diff options
Diffstat (limited to 'drivers')
300 files changed, 28159 insertions, 6875 deletions
diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index 7ae2750..d668a8a 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -48,8 +48,8 @@ #include <linux/genalloc.h> #include <linux/pci.h> #include <linux/aer.h> -#include <acpi/apei.h> -#include <acpi/hed.h> + +#include <acpi/ghes.h> #include <asm/mce.h> #include <asm/tlbflush.h> #include <asm/nmi.h> @@ -84,42 +84,6 @@ ((struct acpi_hest_generic_status *) \ ((struct ghes_estatus_node *)(estatus_node) + 1)) -/* - * One struct ghes is created for each generic hardware error source. - * It provides the context for APEI hardware error timer/IRQ/SCI/NMI - * handler. - * - * estatus: memory buffer for error status block, allocated during - * HEST parsing. - */ -#define GHES_TO_CLEAR 0x0001 -#define GHES_EXITING 0x0002 - -struct ghes { - struct acpi_hest_generic *generic; - struct acpi_hest_generic_status *estatus; - u64 buffer_paddr; - unsigned long flags; - union { - struct list_head list; - struct timer_list timer; - unsigned int irq; - }; -}; - -struct ghes_estatus_node { - struct llist_node llnode; - struct acpi_hest_generic *generic; -}; - -struct ghes_estatus_cache { - u32 estatus_len; - atomic_t count; - struct acpi_hest_generic *generic; - unsigned long long time_in; - struct rcu_head rcu; -}; - bool ghes_disable; module_param_named(disable, ghes_disable, bool, 0); @@ -333,13 +297,6 @@ static void ghes_fini(struct ghes *ghes) apei_unmap_generic_address(&ghes->generic->error_status_address); } -enum { - GHES_SEV_NO = 0x0, - GHES_SEV_CORRECTED = 0x1, - GHES_SEV_RECOVERABLE = 0x2, - GHES_SEV_PANIC = 0x3, -}; - static inline int ghes_severity(int severity) { switch (severity) { @@ -452,7 +409,8 @@ static void ghes_clear_estatus(struct ghes *ghes) ghes->flags &= ~GHES_TO_CLEAR; } -static void ghes_do_proc(const struct acpi_hest_generic_status *estatus) +static void ghes_do_proc(struct ghes *ghes, + const struct acpi_hest_generic_status *estatus) { int sev, sec_sev; struct acpi_hest_generic_data *gdata; @@ -464,6 +422,8 @@ static void ghes_do_proc(const struct acpi_hest_generic_status *estatus) CPER_SEC_PLATFORM_MEM)) { struct cper_sec_mem_err *mem_err; mem_err = (struct cper_sec_mem_err *)(gdata+1); + ghes_edac_report_mem_error(ghes, sev, mem_err); + #ifdef CONFIG_X86_MCE apei_mce_report_mem_error(sev == GHES_SEV_CORRECTED, mem_err); @@ -682,7 +642,7 @@ static int ghes_proc(struct ghes *ghes) if (ghes_print_estatus(NULL, ghes->generic, ghes->estatus)) ghes_estatus_cache_add(ghes->generic, ghes->estatus); } - ghes_do_proc(ghes->estatus); + ghes_do_proc(ghes, ghes->estatus); out: ghes_clear_estatus(ghes); return 0; @@ -775,7 +735,7 @@ static void ghes_proc_in_irq(struct irq_work *irq_work) estatus = GHES_ESTATUS_FROM_NODE(estatus_node); len = apei_estatus_len(estatus); node_len = GHES_ESTATUS_NODE_LEN(len); - ghes_do_proc(estatus); + ghes_do_proc(estatus_node->ghes, estatus); if (!ghes_estatus_cached(estatus)) { generic = estatus_node->generic; if (ghes_print_estatus(NULL, generic, estatus)) @@ -864,6 +824,7 @@ static int ghes_notify_nmi(unsigned int cmd, struct pt_regs *regs) estatus_node = (void *)gen_pool_alloc(ghes_estatus_pool, node_len); if (estatus_node) { + estatus_node->ghes = ghes; estatus_node->generic = ghes->generic; estatus = GHES_ESTATUS_FROM_NODE(estatus_node); memcpy(estatus, ghes->estatus, len); @@ -942,6 +903,11 @@ static int ghes_probe(struct platform_device *ghes_dev) ghes = NULL; goto err; } + + rc = ghes_edac_register(ghes, &ghes_dev->dev); + if (rc < 0) + goto err; + switch (generic->notify.type) { case ACPI_HEST_NOTIFY_POLLED: ghes->timer.function = ghes_poll_func; @@ -954,13 +920,13 @@ static int ghes_probe(struct platform_device *ghes_dev) if (acpi_gsi_to_irq(generic->notify.vector, &ghes->irq)) { pr_err(GHES_PFX "Failed to map GSI to IRQ for generic hardware error source: %d\n", generic->header.source_id); - goto err; + goto err_edac_unreg; } if (request_irq(ghes->irq, ghes_irq_func, 0, "GHES IRQ", ghes)) { pr_err(GHES_PFX "Failed to register IRQ for generic hardware error source: %d\n", generic->header.source_id); - goto err; + goto err_edac_unreg; } break; case ACPI_HEST_NOTIFY_SCI: @@ -986,6 +952,8 @@ static int ghes_probe(struct platform_device *ghes_dev) platform_set_drvdata(ghes_dev, ghes); return 0; +err_edac_unreg: + ghes_edac_unregister(ghes); err: if (ghes) { ghes_fini(ghes); @@ -1038,6 +1006,9 @@ static int ghes_remove(struct platform_device *ghes_dev) } ghes_fini(ghes); + + ghes_edac_unregister(ghes); + kfree(ghes); platform_set_drvdata(ghes_dev, NULL); diff --git a/drivers/acpi/numa.c b/drivers/acpi/numa.c index 59844ee..33e609f 100644 --- a/drivers/acpi/numa.c +++ b/drivers/acpi/numa.c @@ -282,10 +282,10 @@ acpi_table_parse_srat(enum acpi_srat_type id, handler, max_entries); } -static int srat_mem_cnt; - -void __init early_parse_srat(void) +int __init acpi_numa_init(void) { + int cnt = 0; + /* * Should not limit number with cpu num that is from NR_CPUS or nr_cpus= * SRAT cpu entries could have different order with that in MADT. @@ -295,24 +295,21 @@ void __init early_parse_srat(void) /* SRAT: Static Resource Affinity Table */ if (!acpi_table_parse(ACPI_SIG_SRAT, acpi_parse_srat)) { acpi_table_parse_srat(ACPI_SRAT_TYPE_X2APIC_CPU_AFFINITY, - acpi_parse_x2apic_affinity, 0); + acpi_parse_x2apic_affinity, 0); acpi_table_parse_srat(ACPI_SRAT_TYPE_CPU_AFFINITY, - acpi_parse_processor_affinity, 0); - srat_mem_cnt = acpi_table_parse_srat(ACPI_SRAT_TYPE_MEMORY_AFFINITY, - acpi_parse_memory_affinity, - NR_NODE_MEMBLKS); + acpi_parse_processor_affinity, 0); + cnt = acpi_table_parse_srat(ACPI_SRAT_TYPE_MEMORY_AFFINITY, + acpi_parse_memory_affinity, + NR_NODE_MEMBLKS); } -} -int __init acpi_numa_init(void) -{ /* SLIT: System Locality Information Table */ acpi_table_parse(ACPI_SIG_SLIT, acpi_parse_slit); acpi_numa_arch_fixup(); - if (srat_mem_cnt < 0) - return srat_mem_cnt; + if (cnt < 0) + return cnt; else if (!parsed_numa_memblks) return -ENOENT; return 0; diff --git a/drivers/amba/tegra-ahb.c b/drivers/amba/tegra-ahb.c index ab92785..093c435 100644 --- a/drivers/amba/tegra-ahb.c +++ b/drivers/amba/tegra-ahb.c @@ -130,7 +130,7 @@ static inline void gizmo_writel(struct tegra_ahb *ahb, u32 value, u32 offset) writel(value, ahb->regs + offset); } -#ifdef CONFIG_ARCH_TEGRA_3x_SOC +#ifdef CONFIG_TEGRA_IOMMU_SMMU static int tegra_ahb_match_by_smmu(struct device *dev, void *data) { struct tegra_ahb *ahb = dev_get_drvdata(dev); diff --git a/drivers/base/dma-buf.c b/drivers/base/dma-buf.c index ff5b745..2a7cb0d 100644 --- a/drivers/base/dma-buf.c +++ b/drivers/base/dma-buf.c @@ -39,6 +39,8 @@ static int dma_buf_release(struct inode *inode, struct file *file) dmabuf = file->private_data; + BUG_ON(dmabuf->vmapping_counter); + dmabuf->ops->release(dmabuf); kfree(dmabuf); return 0; @@ -445,6 +447,9 @@ EXPORT_SYMBOL_GPL(dma_buf_kunmap); int dma_buf_mmap(struct dma_buf *dmabuf, struct vm_area_struct *vma, unsigned long pgoff) { + struct file *oldfile; + int ret; + if (WARN_ON(!dmabuf || !vma)) return -EINVAL; @@ -458,14 +463,22 @@ int dma_buf_mmap(struct dma_buf *dmabuf, struct vm_area_struct *vma, return -EINVAL; /* readjust the vma */ - if (vma->vm_file) - fput(vma->vm_file); - - vma->vm_file = get_file(dmabuf->file); - + get_file(dmabuf->file); + oldfile = vma->vm_file; + vma->vm_file = dmabuf->file; vma->vm_pgoff = pgoff; - return dmabuf->ops->mmap(dmabuf, vma); + ret = dmabuf->ops->mmap(dmabuf, vma); + if (ret) { + /* restore old parameters on failure */ + vma->vm_file = oldfile; + fput(dmabuf->file); + } else { + if (oldfile) + fput(oldfile); + } + return ret; + } EXPORT_SYMBOL_GPL(dma_buf_mmap); @@ -481,12 +494,34 @@ EXPORT_SYMBOL_GPL(dma_buf_mmap); */ void *dma_buf_vmap(struct dma_buf *dmabuf) { + void *ptr; + if (WARN_ON(!dmabuf)) return NULL; - if (dmabuf->ops->vmap) - return dmabuf->ops->vmap(dmabuf); - return NULL; + if (!dmabuf->ops->vmap) + return NULL; + + mutex_lock(&dmabuf->lock); + if (dmabuf->vmapping_counter) { + dmabuf->vmapping_counter++; + BUG_ON(!dmabuf->vmap_ptr); + ptr = dmabuf->vmap_ptr; + goto out_unlock; + } + + BUG_ON(dmabuf->vmap_ptr); + + ptr = dmabuf->ops->vmap(dmabuf); + if (IS_ERR_OR_NULL(ptr)) + goto out_unlock; + + dmabuf->vmap_ptr = ptr; + dmabuf->vmapping_counter = 1; + +out_unlock: + mutex_unlock(&dmabuf->lock); + return ptr; } EXPORT_SYMBOL_GPL(dma_buf_vmap); @@ -500,7 +535,16 @@ void dma_buf_vunmap(struct dma_buf *dmabuf, void *vaddr) if (WARN_ON(!dmabuf)) return; - if (dmabuf->ops->vunmap) - dmabuf->ops->vunmap(dmabuf, vaddr); + BUG_ON(!dmabuf->vmap_ptr); + BUG_ON(dmabuf->vmapping_counter == 0); + BUG_ON(dmabuf->vmap_ptr != vaddr); + + mutex_lock(&dmabuf->lock); + if (--dmabuf->vmapping_counter == 0) { + if (dmabuf->ops->vunmap) + dmabuf->ops->vunmap(dmabuf, vaddr); + dmabuf->vmap_ptr = NULL; + } + mutex_unlock(&dmabuf->lock); } EXPORT_SYMBOL_GPL(dma_buf_vunmap); diff --git a/drivers/block/DAC960.c b/drivers/block/DAC960.c index 8f12dc7..5b5ee79 100644 --- a/drivers/block/DAC960.c +++ b/drivers/block/DAC960.c @@ -7054,6 +7054,7 @@ static long DAC960_gam_ioctl(struct file *file, unsigned int Request, else ErrorCode = 0; } + break; default: ErrorCode = -ENOTTY; } diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig index 824e09c..5dc0dae 100644 --- a/drivers/block/Kconfig +++ b/drivers/block/Kconfig @@ -63,19 +63,6 @@ config AMIGA_Z2RAM To compile this driver as a module, choose M here: the module will be called z2ram. -config BLK_DEV_XD - tristate "XT hard disk support" - depends on ISA && ISA_DMA_API - select CHECK_SIGNATURE - help - Very old 8 bit hard disk controllers used in the IBM XT computer - will be supported if you say Y here. - - To compile this driver as a module, choose M here: the - module will be called xd. - - It's pretty unlikely that you have one of these: say N. - config GDROM tristate "SEGA Dreamcast GD-ROM drive" depends on SH_DREAMCAST @@ -544,4 +531,14 @@ config BLK_DEV_RBD If unsure, say N. +config BLK_DEV_RSXX + tristate "RamSam PCIe Flash SSD Device Driver" + depends on PCI + help + Device driver for IBM's high speed PCIe SSD + storage devices: RamSan-70 and RamSan-80. + + To compile this driver as a module, choose M here: the + module will be called rsxx. + endif # BLK_DEV diff --git a/drivers/block/Makefile b/drivers/block/Makefile index 17e82df..a3b4023 100644 --- a/drivers/block/Makefile +++ b/drivers/block/Makefile @@ -15,7 +15,6 @@ obj-$(CONFIG_ATARI_FLOPPY) += ataflop.o obj-$(CONFIG_AMIGA_Z2RAM) += z2ram.o obj-$(CONFIG_BLK_DEV_RAM) += brd.o obj-$(CONFIG_BLK_DEV_LOOP) += loop.o -obj-$(CONFIG_BLK_DEV_XD) += xd.o obj-$(CONFIG_BLK_CPQ_DA) += cpqarray.o obj-$(CONFIG_BLK_CPQ_CISS_DA) += cciss.o obj-$(CONFIG_BLK_DEV_DAC960) += DAC960.o @@ -41,4 +40,6 @@ obj-$(CONFIG_BLK_DEV_DRBD) += drbd/ obj-$(CONFIG_BLK_DEV_RBD) += rbd.o obj-$(CONFIG_BLK_DEV_PCIESSD_MTIP32XX) += mtip32xx/ +obj-$(CONFIG_BLK_DEV_RSXX) += rsxx/ + swim_mod-y := swim.o swim_asm.o diff --git a/drivers/block/loop.c b/drivers/block/loop.c index f47dccb..747bb2a 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -162,12 +162,13 @@ static struct loop_func_table *xfer_funcs[MAX_LO_CRYPT] = { static loff_t get_size(loff_t offset, loff_t sizelimit, struct file *file) { - loff_t size, loopsize; + loff_t loopsize; /* Compute loopsize in bytes */ - size = i_size_read(file->f_mapping->host); - loopsize = size - offset; - /* offset is beyond i_size, wierd but possible */ + loopsize = i_size_read(file->f_mapping->host); + if (offset > 0) + loopsize -= offset; + /* offset is beyond i_size, weird but possible */ if (loopsize < 0) return 0; @@ -190,6 +191,7 @@ figure_loop_size(struct loop_device *lo, loff_t offset, loff_t sizelimit) { loff_t size = get_size(offset, sizelimit, lo->lo_backing_file); sector_t x = (sector_t)size; + struct block_device *bdev = lo->lo_device; if (unlikely((loff_t)x != size)) return -EFBIG; @@ -198,6 +200,9 @@ figure_loop_size(struct loop_device *lo, loff_t offset, loff_t sizelimit) if (lo->lo_sizelimit != sizelimit) lo->lo_sizelimit = sizelimit; set_capacity(lo->lo_disk, x); + bd_set_size(bdev, (loff_t)get_capacity(bdev->bd_disk) << 9); + /* let user-space know about the new size */ + kobject_uevent(&disk_to_dev(bdev->bd_disk)->kobj, KOBJ_CHANGE); return 0; } @@ -1091,10 +1096,10 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info) return err; if (lo->lo_offset != info->lo_offset || - lo->lo_sizelimit != info->lo_sizelimit) { + lo->lo_sizelimit != info->lo_sizelimit) if (figure_loop_size(lo, info->lo_offset, info->lo_sizelimit)) return -EFBIG; - } + loop_config_discard(lo); memcpy(lo->lo_file_name, info->lo_file_name, LO_NAME_SIZE); @@ -1271,28 +1276,10 @@ loop_get_status64(struct loop_device *lo, struct loop_info64 __user *arg) { static int loop_set_capacity(struct loop_device *lo, struct block_device *bdev) { - int err; - sector_t sec; - loff_t sz; - - err = -ENXIO; if (unlikely(lo->lo_state != Lo_bound)) - goto out; - err = figure_loop_size(lo, lo->lo_offset, lo->lo_sizelimit); - if (unlikely(err)) - goto out; - sec = get_capacity(lo->lo_disk); - /* the width of sector_t may be narrow for bit-shift */ - sz = sec; - sz <<= 9; - mutex_lock(&bdev->bd_mutex); - bd_set_size(bdev, sz); - /* let user-space know about the new size */ - kobject_uevent(&disk_to_dev(bdev->bd_disk)->kobj, KOBJ_CHANGE); - mutex_unlock(&bdev->bd_mutex); + return -ENXIO; - out: - return err; + return figure_loop_size(lo, lo->lo_offset, lo->lo_sizelimit); } static int lo_ioctl(struct block_device *bdev, fmode_t mode, @@ -1845,11 +1832,15 @@ static int __init loop_init(void) max_part = (1UL << part_shift) - 1; } - if ((1UL << part_shift) > DISK_MAX_PARTS) - return -EINVAL; + if ((1UL << part_shift) > DISK_MAX_PARTS) { + err = -EINVAL; + goto misc_out; + } - if (max_loop > 1UL << (MINORBITS - part_shift)) - return -EINVAL; + if (max_loop > 1UL << (MINORBITS - part_shift)) { + err = -EINVAL; + goto misc_out; + } /* * If max_loop is specified, create that many devices upfront. @@ -1867,8 +1858,10 @@ static int __init loop_init(void) range = 1UL << MINORBITS; } - if (register_blkdev(LOOP_MAJOR, "loop")) - return -EIO; + if (register_blkdev(LOOP_MAJOR, "loop")) { + err = -EIO; + goto misc_out; + } blk_register_region(MKDEV(LOOP_MAJOR, 0), range, THIS_MODULE, loop_probe, NULL, NULL); @@ -1881,6 +1874,10 @@ static int __init loop_init(void) printk(KERN_INFO "loop: module loaded\n"); return 0; + +misc_out: + misc_deregister(&loop_misc); + return err; } static int loop_exit_cb(int id, void *ptr, void *data) diff --git a/drivers/block/mtip32xx/Kconfig b/drivers/block/mtip32xx/Kconfig index 0ba837f..1fca1f99 100644 --- a/drivers/block/mtip32xx/Kconfig +++ b/drivers/block/mtip32xx/Kconfig @@ -4,6 +4,6 @@ config BLK_DEV_PCIESSD_MTIP32XX tristate "Block Device Driver for Micron PCIe SSDs" - depends on PCI + depends on PCI && GENERIC_HARDIRQS help This enables the block driver for Micron PCIe SSDs. diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c index 3fd1009..11cc952 100644 --- a/drivers/block/mtip32xx/mtip32xx.c +++ b/drivers/block/mtip32xx/mtip32xx.c @@ -88,6 +88,8 @@ static int instance; static int mtip_major; static struct dentry *dfs_parent; +static u32 cpu_use[NR_CPUS]; + static DEFINE_SPINLOCK(rssd_index_lock); static DEFINE_IDA(rssd_index_ida); @@ -296,16 +298,17 @@ static int hba_reset_nosleep(struct driver_data *dd) */ static inline void mtip_issue_ncq_command(struct mtip_port *port, int tag) { - atomic_set(&port->commands[tag].active, 1); + int group = tag >> 5; - spin_lock(&port->cmd_issue_lock); + atomic_set(&port->commands[tag].active, 1); + /* guard SACT and CI registers */ + spin_lock(&port->cmd_issue_lock[group]); writel((1 << MTIP_TAG_BIT(tag)), port->s_active[MTIP_TAG_INDEX(tag)]); writel((1 << MTIP_TAG_BIT(tag)), port->cmd_issue[MTIP_TAG_INDEX(tag)]); - - spin_unlock(&port->cmd_issue_lock); + spin_unlock(&port->cmd_issue_lock[group]); /* Set the command's timeout value.*/ port->commands[tag].comp_time = jiffies + msecs_to_jiffies( @@ -964,56 +967,56 @@ handle_tfe_exit: /* * Handle a set device bits interrupt */ -static inline void mtip_process_sdbf(struct driver_data *dd) +static inline void mtip_workq_sdbfx(struct mtip_port *port, int group, + u32 completed) { - struct mtip_port *port = dd->port; - int group, tag, bit; - u32 completed; + struct driver_data *dd = port->dd; + int tag, bit; struct mtip_cmd *command; - /* walk all bits in all slot groups */ - for (group = 0; group < dd->slot_groups; group++) { - completed = readl(port->completed[group]); - if (!completed) - continue; + if (!completed) { + WARN_ON_ONCE(!completed); + return; + } + /* clear completed status register in the hardware.*/ + writel(completed, port->completed[group]); - /* clear completed status register in the hardware.*/ - writel(completed, port->completed[group]); + /* Process completed commands. */ + for (bit = 0; (bit < 32) && completed; bit++) { + if (completed & 0x01) { + tag = (group << 5) | bit; - /* Process completed commands. */ - for (bit = 0; - (bit < 32) && completed; - bit++, completed >>= 1) { - if (completed & 0x01) { - tag = (group << 5) | bit; + /* skip internal command slot. */ + if (unlikely(tag == MTIP_TAG_INTERNAL)) + continue; - /* skip internal command slot. */ - if (unlikely(tag == MTIP_TAG_INTERNAL)) - continue; + command = &port->commands[tag]; + /* make internal callback */ + if (likely(command->comp_func)) { + command->comp_func( + port, + tag, + command->comp_data, + 0); + } else { + dev_warn(&dd->pdev->dev, + "Null completion " + "for tag %d", + tag); - command = &port->commands[tag]; - /* make internal callback */ - if (likely(command->comp_func)) { - command->comp_func( - port, - tag, - command->comp_data, - 0); - } else { - dev_warn(&dd->pdev->dev, - "Null completion " - "for tag %d", - tag); - - if (mtip_check_surprise_removal( - dd->pdev)) { - mtip_command_cleanup(dd); - return; - } + if (mtip_check_surprise_removal( + dd->pdev)) { + mtip_command_cleanup(dd); + return; } } } + completed >>= 1; } + + /* If last, re-enable interrupts */ + if (atomic_dec_return(&dd->irq_workers_active) == 0) + writel(0xffffffff, dd->mmio + HOST_IRQ_STAT); } /* @@ -1072,6 +1075,8 @@ static inline irqreturn_t mtip_handle_irq(struct driver_data *data) struct mtip_port *port = dd->port; u32 hba_stat, port_stat; int rv = IRQ_NONE; + int do_irq_enable = 1, i, workers; + struct mtip_work *twork; hba_stat = readl(dd->mmio + HOST_IRQ_STAT); if (hba_stat) { @@ -1082,8 +1087,42 @@ static inline irqreturn_t mtip_handle_irq(struct driver_data *data) writel(port_stat, port->mmio + PORT_IRQ_STAT); /* Demux port status */ - if (likely(port_stat & PORT_IRQ_SDB_FIS)) - mtip_process_sdbf(dd); + if (likely(port_stat & PORT_IRQ_SDB_FIS)) { + do_irq_enable = 0; + WARN_ON_ONCE(atomic_read(&dd->irq_workers_active) != 0); + + /* Start at 1: group zero is always local? */ + for (i = 0, workers = 0; i < MTIP_MAX_SLOT_GROUPS; + i++) { + twork = &dd->work[i]; + twork->completed = readl(port->completed[i]); + if (twork->completed) + workers++; + } + + atomic_set(&dd->irq_workers_active, workers); + if (workers) { + for (i = 1; i < MTIP_MAX_SLOT_GROUPS; i++) { + twork = &dd->work[i]; + if (twork->completed) + queue_work_on( + twork->cpu_binding, + dd->isr_workq, + &twork->work); + } + + if (likely(dd->work[0].completed)) + mtip_workq_sdbfx(port, 0, + dd->work[0].completed); + + } else { + /* + * Chip quirk: SDB interrupt but nothing + * to complete + */ + do_irq_enable = 1; + } + } if (unlikely(port_stat & PORT_IRQ_ERR)) { if (unlikely(mtip_check_surprise_removal(dd->pdev))) { @@ -1103,21 +1142,13 @@ static inline irqreturn_t mtip_handle_irq(struct driver_data *data) } /* acknowledge interrupt */ - writel(hba_stat, dd->mmio + HOST_IRQ_STAT); + if (unlikely(do_irq_enable)) + writel(hba_stat, dd->mmio + HOST_IRQ_STAT); return rv; } /* - * Wrapper for mtip_handle_irq - * (ignores return code) - */ -static void mtip_tasklet(unsigned long data) -{ - mtip_handle_irq((struct driver_data *) data); -} - -/* * HBA interrupt subroutine. * * @irq IRQ number. @@ -1130,8 +1161,8 @@ static void mtip_tasklet(unsigned long data) static irqreturn_t mtip_irq_handler(int irq, void *instance) { struct driver_data *dd = instance; - tasklet_schedule(&dd->tasklet); - return IRQ_HANDLED; + + return mtip_handle_irq(dd); } static void mtip_issue_non_ncq_command(struct mtip_port *port, int tag) @@ -1489,6 +1520,12 @@ static int mtip_get_identify(struct mtip_port *port, void __user *user_buffer) } #endif + /* Demux ID.DRAT & ID.RZAT to determine trim support */ + if (port->identify[69] & (1 << 14) && port->identify[69] & (1 << 5)) + port->dd->trim_supp = true; + else + port->dd->trim_supp = false; + /* Set the identify buffer as valid. */ port->identify_valid = 1; @@ -1676,6 +1713,81 @@ static int mtip_get_smart_attr(struct mtip_port *port, unsigned int id, } /* + * Trim unused sectors + * + * @dd pointer to driver_data structure + * @lba starting lba + * @len # of 512b sectors to trim + * + * return value + * -ENOMEM Out of dma memory + * -EINVAL Invalid parameters passed in, trim not supported + * -EIO Error submitting trim request to hw + */ +static int mtip_send_trim(struct driver_data *dd, unsigned int lba, unsigned int len) +{ + int i, rv = 0; + u64 tlba, tlen, sect_left; + struct mtip_trim_entry *buf; + dma_addr_t dma_addr; + struct host_to_dev_fis fis; + + if (!len || dd->trim_supp == false) + return -EINVAL; + + /* Trim request too big */ + WARN_ON(len > (MTIP_MAX_TRIM_ENTRY_LEN * MTIP_MAX_TRIM_ENTRIES)); + + /* Trim request not aligned on 4k boundary */ + WARN_ON(len % 8 != 0); + + /* Warn if vu_trim structure is too big */ + WARN_ON(sizeof(struct mtip_trim) > ATA_SECT_SIZE); + + /* Allocate a DMA buffer for the trim structure */ + buf = dmam_alloc_coherent(&dd->pdev->dev, ATA_SECT_SIZE, &dma_addr, + GFP_KERNEL); + if (!buf) + return -ENOMEM; + memset(buf, 0, ATA_SECT_SIZE); + + for (i = 0, sect_left = len, tlba = lba; + i < MTIP_MAX_TRIM_ENTRIES && sect_left; + i++) { + tlen = (sect_left >= MTIP_MAX_TRIM_ENTRY_LEN ? + MTIP_MAX_TRIM_ENTRY_LEN : + sect_left); + buf[i].lba = __force_bit2int cpu_to_le32(tlba); + buf[i].range = __force_bit2int cpu_to_le16(tlen); + tlba += tlen; + sect_left -= tlen; + } + WARN_ON(sect_left != 0); + + /* Build the fis */ + memset(&fis, 0, sizeof(struct host_to_dev_fis)); + fis.type = 0x27; + fis.opts = 1 << 7; + fis.command = 0xfb; + fis.features = 0x60; + fis.sect_count = 1; + fis.device = ATA_DEVICE_OBS; + + if (mtip_exec_internal_command(dd->port, + &fis, + 5, + dma_addr, + ATA_SECT_SIZE, + 0, + GFP_KERNEL, + MTIP_TRIM_TIMEOUT_MS) < 0) + rv = -EIO; + + dmam_free_coherent(&dd->pdev->dev, ATA_SECT_SIZE, buf, dma_addr); + return rv; +} + +/* * Get the drive capacity. * * @dd Pointer to the device data structure. @@ -3005,20 +3117,24 @@ static int mtip_hw_init(struct driver_data *dd) hba_setup(dd); - tasklet_init(&dd->tasklet, mtip_tasklet, (unsigned long)dd); - - dd->port = kzalloc(sizeof(struct mtip_port), GFP_KERNEL); + dd->port = kzalloc_node(sizeof(struct mtip_port), GFP_KERNEL, + dd->numa_node); if (!dd->port) { dev_err(&dd->pdev->dev, "Memory allocation: port structure\n"); return -ENOMEM; } + /* Continue workqueue setup */ + for (i = 0; i < MTIP_MAX_SLOT_GROUPS; i++) + dd->work[i].port = dd->port; + /* Counting semaphore to track command slot usage */ sema_init(&dd->port->cmd_slot, num_command_slots - 1); /* Spinlock to prevent concurrent issue */ - spin_lock_init(&dd->port->cmd_issue_lock); + for (i = 0; i < MTIP_MAX_SLOT_GROUPS; i++) + spin_lock_init(&dd->port->cmd_issue_lock[i]); /* Set the port mmio base address. */ dd->port->mmio = dd->mmio + PORT_OFFSET; @@ -3165,6 +3281,7 @@ static int mtip_hw_init(struct driver_data *dd) "Unable to allocate IRQ %d\n", dd->pdev->irq); goto out2; } + irq_set_affinity_hint(dd->pdev->irq, get_cpu_mask(dd->isr_binding)); /* Enable interrupts on the HBA. */ writel(readl(dd->mmio + HOST_CTL) | HOST_IRQ_EN, @@ -3241,7 +3358,8 @@ out3: writel(readl(dd->mmio + HOST_CTL) & ~HOST_IRQ_EN, dd->mmio + HOST_CTL); - /*Release the IRQ. */ + /* Release the IRQ. */ + irq_set_affinity_hint(dd->pdev->irq, NULL); devm_free_irq(&dd->pdev->dev, dd->pdev->irq, dd); out2: @@ -3291,11 +3409,9 @@ static int mtip_hw_exit(struct driver_data *dd) del_timer_sync(&dd->port->cmd_timer); /* Release the IRQ. */ + irq_set_affinity_hint(dd->pdev->irq, NULL); devm_free_irq(&dd->pdev->dev, dd->pdev->irq, dd); - /* Stop the bottom half tasklet. */ - tasklet_kill(&dd->tasklet); - /* Free the command/command header memory. */ dmam_free_coherent(&dd->pdev->dev, HW_PORT_PRIV_DMA_SZ + (ATA_SECT_SIZE * 4), @@ -3641,6 +3757,12 @@ static void mtip_make_request(struct request_queue *queue, struct bio *bio) } } + if (unlikely(bio->bi_rw & REQ_DISCARD)) { + bio_endio(bio, mtip_send_trim(dd, bio->bi_sector, + bio_sectors(bio))); + return; + } + if (unlikely(!bio_has_data(bio))) { blk_queue_flush(queue, 0); bio_endio(bio, 0); @@ -3711,7 +3833,7 @@ static int mtip_block_initialize(struct driver_data *dd) goto protocol_init_error; } - dd->disk = alloc_disk(MTIP_MAX_MINORS); + dd->disk = alloc_disk_node(MTIP_MAX_MINORS, dd->numa_node); if (dd->disk == NULL) { dev_err(&dd->pdev->dev, "Unable to allocate gendisk structure\n"); @@ -3755,7 +3877,7 @@ static int mtip_block_initialize(struct driver_data *dd) skip_create_disk: /* Allocate the request queue. */ - dd->queue = blk_alloc_queue(GFP_KERNEL); + dd->queue = blk_alloc_queue_node(GFP_KERNEL, dd->numa_node); if (dd->queue == NULL) { dev_err(&dd->pdev->dev, "Unable to allocate request queue\n"); @@ -3783,6 +3905,15 @@ skip_create_disk: */ blk_queue_flush(dd->queue, 0); + /* Signal trim support */ + if (dd->trim_supp == true) { + set_bit(QUEUE_FLAG_DISCARD, &dd->queue->queue_flags); + dd->queue->limits.discard_granularity = 4096; + blk_queue_max_discard_sectors(dd->queue, + MTIP_MAX_TRIM_ENTRY_LEN * MTIP_MAX_TRIM_ENTRIES); + dd->queue->limits.discard_zeroes_data = 0; + } + /* Set the capacity of the device in 512 byte sectors. */ if (!(mtip_hw_get_capacity(dd, &capacity))) { dev_warn(&dd->pdev->dev, @@ -3813,9 +3944,8 @@ skip_create_disk: start_service_thread: sprintf(thd_name, "mtip_svc_thd_%02d", index); - - dd->mtip_svc_handler = kthread_run(mtip_service_thread, - dd, thd_name); + dd->mtip_svc_handler = kthread_create_on_node(mtip_service_thread, + dd, dd->numa_node, thd_name); if (IS_ERR(dd->mtip_svc_handler)) { dev_err(&dd->pdev->dev, "service thread failed to start\n"); @@ -3823,7 +3953,7 @@ start_service_thread: rv = -EFAULT; goto kthread_run_error; } - + wake_up_process(dd->mtip_svc_handler); if (wait_for_rebuild == MTIP_FTL_REBUILD_MAGIC) rv = wait_for_rebuild; @@ -3963,6 +4093,56 @@ static int mtip_block_resume(struct driver_data *dd) return 0; } +static void drop_cpu(int cpu) +{ + cpu_use[cpu]--; +} + +static int get_least_used_cpu_on_node(int node) +{ + int cpu, least_used_cpu, least_cnt; + const struct cpumask *node_mask; + + node_mask = cpumask_of_node(node); + least_used_cpu = cpumask_first(node_mask); + least_cnt = cpu_use[least_used_cpu]; + cpu = least_used_cpu; + + for_each_cpu(cpu, node_mask) { + if (cpu_use[cpu] < least_cnt) { + least_used_cpu = cpu; + least_cnt = cpu_use[cpu]; + } + } + cpu_use[least_used_cpu]++; + return least_used_cpu; +} + +/* Helper for selecting a node in round robin mode */ +static inline int mtip_get_next_rr_node(void) +{ + static int next_node = -1; + + if (next_node == -1) { + next_node = first_online_node; + return next_node; + } + + next_node = next_online_node(next_node); + if (next_node == MAX_NUMNODES) + next_node = first_online_node; + return next_node; +} + +static DEFINE_HANDLER(0); +static DEFINE_HANDLER(1); +static DEFINE_HANDLER(2); +static DEFINE_HANDLER(3); +static DEFINE_HANDLER(4); +static DEFINE_HANDLER(5); +static DEFINE_HANDLER(6); +static DEFINE_HANDLER(7); + /* * Called for each supported PCI device detected. * @@ -3977,9 +4157,25 @@ static int mtip_pci_probe(struct pci_dev *pdev, { int rv = 0; struct driver_data *dd = NULL; + char cpu_list[256]; + const struct cpumask *node_mask; + int cpu, i = 0, j = 0; + int my_node = NUMA_NO_NODE; /* Allocate memory for this devices private data. */ - dd = kzalloc(sizeof(struct driver_data), GFP_KERNEL); + my_node = pcibus_to_node(pdev->bus); + if (my_node != NUMA_NO_NODE) { + if (!node_online(my_node)) + my_node = mtip_get_next_rr_node(); + } else { + dev_info(&pdev->dev, "Kernel not reporting proximity, choosing a node\n"); + my_node = mtip_get_next_rr_node(); + } + dev_info(&pdev->dev, "NUMA node %d (closest: %d,%d, probe on %d:%d)\n", + my_node, pcibus_to_node(pdev->bus), dev_to_node(&pdev->dev), + cpu_to_node(smp_processor_id()), smp_processor_id()); + + dd = kzalloc_node(sizeof(struct driver_data), GFP_KERNEL, my_node); if (dd == NULL) { dev_err(&pdev->dev, "Unable to allocate memory for driver data\n"); @@ -4016,19 +4212,82 @@ static int mtip_pci_probe(struct pci_dev *pdev, } } - pci_set_master(pdev); + /* Copy the info we may need later into the private data structure. */ + dd->major = mtip_major; + dd->instance = instance; + dd->pdev = pdev; + dd->numa_node = my_node; + memset(dd->workq_name, 0, 32); + snprintf(dd->workq_name, 31, "mtipq%d", dd->instance); + + dd->isr_workq = create_workqueue(dd->workq_name); + if (!dd->isr_workq) { + dev_warn(&pdev->dev, "Can't create wq %d\n", dd->instance); + goto block_initialize_err; + } + + memset(cpu_list, 0, sizeof(cpu_list)); + + node_mask = cpumask_of_node(dd->numa_node); + if (!cpumask_empty(node_mask)) { + for_each_cpu(cpu, node_mask) + { + snprintf(&cpu_list[j], 256 - j, "%d ", cpu); + j = strlen(cpu_list); + } + + dev_info(&pdev->dev, "Node %d on package %d has %d cpu(s): %s\n", + dd->numa_node, + topology_physical_package_id(cpumask_first(node_mask)), + nr_cpus_node(dd->numa_node), + cpu_list); + } else + dev_dbg(&pdev->dev, "mtip32xx: node_mask empty\n"); + + dd->isr_binding = get_least_used_cpu_on_node(dd->numa_node); + dev_info(&pdev->dev, "Initial IRQ binding node:cpu %d:%d\n", + cpu_to_node(dd->isr_binding), dd->isr_binding); + + /* first worker context always runs in ISR */ + dd->work[0].cpu_binding = dd->isr_binding; + dd->work[1].cpu_binding = get_least_used_cpu_on_node(dd->numa_node); + dd->work[2].cpu_binding = get_least_used_cpu_on_node(dd->numa_node); + dd->work[3].cpu_binding = dd->work[0].cpu_binding; + dd->work[4].cpu_binding = dd->work[1].cpu_binding; + dd->work[5].cpu_binding = dd->work[2].cpu_binding; + dd->work[6].cpu_binding = dd->work[2].cpu_binding; + dd->work[7].cpu_binding = dd->work[1].cpu_binding; + + /* Log the bindings */ + for_each_present_cpu(cpu) { + memset(cpu_list, 0, sizeof(cpu_list)); + for (i = 0, j = 0; i < MTIP_MAX_SLOT_GROUPS; i++) { + if (dd->work[i].cpu_binding == cpu) { + snprintf(&cpu_list[j], 256 - j, "%d ", i); + j = strlen(cpu_list); + } + } + if (j) + dev_info(&pdev->dev, "CPU %d: WQs %s\n", cpu, cpu_list); + } + + INIT_WORK(&dd->work[0].work, mtip_workq_sdbf0); + INIT_WORK(&dd->work[1].work, mtip_workq_sdbf1); + INIT_WORK(&dd->work[2].work, mtip_workq_sdbf2); + INIT_WORK(&dd->work[3].work, mtip_workq_sdbf3); + INIT_WORK(&dd->work[4].work, mtip_workq_sdbf4); + INIT_WORK(&dd->work[5].work, mtip_workq_sdbf5); + INIT_WORK(&dd->work[6].work, mtip_workq_sdbf6); + INIT_WORK(&dd->work[7].work, mtip_workq_sdbf7); + + pci_set_master(pdev); if (pci_enable_msi(pdev)) { dev_warn(&pdev->dev, "Unable to enable MSI interrupt.\n"); goto block_initialize_err; } - /* Copy the info we may need later into the private data structure. */ - dd->major = mtip_major; - dd->instance = instance; - dd->pdev = pdev; - /* Initialize the block layer. */ rv = mtip_block_initialize(dd); if (rv < 0) { @@ -4048,7 +4307,13 @@ static int mtip_pci_probe(struct pci_dev *pdev, block_initialize_err: pci_disable_msi(pdev); - + if (dd->isr_workq) { + flush_workqueue(dd->isr_workq); + destroy_workqueue(dd->isr_workq); + drop_cpu(dd->work[0].cpu_binding); + drop_cpu(dd->work[1].cpu_binding); + drop_cpu(dd->work[2].cpu_binding); + } setmask_err: pcim_iounmap_regions(pdev, 1 << MTIP_ABAR); @@ -4089,6 +4354,14 @@ static void mtip_pci_remove(struct pci_dev *pdev) /* Clean up the block layer. */ mtip_block_remove(dd); + if (dd->isr_workq) { + flush_workqueue(dd->isr_workq); + destroy_workqueue(dd->isr_workq); + drop_cpu(dd->work[0].cpu_binding); + drop_cpu(dd->work[1].cpu_binding); + drop_cpu(dd->work[2].cpu_binding); + } + pci_disable_msi(pdev); kfree(dd); diff --git a/drivers/block/mtip32xx/mtip32xx.h b/drivers/block/mtip32xx/mtip32xx.h index b174264..3bffff5 100644 --- a/drivers/block/mtip32xx/mtip32xx.h +++ b/drivers/block/mtip32xx/mtip32xx.h @@ -164,6 +164,35 @@ struct smart_attr { u8 res[3]; } __packed; +struct mtip_work { + struct work_struct work; + void *port; + int cpu_binding; + u32 completed; +} ____cacheline_aligned_in_smp; + +#define DEFINE_HANDLER(group) \ + void mtip_workq_sdbf##group(struct work_struct *work) \ + { \ + struct mtip_work *w = (struct mtip_work *) work; \ + mtip_workq_sdbfx(w->port, group, w->completed); \ + } + +#define MTIP_TRIM_TIMEOUT_MS 240000 +#define MTIP_MAX_TRIM_ENTRIES 8 +#define MTIP_MAX_TRIM_ENTRY_LEN 0xfff8 + +struct mtip_trim_entry { + u32 lba; /* starting lba of region */ + u16 rsvd; /* unused */ + u16 range; /* # of 512b blocks to trim */ +} __packed; + +struct mtip_trim { + /* Array of regions to trim */ + struct mtip_trim_entry entry[MTIP_MAX_TRIM_ENTRIES]; +} __packed; + /* Register Frame Information Structure (FIS), host to device. */ struct host_to_dev_fis { /* @@ -424,7 +453,7 @@ struct mtip_port { */ struct semaphore cmd_slot; /* Spinlock for working around command-issue bug. */ - spinlock_t cmd_issue_lock; + spinlock_t cmd_issue_lock[MTIP_MAX_SLOT_GROUPS]; }; /* @@ -447,9 +476,6 @@ struct driver_data { struct mtip_port *port; /* Pointer to the port data structure. */ - /* Tasklet used to process the bottom half of the ISR. */ - struct tasklet_struct tasklet; - unsigned product_type; /* magic value declaring the product type */ unsigned slot_groups; /* number of slot groups the product supports */ @@ -461,6 +487,20 @@ struct driver_data { struct task_struct *mtip_svc_handler; /* task_struct of svc thd */ struct dentry *dfs_node; + + bool trim_supp; /* flag indicating trim support */ + + int numa_node; /* NUMA support */ + + char workq_name[32]; + + struct workqueue_struct *isr_workq; + + struct mtip_work work[MTIP_MAX_SLOT_GROUPS]; + + atomic_t irq_workers_active; + + int isr_binding; }; #endif diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 89576a0..6c81a4c 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -52,9 +52,12 @@ #define SECTOR_SHIFT 9 #define SECTOR_SIZE (1ULL << SECTOR_SHIFT) -/* It might be useful to have this defined elsewhere too */ +/* It might be useful to have these defined elsewhere */ -#define U64_MAX ((u64) (~0ULL)) +#define U8_MAX ((u8) (~0U)) +#define U16_MAX ((u16) (~0U)) +#define U32_MAX ((u32) (~0U)) +#define U64_MAX ((u64) (~0ULL)) #define RBD_DRV_NAME "rbd" #define RBD_DRV_NAME_LONG "rbd (rados block device)" @@ -66,7 +69,6 @@ (NAME_MAX - (sizeof (RBD_SNAP_DEV_NAME_PREFIX) - 1)) #define RBD_MAX_SNAP_COUNT 510 /* allows max snapc to fit in 4KB */ -#define RBD_MAX_OPT_LEN 1024 #define RBD_SNAP_HEAD_NAME "-" @@ -93,8 +95,6 @@ #define DEV_NAME_LEN 32 #define MAX_INT_FORMAT_WIDTH ((5 * sizeof (int)) / 2 + 1) -#define RBD_READ_ONLY_DEFAULT false - /* * block device image metadata (in-memory version) */ @@ -119,16 +119,33 @@ struct rbd_image_header { * An rbd image specification. * * The tuple (pool_id, image_id, snap_id) is sufficient to uniquely - * identify an image. + * identify an image. Each rbd_dev structure includes a pointer to + * an rbd_spec structure that encapsulates this identity. + * + * Each of the id's in an rbd_spec has an associated name. For a + * user-mapped image, the names are supplied and the id's associated + * with them are looked up. For a layered image, a parent image is + * defined by the tuple, and the names are looked up. + * + * An rbd_dev structure contains a parent_spec pointer which is + * non-null if the image it represents is a child in a layered + * image. This pointer will refer to the rbd_spec structure used + * by the parent rbd_dev for its own identity (i.e., the structure + * is shared between the parent and child). + * + * Since these structures are populated once, during the discovery + * phase of image construction, they are effectively immutable so + * we make no effort to synchronize access to them. + * + * Note that code herein does not assume the image name is known (it + * could be a null pointer). */ struct rbd_spec { u64 pool_id; char *pool_name; char *image_id; - size_t image_id_len; char *image_name; - size_t image_name_len; u64 snap_id; char *snap_name; @@ -136,10 +153,6 @@ struct rbd_spec { struct kref kref; }; -struct rbd_options { - bool read_only; -}; - /* * an instance of the client. multiple devices may share an rbd client. */ @@ -149,37 +162,76 @@ struct rbd_client { struct list_head node; }; -/* - * a request completion status - */ -struct rbd_req_status { - int done; - int rc; - u64 bytes; +struct rbd_img_request; +typedef void (*rbd_img_callback_t)(struct rbd_img_request *); + +#define BAD_WHICH U32_MAX /* Good which or bad which, which? */ + +struct rbd_obj_request; +typedef void (*rbd_obj_callback_t)(struct rbd_obj_request *); + +enum obj_request_type { + OBJ_REQUEST_NODATA, OBJ_REQUEST_BIO, OBJ_REQUEST_PAGES }; -/* - * a collection of requests - */ -struct rbd_req_coll { - int total; - int num_done; +struct rbd_obj_request { + const char *object_name; + u64 offset; /* object start byte */ + u64 length; /* bytes from offset */ + + struct rbd_img_request *img_request; + struct list_head links; /* img_request->obj_requests */ + u32 which; /* posn image request list */ + + enum obj_request_type type; + union { + struct bio *bio_list; + struct { + struct page **pages; + u32 page_count; + }; + }; + + struct ceph_osd_request *osd_req; + + u64 xferred; /* bytes transferred */ + u64 version; + int result; + atomic_t done; + + rbd_obj_callback_t callback; + struct completion completion; + struct kref kref; - struct rbd_req_status status[0]; }; -/* - * a single io request - */ -struct rbd_request { - struct request *rq; /* blk layer request */ - struct bio *bio; /* cloned bio */ - struct page **pages; /* list of used pages */ - u64 len; - int coll_index; - struct rbd_req_coll *coll; +struct rbd_img_request { + struct request *rq; + struct rbd_device *rbd_dev; + u64 offset; /* starting image byte offset */ + u64 length; /* byte count from offset */ + bool write_request; /* false for read */ + union { + struct ceph_snap_context *snapc; /* for writes */ + u64 snap_id; /* for reads */ + }; + spinlock_t completion_lock;/* protects next_completion */ + u32 next_completion; + rbd_img_callback_t callback; + + u32 obj_request_count; + struct list_head obj_requests; /* rbd_obj_request structs */ + + struct kref kref; }; +#define for_each_obj_request(ireq, oreq) \ + list_for_each_entry(oreq, &(ireq)->obj_requests, links) +#define for_each_obj_request_from(ireq, oreq) \ + list_for_each_entry_from(oreq, &(ireq)->obj_requests, links) +#define for_each_obj_request_safe(ireq, oreq, n) \ + list_for_each_entry_safe_reverse(oreq, n, &(ireq)->obj_requests, links) + struct rbd_snap { struct device dev; const char *name; @@ -209,16 +261,18 @@ struct rbd_device { char name[DEV_NAME_LEN]; /* blkdev name, e.g. rbd3 */ - spinlock_t lock; /* queue lock */ + spinlock_t lock; /* queue, flags, open_count */ struct rbd_image_header header; - bool exists; + unsigned long flags; /* possibly lock protected */ struct rbd_spec *spec; char *header_name; + struct ceph_file_layout layout; + struct ceph_osd_event *watch_event; - struct ceph_osd_request *watch_request; + struct rbd_obj_request *watch_request; struct rbd_spec *parent_spec; u64 parent_overlap; @@ -235,7 +289,19 @@ struct rbd_device { /* sysfs related */ struct device dev; - unsigned long open_count; + unsigned long open_count; /* protected by lock */ +}; + +/* + * Flag bits for rbd_dev->flags. If atomicity is required, + * rbd_dev->lock is used to protect access. + * + * Currently, only the "removing" flag (which is coupled with the + * "open_count" field) requires atomic access. + */ +enum rbd_dev_flags { + RBD_DEV_FLAG_EXISTS, /* mapped snapshot has not been deleted */ + RBD_DEV_FLAG_REMOVING, /* this mapping is being removed */ }; static DEFINE_MUTEX(ctl_mutex); /* Serialize open/close/setup/teardown */ @@ -277,6 +343,33 @@ static struct device rbd_root_dev = { .release = rbd_root_dev_release, }; +static __printf(2, 3) +void rbd_warn(struct rbd_device *rbd_dev, const char *fmt, ...) +{ + struct va_format vaf; + va_list args; + + va_start(args, fmt); + vaf.fmt = fmt; + vaf.va = &args; + + if (!rbd_dev) + printk(KERN_WARNING "%s: %pV\n", RBD_DRV_NAME, &vaf); + else if (rbd_dev->disk) + printk(KERN_WARNING "%s: %s: %pV\n", + RBD_DRV_NAME, rbd_dev->disk->disk_name, &vaf); + else if (rbd_dev->spec && rbd_dev->spec->image_name) + printk(KERN_WARNING "%s: image %s: %pV\n", + RBD_DRV_NAME, rbd_dev->spec->image_name, &vaf); + else if (rbd_dev->spec && rbd_dev->spec->image_id) + printk(KERN_WARNING "%s: id %s: %pV\n", + RBD_DRV_NAME, rbd_dev->spec->image_id, &vaf); + else /* punt */ + printk(KERN_WARNING "%s: rbd_dev %p: %pV\n", + RBD_DRV_NAME, rbd_dev, &vaf); + va_end(args); +} + #ifdef RBD_DEBUG #define rbd_assert(expr) \ if (unlikely(!(expr))) { \ @@ -296,14 +389,23 @@ static int rbd_dev_v2_refresh(struct rbd_device *rbd_dev, u64 *hver); static int rbd_open(struct block_device *bdev, fmode_t mode) { struct rbd_device *rbd_dev = bdev->bd_disk->private_data; + bool removing = false; if ((mode & FMODE_WRITE) && rbd_dev->mapping.read_only) return -EROFS; + spin_lock_irq(&rbd_dev->lock); + if (test_bit(RBD_DEV_FLAG_REMOVING, &rbd_dev->flags)) + removing = true; + else + rbd_dev->open_count++; + spin_unlock_irq(&rbd_dev->lock); + if (removing) + return -ENOENT; + mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); (void) get_device(&rbd_dev->dev); set_device_ro(bdev, rbd_dev->mapping.read_only); - rbd_dev->open_count++; mutex_unlock(&ctl_mutex); return 0; @@ -312,10 +414,14 @@ static int rbd_open(struct block_device *bdev, fmode_t mode) static int rbd_release(struct gendisk *disk, fmode_t mode) { struct rbd_device *rbd_dev = disk->private_data; + unsigned long open_count_before; + + spin_lock_irq(&rbd_dev->lock); + open_count_before = rbd_dev->open_count--; + spin_unlock_irq(&rbd_dev->lock); + rbd_assert(open_count_before > 0); mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); - rbd_assert(rbd_dev->open_count > 0); - rbd_dev->open_count--; put_device(&rbd_dev->dev); mutex_unlock(&ctl_mutex); @@ -337,7 +443,7 @@ static struct rbd_client *rbd_client_create(struct ceph_options *ceph_opts) struct rbd_client *rbdc; int ret = -ENOMEM; - dout("rbd_client_create\n"); + dout("%s:\n", __func__); rbdc = kmalloc(sizeof(struct rbd_client), GFP_KERNEL); if (!rbdc) goto out_opt; @@ -361,8 +467,8 @@ static struct rbd_client *rbd_client_create(struct ceph_options *ceph_opts) spin_unlock(&rbd_client_list_lock); mutex_unlock(&ctl_mutex); + dout("%s: rbdc %p\n", __func__, rbdc); - dout("rbd_client_create created %p\n", rbdc); return rbdc; out_err: @@ -373,6 +479,8 @@ out_mutex: out_opt: if (ceph_opts) ceph_destroy_options(ceph_opts); + dout("%s: error %d\n", __func__, ret); + return ERR_PTR(ret); } @@ -426,6 +534,12 @@ static match_table_t rbd_opts_tokens = { {-1, NULL} }; +struct rbd_options { + bool read_only; +}; + +#define RBD_READ_ONLY_DEFAULT false + static int parse_rbd_opts_token(char *c, void *private) { struct rbd_options *rbd_opts = private; @@ -493,7 +607,7 @@ static void rbd_client_release(struct kref *kref) { struct rbd_client *rbdc = container_of(kref, struct rbd_client, kref); - dout("rbd_release_client %p\n", rbdc); + dout("%s: rbdc %p\n", __func__, rbdc); spin_lock(&rbd_client_list_lock); list_del(&rbdc->node); spin_unlock(&rbd_client_list_lock); @@ -512,18 +626,6 @@ static void rbd_put_client(struct rbd_client *rbdc) kref_put(&rbdc->kref, rbd_client_release); } -/* - * Destroy requests collection - */ -static void rbd_coll_release(struct kref *kref) -{ - struct rbd_req_coll *coll = - container_of(kref, struct rbd_req_coll, kref); - - dout("rbd_coll_release %p\n", coll); - kfree(coll); -} - static bool rbd_image_format_valid(u32 image_format) { return image_format == 1 || image_format == 2; @@ -707,7 +809,8 @@ static int rbd_dev_set_mapping(struct rbd_device *rbd_dev) goto done; rbd_dev->mapping.read_only = true; } - rbd_dev->exists = true; + set_bit(RBD_DEV_FLAG_EXISTS, &rbd_dev->flags); + done: return ret; } @@ -724,7 +827,7 @@ static void rbd_header_free(struct rbd_image_header *header) header->snapc = NULL; } -static char *rbd_segment_name(struct rbd_device *rbd_dev, u64 offset) +static const char *rbd_segment_name(struct rbd_device *rbd_dev, u64 offset) { char *name; u64 segment; @@ -767,23 +870,6 @@ static u64 rbd_segment_length(struct rbd_device *rbd_dev, return length; } -static int rbd_get_num_segments(struct rbd_image_header *header, - u64 ofs, u64 len) -{ - u64 start_seg; - u64 end_seg; - - if (!len) - return 0; - if (len - 1 > U64_MAX - ofs) - return -ERANGE; - - start_seg = ofs >> header->obj_order; - end_seg = (ofs + len - 1) >> header->obj_order; - - return end_seg - start_seg + 1; -} - /* * returns the size of an object in the image */ @@ -949,8 +1035,10 @@ static struct bio *bio_chain_clone_range(struct bio **bio_src, unsigned int bi_size; struct bio *bio; - if (!bi) + if (!bi) { + rbd_warn(NULL, "bio_chain exhausted with %u left", len); goto out_err; /* EINVAL; ran out of bio's */ + } bi_size = min_t(unsigned int, bi->bi_size - off, len); bio = bio_clone_range(bi, off, bi_size, gfpmask); if (!bio) @@ -976,399 +1064,721 @@ out_err: return NULL; } -/* - * helpers for osd request op vectors. - */ -static struct ceph_osd_req_op *rbd_create_rw_ops(int num_ops, - int opcode, u32 payload_len) +static void rbd_obj_request_get(struct rbd_obj_request *obj_request) { - struct ceph_osd_req_op *ops; + dout("%s: obj %p (was %d)\n", __func__, obj_request, + atomic_read(&obj_request->kref.refcount)); + kref_get(&obj_request->kref); +} + +static void rbd_obj_request_destroy(struct kref *kref); +static void rbd_obj_request_put(struct rbd_obj_request *obj_request) +{ + rbd_assert(obj_request != NULL); + dout("%s: obj %p (was %d)\n", __func__, obj_request, + atomic_read(&obj_request->kref.refcount)); + kref_put(&obj_request->kref, rbd_obj_request_destroy); +} + +static void rbd_img_request_get(struct rbd_img_request *img_request) +{ + dout("%s: img %p (was %d)\n", __func__, img_request, + atomic_read(&img_request->kref.refcount)); + kref_get(&img_request->kref); +} + +static void rbd_img_request_destroy(struct kref *kref); +static void rbd_img_request_put(struct rbd_img_request *img_request) +{ + rbd_assert(img_request != NULL); + dout("%s: img %p (was %d)\n", __func__, img_request, + atomic_read(&img_request->kref.refcount)); + kref_put(&img_request->kref, rbd_img_request_destroy); +} + +static inline void rbd_img_obj_request_add(struct rbd_img_request *img_request, + struct rbd_obj_request *obj_request) +{ + rbd_assert(obj_request->img_request == NULL); + + rbd_obj_request_get(obj_request); + obj_request->img_request = img_request; + obj_request->which = img_request->obj_request_count; + rbd_assert(obj_request->which != BAD_WHICH); + img_request->obj_request_count++; + list_add_tail(&obj_request->links, &img_request->obj_requests); + dout("%s: img %p obj %p w=%u\n", __func__, img_request, obj_request, + obj_request->which); +} - ops = kzalloc(sizeof (*ops) * (num_ops + 1), GFP_NOIO); - if (!ops) +static inline void rbd_img_obj_request_del(struct rbd_img_request *img_request, + struct rbd_obj_request *obj_request) +{ + rbd_assert(obj_request->which != BAD_WHICH); + + dout("%s: img %p obj %p w=%u\n", __func__, img_request, obj_request, + obj_request->which); + list_del(&obj_request->links); + rbd_assert(img_request->obj_request_count > 0); + img_request->obj_request_count--; + rbd_assert(obj_request->which == img_request->obj_request_count); + obj_request->which = BAD_WHICH; + rbd_assert(obj_request->img_request == img_request); + obj_request->img_request = NULL; + obj_request->callback = NULL; + rbd_obj_request_put(obj_request); +} + +static bool obj_request_type_valid(enum obj_request_type type) +{ + switch (type) { + case OBJ_REQUEST_NODATA: + case OBJ_REQUEST_BIO: + case OBJ_REQUEST_PAGES: + return true; + default: + return false; + } +} + +static struct ceph_osd_req_op *rbd_osd_req_op_create(u16 opcode, ...) +{ + struct ceph_osd_req_op *op; + va_list args; + size_t size; + + op = kzalloc(sizeof (*op), GFP_NOIO); + if (!op) return NULL; + op->op = opcode; + va_start(args, opcode); + switch (opcode) { + case CEPH_OSD_OP_READ: + case CEPH_OSD_OP_WRITE: + /* rbd_osd_req_op_create(READ, offset, length) */ + /* rbd_osd_req_op_create(WRITE, offset, length) */ + op->extent.offset = va_arg(args, u64); + op->extent.length = va_arg(args, u64); + if (opcode == CEPH_OSD_OP_WRITE) + op->payload_len = op->extent.length; + break; + case CEPH_OSD_OP_STAT: + break; + case CEPH_OSD_OP_CALL: + /* rbd_osd_req_op_create(CALL, class, method, data, datalen) */ + op->cls.class_name = va_arg(args, char *); + size = strlen(op->cls.class_name); + rbd_assert(size <= (size_t) U8_MAX); + op->cls.class_len = size; + op->payload_len = size; + + op->cls.method_name = va_arg(args, char *); + size = strlen(op->cls.method_name); + rbd_assert(size <= (size_t) U8_MAX); + op->cls.method_len = size; + op->payload_len += size; + + op->cls.argc = 0; + op->cls.indata = va_arg(args, void *); + size = va_arg(args, size_t); + rbd_assert(size <= (size_t) U32_MAX); + op->cls.indata_len = (u32) size; + op->payload_len += size; + break; + case CEPH_OSD_OP_NOTIFY_ACK: + case CEPH_OSD_OP_WATCH: + /* rbd_osd_req_op_create(NOTIFY_ACK, cookie, version) */ + /* rbd_osd_req_op_create(WATCH, cookie, version, flag) */ + op->watch.cookie = va_arg(args, u64); + op->watch.ver = va_arg(args, u64); + op->watch.ver = cpu_to_le64(op->watch.ver); + if (opcode == CEPH_OSD_OP_WATCH && va_arg(args, int)) + op->watch.flag = (u8) 1; + break; + default: + rbd_warn(NULL, "unsupported opcode %hu\n", opcode); + kfree(op); + op = NULL; + break; + } + va_end(args); - ops[0].op = opcode; + return op; +} - /* - * op extent offset and length will be set later on - * in calc_raw_layout() - */ - ops[0].payload_len = payload_len; +static void rbd_osd_req_op_destroy(struct ceph_osd_req_op *op) +{ + kfree(op); +} + +static int rbd_obj_request_submit(struct ceph_osd_client *osdc, + struct rbd_obj_request *obj_request) +{ + dout("%s: osdc %p obj %p\n", __func__, osdc, obj_request); - return ops; + return ceph_osdc_start_request(osdc, obj_request->osd_req, false); } -static void rbd_destroy_ops(struct ceph_osd_req_op *ops) +static void rbd_img_request_complete(struct rbd_img_request *img_request) { - kfree(ops); + dout("%s: img %p\n", __func__, img_request); + if (img_request->callback) + img_request->callback(img_request); + else + rbd_img_request_put(img_request); } -static void rbd_coll_end_req_index(struct request *rq, - struct rbd_req_coll *coll, - int index, - int ret, u64 len) +/* Caller is responsible for rbd_obj_request_destroy(obj_request) */ + +static int rbd_obj_request_wait(struct rbd_obj_request *obj_request) { - struct request_queue *q; - int min, max, i; + dout("%s: obj %p\n", __func__, obj_request); - dout("rbd_coll_end_req_index %p index %d ret %d len %llu\n", - coll, index, ret, (unsigned long long) len); + return wait_for_completion_interruptible(&obj_request->completion); +} - if (!rq) - return; +static void obj_request_done_init(struct rbd_obj_request *obj_request) +{ + atomic_set(&obj_request->done, 0); + smp_wmb(); +} - if (!coll) { - blk_end_request(rq, ret, len); - return; +static void obj_request_done_set(struct rbd_obj_request *obj_request) +{ + int done; + + done = atomic_inc_return(&obj_request->done); + if (done > 1) { + struct rbd_img_request *img_request = obj_request->img_request; + struct rbd_device *rbd_dev; + + rbd_dev = img_request ? img_request->rbd_dev : NULL; + rbd_warn(rbd_dev, "obj_request %p was already done\n", + obj_request); } +} - q = rq->q; - - spin_lock_irq(q->queue_lock); - coll->status[index].done = 1; - coll->status[index].rc = ret; - coll->status[index].bytes = len; - max = min = coll->num_done; - while (max < coll->total && coll->status[max].done) - max++; - - for (i = min; i<max; i++) { - __blk_end_request(rq, coll->status[i].rc, - coll->status[i].bytes); - coll->num_done++; - kref_put(&coll->kref, rbd_coll_release); +static bool obj_request_done_test(struct rbd_obj_request *obj_request) +{ + smp_mb(); + return atomic_read(&obj_request->done) != 0; +} + +static void rbd_obj_request_complete(struct rbd_obj_request *obj_request) +{ + dout("%s: obj %p cb %p\n", __func__, obj_request, + obj_request->callback); + if (obj_request->callback) + obj_request->callback(obj_request); + else + complete_all(&obj_request->completion); +} + +static void rbd_osd_trivial_callback(struct rbd_obj_request *obj_request) +{ + dout("%s: obj %p\n", __func__, obj_request); + obj_request_done_set(obj_request); +} + +static void rbd_osd_read_callback(struct rbd_obj_request *obj_request) +{ + dout("%s: obj %p result %d %llu/%llu\n", __func__, obj_request, + obj_request->result, obj_request->xferred, obj_request->length); + /* + * ENOENT means a hole in the object. We zero-fill the + * entire length of the request. A short read also implies + * zero-fill to the end of the request. Either way we + * update the xferred count to indicate the whole request + * was satisfied. + */ + if (obj_request->result == -ENOENT) { + zero_bio_chain(obj_request->bio_list, 0); + obj_request->result = 0; + obj_request->xferred = obj_request->length; + } else if (obj_request->xferred < obj_request->length && + !obj_request->result) { + zero_bio_chain(obj_request->bio_list, obj_request->xferred); + obj_request->xferred = obj_request->length; } - spin_unlock_irq(q->queue_lock); + obj_request_done_set(obj_request); } -static void rbd_coll_end_req(struct rbd_request *req, - int ret, u64 len) +static void rbd_osd_write_callback(struct rbd_obj_request *obj_request) { - rbd_coll_end_req_index(req->rq, req->coll, req->coll_index, ret, len); + dout("%s: obj %p result %d %llu\n", __func__, obj_request, + obj_request->result, obj_request->length); + /* + * There is no such thing as a successful short write. + * Our xferred value is the number of bytes transferred + * back. Set it to our originally-requested length. + */ + obj_request->xferred = obj_request->length; + obj_request_done_set(obj_request); } /* - * Send ceph osd request + * For a simple stat call there's nothing to do. We'll do more if + * this is part of a write sequence for a layered image. */ -static int rbd_do_request(struct request *rq, - struct rbd_device *rbd_dev, - struct ceph_snap_context *snapc, - u64 snapid, - const char *object_name, u64 ofs, u64 len, - struct bio *bio, - struct page **pages, - int num_pages, - int flags, - struct ceph_osd_req_op *ops, - struct rbd_req_coll *coll, - int coll_index, - void (*rbd_cb)(struct ceph_osd_request *req, - struct ceph_msg *msg), - struct ceph_osd_request **linger_req, - u64 *ver) -{ - struct ceph_osd_request *req; - struct ceph_file_layout *layout; - int ret; - u64 bno; - struct timespec mtime = CURRENT_TIME; - struct rbd_request *req_data; - struct ceph_osd_request_head *reqhead; - struct ceph_osd_client *osdc; +static void rbd_osd_stat_callback(struct rbd_obj_request *obj_request) +{ + dout("%s: obj %p\n", __func__, obj_request); + obj_request_done_set(obj_request); +} - req_data = kzalloc(sizeof(*req_data), GFP_NOIO); - if (!req_data) { - if (coll) - rbd_coll_end_req_index(rq, coll, coll_index, - -ENOMEM, len); - return -ENOMEM; +static void rbd_osd_req_callback(struct ceph_osd_request *osd_req, + struct ceph_msg *msg) +{ + struct rbd_obj_request *obj_request = osd_req->r_priv; + u16 opcode; + + dout("%s: osd_req %p msg %p\n", __func__, osd_req, msg); + rbd_assert(osd_req == obj_request->osd_req); + rbd_assert(!!obj_request->img_request ^ + (obj_request->which == BAD_WHICH)); + + if (osd_req->r_result < 0) + obj_request->result = osd_req->r_result; + obj_request->version = le64_to_cpu(osd_req->r_reassert_version.version); + + WARN_ON(osd_req->r_num_ops != 1); /* For now */ + + /* + * We support a 64-bit length, but ultimately it has to be + * passed to blk_end_request(), which takes an unsigned int. + */ + obj_request->xferred = osd_req->r_reply_op_len[0]; + rbd_assert(obj_request->xferred < (u64) UINT_MAX); + opcode = osd_req->r_request_ops[0].op; + switch (opcode) { + case CEPH_OSD_OP_READ: + rbd_osd_read_callback(obj_request); + break; + case CEPH_OSD_OP_WRITE: + rbd_osd_write_callback(obj_request); + break; + case CEPH_OSD_OP_STAT: + rbd_osd_stat_callback(obj_request); + break; + case CEPH_OSD_OP_CALL: + case CEPH_OSD_OP_NOTIFY_ACK: + case CEPH_OSD_OP_WATCH: + rbd_osd_trivial_callback(obj_request); + break; + default: + rbd_warn(NULL, "%s: unsupported op %hu\n", + obj_request->object_name, (unsigned short) opcode); + break; } - if (coll) { - req_data->coll = coll; - req_data->coll_index = coll_index; + if (obj_request_done_test(obj_request)) + rbd_obj_request_complete(obj_request); +} + +static struct ceph_osd_request *rbd_osd_req_create( + struct rbd_device *rbd_dev, + bool write_request, + struct rbd_obj_request *obj_request, + struct ceph_osd_req_op *op) +{ + struct rbd_img_request *img_request = obj_request->img_request; + struct ceph_snap_context *snapc = NULL; + struct ceph_osd_client *osdc; + struct ceph_osd_request *osd_req; + struct timespec now; + struct timespec *mtime; + u64 snap_id = CEPH_NOSNAP; + u64 offset = obj_request->offset; + u64 length = obj_request->length; + + if (img_request) { + rbd_assert(img_request->write_request == write_request); + if (img_request->write_request) + snapc = img_request->snapc; + else + snap_id = img_request->snap_id; } - dout("rbd_do_request object_name=%s ofs=%llu len=%llu coll=%p[%d]\n", - object_name, (unsigned long long) ofs, - (unsigned long long) len, coll, coll_index); + /* Allocate and initialize the request, for the single op */ osdc = &rbd_dev->rbd_client->client->osdc; - req = ceph_osdc_alloc_request(osdc, flags, snapc, ops, - false, GFP_NOIO, pages, bio); - if (!req) { - ret = -ENOMEM; - goto done_pages; + osd_req = ceph_osdc_alloc_request(osdc, snapc, 1, false, GFP_ATOMIC); + if (!osd_req) + return NULL; /* ENOMEM */ + + rbd_assert(obj_request_type_valid(obj_request->type)); + switch (obj_request->type) { + case OBJ_REQUEST_NODATA: + break; /* Nothing to do */ + case OBJ_REQUEST_BIO: + rbd_assert(obj_request->bio_list != NULL); + osd_req->r_bio = obj_request->bio_list; + break; + case OBJ_REQUEST_PAGES: + osd_req->r_pages = obj_request->pages; + osd_req->r_num_pages = obj_request->page_count; + osd_req->r_page_alignment = offset & ~PAGE_MASK; + break; } - req->r_callback = rbd_cb; + if (write_request) { + osd_req->r_flags = CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK; + now = CURRENT_TIME; + mtime = &now; + } else { + osd_req->r_flags = CEPH_OSD_FLAG_READ; + mtime = NULL; /* not needed for reads */ + offset = 0; /* These are not used... */ + length = 0; /* ...for osd read requests */ + } - req_data->rq = rq; - req_data->bio = bio; - req_data->pages = pages; - req_data->len = len; + osd_req->r_callback = rbd_osd_req_callback; + osd_req->r_priv = obj_request; - req->r_priv = req_data; + osd_req->r_oid_len = strlen(obj_request->object_name); + rbd_assert(osd_req->r_oid_len < sizeof (osd_req->r_oid)); + memcpy(osd_req->r_oid, obj_request->object_name, osd_req->r_oid_len); - reqhead = req->r_request->front.iov_base; - reqhead->snapid = cpu_to_le64(CEPH_NOSNAP); + osd_req->r_file_layout = rbd_dev->layout; /* struct */ - strncpy(req->r_oid, object_name, sizeof(req->r_oid)); - req->r_oid_len = strlen(req->r_oid); + /* osd_req will get its own reference to snapc (if non-null) */ - layout = &req->r_file_layout; - memset(layout, 0, sizeof(*layout)); - layout->fl_stripe_unit = cpu_to_le32(1 << RBD_MAX_OBJ_ORDER); - layout->fl_stripe_count = cpu_to_le32(1); - layout->fl_object_size = cpu_to_le32(1 << RBD_MAX_OBJ_ORDER); - layout->fl_pg_pool = cpu_to_le32((int) rbd_dev->spec->pool_id); - ret = ceph_calc_raw_layout(osdc, layout, snapid, ofs, &len, &bno, - req, ops); - rbd_assert(ret == 0); + ceph_osdc_build_request(osd_req, offset, length, 1, op, + snapc, snap_id, mtime); - ceph_osdc_build_request(req, ofs, &len, - ops, - snapc, - &mtime, - req->r_oid, req->r_oid_len); + return osd_req; +} - if (linger_req) { - ceph_osdc_set_request_linger(osdc, req); - *linger_req = req; - } +static void rbd_osd_req_destroy(struct ceph_osd_request *osd_req) +{ + ceph_osdc_put_request(osd_req); +} - ret = ceph_osdc_start_request(osdc, req, false); - if (ret < 0) - goto done_err; - - if (!rbd_cb) { - ret = ceph_osdc_wait_request(osdc, req); - if (ver) - *ver = le64_to_cpu(req->r_reassert_version.version); - dout("reassert_ver=%llu\n", - (unsigned long long) - le64_to_cpu(req->r_reassert_version.version)); - ceph_osdc_put_request(req); +/* object_name is assumed to be a non-null pointer and NUL-terminated */ + +static struct rbd_obj_request *rbd_obj_request_create(const char *object_name, + u64 offset, u64 length, + enum obj_request_type type) +{ + struct rbd_obj_request *obj_request; + size_t size; + char *name; + + rbd_assert(obj_request_type_valid(type)); + + size = strlen(object_name) + 1; + obj_request = kzalloc(sizeof (*obj_request) + size, GFP_KERNEL); + if (!obj_request) + return NULL; + + name = (char *)(obj_request + 1); + obj_request->object_name = memcpy(name, object_name, size); + obj_request->offset = offset; + obj_request->length = length; + obj_request->which = BAD_WHICH; + obj_request->type = type; + INIT_LIST_HEAD(&obj_request->links); + obj_request_done_init(obj_request); + init_completion(&obj_request->completion); + kref_init(&obj_request->kref); + + dout("%s: \"%s\" %llu/%llu %d -> obj %p\n", __func__, object_name, + offset, length, (int)type, obj_request); + + return obj_request; +} + +static void rbd_obj_request_destroy(struct kref *kref) +{ + struct rbd_obj_request *obj_request; + + obj_request = container_of(kref, struct rbd_obj_request, kref); + + dout("%s: obj %p\n", __func__, obj_request); + + rbd_assert(obj_request->img_request == NULL); + rbd_assert(obj_request->which == BAD_WHICH); + + if (obj_request->osd_req) + rbd_osd_req_destroy(obj_request->osd_req); + + rbd_assert(obj_request_type_valid(obj_request->type)); + switch (obj_request->type) { + case OBJ_REQUEST_NODATA: + break; /* Nothing to do */ + case OBJ_REQUEST_BIO: + if (obj_request->bio_list) + bio_chain_put(obj_request->bio_list); + break; + case OBJ_REQUEST_PAGES: + if (obj_request->pages) + ceph_release_page_vector(obj_request->pages, + obj_request->page_count); + break; } - return ret; -done_err: - bio_chain_put(req_data->bio); - ceph_osdc_put_request(req); -done_pages: - rbd_coll_end_req(req_data, ret, len); - kfree(req_data); - return ret; + kfree(obj_request); } /* - * Ceph osd op callback + * Caller is responsible for filling in the list of object requests + * that comprises the image request, and the Linux request pointer + * (if there is one). */ -static void rbd_req_cb(struct ceph_osd_request *req, struct ceph_msg *msg) -{ - struct rbd_request *req_data = req->r_priv; - struct ceph_osd_reply_head *replyhead; - struct ceph_osd_op *op; - __s32 rc; - u64 bytes; - int read_op; - - /* parse reply */ - replyhead = msg->front.iov_base; - WARN_ON(le32_to_cpu(replyhead->num_ops) == 0); - op = (void *)(replyhead + 1); - rc = le32_to_cpu(replyhead->result); - bytes = le64_to_cpu(op->extent.length); - read_op = (le16_to_cpu(op->op) == CEPH_OSD_OP_READ); - - dout("rbd_req_cb bytes=%llu readop=%d rc=%d\n", - (unsigned long long) bytes, read_op, (int) rc); - - if (rc == -ENOENT && read_op) { - zero_bio_chain(req_data->bio, 0); - rc = 0; - } else if (rc == 0 && read_op && bytes < req_data->len) { - zero_bio_chain(req_data->bio, bytes); - bytes = req_data->len; - } +static struct rbd_img_request *rbd_img_request_create( + struct rbd_device *rbd_dev, + u64 offset, u64 length, + bool write_request) +{ + struct rbd_img_request *img_request; + struct ceph_snap_context *snapc = NULL; - rbd_coll_end_req(req_data, rc, bytes); + img_request = kmalloc(sizeof (*img_request), GFP_ATOMIC); + if (!img_request) + return NULL; - if (req_data->bio) - bio_chain_put(req_data->bio); + if (write_request) { + down_read(&rbd_dev->header_rwsem); + snapc = ceph_get_snap_context(rbd_dev->header.snapc); + up_read(&rbd_dev->header_rwsem); + if (WARN_ON(!snapc)) { + kfree(img_request); + return NULL; /* Shouldn't happen */ + } + } - ceph_osdc_put_request(req); - kfree(req_data); + img_request->rq = NULL; + img_request->rbd_dev = rbd_dev; + img_request->offset = offset; + img_request->length = length; + img_request->write_request = write_request; + if (write_request) + img_request->snapc = snapc; + else + img_request->snap_id = rbd_dev->spec->snap_id; + spin_lock_init(&img_request->completion_lock); + img_request->next_completion = 0; + img_request->callback = NULL; + img_request->obj_request_count = 0; + INIT_LIST_HEAD(&img_request->obj_requests); + kref_init(&img_request->kref); + + rbd_img_request_get(img_request); /* Avoid a warning */ + rbd_img_request_put(img_request); /* TEMPORARY */ + + dout("%s: rbd_dev %p %s %llu/%llu -> img %p\n", __func__, rbd_dev, + write_request ? "write" : "read", offset, length, + img_request); + + return img_request; } -static void rbd_simple_req_cb(struct ceph_osd_request *req, struct ceph_msg *msg) +static void rbd_img_request_destroy(struct kref *kref) { - ceph_osdc_put_request(req); + struct rbd_img_request *img_request; + struct rbd_obj_request *obj_request; + struct rbd_obj_request *next_obj_request; + + img_request = container_of(kref, struct rbd_img_request, kref); + + dout("%s: img %p\n", __func__, img_request); + + for_each_obj_request_safe(img_request, obj_request, next_obj_request) + rbd_img_obj_request_del(img_request, obj_request); + rbd_assert(img_request->obj_request_count == 0); + + if (img_request->write_request) + ceph_put_snap_context(img_request->snapc); + + kfree(img_request); } -/* - * Do a synchronous ceph osd operation - */ -static int rbd_req_sync_op(struct rbd_device *rbd_dev, - struct ceph_snap_context *snapc, - u64 snapid, - int flags, - struct ceph_osd_req_op *ops, - const char *object_name, - u64 ofs, u64 inbound_size, - char *inbound, - struct ceph_osd_request **linger_req, - u64 *ver) +static int rbd_img_request_fill_bio(struct rbd_img_request *img_request, + struct bio *bio_list) { - int ret; - struct page **pages; - int num_pages; - - rbd_assert(ops != NULL); + struct rbd_device *rbd_dev = img_request->rbd_dev; + struct rbd_obj_request *obj_request = NULL; + struct rbd_obj_request *next_obj_request; + unsigned int bio_offset; + u64 image_offset; + u64 resid; + u16 opcode; + + dout("%s: img %p bio %p\n", __func__, img_request, bio_list); + + opcode = img_request->write_request ? CEPH_OSD_OP_WRITE + : CEPH_OSD_OP_READ; + bio_offset = 0; + image_offset = img_request->offset; + rbd_assert(image_offset == bio_list->bi_sector << SECTOR_SHIFT); + resid = img_request->length; + rbd_assert(resid > 0); + while (resid) { + const char *object_name; + unsigned int clone_size; + struct ceph_osd_req_op *op; + u64 offset; + u64 length; + + object_name = rbd_segment_name(rbd_dev, image_offset); + if (!object_name) + goto out_unwind; + offset = rbd_segment_offset(rbd_dev, image_offset); + length = rbd_segment_length(rbd_dev, image_offset, resid); + obj_request = rbd_obj_request_create(object_name, + offset, length, + OBJ_REQUEST_BIO); + kfree(object_name); /* object request has its own copy */ + if (!obj_request) + goto out_unwind; + + rbd_assert(length <= (u64) UINT_MAX); + clone_size = (unsigned int) length; + obj_request->bio_list = bio_chain_clone_range(&bio_list, + &bio_offset, clone_size, + GFP_ATOMIC); + if (!obj_request->bio_list) + goto out_partial; - num_pages = calc_pages_for(ofs, inbound_size); - pages = ceph_alloc_page_vector(num_pages, GFP_KERNEL); - if (IS_ERR(pages)) - return PTR_ERR(pages); + /* + * Build up the op to use in building the osd + * request. Note that the contents of the op are + * copied by rbd_osd_req_create(). + */ + op = rbd_osd_req_op_create(opcode, offset, length); + if (!op) + goto out_partial; + obj_request->osd_req = rbd_osd_req_create(rbd_dev, + img_request->write_request, + obj_request, op); + rbd_osd_req_op_destroy(op); + if (!obj_request->osd_req) + goto out_partial; + /* status and version are initially zero-filled */ + + rbd_img_obj_request_add(img_request, obj_request); + + image_offset += length; + resid -= length; + } - ret = rbd_do_request(NULL, rbd_dev, snapc, snapid, - object_name, ofs, inbound_size, NULL, - pages, num_pages, - flags, - ops, - NULL, 0, - NULL, - linger_req, ver); - if (ret < 0) - goto done; + return 0; - if ((flags & CEPH_OSD_FLAG_READ) && inbound) - ret = ceph_copy_from_page_vector(pages, inbound, ofs, ret); +out_partial: + rbd_obj_request_put(obj_request); +out_unwind: + for_each_obj_request_safe(img_request, obj_request, next_obj_request) + rbd_obj_request_put(obj_request); -done: - ceph_release_page_vector(pages, num_pages); - return ret; + return -ENOMEM; } -/* - * Do an asynchronous ceph osd operation - */ -static int rbd_do_op(struct request *rq, - struct rbd_device *rbd_dev, - struct ceph_snap_context *snapc, - u64 ofs, u64 len, - struct bio *bio, - struct rbd_req_coll *coll, - int coll_index) -{ - char *seg_name; - u64 seg_ofs; - u64 seg_len; - int ret; - struct ceph_osd_req_op *ops; - u32 payload_len; - int opcode; - int flags; - u64 snapid; - - seg_name = rbd_segment_name(rbd_dev, ofs); - if (!seg_name) - return -ENOMEM; - seg_len = rbd_segment_length(rbd_dev, ofs, len); - seg_ofs = rbd_segment_offset(rbd_dev, ofs); - - if (rq_data_dir(rq) == WRITE) { - opcode = CEPH_OSD_OP_WRITE; - flags = CEPH_OSD_FLAG_WRITE|CEPH_OSD_FLAG_ONDISK; - snapid = CEPH_NOSNAP; - payload_len = seg_len; - } else { - opcode = CEPH_OSD_OP_READ; - flags = CEPH_OSD_FLAG_READ; - snapc = NULL; - snapid = rbd_dev->spec->snap_id; - payload_len = 0; +static void rbd_img_obj_callback(struct rbd_obj_request *obj_request) +{ + struct rbd_img_request *img_request; + u32 which = obj_request->which; + bool more = true; + + img_request = obj_request->img_request; + + dout("%s: img %p obj %p\n", __func__, img_request, obj_request); + rbd_assert(img_request != NULL); + rbd_assert(img_request->rq != NULL); + rbd_assert(img_request->obj_request_count > 0); + rbd_assert(which != BAD_WHICH); + rbd_assert(which < img_request->obj_request_count); + rbd_assert(which >= img_request->next_completion); + + spin_lock_irq(&img_request->completion_lock); + if (which != img_request->next_completion) + goto out; + + for_each_obj_request_from(img_request, obj_request) { + unsigned int xferred; + int result; + + rbd_assert(more); + rbd_assert(which < img_request->obj_request_count); + + if (!obj_request_done_test(obj_request)) + break; + + rbd_assert(obj_request->xferred <= (u64) UINT_MAX); + xferred = (unsigned int) obj_request->xferred; + result = (int) obj_request->result; + if (result) + rbd_warn(NULL, "obj_request %s result %d xferred %u\n", + img_request->write_request ? "write" : "read", + result, xferred); + + more = blk_end_request(img_request->rq, result, xferred); + which++; } - ret = -ENOMEM; - ops = rbd_create_rw_ops(1, opcode, payload_len); - if (!ops) - goto done; + rbd_assert(more ^ (which == img_request->obj_request_count)); + img_request->next_completion = which; +out: + spin_unlock_irq(&img_request->completion_lock); - /* we've taken care of segment sizes earlier when we - cloned the bios. We should never have a segment - truncated at this point */ - rbd_assert(seg_len == len); - - ret = rbd_do_request(rq, rbd_dev, snapc, snapid, - seg_name, seg_ofs, seg_len, - bio, - NULL, 0, - flags, - ops, - coll, coll_index, - rbd_req_cb, 0, NULL); - - rbd_destroy_ops(ops); -done: - kfree(seg_name); - return ret; + if (!more) + rbd_img_request_complete(img_request); } -/* - * Request sync osd read - */ -static int rbd_req_sync_read(struct rbd_device *rbd_dev, - u64 snapid, - const char *object_name, - u64 ofs, u64 len, - char *buf, - u64 *ver) -{ - struct ceph_osd_req_op *ops; - int ret; +static int rbd_img_request_submit(struct rbd_img_request *img_request) +{ + struct rbd_device *rbd_dev = img_request->rbd_dev; + struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc; + struct rbd_obj_request *obj_request; - ops = rbd_create_rw_ops(1, CEPH_OSD_OP_READ, 0); - if (!ops) - return -ENOMEM; + dout("%s: img %p\n", __func__, img_request); + for_each_obj_request(img_request, obj_request) { + int ret; - ret = rbd_req_sync_op(rbd_dev, NULL, - snapid, - CEPH_OSD_FLAG_READ, - ops, object_name, ofs, len, buf, NULL, ver); - rbd_destroy_ops(ops); + obj_request->callback = rbd_img_obj_callback; + ret = rbd_obj_request_submit(osdc, obj_request); + if (ret) + return ret; + /* + * The image request has its own reference to each + * of its object requests, so we can safely drop the + * initial one here. + */ + rbd_obj_request_put(obj_request); + } - return ret; + return 0; } -/* - * Request sync osd watch - */ -static int rbd_req_sync_notify_ack(struct rbd_device *rbd_dev, - u64 ver, - u64 notify_id) +static int rbd_obj_notify_ack(struct rbd_device *rbd_dev, + u64 ver, u64 notify_id) { - struct ceph_osd_req_op *ops; + struct rbd_obj_request *obj_request; + struct ceph_osd_req_op *op; + struct ceph_osd_client *osdc; int ret; - ops = rbd_create_rw_ops(1, CEPH_OSD_OP_NOTIFY_ACK, 0); - if (!ops) + obj_request = rbd_obj_request_create(rbd_dev->header_name, 0, 0, + OBJ_REQUEST_NODATA); + if (!obj_request) return -ENOMEM; - ops[0].watch.ver = cpu_to_le64(ver); - ops[0].watch.cookie = notify_id; - ops[0].watch.flag = 0; + ret = -ENOMEM; + op = rbd_osd_req_op_create(CEPH_OSD_OP_NOTIFY_ACK, notify_id, ver); + if (!op) + goto out; + obj_request->osd_req = rbd_osd_req_create(rbd_dev, false, + obj_request, op); + rbd_osd_req_op_destroy(op); + if (!obj_request->osd_req) + goto out; - ret = rbd_do_request(NULL, rbd_dev, NULL, CEPH_NOSNAP, - rbd_dev->header_name, 0, 0, NULL, - NULL, 0, - CEPH_OSD_FLAG_READ, - ops, - NULL, 0, - rbd_simple_req_cb, 0, NULL); + osdc = &rbd_dev->rbd_client->client->osdc; + obj_request->callback = rbd_obj_request_put; + ret = rbd_obj_request_submit(osdc, obj_request); +out: + if (ret) + rbd_obj_request_put(obj_request); - rbd_destroy_ops(ops); return ret; } @@ -1381,95 +1791,103 @@ static void rbd_watch_cb(u64 ver, u64 notify_id, u8 opcode, void *data) if (!rbd_dev) return; - dout("rbd_watch_cb %s notify_id=%llu opcode=%u\n", + dout("%s: \"%s\" notify_id %llu opcode %u\n", __func__, rbd_dev->header_name, (unsigned long long) notify_id, (unsigned int) opcode); rc = rbd_dev_refresh(rbd_dev, &hver); if (rc) - pr_warning(RBD_DRV_NAME "%d got notification but failed to " - " update snaps: %d\n", rbd_dev->major, rc); + rbd_warn(rbd_dev, "got notification but failed to " + " update snaps: %d\n", rc); - rbd_req_sync_notify_ack(rbd_dev, hver, notify_id); + rbd_obj_notify_ack(rbd_dev, hver, notify_id); } /* - * Request sync osd watch + * Request sync osd watch/unwatch. The value of "start" determines + * whether a watch request is being initiated or torn down. */ -static int rbd_req_sync_watch(struct rbd_device *rbd_dev) +static int rbd_dev_header_watch_sync(struct rbd_device *rbd_dev, int start) { - struct ceph_osd_req_op *ops; struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc; + struct rbd_obj_request *obj_request; + struct ceph_osd_req_op *op; int ret; - ops = rbd_create_rw_ops(1, CEPH_OSD_OP_WATCH, 0); - if (!ops) - return -ENOMEM; + rbd_assert(start ^ !!rbd_dev->watch_event); + rbd_assert(start ^ !!rbd_dev->watch_request); - ret = ceph_osdc_create_event(osdc, rbd_watch_cb, 0, - (void *)rbd_dev, &rbd_dev->watch_event); - if (ret < 0) - goto fail; + if (start) { + ret = ceph_osdc_create_event(osdc, rbd_watch_cb, rbd_dev, + &rbd_dev->watch_event); + if (ret < 0) + return ret; + rbd_assert(rbd_dev->watch_event != NULL); + } - ops[0].watch.ver = cpu_to_le64(rbd_dev->header.obj_version); - ops[0].watch.cookie = cpu_to_le64(rbd_dev->watch_event->cookie); - ops[0].watch.flag = 1; + ret = -ENOMEM; + obj_request = rbd_obj_request_create(rbd_dev->header_name, 0, 0, + OBJ_REQUEST_NODATA); + if (!obj_request) + goto out_cancel; + + op = rbd_osd_req_op_create(CEPH_OSD_OP_WATCH, + rbd_dev->watch_event->cookie, + rbd_dev->header.obj_version, start); + if (!op) + goto out_cancel; + obj_request->osd_req = rbd_osd_req_create(rbd_dev, true, + obj_request, op); + rbd_osd_req_op_destroy(op); + if (!obj_request->osd_req) + goto out_cancel; + + if (start) + ceph_osdc_set_request_linger(osdc, obj_request->osd_req); + else + ceph_osdc_unregister_linger_request(osdc, + rbd_dev->watch_request->osd_req); + ret = rbd_obj_request_submit(osdc, obj_request); + if (ret) + goto out_cancel; + ret = rbd_obj_request_wait(obj_request); + if (ret) + goto out_cancel; + ret = obj_request->result; + if (ret) + goto out_cancel; - ret = rbd_req_sync_op(rbd_dev, NULL, - CEPH_NOSNAP, - CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK, - ops, - rbd_dev->header_name, - 0, 0, NULL, - &rbd_dev->watch_request, NULL); + /* + * A watch request is set to linger, so the underlying osd + * request won't go away until we unregister it. We retain + * a pointer to the object request during that time (in + * rbd_dev->watch_request), so we'll keep a reference to + * it. We'll drop that reference (below) after we've + * unregistered it. + */ + if (start) { + rbd_dev->watch_request = obj_request; - if (ret < 0) - goto fail_event; + return 0; + } - rbd_destroy_ops(ops); - return 0; + /* We have successfully torn down the watch request */ -fail_event: + rbd_obj_request_put(rbd_dev->watch_request); + rbd_dev->watch_request = NULL; +out_cancel: + /* Cancel the event if we're tearing down, or on error */ ceph_osdc_cancel_event(rbd_dev->watch_event); rbd_dev->watch_event = NULL; -fail: - rbd_destroy_ops(ops); - return ret; -} + if (obj_request) + rbd_obj_request_put(obj_request); -/* - * Request sync osd unwatch - */ -static int rbd_req_sync_unwatch(struct rbd_device *rbd_dev) -{ - struct ceph_osd_req_op *ops; - int ret; - - ops = rbd_create_rw_ops(1, CEPH_OSD_OP_WATCH, 0); - if (!ops) - return -ENOMEM; - - ops[0].watch.ver = 0; - ops[0].watch.cookie = cpu_to_le64(rbd_dev->watch_event->cookie); - ops[0].watch.flag = 0; - - ret = rbd_req_sync_op(rbd_dev, NULL, - CEPH_NOSNAP, - CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK, - ops, - rbd_dev->header_name, - 0, 0, NULL, NULL, NULL); - - - rbd_destroy_ops(ops); - ceph_osdc_cancel_event(rbd_dev->watch_event); - rbd_dev->watch_event = NULL; return ret; } /* * Synchronous osd object method call */ -static int rbd_req_sync_exec(struct rbd_device *rbd_dev, +static int rbd_obj_method_sync(struct rbd_device *rbd_dev, const char *object_name, const char *class_name, const char *method_name, @@ -1477,169 +1895,154 @@ static int rbd_req_sync_exec(struct rbd_device *rbd_dev, size_t outbound_size, char *inbound, size_t inbound_size, - int flags, - u64 *ver) + u64 *version) { - struct ceph_osd_req_op *ops; - int class_name_len = strlen(class_name); - int method_name_len = strlen(method_name); - int payload_size; + struct rbd_obj_request *obj_request; + struct ceph_osd_client *osdc; + struct ceph_osd_req_op *op; + struct page **pages; + u32 page_count; int ret; /* - * Any input parameters required by the method we're calling - * will be sent along with the class and method names as - * part of the message payload. That data and its size are - * supplied via the indata and indata_len fields (named from - * the perspective of the server side) in the OSD request - * operation. + * Method calls are ultimately read operations but they + * don't involve object data (so no offset or length). + * The result should placed into the inbound buffer + * provided. They also supply outbound data--parameters for + * the object method. Currently if this is present it will + * be a snapshot id. */ - payload_size = class_name_len + method_name_len + outbound_size; - ops = rbd_create_rw_ops(1, CEPH_OSD_OP_CALL, payload_size); - if (!ops) - return -ENOMEM; + page_count = (u32) calc_pages_for(0, inbound_size); + pages = ceph_alloc_page_vector(page_count, GFP_KERNEL); + if (IS_ERR(pages)) + return PTR_ERR(pages); - ops[0].cls.class_name = class_name; - ops[0].cls.class_len = (__u8) class_name_len; - ops[0].cls.method_name = method_name; - ops[0].cls.method_len = (__u8) method_name_len; - ops[0].cls.argc = 0; - ops[0].cls.indata = outbound; - ops[0].cls.indata_len = outbound_size; + ret = -ENOMEM; + obj_request = rbd_obj_request_create(object_name, 0, 0, + OBJ_REQUEST_PAGES); + if (!obj_request) + goto out; - ret = rbd_req_sync_op(rbd_dev, NULL, - CEPH_NOSNAP, - flags, ops, - object_name, 0, inbound_size, inbound, - NULL, ver); + obj_request->pages = pages; + obj_request->page_count = page_count; - rbd_destroy_ops(ops); + op = rbd_osd_req_op_create(CEPH_OSD_OP_CALL, class_name, + method_name, outbound, outbound_size); + if (!op) + goto out; + obj_request->osd_req = rbd_osd_req_create(rbd_dev, false, + obj_request, op); + rbd_osd_req_op_destroy(op); + if (!obj_request->osd_req) + goto out; - dout("cls_exec returned %d\n", ret); - return ret; -} + osdc = &rbd_dev->rbd_client->client->osdc; + ret = rbd_obj_request_submit(osdc, obj_request); + if (ret) + goto out; + ret = rbd_obj_request_wait(obj_request); + if (ret) + goto out; -static struct rbd_req_coll *rbd_alloc_coll(int num_reqs) -{ - struct rbd_req_coll *coll = - kzalloc(sizeof(struct rbd_req_coll) + - sizeof(struct rbd_req_status) * num_reqs, - GFP_ATOMIC); + ret = obj_request->result; + if (ret < 0) + goto out; + ret = 0; + ceph_copy_from_page_vector(pages, inbound, 0, obj_request->xferred); + if (version) + *version = obj_request->version; +out: + if (obj_request) + rbd_obj_request_put(obj_request); + else + ceph_release_page_vector(pages, page_count); - if (!coll) - return NULL; - coll->total = num_reqs; - kref_init(&coll->kref); - return coll; + return ret; } -/* - * block device queue callback - */ -static void rbd_rq_fn(struct request_queue *q) +static void rbd_request_fn(struct request_queue *q) + __releases(q->queue_lock) __acquires(q->queue_lock) { struct rbd_device *rbd_dev = q->queuedata; + bool read_only = rbd_dev->mapping.read_only; struct request *rq; + int result; while ((rq = blk_fetch_request(q))) { - struct bio *bio; - bool do_write; - unsigned int size; - u64 ofs; - int num_segs, cur_seg = 0; - struct rbd_req_coll *coll; - struct ceph_snap_context *snapc; - unsigned int bio_offset; - - dout("fetched request\n"); - - /* filter out block requests we don't understand */ - if ((rq->cmd_type != REQ_TYPE_FS)) { - __blk_end_request_all(rq, 0); - continue; - } + bool write_request = rq_data_dir(rq) == WRITE; + struct rbd_img_request *img_request; + u64 offset; + u64 length; + + /* Ignore any non-FS requests that filter through. */ - /* deduce our operation (read, write) */ - do_write = (rq_data_dir(rq) == WRITE); - if (do_write && rbd_dev->mapping.read_only) { - __blk_end_request_all(rq, -EROFS); + if (rq->cmd_type != REQ_TYPE_FS) { + dout("%s: non-fs request type %d\n", __func__, + (int) rq->cmd_type); + __blk_end_request_all(rq, 0); continue; } - spin_unlock_irq(q->queue_lock); + /* Ignore/skip any zero-length requests */ - down_read(&rbd_dev->header_rwsem); + offset = (u64) blk_rq_pos(rq) << SECTOR_SHIFT; + length = (u64) blk_rq_bytes(rq); - if (!rbd_dev->exists) { - rbd_assert(rbd_dev->spec->snap_id != CEPH_NOSNAP); - up_read(&rbd_dev->header_rwsem); - dout("request for non-existent snapshot"); - spin_lock_irq(q->queue_lock); - __blk_end_request_all(rq, -ENXIO); + if (!length) { + dout("%s: zero-length request\n", __func__); + __blk_end_request_all(rq, 0); continue; } - snapc = ceph_get_snap_context(rbd_dev->header.snapc); - - up_read(&rbd_dev->header_rwsem); - - size = blk_rq_bytes(rq); - ofs = blk_rq_pos(rq) * SECTOR_SIZE; - bio = rq->bio; + spin_unlock_irq(q->queue_lock); - dout("%s 0x%x bytes at 0x%llx\n", - do_write ? "write" : "read", - size, (unsigned long long) blk_rq_pos(rq) * SECTOR_SIZE); + /* Disallow writes to a read-only device */ - num_segs = rbd_get_num_segments(&rbd_dev->header, ofs, size); - if (num_segs <= 0) { - spin_lock_irq(q->queue_lock); - __blk_end_request_all(rq, num_segs); - ceph_put_snap_context(snapc); - continue; + if (write_request) { + result = -EROFS; + if (read_only) + goto end_request; + rbd_assert(rbd_dev->spec->snap_id == CEPH_NOSNAP); } - coll = rbd_alloc_coll(num_segs); - if (!coll) { - spin_lock_irq(q->queue_lock); - __blk_end_request_all(rq, -ENOMEM); - ceph_put_snap_context(snapc); - continue; - } - - bio_offset = 0; - do { - u64 limit = rbd_segment_length(rbd_dev, ofs, size); - unsigned int chain_size; - struct bio *bio_chain; - - BUG_ON(limit > (u64) UINT_MAX); - chain_size = (unsigned int) limit; - dout("rq->bio->bi_vcnt=%hu\n", rq->bio->bi_vcnt); - kref_get(&coll->kref); + /* + * Quit early if the mapped snapshot no longer + * exists. It's still possible the snapshot will + * have disappeared by the time our request arrives + * at the osd, but there's no sense in sending it if + * we already know. + */ + if (!test_bit(RBD_DEV_FLAG_EXISTS, &rbd_dev->flags)) { + dout("request for non-existent snapshot"); + rbd_assert(rbd_dev->spec->snap_id != CEPH_NOSNAP); + result = -ENXIO; + goto end_request; + } - /* Pass a cloned bio chain via an osd request */ + result = -EINVAL; + if (WARN_ON(offset && length > U64_MAX - offset + 1)) + goto end_request; /* Shouldn't happen */ - bio_chain = bio_chain_clone_range(&bio, - &bio_offset, chain_size, - GFP_ATOMIC); - if (bio_chain) - (void) rbd_do_op(rq, rbd_dev, snapc, - ofs, chain_size, - bio_chain, coll, cur_seg); - else - rbd_coll_end_req_index(rq, coll, cur_seg, - -ENOMEM, chain_size); - size -= chain_size; - ofs += chain_size; + result = -ENOMEM; + img_request = rbd_img_request_create(rbd_dev, offset, length, + write_request); + if (!img_request) + goto end_request; - cur_seg++; - } while (size > 0); - kref_put(&coll->kref, rbd_coll_release); + img_request->rq = rq; + result = rbd_img_request_fill_bio(img_request, rq->bio); + if (!result) + result = rbd_img_request_submit(img_request); + if (result) + rbd_img_request_put(img_request); +end_request: spin_lock_irq(q->queue_lock); - - ceph_put_snap_context(snapc); + if (result < 0) { + rbd_warn(rbd_dev, "obj_request %s result %d\n", + write_request ? "write" : "read", result); + __blk_end_request_all(rq, result); + } } } @@ -1703,6 +2106,71 @@ static void rbd_free_disk(struct rbd_device *rbd_dev) put_disk(disk); } +static int rbd_obj_read_sync(struct rbd_device *rbd_dev, + const char *object_name, + u64 offset, u64 length, + char *buf, u64 *version) + +{ + struct ceph_osd_req_op *op; + struct rbd_obj_request *obj_request; + struct ceph_osd_client *osdc; + struct page **pages = NULL; + u32 page_count; + size_t size; + int ret; + + page_count = (u32) calc_pages_for(offset, length); + pages = ceph_alloc_page_vector(page_count, GFP_KERNEL); + if (IS_ERR(pages)) + ret = PTR_ERR(pages); + + ret = -ENOMEM; + obj_request = rbd_obj_request_create(object_name, offset, length, + OBJ_REQUEST_PAGES); + if (!obj_request) + goto out; + + obj_request->pages = pages; + obj_request->page_count = page_count; + + op = rbd_osd_req_op_create(CEPH_OSD_OP_READ, offset, length); + if (!op) + goto out; + obj_request->osd_req = rbd_osd_req_create(rbd_dev, false, + obj_request, op); + rbd_osd_req_op_destroy(op); + if (!obj_request->osd_req) + goto out; + + osdc = &rbd_dev->rbd_client->client->osdc; + ret = rbd_obj_request_submit(osdc, obj_request); + if (ret) + goto out; + ret = rbd_obj_request_wait(obj_request); + if (ret) + goto out; + + ret = obj_request->result; + if (ret < 0) + goto out; + + rbd_assert(obj_request->xferred <= (u64) SIZE_MAX); + size = (size_t) obj_request->xferred; + ceph_copy_from_page_vector(pages, buf, 0, size); + rbd_assert(size <= (size_t) INT_MAX); + ret = (int) size; + if (version) + *version = obj_request->version; +out: + if (obj_request) + rbd_obj_request_put(obj_request); + else + ceph_release_page_vector(pages, page_count); + + return ret; +} + /* * Read the complete header for the given rbd device. * @@ -1741,24 +2209,20 @@ rbd_dev_v1_header_read(struct rbd_device *rbd_dev, u64 *version) if (!ondisk) return ERR_PTR(-ENOMEM); - ret = rbd_req_sync_read(rbd_dev, CEPH_NOSNAP, - rbd_dev->header_name, + ret = rbd_obj_read_sync(rbd_dev, rbd_dev->header_name, 0, size, (char *) ondisk, version); - if (ret < 0) goto out_err; if (WARN_ON((size_t) ret < size)) { ret = -ENXIO; - pr_warning("short header read for image %s" - " (want %zd got %d)\n", - rbd_dev->spec->image_name, size, ret); + rbd_warn(rbd_dev, "short header read (want %zd got %d)", + size, ret); goto out_err; } if (!rbd_dev_ondisk_valid(ondisk)) { ret = -ENXIO; - pr_warning("invalid header for image %s\n", - rbd_dev->spec->image_name); + rbd_warn(rbd_dev, "invalid header"); goto out_err; } @@ -1895,8 +2359,7 @@ static int rbd_init_disk(struct rbd_device *rbd_dev) disk->fops = &rbd_bd_ops; disk->private_data = rbd_dev; - /* init rq */ - q = blk_init_queue(rbd_rq_fn, &rbd_dev->lock); + q = blk_init_queue(rbd_request_fn, &rbd_dev->lock); if (!q) goto out_disk; @@ -2233,7 +2696,7 @@ static void rbd_spec_free(struct kref *kref) kfree(spec); } -struct rbd_device *rbd_dev_create(struct rbd_client *rbdc, +static struct rbd_device *rbd_dev_create(struct rbd_client *rbdc, struct rbd_spec *spec) { struct rbd_device *rbd_dev; @@ -2243,6 +2706,7 @@ struct rbd_device *rbd_dev_create(struct rbd_client *rbdc, return NULL; spin_lock_init(&rbd_dev->lock); + rbd_dev->flags = 0; INIT_LIST_HEAD(&rbd_dev->node); INIT_LIST_HEAD(&rbd_dev->snaps); init_rwsem(&rbd_dev->header_rwsem); @@ -2250,6 +2714,13 @@ struct rbd_device *rbd_dev_create(struct rbd_client *rbdc, rbd_dev->spec = spec; rbd_dev->rbd_client = rbdc; + /* Initialize the layout used for all rbd requests */ + + rbd_dev->layout.fl_stripe_unit = cpu_to_le32(1 << RBD_MAX_OBJ_ORDER); + rbd_dev->layout.fl_stripe_count = cpu_to_le32(1); + rbd_dev->layout.fl_object_size = cpu_to_le32(1 << RBD_MAX_OBJ_ORDER); + rbd_dev->layout.fl_pg_pool = cpu_to_le32((u32) spec->pool_id); + return rbd_dev; } @@ -2360,12 +2831,11 @@ static int _rbd_dev_v2_snap_size(struct rbd_device *rbd_dev, u64 snap_id, __le64 size; } __attribute__ ((packed)) size_buf = { 0 }; - ret = rbd_req_sync_exec(rbd_dev, rbd_dev->header_name, + ret = rbd_obj_method_sync(rbd_dev, rbd_dev->header_name, "rbd", "get_size", (char *) &snapid, sizeof (snapid), - (char *) &size_buf, sizeof (size_buf), - CEPH_OSD_FLAG_READ, NULL); - dout("%s: rbd_req_sync_exec returned %d\n", __func__, ret); + (char *) &size_buf, sizeof (size_buf), NULL); + dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret); if (ret < 0) return ret; @@ -2396,15 +2866,13 @@ static int rbd_dev_v2_object_prefix(struct rbd_device *rbd_dev) if (!reply_buf) return -ENOMEM; - ret = rbd_req_sync_exec(rbd_dev, rbd_dev->header_name, + ret = rbd_obj_method_sync(rbd_dev, rbd_dev->header_name, "rbd", "get_object_prefix", NULL, 0, - reply_buf, RBD_OBJ_PREFIX_LEN_MAX, - CEPH_OSD_FLAG_READ, NULL); - dout("%s: rbd_req_sync_exec returned %d\n", __func__, ret); + reply_buf, RBD_OBJ_PREFIX_LEN_MAX, NULL); + dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret); if (ret < 0) goto out; - ret = 0; /* rbd_req_sync_exec() can return positive */ p = reply_buf; rbd_dev->header.object_prefix = ceph_extract_encoded_string(&p, @@ -2435,12 +2903,12 @@ static int _rbd_dev_v2_snap_features(struct rbd_device *rbd_dev, u64 snap_id, u64 incompat; int ret; - ret = rbd_req_sync_exec(rbd_dev, rbd_dev->header_name, + ret = rbd_obj_method_sync(rbd_dev, rbd_dev->header_name, "rbd", "get_features", (char *) &snapid, sizeof (snapid), (char *) &features_buf, sizeof (features_buf), - CEPH_OSD_FLAG_READ, NULL); - dout("%s: rbd_req_sync_exec returned %d\n", __func__, ret); + NULL); + dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret); if (ret < 0) return ret; @@ -2474,7 +2942,6 @@ static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev) void *end; char *image_id; u64 overlap; - size_t len = 0; int ret; parent_spec = rbd_spec_alloc(); @@ -2492,12 +2959,11 @@ static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev) } snapid = cpu_to_le64(CEPH_NOSNAP); - ret = rbd_req_sync_exec(rbd_dev, rbd_dev->header_name, + ret = rbd_obj_method_sync(rbd_dev, rbd_dev->header_name, "rbd", "get_parent", (char *) &snapid, sizeof (snapid), - (char *) reply_buf, size, - CEPH_OSD_FLAG_READ, NULL); - dout("%s: rbd_req_sync_exec returned %d\n", __func__, ret); + (char *) reply_buf, size, NULL); + dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret); if (ret < 0) goto out_err; @@ -2508,13 +2974,18 @@ static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev) if (parent_spec->pool_id == CEPH_NOPOOL) goto out; /* No parent? No problem. */ - image_id = ceph_extract_encoded_string(&p, end, &len, GFP_KERNEL); + /* The ceph file layout needs to fit pool id in 32 bits */ + + ret = -EIO; + if (WARN_ON(parent_spec->pool_id > (u64) U32_MAX)) + goto out; + + image_id = ceph_extract_encoded_string(&p, end, NULL, GFP_KERNEL); if (IS_ERR(image_id)) { ret = PTR_ERR(image_id); goto out_err; } parent_spec->image_id = image_id; - parent_spec->image_id_len = len; ceph_decode_64_safe(&p, end, parent_spec->snap_id, out_err); ceph_decode_64_safe(&p, end, overlap, out_err); @@ -2544,26 +3015,25 @@ static char *rbd_dev_image_name(struct rbd_device *rbd_dev) rbd_assert(!rbd_dev->spec->image_name); - image_id_size = sizeof (__le32) + rbd_dev->spec->image_id_len; + len = strlen(rbd_dev->spec->image_id); + image_id_size = sizeof (__le32) + len; image_id = kmalloc(image_id_size, GFP_KERNEL); if (!image_id) return NULL; p = image_id; end = (char *) image_id + image_id_size; - ceph_encode_string(&p, end, rbd_dev->spec->image_id, - (u32) rbd_dev->spec->image_id_len); + ceph_encode_string(&p, end, rbd_dev->spec->image_id, (u32) len); size = sizeof (__le32) + RBD_IMAGE_NAME_LEN_MAX; reply_buf = kmalloc(size, GFP_KERNEL); if (!reply_buf) goto out; - ret = rbd_req_sync_exec(rbd_dev, RBD_DIRECTORY, + ret = rbd_obj_method_sync(rbd_dev, RBD_DIRECTORY, "rbd", "dir_get_name", image_id, image_id_size, - (char *) reply_buf, size, - CEPH_OSD_FLAG_READ, NULL); + (char *) reply_buf, size, NULL); if (ret < 0) goto out; p = reply_buf; @@ -2602,8 +3072,11 @@ static int rbd_dev_probe_update_spec(struct rbd_device *rbd_dev) osdc = &rbd_dev->rbd_client->client->osdc; name = ceph_pg_pool_name_by_id(osdc->osdmap, rbd_dev->spec->pool_id); - if (!name) - return -EIO; /* pool id too large (>= 2^31) */ + if (!name) { + rbd_warn(rbd_dev, "there is no pool with id %llu", + rbd_dev->spec->pool_id); /* Really a BUG() */ + return -EIO; + } rbd_dev->spec->pool_name = kstrdup(name, GFP_KERNEL); if (!rbd_dev->spec->pool_name) @@ -2612,19 +3085,17 @@ static int rbd_dev_probe_update_spec(struct rbd_device *rbd_dev) /* Fetch the image name; tolerate failure here */ name = rbd_dev_image_name(rbd_dev); - if (name) { - rbd_dev->spec->image_name_len = strlen(name); + if (name) rbd_dev->spec->image_name = (char *) name; - } else { - pr_warning(RBD_DRV_NAME "%d " - "unable to get image name for image id %s\n", - rbd_dev->major, rbd_dev->spec->image_id); - } + else + rbd_warn(rbd_dev, "unable to get image name"); /* Look up the snapshot name. */ name = rbd_snap_name(rbd_dev, rbd_dev->spec->snap_id); if (!name) { + rbd_warn(rbd_dev, "no snapshot with id %llu", + rbd_dev->spec->snap_id); /* Really a BUG() */ ret = -EIO; goto out_err; } @@ -2665,12 +3136,11 @@ static int rbd_dev_v2_snap_context(struct rbd_device *rbd_dev, u64 *ver) if (!reply_buf) return -ENOMEM; - ret = rbd_req_sync_exec(rbd_dev, rbd_dev->header_name, + ret = rbd_obj_method_sync(rbd_dev, rbd_dev->header_name, "rbd", "get_snapcontext", NULL, 0, - reply_buf, size, - CEPH_OSD_FLAG_READ, ver); - dout("%s: rbd_req_sync_exec returned %d\n", __func__, ret); + reply_buf, size, ver); + dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret); if (ret < 0) goto out; @@ -2735,12 +3205,11 @@ static char *rbd_dev_v2_snap_name(struct rbd_device *rbd_dev, u32 which) return ERR_PTR(-ENOMEM); snap_id = cpu_to_le64(rbd_dev->header.snapc->snaps[which]); - ret = rbd_req_sync_exec(rbd_dev, rbd_dev->header_name, + ret = rbd_obj_method_sync(rbd_dev, rbd_dev->header_name, "rbd", "get_snapshot_name", (char *) &snap_id, sizeof (snap_id), - reply_buf, size, - CEPH_OSD_FLAG_READ, NULL); - dout("%s: rbd_req_sync_exec returned %d\n", __func__, ret); + reply_buf, size, NULL); + dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret); if (ret < 0) goto out; @@ -2766,7 +3235,7 @@ out: static char *rbd_dev_v2_snap_info(struct rbd_device *rbd_dev, u32 which, u64 *snap_size, u64 *snap_features) { - __le64 snap_id; + u64 snap_id; u8 order; int ret; @@ -2865,10 +3334,17 @@ static int rbd_dev_snaps_update(struct rbd_device *rbd_dev) if (snap_id == CEPH_NOSNAP || (snap && snap->id > snap_id)) { struct list_head *next = links->next; - /* Existing snapshot not in the new snap context */ - + /* + * A previously-existing snapshot is not in + * the new snap context. + * + * If the now missing snapshot is the one the + * image is mapped to, clear its exists flag + * so we can avoid sending any more requests + * to it. + */ if (rbd_dev->spec->snap_id == snap->id) - rbd_dev->exists = false; + clear_bit(RBD_DEV_FLAG_EXISTS, &rbd_dev->flags); rbd_remove_snap_dev(snap); dout("%ssnap id %llu has been removed\n", rbd_dev->spec->snap_id == snap->id ? @@ -2942,7 +3418,7 @@ static int rbd_dev_snaps_register(struct rbd_device *rbd_dev) struct rbd_snap *snap; int ret = 0; - dout("%s called\n", __func__); + dout("%s:\n", __func__); if (WARN_ON(!device_is_registered(&rbd_dev->dev))) return -EIO; @@ -2983,22 +3459,6 @@ static void rbd_bus_del_dev(struct rbd_device *rbd_dev) device_unregister(&rbd_dev->dev); } -static int rbd_init_watch_dev(struct rbd_device *rbd_dev) -{ - int ret, rc; - - do { - ret = rbd_req_sync_watch(rbd_dev); - if (ret == -ERANGE) { - rc = rbd_dev_refresh(rbd_dev, NULL); - if (rc < 0) - return rc; - } - } while (ret == -ERANGE); - - return ret; -} - static atomic64_t rbd_dev_id_max = ATOMIC64_INIT(0); /* @@ -3138,11 +3598,9 @@ static inline char *dup_token(const char **buf, size_t *lenp) size_t len; len = next_token(buf); - dup = kmalloc(len + 1, GFP_KERNEL); + dup = kmemdup(*buf, len + 1, GFP_KERNEL); if (!dup) return NULL; - - memcpy(dup, *buf, len); *(dup + len) = '\0'; *buf += len; @@ -3210,8 +3668,10 @@ static int rbd_add_parse_args(const char *buf, /* The first four tokens are required */ len = next_token(&buf); - if (!len) - return -EINVAL; /* Missing monitor address(es) */ + if (!len) { + rbd_warn(NULL, "no monitor address(es) provided"); + return -EINVAL; + } mon_addrs = buf; mon_addrs_size = len + 1; buf += len; @@ -3220,8 +3680,10 @@ static int rbd_add_parse_args(const char *buf, options = dup_token(&buf, NULL); if (!options) return -ENOMEM; - if (!*options) - goto out_err; /* Missing options */ + if (!*options) { + rbd_warn(NULL, "no options provided"); + goto out_err; + } spec = rbd_spec_alloc(); if (!spec) @@ -3230,14 +3692,18 @@ static int rbd_add_parse_args(const char *buf, spec->pool_name = dup_token(&buf, NULL); if (!spec->pool_name) goto out_mem; - if (!*spec->pool_name) - goto out_err; /* Missing pool name */ + if (!*spec->pool_name) { + rbd_warn(NULL, "no pool name provided"); + goto out_err; + } - spec->image_name = dup_token(&buf, &spec->image_name_len); + spec->image_name = dup_token(&buf, NULL); if (!spec->image_name) goto out_mem; - if (!*spec->image_name) - goto out_err; /* Missing image name */ + if (!*spec->image_name) { + rbd_warn(NULL, "no image name provided"); + goto out_err; + } /* * Snapshot name is optional; default is to use "-" @@ -3251,10 +3717,9 @@ static int rbd_add_parse_args(const char *buf, ret = -ENAMETOOLONG; goto out_err; } - spec->snap_name = kmalloc(len + 1, GFP_KERNEL); + spec->snap_name = kmemdup(buf, len + 1, GFP_KERNEL); if (!spec->snap_name) goto out_mem; - memcpy(spec->snap_name, buf, len); *(spec->snap_name + len) = '\0'; /* Initialize all rbd options to the defaults */ @@ -3323,7 +3788,7 @@ static int rbd_dev_image_id(struct rbd_device *rbd_dev) * First, see if the format 2 image id file exists, and if * so, get the image's persistent id from it. */ - size = sizeof (RBD_ID_PREFIX) + rbd_dev->spec->image_name_len; + size = sizeof (RBD_ID_PREFIX) + strlen(rbd_dev->spec->image_name); object_name = kmalloc(size, GFP_NOIO); if (!object_name) return -ENOMEM; @@ -3339,21 +3804,18 @@ static int rbd_dev_image_id(struct rbd_device *rbd_dev) goto out; } - ret = rbd_req_sync_exec(rbd_dev, object_name, + ret = rbd_obj_method_sync(rbd_dev, object_name, "rbd", "get_id", NULL, 0, - response, RBD_IMAGE_ID_LEN_MAX, - CEPH_OSD_FLAG_READ, NULL); - dout("%s: rbd_req_sync_exec returned %d\n", __func__, ret); + response, RBD_IMAGE_ID_LEN_MAX, NULL); + dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret); if (ret < 0) goto out; - ret = 0; /* rbd_req_sync_exec() can return positive */ p = response; rbd_dev->spec->image_id = ceph_extract_encoded_string(&p, p + RBD_IMAGE_ID_LEN_MAX, - &rbd_dev->spec->image_id_len, - GFP_NOIO); + NULL, GFP_NOIO); if (IS_ERR(rbd_dev->spec->image_id)) { ret = PTR_ERR(rbd_dev->spec->image_id); rbd_dev->spec->image_id = NULL; @@ -3377,11 +3839,10 @@ static int rbd_dev_v1_probe(struct rbd_device *rbd_dev) rbd_dev->spec->image_id = kstrdup("", GFP_KERNEL); if (!rbd_dev->spec->image_id) return -ENOMEM; - rbd_dev->spec->image_id_len = 0; /* Record the header object name for this rbd image. */ - size = rbd_dev->spec->image_name_len + sizeof (RBD_SUFFIX); + size = strlen(rbd_dev->spec->image_name) + sizeof (RBD_SUFFIX); rbd_dev->header_name = kmalloc(size, GFP_KERNEL); if (!rbd_dev->header_name) { ret = -ENOMEM; @@ -3427,7 +3888,7 @@ static int rbd_dev_v2_probe(struct rbd_device *rbd_dev) * Image id was filled in by the caller. Record the header * object name for this rbd image. */ - size = sizeof (RBD_HEADER_PREFIX) + rbd_dev->spec->image_id_len; + size = sizeof (RBD_HEADER_PREFIX) + strlen(rbd_dev->spec->image_id); rbd_dev->header_name = kmalloc(size, GFP_KERNEL); if (!rbd_dev->header_name) return -ENOMEM; @@ -3542,7 +4003,7 @@ static int rbd_dev_probe_finish(struct rbd_device *rbd_dev) if (ret) goto err_out_bus; - ret = rbd_init_watch_dev(rbd_dev); + ret = rbd_dev_header_watch_sync(rbd_dev, 1); if (ret) goto err_out_bus; @@ -3638,6 +4099,13 @@ static ssize_t rbd_add(struct bus_type *bus, goto err_out_client; spec->pool_id = (u64) rc; + /* The ceph file layout needs to fit pool id in 32 bits */ + + if (WARN_ON(spec->pool_id > (u64) U32_MAX)) { + rc = -EIO; + goto err_out_client; + } + rbd_dev = rbd_dev_create(rbdc, spec); if (!rbd_dev) goto err_out_client; @@ -3691,15 +4159,8 @@ static void rbd_dev_release(struct device *dev) { struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); - if (rbd_dev->watch_request) { - struct ceph_client *client = rbd_dev->rbd_client->client; - - ceph_osdc_unregister_linger_request(&client->osdc, - rbd_dev->watch_request); - } if (rbd_dev->watch_event) - rbd_req_sync_unwatch(rbd_dev); - + rbd_dev_header_watch_sync(rbd_dev, 0); /* clean up and free blkdev */ rbd_free_disk(rbd_dev); @@ -3743,10 +4204,14 @@ static ssize_t rbd_remove(struct bus_type *bus, goto done; } - if (rbd_dev->open_count) { + spin_lock_irq(&rbd_dev->lock); + if (rbd_dev->open_count) ret = -EBUSY; + else + set_bit(RBD_DEV_FLAG_REMOVING, &rbd_dev->flags); + spin_unlock_irq(&rbd_dev->lock); + if (ret < 0) goto done; - } rbd_remove_all_snaps(rbd_dev); rbd_bus_del_dev(rbd_dev); @@ -3782,10 +4247,15 @@ static void rbd_sysfs_cleanup(void) device_unregister(&rbd_root_dev); } -int __init rbd_init(void) +static int __init rbd_init(void) { int rc; + if (!libceph_compatible(NULL)) { + rbd_warn(NULL, "libceph incompatibility (quitting)"); + + return -EINVAL; + } rc = rbd_sysfs_init(); if (rc) return rc; @@ -3793,7 +4263,7 @@ int __init rbd_init(void) return 0; } -void __exit rbd_exit(void) +static void __exit rbd_exit(void) { rbd_sysfs_cleanup(); } diff --git a/drivers/block/rsxx/Makefile b/drivers/block/rsxx/Makefile new file mode 100644 index 0000000..f35cd0b --- /dev/null +++ b/drivers/block/rsxx/Makefile @@ -0,0 +1,2 @@ +obj-$(CONFIG_BLK_DEV_RSXX) += rsxx.o +rsxx-y := config.o core.o cregs.o dev.o dma.o diff --git a/drivers/block/rsxx/config.c b/drivers/block/rsxx/config.c new file mode 100644 index 0000000..a295e7e --- /dev/null +++ b/drivers/block/rsxx/config.c @@ -0,0 +1,213 @@ +/* +* Filename: config.c +* +* +* Authors: Joshua Morris <josh.h.morris@us.ibm.com> +* Philip Kelleher <pjk1939@linux.vnet.ibm.com> +* +* (C) Copyright 2013 IBM Corporation +* +* This program is free software; you can redistribute it and/or +* modify it under the terms of the GNU General Public License as +* published by the Free Software Foundation; either version 2 of the +* License, or (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, but +* WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +* General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program; if not, write to the Free Software Foundation, +* Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#include <linux/types.h> +#include <linux/crc32.h> +#include <linux/swab.h> + +#include "rsxx_priv.h" +#include "rsxx_cfg.h" + +static void initialize_config(void *config) +{ + struct rsxx_card_cfg *cfg = config; + + cfg->hdr.version = RSXX_CFG_VERSION; + + cfg->data.block_size = RSXX_HW_BLK_SIZE; + cfg->data.stripe_size = RSXX_HW_BLK_SIZE; + cfg->data.vendor_id = RSXX_VENDOR_ID_TMS_IBM; + cfg->data.cache_order = (-1); + cfg->data.intr_coal.mode = RSXX_INTR_COAL_DISABLED; + cfg->data.intr_coal.count = 0; + cfg->data.intr_coal.latency = 0; +} + +static u32 config_data_crc32(struct rsxx_card_cfg *cfg) +{ + /* + * Return the compliment of the CRC to ensure compatibility + * (i.e. this is how early rsxx drivers did it.) + */ + + return ~crc32(~0, &cfg->data, sizeof(cfg->data)); +} + + +/*----------------- Config Byte Swap Functions -------------------*/ +static void config_hdr_be_to_cpu(struct card_cfg_hdr *hdr) +{ + hdr->version = be32_to_cpu((__force __be32) hdr->version); + hdr->crc = be32_to_cpu((__force __be32) hdr->crc); +} + +static void config_hdr_cpu_to_be(struct card_cfg_hdr *hdr) +{ + hdr->version = (__force u32) cpu_to_be32(hdr->version); + hdr->crc = (__force u32) cpu_to_be32(hdr->crc); +} + +static void config_data_swab(struct rsxx_card_cfg *cfg) +{ + u32 *data = (u32 *) &cfg->data; + int i; + + for (i = 0; i < (sizeof(cfg->data) / 4); i++) + data[i] = swab32(data[i]); +} + +static void config_data_le_to_cpu(struct rsxx_card_cfg *cfg) +{ + u32 *data = (u32 *) &cfg->data; + int i; + + for (i = 0; i < (sizeof(cfg->data) / 4); i++) + data[i] = le32_to_cpu((__force __le32) data[i]); +} + +static void config_data_cpu_to_le(struct rsxx_card_cfg *cfg) +{ + u32 *data = (u32 *) &cfg->data; + int i; + + for (i = 0; i < (sizeof(cfg->data) / 4); i++) + data[i] = (__force u32) cpu_to_le32(data[i]); +} + + +/*----------------- Config Operations ------------------*/ +static int rsxx_save_config(struct rsxx_cardinfo *card) +{ + struct rsxx_card_cfg cfg; + int st; + + memcpy(&cfg, &card->config, sizeof(cfg)); + + if (unlikely(cfg.hdr.version != RSXX_CFG_VERSION)) { + dev_err(CARD_TO_DEV(card), + "Cannot save config with invalid version %d\n", + cfg.hdr.version); + return -EINVAL; + } + + /* Convert data to little endian for the CRC calculation. */ + config_data_cpu_to_le(&cfg); + + cfg.hdr.crc = config_data_crc32(&cfg); + + /* + * Swap the data from little endian to big endian so it can be + * stored. + */ + config_data_swab(&cfg); + config_hdr_cpu_to_be(&cfg.hdr); + + st = rsxx_creg_write(card, CREG_ADD_CONFIG, sizeof(cfg), &cfg, 1); + if (st) + return st; + + return 0; +} + +int rsxx_load_config(struct rsxx_cardinfo *card) +{ + int st; + u32 crc; + + st = rsxx_creg_read(card, CREG_ADD_CONFIG, sizeof(card->config), + &card->config, 1); + if (st) { + dev_err(CARD_TO_DEV(card), + "Failed reading card config.\n"); + return st; + } + + config_hdr_be_to_cpu(&card->config.hdr); + + if (card->config.hdr.version == RSXX_CFG_VERSION) { + /* + * We calculate the CRC with the data in little endian, because + * early drivers did not take big endian CPUs into account. + * The data is always stored in big endian, so we need to byte + * swap it before calculating the CRC. + */ + + config_data_swab(&card->config); + + /* Check the CRC */ + crc = config_data_crc32(&card->config); + if (crc != card->config.hdr.crc) { + dev_err(CARD_TO_DEV(card), + "Config corruption detected!\n"); + dev_info(CARD_TO_DEV(card), + "CRC (sb x%08x is x%08x)\n", + card->config.hdr.crc, crc); + return -EIO; + } + + /* Convert the data to CPU byteorder */ + config_data_le_to_cpu(&card->config); + + } else if (card->config.hdr.version != 0) { + dev_err(CARD_TO_DEV(card), + "Invalid config version %d.\n", + card->config.hdr.version); + /* + * Config version changes require special handling from the + * user + */ + return -EINVAL; + } else { + dev_info(CARD_TO_DEV(card), + "Initializing card configuration.\n"); + initialize_config(card); + st = rsxx_save_config(card); + if (st) + return st; + } + + card->config_valid = 1; + + dev_dbg(CARD_TO_DEV(card), "version: x%08x\n", + card->config.hdr.version); + dev_dbg(CARD_TO_DEV(card), "crc: x%08x\n", + card->config.hdr.crc); + dev_dbg(CARD_TO_DEV(card), "block_size: x%08x\n", + card->config.data.block_size); + dev_dbg(CARD_TO_DEV(card), "stripe_size: x%08x\n", + card->config.data.stripe_size); + dev_dbg(CARD_TO_DEV(card), "vendor_id: x%08x\n", + card->config.data.vendor_id); + dev_dbg(CARD_TO_DEV(card), "cache_order: x%08x\n", + card->config.data.cache_order); + dev_dbg(CARD_TO_DEV(card), "mode: x%08x\n", + card->config.data.intr_coal.mode); + dev_dbg(CARD_TO_DEV(card), "count: x%08x\n", + card->config.data.intr_coal.count); + dev_dbg(CARD_TO_DEV(card), "latency: x%08x\n", + card->config.data.intr_coal.latency); + + return 0; +} + diff --git a/drivers/block/rsxx/core.c b/drivers/block/rsxx/core.c new file mode 100644 index 0000000..e516248 --- /dev/null +++ b/drivers/block/rsxx/core.c @@ -0,0 +1,649 @@ +/* +* Filename: core.c +* +* +* Authors: Joshua Morris <josh.h.morris@us.ibm.com> +* Philip Kelleher <pjk1939@linux.vnet.ibm.com> +* +* (C) Copyright 2013 IBM Corporation +* +* This program is free software; you can redistribute it and/or +* modify it under the terms of the GNU General Public License as +* published by the Free Software Foundation; either version 2 of the +* License, or (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, but +* WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +* General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program; if not, write to the Free Software Foundation, +* Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/interrupt.h> +#include <linux/module.h> +#include <linux/pci.h> +#include <linux/reboot.h> +#include <linux/slab.h> +#include <linux/bitops.h> + +#include <linux/genhd.h> +#include <linux/idr.h> + +#include "rsxx_priv.h" +#include "rsxx_cfg.h" + +#define NO_LEGACY 0 + +MODULE_DESCRIPTION("IBM RamSan PCIe Flash SSD Device Driver"); +MODULE_AUTHOR("IBM <support@ramsan.com>"); +MODULE_LICENSE("GPL"); +MODULE_VERSION(DRIVER_VERSION); + +static unsigned int force_legacy = NO_LEGACY; +module_param(force_legacy, uint, 0444); +MODULE_PARM_DESC(force_legacy, "Force the use of legacy type PCI interrupts"); + +static DEFINE_IDA(rsxx_disk_ida); +static DEFINE_SPINLOCK(rsxx_ida_lock); + +/*----------------- Interrupt Control & Handling -------------------*/ +static void __enable_intr(unsigned int *mask, unsigned int intr) +{ + *mask |= intr; +} + +static void __disable_intr(unsigned int *mask, unsigned int intr) +{ + *mask &= ~intr; +} + +/* + * NOTE: Disabling the IER will disable the hardware interrupt. + * Disabling the ISR will disable the software handling of the ISR bit. + * + * Enable/Disable interrupt functions assume the card->irq_lock + * is held by the caller. + */ +void rsxx_enable_ier(struct rsxx_cardinfo *card, unsigned int intr) +{ + if (unlikely(card->halt)) + return; + + __enable_intr(&card->ier_mask, intr); + iowrite32(card->ier_mask, card->regmap + IER); +} + +void rsxx_disable_ier(struct rsxx_cardinfo *card, unsigned int intr) +{ + __disable_intr(&card->ier_mask, intr); + iowrite32(card->ier_mask, card->regmap + IER); +} + +void rsxx_enable_ier_and_isr(struct rsxx_cardinfo *card, + unsigned int intr) +{ + if (unlikely(card->halt)) + return; + + __enable_intr(&card->isr_mask, intr); + __enable_intr(&card->ier_mask, intr); + iowrite32(card->ier_mask, card->regmap + IER); +} +void rsxx_disable_ier_and_isr(struct rsxx_cardinfo *card, + unsigned int intr) +{ + __disable_intr(&card->isr_mask, intr); + __disable_intr(&card->ier_mask, intr); + iowrite32(card->ier_mask, card->regmap + IER); +} + +static irqreturn_t rsxx_isr(int irq, void *pdata) +{ + struct rsxx_cardinfo *card = pdata; + unsigned int isr; + int handled = 0; + int reread_isr; + int i; + + spin_lock(&card->irq_lock); + + do { + reread_isr = 0; + + isr = ioread32(card->regmap + ISR); + if (isr == 0xffffffff) { + /* + * A few systems seem to have an intermittent issue + * where PCI reads return all Fs, but retrying the read + * a little later will return as expected. + */ + dev_info(CARD_TO_DEV(card), + "ISR = 0xFFFFFFFF, retrying later\n"); + break; + } + + isr &= card->isr_mask; + if (!isr) + break; + + for (i = 0; i < card->n_targets; i++) { + if (isr & CR_INTR_DMA(i)) { + if (card->ier_mask & CR_INTR_DMA(i)) { + rsxx_disable_ier(card, CR_INTR_DMA(i)); + reread_isr = 1; + } + queue_work(card->ctrl[i].done_wq, + &card->ctrl[i].dma_done_work); + handled++; + } + } + + if (isr & CR_INTR_CREG) { + schedule_work(&card->creg_ctrl.done_work); + handled++; + } + + if (isr & CR_INTR_EVENT) { + schedule_work(&card->event_work); + rsxx_disable_ier_and_isr(card, CR_INTR_EVENT); + handled++; + } + } while (reread_isr); + + spin_unlock(&card->irq_lock); + + return handled ? IRQ_HANDLED : IRQ_NONE; +} + +/*----------------- Card Event Handler -------------------*/ +static char *rsxx_card_state_to_str(unsigned int state) +{ + static char *state_strings[] = { + "Unknown", "Shutdown", "Starting", "Formatting", + "Uninitialized", "Good", "Shutting Down", + "Fault", "Read Only Fault", "dStroying" + }; + + return state_strings[ffs(state)]; +} + +static void card_state_change(struct rsxx_cardinfo *card, + unsigned int new_state) +{ + int st; + + dev_info(CARD_TO_DEV(card), + "card state change detected.(%s -> %s)\n", + rsxx_card_state_to_str(card->state), + rsxx_card_state_to_str(new_state)); + + card->state = new_state; + + /* Don't attach DMA interfaces if the card has an invalid config */ + if (!card->config_valid) + return; + + switch (new_state) { + case CARD_STATE_RD_ONLY_FAULT: + dev_crit(CARD_TO_DEV(card), + "Hardware has entered read-only mode!\n"); + /* + * Fall through so the DMA devices can be attached and + * the user can attempt to pull off their data. + */ + case CARD_STATE_GOOD: + st = rsxx_get_card_size8(card, &card->size8); + if (st) + dev_err(CARD_TO_DEV(card), + "Failed attaching DMA devices\n"); + + if (card->config_valid) + set_capacity(card->gendisk, card->size8 >> 9); + break; + + case CARD_STATE_FAULT: + dev_crit(CARD_TO_DEV(card), + "Hardware Fault reported!\n"); + /* Fall through. */ + + /* Everything else, detach DMA interface if it's attached. */ + case CARD_STATE_SHUTDOWN: + case CARD_STATE_STARTING: + case CARD_STATE_FORMATTING: + case CARD_STATE_UNINITIALIZED: + case CARD_STATE_SHUTTING_DOWN: + /* + * dStroy is a term coined by marketing to represent the low level + * secure erase. + */ + case CARD_STATE_DSTROYING: + set_capacity(card->gendisk, 0); + break; + } +} + +static void card_event_handler(struct work_struct *work) +{ + struct rsxx_cardinfo *card; + unsigned int state; + unsigned long flags; + int st; + + card = container_of(work, struct rsxx_cardinfo, event_work); + + if (unlikely(card->halt)) + return; + + /* + * Enable the interrupt now to avoid any weird race conditions where a + * state change might occur while rsxx_get_card_state() is + * processing a returned creg cmd. + */ + spin_lock_irqsave(&card->irq_lock, flags); + rsxx_enable_ier_and_isr(card, CR_INTR_EVENT); + spin_unlock_irqrestore(&card->irq_lock, flags); + + st = rsxx_get_card_state(card, &state); + if (st) { + dev_info(CARD_TO_DEV(card), + "Failed reading state after event.\n"); + return; + } + + if (card->state != state) + card_state_change(card, state); + + if (card->creg_ctrl.creg_stats.stat & CREG_STAT_LOG_PENDING) + rsxx_read_hw_log(card); +} + +/*----------------- Card Operations -------------------*/ +static int card_shutdown(struct rsxx_cardinfo *card) +{ + unsigned int state; + signed long start; + const int timeout = msecs_to_jiffies(120000); + int st; + + /* We can't issue a shutdown if the card is in a transition state */ + start = jiffies; + do { + st = rsxx_get_card_state(card, &state); + if (st) + return st; + } while (state == CARD_STATE_STARTING && + (jiffies - start < timeout)); + + if (state == CARD_STATE_STARTING) + return -ETIMEDOUT; + + /* Only issue a shutdown if we need to */ + if ((state != CARD_STATE_SHUTTING_DOWN) && + (state != CARD_STATE_SHUTDOWN)) { + st = rsxx_issue_card_cmd(card, CARD_CMD_SHUTDOWN); + if (st) + return st; + } + + start = jiffies; + do { + st = rsxx_get_card_state(card, &state); + if (st) + return st; + } while (state != CARD_STATE_SHUTDOWN && + (jiffies - start < timeout)); + + if (state != CARD_STATE_SHUTDOWN) + return -ETIMEDOUT; + + return 0; +} + +/*----------------- Driver Initialization & Setup -------------------*/ +/* Returns: 0 if the driver is compatible with the device + -1 if the driver is NOT compatible with the device */ +static int rsxx_compatibility_check(struct rsxx_cardinfo *card) +{ + unsigned char pci_rev; + + pci_read_config_byte(card->dev, PCI_REVISION_ID, &pci_rev); + + if (pci_rev > RS70_PCI_REV_SUPPORTED) + return -1; + return 0; +} + +static int rsxx_pci_probe(struct pci_dev *dev, + const struct pci_device_id *id) +{ + struct rsxx_cardinfo *card; + int st; + + dev_info(&dev->dev, "PCI-Flash SSD discovered\n"); + + card = kzalloc(sizeof(*card), GFP_KERNEL); + if (!card) + return -ENOMEM; + + card->dev = dev; + pci_set_drvdata(dev, card); + + do { + if (!ida_pre_get(&rsxx_disk_ida, GFP_KERNEL)) { + st = -ENOMEM; + goto failed_ida_get; + } + + spin_lock(&rsxx_ida_lock); + st = ida_get_new(&rsxx_disk_ida, &card->disk_id); + spin_unlock(&rsxx_ida_lock); + } while (st == -EAGAIN); + + if (st) + goto failed_ida_get; + + st = pci_enable_device(dev); + if (st) + goto failed_enable; + + pci_set_master(dev); + pci_set_dma_max_seg_size(dev, RSXX_HW_BLK_SIZE); + + st = pci_set_dma_mask(dev, DMA_BIT_MASK(64)); + if (st) { + dev_err(CARD_TO_DEV(card), + "No usable DMA configuration,aborting\n"); + goto failed_dma_mask; + } + + st = pci_request_regions(dev, DRIVER_NAME); + if (st) { + dev_err(CARD_TO_DEV(card), + "Failed to request memory region\n"); + goto failed_request_regions; + } + + if (pci_resource_len(dev, 0) == 0) { + dev_err(CARD_TO_DEV(card), "BAR0 has length 0!\n"); + st = -ENOMEM; + goto failed_iomap; + } + + card->regmap = pci_iomap(dev, 0, 0); + if (!card->regmap) { + dev_err(CARD_TO_DEV(card), "Failed to map BAR0\n"); + st = -ENOMEM; + goto failed_iomap; + } + + spin_lock_init(&card->irq_lock); + card->halt = 0; + + spin_lock_irq(&card->irq_lock); + rsxx_disable_ier_and_isr(card, CR_INTR_ALL); + spin_unlock_irq(&card->irq_lock); + + if (!force_legacy) { + st = pci_enable_msi(dev); + if (st) + dev_warn(CARD_TO_DEV(card), + "Failed to enable MSI\n"); + } + + st = request_irq(dev->irq, rsxx_isr, IRQF_DISABLED | IRQF_SHARED, + DRIVER_NAME, card); + if (st) { + dev_err(CARD_TO_DEV(card), + "Failed requesting IRQ%d\n", dev->irq); + goto failed_irq; + } + + /************* Setup Processor Command Interface *************/ + rsxx_creg_setup(card); + + spin_lock_irq(&card->irq_lock); + rsxx_enable_ier_and_isr(card, CR_INTR_CREG); + spin_unlock_irq(&card->irq_lock); + + st = rsxx_compatibility_check(card); + if (st) { + dev_warn(CARD_TO_DEV(card), + "Incompatible driver detected. Please update the driver.\n"); + st = -EINVAL; + goto failed_compatiblity_check; + } + + /************* Load Card Config *************/ + st = rsxx_load_config(card); + if (st) + dev_err(CARD_TO_DEV(card), + "Failed loading card config\n"); + + /************* Setup DMA Engine *************/ + st = rsxx_get_num_targets(card, &card->n_targets); + if (st) + dev_info(CARD_TO_DEV(card), + "Failed reading the number of DMA targets\n"); + + card->ctrl = kzalloc(card->n_targets * sizeof(*card->ctrl), GFP_KERNEL); + if (!card->ctrl) { + st = -ENOMEM; + goto failed_dma_setup; + } + + st = rsxx_dma_setup(card); + if (st) { + dev_info(CARD_TO_DEV(card), + "Failed to setup DMA engine\n"); + goto failed_dma_setup; + } + + /************* Setup Card Event Handler *************/ + INIT_WORK(&card->event_work, card_event_handler); + + st = rsxx_setup_dev(card); + if (st) + goto failed_create_dev; + + rsxx_get_card_state(card, &card->state); + + dev_info(CARD_TO_DEV(card), + "card state: %s\n", + rsxx_card_state_to_str(card->state)); + + /* + * Now that the DMA Engine and devices have been setup, + * we can enable the event interrupt(it kicks off actions in + * those layers so we couldn't enable it right away.) + */ + spin_lock_irq(&card->irq_lock); + rsxx_enable_ier_and_isr(card, CR_INTR_EVENT); + spin_unlock_irq(&card->irq_lock); + + if (card->state == CARD_STATE_SHUTDOWN) { + st = rsxx_issue_card_cmd(card, CARD_CMD_STARTUP); + if (st) + dev_crit(CARD_TO_DEV(card), + "Failed issuing card startup\n"); + } else if (card->state == CARD_STATE_GOOD || + card->state == CARD_STATE_RD_ONLY_FAULT) { + st = rsxx_get_card_size8(card, &card->size8); + if (st) + card->size8 = 0; + } + + rsxx_attach_dev(card); + + return 0; + +failed_create_dev: + rsxx_dma_destroy(card); +failed_dma_setup: +failed_compatiblity_check: + spin_lock_irq(&card->irq_lock); + rsxx_disable_ier_and_isr(card, CR_INTR_ALL); + spin_unlock_irq(&card->irq_lock); + free_irq(dev->irq, card); + if (!force_legacy) + pci_disable_msi(dev); +failed_irq: + pci_iounmap(dev, card->regmap); +failed_iomap: + pci_release_regions(dev); +failed_request_regions: +failed_dma_mask: + pci_disable_device(dev); +failed_enable: + spin_lock(&rsxx_ida_lock); + ida_remove(&rsxx_disk_ida, card->disk_id); + spin_unlock(&rsxx_ida_lock); +failed_ida_get: + kfree(card); + + return st; +} + +static void rsxx_pci_remove(struct pci_dev *dev) +{ + struct rsxx_cardinfo *card = pci_get_drvdata(dev); + unsigned long flags; + int st; + int i; + + if (!card) + return; + + dev_info(CARD_TO_DEV(card), + "Removing PCI-Flash SSD.\n"); + + rsxx_detach_dev(card); + + for (i = 0; i < card->n_targets; i++) { + spin_lock_irqsave(&card->irq_lock, flags); + rsxx_disable_ier_and_isr(card, CR_INTR_DMA(i)); + spin_unlock_irqrestore(&card->irq_lock, flags); + } + + st = card_shutdown(card); + if (st) + dev_crit(CARD_TO_DEV(card), "Shutdown failed!\n"); + + /* Sync outstanding event handlers. */ + spin_lock_irqsave(&card->irq_lock, flags); + rsxx_disable_ier_and_isr(card, CR_INTR_EVENT); + spin_unlock_irqrestore(&card->irq_lock, flags); + + /* Prevent work_structs from re-queuing themselves. */ + card->halt = 1; + + cancel_work_sync(&card->event_work); + + rsxx_destroy_dev(card); + rsxx_dma_destroy(card); + + spin_lock_irqsave(&card->irq_lock, flags); + rsxx_disable_ier_and_isr(card, CR_INTR_ALL); + spin_unlock_irqrestore(&card->irq_lock, flags); + free_irq(dev->irq, card); + + if (!force_legacy) + pci_disable_msi(dev); + + rsxx_creg_destroy(card); + + pci_iounmap(dev, card->regmap); + + pci_disable_device(dev); + pci_release_regions(dev); + + kfree(card); +} + +static int rsxx_pci_suspend(struct pci_dev *dev, pm_message_t state) +{ + /* We don't support suspend at this time. */ + return -ENOSYS; +} + +static void rsxx_pci_shutdown(struct pci_dev *dev) +{ + struct rsxx_cardinfo *card = pci_get_drvdata(dev); + unsigned long flags; + int i; + + if (!card) + return; + + dev_info(CARD_TO_DEV(card), "Shutting down PCI-Flash SSD.\n"); + + rsxx_detach_dev(card); + + for (i = 0; i < card->n_targets; i++) { + spin_lock_irqsave(&card->irq_lock, flags); + rsxx_disable_ier_and_isr(card, CR_INTR_DMA(i)); + spin_unlock_irqrestore(&card->irq_lock, flags); + } + + card_shutdown(card); +} + +static DEFINE_PCI_DEVICE_TABLE(rsxx_pci_ids) = { + {PCI_DEVICE(PCI_VENDOR_ID_TMS_IBM, PCI_DEVICE_ID_RS70_FLASH)}, + {PCI_DEVICE(PCI_VENDOR_ID_TMS_IBM, PCI_DEVICE_ID_RS70D_FLASH)}, + {PCI_DEVICE(PCI_VENDOR_ID_TMS_IBM, PCI_DEVICE_ID_RS80_FLASH)}, + {PCI_DEVICE(PCI_VENDOR_ID_TMS_IBM, PCI_DEVICE_ID_RS81_FLASH)}, + {0,}, +}; + +MODULE_DEVICE_TABLE(pci, rsxx_pci_ids); + +static struct pci_driver rsxx_pci_driver = { + .name = DRIVER_NAME, + .id_table = rsxx_pci_ids, + .probe = rsxx_pci_probe, + .remove = rsxx_pci_remove, + .suspend = rsxx_pci_suspend, + .shutdown = rsxx_pci_shutdown, +}; + +static int __init rsxx_core_init(void) +{ + int st; + + st = rsxx_dev_init(); + if (st) + return st; + + st = rsxx_dma_init(); + if (st) + goto dma_init_failed; + + st = rsxx_creg_init(); + if (st) + goto creg_init_failed; + + return pci_register_driver(&rsxx_pci_driver); + +creg_init_failed: + rsxx_dma_cleanup(); +dma_init_failed: + rsxx_dev_cleanup(); + + return st; +} + +static void __exit rsxx_core_cleanup(void) +{ + pci_unregister_driver(&rsxx_pci_driver); + rsxx_creg_cleanup(); + rsxx_dma_cleanup(); + rsxx_dev_cleanup(); +} + +module_init(rsxx_core_init); +module_exit(rsxx_core_cleanup); diff --git a/drivers/block/rsxx/cregs.c b/drivers/block/rsxx/cregs.c new file mode 100644 index 0000000..80bbe63 --- /dev/null +++ b/drivers/block/rsxx/cregs.c @@ -0,0 +1,758 @@ +/* +* Filename: cregs.c +* +* +* Authors: Joshua Morris <josh.h.morris@us.ibm.com> +* Philip Kelleher <pjk1939@linux.vnet.ibm.com> +* +* (C) Copyright 2013 IBM Corporation +* +* This program is free software; you can redistribute it and/or +* modify it under the terms of the GNU General Public License as +* published by the Free Software Foundation; either version 2 of the +* License, or (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, but +* WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +* General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program; if not, write to the Free Software Foundation, +* Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#include <linux/completion.h> +#include <linux/slab.h> + +#include "rsxx_priv.h" + +#define CREG_TIMEOUT_MSEC 10000 + +typedef void (*creg_cmd_cb)(struct rsxx_cardinfo *card, + struct creg_cmd *cmd, + int st); + +struct creg_cmd { + struct list_head list; + creg_cmd_cb cb; + void *cb_private; + unsigned int op; + unsigned int addr; + int cnt8; + void *buf; + unsigned int stream; + unsigned int status; +}; + +static struct kmem_cache *creg_cmd_pool; + + +/*------------ Private Functions --------------*/ + +#if defined(__LITTLE_ENDIAN) +#define LITTLE_ENDIAN 1 +#elif defined(__BIG_ENDIAN) +#define LITTLE_ENDIAN 0 +#else +#error Unknown endianess!!! Aborting... +#endif + +static void copy_to_creg_data(struct rsxx_cardinfo *card, + int cnt8, + void *buf, + unsigned int stream) +{ + int i = 0; + u32 *data = buf; + + for (i = 0; cnt8 > 0; i++, cnt8 -= 4) { + /* + * Firmware implementation makes it necessary to byte swap on + * little endian processors. + */ + if (LITTLE_ENDIAN && stream) + iowrite32be(data[i], card->regmap + CREG_DATA(i)); + else + iowrite32(data[i], card->regmap + CREG_DATA(i)); + } +} + + +static void copy_from_creg_data(struct rsxx_cardinfo *card, + int cnt8, + void *buf, + unsigned int stream) +{ + int i = 0; + u32 *data = buf; + + for (i = 0; cnt8 > 0; i++, cnt8 -= 4) { + /* + * Firmware implementation makes it necessary to byte swap on + * little endian processors. + */ + if (LITTLE_ENDIAN && stream) + data[i] = ioread32be(card->regmap + CREG_DATA(i)); + else + data[i] = ioread32(card->regmap + CREG_DATA(i)); + } +} + +static struct creg_cmd *pop_active_cmd(struct rsxx_cardinfo *card) +{ + struct creg_cmd *cmd; + + /* + * Spin lock is needed because this can be called in atomic/interrupt + * context. + */ + spin_lock_bh(&card->creg_ctrl.lock); + cmd = card->creg_ctrl.active_cmd; + card->creg_ctrl.active_cmd = NULL; + spin_unlock_bh(&card->creg_ctrl.lock); + + return cmd; +} + +static void creg_issue_cmd(struct rsxx_cardinfo *card, struct creg_cmd *cmd) +{ + iowrite32(cmd->addr, card->regmap + CREG_ADD); + iowrite32(cmd->cnt8, card->regmap + CREG_CNT); + + if (cmd->op == CREG_OP_WRITE) { + if (cmd->buf) + copy_to_creg_data(card, cmd->cnt8, + cmd->buf, cmd->stream); + } + + /* + * Data copy must complete before initiating the command. This is + * needed for weakly ordered processors (i.e. PowerPC), so that all + * neccessary registers are written before we kick the hardware. + */ + wmb(); + + /* Setting the valid bit will kick off the command. */ + iowrite32(cmd->op, card->regmap + CREG_CMD); +} + +static void creg_kick_queue(struct rsxx_cardinfo *card) +{ + if (card->creg_ctrl.active || list_empty(&card->creg_ctrl.queue)) + return; + + card->creg_ctrl.active = 1; + card->creg_ctrl.active_cmd = list_first_entry(&card->creg_ctrl.queue, + struct creg_cmd, list); + list_del(&card->creg_ctrl.active_cmd->list); + card->creg_ctrl.q_depth--; + + /* + * We have to set the timer before we push the new command. Otherwise, + * we could create a race condition that would occur if the timer + * was not canceled, and expired after the new command was pushed, + * but before the command was issued to hardware. + */ + mod_timer(&card->creg_ctrl.cmd_timer, + jiffies + msecs_to_jiffies(CREG_TIMEOUT_MSEC)); + + creg_issue_cmd(card, card->creg_ctrl.active_cmd); +} + +static int creg_queue_cmd(struct rsxx_cardinfo *card, + unsigned int op, + unsigned int addr, + unsigned int cnt8, + void *buf, + int stream, + creg_cmd_cb callback, + void *cb_private) +{ + struct creg_cmd *cmd; + + /* Don't queue stuff up if we're halted. */ + if (unlikely(card->halt)) + return -EINVAL; + + if (card->creg_ctrl.reset) + return -EAGAIN; + + if (cnt8 > MAX_CREG_DATA8) + return -EINVAL; + + cmd = kmem_cache_alloc(creg_cmd_pool, GFP_KERNEL); + if (!cmd) + return -ENOMEM; + + INIT_LIST_HEAD(&cmd->list); + + cmd->op = op; + cmd->addr = addr; + cmd->cnt8 = cnt8; + cmd->buf = buf; + cmd->stream = stream; + cmd->cb = callback; + cmd->cb_private = cb_private; + cmd->status = 0; + + spin_lock(&card->creg_ctrl.lock); + list_add_tail(&cmd->list, &card->creg_ctrl.queue); + card->creg_ctrl.q_depth++; + creg_kick_queue(card); + spin_unlock(&card->creg_ctrl.lock); + + return 0; +} + +static void creg_cmd_timed_out(unsigned long data) +{ + struct rsxx_cardinfo *card = (struct rsxx_cardinfo *) data; + struct creg_cmd *cmd; + + cmd = pop_active_cmd(card); + if (cmd == NULL) { + card->creg_ctrl.creg_stats.creg_timeout++; + dev_warn(CARD_TO_DEV(card), + "No active command associated with timeout!\n"); + return; + } + + if (cmd->cb) + cmd->cb(card, cmd, -ETIMEDOUT); + + kmem_cache_free(creg_cmd_pool, cmd); + + + spin_lock(&card->creg_ctrl.lock); + card->creg_ctrl.active = 0; + creg_kick_queue(card); + spin_unlock(&card->creg_ctrl.lock); +} + + +static void creg_cmd_done(struct work_struct *work) +{ + struct rsxx_cardinfo *card; + struct creg_cmd *cmd; + int st = 0; + + card = container_of(work, struct rsxx_cardinfo, + creg_ctrl.done_work); + + /* + * The timer could not be cancelled for some reason, + * race to pop the active command. + */ + if (del_timer_sync(&card->creg_ctrl.cmd_timer) == 0) + card->creg_ctrl.creg_stats.failed_cancel_timer++; + + cmd = pop_active_cmd(card); + if (cmd == NULL) { + dev_err(CARD_TO_DEV(card), + "Spurious creg interrupt!\n"); + return; + } + + card->creg_ctrl.creg_stats.stat = ioread32(card->regmap + CREG_STAT); + cmd->status = card->creg_ctrl.creg_stats.stat; + if ((cmd->status & CREG_STAT_STATUS_MASK) == 0) { + dev_err(CARD_TO_DEV(card), + "Invalid status on creg command\n"); + /* + * At this point we're probably reading garbage from HW. Don't + * do anything else that could mess up the system and let + * the sync function return an error. + */ + st = -EIO; + goto creg_done; + } else if (cmd->status & CREG_STAT_ERROR) { + st = -EIO; + } + + if ((cmd->op == CREG_OP_READ)) { + unsigned int cnt8 = ioread32(card->regmap + CREG_CNT); + + /* Paranoid Sanity Checks */ + if (!cmd->buf) { + dev_err(CARD_TO_DEV(card), + "Buffer not given for read.\n"); + st = -EIO; + goto creg_done; + } + if (cnt8 != cmd->cnt8) { + dev_err(CARD_TO_DEV(card), + "count mismatch\n"); + st = -EIO; + goto creg_done; + } + + copy_from_creg_data(card, cnt8, cmd->buf, cmd->stream); + } + +creg_done: + if (cmd->cb) + cmd->cb(card, cmd, st); + + kmem_cache_free(creg_cmd_pool, cmd); + + spin_lock(&card->creg_ctrl.lock); + card->creg_ctrl.active = 0; + creg_kick_queue(card); + spin_unlock(&card->creg_ctrl.lock); +} + +static void creg_reset(struct rsxx_cardinfo *card) +{ + struct creg_cmd *cmd = NULL; + struct creg_cmd *tmp; + unsigned long flags; + + /* + * mutex_trylock is used here because if reset_lock is taken then a + * reset is already happening. So, we can just go ahead and return. + */ + if (!mutex_trylock(&card->creg_ctrl.reset_lock)) + return; + + card->creg_ctrl.reset = 1; + spin_lock_irqsave(&card->irq_lock, flags); + rsxx_disable_ier_and_isr(card, CR_INTR_CREG | CR_INTR_EVENT); + spin_unlock_irqrestore(&card->irq_lock, flags); + + dev_warn(CARD_TO_DEV(card), + "Resetting creg interface for recovery\n"); + + /* Cancel outstanding commands */ + spin_lock(&card->creg_ctrl.lock); + list_for_each_entry_safe(cmd, tmp, &card->creg_ctrl.queue, list) { + list_del(&cmd->list); + card->creg_ctrl.q_depth--; + if (cmd->cb) + cmd->cb(card, cmd, -ECANCELED); + kmem_cache_free(creg_cmd_pool, cmd); + } + + cmd = card->creg_ctrl.active_cmd; + card->creg_ctrl.active_cmd = NULL; + if (cmd) { + if (timer_pending(&card->creg_ctrl.cmd_timer)) + del_timer_sync(&card->creg_ctrl.cmd_timer); + + if (cmd->cb) + cmd->cb(card, cmd, -ECANCELED); + kmem_cache_free(creg_cmd_pool, cmd); + + card->creg_ctrl.active = 0; + } + spin_unlock(&card->creg_ctrl.lock); + + card->creg_ctrl.reset = 0; + spin_lock_irqsave(&card->irq_lock, flags); + rsxx_enable_ier_and_isr(card, CR_INTR_CREG | CR_INTR_EVENT); + spin_unlock_irqrestore(&card->irq_lock, flags); + + mutex_unlock(&card->creg_ctrl.reset_lock); +} + +/* Used for synchronous accesses */ +struct creg_completion { + struct completion *cmd_done; + int st; + u32 creg_status; +}; + +static void creg_cmd_done_cb(struct rsxx_cardinfo *card, + struct creg_cmd *cmd, + int st) +{ + struct creg_completion *cmd_completion; + + cmd_completion = cmd->cb_private; + BUG_ON(!cmd_completion); + + cmd_completion->st = st; + cmd_completion->creg_status = cmd->status; + complete(cmd_completion->cmd_done); +} + +static int __issue_creg_rw(struct rsxx_cardinfo *card, + unsigned int op, + unsigned int addr, + unsigned int cnt8, + void *buf, + int stream, + unsigned int *hw_stat) +{ + DECLARE_COMPLETION_ONSTACK(cmd_done); + struct creg_completion completion; + unsigned long timeout; + int st; + + completion.cmd_done = &cmd_done; + completion.st = 0; + completion.creg_status = 0; + + st = creg_queue_cmd(card, op, addr, cnt8, buf, stream, creg_cmd_done_cb, + &completion); + if (st) + return st; + + /* + * This timeout is neccessary for unresponsive hardware. The additional + * 20 seconds to used to guarantee that each cregs requests has time to + * complete. + */ + timeout = msecs_to_jiffies((CREG_TIMEOUT_MSEC * + card->creg_ctrl.q_depth) + 20000); + + /* + * The creg interface is guaranteed to complete. It has a timeout + * mechanism that will kick in if hardware does not respond. + */ + st = wait_for_completion_timeout(completion.cmd_done, timeout); + if (st == 0) { + /* + * This is really bad, because the kernel timer did not + * expire and notify us of a timeout! + */ + dev_crit(CARD_TO_DEV(card), + "cregs timer failed\n"); + creg_reset(card); + return -EIO; + } + + *hw_stat = completion.creg_status; + + if (completion.st) { + dev_warn(CARD_TO_DEV(card), + "creg command failed(%d x%08x)\n", + completion.st, addr); + return completion.st; + } + + return 0; +} + +static int issue_creg_rw(struct rsxx_cardinfo *card, + u32 addr, + unsigned int size8, + void *data, + int stream, + int read) +{ + unsigned int hw_stat; + unsigned int xfer; + unsigned int op; + int st; + + op = read ? CREG_OP_READ : CREG_OP_WRITE; + + do { + xfer = min_t(unsigned int, size8, MAX_CREG_DATA8); + + st = __issue_creg_rw(card, op, addr, xfer, + data, stream, &hw_stat); + if (st) + return st; + + data = (char *)data + xfer; + addr += xfer; + size8 -= xfer; + } while (size8); + + return 0; +} + +/* ---------------------------- Public API ---------------------------------- */ +int rsxx_creg_write(struct rsxx_cardinfo *card, + u32 addr, + unsigned int size8, + void *data, + int byte_stream) +{ + return issue_creg_rw(card, addr, size8, data, byte_stream, 0); +} + +int rsxx_creg_read(struct rsxx_cardinfo *card, + u32 addr, + unsigned int size8, + void *data, + int byte_stream) +{ + return issue_creg_rw(card, addr, size8, data, byte_stream, 1); +} + +int rsxx_get_card_state(struct rsxx_cardinfo *card, unsigned int *state) +{ + return rsxx_creg_read(card, CREG_ADD_CARD_STATE, + sizeof(*state), state, 0); +} + +int rsxx_get_card_size8(struct rsxx_cardinfo *card, u64 *size8) +{ + unsigned int size; + int st; + + st = rsxx_creg_read(card, CREG_ADD_CARD_SIZE, + sizeof(size), &size, 0); + if (st) + return st; + + *size8 = (u64)size * RSXX_HW_BLK_SIZE; + return 0; +} + +int rsxx_get_num_targets(struct rsxx_cardinfo *card, + unsigned int *n_targets) +{ + return rsxx_creg_read(card, CREG_ADD_NUM_TARGETS, + sizeof(*n_targets), n_targets, 0); +} + +int rsxx_get_card_capabilities(struct rsxx_cardinfo *card, + u32 *capabilities) +{ + return rsxx_creg_read(card, CREG_ADD_CAPABILITIES, + sizeof(*capabilities), capabilities, 0); +} + +int rsxx_issue_card_cmd(struct rsxx_cardinfo *card, u32 cmd) +{ + return rsxx_creg_write(card, CREG_ADD_CARD_CMD, + sizeof(cmd), &cmd, 0); +} + + +/*----------------- HW Log Functions -------------------*/ +static void hw_log_msg(struct rsxx_cardinfo *card, const char *str, int len) +{ + static char level; + + /* + * New messages start with "<#>", where # is the log level. Messages + * that extend past the log buffer will use the previous level + */ + if ((len > 3) && (str[0] == '<') && (str[2] == '>')) { + level = str[1]; + str += 3; /* Skip past the log level. */ + len -= 3; + } + + switch (level) { + case '0': + dev_emerg(CARD_TO_DEV(card), "HW: %.*s", len, str); + break; + case '1': + dev_alert(CARD_TO_DEV(card), "HW: %.*s", len, str); + break; + case '2': + dev_crit(CARD_TO_DEV(card), "HW: %.*s", len, str); + break; + case '3': + dev_err(CARD_TO_DEV(card), "HW: %.*s", len, str); + break; + case '4': + dev_warn(CARD_TO_DEV(card), "HW: %.*s", len, str); + break; + case '5': + dev_notice(CARD_TO_DEV(card), "HW: %.*s", len, str); + break; + case '6': + dev_info(CARD_TO_DEV(card), "HW: %.*s", len, str); + break; + case '7': + dev_dbg(CARD_TO_DEV(card), "HW: %.*s", len, str); + break; + default: + dev_info(CARD_TO_DEV(card), "HW: %.*s", len, str); + break; + } +} + +/* + * The substrncpy function copies the src string (which includes the + * terminating '\0' character), up to the count into the dest pointer. + * Returns the number of bytes copied to dest. + */ +static int substrncpy(char *dest, const char *src, int count) +{ + int max_cnt = count; + + while (count) { + count--; + *dest = *src; + if (*dest == '\0') + break; + src++; + dest++; + } + return max_cnt - count; +} + + +static void read_hw_log_done(struct rsxx_cardinfo *card, + struct creg_cmd *cmd, + int st) +{ + char *buf; + char *log_str; + int cnt; + int len; + int off; + + buf = cmd->buf; + off = 0; + + /* Failed getting the log message */ + if (st) + return; + + while (off < cmd->cnt8) { + log_str = &card->log.buf[card->log.buf_len]; + cnt = min(cmd->cnt8 - off, LOG_BUF_SIZE8 - card->log.buf_len); + len = substrncpy(log_str, &buf[off], cnt); + + off += len; + card->log.buf_len += len; + + /* + * Flush the log if we've hit the end of a message or if we've + * run out of buffer space. + */ + if ((log_str[len - 1] == '\0') || + (card->log.buf_len == LOG_BUF_SIZE8)) { + if (card->log.buf_len != 1) /* Don't log blank lines. */ + hw_log_msg(card, card->log.buf, + card->log.buf_len); + card->log.buf_len = 0; + } + + } + + if (cmd->status & CREG_STAT_LOG_PENDING) + rsxx_read_hw_log(card); +} + +int rsxx_read_hw_log(struct rsxx_cardinfo *card) +{ + int st; + + st = creg_queue_cmd(card, CREG_OP_READ, CREG_ADD_LOG, + sizeof(card->log.tmp), card->log.tmp, + 1, read_hw_log_done, NULL); + if (st) + dev_err(CARD_TO_DEV(card), + "Failed getting log text\n"); + + return st; +} + +/*-------------- IOCTL REG Access ------------------*/ +static int issue_reg_cmd(struct rsxx_cardinfo *card, + struct rsxx_reg_access *cmd, + int read) +{ + unsigned int op = read ? CREG_OP_READ : CREG_OP_WRITE; + + return __issue_creg_rw(card, op, cmd->addr, cmd->cnt, cmd->data, + cmd->stream, &cmd->stat); +} + +int rsxx_reg_access(struct rsxx_cardinfo *card, + struct rsxx_reg_access __user *ucmd, + int read) +{ + struct rsxx_reg_access cmd; + int st; + + st = copy_from_user(&cmd, ucmd, sizeof(cmd)); + if (st) + return -EFAULT; + + if (cmd.cnt > RSXX_MAX_REG_CNT) + return -EFAULT; + + st = issue_reg_cmd(card, &cmd, read); + if (st) + return st; + + st = put_user(cmd.stat, &ucmd->stat); + if (st) + return -EFAULT; + + if (read) { + st = copy_to_user(ucmd->data, cmd.data, cmd.cnt); + if (st) + return -EFAULT; + } + + return 0; +} + +/*------------ Initialization & Setup --------------*/ +int rsxx_creg_setup(struct rsxx_cardinfo *card) +{ + card->creg_ctrl.active_cmd = NULL; + + INIT_WORK(&card->creg_ctrl.done_work, creg_cmd_done); + mutex_init(&card->creg_ctrl.reset_lock); + INIT_LIST_HEAD(&card->creg_ctrl.queue); + spin_lock_init(&card->creg_ctrl.lock); + setup_timer(&card->creg_ctrl.cmd_timer, creg_cmd_timed_out, + (unsigned long) card); + + return 0; +} + +void rsxx_creg_destroy(struct rsxx_cardinfo *card) +{ + struct creg_cmd *cmd; + struct creg_cmd *tmp; + int cnt = 0; + + /* Cancel outstanding commands */ + spin_lock(&card->creg_ctrl.lock); + list_for_each_entry_safe(cmd, tmp, &card->creg_ctrl.queue, list) { + list_del(&cmd->list); + if (cmd->cb) + cmd->cb(card, cmd, -ECANCELED); + kmem_cache_free(creg_cmd_pool, cmd); + cnt++; + } + + if (cnt) + dev_info(CARD_TO_DEV(card), + "Canceled %d queue creg commands\n", cnt); + + cmd = card->creg_ctrl.active_cmd; + card->creg_ctrl.active_cmd = NULL; + if (cmd) { + if (timer_pending(&card->creg_ctrl.cmd_timer)) + del_timer_sync(&card->creg_ctrl.cmd_timer); + + if (cmd->cb) + cmd->cb(card, cmd, -ECANCELED); + dev_info(CARD_TO_DEV(card), + "Canceled active creg command\n"); + kmem_cache_free(creg_cmd_pool, cmd); + } + spin_unlock(&card->creg_ctrl.lock); + + cancel_work_sync(&card->creg_ctrl.done_work); +} + + +int rsxx_creg_init(void) +{ + creg_cmd_pool = KMEM_CACHE(creg_cmd, SLAB_HWCACHE_ALIGN); + if (!creg_cmd_pool) + return -ENOMEM; + + return 0; +} + +void rsxx_creg_cleanup(void) +{ + kmem_cache_destroy(creg_cmd_pool); +} diff --git a/drivers/block/rsxx/dev.c b/drivers/block/rsxx/dev.c new file mode 100644 index 0000000..4346d17 --- /dev/null +++ b/drivers/block/rsxx/dev.c @@ -0,0 +1,367 @@ +/* +* Filename: dev.c +* +* +* Authors: Joshua Morris <josh.h.morris@us.ibm.com> +* Philip Kelleher <pjk1939@linux.vnet.ibm.com> +* +* (C) Copyright 2013 IBM Corporation +* +* This program is free software; you can redistribute it and/or +* modify it under the terms of the GNU General Public License as +* published by the Free Software Foundation; either version 2 of the +* License, or (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, but +* WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +* General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program; if not, write to the Free Software Foundation, +* Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#include <linux/kernel.h> +#include <linux/interrupt.h> +#include <linux/module.h> +#include <linux/pci.h> +#include <linux/slab.h> + +#include <linux/hdreg.h> +#include <linux/genhd.h> +#include <linux/blkdev.h> +#include <linux/bio.h> + +#include <linux/fs.h> + +#include "rsxx_priv.h" + +static unsigned int blkdev_minors = 64; +module_param(blkdev_minors, uint, 0444); +MODULE_PARM_DESC(blkdev_minors, "Number of minors(partitions)"); + +/* + * For now I'm making this tweakable in case any applications hit this limit. + * If you see a "bio too big" error in the log you will need to raise this + * value. + */ +static unsigned int blkdev_max_hw_sectors = 1024; +module_param(blkdev_max_hw_sectors, uint, 0444); +MODULE_PARM_DESC(blkdev_max_hw_sectors, "Max hw sectors for a single BIO"); + +static unsigned int enable_blkdev = 1; +module_param(enable_blkdev , uint, 0444); +MODULE_PARM_DESC(enable_blkdev, "Enable block device interfaces"); + + +struct rsxx_bio_meta { + struct bio *bio; + atomic_t pending_dmas; + atomic_t error; + unsigned long start_time; +}; + +static struct kmem_cache *bio_meta_pool; + +/*----------------- Block Device Operations -----------------*/ +static int rsxx_blkdev_ioctl(struct block_device *bdev, + fmode_t mode, + unsigned int cmd, + unsigned long arg) +{ + struct rsxx_cardinfo *card = bdev->bd_disk->private_data; + + switch (cmd) { + case RSXX_GETREG: + return rsxx_reg_access(card, (void __user *)arg, 1); + case RSXX_SETREG: + return rsxx_reg_access(card, (void __user *)arg, 0); + } + + return -ENOTTY; +} + +static int rsxx_getgeo(struct block_device *bdev, struct hd_geometry *geo) +{ + struct rsxx_cardinfo *card = bdev->bd_disk->private_data; + u64 blocks = card->size8 >> 9; + + /* + * get geometry: Fake it. I haven't found any drivers that set + * geo->start, so we won't either. + */ + if (card->size8) { + geo->heads = 64; + geo->sectors = 16; + do_div(blocks, (geo->heads * geo->sectors)); + geo->cylinders = blocks; + } else { + geo->heads = 0; + geo->sectors = 0; + geo->cylinders = 0; + } + return 0; +} + +static const struct block_device_operations rsxx_fops = { + .owner = THIS_MODULE, + .getgeo = rsxx_getgeo, + .ioctl = rsxx_blkdev_ioctl, +}; + +static void disk_stats_start(struct rsxx_cardinfo *card, struct bio *bio) +{ + struct hd_struct *part0 = &card->gendisk->part0; + int rw = bio_data_dir(bio); + int cpu; + + cpu = part_stat_lock(); + + part_round_stats(cpu, part0); + part_inc_in_flight(part0, rw); + + part_stat_unlock(); +} + +static void disk_stats_complete(struct rsxx_cardinfo *card, + struct bio *bio, + unsigned long start_time) +{ + struct hd_struct *part0 = &card->gendisk->part0; + unsigned long duration = jiffies - start_time; + int rw = bio_data_dir(bio); + int cpu; + + cpu = part_stat_lock(); + + part_stat_add(cpu, part0, sectors[rw], bio_sectors(bio)); + part_stat_inc(cpu, part0, ios[rw]); + part_stat_add(cpu, part0, ticks[rw], duration); + + part_round_stats(cpu, part0); + part_dec_in_flight(part0, rw); + + part_stat_unlock(); +} + +static void bio_dma_done_cb(struct rsxx_cardinfo *card, + void *cb_data, + unsigned int error) +{ + struct rsxx_bio_meta *meta = cb_data; + + if (error) + atomic_set(&meta->error, 1); + + if (atomic_dec_and_test(&meta->pending_dmas)) { + disk_stats_complete(card, meta->bio, meta->start_time); + + bio_endio(meta->bio, atomic_read(&meta->error) ? -EIO : 0); + kmem_cache_free(bio_meta_pool, meta); + } +} + +static void rsxx_make_request(struct request_queue *q, struct bio *bio) +{ + struct rsxx_cardinfo *card = q->queuedata; + struct rsxx_bio_meta *bio_meta; + int st = -EINVAL; + + might_sleep(); + + if (unlikely(card->halt)) { + st = -EFAULT; + goto req_err; + } + + if (unlikely(card->dma_fault)) { + st = (-EFAULT); + goto req_err; + } + + if (bio->bi_size == 0) { + dev_err(CARD_TO_DEV(card), "size zero BIO!\n"); + goto req_err; + } + + bio_meta = kmem_cache_alloc(bio_meta_pool, GFP_KERNEL); + if (!bio_meta) { + st = -ENOMEM; + goto req_err; + } + + bio_meta->bio = bio; + atomic_set(&bio_meta->error, 0); + atomic_set(&bio_meta->pending_dmas, 0); + bio_meta->start_time = jiffies; + + disk_stats_start(card, bio); + + dev_dbg(CARD_TO_DEV(card), "BIO[%c]: meta: %p addr8: x%llx size: %d\n", + bio_data_dir(bio) ? 'W' : 'R', bio_meta, + (u64)bio->bi_sector << 9, bio->bi_size); + + st = rsxx_dma_queue_bio(card, bio, &bio_meta->pending_dmas, + bio_dma_done_cb, bio_meta); + if (st) + goto queue_err; + + return; + +queue_err: + kmem_cache_free(bio_meta_pool, bio_meta); +req_err: + bio_endio(bio, st); +} + +/*----------------- Device Setup -------------------*/ +static bool rsxx_discard_supported(struct rsxx_cardinfo *card) +{ + unsigned char pci_rev; + + pci_read_config_byte(card->dev, PCI_REVISION_ID, &pci_rev); + + return (pci_rev >= RSXX_DISCARD_SUPPORT); +} + +static unsigned short rsxx_get_logical_block_size( + struct rsxx_cardinfo *card) +{ + u32 capabilities = 0; + int st; + + st = rsxx_get_card_capabilities(card, &capabilities); + if (st) + dev_warn(CARD_TO_DEV(card), + "Failed reading card capabilities register\n"); + + /* Earlier firmware did not have support for 512 byte accesses */ + if (capabilities & CARD_CAP_SUBPAGE_WRITES) + return 512; + else + return RSXX_HW_BLK_SIZE; +} + +int rsxx_attach_dev(struct rsxx_cardinfo *card) +{ + mutex_lock(&card->dev_lock); + + /* The block device requires the stripe size from the config. */ + if (enable_blkdev) { + if (card->config_valid) + set_capacity(card->gendisk, card->size8 >> 9); + else + set_capacity(card->gendisk, 0); + add_disk(card->gendisk); + + card->bdev_attached = 1; + } + + mutex_unlock(&card->dev_lock); + + return 0; +} + +void rsxx_detach_dev(struct rsxx_cardinfo *card) +{ + mutex_lock(&card->dev_lock); + + if (card->bdev_attached) { + del_gendisk(card->gendisk); + card->bdev_attached = 0; + } + + mutex_unlock(&card->dev_lock); +} + +int rsxx_setup_dev(struct rsxx_cardinfo *card) +{ + unsigned short blk_size; + + mutex_init(&card->dev_lock); + + if (!enable_blkdev) + return 0; + + card->major = register_blkdev(0, DRIVER_NAME); + if (card->major < 0) { + dev_err(CARD_TO_DEV(card), "Failed to get major number\n"); + return -ENOMEM; + } + + card->queue = blk_alloc_queue(GFP_KERNEL); + if (!card->queue) { + dev_err(CARD_TO_DEV(card), "Failed queue alloc\n"); + unregister_blkdev(card->major, DRIVER_NAME); + return -ENOMEM; + } + + card->gendisk = alloc_disk(blkdev_minors); + if (!card->gendisk) { + dev_err(CARD_TO_DEV(card), "Failed disk alloc\n"); + blk_cleanup_queue(card->queue); + unregister_blkdev(card->major, DRIVER_NAME); + return -ENOMEM; + } + + blk_size = rsxx_get_logical_block_size(card); + + blk_queue_make_request(card->queue, rsxx_make_request); + blk_queue_bounce_limit(card->queue, BLK_BOUNCE_ANY); + blk_queue_dma_alignment(card->queue, blk_size - 1); + blk_queue_max_hw_sectors(card->queue, blkdev_max_hw_sectors); + blk_queue_logical_block_size(card->queue, blk_size); + blk_queue_physical_block_size(card->queue, RSXX_HW_BLK_SIZE); + + queue_flag_set_unlocked(QUEUE_FLAG_NONROT, card->queue); + if (rsxx_discard_supported(card)) { + queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, card->queue); + blk_queue_max_discard_sectors(card->queue, + RSXX_HW_BLK_SIZE >> 9); + card->queue->limits.discard_granularity = RSXX_HW_BLK_SIZE; + card->queue->limits.discard_alignment = RSXX_HW_BLK_SIZE; + card->queue->limits.discard_zeroes_data = 1; + } + + card->queue->queuedata = card; + + snprintf(card->gendisk->disk_name, sizeof(card->gendisk->disk_name), + "rsxx%d", card->disk_id); + card->gendisk->driverfs_dev = &card->dev->dev; + card->gendisk->major = card->major; + card->gendisk->first_minor = 0; + card->gendisk->fops = &rsxx_fops; + card->gendisk->private_data = card; + card->gendisk->queue = card->queue; + + return 0; +} + +void rsxx_destroy_dev(struct rsxx_cardinfo *card) +{ + if (!enable_blkdev) + return; + + put_disk(card->gendisk); + card->gendisk = NULL; + + blk_cleanup_queue(card->queue); + unregister_blkdev(card->major, DRIVER_NAME); +} + +int rsxx_dev_init(void) +{ + bio_meta_pool = KMEM_CACHE(rsxx_bio_meta, SLAB_HWCACHE_ALIGN); + if (!bio_meta_pool) + return -ENOMEM; + + return 0; +} + +void rsxx_dev_cleanup(void) +{ + kmem_cache_destroy(bio_meta_pool); +} + + diff --git a/drivers/block/rsxx/dma.c b/drivers/block/rsxx/dma.c new file mode 100644 index 0000000..63176e6 --- /dev/null +++ b/drivers/block/rsxx/dma.c @@ -0,0 +1,998 @@ +/* +* Filename: dma.c +* +* +* Authors: Joshua Morris <josh.h.morris@us.ibm.com> +* Philip Kelleher <pjk1939@linux.vnet.ibm.com> +* +* (C) Copyright 2013 IBM Corporation +* +* This program is free software; you can redistribute it and/or +* modify it under the terms of the GNU General Public License as +* published by the Free Software Foundation; either version 2 of the +* License, or (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, but +* WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +* General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program; if not, write to the Free Software Foundation, +* Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#include <linux/slab.h> +#include "rsxx_priv.h" + +struct rsxx_dma { + struct list_head list; + u8 cmd; + unsigned int laddr; /* Logical address on the ramsan */ + struct { + u32 off; + u32 cnt; + } sub_page; + dma_addr_t dma_addr; + struct page *page; + unsigned int pg_off; /* Page Offset */ + rsxx_dma_cb cb; + void *cb_data; +}; + +/* This timeout is used to detect a stalled DMA channel */ +#define DMA_ACTIVITY_TIMEOUT msecs_to_jiffies(10000) + +struct hw_status { + u8 status; + u8 tag; + __le16 count; + __le32 _rsvd2; + __le64 _rsvd3; +} __packed; + +enum rsxx_dma_status { + DMA_SW_ERR = 0x1, + DMA_HW_FAULT = 0x2, + DMA_CANCELLED = 0x4, +}; + +struct hw_cmd { + u8 command; + u8 tag; + u8 _rsvd; + u8 sub_page; /* Bit[0:2]: 512byte offset */ + /* Bit[4:6]: 512byte count */ + __le32 device_addr; + __le64 host_addr; +} __packed; + +enum rsxx_hw_cmd { + HW_CMD_BLK_DISCARD = 0x70, + HW_CMD_BLK_WRITE = 0x80, + HW_CMD_BLK_READ = 0xC0, + HW_CMD_BLK_RECON_READ = 0xE0, +}; + +enum rsxx_hw_status { + HW_STATUS_CRC = 0x01, + HW_STATUS_HARD_ERR = 0x02, + HW_STATUS_SOFT_ERR = 0x04, + HW_STATUS_FAULT = 0x08, +}; + +#define STATUS_BUFFER_SIZE8 4096 +#define COMMAND_BUFFER_SIZE8 4096 + +static struct kmem_cache *rsxx_dma_pool; + +struct dma_tracker { + int next_tag; + struct rsxx_dma *dma; +}; + +#define DMA_TRACKER_LIST_SIZE8 (sizeof(struct dma_tracker_list) + \ + (sizeof(struct dma_tracker) * RSXX_MAX_OUTSTANDING_CMDS)) + +struct dma_tracker_list { + spinlock_t lock; + int head; + struct dma_tracker list[0]; +}; + + +/*----------------- Misc Utility Functions -------------------*/ +static unsigned int rsxx_addr8_to_laddr(u64 addr8, struct rsxx_cardinfo *card) +{ + unsigned long long tgt_addr8; + + tgt_addr8 = ((addr8 >> card->_stripe.upper_shift) & + card->_stripe.upper_mask) | + ((addr8) & card->_stripe.lower_mask); + do_div(tgt_addr8, RSXX_HW_BLK_SIZE); + return tgt_addr8; +} + +static unsigned int rsxx_get_dma_tgt(struct rsxx_cardinfo *card, u64 addr8) +{ + unsigned int tgt; + + tgt = (addr8 >> card->_stripe.target_shift) & card->_stripe.target_mask; + + return tgt; +} + +static void rsxx_dma_queue_reset(struct rsxx_cardinfo *card) +{ + /* Reset all DMA Command/Status Queues */ + iowrite32(DMA_QUEUE_RESET, card->regmap + RESET); +} + +static unsigned int get_dma_size(struct rsxx_dma *dma) +{ + if (dma->sub_page.cnt) + return dma->sub_page.cnt << 9; + else + return RSXX_HW_BLK_SIZE; +} + + +/*----------------- DMA Tracker -------------------*/ +static void set_tracker_dma(struct dma_tracker_list *trackers, + int tag, + struct rsxx_dma *dma) +{ + trackers->list[tag].dma = dma; +} + +static struct rsxx_dma *get_tracker_dma(struct dma_tracker_list *trackers, + int tag) +{ + return trackers->list[tag].dma; +} + +static int pop_tracker(struct dma_tracker_list *trackers) +{ + int tag; + + spin_lock(&trackers->lock); + tag = trackers->head; + if (tag != -1) { + trackers->head = trackers->list[tag].next_tag; + trackers->list[tag].next_tag = -1; + } + spin_unlock(&trackers->lock); + + return tag; +} + +static void push_tracker(struct dma_tracker_list *trackers, int tag) +{ + spin_lock(&trackers->lock); + trackers->list[tag].next_tag = trackers->head; + trackers->head = tag; + trackers->list[tag].dma = NULL; + spin_unlock(&trackers->lock); +} + + +/*----------------- Interrupt Coalescing -------------*/ +/* + * Interrupt Coalescing Register Format: + * Interrupt Timer (64ns units) [15:0] + * Interrupt Count [24:16] + * Reserved [31:25] +*/ +#define INTR_COAL_LATENCY_MASK (0x0000ffff) + +#define INTR_COAL_COUNT_SHIFT 16 +#define INTR_COAL_COUNT_BITS 9 +#define INTR_COAL_COUNT_MASK (((1 << INTR_COAL_COUNT_BITS) - 1) << \ + INTR_COAL_COUNT_SHIFT) +#define INTR_COAL_LATENCY_UNITS_NS 64 + + +static u32 dma_intr_coal_val(u32 mode, u32 count, u32 latency) +{ + u32 latency_units = latency / INTR_COAL_LATENCY_UNITS_NS; + + if (mode == RSXX_INTR_COAL_DISABLED) + return 0; + + return ((count << INTR_COAL_COUNT_SHIFT) & INTR_COAL_COUNT_MASK) | + (latency_units & INTR_COAL_LATENCY_MASK); + +} + +static void dma_intr_coal_auto_tune(struct rsxx_cardinfo *card) +{ + int i; + u32 q_depth = 0; + u32 intr_coal; + + if (card->config.data.intr_coal.mode != RSXX_INTR_COAL_AUTO_TUNE) + return; + + for (i = 0; i < card->n_targets; i++) + q_depth += atomic_read(&card->ctrl[i].stats.hw_q_depth); + + intr_coal = dma_intr_coal_val(card->config.data.intr_coal.mode, + q_depth / 2, + card->config.data.intr_coal.latency); + iowrite32(intr_coal, card->regmap + INTR_COAL); +} + +/*----------------- RSXX DMA Handling -------------------*/ +static void rsxx_complete_dma(struct rsxx_cardinfo *card, + struct rsxx_dma *dma, + unsigned int status) +{ + if (status & DMA_SW_ERR) + printk_ratelimited(KERN_ERR + "SW Error in DMA(cmd x%02x, laddr x%08x)\n", + dma->cmd, dma->laddr); + if (status & DMA_HW_FAULT) + printk_ratelimited(KERN_ERR + "HW Fault in DMA(cmd x%02x, laddr x%08x)\n", + dma->cmd, dma->laddr); + if (status & DMA_CANCELLED) + printk_ratelimited(KERN_ERR + "DMA Cancelled(cmd x%02x, laddr x%08x)\n", + dma->cmd, dma->laddr); + + if (dma->dma_addr) + pci_unmap_page(card->dev, dma->dma_addr, get_dma_size(dma), + dma->cmd == HW_CMD_BLK_WRITE ? + PCI_DMA_TODEVICE : + PCI_DMA_FROMDEVICE); + + if (dma->cb) + dma->cb(card, dma->cb_data, status ? 1 : 0); + + kmem_cache_free(rsxx_dma_pool, dma); +} + +static void rsxx_requeue_dma(struct rsxx_dma_ctrl *ctrl, + struct rsxx_dma *dma) +{ + /* + * Requeued DMAs go to the front of the queue so they are issued + * first. + */ + spin_lock(&ctrl->queue_lock); + list_add(&dma->list, &ctrl->queue); + spin_unlock(&ctrl->queue_lock); +} + +static void rsxx_handle_dma_error(struct rsxx_dma_ctrl *ctrl, + struct rsxx_dma *dma, + u8 hw_st) +{ + unsigned int status = 0; + int requeue_cmd = 0; + + dev_dbg(CARD_TO_DEV(ctrl->card), + "Handling DMA error(cmd x%02x, laddr x%08x st:x%02x)\n", + dma->cmd, dma->laddr, hw_st); + + if (hw_st & HW_STATUS_CRC) + ctrl->stats.crc_errors++; + if (hw_st & HW_STATUS_HARD_ERR) + ctrl->stats.hard_errors++; + if (hw_st & HW_STATUS_SOFT_ERR) + ctrl->stats.soft_errors++; + + switch (dma->cmd) { + case HW_CMD_BLK_READ: + if (hw_st & (HW_STATUS_CRC | HW_STATUS_HARD_ERR)) { + if (ctrl->card->scrub_hard) { + dma->cmd = HW_CMD_BLK_RECON_READ; + requeue_cmd = 1; + ctrl->stats.reads_retried++; + } else { + status |= DMA_HW_FAULT; + ctrl->stats.reads_failed++; + } + } else if (hw_st & HW_STATUS_FAULT) { + status |= DMA_HW_FAULT; + ctrl->stats.reads_failed++; + } + + break; + case HW_CMD_BLK_RECON_READ: + if (hw_st & (HW_STATUS_CRC | HW_STATUS_HARD_ERR)) { + /* Data could not be reconstructed. */ + status |= DMA_HW_FAULT; + ctrl->stats.reads_failed++; + } + + break; + case HW_CMD_BLK_WRITE: + status |= DMA_HW_FAULT; + ctrl->stats.writes_failed++; + + break; + case HW_CMD_BLK_DISCARD: + status |= DMA_HW_FAULT; + ctrl->stats.discards_failed++; + + break; + default: + dev_err(CARD_TO_DEV(ctrl->card), + "Unknown command in DMA!(cmd: x%02x " + "laddr x%08x st: x%02x\n", + dma->cmd, dma->laddr, hw_st); + status |= DMA_SW_ERR; + + break; + } + + if (requeue_cmd) + rsxx_requeue_dma(ctrl, dma); + else + rsxx_complete_dma(ctrl->card, dma, status); +} + +static void dma_engine_stalled(unsigned long data) +{ + struct rsxx_dma_ctrl *ctrl = (struct rsxx_dma_ctrl *)data; + + if (atomic_read(&ctrl->stats.hw_q_depth) == 0) + return; + + if (ctrl->cmd.idx != ioread32(ctrl->regmap + SW_CMD_IDX)) { + /* + * The dma engine was stalled because the SW_CMD_IDX write + * was lost. Issue it again to recover. + */ + dev_warn(CARD_TO_DEV(ctrl->card), + "SW_CMD_IDX write was lost, re-writing...\n"); + iowrite32(ctrl->cmd.idx, ctrl->regmap + SW_CMD_IDX); + mod_timer(&ctrl->activity_timer, + jiffies + DMA_ACTIVITY_TIMEOUT); + } else { + dev_warn(CARD_TO_DEV(ctrl->card), + "DMA channel %d has stalled, faulting interface.\n", + ctrl->id); + ctrl->card->dma_fault = 1; + } +} + +static void rsxx_issue_dmas(struct work_struct *work) +{ + struct rsxx_dma_ctrl *ctrl; + struct rsxx_dma *dma; + int tag; + int cmds_pending = 0; + struct hw_cmd *hw_cmd_buf; + + ctrl = container_of(work, struct rsxx_dma_ctrl, issue_dma_work); + hw_cmd_buf = ctrl->cmd.buf; + + if (unlikely(ctrl->card->halt)) + return; + + while (1) { + spin_lock(&ctrl->queue_lock); + if (list_empty(&ctrl->queue)) { + spin_unlock(&ctrl->queue_lock); + break; + } + spin_unlock(&ctrl->queue_lock); + + tag = pop_tracker(ctrl->trackers); + if (tag == -1) + break; + + spin_lock(&ctrl->queue_lock); + dma = list_entry(ctrl->queue.next, struct rsxx_dma, list); + list_del(&dma->list); + ctrl->stats.sw_q_depth--; + spin_unlock(&ctrl->queue_lock); + + /* + * This will catch any DMAs that slipped in right before the + * fault, but was queued after all the other DMAs were + * cancelled. + */ + if (unlikely(ctrl->card->dma_fault)) { + push_tracker(ctrl->trackers, tag); + rsxx_complete_dma(ctrl->card, dma, DMA_CANCELLED); + continue; + } + + set_tracker_dma(ctrl->trackers, tag, dma); + hw_cmd_buf[ctrl->cmd.idx].command = dma->cmd; + hw_cmd_buf[ctrl->cmd.idx].tag = tag; + hw_cmd_buf[ctrl->cmd.idx]._rsvd = 0; + hw_cmd_buf[ctrl->cmd.idx].sub_page = + ((dma->sub_page.cnt & 0x7) << 4) | + (dma->sub_page.off & 0x7); + + hw_cmd_buf[ctrl->cmd.idx].device_addr = + cpu_to_le32(dma->laddr); + + hw_cmd_buf[ctrl->cmd.idx].host_addr = + cpu_to_le64(dma->dma_addr); + + dev_dbg(CARD_TO_DEV(ctrl->card), + "Issue DMA%d(laddr %d tag %d) to idx %d\n", + ctrl->id, dma->laddr, tag, ctrl->cmd.idx); + + ctrl->cmd.idx = (ctrl->cmd.idx + 1) & RSXX_CS_IDX_MASK; + cmds_pending++; + + if (dma->cmd == HW_CMD_BLK_WRITE) + ctrl->stats.writes_issued++; + else if (dma->cmd == HW_CMD_BLK_DISCARD) + ctrl->stats.discards_issued++; + else + ctrl->stats.reads_issued++; + } + + /* Let HW know we've queued commands. */ + if (cmds_pending) { + /* + * We must guarantee that the CPU writes to 'ctrl->cmd.buf' + * (which is in PCI-consistent system-memory) from the loop + * above make it into the coherency domain before the + * following PIO "trigger" updating the cmd.idx. A WMB is + * sufficient. We need not explicitly CPU cache-flush since + * the memory is a PCI-consistent (ie; coherent) mapping. + */ + wmb(); + + atomic_add(cmds_pending, &ctrl->stats.hw_q_depth); + mod_timer(&ctrl->activity_timer, + jiffies + DMA_ACTIVITY_TIMEOUT); + iowrite32(ctrl->cmd.idx, ctrl->regmap + SW_CMD_IDX); + } +} + +static void rsxx_dma_done(struct work_struct *work) +{ + struct rsxx_dma_ctrl *ctrl; + struct rsxx_dma *dma; + unsigned long flags; + u16 count; + u8 status; + u8 tag; + struct hw_status *hw_st_buf; + + ctrl = container_of(work, struct rsxx_dma_ctrl, dma_done_work); + hw_st_buf = ctrl->status.buf; + + if (unlikely(ctrl->card->halt) || + unlikely(ctrl->card->dma_fault)) + return; + + count = le16_to_cpu(hw_st_buf[ctrl->status.idx].count); + + while (count == ctrl->e_cnt) { + /* + * The read memory-barrier is necessary to keep aggressive + * processors/optimizers (such as the PPC Apple G5) from + * reordering the following status-buffer tag & status read + * *before* the count read on subsequent iterations of the + * loop! + */ + rmb(); + + status = hw_st_buf[ctrl->status.idx].status; + tag = hw_st_buf[ctrl->status.idx].tag; + + dma = get_tracker_dma(ctrl->trackers, tag); + if (dma == NULL) { + spin_lock_irqsave(&ctrl->card->irq_lock, flags); + rsxx_disable_ier(ctrl->card, CR_INTR_DMA_ALL); + spin_unlock_irqrestore(&ctrl->card->irq_lock, flags); + + dev_err(CARD_TO_DEV(ctrl->card), + "No tracker for tag %d " + "(idx %d id %d)\n", + tag, ctrl->status.idx, ctrl->id); + return; + } + + dev_dbg(CARD_TO_DEV(ctrl->card), + "Completing DMA%d" + "(laddr x%x tag %d st: x%x cnt: x%04x) from idx %d.\n", + ctrl->id, dma->laddr, tag, status, count, + ctrl->status.idx); + + atomic_dec(&ctrl->stats.hw_q_depth); + + mod_timer(&ctrl->activity_timer, + jiffies + DMA_ACTIVITY_TIMEOUT); + + if (status) + rsxx_handle_dma_error(ctrl, dma, status); + else + rsxx_complete_dma(ctrl->card, dma, 0); + + push_tracker(ctrl->trackers, tag); + + ctrl->status.idx = (ctrl->status.idx + 1) & + RSXX_CS_IDX_MASK; + ctrl->e_cnt++; + + count = le16_to_cpu(hw_st_buf[ctrl->status.idx].count); + } + + dma_intr_coal_auto_tune(ctrl->card); + + if (atomic_read(&ctrl->stats.hw_q_depth) == 0) + del_timer_sync(&ctrl->activity_timer); + + spin_lock_irqsave(&ctrl->card->irq_lock, flags); + rsxx_enable_ier(ctrl->card, CR_INTR_DMA(ctrl->id)); + spin_unlock_irqrestore(&ctrl->card->irq_lock, flags); + + spin_lock(&ctrl->queue_lock); + if (ctrl->stats.sw_q_depth) + queue_work(ctrl->issue_wq, &ctrl->issue_dma_work); + spin_unlock(&ctrl->queue_lock); +} + +static int rsxx_cleanup_dma_queue(struct rsxx_cardinfo *card, + struct list_head *q) +{ + struct rsxx_dma *dma; + struct rsxx_dma *tmp; + int cnt = 0; + + list_for_each_entry_safe(dma, tmp, q, list) { + list_del(&dma->list); + + if (dma->dma_addr) + pci_unmap_page(card->dev, dma->dma_addr, + get_dma_size(dma), + (dma->cmd == HW_CMD_BLK_WRITE) ? + PCI_DMA_TODEVICE : + PCI_DMA_FROMDEVICE); + kmem_cache_free(rsxx_dma_pool, dma); + cnt++; + } + + return cnt; +} + +static int rsxx_queue_discard(struct rsxx_cardinfo *card, + struct list_head *q, + unsigned int laddr, + rsxx_dma_cb cb, + void *cb_data) +{ + struct rsxx_dma *dma; + + dma = kmem_cache_alloc(rsxx_dma_pool, GFP_KERNEL); + if (!dma) + return -ENOMEM; + + dma->cmd = HW_CMD_BLK_DISCARD; + dma->laddr = laddr; + dma->dma_addr = 0; + dma->sub_page.off = 0; + dma->sub_page.cnt = 0; + dma->page = NULL; + dma->pg_off = 0; + dma->cb = cb; + dma->cb_data = cb_data; + + dev_dbg(CARD_TO_DEV(card), "Queuing[D] laddr %x\n", dma->laddr); + + list_add_tail(&dma->list, q); + + return 0; +} + +static int rsxx_queue_dma(struct rsxx_cardinfo *card, + struct list_head *q, + int dir, + unsigned int dma_off, + unsigned int dma_len, + unsigned int laddr, + struct page *page, + unsigned int pg_off, + rsxx_dma_cb cb, + void *cb_data) +{ + struct rsxx_dma *dma; + + dma = kmem_cache_alloc(rsxx_dma_pool, GFP_KERNEL); + if (!dma) + return -ENOMEM; + + dma->dma_addr = pci_map_page(card->dev, page, pg_off, dma_len, + dir ? PCI_DMA_TODEVICE : + PCI_DMA_FROMDEVICE); + if (!dma->dma_addr) { + kmem_cache_free(rsxx_dma_pool, dma); + return -ENOMEM; + } + + dma->cmd = dir ? HW_CMD_BLK_WRITE : HW_CMD_BLK_READ; + dma->laddr = laddr; + dma->sub_page.off = (dma_off >> 9); + dma->sub_page.cnt = (dma_len >> 9); + dma->page = page; + dma->pg_off = pg_off; + dma->cb = cb; + dma->cb_data = cb_data; + + dev_dbg(CARD_TO_DEV(card), + "Queuing[%c] laddr %x off %d cnt %d page %p pg_off %d\n", + dir ? 'W' : 'R', dma->laddr, dma->sub_page.off, + dma->sub_page.cnt, dma->page, dma->pg_off); + + /* Queue the DMA */ + list_add_tail(&dma->list, q); + + return 0; +} + +int rsxx_dma_queue_bio(struct rsxx_cardinfo *card, + struct bio *bio, + atomic_t *n_dmas, + rsxx_dma_cb cb, + void *cb_data) +{ + struct list_head dma_list[RSXX_MAX_TARGETS]; + struct bio_vec *bvec; + unsigned long long addr8; + unsigned int laddr; + unsigned int bv_len; + unsigned int bv_off; + unsigned int dma_off; + unsigned int dma_len; + int dma_cnt[RSXX_MAX_TARGETS]; + int tgt; + int st; + int i; + + addr8 = bio->bi_sector << 9; /* sectors are 512 bytes */ + atomic_set(n_dmas, 0); + + for (i = 0; i < card->n_targets; i++) { + INIT_LIST_HEAD(&dma_list[i]); + dma_cnt[i] = 0; + } + + if (bio->bi_rw & REQ_DISCARD) { + bv_len = bio->bi_size; + + while (bv_len > 0) { + tgt = rsxx_get_dma_tgt(card, addr8); + laddr = rsxx_addr8_to_laddr(addr8, card); + + st = rsxx_queue_discard(card, &dma_list[tgt], laddr, + cb, cb_data); + if (st) + goto bvec_err; + + dma_cnt[tgt]++; + atomic_inc(n_dmas); + addr8 += RSXX_HW_BLK_SIZE; + bv_len -= RSXX_HW_BLK_SIZE; + } + } else { + bio_for_each_segment(bvec, bio, i) { + bv_len = bvec->bv_len; + bv_off = bvec->bv_offset; + + while (bv_len > 0) { + tgt = rsxx_get_dma_tgt(card, addr8); + laddr = rsxx_addr8_to_laddr(addr8, card); + dma_off = addr8 & RSXX_HW_BLK_MASK; + dma_len = min(bv_len, + RSXX_HW_BLK_SIZE - dma_off); + + st = rsxx_queue_dma(card, &dma_list[tgt], + bio_data_dir(bio), + dma_off, dma_len, + laddr, bvec->bv_page, + bv_off, cb, cb_data); + if (st) + goto bvec_err; + + dma_cnt[tgt]++; + atomic_inc(n_dmas); + addr8 += dma_len; + bv_off += dma_len; + bv_len -= dma_len; + } + } + } + + for (i = 0; i < card->n_targets; i++) { + if (!list_empty(&dma_list[i])) { + spin_lock(&card->ctrl[i].queue_lock); + card->ctrl[i].stats.sw_q_depth += dma_cnt[i]; + list_splice_tail(&dma_list[i], &card->ctrl[i].queue); + spin_unlock(&card->ctrl[i].queue_lock); + + queue_work(card->ctrl[i].issue_wq, + &card->ctrl[i].issue_dma_work); + } + } + + return 0; + +bvec_err: + for (i = 0; i < card->n_targets; i++) + rsxx_cleanup_dma_queue(card, &dma_list[i]); + + return st; +} + + +/*----------------- DMA Engine Initialization & Setup -------------------*/ +static int rsxx_dma_ctrl_init(struct pci_dev *dev, + struct rsxx_dma_ctrl *ctrl) +{ + int i; + + memset(&ctrl->stats, 0, sizeof(ctrl->stats)); + + ctrl->status.buf = pci_alloc_consistent(dev, STATUS_BUFFER_SIZE8, + &ctrl->status.dma_addr); + ctrl->cmd.buf = pci_alloc_consistent(dev, COMMAND_BUFFER_SIZE8, + &ctrl->cmd.dma_addr); + if (ctrl->status.buf == NULL || ctrl->cmd.buf == NULL) + return -ENOMEM; + + ctrl->trackers = vmalloc(DMA_TRACKER_LIST_SIZE8); + if (!ctrl->trackers) + return -ENOMEM; + + ctrl->trackers->head = 0; + for (i = 0; i < RSXX_MAX_OUTSTANDING_CMDS; i++) { + ctrl->trackers->list[i].next_tag = i + 1; + ctrl->trackers->list[i].dma = NULL; + } + ctrl->trackers->list[RSXX_MAX_OUTSTANDING_CMDS-1].next_tag = -1; + spin_lock_init(&ctrl->trackers->lock); + + spin_lock_init(&ctrl->queue_lock); + INIT_LIST_HEAD(&ctrl->queue); + + setup_timer(&ctrl->activity_timer, dma_engine_stalled, + (unsigned long)ctrl); + + ctrl->issue_wq = alloc_ordered_workqueue(DRIVER_NAME"_issue", 0); + if (!ctrl->issue_wq) + return -ENOMEM; + + ctrl->done_wq = alloc_ordered_workqueue(DRIVER_NAME"_done", 0); + if (!ctrl->done_wq) + return -ENOMEM; + + INIT_WORK(&ctrl->issue_dma_work, rsxx_issue_dmas); + INIT_WORK(&ctrl->dma_done_work, rsxx_dma_done); + + memset(ctrl->status.buf, 0xac, STATUS_BUFFER_SIZE8); + iowrite32(lower_32_bits(ctrl->status.dma_addr), + ctrl->regmap + SB_ADD_LO); + iowrite32(upper_32_bits(ctrl->status.dma_addr), + ctrl->regmap + SB_ADD_HI); + + memset(ctrl->cmd.buf, 0x83, COMMAND_BUFFER_SIZE8); + iowrite32(lower_32_bits(ctrl->cmd.dma_addr), ctrl->regmap + CB_ADD_LO); + iowrite32(upper_32_bits(ctrl->cmd.dma_addr), ctrl->regmap + CB_ADD_HI); + + ctrl->status.idx = ioread32(ctrl->regmap + HW_STATUS_CNT); + if (ctrl->status.idx > RSXX_MAX_OUTSTANDING_CMDS) { + dev_crit(&dev->dev, "Failed reading status cnt x%x\n", + ctrl->status.idx); + return -EINVAL; + } + iowrite32(ctrl->status.idx, ctrl->regmap + HW_STATUS_CNT); + iowrite32(ctrl->status.idx, ctrl->regmap + SW_STATUS_CNT); + + ctrl->cmd.idx = ioread32(ctrl->regmap + HW_CMD_IDX); + if (ctrl->cmd.idx > RSXX_MAX_OUTSTANDING_CMDS) { + dev_crit(&dev->dev, "Failed reading cmd cnt x%x\n", + ctrl->status.idx); + return -EINVAL; + } + iowrite32(ctrl->cmd.idx, ctrl->regmap + HW_CMD_IDX); + iowrite32(ctrl->cmd.idx, ctrl->regmap + SW_CMD_IDX); + + wmb(); + + return 0; +} + +static int rsxx_dma_stripe_setup(struct rsxx_cardinfo *card, + unsigned int stripe_size8) +{ + if (!is_power_of_2(stripe_size8)) { + dev_err(CARD_TO_DEV(card), + "stripe_size is NOT a power of 2!\n"); + return -EINVAL; + } + + card->_stripe.lower_mask = stripe_size8 - 1; + + card->_stripe.upper_mask = ~(card->_stripe.lower_mask); + card->_stripe.upper_shift = ffs(card->n_targets) - 1; + + card->_stripe.target_mask = card->n_targets - 1; + card->_stripe.target_shift = ffs(stripe_size8) - 1; + + dev_dbg(CARD_TO_DEV(card), "_stripe.lower_mask = x%016llx\n", + card->_stripe.lower_mask); + dev_dbg(CARD_TO_DEV(card), "_stripe.upper_shift = x%016llx\n", + card->_stripe.upper_shift); + dev_dbg(CARD_TO_DEV(card), "_stripe.upper_mask = x%016llx\n", + card->_stripe.upper_mask); + dev_dbg(CARD_TO_DEV(card), "_stripe.target_mask = x%016llx\n", + card->_stripe.target_mask); + dev_dbg(CARD_TO_DEV(card), "_stripe.target_shift = x%016llx\n", + card->_stripe.target_shift); + + return 0; +} + +static int rsxx_dma_configure(struct rsxx_cardinfo *card) +{ + u32 intr_coal; + + intr_coal = dma_intr_coal_val(card->config.data.intr_coal.mode, + card->config.data.intr_coal.count, + card->config.data.intr_coal.latency); + iowrite32(intr_coal, card->regmap + INTR_COAL); + + return rsxx_dma_stripe_setup(card, card->config.data.stripe_size); +} + +int rsxx_dma_setup(struct rsxx_cardinfo *card) +{ + unsigned long flags; + int st; + int i; + + dev_info(CARD_TO_DEV(card), + "Initializing %d DMA targets\n", + card->n_targets); + + /* Regmap is divided up into 4K chunks. One for each DMA channel */ + for (i = 0; i < card->n_targets; i++) + card->ctrl[i].regmap = card->regmap + (i * 4096); + + card->dma_fault = 0; + + /* Reset the DMA queues */ + rsxx_dma_queue_reset(card); + + /************* Setup DMA Control *************/ + for (i = 0; i < card->n_targets; i++) { + st = rsxx_dma_ctrl_init(card->dev, &card->ctrl[i]); + if (st) + goto failed_dma_setup; + + card->ctrl[i].card = card; + card->ctrl[i].id = i; + } + + card->scrub_hard = 1; + + if (card->config_valid) + rsxx_dma_configure(card); + + /* Enable the interrupts after all setup has completed. */ + for (i = 0; i < card->n_targets; i++) { + spin_lock_irqsave(&card->irq_lock, flags); + rsxx_enable_ier_and_isr(card, CR_INTR_DMA(i)); + spin_unlock_irqrestore(&card->irq_lock, flags); + } + + return 0; + +failed_dma_setup: + for (i = 0; i < card->n_targets; i++) { + struct rsxx_dma_ctrl *ctrl = &card->ctrl[i]; + + if (ctrl->issue_wq) { + destroy_workqueue(ctrl->issue_wq); + ctrl->issue_wq = NULL; + } + + if (ctrl->done_wq) { + destroy_workqueue(ctrl->done_wq); + ctrl->done_wq = NULL; + } + + if (ctrl->trackers) + vfree(ctrl->trackers); + + if (ctrl->status.buf) + pci_free_consistent(card->dev, STATUS_BUFFER_SIZE8, + ctrl->status.buf, + ctrl->status.dma_addr); + if (ctrl->cmd.buf) + pci_free_consistent(card->dev, COMMAND_BUFFER_SIZE8, + ctrl->cmd.buf, ctrl->cmd.dma_addr); + } + + return st; +} + + +void rsxx_dma_destroy(struct rsxx_cardinfo *card) +{ + struct rsxx_dma_ctrl *ctrl; + struct rsxx_dma *dma; + int i, j; + int cnt = 0; + + for (i = 0; i < card->n_targets; i++) { + ctrl = &card->ctrl[i]; + + if (ctrl->issue_wq) { + destroy_workqueue(ctrl->issue_wq); + ctrl->issue_wq = NULL; + } + + if (ctrl->done_wq) { + destroy_workqueue(ctrl->done_wq); + ctrl->done_wq = NULL; + } + + if (timer_pending(&ctrl->activity_timer)) + del_timer_sync(&ctrl->activity_timer); + + /* Clean up the DMA queue */ + spin_lock(&ctrl->queue_lock); + cnt = rsxx_cleanup_dma_queue(card, &ctrl->queue); + spin_unlock(&ctrl->queue_lock); + + if (cnt) + dev_info(CARD_TO_DEV(card), + "Freed %d queued DMAs on channel %d\n", + cnt, i); + + /* Clean up issued DMAs */ + for (j = 0; j < RSXX_MAX_OUTSTANDING_CMDS; j++) { + dma = get_tracker_dma(ctrl->trackers, j); + if (dma) { + pci_unmap_page(card->dev, dma->dma_addr, + get_dma_size(dma), + (dma->cmd == HW_CMD_BLK_WRITE) ? + PCI_DMA_TODEVICE : + PCI_DMA_FROMDEVICE); + kmem_cache_free(rsxx_dma_pool, dma); + cnt++; + } + } + + if (cnt) + dev_info(CARD_TO_DEV(card), + "Freed %d pending DMAs on channel %d\n", + cnt, i); + + vfree(ctrl->trackers); + + pci_free_consistent(card->dev, STATUS_BUFFER_SIZE8, + ctrl->status.buf, ctrl->status.dma_addr); + pci_free_consistent(card->dev, COMMAND_BUFFER_SIZE8, + ctrl->cmd.buf, ctrl->cmd.dma_addr); + } +} + + +int rsxx_dma_init(void) +{ + rsxx_dma_pool = KMEM_CACHE(rsxx_dma, SLAB_HWCACHE_ALIGN); + if (!rsxx_dma_pool) + return -ENOMEM; + + return 0; +} + + +void rsxx_dma_cleanup(void) +{ + kmem_cache_destroy(rsxx_dma_pool); +} + diff --git a/drivers/block/rsxx/rsxx.h b/drivers/block/rsxx/rsxx.h new file mode 100644 index 0000000..2e50b65 --- /dev/null +++ b/drivers/block/rsxx/rsxx.h @@ -0,0 +1,45 @@ +/* +* Filename: rsxx.h +* +* +* Authors: Joshua Morris <josh.h.morris@us.ibm.com> +* Philip Kelleher <pjk1939@linux.vnet.ibm.com> +* +* (C) Copyright 2013 IBM Corporation +* +* This program is free software; you can redistribute it and/or +* modify it under the terms of the GNU General Public License as +* published by the Free Software Foundation; either version 2 of the +* License, or (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, but +* WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +* General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program; if not, write to the Free Software Foundation, +* Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#ifndef __RSXX_H__ +#define __RSXX_H__ + +/*----------------- IOCTL Definitions -------------------*/ + +struct rsxx_reg_access { + __u32 addr; + __u32 cnt; + __u32 stat; + __u32 stream; + __u32 data[8]; +}; + +#define RSXX_MAX_REG_CNT (8 * (sizeof(__u32))) + +#define RSXX_IOC_MAGIC 'r' + +#define RSXX_GETREG _IOWR(RSXX_IOC_MAGIC, 0x20, struct rsxx_reg_access) +#define RSXX_SETREG _IOWR(RSXX_IOC_MAGIC, 0x21, struct rsxx_reg_access) + +#endif /* __RSXX_H_ */ diff --git a/drivers/block/rsxx/rsxx_cfg.h b/drivers/block/rsxx/rsxx_cfg.h new file mode 100644 index 0000000..c025fe5 --- /dev/null +++ b/drivers/block/rsxx/rsxx_cfg.h @@ -0,0 +1,72 @@ +/* +* Filename: rsXX_cfg.h +* +* +* Authors: Joshua Morris <josh.h.morris@us.ibm.com> +* Philip Kelleher <pjk1939@linux.vnet.ibm.com> +* +* (C) Copyright 2013 IBM Corporation +* +* This program is free software; you can redistribute it and/or +* modify it under the terms of the GNU General Public License as +* published by the Free Software Foundation; either version 2 of the +* License, or (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, but +* WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +* General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program; if not, write to the Free Software Foundation, +* Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#ifndef __RSXX_CFG_H__ +#define __RSXX_CFG_H__ + +/* NOTE: Config values will be saved in network byte order (i.e. Big endian) */ +#include <linux/types.h> + +/* + * The card config version must match the driver's expected version. If it does + * not, the DMA interfaces will not be attached and the user will need to + * initialize/upgrade the card configuration using the card config utility. + */ +#define RSXX_CFG_VERSION 4 + +struct card_cfg_hdr { + __u32 version; + __u32 crc; +}; + +struct card_cfg_data { + __u32 block_size; + __u32 stripe_size; + __u32 vendor_id; + __u32 cache_order; + struct { + __u32 mode; /* Disabled, manual, auto-tune... */ + __u32 count; /* Number of intr to coalesce */ + __u32 latency;/* Max wait time (in ns) */ + } intr_coal; +}; + +struct rsxx_card_cfg { + struct card_cfg_hdr hdr; + struct card_cfg_data data; +}; + +/* Vendor ID Values */ +#define RSXX_VENDOR_ID_TMS_IBM 0 +#define RSXX_VENDOR_ID_DSI 1 +#define RSXX_VENDOR_COUNT 2 + +/* Interrupt Coalescing Values */ +#define RSXX_INTR_COAL_DISABLED 0 +#define RSXX_INTR_COAL_EXPLICIT 1 +#define RSXX_INTR_COAL_AUTO_TUNE 2 + + +#endif /* __RSXX_CFG_H__ */ + diff --git a/drivers/block/rsxx/rsxx_priv.h b/drivers/block/rsxx/rsxx_priv.h new file mode 100644 index 0000000..a1ac907 --- /dev/null +++ b/drivers/block/rsxx/rsxx_priv.h @@ -0,0 +1,399 @@ +/* +* Filename: rsxx_priv.h +* +* +* Authors: Joshua Morris <josh.h.morris@us.ibm.com> +* Philip Kelleher <pjk1939@linux.vnet.ibm.com> +* +* (C) Copyright 2013 IBM Corporation +* +* This program is free software; you can redistribute it and/or +* modify it under the terms of the GNU General Public License as +* published by the Free Software Foundation; either version 2 of the +* License, or (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, but +* WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +* General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program; if not, write to the Free Software Foundation, +* Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#ifndef __RSXX_PRIV_H__ +#define __RSXX_PRIV_H__ + +#include <linux/version.h> +#include <linux/semaphore.h> + +#include <linux/fs.h> +#include <linux/interrupt.h> +#include <linux/mutex.h> +#include <linux/pci.h> +#include <linux/spinlock.h> +#include <linux/sysfs.h> +#include <linux/workqueue.h> +#include <linux/bio.h> +#include <linux/vmalloc.h> +#include <linux/timer.h> +#include <linux/ioctl.h> + +#include "rsxx.h" +#include "rsxx_cfg.h" + +struct proc_cmd; + +#define PCI_VENDOR_ID_TMS_IBM 0x15B6 +#define PCI_DEVICE_ID_RS70_FLASH 0x0019 +#define PCI_DEVICE_ID_RS70D_FLASH 0x001A +#define PCI_DEVICE_ID_RS80_FLASH 0x001C +#define PCI_DEVICE_ID_RS81_FLASH 0x001E + +#define RS70_PCI_REV_SUPPORTED 4 + +#define DRIVER_NAME "rsxx" +#define DRIVER_VERSION "3.7" + +/* Block size is 4096 */ +#define RSXX_HW_BLK_SHIFT 12 +#define RSXX_HW_BLK_SIZE (1 << RSXX_HW_BLK_SHIFT) +#define RSXX_HW_BLK_MASK (RSXX_HW_BLK_SIZE - 1) + +#define MAX_CREG_DATA8 32 +#define LOG_BUF_SIZE8 128 + +#define RSXX_MAX_OUTSTANDING_CMDS 255 +#define RSXX_CS_IDX_MASK 0xff + +#define RSXX_MAX_TARGETS 8 + +struct dma_tracker_list; + +/* DMA Command/Status Buffer structure */ +struct rsxx_cs_buffer { + dma_addr_t dma_addr; + void *buf; + u32 idx; +}; + +struct rsxx_dma_stats { + u32 crc_errors; + u32 hard_errors; + u32 soft_errors; + u32 writes_issued; + u32 writes_failed; + u32 reads_issued; + u32 reads_failed; + u32 reads_retried; + u32 discards_issued; + u32 discards_failed; + u32 done_rescheduled; + u32 issue_rescheduled; + u32 sw_q_depth; /* Number of DMAs on the SW queue. */ + atomic_t hw_q_depth; /* Number of DMAs queued to HW. */ +}; + +struct rsxx_dma_ctrl { + struct rsxx_cardinfo *card; + int id; + void __iomem *regmap; + struct rsxx_cs_buffer status; + struct rsxx_cs_buffer cmd; + u16 e_cnt; + spinlock_t queue_lock; + struct list_head queue; + struct workqueue_struct *issue_wq; + struct work_struct issue_dma_work; + struct workqueue_struct *done_wq; + struct work_struct dma_done_work; + struct timer_list activity_timer; + struct dma_tracker_list *trackers; + struct rsxx_dma_stats stats; +}; + +struct rsxx_cardinfo { + struct pci_dev *dev; + unsigned int halt; + + void __iomem *regmap; + spinlock_t irq_lock; + unsigned int isr_mask; + unsigned int ier_mask; + + struct rsxx_card_cfg config; + int config_valid; + + /* Embedded CPU Communication */ + struct { + spinlock_t lock; + bool active; + struct creg_cmd *active_cmd; + struct work_struct done_work; + struct list_head queue; + unsigned int q_depth; + /* Cache the creg status to prevent ioreads */ + struct { + u32 stat; + u32 failed_cancel_timer; + u32 creg_timeout; + } creg_stats; + struct timer_list cmd_timer; + struct mutex reset_lock; + int reset; + } creg_ctrl; + + struct { + char tmp[MAX_CREG_DATA8]; + char buf[LOG_BUF_SIZE8]; /* terminated */ + int buf_len; + } log; + + struct work_struct event_work; + unsigned int state; + u64 size8; + + /* Lock the device attach/detach function */ + struct mutex dev_lock; + + /* Block Device Variables */ + bool bdev_attached; + int disk_id; + int major; + struct request_queue *queue; + struct gendisk *gendisk; + struct { + /* Used to convert a byte address to a device address. */ + u64 lower_mask; + u64 upper_shift; + u64 upper_mask; + u64 target_mask; + u64 target_shift; + } _stripe; + unsigned int dma_fault; + + int scrub_hard; + + int n_targets; + struct rsxx_dma_ctrl *ctrl; +}; + +enum rsxx_pci_regmap { + HWID = 0x00, /* Hardware Identification Register */ + SCRATCH = 0x04, /* Scratch/Debug Register */ + RESET = 0x08, /* Reset Register */ + ISR = 0x10, /* Interrupt Status Register */ + IER = 0x14, /* Interrupt Enable Register */ + IPR = 0x18, /* Interrupt Poll Register */ + CB_ADD_LO = 0x20, /* Command Host Buffer Address [31:0] */ + CB_ADD_HI = 0x24, /* Command Host Buffer Address [63:32]*/ + HW_CMD_IDX = 0x28, /* Hardware Processed Command Index */ + SW_CMD_IDX = 0x2C, /* Software Processed Command Index */ + SB_ADD_LO = 0x30, /* Status Host Buffer Address [31:0] */ + SB_ADD_HI = 0x34, /* Status Host Buffer Address [63:32] */ + HW_STATUS_CNT = 0x38, /* Hardware Status Counter */ + SW_STATUS_CNT = 0x3C, /* Deprecated */ + CREG_CMD = 0x40, /* CPU Command Register */ + CREG_ADD = 0x44, /* CPU Address Register */ + CREG_CNT = 0x48, /* CPU Count Register */ + CREG_STAT = 0x4C, /* CPU Status Register */ + CREG_DATA0 = 0x50, /* CPU Data Registers */ + CREG_DATA1 = 0x54, + CREG_DATA2 = 0x58, + CREG_DATA3 = 0x5C, + CREG_DATA4 = 0x60, + CREG_DATA5 = 0x64, + CREG_DATA6 = 0x68, + CREG_DATA7 = 0x6c, + INTR_COAL = 0x70, /* Interrupt Coalescing Register */ + HW_ERROR = 0x74, /* Card Error Register */ + PCI_DEBUG0 = 0x78, /* PCI Debug Registers */ + PCI_DEBUG1 = 0x7C, + PCI_DEBUG2 = 0x80, + PCI_DEBUG3 = 0x84, + PCI_DEBUG4 = 0x88, + PCI_DEBUG5 = 0x8C, + PCI_DEBUG6 = 0x90, + PCI_DEBUG7 = 0x94, + PCI_POWER_THROTTLE = 0x98, + PERF_CTRL = 0x9c, + PERF_TIMER_LO = 0xa0, + PERF_TIMER_HI = 0xa4, + PERF_RD512_LO = 0xa8, + PERF_RD512_HI = 0xac, + PERF_WR512_LO = 0xb0, + PERF_WR512_HI = 0xb4, +}; + +enum rsxx_intr { + CR_INTR_DMA0 = 0x00000001, + CR_INTR_CREG = 0x00000002, + CR_INTR_DMA1 = 0x00000004, + CR_INTR_EVENT = 0x00000008, + CR_INTR_DMA2 = 0x00000010, + CR_INTR_DMA3 = 0x00000020, + CR_INTR_DMA4 = 0x00000040, + CR_INTR_DMA5 = 0x00000080, + CR_INTR_DMA6 = 0x00000100, + CR_INTR_DMA7 = 0x00000200, + CR_INTR_DMA_ALL = 0x000003f5, + CR_INTR_ALL = 0xffffffff, +}; + +static inline int CR_INTR_DMA(int N) +{ + static const unsigned int _CR_INTR_DMA[] = { + CR_INTR_DMA0, CR_INTR_DMA1, CR_INTR_DMA2, CR_INTR_DMA3, + CR_INTR_DMA4, CR_INTR_DMA5, CR_INTR_DMA6, CR_INTR_DMA7 + }; + return _CR_INTR_DMA[N]; +} +enum rsxx_pci_reset { + DMA_QUEUE_RESET = 0x00000001, +}; + +enum rsxx_pci_revision { + RSXX_DISCARD_SUPPORT = 2, +}; + +enum rsxx_creg_cmd { + CREG_CMD_TAG_MASK = 0x0000FF00, + CREG_OP_WRITE = 0x000000C0, + CREG_OP_READ = 0x000000E0, +}; + +enum rsxx_creg_addr { + CREG_ADD_CARD_CMD = 0x80001000, + CREG_ADD_CARD_STATE = 0x80001004, + CREG_ADD_CARD_SIZE = 0x8000100c, + CREG_ADD_CAPABILITIES = 0x80001050, + CREG_ADD_LOG = 0x80002000, + CREG_ADD_NUM_TARGETS = 0x80003000, + CREG_ADD_CONFIG = 0xB0000000, +}; + +enum rsxx_creg_card_cmd { + CARD_CMD_STARTUP = 1, + CARD_CMD_SHUTDOWN = 2, + CARD_CMD_LOW_LEVEL_FORMAT = 3, + CARD_CMD_FPGA_RECONFIG_BR = 4, + CARD_CMD_FPGA_RECONFIG_MAIN = 5, + CARD_CMD_BACKUP = 6, + CARD_CMD_RESET = 7, + CARD_CMD_deprecated = 8, + CARD_CMD_UNINITIALIZE = 9, + CARD_CMD_DSTROY_EMERGENCY = 10, + CARD_CMD_DSTROY_NORMAL = 11, + CARD_CMD_DSTROY_EXTENDED = 12, + CARD_CMD_DSTROY_ABORT = 13, +}; + +enum rsxx_card_state { + CARD_STATE_SHUTDOWN = 0x00000001, + CARD_STATE_STARTING = 0x00000002, + CARD_STATE_FORMATTING = 0x00000004, + CARD_STATE_UNINITIALIZED = 0x00000008, + CARD_STATE_GOOD = 0x00000010, + CARD_STATE_SHUTTING_DOWN = 0x00000020, + CARD_STATE_FAULT = 0x00000040, + CARD_STATE_RD_ONLY_FAULT = 0x00000080, + CARD_STATE_DSTROYING = 0x00000100, +}; + +enum rsxx_led { + LED_DEFAULT = 0x0, + LED_IDENTIFY = 0x1, + LED_SOAK = 0x2, +}; + +enum rsxx_creg_flash_lock { + CREG_FLASH_LOCK = 1, + CREG_FLASH_UNLOCK = 2, +}; + +enum rsxx_card_capabilities { + CARD_CAP_SUBPAGE_WRITES = 0x00000080, +}; + +enum rsxx_creg_stat { + CREG_STAT_STATUS_MASK = 0x00000003, + CREG_STAT_SUCCESS = 0x1, + CREG_STAT_ERROR = 0x2, + CREG_STAT_CHAR_PENDING = 0x00000004, /* Character I/O pending bit */ + CREG_STAT_LOG_PENDING = 0x00000008, /* HW log message pending bit */ + CREG_STAT_TAG_MASK = 0x0000ff00, +}; + +static inline unsigned int CREG_DATA(int N) +{ + return CREG_DATA0 + (N << 2); +} + +/*----------------- Convenient Log Wrappers -------------------*/ +#define CARD_TO_DEV(__CARD) (&(__CARD)->dev->dev) + +/***** config.c *****/ +int rsxx_load_config(struct rsxx_cardinfo *card); + +/***** core.c *****/ +void rsxx_enable_ier(struct rsxx_cardinfo *card, unsigned int intr); +void rsxx_disable_ier(struct rsxx_cardinfo *card, unsigned int intr); +void rsxx_enable_ier_and_isr(struct rsxx_cardinfo *card, + unsigned int intr); +void rsxx_disable_ier_and_isr(struct rsxx_cardinfo *card, + unsigned int intr); + +/***** dev.c *****/ +int rsxx_attach_dev(struct rsxx_cardinfo *card); +void rsxx_detach_dev(struct rsxx_cardinfo *card); +int rsxx_setup_dev(struct rsxx_cardinfo *card); +void rsxx_destroy_dev(struct rsxx_cardinfo *card); +int rsxx_dev_init(void); +void rsxx_dev_cleanup(void); + +/***** dma.c ****/ +typedef void (*rsxx_dma_cb)(struct rsxx_cardinfo *card, + void *cb_data, + unsigned int status); +int rsxx_dma_setup(struct rsxx_cardinfo *card); +void rsxx_dma_destroy(struct rsxx_cardinfo *card); +int rsxx_dma_init(void); +void rsxx_dma_cleanup(void); +int rsxx_dma_queue_bio(struct rsxx_cardinfo *card, + struct bio *bio, + atomic_t *n_dmas, + rsxx_dma_cb cb, + void *cb_data); + +/***** cregs.c *****/ +int rsxx_creg_write(struct rsxx_cardinfo *card, u32 addr, + unsigned int size8, + void *data, + int byte_stream); +int rsxx_creg_read(struct rsxx_cardinfo *card, + u32 addr, + unsigned int size8, + void *data, + int byte_stream); +int rsxx_read_hw_log(struct rsxx_cardinfo *card); +int rsxx_get_card_state(struct rsxx_cardinfo *card, + unsigned int *state); +int rsxx_get_card_size8(struct rsxx_cardinfo *card, u64 *size8); +int rsxx_get_num_targets(struct rsxx_cardinfo *card, + unsigned int *n_targets); +int rsxx_get_card_capabilities(struct rsxx_cardinfo *card, + u32 *capabilities); +int rsxx_issue_card_cmd(struct rsxx_cardinfo *card, u32 cmd); +int rsxx_creg_setup(struct rsxx_cardinfo *card); +void rsxx_creg_destroy(struct rsxx_cardinfo *card); +int rsxx_creg_init(void); +void rsxx_creg_cleanup(void); + +int rsxx_reg_access(struct rsxx_cardinfo *card, + struct rsxx_reg_access __user *ucmd, + int read); + + + +#endif /* __DRIVERS_BLOCK_RSXX_H__ */ diff --git a/drivers/block/swim3.c b/drivers/block/swim3.c index 57763c5..758f2ac 100644 --- a/drivers/block/swim3.c +++ b/drivers/block/swim3.c @@ -1090,10 +1090,13 @@ static const struct block_device_operations floppy_fops = { static void swim3_mb_event(struct macio_dev* mdev, int mb_state) { struct floppy_state *fs = macio_get_drvdata(mdev); - struct swim3 __iomem *sw = fs->swim3; + struct swim3 __iomem *sw; if (!fs) return; + + sw = fs->swim3; + if (mb_state != MB_FD) return; diff --git a/drivers/block/xd.c b/drivers/block/xd.c deleted file mode 100644 index ff54052..0000000 --- a/drivers/block/xd.c +++ /dev/null @@ -1,1123 +0,0 @@ -/* - * This file contains the driver for an XT hard disk controller - * (at least the DTC 5150X) for Linux. - * - * Author: Pat Mackinlay, pat@it.com.au - * Date: 29/09/92 - * - * Revised: 01/01/93, ... - * - * Ref: DTC 5150X Controller Specification (thanks to Kevin Fowler, - * kevinf@agora.rain.com) - * Also thanks to: Salvador Abreu, Dave Thaler, Risto Kankkunen and - * Wim Van Dorst. - * - * Revised: 04/04/94 by Risto Kankkunen - * Moved the detection code from xd_init() to xd_geninit() as it needed - * interrupts enabled and Linus didn't want to enable them in that first - * phase. xd_geninit() is the place to do these kinds of things anyway, - * he says. - * - * Modularized: 04/10/96 by Todd Fries, tfries@umr.edu - * - * Revised: 13/12/97 by Andrzej Krzysztofowicz, ankry@mif.pg.gda.pl - * Fixed some problems with disk initialization and module initiation. - * Added support for manual geometry setting (except Seagate controllers) - * in form: - * xd_geo=<cyl_xda>,<head_xda>,<sec_xda>[,<cyl_xdb>,<head_xdb>,<sec_xdb>] - * Recovered DMA access. Abridged messages. Added support for DTC5051CX, - * WD1002-27X & XEBEC controllers. Driver uses now some jumper settings. - * Extended ioctl() support. - * - * Bugfix: 15/02/01, Paul G. - inform queue layer of tiny xd_maxsect. - * - */ - -#include <linux/module.h> -#include <linux/errno.h> -#include <linux/interrupt.h> -#include <linux/mm.h> -#include <linux/fs.h> -#include <linux/kernel.h> -#include <linux/timer.h> -#include <linux/genhd.h> -#include <linux/hdreg.h> -#include <linux/ioport.h> -#include <linux/init.h> -#include <linux/wait.h> -#include <linux/blkdev.h> -#include <linux/mutex.h> -#include <linux/blkpg.h> -#include <linux/delay.h> -#include <linux/io.h> -#include <linux/gfp.h> - -#include <asm/uaccess.h> -#include <asm/dma.h> - -#include "xd.h" - -static DEFINE_MUTEX(xd_mutex); -static void __init do_xd_setup (int *integers); -#ifdef MODULE -static int xd[5] = { -1,-1,-1,-1, }; -#endif - -#define XD_DONT_USE_DMA 0 /* Initial value. may be overriden using - "nodma" module option */ -#define XD_INIT_DISK_DELAY (30) /* 30 ms delay during disk initialization */ - -/* Above may need to be increased if a problem with the 2nd drive detection - (ST11M controller) or resetting a controller (WD) appears */ - -static XD_INFO xd_info[XD_MAXDRIVES]; - -/* If you try this driver and find that your card is not detected by the driver at bootup, you need to add your BIOS - signature and details to the following list of signatures. A BIOS signature is a string embedded into the first - few bytes of your controller's on-board ROM BIOS. To find out what yours is, use something like MS-DOS's DEBUG - command. Run DEBUG, and then you can examine your BIOS signature with: - - d xxxx:0000 - - where xxxx is the segment of your controller (like C800 or D000 or something). On the ASCII dump at the right, you should - be able to see a string mentioning the manufacturer's copyright etc. Add this string into the table below. The parameters - in the table are, in order: - - offset ; this is the offset (in bytes) from the start of your ROM where the signature starts - signature ; this is the actual text of the signature - xd_?_init_controller ; this is the controller init routine used by your controller - xd_?_init_drive ; this is the drive init routine used by your controller - - The controllers directly supported at the moment are: DTC 5150x, WD 1004A27X, ST11M/R and override. If your controller is - made by the same manufacturer as one of these, try using the same init routines as they do. If that doesn't work, your - best bet is to use the "override" routines. These routines use a "portable" method of getting the disk's geometry, and - may work with your card. If none of these seem to work, try sending me some email and I'll see what I can do <grin>. - - NOTE: You can now specify your XT controller's parameters from the command line in the form xd=TYPE,IRQ,IO,DMA. The driver - should be able to detect your drive's geometry from this info. (eg: xd=0,5,0x320,3 is the "standard"). */ - -#include <asm/page.h> -#define xd_dma_mem_alloc(size) __get_dma_pages(GFP_KERNEL,get_order(size)) -#define xd_dma_mem_free(addr, size) free_pages(addr, get_order(size)) -static char *xd_dma_buffer; - -static XD_SIGNATURE xd_sigs[] __initdata = { - { 0x0000,"Override geometry handler",NULL,xd_override_init_drive,"n unknown" }, /* Pat Mackinlay, pat@it.com.au */ - { 0x0008,"[BXD06 (C) DTC 17-MAY-1985]",xd_dtc_init_controller,xd_dtc5150cx_init_drive," DTC 5150CX" }, /* Andrzej Krzysztofowicz, ankry@mif.pg.gda.pl */ - { 0x000B,"CRD18A Not an IBM rom. (C) Copyright Data Technology Corp. 05/31/88",xd_dtc_init_controller,xd_dtc_init_drive," DTC 5150X" }, /* Todd Fries, tfries@umr.edu */ - { 0x000B,"CXD23A Not an IBM ROM (C)Copyright Data Technology Corp 12/03/88",xd_dtc_init_controller,xd_dtc_init_drive," DTC 5150X" }, /* Pat Mackinlay, pat@it.com.au */ - { 0x0008,"07/15/86(C) Copyright 1986 Western Digital Corp.",xd_wd_init_controller,xd_wd_init_drive," Western Dig. 1002-27X" }, /* Andrzej Krzysztofowicz, ankry@mif.pg.gda.pl */ - { 0x0008,"06/24/88(C) Copyright 1988 Western Digital Corp.",xd_wd_init_controller,xd_wd_init_drive," Western Dig. WDXT-GEN2" }, /* Dan Newcombe, newcombe@aa.csc.peachnet.edu */ - { 0x0015,"SEAGATE ST11 BIOS REVISION",xd_seagate_init_controller,xd_seagate_init_drive," Seagate ST11M/R" }, /* Salvador Abreu, spa@fct.unl.pt */ - { 0x0010,"ST11R BIOS",xd_seagate_init_controller,xd_seagate_init_drive," Seagate ST11M/R" }, /* Risto Kankkunen, risto.kankkunen@cs.helsinki.fi */ - { 0x0010,"ST11 BIOS v1.7",xd_seagate_init_controller,xd_seagate_init_drive," Seagate ST11R" }, /* Alan Hourihane, alanh@fairlite.demon.co.uk */ - { 0x1000,"(c)Copyright 1987 SMS",xd_omti_init_controller,xd_omti_init_drive,"n OMTI 5520" }, /* Dirk Melchers, dirk@merlin.nbg.sub.org */ - { 0x0006,"COPYRIGHT XEBEC (C) 1984",xd_xebec_init_controller,xd_xebec_init_drive," XEBEC" }, /* Andrzej Krzysztofowicz, ankry@mif.pg.gda.pl */ - { 0x0008,"(C) Copyright 1984 Western Digital Corp", xd_wd_init_controller, xd_wd_init_drive," Western Dig. 1002s-wx2" }, - { 0x0008,"(C) Copyright 1986 Western Digital Corporation", xd_wd_init_controller, xd_wd_init_drive," 1986 Western Digital" }, /* jfree@sovereign.org */ -}; - -static unsigned int xd_bases[] __initdata = -{ - 0xC8000, 0xCA000, 0xCC000, - 0xCE000, 0xD0000, 0xD2000, - 0xD4000, 0xD6000, 0xD8000, - 0xDA000, 0xDC000, 0xDE000, - 0xE0000 -}; - -static DEFINE_SPINLOCK(xd_lock); - -static struct gendisk *xd_gendisk[2]; - -static int xd_getgeo(struct block_device *bdev, struct hd_geometry *geo); - -static const struct block_device_operations xd_fops = { - .owner = THIS_MODULE, - .ioctl = xd_ioctl, - .getgeo = xd_getgeo, -}; -static DECLARE_WAIT_QUEUE_HEAD(xd_wait_int); -static u_char xd_drives, xd_irq = 5, xd_dma = 3, xd_maxsectors; -static u_char xd_override __initdata = 0, xd_type __initdata = 0; -static u_short xd_iobase = 0x320; -static int xd_geo[XD_MAXDRIVES*3] __initdata = { 0, }; - -static volatile int xdc_busy; -static struct timer_list xd_watchdog_int; - -static volatile u_char xd_error; -static bool nodma = XD_DONT_USE_DMA; - -static struct request_queue *xd_queue; - -/* xd_init: register the block device number and set up pointer tables */ -static int __init xd_init(void) -{ - u_char i,controller; - unsigned int address; - int err; - -#ifdef MODULE - { - u_char count = 0; - for (i = 4; i > 0; i--) - if (((xd[i] = xd[i-1]) >= 0) && !count) - count = i; - if ((xd[0] = count)) - do_xd_setup(xd); - } -#endif - - init_timer (&xd_watchdog_int); xd_watchdog_int.function = xd_watchdog; - - err = -EBUSY; - if (register_blkdev(XT_DISK_MAJOR, "xd")) - goto out1; - - err = -ENOMEM; - xd_queue = blk_init_queue(do_xd_request, &xd_lock); - if (!xd_queue) - goto out1a; - - if (xd_detect(&controller,&address)) { - - printk("Detected a%s controller (type %d) at address %06x\n", - xd_sigs[controller].name,controller,address); - if (!request_region(xd_iobase,4,"xd")) { - printk("xd: Ports at 0x%x are not available\n", - xd_iobase); - goto out2; - } - if (controller) - xd_sigs[controller].init_controller(address); - xd_drives = xd_initdrives(xd_sigs[controller].init_drive); - - printk("Detected %d hard drive%s (using IRQ%d & DMA%d)\n", - xd_drives,xd_drives == 1 ? "" : "s",xd_irq,xd_dma); - } - - /* - * With the drive detected, xd_maxsectors should now be known. - * If xd_maxsectors is 0, nothing was detected and we fall through - * to return -ENODEV - */ - if (!xd_dma_buffer && xd_maxsectors) { - xd_dma_buffer = (char *)xd_dma_mem_alloc(xd_maxsectors * 0x200); - if (!xd_dma_buffer) { - printk(KERN_ERR "xd: Out of memory.\n"); - goto out3; - } - } - - err = -ENODEV; - if (!xd_drives) - goto out3; - - for (i = 0; i < xd_drives; i++) { - XD_INFO *p = &xd_info[i]; - struct gendisk *disk = alloc_disk(64); - if (!disk) - goto Enomem; - p->unit = i; - disk->major = XT_DISK_MAJOR; - disk->first_minor = i<<6; - sprintf(disk->disk_name, "xd%c", i+'a'); - disk->fops = &xd_fops; - disk->private_data = p; - disk->queue = xd_queue; - set_capacity(disk, p->heads * p->cylinders * p->sectors); - printk(" %s: CHS=%d/%d/%d\n", disk->disk_name, - p->cylinders, p->heads, p->sectors); - xd_gendisk[i] = disk; - } - - err = -EBUSY; - if (request_irq(xd_irq,xd_interrupt_handler, 0, "XT hard disk", NULL)) { - printk("xd: unable to get IRQ%d\n",xd_irq); - goto out4; - } - - if (request_dma(xd_dma,"xd")) { - printk("xd: unable to get DMA%d\n",xd_dma); - goto out5; - } - - /* xd_maxsectors depends on controller - so set after detection */ - blk_queue_max_hw_sectors(xd_queue, xd_maxsectors); - - for (i = 0; i < xd_drives; i++) - add_disk(xd_gendisk[i]); - - return 0; - -out5: - free_irq(xd_irq, NULL); -out4: - for (i = 0; i < xd_drives; i++) - put_disk(xd_gendisk[i]); -out3: - if (xd_maxsectors) - release_region(xd_iobase,4); - - if (xd_dma_buffer) - xd_dma_mem_free((unsigned long)xd_dma_buffer, - xd_maxsectors * 0x200); -out2: - blk_cleanup_queue(xd_queue); -out1a: - unregister_blkdev(XT_DISK_MAJOR, "xd"); -out1: - return err; -Enomem: - err = -ENOMEM; - while (i--) - put_disk(xd_gendisk[i]); - goto out3; -} - -/* xd_detect: scan the possible BIOS ROM locations for the signature strings */ -static u_char __init xd_detect (u_char *controller, unsigned int *address) -{ - int i, j; - - if (xd_override) - { - *controller = xd_type; - *address = 0; - return(1); - } - - for (i = 0; i < ARRAY_SIZE(xd_bases); i++) { - void __iomem *p = ioremap(xd_bases[i], 0x2000); - if (!p) - continue; - for (j = 1; j < ARRAY_SIZE(xd_sigs); j++) { - const char *s = xd_sigs[j].string; - if (check_signature(p + xd_sigs[j].offset, s, strlen(s))) { - *controller = j; - xd_type = j; - *address = xd_bases[i]; - iounmap(p); - return 1; - } - } - iounmap(p); - } - return 0; -} - -/* do_xd_request: handle an incoming request */ -static void do_xd_request (struct request_queue * q) -{ - struct request *req; - - if (xdc_busy) - return; - - req = blk_fetch_request(q); - while (req) { - unsigned block = blk_rq_pos(req); - unsigned count = blk_rq_cur_sectors(req); - XD_INFO *disk = req->rq_disk->private_data; - int res = -EIO; - int retry; - - if (req->cmd_type != REQ_TYPE_FS) - goto done; - if (block + count > get_capacity(req->rq_disk)) - goto done; - for (retry = 0; (retry < XD_RETRIES) && !res; retry++) - res = xd_readwrite(rq_data_dir(req), disk, req->buffer, - block, count); - done: - /* wrap up, 0 = success, -errno = fail */ - if (!__blk_end_request_cur(req, res)) - req = blk_fetch_request(q); - } -} - -static int xd_getgeo(struct block_device *bdev, struct hd_geometry *geo) -{ - XD_INFO *p = bdev->bd_disk->private_data; - - geo->heads = p->heads; - geo->sectors = p->sectors; - geo->cylinders = p->cylinders; - return 0; -} - -/* xd_ioctl: handle device ioctl's */ -static int xd_locked_ioctl(struct block_device *bdev, fmode_t mode, u_int cmd, u_long arg) -{ - switch (cmd) { - case HDIO_SET_DMA: - if (!capable(CAP_SYS_ADMIN)) return -EACCES; - if (xdc_busy) return -EBUSY; - nodma = !arg; - if (nodma && xd_dma_buffer) { - xd_dma_mem_free((unsigned long)xd_dma_buffer, - xd_maxsectors * 0x200); - xd_dma_buffer = NULL; - } else if (!nodma && !xd_dma_buffer) { - xd_dma_buffer = (char *)xd_dma_mem_alloc(xd_maxsectors * 0x200); - if (!xd_dma_buffer) { - nodma = XD_DONT_USE_DMA; - return -ENOMEM; - } - } - return 0; - case HDIO_GET_DMA: - return put_user(!nodma, (long __user *) arg); - case HDIO_GET_MULTCOUNT: - return put_user(xd_maxsectors, (long __user *) arg); - default: - return -EINVAL; - } -} - -static int xd_ioctl(struct block_device *bdev, fmode_t mode, - unsigned int cmd, unsigned long param) -{ - int ret; - - mutex_lock(&xd_mutex); - ret = xd_locked_ioctl(bdev, mode, cmd, param); - mutex_unlock(&xd_mutex); - - return ret; -} - -/* xd_readwrite: handle a read/write request */ -static int xd_readwrite (u_char operation,XD_INFO *p,char *buffer,u_int block,u_int count) -{ - int drive = p->unit; - u_char cmdblk[6],sense[4]; - u_short track,cylinder; - u_char head,sector,control,mode = PIO_MODE,temp; - char **real_buffer; - register int i; - -#ifdef DEBUG_READWRITE - printk("xd_readwrite: operation = %s, drive = %d, buffer = 0x%X, block = %d, count = %d\n",operation == READ ? "read" : "write",drive,buffer,block,count); -#endif /* DEBUG_READWRITE */ - - spin_unlock_irq(&xd_lock); - - control = p->control; - if (!xd_dma_buffer) - xd_dma_buffer = (char *)xd_dma_mem_alloc(xd_maxsectors * 0x200); - while (count) { - temp = count < xd_maxsectors ? count : xd_maxsectors; - - track = block / p->sectors; - head = track % p->heads; - cylinder = track / p->heads; - sector = block % p->sectors; - -#ifdef DEBUG_READWRITE - printk("xd_readwrite: drive = %d, head = %d, cylinder = %d, sector = %d, count = %d\n",drive,head,cylinder,sector,temp); -#endif /* DEBUG_READWRITE */ - - if (xd_dma_buffer) { - mode = xd_setup_dma(operation == READ ? DMA_MODE_READ : DMA_MODE_WRITE,(u_char *)(xd_dma_buffer),temp * 0x200); - real_buffer = &xd_dma_buffer; - for (i=0; i < (temp * 0x200); i++) - xd_dma_buffer[i] = buffer[i]; - } - else - real_buffer = &buffer; - - xd_build(cmdblk,operation == READ ? CMD_READ : CMD_WRITE,drive,head,cylinder,sector,temp & 0xFF,control); - - switch (xd_command(cmdblk,mode,(u_char *)(*real_buffer),(u_char *)(*real_buffer),sense,XD_TIMEOUT)) { - case 1: - printk("xd%c: %s timeout, recalibrating drive\n",'a'+drive,(operation == READ ? "read" : "write")); - xd_recalibrate(drive); - spin_lock_irq(&xd_lock); - return -EIO; - case 2: - if (sense[0] & 0x30) { - printk("xd%c: %s - ",'a'+drive,(operation == READ ? "reading" : "writing")); - switch ((sense[0] & 0x30) >> 4) { - case 0: printk("drive error, code = 0x%X",sense[0] & 0x0F); - break; - case 1: printk("controller error, code = 0x%X",sense[0] & 0x0F); - break; - case 2: printk("command error, code = 0x%X",sense[0] & 0x0F); - break; - case 3: printk("miscellaneous error, code = 0x%X",sense[0] & 0x0F); - break; - } - } - if (sense[0] & 0x80) - printk(" - CHS = %d/%d/%d\n",((sense[2] & 0xC0) << 2) | sense[3],sense[1] & 0x1F,sense[2] & 0x3F); - /* reported drive number = (sense[1] & 0xE0) >> 5 */ - else - printk(" - no valid disk address\n"); - spin_lock_irq(&xd_lock); - return -EIO; - } - if (xd_dma_buffer) - for (i=0; i < (temp * 0x200); i++) - buffer[i] = xd_dma_buffer[i]; - - count -= temp, buffer += temp * 0x200, block += temp; - } - spin_lock_irq(&xd_lock); - return 0; -} - -/* xd_recalibrate: recalibrate a given drive and reset controller if necessary */ -static void xd_recalibrate (u_char drive) -{ - u_char cmdblk[6]; - - xd_build(cmdblk,CMD_RECALIBRATE,drive,0,0,0,0,0); - if (xd_command(cmdblk,PIO_MODE,NULL,NULL,NULL,XD_TIMEOUT * 8)) - printk("xd%c: warning! error recalibrating, controller may be unstable\n", 'a'+drive); -} - -/* xd_interrupt_handler: interrupt service routine */ -static irqreturn_t xd_interrupt_handler(int irq, void *dev_id) -{ - if (inb(XD_STATUS) & STAT_INTERRUPT) { /* check if it was our device */ -#ifdef DEBUG_OTHER - printk("xd_interrupt_handler: interrupt detected\n"); -#endif /* DEBUG_OTHER */ - outb(0,XD_CONTROL); /* acknowledge interrupt */ - wake_up(&xd_wait_int); /* and wake up sleeping processes */ - return IRQ_HANDLED; - } - else - printk("xd: unexpected interrupt\n"); - return IRQ_NONE; -} - -/* xd_setup_dma: set up the DMA controller for a data transfer */ -static u_char xd_setup_dma (u_char mode,u_char *buffer,u_int count) -{ - unsigned long f; - - if (nodma) - return (PIO_MODE); - if (((unsigned long) buffer & 0xFFFF0000) != (((unsigned long) buffer + count) & 0xFFFF0000)) { -#ifdef DEBUG_OTHER - printk("xd_setup_dma: using PIO, transfer overlaps 64k boundary\n"); -#endif /* DEBUG_OTHER */ - return (PIO_MODE); - } - - f=claim_dma_lock(); - disable_dma(xd_dma); - clear_dma_ff(xd_dma); - set_dma_mode(xd_dma,mode); - set_dma_addr(xd_dma, (unsigned long) buffer); - set_dma_count(xd_dma,count); - - release_dma_lock(f); - - return (DMA_MODE); /* use DMA and INT */ -} - -/* xd_build: put stuff into an array in a format suitable for the controller */ -static u_char *xd_build (u_char *cmdblk,u_char command,u_char drive,u_char head,u_short cylinder,u_char sector,u_char count,u_char control) -{ - cmdblk[0] = command; - cmdblk[1] = ((drive & 0x07) << 5) | (head & 0x1F); - cmdblk[2] = ((cylinder & 0x300) >> 2) | (sector & 0x3F); - cmdblk[3] = cylinder & 0xFF; - cmdblk[4] = count; - cmdblk[5] = control; - - return (cmdblk); -} - -static void xd_watchdog (unsigned long unused) -{ - xd_error = 1; - wake_up(&xd_wait_int); -} - -/* xd_waitport: waits until port & mask == flags or a timeout occurs. return 1 for a timeout */ -static inline u_char xd_waitport (u_short port,u_char flags,u_char mask,u_long timeout) -{ - u_long expiry = jiffies + timeout; - int success; - - xdc_busy = 1; - while ((success = ((inb(port) & mask) != flags)) && time_before(jiffies, expiry)) - schedule_timeout_uninterruptible(1); - xdc_busy = 0; - return (success); -} - -static inline u_int xd_wait_for_IRQ (void) -{ - unsigned long flags; - xd_watchdog_int.expires = jiffies + 8 * HZ; - add_timer(&xd_watchdog_int); - - flags=claim_dma_lock(); - enable_dma(xd_dma); - release_dma_lock(flags); - - sleep_on(&xd_wait_int); - del_timer(&xd_watchdog_int); - xdc_busy = 0; - - flags=claim_dma_lock(); - disable_dma(xd_dma); - release_dma_lock(flags); - - if (xd_error) { - printk("xd: missed IRQ - command aborted\n"); - xd_error = 0; - return (1); - } - return (0); -} - -/* xd_command: handle all data transfers necessary for a single command */ -static u_int xd_command (u_char *command,u_char mode,u_char *indata,u_char *outdata,u_char *sense,u_long timeout) -{ - u_char cmdblk[6],csb,complete = 0; - -#ifdef DEBUG_COMMAND - printk("xd_command: command = 0x%X, mode = 0x%X, indata = 0x%X, outdata = 0x%X, sense = 0x%X\n",command,mode,indata,outdata,sense); -#endif /* DEBUG_COMMAND */ - - outb(0,XD_SELECT); - outb(mode,XD_CONTROL); - - if (xd_waitport(XD_STATUS,STAT_SELECT,STAT_SELECT,timeout)) - return (1); - - while (!complete) { - if (xd_waitport(XD_STATUS,STAT_READY,STAT_READY,timeout)) - return (1); - - switch (inb(XD_STATUS) & (STAT_COMMAND | STAT_INPUT)) { - case 0: - if (mode == DMA_MODE) { - if (xd_wait_for_IRQ()) - return (1); - } else - outb(outdata ? *outdata++ : 0,XD_DATA); - break; - case STAT_INPUT: - if (mode == DMA_MODE) { - if (xd_wait_for_IRQ()) - return (1); - } else - if (indata) - *indata++ = inb(XD_DATA); - else - inb(XD_DATA); - break; - case STAT_COMMAND: - outb(command ? *command++ : 0,XD_DATA); - break; - case STAT_COMMAND | STAT_INPUT: - complete = 1; - break; - } - } - csb = inb(XD_DATA); - - if (xd_waitport(XD_STATUS,0,STAT_SELECT,timeout)) /* wait until deselected */ - return (1); - - if (csb & CSB_ERROR) { /* read sense data if error */ - xd_build(cmdblk,CMD_SENSE,(csb & CSB_LUN) >> 5,0,0,0,0,0); - if (xd_command(cmdblk,0,sense,NULL,NULL,XD_TIMEOUT)) - printk("xd: warning! sense command failed!\n"); - } - -#ifdef DEBUG_COMMAND - printk("xd_command: completed with csb = 0x%X\n",csb); -#endif /* DEBUG_COMMAND */ - - return (csb & CSB_ERROR); -} - -static u_char __init xd_initdrives (void (*init_drive)(u_char drive)) -{ - u_char cmdblk[6],i,count = 0; - - for (i = 0; i < XD_MAXDRIVES; i++) { - xd_build(cmdblk,CMD_TESTREADY,i,0,0,0,0,0); - if (!xd_command(cmdblk,PIO_MODE,NULL,NULL,NULL,XD_TIMEOUT*8)) { - msleep_interruptible(XD_INIT_DISK_DELAY); - - init_drive(count); - count++; - - msleep_interruptible(XD_INIT_DISK_DELAY); - } - } - return (count); -} - -static void __init xd_manual_geo_set (u_char drive) -{ - xd_info[drive].heads = (u_char)(xd_geo[3 * drive + 1]); - xd_info[drive].cylinders = (u_short)(xd_geo[3 * drive]); - xd_info[drive].sectors = (u_char)(xd_geo[3 * drive + 2]); -} - -static void __init xd_dtc_init_controller (unsigned int address) -{ - switch (address) { - case 0x00000: - case 0xC8000: break; /*initial: 0x320 */ - case 0xCA000: xd_iobase = 0x324; - case 0xD0000: /*5150CX*/ - case 0xD8000: break; /*5150CX & 5150XL*/ - default: printk("xd_dtc_init_controller: unsupported BIOS address %06x\n",address); - break; - } - xd_maxsectors = 0x01; /* my card seems to have trouble doing multi-block transfers? */ - - outb(0,XD_RESET); /* reset the controller */ -} - - -static void __init xd_dtc5150cx_init_drive (u_char drive) -{ - /* values from controller's BIOS - BIOS chip may be removed */ - static u_short geometry_table[][4] = { - {0x200,8,0x200,0x100}, - {0x267,2,0x267,0x267}, - {0x264,4,0x264,0x80}, - {0x132,4,0x132,0x0}, - {0x132,2,0x80, 0x132}, - {0x177,8,0x177,0x0}, - {0x132,8,0x84, 0x0}, - {}, /* not used */ - {0x132,6,0x80, 0x100}, - {0x200,6,0x100,0x100}, - {0x264,2,0x264,0x80}, - {0x280,4,0x280,0x100}, - {0x2B9,3,0x2B9,0x2B9}, - {0x2B9,5,0x2B9,0x2B9}, - {0x280,6,0x280,0x100}, - {0x132,4,0x132,0x0}}; - u_char n; - - n = inb(XD_JUMPER); - n = (drive ? n : (n >> 2)) & 0x33; - n = (n | (n >> 2)) & 0x0F; - if (xd_geo[3*drive]) - xd_manual_geo_set(drive); - else - if (n != 7) { - xd_info[drive].heads = (u_char)(geometry_table[n][1]); /* heads */ - xd_info[drive].cylinders = geometry_table[n][0]; /* cylinders */ - xd_info[drive].sectors = 17; /* sectors */ -#if 0 - xd_info[drive].rwrite = geometry_table[n][2]; /* reduced write */ - xd_info[drive].precomp = geometry_table[n][3] /* write precomp */ - xd_info[drive].ecc = 0x0B; /* ecc length */ -#endif /* 0 */ - } - else { - printk("xd%c: undetermined drive geometry\n",'a'+drive); - return; - } - xd_info[drive].control = 5; /* control byte */ - xd_setparam(CMD_DTCSETPARAM,drive,xd_info[drive].heads,xd_info[drive].cylinders,geometry_table[n][2],geometry_table[n][3],0x0B); - xd_recalibrate(drive); -} - -static void __init xd_dtc_init_drive (u_char drive) -{ - u_char cmdblk[6],buf[64]; - - xd_build(cmdblk,CMD_DTCGETGEOM,drive,0,0,0,0,0); - if (!xd_command(cmdblk,PIO_MODE,buf,NULL,NULL,XD_TIMEOUT * 2)) { - xd_info[drive].heads = buf[0x0A]; /* heads */ - xd_info[drive].cylinders = ((u_short *) (buf))[0x04]; /* cylinders */ - xd_info[drive].sectors = 17; /* sectors */ - if (xd_geo[3*drive]) - xd_manual_geo_set(drive); -#if 0 - xd_info[drive].rwrite = ((u_short *) (buf + 1))[0x05]; /* reduced write */ - xd_info[drive].precomp = ((u_short *) (buf + 1))[0x06]; /* write precomp */ - xd_info[drive].ecc = buf[0x0F]; /* ecc length */ -#endif /* 0 */ - xd_info[drive].control = 0; /* control byte */ - - xd_setparam(CMD_DTCSETPARAM,drive,xd_info[drive].heads,xd_info[drive].cylinders,((u_short *) (buf + 1))[0x05],((u_short *) (buf + 1))[0x06],buf[0x0F]); - xd_build(cmdblk,CMD_DTCSETSTEP,drive,0,0,0,0,7); - if (xd_command(cmdblk,PIO_MODE,NULL,NULL,NULL,XD_TIMEOUT * 2)) - printk("xd_dtc_init_drive: error setting step rate for xd%c\n", 'a'+drive); - } - else - printk("xd_dtc_init_drive: error reading geometry for xd%c\n", 'a'+drive); -} - -static void __init xd_wd_init_controller (unsigned int address) -{ - switch (address) { - case 0x00000: - case 0xC8000: break; /*initial: 0x320 */ - case 0xCA000: xd_iobase = 0x324; break; - case 0xCC000: xd_iobase = 0x328; break; - case 0xCE000: xd_iobase = 0x32C; break; - case 0xD0000: xd_iobase = 0x328; break; /* ? */ - case 0xD8000: xd_iobase = 0x32C; break; /* ? */ - default: printk("xd_wd_init_controller: unsupported BIOS address %06x\n",address); - break; - } - xd_maxsectors = 0x01; /* this one doesn't wrap properly either... */ - - outb(0,XD_RESET); /* reset the controller */ - - msleep(XD_INIT_DISK_DELAY); -} - -static void __init xd_wd_init_drive (u_char drive) -{ - /* values from controller's BIOS - BIOS may be disabled */ - static u_short geometry_table[][4] = { - {0x264,4,0x1C2,0x1C2}, /* common part */ - {0x132,4,0x099,0x0}, - {0x267,2,0x1C2,0x1C2}, - {0x267,4,0x1C2,0x1C2}, - - {0x334,6,0x335,0x335}, /* 1004 series RLL */ - {0x30E,4,0x30F,0x3DC}, - {0x30E,2,0x30F,0x30F}, - {0x267,4,0x268,0x268}, - - {0x3D5,5,0x3D6,0x3D6}, /* 1002 series RLL */ - {0x3DB,7,0x3DC,0x3DC}, - {0x264,4,0x265,0x265}, - {0x267,4,0x268,0x268}}; - - u_char cmdblk[6],buf[0x200]; - u_char n = 0,rll,jumper_state,use_jumper_geo; - u_char wd_1002 = (xd_sigs[xd_type].string[7] == '6'); - - jumper_state = ~(inb(0x322)); - if (jumper_state & 0x40) - xd_irq = 9; - rll = (jumper_state & 0x30) ? (0x04 << wd_1002) : 0; - xd_build(cmdblk,CMD_READ,drive,0,0,0,1,0); - if (!xd_command(cmdblk,PIO_MODE,buf,NULL,NULL,XD_TIMEOUT * 2)) { - xd_info[drive].heads = buf[0x1AF]; /* heads */ - xd_info[drive].cylinders = ((u_short *) (buf + 1))[0xD6]; /* cylinders */ - xd_info[drive].sectors = 17; /* sectors */ - if (xd_geo[3*drive]) - xd_manual_geo_set(drive); -#if 0 - xd_info[drive].rwrite = ((u_short *) (buf))[0xD8]; /* reduced write */ - xd_info[drive].wprecomp = ((u_short *) (buf))[0xDA]; /* write precomp */ - xd_info[drive].ecc = buf[0x1B4]; /* ecc length */ -#endif /* 0 */ - xd_info[drive].control = buf[0x1B5]; /* control byte */ - use_jumper_geo = !(xd_info[drive].heads) || !(xd_info[drive].cylinders); - if (xd_geo[3*drive]) { - xd_manual_geo_set(drive); - xd_info[drive].control = rll ? 7 : 5; - } - else if (use_jumper_geo) { - n = (((jumper_state & 0x0F) >> (drive << 1)) & 0x03) | rll; - xd_info[drive].cylinders = geometry_table[n][0]; - xd_info[drive].heads = (u_char)(geometry_table[n][1]); - xd_info[drive].control = rll ? 7 : 5; -#if 0 - xd_info[drive].rwrite = geometry_table[n][2]; - xd_info[drive].wprecomp = geometry_table[n][3]; - xd_info[drive].ecc = 0x0B; -#endif /* 0 */ - } - if (!wd_1002) { - if (use_jumper_geo) - xd_setparam(CMD_WDSETPARAM,drive,xd_info[drive].heads,xd_info[drive].cylinders, - geometry_table[n][2],geometry_table[n][3],0x0B); - else - xd_setparam(CMD_WDSETPARAM,drive,xd_info[drive].heads,xd_info[drive].cylinders, - ((u_short *) (buf))[0xD8],((u_short *) (buf))[0xDA],buf[0x1B4]); - } - /* 1002 based RLL controller requests converted addressing, but reports physical - (physical 26 sec., logical 17 sec.) - 1004 based ???? */ - if (rll & wd_1002) { - if ((xd_info[drive].cylinders *= 26, - xd_info[drive].cylinders /= 17) > 1023) - xd_info[drive].cylinders = 1023; /* 1024 ? */ -#if 0 - xd_info[drive].rwrite *= 26; - xd_info[drive].rwrite /= 17; - xd_info[drive].wprecomp *= 26 - xd_info[drive].wprecomp /= 17; -#endif /* 0 */ - } - } - else - printk("xd_wd_init_drive: error reading geometry for xd%c\n",'a'+drive); - -} - -static void __init xd_seagate_init_controller (unsigned int address) -{ - switch (address) { - case 0x00000: - case 0xC8000: break; /*initial: 0x320 */ - case 0xD0000: xd_iobase = 0x324; break; - case 0xD8000: xd_iobase = 0x328; break; - case 0xE0000: xd_iobase = 0x32C; break; - default: printk("xd_seagate_init_controller: unsupported BIOS address %06x\n",address); - break; - } - xd_maxsectors = 0x40; - - outb(0,XD_RESET); /* reset the controller */ -} - -static void __init xd_seagate_init_drive (u_char drive) -{ - u_char cmdblk[6],buf[0x200]; - - xd_build(cmdblk,CMD_ST11GETGEOM,drive,0,0,0,1,0); - if (!xd_command(cmdblk,PIO_MODE,buf,NULL,NULL,XD_TIMEOUT * 2)) { - xd_info[drive].heads = buf[0x04]; /* heads */ - xd_info[drive].cylinders = (buf[0x02] << 8) | buf[0x03]; /* cylinders */ - xd_info[drive].sectors = buf[0x05]; /* sectors */ - xd_info[drive].control = 0; /* control byte */ - } - else - printk("xd_seagate_init_drive: error reading geometry from xd%c\n", 'a'+drive); -} - -/* Omti support courtesy Dirk Melchers */ -static void __init xd_omti_init_controller (unsigned int address) -{ - switch (address) { - case 0x00000: - case 0xC8000: break; /*initial: 0x320 */ - case 0xD0000: xd_iobase = 0x324; break; - case 0xD8000: xd_iobase = 0x328; break; - case 0xE0000: xd_iobase = 0x32C; break; - default: printk("xd_omti_init_controller: unsupported BIOS address %06x\n",address); - break; - } - - xd_maxsectors = 0x40; - - outb(0,XD_RESET); /* reset the controller */ -} - -static void __init xd_omti_init_drive (u_char drive) -{ - /* gets infos from drive */ - xd_override_init_drive(drive); - - /* set other parameters, Hardcoded, not that nice :-) */ - xd_info[drive].control = 2; -} - -/* Xebec support (AK) */ -static void __init xd_xebec_init_controller (unsigned int address) -{ -/* iobase may be set manually in range 0x300 - 0x33C - irq may be set manually to 2(9),3,4,5,6,7 - dma may be set manually to 1,2,3 - (How to detect them ???) -BIOS address may be set manually in range 0x0 - 0xF8000 -If you need non-standard settings use the xd=... command */ - - switch (address) { - case 0x00000: - case 0xC8000: /* initially: xd_iobase==0x320 */ - case 0xD0000: - case 0xD2000: - case 0xD4000: - case 0xD6000: - case 0xD8000: - case 0xDA000: - case 0xDC000: - case 0xDE000: - case 0xE0000: break; - default: printk("xd_xebec_init_controller: unsupported BIOS address %06x\n",address); - break; - } - - xd_maxsectors = 0x01; - outb(0,XD_RESET); /* reset the controller */ - - msleep(XD_INIT_DISK_DELAY); -} - -static void __init xd_xebec_init_drive (u_char drive) -{ - /* values from controller's BIOS - BIOS chip may be removed */ - static u_short geometry_table[][5] = { - {0x132,4,0x080,0x080,0x7}, - {0x132,4,0x080,0x080,0x17}, - {0x264,2,0x100,0x100,0x7}, - {0x264,2,0x100,0x100,0x17}, - {0x132,8,0x080,0x080,0x7}, - {0x132,8,0x080,0x080,0x17}, - {0x264,4,0x100,0x100,0x6}, - {0x264,4,0x100,0x100,0x17}, - {0x2BC,5,0x2BC,0x12C,0x6}, - {0x3A5,4,0x3A5,0x3A5,0x7}, - {0x26C,6,0x26C,0x26C,0x7}, - {0x200,8,0x200,0x100,0x17}, - {0x400,5,0x400,0x400,0x7}, - {0x400,6,0x400,0x400,0x7}, - {0x264,8,0x264,0x200,0x17}, - {0x33E,7,0x33E,0x200,0x7}}; - u_char n; - - n = inb(XD_JUMPER) & 0x0F; /* BIOS's drive number: same geometry - is assumed for BOTH drives */ - if (xd_geo[3*drive]) - xd_manual_geo_set(drive); - else { - xd_info[drive].heads = (u_char)(geometry_table[n][1]); /* heads */ - xd_info[drive].cylinders = geometry_table[n][0]; /* cylinders */ - xd_info[drive].sectors = 17; /* sectors */ -#if 0 - xd_info[drive].rwrite = geometry_table[n][2]; /* reduced write */ - xd_info[drive].precomp = geometry_table[n][3] /* write precomp */ - xd_info[drive].ecc = 0x0B; /* ecc length */ -#endif /* 0 */ - } - xd_info[drive].control = geometry_table[n][4]; /* control byte */ - xd_setparam(CMD_XBSETPARAM,drive,xd_info[drive].heads,xd_info[drive].cylinders,geometry_table[n][2],geometry_table[n][3],0x0B); - xd_recalibrate(drive); -} - -/* xd_override_init_drive: this finds disk geometry in a "binary search" style, narrowing in on the "correct" number of heads - etc. by trying values until it gets the highest successful value. Idea courtesy Salvador Abreu (spa@fct.unl.pt). */ -static void __init xd_override_init_drive (u_char drive) -{ - u_short min[] = { 0,0,0 },max[] = { 16,1024,64 },test[] = { 0,0,0 }; - u_char cmdblk[6],i; - - if (xd_geo[3*drive]) - xd_manual_geo_set(drive); - else { - for (i = 0; i < 3; i++) { - while (min[i] != max[i] - 1) { - test[i] = (min[i] + max[i]) / 2; - xd_build(cmdblk,CMD_SEEK,drive,(u_char) test[0],(u_short) test[1],(u_char) test[2],0,0); - if (!xd_command(cmdblk,PIO_MODE,NULL,NULL,NULL,XD_TIMEOUT * 2)) - min[i] = test[i]; - else - max[i] = test[i]; - } - test[i] = min[i]; - } - xd_info[drive].heads = (u_char) min[0] + 1; - xd_info[drive].cylinders = (u_short) min[1] + 1; - xd_info[drive].sectors = (u_char) min[2] + 1; - } - xd_info[drive].control = 0; -} - -/* xd_setup: initialise controller from command line parameters */ -static void __init do_xd_setup (int *integers) -{ - switch (integers[0]) { - case 4: if (integers[4] < 0) - nodma = 1; - else if (integers[4] < 8) - xd_dma = integers[4]; - case 3: if ((integers[3] > 0) && (integers[3] <= 0x3FC)) - xd_iobase = integers[3]; - case 2: if ((integers[2] > 0) && (integers[2] < 16)) - xd_irq = integers[2]; - case 1: xd_override = 1; - if ((integers[1] >= 0) && (integers[1] < ARRAY_SIZE(xd_sigs))) - xd_type = integers[1]; - case 0: break; - default:printk("xd: too many parameters for xd\n"); - } - xd_maxsectors = 0x01; -} - -/* xd_setparam: set the drive characteristics */ -static void __init xd_setparam (u_char command,u_char drive,u_char heads,u_short cylinders,u_short rwrite,u_short wprecomp,u_char ecc) -{ - u_char cmdblk[14]; - - xd_build(cmdblk,command,drive,0,0,0,0,0); - cmdblk[6] = (u_char) (cylinders >> 8) & 0x03; - cmdblk[7] = (u_char) (cylinders & 0xFF); - cmdblk[8] = heads & 0x1F; - cmdblk[9] = (u_char) (rwrite >> 8) & 0x03; - cmdblk[10] = (u_char) (rwrite & 0xFF); - cmdblk[11] = (u_char) (wprecomp >> 8) & 0x03; - cmdblk[12] = (u_char) (wprecomp & 0xFF); - cmdblk[13] = ecc; - - /* Some controllers require geometry info as data, not command */ - - if (xd_command(cmdblk,PIO_MODE,NULL,&cmdblk[6],NULL,XD_TIMEOUT * 2)) - printk("xd: error setting characteristics for xd%c\n", 'a'+drive); -} - - -#ifdef MODULE - -module_param_array(xd, int, NULL, 0); -module_param_array(xd_geo, int, NULL, 0); -module_param(nodma, bool, 0); - -MODULE_LICENSE("GPL"); - -void cleanup_module(void) -{ - int i; - unregister_blkdev(XT_DISK_MAJOR, "xd"); - for (i = 0; i < xd_drives; i++) { - del_gendisk(xd_gendisk[i]); - put_disk(xd_gendisk[i]); - } - blk_cleanup_queue(xd_queue); - release_region(xd_iobase,4); - if (xd_drives) { - free_irq(xd_irq, NULL); - free_dma(xd_dma); - if (xd_dma_buffer) - xd_dma_mem_free((unsigned long)xd_dma_buffer, xd_maxsectors * 0x200); - } -} -#else - -static int __init xd_setup (char *str) -{ - int ints[5]; - get_options (str, ARRAY_SIZE (ints), ints); - do_xd_setup (ints); - return 1; -} - -/* xd_manual_geo_init: initialise drive geometry from command line parameters - (used only for WD drives) */ -static int __init xd_manual_geo_init (char *str) -{ - int i, integers[1 + 3*XD_MAXDRIVES]; - - get_options (str, ARRAY_SIZE (integers), integers); - if (integers[0]%3 != 0) { - printk("xd: incorrect number of parameters for xd_geo\n"); - return 1; - } - for (i = 0; (i < integers[0]) && (i < 3*XD_MAXDRIVES); i++) - xd_geo[i] = integers[i+1]; - return 1; -} - -__setup ("xd=", xd_setup); -__setup ("xd_geo=", xd_manual_geo_init); - -#endif /* MODULE */ - -module_init(xd_init); -MODULE_ALIAS_BLOCKDEV_MAJOR(XT_DISK_MAJOR); diff --git a/drivers/block/xd.h b/drivers/block/xd.h deleted file mode 100644 index 37cacef..0000000 --- a/drivers/block/xd.h +++ /dev/null @@ -1,134 +0,0 @@ -#ifndef _LINUX_XD_H -#define _LINUX_XD_H - -/* - * This file contains the definitions for the IO ports and errors etc. for XT hard disk controllers (at least the DTC 5150X). - * - * Author: Pat Mackinlay, pat@it.com.au - * Date: 29/09/92 - * - * Revised: 01/01/93, ... - * - * Ref: DTC 5150X Controller Specification (thanks to Kevin Fowler, kevinf@agora.rain.com) - * Also thanks to: Salvador Abreu, Dave Thaler, Risto Kankkunen and Wim Van Dorst. - */ - -#include <linux/interrupt.h> - -/* XT hard disk controller registers */ -#define XD_DATA (xd_iobase + 0x00) /* data RW register */ -#define XD_RESET (xd_iobase + 0x01) /* reset WO register */ -#define XD_STATUS (xd_iobase + 0x01) /* status RO register */ -#define XD_SELECT (xd_iobase + 0x02) /* select WO register */ -#define XD_JUMPER (xd_iobase + 0x02) /* jumper RO register */ -#define XD_CONTROL (xd_iobase + 0x03) /* DMAE/INTE WO register */ -#define XD_RESERVED (xd_iobase + 0x03) /* reserved */ - -/* XT hard disk controller commands (incomplete list) */ -#define CMD_TESTREADY 0x00 /* test drive ready */ -#define CMD_RECALIBRATE 0x01 /* recalibrate drive */ -#define CMD_SENSE 0x03 /* request sense */ -#define CMD_FORMATDRV 0x04 /* format drive */ -#define CMD_VERIFY 0x05 /* read verify */ -#define CMD_FORMATTRK 0x06 /* format track */ -#define CMD_FORMATBAD 0x07 /* format bad track */ -#define CMD_READ 0x08 /* read */ -#define CMD_WRITE 0x0A /* write */ -#define CMD_SEEK 0x0B /* seek */ - -/* Controller specific commands */ -#define CMD_DTCSETPARAM 0x0C /* set drive parameters (DTC 5150X & CX only?) */ -#define CMD_DTCGETECC 0x0D /* get ecc error length (DTC 5150X only?) */ -#define CMD_DTCREADBUF 0x0E /* read sector buffer (DTC 5150X only?) */ -#define CMD_DTCWRITEBUF 0x0F /* write sector buffer (DTC 5150X only?) */ -#define CMD_DTCREMAPTRK 0x11 /* assign alternate track (DTC 5150X only?) */ -#define CMD_DTCGETPARAM 0xFB /* get drive parameters (DTC 5150X only?) */ -#define CMD_DTCSETSTEP 0xFC /* set step rate (DTC 5150X only?) */ -#define CMD_DTCSETGEOM 0xFE /* set geometry data (DTC 5150X only?) */ -#define CMD_DTCGETGEOM 0xFF /* get geometry data (DTC 5150X only?) */ -#define CMD_ST11GETGEOM 0xF8 /* get geometry data (Seagate ST11R/M only?) */ -#define CMD_WDSETPARAM 0x0C /* set drive parameters (WD 1004A27X only?) */ -#define CMD_XBSETPARAM 0x0C /* set drive parameters (XEBEC only?) */ - -/* Bits for command status byte */ -#define CSB_ERROR 0x02 /* error */ -#define CSB_LUN 0x20 /* logical Unit Number */ - -/* XT hard disk controller status bits */ -#define STAT_READY 0x01 /* controller is ready */ -#define STAT_INPUT 0x02 /* data flowing from controller to host */ -#define STAT_COMMAND 0x04 /* controller in command phase */ -#define STAT_SELECT 0x08 /* controller is selected */ -#define STAT_REQUEST 0x10 /* controller requesting data */ -#define STAT_INTERRUPT 0x20 /* controller requesting interrupt */ - -/* XT hard disk controller control bits */ -#define PIO_MODE 0x00 /* control bits to set for PIO */ -#define DMA_MODE 0x03 /* control bits to set for DMA & interrupt */ - -#define XD_MAXDRIVES 2 /* maximum 2 drives */ -#define XD_TIMEOUT HZ /* 1 second timeout */ -#define XD_RETRIES 4 /* maximum 4 retries */ - -#undef DEBUG /* define for debugging output */ - -#ifdef DEBUG - #define DEBUG_STARTUP /* debug driver initialisation */ - #define DEBUG_OVERRIDE /* debug override geometry detection */ - #define DEBUG_READWRITE /* debug each read/write command */ - #define DEBUG_OTHER /* debug misc. interrupt/DMA stuff */ - #define DEBUG_COMMAND /* debug each controller command */ -#endif /* DEBUG */ - -/* this structure defines the XT drives and their types */ -typedef struct { - u_char heads; - u_short cylinders; - u_char sectors; - u_char control; - int unit; -} XD_INFO; - -/* this structure defines a ROM BIOS signature */ -typedef struct { - unsigned int offset; - const char *string; - void (*init_controller)(unsigned int address); - void (*init_drive)(u_char drive); - const char *name; -} XD_SIGNATURE; - -#ifndef MODULE -static int xd_manual_geo_init (char *command); -#endif /* MODULE */ -static u_char xd_detect (u_char *controller, unsigned int *address); -static u_char xd_initdrives (void (*init_drive)(u_char drive)); - -static void do_xd_request (struct request_queue * q); -static int xd_ioctl (struct block_device *bdev,fmode_t mode,unsigned int cmd,unsigned long arg); -static int xd_readwrite (u_char operation,XD_INFO *disk,char *buffer,u_int block,u_int count); -static void xd_recalibrate (u_char drive); - -static irqreturn_t xd_interrupt_handler(int irq, void *dev_id); -static u_char xd_setup_dma (u_char opcode,u_char *buffer,u_int count); -static u_char *xd_build (u_char *cmdblk,u_char command,u_char drive,u_char head,u_short cylinder,u_char sector,u_char count,u_char control); -static void xd_watchdog (unsigned long unused); -static inline u_char xd_waitport (u_short port,u_char flags,u_char mask,u_long timeout); -static u_int xd_command (u_char *command,u_char mode,u_char *indata,u_char *outdata,u_char *sense,u_long timeout); - -/* card specific setup and geometry gathering code */ -static void xd_dtc_init_controller (unsigned int address); -static void xd_dtc5150cx_init_drive (u_char drive); -static void xd_dtc_init_drive (u_char drive); -static void xd_wd_init_controller (unsigned int address); -static void xd_wd_init_drive (u_char drive); -static void xd_seagate_init_controller (unsigned int address); -static void xd_seagate_init_drive (u_char drive); -static void xd_omti_init_controller (unsigned int address); -static void xd_omti_init_drive (u_char drive); -static void xd_xebec_init_controller (unsigned int address); -static void xd_xebec_init_drive (u_char drive); -static void xd_setparam (u_char command,u_char drive,u_char heads,u_short cylinders,u_short rwrite,u_short wprecomp,u_char ecc); -static void xd_override_init_drive (u_char drive); - -#endif /* _LINUX_XD_H */ diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index 5ac841f..de1f319 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -46,6 +46,7 @@ #include <xen/xen.h> #include <asm/xen/hypervisor.h> #include <asm/xen/hypercall.h> +#include <xen/balloon.h> #include "common.h" /* @@ -239,6 +240,7 @@ static void free_persistent_gnts(struct rb_root *root, unsigned int num) ret = gnttab_unmap_refs(unmap, NULL, pages, segs_to_unmap); BUG_ON(ret); + free_xenballooned_pages(segs_to_unmap, pages); segs_to_unmap = 0; } @@ -527,8 +529,8 @@ static int xen_blkbk_map(struct blkif_request *req, GFP_KERNEL); if (!persistent_gnt) return -ENOMEM; - persistent_gnt->page = alloc_page(GFP_KERNEL); - if (!persistent_gnt->page) { + if (alloc_xenballooned_pages(1, &persistent_gnt->page, + false)) { kfree(persistent_gnt); return -ENOMEM; } @@ -879,7 +881,6 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif, goto fail_response; } - preq.dev = req->u.rw.handle; preq.sector_number = req->u.rw.sector_number; preq.nr_sects = 0; diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c index 6398072..5e237f6 100644 --- a/drivers/block/xen-blkback/xenbus.c +++ b/drivers/block/xen-blkback/xenbus.c @@ -367,6 +367,7 @@ static int xen_blkbk_remove(struct xenbus_device *dev) be->blkif = NULL; } + kfree(be->mode); kfree(be); dev_set_drvdata(&dev->dev, NULL); return 0; @@ -502,6 +503,7 @@ static void backend_changed(struct xenbus_watch *watch, = container_of(watch, struct backend_info, backend_watch); struct xenbus_device *dev = be->dev; int cdrom = 0; + unsigned long handle; char *device_type; DPRINTK(""); @@ -521,10 +523,10 @@ static void backend_changed(struct xenbus_watch *watch, return; } - if ((be->major || be->minor) && - ((be->major != major) || (be->minor != minor))) { - pr_warn(DRV_PFX "changing physical device (from %x:%x to %x:%x) not supported.\n", - be->major, be->minor, major, minor); + if (be->major | be->minor) { + if (be->major != major || be->minor != minor) + pr_warn(DRV_PFX "changing physical device (from %x:%x to %x:%x) not supported.\n", + be->major, be->minor, major, minor); return; } @@ -542,36 +544,33 @@ static void backend_changed(struct xenbus_watch *watch, kfree(device_type); } - if (be->major == 0 && be->minor == 0) { - /* Front end dir is a number, which is used as the handle. */ - - char *p = strrchr(dev->otherend, '/') + 1; - long handle; - err = strict_strtoul(p, 0, &handle); - if (err) - return; + /* Front end dir is a number, which is used as the handle. */ + err = strict_strtoul(strrchr(dev->otherend, '/') + 1, 0, &handle); + if (err) + return; - be->major = major; - be->minor = minor; + be->major = major; + be->minor = minor; - err = xen_vbd_create(be->blkif, handle, major, minor, - (NULL == strchr(be->mode, 'w')), cdrom); - if (err) { - be->major = 0; - be->minor = 0; - xenbus_dev_fatal(dev, err, "creating vbd structure"); - return; - } + err = xen_vbd_create(be->blkif, handle, major, minor, + !strchr(be->mode, 'w'), cdrom); + if (err) + xenbus_dev_fatal(dev, err, "creating vbd structure"); + else { err = xenvbd_sysfs_addif(dev); if (err) { xen_vbd_free(&be->blkif->vbd); - be->major = 0; - be->minor = 0; xenbus_dev_fatal(dev, err, "creating sysfs entries"); - return; } + } + if (err) { + kfree(be->mode); + be->mode = NULL; + be->major = 0; + be->minor = 0; + } else { /* We're potentially connected now */ xen_update_blkif_status(be->blkif); } diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 11043c1..c3dae2e 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -791,7 +791,7 @@ static void blkif_restart_queue(struct work_struct *work) static void blkif_free(struct blkfront_info *info, int suspend) { struct llist_node *all_gnts; - struct grant *persistent_gnt; + struct grant *persistent_gnt, *tmp; struct llist_node *n; /* Prevent new requests being issued until we fix things up. */ @@ -805,10 +805,17 @@ static void blkif_free(struct blkfront_info *info, int suspend) /* Remove all persistent grants */ if (info->persistent_gnts_c) { all_gnts = llist_del_all(&info->persistent_gnts); - llist_for_each_entry_safe(persistent_gnt, n, all_gnts, node) { + persistent_gnt = llist_entry(all_gnts, typeof(*(persistent_gnt)), node); + while (persistent_gnt) { gnttab_end_foreign_access(persistent_gnt->gref, 0, 0UL); __free_page(pfn_to_page(persistent_gnt->pfn)); - kfree(persistent_gnt); + tmp = persistent_gnt; + n = persistent_gnt->node.next; + if (n) + persistent_gnt = llist_entry(n, typeof(*(persistent_gnt)), node); + else + persistent_gnt = NULL; + kfree(tmp); } info->persistent_gnts_c = 0; } diff --git a/drivers/clocksource/Kconfig b/drivers/clocksource/Kconfig index e920cbe..e507ab7 100644 --- a/drivers/clocksource/Kconfig +++ b/drivers/clocksource/Kconfig @@ -62,3 +62,8 @@ config CLKSRC_DBX500_PRCMU_SCHED_CLOCK config ARM_ARCH_TIMER bool + +config CLKSRC_METAG_GENERIC + def_bool y if METAG + help + This option enables support for the Meta per-thread timers. diff --git a/drivers/clocksource/Makefile b/drivers/clocksource/Makefile index 7d671b8..4d8283a 100644 --- a/drivers/clocksource/Makefile +++ b/drivers/clocksource/Makefile @@ -21,3 +21,4 @@ obj-$(CONFIG_ARCH_TEGRA) += tegra20_timer.o obj-$(CONFIG_VT8500_TIMER) += vt8500_timer.o obj-$(CONFIG_ARM_ARCH_TIMER) += arm_arch_timer.o +obj-$(CONFIG_CLKSRC_METAG_GENERIC) += metag_generic.o diff --git a/drivers/clocksource/metag_generic.c b/drivers/clocksource/metag_generic.c new file mode 100644 index 0000000..ade7513 --- /dev/null +++ b/drivers/clocksource/metag_generic.c @@ -0,0 +1,198 @@ +/* + * Copyright (C) 2005-2013 Imagination Technologies Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * + * Support for Meta per-thread timers. + * + * Meta hardware threads have 2 timers. The background timer (TXTIMER) is used + * as a free-running time base (hz clocksource), and the interrupt timer + * (TXTIMERI) is used for the timer interrupt (clock event). Both counters + * traditionally count at approximately 1MHz. + */ + +#include <clocksource/metag_generic.h> +#include <linux/cpu.h> +#include <linux/errno.h> +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/param.h> +#include <linux/time.h> +#include <linux/init.h> +#include <linux/proc_fs.h> +#include <linux/clocksource.h> +#include <linux/clockchips.h> +#include <linux/interrupt.h> + +#include <asm/clock.h> +#include <asm/hwthread.h> +#include <asm/core_reg.h> +#include <asm/metag_mem.h> +#include <asm/tbx.h> + +#define HARDWARE_FREQ 1000000 /* 1MHz */ +#define HARDWARE_DIV 1 /* divide by 1 = 1MHz clock */ +#define HARDWARE_TO_NS_SHIFT 10 /* convert ticks to ns */ + +static unsigned int hwtimer_freq = HARDWARE_FREQ; +static DEFINE_PER_CPU(struct clock_event_device, local_clockevent); +static DEFINE_PER_CPU(char [11], local_clockevent_name); + +static int metag_timer_set_next_event(unsigned long delta, + struct clock_event_device *dev) +{ + __core_reg_set(TXTIMERI, -delta); + return 0; +} + +static void metag_timer_set_mode(enum clock_event_mode mode, + struct clock_event_device *evt) +{ + switch (mode) { + case CLOCK_EVT_MODE_ONESHOT: + case CLOCK_EVT_MODE_RESUME: + break; + + case CLOCK_EVT_MODE_SHUTDOWN: + /* We should disable the IRQ here */ + break; + + case CLOCK_EVT_MODE_PERIODIC: + case CLOCK_EVT_MODE_UNUSED: + WARN_ON(1); + break; + }; +} + +static cycle_t metag_clocksource_read(struct clocksource *cs) +{ + return __core_reg_get(TXTIMER); +} + +static struct clocksource clocksource_metag = { + .name = "META", + .rating = 200, + .mask = CLOCKSOURCE_MASK(32), + .read = metag_clocksource_read, + .flags = CLOCK_SOURCE_IS_CONTINUOUS, +}; + +static irqreturn_t metag_timer_interrupt(int irq, void *dummy) +{ + struct clock_event_device *evt = &__get_cpu_var(local_clockevent); + + evt->event_handler(evt); + + return IRQ_HANDLED; +} + +static struct irqaction metag_timer_irq = { + .name = "META core timer", + .handler = metag_timer_interrupt, + .flags = IRQF_TIMER | IRQF_IRQPOLL | IRQF_PERCPU, +}; + +unsigned long long sched_clock(void) +{ + unsigned long long ticks = __core_reg_get(TXTIMER); + return ticks << HARDWARE_TO_NS_SHIFT; +} + +static void __cpuinit arch_timer_setup(unsigned int cpu) +{ + unsigned int txdivtime; + struct clock_event_device *clk = &per_cpu(local_clockevent, cpu); + char *name = per_cpu(local_clockevent_name, cpu); + + txdivtime = __core_reg_get(TXDIVTIME); + + txdivtime &= ~TXDIVTIME_DIV_BITS; + txdivtime |= (HARDWARE_DIV & TXDIVTIME_DIV_BITS); + + __core_reg_set(TXDIVTIME, txdivtime); + + sprintf(name, "META %d", cpu); + clk->name = name; + clk->features = CLOCK_EVT_FEAT_ONESHOT, + + clk->rating = 200, + clk->shift = 12, + clk->irq = tbisig_map(TBID_SIGNUM_TRT), + clk->set_mode = metag_timer_set_mode, + clk->set_next_event = metag_timer_set_next_event, + + clk->mult = div_sc(hwtimer_freq, NSEC_PER_SEC, clk->shift); + clk->max_delta_ns = clockevent_delta2ns(0x7fffffff, clk); + clk->min_delta_ns = clockevent_delta2ns(0xf, clk); + clk->cpumask = cpumask_of(cpu); + + clockevents_register_device(clk); + + /* + * For all non-boot CPUs we need to synchronize our free + * running clock (TXTIMER) with the boot CPU's clock. + * + * While this won't be accurate, it should be close enough. + */ + if (cpu) { + unsigned int thread0 = cpu_2_hwthread_id[0]; + unsigned long val; + + val = core_reg_read(TXUCT_ID, TXTIMER_REGNUM, thread0); + __core_reg_set(TXTIMER, val); + } +} + +static int __cpuinit arch_timer_cpu_notify(struct notifier_block *self, + unsigned long action, void *hcpu) +{ + int cpu = (long)hcpu; + + switch (action) { + case CPU_STARTING: + case CPU_STARTING_FROZEN: + arch_timer_setup(cpu); + break; + } + + return NOTIFY_OK; +} + +static struct notifier_block __cpuinitdata arch_timer_cpu_nb = { + .notifier_call = arch_timer_cpu_notify, +}; + +int __init metag_generic_timer_init(void) +{ + /* + * On Meta 2 SoCs, the actual frequency of the timer is based on the + * Meta core clock speed divided by an integer, so it is only + * approximately 1MHz. Calculating the real frequency here drastically + * reduces clock skew on these SoCs. + */ +#ifdef CONFIG_METAG_META21 + hwtimer_freq = get_coreclock() / (metag_in32(EXPAND_TIMER_DIV) + 1); +#endif + clocksource_register_hz(&clocksource_metag, hwtimer_freq); + + setup_irq(tbisig_map(TBID_SIGNUM_TRT), &metag_timer_irq); + + /* Configure timer on boot CPU */ + arch_timer_setup(smp_processor_id()); + + /* Hook cpu boot to configure other CPU's timers */ + register_cpu_notifier(&arch_timer_cpu_nb); + + return 0; +} diff --git a/drivers/clocksource/time-armada-370-xp.c b/drivers/clocksource/time-armada-370-xp.c index a4605fd..47a6730 100644 --- a/drivers/clocksource/time-armada-370-xp.c +++ b/drivers/clocksource/time-armada-370-xp.c @@ -27,8 +27,10 @@ #include <linux/of_address.h> #include <linux/irq.h> #include <linux/module.h> -#include <asm/sched_clock.h> +#include <asm/sched_clock.h> +#include <asm/localtimer.h> +#include <linux/percpu.h> /* * Timer block registers. */ @@ -49,6 +51,7 @@ #define TIMER1_RELOAD_OFF 0x0018 #define TIMER1_VAL_OFF 0x001c +#define LCL_TIMER_EVENTS_STATUS 0x0028 /* Global timers are connected to the coherency fabric clock, and the below divider reduces their incrementing frequency. */ #define TIMER_DIVIDER_SHIFT 5 @@ -57,14 +60,17 @@ /* * SoC-specific data. */ -static void __iomem *timer_base; -static int timer_irq; +static void __iomem *timer_base, *local_base; +static unsigned int timer_clk; +static bool timer25Mhz = true; /* * Number of timer ticks per jiffy. */ static u32 ticks_per_jiffy; +static struct clock_event_device __percpu **percpu_armada_370_xp_evt; + static u32 notrace armada_370_xp_read_sched_clock(void) { return ~readl(timer_base + TIMER0_VAL_OFF); @@ -78,24 +84,23 @@ armada_370_xp_clkevt_next_event(unsigned long delta, struct clock_event_device *dev) { u32 u; - /* * Clear clockevent timer interrupt. */ - writel(TIMER1_CLR_MASK, timer_base + TIMER_EVENTS_STATUS); + writel(TIMER0_CLR_MASK, local_base + LCL_TIMER_EVENTS_STATUS); /* * Setup new clockevent timer value. */ - writel(delta, timer_base + TIMER1_VAL_OFF); + writel(delta, local_base + TIMER0_VAL_OFF); /* * Enable the timer. */ - u = readl(timer_base + TIMER_CTRL_OFF); - u = ((u & ~TIMER1_RELOAD_EN) | TIMER1_EN | - TIMER1_DIV(TIMER_DIVIDER_SHIFT)); - writel(u, timer_base + TIMER_CTRL_OFF); + u = readl(local_base + TIMER_CTRL_OFF); + u = ((u & ~TIMER0_RELOAD_EN) | TIMER0_EN | + TIMER0_DIV(TIMER_DIVIDER_SHIFT)); + writel(u, local_base + TIMER_CTRL_OFF); return 0; } @@ -107,37 +112,38 @@ armada_370_xp_clkevt_mode(enum clock_event_mode mode, u32 u; if (mode == CLOCK_EVT_MODE_PERIODIC) { + /* * Setup timer to fire at 1/HZ intervals. */ - writel(ticks_per_jiffy - 1, timer_base + TIMER1_RELOAD_OFF); - writel(ticks_per_jiffy - 1, timer_base + TIMER1_VAL_OFF); + writel(ticks_per_jiffy - 1, local_base + TIMER0_RELOAD_OFF); + writel(ticks_per_jiffy - 1, local_base + TIMER0_VAL_OFF); /* * Enable timer. */ - u = readl(timer_base + TIMER_CTRL_OFF); - writel((u | TIMER1_EN | TIMER1_RELOAD_EN | - TIMER1_DIV(TIMER_DIVIDER_SHIFT)), - timer_base + TIMER_CTRL_OFF); + u = readl(local_base + TIMER_CTRL_OFF); + + writel((u | TIMER0_EN | TIMER0_RELOAD_EN | + TIMER0_DIV(TIMER_DIVIDER_SHIFT)), + local_base + TIMER_CTRL_OFF); } else { /* * Disable timer. */ - u = readl(timer_base + TIMER_CTRL_OFF); - writel(u & ~TIMER1_EN, timer_base + TIMER_CTRL_OFF); + u = readl(local_base + TIMER_CTRL_OFF); + writel(u & ~TIMER0_EN, local_base + TIMER_CTRL_OFF); /* * ACK pending timer interrupt. */ - writel(TIMER1_CLR_MASK, timer_base + TIMER_EVENTS_STATUS); - + writel(TIMER0_CLR_MASK, local_base + LCL_TIMER_EVENTS_STATUS); } } static struct clock_event_device armada_370_xp_clkevt = { - .name = "armada_370_xp_tick", + .name = "armada_370_xp_per_cpu_tick", .features = CLOCK_EVT_FEAT_ONESHOT | CLOCK_EVT_FEAT_PERIODIC, .shift = 32, .rating = 300, @@ -150,32 +156,78 @@ static irqreturn_t armada_370_xp_timer_interrupt(int irq, void *dev_id) /* * ACK timer interrupt and call event handler. */ + struct clock_event_device *evt = *(struct clock_event_device **)dev_id; - writel(TIMER1_CLR_MASK, timer_base + TIMER_EVENTS_STATUS); - armada_370_xp_clkevt.event_handler(&armada_370_xp_clkevt); + writel(TIMER0_CLR_MASK, local_base + LCL_TIMER_EVENTS_STATUS); + evt->event_handler(evt); return IRQ_HANDLED; } -static struct irqaction armada_370_xp_timer_irq = { - .name = "armada_370_xp_tick", - .flags = IRQF_DISABLED | IRQF_TIMER, - .handler = armada_370_xp_timer_interrupt +/* + * Setup the local clock events for a CPU. + */ +static int __cpuinit armada_370_xp_timer_setup(struct clock_event_device *evt) +{ + u32 u; + int cpu = smp_processor_id(); + + /* Use existing clock_event for cpu 0 */ + if (!smp_processor_id()) + return 0; + + u = readl(local_base + TIMER_CTRL_OFF); + if (timer25Mhz) + writel(u | TIMER0_25MHZ, local_base + TIMER_CTRL_OFF); + else + writel(u & ~TIMER0_25MHZ, local_base + TIMER_CTRL_OFF); + + evt->name = armada_370_xp_clkevt.name; + evt->irq = armada_370_xp_clkevt.irq; + evt->features = armada_370_xp_clkevt.features; + evt->shift = armada_370_xp_clkevt.shift; + evt->rating = armada_370_xp_clkevt.rating, + evt->set_next_event = armada_370_xp_clkevt_next_event, + evt->set_mode = armada_370_xp_clkevt_mode, + evt->cpumask = cpumask_of(cpu); + + *__this_cpu_ptr(percpu_armada_370_xp_evt) = evt; + + clockevents_config_and_register(evt, timer_clk, 1, 0xfffffffe); + enable_percpu_irq(evt->irq, 0); + + return 0; +} + +static void armada_370_xp_timer_stop(struct clock_event_device *evt) +{ + evt->set_mode(CLOCK_EVT_MODE_UNUSED, evt); + disable_percpu_irq(evt->irq); +} + +static struct local_timer_ops armada_370_xp_local_timer_ops __cpuinitdata = { + .setup = armada_370_xp_timer_setup, + .stop = armada_370_xp_timer_stop, }; void __init armada_370_xp_timer_init(void) { u32 u; struct device_node *np; - unsigned int timer_clk; + int res; + np = of_find_compatible_node(NULL, NULL, "marvell,armada-370-xp-timer"); timer_base = of_iomap(np, 0); WARN_ON(!timer_base); + local_base = of_iomap(np, 1); if (of_find_property(np, "marvell,timer-25Mhz", NULL)) { /* The fixed 25MHz timer is available so let's use it */ + u = readl(local_base + TIMER_CTRL_OFF); + writel(u | TIMER0_25MHZ, + local_base + TIMER_CTRL_OFF); u = readl(timer_base + TIMER_CTRL_OFF); - writel(u | TIMER0_25MHZ | TIMER1_25MHZ, + writel(u | TIMER0_25MHZ, timer_base + TIMER_CTRL_OFF); timer_clk = 25000000; } else { @@ -183,15 +235,23 @@ void __init armada_370_xp_timer_init(void) struct clk *clk = of_clk_get(np, 0); WARN_ON(IS_ERR(clk)); rate = clk_get_rate(clk); + u = readl(local_base + TIMER_CTRL_OFF); + writel(u & ~(TIMER0_25MHZ), + local_base + TIMER_CTRL_OFF); + u = readl(timer_base + TIMER_CTRL_OFF); - writel(u & ~(TIMER0_25MHZ | TIMER1_25MHZ), + writel(u & ~(TIMER0_25MHZ), timer_base + TIMER_CTRL_OFF); + timer_clk = rate / TIMER_DIVIDER; + timer25Mhz = false; } - /* We use timer 0 as clocksource, and timer 1 for - clockevents */ - timer_irq = irq_of_parse_and_map(np, 1); + /* + * We use timer 0 as clocksource, and private(local) timer 0 + * for clockevents + */ + armada_370_xp_clkevt.irq = irq_of_parse_and_map(np, 4); ticks_per_jiffy = (timer_clk + HZ / 2) / HZ; @@ -216,12 +276,26 @@ void __init armada_370_xp_timer_init(void) "armada_370_xp_clocksource", timer_clk, 300, 32, clocksource_mmio_readl_down); - /* - * Setup clockevent timer (interrupt-driven). - */ - setup_irq(timer_irq, &armada_370_xp_timer_irq); + /* Register the clockevent on the private timer of CPU 0 */ armada_370_xp_clkevt.cpumask = cpumask_of(0); clockevents_config_and_register(&armada_370_xp_clkevt, timer_clk, 1, 0xfffffffe); -} + percpu_armada_370_xp_evt = alloc_percpu(struct clock_event_device *); + + + /* + * Setup clockevent timer (interrupt-driven). + */ + *__this_cpu_ptr(percpu_armada_370_xp_evt) = &armada_370_xp_clkevt; + res = request_percpu_irq(armada_370_xp_clkevt.irq, + armada_370_xp_timer_interrupt, + armada_370_xp_clkevt.name, + percpu_armada_370_xp_evt); + if (!res) { + enable_percpu_irq(armada_370_xp_clkevt.irq, 0); +#ifdef CONFIG_LOCAL_TIMERS + local_timer_register(&armada_370_xp_local_timer_ops); +#endif + } +} diff --git a/drivers/dma/dw_dmac.c b/drivers/dma/dw_dmac.c index 51c3ea2..c599558 100644 --- a/drivers/dma/dw_dmac.c +++ b/drivers/dma/dw_dmac.c @@ -20,6 +20,7 @@ #include <linux/interrupt.h> #include <linux/io.h> #include <linux/of.h> +#include <linux/of_dma.h> #include <linux/mm.h> #include <linux/module.h> #include <linux/platform_device.h> @@ -171,7 +172,13 @@ static void dwc_initialize(struct dw_dma_chan *dwc) if (dwc->initialized == true) return; - if (dws) { + if (dws && dws->cfg_hi == ~0 && dws->cfg_lo == ~0) { + /* autoconfigure based on request line from DT */ + if (dwc->direction == DMA_MEM_TO_DEV) + cfghi = DWC_CFGH_DST_PER(dwc->request_line); + else if (dwc->direction == DMA_DEV_TO_MEM) + cfghi = DWC_CFGH_SRC_PER(dwc->request_line); + } else if (dws) { /* * We need controller-specific data to set up slave * transfers. @@ -1226,49 +1233,64 @@ static void dwc_free_chan_resources(struct dma_chan *chan) dev_vdbg(chan2dev(chan), "%s: done\n", __func__); } -bool dw_dma_generic_filter(struct dma_chan *chan, void *param) +struct dw_dma_filter_args { + struct dw_dma *dw; + unsigned int req; + unsigned int src; + unsigned int dst; +}; + +static bool dw_dma_generic_filter(struct dma_chan *chan, void *param) { + struct dw_dma_chan *dwc = to_dw_dma_chan(chan); struct dw_dma *dw = to_dw_dma(chan->device); - static struct dw_dma *last_dw; - static char *last_bus_id; - int i = -1; + struct dw_dma_filter_args *fargs = param; + struct dw_dma_slave *dws = &dwc->slave; - /* - * dmaengine framework calls this routine for all channels of all dma - * controller, until true is returned. If 'param' bus_id is not - * registered with a dma controller (dw), then there is no need of - * running below function for all channels of dw. - * - * This block of code does this by saving the parameters of last - * failure. If dw and param are same, i.e. trying on same dw with - * different channel, return false. - */ - if ((last_dw == dw) && (last_bus_id == param)) - return false; - /* - * Return true: - * - If dw_dma's platform data is not filled with slave info, then all - * dma controllers are fine for transfer. - * - Or if param is NULL - */ - if (!dw->sd || !param) - return true; + /* ensure the device matches our channel */ + if (chan->device != &fargs->dw->dma) + return false; - while (++i < dw->sd_count) { - if (!strcmp(dw->sd[i].bus_id, param)) { - chan->private = &dw->sd[i]; - last_dw = NULL; - last_bus_id = NULL; + dws->dma_dev = dw->dma.dev; + dws->cfg_hi = ~0; + dws->cfg_lo = ~0; + dws->src_master = fargs->src; + dws->dst_master = fargs->dst; - return true; - } - } + dwc->request_line = fargs->req; - last_dw = dw; - last_bus_id = param; - return false; + chan->private = dws; + + return true; +} + +static struct dma_chan *dw_dma_xlate(struct of_phandle_args *dma_spec, + struct of_dma *ofdma) +{ + struct dw_dma *dw = ofdma->of_dma_data; + struct dw_dma_filter_args fargs = { + .dw = dw, + }; + dma_cap_mask_t cap; + + if (dma_spec->args_count != 3) + return NULL; + + fargs.req = be32_to_cpup(dma_spec->args+0); + fargs.src = be32_to_cpup(dma_spec->args+1); + fargs.dst = be32_to_cpup(dma_spec->args+2); + + if (WARN_ON(fargs.req >= DW_DMA_MAX_NR_REQUESTS || + fargs.src >= dw->nr_masters || + fargs.dst >= dw->nr_masters)) + return NULL; + + dma_cap_zero(cap); + dma_cap_set(DMA_SLAVE, cap); + + /* TODO: there should be a simpler way to do this */ + return dma_request_channel(cap, dw_dma_generic_filter, &fargs); } -EXPORT_SYMBOL(dw_dma_generic_filter); /* --------------------- Cyclic DMA API extensions -------------------- */ @@ -1554,9 +1576,8 @@ static void dw_dma_off(struct dw_dma *dw) static struct dw_dma_platform_data * dw_dma_parse_dt(struct platform_device *pdev) { - struct device_node *sn, *cn, *np = pdev->dev.of_node; + struct device_node *np = pdev->dev.of_node; struct dw_dma_platform_data *pdata; - struct dw_dma_slave *sd; u32 tmp, arr[4]; if (!np) { @@ -1568,7 +1589,7 @@ dw_dma_parse_dt(struct platform_device *pdev) if (!pdata) return NULL; - if (of_property_read_u32(np, "nr_channels", &pdata->nr_channels)) + if (of_property_read_u32(np, "dma-channels", &pdata->nr_channels)) return NULL; if (of_property_read_bool(np, "is_private")) @@ -1583,7 +1604,7 @@ dw_dma_parse_dt(struct platform_device *pdev) if (!of_property_read_u32(np, "block_size", &tmp)) pdata->block_size = tmp; - if (!of_property_read_u32(np, "nr_masters", &tmp)) { + if (!of_property_read_u32(np, "dma-masters", &tmp)) { if (tmp > 4) return NULL; @@ -1595,36 +1616,6 @@ dw_dma_parse_dt(struct platform_device *pdev) for (tmp = 0; tmp < pdata->nr_masters; tmp++) pdata->data_width[tmp] = arr[tmp]; - /* parse slave data */ - sn = of_find_node_by_name(np, "slave_info"); - if (!sn) - return pdata; - - /* calculate number of slaves */ - tmp = of_get_child_count(sn); - if (!tmp) - return NULL; - - sd = devm_kzalloc(&pdev->dev, sizeof(*sd) * tmp, GFP_KERNEL); - if (!sd) - return NULL; - - pdata->sd = sd; - pdata->sd_count = tmp; - - for_each_child_of_node(sn, cn) { - sd->dma_dev = &pdev->dev; - of_property_read_string(cn, "bus_id", &sd->bus_id); - of_property_read_u32(cn, "cfg_hi", &sd->cfg_hi); - of_property_read_u32(cn, "cfg_lo", &sd->cfg_lo); - if (!of_property_read_u32(cn, "src_master", &tmp)) - sd->src_master = tmp; - - if (!of_property_read_u32(cn, "dst_master", &tmp)) - sd->dst_master = tmp; - sd++; - } - return pdata; } #else @@ -1705,8 +1696,6 @@ static int dw_probe(struct platform_device *pdev) clk_prepare_enable(dw->clk); dw->regs = regs; - dw->sd = pdata->sd; - dw->sd_count = pdata->sd_count; /* get hardware configuration parameters */ if (autocfg) { @@ -1837,6 +1826,14 @@ static int dw_probe(struct platform_device *pdev) dma_async_device_register(&dw->dma); + if (pdev->dev.of_node) { + err = of_dma_controller_register(pdev->dev.of_node, + dw_dma_xlate, dw); + if (err && err != -ENODEV) + dev_err(&pdev->dev, + "could not register of_dma_controller\n"); + } + return 0; } @@ -1845,6 +1842,8 @@ static int dw_remove(struct platform_device *pdev) struct dw_dma *dw = platform_get_drvdata(pdev); struct dw_dma_chan *dwc, *_dwc; + if (pdev->dev.of_node) + of_dma_controller_free(pdev->dev.of_node); dw_dma_off(dw); dma_async_device_unregister(&dw->dma); diff --git a/drivers/dma/dw_dmac_regs.h b/drivers/dma/dw_dmac_regs.h index 88dd8eb..cf0ce5c 100644 --- a/drivers/dma/dw_dmac_regs.h +++ b/drivers/dma/dw_dmac_regs.h @@ -13,6 +13,7 @@ #include <linux/dw_dmac.h> #define DW_DMA_MAX_NR_CHANNELS 8 +#define DW_DMA_MAX_NR_REQUESTS 16 /* flow controller */ enum dw_dma_fc { @@ -211,6 +212,8 @@ struct dw_dma_chan { /* hardware configuration */ unsigned int block_size; bool nollp; + unsigned int request_line; + struct dw_dma_slave slave; /* configuration passed via DMA_SLAVE_CONFIG */ struct dma_slave_config dma_sconfig; @@ -239,10 +242,6 @@ struct dw_dma { struct tasklet_struct tasklet; struct clk *clk; - /* slave information */ - struct dw_dma_slave *sd; - unsigned int sd_count; - u8 all_chan_mask; /* hardware configuration */ diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig index acb709b..e443f2c1 100644 --- a/drivers/edac/Kconfig +++ b/drivers/edac/Kconfig @@ -80,6 +80,29 @@ config EDAC_MM_EDAC occurred so that a particular failing memory module can be replaced. If unsure, select 'Y'. +config EDAC_GHES + bool "Output ACPI APEI/GHES BIOS detected errors via EDAC" + depends on ACPI_APEI_GHES && (EDAC_MM_EDAC=y) + default y + help + Not all machines support hardware-driven error report. Some of those + provide a BIOS-driven error report mechanism via ACPI, using the + APEI/GHES driver. By enabling this option, the error reports provided + by GHES are sent to userspace via the EDAC API. + + When this option is enabled, it will disable the hardware-driven + mechanisms, if a GHES BIOS is detected, entering into the + "Firmware First" mode. + + It should be noticed that keeping both GHES and a hardware-driven + error mechanism won't work well, as BIOS will race with OS, while + reading the error registers. So, if you want to not use "Firmware + first" GHES error mechanism, you should disable GHES either at + compilation time or by passing "ghes.disable=1" Kernel parameter + at boot time. + + In doubt, say 'Y'. + config EDAC_AMD64 tristate "AMD64 (Opteron, Athlon64) K8, F10h" depends on EDAC_MM_EDAC && AMD_NB && X86_64 && EDAC_DECODE_MCE diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile index 5608a9b..4154ed6 100644 --- a/drivers/edac/Makefile +++ b/drivers/edac/Makefile @@ -16,6 +16,7 @@ ifdef CONFIG_PCI edac_core-y += edac_pci.o edac_pci_sysfs.o endif +obj-$(CONFIG_EDAC_GHES) += ghes_edac.o obj-$(CONFIG_EDAC_MCE_INJ) += mce_amd_inj.o edac_mce_amd-y := mce_amd.o diff --git a/drivers/edac/edac_core.h b/drivers/edac/edac_core.h index 23bb99f..3c2625e 100644 --- a/drivers/edac/edac_core.h +++ b/drivers/edac/edac_core.h @@ -453,6 +453,11 @@ extern struct mem_ctl_info *find_mci_by_dev(struct device *dev); extern struct mem_ctl_info *edac_mc_del_mc(struct device *dev); extern int edac_mc_find_csrow_by_page(struct mem_ctl_info *mci, unsigned long page); + +void edac_raw_mc_handle_error(const enum hw_event_mc_err_type type, + struct mem_ctl_info *mci, + struct edac_raw_error_desc *e); + void edac_mc_handle_error(const enum hw_event_mc_err_type type, struct mem_ctl_info *mci, const u16 error_count, diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c index d1e9eb1..cdb81aa 100644 --- a/drivers/edac/edac_mc.c +++ b/drivers/edac/edac_mc.c @@ -42,6 +42,12 @@ static DEFINE_MUTEX(mem_ctls_mutex); static LIST_HEAD(mc_devices); +/* + * Used to lock EDAC MC to just one module, avoiding two drivers e. g. + * apei/ghes and i7core_edac to be used at the same time. + */ +static void const *edac_mc_owner; + unsigned edac_dimm_info_location(struct dimm_info *dimm, char *buf, unsigned len) { @@ -441,13 +447,6 @@ struct mem_ctl_info *edac_mc_alloc(unsigned mc_num, mci->op_state = OP_ALLOC; - /* at this point, the root kobj is valid, and in order to - * 'free' the object, then the function: - * edac_mc_unregister_sysfs_main_kobj() must be called - * which will perform kobj unregistration and the actual free - * will occur during the kobject callback operation - */ - return mci; error: @@ -666,9 +665,9 @@ fail1: return 1; } -static void del_mc_from_global_list(struct mem_ctl_info *mci) +static int del_mc_from_global_list(struct mem_ctl_info *mci) { - atomic_dec(&edac_handlers); + int handlers = atomic_dec_return(&edac_handlers); list_del_rcu(&mci->link); /* these are for safe removal of devices from global list while @@ -676,6 +675,8 @@ static void del_mc_from_global_list(struct mem_ctl_info *mci) */ synchronize_rcu(); INIT_LIST_HEAD(&mci->link); + + return handlers; } /** @@ -719,6 +720,7 @@ EXPORT_SYMBOL(edac_mc_find); /* FIXME - should a warning be printed if no error detection? correction? */ int edac_mc_add_mc(struct mem_ctl_info *mci) { + int ret = -EINVAL; edac_dbg(0, "\n"); #ifdef CONFIG_EDAC_DEBUG @@ -749,6 +751,11 @@ int edac_mc_add_mc(struct mem_ctl_info *mci) #endif mutex_lock(&mem_ctls_mutex); + if (edac_mc_owner && edac_mc_owner != mci->mod_name) { + ret = -EPERM; + goto fail0; + } + if (add_mc_to_global_list(mci)) goto fail0; @@ -775,6 +782,8 @@ int edac_mc_add_mc(struct mem_ctl_info *mci) edac_mc_printk(mci, KERN_INFO, "Giving out device to '%s' '%s':" " DEV %s\n", mci->mod_name, mci->ctl_name, edac_dev_name(mci)); + edac_mc_owner = mci->mod_name; + mutex_unlock(&mem_ctls_mutex); return 0; @@ -783,7 +792,7 @@ fail1: fail0: mutex_unlock(&mem_ctls_mutex); - return 1; + return ret; } EXPORT_SYMBOL_GPL(edac_mc_add_mc); @@ -809,7 +818,8 @@ struct mem_ctl_info *edac_mc_del_mc(struct device *dev) return NULL; } - del_mc_from_global_list(mci); + if (!del_mc_from_global_list(mci)) + edac_mc_owner = NULL; mutex_unlock(&mem_ctls_mutex); /* flush workq processes */ @@ -907,6 +917,7 @@ const char *edac_layer_name[] = { [EDAC_MC_LAYER_CHANNEL] = "channel", [EDAC_MC_LAYER_SLOT] = "slot", [EDAC_MC_LAYER_CHIP_SELECT] = "csrow", + [EDAC_MC_LAYER_ALL_MEM] = "memory", }; EXPORT_SYMBOL_GPL(edac_layer_name); @@ -1054,7 +1065,46 @@ static void edac_ue_error(struct mem_ctl_info *mci, edac_inc_ue_error(mci, enable_per_layer_report, pos, error_count); } -#define OTHER_LABEL " or " +/** + * edac_raw_mc_handle_error - reports a memory event to userspace without doing + * anything to discover the error location + * + * @type: severity of the error (CE/UE/Fatal) + * @mci: a struct mem_ctl_info pointer + * @e: error description + * + * This raw function is used internally by edac_mc_handle_error(). It should + * only be called directly when the hardware error come directly from BIOS, + * like in the case of APEI GHES driver. + */ +void edac_raw_mc_handle_error(const enum hw_event_mc_err_type type, + struct mem_ctl_info *mci, + struct edac_raw_error_desc *e) +{ + char detail[80]; + int pos[EDAC_MAX_LAYERS] = { e->top_layer, e->mid_layer, e->low_layer }; + + /* Memory type dependent details about the error */ + if (type == HW_EVENT_ERR_CORRECTED) { + snprintf(detail, sizeof(detail), + "page:0x%lx offset:0x%lx grain:%ld syndrome:0x%lx", + e->page_frame_number, e->offset_in_page, + e->grain, e->syndrome); + edac_ce_error(mci, e->error_count, pos, e->msg, e->location, e->label, + detail, e->other_detail, e->enable_per_layer_report, + e->page_frame_number, e->offset_in_page, e->grain); + } else { + snprintf(detail, sizeof(detail), + "page:0x%lx offset:0x%lx grain:%ld", + e->page_frame_number, e->offset_in_page, e->grain); + + edac_ue_error(mci, e->error_count, pos, e->msg, e->location, e->label, + detail, e->other_detail, e->enable_per_layer_report); + } + + +} +EXPORT_SYMBOL_GPL(edac_raw_mc_handle_error); /** * edac_mc_handle_error - reports a memory event to userspace @@ -1086,19 +1136,27 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type, const char *msg, const char *other_detail) { - /* FIXME: too much for stack: move it to some pre-alocated area */ - char detail[80], location[80]; - char label[(EDAC_MC_LABEL_LEN + 1 + sizeof(OTHER_LABEL)) * mci->tot_dimms]; char *p; int row = -1, chan = -1; int pos[EDAC_MAX_LAYERS] = { top_layer, mid_layer, low_layer }; - int i; - long grain; - bool enable_per_layer_report = false; + int i, n_labels = 0; u8 grain_bits; + struct edac_raw_error_desc *e = &mci->error_desc; edac_dbg(3, "MC%d\n", mci->mc_idx); + /* Fills the error report buffer */ + memset(e, 0, sizeof (*e)); + e->error_count = error_count; + e->top_layer = top_layer; + e->mid_layer = mid_layer; + e->low_layer = low_layer; + e->page_frame_number = page_frame_number; + e->offset_in_page = offset_in_page; + e->syndrome = syndrome; + e->msg = msg; + e->other_detail = other_detail; + /* * Check if the event report is consistent and if the memory * location is known. If it is known, enable_per_layer_report will be @@ -1121,7 +1179,7 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type, pos[i] = -1; } if (pos[i] >= 0) - enable_per_layer_report = true; + e->enable_per_layer_report = true; } /* @@ -1135,8 +1193,7 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type, * where each memory belongs to a separate channel within the same * branch. */ - grain = 0; - p = label; + p = e->label; *p = '\0'; for (i = 0; i < mci->tot_dimms; i++) { @@ -1150,8 +1207,8 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type, continue; /* get the max grain, over the error match range */ - if (dimm->grain > grain) - grain = dimm->grain; + if (dimm->grain > e->grain) + e->grain = dimm->grain; /* * If the error is memory-controller wide, there's no need to @@ -1159,8 +1216,13 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type, * channel/memory controller/... may be affected. * Also, don't show errors for empty DIMM slots. */ - if (enable_per_layer_report && dimm->nr_pages) { - if (p != label) { + if (e->enable_per_layer_report && dimm->nr_pages) { + if (n_labels >= EDAC_MAX_LABELS) { + e->enable_per_layer_report = false; + break; + } + n_labels++; + if (p != e->label) { strcpy(p, OTHER_LABEL); p += strlen(OTHER_LABEL); } @@ -1187,12 +1249,12 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type, } } - if (!enable_per_layer_report) { - strcpy(label, "any memory"); + if (!e->enable_per_layer_report) { + strcpy(e->label, "any memory"); } else { edac_dbg(4, "csrow/channel to increment: (%d,%d)\n", row, chan); - if (p == label) - strcpy(label, "unknown memory"); + if (p == e->label) + strcpy(e->label, "unknown memory"); if (type == HW_EVENT_ERR_CORRECTED) { if (row >= 0) { mci->csrows[row]->ce_count += error_count; @@ -1205,7 +1267,7 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type, } /* Fill the RAM location data */ - p = location; + p = e->location; for (i = 0; i < mci->n_layers; i++) { if (pos[i] < 0) @@ -1215,32 +1277,16 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type, edac_layer_name[mci->layers[i].type], pos[i]); } - if (p > location) + if (p > e->location) *(p - 1) = '\0'; /* Report the error via the trace interface */ - grain_bits = fls_long(grain) + 1; - trace_mc_event(type, msg, label, error_count, - mci->mc_idx, top_layer, mid_layer, low_layer, - PAGES_TO_MiB(page_frame_number) | offset_in_page, - grain_bits, syndrome, other_detail); + grain_bits = fls_long(e->grain) + 1; + trace_mc_event(type, e->msg, e->label, e->error_count, + mci->mc_idx, e->top_layer, e->mid_layer, e->low_layer, + PAGES_TO_MiB(e->page_frame_number) | e->offset_in_page, + grain_bits, e->syndrome, e->other_detail); - /* Memory type dependent details about the error */ - if (type == HW_EVENT_ERR_CORRECTED) { - snprintf(detail, sizeof(detail), - "page:0x%lx offset:0x%lx grain:%ld syndrome:0x%lx", - page_frame_number, offset_in_page, - grain, syndrome); - edac_ce_error(mci, error_count, pos, msg, location, label, - detail, other_detail, enable_per_layer_report, - page_frame_number, offset_in_page, grain); - } else { - snprintf(detail, sizeof(detail), - "page:0x%lx offset:0x%lx grain:%ld", - page_frame_number, offset_in_page, grain); - - edac_ue_error(mci, error_count, pos, msg, location, label, - detail, other_detail, enable_per_layer_report); - } + edac_raw_mc_handle_error(type, mci, e); } EXPORT_SYMBOL_GPL(edac_mc_handle_error); diff --git a/drivers/edac/edac_mc_sysfs.c b/drivers/edac/edac_mc_sysfs.c index 0ca1ca7..4f4b613 100644 --- a/drivers/edac/edac_mc_sysfs.c +++ b/drivers/edac/edac_mc_sysfs.c @@ -7,7 +7,7 @@ * * Written Doug Thompson <norsk5@xmission.com> www.softwarebitmaker.com * - * (c) 2012 - Mauro Carvalho Chehab <mchehab@redhat.com> + * (c) 2012-2013 - Mauro Carvalho Chehab <mchehab@redhat.com> * The entire API were re-written, and ported to use struct device * */ @@ -429,8 +429,12 @@ static int edac_create_csrow_objects(struct mem_ctl_info *mci) if (!nr_pages_per_csrow(csrow)) continue; err = edac_create_csrow_object(mci, mci->csrows[i], i); - if (err < 0) + if (err < 0) { + edac_dbg(1, + "failure: create csrow objects for csrow %d\n", + i); goto error; + } } return 0; @@ -677,9 +681,6 @@ static ssize_t mci_sdram_scrub_rate_store(struct device *dev, unsigned long bandwidth = 0; int new_bw = 0; - if (!mci->set_sdram_scrub_rate) - return -ENODEV; - if (strict_strtoul(data, 10, &bandwidth) < 0) return -EINVAL; @@ -703,9 +704,6 @@ static ssize_t mci_sdram_scrub_rate_show(struct device *dev, struct mem_ctl_info *mci = to_mci(dev); int bandwidth = 0; - if (!mci->get_sdram_scrub_rate) - return -ENODEV; - bandwidth = mci->get_sdram_scrub_rate(mci); if (bandwidth < 0) { edac_printk(KERN_DEBUG, EDAC_MC, "Error reading scrub rate\n"); @@ -866,8 +864,7 @@ DEVICE_ATTR(ce_count, S_IRUGO, mci_ce_count_show, NULL); DEVICE_ATTR(max_location, S_IRUGO, mci_max_location_show, NULL); /* memory scrubber attribute file */ -DEVICE_ATTR(sdram_scrub_rate, S_IRUGO | S_IWUSR, mci_sdram_scrub_rate_show, - mci_sdram_scrub_rate_store); +DEVICE_ATTR(sdram_scrub_rate, 0, NULL, NULL); static struct attribute *mci_attrs[] = { &dev_attr_reset_counters.attr, @@ -878,7 +875,6 @@ static struct attribute *mci_attrs[] = { &dev_attr_ce_noinfo_count.attr, &dev_attr_ue_count.attr, &dev_attr_ce_count.attr, - &dev_attr_sdram_scrub_rate.attr, &dev_attr_max_location.attr, NULL }; @@ -1007,11 +1003,28 @@ int edac_create_sysfs_mci_device(struct mem_ctl_info *mci) edac_dbg(0, "creating device %s\n", dev_name(&mci->dev)); err = device_add(&mci->dev); if (err < 0) { + edac_dbg(1, "failure: create device %s\n", dev_name(&mci->dev)); bus_unregister(&mci->bus); kfree(mci->bus.name); return err; } + if (mci->set_sdram_scrub_rate || mci->get_sdram_scrub_rate) { + if (mci->get_sdram_scrub_rate) { + dev_attr_sdram_scrub_rate.attr.mode |= S_IRUGO; + dev_attr_sdram_scrub_rate.show = &mci_sdram_scrub_rate_show; + } + if (mci->set_sdram_scrub_rate) { + dev_attr_sdram_scrub_rate.attr.mode |= S_IWUSR; + dev_attr_sdram_scrub_rate.store = &mci_sdram_scrub_rate_store; + } + err = device_create_file(&mci->dev, + &dev_attr_sdram_scrub_rate); + if (err) { + edac_dbg(1, "failure: create sdram_scrub_rate\n"); + goto fail2; + } + } /* * Create the dimm/rank devices */ @@ -1056,6 +1069,7 @@ fail: continue; device_unregister(&dimm->dev); } +fail2: device_unregister(&mci->dev); bus_unregister(&mci->bus); kfree(mci->bus.name); diff --git a/drivers/edac/edac_module.c b/drivers/edac/edac_module.c index 12c951a..a66941f 100644 --- a/drivers/edac/edac_module.c +++ b/drivers/edac/edac_module.c @@ -146,7 +146,7 @@ static void __exit edac_exit(void) /* * Inform the kernel of our entry and exit points */ -module_init(edac_init); +subsys_initcall(edac_init); module_exit(edac_exit); MODULE_LICENSE("GPL"); diff --git a/drivers/edac/edac_pci_sysfs.c b/drivers/edac/edac_pci_sysfs.c index 0056c4d..e8658e4 100644 --- a/drivers/edac/edac_pci_sysfs.c +++ b/drivers/edac/edac_pci_sysfs.c @@ -429,8 +429,8 @@ static void edac_pci_main_kobj_teardown(void) if (atomic_dec_return(&edac_pci_sysfs_refcount) == 0) { edac_dbg(0, "called kobject_put on main kobj\n"); kobject_put(edac_pci_top_main_kobj); + edac_put_sysfs_subsys(); } - edac_put_sysfs_subsys(); } /* diff --git a/drivers/edac/ghes_edac.c b/drivers/edac/ghes_edac.c new file mode 100644 index 0000000..bb53467 --- /dev/null +++ b/drivers/edac/ghes_edac.c @@ -0,0 +1,537 @@ +/* + * GHES/EDAC Linux driver + * + * This file may be distributed under the terms of the GNU General Public + * License version 2. + * + * Copyright (c) 2013 by Mauro Carvalho Chehab <mchehab@redhat.com> + * + * Red Hat Inc. http://www.redhat.com + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <acpi/ghes.h> +#include <linux/edac.h> +#include <linux/dmi.h> +#include "edac_core.h" +#include <ras/ras_event.h> + +#define GHES_EDAC_REVISION " Ver: 1.0.0" + +struct ghes_edac_pvt { + struct list_head list; + struct ghes *ghes; + struct mem_ctl_info *mci; + + /* Buffers for the error handling routine */ + char detail_location[240]; + char other_detail[160]; + char msg[80]; +}; + +static LIST_HEAD(ghes_reglist); +static DEFINE_MUTEX(ghes_edac_lock); +static int ghes_edac_mc_num; + + +/* Memory Device - Type 17 of SMBIOS spec */ +struct memdev_dmi_entry { + u8 type; + u8 length; + u16 handle; + u16 phys_mem_array_handle; + u16 mem_err_info_handle; + u16 total_width; + u16 data_width; + u16 size; + u8 form_factor; + u8 device_set; + u8 device_locator; + u8 bank_locator; + u8 memory_type; + u16 type_detail; + u16 speed; + u8 manufacturer; + u8 serial_number; + u8 asset_tag; + u8 part_number; + u8 attributes; + u32 extended_size; + u16 conf_mem_clk_speed; +} __attribute__((__packed__)); + +struct ghes_edac_dimm_fill { + struct mem_ctl_info *mci; + unsigned count; +}; + +char *memory_type[] = { + [MEM_EMPTY] = "EMPTY", + [MEM_RESERVED] = "RESERVED", + [MEM_UNKNOWN] = "UNKNOWN", + [MEM_FPM] = "FPM", + [MEM_EDO] = "EDO", + [MEM_BEDO] = "BEDO", + [MEM_SDR] = "SDR", + [MEM_RDR] = "RDR", + [MEM_DDR] = "DDR", + [MEM_RDDR] = "RDDR", + [MEM_RMBS] = "RMBS", + [MEM_DDR2] = "DDR2", + [MEM_FB_DDR2] = "FB_DDR2", + [MEM_RDDR2] = "RDDR2", + [MEM_XDR] = "XDR", + [MEM_DDR3] = "DDR3", + [MEM_RDDR3] = "RDDR3", +}; + +static void ghes_edac_count_dimms(const struct dmi_header *dh, void *arg) +{ + int *num_dimm = arg; + + if (dh->type == DMI_ENTRY_MEM_DEVICE) + (*num_dimm)++; +} + +static void ghes_edac_dmidecode(const struct dmi_header *dh, void *arg) +{ + struct ghes_edac_dimm_fill *dimm_fill = arg; + struct mem_ctl_info *mci = dimm_fill->mci; + + if (dh->type == DMI_ENTRY_MEM_DEVICE) { + struct memdev_dmi_entry *entry = (struct memdev_dmi_entry *)dh; + struct dimm_info *dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms, + mci->n_layers, + dimm_fill->count, 0, 0); + + if (entry->size == 0xffff) { + pr_info("Can't get DIMM%i size\n", + dimm_fill->count); + dimm->nr_pages = MiB_TO_PAGES(32);/* Unknown */ + } else if (entry->size == 0x7fff) { + dimm->nr_pages = MiB_TO_PAGES(entry->extended_size); + } else { + if (entry->size & 1 << 15) + dimm->nr_pages = MiB_TO_PAGES((entry->size & + 0x7fff) << 10); + else + dimm->nr_pages = MiB_TO_PAGES(entry->size); + } + + switch (entry->memory_type) { + case 0x12: + if (entry->type_detail & 1 << 13) + dimm->mtype = MEM_RDDR; + else + dimm->mtype = MEM_DDR; + break; + case 0x13: + if (entry->type_detail & 1 << 13) + dimm->mtype = MEM_RDDR2; + else + dimm->mtype = MEM_DDR2; + break; + case 0x14: + dimm->mtype = MEM_FB_DDR2; + break; + case 0x18: + if (entry->type_detail & 1 << 13) + dimm->mtype = MEM_RDDR3; + else + dimm->mtype = MEM_DDR3; + break; + default: + if (entry->type_detail & 1 << 6) + dimm->mtype = MEM_RMBS; + else if ((entry->type_detail & ((1 << 7) | (1 << 13))) + == ((1 << 7) | (1 << 13))) + dimm->mtype = MEM_RDR; + else if (entry->type_detail & 1 << 7) + dimm->mtype = MEM_SDR; + else if (entry->type_detail & 1 << 9) + dimm->mtype = MEM_EDO; + else + dimm->mtype = MEM_UNKNOWN; + } + + /* + * Actually, we can only detect if the memory has bits for + * checksum or not + */ + if (entry->total_width == entry->data_width) + dimm->edac_mode = EDAC_NONE; + else + dimm->edac_mode = EDAC_SECDED; + + dimm->dtype = DEV_UNKNOWN; + dimm->grain = 128; /* Likely, worse case */ + + /* + * FIXME: It shouldn't be hard to also fill the DIMM labels + */ + + if (dimm->nr_pages) { + edac_dbg(1, "DIMM%i: %s size = %d MB%s\n", + dimm_fill->count, memory_type[dimm->mtype], + PAGES_TO_MiB(dimm->nr_pages), + (dimm->edac_mode != EDAC_NONE) ? "(ECC)" : ""); + edac_dbg(2, "\ttype %d, detail 0x%02x, width %d(total %d)\n", + entry->memory_type, entry->type_detail, + entry->total_width, entry->data_width); + } + + dimm_fill->count++; + } +} + +void ghes_edac_report_mem_error(struct ghes *ghes, int sev, + struct cper_sec_mem_err *mem_err) +{ + enum hw_event_mc_err_type type; + struct edac_raw_error_desc *e; + struct mem_ctl_info *mci; + struct ghes_edac_pvt *pvt = NULL; + char *p; + u8 grain_bits; + + list_for_each_entry(pvt, &ghes_reglist, list) { + if (ghes == pvt->ghes) + break; + } + if (!pvt) { + pr_err("Internal error: Can't find EDAC structure\n"); + return; + } + mci = pvt->mci; + e = &mci->error_desc; + + /* Cleans the error report buffer */ + memset(e, 0, sizeof (*e)); + e->error_count = 1; + strcpy(e->label, "unknown label"); + e->msg = pvt->msg; + e->other_detail = pvt->other_detail; + e->top_layer = -1; + e->mid_layer = -1; + e->low_layer = -1; + *pvt->other_detail = '\0'; + *pvt->msg = '\0'; + + switch (sev) { + case GHES_SEV_CORRECTED: + type = HW_EVENT_ERR_CORRECTED; + break; + case GHES_SEV_RECOVERABLE: + type = HW_EVENT_ERR_UNCORRECTED; + break; + case GHES_SEV_PANIC: + type = HW_EVENT_ERR_FATAL; + break; + default: + case GHES_SEV_NO: + type = HW_EVENT_ERR_INFO; + } + + edac_dbg(1, "error validation_bits: 0x%08llx\n", + (long long)mem_err->validation_bits); + + /* Error type, mapped on e->msg */ + if (mem_err->validation_bits & CPER_MEM_VALID_ERROR_TYPE) { + p = pvt->msg; + switch (mem_err->error_type) { + case 0: + p += sprintf(p, "Unknown"); + break; + case 1: + p += sprintf(p, "No error"); + break; + case 2: + p += sprintf(p, "Single-bit ECC"); + break; + case 3: + p += sprintf(p, "Multi-bit ECC"); + break; + case 4: + p += sprintf(p, "Single-symbol ChipKill ECC"); + break; + case 5: + p += sprintf(p, "Multi-symbol ChipKill ECC"); + break; + case 6: + p += sprintf(p, "Master abort"); + break; + case 7: + p += sprintf(p, "Target abort"); + break; + case 8: + p += sprintf(p, "Parity Error"); + break; + case 9: + p += sprintf(p, "Watchdog timeout"); + break; + case 10: + p += sprintf(p, "Invalid address"); + break; + case 11: + p += sprintf(p, "Mirror Broken"); + break; + case 12: + p += sprintf(p, "Memory Sparing"); + break; + case 13: + p += sprintf(p, "Scrub corrected error"); + break; + case 14: + p += sprintf(p, "Scrub uncorrected error"); + break; + case 15: + p += sprintf(p, "Physical Memory Map-out event"); + break; + default: + p += sprintf(p, "reserved error (%d)", + mem_err->error_type); + } + } else { + strcpy(pvt->msg, "unknown error"); + } + + /* Error address */ + if (mem_err->validation_bits & CPER_MEM_VALID_PHYSICAL_ADDRESS) { + e->page_frame_number = mem_err->physical_addr >> PAGE_SHIFT; + e->offset_in_page = mem_err->physical_addr & ~PAGE_MASK; + } + + /* Error grain */ + if (mem_err->validation_bits & CPER_MEM_VALID_PHYSICAL_ADDRESS_MASK) { + e->grain = ~(mem_err->physical_addr_mask & ~PAGE_MASK); + } + + /* Memory error location, mapped on e->location */ + p = e->location; + if (mem_err->validation_bits & CPER_MEM_VALID_NODE) + p += sprintf(p, "node:%d ", mem_err->node); + if (mem_err->validation_bits & CPER_MEM_VALID_CARD) + p += sprintf(p, "card:%d ", mem_err->card); + if (mem_err->validation_bits & CPER_MEM_VALID_MODULE) + p += sprintf(p, "module:%d ", mem_err->module); + if (mem_err->validation_bits & CPER_MEM_VALID_BANK) + p += sprintf(p, "bank:%d ", mem_err->bank); + if (mem_err->validation_bits & CPER_MEM_VALID_ROW) + p += sprintf(p, "row:%d ", mem_err->row); + if (mem_err->validation_bits & CPER_MEM_VALID_COLUMN) + p += sprintf(p, "col:%d ", mem_err->column); + if (mem_err->validation_bits & CPER_MEM_VALID_BIT_POSITION) + p += sprintf(p, "bit_pos:%d ", mem_err->bit_pos); + if (p > e->location) + *(p - 1) = '\0'; + + /* All other fields are mapped on e->other_detail */ + p = pvt->other_detail; + if (mem_err->validation_bits & CPER_MEM_VALID_ERROR_STATUS) { + u64 status = mem_err->error_status; + + p += sprintf(p, "status(0x%016llx): ", (long long)status); + switch ((status >> 8) & 0xff) { + case 1: + p += sprintf(p, "Error detected internal to the component "); + break; + case 16: + p += sprintf(p, "Error detected in the bus "); + break; + case 4: + p += sprintf(p, "Storage error in DRAM memory "); + break; + case 5: + p += sprintf(p, "Storage error in TLB "); + break; + case 6: + p += sprintf(p, "Storage error in cache "); + break; + case 7: + p += sprintf(p, "Error in one or more functional units "); + break; + case 8: + p += sprintf(p, "component failed self test "); + break; + case 9: + p += sprintf(p, "Overflow or undervalue of internal queue "); + break; + case 17: + p += sprintf(p, "Virtual address not found on IO-TLB or IO-PDIR "); + break; + case 18: + p += sprintf(p, "Improper access error "); + break; + case 19: + p += sprintf(p, "Access to a memory address which is not mapped to any component "); + break; + case 20: + p += sprintf(p, "Loss of Lockstep "); + break; + case 21: + p += sprintf(p, "Response not associated with a request "); + break; + case 22: + p += sprintf(p, "Bus parity error - must also set the A, C, or D Bits "); + break; + case 23: + p += sprintf(p, "Detection of a PATH_ERROR "); + break; + case 25: + p += sprintf(p, "Bus operation timeout "); + break; + case 26: + p += sprintf(p, "A read was issued to data that has been poisoned "); + break; + default: + p += sprintf(p, "reserved "); + break; + } + } + if (mem_err->validation_bits & CPER_MEM_VALID_REQUESTOR_ID) + p += sprintf(p, "requestorID: 0x%016llx ", + (long long)mem_err->requestor_id); + if (mem_err->validation_bits & CPER_MEM_VALID_RESPONDER_ID) + p += sprintf(p, "responderID: 0x%016llx ", + (long long)mem_err->responder_id); + if (mem_err->validation_bits & CPER_MEM_VALID_TARGET_ID) + p += sprintf(p, "targetID: 0x%016llx ", + (long long)mem_err->responder_id); + if (p > pvt->other_detail) + *(p - 1) = '\0'; + + /* Generate the trace event */ + grain_bits = fls_long(e->grain); + sprintf(pvt->detail_location, "APEI location: %s %s", + e->location, e->other_detail); + trace_mc_event(type, e->msg, e->label, e->error_count, + mci->mc_idx, e->top_layer, e->mid_layer, e->low_layer, + PAGES_TO_MiB(e->page_frame_number) | e->offset_in_page, + grain_bits, e->syndrome, pvt->detail_location); + + /* Report the error via EDAC API */ + edac_raw_mc_handle_error(type, mci, e); +} +EXPORT_SYMBOL_GPL(ghes_edac_report_mem_error); + +int ghes_edac_register(struct ghes *ghes, struct device *dev) +{ + bool fake = false; + int rc, num_dimm = 0; + struct mem_ctl_info *mci; + struct edac_mc_layer layers[1]; + struct ghes_edac_pvt *pvt; + struct ghes_edac_dimm_fill dimm_fill; + + /* Get the number of DIMMs */ + dmi_walk(ghes_edac_count_dimms, &num_dimm); + + /* Check if we've got a bogus BIOS */ + if (num_dimm == 0) { + fake = true; + num_dimm = 1; + } + + layers[0].type = EDAC_MC_LAYER_ALL_MEM; + layers[0].size = num_dimm; + layers[0].is_virt_csrow = true; + + /* + * We need to serialize edac_mc_alloc() and edac_mc_add_mc(), + * to avoid duplicated memory controller numbers + */ + mutex_lock(&ghes_edac_lock); + mci = edac_mc_alloc(ghes_edac_mc_num, ARRAY_SIZE(layers), layers, + sizeof(*pvt)); + if (!mci) { + pr_info("Can't allocate memory for EDAC data\n"); + mutex_unlock(&ghes_edac_lock); + return -ENOMEM; + } + + pvt = mci->pvt_info; + memset(pvt, 0, sizeof(*pvt)); + list_add_tail(&pvt->list, &ghes_reglist); + pvt->ghes = ghes; + pvt->mci = mci; + mci->pdev = dev; + + mci->mtype_cap = MEM_FLAG_EMPTY; + mci->edac_ctl_cap = EDAC_FLAG_NONE; + mci->edac_cap = EDAC_FLAG_NONE; + mci->mod_name = "ghes_edac.c"; + mci->mod_ver = GHES_EDAC_REVISION; + mci->ctl_name = "ghes_edac"; + mci->dev_name = "ghes"; + + if (!ghes_edac_mc_num) { + if (!fake) { + pr_info("This EDAC driver relies on BIOS to enumerate memory and get error reports.\n"); + pr_info("Unfortunately, not all BIOSes reflect the memory layout correctly.\n"); + pr_info("So, the end result of using this driver varies from vendor to vendor.\n"); + pr_info("If you find incorrect reports, please contact your hardware vendor\n"); + pr_info("to correct its BIOS.\n"); + pr_info("This system has %d DIMM sockets.\n", + num_dimm); + } else { + pr_info("This system has a very crappy BIOS: It doesn't even list the DIMMS.\n"); + pr_info("Its SMBIOS info is wrong. It is doubtful that the error report would\n"); + pr_info("work on such system. Use this driver with caution\n"); + } + } + + if (!fake) { + /* + * Fill DIMM info from DMI for the memory controller #0 + * + * Keep it in blank for the other memory controllers, as + * there's no reliable way to properly credit each DIMM to + * the memory controller, as different BIOSes fill the + * DMI bank location fields on different ways + */ + if (!ghes_edac_mc_num) { + dimm_fill.count = 0; + dimm_fill.mci = mci; + dmi_walk(ghes_edac_dmidecode, &dimm_fill); + } + } else { + struct dimm_info *dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms, + mci->n_layers, 0, 0, 0); + + dimm->nr_pages = 1; + dimm->grain = 128; + dimm->mtype = MEM_UNKNOWN; + dimm->dtype = DEV_UNKNOWN; + dimm->edac_mode = EDAC_SECDED; + } + + rc = edac_mc_add_mc(mci); + if (rc < 0) { + pr_info("Can't register at EDAC core\n"); + edac_mc_free(mci); + mutex_unlock(&ghes_edac_lock); + return -ENODEV; + } + + ghes_edac_mc_num++; + mutex_unlock(&ghes_edac_lock); + return 0; +} +EXPORT_SYMBOL_GPL(ghes_edac_register); + +void ghes_edac_unregister(struct ghes *ghes) +{ + struct mem_ctl_info *mci; + struct ghes_edac_pvt *pvt, *tmp; + + list_for_each_entry_safe(pvt, tmp, &ghes_reglist, list) { + if (ghes == pvt->ghes) { + mci = pvt->mci; + edac_mc_del_mc(mci->pdev); + edac_mc_free(mci); + list_del(&pvt->list); + } + } +} +EXPORT_SYMBOL_GPL(ghes_edac_unregister); diff --git a/drivers/edac/i3200_edac.c b/drivers/edac/i3200_edac.c index 4e83376..aa44c17 100644 --- a/drivers/edac/i3200_edac.c +++ b/drivers/edac/i3200_edac.c @@ -106,16 +106,26 @@ static int nr_channels; static int how_many_channels(struct pci_dev *pdev) { + int n_channels; + unsigned char capid0_8b; /* 8th byte of CAPID0 */ pci_read_config_byte(pdev, I3200_CAPID0 + 8, &capid0_8b); + if (capid0_8b & 0x20) { /* check DCD: Dual Channel Disable */ edac_dbg(0, "In single channel mode\n"); - return 1; + n_channels = 1; } else { edac_dbg(0, "In dual channel mode\n"); - return 2; + n_channels = 2; } + + if (capid0_8b & 0x10) /* check if both channels are filled */ + edac_dbg(0, "2 DIMMS per channel disabled\n"); + else + edac_dbg(0, "2 DIMMS per channel enabled\n"); + + return n_channels; } static unsigned long eccerrlog_syndrome(u64 log) @@ -290,6 +300,8 @@ static void i3200_get_drbs(void __iomem *window, for (i = 0; i < I3200_RANKS_PER_CHANNEL; i++) { drbs[0][i] = readw(window + I3200_C0DRB + 2*i) & I3200_DRB_MASK; drbs[1][i] = readw(window + I3200_C1DRB + 2*i) & I3200_DRB_MASK; + + edac_dbg(0, "drb[0][%d] = %d, drb[1][%d] = %d\n", i, drbs[0][i], i, drbs[1][i]); } } @@ -311,6 +323,9 @@ static unsigned long drb_to_nr_pages( int n; n = drbs[channel][rank]; + if (!n) + return 0; + if (rank > 0) n -= drbs[channel][rank - 1]; if (stacked && (channel == 1) && @@ -377,19 +392,19 @@ static int i3200_probe1(struct pci_dev *pdev, int dev_idx) * cumulative; the last one will contain the total memory * contained in all ranks. */ - for (i = 0; i < mci->nr_csrows; i++) { + for (i = 0; i < I3200_DIMMS; i++) { unsigned long nr_pages; - struct csrow_info *csrow = mci->csrows[i]; - nr_pages = drb_to_nr_pages(drbs, stacked, - i / I3200_RANKS_PER_CHANNEL, - i % I3200_RANKS_PER_CHANNEL); + for (j = 0; j < nr_channels; j++) { + struct dimm_info *dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms, + mci->n_layers, i, j, 0); - if (nr_pages == 0) - continue; + nr_pages = drb_to_nr_pages(drbs, stacked, j, i); + if (nr_pages == 0) + continue; - for (j = 0; j < nr_channels; j++) { - struct dimm_info *dimm = csrow->channels[j]->dimm; + edac_dbg(0, "csrow %d, channel %d%s, size = %ld Mb\n", i, j, + stacked ? " (stacked)" : "", PAGES_TO_MiB(nr_pages)); dimm->nr_pages = nr_pages; dimm->grain = nr_pages << PAGE_SHIFT; diff --git a/drivers/edac/i5100_edac.c b/drivers/edac/i5100_edac.c index d6955b2..1b63517 100644 --- a/drivers/edac/i5100_edac.c +++ b/drivers/edac/i5100_edac.c @@ -27,6 +27,7 @@ #include <linux/edac.h> #include <linux/delay.h> #include <linux/mmzone.h> +#include <linux/debugfs.h> #include "edac_core.h" @@ -68,6 +69,14 @@ I5100_FERR_NF_MEM_M1ERR_MASK) #define I5100_NERR_NF_MEM 0xa4 /* MC Next Non-Fatal Errors */ #define I5100_EMASK_MEM 0xa8 /* MC Error Mask Register */ +#define I5100_MEM0EINJMSK0 0x200 /* Injection Mask0 Register Channel 0 */ +#define I5100_MEM1EINJMSK0 0x208 /* Injection Mask0 Register Channel 1 */ +#define I5100_MEMXEINJMSK0_EINJEN (1 << 27) +#define I5100_MEM0EINJMSK1 0x204 /* Injection Mask1 Register Channel 0 */ +#define I5100_MEM1EINJMSK1 0x206 /* Injection Mask1 Register Channel 1 */ + +/* Device 19, Function 0 */ +#define I5100_DINJ0 0x9a /* device 21 and 22, func 0 */ #define I5100_MTR_0 0x154 /* Memory Technology Registers 0-3 */ @@ -338,13 +347,26 @@ struct i5100_priv { unsigned ranksperchan; /* number of ranks per channel */ struct pci_dev *mc; /* device 16 func 1 */ + struct pci_dev *einj; /* device 19 func 0 */ struct pci_dev *ch0mm; /* device 21 func 0 */ struct pci_dev *ch1mm; /* device 22 func 0 */ struct delayed_work i5100_scrubbing; int scrub_enable; + + /* Error injection */ + u8 inject_channel; + u8 inject_hlinesel; + u8 inject_deviceptr1; + u8 inject_deviceptr2; + u16 inject_eccmask1; + u16 inject_eccmask2; + + struct dentry *debugfs; }; +static struct dentry *i5100_debugfs; + /* map a rank/chan to a slot number on the mainboard */ static int i5100_rank_to_slot(const struct mem_ctl_info *mci, int chan, int rank) @@ -863,13 +885,126 @@ static void i5100_init_csrows(struct mem_ctl_info *mci) } } +/**************************************************************************** + * Error injection routines + ****************************************************************************/ + +static void i5100_do_inject(struct mem_ctl_info *mci) +{ + struct i5100_priv *priv = mci->pvt_info; + u32 mask0; + u16 mask1; + + /* MEM[1:0]EINJMSK0 + * 31 - ADDRMATCHEN + * 29:28 - HLINESEL + * 00 Reserved + * 01 Lower half of cache line + * 10 Upper half of cache line + * 11 Both upper and lower parts of cache line + * 27 - EINJEN + * 25:19 - XORMASK1 for deviceptr1 + * 9:5 - SEC2RAM for deviceptr2 + * 4:0 - FIR2RAM for deviceptr1 + */ + mask0 = ((priv->inject_hlinesel & 0x3) << 28) | + I5100_MEMXEINJMSK0_EINJEN | + ((priv->inject_eccmask1 & 0xffff) << 10) | + ((priv->inject_deviceptr2 & 0x1f) << 5) | + (priv->inject_deviceptr1 & 0x1f); + + /* MEM[1:0]EINJMSK1 + * 15:0 - XORMASK2 for deviceptr2 + */ + mask1 = priv->inject_eccmask2; + + if (priv->inject_channel == 0) { + pci_write_config_dword(priv->mc, I5100_MEM0EINJMSK0, mask0); + pci_write_config_word(priv->mc, I5100_MEM0EINJMSK1, mask1); + } else { + pci_write_config_dword(priv->mc, I5100_MEM1EINJMSK0, mask0); + pci_write_config_word(priv->mc, I5100_MEM1EINJMSK1, mask1); + } + + /* Error Injection Response Function + * Intel 5100 Memory Controller Hub Chipset (318378) datasheet + * hints about this register but carry no data about them. All + * data regarding device 19 is based on experimentation and the + * Intel 7300 Chipset Memory Controller Hub (318082) datasheet + * which appears to be accurate for the i5100 in this area. + * + * The injection code don't work without setting this register. + * The register needs to be flipped off then on else the hardware + * will only preform the first injection. + * + * Stop condition bits 7:4 + * 1010 - Stop after one injection + * 1011 - Never stop injecting faults + * + * Start condition bits 3:0 + * 1010 - Never start + * 1011 - Start immediately + */ + pci_write_config_byte(priv->einj, I5100_DINJ0, 0xaa); + pci_write_config_byte(priv->einj, I5100_DINJ0, 0xab); +} + +#define to_mci(k) container_of(k, struct mem_ctl_info, dev) +static ssize_t inject_enable_write(struct file *file, const char __user *data, + size_t count, loff_t *ppos) +{ + struct device *dev = file->private_data; + struct mem_ctl_info *mci = to_mci(dev); + + i5100_do_inject(mci); + + return count; +} + +static const struct file_operations i5100_inject_enable_fops = { + .open = simple_open, + .write = inject_enable_write, + .llseek = generic_file_llseek, +}; + +static int i5100_setup_debugfs(struct mem_ctl_info *mci) +{ + struct i5100_priv *priv = mci->pvt_info; + + if (!i5100_debugfs) + return -ENODEV; + + priv->debugfs = debugfs_create_dir(mci->bus.name, i5100_debugfs); + + if (!priv->debugfs) + return -ENOMEM; + + debugfs_create_x8("inject_channel", S_IRUGO | S_IWUSR, priv->debugfs, + &priv->inject_channel); + debugfs_create_x8("inject_hlinesel", S_IRUGO | S_IWUSR, priv->debugfs, + &priv->inject_hlinesel); + debugfs_create_x8("inject_deviceptr1", S_IRUGO | S_IWUSR, priv->debugfs, + &priv->inject_deviceptr1); + debugfs_create_x8("inject_deviceptr2", S_IRUGO | S_IWUSR, priv->debugfs, + &priv->inject_deviceptr2); + debugfs_create_x16("inject_eccmask1", S_IRUGO | S_IWUSR, priv->debugfs, + &priv->inject_eccmask1); + debugfs_create_x16("inject_eccmask2", S_IRUGO | S_IWUSR, priv->debugfs, + &priv->inject_eccmask2); + debugfs_create_file("inject_enable", S_IWUSR, priv->debugfs, + &mci->dev, &i5100_inject_enable_fops); + + return 0; + +} + static int i5100_init_one(struct pci_dev *pdev, const struct pci_device_id *id) { int rc; struct mem_ctl_info *mci; struct edac_mc_layer layers[2]; struct i5100_priv *priv; - struct pci_dev *ch0mm, *ch1mm; + struct pci_dev *ch0mm, *ch1mm, *einj; int ret = 0; u32 dw; int ranksperch; @@ -941,6 +1076,22 @@ static int i5100_init_one(struct pci_dev *pdev, const struct pci_device_id *id) goto bail_disable_ch1; } + + /* device 19, func 0, Error injection */ + einj = pci_get_device_func(PCI_VENDOR_ID_INTEL, + PCI_DEVICE_ID_INTEL_5100_19, 0); + if (!einj) { + ret = -ENODEV; + goto bail_einj; + } + + rc = pci_enable_device(einj); + if (rc < 0) { + ret = rc; + goto bail_disable_einj; + } + + mci->pdev = &pdev->dev; priv = mci->pvt_info; @@ -948,6 +1099,7 @@ static int i5100_init_one(struct pci_dev *pdev, const struct pci_device_id *id) priv->mc = pdev; priv->ch0mm = ch0mm; priv->ch1mm = ch1mm; + priv->einj = einj; INIT_DELAYED_WORK(&(priv->i5100_scrubbing), i5100_refresh_scrubbing); @@ -975,6 +1127,13 @@ static int i5100_init_one(struct pci_dev *pdev, const struct pci_device_id *id) mci->set_sdram_scrub_rate = i5100_set_scrub_rate; mci->get_sdram_scrub_rate = i5100_get_scrub_rate; + priv->inject_channel = 0; + priv->inject_hlinesel = 0; + priv->inject_deviceptr1 = 0; + priv->inject_deviceptr2 = 0; + priv->inject_eccmask1 = 0; + priv->inject_eccmask2 = 0; + i5100_init_csrows(mci); /* this strange construction seems to be in every driver, dunno why */ @@ -992,6 +1151,8 @@ static int i5100_init_one(struct pci_dev *pdev, const struct pci_device_id *id) goto bail_scrub; } + i5100_setup_debugfs(mci); + return ret; bail_scrub: @@ -999,6 +1160,12 @@ bail_scrub: cancel_delayed_work_sync(&(priv->i5100_scrubbing)); edac_mc_free(mci); +bail_disable_einj: + pci_disable_device(einj); + +bail_einj: + pci_dev_put(einj); + bail_disable_ch1: pci_disable_device(ch1mm); @@ -1030,14 +1197,18 @@ static void i5100_remove_one(struct pci_dev *pdev) priv = mci->pvt_info; + debugfs_remove_recursive(priv->debugfs); + priv->scrub_enable = 0; cancel_delayed_work_sync(&(priv->i5100_scrubbing)); pci_disable_device(pdev); pci_disable_device(priv->ch0mm); pci_disable_device(priv->ch1mm); + pci_disable_device(priv->einj); pci_dev_put(priv->ch0mm); pci_dev_put(priv->ch1mm); + pci_dev_put(priv->einj); edac_mc_free(mci); } @@ -1060,13 +1231,16 @@ static int __init i5100_init(void) { int pci_rc; - pci_rc = pci_register_driver(&i5100_driver); + i5100_debugfs = debugfs_create_dir("i5100_edac", NULL); + pci_rc = pci_register_driver(&i5100_driver); return (pci_rc < 0) ? pci_rc : 0; } static void __exit i5100_exit(void) { + debugfs_remove(i5100_debugfs); + pci_unregister_driver(&i5100_driver); } diff --git a/drivers/edac/i7core_edac.c b/drivers/edac/i7core_edac.c index e213d03..0ec3e95 100644 --- a/drivers/edac/i7core_edac.c +++ b/drivers/edac/i7core_edac.c @@ -420,21 +420,21 @@ static inline int numdimms(u32 dimms) static inline int numrank(u32 rank) { - static int ranks[4] = { 1, 2, 4, -EINVAL }; + static const int ranks[] = { 1, 2, 4, -EINVAL }; return ranks[rank & 0x3]; } static inline int numbank(u32 bank) { - static int banks[4] = { 4, 8, 16, -EINVAL }; + static const int banks[] = { 4, 8, 16, -EINVAL }; return banks[bank & 0x3]; } static inline int numrow(u32 row) { - static int rows[8] = { + static const int rows[] = { 1 << 12, 1 << 13, 1 << 14, 1 << 15, 1 << 16, -EINVAL, -EINVAL, -EINVAL, }; @@ -444,7 +444,7 @@ static inline int numrow(u32 row) static inline int numcol(u32 col) { - static int cols[8] = { + static const int cols[] = { 1 << 10, 1 << 11, 1 << 12, -EINVAL, }; return cols[col & 0x3]; diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c index da7e298..57244f9 100644 --- a/drivers/edac/sb_edac.c +++ b/drivers/edac/sb_edac.c @@ -639,7 +639,7 @@ static void get_memory_layout(const struct mem_ctl_info *mci) tmp_mb = (1 + pvt->tohm) >> 20; mb = div_u64_rem(tmp_mb, 1000, &kb); - edac_dbg(0, "TOHM: %u.%03u GB (0x%016Lx)", mb, kb, (u64)pvt->tohm); + edac_dbg(0, "TOHM: %u.%03u GB (0x%016Lx)\n", mb, kb, (u64)pvt->tohm); /* * Step 2) Get SAD range and SAD Interleave list diff --git a/drivers/hsi/hsi.c b/drivers/hsi/hsi.c index 2d58f93..833dd1a 100644 --- a/drivers/hsi/hsi.c +++ b/drivers/hsi/hsi.c @@ -420,7 +420,7 @@ static int hsi_event_notifier_call(struct notifier_block *nb, /** * hsi_register_port_event - Register a client to receive port events * @cl: HSI client that wants to receive port events - * @cb: Event handler callback + * @handler: Event handler callback * * Clients should register a callback to be able to receive * events from the ports. Registration should happen after diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c index 53a8600..ff1be16 100644 --- a/drivers/hv/channel_mgmt.c +++ b/drivers/hv/channel_mgmt.c @@ -318,7 +318,7 @@ static u32 get_vp_index(uuid_le *type_guid) return 0; } cur_cpu = (++next_vp % max_cpus); - return 0; + return cur_cpu; } /* diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c index 1c5481d..7311589 100644 --- a/drivers/hv/hv.c +++ b/drivers/hv/hv.c @@ -272,7 +272,7 @@ u16 hv_signal_event(void *con_id) * retrieve the initialized message and event pages. Otherwise, we create and * initialize the message and event pages. */ -void hv_synic_init(void *irqarg) +void hv_synic_init(void *arg) { u64 version; union hv_synic_simp simp; @@ -281,7 +281,6 @@ void hv_synic_init(void *irqarg) union hv_synic_scontrol sctrl; u64 vp_index; - u32 irq_vector = *((u32 *)(irqarg)); int cpu = smp_processor_id(); if (!hv_context.hypercall_page) @@ -335,7 +334,7 @@ void hv_synic_init(void *irqarg) rdmsrl(HV_X64_MSR_SINT0 + VMBUS_MESSAGE_SINT, shared_sint.as_uint64); shared_sint.as_uint64 = 0; - shared_sint.vector = irq_vector; /* HV_SHARED_SINT_IDT_VECTOR + 0x20; */ + shared_sint.vector = HYPERVISOR_CALLBACK_VECTOR; shared_sint.masked = false; shared_sint.auto_eoi = true; diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c index cf19dfa..bf421e0 100644 --- a/drivers/hv/vmbus_drv.c +++ b/drivers/hv/vmbus_drv.c @@ -36,6 +36,7 @@ #include <linux/kernel_stat.h> #include <asm/hyperv.h> #include <asm/hypervisor.h> +#include <asm/mshyperv.h> #include "hyperv_vmbus.h" @@ -528,7 +529,6 @@ static void vmbus_flow_handler(unsigned int irq, struct irq_desc *desc) static int vmbus_bus_init(int irq) { int ret; - unsigned int vector; /* Hypervisor initialization...setup hypercall page..etc */ ret = hv_init(); @@ -558,13 +558,16 @@ static int vmbus_bus_init(int irq) */ irq_set_handler(irq, vmbus_flow_handler); - vector = IRQ0_VECTOR + irq; + /* + * Register our interrupt handler. + */ + hv_register_vmbus_handler(irq, vmbus_isr); /* - * Notify the hypervisor of our irq and + * Initialize the per-cpu interrupt state and * connect to the host. */ - on_each_cpu(hv_synic_init, (void *)&vector, 1); + on_each_cpu(hv_synic_init, NULL, 1); ret = vmbus_connect(); if (ret) goto err_irq; diff --git a/drivers/input/misc/hp_sdc_rtc.c b/drivers/input/misc/hp_sdc_rtc.c index 0b4f542..2e3334b 100644 --- a/drivers/input/misc/hp_sdc_rtc.c +++ b/drivers/input/misc/hp_sdc_rtc.c @@ -109,7 +109,9 @@ static int hp_sdc_rtc_do_read_bbrtc (struct rtc_time *rtctm) if (hp_sdc_enqueue_transaction(&t)) return -1; - down_interruptible(&tsem); /* Put ourselves to sleep for results. */ + /* Put ourselves to sleep for results. */ + if (WARN_ON(down_interruptible(&tsem))) + return -1; /* Check for nonpresence of BBRTC */ if (!((tseq[83] | tseq[90] | tseq[69] | tseq[76] | @@ -176,11 +178,16 @@ static int64_t hp_sdc_rtc_read_i8042timer (uint8_t loadcmd, int numreg) t.seq = tseq; t.act.semaphore = &i8042tregs; - down_interruptible(&i8042tregs); /* Sleep if output regs in use. */ + /* Sleep if output regs in use. */ + if (WARN_ON(down_interruptible(&i8042tregs))) + return -1; if (hp_sdc_enqueue_transaction(&t)) return -1; - down_interruptible(&i8042tregs); /* Sleep until results come back. */ + /* Sleep until results come back. */ + if (WARN_ON(down_interruptible(&i8042tregs))) + return -1; + up(&i8042tregs); return (tseq[5] | @@ -276,6 +283,7 @@ static inline int hp_sdc_rtc_read_ct(struct timeval *res) { } +#if 0 /* not used yet */ /* Set the i8042 real-time clock */ static int hp_sdc_rtc_set_rt (struct timeval *setto) { @@ -386,6 +394,7 @@ static int hp_sdc_rtc_set_i8042timer (struct timeval *setto, uint8_t setcmd) } return 0; } +#endif static ssize_t hp_sdc_rtc_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { diff --git a/drivers/irqchip/Makefile b/drivers/irqchip/Makefile index e65fbf2..98e3b87 100644 --- a/drivers/irqchip/Makefile +++ b/drivers/irqchip/Makefile @@ -2,6 +2,8 @@ obj-$(CONFIG_IRQCHIP) += irqchip.o obj-$(CONFIG_ARCH_BCM2835) += irq-bcm2835.o obj-$(CONFIG_ARCH_EXYNOS) += exynos-combiner.o +obj-$(CONFIG_METAG) += irq-metag-ext.o +obj-$(CONFIG_METAG_PERFCOUNTER_IRQS) += irq-metag.o obj-$(CONFIG_ARCH_SUNXI) += irq-sunxi.o obj-$(CONFIG_ARCH_SPEAR3XX) += spear-shirq.o obj-$(CONFIG_ARM_GIC) += irq-gic.o diff --git a/drivers/irqchip/irq-metag-ext.c b/drivers/irqchip/irq-metag-ext.c new file mode 100644 index 0000000..92c41ab --- /dev/null +++ b/drivers/irqchip/irq-metag-ext.c @@ -0,0 +1,868 @@ +/* + * Meta External interrupt code. + * + * Copyright (C) 2005-2012 Imagination Technologies Ltd. + * + * External interrupts on Meta are configured at two-levels, in the CPU core and + * in the external trigger block. Interrupts from SoC peripherals are + * multiplexed onto a single Meta CPU "trigger" - traditionally it has always + * been trigger 2 (TR2). For info on how de-multiplexing happens check out + * meta_intc_irq_demux(). + */ + +#include <linux/interrupt.h> +#include <linux/irqchip/metag-ext.h> +#include <linux/irqdomain.h> +#include <linux/io.h> +#include <linux/of.h> +#include <linux/slab.h> +#include <linux/syscore_ops.h> + +#include <asm/irq.h> +#include <asm/hwthread.h> + +#define HWSTAT_STRIDE 8 +#define HWVEC_BLK_STRIDE 0x1000 + +/** + * struct meta_intc_priv - private meta external interrupt data + * @nr_banks: Number of interrupt banks + * @domain: IRQ domain for all banks of external IRQs + * @unmasked: Record of unmasked IRQs + * @levels_altered: Record of altered level bits + */ +struct meta_intc_priv { + unsigned int nr_banks; + struct irq_domain *domain; + + unsigned long unmasked[4]; + +#ifdef CONFIG_METAG_SUSPEND_MEM + unsigned long levels_altered[4]; +#endif +}; + +/* Private data for the one and only external interrupt controller */ +static struct meta_intc_priv meta_intc_priv; + +/** + * meta_intc_offset() - Get the offset into the bank of a hardware IRQ number + * @hw: Hardware IRQ number (within external trigger block) + * + * Returns: Bit offset into the IRQ's bank registers + */ +static unsigned int meta_intc_offset(irq_hw_number_t hw) +{ + return hw & 0x1f; +} + +/** + * meta_intc_bank() - Get the bank number of a hardware IRQ number + * @hw: Hardware IRQ number (within external trigger block) + * + * Returns: Bank number indicating which register the IRQ's bits are + */ +static unsigned int meta_intc_bank(irq_hw_number_t hw) +{ + return hw >> 5; +} + +/** + * meta_intc_stat_addr() - Get the address of a HWSTATEXT register + * @hw: Hardware IRQ number (within external trigger block) + * + * Returns: Address of a HWSTATEXT register containing the status bit for + * the specified hardware IRQ number + */ +static void __iomem *meta_intc_stat_addr(irq_hw_number_t hw) +{ + return (void __iomem *)(HWSTATEXT + + HWSTAT_STRIDE * meta_intc_bank(hw)); +} + +/** + * meta_intc_level_addr() - Get the address of a HWLEVELEXT register + * @hw: Hardware IRQ number (within external trigger block) + * + * Returns: Address of a HWLEVELEXT register containing the sense bit for + * the specified hardware IRQ number + */ +static void __iomem *meta_intc_level_addr(irq_hw_number_t hw) +{ + return (void __iomem *)(HWLEVELEXT + + HWSTAT_STRIDE * meta_intc_bank(hw)); +} + +/** + * meta_intc_mask_addr() - Get the address of a HWMASKEXT register + * @hw: Hardware IRQ number (within external trigger block) + * + * Returns: Address of a HWMASKEXT register containing the mask bit for the + * specified hardware IRQ number + */ +static void __iomem *meta_intc_mask_addr(irq_hw_number_t hw) +{ + return (void __iomem *)(HWMASKEXT + + HWSTAT_STRIDE * meta_intc_bank(hw)); +} + +/** + * meta_intc_vec_addr() - Get the vector address of a hardware interrupt + * @hw: Hardware IRQ number (within external trigger block) + * + * Returns: Address of a HWVECEXT register controlling the core trigger to + * vector the IRQ onto + */ +static inline void __iomem *meta_intc_vec_addr(irq_hw_number_t hw) +{ + return (void __iomem *)(HWVEC0EXT + + HWVEC_BLK_STRIDE * meta_intc_bank(hw) + + HWVECnEXT_STRIDE * meta_intc_offset(hw)); +} + +/** + * meta_intc_startup_irq() - set up an external irq + * @data: data for the external irq to start up + * + * Multiplex interrupts for irq onto TR2. Clear any pending interrupts and + * unmask irq, both using the appropriate callbacks. + */ +static unsigned int meta_intc_startup_irq(struct irq_data *data) +{ + irq_hw_number_t hw = data->hwirq; + void __iomem *vec_addr = meta_intc_vec_addr(hw); + int thread = hard_processor_id(); + + /* Perform any necessary acking. */ + if (data->chip->irq_ack) + data->chip->irq_ack(data); + + /* Wire up this interrupt to the core with HWVECxEXT. */ + metag_out32(TBI_TRIG_VEC(TBID_SIGNUM_TR2(thread)), vec_addr); + + /* Perform any necessary unmasking. */ + data->chip->irq_unmask(data); + + return 0; +} + +/** + * meta_intc_shutdown_irq() - turn off an external irq + * @data: data for the external irq to turn off + * + * Mask irq using the appropriate callback and stop muxing it onto TR2. + */ +static void meta_intc_shutdown_irq(struct irq_data *data) +{ + irq_hw_number_t hw = data->hwirq; + void __iomem *vec_addr = meta_intc_vec_addr(hw); + + /* Mask the IRQ */ + data->chip->irq_mask(data); + + /* + * Disable the IRQ at the core by removing the interrupt from + * the HW vector mapping. + */ + metag_out32(0, vec_addr); +} + +/** + * meta_intc_ack_irq() - acknowledge an external irq + * @data: data for the external irq to ack + * + * Clear down an edge interrupt in the status register. + */ +static void meta_intc_ack_irq(struct irq_data *data) +{ + irq_hw_number_t hw = data->hwirq; + unsigned int bit = 1 << meta_intc_offset(hw); + void __iomem *stat_addr = meta_intc_stat_addr(hw); + + /* Ack the int, if it is still 'on'. + * NOTE - this only works for edge triggered interrupts. + */ + if (metag_in32(stat_addr) & bit) + metag_out32(bit, stat_addr); +} + +/** + * record_irq_is_masked() - record the IRQ masked so it doesn't get handled + * @data: data for the external irq to record + * + * This should get called whenever an external IRQ is masked (by whichever + * callback is used). It records the IRQ masked so that it doesn't get handled + * if it still shows up in the status register. + */ +static void record_irq_is_masked(struct irq_data *data) +{ + struct meta_intc_priv *priv = &meta_intc_priv; + irq_hw_number_t hw = data->hwirq; + + clear_bit(meta_intc_offset(hw), &priv->unmasked[meta_intc_bank(hw)]); +} + +/** + * record_irq_is_unmasked() - record the IRQ unmasked so it can be handled + * @data: data for the external irq to record + * + * This should get called whenever an external IRQ is unmasked (by whichever + * callback is used). It records the IRQ unmasked so that it gets handled if it + * shows up in the status register. + */ +static void record_irq_is_unmasked(struct irq_data *data) +{ + struct meta_intc_priv *priv = &meta_intc_priv; + irq_hw_number_t hw = data->hwirq; + + set_bit(meta_intc_offset(hw), &priv->unmasked[meta_intc_bank(hw)]); +} + +/* + * For use by wrapper IRQ drivers + */ + +/** + * meta_intc_mask_irq_simple() - minimal mask used by wrapper IRQ drivers + * @data: data for the external irq being masked + * + * This should be called by any wrapper IRQ driver mask functions. it doesn't do + * any masking but records the IRQ as masked so that the core code knows the + * mask has taken place. It is the callers responsibility to ensure that the IRQ + * won't trigger an interrupt to the core. + */ +void meta_intc_mask_irq_simple(struct irq_data *data) +{ + record_irq_is_masked(data); +} + +/** + * meta_intc_unmask_irq_simple() - minimal unmask used by wrapper IRQ drivers + * @data: data for the external irq being unmasked + * + * This should be called by any wrapper IRQ driver unmask functions. it doesn't + * do any unmasking but records the IRQ as unmasked so that the core code knows + * the unmask has taken place. It is the callers responsibility to ensure that + * the IRQ can now trigger an interrupt to the core. + */ +void meta_intc_unmask_irq_simple(struct irq_data *data) +{ + record_irq_is_unmasked(data); +} + + +/** + * meta_intc_mask_irq() - mask an external irq using HWMASKEXT + * @data: data for the external irq to mask + * + * This is a default implementation of a mask function which makes use of the + * HWMASKEXT registers available in newer versions. + * + * Earlier versions without these registers should use SoC level IRQ masking + * which call the meta_intc_*_simple() functions above, or if that isn't + * available should use the fallback meta_intc_*_nomask() functions below. + */ +static void meta_intc_mask_irq(struct irq_data *data) +{ + irq_hw_number_t hw = data->hwirq; + unsigned int bit = 1 << meta_intc_offset(hw); + void __iomem *mask_addr = meta_intc_mask_addr(hw); + unsigned long flags; + + record_irq_is_masked(data); + + /* update the interrupt mask */ + __global_lock2(flags); + metag_out32(metag_in32(mask_addr) & ~bit, mask_addr); + __global_unlock2(flags); +} + +/** + * meta_intc_unmask_irq() - unmask an external irq using HWMASKEXT + * @data: data for the external irq to unmask + * + * This is a default implementation of an unmask function which makes use of the + * HWMASKEXT registers available on new versions. It should be paired with + * meta_intc_mask_irq() above. + */ +static void meta_intc_unmask_irq(struct irq_data *data) +{ + irq_hw_number_t hw = data->hwirq; + unsigned int bit = 1 << meta_intc_offset(hw); + void __iomem *mask_addr = meta_intc_mask_addr(hw); + unsigned long flags; + + record_irq_is_unmasked(data); + + /* update the interrupt mask */ + __global_lock2(flags); + metag_out32(metag_in32(mask_addr) | bit, mask_addr); + __global_unlock2(flags); +} + +/** + * meta_intc_mask_irq_nomask() - mask an external irq by unvectoring + * @data: data for the external irq to mask + * + * This is the version of the mask function for older versions which don't have + * HWMASKEXT registers, or a SoC level means of masking IRQs. Instead the IRQ is + * unvectored from the core and retriggered if necessary later. + */ +static void meta_intc_mask_irq_nomask(struct irq_data *data) +{ + irq_hw_number_t hw = data->hwirq; + void __iomem *vec_addr = meta_intc_vec_addr(hw); + + record_irq_is_masked(data); + + /* there is no interrupt mask, so unvector the interrupt */ + metag_out32(0, vec_addr); +} + +/** + * meta_intc_unmask_edge_irq_nomask() - unmask an edge irq by revectoring + * @data: data for the external irq to unmask + * + * This is the version of the unmask function for older versions which don't + * have HWMASKEXT registers, or a SoC level means of masking IRQs. Instead the + * IRQ is revectored back to the core and retriggered if necessary. + * + * The retriggering done by this function is specific to edge interrupts. + */ +static void meta_intc_unmask_edge_irq_nomask(struct irq_data *data) +{ + irq_hw_number_t hw = data->hwirq; + unsigned int bit = 1 << meta_intc_offset(hw); + void __iomem *stat_addr = meta_intc_stat_addr(hw); + void __iomem *vec_addr = meta_intc_vec_addr(hw); + unsigned int thread = hard_processor_id(); + + record_irq_is_unmasked(data); + + /* there is no interrupt mask, so revector the interrupt */ + metag_out32(TBI_TRIG_VEC(TBID_SIGNUM_TR2(thread)), vec_addr); + + /* + * Re-trigger interrupt + * + * Writing a 1 toggles, and a 0->1 transition triggers. We only + * retrigger if the status bit is already set, which means we + * need to clear it first. Retriggering is fundamentally racy + * because if the interrupt fires again after we clear it we + * could end up clearing it again and the interrupt handler + * thinking it hasn't fired. Therefore we need to keep trying to + * retrigger until the bit is set. + */ + if (metag_in32(stat_addr) & bit) { + metag_out32(bit, stat_addr); + while (!(metag_in32(stat_addr) & bit)) + metag_out32(bit, stat_addr); + } +} + +/** + * meta_intc_unmask_level_irq_nomask() - unmask a level irq by revectoring + * @data: data for the external irq to unmask + * + * This is the version of the unmask function for older versions which don't + * have HWMASKEXT registers, or a SoC level means of masking IRQs. Instead the + * IRQ is revectored back to the core and retriggered if necessary. + * + * The retriggering done by this function is specific to level interrupts. + */ +static void meta_intc_unmask_level_irq_nomask(struct irq_data *data) +{ + irq_hw_number_t hw = data->hwirq; + unsigned int bit = 1 << meta_intc_offset(hw); + void __iomem *stat_addr = meta_intc_stat_addr(hw); + void __iomem *vec_addr = meta_intc_vec_addr(hw); + unsigned int thread = hard_processor_id(); + + record_irq_is_unmasked(data); + + /* there is no interrupt mask, so revector the interrupt */ + metag_out32(TBI_TRIG_VEC(TBID_SIGNUM_TR2(thread)), vec_addr); + + /* Re-trigger interrupt */ + /* Writing a 1 triggers interrupt */ + if (metag_in32(stat_addr) & bit) + metag_out32(bit, stat_addr); +} + +/** + * meta_intc_irq_set_type() - set the type of an external irq + * @data: data for the external irq to set the type of + * @flow_type: new irq flow type + * + * Set the flow type of an external interrupt. This updates the irq chip and irq + * handler depending on whether the irq is edge or level sensitive (the polarity + * is ignored), and also sets up the bit in HWLEVELEXT so the hardware knows + * when to trigger. + */ +static int meta_intc_irq_set_type(struct irq_data *data, unsigned int flow_type) +{ +#ifdef CONFIG_METAG_SUSPEND_MEM + struct meta_intc_priv *priv = &meta_intc_priv; +#endif + unsigned int irq = data->irq; + irq_hw_number_t hw = data->hwirq; + unsigned int bit = 1 << meta_intc_offset(hw); + void __iomem *level_addr = meta_intc_level_addr(hw); + unsigned long flags; + unsigned int level; + + /* update the chip/handler */ + if (flow_type & IRQ_TYPE_LEVEL_MASK) + __irq_set_chip_handler_name_locked(irq, &meta_intc_level_chip, + handle_level_irq, NULL); + else + __irq_set_chip_handler_name_locked(irq, &meta_intc_edge_chip, + handle_edge_irq, NULL); + + /* and clear/set the bit in HWLEVELEXT */ + __global_lock2(flags); + level = metag_in32(level_addr); + if (flow_type & IRQ_TYPE_LEVEL_MASK) + level |= bit; + else + level &= ~bit; + metag_out32(level, level_addr); +#ifdef CONFIG_METAG_SUSPEND_MEM + priv->levels_altered[meta_intc_bank(hw)] |= bit; +#endif + __global_unlock2(flags); + + return 0; +} + +/** + * meta_intc_irq_demux() - external irq de-multiplexer + * @irq: the virtual interrupt number + * @desc: the interrupt description structure for this irq + * + * The cpu receives an interrupt on TR2 when a SoC interrupt has occurred. It is + * this function's job to demux this irq and figure out exactly which external + * irq needs servicing. + * + * Whilst using TR2 to detect external interrupts is a software convention it is + * (hopefully) unlikely to change. + */ +static void meta_intc_irq_demux(unsigned int irq, struct irq_desc *desc) +{ + struct meta_intc_priv *priv = &meta_intc_priv; + irq_hw_number_t hw; + unsigned int bank, irq_no, status; + void __iomem *stat_addr = meta_intc_stat_addr(0); + + /* + * Locate which interrupt has caused our handler to run. + */ + for (bank = 0; bank < priv->nr_banks; ++bank) { + /* Which interrupts are currently pending in this bank? */ +recalculate: + status = metag_in32(stat_addr) & priv->unmasked[bank]; + + for (hw = bank*32; status; status >>= 1, ++hw) { + if (status & 0x1) { + /* + * Map the hardware IRQ number to a virtual + * Linux IRQ number. + */ + irq_no = irq_linear_revmap(priv->domain, hw); + + /* + * Only fire off external interrupts that are + * registered to be handled by the kernel. + * Other external interrupts are probably being + * handled by other Meta hardware threads. + */ + generic_handle_irq(irq_no); + + /* + * The handler may have re-enabled interrupts + * which could have caused a nested invocation + * of this code and make the copy of the + * status register we are using invalid. + */ + goto recalculate; + } + } + stat_addr += HWSTAT_STRIDE; + } +} + +#ifdef CONFIG_SMP +/** + * meta_intc_set_affinity() - set the affinity for an interrupt + * @data: data for the external irq to set the affinity of + * @cpumask: cpu mask representing cpus which can handle the interrupt + * @force: whether to force (ignored) + * + * Revector the specified external irq onto a specific cpu's TR2 trigger, so + * that that cpu tends to be the one who handles it. + */ +static int meta_intc_set_affinity(struct irq_data *data, + const struct cpumask *cpumask, bool force) +{ + irq_hw_number_t hw = data->hwirq; + void __iomem *vec_addr = meta_intc_vec_addr(hw); + unsigned int cpu, thread; + + /* + * Wire up this interrupt from HWVECxEXT to the Meta core. + * + * Note that we can't wire up HWVECxEXT to interrupt more than + * one cpu (the interrupt code doesn't support it), so we just + * pick the first cpu we find in 'cpumask'. + */ + cpu = cpumask_any(cpumask); + thread = cpu_2_hwthread_id[cpu]; + + metag_out32(TBI_TRIG_VEC(TBID_SIGNUM_TR2(thread)), vec_addr); + + return 0; +} +#else +#define meta_intc_set_affinity NULL +#endif + +#ifdef CONFIG_PM_SLEEP +#define META_INTC_CHIP_FLAGS (IRQCHIP_MASK_ON_SUSPEND \ + | IRQCHIP_SKIP_SET_WAKE) +#else +#define META_INTC_CHIP_FLAGS 0 +#endif + +/* public edge/level irq chips which SoCs can override */ + +struct irq_chip meta_intc_edge_chip = { + .irq_startup = meta_intc_startup_irq, + .irq_shutdown = meta_intc_shutdown_irq, + .irq_ack = meta_intc_ack_irq, + .irq_mask = meta_intc_mask_irq, + .irq_unmask = meta_intc_unmask_irq, + .irq_set_type = meta_intc_irq_set_type, + .irq_set_affinity = meta_intc_set_affinity, + .flags = META_INTC_CHIP_FLAGS, +}; + +struct irq_chip meta_intc_level_chip = { + .irq_startup = meta_intc_startup_irq, + .irq_shutdown = meta_intc_shutdown_irq, + .irq_set_type = meta_intc_irq_set_type, + .irq_mask = meta_intc_mask_irq, + .irq_unmask = meta_intc_unmask_irq, + .irq_set_affinity = meta_intc_set_affinity, + .flags = META_INTC_CHIP_FLAGS, +}; + +/** + * meta_intc_map() - map an external irq + * @d: irq domain of external trigger block + * @irq: virtual irq number + * @hw: hardware irq number within external trigger block + * + * This sets up a virtual irq for a specified hardware interrupt. The irq chip + * and handler is configured, using the HWLEVELEXT registers to determine + * edge/level flow type. These registers will have been set when the irq type is + * set (or set to a default at init time). + */ +static int meta_intc_map(struct irq_domain *d, unsigned int irq, + irq_hw_number_t hw) +{ + unsigned int bit = 1 << meta_intc_offset(hw); + void __iomem *level_addr = meta_intc_level_addr(hw); + + /* Go by the current sense in the HWLEVELEXT register */ + if (metag_in32(level_addr) & bit) + irq_set_chip_and_handler(irq, &meta_intc_level_chip, + handle_level_irq); + else + irq_set_chip_and_handler(irq, &meta_intc_edge_chip, + handle_edge_irq); + return 0; +} + +static const struct irq_domain_ops meta_intc_domain_ops = { + .map = meta_intc_map, + .xlate = irq_domain_xlate_twocell, +}; + +#ifdef CONFIG_METAG_SUSPEND_MEM + +/** + * struct meta_intc_context - suspend context + * @levels: State of HWLEVELEXT registers + * @masks: State of HWMASKEXT registers + * @vectors: State of HWVECEXT registers + * @txvecint: State of TxVECINT registers + * + * This structure stores the IRQ state across suspend. + */ +struct meta_intc_context { + u32 levels[4]; + u32 masks[4]; + u8 vectors[4*32]; + + u8 txvecint[4][4]; +}; + +/* suspend context */ +static struct meta_intc_context *meta_intc_context; + +/** + * meta_intc_suspend() - store irq state + * + * To avoid interfering with other threads we only save the IRQ state of IRQs in + * use by Linux. + */ +static int meta_intc_suspend(void) +{ + struct meta_intc_priv *priv = &meta_intc_priv; + int i, j; + irq_hw_number_t hw; + unsigned int bank; + unsigned long flags; + struct meta_intc_context *context; + void __iomem *level_addr, *mask_addr, *vec_addr; + u32 mask, bit; + + context = kzalloc(sizeof(*context), GFP_ATOMIC); + if (!context) + return -ENOMEM; + + hw = 0; + level_addr = meta_intc_level_addr(0); + mask_addr = meta_intc_mask_addr(0); + for (bank = 0; bank < priv->nr_banks; ++bank) { + vec_addr = meta_intc_vec_addr(hw); + + /* create mask of interrupts in use */ + mask = 0; + for (bit = 1; bit; bit <<= 1) { + i = irq_linear_revmap(priv->domain, hw); + /* save mapped irqs which are enabled or have actions */ + if (i && (!irqd_irq_disabled(irq_get_irq_data(i)) || + irq_has_action(i))) { + mask |= bit; + + /* save trigger vector */ + context->vectors[hw] = metag_in32(vec_addr); + } + + ++hw; + vec_addr += HWVECnEXT_STRIDE; + } + + /* save level state if any IRQ levels altered */ + if (priv->levels_altered[bank]) + context->levels[bank] = metag_in32(level_addr); + /* save mask state if any IRQs in use */ + if (mask) + context->masks[bank] = metag_in32(mask_addr); + + level_addr += HWSTAT_STRIDE; + mask_addr += HWSTAT_STRIDE; + } + + /* save trigger matrixing */ + __global_lock2(flags); + for (i = 0; i < 4; ++i) + for (j = 0; j < 4; ++j) + context->txvecint[i][j] = metag_in32(T0VECINT_BHALT + + TnVECINT_STRIDE*i + + 8*j); + __global_unlock2(flags); + + meta_intc_context = context; + return 0; +} + +/** + * meta_intc_resume() - restore saved irq state + * + * Restore the saved IRQ state and drop it. + */ +static void meta_intc_resume(void) +{ + struct meta_intc_priv *priv = &meta_intc_priv; + int i, j; + irq_hw_number_t hw; + unsigned int bank; + unsigned long flags; + struct meta_intc_context *context = meta_intc_context; + void __iomem *level_addr, *mask_addr, *vec_addr; + u32 mask, bit, tmp; + + meta_intc_context = NULL; + + hw = 0; + level_addr = meta_intc_level_addr(0); + mask_addr = meta_intc_mask_addr(0); + for (bank = 0; bank < priv->nr_banks; ++bank) { + vec_addr = meta_intc_vec_addr(hw); + + /* create mask of interrupts in use */ + mask = 0; + for (bit = 1; bit; bit <<= 1) { + i = irq_linear_revmap(priv->domain, hw); + /* restore mapped irqs, enabled or with actions */ + if (i && (!irqd_irq_disabled(irq_get_irq_data(i)) || + irq_has_action(i))) { + mask |= bit; + + /* restore trigger vector */ + metag_out32(context->vectors[hw], vec_addr); + } + + ++hw; + vec_addr += HWVECnEXT_STRIDE; + } + + if (mask) { + /* restore mask state */ + __global_lock2(flags); + tmp = metag_in32(mask_addr); + tmp = (tmp & ~mask) | (context->masks[bank] & mask); + metag_out32(tmp, mask_addr); + __global_unlock2(flags); + } + + mask = priv->levels_altered[bank]; + if (mask) { + /* restore level state */ + __global_lock2(flags); + tmp = metag_in32(level_addr); + tmp = (tmp & ~mask) | (context->levels[bank] & mask); + metag_out32(tmp, level_addr); + __global_unlock2(flags); + } + + level_addr += HWSTAT_STRIDE; + mask_addr += HWSTAT_STRIDE; + } + + /* restore trigger matrixing */ + __global_lock2(flags); + for (i = 0; i < 4; ++i) { + for (j = 0; j < 4; ++j) { + metag_out32(context->txvecint[i][j], + T0VECINT_BHALT + + TnVECINT_STRIDE*i + + 8*j); + } + } + __global_unlock2(flags); + + kfree(context); +} + +static struct syscore_ops meta_intc_syscore_ops = { + .suspend = meta_intc_suspend, + .resume = meta_intc_resume, +}; + +static void __init meta_intc_init_syscore_ops(struct meta_intc_priv *priv) +{ + register_syscore_ops(&meta_intc_syscore_ops); +} +#else +#define meta_intc_init_syscore_ops(priv) do {} while (0) +#endif + +/** + * meta_intc_init_cpu() - register with a Meta cpu + * @priv: private interrupt controller data + * @cpu: the CPU to register on + * + * Configure @cpu's TR2 irq so that we can demux external irqs. + */ +static void __init meta_intc_init_cpu(struct meta_intc_priv *priv, int cpu) +{ + unsigned int thread = cpu_2_hwthread_id[cpu]; + unsigned int signum = TBID_SIGNUM_TR2(thread); + int irq = tbisig_map(signum); + + /* Register the multiplexed IRQ handler */ + irq_set_chained_handler(irq, meta_intc_irq_demux); + irq_set_irq_type(irq, IRQ_TYPE_LEVEL_LOW); +} + +/** + * meta_intc_no_mask() - indicate lack of HWMASKEXT registers + * + * Called from SoC code (or init code below) to dynamically indicate the lack of + * HWMASKEXT registers (for example depending on some SoC revision register). + * This alters the irq mask and unmask callbacks to use the fallback + * unvectoring/retriggering technique instead of using HWMASKEXT registers. + */ +void __init meta_intc_no_mask(void) +{ + meta_intc_edge_chip.irq_mask = meta_intc_mask_irq_nomask; + meta_intc_edge_chip.irq_unmask = meta_intc_unmask_edge_irq_nomask; + meta_intc_level_chip.irq_mask = meta_intc_mask_irq_nomask; + meta_intc_level_chip.irq_unmask = meta_intc_unmask_level_irq_nomask; +} + +/** + * init_external_IRQ() - initialise the external irq controller + * + * Set up the external irq controller using device tree properties. This is + * called from init_IRQ(). + */ +int __init init_external_IRQ(void) +{ + struct meta_intc_priv *priv = &meta_intc_priv; + struct device_node *node; + int ret, cpu; + u32 val; + bool no_masks = false; + + node = of_find_compatible_node(NULL, NULL, "img,meta-intc"); + if (!node) + return -ENOENT; + + /* Get number of banks */ + ret = of_property_read_u32(node, "num-banks", &val); + if (ret) { + pr_err("meta-intc: No num-banks property found\n"); + return ret; + } + if (val < 1 || val > 4) { + pr_err("meta-intc: num-banks (%u) out of range\n", val); + return -EINVAL; + } + priv->nr_banks = val; + + /* Are any mask registers present? */ + if (of_get_property(node, "no-mask", NULL)) + no_masks = true; + + /* No HWMASKEXT registers present? */ + if (no_masks) + meta_intc_no_mask(); + + /* Set up an IRQ domain */ + /* + * This is a legacy IRQ domain for now until all the platform setup code + * has been converted to devicetree. + */ + priv->domain = irq_domain_add_linear(node, priv->nr_banks*32, + &meta_intc_domain_ops, priv); + if (unlikely(!priv->domain)) { + pr_err("meta-intc: cannot add IRQ domain\n"); + return -ENOMEM; + } + + /* Setup TR2 for all cpus. */ + for_each_possible_cpu(cpu) + meta_intc_init_cpu(priv, cpu); + + /* Set up system suspend/resume callbacks */ + meta_intc_init_syscore_ops(priv); + + pr_info("meta-intc: External IRQ controller initialised (%u IRQs)\n", + priv->nr_banks*32); + + return 0; +} diff --git a/drivers/irqchip/irq-metag.c b/drivers/irqchip/irq-metag.c new file mode 100644 index 0000000..8e94d7a --- /dev/null +++ b/drivers/irqchip/irq-metag.c @@ -0,0 +1,343 @@ +/* + * Meta internal (HWSTATMETA) interrupt code. + * + * Copyright (C) 2011-2012 Imagination Technologies Ltd. + * + * This code is based on the code in SoC/common/irq.c and SoC/comet/irq.c + * The code base could be generalised/merged as a lot of the functionality is + * similar. Until this is done, we try to keep the code simple here. + */ + +#include <linux/interrupt.h> +#include <linux/io.h> +#include <linux/irqdomain.h> + +#include <asm/irq.h> +#include <asm/hwthread.h> + +#define PERF0VECINT 0x04820580 +#define PERF1VECINT 0x04820588 +#define PERF0TRIG_OFFSET 16 +#define PERF1TRIG_OFFSET 17 + +/** + * struct metag_internal_irq_priv - private meta internal interrupt data + * @domain: IRQ domain for all internal Meta IRQs (HWSTATMETA) + * @unmasked: Record of unmasked IRQs + */ +struct metag_internal_irq_priv { + struct irq_domain *domain; + + unsigned long unmasked; +}; + +/* Private data for the one and only internal interrupt controller */ +static struct metag_internal_irq_priv metag_internal_irq_priv; + +static unsigned int metag_internal_irq_startup(struct irq_data *data); +static void metag_internal_irq_shutdown(struct irq_data *data); +static void metag_internal_irq_ack(struct irq_data *data); +static void metag_internal_irq_mask(struct irq_data *data); +static void metag_internal_irq_unmask(struct irq_data *data); +#ifdef CONFIG_SMP +static int metag_internal_irq_set_affinity(struct irq_data *data, + const struct cpumask *cpumask, bool force); +#endif + +static struct irq_chip internal_irq_edge_chip = { + .name = "HWSTATMETA-IRQ", + .irq_startup = metag_internal_irq_startup, + .irq_shutdown = metag_internal_irq_shutdown, + .irq_ack = metag_internal_irq_ack, + .irq_mask = metag_internal_irq_mask, + .irq_unmask = metag_internal_irq_unmask, +#ifdef CONFIG_SMP + .irq_set_affinity = metag_internal_irq_set_affinity, +#endif +}; + +/* + * metag_hwvec_addr - get the address of *VECINT regs of irq + * + * This function is a table of supported triggers on HWSTATMETA + * Could do with a structure, but better keep it simple. Changes + * in this code should be rare. + */ +static inline void __iomem *metag_hwvec_addr(irq_hw_number_t hw) +{ + void __iomem *addr; + + switch (hw) { + case PERF0TRIG_OFFSET: + addr = (void __iomem *)PERF0VECINT; + break; + case PERF1TRIG_OFFSET: + addr = (void __iomem *)PERF1VECINT; + break; + default: + addr = NULL; + break; + } + return addr; +} + +/* + * metag_internal_startup - setup an internal irq + * @irq: the irq to startup + * + * Multiplex interrupts for @irq onto TR1. Clear any pending + * interrupts. + */ +static unsigned int metag_internal_irq_startup(struct irq_data *data) +{ + /* Clear (toggle) the bit in HWSTATMETA for our interrupt. */ + metag_internal_irq_ack(data); + + /* Enable the interrupt by unmasking it */ + metag_internal_irq_unmask(data); + + return 0; +} + +/* + * metag_internal_irq_shutdown - turn off the irq + * @irq: the irq number to turn off + * + * Mask @irq and clear any pending interrupts. + * Stop muxing @irq onto TR1. + */ +static void metag_internal_irq_shutdown(struct irq_data *data) +{ + /* Disable the IRQ at the core by masking it. */ + metag_internal_irq_mask(data); + + /* Clear (toggle) the bit in HWSTATMETA for our interrupt. */ + metag_internal_irq_ack(data); +} + +/* + * metag_internal_irq_ack - acknowledge irq + * @irq: the irq to ack + */ +static void metag_internal_irq_ack(struct irq_data *data) +{ + irq_hw_number_t hw = data->hwirq; + unsigned int bit = 1 << hw; + + if (metag_in32(HWSTATMETA) & bit) + metag_out32(bit, HWSTATMETA); +} + +/** + * metag_internal_irq_mask() - mask an internal irq by unvectoring + * @data: data for the internal irq to mask + * + * HWSTATMETA has no mask register. Instead the IRQ is unvectored from the core + * and retriggered if necessary later. + */ +static void metag_internal_irq_mask(struct irq_data *data) +{ + struct metag_internal_irq_priv *priv = &metag_internal_irq_priv; + irq_hw_number_t hw = data->hwirq; + void __iomem *vec_addr = metag_hwvec_addr(hw); + + clear_bit(hw, &priv->unmasked); + + /* there is no interrupt mask, so unvector the interrupt */ + metag_out32(0, vec_addr); +} + +/** + * meta_intc_unmask_edge_irq_nomask() - unmask an edge irq by revectoring + * @data: data for the internal irq to unmask + * + * HWSTATMETA has no mask register. Instead the IRQ is revectored back to the + * core and retriggered if necessary. + */ +static void metag_internal_irq_unmask(struct irq_data *data) +{ + struct metag_internal_irq_priv *priv = &metag_internal_irq_priv; + irq_hw_number_t hw = data->hwirq; + unsigned int bit = 1 << hw; + void __iomem *vec_addr = metag_hwvec_addr(hw); + unsigned int thread = hard_processor_id(); + + set_bit(hw, &priv->unmasked); + + /* there is no interrupt mask, so revector the interrupt */ + metag_out32(TBI_TRIG_VEC(TBID_SIGNUM_TR1(thread)), vec_addr); + + /* + * Re-trigger interrupt + * + * Writing a 1 toggles, and a 0->1 transition triggers. We only + * retrigger if the status bit is already set, which means we + * need to clear it first. Retriggering is fundamentally racy + * because if the interrupt fires again after we clear it we + * could end up clearing it again and the interrupt handler + * thinking it hasn't fired. Therefore we need to keep trying to + * retrigger until the bit is set. + */ + if (metag_in32(HWSTATMETA) & bit) { + metag_out32(bit, HWSTATMETA); + while (!(metag_in32(HWSTATMETA) & bit)) + metag_out32(bit, HWSTATMETA); + } +} + +#ifdef CONFIG_SMP +/* + * metag_internal_irq_set_affinity - set the affinity for an interrupt + */ +static int metag_internal_irq_set_affinity(struct irq_data *data, + const struct cpumask *cpumask, bool force) +{ + unsigned int cpu, thread; + irq_hw_number_t hw = data->hwirq; + /* + * Wire up this interrupt from *VECINT to the Meta core. + * + * Note that we can't wire up *VECINT to interrupt more than + * one cpu (the interrupt code doesn't support it), so we just + * pick the first cpu we find in 'cpumask'. + */ + cpu = cpumask_any(cpumask); + thread = cpu_2_hwthread_id[cpu]; + + metag_out32(TBI_TRIG_VEC(TBID_SIGNUM_TR1(thread)), + metag_hwvec_addr(hw)); + + return 0; +} +#endif + +/* + * metag_internal_irq_demux - irq de-multiplexer + * @irq: the interrupt number + * @desc: the interrupt description structure for this irq + * + * The cpu receives an interrupt on TR1 when an interrupt has + * occurred. It is this function's job to demux this irq and + * figure out exactly which trigger needs servicing. + */ +static void metag_internal_irq_demux(unsigned int irq, struct irq_desc *desc) +{ + struct metag_internal_irq_priv *priv = irq_desc_get_handler_data(desc); + irq_hw_number_t hw; + unsigned int irq_no; + u32 status; + +recalculate: + status = metag_in32(HWSTATMETA) & priv->unmasked; + + for (hw = 0; status != 0; status >>= 1, ++hw) { + if (status & 0x1) { + /* + * Map the hardware IRQ number to a virtual Linux IRQ + * number. + */ + irq_no = irq_linear_revmap(priv->domain, hw); + + /* + * Only fire off interrupts that are + * registered to be handled by the kernel. + * Other interrupts are probably being + * handled by other Meta hardware threads. + */ + generic_handle_irq(irq_no); + + /* + * The handler may have re-enabled interrupts + * which could have caused a nested invocation + * of this code and make the copy of the + * status register we are using invalid. + */ + goto recalculate; + } + } +} + +/** + * internal_irq_map() - Map an internal meta IRQ to a virtual IRQ number. + * @hw: Number of the internal IRQ. Must be in range. + * + * Returns: The virtual IRQ number of the Meta internal IRQ specified by + * @hw. + */ +int internal_irq_map(unsigned int hw) +{ + struct metag_internal_irq_priv *priv = &metag_internal_irq_priv; + if (!priv->domain) + return -ENODEV; + return irq_create_mapping(priv->domain, hw); +} + +/** + * metag_internal_irq_init_cpu - regsister with the Meta cpu + * @cpu: the CPU to register on + * + * Configure @cpu's TR1 irq so that we can demux irqs. + */ +static void metag_internal_irq_init_cpu(struct metag_internal_irq_priv *priv, + int cpu) +{ + unsigned int thread = cpu_2_hwthread_id[cpu]; + unsigned int signum = TBID_SIGNUM_TR1(thread); + int irq = tbisig_map(signum); + + /* Register the multiplexed IRQ handler */ + irq_set_handler_data(irq, priv); + irq_set_chained_handler(irq, metag_internal_irq_demux); + irq_set_irq_type(irq, IRQ_TYPE_LEVEL_LOW); +} + +/** + * metag_internal_intc_map() - map an internal irq + * @d: irq domain of internal trigger block + * @irq: virtual irq number + * @hw: hardware irq number within internal trigger block + * + * This sets up a virtual irq for a specified hardware interrupt. The irq chip + * and handler is configured. + */ +static int metag_internal_intc_map(struct irq_domain *d, unsigned int irq, + irq_hw_number_t hw) +{ + /* only register interrupt if it is mapped */ + if (!metag_hwvec_addr(hw)) + return -EINVAL; + + irq_set_chip_and_handler(irq, &internal_irq_edge_chip, + handle_edge_irq); + return 0; +} + +static const struct irq_domain_ops metag_internal_intc_domain_ops = { + .map = metag_internal_intc_map, +}; + +/** + * metag_internal_irq_register - register internal IRQs + * + * Register the irq chip and handler function for all internal IRQs + */ +int __init init_internal_IRQ(void) +{ + struct metag_internal_irq_priv *priv = &metag_internal_irq_priv; + unsigned int cpu; + + /* Set up an IRQ domain */ + priv->domain = irq_domain_add_linear(NULL, 32, + &metag_internal_intc_domain_ops, + priv); + if (unlikely(!priv->domain)) { + pr_err("meta-internal-intc: cannot add IRQ domain\n"); + return -ENOMEM; + } + + /* Setup TR1 for all cpus. */ + for_each_possible_cpu(cpu) + metag_internal_irq_init_cpu(priv, cpu); + + return 0; +}; diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig index 91a02ee..e30b490 100644 --- a/drivers/md/Kconfig +++ b/drivers/md/Kconfig @@ -210,7 +210,7 @@ config DM_DEBUG config DM_BUFIO tristate - depends on BLK_DEV_DM && EXPERIMENTAL + depends on BLK_DEV_DM ---help--- This interface allows you to do buffered I/O on a device and acts as a cache, holding recently-read blocks in memory and performing @@ -218,7 +218,7 @@ config DM_BUFIO config DM_BIO_PRISON tristate - depends on BLK_DEV_DM && EXPERIMENTAL + depends on BLK_DEV_DM ---help--- Some bio locking schemes used by other device-mapper targets including thin provisioning. @@ -251,8 +251,8 @@ config DM_SNAPSHOT Allow volume managers to take writable snapshots of a device. config DM_THIN_PROVISIONING - tristate "Thin provisioning target (EXPERIMENTAL)" - depends on BLK_DEV_DM && EXPERIMENTAL + tristate "Thin provisioning target" + depends on BLK_DEV_DM select DM_PERSISTENT_DATA select DM_BIO_PRISON ---help--- @@ -268,6 +268,37 @@ config DM_DEBUG_BLOCK_STACK_TRACING If unsure, say N. +config DM_CACHE + tristate "Cache target (EXPERIMENTAL)" + depends on BLK_DEV_DM + default n + select DM_PERSISTENT_DATA + select DM_BIO_PRISON + ---help--- + dm-cache attempts to improve performance of a block device by + moving frequently used data to a smaller, higher performance + device. Different 'policy' plugins can be used to change the + algorithms used to select which blocks are promoted, demoted, + cleaned etc. It supports writeback and writethrough modes. + +config DM_CACHE_MQ + tristate "MQ Cache Policy (EXPERIMENTAL)" + depends on DM_CACHE + default y + ---help--- + A cache policy that uses a multiqueue ordered by recent hit + count to select which blocks should be promoted and demoted. + This is meant to be a general purpose policy. It prioritises + reads over writes. + +config DM_CACHE_CLEANER + tristate "Cleaner Cache Policy (EXPERIMENTAL)" + depends on DM_CACHE + default y + ---help--- + A simple cache policy that writes back all data to the + origin. Used when decommissioning a dm-cache. + config DM_MIRROR tristate "Mirror target" depends on BLK_DEV_DM @@ -302,8 +333,8 @@ config DM_RAID in one of the available parity distribution methods. config DM_LOG_USERSPACE - tristate "Mirror userspace logging (EXPERIMENTAL)" - depends on DM_MIRROR && EXPERIMENTAL && NET + tristate "Mirror userspace logging" + depends on DM_MIRROR && NET select CONNECTOR ---help--- The userspace logging module provides a mechanism for @@ -350,8 +381,8 @@ config DM_MULTIPATH_ST If unsure, say N. config DM_DELAY - tristate "I/O delaying target (EXPERIMENTAL)" - depends on BLK_DEV_DM && EXPERIMENTAL + tristate "I/O delaying target" + depends on BLK_DEV_DM ---help--- A target that delays reads and/or writes and can send them to different devices. Useful for testing. @@ -365,14 +396,14 @@ config DM_UEVENT Generate udev events for DM events. config DM_FLAKEY - tristate "Flakey target (EXPERIMENTAL)" - depends on BLK_DEV_DM && EXPERIMENTAL + tristate "Flakey target" + depends on BLK_DEV_DM ---help--- A target that intermittently fails I/O for debugging purposes. config DM_VERITY - tristate "Verity target support (EXPERIMENTAL)" - depends on BLK_DEV_DM && EXPERIMENTAL + tristate "Verity target support" + depends on BLK_DEV_DM select CRYPTO select CRYPTO_HASH select DM_BUFIO diff --git a/drivers/md/Makefile b/drivers/md/Makefile index 94dce8b..7ceeaef 100644 --- a/drivers/md/Makefile +++ b/drivers/md/Makefile @@ -11,6 +11,9 @@ dm-mirror-y += dm-raid1.o dm-log-userspace-y \ += dm-log-userspace-base.o dm-log-userspace-transfer.o dm-thin-pool-y += dm-thin.o dm-thin-metadata.o +dm-cache-y += dm-cache-target.o dm-cache-metadata.o dm-cache-policy.o +dm-cache-mq-y += dm-cache-policy-mq.o +dm-cache-cleaner-y += dm-cache-policy-cleaner.o md-mod-y += md.o bitmap.o raid456-y += raid5.o @@ -44,6 +47,9 @@ obj-$(CONFIG_DM_ZERO) += dm-zero.o obj-$(CONFIG_DM_RAID) += dm-raid.o obj-$(CONFIG_DM_THIN_PROVISIONING) += dm-thin-pool.o obj-$(CONFIG_DM_VERITY) += dm-verity.o +obj-$(CONFIG_DM_CACHE) += dm-cache.o +obj-$(CONFIG_DM_CACHE_MQ) += dm-cache-mq.o +obj-$(CONFIG_DM_CACHE_CLEANER) += dm-cache-cleaner.o ifeq ($(CONFIG_DM_UEVENT),y) dm-mod-objs += dm-uevent.o diff --git a/drivers/md/dm-bio-prison.c b/drivers/md/dm-bio-prison.c index d9d3f1c..85f0b70 100644 --- a/drivers/md/dm-bio-prison.c +++ b/drivers/md/dm-bio-prison.c @@ -14,14 +14,6 @@ /*----------------------------------------------------------------*/ -struct dm_bio_prison_cell { - struct hlist_node list; - struct dm_bio_prison *prison; - struct dm_cell_key key; - struct bio *holder; - struct bio_list bios; -}; - struct dm_bio_prison { spinlock_t lock; mempool_t *cell_pool; @@ -87,6 +79,19 @@ void dm_bio_prison_destroy(struct dm_bio_prison *prison) } EXPORT_SYMBOL_GPL(dm_bio_prison_destroy); +struct dm_bio_prison_cell *dm_bio_prison_alloc_cell(struct dm_bio_prison *prison, gfp_t gfp) +{ + return mempool_alloc(prison->cell_pool, gfp); +} +EXPORT_SYMBOL_GPL(dm_bio_prison_alloc_cell); + +void dm_bio_prison_free_cell(struct dm_bio_prison *prison, + struct dm_bio_prison_cell *cell) +{ + mempool_free(cell, prison->cell_pool); +} +EXPORT_SYMBOL_GPL(dm_bio_prison_free_cell); + static uint32_t hash_key(struct dm_bio_prison *prison, struct dm_cell_key *key) { const unsigned long BIG_PRIME = 4294967291UL; @@ -114,91 +119,95 @@ static struct dm_bio_prison_cell *__search_bucket(struct hlist_head *bucket, return NULL; } -/* - * This may block if a new cell needs allocating. You must ensure that - * cells will be unlocked even if the calling thread is blocked. - * - * Returns 1 if the cell was already held, 0 if @inmate is the new holder. - */ -int dm_bio_detain(struct dm_bio_prison *prison, struct dm_cell_key *key, - struct bio *inmate, struct dm_bio_prison_cell **ref) +static void __setup_new_cell(struct dm_bio_prison *prison, + struct dm_cell_key *key, + struct bio *holder, + uint32_t hash, + struct dm_bio_prison_cell *cell) { - int r = 1; - unsigned long flags; - uint32_t hash = hash_key(prison, key); - struct dm_bio_prison_cell *cell, *cell2; - - BUG_ON(hash > prison->nr_buckets); - - spin_lock_irqsave(&prison->lock, flags); - - cell = __search_bucket(prison->cells + hash, key); - if (cell) { - bio_list_add(&cell->bios, inmate); - goto out; - } + memcpy(&cell->key, key, sizeof(cell->key)); + cell->holder = holder; + bio_list_init(&cell->bios); + hlist_add_head(&cell->list, prison->cells + hash); +} - /* - * Allocate a new cell - */ - spin_unlock_irqrestore(&prison->lock, flags); - cell2 = mempool_alloc(prison->cell_pool, GFP_NOIO); - spin_lock_irqsave(&prison->lock, flags); +static int __bio_detain(struct dm_bio_prison *prison, + struct dm_cell_key *key, + struct bio *inmate, + struct dm_bio_prison_cell *cell_prealloc, + struct dm_bio_prison_cell **cell_result) +{ + uint32_t hash = hash_key(prison, key); + struct dm_bio_prison_cell *cell; - /* - * We've been unlocked, so we have to double check that - * nobody else has inserted this cell in the meantime. - */ cell = __search_bucket(prison->cells + hash, key); if (cell) { - mempool_free(cell2, prison->cell_pool); - bio_list_add(&cell->bios, inmate); - goto out; + if (inmate) + bio_list_add(&cell->bios, inmate); + *cell_result = cell; + return 1; } - /* - * Use new cell. - */ - cell = cell2; - - cell->prison = prison; - memcpy(&cell->key, key, sizeof(cell->key)); - cell->holder = inmate; - bio_list_init(&cell->bios); - hlist_add_head(&cell->list, prison->cells + hash); + __setup_new_cell(prison, key, inmate, hash, cell_prealloc); + *cell_result = cell_prealloc; + return 0; +} - r = 0; +static int bio_detain(struct dm_bio_prison *prison, + struct dm_cell_key *key, + struct bio *inmate, + struct dm_bio_prison_cell *cell_prealloc, + struct dm_bio_prison_cell **cell_result) +{ + int r; + unsigned long flags; -out: + spin_lock_irqsave(&prison->lock, flags); + r = __bio_detain(prison, key, inmate, cell_prealloc, cell_result); spin_unlock_irqrestore(&prison->lock, flags); - *ref = cell; - return r; } + +int dm_bio_detain(struct dm_bio_prison *prison, + struct dm_cell_key *key, + struct bio *inmate, + struct dm_bio_prison_cell *cell_prealloc, + struct dm_bio_prison_cell **cell_result) +{ + return bio_detain(prison, key, inmate, cell_prealloc, cell_result); +} EXPORT_SYMBOL_GPL(dm_bio_detain); +int dm_get_cell(struct dm_bio_prison *prison, + struct dm_cell_key *key, + struct dm_bio_prison_cell *cell_prealloc, + struct dm_bio_prison_cell **cell_result) +{ + return bio_detain(prison, key, NULL, cell_prealloc, cell_result); +} +EXPORT_SYMBOL_GPL(dm_get_cell); + /* * @inmates must have been initialised prior to this call */ -static void __cell_release(struct dm_bio_prison_cell *cell, struct bio_list *inmates) +static void __cell_release(struct dm_bio_prison_cell *cell, + struct bio_list *inmates) { - struct dm_bio_prison *prison = cell->prison; - hlist_del(&cell->list); if (inmates) { - bio_list_add(inmates, cell->holder); + if (cell->holder) + bio_list_add(inmates, cell->holder); bio_list_merge(inmates, &cell->bios); } - - mempool_free(cell, prison->cell_pool); } -void dm_cell_release(struct dm_bio_prison_cell *cell, struct bio_list *bios) +void dm_cell_release(struct dm_bio_prison *prison, + struct dm_bio_prison_cell *cell, + struct bio_list *bios) { unsigned long flags; - struct dm_bio_prison *prison = cell->prison; spin_lock_irqsave(&prison->lock, flags); __cell_release(cell, bios); @@ -209,20 +218,18 @@ EXPORT_SYMBOL_GPL(dm_cell_release); /* * Sometimes we don't want the holder, just the additional bios. */ -static void __cell_release_no_holder(struct dm_bio_prison_cell *cell, struct bio_list *inmates) +static void __cell_release_no_holder(struct dm_bio_prison_cell *cell, + struct bio_list *inmates) { - struct dm_bio_prison *prison = cell->prison; - hlist_del(&cell->list); bio_list_merge(inmates, &cell->bios); - - mempool_free(cell, prison->cell_pool); } -void dm_cell_release_no_holder(struct dm_bio_prison_cell *cell, struct bio_list *inmates) +void dm_cell_release_no_holder(struct dm_bio_prison *prison, + struct dm_bio_prison_cell *cell, + struct bio_list *inmates) { unsigned long flags; - struct dm_bio_prison *prison = cell->prison; spin_lock_irqsave(&prison->lock, flags); __cell_release_no_holder(cell, inmates); @@ -230,9 +237,9 @@ void dm_cell_release_no_holder(struct dm_bio_prison_cell *cell, struct bio_list } EXPORT_SYMBOL_GPL(dm_cell_release_no_holder); -void dm_cell_error(struct dm_bio_prison_cell *cell) +void dm_cell_error(struct dm_bio_prison *prison, + struct dm_bio_prison_cell *cell) { - struct dm_bio_prison *prison = cell->prison; struct bio_list bios; struct bio *bio; unsigned long flags; diff --git a/drivers/md/dm-bio-prison.h b/drivers/md/dm-bio-prison.h index 53d1a7a..3f83319 100644 --- a/drivers/md/dm-bio-prison.h +++ b/drivers/md/dm-bio-prison.h @@ -22,7 +22,6 @@ * subsequently unlocked the bios become available. */ struct dm_bio_prison; -struct dm_bio_prison_cell; /* FIXME: this needs to be more abstract */ struct dm_cell_key { @@ -31,21 +30,62 @@ struct dm_cell_key { dm_block_t block; }; +/* + * Treat this as opaque, only in header so callers can manage allocation + * themselves. + */ +struct dm_bio_prison_cell { + struct hlist_node list; + struct dm_cell_key key; + struct bio *holder; + struct bio_list bios; +}; + struct dm_bio_prison *dm_bio_prison_create(unsigned nr_cells); void dm_bio_prison_destroy(struct dm_bio_prison *prison); /* - * This may block if a new cell needs allocating. You must ensure that - * cells will be unlocked even if the calling thread is blocked. + * These two functions just wrap a mempool. This is a transitory step: + * Eventually all bio prison clients should manage their own cell memory. * - * Returns 1 if the cell was already held, 0 if @inmate is the new holder. + * Like mempool_alloc(), dm_bio_prison_alloc_cell() can only fail if called + * in interrupt context or passed GFP_NOWAIT. */ -int dm_bio_detain(struct dm_bio_prison *prison, struct dm_cell_key *key, - struct bio *inmate, struct dm_bio_prison_cell **ref); +struct dm_bio_prison_cell *dm_bio_prison_alloc_cell(struct dm_bio_prison *prison, + gfp_t gfp); +void dm_bio_prison_free_cell(struct dm_bio_prison *prison, + struct dm_bio_prison_cell *cell); -void dm_cell_release(struct dm_bio_prison_cell *cell, struct bio_list *bios); -void dm_cell_release_no_holder(struct dm_bio_prison_cell *cell, struct bio_list *inmates); -void dm_cell_error(struct dm_bio_prison_cell *cell); +/* + * Creates, or retrieves a cell for the given key. + * + * Returns 1 if pre-existing cell returned, zero if new cell created using + * @cell_prealloc. + */ +int dm_get_cell(struct dm_bio_prison *prison, + struct dm_cell_key *key, + struct dm_bio_prison_cell *cell_prealloc, + struct dm_bio_prison_cell **cell_result); + +/* + * An atomic op that combines retrieving a cell, and adding a bio to it. + * + * Returns 1 if the cell was already held, 0 if @inmate is the new holder. + */ +int dm_bio_detain(struct dm_bio_prison *prison, + struct dm_cell_key *key, + struct bio *inmate, + struct dm_bio_prison_cell *cell_prealloc, + struct dm_bio_prison_cell **cell_result); + +void dm_cell_release(struct dm_bio_prison *prison, + struct dm_bio_prison_cell *cell, + struct bio_list *bios); +void dm_cell_release_no_holder(struct dm_bio_prison *prison, + struct dm_bio_prison_cell *cell, + struct bio_list *inmates); +void dm_cell_error(struct dm_bio_prison *prison, + struct dm_bio_prison_cell *cell); /*----------------------------------------------------------------*/ diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c index 93205e3..3c955e1 100644 --- a/drivers/md/dm-bufio.c +++ b/drivers/md/dm-bufio.c @@ -1192,7 +1192,7 @@ EXPORT_SYMBOL_GPL(dm_bufio_write_dirty_buffers); int dm_bufio_issue_flush(struct dm_bufio_client *c) { struct dm_io_request io_req = { - .bi_rw = REQ_FLUSH, + .bi_rw = WRITE_FLUSH, .mem.type = DM_IO_KMEM, .mem.ptr.addr = NULL, .client = c->dm_io, diff --git a/drivers/md/dm-cache-block-types.h b/drivers/md/dm-cache-block-types.h new file mode 100644 index 0000000..bed4ad4 --- /dev/null +++ b/drivers/md/dm-cache-block-types.h @@ -0,0 +1,54 @@ +/* + * Copyright (C) 2012 Red Hat, Inc. + * + * This file is released under the GPL. + */ + +#ifndef DM_CACHE_BLOCK_TYPES_H +#define DM_CACHE_BLOCK_TYPES_H + +#include "persistent-data/dm-block-manager.h" + +/*----------------------------------------------------------------*/ + +/* + * It's helpful to get sparse to differentiate between indexes into the + * origin device, indexes into the cache device, and indexes into the + * discard bitset. + */ + +typedef dm_block_t __bitwise__ dm_oblock_t; +typedef uint32_t __bitwise__ dm_cblock_t; +typedef dm_block_t __bitwise__ dm_dblock_t; + +static inline dm_oblock_t to_oblock(dm_block_t b) +{ + return (__force dm_oblock_t) b; +} + +static inline dm_block_t from_oblock(dm_oblock_t b) +{ + return (__force dm_block_t) b; +} + +static inline dm_cblock_t to_cblock(uint32_t b) +{ + return (__force dm_cblock_t) b; +} + +static inline uint32_t from_cblock(dm_cblock_t b) +{ + return (__force uint32_t) b; +} + +static inline dm_dblock_t to_dblock(dm_block_t b) +{ + return (__force dm_dblock_t) b; +} + +static inline dm_block_t from_dblock(dm_dblock_t b) +{ + return (__force dm_block_t) b; +} + +#endif /* DM_CACHE_BLOCK_TYPES_H */ diff --git a/drivers/md/dm-cache-metadata.c b/drivers/md/dm-cache-metadata.c new file mode 100644 index 0000000..fbd3625 --- /dev/null +++ b/drivers/md/dm-cache-metadata.c @@ -0,0 +1,1146 @@ +/* + * Copyright (C) 2012 Red Hat, Inc. + * + * This file is released under the GPL. + */ + +#include "dm-cache-metadata.h" + +#include "persistent-data/dm-array.h" +#include "persistent-data/dm-bitset.h" +#include "persistent-data/dm-space-map.h" +#include "persistent-data/dm-space-map-disk.h" +#include "persistent-data/dm-transaction-manager.h" + +#include <linux/device-mapper.h> + +/*----------------------------------------------------------------*/ + +#define DM_MSG_PREFIX "cache metadata" + +#define CACHE_SUPERBLOCK_MAGIC 06142003 +#define CACHE_SUPERBLOCK_LOCATION 0 +#define CACHE_VERSION 1 +#define CACHE_METADATA_CACHE_SIZE 64 + +/* + * 3 for btree insert + + * 2 for btree lookup used within space map + */ +#define CACHE_MAX_CONCURRENT_LOCKS 5 +#define SPACE_MAP_ROOT_SIZE 128 + +enum superblock_flag_bits { + /* for spotting crashes that would invalidate the dirty bitset */ + CLEAN_SHUTDOWN, +}; + +/* + * Each mapping from cache block -> origin block carries a set of flags. + */ +enum mapping_bits { + /* + * A valid mapping. Because we're using an array we clear this + * flag for an non existant mapping. + */ + M_VALID = 1, + + /* + * The data on the cache is different from that on the origin. + */ + M_DIRTY = 2 +}; + +struct cache_disk_superblock { + __le32 csum; + __le32 flags; + __le64 blocknr; + + __u8 uuid[16]; + __le64 magic; + __le32 version; + + __u8 policy_name[CACHE_POLICY_NAME_SIZE]; + __le32 policy_hint_size; + + __u8 metadata_space_map_root[SPACE_MAP_ROOT_SIZE]; + __le64 mapping_root; + __le64 hint_root; + + __le64 discard_root; + __le64 discard_block_size; + __le64 discard_nr_blocks; + + __le32 data_block_size; + __le32 metadata_block_size; + __le32 cache_blocks; + + __le32 compat_flags; + __le32 compat_ro_flags; + __le32 incompat_flags; + + __le32 read_hits; + __le32 read_misses; + __le32 write_hits; + __le32 write_misses; +} __packed; + +struct dm_cache_metadata { + struct block_device *bdev; + struct dm_block_manager *bm; + struct dm_space_map *metadata_sm; + struct dm_transaction_manager *tm; + + struct dm_array_info info; + struct dm_array_info hint_info; + struct dm_disk_bitset discard_info; + + struct rw_semaphore root_lock; + dm_block_t root; + dm_block_t hint_root; + dm_block_t discard_root; + + sector_t discard_block_size; + dm_dblock_t discard_nr_blocks; + + sector_t data_block_size; + dm_cblock_t cache_blocks; + bool changed:1; + bool clean_when_opened:1; + + char policy_name[CACHE_POLICY_NAME_SIZE]; + size_t policy_hint_size; + struct dm_cache_statistics stats; +}; + +/*------------------------------------------------------------------- + * superblock validator + *-----------------------------------------------------------------*/ + +#define SUPERBLOCK_CSUM_XOR 9031977 + +static void sb_prepare_for_write(struct dm_block_validator *v, + struct dm_block *b, + size_t sb_block_size) +{ + struct cache_disk_superblock *disk_super = dm_block_data(b); + + disk_super->blocknr = cpu_to_le64(dm_block_location(b)); + disk_super->csum = cpu_to_le32(dm_bm_checksum(&disk_super->flags, + sb_block_size - sizeof(__le32), + SUPERBLOCK_CSUM_XOR)); +} + +static int sb_check(struct dm_block_validator *v, + struct dm_block *b, + size_t sb_block_size) +{ + struct cache_disk_superblock *disk_super = dm_block_data(b); + __le32 csum_le; + + if (dm_block_location(b) != le64_to_cpu(disk_super->blocknr)) { + DMERR("sb_check failed: blocknr %llu: wanted %llu", + le64_to_cpu(disk_super->blocknr), + (unsigned long long)dm_block_location(b)); + return -ENOTBLK; + } + + if (le64_to_cpu(disk_super->magic) != CACHE_SUPERBLOCK_MAGIC) { + DMERR("sb_check failed: magic %llu: wanted %llu", + le64_to_cpu(disk_super->magic), + (unsigned long long)CACHE_SUPERBLOCK_MAGIC); + return -EILSEQ; + } + + csum_le = cpu_to_le32(dm_bm_checksum(&disk_super->flags, + sb_block_size - sizeof(__le32), + SUPERBLOCK_CSUM_XOR)); + if (csum_le != disk_super->csum) { + DMERR("sb_check failed: csum %u: wanted %u", + le32_to_cpu(csum_le), le32_to_cpu(disk_super->csum)); + return -EILSEQ; + } + + return 0; +} + +static struct dm_block_validator sb_validator = { + .name = "superblock", + .prepare_for_write = sb_prepare_for_write, + .check = sb_check +}; + +/*----------------------------------------------------------------*/ + +static int superblock_read_lock(struct dm_cache_metadata *cmd, + struct dm_block **sblock) +{ + return dm_bm_read_lock(cmd->bm, CACHE_SUPERBLOCK_LOCATION, + &sb_validator, sblock); +} + +static int superblock_lock_zero(struct dm_cache_metadata *cmd, + struct dm_block **sblock) +{ + return dm_bm_write_lock_zero(cmd->bm, CACHE_SUPERBLOCK_LOCATION, + &sb_validator, sblock); +} + +static int superblock_lock(struct dm_cache_metadata *cmd, + struct dm_block **sblock) +{ + return dm_bm_write_lock(cmd->bm, CACHE_SUPERBLOCK_LOCATION, + &sb_validator, sblock); +} + +/*----------------------------------------------------------------*/ + +static int __superblock_all_zeroes(struct dm_block_manager *bm, int *result) +{ + int r; + unsigned i; + struct dm_block *b; + __le64 *data_le, zero = cpu_to_le64(0); + unsigned sb_block_size = dm_bm_block_size(bm) / sizeof(__le64); + + /* + * We can't use a validator here - it may be all zeroes. + */ + r = dm_bm_read_lock(bm, CACHE_SUPERBLOCK_LOCATION, NULL, &b); + if (r) + return r; + + data_le = dm_block_data(b); + *result = 1; + for (i = 0; i < sb_block_size; i++) { + if (data_le[i] != zero) { + *result = 0; + break; + } + } + + return dm_bm_unlock(b); +} + +static void __setup_mapping_info(struct dm_cache_metadata *cmd) +{ + struct dm_btree_value_type vt; + + vt.context = NULL; + vt.size = sizeof(__le64); + vt.inc = NULL; + vt.dec = NULL; + vt.equal = NULL; + dm_array_info_init(&cmd->info, cmd->tm, &vt); + + if (cmd->policy_hint_size) { + vt.size = sizeof(__le32); + dm_array_info_init(&cmd->hint_info, cmd->tm, &vt); + } +} + +static int __write_initial_superblock(struct dm_cache_metadata *cmd) +{ + int r; + struct dm_block *sblock; + size_t metadata_len; + struct cache_disk_superblock *disk_super; + sector_t bdev_size = i_size_read(cmd->bdev->bd_inode) >> SECTOR_SHIFT; + + /* FIXME: see if we can lose the max sectors limit */ + if (bdev_size > DM_CACHE_METADATA_MAX_SECTORS) + bdev_size = DM_CACHE_METADATA_MAX_SECTORS; + + r = dm_sm_root_size(cmd->metadata_sm, &metadata_len); + if (r < 0) + return r; + + r = dm_tm_pre_commit(cmd->tm); + if (r < 0) + return r; + + r = superblock_lock_zero(cmd, &sblock); + if (r) + return r; + + disk_super = dm_block_data(sblock); + disk_super->flags = 0; + memset(disk_super->uuid, 0, sizeof(disk_super->uuid)); + disk_super->magic = cpu_to_le64(CACHE_SUPERBLOCK_MAGIC); + disk_super->version = cpu_to_le32(CACHE_VERSION); + memset(disk_super->policy_name, 0, CACHE_POLICY_NAME_SIZE); + disk_super->policy_hint_size = 0; + + r = dm_sm_copy_root(cmd->metadata_sm, &disk_super->metadata_space_map_root, + metadata_len); + if (r < 0) + goto bad_locked; + + disk_super->mapping_root = cpu_to_le64(cmd->root); + disk_super->hint_root = cpu_to_le64(cmd->hint_root); + disk_super->discard_root = cpu_to_le64(cmd->discard_root); + disk_super->discard_block_size = cpu_to_le64(cmd->discard_block_size); + disk_super->discard_nr_blocks = cpu_to_le64(from_dblock(cmd->discard_nr_blocks)); + disk_super->metadata_block_size = cpu_to_le32(DM_CACHE_METADATA_BLOCK_SIZE >> SECTOR_SHIFT); + disk_super->data_block_size = cpu_to_le32(cmd->data_block_size); + disk_super->cache_blocks = cpu_to_le32(0); + memset(disk_super->policy_name, 0, sizeof(disk_super->policy_name)); + + disk_super->read_hits = cpu_to_le32(0); + disk_super->read_misses = cpu_to_le32(0); + disk_super->write_hits = cpu_to_le32(0); + disk_super->write_misses = cpu_to_le32(0); + + return dm_tm_commit(cmd->tm, sblock); + +bad_locked: + dm_bm_unlock(sblock); + return r; +} + +static int __format_metadata(struct dm_cache_metadata *cmd) +{ + int r; + + r = dm_tm_create_with_sm(cmd->bm, CACHE_SUPERBLOCK_LOCATION, + &cmd->tm, &cmd->metadata_sm); + if (r < 0) { + DMERR("tm_create_with_sm failed"); + return r; + } + + __setup_mapping_info(cmd); + + r = dm_array_empty(&cmd->info, &cmd->root); + if (r < 0) + goto bad; + + dm_disk_bitset_init(cmd->tm, &cmd->discard_info); + + r = dm_bitset_empty(&cmd->discard_info, &cmd->discard_root); + if (r < 0) + goto bad; + + cmd->discard_block_size = 0; + cmd->discard_nr_blocks = 0; + + r = __write_initial_superblock(cmd); + if (r) + goto bad; + + cmd->clean_when_opened = true; + return 0; + +bad: + dm_tm_destroy(cmd->tm); + dm_sm_destroy(cmd->metadata_sm); + + return r; +} + +static int __check_incompat_features(struct cache_disk_superblock *disk_super, + struct dm_cache_metadata *cmd) +{ + uint32_t features; + + features = le32_to_cpu(disk_super->incompat_flags) & ~DM_CACHE_FEATURE_INCOMPAT_SUPP; + if (features) { + DMERR("could not access metadata due to unsupported optional features (%lx).", + (unsigned long)features); + return -EINVAL; + } + + /* + * Check for read-only metadata to skip the following RDWR checks. + */ + if (get_disk_ro(cmd->bdev->bd_disk)) + return 0; + + features = le32_to_cpu(disk_super->compat_ro_flags) & ~DM_CACHE_FEATURE_COMPAT_RO_SUPP; + if (features) { + DMERR("could not access metadata RDWR due to unsupported optional features (%lx).", + (unsigned long)features); + return -EINVAL; + } + + return 0; +} + +static int __open_metadata(struct dm_cache_metadata *cmd) +{ + int r; + struct dm_block *sblock; + struct cache_disk_superblock *disk_super; + unsigned long sb_flags; + + r = superblock_read_lock(cmd, &sblock); + if (r < 0) { + DMERR("couldn't read lock superblock"); + return r; + } + + disk_super = dm_block_data(sblock); + + r = __check_incompat_features(disk_super, cmd); + if (r < 0) + goto bad; + + r = dm_tm_open_with_sm(cmd->bm, CACHE_SUPERBLOCK_LOCATION, + disk_super->metadata_space_map_root, + sizeof(disk_super->metadata_space_map_root), + &cmd->tm, &cmd->metadata_sm); + if (r < 0) { + DMERR("tm_open_with_sm failed"); + goto bad; + } + + __setup_mapping_info(cmd); + dm_disk_bitset_init(cmd->tm, &cmd->discard_info); + sb_flags = le32_to_cpu(disk_super->flags); + cmd->clean_when_opened = test_bit(CLEAN_SHUTDOWN, &sb_flags); + return dm_bm_unlock(sblock); + +bad: + dm_bm_unlock(sblock); + return r; +} + +static int __open_or_format_metadata(struct dm_cache_metadata *cmd, + bool format_device) +{ + int r, unformatted; + + r = __superblock_all_zeroes(cmd->bm, &unformatted); + if (r) + return r; + + if (unformatted) + return format_device ? __format_metadata(cmd) : -EPERM; + + return __open_metadata(cmd); +} + +static int __create_persistent_data_objects(struct dm_cache_metadata *cmd, + bool may_format_device) +{ + int r; + cmd->bm = dm_block_manager_create(cmd->bdev, DM_CACHE_METADATA_BLOCK_SIZE, + CACHE_METADATA_CACHE_SIZE, + CACHE_MAX_CONCURRENT_LOCKS); + if (IS_ERR(cmd->bm)) { + DMERR("could not create block manager"); + return PTR_ERR(cmd->bm); + } + + r = __open_or_format_metadata(cmd, may_format_device); + if (r) + dm_block_manager_destroy(cmd->bm); + + return r; +} + +static void __destroy_persistent_data_objects(struct dm_cache_metadata *cmd) +{ + dm_sm_destroy(cmd->metadata_sm); + dm_tm_destroy(cmd->tm); + dm_block_manager_destroy(cmd->bm); +} + +typedef unsigned long (*flags_mutator)(unsigned long); + +static void update_flags(struct cache_disk_superblock *disk_super, + flags_mutator mutator) +{ + uint32_t sb_flags = mutator(le32_to_cpu(disk_super->flags)); + disk_super->flags = cpu_to_le32(sb_flags); +} + +static unsigned long set_clean_shutdown(unsigned long flags) +{ + set_bit(CLEAN_SHUTDOWN, &flags); + return flags; +} + +static unsigned long clear_clean_shutdown(unsigned long flags) +{ + clear_bit(CLEAN_SHUTDOWN, &flags); + return flags; +} + +static void read_superblock_fields(struct dm_cache_metadata *cmd, + struct cache_disk_superblock *disk_super) +{ + cmd->root = le64_to_cpu(disk_super->mapping_root); + cmd->hint_root = le64_to_cpu(disk_super->hint_root); + cmd->discard_root = le64_to_cpu(disk_super->discard_root); + cmd->discard_block_size = le64_to_cpu(disk_super->discard_block_size); + cmd->discard_nr_blocks = to_dblock(le64_to_cpu(disk_super->discard_nr_blocks)); + cmd->data_block_size = le32_to_cpu(disk_super->data_block_size); + cmd->cache_blocks = to_cblock(le32_to_cpu(disk_super->cache_blocks)); + strncpy(cmd->policy_name, disk_super->policy_name, sizeof(cmd->policy_name)); + cmd->policy_hint_size = le32_to_cpu(disk_super->policy_hint_size); + + cmd->stats.read_hits = le32_to_cpu(disk_super->read_hits); + cmd->stats.read_misses = le32_to_cpu(disk_super->read_misses); + cmd->stats.write_hits = le32_to_cpu(disk_super->write_hits); + cmd->stats.write_misses = le32_to_cpu(disk_super->write_misses); + + cmd->changed = false; +} + +/* + * The mutator updates the superblock flags. + */ +static int __begin_transaction_flags(struct dm_cache_metadata *cmd, + flags_mutator mutator) +{ + int r; + struct cache_disk_superblock *disk_super; + struct dm_block *sblock; + + r = superblock_lock(cmd, &sblock); + if (r) + return r; + + disk_super = dm_block_data(sblock); + update_flags(disk_super, mutator); + read_superblock_fields(cmd, disk_super); + + return dm_bm_flush_and_unlock(cmd->bm, sblock); +} + +static int __begin_transaction(struct dm_cache_metadata *cmd) +{ + int r; + struct cache_disk_superblock *disk_super; + struct dm_block *sblock; + + /* + * We re-read the superblock every time. Shouldn't need to do this + * really. + */ + r = superblock_read_lock(cmd, &sblock); + if (r) + return r; + + disk_super = dm_block_data(sblock); + read_superblock_fields(cmd, disk_super); + dm_bm_unlock(sblock); + + return 0; +} + +static int __commit_transaction(struct dm_cache_metadata *cmd, + flags_mutator mutator) +{ + int r; + size_t metadata_len; + struct cache_disk_superblock *disk_super; + struct dm_block *sblock; + + /* + * We need to know if the cache_disk_superblock exceeds a 512-byte sector. + */ + BUILD_BUG_ON(sizeof(struct cache_disk_superblock) > 512); + + r = dm_bitset_flush(&cmd->discard_info, cmd->discard_root, + &cmd->discard_root); + if (r) + return r; + + r = dm_tm_pre_commit(cmd->tm); + if (r < 0) + return r; + + r = dm_sm_root_size(cmd->metadata_sm, &metadata_len); + if (r < 0) + return r; + + r = superblock_lock(cmd, &sblock); + if (r) + return r; + + disk_super = dm_block_data(sblock); + + if (mutator) + update_flags(disk_super, mutator); + + disk_super->mapping_root = cpu_to_le64(cmd->root); + disk_super->hint_root = cpu_to_le64(cmd->hint_root); + disk_super->discard_root = cpu_to_le64(cmd->discard_root); + disk_super->discard_block_size = cpu_to_le64(cmd->discard_block_size); + disk_super->discard_nr_blocks = cpu_to_le64(from_dblock(cmd->discard_nr_blocks)); + disk_super->cache_blocks = cpu_to_le32(from_cblock(cmd->cache_blocks)); + strncpy(disk_super->policy_name, cmd->policy_name, sizeof(disk_super->policy_name)); + + disk_super->read_hits = cpu_to_le32(cmd->stats.read_hits); + disk_super->read_misses = cpu_to_le32(cmd->stats.read_misses); + disk_super->write_hits = cpu_to_le32(cmd->stats.write_hits); + disk_super->write_misses = cpu_to_le32(cmd->stats.write_misses); + + r = dm_sm_copy_root(cmd->metadata_sm, &disk_super->metadata_space_map_root, + metadata_len); + if (r < 0) { + dm_bm_unlock(sblock); + return r; + } + + return dm_tm_commit(cmd->tm, sblock); +} + +/*----------------------------------------------------------------*/ + +/* + * The mappings are held in a dm-array that has 64-bit values stored in + * little-endian format. The index is the cblock, the high 48bits of the + * value are the oblock and the low 16 bit the flags. + */ +#define FLAGS_MASK ((1 << 16) - 1) + +static __le64 pack_value(dm_oblock_t block, unsigned flags) +{ + uint64_t value = from_oblock(block); + value <<= 16; + value = value | (flags & FLAGS_MASK); + return cpu_to_le64(value); +} + +static void unpack_value(__le64 value_le, dm_oblock_t *block, unsigned *flags) +{ + uint64_t value = le64_to_cpu(value_le); + uint64_t b = value >> 16; + *block = to_oblock(b); + *flags = value & FLAGS_MASK; +} + +/*----------------------------------------------------------------*/ + +struct dm_cache_metadata *dm_cache_metadata_open(struct block_device *bdev, + sector_t data_block_size, + bool may_format_device, + size_t policy_hint_size) +{ + int r; + struct dm_cache_metadata *cmd; + + cmd = kzalloc(sizeof(*cmd), GFP_KERNEL); + if (!cmd) { + DMERR("could not allocate metadata struct"); + return NULL; + } + + init_rwsem(&cmd->root_lock); + cmd->bdev = bdev; + cmd->data_block_size = data_block_size; + cmd->cache_blocks = 0; + cmd->policy_hint_size = policy_hint_size; + cmd->changed = true; + + r = __create_persistent_data_objects(cmd, may_format_device); + if (r) { + kfree(cmd); + return ERR_PTR(r); + } + + r = __begin_transaction_flags(cmd, clear_clean_shutdown); + if (r < 0) { + dm_cache_metadata_close(cmd); + return ERR_PTR(r); + } + + return cmd; +} + +void dm_cache_metadata_close(struct dm_cache_metadata *cmd) +{ + __destroy_persistent_data_objects(cmd); + kfree(cmd); +} + +int dm_cache_resize(struct dm_cache_metadata *cmd, dm_cblock_t new_cache_size) +{ + int r; + __le64 null_mapping = pack_value(0, 0); + + down_write(&cmd->root_lock); + __dm_bless_for_disk(&null_mapping); + r = dm_array_resize(&cmd->info, cmd->root, from_cblock(cmd->cache_blocks), + from_cblock(new_cache_size), + &null_mapping, &cmd->root); + if (!r) + cmd->cache_blocks = new_cache_size; + cmd->changed = true; + up_write(&cmd->root_lock); + + return r; +} + +int dm_cache_discard_bitset_resize(struct dm_cache_metadata *cmd, + sector_t discard_block_size, + dm_dblock_t new_nr_entries) +{ + int r; + + down_write(&cmd->root_lock); + r = dm_bitset_resize(&cmd->discard_info, + cmd->discard_root, + from_dblock(cmd->discard_nr_blocks), + from_dblock(new_nr_entries), + false, &cmd->discard_root); + if (!r) { + cmd->discard_block_size = discard_block_size; + cmd->discard_nr_blocks = new_nr_entries; + } + + cmd->changed = true; + up_write(&cmd->root_lock); + + return r; +} + +static int __set_discard(struct dm_cache_metadata *cmd, dm_dblock_t b) +{ + return dm_bitset_set_bit(&cmd->discard_info, cmd->discard_root, + from_dblock(b), &cmd->discard_root); +} + +static int __clear_discard(struct dm_cache_metadata *cmd, dm_dblock_t b) +{ + return dm_bitset_clear_bit(&cmd->discard_info, cmd->discard_root, + from_dblock(b), &cmd->discard_root); +} + +static int __is_discarded(struct dm_cache_metadata *cmd, dm_dblock_t b, + bool *is_discarded) +{ + return dm_bitset_test_bit(&cmd->discard_info, cmd->discard_root, + from_dblock(b), &cmd->discard_root, + is_discarded); +} + +static int __discard(struct dm_cache_metadata *cmd, + dm_dblock_t dblock, bool discard) +{ + int r; + + r = (discard ? __set_discard : __clear_discard)(cmd, dblock); + if (r) + return r; + + cmd->changed = true; + return 0; +} + +int dm_cache_set_discard(struct dm_cache_metadata *cmd, + dm_dblock_t dblock, bool discard) +{ + int r; + + down_write(&cmd->root_lock); + r = __discard(cmd, dblock, discard); + up_write(&cmd->root_lock); + + return r; +} + +static int __load_discards(struct dm_cache_metadata *cmd, + load_discard_fn fn, void *context) +{ + int r = 0; + dm_block_t b; + bool discard; + + for (b = 0; b < from_dblock(cmd->discard_nr_blocks); b++) { + dm_dblock_t dblock = to_dblock(b); + + if (cmd->clean_when_opened) { + r = __is_discarded(cmd, dblock, &discard); + if (r) + return r; + } else + discard = false; + + r = fn(context, cmd->discard_block_size, dblock, discard); + if (r) + break; + } + + return r; +} + +int dm_cache_load_discards(struct dm_cache_metadata *cmd, + load_discard_fn fn, void *context) +{ + int r; + + down_read(&cmd->root_lock); + r = __load_discards(cmd, fn, context); + up_read(&cmd->root_lock); + + return r; +} + +dm_cblock_t dm_cache_size(struct dm_cache_metadata *cmd) +{ + dm_cblock_t r; + + down_read(&cmd->root_lock); + r = cmd->cache_blocks; + up_read(&cmd->root_lock); + + return r; +} + +static int __remove(struct dm_cache_metadata *cmd, dm_cblock_t cblock) +{ + int r; + __le64 value = pack_value(0, 0); + + __dm_bless_for_disk(&value); + r = dm_array_set_value(&cmd->info, cmd->root, from_cblock(cblock), + &value, &cmd->root); + if (r) + return r; + + cmd->changed = true; + return 0; +} + +int dm_cache_remove_mapping(struct dm_cache_metadata *cmd, dm_cblock_t cblock) +{ + int r; + + down_write(&cmd->root_lock); + r = __remove(cmd, cblock); + up_write(&cmd->root_lock); + + return r; +} + +static int __insert(struct dm_cache_metadata *cmd, + dm_cblock_t cblock, dm_oblock_t oblock) +{ + int r; + __le64 value = pack_value(oblock, M_VALID); + __dm_bless_for_disk(&value); + + r = dm_array_set_value(&cmd->info, cmd->root, from_cblock(cblock), + &value, &cmd->root); + if (r) + return r; + + cmd->changed = true; + return 0; +} + +int dm_cache_insert_mapping(struct dm_cache_metadata *cmd, + dm_cblock_t cblock, dm_oblock_t oblock) +{ + int r; + + down_write(&cmd->root_lock); + r = __insert(cmd, cblock, oblock); + up_write(&cmd->root_lock); + + return r; +} + +struct thunk { + load_mapping_fn fn; + void *context; + + struct dm_cache_metadata *cmd; + bool respect_dirty_flags; + bool hints_valid; +}; + +static bool hints_array_initialized(struct dm_cache_metadata *cmd) +{ + return cmd->hint_root && cmd->policy_hint_size; +} + +static bool hints_array_available(struct dm_cache_metadata *cmd, + const char *policy_name) +{ + bool policy_names_match = !strncmp(cmd->policy_name, policy_name, + sizeof(cmd->policy_name)); + + return cmd->clean_when_opened && policy_names_match && + hints_array_initialized(cmd); +} + +static int __load_mapping(void *context, uint64_t cblock, void *leaf) +{ + int r = 0; + bool dirty; + __le64 value; + __le32 hint_value = 0; + dm_oblock_t oblock; + unsigned flags; + struct thunk *thunk = context; + struct dm_cache_metadata *cmd = thunk->cmd; + + memcpy(&value, leaf, sizeof(value)); + unpack_value(value, &oblock, &flags); + + if (flags & M_VALID) { + if (thunk->hints_valid) { + r = dm_array_get_value(&cmd->hint_info, cmd->hint_root, + cblock, &hint_value); + if (r && r != -ENODATA) + return r; + } + + dirty = thunk->respect_dirty_flags ? (flags & M_DIRTY) : true; + r = thunk->fn(thunk->context, oblock, to_cblock(cblock), + dirty, le32_to_cpu(hint_value), thunk->hints_valid); + } + + return r; +} + +static int __load_mappings(struct dm_cache_metadata *cmd, const char *policy_name, + load_mapping_fn fn, void *context) +{ + struct thunk thunk; + + thunk.fn = fn; + thunk.context = context; + + thunk.cmd = cmd; + thunk.respect_dirty_flags = cmd->clean_when_opened; + thunk.hints_valid = hints_array_available(cmd, policy_name); + + return dm_array_walk(&cmd->info, cmd->root, __load_mapping, &thunk); +} + +int dm_cache_load_mappings(struct dm_cache_metadata *cmd, const char *policy_name, + load_mapping_fn fn, void *context) +{ + int r; + + down_read(&cmd->root_lock); + r = __load_mappings(cmd, policy_name, fn, context); + up_read(&cmd->root_lock); + + return r; +} + +static int __dump_mapping(void *context, uint64_t cblock, void *leaf) +{ + int r = 0; + __le64 value; + dm_oblock_t oblock; + unsigned flags; + + memcpy(&value, leaf, sizeof(value)); + unpack_value(value, &oblock, &flags); + + return r; +} + +static int __dump_mappings(struct dm_cache_metadata *cmd) +{ + return dm_array_walk(&cmd->info, cmd->root, __dump_mapping, NULL); +} + +void dm_cache_dump(struct dm_cache_metadata *cmd) +{ + down_read(&cmd->root_lock); + __dump_mappings(cmd); + up_read(&cmd->root_lock); +} + +int dm_cache_changed_this_transaction(struct dm_cache_metadata *cmd) +{ + int r; + + down_read(&cmd->root_lock); + r = cmd->changed; + up_read(&cmd->root_lock); + + return r; +} + +static int __dirty(struct dm_cache_metadata *cmd, dm_cblock_t cblock, bool dirty) +{ + int r; + unsigned flags; + dm_oblock_t oblock; + __le64 value; + + r = dm_array_get_value(&cmd->info, cmd->root, from_cblock(cblock), &value); + if (r) + return r; + + unpack_value(value, &oblock, &flags); + + if (((flags & M_DIRTY) && dirty) || (!(flags & M_DIRTY) && !dirty)) + /* nothing to be done */ + return 0; + + value = pack_value(oblock, flags | (dirty ? M_DIRTY : 0)); + __dm_bless_for_disk(&value); + + r = dm_array_set_value(&cmd->info, cmd->root, from_cblock(cblock), + &value, &cmd->root); + if (r) + return r; + + cmd->changed = true; + return 0; + +} + +int dm_cache_set_dirty(struct dm_cache_metadata *cmd, + dm_cblock_t cblock, bool dirty) +{ + int r; + + down_write(&cmd->root_lock); + r = __dirty(cmd, cblock, dirty); + up_write(&cmd->root_lock); + + return r; +} + +void dm_cache_metadata_get_stats(struct dm_cache_metadata *cmd, + struct dm_cache_statistics *stats) +{ + down_read(&cmd->root_lock); + memcpy(stats, &cmd->stats, sizeof(*stats)); + up_read(&cmd->root_lock); +} + +void dm_cache_metadata_set_stats(struct dm_cache_metadata *cmd, + struct dm_cache_statistics *stats) +{ + down_write(&cmd->root_lock); + memcpy(&cmd->stats, stats, sizeof(*stats)); + up_write(&cmd->root_lock); +} + +int dm_cache_commit(struct dm_cache_metadata *cmd, bool clean_shutdown) +{ + int r; + flags_mutator mutator = (clean_shutdown ? set_clean_shutdown : + clear_clean_shutdown); + + down_write(&cmd->root_lock); + r = __commit_transaction(cmd, mutator); + if (r) + goto out; + + r = __begin_transaction(cmd); + +out: + up_write(&cmd->root_lock); + return r; +} + +int dm_cache_get_free_metadata_block_count(struct dm_cache_metadata *cmd, + dm_block_t *result) +{ + int r = -EINVAL; + + down_read(&cmd->root_lock); + r = dm_sm_get_nr_free(cmd->metadata_sm, result); + up_read(&cmd->root_lock); + + return r; +} + +int dm_cache_get_metadata_dev_size(struct dm_cache_metadata *cmd, + dm_block_t *result) +{ + int r = -EINVAL; + + down_read(&cmd->root_lock); + r = dm_sm_get_nr_blocks(cmd->metadata_sm, result); + up_read(&cmd->root_lock); + + return r; +} + +/*----------------------------------------------------------------*/ + +static int begin_hints(struct dm_cache_metadata *cmd, struct dm_cache_policy *policy) +{ + int r; + __le32 value; + size_t hint_size; + const char *policy_name = dm_cache_policy_get_name(policy); + + if (!policy_name[0] || + (strlen(policy_name) > sizeof(cmd->policy_name) - 1)) + return -EINVAL; + + if (strcmp(cmd->policy_name, policy_name)) { + strncpy(cmd->policy_name, policy_name, sizeof(cmd->policy_name)); + + hint_size = dm_cache_policy_get_hint_size(policy); + if (!hint_size) + return 0; /* short-circuit hints initialization */ + cmd->policy_hint_size = hint_size; + + if (cmd->hint_root) { + r = dm_array_del(&cmd->hint_info, cmd->hint_root); + if (r) + return r; + } + + r = dm_array_empty(&cmd->hint_info, &cmd->hint_root); + if (r) + return r; + + value = cpu_to_le32(0); + __dm_bless_for_disk(&value); + r = dm_array_resize(&cmd->hint_info, cmd->hint_root, 0, + from_cblock(cmd->cache_blocks), + &value, &cmd->hint_root); + if (r) + return r; + } + + return 0; +} + +int dm_cache_begin_hints(struct dm_cache_metadata *cmd, struct dm_cache_policy *policy) +{ + int r; + + down_write(&cmd->root_lock); + r = begin_hints(cmd, policy); + up_write(&cmd->root_lock); + + return r; +} + +static int save_hint(struct dm_cache_metadata *cmd, dm_cblock_t cblock, + uint32_t hint) +{ + int r; + __le32 value = cpu_to_le32(hint); + __dm_bless_for_disk(&value); + + r = dm_array_set_value(&cmd->hint_info, cmd->hint_root, + from_cblock(cblock), &value, &cmd->hint_root); + cmd->changed = true; + + return r; +} + +int dm_cache_save_hint(struct dm_cache_metadata *cmd, dm_cblock_t cblock, + uint32_t hint) +{ + int r; + + if (!hints_array_initialized(cmd)) + return 0; + + down_write(&cmd->root_lock); + r = save_hint(cmd, cblock, hint); + up_write(&cmd->root_lock); + + return r; +} diff --git a/drivers/md/dm-cache-metadata.h b/drivers/md/dm-cache-metadata.h new file mode 100644 index 0000000..135864e --- /dev/null +++ b/drivers/md/dm-cache-metadata.h @@ -0,0 +1,142 @@ +/* + * Copyright (C) 2012 Red Hat, Inc. + * + * This file is released under the GPL. + */ + +#ifndef DM_CACHE_METADATA_H +#define DM_CACHE_METADATA_H + +#include "dm-cache-block-types.h" +#include "dm-cache-policy-internal.h" + +/*----------------------------------------------------------------*/ + +#define DM_CACHE_METADATA_BLOCK_SIZE 4096 + +/* FIXME: remove this restriction */ +/* + * The metadata device is currently limited in size. + * + * We have one block of index, which can hold 255 index entries. Each + * index entry contains allocation info about 16k metadata blocks. + */ +#define DM_CACHE_METADATA_MAX_SECTORS (255 * (1 << 14) * (DM_CACHE_METADATA_BLOCK_SIZE / (1 << SECTOR_SHIFT))) + +/* + * A metadata device larger than 16GB triggers a warning. + */ +#define DM_CACHE_METADATA_MAX_SECTORS_WARNING (16 * (1024 * 1024 * 1024 >> SECTOR_SHIFT)) + +/*----------------------------------------------------------------*/ + +/* + * Ext[234]-style compat feature flags. + * + * A new feature which old metadata will still be compatible with should + * define a DM_CACHE_FEATURE_COMPAT_* flag (rarely useful). + * + * A new feature that is not compatible with old code should define a + * DM_CACHE_FEATURE_INCOMPAT_* flag and guard the relevant code with + * that flag. + * + * A new feature that is not compatible with old code accessing the + * metadata RDWR should define a DM_CACHE_FEATURE_RO_COMPAT_* flag and + * guard the relevant code with that flag. + * + * As these various flags are defined they should be added to the + * following masks. + */ +#define DM_CACHE_FEATURE_COMPAT_SUPP 0UL +#define DM_CACHE_FEATURE_COMPAT_RO_SUPP 0UL +#define DM_CACHE_FEATURE_INCOMPAT_SUPP 0UL + +/* + * Reopens or creates a new, empty metadata volume. + * Returns an ERR_PTR on failure. + */ +struct dm_cache_metadata *dm_cache_metadata_open(struct block_device *bdev, + sector_t data_block_size, + bool may_format_device, + size_t policy_hint_size); + +void dm_cache_metadata_close(struct dm_cache_metadata *cmd); + +/* + * The metadata needs to know how many cache blocks there are. We don't + * care about the origin, assuming the core target is giving us valid + * origin blocks to map to. + */ +int dm_cache_resize(struct dm_cache_metadata *cmd, dm_cblock_t new_cache_size); +dm_cblock_t dm_cache_size(struct dm_cache_metadata *cmd); + +int dm_cache_discard_bitset_resize(struct dm_cache_metadata *cmd, + sector_t discard_block_size, + dm_dblock_t new_nr_entries); + +typedef int (*load_discard_fn)(void *context, sector_t discard_block_size, + dm_dblock_t dblock, bool discarded); +int dm_cache_load_discards(struct dm_cache_metadata *cmd, + load_discard_fn fn, void *context); + +int dm_cache_set_discard(struct dm_cache_metadata *cmd, dm_dblock_t dblock, bool discard); + +int dm_cache_remove_mapping(struct dm_cache_metadata *cmd, dm_cblock_t cblock); +int dm_cache_insert_mapping(struct dm_cache_metadata *cmd, dm_cblock_t cblock, dm_oblock_t oblock); +int dm_cache_changed_this_transaction(struct dm_cache_metadata *cmd); + +typedef int (*load_mapping_fn)(void *context, dm_oblock_t oblock, + dm_cblock_t cblock, bool dirty, + uint32_t hint, bool hint_valid); +int dm_cache_load_mappings(struct dm_cache_metadata *cmd, + const char *policy_name, + load_mapping_fn fn, + void *context); + +int dm_cache_set_dirty(struct dm_cache_metadata *cmd, dm_cblock_t cblock, bool dirty); + +struct dm_cache_statistics { + uint32_t read_hits; + uint32_t read_misses; + uint32_t write_hits; + uint32_t write_misses; +}; + +void dm_cache_metadata_get_stats(struct dm_cache_metadata *cmd, + struct dm_cache_statistics *stats); +void dm_cache_metadata_set_stats(struct dm_cache_metadata *cmd, + struct dm_cache_statistics *stats); + +int dm_cache_commit(struct dm_cache_metadata *cmd, bool clean_shutdown); + +int dm_cache_get_free_metadata_block_count(struct dm_cache_metadata *cmd, + dm_block_t *result); + +int dm_cache_get_metadata_dev_size(struct dm_cache_metadata *cmd, + dm_block_t *result); + +void dm_cache_dump(struct dm_cache_metadata *cmd); + +/* + * The policy is invited to save a 32bit hint value for every cblock (eg, + * for a hit count). These are stored against the policy name. If + * policies are changed, then hints will be lost. If the machine crashes, + * hints will be lost. + * + * The hints are indexed by the cblock, but many policies will not + * neccessarily have a fast way of accessing efficiently via cblock. So + * rather than querying the policy for each cblock, we let it walk its data + * structures and fill in the hints in whatever order it wishes. + */ + +int dm_cache_begin_hints(struct dm_cache_metadata *cmd, struct dm_cache_policy *p); + +/* + * requests hints for every cblock and stores in the metadata device. + */ +int dm_cache_save_hint(struct dm_cache_metadata *cmd, + dm_cblock_t cblock, uint32_t hint); + +/*----------------------------------------------------------------*/ + +#endif /* DM_CACHE_METADATA_H */ diff --git a/drivers/md/dm-cache-policy-cleaner.c b/drivers/md/dm-cache-policy-cleaner.c new file mode 100644 index 0000000..cc05d70 --- /dev/null +++ b/drivers/md/dm-cache-policy-cleaner.c @@ -0,0 +1,464 @@ +/* + * Copyright (C) 2012 Red Hat. All rights reserved. + * + * writeback cache policy supporting flushing out dirty cache blocks. + * + * This file is released under the GPL. + */ + +#include "dm-cache-policy.h" +#include "dm.h" + +#include <linux/hash.h> +#include <linux/module.h> +#include <linux/slab.h> +#include <linux/vmalloc.h> + +/*----------------------------------------------------------------*/ + +#define DM_MSG_PREFIX "cache cleaner" +#define CLEANER_VERSION "1.0.0" + +/* Cache entry struct. */ +struct wb_cache_entry { + struct list_head list; + struct hlist_node hlist; + + dm_oblock_t oblock; + dm_cblock_t cblock; + bool dirty:1; + bool pending:1; +}; + +struct hash { + struct hlist_head *table; + dm_block_t hash_bits; + unsigned nr_buckets; +}; + +struct policy { + struct dm_cache_policy policy; + spinlock_t lock; + + struct list_head free; + struct list_head clean; + struct list_head clean_pending; + struct list_head dirty; + + /* + * We know exactly how many cblocks will be needed, + * so we can allocate them up front. + */ + dm_cblock_t cache_size, nr_cblocks_allocated; + struct wb_cache_entry *cblocks; + struct hash chash; +}; + +/*----------------------------------------------------------------------------*/ + +/* + * Low-level functions. + */ +static unsigned next_power(unsigned n, unsigned min) +{ + return roundup_pow_of_two(max(n, min)); +} + +static struct policy *to_policy(struct dm_cache_policy *p) +{ + return container_of(p, struct policy, policy); +} + +static struct list_head *list_pop(struct list_head *q) +{ + struct list_head *r = q->next; + + list_del(r); + + return r; +} + +/*----------------------------------------------------------------------------*/ + +/* Allocate/free various resources. */ +static int alloc_hash(struct hash *hash, unsigned elts) +{ + hash->nr_buckets = next_power(elts >> 4, 16); + hash->hash_bits = ffs(hash->nr_buckets) - 1; + hash->table = vzalloc(sizeof(*hash->table) * hash->nr_buckets); + + return hash->table ? 0 : -ENOMEM; +} + +static void free_hash(struct hash *hash) +{ + vfree(hash->table); +} + +static int alloc_cache_blocks_with_hash(struct policy *p, dm_cblock_t cache_size) +{ + int r = -ENOMEM; + + p->cblocks = vzalloc(sizeof(*p->cblocks) * from_cblock(cache_size)); + if (p->cblocks) { + unsigned u = from_cblock(cache_size); + + while (u--) + list_add(&p->cblocks[u].list, &p->free); + + p->nr_cblocks_allocated = 0; + + /* Cache entries hash. */ + r = alloc_hash(&p->chash, from_cblock(cache_size)); + if (r) + vfree(p->cblocks); + } + + return r; +} + +static void free_cache_blocks_and_hash(struct policy *p) +{ + free_hash(&p->chash); + vfree(p->cblocks); +} + +static struct wb_cache_entry *alloc_cache_entry(struct policy *p) +{ + struct wb_cache_entry *e; + + BUG_ON(from_cblock(p->nr_cblocks_allocated) >= from_cblock(p->cache_size)); + + e = list_entry(list_pop(&p->free), struct wb_cache_entry, list); + p->nr_cblocks_allocated = to_cblock(from_cblock(p->nr_cblocks_allocated) + 1); + + return e; +} + +/*----------------------------------------------------------------------------*/ + +/* Hash functions (lookup, insert, remove). */ +static struct wb_cache_entry *lookup_cache_entry(struct policy *p, dm_oblock_t oblock) +{ + struct hash *hash = &p->chash; + unsigned h = hash_64(from_oblock(oblock), hash->hash_bits); + struct wb_cache_entry *cur; + struct hlist_head *bucket = &hash->table[h]; + + hlist_for_each_entry(cur, bucket, hlist) { + if (cur->oblock == oblock) { + /* Move upfront bucket for faster access. */ + hlist_del(&cur->hlist); + hlist_add_head(&cur->hlist, bucket); + return cur; + } + } + + return NULL; +} + +static void insert_cache_hash_entry(struct policy *p, struct wb_cache_entry *e) +{ + unsigned h = hash_64(from_oblock(e->oblock), p->chash.hash_bits); + + hlist_add_head(&e->hlist, &p->chash.table[h]); +} + +static void remove_cache_hash_entry(struct wb_cache_entry *e) +{ + hlist_del(&e->hlist); +} + +/* Public interface (see dm-cache-policy.h */ +static int wb_map(struct dm_cache_policy *pe, dm_oblock_t oblock, + bool can_block, bool can_migrate, bool discarded_oblock, + struct bio *bio, struct policy_result *result) +{ + struct policy *p = to_policy(pe); + struct wb_cache_entry *e; + unsigned long flags; + + result->op = POLICY_MISS; + + if (can_block) + spin_lock_irqsave(&p->lock, flags); + + else if (!spin_trylock_irqsave(&p->lock, flags)) + return -EWOULDBLOCK; + + e = lookup_cache_entry(p, oblock); + if (e) { + result->op = POLICY_HIT; + result->cblock = e->cblock; + + } + + spin_unlock_irqrestore(&p->lock, flags); + + return 0; +} + +static int wb_lookup(struct dm_cache_policy *pe, dm_oblock_t oblock, dm_cblock_t *cblock) +{ + int r; + struct policy *p = to_policy(pe); + struct wb_cache_entry *e; + unsigned long flags; + + if (!spin_trylock_irqsave(&p->lock, flags)) + return -EWOULDBLOCK; + + e = lookup_cache_entry(p, oblock); + if (e) { + *cblock = e->cblock; + r = 0; + + } else + r = -ENOENT; + + spin_unlock_irqrestore(&p->lock, flags); + + return r; +} + +static void __set_clear_dirty(struct dm_cache_policy *pe, dm_oblock_t oblock, bool set) +{ + struct policy *p = to_policy(pe); + struct wb_cache_entry *e; + + e = lookup_cache_entry(p, oblock); + BUG_ON(!e); + + if (set) { + if (!e->dirty) { + e->dirty = true; + list_move(&e->list, &p->dirty); + } + + } else { + if (e->dirty) { + e->pending = false; + e->dirty = false; + list_move(&e->list, &p->clean); + } + } +} + +static void wb_set_dirty(struct dm_cache_policy *pe, dm_oblock_t oblock) +{ + struct policy *p = to_policy(pe); + unsigned long flags; + + spin_lock_irqsave(&p->lock, flags); + __set_clear_dirty(pe, oblock, true); + spin_unlock_irqrestore(&p->lock, flags); +} + +static void wb_clear_dirty(struct dm_cache_policy *pe, dm_oblock_t oblock) +{ + struct policy *p = to_policy(pe); + unsigned long flags; + + spin_lock_irqsave(&p->lock, flags); + __set_clear_dirty(pe, oblock, false); + spin_unlock_irqrestore(&p->lock, flags); +} + +static void add_cache_entry(struct policy *p, struct wb_cache_entry *e) +{ + insert_cache_hash_entry(p, e); + if (e->dirty) + list_add(&e->list, &p->dirty); + else + list_add(&e->list, &p->clean); +} + +static int wb_load_mapping(struct dm_cache_policy *pe, + dm_oblock_t oblock, dm_cblock_t cblock, + uint32_t hint, bool hint_valid) +{ + int r; + struct policy *p = to_policy(pe); + struct wb_cache_entry *e = alloc_cache_entry(p); + + if (e) { + e->cblock = cblock; + e->oblock = oblock; + e->dirty = false; /* blocks default to clean */ + add_cache_entry(p, e); + r = 0; + + } else + r = -ENOMEM; + + return r; +} + +static void wb_destroy(struct dm_cache_policy *pe) +{ + struct policy *p = to_policy(pe); + + free_cache_blocks_and_hash(p); + kfree(p); +} + +static struct wb_cache_entry *__wb_force_remove_mapping(struct policy *p, dm_oblock_t oblock) +{ + struct wb_cache_entry *r = lookup_cache_entry(p, oblock); + + BUG_ON(!r); + + remove_cache_hash_entry(r); + list_del(&r->list); + + return r; +} + +static void wb_remove_mapping(struct dm_cache_policy *pe, dm_oblock_t oblock) +{ + struct policy *p = to_policy(pe); + struct wb_cache_entry *e; + unsigned long flags; + + spin_lock_irqsave(&p->lock, flags); + e = __wb_force_remove_mapping(p, oblock); + list_add_tail(&e->list, &p->free); + BUG_ON(!from_cblock(p->nr_cblocks_allocated)); + p->nr_cblocks_allocated = to_cblock(from_cblock(p->nr_cblocks_allocated) - 1); + spin_unlock_irqrestore(&p->lock, flags); +} + +static void wb_force_mapping(struct dm_cache_policy *pe, + dm_oblock_t current_oblock, dm_oblock_t oblock) +{ + struct policy *p = to_policy(pe); + struct wb_cache_entry *e; + unsigned long flags; + + spin_lock_irqsave(&p->lock, flags); + e = __wb_force_remove_mapping(p, current_oblock); + e->oblock = oblock; + add_cache_entry(p, e); + spin_unlock_irqrestore(&p->lock, flags); +} + +static struct wb_cache_entry *get_next_dirty_entry(struct policy *p) +{ + struct list_head *l; + struct wb_cache_entry *r; + + if (list_empty(&p->dirty)) + return NULL; + + l = list_pop(&p->dirty); + r = container_of(l, struct wb_cache_entry, list); + list_add(l, &p->clean_pending); + + return r; +} + +static int wb_writeback_work(struct dm_cache_policy *pe, + dm_oblock_t *oblock, + dm_cblock_t *cblock) +{ + int r = -ENOENT; + struct policy *p = to_policy(pe); + struct wb_cache_entry *e; + unsigned long flags; + + spin_lock_irqsave(&p->lock, flags); + + e = get_next_dirty_entry(p); + if (e) { + *oblock = e->oblock; + *cblock = e->cblock; + r = 0; + } + + spin_unlock_irqrestore(&p->lock, flags); + + return r; +} + +static dm_cblock_t wb_residency(struct dm_cache_policy *pe) +{ + return to_policy(pe)->nr_cblocks_allocated; +} + +/* Init the policy plugin interface function pointers. */ +static void init_policy_functions(struct policy *p) +{ + p->policy.destroy = wb_destroy; + p->policy.map = wb_map; + p->policy.lookup = wb_lookup; + p->policy.set_dirty = wb_set_dirty; + p->policy.clear_dirty = wb_clear_dirty; + p->policy.load_mapping = wb_load_mapping; + p->policy.walk_mappings = NULL; + p->policy.remove_mapping = wb_remove_mapping; + p->policy.writeback_work = wb_writeback_work; + p->policy.force_mapping = wb_force_mapping; + p->policy.residency = wb_residency; + p->policy.tick = NULL; +} + +static struct dm_cache_policy *wb_create(dm_cblock_t cache_size, + sector_t origin_size, + sector_t cache_block_size) +{ + int r; + struct policy *p = kzalloc(sizeof(*p), GFP_KERNEL); + + if (!p) + return NULL; + + init_policy_functions(p); + INIT_LIST_HEAD(&p->free); + INIT_LIST_HEAD(&p->clean); + INIT_LIST_HEAD(&p->clean_pending); + INIT_LIST_HEAD(&p->dirty); + + p->cache_size = cache_size; + spin_lock_init(&p->lock); + + /* Allocate cache entry structs and add them to free list. */ + r = alloc_cache_blocks_with_hash(p, cache_size); + if (!r) + return &p->policy; + + kfree(p); + + return NULL; +} +/*----------------------------------------------------------------------------*/ + +static struct dm_cache_policy_type wb_policy_type = { + .name = "cleaner", + .hint_size = 0, + .owner = THIS_MODULE, + .create = wb_create +}; + +static int __init wb_init(void) +{ + int r = dm_cache_policy_register(&wb_policy_type); + + if (r < 0) + DMERR("register failed %d", r); + else + DMINFO("version " CLEANER_VERSION " loaded"); + + return r; +} + +static void __exit wb_exit(void) +{ + dm_cache_policy_unregister(&wb_policy_type); +} + +module_init(wb_init); +module_exit(wb_exit); + +MODULE_AUTHOR("Heinz Mauelshagen <dm-devel@redhat.com>"); +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("cleaner cache policy"); diff --git a/drivers/md/dm-cache-policy-internal.h b/drivers/md/dm-cache-policy-internal.h new file mode 100644 index 0000000..52a75be --- /dev/null +++ b/drivers/md/dm-cache-policy-internal.h @@ -0,0 +1,124 @@ +/* + * Copyright (C) 2012 Red Hat. All rights reserved. + * + * This file is released under the GPL. + */ + +#ifndef DM_CACHE_POLICY_INTERNAL_H +#define DM_CACHE_POLICY_INTERNAL_H + +#include "dm-cache-policy.h" + +/*----------------------------------------------------------------*/ + +/* + * Little inline functions that simplify calling the policy methods. + */ +static inline int policy_map(struct dm_cache_policy *p, dm_oblock_t oblock, + bool can_block, bool can_migrate, bool discarded_oblock, + struct bio *bio, struct policy_result *result) +{ + return p->map(p, oblock, can_block, can_migrate, discarded_oblock, bio, result); +} + +static inline int policy_lookup(struct dm_cache_policy *p, dm_oblock_t oblock, dm_cblock_t *cblock) +{ + BUG_ON(!p->lookup); + return p->lookup(p, oblock, cblock); +} + +static inline void policy_set_dirty(struct dm_cache_policy *p, dm_oblock_t oblock) +{ + if (p->set_dirty) + p->set_dirty(p, oblock); +} + +static inline void policy_clear_dirty(struct dm_cache_policy *p, dm_oblock_t oblock) +{ + if (p->clear_dirty) + p->clear_dirty(p, oblock); +} + +static inline int policy_load_mapping(struct dm_cache_policy *p, + dm_oblock_t oblock, dm_cblock_t cblock, + uint32_t hint, bool hint_valid) +{ + return p->load_mapping(p, oblock, cblock, hint, hint_valid); +} + +static inline int policy_walk_mappings(struct dm_cache_policy *p, + policy_walk_fn fn, void *context) +{ + return p->walk_mappings ? p->walk_mappings(p, fn, context) : 0; +} + +static inline int policy_writeback_work(struct dm_cache_policy *p, + dm_oblock_t *oblock, + dm_cblock_t *cblock) +{ + return p->writeback_work ? p->writeback_work(p, oblock, cblock) : -ENOENT; +} + +static inline void policy_remove_mapping(struct dm_cache_policy *p, dm_oblock_t oblock) +{ + return p->remove_mapping(p, oblock); +} + +static inline void policy_force_mapping(struct dm_cache_policy *p, + dm_oblock_t current_oblock, dm_oblock_t new_oblock) +{ + return p->force_mapping(p, current_oblock, new_oblock); +} + +static inline dm_cblock_t policy_residency(struct dm_cache_policy *p) +{ + return p->residency(p); +} + +static inline void policy_tick(struct dm_cache_policy *p) +{ + if (p->tick) + return p->tick(p); +} + +static inline int policy_emit_config_values(struct dm_cache_policy *p, char *result, unsigned maxlen) +{ + ssize_t sz = 0; + if (p->emit_config_values) + return p->emit_config_values(p, result, maxlen); + + DMEMIT("0"); + return 0; +} + +static inline int policy_set_config_value(struct dm_cache_policy *p, + const char *key, const char *value) +{ + return p->set_config_value ? p->set_config_value(p, key, value) : -EINVAL; +} + +/*----------------------------------------------------------------*/ + +/* + * Creates a new cache policy given a policy name, a cache size, an origin size and the block size. + */ +struct dm_cache_policy *dm_cache_policy_create(const char *name, dm_cblock_t cache_size, + sector_t origin_size, sector_t block_size); + +/* + * Destroys the policy. This drops references to the policy module as well + * as calling it's destroy method. So always use this rather than calling + * the policy->destroy method directly. + */ +void dm_cache_policy_destroy(struct dm_cache_policy *p); + +/* + * In case we've forgotten. + */ +const char *dm_cache_policy_get_name(struct dm_cache_policy *p); + +size_t dm_cache_policy_get_hint_size(struct dm_cache_policy *p); + +/*----------------------------------------------------------------*/ + +#endif /* DM_CACHE_POLICY_INTERNAL_H */ diff --git a/drivers/md/dm-cache-policy-mq.c b/drivers/md/dm-cache-policy-mq.c new file mode 100644 index 0000000..9641532 --- /dev/null +++ b/drivers/md/dm-cache-policy-mq.c @@ -0,0 +1,1195 @@ +/* + * Copyright (C) 2012 Red Hat. All rights reserved. + * + * This file is released under the GPL. + */ + +#include "dm-cache-policy.h" +#include "dm.h" + +#include <linux/hash.h> +#include <linux/module.h> +#include <linux/mutex.h> +#include <linux/slab.h> +#include <linux/vmalloc.h> + +#define DM_MSG_PREFIX "cache-policy-mq" +#define MQ_VERSION "1.0.0" + +static struct kmem_cache *mq_entry_cache; + +/*----------------------------------------------------------------*/ + +static unsigned next_power(unsigned n, unsigned min) +{ + return roundup_pow_of_two(max(n, min)); +} + +/*----------------------------------------------------------------*/ + +static unsigned long *alloc_bitset(unsigned nr_entries) +{ + size_t s = sizeof(unsigned long) * dm_div_up(nr_entries, BITS_PER_LONG); + return vzalloc(s); +} + +static void free_bitset(unsigned long *bits) +{ + vfree(bits); +} + +/*----------------------------------------------------------------*/ + +/* + * Large, sequential ios are probably better left on the origin device since + * spindles tend to have good bandwidth. + * + * The io_tracker tries to spot when the io is in one of these sequential + * modes. + * + * Two thresholds to switch between random and sequential io mode are defaulting + * as follows and can be adjusted via the constructor and message interfaces. + */ +#define RANDOM_THRESHOLD_DEFAULT 4 +#define SEQUENTIAL_THRESHOLD_DEFAULT 512 + +enum io_pattern { + PATTERN_SEQUENTIAL, + PATTERN_RANDOM +}; + +struct io_tracker { + enum io_pattern pattern; + + unsigned nr_seq_samples; + unsigned nr_rand_samples; + unsigned thresholds[2]; + + dm_oblock_t last_end_oblock; +}; + +static void iot_init(struct io_tracker *t, + int sequential_threshold, int random_threshold) +{ + t->pattern = PATTERN_RANDOM; + t->nr_seq_samples = 0; + t->nr_rand_samples = 0; + t->last_end_oblock = 0; + t->thresholds[PATTERN_RANDOM] = random_threshold; + t->thresholds[PATTERN_SEQUENTIAL] = sequential_threshold; +} + +static enum io_pattern iot_pattern(struct io_tracker *t) +{ + return t->pattern; +} + +static void iot_update_stats(struct io_tracker *t, struct bio *bio) +{ + if (bio->bi_sector == from_oblock(t->last_end_oblock) + 1) + t->nr_seq_samples++; + else { + /* + * Just one non-sequential IO is enough to reset the + * counters. + */ + if (t->nr_seq_samples) { + t->nr_seq_samples = 0; + t->nr_rand_samples = 0; + } + + t->nr_rand_samples++; + } + + t->last_end_oblock = to_oblock(bio->bi_sector + bio_sectors(bio) - 1); +} + +static void iot_check_for_pattern_switch(struct io_tracker *t) +{ + switch (t->pattern) { + case PATTERN_SEQUENTIAL: + if (t->nr_rand_samples >= t->thresholds[PATTERN_RANDOM]) { + t->pattern = PATTERN_RANDOM; + t->nr_seq_samples = t->nr_rand_samples = 0; + } + break; + + case PATTERN_RANDOM: + if (t->nr_seq_samples >= t->thresholds[PATTERN_SEQUENTIAL]) { + t->pattern = PATTERN_SEQUENTIAL; + t->nr_seq_samples = t->nr_rand_samples = 0; + } + break; + } +} + +static void iot_examine_bio(struct io_tracker *t, struct bio *bio) +{ + iot_update_stats(t, bio); + iot_check_for_pattern_switch(t); +} + +/*----------------------------------------------------------------*/ + + +/* + * This queue is divided up into different levels. Allowing us to push + * entries to the back of any of the levels. Think of it as a partially + * sorted queue. + */ +#define NR_QUEUE_LEVELS 16u + +struct queue { + struct list_head qs[NR_QUEUE_LEVELS]; +}; + +static void queue_init(struct queue *q) +{ + unsigned i; + + for (i = 0; i < NR_QUEUE_LEVELS; i++) + INIT_LIST_HEAD(q->qs + i); +} + +/* + * Insert an entry to the back of the given level. + */ +static void queue_push(struct queue *q, unsigned level, struct list_head *elt) +{ + list_add_tail(elt, q->qs + level); +} + +static void queue_remove(struct list_head *elt) +{ + list_del(elt); +} + +/* + * Shifts all regions down one level. This has no effect on the order of + * the queue. + */ +static void queue_shift_down(struct queue *q) +{ + unsigned level; + + for (level = 1; level < NR_QUEUE_LEVELS; level++) + list_splice_init(q->qs + level, q->qs + level - 1); +} + +/* + * Gives us the oldest entry of the lowest popoulated level. If the first + * level is emptied then we shift down one level. + */ +static struct list_head *queue_pop(struct queue *q) +{ + unsigned level; + struct list_head *r; + + for (level = 0; level < NR_QUEUE_LEVELS; level++) + if (!list_empty(q->qs + level)) { + r = q->qs[level].next; + list_del(r); + + /* have we just emptied the bottom level? */ + if (level == 0 && list_empty(q->qs)) + queue_shift_down(q); + + return r; + } + + return NULL; +} + +static struct list_head *list_pop(struct list_head *lh) +{ + struct list_head *r = lh->next; + + BUG_ON(!r); + list_del_init(r); + + return r; +} + +/*----------------------------------------------------------------*/ + +/* + * Describes a cache entry. Used in both the cache and the pre_cache. + */ +struct entry { + struct hlist_node hlist; + struct list_head list; + dm_oblock_t oblock; + dm_cblock_t cblock; /* valid iff in_cache */ + + /* + * FIXME: pack these better + */ + bool in_cache:1; + unsigned hit_count; + unsigned generation; + unsigned tick; +}; + +struct mq_policy { + struct dm_cache_policy policy; + + /* protects everything */ + struct mutex lock; + dm_cblock_t cache_size; + struct io_tracker tracker; + + /* + * We maintain two queues of entries. The cache proper contains + * the currently active mappings. Whereas the pre_cache tracks + * blocks that are being hit frequently and potential candidates + * for promotion to the cache. + */ + struct queue pre_cache; + struct queue cache; + + /* + * Keeps track of time, incremented by the core. We use this to + * avoid attributing multiple hits within the same tick. + * + * Access to tick_protected should be done with the spin lock held. + * It's copied to tick at the start of the map function (within the + * mutex). + */ + spinlock_t tick_lock; + unsigned tick_protected; + unsigned tick; + + /* + * A count of the number of times the map function has been called + * and found an entry in the pre_cache or cache. Currently used to + * calculate the generation. + */ + unsigned hit_count; + + /* + * A generation is a longish period that is used to trigger some + * book keeping effects. eg, decrementing hit counts on entries. + * This is needed to allow the cache to evolve as io patterns + * change. + */ + unsigned generation; + unsigned generation_period; /* in lookups (will probably change) */ + + /* + * Entries in the pre_cache whose hit count passes the promotion + * threshold move to the cache proper. Working out the correct + * value for the promotion_threshold is crucial to this policy. + */ + unsigned promote_threshold; + + /* + * We need cache_size entries for the cache, and choose to have + * cache_size entries for the pre_cache too. One motivation for + * using the same size is to make the hit counts directly + * comparable between pre_cache and cache. + */ + unsigned nr_entries; + unsigned nr_entries_allocated; + struct list_head free; + + /* + * Cache blocks may be unallocated. We store this info in a + * bitset. + */ + unsigned long *allocation_bitset; + unsigned nr_cblocks_allocated; + unsigned find_free_nr_words; + unsigned find_free_last_word; + + /* + * The hash table allows us to quickly find an entry by origin + * block. Both pre_cache and cache entries are in here. + */ + unsigned nr_buckets; + dm_block_t hash_bits; + struct hlist_head *table; +}; + +/*----------------------------------------------------------------*/ +/* Free/alloc mq cache entry structures. */ +static void takeout_queue(struct list_head *lh, struct queue *q) +{ + unsigned level; + + for (level = 0; level < NR_QUEUE_LEVELS; level++) + list_splice(q->qs + level, lh); +} + +static void free_entries(struct mq_policy *mq) +{ + struct entry *e, *tmp; + + takeout_queue(&mq->free, &mq->pre_cache); + takeout_queue(&mq->free, &mq->cache); + + list_for_each_entry_safe(e, tmp, &mq->free, list) + kmem_cache_free(mq_entry_cache, e); +} + +static int alloc_entries(struct mq_policy *mq, unsigned elts) +{ + unsigned u = mq->nr_entries; + + INIT_LIST_HEAD(&mq->free); + mq->nr_entries_allocated = 0; + + while (u--) { + struct entry *e = kmem_cache_zalloc(mq_entry_cache, GFP_KERNEL); + + if (!e) { + free_entries(mq); + return -ENOMEM; + } + + + list_add(&e->list, &mq->free); + } + + return 0; +} + +/*----------------------------------------------------------------*/ + +/* + * Simple hash table implementation. Should replace with the standard hash + * table that's making its way upstream. + */ +static void hash_insert(struct mq_policy *mq, struct entry *e) +{ + unsigned h = hash_64(from_oblock(e->oblock), mq->hash_bits); + + hlist_add_head(&e->hlist, mq->table + h); +} + +static struct entry *hash_lookup(struct mq_policy *mq, dm_oblock_t oblock) +{ + unsigned h = hash_64(from_oblock(oblock), mq->hash_bits); + struct hlist_head *bucket = mq->table + h; + struct entry *e; + + hlist_for_each_entry(e, bucket, hlist) + if (e->oblock == oblock) { + hlist_del(&e->hlist); + hlist_add_head(&e->hlist, bucket); + return e; + } + + return NULL; +} + +static void hash_remove(struct entry *e) +{ + hlist_del(&e->hlist); +} + +/*----------------------------------------------------------------*/ + +/* + * Allocates a new entry structure. The memory is allocated in one lump, + * so we just handing it out here. Returns NULL if all entries have + * already been allocated. Cannot fail otherwise. + */ +static struct entry *alloc_entry(struct mq_policy *mq) +{ + struct entry *e; + + if (mq->nr_entries_allocated >= mq->nr_entries) { + BUG_ON(!list_empty(&mq->free)); + return NULL; + } + + e = list_entry(list_pop(&mq->free), struct entry, list); + INIT_LIST_HEAD(&e->list); + INIT_HLIST_NODE(&e->hlist); + + mq->nr_entries_allocated++; + return e; +} + +/*----------------------------------------------------------------*/ + +/* + * Mark cache blocks allocated or not in the bitset. + */ +static void alloc_cblock(struct mq_policy *mq, dm_cblock_t cblock) +{ + BUG_ON(from_cblock(cblock) > from_cblock(mq->cache_size)); + BUG_ON(test_bit(from_cblock(cblock), mq->allocation_bitset)); + + set_bit(from_cblock(cblock), mq->allocation_bitset); + mq->nr_cblocks_allocated++; +} + +static void free_cblock(struct mq_policy *mq, dm_cblock_t cblock) +{ + BUG_ON(from_cblock(cblock) > from_cblock(mq->cache_size)); + BUG_ON(!test_bit(from_cblock(cblock), mq->allocation_bitset)); + + clear_bit(from_cblock(cblock), mq->allocation_bitset); + mq->nr_cblocks_allocated--; +} + +static bool any_free_cblocks(struct mq_policy *mq) +{ + return mq->nr_cblocks_allocated < from_cblock(mq->cache_size); +} + +/* + * Fills result out with a cache block that isn't in use, or return + * -ENOSPC. This does _not_ mark the cblock as allocated, the caller is + * reponsible for that. + */ +static int __find_free_cblock(struct mq_policy *mq, unsigned begin, unsigned end, + dm_cblock_t *result, unsigned *last_word) +{ + int r = -ENOSPC; + unsigned w; + + for (w = begin; w < end; w++) { + /* + * ffz is undefined if no zero exists + */ + if (mq->allocation_bitset[w] != ~0UL) { + *last_word = w; + *result = to_cblock((w * BITS_PER_LONG) + ffz(mq->allocation_bitset[w])); + if (from_cblock(*result) < from_cblock(mq->cache_size)) + r = 0; + + break; + } + } + + return r; +} + +static int find_free_cblock(struct mq_policy *mq, dm_cblock_t *result) +{ + int r; + + if (!any_free_cblocks(mq)) + return -ENOSPC; + + r = __find_free_cblock(mq, mq->find_free_last_word, mq->find_free_nr_words, result, &mq->find_free_last_word); + if (r == -ENOSPC && mq->find_free_last_word) + r = __find_free_cblock(mq, 0, mq->find_free_last_word, result, &mq->find_free_last_word); + + return r; +} + +/*----------------------------------------------------------------*/ + +/* + * Now we get to the meat of the policy. This section deals with deciding + * when to to add entries to the pre_cache and cache, and move between + * them. + */ + +/* + * The queue level is based on the log2 of the hit count. + */ +static unsigned queue_level(struct entry *e) +{ + return min((unsigned) ilog2(e->hit_count), NR_QUEUE_LEVELS - 1u); +} + +/* + * Inserts the entry into the pre_cache or the cache. Ensures the cache + * block is marked as allocated if necc. Inserts into the hash table. Sets the + * tick which records when the entry was last moved about. + */ +static void push(struct mq_policy *mq, struct entry *e) +{ + e->tick = mq->tick; + hash_insert(mq, e); + + if (e->in_cache) { + alloc_cblock(mq, e->cblock); + queue_push(&mq->cache, queue_level(e), &e->list); + } else + queue_push(&mq->pre_cache, queue_level(e), &e->list); +} + +/* + * Removes an entry from pre_cache or cache. Removes from the hash table. + * Frees off the cache block if necc. + */ +static void del(struct mq_policy *mq, struct entry *e) +{ + queue_remove(&e->list); + hash_remove(e); + if (e->in_cache) + free_cblock(mq, e->cblock); +} + +/* + * Like del, except it removes the first entry in the queue (ie. the least + * recently used). + */ +static struct entry *pop(struct mq_policy *mq, struct queue *q) +{ + struct entry *e = container_of(queue_pop(q), struct entry, list); + + if (e) { + hash_remove(e); + + if (e->in_cache) + free_cblock(mq, e->cblock); + } + + return e; +} + +/* + * Has this entry already been updated? + */ +static bool updated_this_tick(struct mq_policy *mq, struct entry *e) +{ + return mq->tick == e->tick; +} + +/* + * The promotion threshold is adjusted every generation. As are the counts + * of the entries. + * + * At the moment the threshold is taken by averaging the hit counts of some + * of the entries in the cache (the first 20 entries of the first level). + * + * We can be much cleverer than this though. For example, each promotion + * could bump up the threshold helping to prevent churn. Much more to do + * here. + */ + +#define MAX_TO_AVERAGE 20 + +static void check_generation(struct mq_policy *mq) +{ + unsigned total = 0, nr = 0, count = 0, level; + struct list_head *head; + struct entry *e; + + if ((mq->hit_count >= mq->generation_period) && + (mq->nr_cblocks_allocated == from_cblock(mq->cache_size))) { + + mq->hit_count = 0; + mq->generation++; + + for (level = 0; level < NR_QUEUE_LEVELS && count < MAX_TO_AVERAGE; level++) { + head = mq->cache.qs + level; + list_for_each_entry(e, head, list) { + nr++; + total += e->hit_count; + + if (++count >= MAX_TO_AVERAGE) + break; + } + } + + mq->promote_threshold = nr ? total / nr : 1; + if (mq->promote_threshold * nr < total) + mq->promote_threshold++; + } +} + +/* + * Whenever we use an entry we bump up it's hit counter, and push it to the + * back to it's current level. + */ +static void requeue_and_update_tick(struct mq_policy *mq, struct entry *e) +{ + if (updated_this_tick(mq, e)) + return; + + e->hit_count++; + mq->hit_count++; + check_generation(mq); + + /* generation adjustment, to stop the counts increasing forever. */ + /* FIXME: divide? */ + /* e->hit_count -= min(e->hit_count - 1, mq->generation - e->generation); */ + e->generation = mq->generation; + + del(mq, e); + push(mq, e); +} + +/* + * Demote the least recently used entry from the cache to the pre_cache. + * Returns the new cache entry to use, and the old origin block it was + * mapped to. + * + * We drop the hit count on the demoted entry back to 1 to stop it bouncing + * straight back into the cache if it's subsequently hit. There are + * various options here, and more experimentation would be good: + * + * - just forget about the demoted entry completely (ie. don't insert it + into the pre_cache). + * - divide the hit count rather that setting to some hard coded value. + * - set the hit count to a hard coded value other than 1, eg, is it better + * if it goes in at level 2? + */ +static dm_cblock_t demote_cblock(struct mq_policy *mq, dm_oblock_t *oblock) +{ + dm_cblock_t result; + struct entry *demoted = pop(mq, &mq->cache); + + BUG_ON(!demoted); + result = demoted->cblock; + *oblock = demoted->oblock; + demoted->in_cache = false; + demoted->hit_count = 1; + push(mq, demoted); + + return result; +} + +/* + * We modify the basic promotion_threshold depending on the specific io. + * + * If the origin block has been discarded then there's no cost to copy it + * to the cache. + * + * We bias towards reads, since they can be demoted at no cost if they + * haven't been dirtied. + */ +#define DISCARDED_PROMOTE_THRESHOLD 1 +#define READ_PROMOTE_THRESHOLD 4 +#define WRITE_PROMOTE_THRESHOLD 8 + +static unsigned adjusted_promote_threshold(struct mq_policy *mq, + bool discarded_oblock, int data_dir) +{ + if (discarded_oblock && any_free_cblocks(mq) && data_dir == WRITE) + /* + * We don't need to do any copying at all, so give this a + * very low threshold. In practice this only triggers + * during initial population after a format. + */ + return DISCARDED_PROMOTE_THRESHOLD; + + return data_dir == READ ? + (mq->promote_threshold + READ_PROMOTE_THRESHOLD) : + (mq->promote_threshold + WRITE_PROMOTE_THRESHOLD); +} + +static bool should_promote(struct mq_policy *mq, struct entry *e, + bool discarded_oblock, int data_dir) +{ + return e->hit_count >= + adjusted_promote_threshold(mq, discarded_oblock, data_dir); +} + +static int cache_entry_found(struct mq_policy *mq, + struct entry *e, + struct policy_result *result) +{ + requeue_and_update_tick(mq, e); + + if (e->in_cache) { + result->op = POLICY_HIT; + result->cblock = e->cblock; + } + + return 0; +} + +/* + * Moves and entry from the pre_cache to the cache. The main work is + * finding which cache block to use. + */ +static int pre_cache_to_cache(struct mq_policy *mq, struct entry *e, + struct policy_result *result) +{ + dm_cblock_t cblock; + + if (find_free_cblock(mq, &cblock) == -ENOSPC) { + result->op = POLICY_REPLACE; + cblock = demote_cblock(mq, &result->old_oblock); + } else + result->op = POLICY_NEW; + + result->cblock = e->cblock = cblock; + + del(mq, e); + e->in_cache = true; + push(mq, e); + + return 0; +} + +static int pre_cache_entry_found(struct mq_policy *mq, struct entry *e, + bool can_migrate, bool discarded_oblock, + int data_dir, struct policy_result *result) +{ + int r = 0; + bool updated = updated_this_tick(mq, e); + + requeue_and_update_tick(mq, e); + + if ((!discarded_oblock && updated) || + !should_promote(mq, e, discarded_oblock, data_dir)) + result->op = POLICY_MISS; + else if (!can_migrate) + r = -EWOULDBLOCK; + else + r = pre_cache_to_cache(mq, e, result); + + return r; +} + +static void insert_in_pre_cache(struct mq_policy *mq, + dm_oblock_t oblock) +{ + struct entry *e = alloc_entry(mq); + + if (!e) + /* + * There's no spare entry structure, so we grab the least + * used one from the pre_cache. + */ + e = pop(mq, &mq->pre_cache); + + if (unlikely(!e)) { + DMWARN("couldn't pop from pre cache"); + return; + } + + e->in_cache = false; + e->oblock = oblock; + e->hit_count = 1; + e->generation = mq->generation; + push(mq, e); +} + +static void insert_in_cache(struct mq_policy *mq, dm_oblock_t oblock, + struct policy_result *result) +{ + struct entry *e; + dm_cblock_t cblock; + + if (find_free_cblock(mq, &cblock) == -ENOSPC) { + result->op = POLICY_MISS; + insert_in_pre_cache(mq, oblock); + return; + } + + e = alloc_entry(mq); + if (unlikely(!e)) { + result->op = POLICY_MISS; + return; + } + + e->oblock = oblock; + e->cblock = cblock; + e->in_cache = true; + e->hit_count = 1; + e->generation = mq->generation; + push(mq, e); + + result->op = POLICY_NEW; + result->cblock = e->cblock; +} + +static int no_entry_found(struct mq_policy *mq, dm_oblock_t oblock, + bool can_migrate, bool discarded_oblock, + int data_dir, struct policy_result *result) +{ + if (adjusted_promote_threshold(mq, discarded_oblock, data_dir) == 1) { + if (can_migrate) + insert_in_cache(mq, oblock, result); + else + return -EWOULDBLOCK; + } else { + insert_in_pre_cache(mq, oblock); + result->op = POLICY_MISS; + } + + return 0; +} + +/* + * Looks the oblock up in the hash table, then decides whether to put in + * pre_cache, or cache etc. + */ +static int map(struct mq_policy *mq, dm_oblock_t oblock, + bool can_migrate, bool discarded_oblock, + int data_dir, struct policy_result *result) +{ + int r = 0; + struct entry *e = hash_lookup(mq, oblock); + + if (e && e->in_cache) + r = cache_entry_found(mq, e, result); + else if (iot_pattern(&mq->tracker) == PATTERN_SEQUENTIAL) + result->op = POLICY_MISS; + else if (e) + r = pre_cache_entry_found(mq, e, can_migrate, discarded_oblock, + data_dir, result); + else + r = no_entry_found(mq, oblock, can_migrate, discarded_oblock, + data_dir, result); + + if (r == -EWOULDBLOCK) + result->op = POLICY_MISS; + + return r; +} + +/*----------------------------------------------------------------*/ + +/* + * Public interface, via the policy struct. See dm-cache-policy.h for a + * description of these. + */ + +static struct mq_policy *to_mq_policy(struct dm_cache_policy *p) +{ + return container_of(p, struct mq_policy, policy); +} + +static void mq_destroy(struct dm_cache_policy *p) +{ + struct mq_policy *mq = to_mq_policy(p); + + free_bitset(mq->allocation_bitset); + kfree(mq->table); + free_entries(mq); + kfree(mq); +} + +static void copy_tick(struct mq_policy *mq) +{ + unsigned long flags; + + spin_lock_irqsave(&mq->tick_lock, flags); + mq->tick = mq->tick_protected; + spin_unlock_irqrestore(&mq->tick_lock, flags); +} + +static int mq_map(struct dm_cache_policy *p, dm_oblock_t oblock, + bool can_block, bool can_migrate, bool discarded_oblock, + struct bio *bio, struct policy_result *result) +{ + int r; + struct mq_policy *mq = to_mq_policy(p); + + result->op = POLICY_MISS; + + if (can_block) + mutex_lock(&mq->lock); + else if (!mutex_trylock(&mq->lock)) + return -EWOULDBLOCK; + + copy_tick(mq); + + iot_examine_bio(&mq->tracker, bio); + r = map(mq, oblock, can_migrate, discarded_oblock, + bio_data_dir(bio), result); + + mutex_unlock(&mq->lock); + + return r; +} + +static int mq_lookup(struct dm_cache_policy *p, dm_oblock_t oblock, dm_cblock_t *cblock) +{ + int r; + struct mq_policy *mq = to_mq_policy(p); + struct entry *e; + + if (!mutex_trylock(&mq->lock)) + return -EWOULDBLOCK; + + e = hash_lookup(mq, oblock); + if (e && e->in_cache) { + *cblock = e->cblock; + r = 0; + } else + r = -ENOENT; + + mutex_unlock(&mq->lock); + + return r; +} + +static int mq_load_mapping(struct dm_cache_policy *p, + dm_oblock_t oblock, dm_cblock_t cblock, + uint32_t hint, bool hint_valid) +{ + struct mq_policy *mq = to_mq_policy(p); + struct entry *e; + + e = alloc_entry(mq); + if (!e) + return -ENOMEM; + + e->cblock = cblock; + e->oblock = oblock; + e->in_cache = true; + e->hit_count = hint_valid ? hint : 1; + e->generation = mq->generation; + push(mq, e); + + return 0; +} + +static int mq_walk_mappings(struct dm_cache_policy *p, policy_walk_fn fn, + void *context) +{ + struct mq_policy *mq = to_mq_policy(p); + int r = 0; + struct entry *e; + unsigned level; + + mutex_lock(&mq->lock); + + for (level = 0; level < NR_QUEUE_LEVELS; level++) + list_for_each_entry(e, &mq->cache.qs[level], list) { + r = fn(context, e->cblock, e->oblock, e->hit_count); + if (r) + goto out; + } + +out: + mutex_unlock(&mq->lock); + + return r; +} + +static void remove_mapping(struct mq_policy *mq, dm_oblock_t oblock) +{ + struct entry *e = hash_lookup(mq, oblock); + + BUG_ON(!e || !e->in_cache); + + del(mq, e); + e->in_cache = false; + push(mq, e); +} + +static void mq_remove_mapping(struct dm_cache_policy *p, dm_oblock_t oblock) +{ + struct mq_policy *mq = to_mq_policy(p); + + mutex_lock(&mq->lock); + remove_mapping(mq, oblock); + mutex_unlock(&mq->lock); +} + +static void force_mapping(struct mq_policy *mq, + dm_oblock_t current_oblock, dm_oblock_t new_oblock) +{ + struct entry *e = hash_lookup(mq, current_oblock); + + BUG_ON(!e || !e->in_cache); + + del(mq, e); + e->oblock = new_oblock; + push(mq, e); +} + +static void mq_force_mapping(struct dm_cache_policy *p, + dm_oblock_t current_oblock, dm_oblock_t new_oblock) +{ + struct mq_policy *mq = to_mq_policy(p); + + mutex_lock(&mq->lock); + force_mapping(mq, current_oblock, new_oblock); + mutex_unlock(&mq->lock); +} + +static dm_cblock_t mq_residency(struct dm_cache_policy *p) +{ + struct mq_policy *mq = to_mq_policy(p); + + /* FIXME: lock mutex, not sure we can block here */ + return to_cblock(mq->nr_cblocks_allocated); +} + +static void mq_tick(struct dm_cache_policy *p) +{ + struct mq_policy *mq = to_mq_policy(p); + unsigned long flags; + + spin_lock_irqsave(&mq->tick_lock, flags); + mq->tick_protected++; + spin_unlock_irqrestore(&mq->tick_lock, flags); +} + +static int mq_set_config_value(struct dm_cache_policy *p, + const char *key, const char *value) +{ + struct mq_policy *mq = to_mq_policy(p); + enum io_pattern pattern; + unsigned long tmp; + + if (!strcasecmp(key, "random_threshold")) + pattern = PATTERN_RANDOM; + else if (!strcasecmp(key, "sequential_threshold")) + pattern = PATTERN_SEQUENTIAL; + else + return -EINVAL; + + if (kstrtoul(value, 10, &tmp)) + return -EINVAL; + + mq->tracker.thresholds[pattern] = tmp; + + return 0; +} + +static int mq_emit_config_values(struct dm_cache_policy *p, char *result, unsigned maxlen) +{ + ssize_t sz = 0; + struct mq_policy *mq = to_mq_policy(p); + + DMEMIT("4 random_threshold %u sequential_threshold %u", + mq->tracker.thresholds[PATTERN_RANDOM], + mq->tracker.thresholds[PATTERN_SEQUENTIAL]); + + return 0; +} + +/* Init the policy plugin interface function pointers. */ +static void init_policy_functions(struct mq_policy *mq) +{ + mq->policy.destroy = mq_destroy; + mq->policy.map = mq_map; + mq->policy.lookup = mq_lookup; + mq->policy.load_mapping = mq_load_mapping; + mq->policy.walk_mappings = mq_walk_mappings; + mq->policy.remove_mapping = mq_remove_mapping; + mq->policy.writeback_work = NULL; + mq->policy.force_mapping = mq_force_mapping; + mq->policy.residency = mq_residency; + mq->policy.tick = mq_tick; + mq->policy.emit_config_values = mq_emit_config_values; + mq->policy.set_config_value = mq_set_config_value; +} + +static struct dm_cache_policy *mq_create(dm_cblock_t cache_size, + sector_t origin_size, + sector_t cache_block_size) +{ + int r; + struct mq_policy *mq = kzalloc(sizeof(*mq), GFP_KERNEL); + + if (!mq) + return NULL; + + init_policy_functions(mq); + iot_init(&mq->tracker, SEQUENTIAL_THRESHOLD_DEFAULT, RANDOM_THRESHOLD_DEFAULT); + + mq->cache_size = cache_size; + mq->tick_protected = 0; + mq->tick = 0; + mq->hit_count = 0; + mq->generation = 0; + mq->promote_threshold = 0; + mutex_init(&mq->lock); + spin_lock_init(&mq->tick_lock); + mq->find_free_nr_words = dm_div_up(from_cblock(mq->cache_size), BITS_PER_LONG); + mq->find_free_last_word = 0; + + queue_init(&mq->pre_cache); + queue_init(&mq->cache); + mq->generation_period = max((unsigned) from_cblock(cache_size), 1024U); + + mq->nr_entries = 2 * from_cblock(cache_size); + r = alloc_entries(mq, mq->nr_entries); + if (r) + goto bad_cache_alloc; + + mq->nr_entries_allocated = 0; + mq->nr_cblocks_allocated = 0; + + mq->nr_buckets = next_power(from_cblock(cache_size) / 2, 16); + mq->hash_bits = ffs(mq->nr_buckets) - 1; + mq->table = kzalloc(sizeof(*mq->table) * mq->nr_buckets, GFP_KERNEL); + if (!mq->table) + goto bad_alloc_table; + + mq->allocation_bitset = alloc_bitset(from_cblock(cache_size)); + if (!mq->allocation_bitset) + goto bad_alloc_bitset; + + return &mq->policy; + +bad_alloc_bitset: + kfree(mq->table); +bad_alloc_table: + free_entries(mq); +bad_cache_alloc: + kfree(mq); + + return NULL; +} + +/*----------------------------------------------------------------*/ + +static struct dm_cache_policy_type mq_policy_type = { + .name = "mq", + .hint_size = 4, + .owner = THIS_MODULE, + .create = mq_create +}; + +static struct dm_cache_policy_type default_policy_type = { + .name = "default", + .hint_size = 4, + .owner = THIS_MODULE, + .create = mq_create +}; + +static int __init mq_init(void) +{ + int r; + + mq_entry_cache = kmem_cache_create("dm_mq_policy_cache_entry", + sizeof(struct entry), + __alignof__(struct entry), + 0, NULL); + if (!mq_entry_cache) + goto bad; + + r = dm_cache_policy_register(&mq_policy_type); + if (r) { + DMERR("register failed %d", r); + goto bad_register_mq; + } + + r = dm_cache_policy_register(&default_policy_type); + if (!r) { + DMINFO("version " MQ_VERSION " loaded"); + return 0; + } + + DMERR("register failed (as default) %d", r); + + dm_cache_policy_unregister(&mq_policy_type); +bad_register_mq: + kmem_cache_destroy(mq_entry_cache); +bad: + return -ENOMEM; +} + +static void __exit mq_exit(void) +{ + dm_cache_policy_unregister(&mq_policy_type); + dm_cache_policy_unregister(&default_policy_type); + + kmem_cache_destroy(mq_entry_cache); +} + +module_init(mq_init); +module_exit(mq_exit); + +MODULE_AUTHOR("Joe Thornber <dm-devel@redhat.com>"); +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("mq cache policy"); + +MODULE_ALIAS("dm-cache-default"); diff --git a/drivers/md/dm-cache-policy.c b/drivers/md/dm-cache-policy.c new file mode 100644 index 0000000..2cbf5fd --- /dev/null +++ b/drivers/md/dm-cache-policy.c @@ -0,0 +1,161 @@ +/* + * Copyright (C) 2012 Red Hat. All rights reserved. + * + * This file is released under the GPL. + */ + +#include "dm-cache-policy-internal.h" +#include "dm.h" + +#include <linux/module.h> +#include <linux/slab.h> + +/*----------------------------------------------------------------*/ + +#define DM_MSG_PREFIX "cache-policy" + +static DEFINE_SPINLOCK(register_lock); +static LIST_HEAD(register_list); + +static struct dm_cache_policy_type *__find_policy(const char *name) +{ + struct dm_cache_policy_type *t; + + list_for_each_entry(t, ®ister_list, list) + if (!strcmp(t->name, name)) + return t; + + return NULL; +} + +static struct dm_cache_policy_type *__get_policy_once(const char *name) +{ + struct dm_cache_policy_type *t = __find_policy(name); + + if (t && !try_module_get(t->owner)) { + DMWARN("couldn't get module %s", name); + t = ERR_PTR(-EINVAL); + } + + return t; +} + +static struct dm_cache_policy_type *get_policy_once(const char *name) +{ + struct dm_cache_policy_type *t; + + spin_lock(®ister_lock); + t = __get_policy_once(name); + spin_unlock(®ister_lock); + + return t; +} + +static struct dm_cache_policy_type *get_policy(const char *name) +{ + struct dm_cache_policy_type *t; + + t = get_policy_once(name); + if (IS_ERR(t)) + return NULL; + + if (t) + return t; + + request_module("dm-cache-%s", name); + + t = get_policy_once(name); + if (IS_ERR(t)) + return NULL; + + return t; +} + +static void put_policy(struct dm_cache_policy_type *t) +{ + module_put(t->owner); +} + +int dm_cache_policy_register(struct dm_cache_policy_type *type) +{ + int r; + + /* One size fits all for now */ + if (type->hint_size != 0 && type->hint_size != 4) { + DMWARN("hint size must be 0 or 4 but %llu supplied.", (unsigned long long) type->hint_size); + return -EINVAL; + } + + spin_lock(®ister_lock); + if (__find_policy(type->name)) { + DMWARN("attempt to register policy under duplicate name %s", type->name); + r = -EINVAL; + } else { + list_add(&type->list, ®ister_list); + r = 0; + } + spin_unlock(®ister_lock); + + return r; +} +EXPORT_SYMBOL_GPL(dm_cache_policy_register); + +void dm_cache_policy_unregister(struct dm_cache_policy_type *type) +{ + spin_lock(®ister_lock); + list_del_init(&type->list); + spin_unlock(®ister_lock); +} +EXPORT_SYMBOL_GPL(dm_cache_policy_unregister); + +struct dm_cache_policy *dm_cache_policy_create(const char *name, + dm_cblock_t cache_size, + sector_t origin_size, + sector_t cache_block_size) +{ + struct dm_cache_policy *p = NULL; + struct dm_cache_policy_type *type; + + type = get_policy(name); + if (!type) { + DMWARN("unknown policy type"); + return NULL; + } + + p = type->create(cache_size, origin_size, cache_block_size); + if (!p) { + put_policy(type); + return NULL; + } + p->private = type; + + return p; +} +EXPORT_SYMBOL_GPL(dm_cache_policy_create); + +void dm_cache_policy_destroy(struct dm_cache_policy *p) +{ + struct dm_cache_policy_type *t = p->private; + + p->destroy(p); + put_policy(t); +} +EXPORT_SYMBOL_GPL(dm_cache_policy_destroy); + +const char *dm_cache_policy_get_name(struct dm_cache_policy *p) +{ + struct dm_cache_policy_type *t = p->private; + + return t->name; +} +EXPORT_SYMBOL_GPL(dm_cache_policy_get_name); + +size_t dm_cache_policy_get_hint_size(struct dm_cache_policy *p) +{ + struct dm_cache_policy_type *t = p->private; + + return t->hint_size; +} +EXPORT_SYMBOL_GPL(dm_cache_policy_get_hint_size); + +/*----------------------------------------------------------------*/ diff --git a/drivers/md/dm-cache-policy.h b/drivers/md/dm-cache-policy.h new file mode 100644 index 0000000..f0f51b2 --- /dev/null +++ b/drivers/md/dm-cache-policy.h @@ -0,0 +1,228 @@ +/* + * Copyright (C) 2012 Red Hat. All rights reserved. + * + * This file is released under the GPL. + */ + +#ifndef DM_CACHE_POLICY_H +#define DM_CACHE_POLICY_H + +#include "dm-cache-block-types.h" + +#include <linux/device-mapper.h> + +/*----------------------------------------------------------------*/ + +/* FIXME: make it clear which methods are optional. Get debug policy to + * double check this at start. + */ + +/* + * The cache policy makes the important decisions about which blocks get to + * live on the faster cache device. + * + * When the core target has to remap a bio it calls the 'map' method of the + * policy. This returns an instruction telling the core target what to do. + * + * POLICY_HIT: + * That block is in the cache. Remap to the cache and carry on. + * + * POLICY_MISS: + * This block is on the origin device. Remap and carry on. + * + * POLICY_NEW: + * This block is currently on the origin device, but the policy wants to + * move it. The core should: + * + * - hold any further io to this origin block + * - copy the origin to the given cache block + * - release all the held blocks + * - remap the original block to the cache + * + * POLICY_REPLACE: + * This block is currently on the origin device. The policy wants to + * move it to the cache, with the added complication that the destination + * cache block needs a writeback first. The core should: + * + * - hold any further io to this origin block + * - hold any further io to the origin block that's being written back + * - writeback + * - copy new block to cache + * - release held blocks + * - remap bio to cache and reissue. + * + * Should the core run into trouble while processing a POLICY_NEW or + * POLICY_REPLACE instruction it will roll back the policies mapping using + * remove_mapping() or force_mapping(). These methods must not fail. This + * approach avoids having transactional semantics in the policy (ie, the + * core informing the policy when a migration is complete), and hence makes + * it easier to write new policies. + * + * In general policy methods should never block, except in the case of the + * map function when can_migrate is set. So be careful to implement using + * bounded, preallocated memory. + */ +enum policy_operation { + POLICY_HIT, + POLICY_MISS, + POLICY_NEW, + POLICY_REPLACE +}; + +/* + * This is the instruction passed back to the core target. + */ +struct policy_result { + enum policy_operation op; + dm_oblock_t old_oblock; /* POLICY_REPLACE */ + dm_cblock_t cblock; /* POLICY_HIT, POLICY_NEW, POLICY_REPLACE */ +}; + +typedef int (*policy_walk_fn)(void *context, dm_cblock_t cblock, + dm_oblock_t oblock, uint32_t hint); + +/* + * The cache policy object. Just a bunch of methods. It is envisaged that + * this structure will be embedded in a bigger, policy specific structure + * (ie. use container_of()). + */ +struct dm_cache_policy { + + /* + * FIXME: make it clear which methods are optional, and which may + * block. + */ + + /* + * Destroys this object. + */ + void (*destroy)(struct dm_cache_policy *p); + + /* + * See large comment above. + * + * oblock - the origin block we're interested in. + * + * can_block - indicates whether the current thread is allowed to + * block. -EWOULDBLOCK returned if it can't and would. + * + * can_migrate - gives permission for POLICY_NEW or POLICY_REPLACE + * instructions. If denied and the policy would have + * returned one of these instructions it should + * return -EWOULDBLOCK. + * + * discarded_oblock - indicates whether the whole origin block is + * in a discarded state (FIXME: better to tell the + * policy about this sooner, so it can recycle that + * cache block if it wants.) + * bio - the bio that triggered this call. + * result - gets filled in with the instruction. + * + * May only return 0, or -EWOULDBLOCK (if !can_migrate) + */ + int (*map)(struct dm_cache_policy *p, dm_oblock_t oblock, + bool can_block, bool can_migrate, bool discarded_oblock, + struct bio *bio, struct policy_result *result); + + /* + * Sometimes we want to see if a block is in the cache, without + * triggering any update of stats. (ie. it's not a real hit). + * + * Must not block. + * + * Returns 1 iff in cache, 0 iff not, < 0 on error (-EWOULDBLOCK + * would be typical). + */ + int (*lookup)(struct dm_cache_policy *p, dm_oblock_t oblock, dm_cblock_t *cblock); + + /* + * oblock must be a mapped block. Must not block. + */ + void (*set_dirty)(struct dm_cache_policy *p, dm_oblock_t oblock); + void (*clear_dirty)(struct dm_cache_policy *p, dm_oblock_t oblock); + + /* + * Called when a cache target is first created. Used to load a + * mapping from the metadata device into the policy. + */ + int (*load_mapping)(struct dm_cache_policy *p, dm_oblock_t oblock, + dm_cblock_t cblock, uint32_t hint, bool hint_valid); + + int (*walk_mappings)(struct dm_cache_policy *p, policy_walk_fn fn, + void *context); + + /* + * Override functions used on the error paths of the core target. + * They must succeed. + */ + void (*remove_mapping)(struct dm_cache_policy *p, dm_oblock_t oblock); + void (*force_mapping)(struct dm_cache_policy *p, dm_oblock_t current_oblock, + dm_oblock_t new_oblock); + + int (*writeback_work)(struct dm_cache_policy *p, dm_oblock_t *oblock, dm_cblock_t *cblock); + + + /* + * How full is the cache? + */ + dm_cblock_t (*residency)(struct dm_cache_policy *p); + + /* + * Because of where we sit in the block layer, we can be asked to + * map a lot of little bios that are all in the same block (no + * queue merging has occurred). To stop the policy being fooled by + * these the core target sends regular tick() calls to the policy. + * The policy should only count an entry as hit once per tick. + */ + void (*tick)(struct dm_cache_policy *p); + + /* + * Configuration. + */ + int (*emit_config_values)(struct dm_cache_policy *p, + char *result, unsigned maxlen); + int (*set_config_value)(struct dm_cache_policy *p, + const char *key, const char *value); + + /* + * Book keeping ptr for the policy register, not for general use. + */ + void *private; +}; + +/*----------------------------------------------------------------*/ + +/* + * We maintain a little register of the different policy types. + */ +#define CACHE_POLICY_NAME_SIZE 16 + +struct dm_cache_policy_type { + /* For use by the register code only. */ + struct list_head list; + + /* + * Policy writers should fill in these fields. The name field is + * what gets passed on the target line to select your policy. + */ + char name[CACHE_POLICY_NAME_SIZE]; + + /* + * Policies may store a hint for each each cache block. + * Currently the size of this hint must be 0 or 4 bytes but we + * expect to relax this in future. + */ + size_t hint_size; + + struct module *owner; + struct dm_cache_policy *(*create)(dm_cblock_t cache_size, + sector_t origin_size, + sector_t block_size); +}; + +int dm_cache_policy_register(struct dm_cache_policy_type *type); +void dm_cache_policy_unregister(struct dm_cache_policy_type *type); + +/*----------------------------------------------------------------*/ + +#endif /* DM_CACHE_POLICY_H */ diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c new file mode 100644 index 0000000..0f4e84b --- /dev/null +++ b/drivers/md/dm-cache-target.c @@ -0,0 +1,2584 @@ +/* + * Copyright (C) 2012 Red Hat. All rights reserved. + * + * This file is released under the GPL. + */ + +#include "dm.h" +#include "dm-bio-prison.h" +#include "dm-cache-metadata.h" + +#include <linux/dm-io.h> +#include <linux/dm-kcopyd.h> +#include <linux/init.h> +#include <linux/mempool.h> +#include <linux/module.h> +#include <linux/slab.h> +#include <linux/vmalloc.h> + +#define DM_MSG_PREFIX "cache" + +DECLARE_DM_KCOPYD_THROTTLE_WITH_MODULE_PARM(cache_copy_throttle, + "A percentage of time allocated for copying to and/or from cache"); + +/*----------------------------------------------------------------*/ + +/* + * Glossary: + * + * oblock: index of an origin block + * cblock: index of a cache block + * promotion: movement of a block from origin to cache + * demotion: movement of a block from cache to origin + * migration: movement of a block between the origin and cache device, + * either direction + */ + +/*----------------------------------------------------------------*/ + +static size_t bitset_size_in_bytes(unsigned nr_entries) +{ + return sizeof(unsigned long) * dm_div_up(nr_entries, BITS_PER_LONG); +} + +static unsigned long *alloc_bitset(unsigned nr_entries) +{ + size_t s = bitset_size_in_bytes(nr_entries); + return vzalloc(s); +} + +static void clear_bitset(void *bitset, unsigned nr_entries) +{ + size_t s = bitset_size_in_bytes(nr_entries); + memset(bitset, 0, s); +} + +static void free_bitset(unsigned long *bits) +{ + vfree(bits); +} + +/*----------------------------------------------------------------*/ + +#define PRISON_CELLS 1024 +#define MIGRATION_POOL_SIZE 128 +#define COMMIT_PERIOD HZ +#define MIGRATION_COUNT_WINDOW 10 + +/* + * The block size of the device holding cache data must be >= 32KB + */ +#define DATA_DEV_BLOCK_SIZE_MIN_SECTORS (32 * 1024 >> SECTOR_SHIFT) + +/* + * FIXME: the cache is read/write for the time being. + */ +enum cache_mode { + CM_WRITE, /* metadata may be changed */ + CM_READ_ONLY, /* metadata may not be changed */ +}; + +struct cache_features { + enum cache_mode mode; + bool write_through:1; +}; + +struct cache_stats { + atomic_t read_hit; + atomic_t read_miss; + atomic_t write_hit; + atomic_t write_miss; + atomic_t demotion; + atomic_t promotion; + atomic_t copies_avoided; + atomic_t cache_cell_clash; + atomic_t commit_count; + atomic_t discard_count; +}; + +struct cache { + struct dm_target *ti; + struct dm_target_callbacks callbacks; + + /* + * Metadata is written to this device. + */ + struct dm_dev *metadata_dev; + + /* + * The slower of the two data devices. Typically a spindle. + */ + struct dm_dev *origin_dev; + + /* + * The faster of the two data devices. Typically an SSD. + */ + struct dm_dev *cache_dev; + + /* + * Cache features such as write-through. + */ + struct cache_features features; + + /* + * Size of the origin device in _complete_ blocks and native sectors. + */ + dm_oblock_t origin_blocks; + sector_t origin_sectors; + + /* + * Size of the cache device in blocks. + */ + dm_cblock_t cache_size; + + /* + * Fields for converting from sectors to blocks. + */ + uint32_t sectors_per_block; + int sectors_per_block_shift; + + struct dm_cache_metadata *cmd; + + spinlock_t lock; + struct bio_list deferred_bios; + struct bio_list deferred_flush_bios; + struct list_head quiesced_migrations; + struct list_head completed_migrations; + struct list_head need_commit_migrations; + sector_t migration_threshold; + atomic_t nr_migrations; + wait_queue_head_t migration_wait; + + /* + * cache_size entries, dirty if set + */ + dm_cblock_t nr_dirty; + unsigned long *dirty_bitset; + + /* + * origin_blocks entries, discarded if set. + */ + sector_t discard_block_size; /* a power of 2 times sectors per block */ + dm_dblock_t discard_nr_blocks; + unsigned long *discard_bitset; + + struct dm_kcopyd_client *copier; + struct workqueue_struct *wq; + struct work_struct worker; + + struct delayed_work waker; + unsigned long last_commit_jiffies; + + struct dm_bio_prison *prison; + struct dm_deferred_set *all_io_ds; + + mempool_t *migration_pool; + struct dm_cache_migration *next_migration; + + struct dm_cache_policy *policy; + unsigned policy_nr_args; + + bool need_tick_bio:1; + bool sized:1; + bool quiescing:1; + bool commit_requested:1; + bool loaded_mappings:1; + bool loaded_discards:1; + + struct cache_stats stats; + + /* + * Rather than reconstructing the table line for the status we just + * save it and regurgitate. + */ + unsigned nr_ctr_args; + const char **ctr_args; +}; + +struct per_bio_data { + bool tick:1; + unsigned req_nr:2; + struct dm_deferred_entry *all_io_entry; +}; + +struct dm_cache_migration { + struct list_head list; + struct cache *cache; + + unsigned long start_jiffies; + dm_oblock_t old_oblock; + dm_oblock_t new_oblock; + dm_cblock_t cblock; + + bool err:1; + bool writeback:1; + bool demote:1; + bool promote:1; + + struct dm_bio_prison_cell *old_ocell; + struct dm_bio_prison_cell *new_ocell; +}; + +/* + * Processing a bio in the worker thread may require these memory + * allocations. We prealloc to avoid deadlocks (the same worker thread + * frees them back to the mempool). + */ +struct prealloc { + struct dm_cache_migration *mg; + struct dm_bio_prison_cell *cell1; + struct dm_bio_prison_cell *cell2; +}; + +static void wake_worker(struct cache *cache) +{ + queue_work(cache->wq, &cache->worker); +} + +/*----------------------------------------------------------------*/ + +static struct dm_bio_prison_cell *alloc_prison_cell(struct cache *cache) +{ + /* FIXME: change to use a local slab. */ + return dm_bio_prison_alloc_cell(cache->prison, GFP_NOWAIT); +} + +static void free_prison_cell(struct cache *cache, struct dm_bio_prison_cell *cell) +{ + dm_bio_prison_free_cell(cache->prison, cell); +} + +static int prealloc_data_structs(struct cache *cache, struct prealloc *p) +{ + if (!p->mg) { + p->mg = mempool_alloc(cache->migration_pool, GFP_NOWAIT); + if (!p->mg) + return -ENOMEM; + } + + if (!p->cell1) { + p->cell1 = alloc_prison_cell(cache); + if (!p->cell1) + return -ENOMEM; + } + + if (!p->cell2) { + p->cell2 = alloc_prison_cell(cache); + if (!p->cell2) + return -ENOMEM; + } + + return 0; +} + +static void prealloc_free_structs(struct cache *cache, struct prealloc *p) +{ + if (p->cell2) + free_prison_cell(cache, p->cell2); + + if (p->cell1) + free_prison_cell(cache, p->cell1); + + if (p->mg) + mempool_free(p->mg, cache->migration_pool); +} + +static struct dm_cache_migration *prealloc_get_migration(struct prealloc *p) +{ + struct dm_cache_migration *mg = p->mg; + + BUG_ON(!mg); + p->mg = NULL; + + return mg; +} + +/* + * You must have a cell within the prealloc struct to return. If not this + * function will BUG() rather than returning NULL. + */ +static struct dm_bio_prison_cell *prealloc_get_cell(struct prealloc *p) +{ + struct dm_bio_prison_cell *r = NULL; + + if (p->cell1) { + r = p->cell1; + p->cell1 = NULL; + + } else if (p->cell2) { + r = p->cell2; + p->cell2 = NULL; + } else + BUG(); + + return r; +} + +/* + * You can't have more than two cells in a prealloc struct. BUG() will be + * called if you try and overfill. + */ +static void prealloc_put_cell(struct prealloc *p, struct dm_bio_prison_cell *cell) +{ + if (!p->cell2) + p->cell2 = cell; + + else if (!p->cell1) + p->cell1 = cell; + + else + BUG(); +} + +/*----------------------------------------------------------------*/ + +static void build_key(dm_oblock_t oblock, struct dm_cell_key *key) +{ + key->virtual = 0; + key->dev = 0; + key->block = from_oblock(oblock); +} + +/* + * The caller hands in a preallocated cell, and a free function for it. + * The cell will be freed if there's an error, or if it wasn't used because + * a cell with that key already exists. + */ +typedef void (*cell_free_fn)(void *context, struct dm_bio_prison_cell *cell); + +static int bio_detain(struct cache *cache, dm_oblock_t oblock, + struct bio *bio, struct dm_bio_prison_cell *cell_prealloc, + cell_free_fn free_fn, void *free_context, + struct dm_bio_prison_cell **cell_result) +{ + int r; + struct dm_cell_key key; + + build_key(oblock, &key); + r = dm_bio_detain(cache->prison, &key, bio, cell_prealloc, cell_result); + if (r) + free_fn(free_context, cell_prealloc); + + return r; +} + +static int get_cell(struct cache *cache, + dm_oblock_t oblock, + struct prealloc *structs, + struct dm_bio_prison_cell **cell_result) +{ + int r; + struct dm_cell_key key; + struct dm_bio_prison_cell *cell_prealloc; + + cell_prealloc = prealloc_get_cell(structs); + + build_key(oblock, &key); + r = dm_get_cell(cache->prison, &key, cell_prealloc, cell_result); + if (r) + prealloc_put_cell(structs, cell_prealloc); + + return r; +} + + /*----------------------------------------------------------------*/ + +static bool is_dirty(struct cache *cache, dm_cblock_t b) +{ + return test_bit(from_cblock(b), cache->dirty_bitset); +} + +static void set_dirty(struct cache *cache, dm_oblock_t oblock, dm_cblock_t cblock) +{ + if (!test_and_set_bit(from_cblock(cblock), cache->dirty_bitset)) { + cache->nr_dirty = to_cblock(from_cblock(cache->nr_dirty) + 1); + policy_set_dirty(cache->policy, oblock); + } +} + +static void clear_dirty(struct cache *cache, dm_oblock_t oblock, dm_cblock_t cblock) +{ + if (test_and_clear_bit(from_cblock(cblock), cache->dirty_bitset)) { + policy_clear_dirty(cache->policy, oblock); + cache->nr_dirty = to_cblock(from_cblock(cache->nr_dirty) - 1); + if (!from_cblock(cache->nr_dirty)) + dm_table_event(cache->ti->table); + } +} + +/*----------------------------------------------------------------*/ +static bool block_size_is_power_of_two(struct cache *cache) +{ + return cache->sectors_per_block_shift >= 0; +} + +static dm_dblock_t oblock_to_dblock(struct cache *cache, dm_oblock_t oblock) +{ + sector_t discard_blocks = cache->discard_block_size; + dm_block_t b = from_oblock(oblock); + + if (!block_size_is_power_of_two(cache)) + (void) sector_div(discard_blocks, cache->sectors_per_block); + else + discard_blocks >>= cache->sectors_per_block_shift; + + (void) sector_div(b, discard_blocks); + + return to_dblock(b); +} + +static void set_discard(struct cache *cache, dm_dblock_t b) +{ + unsigned long flags; + + atomic_inc(&cache->stats.discard_count); + + spin_lock_irqsave(&cache->lock, flags); + set_bit(from_dblock(b), cache->discard_bitset); + spin_unlock_irqrestore(&cache->lock, flags); +} + +static void clear_discard(struct cache *cache, dm_dblock_t b) +{ + unsigned long flags; + + spin_lock_irqsave(&cache->lock, flags); + clear_bit(from_dblock(b), cache->discard_bitset); + spin_unlock_irqrestore(&cache->lock, flags); +} + +static bool is_discarded(struct cache *cache, dm_dblock_t b) +{ + int r; + unsigned long flags; + + spin_lock_irqsave(&cache->lock, flags); + r = test_bit(from_dblock(b), cache->discard_bitset); + spin_unlock_irqrestore(&cache->lock, flags); + + return r; +} + +static bool is_discarded_oblock(struct cache *cache, dm_oblock_t b) +{ + int r; + unsigned long flags; + + spin_lock_irqsave(&cache->lock, flags); + r = test_bit(from_dblock(oblock_to_dblock(cache, b)), + cache->discard_bitset); + spin_unlock_irqrestore(&cache->lock, flags); + + return r; +} + +/*----------------------------------------------------------------*/ + +static void load_stats(struct cache *cache) +{ + struct dm_cache_statistics stats; + + dm_cache_metadata_get_stats(cache->cmd, &stats); + atomic_set(&cache->stats.read_hit, stats.read_hits); + atomic_set(&cache->stats.read_miss, stats.read_misses); + atomic_set(&cache->stats.write_hit, stats.write_hits); + atomic_set(&cache->stats.write_miss, stats.write_misses); +} + +static void save_stats(struct cache *cache) +{ + struct dm_cache_statistics stats; + + stats.read_hits = atomic_read(&cache->stats.read_hit); + stats.read_misses = atomic_read(&cache->stats.read_miss); + stats.write_hits = atomic_read(&cache->stats.write_hit); + stats.write_misses = atomic_read(&cache->stats.write_miss); + + dm_cache_metadata_set_stats(cache->cmd, &stats); +} + +/*---------------------------------------------------------------- + * Per bio data + *--------------------------------------------------------------*/ +static struct per_bio_data *get_per_bio_data(struct bio *bio) +{ + struct per_bio_data *pb = dm_per_bio_data(bio, sizeof(struct per_bio_data)); + BUG_ON(!pb); + return pb; +} + +static struct per_bio_data *init_per_bio_data(struct bio *bio) +{ + struct per_bio_data *pb = get_per_bio_data(bio); + + pb->tick = false; + pb->req_nr = dm_bio_get_target_bio_nr(bio); + pb->all_io_entry = NULL; + + return pb; +} + +/*---------------------------------------------------------------- + * Remapping + *--------------------------------------------------------------*/ +static void remap_to_origin(struct cache *cache, struct bio *bio) +{ + bio->bi_bdev = cache->origin_dev->bdev; +} + +static void remap_to_cache(struct cache *cache, struct bio *bio, + dm_cblock_t cblock) +{ + sector_t bi_sector = bio->bi_sector; + + bio->bi_bdev = cache->cache_dev->bdev; + if (!block_size_is_power_of_two(cache)) + bio->bi_sector = (from_cblock(cblock) * cache->sectors_per_block) + + sector_div(bi_sector, cache->sectors_per_block); + else + bio->bi_sector = (from_cblock(cblock) << cache->sectors_per_block_shift) | + (bi_sector & (cache->sectors_per_block - 1)); +} + +static void check_if_tick_bio_needed(struct cache *cache, struct bio *bio) +{ + unsigned long flags; + struct per_bio_data *pb = get_per_bio_data(bio); + + spin_lock_irqsave(&cache->lock, flags); + if (cache->need_tick_bio && + !(bio->bi_rw & (REQ_FUA | REQ_FLUSH | REQ_DISCARD))) { + pb->tick = true; + cache->need_tick_bio = false; + } + spin_unlock_irqrestore(&cache->lock, flags); +} + +static void remap_to_origin_clear_discard(struct cache *cache, struct bio *bio, + dm_oblock_t oblock) +{ + check_if_tick_bio_needed(cache, bio); + remap_to_origin(cache, bio); + if (bio_data_dir(bio) == WRITE) + clear_discard(cache, oblock_to_dblock(cache, oblock)); +} + +static void remap_to_cache_dirty(struct cache *cache, struct bio *bio, + dm_oblock_t oblock, dm_cblock_t cblock) +{ + remap_to_cache(cache, bio, cblock); + if (bio_data_dir(bio) == WRITE) { + set_dirty(cache, oblock, cblock); + clear_discard(cache, oblock_to_dblock(cache, oblock)); + } +} + +static dm_oblock_t get_bio_block(struct cache *cache, struct bio *bio) +{ + sector_t block_nr = bio->bi_sector; + + if (!block_size_is_power_of_two(cache)) + (void) sector_div(block_nr, cache->sectors_per_block); + else + block_nr >>= cache->sectors_per_block_shift; + + return to_oblock(block_nr); +} + +static int bio_triggers_commit(struct cache *cache, struct bio *bio) +{ + return bio->bi_rw & (REQ_FLUSH | REQ_FUA); +} + +static void issue(struct cache *cache, struct bio *bio) +{ + unsigned long flags; + + if (!bio_triggers_commit(cache, bio)) { + generic_make_request(bio); + return; + } + + /* + * Batch together any bios that trigger commits and then issue a + * single commit for them in do_worker(). + */ + spin_lock_irqsave(&cache->lock, flags); + cache->commit_requested = true; + bio_list_add(&cache->deferred_flush_bios, bio); + spin_unlock_irqrestore(&cache->lock, flags); +} + +/*---------------------------------------------------------------- + * Migration processing + * + * Migration covers moving data from the origin device to the cache, or + * vice versa. + *--------------------------------------------------------------*/ +static void free_migration(struct dm_cache_migration *mg) +{ + mempool_free(mg, mg->cache->migration_pool); +} + +static void inc_nr_migrations(struct cache *cache) +{ + atomic_inc(&cache->nr_migrations); +} + +static void dec_nr_migrations(struct cache *cache) +{ + atomic_dec(&cache->nr_migrations); + + /* + * Wake the worker in case we're suspending the target. + */ + wake_up(&cache->migration_wait); +} + +static void __cell_defer(struct cache *cache, struct dm_bio_prison_cell *cell, + bool holder) +{ + (holder ? dm_cell_release : dm_cell_release_no_holder) + (cache->prison, cell, &cache->deferred_bios); + free_prison_cell(cache, cell); +} + +static void cell_defer(struct cache *cache, struct dm_bio_prison_cell *cell, + bool holder) +{ + unsigned long flags; + + spin_lock_irqsave(&cache->lock, flags); + __cell_defer(cache, cell, holder); + spin_unlock_irqrestore(&cache->lock, flags); + + wake_worker(cache); +} + +static void cleanup_migration(struct dm_cache_migration *mg) +{ + dec_nr_migrations(mg->cache); + free_migration(mg); +} + +static void migration_failure(struct dm_cache_migration *mg) +{ + struct cache *cache = mg->cache; + + if (mg->writeback) { + DMWARN_LIMIT("writeback failed; couldn't copy block"); + set_dirty(cache, mg->old_oblock, mg->cblock); + cell_defer(cache, mg->old_ocell, false); + + } else if (mg->demote) { + DMWARN_LIMIT("demotion failed; couldn't copy block"); + policy_force_mapping(cache->policy, mg->new_oblock, mg->old_oblock); + + cell_defer(cache, mg->old_ocell, mg->promote ? 0 : 1); + if (mg->promote) + cell_defer(cache, mg->new_ocell, 1); + } else { + DMWARN_LIMIT("promotion failed; couldn't copy block"); + policy_remove_mapping(cache->policy, mg->new_oblock); + cell_defer(cache, mg->new_ocell, 1); + } + + cleanup_migration(mg); +} + +static void migration_success_pre_commit(struct dm_cache_migration *mg) +{ + unsigned long flags; + struct cache *cache = mg->cache; + + if (mg->writeback) { + cell_defer(cache, mg->old_ocell, false); + clear_dirty(cache, mg->old_oblock, mg->cblock); + cleanup_migration(mg); + return; + + } else if (mg->demote) { + if (dm_cache_remove_mapping(cache->cmd, mg->cblock)) { + DMWARN_LIMIT("demotion failed; couldn't update on disk metadata"); + policy_force_mapping(cache->policy, mg->new_oblock, + mg->old_oblock); + if (mg->promote) + cell_defer(cache, mg->new_ocell, true); + cleanup_migration(mg); + return; + } + } else { + if (dm_cache_insert_mapping(cache->cmd, mg->cblock, mg->new_oblock)) { + DMWARN_LIMIT("promotion failed; couldn't update on disk metadata"); + policy_remove_mapping(cache->policy, mg->new_oblock); + cleanup_migration(mg); + return; + } + } + + spin_lock_irqsave(&cache->lock, flags); + list_add_tail(&mg->list, &cache->need_commit_migrations); + cache->commit_requested = true; + spin_unlock_irqrestore(&cache->lock, flags); +} + +static void migration_success_post_commit(struct dm_cache_migration *mg) +{ + unsigned long flags; + struct cache *cache = mg->cache; + + if (mg->writeback) { + DMWARN("writeback unexpectedly triggered commit"); + return; + + } else if (mg->demote) { + cell_defer(cache, mg->old_ocell, mg->promote ? 0 : 1); + + if (mg->promote) { + mg->demote = false; + + spin_lock_irqsave(&cache->lock, flags); + list_add_tail(&mg->list, &cache->quiesced_migrations); + spin_unlock_irqrestore(&cache->lock, flags); + + } else + cleanup_migration(mg); + + } else { + cell_defer(cache, mg->new_ocell, true); + clear_dirty(cache, mg->new_oblock, mg->cblock); + cleanup_migration(mg); + } +} + +static void copy_complete(int read_err, unsigned long write_err, void *context) +{ + unsigned long flags; + struct dm_cache_migration *mg = (struct dm_cache_migration *) context; + struct cache *cache = mg->cache; + + if (read_err || write_err) + mg->err = true; + + spin_lock_irqsave(&cache->lock, flags); + list_add_tail(&mg->list, &cache->completed_migrations); + spin_unlock_irqrestore(&cache->lock, flags); + + wake_worker(cache); +} + +static void issue_copy_real(struct dm_cache_migration *mg) +{ + int r; + struct dm_io_region o_region, c_region; + struct cache *cache = mg->cache; + + o_region.bdev = cache->origin_dev->bdev; + o_region.count = cache->sectors_per_block; + + c_region.bdev = cache->cache_dev->bdev; + c_region.sector = from_cblock(mg->cblock) * cache->sectors_per_block; + c_region.count = cache->sectors_per_block; + + if (mg->writeback || mg->demote) { + /* demote */ + o_region.sector = from_oblock(mg->old_oblock) * cache->sectors_per_block; + r = dm_kcopyd_copy(cache->copier, &c_region, 1, &o_region, 0, copy_complete, mg); + } else { + /* promote */ + o_region.sector = from_oblock(mg->new_oblock) * cache->sectors_per_block; + r = dm_kcopyd_copy(cache->copier, &o_region, 1, &c_region, 0, copy_complete, mg); + } + + if (r < 0) + migration_failure(mg); +} + +static void avoid_copy(struct dm_cache_migration *mg) +{ + atomic_inc(&mg->cache->stats.copies_avoided); + migration_success_pre_commit(mg); +} + +static void issue_copy(struct dm_cache_migration *mg) +{ + bool avoid; + struct cache *cache = mg->cache; + + if (mg->writeback || mg->demote) + avoid = !is_dirty(cache, mg->cblock) || + is_discarded_oblock(cache, mg->old_oblock); + else + avoid = is_discarded_oblock(cache, mg->new_oblock); + + avoid ? avoid_copy(mg) : issue_copy_real(mg); +} + +static void complete_migration(struct dm_cache_migration *mg) +{ + if (mg->err) + migration_failure(mg); + else + migration_success_pre_commit(mg); +} + +static void process_migrations(struct cache *cache, struct list_head *head, + void (*fn)(struct dm_cache_migration *)) +{ + unsigned long flags; + struct list_head list; + struct dm_cache_migration *mg, *tmp; + + INIT_LIST_HEAD(&list); + spin_lock_irqsave(&cache->lock, flags); + list_splice_init(head, &list); + spin_unlock_irqrestore(&cache->lock, flags); + + list_for_each_entry_safe(mg, tmp, &list, list) + fn(mg); +} + +static void __queue_quiesced_migration(struct dm_cache_migration *mg) +{ + list_add_tail(&mg->list, &mg->cache->quiesced_migrations); +} + +static void queue_quiesced_migration(struct dm_cache_migration *mg) +{ + unsigned long flags; + struct cache *cache = mg->cache; + + spin_lock_irqsave(&cache->lock, flags); + __queue_quiesced_migration(mg); + spin_unlock_irqrestore(&cache->lock, flags); + + wake_worker(cache); +} + +static void queue_quiesced_migrations(struct cache *cache, struct list_head *work) +{ + unsigned long flags; + struct dm_cache_migration *mg, *tmp; + + spin_lock_irqsave(&cache->lock, flags); + list_for_each_entry_safe(mg, tmp, work, list) + __queue_quiesced_migration(mg); + spin_unlock_irqrestore(&cache->lock, flags); + + wake_worker(cache); +} + +static void check_for_quiesced_migrations(struct cache *cache, + struct per_bio_data *pb) +{ + struct list_head work; + + if (!pb->all_io_entry) + return; + + INIT_LIST_HEAD(&work); + if (pb->all_io_entry) + dm_deferred_entry_dec(pb->all_io_entry, &work); + + if (!list_empty(&work)) + queue_quiesced_migrations(cache, &work); +} + +static void quiesce_migration(struct dm_cache_migration *mg) +{ + if (!dm_deferred_set_add_work(mg->cache->all_io_ds, &mg->list)) + queue_quiesced_migration(mg); +} + +static void promote(struct cache *cache, struct prealloc *structs, + dm_oblock_t oblock, dm_cblock_t cblock, + struct dm_bio_prison_cell *cell) +{ + struct dm_cache_migration *mg = prealloc_get_migration(structs); + + mg->err = false; + mg->writeback = false; + mg->demote = false; + mg->promote = true; + mg->cache = cache; + mg->new_oblock = oblock; + mg->cblock = cblock; + mg->old_ocell = NULL; + mg->new_ocell = cell; + mg->start_jiffies = jiffies; + + inc_nr_migrations(cache); + quiesce_migration(mg); +} + +static void writeback(struct cache *cache, struct prealloc *structs, + dm_oblock_t oblock, dm_cblock_t cblock, + struct dm_bio_prison_cell *cell) +{ + struct dm_cache_migration *mg = prealloc_get_migration(structs); + + mg->err = false; + mg->writeback = true; + mg->demote = false; + mg->promote = false; + mg->cache = cache; + mg->old_oblock = oblock; + mg->cblock = cblock; + mg->old_ocell = cell; + mg->new_ocell = NULL; + mg->start_jiffies = jiffies; + + inc_nr_migrations(cache); + quiesce_migration(mg); +} + +static void demote_then_promote(struct cache *cache, struct prealloc *structs, + dm_oblock_t old_oblock, dm_oblock_t new_oblock, + dm_cblock_t cblock, + struct dm_bio_prison_cell *old_ocell, + struct dm_bio_prison_cell *new_ocell) +{ + struct dm_cache_migration *mg = prealloc_get_migration(structs); + + mg->err = false; + mg->writeback = false; + mg->demote = true; + mg->promote = true; + mg->cache = cache; + mg->old_oblock = old_oblock; + mg->new_oblock = new_oblock; + mg->cblock = cblock; + mg->old_ocell = old_ocell; + mg->new_ocell = new_ocell; + mg->start_jiffies = jiffies; + + inc_nr_migrations(cache); + quiesce_migration(mg); +} + +/*---------------------------------------------------------------- + * bio processing + *--------------------------------------------------------------*/ +static void defer_bio(struct cache *cache, struct bio *bio) +{ + unsigned long flags; + + spin_lock_irqsave(&cache->lock, flags); + bio_list_add(&cache->deferred_bios, bio); + spin_unlock_irqrestore(&cache->lock, flags); + + wake_worker(cache); +} + +static void process_flush_bio(struct cache *cache, struct bio *bio) +{ + struct per_bio_data *pb = get_per_bio_data(bio); + + BUG_ON(bio->bi_size); + if (!pb->req_nr) + remap_to_origin(cache, bio); + else + remap_to_cache(cache, bio, 0); + + issue(cache, bio); +} + +/* + * People generally discard large parts of a device, eg, the whole device + * when formatting. Splitting these large discards up into cache block + * sized ios and then quiescing (always neccessary for discard) takes too + * long. + * + * We keep it simple, and allow any size of discard to come in, and just + * mark off blocks on the discard bitset. No passdown occurs! + * + * To implement passdown we need to change the bio_prison such that a cell + * can have a key that spans many blocks. + */ +static void process_discard_bio(struct cache *cache, struct bio *bio) +{ + dm_block_t start_block = dm_sector_div_up(bio->bi_sector, + cache->discard_block_size); + dm_block_t end_block = bio->bi_sector + bio_sectors(bio); + dm_block_t b; + + (void) sector_div(end_block, cache->discard_block_size); + + for (b = start_block; b < end_block; b++) + set_discard(cache, to_dblock(b)); + + bio_endio(bio, 0); +} + +static bool spare_migration_bandwidth(struct cache *cache) +{ + sector_t current_volume = (atomic_read(&cache->nr_migrations) + 1) * + cache->sectors_per_block; + return current_volume < cache->migration_threshold; +} + +static bool is_writethrough_io(struct cache *cache, struct bio *bio, + dm_cblock_t cblock) +{ + return bio_data_dir(bio) == WRITE && + cache->features.write_through && !is_dirty(cache, cblock); +} + +static void inc_hit_counter(struct cache *cache, struct bio *bio) +{ + atomic_inc(bio_data_dir(bio) == READ ? + &cache->stats.read_hit : &cache->stats.write_hit); +} + +static void inc_miss_counter(struct cache *cache, struct bio *bio) +{ + atomic_inc(bio_data_dir(bio) == READ ? + &cache->stats.read_miss : &cache->stats.write_miss); +} + +static void process_bio(struct cache *cache, struct prealloc *structs, + struct bio *bio) +{ + int r; + bool release_cell = true; + dm_oblock_t block = get_bio_block(cache, bio); + struct dm_bio_prison_cell *cell_prealloc, *old_ocell, *new_ocell; + struct policy_result lookup_result; + struct per_bio_data *pb = get_per_bio_data(bio); + bool discarded_block = is_discarded_oblock(cache, block); + bool can_migrate = discarded_block || spare_migration_bandwidth(cache); + + /* + * Check to see if that block is currently migrating. + */ + cell_prealloc = prealloc_get_cell(structs); + r = bio_detain(cache, block, bio, cell_prealloc, + (cell_free_fn) prealloc_put_cell, + structs, &new_ocell); + if (r > 0) + return; + + r = policy_map(cache->policy, block, true, can_migrate, discarded_block, + bio, &lookup_result); + + if (r == -EWOULDBLOCK) + /* migration has been denied */ + lookup_result.op = POLICY_MISS; + + switch (lookup_result.op) { + case POLICY_HIT: + inc_hit_counter(cache, bio); + pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds); + + if (is_writethrough_io(cache, bio, lookup_result.cblock)) { + /* + * No need to mark anything dirty in write through mode. + */ + pb->req_nr == 0 ? + remap_to_cache(cache, bio, lookup_result.cblock) : + remap_to_origin_clear_discard(cache, bio, block); + } else + remap_to_cache_dirty(cache, bio, block, lookup_result.cblock); + + issue(cache, bio); + break; + + case POLICY_MISS: + inc_miss_counter(cache, bio); + pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds); + + if (pb->req_nr != 0) { + /* + * This is a duplicate writethrough io that is no + * longer needed because the block has been demoted. + */ + bio_endio(bio, 0); + } else { + remap_to_origin_clear_discard(cache, bio, block); + issue(cache, bio); + } + break; + + case POLICY_NEW: + atomic_inc(&cache->stats.promotion); + promote(cache, structs, block, lookup_result.cblock, new_ocell); + release_cell = false; + break; + + case POLICY_REPLACE: + cell_prealloc = prealloc_get_cell(structs); + r = bio_detain(cache, lookup_result.old_oblock, bio, cell_prealloc, + (cell_free_fn) prealloc_put_cell, + structs, &old_ocell); + if (r > 0) { + /* + * We have to be careful to avoid lock inversion of + * the cells. So we back off, and wait for the + * old_ocell to become free. + */ + policy_force_mapping(cache->policy, block, + lookup_result.old_oblock); + atomic_inc(&cache->stats.cache_cell_clash); + break; + } + atomic_inc(&cache->stats.demotion); + atomic_inc(&cache->stats.promotion); + + demote_then_promote(cache, structs, lookup_result.old_oblock, + block, lookup_result.cblock, + old_ocell, new_ocell); + release_cell = false; + break; + + default: + DMERR_LIMIT("%s: erroring bio, unknown policy op: %u", __func__, + (unsigned) lookup_result.op); + bio_io_error(bio); + } + + if (release_cell) + cell_defer(cache, new_ocell, false); +} + +static int need_commit_due_to_time(struct cache *cache) +{ + return jiffies < cache->last_commit_jiffies || + jiffies > cache->last_commit_jiffies + COMMIT_PERIOD; +} + +static int commit_if_needed(struct cache *cache) +{ + if (dm_cache_changed_this_transaction(cache->cmd) && + (cache->commit_requested || need_commit_due_to_time(cache))) { + atomic_inc(&cache->stats.commit_count); + cache->last_commit_jiffies = jiffies; + cache->commit_requested = false; + return dm_cache_commit(cache->cmd, false); + } + + return 0; +} + +static void process_deferred_bios(struct cache *cache) +{ + unsigned long flags; + struct bio_list bios; + struct bio *bio; + struct prealloc structs; + + memset(&structs, 0, sizeof(structs)); + bio_list_init(&bios); + + spin_lock_irqsave(&cache->lock, flags); + bio_list_merge(&bios, &cache->deferred_bios); + bio_list_init(&cache->deferred_bios); + spin_unlock_irqrestore(&cache->lock, flags); + + while (!bio_list_empty(&bios)) { + /* + * If we've got no free migration structs, and processing + * this bio might require one, we pause until there are some + * prepared mappings to process. + */ + if (prealloc_data_structs(cache, &structs)) { + spin_lock_irqsave(&cache->lock, flags); + bio_list_merge(&cache->deferred_bios, &bios); + spin_unlock_irqrestore(&cache->lock, flags); + break; + } + + bio = bio_list_pop(&bios); + + if (bio->bi_rw & REQ_FLUSH) + process_flush_bio(cache, bio); + else if (bio->bi_rw & REQ_DISCARD) + process_discard_bio(cache, bio); + else + process_bio(cache, &structs, bio); + } + + prealloc_free_structs(cache, &structs); +} + +static void process_deferred_flush_bios(struct cache *cache, bool submit_bios) +{ + unsigned long flags; + struct bio_list bios; + struct bio *bio; + + bio_list_init(&bios); + + spin_lock_irqsave(&cache->lock, flags); + bio_list_merge(&bios, &cache->deferred_flush_bios); + bio_list_init(&cache->deferred_flush_bios); + spin_unlock_irqrestore(&cache->lock, flags); + + while ((bio = bio_list_pop(&bios))) + submit_bios ? generic_make_request(bio) : bio_io_error(bio); +} + +static void writeback_some_dirty_blocks(struct cache *cache) +{ + int r = 0; + dm_oblock_t oblock; + dm_cblock_t cblock; + struct prealloc structs; + struct dm_bio_prison_cell *old_ocell; + + memset(&structs, 0, sizeof(structs)); + + while (spare_migration_bandwidth(cache)) { + if (prealloc_data_structs(cache, &structs)) + break; + + r = policy_writeback_work(cache->policy, &oblock, &cblock); + if (r) + break; + + r = get_cell(cache, oblock, &structs, &old_ocell); + if (r) { + policy_set_dirty(cache->policy, oblock); + break; + } + + writeback(cache, &structs, oblock, cblock, old_ocell); + } + + prealloc_free_structs(cache, &structs); +} + +/*---------------------------------------------------------------- + * Main worker loop + *--------------------------------------------------------------*/ +static void start_quiescing(struct cache *cache) +{ + unsigned long flags; + + spin_lock_irqsave(&cache->lock, flags); + cache->quiescing = 1; + spin_unlock_irqrestore(&cache->lock, flags); +} + +static void stop_quiescing(struct cache *cache) +{ + unsigned long flags; + + spin_lock_irqsave(&cache->lock, flags); + cache->quiescing = 0; + spin_unlock_irqrestore(&cache->lock, flags); +} + +static bool is_quiescing(struct cache *cache) +{ + int r; + unsigned long flags; + + spin_lock_irqsave(&cache->lock, flags); + r = cache->quiescing; + spin_unlock_irqrestore(&cache->lock, flags); + + return r; +} + +static void wait_for_migrations(struct cache *cache) +{ + wait_event(cache->migration_wait, !atomic_read(&cache->nr_migrations)); +} + +static void stop_worker(struct cache *cache) +{ + cancel_delayed_work(&cache->waker); + flush_workqueue(cache->wq); +} + +static void requeue_deferred_io(struct cache *cache) +{ + struct bio *bio; + struct bio_list bios; + + bio_list_init(&bios); + bio_list_merge(&bios, &cache->deferred_bios); + bio_list_init(&cache->deferred_bios); + + while ((bio = bio_list_pop(&bios))) + bio_endio(bio, DM_ENDIO_REQUEUE); +} + +static int more_work(struct cache *cache) +{ + if (is_quiescing(cache)) + return !list_empty(&cache->quiesced_migrations) || + !list_empty(&cache->completed_migrations) || + !list_empty(&cache->need_commit_migrations); + else + return !bio_list_empty(&cache->deferred_bios) || + !bio_list_empty(&cache->deferred_flush_bios) || + !list_empty(&cache->quiesced_migrations) || + !list_empty(&cache->completed_migrations) || + !list_empty(&cache->need_commit_migrations); +} + +static void do_worker(struct work_struct *ws) +{ + struct cache *cache = container_of(ws, struct cache, worker); + + do { + if (!is_quiescing(cache)) + process_deferred_bios(cache); + + process_migrations(cache, &cache->quiesced_migrations, issue_copy); + process_migrations(cache, &cache->completed_migrations, complete_migration); + + writeback_some_dirty_blocks(cache); + + if (commit_if_needed(cache)) { + process_deferred_flush_bios(cache, false); + + /* + * FIXME: rollback metadata or just go into a + * failure mode and error everything + */ + } else { + process_deferred_flush_bios(cache, true); + process_migrations(cache, &cache->need_commit_migrations, + migration_success_post_commit); + } + } while (more_work(cache)); +} + +/* + * We want to commit periodically so that not too much + * unwritten metadata builds up. + */ +static void do_waker(struct work_struct *ws) +{ + struct cache *cache = container_of(to_delayed_work(ws), struct cache, waker); + wake_worker(cache); + queue_delayed_work(cache->wq, &cache->waker, COMMIT_PERIOD); +} + +/*----------------------------------------------------------------*/ + +static int is_congested(struct dm_dev *dev, int bdi_bits) +{ + struct request_queue *q = bdev_get_queue(dev->bdev); + return bdi_congested(&q->backing_dev_info, bdi_bits); +} + +static int cache_is_congested(struct dm_target_callbacks *cb, int bdi_bits) +{ + struct cache *cache = container_of(cb, struct cache, callbacks); + + return is_congested(cache->origin_dev, bdi_bits) || + is_congested(cache->cache_dev, bdi_bits); +} + +/*---------------------------------------------------------------- + * Target methods + *--------------------------------------------------------------*/ + +/* + * This function gets called on the error paths of the constructor, so we + * have to cope with a partially initialised struct. + */ +static void destroy(struct cache *cache) +{ + unsigned i; + + if (cache->next_migration) + mempool_free(cache->next_migration, cache->migration_pool); + + if (cache->migration_pool) + mempool_destroy(cache->migration_pool); + + if (cache->all_io_ds) + dm_deferred_set_destroy(cache->all_io_ds); + + if (cache->prison) + dm_bio_prison_destroy(cache->prison); + + if (cache->wq) + destroy_workqueue(cache->wq); + + if (cache->dirty_bitset) + free_bitset(cache->dirty_bitset); + + if (cache->discard_bitset) + free_bitset(cache->discard_bitset); + + if (cache->copier) + dm_kcopyd_client_destroy(cache->copier); + + if (cache->cmd) + dm_cache_metadata_close(cache->cmd); + + if (cache->metadata_dev) + dm_put_device(cache->ti, cache->metadata_dev); + + if (cache->origin_dev) + dm_put_device(cache->ti, cache->origin_dev); + + if (cache->cache_dev) + dm_put_device(cache->ti, cache->cache_dev); + + if (cache->policy) + dm_cache_policy_destroy(cache->policy); + + for (i = 0; i < cache->nr_ctr_args ; i++) + kfree(cache->ctr_args[i]); + kfree(cache->ctr_args); + + kfree(cache); +} + +static void cache_dtr(struct dm_target *ti) +{ + struct cache *cache = ti->private; + + destroy(cache); +} + +static sector_t get_dev_size(struct dm_dev *dev) +{ + return i_size_read(dev->bdev->bd_inode) >> SECTOR_SHIFT; +} + +/*----------------------------------------------------------------*/ + +/* + * Construct a cache device mapping. + * + * cache <metadata dev> <cache dev> <origin dev> <block size> + * <#feature args> [<feature arg>]* + * <policy> <#policy args> [<policy arg>]* + * + * metadata dev : fast device holding the persistent metadata + * cache dev : fast device holding cached data blocks + * origin dev : slow device holding original data blocks + * block size : cache unit size in sectors + * + * #feature args : number of feature arguments passed + * feature args : writethrough. (The default is writeback.) + * + * policy : the replacement policy to use + * #policy args : an even number of policy arguments corresponding + * to key/value pairs passed to the policy + * policy args : key/value pairs passed to the policy + * E.g. 'sequential_threshold 1024' + * See cache-policies.txt for details. + * + * Optional feature arguments are: + * writethrough : write through caching that prohibits cache block + * content from being different from origin block content. + * Without this argument, the default behaviour is to write + * back cache block contents later for performance reasons, + * so they may differ from the corresponding origin blocks. + */ +struct cache_args { + struct dm_target *ti; + + struct dm_dev *metadata_dev; + + struct dm_dev *cache_dev; + sector_t cache_sectors; + + struct dm_dev *origin_dev; + sector_t origin_sectors; + + uint32_t block_size; + + const char *policy_name; + int policy_argc; + const char **policy_argv; + + struct cache_features features; +}; + +static void destroy_cache_args(struct cache_args *ca) +{ + if (ca->metadata_dev) + dm_put_device(ca->ti, ca->metadata_dev); + + if (ca->cache_dev) + dm_put_device(ca->ti, ca->cache_dev); + + if (ca->origin_dev) + dm_put_device(ca->ti, ca->origin_dev); + + kfree(ca); +} + +static bool at_least_one_arg(struct dm_arg_set *as, char **error) +{ + if (!as->argc) { + *error = "Insufficient args"; + return false; + } + + return true; +} + +static int parse_metadata_dev(struct cache_args *ca, struct dm_arg_set *as, + char **error) +{ + int r; + sector_t metadata_dev_size; + char b[BDEVNAME_SIZE]; + + if (!at_least_one_arg(as, error)) + return -EINVAL; + + r = dm_get_device(ca->ti, dm_shift_arg(as), FMODE_READ | FMODE_WRITE, + &ca->metadata_dev); + if (r) { + *error = "Error opening metadata device"; + return r; + } + + metadata_dev_size = get_dev_size(ca->metadata_dev); + if (metadata_dev_size > DM_CACHE_METADATA_MAX_SECTORS_WARNING) + DMWARN("Metadata device %s is larger than %u sectors: excess space will not be used.", + bdevname(ca->metadata_dev->bdev, b), THIN_METADATA_MAX_SECTORS); + + return 0; +} + +static int parse_cache_dev(struct cache_args *ca, struct dm_arg_set *as, + char **error) +{ + int r; + + if (!at_least_one_arg(as, error)) + return -EINVAL; + + r = dm_get_device(ca->ti, dm_shift_arg(as), FMODE_READ | FMODE_WRITE, + &ca->cache_dev); + if (r) { + *error = "Error opening cache device"; + return r; + } + ca->cache_sectors = get_dev_size(ca->cache_dev); + + return 0; +} + +static int parse_origin_dev(struct cache_args *ca, struct dm_arg_set *as, + char **error) +{ + int r; + + if (!at_least_one_arg(as, error)) + return -EINVAL; + + r = dm_get_device(ca->ti, dm_shift_arg(as), FMODE_READ | FMODE_WRITE, + &ca->origin_dev); + if (r) { + *error = "Error opening origin device"; + return r; + } + + ca->origin_sectors = get_dev_size(ca->origin_dev); + if (ca->ti->len > ca->origin_sectors) { + *error = "Device size larger than cached device"; + return -EINVAL; + } + + return 0; +} + +static int parse_block_size(struct cache_args *ca, struct dm_arg_set *as, + char **error) +{ + unsigned long tmp; + + if (!at_least_one_arg(as, error)) + return -EINVAL; + + if (kstrtoul(dm_shift_arg(as), 10, &tmp) || !tmp || + tmp < DATA_DEV_BLOCK_SIZE_MIN_SECTORS || + tmp & (DATA_DEV_BLOCK_SIZE_MIN_SECTORS - 1)) { + *error = "Invalid data block size"; + return -EINVAL; + } + + if (tmp > ca->cache_sectors) { + *error = "Data block size is larger than the cache device"; + return -EINVAL; + } + + ca->block_size = tmp; + + return 0; +} + +static void init_features(struct cache_features *cf) +{ + cf->mode = CM_WRITE; + cf->write_through = false; +} + +static int parse_features(struct cache_args *ca, struct dm_arg_set *as, + char **error) +{ + static struct dm_arg _args[] = { + {0, 1, "Invalid number of cache feature arguments"}, + }; + + int r; + unsigned argc; + const char *arg; + struct cache_features *cf = &ca->features; + + init_features(cf); + + r = dm_read_arg_group(_args, as, &argc, error); + if (r) + return -EINVAL; + + while (argc--) { + arg = dm_shift_arg(as); + + if (!strcasecmp(arg, "writeback")) + cf->write_through = false; + + else if (!strcasecmp(arg, "writethrough")) + cf->write_through = true; + + else { + *error = "Unrecognised cache feature requested"; + return -EINVAL; + } + } + + return 0; +} + +static int parse_policy(struct cache_args *ca, struct dm_arg_set *as, + char **error) +{ + static struct dm_arg _args[] = { + {0, 1024, "Invalid number of policy arguments"}, + }; + + int r; + + if (!at_least_one_arg(as, error)) + return -EINVAL; + + ca->policy_name = dm_shift_arg(as); + + r = dm_read_arg_group(_args, as, &ca->policy_argc, error); + if (r) + return -EINVAL; + + ca->policy_argv = (const char **)as->argv; + dm_consume_args(as, ca->policy_argc); + + return 0; +} + +static int parse_cache_args(struct cache_args *ca, int argc, char **argv, + char **error) +{ + int r; + struct dm_arg_set as; + + as.argc = argc; + as.argv = argv; + + r = parse_metadata_dev(ca, &as, error); + if (r) + return r; + + r = parse_cache_dev(ca, &as, error); + if (r) + return r; + + r = parse_origin_dev(ca, &as, error); + if (r) + return r; + + r = parse_block_size(ca, &as, error); + if (r) + return r; + + r = parse_features(ca, &as, error); + if (r) + return r; + + r = parse_policy(ca, &as, error); + if (r) + return r; + + return 0; +} + +/*----------------------------------------------------------------*/ + +static struct kmem_cache *migration_cache; + +static int set_config_values(struct dm_cache_policy *p, int argc, const char **argv) +{ + int r = 0; + + if (argc & 1) { + DMWARN("Odd number of policy arguments given but they should be <key> <value> pairs."); + return -EINVAL; + } + + while (argc) { + r = policy_set_config_value(p, argv[0], argv[1]); + if (r) { + DMWARN("policy_set_config_value failed: key = '%s', value = '%s'", + argv[0], argv[1]); + return r; + } + + argc -= 2; + argv += 2; + } + + return r; +} + +static int create_cache_policy(struct cache *cache, struct cache_args *ca, + char **error) +{ + int r; + + cache->policy = dm_cache_policy_create(ca->policy_name, + cache->cache_size, + cache->origin_sectors, + cache->sectors_per_block); + if (!cache->policy) { + *error = "Error creating cache's policy"; + return -ENOMEM; + } + + r = set_config_values(cache->policy, ca->policy_argc, ca->policy_argv); + if (r) + dm_cache_policy_destroy(cache->policy); + + return r; +} + +/* + * We want the discard block size to be a power of two, at least the size + * of the cache block size, and have no more than 2^14 discard blocks + * across the origin. + */ +#define MAX_DISCARD_BLOCKS (1 << 14) + +static bool too_many_discard_blocks(sector_t discard_block_size, + sector_t origin_size) +{ + (void) sector_div(origin_size, discard_block_size); + + return origin_size > MAX_DISCARD_BLOCKS; +} + +static sector_t calculate_discard_block_size(sector_t cache_block_size, + sector_t origin_size) +{ + sector_t discard_block_size; + + discard_block_size = roundup_pow_of_two(cache_block_size); + + if (origin_size) + while (too_many_discard_blocks(discard_block_size, origin_size)) + discard_block_size *= 2; + + return discard_block_size; +} + +#define DEFAULT_MIGRATION_THRESHOLD (2048 * 100) + +static unsigned cache_num_write_bios(struct dm_target *ti, struct bio *bio); + +static int cache_create(struct cache_args *ca, struct cache **result) +{ + int r = 0; + char **error = &ca->ti->error; + struct cache *cache; + struct dm_target *ti = ca->ti; + dm_block_t origin_blocks; + struct dm_cache_metadata *cmd; + bool may_format = ca->features.mode == CM_WRITE; + + cache = kzalloc(sizeof(*cache), GFP_KERNEL); + if (!cache) + return -ENOMEM; + + cache->ti = ca->ti; + ti->private = cache; + ti->per_bio_data_size = sizeof(struct per_bio_data); + ti->num_flush_bios = 2; + ti->flush_supported = true; + + ti->num_discard_bios = 1; + ti->discards_supported = true; + ti->discard_zeroes_data_unsupported = true; + + memcpy(&cache->features, &ca->features, sizeof(cache->features)); + + if (cache->features.write_through) + ti->num_write_bios = cache_num_write_bios; + + cache->callbacks.congested_fn = cache_is_congested; + dm_table_add_target_callbacks(ti->table, &cache->callbacks); + + cache->metadata_dev = ca->metadata_dev; + cache->origin_dev = ca->origin_dev; + cache->cache_dev = ca->cache_dev; + + ca->metadata_dev = ca->origin_dev = ca->cache_dev = NULL; + + /* FIXME: factor out this whole section */ + origin_blocks = cache->origin_sectors = ca->origin_sectors; + (void) sector_div(origin_blocks, ca->block_size); + cache->origin_blocks = to_oblock(origin_blocks); + + cache->sectors_per_block = ca->block_size; + if (dm_set_target_max_io_len(ti, cache->sectors_per_block)) { + r = -EINVAL; + goto bad; + } + + if (ca->block_size & (ca->block_size - 1)) { + dm_block_t cache_size = ca->cache_sectors; + + cache->sectors_per_block_shift = -1; + (void) sector_div(cache_size, ca->block_size); + cache->cache_size = to_cblock(cache_size); + } else { + cache->sectors_per_block_shift = __ffs(ca->block_size); + cache->cache_size = to_cblock(ca->cache_sectors >> cache->sectors_per_block_shift); + } + + r = create_cache_policy(cache, ca, error); + if (r) + goto bad; + cache->policy_nr_args = ca->policy_argc; + + cmd = dm_cache_metadata_open(cache->metadata_dev->bdev, + ca->block_size, may_format, + dm_cache_policy_get_hint_size(cache->policy)); + if (IS_ERR(cmd)) { + *error = "Error creating metadata object"; + r = PTR_ERR(cmd); + goto bad; + } + cache->cmd = cmd; + + spin_lock_init(&cache->lock); + bio_list_init(&cache->deferred_bios); + bio_list_init(&cache->deferred_flush_bios); + INIT_LIST_HEAD(&cache->quiesced_migrations); + INIT_LIST_HEAD(&cache->completed_migrations); + INIT_LIST_HEAD(&cache->need_commit_migrations); + cache->migration_threshold = DEFAULT_MIGRATION_THRESHOLD; + atomic_set(&cache->nr_migrations, 0); + init_waitqueue_head(&cache->migration_wait); + + cache->nr_dirty = 0; + cache->dirty_bitset = alloc_bitset(from_cblock(cache->cache_size)); + if (!cache->dirty_bitset) { + *error = "could not allocate dirty bitset"; + goto bad; + } + clear_bitset(cache->dirty_bitset, from_cblock(cache->cache_size)); + + cache->discard_block_size = + calculate_discard_block_size(cache->sectors_per_block, + cache->origin_sectors); + cache->discard_nr_blocks = oblock_to_dblock(cache, cache->origin_blocks); + cache->discard_bitset = alloc_bitset(from_dblock(cache->discard_nr_blocks)); + if (!cache->discard_bitset) { + *error = "could not allocate discard bitset"; + goto bad; + } + clear_bitset(cache->discard_bitset, from_dblock(cache->discard_nr_blocks)); + + cache->copier = dm_kcopyd_client_create(&dm_kcopyd_throttle); + if (IS_ERR(cache->copier)) { + *error = "could not create kcopyd client"; + r = PTR_ERR(cache->copier); + goto bad; + } + + cache->wq = alloc_ordered_workqueue("dm-" DM_MSG_PREFIX, WQ_MEM_RECLAIM); + if (!cache->wq) { + *error = "could not create workqueue for metadata object"; + goto bad; + } + INIT_WORK(&cache->worker, do_worker); + INIT_DELAYED_WORK(&cache->waker, do_waker); + cache->last_commit_jiffies = jiffies; + + cache->prison = dm_bio_prison_create(PRISON_CELLS); + if (!cache->prison) { + *error = "could not create bio prison"; + goto bad; + } + + cache->all_io_ds = dm_deferred_set_create(); + if (!cache->all_io_ds) { + *error = "could not create all_io deferred set"; + goto bad; + } + + cache->migration_pool = mempool_create_slab_pool(MIGRATION_POOL_SIZE, + migration_cache); + if (!cache->migration_pool) { + *error = "Error creating cache's migration mempool"; + goto bad; + } + + cache->next_migration = NULL; + + cache->need_tick_bio = true; + cache->sized = false; + cache->quiescing = false; + cache->commit_requested = false; + cache->loaded_mappings = false; + cache->loaded_discards = false; + + load_stats(cache); + + atomic_set(&cache->stats.demotion, 0); + atomic_set(&cache->stats.promotion, 0); + atomic_set(&cache->stats.copies_avoided, 0); + atomic_set(&cache->stats.cache_cell_clash, 0); + atomic_set(&cache->stats.commit_count, 0); + atomic_set(&cache->stats.discard_count, 0); + + *result = cache; + return 0; + +bad: + destroy(cache); + return r; +} + +static int copy_ctr_args(struct cache *cache, int argc, const char **argv) +{ + unsigned i; + const char **copy; + + copy = kcalloc(argc, sizeof(*copy), GFP_KERNEL); + if (!copy) + return -ENOMEM; + for (i = 0; i < argc; i++) { + copy[i] = kstrdup(argv[i], GFP_KERNEL); + if (!copy[i]) { + while (i--) + kfree(copy[i]); + kfree(copy); + return -ENOMEM; + } + } + + cache->nr_ctr_args = argc; + cache->ctr_args = copy; + + return 0; +} + +static int cache_ctr(struct dm_target *ti, unsigned argc, char **argv) +{ + int r = -EINVAL; + struct cache_args *ca; + struct cache *cache = NULL; + + ca = kzalloc(sizeof(*ca), GFP_KERNEL); + if (!ca) { + ti->error = "Error allocating memory for cache"; + return -ENOMEM; + } + ca->ti = ti; + + r = parse_cache_args(ca, argc, argv, &ti->error); + if (r) + goto out; + + r = cache_create(ca, &cache); + + r = copy_ctr_args(cache, argc - 3, (const char **)argv + 3); + if (r) { + destroy(cache); + goto out; + } + + ti->private = cache; + +out: + destroy_cache_args(ca); + return r; +} + +static unsigned cache_num_write_bios(struct dm_target *ti, struct bio *bio) +{ + int r; + struct cache *cache = ti->private; + dm_oblock_t block = get_bio_block(cache, bio); + dm_cblock_t cblock; + + r = policy_lookup(cache->policy, block, &cblock); + if (r < 0) + return 2; /* assume the worst */ + + return (!r && !is_dirty(cache, cblock)) ? 2 : 1; +} + +static int cache_map(struct dm_target *ti, struct bio *bio) +{ + struct cache *cache = ti->private; + + int r; + dm_oblock_t block = get_bio_block(cache, bio); + bool can_migrate = false; + bool discarded_block; + struct dm_bio_prison_cell *cell; + struct policy_result lookup_result; + struct per_bio_data *pb; + + if (from_oblock(block) > from_oblock(cache->origin_blocks)) { + /* + * This can only occur if the io goes to a partial block at + * the end of the origin device. We don't cache these. + * Just remap to the origin and carry on. + */ + remap_to_origin_clear_discard(cache, bio, block); + return DM_MAPIO_REMAPPED; + } + + pb = init_per_bio_data(bio); + + if (bio->bi_rw & (REQ_FLUSH | REQ_FUA | REQ_DISCARD)) { + defer_bio(cache, bio); + return DM_MAPIO_SUBMITTED; + } + + /* + * Check to see if that block is currently migrating. + */ + cell = alloc_prison_cell(cache); + if (!cell) { + defer_bio(cache, bio); + return DM_MAPIO_SUBMITTED; + } + + r = bio_detain(cache, block, bio, cell, + (cell_free_fn) free_prison_cell, + cache, &cell); + if (r) { + if (r < 0) + defer_bio(cache, bio); + + return DM_MAPIO_SUBMITTED; + } + + discarded_block = is_discarded_oblock(cache, block); + + r = policy_map(cache->policy, block, false, can_migrate, discarded_block, + bio, &lookup_result); + if (r == -EWOULDBLOCK) { + cell_defer(cache, cell, true); + return DM_MAPIO_SUBMITTED; + + } else if (r) { + DMERR_LIMIT("Unexpected return from cache replacement policy: %d", r); + bio_io_error(bio); + return DM_MAPIO_SUBMITTED; + } + + switch (lookup_result.op) { + case POLICY_HIT: + inc_hit_counter(cache, bio); + pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds); + + if (is_writethrough_io(cache, bio, lookup_result.cblock)) { + /* + * No need to mark anything dirty in write through mode. + */ + pb->req_nr == 0 ? + remap_to_cache(cache, bio, lookup_result.cblock) : + remap_to_origin_clear_discard(cache, bio, block); + cell_defer(cache, cell, false); + } else { + remap_to_cache_dirty(cache, bio, block, lookup_result.cblock); + cell_defer(cache, cell, false); + } + break; + + case POLICY_MISS: + inc_miss_counter(cache, bio); + pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds); + + if (pb->req_nr != 0) { + /* + * This is a duplicate writethrough io that is no + * longer needed because the block has been demoted. + */ + bio_endio(bio, 0); + cell_defer(cache, cell, false); + return DM_MAPIO_SUBMITTED; + } else { + remap_to_origin_clear_discard(cache, bio, block); + cell_defer(cache, cell, false); + } + break; + + default: + DMERR_LIMIT("%s: erroring bio: unknown policy op: %u", __func__, + (unsigned) lookup_result.op); + bio_io_error(bio); + return DM_MAPIO_SUBMITTED; + } + + return DM_MAPIO_REMAPPED; +} + +static int cache_end_io(struct dm_target *ti, struct bio *bio, int error) +{ + struct cache *cache = ti->private; + unsigned long flags; + struct per_bio_data *pb = get_per_bio_data(bio); + + if (pb->tick) { + policy_tick(cache->policy); + + spin_lock_irqsave(&cache->lock, flags); + cache->need_tick_bio = true; + spin_unlock_irqrestore(&cache->lock, flags); + } + + check_for_quiesced_migrations(cache, pb); + + return 0; +} + +static int write_dirty_bitset(struct cache *cache) +{ + unsigned i, r; + + for (i = 0; i < from_cblock(cache->cache_size); i++) { + r = dm_cache_set_dirty(cache->cmd, to_cblock(i), + is_dirty(cache, to_cblock(i))); + if (r) + return r; + } + + return 0; +} + +static int write_discard_bitset(struct cache *cache) +{ + unsigned i, r; + + r = dm_cache_discard_bitset_resize(cache->cmd, cache->discard_block_size, + cache->discard_nr_blocks); + if (r) { + DMERR("could not resize on-disk discard bitset"); + return r; + } + + for (i = 0; i < from_dblock(cache->discard_nr_blocks); i++) { + r = dm_cache_set_discard(cache->cmd, to_dblock(i), + is_discarded(cache, to_dblock(i))); + if (r) + return r; + } + + return 0; +} + +static int save_hint(void *context, dm_cblock_t cblock, dm_oblock_t oblock, + uint32_t hint) +{ + struct cache *cache = context; + return dm_cache_save_hint(cache->cmd, cblock, hint); +} + +static int write_hints(struct cache *cache) +{ + int r; + + r = dm_cache_begin_hints(cache->cmd, cache->policy); + if (r) { + DMERR("dm_cache_begin_hints failed"); + return r; + } + + r = policy_walk_mappings(cache->policy, save_hint, cache); + if (r) + DMERR("policy_walk_mappings failed"); + + return r; +} + +/* + * returns true on success + */ +static bool sync_metadata(struct cache *cache) +{ + int r1, r2, r3, r4; + + r1 = write_dirty_bitset(cache); + if (r1) + DMERR("could not write dirty bitset"); + + r2 = write_discard_bitset(cache); + if (r2) + DMERR("could not write discard bitset"); + + save_stats(cache); + + r3 = write_hints(cache); + if (r3) + DMERR("could not write hints"); + + /* + * If writing the above metadata failed, we still commit, but don't + * set the clean shutdown flag. This will effectively force every + * dirty bit to be set on reload. + */ + r4 = dm_cache_commit(cache->cmd, !r1 && !r2 && !r3); + if (r4) + DMERR("could not write cache metadata. Data loss may occur."); + + return !r1 && !r2 && !r3 && !r4; +} + +static void cache_postsuspend(struct dm_target *ti) +{ + struct cache *cache = ti->private; + + start_quiescing(cache); + wait_for_migrations(cache); + stop_worker(cache); + requeue_deferred_io(cache); + stop_quiescing(cache); + + (void) sync_metadata(cache); +} + +static int load_mapping(void *context, dm_oblock_t oblock, dm_cblock_t cblock, + bool dirty, uint32_t hint, bool hint_valid) +{ + int r; + struct cache *cache = context; + + r = policy_load_mapping(cache->policy, oblock, cblock, hint, hint_valid); + if (r) + return r; + + if (dirty) + set_dirty(cache, oblock, cblock); + else + clear_dirty(cache, oblock, cblock); + + return 0; +} + +static int load_discard(void *context, sector_t discard_block_size, + dm_dblock_t dblock, bool discard) +{ + struct cache *cache = context; + + /* FIXME: handle mis-matched block size */ + + if (discard) + set_discard(cache, dblock); + else + clear_discard(cache, dblock); + + return 0; +} + +static int cache_preresume(struct dm_target *ti) +{ + int r = 0; + struct cache *cache = ti->private; + sector_t actual_cache_size = get_dev_size(cache->cache_dev); + (void) sector_div(actual_cache_size, cache->sectors_per_block); + + /* + * Check to see if the cache has resized. + */ + if (from_cblock(cache->cache_size) != actual_cache_size || !cache->sized) { + cache->cache_size = to_cblock(actual_cache_size); + + r = dm_cache_resize(cache->cmd, cache->cache_size); + if (r) { + DMERR("could not resize cache metadata"); + return r; + } + + cache->sized = true; + } + + if (!cache->loaded_mappings) { + r = dm_cache_load_mappings(cache->cmd, + dm_cache_policy_get_name(cache->policy), + load_mapping, cache); + if (r) { + DMERR("could not load cache mappings"); + return r; + } + + cache->loaded_mappings = true; + } + + if (!cache->loaded_discards) { + r = dm_cache_load_discards(cache->cmd, load_discard, cache); + if (r) { + DMERR("could not load origin discards"); + return r; + } + + cache->loaded_discards = true; + } + + return r; +} + +static void cache_resume(struct dm_target *ti) +{ + struct cache *cache = ti->private; + + cache->need_tick_bio = true; + do_waker(&cache->waker.work); +} + +/* + * Status format: + * + * <#used metadata blocks>/<#total metadata blocks> + * <#read hits> <#read misses> <#write hits> <#write misses> + * <#demotions> <#promotions> <#blocks in cache> <#dirty> + * <#features> <features>* + * <#core args> <core args> + * <#policy args> <policy args>* + */ +static void cache_status(struct dm_target *ti, status_type_t type, + unsigned status_flags, char *result, unsigned maxlen) +{ + int r = 0; + unsigned i; + ssize_t sz = 0; + dm_block_t nr_free_blocks_metadata = 0; + dm_block_t nr_blocks_metadata = 0; + char buf[BDEVNAME_SIZE]; + struct cache *cache = ti->private; + dm_cblock_t residency; + + switch (type) { + case STATUSTYPE_INFO: + /* Commit to ensure statistics aren't out-of-date */ + if (!(status_flags & DM_STATUS_NOFLUSH_FLAG) && !dm_suspended(ti)) { + r = dm_cache_commit(cache->cmd, false); + if (r) + DMERR("could not commit metadata for accurate status"); + } + + r = dm_cache_get_free_metadata_block_count(cache->cmd, + &nr_free_blocks_metadata); + if (r) { + DMERR("could not get metadata free block count"); + goto err; + } + + r = dm_cache_get_metadata_dev_size(cache->cmd, &nr_blocks_metadata); + if (r) { + DMERR("could not get metadata device size"); + goto err; + } + + residency = policy_residency(cache->policy); + + DMEMIT("%llu/%llu %u %u %u %u %u %u %llu %u ", + (unsigned long long)(nr_blocks_metadata - nr_free_blocks_metadata), + (unsigned long long)nr_blocks_metadata, + (unsigned) atomic_read(&cache->stats.read_hit), + (unsigned) atomic_read(&cache->stats.read_miss), + (unsigned) atomic_read(&cache->stats.write_hit), + (unsigned) atomic_read(&cache->stats.write_miss), + (unsigned) atomic_read(&cache->stats.demotion), + (unsigned) atomic_read(&cache->stats.promotion), + (unsigned long long) from_cblock(residency), + cache->nr_dirty); + + if (cache->features.write_through) + DMEMIT("1 writethrough "); + else + DMEMIT("0 "); + + DMEMIT("2 migration_threshold %llu ", (unsigned long long) cache->migration_threshold); + if (sz < maxlen) { + r = policy_emit_config_values(cache->policy, result + sz, maxlen - sz); + if (r) + DMERR("policy_emit_config_values returned %d", r); + } + + break; + + case STATUSTYPE_TABLE: + format_dev_t(buf, cache->metadata_dev->bdev->bd_dev); + DMEMIT("%s ", buf); + format_dev_t(buf, cache->cache_dev->bdev->bd_dev); + DMEMIT("%s ", buf); + format_dev_t(buf, cache->origin_dev->bdev->bd_dev); + DMEMIT("%s", buf); + + for (i = 0; i < cache->nr_ctr_args - 1; i++) + DMEMIT(" %s", cache->ctr_args[i]); + if (cache->nr_ctr_args) + DMEMIT(" %s", cache->ctr_args[cache->nr_ctr_args - 1]); + } + + return; + +err: + DMEMIT("Error"); +} + +#define NOT_CORE_OPTION 1 + +static int process_config_option(struct cache *cache, char **argv) +{ + unsigned long tmp; + + if (!strcasecmp(argv[0], "migration_threshold")) { + if (kstrtoul(argv[1], 10, &tmp)) + return -EINVAL; + + cache->migration_threshold = tmp; + return 0; + } + + return NOT_CORE_OPTION; +} + +/* + * Supports <key> <value>. + * + * The key migration_threshold is supported by the cache target core. + */ +static int cache_message(struct dm_target *ti, unsigned argc, char **argv) +{ + int r; + struct cache *cache = ti->private; + + if (argc != 2) + return -EINVAL; + + r = process_config_option(cache, argv); + if (r == NOT_CORE_OPTION) + return policy_set_config_value(cache->policy, argv[0], argv[1]); + + return r; +} + +static int cache_iterate_devices(struct dm_target *ti, + iterate_devices_callout_fn fn, void *data) +{ + int r = 0; + struct cache *cache = ti->private; + + r = fn(ti, cache->cache_dev, 0, get_dev_size(cache->cache_dev), data); + if (!r) + r = fn(ti, cache->origin_dev, 0, ti->len, data); + + return r; +} + +/* + * We assume I/O is going to the origin (which is the volume + * more likely to have restrictions e.g. by being striped). + * (Looking up the exact location of the data would be expensive + * and could always be out of date by the time the bio is submitted.) + */ +static int cache_bvec_merge(struct dm_target *ti, + struct bvec_merge_data *bvm, + struct bio_vec *biovec, int max_size) +{ + struct cache *cache = ti->private; + struct request_queue *q = bdev_get_queue(cache->origin_dev->bdev); + + if (!q->merge_bvec_fn) + return max_size; + + bvm->bi_bdev = cache->origin_dev->bdev; + return min(max_size, q->merge_bvec_fn(q, bvm, biovec)); +} + +static void set_discard_limits(struct cache *cache, struct queue_limits *limits) +{ + /* + * FIXME: these limits may be incompatible with the cache device + */ + limits->max_discard_sectors = cache->discard_block_size * 1024; + limits->discard_granularity = cache->discard_block_size << SECTOR_SHIFT; +} + +static void cache_io_hints(struct dm_target *ti, struct queue_limits *limits) +{ + struct cache *cache = ti->private; + + blk_limits_io_min(limits, 0); + blk_limits_io_opt(limits, cache->sectors_per_block << SECTOR_SHIFT); + set_discard_limits(cache, limits); +} + +/*----------------------------------------------------------------*/ + +static struct target_type cache_target = { + .name = "cache", + .version = {1, 0, 0}, + .module = THIS_MODULE, + .ctr = cache_ctr, + .dtr = cache_dtr, + .map = cache_map, + .end_io = cache_end_io, + .postsuspend = cache_postsuspend, + .preresume = cache_preresume, + .resume = cache_resume, + .status = cache_status, + .message = cache_message, + .iterate_devices = cache_iterate_devices, + .merge = cache_bvec_merge, + .io_hints = cache_io_hints, +}; + +static int __init dm_cache_init(void) +{ + int r; + + r = dm_register_target(&cache_target); + if (r) { + DMERR("cache target registration failed: %d", r); + return r; + } + + migration_cache = KMEM_CACHE(dm_cache_migration, 0); + if (!migration_cache) { + dm_unregister_target(&cache_target); + return -ENOMEM; + } + + return 0; +} + +static void __exit dm_cache_exit(void) +{ + dm_unregister_target(&cache_target); + kmem_cache_destroy(migration_cache); +} + +module_init(dm_cache_init); +module_exit(dm_cache_exit); + +MODULE_DESCRIPTION(DM_NAME " cache target"); +MODULE_AUTHOR("Joe Thornber <ejt@redhat.com>"); +MODULE_LICENSE("GPL"); diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index f7369f9..13c1548 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -1234,20 +1234,6 @@ static int crypt_decode_key(u8 *key, char *hex, unsigned int size) return 0; } -/* - * Encode key into its hex representation - */ -static void crypt_encode_key(char *hex, u8 *key, unsigned int size) -{ - unsigned int i; - - for (i = 0; i < size; i++) { - sprintf(hex, "%02x", *key); - hex += 2; - key++; - } -} - static void crypt_free_tfms(struct crypt_config *cc) { unsigned i; @@ -1651,7 +1637,7 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv) if (opt_params == 1 && opt_string && !strcasecmp(opt_string, "allow_discards")) - ti->num_discard_requests = 1; + ti->num_discard_bios = 1; else if (opt_params) { ret = -EINVAL; ti->error = "Invalid feature arguments"; @@ -1679,7 +1665,7 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv) goto bad; } - ti->num_flush_requests = 1; + ti->num_flush_bios = 1; ti->discard_zeroes_data_unsupported = true; return 0; @@ -1717,11 +1703,11 @@ static int crypt_map(struct dm_target *ti, struct bio *bio) return DM_MAPIO_SUBMITTED; } -static int crypt_status(struct dm_target *ti, status_type_t type, - unsigned status_flags, char *result, unsigned maxlen) +static void crypt_status(struct dm_target *ti, status_type_t type, + unsigned status_flags, char *result, unsigned maxlen) { struct crypt_config *cc = ti->private; - unsigned int sz = 0; + unsigned i, sz = 0; switch (type) { case STATUSTYPE_INFO: @@ -1731,27 +1717,20 @@ static int crypt_status(struct dm_target *ti, status_type_t type, case STATUSTYPE_TABLE: DMEMIT("%s ", cc->cipher_string); - if (cc->key_size > 0) { - if ((maxlen - sz) < ((cc->key_size << 1) + 1)) - return -ENOMEM; - - crypt_encode_key(result + sz, cc->key, cc->key_size); - sz += cc->key_size << 1; - } else { - if (sz >= maxlen) - return -ENOMEM; - result[sz++] = '-'; - } + if (cc->key_size > 0) + for (i = 0; i < cc->key_size; i++) + DMEMIT("%02x", cc->key[i]); + else + DMEMIT("-"); DMEMIT(" %llu %s %llu", (unsigned long long)cc->iv_offset, cc->dev->name, (unsigned long long)cc->start); - if (ti->num_discard_requests) + if (ti->num_discard_bios) DMEMIT(" 1 allow_discards"); break; } - return 0; } static void crypt_postsuspend(struct dm_target *ti) @@ -1845,7 +1824,7 @@ static int crypt_iterate_devices(struct dm_target *ti, static struct target_type crypt_target = { .name = "crypt", - .version = {1, 12, 0}, + .version = {1, 12, 1}, .module = THIS_MODULE, .ctr = crypt_ctr, .dtr = crypt_dtr, diff --git a/drivers/md/dm-delay.c b/drivers/md/dm-delay.c index cc1bd04..496d5f3 100644 --- a/drivers/md/dm-delay.c +++ b/drivers/md/dm-delay.c @@ -198,8 +198,8 @@ out: mutex_init(&dc->timer_lock); atomic_set(&dc->may_delay, 1); - ti->num_flush_requests = 1; - ti->num_discard_requests = 1; + ti->num_flush_bios = 1; + ti->num_discard_bios = 1; ti->private = dc; return 0; @@ -293,8 +293,8 @@ static int delay_map(struct dm_target *ti, struct bio *bio) return delay_bio(dc, dc->read_delay, bio); } -static int delay_status(struct dm_target *ti, status_type_t type, - unsigned status_flags, char *result, unsigned maxlen) +static void delay_status(struct dm_target *ti, status_type_t type, + unsigned status_flags, char *result, unsigned maxlen) { struct delay_c *dc = ti->private; int sz = 0; @@ -314,8 +314,6 @@ static int delay_status(struct dm_target *ti, status_type_t type, dc->write_delay); break; } - - return 0; } static int delay_iterate_devices(struct dm_target *ti, @@ -337,7 +335,7 @@ out: static struct target_type delay_target = { .name = "delay", - .version = {1, 2, 0}, + .version = {1, 2, 1}, .module = THIS_MODULE, .ctr = delay_ctr, .dtr = delay_dtr, diff --git a/drivers/md/dm-flakey.c b/drivers/md/dm-flakey.c index 9721f2f..7fcf21c 100644 --- a/drivers/md/dm-flakey.c +++ b/drivers/md/dm-flakey.c @@ -216,8 +216,8 @@ static int flakey_ctr(struct dm_target *ti, unsigned int argc, char **argv) goto bad; } - ti->num_flush_requests = 1; - ti->num_discard_requests = 1; + ti->num_flush_bios = 1; + ti->num_discard_bios = 1; ti->per_bio_data_size = sizeof(struct per_bio_data); ti->private = fc; return 0; @@ -337,8 +337,8 @@ static int flakey_end_io(struct dm_target *ti, struct bio *bio, int error) return error; } -static int flakey_status(struct dm_target *ti, status_type_t type, - unsigned status_flags, char *result, unsigned maxlen) +static void flakey_status(struct dm_target *ti, status_type_t type, + unsigned status_flags, char *result, unsigned maxlen) { unsigned sz = 0; struct flakey_c *fc = ti->private; @@ -368,7 +368,6 @@ static int flakey_status(struct dm_target *ti, status_type_t type, break; } - return 0; } static int flakey_ioctl(struct dm_target *ti, unsigned int cmd, unsigned long arg) @@ -411,7 +410,7 @@ static int flakey_iterate_devices(struct dm_target *ti, iterate_devices_callout_ static struct target_type flakey_target = { .name = "flakey", - .version = {1, 3, 0}, + .version = {1, 3, 1}, .module = THIS_MODULE, .ctr = flakey_ctr, .dtr = flakey_dtr, diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c index 0666b5d..aa04f02 100644 --- a/drivers/md/dm-ioctl.c +++ b/drivers/md/dm-ioctl.c @@ -1067,6 +1067,7 @@ static void retrieve_status(struct dm_table *table, num_targets = dm_table_get_num_targets(table); for (i = 0; i < num_targets; i++) { struct dm_target *ti = dm_table_get_target(table, i); + size_t l; remaining = len - (outptr - outbuf); if (remaining <= sizeof(struct dm_target_spec)) { @@ -1093,14 +1094,17 @@ static void retrieve_status(struct dm_table *table, if (ti->type->status) { if (param->flags & DM_NOFLUSH_FLAG) status_flags |= DM_STATUS_NOFLUSH_FLAG; - if (ti->type->status(ti, type, status_flags, outptr, remaining)) { - param->flags |= DM_BUFFER_FULL_FLAG; - break; - } + ti->type->status(ti, type, status_flags, outptr, remaining); } else outptr[0] = '\0'; - outptr += strlen(outptr) + 1; + l = strlen(outptr) + 1; + if (l == remaining) { + param->flags |= DM_BUFFER_FULL_FLAG; + break; + } + + outptr += l; used = param->data_start + (outptr - outbuf); outptr = align_ptr(outptr); @@ -1410,6 +1414,22 @@ static int table_status(struct dm_ioctl *param, size_t param_size) return 0; } +static bool buffer_test_overflow(char *result, unsigned maxlen) +{ + return !maxlen || strlen(result) + 1 >= maxlen; +} + +/* + * Process device-mapper dependent messages. + * Returns a number <= 1 if message was processed by device mapper. + * Returns 2 if message should be delivered to the target. + */ +static int message_for_md(struct mapped_device *md, unsigned argc, char **argv, + char *result, unsigned maxlen) +{ + return 2; +} + /* * Pass a message to the target that's at the supplied device offset. */ @@ -1421,6 +1441,8 @@ static int target_message(struct dm_ioctl *param, size_t param_size) struct dm_table *table; struct dm_target *ti; struct dm_target_msg *tmsg = (void *) param + param->data_start; + size_t maxlen; + char *result = get_result_buffer(param, param_size, &maxlen); md = find_device(param); if (!md) @@ -1444,6 +1466,10 @@ static int target_message(struct dm_ioctl *param, size_t param_size) goto out_argv; } + r = message_for_md(md, argc, argv, result, maxlen); + if (r <= 1) + goto out_argv; + table = dm_get_live_table(md); if (!table) goto out_argv; @@ -1469,44 +1495,68 @@ static int target_message(struct dm_ioctl *param, size_t param_size) out_argv: kfree(argv); out: - param->data_size = 0; + if (r >= 0) + __dev_status(md, param); + + if (r == 1) { + param->flags |= DM_DATA_OUT_FLAG; + if (buffer_test_overflow(result, maxlen)) + param->flags |= DM_BUFFER_FULL_FLAG; + else + param->data_size = param->data_start + strlen(result) + 1; + r = 0; + } + dm_put(md); return r; } +/* + * The ioctl parameter block consists of two parts, a dm_ioctl struct + * followed by a data buffer. This flag is set if the second part, + * which has a variable size, is not used by the function processing + * the ioctl. + */ +#define IOCTL_FLAGS_NO_PARAMS 1 + /*----------------------------------------------------------------- * Implementation of open/close/ioctl on the special char * device. *---------------------------------------------------------------*/ -static ioctl_fn lookup_ioctl(unsigned int cmd) +static ioctl_fn lookup_ioctl(unsigned int cmd, int *ioctl_flags) { static struct { int cmd; + int flags; ioctl_fn fn; } _ioctls[] = { - {DM_VERSION_CMD, NULL}, /* version is dealt with elsewhere */ - {DM_REMOVE_ALL_CMD, remove_all}, - {DM_LIST_DEVICES_CMD, list_devices}, - - {DM_DEV_CREATE_CMD, dev_create}, - {DM_DEV_REMOVE_CMD, dev_remove}, - {DM_DEV_RENAME_CMD, dev_rename}, - {DM_DEV_SUSPEND_CMD, dev_suspend}, - {DM_DEV_STATUS_CMD, dev_status}, - {DM_DEV_WAIT_CMD, dev_wait}, - - {DM_TABLE_LOAD_CMD, table_load}, - {DM_TABLE_CLEAR_CMD, table_clear}, - {DM_TABLE_DEPS_CMD, table_deps}, - {DM_TABLE_STATUS_CMD, table_status}, - - {DM_LIST_VERSIONS_CMD, list_versions}, - - {DM_TARGET_MSG_CMD, target_message}, - {DM_DEV_SET_GEOMETRY_CMD, dev_set_geometry} + {DM_VERSION_CMD, 0, NULL}, /* version is dealt with elsewhere */ + {DM_REMOVE_ALL_CMD, IOCTL_FLAGS_NO_PARAMS, remove_all}, + {DM_LIST_DEVICES_CMD, 0, list_devices}, + + {DM_DEV_CREATE_CMD, IOCTL_FLAGS_NO_PARAMS, dev_create}, + {DM_DEV_REMOVE_CMD, IOCTL_FLAGS_NO_PARAMS, dev_remove}, + {DM_DEV_RENAME_CMD, 0, dev_rename}, + {DM_DEV_SUSPEND_CMD, IOCTL_FLAGS_NO_PARAMS, dev_suspend}, + {DM_DEV_STATUS_CMD, IOCTL_FLAGS_NO_PARAMS, dev_status}, + {DM_DEV_WAIT_CMD, 0, dev_wait}, + + {DM_TABLE_LOAD_CMD, 0, table_load}, + {DM_TABLE_CLEAR_CMD, IOCTL_FLAGS_NO_PARAMS, table_clear}, + {DM_TABLE_DEPS_CMD, 0, table_deps}, + {DM_TABLE_STATUS_CMD, 0, table_status}, + + {DM_LIST_VERSIONS_CMD, 0, list_versions}, + + {DM_TARGET_MSG_CMD, 0, target_message}, + {DM_DEV_SET_GEOMETRY_CMD, 0, dev_set_geometry} }; - return (cmd >= ARRAY_SIZE(_ioctls)) ? NULL : _ioctls[cmd].fn; + if (unlikely(cmd >= ARRAY_SIZE(_ioctls))) + return NULL; + + *ioctl_flags = _ioctls[cmd].flags; + return _ioctls[cmd].fn; } /* @@ -1543,7 +1593,8 @@ static int check_version(unsigned int cmd, struct dm_ioctl __user *user) return r; } -#define DM_PARAMS_VMALLOC 0x0001 /* Params alloced with vmalloc not kmalloc */ +#define DM_PARAMS_KMALLOC 0x0001 /* Params alloced with kmalloc */ +#define DM_PARAMS_VMALLOC 0x0002 /* Params alloced with vmalloc */ #define DM_WIPE_BUFFER 0x0010 /* Wipe input buffer before returning from ioctl */ static void free_params(struct dm_ioctl *param, size_t param_size, int param_flags) @@ -1551,66 +1602,80 @@ static void free_params(struct dm_ioctl *param, size_t param_size, int param_fla if (param_flags & DM_WIPE_BUFFER) memset(param, 0, param_size); + if (param_flags & DM_PARAMS_KMALLOC) + kfree(param); if (param_flags & DM_PARAMS_VMALLOC) vfree(param); - else - kfree(param); } -static int copy_params(struct dm_ioctl __user *user, struct dm_ioctl **param, int *param_flags) +static int copy_params(struct dm_ioctl __user *user, struct dm_ioctl *param_kernel, + int ioctl_flags, + struct dm_ioctl **param, int *param_flags) { - struct dm_ioctl tmp, *dmi; + struct dm_ioctl *dmi; int secure_data; + const size_t minimum_data_size = sizeof(*param_kernel) - sizeof(param_kernel->data); - if (copy_from_user(&tmp, user, sizeof(tmp) - sizeof(tmp.data))) + if (copy_from_user(param_kernel, user, minimum_data_size)) return -EFAULT; - if (tmp.data_size < (sizeof(tmp) - sizeof(tmp.data))) + if (param_kernel->data_size < minimum_data_size) return -EINVAL; - secure_data = tmp.flags & DM_SECURE_DATA_FLAG; + secure_data = param_kernel->flags & DM_SECURE_DATA_FLAG; *param_flags = secure_data ? DM_WIPE_BUFFER : 0; + if (ioctl_flags & IOCTL_FLAGS_NO_PARAMS) { + dmi = param_kernel; + dmi->data_size = minimum_data_size; + goto data_copied; + } + /* * Try to avoid low memory issues when a device is suspended. * Use kmalloc() rather than vmalloc() when we can. */ dmi = NULL; - if (tmp.data_size <= KMALLOC_MAX_SIZE) - dmi = kmalloc(tmp.data_size, GFP_NOIO | __GFP_NORETRY | __GFP_NOMEMALLOC | __GFP_NOWARN); + if (param_kernel->data_size <= KMALLOC_MAX_SIZE) { + dmi = kmalloc(param_kernel->data_size, GFP_NOIO | __GFP_NORETRY | __GFP_NOMEMALLOC | __GFP_NOWARN); + if (dmi) + *param_flags |= DM_PARAMS_KMALLOC; + } if (!dmi) { - dmi = __vmalloc(tmp.data_size, GFP_NOIO | __GFP_REPEAT | __GFP_HIGH, PAGE_KERNEL); - *param_flags |= DM_PARAMS_VMALLOC; + dmi = __vmalloc(param_kernel->data_size, GFP_NOIO | __GFP_REPEAT | __GFP_HIGH, PAGE_KERNEL); + if (dmi) + *param_flags |= DM_PARAMS_VMALLOC; } if (!dmi) { - if (secure_data && clear_user(user, tmp.data_size)) + if (secure_data && clear_user(user, param_kernel->data_size)) return -EFAULT; return -ENOMEM; } - if (copy_from_user(dmi, user, tmp.data_size)) + if (copy_from_user(dmi, user, param_kernel->data_size)) goto bad; +data_copied: /* * Abort if something changed the ioctl data while it was being copied. */ - if (dmi->data_size != tmp.data_size) { + if (dmi->data_size != param_kernel->data_size) { DMERR("rejecting ioctl: data size modified while processing parameters"); goto bad; } /* Wipe the user buffer so we do not return it to userspace */ - if (secure_data && clear_user(user, tmp.data_size)) + if (secure_data && clear_user(user, param_kernel->data_size)) goto bad; *param = dmi; return 0; bad: - free_params(dmi, tmp.data_size, *param_flags); + free_params(dmi, param_kernel->data_size, *param_flags); return -EFAULT; } @@ -1621,6 +1686,7 @@ static int validate_params(uint cmd, struct dm_ioctl *param) param->flags &= ~DM_BUFFER_FULL_FLAG; param->flags &= ~DM_UEVENT_GENERATED_FLAG; param->flags &= ~DM_SECURE_DATA_FLAG; + param->flags &= ~DM_DATA_OUT_FLAG; /* Ignores parameters */ if (cmd == DM_REMOVE_ALL_CMD || @@ -1648,11 +1714,13 @@ static int validate_params(uint cmd, struct dm_ioctl *param) static int ctl_ioctl(uint command, struct dm_ioctl __user *user) { int r = 0; + int ioctl_flags; int param_flags; unsigned int cmd; struct dm_ioctl *uninitialized_var(param); ioctl_fn fn = NULL; size_t input_param_size; + struct dm_ioctl param_kernel; /* only root can play with this */ if (!capable(CAP_SYS_ADMIN)) @@ -1677,7 +1745,7 @@ static int ctl_ioctl(uint command, struct dm_ioctl __user *user) if (cmd == DM_VERSION_CMD) return 0; - fn = lookup_ioctl(cmd); + fn = lookup_ioctl(cmd, &ioctl_flags); if (!fn) { DMWARN("dm_ctl_ioctl: unknown command 0x%x", command); return -ENOTTY; @@ -1686,7 +1754,7 @@ static int ctl_ioctl(uint command, struct dm_ioctl __user *user) /* * Copy the parameters into kernel space. */ - r = copy_params(user, ¶m, ¶m_flags); + r = copy_params(user, ¶m_kernel, ioctl_flags, ¶m, ¶m_flags); if (r) return r; @@ -1699,6 +1767,10 @@ static int ctl_ioctl(uint command, struct dm_ioctl __user *user) param->data_size = sizeof(*param); r = fn(param, input_param_size); + if (unlikely(param->flags & DM_BUFFER_FULL_FLAG) && + unlikely(ioctl_flags & IOCTL_FLAGS_NO_PARAMS)) + DMERR("ioctl %d tried to output some data but has IOCTL_FLAGS_NO_PARAMS set", cmd); + /* * Copy the results back to userland. */ diff --git a/drivers/md/dm-kcopyd.c b/drivers/md/dm-kcopyd.c index 68c0267..d581fe5 100644 --- a/drivers/md/dm-kcopyd.c +++ b/drivers/md/dm-kcopyd.c @@ -22,6 +22,7 @@ #include <linux/vmalloc.h> #include <linux/workqueue.h> #include <linux/mutex.h> +#include <linux/delay.h> #include <linux/device-mapper.h> #include <linux/dm-kcopyd.h> @@ -51,6 +52,8 @@ struct dm_kcopyd_client { struct workqueue_struct *kcopyd_wq; struct work_struct kcopyd_work; + struct dm_kcopyd_throttle *throttle; + /* * We maintain three lists of jobs: * @@ -68,6 +71,117 @@ struct dm_kcopyd_client { static struct page_list zero_page_list; +static DEFINE_SPINLOCK(throttle_spinlock); + +/* + * IO/IDLE accounting slowly decays after (1 << ACCOUNT_INTERVAL_SHIFT) period. + * When total_period >= (1 << ACCOUNT_INTERVAL_SHIFT) the counters are divided + * by 2. + */ +#define ACCOUNT_INTERVAL_SHIFT SHIFT_HZ + +/* + * Sleep this number of milliseconds. + * + * The value was decided experimentally. + * Smaller values seem to cause an increased copy rate above the limit. + * The reason for this is unknown but possibly due to jiffies rounding errors + * or read/write cache inside the disk. + */ +#define SLEEP_MSEC 100 + +/* + * Maximum number of sleep events. There is a theoretical livelock if more + * kcopyd clients do work simultaneously which this limit avoids. + */ +#define MAX_SLEEPS 10 + +static void io_job_start(struct dm_kcopyd_throttle *t) +{ + unsigned throttle, now, difference; + int slept = 0, skew; + + if (unlikely(!t)) + return; + +try_again: + spin_lock_irq(&throttle_spinlock); + + throttle = ACCESS_ONCE(t->throttle); + + if (likely(throttle >= 100)) + goto skip_limit; + + now = jiffies; + difference = now - t->last_jiffies; + t->last_jiffies = now; + if (t->num_io_jobs) + t->io_period += difference; + t->total_period += difference; + + /* + * Maintain sane values if we got a temporary overflow. + */ + if (unlikely(t->io_period > t->total_period)) + t->io_period = t->total_period; + + if (unlikely(t->total_period >= (1 << ACCOUNT_INTERVAL_SHIFT))) { + int shift = fls(t->total_period >> ACCOUNT_INTERVAL_SHIFT); + t->total_period >>= shift; + t->io_period >>= shift; + } + + skew = t->io_period - throttle * t->total_period / 100; + + if (unlikely(skew > 0) && slept < MAX_SLEEPS) { + slept++; + spin_unlock_irq(&throttle_spinlock); + msleep(SLEEP_MSEC); + goto try_again; + } + +skip_limit: + t->num_io_jobs++; + + spin_unlock_irq(&throttle_spinlock); +} + +static void io_job_finish(struct dm_kcopyd_throttle *t) +{ + unsigned long flags; + + if (unlikely(!t)) + return; + + spin_lock_irqsave(&throttle_spinlock, flags); + + t->num_io_jobs--; + + if (likely(ACCESS_ONCE(t->throttle) >= 100)) + goto skip_limit; + + if (!t->num_io_jobs) { + unsigned now, difference; + + now = jiffies; + difference = now - t->last_jiffies; + t->last_jiffies = now; + + t->io_period += difference; + t->total_period += difference; + + /* + * Maintain sane values if we got a temporary overflow. + */ + if (unlikely(t->io_period > t->total_period)) + t->io_period = t->total_period; + } + +skip_limit: + spin_unlock_irqrestore(&throttle_spinlock, flags); +} + + static void wake(struct dm_kcopyd_client *kc) { queue_work(kc->kcopyd_wq, &kc->kcopyd_work); @@ -348,6 +462,8 @@ static void complete_io(unsigned long error, void *context) struct kcopyd_job *job = (struct kcopyd_job *) context; struct dm_kcopyd_client *kc = job->kc; + io_job_finish(kc->throttle); + if (error) { if (job->rw & WRITE) job->write_err |= error; @@ -389,6 +505,8 @@ static int run_io_job(struct kcopyd_job *job) .client = job->kc->io_client, }; + io_job_start(job->kc->throttle); + if (job->rw == READ) r = dm_io(&io_req, 1, &job->source, NULL); else @@ -695,7 +813,7 @@ int kcopyd_cancel(struct kcopyd_job *job, int block) /*----------------------------------------------------------------- * Client setup *---------------------------------------------------------------*/ -struct dm_kcopyd_client *dm_kcopyd_client_create(void) +struct dm_kcopyd_client *dm_kcopyd_client_create(struct dm_kcopyd_throttle *throttle) { int r = -ENOMEM; struct dm_kcopyd_client *kc; @@ -708,6 +826,7 @@ struct dm_kcopyd_client *dm_kcopyd_client_create(void) INIT_LIST_HEAD(&kc->complete_jobs); INIT_LIST_HEAD(&kc->io_jobs); INIT_LIST_HEAD(&kc->pages_jobs); + kc->throttle = throttle; kc->job_pool = mempool_create_slab_pool(MIN_JOBS, _job_cache); if (!kc->job_pool) diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c index 328cad5..4f99d26 100644 --- a/drivers/md/dm-linear.c +++ b/drivers/md/dm-linear.c @@ -53,9 +53,9 @@ static int linear_ctr(struct dm_target *ti, unsigned int argc, char **argv) goto bad; } - ti->num_flush_requests = 1; - ti->num_discard_requests = 1; - ti->num_write_same_requests = 1; + ti->num_flush_bios = 1; + ti->num_discard_bios = 1; + ti->num_write_same_bios = 1; ti->private = lc; return 0; @@ -95,8 +95,8 @@ static int linear_map(struct dm_target *ti, struct bio *bio) return DM_MAPIO_REMAPPED; } -static int linear_status(struct dm_target *ti, status_type_t type, - unsigned status_flags, char *result, unsigned maxlen) +static void linear_status(struct dm_target *ti, status_type_t type, + unsigned status_flags, char *result, unsigned maxlen) { struct linear_c *lc = (struct linear_c *) ti->private; @@ -110,7 +110,6 @@ static int linear_status(struct dm_target *ti, status_type_t type, (unsigned long long)lc->start); break; } - return 0; } static int linear_ioctl(struct dm_target *ti, unsigned int cmd, @@ -155,7 +154,7 @@ static int linear_iterate_devices(struct dm_target *ti, static struct target_type linear_target = { .name = "linear", - .version = {1, 2, 0}, + .version = {1, 2, 1}, .module = THIS_MODULE, .ctr = linear_ctr, .dtr = linear_dtr, diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index 573bd04..51bb816 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -905,8 +905,8 @@ static int multipath_ctr(struct dm_target *ti, unsigned int argc, goto bad; } - ti->num_flush_requests = 1; - ti->num_discard_requests = 1; + ti->num_flush_bios = 1; + ti->num_discard_bios = 1; return 0; @@ -1378,8 +1378,8 @@ static void multipath_resume(struct dm_target *ti) * [priority selector-name num_ps_args [ps_args]* * num_paths num_selector_args [path_dev [selector_args]* ]+ ]+ */ -static int multipath_status(struct dm_target *ti, status_type_t type, - unsigned status_flags, char *result, unsigned maxlen) +static void multipath_status(struct dm_target *ti, status_type_t type, + unsigned status_flags, char *result, unsigned maxlen) { int sz = 0; unsigned long flags; @@ -1485,8 +1485,6 @@ static int multipath_status(struct dm_target *ti, status_type_t type, } spin_unlock_irqrestore(&m->lock, flags); - - return 0; } static int multipath_message(struct dm_target *ti, unsigned argc, char **argv) @@ -1695,7 +1693,7 @@ out: *---------------------------------------------------------------*/ static struct target_type multipath_target = { .name = "multipath", - .version = {1, 5, 0}, + .version = {1, 5, 1}, .module = THIS_MODULE, .ctr = multipath_ctr, .dtr = multipath_dtr, diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index 9e58dbd..9a01d1e 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -1151,7 +1151,7 @@ static int raid_ctr(struct dm_target *ti, unsigned argc, char **argv) INIT_WORK(&rs->md.event_work, do_table_event); ti->private = rs; - ti->num_flush_requests = 1; + ti->num_flush_bios = 1; mutex_lock(&rs->md.reconfig_mutex); ret = md_run(&rs->md); @@ -1201,8 +1201,8 @@ static int raid_map(struct dm_target *ti, struct bio *bio) return DM_MAPIO_SUBMITTED; } -static int raid_status(struct dm_target *ti, status_type_t type, - unsigned status_flags, char *result, unsigned maxlen) +static void raid_status(struct dm_target *ti, status_type_t type, + unsigned status_flags, char *result, unsigned maxlen) { struct raid_set *rs = ti->private; unsigned raid_param_cnt = 1; /* at least 1 for chunksize */ @@ -1344,8 +1344,6 @@ static int raid_status(struct dm_target *ti, status_type_t type, DMEMIT(" -"); } } - - return 0; } static int raid_iterate_devices(struct dm_target *ti, iterate_devices_callout_fn fn, void *data) @@ -1405,7 +1403,7 @@ static void raid_resume(struct dm_target *ti) static struct target_type raid_target = { .name = "raid", - .version = {1, 4, 1}, + .version = {1, 4, 2}, .module = THIS_MODULE, .ctr = raid_ctr, .dtr = raid_dtr, diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c index fa51918..d053098 100644 --- a/drivers/md/dm-raid1.c +++ b/drivers/md/dm-raid1.c @@ -82,6 +82,9 @@ struct mirror_set { struct mirror mirror[0]; }; +DECLARE_DM_KCOPYD_THROTTLE_WITH_MODULE_PARM(raid1_resync_throttle, + "A percentage of time allocated for raid resynchronization"); + static void wakeup_mirrord(void *context) { struct mirror_set *ms = context; @@ -1072,8 +1075,8 @@ static int mirror_ctr(struct dm_target *ti, unsigned int argc, char **argv) if (r) goto err_free_context; - ti->num_flush_requests = 1; - ti->num_discard_requests = 1; + ti->num_flush_bios = 1; + ti->num_discard_bios = 1; ti->per_bio_data_size = sizeof(struct dm_raid1_bio_record); ti->discard_zeroes_data_unsupported = true; @@ -1111,7 +1114,7 @@ static int mirror_ctr(struct dm_target *ti, unsigned int argc, char **argv) goto err_destroy_wq; } - ms->kcopyd_client = dm_kcopyd_client_create(); + ms->kcopyd_client = dm_kcopyd_client_create(&dm_kcopyd_throttle); if (IS_ERR(ms->kcopyd_client)) { r = PTR_ERR(ms->kcopyd_client); goto err_destroy_wq; @@ -1347,8 +1350,8 @@ static char device_status_char(struct mirror *m) } -static int mirror_status(struct dm_target *ti, status_type_t type, - unsigned status_flags, char *result, unsigned maxlen) +static void mirror_status(struct dm_target *ti, status_type_t type, + unsigned status_flags, char *result, unsigned maxlen) { unsigned int m, sz = 0; struct mirror_set *ms = (struct mirror_set *) ti->private; @@ -1383,8 +1386,6 @@ static int mirror_status(struct dm_target *ti, status_type_t type, if (ms->features & DM_RAID1_HANDLE_ERRORS) DMEMIT(" 1 handle_errors"); } - - return 0; } static int mirror_iterate_devices(struct dm_target *ti, @@ -1403,7 +1404,7 @@ static int mirror_iterate_devices(struct dm_target *ti, static struct target_type mirror_target = { .name = "mirror", - .version = {1, 13, 1}, + .version = {1, 13, 2}, .module = THIS_MODULE, .ctr = mirror_ctr, .dtr = mirror_dtr, diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index 10079e0..c0e0702 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -124,6 +124,9 @@ struct dm_snapshot { #define RUNNING_MERGE 0 #define SHUTDOWN_MERGE 1 +DECLARE_DM_KCOPYD_THROTTLE_WITH_MODULE_PARM(snapshot_copy_throttle, + "A percentage of time allocated for copy on write"); + struct dm_dev *dm_snap_origin(struct dm_snapshot *s) { return s->origin; @@ -1037,7 +1040,7 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) int i; int r = -EINVAL; char *origin_path, *cow_path; - unsigned args_used, num_flush_requests = 1; + unsigned args_used, num_flush_bios = 1; fmode_t origin_mode = FMODE_READ; if (argc != 4) { @@ -1047,7 +1050,7 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) } if (dm_target_is_snapshot_merge(ti)) { - num_flush_requests = 2; + num_flush_bios = 2; origin_mode = FMODE_WRITE; } @@ -1108,7 +1111,7 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) goto bad_hash_tables; } - s->kcopyd_client = dm_kcopyd_client_create(); + s->kcopyd_client = dm_kcopyd_client_create(&dm_kcopyd_throttle); if (IS_ERR(s->kcopyd_client)) { r = PTR_ERR(s->kcopyd_client); ti->error = "Could not create kcopyd client"; @@ -1127,7 +1130,7 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) spin_lock_init(&s->tracked_chunk_lock); ti->private = s; - ti->num_flush_requests = num_flush_requests; + ti->num_flush_bios = num_flush_bios; ti->per_bio_data_size = sizeof(struct dm_snap_tracked_chunk); /* Add snapshot to the list of snapshots for this origin */ @@ -1691,7 +1694,7 @@ static int snapshot_merge_map(struct dm_target *ti, struct bio *bio) init_tracked_chunk(bio); if (bio->bi_rw & REQ_FLUSH) { - if (!dm_bio_get_target_request_nr(bio)) + if (!dm_bio_get_target_bio_nr(bio)) bio->bi_bdev = s->origin->bdev; else bio->bi_bdev = s->cow->bdev; @@ -1836,8 +1839,8 @@ static void snapshot_merge_resume(struct dm_target *ti) start_merge(s); } -static int snapshot_status(struct dm_target *ti, status_type_t type, - unsigned status_flags, char *result, unsigned maxlen) +static void snapshot_status(struct dm_target *ti, status_type_t type, + unsigned status_flags, char *result, unsigned maxlen) { unsigned sz = 0; struct dm_snapshot *snap = ti->private; @@ -1883,8 +1886,6 @@ static int snapshot_status(struct dm_target *ti, status_type_t type, maxlen - sz); break; } - - return 0; } static int snapshot_iterate_devices(struct dm_target *ti, @@ -2104,7 +2105,7 @@ static int origin_ctr(struct dm_target *ti, unsigned int argc, char **argv) } ti->private = dev; - ti->num_flush_requests = 1; + ti->num_flush_bios = 1; return 0; } @@ -2138,8 +2139,8 @@ static void origin_resume(struct dm_target *ti) ti->max_io_len = get_origin_minimum_chunksize(dev->bdev); } -static int origin_status(struct dm_target *ti, status_type_t type, - unsigned status_flags, char *result, unsigned maxlen) +static void origin_status(struct dm_target *ti, status_type_t type, + unsigned status_flags, char *result, unsigned maxlen) { struct dm_dev *dev = ti->private; @@ -2152,8 +2153,6 @@ static int origin_status(struct dm_target *ti, status_type_t type, snprintf(result, maxlen, "%s", dev->name); break; } - - return 0; } static int origin_merge(struct dm_target *ti, struct bvec_merge_data *bvm, @@ -2180,7 +2179,7 @@ static int origin_iterate_devices(struct dm_target *ti, static struct target_type origin_target = { .name = "snapshot-origin", - .version = {1, 8, 0}, + .version = {1, 8, 1}, .module = THIS_MODULE, .ctr = origin_ctr, .dtr = origin_dtr, @@ -2193,7 +2192,7 @@ static struct target_type origin_target = { static struct target_type snapshot_target = { .name = "snapshot", - .version = {1, 11, 0}, + .version = {1, 11, 1}, .module = THIS_MODULE, .ctr = snapshot_ctr, .dtr = snapshot_dtr, @@ -2306,3 +2305,5 @@ module_exit(dm_snapshot_exit); MODULE_DESCRIPTION(DM_NAME " snapshot target"); MODULE_AUTHOR("Joe Thornber"); MODULE_LICENSE("GPL"); +MODULE_ALIAS("dm-snapshot-origin"); +MODULE_ALIAS("dm-snapshot-merge"); diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c index c89cde8..d8837d3 100644 --- a/drivers/md/dm-stripe.c +++ b/drivers/md/dm-stripe.c @@ -160,9 +160,9 @@ static int stripe_ctr(struct dm_target *ti, unsigned int argc, char **argv) if (r) return r; - ti->num_flush_requests = stripes; - ti->num_discard_requests = stripes; - ti->num_write_same_requests = stripes; + ti->num_flush_bios = stripes; + ti->num_discard_bios = stripes; + ti->num_write_same_bios = stripes; sc->chunk_size = chunk_size; if (chunk_size & (chunk_size - 1)) @@ -276,19 +276,19 @@ static int stripe_map(struct dm_target *ti, struct bio *bio) { struct stripe_c *sc = ti->private; uint32_t stripe; - unsigned target_request_nr; + unsigned target_bio_nr; if (bio->bi_rw & REQ_FLUSH) { - target_request_nr = dm_bio_get_target_request_nr(bio); - BUG_ON(target_request_nr >= sc->stripes); - bio->bi_bdev = sc->stripe[target_request_nr].dev->bdev; + target_bio_nr = dm_bio_get_target_bio_nr(bio); + BUG_ON(target_bio_nr >= sc->stripes); + bio->bi_bdev = sc->stripe[target_bio_nr].dev->bdev; return DM_MAPIO_REMAPPED; } if (unlikely(bio->bi_rw & REQ_DISCARD) || unlikely(bio->bi_rw & REQ_WRITE_SAME)) { - target_request_nr = dm_bio_get_target_request_nr(bio); - BUG_ON(target_request_nr >= sc->stripes); - return stripe_map_range(sc, bio, target_request_nr); + target_bio_nr = dm_bio_get_target_bio_nr(bio); + BUG_ON(target_bio_nr >= sc->stripes); + return stripe_map_range(sc, bio, target_bio_nr); } stripe_map_sector(sc, bio->bi_sector, &stripe, &bio->bi_sector); @@ -312,8 +312,8 @@ static int stripe_map(struct dm_target *ti, struct bio *bio) * */ -static int stripe_status(struct dm_target *ti, status_type_t type, - unsigned status_flags, char *result, unsigned maxlen) +static void stripe_status(struct dm_target *ti, status_type_t type, + unsigned status_flags, char *result, unsigned maxlen) { struct stripe_c *sc = (struct stripe_c *) ti->private; char buffer[sc->stripes + 1]; @@ -340,7 +340,6 @@ static int stripe_status(struct dm_target *ti, status_type_t type, (unsigned long long)sc->stripe[i].physical_start); break; } - return 0; } static int stripe_end_io(struct dm_target *ti, struct bio *bio, int error) @@ -428,7 +427,7 @@ static int stripe_merge(struct dm_target *ti, struct bvec_merge_data *bvm, static struct target_type stripe_target = { .name = "striped", - .version = {1, 5, 0}, + .version = {1, 5, 1}, .module = THIS_MODULE, .ctr = stripe_ctr, .dtr = stripe_dtr, diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index daf25d0..e50dad0c 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -217,7 +217,6 @@ int dm_table_create(struct dm_table **result, fmode_t mode, if (alloc_targets(t, num_targets)) { kfree(t); - t = NULL; return -ENOMEM; } @@ -823,8 +822,8 @@ int dm_table_add_target(struct dm_table *t, const char *type, t->highs[t->num_targets++] = tgt->begin + tgt->len - 1; - if (!tgt->num_discard_requests && tgt->discards_supported) - DMWARN("%s: %s: ignoring discards_supported because num_discard_requests is zero.", + if (!tgt->num_discard_bios && tgt->discards_supported) + DMWARN("%s: %s: ignoring discards_supported because num_discard_bios is zero.", dm_device_name(t->md), type); return 0; @@ -1360,7 +1359,7 @@ static bool dm_table_supports_flush(struct dm_table *t, unsigned flush) while (i < dm_table_get_num_targets(t)) { ti = dm_table_get_target(t, i++); - if (!ti->num_flush_requests) + if (!ti->num_flush_bios) continue; if (ti->flush_supported) @@ -1439,7 +1438,7 @@ static bool dm_table_supports_write_same(struct dm_table *t) while (i < dm_table_get_num_targets(t)) { ti = dm_table_get_target(t, i++); - if (!ti->num_write_same_requests) + if (!ti->num_write_same_bios) return false; if (!ti->type->iterate_devices || @@ -1657,7 +1656,7 @@ bool dm_table_supports_discards(struct dm_table *t) while (i < dm_table_get_num_targets(t)) { ti = dm_table_get_target(t, i++); - if (!ti->num_discard_requests) + if (!ti->num_discard_bios) continue; if (ti->discards_supported) diff --git a/drivers/md/dm-target.c b/drivers/md/dm-target.c index 617d21a..37ba5db 100644 --- a/drivers/md/dm-target.c +++ b/drivers/md/dm-target.c @@ -116,7 +116,7 @@ static int io_err_ctr(struct dm_target *tt, unsigned int argc, char **args) /* * Return error for discards instead of -EOPNOTSUPP */ - tt->num_discard_requests = 1; + tt->num_discard_bios = 1; return 0; } diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c index 4d6e853..00cee02 100644 --- a/drivers/md/dm-thin-metadata.c +++ b/drivers/md/dm-thin-metadata.c @@ -280,7 +280,7 @@ static void unpack_block_time(uint64_t v, dm_block_t *b, uint32_t *t) *t = v & ((1 << 24) - 1); } -static void data_block_inc(void *context, void *value_le) +static void data_block_inc(void *context, const void *value_le) { struct dm_space_map *sm = context; __le64 v_le; @@ -292,7 +292,7 @@ static void data_block_inc(void *context, void *value_le) dm_sm_inc_block(sm, b); } -static void data_block_dec(void *context, void *value_le) +static void data_block_dec(void *context, const void *value_le) { struct dm_space_map *sm = context; __le64 v_le; @@ -304,7 +304,7 @@ static void data_block_dec(void *context, void *value_le) dm_sm_dec_block(sm, b); } -static int data_block_equal(void *context, void *value1_le, void *value2_le) +static int data_block_equal(void *context, const void *value1_le, const void *value2_le) { __le64 v1_le, v2_le; uint64_t b1, b2; @@ -318,7 +318,7 @@ static int data_block_equal(void *context, void *value1_le, void *value2_le) return b1 == b2; } -static void subtree_inc(void *context, void *value) +static void subtree_inc(void *context, const void *value) { struct dm_btree_info *info = context; __le64 root_le; @@ -329,7 +329,7 @@ static void subtree_inc(void *context, void *value) dm_tm_inc(info->tm, root); } -static void subtree_dec(void *context, void *value) +static void subtree_dec(void *context, const void *value) { struct dm_btree_info *info = context; __le64 root_le; @@ -341,7 +341,7 @@ static void subtree_dec(void *context, void *value) DMERR("btree delete failed\n"); } -static int subtree_equal(void *context, void *value1_le, void *value2_le) +static int subtree_equal(void *context, const void *value1_le, const void *value2_le) { __le64 v1_le, v2_le; memcpy(&v1_le, value1_le, sizeof(v1_le)); diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index 5409607..009339d 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -26,6 +26,9 @@ #define PRISON_CELLS 1024 #define COMMIT_PERIOD HZ +DECLARE_DM_KCOPYD_THROTTLE_WITH_MODULE_PARM(snapshot_copy_throttle, + "A percentage of time allocated for copy on write"); + /* * The block size of the device holding pool data must be * between 64KB and 1GB. @@ -227,6 +230,78 @@ struct thin_c { /*----------------------------------------------------------------*/ /* + * wake_worker() is used when new work is queued and when pool_resume is + * ready to continue deferred IO processing. + */ +static void wake_worker(struct pool *pool) +{ + queue_work(pool->wq, &pool->worker); +} + +/*----------------------------------------------------------------*/ + +static int bio_detain(struct pool *pool, struct dm_cell_key *key, struct bio *bio, + struct dm_bio_prison_cell **cell_result) +{ + int r; + struct dm_bio_prison_cell *cell_prealloc; + + /* + * Allocate a cell from the prison's mempool. + * This might block but it can't fail. + */ + cell_prealloc = dm_bio_prison_alloc_cell(pool->prison, GFP_NOIO); + + r = dm_bio_detain(pool->prison, key, bio, cell_prealloc, cell_result); + if (r) + /* + * We reused an old cell; we can get rid of + * the new one. + */ + dm_bio_prison_free_cell(pool->prison, cell_prealloc); + + return r; +} + +static void cell_release(struct pool *pool, + struct dm_bio_prison_cell *cell, + struct bio_list *bios) +{ + dm_cell_release(pool->prison, cell, bios); + dm_bio_prison_free_cell(pool->prison, cell); +} + +static void cell_release_no_holder(struct pool *pool, + struct dm_bio_prison_cell *cell, + struct bio_list *bios) +{ + dm_cell_release_no_holder(pool->prison, cell, bios); + dm_bio_prison_free_cell(pool->prison, cell); +} + +static void cell_defer_no_holder_no_free(struct thin_c *tc, + struct dm_bio_prison_cell *cell) +{ + struct pool *pool = tc->pool; + unsigned long flags; + + spin_lock_irqsave(&pool->lock, flags); + dm_cell_release_no_holder(pool->prison, cell, &pool->deferred_bios); + spin_unlock_irqrestore(&pool->lock, flags); + + wake_worker(pool); +} + +static void cell_error(struct pool *pool, + struct dm_bio_prison_cell *cell) +{ + dm_cell_error(pool->prison, cell); + dm_bio_prison_free_cell(pool->prison, cell); +} + +/*----------------------------------------------------------------*/ + +/* * A global list of pools that uses a struct mapped_device as a key. */ static struct dm_thin_pool_table { @@ -330,14 +405,20 @@ static void requeue_io(struct thin_c *tc) * target. */ +static bool block_size_is_power_of_two(struct pool *pool) +{ + return pool->sectors_per_block_shift >= 0; +} + static dm_block_t get_bio_block(struct thin_c *tc, struct bio *bio) { + struct pool *pool = tc->pool; sector_t block_nr = bio->bi_sector; - if (tc->pool->sectors_per_block_shift < 0) - (void) sector_div(block_nr, tc->pool->sectors_per_block); + if (block_size_is_power_of_two(pool)) + block_nr >>= pool->sectors_per_block_shift; else - block_nr >>= tc->pool->sectors_per_block_shift; + (void) sector_div(block_nr, pool->sectors_per_block); return block_nr; } @@ -348,12 +429,12 @@ static void remap(struct thin_c *tc, struct bio *bio, dm_block_t block) sector_t bi_sector = bio->bi_sector; bio->bi_bdev = tc->pool_dev->bdev; - if (tc->pool->sectors_per_block_shift < 0) - bio->bi_sector = (block * pool->sectors_per_block) + - sector_div(bi_sector, pool->sectors_per_block); - else + if (block_size_is_power_of_two(pool)) bio->bi_sector = (block << pool->sectors_per_block_shift) | (bi_sector & (pool->sectors_per_block - 1)); + else + bio->bi_sector = (block * pool->sectors_per_block) + + sector_div(bi_sector, pool->sectors_per_block); } static void remap_to_origin(struct thin_c *tc, struct bio *bio) @@ -420,15 +501,6 @@ static void remap_and_issue(struct thin_c *tc, struct bio *bio, issue(tc, bio); } -/* - * wake_worker() is used when new work is queued and when pool_resume is - * ready to continue deferred IO processing. - */ -static void wake_worker(struct pool *pool) -{ - queue_work(pool->wq, &pool->worker); -} - /*----------------------------------------------------------------*/ /* @@ -515,14 +587,14 @@ static void cell_defer(struct thin_c *tc, struct dm_bio_prison_cell *cell) unsigned long flags; spin_lock_irqsave(&pool->lock, flags); - dm_cell_release(cell, &pool->deferred_bios); + cell_release(pool, cell, &pool->deferred_bios); spin_unlock_irqrestore(&tc->pool->lock, flags); wake_worker(pool); } /* - * Same as cell_defer except it omits the original holder of the cell. + * Same as cell_defer above, except it omits the original holder of the cell. */ static void cell_defer_no_holder(struct thin_c *tc, struct dm_bio_prison_cell *cell) { @@ -530,7 +602,7 @@ static void cell_defer_no_holder(struct thin_c *tc, struct dm_bio_prison_cell *c unsigned long flags; spin_lock_irqsave(&pool->lock, flags); - dm_cell_release_no_holder(cell, &pool->deferred_bios); + cell_release_no_holder(pool, cell, &pool->deferred_bios); spin_unlock_irqrestore(&pool->lock, flags); wake_worker(pool); @@ -540,13 +612,15 @@ static void process_prepared_mapping_fail(struct dm_thin_new_mapping *m) { if (m->bio) m->bio->bi_end_io = m->saved_bi_end_io; - dm_cell_error(m->cell); + cell_error(m->tc->pool, m->cell); list_del(&m->list); mempool_free(m, m->tc->pool->mapping_pool); } + static void process_prepared_mapping(struct dm_thin_new_mapping *m) { struct thin_c *tc = m->tc; + struct pool *pool = tc->pool; struct bio *bio; int r; @@ -555,7 +629,7 @@ static void process_prepared_mapping(struct dm_thin_new_mapping *m) bio->bi_end_io = m->saved_bi_end_io; if (m->err) { - dm_cell_error(m->cell); + cell_error(pool, m->cell); goto out; } @@ -567,7 +641,7 @@ static void process_prepared_mapping(struct dm_thin_new_mapping *m) r = dm_thin_insert_block(tc->td, m->virt_block, m->data_block); if (r) { DMERR_LIMIT("dm_thin_insert_block() failed"); - dm_cell_error(m->cell); + cell_error(pool, m->cell); goto out; } @@ -585,7 +659,7 @@ static void process_prepared_mapping(struct dm_thin_new_mapping *m) out: list_del(&m->list); - mempool_free(m, tc->pool->mapping_pool); + mempool_free(m, pool->mapping_pool); } static void process_prepared_discard_fail(struct dm_thin_new_mapping *m) @@ -736,7 +810,7 @@ static void schedule_copy(struct thin_c *tc, dm_block_t virt_block, if (r < 0) { mempool_free(m, pool->mapping_pool); DMERR_LIMIT("dm_kcopyd_copy() failed"); - dm_cell_error(cell); + cell_error(pool, cell); } } } @@ -802,7 +876,7 @@ static void schedule_zero(struct thin_c *tc, dm_block_t virt_block, if (r < 0) { mempool_free(m, pool->mapping_pool); DMERR_LIMIT("dm_kcopyd_zero() failed"); - dm_cell_error(cell); + cell_error(pool, cell); } } } @@ -908,13 +982,13 @@ static void retry_on_resume(struct bio *bio) spin_unlock_irqrestore(&pool->lock, flags); } -static void no_space(struct dm_bio_prison_cell *cell) +static void no_space(struct pool *pool, struct dm_bio_prison_cell *cell) { struct bio *bio; struct bio_list bios; bio_list_init(&bios); - dm_cell_release(cell, &bios); + cell_release(pool, cell, &bios); while ((bio = bio_list_pop(&bios))) retry_on_resume(bio); @@ -932,7 +1006,7 @@ static void process_discard(struct thin_c *tc, struct bio *bio) struct dm_thin_new_mapping *m; build_virtual_key(tc->td, block, &key); - if (dm_bio_detain(tc->pool->prison, &key, bio, &cell)) + if (bio_detain(tc->pool, &key, bio, &cell)) return; r = dm_thin_find_block(tc->td, block, 1, &lookup_result); @@ -944,7 +1018,7 @@ static void process_discard(struct thin_c *tc, struct bio *bio) * on this block. */ build_data_key(tc->td, lookup_result.block, &key2); - if (dm_bio_detain(tc->pool->prison, &key2, bio, &cell2)) { + if (bio_detain(tc->pool, &key2, bio, &cell2)) { cell_defer_no_holder(tc, cell); break; } @@ -1020,13 +1094,13 @@ static void break_sharing(struct thin_c *tc, struct bio *bio, dm_block_t block, break; case -ENOSPC: - no_space(cell); + no_space(tc->pool, cell); break; default: DMERR_LIMIT("%s: alloc_data_block() failed: error = %d", __func__, r); - dm_cell_error(cell); + cell_error(tc->pool, cell); break; } } @@ -1044,7 +1118,7 @@ static void process_shared_bio(struct thin_c *tc, struct bio *bio, * of being broken so we have nothing further to do here. */ build_data_key(tc->td, lookup_result->block, &key); - if (dm_bio_detain(pool->prison, &key, bio, &cell)) + if (bio_detain(pool, &key, bio, &cell)) return; if (bio_data_dir(bio) == WRITE && bio->bi_size) @@ -1065,12 +1139,13 @@ static void provision_block(struct thin_c *tc, struct bio *bio, dm_block_t block { int r; dm_block_t data_block; + struct pool *pool = tc->pool; /* * Remap empty bios (flushes) immediately, without provisioning. */ if (!bio->bi_size) { - inc_all_io_entry(tc->pool, bio); + inc_all_io_entry(pool, bio); cell_defer_no_holder(tc, cell); remap_and_issue(tc, bio, 0); @@ -1097,14 +1172,14 @@ static void provision_block(struct thin_c *tc, struct bio *bio, dm_block_t block break; case -ENOSPC: - no_space(cell); + no_space(pool, cell); break; default: DMERR_LIMIT("%s: alloc_data_block() failed: error = %d", __func__, r); - set_pool_mode(tc->pool, PM_READ_ONLY); - dm_cell_error(cell); + set_pool_mode(pool, PM_READ_ONLY); + cell_error(pool, cell); break; } } @@ -1112,6 +1187,7 @@ static void provision_block(struct thin_c *tc, struct bio *bio, dm_block_t block static void process_bio(struct thin_c *tc, struct bio *bio) { int r; + struct pool *pool = tc->pool; dm_block_t block = get_bio_block(tc, bio); struct dm_bio_prison_cell *cell; struct dm_cell_key key; @@ -1122,7 +1198,7 @@ static void process_bio(struct thin_c *tc, struct bio *bio) * being provisioned so we have nothing further to do here. */ build_virtual_key(tc->td, block, &key); - if (dm_bio_detain(tc->pool->prison, &key, bio, &cell)) + if (bio_detain(pool, &key, bio, &cell)) return; r = dm_thin_find_block(tc->td, block, 1, &lookup_result); @@ -1130,9 +1206,9 @@ static void process_bio(struct thin_c *tc, struct bio *bio) case 0: if (lookup_result.shared) { process_shared_bio(tc, bio, block, &lookup_result); - cell_defer_no_holder(tc, cell); + cell_defer_no_holder(tc, cell); /* FIXME: pass this cell into process_shared? */ } else { - inc_all_io_entry(tc->pool, bio); + inc_all_io_entry(pool, bio); cell_defer_no_holder(tc, cell); remap_and_issue(tc, bio, lookup_result.block); @@ -1141,7 +1217,7 @@ static void process_bio(struct thin_c *tc, struct bio *bio) case -ENODATA: if (bio_data_dir(bio) == READ && tc->origin_dev) { - inc_all_io_entry(tc->pool, bio); + inc_all_io_entry(pool, bio); cell_defer_no_holder(tc, cell); remap_to_origin_and_issue(tc, bio); @@ -1378,7 +1454,8 @@ static int thin_bio_map(struct dm_target *ti, struct bio *bio) dm_block_t block = get_bio_block(tc, bio); struct dm_thin_device *td = tc->td; struct dm_thin_lookup_result result; - struct dm_bio_prison_cell *cell1, *cell2; + struct dm_bio_prison_cell cell1, cell2; + struct dm_bio_prison_cell *cell_result; struct dm_cell_key key; thin_hook_bio(tc, bio); @@ -1420,18 +1497,18 @@ static int thin_bio_map(struct dm_target *ti, struct bio *bio) } build_virtual_key(tc->td, block, &key); - if (dm_bio_detain(tc->pool->prison, &key, bio, &cell1)) + if (dm_bio_detain(tc->pool->prison, &key, bio, &cell1, &cell_result)) return DM_MAPIO_SUBMITTED; build_data_key(tc->td, result.block, &key); - if (dm_bio_detain(tc->pool->prison, &key, bio, &cell2)) { - cell_defer_no_holder(tc, cell1); + if (dm_bio_detain(tc->pool->prison, &key, bio, &cell2, &cell_result)) { + cell_defer_no_holder_no_free(tc, &cell1); return DM_MAPIO_SUBMITTED; } inc_all_io_entry(tc->pool, bio); - cell_defer_no_holder(tc, cell2); - cell_defer_no_holder(tc, cell1); + cell_defer_no_holder_no_free(tc, &cell2); + cell_defer_no_holder_no_free(tc, &cell1); remap(tc, bio, result.block); return DM_MAPIO_REMAPPED; @@ -1636,7 +1713,7 @@ static struct pool *pool_create(struct mapped_device *pool_md, goto bad_prison; } - pool->copier = dm_kcopyd_client_create(); + pool->copier = dm_kcopyd_client_create(&dm_kcopyd_throttle); if (IS_ERR(pool->copier)) { r = PTR_ERR(pool->copier); *error = "Error creating pool's kcopyd client"; @@ -1938,7 +2015,7 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv) pt->data_dev = data_dev; pt->low_water_blocks = low_water_blocks; pt->adjusted_pf = pt->requested_pf = pf; - ti->num_flush_requests = 1; + ti->num_flush_bios = 1; /* * Only need to enable discards if the pool should pass @@ -1946,7 +2023,7 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv) * processing will cause mappings to be removed from the btree. */ if (pf.discard_enabled && pf.discard_passdown) { - ti->num_discard_requests = 1; + ti->num_discard_bios = 1; /* * Setting 'discards_supported' circumvents the normal @@ -2299,8 +2376,8 @@ static void emit_flags(struct pool_features *pf, char *result, * <transaction id> <used metadata sectors>/<total metadata sectors> * <used data sectors>/<total data sectors> <held metadata root> */ -static int pool_status(struct dm_target *ti, status_type_t type, - unsigned status_flags, char *result, unsigned maxlen) +static void pool_status(struct dm_target *ti, status_type_t type, + unsigned status_flags, char *result, unsigned maxlen) { int r; unsigned sz = 0; @@ -2326,32 +2403,41 @@ static int pool_status(struct dm_target *ti, status_type_t type, if (!(status_flags & DM_STATUS_NOFLUSH_FLAG) && !dm_suspended(ti)) (void) commit_or_fallback(pool); - r = dm_pool_get_metadata_transaction_id(pool->pmd, - &transaction_id); - if (r) - return r; + r = dm_pool_get_metadata_transaction_id(pool->pmd, &transaction_id); + if (r) { + DMERR("dm_pool_get_metadata_transaction_id returned %d", r); + goto err; + } - r = dm_pool_get_free_metadata_block_count(pool->pmd, - &nr_free_blocks_metadata); - if (r) - return r; + r = dm_pool_get_free_metadata_block_count(pool->pmd, &nr_free_blocks_metadata); + if (r) { + DMERR("dm_pool_get_free_metadata_block_count returned %d", r); + goto err; + } r = dm_pool_get_metadata_dev_size(pool->pmd, &nr_blocks_metadata); - if (r) - return r; + if (r) { + DMERR("dm_pool_get_metadata_dev_size returned %d", r); + goto err; + } - r = dm_pool_get_free_block_count(pool->pmd, - &nr_free_blocks_data); - if (r) - return r; + r = dm_pool_get_free_block_count(pool->pmd, &nr_free_blocks_data); + if (r) { + DMERR("dm_pool_get_free_block_count returned %d", r); + goto err; + } r = dm_pool_get_data_dev_size(pool->pmd, &nr_blocks_data); - if (r) - return r; + if (r) { + DMERR("dm_pool_get_data_dev_size returned %d", r); + goto err; + } r = dm_pool_get_metadata_snap(pool->pmd, &held_root); - if (r) - return r; + if (r) { + DMERR("dm_pool_get_metadata_snap returned %d", r); + goto err; + } DMEMIT("%llu %llu/%llu %llu/%llu ", (unsigned long long)transaction_id, @@ -2388,8 +2474,10 @@ static int pool_status(struct dm_target *ti, status_type_t type, emit_flags(&pt->requested_pf, result, sz, maxlen); break; } + return; - return 0; +err: + DMEMIT("Error"); } static int pool_iterate_devices(struct dm_target *ti, @@ -2414,11 +2502,6 @@ static int pool_merge(struct dm_target *ti, struct bvec_merge_data *bvm, return min(max_size, q->merge_bvec_fn(q, bvm, biovec)); } -static bool block_size_is_power_of_two(struct pool *pool) -{ - return pool->sectors_per_block_shift >= 0; -} - static void set_discard_limits(struct pool_c *pt, struct queue_limits *limits) { struct pool *pool = pt->pool; @@ -2432,15 +2515,8 @@ static void set_discard_limits(struct pool_c *pt, struct queue_limits *limits) if (pt->adjusted_pf.discard_passdown) { data_limits = &bdev_get_queue(pt->data_dev->bdev)->limits; limits->discard_granularity = data_limits->discard_granularity; - } else if (block_size_is_power_of_two(pool)) + } else limits->discard_granularity = pool->sectors_per_block << SECTOR_SHIFT; - else - /* - * Use largest power of 2 that is a factor of sectors_per_block - * but at least DATA_DEV_BLOCK_SIZE_MIN_SECTORS. - */ - limits->discard_granularity = max(1 << (ffs(pool->sectors_per_block) - 1), - DATA_DEV_BLOCK_SIZE_MIN_SECTORS) << SECTOR_SHIFT; } static void pool_io_hints(struct dm_target *ti, struct queue_limits *limits) @@ -2468,7 +2544,7 @@ static struct target_type pool_target = { .name = "thin-pool", .features = DM_TARGET_SINGLETON | DM_TARGET_ALWAYS_WRITEABLE | DM_TARGET_IMMUTABLE, - .version = {1, 6, 0}, + .version = {1, 6, 1}, .module = THIS_MODULE, .ctr = pool_ctr, .dtr = pool_dtr, @@ -2588,17 +2664,17 @@ static int thin_ctr(struct dm_target *ti, unsigned argc, char **argv) if (r) goto bad_thin_open; - ti->num_flush_requests = 1; + ti->num_flush_bios = 1; ti->flush_supported = true; ti->per_bio_data_size = sizeof(struct dm_thin_endio_hook); /* In case the pool supports discards, pass them on. */ if (tc->pool->pf.discard_enabled) { ti->discards_supported = true; - ti->num_discard_requests = 1; + ti->num_discard_bios = 1; ti->discard_zeroes_data_unsupported = true; - /* Discard requests must be split on a block boundary */ - ti->split_discard_requests = true; + /* Discard bios must be split on a block boundary */ + ti->split_discard_bios = true; } dm_put(pool_md); @@ -2676,8 +2752,8 @@ static void thin_postsuspend(struct dm_target *ti) /* * <nr mapped sectors> <highest mapped sector> */ -static int thin_status(struct dm_target *ti, status_type_t type, - unsigned status_flags, char *result, unsigned maxlen) +static void thin_status(struct dm_target *ti, status_type_t type, + unsigned status_flags, char *result, unsigned maxlen) { int r; ssize_t sz = 0; @@ -2687,7 +2763,7 @@ static int thin_status(struct dm_target *ti, status_type_t type, if (get_pool_mode(tc->pool) == PM_FAIL) { DMEMIT("Fail"); - return 0; + return; } if (!tc->td) @@ -2696,12 +2772,16 @@ static int thin_status(struct dm_target *ti, status_type_t type, switch (type) { case STATUSTYPE_INFO: r = dm_thin_get_mapped_count(tc->td, &mapped); - if (r) - return r; + if (r) { + DMERR("dm_thin_get_mapped_count returned %d", r); + goto err; + } r = dm_thin_get_highest_mapped_block(tc->td, &highest); - if (r < 0) - return r; + if (r < 0) { + DMERR("dm_thin_get_highest_mapped_block returned %d", r); + goto err; + } DMEMIT("%llu ", mapped * tc->pool->sectors_per_block); if (r) @@ -2721,7 +2801,10 @@ static int thin_status(struct dm_target *ti, status_type_t type, } } - return 0; + return; + +err: + DMEMIT("Error"); } static int thin_iterate_devices(struct dm_target *ti, @@ -2748,7 +2831,7 @@ static int thin_iterate_devices(struct dm_target *ti, static struct target_type thin_target = { .name = "thin", - .version = {1, 7, 0}, + .version = {1, 7, 1}, .module = THIS_MODULE, .ctr = thin_ctr, .dtr = thin_dtr, diff --git a/drivers/md/dm-verity.c b/drivers/md/dm-verity.c index 52cde982..6ad5383 100644 --- a/drivers/md/dm-verity.c +++ b/drivers/md/dm-verity.c @@ -508,8 +508,8 @@ static int verity_map(struct dm_target *ti, struct bio *bio) /* * Status: V (valid) or C (corruption found) */ -static int verity_status(struct dm_target *ti, status_type_t type, - unsigned status_flags, char *result, unsigned maxlen) +static void verity_status(struct dm_target *ti, status_type_t type, + unsigned status_flags, char *result, unsigned maxlen) { struct dm_verity *v = ti->private; unsigned sz = 0; @@ -540,8 +540,6 @@ static int verity_status(struct dm_target *ti, status_type_t type, DMEMIT("%02x", v->salt[x]); break; } - - return 0; } static int verity_ioctl(struct dm_target *ti, unsigned cmd, @@ -860,7 +858,7 @@ bad: static struct target_type verity_target = { .name = "verity", - .version = {1, 1, 0}, + .version = {1, 1, 1}, .module = THIS_MODULE, .ctr = verity_ctr, .dtr = verity_dtr, diff --git a/drivers/md/dm-zero.c b/drivers/md/dm-zero.c index 69a5c3b..c99003e 100644 --- a/drivers/md/dm-zero.c +++ b/drivers/md/dm-zero.c @@ -25,7 +25,7 @@ static int zero_ctr(struct dm_target *ti, unsigned int argc, char **argv) /* * Silently drop discards, avoiding -EOPNOTSUPP. */ - ti->num_discard_requests = 1; + ti->num_discard_bios = 1; return 0; } diff --git a/drivers/md/dm.c b/drivers/md/dm.c index e67a4be..7e46926 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -163,7 +163,6 @@ struct mapped_device { * io objects are allocated from here. */ mempool_t *io_pool; - mempool_t *tio_pool; struct bio_set *bs; @@ -197,7 +196,6 @@ struct mapped_device { */ struct dm_md_mempools { mempool_t *io_pool; - mempool_t *tio_pool; struct bio_set *bs; }; @@ -205,12 +203,6 @@ struct dm_md_mempools { static struct kmem_cache *_io_cache; static struct kmem_cache *_rq_tio_cache; -/* - * Unused now, and needs to be deleted. But since io_pool is overloaded and it's - * still used for _io_cache, I'm leaving this for a later cleanup - */ -static struct kmem_cache *_rq_bio_info_cache; - static int __init local_init(void) { int r = -ENOMEM; @@ -224,13 +216,9 @@ static int __init local_init(void) if (!_rq_tio_cache) goto out_free_io_cache; - _rq_bio_info_cache = KMEM_CACHE(dm_rq_clone_bio_info, 0); - if (!_rq_bio_info_cache) - goto out_free_rq_tio_cache; - r = dm_uevent_init(); if (r) - goto out_free_rq_bio_info_cache; + goto out_free_rq_tio_cache; _major = major; r = register_blkdev(_major, _name); @@ -244,8 +232,6 @@ static int __init local_init(void) out_uevent_exit: dm_uevent_exit(); -out_free_rq_bio_info_cache: - kmem_cache_destroy(_rq_bio_info_cache); out_free_rq_tio_cache: kmem_cache_destroy(_rq_tio_cache); out_free_io_cache: @@ -256,7 +242,6 @@ out_free_io_cache: static void local_exit(void) { - kmem_cache_destroy(_rq_bio_info_cache); kmem_cache_destroy(_rq_tio_cache); kmem_cache_destroy(_io_cache); unregister_blkdev(_major, _name); @@ -448,12 +433,12 @@ static void free_tio(struct mapped_device *md, struct dm_target_io *tio) static struct dm_rq_target_io *alloc_rq_tio(struct mapped_device *md, gfp_t gfp_mask) { - return mempool_alloc(md->tio_pool, gfp_mask); + return mempool_alloc(md->io_pool, gfp_mask); } static void free_rq_tio(struct dm_rq_target_io *tio) { - mempool_free(tio, tio->md->tio_pool); + mempool_free(tio, tio->md->io_pool); } static int md_in_flight(struct mapped_device *md) @@ -626,7 +611,6 @@ static void dec_pending(struct dm_io *io, int error) queue_io(md, bio); } else { /* done with normal IO or empty flush */ - trace_block_bio_complete(md->queue, bio, io_error); bio_endio(bio, io_error); } } @@ -986,12 +970,13 @@ int dm_set_target_max_io_len(struct dm_target *ti, sector_t len) } EXPORT_SYMBOL_GPL(dm_set_target_max_io_len); -static void __map_bio(struct dm_target *ti, struct dm_target_io *tio) +static void __map_bio(struct dm_target_io *tio) { int r; sector_t sector; struct mapped_device *md; struct bio *clone = &tio->clone; + struct dm_target *ti = tio->ti; clone->bi_end_io = clone_endio; clone->bi_private = tio; @@ -1032,32 +1017,54 @@ struct clone_info { unsigned short idx; }; +static void bio_setup_sector(struct bio *bio, sector_t sector, sector_t len) +{ + bio->bi_sector = sector; + bio->bi_size = to_bytes(len); +} + +static void bio_setup_bv(struct bio *bio, unsigned short idx, unsigned short bv_count) +{ + bio->bi_idx = idx; + bio->bi_vcnt = idx + bv_count; + bio->bi_flags &= ~(1 << BIO_SEG_VALID); +} + +static void clone_bio_integrity(struct bio *bio, struct bio *clone, + unsigned short idx, unsigned len, unsigned offset, + unsigned trim) +{ + if (!bio_integrity(bio)) + return; + + bio_integrity_clone(clone, bio, GFP_NOIO); + + if (trim) + bio_integrity_trim(clone, bio_sector_offset(bio, idx, offset), len); +} + /* * Creates a little bio that just does part of a bvec. */ -static void split_bvec(struct dm_target_io *tio, struct bio *bio, - sector_t sector, unsigned short idx, unsigned int offset, - unsigned int len, struct bio_set *bs) +static void clone_split_bio(struct dm_target_io *tio, struct bio *bio, + sector_t sector, unsigned short idx, + unsigned offset, unsigned len) { struct bio *clone = &tio->clone; struct bio_vec *bv = bio->bi_io_vec + idx; *clone->bi_io_vec = *bv; - clone->bi_sector = sector; + bio_setup_sector(clone, sector, len); + clone->bi_bdev = bio->bi_bdev; clone->bi_rw = bio->bi_rw; clone->bi_vcnt = 1; - clone->bi_size = to_bytes(len); clone->bi_io_vec->bv_offset = offset; clone->bi_io_vec->bv_len = clone->bi_size; clone->bi_flags |= 1 << BIO_CLONED; - if (bio_integrity(bio)) { - bio_integrity_clone(clone, bio, GFP_NOIO); - bio_integrity_trim(clone, - bio_sector_offset(bio, idx, offset), len); - } + clone_bio_integrity(bio, clone, idx, len, offset, 1); } /* @@ -1065,29 +1072,23 @@ static void split_bvec(struct dm_target_io *tio, struct bio *bio, */ static void clone_bio(struct dm_target_io *tio, struct bio *bio, sector_t sector, unsigned short idx, - unsigned short bv_count, unsigned int len, - struct bio_set *bs) + unsigned short bv_count, unsigned len) { struct bio *clone = &tio->clone; + unsigned trim = 0; __bio_clone(clone, bio); - clone->bi_sector = sector; - clone->bi_idx = idx; - clone->bi_vcnt = idx + bv_count; - clone->bi_size = to_bytes(len); - clone->bi_flags &= ~(1 << BIO_SEG_VALID); - - if (bio_integrity(bio)) { - bio_integrity_clone(clone, bio, GFP_NOIO); - - if (idx != bio->bi_idx || clone->bi_size < bio->bi_size) - bio_integrity_trim(clone, - bio_sector_offset(bio, idx, 0), len); - } + bio_setup_sector(clone, sector, len); + bio_setup_bv(clone, idx, bv_count); + + if (idx != bio->bi_idx || clone->bi_size < bio->bi_size) + trim = 1; + clone_bio_integrity(bio, clone, idx, len, 0, trim); } static struct dm_target_io *alloc_tio(struct clone_info *ci, - struct dm_target *ti, int nr_iovecs) + struct dm_target *ti, int nr_iovecs, + unsigned target_bio_nr) { struct dm_target_io *tio; struct bio *clone; @@ -1098,96 +1099,104 @@ static struct dm_target_io *alloc_tio(struct clone_info *ci, tio->io = ci->io; tio->ti = ti; memset(&tio->info, 0, sizeof(tio->info)); - tio->target_request_nr = 0; + tio->target_bio_nr = target_bio_nr; return tio; } -static void __issue_target_request(struct clone_info *ci, struct dm_target *ti, - unsigned request_nr, sector_t len) +static void __clone_and_map_simple_bio(struct clone_info *ci, + struct dm_target *ti, + unsigned target_bio_nr, sector_t len) { - struct dm_target_io *tio = alloc_tio(ci, ti, ci->bio->bi_max_vecs); + struct dm_target_io *tio = alloc_tio(ci, ti, ci->bio->bi_max_vecs, target_bio_nr); struct bio *clone = &tio->clone; - tio->target_request_nr = request_nr; - /* * Discard requests require the bio's inline iovecs be initialized. * ci->bio->bi_max_vecs is BIO_INLINE_VECS anyway, for both flush * and discard, so no need for concern about wasted bvec allocations. */ - __bio_clone(clone, ci->bio); - if (len) { - clone->bi_sector = ci->sector; - clone->bi_size = to_bytes(len); - } + if (len) + bio_setup_sector(clone, ci->sector, len); - __map_bio(ti, tio); + __map_bio(tio); } -static void __issue_target_requests(struct clone_info *ci, struct dm_target *ti, - unsigned num_requests, sector_t len) +static void __send_duplicate_bios(struct clone_info *ci, struct dm_target *ti, + unsigned num_bios, sector_t len) { - unsigned request_nr; + unsigned target_bio_nr; - for (request_nr = 0; request_nr < num_requests; request_nr++) - __issue_target_request(ci, ti, request_nr, len); + for (target_bio_nr = 0; target_bio_nr < num_bios; target_bio_nr++) + __clone_and_map_simple_bio(ci, ti, target_bio_nr, len); } -static int __clone_and_map_empty_flush(struct clone_info *ci) +static int __send_empty_flush(struct clone_info *ci) { unsigned target_nr = 0; struct dm_target *ti; BUG_ON(bio_has_data(ci->bio)); while ((ti = dm_table_get_target(ci->map, target_nr++))) - __issue_target_requests(ci, ti, ti->num_flush_requests, 0); + __send_duplicate_bios(ci, ti, ti->num_flush_bios, 0); return 0; } -/* - * Perform all io with a single clone. - */ -static void __clone_and_map_simple(struct clone_info *ci, struct dm_target *ti) +static void __clone_and_map_data_bio(struct clone_info *ci, struct dm_target *ti, + sector_t sector, int nr_iovecs, + unsigned short idx, unsigned short bv_count, + unsigned offset, unsigned len, + unsigned split_bvec) { struct bio *bio = ci->bio; struct dm_target_io *tio; + unsigned target_bio_nr; + unsigned num_target_bios = 1; + + /* + * Does the target want to receive duplicate copies of the bio? + */ + if (bio_data_dir(bio) == WRITE && ti->num_write_bios) + num_target_bios = ti->num_write_bios(ti, bio); - tio = alloc_tio(ci, ti, bio->bi_max_vecs); - clone_bio(tio, bio, ci->sector, ci->idx, bio->bi_vcnt - ci->idx, - ci->sector_count, ci->md->bs); - __map_bio(ti, tio); - ci->sector_count = 0; + for (target_bio_nr = 0; target_bio_nr < num_target_bios; target_bio_nr++) { + tio = alloc_tio(ci, ti, nr_iovecs, target_bio_nr); + if (split_bvec) + clone_split_bio(tio, bio, sector, idx, offset, len); + else + clone_bio(tio, bio, sector, idx, bv_count, len); + __map_bio(tio); + } } -typedef unsigned (*get_num_requests_fn)(struct dm_target *ti); +typedef unsigned (*get_num_bios_fn)(struct dm_target *ti); -static unsigned get_num_discard_requests(struct dm_target *ti) +static unsigned get_num_discard_bios(struct dm_target *ti) { - return ti->num_discard_requests; + return ti->num_discard_bios; } -static unsigned get_num_write_same_requests(struct dm_target *ti) +static unsigned get_num_write_same_bios(struct dm_target *ti) { - return ti->num_write_same_requests; + return ti->num_write_same_bios; } typedef bool (*is_split_required_fn)(struct dm_target *ti); static bool is_split_required_for_discard(struct dm_target *ti) { - return ti->split_discard_requests; + return ti->split_discard_bios; } -static int __clone_and_map_changing_extent_only(struct clone_info *ci, - get_num_requests_fn get_num_requests, - is_split_required_fn is_split_required) +static int __send_changing_extent_only(struct clone_info *ci, + get_num_bios_fn get_num_bios, + is_split_required_fn is_split_required) { struct dm_target *ti; sector_t len; - unsigned num_requests; + unsigned num_bios; do { ti = dm_table_find_target(ci->map, ci->sector); @@ -1200,8 +1209,8 @@ static int __clone_and_map_changing_extent_only(struct clone_info *ci, * reconfiguration might also have changed that since the * check was performed. */ - num_requests = get_num_requests ? get_num_requests(ti) : 0; - if (!num_requests) + num_bios = get_num_bios ? get_num_bios(ti) : 0; + if (!num_bios) return -EOPNOTSUPP; if (is_split_required && !is_split_required(ti)) @@ -1209,7 +1218,7 @@ static int __clone_and_map_changing_extent_only(struct clone_info *ci, else len = min(ci->sector_count, max_io_len(ci->sector, ti)); - __issue_target_requests(ci, ti, num_requests, len); + __send_duplicate_bios(ci, ti, num_bios, len); ci->sector += len; } while (ci->sector_count -= len); @@ -1217,108 +1226,129 @@ static int __clone_and_map_changing_extent_only(struct clone_info *ci, return 0; } -static int __clone_and_map_discard(struct clone_info *ci) +static int __send_discard(struct clone_info *ci) { - return __clone_and_map_changing_extent_only(ci, get_num_discard_requests, - is_split_required_for_discard); + return __send_changing_extent_only(ci, get_num_discard_bios, + is_split_required_for_discard); } -static int __clone_and_map_write_same(struct clone_info *ci) +static int __send_write_same(struct clone_info *ci) { - return __clone_and_map_changing_extent_only(ci, get_num_write_same_requests, NULL); + return __send_changing_extent_only(ci, get_num_write_same_bios, NULL); } -static int __clone_and_map(struct clone_info *ci) +/* + * Find maximum number of sectors / bvecs we can process with a single bio. + */ +static sector_t __len_within_target(struct clone_info *ci, sector_t max, int *idx) { struct bio *bio = ci->bio; - struct dm_target *ti; - sector_t len = 0, max; - struct dm_target_io *tio; - - if (unlikely(bio->bi_rw & REQ_DISCARD)) - return __clone_and_map_discard(ci); - else if (unlikely(bio->bi_rw & REQ_WRITE_SAME)) - return __clone_and_map_write_same(ci); + sector_t bv_len, total_len = 0; - ti = dm_table_find_target(ci->map, ci->sector); - if (!dm_target_is_valid(ti)) - return -EIO; + for (*idx = ci->idx; max && (*idx < bio->bi_vcnt); (*idx)++) { + bv_len = to_sector(bio->bi_io_vec[*idx].bv_len); - max = max_io_len(ci->sector, ti); + if (bv_len > max) + break; - if (ci->sector_count <= max) { - /* - * Optimise for the simple case where we can do all of - * the remaining io with a single clone. - */ - __clone_and_map_simple(ci, ti); + max -= bv_len; + total_len += bv_len; + } - } else if (to_sector(bio->bi_io_vec[ci->idx].bv_len) <= max) { - /* - * There are some bvecs that don't span targets. - * Do as many of these as possible. - */ - int i; - sector_t remaining = max; - sector_t bv_len; + return total_len; +} - for (i = ci->idx; remaining && (i < bio->bi_vcnt); i++) { - bv_len = to_sector(bio->bi_io_vec[i].bv_len); +static int __split_bvec_across_targets(struct clone_info *ci, + struct dm_target *ti, sector_t max) +{ + struct bio *bio = ci->bio; + struct bio_vec *bv = bio->bi_io_vec + ci->idx; + sector_t remaining = to_sector(bv->bv_len); + unsigned offset = 0; + sector_t len; - if (bv_len > remaining) - break; + do { + if (offset) { + ti = dm_table_find_target(ci->map, ci->sector); + if (!dm_target_is_valid(ti)) + return -EIO; - remaining -= bv_len; - len += bv_len; + max = max_io_len(ci->sector, ti); } - tio = alloc_tio(ci, ti, bio->bi_max_vecs); - clone_bio(tio, bio, ci->sector, ci->idx, i - ci->idx, len, - ci->md->bs); - __map_bio(ti, tio); + len = min(remaining, max); + + __clone_and_map_data_bio(ci, ti, ci->sector, 1, ci->idx, 0, + bv->bv_offset + offset, len, 1); ci->sector += len; ci->sector_count -= len; - ci->idx = i; + offset += to_bytes(len); + } while (remaining -= len); - } else { - /* - * Handle a bvec that must be split between two or more targets. - */ - struct bio_vec *bv = bio->bi_io_vec + ci->idx; - sector_t remaining = to_sector(bv->bv_len); - unsigned int offset = 0; + ci->idx++; + + return 0; +} + +/* + * Select the correct strategy for processing a non-flush bio. + */ +static int __split_and_process_non_flush(struct clone_info *ci) +{ + struct bio *bio = ci->bio; + struct dm_target *ti; + sector_t len, max; + int idx; + + if (unlikely(bio->bi_rw & REQ_DISCARD)) + return __send_discard(ci); + else if (unlikely(bio->bi_rw & REQ_WRITE_SAME)) + return __send_write_same(ci); - do { - if (offset) { - ti = dm_table_find_target(ci->map, ci->sector); - if (!dm_target_is_valid(ti)) - return -EIO; + ti = dm_table_find_target(ci->map, ci->sector); + if (!dm_target_is_valid(ti)) + return -EIO; - max = max_io_len(ci->sector, ti); - } + max = max_io_len(ci->sector, ti); - len = min(remaining, max); + /* + * Optimise for the simple case where we can do all of + * the remaining io with a single clone. + */ + if (ci->sector_count <= max) { + __clone_and_map_data_bio(ci, ti, ci->sector, bio->bi_max_vecs, + ci->idx, bio->bi_vcnt - ci->idx, 0, + ci->sector_count, 0); + ci->sector_count = 0; + return 0; + } - tio = alloc_tio(ci, ti, 1); - split_bvec(tio, bio, ci->sector, ci->idx, - bv->bv_offset + offset, len, ci->md->bs); + /* + * There are some bvecs that don't span targets. + * Do as many of these as possible. + */ + if (to_sector(bio->bi_io_vec[ci->idx].bv_len) <= max) { + len = __len_within_target(ci, max, &idx); - __map_bio(ti, tio); + __clone_and_map_data_bio(ci, ti, ci->sector, bio->bi_max_vecs, + ci->idx, idx - ci->idx, 0, len, 0); - ci->sector += len; - ci->sector_count -= len; - offset += to_bytes(len); - } while (remaining -= len); + ci->sector += len; + ci->sector_count -= len; + ci->idx = idx; - ci->idx++; + return 0; } - return 0; + /* + * Handle a bvec that must be split between two or more targets. + */ + return __split_bvec_across_targets(ci, ti, max); } /* - * Split the bio into several clones and submit it to targets. + * Entry point to split a bio into clones and submit them to the targets. */ static void __split_and_process_bio(struct mapped_device *md, struct bio *bio) { @@ -1342,16 +1372,17 @@ static void __split_and_process_bio(struct mapped_device *md, struct bio *bio) ci.idx = bio->bi_idx; start_io_acct(ci.io); + if (bio->bi_rw & REQ_FLUSH) { ci.bio = &ci.md->flush_bio; ci.sector_count = 0; - error = __clone_and_map_empty_flush(&ci); + error = __send_empty_flush(&ci); /* dec_pending submits any data associated with flush */ } else { ci.bio = bio; ci.sector_count = bio_sectors(bio); while (ci.sector_count && !error) - error = __clone_and_map(&ci); + error = __split_and_process_non_flush(&ci); } /* drop the extra reference count */ @@ -1924,8 +1955,6 @@ static void free_dev(struct mapped_device *md) unlock_fs(md); bdput(md->bdev); destroy_workqueue(md->wq); - if (md->tio_pool) - mempool_destroy(md->tio_pool); if (md->io_pool) mempool_destroy(md->io_pool); if (md->bs) @@ -1948,24 +1977,33 @@ static void __bind_mempools(struct mapped_device *md, struct dm_table *t) { struct dm_md_mempools *p = dm_table_get_md_mempools(t); - if (md->io_pool && (md->tio_pool || dm_table_get_type(t) == DM_TYPE_BIO_BASED) && md->bs) { - /* - * The md already has necessary mempools. Reload just the - * bioset because front_pad may have changed because - * a different table was loaded. - */ - bioset_free(md->bs); - md->bs = p->bs; - p->bs = NULL; + if (md->io_pool && md->bs) { + /* The md already has necessary mempools. */ + if (dm_table_get_type(t) == DM_TYPE_BIO_BASED) { + /* + * Reload bioset because front_pad may have changed + * because a different table was loaded. + */ + bioset_free(md->bs); + md->bs = p->bs; + p->bs = NULL; + } else if (dm_table_get_type(t) == DM_TYPE_REQUEST_BASED) { + /* + * There's no need to reload with request-based dm + * because the size of front_pad doesn't change. + * Note for future: If you are to reload bioset, + * prep-ed requests in the queue may refer + * to bio from the old bioset, so you must walk + * through the queue to unprep. + */ + } goto out; } - BUG_ON(!p || md->io_pool || md->tio_pool || md->bs); + BUG_ON(!p || md->io_pool || md->bs); md->io_pool = p->io_pool; p->io_pool = NULL; - md->tio_pool = p->tio_pool; - p->tio_pool = NULL; md->bs = p->bs; p->bs = NULL; @@ -2396,7 +2434,7 @@ static void dm_queue_flush(struct mapped_device *md) */ struct dm_table *dm_swap_table(struct mapped_device *md, struct dm_table *table) { - struct dm_table *live_map, *map = ERR_PTR(-EINVAL); + struct dm_table *live_map = NULL, *map = ERR_PTR(-EINVAL); struct queue_limits limits; int r; @@ -2419,10 +2457,12 @@ struct dm_table *dm_swap_table(struct mapped_device *md, struct dm_table *table) dm_table_put(live_map); } - r = dm_calculate_queue_limits(table, &limits); - if (r) { - map = ERR_PTR(r); - goto out; + if (!live_map) { + r = dm_calculate_queue_limits(table, &limits); + if (r) { + map = ERR_PTR(r); + goto out; + } } map = __bind(md, table, &limits); @@ -2720,52 +2760,42 @@ EXPORT_SYMBOL_GPL(dm_noflush_suspending); struct dm_md_mempools *dm_alloc_md_mempools(unsigned type, unsigned integrity, unsigned per_bio_data_size) { - struct dm_md_mempools *pools = kmalloc(sizeof(*pools), GFP_KERNEL); - unsigned int pool_size = (type == DM_TYPE_BIO_BASED) ? 16 : MIN_IOS; + struct dm_md_mempools *pools = kzalloc(sizeof(*pools), GFP_KERNEL); + struct kmem_cache *cachep; + unsigned int pool_size; + unsigned int front_pad; if (!pools) return NULL; - per_bio_data_size = roundup(per_bio_data_size, __alignof__(struct dm_target_io)); + if (type == DM_TYPE_BIO_BASED) { + cachep = _io_cache; + pool_size = 16; + front_pad = roundup(per_bio_data_size, __alignof__(struct dm_target_io)) + offsetof(struct dm_target_io, clone); + } else if (type == DM_TYPE_REQUEST_BASED) { + cachep = _rq_tio_cache; + pool_size = MIN_IOS; + front_pad = offsetof(struct dm_rq_clone_bio_info, clone); + /* per_bio_data_size is not used. See __bind_mempools(). */ + WARN_ON(per_bio_data_size != 0); + } else + goto out; - pools->io_pool = (type == DM_TYPE_BIO_BASED) ? - mempool_create_slab_pool(MIN_IOS, _io_cache) : - mempool_create_slab_pool(MIN_IOS, _rq_bio_info_cache); + pools->io_pool = mempool_create_slab_pool(MIN_IOS, cachep); if (!pools->io_pool) - goto free_pools_and_out; - - pools->tio_pool = NULL; - if (type == DM_TYPE_REQUEST_BASED) { - pools->tio_pool = mempool_create_slab_pool(MIN_IOS, _rq_tio_cache); - if (!pools->tio_pool) - goto free_io_pool_and_out; - } + goto out; - pools->bs = (type == DM_TYPE_BIO_BASED) ? - bioset_create(pool_size, - per_bio_data_size + offsetof(struct dm_target_io, clone)) : - bioset_create(pool_size, - offsetof(struct dm_rq_clone_bio_info, clone)); + pools->bs = bioset_create(pool_size, front_pad); if (!pools->bs) - goto free_tio_pool_and_out; + goto out; if (integrity && bioset_integrity_create(pools->bs, pool_size)) - goto free_bioset_and_out; + goto out; return pools; -free_bioset_and_out: - bioset_free(pools->bs); - -free_tio_pool_and_out: - if (pools->tio_pool) - mempool_destroy(pools->tio_pool); - -free_io_pool_and_out: - mempool_destroy(pools->io_pool); - -free_pools_and_out: - kfree(pools); +out: + dm_free_md_mempools(pools); return NULL; } @@ -2778,9 +2808,6 @@ void dm_free_md_mempools(struct dm_md_mempools *pools) if (pools->io_pool) mempool_destroy(pools->io_pool); - if (pools->tio_pool) - mempool_destroy(pools->tio_pool); - if (pools->bs) bioset_free(pools->bs); diff --git a/drivers/md/persistent-data/Kconfig b/drivers/md/persistent-data/Kconfig index ceb3590..19b2687 100644 --- a/drivers/md/persistent-data/Kconfig +++ b/drivers/md/persistent-data/Kconfig @@ -1,6 +1,6 @@ config DM_PERSISTENT_DATA tristate - depends on BLK_DEV_DM && EXPERIMENTAL + depends on BLK_DEV_DM select LIBCRC32C select DM_BUFIO ---help--- diff --git a/drivers/md/persistent-data/Makefile b/drivers/md/persistent-data/Makefile index d8e7cb7..ff52879 100644 --- a/drivers/md/persistent-data/Makefile +++ b/drivers/md/persistent-data/Makefile @@ -1,5 +1,7 @@ obj-$(CONFIG_DM_PERSISTENT_DATA) += dm-persistent-data.o dm-persistent-data-objs := \ + dm-array.o \ + dm-bitset.o \ dm-block-manager.o \ dm-space-map-common.o \ dm-space-map-disk.o \ diff --git a/drivers/md/persistent-data/dm-array.c b/drivers/md/persistent-data/dm-array.c new file mode 100644 index 0000000..172147e --- /dev/null +++ b/drivers/md/persistent-data/dm-array.c @@ -0,0 +1,808 @@ +/* + * Copyright (C) 2012 Red Hat, Inc. + * + * This file is released under the GPL. + */ + +#include "dm-array.h" +#include "dm-space-map.h" +#include "dm-transaction-manager.h" + +#include <linux/export.h> +#include <linux/device-mapper.h> + +#define DM_MSG_PREFIX "array" + +/*----------------------------------------------------------------*/ + +/* + * The array is implemented as a fully populated btree, which points to + * blocks that contain the packed values. This is more space efficient + * than just using a btree since we don't store 1 key per value. + */ +struct array_block { + __le32 csum; + __le32 max_entries; + __le32 nr_entries; + __le32 value_size; + __le64 blocknr; /* Block this node is supposed to live in. */ +} __packed; + +/*----------------------------------------------------------------*/ + +/* + * Validator methods. As usual we calculate a checksum, and also write the + * block location into the header (paranoia about ssds remapping areas by + * mistake). + */ +#define CSUM_XOR 595846735 + +static void array_block_prepare_for_write(struct dm_block_validator *v, + struct dm_block *b, + size_t size_of_block) +{ + struct array_block *bh_le = dm_block_data(b); + + bh_le->blocknr = cpu_to_le64(dm_block_location(b)); + bh_le->csum = cpu_to_le32(dm_bm_checksum(&bh_le->max_entries, + size_of_block - sizeof(__le32), + CSUM_XOR)); +} + +static int array_block_check(struct dm_block_validator *v, + struct dm_block *b, + size_t size_of_block) +{ + struct array_block *bh_le = dm_block_data(b); + __le32 csum_disk; + + if (dm_block_location(b) != le64_to_cpu(bh_le->blocknr)) { + DMERR_LIMIT("array_block_check failed: blocknr %llu != wanted %llu", + (unsigned long long) le64_to_cpu(bh_le->blocknr), + (unsigned long long) dm_block_location(b)); + return -ENOTBLK; + } + + csum_disk = cpu_to_le32(dm_bm_checksum(&bh_le->max_entries, + size_of_block - sizeof(__le32), + CSUM_XOR)); + if (csum_disk != bh_le->csum) { + DMERR_LIMIT("array_block_check failed: csum %u != wanted %u", + (unsigned) le32_to_cpu(csum_disk), + (unsigned) le32_to_cpu(bh_le->csum)); + return -EILSEQ; + } + + return 0; +} + +static struct dm_block_validator array_validator = { + .name = "array", + .prepare_for_write = array_block_prepare_for_write, + .check = array_block_check +}; + +/*----------------------------------------------------------------*/ + +/* + * Functions for manipulating the array blocks. + */ + +/* + * Returns a pointer to a value within an array block. + * + * index - The index into _this_ specific block. + */ +static void *element_at(struct dm_array_info *info, struct array_block *ab, + unsigned index) +{ + unsigned char *entry = (unsigned char *) (ab + 1); + + entry += index * info->value_type.size; + + return entry; +} + +/* + * Utility function that calls one of the value_type methods on every value + * in an array block. + */ +static void on_entries(struct dm_array_info *info, struct array_block *ab, + void (*fn)(void *, const void *)) +{ + unsigned i, nr_entries = le32_to_cpu(ab->nr_entries); + + for (i = 0; i < nr_entries; i++) + fn(info->value_type.context, element_at(info, ab, i)); +} + +/* + * Increment every value in an array block. + */ +static void inc_ablock_entries(struct dm_array_info *info, struct array_block *ab) +{ + struct dm_btree_value_type *vt = &info->value_type; + + if (vt->inc) + on_entries(info, ab, vt->inc); +} + +/* + * Decrement every value in an array block. + */ +static void dec_ablock_entries(struct dm_array_info *info, struct array_block *ab) +{ + struct dm_btree_value_type *vt = &info->value_type; + + if (vt->dec) + on_entries(info, ab, vt->dec); +} + +/* + * Each array block can hold this many values. + */ +static uint32_t calc_max_entries(size_t value_size, size_t size_of_block) +{ + return (size_of_block - sizeof(struct array_block)) / value_size; +} + +/* + * Allocate a new array block. The caller will need to unlock block. + */ +static int alloc_ablock(struct dm_array_info *info, size_t size_of_block, + uint32_t max_entries, + struct dm_block **block, struct array_block **ab) +{ + int r; + + r = dm_tm_new_block(info->btree_info.tm, &array_validator, block); + if (r) + return r; + + (*ab) = dm_block_data(*block); + (*ab)->max_entries = cpu_to_le32(max_entries); + (*ab)->nr_entries = cpu_to_le32(0); + (*ab)->value_size = cpu_to_le32(info->value_type.size); + + return 0; +} + +/* + * Pad an array block out with a particular value. Every instance will + * cause an increment of the value_type. new_nr must always be more than + * the current number of entries. + */ +static void fill_ablock(struct dm_array_info *info, struct array_block *ab, + const void *value, unsigned new_nr) +{ + unsigned i; + uint32_t nr_entries; + struct dm_btree_value_type *vt = &info->value_type; + + BUG_ON(new_nr > le32_to_cpu(ab->max_entries)); + BUG_ON(new_nr < le32_to_cpu(ab->nr_entries)); + + nr_entries = le32_to_cpu(ab->nr_entries); + for (i = nr_entries; i < new_nr; i++) { + if (vt->inc) + vt->inc(vt->context, value); + memcpy(element_at(info, ab, i), value, vt->size); + } + ab->nr_entries = cpu_to_le32(new_nr); +} + +/* + * Remove some entries from the back of an array block. Every value + * removed will be decremented. new_nr must be <= the current number of + * entries. + */ +static void trim_ablock(struct dm_array_info *info, struct array_block *ab, + unsigned new_nr) +{ + unsigned i; + uint32_t nr_entries; + struct dm_btree_value_type *vt = &info->value_type; + + BUG_ON(new_nr > le32_to_cpu(ab->max_entries)); + BUG_ON(new_nr > le32_to_cpu(ab->nr_entries)); + + nr_entries = le32_to_cpu(ab->nr_entries); + for (i = nr_entries; i > new_nr; i--) + if (vt->dec) + vt->dec(vt->context, element_at(info, ab, i - 1)); + ab->nr_entries = cpu_to_le32(new_nr); +} + +/* + * Read locks a block, and coerces it to an array block. The caller must + * unlock 'block' when finished. + */ +static int get_ablock(struct dm_array_info *info, dm_block_t b, + struct dm_block **block, struct array_block **ab) +{ + int r; + + r = dm_tm_read_lock(info->btree_info.tm, b, &array_validator, block); + if (r) + return r; + + *ab = dm_block_data(*block); + return 0; +} + +/* + * Unlocks an array block. + */ +static int unlock_ablock(struct dm_array_info *info, struct dm_block *block) +{ + return dm_tm_unlock(info->btree_info.tm, block); +} + +/*----------------------------------------------------------------*/ + +/* + * Btree manipulation. + */ + +/* + * Looks up an array block in the btree, and then read locks it. + * + * index is the index of the index of the array_block, (ie. the array index + * / max_entries). + */ +static int lookup_ablock(struct dm_array_info *info, dm_block_t root, + unsigned index, struct dm_block **block, + struct array_block **ab) +{ + int r; + uint64_t key = index; + __le64 block_le; + + r = dm_btree_lookup(&info->btree_info, root, &key, &block_le); + if (r) + return r; + + return get_ablock(info, le64_to_cpu(block_le), block, ab); +} + +/* + * Insert an array block into the btree. The block is _not_ unlocked. + */ +static int insert_ablock(struct dm_array_info *info, uint64_t index, + struct dm_block *block, dm_block_t *root) +{ + __le64 block_le = cpu_to_le64(dm_block_location(block)); + + __dm_bless_for_disk(block_le); + return dm_btree_insert(&info->btree_info, *root, &index, &block_le, root); +} + +/* + * Looks up an array block in the btree. Then shadows it, and updates the + * btree to point to this new shadow. 'root' is an input/output parameter + * for both the current root block, and the new one. + */ +static int shadow_ablock(struct dm_array_info *info, dm_block_t *root, + unsigned index, struct dm_block **block, + struct array_block **ab) +{ + int r, inc; + uint64_t key = index; + dm_block_t b; + __le64 block_le; + + /* + * lookup + */ + r = dm_btree_lookup(&info->btree_info, *root, &key, &block_le); + if (r) + return r; + b = le64_to_cpu(block_le); + + /* + * shadow + */ + r = dm_tm_shadow_block(info->btree_info.tm, b, + &array_validator, block, &inc); + if (r) + return r; + + *ab = dm_block_data(*block); + if (inc) + inc_ablock_entries(info, *ab); + + /* + * Reinsert. + * + * The shadow op will often be a noop. Only insert if it really + * copied data. + */ + if (dm_block_location(*block) != b) + r = insert_ablock(info, index, *block, root); + + return r; +} + +/* + * Allocate an new array block, and fill it with some values. + */ +static int insert_new_ablock(struct dm_array_info *info, size_t size_of_block, + uint32_t max_entries, + unsigned block_index, uint32_t nr, + const void *value, dm_block_t *root) +{ + int r; + struct dm_block *block; + struct array_block *ab; + + r = alloc_ablock(info, size_of_block, max_entries, &block, &ab); + if (r) + return r; + + fill_ablock(info, ab, value, nr); + r = insert_ablock(info, block_index, block, root); + unlock_ablock(info, block); + + return r; +} + +static int insert_full_ablocks(struct dm_array_info *info, size_t size_of_block, + unsigned begin_block, unsigned end_block, + unsigned max_entries, const void *value, + dm_block_t *root) +{ + int r = 0; + + for (; !r && begin_block != end_block; begin_block++) + r = insert_new_ablock(info, size_of_block, max_entries, begin_block, max_entries, value, root); + + return r; +} + +/* + * There are a bunch of functions involved with resizing an array. This + * structure holds information that commonly needed by them. Purely here + * to reduce parameter count. + */ +struct resize { + /* + * Describes the array. + */ + struct dm_array_info *info; + + /* + * The current root of the array. This gets updated. + */ + dm_block_t root; + + /* + * Metadata block size. Used to calculate the nr entries in an + * array block. + */ + size_t size_of_block; + + /* + * Maximum nr entries in an array block. + */ + unsigned max_entries; + + /* + * nr of completely full blocks in the array. + * + * 'old' refers to before the resize, 'new' after. + */ + unsigned old_nr_full_blocks, new_nr_full_blocks; + + /* + * Number of entries in the final block. 0 iff only full blocks in + * the array. + */ + unsigned old_nr_entries_in_last_block, new_nr_entries_in_last_block; + + /* + * The default value used when growing the array. + */ + const void *value; +}; + +/* + * Removes a consecutive set of array blocks from the btree. The values + * in block are decremented as a side effect of the btree remove. + * + * begin_index - the index of the first array block to remove. + * end_index - the one-past-the-end value. ie. this block is not removed. + */ +static int drop_blocks(struct resize *resize, unsigned begin_index, + unsigned end_index) +{ + int r; + + while (begin_index != end_index) { + uint64_t key = begin_index++; + r = dm_btree_remove(&resize->info->btree_info, resize->root, + &key, &resize->root); + if (r) + return r; + } + + return 0; +} + +/* + * Calculates how many blocks are needed for the array. + */ +static unsigned total_nr_blocks_needed(unsigned nr_full_blocks, + unsigned nr_entries_in_last_block) +{ + return nr_full_blocks + (nr_entries_in_last_block ? 1 : 0); +} + +/* + * Shrink an array. + */ +static int shrink(struct resize *resize) +{ + int r; + unsigned begin, end; + struct dm_block *block; + struct array_block *ab; + + /* + * Lose some blocks from the back? + */ + if (resize->new_nr_full_blocks < resize->old_nr_full_blocks) { + begin = total_nr_blocks_needed(resize->new_nr_full_blocks, + resize->new_nr_entries_in_last_block); + end = total_nr_blocks_needed(resize->old_nr_full_blocks, + resize->old_nr_entries_in_last_block); + + r = drop_blocks(resize, begin, end); + if (r) + return r; + } + + /* + * Trim the new tail block + */ + if (resize->new_nr_entries_in_last_block) { + r = shadow_ablock(resize->info, &resize->root, + resize->new_nr_full_blocks, &block, &ab); + if (r) + return r; + + trim_ablock(resize->info, ab, resize->new_nr_entries_in_last_block); + unlock_ablock(resize->info, block); + } + + return 0; +} + +/* + * Grow an array. + */ +static int grow_extend_tail_block(struct resize *resize, uint32_t new_nr_entries) +{ + int r; + struct dm_block *block; + struct array_block *ab; + + r = shadow_ablock(resize->info, &resize->root, + resize->old_nr_full_blocks, &block, &ab); + if (r) + return r; + + fill_ablock(resize->info, ab, resize->value, new_nr_entries); + unlock_ablock(resize->info, block); + + return r; +} + +static int grow_add_tail_block(struct resize *resize) +{ + return insert_new_ablock(resize->info, resize->size_of_block, + resize->max_entries, + resize->new_nr_full_blocks, + resize->new_nr_entries_in_last_block, + resize->value, &resize->root); +} + +static int grow_needs_more_blocks(struct resize *resize) +{ + int r; + + if (resize->old_nr_entries_in_last_block > 0) { + r = grow_extend_tail_block(resize, resize->max_entries); + if (r) + return r; + } + + r = insert_full_ablocks(resize->info, resize->size_of_block, + resize->old_nr_full_blocks, + resize->new_nr_full_blocks, + resize->max_entries, resize->value, + &resize->root); + if (r) + return r; + + if (resize->new_nr_entries_in_last_block) + r = grow_add_tail_block(resize); + + return r; +} + +static int grow(struct resize *resize) +{ + if (resize->new_nr_full_blocks > resize->old_nr_full_blocks) + return grow_needs_more_blocks(resize); + + else if (resize->old_nr_entries_in_last_block) + return grow_extend_tail_block(resize, resize->new_nr_entries_in_last_block); + + else + return grow_add_tail_block(resize); +} + +/*----------------------------------------------------------------*/ + +/* + * These are the value_type functions for the btree elements, which point + * to array blocks. + */ +static void block_inc(void *context, const void *value) +{ + __le64 block_le; + struct dm_array_info *info = context; + + memcpy(&block_le, value, sizeof(block_le)); + dm_tm_inc(info->btree_info.tm, le64_to_cpu(block_le)); +} + +static void block_dec(void *context, const void *value) +{ + int r; + uint64_t b; + __le64 block_le; + uint32_t ref_count; + struct dm_block *block; + struct array_block *ab; + struct dm_array_info *info = context; + + memcpy(&block_le, value, sizeof(block_le)); + b = le64_to_cpu(block_le); + + r = dm_tm_ref(info->btree_info.tm, b, &ref_count); + if (r) { + DMERR_LIMIT("couldn't get reference count for block %llu", + (unsigned long long) b); + return; + } + + if (ref_count == 1) { + /* + * We're about to drop the last reference to this ablock. + * So we need to decrement the ref count of the contents. + */ + r = get_ablock(info, b, &block, &ab); + if (r) { + DMERR_LIMIT("couldn't get array block %llu", + (unsigned long long) b); + return; + } + + dec_ablock_entries(info, ab); + unlock_ablock(info, block); + } + + dm_tm_dec(info->btree_info.tm, b); +} + +static int block_equal(void *context, const void *value1, const void *value2) +{ + return !memcmp(value1, value2, sizeof(__le64)); +} + +/*----------------------------------------------------------------*/ + +void dm_array_info_init(struct dm_array_info *info, + struct dm_transaction_manager *tm, + struct dm_btree_value_type *vt) +{ + struct dm_btree_value_type *bvt = &info->btree_info.value_type; + + memcpy(&info->value_type, vt, sizeof(info->value_type)); + info->btree_info.tm = tm; + info->btree_info.levels = 1; + + bvt->context = info; + bvt->size = sizeof(__le64); + bvt->inc = block_inc; + bvt->dec = block_dec; + bvt->equal = block_equal; +} +EXPORT_SYMBOL_GPL(dm_array_info_init); + +int dm_array_empty(struct dm_array_info *info, dm_block_t *root) +{ + return dm_btree_empty(&info->btree_info, root); +} +EXPORT_SYMBOL_GPL(dm_array_empty); + +static int array_resize(struct dm_array_info *info, dm_block_t root, + uint32_t old_size, uint32_t new_size, + const void *value, dm_block_t *new_root) +{ + int r; + struct resize resize; + + if (old_size == new_size) + return 0; + + resize.info = info; + resize.root = root; + resize.size_of_block = dm_bm_block_size(dm_tm_get_bm(info->btree_info.tm)); + resize.max_entries = calc_max_entries(info->value_type.size, + resize.size_of_block); + + resize.old_nr_full_blocks = old_size / resize.max_entries; + resize.old_nr_entries_in_last_block = old_size % resize.max_entries; + resize.new_nr_full_blocks = new_size / resize.max_entries; + resize.new_nr_entries_in_last_block = new_size % resize.max_entries; + resize.value = value; + + r = ((new_size > old_size) ? grow : shrink)(&resize); + if (r) + return r; + + *new_root = resize.root; + return 0; +} + +int dm_array_resize(struct dm_array_info *info, dm_block_t root, + uint32_t old_size, uint32_t new_size, + const void *value, dm_block_t *new_root) + __dm_written_to_disk(value) +{ + int r = array_resize(info, root, old_size, new_size, value, new_root); + __dm_unbless_for_disk(value); + return r; +} +EXPORT_SYMBOL_GPL(dm_array_resize); + +int dm_array_del(struct dm_array_info *info, dm_block_t root) +{ + return dm_btree_del(&info->btree_info, root); +} +EXPORT_SYMBOL_GPL(dm_array_del); + +int dm_array_get_value(struct dm_array_info *info, dm_block_t root, + uint32_t index, void *value_le) +{ + int r; + struct dm_block *block; + struct array_block *ab; + size_t size_of_block; + unsigned entry, max_entries; + + size_of_block = dm_bm_block_size(dm_tm_get_bm(info->btree_info.tm)); + max_entries = calc_max_entries(info->value_type.size, size_of_block); + + r = lookup_ablock(info, root, index / max_entries, &block, &ab); + if (r) + return r; + + entry = index % max_entries; + if (entry >= le32_to_cpu(ab->nr_entries)) + r = -ENODATA; + else + memcpy(value_le, element_at(info, ab, entry), + info->value_type.size); + + unlock_ablock(info, block); + return r; +} +EXPORT_SYMBOL_GPL(dm_array_get_value); + +static int array_set_value(struct dm_array_info *info, dm_block_t root, + uint32_t index, const void *value, dm_block_t *new_root) +{ + int r; + struct dm_block *block; + struct array_block *ab; + size_t size_of_block; + unsigned max_entries; + unsigned entry; + void *old_value; + struct dm_btree_value_type *vt = &info->value_type; + + size_of_block = dm_bm_block_size(dm_tm_get_bm(info->btree_info.tm)); + max_entries = calc_max_entries(info->value_type.size, size_of_block); + + r = shadow_ablock(info, &root, index / max_entries, &block, &ab); + if (r) + return r; + *new_root = root; + + entry = index % max_entries; + if (entry >= le32_to_cpu(ab->nr_entries)) { + r = -ENODATA; + goto out; + } + + old_value = element_at(info, ab, entry); + if (vt->dec && + (!vt->equal || !vt->equal(vt->context, old_value, value))) { + vt->dec(vt->context, old_value); + if (vt->inc) + vt->inc(vt->context, value); + } + + memcpy(old_value, value, info->value_type.size); + +out: + unlock_ablock(info, block); + return r; +} + +int dm_array_set_value(struct dm_array_info *info, dm_block_t root, + uint32_t index, const void *value, dm_block_t *new_root) + __dm_written_to_disk(value) +{ + int r; + + r = array_set_value(info, root, index, value, new_root); + __dm_unbless_for_disk(value); + return r; +} +EXPORT_SYMBOL_GPL(dm_array_set_value); + +struct walk_info { + struct dm_array_info *info; + int (*fn)(void *context, uint64_t key, void *leaf); + void *context; +}; + +static int walk_ablock(void *context, uint64_t *keys, void *leaf) +{ + struct walk_info *wi = context; + + int r; + unsigned i; + __le64 block_le; + unsigned nr_entries, max_entries; + struct dm_block *block; + struct array_block *ab; + + memcpy(&block_le, leaf, sizeof(block_le)); + r = get_ablock(wi->info, le64_to_cpu(block_le), &block, &ab); + if (r) + return r; + + max_entries = le32_to_cpu(ab->max_entries); + nr_entries = le32_to_cpu(ab->nr_entries); + for (i = 0; i < nr_entries; i++) { + r = wi->fn(wi->context, keys[0] * max_entries + i, + element_at(wi->info, ab, i)); + + if (r) + break; + } + + unlock_ablock(wi->info, block); + return r; +} + +int dm_array_walk(struct dm_array_info *info, dm_block_t root, + int (*fn)(void *, uint64_t key, void *leaf), + void *context) +{ + struct walk_info wi; + + wi.info = info; + wi.fn = fn; + wi.context = context; + + return dm_btree_walk(&info->btree_info, root, walk_ablock, &wi); +} +EXPORT_SYMBOL_GPL(dm_array_walk); + +/*----------------------------------------------------------------*/ diff --git a/drivers/md/persistent-data/dm-array.h b/drivers/md/persistent-data/dm-array.h new file mode 100644 index 0000000..ea177d6 --- /dev/null +++ b/drivers/md/persistent-data/dm-array.h @@ -0,0 +1,166 @@ +/* + * Copyright (C) 2012 Red Hat, Inc. + * + * This file is released under the GPL. + */ +#ifndef _LINUX_DM_ARRAY_H +#define _LINUX_DM_ARRAY_H + +#include "dm-btree.h" + +/*----------------------------------------------------------------*/ + +/* + * The dm-array is a persistent version of an array. It packs the data + * more efficiently than a btree which will result in less disk space use, + * and a performance boost. The element get and set operations are still + * O(ln(n)), but with a much smaller constant. + * + * The value type structure is reused from the btree type to support proper + * reference counting of values. + * + * The arrays implicitly know their length, and bounds are checked for + * lookups and updated. It doesn't store this in an accessible place + * because it would waste a whole metadata block. Make sure you store the + * size along with the array root in your encompassing data. + * + * Array entries are indexed via an unsigned integer starting from zero. + * Arrays are not sparse; if you resize an array to have 'n' entries then + * 'n - 1' will be the last valid index. + * + * Typical use: + * + * a) initialise a dm_array_info structure. This describes the array + * values and ties it into a specific transaction manager. It holds no + * instance data; the same info can be used for many similar arrays if + * you wish. + * + * b) Get yourself a root. The root is the index of a block of data on the + * disk that holds a particular instance of an array. You may have a + * pre existing root in your metadata that you wish to use, or you may + * want to create a brand new, empty array with dm_array_empty(). + * + * Like the other data structures in this library, dm_array objects are + * immutable between transactions. Update functions will return you the + * root for a _new_ array. If you've incremented the old root, via + * dm_tm_inc(), before calling the update function you may continue to use + * it in parallel with the new root. + * + * c) resize an array with dm_array_resize(). + * + * d) Get a value from the array with dm_array_get_value(). + * + * e) Set a value in the array with dm_array_set_value(). + * + * f) Walk an array of values in index order with dm_array_walk(). More + * efficient than making many calls to dm_array_get_value(). + * + * g) Destroy the array with dm_array_del(). This tells the transaction + * manager that you're no longer using this data structure so it can + * recycle it's blocks. (dm_array_dec() would be a better name for it, + * but del is in keeping with dm_btree_del()). + */ + +/* + * Describes an array. Don't initialise this structure yourself, use the + * init function below. + */ +struct dm_array_info { + struct dm_transaction_manager *tm; + struct dm_btree_value_type value_type; + struct dm_btree_info btree_info; +}; + +/* + * Sets up a dm_array_info structure. You don't need to do anything with + * this structure when you finish using it. + * + * info - the structure being filled in. + * tm - the transaction manager that should supervise this structure. + * vt - describes the leaf values. + */ +void dm_array_info_init(struct dm_array_info *info, + struct dm_transaction_manager *tm, + struct dm_btree_value_type *vt); + +/* + * Create an empty, zero length array. + * + * info - describes the array + * root - on success this will be filled out with the root block + */ +int dm_array_empty(struct dm_array_info *info, dm_block_t *root); + +/* + * Resizes the array. + * + * info - describes the array + * root - the root block of the array on disk + * old_size - the caller is responsible for remembering the size of + * the array + * new_size - can be bigger or smaller than old_size + * value - if we're growing the array the new entries will have this value + * new_root - on success, points to the new root block + * + * If growing the inc function for 'value' will be called the appropriate + * number of times. So if the caller is holding a reference they may want + * to drop it. + */ +int dm_array_resize(struct dm_array_info *info, dm_block_t root, + uint32_t old_size, uint32_t new_size, + const void *value, dm_block_t *new_root) + __dm_written_to_disk(value); + +/* + * Frees a whole array. The value_type's decrement operation will be called + * for all values in the array + */ +int dm_array_del(struct dm_array_info *info, dm_block_t root); + +/* + * Lookup a value in the array + * + * info - describes the array + * root - root block of the array + * index - array index + * value - the value to be read. Will be in on-disk format of course. + * + * -ENODATA will be returned if the index is out of bounds. + */ +int dm_array_get_value(struct dm_array_info *info, dm_block_t root, + uint32_t index, void *value); + +/* + * Set an entry in the array. + * + * info - describes the array + * root - root block of the array + * index - array index + * value - value to be written to disk. Make sure you confirm the value is + * in on-disk format with__dm_bless_for_disk() before calling. + * new_root - the new root block + * + * The old value being overwritten will be decremented, the new value + * incremented. + * + * -ENODATA will be returned if the index is out of bounds. + */ +int dm_array_set_value(struct dm_array_info *info, dm_block_t root, + uint32_t index, const void *value, dm_block_t *new_root) + __dm_written_to_disk(value); + +/* + * Walk through all the entries in an array. + * + * info - describes the array + * root - root block of the array + * fn - called back for every element + * context - passed to the callback + */ +int dm_array_walk(struct dm_array_info *info, dm_block_t root, + int (*fn)(void *context, uint64_t key, void *leaf), + void *context); + +/*----------------------------------------------------------------*/ + +#endif /* _LINUX_DM_ARRAY_H */ diff --git a/drivers/md/persistent-data/dm-bitset.c b/drivers/md/persistent-data/dm-bitset.c new file mode 100644 index 0000000..cd9a86d --- /dev/null +++ b/drivers/md/persistent-data/dm-bitset.c @@ -0,0 +1,163 @@ +/* + * Copyright (C) 2012 Red Hat, Inc. + * + * This file is released under the GPL. + */ + +#include "dm-bitset.h" +#include "dm-transaction-manager.h" + +#include <linux/export.h> +#include <linux/device-mapper.h> + +#define DM_MSG_PREFIX "bitset" +#define BITS_PER_ARRAY_ENTRY 64 + +/*----------------------------------------------------------------*/ + +static struct dm_btree_value_type bitset_bvt = { + .context = NULL, + .size = sizeof(__le64), + .inc = NULL, + .dec = NULL, + .equal = NULL, +}; + +/*----------------------------------------------------------------*/ + +void dm_disk_bitset_init(struct dm_transaction_manager *tm, + struct dm_disk_bitset *info) +{ + dm_array_info_init(&info->array_info, tm, &bitset_bvt); + info->current_index_set = false; +} +EXPORT_SYMBOL_GPL(dm_disk_bitset_init); + +int dm_bitset_empty(struct dm_disk_bitset *info, dm_block_t *root) +{ + return dm_array_empty(&info->array_info, root); +} +EXPORT_SYMBOL_GPL(dm_bitset_empty); + +int dm_bitset_resize(struct dm_disk_bitset *info, dm_block_t root, + uint32_t old_nr_entries, uint32_t new_nr_entries, + bool default_value, dm_block_t *new_root) +{ + uint32_t old_blocks = dm_div_up(old_nr_entries, BITS_PER_ARRAY_ENTRY); + uint32_t new_blocks = dm_div_up(new_nr_entries, BITS_PER_ARRAY_ENTRY); + __le64 value = default_value ? cpu_to_le64(~0) : cpu_to_le64(0); + + __dm_bless_for_disk(&value); + return dm_array_resize(&info->array_info, root, old_blocks, new_blocks, + &value, new_root); +} +EXPORT_SYMBOL_GPL(dm_bitset_resize); + +int dm_bitset_del(struct dm_disk_bitset *info, dm_block_t root) +{ + return dm_array_del(&info->array_info, root); +} +EXPORT_SYMBOL_GPL(dm_bitset_del); + +int dm_bitset_flush(struct dm_disk_bitset *info, dm_block_t root, + dm_block_t *new_root) +{ + int r; + __le64 value; + + if (!info->current_index_set) + return 0; + + value = cpu_to_le64(info->current_bits); + + __dm_bless_for_disk(&value); + r = dm_array_set_value(&info->array_info, root, info->current_index, + &value, new_root); + if (r) + return r; + + info->current_index_set = false; + return 0; +} +EXPORT_SYMBOL_GPL(dm_bitset_flush); + +static int read_bits(struct dm_disk_bitset *info, dm_block_t root, + uint32_t array_index) +{ + int r; + __le64 value; + + r = dm_array_get_value(&info->array_info, root, array_index, &value); + if (r) + return r; + + info->current_bits = le64_to_cpu(value); + info->current_index_set = true; + info->current_index = array_index; + return 0; +} + +static int get_array_entry(struct dm_disk_bitset *info, dm_block_t root, + uint32_t index, dm_block_t *new_root) +{ + int r; + unsigned array_index = index / BITS_PER_ARRAY_ENTRY; + + if (info->current_index_set) { + if (info->current_index == array_index) + return 0; + + r = dm_bitset_flush(info, root, new_root); + if (r) + return r; + } + + return read_bits(info, root, array_index); +} + +int dm_bitset_set_bit(struct dm_disk_bitset *info, dm_block_t root, + uint32_t index, dm_block_t *new_root) +{ + int r; + unsigned b = index % BITS_PER_ARRAY_ENTRY; + + r = get_array_entry(info, root, index, new_root); + if (r) + return r; + + set_bit(b, (unsigned long *) &info->current_bits); + return 0; +} +EXPORT_SYMBOL_GPL(dm_bitset_set_bit); + +int dm_bitset_clear_bit(struct dm_disk_bitset *info, dm_block_t root, + uint32_t index, dm_block_t *new_root) +{ + int r; + unsigned b = index % BITS_PER_ARRAY_ENTRY; + + r = get_array_entry(info, root, index, new_root); + if (r) + return r; + + clear_bit(b, (unsigned long *) &info->current_bits); + return 0; +} +EXPORT_SYMBOL_GPL(dm_bitset_clear_bit); + +int dm_bitset_test_bit(struct dm_disk_bitset *info, dm_block_t root, + uint32_t index, dm_block_t *new_root, bool *result) +{ + int r; + unsigned b = index % BITS_PER_ARRAY_ENTRY; + + r = get_array_entry(info, root, index, new_root); + if (r) + return r; + + *result = test_bit(b, (unsigned long *) &info->current_bits); + return 0; +} +EXPORT_SYMBOL_GPL(dm_bitset_test_bit); + +/*----------------------------------------------------------------*/ diff --git a/drivers/md/persistent-data/dm-bitset.h b/drivers/md/persistent-data/dm-bitset.h new file mode 100644 index 0000000..e1b9bea --- /dev/null +++ b/drivers/md/persistent-data/dm-bitset.h @@ -0,0 +1,165 @@ +/* + * Copyright (C) 2012 Red Hat, Inc. + * + * This file is released under the GPL. + */ +#ifndef _LINUX_DM_BITSET_H +#define _LINUX_DM_BITSET_H + +#include "dm-array.h" + +/*----------------------------------------------------------------*/ + +/* + * This bitset type is a thin wrapper round a dm_array of 64bit words. It + * uses a tiny, one word cache to reduce the number of array lookups and so + * increase performance. + * + * Like the dm-array that it's based on, the caller needs to keep track of + * the size of the bitset separately. The underlying dm-array implicitly + * knows how many words it's storing and will return -ENODATA if you try + * and access an out of bounds word. However, an out of bounds bit in the + * final word will _not_ be detected, you have been warned. + * + * Bits are indexed from zero. + + * Typical use: + * + * a) Initialise a dm_disk_bitset structure with dm_disk_bitset_init(). + * This describes the bitset and includes the cache. It's not called it + * dm_bitset_info in line with other data structures because it does + * include instance data. + * + * b) Get yourself a root. The root is the index of a block of data on the + * disk that holds a particular instance of an bitset. You may have a + * pre existing root in your metadata that you wish to use, or you may + * want to create a brand new, empty bitset with dm_bitset_empty(). + * + * Like the other data structures in this library, dm_bitset objects are + * immutable between transactions. Update functions will return you the + * root for a _new_ array. If you've incremented the old root, via + * dm_tm_inc(), before calling the update function you may continue to use + * it in parallel with the new root. + * + * Even read operations may trigger the cache to be flushed and as such + * return a root for a new, updated bitset. + * + * c) resize a bitset with dm_bitset_resize(). + * + * d) Set a bit with dm_bitset_set_bit(). + * + * e) Clear a bit with dm_bitset_clear_bit(). + * + * f) Test a bit with dm_bitset_test_bit(). + * + * g) Flush all updates from the cache with dm_bitset_flush(). + * + * h) Destroy the bitset with dm_bitset_del(). This tells the transaction + * manager that you're no longer using this data structure so it can + * recycle it's blocks. (dm_bitset_dec() would be a better name for it, + * but del is in keeping with dm_btree_del()). + */ + +/* + * Opaque object. Unlike dm_array_info, you should have one of these per + * bitset. Initialise with dm_disk_bitset_init(). + */ +struct dm_disk_bitset { + struct dm_array_info array_info; + + uint32_t current_index; + uint64_t current_bits; + + bool current_index_set:1; +}; + +/* + * Sets up a dm_disk_bitset structure. You don't need to do anything with + * this structure when you finish using it. + * + * tm - the transaction manager that should supervise this structure + * info - the structure being initialised + */ +void dm_disk_bitset_init(struct dm_transaction_manager *tm, + struct dm_disk_bitset *info); + +/* + * Create an empty, zero length bitset. + * + * info - describes the bitset + * new_root - on success, points to the new root block + */ +int dm_bitset_empty(struct dm_disk_bitset *info, dm_block_t *new_root); + +/* + * Resize the bitset. + * + * info - describes the bitset + * old_root - the root block of the array on disk + * old_nr_entries - the number of bits in the old bitset + * new_nr_entries - the number of bits you want in the new bitset + * default_value - the value for any new bits + * new_root - on success, points to the new root block + */ +int dm_bitset_resize(struct dm_disk_bitset *info, dm_block_t old_root, + uint32_t old_nr_entries, uint32_t new_nr_entries, + bool default_value, dm_block_t *new_root); + +/* + * Frees the bitset. + */ +int dm_bitset_del(struct dm_disk_bitset *info, dm_block_t root); + +/* + * Set a bit. + * + * info - describes the bitset + * root - the root block of the bitset + * index - the bit index + * new_root - on success, points to the new root block + * + * -ENODATA will be returned if the index is out of bounds. + */ +int dm_bitset_set_bit(struct dm_disk_bitset *info, dm_block_t root, + uint32_t index, dm_block_t *new_root); + +/* + * Clears a bit. + * + * info - describes the bitset + * root - the root block of the bitset + * index - the bit index + * new_root - on success, points to the new root block + * + * -ENODATA will be returned if the index is out of bounds. + */ +int dm_bitset_clear_bit(struct dm_disk_bitset *info, dm_block_t root, + uint32_t index, dm_block_t *new_root); + +/* + * Tests a bit. + * + * info - describes the bitset + * root - the root block of the bitset + * index - the bit index + * new_root - on success, points to the new root block (cached values may have been written) + * result - the bit value you're after + * + * -ENODATA will be returned if the index is out of bounds. + */ +int dm_bitset_test_bit(struct dm_disk_bitset *info, dm_block_t root, + uint32_t index, dm_block_t *new_root, bool *result); + +/* + * Flush any cached changes to disk. + * + * info - describes the bitset + * root - the root block of the bitset + * new_root - on success, points to the new root block + */ +int dm_bitset_flush(struct dm_disk_bitset *info, dm_block_t root, + dm_block_t *new_root); + +/*----------------------------------------------------------------*/ + +#endif /* _LINUX_DM_BITSET_H */ diff --git a/drivers/md/persistent-data/dm-block-manager.c b/drivers/md/persistent-data/dm-block-manager.c index 28c3ed0..81b5138 100644 --- a/drivers/md/persistent-data/dm-block-manager.c +++ b/drivers/md/persistent-data/dm-block-manager.c @@ -613,6 +613,7 @@ int dm_bm_flush_and_unlock(struct dm_block_manager *bm, return dm_bufio_write_dirty_buffers(bm->bufio); } +EXPORT_SYMBOL_GPL(dm_bm_flush_and_unlock); void dm_bm_set_read_only(struct dm_block_manager *bm) { diff --git a/drivers/md/persistent-data/dm-btree-internal.h b/drivers/md/persistent-data/dm-btree-internal.h index accbb05..37d367b 100644 --- a/drivers/md/persistent-data/dm-btree-internal.h +++ b/drivers/md/persistent-data/dm-btree-internal.h @@ -64,6 +64,7 @@ struct ro_spine { void init_ro_spine(struct ro_spine *s, struct dm_btree_info *info); int exit_ro_spine(struct ro_spine *s); int ro_step(struct ro_spine *s, dm_block_t new_child); +void ro_pop(struct ro_spine *s); struct btree_node *ro_node(struct ro_spine *s); struct shadow_spine { diff --git a/drivers/md/persistent-data/dm-btree-spine.c b/drivers/md/persistent-data/dm-btree-spine.c index f199a0c..cf9fd67 100644 --- a/drivers/md/persistent-data/dm-btree-spine.c +++ b/drivers/md/persistent-data/dm-btree-spine.c @@ -164,6 +164,13 @@ int ro_step(struct ro_spine *s, dm_block_t new_child) return r; } +void ro_pop(struct ro_spine *s) +{ + BUG_ON(!s->count); + --s->count; + unlock_block(s->info, s->nodes[s->count]); +} + struct btree_node *ro_node(struct ro_spine *s) { struct dm_block *block; diff --git a/drivers/md/persistent-data/dm-btree.c b/drivers/md/persistent-data/dm-btree.c index 4caf669..3586542 100644 --- a/drivers/md/persistent-data/dm-btree.c +++ b/drivers/md/persistent-data/dm-btree.c @@ -807,3 +807,55 @@ int dm_btree_find_highest_key(struct dm_btree_info *info, dm_block_t root, return r ? r : count; } EXPORT_SYMBOL_GPL(dm_btree_find_highest_key); + +/* + * FIXME: We shouldn't use a recursive algorithm when we have limited stack + * space. Also this only works for single level trees. + */ +static int walk_node(struct ro_spine *s, dm_block_t block, + int (*fn)(void *context, uint64_t *keys, void *leaf), + void *context) +{ + int r; + unsigned i, nr; + struct btree_node *n; + uint64_t keys; + + r = ro_step(s, block); + n = ro_node(s); + + nr = le32_to_cpu(n->header.nr_entries); + for (i = 0; i < nr; i++) { + if (le32_to_cpu(n->header.flags) & INTERNAL_NODE) { + r = walk_node(s, value64(n, i), fn, context); + if (r) + goto out; + } else { + keys = le64_to_cpu(*key_ptr(n, i)); + r = fn(context, &keys, value_ptr(n, i)); + if (r) + goto out; + } + } + +out: + ro_pop(s); + return r; +} + +int dm_btree_walk(struct dm_btree_info *info, dm_block_t root, + int (*fn)(void *context, uint64_t *keys, void *leaf), + void *context) +{ + int r; + struct ro_spine spine; + + BUG_ON(info->levels > 1); + + init_ro_spine(&spine, info); + r = walk_node(&spine, root, fn, context); + exit_ro_spine(&spine); + + return r; +} +EXPORT_SYMBOL_GPL(dm_btree_walk); diff --git a/drivers/md/persistent-data/dm-btree.h b/drivers/md/persistent-data/dm-btree.h index a2cd5044..8672d15 100644 --- a/drivers/md/persistent-data/dm-btree.h +++ b/drivers/md/persistent-data/dm-btree.h @@ -58,21 +58,21 @@ struct dm_btree_value_type { * somewhere.) This method is _not_ called for insertion of a new * value: It is assumed the ref count is already 1. */ - void (*inc)(void *context, void *value); + void (*inc)(void *context, const void *value); /* * This value is being deleted. The btree takes care of freeing * the memory pointed to by @value. Often the del function just * needs to decrement a reference count somewhere. */ - void (*dec)(void *context, void *value); + void (*dec)(void *context, const void *value); /* * A test for equality between two values. When a value is * overwritten with a new one, the old one has the dec method * called _unless_ the new and old value are deemed equal. */ - int (*equal)(void *context, void *value1, void *value2); + int (*equal)(void *context, const void *value1, const void *value2); }; /* @@ -142,4 +142,13 @@ int dm_btree_remove(struct dm_btree_info *info, dm_block_t root, int dm_btree_find_highest_key(struct dm_btree_info *info, dm_block_t root, uint64_t *result_keys); +/* + * Iterate through the a btree, calling fn() on each entry. + * It only works for single level trees and is internally recursive, so + * monitor stack usage carefully. + */ +int dm_btree_walk(struct dm_btree_info *info, dm_block_t root, + int (*fn)(void *context, uint64_t *keys, void *leaf), + void *context); + #endif /* _LINUX_DM_BTREE_H */ diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 697f026..5af2d27 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -184,8 +184,6 @@ static void return_io(struct bio *return_bi) return_bi = bi->bi_next; bi->bi_next = NULL; bi->bi_size = 0; - trace_block_bio_complete(bdev_get_queue(bi->bi_bdev), - bi, 0); bio_endio(bi, 0); bi = return_bi; } @@ -3916,8 +3914,6 @@ static void raid5_align_endio(struct bio *bi, int error) rdev_dec_pending(rdev, conf->mddev); if (!error && uptodate) { - trace_block_bio_complete(bdev_get_queue(raid_bi->bi_bdev), - raid_bi, 0); bio_endio(raid_bi, 0); if (atomic_dec_and_test(&conf->active_aligned_reads)) wake_up(&conf->wait_for_stripe); @@ -4376,8 +4372,6 @@ static void make_request(struct mddev *mddev, struct bio * bi) if ( rw == WRITE ) md_write_end(mddev); - trace_block_bio_complete(bdev_get_queue(bi->bi_bdev), - bi, 0); bio_endio(bi, 0); } } @@ -4754,11 +4748,8 @@ static int retry_aligned_read(struct r5conf *conf, struct bio *raid_bio) handled++; } remaining = raid5_dec_bi_active_stripes(raid_bio); - if (remaining == 0) { - trace_block_bio_complete(bdev_get_queue(raid_bio->bi_bdev), - raid_bio, 0); + if (remaining == 0) bio_endio(raid_bio, 0); - } if (atomic_dec_and_test(&conf->active_aligned_reads)) wake_up(&conf->wait_for_stripe); return handled; diff --git a/drivers/media/platform/omap3isp/isp.c b/drivers/media/platform/omap3isp/isp.c index 383a727..6e5ad8e 100644 --- a/drivers/media/platform/omap3isp/isp.c +++ b/drivers/media/platform/omap3isp/isp.c @@ -1338,28 +1338,15 @@ static int isp_enable_clocks(struct isp_device *isp) { int r; unsigned long rate; - int divisor; - - /* - * cam_mclk clock chain: - * dpll4 -> dpll4_m5 -> dpll4_m5x2 -> cam_mclk - * - * In OMAP3630 dpll4_m5x2 != 2 x dpll4_m5 but both are - * set to the same value. Hence the rate set for dpll4_m5 - * has to be twice of what is set on OMAP3430 to get - * the required value for cam_mclk - */ - divisor = isp->revision == ISP_REVISION_15_0 ? 1 : 2; r = clk_prepare_enable(isp->clock[ISP_CLK_CAM_ICK]); if (r) { dev_err(isp->dev, "failed to enable cam_ick clock\n"); goto out_clk_enable_ick; } - r = clk_set_rate(isp->clock[ISP_CLK_DPLL4_M5_CK], - CM_CAM_MCLK_HZ/divisor); + r = clk_set_rate(isp->clock[ISP_CLK_CAM_MCLK], CM_CAM_MCLK_HZ); if (r) { - dev_err(isp->dev, "clk_set_rate for dpll4_m5_ck failed\n"); + dev_err(isp->dev, "clk_set_rate for cam_mclk failed\n"); goto out_clk_enable_mclk; } r = clk_prepare_enable(isp->clock[ISP_CLK_CAM_MCLK]); @@ -1401,7 +1388,6 @@ static void isp_disable_clocks(struct isp_device *isp) static const char *isp_clocks[] = { "cam_ick", "cam_mclk", - "dpll4_m5_ck", "csi2_96m_fck", "l3_ick", }; diff --git a/drivers/media/platform/omap3isp/isp.h b/drivers/media/platform/omap3isp/isp.h index 517d348..c77e1f2 100644 --- a/drivers/media/platform/omap3isp/isp.h +++ b/drivers/media/platform/omap3isp/isp.h @@ -147,7 +147,6 @@ struct isp_platform_callback { * @ref_count: Reference count for handling multiple ISP requests. * @cam_ick: Pointer to camera interface clock structure. * @cam_mclk: Pointer to camera functional clock structure. - * @dpll4_m5_ck: Pointer to DPLL4 M5 clock structure. * @csi2_fck: Pointer to camera CSI2 complexIO clock structure. * @l3_ick: Pointer to OMAP3 L3 bus interface clock. * @irq: Currently attached ISP ISR callbacks information structure. @@ -189,10 +188,9 @@ struct isp_device { u32 xclk_divisor[2]; /* Two clocks, a and b. */ #define ISP_CLK_CAM_ICK 0 #define ISP_CLK_CAM_MCLK 1 -#define ISP_CLK_DPLL4_M5_CK 2 -#define ISP_CLK_CSI2_FCK 3 -#define ISP_CLK_L3_ICK 4 - struct clk *clock[5]; +#define ISP_CLK_CSI2_FCK 2 +#define ISP_CLK_L3_ICK 3 + struct clk *clock[4]; /* ISP modules */ struct ispstat isp_af; diff --git a/drivers/misc/kgdbts.c b/drivers/misc/kgdbts.c index 3aa9a96..36f5d52 100644 --- a/drivers/misc/kgdbts.c +++ b/drivers/misc/kgdbts.c @@ -103,6 +103,7 @@ #include <linux/delay.h> #include <linux/kthread.h> #include <linux/module.h> +#include <asm/sections.h> #define v1printk(a...) do { \ if (verbose) \ @@ -222,6 +223,7 @@ static unsigned long lookup_addr(char *arg) addr = (unsigned long)do_fork; else if (!strcmp(arg, "hw_break_val")) addr = (unsigned long)&hw_break_val; + addr = (unsigned long) dereference_function_descriptor((void *)addr); return addr; } diff --git a/drivers/mtd/Kconfig b/drivers/mtd/Kconfig index 03f2eb5..557bec5 100644 --- a/drivers/mtd/Kconfig +++ b/drivers/mtd/Kconfig @@ -74,8 +74,8 @@ config MTD_REDBOOT_PARTS_READONLY endif # MTD_REDBOOT_PARTS config MTD_CMDLINE_PARTS - bool "Command line partition table parsing" - depends on MTD = "y" + tristate "Command line partition table parsing" + depends on MTD ---help--- Allow generic configuration of the MTD partition tables via the kernel command line. Multiple flash resources are supported for hardware where diff --git a/drivers/mtd/ar7part.c b/drivers/mtd/ar7part.c index 7c057a0..ddc0a42 100644 --- a/drivers/mtd/ar7part.c +++ b/drivers/mtd/ar7part.c @@ -142,7 +142,13 @@ static int __init ar7_parser_init(void) return register_mtd_parser(&ar7_parser); } +static void __exit ar7_parser_exit(void) +{ + deregister_mtd_parser(&ar7_parser); +} + module_init(ar7_parser_init); +module_exit(ar7_parser_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR( "Felix Fietkau <nbd@openwrt.org>, " diff --git a/drivers/mtd/bcm47xxpart.c b/drivers/mtd/bcm47xxpart.c index e06d782..63feb75 100644 --- a/drivers/mtd/bcm47xxpart.c +++ b/drivers/mtd/bcm47xxpart.c @@ -14,17 +14,11 @@ #include <linux/slab.h> #include <linux/mtd/mtd.h> #include <linux/mtd/partitions.h> -#include <asm/mach-bcm47xx/nvram.h> +#include <bcm47xx_nvram.h> /* 10 parts were found on sflash on Netgear WNDR4500 */ #define BCM47XXPART_MAX_PARTS 12 -/* - * Amount of bytes we read when analyzing each block of flash memory. - * Set it big enough to allow detecting partition and reading important data. - */ -#define BCM47XXPART_BYTES_TO_READ 0x404 - /* Magics */ #define BOARD_DATA_MAGIC 0x5246504D /* MPFR */ #define POT_MAGIC1 0x54544f50 /* POTT */ @@ -59,13 +53,21 @@ static int bcm47xxpart_parse(struct mtd_info *master, uint32_t *buf; size_t bytes_read; uint32_t offset; - uint32_t blocksize = 0x10000; + uint32_t blocksize = master->erasesize; struct trx_header *trx; + int trx_part = -1; + int last_trx_part = -1; + int max_bytes_to_read = 0x8004; + + if (blocksize <= 0x10000) + blocksize = 0x10000; + if (blocksize == 0x20000) + max_bytes_to_read = 0x18004; /* Alloc */ parts = kzalloc(sizeof(struct mtd_partition) * BCM47XXPART_MAX_PARTS, GFP_KERNEL); - buf = kzalloc(BCM47XXPART_BYTES_TO_READ, GFP_KERNEL); + buf = kzalloc(max_bytes_to_read, GFP_KERNEL); /* Parse block by block looking for magics */ for (offset = 0; offset <= master->size - blocksize; @@ -80,7 +82,7 @@ static int bcm47xxpart_parse(struct mtd_info *master, } /* Read beginning of the block */ - if (mtd_read(master, offset, BCM47XXPART_BYTES_TO_READ, + if (mtd_read(master, offset, max_bytes_to_read, &bytes_read, (uint8_t *)buf) < 0) { pr_err("mtd_read error while parsing (offset: 0x%X)!\n", offset); @@ -95,9 +97,16 @@ static int bcm47xxpart_parse(struct mtd_info *master, } /* Standard NVRAM */ - if (buf[0x000 / 4] == NVRAM_HEADER) { + if (buf[0x000 / 4] == NVRAM_HEADER || + buf[0x1000 / 4] == NVRAM_HEADER || + buf[0x8000 / 4] == NVRAM_HEADER || + (blocksize == 0x20000 && ( + buf[0x10000 / 4] == NVRAM_HEADER || + buf[0x11000 / 4] == NVRAM_HEADER || + buf[0x18000 / 4] == NVRAM_HEADER))) { bcm47xxpart_add_part(&parts[curr_part++], "nvram", offset, 0); + offset = rounddown(offset, blocksize); continue; } @@ -131,6 +140,10 @@ static int bcm47xxpart_parse(struct mtd_info *master, if (buf[0x000 / 4] == TRX_MAGIC) { trx = (struct trx_header *)buf; + trx_part = curr_part; + bcm47xxpart_add_part(&parts[curr_part++], "firmware", + offset, 0); + i = 0; /* We have LZMA loader if offset[2] points to sth */ if (trx->offset[2]) { @@ -154,6 +167,8 @@ static int bcm47xxpart_parse(struct mtd_info *master, offset + trx->offset[i], 0); i++; + last_trx_part = curr_part - 1; + /* * We have whole TRX scanned, skip to the next part. Use * roundown (not roundup), as the loop will increase @@ -169,11 +184,15 @@ static int bcm47xxpart_parse(struct mtd_info *master, * Assume that partitions end at the beginning of the one they are * followed by. */ - for (i = 0; i < curr_part - 1; i++) - parts[i].size = parts[i + 1].offset - parts[i].offset; - if (curr_part > 0) - parts[curr_part - 1].size = - master->size - parts[curr_part - 1].offset; + for (i = 0; i < curr_part; i++) { + u64 next_part_offset = (i < curr_part - 1) ? + parts[i + 1].offset : master->size; + + parts[i].size = next_part_offset - parts[i].offset; + if (i == last_trx_part && trx_part >= 0) + parts[trx_part].size = next_part_offset - + parts[trx_part].offset; + } *pparts = parts; return curr_part; diff --git a/drivers/mtd/chips/cfi_cmdset_0002.c b/drivers/mtd/chips/cfi_cmdset_0002.c index b861972..fff665d 100644 --- a/drivers/mtd/chips/cfi_cmdset_0002.c +++ b/drivers/mtd/chips/cfi_cmdset_0002.c @@ -33,6 +33,8 @@ #include <linux/delay.h> #include <linux/interrupt.h> #include <linux/reboot.h> +#include <linux/of.h> +#include <linux/of_platform.h> #include <linux/mtd/map.h> #include <linux/mtd/mtd.h> #include <linux/mtd/cfi.h> @@ -74,6 +76,10 @@ static void put_chip(struct map_info *map, struct flchip *chip, unsigned long ad static int cfi_atmel_lock(struct mtd_info *mtd, loff_t ofs, uint64_t len); static int cfi_atmel_unlock(struct mtd_info *mtd, loff_t ofs, uint64_t len); +static int cfi_ppb_lock(struct mtd_info *mtd, loff_t ofs, uint64_t len); +static int cfi_ppb_unlock(struct mtd_info *mtd, loff_t ofs, uint64_t len); +static int cfi_ppb_is_locked(struct mtd_info *mtd, loff_t ofs, uint64_t len); + static struct mtd_chip_driver cfi_amdstd_chipdrv = { .probe = NULL, /* Not usable directly */ .destroy = cfi_amdstd_destroy, @@ -496,6 +502,7 @@ static void cfi_fixup_m29ew_delay_after_resume(struct cfi_private *cfi) struct mtd_info *cfi_cmdset_0002(struct map_info *map, int primary) { struct cfi_private *cfi = map->fldrv_priv; + struct device_node __maybe_unused *np = map->device_node; struct mtd_info *mtd; int i; @@ -570,6 +577,17 @@ struct mtd_info *cfi_cmdset_0002(struct map_info *map, int primary) cfi_tell_features(extp); #endif +#ifdef CONFIG_OF + if (np && of_property_read_bool( + np, "use-advanced-sector-protection") + && extp->BlkProtUnprot == 8) { + printk(KERN_INFO " Advanced Sector Protection (PPB Locking) supported\n"); + mtd->_lock = cfi_ppb_lock; + mtd->_unlock = cfi_ppb_unlock; + mtd->_is_locked = cfi_ppb_is_locked; + } +#endif + bootloc = extp->TopBottom; if ((bootloc < 2) || (bootloc > 5)) { printk(KERN_WARNING "%s: CFI contains unrecognised boot " @@ -2172,6 +2190,205 @@ static int cfi_atmel_unlock(struct mtd_info *mtd, loff_t ofs, uint64_t len) return cfi_varsize_frob(mtd, do_atmel_unlock, ofs, len, NULL); } +/* + * Advanced Sector Protection - PPB (Persistent Protection Bit) locking + */ + +struct ppb_lock { + struct flchip *chip; + loff_t offset; + int locked; +}; + +#define MAX_SECTORS 512 + +#define DO_XXLOCK_ONEBLOCK_LOCK ((void *)1) +#define DO_XXLOCK_ONEBLOCK_UNLOCK ((void *)2) +#define DO_XXLOCK_ONEBLOCK_GETLOCK ((void *)3) + +static int __maybe_unused do_ppb_xxlock(struct map_info *map, + struct flchip *chip, + unsigned long adr, int len, void *thunk) +{ + struct cfi_private *cfi = map->fldrv_priv; + unsigned long timeo; + int ret; + + mutex_lock(&chip->mutex); + ret = get_chip(map, chip, adr + chip->start, FL_LOCKING); + if (ret) { + mutex_unlock(&chip->mutex); + return ret; + } + + pr_debug("MTD %s(): XXLOCK 0x%08lx len %d\n", __func__, adr, len); + + cfi_send_gen_cmd(0xAA, cfi->addr_unlock1, chip->start, map, cfi, + cfi->device_type, NULL); + cfi_send_gen_cmd(0x55, cfi->addr_unlock2, chip->start, map, cfi, + cfi->device_type, NULL); + /* PPB entry command */ + cfi_send_gen_cmd(0xC0, cfi->addr_unlock1, chip->start, map, cfi, + cfi->device_type, NULL); + + if (thunk == DO_XXLOCK_ONEBLOCK_LOCK) { + chip->state = FL_LOCKING; + map_write(map, CMD(0xA0), chip->start + adr); + map_write(map, CMD(0x00), chip->start + adr); + } else if (thunk == DO_XXLOCK_ONEBLOCK_UNLOCK) { + /* + * Unlocking of one specific sector is not supported, so we + * have to unlock all sectors of this device instead + */ + chip->state = FL_UNLOCKING; + map_write(map, CMD(0x80), chip->start); + map_write(map, CMD(0x30), chip->start); + } else if (thunk == DO_XXLOCK_ONEBLOCK_GETLOCK) { + chip->state = FL_JEDEC_QUERY; + /* Return locked status: 0->locked, 1->unlocked */ + ret = !cfi_read_query(map, adr); + } else + BUG(); + + /* + * Wait for some time as unlocking of all sectors takes quite long + */ + timeo = jiffies + msecs_to_jiffies(2000); /* 2s max (un)locking */ + for (;;) { + if (chip_ready(map, adr)) + break; + + if (time_after(jiffies, timeo)) { + printk(KERN_ERR "Waiting for chip to be ready timed out.\n"); + ret = -EIO; + break; + } + + UDELAY(map, chip, adr, 1); + } + + /* Exit BC commands */ + map_write(map, CMD(0x90), chip->start); + map_write(map, CMD(0x00), chip->start); + + chip->state = FL_READY; + put_chip(map, chip, adr + chip->start); + mutex_unlock(&chip->mutex); + + return ret; +} + +static int __maybe_unused cfi_ppb_lock(struct mtd_info *mtd, loff_t ofs, + uint64_t len) +{ + return cfi_varsize_frob(mtd, do_ppb_xxlock, ofs, len, + DO_XXLOCK_ONEBLOCK_LOCK); +} + +static int __maybe_unused cfi_ppb_unlock(struct mtd_info *mtd, loff_t ofs, + uint64_t len) +{ + struct mtd_erase_region_info *regions = mtd->eraseregions; + struct map_info *map = mtd->priv; + struct cfi_private *cfi = map->fldrv_priv; + struct ppb_lock *sect; + unsigned long adr; + loff_t offset; + uint64_t length; + int chipnum; + int i; + int sectors; + int ret; + + /* + * PPB unlocking always unlocks all sectors of the flash chip. + * We need to re-lock all previously locked sectors. So lets + * first check the locking status of all sectors and save + * it for future use. + */ + sect = kzalloc(MAX_SECTORS * sizeof(struct ppb_lock), GFP_KERNEL); + if (!sect) + return -ENOMEM; + + /* + * This code to walk all sectors is a slightly modified version + * of the cfi_varsize_frob() code. + */ + i = 0; + chipnum = 0; + adr = 0; + sectors = 0; + offset = 0; + length = mtd->size; + + while (length) { + int size = regions[i].erasesize; + + /* + * Only test sectors that shall not be unlocked. The other + * sectors shall be unlocked, so lets keep their locking + * status at "unlocked" (locked=0) for the final re-locking. + */ + if ((adr < ofs) || (adr >= (ofs + len))) { + sect[sectors].chip = &cfi->chips[chipnum]; + sect[sectors].offset = offset; + sect[sectors].locked = do_ppb_xxlock( + map, &cfi->chips[chipnum], adr, 0, + DO_XXLOCK_ONEBLOCK_GETLOCK); + } + + adr += size; + offset += size; + length -= size; + + if (offset == regions[i].offset + size * regions[i].numblocks) + i++; + + if (adr >> cfi->chipshift) { + adr = 0; + chipnum++; + + if (chipnum >= cfi->numchips) + break; + } + + sectors++; + if (sectors >= MAX_SECTORS) { + printk(KERN_ERR "Only %d sectors for PPB locking supported!\n", + MAX_SECTORS); + kfree(sect); + return -EINVAL; + } + } + + /* Now unlock the whole chip */ + ret = cfi_varsize_frob(mtd, do_ppb_xxlock, ofs, len, + DO_XXLOCK_ONEBLOCK_UNLOCK); + if (ret) { + kfree(sect); + return ret; + } + + /* + * PPB unlocking always unlocks all sectors of the flash chip. + * We need to re-lock all previously locked sectors. + */ + for (i = 0; i < sectors; i++) { + if (sect[i].locked) + do_ppb_xxlock(map, sect[i].chip, sect[i].offset, 0, + DO_XXLOCK_ONEBLOCK_LOCK); + } + + kfree(sect); + return ret; +} + +static int __maybe_unused cfi_ppb_is_locked(struct mtd_info *mtd, loff_t ofs, + uint64_t len) +{ + return cfi_varsize_frob(mtd, do_ppb_xxlock, ofs, len, + DO_XXLOCK_ONEBLOCK_GETLOCK) ? 1 : 0; +} static void cfi_amdstd_sync (struct mtd_info *mtd) { diff --git a/drivers/mtd/cmdlinepart.c b/drivers/mtd/cmdlinepart.c index c533f27..721caeb 100644 --- a/drivers/mtd/cmdlinepart.c +++ b/drivers/mtd/cmdlinepart.c @@ -22,11 +22,22 @@ * * mtdparts=<mtddef>[;<mtddef] * <mtddef> := <mtd-id>:<partdef>[,<partdef>] - * where <mtd-id> is the name from the "cat /proc/mtd" command - * <partdef> := <size>[@offset][<name>][ro][lk] + * <partdef> := <size>[@<offset>][<name>][ro][lk] * <mtd-id> := unique name used in mapping driver/device (mtd->name) * <size> := standard linux memsize OR "-" to denote all remaining space + * size is automatically truncated at end of device + * if specified or trucated size is 0 the part is skipped + * <offset> := standard linux memsize + * if omitted the part will immediately follow the previous part + * or 0 if the first part * <name> := '(' NAME ')' + * NAME will appear in /proc/mtd + * + * <size> and <offset> can be specified such that the parts are out of order + * in physical memory and may even overlap. + * + * The parts are assigned MTD numbers in the order they are specified in the + * command line regardless of their order in physical memory. * * Examples: * @@ -70,6 +81,7 @@ struct cmdline_mtd_partition { static struct cmdline_mtd_partition *partitions; /* the command line passed to mtdpart_setup() */ +static char *mtdparts; static char *cmdline; static int cmdline_parsed; @@ -330,6 +342,14 @@ static int parse_cmdline_partitions(struct mtd_info *master, if (part->parts[i].size == SIZE_REMAINING) part->parts[i].size = master->size - offset; + if (offset + part->parts[i].size > master->size) { + printk(KERN_WARNING ERRP + "%s: partitioning exceeds flash size, truncating\n", + part->mtd_id); + part->parts[i].size = master->size - offset; + } + offset += part->parts[i].size; + if (part->parts[i].size == 0) { printk(KERN_WARNING ERRP "%s: skipping zero sized partition\n", @@ -337,16 +357,8 @@ static int parse_cmdline_partitions(struct mtd_info *master, part->num_parts--; memmove(&part->parts[i], &part->parts[i + 1], sizeof(*part->parts) * (part->num_parts - i)); - continue; - } - - if (offset + part->parts[i].size > master->size) { - printk(KERN_WARNING ERRP - "%s: partitioning exceeds flash size, truncating\n", - part->mtd_id); - part->parts[i].size = master->size - offset; + i--; } - offset += part->parts[i].size; } *pparts = kmemdup(part->parts, sizeof(*part->parts) * part->num_parts, @@ -365,7 +377,7 @@ static int parse_cmdline_partitions(struct mtd_info *master, * * This function needs to be visible for bootloaders. */ -static int mtdpart_setup(char *s) +static int __init mtdpart_setup(char *s) { cmdline = s; return 1; @@ -381,10 +393,21 @@ static struct mtd_part_parser cmdline_parser = { static int __init cmdline_parser_init(void) { + if (mtdparts) + mtdpart_setup(mtdparts); return register_mtd_parser(&cmdline_parser); } +static void __exit cmdline_parser_exit(void) +{ + deregister_mtd_parser(&cmdline_parser); +} + module_init(cmdline_parser_init); +module_exit(cmdline_parser_exit); + +MODULE_PARM_DESC(mtdparts, "Partitioning specification"); +module_param(mtdparts, charp, 0); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Marius Groeger <mag@sysgo.de>"); diff --git a/drivers/mtd/devices/Makefile b/drivers/mtd/devices/Makefile index 395733a..369a194 100644 --- a/drivers/mtd/devices/Makefile +++ b/drivers/mtd/devices/Makefile @@ -17,8 +17,10 @@ obj-$(CONFIG_MTD_LART) += lart.o obj-$(CONFIG_MTD_BLOCK2MTD) += block2mtd.o obj-$(CONFIG_MTD_DATAFLASH) += mtd_dataflash.o obj-$(CONFIG_MTD_M25P80) += m25p80.o +obj-$(CONFIG_MTD_NAND_OMAP_BCH) += elm.o obj-$(CONFIG_MTD_SPEAR_SMI) += spear_smi.o obj-$(CONFIG_MTD_SST25L) += sst25l.o obj-$(CONFIG_MTD_BCM47XXSFLASH) += bcm47xxsflash.o -CFLAGS_docg3.o += -I$(src)
\ No newline at end of file + +CFLAGS_docg3.o += -I$(src) diff --git a/drivers/mtd/devices/bcm47xxsflash.c b/drivers/mtd/devices/bcm47xxsflash.c index 4714584..9526628 100644 --- a/drivers/mtd/devices/bcm47xxsflash.c +++ b/drivers/mtd/devices/bcm47xxsflash.c @@ -5,6 +5,8 @@ #include <linux/platform_device.h> #include <linux/bcma/bcma.h> +#include "bcm47xxsflash.h" + MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Serial flash driver for BCMA bus"); @@ -13,26 +15,28 @@ static const char *probes[] = { "bcm47xxpart", NULL }; static int bcm47xxsflash_read(struct mtd_info *mtd, loff_t from, size_t len, size_t *retlen, u_char *buf) { - struct bcma_sflash *sflash = mtd->priv; + struct bcm47xxsflash *b47s = mtd->priv; /* Check address range */ if ((from + len) > mtd->size) return -EINVAL; - memcpy_fromio(buf, (void __iomem *)KSEG0ADDR(sflash->window + from), + memcpy_fromio(buf, (void __iomem *)KSEG0ADDR(b47s->window + from), len); + *retlen = len; return len; } -static void bcm47xxsflash_fill_mtd(struct bcma_sflash *sflash, - struct mtd_info *mtd) +static void bcm47xxsflash_fill_mtd(struct bcm47xxsflash *b47s) { - mtd->priv = sflash; + struct mtd_info *mtd = &b47s->mtd; + + mtd->priv = b47s; mtd->name = "bcm47xxsflash"; mtd->owner = THIS_MODULE; mtd->type = MTD_ROM; - mtd->size = sflash->size; + mtd->size = b47s->size; mtd->_read = bcm47xxsflash_read; /* TODO: implement writing support and verify/change following code */ @@ -40,19 +44,30 @@ static void bcm47xxsflash_fill_mtd(struct bcma_sflash *sflash, mtd->writebufsize = mtd->writesize = 1; } -static int bcm47xxsflash_probe(struct platform_device *pdev) +/************************************************** + * BCMA + **************************************************/ + +static int bcm47xxsflash_bcma_probe(struct platform_device *pdev) { struct bcma_sflash *sflash = dev_get_platdata(&pdev->dev); + struct bcm47xxsflash *b47s; int err; - sflash->mtd = kzalloc(sizeof(struct mtd_info), GFP_KERNEL); - if (!sflash->mtd) { + b47s = kzalloc(sizeof(*b47s), GFP_KERNEL); + if (!b47s) { err = -ENOMEM; goto out; } - bcm47xxsflash_fill_mtd(sflash, sflash->mtd); + sflash->priv = b47s; - err = mtd_device_parse_register(sflash->mtd, probes, NULL, NULL, 0); + b47s->window = sflash->window; + b47s->blocksize = sflash->blocksize; + b47s->numblocks = sflash->numblocks; + b47s->size = sflash->size; + bcm47xxsflash_fill_mtd(b47s); + + err = mtd_device_parse_register(&b47s->mtd, probes, NULL, NULL, 0); if (err) { pr_err("Failed to register MTD device: %d\n", err); goto err_dev_reg; @@ -61,34 +76,40 @@ static int bcm47xxsflash_probe(struct platform_device *pdev) return 0; err_dev_reg: - kfree(sflash->mtd); + kfree(&b47s->mtd); out: return err; } -static int bcm47xxsflash_remove(struct platform_device *pdev) +static int bcm47xxsflash_bcma_remove(struct platform_device *pdev) { struct bcma_sflash *sflash = dev_get_platdata(&pdev->dev); + struct bcm47xxsflash *b47s = sflash->priv; - mtd_device_unregister(sflash->mtd); - kfree(sflash->mtd); + mtd_device_unregister(&b47s->mtd); + kfree(b47s); return 0; } static struct platform_driver bcma_sflash_driver = { - .remove = bcm47xxsflash_remove, + .probe = bcm47xxsflash_bcma_probe, + .remove = bcm47xxsflash_bcma_remove, .driver = { .name = "bcma_sflash", .owner = THIS_MODULE, }, }; +/************************************************** + * Init + **************************************************/ + static int __init bcm47xxsflash_init(void) { int err; - err = platform_driver_probe(&bcma_sflash_driver, bcm47xxsflash_probe); + err = platform_driver_register(&bcma_sflash_driver); if (err) pr_err("Failed to register BCMA serial flash driver: %d\n", err); diff --git a/drivers/mtd/devices/bcm47xxsflash.h b/drivers/mtd/devices/bcm47xxsflash.h new file mode 100644 index 0000000..ebf6f71 --- /dev/null +++ b/drivers/mtd/devices/bcm47xxsflash.h @@ -0,0 +1,15 @@ +#ifndef __BCM47XXSFLASH_H +#define __BCM47XXSFLASH_H + +#include <linux/mtd/mtd.h> + +struct bcm47xxsflash { + u32 window; + u32 blocksize; + u16 numblocks; + u32 size; + + struct mtd_info mtd; +}; + +#endif /* BCM47XXSFLASH */ diff --git a/drivers/mtd/devices/elm.c b/drivers/mtd/devices/elm.c new file mode 100644 index 0000000..2ec5da9 --- /dev/null +++ b/drivers/mtd/devices/elm.c @@ -0,0 +1,404 @@ +/* + * Error Location Module + * + * Copyright (C) 2012 Texas Instruments Incorporated - http://www.ti.com/ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#include <linux/platform_device.h> +#include <linux/module.h> +#include <linux/interrupt.h> +#include <linux/io.h> +#include <linux/of.h> +#include <linux/pm_runtime.h> +#include <linux/platform_data/elm.h> + +#define ELM_IRQSTATUS 0x018 +#define ELM_IRQENABLE 0x01c +#define ELM_LOCATION_CONFIG 0x020 +#define ELM_PAGE_CTRL 0x080 +#define ELM_SYNDROME_FRAGMENT_0 0x400 +#define ELM_SYNDROME_FRAGMENT_6 0x418 +#define ELM_LOCATION_STATUS 0x800 +#define ELM_ERROR_LOCATION_0 0x880 + +/* ELM Interrupt Status Register */ +#define INTR_STATUS_PAGE_VALID BIT(8) + +/* ELM Interrupt Enable Register */ +#define INTR_EN_PAGE_MASK BIT(8) + +/* ELM Location Configuration Register */ +#define ECC_BCH_LEVEL_MASK 0x3 + +/* ELM syndrome */ +#define ELM_SYNDROME_VALID BIT(16) + +/* ELM_LOCATION_STATUS Register */ +#define ECC_CORRECTABLE_MASK BIT(8) +#define ECC_NB_ERRORS_MASK 0x1f + +/* ELM_ERROR_LOCATION_0-15 Registers */ +#define ECC_ERROR_LOCATION_MASK 0x1fff + +#define ELM_ECC_SIZE 0x7ff + +#define SYNDROME_FRAGMENT_REG_SIZE 0x40 +#define ERROR_LOCATION_SIZE 0x100 + +struct elm_info { + struct device *dev; + void __iomem *elm_base; + struct completion elm_completion; + struct list_head list; + enum bch_ecc bch_type; +}; + +static LIST_HEAD(elm_devices); + +static void elm_write_reg(struct elm_info *info, int offset, u32 val) +{ + writel(val, info->elm_base + offset); +} + +static u32 elm_read_reg(struct elm_info *info, int offset) +{ + return readl(info->elm_base + offset); +} + +/** + * elm_config - Configure ELM module + * @dev: ELM device + * @bch_type: Type of BCH ecc + */ +void elm_config(struct device *dev, enum bch_ecc bch_type) +{ + u32 reg_val; + struct elm_info *info = dev_get_drvdata(dev); + + reg_val = (bch_type & ECC_BCH_LEVEL_MASK) | (ELM_ECC_SIZE << 16); + elm_write_reg(info, ELM_LOCATION_CONFIG, reg_val); + info->bch_type = bch_type; +} +EXPORT_SYMBOL(elm_config); + +/** + * elm_configure_page_mode - Enable/Disable page mode + * @info: elm info + * @index: index number of syndrome fragment vector + * @enable: enable/disable flag for page mode + * + * Enable page mode for syndrome fragment index + */ +static void elm_configure_page_mode(struct elm_info *info, int index, + bool enable) +{ + u32 reg_val; + + reg_val = elm_read_reg(info, ELM_PAGE_CTRL); + if (enable) + reg_val |= BIT(index); /* enable page mode */ + else + reg_val &= ~BIT(index); /* disable page mode */ + + elm_write_reg(info, ELM_PAGE_CTRL, reg_val); +} + +/** + * elm_load_syndrome - Load ELM syndrome reg + * @info: elm info + * @err_vec: elm error vectors + * @ecc: buffer with calculated ecc + * + * Load syndrome fragment registers with calculated ecc in reverse order. + */ +static void elm_load_syndrome(struct elm_info *info, + struct elm_errorvec *err_vec, u8 *ecc) +{ + int i, offset; + u32 val; + + for (i = 0; i < ERROR_VECTOR_MAX; i++) { + + /* Check error reported */ + if (err_vec[i].error_reported) { + elm_configure_page_mode(info, i, true); + offset = ELM_SYNDROME_FRAGMENT_0 + + SYNDROME_FRAGMENT_REG_SIZE * i; + + /* BCH8 */ + if (info->bch_type) { + + /* syndrome fragment 0 = ecc[9-12B] */ + val = cpu_to_be32(*(u32 *) &ecc[9]); + elm_write_reg(info, offset, val); + + /* syndrome fragment 1 = ecc[5-8B] */ + offset += 4; + val = cpu_to_be32(*(u32 *) &ecc[5]); + elm_write_reg(info, offset, val); + + /* syndrome fragment 2 = ecc[1-4B] */ + offset += 4; + val = cpu_to_be32(*(u32 *) &ecc[1]); + elm_write_reg(info, offset, val); + + /* syndrome fragment 3 = ecc[0B] */ + offset += 4; + val = ecc[0]; + elm_write_reg(info, offset, val); + } else { + /* syndrome fragment 0 = ecc[20-52b] bits */ + val = (cpu_to_be32(*(u32 *) &ecc[3]) >> 4) | + ((ecc[2] & 0xf) << 28); + elm_write_reg(info, offset, val); + + /* syndrome fragment 1 = ecc[0-20b] bits */ + offset += 4; + val = cpu_to_be32(*(u32 *) &ecc[0]) >> 12; + elm_write_reg(info, offset, val); + } + } + + /* Update ecc pointer with ecc byte size */ + ecc += info->bch_type ? BCH8_SIZE : BCH4_SIZE; + } +} + +/** + * elm_start_processing - start elm syndrome processing + * @info: elm info + * @err_vec: elm error vectors + * + * Set syndrome valid bit for syndrome fragment registers for which + * elm syndrome fragment registers are loaded. This enables elm module + * to start processing syndrome vectors. + */ +static void elm_start_processing(struct elm_info *info, + struct elm_errorvec *err_vec) +{ + int i, offset; + u32 reg_val; + + /* + * Set syndrome vector valid, so that ELM module + * will process it for vectors error is reported + */ + for (i = 0; i < ERROR_VECTOR_MAX; i++) { + if (err_vec[i].error_reported) { + offset = ELM_SYNDROME_FRAGMENT_6 + + SYNDROME_FRAGMENT_REG_SIZE * i; + reg_val = elm_read_reg(info, offset); + reg_val |= ELM_SYNDROME_VALID; + elm_write_reg(info, offset, reg_val); + } + } +} + +/** + * elm_error_correction - locate correctable error position + * @info: elm info + * @err_vec: elm error vectors + * + * On completion of processing by elm module, error location status + * register updated with correctable/uncorrectable error information. + * In case of correctable errors, number of errors located from + * elm location status register & read the positions from + * elm error location register. + */ +static void elm_error_correction(struct elm_info *info, + struct elm_errorvec *err_vec) +{ + int i, j, errors = 0; + int offset; + u32 reg_val; + + for (i = 0; i < ERROR_VECTOR_MAX; i++) { + + /* Check error reported */ + if (err_vec[i].error_reported) { + offset = ELM_LOCATION_STATUS + ERROR_LOCATION_SIZE * i; + reg_val = elm_read_reg(info, offset); + + /* Check correctable error or not */ + if (reg_val & ECC_CORRECTABLE_MASK) { + offset = ELM_ERROR_LOCATION_0 + + ERROR_LOCATION_SIZE * i; + + /* Read count of correctable errors */ + err_vec[i].error_count = reg_val & + ECC_NB_ERRORS_MASK; + + /* Update the error locations in error vector */ + for (j = 0; j < err_vec[i].error_count; j++) { + + reg_val = elm_read_reg(info, offset); + err_vec[i].error_loc[j] = reg_val & + ECC_ERROR_LOCATION_MASK; + + /* Update error location register */ + offset += 4; + } + + errors += err_vec[i].error_count; + } else { + err_vec[i].error_uncorrectable = true; + } + + /* Clearing interrupts for processed error vectors */ + elm_write_reg(info, ELM_IRQSTATUS, BIT(i)); + + /* Disable page mode */ + elm_configure_page_mode(info, i, false); + } + } +} + +/** + * elm_decode_bch_error_page - Locate error position + * @dev: device pointer + * @ecc_calc: calculated ECC bytes from GPMC + * @err_vec: elm error vectors + * + * Called with one or more error reported vectors & vectors with + * error reported is updated in err_vec[].error_reported + */ +void elm_decode_bch_error_page(struct device *dev, u8 *ecc_calc, + struct elm_errorvec *err_vec) +{ + struct elm_info *info = dev_get_drvdata(dev); + u32 reg_val; + + /* Enable page mode interrupt */ + reg_val = elm_read_reg(info, ELM_IRQSTATUS); + elm_write_reg(info, ELM_IRQSTATUS, reg_val & INTR_STATUS_PAGE_VALID); + elm_write_reg(info, ELM_IRQENABLE, INTR_EN_PAGE_MASK); + + /* Load valid ecc byte to syndrome fragment register */ + elm_load_syndrome(info, err_vec, ecc_calc); + + /* Enable syndrome processing for which syndrome fragment is updated */ + elm_start_processing(info, err_vec); + + /* Wait for ELM module to finish locating error correction */ + wait_for_completion(&info->elm_completion); + + /* Disable page mode interrupt */ + reg_val = elm_read_reg(info, ELM_IRQENABLE); + elm_write_reg(info, ELM_IRQENABLE, reg_val & ~INTR_EN_PAGE_MASK); + elm_error_correction(info, err_vec); +} +EXPORT_SYMBOL(elm_decode_bch_error_page); + +static irqreturn_t elm_isr(int this_irq, void *dev_id) +{ + u32 reg_val; + struct elm_info *info = dev_id; + + reg_val = elm_read_reg(info, ELM_IRQSTATUS); + + /* All error vectors processed */ + if (reg_val & INTR_STATUS_PAGE_VALID) { + elm_write_reg(info, ELM_IRQSTATUS, + reg_val & INTR_STATUS_PAGE_VALID); + complete(&info->elm_completion); + return IRQ_HANDLED; + } + + return IRQ_NONE; +} + +static int elm_probe(struct platform_device *pdev) +{ + int ret = 0; + struct resource *res, *irq; + struct elm_info *info; + + info = devm_kzalloc(&pdev->dev, sizeof(*info), GFP_KERNEL); + if (!info) { + dev_err(&pdev->dev, "failed to allocate memory\n"); + return -ENOMEM; + } + + info->dev = &pdev->dev; + + irq = platform_get_resource(pdev, IORESOURCE_IRQ, 0); + if (!irq) { + dev_err(&pdev->dev, "no irq resource defined\n"); + return -ENODEV; + } + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!res) { + dev_err(&pdev->dev, "no memory resource defined\n"); + return -ENODEV; + } + + info->elm_base = devm_request_and_ioremap(&pdev->dev, res); + if (!info->elm_base) + return -EADDRNOTAVAIL; + + ret = devm_request_irq(&pdev->dev, irq->start, elm_isr, 0, + pdev->name, info); + if (ret) { + dev_err(&pdev->dev, "failure requesting irq %i\n", irq->start); + return ret; + } + + pm_runtime_enable(&pdev->dev); + if (pm_runtime_get_sync(&pdev->dev)) { + ret = -EINVAL; + pm_runtime_disable(&pdev->dev); + dev_err(&pdev->dev, "can't enable clock\n"); + return ret; + } + + init_completion(&info->elm_completion); + INIT_LIST_HEAD(&info->list); + list_add(&info->list, &elm_devices); + platform_set_drvdata(pdev, info); + return ret; +} + +static int elm_remove(struct platform_device *pdev) +{ + pm_runtime_put_sync(&pdev->dev); + pm_runtime_disable(&pdev->dev); + platform_set_drvdata(pdev, NULL); + return 0; +} + +#ifdef CONFIG_OF +static const struct of_device_id elm_of_match[] = { + { .compatible = "ti,am3352-elm" }, + {}, +}; +MODULE_DEVICE_TABLE(of, elm_of_match); +#endif + +static struct platform_driver elm_driver = { + .driver = { + .name = "elm", + .owner = THIS_MODULE, + .of_match_table = of_match_ptr(elm_of_match), + }, + .probe = elm_probe, + .remove = elm_remove, +}; + +module_platform_driver(elm_driver); + +MODULE_DESCRIPTION("ELM driver for BCH error correction"); +MODULE_AUTHOR("Texas Instruments"); +MODULE_ALIAS("platform: elm"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/mtd/devices/m25p80.c b/drivers/mtd/devices/m25p80.c index 4eeeb2d..5b6b072 100644 --- a/drivers/mtd/devices/m25p80.c +++ b/drivers/mtd/devices/m25p80.c @@ -565,6 +565,96 @@ time_out: return ret; } +static int m25p80_lock(struct mtd_info *mtd, loff_t ofs, uint64_t len) +{ + struct m25p *flash = mtd_to_m25p(mtd); + uint32_t offset = ofs; + uint8_t status_old, status_new; + int res = 0; + + mutex_lock(&flash->lock); + /* Wait until finished previous command */ + if (wait_till_ready(flash)) { + res = 1; + goto err; + } + + status_old = read_sr(flash); + + if (offset < flash->mtd.size-(flash->mtd.size/2)) + status_new = status_old | SR_BP2 | SR_BP1 | SR_BP0; + else if (offset < flash->mtd.size-(flash->mtd.size/4)) + status_new = (status_old & ~SR_BP0) | SR_BP2 | SR_BP1; + else if (offset < flash->mtd.size-(flash->mtd.size/8)) + status_new = (status_old & ~SR_BP1) | SR_BP2 | SR_BP0; + else if (offset < flash->mtd.size-(flash->mtd.size/16)) + status_new = (status_old & ~(SR_BP0|SR_BP1)) | SR_BP2; + else if (offset < flash->mtd.size-(flash->mtd.size/32)) + status_new = (status_old & ~SR_BP2) | SR_BP1 | SR_BP0; + else if (offset < flash->mtd.size-(flash->mtd.size/64)) + status_new = (status_old & ~(SR_BP2|SR_BP0)) | SR_BP1; + else + status_new = (status_old & ~(SR_BP2|SR_BP1)) | SR_BP0; + + /* Only modify protection if it will not unlock other areas */ + if ((status_new&(SR_BP2|SR_BP1|SR_BP0)) > + (status_old&(SR_BP2|SR_BP1|SR_BP0))) { + write_enable(flash); + if (write_sr(flash, status_new) < 0) { + res = 1; + goto err; + } + } + +err: mutex_unlock(&flash->lock); + return res; +} + +static int m25p80_unlock(struct mtd_info *mtd, loff_t ofs, uint64_t len) +{ + struct m25p *flash = mtd_to_m25p(mtd); + uint32_t offset = ofs; + uint8_t status_old, status_new; + int res = 0; + + mutex_lock(&flash->lock); + /* Wait until finished previous command */ + if (wait_till_ready(flash)) { + res = 1; + goto err; + } + + status_old = read_sr(flash); + + if (offset+len > flash->mtd.size-(flash->mtd.size/64)) + status_new = status_old & ~(SR_BP2|SR_BP1|SR_BP0); + else if (offset+len > flash->mtd.size-(flash->mtd.size/32)) + status_new = (status_old & ~(SR_BP2|SR_BP1)) | SR_BP0; + else if (offset+len > flash->mtd.size-(flash->mtd.size/16)) + status_new = (status_old & ~(SR_BP2|SR_BP0)) | SR_BP1; + else if (offset+len > flash->mtd.size-(flash->mtd.size/8)) + status_new = (status_old & ~SR_BP2) | SR_BP1 | SR_BP0; + else if (offset+len > flash->mtd.size-(flash->mtd.size/4)) + status_new = (status_old & ~(SR_BP0|SR_BP1)) | SR_BP2; + else if (offset+len > flash->mtd.size-(flash->mtd.size/2)) + status_new = (status_old & ~SR_BP1) | SR_BP2 | SR_BP0; + else + status_new = (status_old & ~SR_BP0) | SR_BP2 | SR_BP1; + + /* Only modify protection if it will not lock other areas */ + if ((status_new&(SR_BP2|SR_BP1|SR_BP0)) < + (status_old&(SR_BP2|SR_BP1|SR_BP0))) { + write_enable(flash); + if (write_sr(flash, status_new) < 0) { + res = 1; + goto err; + } + } + +err: mutex_unlock(&flash->lock); + return res; +} + /****************************************************************************/ /* @@ -642,6 +732,10 @@ static const struct spi_device_id m25p_ids[] = { /* Everspin */ { "mr25h256", CAT25_INFO( 32 * 1024, 1, 256, 2) }, + /* GigaDevice */ + { "gd25q32", INFO(0xc84016, 0, 64 * 1024, 64, SECT_4K) }, + { "gd25q64", INFO(0xc84017, 0, 64 * 1024, 128, SECT_4K) }, + /* Intel/Numonyx -- xxxs33b */ { "160s33b", INFO(0x898911, 0, 64 * 1024, 32, 0) }, { "320s33b", INFO(0x898912, 0, 64 * 1024, 64, 0) }, @@ -899,6 +993,12 @@ static int m25p_probe(struct spi_device *spi) flash->mtd._erase = m25p80_erase; flash->mtd._read = m25p80_read; + /* flash protection support for STmicro chips */ + if (JEDEC_MFR(info->jedec_id) == CFI_MFR_ST) { + flash->mtd._lock = m25p80_lock; + flash->mtd._unlock = m25p80_unlock; + } + /* sst flash chips use AAI word program */ if (JEDEC_MFR(info->jedec_id) == CFI_MFR_SST) flash->mtd._write = sst_write; diff --git a/drivers/mtd/maps/Kconfig b/drivers/mtd/maps/Kconfig index 62ba82c..3ed17c4 100644 --- a/drivers/mtd/maps/Kconfig +++ b/drivers/mtd/maps/Kconfig @@ -429,7 +429,7 @@ config MTD_GPIO_ADDR config MTD_UCLINUX bool "Generic uClinux RAM/ROM filesystem support" - depends on MTD_RAM=y && (!MMU || COLDFIRE) + depends on (MTD_RAM=y || MTD_ROM=y) && (!MMU || COLDFIRE) help Map driver to support image based filesystems for uClinux. diff --git a/drivers/mtd/maps/physmap_of.c b/drivers/mtd/maps/physmap_of.c index 7901d72..363939d 100644 --- a/drivers/mtd/maps/physmap_of.c +++ b/drivers/mtd/maps/physmap_of.c @@ -68,9 +68,6 @@ static int of_flash_remove(struct platform_device *dev) kfree(info->list[i].res); } } - - kfree(info); - return 0; } @@ -199,8 +196,9 @@ static int of_flash_probe(struct platform_device *dev) map_indirect = of_property_read_bool(dp, "no-unaligned-direct-access"); err = -ENOMEM; - info = kzalloc(sizeof(struct of_flash) + - sizeof(struct of_flash_list) * count, GFP_KERNEL); + info = devm_kzalloc(&dev->dev, + sizeof(struct of_flash) + + sizeof(struct of_flash_list) * count, GFP_KERNEL); if (!info) goto err_flash_remove; @@ -241,6 +239,7 @@ static int of_flash_probe(struct platform_device *dev) info->list[i].map.phys = res.start; info->list[i].map.size = res_size; info->list[i].map.bankwidth = be32_to_cpup(width); + info->list[i].map.device_node = dp; err = -ENOMEM; info->list[i].map.virt = ioremap(info->list[i].map.phys, diff --git a/drivers/mtd/maps/uclinux.c b/drivers/mtd/maps/uclinux.c index 299bf88..c1af83d 100644 --- a/drivers/mtd/maps/uclinux.c +++ b/drivers/mtd/maps/uclinux.c @@ -23,12 +23,26 @@ /****************************************************************************/ +#ifdef CONFIG_MTD_ROM +#define MAP_NAME "rom" +#else +#define MAP_NAME "ram" +#endif + +/* + * Blackfin uses uclinux_ram_map during startup, so it must not be static. + * Provide a dummy declaration to make sparse happy. + */ +extern struct map_info uclinux_ram_map; + struct map_info uclinux_ram_map = { - .name = "RAM", - .phys = (unsigned long)__bss_stop, + .name = MAP_NAME, .size = 0, }; +static unsigned long physaddr = -1; +module_param(physaddr, ulong, S_IRUGO); + static struct mtd_info *uclinux_ram_mtdinfo; /****************************************************************************/ @@ -60,11 +74,17 @@ static int __init uclinux_mtd_init(void) struct map_info *mapp; mapp = &uclinux_ram_map; + + if (physaddr == -1) + mapp->phys = (resource_size_t)__bss_stop; + else + mapp->phys = physaddr; + if (!mapp->size) mapp->size = PAGE_ALIGN(ntohl(*((unsigned long *)(mapp->phys + 8)))); mapp->bankwidth = 4; - printk("uclinux[mtd]: RAM probe address=0x%x size=0x%x\n", + printk("uclinux[mtd]: probe address=0x%x size=0x%x\n", (int) mapp->phys, (int) mapp->size); /* @@ -82,7 +102,7 @@ static int __init uclinux_mtd_init(void) simple_map_init(mapp); - mtd = do_map_probe("map_ram", mapp); + mtd = do_map_probe("map_" MAP_NAME, mapp); if (!mtd) { printk("uclinux[mtd]: failed to find a mapping?\n"); return(-ENXIO); @@ -118,6 +138,6 @@ module_exit(uclinux_mtd_cleanup); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Greg Ungerer <gerg@snapgear.com>"); -MODULE_DESCRIPTION("Generic RAM based MTD for uClinux"); +MODULE_DESCRIPTION("Generic MTD for uClinux"); /****************************************************************************/ diff --git a/drivers/mtd/nand/atmel_nand.c b/drivers/mtd/nand/atmel_nand.c index c516a94..ffcbcca 100644 --- a/drivers/mtd/nand/atmel_nand.c +++ b/drivers/mtd/nand/atmel_nand.c @@ -101,6 +101,8 @@ struct atmel_nand_host { u8 pmecc_corr_cap; u16 pmecc_sector_size; u32 pmecc_lookup_table_offset; + u32 pmecc_lookup_table_offset_512; + u32 pmecc_lookup_table_offset_1024; int pmecc_bytes_per_sector; int pmecc_sector_number; @@ -908,6 +910,84 @@ static void atmel_pmecc_core_init(struct mtd_info *mtd) pmecc_writel(host->ecc, CTRL, PMECC_CTRL_ENABLE); } +/* + * Get ECC requirement in ONFI parameters, returns -1 if ONFI + * parameters is not supported. + * return 0 if success to get the ECC requirement. + */ +static int get_onfi_ecc_param(struct nand_chip *chip, + int *ecc_bits, int *sector_size) +{ + *ecc_bits = *sector_size = 0; + + if (chip->onfi_params.ecc_bits == 0xff) + /* TODO: the sector_size and ecc_bits need to be find in + * extended ecc parameter, currently we don't support it. + */ + return -1; + + *ecc_bits = chip->onfi_params.ecc_bits; + + /* The default sector size (ecc codeword size) is 512 */ + *sector_size = 512; + + return 0; +} + +/* + * Get ecc requirement from ONFI parameters ecc requirement. + * If pmecc-cap, pmecc-sector-size in DTS are not specified, this function + * will set them according to ONFI ecc requirement. Otherwise, use the + * value in DTS file. + * return 0 if success. otherwise return error code. + */ +static int pmecc_choose_ecc(struct atmel_nand_host *host, + int *cap, int *sector_size) +{ + /* Get ECC requirement from ONFI parameters */ + *cap = *sector_size = 0; + if (host->nand_chip.onfi_version) { + if (!get_onfi_ecc_param(&host->nand_chip, cap, sector_size)) + dev_info(host->dev, "ONFI params, minimum required ECC: %d bits in %d bytes\n", + *cap, *sector_size); + else + dev_info(host->dev, "NAND chip ECC reqirement is in Extended ONFI parameter, we don't support yet.\n"); + } else { + dev_info(host->dev, "NAND chip is not ONFI compliant, assume ecc_bits is 2 in 512 bytes"); + } + if (*cap == 0 && *sector_size == 0) { + *cap = 2; + *sector_size = 512; + } + + /* If dts file doesn't specify then use the one in ONFI parameters */ + if (host->pmecc_corr_cap == 0) { + /* use the most fitable ecc bits (the near bigger one ) */ + if (*cap <= 2) + host->pmecc_corr_cap = 2; + else if (*cap <= 4) + host->pmecc_corr_cap = 4; + else if (*cap < 8) + host->pmecc_corr_cap = 8; + else if (*cap < 12) + host->pmecc_corr_cap = 12; + else if (*cap < 24) + host->pmecc_corr_cap = 24; + else + return -EINVAL; + } + if (host->pmecc_sector_size == 0) { + /* use the most fitable sector size (the near smaller one ) */ + if (*sector_size >= 1024) + host->pmecc_sector_size = 1024; + else if (*sector_size >= 512) + host->pmecc_sector_size = 512; + else + return -EINVAL; + } + return 0; +} + static int __init atmel_pmecc_nand_init_params(struct platform_device *pdev, struct atmel_nand_host *host) { @@ -916,8 +996,22 @@ static int __init atmel_pmecc_nand_init_params(struct platform_device *pdev, struct resource *regs, *regs_pmerr, *regs_rom; int cap, sector_size, err_no; + err_no = pmecc_choose_ecc(host, &cap, §or_size); + if (err_no) { + dev_err(host->dev, "The NAND flash's ECC requirement are not support!"); + return err_no; + } + + if (cap != host->pmecc_corr_cap || + sector_size != host->pmecc_sector_size) + dev_info(host->dev, "WARNING: Be Caution! Using different PMECC parameters from Nand ONFI ECC reqirement.\n"); + cap = host->pmecc_corr_cap; sector_size = host->pmecc_sector_size; + host->pmecc_lookup_table_offset = (sector_size == 512) ? + host->pmecc_lookup_table_offset_512 : + host->pmecc_lookup_table_offset_1024; + dev_info(host->dev, "Initialize PMECC params, cap: %d, sector: %d\n", cap, sector_size); @@ -1215,7 +1309,7 @@ static void atmel_nand_hwctl(struct mtd_info *mtd, int mode) static int atmel_of_init_port(struct atmel_nand_host *host, struct device_node *np) { - u32 val, table_offset; + u32 val; u32 offset[2]; int ecc_mode; struct atmel_nand_data *board = &host->board; @@ -1259,42 +1353,41 @@ static int atmel_of_init_port(struct atmel_nand_host *host, /* use PMECC, get correction capability, sector size and lookup * table offset. + * If correction bits and sector size are not specified, then find + * them from NAND ONFI parameters. */ - if (of_property_read_u32(np, "atmel,pmecc-cap", &val) != 0) { - dev_err(host->dev, "Cannot decide PMECC Capability\n"); - return -EINVAL; - } else if ((val != 2) && (val != 4) && (val != 8) && (val != 12) && - (val != 24)) { - dev_err(host->dev, - "Unsupported PMECC correction capability: %d; should be 2, 4, 8, 12 or 24\n", - val); - return -EINVAL; + if (of_property_read_u32(np, "atmel,pmecc-cap", &val) == 0) { + if ((val != 2) && (val != 4) && (val != 8) && (val != 12) && + (val != 24)) { + dev_err(host->dev, + "Unsupported PMECC correction capability: %d; should be 2, 4, 8, 12 or 24\n", + val); + return -EINVAL; + } + host->pmecc_corr_cap = (u8)val; } - host->pmecc_corr_cap = (u8)val; - if (of_property_read_u32(np, "atmel,pmecc-sector-size", &val) != 0) { - dev_err(host->dev, "Cannot decide PMECC Sector Size\n"); - return -EINVAL; - } else if ((val != 512) && (val != 1024)) { - dev_err(host->dev, - "Unsupported PMECC sector size: %d; should be 512 or 1024 bytes\n", - val); - return -EINVAL; + if (of_property_read_u32(np, "atmel,pmecc-sector-size", &val) == 0) { + if ((val != 512) && (val != 1024)) { + dev_err(host->dev, + "Unsupported PMECC sector size: %d; should be 512 or 1024 bytes\n", + val); + return -EINVAL; + } + host->pmecc_sector_size = (u16)val; } - host->pmecc_sector_size = (u16)val; if (of_property_read_u32_array(np, "atmel,pmecc-lookup-table-offset", offset, 2) != 0) { dev_err(host->dev, "Cannot get PMECC lookup table offset\n"); return -EINVAL; } - table_offset = host->pmecc_sector_size == 512 ? offset[0] : offset[1]; - - if (!table_offset) { + if (!offset[0] && !offset[1]) { dev_err(host->dev, "Invalid PMECC lookup table offset\n"); return -EINVAL; } - host->pmecc_lookup_table_offset = table_offset; + host->pmecc_lookup_table_offset_512 = offset[0]; + host->pmecc_lookup_table_offset_1024 = offset[1]; return 0; } diff --git a/drivers/mtd/nand/bcm47xxnflash/bcm47xxnflash.h b/drivers/mtd/nand/bcm47xxnflash/bcm47xxnflash.h index 0bdb2ce..c005a62 100644 --- a/drivers/mtd/nand/bcm47xxnflash/bcm47xxnflash.h +++ b/drivers/mtd/nand/bcm47xxnflash/bcm47xxnflash.h @@ -1,6 +1,10 @@ #ifndef __BCM47XXNFLASH_H #define __BCM47XXNFLASH_H +#ifndef pr_fmt +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#endif + #include <linux/mtd/mtd.h> #include <linux/mtd/nand.h> diff --git a/drivers/mtd/nand/bcm47xxnflash/main.c b/drivers/mtd/nand/bcm47xxnflash/main.c index 8363a9a..7bae569 100644 --- a/drivers/mtd/nand/bcm47xxnflash/main.c +++ b/drivers/mtd/nand/bcm47xxnflash/main.c @@ -9,14 +9,14 @@ * */ +#include "bcm47xxnflash.h" + #include <linux/module.h> #include <linux/kernel.h> #include <linux/slab.h> #include <linux/platform_device.h> #include <linux/bcma/bcma.h> -#include "bcm47xxnflash.h" - MODULE_DESCRIPTION("NAND flash driver for BCMA bus"); MODULE_LICENSE("GPL"); MODULE_AUTHOR("RafaÅ‚ MiÅ‚ecki"); @@ -77,6 +77,7 @@ static int bcm47xxnflash_remove(struct platform_device *pdev) } static struct platform_driver bcm47xxnflash_driver = { + .probe = bcm47xxnflash_probe, .remove = bcm47xxnflash_remove, .driver = { .name = "bcma_nflash", @@ -88,13 +89,10 @@ static int __init bcm47xxnflash_init(void) { int err; - /* - * Platform device "bcma_nflash" exists on SoCs and is registered very - * early, it won't be added during runtime (use platform_driver_probe). - */ - err = platform_driver_probe(&bcm47xxnflash_driver, bcm47xxnflash_probe); + err = platform_driver_register(&bcm47xxnflash_driver); if (err) - pr_err("Failed to register serial flash driver: %d\n", err); + pr_err("Failed to register bcm47xx nand flash driver: %d\n", + err); return err; } diff --git a/drivers/mtd/nand/bcm47xxnflash/ops_bcm4706.c b/drivers/mtd/nand/bcm47xxnflash/ops_bcm4706.c index 595de40..b2ab373 100644 --- a/drivers/mtd/nand/bcm47xxnflash/ops_bcm4706.c +++ b/drivers/mtd/nand/bcm47xxnflash/ops_bcm4706.c @@ -9,13 +9,13 @@ * */ +#include "bcm47xxnflash.h" + #include <linux/module.h> #include <linux/kernel.h> #include <linux/slab.h> #include <linux/bcma/bcma.h> -#include "bcm47xxnflash.h" - /* Broadcom uses 1'000'000 but it seems to be too many. Tests on WNDR4500 has * shown ~1000 retries as maxiumum. */ #define NFLASH_READY_RETRIES 10000 diff --git a/drivers/mtd/nand/davinci_nand.c b/drivers/mtd/nand/davinci_nand.c index feae55c..94e17af 100644 --- a/drivers/mtd/nand/davinci_nand.c +++ b/drivers/mtd/nand/davinci_nand.c @@ -606,7 +606,7 @@ static int __init nand_davinci_probe(struct platform_device *pdev) if (pdev->id < 0 || pdev->id > 3) return -ENODEV; - info = kzalloc(sizeof(*info), GFP_KERNEL); + info = devm_kzalloc(&pdev->dev, sizeof(*info), GFP_KERNEL); if (!info) { dev_err(&pdev->dev, "unable to allocate memory\n"); ret = -ENOMEM; @@ -623,11 +623,11 @@ static int __init nand_davinci_probe(struct platform_device *pdev) goto err_nomem; } - vaddr = ioremap(res1->start, resource_size(res1)); - base = ioremap(res2->start, resource_size(res2)); + vaddr = devm_request_and_ioremap(&pdev->dev, res1); + base = devm_request_and_ioremap(&pdev->dev, res2); if (!vaddr || !base) { dev_err(&pdev->dev, "ioremap failed\n"); - ret = -EINVAL; + ret = -EADDRNOTAVAIL; goto err_ioremap; } @@ -717,7 +717,7 @@ static int __init nand_davinci_probe(struct platform_device *pdev) } info->chip.ecc.mode = ecc_mode; - info->clk = clk_get(&pdev->dev, "aemif"); + info->clk = devm_clk_get(&pdev->dev, "aemif"); if (IS_ERR(info->clk)) { ret = PTR_ERR(info->clk); dev_dbg(&pdev->dev, "unable to get AEMIF clock, err %d\n", ret); @@ -845,8 +845,6 @@ err_timing: clk_disable_unprepare(info->clk); err_clk_enable: - clk_put(info->clk); - spin_lock_irq(&davinci_nand_lock); if (ecc_mode == NAND_ECC_HW_SYNDROME) ecc4_busy = false; @@ -855,13 +853,7 @@ err_clk_enable: err_ecc: err_clk: err_ioremap: - if (base) - iounmap(base); - if (vaddr) - iounmap(vaddr); - err_nomem: - kfree(info); return ret; } @@ -874,15 +866,9 @@ static int __exit nand_davinci_remove(struct platform_device *pdev) ecc4_busy = false; spin_unlock_irq(&davinci_nand_lock); - iounmap(info->base); - iounmap(info->vaddr); - nand_release(&info->mtd); clk_disable_unprepare(info->clk); - clk_put(info->clk); - - kfree(info); return 0; } diff --git a/drivers/mtd/nand/fsl_ifc_nand.c b/drivers/mtd/nand/fsl_ifc_nand.c index ad62226..f1f7f12 100644 --- a/drivers/mtd/nand/fsl_ifc_nand.c +++ b/drivers/mtd/nand/fsl_ifc_nand.c @@ -176,8 +176,8 @@ static void set_addr(struct mtd_info *mtd, int column, int page_addr, int oob) ifc_nand_ctrl->page = page_addr; /* Program ROW0/COL0 */ - out_be32(&ifc->ifc_nand.row0, page_addr); - out_be32(&ifc->ifc_nand.col0, (oob ? IFC_NAND_COL_MS : 0) | column); + iowrite32be(page_addr, &ifc->ifc_nand.row0); + iowrite32be((oob ? IFC_NAND_COL_MS : 0) | column, &ifc->ifc_nand.col0); buf_num = page_addr & priv->bufnum_mask; @@ -239,18 +239,19 @@ static void fsl_ifc_run_command(struct mtd_info *mtd) int i; /* set the chip select for NAND Transaction */ - out_be32(&ifc->ifc_nand.nand_csel, priv->bank << IFC_NAND_CSEL_SHIFT); + iowrite32be(priv->bank << IFC_NAND_CSEL_SHIFT, + &ifc->ifc_nand.nand_csel); dev_vdbg(priv->dev, "%s: fir0=%08x fcr0=%08x\n", __func__, - in_be32(&ifc->ifc_nand.nand_fir0), - in_be32(&ifc->ifc_nand.nand_fcr0)); + ioread32be(&ifc->ifc_nand.nand_fir0), + ioread32be(&ifc->ifc_nand.nand_fcr0)); ctrl->nand_stat = 0; /* start read/write seq */ - out_be32(&ifc->ifc_nand.nandseq_strt, IFC_NAND_SEQ_STRT_FIR_STRT); + iowrite32be(IFC_NAND_SEQ_STRT_FIR_STRT, &ifc->ifc_nand.nandseq_strt); /* wait for command complete flag or timeout */ wait_event_timeout(ctrl->nand_wait, ctrl->nand_stat, @@ -273,7 +274,7 @@ static void fsl_ifc_run_command(struct mtd_info *mtd) int sector_end = sector + chip->ecc.steps - 1; for (i = sector / 4; i <= sector_end / 4; i++) - eccstat[i] = in_be32(&ifc->ifc_nand.nand_eccstat[i]); + eccstat[i] = ioread32be(&ifc->ifc_nand.nand_eccstat[i]); for (i = sector; i <= sector_end; i++) { errors = check_read_ecc(mtd, ctrl, eccstat, i); @@ -313,31 +314,33 @@ static void fsl_ifc_do_read(struct nand_chip *chip, /* Program FIR/IFC_NAND_FCR0 for Small/Large page */ if (mtd->writesize > 512) { - out_be32(&ifc->ifc_nand.nand_fir0, - (IFC_FIR_OP_CW0 << IFC_NAND_FIR0_OP0_SHIFT) | - (IFC_FIR_OP_CA0 << IFC_NAND_FIR0_OP1_SHIFT) | - (IFC_FIR_OP_RA0 << IFC_NAND_FIR0_OP2_SHIFT) | - (IFC_FIR_OP_CMD1 << IFC_NAND_FIR0_OP3_SHIFT) | - (IFC_FIR_OP_RBCD << IFC_NAND_FIR0_OP4_SHIFT)); - out_be32(&ifc->ifc_nand.nand_fir1, 0x0); - - out_be32(&ifc->ifc_nand.nand_fcr0, - (NAND_CMD_READ0 << IFC_NAND_FCR0_CMD0_SHIFT) | - (NAND_CMD_READSTART << IFC_NAND_FCR0_CMD1_SHIFT)); + iowrite32be((IFC_FIR_OP_CW0 << IFC_NAND_FIR0_OP0_SHIFT) | + (IFC_FIR_OP_CA0 << IFC_NAND_FIR0_OP1_SHIFT) | + (IFC_FIR_OP_RA0 << IFC_NAND_FIR0_OP2_SHIFT) | + (IFC_FIR_OP_CMD1 << IFC_NAND_FIR0_OP3_SHIFT) | + (IFC_FIR_OP_RBCD << IFC_NAND_FIR0_OP4_SHIFT), + &ifc->ifc_nand.nand_fir0); + iowrite32be(0x0, &ifc->ifc_nand.nand_fir1); + + iowrite32be((NAND_CMD_READ0 << IFC_NAND_FCR0_CMD0_SHIFT) | + (NAND_CMD_READSTART << IFC_NAND_FCR0_CMD1_SHIFT), + &ifc->ifc_nand.nand_fcr0); } else { - out_be32(&ifc->ifc_nand.nand_fir0, - (IFC_FIR_OP_CW0 << IFC_NAND_FIR0_OP0_SHIFT) | - (IFC_FIR_OP_CA0 << IFC_NAND_FIR0_OP1_SHIFT) | - (IFC_FIR_OP_RA0 << IFC_NAND_FIR0_OP2_SHIFT) | - (IFC_FIR_OP_RBCD << IFC_NAND_FIR0_OP3_SHIFT)); - out_be32(&ifc->ifc_nand.nand_fir1, 0x0); + iowrite32be((IFC_FIR_OP_CW0 << IFC_NAND_FIR0_OP0_SHIFT) | + (IFC_FIR_OP_CA0 << IFC_NAND_FIR0_OP1_SHIFT) | + (IFC_FIR_OP_RA0 << IFC_NAND_FIR0_OP2_SHIFT) | + (IFC_FIR_OP_RBCD << IFC_NAND_FIR0_OP3_SHIFT), + &ifc->ifc_nand.nand_fir0); + iowrite32be(0x0, &ifc->ifc_nand.nand_fir1); if (oob) - out_be32(&ifc->ifc_nand.nand_fcr0, - NAND_CMD_READOOB << IFC_NAND_FCR0_CMD0_SHIFT); + iowrite32be(NAND_CMD_READOOB << + IFC_NAND_FCR0_CMD0_SHIFT, + &ifc->ifc_nand.nand_fcr0); else - out_be32(&ifc->ifc_nand.nand_fcr0, - NAND_CMD_READ0 << IFC_NAND_FCR0_CMD0_SHIFT); + iowrite32be(NAND_CMD_READ0 << + IFC_NAND_FCR0_CMD0_SHIFT, + &ifc->ifc_nand.nand_fcr0); } } @@ -357,7 +360,7 @@ static void fsl_ifc_cmdfunc(struct mtd_info *mtd, unsigned int command, switch (command) { /* READ0 read the entire buffer to use hardware ECC. */ case NAND_CMD_READ0: - out_be32(&ifc->ifc_nand.nand_fbcr, 0); + iowrite32be(0, &ifc->ifc_nand.nand_fbcr); set_addr(mtd, 0, page_addr, 0); ifc_nand_ctrl->read_bytes = mtd->writesize + mtd->oobsize; @@ -372,7 +375,7 @@ static void fsl_ifc_cmdfunc(struct mtd_info *mtd, unsigned int command, /* READOOB reads only the OOB because no ECC is performed. */ case NAND_CMD_READOOB: - out_be32(&ifc->ifc_nand.nand_fbcr, mtd->oobsize - column); + iowrite32be(mtd->oobsize - column, &ifc->ifc_nand.nand_fbcr); set_addr(mtd, column, page_addr, 1); ifc_nand_ctrl->read_bytes = mtd->writesize + mtd->oobsize; @@ -388,19 +391,19 @@ static void fsl_ifc_cmdfunc(struct mtd_info *mtd, unsigned int command, if (command == NAND_CMD_PARAM) timing = IFC_FIR_OP_RBCD; - out_be32(&ifc->ifc_nand.nand_fir0, - (IFC_FIR_OP_CW0 << IFC_NAND_FIR0_OP0_SHIFT) | - (IFC_FIR_OP_UA << IFC_NAND_FIR0_OP1_SHIFT) | - (timing << IFC_NAND_FIR0_OP2_SHIFT)); - out_be32(&ifc->ifc_nand.nand_fcr0, - command << IFC_NAND_FCR0_CMD0_SHIFT); - out_be32(&ifc->ifc_nand.row3, column); + iowrite32be((IFC_FIR_OP_CW0 << IFC_NAND_FIR0_OP0_SHIFT) | + (IFC_FIR_OP_UA << IFC_NAND_FIR0_OP1_SHIFT) | + (timing << IFC_NAND_FIR0_OP2_SHIFT), + &ifc->ifc_nand.nand_fir0); + iowrite32be(command << IFC_NAND_FCR0_CMD0_SHIFT, + &ifc->ifc_nand.nand_fcr0); + iowrite32be(column, &ifc->ifc_nand.row3); /* * although currently it's 8 bytes for READID, we always read * the maximum 256 bytes(for PARAM) */ - out_be32(&ifc->ifc_nand.nand_fbcr, 256); + iowrite32be(256, &ifc->ifc_nand.nand_fbcr); ifc_nand_ctrl->read_bytes = 256; set_addr(mtd, 0, 0, 0); @@ -415,16 +418,16 @@ static void fsl_ifc_cmdfunc(struct mtd_info *mtd, unsigned int command, /* ERASE2 uses the block and page address from ERASE1 */ case NAND_CMD_ERASE2: - out_be32(&ifc->ifc_nand.nand_fir0, - (IFC_FIR_OP_CW0 << IFC_NAND_FIR0_OP0_SHIFT) | - (IFC_FIR_OP_RA0 << IFC_NAND_FIR0_OP1_SHIFT) | - (IFC_FIR_OP_CMD1 << IFC_NAND_FIR0_OP2_SHIFT)); + iowrite32be((IFC_FIR_OP_CW0 << IFC_NAND_FIR0_OP0_SHIFT) | + (IFC_FIR_OP_RA0 << IFC_NAND_FIR0_OP1_SHIFT) | + (IFC_FIR_OP_CMD1 << IFC_NAND_FIR0_OP2_SHIFT), + &ifc->ifc_nand.nand_fir0); - out_be32(&ifc->ifc_nand.nand_fcr0, - (NAND_CMD_ERASE1 << IFC_NAND_FCR0_CMD0_SHIFT) | - (NAND_CMD_ERASE2 << IFC_NAND_FCR0_CMD1_SHIFT)); + iowrite32be((NAND_CMD_ERASE1 << IFC_NAND_FCR0_CMD0_SHIFT) | + (NAND_CMD_ERASE2 << IFC_NAND_FCR0_CMD1_SHIFT), + &ifc->ifc_nand.nand_fcr0); - out_be32(&ifc->ifc_nand.nand_fbcr, 0); + iowrite32be(0, &ifc->ifc_nand.nand_fbcr); ifc_nand_ctrl->read_bytes = 0; fsl_ifc_run_command(mtd); return; @@ -440,26 +443,28 @@ static void fsl_ifc_cmdfunc(struct mtd_info *mtd, unsigned int command, (NAND_CMD_SEQIN << IFC_NAND_FCR0_CMD0_SHIFT) | (NAND_CMD_PAGEPROG << IFC_NAND_FCR0_CMD1_SHIFT); - out_be32(&ifc->ifc_nand.nand_fir0, - (IFC_FIR_OP_CW0 << IFC_NAND_FIR0_OP0_SHIFT) | - (IFC_FIR_OP_CA0 << IFC_NAND_FIR0_OP1_SHIFT) | - (IFC_FIR_OP_RA0 << IFC_NAND_FIR0_OP2_SHIFT) | - (IFC_FIR_OP_WBCD << IFC_NAND_FIR0_OP3_SHIFT) | - (IFC_FIR_OP_CW1 << IFC_NAND_FIR0_OP4_SHIFT)); + iowrite32be( + (IFC_FIR_OP_CW0 << IFC_NAND_FIR0_OP0_SHIFT) | + (IFC_FIR_OP_CA0 << IFC_NAND_FIR0_OP1_SHIFT) | + (IFC_FIR_OP_RA0 << IFC_NAND_FIR0_OP2_SHIFT) | + (IFC_FIR_OP_WBCD << IFC_NAND_FIR0_OP3_SHIFT) | + (IFC_FIR_OP_CW1 << IFC_NAND_FIR0_OP4_SHIFT), + &ifc->ifc_nand.nand_fir0); } else { nand_fcr0 = ((NAND_CMD_PAGEPROG << IFC_NAND_FCR0_CMD1_SHIFT) | (NAND_CMD_SEQIN << IFC_NAND_FCR0_CMD2_SHIFT)); - out_be32(&ifc->ifc_nand.nand_fir0, - (IFC_FIR_OP_CW0 << IFC_NAND_FIR0_OP0_SHIFT) | - (IFC_FIR_OP_CMD2 << IFC_NAND_FIR0_OP1_SHIFT) | - (IFC_FIR_OP_CA0 << IFC_NAND_FIR0_OP2_SHIFT) | - (IFC_FIR_OP_RA0 << IFC_NAND_FIR0_OP3_SHIFT) | - (IFC_FIR_OP_WBCD << IFC_NAND_FIR0_OP4_SHIFT)); - out_be32(&ifc->ifc_nand.nand_fir1, - (IFC_FIR_OP_CW1 << IFC_NAND_FIR1_OP5_SHIFT)); + iowrite32be( + (IFC_FIR_OP_CW0 << IFC_NAND_FIR0_OP0_SHIFT) | + (IFC_FIR_OP_CMD2 << IFC_NAND_FIR0_OP1_SHIFT) | + (IFC_FIR_OP_CA0 << IFC_NAND_FIR0_OP2_SHIFT) | + (IFC_FIR_OP_RA0 << IFC_NAND_FIR0_OP3_SHIFT) | + (IFC_FIR_OP_WBCD << IFC_NAND_FIR0_OP4_SHIFT), + &ifc->ifc_nand.nand_fir0); + iowrite32be(IFC_FIR_OP_CW1 << IFC_NAND_FIR1_OP5_SHIFT, + &ifc->ifc_nand.nand_fir1); if (column >= mtd->writesize) nand_fcr0 |= @@ -474,7 +479,7 @@ static void fsl_ifc_cmdfunc(struct mtd_info *mtd, unsigned int command, column -= mtd->writesize; ifc_nand_ctrl->oob = 1; } - out_be32(&ifc->ifc_nand.nand_fcr0, nand_fcr0); + iowrite32be(nand_fcr0, &ifc->ifc_nand.nand_fcr0); set_addr(mtd, column, page_addr, ifc_nand_ctrl->oob); return; } @@ -482,10 +487,11 @@ static void fsl_ifc_cmdfunc(struct mtd_info *mtd, unsigned int command, /* PAGEPROG reuses all of the setup from SEQIN and adds the length */ case NAND_CMD_PAGEPROG: { if (ifc_nand_ctrl->oob) { - out_be32(&ifc->ifc_nand.nand_fbcr, - ifc_nand_ctrl->index - ifc_nand_ctrl->column); + iowrite32be(ifc_nand_ctrl->index - + ifc_nand_ctrl->column, + &ifc->ifc_nand.nand_fbcr); } else { - out_be32(&ifc->ifc_nand.nand_fbcr, 0); + iowrite32be(0, &ifc->ifc_nand.nand_fbcr); } fsl_ifc_run_command(mtd); @@ -493,12 +499,12 @@ static void fsl_ifc_cmdfunc(struct mtd_info *mtd, unsigned int command, } case NAND_CMD_STATUS: - out_be32(&ifc->ifc_nand.nand_fir0, - (IFC_FIR_OP_CW0 << IFC_NAND_FIR0_OP0_SHIFT) | - (IFC_FIR_OP_RB << IFC_NAND_FIR0_OP1_SHIFT)); - out_be32(&ifc->ifc_nand.nand_fcr0, - NAND_CMD_STATUS << IFC_NAND_FCR0_CMD0_SHIFT); - out_be32(&ifc->ifc_nand.nand_fbcr, 1); + iowrite32be((IFC_FIR_OP_CW0 << IFC_NAND_FIR0_OP0_SHIFT) | + (IFC_FIR_OP_RB << IFC_NAND_FIR0_OP1_SHIFT), + &ifc->ifc_nand.nand_fir0); + iowrite32be(NAND_CMD_STATUS << IFC_NAND_FCR0_CMD0_SHIFT, + &ifc->ifc_nand.nand_fcr0); + iowrite32be(1, &ifc->ifc_nand.nand_fbcr); set_addr(mtd, 0, 0, 0); ifc_nand_ctrl->read_bytes = 1; @@ -512,10 +518,10 @@ static void fsl_ifc_cmdfunc(struct mtd_info *mtd, unsigned int command, return; case NAND_CMD_RESET: - out_be32(&ifc->ifc_nand.nand_fir0, - IFC_FIR_OP_CW0 << IFC_NAND_FIR0_OP0_SHIFT); - out_be32(&ifc->ifc_nand.nand_fcr0, - NAND_CMD_RESET << IFC_NAND_FCR0_CMD0_SHIFT); + iowrite32be(IFC_FIR_OP_CW0 << IFC_NAND_FIR0_OP0_SHIFT, + &ifc->ifc_nand.nand_fir0); + iowrite32be(NAND_CMD_RESET << IFC_NAND_FCR0_CMD0_SHIFT, + &ifc->ifc_nand.nand_fcr0); fsl_ifc_run_command(mtd); return; @@ -639,18 +645,18 @@ static int fsl_ifc_wait(struct mtd_info *mtd, struct nand_chip *chip) u32 nand_fsr; /* Use READ_STATUS command, but wait for the device to be ready */ - out_be32(&ifc->ifc_nand.nand_fir0, - (IFC_FIR_OP_CW0 << IFC_NAND_FIR0_OP0_SHIFT) | - (IFC_FIR_OP_RDSTAT << IFC_NAND_FIR0_OP1_SHIFT)); - out_be32(&ifc->ifc_nand.nand_fcr0, NAND_CMD_STATUS << - IFC_NAND_FCR0_CMD0_SHIFT); - out_be32(&ifc->ifc_nand.nand_fbcr, 1); + iowrite32be((IFC_FIR_OP_CW0 << IFC_NAND_FIR0_OP0_SHIFT) | + (IFC_FIR_OP_RDSTAT << IFC_NAND_FIR0_OP1_SHIFT), + &ifc->ifc_nand.nand_fir0); + iowrite32be(NAND_CMD_STATUS << IFC_NAND_FCR0_CMD0_SHIFT, + &ifc->ifc_nand.nand_fcr0); + iowrite32be(1, &ifc->ifc_nand.nand_fbcr); set_addr(mtd, 0, 0, 0); ifc_nand_ctrl->read_bytes = 1; fsl_ifc_run_command(mtd); - nand_fsr = in_be32(&ifc->ifc_nand.nand_fsr); + nand_fsr = ioread32be(&ifc->ifc_nand.nand_fsr); /* * The chip always seems to report that it is @@ -744,34 +750,34 @@ static void fsl_ifc_sram_init(struct fsl_ifc_mtd *priv) uint32_t cs = priv->bank; /* Save CSOR and CSOR_ext */ - csor = in_be32(&ifc->csor_cs[cs].csor); - csor_ext = in_be32(&ifc->csor_cs[cs].csor_ext); + csor = ioread32be(&ifc->csor_cs[cs].csor); + csor_ext = ioread32be(&ifc->csor_cs[cs].csor_ext); /* chage PageSize 8K and SpareSize 1K*/ csor_8k = (csor & ~(CSOR_NAND_PGS_MASK)) | 0x0018C000; - out_be32(&ifc->csor_cs[cs].csor, csor_8k); - out_be32(&ifc->csor_cs[cs].csor_ext, 0x0000400); + iowrite32be(csor_8k, &ifc->csor_cs[cs].csor); + iowrite32be(0x0000400, &ifc->csor_cs[cs].csor_ext); /* READID */ - out_be32(&ifc->ifc_nand.nand_fir0, - (IFC_FIR_OP_CW0 << IFC_NAND_FIR0_OP0_SHIFT) | - (IFC_FIR_OP_UA << IFC_NAND_FIR0_OP1_SHIFT) | - (IFC_FIR_OP_RB << IFC_NAND_FIR0_OP2_SHIFT)); - out_be32(&ifc->ifc_nand.nand_fcr0, - NAND_CMD_READID << IFC_NAND_FCR0_CMD0_SHIFT); - out_be32(&ifc->ifc_nand.row3, 0x0); + iowrite32be((IFC_FIR_OP_CW0 << IFC_NAND_FIR0_OP0_SHIFT) | + (IFC_FIR_OP_UA << IFC_NAND_FIR0_OP1_SHIFT) | + (IFC_FIR_OP_RB << IFC_NAND_FIR0_OP2_SHIFT), + &ifc->ifc_nand.nand_fir0); + iowrite32be(NAND_CMD_READID << IFC_NAND_FCR0_CMD0_SHIFT, + &ifc->ifc_nand.nand_fcr0); + iowrite32be(0x0, &ifc->ifc_nand.row3); - out_be32(&ifc->ifc_nand.nand_fbcr, 0x0); + iowrite32be(0x0, &ifc->ifc_nand.nand_fbcr); /* Program ROW0/COL0 */ - out_be32(&ifc->ifc_nand.row0, 0x0); - out_be32(&ifc->ifc_nand.col0, 0x0); + iowrite32be(0x0, &ifc->ifc_nand.row0); + iowrite32be(0x0, &ifc->ifc_nand.col0); /* set the chip select for NAND Transaction */ - out_be32(&ifc->ifc_nand.nand_csel, cs << IFC_NAND_CSEL_SHIFT); + iowrite32be(cs << IFC_NAND_CSEL_SHIFT, &ifc->ifc_nand.nand_csel); /* start read seq */ - out_be32(&ifc->ifc_nand.nandseq_strt, IFC_NAND_SEQ_STRT_FIR_STRT); + iowrite32be(IFC_NAND_SEQ_STRT_FIR_STRT, &ifc->ifc_nand.nandseq_strt); /* wait for command complete flag or timeout */ wait_event_timeout(ctrl->nand_wait, ctrl->nand_stat, @@ -781,8 +787,8 @@ static void fsl_ifc_sram_init(struct fsl_ifc_mtd *priv) printk(KERN_ERR "fsl-ifc: Failed to Initialise SRAM\n"); /* Restore CSOR and CSOR_ext */ - out_be32(&ifc->csor_cs[cs].csor, csor); - out_be32(&ifc->csor_cs[cs].csor_ext, csor_ext); + iowrite32be(csor, &ifc->csor_cs[cs].csor); + iowrite32be(csor_ext, &ifc->csor_cs[cs].csor_ext); } static int fsl_ifc_chip_init(struct fsl_ifc_mtd *priv) @@ -799,7 +805,7 @@ static int fsl_ifc_chip_init(struct fsl_ifc_mtd *priv) /* fill in nand_chip structure */ /* set up function call table */ - if ((in_be32(&ifc->cspr_cs[priv->bank].cspr)) & CSPR_PORT_SIZE_16) + if ((ioread32be(&ifc->cspr_cs[priv->bank].cspr)) & CSPR_PORT_SIZE_16) chip->read_byte = fsl_ifc_read_byte16; else chip->read_byte = fsl_ifc_read_byte; @@ -813,13 +819,13 @@ static int fsl_ifc_chip_init(struct fsl_ifc_mtd *priv) chip->bbt_td = &bbt_main_descr; chip->bbt_md = &bbt_mirror_descr; - out_be32(&ifc->ifc_nand.ncfgr, 0x0); + iowrite32be(0x0, &ifc->ifc_nand.ncfgr); /* set up nand options */ chip->bbt_options = NAND_BBT_USE_FLASH; - if (in_be32(&ifc->cspr_cs[priv->bank].cspr) & CSPR_PORT_SIZE_16) { + if (ioread32be(&ifc->cspr_cs[priv->bank].cspr) & CSPR_PORT_SIZE_16) { chip->read_byte = fsl_ifc_read_byte16; chip->options |= NAND_BUSWIDTH_16; } else { @@ -832,7 +838,7 @@ static int fsl_ifc_chip_init(struct fsl_ifc_mtd *priv) chip->ecc.read_page = fsl_ifc_read_page; chip->ecc.write_page = fsl_ifc_write_page; - csor = in_be32(&ifc->csor_cs[priv->bank].csor); + csor = ioread32be(&ifc->csor_cs[priv->bank].csor); /* Hardware generates ECC per 512 Bytes */ chip->ecc.size = 512; @@ -884,7 +890,7 @@ static int fsl_ifc_chip_init(struct fsl_ifc_mtd *priv) chip->ecc.mode = NAND_ECC_SOFT; } - ver = in_be32(&ifc->ifc_rev); + ver = ioread32be(&ifc->ifc_rev); if (ver == FSL_IFC_V1_1_0) fsl_ifc_sram_init(priv); @@ -910,7 +916,7 @@ static int fsl_ifc_chip_remove(struct fsl_ifc_mtd *priv) static int match_bank(struct fsl_ifc_regs __iomem *ifc, int bank, phys_addr_t addr) { - u32 cspr = in_be32(&ifc->cspr_cs[bank].cspr); + u32 cspr = ioread32be(&ifc->cspr_cs[bank].cspr); if (!(cspr & CSPR_V)) return 0; @@ -997,17 +1003,16 @@ static int fsl_ifc_nand_probe(struct platform_device *dev) dev_set_drvdata(priv->dev, priv); - out_be32(&ifc->ifc_nand.nand_evter_en, - IFC_NAND_EVTER_EN_OPC_EN | - IFC_NAND_EVTER_EN_FTOER_EN | - IFC_NAND_EVTER_EN_WPER_EN); + iowrite32be(IFC_NAND_EVTER_EN_OPC_EN | + IFC_NAND_EVTER_EN_FTOER_EN | + IFC_NAND_EVTER_EN_WPER_EN, + &ifc->ifc_nand.nand_evter_en); /* enable NAND Machine Interrupts */ - out_be32(&ifc->ifc_nand.nand_evter_intr_en, - IFC_NAND_EVTER_INTR_OPCIR_EN | - IFC_NAND_EVTER_INTR_FTOERIR_EN | - IFC_NAND_EVTER_INTR_WPERIR_EN); - + iowrite32be(IFC_NAND_EVTER_INTR_OPCIR_EN | + IFC_NAND_EVTER_INTR_FTOERIR_EN | + IFC_NAND_EVTER_INTR_WPERIR_EN, + &ifc->ifc_nand.nand_evter_intr_en); priv->mtd.name = kasprintf(GFP_KERNEL, "%x.flash", (unsigned)res.start); if (!priv->mtd.name) { ret = -ENOMEM; diff --git a/drivers/mtd/nand/gpmi-nand/bch-regs.h b/drivers/mtd/nand/gpmi-nand/bch-regs.h index a092451..588f537 100644 --- a/drivers/mtd/nand/gpmi-nand/bch-regs.h +++ b/drivers/mtd/nand/gpmi-nand/bch-regs.h @@ -61,6 +61,16 @@ & BM_BCH_FLASH0LAYOUT0_ECC0) \ ) +#define MX6Q_BP_BCH_FLASH0LAYOUT0_GF_13_14 10 +#define MX6Q_BM_BCH_FLASH0LAYOUT0_GF_13_14 \ + (0x1 << MX6Q_BP_BCH_FLASH0LAYOUT0_GF_13_14) +#define BF_BCH_FLASH0LAYOUT0_GF(v, x) \ + ((GPMI_IS_MX6Q(x) && ((v) == 14)) \ + ? (((1) << MX6Q_BP_BCH_FLASH0LAYOUT0_GF_13_14) \ + & MX6Q_BM_BCH_FLASH0LAYOUT0_GF_13_14) \ + : 0 \ + ) + #define BP_BCH_FLASH0LAYOUT0_DATA0_SIZE 0 #define BM_BCH_FLASH0LAYOUT0_DATA0_SIZE \ (0xfff << BP_BCH_FLASH0LAYOUT0_DATA0_SIZE) @@ -93,6 +103,16 @@ & BM_BCH_FLASH0LAYOUT1_ECCN) \ ) +#define MX6Q_BP_BCH_FLASH0LAYOUT1_GF_13_14 10 +#define MX6Q_BM_BCH_FLASH0LAYOUT1_GF_13_14 \ + (0x1 << MX6Q_BP_BCH_FLASH0LAYOUT1_GF_13_14) +#define BF_BCH_FLASH0LAYOUT1_GF(v, x) \ + ((GPMI_IS_MX6Q(x) && ((v) == 14)) \ + ? (((1) << MX6Q_BP_BCH_FLASH0LAYOUT1_GF_13_14) \ + & MX6Q_BM_BCH_FLASH0LAYOUT1_GF_13_14) \ + : 0 \ + ) + #define BP_BCH_FLASH0LAYOUT1_DATAN_SIZE 0 #define BM_BCH_FLASH0LAYOUT1_DATAN_SIZE \ (0xfff << BP_BCH_FLASH0LAYOUT1_DATAN_SIZE) @@ -103,4 +123,6 @@ ? (((v) >> 2) & MX6Q_BM_BCH_FLASH0LAYOUT1_DATAN_SIZE) \ : ((v) & BM_BCH_FLASH0LAYOUT1_DATAN_SIZE) \ ) + +#define HW_BCH_VERSION 0x00000160 #endif diff --git a/drivers/mtd/nand/gpmi-nand/gpmi-lib.c b/drivers/mtd/nand/gpmi-nand/gpmi-lib.c index d84699c..4f8857f 100644 --- a/drivers/mtd/nand/gpmi-nand/gpmi-lib.c +++ b/drivers/mtd/nand/gpmi-nand/gpmi-lib.c @@ -208,6 +208,11 @@ void gpmi_dump_info(struct gpmi_nand_data *this) } /* start to print out the BCH info */ + pr_err("Show BCH registers :\n"); + for (i = 0; i <= HW_BCH_VERSION / 0x10 + 1; i++) { + reg = readl(r->bch_regs + i * 0x10); + pr_err("offset 0x%.3x : 0x%.8x\n", i * 0x10, reg); + } pr_err("BCH Geometry :\n"); pr_err("GF length : %u\n", geo->gf_len); pr_err("ECC Strength : %u\n", geo->ecc_strength); @@ -232,6 +237,7 @@ int bch_set_geometry(struct gpmi_nand_data *this) unsigned int metadata_size; unsigned int ecc_strength; unsigned int page_size; + unsigned int gf_len; int ret; if (common_nfc_set_geometry(this)) @@ -242,6 +248,7 @@ int bch_set_geometry(struct gpmi_nand_data *this) metadata_size = bch_geo->metadata_size; ecc_strength = bch_geo->ecc_strength >> 1; page_size = bch_geo->page_size; + gf_len = bch_geo->gf_len; ret = gpmi_enable_clk(this); if (ret) @@ -263,11 +270,13 @@ int bch_set_geometry(struct gpmi_nand_data *this) writel(BF_BCH_FLASH0LAYOUT0_NBLOCKS(block_count) | BF_BCH_FLASH0LAYOUT0_META_SIZE(metadata_size) | BF_BCH_FLASH0LAYOUT0_ECC0(ecc_strength, this) + | BF_BCH_FLASH0LAYOUT0_GF(gf_len, this) | BF_BCH_FLASH0LAYOUT0_DATA0_SIZE(block_size, this), r->bch_regs + HW_BCH_FLASH0LAYOUT0); writel(BF_BCH_FLASH0LAYOUT1_PAGE_SIZE(page_size) | BF_BCH_FLASH0LAYOUT1_ECCN(ecc_strength, this) + | BF_BCH_FLASH0LAYOUT1_GF(gf_len, this) | BF_BCH_FLASH0LAYOUT1_DATAN_SIZE(block_size, this), r->bch_regs + HW_BCH_FLASH0LAYOUT1); diff --git a/drivers/mtd/nand/gpmi-nand/gpmi-nand.c b/drivers/mtd/nand/gpmi-nand/gpmi-nand.c index e9b1c47..717881a 100644 --- a/drivers/mtd/nand/gpmi-nand/gpmi-nand.c +++ b/drivers/mtd/nand/gpmi-nand/gpmi-nand.c @@ -94,6 +94,25 @@ static inline int get_ecc_strength(struct gpmi_nand_data *this) return round_down(ecc_strength, 2); } +static inline bool gpmi_check_ecc(struct gpmi_nand_data *this) +{ + struct bch_geometry *geo = &this->bch_geometry; + + /* Do the sanity check. */ + if (GPMI_IS_MX23(this) || GPMI_IS_MX28(this)) { + /* The mx23/mx28 only support the GF13. */ + if (geo->gf_len == 14) + return false; + + if (geo->ecc_strength > MXS_ECC_STRENGTH_MAX) + return false; + } else if (GPMI_IS_MX6Q(this)) { + if (geo->ecc_strength > MX6_ECC_STRENGTH_MAX) + return false; + } + return true; +} + int common_nfc_set_geometry(struct gpmi_nand_data *this) { struct bch_geometry *geo = &this->bch_geometry; @@ -112,17 +131,24 @@ int common_nfc_set_geometry(struct gpmi_nand_data *this) /* The default for the length of Galois Field. */ geo->gf_len = 13; - /* The default for chunk size. There is no oobsize greater then 512. */ + /* The default for chunk size. */ geo->ecc_chunk_size = 512; - while (geo->ecc_chunk_size < mtd->oobsize) + while (geo->ecc_chunk_size < mtd->oobsize) { geo->ecc_chunk_size *= 2; /* keep C >= O */ + geo->gf_len = 14; + } geo->ecc_chunk_count = mtd->writesize / geo->ecc_chunk_size; /* We use the same ECC strength for all chunks. */ geo->ecc_strength = get_ecc_strength(this); - if (!geo->ecc_strength) { - pr_err("wrong ECC strength.\n"); + if (!gpmi_check_ecc(this)) { + dev_err(this->dev, + "We can not support this nand chip." + " Its required ecc strength(%d) is beyond our" + " capability(%d).\n", geo->ecc_strength, + (GPMI_IS_MX6Q(this) ? MX6_ECC_STRENGTH_MAX + : MXS_ECC_STRENGTH_MAX)); return -EINVAL; } @@ -920,8 +946,7 @@ static int gpmi_ecc_read_page(struct mtd_info *mtd, struct nand_chip *chip, dma_addr_t auxiliary_phys; unsigned int i; unsigned char *status; - unsigned int failed; - unsigned int corrected; + unsigned int max_bitflips = 0; int ret; pr_debug("page number is : %d\n", page); @@ -945,35 +970,25 @@ static int gpmi_ecc_read_page(struct mtd_info *mtd, struct nand_chip *chip, payload_virt, payload_phys); if (ret) { pr_err("Error in ECC-based read: %d\n", ret); - goto exit_nfc; + return ret; } /* handle the block mark swapping */ block_mark_swapping(this, payload_virt, auxiliary_virt); /* Loop over status bytes, accumulating ECC status. */ - failed = 0; - corrected = 0; - status = auxiliary_virt + nfc_geo->auxiliary_status_offset; + status = auxiliary_virt + nfc_geo->auxiliary_status_offset; for (i = 0; i < nfc_geo->ecc_chunk_count; i++, status++) { if ((*status == STATUS_GOOD) || (*status == STATUS_ERASED)) continue; if (*status == STATUS_UNCORRECTABLE) { - failed++; + mtd->ecc_stats.failed++; continue; } - corrected += *status; - } - - /* - * Propagate ECC status to the owning MTD only when failed or - * corrected times nearly reaches our ECC correction threshold. - */ - if (failed || corrected >= (nfc_geo->ecc_strength - 1)) { - mtd->ecc_stats.failed += failed; - mtd->ecc_stats.corrected += corrected; + mtd->ecc_stats.corrected += *status; + max_bitflips = max_t(unsigned int, max_bitflips, *status); } if (oob_required) { @@ -995,8 +1010,8 @@ static int gpmi_ecc_read_page(struct mtd_info *mtd, struct nand_chip *chip, this->payload_virt, this->payload_phys, nfc_geo->payload_size, payload_virt, payload_phys); -exit_nfc: - return ret; + + return max_bitflips; } static int gpmi_ecc_write_page(struct mtd_info *mtd, struct nand_chip *chip, @@ -1668,8 +1683,8 @@ exit_nfc_init: release_resources(this); exit_acquire_resources: platform_set_drvdata(pdev, NULL); - kfree(this); dev_err(this->dev, "driver registration failed: %d\n", ret); + kfree(this); return ret; } diff --git a/drivers/mtd/nand/gpmi-nand/gpmi-nand.h b/drivers/mtd/nand/gpmi-nand/gpmi-nand.h index 3d93a5e..0729477 100644 --- a/drivers/mtd/nand/gpmi-nand/gpmi-nand.h +++ b/drivers/mtd/nand/gpmi-nand/gpmi-nand.h @@ -284,6 +284,10 @@ extern int gpmi_read_page(struct gpmi_nand_data *, #define STATUS_ERASED 0xff #define STATUS_UNCORRECTABLE 0xfe +/* BCH's bit correction capability. */ +#define MXS_ECC_STRENGTH_MAX 20 /* mx23 and mx28 */ +#define MX6_ECC_STRENGTH_MAX 40 + /* Use the platform_id to distinguish different Archs. */ #define IS_MX23 0x0 #define IS_MX28 0x1 diff --git a/drivers/mtd/nand/mxc_nand.c b/drivers/mtd/nand/mxc_nand.c index 60ac5b9..07e5784 100644 --- a/drivers/mtd/nand/mxc_nand.c +++ b/drivers/mtd/nand/mxc_nand.c @@ -530,12 +530,23 @@ static void send_page_v1(struct mtd_info *mtd, unsigned int ops) static void send_read_id_v3(struct mxc_nand_host *host) { + struct nand_chip *this = &host->nand; + /* Read ID into main buffer */ writel(NFC_ID, NFC_V3_LAUNCH); wait_op_done(host, true); memcpy32_fromio(host->data_buf, host->main_area0, 16); + + if (this->options & NAND_BUSWIDTH_16) { + /* compress the ID info */ + host->data_buf[1] = host->data_buf[2]; + host->data_buf[2] = host->data_buf[4]; + host->data_buf[3] = host->data_buf[6]; + host->data_buf[4] = host->data_buf[8]; + host->data_buf[5] = host->data_buf[10]; + } } /* Request the NANDFC to perform a read of the NAND device ID. */ diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c index 3766682..4321415 100644 --- a/drivers/mtd/nand/nand_base.c +++ b/drivers/mtd/nand/nand_base.c @@ -825,13 +825,8 @@ static void panic_nand_wait(struct mtd_info *mtd, struct nand_chip *chip, static int nand_wait(struct mtd_info *mtd, struct nand_chip *chip) { - unsigned long timeo = jiffies; int status, state = chip->state; - - if (state == FL_ERASING) - timeo += (HZ * 400) / 1000; - else - timeo += (HZ * 20) / 1000; + unsigned long timeo = (state == FL_ERASING ? 400 : 20); led_trigger_event(nand_led_trigger, LED_FULL); @@ -849,6 +844,7 @@ static int nand_wait(struct mtd_info *mtd, struct nand_chip *chip) if (in_interrupt() || oops_in_progress) panic_nand_wait(mtd, chip, timeo); else { + timeo = jiffies + msecs_to_jiffies(timeo); while (time_before(jiffies, timeo)) { if (chip->dev_ready) { if (chip->dev_ready(mtd)) diff --git a/drivers/mtd/nand/nand_ecc.c b/drivers/mtd/nand/nand_ecc.c index b7cfe0d..053c9a2 100644 --- a/drivers/mtd/nand/nand_ecc.c +++ b/drivers/mtd/nand/nand_ecc.c @@ -55,8 +55,7 @@ struct mtd_info; #define MODULE_AUTHOR(x) /* x */ #define MODULE_DESCRIPTION(x) /* x */ -#define printk printf -#define KERN_ERR "" +#define pr_err printf #endif /* @@ -507,7 +506,7 @@ int __nand_correct_data(unsigned char *buf, if ((bitsperbyte[b0] + bitsperbyte[b1] + bitsperbyte[b2]) == 1) return 1; /* error in ECC data; no action needed */ - printk(KERN_ERR "uncorrectable error : "); + pr_err("%s: uncorrectable ECC error", __func__); return -1; } EXPORT_SYMBOL(__nand_correct_data); diff --git a/drivers/mtd/nand/nandsim.c b/drivers/mtd/nand/nandsim.c index 8f30d38..891c52a 100644 --- a/drivers/mtd/nand/nandsim.c +++ b/drivers/mtd/nand/nandsim.c @@ -1468,12 +1468,12 @@ int do_read_error(struct nandsim *ns, int num) void do_bit_flips(struct nandsim *ns, int num) { - if (bitflips && random32() < (1 << 22)) { + if (bitflips && prandom_u32() < (1 << 22)) { int flips = 1; if (bitflips > 1) - flips = (random32() % (int) bitflips) + 1; + flips = (prandom_u32() % (int) bitflips) + 1; while (flips--) { - int pos = random32() % (num * 8); + int pos = prandom_u32() % (num * 8); ns->buf.byte[pos / 8] ^= (1 << (pos % 8)); NS_WARN("read_page: flipping bit %d in page %d " "reading from %d ecc: corrected=%u failed=%u\n", diff --git a/drivers/mtd/nand/omap2.c b/drivers/mtd/nand/omap2.c index 1d333497c..8e820dd 100644 --- a/drivers/mtd/nand/omap2.c +++ b/drivers/mtd/nand/omap2.c @@ -22,9 +22,12 @@ #include <linux/omap-dma.h> #include <linux/io.h> #include <linux/slab.h> +#include <linux/of.h> +#include <linux/of_device.h> #ifdef CONFIG_MTD_NAND_OMAP_BCH #include <linux/bch.h> +#include <linux/platform_data/elm.h> #endif #include <linux/platform_data/mtd-nand-omap2.h> @@ -117,6 +120,33 @@ #define OMAP24XX_DMA_GPMC 4 +#define BCH8_MAX_ERROR 8 /* upto 8 bit correctable */ +#define BCH4_MAX_ERROR 4 /* upto 4 bit correctable */ + +#define SECTOR_BYTES 512 +/* 4 bit padding to make byte aligned, 56 = 52 + 4 */ +#define BCH4_BIT_PAD 4 +#define BCH8_ECC_MAX ((SECTOR_BYTES + BCH8_ECC_OOB_BYTES) * 8) +#define BCH4_ECC_MAX ((SECTOR_BYTES + BCH4_ECC_OOB_BYTES) * 8) + +/* GPMC ecc engine settings for read */ +#define BCH_WRAPMODE_1 1 /* BCH wrap mode 1 */ +#define BCH8R_ECC_SIZE0 0x1a /* ecc_size0 = 26 */ +#define BCH8R_ECC_SIZE1 0x2 /* ecc_size1 = 2 */ +#define BCH4R_ECC_SIZE0 0xd /* ecc_size0 = 13 */ +#define BCH4R_ECC_SIZE1 0x3 /* ecc_size1 = 3 */ + +/* GPMC ecc engine settings for write */ +#define BCH_WRAPMODE_6 6 /* BCH wrap mode 6 */ +#define BCH_ECC_SIZE0 0x0 /* ecc_size0 = 0, no oob protection */ +#define BCH_ECC_SIZE1 0x20 /* ecc_size1 = 32 */ + +#ifdef CONFIG_MTD_NAND_OMAP_BCH +static u_char bch8_vector[] = {0xf3, 0xdb, 0x14, 0x16, 0x8b, 0xd2, 0xbe, 0xcc, + 0xac, 0x6b, 0xff, 0x99, 0x7b}; +static u_char bch4_vector[] = {0x00, 0x6b, 0x31, 0xdd, 0x41, 0xbc, 0x10}; +#endif + /* oob info generated runtime depending on ecc algorithm and layout selected */ static struct nand_ecclayout omap_oobinfo; /* Define some generic bad / good block scan pattern which are used @@ -156,6 +186,9 @@ struct omap_nand_info { #ifdef CONFIG_MTD_NAND_OMAP_BCH struct bch_control *bch; struct nand_ecclayout ecclayout; + bool is_elm_used; + struct device *elm_dev; + struct device_node *of_node; #endif }; @@ -1031,6 +1064,13 @@ static int omap_dev_ready(struct mtd_info *mtd) * omap3_enable_hwecc_bch - Program OMAP3 GPMC to perform BCH ECC correction * @mtd: MTD device structure * @mode: Read/Write mode + * + * When using BCH, sector size is hardcoded to 512 bytes. + * Using wrapping mode 6 both for reading and writing if ELM module not uses + * for error correction. + * On writing, + * eccsize0 = 0 (no additional protected byte in spare area) + * eccsize1 = 32 (skip 32 nibbles = 16 bytes per sector in spare area) */ static void omap3_enable_hwecc_bch(struct mtd_info *mtd, int mode) { @@ -1039,32 +1079,57 @@ static void omap3_enable_hwecc_bch(struct mtd_info *mtd, int mode) struct omap_nand_info *info = container_of(mtd, struct omap_nand_info, mtd); struct nand_chip *chip = mtd->priv; - u32 val; + u32 val, wr_mode; + unsigned int ecc_size1, ecc_size0; + + /* Using wrapping mode 6 for writing */ + wr_mode = BCH_WRAPMODE_6; - nerrors = (info->nand.ecc.bytes == 13) ? 8 : 4; - dev_width = (chip->options & NAND_BUSWIDTH_16) ? 1 : 0; - nsectors = 1; /* - * Program GPMC to perform correction on one 512-byte sector at a time. - * Using 4 sectors at a time (i.e. ecc.size = 2048) is also possible and - * gives a slight (5%) performance gain (but requires additional code). + * ECC engine enabled for valid ecc_size0 nibbles + * and disabled for ecc_size1 nibbles. */ + ecc_size0 = BCH_ECC_SIZE0; + ecc_size1 = BCH_ECC_SIZE1; + + /* Perform ecc calculation on 512-byte sector */ + nsectors = 1; + + /* Update number of error correction */ + nerrors = info->nand.ecc.strength; + + /* Multi sector reading/writing for NAND flash with page size < 4096 */ + if (info->is_elm_used && (mtd->writesize <= 4096)) { + if (mode == NAND_ECC_READ) { + /* Using wrapping mode 1 for reading */ + wr_mode = BCH_WRAPMODE_1; + + /* + * ECC engine enabled for ecc_size0 nibbles + * and disabled for ecc_size1 nibbles. + */ + ecc_size0 = (nerrors == 8) ? + BCH8R_ECC_SIZE0 : BCH4R_ECC_SIZE0; + ecc_size1 = (nerrors == 8) ? + BCH8R_ECC_SIZE1 : BCH4R_ECC_SIZE1; + } + + /* Perform ecc calculation for one page (< 4096) */ + nsectors = info->nand.ecc.steps; + } writel(ECC1, info->reg.gpmc_ecc_control); - /* - * When using BCH, sector size is hardcoded to 512 bytes. - * Here we are using wrapping mode 6 both for reading and writing, with: - * size0 = 0 (no additional protected byte in spare area) - * size1 = 32 (skip 32 nibbles = 16 bytes per sector in spare area) - */ - val = (32 << ECCSIZE1_SHIFT) | (0 << ECCSIZE0_SHIFT); + /* Configure ecc size for BCH */ + val = (ecc_size1 << ECCSIZE1_SHIFT) | (ecc_size0 << ECCSIZE0_SHIFT); writel(val, info->reg.gpmc_ecc_size_config); + dev_width = (chip->options & NAND_BUSWIDTH_16) ? 1 : 0; + /* BCH configuration */ val = ((1 << 16) | /* enable BCH */ (((nerrors == 8) ? 1 : 0) << 12) | /* 8 or 4 bits */ - (0x06 << 8) | /* wrap mode = 6 */ + (wr_mode << 8) | /* wrap mode */ (dev_width << 7) | /* bus width */ (((nsectors-1) & 0x7) << 4) | /* number of sectors */ (info->gpmc_cs << 1) | /* ECC CS */ @@ -1072,7 +1137,7 @@ static void omap3_enable_hwecc_bch(struct mtd_info *mtd, int mode) writel(val, info->reg.gpmc_ecc_config); - /* clear ecc and enable bits */ + /* Clear ecc and enable bits */ writel(ECCCLEAR | ECC1, info->reg.gpmc_ecc_control); } @@ -1162,6 +1227,298 @@ static int omap3_calculate_ecc_bch8(struct mtd_info *mtd, const u_char *dat, } /** + * omap3_calculate_ecc_bch - Generate bytes of ECC bytes + * @mtd: MTD device structure + * @dat: The pointer to data on which ecc is computed + * @ecc_code: The ecc_code buffer + * + * Support calculating of BCH4/8 ecc vectors for the page + */ +static int omap3_calculate_ecc_bch(struct mtd_info *mtd, const u_char *dat, + u_char *ecc_code) +{ + struct omap_nand_info *info = container_of(mtd, struct omap_nand_info, + mtd); + unsigned long nsectors, bch_val1, bch_val2, bch_val3, bch_val4; + int i, eccbchtsel; + + nsectors = ((readl(info->reg.gpmc_ecc_config) >> 4) & 0x7) + 1; + /* + * find BCH scheme used + * 0 -> BCH4 + * 1 -> BCH8 + */ + eccbchtsel = ((readl(info->reg.gpmc_ecc_config) >> 12) & 0x3); + + for (i = 0; i < nsectors; i++) { + + /* Read hw-computed remainder */ + bch_val1 = readl(info->reg.gpmc_bch_result0[i]); + bch_val2 = readl(info->reg.gpmc_bch_result1[i]); + if (eccbchtsel) { + bch_val3 = readl(info->reg.gpmc_bch_result2[i]); + bch_val4 = readl(info->reg.gpmc_bch_result3[i]); + } + + if (eccbchtsel) { + /* BCH8 ecc scheme */ + *ecc_code++ = (bch_val4 & 0xFF); + *ecc_code++ = ((bch_val3 >> 24) & 0xFF); + *ecc_code++ = ((bch_val3 >> 16) & 0xFF); + *ecc_code++ = ((bch_val3 >> 8) & 0xFF); + *ecc_code++ = (bch_val3 & 0xFF); + *ecc_code++ = ((bch_val2 >> 24) & 0xFF); + *ecc_code++ = ((bch_val2 >> 16) & 0xFF); + *ecc_code++ = ((bch_val2 >> 8) & 0xFF); + *ecc_code++ = (bch_val2 & 0xFF); + *ecc_code++ = ((bch_val1 >> 24) & 0xFF); + *ecc_code++ = ((bch_val1 >> 16) & 0xFF); + *ecc_code++ = ((bch_val1 >> 8) & 0xFF); + *ecc_code++ = (bch_val1 & 0xFF); + /* + * Setting 14th byte to zero to handle + * erased page & maintain compatibility + * with RBL + */ + *ecc_code++ = 0x0; + } else { + /* BCH4 ecc scheme */ + *ecc_code++ = ((bch_val2 >> 12) & 0xFF); + *ecc_code++ = ((bch_val2 >> 4) & 0xFF); + *ecc_code++ = ((bch_val2 & 0xF) << 4) | + ((bch_val1 >> 28) & 0xF); + *ecc_code++ = ((bch_val1 >> 20) & 0xFF); + *ecc_code++ = ((bch_val1 >> 12) & 0xFF); + *ecc_code++ = ((bch_val1 >> 4) & 0xFF); + *ecc_code++ = ((bch_val1 & 0xF) << 4); + /* + * Setting 8th byte to zero to handle + * erased page + */ + *ecc_code++ = 0x0; + } + } + + return 0; +} + +/** + * erased_sector_bitflips - count bit flips + * @data: data sector buffer + * @oob: oob buffer + * @info: omap_nand_info + * + * Check the bit flips in erased page falls below correctable level. + * If falls below, report the page as erased with correctable bit + * flip, else report as uncorrectable page. + */ +static int erased_sector_bitflips(u_char *data, u_char *oob, + struct omap_nand_info *info) +{ + int flip_bits = 0, i; + + for (i = 0; i < info->nand.ecc.size; i++) { + flip_bits += hweight8(~data[i]); + if (flip_bits > info->nand.ecc.strength) + return 0; + } + + for (i = 0; i < info->nand.ecc.bytes - 1; i++) { + flip_bits += hweight8(~oob[i]); + if (flip_bits > info->nand.ecc.strength) + return 0; + } + + /* + * Bit flips falls in correctable level. + * Fill data area with 0xFF + */ + if (flip_bits) { + memset(data, 0xFF, info->nand.ecc.size); + memset(oob, 0xFF, info->nand.ecc.bytes); + } + + return flip_bits; +} + +/** + * omap_elm_correct_data - corrects page data area in case error reported + * @mtd: MTD device structure + * @data: page data + * @read_ecc: ecc read from nand flash + * @calc_ecc: ecc read from HW ECC registers + * + * Calculated ecc vector reported as zero in case of non-error pages. + * In case of error/erased pages non-zero error vector is reported. + * In case of non-zero ecc vector, check read_ecc at fixed offset + * (x = 13/7 in case of BCH8/4 == 0) to find page programmed or not. + * To handle bit flips in this data, count the number of 0's in + * read_ecc[x] and check if it greater than 4. If it is less, it is + * programmed page, else erased page. + * + * 1. If page is erased, check with standard ecc vector (ecc vector + * for erased page to find any bit flip). If check fails, bit flip + * is present in erased page. Count the bit flips in erased page and + * if it falls under correctable level, report page with 0xFF and + * update the correctable bit information. + * 2. If error is reported on programmed page, update elm error + * vector and correct the page with ELM error correction routine. + * + */ +static int omap_elm_correct_data(struct mtd_info *mtd, u_char *data, + u_char *read_ecc, u_char *calc_ecc) +{ + struct omap_nand_info *info = container_of(mtd, struct omap_nand_info, + mtd); + int eccsteps = info->nand.ecc.steps; + int i , j, stat = 0; + int eccsize, eccflag, ecc_vector_size; + struct elm_errorvec err_vec[ERROR_VECTOR_MAX]; + u_char *ecc_vec = calc_ecc; + u_char *spare_ecc = read_ecc; + u_char *erased_ecc_vec; + enum bch_ecc type; + bool is_error_reported = false; + + /* Initialize elm error vector to zero */ + memset(err_vec, 0, sizeof(err_vec)); + + if (info->nand.ecc.strength == BCH8_MAX_ERROR) { + type = BCH8_ECC; + erased_ecc_vec = bch8_vector; + } else { + type = BCH4_ECC; + erased_ecc_vec = bch4_vector; + } + + ecc_vector_size = info->nand.ecc.bytes; + + /* + * Remove extra byte padding for BCH8 RBL + * compatibility and erased page handling + */ + eccsize = ecc_vector_size - 1; + + for (i = 0; i < eccsteps ; i++) { + eccflag = 0; /* initialize eccflag */ + + /* + * Check any error reported, + * In case of error, non zero ecc reported. + */ + + for (j = 0; (j < eccsize); j++) { + if (calc_ecc[j] != 0) { + eccflag = 1; /* non zero ecc, error present */ + break; + } + } + + if (eccflag == 1) { + /* + * Set threshold to minimum of 4, half of ecc.strength/2 + * to allow max bit flip in byte to 4 + */ + unsigned int threshold = min_t(unsigned int, 4, + info->nand.ecc.strength / 2); + + /* + * Check data area is programmed by counting + * number of 0's at fixed offset in spare area. + * Checking count of 0's against threshold. + * In case programmed page expects at least threshold + * zeros in byte. + * If zeros are less than threshold for programmed page/ + * zeros are more than threshold erased page, either + * case page reported as uncorrectable. + */ + if (hweight8(~read_ecc[eccsize]) >= threshold) { + /* + * Update elm error vector as + * data area is programmed + */ + err_vec[i].error_reported = true; + is_error_reported = true; + } else { + /* Error reported in erased page */ + int bitflip_count; + u_char *buf = &data[info->nand.ecc.size * i]; + + if (memcmp(calc_ecc, erased_ecc_vec, eccsize)) { + bitflip_count = erased_sector_bitflips( + buf, read_ecc, info); + + if (bitflip_count) + stat += bitflip_count; + else + return -EINVAL; + } + } + } + + /* Update the ecc vector */ + calc_ecc += ecc_vector_size; + read_ecc += ecc_vector_size; + } + + /* Check if any error reported */ + if (!is_error_reported) + return 0; + + /* Decode BCH error using ELM module */ + elm_decode_bch_error_page(info->elm_dev, ecc_vec, err_vec); + + for (i = 0; i < eccsteps; i++) { + if (err_vec[i].error_reported) { + for (j = 0; j < err_vec[i].error_count; j++) { + u32 bit_pos, byte_pos, error_max, pos; + + if (type == BCH8_ECC) + error_max = BCH8_ECC_MAX; + else + error_max = BCH4_ECC_MAX; + + if (info->nand.ecc.strength == BCH8_MAX_ERROR) + pos = err_vec[i].error_loc[j]; + else + /* Add 4 to take care 4 bit padding */ + pos = err_vec[i].error_loc[j] + + BCH4_BIT_PAD; + + /* Calculate bit position of error */ + bit_pos = pos % 8; + + /* Calculate byte position of error */ + byte_pos = (error_max - pos - 1) / 8; + + if (pos < error_max) { + if (byte_pos < 512) + data[byte_pos] ^= 1 << bit_pos; + else + spare_ecc[byte_pos - 512] ^= + 1 << bit_pos; + } + /* else, not interested to correct ecc */ + } + } + + /* Update number of correctable errors */ + stat += err_vec[i].error_count; + + /* Update page data with sector size */ + data += info->nand.ecc.size; + spare_ecc += ecc_vector_size; + } + + for (i = 0; i < eccsteps; i++) + /* Return error if uncorrectable error present */ + if (err_vec[i].error_uncorrectable) + return -EINVAL; + + return stat; +} + +/** * omap3_correct_data_bch - Decode received data and correct errors * @mtd: MTD device structure * @data: page data @@ -1194,6 +1551,92 @@ static int omap3_correct_data_bch(struct mtd_info *mtd, u_char *data, } /** + * omap_write_page_bch - BCH ecc based write page function for entire page + * @mtd: mtd info structure + * @chip: nand chip info structure + * @buf: data buffer + * @oob_required: must write chip->oob_poi to OOB + * + * Custom write page method evolved to support multi sector writing in one shot + */ +static int omap_write_page_bch(struct mtd_info *mtd, struct nand_chip *chip, + const uint8_t *buf, int oob_required) +{ + int i; + uint8_t *ecc_calc = chip->buffers->ecccalc; + uint32_t *eccpos = chip->ecc.layout->eccpos; + + /* Enable GPMC ecc engine */ + chip->ecc.hwctl(mtd, NAND_ECC_WRITE); + + /* Write data */ + chip->write_buf(mtd, buf, mtd->writesize); + + /* Update ecc vector from GPMC result registers */ + chip->ecc.calculate(mtd, buf, &ecc_calc[0]); + + for (i = 0; i < chip->ecc.total; i++) + chip->oob_poi[eccpos[i]] = ecc_calc[i]; + + /* Write ecc vector to OOB area */ + chip->write_buf(mtd, chip->oob_poi, mtd->oobsize); + return 0; +} + +/** + * omap_read_page_bch - BCH ecc based page read function for entire page + * @mtd: mtd info structure + * @chip: nand chip info structure + * @buf: buffer to store read data + * @oob_required: caller requires OOB data read to chip->oob_poi + * @page: page number to read + * + * For BCH ecc scheme, GPMC used for syndrome calculation and ELM module + * used for error correction. + * Custom method evolved to support ELM error correction & multi sector + * reading. On reading page data area is read along with OOB data with + * ecc engine enabled. ecc vector updated after read of OOB data. + * For non error pages ecc vector reported as zero. + */ +static int omap_read_page_bch(struct mtd_info *mtd, struct nand_chip *chip, + uint8_t *buf, int oob_required, int page) +{ + uint8_t *ecc_calc = chip->buffers->ecccalc; + uint8_t *ecc_code = chip->buffers->ecccode; + uint32_t *eccpos = chip->ecc.layout->eccpos; + uint8_t *oob = &chip->oob_poi[eccpos[0]]; + uint32_t oob_pos = mtd->writesize + chip->ecc.layout->eccpos[0]; + int stat; + unsigned int max_bitflips = 0; + + /* Enable GPMC ecc engine */ + chip->ecc.hwctl(mtd, NAND_ECC_READ); + + /* Read data */ + chip->read_buf(mtd, buf, mtd->writesize); + + /* Read oob bytes */ + chip->cmdfunc(mtd, NAND_CMD_RNDOUT, oob_pos, -1); + chip->read_buf(mtd, oob, chip->ecc.total); + + /* Calculate ecc bytes */ + chip->ecc.calculate(mtd, buf, ecc_calc); + + memcpy(ecc_code, &chip->oob_poi[eccpos[0]], chip->ecc.total); + + stat = chip->ecc.correct(mtd, buf, ecc_code, ecc_calc); + + if (stat < 0) { + mtd->ecc_stats.failed++; + } else { + mtd->ecc_stats.corrected += stat; + max_bitflips = max_t(unsigned int, max_bitflips, stat); + } + + return max_bitflips; +} + +/** * omap3_free_bch - Release BCH ecc resources * @mtd: MTD device structure */ @@ -1218,43 +1661,86 @@ static int omap3_init_bch(struct mtd_info *mtd, int ecc_opt) struct omap_nand_info *info = container_of(mtd, struct omap_nand_info, mtd); #ifdef CONFIG_MTD_NAND_OMAP_BCH8 - const int hw_errors = 8; + const int hw_errors = BCH8_MAX_ERROR; #else - const int hw_errors = 4; + const int hw_errors = BCH4_MAX_ERROR; #endif + enum bch_ecc bch_type; + const __be32 *parp; + int lenp; + struct device_node *elm_node; + info->bch = NULL; - max_errors = (ecc_opt == OMAP_ECC_BCH8_CODE_HW) ? 8 : 4; + max_errors = (ecc_opt == OMAP_ECC_BCH8_CODE_HW) ? + BCH8_MAX_ERROR : BCH4_MAX_ERROR; if (max_errors != hw_errors) { pr_err("cannot configure %d-bit BCH ecc, only %d-bit supported", max_errors, hw_errors); goto fail; } - /* software bch library is only used to detect and locate errors */ - info->bch = init_bch(13, max_errors, 0x201b /* hw polynomial */); - if (!info->bch) - goto fail; + info->nand.ecc.size = 512; + info->nand.ecc.hwctl = omap3_enable_hwecc_bch; + info->nand.ecc.mode = NAND_ECC_HW; + info->nand.ecc.strength = max_errors; - info->nand.ecc.size = 512; - info->nand.ecc.hwctl = omap3_enable_hwecc_bch; - info->nand.ecc.correct = omap3_correct_data_bch; - info->nand.ecc.mode = NAND_ECC_HW; + if (hw_errors == BCH8_MAX_ERROR) + bch_type = BCH8_ECC; + else + bch_type = BCH4_ECC; - /* - * The number of corrected errors in an ecc block that will trigger - * block scrubbing defaults to the ecc strength (4 or 8). - * Set mtd->bitflip_threshold here to define a custom threshold. - */ + /* Detect availability of ELM module */ + parp = of_get_property(info->of_node, "elm_id", &lenp); + if ((parp == NULL) && (lenp != (sizeof(void *) * 2))) { + pr_err("Missing elm_id property, fall back to Software BCH\n"); + info->is_elm_used = false; + } else { + struct platform_device *pdev; - if (max_errors == 8) { - info->nand.ecc.strength = 8; - info->nand.ecc.bytes = 13; - info->nand.ecc.calculate = omap3_calculate_ecc_bch8; + elm_node = of_find_node_by_phandle(be32_to_cpup(parp)); + pdev = of_find_device_by_node(elm_node); + info->elm_dev = &pdev->dev; + elm_config(info->elm_dev, bch_type); + info->is_elm_used = true; + } + + if (info->is_elm_used && (mtd->writesize <= 4096)) { + + if (hw_errors == BCH8_MAX_ERROR) + info->nand.ecc.bytes = BCH8_SIZE; + else + info->nand.ecc.bytes = BCH4_SIZE; + + info->nand.ecc.correct = omap_elm_correct_data; + info->nand.ecc.calculate = omap3_calculate_ecc_bch; + info->nand.ecc.read_page = omap_read_page_bch; + info->nand.ecc.write_page = omap_write_page_bch; } else { - info->nand.ecc.strength = 4; - info->nand.ecc.bytes = 7; - info->nand.ecc.calculate = omap3_calculate_ecc_bch4; + /* + * software bch library is only used to detect and + * locate errors + */ + info->bch = init_bch(13, max_errors, + 0x201b /* hw polynomial */); + if (!info->bch) + goto fail; + + info->nand.ecc.correct = omap3_correct_data_bch; + + /* + * The number of corrected errors in an ecc block that will + * trigger block scrubbing defaults to the ecc strength (4 or 8) + * Set mtd->bitflip_threshold here to define a custom threshold. + */ + + if (max_errors == 8) { + info->nand.ecc.bytes = 13; + info->nand.ecc.calculate = omap3_calculate_ecc_bch8; + } else { + info->nand.ecc.bytes = 7; + info->nand.ecc.calculate = omap3_calculate_ecc_bch4; + } } pr_info("enabling NAND BCH ecc with %d-bit correction\n", max_errors); @@ -1270,7 +1756,7 @@ fail: */ static int omap3_init_bch_tail(struct mtd_info *mtd) { - int i, steps; + int i, steps, offset; struct omap_nand_info *info = container_of(mtd, struct omap_nand_info, mtd); struct nand_ecclayout *layout = &info->ecclayout; @@ -1292,11 +1778,21 @@ static int omap3_init_bch_tail(struct mtd_info *mtd) goto fail; } + /* ECC layout compatible with RBL for BCH8 */ + if (info->is_elm_used && (info->nand.ecc.bytes == BCH8_SIZE)) + offset = 2; + else + offset = mtd->oobsize - layout->eccbytes; + /* put ecc bytes at oob tail */ for (i = 0; i < layout->eccbytes; i++) - layout->eccpos[i] = mtd->oobsize-layout->eccbytes+i; + layout->eccpos[i] = offset + i; + + if (info->is_elm_used && (info->nand.ecc.bytes == BCH8_SIZE)) + layout->oobfree[0].offset = 2 + layout->eccbytes * steps; + else + layout->oobfree[0].offset = 2; - layout->oobfree[0].offset = 2; layout->oobfree[0].length = mtd->oobsize-2-layout->eccbytes; info->nand.ecc.layout = layout; @@ -1360,6 +1856,9 @@ static int omap_nand_probe(struct platform_device *pdev) info->nand.options = pdata->devsize; info->nand.options |= NAND_SKIP_BBTSCAN; +#ifdef CONFIG_MTD_NAND_OMAP_BCH + info->of_node = pdata->of_node; +#endif res = platform_get_resource(pdev, IORESOURCE_MEM, 0); if (res == NULL) { diff --git a/drivers/mtd/ofpart.c b/drivers/mtd/ofpart.c index dbd3aa5..30bd907 100644 --- a/drivers/mtd/ofpart.c +++ b/drivers/mtd/ofpart.c @@ -174,7 +174,14 @@ out: return rc; } +static void __exit ofpart_parser_exit(void) +{ + deregister_mtd_parser(&ofpart_parser); + deregister_mtd_parser(&ofoldpart_parser); +} + module_init(ofpart_parser_init); +module_exit(ofpart_parser_exit); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Parser for MTD partitioning information in device tree"); diff --git a/drivers/mtd/tests/mtd_nandecctest.c b/drivers/mtd/tests/mtd_nandecctest.c index db2f22e..7010660 100644 --- a/drivers/mtd/tests/mtd_nandecctest.c +++ b/drivers/mtd/tests/mtd_nandecctest.c @@ -44,7 +44,7 @@ struct nand_ecc_test { static void single_bit_error_data(void *error_data, void *correct_data, size_t size) { - unsigned int offset = random32() % (size * BITS_PER_BYTE); + unsigned int offset = prandom_u32() % (size * BITS_PER_BYTE); memcpy(error_data, correct_data, size); __change_bit_le(offset, error_data); @@ -55,9 +55,9 @@ static void double_bit_error_data(void *error_data, void *correct_data, { unsigned int offset[2]; - offset[0] = random32() % (size * BITS_PER_BYTE); + offset[0] = prandom_u32() % (size * BITS_PER_BYTE); do { - offset[1] = random32() % (size * BITS_PER_BYTE); + offset[1] = prandom_u32() % (size * BITS_PER_BYTE); } while (offset[0] == offset[1]); memcpy(error_data, correct_data, size); @@ -68,7 +68,7 @@ static void double_bit_error_data(void *error_data, void *correct_data, static unsigned int random_ecc_bit(size_t size) { - unsigned int offset = random32() % (3 * BITS_PER_BYTE); + unsigned int offset = prandom_u32() % (3 * BITS_PER_BYTE); if (size == 256) { /* @@ -76,7 +76,7 @@ static unsigned int random_ecc_bit(size_t size) * and 17th bit) in ECC code for 256 byte data block */ while (offset == 16 || offset == 17) - offset = random32() % (3 * BITS_PER_BYTE); + offset = prandom_u32() % (3 * BITS_PER_BYTE); } return offset; diff --git a/drivers/mtd/tests/mtd_stresstest.c b/drivers/mtd/tests/mtd_stresstest.c index 2d7e6cf..787f539 100644 --- a/drivers/mtd/tests/mtd_stresstest.c +++ b/drivers/mtd/tests/mtd_stresstest.c @@ -55,7 +55,7 @@ static int rand_eb(void) unsigned int eb; again: - eb = random32(); + eb = prandom_u32(); /* Read or write up 2 eraseblocks at a time - hence 'ebcnt - 1' */ eb %= (ebcnt - 1); if (bbt[eb]) @@ -67,7 +67,7 @@ static int rand_offs(void) { unsigned int offs; - offs = random32(); + offs = prandom_u32(); offs %= bufsize; return offs; } @@ -76,7 +76,7 @@ static int rand_len(int offs) { unsigned int len; - len = random32(); + len = prandom_u32(); len %= (bufsize - offs); return len; } @@ -191,7 +191,7 @@ static int do_write(void) static int do_operation(void) { - if (random32() & 1) + if (prandom_u32() & 1) return do_read(); else return do_write(); diff --git a/drivers/mtd/tests/mtd_torturetest.c b/drivers/mtd/tests/mtd_torturetest.c index c4cde1e..3a9f6a6 100644 --- a/drivers/mtd/tests/mtd_torturetest.c +++ b/drivers/mtd/tests/mtd_torturetest.c @@ -208,7 +208,7 @@ static inline int write_pattern(int ebnum, void *buf) static int __init tort_init(void) { int err = 0, i, infinite = !cycles_count; - int bad_ebs[ebcnt]; + int *bad_ebs; printk(KERN_INFO "\n"); printk(KERN_INFO "=================================================\n"); @@ -250,28 +250,24 @@ static int __init tort_init(void) err = -ENOMEM; patt_5A5 = kmalloc(mtd->erasesize, GFP_KERNEL); - if (!patt_5A5) { - pr_err("error: cannot allocate memory\n"); + if (!patt_5A5) goto out_mtd; - } patt_A5A = kmalloc(mtd->erasesize, GFP_KERNEL); - if (!patt_A5A) { - pr_err("error: cannot allocate memory\n"); + if (!patt_A5A) goto out_patt_5A5; - } patt_FF = kmalloc(mtd->erasesize, GFP_KERNEL); - if (!patt_FF) { - pr_err("error: cannot allocate memory\n"); + if (!patt_FF) goto out_patt_A5A; - } check_buf = kmalloc(mtd->erasesize, GFP_KERNEL); - if (!check_buf) { - pr_err("error: cannot allocate memory\n"); + if (!check_buf) goto out_patt_FF; - } + + bad_ebs = kcalloc(ebcnt, sizeof(*bad_ebs), GFP_KERNEL); + if (!bad_ebs) + goto out_check_buf; err = 0; @@ -290,7 +286,6 @@ static int __init tort_init(void) /* * Check if there is a bad eraseblock among those we are going to test. */ - memset(&bad_ebs[0], 0, sizeof(int) * ebcnt); if (mtd_can_have_bb(mtd)) { for (i = eb; i < eb + ebcnt; i++) { err = mtd_block_isbad(mtd, (loff_t)i * mtd->erasesize); @@ -394,6 +389,8 @@ out: pr_info("finished after %u erase cycles\n", erase_cycles); + kfree(bad_ebs); +out_check_buf: kfree(check_buf); out_patt_FF: kfree(patt_FF); diff --git a/drivers/mtd/ubi/debug.h b/drivers/mtd/ubi/debug.h index 33f8f3b..cba89fc 100644 --- a/drivers/mtd/ubi/debug.h +++ b/drivers/mtd/ubi/debug.h @@ -86,7 +86,7 @@ static inline int ubi_dbg_is_bgt_disabled(const struct ubi_device *ubi) static inline int ubi_dbg_is_bitflip(const struct ubi_device *ubi) { if (ubi->dbg.emulate_bitflips) - return !(random32() % 200); + return !(prandom_u32() % 200); return 0; } @@ -100,7 +100,7 @@ static inline int ubi_dbg_is_bitflip(const struct ubi_device *ubi) static inline int ubi_dbg_is_write_failure(const struct ubi_device *ubi) { if (ubi->dbg.emulate_io_failures) - return !(random32() % 500); + return !(prandom_u32() % 500); return 0; } @@ -114,7 +114,7 @@ static inline int ubi_dbg_is_write_failure(const struct ubi_device *ubi) static inline int ubi_dbg_is_erase_failure(const struct ubi_device *ubi) { if (ubi->dbg.emulate_io_failures) - return !(random32() % 400); + return !(prandom_u32() % 400); return 0; } diff --git a/drivers/net/ethernet/broadcom/b44.c b/drivers/net/ethernet/broadcom/b44.c index a7efec2..9b017d9 100644 --- a/drivers/net/ethernet/broadcom/b44.c +++ b/drivers/net/ethernet/broadcom/b44.c @@ -381,7 +381,7 @@ static void b44_set_flow_ctrl(struct b44 *bp, u32 local, u32 remote) } #ifdef CONFIG_BCM47XX -#include <asm/mach-bcm47xx/nvram.h> +#include <bcm47xx_nvram.h> static void b44_wap54g10_workaround(struct b44 *bp) { char buf[20]; @@ -393,7 +393,7 @@ static void b44_wap54g10_workaround(struct b44 *bp) * see https://dev.openwrt.org/ticket/146 * check and reset bit "isolate" */ - if (nvram_getenv("boardnum", buf, sizeof(buf)) < 0) + if (bcm47xx_nvram_getenv("boardnum", buf, sizeof(buf)) < 0) return; if (simple_strtoul(buf, NULL, 0) == 2) { err = __b44_readphy(bp, 0, MII_BMCR, &val); diff --git a/drivers/net/ethernet/broadcom/bgmac.c b/drivers/net/ethernet/broadcom/bgmac.c index bf985c0..639049d 100644 --- a/drivers/net/ethernet/broadcom/bgmac.c +++ b/drivers/net/ethernet/broadcom/bgmac.c @@ -15,7 +15,7 @@ #include <linux/mii.h> #include <linux/interrupt.h> #include <linux/dma-mapping.h> -#include <asm/mach-bcm47xx/nvram.h> +#include <bcm47xx_nvram.h> static const struct bcma_device_id bgmac_bcma_tbl[] = { BCMA_CORE(BCMA_MANUF_BCM, BCMA_CORE_4706_MAC_GBIT, BCMA_ANY_REV, BCMA_ANY_CLASS), @@ -908,7 +908,7 @@ static void bgmac_chip_reset(struct bgmac *bgmac) BGMAC_CHIPCTL_1_IF_TYPE_RMII; char buf[2]; - if (nvram_getenv("et_swtype", buf, 1) > 0) { + if (bcm47xx_nvram_getenv("et_swtype", buf, 1) > 0) { if (kstrtou8(buf, 0, &et_swtype)) bgmac_err(bgmac, "Failed to parse et_swtype (%s)\n", buf); @@ -1386,7 +1386,7 @@ static int bgmac_probe(struct bcma_device *core) } bgmac->int_mask = BGMAC_IS_ERRMASK | BGMAC_IS_RX | BGMAC_IS_TX_MASK; - if (nvram_getenv("et0_no_txint", NULL, 0) == 0) + if (bcm47xx_nvram_getenv("et0_no_txint", NULL, 0) == 0) bgmac->int_mask &= ~BGMAC_IS_TX_MASK; /* TODO: reset the external phy. Specs are needed */ diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig index 7ab0b2f..3338437 100644 --- a/drivers/platform/x86/Kconfig +++ b/drivers/platform/x86/Kconfig @@ -79,6 +79,17 @@ config ASUS_LAPTOP If you have an ACPI-compatible ASUS laptop, say Y or M here. +config CHROMEOS_LAPTOP + tristate "Chrome OS Laptop" + depends on I2C + depends on DMI + ---help--- + This driver instantiates i2c and smbus devices such as + light sensors and touchpads. + + If you have a supported Chromebook, choose Y or M here. + The module will be called chromeos_laptop. + config DELL_LAPTOP tristate "Dell Laptop Extras" depends on X86 @@ -288,9 +299,11 @@ config IDEAPAD_LAPTOP depends on ACPI depends on RFKILL && INPUT depends on SERIO_I8042 + depends on BACKLIGHT_CLASS_DEVICE select INPUT_SPARSEKMAP help - This is a driver for the rfkill switches on Lenovo IdeaPad netbooks. + This is a driver for Lenovo IdeaPad netbooks contains drivers for + rfkill switch, hotkey, fan control and backlight control. config THINKPAD_ACPI tristate "ThinkPad ACPI Laptop Extras" diff --git a/drivers/platform/x86/Makefile b/drivers/platform/x86/Makefile index bf7e4f9..ace2b38 100644 --- a/drivers/platform/x86/Makefile +++ b/drivers/platform/x86/Makefile @@ -50,3 +50,4 @@ obj-$(CONFIG_INTEL_MID_POWER_BUTTON) += intel_mid_powerbtn.o obj-$(CONFIG_INTEL_OAKTRAIL) += intel_oaktrail.o obj-$(CONFIG_SAMSUNG_Q10) += samsung-q10.o obj-$(CONFIG_APPLE_GMUX) += apple-gmux.o +obj-$(CONFIG_CHROMEOS_LAPTOP) += chromeos_laptop.o diff --git a/drivers/platform/x86/acer-wmi.c b/drivers/platform/x86/acer-wmi.c index afed701..c9076bd 100644 --- a/drivers/platform/x86/acer-wmi.c +++ b/drivers/platform/x86/acer-wmi.c @@ -511,6 +511,24 @@ static struct dmi_system_id acer_quirks[] = { }, .driver_data = &quirk_fujitsu_amilo_li_1718, }, + { + .callback = dmi_matched, + .ident = "Lenovo Ideapad S205-10382JG", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_NAME, "10382JG"), + }, + .driver_data = &quirk_lenovo_ideapad_s205, + }, + { + .callback = dmi_matched, + .ident = "Lenovo Ideapad S205-1038DPG", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_NAME, "1038DPG"), + }, + .driver_data = &quirk_lenovo_ideapad_s205, + }, {} }; @@ -1204,6 +1222,9 @@ static acpi_status WMID_set_capabilities(void) devices = *((u32 *) obj->buffer.pointer); } else if (obj->type == ACPI_TYPE_INTEGER) { devices = (u32) obj->integer.value; + } else { + kfree(out.pointer); + return AE_ERROR; } } else { kfree(out.pointer); diff --git a/drivers/platform/x86/asus-laptop.c b/drivers/platform/x86/asus-laptop.c index d9f9a0d..0eea09c 100644 --- a/drivers/platform/x86/asus-laptop.c +++ b/drivers/platform/x86/asus-laptop.c @@ -128,10 +128,12 @@ MODULE_PARM_DESC(als_status, "Set the ALS status on boot " /* * Some events we use, same for all Asus */ -#define ATKD_BR_UP 0x10 /* (event & ~ATKD_BR_UP) = brightness level */ -#define ATKD_BR_DOWN 0x20 /* (event & ~ATKD_BR_DOWN) = britghness level */ -#define ATKD_BR_MIN ATKD_BR_UP -#define ATKD_BR_MAX (ATKD_BR_DOWN | 0xF) /* 0x2f */ +#define ATKD_BRNUP_MIN 0x10 +#define ATKD_BRNUP_MAX 0x1f +#define ATKD_BRNDOWN_MIN 0x20 +#define ATKD_BRNDOWN_MAX 0x2f +#define ATKD_BRNDOWN 0x20 +#define ATKD_BRNUP 0x2f #define ATKD_LCD_ON 0x33 #define ATKD_LCD_OFF 0x34 @@ -301,40 +303,65 @@ static const struct key_entry asus_keymap[] = { {KE_KEY, 0x17, { KEY_ZOOM } }, {KE_KEY, 0x1f, { KEY_BATTERY } }, /* End of Lenovo SL Specific keycodes */ + {KE_KEY, ATKD_BRNDOWN, { KEY_BRIGHTNESSDOWN } }, + {KE_KEY, ATKD_BRNUP, { KEY_BRIGHTNESSUP } }, {KE_KEY, 0x30, { KEY_VOLUMEUP } }, {KE_KEY, 0x31, { KEY_VOLUMEDOWN } }, {KE_KEY, 0x32, { KEY_MUTE } }, - {KE_KEY, 0x33, { KEY_SWITCHVIDEOMODE } }, - {KE_KEY, 0x34, { KEY_SWITCHVIDEOMODE } }, + {KE_KEY, 0x33, { KEY_DISPLAYTOGGLE } }, /* LCD on */ + {KE_KEY, 0x34, { KEY_DISPLAY_OFF } }, /* LCD off */ {KE_KEY, 0x40, { KEY_PREVIOUSSONG } }, {KE_KEY, 0x41, { KEY_NEXTSONG } }, - {KE_KEY, 0x43, { KEY_STOPCD } }, + {KE_KEY, 0x43, { KEY_STOPCD } }, /* Stop/Eject */ {KE_KEY, 0x45, { KEY_PLAYPAUSE } }, - {KE_KEY, 0x4c, { KEY_MEDIA } }, + {KE_KEY, 0x4c, { KEY_MEDIA } }, /* WMP Key */ {KE_KEY, 0x50, { KEY_EMAIL } }, {KE_KEY, 0x51, { KEY_WWW } }, {KE_KEY, 0x55, { KEY_CALC } }, + {KE_IGNORE, 0x57, }, /* Battery mode */ + {KE_IGNORE, 0x58, }, /* AC mode */ {KE_KEY, 0x5C, { KEY_SCREENLOCK } }, /* Screenlock */ - {KE_KEY, 0x5D, { KEY_WLAN } }, - {KE_KEY, 0x5E, { KEY_WLAN } }, - {KE_KEY, 0x5F, { KEY_WLAN } }, - {KE_KEY, 0x60, { KEY_SWITCHVIDEOMODE } }, - {KE_KEY, 0x61, { KEY_SWITCHVIDEOMODE } }, - {KE_KEY, 0x62, { KEY_SWITCHVIDEOMODE } }, - {KE_KEY, 0x63, { KEY_SWITCHVIDEOMODE } }, - {KE_KEY, 0x6B, { KEY_F13 } }, /* Lock Touchpad */ + {KE_KEY, 0x5D, { KEY_WLAN } }, /* WLAN Toggle */ + {KE_KEY, 0x5E, { KEY_WLAN } }, /* WLAN Enable */ + {KE_KEY, 0x5F, { KEY_WLAN } }, /* WLAN Disable */ + {KE_KEY, 0x60, { KEY_TOUCHPAD_ON } }, + {KE_KEY, 0x61, { KEY_SWITCHVIDEOMODE } }, /* SDSP LCD only */ + {KE_KEY, 0x62, { KEY_SWITCHVIDEOMODE } }, /* SDSP CRT only */ + {KE_KEY, 0x63, { KEY_SWITCHVIDEOMODE } }, /* SDSP LCD + CRT */ + {KE_KEY, 0x64, { KEY_SWITCHVIDEOMODE } }, /* SDSP TV */ + {KE_KEY, 0x65, { KEY_SWITCHVIDEOMODE } }, /* SDSP LCD + TV */ + {KE_KEY, 0x66, { KEY_SWITCHVIDEOMODE } }, /* SDSP CRT + TV */ + {KE_KEY, 0x67, { KEY_SWITCHVIDEOMODE } }, /* SDSP LCD + CRT + TV */ + {KE_KEY, 0x6B, { KEY_TOUCHPAD_TOGGLE } }, /* Lock Touchpad */ {KE_KEY, 0x6C, { KEY_SLEEP } }, /* Suspend */ {KE_KEY, 0x6D, { KEY_SLEEP } }, /* Hibernate */ - {KE_KEY, 0x7E, { KEY_BLUETOOTH } }, - {KE_KEY, 0x7D, { KEY_BLUETOOTH } }, + {KE_IGNORE, 0x6E, }, /* Low Battery notification */ + {KE_KEY, 0x7D, { KEY_BLUETOOTH } }, /* Bluetooth Enable */ + {KE_KEY, 0x7E, { KEY_BLUETOOTH } }, /* Bluetooth Disable */ {KE_KEY, 0x82, { KEY_CAMERA } }, - {KE_KEY, 0x88, { KEY_WLAN } }, - {KE_KEY, 0x8A, { KEY_PROG1 } }, + {KE_KEY, 0x88, { KEY_RFKILL } }, /* Radio Toggle Key */ + {KE_KEY, 0x8A, { KEY_PROG1 } }, /* Color enhancement mode */ + {KE_KEY, 0x8C, { KEY_SWITCHVIDEOMODE } }, /* SDSP DVI only */ + {KE_KEY, 0x8D, { KEY_SWITCHVIDEOMODE } }, /* SDSP LCD + DVI */ + {KE_KEY, 0x8E, { KEY_SWITCHVIDEOMODE } }, /* SDSP CRT + DVI */ + {KE_KEY, 0x8F, { KEY_SWITCHVIDEOMODE } }, /* SDSP TV + DVI */ + {KE_KEY, 0x90, { KEY_SWITCHVIDEOMODE } }, /* SDSP LCD + CRT + DVI */ + {KE_KEY, 0x91, { KEY_SWITCHVIDEOMODE } }, /* SDSP LCD + TV + DVI */ + {KE_KEY, 0x92, { KEY_SWITCHVIDEOMODE } }, /* SDSP CRT + TV + DVI */ + {KE_KEY, 0x93, { KEY_SWITCHVIDEOMODE } }, /* SDSP LCD + CRT + TV + DVI */ {KE_KEY, 0x95, { KEY_MEDIA } }, {KE_KEY, 0x99, { KEY_PHONE } }, - {KE_KEY, 0xc4, { KEY_KBDILLUMUP } }, - {KE_KEY, 0xc5, { KEY_KBDILLUMDOWN } }, - {KE_KEY, 0xb5, { KEY_CALC } }, + {KE_KEY, 0xA0, { KEY_SWITCHVIDEOMODE } }, /* SDSP HDMI only */ + {KE_KEY, 0xA1, { KEY_SWITCHVIDEOMODE } }, /* SDSP LCD + HDMI */ + {KE_KEY, 0xA2, { KEY_SWITCHVIDEOMODE } }, /* SDSP CRT + HDMI */ + {KE_KEY, 0xA3, { KEY_SWITCHVIDEOMODE } }, /* SDSP TV + HDMI */ + {KE_KEY, 0xA4, { KEY_SWITCHVIDEOMODE } }, /* SDSP LCD + CRT + HDMI */ + {KE_KEY, 0xA5, { KEY_SWITCHVIDEOMODE } }, /* SDSP LCD + TV + HDMI */ + {KE_KEY, 0xA6, { KEY_SWITCHVIDEOMODE } }, /* SDSP CRT + TV + HDMI */ + {KE_KEY, 0xA7, { KEY_SWITCHVIDEOMODE } }, /* SDSP LCD + CRT + TV + HDMI */ + {KE_KEY, 0xB5, { KEY_CALC } }, + {KE_KEY, 0xC4, { KEY_KBDILLUMUP } }, + {KE_KEY, 0xC5, { KEY_KBDILLUMDOWN } }, {KE_END, 0}, }; @@ -1521,15 +1548,19 @@ static void asus_acpi_notify(struct acpi_device *device, u32 event) dev_name(&asus->device->dev), event, count); - /* Brightness events are special */ - if (event >= ATKD_BR_MIN && event <= ATKD_BR_MAX) { + if (event >= ATKD_BRNUP_MIN && event <= ATKD_BRNUP_MAX) + event = ATKD_BRNUP; + else if (event >= ATKD_BRNDOWN_MIN && + event <= ATKD_BRNDOWN_MAX) + event = ATKD_BRNDOWN; - /* Ignore them completely if the acpi video driver is used */ + /* Brightness events are special */ + if (event == ATKD_BRNDOWN || event == ATKD_BRNUP) { if (asus->backlight_device != NULL) { /* Update the backlight device. */ asus_backlight_notify(asus); + return ; } - return ; } /* Accelerometer "coarse orientation change" event */ diff --git a/drivers/platform/x86/asus-nb-wmi.c b/drivers/platform/x86/asus-nb-wmi.c index be79040..210b5b8 100644 --- a/drivers/platform/x86/asus-nb-wmi.c +++ b/drivers/platform/x86/asus-nb-wmi.c @@ -59,6 +59,17 @@ static struct quirk_entry quirk_asus_unknown = { .wapf = 0, }; +/* + * For those machines that need software to control bt/wifi status + * and can't adjust brightness through ACPI interface + * and have duplicate events(ACPI and WMI) for display toggle + */ +static struct quirk_entry quirk_asus_x55u = { + .wapf = 4, + .wmi_backlight_power = true, + .no_display_toggle = true, +}; + static struct quirk_entry quirk_asus_x401u = { .wapf = 4, }; @@ -77,6 +88,15 @@ static struct dmi_system_id asus_quirks[] = { DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), DMI_MATCH(DMI_PRODUCT_NAME, "X401U"), }, + .driver_data = &quirk_asus_x55u, + }, + { + .callback = dmi_matched, + .ident = "ASUSTeK COMPUTER INC. X401A", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), + DMI_MATCH(DMI_PRODUCT_NAME, "X401A"), + }, .driver_data = &quirk_asus_x401u, }, { @@ -95,6 +115,15 @@ static struct dmi_system_id asus_quirks[] = { DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), DMI_MATCH(DMI_PRODUCT_NAME, "X501U"), }, + .driver_data = &quirk_asus_x55u, + }, + { + .callback = dmi_matched, + .ident = "ASUSTeK COMPUTER INC. X501A", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), + DMI_MATCH(DMI_PRODUCT_NAME, "X501A"), + }, .driver_data = &quirk_asus_x401u, }, { @@ -131,7 +160,7 @@ static struct dmi_system_id asus_quirks[] = { DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), DMI_MATCH(DMI_PRODUCT_NAME, "X55U"), }, - .driver_data = &quirk_asus_x401u, + .driver_data = &quirk_asus_x55u, }, { .callback = dmi_matched, @@ -161,6 +190,8 @@ static void asus_nb_wmi_quirks(struct asus_wmi_driver *driver) } static const struct key_entry asus_nb_wmi_keymap[] = { + { KE_KEY, ASUS_WMI_BRN_DOWN, { KEY_BRIGHTNESSDOWN } }, + { KE_KEY, ASUS_WMI_BRN_UP, { KEY_BRIGHTNESSUP } }, { KE_KEY, 0x30, { KEY_VOLUMEUP } }, { KE_KEY, 0x31, { KEY_VOLUMEDOWN } }, { KE_KEY, 0x32, { KEY_MUTE } }, @@ -168,9 +199,9 @@ static const struct key_entry asus_nb_wmi_keymap[] = { { KE_KEY, 0x34, { KEY_DISPLAY_OFF } }, /* LCD off */ { KE_KEY, 0x40, { KEY_PREVIOUSSONG } }, { KE_KEY, 0x41, { KEY_NEXTSONG } }, - { KE_KEY, 0x43, { KEY_STOPCD } }, + { KE_KEY, 0x43, { KEY_STOPCD } }, /* Stop/Eject */ { KE_KEY, 0x45, { KEY_PLAYPAUSE } }, - { KE_KEY, 0x4c, { KEY_MEDIA } }, + { KE_KEY, 0x4c, { KEY_MEDIA } }, /* WMP Key */ { KE_KEY, 0x50, { KEY_EMAIL } }, { KE_KEY, 0x51, { KEY_WWW } }, { KE_KEY, 0x55, { KEY_CALC } }, @@ -180,25 +211,42 @@ static const struct key_entry asus_nb_wmi_keymap[] = { { KE_KEY, 0x5D, { KEY_WLAN } }, /* Wireless console Toggle */ { KE_KEY, 0x5E, { KEY_WLAN } }, /* Wireless console Enable */ { KE_KEY, 0x5F, { KEY_WLAN } }, /* Wireless console Disable */ - { KE_KEY, 0x60, { KEY_SWITCHVIDEOMODE } }, - { KE_KEY, 0x61, { KEY_SWITCHVIDEOMODE } }, - { KE_KEY, 0x62, { KEY_SWITCHVIDEOMODE } }, - { KE_KEY, 0x63, { KEY_SWITCHVIDEOMODE } }, + { KE_KEY, 0x60, { KEY_TOUCHPAD_ON } }, + { KE_KEY, 0x61, { KEY_SWITCHVIDEOMODE } }, /* SDSP LCD only */ + { KE_KEY, 0x62, { KEY_SWITCHVIDEOMODE } }, /* SDSP CRT only */ + { KE_KEY, 0x63, { KEY_SWITCHVIDEOMODE } }, /* SDSP LCD + CRT */ + { KE_KEY, 0x64, { KEY_SWITCHVIDEOMODE } }, /* SDSP TV */ + { KE_KEY, 0x65, { KEY_SWITCHVIDEOMODE } }, /* SDSP LCD + TV */ + { KE_KEY, 0x66, { KEY_SWITCHVIDEOMODE } }, /* SDSP CRT + TV */ + { KE_KEY, 0x67, { KEY_SWITCHVIDEOMODE } }, /* SDSP LCD + CRT + TV */ { KE_KEY, 0x6B, { KEY_TOUCHPAD_TOGGLE } }, - { KE_KEY, 0x7D, { KEY_BLUETOOTH } }, - { KE_KEY, 0x7E, { KEY_BLUETOOTH } }, + { KE_IGNORE, 0x6E, }, /* Low Battery notification */ + { KE_KEY, 0x7D, { KEY_BLUETOOTH } }, /* Bluetooth Enable */ + { KE_KEY, 0x7E, { KEY_BLUETOOTH } }, /* Bluetooth Disable */ { KE_KEY, 0x82, { KEY_CAMERA } }, - { KE_KEY, 0x88, { KEY_RFKILL } }, - { KE_KEY, 0x8A, { KEY_PROG1 } }, + { KE_KEY, 0x88, { KEY_RFKILL } }, /* Radio Toggle Key */ + { KE_KEY, 0x8A, { KEY_PROG1 } }, /* Color enhancement mode */ + { KE_KEY, 0x8C, { KEY_SWITCHVIDEOMODE } }, /* SDSP DVI only */ + { KE_KEY, 0x8D, { KEY_SWITCHVIDEOMODE } }, /* SDSP LCD + DVI */ + { KE_KEY, 0x8E, { KEY_SWITCHVIDEOMODE } }, /* SDSP CRT + DVI */ + { KE_KEY, 0x8F, { KEY_SWITCHVIDEOMODE } }, /* SDSP TV + DVI */ + { KE_KEY, 0x90, { KEY_SWITCHVIDEOMODE } }, /* SDSP LCD + CRT + DVI */ + { KE_KEY, 0x91, { KEY_SWITCHVIDEOMODE } }, /* SDSP LCD + TV + DVI */ + { KE_KEY, 0x92, { KEY_SWITCHVIDEOMODE } }, /* SDSP CRT + TV + DVI */ + { KE_KEY, 0x93, { KEY_SWITCHVIDEOMODE } }, /* SDSP LCD + CRT + TV + DVI */ { KE_KEY, 0x95, { KEY_MEDIA } }, { KE_KEY, 0x99, { KEY_PHONE } }, { KE_KEY, 0xA0, { KEY_SWITCHVIDEOMODE } }, /* SDSP HDMI only */ { KE_KEY, 0xA1, { KEY_SWITCHVIDEOMODE } }, /* SDSP LCD + HDMI */ { KE_KEY, 0xA2, { KEY_SWITCHVIDEOMODE } }, /* SDSP CRT + HDMI */ { KE_KEY, 0xA3, { KEY_SWITCHVIDEOMODE } }, /* SDSP TV + HDMI */ - { KE_KEY, 0xb5, { KEY_CALC } }, - { KE_KEY, 0xc4, { KEY_KBDILLUMUP } }, - { KE_KEY, 0xc5, { KEY_KBDILLUMDOWN } }, + { KE_KEY, 0xA4, { KEY_SWITCHVIDEOMODE } }, /* SDSP LCD + CRT + HDMI */ + { KE_KEY, 0xA5, { KEY_SWITCHVIDEOMODE } }, /* SDSP LCD + TV + HDMI */ + { KE_KEY, 0xA6, { KEY_SWITCHVIDEOMODE } }, /* SDSP CRT + TV + HDMI */ + { KE_KEY, 0xA7, { KEY_SWITCHVIDEOMODE } }, /* SDSP LCD + CRT + TV + HDMI */ + { KE_KEY, 0xB5, { KEY_CALC } }, + { KE_KEY, 0xC4, { KEY_KBDILLUMUP } }, + { KE_KEY, 0xC5, { KEY_KBDILLUMDOWN } }, { KE_END, 0}, }; diff --git a/drivers/platform/x86/asus-wmi.c b/drivers/platform/x86/asus-wmi.c index f80ae4d..c11b242 100644 --- a/drivers/platform/x86/asus-wmi.c +++ b/drivers/platform/x86/asus-wmi.c @@ -187,6 +187,8 @@ struct asus_wmi { struct device *hwmon_device; struct platform_device *platform_device; + struct led_classdev wlan_led; + int wlan_led_wk; struct led_classdev tpd_led; int tpd_led_wk; struct led_classdev kbd_led; @@ -194,6 +196,7 @@ struct asus_wmi { struct workqueue_struct *led_workqueue; struct work_struct tpd_led_work; struct work_struct kbd_led_work; + struct work_struct wlan_led_work; struct asus_rfkill wlan; struct asus_rfkill bluetooth; @@ -456,12 +459,65 @@ static enum led_brightness kbd_led_get(struct led_classdev *led_cdev) return value; } +static int wlan_led_unknown_state(struct asus_wmi *asus) +{ + u32 result; + + asus_wmi_get_devstate(asus, ASUS_WMI_DEVID_WIRELESS_LED, &result); + + return result & ASUS_WMI_DSTS_UNKNOWN_BIT; +} + +static int wlan_led_presence(struct asus_wmi *asus) +{ + u32 result; + + asus_wmi_get_devstate(asus, ASUS_WMI_DEVID_WIRELESS_LED, &result); + + return result & ASUS_WMI_DSTS_PRESENCE_BIT; +} + +static void wlan_led_update(struct work_struct *work) +{ + int ctrl_param; + struct asus_wmi *asus; + + asus = container_of(work, struct asus_wmi, wlan_led_work); + + ctrl_param = asus->wlan_led_wk; + asus_wmi_set_devstate(ASUS_WMI_DEVID_WIRELESS_LED, ctrl_param, NULL); +} + +static void wlan_led_set(struct led_classdev *led_cdev, + enum led_brightness value) +{ + struct asus_wmi *asus; + + asus = container_of(led_cdev, struct asus_wmi, wlan_led); + + asus->wlan_led_wk = !!value; + queue_work(asus->led_workqueue, &asus->wlan_led_work); +} + +static enum led_brightness wlan_led_get(struct led_classdev *led_cdev) +{ + struct asus_wmi *asus; + u32 result; + + asus = container_of(led_cdev, struct asus_wmi, wlan_led); + asus_wmi_get_devstate(asus, ASUS_WMI_DEVID_WIRELESS_LED, &result); + + return result & ASUS_WMI_DSTS_BRIGHTNESS_MASK; +} + static void asus_wmi_led_exit(struct asus_wmi *asus) { if (!IS_ERR_OR_NULL(asus->kbd_led.dev)) led_classdev_unregister(&asus->kbd_led); if (!IS_ERR_OR_NULL(asus->tpd_led.dev)) led_classdev_unregister(&asus->tpd_led); + if (!IS_ERR_OR_NULL(asus->wlan_led.dev)) + led_classdev_unregister(&asus->wlan_led); if (asus->led_workqueue) destroy_workqueue(asus->led_workqueue); } @@ -498,6 +554,23 @@ static int asus_wmi_led_init(struct asus_wmi *asus) rv = led_classdev_register(&asus->platform_device->dev, &asus->kbd_led); + if (rv) + goto error; + } + + if (wlan_led_presence(asus)) { + INIT_WORK(&asus->wlan_led_work, wlan_led_update); + + asus->wlan_led.name = "asus::wlan"; + asus->wlan_led.brightness_set = wlan_led_set; + if (!wlan_led_unknown_state(asus)) + asus->wlan_led.brightness_get = wlan_led_get; + asus->wlan_led.flags = LED_CORE_SUSPENDRESUME; + asus->wlan_led.max_brightness = 1; + asus->wlan_led.default_trigger = "asus-wlan"; + + rv = led_classdev_register(&asus->platform_device->dev, + &asus->wlan_led); } error: @@ -813,6 +886,9 @@ static int asus_new_rfkill(struct asus_wmi *asus, if (!*rfkill) return -EINVAL; + if (dev_id == ASUS_WMI_DEVID_WLAN) + rfkill_set_led_trigger_name(*rfkill, "asus-wlan"); + rfkill_init_sw_state(*rfkill, !result); result = rfkill_register(*rfkill); if (result) { @@ -1265,6 +1341,18 @@ static void asus_wmi_backlight_exit(struct asus_wmi *asus) asus->backlight_device = NULL; } +static int is_display_toggle(int code) +{ + /* display toggle keys */ + if ((code >= 0x61 && code <= 0x67) || + (code >= 0x8c && code <= 0x93) || + (code >= 0xa0 && code <= 0xa7) || + (code >= 0xd0 && code <= 0xd5)) + return 1; + + return 0; +} + static void asus_wmi_notify(u32 value, void *context) { struct asus_wmi *asus = context; @@ -1298,16 +1386,24 @@ static void asus_wmi_notify(u32 value, void *context) } if (code >= NOTIFY_BRNUP_MIN && code <= NOTIFY_BRNUP_MAX) - code = NOTIFY_BRNUP_MIN; + code = ASUS_WMI_BRN_UP; else if (code >= NOTIFY_BRNDOWN_MIN && code <= NOTIFY_BRNDOWN_MAX) - code = NOTIFY_BRNDOWN_MIN; + code = ASUS_WMI_BRN_DOWN; - if (code == NOTIFY_BRNUP_MIN || code == NOTIFY_BRNDOWN_MIN) { - if (!acpi_video_backlight_support()) + if (code == ASUS_WMI_BRN_DOWN || code == ASUS_WMI_BRN_UP) { + if (!acpi_video_backlight_support()) { asus_wmi_backlight_notify(asus, orig_code); - } else if (!sparse_keymap_report_event(asus->inputdev, code, - key_value, autorelease)) + goto exit; + } + } + + if (is_display_toggle(code) && + asus->driver->quirks->no_display_toggle) + goto exit; + + if (!sparse_keymap_report_event(asus->inputdev, code, + key_value, autorelease)) pr_info("Unknown key %x pressed\n", code); exit: diff --git a/drivers/platform/x86/asus-wmi.h b/drivers/platform/x86/asus-wmi.h index 4c9bd38..4da4c8b 100644 --- a/drivers/platform/x86/asus-wmi.h +++ b/drivers/platform/x86/asus-wmi.h @@ -30,6 +30,8 @@ #include <linux/platform_device.h> #define ASUS_WMI_KEY_IGNORE (-1) +#define ASUS_WMI_BRN_DOWN 0x20 +#define ASUS_WMI_BRN_UP 0x2f struct module; struct key_entry; @@ -41,6 +43,13 @@ struct quirk_entry { bool store_backlight_power; bool wmi_backlight_power; int wapf; + /* + * For machines with AMD graphic chips, it will send out WMI event + * and ACPI interrupt at the same time while hitting the hotkey. + * To simplify the problem, we just have to ignore the WMI event, + * and let the ACPI interrupt to send out the key event. + */ + int no_display_toggle; }; struct asus_wmi_driver { diff --git a/drivers/platform/x86/chromeos_laptop.c b/drivers/platform/x86/chromeos_laptop.c new file mode 100644 index 0000000..93d6680 --- /dev/null +++ b/drivers/platform/x86/chromeos_laptop.c @@ -0,0 +1,371 @@ +/* + * chromeos_laptop.c - Driver to instantiate Chromebook i2c/smbus devices. + * + * Author : Benson Leung <bleung@chromium.org> + * + * Copyright (C) 2012 Google, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#include <linux/dmi.h> +#include <linux/i2c.h> +#include <linux/module.h> + +#define ATMEL_TP_I2C_ADDR 0x4b +#define ATMEL_TP_I2C_BL_ADDR 0x25 +#define ATMEL_TS_I2C_ADDR 0x4a +#define ATMEL_TS_I2C_BL_ADDR 0x26 +#define CYAPA_TP_I2C_ADDR 0x67 +#define ISL_ALS_I2C_ADDR 0x44 +#define TAOS_ALS_I2C_ADDR 0x29 + +static struct i2c_client *als; +static struct i2c_client *tp; +static struct i2c_client *ts; + +const char *i2c_adapter_names[] = { + "SMBus I801 adapter", + "i915 gmbus vga", + "i915 gmbus panel", +}; + +/* Keep this enum consistent with i2c_adapter_names */ +enum i2c_adapter_type { + I2C_ADAPTER_SMBUS = 0, + I2C_ADAPTER_VGADDC, + I2C_ADAPTER_PANEL, +}; + +static struct i2c_board_info __initdata cyapa_device = { + I2C_BOARD_INFO("cyapa", CYAPA_TP_I2C_ADDR), + .flags = I2C_CLIENT_WAKE, +}; + +static struct i2c_board_info __initdata isl_als_device = { + I2C_BOARD_INFO("isl29018", ISL_ALS_I2C_ADDR), +}; + +static struct i2c_board_info __initdata tsl2583_als_device = { + I2C_BOARD_INFO("tsl2583", TAOS_ALS_I2C_ADDR), +}; + +static struct i2c_board_info __initdata tsl2563_als_device = { + I2C_BOARD_INFO("tsl2563", TAOS_ALS_I2C_ADDR), +}; + +static struct i2c_board_info __initdata atmel_224s_tp_device = { + I2C_BOARD_INFO("atmel_mxt_tp", ATMEL_TP_I2C_ADDR), + .platform_data = NULL, + .flags = I2C_CLIENT_WAKE, +}; + +static struct i2c_board_info __initdata atmel_1664s_device = { + I2C_BOARD_INFO("atmel_mxt_ts", ATMEL_TS_I2C_ADDR), + .platform_data = NULL, + .flags = I2C_CLIENT_WAKE, +}; + +static struct i2c_client __init *__add_probed_i2c_device( + const char *name, + int bus, + struct i2c_board_info *info, + const unsigned short *addrs) +{ + const struct dmi_device *dmi_dev; + const struct dmi_dev_onboard *dev_data; + struct i2c_adapter *adapter; + struct i2c_client *client; + + if (bus < 0) + return NULL; + /* + * If a name is specified, look for irq platform information stashed + * in DMI_DEV_TYPE_DEV_ONBOARD by the Chrome OS custom system firmware. + */ + if (name) { + dmi_dev = dmi_find_device(DMI_DEV_TYPE_DEV_ONBOARD, name, NULL); + if (!dmi_dev) { + pr_err("%s failed to dmi find device %s.\n", + __func__, + name); + return NULL; + } + dev_data = (struct dmi_dev_onboard *)dmi_dev->device_data; + if (!dev_data) { + pr_err("%s failed to get data from dmi for %s.\n", + __func__, name); + return NULL; + } + info->irq = dev_data->instance; + } + + adapter = i2c_get_adapter(bus); + if (!adapter) { + pr_err("%s failed to get i2c adapter %d.\n", __func__, bus); + return NULL; + } + + /* add the i2c device */ + client = i2c_new_probed_device(adapter, info, addrs, NULL); + if (!client) + pr_err("%s failed to register device %d-%02x\n", + __func__, bus, info->addr); + else + pr_debug("%s added i2c device %d-%02x\n", + __func__, bus, info->addr); + + i2c_put_adapter(adapter); + return client; +} + +static int __init __find_i2c_adap(struct device *dev, void *data) +{ + const char *name = data; + static const char *prefix = "i2c-"; + struct i2c_adapter *adapter; + if (strncmp(dev_name(dev), prefix, strlen(prefix)) != 0) + return 0; + adapter = to_i2c_adapter(dev); + return (strncmp(adapter->name, name, strlen(name)) == 0); +} + +static int __init find_i2c_adapter_num(enum i2c_adapter_type type) +{ + struct device *dev = NULL; + struct i2c_adapter *adapter; + const char *name = i2c_adapter_names[type]; + /* find the adapter by name */ + dev = bus_find_device(&i2c_bus_type, NULL, (void *)name, + __find_i2c_adap); + if (!dev) { + pr_err("%s: i2c adapter %s not found on system.\n", __func__, + name); + return -ENODEV; + } + adapter = to_i2c_adapter(dev); + return adapter->nr; +} + +/* + * Takes a list of addresses in addrs as such : + * { addr1, ... , addrn, I2C_CLIENT_END }; + * add_probed_i2c_device will use i2c_new_probed_device + * and probe for devices at all of the addresses listed. + * Returns NULL if no devices found. + * See Documentation/i2c/instantiating-devices for more information. + */ +static __init struct i2c_client *add_probed_i2c_device( + const char *name, + enum i2c_adapter_type type, + struct i2c_board_info *info, + const unsigned short *addrs) +{ + return __add_probed_i2c_device(name, + find_i2c_adapter_num(type), + info, + addrs); +} + +/* + * Probes for a device at a single address, the one provided by + * info->addr. + * Returns NULL if no device found. + */ +static __init struct i2c_client *add_i2c_device(const char *name, + enum i2c_adapter_type type, + struct i2c_board_info *info) +{ + const unsigned short addr_list[] = { info->addr, I2C_CLIENT_END }; + return __add_probed_i2c_device(name, + find_i2c_adapter_num(type), + info, + addr_list); +} + + +static struct i2c_client __init *add_smbus_device(const char *name, + struct i2c_board_info *info) +{ + return add_i2c_device(name, I2C_ADAPTER_SMBUS, info); +} + +static int __init setup_cyapa_smbus_tp(const struct dmi_system_id *id) +{ + /* add cyapa touchpad on smbus */ + tp = add_smbus_device("trackpad", &cyapa_device); + return 0; +} + +static int __init setup_atmel_224s_tp(const struct dmi_system_id *id) +{ + const unsigned short addr_list[] = { ATMEL_TP_I2C_BL_ADDR, + ATMEL_TP_I2C_ADDR, + I2C_CLIENT_END }; + + /* add atmel mxt touchpad on VGA DDC GMBus */ + tp = add_probed_i2c_device("trackpad", I2C_ADAPTER_VGADDC, + &atmel_224s_tp_device, addr_list); + return 0; +} + +static int __init setup_atmel_1664s_ts(const struct dmi_system_id *id) +{ + const unsigned short addr_list[] = { ATMEL_TS_I2C_BL_ADDR, + ATMEL_TS_I2C_ADDR, + I2C_CLIENT_END }; + + /* add atmel mxt touch device on PANEL GMBus */ + ts = add_probed_i2c_device("touchscreen", I2C_ADAPTER_PANEL, + &atmel_1664s_device, addr_list); + return 0; +} + + +static int __init setup_isl29018_als(const struct dmi_system_id *id) +{ + /* add isl29018 light sensor */ + als = add_smbus_device("lightsensor", &isl_als_device); + return 0; +} + +static int __init setup_isl29023_als(const struct dmi_system_id *id) +{ + /* add isl29023 light sensor on Panel GMBus */ + als = add_i2c_device("lightsensor", I2C_ADAPTER_PANEL, + &isl_als_device); + return 0; +} + +static int __init setup_tsl2583_als(const struct dmi_system_id *id) +{ + /* add tsl2583 light sensor on smbus */ + als = add_smbus_device(NULL, &tsl2583_als_device); + return 0; +} + +static int __init setup_tsl2563_als(const struct dmi_system_id *id) +{ + /* add tsl2563 light sensor on smbus */ + als = add_smbus_device(NULL, &tsl2563_als_device); + return 0; +} + +static struct dmi_system_id __initdata chromeos_laptop_dmi_table[] = { + { + .ident = "Samsung Series 5 550 - Touchpad", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "SAMSUNG"), + DMI_MATCH(DMI_PRODUCT_NAME, "Lumpy"), + }, + .callback = setup_cyapa_smbus_tp, + }, + { + .ident = "Chromebook Pixel - Touchscreen", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "GOOGLE"), + DMI_MATCH(DMI_PRODUCT_NAME, "Link"), + }, + .callback = setup_atmel_1664s_ts, + }, + { + .ident = "Chromebook Pixel - Touchpad", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "GOOGLE"), + DMI_MATCH(DMI_PRODUCT_NAME, "Link"), + }, + .callback = setup_atmel_224s_tp, + }, + { + .ident = "Samsung Series 5 550 - Light Sensor", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "SAMSUNG"), + DMI_MATCH(DMI_PRODUCT_NAME, "Lumpy"), + }, + .callback = setup_isl29018_als, + }, + { + .ident = "Chromebook Pixel - Light Sensor", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "GOOGLE"), + DMI_MATCH(DMI_PRODUCT_NAME, "Link"), + }, + .callback = setup_isl29023_als, + }, + { + .ident = "Acer C7 Chromebook - Touchpad", + .matches = { + DMI_MATCH(DMI_PRODUCT_NAME, "Parrot"), + }, + .callback = setup_cyapa_smbus_tp, + }, + { + .ident = "HP Pavilion 14 Chromebook - Touchpad", + .matches = { + DMI_MATCH(DMI_PRODUCT_NAME, "Butterfly"), + }, + .callback = setup_cyapa_smbus_tp, + }, + { + .ident = "Samsung Series 5 - Light Sensor", + .matches = { + DMI_MATCH(DMI_PRODUCT_NAME, "Alex"), + }, + .callback = setup_tsl2583_als, + }, + { + .ident = "Cr-48 - Light Sensor", + .matches = { + DMI_MATCH(DMI_PRODUCT_NAME, "Mario"), + }, + .callback = setup_tsl2563_als, + }, + { + .ident = "Acer AC700 - Light Sensor", + .matches = { + DMI_MATCH(DMI_PRODUCT_NAME, "ZGB"), + }, + .callback = setup_tsl2563_als, + }, + { } +}; +MODULE_DEVICE_TABLE(dmi, chromeos_laptop_dmi_table); + +static int __init chromeos_laptop_init(void) +{ + if (!dmi_check_system(chromeos_laptop_dmi_table)) { + pr_debug("%s unsupported system.\n", __func__); + return -ENODEV; + } + return 0; +} + +static void __exit chromeos_laptop_exit(void) +{ + if (als) + i2c_unregister_device(als); + if (tp) + i2c_unregister_device(tp); + if (ts) + i2c_unregister_device(ts); +} + +module_init(chromeos_laptop_init); +module_exit(chromeos_laptop_exit); + +MODULE_DESCRIPTION("Chrome OS Laptop driver"); +MODULE_AUTHOR("Benson Leung <bleung@chromium.org>"); +MODULE_LICENSE("GPL"); diff --git a/drivers/platform/x86/eeepc-wmi.c b/drivers/platform/x86/eeepc-wmi.c index 60cb76a..af67e6e 100644 --- a/drivers/platform/x86/eeepc-wmi.c +++ b/drivers/platform/x86/eeepc-wmi.c @@ -63,6 +63,8 @@ MODULE_PARM_DESC(hotplug_wireless, #define HOME_RELEASE 0xe5 static const struct key_entry eeepc_wmi_keymap[] = { + { KE_KEY, ASUS_WMI_BRN_DOWN, { KEY_BRIGHTNESSDOWN } }, + { KE_KEY, ASUS_WMI_BRN_UP, { KEY_BRIGHTNESSUP } }, /* Sleep already handled via generic ACPI code */ { KE_KEY, 0x30, { KEY_VOLUMEUP } }, { KE_KEY, 0x31, { KEY_VOLUMEDOWN } }, diff --git a/drivers/platform/x86/hp-wmi.c b/drivers/platform/x86/hp-wmi.c index 1dde7ac..45cacf7 100644 --- a/drivers/platform/x86/hp-wmi.c +++ b/drivers/platform/x86/hp-wmi.c @@ -60,6 +60,7 @@ enum hp_wmi_radio { HPWMI_WIFI = 0, HPWMI_BLUETOOTH = 1, HPWMI_WWAN = 2, + HPWMI_GPS = 3, }; enum hp_wmi_event_ids { @@ -72,10 +73,6 @@ enum hp_wmi_event_ids { HPWMI_LOCK_SWITCH = 7, }; -static int hp_wmi_bios_setup(struct platform_device *device); -static int __exit hp_wmi_bios_remove(struct platform_device *device); -static int hp_wmi_resume_handler(struct device *device); - struct bios_args { u32 signature; u32 command; @@ -137,6 +134,7 @@ static const struct key_entry hp_wmi_keymap[] = { { KE_KEY, 0x2142, { KEY_MEDIA } }, { KE_KEY, 0x213b, { KEY_INFO } }, { KE_KEY, 0x2169, { KEY_DIRECTION } }, + { KE_KEY, 0x216a, { KEY_SETUP } }, { KE_KEY, 0x231b, { KEY_HELP } }, { KE_END, 0 } }; @@ -147,6 +145,7 @@ static struct platform_device *hp_wmi_platform_dev; static struct rfkill *wifi_rfkill; static struct rfkill *bluetooth_rfkill; static struct rfkill *wwan_rfkill; +static struct rfkill *gps_rfkill; struct rfkill2_device { u8 id; @@ -157,21 +156,6 @@ struct rfkill2_device { static int rfkill2_count; static struct rfkill2_device rfkill2[HPWMI_MAX_RFKILL2_DEVICES]; -static const struct dev_pm_ops hp_wmi_pm_ops = { - .resume = hp_wmi_resume_handler, - .restore = hp_wmi_resume_handler, -}; - -static struct platform_driver hp_wmi_driver = { - .driver = { - .name = "hp-wmi", - .owner = THIS_MODULE, - .pm = &hp_wmi_pm_ops, - }, - .probe = hp_wmi_bios_setup, - .remove = hp_wmi_bios_remove, -}; - /* * hp_wmi_perform_query * @@ -543,6 +527,10 @@ static void hp_wmi_notify(u32 value, void *context) rfkill_set_states(wwan_rfkill, hp_wmi_get_sw_state(HPWMI_WWAN), hp_wmi_get_hw_state(HPWMI_WWAN)); + if (gps_rfkill) + rfkill_set_states(gps_rfkill, + hp_wmi_get_sw_state(HPWMI_GPS), + hp_wmi_get_hw_state(HPWMI_GPS)); break; case HPWMI_CPU_BATTERY_THROTTLE: pr_info("Unimplemented CPU throttle because of 3 Cell battery event detected\n"); @@ -670,7 +658,7 @@ static int hp_wmi_rfkill_setup(struct platform_device *device) (void *) HPWMI_WWAN); if (!wwan_rfkill) { err = -ENOMEM; - goto register_bluetooth_error; + goto register_gps_error; } rfkill_init_sw_state(wwan_rfkill, hp_wmi_get_sw_state(HPWMI_WWAN)); @@ -681,10 +669,33 @@ static int hp_wmi_rfkill_setup(struct platform_device *device) goto register_wwan_err; } + if (wireless & 0x8) { + gps_rfkill = rfkill_alloc("hp-gps", &device->dev, + RFKILL_TYPE_GPS, + &hp_wmi_rfkill_ops, + (void *) HPWMI_GPS); + if (!gps_rfkill) { + err = -ENOMEM; + goto register_bluetooth_error; + } + rfkill_init_sw_state(gps_rfkill, + hp_wmi_get_sw_state(HPWMI_GPS)); + rfkill_set_hw_state(bluetooth_rfkill, + hp_wmi_get_hw_state(HPWMI_GPS)); + err = rfkill_register(gps_rfkill); + if (err) + goto register_gps_error; + } + return 0; register_wwan_err: rfkill_destroy(wwan_rfkill); wwan_rfkill = NULL; + if (gps_rfkill) + rfkill_unregister(gps_rfkill); +register_gps_error: + rfkill_destroy(gps_rfkill); + gps_rfkill = NULL; if (bluetooth_rfkill) rfkill_unregister(bluetooth_rfkill); register_bluetooth_error: @@ -729,6 +740,10 @@ static int hp_wmi_rfkill2_setup(struct platform_device *device) type = RFKILL_TYPE_WWAN; name = "hp-wwan"; break; + case HPWMI_GPS: + type = RFKILL_TYPE_GPS; + name = "hp-gps"; + break; default: pr_warn("unknown device type 0x%x\n", state.device[i].radio_type); @@ -778,7 +793,7 @@ fail: return err; } -static int hp_wmi_bios_setup(struct platform_device *device) +static int __init hp_wmi_bios_setup(struct platform_device *device) { int err; @@ -786,6 +801,7 @@ static int hp_wmi_bios_setup(struct platform_device *device) wifi_rfkill = NULL; bluetooth_rfkill = NULL; wwan_rfkill = NULL; + gps_rfkill = NULL; rfkill2_count = 0; if (hp_wmi_rfkill_setup(device)) @@ -835,6 +851,10 @@ static int __exit hp_wmi_bios_remove(struct platform_device *device) rfkill_unregister(wwan_rfkill); rfkill_destroy(wwan_rfkill); } + if (gps_rfkill) { + rfkill_unregister(gps_rfkill); + rfkill_destroy(gps_rfkill); + } return 0; } @@ -870,51 +890,70 @@ static int hp_wmi_resume_handler(struct device *device) rfkill_set_states(wwan_rfkill, hp_wmi_get_sw_state(HPWMI_WWAN), hp_wmi_get_hw_state(HPWMI_WWAN)); + if (gps_rfkill) + rfkill_set_states(gps_rfkill, + hp_wmi_get_sw_state(HPWMI_GPS), + hp_wmi_get_hw_state(HPWMI_GPS)); return 0; } +static const struct dev_pm_ops hp_wmi_pm_ops = { + .resume = hp_wmi_resume_handler, + .restore = hp_wmi_resume_handler, +}; + +static struct platform_driver hp_wmi_driver = { + .driver = { + .name = "hp-wmi", + .owner = THIS_MODULE, + .pm = &hp_wmi_pm_ops, + }, + .remove = __exit_p(hp_wmi_bios_remove), +}; + static int __init hp_wmi_init(void) { int err; int event_capable = wmi_has_guid(HPWMI_EVENT_GUID); int bios_capable = wmi_has_guid(HPWMI_BIOS_GUID); + if (!bios_capable && !event_capable) + return -ENODEV; + if (event_capable) { err = hp_wmi_input_setup(); if (err) return err; + + //Enable magic for hotkeys that run on the SMBus + ec_write(0xe6,0x6e); } if (bios_capable) { - err = platform_driver_register(&hp_wmi_driver); - if (err) - goto err_driver_reg; - hp_wmi_platform_dev = platform_device_alloc("hp-wmi", -1); - if (!hp_wmi_platform_dev) { - err = -ENOMEM; - goto err_device_alloc; + hp_wmi_platform_dev = + platform_device_register_simple("hp-wmi", -1, NULL, 0); + if (IS_ERR(hp_wmi_platform_dev)) { + err = PTR_ERR(hp_wmi_platform_dev); + goto err_destroy_input; } - err = platform_device_add(hp_wmi_platform_dev); + + err = platform_driver_probe(&hp_wmi_driver, hp_wmi_bios_setup); if (err) - goto err_device_add; + goto err_unregister_device; } - if (!bios_capable && !event_capable) - return -ENODEV; - return 0; -err_device_add: - platform_device_put(hp_wmi_platform_dev); -err_device_alloc: - platform_driver_unregister(&hp_wmi_driver); -err_driver_reg: +err_unregister_device: + platform_device_unregister(hp_wmi_platform_dev); +err_destroy_input: if (event_capable) hp_wmi_input_destroy(); return err; } +module_init(hp_wmi_init); static void __exit hp_wmi_exit(void) { @@ -926,6 +965,4 @@ static void __exit hp_wmi_exit(void) platform_driver_unregister(&hp_wmi_driver); } } - -module_init(hp_wmi_init); module_exit(hp_wmi_exit); diff --git a/drivers/platform/x86/msi-laptop.c b/drivers/platform/x86/msi-laptop.c index 2111dbb..6b22938 100644 --- a/drivers/platform/x86/msi-laptop.c +++ b/drivers/platform/x86/msi-laptop.c @@ -82,8 +82,19 @@ #define MSI_STANDARD_EC_SCM_LOAD_ADDRESS 0x2d #define MSI_STANDARD_EC_SCM_LOAD_MASK (1 << 0) -#define MSI_STANDARD_EC_TOUCHPAD_ADDRESS 0xe4 +#define MSI_STANDARD_EC_FUNCTIONS_ADDRESS 0xe4 +/* Power LED is orange - Turbo mode */ +#define MSI_STANDARD_EC_TURBO_MASK (1 << 1) +/* Power LED is green - ECO mode */ +#define MSI_STANDARD_EC_ECO_MASK (1 << 3) +/* Touchpad is turned on */ #define MSI_STANDARD_EC_TOUCHPAD_MASK (1 << 4) +/* If this bit != bit 1, turbo mode can't be toggled */ +#define MSI_STANDARD_EC_TURBO_COOLDOWN_MASK (1 << 7) + +#define MSI_STANDARD_EC_FAN_ADDRESS 0x33 +/* If zero, fan rotates at maximal speed */ +#define MSI_STANDARD_EC_AUTOFAN_MASK (1 << 0) #ifdef CONFIG_PM_SLEEP static int msi_laptop_resume(struct device *device); @@ -108,23 +119,38 @@ static const struct key_entry msi_laptop_keymap[] = { static struct input_dev *msi_laptop_input_dev; -static bool old_ec_model; static int wlan_s, bluetooth_s, threeg_s; static int threeg_exists; - -/* Some MSI 3G netbook only have one fn key to control Wlan/Bluetooth/3G, - * those netbook will load the SCM (windows app) to disable the original - * Wlan/Bluetooth control by BIOS when user press fn key, then control - * Wlan/Bluetooth/3G by SCM (software control by OS). Without SCM, user - * cann't on/off 3G module on those 3G netbook. - * On Linux, msi-laptop driver will do the same thing to disable the - * original BIOS control, then might need use HAL or other userland - * application to do the software control that simulate with SCM. - * e.g. MSI N034 netbook - */ -static bool load_scm_model; static struct rfkill *rfk_wlan, *rfk_bluetooth, *rfk_threeg; +/* MSI laptop quirks */ +struct quirk_entry { + bool old_ec_model; + + /* Some MSI 3G netbook only have one fn key to control + * Wlan/Bluetooth/3G, those netbook will load the SCM (windows app) to + * disable the original Wlan/Bluetooth control by BIOS when user press + * fn key, then control Wlan/Bluetooth/3G by SCM (software control by + * OS). Without SCM, user cann't on/off 3G module on those 3G netbook. + * On Linux, msi-laptop driver will do the same thing to disable the + * original BIOS control, then might need use HAL or other userland + * application to do the software control that simulate with SCM. + * e.g. MSI N034 netbook + */ + bool load_scm_model; + + /* Some MSI laptops need delay before reading from EC */ + bool ec_delay; + + /* Some MSI Wind netbooks (e.g. MSI Wind U100) need loading SCM to get + * some features working (e.g. ECO mode), but we cannot change + * Wlan/Bluetooth state in software and we can only read its state. + */ + bool ec_read_only; +}; + +static struct quirk_entry *quirks; + /* Hardware access */ static int set_lcd_level(int level) @@ -195,10 +221,13 @@ static ssize_t set_device_state(const char *buf, size_t count, u8 mask) if (sscanf(buf, "%i", &status) != 1 || (status < 0 || status > 1)) return -EINVAL; + if (quirks->ec_read_only) + return -EOPNOTSUPP; + /* read current device state */ result = ec_read(MSI_STANDARD_EC_COMMAND_ADDRESS, &rdata); if (result < 0) - return -EINVAL; + return result; if (!!(rdata & mask) != status) { /* reverse device bit */ @@ -209,7 +238,7 @@ static ssize_t set_device_state(const char *buf, size_t count, u8 mask) result = ec_write(MSI_STANDARD_EC_COMMAND_ADDRESS, wdata); if (result < 0) - return -EINVAL; + return result; } return count; @@ -222,7 +251,7 @@ static int get_wireless_state(int *wlan, int *bluetooth) result = ec_transaction(MSI_EC_COMMAND_WIRELESS, &wdata, 1, &rdata, 1); if (result < 0) - return -1; + return result; if (wlan) *wlan = !!(rdata & 8); @@ -240,7 +269,7 @@ static int get_wireless_state_ec_standard(void) result = ec_read(MSI_STANDARD_EC_COMMAND_ADDRESS, &rdata); if (result < 0) - return -1; + return result; wlan_s = !!(rdata & MSI_STANDARD_EC_WLAN_MASK); @@ -258,7 +287,7 @@ static int get_threeg_exists(void) result = ec_read(MSI_STANDARD_EC_DEVICES_EXISTS_ADDRESS, &rdata); if (result < 0) - return -1; + return result; threeg_exists = !!(rdata & MSI_STANDARD_EC_3G_MASK); @@ -291,9 +320,9 @@ static ssize_t show_wlan(struct device *dev, struct device_attribute *attr, char *buf) { - int ret, enabled; + int ret, enabled = 0; - if (old_ec_model) { + if (quirks->old_ec_model) { ret = get_wireless_state(&enabled, NULL); } else { ret = get_wireless_state_ec_standard(); @@ -315,9 +344,9 @@ static ssize_t show_bluetooth(struct device *dev, struct device_attribute *attr, char *buf) { - int ret, enabled; + int ret, enabled = 0; - if (old_ec_model) { + if (quirks->old_ec_model) { ret = get_wireless_state(NULL, &enabled); } else { ret = get_wireless_state_ec_standard(); @@ -342,8 +371,8 @@ static ssize_t show_threeg(struct device *dev, int ret; /* old msi ec not support 3G */ - if (old_ec_model) - return -1; + if (quirks->old_ec_model) + return -ENODEV; ret = get_wireless_state_ec_standard(); if (ret < 0) @@ -417,18 +446,119 @@ static ssize_t store_auto_brightness(struct device *dev, return count; } +static ssize_t show_touchpad(struct device *dev, + struct device_attribute *attr, char *buf) +{ + + u8 rdata; + int result; + + result = ec_read(MSI_STANDARD_EC_FUNCTIONS_ADDRESS, &rdata); + if (result < 0) + return result; + + return sprintf(buf, "%i\n", !!(rdata & MSI_STANDARD_EC_TOUCHPAD_MASK)); +} + +static ssize_t show_turbo(struct device *dev, + struct device_attribute *attr, char *buf) +{ + + u8 rdata; + int result; + + result = ec_read(MSI_STANDARD_EC_FUNCTIONS_ADDRESS, &rdata); + if (result < 0) + return result; + + return sprintf(buf, "%i\n", !!(rdata & MSI_STANDARD_EC_TURBO_MASK)); +} + +static ssize_t show_eco(struct device *dev, + struct device_attribute *attr, char *buf) +{ + + u8 rdata; + int result; + + result = ec_read(MSI_STANDARD_EC_FUNCTIONS_ADDRESS, &rdata); + if (result < 0) + return result; + + return sprintf(buf, "%i\n", !!(rdata & MSI_STANDARD_EC_ECO_MASK)); +} + +static ssize_t show_turbo_cooldown(struct device *dev, + struct device_attribute *attr, char *buf) +{ + + u8 rdata; + int result; + + result = ec_read(MSI_STANDARD_EC_FUNCTIONS_ADDRESS, &rdata); + if (result < 0) + return result; + + return sprintf(buf, "%i\n", (!!(rdata & MSI_STANDARD_EC_TURBO_MASK)) | + (!!(rdata & MSI_STANDARD_EC_TURBO_COOLDOWN_MASK) << 1)); +} + +static ssize_t show_auto_fan(struct device *dev, + struct device_attribute *attr, char *buf) +{ + + u8 rdata; + int result; + + result = ec_read(MSI_STANDARD_EC_FAN_ADDRESS, &rdata); + if (result < 0) + return result; + + return sprintf(buf, "%i\n", !!(rdata & MSI_STANDARD_EC_AUTOFAN_MASK)); +} + +static ssize_t store_auto_fan(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + + int enable, result; + + if (sscanf(buf, "%i", &enable) != 1 || (enable != (enable & 1))) + return -EINVAL; + + result = ec_write(MSI_STANDARD_EC_FAN_ADDRESS, enable); + if (result < 0) + return result; + + return count; +} + static DEVICE_ATTR(lcd_level, 0644, show_lcd_level, store_lcd_level); static DEVICE_ATTR(auto_brightness, 0644, show_auto_brightness, store_auto_brightness); static DEVICE_ATTR(bluetooth, 0444, show_bluetooth, NULL); static DEVICE_ATTR(wlan, 0444, show_wlan, NULL); static DEVICE_ATTR(threeg, 0444, show_threeg, NULL); +static DEVICE_ATTR(touchpad, 0444, show_touchpad, NULL); +static DEVICE_ATTR(turbo_mode, 0444, show_turbo, NULL); +static DEVICE_ATTR(eco_mode, 0444, show_eco, NULL); +static DEVICE_ATTR(turbo_cooldown, 0444, show_turbo_cooldown, NULL); +static DEVICE_ATTR(auto_fan, 0644, show_auto_fan, store_auto_fan); static struct attribute *msipf_attributes[] = { - &dev_attr_lcd_level.attr, - &dev_attr_auto_brightness.attr, &dev_attr_bluetooth.attr, &dev_attr_wlan.attr, + &dev_attr_touchpad.attr, + &dev_attr_turbo_mode.attr, + &dev_attr_eco_mode.attr, + &dev_attr_turbo_cooldown.attr, + &dev_attr_auto_fan.attr, + NULL +}; + +static struct attribute *msipf_old_attributes[] = { + &dev_attr_lcd_level.attr, + &dev_attr_auto_brightness.attr, NULL }; @@ -436,6 +566,10 @@ static struct attribute_group msipf_attribute_group = { .attrs = msipf_attributes }; +static struct attribute_group msipf_old_attribute_group = { + .attrs = msipf_old_attributes +}; + static struct platform_driver msipf_driver = { .driver = { .name = "msi-laptop-pf", @@ -448,9 +582,26 @@ static struct platform_device *msipf_device; /* Initialization */ -static int dmi_check_cb(const struct dmi_system_id *id) +static struct quirk_entry quirk_old_ec_model = { + .old_ec_model = true, +}; + +static struct quirk_entry quirk_load_scm_model = { + .load_scm_model = true, + .ec_delay = true, +}; + +static struct quirk_entry quirk_load_scm_ro_model = { + .load_scm_model = true, + .ec_read_only = true, +}; + +static int dmi_check_cb(const struct dmi_system_id *dmi) { - pr_info("Identified laptop model '%s'\n", id->ident); + pr_info("Identified laptop model '%s'\n", dmi->ident); + + quirks = dmi->driver_data; + return 1; } @@ -464,6 +615,7 @@ static struct dmi_system_id __initdata msi_dmi_table[] = { DMI_MATCH(DMI_CHASSIS_VENDOR, "MICRO-STAR INT'L CO.,LTD") }, + .driver_data = &quirk_old_ec_model, .callback = dmi_check_cb }, { @@ -474,6 +626,7 @@ static struct dmi_system_id __initdata msi_dmi_table[] = { DMI_MATCH(DMI_PRODUCT_VERSION, "0581"), DMI_MATCH(DMI_BOARD_NAME, "MS-1058") }, + .driver_data = &quirk_old_ec_model, .callback = dmi_check_cb }, { @@ -484,6 +637,7 @@ static struct dmi_system_id __initdata msi_dmi_table[] = { DMI_MATCH(DMI_BOARD_VENDOR, "MSI"), DMI_MATCH(DMI_BOARD_NAME, "MS-1412") }, + .driver_data = &quirk_old_ec_model, .callback = dmi_check_cb }, { @@ -495,12 +649,9 @@ static struct dmi_system_id __initdata msi_dmi_table[] = { DMI_MATCH(DMI_CHASSIS_VENDOR, "MICRO-STAR INT'L CO.,LTD") }, + .driver_data = &quirk_old_ec_model, .callback = dmi_check_cb }, - { } -}; - -static struct dmi_system_id __initdata msi_load_scm_models_dmi_table[] = { { .ident = "MSI N034", .matches = { @@ -510,6 +661,7 @@ static struct dmi_system_id __initdata msi_load_scm_models_dmi_table[] = { DMI_MATCH(DMI_CHASSIS_VENDOR, "MICRO-STAR INTERNATIONAL CO., LTD") }, + .driver_data = &quirk_load_scm_model, .callback = dmi_check_cb }, { @@ -521,6 +673,7 @@ static struct dmi_system_id __initdata msi_load_scm_models_dmi_table[] = { DMI_MATCH(DMI_CHASSIS_VENDOR, "MICRO-STAR INTERNATIONAL CO., LTD") }, + .driver_data = &quirk_load_scm_model, .callback = dmi_check_cb }, { @@ -530,6 +683,7 @@ static struct dmi_system_id __initdata msi_load_scm_models_dmi_table[] = { "MICRO-STAR INTERNATIONAL CO., LTD"), DMI_MATCH(DMI_PRODUCT_NAME, "MS-N014"), }, + .driver_data = &quirk_load_scm_model, .callback = dmi_check_cb }, { @@ -539,6 +693,7 @@ static struct dmi_system_id __initdata msi_load_scm_models_dmi_table[] = { "Micro-Star International"), DMI_MATCH(DMI_PRODUCT_NAME, "CR620"), }, + .driver_data = &quirk_load_scm_model, .callback = dmi_check_cb }, { @@ -548,6 +703,17 @@ static struct dmi_system_id __initdata msi_load_scm_models_dmi_table[] = { "Micro-Star International Co., Ltd."), DMI_MATCH(DMI_PRODUCT_NAME, "U270 series"), }, + .driver_data = &quirk_load_scm_model, + .callback = dmi_check_cb + }, + { + .ident = "MSI U90/U100", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, + "MICRO-STAR INTERNATIONAL CO., LTD"), + DMI_MATCH(DMI_PRODUCT_NAME, "U90/U100"), + }, + .driver_data = &quirk_load_scm_ro_model, .callback = dmi_check_cb }, { } @@ -560,32 +726,26 @@ static int rfkill_bluetooth_set(void *data, bool blocked) * blocked == false is on * blocked == true is off */ - if (blocked) - set_device_state("0", 0, MSI_STANDARD_EC_BLUETOOTH_MASK); - else - set_device_state("1", 0, MSI_STANDARD_EC_BLUETOOTH_MASK); + int result = set_device_state(blocked ? "0" : "1", 0, + MSI_STANDARD_EC_BLUETOOTH_MASK); - return 0; + return min(result, 0); } static int rfkill_wlan_set(void *data, bool blocked) { - if (blocked) - set_device_state("0", 0, MSI_STANDARD_EC_WLAN_MASK); - else - set_device_state("1", 0, MSI_STANDARD_EC_WLAN_MASK); + int result = set_device_state(blocked ? "0" : "1", 0, + MSI_STANDARD_EC_WLAN_MASK); - return 0; + return min(result, 0); } static int rfkill_threeg_set(void *data, bool blocked) { - if (blocked) - set_device_state("0", 0, MSI_STANDARD_EC_3G_MASK); - else - set_device_state("1", 0, MSI_STANDARD_EC_3G_MASK); + int result = set_device_state(blocked ? "0" : "1", 0, + MSI_STANDARD_EC_3G_MASK); - return 0; + return min(result, 0); } static const struct rfkill_ops rfkill_bluetooth_ops = { @@ -618,25 +778,34 @@ static void rfkill_cleanup(void) } } +static bool msi_rfkill_set_state(struct rfkill *rfkill, bool blocked) +{ + if (quirks->ec_read_only) + return rfkill_set_hw_state(rfkill, blocked); + else + return rfkill_set_sw_state(rfkill, blocked); +} + static void msi_update_rfkill(struct work_struct *ignored) { get_wireless_state_ec_standard(); if (rfk_wlan) - rfkill_set_sw_state(rfk_wlan, !wlan_s); + msi_rfkill_set_state(rfk_wlan, !wlan_s); if (rfk_bluetooth) - rfkill_set_sw_state(rfk_bluetooth, !bluetooth_s); + msi_rfkill_set_state(rfk_bluetooth, !bluetooth_s); if (rfk_threeg) - rfkill_set_sw_state(rfk_threeg, !threeg_s); + msi_rfkill_set_state(rfk_threeg, !threeg_s); } -static DECLARE_DELAYED_WORK(msi_rfkill_work, msi_update_rfkill); +static DECLARE_DELAYED_WORK(msi_rfkill_dwork, msi_update_rfkill); +static DECLARE_WORK(msi_rfkill_work, msi_update_rfkill); static void msi_send_touchpad_key(struct work_struct *ignored) { u8 rdata; int result; - result = ec_read(MSI_STANDARD_EC_TOUCHPAD_ADDRESS, &rdata); + result = ec_read(MSI_STANDARD_EC_FUNCTIONS_ADDRESS, &rdata); if (result < 0) return; @@ -644,7 +813,8 @@ static void msi_send_touchpad_key(struct work_struct *ignored) (rdata & MSI_STANDARD_EC_TOUCHPAD_MASK) ? KEY_TOUCHPAD_ON : KEY_TOUCHPAD_OFF, 1, true); } -static DECLARE_DELAYED_WORK(msi_touchpad_work, msi_send_touchpad_key); +static DECLARE_DELAYED_WORK(msi_touchpad_dwork, msi_send_touchpad_key); +static DECLARE_WORK(msi_touchpad_work, msi_send_touchpad_key); static bool msi_laptop_i8042_filter(unsigned char data, unsigned char str, struct serio *port) @@ -662,14 +832,20 @@ static bool msi_laptop_i8042_filter(unsigned char data, unsigned char str, extended = false; switch (data) { case 0xE4: - schedule_delayed_work(&msi_touchpad_work, - round_jiffies_relative(0.5 * HZ)); + if (quirks->ec_delay) { + schedule_delayed_work(&msi_touchpad_dwork, + round_jiffies_relative(0.5 * HZ)); + } else + schedule_work(&msi_touchpad_work); break; case 0x54: case 0x62: case 0x76: - schedule_delayed_work(&msi_rfkill_work, - round_jiffies_relative(0.5 * HZ)); + if (quirks->ec_delay) { + schedule_delayed_work(&msi_rfkill_dwork, + round_jiffies_relative(0.5 * HZ)); + } else + schedule_work(&msi_rfkill_work); break; } } @@ -736,8 +912,11 @@ static int rfkill_init(struct platform_device *sdev) } /* schedule to run rfkill state initial */ - schedule_delayed_work(&msi_rfkill_init, - round_jiffies_relative(1 * HZ)); + if (quirks->ec_delay) { + schedule_delayed_work(&msi_rfkill_init, + round_jiffies_relative(1 * HZ)); + } else + schedule_work(&msi_rfkill_work); return 0; @@ -761,7 +940,7 @@ static int msi_laptop_resume(struct device *device) u8 data; int result; - if (!load_scm_model) + if (!quirks->load_scm_model) return 0; /* set load SCM to disable hardware control by fn key */ @@ -819,13 +998,15 @@ static int __init load_scm_model_init(struct platform_device *sdev) u8 data; int result; - /* allow userland write sysfs file */ - dev_attr_bluetooth.store = store_bluetooth; - dev_attr_wlan.store = store_wlan; - dev_attr_threeg.store = store_threeg; - dev_attr_bluetooth.attr.mode |= S_IWUSR; - dev_attr_wlan.attr.mode |= S_IWUSR; - dev_attr_threeg.attr.mode |= S_IWUSR; + if (!quirks->ec_read_only) { + /* allow userland write sysfs file */ + dev_attr_bluetooth.store = store_bluetooth; + dev_attr_wlan.store = store_wlan; + dev_attr_threeg.store = store_threeg; + dev_attr_bluetooth.attr.mode |= S_IWUSR; + dev_attr_wlan.attr.mode |= S_IWUSR; + dev_attr_threeg.attr.mode |= S_IWUSR; + } /* disable hardware control by fn key */ result = ec_read(MSI_STANDARD_EC_SCM_LOAD_ADDRESS, &data); @@ -874,21 +1055,22 @@ static int __init msi_init(void) if (acpi_disabled) return -ENODEV; - if (force || dmi_check_system(msi_dmi_table)) - old_ec_model = 1; + dmi_check_system(msi_dmi_table); + if (!quirks) + /* quirks may be NULL if no match in DMI table */ + quirks = &quirk_load_scm_model; + if (force) + quirks = &quirk_old_ec_model; - if (!old_ec_model) + if (!quirks->old_ec_model) get_threeg_exists(); - if (!old_ec_model && dmi_check_system(msi_load_scm_models_dmi_table)) - load_scm_model = 1; - if (auto_brightness < 0 || auto_brightness > 2) return -EINVAL; /* Register backlight stuff */ - if (acpi_video_backlight_support()) { + if (!quirks->old_ec_model || acpi_video_backlight_support()) { pr_info("Brightness ignored, must be controlled by ACPI video driver\n"); } else { struct backlight_properties props; @@ -918,7 +1100,7 @@ static int __init msi_init(void) if (ret) goto fail_platform_device1; - if (load_scm_model && (load_scm_model_init(msipf_device) < 0)) { + if (quirks->load_scm_model && (load_scm_model_init(msipf_device) < 0)) { ret = -EINVAL; goto fail_platform_device1; } @@ -928,20 +1110,25 @@ static int __init msi_init(void) if (ret) goto fail_platform_device2; - if (!old_ec_model) { + if (!quirks->old_ec_model) { if (threeg_exists) ret = device_create_file(&msipf_device->dev, &dev_attr_threeg); if (ret) goto fail_platform_device2; - } + } else { + ret = sysfs_create_group(&msipf_device->dev.kobj, + &msipf_old_attribute_group); + if (ret) + goto fail_platform_device2; - /* Disable automatic brightness control by default because - * this module was probably loaded to do brightness control in - * software. */ + /* Disable automatic brightness control by default because + * this module was probably loaded to do brightness control in + * software. */ - if (auto_brightness != 2) - set_auto_brightness(auto_brightness); + if (auto_brightness != 2) + set_auto_brightness(auto_brightness); + } pr_info("driver " MSI_DRIVER_VERSION " successfully loaded\n"); @@ -949,9 +1136,10 @@ static int __init msi_init(void) fail_platform_device2: - if (load_scm_model) { + if (quirks->load_scm_model) { i8042_remove_filter(msi_laptop_i8042_filter); - cancel_delayed_work_sync(&msi_rfkill_work); + cancel_delayed_work_sync(&msi_rfkill_dwork); + cancel_work_sync(&msi_rfkill_work); rfkill_cleanup(); } platform_device_del(msipf_device); @@ -973,23 +1161,26 @@ fail_backlight: static void __exit msi_cleanup(void) { - if (load_scm_model) { + if (quirks->load_scm_model) { i8042_remove_filter(msi_laptop_i8042_filter); msi_laptop_input_destroy(); - cancel_delayed_work_sync(&msi_rfkill_work); + cancel_delayed_work_sync(&msi_rfkill_dwork); + cancel_work_sync(&msi_rfkill_work); rfkill_cleanup(); } sysfs_remove_group(&msipf_device->dev.kobj, &msipf_attribute_group); - if (!old_ec_model && threeg_exists) + if (!quirks->old_ec_model && threeg_exists) device_remove_file(&msipf_device->dev, &dev_attr_threeg); platform_device_unregister(msipf_device); platform_driver_unregister(&msipf_driver); backlight_device_unregister(msibl_device); - /* Enable automatic brightness control again */ - if (auto_brightness != 2) - set_auto_brightness(1); + if (quirks->old_ec_model) { + /* Enable automatic brightness control again */ + if (auto_brightness != 2) + set_auto_brightness(1); + } pr_info("driver unloaded\n"); } @@ -1011,3 +1202,4 @@ MODULE_ALIAS("dmi:*:svnMICRO-STARINTERNATIONAL*:pnMS-N051:*"); MODULE_ALIAS("dmi:*:svnMICRO-STARINTERNATIONAL*:pnMS-N014:*"); MODULE_ALIAS("dmi:*:svnMicro-StarInternational*:pnCR620:*"); MODULE_ALIAS("dmi:*:svnMicro-StarInternational*:pnU270series:*"); +MODULE_ALIAS("dmi:*:svnMICRO-STARINTERNATIONAL*:pnU90/U100:*"); diff --git a/drivers/platform/x86/msi-wmi.c b/drivers/platform/x86/msi-wmi.c index 2264331..70222f2 100644 --- a/drivers/platform/x86/msi-wmi.c +++ b/drivers/platform/x86/msi-wmi.c @@ -34,29 +34,65 @@ MODULE_AUTHOR("Thomas Renninger <trenn@suse.de>"); MODULE_DESCRIPTION("MSI laptop WMI hotkeys driver"); MODULE_LICENSE("GPL"); -MODULE_ALIAS("wmi:551A1F84-FBDD-4125-91DB-3EA8F44F1D45"); -MODULE_ALIAS("wmi:B6F3EEF2-3D2F-49DC-9DE3-85BCE18C62F2"); - #define DRV_NAME "msi-wmi" #define MSIWMI_BIOS_GUID "551A1F84-FBDD-4125-91DB-3EA8F44F1D45" -#define MSIWMI_EVENT_GUID "B6F3EEF2-3D2F-49DC-9DE3-85BCE18C62F2" - -#define SCANCODE_BASE 0xD0 -#define MSI_WMI_BRIGHTNESSUP SCANCODE_BASE -#define MSI_WMI_BRIGHTNESSDOWN (SCANCODE_BASE + 1) -#define MSI_WMI_VOLUMEUP (SCANCODE_BASE + 2) -#define MSI_WMI_VOLUMEDOWN (SCANCODE_BASE + 3) -#define MSI_WMI_MUTE (SCANCODE_BASE + 4) +#define MSIWMI_MSI_EVENT_GUID "B6F3EEF2-3D2F-49DC-9DE3-85BCE18C62F2" +#define MSIWMI_WIND_EVENT_GUID "5B3CC38A-40D9-7245-8AE6-1145B751BE3F" + +MODULE_ALIAS("wmi:" MSIWMI_BIOS_GUID); +MODULE_ALIAS("wmi:" MSIWMI_MSI_EVENT_GUID); +MODULE_ALIAS("wmi:" MSIWMI_WIND_EVENT_GUID); + +enum msi_scancodes { + /* Generic MSI keys (not present on MSI Wind) */ + MSI_KEY_BRIGHTNESSUP = 0xD0, + MSI_KEY_BRIGHTNESSDOWN, + MSI_KEY_VOLUMEUP, + MSI_KEY_VOLUMEDOWN, + MSI_KEY_MUTE, + /* MSI Wind keys */ + WIND_KEY_TOUCHPAD = 0x08, /* Fn+F3 touchpad toggle */ + WIND_KEY_BLUETOOTH = 0x56, /* Fn+F11 Bluetooth toggle */ + WIND_KEY_CAMERA, /* Fn+F6 webcam toggle */ + WIND_KEY_WLAN = 0x5f, /* Fn+F11 Wi-Fi toggle */ + WIND_KEY_TURBO, /* Fn+F10 turbo mode toggle */ + WIND_KEY_ECO = 0x69, /* Fn+F10 ECO mode toggle */ +}; static struct key_entry msi_wmi_keymap[] = { - { KE_KEY, MSI_WMI_BRIGHTNESSUP, {KEY_BRIGHTNESSUP} }, - { KE_KEY, MSI_WMI_BRIGHTNESSDOWN, {KEY_BRIGHTNESSDOWN} }, - { KE_KEY, MSI_WMI_VOLUMEUP, {KEY_VOLUMEUP} }, - { KE_KEY, MSI_WMI_VOLUMEDOWN, {KEY_VOLUMEDOWN} }, - { KE_KEY, MSI_WMI_MUTE, {KEY_MUTE} }, - { KE_END, 0} + { KE_KEY, MSI_KEY_BRIGHTNESSUP, {KEY_BRIGHTNESSUP} }, + { KE_KEY, MSI_KEY_BRIGHTNESSDOWN, {KEY_BRIGHTNESSDOWN} }, + { KE_KEY, MSI_KEY_VOLUMEUP, {KEY_VOLUMEUP} }, + { KE_KEY, MSI_KEY_VOLUMEDOWN, {KEY_VOLUMEDOWN} }, + { KE_KEY, MSI_KEY_MUTE, {KEY_MUTE} }, + + /* These keys work without WMI. Ignore them to avoid double keycodes */ + { KE_IGNORE, WIND_KEY_TOUCHPAD, {KEY_TOUCHPAD_TOGGLE} }, + { KE_IGNORE, WIND_KEY_BLUETOOTH, {KEY_BLUETOOTH} }, + { KE_IGNORE, WIND_KEY_CAMERA, {KEY_CAMERA} }, + { KE_IGNORE, WIND_KEY_WLAN, {KEY_WLAN} }, + + /* These are unknown WMI events found on MSI Wind */ + { KE_IGNORE, 0x00 }, + { KE_IGNORE, 0x62 }, + { KE_IGNORE, 0x63 }, + + /* These are MSI Wind keys that should be handled via WMI */ + { KE_KEY, WIND_KEY_TURBO, {KEY_PROG1} }, + { KE_KEY, WIND_KEY_ECO, {KEY_PROG2} }, + + { KE_END, 0 } +}; + +static ktime_t last_pressed; + +static const struct { + const char *guid; + bool quirk_last_pressed; +} *event_wmi, event_wmis[] = { + { MSIWMI_MSI_EVENT_GUID, true }, + { MSIWMI_WIND_EVENT_GUID, false }, }; -static ktime_t last_pressed[ARRAY_SIZE(msi_wmi_keymap) - 1]; static struct backlight_device *backlight; @@ -149,7 +185,6 @@ static void msi_wmi_notify(u32 value, void *context) struct acpi_buffer response = { ACPI_ALLOCATE_BUFFER, NULL }; static struct key_entry *key; union acpi_object *obj; - ktime_t cur; acpi_status status; status = wmi_get_event_data(value, &response); @@ -165,39 +200,67 @@ static void msi_wmi_notify(u32 value, void *context) pr_debug("Eventcode: 0x%x\n", eventcode); key = sparse_keymap_entry_from_scancode(msi_wmi_input_dev, eventcode); - if (key) { - ktime_t diff; - cur = ktime_get_real(); - diff = ktime_sub(cur, last_pressed[key->code - - SCANCODE_BASE]); - /* Ignore event if the same event happened in a 50 ms + if (!key) { + pr_info("Unknown key pressed - %x\n", eventcode); + goto msi_wmi_notify_exit; + } + + if (event_wmi->quirk_last_pressed) { + ktime_t cur = ktime_get_real(); + ktime_t diff = ktime_sub(cur, last_pressed); + /* Ignore event if any event happened in a 50 ms timeframe -> Key press may result in 10-20 GPEs */ if (ktime_to_us(diff) < 1000 * 50) { pr_debug("Suppressed key event 0x%X - " "Last press was %lld us ago\n", key->code, ktime_to_us(diff)); - return; - } - last_pressed[key->code - SCANCODE_BASE] = cur; - - if (key->type == KE_KEY && - /* Brightness is served via acpi video driver */ - (!acpi_video_backlight_support() || - (key->code != MSI_WMI_BRIGHTNESSUP && - key->code != MSI_WMI_BRIGHTNESSDOWN))) { - pr_debug("Send key: 0x%X - " - "Input layer keycode: %d\n", - key->code, key->keycode); - sparse_keymap_report_entry(msi_wmi_input_dev, - key, 1, true); + goto msi_wmi_notify_exit; } - } else - pr_info("Unknown key pressed - %x\n", eventcode); + last_pressed = cur; + } + + if (key->type == KE_KEY && + /* Brightness is served via acpi video driver */ + (backlight || + (key->code != MSI_KEY_BRIGHTNESSUP && + key->code != MSI_KEY_BRIGHTNESSDOWN))) { + pr_debug("Send key: 0x%X - Input layer keycode: %d\n", + key->code, key->keycode); + sparse_keymap_report_entry(msi_wmi_input_dev, key, 1, + true); + } } else pr_info("Unknown event received\n"); + +msi_wmi_notify_exit: kfree(response.pointer); } +static int __init msi_wmi_backlight_setup(void) +{ + int err; + struct backlight_properties props; + + memset(&props, 0, sizeof(struct backlight_properties)); + props.type = BACKLIGHT_PLATFORM; + props.max_brightness = ARRAY_SIZE(backlight_map) - 1; + backlight = backlight_device_register(DRV_NAME, NULL, NULL, + &msi_backlight_ops, + &props); + if (IS_ERR(backlight)) + return PTR_ERR(backlight); + + err = bl_get(NULL); + if (err < 0) { + backlight_device_unregister(backlight); + return err; + } + + backlight->props.brightness = err; + + return 0; +} + static int __init msi_wmi_input_setup(void) { int err; @@ -219,7 +282,7 @@ static int __init msi_wmi_input_setup(void) if (err) goto err_free_keymap; - memset(last_pressed, 0, sizeof(last_pressed)); + last_pressed = ktime_set(0, 0); return 0; @@ -233,61 +296,66 @@ err_free_dev: static int __init msi_wmi_init(void) { int err; + int i; - if (!wmi_has_guid(MSIWMI_EVENT_GUID)) { - pr_err("This machine doesn't have MSI-hotkeys through WMI\n"); - return -ENODEV; - } - err = wmi_install_notify_handler(MSIWMI_EVENT_GUID, - msi_wmi_notify, NULL); - if (ACPI_FAILURE(err)) - return -EINVAL; + for (i = 0; i < ARRAY_SIZE(event_wmis); i++) { + if (!wmi_has_guid(event_wmis[i].guid)) + continue; - err = msi_wmi_input_setup(); - if (err) - goto err_uninstall_notifier; - - if (!acpi_video_backlight_support()) { - struct backlight_properties props; - memset(&props, 0, sizeof(struct backlight_properties)); - props.type = BACKLIGHT_PLATFORM; - props.max_brightness = ARRAY_SIZE(backlight_map) - 1; - backlight = backlight_device_register(DRV_NAME, NULL, NULL, - &msi_backlight_ops, - &props); - if (IS_ERR(backlight)) { - err = PTR_ERR(backlight); + err = msi_wmi_input_setup(); + if (err) { + pr_err("Unable to setup input device\n"); + return err; + } + + err = wmi_install_notify_handler(event_wmis[i].guid, + msi_wmi_notify, NULL); + if (ACPI_FAILURE(err)) { + pr_err("Unable to setup WMI notify handler\n"); goto err_free_input; } - err = bl_get(NULL); - if (err < 0) - goto err_free_backlight; + pr_debug("Event handler installed\n"); + event_wmi = &event_wmis[i]; + break; + } - backlight->props.brightness = err; + if (wmi_has_guid(MSIWMI_BIOS_GUID) && !acpi_video_backlight_support()) { + err = msi_wmi_backlight_setup(); + if (err) { + pr_err("Unable to setup backlight device\n"); + goto err_uninstall_handler; + } + pr_debug("Backlight device created\n"); + } + + if (!event_wmi && !backlight) { + pr_err("This machine doesn't have neither MSI-hotkeys nor backlight through WMI\n"); + return -ENODEV; } - pr_debug("Event handler installed\n"); return 0; -err_free_backlight: - backlight_device_unregister(backlight); +err_uninstall_handler: + if (event_wmi) + wmi_remove_notify_handler(event_wmi->guid); err_free_input: - sparse_keymap_free(msi_wmi_input_dev); - input_unregister_device(msi_wmi_input_dev); -err_uninstall_notifier: - wmi_remove_notify_handler(MSIWMI_EVENT_GUID); + if (event_wmi) { + sparse_keymap_free(msi_wmi_input_dev); + input_unregister_device(msi_wmi_input_dev); + } return err; } static void __exit msi_wmi_exit(void) { - if (wmi_has_guid(MSIWMI_EVENT_GUID)) { - wmi_remove_notify_handler(MSIWMI_EVENT_GUID); + if (event_wmi) { + wmi_remove_notify_handler(event_wmi->guid); sparse_keymap_free(msi_wmi_input_dev); input_unregister_device(msi_wmi_input_dev); - backlight_device_unregister(backlight); } + if (backlight) + backlight_device_unregister(backlight); } module_init(msi_wmi_init); diff --git a/drivers/platform/x86/sony-laptop.c b/drivers/platform/x86/sony-laptop.c index 8da2187..14d4dce 100644 --- a/drivers/platform/x86/sony-laptop.c +++ b/drivers/platform/x86/sony-laptop.c @@ -158,6 +158,11 @@ static void sony_nc_thermal_cleanup(struct platform_device *pd); static int sony_nc_lid_resume_setup(struct platform_device *pd); static void sony_nc_lid_resume_cleanup(struct platform_device *pd); +static int sony_nc_gfx_switch_setup(struct platform_device *pd, + unsigned int handle); +static void sony_nc_gfx_switch_cleanup(struct platform_device *pd); +static int __sony_nc_gfx_switch_status_get(void); + static int sony_nc_highspeed_charging_setup(struct platform_device *pd); static void sony_nc_highspeed_charging_cleanup(struct platform_device *pd); @@ -1241,17 +1246,13 @@ static void sony_nc_notify(struct acpi_device *device, u32 event) /* Hybrid GFX switching */ sony_call_snc_handle(handle, 0x0000, &result); dprintk("GFX switch event received (reason: %s)\n", - (result & 0x01) ? - "switch change" : "unknown"); - - /* verify the switch state - * 1: discrete GFX - * 0: integrated GFX - */ - sony_call_snc_handle(handle, 0x0100, &result); + (result == 0x1) ? "switch change" : + (result == 0x2) ? "output switch" : + (result == 0x3) ? "output switch" : + ""); ev_type = GFX_SWITCH; - real_ev = result & 0xff; + real_ev = __sony_nc_gfx_switch_status_get(); break; default: @@ -1350,6 +1351,13 @@ static void sony_nc_function_setup(struct acpi_device *device, pr_err("couldn't set up thermal profile function (%d)\n", result); break; + case 0x0128: + case 0x0146: + result = sony_nc_gfx_switch_setup(pf_device, handle); + if (result) + pr_err("couldn't set up GFX Switch status (%d)\n", + result); + break; case 0x0131: result = sony_nc_highspeed_charging_setup(pf_device); if (result) @@ -1365,6 +1373,8 @@ static void sony_nc_function_setup(struct acpi_device *device, break; case 0x0137: case 0x0143: + case 0x014b: + case 0x014c: result = sony_nc_kbd_backlight_setup(pf_device, handle); if (result) pr_err("couldn't set up keyboard backlight function (%d)\n", @@ -1414,6 +1424,10 @@ static void sony_nc_function_cleanup(struct platform_device *pd) case 0x0122: sony_nc_thermal_cleanup(pd); break; + case 0x0128: + case 0x0146: + sony_nc_gfx_switch_cleanup(pd); + break; case 0x0131: sony_nc_highspeed_charging_cleanup(pd); break; @@ -1423,6 +1437,8 @@ static void sony_nc_function_cleanup(struct platform_device *pd) break; case 0x0137: case 0x0143: + case 0x014b: + case 0x014c: sony_nc_kbd_backlight_cleanup(pd); break; default: @@ -1467,6 +1483,8 @@ static void sony_nc_function_resume(void) break; case 0x0137: case 0x0143: + case 0x014b: + case 0x014c: sony_nc_kbd_backlight_resume(); break; default: @@ -1534,7 +1552,7 @@ static int sony_nc_rfkill_set(void *data, bool blocked) int argument = sony_rfkill_address[(long) data] + 0x100; if (!blocked) - argument |= 0x030000; + argument |= 0x070000; return sony_call_snc_handle(sony_rfkill_handle, argument, &result); } @@ -2333,7 +2351,7 @@ static int sony_nc_lid_resume_setup(struct platform_device *pd) return 0; liderror: - for (; i > 0; i--) + for (i--; i >= 0; i--) device_remove_file(&pd->dev, &lid_ctl->attrs[i]); kfree(lid_ctl); @@ -2355,6 +2373,97 @@ static void sony_nc_lid_resume_cleanup(struct platform_device *pd) } } +/* GFX Switch position */ +enum gfx_switch { + SPEED, + STAMINA, + AUTO +}; +struct snc_gfx_switch_control { + struct device_attribute attr; + unsigned int handle; +}; +static struct snc_gfx_switch_control *gfxs_ctl; + +/* returns 0 for speed, 1 for stamina */ +static int __sony_nc_gfx_switch_status_get(void) +{ + unsigned int result; + + if (sony_call_snc_handle(gfxs_ctl->handle, 0x0100, &result)) + return -EIO; + + switch (gfxs_ctl->handle) { + case 0x0146: + /* 1: discrete GFX (speed) + * 0: integrated GFX (stamina) + */ + return result & 0x1 ? SPEED : STAMINA; + break; + case 0x0128: + /* it's a more elaborated bitmask, for now: + * 2: integrated GFX (stamina) + * 0: discrete GFX (speed) + */ + dprintk("GFX Status: 0x%x\n", result); + return result & 0x80 ? AUTO : + result & 0x02 ? STAMINA : SPEED; + break; + } + return -EINVAL; +} + +static ssize_t sony_nc_gfx_switch_status_show(struct device *dev, + struct device_attribute *attr, + char *buffer) +{ + int pos = __sony_nc_gfx_switch_status_get(); + + if (pos < 0) + return pos; + + return snprintf(buffer, PAGE_SIZE, "%s\n", pos ? "speed" : "stamina"); +} + +static int sony_nc_gfx_switch_setup(struct platform_device *pd, + unsigned int handle) +{ + unsigned int result; + + gfxs_ctl = kzalloc(sizeof(struct snc_gfx_switch_control), GFP_KERNEL); + if (!gfxs_ctl) + return -ENOMEM; + + gfxs_ctl->handle = handle; + + sysfs_attr_init(&gfxs_ctl->attr.attr); + gfxs_ctl->attr.attr.name = "gfx_switch_status"; + gfxs_ctl->attr.attr.mode = S_IRUGO; + gfxs_ctl->attr.show = sony_nc_gfx_switch_status_show; + + result = device_create_file(&pd->dev, &gfxs_ctl->attr); + if (result) + goto gfxerror; + + return 0; + +gfxerror: + kfree(gfxs_ctl); + gfxs_ctl = NULL; + + return result; +} + +static void sony_nc_gfx_switch_cleanup(struct platform_device *pd) +{ + if (gfxs_ctl) { + device_remove_file(&pd->dev, &gfxs_ctl->attr); + + kfree(gfxs_ctl); + gfxs_ctl = NULL; + } +} + /* High speed charging function */ static struct device_attribute *hsc_handle; @@ -2533,6 +2642,8 @@ static void sony_nc_backlight_ng_read_limits(int handle, lvl_table_len = 9; break; case 0x143: + case 0x14b: + case 0x14c: lvl_table_len = 16; break; } @@ -2584,6 +2695,18 @@ static void sony_nc_backlight_setup(void) sony_nc_backlight_ng_read_limits(0x143, &sony_bl_props); max_brightness = sony_bl_props.maxlvl - sony_bl_props.offset; + } else if (sony_find_snc_handle(0x14b) >= 0) { + ops = &sony_backlight_ng_ops; + sony_bl_props.cmd_base = 0x3000; + sony_nc_backlight_ng_read_limits(0x14b, &sony_bl_props); + max_brightness = sony_bl_props.maxlvl - sony_bl_props.offset; + + } else if (sony_find_snc_handle(0x14c) >= 0) { + ops = &sony_backlight_ng_ops; + sony_bl_props.cmd_base = 0x3000; + sony_nc_backlight_ng_read_limits(0x14c, &sony_bl_props); + max_brightness = sony_bl_props.maxlvl - sony_bl_props.offset; + } else if (ACPI_SUCCESS(acpi_get_handle(sony_nc_acpi_handle, "GBRT", &unused))) { ops = &sony_backlight_ops; diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c index f4f8408..9a90756 100644 --- a/drivers/platform/x86/thinkpad_acpi.c +++ b/drivers/platform/x86/thinkpad_acpi.c @@ -209,9 +209,8 @@ enum tpacpi_hkey_event_t { TP_HKEY_EV_ALARM_SENSOR_XHOT = 0x6022, /* sensor critically hot */ TP_HKEY_EV_THM_TABLE_CHANGED = 0x6030, /* thermal table changed */ - TP_HKEY_EV_UNK_6040 = 0x6040, /* Related to AC change? - some sort of APM hint, - W520 */ + /* AC-related events */ + TP_HKEY_EV_AC_CHANGED = 0x6040, /* AC status changed */ /* Misc */ TP_HKEY_EV_RFKILL_CHANGED = 0x7000, /* rfkill switch changed */ @@ -3629,6 +3628,12 @@ static bool hotkey_notify_6xxx(const u32 hkey, "a sensor reports something is extremely hot!\n"); /* recommended action: immediate sleep/hibernate */ break; + case TP_HKEY_EV_AC_CHANGED: + /* X120e, X121e, X220, X220i, X220t, X230, T420, T420s, W520: + * AC status changed; can be triggered by plugging or + * unplugging AC adapter, docking or undocking. */ + + /* fallthrough */ case TP_HKEY_EV_KEY_NUMLOCK: case TP_HKEY_EV_KEY_FN: @@ -8574,7 +8579,8 @@ static bool __pure __init tpacpi_is_valid_fw_id(const char* const s, return s && strlen(s) >= 8 && tpacpi_is_fw_digit(s[0]) && tpacpi_is_fw_digit(s[1]) && - s[2] == t && s[3] == 'T' && + s[2] == t && + (s[3] == 'T' || s[3] == 'N') && tpacpi_is_fw_digit(s[4]) && tpacpi_is_fw_digit(s[5]); } @@ -8607,7 +8613,8 @@ static int __must_check __init get_thinkpad_model_data( return -ENOMEM; /* Really ancient ThinkPad 240X will fail this, which is fine */ - if (!tpacpi_is_valid_fw_id(tp->bios_version_str, 'E')) + if (!(tpacpi_is_valid_fw_id(tp->bios_version_str, 'E') || + tpacpi_is_valid_fw_id(tp->bios_version_str, 'C'))) return 0; tp->bios_model = tp->bios_version_str[0] diff --git a/drivers/rtc/rtc-stmp3xxx.c b/drivers/rtc/rtc-stmp3xxx.c index b2a8ed9..98f0d3c 100644 --- a/drivers/rtc/rtc-stmp3xxx.c +++ b/drivers/rtc/rtc-stmp3xxx.c @@ -27,6 +27,8 @@ #include <linux/slab.h> #include <linux/of_device.h> #include <linux/of.h> +#include <linux/stmp_device.h> +#include <linux/stmp3xxx_rtc_wdt.h> #include <mach/common.h> @@ -36,6 +38,7 @@ #define STMP3XXX_RTC_CTRL_ALARM_IRQ_EN 0x00000001 #define STMP3XXX_RTC_CTRL_ONEMSEC_IRQ_EN 0x00000002 #define STMP3XXX_RTC_CTRL_ALARM_IRQ 0x00000004 +#define STMP3XXX_RTC_CTRL_WATCHDOGEN 0x00000010 #define STMP3XXX_RTC_STAT 0x10 #define STMP3XXX_RTC_STAT_STALE_SHIFT 16 @@ -45,6 +48,8 @@ #define STMP3XXX_RTC_ALARM 0x40 +#define STMP3XXX_RTC_WATCHDOG 0x50 + #define STMP3XXX_RTC_PERSISTENT0 0x60 #define STMP3XXX_RTC_PERSISTENT0_SET 0x64 #define STMP3XXX_RTC_PERSISTENT0_CLR 0x68 @@ -52,12 +57,70 @@ #define STMP3XXX_RTC_PERSISTENT0_ALARM_EN 0x00000004 #define STMP3XXX_RTC_PERSISTENT0_ALARM_WAKE 0x00000080 +#define STMP3XXX_RTC_PERSISTENT1 0x70 +/* missing bitmask in headers */ +#define STMP3XXX_RTC_PERSISTENT1_FORCE_UPDATER 0x80000000 + struct stmp3xxx_rtc_data { struct rtc_device *rtc; void __iomem *io; int irq_alarm; }; +#if IS_ENABLED(CONFIG_STMP3XXX_RTC_WATCHDOG) +/** + * stmp3xxx_wdt_set_timeout - configure the watchdog inside the STMP3xxx RTC + * @dev: the parent device of the watchdog (= the RTC) + * @timeout: the desired value for the timeout register of the watchdog. + * 0 disables the watchdog + * + * The watchdog needs one register and two bits which are in the RTC domain. + * To handle the resource conflict, the RTC driver will create another + * platform_device for the watchdog driver as a child of the RTC device. + * The watchdog driver is passed the below accessor function via platform_data + * to configure the watchdog. Locking is not needed because accessing SET/CLR + * registers is atomic. + */ + +static void stmp3xxx_wdt_set_timeout(struct device *dev, u32 timeout) +{ + struct stmp3xxx_rtc_data *rtc_data = dev_get_drvdata(dev); + + if (timeout) { + writel(timeout, rtc_data->io + STMP3XXX_RTC_WATCHDOG); + writel(STMP3XXX_RTC_CTRL_WATCHDOGEN, + rtc_data->io + STMP3XXX_RTC_CTRL + STMP_OFFSET_REG_SET); + writel(STMP3XXX_RTC_PERSISTENT1_FORCE_UPDATER, + rtc_data->io + STMP3XXX_RTC_PERSISTENT1 + STMP_OFFSET_REG_SET); + } else { + writel(STMP3XXX_RTC_CTRL_WATCHDOGEN, + rtc_data->io + STMP3XXX_RTC_CTRL + STMP_OFFSET_REG_CLR); + writel(STMP3XXX_RTC_PERSISTENT1_FORCE_UPDATER, + rtc_data->io + STMP3XXX_RTC_PERSISTENT1 + STMP_OFFSET_REG_CLR); + } +} + +static struct stmp3xxx_wdt_pdata wdt_pdata = { + .wdt_set_timeout = stmp3xxx_wdt_set_timeout, +}; + +static void stmp3xxx_wdt_register(struct platform_device *rtc_pdev) +{ + struct platform_device *wdt_pdev = + platform_device_alloc("stmp3xxx_rtc_wdt", rtc_pdev->id); + + if (wdt_pdev) { + wdt_pdev->dev.parent = &rtc_pdev->dev; + wdt_pdev->dev.platform_data = &wdt_pdata; + platform_device_add(wdt_pdev); + } +} +#else +static void stmp3xxx_wdt_register(struct platform_device *rtc_pdev) +{ +} +#endif /* CONFIG_STMP3XXX_RTC_WATCHDOG */ + static void stmp3xxx_wait_time(struct stmp3xxx_rtc_data *rtc_data) { /* @@ -233,6 +296,7 @@ static int stmp3xxx_rtc_probe(struct platform_device *pdev) goto out_irq_alarm; } + stmp3xxx_wdt_register(pdev); return 0; out_irq_alarm: diff --git a/drivers/scsi/aacraid/aacraid.h b/drivers/scsi/aacraid/aacraid.h index 742f5d7..a6f7190 100644 --- a/drivers/scsi/aacraid/aacraid.h +++ b/drivers/scsi/aacraid/aacraid.h @@ -12,13 +12,13 @@ *----------------------------------------------------------------------------*/ #ifndef AAC_DRIVER_BUILD -# define AAC_DRIVER_BUILD 29801 +# define AAC_DRIVER_BUILD 30000 # define AAC_DRIVER_BRANCH "-ms" #endif #define MAXIMUM_NUM_CONTAINERS 32 #define AAC_NUM_MGT_FIB 8 -#define AAC_NUM_IO_FIB (512 - AAC_NUM_MGT_FIB) +#define AAC_NUM_IO_FIB (1024 - AAC_NUM_MGT_FIB) #define AAC_NUM_FIB (AAC_NUM_IO_FIB + AAC_NUM_MGT_FIB) #define AAC_MAX_LUN (8) @@ -36,6 +36,10 @@ #define CONTAINER_TO_ID(cont) (cont) #define CONTAINER_TO_LUN(cont) (0) +#define PMC_DEVICE_S7 0x28c +#define PMC_DEVICE_S8 0x28d +#define PMC_DEVICE_S9 0x28f + #define aac_phys_to_logical(x) ((x)+1) #define aac_logical_to_phys(x) ((x)?(x)-1:0) diff --git a/drivers/scsi/aacraid/comminit.c b/drivers/scsi/aacraid/comminit.c index 8e5d3be..3f75995 100644 --- a/drivers/scsi/aacraid/comminit.c +++ b/drivers/scsi/aacraid/comminit.c @@ -404,7 +404,13 @@ struct aac_dev *aac_init_adapter(struct aac_dev *dev) dev->max_fib_size = status[1] & 0xFFE0; host->sg_tablesize = status[2] >> 16; dev->sg_tablesize = status[2] & 0xFFFF; - host->can_queue = (status[3] & 0xFFFF) - AAC_NUM_MGT_FIB; + if (dev->pdev->device == PMC_DEVICE_S7 || + dev->pdev->device == PMC_DEVICE_S8 || + dev->pdev->device == PMC_DEVICE_S9) + host->can_queue = ((status[3] >> 16) ? (status[3] >> 16) : + (status[3] & 0xFFFF)) - AAC_NUM_MGT_FIB; + else + host->can_queue = (status[3] & 0xFFFF) - AAC_NUM_MGT_FIB; dev->max_num_aif = status[4] & 0xFFFF; /* * NOTE: @@ -452,6 +458,9 @@ struct aac_dev *aac_init_adapter(struct aac_dev *dev) } } + if (host->can_queue > AAC_NUM_IO_FIB) + host->can_queue = AAC_NUM_IO_FIB; + /* * Ok now init the communication subsystem */ diff --git a/drivers/scsi/aacraid/src.c b/drivers/scsi/aacraid/src.c index 3b021ec..e2e3492 100644 --- a/drivers/scsi/aacraid/src.c +++ b/drivers/scsi/aacraid/src.c @@ -407,7 +407,7 @@ static int aac_src_deliver_message(struct fib *fib) fib->hw_fib_va->header.StructType = FIB_MAGIC2; fib->hw_fib_va->header.SenderFibAddress = (u32)address; fib->hw_fib_va->header.u.TimeStamp = 0; - BUG_ON((u32)(address >> 32) != 0L); + BUG_ON(upper_32_bits(address) != 0L); address |= fibsize; } else { /* Calculate the amount to the fibsize bits */ @@ -431,7 +431,7 @@ static int aac_src_deliver_message(struct fib *fib) address |= fibsize; } - src_writel(dev, MUnit.IQ_H, (address >> 32) & 0xffffffff); + src_writel(dev, MUnit.IQ_H, upper_32_bits(address) & 0xffffffff); src_writel(dev, MUnit.IQ_L, address & 0xffffffff); return 0; diff --git a/drivers/scsi/bfa/bfad.c b/drivers/scsi/bfa/bfad.c index e6bf126..a5f7690 100644 --- a/drivers/scsi/bfa/bfad.c +++ b/drivers/scsi/bfa/bfad.c @@ -1034,7 +1034,7 @@ bfad_start_ops(struct bfad_s *bfad) { sizeof(driver_info.host_os_patch) - 1); strncpy(driver_info.os_device_name, bfad->pci_name, - sizeof(driver_info.os_device_name - 1)); + sizeof(driver_info.os_device_name) - 1); /* FCS driver info init */ spin_lock_irqsave(&bfad->bfad_lock, flags); diff --git a/drivers/scsi/bnx2fc/bnx2fc.h b/drivers/scsi/bnx2fc/bnx2fc.h index 3486845..50fcd01 100644 --- a/drivers/scsi/bnx2fc/bnx2fc.h +++ b/drivers/scsi/bnx2fc/bnx2fc.h @@ -64,7 +64,7 @@ #include "bnx2fc_constants.h" #define BNX2FC_NAME "bnx2fc" -#define BNX2FC_VERSION "1.0.12" +#define BNX2FC_VERSION "1.0.13" #define PFX "bnx2fc: " @@ -156,6 +156,18 @@ #define BNX2FC_RELOGIN_WAIT_TIME 200 #define BNX2FC_RELOGIN_WAIT_CNT 10 +#define BNX2FC_STATS(hba, stat, cnt) \ + do { \ + u32 val; \ + \ + val = fw_stats->stat.cnt; \ + if (hba->prev_stats.stat.cnt <= val) \ + val -= hba->prev_stats.stat.cnt; \ + else \ + val += (0xfffffff - hba->prev_stats.stat.cnt); \ + hba->bfw_stats.cnt += val; \ + } while (0) + /* bnx2fc driver uses only one instance of fcoe_percpu_s */ extern struct fcoe_percpu_s bnx2fc_global; @@ -167,6 +179,14 @@ struct bnx2fc_percpu_s { spinlock_t fp_work_lock; }; +struct bnx2fc_fw_stats { + u64 fc_crc_cnt; + u64 fcoe_tx_pkt_cnt; + u64 fcoe_rx_pkt_cnt; + u64 fcoe_tx_byte_cnt; + u64 fcoe_rx_byte_cnt; +}; + struct bnx2fc_hba { struct list_head list; struct cnic_dev *cnic; @@ -207,6 +227,8 @@ struct bnx2fc_hba { struct bnx2fc_rport **tgt_ofld_list; /* statistics */ + struct bnx2fc_fw_stats bfw_stats; + struct fcoe_statistics_params prev_stats; struct fcoe_statistics_params *stats_buffer; dma_addr_t stats_buf_dma; struct completion stat_req_done; @@ -280,6 +302,7 @@ struct bnx2fc_rport { #define BNX2FC_FLAG_UPLD_REQ_COMPL 0x7 #define BNX2FC_FLAG_EXPL_LOGO 0x8 #define BNX2FC_FLAG_DISABLE_FAILED 0x9 +#define BNX2FC_FLAG_ENABLED 0xa u8 src_addr[ETH_ALEN]; u32 max_sqes; @@ -468,6 +491,8 @@ int bnx2fc_send_fw_fcoe_init_msg(struct bnx2fc_hba *hba); int bnx2fc_send_fw_fcoe_destroy_msg(struct bnx2fc_hba *hba); int bnx2fc_send_session_ofld_req(struct fcoe_port *port, struct bnx2fc_rport *tgt); +int bnx2fc_send_session_enable_req(struct fcoe_port *port, + struct bnx2fc_rport *tgt); int bnx2fc_send_session_disable_req(struct fcoe_port *port, struct bnx2fc_rport *tgt); int bnx2fc_send_session_destroy_req(struct bnx2fc_hba *hba, diff --git a/drivers/scsi/bnx2fc/bnx2fc_fcoe.c b/drivers/scsi/bnx2fc/bnx2fc_fcoe.c index 70ecd95..2daf4b0 100644 --- a/drivers/scsi/bnx2fc/bnx2fc_fcoe.c +++ b/drivers/scsi/bnx2fc/bnx2fc_fcoe.c @@ -22,7 +22,7 @@ DEFINE_PER_CPU(struct bnx2fc_percpu_s, bnx2fc_percpu); #define DRV_MODULE_NAME "bnx2fc" #define DRV_MODULE_VERSION BNX2FC_VERSION -#define DRV_MODULE_RELDATE "Jun 04, 2012" +#define DRV_MODULE_RELDATE "Dec 21, 2012" static char version[] = @@ -62,6 +62,10 @@ static int bnx2fc_destroy(struct net_device *net_device); static int bnx2fc_enable(struct net_device *netdev); static int bnx2fc_disable(struct net_device *netdev); +/* fcoe_syfs control interface handlers */ +static int bnx2fc_ctlr_alloc(struct net_device *netdev); +static int bnx2fc_ctlr_enabled(struct fcoe_ctlr_device *cdev); + static void bnx2fc_recv_frame(struct sk_buff *skb); static void bnx2fc_start_disc(struct bnx2fc_interface *interface); @@ -89,7 +93,6 @@ static void bnx2fc_port_shutdown(struct fc_lport *lport); static void bnx2fc_stop(struct bnx2fc_interface *interface); static int __init bnx2fc_mod_init(void); static void __exit bnx2fc_mod_exit(void); -static void bnx2fc_ctlr_get_lesb(struct fcoe_ctlr_device *ctlr_dev); unsigned int bnx2fc_debug_level; module_param_named(debug_logging, bnx2fc_debug_level, int, S_IRUGO|S_IWUSR); @@ -107,44 +110,6 @@ static inline struct net_device *bnx2fc_netdev(const struct fc_lport *lport) ((struct fcoe_port *)lport_priv(lport))->priv)->netdev; } -/** - * bnx2fc_get_lesb() - Fill the FCoE Link Error Status Block - * @lport: the local port - * @fc_lesb: the link error status block - */ -static void bnx2fc_get_lesb(struct fc_lport *lport, - struct fc_els_lesb *fc_lesb) -{ - struct net_device *netdev = bnx2fc_netdev(lport); - - __fcoe_get_lesb(lport, fc_lesb, netdev); -} - -static void bnx2fc_ctlr_get_lesb(struct fcoe_ctlr_device *ctlr_dev) -{ - struct fcoe_ctlr *fip = fcoe_ctlr_device_priv(ctlr_dev); - struct net_device *netdev = bnx2fc_netdev(fip->lp); - struct fcoe_fc_els_lesb *fcoe_lesb; - struct fc_els_lesb fc_lesb; - - __fcoe_get_lesb(fip->lp, &fc_lesb, netdev); - fcoe_lesb = (struct fcoe_fc_els_lesb *)(&fc_lesb); - - ctlr_dev->lesb.lesb_link_fail = - ntohl(fcoe_lesb->lesb_link_fail); - ctlr_dev->lesb.lesb_vlink_fail = - ntohl(fcoe_lesb->lesb_vlink_fail); - ctlr_dev->lesb.lesb_miss_fka = - ntohl(fcoe_lesb->lesb_miss_fka); - ctlr_dev->lesb.lesb_symb_err = - ntohl(fcoe_lesb->lesb_symb_err); - ctlr_dev->lesb.lesb_err_block = - ntohl(fcoe_lesb->lesb_err_block); - ctlr_dev->lesb.lesb_fcs_error = - ntohl(fcoe_lesb->lesb_fcs_error); -} -EXPORT_SYMBOL(bnx2fc_ctlr_get_lesb); - static void bnx2fc_fcf_get_vlan_id(struct fcoe_fcf_device *fcf_dev) { struct fcoe_ctlr_device *ctlr_dev = @@ -687,11 +652,16 @@ static struct fc_host_statistics *bnx2fc_get_host_stats(struct Scsi_Host *shost) BNX2FC_HBA_DBG(lport, "FW stat req timed out\n"); return bnx2fc_stats; } - bnx2fc_stats->invalid_crc_count += fw_stats->rx_stat2.fc_crc_cnt; - bnx2fc_stats->tx_frames += fw_stats->tx_stat.fcoe_tx_pkt_cnt; - bnx2fc_stats->tx_words += (fw_stats->tx_stat.fcoe_tx_byte_cnt) / 4; - bnx2fc_stats->rx_frames += fw_stats->rx_stat0.fcoe_rx_pkt_cnt; - bnx2fc_stats->rx_words += (fw_stats->rx_stat0.fcoe_rx_byte_cnt) / 4; + BNX2FC_STATS(hba, rx_stat2, fc_crc_cnt); + bnx2fc_stats->invalid_crc_count += hba->bfw_stats.fc_crc_cnt; + BNX2FC_STATS(hba, tx_stat, fcoe_tx_pkt_cnt); + bnx2fc_stats->tx_frames += hba->bfw_stats.fcoe_tx_pkt_cnt; + BNX2FC_STATS(hba, tx_stat, fcoe_tx_byte_cnt); + bnx2fc_stats->tx_words += ((hba->bfw_stats.fcoe_tx_byte_cnt) / 4); + BNX2FC_STATS(hba, rx_stat0, fcoe_rx_pkt_cnt); + bnx2fc_stats->rx_frames += hba->bfw_stats.fcoe_rx_pkt_cnt; + BNX2FC_STATS(hba, rx_stat0, fcoe_rx_byte_cnt); + bnx2fc_stats->rx_words += ((hba->bfw_stats.fcoe_rx_byte_cnt) / 4); bnx2fc_stats->dumped_frames = 0; bnx2fc_stats->lip_count = 0; @@ -700,6 +670,8 @@ static struct fc_host_statistics *bnx2fc_get_host_stats(struct Scsi_Host *shost) bnx2fc_stats->loss_of_signal_count = 0; bnx2fc_stats->prim_seq_protocol_err_count = 0; + memcpy(&hba->prev_stats, hba->stats_buffer, + sizeof(struct fcoe_statistics_params)); return bnx2fc_stats; } @@ -734,35 +706,6 @@ static int bnx2fc_shost_config(struct fc_lport *lport, struct device *dev) return 0; } -static void bnx2fc_link_speed_update(struct fc_lport *lport) -{ - struct fcoe_port *port = lport_priv(lport); - struct bnx2fc_interface *interface = port->priv; - struct net_device *netdev = interface->netdev; - struct ethtool_cmd ecmd; - - if (!__ethtool_get_settings(netdev, &ecmd)) { - lport->link_supported_speeds &= - ~(FC_PORTSPEED_1GBIT | FC_PORTSPEED_10GBIT); - if (ecmd.supported & (SUPPORTED_1000baseT_Half | - SUPPORTED_1000baseT_Full)) - lport->link_supported_speeds |= FC_PORTSPEED_1GBIT; - if (ecmd.supported & SUPPORTED_10000baseT_Full) - lport->link_supported_speeds |= FC_PORTSPEED_10GBIT; - - switch (ethtool_cmd_speed(&ecmd)) { - case SPEED_1000: - lport->link_speed = FC_PORTSPEED_1GBIT; - break; - case SPEED_2500: - lport->link_speed = FC_PORTSPEED_2GBIT; - break; - case SPEED_10000: - lport->link_speed = FC_PORTSPEED_10GBIT; - break; - } - } -} static int bnx2fc_link_ok(struct fc_lport *lport) { struct fcoe_port *port = lport_priv(lport); @@ -820,7 +763,7 @@ static int bnx2fc_net_config(struct fc_lport *lport, struct net_device *netdev) port->fcoe_pending_queue_active = 0; setup_timer(&port->timer, fcoe_queue_timer, (unsigned long) lport); - bnx2fc_link_speed_update(lport); + fcoe_link_speed_update(lport); if (!lport->vport) { if (fcoe_get_wwn(netdev, &wwnn, NETDEV_FCOE_WWNN)) @@ -864,6 +807,7 @@ static void bnx2fc_indicate_netevent(void *context, unsigned long event, u16 vlan_id) { struct bnx2fc_hba *hba = (struct bnx2fc_hba *)context; + struct fcoe_ctlr_device *cdev; struct fc_lport *lport; struct fc_lport *vport; struct bnx2fc_interface *interface, *tmp; @@ -923,30 +867,47 @@ static void bnx2fc_indicate_netevent(void *context, unsigned long event, BNX2FC_HBA_DBG(lport, "netevent handler - event=%s %ld\n", interface->netdev->name, event); - bnx2fc_link_speed_update(lport); + fcoe_link_speed_update(lport); + + cdev = fcoe_ctlr_to_ctlr_dev(ctlr); if (link_possible && !bnx2fc_link_ok(lport)) { - /* Reset max recv frame size to default */ - fc_set_mfs(lport, BNX2FC_MFS); - /* - * ctlr link up will only be handled during - * enable to avoid sending discovery solicitation - * on a stale vlan - */ - if (interface->enabled) - fcoe_ctlr_link_up(ctlr); + switch (cdev->enabled) { + case FCOE_CTLR_DISABLED: + pr_info("Link up while interface is disabled.\n"); + break; + case FCOE_CTLR_ENABLED: + case FCOE_CTLR_UNUSED: + /* Reset max recv frame size to default */ + fc_set_mfs(lport, BNX2FC_MFS); + /* + * ctlr link up will only be handled during + * enable to avoid sending discovery + * solicitation on a stale vlan + */ + if (interface->enabled) + fcoe_ctlr_link_up(ctlr); + }; } else if (fcoe_ctlr_link_down(ctlr)) { - mutex_lock(&lport->lp_mutex); - list_for_each_entry(vport, &lport->vports, list) - fc_host_port_type(vport->host) = - FC_PORTTYPE_UNKNOWN; - mutex_unlock(&lport->lp_mutex); - fc_host_port_type(lport->host) = FC_PORTTYPE_UNKNOWN; - per_cpu_ptr(lport->stats, - get_cpu())->LinkFailureCount++; - put_cpu(); - fcoe_clean_pending_queue(lport); - wait_for_upload = 1; + switch (cdev->enabled) { + case FCOE_CTLR_DISABLED: + pr_info("Link down while interface is disabled.\n"); + break; + case FCOE_CTLR_ENABLED: + case FCOE_CTLR_UNUSED: + mutex_lock(&lport->lp_mutex); + list_for_each_entry(vport, &lport->vports, list) + fc_host_port_type(vport->host) = + FC_PORTTYPE_UNKNOWN; + mutex_unlock(&lport->lp_mutex); + fc_host_port_type(lport->host) = + FC_PORTTYPE_UNKNOWN; + per_cpu_ptr(lport->stats, + get_cpu())->LinkFailureCount++; + put_cpu(); + fcoe_clean_pending_queue(lport); + wait_for_upload = 1; + }; } } mutex_unlock(&bnx2fc_dev_lock); @@ -1477,6 +1438,7 @@ static struct fc_lport *bnx2fc_if_create(struct bnx2fc_interface *interface, port = lport_priv(lport); port->lport = lport; port->priv = interface; + port->get_netdev = bnx2fc_netdev; INIT_WORK(&port->destroy_work, bnx2fc_destroy_work); /* Configure fcoe_port */ @@ -1996,7 +1958,9 @@ static void bnx2fc_ulp_init(struct cnic_dev *dev) set_bit(BNX2FC_CNIC_REGISTERED, &hba->reg_with_cnic); } - +/** + * Deperecated: Use bnx2fc_enabled() + */ static int bnx2fc_disable(struct net_device *netdev) { struct bnx2fc_interface *interface; @@ -2022,7 +1986,9 @@ static int bnx2fc_disable(struct net_device *netdev) return rc; } - +/** + * Deprecated: Use bnx2fc_enabled() + */ static int bnx2fc_enable(struct net_device *netdev) { struct bnx2fc_interface *interface; @@ -2048,17 +2014,57 @@ static int bnx2fc_enable(struct net_device *netdev) } /** - * bnx2fc_create - Create bnx2fc FCoE interface + * bnx2fc_ctlr_enabled() - Enable or disable an FCoE Controller + * @cdev: The FCoE Controller that is being enabled or disabled + * + * fcoe_sysfs will ensure that the state of 'enabled' has + * changed, so no checking is necessary here. This routine simply + * calls fcoe_enable or fcoe_disable, both of which are deprecated. + * When those routines are removed the functionality can be merged + * here. + */ +static int bnx2fc_ctlr_enabled(struct fcoe_ctlr_device *cdev) +{ + struct fcoe_ctlr *ctlr = fcoe_ctlr_device_priv(cdev); + struct fc_lport *lport = ctlr->lp; + struct net_device *netdev = bnx2fc_netdev(lport); + + switch (cdev->enabled) { + case FCOE_CTLR_ENABLED: + return bnx2fc_enable(netdev); + case FCOE_CTLR_DISABLED: + return bnx2fc_disable(netdev); + case FCOE_CTLR_UNUSED: + default: + return -ENOTSUPP; + }; +} + +enum bnx2fc_create_link_state { + BNX2FC_CREATE_LINK_DOWN, + BNX2FC_CREATE_LINK_UP, +}; + +/** + * _bnx2fc_create() - Create bnx2fc FCoE interface + * @netdev : The net_device object the Ethernet interface to create on + * @fip_mode: The FIP mode for this creation + * @link_state: The ctlr link state on creation * - * @buffer: The name of Ethernet interface to create on - * @kp: The associated kernel param + * Called from either the libfcoe 'create' module parameter + * via fcoe_create or from fcoe_syfs's ctlr_create file. * - * Called from sysfs. + * libfcoe's 'create' module parameter is deprecated so some + * consolidation of code can be done when that interface is + * removed. * * Returns: 0 for success */ -static int bnx2fc_create(struct net_device *netdev, enum fip_state fip_mode) +static int _bnx2fc_create(struct net_device *netdev, + enum fip_state fip_mode, + enum bnx2fc_create_link_state link_state) { + struct fcoe_ctlr_device *cdev; struct fcoe_ctlr *ctlr; struct bnx2fc_interface *interface; struct bnx2fc_hba *hba; @@ -2153,7 +2159,15 @@ static int bnx2fc_create(struct net_device *netdev, enum fip_state fip_mode) /* Make this master N_port */ ctlr->lp = lport; - if (!bnx2fc_link_ok(lport)) { + cdev = fcoe_ctlr_to_ctlr_dev(ctlr); + + if (link_state == BNX2FC_CREATE_LINK_UP) + cdev->enabled = FCOE_CTLR_ENABLED; + else + cdev->enabled = FCOE_CTLR_DISABLED; + + if (link_state == BNX2FC_CREATE_LINK_UP && + !bnx2fc_link_ok(lport)) { fcoe_ctlr_link_up(ctlr); fc_host_port_type(lport->host) = FC_PORTTYPE_NPORT; set_bit(ADAPTER_STATE_READY, &interface->hba->adapter_state); @@ -2161,7 +2175,10 @@ static int bnx2fc_create(struct net_device *netdev, enum fip_state fip_mode) BNX2FC_HBA_DBG(lport, "create: START DISC\n"); bnx2fc_start_disc(interface); - interface->enabled = true; + + if (link_state == BNX2FC_CREATE_LINK_UP) + interface->enabled = true; + /* * Release from kref_init in bnx2fc_interface_setup, on success * lport should be holding a reference taken in bnx2fc_if_create @@ -2187,6 +2204,37 @@ mod_err: } /** + * bnx2fc_create() - Create a bnx2fc interface + * @netdev : The net_device object the Ethernet interface to create on + * @fip_mode: The FIP mode for this creation + * + * Called from fcoe transport + * + * Returns: 0 for success + */ +static int bnx2fc_create(struct net_device *netdev, enum fip_state fip_mode) +{ + return _bnx2fc_create(netdev, fip_mode, BNX2FC_CREATE_LINK_UP); +} + +/** + * bnx2fc_ctlr_alloc() - Allocate a bnx2fc interface from fcoe_sysfs + * @netdev: The net_device to be used by the allocated FCoE Controller + * + * This routine is called from fcoe_sysfs. It will start the fcoe_ctlr + * in a link_down state. The allows the user an opportunity to configure + * the FCoE Controller from sysfs before enabling the FCoE Controller. + * + * Creating in with this routine starts the FCoE Controller in Fabric + * mode. The user can change to VN2VN or another mode before enabling. + */ +static int bnx2fc_ctlr_alloc(struct net_device *netdev) +{ + return _bnx2fc_create(netdev, FIP_MODE_FABRIC, + BNX2FC_CREATE_LINK_DOWN); +} + +/** * bnx2fc_find_hba_for_cnic - maps cnic instance to bnx2fc hba instance * * @cnic: Pointer to cnic device instance @@ -2311,6 +2359,7 @@ static struct fcoe_transport bnx2fc_transport = { .name = {"bnx2fc"}, .attached = false, .list = LIST_HEAD_INIT(bnx2fc_transport.list), + .alloc = bnx2fc_ctlr_alloc, .match = bnx2fc_match, .create = bnx2fc_create, .destroy = bnx2fc_destroy, @@ -2555,13 +2604,13 @@ module_init(bnx2fc_mod_init); module_exit(bnx2fc_mod_exit); static struct fcoe_sysfs_function_template bnx2fc_fcoe_sysfs_templ = { - .get_fcoe_ctlr_mode = fcoe_ctlr_get_fip_mode, - .get_fcoe_ctlr_link_fail = bnx2fc_ctlr_get_lesb, - .get_fcoe_ctlr_vlink_fail = bnx2fc_ctlr_get_lesb, - .get_fcoe_ctlr_miss_fka = bnx2fc_ctlr_get_lesb, - .get_fcoe_ctlr_symb_err = bnx2fc_ctlr_get_lesb, - .get_fcoe_ctlr_err_block = bnx2fc_ctlr_get_lesb, - .get_fcoe_ctlr_fcs_error = bnx2fc_ctlr_get_lesb, + .set_fcoe_ctlr_enabled = bnx2fc_ctlr_enabled, + .get_fcoe_ctlr_link_fail = fcoe_ctlr_get_lesb, + .get_fcoe_ctlr_vlink_fail = fcoe_ctlr_get_lesb, + .get_fcoe_ctlr_miss_fka = fcoe_ctlr_get_lesb, + .get_fcoe_ctlr_symb_err = fcoe_ctlr_get_lesb, + .get_fcoe_ctlr_err_block = fcoe_ctlr_get_lesb, + .get_fcoe_ctlr_fcs_error = fcoe_ctlr_get_lesb, .get_fcoe_fcf_selected = fcoe_fcf_get_selected, .get_fcoe_fcf_vlan_id = bnx2fc_fcf_get_vlan_id, @@ -2660,7 +2709,7 @@ static struct scsi_host_template bnx2fc_shost_template = { .can_queue = BNX2FC_CAN_QUEUE, .use_clustering = ENABLE_CLUSTERING, .sg_tablesize = BNX2FC_MAX_BDS_PER_CMD, - .max_sectors = 512, + .max_sectors = 1024, }; static struct libfc_function_template bnx2fc_libfc_fcn_templ = { @@ -2668,7 +2717,7 @@ static struct libfc_function_template bnx2fc_libfc_fcn_templ = { .elsct_send = bnx2fc_elsct_send, .fcp_abort_io = bnx2fc_abort_io, .fcp_cleanup = bnx2fc_cleanup, - .get_lesb = bnx2fc_get_lesb, + .get_lesb = fcoe_get_lesb, .rport_event_callback = bnx2fc_rport_event_handler, }; diff --git a/drivers/scsi/bnx2fc/bnx2fc_hwi.c b/drivers/scsi/bnx2fc/bnx2fc_hwi.c index ef60afa..85ea98a 100644 --- a/drivers/scsi/bnx2fc/bnx2fc_hwi.c +++ b/drivers/scsi/bnx2fc/bnx2fc_hwi.c @@ -347,7 +347,7 @@ int bnx2fc_send_session_ofld_req(struct fcoe_port *port, * @port: port structure pointer * @tgt: bnx2fc_rport structure pointer */ -static int bnx2fc_send_session_enable_req(struct fcoe_port *port, +int bnx2fc_send_session_enable_req(struct fcoe_port *port, struct bnx2fc_rport *tgt) { struct kwqe *kwqe_arr[2]; @@ -759,8 +759,6 @@ static void bnx2fc_process_unsol_compl(struct bnx2fc_rport *tgt, u16 wqe) case FCOE_ERROR_CODE_DATA_SOFN_SEQ_ACTIVE_RESET: BNX2FC_TGT_DBG(tgt, "REC TOV popped for xid - 0x%x\n", xid); - memset(&io_req->err_entry, 0, - sizeof(struct fcoe_err_report_entry)); memcpy(&io_req->err_entry, err_entry, sizeof(struct fcoe_err_report_entry)); if (!test_bit(BNX2FC_FLAG_SRR_SENT, @@ -847,8 +845,6 @@ ret_err_rqe: goto ret_warn_rqe; } - memset(&io_req->err_entry, 0, - sizeof(struct fcoe_err_report_entry)); memcpy(&io_req->err_entry, err_entry, sizeof(struct fcoe_err_report_entry)); @@ -1124,7 +1120,6 @@ static void bnx2fc_process_ofld_cmpl(struct bnx2fc_hba *hba, struct bnx2fc_interface *interface; u32 conn_id; u32 context_id; - int rc; conn_id = ofld_kcqe->fcoe_conn_id; context_id = ofld_kcqe->fcoe_conn_context_id; @@ -1153,17 +1148,10 @@ static void bnx2fc_process_ofld_cmpl(struct bnx2fc_hba *hba, "resources\n"); set_bit(BNX2FC_FLAG_CTX_ALLOC_FAILURE, &tgt->flags); } - goto ofld_cmpl_err; } else { - - /* now enable the session */ - rc = bnx2fc_send_session_enable_req(port, tgt); - if (rc) { - printk(KERN_ERR PFX "enable session failed\n"); - goto ofld_cmpl_err; - } + /* FW offload request successfully completed */ + set_bit(BNX2FC_FLAG_OFFLOADED, &tgt->flags); } - return; ofld_cmpl_err: set_bit(BNX2FC_FLAG_OFLD_REQ_CMPL, &tgt->flags); wake_up_interruptible(&tgt->ofld_wait); @@ -1210,15 +1198,9 @@ static void bnx2fc_process_enable_conn_cmpl(struct bnx2fc_hba *hba, printk(KERN_ERR PFX "bnx2fc-enbl_cmpl: HBA mis-match\n"); goto enbl_cmpl_err; } - if (ofld_kcqe->completion_status) - goto enbl_cmpl_err; - else { + if (!ofld_kcqe->completion_status) /* enable successful - rport ready for issuing IOs */ - set_bit(BNX2FC_FLAG_OFFLOADED, &tgt->flags); - set_bit(BNX2FC_FLAG_OFLD_REQ_CMPL, &tgt->flags); - wake_up_interruptible(&tgt->ofld_wait); - } - return; + set_bit(BNX2FC_FLAG_ENABLED, &tgt->flags); enbl_cmpl_err: set_bit(BNX2FC_FLAG_OFLD_REQ_CMPL, &tgt->flags); @@ -1251,6 +1233,7 @@ static void bnx2fc_process_conn_disable_cmpl(struct bnx2fc_hba *hba, /* disable successful */ BNX2FC_TGT_DBG(tgt, "disable successful\n"); clear_bit(BNX2FC_FLAG_OFFLOADED, &tgt->flags); + clear_bit(BNX2FC_FLAG_ENABLED, &tgt->flags); set_bit(BNX2FC_FLAG_DISABLED, &tgt->flags); set_bit(BNX2FC_FLAG_UPLD_REQ_COMPL, &tgt->flags); wake_up_interruptible(&tgt->upld_wait); diff --git a/drivers/scsi/bnx2fc/bnx2fc_io.c b/drivers/scsi/bnx2fc/bnx2fc_io.c index 8d4626c..60798e8 100644 --- a/drivers/scsi/bnx2fc/bnx2fc_io.c +++ b/drivers/scsi/bnx2fc/bnx2fc_io.c @@ -654,7 +654,7 @@ int bnx2fc_init_mp_req(struct bnx2fc_cmd *io_req) mp_req->mp_resp_bd = dma_alloc_coherent(&hba->pcidev->dev, sz, &mp_req->mp_resp_bd_dma, GFP_ATOMIC); - if (!mp_req->mp_req_bd) { + if (!mp_req->mp_resp_bd) { printk(KERN_ERR PFX "unable to alloc MP resp bd\n"); bnx2fc_free_mp_resc(io_req); return FAILED; @@ -685,8 +685,8 @@ int bnx2fc_init_mp_req(struct bnx2fc_cmd *io_req) static int bnx2fc_initiate_tmf(struct scsi_cmnd *sc_cmd, u8 tm_flags) { struct fc_lport *lport; - struct fc_rport *rport = starget_to_rport(scsi_target(sc_cmd->device)); - struct fc_rport_libfc_priv *rp = rport->dd_data; + struct fc_rport *rport; + struct fc_rport_libfc_priv *rp; struct fcoe_port *port; struct bnx2fc_interface *interface; struct bnx2fc_rport *tgt; @@ -704,6 +704,7 @@ static int bnx2fc_initiate_tmf(struct scsi_cmnd *sc_cmd, u8 tm_flags) unsigned long start = jiffies; lport = shost_priv(host); + rport = starget_to_rport(scsi_target(sc_cmd->device)); port = lport_priv(lport); interface = port->priv; @@ -712,6 +713,7 @@ static int bnx2fc_initiate_tmf(struct scsi_cmnd *sc_cmd, u8 tm_flags) rc = FAILED; goto tmf_err; } + rp = rport->dd_data; rc = fc_block_scsi_eh(sc_cmd); if (rc) diff --git a/drivers/scsi/bnx2fc/bnx2fc_tgt.c b/drivers/scsi/bnx2fc/bnx2fc_tgt.c index b9d0d9c..c57a3bb 100644 --- a/drivers/scsi/bnx2fc/bnx2fc_tgt.c +++ b/drivers/scsi/bnx2fc/bnx2fc_tgt.c @@ -33,6 +33,7 @@ static void bnx2fc_upld_timer(unsigned long data) BNX2FC_TGT_DBG(tgt, "upld_timer - Upload compl not received!!\n"); /* fake upload completion */ clear_bit(BNX2FC_FLAG_OFFLOADED, &tgt->flags); + clear_bit(BNX2FC_FLAG_ENABLED, &tgt->flags); set_bit(BNX2FC_FLAG_UPLD_REQ_COMPL, &tgt->flags); wake_up_interruptible(&tgt->upld_wait); } @@ -55,10 +56,25 @@ static void bnx2fc_ofld_timer(unsigned long data) * resources are freed up in bnx2fc_offload_session */ clear_bit(BNX2FC_FLAG_OFFLOADED, &tgt->flags); + clear_bit(BNX2FC_FLAG_ENABLED, &tgt->flags); set_bit(BNX2FC_FLAG_OFLD_REQ_CMPL, &tgt->flags); wake_up_interruptible(&tgt->ofld_wait); } +static void bnx2fc_ofld_wait(struct bnx2fc_rport *tgt) +{ + setup_timer(&tgt->ofld_timer, bnx2fc_ofld_timer, (unsigned long)tgt); + mod_timer(&tgt->ofld_timer, jiffies + BNX2FC_FW_TIMEOUT); + + wait_event_interruptible(tgt->ofld_wait, + (test_bit( + BNX2FC_FLAG_OFLD_REQ_CMPL, + &tgt->flags))); + if (signal_pending(current)) + flush_signals(current); + del_timer_sync(&tgt->ofld_timer); +} + static void bnx2fc_offload_session(struct fcoe_port *port, struct bnx2fc_rport *tgt, struct fc_rport_priv *rdata) @@ -103,17 +119,7 @@ retry_ofld: * wait for the session is offloaded and enabled. 3 Secs * should be ample time for this process to complete. */ - setup_timer(&tgt->ofld_timer, bnx2fc_ofld_timer, (unsigned long)tgt); - mod_timer(&tgt->ofld_timer, jiffies + BNX2FC_FW_TIMEOUT); - - wait_event_interruptible(tgt->ofld_wait, - (test_bit( - BNX2FC_FLAG_OFLD_REQ_CMPL, - &tgt->flags))); - if (signal_pending(current)) - flush_signals(current); - - del_timer_sync(&tgt->ofld_timer); + bnx2fc_ofld_wait(tgt); if (!(test_bit(BNX2FC_FLAG_OFFLOADED, &tgt->flags))) { if (test_and_clear_bit(BNX2FC_FLAG_CTX_ALLOC_FAILURE, @@ -131,14 +137,23 @@ retry_ofld: } if (bnx2fc_map_doorbell(tgt)) { printk(KERN_ERR PFX "map doorbell failed - no mem\n"); - /* upload will take care of cleaning up sess resc */ - lport->tt.rport_logoff(rdata); + goto ofld_err; } + clear_bit(BNX2FC_FLAG_OFLD_REQ_CMPL, &tgt->flags); + rval = bnx2fc_send_session_enable_req(port, tgt); + if (rval) { + pr_err(PFX "enable session failed\n"); + goto ofld_err; + } + bnx2fc_ofld_wait(tgt); + if (!(test_bit(BNX2FC_FLAG_ENABLED, &tgt->flags))) + goto ofld_err; return; ofld_err: /* couldn't offload the session. log off from this rport */ BNX2FC_TGT_DBG(tgt, "bnx2fc_offload_session - offload error\n"); + clear_bit(BNX2FC_FLAG_OFFLOADED, &tgt->flags); /* Free session resources */ bnx2fc_free_session_resc(hba, tgt); tgt_init_err: @@ -259,6 +274,19 @@ void bnx2fc_flush_active_ios(struct bnx2fc_rport *tgt) spin_unlock_bh(&tgt->tgt_lock); } +static void bnx2fc_upld_wait(struct bnx2fc_rport *tgt) +{ + setup_timer(&tgt->upld_timer, bnx2fc_upld_timer, (unsigned long)tgt); + mod_timer(&tgt->upld_timer, jiffies + BNX2FC_FW_TIMEOUT); + wait_event_interruptible(tgt->upld_wait, + (test_bit( + BNX2FC_FLAG_UPLD_REQ_COMPL, + &tgt->flags))); + if (signal_pending(current)) + flush_signals(current); + del_timer_sync(&tgt->upld_timer); +} + static void bnx2fc_upload_session(struct fcoe_port *port, struct bnx2fc_rport *tgt) { @@ -279,19 +307,8 @@ static void bnx2fc_upload_session(struct fcoe_port *port, * wait for upload to complete. 3 Secs * should be sufficient time for this process to complete. */ - setup_timer(&tgt->upld_timer, bnx2fc_upld_timer, (unsigned long)tgt); - mod_timer(&tgt->upld_timer, jiffies + BNX2FC_FW_TIMEOUT); - BNX2FC_TGT_DBG(tgt, "waiting for disable compl\n"); - wait_event_interruptible(tgt->upld_wait, - (test_bit( - BNX2FC_FLAG_UPLD_REQ_COMPL, - &tgt->flags))); - - if (signal_pending(current)) - flush_signals(current); - - del_timer_sync(&tgt->upld_timer); + bnx2fc_upld_wait(tgt); /* * traverse thru the active_q and tmf_q and cleanup @@ -308,24 +325,13 @@ static void bnx2fc_upload_session(struct fcoe_port *port, bnx2fc_send_session_destroy_req(hba, tgt); /* wait for destroy to complete */ - setup_timer(&tgt->upld_timer, - bnx2fc_upld_timer, (unsigned long)tgt); - mod_timer(&tgt->upld_timer, jiffies + BNX2FC_FW_TIMEOUT); - - wait_event_interruptible(tgt->upld_wait, - (test_bit( - BNX2FC_FLAG_UPLD_REQ_COMPL, - &tgt->flags))); + bnx2fc_upld_wait(tgt); if (!(test_bit(BNX2FC_FLAG_DESTROYED, &tgt->flags))) printk(KERN_ERR PFX "ERROR!! destroy timed out\n"); BNX2FC_TGT_DBG(tgt, "destroy wait complete flags = 0x%lx\n", tgt->flags); - if (signal_pending(current)) - flush_signals(current); - - del_timer_sync(&tgt->upld_timer); } else if (test_bit(BNX2FC_FLAG_DISABLE_FAILED, &tgt->flags)) { printk(KERN_ERR PFX "ERROR!! DISABLE req failed, destroy" @@ -381,7 +387,9 @@ static int bnx2fc_init_tgt(struct bnx2fc_rport *tgt, tgt->rq_cons_idx = 0; atomic_set(&tgt->num_active_ios, 0); - if (rdata->flags & FC_RP_FLAGS_RETRY) { + if (rdata->flags & FC_RP_FLAGS_RETRY && + rdata->ids.roles & FC_RPORT_ROLE_FCP_TARGET && + !(rdata->ids.roles & FC_RPORT_ROLE_FCP_INITIATOR)) { tgt->dev_type = TYPE_TAPE; tgt->io_timeout = 0; /* use default ULP timeout */ } else { @@ -479,7 +487,7 @@ void bnx2fc_rport_event_handler(struct fc_lport *lport, tgt = (struct bnx2fc_rport *)&rp[1]; /* This can happen when ADISC finds the same target */ - if (test_bit(BNX2FC_FLAG_OFFLOADED, &tgt->flags)) { + if (test_bit(BNX2FC_FLAG_ENABLED, &tgt->flags)) { BNX2FC_TGT_DBG(tgt, "already offloaded\n"); mutex_unlock(&hba->hba_mutex); return; @@ -494,11 +502,8 @@ void bnx2fc_rport_event_handler(struct fc_lport *lport, BNX2FC_TGT_DBG(tgt, "OFFLOAD num_ofld_sess = %d\n", hba->num_ofld_sess); - if (test_bit(BNX2FC_FLAG_OFFLOADED, &tgt->flags)) { - /* - * Session is offloaded and enabled. Map - * doorbell register for this target - */ + if (test_bit(BNX2FC_FLAG_ENABLED, &tgt->flags)) { + /* Session is offloaded and enabled. */ BNX2FC_TGT_DBG(tgt, "sess offloaded\n"); /* This counter is protected with hba mutex */ hba->num_ofld_sess++; @@ -535,7 +540,7 @@ void bnx2fc_rport_event_handler(struct fc_lport *lport, */ tgt = (struct bnx2fc_rport *)&rp[1]; - if (!(test_bit(BNX2FC_FLAG_OFFLOADED, &tgt->flags))) { + if (!(test_bit(BNX2FC_FLAG_ENABLED, &tgt->flags))) { mutex_unlock(&hba->hba_mutex); break; } diff --git a/drivers/scsi/bnx2i/bnx2i_hwi.c b/drivers/scsi/bnx2i/bnx2i_hwi.c index 91eec60..a28b03e 100644 --- a/drivers/scsi/bnx2i/bnx2i_hwi.c +++ b/drivers/scsi/bnx2i/bnx2i_hwi.c @@ -1317,7 +1317,7 @@ int bnx2i_send_fw_iscsi_init_msg(struct bnx2i_hba *hba) (1ULL << ISCSI_KCQE_COMPLETION_STATUS_PROTOCOL_ERR_LUN)); if (error_mask1) { iscsi_init2.error_bit_map[0] = error_mask1; - mask64 &= (u32)(~mask64); + mask64 ^= (u32)(mask64); mask64 |= error_mask1; } else iscsi_init2.error_bit_map[0] = (u32) mask64; diff --git a/drivers/scsi/csiostor/csio_hw.c b/drivers/scsi/csiostor/csio_hw.c index 8ecdb94..bdd78fb 100644 --- a/drivers/scsi/csiostor/csio_hw.c +++ b/drivers/scsi/csiostor/csio_hw.c @@ -2131,13 +2131,16 @@ csio_hw_flash_config(struct csio_hw *hw, u32 *fw_cfg_param, char *path) value_to_add = 4 - (cf->size % 4); cfg_data = kzalloc(cf->size+value_to_add, GFP_KERNEL); - if (cfg_data == NULL) - return -ENOMEM; + if (cfg_data == NULL) { + ret = -ENOMEM; + goto leave; + } memcpy((void *)cfg_data, (const void *)cf->data, cf->size); - - if (csio_hw_check_fwconfig(hw, fw_cfg_param) != 0) - return -EINVAL; + if (csio_hw_check_fwconfig(hw, fw_cfg_param) != 0) { + ret = -EINVAL; + goto leave; + } mtype = FW_PARAMS_PARAM_Y_GET(*fw_cfg_param); maddr = FW_PARAMS_PARAM_Z_GET(*fw_cfg_param) << 16; @@ -2149,9 +2152,9 @@ csio_hw_flash_config(struct csio_hw *hw, u32 *fw_cfg_param, char *path) strncpy(path, "/lib/firmware/" CSIO_CF_FNAME, 64); } +leave: kfree(cfg_data); release_firmware(cf); - return ret; } diff --git a/drivers/scsi/csiostor/csio_init.c b/drivers/scsi/csiostor/csio_init.c index c323b20..0604b5ff 100644 --- a/drivers/scsi/csiostor/csio_init.c +++ b/drivers/scsi/csiostor/csio_init.c @@ -60,13 +60,6 @@ static struct scsi_transport_template *csio_fcoe_transport_vport; /* * debugfs support */ -static int -csio_mem_open(struct inode *inode, struct file *file) -{ - file->private_data = inode->i_private; - return 0; -} - static ssize_t csio_mem_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { @@ -110,7 +103,7 @@ csio_mem_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) static const struct file_operations csio_mem_debugfs_fops = { .owner = THIS_MODULE, - .open = csio_mem_open, + .open = simple_open, .read = csio_mem_read, .llseek = default_llseek, }; diff --git a/drivers/scsi/cxgbi/cxgb4i/cxgb4i.c b/drivers/scsi/cxgbi/cxgb4i/cxgb4i.c index f924b3c..3fecf35 100644 --- a/drivers/scsi/cxgbi/cxgb4i/cxgb4i.c +++ b/drivers/scsi/cxgbi/cxgb4i/cxgb4i.c @@ -1564,6 +1564,7 @@ static int t4_uld_state_change(void *handle, enum cxgb4_state state) break; case CXGB4_STATE_DETACH: pr_info("cdev 0x%p, DETACH.\n", cdev); + cxgbi_device_unregister(cdev); break; default: pr_info("cdev 0x%p, unknown state %d.\n", cdev, state); diff --git a/drivers/scsi/dc395x.c b/drivers/scsi/dc395x.c index 865c64f..fed486bf 100644 --- a/drivers/scsi/dc395x.c +++ b/drivers/scsi/dc395x.c @@ -3747,13 +3747,13 @@ static struct DeviceCtlBlk *device_alloc(struct AdapterCtlBlk *acb, dcb->max_command = 1; dcb->target_id = target; dcb->target_lun = lun; + dcb->dev_mode = eeprom->target[target].cfg0; #ifndef DC395x_NO_DISCONNECT dcb->identify_msg = IDENTIFY(dcb->dev_mode & NTC_DO_DISCONNECT, lun); #else dcb->identify_msg = IDENTIFY(0, lun); #endif - dcb->dev_mode = eeprom->target[target].cfg0; dcb->inquiry7 = 0; dcb->sync_mode = 0; dcb->min_nego_period = clock_period[period_index]; diff --git a/drivers/scsi/fcoe/fcoe.c b/drivers/scsi/fcoe/fcoe.c index 666b7ac..b5d92fc 100644 --- a/drivers/scsi/fcoe/fcoe.c +++ b/drivers/scsi/fcoe/fcoe.c @@ -82,11 +82,11 @@ static int fcoe_rcv(struct sk_buff *, struct net_device *, struct packet_type *, struct net_device *); static int fcoe_percpu_receive_thread(void *); static void fcoe_percpu_clean(struct fc_lport *); -static int fcoe_link_speed_update(struct fc_lport *); static int fcoe_link_ok(struct fc_lport *); static struct fc_lport *fcoe_hostlist_lookup(const struct net_device *); static int fcoe_hostlist_add(const struct fc_lport *); +static void fcoe_hostlist_del(const struct fc_lport *); static int fcoe_device_notification(struct notifier_block *, ulong, void *); static void fcoe_dev_setup(void); @@ -117,6 +117,11 @@ static int fcoe_destroy(struct net_device *netdev); static int fcoe_enable(struct net_device *netdev); static int fcoe_disable(struct net_device *netdev); +/* fcoe_syfs control interface handlers */ +static int fcoe_ctlr_alloc(struct net_device *netdev); +static int fcoe_ctlr_enabled(struct fcoe_ctlr_device *cdev); + + static struct fc_seq *fcoe_elsct_send(struct fc_lport *, u32 did, struct fc_frame *, unsigned int op, @@ -126,8 +131,6 @@ static struct fc_seq *fcoe_elsct_send(struct fc_lport *, void *, u32 timeout); static void fcoe_recv_frame(struct sk_buff *skb); -static void fcoe_get_lesb(struct fc_lport *, struct fc_els_lesb *); - /* notification function for packets from net device */ static struct notifier_block fcoe_notifier = { .notifier_call = fcoe_device_notification, @@ -151,11 +154,11 @@ static int fcoe_vport_create(struct fc_vport *, bool disabled); static int fcoe_vport_disable(struct fc_vport *, bool disable); static void fcoe_set_vport_symbolic_name(struct fc_vport *); static void fcoe_set_port_id(struct fc_lport *, u32, struct fc_frame *); -static void fcoe_ctlr_get_lesb(struct fcoe_ctlr_device *); static void fcoe_fcf_get_vlan_id(struct fcoe_fcf_device *); static struct fcoe_sysfs_function_template fcoe_sysfs_templ = { - .get_fcoe_ctlr_mode = fcoe_ctlr_get_fip_mode, + .set_fcoe_ctlr_mode = fcoe_ctlr_set_fip_mode, + .set_fcoe_ctlr_enabled = fcoe_ctlr_enabled, .get_fcoe_ctlr_link_fail = fcoe_ctlr_get_lesb, .get_fcoe_ctlr_vlink_fail = fcoe_ctlr_get_lesb, .get_fcoe_ctlr_miss_fka = fcoe_ctlr_get_lesb, @@ -1112,10 +1115,17 @@ static struct fc_lport *fcoe_if_create(struct fcoe_interface *fcoe, port = lport_priv(lport); port->lport = lport; port->priv = fcoe; + port->get_netdev = fcoe_netdev; port->max_queue_depth = FCOE_MAX_QUEUE_DEPTH; port->min_queue_depth = FCOE_MIN_QUEUE_DEPTH; INIT_WORK(&port->destroy_work, fcoe_destroy_work); + /* + * Need to add the lport to the hostlist + * so we catch NETDEV_CHANGE events. + */ + fcoe_hostlist_add(lport); + /* configure a fc_lport including the exchange manager */ rc = fcoe_lport_config(lport); if (rc) { @@ -1187,6 +1197,7 @@ static struct fc_lport *fcoe_if_create(struct fcoe_interface *fcoe, out_lp_destroy: fc_exch_mgr_free(lport); out_host_put: + fcoe_hostlist_del(lport); scsi_host_put(lport->host); out: return ERR_PTR(rc); @@ -1964,6 +1975,7 @@ static int fcoe_dcb_app_notification(struct notifier_block *notifier, static int fcoe_device_notification(struct notifier_block *notifier, ulong event, void *ptr) { + struct fcoe_ctlr_device *cdev; struct fc_lport *lport = NULL; struct net_device *netdev = ptr; struct fcoe_ctlr *ctlr; @@ -2020,13 +2032,29 @@ static int fcoe_device_notification(struct notifier_block *notifier, fcoe_link_speed_update(lport); - if (link_possible && !fcoe_link_ok(lport)) - fcoe_ctlr_link_up(ctlr); - else if (fcoe_ctlr_link_down(ctlr)) { - stats = per_cpu_ptr(lport->stats, get_cpu()); - stats->LinkFailureCount++; - put_cpu(); - fcoe_clean_pending_queue(lport); + cdev = fcoe_ctlr_to_ctlr_dev(ctlr); + + if (link_possible && !fcoe_link_ok(lport)) { + switch (cdev->enabled) { + case FCOE_CTLR_DISABLED: + pr_info("Link up while interface is disabled.\n"); + break; + case FCOE_CTLR_ENABLED: + case FCOE_CTLR_UNUSED: + fcoe_ctlr_link_up(ctlr); + }; + } else if (fcoe_ctlr_link_down(ctlr)) { + switch (cdev->enabled) { + case FCOE_CTLR_DISABLED: + pr_info("Link down while interface is disabled.\n"); + break; + case FCOE_CTLR_ENABLED: + case FCOE_CTLR_UNUSED: + stats = per_cpu_ptr(lport->stats, get_cpu()); + stats->LinkFailureCount++; + put_cpu(); + fcoe_clean_pending_queue(lport); + }; } out: return rc; @@ -2039,6 +2067,8 @@ out: * Called from fcoe transport. * * Returns: 0 for success + * + * Deprecated: use fcoe_ctlr_enabled() */ static int fcoe_disable(struct net_device *netdev) { @@ -2098,6 +2128,33 @@ out: } /** + * fcoe_ctlr_enabled() - Enable or disable an FCoE Controller + * @cdev: The FCoE Controller that is being enabled or disabled + * + * fcoe_sysfs will ensure that the state of 'enabled' has + * changed, so no checking is necessary here. This routine simply + * calls fcoe_enable or fcoe_disable, both of which are deprecated. + * When those routines are removed the functionality can be merged + * here. + */ +static int fcoe_ctlr_enabled(struct fcoe_ctlr_device *cdev) +{ + struct fcoe_ctlr *ctlr = fcoe_ctlr_device_priv(cdev); + struct fc_lport *lport = ctlr->lp; + struct net_device *netdev = fcoe_netdev(lport); + + switch (cdev->enabled) { + case FCOE_CTLR_ENABLED: + return fcoe_enable(netdev); + case FCOE_CTLR_DISABLED: + return fcoe_disable(netdev); + case FCOE_CTLR_UNUSED: + default: + return -ENOTSUPP; + }; +} + +/** * fcoe_destroy() - Destroy a FCoE interface * @netdev : The net_device object the Ethernet interface to create on * @@ -2139,8 +2196,31 @@ static void fcoe_destroy_work(struct work_struct *work) { struct fcoe_port *port; struct fcoe_interface *fcoe; + struct Scsi_Host *shost; + struct fc_host_attrs *fc_host; + unsigned long flags; + struct fc_vport *vport; + struct fc_vport *next_vport; port = container_of(work, struct fcoe_port, destroy_work); + shost = port->lport->host; + fc_host = shost_to_fc_host(shost); + + /* Loop through all the vports and mark them for deletion */ + spin_lock_irqsave(shost->host_lock, flags); + list_for_each_entry_safe(vport, next_vport, &fc_host->vports, peers) { + if (vport->flags & (FC_VPORT_DEL | FC_VPORT_CREATING)) { + continue; + } else { + vport->flags |= FC_VPORT_DELETING; + queue_work(fc_host_work_q(shost), + &vport->vport_delete_work); + } + } + spin_unlock_irqrestore(shost->host_lock, flags); + + flush_workqueue(fc_host_work_q(shost)); + mutex_lock(&fcoe_config_mutex); fcoe = port->priv; @@ -2204,16 +2284,26 @@ static void fcoe_dcb_create(struct fcoe_interface *fcoe) #endif } +enum fcoe_create_link_state { + FCOE_CREATE_LINK_DOWN, + FCOE_CREATE_LINK_UP, +}; + /** - * fcoe_create() - Create a fcoe interface - * @netdev : The net_device object the Ethernet interface to create on - * @fip_mode: The FIP mode for this creation + * _fcoe_create() - (internal) Create a fcoe interface + * @netdev : The net_device object the Ethernet interface to create on + * @fip_mode: The FIP mode for this creation + * @link_state: The ctlr link state on creation * - * Called from fcoe transport + * Called from either the libfcoe 'create' module parameter + * via fcoe_create or from fcoe_syfs's ctlr_create file. * - * Returns: 0 for success + * libfcoe's 'create' module parameter is deprecated so some + * consolidation of code can be done when that interface is + * removed. */ -static int fcoe_create(struct net_device *netdev, enum fip_state fip_mode) +static int _fcoe_create(struct net_device *netdev, enum fip_state fip_mode, + enum fcoe_create_link_state link_state) { int rc = 0; struct fcoe_ctlr_device *ctlr_dev; @@ -2254,13 +2344,29 @@ static int fcoe_create(struct net_device *netdev, enum fip_state fip_mode) /* setup DCB priority attributes. */ fcoe_dcb_create(fcoe); - /* add to lports list */ - fcoe_hostlist_add(lport); - /* start FIP Discovery and FLOGI */ lport->boot_time = jiffies; fc_fabric_login(lport); - if (!fcoe_link_ok(lport)) { + + /* + * If the fcoe_ctlr_device is to be set to DISABLED + * it must be done after the lport is added to the + * hostlist, but before the rtnl_lock is released. + * This is because the rtnl_lock protects the + * hostlist that fcoe_device_notification uses. If + * the FCoE Controller is intended to be created + * DISABLED then 'enabled' needs to be considered + * handling link events. 'enabled' must be set + * before the lport can be found in the hostlist + * when a link up event is received. + */ + if (link_state == FCOE_CREATE_LINK_UP) + ctlr_dev->enabled = FCOE_CTLR_ENABLED; + else + ctlr_dev->enabled = FCOE_CTLR_DISABLED; + + if (link_state == FCOE_CREATE_LINK_UP && + !fcoe_link_ok(lport)) { rtnl_unlock(); fcoe_ctlr_link_up(ctlr); mutex_unlock(&fcoe_config_mutex); @@ -2275,37 +2381,34 @@ out_nortnl: } /** - * fcoe_link_speed_update() - Update the supported and actual link speeds - * @lport: The local port to update speeds for + * fcoe_create() - Create a fcoe interface + * @netdev : The net_device object the Ethernet interface to create on + * @fip_mode: The FIP mode for this creation + * + * Called from fcoe transport + * + * Returns: 0 for success + */ +static int fcoe_create(struct net_device *netdev, enum fip_state fip_mode) +{ + return _fcoe_create(netdev, fip_mode, FCOE_CREATE_LINK_UP); +} + +/** + * fcoe_ctlr_alloc() - Allocate a fcoe interface from fcoe_sysfs + * @netdev: The net_device to be used by the allocated FCoE Controller * - * Returns: 0 if the ethtool query was successful - * -1 if the ethtool query failed + * This routine is called from fcoe_sysfs. It will start the fcoe_ctlr + * in a link_down state. The allows the user an opportunity to configure + * the FCoE Controller from sysfs before enabling the FCoE Controller. + * + * Creating in with this routine starts the FCoE Controller in Fabric + * mode. The user can change to VN2VN or another mode before enabling. */ -static int fcoe_link_speed_update(struct fc_lport *lport) +static int fcoe_ctlr_alloc(struct net_device *netdev) { - struct net_device *netdev = fcoe_netdev(lport); - struct ethtool_cmd ecmd; - - if (!__ethtool_get_settings(netdev, &ecmd)) { - lport->link_supported_speeds &= - ~(FC_PORTSPEED_1GBIT | FC_PORTSPEED_10GBIT); - if (ecmd.supported & (SUPPORTED_1000baseT_Half | - SUPPORTED_1000baseT_Full)) - lport->link_supported_speeds |= FC_PORTSPEED_1GBIT; - if (ecmd.supported & SUPPORTED_10000baseT_Full) - lport->link_supported_speeds |= - FC_PORTSPEED_10GBIT; - switch (ethtool_cmd_speed(&ecmd)) { - case SPEED_1000: - lport->link_speed = FC_PORTSPEED_1GBIT; - break; - case SPEED_10000: - lport->link_speed = FC_PORTSPEED_10GBIT; - break; - } - return 0; - } - return -1; + return _fcoe_create(netdev, FIP_MODE_FABRIC, + FCOE_CREATE_LINK_DOWN); } /** @@ -2375,10 +2478,13 @@ static int fcoe_reset(struct Scsi_Host *shost) struct fcoe_port *port = lport_priv(lport); struct fcoe_interface *fcoe = port->priv; struct fcoe_ctlr *ctlr = fcoe_to_ctlr(fcoe); + struct fcoe_ctlr_device *cdev = fcoe_ctlr_to_ctlr_dev(ctlr); fcoe_ctlr_link_down(ctlr); fcoe_clean_pending_queue(ctlr->lp); - if (!fcoe_link_ok(ctlr->lp)) + + if (cdev->enabled != FCOE_CTLR_DISABLED && + !fcoe_link_ok(ctlr->lp)) fcoe_ctlr_link_up(ctlr); return 0; } @@ -2445,12 +2551,31 @@ static int fcoe_hostlist_add(const struct fc_lport *lport) return 0; } +/** + * fcoe_hostlist_del() - Remove the FCoE interface identified by a local + * port to the hostlist + * @lport: The local port that identifies the FCoE interface to be added + * + * Locking: must be called with the RTNL mutex held + * + */ +static void fcoe_hostlist_del(const struct fc_lport *lport) +{ + struct fcoe_interface *fcoe; + struct fcoe_port *port; + + port = lport_priv(lport); + fcoe = port->priv; + list_del(&fcoe->list); + return; +} static struct fcoe_transport fcoe_sw_transport = { .name = {FCOE_TRANSPORT_DEFAULT}, .attached = false, .list = LIST_HEAD_INIT(fcoe_sw_transport.list), .match = fcoe_match, + .alloc = fcoe_ctlr_alloc, .create = fcoe_create, .destroy = fcoe_destroy, .enable = fcoe_enable, @@ -2534,9 +2659,9 @@ static void __exit fcoe_exit(void) /* releases the associated fcoe hosts */ rtnl_lock(); list_for_each_entry_safe(fcoe, tmp, &fcoe_hostlist, list) { - list_del(&fcoe->list); ctlr = fcoe_to_ctlr(fcoe); port = lport_priv(ctlr->lp); + fcoe_hostlist_del(port->lport); queue_work(fcoe_wq, &port->destroy_work); } rtnl_unlock(); @@ -2776,43 +2901,6 @@ static void fcoe_set_vport_symbolic_name(struct fc_vport *vport) NULL, NULL, 3 * lport->r_a_tov); } -/** - * fcoe_get_lesb() - Fill the FCoE Link Error Status Block - * @lport: the local port - * @fc_lesb: the link error status block - */ -static void fcoe_get_lesb(struct fc_lport *lport, - struct fc_els_lesb *fc_lesb) -{ - struct net_device *netdev = fcoe_netdev(lport); - - __fcoe_get_lesb(lport, fc_lesb, netdev); -} - -static void fcoe_ctlr_get_lesb(struct fcoe_ctlr_device *ctlr_dev) -{ - struct fcoe_ctlr *fip = fcoe_ctlr_device_priv(ctlr_dev); - struct net_device *netdev = fcoe_netdev(fip->lp); - struct fcoe_fc_els_lesb *fcoe_lesb; - struct fc_els_lesb fc_lesb; - - __fcoe_get_lesb(fip->lp, &fc_lesb, netdev); - fcoe_lesb = (struct fcoe_fc_els_lesb *)(&fc_lesb); - - ctlr_dev->lesb.lesb_link_fail = - ntohl(fcoe_lesb->lesb_link_fail); - ctlr_dev->lesb.lesb_vlink_fail = - ntohl(fcoe_lesb->lesb_vlink_fail); - ctlr_dev->lesb.lesb_miss_fka = - ntohl(fcoe_lesb->lesb_miss_fka); - ctlr_dev->lesb.lesb_symb_err = - ntohl(fcoe_lesb->lesb_symb_err); - ctlr_dev->lesb.lesb_err_block = - ntohl(fcoe_lesb->lesb_err_block); - ctlr_dev->lesb.lesb_fcs_error = - ntohl(fcoe_lesb->lesb_fcs_error); -} - static void fcoe_fcf_get_vlan_id(struct fcoe_fcf_device *fcf_dev) { struct fcoe_ctlr_device *ctlr_dev = diff --git a/drivers/scsi/fcoe/fcoe.h b/drivers/scsi/fcoe/fcoe.h index b42dc32c..2b53672 100644 --- a/drivers/scsi/fcoe/fcoe.h +++ b/drivers/scsi/fcoe/fcoe.h @@ -55,12 +55,12 @@ do { \ #define FCOE_DBG(fmt, args...) \ FCOE_CHECK_LOGGING(FCOE_LOGGING, \ - printk(KERN_INFO "fcoe: " fmt, ##args);) + pr_info("fcoe: " fmt, ##args);) #define FCOE_NETDEV_DBG(netdev, fmt, args...) \ FCOE_CHECK_LOGGING(FCOE_NETDEV_LOGGING, \ - printk(KERN_INFO "fcoe: %s: " fmt, \ - netdev->name, ##args);) + pr_info("fcoe: %s: " fmt, \ + netdev->name, ##args);) /** * struct fcoe_interface - A FCoE interface diff --git a/drivers/scsi/fcoe/fcoe_ctlr.c b/drivers/scsi/fcoe/fcoe_ctlr.c index 4a909d7..08c3bc3 100644 --- a/drivers/scsi/fcoe/fcoe_ctlr.c +++ b/drivers/scsi/fcoe/fcoe_ctlr.c @@ -1291,8 +1291,16 @@ static void fcoe_ctlr_recv_clr_vlink(struct fcoe_ctlr *fip, LIBFCOE_FIP_DBG(fip, "Clear Virtual Link received\n"); - if (!fcf || !lport->port_id) + if (!fcf || !lport->port_id) { + /* + * We are yet to select best FCF, but we got CVL in the + * meantime. reset the ctlr and let it rediscover the FCF + */ + mutex_lock(&fip->ctlr_mutex); + fcoe_ctlr_reset(fip); + mutex_unlock(&fip->ctlr_mutex); return; + } /* * mask of required descriptors. Validating each one clears its bit. @@ -1551,15 +1559,6 @@ static struct fcoe_fcf *fcoe_ctlr_select(struct fcoe_ctlr *fip) fcf->fabric_name, fcf->vfid, fcf->fcf_mac, fcf->fc_map, fcoe_ctlr_mtu_valid(fcf), fcf->flogi_sent, fcf->pri); - if (fcf->fabric_name != first->fabric_name || - fcf->vfid != first->vfid || - fcf->fc_map != first->fc_map) { - LIBFCOE_FIP_DBG(fip, "Conflicting fabric, VFID, " - "or FC-MAP\n"); - return NULL; - } - if (fcf->flogi_sent) - continue; if (!fcoe_ctlr_fcf_usable(fcf)) { LIBFCOE_FIP_DBG(fip, "FCF for fab %16.16llx " "map %x %svalid %savailable\n", @@ -1569,6 +1568,15 @@ static struct fcoe_fcf *fcoe_ctlr_select(struct fcoe_ctlr *fip) "" : "un"); continue; } + if (fcf->fabric_name != first->fabric_name || + fcf->vfid != first->vfid || + fcf->fc_map != first->fc_map) { + LIBFCOE_FIP_DBG(fip, "Conflicting fabric, VFID, " + "or FC-MAP\n"); + return NULL; + } + if (fcf->flogi_sent) + continue; if (!best || fcf->pri < best->pri || best->flogi_sent) best = fcf; } @@ -2864,22 +2872,21 @@ void fcoe_fcf_get_selected(struct fcoe_fcf_device *fcf_dev) } EXPORT_SYMBOL(fcoe_fcf_get_selected); -void fcoe_ctlr_get_fip_mode(struct fcoe_ctlr_device *ctlr_dev) +void fcoe_ctlr_set_fip_mode(struct fcoe_ctlr_device *ctlr_dev) { struct fcoe_ctlr *ctlr = fcoe_ctlr_device_priv(ctlr_dev); mutex_lock(&ctlr->ctlr_mutex); - switch (ctlr->mode) { - case FIP_MODE_FABRIC: - ctlr_dev->mode = FIP_CONN_TYPE_FABRIC; - break; - case FIP_MODE_VN2VN: - ctlr_dev->mode = FIP_CONN_TYPE_VN2VN; + switch (ctlr_dev->mode) { + case FIP_CONN_TYPE_VN2VN: + ctlr->mode = FIP_MODE_VN2VN; break; + case FIP_CONN_TYPE_FABRIC: default: - ctlr_dev->mode = FIP_CONN_TYPE_UNKNOWN; + ctlr->mode = FIP_MODE_FABRIC; break; } + mutex_unlock(&ctlr->ctlr_mutex); } -EXPORT_SYMBOL(fcoe_ctlr_get_fip_mode); +EXPORT_SYMBOL(fcoe_ctlr_set_fip_mode); diff --git a/drivers/scsi/fcoe/fcoe_sysfs.c b/drivers/scsi/fcoe/fcoe_sysfs.c index 5e75168..8c05ae01 100644 --- a/drivers/scsi/fcoe/fcoe_sysfs.c +++ b/drivers/scsi/fcoe/fcoe_sysfs.c @@ -21,8 +21,17 @@ #include <linux/types.h> #include <linux/kernel.h> #include <linux/etherdevice.h> +#include <linux/ctype.h> #include <scsi/fcoe_sysfs.h> +#include <scsi/libfcoe.h> + +/* + * OK to include local libfcoe.h for debug_logging, but cannot include + * <scsi/libfcoe.h> otherwise non-netdev based fcoe solutions would have + * have to include more than fcoe_sysfs.h. + */ +#include "libfcoe.h" static atomic_t ctlr_num; static atomic_t fcf_num; @@ -71,6 +80,8 @@ MODULE_PARM_DESC(fcf_dev_loss_tmo, ((x)->lesb.lesb_err_block) #define fcoe_ctlr_fcs_error(x) \ ((x)->lesb.lesb_fcs_error) +#define fcoe_ctlr_enabled(x) \ + ((x)->enabled) #define fcoe_fcf_state(x) \ ((x)->state) #define fcoe_fcf_fabric_name(x) \ @@ -210,25 +221,34 @@ static ssize_t show_fcoe_fcf_device_##field(struct device *dev, \ #define fcoe_enum_name_search(title, table_type, table) \ static const char *get_fcoe_##title##_name(enum table_type table_key) \ { \ - int i; \ - char *name = NULL; \ - \ - for (i = 0; i < ARRAY_SIZE(table); i++) { \ - if (table[i].value == table_key) { \ - name = table[i].name; \ - break; \ - } \ - } \ - return name; \ + if (table_key < 0 || table_key >= ARRAY_SIZE(table)) \ + return NULL; \ + return table[table_key]; \ +} + +static char *fip_conn_type_names[] = { + [ FIP_CONN_TYPE_UNKNOWN ] = "Unknown", + [ FIP_CONN_TYPE_FABRIC ] = "Fabric", + [ FIP_CONN_TYPE_VN2VN ] = "VN2VN", +}; +fcoe_enum_name_search(ctlr_mode, fip_conn_type, fip_conn_type_names) + +static enum fip_conn_type fcoe_parse_mode(const char *buf) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(fip_conn_type_names); i++) { + if (strcasecmp(buf, fip_conn_type_names[i]) == 0) + return i; + } + + return FIP_CONN_TYPE_UNKNOWN; } -static struct { - enum fcf_state value; - char *name; -} fcf_state_names[] = { - { FCOE_FCF_STATE_UNKNOWN, "Unknown" }, - { FCOE_FCF_STATE_DISCONNECTED, "Disconnected" }, - { FCOE_FCF_STATE_CONNECTED, "Connected" }, +static char *fcf_state_names[] = { + [ FCOE_FCF_STATE_UNKNOWN ] = "Unknown", + [ FCOE_FCF_STATE_DISCONNECTED ] = "Disconnected", + [ FCOE_FCF_STATE_CONNECTED ] = "Connected", }; fcoe_enum_name_search(fcf_state, fcf_state, fcf_state_names) #define FCOE_FCF_STATE_MAX_NAMELEN 50 @@ -246,17 +266,7 @@ static ssize_t show_fcf_state(struct device *dev, } static FCOE_DEVICE_ATTR(fcf, state, S_IRUGO, show_fcf_state, NULL); -static struct { - enum fip_conn_type value; - char *name; -} fip_conn_type_names[] = { - { FIP_CONN_TYPE_UNKNOWN, "Unknown" }, - { FIP_CONN_TYPE_FABRIC, "Fabric" }, - { FIP_CONN_TYPE_VN2VN, "VN2VN" }, -}; -fcoe_enum_name_search(ctlr_mode, fip_conn_type, fip_conn_type_names) -#define FCOE_CTLR_MODE_MAX_NAMELEN 50 - +#define FCOE_MAX_MODENAME_LEN 20 static ssize_t show_ctlr_mode(struct device *dev, struct device_attribute *attr, char *buf) @@ -264,17 +274,116 @@ static ssize_t show_ctlr_mode(struct device *dev, struct fcoe_ctlr_device *ctlr = dev_to_ctlr(dev); const char *name; - if (ctlr->f->get_fcoe_ctlr_mode) - ctlr->f->get_fcoe_ctlr_mode(ctlr); - name = get_fcoe_ctlr_mode_name(ctlr->mode); if (!name) return -EINVAL; - return snprintf(buf, FCOE_CTLR_MODE_MAX_NAMELEN, + return snprintf(buf, FCOE_MAX_MODENAME_LEN, + "%s\n", name); +} + +static ssize_t store_ctlr_mode(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct fcoe_ctlr_device *ctlr = dev_to_ctlr(dev); + char mode[FCOE_MAX_MODENAME_LEN + 1]; + + if (count > FCOE_MAX_MODENAME_LEN) + return -EINVAL; + + strncpy(mode, buf, count); + + if (mode[count - 1] == '\n') + mode[count - 1] = '\0'; + else + mode[count] = '\0'; + + switch (ctlr->enabled) { + case FCOE_CTLR_ENABLED: + LIBFCOE_SYSFS_DBG(ctlr, "Cannot change mode when enabled."); + return -EBUSY; + case FCOE_CTLR_DISABLED: + if (!ctlr->f->set_fcoe_ctlr_mode) { + LIBFCOE_SYSFS_DBG(ctlr, + "Mode change not supported by LLD."); + return -ENOTSUPP; + } + + ctlr->mode = fcoe_parse_mode(mode); + if (ctlr->mode == FIP_CONN_TYPE_UNKNOWN) { + LIBFCOE_SYSFS_DBG(ctlr, + "Unknown mode %s provided.", buf); + return -EINVAL; + } + + ctlr->f->set_fcoe_ctlr_mode(ctlr); + LIBFCOE_SYSFS_DBG(ctlr, "Mode changed to %s.", buf); + + return count; + case FCOE_CTLR_UNUSED: + default: + LIBFCOE_SYSFS_DBG(ctlr, "Mode change not supported."); + return -ENOTSUPP; + }; +} + +static FCOE_DEVICE_ATTR(ctlr, mode, S_IRUGO | S_IWUSR, + show_ctlr_mode, store_ctlr_mode); + +static ssize_t store_ctlr_enabled(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct fcoe_ctlr_device *ctlr = dev_to_ctlr(dev); + int rc; + + switch (ctlr->enabled) { + case FCOE_CTLR_ENABLED: + if (*buf == '1') + return count; + ctlr->enabled = FCOE_CTLR_DISABLED; + break; + case FCOE_CTLR_DISABLED: + if (*buf == '0') + return count; + ctlr->enabled = FCOE_CTLR_ENABLED; + break; + case FCOE_CTLR_UNUSED: + return -ENOTSUPP; + }; + + rc = ctlr->f->set_fcoe_ctlr_enabled(ctlr); + if (rc) + return rc; + + return count; +} + +static char *ctlr_enabled_state_names[] = { + [ FCOE_CTLR_ENABLED ] = "1", + [ FCOE_CTLR_DISABLED ] = "0", +}; +fcoe_enum_name_search(ctlr_enabled_state, ctlr_enabled_state, + ctlr_enabled_state_names) +#define FCOE_CTLR_ENABLED_MAX_NAMELEN 50 + +static ssize_t show_ctlr_enabled_state(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct fcoe_ctlr_device *ctlr = dev_to_ctlr(dev); + const char *name; + + name = get_fcoe_ctlr_enabled_state_name(ctlr->enabled); + if (!name) + return -EINVAL; + return snprintf(buf, FCOE_CTLR_ENABLED_MAX_NAMELEN, "%s\n", name); } -static FCOE_DEVICE_ATTR(ctlr, mode, S_IRUGO, - show_ctlr_mode, NULL); + +static FCOE_DEVICE_ATTR(ctlr, enabled, S_IRUGO | S_IWUSR, + show_ctlr_enabled_state, + store_ctlr_enabled); static ssize_t store_private_fcoe_ctlr_fcf_dev_loss_tmo(struct device *dev, @@ -359,6 +468,7 @@ static struct attribute_group fcoe_ctlr_lesb_attr_group = { static struct attribute *fcoe_ctlr_attrs[] = { &device_attr_fcoe_ctlr_fcf_dev_loss_tmo.attr, + &device_attr_fcoe_ctlr_enabled.attr, &device_attr_fcoe_ctlr_mode.attr, NULL, }; @@ -443,9 +553,16 @@ struct device_type fcoe_fcf_device_type = { .release = fcoe_fcf_device_release, }; +struct bus_attribute fcoe_bus_attr_group[] = { + __ATTR(ctlr_create, S_IWUSR, NULL, fcoe_ctlr_create_store), + __ATTR(ctlr_destroy, S_IWUSR, NULL, fcoe_ctlr_destroy_store), + __ATTR_NULL +}; + struct bus_type fcoe_bus_type = { .name = "fcoe", .match = &fcoe_bus_match, + .bus_attrs = fcoe_bus_attr_group, }; /** @@ -566,6 +683,7 @@ struct fcoe_ctlr_device *fcoe_ctlr_device_add(struct device *parent, ctlr->id = atomic_inc_return(&ctlr_num) - 1; ctlr->f = f; + ctlr->mode = FIP_CONN_TYPE_FABRIC; INIT_LIST_HEAD(&ctlr->fcfs); mutex_init(&ctlr->lock); ctlr->dev.parent = parent; diff --git a/drivers/scsi/fcoe/fcoe_transport.c b/drivers/scsi/fcoe/fcoe_transport.c index ac76d8a..f3a5a53 100644 --- a/drivers/scsi/fcoe/fcoe_transport.c +++ b/drivers/scsi/fcoe/fcoe_transport.c @@ -83,6 +83,50 @@ static struct notifier_block libfcoe_notifier = { .notifier_call = libfcoe_device_notification, }; +/** + * fcoe_link_speed_update() - Update the supported and actual link speeds + * @lport: The local port to update speeds for + * + * Returns: 0 if the ethtool query was successful + * -1 if the ethtool query failed + */ +int fcoe_link_speed_update(struct fc_lport *lport) +{ + struct net_device *netdev = fcoe_get_netdev(lport); + struct ethtool_cmd ecmd; + + if (!__ethtool_get_settings(netdev, &ecmd)) { + lport->link_supported_speeds &= + ~(FC_PORTSPEED_1GBIT | FC_PORTSPEED_10GBIT); + if (ecmd.supported & (SUPPORTED_1000baseT_Half | + SUPPORTED_1000baseT_Full)) + lport->link_supported_speeds |= FC_PORTSPEED_1GBIT; + if (ecmd.supported & SUPPORTED_10000baseT_Full) + lport->link_supported_speeds |= + FC_PORTSPEED_10GBIT; + switch (ethtool_cmd_speed(&ecmd)) { + case SPEED_1000: + lport->link_speed = FC_PORTSPEED_1GBIT; + break; + case SPEED_10000: + lport->link_speed = FC_PORTSPEED_10GBIT; + break; + } + return 0; + } + return -1; +} +EXPORT_SYMBOL_GPL(fcoe_link_speed_update); + +/** + * __fcoe_get_lesb() - Get the Link Error Status Block (LESB) for a given lport + * @lport: The local port to update speeds for + * @fc_lesb: Pointer to the LESB to be filled up + * @netdev: Pointer to the netdev that is associated with the lport + * + * Note, the Link Error Status Block (LESB) for FCoE is defined in FC-BB-6 + * Clause 7.11 in v1.04. + */ void __fcoe_get_lesb(struct fc_lport *lport, struct fc_els_lesb *fc_lesb, struct net_device *netdev) @@ -112,6 +156,51 @@ void __fcoe_get_lesb(struct fc_lport *lport, } EXPORT_SYMBOL_GPL(__fcoe_get_lesb); +/** + * fcoe_get_lesb() - Fill the FCoE Link Error Status Block + * @lport: the local port + * @fc_lesb: the link error status block + */ +void fcoe_get_lesb(struct fc_lport *lport, + struct fc_els_lesb *fc_lesb) +{ + struct net_device *netdev = fcoe_get_netdev(lport); + + __fcoe_get_lesb(lport, fc_lesb, netdev); +} +EXPORT_SYMBOL_GPL(fcoe_get_lesb); + +/** + * fcoe_ctlr_get_lesb() - Get the Link Error Status Block (LESB) for a given + * fcoe controller device + * @ctlr_dev: The given fcoe controller device + * + */ +void fcoe_ctlr_get_lesb(struct fcoe_ctlr_device *ctlr_dev) +{ + struct fcoe_ctlr *fip = fcoe_ctlr_device_priv(ctlr_dev); + struct net_device *netdev = fcoe_get_netdev(fip->lp); + struct fcoe_fc_els_lesb *fcoe_lesb; + struct fc_els_lesb fc_lesb; + + __fcoe_get_lesb(fip->lp, &fc_lesb, netdev); + fcoe_lesb = (struct fcoe_fc_els_lesb *)(&fc_lesb); + + ctlr_dev->lesb.lesb_link_fail = + ntohl(fcoe_lesb->lesb_link_fail); + ctlr_dev->lesb.lesb_vlink_fail = + ntohl(fcoe_lesb->lesb_vlink_fail); + ctlr_dev->lesb.lesb_miss_fka = + ntohl(fcoe_lesb->lesb_miss_fka); + ctlr_dev->lesb.lesb_symb_err = + ntohl(fcoe_lesb->lesb_symb_err); + ctlr_dev->lesb.lesb_err_block = + ntohl(fcoe_lesb->lesb_err_block); + ctlr_dev->lesb.lesb_fcs_error = + ntohl(fcoe_lesb->lesb_fcs_error); +} +EXPORT_SYMBOL_GPL(fcoe_ctlr_get_lesb); + void fcoe_wwn_to_str(u64 wwn, char *buf, int len) { u8 wwpn[8]; @@ -627,6 +716,110 @@ static int libfcoe_device_notification(struct notifier_block *notifier, return NOTIFY_OK; } +ssize_t fcoe_ctlr_create_store(struct bus_type *bus, + const char *buf, size_t count) +{ + struct net_device *netdev = NULL; + struct fcoe_transport *ft = NULL; + struct fcoe_ctlr_device *ctlr_dev = NULL; + int rc = 0; + int err; + + mutex_lock(&ft_mutex); + + netdev = fcoe_if_to_netdev(buf); + if (!netdev) { + LIBFCOE_TRANSPORT_DBG("Invalid device %s.\n", buf); + rc = -ENODEV; + goto out_nodev; + } + + ft = fcoe_netdev_map_lookup(netdev); + if (ft) { + LIBFCOE_TRANSPORT_DBG("transport %s already has existing " + "FCoE instance on %s.\n", + ft->name, netdev->name); + rc = -EEXIST; + goto out_putdev; + } + + ft = fcoe_transport_lookup(netdev); + if (!ft) { + LIBFCOE_TRANSPORT_DBG("no FCoE transport found for %s.\n", + netdev->name); + rc = -ENODEV; + goto out_putdev; + } + + /* pass to transport create */ + err = ft->alloc ? ft->alloc(netdev) : -ENODEV; + if (err) { + fcoe_del_netdev_mapping(netdev); + rc = -ENOMEM; + goto out_putdev; + } + + err = fcoe_add_netdev_mapping(netdev, ft); + if (err) { + LIBFCOE_TRANSPORT_DBG("failed to add new netdev mapping " + "for FCoE transport %s for %s.\n", + ft->name, netdev->name); + rc = -ENODEV; + goto out_putdev; + } + + LIBFCOE_TRANSPORT_DBG("transport %s %s to create fcoe on %s.\n", + ft->name, (ctlr_dev) ? "succeeded" : "failed", + netdev->name); + +out_putdev: + dev_put(netdev); +out_nodev: + mutex_unlock(&ft_mutex); + if (rc) + return rc; + return count; +} + +ssize_t fcoe_ctlr_destroy_store(struct bus_type *bus, + const char *buf, size_t count) +{ + int rc = -ENODEV; + struct net_device *netdev = NULL; + struct fcoe_transport *ft = NULL; + + mutex_lock(&ft_mutex); + + netdev = fcoe_if_to_netdev(buf); + if (!netdev) { + LIBFCOE_TRANSPORT_DBG("invalid device %s.\n", buf); + goto out_nodev; + } + + ft = fcoe_netdev_map_lookup(netdev); + if (!ft) { + LIBFCOE_TRANSPORT_DBG("no FCoE transport found for %s.\n", + netdev->name); + goto out_putdev; + } + + /* pass to transport destroy */ + rc = ft->destroy(netdev); + if (rc) + goto out_putdev; + + fcoe_del_netdev_mapping(netdev); + LIBFCOE_TRANSPORT_DBG("transport %s %s to destroy fcoe on %s.\n", + ft->name, (rc) ? "failed" : "succeeded", + netdev->name); + rc = count; /* required for successful return */ +out_putdev: + dev_put(netdev); +out_nodev: + mutex_unlock(&ft_mutex); + return rc; +} +EXPORT_SYMBOL(fcoe_ctlr_destroy_store); /** * fcoe_transport_create() - Create a fcoe interface @@ -769,11 +962,7 @@ out_putdev: dev_put(netdev); out_nodev: mutex_unlock(&ft_mutex); - - if (rc == -ERESTARTSYS) - return restart_syscall(); - else - return rc; + return rc; } /** diff --git a/drivers/scsi/fcoe/libfcoe.h b/drivers/scsi/fcoe/libfcoe.h index 6af5fc3..d3bb16d 100644 --- a/drivers/scsi/fcoe/libfcoe.h +++ b/drivers/scsi/fcoe/libfcoe.h @@ -2,9 +2,10 @@ #define _FCOE_LIBFCOE_H_ extern unsigned int libfcoe_debug_logging; -#define LIBFCOE_LOGGING 0x01 /* General logging, not categorized */ -#define LIBFCOE_FIP_LOGGING 0x02 /* FIP logging */ -#define LIBFCOE_TRANSPORT_LOGGING 0x04 /* FCoE transport logging */ +#define LIBFCOE_LOGGING 0x01 /* General logging, not categorized */ +#define LIBFCOE_FIP_LOGGING 0x02 /* FIP logging */ +#define LIBFCOE_TRANSPORT_LOGGING 0x04 /* FCoE transport logging */ +#define LIBFCOE_SYSFS_LOGGING 0x08 /* fcoe_sysfs logging */ #define LIBFCOE_CHECK_LOGGING(LEVEL, CMD) \ do { \ @@ -16,16 +17,19 @@ do { \ #define LIBFCOE_DBG(fmt, args...) \ LIBFCOE_CHECK_LOGGING(LIBFCOE_LOGGING, \ - printk(KERN_INFO "libfcoe: " fmt, ##args);) + pr_info("libfcoe: " fmt, ##args);) #define LIBFCOE_FIP_DBG(fip, fmt, args...) \ LIBFCOE_CHECK_LOGGING(LIBFCOE_FIP_LOGGING, \ - printk(KERN_INFO "host%d: fip: " fmt, \ - (fip)->lp->host->host_no, ##args);) + pr_info("host%d: fip: " fmt, \ + (fip)->lp->host->host_no, ##args);) #define LIBFCOE_TRANSPORT_DBG(fmt, args...) \ LIBFCOE_CHECK_LOGGING(LIBFCOE_TRANSPORT_LOGGING, \ - printk(KERN_INFO "%s: " fmt, \ - __func__, ##args);) + pr_info("%s: " fmt, __func__, ##args);) + +#define LIBFCOE_SYSFS_DBG(cdev, fmt, args...) \ + LIBFCOE_CHECK_LOGGING(LIBFCOE_SYSFS_LOGGING, \ + pr_info("ctlr_%d: " fmt, cdev->id, ##args);) #endif /* _FCOE_LIBFCOE_H_ */ diff --git a/drivers/scsi/fnic/Makefile b/drivers/scsi/fnic/Makefile index 37c3440..383598f 100644 --- a/drivers/scsi/fnic/Makefile +++ b/drivers/scsi/fnic/Makefile @@ -7,6 +7,8 @@ fnic-y := \ fnic_res.o \ fnic_fcs.o \ fnic_scsi.o \ + fnic_trace.o \ + fnic_debugfs.o \ vnic_cq.o \ vnic_dev.o \ vnic_intr.o \ diff --git a/drivers/scsi/fnic/fnic.h b/drivers/scsi/fnic/fnic.h index 95a5ba2..98436c3 100644 --- a/drivers/scsi/fnic/fnic.h +++ b/drivers/scsi/fnic/fnic.h @@ -26,6 +26,7 @@ #include <scsi/libfcoe.h> #include "fnic_io.h" #include "fnic_res.h" +#include "fnic_trace.h" #include "vnic_dev.h" #include "vnic_wq.h" #include "vnic_rq.h" @@ -56,6 +57,34 @@ #define FNIC_NO_TAG -1 /* + * Command flags to identify the type of command and for other future + * use. + */ +#define FNIC_NO_FLAGS 0 +#define FNIC_IO_INITIALIZED BIT(0) +#define FNIC_IO_ISSUED BIT(1) +#define FNIC_IO_DONE BIT(2) +#define FNIC_IO_REQ_NULL BIT(3) +#define FNIC_IO_ABTS_PENDING BIT(4) +#define FNIC_IO_ABORTED BIT(5) +#define FNIC_IO_ABTS_ISSUED BIT(6) +#define FNIC_IO_TERM_ISSUED BIT(7) +#define FNIC_IO_INTERNAL_TERM_ISSUED BIT(8) +#define FNIC_IO_ABT_TERM_DONE BIT(9) +#define FNIC_IO_ABT_TERM_REQ_NULL BIT(10) +#define FNIC_IO_ABT_TERM_TIMED_OUT BIT(11) +#define FNIC_DEVICE_RESET BIT(12) /* Device reset request */ +#define FNIC_DEV_RST_ISSUED BIT(13) +#define FNIC_DEV_RST_TIMED_OUT BIT(14) +#define FNIC_DEV_RST_ABTS_ISSUED BIT(15) +#define FNIC_DEV_RST_TERM_ISSUED BIT(16) +#define FNIC_DEV_RST_DONE BIT(17) +#define FNIC_DEV_RST_REQ_NULL BIT(18) +#define FNIC_DEV_RST_ABTS_DONE BIT(19) +#define FNIC_DEV_RST_TERM_DONE BIT(20) +#define FNIC_DEV_RST_ABTS_PENDING BIT(21) + +/* * Usage of the scsi_cmnd scratchpad. * These fields are locked by the hashed io_req_lock. */ @@ -64,6 +93,7 @@ #define CMD_ABTS_STATUS(Cmnd) ((Cmnd)->SCp.Message) #define CMD_LR_STATUS(Cmnd) ((Cmnd)->SCp.have_data_in) #define CMD_TAG(Cmnd) ((Cmnd)->SCp.sent_command) +#define CMD_FLAGS(Cmnd) ((Cmnd)->SCp.Status) #define FCPIO_INVALID_CODE 0x100 /* hdr_status value unused by firmware */ @@ -71,9 +101,28 @@ #define FNIC_HOST_RESET_TIMEOUT 10000 /* mSec */ #define FNIC_RMDEVICE_TIMEOUT 1000 /* mSec */ #define FNIC_HOST_RESET_SETTLE_TIME 30 /* Sec */ +#define FNIC_ABT_TERM_DELAY_TIMEOUT 500 /* mSec */ #define FNIC_MAX_FCP_TARGET 256 +/** + * state_flags to identify host state along along with fnic's state + **/ +#define __FNIC_FLAGS_FWRESET BIT(0) /* fwreset in progress */ +#define __FNIC_FLAGS_BLOCK_IO BIT(1) /* IOs are blocked */ + +#define FNIC_FLAGS_NONE (0) +#define FNIC_FLAGS_FWRESET (__FNIC_FLAGS_FWRESET | \ + __FNIC_FLAGS_BLOCK_IO) + +#define FNIC_FLAGS_IO_BLOCKED (__FNIC_FLAGS_BLOCK_IO) + +#define fnic_set_state_flags(fnicp, st_flags) \ + __fnic_set_state_flags(fnicp, st_flags, 0) + +#define fnic_clear_state_flags(fnicp, st_flags) \ + __fnic_set_state_flags(fnicp, st_flags, 1) + extern unsigned int fnic_log_level; #define FNIC_MAIN_LOGGING 0x01 @@ -170,6 +219,9 @@ struct fnic { struct completion *remove_wait; /* device remove thread blocks */ + atomic_t in_flight; /* io counter */ + u32 _reserved; /* fill hole */ + unsigned long state_flags; /* protected by host lock */ enum fnic_state state; spinlock_t fnic_lock; @@ -267,4 +319,12 @@ const char *fnic_state_to_str(unsigned int state); void fnic_log_q_error(struct fnic *fnic); void fnic_handle_link_event(struct fnic *fnic); +int fnic_is_abts_pending(struct fnic *, struct scsi_cmnd *); + +static inline int +fnic_chk_state_flags_locked(struct fnic *fnic, unsigned long st_flags) +{ + return ((fnic->state_flags & st_flags) == st_flags); +} +void __fnic_set_state_flags(struct fnic *, unsigned long, unsigned long); #endif /* _FNIC_H_ */ diff --git a/drivers/scsi/fnic/fnic_debugfs.c b/drivers/scsi/fnic/fnic_debugfs.c new file mode 100644 index 0000000..adc1f7f --- /dev/null +++ b/drivers/scsi/fnic/fnic_debugfs.c @@ -0,0 +1,314 @@ +/* + * Copyright 2012 Cisco Systems, Inc. All rights reserved. + * + * This program is free software; you may redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/module.h> +#include <linux/errno.h> +#include <linux/debugfs.h> +#include "fnic.h" + +static struct dentry *fnic_trace_debugfs_root; +static struct dentry *fnic_trace_debugfs_file; +static struct dentry *fnic_trace_enable; + +/* + * fnic_trace_ctrl_open - Open the trace_enable file + * @inode: The inode pointer. + * @file: The file pointer to attach the trace enable/disable flag. + * + * Description: + * This routine opens a debugsfs file trace_enable. + * + * Returns: + * This function returns zero if successful. + */ +static int fnic_trace_ctrl_open(struct inode *inode, struct file *filp) +{ + filp->private_data = inode->i_private; + return 0; +} + +/* + * fnic_trace_ctrl_read - Read a trace_enable debugfs file + * @filp: The file pointer to read from. + * @ubuf: The buffer to copy the data to. + * @cnt: The number of bytes to read. + * @ppos: The position in the file to start reading from. + * + * Description: + * This routine reads value of variable fnic_tracing_enabled + * and stores into local @buf. It will start reading file at @ppos and + * copy up to @cnt of data to @ubuf from @buf. + * + * Returns: + * This function returns the amount of data that was read. + */ +static ssize_t fnic_trace_ctrl_read(struct file *filp, + char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + char buf[64]; + int len; + len = sprintf(buf, "%u\n", fnic_tracing_enabled); + + return simple_read_from_buffer(ubuf, cnt, ppos, buf, len); +} + +/* + * fnic_trace_ctrl_write - Write to trace_enable debugfs file + * @filp: The file pointer to write from. + * @ubuf: The buffer to copy the data from. + * @cnt: The number of bytes to write. + * @ppos: The position in the file to start writing to. + * + * Description: + * This routine writes data from user buffer @ubuf to buffer @buf and + * sets fnic_tracing_enabled value as per user input. + * + * Returns: + * This function returns the amount of data that was written. + */ +static ssize_t fnic_trace_ctrl_write(struct file *filp, + const char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + char buf[64]; + unsigned long val; + int ret; + + if (cnt >= sizeof(buf)) + return -EINVAL; + + if (copy_from_user(&buf, ubuf, cnt)) + return -EFAULT; + + buf[cnt] = 0; + + ret = kstrtoul(buf, 10, &val); + if (ret < 0) + return ret; + + fnic_tracing_enabled = val; + (*ppos)++; + + return cnt; +} + +/* + * fnic_trace_debugfs_open - Open the fnic trace log + * @inode: The inode pointer + * @file: The file pointer to attach the log output + * + * Description: + * This routine is the entry point for the debugfs open file operation. + * It allocates the necessary buffer for the log, fills the buffer from + * the in-memory log and then returns a pointer to that log in + * the private_data field in @file. + * + * Returns: + * This function returns zero if successful. On error it will return + * a negative error value. + */ +static int fnic_trace_debugfs_open(struct inode *inode, + struct file *file) +{ + fnic_dbgfs_t *fnic_dbg_prt; + fnic_dbg_prt = kzalloc(sizeof(fnic_dbgfs_t), GFP_KERNEL); + if (!fnic_dbg_prt) + return -ENOMEM; + + fnic_dbg_prt->buffer = vmalloc((3*(trace_max_pages * PAGE_SIZE))); + if (!fnic_dbg_prt->buffer) { + kfree(fnic_dbg_prt); + return -ENOMEM; + } + memset((void *)fnic_dbg_prt->buffer, 0, + (3*(trace_max_pages * PAGE_SIZE))); + fnic_dbg_prt->buffer_len = fnic_get_trace_data(fnic_dbg_prt); + file->private_data = fnic_dbg_prt; + return 0; +} + +/* + * fnic_trace_debugfs_lseek - Seek through a debugfs file + * @file: The file pointer to seek through. + * @offset: The offset to seek to or the amount to seek by. + * @howto: Indicates how to seek. + * + * Description: + * This routine is the entry point for the debugfs lseek file operation. + * The @howto parameter indicates whether @offset is the offset to directly + * seek to, or if it is a value to seek forward or reverse by. This function + * figures out what the new offset of the debugfs file will be and assigns + * that value to the f_pos field of @file. + * + * Returns: + * This function returns the new offset if successful and returns a negative + * error if unable to process the seek. + */ +static loff_t fnic_trace_debugfs_lseek(struct file *file, + loff_t offset, + int howto) +{ + fnic_dbgfs_t *fnic_dbg_prt = file->private_data; + loff_t pos = -1; + + switch (howto) { + case 0: + pos = offset; + break; + case 1: + pos = file->f_pos + offset; + break; + case 2: + pos = fnic_dbg_prt->buffer_len - offset; + } + return (pos < 0 || pos > fnic_dbg_prt->buffer_len) ? + -EINVAL : (file->f_pos = pos); +} + +/* + * fnic_trace_debugfs_read - Read a debugfs file + * @file: The file pointer to read from. + * @ubuf: The buffer to copy the data to. + * @nbytes: The number of bytes to read. + * @pos: The position in the file to start reading from. + * + * Description: + * This routine reads data from the buffer indicated in the private_data + * field of @file. It will start reading at @pos and copy up to @nbytes of + * data to @ubuf. + * + * Returns: + * This function returns the amount of data that was read (this could be + * less than @nbytes if the end of the file was reached). + */ +static ssize_t fnic_trace_debugfs_read(struct file *file, + char __user *ubuf, + size_t nbytes, + loff_t *pos) +{ + fnic_dbgfs_t *fnic_dbg_prt = file->private_data; + int rc = 0; + rc = simple_read_from_buffer(ubuf, nbytes, pos, + fnic_dbg_prt->buffer, + fnic_dbg_prt->buffer_len); + return rc; +} + +/* + * fnic_trace_debugfs_release - Release the buffer used to store + * debugfs file data + * @inode: The inode pointer + * @file: The file pointer that contains the buffer to release + * + * Description: + * This routine frees the buffer that was allocated when the debugfs + * file was opened. + * + * Returns: + * This function returns zero. + */ +static int fnic_trace_debugfs_release(struct inode *inode, + struct file *file) +{ + fnic_dbgfs_t *fnic_dbg_prt = file->private_data; + + vfree(fnic_dbg_prt->buffer); + kfree(fnic_dbg_prt); + return 0; +} + +static const struct file_operations fnic_trace_ctrl_fops = { + .owner = THIS_MODULE, + .open = fnic_trace_ctrl_open, + .read = fnic_trace_ctrl_read, + .write = fnic_trace_ctrl_write, +}; + +static const struct file_operations fnic_trace_debugfs_fops = { + .owner = THIS_MODULE, + .open = fnic_trace_debugfs_open, + .llseek = fnic_trace_debugfs_lseek, + .read = fnic_trace_debugfs_read, + .release = fnic_trace_debugfs_release, +}; + +/* + * fnic_trace_debugfs_init - Initialize debugfs for fnic trace logging + * + * Description: + * When Debugfs is configured this routine sets up the fnic debugfs + * file system. If not already created, this routine will create the + * fnic directory. It will create file trace to log fnic trace buffer + * output into debugfs and it will also create file trace_enable to + * control enable/disable of trace logging into trace buffer. + */ +int fnic_trace_debugfs_init(void) +{ + int rc = -1; + fnic_trace_debugfs_root = debugfs_create_dir("fnic", NULL); + if (!fnic_trace_debugfs_root) { + printk(KERN_DEBUG "Cannot create debugfs root\n"); + return rc; + } + fnic_trace_enable = debugfs_create_file("tracing_enable", + S_IFREG|S_IRUGO|S_IWUSR, + fnic_trace_debugfs_root, + NULL, &fnic_trace_ctrl_fops); + + if (!fnic_trace_enable) { + printk(KERN_DEBUG "Cannot create trace_enable file" + " under debugfs"); + return rc; + } + + fnic_trace_debugfs_file = debugfs_create_file("trace", + S_IFREG|S_IRUGO|S_IWUSR, + fnic_trace_debugfs_root, + NULL, + &fnic_trace_debugfs_fops); + + if (!fnic_trace_debugfs_file) { + printk(KERN_DEBUG "Cannot create trace file under debugfs"); + return rc; + } + rc = 0; + return rc; +} + +/* + * fnic_trace_debugfs_terminate - Tear down debugfs infrastructure + * + * Description: + * When Debugfs is configured this routine removes debugfs file system + * elements that are specific to fnic trace logging. + */ +void fnic_trace_debugfs_terminate(void) +{ + if (fnic_trace_debugfs_file) { + debugfs_remove(fnic_trace_debugfs_file); + fnic_trace_debugfs_file = NULL; + } + if (fnic_trace_enable) { + debugfs_remove(fnic_trace_enable); + fnic_trace_enable = NULL; + } + if (fnic_trace_debugfs_root) { + debugfs_remove(fnic_trace_debugfs_root); + fnic_trace_debugfs_root = NULL; + } +} diff --git a/drivers/scsi/fnic/fnic_fcs.c b/drivers/scsi/fnic/fnic_fcs.c index 3c53c34..483eb9d 100644 --- a/drivers/scsi/fnic/fnic_fcs.c +++ b/drivers/scsi/fnic/fnic_fcs.c @@ -495,7 +495,8 @@ void fnic_eth_send(struct fcoe_ctlr *fip, struct sk_buff *skb) } fnic_queue_wq_eth_desc(wq, skb, pa, skb->len, - fnic->vlan_hw_insert, fnic->vlan_id, 1); + 0 /* hw inserts cos value */, + fnic->vlan_id, 1); spin_unlock_irqrestore(&fnic->wq_lock[0], flags); } @@ -563,7 +564,8 @@ static int fnic_send_frame(struct fnic *fnic, struct fc_frame *fp) } fnic_queue_wq_desc(wq, skb, pa, tot_len, fr_eof(fp), - fnic->vlan_hw_insert, fnic->vlan_id, 1, 1, 1); + 0 /* hw inserts cos value */, + fnic->vlan_id, 1, 1, 1); fnic_send_frame_end: spin_unlock_irqrestore(&fnic->wq_lock[0], flags); diff --git a/drivers/scsi/fnic/fnic_io.h b/drivers/scsi/fnic/fnic_io.h index f0b8969..c35b8f1 100644 --- a/drivers/scsi/fnic/fnic_io.h +++ b/drivers/scsi/fnic/fnic_io.h @@ -21,7 +21,7 @@ #include <scsi/fc/fc_fcp.h> #define FNIC_DFLT_SG_DESC_CNT 32 -#define FNIC_MAX_SG_DESC_CNT 1024 /* Maximum descriptors per sgl */ +#define FNIC_MAX_SG_DESC_CNT 256 /* Maximum descriptors per sgl */ #define FNIC_SG_DESC_ALIGN 16 /* Descriptor address alignment */ struct host_sg_desc { @@ -45,7 +45,8 @@ enum fnic_sgl_list_type { }; enum fnic_ioreq_state { - FNIC_IOREQ_CMD_PENDING = 0, + FNIC_IOREQ_NOT_INITED = 0, + FNIC_IOREQ_CMD_PENDING, FNIC_IOREQ_ABTS_PENDING, FNIC_IOREQ_ABTS_COMPLETE, FNIC_IOREQ_CMD_COMPLETE, @@ -60,6 +61,7 @@ struct fnic_io_req { u8 sgl_type; /* device DMA descriptor list type */ u8 io_completed:1; /* set to 1 when fw completes IO */ u32 port_id; /* remote port DID */ + unsigned long start_time; /* in jiffies */ struct completion *abts_done; /* completion for abts */ struct completion *dr_done; /* completion for device reset */ }; diff --git a/drivers/scsi/fnic/fnic_main.c b/drivers/scsi/fnic/fnic_main.c index fbf3ac6..d601ac5 100644 --- a/drivers/scsi/fnic/fnic_main.c +++ b/drivers/scsi/fnic/fnic_main.c @@ -68,6 +68,10 @@ unsigned int fnic_log_level; module_param(fnic_log_level, int, S_IRUGO|S_IWUSR); MODULE_PARM_DESC(fnic_log_level, "bit mask of fnic logging levels"); +unsigned int fnic_trace_max_pages = 16; +module_param(fnic_trace_max_pages, uint, S_IRUGO|S_IWUSR); +MODULE_PARM_DESC(fnic_trace_max_pages, "Total allocated memory pages " + "for fnic trace buffer"); static struct libfc_function_template fnic_transport_template = { .frame_send = fnic_send, @@ -624,6 +628,9 @@ static int fnic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) } fnic->state = FNIC_IN_FC_MODE; + atomic_set(&fnic->in_flight, 0); + fnic->state_flags = FNIC_FLAGS_NONE; + /* Enable hardware stripping of vlan header on ingress */ fnic_set_nic_config(fnic, 0, 0, 0, 0, 0, 0, 1); @@ -858,6 +865,14 @@ static int __init fnic_init_module(void) printk(KERN_INFO PFX "%s, ver %s\n", DRV_DESCRIPTION, DRV_VERSION); + /* Allocate memory for trace buffer */ + err = fnic_trace_buf_init(); + if (err < 0) { + printk(KERN_ERR PFX "Trace buffer initialization Failed " + "Fnic Tracing utility is disabled\n"); + fnic_trace_free(); + } + /* Create a cache for allocation of default size sgls */ len = sizeof(struct fnic_dflt_sgl_list); fnic_sgl_cache[FNIC_SGL_CACHE_DFLT] = kmem_cache_create @@ -928,6 +943,7 @@ err_create_fnic_ioreq_slab: err_create_fnic_sgl_slab_max: kmem_cache_destroy(fnic_sgl_cache[FNIC_SGL_CACHE_DFLT]); err_create_fnic_sgl_slab_dflt: + fnic_trace_free(); return err; } @@ -939,6 +955,7 @@ static void __exit fnic_cleanup_module(void) kmem_cache_destroy(fnic_sgl_cache[FNIC_SGL_CACHE_DFLT]); kmem_cache_destroy(fnic_io_req_cache); fc_release_transport(fnic_fc_transport); + fnic_trace_free(); } module_init(fnic_init_module); diff --git a/drivers/scsi/fnic/fnic_scsi.c b/drivers/scsi/fnic/fnic_scsi.c index c40ce52..be99e75 100644 --- a/drivers/scsi/fnic/fnic_scsi.c +++ b/drivers/scsi/fnic/fnic_scsi.c @@ -47,6 +47,7 @@ const char *fnic_state_str[] = { }; static const char *fnic_ioreq_state_str[] = { + [FNIC_IOREQ_NOT_INITED] = "FNIC_IOREQ_NOT_INITED", [FNIC_IOREQ_CMD_PENDING] = "FNIC_IOREQ_CMD_PENDING", [FNIC_IOREQ_ABTS_PENDING] = "FNIC_IOREQ_ABTS_PENDING", [FNIC_IOREQ_ABTS_COMPLETE] = "FNIC_IOREQ_ABTS_COMPLETE", @@ -165,6 +166,33 @@ static int free_wq_copy_descs(struct fnic *fnic, struct vnic_wq_copy *wq) } +/** + * __fnic_set_state_flags + * Sets/Clears bits in fnic's state_flags + **/ +void +__fnic_set_state_flags(struct fnic *fnic, unsigned long st_flags, + unsigned long clearbits) +{ + struct Scsi_Host *host = fnic->lport->host; + int sh_locked = spin_is_locked(host->host_lock); + unsigned long flags = 0; + + if (!sh_locked) + spin_lock_irqsave(host->host_lock, flags); + + if (clearbits) + fnic->state_flags &= ~st_flags; + else + fnic->state_flags |= st_flags; + + if (!sh_locked) + spin_unlock_irqrestore(host->host_lock, flags); + + return; +} + + /* * fnic_fw_reset_handler * Routine to send reset msg to fw @@ -175,9 +203,16 @@ int fnic_fw_reset_handler(struct fnic *fnic) int ret = 0; unsigned long flags; + /* indicate fwreset to io path */ + fnic_set_state_flags(fnic, FNIC_FLAGS_FWRESET); + skb_queue_purge(&fnic->frame_queue); skb_queue_purge(&fnic->tx_queue); + /* wait for io cmpl */ + while (atomic_read(&fnic->in_flight)) + schedule_timeout(msecs_to_jiffies(1)); + spin_lock_irqsave(&fnic->wq_copy_lock[0], flags); if (vnic_wq_copy_desc_avail(wq) <= fnic->wq_copy_desc_low[0]) @@ -193,9 +228,12 @@ int fnic_fw_reset_handler(struct fnic *fnic) if (!ret) FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host, "Issued fw reset\n"); - else + else { + fnic_clear_state_flags(fnic, FNIC_FLAGS_FWRESET); FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host, "Failed to issue fw reset\n"); + } + return ret; } @@ -312,6 +350,8 @@ static inline int fnic_queue_wq_copy_desc(struct fnic *fnic, if (unlikely(!vnic_wq_copy_desc_avail(wq))) { spin_unlock_irqrestore(&fnic->wq_copy_lock[0], intr_flags); + FNIC_SCSI_DBG(KERN_INFO, fnic->lport->host, + "fnic_queue_wq_copy_desc failure - no descriptors\n"); return SCSI_MLQUEUE_HOST_BUSY; } @@ -351,16 +391,20 @@ static inline int fnic_queue_wq_copy_desc(struct fnic *fnic, */ static int fnic_queuecommand_lck(struct scsi_cmnd *sc, void (*done)(struct scsi_cmnd *)) { - struct fc_lport *lp; + struct fc_lport *lp = shost_priv(sc->device->host); struct fc_rport *rport; - struct fnic_io_req *io_req; - struct fnic *fnic; + struct fnic_io_req *io_req = NULL; + struct fnic *fnic = lport_priv(lp); struct vnic_wq_copy *wq; int ret; - int sg_count; + u64 cmd_trace; + int sg_count = 0; unsigned long flags; unsigned long ptr; + if (unlikely(fnic_chk_state_flags_locked(fnic, FNIC_FLAGS_IO_BLOCKED))) + return SCSI_MLQUEUE_HOST_BUSY; + rport = starget_to_rport(scsi_target(sc->device)); ret = fc_remote_port_chkready(rport); if (ret) { @@ -369,20 +413,21 @@ static int fnic_queuecommand_lck(struct scsi_cmnd *sc, void (*done)(struct scsi_ return 0; } - lp = shost_priv(sc->device->host); if (lp->state != LPORT_ST_READY || !(lp->link_up)) return SCSI_MLQUEUE_HOST_BUSY; + atomic_inc(&fnic->in_flight); + /* * Release host lock, use driver resource specific locks from here. * Don't re-enable interrupts in case they were disabled prior to the * caller disabling them. */ spin_unlock(lp->host->host_lock); + CMD_STATE(sc) = FNIC_IOREQ_NOT_INITED; + CMD_FLAGS(sc) = FNIC_NO_FLAGS; /* Get a new io_req for this SCSI IO */ - fnic = lport_priv(lp); - io_req = mempool_alloc(fnic->io_req_pool, GFP_ATOMIC); if (!io_req) { ret = SCSI_MLQUEUE_HOST_BUSY; @@ -393,6 +438,9 @@ static int fnic_queuecommand_lck(struct scsi_cmnd *sc, void (*done)(struct scsi_ /* Map the data buffer */ sg_count = scsi_dma_map(sc); if (sg_count < 0) { + FNIC_TRACE(fnic_queuecommand, sc->device->host->host_no, + sc->request->tag, sc, 0, sc->cmnd[0], + sg_count, CMD_STATE(sc)); mempool_free(io_req, fnic->io_req_pool); goto out; } @@ -427,8 +475,10 @@ static int fnic_queuecommand_lck(struct scsi_cmnd *sc, void (*done)(struct scsi_ /* initialize rest of io_req */ io_req->port_id = rport->port_id; + io_req->start_time = jiffies; CMD_STATE(sc) = FNIC_IOREQ_CMD_PENDING; CMD_SP(sc) = (char *)io_req; + CMD_FLAGS(sc) |= FNIC_IO_INITIALIZED; sc->scsi_done = done; /* create copy wq desc and enqueue it */ @@ -440,7 +490,9 @@ static int fnic_queuecommand_lck(struct scsi_cmnd *sc, void (*done)(struct scsi_ * refetch the pointer under the lock. */ spinlock_t *io_lock = fnic_io_lock_hash(fnic, sc); - + FNIC_TRACE(fnic_queuecommand, sc->device->host->host_no, + sc->request->tag, sc, 0, 0, 0, + (((u64)CMD_FLAGS(sc) << 32) | CMD_STATE(sc))); spin_lock_irqsave(io_lock, flags); io_req = (struct fnic_io_req *)CMD_SP(sc); CMD_SP(sc) = NULL; @@ -450,8 +502,21 @@ static int fnic_queuecommand_lck(struct scsi_cmnd *sc, void (*done)(struct scsi_ fnic_release_ioreq_buf(fnic, io_req, sc); mempool_free(io_req, fnic->io_req_pool); } + } else { + /* REVISIT: Use per IO lock in the final code */ + CMD_FLAGS(sc) |= FNIC_IO_ISSUED; } out: + cmd_trace = ((u64)sc->cmnd[0] << 56 | (u64)sc->cmnd[7] << 40 | + (u64)sc->cmnd[8] << 32 | (u64)sc->cmnd[2] << 24 | + (u64)sc->cmnd[3] << 16 | (u64)sc->cmnd[4] << 8 | + sc->cmnd[5]); + + FNIC_TRACE(fnic_queuecommand, sc->device->host->host_no, + sc->request->tag, sc, io_req, + sg_count, cmd_trace, + (((u64)CMD_FLAGS(sc) >> 32) | CMD_STATE(sc))); + atomic_dec(&fnic->in_flight); /* acquire host lock before returning to SCSI */ spin_lock(lp->host->host_lock); return ret; @@ -529,6 +594,8 @@ static int fnic_fcpio_fw_reset_cmpl_handler(struct fnic *fnic, fnic_flush_tx(fnic); reset_cmpl_handler_end: + fnic_clear_state_flags(fnic, FNIC_FLAGS_FWRESET); + return ret; } @@ -622,6 +689,7 @@ static inline void fnic_fcpio_ack_handler(struct fnic *fnic, struct vnic_wq_copy *wq; u16 request_out = desc->u.ack.request_out; unsigned long flags; + u64 *ox_id_tag = (u64 *)(void *)desc; /* mark the ack state */ wq = &fnic->wq_copy[cq_index - fnic->raw_wq_count - fnic->rq_count]; @@ -632,6 +700,9 @@ static inline void fnic_fcpio_ack_handler(struct fnic *fnic, fnic->fw_ack_recd[0] = 1; } spin_unlock_irqrestore(&fnic->wq_copy_lock[0], flags); + FNIC_TRACE(fnic_fcpio_ack_handler, + fnic->lport->host->host_no, 0, 0, ox_id_tag[2], ox_id_tag[3], + ox_id_tag[4], ox_id_tag[5]); } /* @@ -651,27 +722,53 @@ static void fnic_fcpio_icmnd_cmpl_handler(struct fnic *fnic, struct scsi_cmnd *sc; unsigned long flags; spinlock_t *io_lock; + u64 cmd_trace; + unsigned long start_time; /* Decode the cmpl description to get the io_req id */ fcpio_header_dec(&desc->hdr, &type, &hdr_status, &tag); fcpio_tag_id_dec(&tag, &id); + icmnd_cmpl = &desc->u.icmnd_cmpl; - if (id >= FNIC_MAX_IO_REQ) + if (id >= FNIC_MAX_IO_REQ) { + shost_printk(KERN_ERR, fnic->lport->host, + "Tag out of range tag %x hdr status = %s\n", + id, fnic_fcpio_status_to_str(hdr_status)); return; + } sc = scsi_host_find_tag(fnic->lport->host, id); WARN_ON_ONCE(!sc); - if (!sc) + if (!sc) { + shost_printk(KERN_ERR, fnic->lport->host, + "icmnd_cmpl sc is null - " + "hdr status = %s tag = 0x%x desc = 0x%p\n", + fnic_fcpio_status_to_str(hdr_status), id, desc); + FNIC_TRACE(fnic_fcpio_icmnd_cmpl_handler, + fnic->lport->host->host_no, id, + ((u64)icmnd_cmpl->_resvd0[1] << 16 | + (u64)icmnd_cmpl->_resvd0[0]), + ((u64)hdr_status << 16 | + (u64)icmnd_cmpl->scsi_status << 8 | + (u64)icmnd_cmpl->flags), desc, + (u64)icmnd_cmpl->residual, 0); return; + } io_lock = fnic_io_lock_hash(fnic, sc); spin_lock_irqsave(io_lock, flags); io_req = (struct fnic_io_req *)CMD_SP(sc); WARN_ON_ONCE(!io_req); if (!io_req) { + CMD_FLAGS(sc) |= FNIC_IO_REQ_NULL; spin_unlock_irqrestore(io_lock, flags); + shost_printk(KERN_ERR, fnic->lport->host, + "icmnd_cmpl io_req is null - " + "hdr status = %s tag = 0x%x sc 0x%p\n", + fnic_fcpio_status_to_str(hdr_status), id, sc); return; } + start_time = io_req->start_time; /* firmware completed the io */ io_req->io_completed = 1; @@ -682,6 +779,28 @@ static void fnic_fcpio_icmnd_cmpl_handler(struct fnic *fnic, */ if (CMD_STATE(sc) == FNIC_IOREQ_ABTS_PENDING) { spin_unlock_irqrestore(io_lock, flags); + CMD_FLAGS(sc) |= FNIC_IO_ABTS_PENDING; + switch (hdr_status) { + case FCPIO_SUCCESS: + CMD_FLAGS(sc) |= FNIC_IO_DONE; + FNIC_SCSI_DBG(KERN_INFO, fnic->lport->host, + "icmnd_cmpl ABTS pending hdr status = %s " + "sc 0x%p scsi_status %x residual %d\n", + fnic_fcpio_status_to_str(hdr_status), sc, + icmnd_cmpl->scsi_status, + icmnd_cmpl->residual); + break; + case FCPIO_ABORTED: + CMD_FLAGS(sc) |= FNIC_IO_ABORTED; + break; + default: + FNIC_SCSI_DBG(KERN_INFO, fnic->lport->host, + "icmnd_cmpl abts pending " + "hdr status = %s tag = 0x%x sc = 0x%p\n", + fnic_fcpio_status_to_str(hdr_status), + id, sc); + break; + } return; } @@ -765,6 +884,7 @@ static void fnic_fcpio_icmnd_cmpl_handler(struct fnic *fnic, /* Break link with the SCSI command */ CMD_SP(sc) = NULL; + CMD_FLAGS(sc) |= FNIC_IO_DONE; spin_unlock_irqrestore(io_lock, flags); @@ -772,6 +892,20 @@ static void fnic_fcpio_icmnd_cmpl_handler(struct fnic *fnic, mempool_free(io_req, fnic->io_req_pool); + cmd_trace = ((u64)hdr_status << 56) | + (u64)icmnd_cmpl->scsi_status << 48 | + (u64)icmnd_cmpl->flags << 40 | (u64)sc->cmnd[0] << 32 | + (u64)sc->cmnd[2] << 24 | (u64)sc->cmnd[3] << 16 | + (u64)sc->cmnd[4] << 8 | sc->cmnd[5]; + + FNIC_TRACE(fnic_fcpio_icmnd_cmpl_handler, + sc->device->host->host_no, id, sc, + ((u64)icmnd_cmpl->_resvd0[1] << 56 | + (u64)icmnd_cmpl->_resvd0[0] << 48 | + jiffies_to_msecs(jiffies - start_time)), + desc, cmd_trace, + (((u64)CMD_FLAGS(sc) << 32) | CMD_STATE(sc))); + if (sc->sc_data_direction == DMA_FROM_DEVICE) { fnic->lport->host_stats.fcp_input_requests++; fnic->fcp_input_bytes += xfer_len; @@ -784,7 +918,6 @@ static void fnic_fcpio_icmnd_cmpl_handler(struct fnic *fnic, /* Call SCSI completion function to complete the IO */ if (sc->scsi_done) sc->scsi_done(sc); - } /* fnic_fcpio_itmf_cmpl_handler @@ -801,28 +934,54 @@ static void fnic_fcpio_itmf_cmpl_handler(struct fnic *fnic, struct fnic_io_req *io_req; unsigned long flags; spinlock_t *io_lock; + unsigned long start_time; fcpio_header_dec(&desc->hdr, &type, &hdr_status, &tag); fcpio_tag_id_dec(&tag, &id); - if ((id & FNIC_TAG_MASK) >= FNIC_MAX_IO_REQ) + if ((id & FNIC_TAG_MASK) >= FNIC_MAX_IO_REQ) { + shost_printk(KERN_ERR, fnic->lport->host, + "Tag out of range tag %x hdr status = %s\n", + id, fnic_fcpio_status_to_str(hdr_status)); return; + } sc = scsi_host_find_tag(fnic->lport->host, id & FNIC_TAG_MASK); WARN_ON_ONCE(!sc); - if (!sc) + if (!sc) { + shost_printk(KERN_ERR, fnic->lport->host, + "itmf_cmpl sc is null - hdr status = %s tag = 0x%x\n", + fnic_fcpio_status_to_str(hdr_status), id); return; - + } io_lock = fnic_io_lock_hash(fnic, sc); spin_lock_irqsave(io_lock, flags); io_req = (struct fnic_io_req *)CMD_SP(sc); WARN_ON_ONCE(!io_req); if (!io_req) { spin_unlock_irqrestore(io_lock, flags); + CMD_FLAGS(sc) |= FNIC_IO_ABT_TERM_REQ_NULL; + shost_printk(KERN_ERR, fnic->lport->host, + "itmf_cmpl io_req is null - " + "hdr status = %s tag = 0x%x sc 0x%p\n", + fnic_fcpio_status_to_str(hdr_status), id, sc); return; } + start_time = io_req->start_time; - if (id & FNIC_TAG_ABORT) { + if ((id & FNIC_TAG_ABORT) && (id & FNIC_TAG_DEV_RST)) { + /* Abort and terminate completion of device reset req */ + /* REVISIT : Add asserts about various flags */ + FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host, + "dev reset abts cmpl recd. id %x status %s\n", + id, fnic_fcpio_status_to_str(hdr_status)); + CMD_STATE(sc) = FNIC_IOREQ_ABTS_COMPLETE; + CMD_ABTS_STATUS(sc) = hdr_status; + CMD_FLAGS(sc) |= FNIC_DEV_RST_DONE; + if (io_req->abts_done) + complete(io_req->abts_done); + spin_unlock_irqrestore(io_lock, flags); + } else if (id & FNIC_TAG_ABORT) { /* Completion of abort cmd */ if (CMD_STATE(sc) != FNIC_IOREQ_ABTS_PENDING) { /* This is a late completion. Ignore it */ @@ -832,6 +991,7 @@ static void fnic_fcpio_itmf_cmpl_handler(struct fnic *fnic, CMD_STATE(sc) = FNIC_IOREQ_ABTS_COMPLETE; CMD_ABTS_STATUS(sc) = hdr_status; + CMD_FLAGS(sc) |= FNIC_IO_ABT_TERM_DONE; FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host, "abts cmpl recd. id %d status %s\n", (int)(id & FNIC_TAG_MASK), @@ -855,14 +1015,58 @@ static void fnic_fcpio_itmf_cmpl_handler(struct fnic *fnic, fnic_release_ioreq_buf(fnic, io_req, sc); mempool_free(io_req, fnic->io_req_pool); - if (sc->scsi_done) + if (sc->scsi_done) { + FNIC_TRACE(fnic_fcpio_itmf_cmpl_handler, + sc->device->host->host_no, id, + sc, + jiffies_to_msecs(jiffies - start_time), + desc, + (((u64)hdr_status << 40) | + (u64)sc->cmnd[0] << 32 | + (u64)sc->cmnd[2] << 24 | + (u64)sc->cmnd[3] << 16 | + (u64)sc->cmnd[4] << 8 | sc->cmnd[5]), + (((u64)CMD_FLAGS(sc) << 32) | + CMD_STATE(sc))); sc->scsi_done(sc); + } } } else if (id & FNIC_TAG_DEV_RST) { /* Completion of device reset */ CMD_LR_STATUS(sc) = hdr_status; + if (CMD_STATE(sc) == FNIC_IOREQ_ABTS_PENDING) { + spin_unlock_irqrestore(io_lock, flags); + CMD_FLAGS(sc) |= FNIC_DEV_RST_ABTS_PENDING; + FNIC_TRACE(fnic_fcpio_itmf_cmpl_handler, + sc->device->host->host_no, id, sc, + jiffies_to_msecs(jiffies - start_time), + desc, 0, + (((u64)CMD_FLAGS(sc) << 32) | CMD_STATE(sc))); + FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host, + "Terminate pending " + "dev reset cmpl recd. id %d status %s\n", + (int)(id & FNIC_TAG_MASK), + fnic_fcpio_status_to_str(hdr_status)); + return; + } + if (CMD_FLAGS(sc) & FNIC_DEV_RST_TIMED_OUT) { + /* Need to wait for terminate completion */ + spin_unlock_irqrestore(io_lock, flags); + FNIC_TRACE(fnic_fcpio_itmf_cmpl_handler, + sc->device->host->host_no, id, sc, + jiffies_to_msecs(jiffies - start_time), + desc, 0, + (((u64)CMD_FLAGS(sc) << 32) | CMD_STATE(sc))); + FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host, + "dev reset cmpl recd after time out. " + "id %d status %s\n", + (int)(id & FNIC_TAG_MASK), + fnic_fcpio_status_to_str(hdr_status)); + return; + } CMD_STATE(sc) = FNIC_IOREQ_CMD_COMPLETE; + CMD_FLAGS(sc) |= FNIC_DEV_RST_DONE; FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host, "dev reset cmpl recd. id %d status %s\n", (int)(id & FNIC_TAG_MASK), @@ -889,7 +1093,6 @@ static int fnic_fcpio_cmpl_handler(struct vnic_dev *vdev, struct fcpio_fw_req *desc) { struct fnic *fnic = vnic_dev_priv(vdev); - int ret = 0; switch (desc->hdr.type) { case FCPIO_ACK: /* fw copied copy wq desc to its queue */ @@ -906,11 +1109,11 @@ static int fnic_fcpio_cmpl_handler(struct vnic_dev *vdev, case FCPIO_FLOGI_REG_CMPL: /* fw completed flogi_reg */ case FCPIO_FLOGI_FIP_REG_CMPL: /* fw completed flogi_fip_reg */ - ret = fnic_fcpio_flogi_reg_cmpl_handler(fnic, desc); + fnic_fcpio_flogi_reg_cmpl_handler(fnic, desc); break; case FCPIO_RESET_CMPL: /* fw completed reset */ - ret = fnic_fcpio_fw_reset_cmpl_handler(fnic, desc); + fnic_fcpio_fw_reset_cmpl_handler(fnic, desc); break; default: @@ -920,7 +1123,7 @@ static int fnic_fcpio_cmpl_handler(struct vnic_dev *vdev, break; } - return ret; + return 0; } /* @@ -950,6 +1153,7 @@ static void fnic_cleanup_io(struct fnic *fnic, int exclude_id) unsigned long flags = 0; struct scsi_cmnd *sc; spinlock_t *io_lock; + unsigned long start_time = 0; for (i = 0; i < FNIC_MAX_IO_REQ; i++) { if (i == exclude_id) @@ -962,6 +1166,23 @@ static void fnic_cleanup_io(struct fnic *fnic, int exclude_id) io_lock = fnic_io_lock_hash(fnic, sc); spin_lock_irqsave(io_lock, flags); io_req = (struct fnic_io_req *)CMD_SP(sc); + if ((CMD_FLAGS(sc) & FNIC_DEVICE_RESET) && + !(CMD_FLAGS(sc) & FNIC_DEV_RST_DONE)) { + /* + * We will be here only when FW completes reset + * without sending completions for outstanding ios. + */ + CMD_FLAGS(sc) |= FNIC_DEV_RST_DONE; + if (io_req && io_req->dr_done) + complete(io_req->dr_done); + else if (io_req && io_req->abts_done) + complete(io_req->abts_done); + spin_unlock_irqrestore(io_lock, flags); + continue; + } else if (CMD_FLAGS(sc) & FNIC_DEVICE_RESET) { + spin_unlock_irqrestore(io_lock, flags); + continue; + } if (!io_req) { spin_unlock_irqrestore(io_lock, flags); goto cleanup_scsi_cmd; @@ -975,6 +1196,7 @@ static void fnic_cleanup_io(struct fnic *fnic, int exclude_id) * If there is a scsi_cmnd associated with this io_req, then * free the corresponding state */ + start_time = io_req->start_time; fnic_release_ioreq_buf(fnic, io_req, sc); mempool_free(io_req, fnic->io_req_pool); @@ -984,8 +1206,18 @@ cleanup_scsi_cmd: " DID_TRANSPORT_DISRUPTED\n"); /* Complete the command to SCSI */ - if (sc->scsi_done) + if (sc->scsi_done) { + FNIC_TRACE(fnic_cleanup_io, + sc->device->host->host_no, i, sc, + jiffies_to_msecs(jiffies - start_time), + 0, ((u64)sc->cmnd[0] << 32 | + (u64)sc->cmnd[2] << 24 | + (u64)sc->cmnd[3] << 16 | + (u64)sc->cmnd[4] << 8 | sc->cmnd[5]), + (((u64)CMD_FLAGS(sc) << 32) | CMD_STATE(sc))); + sc->scsi_done(sc); + } } } @@ -998,6 +1230,7 @@ void fnic_wq_copy_cleanup_handler(struct vnic_wq_copy *wq, struct scsi_cmnd *sc; unsigned long flags; spinlock_t *io_lock; + unsigned long start_time = 0; /* get the tag reference */ fcpio_tag_id_dec(&desc->hdr.tag, &id); @@ -1027,6 +1260,7 @@ void fnic_wq_copy_cleanup_handler(struct vnic_wq_copy *wq, spin_unlock_irqrestore(io_lock, flags); + start_time = io_req->start_time; fnic_release_ioreq_buf(fnic, io_req, sc); mempool_free(io_req, fnic->io_req_pool); @@ -1035,8 +1269,17 @@ wq_copy_cleanup_scsi_cmd: FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host, "wq_copy_cleanup_handler:" " DID_NO_CONNECT\n"); - if (sc->scsi_done) + if (sc->scsi_done) { + FNIC_TRACE(fnic_wq_copy_cleanup_handler, + sc->device->host->host_no, id, sc, + jiffies_to_msecs(jiffies - start_time), + 0, ((u64)sc->cmnd[0] << 32 | + (u64)sc->cmnd[2] << 24 | (u64)sc->cmnd[3] << 16 | + (u64)sc->cmnd[4] << 8 | sc->cmnd[5]), + (((u64)CMD_FLAGS(sc) << 32) | CMD_STATE(sc))); + sc->scsi_done(sc); + } } static inline int fnic_queue_abort_io_req(struct fnic *fnic, int tag, @@ -1044,8 +1287,18 @@ static inline int fnic_queue_abort_io_req(struct fnic *fnic, int tag, struct fnic_io_req *io_req) { struct vnic_wq_copy *wq = &fnic->wq_copy[0]; + struct Scsi_Host *host = fnic->lport->host; unsigned long flags; + spin_lock_irqsave(host->host_lock, flags); + if (unlikely(fnic_chk_state_flags_locked(fnic, + FNIC_FLAGS_IO_BLOCKED))) { + spin_unlock_irqrestore(host->host_lock, flags); + return 1; + } else + atomic_inc(&fnic->in_flight); + spin_unlock_irqrestore(host->host_lock, flags); + spin_lock_irqsave(&fnic->wq_copy_lock[0], flags); if (vnic_wq_copy_desc_avail(wq) <= fnic->wq_copy_desc_low[0]) @@ -1053,6 +1306,9 @@ static inline int fnic_queue_abort_io_req(struct fnic *fnic, int tag, if (!vnic_wq_copy_desc_avail(wq)) { spin_unlock_irqrestore(&fnic->wq_copy_lock[0], flags); + atomic_dec(&fnic->in_flight); + FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host, + "fnic_queue_abort_io_req: failure: no descriptors\n"); return 1; } fnic_queue_wq_copy_desc_itmf(wq, tag | FNIC_TAG_ABORT, @@ -1060,12 +1316,15 @@ static inline int fnic_queue_abort_io_req(struct fnic *fnic, int tag, fnic->config.ra_tov, fnic->config.ed_tov); spin_unlock_irqrestore(&fnic->wq_copy_lock[0], flags); + atomic_dec(&fnic->in_flight); + return 0; } -void fnic_rport_exch_reset(struct fnic *fnic, u32 port_id) +static void fnic_rport_exch_reset(struct fnic *fnic, u32 port_id) { int tag; + int abt_tag; struct fnic_io_req *io_req; spinlock_t *io_lock; unsigned long flags; @@ -1075,13 +1334,14 @@ void fnic_rport_exch_reset(struct fnic *fnic, u32 port_id) FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host, - "fnic_rport_reset_exch called portid 0x%06x\n", + "fnic_rport_exch_reset called portid 0x%06x\n", port_id); if (fnic->in_remove) return; for (tag = 0; tag < FNIC_MAX_IO_REQ; tag++) { + abt_tag = tag; sc = scsi_host_find_tag(fnic->lport->host, tag); if (!sc) continue; @@ -1096,6 +1356,15 @@ void fnic_rport_exch_reset(struct fnic *fnic, u32 port_id) continue; } + if ((CMD_FLAGS(sc) & FNIC_DEVICE_RESET) && + (!(CMD_FLAGS(sc) & FNIC_DEV_RST_ISSUED))) { + FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host, + "fnic_rport_exch_reset dev rst not pending sc 0x%p\n", + sc); + spin_unlock_irqrestore(io_lock, flags); + continue; + } + /* * Found IO that is still pending with firmware and * belongs to rport that went away @@ -1104,9 +1373,29 @@ void fnic_rport_exch_reset(struct fnic *fnic, u32 port_id) spin_unlock_irqrestore(io_lock, flags); continue; } + if (io_req->abts_done) { + shost_printk(KERN_ERR, fnic->lport->host, + "fnic_rport_exch_reset: io_req->abts_done is set " + "state is %s\n", + fnic_ioreq_state_to_str(CMD_STATE(sc))); + } + + if (!(CMD_FLAGS(sc) & FNIC_IO_ISSUED)) { + shost_printk(KERN_ERR, fnic->lport->host, + "rport_exch_reset " + "IO not yet issued %p tag 0x%x flags " + "%x state %d\n", + sc, tag, CMD_FLAGS(sc), CMD_STATE(sc)); + } old_ioreq_state = CMD_STATE(sc); CMD_STATE(sc) = FNIC_IOREQ_ABTS_PENDING; CMD_ABTS_STATUS(sc) = FCPIO_INVALID_CODE; + if (CMD_FLAGS(sc) & FNIC_DEVICE_RESET) { + abt_tag = (tag | FNIC_TAG_DEV_RST); + FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host, + "fnic_rport_exch_reset dev rst sc 0x%p\n", + sc); + } BUG_ON(io_req->abts_done); @@ -1118,7 +1407,7 @@ void fnic_rport_exch_reset(struct fnic *fnic, u32 port_id) /* Now queue the abort command to firmware */ int_to_scsilun(sc->device->lun, &fc_lun); - if (fnic_queue_abort_io_req(fnic, tag, + if (fnic_queue_abort_io_req(fnic, abt_tag, FCPIO_ITMF_ABT_TASK_TERM, fc_lun.scsi_lun, io_req)) { /* @@ -1127,12 +1416,17 @@ void fnic_rport_exch_reset(struct fnic *fnic, u32 port_id) * aborted later by scsi_eh, or cleaned up during * lun reset */ - io_lock = fnic_io_lock_hash(fnic, sc); - spin_lock_irqsave(io_lock, flags); if (CMD_STATE(sc) == FNIC_IOREQ_ABTS_PENDING) CMD_STATE(sc) = old_ioreq_state; spin_unlock_irqrestore(io_lock, flags); + } else { + spin_lock_irqsave(io_lock, flags); + if (CMD_FLAGS(sc) & FNIC_DEVICE_RESET) + CMD_FLAGS(sc) |= FNIC_DEV_RST_TERM_ISSUED; + else + CMD_FLAGS(sc) |= FNIC_IO_INTERNAL_TERM_ISSUED; + spin_unlock_irqrestore(io_lock, flags); } } @@ -1141,6 +1435,7 @@ void fnic_rport_exch_reset(struct fnic *fnic, u32 port_id) void fnic_terminate_rport_io(struct fc_rport *rport) { int tag; + int abt_tag; struct fnic_io_req *io_req; spinlock_t *io_lock; unsigned long flags; @@ -1154,14 +1449,15 @@ void fnic_terminate_rport_io(struct fc_rport *rport) FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host, "fnic_terminate_rport_io called" - " wwpn 0x%llx, wwnn0x%llx, portid 0x%06x\n", - rport->port_name, rport->node_name, + " wwpn 0x%llx, wwnn0x%llx, rport 0x%p, portid 0x%06x\n", + rport->port_name, rport->node_name, rport, rport->port_id); if (fnic->in_remove) return; for (tag = 0; tag < FNIC_MAX_IO_REQ; tag++) { + abt_tag = tag; sc = scsi_host_find_tag(fnic->lport->host, tag); if (!sc) continue; @@ -1180,6 +1476,14 @@ void fnic_terminate_rport_io(struct fc_rport *rport) continue; } + if ((CMD_FLAGS(sc) & FNIC_DEVICE_RESET) && + (!(CMD_FLAGS(sc) & FNIC_DEV_RST_ISSUED))) { + FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host, + "fnic_terminate_rport_io dev rst not pending sc 0x%p\n", + sc); + spin_unlock_irqrestore(io_lock, flags); + continue; + } /* * Found IO that is still pending with firmware and * belongs to rport that went away @@ -1188,9 +1492,27 @@ void fnic_terminate_rport_io(struct fc_rport *rport) spin_unlock_irqrestore(io_lock, flags); continue; } + if (io_req->abts_done) { + shost_printk(KERN_ERR, fnic->lport->host, + "fnic_terminate_rport_io: io_req->abts_done is set " + "state is %s\n", + fnic_ioreq_state_to_str(CMD_STATE(sc))); + } + if (!(CMD_FLAGS(sc) & FNIC_IO_ISSUED)) { + FNIC_SCSI_DBG(KERN_INFO, fnic->lport->host, + "fnic_terminate_rport_io " + "IO not yet issued %p tag 0x%x flags " + "%x state %d\n", + sc, tag, CMD_FLAGS(sc), CMD_STATE(sc)); + } old_ioreq_state = CMD_STATE(sc); CMD_STATE(sc) = FNIC_IOREQ_ABTS_PENDING; CMD_ABTS_STATUS(sc) = FCPIO_INVALID_CODE; + if (CMD_FLAGS(sc) & FNIC_DEVICE_RESET) { + abt_tag = (tag | FNIC_TAG_DEV_RST); + FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host, + "fnic_terminate_rport_io dev rst sc 0x%p\n", sc); + } BUG_ON(io_req->abts_done); @@ -1203,7 +1525,7 @@ void fnic_terminate_rport_io(struct fc_rport *rport) /* Now queue the abort command to firmware */ int_to_scsilun(sc->device->lun, &fc_lun); - if (fnic_queue_abort_io_req(fnic, tag, + if (fnic_queue_abort_io_req(fnic, abt_tag, FCPIO_ITMF_ABT_TASK_TERM, fc_lun.scsi_lun, io_req)) { /* @@ -1212,12 +1534,17 @@ void fnic_terminate_rport_io(struct fc_rport *rport) * aborted later by scsi_eh, or cleaned up during * lun reset */ - io_lock = fnic_io_lock_hash(fnic, sc); - spin_lock_irqsave(io_lock, flags); if (CMD_STATE(sc) == FNIC_IOREQ_ABTS_PENDING) CMD_STATE(sc) = old_ioreq_state; spin_unlock_irqrestore(io_lock, flags); + } else { + spin_lock_irqsave(io_lock, flags); + if (CMD_FLAGS(sc) & FNIC_DEVICE_RESET) + CMD_FLAGS(sc) |= FNIC_DEV_RST_TERM_ISSUED; + else + CMD_FLAGS(sc) |= FNIC_IO_INTERNAL_TERM_ISSUED; + spin_unlock_irqrestore(io_lock, flags); } } @@ -1232,13 +1559,15 @@ int fnic_abort_cmd(struct scsi_cmnd *sc) { struct fc_lport *lp; struct fnic *fnic; - struct fnic_io_req *io_req; + struct fnic_io_req *io_req = NULL; struct fc_rport *rport; spinlock_t *io_lock; unsigned long flags; + unsigned long start_time = 0; int ret = SUCCESS; - u32 task_req; + u32 task_req = 0; struct scsi_lun fc_lun; + int tag; DECLARE_COMPLETION_ONSTACK(tm_done); /* Wait for rport to unblock */ @@ -1249,9 +1578,13 @@ int fnic_abort_cmd(struct scsi_cmnd *sc) fnic = lport_priv(lp); rport = starget_to_rport(scsi_target(sc->device)); - FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host, - "Abort Cmd called FCID 0x%x, LUN 0x%x TAG %d\n", - rport->port_id, sc->device->lun, sc->request->tag); + tag = sc->request->tag; + FNIC_SCSI_DBG(KERN_DEBUG, + fnic->lport->host, + "Abort Cmd called FCID 0x%x, LUN 0x%x TAG %x flags %x\n", + rport->port_id, sc->device->lun, tag, CMD_FLAGS(sc)); + + CMD_FLAGS(sc) = FNIC_NO_FLAGS; if (lp->state != LPORT_ST_READY || !(lp->link_up)) { ret = FAILED; @@ -1318,6 +1651,10 @@ int fnic_abort_cmd(struct scsi_cmnd *sc) ret = FAILED; goto fnic_abort_cmd_end; } + if (task_req == FCPIO_ITMF_ABT_TASK) + CMD_FLAGS(sc) |= FNIC_IO_ABTS_ISSUED; + else + CMD_FLAGS(sc) |= FNIC_IO_TERM_ISSUED; /* * We queued an abort IO, wait for its completion. @@ -1336,6 +1673,7 @@ int fnic_abort_cmd(struct scsi_cmnd *sc) io_req = (struct fnic_io_req *)CMD_SP(sc); if (!io_req) { spin_unlock_irqrestore(io_lock, flags); + CMD_FLAGS(sc) |= FNIC_IO_ABT_TERM_REQ_NULL; ret = FAILED; goto fnic_abort_cmd_end; } @@ -1344,6 +1682,7 @@ int fnic_abort_cmd(struct scsi_cmnd *sc) /* fw did not complete abort, timed out */ if (CMD_STATE(sc) == FNIC_IOREQ_ABTS_PENDING) { spin_unlock_irqrestore(io_lock, flags); + CMD_FLAGS(sc) |= FNIC_IO_ABT_TERM_TIMED_OUT; ret = FAILED; goto fnic_abort_cmd_end; } @@ -1359,12 +1698,21 @@ int fnic_abort_cmd(struct scsi_cmnd *sc) spin_unlock_irqrestore(io_lock, flags); + start_time = io_req->start_time; fnic_release_ioreq_buf(fnic, io_req, sc); mempool_free(io_req, fnic->io_req_pool); fnic_abort_cmd_end: + FNIC_TRACE(fnic_abort_cmd, sc->device->host->host_no, + sc->request->tag, sc, + jiffies_to_msecs(jiffies - start_time), + 0, ((u64)sc->cmnd[0] << 32 | + (u64)sc->cmnd[2] << 24 | (u64)sc->cmnd[3] << 16 | + (u64)sc->cmnd[4] << 8 | sc->cmnd[5]), + (((u64)CMD_FLAGS(sc) << 32) | CMD_STATE(sc))); + FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host, - "Returning from abort cmd %s\n", + "Returning from abort cmd type %x %s\n", task_req, (ret == SUCCESS) ? "SUCCESS" : "FAILED"); return ret; @@ -1375,16 +1723,28 @@ static inline int fnic_queue_dr_io_req(struct fnic *fnic, struct fnic_io_req *io_req) { struct vnic_wq_copy *wq = &fnic->wq_copy[0]; + struct Scsi_Host *host = fnic->lport->host; struct scsi_lun fc_lun; int ret = 0; unsigned long intr_flags; + spin_lock_irqsave(host->host_lock, intr_flags); + if (unlikely(fnic_chk_state_flags_locked(fnic, + FNIC_FLAGS_IO_BLOCKED))) { + spin_unlock_irqrestore(host->host_lock, intr_flags); + return FAILED; + } else + atomic_inc(&fnic->in_flight); + spin_unlock_irqrestore(host->host_lock, intr_flags); + spin_lock_irqsave(&fnic->wq_copy_lock[0], intr_flags); if (vnic_wq_copy_desc_avail(wq) <= fnic->wq_copy_desc_low[0]) free_wq_copy_descs(fnic, wq); if (!vnic_wq_copy_desc_avail(wq)) { + FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host, + "queue_dr_io_req failure - no descriptors\n"); ret = -EAGAIN; goto lr_io_req_end; } @@ -1399,6 +1759,7 @@ static inline int fnic_queue_dr_io_req(struct fnic *fnic, lr_io_req_end: spin_unlock_irqrestore(&fnic->wq_copy_lock[0], intr_flags); + atomic_dec(&fnic->in_flight); return ret; } @@ -1412,7 +1773,7 @@ lr_io_req_end: static int fnic_clean_pending_aborts(struct fnic *fnic, struct scsi_cmnd *lr_sc) { - int tag; + int tag, abt_tag; struct fnic_io_req *io_req; spinlock_t *io_lock; unsigned long flags; @@ -1421,6 +1782,7 @@ static int fnic_clean_pending_aborts(struct fnic *fnic, struct scsi_lun fc_lun; struct scsi_device *lun_dev = lr_sc->device; DECLARE_COMPLETION_ONSTACK(tm_done); + enum fnic_ioreq_state old_ioreq_state; for (tag = 0; tag < FNIC_MAX_IO_REQ; tag++) { sc = scsi_host_find_tag(fnic->lport->host, tag); @@ -1449,7 +1811,41 @@ static int fnic_clean_pending_aborts(struct fnic *fnic, "Found IO in %s on lun\n", fnic_ioreq_state_to_str(CMD_STATE(sc))); - BUG_ON(CMD_STATE(sc) != FNIC_IOREQ_ABTS_PENDING); + if (CMD_STATE(sc) == FNIC_IOREQ_ABTS_PENDING) { + spin_unlock_irqrestore(io_lock, flags); + continue; + } + if ((CMD_FLAGS(sc) & FNIC_DEVICE_RESET) && + (!(CMD_FLAGS(sc) & FNIC_DEV_RST_ISSUED))) { + FNIC_SCSI_DBG(KERN_INFO, fnic->lport->host, + "%s dev rst not pending sc 0x%p\n", __func__, + sc); + spin_unlock_irqrestore(io_lock, flags); + continue; + } + old_ioreq_state = CMD_STATE(sc); + /* + * Any pending IO issued prior to reset is expected to be + * in abts pending state, if not we need to set + * FNIC_IOREQ_ABTS_PENDING to indicate the IO is abort pending. + * When IO is completed, the IO will be handed over and + * handled in this function. + */ + CMD_STATE(sc) = FNIC_IOREQ_ABTS_PENDING; + + if (io_req->abts_done) + shost_printk(KERN_ERR, fnic->lport->host, + "%s: io_req->abts_done is set state is %s\n", + __func__, fnic_ioreq_state_to_str(CMD_STATE(sc))); + + BUG_ON(io_req->abts_done); + + abt_tag = tag; + if (CMD_FLAGS(sc) & FNIC_DEVICE_RESET) { + abt_tag |= FNIC_TAG_DEV_RST; + FNIC_SCSI_DBG(KERN_INFO, fnic->lport->host, + "%s: dev rst sc 0x%p\n", __func__, sc); + } CMD_ABTS_STATUS(sc) = FCPIO_INVALID_CODE; io_req->abts_done = &tm_done; @@ -1458,17 +1854,25 @@ static int fnic_clean_pending_aborts(struct fnic *fnic, /* Now queue the abort command to firmware */ int_to_scsilun(sc->device->lun, &fc_lun); - if (fnic_queue_abort_io_req(fnic, tag, + if (fnic_queue_abort_io_req(fnic, abt_tag, FCPIO_ITMF_ABT_TASK_TERM, fc_lun.scsi_lun, io_req)) { spin_lock_irqsave(io_lock, flags); io_req = (struct fnic_io_req *)CMD_SP(sc); if (io_req) io_req->abts_done = NULL; + if (CMD_STATE(sc) == FNIC_IOREQ_ABTS_PENDING) + CMD_STATE(sc) = old_ioreq_state; spin_unlock_irqrestore(io_lock, flags); ret = 1; goto clean_pending_aborts_end; + } else { + spin_lock_irqsave(io_lock, flags); + if (CMD_FLAGS(sc) & FNIC_DEVICE_RESET) + CMD_FLAGS(sc) |= FNIC_DEV_RST_TERM_ISSUED; + spin_unlock_irqrestore(io_lock, flags); } + CMD_FLAGS(sc) |= FNIC_IO_INTERNAL_TERM_ISSUED; wait_for_completion_timeout(&tm_done, msecs_to_jiffies @@ -1479,8 +1883,8 @@ static int fnic_clean_pending_aborts(struct fnic *fnic, io_req = (struct fnic_io_req *)CMD_SP(sc); if (!io_req) { spin_unlock_irqrestore(io_lock, flags); - ret = 1; - goto clean_pending_aborts_end; + CMD_FLAGS(sc) |= FNIC_IO_ABT_TERM_REQ_NULL; + continue; } io_req->abts_done = NULL; @@ -1488,6 +1892,7 @@ static int fnic_clean_pending_aborts(struct fnic *fnic, /* if abort is still pending with fw, fail */ if (CMD_STATE(sc) == FNIC_IOREQ_ABTS_PENDING) { spin_unlock_irqrestore(io_lock, flags); + CMD_FLAGS(sc) |= FNIC_IO_ABT_TERM_DONE; ret = 1; goto clean_pending_aborts_end; } @@ -1498,10 +1903,75 @@ static int fnic_clean_pending_aborts(struct fnic *fnic, mempool_free(io_req, fnic->io_req_pool); } + schedule_timeout(msecs_to_jiffies(2 * fnic->config.ed_tov)); + + /* walk again to check, if IOs are still pending in fw */ + if (fnic_is_abts_pending(fnic, lr_sc)) + ret = FAILED; + clean_pending_aborts_end: return ret; } +/** + * fnic_scsi_host_start_tag + * Allocates tagid from host's tag list + **/ +static inline int +fnic_scsi_host_start_tag(struct fnic *fnic, struct scsi_cmnd *sc) +{ + struct blk_queue_tag *bqt = fnic->lport->host->bqt; + int tag, ret = SCSI_NO_TAG; + + BUG_ON(!bqt); + if (!bqt) { + pr_err("Tags are not supported\n"); + goto end; + } + + do { + tag = find_next_zero_bit(bqt->tag_map, bqt->max_depth, 1); + if (tag >= bqt->max_depth) { + pr_err("Tag allocation failure\n"); + goto end; + } + } while (test_and_set_bit(tag, bqt->tag_map)); + + bqt->tag_index[tag] = sc->request; + sc->request->tag = tag; + sc->tag = tag; + if (!sc->request->special) + sc->request->special = sc; + + ret = tag; + +end: + return ret; +} + +/** + * fnic_scsi_host_end_tag + * frees tag allocated by fnic_scsi_host_start_tag. + **/ +static inline void +fnic_scsi_host_end_tag(struct fnic *fnic, struct scsi_cmnd *sc) +{ + struct blk_queue_tag *bqt = fnic->lport->host->bqt; + int tag = sc->request->tag; + + if (tag == SCSI_NO_TAG) + return; + + BUG_ON(!bqt || !bqt->tag_index[tag]); + if (!bqt) + return; + + bqt->tag_index[tag] = NULL; + clear_bit(tag, bqt->tag_map); + + return; +} + /* * SCSI Eh thread issues a Lun Reset when one or more commands on a LUN * fail to get aborted. It calls driver's eh_device_reset with a SCSI command @@ -1511,13 +1981,17 @@ int fnic_device_reset(struct scsi_cmnd *sc) { struct fc_lport *lp; struct fnic *fnic; - struct fnic_io_req *io_req; + struct fnic_io_req *io_req = NULL; struct fc_rport *rport; int status; int ret = FAILED; spinlock_t *io_lock; unsigned long flags; + unsigned long start_time = 0; + struct scsi_lun fc_lun; + int tag = 0; DECLARE_COMPLETION_ONSTACK(tm_done); + int tag_gen_flag = 0; /*to track tags allocated by fnic driver*/ /* Wait for rport to unblock */ fc_block_scsi_eh(sc); @@ -1529,8 +2003,8 @@ int fnic_device_reset(struct scsi_cmnd *sc) rport = starget_to_rport(scsi_target(sc->device)); FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host, - "Device reset called FCID 0x%x, LUN 0x%x\n", - rport->port_id, sc->device->lun); + "Device reset called FCID 0x%x, LUN 0x%x sc 0x%p\n", + rport->port_id, sc->device->lun, sc); if (lp->state != LPORT_ST_READY || !(lp->link_up)) goto fnic_device_reset_end; @@ -1539,6 +2013,16 @@ int fnic_device_reset(struct scsi_cmnd *sc) if (fc_remote_port_chkready(rport)) goto fnic_device_reset_end; + CMD_FLAGS(sc) = FNIC_DEVICE_RESET; + /* Allocate tag if not present */ + + tag = sc->request->tag; + if (unlikely(tag < 0)) { + tag = fnic_scsi_host_start_tag(fnic, sc); + if (unlikely(tag == SCSI_NO_TAG)) + goto fnic_device_reset_end; + tag_gen_flag = 1; + } io_lock = fnic_io_lock_hash(fnic, sc); spin_lock_irqsave(io_lock, flags); io_req = (struct fnic_io_req *)CMD_SP(sc); @@ -1562,8 +2046,7 @@ int fnic_device_reset(struct scsi_cmnd *sc) CMD_LR_STATUS(sc) = FCPIO_INVALID_CODE; spin_unlock_irqrestore(io_lock, flags); - FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host, "TAG %d\n", - sc->request->tag); + FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host, "TAG %x\n", tag); /* * issue the device reset, if enqueue failed, clean up the ioreq @@ -1576,6 +2059,9 @@ int fnic_device_reset(struct scsi_cmnd *sc) io_req->dr_done = NULL; goto fnic_device_reset_clean; } + spin_lock_irqsave(io_lock, flags); + CMD_FLAGS(sc) |= FNIC_DEV_RST_ISSUED; + spin_unlock_irqrestore(io_lock, flags); /* * Wait on the local completion for LUN reset. The io_req may be @@ -1588,12 +2074,13 @@ int fnic_device_reset(struct scsi_cmnd *sc) io_req = (struct fnic_io_req *)CMD_SP(sc); if (!io_req) { spin_unlock_irqrestore(io_lock, flags); + FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host, + "io_req is null tag 0x%x sc 0x%p\n", tag, sc); goto fnic_device_reset_end; } io_req->dr_done = NULL; status = CMD_LR_STATUS(sc); - spin_unlock_irqrestore(io_lock, flags); /* * If lun reset not completed, bail out with failed. io_req @@ -1602,7 +2089,53 @@ int fnic_device_reset(struct scsi_cmnd *sc) if (status == FCPIO_INVALID_CODE) { FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host, "Device reset timed out\n"); - goto fnic_device_reset_end; + CMD_FLAGS(sc) |= FNIC_DEV_RST_TIMED_OUT; + spin_unlock_irqrestore(io_lock, flags); + int_to_scsilun(sc->device->lun, &fc_lun); + /* + * Issue abort and terminate on the device reset request. + * If q'ing of the abort fails, retry issue it after a delay. + */ + while (1) { + spin_lock_irqsave(io_lock, flags); + if (CMD_FLAGS(sc) & FNIC_DEV_RST_TERM_ISSUED) { + spin_unlock_irqrestore(io_lock, flags); + break; + } + spin_unlock_irqrestore(io_lock, flags); + if (fnic_queue_abort_io_req(fnic, + tag | FNIC_TAG_DEV_RST, + FCPIO_ITMF_ABT_TASK_TERM, + fc_lun.scsi_lun, io_req)) { + wait_for_completion_timeout(&tm_done, + msecs_to_jiffies(FNIC_ABT_TERM_DELAY_TIMEOUT)); + } else { + spin_lock_irqsave(io_lock, flags); + CMD_FLAGS(sc) |= FNIC_DEV_RST_TERM_ISSUED; + CMD_STATE(sc) = FNIC_IOREQ_ABTS_PENDING; + io_req->abts_done = &tm_done; + spin_unlock_irqrestore(io_lock, flags); + FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host, + "Abort and terminate issued on Device reset " + "tag 0x%x sc 0x%p\n", tag, sc); + break; + } + } + while (1) { + spin_lock_irqsave(io_lock, flags); + if (!(CMD_FLAGS(sc) & FNIC_DEV_RST_DONE)) { + spin_unlock_irqrestore(io_lock, flags); + wait_for_completion_timeout(&tm_done, + msecs_to_jiffies(FNIC_LUN_RESET_TIMEOUT)); + break; + } else { + io_req = (struct fnic_io_req *)CMD_SP(sc); + io_req->abts_done = NULL; + goto fnic_device_reset_clean; + } + } + } else { + spin_unlock_irqrestore(io_lock, flags); } /* Completed, but not successful, clean up the io_req, return fail */ @@ -1645,11 +2178,24 @@ fnic_device_reset_clean: spin_unlock_irqrestore(io_lock, flags); if (io_req) { + start_time = io_req->start_time; fnic_release_ioreq_buf(fnic, io_req, sc); mempool_free(io_req, fnic->io_req_pool); } fnic_device_reset_end: + FNIC_TRACE(fnic_device_reset, sc->device->host->host_no, + sc->request->tag, sc, + jiffies_to_msecs(jiffies - start_time), + 0, ((u64)sc->cmnd[0] << 32 | + (u64)sc->cmnd[2] << 24 | (u64)sc->cmnd[3] << 16 | + (u64)sc->cmnd[4] << 8 | sc->cmnd[5]), + (((u64)CMD_FLAGS(sc) << 32) | CMD_STATE(sc))); + + /* free tag if it is allocated */ + if (unlikely(tag_gen_flag)) + fnic_scsi_host_end_tag(fnic, sc); + FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host, "Returning from device reset %s\n", (ret == SUCCESS) ? @@ -1735,7 +2281,15 @@ void fnic_scsi_abort_io(struct fc_lport *lp) DECLARE_COMPLETION_ONSTACK(remove_wait); /* Issue firmware reset for fnic, wait for reset to complete */ +retry_fw_reset: spin_lock_irqsave(&fnic->fnic_lock, flags); + if (unlikely(fnic->state == FNIC_IN_FC_TRANS_ETH_MODE)) { + /* fw reset is in progress, poll for its completion */ + spin_unlock_irqrestore(&fnic->fnic_lock, flags); + schedule_timeout(msecs_to_jiffies(100)); + goto retry_fw_reset; + } + fnic->remove_wait = &remove_wait; old_state = fnic->state; fnic->state = FNIC_IN_FC_TRANS_ETH_MODE; @@ -1776,7 +2330,14 @@ void fnic_scsi_cleanup(struct fc_lport *lp) struct fnic *fnic = lport_priv(lp); /* issue fw reset */ +retry_fw_reset: spin_lock_irqsave(&fnic->fnic_lock, flags); + if (unlikely(fnic->state == FNIC_IN_FC_TRANS_ETH_MODE)) { + /* fw reset is in progress, poll for its completion */ + spin_unlock_irqrestore(&fnic->fnic_lock, flags); + schedule_timeout(msecs_to_jiffies(100)); + goto retry_fw_reset; + } old_state = fnic->state; fnic->state = FNIC_IN_FC_TRANS_ETH_MODE; fnic_update_mac_locked(fnic, fnic->ctlr.ctl_src_addr); @@ -1822,3 +2383,61 @@ call_fc_exch_mgr_reset: fc_exch_mgr_reset(lp, sid, did); } + +/* + * fnic_is_abts_pending() is a helper function that + * walks through tag map to check if there is any IOs pending,if there is one, + * then it returns 1 (true), otherwise 0 (false) + * if @lr_sc is non NULL, then it checks IOs specific to particular LUN, + * otherwise, it checks for all IOs. + */ +int fnic_is_abts_pending(struct fnic *fnic, struct scsi_cmnd *lr_sc) +{ + int tag; + struct fnic_io_req *io_req; + spinlock_t *io_lock; + unsigned long flags; + int ret = 0; + struct scsi_cmnd *sc; + struct scsi_device *lun_dev = NULL; + + if (lr_sc) + lun_dev = lr_sc->device; + + /* walk again to check, if IOs are still pending in fw */ + for (tag = 0; tag < FNIC_MAX_IO_REQ; tag++) { + sc = scsi_host_find_tag(fnic->lport->host, tag); + /* + * ignore this lun reset cmd or cmds that do not belong to + * this lun + */ + if (!sc || (lr_sc && (sc->device != lun_dev || sc == lr_sc))) + continue; + + io_lock = fnic_io_lock_hash(fnic, sc); + spin_lock_irqsave(io_lock, flags); + + io_req = (struct fnic_io_req *)CMD_SP(sc); + + if (!io_req || sc->device != lun_dev) { + spin_unlock_irqrestore(io_lock, flags); + continue; + } + + /* + * Found IO that is still pending with firmware and + * belongs to the LUN that we are resetting + */ + FNIC_SCSI_DBG(KERN_INFO, fnic->lport->host, + "Found IO in %s on lun\n", + fnic_ioreq_state_to_str(CMD_STATE(sc))); + + if (CMD_STATE(sc) == FNIC_IOREQ_ABTS_PENDING) { + spin_unlock_irqrestore(io_lock, flags); + ret = 1; + continue; + } + } + + return ret; +} diff --git a/drivers/scsi/fnic/fnic_trace.c b/drivers/scsi/fnic/fnic_trace.c new file mode 100644 index 0000000..23a60e3 --- /dev/null +++ b/drivers/scsi/fnic/fnic_trace.c @@ -0,0 +1,273 @@ +/* + * Copyright 2012 Cisco Systems, Inc. All rights reserved. + * + * This program is free software; you may redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/module.h> +#include <linux/mempool.h> +#include <linux/errno.h> +#include <linux/spinlock.h> +#include <linux/kallsyms.h> +#include "fnic_io.h" +#include "fnic.h" + +unsigned int trace_max_pages; +static int fnic_max_trace_entries; + +static unsigned long fnic_trace_buf_p; +static DEFINE_SPINLOCK(fnic_trace_lock); + +static fnic_trace_dbg_t fnic_trace_entries; +int fnic_tracing_enabled = 1; + +/* + * fnic_trace_get_buf - Give buffer pointer to user to fill up trace information + * + * Description: + * This routine gets next available trace buffer entry location @wr_idx + * from allocated trace buffer pages and give that memory location + * to user to store the trace information. + * + * Return Value: + * This routine returns pointer to next available trace entry + * @fnic_buf_head for user to fill trace information. + */ +fnic_trace_data_t *fnic_trace_get_buf(void) +{ + unsigned long fnic_buf_head; + unsigned long flags; + + spin_lock_irqsave(&fnic_trace_lock, flags); + + /* + * Get next available memory location for writing trace information + * at @wr_idx and increment @wr_idx + */ + fnic_buf_head = + fnic_trace_entries.page_offset[fnic_trace_entries.wr_idx]; + fnic_trace_entries.wr_idx++; + + /* + * Verify if trace buffer is full then change wd_idx to + * start from zero + */ + if (fnic_trace_entries.wr_idx >= fnic_max_trace_entries) + fnic_trace_entries.wr_idx = 0; + + /* + * Verify if write index @wr_idx and read index @rd_idx are same then + * increment @rd_idx to move to next entry in trace buffer + */ + if (fnic_trace_entries.wr_idx == fnic_trace_entries.rd_idx) { + fnic_trace_entries.rd_idx++; + if (fnic_trace_entries.rd_idx >= fnic_max_trace_entries) + fnic_trace_entries.rd_idx = 0; + } + spin_unlock_irqrestore(&fnic_trace_lock, flags); + return (fnic_trace_data_t *)fnic_buf_head; +} + +/* + * fnic_get_trace_data - Copy trace buffer to a memory file + * @fnic_dbgfs_t: pointer to debugfs trace buffer + * + * Description: + * This routine gathers the fnic trace debugfs data from the fnic_trace_data_t + * buffer and dumps it to fnic_dbgfs_t. It will start at the rd_idx entry in + * the log and process the log until the end of the buffer. Then it will gather + * from the beginning of the log and process until the current entry @wr_idx. + * + * Return Value: + * This routine returns the amount of bytes that were dumped into fnic_dbgfs_t + */ +int fnic_get_trace_data(fnic_dbgfs_t *fnic_dbgfs_prt) +{ + int rd_idx; + int wr_idx; + int len = 0; + unsigned long flags; + char str[KSYM_SYMBOL_LEN]; + struct timespec val; + fnic_trace_data_t *tbp; + + spin_lock_irqsave(&fnic_trace_lock, flags); + rd_idx = fnic_trace_entries.rd_idx; + wr_idx = fnic_trace_entries.wr_idx; + if (wr_idx < rd_idx) { + while (1) { + /* Start from read index @rd_idx */ + tbp = (fnic_trace_data_t *) + fnic_trace_entries.page_offset[rd_idx]; + if (!tbp) { + spin_unlock_irqrestore(&fnic_trace_lock, flags); + return 0; + } + /* Convert function pointer to function name */ + if (sizeof(unsigned long) < 8) { + sprint_symbol(str, tbp->fnaddr.low); + jiffies_to_timespec(tbp->timestamp.low, &val); + } else { + sprint_symbol(str, tbp->fnaddr.val); + jiffies_to_timespec(tbp->timestamp.val, &val); + } + /* + * Dump trace buffer entry to memory file + * and increment read index @rd_idx + */ + len += snprintf(fnic_dbgfs_prt->buffer + len, + (trace_max_pages * PAGE_SIZE * 3) - len, + "%16lu.%16lu %-50s %8x %8x %16llx %16llx " + "%16llx %16llx %16llx\n", val.tv_sec, + val.tv_nsec, str, tbp->host_no, tbp->tag, + tbp->data[0], tbp->data[1], tbp->data[2], + tbp->data[3], tbp->data[4]); + rd_idx++; + /* + * If rd_idx is reached to maximum trace entries + * then move rd_idx to zero + */ + if (rd_idx > (fnic_max_trace_entries-1)) + rd_idx = 0; + /* + * Continure dumpping trace buffer entries into + * memory file till rd_idx reaches write index + */ + if (rd_idx == wr_idx) + break; + } + } else if (wr_idx > rd_idx) { + while (1) { + /* Start from read index @rd_idx */ + tbp = (fnic_trace_data_t *) + fnic_trace_entries.page_offset[rd_idx]; + if (!tbp) { + spin_unlock_irqrestore(&fnic_trace_lock, flags); + return 0; + } + /* Convert function pointer to function name */ + if (sizeof(unsigned long) < 8) { + sprint_symbol(str, tbp->fnaddr.low); + jiffies_to_timespec(tbp->timestamp.low, &val); + } else { + sprint_symbol(str, tbp->fnaddr.val); + jiffies_to_timespec(tbp->timestamp.val, &val); + } + /* + * Dump trace buffer entry to memory file + * and increment read index @rd_idx + */ + len += snprintf(fnic_dbgfs_prt->buffer + len, + (trace_max_pages * PAGE_SIZE * 3) - len, + "%16lu.%16lu %-50s %8x %8x %16llx %16llx " + "%16llx %16llx %16llx\n", val.tv_sec, + val.tv_nsec, str, tbp->host_no, tbp->tag, + tbp->data[0], tbp->data[1], tbp->data[2], + tbp->data[3], tbp->data[4]); + rd_idx++; + /* + * Continue dumpping trace buffer entries into + * memory file till rd_idx reaches write index + */ + if (rd_idx == wr_idx) + break; + } + } + spin_unlock_irqrestore(&fnic_trace_lock, flags); + return len; +} + +/* + * fnic_trace_buf_init - Initialize fnic trace buffer logging facility + * + * Description: + * Initialize trace buffer data structure by allocating required memory and + * setting page_offset information for every trace entry by adding trace entry + * length to previous page_offset value. + */ +int fnic_trace_buf_init(void) +{ + unsigned long fnic_buf_head; + int i; + int err = 0; + + trace_max_pages = fnic_trace_max_pages; + fnic_max_trace_entries = (trace_max_pages * PAGE_SIZE)/ + FNIC_ENTRY_SIZE_BYTES; + + fnic_trace_buf_p = (unsigned long)vmalloc((trace_max_pages * PAGE_SIZE)); + if (!fnic_trace_buf_p) { + printk(KERN_ERR PFX "Failed to allocate memory " + "for fnic_trace_buf_p\n"); + err = -ENOMEM; + goto err_fnic_trace_buf_init; + } + memset((void *)fnic_trace_buf_p, 0, (trace_max_pages * PAGE_SIZE)); + + fnic_trace_entries.page_offset = vmalloc(fnic_max_trace_entries * + sizeof(unsigned long)); + if (!fnic_trace_entries.page_offset) { + printk(KERN_ERR PFX "Failed to allocate memory for" + " page_offset\n"); + if (fnic_trace_buf_p) { + vfree((void *)fnic_trace_buf_p); + fnic_trace_buf_p = 0; + } + err = -ENOMEM; + goto err_fnic_trace_buf_init; + } + memset((void *)fnic_trace_entries.page_offset, 0, + (fnic_max_trace_entries * sizeof(unsigned long))); + fnic_trace_entries.wr_idx = fnic_trace_entries.rd_idx = 0; + fnic_buf_head = fnic_trace_buf_p; + + /* + * Set page_offset field of fnic_trace_entries struct by + * calculating memory location for every trace entry using + * length of each trace entry + */ + for (i = 0; i < fnic_max_trace_entries; i++) { + fnic_trace_entries.page_offset[i] = fnic_buf_head; + fnic_buf_head += FNIC_ENTRY_SIZE_BYTES; + } + err = fnic_trace_debugfs_init(); + if (err < 0) { + printk(KERN_ERR PFX "Failed to initialize debugfs for tracing\n"); + goto err_fnic_trace_debugfs_init; + } + printk(KERN_INFO PFX "Successfully Initialized Trace Buffer\n"); + return err; +err_fnic_trace_debugfs_init: + fnic_trace_free(); +err_fnic_trace_buf_init: + return err; +} + +/* + * fnic_trace_free - Free memory of fnic trace data structures. + */ +void fnic_trace_free(void) +{ + fnic_tracing_enabled = 0; + fnic_trace_debugfs_terminate(); + if (fnic_trace_entries.page_offset) { + vfree((void *)fnic_trace_entries.page_offset); + fnic_trace_entries.page_offset = NULL; + } + if (fnic_trace_buf_p) { + vfree((void *)fnic_trace_buf_p); + fnic_trace_buf_p = 0; + } + printk(KERN_INFO PFX "Successfully Freed Trace Buffer\n"); +} diff --git a/drivers/scsi/fnic/fnic_trace.h b/drivers/scsi/fnic/fnic_trace.h new file mode 100644 index 0000000..cef42b4 --- /dev/null +++ b/drivers/scsi/fnic/fnic_trace.h @@ -0,0 +1,90 @@ +/* + * Copyright 2012 Cisco Systems, Inc. All rights reserved. + * + * This program is free software; you may redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __FNIC_TRACE_H__ +#define __FNIC_TRACE_H__ + +#define FNIC_ENTRY_SIZE_BYTES 64 + +extern ssize_t simple_read_from_buffer(void __user *to, + size_t count, + loff_t *ppos, + const void *from, + size_t available); + +extern unsigned int fnic_trace_max_pages; +extern int fnic_tracing_enabled; +extern unsigned int trace_max_pages; + +typedef struct fnic_trace_dbg { + int wr_idx; + int rd_idx; + unsigned long *page_offset; +} fnic_trace_dbg_t; + +typedef struct fnic_dbgfs { + int buffer_len; + char *buffer; +} fnic_dbgfs_t; + +struct fnic_trace_data { + union { + struct { + u32 low; + u32 high; + }; + u64 val; + } timestamp, fnaddr; + u32 host_no; + u32 tag; + u64 data[5]; +} __attribute__((__packed__)); + +typedef struct fnic_trace_data fnic_trace_data_t; + +#define FNIC_TRACE_ENTRY_SIZE \ + (FNIC_ENTRY_SIZE_BYTES - sizeof(fnic_trace_data_t)) + +#define FNIC_TRACE(_fn, _hn, _t, _a, _b, _c, _d, _e) \ + if (unlikely(fnic_tracing_enabled)) { \ + fnic_trace_data_t *trace_buf = fnic_trace_get_buf(); \ + if (trace_buf) { \ + if (sizeof(unsigned long) < 8) { \ + trace_buf->timestamp.low = jiffies; \ + trace_buf->fnaddr.low = (u32)(unsigned long)_fn; \ + } else { \ + trace_buf->timestamp.val = jiffies; \ + trace_buf->fnaddr.val = (u64)(unsigned long)_fn; \ + } \ + trace_buf->host_no = _hn; \ + trace_buf->tag = _t; \ + trace_buf->data[0] = (u64)(unsigned long)_a; \ + trace_buf->data[1] = (u64)(unsigned long)_b; \ + trace_buf->data[2] = (u64)(unsigned long)_c; \ + trace_buf->data[3] = (u64)(unsigned long)_d; \ + trace_buf->data[4] = (u64)(unsigned long)_e; \ + } \ + } + +fnic_trace_data_t *fnic_trace_get_buf(void); +int fnic_get_trace_data(fnic_dbgfs_t *); +int fnic_trace_buf_init(void); +void fnic_trace_free(void); +int fnic_trace_debugfs_init(void); +void fnic_trace_debugfs_terminate(void); + +#endif diff --git a/drivers/scsi/gdth.c b/drivers/scsi/gdth.c index 599790e..59bceac 100644 --- a/drivers/scsi/gdth.c +++ b/drivers/scsi/gdth.c @@ -1107,14 +1107,8 @@ static int gdth_init_pci(struct pci_dev *pdev, gdth_pci_str *pcistr, pci_read_config_word(pdev, PCI_COMMAND, &command); command |= 6; pci_write_config_word(pdev, PCI_COMMAND, command); - if (pci_resource_start(pdev, 8) == 1UL) - pci_resource_start(pdev, 8) = 0UL; - i = 0xFEFF0001UL; - pci_write_config_dword(pdev, PCI_ROM_ADDRESS, i); - gdth_delay(1); - pci_write_config_dword(pdev, PCI_ROM_ADDRESS, - pci_resource_start(pdev, 8)); - + gdth_delay(1); + dp6m_ptr = ha->brd; /* Ensure that it is safe to access the non HW portions of DPMEM. diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c index 4f33806..7f4f790 100644 --- a/drivers/scsi/hpsa.c +++ b/drivers/scsi/hpsa.c @@ -165,7 +165,7 @@ static void cmd_free(struct ctlr_info *h, struct CommandList *c); static void cmd_special_free(struct ctlr_info *h, struct CommandList *c); static struct CommandList *cmd_alloc(struct ctlr_info *h); static struct CommandList *cmd_special_alloc(struct ctlr_info *h); -static void fill_cmd(struct CommandList *c, u8 cmd, struct ctlr_info *h, +static int fill_cmd(struct CommandList *c, u8 cmd, struct ctlr_info *h, void *buff, size_t size, u8 page_code, unsigned char *scsi3addr, int cmd_type); @@ -1131,7 +1131,7 @@ clean: return -ENOMEM; } -static void hpsa_map_sg_chain_block(struct ctlr_info *h, +static int hpsa_map_sg_chain_block(struct ctlr_info *h, struct CommandList *c) { struct SGDescriptor *chain_sg, *chain_block; @@ -1144,8 +1144,15 @@ static void hpsa_map_sg_chain_block(struct ctlr_info *h, (c->Header.SGTotal - h->max_cmd_sg_entries); temp64 = pci_map_single(h->pdev, chain_block, chain_sg->Len, PCI_DMA_TODEVICE); + if (dma_mapping_error(&h->pdev->dev, temp64)) { + /* prevent subsequent unmapping */ + chain_sg->Addr.lower = 0; + chain_sg->Addr.upper = 0; + return -1; + } chain_sg->Addr.lower = (u32) (temp64 & 0x0FFFFFFFFULL); chain_sg->Addr.upper = (u32) ((temp64 >> 32) & 0x0FFFFFFFFULL); + return 0; } static void hpsa_unmap_sg_chain_block(struct ctlr_info *h, @@ -1390,7 +1397,7 @@ static void hpsa_pci_unmap(struct pci_dev *pdev, } } -static void hpsa_map_one(struct pci_dev *pdev, +static int hpsa_map_one(struct pci_dev *pdev, struct CommandList *cp, unsigned char *buf, size_t buflen, @@ -1401,10 +1408,16 @@ static void hpsa_map_one(struct pci_dev *pdev, if (buflen == 0 || data_direction == PCI_DMA_NONE) { cp->Header.SGList = 0; cp->Header.SGTotal = 0; - return; + return 0; } addr64 = (u64) pci_map_single(pdev, buf, buflen, data_direction); + if (dma_mapping_error(&pdev->dev, addr64)) { + /* Prevent subsequent unmap of something never mapped */ + cp->Header.SGList = 0; + cp->Header.SGTotal = 0; + return -1; + } cp->SG[0].Addr.lower = (u32) (addr64 & (u64) 0x00000000FFFFFFFF); cp->SG[0].Addr.upper = @@ -1412,6 +1425,7 @@ static void hpsa_map_one(struct pci_dev *pdev, cp->SG[0].Len = buflen; cp->Header.SGList = (u8) 1; /* no. SGs contig in this cmd */ cp->Header.SGTotal = (u16) 1; /* total sgs in this cmd list */ + return 0; } static inline void hpsa_scsi_do_simple_cmd_core(struct ctlr_info *h, @@ -1540,13 +1554,18 @@ static int hpsa_scsi_do_inquiry(struct ctlr_info *h, unsigned char *scsi3addr, return -ENOMEM; } - fill_cmd(c, HPSA_INQUIRY, h, buf, bufsize, page, scsi3addr, TYPE_CMD); + if (fill_cmd(c, HPSA_INQUIRY, h, buf, bufsize, + page, scsi3addr, TYPE_CMD)) { + rc = -1; + goto out; + } hpsa_scsi_do_simple_cmd_with_retry(h, c, PCI_DMA_FROMDEVICE); ei = c->err_info; if (ei->CommandStatus != 0 && ei->CommandStatus != CMD_DATA_UNDERRUN) { hpsa_scsi_interpret_error(c); rc = -1; } +out: cmd_special_free(h, c); return rc; } @@ -1564,7 +1583,9 @@ static int hpsa_send_reset(struct ctlr_info *h, unsigned char *scsi3addr) return -ENOMEM; } - fill_cmd(c, HPSA_DEVICE_RESET_MSG, h, NULL, 0, 0, scsi3addr, TYPE_MSG); + /* fill_cmd can't fail here, no data buffer to map. */ + (void) fill_cmd(c, HPSA_DEVICE_RESET_MSG, h, + NULL, 0, 0, scsi3addr, TYPE_MSG); hpsa_scsi_do_simple_cmd_core(h, c); /* no unmap needed here because no data xfer. */ @@ -1631,8 +1652,11 @@ static int hpsa_scsi_do_report_luns(struct ctlr_info *h, int logical, } /* address the controller */ memset(scsi3addr, 0, sizeof(scsi3addr)); - fill_cmd(c, logical ? HPSA_REPORT_LOG : HPSA_REPORT_PHYS, h, - buf, bufsize, 0, scsi3addr, TYPE_CMD); + if (fill_cmd(c, logical ? HPSA_REPORT_LOG : HPSA_REPORT_PHYS, h, + buf, bufsize, 0, scsi3addr, TYPE_CMD)) { + rc = -1; + goto out; + } if (extended_response) c->Request.CDB[1] = extended_response; hpsa_scsi_do_simple_cmd_with_retry(h, c, PCI_DMA_FROMDEVICE); @@ -1642,6 +1666,7 @@ static int hpsa_scsi_do_report_luns(struct ctlr_info *h, int logical, hpsa_scsi_interpret_error(c); rc = -1; } +out: cmd_special_free(h, c); return rc; } @@ -2105,7 +2130,10 @@ static int hpsa_scatter_gather(struct ctlr_info *h, if (chained) { cp->Header.SGList = h->max_cmd_sg_entries; cp->Header.SGTotal = (u16) (use_sg + 1); - hpsa_map_sg_chain_block(h, cp); + if (hpsa_map_sg_chain_block(h, cp)) { + scsi_dma_unmap(cmd); + return -1; + } return 0; } @@ -2353,8 +2381,9 @@ static int wait_for_device_to_become_ready(struct ctlr_info *h, if (waittime < HPSA_MAX_WAIT_INTERVAL_SECS) waittime = waittime * 2; - /* Send the Test Unit Ready */ - fill_cmd(c, TEST_UNIT_READY, h, NULL, 0, 0, lunaddr, TYPE_CMD); + /* Send the Test Unit Ready, fill_cmd can't fail, no mapping */ + (void) fill_cmd(c, TEST_UNIT_READY, h, + NULL, 0, 0, lunaddr, TYPE_CMD); hpsa_scsi_do_simple_cmd_core(h, c); /* no unmap needed here because no data xfer. */ @@ -2439,7 +2468,9 @@ static int hpsa_send_abort(struct ctlr_info *h, unsigned char *scsi3addr, return -ENOMEM; } - fill_cmd(c, HPSA_ABORT_MSG, h, abort, 0, 0, scsi3addr, TYPE_MSG); + /* fill_cmd can't fail here, no buffer to map */ + (void) fill_cmd(c, HPSA_ABORT_MSG, h, abort, + 0, 0, scsi3addr, TYPE_MSG); if (swizzle) swizzle_abort_tag(&c->Request.CDB[4]); hpsa_scsi_do_simple_cmd_core(h, c); @@ -2928,6 +2959,7 @@ static int hpsa_passthru_ioctl(struct ctlr_info *h, void __user *argp) struct CommandList *c; char *buff = NULL; union u64bit temp64; + int rc = 0; if (!argp) return -EINVAL; @@ -2947,8 +2979,8 @@ static int hpsa_passthru_ioctl(struct ctlr_info *h, void __user *argp) /* Copy the data into the buffer we created */ if (copy_from_user(buff, iocommand.buf, iocommand.buf_size)) { - kfree(buff); - return -EFAULT; + rc = -EFAULT; + goto out_kfree; } } else { memset(buff, 0, iocommand.buf_size); @@ -2956,8 +2988,8 @@ static int hpsa_passthru_ioctl(struct ctlr_info *h, void __user *argp) } c = cmd_special_alloc(h); if (c == NULL) { - kfree(buff); - return -ENOMEM; + rc = -ENOMEM; + goto out_kfree; } /* Fill in the command type */ c->cmd_type = CMD_IOCTL_PEND; @@ -2982,6 +3014,13 @@ static int hpsa_passthru_ioctl(struct ctlr_info *h, void __user *argp) if (iocommand.buf_size > 0) { temp64.val = pci_map_single(h->pdev, buff, iocommand.buf_size, PCI_DMA_BIDIRECTIONAL); + if (dma_mapping_error(&h->pdev->dev, temp64.val)) { + c->SG[0].Addr.lower = 0; + c->SG[0].Addr.upper = 0; + c->SG[0].Len = 0; + rc = -ENOMEM; + goto out; + } c->SG[0].Addr.lower = temp64.val32.lower; c->SG[0].Addr.upper = temp64.val32.upper; c->SG[0].Len = iocommand.buf_size; @@ -2996,22 +3035,22 @@ static int hpsa_passthru_ioctl(struct ctlr_info *h, void __user *argp) memcpy(&iocommand.error_info, c->err_info, sizeof(iocommand.error_info)); if (copy_to_user(argp, &iocommand, sizeof(iocommand))) { - kfree(buff); - cmd_special_free(h, c); - return -EFAULT; + rc = -EFAULT; + goto out; } if (iocommand.Request.Type.Direction == XFER_READ && iocommand.buf_size > 0) { /* Copy the data out of the buffer we created */ if (copy_to_user(iocommand.buf, buff, iocommand.buf_size)) { - kfree(buff); - cmd_special_free(h, c); - return -EFAULT; + rc = -EFAULT; + goto out; } } - kfree(buff); +out: cmd_special_free(h, c); - return 0; +out_kfree: + kfree(buff); + return rc; } static int hpsa_big_passthru_ioctl(struct ctlr_info *h, void __user *argp) @@ -3103,6 +3142,15 @@ static int hpsa_big_passthru_ioctl(struct ctlr_info *h, void __user *argp) for (i = 0; i < sg_used; i++) { temp64.val = pci_map_single(h->pdev, buff[i], buff_size[i], PCI_DMA_BIDIRECTIONAL); + if (dma_mapping_error(&h->pdev->dev, temp64.val)) { + c->SG[i].Addr.lower = 0; + c->SG[i].Addr.upper = 0; + c->SG[i].Len = 0; + hpsa_pci_unmap(h->pdev, c, i, + PCI_DMA_BIDIRECTIONAL); + status = -ENOMEM; + goto cleanup1; + } c->SG[i].Addr.lower = temp64.val32.lower; c->SG[i].Addr.upper = temp64.val32.upper; c->SG[i].Len = buff_size[i]; @@ -3190,7 +3238,8 @@ static int hpsa_send_host_reset(struct ctlr_info *h, unsigned char *scsi3addr, c = cmd_alloc(h); if (!c) return -ENOMEM; - fill_cmd(c, HPSA_DEVICE_RESET_MSG, h, NULL, 0, 0, + /* fill_cmd can't fail here, no data buffer to map */ + (void) fill_cmd(c, HPSA_DEVICE_RESET_MSG, h, NULL, 0, 0, RAID_CTLR_LUNID, TYPE_MSG); c->Request.CDB[1] = reset_type; /* fill_cmd defaults to target reset */ c->waiting = NULL; @@ -3202,7 +3251,7 @@ static int hpsa_send_host_reset(struct ctlr_info *h, unsigned char *scsi3addr, return 0; } -static void fill_cmd(struct CommandList *c, u8 cmd, struct ctlr_info *h, +static int fill_cmd(struct CommandList *c, u8 cmd, struct ctlr_info *h, void *buff, size_t size, u8 page_code, unsigned char *scsi3addr, int cmd_type) { @@ -3271,7 +3320,7 @@ static void fill_cmd(struct CommandList *c, u8 cmd, struct ctlr_info *h, default: dev_warn(&h->pdev->dev, "unknown command 0x%c\n", cmd); BUG(); - return; + return -1; } } else if (cmd_type == TYPE_MSG) { switch (cmd) { @@ -3343,10 +3392,9 @@ static void fill_cmd(struct CommandList *c, u8 cmd, struct ctlr_info *h, default: pci_dir = PCI_DMA_BIDIRECTIONAL; } - - hpsa_map_one(h->pdev, c, buff, size, pci_dir); - - return; + if (hpsa_map_one(h->pdev, c, buff, size, pci_dir)) + return -1; + return 0; } /* @@ -4882,10 +4930,13 @@ static void hpsa_flush_cache(struct ctlr_info *h) dev_warn(&h->pdev->dev, "cmd_special_alloc returned NULL!\n"); goto out_of_memory; } - fill_cmd(c, HPSA_CACHE_FLUSH, h, flush_buf, 4, 0, - RAID_CTLR_LUNID, TYPE_CMD); + if (fill_cmd(c, HPSA_CACHE_FLUSH, h, flush_buf, 4, 0, + RAID_CTLR_LUNID, TYPE_CMD)) { + goto out; + } hpsa_scsi_do_simple_cmd_with_retry(h, c, PCI_DMA_TODEVICE); if (c->err_info->CommandStatus != 0) +out: dev_warn(&h->pdev->dev, "error flushing cache on controller\n"); cmd_special_free(h, c); diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c index 1d7da3f..f328089 100644 --- a/drivers/scsi/ipr.c +++ b/drivers/scsi/ipr.c @@ -98,6 +98,7 @@ static unsigned int ipr_transop_timeout = 0; static unsigned int ipr_debug = 0; static unsigned int ipr_max_devs = IPR_DEFAULT_SIS64_DEVS; static unsigned int ipr_dual_ioa_raid = 1; +static unsigned int ipr_number_of_msix = 2; static DEFINE_SPINLOCK(ipr_driver_lock); /* This table describes the differences between DMA controller chips */ @@ -107,6 +108,7 @@ static const struct ipr_chip_cfg_t ipr_chip_cfg[] = { .max_cmds = 100, .cache_line_size = 0x20, .clear_isr = 1, + .iopoll_weight = 0, { .set_interrupt_mask_reg = 0x0022C, .clr_interrupt_mask_reg = 0x00230, @@ -131,6 +133,7 @@ static const struct ipr_chip_cfg_t ipr_chip_cfg[] = { .max_cmds = 100, .cache_line_size = 0x20, .clear_isr = 1, + .iopoll_weight = 0, { .set_interrupt_mask_reg = 0x00288, .clr_interrupt_mask_reg = 0x0028C, @@ -155,6 +158,7 @@ static const struct ipr_chip_cfg_t ipr_chip_cfg[] = { .max_cmds = 1000, .cache_line_size = 0x20, .clear_isr = 0, + .iopoll_weight = 64, { .set_interrupt_mask_reg = 0x00010, .clr_interrupt_mask_reg = 0x00018, @@ -215,6 +219,8 @@ MODULE_PARM_DESC(dual_ioa_raid, "Enable dual adapter RAID support. Set to 1 to e module_param_named(max_devs, ipr_max_devs, int, 0); MODULE_PARM_DESC(max_devs, "Specify the maximum number of physical devices. " "[Default=" __stringify(IPR_DEFAULT_SIS64_DEVS) "]"); +module_param_named(number_of_msix, ipr_number_of_msix, int, 0); +MODULE_PARM_DESC(number_of_msix, "Specify the number of MSIX interrupts to use on capable adapters (1 - 5). (default:2)"); MODULE_LICENSE("GPL"); MODULE_VERSION(IPR_DRIVER_VERSION); @@ -549,7 +555,8 @@ static void ipr_trc_hook(struct ipr_cmnd *ipr_cmd, struct ipr_trace_entry *trace_entry; struct ipr_ioa_cfg *ioa_cfg = ipr_cmd->ioa_cfg; - trace_entry = &ioa_cfg->trace[ioa_cfg->trace_index++]; + trace_entry = &ioa_cfg->trace[atomic_add_return + (1, &ioa_cfg->trace_index)%IPR_NUM_TRACE_ENTRIES]; trace_entry->time = jiffies; trace_entry->op_code = ipr_cmd->ioarcb.cmd_pkt.cdb[0]; trace_entry->type = type; @@ -560,6 +567,7 @@ static void ipr_trc_hook(struct ipr_cmnd *ipr_cmd, trace_entry->cmd_index = ipr_cmd->cmd_index & 0xff; trace_entry->res_handle = ipr_cmd->ioarcb.res_handle; trace_entry->u.add_data = add_data; + wmb(); } #else #define ipr_trc_hook(ipr_cmd, type, add_data) do { } while (0) @@ -595,8 +603,11 @@ static void ipr_reinit_ipr_cmnd(struct ipr_cmnd *ipr_cmd) struct ipr_ioasa *ioasa = &ipr_cmd->s.ioasa; struct ipr_ioasa64 *ioasa64 = &ipr_cmd->s.ioasa64; dma_addr_t dma_addr = ipr_cmd->dma_addr; + int hrrq_id; + hrrq_id = ioarcb->cmd_pkt.hrrq_id; memset(&ioarcb->cmd_pkt, 0, sizeof(struct ipr_cmd_pkt)); + ioarcb->cmd_pkt.hrrq_id = hrrq_id; ioarcb->data_transfer_length = 0; ioarcb->read_data_transfer_length = 0; ioarcb->ioadl_len = 0; @@ -646,12 +657,16 @@ static void ipr_init_ipr_cmnd(struct ipr_cmnd *ipr_cmd, * pointer to ipr command struct **/ static -struct ipr_cmnd *__ipr_get_free_ipr_cmnd(struct ipr_ioa_cfg *ioa_cfg) +struct ipr_cmnd *__ipr_get_free_ipr_cmnd(struct ipr_hrr_queue *hrrq) { - struct ipr_cmnd *ipr_cmd; + struct ipr_cmnd *ipr_cmd = NULL; + + if (likely(!list_empty(&hrrq->hrrq_free_q))) { + ipr_cmd = list_entry(hrrq->hrrq_free_q.next, + struct ipr_cmnd, queue); + list_del(&ipr_cmd->queue); + } - ipr_cmd = list_entry(ioa_cfg->free_q.next, struct ipr_cmnd, queue); - list_del(&ipr_cmd->queue); return ipr_cmd; } @@ -666,7 +681,8 @@ struct ipr_cmnd *__ipr_get_free_ipr_cmnd(struct ipr_ioa_cfg *ioa_cfg) static struct ipr_cmnd *ipr_get_free_ipr_cmnd(struct ipr_ioa_cfg *ioa_cfg) { - struct ipr_cmnd *ipr_cmd = __ipr_get_free_ipr_cmnd(ioa_cfg); + struct ipr_cmnd *ipr_cmd = + __ipr_get_free_ipr_cmnd(&ioa_cfg->hrrq[IPR_INIT_HRRQ]); ipr_init_ipr_cmnd(ipr_cmd, ipr_lock_and_done); return ipr_cmd; } @@ -686,9 +702,15 @@ static void ipr_mask_and_clear_interrupts(struct ipr_ioa_cfg *ioa_cfg, u32 clr_ints) { volatile u32 int_reg; + int i; /* Stop new interrupts */ - ioa_cfg->allow_interrupts = 0; + for (i = 0; i < ioa_cfg->hrrq_num; i++) { + spin_lock(&ioa_cfg->hrrq[i]._lock); + ioa_cfg->hrrq[i].allow_interrupts = 0; + spin_unlock(&ioa_cfg->hrrq[i]._lock); + } + wmb(); /* Set interrupt mask to stop all new interrupts */ if (ioa_cfg->sis64) @@ -761,13 +783,12 @@ static int ipr_set_pcix_cmd_reg(struct ipr_ioa_cfg *ioa_cfg) **/ static void ipr_sata_eh_done(struct ipr_cmnd *ipr_cmd) { - struct ipr_ioa_cfg *ioa_cfg = ipr_cmd->ioa_cfg; struct ata_queued_cmd *qc = ipr_cmd->qc; struct ipr_sata_port *sata_port = qc->ap->private_data; qc->err_mask |= AC_ERR_OTHER; sata_port->ioasa.status |= ATA_BUSY; - list_add_tail(&ipr_cmd->queue, &ioa_cfg->free_q); + list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_free_q); ata_qc_complete(qc); } @@ -783,14 +804,13 @@ static void ipr_sata_eh_done(struct ipr_cmnd *ipr_cmd) **/ static void ipr_scsi_eh_done(struct ipr_cmnd *ipr_cmd) { - struct ipr_ioa_cfg *ioa_cfg = ipr_cmd->ioa_cfg; struct scsi_cmnd *scsi_cmd = ipr_cmd->scsi_cmd; scsi_cmd->result |= (DID_ERROR << 16); scsi_dma_unmap(ipr_cmd->scsi_cmd); scsi_cmd->scsi_done(scsi_cmd); - list_add_tail(&ipr_cmd->queue, &ioa_cfg->free_q); + list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_free_q); } /** @@ -805,24 +825,32 @@ static void ipr_scsi_eh_done(struct ipr_cmnd *ipr_cmd) static void ipr_fail_all_ops(struct ipr_ioa_cfg *ioa_cfg) { struct ipr_cmnd *ipr_cmd, *temp; + struct ipr_hrr_queue *hrrq; ENTER; - list_for_each_entry_safe(ipr_cmd, temp, &ioa_cfg->pending_q, queue) { - list_del(&ipr_cmd->queue); + for_each_hrrq(hrrq, ioa_cfg) { + spin_lock(&hrrq->_lock); + list_for_each_entry_safe(ipr_cmd, + temp, &hrrq->hrrq_pending_q, queue) { + list_del(&ipr_cmd->queue); - ipr_cmd->s.ioasa.hdr.ioasc = cpu_to_be32(IPR_IOASC_IOA_WAS_RESET); - ipr_cmd->s.ioasa.hdr.ilid = cpu_to_be32(IPR_DRIVER_ILID); + ipr_cmd->s.ioasa.hdr.ioasc = + cpu_to_be32(IPR_IOASC_IOA_WAS_RESET); + ipr_cmd->s.ioasa.hdr.ilid = + cpu_to_be32(IPR_DRIVER_ILID); - if (ipr_cmd->scsi_cmd) - ipr_cmd->done = ipr_scsi_eh_done; - else if (ipr_cmd->qc) - ipr_cmd->done = ipr_sata_eh_done; + if (ipr_cmd->scsi_cmd) + ipr_cmd->done = ipr_scsi_eh_done; + else if (ipr_cmd->qc) + ipr_cmd->done = ipr_sata_eh_done; - ipr_trc_hook(ipr_cmd, IPR_TRACE_FINISH, IPR_IOASC_IOA_WAS_RESET); - del_timer(&ipr_cmd->timer); - ipr_cmd->done(ipr_cmd); + ipr_trc_hook(ipr_cmd, IPR_TRACE_FINISH, + IPR_IOASC_IOA_WAS_RESET); + del_timer(&ipr_cmd->timer); + ipr_cmd->done(ipr_cmd); + } + spin_unlock(&hrrq->_lock); } - LEAVE; } @@ -872,9 +900,7 @@ static void ipr_do_req(struct ipr_cmnd *ipr_cmd, void (*done) (struct ipr_cmnd *), void (*timeout_func) (struct ipr_cmnd *), u32 timeout) { - struct ipr_ioa_cfg *ioa_cfg = ipr_cmd->ioa_cfg; - - list_add_tail(&ipr_cmd->queue, &ioa_cfg->pending_q); + list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_pending_q); ipr_cmd->done = done; @@ -975,6 +1001,14 @@ static void ipr_send_blocking_cmd(struct ipr_cmnd *ipr_cmd, spin_lock_irq(ioa_cfg->host->host_lock); } +static int ipr_get_hrrq_index(struct ipr_ioa_cfg *ioa_cfg) +{ + if (ioa_cfg->hrrq_num == 1) + return 0; + else + return (atomic_add_return(1, &ioa_cfg->hrrq_index) % (ioa_cfg->hrrq_num - 1)) + 1; +} + /** * ipr_send_hcam - Send an HCAM to the adapter. * @ioa_cfg: ioa config struct @@ -994,9 +1028,9 @@ static void ipr_send_hcam(struct ipr_ioa_cfg *ioa_cfg, u8 type, struct ipr_cmnd *ipr_cmd; struct ipr_ioarcb *ioarcb; - if (ioa_cfg->allow_cmds) { + if (ioa_cfg->hrrq[IPR_INIT_HRRQ].allow_cmds) { ipr_cmd = ipr_get_free_ipr_cmnd(ioa_cfg); - list_add_tail(&ipr_cmd->queue, &ioa_cfg->pending_q); + list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_pending_q); list_add_tail(&hostrcb->queue, &ioa_cfg->hostrcb_pending_q); ipr_cmd->u.hostrcb = hostrcb; @@ -1166,14 +1200,15 @@ static int ipr_is_same_device(struct ipr_resource_entry *res, } /** - * ipr_format_res_path - Format the resource path for printing. + * __ipr_format_res_path - Format the resource path for printing. * @res_path: resource path * @buf: buffer + * @len: length of buffer provided * * Return value: * pointer to buffer **/ -static char *ipr_format_res_path(u8 *res_path, char *buffer, int len) +static char *__ipr_format_res_path(u8 *res_path, char *buffer, int len) { int i; char *p = buffer; @@ -1187,6 +1222,27 @@ static char *ipr_format_res_path(u8 *res_path, char *buffer, int len) } /** + * ipr_format_res_path - Format the resource path for printing. + * @ioa_cfg: ioa config struct + * @res_path: resource path + * @buf: buffer + * @len: length of buffer provided + * + * Return value: + * pointer to buffer + **/ +static char *ipr_format_res_path(struct ipr_ioa_cfg *ioa_cfg, + u8 *res_path, char *buffer, int len) +{ + char *p = buffer; + + *p = '\0'; + p += snprintf(p, buffer + len - p, "%d/", ioa_cfg->host->host_no); + __ipr_format_res_path(res_path, p, len - (buffer - p)); + return buffer; +} + +/** * ipr_update_res_entry - Update the resource entry. * @res: resource entry struct * @cfgtew: config table entry wrapper struct @@ -1226,8 +1282,8 @@ static void ipr_update_res_entry(struct ipr_resource_entry *res, if (res->sdev && new_path) sdev_printk(KERN_INFO, res->sdev, "Resource path: %s\n", - ipr_format_res_path(res->res_path, buffer, - sizeof(buffer))); + ipr_format_res_path(res->ioa_cfg, + res->res_path, buffer, sizeof(buffer))); } else { res->flags = cfgtew->u.cfgte->flags; if (res->flags & IPR_IS_IOA_RESOURCE) @@ -1363,7 +1419,7 @@ static void ipr_process_ccn(struct ipr_cmnd *ipr_cmd) u32 ioasc = be32_to_cpu(ipr_cmd->s.ioasa.hdr.ioasc); list_del(&hostrcb->queue); - list_add_tail(&ipr_cmd->queue, &ioa_cfg->free_q); + list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_free_q); if (ioasc) { if (ioasc != IPR_IOASC_IOA_WAS_RESET) @@ -1613,8 +1669,8 @@ static void ipr_log_sis64_config_error(struct ipr_ioa_cfg *ioa_cfg, ipr_err_separator; ipr_err("Device %d : %s", i + 1, - ipr_format_res_path(dev_entry->res_path, buffer, - sizeof(buffer))); + __ipr_format_res_path(dev_entry->res_path, + buffer, sizeof(buffer))); ipr_log_ext_vpd(&dev_entry->vpd); ipr_err("-----New Device Information-----\n"); @@ -1960,14 +2016,16 @@ static void ipr_log64_fabric_path(struct ipr_hostrcb *hostrcb, ipr_hcam_err(hostrcb, "%s %s: Resource Path=%s\n", path_active_desc[i].desc, path_state_desc[j].desc, - ipr_format_res_path(fabric->res_path, buffer, - sizeof(buffer))); + ipr_format_res_path(hostrcb->ioa_cfg, + fabric->res_path, + buffer, sizeof(buffer))); return; } } ipr_err("Path state=%02X Resource Path=%s\n", path_state, - ipr_format_res_path(fabric->res_path, buffer, sizeof(buffer))); + ipr_format_res_path(hostrcb->ioa_cfg, fabric->res_path, + buffer, sizeof(buffer))); } static const struct { @@ -2108,18 +2166,20 @@ static void ipr_log64_path_elem(struct ipr_hostrcb *hostrcb, ipr_hcam_err(hostrcb, "%s %s: Resource Path=%s, Link rate=%s, WWN=%08X%08X\n", path_status_desc[j].desc, path_type_desc[i].desc, - ipr_format_res_path(cfg->res_path, buffer, - sizeof(buffer)), - link_rate[cfg->link_rate & IPR_PHY_LINK_RATE_MASK], - be32_to_cpu(cfg->wwid[0]), be32_to_cpu(cfg->wwid[1])); + ipr_format_res_path(hostrcb->ioa_cfg, + cfg->res_path, buffer, sizeof(buffer)), + link_rate[cfg->link_rate & IPR_PHY_LINK_RATE_MASK], + be32_to_cpu(cfg->wwid[0]), + be32_to_cpu(cfg->wwid[1])); return; } } ipr_hcam_err(hostrcb, "Path element=%02X: Resource Path=%s, Link rate=%s " "WWN=%08X%08X\n", cfg->type_status, - ipr_format_res_path(cfg->res_path, buffer, sizeof(buffer)), - link_rate[cfg->link_rate & IPR_PHY_LINK_RATE_MASK], - be32_to_cpu(cfg->wwid[0]), be32_to_cpu(cfg->wwid[1])); + ipr_format_res_path(hostrcb->ioa_cfg, + cfg->res_path, buffer, sizeof(buffer)), + link_rate[cfg->link_rate & IPR_PHY_LINK_RATE_MASK], + be32_to_cpu(cfg->wwid[0]), be32_to_cpu(cfg->wwid[1])); } /** @@ -2182,7 +2242,8 @@ static void ipr_log_sis64_array_error(struct ipr_ioa_cfg *ioa_cfg, ipr_err("RAID %s Array Configuration: %s\n", error->protection_level, - ipr_format_res_path(error->last_res_path, buffer, sizeof(buffer))); + ipr_format_res_path(ioa_cfg, error->last_res_path, + buffer, sizeof(buffer))); ipr_err_separator; @@ -2203,11 +2264,12 @@ static void ipr_log_sis64_array_error(struct ipr_ioa_cfg *ioa_cfg, ipr_err("Array Member %d:\n", i); ipr_log_ext_vpd(&array_entry->vpd); ipr_err("Current Location: %s\n", - ipr_format_res_path(array_entry->res_path, buffer, - sizeof(buffer))); + ipr_format_res_path(ioa_cfg, array_entry->res_path, + buffer, sizeof(buffer))); ipr_err("Expected Location: %s\n", - ipr_format_res_path(array_entry->expected_res_path, - buffer, sizeof(buffer))); + ipr_format_res_path(ioa_cfg, + array_entry->expected_res_path, + buffer, sizeof(buffer))); ipr_err_separator; } @@ -2409,7 +2471,7 @@ static void ipr_process_error(struct ipr_cmnd *ipr_cmd) fd_ioasc = be32_to_cpu(hostrcb->hcam.u.error.fd_ioasc); list_del(&hostrcb->queue); - list_add_tail(&ipr_cmd->queue, &ioa_cfg->free_q); + list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_free_q); if (!ioasc) { ipr_handle_log_data(ioa_cfg, hostrcb); @@ -2491,36 +2553,6 @@ static void ipr_oper_timeout(struct ipr_cmnd *ipr_cmd) } /** - * ipr_reset_reload - Reset/Reload the IOA - * @ioa_cfg: ioa config struct - * @shutdown_type: shutdown type - * - * This function resets the adapter and re-initializes it. - * This function assumes that all new host commands have been stopped. - * Return value: - * SUCCESS / FAILED - **/ -static int ipr_reset_reload(struct ipr_ioa_cfg *ioa_cfg, - enum ipr_shutdown_type shutdown_type) -{ - if (!ioa_cfg->in_reset_reload) - ipr_initiate_ioa_reset(ioa_cfg, shutdown_type); - - spin_unlock_irq(ioa_cfg->host->host_lock); - wait_event(ioa_cfg->reset_wait_q, !ioa_cfg->in_reset_reload); - spin_lock_irq(ioa_cfg->host->host_lock); - - /* If we got hit with a host reset while we were already resetting - the adapter for some reason, and the reset failed. */ - if (ioa_cfg->ioa_is_dead) { - ipr_trace; - return FAILED; - } - - return SUCCESS; -} - -/** * ipr_find_ses_entry - Find matching SES in SES table * @res: resource entry struct of SES * @@ -3153,7 +3185,8 @@ static void ipr_worker_thread(struct work_struct *work) restart: do { did_work = 0; - if (!ioa_cfg->allow_cmds || !ioa_cfg->allow_ml_add_del) { + if (!ioa_cfg->hrrq[IPR_INIT_HRRQ].allow_cmds || + !ioa_cfg->allow_ml_add_del) { spin_unlock_irqrestore(ioa_cfg->host->host_lock, lock_flags); return; } @@ -3401,7 +3434,7 @@ static ssize_t ipr_show_adapter_state(struct device *dev, int len; spin_lock_irqsave(ioa_cfg->host->host_lock, lock_flags); - if (ioa_cfg->ioa_is_dead) + if (ioa_cfg->hrrq[IPR_INIT_HRRQ].ioa_is_dead) len = snprintf(buf, PAGE_SIZE, "offline\n"); else len = snprintf(buf, PAGE_SIZE, "online\n"); @@ -3427,14 +3460,20 @@ static ssize_t ipr_store_adapter_state(struct device *dev, struct Scsi_Host *shost = class_to_shost(dev); struct ipr_ioa_cfg *ioa_cfg = (struct ipr_ioa_cfg *)shost->hostdata; unsigned long lock_flags; - int result = count; + int result = count, i; if (!capable(CAP_SYS_ADMIN)) return -EACCES; spin_lock_irqsave(ioa_cfg->host->host_lock, lock_flags); - if (ioa_cfg->ioa_is_dead && !strncmp(buf, "online", 6)) { - ioa_cfg->ioa_is_dead = 0; + if (ioa_cfg->hrrq[IPR_INIT_HRRQ].ioa_is_dead && + !strncmp(buf, "online", 6)) { + for (i = 0; i < ioa_cfg->hrrq_num; i++) { + spin_lock(&ioa_cfg->hrrq[i]._lock); + ioa_cfg->hrrq[i].ioa_is_dead = 0; + spin_unlock(&ioa_cfg->hrrq[i]._lock); + } + wmb(); ioa_cfg->reset_retries = 0; ioa_cfg->in_ioa_bringdown = 0; ipr_initiate_ioa_reset(ioa_cfg, IPR_SHUTDOWN_NONE); @@ -3494,6 +3533,95 @@ static struct device_attribute ipr_ioa_reset_attr = { .store = ipr_store_reset_adapter }; +static int ipr_iopoll(struct blk_iopoll *iop, int budget); + /** + * ipr_show_iopoll_weight - Show ipr polling mode + * @dev: class device struct + * @buf: buffer + * + * Return value: + * number of bytes printed to buffer + **/ +static ssize_t ipr_show_iopoll_weight(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct Scsi_Host *shost = class_to_shost(dev); + struct ipr_ioa_cfg *ioa_cfg = (struct ipr_ioa_cfg *)shost->hostdata; + unsigned long lock_flags = 0; + int len; + + spin_lock_irqsave(shost->host_lock, lock_flags); + len = snprintf(buf, PAGE_SIZE, "%d\n", ioa_cfg->iopoll_weight); + spin_unlock_irqrestore(shost->host_lock, lock_flags); + + return len; +} + +/** + * ipr_store_iopoll_weight - Change the adapter's polling mode + * @dev: class device struct + * @buf: buffer + * + * Return value: + * number of bytes printed to buffer + **/ +static ssize_t ipr_store_iopoll_weight(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct Scsi_Host *shost = class_to_shost(dev); + struct ipr_ioa_cfg *ioa_cfg = (struct ipr_ioa_cfg *)shost->hostdata; + unsigned long user_iopoll_weight; + unsigned long lock_flags = 0; + int i; + + if (!ioa_cfg->sis64) { + dev_info(&ioa_cfg->pdev->dev, "blk-iopoll not supported on this adapter\n"); + return -EINVAL; + } + if (kstrtoul(buf, 10, &user_iopoll_weight)) + return -EINVAL; + + if (user_iopoll_weight > 256) { + dev_info(&ioa_cfg->pdev->dev, "Invalid blk-iopoll weight. It must be less than 256\n"); + return -EINVAL; + } + + if (user_iopoll_weight == ioa_cfg->iopoll_weight) { + dev_info(&ioa_cfg->pdev->dev, "Current blk-iopoll weight has the same weight\n"); + return strlen(buf); + } + + if (blk_iopoll_enabled && ioa_cfg->iopoll_weight && + ioa_cfg->sis64 && ioa_cfg->nvectors > 1) { + for (i = 1; i < ioa_cfg->hrrq_num; i++) + blk_iopoll_disable(&ioa_cfg->hrrq[i].iopoll); + } + + spin_lock_irqsave(shost->host_lock, lock_flags); + ioa_cfg->iopoll_weight = user_iopoll_weight; + if (blk_iopoll_enabled && ioa_cfg->iopoll_weight && + ioa_cfg->sis64 && ioa_cfg->nvectors > 1) { + for (i = 1; i < ioa_cfg->hrrq_num; i++) { + blk_iopoll_init(&ioa_cfg->hrrq[i].iopoll, + ioa_cfg->iopoll_weight, ipr_iopoll); + blk_iopoll_enable(&ioa_cfg->hrrq[i].iopoll); + } + } + spin_unlock_irqrestore(shost->host_lock, lock_flags); + + return strlen(buf); +} + +static struct device_attribute ipr_iopoll_weight_attr = { + .attr = { + .name = "iopoll_weight", + .mode = S_IRUGO | S_IWUSR, + }, + .show = ipr_show_iopoll_weight, + .store = ipr_store_iopoll_weight +}; + /** * ipr_alloc_ucode_buffer - Allocates a microcode download buffer * @buf_len: buffer length @@ -3862,6 +3990,7 @@ static struct device_attribute *ipr_ioa_attrs[] = { &ipr_ioa_reset_attr, &ipr_update_fw_attr, &ipr_ioa_fw_type_attr, + &ipr_iopoll_weight_attr, NULL, }; @@ -4014,7 +4143,7 @@ static int ipr_alloc_dump(struct ipr_ioa_cfg *ioa_cfg) ioa_cfg->dump = dump; ioa_cfg->sdt_state = WAIT_FOR_DUMP; - if (ioa_cfg->ioa_is_dead && !ioa_cfg->dump_taken) { + if (ioa_cfg->hrrq[IPR_INIT_HRRQ].ioa_is_dead && !ioa_cfg->dump_taken) { ioa_cfg->dump_taken = 1; schedule_work(&ioa_cfg->work_q); } @@ -4227,8 +4356,8 @@ static ssize_t ipr_show_resource_path(struct device *dev, struct device_attribut res = (struct ipr_resource_entry *)sdev->hostdata; if (res && ioa_cfg->sis64) len = snprintf(buf, PAGE_SIZE, "%s\n", - ipr_format_res_path(res->res_path, buffer, - sizeof(buffer))); + __ipr_format_res_path(res->res_path, buffer, + sizeof(buffer))); else if (res) len = snprintf(buf, PAGE_SIZE, "%d:%d:%d:%d\n", ioa_cfg->host->host_no, res->bus, res->target, res->lun); @@ -4556,8 +4685,8 @@ static int ipr_slave_configure(struct scsi_device *sdev) scsi_adjust_queue_depth(sdev, 0, sdev->host->cmd_per_lun); if (ioa_cfg->sis64) sdev_printk(KERN_INFO, sdev, "Resource path: %s\n", - ipr_format_res_path(res->res_path, buffer, - sizeof(buffer))); + ipr_format_res_path(ioa_cfg, + res->res_path, buffer, sizeof(buffer))); return 0; } spin_unlock_irqrestore(ioa_cfg->host->host_lock, lock_flags); @@ -4638,22 +4767,18 @@ static int ipr_slave_alloc(struct scsi_device *sdev) return rc; } -/** - * ipr_eh_host_reset - Reset the host adapter - * @scsi_cmd: scsi command struct - * - * Return value: - * SUCCESS / FAILED - **/ -static int __ipr_eh_host_reset(struct scsi_cmnd *scsi_cmd) +static int ipr_eh_host_reset(struct scsi_cmnd *cmd) { struct ipr_ioa_cfg *ioa_cfg; - int rc; + unsigned long lock_flags = 0; + int rc = SUCCESS; ENTER; - ioa_cfg = (struct ipr_ioa_cfg *) scsi_cmd->device->host->hostdata; + ioa_cfg = (struct ipr_ioa_cfg *) cmd->device->host->hostdata; + spin_lock_irqsave(ioa_cfg->host->host_lock, lock_flags); if (!ioa_cfg->in_reset_reload) { + ipr_initiate_ioa_reset(ioa_cfg, IPR_SHUTDOWN_ABBREV); dev_err(&ioa_cfg->pdev->dev, "Adapter being reset as a result of error recovery.\n"); @@ -4661,20 +4786,19 @@ static int __ipr_eh_host_reset(struct scsi_cmnd *scsi_cmd) ioa_cfg->sdt_state = GET_DUMP; } - rc = ipr_reset_reload(ioa_cfg, IPR_SHUTDOWN_ABBREV); - - LEAVE; - return rc; -} - -static int ipr_eh_host_reset(struct scsi_cmnd *cmd) -{ - int rc; + spin_unlock_irqrestore(ioa_cfg->host->host_lock, lock_flags); + wait_event(ioa_cfg->reset_wait_q, !ioa_cfg->in_reset_reload); + spin_lock_irqsave(ioa_cfg->host->host_lock, lock_flags); - spin_lock_irq(cmd->device->host->host_lock); - rc = __ipr_eh_host_reset(cmd); - spin_unlock_irq(cmd->device->host->host_lock); + /* If we got hit with a host reset while we were already resetting + the adapter for some reason, and the reset failed. */ + if (ioa_cfg->hrrq[IPR_INIT_HRRQ].ioa_is_dead) { + ipr_trace; + rc = FAILED; + } + spin_unlock_irqrestore(ioa_cfg->host->host_lock, lock_flags); + LEAVE; return rc; } @@ -4723,7 +4847,7 @@ static int ipr_device_reset(struct ipr_ioa_cfg *ioa_cfg, ipr_send_blocking_cmd(ipr_cmd, ipr_timeout, IPR_DEVICE_RESET_TIMEOUT); ioasc = be32_to_cpu(ipr_cmd->s.ioasa.hdr.ioasc); - list_add_tail(&ipr_cmd->queue, &ioa_cfg->free_q); + list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_free_q); if (ipr_is_gata(res) && res->sata_port && ioasc != IPR_IOASC_IOA_WAS_RESET) { if (ipr_cmd->ioa_cfg->sis64) memcpy(&res->sata_port->ioasa, &ipr_cmd->s.ioasa64.u.gata, @@ -4793,6 +4917,7 @@ static int __ipr_eh_dev_reset(struct scsi_cmnd *scsi_cmd) struct ipr_resource_entry *res; struct ata_port *ap; int rc = 0; + struct ipr_hrr_queue *hrrq; ENTER; ioa_cfg = (struct ipr_ioa_cfg *) scsi_cmd->device->host->hostdata; @@ -4808,22 +4933,26 @@ static int __ipr_eh_dev_reset(struct scsi_cmnd *scsi_cmd) */ if (ioa_cfg->in_reset_reload) return FAILED; - if (ioa_cfg->ioa_is_dead) + if (ioa_cfg->hrrq[IPR_INIT_HRRQ].ioa_is_dead) return FAILED; - list_for_each_entry(ipr_cmd, &ioa_cfg->pending_q, queue) { - if (ipr_cmd->ioarcb.res_handle == res->res_handle) { - if (ipr_cmd->scsi_cmd) - ipr_cmd->done = ipr_scsi_eh_done; - if (ipr_cmd->qc) - ipr_cmd->done = ipr_sata_eh_done; - if (ipr_cmd->qc && !(ipr_cmd->qc->flags & ATA_QCFLAG_FAILED)) { - ipr_cmd->qc->err_mask |= AC_ERR_TIMEOUT; - ipr_cmd->qc->flags |= ATA_QCFLAG_FAILED; + for_each_hrrq(hrrq, ioa_cfg) { + spin_lock(&hrrq->_lock); + list_for_each_entry(ipr_cmd, &hrrq->hrrq_pending_q, queue) { + if (ipr_cmd->ioarcb.res_handle == res->res_handle) { + if (ipr_cmd->scsi_cmd) + ipr_cmd->done = ipr_scsi_eh_done; + if (ipr_cmd->qc) + ipr_cmd->done = ipr_sata_eh_done; + if (ipr_cmd->qc && + !(ipr_cmd->qc->flags & ATA_QCFLAG_FAILED)) { + ipr_cmd->qc->err_mask |= AC_ERR_TIMEOUT; + ipr_cmd->qc->flags |= ATA_QCFLAG_FAILED; + } } } + spin_unlock(&hrrq->_lock); } - res->resetting_device = 1; scmd_printk(KERN_ERR, scsi_cmd, "Resetting device\n"); @@ -4833,11 +4962,17 @@ static int __ipr_eh_dev_reset(struct scsi_cmnd *scsi_cmd) ata_std_error_handler(ap); spin_lock_irq(scsi_cmd->device->host->host_lock); - list_for_each_entry(ipr_cmd, &ioa_cfg->pending_q, queue) { - if (ipr_cmd->ioarcb.res_handle == res->res_handle) { - rc = -EIO; - break; + for_each_hrrq(hrrq, ioa_cfg) { + spin_lock(&hrrq->_lock); + list_for_each_entry(ipr_cmd, + &hrrq->hrrq_pending_q, queue) { + if (ipr_cmd->ioarcb.res_handle == + res->res_handle) { + rc = -EIO; + break; + } } + spin_unlock(&hrrq->_lock); } } else rc = ipr_device_reset(ioa_cfg, res); @@ -4890,7 +5025,7 @@ static void ipr_bus_reset_done(struct ipr_cmnd *ipr_cmd) else ipr_cmd->sibling->done(ipr_cmd->sibling); - list_add_tail(&ipr_cmd->queue, &ioa_cfg->free_q); + list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_free_q); LEAVE; } @@ -4951,6 +5086,7 @@ static int ipr_cancel_op(struct scsi_cmnd *scsi_cmd) struct ipr_cmd_pkt *cmd_pkt; u32 ioasc, int_reg; int op_found = 0; + struct ipr_hrr_queue *hrrq; ENTER; ioa_cfg = (struct ipr_ioa_cfg *)scsi_cmd->device->host->hostdata; @@ -4960,7 +5096,8 @@ static int ipr_cancel_op(struct scsi_cmnd *scsi_cmd) * This will force the mid-layer to call ipr_eh_host_reset, * which will then go to sleep and wait for the reset to complete */ - if (ioa_cfg->in_reset_reload || ioa_cfg->ioa_is_dead) + if (ioa_cfg->in_reset_reload || + ioa_cfg->hrrq[IPR_INIT_HRRQ].ioa_is_dead) return FAILED; if (!res) return FAILED; @@ -4975,12 +5112,16 @@ static int ipr_cancel_op(struct scsi_cmnd *scsi_cmd) if (!ipr_is_gscsi(res)) return FAILED; - list_for_each_entry(ipr_cmd, &ioa_cfg->pending_q, queue) { - if (ipr_cmd->scsi_cmd == scsi_cmd) { - ipr_cmd->done = ipr_scsi_eh_done; - op_found = 1; - break; + for_each_hrrq(hrrq, ioa_cfg) { + spin_lock(&hrrq->_lock); + list_for_each_entry(ipr_cmd, &hrrq->hrrq_pending_q, queue) { + if (ipr_cmd->scsi_cmd == scsi_cmd) { + ipr_cmd->done = ipr_scsi_eh_done; + op_found = 1; + break; + } } + spin_unlock(&hrrq->_lock); } if (!op_found) @@ -5007,7 +5148,7 @@ static int ipr_cancel_op(struct scsi_cmnd *scsi_cmd) ipr_trace; } - list_add_tail(&ipr_cmd->queue, &ioa_cfg->free_q); + list_add_tail(&ipr_cmd->queue, &hrrq->hrrq_free_q); if (!ipr_is_naca_model(res)) res->needs_sync_complete = 1; @@ -5099,6 +5240,9 @@ static irqreturn_t ipr_handle_other_interrupt(struct ipr_ioa_cfg *ioa_cfg, } else { if (int_reg & IPR_PCII_IOA_UNIT_CHECKED) ioa_cfg->ioa_unit_checked = 1; + else if (int_reg & IPR_PCII_NO_HOST_RRQ) + dev_err(&ioa_cfg->pdev->dev, + "No Host RRQ. 0x%08X\n", int_reg); else dev_err(&ioa_cfg->pdev->dev, "Permanent IOA failure. 0x%08X\n", int_reg); @@ -5121,10 +5265,10 @@ static irqreturn_t ipr_handle_other_interrupt(struct ipr_ioa_cfg *ioa_cfg, * Return value: * none **/ -static void ipr_isr_eh(struct ipr_ioa_cfg *ioa_cfg, char *msg) +static void ipr_isr_eh(struct ipr_ioa_cfg *ioa_cfg, char *msg, u16 number) { ioa_cfg->errors_logged++; - dev_err(&ioa_cfg->pdev->dev, "%s\n", msg); + dev_err(&ioa_cfg->pdev->dev, "%s %d\n", msg, number); if (WAIT_FOR_DUMP == ioa_cfg->sdt_state) ioa_cfg->sdt_state = GET_DUMP; @@ -5132,6 +5276,83 @@ static void ipr_isr_eh(struct ipr_ioa_cfg *ioa_cfg, char *msg) ipr_initiate_ioa_reset(ioa_cfg, IPR_SHUTDOWN_NONE); } +static int ipr_process_hrrq(struct ipr_hrr_queue *hrr_queue, int budget, + struct list_head *doneq) +{ + u32 ioasc; + u16 cmd_index; + struct ipr_cmnd *ipr_cmd; + struct ipr_ioa_cfg *ioa_cfg = hrr_queue->ioa_cfg; + int num_hrrq = 0; + + /* If interrupts are disabled, ignore the interrupt */ + if (!hrr_queue->allow_interrupts) + return 0; + + while ((be32_to_cpu(*hrr_queue->hrrq_curr) & IPR_HRRQ_TOGGLE_BIT) == + hrr_queue->toggle_bit) { + + cmd_index = (be32_to_cpu(*hrr_queue->hrrq_curr) & + IPR_HRRQ_REQ_RESP_HANDLE_MASK) >> + IPR_HRRQ_REQ_RESP_HANDLE_SHIFT; + + if (unlikely(cmd_index > hrr_queue->max_cmd_id || + cmd_index < hrr_queue->min_cmd_id)) { + ipr_isr_eh(ioa_cfg, + "Invalid response handle from IOA: ", + cmd_index); + break; + } + + ipr_cmd = ioa_cfg->ipr_cmnd_list[cmd_index]; + ioasc = be32_to_cpu(ipr_cmd->s.ioasa.hdr.ioasc); + + ipr_trc_hook(ipr_cmd, IPR_TRACE_FINISH, ioasc); + + list_move_tail(&ipr_cmd->queue, doneq); + + if (hrr_queue->hrrq_curr < hrr_queue->hrrq_end) { + hrr_queue->hrrq_curr++; + } else { + hrr_queue->hrrq_curr = hrr_queue->hrrq_start; + hrr_queue->toggle_bit ^= 1u; + } + num_hrrq++; + if (budget > 0 && num_hrrq >= budget) + break; + } + + return num_hrrq; +} + +static int ipr_iopoll(struct blk_iopoll *iop, int budget) +{ + struct ipr_ioa_cfg *ioa_cfg; + struct ipr_hrr_queue *hrrq; + struct ipr_cmnd *ipr_cmd, *temp; + unsigned long hrrq_flags; + int completed_ops; + LIST_HEAD(doneq); + + hrrq = container_of(iop, struct ipr_hrr_queue, iopoll); + ioa_cfg = hrrq->ioa_cfg; + + spin_lock_irqsave(hrrq->lock, hrrq_flags); + completed_ops = ipr_process_hrrq(hrrq, budget, &doneq); + + if (completed_ops < budget) + blk_iopoll_complete(iop); + spin_unlock_irqrestore(hrrq->lock, hrrq_flags); + + list_for_each_entry_safe(ipr_cmd, temp, &doneq, queue) { + list_del(&ipr_cmd->queue); + del_timer(&ipr_cmd->timer); + ipr_cmd->fast_done(ipr_cmd); + } + + return completed_ops; +} + /** * ipr_isr - Interrupt service routine * @irq: irq number @@ -5142,78 +5363,48 @@ static void ipr_isr_eh(struct ipr_ioa_cfg *ioa_cfg, char *msg) **/ static irqreturn_t ipr_isr(int irq, void *devp) { - struct ipr_ioa_cfg *ioa_cfg = (struct ipr_ioa_cfg *)devp; - unsigned long lock_flags = 0; + struct ipr_hrr_queue *hrrq = (struct ipr_hrr_queue *)devp; + struct ipr_ioa_cfg *ioa_cfg = hrrq->ioa_cfg; + unsigned long hrrq_flags = 0; u32 int_reg = 0; - u32 ioasc; - u16 cmd_index; int num_hrrq = 0; int irq_none = 0; struct ipr_cmnd *ipr_cmd, *temp; irqreturn_t rc = IRQ_NONE; LIST_HEAD(doneq); - spin_lock_irqsave(ioa_cfg->host->host_lock, lock_flags); - + spin_lock_irqsave(hrrq->lock, hrrq_flags); /* If interrupts are disabled, ignore the interrupt */ - if (!ioa_cfg->allow_interrupts) { - spin_unlock_irqrestore(ioa_cfg->host->host_lock, lock_flags); + if (!hrrq->allow_interrupts) { + spin_unlock_irqrestore(hrrq->lock, hrrq_flags); return IRQ_NONE; } while (1) { - ipr_cmd = NULL; - - while ((be32_to_cpu(*ioa_cfg->hrrq_curr) & IPR_HRRQ_TOGGLE_BIT) == - ioa_cfg->toggle_bit) { - - cmd_index = (be32_to_cpu(*ioa_cfg->hrrq_curr) & - IPR_HRRQ_REQ_RESP_HANDLE_MASK) >> IPR_HRRQ_REQ_RESP_HANDLE_SHIFT; - - if (unlikely(cmd_index >= IPR_NUM_CMD_BLKS)) { - ipr_isr_eh(ioa_cfg, "Invalid response handle from IOA"); - rc = IRQ_HANDLED; - goto unlock_out; - } - - ipr_cmd = ioa_cfg->ipr_cmnd_list[cmd_index]; - - ioasc = be32_to_cpu(ipr_cmd->s.ioasa.hdr.ioasc); - - ipr_trc_hook(ipr_cmd, IPR_TRACE_FINISH, ioasc); - - list_move_tail(&ipr_cmd->queue, &doneq); - - rc = IRQ_HANDLED; - - if (ioa_cfg->hrrq_curr < ioa_cfg->hrrq_end) { - ioa_cfg->hrrq_curr++; - } else { - ioa_cfg->hrrq_curr = ioa_cfg->hrrq_start; - ioa_cfg->toggle_bit ^= 1u; - } - } + if (ipr_process_hrrq(hrrq, -1, &doneq)) { + rc = IRQ_HANDLED; - if (ipr_cmd && !ioa_cfg->clear_isr) - break; + if (!ioa_cfg->clear_isr) + break; - if (ipr_cmd != NULL) { /* Clear the PCI interrupt */ num_hrrq = 0; do { - writel(IPR_PCII_HRRQ_UPDATED, ioa_cfg->regs.clr_interrupt_reg32); + writel(IPR_PCII_HRRQ_UPDATED, + ioa_cfg->regs.clr_interrupt_reg32); int_reg = readl(ioa_cfg->regs.sense_interrupt_reg32); } while (int_reg & IPR_PCII_HRRQ_UPDATED && - num_hrrq++ < IPR_MAX_HRRQ_RETRIES); + num_hrrq++ < IPR_MAX_HRRQ_RETRIES); } else if (rc == IRQ_NONE && irq_none == 0) { int_reg = readl(ioa_cfg->regs.sense_interrupt_reg32); irq_none++; } else if (num_hrrq == IPR_MAX_HRRQ_RETRIES && int_reg & IPR_PCII_HRRQ_UPDATED) { - ipr_isr_eh(ioa_cfg, "Error clearing HRRQ"); + ipr_isr_eh(ioa_cfg, + "Error clearing HRRQ: ", num_hrrq); rc = IRQ_HANDLED; - goto unlock_out; + break; } else break; } @@ -5221,14 +5412,64 @@ static irqreturn_t ipr_isr(int irq, void *devp) if (unlikely(rc == IRQ_NONE)) rc = ipr_handle_other_interrupt(ioa_cfg, int_reg); -unlock_out: - spin_unlock_irqrestore(ioa_cfg->host->host_lock, lock_flags); + spin_unlock_irqrestore(hrrq->lock, hrrq_flags); list_for_each_entry_safe(ipr_cmd, temp, &doneq, queue) { list_del(&ipr_cmd->queue); del_timer(&ipr_cmd->timer); ipr_cmd->fast_done(ipr_cmd); } + return rc; +} + +/** + * ipr_isr_mhrrq - Interrupt service routine + * @irq: irq number + * @devp: pointer to ioa config struct + * + * Return value: + * IRQ_NONE / IRQ_HANDLED + **/ +static irqreturn_t ipr_isr_mhrrq(int irq, void *devp) +{ + struct ipr_hrr_queue *hrrq = (struct ipr_hrr_queue *)devp; + struct ipr_ioa_cfg *ioa_cfg = hrrq->ioa_cfg; + unsigned long hrrq_flags = 0; + struct ipr_cmnd *ipr_cmd, *temp; + irqreturn_t rc = IRQ_NONE; + LIST_HEAD(doneq); + + spin_lock_irqsave(hrrq->lock, hrrq_flags); + + /* If interrupts are disabled, ignore the interrupt */ + if (!hrrq->allow_interrupts) { + spin_unlock_irqrestore(hrrq->lock, hrrq_flags); + return IRQ_NONE; + } + + if (blk_iopoll_enabled && ioa_cfg->iopoll_weight && + ioa_cfg->sis64 && ioa_cfg->nvectors > 1) { + if ((be32_to_cpu(*hrrq->hrrq_curr) & IPR_HRRQ_TOGGLE_BIT) == + hrrq->toggle_bit) { + if (!blk_iopoll_sched_prep(&hrrq->iopoll)) + blk_iopoll_sched(&hrrq->iopoll); + spin_unlock_irqrestore(hrrq->lock, hrrq_flags); + return IRQ_HANDLED; + } + } else { + if ((be32_to_cpu(*hrrq->hrrq_curr) & IPR_HRRQ_TOGGLE_BIT) == + hrrq->toggle_bit) + + if (ipr_process_hrrq(hrrq, -1, &doneq)) + rc = IRQ_HANDLED; + } + + spin_unlock_irqrestore(hrrq->lock, hrrq_flags); + list_for_each_entry_safe(ipr_cmd, temp, &doneq, queue) { + list_del(&ipr_cmd->queue); + del_timer(&ipr_cmd->timer); + ipr_cmd->fast_done(ipr_cmd); + } return rc; } @@ -5388,7 +5629,6 @@ static void ipr_erp_done(struct ipr_cmnd *ipr_cmd) { struct scsi_cmnd *scsi_cmd = ipr_cmd->scsi_cmd; struct ipr_resource_entry *res = scsi_cmd->device->hostdata; - struct ipr_ioa_cfg *ioa_cfg = ipr_cmd->ioa_cfg; u32 ioasc = be32_to_cpu(ipr_cmd->s.ioasa.hdr.ioasc); if (IPR_IOASC_SENSE_KEY(ioasc) > 0) { @@ -5406,7 +5646,7 @@ static void ipr_erp_done(struct ipr_cmnd *ipr_cmd) res->in_erp = 0; } scsi_dma_unmap(ipr_cmd->scsi_cmd); - list_add_tail(&ipr_cmd->queue, &ioa_cfg->free_q); + list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_free_q); scsi_cmd->scsi_done(scsi_cmd); } @@ -5790,7 +6030,7 @@ static void ipr_erp_start(struct ipr_ioa_cfg *ioa_cfg, } scsi_dma_unmap(ipr_cmd->scsi_cmd); - list_add_tail(&ipr_cmd->queue, &ioa_cfg->free_q); + list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_free_q); scsi_cmd->scsi_done(scsi_cmd); } @@ -5809,21 +6049,21 @@ static void ipr_scsi_done(struct ipr_cmnd *ipr_cmd) struct ipr_ioa_cfg *ioa_cfg = ipr_cmd->ioa_cfg; struct scsi_cmnd *scsi_cmd = ipr_cmd->scsi_cmd; u32 ioasc = be32_to_cpu(ipr_cmd->s.ioasa.hdr.ioasc); - unsigned long lock_flags; + unsigned long hrrq_flags; scsi_set_resid(scsi_cmd, be32_to_cpu(ipr_cmd->s.ioasa.hdr.residual_data_len)); if (likely(IPR_IOASC_SENSE_KEY(ioasc) == 0)) { scsi_dma_unmap(scsi_cmd); - spin_lock_irqsave(ioa_cfg->host->host_lock, lock_flags); - list_add_tail(&ipr_cmd->queue, &ioa_cfg->free_q); + spin_lock_irqsave(ipr_cmd->hrrq->lock, hrrq_flags); + list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_free_q); scsi_cmd->scsi_done(scsi_cmd); - spin_unlock_irqrestore(ioa_cfg->host->host_lock, lock_flags); + spin_unlock_irqrestore(ipr_cmd->hrrq->lock, hrrq_flags); } else { - spin_lock_irqsave(ioa_cfg->host->host_lock, lock_flags); + spin_lock_irqsave(ipr_cmd->hrrq->lock, hrrq_flags); ipr_erp_start(ioa_cfg, ipr_cmd); - spin_unlock_irqrestore(ioa_cfg->host->host_lock, lock_flags); + spin_unlock_irqrestore(ipr_cmd->hrrq->lock, hrrq_flags); } } @@ -5846,22 +6086,34 @@ static int ipr_queuecommand(struct Scsi_Host *shost, struct ipr_resource_entry *res; struct ipr_ioarcb *ioarcb; struct ipr_cmnd *ipr_cmd; - unsigned long lock_flags; + unsigned long hrrq_flags, lock_flags; int rc; + struct ipr_hrr_queue *hrrq; + int hrrq_id; ioa_cfg = (struct ipr_ioa_cfg *)shost->hostdata; - spin_lock_irqsave(shost->host_lock, lock_flags); scsi_cmd->result = (DID_OK << 16); res = scsi_cmd->device->hostdata; + if (ipr_is_gata(res) && res->sata_port) { + spin_lock_irqsave(ioa_cfg->host->host_lock, lock_flags); + rc = ata_sas_queuecmd(scsi_cmd, res->sata_port->ap); + spin_unlock_irqrestore(ioa_cfg->host->host_lock, lock_flags); + return rc; + } + + hrrq_id = ipr_get_hrrq_index(ioa_cfg); + hrrq = &ioa_cfg->hrrq[hrrq_id]; + + spin_lock_irqsave(hrrq->lock, hrrq_flags); /* * We are currently blocking all devices due to a host reset * We have told the host to stop giving us new requests, but * ERP ops don't count. FIXME */ - if (unlikely(!ioa_cfg->allow_cmds && !ioa_cfg->ioa_is_dead)) { - spin_unlock_irqrestore(shost->host_lock, lock_flags); + if (unlikely(!hrrq->allow_cmds && !hrrq->ioa_is_dead && !hrrq->removing_ioa)) { + spin_unlock_irqrestore(hrrq->lock, hrrq_flags); return SCSI_MLQUEUE_HOST_BUSY; } @@ -5869,19 +6121,17 @@ static int ipr_queuecommand(struct Scsi_Host *shost, * FIXME - Create scsi_set_host_offline interface * and the ioa_is_dead check can be removed */ - if (unlikely(ioa_cfg->ioa_is_dead || !res)) { - spin_unlock_irqrestore(shost->host_lock, lock_flags); + if (unlikely(hrrq->ioa_is_dead || hrrq->removing_ioa || !res)) { + spin_unlock_irqrestore(hrrq->lock, hrrq_flags); goto err_nodev; } - if (ipr_is_gata(res) && res->sata_port) { - rc = ata_sas_queuecmd(scsi_cmd, res->sata_port->ap); - spin_unlock_irqrestore(shost->host_lock, lock_flags); - return rc; + ipr_cmd = __ipr_get_free_ipr_cmnd(hrrq); + if (ipr_cmd == NULL) { + spin_unlock_irqrestore(hrrq->lock, hrrq_flags); + return SCSI_MLQUEUE_HOST_BUSY; } - - ipr_cmd = __ipr_get_free_ipr_cmnd(ioa_cfg); - spin_unlock_irqrestore(shost->host_lock, lock_flags); + spin_unlock_irqrestore(hrrq->lock, hrrq_flags); ipr_init_ipr_cmnd(ipr_cmd, ipr_scsi_done); ioarcb = &ipr_cmd->ioarcb; @@ -5902,26 +6152,27 @@ static int ipr_queuecommand(struct Scsi_Host *shost, } if (scsi_cmd->cmnd[0] >= 0xC0 && - (!ipr_is_gscsi(res) || scsi_cmd->cmnd[0] == IPR_QUERY_RSRC_STATE)) + (!ipr_is_gscsi(res) || scsi_cmd->cmnd[0] == IPR_QUERY_RSRC_STATE)) { ioarcb->cmd_pkt.request_type = IPR_RQTYPE_IOACMD; + } if (ioa_cfg->sis64) rc = ipr_build_ioadl64(ioa_cfg, ipr_cmd); else rc = ipr_build_ioadl(ioa_cfg, ipr_cmd); - spin_lock_irqsave(shost->host_lock, lock_flags); - if (unlikely(rc || (!ioa_cfg->allow_cmds && !ioa_cfg->ioa_is_dead))) { - list_add_tail(&ipr_cmd->queue, &ioa_cfg->free_q); - spin_unlock_irqrestore(shost->host_lock, lock_flags); + spin_lock_irqsave(hrrq->lock, hrrq_flags); + if (unlikely(rc || (!hrrq->allow_cmds && !hrrq->ioa_is_dead))) { + list_add_tail(&ipr_cmd->queue, &hrrq->hrrq_free_q); + spin_unlock_irqrestore(hrrq->lock, hrrq_flags); if (!rc) scsi_dma_unmap(scsi_cmd); return SCSI_MLQUEUE_HOST_BUSY; } - if (unlikely(ioa_cfg->ioa_is_dead)) { - list_add_tail(&ipr_cmd->queue, &ioa_cfg->free_q); - spin_unlock_irqrestore(shost->host_lock, lock_flags); + if (unlikely(hrrq->ioa_is_dead)) { + list_add_tail(&ipr_cmd->queue, &hrrq->hrrq_free_q); + spin_unlock_irqrestore(hrrq->lock, hrrq_flags); scsi_dma_unmap(scsi_cmd); goto err_nodev; } @@ -5931,18 +6182,18 @@ static int ipr_queuecommand(struct Scsi_Host *shost, ioarcb->cmd_pkt.flags_hi |= IPR_FLAGS_HI_SYNC_COMPLETE; res->needs_sync_complete = 0; } - list_add_tail(&ipr_cmd->queue, &ioa_cfg->pending_q); + list_add_tail(&ipr_cmd->queue, &hrrq->hrrq_pending_q); ipr_trc_hook(ipr_cmd, IPR_TRACE_START, IPR_GET_RES_PHYS_LOC(res)); ipr_send_command(ipr_cmd); - spin_unlock_irqrestore(shost->host_lock, lock_flags); + spin_unlock_irqrestore(hrrq->lock, hrrq_flags); return 0; err_nodev: - spin_lock_irqsave(shost->host_lock, lock_flags); + spin_lock_irqsave(hrrq->lock, hrrq_flags); memset(scsi_cmd->sense_buffer, 0, SCSI_SENSE_BUFFERSIZE); scsi_cmd->result = (DID_NO_CONNECT << 16); scsi_cmd->scsi_done(scsi_cmd); - spin_unlock_irqrestore(shost->host_lock, lock_flags); + spin_unlock_irqrestore(hrrq->lock, hrrq_flags); return 0; } @@ -6040,7 +6291,7 @@ static void ipr_ata_phy_reset(struct ata_port *ap) spin_lock_irqsave(ioa_cfg->host->host_lock, flags); } - if (!ioa_cfg->allow_cmds) + if (!ioa_cfg->hrrq[IPR_INIT_HRRQ].allow_cmds) goto out_unlock; rc = ipr_device_reset(ioa_cfg, res); @@ -6071,6 +6322,7 @@ static void ipr_ata_post_internal(struct ata_queued_cmd *qc) struct ipr_sata_port *sata_port = qc->ap->private_data; struct ipr_ioa_cfg *ioa_cfg = sata_port->ioa_cfg; struct ipr_cmnd *ipr_cmd; + struct ipr_hrr_queue *hrrq; unsigned long flags; spin_lock_irqsave(ioa_cfg->host->host_lock, flags); @@ -6080,11 +6332,15 @@ static void ipr_ata_post_internal(struct ata_queued_cmd *qc) spin_lock_irqsave(ioa_cfg->host->host_lock, flags); } - list_for_each_entry(ipr_cmd, &ioa_cfg->pending_q, queue) { - if (ipr_cmd->qc == qc) { - ipr_device_reset(ioa_cfg, sata_port->res); - break; + for_each_hrrq(hrrq, ioa_cfg) { + spin_lock(&hrrq->_lock); + list_for_each_entry(ipr_cmd, &hrrq->hrrq_pending_q, queue) { + if (ipr_cmd->qc == qc) { + ipr_device_reset(ioa_cfg, sata_port->res); + break; + } } + spin_unlock(&hrrq->_lock); } spin_unlock_irqrestore(ioa_cfg->host->host_lock, flags); } @@ -6133,6 +6389,7 @@ static void ipr_sata_done(struct ipr_cmnd *ipr_cmd) struct ipr_resource_entry *res = sata_port->res; u32 ioasc = be32_to_cpu(ipr_cmd->s.ioasa.hdr.ioasc); + spin_lock(&ipr_cmd->hrrq->_lock); if (ipr_cmd->ioa_cfg->sis64) memcpy(&sata_port->ioasa, &ipr_cmd->s.ioasa64.u.gata, sizeof(struct ipr_ioasa_gata)); @@ -6148,7 +6405,8 @@ static void ipr_sata_done(struct ipr_cmnd *ipr_cmd) qc->err_mask |= __ac_err_mask(sata_port->ioasa.status); else qc->err_mask |= ac_err_mask(sata_port->ioasa.status); - list_add_tail(&ipr_cmd->queue, &ioa_cfg->free_q); + list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_free_q); + spin_unlock(&ipr_cmd->hrrq->_lock); ata_qc_complete(qc); } @@ -6244,6 +6502,48 @@ static void ipr_build_ata_ioadl(struct ipr_cmnd *ipr_cmd, } /** + * ipr_qc_defer - Get a free ipr_cmd + * @qc: queued command + * + * Return value: + * 0 if success + **/ +static int ipr_qc_defer(struct ata_queued_cmd *qc) +{ + struct ata_port *ap = qc->ap; + struct ipr_sata_port *sata_port = ap->private_data; + struct ipr_ioa_cfg *ioa_cfg = sata_port->ioa_cfg; + struct ipr_cmnd *ipr_cmd; + struct ipr_hrr_queue *hrrq; + int hrrq_id; + + hrrq_id = ipr_get_hrrq_index(ioa_cfg); + hrrq = &ioa_cfg->hrrq[hrrq_id]; + + qc->lldd_task = NULL; + spin_lock(&hrrq->_lock); + if (unlikely(hrrq->ioa_is_dead)) { + spin_unlock(&hrrq->_lock); + return 0; + } + + if (unlikely(!hrrq->allow_cmds)) { + spin_unlock(&hrrq->_lock); + return ATA_DEFER_LINK; + } + + ipr_cmd = __ipr_get_free_ipr_cmnd(hrrq); + if (ipr_cmd == NULL) { + spin_unlock(&hrrq->_lock); + return ATA_DEFER_LINK; + } + + qc->lldd_task = ipr_cmd; + spin_unlock(&hrrq->_lock); + return 0; +} + +/** * ipr_qc_issue - Issue a SATA qc to a device * @qc: queued command * @@ -6260,10 +6560,23 @@ static unsigned int ipr_qc_issue(struct ata_queued_cmd *qc) struct ipr_ioarcb *ioarcb; struct ipr_ioarcb_ata_regs *regs; - if (unlikely(!ioa_cfg->allow_cmds || ioa_cfg->ioa_is_dead)) + if (qc->lldd_task == NULL) + ipr_qc_defer(qc); + + ipr_cmd = qc->lldd_task; + if (ipr_cmd == NULL) return AC_ERR_SYSTEM; - ipr_cmd = ipr_get_free_ipr_cmnd(ioa_cfg); + qc->lldd_task = NULL; + spin_lock(&ipr_cmd->hrrq->_lock); + if (unlikely(!ipr_cmd->hrrq->allow_cmds || + ipr_cmd->hrrq->ioa_is_dead)) { + list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_free_q); + spin_unlock(&ipr_cmd->hrrq->_lock); + return AC_ERR_SYSTEM; + } + + ipr_init_ipr_cmnd(ipr_cmd, ipr_lock_and_done); ioarcb = &ipr_cmd->ioarcb; if (ioa_cfg->sis64) { @@ -6275,7 +6588,7 @@ static unsigned int ipr_qc_issue(struct ata_queued_cmd *qc) memset(regs, 0, sizeof(*regs)); ioarcb->add_cmd_parms_len = cpu_to_be16(sizeof(*regs)); - list_add_tail(&ipr_cmd->queue, &ioa_cfg->pending_q); + list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_pending_q); ipr_cmd->qc = qc; ipr_cmd->done = ipr_sata_done; ipr_cmd->ioarcb.res_handle = res->res_handle; @@ -6315,10 +6628,12 @@ static unsigned int ipr_qc_issue(struct ata_queued_cmd *qc) default: WARN_ON(1); + spin_unlock(&ipr_cmd->hrrq->_lock); return AC_ERR_INVALID; } ipr_send_command(ipr_cmd); + spin_unlock(&ipr_cmd->hrrq->_lock); return 0; } @@ -6357,6 +6672,7 @@ static struct ata_port_operations ipr_sata_ops = { .hardreset = ipr_sata_reset, .post_internal_cmd = ipr_ata_post_internal, .qc_prep = ata_noop_qc_prep, + .qc_defer = ipr_qc_defer, .qc_issue = ipr_qc_issue, .qc_fill_rtf = ipr_qc_fill_rtf, .port_start = ata_sas_port_start, @@ -6425,14 +6741,17 @@ static int ipr_ioa_bringdown_done(struct ipr_cmnd *ipr_cmd) struct ipr_ioa_cfg *ioa_cfg = ipr_cmd->ioa_cfg; ENTER; + if (!ioa_cfg->hrrq[IPR_INIT_HRRQ].removing_ioa) { + ipr_trace; + spin_unlock_irq(ioa_cfg->host->host_lock); + scsi_unblock_requests(ioa_cfg->host); + spin_lock_irq(ioa_cfg->host->host_lock); + } + ioa_cfg->in_reset_reload = 0; ioa_cfg->reset_retries = 0; - list_add_tail(&ipr_cmd->queue, &ioa_cfg->free_q); + list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_free_q); wake_up_all(&ioa_cfg->reset_wait_q); - - spin_unlock_irq(ioa_cfg->host->host_lock); - scsi_unblock_requests(ioa_cfg->host); - spin_lock_irq(ioa_cfg->host->host_lock); LEAVE; return IPR_RC_JOB_RETURN; @@ -6454,11 +6773,16 @@ static int ipr_ioa_reset_done(struct ipr_cmnd *ipr_cmd) struct ipr_ioa_cfg *ioa_cfg = ipr_cmd->ioa_cfg; struct ipr_resource_entry *res; struct ipr_hostrcb *hostrcb, *temp; - int i = 0; + int i = 0, j; ENTER; ioa_cfg->in_reset_reload = 0; - ioa_cfg->allow_cmds = 1; + for (j = 0; j < ioa_cfg->hrrq_num; j++) { + spin_lock(&ioa_cfg->hrrq[j]._lock); + ioa_cfg->hrrq[j].allow_cmds = 1; + spin_unlock(&ioa_cfg->hrrq[j]._lock); + } + wmb(); ioa_cfg->reset_cmd = NULL; ioa_cfg->doorbell |= IPR_RUNTIME_RESET; @@ -6482,14 +6806,14 @@ static int ipr_ioa_reset_done(struct ipr_cmnd *ipr_cmd) dev_info(&ioa_cfg->pdev->dev, "IOA initialized.\n"); ioa_cfg->reset_retries = 0; - list_add_tail(&ipr_cmd->queue, &ioa_cfg->free_q); + list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_free_q); wake_up_all(&ioa_cfg->reset_wait_q); spin_unlock(ioa_cfg->host->host_lock); scsi_unblock_requests(ioa_cfg->host); spin_lock(ioa_cfg->host->host_lock); - if (!ioa_cfg->allow_cmds) + if (!ioa_cfg->hrrq[IPR_INIT_HRRQ].allow_cmds) scsi_block_requests(ioa_cfg->host); LEAVE; @@ -6560,9 +6884,11 @@ static int ipr_set_supported_devs(struct ipr_cmnd *ipr_cmd) if (!ioa_cfg->sis64) ipr_cmd->job_step = ipr_set_supported_devs; + LEAVE; return IPR_RC_JOB_RETURN; } + LEAVE; return IPR_RC_JOB_CONTINUE; } @@ -6820,7 +7146,7 @@ static int ipr_reset_cmd_failed(struct ipr_cmnd *ipr_cmd) ipr_cmd->ioarcb.cmd_pkt.cdb[0], ioasc); ipr_initiate_ioa_reset(ioa_cfg, IPR_SHUTDOWN_NONE); - list_add_tail(&ipr_cmd->queue, &ioa_cfg->free_q); + list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_free_q); return IPR_RC_JOB_RETURN; } @@ -7278,46 +7604,71 @@ static int ipr_ioafp_identify_hrrq(struct ipr_cmnd *ipr_cmd) { struct ipr_ioa_cfg *ioa_cfg = ipr_cmd->ioa_cfg; struct ipr_ioarcb *ioarcb = &ipr_cmd->ioarcb; + struct ipr_hrr_queue *hrrq; ENTER; + ipr_cmd->job_step = ipr_ioafp_std_inquiry; dev_info(&ioa_cfg->pdev->dev, "Starting IOA initialization sequence.\n"); - ioarcb->cmd_pkt.cdb[0] = IPR_ID_HOST_RR_Q; - ioarcb->res_handle = cpu_to_be32(IPR_IOA_RES_HANDLE); + if (ioa_cfg->identify_hrrq_index < ioa_cfg->hrrq_num) { + hrrq = &ioa_cfg->hrrq[ioa_cfg->identify_hrrq_index]; - ioarcb->cmd_pkt.request_type = IPR_RQTYPE_IOACMD; - if (ioa_cfg->sis64) - ioarcb->cmd_pkt.cdb[1] = 0x1; - ioarcb->cmd_pkt.cdb[2] = - ((u64) ioa_cfg->host_rrq_dma >> 24) & 0xff; - ioarcb->cmd_pkt.cdb[3] = - ((u64) ioa_cfg->host_rrq_dma >> 16) & 0xff; - ioarcb->cmd_pkt.cdb[4] = - ((u64) ioa_cfg->host_rrq_dma >> 8) & 0xff; - ioarcb->cmd_pkt.cdb[5] = - ((u64) ioa_cfg->host_rrq_dma) & 0xff; - ioarcb->cmd_pkt.cdb[7] = - ((sizeof(u32) * IPR_NUM_CMD_BLKS) >> 8) & 0xff; - ioarcb->cmd_pkt.cdb[8] = - (sizeof(u32) * IPR_NUM_CMD_BLKS) & 0xff; + ioarcb->cmd_pkt.cdb[0] = IPR_ID_HOST_RR_Q; + ioarcb->res_handle = cpu_to_be32(IPR_IOA_RES_HANDLE); - if (ioa_cfg->sis64) { - ioarcb->cmd_pkt.cdb[10] = - ((u64) ioa_cfg->host_rrq_dma >> 56) & 0xff; - ioarcb->cmd_pkt.cdb[11] = - ((u64) ioa_cfg->host_rrq_dma >> 48) & 0xff; - ioarcb->cmd_pkt.cdb[12] = - ((u64) ioa_cfg->host_rrq_dma >> 40) & 0xff; - ioarcb->cmd_pkt.cdb[13] = - ((u64) ioa_cfg->host_rrq_dma >> 32) & 0xff; - } + ioarcb->cmd_pkt.request_type = IPR_RQTYPE_IOACMD; + if (ioa_cfg->sis64) + ioarcb->cmd_pkt.cdb[1] = 0x1; - ipr_cmd->job_step = ipr_ioafp_std_inquiry; + if (ioa_cfg->nvectors == 1) + ioarcb->cmd_pkt.cdb[1] &= ~IPR_ID_HRRQ_SELE_ENABLE; + else + ioarcb->cmd_pkt.cdb[1] |= IPR_ID_HRRQ_SELE_ENABLE; + + ioarcb->cmd_pkt.cdb[2] = + ((u64) hrrq->host_rrq_dma >> 24) & 0xff; + ioarcb->cmd_pkt.cdb[3] = + ((u64) hrrq->host_rrq_dma >> 16) & 0xff; + ioarcb->cmd_pkt.cdb[4] = + ((u64) hrrq->host_rrq_dma >> 8) & 0xff; + ioarcb->cmd_pkt.cdb[5] = + ((u64) hrrq->host_rrq_dma) & 0xff; + ioarcb->cmd_pkt.cdb[7] = + ((sizeof(u32) * hrrq->size) >> 8) & 0xff; + ioarcb->cmd_pkt.cdb[8] = + (sizeof(u32) * hrrq->size) & 0xff; + + if (ioarcb->cmd_pkt.cdb[1] & IPR_ID_HRRQ_SELE_ENABLE) + ioarcb->cmd_pkt.cdb[9] = + ioa_cfg->identify_hrrq_index; - ipr_do_req(ipr_cmd, ipr_reset_ioa_job, ipr_timeout, IPR_INTERNAL_TIMEOUT); + if (ioa_cfg->sis64) { + ioarcb->cmd_pkt.cdb[10] = + ((u64) hrrq->host_rrq_dma >> 56) & 0xff; + ioarcb->cmd_pkt.cdb[11] = + ((u64) hrrq->host_rrq_dma >> 48) & 0xff; + ioarcb->cmd_pkt.cdb[12] = + ((u64) hrrq->host_rrq_dma >> 40) & 0xff; + ioarcb->cmd_pkt.cdb[13] = + ((u64) hrrq->host_rrq_dma >> 32) & 0xff; + } + + if (ioarcb->cmd_pkt.cdb[1] & IPR_ID_HRRQ_SELE_ENABLE) + ioarcb->cmd_pkt.cdb[14] = + ioa_cfg->identify_hrrq_index; + + ipr_do_req(ipr_cmd, ipr_reset_ioa_job, ipr_timeout, + IPR_INTERNAL_TIMEOUT); + + if (++ioa_cfg->identify_hrrq_index < ioa_cfg->hrrq_num) + ipr_cmd->job_step = ipr_ioafp_identify_hrrq; + + LEAVE; + return IPR_RC_JOB_RETURN; + } LEAVE; - return IPR_RC_JOB_RETURN; + return IPR_RC_JOB_CONTINUE; } /** @@ -7365,7 +7716,9 @@ static void ipr_reset_timer_done(struct ipr_cmnd *ipr_cmd) static void ipr_reset_start_timer(struct ipr_cmnd *ipr_cmd, unsigned long timeout) { - list_add_tail(&ipr_cmd->queue, &ipr_cmd->ioa_cfg->pending_q); + + ENTER; + list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_pending_q); ipr_cmd->done = ipr_reset_ioa_job; ipr_cmd->timer.data = (unsigned long) ipr_cmd; @@ -7383,13 +7736,26 @@ static void ipr_reset_start_timer(struct ipr_cmnd *ipr_cmd, **/ static void ipr_init_ioa_mem(struct ipr_ioa_cfg *ioa_cfg) { - memset(ioa_cfg->host_rrq, 0, sizeof(u32) * IPR_NUM_CMD_BLKS); + struct ipr_hrr_queue *hrrq; - /* Initialize Host RRQ pointers */ - ioa_cfg->hrrq_start = ioa_cfg->host_rrq; - ioa_cfg->hrrq_end = &ioa_cfg->host_rrq[IPR_NUM_CMD_BLKS - 1]; - ioa_cfg->hrrq_curr = ioa_cfg->hrrq_start; - ioa_cfg->toggle_bit = 1; + for_each_hrrq(hrrq, ioa_cfg) { + spin_lock(&hrrq->_lock); + memset(hrrq->host_rrq, 0, sizeof(u32) * hrrq->size); + + /* Initialize Host RRQ pointers */ + hrrq->hrrq_start = hrrq->host_rrq; + hrrq->hrrq_end = &hrrq->host_rrq[hrrq->size - 1]; + hrrq->hrrq_curr = hrrq->hrrq_start; + hrrq->toggle_bit = 1; + spin_unlock(&hrrq->_lock); + } + wmb(); + + ioa_cfg->identify_hrrq_index = 0; + if (ioa_cfg->hrrq_num == 1) + atomic_set(&ioa_cfg->hrrq_index, 0); + else + atomic_set(&ioa_cfg->hrrq_index, 1); /* Zero out config table */ memset(ioa_cfg->u.cfg_table, 0, ioa_cfg->cfg_table_size); @@ -7446,7 +7812,8 @@ static int ipr_reset_next_stage(struct ipr_cmnd *ipr_cmd) ipr_cmd->timer.function = (void (*)(unsigned long))ipr_oper_timeout; ipr_cmd->done = ipr_reset_ioa_job; add_timer(&ipr_cmd->timer); - list_add_tail(&ipr_cmd->queue, &ioa_cfg->pending_q); + + list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_pending_q); return IPR_RC_JOB_RETURN; } @@ -7466,12 +7833,18 @@ static int ipr_reset_enable_ioa(struct ipr_cmnd *ipr_cmd) struct ipr_ioa_cfg *ioa_cfg = ipr_cmd->ioa_cfg; volatile u32 int_reg; volatile u64 maskval; + int i; ENTER; ipr_cmd->job_step = ipr_ioafp_identify_hrrq; ipr_init_ioa_mem(ioa_cfg); - ioa_cfg->allow_interrupts = 1; + for (i = 0; i < ioa_cfg->hrrq_num; i++) { + spin_lock(&ioa_cfg->hrrq[i]._lock); + ioa_cfg->hrrq[i].allow_interrupts = 1; + spin_unlock(&ioa_cfg->hrrq[i]._lock); + } + wmb(); if (ioa_cfg->sis64) { /* Set the adapter to the correct endian mode. */ writel(IPR_ENDIAN_SWAP_KEY, ioa_cfg->regs.endian_swap_reg); @@ -7511,7 +7884,7 @@ static int ipr_reset_enable_ioa(struct ipr_cmnd *ipr_cmd) ipr_cmd->timer.function = (void (*)(unsigned long))ipr_oper_timeout; ipr_cmd->done = ipr_reset_ioa_job; add_timer(&ipr_cmd->timer); - list_add_tail(&ipr_cmd->queue, &ioa_cfg->pending_q); + list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_pending_q); LEAVE; return IPR_RC_JOB_RETURN; @@ -8030,7 +8403,8 @@ static int ipr_reset_shutdown_ioa(struct ipr_cmnd *ipr_cmd) int rc = IPR_RC_JOB_CONTINUE; ENTER; - if (shutdown_type != IPR_SHUTDOWN_NONE && !ioa_cfg->ioa_is_dead) { + if (shutdown_type != IPR_SHUTDOWN_NONE && + !ioa_cfg->hrrq[IPR_INIT_HRRQ].ioa_is_dead) { ipr_cmd->ioarcb.res_handle = cpu_to_be32(IPR_IOA_RES_HANDLE); ipr_cmd->ioarcb.cmd_pkt.request_type = IPR_RQTYPE_IOACMD; ipr_cmd->ioarcb.cmd_pkt.cdb[0] = IPR_IOA_SHUTDOWN; @@ -8078,7 +8452,8 @@ static void ipr_reset_ioa_job(struct ipr_cmnd *ipr_cmd) * We are doing nested adapter resets and this is * not the current reset job. */ - list_add_tail(&ipr_cmd->queue, &ioa_cfg->free_q); + list_add_tail(&ipr_cmd->queue, + &ipr_cmd->hrrq->hrrq_free_q); return; } @@ -8113,10 +8488,17 @@ static void _ipr_initiate_ioa_reset(struct ipr_ioa_cfg *ioa_cfg, enum ipr_shutdown_type shutdown_type) { struct ipr_cmnd *ipr_cmd; + int i; ioa_cfg->in_reset_reload = 1; - ioa_cfg->allow_cmds = 0; - scsi_block_requests(ioa_cfg->host); + for (i = 0; i < ioa_cfg->hrrq_num; i++) { + spin_lock(&ioa_cfg->hrrq[i]._lock); + ioa_cfg->hrrq[i].allow_cmds = 0; + spin_unlock(&ioa_cfg->hrrq[i]._lock); + } + wmb(); + if (!ioa_cfg->hrrq[IPR_INIT_HRRQ].removing_ioa) + scsi_block_requests(ioa_cfg->host); ipr_cmd = ipr_get_free_ipr_cmnd(ioa_cfg); ioa_cfg->reset_cmd = ipr_cmd; @@ -8141,7 +8523,9 @@ static void _ipr_initiate_ioa_reset(struct ipr_ioa_cfg *ioa_cfg, static void ipr_initiate_ioa_reset(struct ipr_ioa_cfg *ioa_cfg, enum ipr_shutdown_type shutdown_type) { - if (ioa_cfg->ioa_is_dead) + int i; + + if (ioa_cfg->hrrq[IPR_INIT_HRRQ].ioa_is_dead) return; if (ioa_cfg->in_reset_reload) { @@ -8156,7 +8540,12 @@ static void ipr_initiate_ioa_reset(struct ipr_ioa_cfg *ioa_cfg, "IOA taken offline - error recovery failed\n"); ioa_cfg->reset_retries = 0; - ioa_cfg->ioa_is_dead = 1; + for (i = 0; i < ioa_cfg->hrrq_num; i++) { + spin_lock(&ioa_cfg->hrrq[i]._lock); + ioa_cfg->hrrq[i].ioa_is_dead = 1; + spin_unlock(&ioa_cfg->hrrq[i]._lock); + } + wmb(); if (ioa_cfg->in_ioa_bringdown) { ioa_cfg->reset_cmd = NULL; @@ -8164,9 +8553,11 @@ static void ipr_initiate_ioa_reset(struct ipr_ioa_cfg *ioa_cfg, ipr_fail_all_ops(ioa_cfg); wake_up_all(&ioa_cfg->reset_wait_q); - spin_unlock_irq(ioa_cfg->host->host_lock); - scsi_unblock_requests(ioa_cfg->host); - spin_lock_irq(ioa_cfg->host->host_lock); + if (!ioa_cfg->hrrq[IPR_INIT_HRRQ].removing_ioa) { + spin_unlock_irq(ioa_cfg->host->host_lock); + scsi_unblock_requests(ioa_cfg->host); + spin_lock_irq(ioa_cfg->host->host_lock); + } return; } else { ioa_cfg->in_ioa_bringdown = 1; @@ -8188,9 +8579,17 @@ static void ipr_initiate_ioa_reset(struct ipr_ioa_cfg *ioa_cfg, */ static int ipr_reset_freeze(struct ipr_cmnd *ipr_cmd) { + struct ipr_ioa_cfg *ioa_cfg = ipr_cmd->ioa_cfg; + int i; + /* Disallow new interrupts, avoid loop */ - ipr_cmd->ioa_cfg->allow_interrupts = 0; - list_add_tail(&ipr_cmd->queue, &ipr_cmd->ioa_cfg->pending_q); + for (i = 0; i < ioa_cfg->hrrq_num; i++) { + spin_lock(&ioa_cfg->hrrq[i]._lock); + ioa_cfg->hrrq[i].allow_interrupts = 0; + spin_unlock(&ioa_cfg->hrrq[i]._lock); + } + wmb(); + list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_pending_q); ipr_cmd->done = ipr_reset_ioa_job; return IPR_RC_JOB_RETURN; } @@ -8247,13 +8646,19 @@ static void ipr_pci_perm_failure(struct pci_dev *pdev) { unsigned long flags = 0; struct ipr_ioa_cfg *ioa_cfg = pci_get_drvdata(pdev); + int i; spin_lock_irqsave(ioa_cfg->host->host_lock, flags); if (ioa_cfg->sdt_state == WAIT_FOR_DUMP) ioa_cfg->sdt_state = ABORT_DUMP; ioa_cfg->reset_retries = IPR_NUM_RESET_RELOAD_RETRIES; ioa_cfg->in_ioa_bringdown = 1; - ioa_cfg->allow_cmds = 0; + for (i = 0; i < ioa_cfg->hrrq_num; i++) { + spin_lock(&ioa_cfg->hrrq[i]._lock); + ioa_cfg->hrrq[i].allow_cmds = 0; + spin_unlock(&ioa_cfg->hrrq[i]._lock); + } + wmb(); ipr_initiate_ioa_reset(ioa_cfg, IPR_SHUTDOWN_NONE); spin_unlock_irqrestore(ioa_cfg->host->host_lock, flags); } @@ -8310,12 +8715,11 @@ static int ipr_probe_ioa_part2(struct ipr_ioa_cfg *ioa_cfg) } else _ipr_initiate_ioa_reset(ioa_cfg, ipr_reset_enable_ioa, IPR_SHUTDOWN_NONE); - spin_unlock_irqrestore(ioa_cfg->host->host_lock, host_lock_flags); wait_event(ioa_cfg->reset_wait_q, !ioa_cfg->in_reset_reload); spin_lock_irqsave(ioa_cfg->host->host_lock, host_lock_flags); - if (ioa_cfg->ioa_is_dead) { + if (ioa_cfg->hrrq[IPR_INIT_HRRQ].ioa_is_dead) { rc = -EIO; } else if (ipr_invalid_adapter(ioa_cfg)) { if (!ipr_testmode) @@ -8376,8 +8780,13 @@ static void ipr_free_mem(struct ipr_ioa_cfg *ioa_cfg) pci_free_consistent(ioa_cfg->pdev, sizeof(struct ipr_misc_cbs), ioa_cfg->vpd_cbs, ioa_cfg->vpd_cbs_dma); ipr_free_cmd_blks(ioa_cfg); - pci_free_consistent(ioa_cfg->pdev, sizeof(u32) * IPR_NUM_CMD_BLKS, - ioa_cfg->host_rrq, ioa_cfg->host_rrq_dma); + + for (i = 0; i < ioa_cfg->hrrq_num; i++) + pci_free_consistent(ioa_cfg->pdev, + sizeof(u32) * ioa_cfg->hrrq[i].size, + ioa_cfg->hrrq[i].host_rrq, + ioa_cfg->hrrq[i].host_rrq_dma); + pci_free_consistent(ioa_cfg->pdev, ioa_cfg->cfg_table_size, ioa_cfg->u.cfg_table, ioa_cfg->cfg_table_dma); @@ -8408,8 +8817,23 @@ static void ipr_free_all_resources(struct ipr_ioa_cfg *ioa_cfg) struct pci_dev *pdev = ioa_cfg->pdev; ENTER; - free_irq(pdev->irq, ioa_cfg); - pci_disable_msi(pdev); + if (ioa_cfg->intr_flag == IPR_USE_MSI || + ioa_cfg->intr_flag == IPR_USE_MSIX) { + int i; + for (i = 0; i < ioa_cfg->nvectors; i++) + free_irq(ioa_cfg->vectors_info[i].vec, + &ioa_cfg->hrrq[i]); + } else + free_irq(pdev->irq, &ioa_cfg->hrrq[0]); + + if (ioa_cfg->intr_flag == IPR_USE_MSI) { + pci_disable_msi(pdev); + ioa_cfg->intr_flag &= ~IPR_USE_MSI; + } else if (ioa_cfg->intr_flag == IPR_USE_MSIX) { + pci_disable_msix(pdev); + ioa_cfg->intr_flag &= ~IPR_USE_MSIX; + } + iounmap(ioa_cfg->hdw_dma_regs); pci_release_regions(pdev); ipr_free_mem(ioa_cfg); @@ -8430,7 +8854,7 @@ static int ipr_alloc_cmd_blks(struct ipr_ioa_cfg *ioa_cfg) struct ipr_cmnd *ipr_cmd; struct ipr_ioarcb *ioarcb; dma_addr_t dma_addr; - int i; + int i, entries_each_hrrq, hrrq_id = 0; ioa_cfg->ipr_cmd_pool = pci_pool_create(IPR_NAME, ioa_cfg->pdev, sizeof(struct ipr_cmnd), 512, 0); @@ -8446,6 +8870,41 @@ static int ipr_alloc_cmd_blks(struct ipr_ioa_cfg *ioa_cfg) return -ENOMEM; } + for (i = 0; i < ioa_cfg->hrrq_num; i++) { + if (ioa_cfg->hrrq_num > 1) { + if (i == 0) { + entries_each_hrrq = IPR_NUM_INTERNAL_CMD_BLKS; + ioa_cfg->hrrq[i].min_cmd_id = 0; + ioa_cfg->hrrq[i].max_cmd_id = + (entries_each_hrrq - 1); + } else { + entries_each_hrrq = + IPR_NUM_BASE_CMD_BLKS/ + (ioa_cfg->hrrq_num - 1); + ioa_cfg->hrrq[i].min_cmd_id = + IPR_NUM_INTERNAL_CMD_BLKS + + (i - 1) * entries_each_hrrq; + ioa_cfg->hrrq[i].max_cmd_id = + (IPR_NUM_INTERNAL_CMD_BLKS + + i * entries_each_hrrq - 1); + } + } else { + entries_each_hrrq = IPR_NUM_CMD_BLKS; + ioa_cfg->hrrq[i].min_cmd_id = 0; + ioa_cfg->hrrq[i].max_cmd_id = (entries_each_hrrq - 1); + } + ioa_cfg->hrrq[i].size = entries_each_hrrq; + } + + BUG_ON(ioa_cfg->hrrq_num == 0); + + i = IPR_NUM_CMD_BLKS - + ioa_cfg->hrrq[ioa_cfg->hrrq_num - 1].max_cmd_id - 1; + if (i > 0) { + ioa_cfg->hrrq[ioa_cfg->hrrq_num - 1].size += i; + ioa_cfg->hrrq[ioa_cfg->hrrq_num - 1].max_cmd_id += i; + } + for (i = 0; i < IPR_NUM_CMD_BLKS; i++) { ipr_cmd = pci_pool_alloc(ioa_cfg->ipr_cmd_pool, GFP_KERNEL, &dma_addr); @@ -8484,7 +8943,11 @@ static int ipr_alloc_cmd_blks(struct ipr_ioa_cfg *ioa_cfg) ipr_cmd->sense_buffer_dma = dma_addr + offsetof(struct ipr_cmnd, sense_buffer); - list_add_tail(&ipr_cmd->queue, &ioa_cfg->free_q); + ipr_cmd->ioarcb.cmd_pkt.hrrq_id = hrrq_id; + ipr_cmd->hrrq = &ioa_cfg->hrrq[hrrq_id]; + list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_free_q); + if (i >= ioa_cfg->hrrq[hrrq_id].max_cmd_id) + hrrq_id++; } return 0; @@ -8516,6 +8979,10 @@ static int ipr_alloc_mem(struct ipr_ioa_cfg *ioa_cfg) BITS_TO_LONGS(ioa_cfg->max_devs_supported), GFP_KERNEL); ioa_cfg->vset_ids = kzalloc(sizeof(unsigned long) * BITS_TO_LONGS(ioa_cfg->max_devs_supported), GFP_KERNEL); + + if (!ioa_cfg->target_ids || !ioa_cfg->array_ids + || !ioa_cfg->vset_ids) + goto out_free_res_entries; } for (i = 0; i < ioa_cfg->max_devs_supported; i++) { @@ -8530,15 +8997,34 @@ static int ipr_alloc_mem(struct ipr_ioa_cfg *ioa_cfg) if (!ioa_cfg->vpd_cbs) goto out_free_res_entries; + for (i = 0; i < ioa_cfg->hrrq_num; i++) { + INIT_LIST_HEAD(&ioa_cfg->hrrq[i].hrrq_free_q); + INIT_LIST_HEAD(&ioa_cfg->hrrq[i].hrrq_pending_q); + spin_lock_init(&ioa_cfg->hrrq[i]._lock); + if (i == 0) + ioa_cfg->hrrq[i].lock = ioa_cfg->host->host_lock; + else + ioa_cfg->hrrq[i].lock = &ioa_cfg->hrrq[i]._lock; + } + if (ipr_alloc_cmd_blks(ioa_cfg)) goto out_free_vpd_cbs; - ioa_cfg->host_rrq = pci_alloc_consistent(ioa_cfg->pdev, - sizeof(u32) * IPR_NUM_CMD_BLKS, - &ioa_cfg->host_rrq_dma); - - if (!ioa_cfg->host_rrq) - goto out_ipr_free_cmd_blocks; + for (i = 0; i < ioa_cfg->hrrq_num; i++) { + ioa_cfg->hrrq[i].host_rrq = pci_alloc_consistent(ioa_cfg->pdev, + sizeof(u32) * ioa_cfg->hrrq[i].size, + &ioa_cfg->hrrq[i].host_rrq_dma); + + if (!ioa_cfg->hrrq[i].host_rrq) { + while (--i > 0) + pci_free_consistent(pdev, + sizeof(u32) * ioa_cfg->hrrq[i].size, + ioa_cfg->hrrq[i].host_rrq, + ioa_cfg->hrrq[i].host_rrq_dma); + goto out_ipr_free_cmd_blocks; + } + ioa_cfg->hrrq[i].ioa_cfg = ioa_cfg; + } ioa_cfg->u.cfg_table = pci_alloc_consistent(ioa_cfg->pdev, ioa_cfg->cfg_table_size, @@ -8582,8 +9068,12 @@ out_free_hostrcb_dma: ioa_cfg->u.cfg_table, ioa_cfg->cfg_table_dma); out_free_host_rrq: - pci_free_consistent(pdev, sizeof(u32) * IPR_NUM_CMD_BLKS, - ioa_cfg->host_rrq, ioa_cfg->host_rrq_dma); + for (i = 0; i < ioa_cfg->hrrq_num; i++) { + pci_free_consistent(pdev, + sizeof(u32) * ioa_cfg->hrrq[i].size, + ioa_cfg->hrrq[i].host_rrq, + ioa_cfg->hrrq[i].host_rrq_dma); + } out_ipr_free_cmd_blocks: ipr_free_cmd_blks(ioa_cfg); out_free_vpd_cbs: @@ -8591,6 +9081,9 @@ out_free_vpd_cbs: ioa_cfg->vpd_cbs, ioa_cfg->vpd_cbs_dma); out_free_res_entries: kfree(ioa_cfg->res_entries); + kfree(ioa_cfg->target_ids); + kfree(ioa_cfg->array_ids); + kfree(ioa_cfg->vset_ids); goto out; } @@ -8638,15 +9131,11 @@ static void ipr_init_ioa_cfg(struct ipr_ioa_cfg *ioa_cfg, ioa_cfg->doorbell = IPR_DOORBELL; sprintf(ioa_cfg->eye_catcher, IPR_EYECATCHER); sprintf(ioa_cfg->trace_start, IPR_TRACE_START_LABEL); - sprintf(ioa_cfg->ipr_free_label, IPR_FREEQ_LABEL); - sprintf(ioa_cfg->ipr_pending_label, IPR_PENDQ_LABEL); sprintf(ioa_cfg->cfg_table_start, IPR_CFG_TBL_START); sprintf(ioa_cfg->resource_table_label, IPR_RES_TABLE_LABEL); sprintf(ioa_cfg->ipr_hcam_label, IPR_HCAM_LABEL); sprintf(ioa_cfg->ipr_cmd_label, IPR_CMD_LABEL); - INIT_LIST_HEAD(&ioa_cfg->free_q); - INIT_LIST_HEAD(&ioa_cfg->pending_q); INIT_LIST_HEAD(&ioa_cfg->hostrcb_free_q); INIT_LIST_HEAD(&ioa_cfg->hostrcb_pending_q); INIT_LIST_HEAD(&ioa_cfg->free_res_q); @@ -8724,6 +9213,88 @@ ipr_get_chip_info(const struct pci_device_id *dev_id) return NULL; } +static int ipr_enable_msix(struct ipr_ioa_cfg *ioa_cfg) +{ + struct msix_entry entries[IPR_MAX_MSIX_VECTORS]; + int i, err, vectors; + + for (i = 0; i < ARRAY_SIZE(entries); ++i) + entries[i].entry = i; + + vectors = ipr_number_of_msix; + + while ((err = pci_enable_msix(ioa_cfg->pdev, entries, vectors)) > 0) + vectors = err; + + if (err < 0) { + pci_disable_msix(ioa_cfg->pdev); + return err; + } + + if (!err) { + for (i = 0; i < vectors; i++) + ioa_cfg->vectors_info[i].vec = entries[i].vector; + ioa_cfg->nvectors = vectors; + } + + return err; +} + +static int ipr_enable_msi(struct ipr_ioa_cfg *ioa_cfg) +{ + int i, err, vectors; + + vectors = ipr_number_of_msix; + + while ((err = pci_enable_msi_block(ioa_cfg->pdev, vectors)) > 0) + vectors = err; + + if (err < 0) { + pci_disable_msi(ioa_cfg->pdev); + return err; + } + + if (!err) { + for (i = 0; i < vectors; i++) + ioa_cfg->vectors_info[i].vec = ioa_cfg->pdev->irq + i; + ioa_cfg->nvectors = vectors; + } + + return err; +} + +static void name_msi_vectors(struct ipr_ioa_cfg *ioa_cfg) +{ + int vec_idx, n = sizeof(ioa_cfg->vectors_info[0].desc) - 1; + + for (vec_idx = 0; vec_idx < ioa_cfg->nvectors; vec_idx++) { + snprintf(ioa_cfg->vectors_info[vec_idx].desc, n, + "host%d-%d", ioa_cfg->host->host_no, vec_idx); + ioa_cfg->vectors_info[vec_idx]. + desc[strlen(ioa_cfg->vectors_info[vec_idx].desc)] = 0; + } +} + +static int ipr_request_other_msi_irqs(struct ipr_ioa_cfg *ioa_cfg) +{ + int i, rc; + + for (i = 1; i < ioa_cfg->nvectors; i++) { + rc = request_irq(ioa_cfg->vectors_info[i].vec, + ipr_isr_mhrrq, + 0, + ioa_cfg->vectors_info[i].desc, + &ioa_cfg->hrrq[i]); + if (rc) { + while (--i >= 0) + free_irq(ioa_cfg->vectors_info[i].vec, + &ioa_cfg->hrrq[i]); + return rc; + } + } + return 0; +} + /** * ipr_test_intr - Handle the interrupt generated in ipr_test_msi(). * @pdev: PCI device struct @@ -8740,6 +9311,7 @@ static irqreturn_t ipr_test_intr(int irq, void *devp) unsigned long lock_flags = 0; irqreturn_t rc = IRQ_HANDLED; + dev_info(&ioa_cfg->pdev->dev, "Received IRQ : %d\n", irq); spin_lock_irqsave(ioa_cfg->host->host_lock, lock_flags); ioa_cfg->msi_received = 1; @@ -8787,9 +9359,9 @@ static int ipr_test_msi(struct ipr_ioa_cfg *ioa_cfg, struct pci_dev *pdev) writel(IPR_PCII_IO_DEBUG_ACKNOWLEDGE, ioa_cfg->regs.sense_interrupt_reg32); int_reg = readl(ioa_cfg->regs.sense_interrupt_reg); wait_event_timeout(ioa_cfg->msi_wait_q, ioa_cfg->msi_received, HZ); + spin_lock_irqsave(ioa_cfg->host->host_lock, lock_flags); ipr_mask_and_clear_interrupts(ioa_cfg, ~IPR_PCII_IOA_TRANS_TO_OPER); - spin_lock_irqsave(ioa_cfg->host->host_lock, lock_flags); if (!ioa_cfg->msi_received) { /* MSI test failed */ dev_info(&pdev->dev, "MSI test failed. Falling back to LSI.\n"); @@ -8806,8 +9378,7 @@ static int ipr_test_msi(struct ipr_ioa_cfg *ioa_cfg, struct pci_dev *pdev) return rc; } -/** - * ipr_probe_ioa - Allocates memory and does first stage of initialization + /* ipr_probe_ioa - Allocates memory and does first stage of initialization * @pdev: PCI device struct * @dev_id: PCI device id struct * @@ -8823,6 +9394,7 @@ static int ipr_probe_ioa(struct pci_dev *pdev, void __iomem *ipr_regs; int rc = PCIBIOS_SUCCESSFUL; volatile u32 mask, uproc, interrupts; + unsigned long lock_flags; ENTER; @@ -8918,17 +9490,56 @@ static int ipr_probe_ioa(struct pci_dev *pdev, goto cleanup_nomem; } - /* Enable MSI style interrupts if they are supported. */ - if (ioa_cfg->ipr_chip->intr_type == IPR_USE_MSI && !pci_enable_msi(pdev)) { + if (ipr_number_of_msix > IPR_MAX_MSIX_VECTORS) { + dev_err(&pdev->dev, "The max number of MSIX is %d\n", + IPR_MAX_MSIX_VECTORS); + ipr_number_of_msix = IPR_MAX_MSIX_VECTORS; + } + + if (ioa_cfg->ipr_chip->intr_type == IPR_USE_MSI && + ipr_enable_msix(ioa_cfg) == 0) + ioa_cfg->intr_flag = IPR_USE_MSIX; + else if (ioa_cfg->ipr_chip->intr_type == IPR_USE_MSI && + ipr_enable_msi(ioa_cfg) == 0) + ioa_cfg->intr_flag = IPR_USE_MSI; + else { + ioa_cfg->intr_flag = IPR_USE_LSI; + ioa_cfg->nvectors = 1; + dev_info(&pdev->dev, "Cannot enable MSI.\n"); + } + + if (ioa_cfg->intr_flag == IPR_USE_MSI || + ioa_cfg->intr_flag == IPR_USE_MSIX) { rc = ipr_test_msi(ioa_cfg, pdev); - if (rc == -EOPNOTSUPP) - pci_disable_msi(pdev); + if (rc == -EOPNOTSUPP) { + if (ioa_cfg->intr_flag == IPR_USE_MSI) { + ioa_cfg->intr_flag &= ~IPR_USE_MSI; + pci_disable_msi(pdev); + } else if (ioa_cfg->intr_flag == IPR_USE_MSIX) { + ioa_cfg->intr_flag &= ~IPR_USE_MSIX; + pci_disable_msix(pdev); + } + + ioa_cfg->intr_flag = IPR_USE_LSI; + ioa_cfg->nvectors = 1; + } else if (rc) goto out_msi_disable; - else - dev_info(&pdev->dev, "MSI enabled with IRQ: %d\n", pdev->irq); - } else if (ipr_debug) - dev_info(&pdev->dev, "Cannot enable MSI.\n"); + else { + if (ioa_cfg->intr_flag == IPR_USE_MSI) + dev_info(&pdev->dev, + "Request for %d MSIs succeeded with starting IRQ: %d\n", + ioa_cfg->nvectors, pdev->irq); + else if (ioa_cfg->intr_flag == IPR_USE_MSIX) + dev_info(&pdev->dev, + "Request for %d MSIXs succeeded.", + ioa_cfg->nvectors); + } + } + + ioa_cfg->hrrq_num = min3(ioa_cfg->nvectors, + (unsigned int)num_online_cpus(), + (unsigned int)IPR_MAX_HRRQ_NUM); /* Save away PCI config space for use following IOA reset */ rc = pci_save_state(pdev); @@ -8975,11 +9586,24 @@ static int ipr_probe_ioa(struct pci_dev *pdev, if (interrupts & IPR_PCII_IOA_UNIT_CHECKED) ioa_cfg->ioa_unit_checked = 1; + spin_lock_irqsave(ioa_cfg->host->host_lock, lock_flags); ipr_mask_and_clear_interrupts(ioa_cfg, ~IPR_PCII_IOA_TRANS_TO_OPER); - rc = request_irq(pdev->irq, ipr_isr, - ioa_cfg->msi_received ? 0 : IRQF_SHARED, - IPR_NAME, ioa_cfg); + spin_unlock_irqrestore(ioa_cfg->host->host_lock, lock_flags); + if (ioa_cfg->intr_flag == IPR_USE_MSI + || ioa_cfg->intr_flag == IPR_USE_MSIX) { + name_msi_vectors(ioa_cfg); + rc = request_irq(ioa_cfg->vectors_info[0].vec, ipr_isr, + 0, + ioa_cfg->vectors_info[0].desc, + &ioa_cfg->hrrq[0]); + if (!rc) + rc = ipr_request_other_msi_irqs(ioa_cfg); + } else { + rc = request_irq(pdev->irq, ipr_isr, + IRQF_SHARED, + IPR_NAME, &ioa_cfg->hrrq[0]); + } if (rc) { dev_err(&pdev->dev, "Couldn't register IRQ %d! rc=%d\n", pdev->irq, rc); @@ -9004,7 +9628,10 @@ out: cleanup_nolog: ipr_free_mem(ioa_cfg); out_msi_disable: - pci_disable_msi(pdev); + if (ioa_cfg->intr_flag == IPR_USE_MSI) + pci_disable_msi(pdev); + else if (ioa_cfg->intr_flag == IPR_USE_MSIX) + pci_disable_msix(pdev); cleanup_nomem: iounmap(ipr_regs); out_release_regions: @@ -9074,6 +9701,7 @@ static void __ipr_remove(struct pci_dev *pdev) { unsigned long host_lock_flags = 0; struct ipr_ioa_cfg *ioa_cfg = pci_get_drvdata(pdev); + int i; ENTER; spin_lock_irqsave(ioa_cfg->host->host_lock, host_lock_flags); @@ -9083,6 +9711,12 @@ static void __ipr_remove(struct pci_dev *pdev) spin_lock_irqsave(ioa_cfg->host->host_lock, host_lock_flags); } + for (i = 0; i < ioa_cfg->hrrq_num; i++) { + spin_lock(&ioa_cfg->hrrq[i]._lock); + ioa_cfg->hrrq[i].removing_ioa = 1; + spin_unlock(&ioa_cfg->hrrq[i]._lock); + } + wmb(); ipr_initiate_ioa_bringdown(ioa_cfg, IPR_SHUTDOWN_NORMAL); spin_unlock_irqrestore(ioa_cfg->host->host_lock, host_lock_flags); @@ -9138,7 +9772,7 @@ static void ipr_remove(struct pci_dev *pdev) static int ipr_probe(struct pci_dev *pdev, const struct pci_device_id *dev_id) { struct ipr_ioa_cfg *ioa_cfg; - int rc; + int rc, i; rc = ipr_probe_ioa(pdev, dev_id); @@ -9185,6 +9819,17 @@ static int ipr_probe(struct pci_dev *pdev, const struct pci_device_id *dev_id) scsi_add_device(ioa_cfg->host, IPR_IOA_BUS, IPR_IOA_TARGET, IPR_IOA_LUN); ioa_cfg->allow_ml_add_del = 1; ioa_cfg->host->max_channel = IPR_VSET_BUS; + ioa_cfg->iopoll_weight = ioa_cfg->chip_cfg->iopoll_weight; + + if (blk_iopoll_enabled && ioa_cfg->iopoll_weight && + ioa_cfg->sis64 && ioa_cfg->nvectors > 1) { + for (i = 1; i < ioa_cfg->hrrq_num; i++) { + blk_iopoll_init(&ioa_cfg->hrrq[i].iopoll, + ioa_cfg->iopoll_weight, ipr_iopoll); + blk_iopoll_enable(&ioa_cfg->hrrq[i].iopoll); + } + } + schedule_work(&ioa_cfg->work_q); return 0; } @@ -9203,8 +9848,16 @@ static void ipr_shutdown(struct pci_dev *pdev) { struct ipr_ioa_cfg *ioa_cfg = pci_get_drvdata(pdev); unsigned long lock_flags = 0; + int i; spin_lock_irqsave(ioa_cfg->host->host_lock, lock_flags); + if (blk_iopoll_enabled && ioa_cfg->iopoll_weight && + ioa_cfg->sis64 && ioa_cfg->nvectors > 1) { + ioa_cfg->iopoll_weight = 0; + for (i = 1; i < ioa_cfg->hrrq_num; i++) + blk_iopoll_disable(&ioa_cfg->hrrq[i].iopoll); + } + while (ioa_cfg->in_reset_reload) { spin_unlock_irqrestore(ioa_cfg->host->host_lock, lock_flags); wait_event(ioa_cfg->reset_wait_q, !ioa_cfg->in_reset_reload); @@ -9277,6 +9930,8 @@ static struct pci_device_id ipr_pci_table[] = { { PCI_VENDOR_ID_IBM, PCI_DEVICE_ID_IBM_CROC_FPGA_E2, PCI_VENDOR_ID_IBM, IPR_SUBS_DEV_ID_57B2, 0, 0, 0 }, { PCI_VENDOR_ID_IBM, PCI_DEVICE_ID_IBM_CROC_FPGA_E2, + PCI_VENDOR_ID_IBM, IPR_SUBS_DEV_ID_57C0, 0, 0, 0 }, + { PCI_VENDOR_ID_IBM, PCI_DEVICE_ID_IBM_CROC_FPGA_E2, PCI_VENDOR_ID_IBM, IPR_SUBS_DEV_ID_57C3, 0, 0, 0 }, { PCI_VENDOR_ID_IBM, PCI_DEVICE_ID_IBM_CROC_FPGA_E2, PCI_VENDOR_ID_IBM, IPR_SUBS_DEV_ID_57C4, 0, 0, 0 }, @@ -9290,6 +9945,14 @@ static struct pci_device_id ipr_pci_table[] = { PCI_VENDOR_ID_IBM, IPR_SUBS_DEV_ID_57C8, 0, 0, 0 }, { PCI_VENDOR_ID_IBM, PCI_DEVICE_ID_IBM_CROCODILE, PCI_VENDOR_ID_IBM, IPR_SUBS_DEV_ID_57CE, 0, 0, 0 }, + { PCI_VENDOR_ID_IBM, PCI_DEVICE_ID_IBM_CROCODILE, + PCI_VENDOR_ID_IBM, IPR_SUBS_DEV_ID_57D5, 0, 0, 0 }, + { PCI_VENDOR_ID_IBM, PCI_DEVICE_ID_IBM_CROCODILE, + PCI_VENDOR_ID_IBM, IPR_SUBS_DEV_ID_57D6, 0, 0, 0 }, + { PCI_VENDOR_ID_IBM, PCI_DEVICE_ID_IBM_CROCODILE, + PCI_VENDOR_ID_IBM, IPR_SUBS_DEV_ID_57D7, 0, 0, 0 }, + { PCI_VENDOR_ID_IBM, PCI_DEVICE_ID_IBM_CROCODILE, + PCI_VENDOR_ID_IBM, IPR_SUBS_DEV_ID_57D8, 0, 0, 0 }, { } }; MODULE_DEVICE_TABLE(pci, ipr_pci_table); @@ -9316,9 +9979,7 @@ static struct pci_driver ipr_driver = { **/ static void ipr_halt_done(struct ipr_cmnd *ipr_cmd) { - struct ipr_ioa_cfg *ioa_cfg = ipr_cmd->ioa_cfg; - - list_add_tail(&ipr_cmd->queue, &ioa_cfg->free_q); + list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_free_q); } /** @@ -9340,7 +10001,7 @@ static int ipr_halt(struct notifier_block *nb, ulong event, void *buf) list_for_each_entry(ioa_cfg, &ipr_ioa_head, queue) { spin_lock_irqsave(ioa_cfg->host->host_lock, flags); - if (!ioa_cfg->allow_cmds) { + if (!ioa_cfg->hrrq[IPR_INIT_HRRQ].allow_cmds) { spin_unlock_irqrestore(ioa_cfg->host->host_lock, flags); continue; } diff --git a/drivers/scsi/ipr.h b/drivers/scsi/ipr.h index c8a137f..21a6ff1 100644 --- a/drivers/scsi/ipr.h +++ b/drivers/scsi/ipr.h @@ -32,14 +32,15 @@ #include <linux/libata.h> #include <linux/list.h> #include <linux/kref.h> +#include <linux/blk-iopoll.h> #include <scsi/scsi.h> #include <scsi/scsi_cmnd.h> /* * Literals */ -#define IPR_DRIVER_VERSION "2.5.4" -#define IPR_DRIVER_DATE "(July 11, 2012)" +#define IPR_DRIVER_VERSION "2.6.0" +#define IPR_DRIVER_DATE "(November 16, 2012)" /* * IPR_MAX_CMD_PER_LUN: This defines the maximum number of outstanding @@ -82,6 +83,7 @@ #define IPR_SUBS_DEV_ID_57B4 0x033B #define IPR_SUBS_DEV_ID_57B2 0x035F +#define IPR_SUBS_DEV_ID_57C0 0x0352 #define IPR_SUBS_DEV_ID_57C3 0x0353 #define IPR_SUBS_DEV_ID_57C4 0x0354 #define IPR_SUBS_DEV_ID_57C6 0x0357 @@ -94,6 +96,10 @@ #define IPR_SUBS_DEV_ID_574D 0x0356 #define IPR_SUBS_DEV_ID_57C8 0x035D +#define IPR_SUBS_DEV_ID_57D5 0x03FB +#define IPR_SUBS_DEV_ID_57D6 0x03FC +#define IPR_SUBS_DEV_ID_57D7 0x03FF +#define IPR_SUBS_DEV_ID_57D8 0x03FE #define IPR_NAME "ipr" /* @@ -298,6 +304,9 @@ IPR_PCII_NO_HOST_RRQ | IPR_PCII_IOARRIN_LOST | IPR_PCII_MMIO_ERROR) * Misc literals */ #define IPR_NUM_IOADL_ENTRIES IPR_MAX_SGLIST +#define IPR_MAX_MSIX_VECTORS 0x5 +#define IPR_MAX_HRRQ_NUM 0x10 +#define IPR_INIT_HRRQ 0x0 /* * Adapter interface types @@ -404,7 +413,7 @@ struct ipr_config_table_entry64 { __be64 dev_id; __be64 lun; __be64 lun_wwn[2]; -#define IPR_MAX_RES_PATH_LENGTH 24 +#define IPR_MAX_RES_PATH_LENGTH 48 __be64 res_path; struct ipr_std_inq_data std_inq_data; u8 reserved2[4]; @@ -459,9 +468,40 @@ struct ipr_supported_device { u8 reserved2[16]; }__attribute__((packed, aligned (4))); +struct ipr_hrr_queue { + struct ipr_ioa_cfg *ioa_cfg; + __be32 *host_rrq; + dma_addr_t host_rrq_dma; +#define IPR_HRRQ_REQ_RESP_HANDLE_MASK 0xfffffffc +#define IPR_HRRQ_RESP_BIT_SET 0x00000002 +#define IPR_HRRQ_TOGGLE_BIT 0x00000001 +#define IPR_HRRQ_REQ_RESP_HANDLE_SHIFT 2 +#define IPR_ID_HRRQ_SELE_ENABLE 0x02 + volatile __be32 *hrrq_start; + volatile __be32 *hrrq_end; + volatile __be32 *hrrq_curr; + + struct list_head hrrq_free_q; + struct list_head hrrq_pending_q; + spinlock_t _lock; + spinlock_t *lock; + + volatile u32 toggle_bit; + u32 size; + u32 min_cmd_id; + u32 max_cmd_id; + u8 allow_interrupts:1; + u8 ioa_is_dead:1; + u8 allow_cmds:1; + u8 removing_ioa:1; + + struct blk_iopoll iopoll; +}; + /* Command packet structure */ struct ipr_cmd_pkt { - __be16 reserved; /* Reserved by IOA */ + u8 reserved; /* Reserved by IOA */ + u8 hrrq_id; u8 request_type; #define IPR_RQTYPE_SCSICDB 0x00 #define IPR_RQTYPE_IOACMD 0x01 @@ -1022,6 +1062,10 @@ struct ipr_hostrcb64_fabric_desc { struct ipr_hostrcb64_config_element elem[1]; }__attribute__((packed, aligned (8))); +#define for_each_hrrq(hrrq, ioa_cfg) \ + for (hrrq = (ioa_cfg)->hrrq; \ + hrrq < ((ioa_cfg)->hrrq + (ioa_cfg)->hrrq_num); hrrq++) + #define for_each_fabric_cfg(fabric, cfg) \ for (cfg = (fabric)->elem; \ cfg < ((fabric)->elem + be16_to_cpu((fabric)->num_entries)); \ @@ -1308,6 +1352,7 @@ struct ipr_chip_cfg_t { u16 max_cmds; u8 cache_line_size; u8 clear_isr; + u32 iopoll_weight; struct ipr_interrupt_offsets regs; }; @@ -1317,6 +1362,7 @@ struct ipr_chip_t { u16 intr_type; #define IPR_USE_LSI 0x00 #define IPR_USE_MSI 0x01 +#define IPR_USE_MSIX 0x02 u16 sis_type; #define IPR_SIS32 0x00 #define IPR_SIS64 0x01 @@ -1375,13 +1421,10 @@ struct ipr_ioa_cfg { struct list_head queue; - u8 allow_interrupts:1; u8 in_reset_reload:1; u8 in_ioa_bringdown:1; u8 ioa_unit_checked:1; - u8 ioa_is_dead:1; u8 dump_taken:1; - u8 allow_cmds:1; u8 allow_ml_add_del:1; u8 needs_hard_reset:1; u8 dual_raid:1; @@ -1413,21 +1456,7 @@ struct ipr_ioa_cfg { char trace_start[8]; #define IPR_TRACE_START_LABEL "trace" struct ipr_trace_entry *trace; - u32 trace_index:IPR_NUM_TRACE_INDEX_BITS; - - /* - * Queue for free command blocks - */ - char ipr_free_label[8]; -#define IPR_FREEQ_LABEL "free-q" - struct list_head free_q; - - /* - * Queue for command blocks outstanding to the adapter - */ - char ipr_pending_label[8]; -#define IPR_PENDQ_LABEL "pend-q" - struct list_head pending_q; + atomic_t trace_index; char cfg_table_start[8]; #define IPR_CFG_TBL_START "cfg" @@ -1452,16 +1481,10 @@ struct ipr_ioa_cfg { struct list_head hostrcb_free_q; struct list_head hostrcb_pending_q; - __be32 *host_rrq; - dma_addr_t host_rrq_dma; -#define IPR_HRRQ_REQ_RESP_HANDLE_MASK 0xfffffffc -#define IPR_HRRQ_RESP_BIT_SET 0x00000002 -#define IPR_HRRQ_TOGGLE_BIT 0x00000001 -#define IPR_HRRQ_REQ_RESP_HANDLE_SHIFT 2 - volatile __be32 *hrrq_start; - volatile __be32 *hrrq_end; - volatile __be32 *hrrq_curr; - volatile u32 toggle_bit; + struct ipr_hrr_queue hrrq[IPR_MAX_HRRQ_NUM]; + u32 hrrq_num; + atomic_t hrrq_index; + u16 identify_hrrq_index; struct ipr_bus_attributes bus_attr[IPR_MAX_NUM_BUSES]; @@ -1507,6 +1530,17 @@ struct ipr_ioa_cfg { u32 max_cmds; struct ipr_cmnd **ipr_cmnd_list; dma_addr_t *ipr_cmnd_list_dma; + + u16 intr_flag; + unsigned int nvectors; + + struct { + unsigned short vec; + char desc[22]; + } vectors_info[IPR_MAX_MSIX_VECTORS]; + + u32 iopoll_weight; + }; /* struct ipr_ioa_cfg */ struct ipr_cmnd { @@ -1544,6 +1578,7 @@ struct ipr_cmnd { struct scsi_device *sdev; } u; + struct ipr_hrr_queue *hrrq; struct ipr_ioa_cfg *ioa_cfg; }; @@ -1717,7 +1752,8 @@ struct ipr_ucode_image_header { if (ipr_is_device(hostrcb)) { \ if ((hostrcb)->ioa_cfg->sis64) { \ printk(KERN_ERR IPR_NAME ": %s: " fmt, \ - ipr_format_res_path(hostrcb->hcam.u.error64.fd_res_path, \ + ipr_format_res_path(hostrcb->ioa_cfg, \ + hostrcb->hcam.u.error64.fd_res_path, \ hostrcb->rp_buffer, \ sizeof(hostrcb->rp_buffer)), \ __VA_ARGS__); \ diff --git a/drivers/scsi/libfc/fc_fcp.c b/drivers/scsi/libfc/fc_fcp.c index fcb9d0b..09c81b2 100644 --- a/drivers/scsi/libfc/fc_fcp.c +++ b/drivers/scsi/libfc/fc_fcp.c @@ -1381,10 +1381,10 @@ static void fc_fcp_timeout(unsigned long data) fsp->state |= FC_SRB_FCP_PROCESSING_TMO; - if (fsp->state & FC_SRB_RCV_STATUS) - fc_fcp_complete_locked(fsp); - else if (rpriv->flags & FC_RP_FLAGS_REC_SUPPORTED) + if (rpriv->flags & FC_RP_FLAGS_REC_SUPPORTED) fc_fcp_rec(fsp); + else if (fsp->state & FC_SRB_RCV_STATUS) + fc_fcp_complete_locked(fsp); else fc_fcp_recovery(fsp, FC_TIMED_OUT); fsp->state &= ~FC_SRB_FCP_PROCESSING_TMO; diff --git a/drivers/scsi/libfc/fc_libfc.h b/drivers/scsi/libfc/fc_libfc.h index c2830cc..b74189d 100644 --- a/drivers/scsi/libfc/fc_libfc.h +++ b/drivers/scsi/libfc/fc_libfc.h @@ -41,25 +41,25 @@ extern unsigned int fc_debug_logging; #define FC_LIBFC_DBG(fmt, args...) \ FC_CHECK_LOGGING(FC_LIBFC_LOGGING, \ - printk(KERN_INFO "libfc: " fmt, ##args)) + pr_info("libfc: " fmt, ##args)) #define FC_LPORT_DBG(lport, fmt, args...) \ FC_CHECK_LOGGING(FC_LPORT_LOGGING, \ - printk(KERN_INFO "host%u: lport %6.6x: " fmt, \ - (lport)->host->host_no, \ - (lport)->port_id, ##args)) + pr_info("host%u: lport %6.6x: " fmt, \ + (lport)->host->host_no, \ + (lport)->port_id, ##args)) -#define FC_DISC_DBG(disc, fmt, args...) \ - FC_CHECK_LOGGING(FC_DISC_LOGGING, \ - printk(KERN_INFO "host%u: disc: " fmt, \ - fc_disc_lport(disc)->host->host_no, \ - ##args)) +#define FC_DISC_DBG(disc, fmt, args...) \ + FC_CHECK_LOGGING(FC_DISC_LOGGING, \ + pr_info("host%u: disc: " fmt, \ + fc_disc_lport(disc)->host->host_no, \ + ##args)) #define FC_RPORT_ID_DBG(lport, port_id, fmt, args...) \ FC_CHECK_LOGGING(FC_RPORT_LOGGING, \ - printk(KERN_INFO "host%u: rport %6.6x: " fmt, \ - (lport)->host->host_no, \ - (port_id), ##args)) + pr_info("host%u: rport %6.6x: " fmt, \ + (lport)->host->host_no, \ + (port_id), ##args)) #define FC_RPORT_DBG(rdata, fmt, args...) \ FC_RPORT_ID_DBG((rdata)->local_port, (rdata)->ids.port_id, fmt, ##args) @@ -70,13 +70,13 @@ extern unsigned int fc_debug_logging; if ((pkt)->seq_ptr) { \ struct fc_exch *_ep = NULL; \ _ep = fc_seq_exch((pkt)->seq_ptr); \ - printk(KERN_INFO "host%u: fcp: %6.6x: " \ + pr_info("host%u: fcp: %6.6x: " \ "xid %04x-%04x: " fmt, \ (pkt)->lp->host->host_no, \ (pkt)->rport->port_id, \ (_ep)->oxid, (_ep)->rxid, ##args); \ } else { \ - printk(KERN_INFO "host%u: fcp: %6.6x: " fmt, \ + pr_info("host%u: fcp: %6.6x: " fmt, \ (pkt)->lp->host->host_no, \ (pkt)->rport->port_id, ##args); \ } \ @@ -84,14 +84,14 @@ extern unsigned int fc_debug_logging; #define FC_EXCH_DBG(exch, fmt, args...) \ FC_CHECK_LOGGING(FC_EXCH_LOGGING, \ - printk(KERN_INFO "host%u: xid %4x: " fmt, \ - (exch)->lp->host->host_no, \ - exch->xid, ##args)) + pr_info("host%u: xid %4x: " fmt, \ + (exch)->lp->host->host_no, \ + exch->xid, ##args)) #define FC_SCSI_DBG(lport, fmt, args...) \ FC_CHECK_LOGGING(FC_SCSI_LOGGING, \ - printk(KERN_INFO "host%u: scsi: " fmt, \ - (lport)->host->host_no, ##args)) + pr_info("host%u: scsi: " fmt, \ + (lport)->host->host_no, ##args)) /* * FC-4 Providers. diff --git a/drivers/scsi/libfc/fc_rport.c b/drivers/scsi/libfc/fc_rport.c index 83aa1ef..d518d17 100644 --- a/drivers/scsi/libfc/fc_rport.c +++ b/drivers/scsi/libfc/fc_rport.c @@ -582,7 +582,7 @@ static void fc_rport_error(struct fc_rport_priv *rdata, struct fc_frame *fp) static void fc_rport_error_retry(struct fc_rport_priv *rdata, struct fc_frame *fp) { - unsigned long delay = FC_DEF_E_D_TOV; + unsigned long delay = msecs_to_jiffies(FC_DEF_E_D_TOV); /* make sure this isn't an FC_EX_CLOSED error, never retry those */ if (PTR_ERR(fp) == -FC_EX_CLOSED) diff --git a/drivers/scsi/lpfc/lpfc.h b/drivers/scsi/lpfc/lpfc.h index df4c13a..7706c99 100644 --- a/drivers/scsi/lpfc/lpfc.h +++ b/drivers/scsi/lpfc/lpfc.h @@ -466,11 +466,13 @@ enum intr_type_t { MSIX, }; +#define LPFC_CT_CTX_MAX 64 struct unsol_rcv_ct_ctx { uint32_t ctxt_id; uint32_t SID; - uint32_t flags; -#define UNSOL_VALID 0x00000001 + uint32_t valid; +#define UNSOL_INVALID 0 +#define UNSOL_VALID 1 uint16_t oxid; uint16_t rxid; }; @@ -750,6 +752,15 @@ struct lpfc_hba { void __iomem *ctrl_regs_memmap_p;/* Kernel memory mapped address for PCI BAR2 */ + void __iomem *pci_bar0_memmap_p; /* Kernel memory mapped address for + PCI BAR0 with dual-ULP support */ + void __iomem *pci_bar2_memmap_p; /* Kernel memory mapped address for + PCI BAR2 with dual-ULP support */ + void __iomem *pci_bar4_memmap_p; /* Kernel memory mapped address for + PCI BAR4 with dual-ULP support */ +#define PCI_64BIT_BAR0 0 +#define PCI_64BIT_BAR2 2 +#define PCI_64BIT_BAR4 4 void __iomem *MBslimaddr; /* virtual address for mbox cmds */ void __iomem *HAregaddr; /* virtual address for host attn reg */ void __iomem *CAregaddr; /* virtual address for chip attn reg */ @@ -938,7 +949,7 @@ struct lpfc_hba { spinlock_t ct_ev_lock; /* synchronize access to ct_ev_waiters */ struct list_head ct_ev_waiters; - struct unsol_rcv_ct_ctx ct_ctx[64]; + struct unsol_rcv_ct_ctx ct_ctx[LPFC_CT_CTX_MAX]; uint32_t ctx_idx; uint8_t menlo_flag; /* menlo generic flags */ diff --git a/drivers/scsi/lpfc/lpfc_bsg.c b/drivers/scsi/lpfc/lpfc_bsg.c index f7368eb..32d5683 100644 --- a/drivers/scsi/lpfc/lpfc_bsg.c +++ b/drivers/scsi/lpfc/lpfc_bsg.c @@ -955,9 +955,9 @@ lpfc_bsg_ct_unsol_event(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, spin_lock_irqsave(&phba->ct_ev_lock, flags); if (phba->sli_rev == LPFC_SLI_REV4) { evt_dat->immed_dat = phba->ctx_idx; - phba->ctx_idx = (phba->ctx_idx + 1) % 64; + phba->ctx_idx = (phba->ctx_idx + 1) % LPFC_CT_CTX_MAX; /* Provide warning for over-run of the ct_ctx array */ - if (phba->ct_ctx[evt_dat->immed_dat].flags & + if (phba->ct_ctx[evt_dat->immed_dat].valid == UNSOL_VALID) lpfc_printf_log(phba, KERN_WARNING, LOG_ELS, "2717 CT context array entry " @@ -973,7 +973,7 @@ lpfc_bsg_ct_unsol_event(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, piocbq->iocb.unsli3.rcvsli3.ox_id; phba->ct_ctx[evt_dat->immed_dat].SID = piocbq->iocb.un.rcvels.remoteID; - phba->ct_ctx[evt_dat->immed_dat].flags = UNSOL_VALID; + phba->ct_ctx[evt_dat->immed_dat].valid = UNSOL_VALID; } else evt_dat->immed_dat = piocbq->iocb.ulpContext; @@ -1013,6 +1013,47 @@ error_ct_unsol_exit: } /** + * lpfc_bsg_ct_unsol_abort - handler ct abort to management plane + * @phba: Pointer to HBA context object. + * @dmabuf: pointer to a dmabuf that describes the FC sequence + * + * This function handles abort to the CT command toward management plane + * for SLI4 port. + * + * If the pending context of a CT command to management plane present, clears + * such context and returns 1 for handled; otherwise, it returns 0 indicating + * no context exists. + **/ +int +lpfc_bsg_ct_unsol_abort(struct lpfc_hba *phba, struct hbq_dmabuf *dmabuf) +{ + struct fc_frame_header fc_hdr; + struct fc_frame_header *fc_hdr_ptr = &fc_hdr; + int ctx_idx, handled = 0; + uint16_t oxid, rxid; + uint32_t sid; + + memcpy(fc_hdr_ptr, dmabuf->hbuf.virt, sizeof(struct fc_frame_header)); + sid = sli4_sid_from_fc_hdr(fc_hdr_ptr); + oxid = be16_to_cpu(fc_hdr_ptr->fh_ox_id); + rxid = be16_to_cpu(fc_hdr_ptr->fh_rx_id); + + for (ctx_idx = 0; ctx_idx < LPFC_CT_CTX_MAX; ctx_idx++) { + if (phba->ct_ctx[ctx_idx].valid != UNSOL_VALID) + continue; + if (phba->ct_ctx[ctx_idx].rxid != rxid) + continue; + if (phba->ct_ctx[ctx_idx].oxid != oxid) + continue; + if (phba->ct_ctx[ctx_idx].SID != sid) + continue; + phba->ct_ctx[ctx_idx].valid = UNSOL_INVALID; + handled = 1; + } + return handled; +} + +/** * lpfc_bsg_hba_set_event - process a SET_EVENT bsg vendor command * @job: SET_EVENT fc_bsg_job **/ @@ -1318,7 +1359,7 @@ lpfc_issue_ct_rsp(struct lpfc_hba *phba, struct fc_bsg_job *job, uint32_t tag, icmd->ulpClass = CLASS3; if (phba->sli_rev == LPFC_SLI_REV4) { /* Do not issue unsol response if oxid not marked as valid */ - if (!(phba->ct_ctx[tag].flags & UNSOL_VALID)) { + if (phba->ct_ctx[tag].valid != UNSOL_VALID) { rc = IOCB_ERROR; goto issue_ct_rsp_exit; } @@ -1352,7 +1393,7 @@ lpfc_issue_ct_rsp(struct lpfc_hba *phba, struct fc_bsg_job *job, uint32_t tag, phba->sli4_hba.rpi_ids[ndlp->nlp_rpi]; /* The exchange is done, mark the entry as invalid */ - phba->ct_ctx[tag].flags &= ~UNSOL_VALID; + phba->ct_ctx[tag].valid = UNSOL_INVALID; } else icmd->ulpContext = (ushort) tag; diff --git a/drivers/scsi/lpfc/lpfc_crtn.h b/drivers/scsi/lpfc/lpfc_crtn.h index 69d66e3..76ca65d 100644 --- a/drivers/scsi/lpfc/lpfc_crtn.h +++ b/drivers/scsi/lpfc/lpfc_crtn.h @@ -164,8 +164,7 @@ void lpfc_hb_timeout_handler(struct lpfc_hba *); void lpfc_ct_unsol_event(struct lpfc_hba *, struct lpfc_sli_ring *, struct lpfc_iocbq *); -void lpfc_sli4_ct_abort_unsol_event(struct lpfc_hba *, struct lpfc_sli_ring *, - struct lpfc_iocbq *); +int lpfc_ct_handle_unsol_abort(struct lpfc_hba *, struct hbq_dmabuf *); int lpfc_ns_cmd(struct lpfc_vport *, int, uint8_t, uint32_t); int lpfc_fdmi_cmd(struct lpfc_vport *, struct lpfc_nodelist *, int); void lpfc_fdmi_tmo(unsigned long); @@ -427,6 +426,7 @@ int lpfc_bsg_request(struct fc_bsg_job *); int lpfc_bsg_timeout(struct fc_bsg_job *); int lpfc_bsg_ct_unsol_event(struct lpfc_hba *, struct lpfc_sli_ring *, struct lpfc_iocbq *); +int lpfc_bsg_ct_unsol_abort(struct lpfc_hba *, struct hbq_dmabuf *); void __lpfc_sli_ringtx_put(struct lpfc_hba *, struct lpfc_sli_ring *, struct lpfc_iocbq *); struct lpfc_iocbq *lpfc_sli_ringtx_get(struct lpfc_hba *, diff --git a/drivers/scsi/lpfc/lpfc_ct.c b/drivers/scsi/lpfc/lpfc_ct.c index 65f9fb6..7bff3a1 100644 --- a/drivers/scsi/lpfc/lpfc_ct.c +++ b/drivers/scsi/lpfc/lpfc_ct.c @@ -164,37 +164,24 @@ lpfc_ct_unsol_event(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, } /** - * lpfc_sli4_ct_abort_unsol_event - Default handle for sli4 unsol abort + * lpfc_ct_handle_unsol_abort - ct upper level protocol abort handler * @phba: Pointer to HBA context object. - * @pring: Pointer to the driver internal I/O ring. - * @piocbq: Pointer to the IOCBQ. + * @dmabuf: pointer to a dmabuf that describes the FC sequence * - * This function serves as the default handler for the sli4 unsolicited - * abort event. It shall be invoked when there is no application interface - * registered unsolicited abort handler. This handler does nothing but - * just simply releases the dma buffer used by the unsol abort event. + * This function serves as the upper level protocol abort handler for CT + * protocol. + * + * Return 1 if abort has been handled, 0 otherwise. **/ -void -lpfc_sli4_ct_abort_unsol_event(struct lpfc_hba *phba, - struct lpfc_sli_ring *pring, - struct lpfc_iocbq *piocbq) +int +lpfc_ct_handle_unsol_abort(struct lpfc_hba *phba, struct hbq_dmabuf *dmabuf) { - IOCB_t *icmd = &piocbq->iocb; - struct lpfc_dmabuf *bdeBuf; - uint32_t size; + int handled; - /* Forward abort event to any process registered to receive ct event */ - if (lpfc_bsg_ct_unsol_event(phba, pring, piocbq) == 0) - return; + /* CT upper level goes through BSG */ + handled = lpfc_bsg_ct_unsol_abort(phba, dmabuf); - /* If there is no BDE associated with IOCB, there is nothing to do */ - if (icmd->ulpBdeCount == 0) - return; - bdeBuf = piocbq->context2; - piocbq->context2 = NULL; - size = icmd->un.cont64[0].tus.f.bdeSize; - lpfc_ct_unsol_buffer(phba, piocbq, bdeBuf, size); - lpfc_in_buf_free(phba, bdeBuf); + return handled; } static void diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c index b9440de..08d156a 100644 --- a/drivers/scsi/lpfc/lpfc_els.c +++ b/drivers/scsi/lpfc/lpfc_els.c @@ -3122,6 +3122,13 @@ lpfc_els_retry(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, case IOERR_SEQUENCE_TIMEOUT: case IOERR_INVALID_RPI: + if (cmd == ELS_CMD_PLOGI && + did == NameServer_DID) { + /* Continue forever if plogi to */ + /* the nameserver fails */ + maxretry = 0; + delay = 100; + } retry = 1; break; } @@ -6517,7 +6524,8 @@ lpfc_els_unsol_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, struct lpfc_nodelist *ndlp; struct ls_rjt stat; uint32_t *payload; - uint32_t cmd, did, newnode, rjt_err = 0; + uint32_t cmd, did, newnode; + uint8_t rjt_exp, rjt_err = 0; IOCB_t *icmd = &elsiocb->iocb; if (!vport || !(elsiocb->context2)) @@ -6606,12 +6614,14 @@ lpfc_els_unsol_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, /* If Nport discovery is delayed, reject PLOGIs */ if (vport->fc_flag & FC_DISC_DELAYED) { rjt_err = LSRJT_UNABLE_TPC; + rjt_exp = LSEXP_NOTHING_MORE; break; } if (vport->port_state < LPFC_DISC_AUTH) { if (!(phba->pport->fc_flag & FC_PT2PT) || (phba->pport->fc_flag & FC_PT2PT_PLOGI)) { rjt_err = LSRJT_UNABLE_TPC; + rjt_exp = LSEXP_NOTHING_MORE; break; } /* We get here, and drop thru, if we are PT2PT with @@ -6648,6 +6658,7 @@ lpfc_els_unsol_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, lpfc_send_els_event(vport, ndlp, payload); if (vport->port_state < LPFC_DISC_AUTH) { rjt_err = LSRJT_UNABLE_TPC; + rjt_exp = LSEXP_NOTHING_MORE; break; } lpfc_disc_state_machine(vport, ndlp, elsiocb, NLP_EVT_RCV_LOGO); @@ -6661,6 +6672,7 @@ lpfc_els_unsol_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, lpfc_send_els_event(vport, ndlp, payload); if (vport->port_state < LPFC_DISC_AUTH) { rjt_err = LSRJT_UNABLE_TPC; + rjt_exp = LSEXP_NOTHING_MORE; break; } lpfc_disc_state_machine(vport, ndlp, elsiocb, NLP_EVT_RCV_PRLO); @@ -6680,6 +6692,7 @@ lpfc_els_unsol_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, phba->fc_stat.elsRcvADISC++; if (vport->port_state < LPFC_DISC_AUTH) { rjt_err = LSRJT_UNABLE_TPC; + rjt_exp = LSEXP_NOTHING_MORE; break; } lpfc_disc_state_machine(vport, ndlp, elsiocb, @@ -6693,6 +6706,7 @@ lpfc_els_unsol_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, phba->fc_stat.elsRcvPDISC++; if (vport->port_state < LPFC_DISC_AUTH) { rjt_err = LSRJT_UNABLE_TPC; + rjt_exp = LSEXP_NOTHING_MORE; break; } lpfc_disc_state_machine(vport, ndlp, elsiocb, @@ -6730,6 +6744,7 @@ lpfc_els_unsol_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, phba->fc_stat.elsRcvPRLI++; if (vport->port_state < LPFC_DISC_AUTH) { rjt_err = LSRJT_UNABLE_TPC; + rjt_exp = LSEXP_NOTHING_MORE; break; } lpfc_disc_state_machine(vport, ndlp, elsiocb, NLP_EVT_RCV_PRLI); @@ -6813,6 +6828,11 @@ lpfc_els_unsol_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, if (newnode) lpfc_nlp_put(ndlp); break; + case ELS_CMD_REC: + /* receive this due to exchange closed */ + rjt_err = LSRJT_UNABLE_TPC; + rjt_exp = LSEXP_INVALID_OX_RX; + break; default: lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_UNSOL, "RCV ELS cmd: cmd:x%x did:x%x/ste:x%x", @@ -6820,6 +6840,7 @@ lpfc_els_unsol_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, /* Unsupported ELS command, reject */ rjt_err = LSRJT_CMD_UNSUPPORTED; + rjt_exp = LSEXP_NOTHING_MORE; /* Unknown ELS command <elsCmd> received from NPORT <did> */ lpfc_printf_vlog(vport, KERN_ERR, LOG_ELS, @@ -6834,7 +6855,7 @@ lpfc_els_unsol_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, if (rjt_err) { memset(&stat, 0, sizeof(stat)); stat.un.b.lsRjtRsnCode = rjt_err; - stat.un.b.lsRjtRsnCodeExp = LSEXP_NOTHING_MORE; + stat.un.b.lsRjtRsnCodeExp = rjt_exp; lpfc_els_rsp_reject(vport, stat.un.lsRjtError, elsiocb, ndlp, NULL); } diff --git a/drivers/scsi/lpfc/lpfc_hw.h b/drivers/scsi/lpfc/lpfc_hw.h index 7398ca8..e8c4760 100644 --- a/drivers/scsi/lpfc/lpfc_hw.h +++ b/drivers/scsi/lpfc/lpfc_hw.h @@ -538,6 +538,7 @@ struct fc_vft_header { #define ELS_CMD_ECHO 0x10000000 #define ELS_CMD_TEST 0x11000000 #define ELS_CMD_RRQ 0x12000000 +#define ELS_CMD_REC 0x13000000 #define ELS_CMD_PRLI 0x20100014 #define ELS_CMD_PRLO 0x21100014 #define ELS_CMD_PRLO_ACC 0x02100014 @@ -574,6 +575,7 @@ struct fc_vft_header { #define ELS_CMD_ECHO 0x10 #define ELS_CMD_TEST 0x11 #define ELS_CMD_RRQ 0x12 +#define ELS_CMD_REC 0x13 #define ELS_CMD_PRLI 0x14001020 #define ELS_CMD_PRLO 0x14001021 #define ELS_CMD_PRLO_ACC 0x14001002 diff --git a/drivers/scsi/lpfc/lpfc_hw4.h b/drivers/scsi/lpfc/lpfc_hw4.h index a47cfbd..6e93b88 100644 --- a/drivers/scsi/lpfc/lpfc_hw4.h +++ b/drivers/scsi/lpfc/lpfc_hw4.h @@ -106,6 +106,7 @@ struct lpfc_sli_intf { #define LPFC_SLI4_MB_WORD_COUNT 64 #define LPFC_MAX_MQ_PAGE 8 +#define LPFC_MAX_WQ_PAGE_V0 4 #define LPFC_MAX_WQ_PAGE 8 #define LPFC_MAX_CQ_PAGE 4 #define LPFC_MAX_EQ_PAGE 8 @@ -703,24 +704,41 @@ struct lpfc_register { * BAR0. The offsets are the same so the driver must account for * any base address difference. */ -#define LPFC_RQ_DOORBELL 0x00A0 -#define lpfc_rq_doorbell_num_posted_SHIFT 16 -#define lpfc_rq_doorbell_num_posted_MASK 0x3FFF -#define lpfc_rq_doorbell_num_posted_WORD word0 -#define lpfc_rq_doorbell_id_SHIFT 0 -#define lpfc_rq_doorbell_id_MASK 0xFFFF -#define lpfc_rq_doorbell_id_WORD word0 - -#define LPFC_WQ_DOORBELL 0x0040 -#define lpfc_wq_doorbell_num_posted_SHIFT 24 -#define lpfc_wq_doorbell_num_posted_MASK 0x00FF -#define lpfc_wq_doorbell_num_posted_WORD word0 -#define lpfc_wq_doorbell_index_SHIFT 16 -#define lpfc_wq_doorbell_index_MASK 0x00FF -#define lpfc_wq_doorbell_index_WORD word0 -#define lpfc_wq_doorbell_id_SHIFT 0 -#define lpfc_wq_doorbell_id_MASK 0xFFFF -#define lpfc_wq_doorbell_id_WORD word0 +#define LPFC_ULP0_RQ_DOORBELL 0x00A0 +#define LPFC_ULP1_RQ_DOORBELL 0x00C0 +#define lpfc_rq_db_list_fm_num_posted_SHIFT 24 +#define lpfc_rq_db_list_fm_num_posted_MASK 0x00FF +#define lpfc_rq_db_list_fm_num_posted_WORD word0 +#define lpfc_rq_db_list_fm_index_SHIFT 16 +#define lpfc_rq_db_list_fm_index_MASK 0x00FF +#define lpfc_rq_db_list_fm_index_WORD word0 +#define lpfc_rq_db_list_fm_id_SHIFT 0 +#define lpfc_rq_db_list_fm_id_MASK 0xFFFF +#define lpfc_rq_db_list_fm_id_WORD word0 +#define lpfc_rq_db_ring_fm_num_posted_SHIFT 16 +#define lpfc_rq_db_ring_fm_num_posted_MASK 0x3FFF +#define lpfc_rq_db_ring_fm_num_posted_WORD word0 +#define lpfc_rq_db_ring_fm_id_SHIFT 0 +#define lpfc_rq_db_ring_fm_id_MASK 0xFFFF +#define lpfc_rq_db_ring_fm_id_WORD word0 + +#define LPFC_ULP0_WQ_DOORBELL 0x0040 +#define LPFC_ULP1_WQ_DOORBELL 0x0060 +#define lpfc_wq_db_list_fm_num_posted_SHIFT 24 +#define lpfc_wq_db_list_fm_num_posted_MASK 0x00FF +#define lpfc_wq_db_list_fm_num_posted_WORD word0 +#define lpfc_wq_db_list_fm_index_SHIFT 16 +#define lpfc_wq_db_list_fm_index_MASK 0x00FF +#define lpfc_wq_db_list_fm_index_WORD word0 +#define lpfc_wq_db_list_fm_id_SHIFT 0 +#define lpfc_wq_db_list_fm_id_MASK 0xFFFF +#define lpfc_wq_db_list_fm_id_WORD word0 +#define lpfc_wq_db_ring_fm_num_posted_SHIFT 16 +#define lpfc_wq_db_ring_fm_num_posted_MASK 0x3FFF +#define lpfc_wq_db_ring_fm_num_posted_WORD word0 +#define lpfc_wq_db_ring_fm_id_SHIFT 0 +#define lpfc_wq_db_ring_fm_id_MASK 0xFFFF +#define lpfc_wq_db_ring_fm_id_WORD word0 #define LPFC_EQCQ_DOORBELL 0x0120 #define lpfc_eqcq_doorbell_se_SHIFT 31 @@ -1131,12 +1149,22 @@ struct lpfc_mbx_wq_create { struct { /* Version 0 Request */ uint32_t word0; #define lpfc_mbx_wq_create_num_pages_SHIFT 0 -#define lpfc_mbx_wq_create_num_pages_MASK 0x0000FFFF +#define lpfc_mbx_wq_create_num_pages_MASK 0x000000FF #define lpfc_mbx_wq_create_num_pages_WORD word0 +#define lpfc_mbx_wq_create_dua_SHIFT 8 +#define lpfc_mbx_wq_create_dua_MASK 0x00000001 +#define lpfc_mbx_wq_create_dua_WORD word0 #define lpfc_mbx_wq_create_cq_id_SHIFT 16 #define lpfc_mbx_wq_create_cq_id_MASK 0x0000FFFF #define lpfc_mbx_wq_create_cq_id_WORD word0 - struct dma_address page[LPFC_MAX_WQ_PAGE]; + struct dma_address page[LPFC_MAX_WQ_PAGE_V0]; + uint32_t word9; +#define lpfc_mbx_wq_create_bua_SHIFT 0 +#define lpfc_mbx_wq_create_bua_MASK 0x00000001 +#define lpfc_mbx_wq_create_bua_WORD word9 +#define lpfc_mbx_wq_create_ulp_num_SHIFT 8 +#define lpfc_mbx_wq_create_ulp_num_MASK 0x000000FF +#define lpfc_mbx_wq_create_ulp_num_WORD word9 } request; struct { /* Version 1 Request */ uint32_t word0; /* Word 0 is the same as in v0 */ @@ -1160,6 +1188,17 @@ struct lpfc_mbx_wq_create { #define lpfc_mbx_wq_create_q_id_SHIFT 0 #define lpfc_mbx_wq_create_q_id_MASK 0x0000FFFF #define lpfc_mbx_wq_create_q_id_WORD word0 + uint32_t doorbell_offset; + uint32_t word2; +#define lpfc_mbx_wq_create_bar_set_SHIFT 0 +#define lpfc_mbx_wq_create_bar_set_MASK 0x0000FFFF +#define lpfc_mbx_wq_create_bar_set_WORD word2 +#define WQ_PCI_BAR_0_AND_1 0x00 +#define WQ_PCI_BAR_2_AND_3 0x01 +#define WQ_PCI_BAR_4_AND_5 0x02 +#define lpfc_mbx_wq_create_db_format_SHIFT 16 +#define lpfc_mbx_wq_create_db_format_MASK 0x0000FFFF +#define lpfc_mbx_wq_create_db_format_WORD word2 } response; } u; }; @@ -1223,14 +1262,31 @@ struct lpfc_mbx_rq_create { #define lpfc_mbx_rq_create_num_pages_SHIFT 0 #define lpfc_mbx_rq_create_num_pages_MASK 0x0000FFFF #define lpfc_mbx_rq_create_num_pages_WORD word0 +#define lpfc_mbx_rq_create_dua_SHIFT 16 +#define lpfc_mbx_rq_create_dua_MASK 0x00000001 +#define lpfc_mbx_rq_create_dua_WORD word0 +#define lpfc_mbx_rq_create_bqu_SHIFT 17 +#define lpfc_mbx_rq_create_bqu_MASK 0x00000001 +#define lpfc_mbx_rq_create_bqu_WORD word0 +#define lpfc_mbx_rq_create_ulp_num_SHIFT 24 +#define lpfc_mbx_rq_create_ulp_num_MASK 0x000000FF +#define lpfc_mbx_rq_create_ulp_num_WORD word0 struct rq_context context; struct dma_address page[LPFC_MAX_WQ_PAGE]; } request; struct { uint32_t word0; -#define lpfc_mbx_rq_create_q_id_SHIFT 0 -#define lpfc_mbx_rq_create_q_id_MASK 0x0000FFFF -#define lpfc_mbx_rq_create_q_id_WORD word0 +#define lpfc_mbx_rq_create_q_id_SHIFT 0 +#define lpfc_mbx_rq_create_q_id_MASK 0x0000FFFF +#define lpfc_mbx_rq_create_q_id_WORD word0 + uint32_t doorbell_offset; + uint32_t word2; +#define lpfc_mbx_rq_create_bar_set_SHIFT 0 +#define lpfc_mbx_rq_create_bar_set_MASK 0x0000FFFF +#define lpfc_mbx_rq_create_bar_set_WORD word2 +#define lpfc_mbx_rq_create_db_format_SHIFT 16 +#define lpfc_mbx_rq_create_db_format_MASK 0x0000FFFF +#define lpfc_mbx_rq_create_db_format_WORD word2 } response; } u; }; @@ -1388,6 +1444,33 @@ struct lpfc_mbx_get_rsrc_extent_info { } u; }; +struct lpfc_mbx_query_fw_config { + struct mbox_header header; + struct { + uint32_t config_number; +#define LPFC_FC_FCOE 0x00000007 + uint32_t asic_revision; + uint32_t physical_port; + uint32_t function_mode; +#define LPFC_FCOE_INI_MODE 0x00000040 +#define LPFC_FCOE_TGT_MODE 0x00000080 +#define LPFC_DUA_MODE 0x00000800 + uint32_t ulp0_mode; +#define LPFC_ULP_FCOE_INIT_MODE 0x00000040 +#define LPFC_ULP_FCOE_TGT_MODE 0x00000080 + uint32_t ulp0_nap_words[12]; + uint32_t ulp1_mode; + uint32_t ulp1_nap_words[12]; + uint32_t function_capabilities; + uint32_t cqid_base; + uint32_t cqid_tot; + uint32_t eqid_base; + uint32_t eqid_tot; + uint32_t ulp0_nap2_words[2]; + uint32_t ulp1_nap2_words[2]; + } rsp; +}; + struct lpfc_id_range { uint32_t word5; #define lpfc_mbx_rsrc_id_word4_0_SHIFT 0 @@ -1803,51 +1886,6 @@ struct lpfc_mbx_redisc_fcf_tbl { #define lpfc_mbx_redisc_fcf_index_WORD word12 }; -struct lpfc_mbx_query_fw_cfg { - struct mbox_header header; - uint32_t config_number; - uint32_t asic_rev; - uint32_t phys_port; - uint32_t function_mode; -/* firmware Function Mode */ -#define lpfc_function_mode_toe_SHIFT 0 -#define lpfc_function_mode_toe_MASK 0x00000001 -#define lpfc_function_mode_toe_WORD function_mode -#define lpfc_function_mode_nic_SHIFT 1 -#define lpfc_function_mode_nic_MASK 0x00000001 -#define lpfc_function_mode_nic_WORD function_mode -#define lpfc_function_mode_rdma_SHIFT 2 -#define lpfc_function_mode_rdma_MASK 0x00000001 -#define lpfc_function_mode_rdma_WORD function_mode -#define lpfc_function_mode_vm_SHIFT 3 -#define lpfc_function_mode_vm_MASK 0x00000001 -#define lpfc_function_mode_vm_WORD function_mode -#define lpfc_function_mode_iscsi_i_SHIFT 4 -#define lpfc_function_mode_iscsi_i_MASK 0x00000001 -#define lpfc_function_mode_iscsi_i_WORD function_mode -#define lpfc_function_mode_iscsi_t_SHIFT 5 -#define lpfc_function_mode_iscsi_t_MASK 0x00000001 -#define lpfc_function_mode_iscsi_t_WORD function_mode -#define lpfc_function_mode_fcoe_i_SHIFT 6 -#define lpfc_function_mode_fcoe_i_MASK 0x00000001 -#define lpfc_function_mode_fcoe_i_WORD function_mode -#define lpfc_function_mode_fcoe_t_SHIFT 7 -#define lpfc_function_mode_fcoe_t_MASK 0x00000001 -#define lpfc_function_mode_fcoe_t_WORD function_mode -#define lpfc_function_mode_dal_SHIFT 8 -#define lpfc_function_mode_dal_MASK 0x00000001 -#define lpfc_function_mode_dal_WORD function_mode -#define lpfc_function_mode_lro_SHIFT 9 -#define lpfc_function_mode_lro_MASK 0x00000001 -#define lpfc_function_mode_lro_WORD function_mode -#define lpfc_function_mode_flex10_SHIFT 10 -#define lpfc_function_mode_flex10_MASK 0x00000001 -#define lpfc_function_mode_flex10_WORD function_mode -#define lpfc_function_mode_ncsi_SHIFT 11 -#define lpfc_function_mode_ncsi_MASK 0x00000001 -#define lpfc_function_mode_ncsi_WORD function_mode -}; - /* Status field for embedded SLI_CONFIG mailbox command */ #define STATUS_SUCCESS 0x0 #define STATUS_FAILED 0x1 @@ -2965,7 +3003,7 @@ struct lpfc_mqe { struct lpfc_mbx_read_config rd_config; struct lpfc_mbx_request_features req_ftrs; struct lpfc_mbx_post_hdr_tmpl hdr_tmpl; - struct lpfc_mbx_query_fw_cfg query_fw_cfg; + struct lpfc_mbx_query_fw_config query_fw_cfg; struct lpfc_mbx_supp_pages supp_pages; struct lpfc_mbx_pc_sli4_params sli4_params; struct lpfc_mbx_get_sli4_parameters get_sli4_parameters; diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c index 7de4ef14..314b4f6 100644 --- a/drivers/scsi/lpfc/lpfc_init.c +++ b/drivers/scsi/lpfc/lpfc_init.c @@ -6229,9 +6229,11 @@ lpfc_sli4_bar0_register_memmap(struct lpfc_hba *phba, uint32_t if_type) phba->sli4_hba.conf_regs_memmap_p + LPFC_CTL_PORT_SEM_OFFSET; phba->sli4_hba.RQDBregaddr = - phba->sli4_hba.conf_regs_memmap_p + LPFC_RQ_DOORBELL; + phba->sli4_hba.conf_regs_memmap_p + + LPFC_ULP0_RQ_DOORBELL; phba->sli4_hba.WQDBregaddr = - phba->sli4_hba.conf_regs_memmap_p + LPFC_WQ_DOORBELL; + phba->sli4_hba.conf_regs_memmap_p + + LPFC_ULP0_WQ_DOORBELL; phba->sli4_hba.EQCQDBregaddr = phba->sli4_hba.conf_regs_memmap_p + LPFC_EQCQ_DOORBELL; phba->sli4_hba.MQDBregaddr = @@ -6285,9 +6287,11 @@ lpfc_sli4_bar2_register_memmap(struct lpfc_hba *phba, uint32_t vf) return -ENODEV; phba->sli4_hba.RQDBregaddr = (phba->sli4_hba.drbl_regs_memmap_p + - vf * LPFC_VFR_PAGE_SIZE + LPFC_RQ_DOORBELL); + vf * LPFC_VFR_PAGE_SIZE + + LPFC_ULP0_RQ_DOORBELL); phba->sli4_hba.WQDBregaddr = (phba->sli4_hba.drbl_regs_memmap_p + - vf * LPFC_VFR_PAGE_SIZE + LPFC_WQ_DOORBELL); + vf * LPFC_VFR_PAGE_SIZE + + LPFC_ULP0_WQ_DOORBELL); phba->sli4_hba.EQCQDBregaddr = (phba->sli4_hba.drbl_regs_memmap_p + vf * LPFC_VFR_PAGE_SIZE + LPFC_EQCQ_DOORBELL); phba->sli4_hba.MQDBregaddr = (phba->sli4_hba.drbl_regs_memmap_p + @@ -6983,6 +6987,19 @@ lpfc_sli4_queue_destroy(struct lpfc_hba *phba) phba->sli4_hba.fcp_wq = NULL; } + if (phba->pci_bar0_memmap_p) { + iounmap(phba->pci_bar0_memmap_p); + phba->pci_bar0_memmap_p = NULL; + } + if (phba->pci_bar2_memmap_p) { + iounmap(phba->pci_bar2_memmap_p); + phba->pci_bar2_memmap_p = NULL; + } + if (phba->pci_bar4_memmap_p) { + iounmap(phba->pci_bar4_memmap_p); + phba->pci_bar4_memmap_p = NULL; + } + /* Release FCP CQ mapping array */ if (phba->sli4_hba.fcp_cq_map != NULL) { kfree(phba->sli4_hba.fcp_cq_map); @@ -7046,6 +7063,53 @@ lpfc_sli4_queue_setup(struct lpfc_hba *phba) int rc = -ENOMEM; int fcp_eqidx, fcp_cqidx, fcp_wqidx; int fcp_cq_index = 0; + uint32_t shdr_status, shdr_add_status; + union lpfc_sli4_cfg_shdr *shdr; + LPFC_MBOXQ_t *mboxq; + uint32_t length; + + /* Check for dual-ULP support */ + mboxq = (LPFC_MBOXQ_t *)mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL); + if (!mboxq) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "3249 Unable to allocate memory for " + "QUERY_FW_CFG mailbox command\n"); + return -ENOMEM; + } + length = (sizeof(struct lpfc_mbx_query_fw_config) - + sizeof(struct lpfc_sli4_cfg_mhdr)); + lpfc_sli4_config(phba, mboxq, LPFC_MBOX_SUBSYSTEM_COMMON, + LPFC_MBOX_OPCODE_QUERY_FW_CFG, + length, LPFC_SLI4_MBX_EMBED); + + rc = lpfc_sli_issue_mbox(phba, mboxq, MBX_POLL); + + shdr = (union lpfc_sli4_cfg_shdr *) + &mboxq->u.mqe.un.sli4_config.header.cfg_shdr; + shdr_status = bf_get(lpfc_mbox_hdr_status, &shdr->response); + shdr_add_status = bf_get(lpfc_mbox_hdr_add_status, &shdr->response); + if (shdr_status || shdr_add_status || rc) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "3250 QUERY_FW_CFG mailbox failed with status " + "x%x add_status x%x, mbx status x%x\n", + shdr_status, shdr_add_status, rc); + if (rc != MBX_TIMEOUT) + mempool_free(mboxq, phba->mbox_mem_pool); + rc = -ENXIO; + goto out_error; + } + + phba->sli4_hba.fw_func_mode = + mboxq->u.mqe.un.query_fw_cfg.rsp.function_mode; + phba->sli4_hba.ulp0_mode = mboxq->u.mqe.un.query_fw_cfg.rsp.ulp0_mode; + phba->sli4_hba.ulp1_mode = mboxq->u.mqe.un.query_fw_cfg.rsp.ulp1_mode; + lpfc_printf_log(phba, KERN_INFO, LOG_INIT, + "3251 QUERY_FW_CFG: func_mode:x%x, ulp0_mode:x%x, " + "ulp1_mode:x%x\n", phba->sli4_hba.fw_func_mode, + phba->sli4_hba.ulp0_mode, phba->sli4_hba.ulp1_mode); + + if (rc != MBX_TIMEOUT) + mempool_free(mboxq, phba->mbox_mem_pool); /* * Set up HBA Event Queues (EQs) @@ -7660,78 +7724,6 @@ out: } /** - * lpfc_sli4_send_nop_mbox_cmds - Send sli-4 nop mailbox commands - * @phba: pointer to lpfc hba data structure. - * @cnt: number of nop mailbox commands to send. - * - * This routine is invoked to send a number @cnt of NOP mailbox command and - * wait for each command to complete. - * - * Return: the number of NOP mailbox command completed. - **/ -static int -lpfc_sli4_send_nop_mbox_cmds(struct lpfc_hba *phba, uint32_t cnt) -{ - LPFC_MBOXQ_t *mboxq; - int length, cmdsent; - uint32_t mbox_tmo; - uint32_t rc = 0; - uint32_t shdr_status, shdr_add_status; - union lpfc_sli4_cfg_shdr *shdr; - - if (cnt == 0) { - lpfc_printf_log(phba, KERN_WARNING, LOG_INIT, - "2518 Requested to send 0 NOP mailbox cmd\n"); - return cnt; - } - - mboxq = (LPFC_MBOXQ_t *)mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL); - if (!mboxq) { - lpfc_printf_log(phba, KERN_ERR, LOG_INIT, - "2519 Unable to allocate memory for issuing " - "NOP mailbox command\n"); - return 0; - } - - /* Set up NOP SLI4_CONFIG mailbox-ioctl command */ - length = (sizeof(struct lpfc_mbx_nop) - - sizeof(struct lpfc_sli4_cfg_mhdr)); - - for (cmdsent = 0; cmdsent < cnt; cmdsent++) { - lpfc_sli4_config(phba, mboxq, LPFC_MBOX_SUBSYSTEM_COMMON, - LPFC_MBOX_OPCODE_NOP, length, - LPFC_SLI4_MBX_EMBED); - if (!phba->sli4_hba.intr_enable) - rc = lpfc_sli_issue_mbox(phba, mboxq, MBX_POLL); - else { - mbox_tmo = lpfc_mbox_tmo_val(phba, mboxq); - rc = lpfc_sli_issue_mbox_wait(phba, mboxq, mbox_tmo); - } - if (rc == MBX_TIMEOUT) - break; - /* Check return status */ - shdr = (union lpfc_sli4_cfg_shdr *) - &mboxq->u.mqe.un.sli4_config.header.cfg_shdr; - shdr_status = bf_get(lpfc_mbox_hdr_status, &shdr->response); - shdr_add_status = bf_get(lpfc_mbox_hdr_add_status, - &shdr->response); - if (shdr_status || shdr_add_status || rc) { - lpfc_printf_log(phba, KERN_WARNING, LOG_INIT, - "2520 NOP mailbox command failed " - "status x%x add_status x%x mbx " - "status x%x\n", shdr_status, - shdr_add_status, rc); - break; - } - } - - if (rc != MBX_TIMEOUT) - mempool_free(mboxq, phba->mbox_mem_pool); - - return cmdsent; -} - -/** * lpfc_sli4_pci_mem_setup - Setup SLI4 HBA PCI memory space. * @phba: pointer to lpfc hba data structure. * @@ -8499,37 +8491,6 @@ lpfc_unset_hba(struct lpfc_hba *phba) } /** - * lpfc_sli4_unset_hba - Unset SLI4 hba device initialization. - * @phba: pointer to lpfc hba data structure. - * - * This routine is invoked to unset the HBA device initialization steps to - * a device with SLI-4 interface spec. - **/ -static void -lpfc_sli4_unset_hba(struct lpfc_hba *phba) -{ - struct lpfc_vport *vport = phba->pport; - struct Scsi_Host *shost = lpfc_shost_from_vport(vport); - - spin_lock_irq(shost->host_lock); - vport->load_flag |= FC_UNLOADING; - spin_unlock_irq(shost->host_lock); - - phba->pport->work_port_events = 0; - - /* Stop the SLI4 device port */ - lpfc_stop_port(phba); - - lpfc_sli4_disable_intr(phba); - - /* Reset SLI4 HBA FCoE function */ - lpfc_pci_function_reset(phba); - lpfc_sli4_queue_destroy(phba); - - return; -} - -/** * lpfc_sli4_xri_exchange_busy_wait - Wait for device XRI exchange busy * @phba: Pointer to HBA context object. * @@ -9591,7 +9552,6 @@ lpfc_pci_probe_one_s4(struct pci_dev *pdev, const struct pci_device_id *pid) struct Scsi_Host *shost = NULL; int error, ret; uint32_t cfg_mode, intr_mode; - int mcnt; int adjusted_fcp_io_channel; /* Allocate memory for HBA structure */ @@ -9680,58 +9640,35 @@ lpfc_pci_probe_one_s4(struct pci_dev *pdev, const struct pci_device_id *pid) shost = lpfc_shost_from_vport(vport); /* save shost for error cleanup */ /* Now, trying to enable interrupt and bring up the device */ cfg_mode = phba->cfg_use_msi; - while (true) { - /* Put device to a known state before enabling interrupt */ - lpfc_stop_port(phba); - /* Configure and enable interrupt */ - intr_mode = lpfc_sli4_enable_intr(phba, cfg_mode); - if (intr_mode == LPFC_INTR_ERROR) { - lpfc_printf_log(phba, KERN_ERR, LOG_INIT, - "0426 Failed to enable interrupt.\n"); - error = -ENODEV; - goto out_free_sysfs_attr; - } - /* Default to single EQ for non-MSI-X */ - if (phba->intr_type != MSIX) - adjusted_fcp_io_channel = 1; - else - adjusted_fcp_io_channel = phba->cfg_fcp_io_channel; - phba->cfg_fcp_io_channel = adjusted_fcp_io_channel; - /* Set up SLI-4 HBA */ - if (lpfc_sli4_hba_setup(phba)) { - lpfc_printf_log(phba, KERN_ERR, LOG_INIT, - "1421 Failed to set up hba\n"); - error = -ENODEV; - goto out_disable_intr; - } - /* Send NOP mbx cmds for non-INTx mode active interrupt test */ - if (intr_mode != 0) - mcnt = lpfc_sli4_send_nop_mbox_cmds(phba, - LPFC_ACT_INTR_CNT); - - /* Check active interrupts received only for MSI/MSI-X */ - if (intr_mode == 0 || - phba->sli.slistat.sli_intr >= LPFC_ACT_INTR_CNT) { - /* Log the current active interrupt mode */ - phba->intr_mode = intr_mode; - lpfc_log_intr_mode(phba, intr_mode); - break; - } - lpfc_printf_log(phba, KERN_INFO, LOG_INIT, - "0451 Configure interrupt mode (%d) " - "failed active interrupt test.\n", - intr_mode); - /* Unset the previous SLI-4 HBA setup. */ - /* - * TODO: Is this operation compatible with IF TYPE 2 - * devices? All port state is deleted and cleared. - */ - lpfc_sli4_unset_hba(phba); - /* Try next level of interrupt mode */ - cfg_mode = --intr_mode; + /* Put device to a known state before enabling interrupt */ + lpfc_stop_port(phba); + /* Configure and enable interrupt */ + intr_mode = lpfc_sli4_enable_intr(phba, cfg_mode); + if (intr_mode == LPFC_INTR_ERROR) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "0426 Failed to enable interrupt.\n"); + error = -ENODEV; + goto out_free_sysfs_attr; + } + /* Default to single EQ for non-MSI-X */ + if (phba->intr_type != MSIX) + adjusted_fcp_io_channel = 1; + else + adjusted_fcp_io_channel = phba->cfg_fcp_io_channel; + phba->cfg_fcp_io_channel = adjusted_fcp_io_channel; + /* Set up SLI-4 HBA */ + if (lpfc_sli4_hba_setup(phba)) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "1421 Failed to set up hba\n"); + error = -ENODEV; + goto out_disable_intr; } + /* Log the current active interrupt mode */ + phba->intr_mode = intr_mode; + lpfc_log_intr_mode(phba, intr_mode); + /* Perform post initialization setup */ lpfc_post_init_setup(phba); diff --git a/drivers/scsi/lpfc/lpfc_nportdisc.c b/drivers/scsi/lpfc/lpfc_nportdisc.c index d8fadcb..46128c6 100644 --- a/drivers/scsi/lpfc/lpfc_nportdisc.c +++ b/drivers/scsi/lpfc/lpfc_nportdisc.c @@ -1115,6 +1115,13 @@ out: "0261 Cannot Register NameServer login\n"); } + /* + ** In case the node reference counter does not go to zero, ensure that + ** the stale state for the node is not processed. + */ + + ndlp->nlp_prev_state = ndlp->nlp_state; + lpfc_nlp_set_state(vport, ndlp, NLP_STE_NPR_NODE); spin_lock_irq(shost->host_lock); ndlp->nlp_flag |= NLP_DEFER_RM; spin_unlock_irq(shost->host_lock); @@ -2159,13 +2166,16 @@ lpfc_cmpl_plogi_npr_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, { struct lpfc_iocbq *cmdiocb, *rspiocb; IOCB_t *irsp; + struct Scsi_Host *shost = lpfc_shost_from_vport(vport); cmdiocb = (struct lpfc_iocbq *) arg; rspiocb = cmdiocb->context_un.rsp_iocb; irsp = &rspiocb->iocb; if (irsp->ulpStatus) { + spin_lock_irq(shost->host_lock); ndlp->nlp_flag |= NLP_DEFER_RM; + spin_unlock_irq(shost->host_lock); return NLP_STE_FREED_NODE; } return ndlp->nlp_state; diff --git a/drivers/scsi/lpfc/lpfc_scsi.c b/drivers/scsi/lpfc/lpfc_scsi.c index 60e5a177..98af07c 100644 --- a/drivers/scsi/lpfc/lpfc_scsi.c +++ b/drivers/scsi/lpfc/lpfc_scsi.c @@ -288,6 +288,26 @@ lpfc_change_queue_depth(struct scsi_device *sdev, int qdepth, int reason) } /** + * lpfc_change_queue_type() - Change a device's scsi tag queuing type + * @sdev: Pointer the scsi device whose queue depth is to change + * @tag_type: Identifier for queue tag type + */ +static int +lpfc_change_queue_type(struct scsi_device *sdev, int tag_type) +{ + if (sdev->tagged_supported) { + scsi_set_tag_type(sdev, tag_type); + if (tag_type) + scsi_activate_tcq(sdev, sdev->queue_depth); + else + scsi_deactivate_tcq(sdev, sdev->queue_depth); + } else + tag_type = 0; + + return tag_type; +} + +/** * lpfc_rampdown_queue_depth - Post RAMP_DOWN_QUEUE event to worker thread * @phba: The Hba for which this call is being executed. * @@ -3972,7 +3992,7 @@ lpfc_scsi_prep_cmnd(struct lpfc_vport *vport, struct lpfc_scsi_buf *lpfc_cmd, break; } } else - fcp_cmnd->fcpCntl1 = 0; + fcp_cmnd->fcpCntl1 = SIMPLE_Q; sli4 = (phba->sli_rev == LPFC_SLI_REV4); @@ -5150,6 +5170,7 @@ struct scsi_host_template lpfc_template = { .max_sectors = 0xFFFF, .vendor_id = LPFC_NL_VENDOR_ID, .change_queue_depth = lpfc_change_queue_depth, + .change_queue_type = lpfc_change_queue_type, }; struct scsi_host_template lpfc_vport_template = { @@ -5172,4 +5193,5 @@ struct scsi_host_template lpfc_vport_template = { .shost_attrs = lpfc_vport_attrs, .max_sectors = 0xFFFF, .change_queue_depth = lpfc_change_queue_depth, + .change_queue_type = lpfc_change_queue_type, }; diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c index 624eab3..74b67d9 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@ -124,10 +124,17 @@ lpfc_sli4_wq_put(struct lpfc_queue *q, union lpfc_wqe *wqe) /* Ring Doorbell */ doorbell.word0 = 0; - bf_set(lpfc_wq_doorbell_num_posted, &doorbell, 1); - bf_set(lpfc_wq_doorbell_index, &doorbell, host_index); - bf_set(lpfc_wq_doorbell_id, &doorbell, q->queue_id); - writel(doorbell.word0, q->phba->sli4_hba.WQDBregaddr); + if (q->db_format == LPFC_DB_LIST_FORMAT) { + bf_set(lpfc_wq_db_list_fm_num_posted, &doorbell, 1); + bf_set(lpfc_wq_db_list_fm_index, &doorbell, host_index); + bf_set(lpfc_wq_db_list_fm_id, &doorbell, q->queue_id); + } else if (q->db_format == LPFC_DB_RING_FORMAT) { + bf_set(lpfc_wq_db_ring_fm_num_posted, &doorbell, 1); + bf_set(lpfc_wq_db_ring_fm_id, &doorbell, q->queue_id); + } else { + return -EINVAL; + } + writel(doorbell.word0, q->db_regaddr); return 0; } @@ -456,10 +463,20 @@ lpfc_sli4_rq_put(struct lpfc_queue *hq, struct lpfc_queue *dq, /* Ring The Header Receive Queue Doorbell */ if (!(hq->host_index % hq->entry_repost)) { doorbell.word0 = 0; - bf_set(lpfc_rq_doorbell_num_posted, &doorbell, - hq->entry_repost); - bf_set(lpfc_rq_doorbell_id, &doorbell, hq->queue_id); - writel(doorbell.word0, hq->phba->sli4_hba.RQDBregaddr); + if (hq->db_format == LPFC_DB_RING_FORMAT) { + bf_set(lpfc_rq_db_ring_fm_num_posted, &doorbell, + hq->entry_repost); + bf_set(lpfc_rq_db_ring_fm_id, &doorbell, hq->queue_id); + } else if (hq->db_format == LPFC_DB_LIST_FORMAT) { + bf_set(lpfc_rq_db_list_fm_num_posted, &doorbell, + hq->entry_repost); + bf_set(lpfc_rq_db_list_fm_index, &doorbell, + hq->host_index); + bf_set(lpfc_rq_db_list_fm_id, &doorbell, hq->queue_id); + } else { + return -EINVAL; + } + writel(doorbell.word0, hq->db_regaddr); } return put_index; } @@ -4939,7 +4956,7 @@ out_free_mboxq: static void lpfc_sli4_arm_cqeq_intr(struct lpfc_hba *phba) { - uint8_t fcp_eqidx; + int fcp_eqidx; lpfc_sli4_cq_release(phba->sli4_hba.mbx_cq, LPFC_QUEUE_REARM); lpfc_sli4_cq_release(phba->sli4_hba.els_cq, LPFC_QUEUE_REARM); @@ -5622,6 +5639,13 @@ lpfc_sli4_alloc_resource_identifiers(struct lpfc_hba *phba) } /* RPIs. */ count = phba->sli4_hba.max_cfg_param.max_rpi; + if (count <= 0) { + lpfc_printf_log(phba, KERN_ERR, LOG_SLI, + "3279 Invalid provisioning of " + "rpi:%d\n", count); + rc = -EINVAL; + goto err_exit; + } base = phba->sli4_hba.max_cfg_param.rpi_base; longs = (count + BITS_PER_LONG - 1) / BITS_PER_LONG; phba->sli4_hba.rpi_bmask = kzalloc(longs * @@ -5644,6 +5668,13 @@ lpfc_sli4_alloc_resource_identifiers(struct lpfc_hba *phba) /* VPIs. */ count = phba->sli4_hba.max_cfg_param.max_vpi; + if (count <= 0) { + lpfc_printf_log(phba, KERN_ERR, LOG_SLI, + "3280 Invalid provisioning of " + "vpi:%d\n", count); + rc = -EINVAL; + goto free_rpi_ids; + } base = phba->sli4_hba.max_cfg_param.vpi_base; longs = (count + BITS_PER_LONG - 1) / BITS_PER_LONG; phba->vpi_bmask = kzalloc(longs * @@ -5666,6 +5697,13 @@ lpfc_sli4_alloc_resource_identifiers(struct lpfc_hba *phba) /* XRIs. */ count = phba->sli4_hba.max_cfg_param.max_xri; + if (count <= 0) { + lpfc_printf_log(phba, KERN_ERR, LOG_SLI, + "3281 Invalid provisioning of " + "xri:%d\n", count); + rc = -EINVAL; + goto free_vpi_ids; + } base = phba->sli4_hba.max_cfg_param.xri_base; longs = (count + BITS_PER_LONG - 1) / BITS_PER_LONG; phba->sli4_hba.xri_bmask = kzalloc(longs * @@ -5689,6 +5727,13 @@ lpfc_sli4_alloc_resource_identifiers(struct lpfc_hba *phba) /* VFIs. */ count = phba->sli4_hba.max_cfg_param.max_vfi; + if (count <= 0) { + lpfc_printf_log(phba, KERN_ERR, LOG_SLI, + "3282 Invalid provisioning of " + "vfi:%d\n", count); + rc = -EINVAL; + goto free_xri_ids; + } base = phba->sli4_hba.max_cfg_param.vfi_base; longs = (count + BITS_PER_LONG - 1) / BITS_PER_LONG; phba->sli4_hba.vfi_bmask = kzalloc(longs * @@ -6599,7 +6644,7 @@ static int lpfc_sli_issue_mbox_s3(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmbox, uint32_t flag) { - MAILBOX_t *mb; + MAILBOX_t *mbx; struct lpfc_sli *psli = &phba->sli; uint32_t status, evtctr; uint32_t ha_copy, hc_copy; @@ -6653,7 +6698,7 @@ lpfc_sli_issue_mbox_s3(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmbox, psli = &phba->sli; - mb = &pmbox->u.mb; + mbx = &pmbox->u.mb; status = MBX_SUCCESS; if (phba->link_state == LPFC_HBA_ERROR) { @@ -6668,7 +6713,7 @@ lpfc_sli_issue_mbox_s3(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmbox, goto out_not_finished; } - if (mb->mbxCommand != MBX_KILL_BOARD && flag & MBX_NOWAIT) { + if (mbx->mbxCommand != MBX_KILL_BOARD && flag & MBX_NOWAIT) { if (lpfc_readl(phba->HCregaddr, &hc_copy) || !(hc_copy & HC_MBINT_ENA)) { spin_unlock_irqrestore(&phba->hbalock, drvr_flag); @@ -6722,7 +6767,7 @@ lpfc_sli_issue_mbox_s3(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmbox, "(%d):0308 Mbox cmd issue - BUSY Data: " "x%x x%x x%x x%x\n", pmbox->vport ? pmbox->vport->vpi : 0xffffff, - mb->mbxCommand, phba->pport->port_state, + mbx->mbxCommand, phba->pport->port_state, psli->sli_flag, flag); psli->slistat.mbox_busy++; @@ -6732,15 +6777,15 @@ lpfc_sli_issue_mbox_s3(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmbox, lpfc_debugfs_disc_trc(pmbox->vport, LPFC_DISC_TRC_MBOX_VPORT, "MBOX Bsy vport: cmd:x%x mb:x%x x%x", - (uint32_t)mb->mbxCommand, - mb->un.varWords[0], mb->un.varWords[1]); + (uint32_t)mbx->mbxCommand, + mbx->un.varWords[0], mbx->un.varWords[1]); } else { lpfc_debugfs_disc_trc(phba->pport, LPFC_DISC_TRC_MBOX, "MBOX Bsy: cmd:x%x mb:x%x x%x", - (uint32_t)mb->mbxCommand, - mb->un.varWords[0], mb->un.varWords[1]); + (uint32_t)mbx->mbxCommand, + mbx->un.varWords[0], mbx->un.varWords[1]); } return MBX_BUSY; @@ -6751,7 +6796,7 @@ lpfc_sli_issue_mbox_s3(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmbox, /* If we are not polling, we MUST be in SLI2 mode */ if (flag != MBX_POLL) { if (!(psli->sli_flag & LPFC_SLI_ACTIVE) && - (mb->mbxCommand != MBX_KILL_BOARD)) { + (mbx->mbxCommand != MBX_KILL_BOARD)) { psli->sli_flag &= ~LPFC_SLI_MBOX_ACTIVE; spin_unlock_irqrestore(&phba->hbalock, drvr_flag); /* Mbox command <mbxCommand> cannot issue */ @@ -6773,23 +6818,23 @@ lpfc_sli_issue_mbox_s3(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmbox, "(%d):0309 Mailbox cmd x%x issue Data: x%x x%x " "x%x\n", pmbox->vport ? pmbox->vport->vpi : 0, - mb->mbxCommand, phba->pport->port_state, + mbx->mbxCommand, phba->pport->port_state, psli->sli_flag, flag); - if (mb->mbxCommand != MBX_HEARTBEAT) { + if (mbx->mbxCommand != MBX_HEARTBEAT) { if (pmbox->vport) { lpfc_debugfs_disc_trc(pmbox->vport, LPFC_DISC_TRC_MBOX_VPORT, "MBOX Send vport: cmd:x%x mb:x%x x%x", - (uint32_t)mb->mbxCommand, - mb->un.varWords[0], mb->un.varWords[1]); + (uint32_t)mbx->mbxCommand, + mbx->un.varWords[0], mbx->un.varWords[1]); } else { lpfc_debugfs_disc_trc(phba->pport, LPFC_DISC_TRC_MBOX, "MBOX Send: cmd:x%x mb:x%x x%x", - (uint32_t)mb->mbxCommand, - mb->un.varWords[0], mb->un.varWords[1]); + (uint32_t)mbx->mbxCommand, + mbx->un.varWords[0], mbx->un.varWords[1]); } } @@ -6797,12 +6842,12 @@ lpfc_sli_issue_mbox_s3(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmbox, evtctr = psli->slistat.mbox_event; /* next set own bit for the adapter and copy over command word */ - mb->mbxOwner = OWN_CHIP; + mbx->mbxOwner = OWN_CHIP; if (psli->sli_flag & LPFC_SLI_ACTIVE) { /* Populate mbox extension offset word. */ if (pmbox->in_ext_byte_len || pmbox->out_ext_byte_len) { - *(((uint32_t *)mb) + pmbox->mbox_offset_word) + *(((uint32_t *)mbx) + pmbox->mbox_offset_word) = (uint8_t *)phba->mbox_ext - (uint8_t *)phba->mbox; } @@ -6814,11 +6859,11 @@ lpfc_sli_issue_mbox_s3(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmbox, pmbox->in_ext_byte_len); } /* Copy command data to host SLIM area */ - lpfc_sli_pcimem_bcopy(mb, phba->mbox, MAILBOX_CMD_SIZE); + lpfc_sli_pcimem_bcopy(mbx, phba->mbox, MAILBOX_CMD_SIZE); } else { /* Populate mbox extension offset word. */ if (pmbox->in_ext_byte_len || pmbox->out_ext_byte_len) - *(((uint32_t *)mb) + pmbox->mbox_offset_word) + *(((uint32_t *)mbx) + pmbox->mbox_offset_word) = MAILBOX_HBA_EXT_OFFSET; /* Copy the mailbox extension data */ @@ -6828,24 +6873,24 @@ lpfc_sli_issue_mbox_s3(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmbox, pmbox->context2, pmbox->in_ext_byte_len); } - if (mb->mbxCommand == MBX_CONFIG_PORT) { + if (mbx->mbxCommand == MBX_CONFIG_PORT) { /* copy command data into host mbox for cmpl */ - lpfc_sli_pcimem_bcopy(mb, phba->mbox, MAILBOX_CMD_SIZE); + lpfc_sli_pcimem_bcopy(mbx, phba->mbox, MAILBOX_CMD_SIZE); } /* First copy mbox command data to HBA SLIM, skip past first word */ to_slim = phba->MBslimaddr + sizeof (uint32_t); - lpfc_memcpy_to_slim(to_slim, &mb->un.varWords[0], + lpfc_memcpy_to_slim(to_slim, &mbx->un.varWords[0], MAILBOX_CMD_SIZE - sizeof (uint32_t)); /* Next copy over first word, with mbxOwner set */ - ldata = *((uint32_t *)mb); + ldata = *((uint32_t *)mbx); to_slim = phba->MBslimaddr; writel(ldata, to_slim); readl(to_slim); /* flush */ - if (mb->mbxCommand == MBX_CONFIG_PORT) { + if (mbx->mbxCommand == MBX_CONFIG_PORT) { /* switch over to host mailbox */ psli->sli_flag |= LPFC_SLI_ACTIVE; } @@ -6920,7 +6965,7 @@ lpfc_sli_issue_mbox_s3(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmbox, /* First copy command data */ word0 = *((uint32_t *)phba->mbox); word0 = le32_to_cpu(word0); - if (mb->mbxCommand == MBX_CONFIG_PORT) { + if (mbx->mbxCommand == MBX_CONFIG_PORT) { MAILBOX_t *slimmb; uint32_t slimword0; /* Check real SLIM for any errors */ @@ -6947,7 +6992,7 @@ lpfc_sli_issue_mbox_s3(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmbox, if (psli->sli_flag & LPFC_SLI_ACTIVE) { /* copy results back to user */ - lpfc_sli_pcimem_bcopy(phba->mbox, mb, MAILBOX_CMD_SIZE); + lpfc_sli_pcimem_bcopy(phba->mbox, mbx, MAILBOX_CMD_SIZE); /* Copy the mailbox extension data */ if (pmbox->out_ext_byte_len && pmbox->context2) { lpfc_sli_pcimem_bcopy(phba->mbox_ext, @@ -6956,7 +7001,7 @@ lpfc_sli_issue_mbox_s3(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmbox, } } else { /* First copy command data */ - lpfc_memcpy_from_slim(mb, phba->MBslimaddr, + lpfc_memcpy_from_slim(mbx, phba->MBslimaddr, MAILBOX_CMD_SIZE); /* Copy the mailbox extension data */ if (pmbox->out_ext_byte_len && pmbox->context2) { @@ -6971,7 +7016,7 @@ lpfc_sli_issue_mbox_s3(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmbox, readl(phba->HAregaddr); /* flush */ psli->sli_flag &= ~LPFC_SLI_MBOX_ACTIVE; - status = mb->mbxStatus; + status = mbx->mbxStatus; } spin_unlock_irqrestore(&phba->hbalock, drvr_flag); @@ -8370,7 +8415,7 @@ __lpfc_sli_issue_iocb_s4(struct lpfc_hba *phba, uint32_t ring_number, * This is a continuation of a commandi,(CX) so this * sglq is on the active list */ - sglq = __lpfc_get_active_sglq(phba, piocb->sli4_xritag); + sglq = __lpfc_get_active_sglq(phba, piocb->sli4_lxritag); if (!sglq) return IOCB_ERROR; } @@ -8855,12 +8900,6 @@ lpfc_sli_setup(struct lpfc_hba *phba) pring->prt[3].type = FC_TYPE_CT; pring->prt[3].lpfc_sli_rcv_unsol_event = lpfc_ct_unsol_event; - /* abort unsolicited sequence */ - pring->prt[4].profile = 0; /* Mask 4 */ - pring->prt[4].rctl = FC_RCTL_BA_ABTS; - pring->prt[4].type = FC_TYPE_BLS; - pring->prt[4].lpfc_sli_rcv_unsol_event = - lpfc_sli4_ct_abort_unsol_event; break; } totiocbsize += (pring->sli.sli3.numCiocb * @@ -11873,7 +11912,7 @@ lpfc_sli4_hba_intr_handler(int irq, void *dev_id) struct lpfc_eqe *eqe; unsigned long iflag; int ecount = 0; - uint32_t fcp_eqidx; + int fcp_eqidx; /* Get the driver's phba structure from the dev_id */ fcp_eq_hdl = (struct lpfc_fcp_eq_hdl *)dev_id; @@ -11975,7 +12014,7 @@ lpfc_sli4_intr_handler(int irq, void *dev_id) struct lpfc_hba *phba; irqreturn_t hba_irq_rc; bool hba_handled = false; - uint32_t fcp_eqidx; + int fcp_eqidx; /* Get the driver's phba structure from the dev_id */ phba = (struct lpfc_hba *)dev_id; @@ -12097,6 +12136,54 @@ out_fail: } /** + * lpfc_dual_chute_pci_bar_map - Map pci base address register to host memory + * @phba: HBA structure that indicates port to create a queue on. + * @pci_barset: PCI BAR set flag. + * + * This function shall perform iomap of the specified PCI BAR address to host + * memory address if not already done so and return it. The returned host + * memory address can be NULL. + */ +static void __iomem * +lpfc_dual_chute_pci_bar_map(struct lpfc_hba *phba, uint16_t pci_barset) +{ + struct pci_dev *pdev; + unsigned long bar_map, bar_map_len; + + if (!phba->pcidev) + return NULL; + else + pdev = phba->pcidev; + + switch (pci_barset) { + case WQ_PCI_BAR_0_AND_1: + if (!phba->pci_bar0_memmap_p) { + bar_map = pci_resource_start(pdev, PCI_64BIT_BAR0); + bar_map_len = pci_resource_len(pdev, PCI_64BIT_BAR0); + phba->pci_bar0_memmap_p = ioremap(bar_map, bar_map_len); + } + return phba->pci_bar0_memmap_p; + case WQ_PCI_BAR_2_AND_3: + if (!phba->pci_bar2_memmap_p) { + bar_map = pci_resource_start(pdev, PCI_64BIT_BAR2); + bar_map_len = pci_resource_len(pdev, PCI_64BIT_BAR2); + phba->pci_bar2_memmap_p = ioremap(bar_map, bar_map_len); + } + return phba->pci_bar2_memmap_p; + case WQ_PCI_BAR_4_AND_5: + if (!phba->pci_bar4_memmap_p) { + bar_map = pci_resource_start(pdev, PCI_64BIT_BAR4); + bar_map_len = pci_resource_len(pdev, PCI_64BIT_BAR4); + phba->pci_bar4_memmap_p = ioremap(bar_map, bar_map_len); + } + return phba->pci_bar4_memmap_p; + default: + break; + } + return NULL; +} + +/** * lpfc_modify_fcp_eq_delay - Modify Delay Multiplier on FCP EQs * @phba: HBA structure that indicates port to create a queue on. * @startq: The starting FCP EQ to modify @@ -12673,6 +12760,9 @@ lpfc_wq_create(struct lpfc_hba *phba, struct lpfc_queue *wq, union lpfc_sli4_cfg_shdr *shdr; uint32_t hw_page_size = phba->sli4_hba.pc_sli4_params.if_page_sz; struct dma_address *page; + void __iomem *bar_memmap_p; + uint32_t db_offset; + uint16_t pci_barset; /* sanity check on queue memory */ if (!wq || !cq) @@ -12696,6 +12786,7 @@ lpfc_wq_create(struct lpfc_hba *phba, struct lpfc_queue *wq, cq->queue_id); bf_set(lpfc_mbox_hdr_version, &shdr->request, phba->sli4_hba.pc_sli4_params.wqv); + if (phba->sli4_hba.pc_sli4_params.wqv == LPFC_Q_CREATE_VERSION_1) { bf_set(lpfc_mbx_wq_create_wqe_count, &wq_create->u.request_1, wq->entry_count); @@ -12723,6 +12814,10 @@ lpfc_wq_create(struct lpfc_hba *phba, struct lpfc_queue *wq, page[dmabuf->buffer_tag].addr_lo = putPaddrLow(dmabuf->phys); page[dmabuf->buffer_tag].addr_hi = putPaddrHigh(dmabuf->phys); } + + if (phba->sli4_hba.fw_func_mode & LPFC_DUA_MODE) + bf_set(lpfc_mbx_wq_create_dua, &wq_create->u.request, 1); + rc = lpfc_sli_issue_mbox(phba, mbox, MBX_POLL); /* The IOCTL status is embedded in the mailbox subheader. */ shdr_status = bf_get(lpfc_mbox_hdr_status, &shdr->response); @@ -12740,6 +12835,47 @@ lpfc_wq_create(struct lpfc_hba *phba, struct lpfc_queue *wq, status = -ENXIO; goto out; } + if (phba->sli4_hba.fw_func_mode & LPFC_DUA_MODE) { + wq->db_format = bf_get(lpfc_mbx_wq_create_db_format, + &wq_create->u.response); + if ((wq->db_format != LPFC_DB_LIST_FORMAT) && + (wq->db_format != LPFC_DB_RING_FORMAT)) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "3265 WQ[%d] doorbell format not " + "supported: x%x\n", wq->queue_id, + wq->db_format); + status = -EINVAL; + goto out; + } + pci_barset = bf_get(lpfc_mbx_wq_create_bar_set, + &wq_create->u.response); + bar_memmap_p = lpfc_dual_chute_pci_bar_map(phba, pci_barset); + if (!bar_memmap_p) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "3263 WQ[%d] failed to memmap pci " + "barset:x%x\n", wq->queue_id, + pci_barset); + status = -ENOMEM; + goto out; + } + db_offset = wq_create->u.response.doorbell_offset; + if ((db_offset != LPFC_ULP0_WQ_DOORBELL) && + (db_offset != LPFC_ULP1_WQ_DOORBELL)) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "3252 WQ[%d] doorbell offset not " + "supported: x%x\n", wq->queue_id, + db_offset); + status = -EINVAL; + goto out; + } + wq->db_regaddr = bar_memmap_p + db_offset; + lpfc_printf_log(phba, KERN_INFO, LOG_INIT, + "3264 WQ[%d]: barset:x%x, offset:x%x\n", + wq->queue_id, pci_barset, db_offset); + } else { + wq->db_format = LPFC_DB_LIST_FORMAT; + wq->db_regaddr = phba->sli4_hba.WQDBregaddr; + } wq->type = LPFC_WQ; wq->assoc_qid = cq->queue_id; wq->subtype = subtype; @@ -12816,6 +12952,9 @@ lpfc_rq_create(struct lpfc_hba *phba, struct lpfc_queue *hrq, uint32_t shdr_status, shdr_add_status; union lpfc_sli4_cfg_shdr *shdr; uint32_t hw_page_size = phba->sli4_hba.pc_sli4_params.if_page_sz; + void __iomem *bar_memmap_p; + uint32_t db_offset; + uint16_t pci_barset; /* sanity check on queue memory */ if (!hrq || !drq || !cq) @@ -12894,6 +13033,9 @@ lpfc_rq_create(struct lpfc_hba *phba, struct lpfc_queue *hrq, rq_create->u.request.page[dmabuf->buffer_tag].addr_hi = putPaddrHigh(dmabuf->phys); } + if (phba->sli4_hba.fw_func_mode & LPFC_DUA_MODE) + bf_set(lpfc_mbx_rq_create_dua, &rq_create->u.request, 1); + rc = lpfc_sli_issue_mbox(phba, mbox, MBX_POLL); /* The IOCTL status is embedded in the mailbox subheader. */ shdr_status = bf_get(lpfc_mbox_hdr_status, &shdr->response); @@ -12911,6 +13053,50 @@ lpfc_rq_create(struct lpfc_hba *phba, struct lpfc_queue *hrq, status = -ENXIO; goto out; } + + if (phba->sli4_hba.fw_func_mode & LPFC_DUA_MODE) { + hrq->db_format = bf_get(lpfc_mbx_rq_create_db_format, + &rq_create->u.response); + if ((hrq->db_format != LPFC_DB_LIST_FORMAT) && + (hrq->db_format != LPFC_DB_RING_FORMAT)) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "3262 RQ [%d] doorbell format not " + "supported: x%x\n", hrq->queue_id, + hrq->db_format); + status = -EINVAL; + goto out; + } + + pci_barset = bf_get(lpfc_mbx_rq_create_bar_set, + &rq_create->u.response); + bar_memmap_p = lpfc_dual_chute_pci_bar_map(phba, pci_barset); + if (!bar_memmap_p) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "3269 RQ[%d] failed to memmap pci " + "barset:x%x\n", hrq->queue_id, + pci_barset); + status = -ENOMEM; + goto out; + } + + db_offset = rq_create->u.response.doorbell_offset; + if ((db_offset != LPFC_ULP0_RQ_DOORBELL) && + (db_offset != LPFC_ULP1_RQ_DOORBELL)) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "3270 RQ[%d] doorbell offset not " + "supported: x%x\n", hrq->queue_id, + db_offset); + status = -EINVAL; + goto out; + } + hrq->db_regaddr = bar_memmap_p + db_offset; + lpfc_printf_log(phba, KERN_INFO, LOG_INIT, + "3266 RQ[qid:%d]: barset:x%x, offset:x%x\n", + hrq->queue_id, pci_barset, db_offset); + } else { + hrq->db_format = LPFC_DB_RING_FORMAT; + hrq->db_regaddr = phba->sli4_hba.RQDBregaddr; + } hrq->type = LPFC_HRQ; hrq->assoc_qid = cq->queue_id; hrq->subtype = subtype; @@ -12976,6 +13162,8 @@ lpfc_rq_create(struct lpfc_hba *phba, struct lpfc_queue *hrq, rq_create->u.request.page[dmabuf->buffer_tag].addr_hi = putPaddrHigh(dmabuf->phys); } + if (phba->sli4_hba.fw_func_mode & LPFC_DUA_MODE) + bf_set(lpfc_mbx_rq_create_dua, &rq_create->u.request, 1); rc = lpfc_sli_issue_mbox(phba, mbox, MBX_POLL); /* The IOCTL status is embedded in the mailbox subheader. */ shdr = (union lpfc_sli4_cfg_shdr *) &rq_create->header.cfg_shdr; @@ -14063,6 +14251,40 @@ lpfc_sli4_abort_partial_seq(struct lpfc_vport *vport, } /** + * lpfc_sli4_abort_ulp_seq - Abort assembled unsol sequence from ulp + * @vport: pointer to a vitural port + * @dmabuf: pointer to a dmabuf that describes the FC sequence + * + * This function tries to abort from the assembed sequence from upper level + * protocol, described by the information from basic abbort @dmabuf. It + * checks to see whether such pending context exists at upper level protocol. + * If so, it shall clean up the pending context. + * + * Return + * true -- if there is matching pending context of the sequence cleaned + * at ulp; + * false -- if there is no matching pending context of the sequence present + * at ulp. + **/ +static bool +lpfc_sli4_abort_ulp_seq(struct lpfc_vport *vport, struct hbq_dmabuf *dmabuf) +{ + struct lpfc_hba *phba = vport->phba; + int handled; + + /* Accepting abort at ulp with SLI4 only */ + if (phba->sli_rev < LPFC_SLI_REV4) + return false; + + /* Register all caring upper level protocols to attend abort */ + handled = lpfc_ct_handle_unsol_abort(phba, dmabuf); + if (handled) + return true; + + return false; +} + +/** * lpfc_sli4_seq_abort_rsp_cmpl - BLS ABORT RSP seq abort iocb complete handler * @phba: Pointer to HBA context object. * @cmd_iocbq: pointer to the command iocbq structure. @@ -14077,8 +14299,14 @@ lpfc_sli4_seq_abort_rsp_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *cmd_iocbq, struct lpfc_iocbq *rsp_iocbq) { - if (cmd_iocbq) + struct lpfc_nodelist *ndlp; + + if (cmd_iocbq) { + ndlp = (struct lpfc_nodelist *)cmd_iocbq->context1; + lpfc_nlp_put(ndlp); + lpfc_nlp_not_used(ndlp); lpfc_sli_release_iocbq(phba, cmd_iocbq); + } /* Failure means BLS ABORT RSP did not get delivered to remote node*/ if (rsp_iocbq && rsp_iocbq->iocb.ulpStatus) @@ -14118,9 +14346,10 @@ lpfc_sli4_xri_inrange(struct lpfc_hba *phba, * event after aborting the sequence handling. **/ static void -lpfc_sli4_seq_abort_rsp(struct lpfc_hba *phba, - struct fc_frame_header *fc_hdr) +lpfc_sli4_seq_abort_rsp(struct lpfc_vport *vport, + struct fc_frame_header *fc_hdr, bool aborted) { + struct lpfc_hba *phba = vport->phba; struct lpfc_iocbq *ctiocb = NULL; struct lpfc_nodelist *ndlp; uint16_t oxid, rxid, xri, lxri; @@ -14135,12 +14364,27 @@ lpfc_sli4_seq_abort_rsp(struct lpfc_hba *phba, oxid = be16_to_cpu(fc_hdr->fh_ox_id); rxid = be16_to_cpu(fc_hdr->fh_rx_id); - ndlp = lpfc_findnode_did(phba->pport, sid); + ndlp = lpfc_findnode_did(vport, sid); if (!ndlp) { - lpfc_printf_log(phba, KERN_WARNING, LOG_ELS, - "1268 Find ndlp returned NULL for oxid:x%x " - "SID:x%x\n", oxid, sid); - return; + ndlp = mempool_alloc(phba->nlp_mem_pool, GFP_KERNEL); + if (!ndlp) { + lpfc_printf_vlog(vport, KERN_WARNING, LOG_ELS, + "1268 Failed to allocate ndlp for " + "oxid:x%x SID:x%x\n", oxid, sid); + return; + } + lpfc_nlp_init(vport, ndlp, sid); + /* Put ndlp onto pport node list */ + lpfc_enqueue_node(vport, ndlp); + } else if (!NLP_CHK_NODE_ACT(ndlp)) { + /* re-setup ndlp without removing from node list */ + ndlp = lpfc_enable_node(vport, ndlp, NLP_STE_UNUSED_NODE); + if (!ndlp) { + lpfc_printf_vlog(vport, KERN_WARNING, LOG_ELS, + "3275 Failed to active ndlp found " + "for oxid:x%x SID:x%x\n", oxid, sid); + return; + } } /* Allocate buffer for rsp iocb */ @@ -14164,7 +14408,7 @@ lpfc_sli4_seq_abort_rsp(struct lpfc_hba *phba, icmd->ulpLe = 1; icmd->ulpClass = CLASS3; icmd->ulpContext = phba->sli4_hba.rpi_ids[ndlp->nlp_rpi]; - ctiocb->context1 = ndlp; + ctiocb->context1 = lpfc_nlp_get(ndlp); ctiocb->iocb_cmpl = NULL; ctiocb->vport = phba->pport; @@ -14183,14 +14427,24 @@ lpfc_sli4_seq_abort_rsp(struct lpfc_hba *phba, if (lxri != NO_XRI) lpfc_set_rrq_active(phba, ndlp, lxri, (xri == oxid) ? rxid : oxid, 0); - /* If the oxid maps to the FCP XRI range or if it is out of range, - * send a BLS_RJT. The driver no longer has that exchange. - * Override the IOCB for a BA_RJT. + /* For BA_ABTS from exchange responder, if the logical xri with + * the oxid maps to the FCP XRI range, the port no longer has + * that exchange context, send a BLS_RJT. Override the IOCB for + * a BA_RJT. */ - if (xri > (phba->sli4_hba.max_cfg_param.max_xri + - phba->sli4_hba.max_cfg_param.xri_base) || - xri > (lpfc_sli4_get_els_iocb_cnt(phba) + - phba->sli4_hba.max_cfg_param.xri_base)) { + if ((fctl & FC_FC_EX_CTX) && + (lxri > lpfc_sli4_get_els_iocb_cnt(phba))) { + icmd->un.xseq64.w5.hcsw.Rctl = FC_RCTL_BA_RJT; + bf_set(lpfc_vndr_code, &icmd->un.bls_rsp, 0); + bf_set(lpfc_rsn_expln, &icmd->un.bls_rsp, FC_BA_RJT_INV_XID); + bf_set(lpfc_rsn_code, &icmd->un.bls_rsp, FC_BA_RJT_UNABLE); + } + + /* If BA_ABTS failed to abort a partially assembled receive sequence, + * the driver no longer has that exchange, send a BLS_RJT. Override + * the IOCB for a BA_RJT. + */ + if (aborted == false) { icmd->un.xseq64.w5.hcsw.Rctl = FC_RCTL_BA_RJT; bf_set(lpfc_vndr_code, &icmd->un.bls_rsp, 0); bf_set(lpfc_rsn_expln, &icmd->un.bls_rsp, FC_BA_RJT_INV_XID); @@ -14214,17 +14468,19 @@ lpfc_sli4_seq_abort_rsp(struct lpfc_hba *phba, bf_set(lpfc_abts_oxid, &icmd->un.bls_rsp, oxid); /* Xmit CT abts response on exchange <xid> */ - lpfc_printf_log(phba, KERN_INFO, LOG_ELS, - "1200 Send BLS cmd x%x on oxid x%x Data: x%x\n", - icmd->un.xseq64.w5.hcsw.Rctl, oxid, phba->link_state); + lpfc_printf_vlog(vport, KERN_INFO, LOG_ELS, + "1200 Send BLS cmd x%x on oxid x%x Data: x%x\n", + icmd->un.xseq64.w5.hcsw.Rctl, oxid, phba->link_state); rc = lpfc_sli_issue_iocb(phba, LPFC_ELS_RING, ctiocb, 0); if (rc == IOCB_ERROR) { - lpfc_printf_log(phba, KERN_ERR, LOG_ELS, - "2925 Failed to issue CT ABTS RSP x%x on " - "xri x%x, Data x%x\n", - icmd->un.xseq64.w5.hcsw.Rctl, oxid, - phba->link_state); + lpfc_printf_vlog(vport, KERN_ERR, LOG_ELS, + "2925 Failed to issue CT ABTS RSP x%x on " + "xri x%x, Data x%x\n", + icmd->un.xseq64.w5.hcsw.Rctl, oxid, + phba->link_state); + lpfc_nlp_put(ndlp); + ctiocb->context1 = NULL; lpfc_sli_release_iocbq(phba, ctiocb); } } @@ -14249,32 +14505,25 @@ lpfc_sli4_handle_unsol_abort(struct lpfc_vport *vport, struct lpfc_hba *phba = vport->phba; struct fc_frame_header fc_hdr; uint32_t fctl; - bool abts_par; + bool aborted; /* Make a copy of fc_hdr before the dmabuf being released */ memcpy(&fc_hdr, dmabuf->hbuf.virt, sizeof(struct fc_frame_header)); fctl = sli4_fctl_from_fc_hdr(&fc_hdr); if (fctl & FC_FC_EX_CTX) { - /* - * ABTS sent by responder to exchange, just free the buffer - */ - lpfc_in_buf_free(phba, &dmabuf->dbuf); + /* ABTS by responder to exchange, no cleanup needed */ + aborted = true; } else { - /* - * ABTS sent by initiator to exchange, need to do cleanup - */ - /* Try to abort partially assembled seq */ - abts_par = lpfc_sli4_abort_partial_seq(vport, dmabuf); - - /* Send abort to ULP if partially seq abort failed */ - if (abts_par == false) - lpfc_sli4_send_seq_to_ulp(vport, dmabuf); - else - lpfc_in_buf_free(phba, &dmabuf->dbuf); + /* ABTS by initiator to exchange, need to do cleanup */ + aborted = lpfc_sli4_abort_partial_seq(vport, dmabuf); + if (aborted == false) + aborted = lpfc_sli4_abort_ulp_seq(vport, dmabuf); } - /* Send basic accept (BA_ACC) to the abort requester */ - lpfc_sli4_seq_abort_rsp(phba, &fc_hdr); + lpfc_in_buf_free(phba, &dmabuf->dbuf); + + /* Respond with BA_ACC or BA_RJT accordingly */ + lpfc_sli4_seq_abort_rsp(vport, &fc_hdr, aborted); } /** @@ -15307,10 +15556,13 @@ lpfc_sli4_fcf_rr_next_index_get(struct lpfc_hba *phba) { uint16_t next_fcf_index; +initial_priority: /* Search start from next bit of currently registered FCF index */ + next_fcf_index = phba->fcf.current_rec.fcf_indx; + next_priority: - next_fcf_index = (phba->fcf.current_rec.fcf_indx + 1) % - LPFC_SLI4_FCF_TBL_INDX_MAX; + /* Determine the next fcf index to check */ + next_fcf_index = (next_fcf_index + 1) % LPFC_SLI4_FCF_TBL_INDX_MAX; next_fcf_index = find_next_bit(phba->fcf.fcf_rr_bmask, LPFC_SLI4_FCF_TBL_INDX_MAX, next_fcf_index); @@ -15337,7 +15589,7 @@ next_priority: * at that level and continue the selection process. */ if (lpfc_check_next_fcf_pri_level(phba)) - goto next_priority; + goto initial_priority; lpfc_printf_log(phba, KERN_WARNING, LOG_FIP, "2844 No roundrobin failover FCF available\n"); if (next_fcf_index >= LPFC_SLI4_FCF_TBL_INDX_MAX) diff --git a/drivers/scsi/lpfc/lpfc_sli4.h b/drivers/scsi/lpfc/lpfc_sli4.h index 44c427a..be02b59 100644 --- a/drivers/scsi/lpfc/lpfc_sli4.h +++ b/drivers/scsi/lpfc/lpfc_sli4.h @@ -139,6 +139,10 @@ struct lpfc_queue { struct lpfc_sli_ring *pring; /* ptr to io ring associated with q */ + uint16_t db_format; +#define LPFC_DB_RING_FORMAT 0x01 +#define LPFC_DB_LIST_FORMAT 0x02 + void __iomem *db_regaddr; /* For q stats */ uint32_t q_cnt_1; uint32_t q_cnt_2; @@ -508,6 +512,10 @@ struct lpfc_sli4_hba { struct lpfc_queue *hdr_rq; /* Slow-path Header Receive queue */ struct lpfc_queue *dat_rq; /* Slow-path Data Receive queue */ + uint8_t fw_func_mode; /* FW function protocol mode */ + uint32_t ulp0_mode; /* ULP0 protocol mode */ + uint32_t ulp1_mode; /* ULP1 protocol mode */ + /* Setup information for various queue parameters */ int eq_esize; int eq_ecount; diff --git a/drivers/scsi/lpfc/lpfc_version.h b/drivers/scsi/lpfc/lpfc_version.h index ba596e8..f3b7795 100644 --- a/drivers/scsi/lpfc/lpfc_version.h +++ b/drivers/scsi/lpfc/lpfc_version.h @@ -18,7 +18,7 @@ * included with this package. * *******************************************************************/ -#define LPFC_DRIVER_VERSION "8.3.36" +#define LPFC_DRIVER_VERSION "8.3.37" #define LPFC_DRIVER_NAME "lpfc" /* Used for SLI 2/3 */ diff --git a/drivers/scsi/megaraid/megaraid_sas.h b/drivers/scsi/megaraid/megaraid_sas.h index 3b2365c..408d254 100644 --- a/drivers/scsi/megaraid/megaraid_sas.h +++ b/drivers/scsi/megaraid/megaraid_sas.h @@ -33,9 +33,9 @@ /* * MegaRAID SAS Driver meta data */ -#define MEGASAS_VERSION "06.504.01.00-rc1" -#define MEGASAS_RELDATE "Oct. 1, 2012" -#define MEGASAS_EXT_VERSION "Mon. Oct. 1 17:00:00 PDT 2012" +#define MEGASAS_VERSION "06.506.00.00-rc1" +#define MEGASAS_RELDATE "Feb. 9, 2013" +#define MEGASAS_EXT_VERSION "Sat. Feb. 9 17:00:00 PDT 2013" /* * Device IDs diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c index 66a0fec..9d53540 100644 --- a/drivers/scsi/megaraid/megaraid_sas_base.c +++ b/drivers/scsi/megaraid/megaraid_sas_base.c @@ -18,7 +18,7 @@ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * * FILE: megaraid_sas_base.c - * Version : v06.504.01.00-rc1 + * Version : v06.506.00.00-rc1 * * Authors: LSI Corporation * Sreenivas Bagalkote diff --git a/drivers/scsi/megaraid/megaraid_sas_fusion.c b/drivers/scsi/megaraid/megaraid_sas_fusion.c index 74030af..a7d5668 100644 --- a/drivers/scsi/megaraid/megaraid_sas_fusion.c +++ b/drivers/scsi/megaraid/megaraid_sas_fusion.c @@ -1206,7 +1206,7 @@ megasas_set_pd_lba(struct MPI2_RAID_SCSI_IO_REQUEST *io_request, u8 cdb_len, MPI2_SCSIIO_EEDPFLAGS_INSERT_OP; } io_request->Control |= (0x4 << 26); - io_request->EEDPBlockSize = MEGASAS_EEDPBLOCKSIZE; + io_request->EEDPBlockSize = scp->device->sector_size; } else { /* Some drives don't support 16/12 byte CDB's, convert to 10 */ if (((cdb_len == 12) || (cdb_len == 16)) && @@ -1511,7 +1511,8 @@ megasas_build_dcdb_fusion(struct megasas_instance *instance, if (scmd->device->channel < MEGASAS_MAX_PD_CHANNELS && instance->pd_list[pd_index].driveState == MR_PD_STATE_SYSTEM) { io_request->Function = 0; - io_request->DevHandle = + if (fusion->fast_path_io) + io_request->DevHandle = local_map_ptr->raidMap.devHndlInfo[device_id].curDevHdl; io_request->RaidContext.timeoutValue = local_map_ptr->raidMap.fpPdIoTimeoutSec; diff --git a/drivers/scsi/megaraid/megaraid_sas_fusion.h b/drivers/scsi/megaraid/megaraid_sas_fusion.h index a7c64f0..f68a3cd 100644 --- a/drivers/scsi/megaraid/megaraid_sas_fusion.h +++ b/drivers/scsi/megaraid/megaraid_sas_fusion.h @@ -61,7 +61,6 @@ #define MEGASAS_SCSI_ADDL_CDB_LEN 0x18 #define MEGASAS_RD_WR_PROTECT_CHECK_ALL 0x20 #define MEGASAS_RD_WR_PROTECT_CHECK_NONE 0x60 -#define MEGASAS_EEDPBLOCKSIZE 512 /* * Raid context flags diff --git a/drivers/scsi/mpt2sas/mpt2sas_base.c b/drivers/scsi/mpt2sas/mpt2sas_base.c index ffd85c5..bcb23d2 100644 --- a/drivers/scsi/mpt2sas/mpt2sas_base.c +++ b/drivers/scsi/mpt2sas/mpt2sas_base.c @@ -155,7 +155,7 @@ _base_fault_reset_work(struct work_struct *work) struct task_struct *p; spin_lock_irqsave(&ioc->ioc_reset_in_progress_lock, flags); - if (ioc->shost_recovery) + if (ioc->shost_recovery || ioc->pci_error_recovery) goto rearm_timer; spin_unlock_irqrestore(&ioc->ioc_reset_in_progress_lock, flags); @@ -164,6 +164,20 @@ _base_fault_reset_work(struct work_struct *work) printk(MPT2SAS_INFO_FMT "%s : SAS host is non-operational !!!!\n", ioc->name, __func__); + /* It may be possible that EEH recovery can resolve some of + * pci bus failure issues rather removing the dead ioc function + * by considering controller is in a non-operational state. So + * here priority is given to the EEH recovery. If it doesn't + * not resolve this issue, mpt2sas driver will consider this + * controller to non-operational state and remove the dead ioc + * function. + */ + if (ioc->non_operational_loop++ < 5) { + spin_lock_irqsave(&ioc->ioc_reset_in_progress_lock, + flags); + goto rearm_timer; + } + /* * Call _scsih_flush_pending_cmds callback so that we flush all * pending commands back to OS. This call is required to aovid @@ -193,6 +207,8 @@ _base_fault_reset_work(struct work_struct *work) return; /* don't rearm timer */ } + ioc->non_operational_loop = 0; + if ((doorbell & MPI2_IOC_STATE_MASK) == MPI2_IOC_STATE_FAULT) { rc = mpt2sas_base_hard_reset_handler(ioc, CAN_SLEEP, FORCE_BIG_HAMMER); @@ -2007,6 +2023,14 @@ _base_display_intel_branding(struct MPT2SAS_ADAPTER *ioc) printk(MPT2SAS_INFO_FMT "%s\n", ioc->name, MPT2SAS_INTEL_RMS25KB040_BRANDING); break; + case MPT2SAS_INTEL_RMS25LB040_SSDID: + printk(MPT2SAS_INFO_FMT "%s\n", ioc->name, + MPT2SAS_INTEL_RMS25LB040_BRANDING); + break; + case MPT2SAS_INTEL_RMS25LB080_SSDID: + printk(MPT2SAS_INFO_FMT "%s\n", ioc->name, + MPT2SAS_INTEL_RMS25LB080_BRANDING); + break; default: break; } @@ -4386,6 +4410,7 @@ mpt2sas_base_attach(struct MPT2SAS_ADAPTER *ioc) if (missing_delay[0] != -1 && missing_delay[1] != -1) _base_update_missing_delay(ioc, missing_delay[0], missing_delay[1]); + ioc->non_operational_loop = 0; return 0; diff --git a/drivers/scsi/mpt2sas/mpt2sas_base.h b/drivers/scsi/mpt2sas/mpt2sas_base.h index 543d8d6..4caaac1 100644 --- a/drivers/scsi/mpt2sas/mpt2sas_base.h +++ b/drivers/scsi/mpt2sas/mpt2sas_base.h @@ -165,6 +165,10 @@ "Intel(R) Integrated RAID Module RMS25KB080" #define MPT2SAS_INTEL_RMS25KB040_BRANDING \ "Intel(R) Integrated RAID Module RMS25KB040" +#define MPT2SAS_INTEL_RMS25LB040_BRANDING \ + "Intel(R) Integrated RAID Module RMS25LB040" +#define MPT2SAS_INTEL_RMS25LB080_BRANDING \ + "Intel(R) Integrated RAID Module RMS25LB080" #define MPT2SAS_INTEL_RMS2LL080_BRANDING \ "Intel Integrated RAID Module RMS2LL080" #define MPT2SAS_INTEL_RMS2LL040_BRANDING \ @@ -180,6 +184,8 @@ #define MPT2SAS_INTEL_RMS25JB040_SSDID 0x3517 #define MPT2SAS_INTEL_RMS25KB080_SSDID 0x3518 #define MPT2SAS_INTEL_RMS25KB040_SSDID 0x3519 +#define MPT2SAS_INTEL_RMS25LB040_SSDID 0x351A +#define MPT2SAS_INTEL_RMS25LB080_SSDID 0x351B #define MPT2SAS_INTEL_RMS2LL080_SSDID 0x350E #define MPT2SAS_INTEL_RMS2LL040_SSDID 0x350F #define MPT2SAS_INTEL_RS25GB008_SSDID 0x3000 @@ -835,6 +841,7 @@ struct MPT2SAS_ADAPTER { u16 cpu_msix_table_sz; u32 ioc_reset_count; MPT2SAS_FLUSH_RUNNING_CMDS schedule_dead_ioc_flush_running_cmds; + u32 non_operational_loop; /* internal commands, callback index */ u8 scsi_io_cb_idx; diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.c b/drivers/scsi/mpt3sas/mpt3sas_base.c index 04f8010..1836003 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_base.c +++ b/drivers/scsi/mpt3sas/mpt3sas_base.c @@ -42,7 +42,6 @@ * USA. */ -#include <linux/version.h> #include <linux/kernel.h> #include <linux/module.h> #include <linux/errno.h> @@ -1310,7 +1309,6 @@ _base_build_sg_scmd_ieee(struct MPT3SAS_ADAPTER *ioc, void *sg_local, *chain; u32 chain_offset; u32 chain_length; - u32 chain_flags; int sges_left; u32 sges_in_segment; u8 simple_sgl_flags; @@ -1356,8 +1354,7 @@ _base_build_sg_scmd_ieee(struct MPT3SAS_ADAPTER *ioc, sges_in_segment--; } - /* initializing the chain flags and pointers */ - chain_flags = MPI2_SGE_FLAGS_CHAIN_ELEMENT << MPI2_SGE_FLAGS_SHIFT; + /* initializing the pointers */ chain_req = _base_get_chain_buffer_tracker(ioc, smid); if (!chain_req) return -1; diff --git a/drivers/scsi/mpt3sas/mpt3sas_config.c b/drivers/scsi/mpt3sas/mpt3sas_config.c index ce7e59b..1df9ed4 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_config.c +++ b/drivers/scsi/mpt3sas/mpt3sas_config.c @@ -41,7 +41,6 @@ * USA. */ -#include <linux/version.h> #include <linux/module.h> #include <linux/kernel.h> #include <linux/init.h> diff --git a/drivers/scsi/mpt3sas/mpt3sas_ctl.c b/drivers/scsi/mpt3sas/mpt3sas_ctl.c index 8af944d..054d523 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_ctl.c +++ b/drivers/scsi/mpt3sas/mpt3sas_ctl.c @@ -42,7 +42,6 @@ * USA. */ -#include <linux/version.h> #include <linux/kernel.h> #include <linux/module.h> #include <linux/errno.h> @@ -3136,7 +3135,7 @@ _ctl_diag_trigger_mpi_store(struct device *cdev, spin_lock_irqsave(&ioc->diag_trigger_lock, flags); sz = min(sizeof(struct SL_WH_MPI_TRIGGERS_T), count); memset(&ioc->diag_trigger_mpi, 0, - sizeof(struct SL_WH_EVENT_TRIGGERS_T)); + sizeof(ioc->diag_trigger_mpi)); memcpy(&ioc->diag_trigger_mpi, buf, sz); if (ioc->diag_trigger_mpi.ValidEntries > NUM_VALID_ENTRIES) ioc->diag_trigger_mpi.ValidEntries = NUM_VALID_ENTRIES; diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c index 6421a06..dcbf7c8 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c +++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c @@ -41,7 +41,6 @@ * USA. */ -#include <linux/version.h> #include <linux/module.h> #include <linux/kernel.h> #include <linux/init.h> @@ -2755,13 +2754,11 @@ _scsih_block_io_to_children_attached_directly(struct MPT3SAS_ADAPTER *ioc, int i; u16 handle; u16 reason_code; - u8 phy_number; for (i = 0; i < event_data->NumEntries; i++) { handle = le16_to_cpu(event_data->PHY[i].AttachedDevHandle); if (!handle) continue; - phy_number = event_data->StartPhyNum + i; reason_code = event_data->PHY[i].PhyStatus & MPI2_EVENT_SAS_TOPO_RC_MASK; if (reason_code == MPI2_EVENT_SAS_TOPO_RC_DELAY_NOT_RESPONDING) diff --git a/drivers/scsi/mpt3sas/mpt3sas_trigger_diag.c b/drivers/scsi/mpt3sas/mpt3sas_trigger_diag.c index da6c5f2..6f8d621 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_trigger_diag.c +++ b/drivers/scsi/mpt3sas/mpt3sas_trigger_diag.c @@ -42,7 +42,6 @@ * USA. */ -#include <linux/version.h> #include <linux/kernel.h> #include <linux/module.h> #include <linux/errno.h> diff --git a/drivers/scsi/mvsas/mv_sas.c b/drivers/scsi/mvsas/mv_sas.c index 078c639..532110f 100644 --- a/drivers/scsi/mvsas/mv_sas.c +++ b/drivers/scsi/mvsas/mv_sas.c @@ -316,10 +316,13 @@ static int mvs_task_prep_smp(struct mvs_info *mvi, struct mvs_task_exec_info *tei) { int elem, rc, i; + struct sas_ha_struct *sha = mvi->sas; struct sas_task *task = tei->task; struct mvs_cmd_hdr *hdr = tei->hdr; struct domain_device *dev = task->dev; struct asd_sas_port *sas_port = dev->port; + struct sas_phy *sphy = dev->phy; + struct asd_sas_phy *sas_phy = sha->sas_phy[sphy->number]; struct scatterlist *sg_req, *sg_resp; u32 req_len, resp_len, tag = tei->tag; void *buf_tmp; @@ -392,7 +395,7 @@ static int mvs_task_prep_smp(struct mvs_info *mvi, slot->tx = mvi->tx_prod; mvi->tx[mvi->tx_prod] = cpu_to_le32((TXQ_CMD_SMP << TXQ_CMD_SHIFT) | TXQ_MODE_I | tag | - (sas_port->phy_mask << TXQ_PHY_SHIFT)); + (MVS_PHY_ID << TXQ_PHY_SHIFT)); hdr->flags |= flags; hdr->lens = cpu_to_le32(((resp_len / 4) << 16) | ((req_len - 4) / 4)); @@ -438,11 +441,14 @@ static u32 mvs_get_ncq_tag(struct sas_task *task, u32 *tag) static int mvs_task_prep_ata(struct mvs_info *mvi, struct mvs_task_exec_info *tei) { + struct sas_ha_struct *sha = mvi->sas; struct sas_task *task = tei->task; struct domain_device *dev = task->dev; struct mvs_device *mvi_dev = dev->lldd_dev; struct mvs_cmd_hdr *hdr = tei->hdr; struct asd_sas_port *sas_port = dev->port; + struct sas_phy *sphy = dev->phy; + struct asd_sas_phy *sas_phy = sha->sas_phy[sphy->number]; struct mvs_slot_info *slot; void *buf_prd; u32 tag = tei->tag, hdr_tag; @@ -462,7 +468,7 @@ static int mvs_task_prep_ata(struct mvs_info *mvi, slot->tx = mvi->tx_prod; del_q = TXQ_MODE_I | tag | (TXQ_CMD_STP << TXQ_CMD_SHIFT) | - (sas_port->phy_mask << TXQ_PHY_SHIFT) | + (MVS_PHY_ID << TXQ_PHY_SHIFT) | (mvi_dev->taskfileset << TXQ_SRS_SHIFT); mvi->tx[mvi->tx_prod] = cpu_to_le32(del_q); diff --git a/drivers/scsi/mvsas/mv_sas.h b/drivers/scsi/mvsas/mv_sas.h index 2ae77a0..9f3cc13 100644 --- a/drivers/scsi/mvsas/mv_sas.h +++ b/drivers/scsi/mvsas/mv_sas.h @@ -76,6 +76,7 @@ extern struct kmem_cache *mvs_task_list_cache; (__mc) != 0 ; \ (++__lseq), (__mc) >>= 1) +#define MVS_PHY_ID (1U << sas_phy->id) #define MV_INIT_DELAYED_WORK(w, f, d) INIT_DELAYED_WORK(w, f) #define UNASSOC_D2H_FIS(id) \ ((void *) mvi->rx_fis + 0x100 * id) diff --git a/drivers/scsi/osd/osd_initiator.c b/drivers/scsi/osd/osd_initiator.c index c06b8e5..d8293f25c 100644 --- a/drivers/scsi/osd/osd_initiator.c +++ b/drivers/scsi/osd/osd_initiator.c @@ -144,6 +144,10 @@ static int _osd_get_print_system_info(struct osd_dev *od, odi->osdname_len = get_attrs[a].len; /* Avoid NULL for memcmp optimization 0-length is good enough */ odi->osdname = kzalloc(odi->osdname_len + 1, GFP_KERNEL); + if (!odi->osdname) { + ret = -ENOMEM; + goto out; + } if (odi->osdname_len) memcpy(odi->osdname, get_attrs[a].val_ptr, odi->osdname_len); OSD_INFO("OSD_NAME [%s]\n", odi->osdname); diff --git a/drivers/scsi/pm8001/pm8001_init.c b/drivers/scsi/pm8001/pm8001_init.c index 4c9fe73..3d5e522 100644 --- a/drivers/scsi/pm8001/pm8001_init.c +++ b/drivers/scsi/pm8001/pm8001_init.c @@ -140,7 +140,8 @@ static void pm8001_free(struct pm8001_hba_info *pm8001_ha) for (i = 0; i < USI_MAX_MEMCNT; i++) { if (pm8001_ha->memoryMap.region[i].virt_ptr != NULL) { pci_free_consistent(pm8001_ha->pdev, - pm8001_ha->memoryMap.region[i].element_size, + (pm8001_ha->memoryMap.region[i].total_len + + pm8001_ha->memoryMap.region[i].alignment), pm8001_ha->memoryMap.region[i].virt_ptr, pm8001_ha->memoryMap.region[i].phys_addr); } diff --git a/drivers/scsi/qla2xxx/qla_attr.c b/drivers/scsi/qla2xxx/qla_attr.c index 83d7984..1d82eef 100644 --- a/drivers/scsi/qla2xxx/qla_attr.c +++ b/drivers/scsi/qla2xxx/qla_attr.c @@ -1,6 +1,6 @@ /* * QLogic Fibre Channel HBA Driver - * Copyright (c) 2003-2012 QLogic Corporation + * Copyright (c) 2003-2013 QLogic Corporation * * See LICENSE.qla2xxx for copyright and licensing details. */ @@ -1272,22 +1272,29 @@ qla2x00_thermal_temp_show(struct device *dev, struct device_attribute *attr, char *buf) { scsi_qla_host_t *vha = shost_priv(class_to_shost(dev)); - int rval = QLA_FUNCTION_FAILED; - uint16_t temp, frac; + uint16_t temp = 0; - if (!vha->hw->flags.thermal_supported) - return snprintf(buf, PAGE_SIZE, "\n"); + if (!vha->hw->thermal_support) { + ql_log(ql_log_warn, vha, 0x70db, + "Thermal not supported by this card.\n"); + goto done; + } - temp = frac = 0; - if (qla2x00_reset_active(vha)) - ql_log(ql_log_warn, vha, 0x707b, - "ISP reset active.\n"); - else if (!vha->hw->flags.eeh_busy) - rval = qla2x00_get_thermal_temp(vha, &temp, &frac); - if (rval != QLA_SUCCESS) - return snprintf(buf, PAGE_SIZE, "\n"); + if (qla2x00_reset_active(vha)) { + ql_log(ql_log_warn, vha, 0x70dc, "ISP reset active.\n"); + goto done; + } + + if (vha->hw->flags.eeh_busy) { + ql_log(ql_log_warn, vha, 0x70dd, "PCI EEH busy.\n"); + goto done; + } + + if (qla2x00_get_thermal_temp(vha, &temp) == QLA_SUCCESS) + return snprintf(buf, PAGE_SIZE, "%d\n", temp); - return snprintf(buf, PAGE_SIZE, "%d.%02d\n", temp, frac); +done: + return snprintf(buf, PAGE_SIZE, "\n"); } static ssize_t diff --git a/drivers/scsi/qla2xxx/qla_bsg.c b/drivers/scsi/qla2xxx/qla_bsg.c index 9f34ded..ad54099 100644 --- a/drivers/scsi/qla2xxx/qla_bsg.c +++ b/drivers/scsi/qla2xxx/qla_bsg.c @@ -27,7 +27,7 @@ void qla2x00_bsg_sp_free(void *data, void *ptr) { srb_t *sp = (srb_t *)ptr; - struct scsi_qla_host *vha = (scsi_qla_host_t *)data; + struct scsi_qla_host *vha = sp->fcport->vha; struct fc_bsg_job *bsg_job = sp->u.bsg_job; struct qla_hw_data *ha = vha->hw; @@ -40,7 +40,7 @@ qla2x00_bsg_sp_free(void *data, void *ptr) if (sp->type == SRB_CT_CMD || sp->type == SRB_ELS_CMD_HST) kfree(sp->fcport); - mempool_free(sp, vha->hw->srb_mempool); + qla2x00_rel_sp(vha, sp); } int @@ -368,7 +368,7 @@ qla2x00_process_els(struct fc_bsg_job *bsg_job) if (rval != QLA_SUCCESS) { ql_log(ql_log_warn, vha, 0x700e, "qla2x00_start_sp failed = %d\n", rval); - mempool_free(sp, ha->srb_mempool); + qla2x00_rel_sp(vha, sp); rval = -EIO; goto done_unmap_sg; } @@ -515,7 +515,7 @@ qla2x00_process_ct(struct fc_bsg_job *bsg_job) if (rval != QLA_SUCCESS) { ql_log(ql_log_warn, vha, 0x7017, "qla2x00_start_sp failed=%d.\n", rval); - mempool_free(sp, ha->srb_mempool); + qla2x00_rel_sp(vha, sp); rval = -EIO; goto done_free_fcport; } @@ -531,6 +531,75 @@ done_unmap_sg: done: return rval; } + +/* Disable loopback mode */ +static inline int +qla81xx_reset_loopback_mode(scsi_qla_host_t *vha, uint16_t *config, + int wait, int wait2) +{ + int ret = 0; + int rval = 0; + uint16_t new_config[4]; + struct qla_hw_data *ha = vha->hw; + + if (!IS_QLA81XX(ha) && !IS_QLA8031(ha)) + goto done_reset_internal; + + memset(new_config, 0 , sizeof(new_config)); + if ((config[0] & INTERNAL_LOOPBACK_MASK) >> 1 == + ENABLE_INTERNAL_LOOPBACK || + (config[0] & INTERNAL_LOOPBACK_MASK) >> 1 == + ENABLE_EXTERNAL_LOOPBACK) { + new_config[0] = config[0] & ~INTERNAL_LOOPBACK_MASK; + ql_dbg(ql_dbg_user, vha, 0x70bf, "new_config[0]=%02x\n", + (new_config[0] & INTERNAL_LOOPBACK_MASK)); + memcpy(&new_config[1], &config[1], sizeof(uint16_t) * 3) ; + + ha->notify_dcbx_comp = wait; + ha->notify_lb_portup_comp = wait2; + + ret = qla81xx_set_port_config(vha, new_config); + if (ret != QLA_SUCCESS) { + ql_log(ql_log_warn, vha, 0x7025, + "Set port config failed.\n"); + ha->notify_dcbx_comp = 0; + ha->notify_lb_portup_comp = 0; + rval = -EINVAL; + goto done_reset_internal; + } + + /* Wait for DCBX complete event */ + if (wait && !wait_for_completion_timeout(&ha->dcbx_comp, + (DCBX_COMP_TIMEOUT * HZ))) { + ql_dbg(ql_dbg_user, vha, 0x7026, + "DCBX completion not received.\n"); + ha->notify_dcbx_comp = 0; + ha->notify_lb_portup_comp = 0; + rval = -EINVAL; + goto done_reset_internal; + } else + ql_dbg(ql_dbg_user, vha, 0x7027, + "DCBX completion received.\n"); + + if (wait2 && + !wait_for_completion_timeout(&ha->lb_portup_comp, + (LB_PORTUP_COMP_TIMEOUT * HZ))) { + ql_dbg(ql_dbg_user, vha, 0x70c5, + "Port up completion not received.\n"); + ha->notify_lb_portup_comp = 0; + rval = -EINVAL; + goto done_reset_internal; + } else + ql_dbg(ql_dbg_user, vha, 0x70c6, + "Port up completion received.\n"); + + ha->notify_dcbx_comp = 0; + ha->notify_lb_portup_comp = 0; + } +done_reset_internal: + return rval; +} + /* * Set the port configuration to enable the internal or external loopback * depending on the loopback mode. @@ -566,9 +635,19 @@ qla81xx_set_loopback_mode(scsi_qla_host_t *vha, uint16_t *config, } /* Wait for DCBX complete event */ - if (!wait_for_completion_timeout(&ha->dcbx_comp, (20 * HZ))) { + if (!wait_for_completion_timeout(&ha->dcbx_comp, + (DCBX_COMP_TIMEOUT * HZ))) { ql_dbg(ql_dbg_user, vha, 0x7022, - "State change notification not received.\n"); + "DCBX completion not received.\n"); + ret = qla81xx_reset_loopback_mode(vha, new_config, 0, 0); + /* + * If the reset of the loopback mode doesn't work take a FCoE + * dump and reset the chip. + */ + if (ret) { + ha->isp_ops->fw_dump(vha, 0); + set_bit(ISP_ABORT_NEEDED, &vha->dpc_flags); + } rval = -EINVAL; } else { if (ha->flags.idc_compl_status) { @@ -578,7 +657,7 @@ qla81xx_set_loopback_mode(scsi_qla_host_t *vha, uint16_t *config, ha->flags.idc_compl_status = 0; } else ql_dbg(ql_dbg_user, vha, 0x7023, - "State change received.\n"); + "DCBX completion received.\n"); } ha->notify_dcbx_comp = 0; @@ -587,57 +666,6 @@ done_set_internal: return rval; } -/* Disable loopback mode */ -static inline int -qla81xx_reset_loopback_mode(scsi_qla_host_t *vha, uint16_t *config, - int wait) -{ - int ret = 0; - int rval = 0; - uint16_t new_config[4]; - struct qla_hw_data *ha = vha->hw; - - if (!IS_QLA81XX(ha) && !IS_QLA8031(ha)) - goto done_reset_internal; - - memset(new_config, 0 , sizeof(new_config)); - if ((config[0] & INTERNAL_LOOPBACK_MASK) >> 1 == - ENABLE_INTERNAL_LOOPBACK || - (config[0] & INTERNAL_LOOPBACK_MASK) >> 1 == - ENABLE_EXTERNAL_LOOPBACK) { - new_config[0] = config[0] & ~INTERNAL_LOOPBACK_MASK; - ql_dbg(ql_dbg_user, vha, 0x70bf, "new_config[0]=%02x\n", - (new_config[0] & INTERNAL_LOOPBACK_MASK)); - memcpy(&new_config[1], &config[1], sizeof(uint16_t) * 3) ; - - ha->notify_dcbx_comp = wait; - ret = qla81xx_set_port_config(vha, new_config); - if (ret != QLA_SUCCESS) { - ql_log(ql_log_warn, vha, 0x7025, - "Set port config failed.\n"); - ha->notify_dcbx_comp = 0; - rval = -EINVAL; - goto done_reset_internal; - } - - /* Wait for DCBX complete event */ - if (wait && !wait_for_completion_timeout(&ha->dcbx_comp, - (20 * HZ))) { - ql_dbg(ql_dbg_user, vha, 0x7026, - "State change notification not received.\n"); - ha->notify_dcbx_comp = 0; - rval = -EINVAL; - goto done_reset_internal; - } else - ql_dbg(ql_dbg_user, vha, 0x7027, - "State change received.\n"); - - ha->notify_dcbx_comp = 0; - } -done_reset_internal: - return rval; -} - static int qla2x00_process_loopback(struct fc_bsg_job *bsg_job) { @@ -739,6 +767,7 @@ qla2x00_process_loopback(struct fc_bsg_job *bsg_job) if (IS_QLA81XX(ha) || IS_QLA8031(ha)) { memset(config, 0, sizeof(config)); memset(new_config, 0, sizeof(new_config)); + if (qla81xx_get_port_config(vha, config)) { ql_log(ql_log_warn, vha, 0x701f, "Get port config failed.\n"); @@ -746,6 +775,14 @@ qla2x00_process_loopback(struct fc_bsg_job *bsg_job) goto done_free_dma_rsp; } + if ((config[0] & INTERNAL_LOOPBACK_MASK) != 0) { + ql_dbg(ql_dbg_user, vha, 0x70c4, + "Loopback operation already in " + "progress.\n"); + rval = -EAGAIN; + goto done_free_dma_rsp; + } + ql_dbg(ql_dbg_user, vha, 0x70c0, "elreq.options=%04x\n", elreq.options); @@ -755,7 +792,7 @@ qla2x00_process_loopback(struct fc_bsg_job *bsg_job) config, new_config, elreq.options); else rval = qla81xx_reset_loopback_mode(vha, - config, 1); + config, 1, 0); else rval = qla81xx_set_loopback_mode(vha, config, new_config, elreq.options); @@ -772,14 +809,6 @@ qla2x00_process_loopback(struct fc_bsg_job *bsg_job) command_sent = INT_DEF_LB_LOOPBACK_CMD; rval = qla2x00_loopback_test(vha, &elreq, response); - if (new_config[0]) { - /* Revert back to original port config - * Also clear internal loopback - */ - qla81xx_reset_loopback_mode(vha, - new_config, 0); - } - if (response[0] == MBS_COMMAND_ERROR && response[1] == MBS_LB_RESET) { ql_log(ql_log_warn, vha, 0x7029, @@ -788,15 +817,39 @@ qla2x00_process_loopback(struct fc_bsg_job *bsg_job) qla2xxx_wake_dpc(vha); qla2x00_wait_for_chip_reset(vha); /* Also reset the MPI */ - if (qla81xx_restart_mpi_firmware(vha) != - QLA_SUCCESS) { - ql_log(ql_log_warn, vha, 0x702a, - "MPI reset failed.\n"); + if (IS_QLA81XX(ha)) { + if (qla81xx_restart_mpi_firmware(vha) != + QLA_SUCCESS) { + ql_log(ql_log_warn, vha, 0x702a, + "MPI reset failed.\n"); + } } rval = -EIO; goto done_free_dma_rsp; } + + if (new_config[0]) { + int ret; + + /* Revert back to original port config + * Also clear internal loopback + */ + ret = qla81xx_reset_loopback_mode(vha, + new_config, 0, 1); + if (ret) { + /* + * If the reset of the loopback mode + * doesn't work take FCoE dump and then + * reset the chip. + */ + ha->isp_ops->fw_dump(vha, 0); + set_bit(ISP_ABORT_NEEDED, + &vha->dpc_flags); + } + + } + } else { type = "FC_BSG_HST_VENDOR_LOOPBACK"; ql_dbg(ql_dbg_user, vha, 0x702b, @@ -1950,7 +2003,7 @@ qla24xx_bsg_timeout(struct fc_bsg_job *bsg_job) if (!req) continue; - for (cnt = 1; cnt < MAX_OUTSTANDING_COMMANDS; cnt++) { + for (cnt = 1; cnt < req->num_outstanding_cmds; cnt++) { sp = req->outstanding_cmds[cnt]; if (sp) { if (((sp->type == SRB_CT_CMD) || @@ -1985,6 +2038,6 @@ done: spin_unlock_irqrestore(&ha->hardware_lock, flags); if (bsg_job->request->msgcode == FC_BSG_HST_CT) kfree(sp->fcport); - mempool_free(sp, ha->srb_mempool); + qla2x00_rel_sp(vha, sp); return 0; } diff --git a/drivers/scsi/qla2xxx/qla_bsg.h b/drivers/scsi/qla2xxx/qla_bsg.h index 37b8b7b..e9f6b9b 100644 --- a/drivers/scsi/qla2xxx/qla_bsg.h +++ b/drivers/scsi/qla2xxx/qla_bsg.h @@ -1,6 +1,6 @@ /* * QLogic Fibre Channel HBA Driver - * Copyright (c) 2003-2012 QLogic Corporation + * Copyright (c) 2003-2013 QLogic Corporation * * See LICENSE.qla2xxx for copyright and licensing details. */ diff --git a/drivers/scsi/qla2xxx/qla_dbg.c b/drivers/scsi/qla2xxx/qla_dbg.c index 53f9e49..1626de5 100644 --- a/drivers/scsi/qla2xxx/qla_dbg.c +++ b/drivers/scsi/qla2xxx/qla_dbg.c @@ -1,6 +1,6 @@ /* * QLogic Fibre Channel HBA Driver - * Copyright (c) 2003-2012 QLogic Corporation + * Copyright (c) 2003-2013 QLogic Corporation * * See LICENSE.qla2xxx for copyright and licensing details. */ @@ -11,20 +11,21 @@ * ---------------------------------------------------------------------- * | Level | Last Value Used | Holes | * ---------------------------------------------------------------------- - * | Module Init and Probe | 0x0125 | 0x4b,0xba,0xfa | - * | Mailbox commands | 0x114f | 0x111a-0x111b | + * | Module Init and Probe | 0x0126 | 0x4b,0xba,0xfa | + * | Mailbox commands | 0x115b | 0x111a-0x111b | * | | | 0x112c-0x112e | * | | | 0x113a | * | Device Discovery | 0x2087 | 0x2020-0x2022, | * | | | 0x2016 | - * | Queue Command and IO tracing | 0x3030 | 0x3006-0x300b | + * | Queue Command and IO tracing | 0x3031 | 0x3006-0x300b | * | | | 0x3027-0x3028 | * | | | 0x302d-0x302e | * | DPC Thread | 0x401d | 0x4002,0x4013 | * | Async Events | 0x5071 | 0x502b-0x502f | * | | | 0x5047,0x5052 | * | Timer Routines | 0x6011 | | - * | User Space Interactions | 0x70c3 | 0x7018,0x702e, | + * | User Space Interactions | 0x70c4 | 0x7018,0x702e, | + * | | | 0x7020,0x7024, | * | | | 0x7039,0x7045, | * | | | 0x7073-0x7075, | * | | | 0x708c, | @@ -35,11 +36,11 @@ * | | | 0x800b,0x8039 | * | AER/EEH | 0x9011 | | * | Virtual Port | 0xa007 | | - * | ISP82XX Specific | 0xb084 | 0xb002,0xb024 | + * | ISP82XX Specific | 0xb086 | 0xb002,0xb024 | * | MultiQ | 0xc00c | | * | Misc | 0xd010 | | - * | Target Mode | 0xe06f | | - * | Target Mode Management | 0xf071 | | + * | Target Mode | 0xe070 | | + * | Target Mode Management | 0xf072 | | * | Target Mode Task Management | 0x1000b | | * ---------------------------------------------------------------------- */ diff --git a/drivers/scsi/qla2xxx/qla_dbg.h b/drivers/scsi/qla2xxx/qla_dbg.h index 8f911c0..35e20b4 100644 --- a/drivers/scsi/qla2xxx/qla_dbg.h +++ b/drivers/scsi/qla2xxx/qla_dbg.h @@ -1,6 +1,6 @@ /* * QLogic Fibre Channel HBA Driver - * Copyright (c) 2003-2012 QLogic Corporation + * Copyright (c) 2003-2013 QLogic Corporation * * See LICENSE.qla2xxx for copyright and licensing details. */ diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h index 6e7727f4..c650991 100644 --- a/drivers/scsi/qla2xxx/qla_def.h +++ b/drivers/scsi/qla2xxx/qla_def.h @@ -1,6 +1,6 @@ /* * QLogic Fibre Channel HBA Driver - * Copyright (c) 2003-2012 QLogic Corporation + * Copyright (c) 2003-2013 QLogic Corporation * * See LICENSE.qla2xxx for copyright and licensing details. */ @@ -37,6 +37,7 @@ #include "qla_nx.h" #define QLA2XXX_DRIVER_NAME "qla2xxx" #define QLA2XXX_APIDEV "ql2xapidev" +#define QLA2XXX_MANUFACTURER "QLogic Corporation" /* * We have MAILBOX_REGISTER_COUNT sized arrays in a few places, @@ -253,8 +254,8 @@ #define LOOP_DOWN_TIME 255 /* 240 */ #define LOOP_DOWN_RESET (LOOP_DOWN_TIME - 30) -/* Maximum outstanding commands in ISP queues (1-65535) */ -#define MAX_OUTSTANDING_COMMANDS 1024 +#define DEFAULT_OUTSTANDING_COMMANDS 1024 +#define MIN_OUTSTANDING_COMMANDS 128 /* ISP request and response entry counts (37-65535) */ #define REQUEST_ENTRY_CNT_2100 128 /* Number of request entries. */ @@ -537,6 +538,8 @@ struct device_reg_25xxmq { uint32_t req_q_out; uint32_t rsp_q_in; uint32_t rsp_q_out; + uint32_t atio_q_in; + uint32_t atio_q_out; }; typedef union { @@ -563,6 +566,9 @@ typedef union { &(reg)->u.isp2100.mailbox5 : \ &(reg)->u.isp2300.rsp_q_out) +#define ISP_ATIO_Q_IN(vha) (vha->hw->tgt.atio_q_in) +#define ISP_ATIO_Q_OUT(vha) (vha->hw->tgt.atio_q_out) + #define MAILBOX_REG(ha, reg, num) \ (IS_QLA2100(ha) || IS_QLA2200(ha) ? \ (num < 8 ? \ @@ -762,8 +768,8 @@ typedef struct { #define MBC_PORT_LOGOUT 0x56 /* Port Logout request */ #define MBC_SEND_RNID_ELS 0x57 /* Send RNID ELS request */ #define MBC_SET_RNID_PARAMS 0x59 /* Set RNID parameters */ -#define MBC_GET_RNID_PARAMS 0x5a /* Data Rate */ -#define MBC_DATA_RATE 0x5d /* Get RNID parameters */ +#define MBC_GET_RNID_PARAMS 0x5a /* Get RNID parameters */ +#define MBC_DATA_RATE 0x5d /* Data Rate */ #define MBC_INITIALIZE_FIRMWARE 0x60 /* Initialize firmware */ #define MBC_INITIATE_LIP 0x62 /* Initiate Loop */ /* Initialization Procedure */ @@ -809,6 +815,7 @@ typedef struct { #define MBC_HOST_MEMORY_COPY 0x53 /* Host Memory Copy. */ #define MBC_SEND_RNFT_ELS 0x5e /* Send RNFT ELS request */ #define MBC_GET_LINK_PRIV_STATS 0x6d /* Get link & private data. */ +#define MBC_LINK_INITIALIZATION 0x72 /* Do link initialization. */ #define MBC_SET_VENDOR_ID 0x76 /* Set Vendor ID. */ #define MBC_PORT_RESET 0x120 /* Port Reset */ #define MBC_SET_PORT_CONFIG 0x122 /* Set port configuration */ @@ -856,6 +863,9 @@ typedef struct { #define MBX_1 BIT_1 #define MBX_0 BIT_0 +#define RNID_TYPE_SET_VERSION 0x9 +#define RNID_TYPE_ASIC_TEMP 0xC + /* * Firmware state codes from get firmware state mailbox command */ @@ -1841,9 +1851,6 @@ typedef struct fc_port { uint8_t scan_state; } fc_port_t; -#define QLA_FCPORT_SCAN_NONE 0 -#define QLA_FCPORT_SCAN_FOUND 1 - /* * Fibre channel port/lun states. */ @@ -2533,8 +2540,10 @@ struct req_que { uint16_t qos; uint16_t vp_idx; struct rsp_que *rsp; - srb_t *outstanding_cmds[MAX_OUTSTANDING_COMMANDS]; + srb_t **outstanding_cmds; uint32_t current_outstanding_cmd; + uint16_t num_outstanding_cmds; +#define MAX_Q_DEPTH 32 int max_q_depth; }; @@ -2557,11 +2566,13 @@ struct qlt_hw_data { struct atio *atio_ring_ptr; /* Current address. */ uint16_t atio_ring_index; /* Current index. */ uint16_t atio_q_length; + uint32_t __iomem *atio_q_in; + uint32_t __iomem *atio_q_out; void *target_lport_ptr; struct qla_tgt_func_tmpl *tgt_ops; struct qla_tgt *qla_tgt; - struct qla_tgt_cmd *cmds[MAX_OUTSTANDING_COMMANDS]; + struct qla_tgt_cmd *cmds[DEFAULT_OUTSTANDING_COMMANDS]; uint16_t current_handle; struct qla_tgt_vp_map *tgt_vp_map; @@ -2618,7 +2629,6 @@ struct qla_hw_data { uint32_t nic_core_hung:1; uint32_t quiesce_owner:1; - uint32_t thermal_supported:1; uint32_t nic_core_reset_hdlr_active:1; uint32_t nic_core_reset_owner:1; uint32_t isp82xx_no_md_cap:1; @@ -2788,6 +2798,8 @@ struct qla_hw_data { #define IS_PI_SPLIT_DET_CAPABLE_HBA(ha) (IS_QLA83XX(ha)) #define IS_PI_SPLIT_DET_CAPABLE(ha) (IS_PI_SPLIT_DET_CAPABLE_HBA(ha) && \ (((ha)->fw_attributes_h << 16 | (ha)->fw_attributes) & BIT_22)) +#define IS_ATIO_MSIX_CAPABLE(ha) (IS_QLA83XX(ha)) +#define IS_TGT_MODE_CAPABLE(ha) (ha->tgt.atio_q_length) /* HBA serial number */ uint8_t serial0; @@ -2870,7 +2882,13 @@ struct qla_hw_data { struct completion mbx_cmd_comp; /* Serialize mbx access */ struct completion mbx_intr_comp; /* Used for completion notification */ struct completion dcbx_comp; /* For set port config notification */ + struct completion lb_portup_comp; /* Used to wait for link up during + * loopback */ +#define DCBX_COMP_TIMEOUT 20 +#define LB_PORTUP_COMP_TIMEOUT 10 + int notify_dcbx_comp; + int notify_lb_portup_comp; struct mutex selflogin_lock; /* Basic firmware related information. */ @@ -2887,6 +2905,7 @@ struct qla_hw_data { #define RISC_START_ADDRESS_2300 0x800 #define RISC_START_ADDRESS_2400 0x100000 uint16_t fw_xcb_count; + uint16_t fw_iocb_count; uint16_t fw_options[16]; /* slots: 1,2,3,10,11 */ uint8_t fw_seriallink_options[4]; @@ -3056,7 +3075,16 @@ struct qla_hw_data { struct work_struct idc_state_handler; struct work_struct nic_core_unrecoverable; +#define HOST_QUEUE_RAMPDOWN_INTERVAL (60 * HZ) +#define HOST_QUEUE_RAMPUP_INTERVAL (30 * HZ) + unsigned long host_last_rampdown_time; + unsigned long host_last_rampup_time; + int cfg_lun_q_depth; + struct qlt_hw_data tgt; + uint16_t thermal_support; +#define THERMAL_SUPPORT_I2C BIT_0 +#define THERMAL_SUPPORT_ISP BIT_1 }; /* @@ -3115,6 +3143,8 @@ typedef struct scsi_qla_host { #define MPI_RESET_NEEDED 19 /* Initiate MPI FW reset */ #define ISP_QUIESCE_NEEDED 20 /* Driver need some quiescence */ #define SCR_PENDING 21 /* SCR in target mode */ +#define HOST_RAMP_DOWN_QUEUE_DEPTH 22 +#define HOST_RAMP_UP_QUEUE_DEPTH 23 uint32_t device_flags; #define SWITCH_FOUND BIT_0 @@ -3248,8 +3278,6 @@ struct qla_tgt_vp_map { #define NVRAM_DELAY() udelay(10) -#define INVALID_HANDLE (MAX_OUTSTANDING_COMMANDS+1) - /* * Flash support definitions */ diff --git a/drivers/scsi/qla2xxx/qla_dfs.c b/drivers/scsi/qla2xxx/qla_dfs.c index 706c4f7..792a292 100644 --- a/drivers/scsi/qla2xxx/qla_dfs.c +++ b/drivers/scsi/qla2xxx/qla_dfs.c @@ -1,6 +1,6 @@ /* * QLogic Fibre Channel HBA Driver - * Copyright (c) 2003-2012 QLogic Corporation + * Copyright (c) 2003-2013 QLogic Corporation * * See LICENSE.qla2xxx for copyright and licensing details. */ diff --git a/drivers/scsi/qla2xxx/qla_fw.h b/drivers/scsi/qla2xxx/qla_fw.h index be6d61a..1ac2b0e 100644 --- a/drivers/scsi/qla2xxx/qla_fw.h +++ b/drivers/scsi/qla2xxx/qla_fw.h @@ -1,6 +1,6 @@ /* * QLogic Fibre Channel HBA Driver - * Copyright (c) 2003-2012 QLogic Corporation + * Copyright (c) 2003-2013 QLogic Corporation * * See LICENSE.qla2xxx for copyright and licensing details. */ @@ -300,7 +300,8 @@ struct init_cb_24xx { uint32_t prio_request_q_address[2]; uint16_t msix; - uint8_t reserved_2[6]; + uint16_t msix_atio; + uint8_t reserved_2[4]; uint16_t atio_q_inpointer; uint16_t atio_q_length; @@ -1387,9 +1388,7 @@ struct qla_flt_header { #define FLT_REG_FCP_PRIO_0 0x87 #define FLT_REG_FCP_PRIO_1 0x88 #define FLT_REG_FCOE_FW 0xA4 -#define FLT_REG_FCOE_VPD_0 0xA9 #define FLT_REG_FCOE_NVRAM_0 0xAA -#define FLT_REG_FCOE_VPD_1 0xAB #define FLT_REG_FCOE_NVRAM_1 0xAC struct qla_flt_region { diff --git a/drivers/scsi/qla2xxx/qla_gbl.h b/drivers/scsi/qla2xxx/qla_gbl.h index 2411d1a..eb3ca21 100644 --- a/drivers/scsi/qla2xxx/qla_gbl.h +++ b/drivers/scsi/qla2xxx/qla_gbl.h @@ -1,6 +1,6 @@ /* * QLogic Fibre Channel HBA Driver - * Copyright (c) 2003-2012 QLogic Corporation + * Copyright (c) 2003-2013 QLogic Corporation * * See LICENSE.qla2xxx for copyright and licensing details. */ @@ -55,7 +55,7 @@ extern void qla2x00_update_fcport(scsi_qla_host_t *, fc_port_t *); extern void qla2x00_alloc_fw_dump(scsi_qla_host_t *); extern void qla2x00_try_to_stop_firmware(scsi_qla_host_t *); -extern int qla2x00_get_thermal_temp(scsi_qla_host_t *, uint16_t *, uint16_t *); +extern int qla2x00_get_thermal_temp(scsi_qla_host_t *, uint16_t *); extern void qla84xx_put_chip(struct scsi_qla_host *); @@ -84,6 +84,9 @@ extern int qla83xx_nic_core_reset(scsi_qla_host_t *); extern void qla83xx_reset_ownership(scsi_qla_host_t *); extern int qla2xxx_mctp_dump(scsi_qla_host_t *); +extern int +qla2x00_alloc_outstanding_cmds(struct qla_hw_data *, struct req_que *); + /* * Global Data in qla_os.c source file. */ @@ -94,6 +97,7 @@ extern int qlport_down_retry; extern int ql2xplogiabsentdevice; extern int ql2xloginretrycount; extern int ql2xfdmienable; +extern int ql2xmaxqdepth; extern int ql2xallocfwdump; extern int ql2xextended_error_logging; extern int ql2xiidmaenable; @@ -278,6 +282,9 @@ extern int qla2x00_get_port_name(scsi_qla_host_t *, uint16_t, uint8_t *, uint8_t); extern int +qla24xx_link_initialize(scsi_qla_host_t *); + +extern int qla2x00_lip_reset(scsi_qla_host_t *); extern int @@ -351,6 +358,9 @@ extern int qla2x00_disable_fce_trace(scsi_qla_host_t *, uint64_t *, uint64_t *); extern int +qla2x00_set_driver_version(scsi_qla_host_t *, char *); + +extern int qla2x00_read_sfp(scsi_qla_host_t *, dma_addr_t, uint8_t *, uint16_t, uint16_t, uint16_t, uint16_t); @@ -436,6 +446,7 @@ extern uint8_t *qla25xx_read_nvram_data(scsi_qla_host_t *, uint8_t *, uint32_t, uint32_t); extern int qla25xx_write_nvram_data(scsi_qla_host_t *, uint8_t *, uint32_t, uint32_t); +extern int qla2x00_is_a_vp_did(scsi_qla_host_t *, uint32_t); extern int qla2x00_beacon_on(struct scsi_qla_host *); extern int qla2x00_beacon_off(struct scsi_qla_host *); diff --git a/drivers/scsi/qla2xxx/qla_gs.c b/drivers/scsi/qla2xxx/qla_gs.c index 01efc0e..9b45525 100644 --- a/drivers/scsi/qla2xxx/qla_gs.c +++ b/drivers/scsi/qla2xxx/qla_gs.c @@ -1,6 +1,6 @@ /* * QLogic Fibre Channel HBA Driver - * Copyright (c) 2003-2012 QLogic Corporation + * Copyright (c) 2003-2013 QLogic Corporation * * See LICENSE.qla2xxx for copyright and licensing details. */ @@ -1328,8 +1328,8 @@ qla2x00_fdmi_rhba(scsi_qla_host_t *vha) /* Manufacturer. */ eiter = (struct ct_fdmi_hba_attr *) (entries + size); eiter->type = __constant_cpu_to_be16(FDMI_HBA_MANUFACTURER); - strcpy(eiter->a.manufacturer, "QLogic Corporation"); - alen = strlen(eiter->a.manufacturer); + alen = strlen(QLA2XXX_MANUFACTURER); + strncpy(eiter->a.manufacturer, QLA2XXX_MANUFACTURER, alen + 1); alen += (alen & 3) ? (4 - (alen & 3)) : 4; eiter->len = cpu_to_be16(4 + alen); size += 4 + alen; @@ -1649,8 +1649,8 @@ qla2x00_fdmi_rpa(scsi_qla_host_t *vha) /* OS device name. */ eiter = (struct ct_fdmi_port_attr *) (entries + size); eiter->type = __constant_cpu_to_be16(FDMI_PORT_OS_DEVICE_NAME); - strcpy(eiter->a.os_dev_name, QLA2XXX_DRIVER_NAME); - alen = strlen(eiter->a.os_dev_name); + alen = strlen(QLA2XXX_DRIVER_NAME); + strncpy(eiter->a.os_dev_name, QLA2XXX_DRIVER_NAME, alen + 1); alen += (alen & 3) ? (4 - (alen & 3)) : 4; eiter->len = cpu_to_be16(4 + alen); size += 4 + alen; diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c index 563eee3..edf4d14 100644 --- a/drivers/scsi/qla2xxx/qla_init.c +++ b/drivers/scsi/qla2xxx/qla_init.c @@ -1,6 +1,6 @@ /* * QLogic Fibre Channel HBA Driver - * Copyright (c) 2003-2012 QLogic Corporation + * Copyright (c) 2003-2013 QLogic Corporation * * See LICENSE.qla2xxx for copyright and licensing details. */ @@ -70,9 +70,7 @@ qla2x00_sp_free(void *data, void *ptr) struct scsi_qla_host *vha = (scsi_qla_host_t *)data; del_timer(&iocb->timer); - mempool_free(sp, vha->hw->srb_mempool); - - QLA_VHA_MARK_NOT_BUSY(vha); + qla2x00_rel_sp(vha, sp); } /* Asynchronous Login/Logout Routines -------------------------------------- */ @@ -525,7 +523,7 @@ qla2x00_initialize_adapter(scsi_qla_host_t *vha) vha->flags.reset_active = 0; ha->flags.pci_channel_io_perm_failure = 0; ha->flags.eeh_busy = 0; - ha->flags.thermal_supported = 1; + ha->thermal_support = THERMAL_SUPPORT_I2C|THERMAL_SUPPORT_ISP; atomic_set(&vha->loop_down_timer, LOOP_DOWN_TIME); atomic_set(&vha->loop_state, LOOP_DOWN); vha->device_flags = DFLG_NO_CABLE; @@ -621,6 +619,8 @@ qla2x00_initialize_adapter(scsi_qla_host_t *vha) if (IS_QLA24XX_TYPE(ha) || IS_QLA25XX(ha)) qla24xx_read_fcp_prio_cfg(vha); + qla2x00_set_driver_version(vha, QLA2XXX_VERSION); + return (rval); } @@ -1559,6 +1559,47 @@ done: return rval; } +int +qla2x00_alloc_outstanding_cmds(struct qla_hw_data *ha, struct req_que *req) +{ + /* Don't try to reallocate the array */ + if (req->outstanding_cmds) + return QLA_SUCCESS; + + if (!IS_FWI2_CAPABLE(ha) || (ha->mqiobase && + (ql2xmultique_tag || ql2xmaxqueues > 1))) + req->num_outstanding_cmds = DEFAULT_OUTSTANDING_COMMANDS; + else { + if (ha->fw_xcb_count <= ha->fw_iocb_count) + req->num_outstanding_cmds = ha->fw_xcb_count; + else + req->num_outstanding_cmds = ha->fw_iocb_count; + } + + req->outstanding_cmds = kzalloc(sizeof(srb_t *) * + req->num_outstanding_cmds, GFP_KERNEL); + + if (!req->outstanding_cmds) { + /* + * Try to allocate a minimal size just so we can get through + * initialization. + */ + req->num_outstanding_cmds = MIN_OUTSTANDING_COMMANDS; + req->outstanding_cmds = kzalloc(sizeof(srb_t *) * + req->num_outstanding_cmds, GFP_KERNEL); + + if (!req->outstanding_cmds) { + ql_log(ql_log_fatal, NULL, 0x0126, + "Failed to allocate memory for " + "outstanding_cmds for req_que %p.\n", req); + req->num_outstanding_cmds = 0; + return QLA_FUNCTION_FAILED; + } + } + + return QLA_SUCCESS; +} + /** * qla2x00_setup_chip() - Load and start RISC firmware. * @ha: HA context @@ -1628,9 +1669,18 @@ enable_82xx_npiv: MIN_MULTI_ID_FABRIC - 1; } qla2x00_get_resource_cnts(vha, NULL, - &ha->fw_xcb_count, NULL, NULL, + &ha->fw_xcb_count, NULL, &ha->fw_iocb_count, &ha->max_npiv_vports, NULL); + /* + * Allocate the array of outstanding commands + * now that we know the firmware resources. + */ + rval = qla2x00_alloc_outstanding_cmds(ha, + vha->req); + if (rval != QLA_SUCCESS) + goto failed; + if (!fw_major_version && ql2xallocfwdump && !IS_QLA82XX(ha)) qla2x00_alloc_fw_dump(vha); @@ -1914,7 +1964,7 @@ qla24xx_config_rings(struct scsi_qla_host *vha) WRT_REG_DWORD(®->isp24.rsp_q_in, 0); WRT_REG_DWORD(®->isp24.rsp_q_out, 0); } - qlt_24xx_config_rings(vha, reg); + qlt_24xx_config_rings(vha); /* PCI posting */ RD_REG_DWORD(&ioreg->hccr); @@ -1948,7 +1998,7 @@ qla2x00_init_rings(scsi_qla_host_t *vha) req = ha->req_q_map[que]; if (!req) continue; - for (cnt = 1; cnt < MAX_OUTSTANDING_COMMANDS; cnt++) + for (cnt = 1; cnt < req->num_outstanding_cmds; cnt++) req->outstanding_cmds[cnt] = NULL; req->current_outstanding_cmd = 1; @@ -2157,6 +2207,7 @@ qla2x00_configure_hba(scsi_qla_host_t *vha) char connect_type[22]; struct qla_hw_data *ha = vha->hw; unsigned long flags; + scsi_qla_host_t *base_vha = pci_get_drvdata(ha->pdev); /* Get host addresses. */ rval = qla2x00_get_adapter_id(vha, @@ -2170,6 +2221,13 @@ qla2x00_configure_hba(scsi_qla_host_t *vha) } else { ql_log(ql_log_warn, vha, 0x2009, "Unable to get host loop ID.\n"); + if (IS_FWI2_CAPABLE(ha) && (vha == base_vha) && + (rval == QLA_COMMAND_ERROR && loop_id == 0x1b)) { + ql_log(ql_log_warn, vha, 0x1151, + "Doing link init.\n"); + if (qla24xx_link_initialize(vha) == QLA_SUCCESS) + return rval; + } set_bit(ISP_ABORT_NEEDED, &vha->dpc_flags); } return (rval); @@ -2690,7 +2748,6 @@ qla2x00_alloc_fcport(scsi_qla_host_t *vha, gfp_t flags) fcport->loop_id = FC_NO_LOOP_ID; qla2x00_set_fcport_state(fcport, FCS_UNCONFIGURED); fcport->supported_classes = FC_COS_UNSPECIFIED; - fcport->scan_state = QLA_FCPORT_SCAN_NONE; return fcport; } @@ -3103,7 +3160,7 @@ static int qla2x00_configure_fabric(scsi_qla_host_t *vha) { int rval; - fc_port_t *fcport; + fc_port_t *fcport, *fcptemp; uint16_t next_loopid; uint16_t mb[MAILBOX_REGISTER_COUNT]; uint16_t loop_id; @@ -3141,7 +3198,7 @@ qla2x00_configure_fabric(scsi_qla_host_t *vha) 0xfc, mb, BIT_1|BIT_0); if (rval != QLA_SUCCESS) { set_bit(LOOP_RESYNC_NEEDED, &vha->dpc_flags); - break; + return rval; } if (mb[0] != MBS_COMMAND_COMPLETE) { ql_dbg(ql_dbg_disc, vha, 0x2042, @@ -3173,16 +3230,21 @@ qla2x00_configure_fabric(scsi_qla_host_t *vha) } } +#define QLA_FCPORT_SCAN 1 +#define QLA_FCPORT_FOUND 2 + + list_for_each_entry(fcport, &vha->vp_fcports, list) { + fcport->scan_state = QLA_FCPORT_SCAN; + } + rval = qla2x00_find_all_fabric_devs(vha, &new_fcports); if (rval != QLA_SUCCESS) break; - /* Add new ports to existing port list */ - list_splice_tail_init(&new_fcports, &vha->vp_fcports); - - /* Starting free loop ID. */ - next_loopid = ha->min_external_loopid; - + /* + * Logout all previous fabric devices marked lost, except + * FCP2 devices. + */ list_for_each_entry(fcport, &vha->vp_fcports, list) { if (test_bit(LOOP_RESYNC_NEEDED, &vha->dpc_flags)) break; @@ -3190,8 +3252,7 @@ qla2x00_configure_fabric(scsi_qla_host_t *vha) if ((fcport->flags & FCF_FABRIC_DEVICE) == 0) continue; - /* Logout lost/gone fabric devices (non-FCP2) */ - if (fcport->scan_state != QLA_FCPORT_SCAN_FOUND && + if (fcport->scan_state == QLA_FCPORT_SCAN && atomic_read(&fcport->state) == FCS_ONLINE) { qla2x00_mark_device_lost(vha, fcport, ql2xplogiabsentdevice, 0); @@ -3204,30 +3265,74 @@ qla2x00_configure_fabric(scsi_qla_host_t *vha) fcport->d_id.b.domain, fcport->d_id.b.area, fcport->d_id.b.al_pa); + fcport->loop_id = FC_NO_LOOP_ID; } - continue; } - fcport->scan_state = QLA_FCPORT_SCAN_NONE; - - /* Login fabric devices that need a login */ - if ((fcport->flags & FCF_LOGIN_NEEDED) != 0 && - atomic_read(&vha->loop_down_timer) == 0) { - if (fcport->loop_id == FC_NO_LOOP_ID) { - fcport->loop_id = next_loopid; - rval = qla2x00_find_new_loop_id( - base_vha, fcport); - if (rval != QLA_SUCCESS) { - /* Ran out of IDs to use */ - continue; - } + } + + /* Starting free loop ID. */ + next_loopid = ha->min_external_loopid; + + /* + * Scan through our port list and login entries that need to be + * logged in. + */ + list_for_each_entry(fcport, &vha->vp_fcports, list) { + if (atomic_read(&vha->loop_down_timer) || + test_bit(LOOP_RESYNC_NEEDED, &vha->dpc_flags)) + break; + + if ((fcport->flags & FCF_FABRIC_DEVICE) == 0 || + (fcport->flags & FCF_LOGIN_NEEDED) == 0) + continue; + + if (fcport->loop_id == FC_NO_LOOP_ID) { + fcport->loop_id = next_loopid; + rval = qla2x00_find_new_loop_id( + base_vha, fcport); + if (rval != QLA_SUCCESS) { + /* Ran out of IDs to use */ + break; } } + /* Login and update database */ + qla2x00_fabric_dev_login(vha, fcport, &next_loopid); + } + + /* Exit if out of loop IDs. */ + if (rval != QLA_SUCCESS) { + break; + } + + /* + * Login and add the new devices to our port list. + */ + list_for_each_entry_safe(fcport, fcptemp, &new_fcports, list) { + if (atomic_read(&vha->loop_down_timer) || + test_bit(LOOP_RESYNC_NEEDED, &vha->dpc_flags)) + break; + + /* Find a new loop ID to use. */ + fcport->loop_id = next_loopid; + rval = qla2x00_find_new_loop_id(base_vha, fcport); + if (rval != QLA_SUCCESS) { + /* Ran out of IDs to use */ + break; + } /* Login and update database */ qla2x00_fabric_dev_login(vha, fcport, &next_loopid); + + list_move_tail(&fcport->list, &vha->vp_fcports); } } while (0); + /* Free all new device structures not processed. */ + list_for_each_entry_safe(fcport, fcptemp, &new_fcports, list) { + list_del(&fcport->list); + kfree(fcport); + } + if (rval) { ql_dbg(ql_dbg_disc, vha, 0x2068, "Configure fabric error exit rval=%d.\n", rval); @@ -3263,8 +3368,7 @@ qla2x00_find_all_fabric_devs(scsi_qla_host_t *vha, int first_dev, last_dev; port_id_t wrap = {}, nxt_d_id; struct qla_hw_data *ha = vha->hw; - struct scsi_qla_host *vp, *base_vha = pci_get_drvdata(ha->pdev); - struct scsi_qla_host *tvp; + struct scsi_qla_host *base_vha = pci_get_drvdata(ha->pdev); rval = QLA_SUCCESS; @@ -3377,22 +3481,8 @@ qla2x00_find_all_fabric_devs(scsi_qla_host_t *vha, continue; /* Bypass virtual ports of the same host. */ - found = 0; - if (ha->num_vhosts) { - unsigned long flags; - - spin_lock_irqsave(&ha->vport_slock, flags); - list_for_each_entry_safe(vp, tvp, &ha->vp_list, list) { - if (new_fcport->d_id.b24 == vp->d_id.b24) { - found = 1; - break; - } - } - spin_unlock_irqrestore(&ha->vport_slock, flags); - - if (found) - continue; - } + if (qla2x00_is_a_vp_did(vha, new_fcport->d_id.b24)) + continue; /* Bypass if same domain and area of adapter. */ if (((new_fcport->d_id.b24 & 0xffff00) == @@ -3417,7 +3507,7 @@ qla2x00_find_all_fabric_devs(scsi_qla_host_t *vha, WWN_SIZE)) continue; - fcport->scan_state = QLA_FCPORT_SCAN_FOUND; + fcport->scan_state = QLA_FCPORT_FOUND; found++; @@ -5004,7 +5094,7 @@ qla24xx_load_risc_flash(scsi_qla_host_t *vha, uint32_t *srisc_addr, return rval; } -#define QLA_FW_URL "ftp://ftp.qlogic.com/outgoing/linux/firmware/" +#define QLA_FW_URL "http://ldriver.qlogic.com/firmware/" int qla2x00_load_risc(scsi_qla_host_t *vha, uint32_t *srisc_addr) @@ -5529,6 +5619,8 @@ qla81xx_nvram_config(scsi_qla_host_t *vha) if (IS_T10_PI_CAPABLE(ha)) nv->frame_payload_size &= ~7; + qlt_81xx_config_nvram_stage1(vha, nv); + /* Reset Initialization control block */ memset(icb, 0, ha->init_cb_size); @@ -5569,6 +5661,8 @@ qla81xx_nvram_config(scsi_qla_host_t *vha) qla2x00_set_model_info(vha, nv->model_name, sizeof(nv->model_name), "QLE8XXX"); + qlt_81xx_config_nvram_stage2(vha, icb); + /* Use alternate WWN? */ if (nv->host_p & __constant_cpu_to_le32(BIT_15)) { memcpy(icb->node_name, nv->alternate_node_name, WWN_SIZE); diff --git a/drivers/scsi/qla2xxx/qla_inline.h b/drivers/scsi/qla2xxx/qla_inline.h index c0462c0..68e2c4a 100644 --- a/drivers/scsi/qla2xxx/qla_inline.h +++ b/drivers/scsi/qla2xxx/qla_inline.h @@ -1,6 +1,6 @@ /* * QLogic Fibre Channel HBA Driver - * Copyright (c) 2003-2012 QLogic Corporation + * Copyright (c) 2003-2013 QLogic Corporation * * See LICENSE.qla2xxx for copyright and licensing details. */ @@ -198,6 +198,13 @@ done: } static inline void +qla2x00_rel_sp(scsi_qla_host_t *vha, srb_t *sp) +{ + mempool_free(sp, vha->hw->srb_mempool); + QLA_VHA_MARK_NOT_BUSY(vha); +} + +static inline void qla2x00_init_timer(srb_t *sp, unsigned long tmo) { init_timer(&sp->u.iocb_cmd.timer); @@ -213,3 +220,22 @@ qla2x00_gid_list_size(struct qla_hw_data *ha) { return sizeof(struct gid_list_info) * ha->max_fibre_devices; } + +static inline void +qla2x00_do_host_ramp_up(scsi_qla_host_t *vha) +{ + if (vha->hw->cfg_lun_q_depth >= ql2xmaxqdepth) + return; + + /* Wait at least HOST_QUEUE_RAMPDOWN_INTERVAL before ramping up */ + if (time_before(jiffies, (vha->hw->host_last_rampdown_time + + HOST_QUEUE_RAMPDOWN_INTERVAL))) + return; + + /* Wait at least HOST_QUEUE_RAMPUP_INTERVAL between each ramp up */ + if (time_before(jiffies, (vha->hw->host_last_rampup_time + + HOST_QUEUE_RAMPUP_INTERVAL))) + return; + + set_bit(HOST_RAMP_UP_QUEUE_DEPTH, &vha->dpc_flags); +} diff --git a/drivers/scsi/qla2xxx/qla_iocb.c b/drivers/scsi/qla2xxx/qla_iocb.c index a481684..d263031 100644 --- a/drivers/scsi/qla2xxx/qla_iocb.c +++ b/drivers/scsi/qla2xxx/qla_iocb.c @@ -1,6 +1,6 @@ /* * QLogic Fibre Channel HBA Driver - * Copyright (c) 2003-2012 QLogic Corporation + * Copyright (c) 2003-2013 QLogic Corporation * * See LICENSE.qla2xxx for copyright and licensing details. */ @@ -349,14 +349,14 @@ qla2x00_start_scsi(srb_t *sp) /* Check for room in outstanding command list. */ handle = req->current_outstanding_cmd; - for (index = 1; index < MAX_OUTSTANDING_COMMANDS; index++) { + for (index = 1; index < req->num_outstanding_cmds; index++) { handle++; - if (handle == MAX_OUTSTANDING_COMMANDS) + if (handle == req->num_outstanding_cmds) handle = 1; if (!req->outstanding_cmds[handle]) break; } - if (index == MAX_OUTSTANDING_COMMANDS) + if (index == req->num_outstanding_cmds) goto queuing_error; /* Map the sg table so we have an accurate count of sg entries needed */ @@ -1467,16 +1467,15 @@ qla24xx_start_scsi(srb_t *sp) /* Check for room in outstanding command list. */ handle = req->current_outstanding_cmd; - for (index = 1; index < MAX_OUTSTANDING_COMMANDS; index++) { + for (index = 1; index < req->num_outstanding_cmds; index++) { handle++; - if (handle == MAX_OUTSTANDING_COMMANDS) + if (handle == req->num_outstanding_cmds) handle = 1; if (!req->outstanding_cmds[handle]) break; } - if (index == MAX_OUTSTANDING_COMMANDS) { + if (index == req->num_outstanding_cmds) goto queuing_error; - } /* Map the sg table so we have an accurate count of sg entries needed */ if (scsi_sg_count(cmd)) { @@ -1641,15 +1640,15 @@ qla24xx_dif_start_scsi(srb_t *sp) /* Check for room in outstanding command list. */ handle = req->current_outstanding_cmd; - for (index = 1; index < MAX_OUTSTANDING_COMMANDS; index++) { + for (index = 1; index < req->num_outstanding_cmds; index++) { handle++; - if (handle == MAX_OUTSTANDING_COMMANDS) + if (handle == req->num_outstanding_cmds) handle = 1; if (!req->outstanding_cmds[handle]) break; } - if (index == MAX_OUTSTANDING_COMMANDS) + if (index == req->num_outstanding_cmds) goto queuing_error; /* Compute number of required data segments */ @@ -1822,14 +1821,14 @@ qla2x00_alloc_iocbs(scsi_qla_host_t *vha, srb_t *sp) /* Check for room in outstanding command list. */ handle = req->current_outstanding_cmd; - for (index = 1; index < MAX_OUTSTANDING_COMMANDS; index++) { + for (index = 1; req->num_outstanding_cmds; index++) { handle++; - if (handle == MAX_OUTSTANDING_COMMANDS) + if (handle == req->num_outstanding_cmds) handle = 1; if (!req->outstanding_cmds[handle]) break; } - if (index == MAX_OUTSTANDING_COMMANDS) { + if (index == req->num_outstanding_cmds) { ql_log(ql_log_warn, vha, 0x700b, "No room on outstanding cmd array.\n"); goto queuing_error; @@ -2263,14 +2262,14 @@ qla82xx_start_scsi(srb_t *sp) /* Check for room in outstanding command list. */ handle = req->current_outstanding_cmd; - for (index = 1; index < MAX_OUTSTANDING_COMMANDS; index++) { + for (index = 1; index < req->num_outstanding_cmds; index++) { handle++; - if (handle == MAX_OUTSTANDING_COMMANDS) + if (handle == req->num_outstanding_cmds) handle = 1; if (!req->outstanding_cmds[handle]) break; } - if (index == MAX_OUTSTANDING_COMMANDS) + if (index == req->num_outstanding_cmds) goto queuing_error; /* Map the sg table so we have an accurate count of sg entries needed */ @@ -2767,15 +2766,15 @@ qla2x00_start_bidir(srb_t *sp, struct scsi_qla_host *vha, uint32_t tot_dsds) /* Check for room in outstanding command list. */ handle = req->current_outstanding_cmd; - for (index = 1; index < MAX_OUTSTANDING_COMMANDS; index++) { + for (index = 1; index < req->num_outstanding_cmds; index++) { handle++; - if (handle == MAX_OUTSTANDING_COMMANDS) + if (handle == req->num_outstanding_cmds) handle = 1; if (!req->outstanding_cmds[handle]) break; } - if (index == MAX_OUTSTANDING_COMMANDS) { + if (index == req->num_outstanding_cmds) { rval = EXT_STATUS_BUSY; goto queuing_error; } diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c index 873c820..e9dbd74 100644 --- a/drivers/scsi/qla2xxx/qla_isr.c +++ b/drivers/scsi/qla2xxx/qla_isr.c @@ -1,6 +1,6 @@ /* * QLogic Fibre Channel HBA Driver - * Copyright (c) 2003-2012 QLogic Corporation + * Copyright (c) 2003-2013 QLogic Corporation * * See LICENSE.qla2xxx for copyright and licensing details. */ @@ -13,6 +13,8 @@ #include <scsi/scsi_bsg_fc.h> #include <scsi/scsi_eh.h> +#include "qla_target.h" + static void qla2x00_mbx_completion(scsi_qla_host_t *, uint16_t); static void qla2x00_process_completed_request(struct scsi_qla_host *, struct req_que *, uint32_t); @@ -489,10 +491,37 @@ qla83xx_handle_8200_aen(scsi_qla_host_t *vha, uint16_t *mb) if (mb[1] & IDC_DEVICE_STATE_CHANGE) { ql_log(ql_log_info, vha, 0x506a, "IDC Device-State changed = 0x%x.\n", mb[4]); + if (ha->flags.nic_core_reset_owner) + return; qla83xx_schedule_work(vha, MBA_IDC_AEN); } } +int +qla2x00_is_a_vp_did(scsi_qla_host_t *vha, uint32_t rscn_entry) +{ + struct qla_hw_data *ha = vha->hw; + scsi_qla_host_t *vp; + uint32_t vp_did; + unsigned long flags; + int ret = 0; + + if (!ha->num_vhosts) + return ret; + + spin_lock_irqsave(&ha->vport_slock, flags); + list_for_each_entry(vp, &ha->vp_list, list) { + vp_did = vp->d_id.b24; + if (vp_did == rscn_entry) { + ret = 1; + break; + } + } + spin_unlock_irqrestore(&ha->vport_slock, flags); + + return ret; +} + /** * qla2x00_async_event() - Process aynchronous events. * @ha: SCSI driver HA context @@ -899,6 +928,10 @@ skip_rio: /* Ignore reserved bits from RSCN-payload. */ rscn_entry = ((mb[1] & 0x3ff) << 16) | mb[2]; + /* Skip RSCNs for virtual ports on the same physical port */ + if (qla2x00_is_a_vp_did(vha, rscn_entry)) + break; + atomic_set(&vha->loop_down_timer, 0); vha->flags.management_server_logged_in = 0; @@ -983,14 +1016,25 @@ skip_rio: mb[1], mb[2], mb[3]); break; case MBA_IDC_NOTIFY: - /* See if we need to quiesce any I/O */ - if (IS_QLA8031(vha->hw)) - if ((mb[2] & 0x7fff) == MBC_PORT_RESET || - (mb[2] & 0x7fff) == MBC_SET_PORT_CONFIG) { + if (IS_QLA8031(vha->hw)) { + mb[4] = RD_REG_WORD(®24->mailbox4); + if (((mb[2] & 0x7fff) == MBC_PORT_RESET || + (mb[2] & 0x7fff) == MBC_SET_PORT_CONFIG) && + (mb[4] & INTERNAL_LOOPBACK_MASK) != 0) { set_bit(ISP_QUIESCE_NEEDED, &vha->dpc_flags); + /* + * Extend loop down timer since port is active. + */ + if (atomic_read(&vha->loop_state) == LOOP_DOWN) + atomic_set(&vha->loop_down_timer, + LOOP_DOWN_TIME); qla2xxx_wake_dpc(vha); } + } case MBA_IDC_COMPLETE: + if (ha->notify_lb_portup_comp) + complete(&ha->lb_portup_comp); + /* Fallthru */ case MBA_IDC_TIME_EXT: if (IS_QLA81XX(vha->hw) || IS_QLA8031(vha->hw)) qla81xx_idc_event(vha, mb[0], mb[1]); @@ -1029,7 +1073,7 @@ qla2x00_process_completed_request(struct scsi_qla_host *vha, struct qla_hw_data *ha = vha->hw; /* Validate handle. */ - if (index >= MAX_OUTSTANDING_COMMANDS) { + if (index >= req->num_outstanding_cmds) { ql_log(ql_log_warn, vha, 0x3014, "Invalid SCSI command index (%x).\n", index); @@ -1067,7 +1111,7 @@ qla2x00_get_sp_from_handle(scsi_qla_host_t *vha, const char *func, uint16_t index; index = LSW(pkt->handle); - if (index >= MAX_OUTSTANDING_COMMANDS) { + if (index >= req->num_outstanding_cmds) { ql_log(ql_log_warn, vha, 0x5031, "Invalid command index (%x).\n", index); if (IS_QLA82XX(ha)) @@ -1740,7 +1784,7 @@ qla25xx_process_bidir_status_iocb(scsi_qla_host_t *vha, void *pkt, sts24 = (struct sts_entry_24xx *) pkt; /* Validate handle. */ - if (index >= MAX_OUTSTANDING_COMMANDS) { + if (index >= req->num_outstanding_cmds) { ql_log(ql_log_warn, vha, 0x70af, "Invalid SCSI completion handle 0x%x.\n", index); set_bit(ISP_ABORT_NEEDED, &vha->dpc_flags); @@ -1910,9 +1954,9 @@ qla2x00_status_entry(scsi_qla_host_t *vha, struct rsp_que *rsp, void *pkt) req = ha->req_q_map[que]; /* Validate handle. */ - if (handle < MAX_OUTSTANDING_COMMANDS) { + if (handle < req->num_outstanding_cmds) sp = req->outstanding_cmds[handle]; - } else + else sp = NULL; if (sp == NULL) { @@ -1934,6 +1978,7 @@ qla2x00_status_entry(scsi_qla_host_t *vha, struct rsp_que *rsp, void *pkt) /* Fast path completion. */ if (comp_status == CS_COMPLETE && scsi_status == 0) { + qla2x00_do_host_ramp_up(vha); qla2x00_process_completed_request(vha, req, handle); return; @@ -2193,6 +2238,9 @@ out: cp->cmnd[8], cp->cmnd[9], scsi_bufflen(cp), rsp_info_len, resid_len, fw_resid_len); + if (!res) + qla2x00_do_host_ramp_up(vha); + if (rsp->status_srb == NULL) sp->done(ha, sp, res); } @@ -2747,6 +2795,12 @@ static struct qla_init_msix_entry qla82xx_msix_entries[2] = { { "qla2xxx (rsp_q)", qla82xx_msix_rsp_q }, }; +static struct qla_init_msix_entry qla83xx_msix_entries[3] = { + { "qla2xxx (default)", qla24xx_msix_default }, + { "qla2xxx (rsp_q)", qla24xx_msix_rsp_q }, + { "qla2xxx (atio_q)", qla83xx_msix_atio_q }, +}; + static void qla24xx_disable_msix(struct qla_hw_data *ha) { @@ -2827,9 +2881,13 @@ msix_failed: } /* Enable MSI-X vectors for the base queue */ - for (i = 0; i < 2; i++) { + for (i = 0; i < ha->msix_count; i++) { qentry = &ha->msix_entries[i]; - if (IS_QLA82XX(ha)) { + if (QLA_TGT_MODE_ENABLED() && IS_ATIO_MSIX_CAPABLE(ha)) { + ret = request_irq(qentry->vector, + qla83xx_msix_entries[i].handler, + 0, qla83xx_msix_entries[i].name, rsp); + } else if (IS_QLA82XX(ha)) { ret = request_irq(qentry->vector, qla82xx_msix_entries[i].handler, 0, qla82xx_msix_entries[i].name, rsp); diff --git a/drivers/scsi/qla2xxx/qla_mbx.c b/drivers/scsi/qla2xxx/qla_mbx.c index 68c55ea..186dd59 100644 --- a/drivers/scsi/qla2xxx/qla_mbx.c +++ b/drivers/scsi/qla2xxx/qla_mbx.c @@ -1,6 +1,6 @@ /* * QLogic Fibre Channel HBA Driver - * Copyright (c) 2003-2012 QLogic Corporation + * Copyright (c) 2003-2013 QLogic Corporation * * See LICENSE.qla2xxx for copyright and licensing details. */ @@ -900,13 +900,13 @@ qla2x00_abort_command(srb_t *sp) "Entered %s.\n", __func__); spin_lock_irqsave(&ha->hardware_lock, flags); - for (handle = 1; handle < MAX_OUTSTANDING_COMMANDS; handle++) { + for (handle = 1; handle < req->num_outstanding_cmds; handle++) { if (req->outstanding_cmds[handle] == sp) break; } spin_unlock_irqrestore(&ha->hardware_lock, flags); - if (handle == MAX_OUTSTANDING_COMMANDS) { + if (handle == req->num_outstanding_cmds) { /* command not found */ return QLA_FUNCTION_FAILED; } @@ -1633,6 +1633,54 @@ qla2x00_get_port_name(scsi_qla_host_t *vha, uint16_t loop_id, uint8_t *name, } /* + * qla24xx_link_initialization + * Issue link initialization mailbox command. + * + * Input: + * ha = adapter block pointer. + * TARGET_QUEUE_LOCK must be released. + * ADAPTER_STATE_LOCK must be released. + * + * Returns: + * qla2x00 local function return status code. + * + * Context: + * Kernel context. + */ +int +qla24xx_link_initialize(scsi_qla_host_t *vha) +{ + int rval; + mbx_cmd_t mc; + mbx_cmd_t *mcp = &mc; + + ql_dbg(ql_dbg_mbx + ql_dbg_verbose, vha, 0x1152, + "Entered %s.\n", __func__); + + if (!IS_FWI2_CAPABLE(vha->hw) || IS_CNA_CAPABLE(vha->hw)) + return QLA_FUNCTION_FAILED; + + mcp->mb[0] = MBC_LINK_INITIALIZATION; + mcp->mb[1] = BIT_6|BIT_4; + mcp->mb[2] = 0; + mcp->mb[3] = 0; + mcp->out_mb = MBX_3|MBX_2|MBX_1|MBX_0; + mcp->in_mb = MBX_0; + mcp->tov = MBX_TOV_SECONDS; + mcp->flags = 0; + rval = qla2x00_mailbox_command(vha, mcp); + + if (rval != QLA_SUCCESS) { + ql_dbg(ql_dbg_mbx, vha, 0x1153, "Failed=%x.\n", rval); + } else { + ql_dbg(ql_dbg_mbx + ql_dbg_verbose, vha, 0x1154, + "Done %s.\n", __func__); + } + + return rval; +} + +/* * qla2x00_lip_reset * Issue LIP reset mailbox command. * @@ -2535,12 +2583,12 @@ qla24xx_abort_command(srb_t *sp) "Entered %s.\n", __func__); spin_lock_irqsave(&ha->hardware_lock, flags); - for (handle = 1; handle < MAX_OUTSTANDING_COMMANDS; handle++) { + for (handle = 1; handle < req->num_outstanding_cmds; handle++) { if (req->outstanding_cmds[handle] == sp) break; } spin_unlock_irqrestore(&ha->hardware_lock, flags); - if (handle == MAX_OUTSTANDING_COMMANDS) { + if (handle == req->num_outstanding_cmds) { /* Command not found. */ return QLA_FUNCTION_FAILED; } @@ -3093,6 +3141,7 @@ qla24xx_report_id_acquisition(scsi_qla_host_t *vha, struct qla_hw_data *ha = vha->hw; scsi_qla_host_t *vp; unsigned long flags; + int found; ql_dbg(ql_dbg_mbx + ql_dbg_verbose, vha, 0x10b6, "Entered %s.\n", __func__); @@ -3128,13 +3177,17 @@ qla24xx_report_id_acquisition(scsi_qla_host_t *vha, return; } + found = 0; spin_lock_irqsave(&ha->vport_slock, flags); - list_for_each_entry(vp, &ha->vp_list, list) - if (vp_idx == vp->vp_idx) + list_for_each_entry(vp, &ha->vp_list, list) { + if (vp_idx == vp->vp_idx) { + found = 1; break; + } + } spin_unlock_irqrestore(&ha->vport_slock, flags); - if (!vp) + if (!found) return; vp->d_id.b.domain = rptid_entry->port_id[2]; @@ -3814,6 +3867,97 @@ qla81xx_restart_mpi_firmware(scsi_qla_host_t *vha) } int +qla2x00_set_driver_version(scsi_qla_host_t *vha, char *version) +{ + int rval; + mbx_cmd_t mc; + mbx_cmd_t *mcp = &mc; + int len; + uint16_t dwlen; + uint8_t *str; + dma_addr_t str_dma; + struct qla_hw_data *ha = vha->hw; + + if (!IS_FWI2_CAPABLE(ha) || IS_QLA82XX(ha)) + return QLA_FUNCTION_FAILED; + + ql_dbg(ql_dbg_mbx + ql_dbg_verbose, vha, 0x1155, + "Entered %s.\n", __func__); + + str = dma_pool_alloc(ha->s_dma_pool, GFP_KERNEL, &str_dma); + if (!str) { + ql_log(ql_log_warn, vha, 0x1156, + "Failed to allocate driver version param.\n"); + return QLA_MEMORY_ALLOC_FAILED; + } + + memcpy(str, "\x7\x3\x11\x0", 4); + dwlen = str[0]; + len = dwlen * sizeof(uint32_t) - 4; + memset(str + 4, 0, len); + if (len > strlen(version)) + len = strlen(version); + memcpy(str + 4, version, len); + + mcp->mb[0] = MBC_SET_RNID_PARAMS; + mcp->mb[1] = RNID_TYPE_SET_VERSION << 8 | dwlen; + mcp->mb[2] = MSW(LSD(str_dma)); + mcp->mb[3] = LSW(LSD(str_dma)); + mcp->mb[6] = MSW(MSD(str_dma)); + mcp->mb[7] = LSW(MSD(str_dma)); + mcp->out_mb = MBX_7|MBX_6|MBX_3|MBX_2|MBX_1|MBX_0; + mcp->in_mb = MBX_0; + mcp->tov = MBX_TOV_SECONDS; + mcp->flags = 0; + rval = qla2x00_mailbox_command(vha, mcp); + + if (rval != QLA_SUCCESS) { + ql_dbg(ql_dbg_mbx, vha, 0x1157, + "Failed=%x mb[0]=%x.\n", rval, mcp->mb[0]); + } else { + ql_dbg(ql_dbg_mbx + ql_dbg_verbose, vha, 0x1158, + "Done %s.\n", __func__); + } + + dma_pool_free(ha->s_dma_pool, str, str_dma); + + return rval; +} + +static int +qla2x00_read_asic_temperature(scsi_qla_host_t *vha, uint16_t *temp) +{ + int rval; + mbx_cmd_t mc; + mbx_cmd_t *mcp = &mc; + + if (!IS_FWI2_CAPABLE(vha->hw)) + return QLA_FUNCTION_FAILED; + + ql_dbg(ql_dbg_mbx + ql_dbg_verbose, vha, 0x1159, + "Entered %s.\n", __func__); + + mcp->mb[0] = MBC_GET_RNID_PARAMS; + mcp->mb[1] = RNID_TYPE_ASIC_TEMP << 8; + mcp->out_mb = MBX_1|MBX_0; + mcp->in_mb = MBX_1|MBX_0; + mcp->tov = MBX_TOV_SECONDS; + mcp->flags = 0; + rval = qla2x00_mailbox_command(vha, mcp); + *temp = mcp->mb[1]; + + if (rval != QLA_SUCCESS) { + ql_dbg(ql_dbg_mbx, vha, 0x115a, + "Failed=%x mb[0]=%x,%x.\n", rval, mcp->mb[0], mcp->mb[1]); + } else { + ql_dbg(ql_dbg_mbx + ql_dbg_verbose, vha, 0x115b, + "Done %s.\n", __func__); + } + + return rval; +} + +int qla2x00_read_sfp(scsi_qla_host_t *vha, dma_addr_t sfp_dma, uint8_t *sfp, uint16_t dev, uint16_t off, uint16_t len, uint16_t opt) { @@ -4415,38 +4559,45 @@ qla24xx_set_fcp_prio(scsi_qla_host_t *vha, uint16_t loop_id, uint16_t priority, } int -qla2x00_get_thermal_temp(scsi_qla_host_t *vha, uint16_t *temp, uint16_t *frac) +qla2x00_get_thermal_temp(scsi_qla_host_t *vha, uint16_t *temp) { - int rval; - uint8_t byte; + int rval = QLA_FUNCTION_FAILED; struct qla_hw_data *ha = vha->hw; + uint8_t byte; ql_dbg(ql_dbg_mbx + ql_dbg_verbose, vha, 0x10ca, "Entered %s.\n", __func__); - /* Integer part */ - rval = qla2x00_read_sfp(vha, 0, &byte, 0x98, 0x01, 1, - BIT_13|BIT_12|BIT_0); - if (rval != QLA_SUCCESS) { - ql_dbg(ql_dbg_mbx, vha, 0x10c9, "Failed=%x.\n", rval); - ha->flags.thermal_supported = 0; - goto fail; + if (ha->thermal_support & THERMAL_SUPPORT_I2C) { + rval = qla2x00_read_sfp(vha, 0, &byte, + 0x98, 0x1, 1, BIT_13|BIT_12|BIT_0); + *temp = byte; + if (rval == QLA_SUCCESS) + goto done; + + ql_log(ql_log_warn, vha, 0x10c9, + "Thermal not supported by I2C.\n"); + ha->thermal_support &= ~THERMAL_SUPPORT_I2C; } - *temp = byte; - /* Fraction part */ - rval = qla2x00_read_sfp(vha, 0, &byte, 0x98, 0x10, 1, - BIT_13|BIT_12|BIT_0); - if (rval != QLA_SUCCESS) { - ql_dbg(ql_dbg_mbx, vha, 0x1019, "Failed=%x.\n", rval); - ha->flags.thermal_supported = 0; - goto fail; + if (ha->thermal_support & THERMAL_SUPPORT_ISP) { + rval = qla2x00_read_asic_temperature(vha, temp); + if (rval == QLA_SUCCESS) + goto done; + + ql_log(ql_log_warn, vha, 0x1019, + "Thermal not supported by ISP.\n"); + ha->thermal_support &= ~THERMAL_SUPPORT_ISP; } - *frac = (byte >> 6) * 25; + ql_log(ql_log_warn, vha, 0x1150, + "Thermal not supported by this card " + "(ignoring further requests).\n"); + return rval; + +done: ql_dbg(ql_dbg_mbx + ql_dbg_verbose, vha, 0x1018, "Done %s.\n", __func__); -fail: return rval; } diff --git a/drivers/scsi/qla2xxx/qla_mid.c b/drivers/scsi/qla2xxx/qla_mid.c index 20fd974..f868a9f 100644 --- a/drivers/scsi/qla2xxx/qla_mid.c +++ b/drivers/scsi/qla2xxx/qla_mid.c @@ -1,6 +1,6 @@ /* * QLogic Fibre Channel HBA Driver - * Copyright (c) 2003-2012 QLogic Corporation + * Copyright (c) 2003-2013 QLogic Corporation * * See LICENSE.qla2xxx for copyright and licensing details. */ @@ -523,6 +523,7 @@ qla25xx_free_req_que(struct scsi_qla_host *vha, struct req_que *req) clear_bit(que_id, ha->req_qid_map); mutex_unlock(&ha->vport_lock); } + kfree(req->outstanding_cmds); kfree(req); req = NULL; } @@ -649,6 +650,10 @@ qla25xx_create_req_que(struct qla_hw_data *ha, uint16_t options, goto que_failed; } + ret = qla2x00_alloc_outstanding_cmds(ha, req); + if (ret != QLA_SUCCESS) + goto que_failed; + mutex_lock(&ha->vport_lock); que_id = find_first_zero_bit(ha->req_qid_map, ha->max_req_queues); if (que_id >= ha->max_req_queues) { @@ -685,7 +690,7 @@ qla25xx_create_req_que(struct qla_hw_data *ha, uint16_t options, "options=0x%x.\n", req->options); ql_dbg(ql_dbg_init, base_vha, 0x00dd, "options=0x%x.\n", req->options); - for (cnt = 1; cnt < MAX_OUTSTANDING_COMMANDS; cnt++) + for (cnt = 1; cnt < req->num_outstanding_cmds; cnt++) req->outstanding_cmds[cnt] = NULL; req->current_outstanding_cmd = 1; diff --git a/drivers/scsi/qla2xxx/qla_nx.c b/drivers/scsi/qla2xxx/qla_nx.c index 3e3f593..10754f51 100644 --- a/drivers/scsi/qla2xxx/qla_nx.c +++ b/drivers/scsi/qla2xxx/qla_nx.c @@ -1,6 +1,6 @@ /* * QLogic Fibre Channel HBA Driver - * Copyright (c) 2003-2012 QLogic Corporation + * Copyright (c) 2003-2013 QLogic Corporation * * See LICENSE.qla2xxx for copyright and licensing details. */ @@ -847,14 +847,21 @@ static int qla82xx_rom_lock(struct qla_hw_data *ha) { int done = 0, timeout = 0; + uint32_t lock_owner = 0; + scsi_qla_host_t *vha = pci_get_drvdata(ha->pdev); while (!done) { /* acquire semaphore2 from PCI HW block */ done = qla82xx_rd_32(ha, QLA82XX_PCIE_REG(PCIE_SEM2_LOCK)); if (done == 1) break; - if (timeout >= qla82xx_rom_lock_timeout) + if (timeout >= qla82xx_rom_lock_timeout) { + lock_owner = qla82xx_rd_32(ha, QLA82XX_ROM_LOCK_ID); + ql_dbg(ql_dbg_p3p, vha, 0xb085, + "Failed to acquire rom lock, acquired by %d.\n", + lock_owner); return -1; + } timeout++; } qla82xx_wr_32(ha, QLA82XX_ROM_LOCK_ID, ROM_LOCK_DRIVER); @@ -3629,7 +3636,7 @@ qla82xx_chip_reset_cleanup(scsi_qla_host_t *vha) req = ha->req_q_map[que]; if (!req) continue; - for (cnt = 1; cnt < MAX_OUTSTANDING_COMMANDS; cnt++) { + for (cnt = 1; cnt < req->num_outstanding_cmds; cnt++) { sp = req->outstanding_cmds[cnt]; if (sp) { if (!sp->u.scmd.ctx || diff --git a/drivers/scsi/qla2xxx/qla_nx.h b/drivers/scsi/qla2xxx/qla_nx.h index 6c953e8..d268e84 100644 --- a/drivers/scsi/qla2xxx/qla_nx.h +++ b/drivers/scsi/qla2xxx/qla_nx.h @@ -1,6 +1,6 @@ /* * QLogic Fibre Channel HBA Driver - * Copyright (c) 2003-2012 QLogic Corporation + * Copyright (c) 2003-2013 QLogic Corporation * * See LICENSE.qla2xxx for copyright and licensing details. */ @@ -897,7 +897,7 @@ struct ct6_dsd { #define FLT_REG_BOOT_CODE_82XX 0x78 #define FLT_REG_FW_82XX 0x74 #define FLT_REG_GOLD_FW_82XX 0x75 -#define FLT_REG_VPD_82XX 0x81 +#define FLT_REG_VPD_8XXX 0x81 #define FA_VPD_SIZE_82XX 0x400 diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index 10d23f8..2c6dd3d 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -1,6 +1,6 @@ /* * QLogic Fibre Channel HBA Driver - * Copyright (c) 2003-2012 QLogic Corporation + * Copyright (c) 2003-2013 QLogic Corporation * * See LICENSE.qla2xxx for copyright and licensing details. */ @@ -111,8 +111,7 @@ MODULE_PARM_DESC(ql2xfdmienable, "Enables FDMI registrations. " "0 - no FDMI. Default is 1 - perform FDMI."); -#define MAX_Q_DEPTH 32 -static int ql2xmaxqdepth = MAX_Q_DEPTH; +int ql2xmaxqdepth = MAX_Q_DEPTH; module_param(ql2xmaxqdepth, int, S_IRUGO|S_IWUSR); MODULE_PARM_DESC(ql2xmaxqdepth, "Maximum queue depth to set for each LUN. " @@ -360,6 +359,9 @@ static void qla2x00_free_req_que(struct qla_hw_data *ha, struct req_que *req) (req->length + 1) * sizeof(request_t), req->ring, req->dma); + if (req) + kfree(req->outstanding_cmds); + kfree(req); req = NULL; } @@ -628,7 +630,7 @@ qla2x00_sp_free_dma(void *vha, void *ptr) } CMD_SP(cmd) = NULL; - mempool_free(sp, ha->srb_mempool); + qla2x00_rel_sp(sp->fcport->vha, sp); } static void @@ -716,9 +718,11 @@ qla2xxx_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd) goto qc24_target_busy; } - sp = qla2x00_get_sp(base_vha, fcport, GFP_ATOMIC); - if (!sp) + sp = qla2x00_get_sp(vha, fcport, GFP_ATOMIC); + if (!sp) { + set_bit(HOST_RAMP_DOWN_QUEUE_DEPTH, &vha->dpc_flags); goto qc24_host_busy; + } sp->u.scmd.cmd = cmd; sp->type = SRB_SCSI_CMD; @@ -731,6 +735,7 @@ qla2xxx_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd) if (rval != QLA_SUCCESS) { ql_dbg(ql_dbg_io + ql_dbg_verbose, vha, 0x3013, "Start scsi failed rval=%d for cmd=%p.\n", rval, cmd); + set_bit(HOST_RAMP_DOWN_QUEUE_DEPTH, &vha->dpc_flags); goto qc24_host_busy_free_sp; } @@ -1010,7 +1015,7 @@ qla2x00_eh_wait_for_pending_commands(scsi_qla_host_t *vha, unsigned int t, spin_lock_irqsave(&ha->hardware_lock, flags); req = vha->req; for (cnt = 1; status == QLA_SUCCESS && - cnt < MAX_OUTSTANDING_COMMANDS; cnt++) { + cnt < req->num_outstanding_cmds; cnt++) { sp = req->outstanding_cmds[cnt]; if (!sp) continue; @@ -1300,14 +1305,14 @@ qla2x00_loop_reset(scsi_qla_host_t *vha) } if (ha->flags.enable_lip_full_login && !IS_CNA_CAPABLE(ha)) { + atomic_set(&vha->loop_state, LOOP_DOWN); + atomic_set(&vha->loop_down_timer, LOOP_DOWN_TIME); + qla2x00_mark_all_devices_lost(vha, 0); ret = qla2x00_full_login_lip(vha); if (ret != QLA_SUCCESS) { ql_dbg(ql_dbg_taskm, vha, 0x802d, "full_login_lip=%d.\n", ret); } - atomic_set(&vha->loop_state, LOOP_DOWN); - atomic_set(&vha->loop_down_timer, LOOP_DOWN_TIME); - qla2x00_mark_all_devices_lost(vha, 0); } if (ha->flags.enable_lip_reset) { @@ -1337,7 +1342,9 @@ qla2x00_abort_all_cmds(scsi_qla_host_t *vha, int res) req = ha->req_q_map[que]; if (!req) continue; - for (cnt = 1; cnt < MAX_OUTSTANDING_COMMANDS; cnt++) { + if (!req->outstanding_cmds) + continue; + for (cnt = 1; cnt < req->num_outstanding_cmds; cnt++) { sp = req->outstanding_cmds[cnt]; if (sp) { req->outstanding_cmds[cnt] = NULL; @@ -1453,6 +1460,81 @@ qla2x00_change_queue_type(struct scsi_device *sdev, int tag_type) return tag_type; } +static void +qla2x00_host_ramp_down_queuedepth(scsi_qla_host_t *vha) +{ + scsi_qla_host_t *vp; + struct Scsi_Host *shost; + struct scsi_device *sdev; + struct qla_hw_data *ha = vha->hw; + unsigned long flags; + + ha->host_last_rampdown_time = jiffies; + + if (ha->cfg_lun_q_depth <= vha->host->cmd_per_lun) + return; + + if ((ha->cfg_lun_q_depth / 2) < vha->host->cmd_per_lun) + ha->cfg_lun_q_depth = vha->host->cmd_per_lun; + else + ha->cfg_lun_q_depth = ha->cfg_lun_q_depth / 2; + + /* + * Geometrically ramp down the queue depth for all devices on this + * adapter + */ + spin_lock_irqsave(&ha->vport_slock, flags); + list_for_each_entry(vp, &ha->vp_list, list) { + shost = vp->host; + shost_for_each_device(sdev, shost) { + if (sdev->queue_depth > shost->cmd_per_lun) { + if (sdev->queue_depth < ha->cfg_lun_q_depth) + continue; + ql_log(ql_log_warn, vp, 0x3031, + "%ld:%d:%d: Ramping down queue depth to %d", + vp->host_no, sdev->id, sdev->lun, + ha->cfg_lun_q_depth); + qla2x00_change_queue_depth(sdev, + ha->cfg_lun_q_depth, SCSI_QDEPTH_DEFAULT); + } + } + } + spin_unlock_irqrestore(&ha->vport_slock, flags); + + return; +} + +static void +qla2x00_host_ramp_up_queuedepth(scsi_qla_host_t *vha) +{ + scsi_qla_host_t *vp; + struct Scsi_Host *shost; + struct scsi_device *sdev; + struct qla_hw_data *ha = vha->hw; + unsigned long flags; + + ha->host_last_rampup_time = jiffies; + ha->cfg_lun_q_depth++; + + /* + * Linearly ramp up the queue depth for all devices on this + * adapter + */ + spin_lock_irqsave(&ha->vport_slock, flags); + list_for_each_entry(vp, &ha->vp_list, list) { + shost = vp->host; + shost_for_each_device(sdev, shost) { + if (sdev->queue_depth > ha->cfg_lun_q_depth) + continue; + qla2x00_change_queue_depth(sdev, ha->cfg_lun_q_depth, + SCSI_QDEPTH_RAMP_UP); + } + } + spin_unlock_irqrestore(&ha->vport_slock, flags); + + return; +} + /** * qla2x00_config_dma_addressing() - Configure OS DMA addressing method. * @ha: HA context @@ -1730,6 +1812,9 @@ qla83xx_iospace_config(struct qla_hw_data *ha) mqiobase_exit: ha->msix_count = ha->max_rsp_queues + 1; + + qlt_83xx_iospace_config(ha); + ql_dbg_pci(ql_dbg_init, ha->pdev, 0x011f, "MSIX Count:%d.\n", ha->msix_count); return 0; @@ -2230,6 +2315,7 @@ qla2x00_probe_one(struct pci_dev *pdev, const struct pci_device_id *id) ha->init_cb_size = sizeof(init_cb_t); ha->link_data_rate = PORT_SPEED_UNKNOWN; ha->optrom_size = OPTROM_SIZE_2300; + ha->cfg_lun_q_depth = ql2xmaxqdepth; /* Assign ISP specific operations. */ if (IS_QLA2100(ha)) { @@ -2307,6 +2393,7 @@ qla2x00_probe_one(struct pci_dev *pdev, const struct pci_device_id *id) ha->mbx_count = MAILBOX_REGISTER_COUNT; req_length = REQUEST_ENTRY_CNT_24XX; rsp_length = RESPONSE_ENTRY_CNT_2300; + ha->tgt.atio_q_length = ATIO_ENTRY_CNT_24XX; ha->max_loop_id = SNS_LAST_LOOP_ID_2300; ha->init_cb_size = sizeof(struct mid_init_cb_81xx); ha->gid_list_info_size = 8; @@ -2338,6 +2425,7 @@ qla2x00_probe_one(struct pci_dev *pdev, const struct pci_device_id *id) ha->mbx_count = MAILBOX_REGISTER_COUNT; req_length = REQUEST_ENTRY_CNT_24XX; rsp_length = RESPONSE_ENTRY_CNT_2300; + ha->tgt.atio_q_length = ATIO_ENTRY_CNT_24XX; ha->max_loop_id = SNS_LAST_LOOP_ID_2300; ha->init_cb_size = sizeof(struct mid_init_cb_81xx); ha->gid_list_info_size = 8; @@ -2377,6 +2465,7 @@ qla2x00_probe_one(struct pci_dev *pdev, const struct pci_device_id *id) complete(&ha->mbx_cmd_comp); init_completion(&ha->mbx_intr_comp); init_completion(&ha->dcbx_comp); + init_completion(&ha->lb_portup_comp); set_bit(0, (unsigned long *) ha->vp_idx_map); @@ -2720,6 +2809,9 @@ qla2x00_shutdown(struct pci_dev *pdev) scsi_qla_host_t *vha; struct qla_hw_data *ha; + if (!atomic_read(&pdev->enable_cnt)) + return; + vha = pci_get_drvdata(pdev); ha = vha->hw; @@ -3974,6 +4066,8 @@ qla83xx_force_lock_recovery(scsi_qla_host_t *base_vha) uint32_t idc_lck_rcvry_stage_mask = 0x3; uint32_t idc_lck_rcvry_owner_mask = 0x3c; struct qla_hw_data *ha = base_vha->hw; + ql_dbg(ql_dbg_p3p, base_vha, 0xb086, + "Trying force recovery of the IDC lock.\n"); rval = qla83xx_rd_reg(base_vha, QLA83XX_IDC_LOCK_RECOVERY, &data); if (rval) @@ -4065,6 +4159,7 @@ qla83xx_idc_lock(scsi_qla_host_t *base_vha, uint16_t requester_id) { uint16_t options = (requester_id << 15) | BIT_6; uint32_t data; + uint32_t lock_owner; struct qla_hw_data *ha = base_vha->hw; /* IDC-lock implementation using driver-lock/lock-id remote registers */ @@ -4076,8 +4171,11 @@ retry_lock: qla83xx_wr_reg(base_vha, QLA83XX_DRIVER_LOCKID, ha->portnum); } else { + qla83xx_rd_reg(base_vha, QLA83XX_DRIVER_LOCKID, + &lock_owner); ql_dbg(ql_dbg_p3p, base_vha, 0xb063, - "Failed to acquire IDC lock. retrying...\n"); + "Failed to acquire IDC lock, acquired by %d, " + "retrying...\n", lock_owner); /* Retry/Perform IDC-Lock recovery */ if (qla83xx_idc_lock_recovery(base_vha) @@ -4605,6 +4703,18 @@ qla2x00_do_dpc(void *data) qla2xxx_flash_npiv_conf(base_vha); } + if (test_and_clear_bit(HOST_RAMP_DOWN_QUEUE_DEPTH, + &base_vha->dpc_flags)) { + /* Prevents simultaneous ramp up and down */ + clear_bit(HOST_RAMP_UP_QUEUE_DEPTH, + &base_vha->dpc_flags); + qla2x00_host_ramp_down_queuedepth(base_vha); + } + + if (test_and_clear_bit(HOST_RAMP_UP_QUEUE_DEPTH, + &base_vha->dpc_flags)) + qla2x00_host_ramp_up_queuedepth(base_vha); + if (!ha->interrupts_on) ha->isp_ops->enable_intrs(ha); @@ -4733,7 +4843,7 @@ qla2x00_timer(scsi_qla_host_t *vha) cpu_flags); req = ha->req_q_map[0]; for (index = 1; - index < MAX_OUTSTANDING_COMMANDS; + index < req->num_outstanding_cmds; index++) { fc_port_t *sfcp; @@ -4802,7 +4912,9 @@ qla2x00_timer(scsi_qla_host_t *vha) test_bit(ISP_UNRECOVERABLE, &vha->dpc_flags) || test_bit(FCOE_CTX_RESET_NEEDED, &vha->dpc_flags) || test_bit(VP_DPC_NEEDED, &vha->dpc_flags) || - test_bit(RELOGIN_NEEDED, &vha->dpc_flags))) { + test_bit(RELOGIN_NEEDED, &vha->dpc_flags) || + test_bit(HOST_RAMP_DOWN_QUEUE_DEPTH, &vha->dpc_flags) || + test_bit(HOST_RAMP_UP_QUEUE_DEPTH, &vha->dpc_flags))) { ql_dbg(ql_dbg_timer, vha, 0x600b, "isp_abort_needed=%d loop_resync_needed=%d " "fcport_update_needed=%d start_dpc=%d " @@ -4815,12 +4927,15 @@ qla2x00_timer(scsi_qla_host_t *vha) ql_dbg(ql_dbg_timer, vha, 0x600c, "beacon_blink_needed=%d isp_unrecoverable=%d " "fcoe_ctx_reset_needed=%d vp_dpc_needed=%d " - "relogin_needed=%d.\n", + "relogin_needed=%d, host_ramp_down_needed=%d " + "host_ramp_up_needed=%d.\n", test_bit(BEACON_BLINK_NEEDED, &vha->dpc_flags), test_bit(ISP_UNRECOVERABLE, &vha->dpc_flags), test_bit(FCOE_CTX_RESET_NEEDED, &vha->dpc_flags), test_bit(VP_DPC_NEEDED, &vha->dpc_flags), - test_bit(RELOGIN_NEEDED, &vha->dpc_flags)); + test_bit(RELOGIN_NEEDED, &vha->dpc_flags), + test_bit(HOST_RAMP_UP_QUEUE_DEPTH, &vha->dpc_flags), + test_bit(HOST_RAMP_DOWN_QUEUE_DEPTH, &vha->dpc_flags)); qla2xxx_wake_dpc(vha); } diff --git a/drivers/scsi/qla2xxx/qla_settings.h b/drivers/scsi/qla2xxx/qla_settings.h index 892a81e..46ef0ac 100644 --- a/drivers/scsi/qla2xxx/qla_settings.h +++ b/drivers/scsi/qla2xxx/qla_settings.h @@ -1,6 +1,6 @@ /* * QLogic Fibre Channel HBA Driver - * Copyright (c) 2003-2012 QLogic Corporation + * Copyright (c) 2003-2013 QLogic Corporation * * See LICENSE.qla2xxx for copyright and licensing details. */ diff --git a/drivers/scsi/qla2xxx/qla_sup.c b/drivers/scsi/qla2xxx/qla_sup.c index 32fdc2a..3bef673 100644 --- a/drivers/scsi/qla2xxx/qla_sup.c +++ b/drivers/scsi/qla2xxx/qla_sup.c @@ -1,6 +1,6 @@ /* * QLogic Fibre Channel HBA Driver - * Copyright (c) 2003-2012 QLogic Corporation + * Copyright (c) 2003-2013 QLogic Corporation * * See LICENSE.qla2xxx for copyright and licensing details. */ @@ -798,20 +798,8 @@ qla2xxx_get_flt_info(scsi_qla_host_t *vha, uint32_t flt_addr) case FLT_REG_BOOTLOAD_82XX: ha->flt_region_bootload = start; break; - case FLT_REG_VPD_82XX: - ha->flt_region_vpd = start; - break; - case FLT_REG_FCOE_VPD_0: - if (!IS_QLA8031(ha)) - break; - ha->flt_region_vpd_nvram = start; - if (ha->flags.port0) - ha->flt_region_vpd = start; - break; - case FLT_REG_FCOE_VPD_1: - if (!IS_QLA8031(ha)) - break; - if (!ha->flags.port0) + case FLT_REG_VPD_8XXX: + if (IS_CNA_CAPABLE(ha)) ha->flt_region_vpd = start; break; case FLT_REG_FCOE_NVRAM_0: diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index 80f4b84..61b5d8c 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -52,7 +52,7 @@ MODULE_PARM_DESC(qlini_mode, "\"disabled\" - initiator mode will never be enabled; " "\"enabled\" (default) - initiator mode will always stay enabled."); -static int ql2x_ini_mode = QLA2XXX_INI_MODE_EXCLUSIVE; +int ql2x_ini_mode = QLA2XXX_INI_MODE_EXCLUSIVE; /* * From scsi/fc/fc_fcp.h @@ -1119,6 +1119,7 @@ static void qlt_send_notify_ack(struct scsi_qla_host *vha, nack->u.isp24.srr_rx_id = ntfy->u.isp24.srr_rx_id; nack->u.isp24.status = ntfy->u.isp24.status; nack->u.isp24.status_subcode = ntfy->u.isp24.status_subcode; + nack->u.isp24.fw_handle = ntfy->u.isp24.fw_handle; nack->u.isp24.exchange_address = ntfy->u.isp24.exchange_address; nack->u.isp24.srr_rel_offs = ntfy->u.isp24.srr_rel_offs; nack->u.isp24.srr_ui = ntfy->u.isp24.srr_ui; @@ -1570,7 +1571,7 @@ static inline uint32_t qlt_make_handle(struct scsi_qla_host *vha) /* always increment cmd handle */ do { ++h; - if (h > MAX_OUTSTANDING_COMMANDS) + if (h > DEFAULT_OUTSTANDING_COMMANDS) h = 1; /* 0 is QLA_TGT_NULL_HANDLE */ if (h == ha->tgt.current_handle) { ql_dbg(ql_dbg_tgt, vha, 0xe04e, @@ -2441,7 +2442,7 @@ static struct qla_tgt_cmd *qlt_ctio_to_cmd(struct scsi_qla_host *vha, return NULL; } /* handle-1 is actually used */ - if (unlikely(handle > MAX_OUTSTANDING_COMMANDS)) { + if (unlikely(handle > DEFAULT_OUTSTANDING_COMMANDS)) { ql_dbg(ql_dbg_tgt, vha, 0xe052, "qla_target(%d): Wrong handle %x received\n", vha->vp_idx, handle); @@ -4305,6 +4306,12 @@ int qlt_add_target(struct qla_hw_data *ha, struct scsi_qla_host *base_vha) if (!QLA_TGT_MODE_ENABLED()) return 0; + if (!IS_TGT_MODE_CAPABLE(ha)) { + ql_log(ql_log_warn, base_vha, 0xe070, + "This adapter does not support target mode.\n"); + return 0; + } + ql_dbg(ql_dbg_tgt, base_vha, 0xe03b, "Registering target for host %ld(%p)", base_vha->host_no, ha); @@ -4666,7 +4673,6 @@ void qlt_24xx_process_atio_queue(struct scsi_qla_host *vha) { struct qla_hw_data *ha = vha->hw; - struct device_reg_24xx __iomem *reg = &ha->iobase->isp24; struct atio_from_isp *pkt; int cnt, i; @@ -4694,26 +4700,28 @@ qlt_24xx_process_atio_queue(struct scsi_qla_host *vha) } /* Adjust ring index */ - WRT_REG_DWORD(®->atio_q_out, ha->tgt.atio_ring_index); + WRT_REG_DWORD(ISP_ATIO_Q_OUT(vha), ha->tgt.atio_ring_index); } void -qlt_24xx_config_rings(struct scsi_qla_host *vha, device_reg_t __iomem *reg) +qlt_24xx_config_rings(struct scsi_qla_host *vha) { struct qla_hw_data *ha = vha->hw; + if (!QLA_TGT_MODE_ENABLED()) + return; -/* FIXME: atio_q in/out for ha->mqenable=1..? */ - if (ha->mqenable) { -#if 0 - WRT_REG_DWORD(®->isp25mq.atio_q_in, 0); - WRT_REG_DWORD(®->isp25mq.atio_q_out, 0); - RD_REG_DWORD(®->isp25mq.atio_q_out); -#endif - } else { - /* Setup APTIO registers for target mode */ - WRT_REG_DWORD(®->isp24.atio_q_in, 0); - WRT_REG_DWORD(®->isp24.atio_q_out, 0); - RD_REG_DWORD(®->isp24.atio_q_out); + WRT_REG_DWORD(ISP_ATIO_Q_IN(vha), 0); + WRT_REG_DWORD(ISP_ATIO_Q_OUT(vha), 0); + RD_REG_DWORD(ISP_ATIO_Q_OUT(vha)); + + if (IS_ATIO_MSIX_CAPABLE(ha)) { + struct qla_msix_entry *msix = &ha->msix_entries[2]; + struct init_cb_24xx *icb = (struct init_cb_24xx *)ha->init_cb; + + icb->msix_atio = cpu_to_le16(msix->entry); + ql_dbg(ql_dbg_init, vha, 0xf072, + "Registering ICB vector 0x%x for atio que.\n", + msix->entry); } } @@ -4796,6 +4804,101 @@ qlt_24xx_config_nvram_stage2(struct scsi_qla_host *vha, } } +void +qlt_81xx_config_nvram_stage1(struct scsi_qla_host *vha, struct nvram_81xx *nv) +{ + struct qla_hw_data *ha = vha->hw; + + if (!QLA_TGT_MODE_ENABLED()) + return; + + if (qla_tgt_mode_enabled(vha)) { + if (!ha->tgt.saved_set) { + /* We save only once */ + ha->tgt.saved_exchange_count = nv->exchange_count; + ha->tgt.saved_firmware_options_1 = + nv->firmware_options_1; + ha->tgt.saved_firmware_options_2 = + nv->firmware_options_2; + ha->tgt.saved_firmware_options_3 = + nv->firmware_options_3; + ha->tgt.saved_set = 1; + } + + nv->exchange_count = __constant_cpu_to_le16(0xFFFF); + + /* Enable target mode */ + nv->firmware_options_1 |= __constant_cpu_to_le32(BIT_4); + + /* Disable ini mode, if requested */ + if (!qla_ini_mode_enabled(vha)) + nv->firmware_options_1 |= + __constant_cpu_to_le32(BIT_5); + + /* Disable Full Login after LIP */ + nv->firmware_options_1 &= __constant_cpu_to_le32(~BIT_13); + /* Enable initial LIP */ + nv->firmware_options_1 &= __constant_cpu_to_le32(~BIT_9); + /* Enable FC tapes support */ + nv->firmware_options_2 |= __constant_cpu_to_le32(BIT_12); + /* Disable Full Login after LIP */ + nv->host_p &= __constant_cpu_to_le32(~BIT_10); + /* Enable target PRLI control */ + nv->firmware_options_2 |= __constant_cpu_to_le32(BIT_14); + } else { + if (ha->tgt.saved_set) { + nv->exchange_count = ha->tgt.saved_exchange_count; + nv->firmware_options_1 = + ha->tgt.saved_firmware_options_1; + nv->firmware_options_2 = + ha->tgt.saved_firmware_options_2; + nv->firmware_options_3 = + ha->tgt.saved_firmware_options_3; + } + return; + } + + /* out-of-order frames reassembly */ + nv->firmware_options_3 |= BIT_6|BIT_9; + + if (ha->tgt.enable_class_2) { + if (vha->flags.init_done) + fc_host_supported_classes(vha->host) = + FC_COS_CLASS2 | FC_COS_CLASS3; + + nv->firmware_options_2 |= __constant_cpu_to_le32(BIT_8); + } else { + if (vha->flags.init_done) + fc_host_supported_classes(vha->host) = FC_COS_CLASS3; + + nv->firmware_options_2 &= ~__constant_cpu_to_le32(BIT_8); + } +} + +void +qlt_81xx_config_nvram_stage2(struct scsi_qla_host *vha, + struct init_cb_81xx *icb) +{ + struct qla_hw_data *ha = vha->hw; + + if (!QLA_TGT_MODE_ENABLED()) + return; + + if (ha->tgt.node_name_set) { + memcpy(icb->node_name, ha->tgt.tgt_node_name, WWN_SIZE); + icb->firmware_options_1 |= __constant_cpu_to_le32(BIT_14); + } +} + +void +qlt_83xx_iospace_config(struct qla_hw_data *ha) +{ + if (!QLA_TGT_MODE_ENABLED()) + return; + + ha->msix_count += 1; /* For ATIO Q */ +} + int qlt_24xx_process_response_error(struct scsi_qla_host *vha, struct sts_entry_24xx *pkt) @@ -4828,11 +4931,41 @@ qlt_probe_one_stage1(struct scsi_qla_host *base_vha, struct qla_hw_data *ha) if (!QLA_TGT_MODE_ENABLED()) return; + if (ha->mqenable || IS_QLA83XX(ha)) { + ISP_ATIO_Q_IN(base_vha) = &ha->mqiobase->isp25mq.atio_q_in; + ISP_ATIO_Q_OUT(base_vha) = &ha->mqiobase->isp25mq.atio_q_out; + } else { + ISP_ATIO_Q_IN(base_vha) = &ha->iobase->isp24.atio_q_in; + ISP_ATIO_Q_OUT(base_vha) = &ha->iobase->isp24.atio_q_out; + } + mutex_init(&ha->tgt.tgt_mutex); mutex_init(&ha->tgt.tgt_host_action_mutex); qlt_clear_mode(base_vha); } +irqreturn_t +qla83xx_msix_atio_q(int irq, void *dev_id) +{ + struct rsp_que *rsp; + scsi_qla_host_t *vha; + struct qla_hw_data *ha; + unsigned long flags; + + rsp = (struct rsp_que *) dev_id; + ha = rsp->hw; + vha = pci_get_drvdata(ha->pdev); + + spin_lock_irqsave(&ha->hardware_lock, flags); + + qlt_24xx_process_atio_queue(vha); + qla24xx_process_response_queue(vha, rsp); + + spin_unlock_irqrestore(&ha->hardware_lock, flags); + + return IRQ_HANDLED; +} + int qlt_mem_alloc(struct qla_hw_data *ha) { diff --git a/drivers/scsi/qla2xxx/qla_target.h b/drivers/scsi/qla2xxx/qla_target.h index bad7495..ff9ccb9 100644 --- a/drivers/scsi/qla2xxx/qla_target.h +++ b/drivers/scsi/qla2xxx/qla_target.h @@ -60,8 +60,9 @@ * multi-complete should come to the tgt driver or be handled there by qla2xxx */ #define CTIO_COMPLETION_HANDLE_MARK BIT_29 -#if (CTIO_COMPLETION_HANDLE_MARK <= MAX_OUTSTANDING_COMMANDS) -#error "CTIO_COMPLETION_HANDLE_MARK not larger than MAX_OUTSTANDING_COMMANDS" +#if (CTIO_COMPLETION_HANDLE_MARK <= DEFAULT_OUTSTANDING_COMMANDS) +#error "CTIO_COMPLETION_HANDLE_MARK not larger than " + "DEFAULT_OUTSTANDING_COMMANDS" #endif #define HANDLE_IS_CTIO_COMP(h) (h & CTIO_COMPLETION_HANDLE_MARK) @@ -161,7 +162,7 @@ struct imm_ntfy_from_isp { uint16_t srr_rx_id; uint16_t status; uint8_t status_subcode; - uint8_t reserved_3; + uint8_t fw_handle; uint32_t exchange_address; uint32_t srr_rel_offs; uint16_t srr_ui; @@ -217,7 +218,7 @@ struct nack_to_isp { uint16_t srr_rx_id; uint16_t status; uint8_t status_subcode; - uint8_t reserved_3; + uint8_t fw_handle; uint32_t exchange_address; uint32_t srr_rel_offs; uint16_t srr_ui; @@ -948,6 +949,7 @@ extern void qlt_update_vp_map(struct scsi_qla_host *, int); * is not set. Right now, ha value is ignored. */ #define QLA_TGT_MODE_ENABLED() (ql2x_ini_mode != QLA2XXX_INI_MODE_ENABLED) +extern int ql2x_ini_mode; static inline bool qla_tgt_mode_enabled(struct scsi_qla_host *ha) { @@ -985,12 +987,15 @@ extern void qlt_vport_create(struct scsi_qla_host *, struct qla_hw_data *); extern void qlt_rff_id(struct scsi_qla_host *, struct ct_sns_req *); extern void qlt_init_atio_q_entries(struct scsi_qla_host *); extern void qlt_24xx_process_atio_queue(struct scsi_qla_host *); -extern void qlt_24xx_config_rings(struct scsi_qla_host *, - device_reg_t __iomem *); +extern void qlt_24xx_config_rings(struct scsi_qla_host *); extern void qlt_24xx_config_nvram_stage1(struct scsi_qla_host *, struct nvram_24xx *); extern void qlt_24xx_config_nvram_stage2(struct scsi_qla_host *, struct init_cb_24xx *); +extern void qlt_81xx_config_nvram_stage2(struct scsi_qla_host *, + struct init_cb_81xx *); +extern void qlt_81xx_config_nvram_stage1(struct scsi_qla_host *, + struct nvram_81xx *); extern int qlt_24xx_process_response_error(struct scsi_qla_host *, struct sts_entry_24xx *); extern void qlt_modify_vp_config(struct scsi_qla_host *, @@ -1000,5 +1005,7 @@ extern int qlt_mem_alloc(struct qla_hw_data *); extern void qlt_mem_free(struct qla_hw_data *); extern void qlt_stop_phase1(struct qla_tgt *); extern void qlt_stop_phase2(struct qla_tgt *); +extern irqreturn_t qla83xx_msix_atio_q(int, void *); +extern void qlt_83xx_iospace_config(struct qla_hw_data *); #endif /* __QLA_TARGET_H */ diff --git a/drivers/scsi/qla2xxx/qla_version.h b/drivers/scsi/qla2xxx/qla_version.h index 49697ca..2b6e478 100644 --- a/drivers/scsi/qla2xxx/qla_version.h +++ b/drivers/scsi/qla2xxx/qla_version.h @@ -1,6 +1,6 @@ /* * QLogic Fibre Channel HBA Driver - * Copyright (c) 2003-2012 QLogic Corporation + * Copyright (c) 2003-2013 QLogic Corporation * * See LICENSE.qla2xxx for copyright and licensing details. */ diff --git a/drivers/scsi/qla4xxx/ql4_83xx.c b/drivers/scsi/qla4xxx/ql4_83xx.c index 6e9af20..5d8fe4f 100644 --- a/drivers/scsi/qla4xxx/ql4_83xx.c +++ b/drivers/scsi/qla4xxx/ql4_83xx.c @@ -538,7 +538,7 @@ struct device_info { int port_num; }; -static int qla4_83xx_can_perform_reset(struct scsi_qla_host *ha) +int qla4_83xx_can_perform_reset(struct scsi_qla_host *ha) { uint32_t drv_active; uint32_t dev_part, dev_part1, dev_part2; @@ -1351,31 +1351,58 @@ exit_start_fw: /*----------------------Interrupt Related functions ---------------------*/ -void qla4_83xx_disable_intrs(struct scsi_qla_host *ha) +static void qla4_83xx_disable_iocb_intrs(struct scsi_qla_host *ha) +{ + if (test_and_clear_bit(AF_83XX_IOCB_INTR_ON, &ha->flags)) + qla4_8xxx_intr_disable(ha); +} + +static void qla4_83xx_disable_mbox_intrs(struct scsi_qla_host *ha) { uint32_t mb_int, ret; - if (test_and_clear_bit(AF_INTERRUPTS_ON, &ha->flags)) - qla4_8xxx_mbx_intr_disable(ha); + if (test_and_clear_bit(AF_83XX_MBOX_INTR_ON, &ha->flags)) { + ret = readl(&ha->qla4_83xx_reg->mbox_int); + mb_int = ret & ~INT_ENABLE_FW_MB; + writel(mb_int, &ha->qla4_83xx_reg->mbox_int); + writel(1, &ha->qla4_83xx_reg->leg_int_mask); + } +} - ret = readl(&ha->qla4_83xx_reg->mbox_int); - mb_int = ret & ~INT_ENABLE_FW_MB; - writel(mb_int, &ha->qla4_83xx_reg->mbox_int); - writel(1, &ha->qla4_83xx_reg->leg_int_mask); +void qla4_83xx_disable_intrs(struct scsi_qla_host *ha) +{ + qla4_83xx_disable_mbox_intrs(ha); + qla4_83xx_disable_iocb_intrs(ha); } -void qla4_83xx_enable_intrs(struct scsi_qla_host *ha) +static void qla4_83xx_enable_iocb_intrs(struct scsi_qla_host *ha) +{ + if (!test_bit(AF_83XX_IOCB_INTR_ON, &ha->flags)) { + qla4_8xxx_intr_enable(ha); + set_bit(AF_83XX_IOCB_INTR_ON, &ha->flags); + } +} + +void qla4_83xx_enable_mbox_intrs(struct scsi_qla_host *ha) { uint32_t mb_int; - qla4_8xxx_mbx_intr_enable(ha); - mb_int = INT_ENABLE_FW_MB; - writel(mb_int, &ha->qla4_83xx_reg->mbox_int); - writel(0, &ha->qla4_83xx_reg->leg_int_mask); + if (!test_bit(AF_83XX_MBOX_INTR_ON, &ha->flags)) { + mb_int = INT_ENABLE_FW_MB; + writel(mb_int, &ha->qla4_83xx_reg->mbox_int); + writel(0, &ha->qla4_83xx_reg->leg_int_mask); + set_bit(AF_83XX_MBOX_INTR_ON, &ha->flags); + } +} - set_bit(AF_INTERRUPTS_ON, &ha->flags); + +void qla4_83xx_enable_intrs(struct scsi_qla_host *ha) +{ + qla4_83xx_enable_mbox_intrs(ha); + qla4_83xx_enable_iocb_intrs(ha); } + void qla4_83xx_queue_mbox_cmd(struct scsi_qla_host *ha, uint32_t *mbx_cmd, int incount) { diff --git a/drivers/scsi/qla4xxx/ql4_attr.c b/drivers/scsi/qla4xxx/ql4_attr.c index 76819b7..19ee55a 100644 --- a/drivers/scsi/qla4xxx/ql4_attr.c +++ b/drivers/scsi/qla4xxx/ql4_attr.c @@ -74,16 +74,22 @@ qla4_8xxx_sysfs_write_fw_dump(struct file *filep, struct kobject *kobj, } break; case 2: - /* Reset HBA */ + /* Reset HBA and collect FW dump */ ha->isp_ops->idc_lock(ha); dev_state = qla4_8xxx_rd_direct(ha, QLA8XXX_CRB_DEV_STATE); if (dev_state == QLA8XXX_DEV_READY) { - ql4_printk(KERN_INFO, ha, - "%s: Setting Need reset, reset_owner is 0x%x.\n", - __func__, ha->func_num); + ql4_printk(KERN_INFO, ha, "%s: Setting Need reset\n", + __func__); qla4_8xxx_wr_direct(ha, QLA8XXX_CRB_DEV_STATE, QLA8XXX_DEV_NEED_RESET); - set_bit(AF_8XXX_RST_OWNER, &ha->flags); + if (is_qla8022(ha) || + (is_qla8032(ha) && + qla4_83xx_can_perform_reset(ha))) { + set_bit(AF_8XXX_RST_OWNER, &ha->flags); + set_bit(AF_FW_RECOVERY, &ha->flags); + ql4_printk(KERN_INFO, ha, "%s: Reset owner is 0x%x\n", + __func__, ha->func_num); + } } else ql4_printk(KERN_INFO, ha, "%s: Reset not performed as device state is 0x%x\n", diff --git a/drivers/scsi/qla4xxx/ql4_def.h b/drivers/scsi/qla4xxx/ql4_def.h index 329d553..129f5dd 100644 --- a/drivers/scsi/qla4xxx/ql4_def.h +++ b/drivers/scsi/qla4xxx/ql4_def.h @@ -136,6 +136,7 @@ #define RESPONSE_QUEUE_DEPTH 64 #define QUEUE_SIZE 64 #define DMA_BUFFER_SIZE 512 +#define IOCB_HIWAT_CUSHION 4 /* * Misc @@ -180,6 +181,7 @@ #define DISABLE_ACB_TOV 30 #define IP_CONFIG_TOV 30 #define LOGIN_TOV 12 +#define BOOT_LOGIN_RESP_TOV 60 #define MAX_RESET_HA_RETRIES 2 #define FW_ALIVE_WAIT_TOV 3 @@ -314,6 +316,7 @@ struct ql4_tuple_ddb { * DDB flags. */ #define DF_RELOGIN 0 /* Relogin to device */ +#define DF_BOOT_TGT 1 /* Boot target entry */ #define DF_ISNS_DISCOVERED 2 /* Device was discovered via iSNS */ #define DF_FO_MASKED 3 @@ -501,6 +504,7 @@ struct scsi_qla_host { #define AF_INTERRUPTS_ON 6 /* 0x00000040 */ #define AF_GET_CRASH_RECORD 7 /* 0x00000080 */ #define AF_LINK_UP 8 /* 0x00000100 */ +#define AF_LOOPBACK 9 /* 0x00000200 */ #define AF_IRQ_ATTACHED 10 /* 0x00000400 */ #define AF_DISABLE_ACB_COMPLETE 11 /* 0x00000800 */ #define AF_HA_REMOVAL 12 /* 0x00001000 */ @@ -516,6 +520,8 @@ struct scsi_qla_host { #define AF_8XXX_RST_OWNER 25 /* 0x02000000 */ #define AF_82XX_DUMP_READING 26 /* 0x04000000 */ #define AF_83XX_NO_FW_DUMP 27 /* 0x08000000 */ +#define AF_83XX_IOCB_INTR_ON 28 /* 0x10000000 */ +#define AF_83XX_MBOX_INTR_ON 29 /* 0x20000000 */ unsigned long dpc_flags; @@ -537,6 +543,7 @@ struct scsi_qla_host { uint32_t tot_ddbs; uint16_t iocb_cnt; + uint16_t iocb_hiwat; /* SRB cache. */ #define SRB_MIN_REQ 128 @@ -838,7 +845,8 @@ static inline int is_aer_supported(struct scsi_qla_host *ha) static inline int adapter_up(struct scsi_qla_host *ha) { return (test_bit(AF_ONLINE, &ha->flags) != 0) && - (test_bit(AF_LINK_UP, &ha->flags) != 0); + (test_bit(AF_LINK_UP, &ha->flags) != 0) && + (!test_bit(AF_LOOPBACK, &ha->flags)); } static inline struct scsi_qla_host* to_qla_host(struct Scsi_Host *shost) diff --git a/drivers/scsi/qla4xxx/ql4_fw.h b/drivers/scsi/qla4xxx/ql4_fw.h index 1c47950..ad9d2e2 100644 --- a/drivers/scsi/qla4xxx/ql4_fw.h +++ b/drivers/scsi/qla4xxx/ql4_fw.h @@ -495,7 +495,7 @@ struct qla_flt_region { #define MBOX_ASTS_IPV6_LCL_PREFIX_IGNORED 0x802D #define MBOX_ASTS_ICMPV6_ERROR_MSG_RCVD 0x802E #define MBOX_ASTS_IDC_COMPLETE 0x8100 -#define MBOX_ASTS_IDC_NOTIFY 0x8101 +#define MBOX_ASTS_IDC_REQUEST_NOTIFICATION 0x8101 #define MBOX_ASTS_TXSCVR_INSERTED 0x8130 #define MBOX_ASTS_TXSCVR_REMOVED 0x8131 @@ -522,6 +522,10 @@ struct qla_flt_region { #define FLASH_OPT_COMMIT 2 #define FLASH_OPT_RMW_COMMIT 3 +/* Loopback type */ +#define ENABLE_INTERNAL_LOOPBACK 0x04 +#define ENABLE_EXTERNAL_LOOPBACK 0x08 + /*************************************************************************/ /* Host Adapter Initialization Control Block (from host) */ diff --git a/drivers/scsi/qla4xxx/ql4_glbl.h b/drivers/scsi/qla4xxx/ql4_glbl.h index 57a5a3c..982293e 100644 --- a/drivers/scsi/qla4xxx/ql4_glbl.h +++ b/drivers/scsi/qla4xxx/ql4_glbl.h @@ -253,12 +253,14 @@ void qla4_8xxx_set_rst_ready(struct scsi_qla_host *ha); void qla4_8xxx_clear_rst_ready(struct scsi_qla_host *ha); int qla4_8xxx_device_bootstrap(struct scsi_qla_host *ha); void qla4_8xxx_get_minidump(struct scsi_qla_host *ha); -int qla4_8xxx_mbx_intr_disable(struct scsi_qla_host *ha); -int qla4_8xxx_mbx_intr_enable(struct scsi_qla_host *ha); +int qla4_8xxx_intr_disable(struct scsi_qla_host *ha); +int qla4_8xxx_intr_enable(struct scsi_qla_host *ha); int qla4_8xxx_set_param(struct scsi_qla_host *ha, int param); int qla4_8xxx_update_idc_reg(struct scsi_qla_host *ha); int qla4_83xx_post_idc_ack(struct scsi_qla_host *ha); void qla4_83xx_disable_pause(struct scsi_qla_host *ha); +void qla4_83xx_enable_mbox_intrs(struct scsi_qla_host *ha); +int qla4_83xx_can_perform_reset(struct scsi_qla_host *ha); extern int ql4xextended_error_logging; extern int ql4xdontresethba; diff --git a/drivers/scsi/qla4xxx/ql4_init.c b/drivers/scsi/qla4xxx/ql4_init.c index 1aca1b4..8fc8548 100644 --- a/drivers/scsi/qla4xxx/ql4_init.c +++ b/drivers/scsi/qla4xxx/ql4_init.c @@ -195,12 +195,10 @@ exit_get_sys_info_no_free: * @ha: pointer to host adapter structure. * **/ -static int qla4xxx_init_local_data(struct scsi_qla_host *ha) +static void qla4xxx_init_local_data(struct scsi_qla_host *ha) { /* Initialize aen queue */ ha->aen_q_count = MAX_AEN_ENTRIES; - - return qla4xxx_get_firmware_status(ha); } static uint8_t @@ -935,14 +933,23 @@ int qla4xxx_initialize_adapter(struct scsi_qla_host *ha, int is_reset) if (ha->isp_ops->start_firmware(ha) == QLA_ERROR) goto exit_init_hba; + /* + * For ISP83XX, mailbox and IOCB interrupts are enabled separately. + * Mailbox interrupts must be enabled prior to issuing any mailbox + * command in order to prevent the possibility of losing interrupts + * while switching from polling to interrupt mode. IOCB interrupts are + * enabled via isp_ops->enable_intrs. + */ + if (is_qla8032(ha)) + qla4_83xx_enable_mbox_intrs(ha); + if (qla4xxx_about_firmware(ha) == QLA_ERROR) goto exit_init_hba; if (ha->isp_ops->get_sys_info(ha) == QLA_ERROR) goto exit_init_hba; - if (qla4xxx_init_local_data(ha) == QLA_ERROR) - goto exit_init_hba; + qla4xxx_init_local_data(ha); status = qla4xxx_init_firmware(ha); if (status == QLA_ERROR) diff --git a/drivers/scsi/qla4xxx/ql4_iocb.c b/drivers/scsi/qla4xxx/ql4_iocb.c index f48f37a..14fec97 100644 --- a/drivers/scsi/qla4xxx/ql4_iocb.c +++ b/drivers/scsi/qla4xxx/ql4_iocb.c @@ -316,7 +316,7 @@ int qla4xxx_send_command_to_isp(struct scsi_qla_host *ha, struct srb * srb) goto queuing_error; /* total iocbs active */ - if ((ha->iocb_cnt + req_cnt) >= REQUEST_QUEUE_DEPTH) + if ((ha->iocb_cnt + req_cnt) >= ha->iocb_hiwat) goto queuing_error; /* Build command packet */ diff --git a/drivers/scsi/qla4xxx/ql4_isr.c b/drivers/scsi/qla4xxx/ql4_isr.c index 15ea814..1b83dc2 100644 --- a/drivers/scsi/qla4xxx/ql4_isr.c +++ b/drivers/scsi/qla4xxx/ql4_isr.c @@ -582,6 +582,33 @@ exit_prq_error: } /** + * qla4_83xx_loopback_in_progress: Is loopback in progress? + * @ha: Pointer to host adapter structure. + * @ret: 1 = loopback in progress, 0 = loopback not in progress + **/ +static int qla4_83xx_loopback_in_progress(struct scsi_qla_host *ha) +{ + int rval = 1; + + if (is_qla8032(ha)) { + if ((ha->idc_info.info2 & ENABLE_INTERNAL_LOOPBACK) || + (ha->idc_info.info2 & ENABLE_EXTERNAL_LOOPBACK)) { + DEBUG2(ql4_printk(KERN_INFO, ha, + "%s: Loopback diagnostics in progress\n", + __func__)); + rval = 1; + } else { + DEBUG2(ql4_printk(KERN_INFO, ha, + "%s: Loopback diagnostics not in progress\n", + __func__)); + rval = 0; + } + } + + return rval; +} + +/** * qla4xxx_isr_decode_mailbox - decodes mailbox status * @ha: Pointer to host adapter structure. * @mailbox_status: Mailbox status. @@ -676,8 +703,10 @@ static void qla4xxx_isr_decode_mailbox(struct scsi_qla_host * ha, case MBOX_ASTS_LINK_DOWN: clear_bit(AF_LINK_UP, &ha->flags); - if (test_bit(AF_INIT_DONE, &ha->flags)) + if (test_bit(AF_INIT_DONE, &ha->flags)) { set_bit(DPC_LINK_CHANGED, &ha->dpc_flags); + qla4xxx_wake_dpc(ha); + } ql4_printk(KERN_INFO, ha, "%s: LINK DOWN\n", __func__); qla4xxx_post_aen_work(ha, ISCSI_EVENT_LINKDOWN, @@ -806,7 +835,7 @@ static void qla4xxx_isr_decode_mailbox(struct scsi_qla_host * ha, " removed\n", ha->host_no, mbox_sts[0])); break; - case MBOX_ASTS_IDC_NOTIFY: + case MBOX_ASTS_IDC_REQUEST_NOTIFICATION: { uint32_t opcode; if (is_qla8032(ha)) { @@ -840,6 +869,11 @@ static void qla4xxx_isr_decode_mailbox(struct scsi_qla_host * ha, DEBUG2(ql4_printk(KERN_INFO, ha, "scsi:%ld: AEN %04x IDC Complete notification\n", ha->host_no, mbox_sts[0])); + + if (qla4_83xx_loopback_in_progress(ha)) + set_bit(AF_LOOPBACK, &ha->flags); + else + clear_bit(AF_LOOPBACK, &ha->flags); } break; @@ -1124,17 +1158,18 @@ irqreturn_t qla4_83xx_intr_handler(int irq, void *dev_id) /* Legacy interrupt is valid if bit31 of leg_int_ptr is set */ if (!(leg_int_ptr & LEG_INT_PTR_B31)) { - ql4_printk(KERN_ERR, ha, - "%s: Legacy Interrupt Bit 31 not set, spurious interrupt!\n", - __func__); + DEBUG2(ql4_printk(KERN_ERR, ha, + "%s: Legacy Interrupt Bit 31 not set, spurious interrupt!\n", + __func__)); return IRQ_NONE; } /* Validate the PCIE function ID set in leg_int_ptr bits [19..16] */ if ((leg_int_ptr & PF_BITS_MASK) != ha->pf_bit) { - ql4_printk(KERN_ERR, ha, - "%s: Incorrect function ID 0x%x in legacy interrupt register, ha->pf_bit = 0x%x\n", - __func__, (leg_int_ptr & PF_BITS_MASK), ha->pf_bit); + DEBUG2(ql4_printk(KERN_ERR, ha, + "%s: Incorrect function ID 0x%x in legacy interrupt register, ha->pf_bit = 0x%x\n", + __func__, (leg_int_ptr & PF_BITS_MASK), + ha->pf_bit)); return IRQ_NONE; } @@ -1437,11 +1472,14 @@ irq_not_attached: void qla4xxx_free_irqs(struct scsi_qla_host *ha) { - if (test_bit(AF_MSIX_ENABLED, &ha->flags)) - qla4_8xxx_disable_msix(ha); - else if (test_and_clear_bit(AF_MSI_ENABLED, &ha->flags)) { - free_irq(ha->pdev->irq, ha); - pci_disable_msi(ha->pdev); - } else if (test_and_clear_bit(AF_INTx_ENABLED, &ha->flags)) - free_irq(ha->pdev->irq, ha); + if (test_and_clear_bit(AF_IRQ_ATTACHED, &ha->flags)) { + if (test_bit(AF_MSIX_ENABLED, &ha->flags)) { + qla4_8xxx_disable_msix(ha); + } else if (test_and_clear_bit(AF_MSI_ENABLED, &ha->flags)) { + free_irq(ha->pdev->irq, ha); + pci_disable_msi(ha->pdev); + } else if (test_and_clear_bit(AF_INTx_ENABLED, &ha->flags)) { + free_irq(ha->pdev->irq, ha); + } + } } diff --git a/drivers/scsi/qla4xxx/ql4_mbx.c b/drivers/scsi/qla4xxx/ql4_mbx.c index 3d41034..160d336 100644 --- a/drivers/scsi/qla4xxx/ql4_mbx.c +++ b/drivers/scsi/qla4xxx/ql4_mbx.c @@ -44,6 +44,30 @@ void qla4xxx_process_mbox_intr(struct scsi_qla_host *ha, int out_count) } /** + * qla4xxx_is_intr_poll_mode – Are we allowed to poll for interrupts? + * @ha: Pointer to host adapter structure. + * @ret: 1=polling mode, 0=non-polling mode + **/ +static int qla4xxx_is_intr_poll_mode(struct scsi_qla_host *ha) +{ + int rval = 1; + + if (is_qla8032(ha)) { + if (test_bit(AF_IRQ_ATTACHED, &ha->flags) && + test_bit(AF_83XX_MBOX_INTR_ON, &ha->flags)) + rval = 0; + } else { + if (test_bit(AF_IRQ_ATTACHED, &ha->flags) && + test_bit(AF_INTERRUPTS_ON, &ha->flags) && + test_bit(AF_ONLINE, &ha->flags) && + !test_bit(AF_HA_REMOVAL, &ha->flags)) + rval = 0; + } + + return rval; +} + +/** * qla4xxx_mailbox_command - issues mailbox commands * @ha: Pointer to host adapter structure. * @inCount: number of mailbox registers to load. @@ -153,33 +177,28 @@ int qla4xxx_mailbox_command(struct scsi_qla_host *ha, uint8_t inCount, /* * Wait for completion: Poll or completion queue */ - if (test_bit(AF_IRQ_ATTACHED, &ha->flags) && - test_bit(AF_INTERRUPTS_ON, &ha->flags) && - test_bit(AF_ONLINE, &ha->flags) && - !test_bit(AF_HA_REMOVAL, &ha->flags)) { - /* Do not poll for completion. Use completion queue */ - set_bit(AF_MBOX_COMMAND_NOPOLL, &ha->flags); - wait_for_completion_timeout(&ha->mbx_intr_comp, MBOX_TOV * HZ); - clear_bit(AF_MBOX_COMMAND_NOPOLL, &ha->flags); - } else { + if (qla4xxx_is_intr_poll_mode(ha)) { /* Poll for command to complete */ wait_count = jiffies + MBOX_TOV * HZ; while (test_bit(AF_MBOX_COMMAND_DONE, &ha->flags) == 0) { if (time_after_eq(jiffies, wait_count)) break; - /* * Service the interrupt. * The ISR will save the mailbox status registers * to a temporary storage location in the adapter * structure. */ - spin_lock_irqsave(&ha->hardware_lock, flags); ha->isp_ops->process_mailbox_interrupt(ha, outCount); spin_unlock_irqrestore(&ha->hardware_lock, flags); msleep(10); } + } else { + /* Do not poll for completion. Use completion queue */ + set_bit(AF_MBOX_COMMAND_NOPOLL, &ha->flags); + wait_for_completion_timeout(&ha->mbx_intr_comp, MBOX_TOV * HZ); + clear_bit(AF_MBOX_COMMAND_NOPOLL, &ha->flags); } /* Check for mailbox timeout. */ @@ -678,8 +697,24 @@ int qla4xxx_get_firmware_status(struct scsi_qla_host * ha) return QLA_ERROR; } - ql4_printk(KERN_INFO, ha, "%ld firmware IOCBs available (%d).\n", - ha->host_no, mbox_sts[2]); + /* High-water mark of IOCBs */ + ha->iocb_hiwat = mbox_sts[2]; + DEBUG2(ql4_printk(KERN_INFO, ha, + "%s: firmware IOCBs available = %d\n", __func__, + ha->iocb_hiwat)); + + if (ha->iocb_hiwat > IOCB_HIWAT_CUSHION) + ha->iocb_hiwat -= IOCB_HIWAT_CUSHION; + + /* Ideally, we should not enter this code, as the # of firmware + * IOCBs is hard-coded in the firmware. We set a default + * iocb_hiwat here just in case */ + if (ha->iocb_hiwat == 0) { + ha->iocb_hiwat = REQUEST_QUEUE_DEPTH / 4; + DEBUG2(ql4_printk(KERN_WARNING, ha, + "%s: Setting IOCB's to = %d\n", __func__, + ha->iocb_hiwat)); + } return QLA_SUCCESS; } @@ -1385,10 +1420,8 @@ int qla4xxx_get_chap(struct scsi_qla_host *ha, char *username, char *password, dma_addr_t chap_dma; chap_table = dma_pool_alloc(ha->chap_dma_pool, GFP_KERNEL, &chap_dma); - if (chap_table == NULL) { - ret = -ENOMEM; - goto exit_get_chap; - } + if (chap_table == NULL) + return -ENOMEM; chap_size = sizeof(struct ql4_chap_table); memset(chap_table, 0, chap_size); diff --git a/drivers/scsi/qla4xxx/ql4_nx.c b/drivers/scsi/qla4xxx/ql4_nx.c index 499a92d..71d3d23 100644 --- a/drivers/scsi/qla4xxx/ql4_nx.c +++ b/drivers/scsi/qla4xxx/ql4_nx.c @@ -2986,7 +2986,7 @@ int qla4_8xxx_load_risc(struct scsi_qla_host *ha) retval = qla4_8xxx_device_state_handler(ha); - if (retval == QLA_SUCCESS && !test_bit(AF_INIT_DONE, &ha->flags)) + if (retval == QLA_SUCCESS && !test_bit(AF_IRQ_ATTACHED, &ha->flags)) retval = qla4xxx_request_irqs(ha); return retval; @@ -3427,11 +3427,11 @@ int qla4_8xxx_get_sys_info(struct scsi_qla_host *ha) } /* Make sure we receive the minimum required data to cache internally */ - if (mbox_sts[4] < offsetof(struct mbx_sys_info, reserved)) { + if ((is_qla8032(ha) ? mbox_sts[3] : mbox_sts[4]) < + offsetof(struct mbx_sys_info, reserved)) { DEBUG2(printk("scsi%ld: %s: GET_SYS_INFO data receive" " error (%x)\n", ha->host_no, __func__, mbox_sts[4])); goto exit_validate_mac82; - } /* Save M.A.C. address & serial_number */ @@ -3463,7 +3463,7 @@ exit_validate_mac82: /* Interrupt handling helpers. */ -int qla4_8xxx_mbx_intr_enable(struct scsi_qla_host *ha) +int qla4_8xxx_intr_enable(struct scsi_qla_host *ha) { uint32_t mbox_cmd[MBOX_REG_COUNT]; uint32_t mbox_sts[MBOX_REG_COUNT]; @@ -3484,7 +3484,7 @@ int qla4_8xxx_mbx_intr_enable(struct scsi_qla_host *ha) return QLA_SUCCESS; } -int qla4_8xxx_mbx_intr_disable(struct scsi_qla_host *ha) +int qla4_8xxx_intr_disable(struct scsi_qla_host *ha) { uint32_t mbox_cmd[MBOX_REG_COUNT]; uint32_t mbox_sts[MBOX_REG_COUNT]; @@ -3509,7 +3509,7 @@ int qla4_8xxx_mbx_intr_disable(struct scsi_qla_host *ha) void qla4_82xx_enable_intrs(struct scsi_qla_host *ha) { - qla4_8xxx_mbx_intr_enable(ha); + qla4_8xxx_intr_enable(ha); spin_lock_irq(&ha->hardware_lock); /* BIT 10 - reset */ @@ -3522,7 +3522,7 @@ void qla4_82xx_disable_intrs(struct scsi_qla_host *ha) { if (test_and_clear_bit(AF_INTERRUPTS_ON, &ha->flags)) - qla4_8xxx_mbx_intr_disable(ha); + qla4_8xxx_intr_disable(ha); spin_lock_irq(&ha->hardware_lock); /* BIT 10 - set */ diff --git a/drivers/scsi/qla4xxx/ql4_os.c b/drivers/scsi/qla4xxx/ql4_os.c index 4cec123..6142729 100644 --- a/drivers/scsi/qla4xxx/ql4_os.c +++ b/drivers/scsi/qla4xxx/ql4_os.c @@ -1337,18 +1337,18 @@ static int qla4xxx_session_get_param(struct iscsi_cls_session *cls_sess, sess->password_in, BIDI_CHAP, &idx); if (rval) - return -EINVAL; - - len = sprintf(buf, "%hu\n", idx); + len = sprintf(buf, "\n"); + else + len = sprintf(buf, "%hu\n", idx); break; case ISCSI_PARAM_CHAP_OUT_IDX: rval = qla4xxx_get_chap_index(ha, sess->username, sess->password, LOCAL_CHAP, &idx); if (rval) - return -EINVAL; - - len = sprintf(buf, "%hu\n", idx); + len = sprintf(buf, "\n"); + else + len = sprintf(buf, "%hu\n", idx); break; default: return iscsi_session_get_param(cls_sess, param, buf); @@ -2242,6 +2242,7 @@ static int qla4xxx_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd) test_bit(DPC_HA_NEED_QUIESCENT, &ha->dpc_flags) || !test_bit(AF_ONLINE, &ha->flags) || !test_bit(AF_LINK_UP, &ha->flags) || + test_bit(AF_LOOPBACK, &ha->flags) || test_bit(DPC_RESET_HA_FW_CONTEXT, &ha->dpc_flags)) goto qc_host_busy; @@ -2978,6 +2979,7 @@ static int qla4xxx_recover_adapter(struct scsi_qla_host *ha) if (status == QLA_SUCCESS) { if (!test_bit(AF_FW_RECOVERY, &ha->flags)) qla4xxx_cmd_wait(ha); + ha->isp_ops->disable_intrs(ha); qla4xxx_process_aen(ha, FLUSH_DDB_CHANGED_AENS); qla4xxx_abort_active_cmds(ha, DID_RESET << 16); @@ -3479,7 +3481,8 @@ dpc_post_reset_ha: } /* ---- link change? --- */ - if (test_and_clear_bit(DPC_LINK_CHANGED, &ha->dpc_flags)) { + if (!test_bit(AF_LOOPBACK, &ha->flags) && + test_and_clear_bit(DPC_LINK_CHANGED, &ha->dpc_flags)) { if (!test_bit(AF_LINK_UP, &ha->flags)) { /* ---- link down? --- */ qla4xxx_mark_all_devices_missing(ha); @@ -3508,10 +3511,8 @@ static void qla4xxx_free_adapter(struct scsi_qla_host *ha) { qla4xxx_abort_active_cmds(ha, DID_NO_CONNECT << 16); - if (test_bit(AF_INTERRUPTS_ON, &ha->flags)) { - /* Turn-off interrupts on the card. */ - ha->isp_ops->disable_intrs(ha); - } + /* Turn-off interrupts on the card. */ + ha->isp_ops->disable_intrs(ha); if (is_qla40XX(ha)) { writel(set_rmask(CSR_SCSI_PROCESSOR_INTR), @@ -3547,8 +3548,7 @@ static void qla4xxx_free_adapter(struct scsi_qla_host *ha) } /* Detach interrupts */ - if (test_and_clear_bit(AF_IRQ_ATTACHED, &ha->flags)) - qla4xxx_free_irqs(ha); + qla4xxx_free_irqs(ha); /* free extra memory */ qla4xxx_mem_free(ha); @@ -4687,7 +4687,8 @@ static struct iscsi_endpoint *qla4xxx_get_ep_fwdb(struct scsi_qla_host *ha, struct iscsi_endpoint *ep; struct sockaddr_in *addr; struct sockaddr_in6 *addr6; - struct sockaddr *dst_addr; + struct sockaddr *t_addr; + struct sockaddr_storage *dst_addr; char *ip; /* TODO: need to destroy on unload iscsi_endpoint*/ @@ -4696,21 +4697,23 @@ static struct iscsi_endpoint *qla4xxx_get_ep_fwdb(struct scsi_qla_host *ha, return NULL; if (fw_ddb_entry->options & DDB_OPT_IPV6_DEVICE) { - dst_addr->sa_family = AF_INET6; + t_addr = (struct sockaddr *)dst_addr; + t_addr->sa_family = AF_INET6; addr6 = (struct sockaddr_in6 *)dst_addr; ip = (char *)&addr6->sin6_addr; memcpy(ip, fw_ddb_entry->ip_addr, IPv6_ADDR_LEN); addr6->sin6_port = htons(le16_to_cpu(fw_ddb_entry->port)); } else { - dst_addr->sa_family = AF_INET; + t_addr = (struct sockaddr *)dst_addr; + t_addr->sa_family = AF_INET; addr = (struct sockaddr_in *)dst_addr; ip = (char *)&addr->sin_addr; memcpy(ip, fw_ddb_entry->ip_addr, IP_ADDR_LEN); addr->sin_port = htons(le16_to_cpu(fw_ddb_entry->port)); } - ep = qla4xxx_ep_connect(ha->host, dst_addr, 0); + ep = qla4xxx_ep_connect(ha->host, (struct sockaddr *)dst_addr, 0); vfree(dst_addr); return ep; } @@ -4725,7 +4728,8 @@ static int qla4xxx_verify_boot_idx(struct scsi_qla_host *ha, uint16_t idx) } static void qla4xxx_setup_flash_ddb_entry(struct scsi_qla_host *ha, - struct ddb_entry *ddb_entry) + struct ddb_entry *ddb_entry, + uint16_t idx) { uint16_t def_timeout; @@ -4745,6 +4749,10 @@ static void qla4xxx_setup_flash_ddb_entry(struct scsi_qla_host *ha, def_timeout : LOGIN_TOV; ddb_entry->default_time2wait = le16_to_cpu(ddb_entry->fw_ddb_entry.iscsi_def_time2wait); + + if (ql4xdisablesysfsboot && + (idx == ha->pri_ddb_idx || idx == ha->sec_ddb_idx)) + set_bit(DF_BOOT_TGT, &ddb_entry->flags); } static void qla4xxx_wait_for_ip_configuration(struct scsi_qla_host *ha) @@ -4881,7 +4889,7 @@ static void qla4xxx_remove_failed_ddb(struct scsi_qla_host *ha, static int qla4xxx_sess_conn_setup(struct scsi_qla_host *ha, struct dev_db_entry *fw_ddb_entry, - int is_reset) + int is_reset, uint16_t idx) { struct iscsi_cls_session *cls_sess; struct iscsi_session *sess; @@ -4919,7 +4927,7 @@ static int qla4xxx_sess_conn_setup(struct scsi_qla_host *ha, memcpy(&ddb_entry->fw_ddb_entry, fw_ddb_entry, sizeof(struct dev_db_entry)); - qla4xxx_setup_flash_ddb_entry(ha, ddb_entry); + qla4xxx_setup_flash_ddb_entry(ha, ddb_entry, idx); cls_conn = iscsi_conn_setup(cls_sess, sizeof(struct qla_conn), conn_id); @@ -5036,7 +5044,7 @@ static void qla4xxx_build_nt_list(struct scsi_qla_host *ha, goto continue_next_nt; } - ret = qla4xxx_sess_conn_setup(ha, fw_ddb_entry, is_reset); + ret = qla4xxx_sess_conn_setup(ha, fw_ddb_entry, is_reset, idx); if (ret == QLA_ERROR) goto exit_nt_list; @@ -5116,6 +5124,78 @@ void qla4xxx_build_ddb_list(struct scsi_qla_host *ha, int is_reset) } /** + * qla4xxx_wait_login_resp_boot_tgt - Wait for iSCSI boot target login + * response. + * @ha: pointer to adapter structure + * + * When the boot entry is normal iSCSI target then DF_BOOT_TGT flag will be + * set in DDB and we will wait for login response of boot targets during + * probe. + **/ +static void qla4xxx_wait_login_resp_boot_tgt(struct scsi_qla_host *ha) +{ + struct ddb_entry *ddb_entry; + struct dev_db_entry *fw_ddb_entry = NULL; + dma_addr_t fw_ddb_entry_dma; + unsigned long wtime; + uint32_t ddb_state; + int max_ddbs, idx, ret; + + max_ddbs = is_qla40XX(ha) ? MAX_DEV_DB_ENTRIES_40XX : + MAX_DEV_DB_ENTRIES; + + fw_ddb_entry = dma_alloc_coherent(&ha->pdev->dev, sizeof(*fw_ddb_entry), + &fw_ddb_entry_dma, GFP_KERNEL); + if (!fw_ddb_entry) { + ql4_printk(KERN_ERR, ha, + "%s: Unable to allocate dma buffer\n", __func__); + goto exit_login_resp; + } + + wtime = jiffies + (HZ * BOOT_LOGIN_RESP_TOV); + + for (idx = 0; idx < max_ddbs; idx++) { + ddb_entry = qla4xxx_lookup_ddb_by_fw_index(ha, idx); + if (ddb_entry == NULL) + continue; + + if (test_bit(DF_BOOT_TGT, &ddb_entry->flags)) { + DEBUG2(ql4_printk(KERN_INFO, ha, + "%s: DDB index [%d]\n", __func__, + ddb_entry->fw_ddb_index)); + do { + ret = qla4xxx_get_fwddb_entry(ha, + ddb_entry->fw_ddb_index, + fw_ddb_entry, fw_ddb_entry_dma, + NULL, NULL, &ddb_state, NULL, + NULL, NULL); + if (ret == QLA_ERROR) + goto exit_login_resp; + + if ((ddb_state == DDB_DS_SESSION_ACTIVE) || + (ddb_state == DDB_DS_SESSION_FAILED)) + break; + + schedule_timeout_uninterruptible(HZ); + + } while ((time_after(wtime, jiffies))); + + if (!time_after(wtime, jiffies)) { + DEBUG2(ql4_printk(KERN_INFO, ha, + "%s: Login response wait timer expired\n", + __func__)); + goto exit_login_resp; + } + } + } + +exit_login_resp: + if (fw_ddb_entry) + dma_free_coherent(&ha->pdev->dev, sizeof(*fw_ddb_entry), + fw_ddb_entry, fw_ddb_entry_dma); +} + +/** * qla4xxx_probe_adapter - callback function to probe HBA * @pdev: pointer to pci_dev structure * @pci_device_id: pointer to pci_device entry @@ -5270,7 +5350,7 @@ static int qla4xxx_probe_adapter(struct pci_dev *pdev, if (is_qla80XX(ha)) { ha->isp_ops->idc_lock(ha); dev_state = qla4_8xxx_rd_direct(ha, - QLA82XX_CRB_DEV_STATE); + QLA8XXX_CRB_DEV_STATE); ha->isp_ops->idc_unlock(ha); if (dev_state == QLA8XXX_DEV_FAILED) { ql4_printk(KERN_WARNING, ha, "%s: don't retry " @@ -5368,6 +5448,7 @@ skip_retry_init: /* Perform the build ddb list and login to each */ qla4xxx_build_ddb_list(ha, INIT_ADAPTER); iscsi_host_for_each_session(ha->host, qla4xxx_login_flash_ddb); + qla4xxx_wait_login_resp_boot_tgt(ha); qla4xxx_create_chap_list(ha); @@ -6008,14 +6089,6 @@ static int qla4xxx_host_reset(struct Scsi_Host *shost, int reset_type) goto exit_host_reset; } - rval = qla4xxx_wait_for_hba_online(ha); - if (rval != QLA_SUCCESS) { - DEBUG2(ql4_printk(KERN_INFO, ha, "%s: Unable to reset host " - "adapter\n", __func__)); - rval = -EIO; - goto exit_host_reset; - } - if (test_bit(DPC_RESET_HA, &ha->dpc_flags)) goto recover_adapter; @@ -6115,7 +6188,6 @@ qla4xxx_pci_mmio_enabled(struct pci_dev *pdev) static uint32_t qla4_8xxx_error_recovery(struct scsi_qla_host *ha) { uint32_t rval = QLA_ERROR; - uint32_t ret = 0; int fn; struct pci_dev *other_pdev = NULL; @@ -6201,16 +6273,7 @@ static uint32_t qla4_8xxx_error_recovery(struct scsi_qla_host *ha) qla4_8xxx_wr_direct(ha, QLA8XXX_CRB_DRV_STATE, 0); qla4_8xxx_set_drv_active(ha); ha->isp_ops->idc_unlock(ha); - ret = qla4xxx_request_irqs(ha); - if (ret) { - ql4_printk(KERN_WARNING, ha, "Failed to " - "reserve interrupt %d already in use.\n", - ha->pdev->irq); - rval = QLA_ERROR; - } else { - ha->isp_ops->enable_intrs(ha); - rval = QLA_SUCCESS; - } + ha->isp_ops->enable_intrs(ha); } } else { ql4_printk(KERN_INFO, ha, "scsi%ld: %s: devfn 0x%x is not " @@ -6220,18 +6283,9 @@ static uint32_t qla4_8xxx_error_recovery(struct scsi_qla_host *ha) QLA8XXX_DEV_READY)) { clear_bit(AF_FW_RECOVERY, &ha->flags); rval = qla4xxx_initialize_adapter(ha, RESET_ADAPTER); - if (rval == QLA_SUCCESS) { - ret = qla4xxx_request_irqs(ha); - if (ret) { - ql4_printk(KERN_WARNING, ha, "Failed to" - " reserve interrupt %d already in" - " use.\n", ha->pdev->irq); - rval = QLA_ERROR; - } else { - ha->isp_ops->enable_intrs(ha); - rval = QLA_SUCCESS; - } - } + if (rval == QLA_SUCCESS) + ha->isp_ops->enable_intrs(ha); + ha->isp_ops->idc_lock(ha); qla4_8xxx_set_drv_active(ha); ha->isp_ops->idc_unlock(ha); diff --git a/drivers/scsi/qla4xxx/ql4_version.h b/drivers/scsi/qla4xxx/ql4_version.h index f6df2ea..6775a45 100644 --- a/drivers/scsi/qla4xxx/ql4_version.h +++ b/drivers/scsi/qla4xxx/ql4_version.h @@ -5,4 +5,4 @@ * See LICENSE.qla4xxx for copyright and licensing details. */ -#define QLA4XXX_DRIVER_VERSION "5.03.00-k1" +#define QLA4XXX_DRIVER_VERSION "5.03.00-k4" diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c index 59d427b..0a74b97 100644 --- a/drivers/scsi/scsi_transport_iscsi.c +++ b/drivers/scsi/scsi_transport_iscsi.c @@ -2503,6 +2503,15 @@ show_priv_session_creator(struct device *dev, struct device_attribute *attr, } static ISCSI_CLASS_ATTR(priv_sess, creator, S_IRUGO, show_priv_session_creator, NULL); +static ssize_t +show_priv_session_target_id(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct iscsi_cls_session *session = iscsi_dev_to_session(dev->parent); + return sprintf(buf, "%d\n", session->target_id); +} +static ISCSI_CLASS_ATTR(priv_sess, target_id, S_IRUGO, + show_priv_session_target_id, NULL); #define iscsi_priv_session_attr_show(field, format) \ static ssize_t \ @@ -2575,6 +2584,7 @@ static struct attribute *iscsi_session_attrs[] = { &dev_attr_priv_sess_creator.attr, &dev_attr_sess_chap_out_idx.attr, &dev_attr_sess_chap_in_idx.attr, + &dev_attr_priv_sess_target_id.attr, NULL, }; @@ -2638,6 +2648,8 @@ static umode_t iscsi_session_attr_is_visible(struct kobject *kobj, return S_IRUGO; else if (attr == &dev_attr_priv_sess_creator.attr) return S_IRUGO; + else if (attr == &dev_attr_priv_sess_target_id.attr) + return S_IRUGO; else { WARN_ONCE(1, "Invalid session attr"); return 0; diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c index 270b3cf..16a3a0c 100644 --- a/drivers/scsi/storvsc_drv.c +++ b/drivers/scsi/storvsc_drv.c @@ -201,6 +201,7 @@ enum storvsc_request_type { #define SRB_STATUS_AUTOSENSE_VALID 0x80 #define SRB_STATUS_INVALID_LUN 0x20 #define SRB_STATUS_SUCCESS 0x01 +#define SRB_STATUS_ABORTED 0x02 #define SRB_STATUS_ERROR 0x04 /* @@ -295,6 +296,25 @@ struct storvsc_scan_work { uint lun; }; +static void storvsc_device_scan(struct work_struct *work) +{ + struct storvsc_scan_work *wrk; + uint lun; + struct scsi_device *sdev; + + wrk = container_of(work, struct storvsc_scan_work, work); + lun = wrk->lun; + + sdev = scsi_device_lookup(wrk->host, 0, 0, lun); + if (!sdev) + goto done; + scsi_rescan_device(&sdev->sdev_gendev); + scsi_device_put(sdev); + +done: + kfree(wrk); +} + static void storvsc_bus_scan(struct work_struct *work) { struct storvsc_scan_work *wrk; @@ -467,6 +487,7 @@ static struct scatterlist *create_bounce_buffer(struct scatterlist *sgl, if (!bounce_sgl) return NULL; + sg_init_table(bounce_sgl, num_pages); for (i = 0; i < num_pages; i++) { page_buf = alloc_page(GFP_ATOMIC); if (!page_buf) @@ -760,6 +781,66 @@ cleanup: return ret; } +static void storvsc_handle_error(struct vmscsi_request *vm_srb, + struct scsi_cmnd *scmnd, + struct Scsi_Host *host, + u8 asc, u8 ascq) +{ + struct storvsc_scan_work *wrk; + void (*process_err_fn)(struct work_struct *work); + bool do_work = false; + + switch (vm_srb->srb_status) { + case SRB_STATUS_ERROR: + /* + * If there is an error; offline the device since all + * error recovery strategies would have already been + * deployed on the host side. However, if the command + * were a pass-through command deal with it appropriately. + */ + switch (scmnd->cmnd[0]) { + case ATA_16: + case ATA_12: + set_host_byte(scmnd, DID_PASSTHROUGH); + break; + default: + set_host_byte(scmnd, DID_TARGET_FAILURE); + } + break; + case SRB_STATUS_INVALID_LUN: + do_work = true; + process_err_fn = storvsc_remove_lun; + break; + case (SRB_STATUS_ABORTED | SRB_STATUS_AUTOSENSE_VALID): + if ((asc == 0x2a) && (ascq == 0x9)) { + do_work = true; + process_err_fn = storvsc_device_scan; + /* + * Retry the I/O that trigerred this. + */ + set_host_byte(scmnd, DID_REQUEUE); + } + break; + } + + if (!do_work) + return; + + /* + * We need to schedule work to process this error; schedule it. + */ + wrk = kmalloc(sizeof(struct storvsc_scan_work), GFP_ATOMIC); + if (!wrk) { + set_host_byte(scmnd, DID_TARGET_FAILURE); + return; + } + + wrk->host = host; + wrk->lun = vm_srb->lun; + INIT_WORK(&wrk->work, process_err_fn); + schedule_work(&wrk->work); +} + static void storvsc_command_completion(struct storvsc_cmd_request *cmd_request) { @@ -768,8 +849,13 @@ static void storvsc_command_completion(struct storvsc_cmd_request *cmd_request) void (*scsi_done_fn)(struct scsi_cmnd *); struct scsi_sense_hdr sense_hdr; struct vmscsi_request *vm_srb; - struct storvsc_scan_work *wrk; struct stor_mem_pools *memp = scmnd->device->hostdata; + struct Scsi_Host *host; + struct storvsc_device *stor_dev; + struct hv_device *dev = host_dev->dev; + + stor_dev = get_in_stor_device(dev); + host = stor_dev->host; vm_srb = &cmd_request->vstor_packet.vm_srb; if (cmd_request->bounce_sgl_count) { @@ -782,55 +868,18 @@ static void storvsc_command_completion(struct storvsc_cmd_request *cmd_request) cmd_request->bounce_sgl_count); } - /* - * If there is an error; offline the device since all - * error recovery strategies would have already been - * deployed on the host side. However, if the command - * were a pass-through command deal with it appropriately. - */ scmnd->result = vm_srb->scsi_status; - if (vm_srb->srb_status == SRB_STATUS_ERROR) { - switch (scmnd->cmnd[0]) { - case ATA_16: - case ATA_12: - set_host_byte(scmnd, DID_PASSTHROUGH); - break; - default: - set_host_byte(scmnd, DID_TARGET_FAILURE); - } - } - - - /* - * If the LUN is invalid; remove the device. - */ - if (vm_srb->srb_status == SRB_STATUS_INVALID_LUN) { - struct storvsc_device *stor_dev; - struct hv_device *dev = host_dev->dev; - struct Scsi_Host *host; - - stor_dev = get_in_stor_device(dev); - host = stor_dev->host; - - wrk = kmalloc(sizeof(struct storvsc_scan_work), - GFP_ATOMIC); - if (!wrk) { - scmnd->result = DID_TARGET_FAILURE << 16; - } else { - wrk->host = host; - wrk->lun = vm_srb->lun; - INIT_WORK(&wrk->work, storvsc_remove_lun); - schedule_work(&wrk->work); - } - } - if (scmnd->result) { if (scsi_normalize_sense(scmnd->sense_buffer, SCSI_SENSE_BUFFERSIZE, &sense_hdr)) scsi_print_sense_hdr("storvsc", &sense_hdr); } + if (vm_srb->srb_status != SRB_STATUS_SUCCESS) + storvsc_handle_error(vm_srb, scmnd, host, sense_hdr.asc, + sense_hdr.ascq); + scsi_set_resid(scmnd, cmd_request->data_buffer.len - vm_srb->data_transfer_length); @@ -1155,6 +1204,8 @@ static int storvsc_device_configure(struct scsi_device *sdevice) blk_queue_bounce_limit(sdevice->request_queue, BLK_BOUNCE_ANY); + sdevice->no_write_same = 1; + return 0; } @@ -1237,6 +1288,8 @@ static bool storvsc_scsi_cmd_ok(struct scsi_cmnd *scmnd) u8 scsi_op = scmnd->cmnd[0]; switch (scsi_op) { + /* the host does not handle WRITE_SAME, log accident usage */ + case WRITE_SAME: /* * smartd sends this command and the host does not handle * this. So, don't send it. diff --git a/drivers/scsi/ufs/Kconfig b/drivers/scsi/ufs/Kconfig index 8f27f9d..0371047 100644 --- a/drivers/scsi/ufs/Kconfig +++ b/drivers/scsi/ufs/Kconfig @@ -2,48 +2,58 @@ # Kernel configuration file for the UFS Host Controller # # This code is based on drivers/scsi/ufs/Kconfig -# Copyright (C) 2011 Samsung Samsung India Software Operations +# Copyright (C) 2011-2013 Samsung India Software Operations +# +# Authors: +# Santosh Yaraganavi <santosh.sy@samsung.com> +# Vinayak Holikatti <h.vinayak@samsung.com> # -# Santosh Yaraganavi <santosh.sy@samsung.com> -# Vinayak Holikatti <h.vinayak@samsung.com> - # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. - +# See the COPYING file in the top-level directory or visit +# <http://www.gnu.org/licenses/gpl-2.0.html> +# # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. +# +# This program is provided "AS IS" and "WITH ALL FAULTS" and +# without warranty of any kind. You are solely responsible for +# determining the appropriateness of using and distributing +# the program and assume all risks associated with your exercise +# of rights with respect to the program, including but not limited +# to infringement of third party rights, the risks and costs of +# program errors, damage to or loss of data, programs or equipment, +# and unavailability or interruption of operations. Under no +# circumstances will the contributor of this Program be liable for +# any damages of any kind arising from your use or distribution of +# this program. -# NO WARRANTY -# THE PROGRAM IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OR -# CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED INCLUDING, WITHOUT -# LIMITATION, ANY WARRANTIES OR CONDITIONS OF TITLE, NON-INFRINGEMENT, -# MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. Each Recipient is -# solely responsible for determining the appropriateness of using and -# distributing the Program and assumes all risks associated with its -# exercise of rights under this Agreement, including but not limited to -# the risks and costs of program errors, damage to or loss of data, -# programs or equipment, and unavailability or interruption of operations. - -# DISCLAIMER OF LIABILITY -# NEITHER RECIPIENT NOR ANY CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY -# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING WITHOUT LIMITATION LOST PROFITS), HOWEVER CAUSED AND -# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR -# TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE -# USE OR DISTRIBUTION OF THE PROGRAM OR THE EXERCISE OF ANY RIGHTS GRANTED -# HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES +config SCSI_UFSHCD + tristate "Universal Flash Storage Controller Driver Core" + depends on SCSI + ---help--- + This selects the support for UFS devices in Linux, say Y and make + sure that you know the name of your UFS host adapter (the card + inside your computer that "speaks" the UFS protocol, also + called UFS Host Controller), because you will be asked for it. + The module will be called ufshcd. -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, -# USA. + To compile this driver as a module, choose M here and read + <file:Documentation/scsi/ufs.txt>. + However, do not compile this as a module if your root file system + (the one containing the directory /) is located on a UFS device. -config SCSI_UFSHCD - tristate "Universal Flash Storage host controller driver" - depends on PCI && SCSI +config SCSI_UFSHCD_PCI + tristate "PCI bus based UFS Controller support" + depends on SCSI_UFSHCD && PCI ---help--- - This is a generic driver which supports PCIe UFS Host controllers. + This selects the PCI UFS Host Controller Interface. Select this if + you have UFS Host Controller with PCI Interface. + + If you have a controller with this interface, say Y or M here. + + If unsure, say N. diff --git a/drivers/scsi/ufs/Makefile b/drivers/scsi/ufs/Makefile index adf7895..9eda0df 100644 --- a/drivers/scsi/ufs/Makefile +++ b/drivers/scsi/ufs/Makefile @@ -1,2 +1,3 @@ # UFSHCD makefile obj-$(CONFIG_SCSI_UFSHCD) += ufshcd.o +obj-$(CONFIG_SCSI_UFSHCD_PCI) += ufshcd-pci.o diff --git a/drivers/scsi/ufs/ufs.h b/drivers/scsi/ufs/ufs.h index b207529..139bc06 100644 --- a/drivers/scsi/ufs/ufs.h +++ b/drivers/scsi/ufs/ufs.h @@ -2,45 +2,35 @@ * Universal Flash Storage Host controller driver * * This code is based on drivers/scsi/ufs/ufs.h - * Copyright (C) 2011-2012 Samsung India Software Operations + * Copyright (C) 2011-2013 Samsung India Software Operations * - * Santosh Yaraganavi <santosh.sy@samsung.com> - * Vinayak Holikatti <h.vinayak@samsung.com> + * Authors: + * Santosh Yaraganavi <santosh.sy@samsung.com> + * Vinayak Holikatti <h.vinayak@samsung.com> * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. + * See the COPYING file in the top-level directory or visit + * <http://www.gnu.org/licenses/gpl-2.0.html> * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * - * NO WARRANTY - * THE PROGRAM IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OR - * CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED INCLUDING, WITHOUT - * LIMITATION, ANY WARRANTIES OR CONDITIONS OF TITLE, NON-INFRINGEMENT, - * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. Each Recipient is - * solely responsible for determining the appropriateness of using and - * distributing the Program and assumes all risks associated with its - * exercise of rights under this Agreement, including but not limited to - * the risks and costs of program errors, damage to or loss of data, - * programs or equipment, and unavailability or interruption of operations. - - * DISCLAIMER OF LIABILITY - * NEITHER RECIPIENT NOR ANY CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING WITHOUT LIMITATION LOST PROFITS), HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR - * TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE - * USE OR DISTRIBUTION OF THE PROGRAM OR THE EXERCISE OF ANY RIGHTS GRANTED - * HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES - - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, - * USA. + * This program is provided "AS IS" and "WITH ALL FAULTS" and + * without warranty of any kind. You are solely responsible for + * determining the appropriateness of using and distributing + * the program and assume all risks associated with your exercise + * of rights with respect to the program, including but not limited + * to infringement of third party rights, the risks and costs of + * program errors, damage to or loss of data, programs or equipment, + * and unavailability or interruption of operations. Under no + * circumstances will the contributor of this Program be liable for + * any damages of any kind arising from your use or distribution of + * this program. */ #ifndef _UFS_H diff --git a/drivers/scsi/ufs/ufshcd-pci.c b/drivers/scsi/ufs/ufshcd-pci.c new file mode 100644 index 0000000..5cb1d75 --- /dev/null +++ b/drivers/scsi/ufs/ufshcd-pci.c @@ -0,0 +1,211 @@ +/* + * Universal Flash Storage Host controller PCI glue driver + * + * This code is based on drivers/scsi/ufs/ufshcd-pci.c + * Copyright (C) 2011-2013 Samsung India Software Operations + * + * Authors: + * Santosh Yaraganavi <santosh.sy@samsung.com> + * Vinayak Holikatti <h.vinayak@samsung.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * See the COPYING file in the top-level directory or visit + * <http://www.gnu.org/licenses/gpl-2.0.html> + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * This program is provided "AS IS" and "WITH ALL FAULTS" and + * without warranty of any kind. You are solely responsible for + * determining the appropriateness of using and distributing + * the program and assume all risks associated with your exercise + * of rights with respect to the program, including but not limited + * to infringement of third party rights, the risks and costs of + * program errors, damage to or loss of data, programs or equipment, + * and unavailability or interruption of operations. Under no + * circumstances will the contributor of this Program be liable for + * any damages of any kind arising from your use or distribution of + * this program. + */ + +#include "ufshcd.h" +#include <linux/pci.h> + +#ifdef CONFIG_PM +/** + * ufshcd_pci_suspend - suspend power management function + * @pdev: pointer to PCI device handle + * @state: power state + * + * Returns -ENOSYS + */ +static int ufshcd_pci_suspend(struct pci_dev *pdev, pm_message_t state) +{ + /* + * TODO: + * 1. Call ufshcd_suspend + * 2. Do bus specific power management + */ + + return -ENOSYS; +} + +/** + * ufshcd_pci_resume - resume power management function + * @pdev: pointer to PCI device handle + * + * Returns -ENOSYS + */ +static int ufshcd_pci_resume(struct pci_dev *pdev) +{ + /* + * TODO: + * 1. Call ufshcd_resume. + * 2. Do bus specific wake up + */ + + return -ENOSYS; +} +#endif /* CONFIG_PM */ + +/** + * ufshcd_pci_shutdown - main function to put the controller in reset state + * @pdev: pointer to PCI device handle + */ +static void ufshcd_pci_shutdown(struct pci_dev *pdev) +{ + ufshcd_hba_stop((struct ufs_hba *)pci_get_drvdata(pdev)); +} + +/** + * ufshcd_pci_remove - de-allocate PCI/SCSI host and host memory space + * data structure memory + * @pdev - pointer to PCI handle + */ +static void ufshcd_pci_remove(struct pci_dev *pdev) +{ + struct ufs_hba *hba = pci_get_drvdata(pdev); + + disable_irq(pdev->irq); + free_irq(pdev->irq, hba); + ufshcd_remove(hba); + pci_release_regions(pdev); + pci_set_drvdata(pdev, NULL); + pci_clear_master(pdev); + pci_disable_device(pdev); +} + +/** + * ufshcd_set_dma_mask - Set dma mask based on the controller + * addressing capability + * @pdev: PCI device structure + * + * Returns 0 for success, non-zero for failure + */ +static int ufshcd_set_dma_mask(struct pci_dev *pdev) +{ + int err; + + if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(64)) + && !pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64))) + return 0; + err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)); + if (!err) + err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32)); + return err; +} + +/** + * ufshcd_pci_probe - probe routine of the driver + * @pdev: pointer to PCI device handle + * @id: PCI device id + * + * Returns 0 on success, non-zero value on failure + */ +static int +ufshcd_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) +{ + struct ufs_hba *hba; + void __iomem *mmio_base; + int err; + + err = pci_enable_device(pdev); + if (err) { + dev_err(&pdev->dev, "pci_enable_device failed\n"); + goto out_error; + } + + pci_set_master(pdev); + + + err = pci_request_regions(pdev, UFSHCD); + if (err < 0) { + dev_err(&pdev->dev, "request regions failed\n"); + goto out_disable; + } + + mmio_base = pci_ioremap_bar(pdev, 0); + if (!mmio_base) { + dev_err(&pdev->dev, "memory map failed\n"); + err = -ENOMEM; + goto out_release_regions; + } + + err = ufshcd_set_dma_mask(pdev); + if (err) { + dev_err(&pdev->dev, "set dma mask failed\n"); + goto out_iounmap; + } + + err = ufshcd_init(&pdev->dev, &hba, mmio_base, pdev->irq); + if (err) { + dev_err(&pdev->dev, "Initialization failed\n"); + goto out_iounmap; + } + + pci_set_drvdata(pdev, hba); + + return 0; + +out_iounmap: + iounmap(mmio_base); +out_release_regions: + pci_release_regions(pdev); +out_disable: + pci_clear_master(pdev); + pci_disable_device(pdev); +out_error: + return err; +} + +static DEFINE_PCI_DEVICE_TABLE(ufshcd_pci_tbl) = { + { PCI_VENDOR_ID_SAMSUNG, 0xC00C, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, + { } /* terminate list */ +}; + +MODULE_DEVICE_TABLE(pci, ufshcd_pci_tbl); + +static struct pci_driver ufshcd_pci_driver = { + .name = UFSHCD, + .id_table = ufshcd_pci_tbl, + .probe = ufshcd_pci_probe, + .remove = ufshcd_pci_remove, + .shutdown = ufshcd_pci_shutdown, +#ifdef CONFIG_PM + .suspend = ufshcd_pci_suspend, + .resume = ufshcd_pci_resume, +#endif +}; + +module_pci_driver(ufshcd_pci_driver); + +MODULE_AUTHOR("Santosh Yaragnavi <santosh.sy@samsung.com>"); +MODULE_AUTHOR("Vinayak Holikatti <h.vinayak@samsung.com>"); +MODULE_DESCRIPTION("UFS host controller PCI glue driver"); +MODULE_LICENSE("GPL"); +MODULE_VERSION(UFSHCD_DRIVER_VERSION); diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index 91a4046..60fd40c 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -1,77 +1,39 @@ /* - * Universal Flash Storage Host controller driver + * Universal Flash Storage Host controller driver Core * * This code is based on drivers/scsi/ufs/ufshcd.c - * Copyright (C) 2011-2012 Samsung India Software Operations + * Copyright (C) 2011-2013 Samsung India Software Operations * - * Santosh Yaraganavi <santosh.sy@samsung.com> - * Vinayak Holikatti <h.vinayak@samsung.com> + * Authors: + * Santosh Yaraganavi <santosh.sy@samsung.com> + * Vinayak Holikatti <h.vinayak@samsung.com> * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. + * See the COPYING file in the top-level directory or visit + * <http://www.gnu.org/licenses/gpl-2.0.html> * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * - * NO WARRANTY - * THE PROGRAM IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OR - * CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED INCLUDING, WITHOUT - * LIMITATION, ANY WARRANTIES OR CONDITIONS OF TITLE, NON-INFRINGEMENT, - * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. Each Recipient is - * solely responsible for determining the appropriateness of using and - * distributing the Program and assumes all risks associated with its - * exercise of rights under this Agreement, including but not limited to - * the risks and costs of program errors, damage to or loss of data, - * programs or equipment, and unavailability or interruption of operations. - - * DISCLAIMER OF LIABILITY - * NEITHER RECIPIENT NOR ANY CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING WITHOUT LIMITATION LOST PROFITS), HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR - * TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE - * USE OR DISTRIBUTION OF THE PROGRAM OR THE EXERCISE OF ANY RIGHTS GRANTED - * HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES - - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, - * USA. + * This program is provided "AS IS" and "WITH ALL FAULTS" and + * without warranty of any kind. You are solely responsible for + * determining the appropriateness of using and distributing + * the program and assume all risks associated with your exercise + * of rights with respect to the program, including but not limited + * to infringement of third party rights, the risks and costs of + * program errors, damage to or loss of data, programs or equipment, + * and unavailability or interruption of operations. Under no + * circumstances will the contributor of this Program be liable for + * any damages of any kind arising from your use or distribution of + * this program. */ -#include <linux/module.h> -#include <linux/kernel.h> -#include <linux/init.h> -#include <linux/pci.h> -#include <linux/interrupt.h> -#include <linux/io.h> -#include <linux/delay.h> -#include <linux/slab.h> -#include <linux/spinlock.h> -#include <linux/workqueue.h> -#include <linux/errno.h> -#include <linux/types.h> -#include <linux/wait.h> -#include <linux/bitops.h> - -#include <asm/irq.h> -#include <asm/byteorder.h> -#include <scsi/scsi.h> -#include <scsi/scsi_cmnd.h> -#include <scsi/scsi_host.h> -#include <scsi/scsi_tcq.h> -#include <scsi/scsi_dbg.h> -#include <scsi/scsi_eh.h> - -#include "ufs.h" -#include "ufshci.h" - -#define UFSHCD "ufshcd" -#define UFSHCD_DRIVER_VERSION "0.1" +#include "ufshcd.h" enum { UFSHCD_MAX_CHANNEL = 0, @@ -102,121 +64,6 @@ enum { }; /** - * struct uic_command - UIC command structure - * @command: UIC command - * @argument1: UIC command argument 1 - * @argument2: UIC command argument 2 - * @argument3: UIC command argument 3 - * @cmd_active: Indicate if UIC command is outstanding - * @result: UIC command result - */ -struct uic_command { - u32 command; - u32 argument1; - u32 argument2; - u32 argument3; - int cmd_active; - int result; -}; - -/** - * struct ufs_hba - per adapter private structure - * @mmio_base: UFSHCI base register address - * @ucdl_base_addr: UFS Command Descriptor base address - * @utrdl_base_addr: UTP Transfer Request Descriptor base address - * @utmrdl_base_addr: UTP Task Management Descriptor base address - * @ucdl_dma_addr: UFS Command Descriptor DMA address - * @utrdl_dma_addr: UTRDL DMA address - * @utmrdl_dma_addr: UTMRDL DMA address - * @host: Scsi_Host instance of the driver - * @pdev: PCI device handle - * @lrb: local reference block - * @outstanding_tasks: Bits representing outstanding task requests - * @outstanding_reqs: Bits representing outstanding transfer requests - * @capabilities: UFS Controller Capabilities - * @nutrs: Transfer Request Queue depth supported by controller - * @nutmrs: Task Management Queue depth supported by controller - * @active_uic_cmd: handle of active UIC command - * @ufshcd_tm_wait_queue: wait queue for task management - * @tm_condition: condition variable for task management - * @ufshcd_state: UFSHCD states - * @int_enable_mask: Interrupt Mask Bits - * @uic_workq: Work queue for UIC completion handling - * @feh_workq: Work queue for fatal controller error handling - * @errors: HBA errors - */ -struct ufs_hba { - void __iomem *mmio_base; - - /* Virtual memory reference */ - struct utp_transfer_cmd_desc *ucdl_base_addr; - struct utp_transfer_req_desc *utrdl_base_addr; - struct utp_task_req_desc *utmrdl_base_addr; - - /* DMA memory reference */ - dma_addr_t ucdl_dma_addr; - dma_addr_t utrdl_dma_addr; - dma_addr_t utmrdl_dma_addr; - - struct Scsi_Host *host; - struct pci_dev *pdev; - - struct ufshcd_lrb *lrb; - - unsigned long outstanding_tasks; - unsigned long outstanding_reqs; - - u32 capabilities; - int nutrs; - int nutmrs; - u32 ufs_version; - - struct uic_command active_uic_cmd; - wait_queue_head_t ufshcd_tm_wait_queue; - unsigned long tm_condition; - - u32 ufshcd_state; - u32 int_enable_mask; - - /* Work Queues */ - struct work_struct uic_workq; - struct work_struct feh_workq; - - /* HBA Errors */ - u32 errors; -}; - -/** - * struct ufshcd_lrb - local reference block - * @utr_descriptor_ptr: UTRD address of the command - * @ucd_cmd_ptr: UCD address of the command - * @ucd_rsp_ptr: Response UPIU address for this command - * @ucd_prdt_ptr: PRDT address of the command - * @cmd: pointer to SCSI command - * @sense_buffer: pointer to sense buffer address of the SCSI command - * @sense_bufflen: Length of the sense buffer - * @scsi_status: SCSI status of the command - * @command_type: SCSI, UFS, Query. - * @task_tag: Task tag of the command - * @lun: LUN of the command - */ -struct ufshcd_lrb { - struct utp_transfer_req_desc *utr_descriptor_ptr; - struct utp_upiu_cmd *ucd_cmd_ptr; - struct utp_upiu_rsp *ucd_rsp_ptr; - struct ufshcd_sg_entry *ucd_prdt_ptr; - - struct scsi_cmnd *cmd; - u8 *sense_buffer; - unsigned int sense_bufflen; - int scsi_status; - - int command_type; - int task_tag; - unsigned int lun; -}; - -/** * ufshcd_get_ufs_version - Get the UFS version supported by the HBA * @hba - Pointer to adapter instance * @@ -335,21 +182,21 @@ static inline void ufshcd_free_hba_memory(struct ufs_hba *hba) if (hba->utmrdl_base_addr) { utmrdl_size = sizeof(struct utp_task_req_desc) * hba->nutmrs; - dma_free_coherent(&hba->pdev->dev, utmrdl_size, + dma_free_coherent(hba->dev, utmrdl_size, hba->utmrdl_base_addr, hba->utmrdl_dma_addr); } if (hba->utrdl_base_addr) { utrdl_size = (sizeof(struct utp_transfer_req_desc) * hba->nutrs); - dma_free_coherent(&hba->pdev->dev, utrdl_size, + dma_free_coherent(hba->dev, utrdl_size, hba->utrdl_base_addr, hba->utrdl_dma_addr); } if (hba->ucdl_base_addr) { ucdl_size = (sizeof(struct utp_transfer_cmd_desc) * hba->nutrs); - dma_free_coherent(&hba->pdev->dev, ucdl_size, + dma_free_coherent(hba->dev, ucdl_size, hba->ucdl_base_addr, hba->ucdl_dma_addr); } } @@ -429,15 +276,6 @@ static void ufshcd_enable_run_stop_reg(struct ufs_hba *hba) } /** - * ufshcd_hba_stop - Send controller to reset state - * @hba: per adapter instance - */ -static inline void ufshcd_hba_stop(struct ufs_hba *hba) -{ - writel(CONTROLLER_DISABLE, (hba->mmio_base + REG_CONTROLLER_ENABLE)); -} - -/** * ufshcd_hba_start - Start controller initialization sequence * @hba: per adapter instance */ @@ -724,7 +562,7 @@ static int ufshcd_memory_alloc(struct ufs_hba *hba) /* Allocate memory for UTP command descriptors */ ucdl_size = (sizeof(struct utp_transfer_cmd_desc) * hba->nutrs); - hba->ucdl_base_addr = dma_alloc_coherent(&hba->pdev->dev, + hba->ucdl_base_addr = dma_alloc_coherent(hba->dev, ucdl_size, &hba->ucdl_dma_addr, GFP_KERNEL); @@ -737,7 +575,7 @@ static int ufshcd_memory_alloc(struct ufs_hba *hba) */ if (!hba->ucdl_base_addr || WARN_ON(hba->ucdl_dma_addr & (PAGE_SIZE - 1))) { - dev_err(&hba->pdev->dev, + dev_err(hba->dev, "Command Descriptor Memory allocation failed\n"); goto out; } @@ -747,13 +585,13 @@ static int ufshcd_memory_alloc(struct ufs_hba *hba) * UFSHCI requires 1024 byte alignment of UTRD */ utrdl_size = (sizeof(struct utp_transfer_req_desc) * hba->nutrs); - hba->utrdl_base_addr = dma_alloc_coherent(&hba->pdev->dev, + hba->utrdl_base_addr = dma_alloc_coherent(hba->dev, utrdl_size, &hba->utrdl_dma_addr, GFP_KERNEL); if (!hba->utrdl_base_addr || WARN_ON(hba->utrdl_dma_addr & (PAGE_SIZE - 1))) { - dev_err(&hba->pdev->dev, + dev_err(hba->dev, "Transfer Descriptor Memory allocation failed\n"); goto out; } @@ -763,13 +601,13 @@ static int ufshcd_memory_alloc(struct ufs_hba *hba) * UFSHCI requires 1024 byte alignment of UTMRD */ utmrdl_size = sizeof(struct utp_task_req_desc) * hba->nutmrs; - hba->utmrdl_base_addr = dma_alloc_coherent(&hba->pdev->dev, + hba->utmrdl_base_addr = dma_alloc_coherent(hba->dev, utmrdl_size, &hba->utmrdl_dma_addr, GFP_KERNEL); if (!hba->utmrdl_base_addr || WARN_ON(hba->utmrdl_dma_addr & (PAGE_SIZE - 1))) { - dev_err(&hba->pdev->dev, + dev_err(hba->dev, "Task Management Descriptor Memory allocation failed\n"); goto out; } @@ -777,7 +615,7 @@ static int ufshcd_memory_alloc(struct ufs_hba *hba) /* Allocate memory for local reference block */ hba->lrb = kcalloc(hba->nutrs, sizeof(struct ufshcd_lrb), GFP_KERNEL); if (!hba->lrb) { - dev_err(&hba->pdev->dev, "LRB Memory allocation failed\n"); + dev_err(hba->dev, "LRB Memory allocation failed\n"); goto out; } return 0; @@ -867,7 +705,7 @@ static int ufshcd_dme_link_startup(struct ufs_hba *hba) /* check if controller is ready to accept UIC commands */ if (((readl(hba->mmio_base + REG_CONTROLLER_STATUS)) & UIC_COMMAND_READY) == 0x0) { - dev_err(&hba->pdev->dev, + dev_err(hba->dev, "Controller not ready" " to accept UIC commands\n"); return -EIO; @@ -912,7 +750,7 @@ static int ufshcd_make_hba_operational(struct ufs_hba *hba) /* check if device present */ reg = readl((hba->mmio_base + REG_CONTROLLER_STATUS)); if (!ufshcd_is_device_present(reg)) { - dev_err(&hba->pdev->dev, "cc: Device not present\n"); + dev_err(hba->dev, "cc: Device not present\n"); err = -ENXIO; goto out; } @@ -924,7 +762,7 @@ static int ufshcd_make_hba_operational(struct ufs_hba *hba) if (!(ufshcd_get_lists_status(reg))) { ufshcd_enable_run_stop_reg(hba); } else { - dev_err(&hba->pdev->dev, + dev_err(hba->dev, "Host controller not ready to process requests"); err = -EIO; goto out; @@ -1005,7 +843,7 @@ static int ufshcd_hba_enable(struct ufs_hba *hba) if (retry) { retry--; } else { - dev_err(&hba->pdev->dev, + dev_err(hba->dev, "Controller enable failed\n"); return -EIO; } @@ -1084,7 +922,7 @@ static int ufshcd_do_reset(struct ufs_hba *hba) /* start the initialization process */ if (ufshcd_initialize_hba(hba)) { - dev_err(&hba->pdev->dev, + dev_err(hba->dev, "Reset: Controller initialization failed\n"); return FAILED; } @@ -1167,7 +1005,7 @@ static int ufshcd_task_req_compl(struct ufs_hba *hba, u32 index) task_result = SUCCESS; } else { task_result = FAILED; - dev_err(&hba->pdev->dev, + dev_err(hba->dev, "trc: Invalid ocs = %x\n", ocs_value); } spin_unlock_irqrestore(hba->host->host_lock, flags); @@ -1281,7 +1119,7 @@ ufshcd_transfer_rsp_status(struct ufs_hba *hba, struct ufshcd_lrb *lrbp) /* check if the returned transfer response is valid */ result = ufshcd_is_valid_req_rsp(lrbp->ucd_rsp_ptr); if (result) { - dev_err(&hba->pdev->dev, + dev_err(hba->dev, "Invalid response = %x\n", result); break; } @@ -1310,7 +1148,7 @@ ufshcd_transfer_rsp_status(struct ufs_hba *hba, struct ufshcd_lrb *lrbp) case OCS_FATAL_ERROR: default: result |= DID_ERROR << 16; - dev_err(&hba->pdev->dev, + dev_err(hba->dev, "OCS error from controller = %x\n", ocs); break; } /* end of switch */ @@ -1374,7 +1212,7 @@ static void ufshcd_uic_cc_handler (struct work_struct *work) !(ufshcd_get_uic_cmd_result(hba))) { if (ufshcd_make_hba_operational(hba)) - dev_err(&hba->pdev->dev, + dev_err(hba->dev, "cc: hba not operational state\n"); return; } @@ -1509,7 +1347,7 @@ ufshcd_issue_tm_cmd(struct ufs_hba *hba, free_slot = ufshcd_get_tm_free_slot(hba); if (free_slot >= hba->nutmrs) { spin_unlock_irqrestore(host->host_lock, flags); - dev_err(&hba->pdev->dev, "Task management queue full\n"); + dev_err(hba->dev, "Task management queue full\n"); err = FAILED; goto out; } @@ -1552,7 +1390,7 @@ ufshcd_issue_tm_cmd(struct ufs_hba *hba, &hba->tm_condition) != 0), 60 * HZ); if (!err) { - dev_err(&hba->pdev->dev, + dev_err(hba->dev, "Task management command timed-out\n"); err = FAILED; goto out; @@ -1688,23 +1526,13 @@ static struct scsi_host_template ufshcd_driver_template = { }; /** - * ufshcd_shutdown - main function to put the controller in reset state - * @pdev: pointer to PCI device handle - */ -static void ufshcd_shutdown(struct pci_dev *pdev) -{ - ufshcd_hba_stop((struct ufs_hba *)pci_get_drvdata(pdev)); -} - -#ifdef CONFIG_PM -/** * ufshcd_suspend - suspend power management function - * @pdev: pointer to PCI device handle + * @hba: per adapter instance * @state: power state * * Returns -ENOSYS */ -static int ufshcd_suspend(struct pci_dev *pdev, pm_message_t state) +int ufshcd_suspend(struct ufs_hba *hba, pm_message_t state) { /* * TODO: @@ -1717,14 +1545,15 @@ static int ufshcd_suspend(struct pci_dev *pdev, pm_message_t state) return -ENOSYS; } +EXPORT_SYMBOL_GPL(ufshcd_suspend); /** * ufshcd_resume - resume power management function - * @pdev: pointer to PCI device handle + * @hba: per adapter instance * * Returns -ENOSYS */ -static int ufshcd_resume(struct pci_dev *pdev) +int ufshcd_resume(struct ufs_hba *hba) { /* * TODO: @@ -1737,7 +1566,7 @@ static int ufshcd_resume(struct pci_dev *pdev) return -ENOSYS; } -#endif /* CONFIG_PM */ +EXPORT_SYMBOL_GPL(ufshcd_resume); /** * ufshcd_hba_free - free allocated memory for @@ -1748,107 +1577,67 @@ static void ufshcd_hba_free(struct ufs_hba *hba) { iounmap(hba->mmio_base); ufshcd_free_hba_memory(hba); - pci_release_regions(hba->pdev); } /** - * ufshcd_remove - de-allocate PCI/SCSI host and host memory space + * ufshcd_remove - de-allocate SCSI host and host memory space * data structure memory - * @pdev - pointer to PCI handle + * @hba - per adapter instance */ -static void ufshcd_remove(struct pci_dev *pdev) +void ufshcd_remove(struct ufs_hba *hba) { - struct ufs_hba *hba = pci_get_drvdata(pdev); - /* disable interrupts */ ufshcd_int_config(hba, UFSHCD_INT_DISABLE); - free_irq(pdev->irq, hba); ufshcd_hba_stop(hba); ufshcd_hba_free(hba); scsi_remove_host(hba->host); scsi_host_put(hba->host); - pci_set_drvdata(pdev, NULL); - pci_clear_master(pdev); - pci_disable_device(pdev); -} - -/** - * ufshcd_set_dma_mask - Set dma mask based on the controller - * addressing capability - * @pdev: PCI device structure - * - * Returns 0 for success, non-zero for failure - */ -static int ufshcd_set_dma_mask(struct ufs_hba *hba) -{ - int err; - u64 dma_mask; - - /* - * If controller supports 64 bit addressing mode, then set the DMA - * mask to 64-bit, else set the DMA mask to 32-bit - */ - if (hba->capabilities & MASK_64_ADDRESSING_SUPPORT) - dma_mask = DMA_BIT_MASK(64); - else - dma_mask = DMA_BIT_MASK(32); - - err = pci_set_dma_mask(hba->pdev, dma_mask); - if (err) - return err; - - err = pci_set_consistent_dma_mask(hba->pdev, dma_mask); - - return err; } +EXPORT_SYMBOL_GPL(ufshcd_remove); /** - * ufshcd_probe - probe routine of the driver - * @pdev: pointer to PCI device handle - * @id: PCI device id - * + * ufshcd_init - Driver initialization routine + * @dev: pointer to device handle + * @hba_handle: driver private handle + * @mmio_base: base register address + * @irq: Interrupt line of device * Returns 0 on success, non-zero value on failure */ -static int ufshcd_probe(struct pci_dev *pdev, const struct pci_device_id *id) +int ufshcd_init(struct device *dev, struct ufs_hba **hba_handle, + void __iomem *mmio_base, unsigned int irq) { struct Scsi_Host *host; struct ufs_hba *hba; int err; - err = pci_enable_device(pdev); - if (err) { - dev_err(&pdev->dev, "pci_enable_device failed\n"); + if (!dev) { + dev_err(dev, + "Invalid memory reference for dev is NULL\n"); + err = -ENODEV; goto out_error; } - pci_set_master(pdev); + if (!mmio_base) { + dev_err(dev, + "Invalid memory reference for mmio_base is NULL\n"); + err = -ENODEV; + goto out_error; + } host = scsi_host_alloc(&ufshcd_driver_template, sizeof(struct ufs_hba)); if (!host) { - dev_err(&pdev->dev, "scsi_host_alloc failed\n"); + dev_err(dev, "scsi_host_alloc failed\n"); err = -ENOMEM; - goto out_disable; + goto out_error; } hba = shost_priv(host); - - err = pci_request_regions(pdev, UFSHCD); - if (err < 0) { - dev_err(&pdev->dev, "request regions failed\n"); - goto out_host_put; - } - - hba->mmio_base = pci_ioremap_bar(pdev, 0); - if (!hba->mmio_base) { - dev_err(&pdev->dev, "memory map failed\n"); - err = -ENOMEM; - goto out_release_regions; - } - hba->host = host; - hba->pdev = pdev; + hba->dev = dev; + hba->mmio_base = mmio_base; + hba->irq = irq; /* Read capabilities registers */ ufshcd_hba_capabilities(hba); @@ -1856,17 +1645,11 @@ static int ufshcd_probe(struct pci_dev *pdev, const struct pci_device_id *id) /* Get UFS version supported by the controller */ hba->ufs_version = ufshcd_get_ufs_version(hba); - err = ufshcd_set_dma_mask(hba); - if (err) { - dev_err(&pdev->dev, "set dma mask failed\n"); - goto out_iounmap; - } - /* Allocate memory for host memory space */ err = ufshcd_memory_alloc(hba); if (err) { - dev_err(&pdev->dev, "Memory allocation failed\n"); - goto out_iounmap; + dev_err(hba->dev, "Memory allocation failed\n"); + goto out_disable; } /* Configure LRB */ @@ -1888,76 +1671,50 @@ static int ufshcd_probe(struct pci_dev *pdev, const struct pci_device_id *id) INIT_WORK(&hba->feh_workq, ufshcd_fatal_err_handler); /* IRQ registration */ - err = request_irq(pdev->irq, ufshcd_intr, IRQF_SHARED, UFSHCD, hba); + err = request_irq(irq, ufshcd_intr, IRQF_SHARED, UFSHCD, hba); if (err) { - dev_err(&pdev->dev, "request irq failed\n"); + dev_err(hba->dev, "request irq failed\n"); goto out_lrb_free; } /* Enable SCSI tag mapping */ err = scsi_init_shared_tag_map(host, host->can_queue); if (err) { - dev_err(&pdev->dev, "init shared queue failed\n"); + dev_err(hba->dev, "init shared queue failed\n"); goto out_free_irq; } - pci_set_drvdata(pdev, hba); - - err = scsi_add_host(host, &pdev->dev); + err = scsi_add_host(host, hba->dev); if (err) { - dev_err(&pdev->dev, "scsi_add_host failed\n"); + dev_err(hba->dev, "scsi_add_host failed\n"); goto out_free_irq; } /* Initialization routine */ err = ufshcd_initialize_hba(hba); if (err) { - dev_err(&pdev->dev, "Initialization failed\n"); - goto out_free_irq; + dev_err(hba->dev, "Initialization failed\n"); + goto out_remove_scsi_host; } + *hba_handle = hba; return 0; +out_remove_scsi_host: + scsi_remove_host(hba->host); out_free_irq: - free_irq(pdev->irq, hba); + free_irq(irq, hba); out_lrb_free: ufshcd_free_hba_memory(hba); -out_iounmap: - iounmap(hba->mmio_base); -out_release_regions: - pci_release_regions(pdev); -out_host_put: - scsi_host_put(host); out_disable: - pci_clear_master(pdev); - pci_disable_device(pdev); + scsi_host_put(host); out_error: return err; } +EXPORT_SYMBOL_GPL(ufshcd_init); -static DEFINE_PCI_DEVICE_TABLE(ufshcd_pci_tbl) = { - { PCI_VENDOR_ID_SAMSUNG, 0xC00C, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, - { } /* terminate list */ -}; - -MODULE_DEVICE_TABLE(pci, ufshcd_pci_tbl); - -static struct pci_driver ufshcd_pci_driver = { - .name = UFSHCD, - .id_table = ufshcd_pci_tbl, - .probe = ufshcd_probe, - .remove = ufshcd_remove, - .shutdown = ufshcd_shutdown, -#ifdef CONFIG_PM - .suspend = ufshcd_suspend, - .resume = ufshcd_resume, -#endif -}; - -module_pci_driver(ufshcd_pci_driver); - -MODULE_AUTHOR("Santosh Yaragnavi <santosh.sy@samsung.com>, " - "Vinayak Holikatti <h.vinayak@samsung.com>"); -MODULE_DESCRIPTION("Generic UFS host controller driver"); +MODULE_AUTHOR("Santosh Yaragnavi <santosh.sy@samsung.com>"); +MODULE_AUTHOR("Vinayak Holikatti <h.vinayak@samsung.com>"); +MODULE_DESCRIPTION("Generic UFS host controller driver Core"); MODULE_LICENSE("GPL"); MODULE_VERSION(UFSHCD_DRIVER_VERSION); diff --git a/drivers/scsi/ufs/ufshcd.h b/drivers/scsi/ufs/ufshcd.h new file mode 100644 index 0000000..6b99a42 --- /dev/null +++ b/drivers/scsi/ufs/ufshcd.h @@ -0,0 +1,202 @@ +/* + * Universal Flash Storage Host controller driver + * + * This code is based on drivers/scsi/ufs/ufshcd.h + * Copyright (C) 2011-2013 Samsung India Software Operations + * + * Authors: + * Santosh Yaraganavi <santosh.sy@samsung.com> + * Vinayak Holikatti <h.vinayak@samsung.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * See the COPYING file in the top-level directory or visit + * <http://www.gnu.org/licenses/gpl-2.0.html> + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * This program is provided "AS IS" and "WITH ALL FAULTS" and + * without warranty of any kind. You are solely responsible for + * determining the appropriateness of using and distributing + * the program and assume all risks associated with your exercise + * of rights with respect to the program, including but not limited + * to infringement of third party rights, the risks and costs of + * program errors, damage to or loss of data, programs or equipment, + * and unavailability or interruption of operations. Under no + * circumstances will the contributor of this Program be liable for + * any damages of any kind arising from your use or distribution of + * this program. + */ + +#ifndef _UFSHCD_H +#define _UFSHCD_H + +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/interrupt.h> +#include <linux/io.h> +#include <linux/delay.h> +#include <linux/slab.h> +#include <linux/spinlock.h> +#include <linux/workqueue.h> +#include <linux/errno.h> +#include <linux/types.h> +#include <linux/wait.h> +#include <linux/bitops.h> +#include <linux/pm_runtime.h> +#include <linux/clk.h> + +#include <asm/irq.h> +#include <asm/byteorder.h> +#include <scsi/scsi.h> +#include <scsi/scsi_cmnd.h> +#include <scsi/scsi_host.h> +#include <scsi/scsi_tcq.h> +#include <scsi/scsi_dbg.h> +#include <scsi/scsi_eh.h> + +#include "ufs.h" +#include "ufshci.h" + +#define UFSHCD "ufshcd" +#define UFSHCD_DRIVER_VERSION "0.2" + +/** + * struct uic_command - UIC command structure + * @command: UIC command + * @argument1: UIC command argument 1 + * @argument2: UIC command argument 2 + * @argument3: UIC command argument 3 + * @cmd_active: Indicate if UIC command is outstanding + * @result: UIC command result + */ +struct uic_command { + u32 command; + u32 argument1; + u32 argument2; + u32 argument3; + int cmd_active; + int result; +}; + +/** + * struct ufshcd_lrb - local reference block + * @utr_descriptor_ptr: UTRD address of the command + * @ucd_cmd_ptr: UCD address of the command + * @ucd_rsp_ptr: Response UPIU address for this command + * @ucd_prdt_ptr: PRDT address of the command + * @cmd: pointer to SCSI command + * @sense_buffer: pointer to sense buffer address of the SCSI command + * @sense_bufflen: Length of the sense buffer + * @scsi_status: SCSI status of the command + * @command_type: SCSI, UFS, Query. + * @task_tag: Task tag of the command + * @lun: LUN of the command + */ +struct ufshcd_lrb { + struct utp_transfer_req_desc *utr_descriptor_ptr; + struct utp_upiu_cmd *ucd_cmd_ptr; + struct utp_upiu_rsp *ucd_rsp_ptr; + struct ufshcd_sg_entry *ucd_prdt_ptr; + + struct scsi_cmnd *cmd; + u8 *sense_buffer; + unsigned int sense_bufflen; + int scsi_status; + + int command_type; + int task_tag; + unsigned int lun; +}; + + +/** + * struct ufs_hba - per adapter private structure + * @mmio_base: UFSHCI base register address + * @ucdl_base_addr: UFS Command Descriptor base address + * @utrdl_base_addr: UTP Transfer Request Descriptor base address + * @utmrdl_base_addr: UTP Task Management Descriptor base address + * @ucdl_dma_addr: UFS Command Descriptor DMA address + * @utrdl_dma_addr: UTRDL DMA address + * @utmrdl_dma_addr: UTMRDL DMA address + * @host: Scsi_Host instance of the driver + * @dev: device handle + * @lrb: local reference block + * @outstanding_tasks: Bits representing outstanding task requests + * @outstanding_reqs: Bits representing outstanding transfer requests + * @capabilities: UFS Controller Capabilities + * @nutrs: Transfer Request Queue depth supported by controller + * @nutmrs: Task Management Queue depth supported by controller + * @ufs_version: UFS Version to which controller complies + * @irq: Irq number of the controller + * @active_uic_cmd: handle of active UIC command + * @ufshcd_tm_wait_queue: wait queue for task management + * @tm_condition: condition variable for task management + * @ufshcd_state: UFSHCD states + * @int_enable_mask: Interrupt Mask Bits + * @uic_workq: Work queue for UIC completion handling + * @feh_workq: Work queue for fatal controller error handling + * @errors: HBA errors + */ +struct ufs_hba { + void __iomem *mmio_base; + + /* Virtual memory reference */ + struct utp_transfer_cmd_desc *ucdl_base_addr; + struct utp_transfer_req_desc *utrdl_base_addr; + struct utp_task_req_desc *utmrdl_base_addr; + + /* DMA memory reference */ + dma_addr_t ucdl_dma_addr; + dma_addr_t utrdl_dma_addr; + dma_addr_t utmrdl_dma_addr; + + struct Scsi_Host *host; + struct device *dev; + + struct ufshcd_lrb *lrb; + + unsigned long outstanding_tasks; + unsigned long outstanding_reqs; + + u32 capabilities; + int nutrs; + int nutmrs; + u32 ufs_version; + unsigned int irq; + + struct uic_command active_uic_cmd; + wait_queue_head_t ufshcd_tm_wait_queue; + unsigned long tm_condition; + + u32 ufshcd_state; + u32 int_enable_mask; + + /* Work Queues */ + struct work_struct uic_workq; + struct work_struct feh_workq; + + /* HBA Errors */ + u32 errors; +}; + +int ufshcd_init(struct device *, struct ufs_hba ** , void __iomem * , + unsigned int); +void ufshcd_remove(struct ufs_hba *); + +/** + * ufshcd_hba_stop - Send controller to reset state + * @hba: per adapter instance + */ +static inline void ufshcd_hba_stop(struct ufs_hba *hba) +{ + writel(CONTROLLER_DISABLE, (hba->mmio_base + REG_CONTROLLER_ENABLE)); +} + +#endif /* End of Header */ diff --git a/drivers/scsi/ufs/ufshci.h b/drivers/scsi/ufs/ufshci.h index 6e3510f..0c16484 100644 --- a/drivers/scsi/ufs/ufshci.h +++ b/drivers/scsi/ufs/ufshci.h @@ -2,45 +2,35 @@ * Universal Flash Storage Host controller driver * * This code is based on drivers/scsi/ufs/ufshci.h - * Copyright (C) 2011-2012 Samsung India Software Operations + * Copyright (C) 2011-2013 Samsung India Software Operations * - * Santosh Yaraganavi <santosh.sy@samsung.com> - * Vinayak Holikatti <h.vinayak@samsung.com> + * Authors: + * Santosh Yaraganavi <santosh.sy@samsung.com> + * Vinayak Holikatti <h.vinayak@samsung.com> * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. + * See the COPYING file in the top-level directory or visit + * <http://www.gnu.org/licenses/gpl-2.0.html> * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * - * NO WARRANTY - * THE PROGRAM IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OR - * CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED INCLUDING, WITHOUT - * LIMITATION, ANY WARRANTIES OR CONDITIONS OF TITLE, NON-INFRINGEMENT, - * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. Each Recipient is - * solely responsible for determining the appropriateness of using and - * distributing the Program and assumes all risks associated with its - * exercise of rights under this Agreement, including but not limited to - * the risks and costs of program errors, damage to or loss of data, - * programs or equipment, and unavailability or interruption of operations. - - * DISCLAIMER OF LIABILITY - * NEITHER RECIPIENT NOR ANY CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING WITHOUT LIMITATION LOST PROFITS), HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR - * TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE - * USE OR DISTRIBUTION OF THE PROGRAM OR THE EXERCISE OF ANY RIGHTS GRANTED - * HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES - - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, - * USA. + * This program is provided "AS IS" and "WITH ALL FAULTS" and + * without warranty of any kind. You are solely responsible for + * determining the appropriateness of using and distributing + * the program and assume all risks associated with your exercise + * of rights with respect to the program, including but not limited + * to infringement of third party rights, the risks and costs of + * program errors, damage to or loss of data, programs or equipment, + * and unavailability or interruption of operations. Under no + * circumstances will the contributor of this Program be liable for + * any damages of any kind arising from your use or distribution of + * this program. */ #ifndef _UFSHCI_H diff --git a/drivers/ssb/driver_chipcommon_pmu.c b/drivers/ssb/driver_chipcommon_pmu.c index a43415a..4c0f6d8 100644 --- a/drivers/ssb/driver_chipcommon_pmu.c +++ b/drivers/ssb/driver_chipcommon_pmu.c @@ -14,7 +14,7 @@ #include <linux/delay.h> #include <linux/export.h> #ifdef CONFIG_BCM47XX -#include <asm/mach-bcm47xx/nvram.h> +#include <bcm47xx_nvram.h> #endif #include "ssb_private.h" @@ -322,7 +322,7 @@ static void ssb_pmu_pll_init(struct ssb_chipcommon *cc) if (bus->bustype == SSB_BUSTYPE_SSB) { #ifdef CONFIG_BCM47XX char buf[20]; - if (nvram_getenv("xtalfreq", buf, sizeof(buf)) >= 0) + if (bcm47xx_nvram_getenv("xtalfreq", buf, sizeof(buf)) >= 0) crystalfreq = simple_strtoul(buf, NULL, 0); #endif } diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c index 9435a3d..7ea246a 100644 --- a/drivers/target/iscsi/iscsi_target.c +++ b/drivers/target/iscsi/iscsi_target.c @@ -3584,6 +3584,10 @@ check_rsp_state: spin_lock_bh(&cmd->istate_lock); cmd->i_state = ISTATE_SENT_STATUS; spin_unlock_bh(&cmd->istate_lock); + + if (atomic_read(&conn->check_immediate_queue)) + return 1; + continue; } else if (ret == 2) { /* Still must send status, @@ -3673,7 +3677,7 @@ check_rsp_state: } if (atomic_read(&conn->check_immediate_queue)) - break; + return 1; } return 0; @@ -3717,12 +3721,15 @@ restart: signal_pending(current)) goto transport_err; +get_immediate: ret = handle_immediate_queue(conn); if (ret < 0) goto transport_err; ret = handle_response_queue(conn); - if (ret == -EAGAIN) + if (ret == 1) + goto get_immediate; + else if (ret == -EAGAIN) goto restart; else if (ret < 0) goto transport_err; diff --git a/drivers/target/sbp/sbp_target.c b/drivers/target/sbp/sbp_target.c index 6917a9e..d3536f5 100644 --- a/drivers/target/sbp/sbp_target.c +++ b/drivers/target/sbp/sbp_target.c @@ -2598,7 +2598,7 @@ static int __init sbp_init(void) return 0; }; -static void sbp_exit(void) +static void __exit sbp_exit(void) { sbp_deregister_configfs(); }; diff --git a/drivers/target/target_core_file.c b/drivers/target/target_core_file.c index d226c10..17a6acb 100644 --- a/drivers/target/target_core_file.c +++ b/drivers/target/target_core_file.c @@ -631,7 +631,7 @@ static int __init fileio_module_init(void) return transport_subsystem_register(&fileio_template); } -static void fileio_module_exit(void) +static void __exit fileio_module_exit(void) { transport_subsystem_release(&fileio_template); } diff --git a/drivers/target/target_core_iblock.c b/drivers/target/target_core_iblock.c index c73f4a9..8bcc514 100644 --- a/drivers/target/target_core_iblock.c +++ b/drivers/target/target_core_iblock.c @@ -821,7 +821,7 @@ static int __init iblock_module_init(void) return transport_subsystem_register(&iblock_template); } -static void iblock_module_exit(void) +static void __exit iblock_module_exit(void) { transport_subsystem_release(&iblock_template); } diff --git a/drivers/target/target_core_pscsi.c b/drivers/target/target_core_pscsi.c index 2bcfd79..82e78d7 100644 --- a/drivers/target/target_core_pscsi.c +++ b/drivers/target/target_core_pscsi.c @@ -840,14 +840,14 @@ static void pscsi_bi_endio(struct bio *bio, int error) bio_put(bio); } -static inline struct bio *pscsi_get_bio(int sg_num) +static inline struct bio *pscsi_get_bio(int nr_vecs) { struct bio *bio; /* * Use bio_malloc() following the comment in for bio -> struct request * in block/blk-core.c:blk_make_request() */ - bio = bio_kmalloc(GFP_KERNEL, sg_num); + bio = bio_kmalloc(GFP_KERNEL, nr_vecs); if (!bio) { pr_err("PSCSI: bio_kmalloc() failed\n"); return NULL; @@ -940,7 +940,6 @@ pscsi_map_sg(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents, bio = NULL; } - page++; len -= bytes; data_len -= bytes; off = 0; @@ -952,7 +951,6 @@ fail: while (*hbio) { bio = *hbio; *hbio = (*hbio)->bi_next; - bio->bi_next = NULL; bio_endio(bio, 0); /* XXX: should be error */ } return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; @@ -1092,7 +1090,6 @@ fail_free_bio: while (hbio) { struct bio *bio = hbio; hbio = hbio->bi_next; - bio->bi_next = NULL; bio_endio(bio, 0); /* XXX: should be error */ } ret = TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; @@ -1178,7 +1175,7 @@ static int __init pscsi_module_init(void) return transport_subsystem_register(&pscsi_template); } -static void pscsi_module_exit(void) +static void __exit pscsi_module_exit(void) { transport_subsystem_release(&pscsi_template); } diff --git a/drivers/thermal/Kconfig b/drivers/thermal/Kconfig index c2c77d1..a764f16 100644 --- a/drivers/thermal/Kconfig +++ b/drivers/thermal/Kconfig @@ -29,14 +29,14 @@ choice config THERMAL_DEFAULT_GOV_STEP_WISE bool "step_wise" - select STEP_WISE + select THERMAL_GOV_STEP_WISE help Use the step_wise governor as default. This throttles the devices one step at a time. config THERMAL_DEFAULT_GOV_FAIR_SHARE bool "fair_share" - select FAIR_SHARE + select THERMAL_GOV_FAIR_SHARE help Use the fair_share governor as default. This throttles the devices based on their 'contribution' to a zone. The @@ -44,24 +44,24 @@ config THERMAL_DEFAULT_GOV_FAIR_SHARE config THERMAL_DEFAULT_GOV_USER_SPACE bool "user_space" - select USER_SPACE + select THERMAL_GOV_USER_SPACE help Select this if you want to let the user space manage the lpatform thermals. endchoice -config FAIR_SHARE +config THERMAL_GOV_FAIR_SHARE bool "Fair-share thermal governor" help Enable this to manage platform thermals using fair-share governor. -config STEP_WISE +config THERMAL_GOV_STEP_WISE bool "Step_wise thermal governor" help Enable this to manage platform thermals using a simple linear -config USER_SPACE +config THERMAL_GOV_USER_SPACE bool "User_space thermal governor" help Enable this to let the user space manage the platform thermals. @@ -78,6 +78,14 @@ config CPU_THERMAL and not the ACPI interface. If you want this support, you should say Y here. +config THERMAL_EMULATION + bool "Thermal emulation mode support" + help + Enable this option to make a emul_temp sysfs node in thermal zone + directory to support temperature emulation. With emulation sysfs node, + user can manually input temperature and test the different trip + threshold behaviour for simulation purpose. + config SPEAR_THERMAL bool "SPEAr thermal sensor driver" depends on PLAT_SPEAR @@ -93,6 +101,14 @@ config RCAR_THERMAL Enable this to plug the R-Car thermal sensor driver into the Linux thermal framework +config KIRKWOOD_THERMAL + tristate "Temperature sensor on Marvell Kirkwood SoCs" + depends on ARCH_KIRKWOOD + depends on OF + help + Support for the Kirkwood thermal sensor driver into the Linux thermal + framework. Only kirkwood 88F6282 and 88F6283 have this sensor. + config EXYNOS_THERMAL tristate "Temperature sensor on Samsung EXYNOS" depends on (ARCH_EXYNOS4 || ARCH_EXYNOS5) @@ -101,6 +117,23 @@ config EXYNOS_THERMAL If you say yes here you get support for TMU (Thermal Management Unit) on SAMSUNG EXYNOS series of SoC. +config EXYNOS_THERMAL_EMUL + bool "EXYNOS TMU emulation mode support" + depends on EXYNOS_THERMAL + help + Exynos 4412 and 4414 and 5 series has emulation mode on TMU. + Enable this option will be make sysfs node in exynos thermal platform + device directory to support emulation mode. With emulation mode sysfs + node, you can manually input temperature to TMU for simulation purpose. + +config DOVE_THERMAL + tristate "Temperature sensor on Marvell Dove SoCs" + depends on ARCH_DOVE + depends on OF + help + Support for the Dove thermal sensor driver in the Linux thermal + framework. + config DB8500_THERMAL bool "DB8500 thermal management" depends on ARCH_U8500 @@ -122,4 +155,14 @@ config DB8500_CPUFREQ_COOLING bound cpufreq cooling device turns active to set CPU frequency low to cool down the CPU. +config INTEL_POWERCLAMP + tristate "Intel PowerClamp idle injection driver" + depends on THERMAL + depends on X86 + depends on CPU_SUP_INTEL + help + Enable this to enable Intel PowerClamp idle injection driver. This + enforce idle time which results in more package C-state residency. The + user interface is exposed via generic thermal framework. + endif diff --git a/drivers/thermal/Makefile b/drivers/thermal/Makefile index d8da683..d3a2b38c 100644 --- a/drivers/thermal/Makefile +++ b/drivers/thermal/Makefile @@ -5,9 +5,9 @@ obj-$(CONFIG_THERMAL) += thermal_sys.o # governors -obj-$(CONFIG_FAIR_SHARE) += fair_share.o -obj-$(CONFIG_STEP_WISE) += step_wise.o -obj-$(CONFIG_USER_SPACE) += user_space.o +obj-$(CONFIG_THERMAL_GOV_FAIR_SHARE) += fair_share.o +obj-$(CONFIG_THERMAL_GOV_STEP_WISE) += step_wise.o +obj-$(CONFIG_THERMAL_GOV_USER_SPACE) += user_space.o # cpufreq cooling obj-$(CONFIG_CPU_THERMAL) += cpu_cooling.o @@ -15,6 +15,10 @@ obj-$(CONFIG_CPU_THERMAL) += cpu_cooling.o # platform thermal drivers obj-$(CONFIG_SPEAR_THERMAL) += spear_thermal.o obj-$(CONFIG_RCAR_THERMAL) += rcar_thermal.o +obj-$(CONFIG_KIRKWOOD_THERMAL) += kirkwood_thermal.o obj-$(CONFIG_EXYNOS_THERMAL) += exynos_thermal.o +obj-$(CONFIG_DOVE_THERMAL) += dove_thermal.o obj-$(CONFIG_DB8500_THERMAL) += db8500_thermal.o obj-$(CONFIG_DB8500_CPUFREQ_COOLING) += db8500_cpufreq_cooling.o +obj-$(CONFIG_INTEL_POWERCLAMP) += intel_powerclamp.o + diff --git a/drivers/thermal/cpu_cooling.c b/drivers/thermal/cpu_cooling.c index c33fa53..8dc44cb 100644 --- a/drivers/thermal/cpu_cooling.c +++ b/drivers/thermal/cpu_cooling.c @@ -111,8 +111,8 @@ static int is_cpufreq_valid(int cpu) /** * get_cpu_frequency - get the absolute value of frequency from level. * @cpu: cpu for which frequency is fetched. - * @level: level of frequency of the CPU - * e.g level=1 --> 1st MAX FREQ, LEVEL=2 ---> 2nd MAX FREQ, .... etc + * @level: level of frequency, equals cooling state of cpu cooling device + * e.g level=0 --> 1st MAX FREQ, level=1 ---> 2nd MAX FREQ, .... etc */ static unsigned int get_cpu_frequency(unsigned int cpu, unsigned long level) { diff --git a/drivers/thermal/db8500_cpufreq_cooling.c b/drivers/thermal/db8500_cpufreq_cooling.c index 4cf8e72..2141985 100644 --- a/drivers/thermal/db8500_cpufreq_cooling.c +++ b/drivers/thermal/db8500_cpufreq_cooling.c @@ -21,6 +21,7 @@ #include <linux/cpufreq.h> #include <linux/err.h> #include <linux/module.h> +#include <linux/of.h> #include <linux/platform_device.h> #include <linux/slab.h> @@ -73,15 +74,13 @@ static const struct of_device_id db8500_cpufreq_cooling_match[] = { { .compatible = "stericsson,db8500-cpufreq-cooling" }, {}, }; -#else -#define db8500_cpufreq_cooling_match NULL #endif static struct platform_driver db8500_cpufreq_cooling_driver = { .driver = { .owner = THIS_MODULE, .name = "db8500-cpufreq-cooling", - .of_match_table = db8500_cpufreq_cooling_match, + .of_match_table = of_match_ptr(db8500_cpufreq_cooling_match), }, .probe = db8500_cpufreq_cooling_probe, .suspend = db8500_cpufreq_cooling_suspend, diff --git a/drivers/thermal/db8500_thermal.c b/drivers/thermal/db8500_thermal.c index ec71ade..61ce60a 100644 --- a/drivers/thermal/db8500_thermal.c +++ b/drivers/thermal/db8500_thermal.c @@ -508,15 +508,13 @@ static const struct of_device_id db8500_thermal_match[] = { { .compatible = "stericsson,db8500-thermal" }, {}, }; -#else -#define db8500_thermal_match NULL #endif static struct platform_driver db8500_thermal_driver = { .driver = { .owner = THIS_MODULE, .name = "db8500-thermal", - .of_match_table = db8500_thermal_match, + .of_match_table = of_match_ptr(db8500_thermal_match), }, .probe = db8500_thermal_probe, .suspend = db8500_thermal_suspend, diff --git a/drivers/thermal/dove_thermal.c b/drivers/thermal/dove_thermal.c new file mode 100644 index 0000000..7b0bfa0 --- /dev/null +++ b/drivers/thermal/dove_thermal.c @@ -0,0 +1,209 @@ +/* + * Dove thermal sensor driver + * + * Copyright (C) 2013 Andrew Lunn <andrew@lunn.ch> + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ +#include <linux/device.h> +#include <linux/err.h> +#include <linux/io.h> +#include <linux/kernel.h> +#include <linux/of.h> +#include <linux/module.h> +#include <linux/platform_device.h> +#include <linux/thermal.h> + +#define DOVE_THERMAL_TEMP_OFFSET 1 +#define DOVE_THERMAL_TEMP_MASK 0x1FF + +/* Dove Thermal Manager Control and Status Register */ +#define PMU_TM_DISABLE_OFFS 0 +#define PMU_TM_DISABLE_MASK (0x1 << PMU_TM_DISABLE_OFFS) + +/* Dove Theraml Diode Control 0 Register */ +#define PMU_TDC0_SW_RST_MASK (0x1 << 1) +#define PMU_TDC0_SEL_VCAL_OFFS 5 +#define PMU_TDC0_SEL_VCAL_MASK (0x3 << PMU_TDC0_SEL_VCAL_OFFS) +#define PMU_TDC0_REF_CAL_CNT_OFFS 11 +#define PMU_TDC0_REF_CAL_CNT_MASK (0x1FF << PMU_TDC0_REF_CAL_CNT_OFFS) +#define PMU_TDC0_AVG_NUM_OFFS 25 +#define PMU_TDC0_AVG_NUM_MASK (0x7 << PMU_TDC0_AVG_NUM_OFFS) + +/* Dove Thermal Diode Control 1 Register */ +#define PMU_TEMP_DIOD_CTRL1_REG 0x04 +#define PMU_TDC1_TEMP_VALID_MASK (0x1 << 10) + +/* Dove Thermal Sensor Dev Structure */ +struct dove_thermal_priv { + void __iomem *sensor; + void __iomem *control; +}; + +static int dove_init_sensor(const struct dove_thermal_priv *priv) +{ + u32 reg; + u32 i; + + /* Configure the Diode Control Register #0 */ + reg = readl_relaxed(priv->control); + + /* Use average of 2 */ + reg &= ~PMU_TDC0_AVG_NUM_MASK; + reg |= (0x1 << PMU_TDC0_AVG_NUM_OFFS); + + /* Reference calibration value */ + reg &= ~PMU_TDC0_REF_CAL_CNT_MASK; + reg |= (0x0F1 << PMU_TDC0_REF_CAL_CNT_OFFS); + + /* Set the high level reference for calibration */ + reg &= ~PMU_TDC0_SEL_VCAL_MASK; + reg |= (0x2 << PMU_TDC0_SEL_VCAL_OFFS); + writel(reg, priv->control); + + /* Reset the sensor */ + reg = readl_relaxed(priv->control); + writel((reg | PMU_TDC0_SW_RST_MASK), priv->control); + writel(reg, priv->control); + + /* Enable the sensor */ + reg = readl_relaxed(priv->sensor); + reg &= ~PMU_TM_DISABLE_MASK; + writel(reg, priv->sensor); + + /* Poll the sensor for the first reading */ + for (i = 0; i < 1000000; i++) { + reg = readl_relaxed(priv->sensor); + if (reg & DOVE_THERMAL_TEMP_MASK) + break; + } + + if (i == 1000000) + return -EIO; + + return 0; +} + +static int dove_get_temp(struct thermal_zone_device *thermal, + unsigned long *temp) +{ + unsigned long reg; + struct dove_thermal_priv *priv = thermal->devdata; + + /* Valid check */ + reg = readl_relaxed(priv->control + PMU_TEMP_DIOD_CTRL1_REG); + if ((reg & PMU_TDC1_TEMP_VALID_MASK) == 0x0) { + dev_err(&thermal->device, + "Temperature sensor reading not valid\n"); + return -EIO; + } + + /* + * Calculate temperature. See Section 8.10.1 of 88AP510, + * Documentation/arm/Marvell/README + */ + reg = readl_relaxed(priv->sensor); + reg = (reg >> DOVE_THERMAL_TEMP_OFFSET) & DOVE_THERMAL_TEMP_MASK; + *temp = ((2281638UL - (7298*reg)) / 10); + + return 0; +} + +static struct thermal_zone_device_ops ops = { + .get_temp = dove_get_temp, +}; + +static const struct of_device_id dove_thermal_id_table[] = { + { .compatible = "marvell,dove-thermal" }, + {} +}; + +static int dove_thermal_probe(struct platform_device *pdev) +{ + struct thermal_zone_device *thermal = NULL; + struct dove_thermal_priv *priv; + struct resource *res; + int ret; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!res) { + dev_err(&pdev->dev, "Failed to get platform resource\n"); + return -ENODEV; + } + + priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL); + if (!priv) + return -ENOMEM; + + priv->sensor = devm_request_and_ioremap(&pdev->dev, res); + if (!priv->sensor) { + dev_err(&pdev->dev, "Failed to request_ioremap memory\n"); + return -EADDRNOTAVAIL; + } + + res = platform_get_resource(pdev, IORESOURCE_MEM, 1); + if (!res) { + dev_err(&pdev->dev, "Failed to get platform resource\n"); + return -ENODEV; + } + priv->control = devm_request_and_ioremap(&pdev->dev, res); + if (!priv->control) { + dev_err(&pdev->dev, "Failed to request_ioremap memory\n"); + return -EADDRNOTAVAIL; + } + + ret = dove_init_sensor(priv); + if (ret) { + dev_err(&pdev->dev, "Failed to initialize sensor\n"); + return ret; + } + + thermal = thermal_zone_device_register("dove_thermal", 0, 0, + priv, &ops, NULL, 0, 0); + if (IS_ERR(thermal)) { + dev_err(&pdev->dev, + "Failed to register thermal zone device\n"); + return PTR_ERR(thermal); + } + + platform_set_drvdata(pdev, thermal); + + return 0; +} + +static int dove_thermal_exit(struct platform_device *pdev) +{ + struct thermal_zone_device *dove_thermal = + platform_get_drvdata(pdev); + + thermal_zone_device_unregister(dove_thermal); + platform_set_drvdata(pdev, NULL); + + return 0; +} + +MODULE_DEVICE_TABLE(of, dove_thermal_id_table); + +static struct platform_driver dove_thermal_driver = { + .probe = dove_thermal_probe, + .remove = dove_thermal_exit, + .driver = { + .name = "dove_thermal", + .owner = THIS_MODULE, + .of_match_table = of_match_ptr(dove_thermal_id_table), + }, +}; + +module_platform_driver(dove_thermal_driver); + +MODULE_AUTHOR("Andrew Lunn <andrew@lunn.ch>"); +MODULE_DESCRIPTION("Dove thermal driver"); +MODULE_LICENSE("GPL"); diff --git a/drivers/thermal/exynos_thermal.c b/drivers/thermal/exynos_thermal.c index bada130..e04ebd8 100644 --- a/drivers/thermal/exynos_thermal.c +++ b/drivers/thermal/exynos_thermal.c @@ -82,7 +82,7 @@ #define EXYNOS_TRIMINFO_RELOAD 0x1 #define EXYNOS_TMU_CLEAR_RISE_INT 0x111 -#define EXYNOS_TMU_CLEAR_FALL_INT (0x111 << 16) +#define EXYNOS_TMU_CLEAR_FALL_INT (0x111 << 12) #define EXYNOS_MUX_ADDR_VALUE 6 #define EXYNOS_MUX_ADDR_SHIFT 20 #define EXYNOS_TMU_TRIP_MODE_SHIFT 13 @@ -94,11 +94,20 @@ #define SENSOR_NAME_LEN 16 #define MAX_TRIP_COUNT 8 #define MAX_COOLING_DEVICE 4 +#define MAX_THRESHOLD_LEVS 4 #define ACTIVE_INTERVAL 500 #define IDLE_INTERVAL 10000 #define MCELSIUS 1000 +#ifdef CONFIG_EXYNOS_THERMAL_EMUL +#define EXYNOS_EMUL_TIME 0x57F0 +#define EXYNOS_EMUL_TIME_SHIFT 16 +#define EXYNOS_EMUL_DATA_SHIFT 8 +#define EXYNOS_EMUL_DATA_MASK 0xFF +#define EXYNOS_EMUL_ENABLE 0x1 +#endif /* CONFIG_EXYNOS_THERMAL_EMUL */ + /* CPU Zone information */ #define PANIC_ZONE 4 #define WARN_ZONE 3 @@ -125,6 +134,7 @@ struct exynos_tmu_data { struct thermal_trip_point_conf { int trip_val[MAX_TRIP_COUNT]; int trip_count; + u8 trigger_falling; }; struct thermal_cooling_conf { @@ -174,7 +184,8 @@ static int exynos_set_mode(struct thermal_zone_device *thermal, mutex_lock(&th_zone->therm_dev->lock); - if (mode == THERMAL_DEVICE_ENABLED) + if (mode == THERMAL_DEVICE_ENABLED && + !th_zone->sensor_conf->trip_data.trigger_falling) th_zone->therm_dev->polling_delay = IDLE_INTERVAL; else th_zone->therm_dev->polling_delay = 0; @@ -284,7 +295,7 @@ static int exynos_bind(struct thermal_zone_device *thermal, case MONITOR_ZONE: case WARN_ZONE: if (thermal_zone_bind_cooling_device(thermal, i, cdev, - level, level)) { + level, 0)) { pr_err("error binding cdev inst %d\n", i); ret = -EINVAL; } @@ -362,10 +373,17 @@ static int exynos_get_temp(struct thermal_zone_device *thermal, static int exynos_get_trend(struct thermal_zone_device *thermal, int trip, enum thermal_trend *trend) { - if (thermal->temperature >= trip) - *trend = THERMAL_TREND_RAISING; + int ret; + unsigned long trip_temp; + + ret = exynos_get_trip_temp(thermal, trip, &trip_temp); + if (ret < 0) + return ret; + + if (thermal->temperature >= trip_temp) + *trend = THERMAL_TREND_RAISE_FULL; else - *trend = THERMAL_TREND_DROPPING; + *trend = THERMAL_TREND_DROP_FULL; return 0; } @@ -413,7 +431,8 @@ static void exynos_report_trigger(void) break; } - if (th_zone->mode == THERMAL_DEVICE_ENABLED) { + if (th_zone->mode == THERMAL_DEVICE_ENABLED && + !th_zone->sensor_conf->trip_data.trigger_falling) { if (i > 0) th_zone->therm_dev->polling_delay = ACTIVE_INTERVAL; else @@ -452,7 +471,8 @@ static int exynos_register_thermal(struct thermal_sensor_conf *sensor_conf) th_zone->therm_dev = thermal_zone_device_register(sensor_conf->name, EXYNOS_ZONE_COUNT, 0, NULL, &exynos_dev_ops, NULL, 0, - IDLE_INTERVAL); + sensor_conf->trip_data.trigger_falling ? + 0 : IDLE_INTERVAL); if (IS_ERR(th_zone->therm_dev)) { pr_err("Failed to register thermal zone device\n"); @@ -559,8 +579,9 @@ static int exynos_tmu_initialize(struct platform_device *pdev) { struct exynos_tmu_data *data = platform_get_drvdata(pdev); struct exynos_tmu_platform_data *pdata = data->pdata; - unsigned int status, trim_info, rising_threshold; - int ret = 0, threshold_code; + unsigned int status, trim_info; + unsigned int rising_threshold = 0, falling_threshold = 0; + int ret = 0, threshold_code, i, trigger_levs = 0; mutex_lock(&data->lock); clk_enable(data->clk); @@ -585,6 +606,11 @@ static int exynos_tmu_initialize(struct platform_device *pdev) (data->temp_error2 != 0)) data->temp_error1 = pdata->efuse_value; + /* Count trigger levels to be enabled */ + for (i = 0; i < MAX_THRESHOLD_LEVS; i++) + if (pdata->trigger_levels[i]) + trigger_levs++; + if (data->soc == SOC_ARCH_EXYNOS4210) { /* Write temperature code for threshold */ threshold_code = temp_to_code(data, pdata->threshold); @@ -594,44 +620,38 @@ static int exynos_tmu_initialize(struct platform_device *pdev) } writeb(threshold_code, data->base + EXYNOS4210_TMU_REG_THRESHOLD_TEMP); - - writeb(pdata->trigger_levels[0], - data->base + EXYNOS4210_TMU_REG_TRIG_LEVEL0); - writeb(pdata->trigger_levels[1], - data->base + EXYNOS4210_TMU_REG_TRIG_LEVEL1); - writeb(pdata->trigger_levels[2], - data->base + EXYNOS4210_TMU_REG_TRIG_LEVEL2); - writeb(pdata->trigger_levels[3], - data->base + EXYNOS4210_TMU_REG_TRIG_LEVEL3); + for (i = 0; i < trigger_levs; i++) + writeb(pdata->trigger_levels[i], + data->base + EXYNOS4210_TMU_REG_TRIG_LEVEL0 + i * 4); writel(EXYNOS4210_TMU_INTCLEAR_VAL, data->base + EXYNOS_TMU_REG_INTCLEAR); } else if (data->soc == SOC_ARCH_EXYNOS) { - /* Write temperature code for threshold */ - threshold_code = temp_to_code(data, pdata->trigger_levels[0]); - if (threshold_code < 0) { - ret = threshold_code; - goto out; - } - rising_threshold = threshold_code; - threshold_code = temp_to_code(data, pdata->trigger_levels[1]); - if (threshold_code < 0) { - ret = threshold_code; - goto out; - } - rising_threshold |= (threshold_code << 8); - threshold_code = temp_to_code(data, pdata->trigger_levels[2]); - if (threshold_code < 0) { - ret = threshold_code; - goto out; + /* Write temperature code for rising and falling threshold */ + for (i = 0; i < trigger_levs; i++) { + threshold_code = temp_to_code(data, + pdata->trigger_levels[i]); + if (threshold_code < 0) { + ret = threshold_code; + goto out; + } + rising_threshold |= threshold_code << 8 * i; + if (pdata->threshold_falling) { + threshold_code = temp_to_code(data, + pdata->trigger_levels[i] - + pdata->threshold_falling); + if (threshold_code > 0) + falling_threshold |= + threshold_code << 8 * i; + } } - rising_threshold |= (threshold_code << 16); writel(rising_threshold, data->base + EXYNOS_THD_TEMP_RISE); - writel(0, data->base + EXYNOS_THD_TEMP_FALL); + writel(falling_threshold, + data->base + EXYNOS_THD_TEMP_FALL); - writel(EXYNOS_TMU_CLEAR_RISE_INT|EXYNOS_TMU_CLEAR_FALL_INT, + writel(EXYNOS_TMU_CLEAR_RISE_INT | EXYNOS_TMU_CLEAR_FALL_INT, data->base + EXYNOS_TMU_REG_INTCLEAR); } out: @@ -664,6 +684,8 @@ static void exynos_tmu_control(struct platform_device *pdev, bool on) pdata->trigger_level2_en << 8 | pdata->trigger_level1_en << 4 | pdata->trigger_level0_en; + if (pdata->threshold_falling) + interrupt_en |= interrupt_en << 16; } else { con |= EXYNOS_TMU_CORE_OFF; interrupt_en = 0; /* Disable all interrupts */ @@ -697,20 +719,19 @@ static void exynos_tmu_work(struct work_struct *work) struct exynos_tmu_data *data = container_of(work, struct exynos_tmu_data, irq_work); + exynos_report_trigger(); mutex_lock(&data->lock); clk_enable(data->clk); - - if (data->soc == SOC_ARCH_EXYNOS) - writel(EXYNOS_TMU_CLEAR_RISE_INT, + writel(EXYNOS_TMU_CLEAR_RISE_INT | + EXYNOS_TMU_CLEAR_FALL_INT, data->base + EXYNOS_TMU_REG_INTCLEAR); else writel(EXYNOS4210_TMU_INTCLEAR_VAL, data->base + EXYNOS_TMU_REG_INTCLEAR); - clk_disable(data->clk); mutex_unlock(&data->lock); - exynos_report_trigger(); + enable_irq(data->irq); } @@ -759,6 +780,7 @@ static struct exynos_tmu_platform_data const exynos4210_default_tmu_data = { #if defined(CONFIG_SOC_EXYNOS5250) || defined(CONFIG_SOC_EXYNOS4412) static struct exynos_tmu_platform_data const exynos_default_tmu_data = { + .threshold_falling = 10, .trigger_levels[0] = 85, .trigger_levels[1] = 103, .trigger_levels[2] = 110, @@ -800,8 +822,6 @@ static const struct of_device_id exynos_tmu_match[] = { {}, }; MODULE_DEVICE_TABLE(of, exynos_tmu_match); -#else -#define exynos_tmu_match NULL #endif static struct platform_device_id exynos_tmu_driver_ids[] = { @@ -832,6 +852,94 @@ static inline struct exynos_tmu_platform_data *exynos_get_driver_data( return (struct exynos_tmu_platform_data *) platform_get_device_id(pdev)->driver_data; } + +#ifdef CONFIG_EXYNOS_THERMAL_EMUL +static ssize_t exynos_tmu_emulation_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct platform_device *pdev = container_of(dev, + struct platform_device, dev); + struct exynos_tmu_data *data = platform_get_drvdata(pdev); + unsigned int reg; + u8 temp_code; + int temp = 0; + + if (data->soc == SOC_ARCH_EXYNOS4210) + goto out; + + mutex_lock(&data->lock); + clk_enable(data->clk); + reg = readl(data->base + EXYNOS_EMUL_CON); + clk_disable(data->clk); + mutex_unlock(&data->lock); + + if (reg & EXYNOS_EMUL_ENABLE) { + reg >>= EXYNOS_EMUL_DATA_SHIFT; + temp_code = reg & EXYNOS_EMUL_DATA_MASK; + temp = code_to_temp(data, temp_code); + } +out: + return sprintf(buf, "%d\n", temp * MCELSIUS); +} + +static ssize_t exynos_tmu_emulation_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct platform_device *pdev = container_of(dev, + struct platform_device, dev); + struct exynos_tmu_data *data = platform_get_drvdata(pdev); + unsigned int reg; + int temp; + + if (data->soc == SOC_ARCH_EXYNOS4210) + goto out; + + if (!sscanf(buf, "%d\n", &temp) || temp < 0) + return -EINVAL; + + mutex_lock(&data->lock); + clk_enable(data->clk); + + reg = readl(data->base + EXYNOS_EMUL_CON); + + if (temp) { + /* Both CELSIUS and MCELSIUS type are available for input */ + if (temp > MCELSIUS) + temp /= MCELSIUS; + + reg = (EXYNOS_EMUL_TIME << EXYNOS_EMUL_TIME_SHIFT) | + (temp_to_code(data, (temp / MCELSIUS)) + << EXYNOS_EMUL_DATA_SHIFT) | EXYNOS_EMUL_ENABLE; + } else { + reg &= ~EXYNOS_EMUL_ENABLE; + } + + writel(reg, data->base + EXYNOS_EMUL_CON); + + clk_disable(data->clk); + mutex_unlock(&data->lock); + +out: + return count; +} + +static DEVICE_ATTR(emulation, 0644, exynos_tmu_emulation_show, + exynos_tmu_emulation_store); +static int create_emulation_sysfs(struct device *dev) +{ + return device_create_file(dev, &dev_attr_emulation); +} +static void remove_emulation_sysfs(struct device *dev) +{ + device_remove_file(dev, &dev_attr_emulation); +} +#else +static inline int create_emulation_sysfs(struct device *dev) { return 0; } +static inline void remove_emulation_sysfs(struct device *dev) {} +#endif + static int exynos_tmu_probe(struct platform_device *pdev) { struct exynos_tmu_data *data; @@ -914,6 +1022,8 @@ static int exynos_tmu_probe(struct platform_device *pdev) exynos_sensor_conf.trip_data.trip_val[i] = pdata->threshold + pdata->trigger_levels[i]; + exynos_sensor_conf.trip_data.trigger_falling = pdata->threshold_falling; + exynos_sensor_conf.cooling_data.freq_clip_count = pdata->freq_tab_count; for (i = 0; i < pdata->freq_tab_count; i++) { @@ -928,6 +1038,11 @@ static int exynos_tmu_probe(struct platform_device *pdev) dev_err(&pdev->dev, "Failed to register thermal interface\n"); goto err_clk; } + + ret = create_emulation_sysfs(&pdev->dev); + if (ret) + dev_err(&pdev->dev, "Failed to create emulation mode sysfs node\n"); + return 0; err_clk: platform_set_drvdata(pdev, NULL); @@ -939,6 +1054,8 @@ static int exynos_tmu_remove(struct platform_device *pdev) { struct exynos_tmu_data *data = platform_get_drvdata(pdev); + remove_emulation_sysfs(&pdev->dev); + exynos_tmu_control(pdev, false); exynos_unregister_thermal(); @@ -980,7 +1097,7 @@ static struct platform_driver exynos_tmu_driver = { .name = "exynos-tmu", .owner = THIS_MODULE, .pm = EXYNOS_TMU_PM, - .of_match_table = exynos_tmu_match, + .of_match_table = of_match_ptr(exynos_tmu_match), }, .probe = exynos_tmu_probe, .remove = exynos_tmu_remove, diff --git a/drivers/thermal/intel_powerclamp.c b/drivers/thermal/intel_powerclamp.c new file mode 100644 index 0000000..b40b37c --- /dev/null +++ b/drivers/thermal/intel_powerclamp.c @@ -0,0 +1,795 @@ +/* + * intel_powerclamp.c - package c-state idle injection + * + * Copyright (c) 2012, Intel Corporation. + * + * Authors: + * Arjan van de Ven <arjan@linux.intel.com> + * Jacob Pan <jacob.jun.pan@linux.intel.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + * + * + * TODO: + * 1. better handle wakeup from external interrupts, currently a fixed + * compensation is added to clamping duration when excessive amount + * of wakeups are observed during idle time. the reason is that in + * case of external interrupts without need for ack, clamping down + * cpu in non-irq context does not reduce irq. for majority of the + * cases, clamping down cpu does help reduce irq as well, we should + * be able to differenciate the two cases and give a quantitative + * solution for the irqs that we can control. perhaps based on + * get_cpu_iowait_time_us() + * + * 2. synchronization with other hw blocks + * + * + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/delay.h> +#include <linux/kthread.h> +#include <linux/freezer.h> +#include <linux/cpu.h> +#include <linux/thermal.h> +#include <linux/slab.h> +#include <linux/tick.h> +#include <linux/debugfs.h> +#include <linux/seq_file.h> +#include <linux/sched/rt.h> + +#include <asm/nmi.h> +#include <asm/msr.h> +#include <asm/mwait.h> +#include <asm/cpu_device_id.h> +#include <asm/idle.h> +#include <asm/hardirq.h> + +#define MAX_TARGET_RATIO (50U) +/* For each undisturbed clamping period (no extra wake ups during idle time), + * we increment the confidence counter for the given target ratio. + * CONFIDENCE_OK defines the level where runtime calibration results are + * valid. + */ +#define CONFIDENCE_OK (3) +/* Default idle injection duration, driver adjust sleep time to meet target + * idle ratio. Similar to frequency modulation. + */ +#define DEFAULT_DURATION_JIFFIES (6) + +static unsigned int target_mwait; +static struct dentry *debug_dir; + +/* user selected target */ +static unsigned int set_target_ratio; +static unsigned int current_ratio; +static bool should_skip; +static bool reduce_irq; +static atomic_t idle_wakeup_counter; +static unsigned int control_cpu; /* The cpu assigned to collect stat and update + * control parameters. default to BSP but BSP + * can be offlined. + */ +static bool clamping; + + +static struct task_struct * __percpu *powerclamp_thread; +static struct thermal_cooling_device *cooling_dev; +static unsigned long *cpu_clamping_mask; /* bit map for tracking per cpu + * clamping thread + */ + +static unsigned int duration; +static unsigned int pkg_cstate_ratio_cur; +static unsigned int window_size; + +static int duration_set(const char *arg, const struct kernel_param *kp) +{ + int ret = 0; + unsigned long new_duration; + + ret = kstrtoul(arg, 10, &new_duration); + if (ret) + goto exit; + if (new_duration > 25 || new_duration < 6) { + pr_err("Out of recommended range %lu, between 6-25ms\n", + new_duration); + ret = -EINVAL; + } + + duration = clamp(new_duration, 6ul, 25ul); + smp_mb(); + +exit: + + return ret; +} + +static struct kernel_param_ops duration_ops = { + .set = duration_set, + .get = param_get_int, +}; + + +module_param_cb(duration, &duration_ops, &duration, 0644); +MODULE_PARM_DESC(duration, "forced idle time for each attempt in msec."); + +struct powerclamp_calibration_data { + unsigned long confidence; /* used for calibration, basically a counter + * gets incremented each time a clamping + * period is completed without extra wakeups + * once that counter is reached given level, + * compensation is deemed usable. + */ + unsigned long steady_comp; /* steady state compensation used when + * no extra wakeups occurred. + */ + unsigned long dynamic_comp; /* compensate excessive wakeup from idle + * mostly from external interrupts. + */ +}; + +static struct powerclamp_calibration_data cal_data[MAX_TARGET_RATIO]; + +static int window_size_set(const char *arg, const struct kernel_param *kp) +{ + int ret = 0; + unsigned long new_window_size; + + ret = kstrtoul(arg, 10, &new_window_size); + if (ret) + goto exit_win; + if (new_window_size > 10 || new_window_size < 2) { + pr_err("Out of recommended window size %lu, between 2-10\n", + new_window_size); + ret = -EINVAL; + } + + window_size = clamp(new_window_size, 2ul, 10ul); + smp_mb(); + +exit_win: + + return ret; +} + +static struct kernel_param_ops window_size_ops = { + .set = window_size_set, + .get = param_get_int, +}; + +module_param_cb(window_size, &window_size_ops, &window_size, 0644); +MODULE_PARM_DESC(window_size, "sliding window in number of clamping cycles\n" + "\tpowerclamp controls idle ratio within this window. larger\n" + "\twindow size results in slower response time but more smooth\n" + "\tclamping results. default to 2."); + +static void find_target_mwait(void) +{ + unsigned int eax, ebx, ecx, edx; + unsigned int highest_cstate = 0; + unsigned int highest_subcstate = 0; + int i; + + if (boot_cpu_data.cpuid_level < CPUID_MWAIT_LEAF) + return; + + cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &edx); + + if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED) || + !(ecx & CPUID5_ECX_INTERRUPT_BREAK)) + return; + + edx >>= MWAIT_SUBSTATE_SIZE; + for (i = 0; i < 7 && edx; i++, edx >>= MWAIT_SUBSTATE_SIZE) { + if (edx & MWAIT_SUBSTATE_MASK) { + highest_cstate = i; + highest_subcstate = edx & MWAIT_SUBSTATE_MASK; + } + } + target_mwait = (highest_cstate << MWAIT_SUBSTATE_SIZE) | + (highest_subcstate - 1); + +} + +static u64 pkg_state_counter(void) +{ + u64 val; + u64 count = 0; + + static bool skip_c2; + static bool skip_c3; + static bool skip_c6; + static bool skip_c7; + + if (!skip_c2) { + if (!rdmsrl_safe(MSR_PKG_C2_RESIDENCY, &val)) + count += val; + else + skip_c2 = true; + } + + if (!skip_c3) { + if (!rdmsrl_safe(MSR_PKG_C3_RESIDENCY, &val)) + count += val; + else + skip_c3 = true; + } + + if (!skip_c6) { + if (!rdmsrl_safe(MSR_PKG_C6_RESIDENCY, &val)) + count += val; + else + skip_c6 = true; + } + + if (!skip_c7) { + if (!rdmsrl_safe(MSR_PKG_C7_RESIDENCY, &val)) + count += val; + else + skip_c7 = true; + } + + return count; +} + +static void noop_timer(unsigned long foo) +{ + /* empty... just the fact that we get the interrupt wakes us up */ +} + +static unsigned int get_compensation(int ratio) +{ + unsigned int comp = 0; + + /* we only use compensation if all adjacent ones are good */ + if (ratio == 1 && + cal_data[ratio].confidence >= CONFIDENCE_OK && + cal_data[ratio + 1].confidence >= CONFIDENCE_OK && + cal_data[ratio + 2].confidence >= CONFIDENCE_OK) { + comp = (cal_data[ratio].steady_comp + + cal_data[ratio + 1].steady_comp + + cal_data[ratio + 2].steady_comp) / 3; + } else if (ratio == MAX_TARGET_RATIO - 1 && + cal_data[ratio].confidence >= CONFIDENCE_OK && + cal_data[ratio - 1].confidence >= CONFIDENCE_OK && + cal_data[ratio - 2].confidence >= CONFIDENCE_OK) { + comp = (cal_data[ratio].steady_comp + + cal_data[ratio - 1].steady_comp + + cal_data[ratio - 2].steady_comp) / 3; + } else if (cal_data[ratio].confidence >= CONFIDENCE_OK && + cal_data[ratio - 1].confidence >= CONFIDENCE_OK && + cal_data[ratio + 1].confidence >= CONFIDENCE_OK) { + comp = (cal_data[ratio].steady_comp + + cal_data[ratio - 1].steady_comp + + cal_data[ratio + 1].steady_comp) / 3; + } + + /* REVISIT: simple penalty of double idle injection */ + if (reduce_irq) + comp = ratio; + /* do not exceed limit */ + if (comp + ratio >= MAX_TARGET_RATIO) + comp = MAX_TARGET_RATIO - ratio - 1; + + return comp; +} + +static void adjust_compensation(int target_ratio, unsigned int win) +{ + int delta; + struct powerclamp_calibration_data *d = &cal_data[target_ratio]; + + /* + * adjust compensations if confidence level has not been reached or + * there are too many wakeups during the last idle injection period, we + * cannot trust the data for compensation. + */ + if (d->confidence >= CONFIDENCE_OK || + atomic_read(&idle_wakeup_counter) > + win * num_online_cpus()) + return; + + delta = set_target_ratio - current_ratio; + /* filter out bad data */ + if (delta >= 0 && delta <= (1+target_ratio/10)) { + if (d->steady_comp) + d->steady_comp = + roundup(delta+d->steady_comp, 2)/2; + else + d->steady_comp = delta; + d->confidence++; + } +} + +static bool powerclamp_adjust_controls(unsigned int target_ratio, + unsigned int guard, unsigned int win) +{ + static u64 msr_last, tsc_last; + u64 msr_now, tsc_now; + u64 val64; + + /* check result for the last window */ + msr_now = pkg_state_counter(); + rdtscll(tsc_now); + + /* calculate pkg cstate vs tsc ratio */ + if (!msr_last || !tsc_last) + current_ratio = 1; + else if (tsc_now-tsc_last) { + val64 = 100*(msr_now-msr_last); + do_div(val64, (tsc_now-tsc_last)); + current_ratio = val64; + } + + /* update record */ + msr_last = msr_now; + tsc_last = tsc_now; + + adjust_compensation(target_ratio, win); + /* + * too many external interrupts, set flag such + * that we can take measure later. + */ + reduce_irq = atomic_read(&idle_wakeup_counter) >= + 2 * win * num_online_cpus(); + + atomic_set(&idle_wakeup_counter, 0); + /* if we are above target+guard, skip */ + return set_target_ratio + guard <= current_ratio; +} + +static int clamp_thread(void *arg) +{ + int cpunr = (unsigned long)arg; + DEFINE_TIMER(wakeup_timer, noop_timer, 0, 0); + static const struct sched_param param = { + .sched_priority = MAX_USER_RT_PRIO/2, + }; + unsigned int count = 0; + unsigned int target_ratio; + + set_bit(cpunr, cpu_clamping_mask); + set_freezable(); + init_timer_on_stack(&wakeup_timer); + sched_setscheduler(current, SCHED_FIFO, ¶m); + + while (true == clamping && !kthread_should_stop() && + cpu_online(cpunr)) { + int sleeptime; + unsigned long target_jiffies; + unsigned int guard; + unsigned int compensation = 0; + int interval; /* jiffies to sleep for each attempt */ + unsigned int duration_jiffies = msecs_to_jiffies(duration); + unsigned int window_size_now; + + try_to_freeze(); + /* + * make sure user selected ratio does not take effect until + * the next round. adjust target_ratio if user has changed + * target such that we can converge quickly. + */ + target_ratio = set_target_ratio; + guard = 1 + target_ratio/20; + window_size_now = window_size; + count++; + + /* + * systems may have different ability to enter package level + * c-states, thus we need to compensate the injected idle ratio + * to achieve the actual target reported by the HW. + */ + compensation = get_compensation(target_ratio); + interval = duration_jiffies*100/(target_ratio+compensation); + + /* align idle time */ + target_jiffies = roundup(jiffies, interval); + sleeptime = target_jiffies - jiffies; + if (sleeptime <= 0) + sleeptime = 1; + schedule_timeout_interruptible(sleeptime); + /* + * only elected controlling cpu can collect stats and update + * control parameters. + */ + if (cpunr == control_cpu && !(count%window_size_now)) { + should_skip = + powerclamp_adjust_controls(target_ratio, + guard, window_size_now); + smp_mb(); + } + + if (should_skip) + continue; + + target_jiffies = jiffies + duration_jiffies; + mod_timer(&wakeup_timer, target_jiffies); + if (unlikely(local_softirq_pending())) + continue; + /* + * stop tick sched during idle time, interrupts are still + * allowed. thus jiffies are updated properly. + */ + preempt_disable(); + tick_nohz_idle_enter(); + /* mwait until target jiffies is reached */ + while (time_before(jiffies, target_jiffies)) { + unsigned long ecx = 1; + unsigned long eax = target_mwait; + + /* + * REVISIT: may call enter_idle() to notify drivers who + * can save power during cpu idle. same for exit_idle() + */ + local_touch_nmi(); + stop_critical_timings(); + __monitor((void *)¤t_thread_info()->flags, 0, 0); + cpu_relax(); /* allow HT sibling to run */ + __mwait(eax, ecx); + start_critical_timings(); + atomic_inc(&idle_wakeup_counter); + } + tick_nohz_idle_exit(); + preempt_enable_no_resched(); + } + del_timer_sync(&wakeup_timer); + clear_bit(cpunr, cpu_clamping_mask); + + return 0; +} + +/* + * 1 HZ polling while clamping is active, useful for userspace + * to monitor actual idle ratio. + */ +static void poll_pkg_cstate(struct work_struct *dummy); +static DECLARE_DELAYED_WORK(poll_pkg_cstate_work, poll_pkg_cstate); +static void poll_pkg_cstate(struct work_struct *dummy) +{ + static u64 msr_last; + static u64 tsc_last; + static unsigned long jiffies_last; + + u64 msr_now; + unsigned long jiffies_now; + u64 tsc_now; + u64 val64; + + msr_now = pkg_state_counter(); + rdtscll(tsc_now); + jiffies_now = jiffies; + + /* calculate pkg cstate vs tsc ratio */ + if (!msr_last || !tsc_last) + pkg_cstate_ratio_cur = 1; + else { + if (tsc_now - tsc_last) { + val64 = 100 * (msr_now - msr_last); + do_div(val64, (tsc_now - tsc_last)); + pkg_cstate_ratio_cur = val64; + } + } + + /* update record */ + msr_last = msr_now; + jiffies_last = jiffies_now; + tsc_last = tsc_now; + + if (true == clamping) + schedule_delayed_work(&poll_pkg_cstate_work, HZ); +} + +static int start_power_clamp(void) +{ + unsigned long cpu; + struct task_struct *thread; + + /* check if pkg cstate counter is completely 0, abort in this case */ + if (!pkg_state_counter()) { + pr_err("pkg cstate counter not functional, abort\n"); + return -EINVAL; + } + + set_target_ratio = clamp(set_target_ratio, 0U, MAX_TARGET_RATIO - 1); + /* prevent cpu hotplug */ + get_online_cpus(); + + /* prefer BSP */ + control_cpu = 0; + if (!cpu_online(control_cpu)) + control_cpu = smp_processor_id(); + + clamping = true; + schedule_delayed_work(&poll_pkg_cstate_work, 0); + + /* start one thread per online cpu */ + for_each_online_cpu(cpu) { + struct task_struct **p = + per_cpu_ptr(powerclamp_thread, cpu); + + thread = kthread_create_on_node(clamp_thread, + (void *) cpu, + cpu_to_node(cpu), + "kidle_inject/%ld", cpu); + /* bind to cpu here */ + if (likely(!IS_ERR(thread))) { + kthread_bind(thread, cpu); + wake_up_process(thread); + *p = thread; + } + + } + put_online_cpus(); + + return 0; +} + +static void end_power_clamp(void) +{ + int i; + struct task_struct *thread; + + clamping = false; + /* + * make clamping visible to other cpus and give per cpu clamping threads + * sometime to exit, or gets killed later. + */ + smp_mb(); + msleep(20); + if (bitmap_weight(cpu_clamping_mask, num_possible_cpus())) { + for_each_set_bit(i, cpu_clamping_mask, num_possible_cpus()) { + pr_debug("clamping thread for cpu %d alive, kill\n", i); + thread = *per_cpu_ptr(powerclamp_thread, i); + kthread_stop(thread); + } + } +} + +static int powerclamp_cpu_callback(struct notifier_block *nfb, + unsigned long action, void *hcpu) +{ + unsigned long cpu = (unsigned long)hcpu; + struct task_struct *thread; + struct task_struct **percpu_thread = + per_cpu_ptr(powerclamp_thread, cpu); + + if (false == clamping) + goto exit_ok; + + switch (action) { + case CPU_ONLINE: + thread = kthread_create_on_node(clamp_thread, + (void *) cpu, + cpu_to_node(cpu), + "kidle_inject/%lu", cpu); + if (likely(!IS_ERR(thread))) { + kthread_bind(thread, cpu); + wake_up_process(thread); + *percpu_thread = thread; + } + /* prefer BSP as controlling CPU */ + if (cpu == 0) { + control_cpu = 0; + smp_mb(); + } + break; + case CPU_DEAD: + if (test_bit(cpu, cpu_clamping_mask)) { + pr_err("cpu %lu dead but powerclamping thread is not\n", + cpu); + kthread_stop(*percpu_thread); + } + if (cpu == control_cpu) { + control_cpu = smp_processor_id(); + smp_mb(); + } + } + +exit_ok: + return NOTIFY_OK; +} + +static struct notifier_block powerclamp_cpu_notifier = { + .notifier_call = powerclamp_cpu_callback, +}; + +static int powerclamp_get_max_state(struct thermal_cooling_device *cdev, + unsigned long *state) +{ + *state = MAX_TARGET_RATIO; + + return 0; +} + +static int powerclamp_get_cur_state(struct thermal_cooling_device *cdev, + unsigned long *state) +{ + if (true == clamping) + *state = pkg_cstate_ratio_cur; + else + /* to save power, do not poll idle ratio while not clamping */ + *state = -1; /* indicates invalid state */ + + return 0; +} + +static int powerclamp_set_cur_state(struct thermal_cooling_device *cdev, + unsigned long new_target_ratio) +{ + int ret = 0; + + new_target_ratio = clamp(new_target_ratio, 0UL, + (unsigned long) (MAX_TARGET_RATIO-1)); + if (set_target_ratio == 0 && new_target_ratio > 0) { + pr_info("Start idle injection to reduce power\n"); + set_target_ratio = new_target_ratio; + ret = start_power_clamp(); + goto exit_set; + } else if (set_target_ratio > 0 && new_target_ratio == 0) { + pr_info("Stop forced idle injection\n"); + set_target_ratio = 0; + end_power_clamp(); + } else /* adjust currently running */ { + set_target_ratio = new_target_ratio; + /* make new set_target_ratio visible to other cpus */ + smp_mb(); + } + +exit_set: + return ret; +} + +/* bind to generic thermal layer as cooling device*/ +static struct thermal_cooling_device_ops powerclamp_cooling_ops = { + .get_max_state = powerclamp_get_max_state, + .get_cur_state = powerclamp_get_cur_state, + .set_cur_state = powerclamp_set_cur_state, +}; + +/* runs on Nehalem and later */ +static const struct x86_cpu_id intel_powerclamp_ids[] = { + { X86_VENDOR_INTEL, 6, 0x1a}, + { X86_VENDOR_INTEL, 6, 0x1c}, + { X86_VENDOR_INTEL, 6, 0x1e}, + { X86_VENDOR_INTEL, 6, 0x1f}, + { X86_VENDOR_INTEL, 6, 0x25}, + { X86_VENDOR_INTEL, 6, 0x26}, + { X86_VENDOR_INTEL, 6, 0x2a}, + { X86_VENDOR_INTEL, 6, 0x2c}, + { X86_VENDOR_INTEL, 6, 0x2d}, + { X86_VENDOR_INTEL, 6, 0x2e}, + { X86_VENDOR_INTEL, 6, 0x2f}, + { X86_VENDOR_INTEL, 6, 0x3a}, + {} +}; +MODULE_DEVICE_TABLE(x86cpu, intel_powerclamp_ids); + +static int powerclamp_probe(void) +{ + if (!x86_match_cpu(intel_powerclamp_ids)) { + pr_err("Intel powerclamp does not run on family %d model %d\n", + boot_cpu_data.x86, boot_cpu_data.x86_model); + return -ENODEV; + } + if (!boot_cpu_has(X86_FEATURE_NONSTOP_TSC) || + !boot_cpu_has(X86_FEATURE_CONSTANT_TSC) || + !boot_cpu_has(X86_FEATURE_MWAIT) || + !boot_cpu_has(X86_FEATURE_ARAT)) + return -ENODEV; + + /* find the deepest mwait value */ + find_target_mwait(); + + return 0; +} + +static int powerclamp_debug_show(struct seq_file *m, void *unused) +{ + int i = 0; + + seq_printf(m, "controlling cpu: %d\n", control_cpu); + seq_printf(m, "pct confidence steady dynamic (compensation)\n"); + for (i = 0; i < MAX_TARGET_RATIO; i++) { + seq_printf(m, "%d\t%lu\t%lu\t%lu\n", + i, + cal_data[i].confidence, + cal_data[i].steady_comp, + cal_data[i].dynamic_comp); + } + + return 0; +} + +static int powerclamp_debug_open(struct inode *inode, + struct file *file) +{ + return single_open(file, powerclamp_debug_show, inode->i_private); +} + +static const struct file_operations powerclamp_debug_fops = { + .open = powerclamp_debug_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, + .owner = THIS_MODULE, +}; + +static inline void powerclamp_create_debug_files(void) +{ + debug_dir = debugfs_create_dir("intel_powerclamp", NULL); + if (!debug_dir) + return; + + if (!debugfs_create_file("powerclamp_calib", S_IRUGO, debug_dir, + cal_data, &powerclamp_debug_fops)) + goto file_error; + + return; + +file_error: + debugfs_remove_recursive(debug_dir); +} + +static int powerclamp_init(void) +{ + int retval; + int bitmap_size; + + bitmap_size = BITS_TO_LONGS(num_possible_cpus()) * sizeof(long); + cpu_clamping_mask = kzalloc(bitmap_size, GFP_KERNEL); + if (!cpu_clamping_mask) + return -ENOMEM; + + /* probe cpu features and ids here */ + retval = powerclamp_probe(); + if (retval) + return retval; + /* set default limit, maybe adjusted during runtime based on feedback */ + window_size = 2; + register_hotcpu_notifier(&powerclamp_cpu_notifier); + powerclamp_thread = alloc_percpu(struct task_struct *); + cooling_dev = thermal_cooling_device_register("intel_powerclamp", NULL, + &powerclamp_cooling_ops); + if (IS_ERR(cooling_dev)) + return -ENODEV; + + if (!duration) + duration = jiffies_to_msecs(DEFAULT_DURATION_JIFFIES); + powerclamp_create_debug_files(); + + return 0; +} +module_init(powerclamp_init); + +static void powerclamp_exit(void) +{ + unregister_hotcpu_notifier(&powerclamp_cpu_notifier); + end_power_clamp(); + free_percpu(powerclamp_thread); + thermal_cooling_device_unregister(cooling_dev); + kfree(cpu_clamping_mask); + + cancel_delayed_work_sync(&poll_pkg_cstate_work); + debugfs_remove_recursive(debug_dir); +} +module_exit(powerclamp_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Arjan van de Ven <arjan@linux.intel.com>"); +MODULE_AUTHOR("Jacob Pan <jacob.jun.pan@linux.intel.com>"); +MODULE_DESCRIPTION("Package Level C-state Idle Injection for Intel CPUs"); diff --git a/drivers/thermal/kirkwood_thermal.c b/drivers/thermal/kirkwood_thermal.c new file mode 100644 index 0000000..65cb4f0 --- /dev/null +++ b/drivers/thermal/kirkwood_thermal.c @@ -0,0 +1,134 @@ +/* + * Kirkwood thermal sensor driver + * + * Copyright (C) 2012 Nobuhiro Iwamatsu <iwamatsu@nigauri.org> + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ +#include <linux/device.h> +#include <linux/err.h> +#include <linux/io.h> +#include <linux/kernel.h> +#include <linux/of.h> +#include <linux/module.h> +#include <linux/platform_device.h> +#include <linux/thermal.h> + +#define KIRKWOOD_THERMAL_VALID_OFFSET 9 +#define KIRKWOOD_THERMAL_VALID_MASK 0x1 +#define KIRKWOOD_THERMAL_TEMP_OFFSET 10 +#define KIRKWOOD_THERMAL_TEMP_MASK 0x1FF + +/* Kirkwood Thermal Sensor Dev Structure */ +struct kirkwood_thermal_priv { + void __iomem *sensor; +}; + +static int kirkwood_get_temp(struct thermal_zone_device *thermal, + unsigned long *temp) +{ + unsigned long reg; + struct kirkwood_thermal_priv *priv = thermal->devdata; + + reg = readl_relaxed(priv->sensor); + + /* Valid check */ + if (!(reg >> KIRKWOOD_THERMAL_VALID_OFFSET) & + KIRKWOOD_THERMAL_VALID_MASK) { + dev_err(&thermal->device, + "Temperature sensor reading not valid\n"); + return -EIO; + } + + /* + * Calculate temperature. See Section 8.10.1 of the 88AP510, + * datasheet, which has the same sensor. + * Documentation/arm/Marvell/README + */ + reg = (reg >> KIRKWOOD_THERMAL_TEMP_OFFSET) & + KIRKWOOD_THERMAL_TEMP_MASK; + *temp = ((2281638UL - (7298*reg)) / 10); + + return 0; +} + +static struct thermal_zone_device_ops ops = { + .get_temp = kirkwood_get_temp, +}; + +static const struct of_device_id kirkwood_thermal_id_table[] = { + { .compatible = "marvell,kirkwood-thermal" }, + {} +}; + +static int kirkwood_thermal_probe(struct platform_device *pdev) +{ + struct thermal_zone_device *thermal = NULL; + struct kirkwood_thermal_priv *priv; + struct resource *res; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!res) { + dev_err(&pdev->dev, "Failed to get platform resource\n"); + return -ENODEV; + } + + priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL); + if (!priv) + return -ENOMEM; + + priv->sensor = devm_request_and_ioremap(&pdev->dev, res); + if (!priv->sensor) { + dev_err(&pdev->dev, "Failed to request_ioremap memory\n"); + return -EADDRNOTAVAIL; + } + + thermal = thermal_zone_device_register("kirkwood_thermal", 0, 0, + priv, &ops, NULL, 0, 0); + if (IS_ERR(thermal)) { + dev_err(&pdev->dev, + "Failed to register thermal zone device\n"); + return PTR_ERR(thermal); + } + + platform_set_drvdata(pdev, thermal); + + return 0; +} + +static int kirkwood_thermal_exit(struct platform_device *pdev) +{ + struct thermal_zone_device *kirkwood_thermal = + platform_get_drvdata(pdev); + + thermal_zone_device_unregister(kirkwood_thermal); + platform_set_drvdata(pdev, NULL); + + return 0; +} + +MODULE_DEVICE_TABLE(of, kirkwood_thermal_id_table); + +static struct platform_driver kirkwood_thermal_driver = { + .probe = kirkwood_thermal_probe, + .remove = kirkwood_thermal_exit, + .driver = { + .name = "kirkwood_thermal", + .owner = THIS_MODULE, + .of_match_table = of_match_ptr(kirkwood_thermal_id_table), + }, +}; + +module_platform_driver(kirkwood_thermal_driver); + +MODULE_AUTHOR("Nobuhiro Iwamatsu <iwamatsu@nigauri.org>"); +MODULE_DESCRIPTION("kirkwood thermal driver"); +MODULE_LICENSE("GPL"); diff --git a/drivers/thermal/rcar_thermal.c b/drivers/thermal/rcar_thermal.c index 90db951..28f0919 100644 --- a/drivers/thermal/rcar_thermal.c +++ b/drivers/thermal/rcar_thermal.c @@ -19,225 +19,473 @@ */ #include <linux/delay.h> #include <linux/err.h> +#include <linux/irq.h> +#include <linux/interrupt.h> #include <linux/io.h> #include <linux/module.h> #include <linux/platform_device.h> +#include <linux/reboot.h> #include <linux/slab.h> #include <linux/spinlock.h> #include <linux/thermal.h> -#define THSCR 0x2c -#define THSSR 0x30 +#define IDLE_INTERVAL 5000 + +#define COMMON_STR 0x00 +#define COMMON_ENR 0x04 +#define COMMON_INTMSK 0x0c + +#define REG_POSNEG 0x20 +#define REG_FILONOFF 0x28 +#define REG_THSCR 0x2c +#define REG_THSSR 0x30 +#define REG_INTCTRL 0x34 /* THSCR */ -#define CPTAP 0xf +#define CPCTL (1 << 12) /* THSSR */ #define CTEMP 0x3f - -struct rcar_thermal_priv { +struct rcar_thermal_common { void __iomem *base; struct device *dev; + struct list_head head; spinlock_t lock; - u32 comp; }; +struct rcar_thermal_priv { + void __iomem *base; + struct rcar_thermal_common *common; + struct thermal_zone_device *zone; + struct delayed_work work; + struct mutex lock; + struct list_head list; + int id; + int ctemp; +}; + +#define rcar_thermal_for_each_priv(pos, common) \ + list_for_each_entry(pos, &common->head, list) + #define MCELSIUS(temp) ((temp) * 1000) -#define rcar_zone_to_priv(zone) (zone->devdata) +#define rcar_zone_to_priv(zone) ((zone)->devdata) +#define rcar_priv_to_dev(priv) ((priv)->common->dev) +#define rcar_has_irq_support(priv) ((priv)->common->base) +#define rcar_id_to_shift(priv) ((priv)->id * 8) + +#ifdef DEBUG +# define rcar_force_update_temp(priv) 1 +#else +# define rcar_force_update_temp(priv) 0 +#endif /* * basic functions */ -static u32 rcar_thermal_read(struct rcar_thermal_priv *priv, u32 reg) +#define rcar_thermal_common_read(c, r) \ + _rcar_thermal_common_read(c, COMMON_ ##r) +static u32 _rcar_thermal_common_read(struct rcar_thermal_common *common, + u32 reg) { - unsigned long flags; - u32 ret; - - spin_lock_irqsave(&priv->lock, flags); + return ioread32(common->base + reg); +} - ret = ioread32(priv->base + reg); +#define rcar_thermal_common_write(c, r, d) \ + _rcar_thermal_common_write(c, COMMON_ ##r, d) +static void _rcar_thermal_common_write(struct rcar_thermal_common *common, + u32 reg, u32 data) +{ + iowrite32(data, common->base + reg); +} - spin_unlock_irqrestore(&priv->lock, flags); +#define rcar_thermal_common_bset(c, r, m, d) \ + _rcar_thermal_common_bset(c, COMMON_ ##r, m, d) +static void _rcar_thermal_common_bset(struct rcar_thermal_common *common, + u32 reg, u32 mask, u32 data) +{ + u32 val; - return ret; + val = ioread32(common->base + reg); + val &= ~mask; + val |= (data & mask); + iowrite32(val, common->base + reg); } -#if 0 /* no user at this point */ -static void rcar_thermal_write(struct rcar_thermal_priv *priv, - u32 reg, u32 data) +#define rcar_thermal_read(p, r) _rcar_thermal_read(p, REG_ ##r) +static u32 _rcar_thermal_read(struct rcar_thermal_priv *priv, u32 reg) { - unsigned long flags; - - spin_lock_irqsave(&priv->lock, flags); + return ioread32(priv->base + reg); +} +#define rcar_thermal_write(p, r, d) _rcar_thermal_write(p, REG_ ##r, d) +static void _rcar_thermal_write(struct rcar_thermal_priv *priv, + u32 reg, u32 data) +{ iowrite32(data, priv->base + reg); - - spin_unlock_irqrestore(&priv->lock, flags); } -#endif -static void rcar_thermal_bset(struct rcar_thermal_priv *priv, u32 reg, - u32 mask, u32 data) +#define rcar_thermal_bset(p, r, m, d) _rcar_thermal_bset(p, REG_ ##r, m, d) +static void _rcar_thermal_bset(struct rcar_thermal_priv *priv, u32 reg, + u32 mask, u32 data) { - unsigned long flags; u32 val; - spin_lock_irqsave(&priv->lock, flags); - val = ioread32(priv->base + reg); val &= ~mask; val |= (data & mask); iowrite32(val, priv->base + reg); - - spin_unlock_irqrestore(&priv->lock, flags); } /* * zone device functions */ -static int rcar_thermal_get_temp(struct thermal_zone_device *zone, - unsigned long *temp) +static int rcar_thermal_update_temp(struct rcar_thermal_priv *priv) { - struct rcar_thermal_priv *priv = rcar_zone_to_priv(zone); - int val, min, max, tmp; - - tmp = -200; /* default */ - while (1) { - if (priv->comp < 1 || priv->comp > 12) { - dev_err(priv->dev, - "THSSR invalid data (%d)\n", priv->comp); - priv->comp = 4; /* for next thermal */ - return -EINVAL; - } + struct device *dev = rcar_priv_to_dev(priv); + int i; + int ctemp, old, new; - /* - * THS comparator offset and the reference temperature - * - * Comparator | reference | Temperature field - * offset | temperature | measurement - * | (degrees C) | (degrees C) - * -------------+---------------+------------------- - * 1 | -45 | -45 to -30 - * 2 | -30 | -30 to -15 - * 3 | -15 | -15 to 0 - * 4 | 0 | 0 to +15 - * 5 | +15 | +15 to +30 - * 6 | +30 | +30 to +45 - * 7 | +45 | +45 to +60 - * 8 | +60 | +60 to +75 - * 9 | +75 | +75 to +90 - * 10 | +90 | +90 to +105 - * 11 | +105 | +105 to +120 - * 12 | +120 | +120 to +135 - */ + mutex_lock(&priv->lock); - /* calculate thermal limitation */ - min = (priv->comp * 15) - 60; - max = min + 15; + /* + * TSC decides a value of CPTAP automatically, + * and this is the conditions which validate interrupt. + */ + rcar_thermal_bset(priv, THSCR, CPCTL, CPCTL); + ctemp = 0; + old = ~0; + for (i = 0; i < 128; i++) { /* * we need to wait 300us after changing comparator offset * to get stable temperature. * see "Usage Notes" on datasheet */ - rcar_thermal_bset(priv, THSCR, CPTAP, priv->comp); udelay(300); - /* calculate current temperature */ - val = rcar_thermal_read(priv, THSSR) & CTEMP; - val = (val * 5) - 65; + new = rcar_thermal_read(priv, THSSR) & CTEMP; + if (new == old) { + ctemp = new; + break; + } + old = new; + } - dev_dbg(priv->dev, "comp/min/max/val = %d/%d/%d/%d\n", - priv->comp, min, max, val); + if (!ctemp) { + dev_err(dev, "thermal sensor was broken\n"); + return -EINVAL; + } - /* - * If val is same as min/max, then, - * it should try again on next comparator. - * But the val might be correct temperature. - * Keep it on "tmp" and compare with next val. - */ - if (tmp == val) - break; + /* + * enable IRQ + */ + if (rcar_has_irq_support(priv)) { + rcar_thermal_write(priv, FILONOFF, 0); - if (val <= min) { - tmp = min; - priv->comp--; /* try again */ - } else if (val >= max) { - tmp = max; - priv->comp++; /* try again */ - } else { - tmp = val; - break; - } + /* enable Rising/Falling edge interrupt */ + rcar_thermal_write(priv, POSNEG, 0x1); + rcar_thermal_write(priv, INTCTRL, (((ctemp - 0) << 8) | + ((ctemp - 1) << 0))); + } + + dev_dbg(dev, "thermal%d %d -> %d\n", priv->id, priv->ctemp, ctemp); + + priv->ctemp = ctemp; + + mutex_unlock(&priv->lock); + + return 0; +} + +static int rcar_thermal_get_temp(struct thermal_zone_device *zone, + unsigned long *temp) +{ + struct rcar_thermal_priv *priv = rcar_zone_to_priv(zone); + + if (!rcar_has_irq_support(priv) || rcar_force_update_temp(priv)) + rcar_thermal_update_temp(priv); + + mutex_lock(&priv->lock); + *temp = MCELSIUS((priv->ctemp * 5) - 65); + mutex_unlock(&priv->lock); + + return 0; +} + +static int rcar_thermal_get_trip_type(struct thermal_zone_device *zone, + int trip, enum thermal_trip_type *type) +{ + struct rcar_thermal_priv *priv = rcar_zone_to_priv(zone); + struct device *dev = rcar_priv_to_dev(priv); + + /* see rcar_thermal_get_temp() */ + switch (trip) { + case 0: /* +90 <= temp */ + *type = THERMAL_TRIP_CRITICAL; + break; + default: + dev_err(dev, "rcar driver trip error\n"); + return -EINVAL; + } + + return 0; +} + +static int rcar_thermal_get_trip_temp(struct thermal_zone_device *zone, + int trip, unsigned long *temp) +{ + struct rcar_thermal_priv *priv = rcar_zone_to_priv(zone); + struct device *dev = rcar_priv_to_dev(priv); + + /* see rcar_thermal_get_temp() */ + switch (trip) { + case 0: /* +90 <= temp */ + *temp = MCELSIUS(90); + break; + default: + dev_err(dev, "rcar driver trip error\n"); + return -EINVAL; + } + + return 0; +} + +static int rcar_thermal_notify(struct thermal_zone_device *zone, + int trip, enum thermal_trip_type type) +{ + struct rcar_thermal_priv *priv = rcar_zone_to_priv(zone); + struct device *dev = rcar_priv_to_dev(priv); + + switch (type) { + case THERMAL_TRIP_CRITICAL: + /* FIXME */ + dev_warn(dev, "Thermal reached to critical temperature\n"); + break; + default: + break; } - *temp = MCELSIUS(tmp); return 0; } static struct thermal_zone_device_ops rcar_thermal_zone_ops = { - .get_temp = rcar_thermal_get_temp, + .get_temp = rcar_thermal_get_temp, + .get_trip_type = rcar_thermal_get_trip_type, + .get_trip_temp = rcar_thermal_get_trip_temp, + .notify = rcar_thermal_notify, }; /* - * platform functions + * interrupt */ -static int rcar_thermal_probe(struct platform_device *pdev) +#define rcar_thermal_irq_enable(p) _rcar_thermal_irq_ctrl(p, 1) +#define rcar_thermal_irq_disable(p) _rcar_thermal_irq_ctrl(p, 0) +static void _rcar_thermal_irq_ctrl(struct rcar_thermal_priv *priv, int enable) +{ + struct rcar_thermal_common *common = priv->common; + unsigned long flags; + u32 mask = 0x3 << rcar_id_to_shift(priv); /* enable Rising/Falling */ + + spin_lock_irqsave(&common->lock, flags); + + rcar_thermal_common_bset(common, INTMSK, mask, enable ? 0 : mask); + + spin_unlock_irqrestore(&common->lock, flags); +} + +static void rcar_thermal_work(struct work_struct *work) { - struct thermal_zone_device *zone; struct rcar_thermal_priv *priv; - struct resource *res; - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - if (!res) { - dev_err(&pdev->dev, "Could not get platform resource\n"); - return -ENODEV; + priv = container_of(work, struct rcar_thermal_priv, work.work); + + rcar_thermal_update_temp(priv); + rcar_thermal_irq_enable(priv); + thermal_zone_device_update(priv->zone); +} + +static u32 rcar_thermal_had_changed(struct rcar_thermal_priv *priv, u32 status) +{ + struct device *dev = rcar_priv_to_dev(priv); + + status = (status >> rcar_id_to_shift(priv)) & 0x3; + + if (status & 0x3) { + dev_dbg(dev, "thermal%d %s%s\n", + priv->id, + (status & 0x2) ? "Rising " : "", + (status & 0x1) ? "Falling" : ""); } - priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL); - if (!priv) { - dev_err(&pdev->dev, "Could not allocate priv\n"); - return -ENOMEM; + return status; +} + +static irqreturn_t rcar_thermal_irq(int irq, void *data) +{ + struct rcar_thermal_common *common = data; + struct rcar_thermal_priv *priv; + unsigned long flags; + u32 status, mask; + + spin_lock_irqsave(&common->lock, flags); + + mask = rcar_thermal_common_read(common, INTMSK); + status = rcar_thermal_common_read(common, STR); + rcar_thermal_common_write(common, STR, 0x000F0F0F & mask); + + spin_unlock_irqrestore(&common->lock, flags); + + status = status & ~mask; + + /* + * check the status + */ + rcar_thermal_for_each_priv(priv, common) { + if (rcar_thermal_had_changed(priv, status)) { + rcar_thermal_irq_disable(priv); + schedule_delayed_work(&priv->work, + msecs_to_jiffies(300)); + } } - priv->comp = 4; /* basic setup */ - priv->dev = &pdev->dev; - spin_lock_init(&priv->lock); - priv->base = devm_ioremap_nocache(&pdev->dev, - res->start, resource_size(res)); - if (!priv->base) { - dev_err(&pdev->dev, "Unable to ioremap thermal register\n"); + return IRQ_HANDLED; +} + +/* + * platform functions + */ +static int rcar_thermal_probe(struct platform_device *pdev) +{ + struct rcar_thermal_common *common; + struct rcar_thermal_priv *priv; + struct device *dev = &pdev->dev; + struct resource *res, *irq; + int mres = 0; + int i; + int idle = IDLE_INTERVAL; + + common = devm_kzalloc(dev, sizeof(*common), GFP_KERNEL); + if (!common) { + dev_err(dev, "Could not allocate common\n"); return -ENOMEM; } - zone = thermal_zone_device_register("rcar_thermal", 0, 0, priv, - &rcar_thermal_zone_ops, NULL, 0, 0); - if (IS_ERR(zone)) { - dev_err(&pdev->dev, "thermal zone device is NULL\n"); - return PTR_ERR(zone); + INIT_LIST_HEAD(&common->head); + spin_lock_init(&common->lock); + common->dev = dev; + + irq = platform_get_resource(pdev, IORESOURCE_IRQ, 0); + if (irq) { + int ret; + + /* + * platform has IRQ support. + * Then, drier use common register + */ + res = platform_get_resource(pdev, IORESOURCE_MEM, mres++); + if (!res) { + dev_err(dev, "Could not get platform resource\n"); + return -ENODEV; + } + + ret = devm_request_irq(dev, irq->start, rcar_thermal_irq, 0, + dev_name(dev), common); + if (ret) { + dev_err(dev, "irq request failed\n "); + return ret; + } + + /* + * rcar_has_irq_support() will be enabled + */ + common->base = devm_request_and_ioremap(dev, res); + if (!common->base) { + dev_err(dev, "Unable to ioremap thermal register\n"); + return -ENOMEM; + } + + /* enable temperature comparation */ + rcar_thermal_common_write(common, ENR, 0x00030303); + + idle = 0; /* polling delaye is not needed */ } - platform_set_drvdata(pdev, zone); + for (i = 0;; i++) { + res = platform_get_resource(pdev, IORESOURCE_MEM, mres++); + if (!res) + break; + + priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL); + if (!priv) { + dev_err(dev, "Could not allocate priv\n"); + return -ENOMEM; + } + + priv->base = devm_request_and_ioremap(dev, res); + if (!priv->base) { + dev_err(dev, "Unable to ioremap priv register\n"); + return -ENOMEM; + } - dev_info(&pdev->dev, "proved\n"); + priv->common = common; + priv->id = i; + mutex_init(&priv->lock); + INIT_LIST_HEAD(&priv->list); + INIT_DELAYED_WORK(&priv->work, rcar_thermal_work); + rcar_thermal_update_temp(priv); + + priv->zone = thermal_zone_device_register("rcar_thermal", + 1, 0, priv, + &rcar_thermal_zone_ops, NULL, 0, + idle); + if (IS_ERR(priv->zone)) { + dev_err(dev, "can't register thermal zone\n"); + goto error_unregister; + } + + list_move_tail(&priv->list, &common->head); + + if (rcar_has_irq_support(priv)) + rcar_thermal_irq_enable(priv); + } + + platform_set_drvdata(pdev, common); + + dev_info(dev, "%d sensor proved\n", i); return 0; + +error_unregister: + rcar_thermal_for_each_priv(priv, common) + thermal_zone_device_unregister(priv->zone); + + return -ENODEV; } static int rcar_thermal_remove(struct platform_device *pdev) { - struct thermal_zone_device *zone = platform_get_drvdata(pdev); + struct rcar_thermal_common *common = platform_get_drvdata(pdev); + struct rcar_thermal_priv *priv; + + rcar_thermal_for_each_priv(priv, common) + thermal_zone_device_unregister(priv->zone); - thermal_zone_device_unregister(zone); platform_set_drvdata(pdev, NULL); return 0; } +static const struct of_device_id rcar_thermal_dt_ids[] = { + { .compatible = "renesas,rcar-thermal", }, + {}, +}; +MODULE_DEVICE_TABLE(of, rcar_thermal_dt_ids); + static struct platform_driver rcar_thermal_driver = { .driver = { .name = "rcar_thermal", + .of_match_table = rcar_thermal_dt_ids, }, .probe = rcar_thermal_probe, .remove = rcar_thermal_remove, diff --git a/drivers/thermal/spear_thermal.c b/drivers/thermal/spear_thermal.c index 6b2d8b2..3c5ee56 100644 --- a/drivers/thermal/spear_thermal.c +++ b/drivers/thermal/spear_thermal.c @@ -131,7 +131,7 @@ static int spear_thermal_probe(struct platform_device *pdev) return -ENOMEM; } - stdev->clk = clk_get(&pdev->dev, NULL); + stdev->clk = devm_clk_get(&pdev->dev, NULL); if (IS_ERR(stdev->clk)) { dev_err(&pdev->dev, "Can't get clock\n"); return PTR_ERR(stdev->clk); @@ -140,7 +140,7 @@ static int spear_thermal_probe(struct platform_device *pdev) ret = clk_enable(stdev->clk); if (ret) { dev_err(&pdev->dev, "Can't enable clock\n"); - goto put_clk; + return ret; } stdev->flags = val; @@ -163,8 +163,6 @@ static int spear_thermal_probe(struct platform_device *pdev) disable_clk: clk_disable(stdev->clk); -put_clk: - clk_put(stdev->clk); return ret; } @@ -183,7 +181,6 @@ static int spear_thermal_exit(struct platform_device *pdev) writel_relaxed(actual_mask & ~stdev->flags, stdev->thermal_base); clk_disable(stdev->clk); - clk_put(stdev->clk); return 0; } diff --git a/drivers/thermal/step_wise.c b/drivers/thermal/step_wise.c index 0cd5e9f..407cde3 100644 --- a/drivers/thermal/step_wise.c +++ b/drivers/thermal/step_wise.c @@ -35,21 +35,54 @@ * state for this trip point * b. if the trend is THERMAL_TREND_DROPPING, use lower cooling * state for this trip point + * c. if the trend is THERMAL_TREND_RAISE_FULL, use upper limit + * for this trip point + * d. if the trend is THERMAL_TREND_DROP_FULL, use lower limit + * for this trip point + * If the temperature is lower than a trip point, + * a. if the trend is THERMAL_TREND_RAISING, do nothing + * b. if the trend is THERMAL_TREND_DROPPING, use lower cooling + * state for this trip point, if the cooling state already + * equals lower limit, deactivate the thermal instance + * c. if the trend is THERMAL_TREND_RAISE_FULL, do nothing + * d. if the trend is THERMAL_TREND_DROP_FULL, use lower limit, + * if the cooling state already equals lower limit, + * deactive the thermal instance */ static unsigned long get_target_state(struct thermal_instance *instance, - enum thermal_trend trend) + enum thermal_trend trend, bool throttle) { struct thermal_cooling_device *cdev = instance->cdev; unsigned long cur_state; cdev->ops->get_cur_state(cdev, &cur_state); - if (trend == THERMAL_TREND_RAISING) { - cur_state = cur_state < instance->upper ? - (cur_state + 1) : instance->upper; - } else if (trend == THERMAL_TREND_DROPPING) { - cur_state = cur_state > instance->lower ? - (cur_state - 1) : instance->lower; + switch (trend) { + case THERMAL_TREND_RAISING: + if (throttle) + cur_state = cur_state < instance->upper ? + (cur_state + 1) : instance->upper; + break; + case THERMAL_TREND_RAISE_FULL: + if (throttle) + cur_state = instance->upper; + break; + case THERMAL_TREND_DROPPING: + if (cur_state == instance->lower) { + if (!throttle) + cur_state = -1; + } else + cur_state -= 1; + break; + case THERMAL_TREND_DROP_FULL: + if (cur_state == instance->lower) { + if (!throttle) + cur_state = -1; + } else + cur_state = instance->lower; + break; + default: + break; } return cur_state; @@ -66,57 +99,14 @@ static void update_passive_instance(struct thermal_zone_device *tz, tz->passive += value; } -static void update_instance_for_throttle(struct thermal_zone_device *tz, - int trip, enum thermal_trip_type trip_type, - enum thermal_trend trend) -{ - struct thermal_instance *instance; - - list_for_each_entry(instance, &tz->thermal_instances, tz_node) { - if (instance->trip != trip) - continue; - - instance->target = get_target_state(instance, trend); - - /* Activate a passive thermal instance */ - if (instance->target == THERMAL_NO_TARGET) - update_passive_instance(tz, trip_type, 1); - - instance->cdev->updated = false; /* cdev needs update */ - } -} - -static void update_instance_for_dethrottle(struct thermal_zone_device *tz, - int trip, enum thermal_trip_type trip_type) -{ - struct thermal_instance *instance; - struct thermal_cooling_device *cdev; - unsigned long cur_state; - - list_for_each_entry(instance, &tz->thermal_instances, tz_node) { - if (instance->trip != trip || - instance->target == THERMAL_NO_TARGET) - continue; - - cdev = instance->cdev; - cdev->ops->get_cur_state(cdev, &cur_state); - - instance->target = cur_state > instance->lower ? - (cur_state - 1) : THERMAL_NO_TARGET; - - /* Deactivate a passive thermal instance */ - if (instance->target == THERMAL_NO_TARGET) - update_passive_instance(tz, trip_type, -1); - - cdev->updated = false; /* cdev needs update */ - } -} - static void thermal_zone_trip_update(struct thermal_zone_device *tz, int trip) { long trip_temp; enum thermal_trip_type trip_type; enum thermal_trend trend; + struct thermal_instance *instance; + bool throttle = false; + int old_target; if (trip == THERMAL_TRIPS_NONE) { trip_temp = tz->forced_passive; @@ -128,12 +118,30 @@ static void thermal_zone_trip_update(struct thermal_zone_device *tz, int trip) trend = get_tz_trend(tz, trip); + if (tz->temperature >= trip_temp) + throttle = true; + mutex_lock(&tz->lock); - if (tz->temperature >= trip_temp) - update_instance_for_throttle(tz, trip, trip_type, trend); - else - update_instance_for_dethrottle(tz, trip, trip_type); + list_for_each_entry(instance, &tz->thermal_instances, tz_node) { + if (instance->trip != trip) + continue; + + old_target = instance->target; + instance->target = get_target_state(instance, trend, throttle); + + /* Activate a passive thermal instance */ + if (old_target == THERMAL_NO_TARGET && + instance->target != THERMAL_NO_TARGET) + update_passive_instance(tz, trip_type, 1); + /* Deactivate a passive thermal instance */ + else if (old_target != THERMAL_NO_TARGET && + instance->target == THERMAL_NO_TARGET) + update_passive_instance(tz, trip_type, -1); + + + instance->cdev->updated = false; /* cdev needs update */ + } mutex_unlock(&tz->lock); } diff --git a/drivers/thermal/thermal_sys.c b/drivers/thermal/thermal_sys.c index 84e95f3..5b7863a 100644 --- a/drivers/thermal/thermal_sys.c +++ b/drivers/thermal/thermal_sys.c @@ -32,7 +32,6 @@ #include <linux/kdev_t.h> #include <linux/idr.h> #include <linux/thermal.h> -#include <linux/spinlock.h> #include <linux/reboot.h> #include <net/netlink.h> #include <net/genetlink.h> @@ -348,8 +347,9 @@ static void handle_critical_trips(struct thermal_zone_device *tz, tz->ops->notify(tz, trip, trip_type); if (trip_type == THERMAL_TRIP_CRITICAL) { - pr_emerg("Critical temperature reached(%d C),shutting down\n", - tz->temperature / 1000); + dev_emerg(&tz->device, + "critical temperature reached(%d C),shutting down\n", + tz->temperature / 1000); orderly_poweroff(true); } } @@ -371,23 +371,57 @@ static void handle_thermal_trip(struct thermal_zone_device *tz, int trip) monitor_thermal_zone(tz); } +static int thermal_zone_get_temp(struct thermal_zone_device *tz, + unsigned long *temp) +{ + int ret = 0; +#ifdef CONFIG_THERMAL_EMULATION + int count; + unsigned long crit_temp = -1UL; + enum thermal_trip_type type; +#endif + + mutex_lock(&tz->lock); + + ret = tz->ops->get_temp(tz, temp); +#ifdef CONFIG_THERMAL_EMULATION + if (!tz->emul_temperature) + goto skip_emul; + + for (count = 0; count < tz->trips; count++) { + ret = tz->ops->get_trip_type(tz, count, &type); + if (!ret && type == THERMAL_TRIP_CRITICAL) { + ret = tz->ops->get_trip_temp(tz, count, &crit_temp); + break; + } + } + + if (ret) + goto skip_emul; + + if (*temp < crit_temp) + *temp = tz->emul_temperature; +skip_emul: +#endif + mutex_unlock(&tz->lock); + return ret; +} + static void update_temperature(struct thermal_zone_device *tz) { long temp; int ret; - mutex_lock(&tz->lock); - - ret = tz->ops->get_temp(tz, &temp); + ret = thermal_zone_get_temp(tz, &temp); if (ret) { - pr_warn("failed to read out thermal zone %d\n", tz->id); - goto exit; + dev_warn(&tz->device, "failed to read out thermal zone %d\n", + tz->id); + return; } + mutex_lock(&tz->lock); tz->last_temperature = tz->temperature; tz->temperature = temp; - -exit: mutex_unlock(&tz->lock); } @@ -430,10 +464,7 @@ temp_show(struct device *dev, struct device_attribute *attr, char *buf) long temperature; int ret; - if (!tz->ops->get_temp) - return -EPERM; - - ret = tz->ops->get_temp(tz, &temperature); + ret = thermal_zone_get_temp(tz, &temperature); if (ret) return ret; @@ -693,6 +724,31 @@ policy_show(struct device *dev, struct device_attribute *devattr, char *buf) return sprintf(buf, "%s\n", tz->governor->name); } +#ifdef CONFIG_THERMAL_EMULATION +static ssize_t +emul_temp_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct thermal_zone_device *tz = to_thermal_zone(dev); + int ret = 0; + unsigned long temperature; + + if (kstrtoul(buf, 10, &temperature)) + return -EINVAL; + + if (!tz->ops->set_emul_temp) { + mutex_lock(&tz->lock); + tz->emul_temperature = temperature; + mutex_unlock(&tz->lock); + } else { + ret = tz->ops->set_emul_temp(tz, temperature); + } + + return ret ? ret : count; +} +static DEVICE_ATTR(emul_temp, S_IWUSR, NULL, emul_temp_store); +#endif/*CONFIG_THERMAL_EMULATION*/ + static DEVICE_ATTR(type, 0444, type_show, NULL); static DEVICE_ATTR(temp, 0444, temp_show, NULL); static DEVICE_ATTR(mode, 0644, mode_show, mode_store); @@ -835,7 +891,7 @@ temp_input_show(struct device *dev, struct device_attribute *attr, char *buf) temp_input); struct thermal_zone_device *tz = temp->tz; - ret = tz->ops->get_temp(tz, &temperature); + ret = thermal_zone_get_temp(tz, &temperature); if (ret) return ret; @@ -1522,6 +1578,9 @@ struct thermal_zone_device *thermal_zone_device_register(const char *type, if (!ops || !ops->get_temp) return ERR_PTR(-EINVAL); + if (trips > 0 && !ops->get_trip_type) + return ERR_PTR(-EINVAL); + tz = kzalloc(sizeof(struct thermal_zone_device), GFP_KERNEL); if (!tz) return ERR_PTR(-ENOMEM); @@ -1585,6 +1644,11 @@ struct thermal_zone_device *thermal_zone_device_register(const char *type, goto unregister; } +#ifdef CONFIG_THERMAL_EMULATION + result = device_create_file(&tz->device, &dev_attr_emul_temp); + if (result) + goto unregister; +#endif /* Create policy attribute */ result = device_create_file(&tz->device, &dev_attr_policy); if (result) @@ -1704,7 +1768,8 @@ static struct genl_multicast_group thermal_event_mcgrp = { .name = THERMAL_GENL_MCAST_GROUP_NAME, }; -int thermal_generate_netlink_event(u32 orig, enum events event) +int thermal_generate_netlink_event(struct thermal_zone_device *tz, + enum events event) { struct sk_buff *skb; struct nlattr *attr; @@ -1714,6 +1779,9 @@ int thermal_generate_netlink_event(u32 orig, enum events event) int result; static unsigned int thermal_event_seqnum; + if (!tz) + return -EINVAL; + /* allocate memory */ size = nla_total_size(sizeof(struct thermal_genl_event)) + nla_total_size(0); @@ -1748,7 +1816,7 @@ int thermal_generate_netlink_event(u32 orig, enum events event) memset(thermal_event, 0, sizeof(struct thermal_genl_event)); - thermal_event->orig = orig; + thermal_event->orig = tz->id; thermal_event->event = event; /* send multicast genetlink message */ @@ -1760,7 +1828,7 @@ int thermal_generate_netlink_event(u32 orig, enum events event) result = genlmsg_multicast(skb, 0, thermal_event_mcgrp.id, GFP_ATOMIC); if (result) - pr_info("failed to send netlink event:%d\n", result); + dev_err(&tz->device, "Failed to send netlink event:%d", result); return result; } @@ -1800,6 +1868,7 @@ static int __init thermal_init(void) idr_destroy(&thermal_cdev_idr); mutex_destroy(&thermal_idr_lock); mutex_destroy(&thermal_list_lock); + return result; } result = genetlink_init(); return result; diff --git a/drivers/tty/serial/Kconfig b/drivers/tty/serial/Kconfig index a0162cb..cf9210d 100644 --- a/drivers/tty/serial/Kconfig +++ b/drivers/tty/serial/Kconfig @@ -729,19 +729,19 @@ config SERIAL_SH_SCI_DMA config SERIAL_PNX8XXX bool "Enable PNX8XXX SoCs' UART Support" - depends on SOC_PNX8550 || SOC_PNX833X + depends on SOC_PNX833X select SERIAL_CORE help - If you have a MIPS-based Philips SoC such as PNX8550 or PNX8330 - and you want to use serial ports, say Y. Otherwise, say N. + If you have a MIPS-based Philips SoC such as PNX8330 and you want + to use serial ports, say Y. Otherwise, say N. config SERIAL_PNX8XXX_CONSOLE bool "Enable PNX8XX0 serial console" depends on SERIAL_PNX8XXX select SERIAL_CORE_CONSOLE help - If you have a MIPS-based Philips SoC such as PNX8550 or PNX8330 - and you want to use serial console, say Y. Otherwise, say N. + If you have a MIPS-based Philips SoC such as PNX8330 and you want + to use serial console, say Y. Otherwise, say N. config SERIAL_HS_LPC32XX tristate "LPC32XX high speed serial port support" diff --git a/drivers/usb/host/ehci-timer.c b/drivers/usb/host/ehci-timer.c index f904071..20dbdcb 100644 --- a/drivers/usb/host/ehci-timer.c +++ b/drivers/usb/host/ehci-timer.c @@ -113,15 +113,14 @@ static void ehci_poll_ASS(struct ehci_hcd *ehci) if (want != actual) { - /* Poll again later */ - ehci_enable_event(ehci, EHCI_HRTIMER_POLL_ASS, true); - ++ehci->ASS_poll_count; - return; + /* Poll again later, but give up after about 20 ms */ + if (ehci->ASS_poll_count++ < 20) { + ehci_enable_event(ehci, EHCI_HRTIMER_POLL_ASS, true); + return; + } + ehci_dbg(ehci, "Waited too long for the async schedule status (%x/%x), giving up\n", + want, actual); } - - if (ehci->ASS_poll_count > 20) - ehci_dbg(ehci, "ASS poll count reached %d\n", - ehci->ASS_poll_count); ehci->ASS_poll_count = 0; /* The status is up-to-date; restart or stop the schedule as needed */ @@ -160,14 +159,14 @@ static void ehci_poll_PSS(struct ehci_hcd *ehci) if (want != actual) { - /* Poll again later */ - ehci_enable_event(ehci, EHCI_HRTIMER_POLL_PSS, true); - return; + /* Poll again later, but give up after about 20 ms */ + if (ehci->PSS_poll_count++ < 20) { + ehci_enable_event(ehci, EHCI_HRTIMER_POLL_PSS, true); + return; + } + ehci_dbg(ehci, "Waited too long for the periodic schedule status (%x/%x), giving up\n", + want, actual); } - - if (ehci->PSS_poll_count > 20) - ehci_dbg(ehci, "PSS poll count reached %d\n", - ehci->PSS_poll_count); ehci->PSS_poll_count = 0; /* The status is up-to-date; restart or stop the schedule as needed */ diff --git a/drivers/w1/masters/mxc_w1.c b/drivers/w1/masters/mxc_w1.c index 372c8c0..950d354 100644 --- a/drivers/w1/masters/mxc_w1.c +++ b/drivers/w1/masters/mxc_w1.c @@ -157,9 +157,16 @@ static int mxc_w1_remove(struct platform_device *pdev) return 0; } +static struct of_device_id mxc_w1_dt_ids[] = { + { .compatible = "fsl,imx21-owire" }, + { /* sentinel */ } +}; +MODULE_DEVICE_TABLE(of, mxc_w1_dt_ids); + static struct platform_driver mxc_w1_driver = { .driver = { - .name = "mxc_w1", + .name = "mxc_w1", + .of_match_table = mxc_w1_dt_ids, }, .probe = mxc_w1_probe, .remove = mxc_w1_remove, diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig index 26e1fdb..9fcc70c 100644 --- a/drivers/watchdog/Kconfig +++ b/drivers/watchdog/Kconfig @@ -79,6 +79,7 @@ config DA9052_WATCHDOG config DA9055_WATCHDOG tristate "Dialog Semiconductor DA9055 Watchdog" depends on MFD_DA9055 + select WATCHDOG_CORE help If you say yes here you get support for watchdog on the Dialog Semiconductor DA9055 PMIC. @@ -108,7 +109,7 @@ config WM8350_WATCHDOG config ARM_SP805_WATCHDOG tristate "ARM SP805 Watchdog" - depends on ARM_AMBA + depends on ARM && ARM_AMBA select WATCHDOG_CORE help ARM Primecell SP805 Watchdog timer. This will reboot your system when @@ -116,7 +117,7 @@ config ARM_SP805_WATCHDOG config AT91RM9200_WATCHDOG tristate "AT91RM9200 watchdog" - depends on ARCH_AT91RM9200 + depends on ARCH_AT91 help Watchdog timer embedded into AT91RM9200 chips. This will reboot your system when the timeout is reached. @@ -124,6 +125,7 @@ config AT91RM9200_WATCHDOG config AT91SAM9X_WATCHDOG tristate "AT91SAM9X / AT91CAP9 watchdog" depends on ARCH_AT91 && !ARCH_AT91RM9200 + select WATCHDOG_CORE help Watchdog timer embedded into AT91SAM9X and AT91CAP9 chips. This will reboot your system when the timeout is reached. @@ -316,14 +318,15 @@ config TWL4030_WATCHDOG Support for TI TWL4030 watchdog. Say 'Y' here to enable the watchdog timer support for TWL4030 chips. -config STMP3XXX_WATCHDOG - tristate "Freescale STMP3XXX watchdog" - depends on ARCH_STMP3XXX +config STMP3XXX_RTC_WATCHDOG + tristate "Freescale STMP3XXX & i.MX23/28 watchdog" + depends on RTC_DRV_STMP + select WATCHDOG_CORE help - Say Y here if to include support for the watchdog timer - for the Sigmatel STMP37XX/378X SoC. + Say Y here to include support for the watchdog timer inside + the RTC for the STMP37XX/378X or i.MX23/28 SoC. To compile this driver as a module, choose M here: the - module will be called stmp3xxx_wdt. + module will be called stmp3xxx_rtc_wdt. config NUC900_WATCHDOG tristate "Nuvoton NUC900 watchdog" @@ -376,6 +379,18 @@ config UX500_WATCHDOG To compile this driver as a module, choose M here: the module will be called ux500_wdt. +config RETU_WATCHDOG + tristate "Retu watchdog" + depends on MFD_RETU + select WATCHDOG_CORE + help + Retu watchdog driver for Nokia Internet Tablets (770, N800, + N810). At least on N800 the watchdog cannot be disabled, so + this driver is essential and you should enable it. + + To compile this driver as a module, choose M here: the + module will be called retu_wdt. + # AVR32 Architecture config AT32AP700X_WDT @@ -593,7 +608,7 @@ config IE6XX_WDT config INTEL_SCU_WATCHDOG bool "Intel SCU Watchdog for Mobile Platforms" - depends on X86_MRST + depends on X86_INTEL_MID ---help--- Hardware driver for the watchdog time built into the Intel SCU for Intel Mobile Platforms. @@ -983,6 +998,7 @@ config ATH79_WDT config BCM47XX_WDT tristate "Broadcom BCM47xx Watchdog Timer" depends on BCM47XX + select WATCHDOG_CORE help Hardware driver for the Broadcom BCM47xx Watchdog Timer. @@ -1131,6 +1147,7 @@ config PIKA_WDT config BOOKE_WDT tristate "PowerPC Book-E Watchdog Timer" depends on BOOKE || 4xx + select WATCHDOG_CORE ---help--- Watchdog driver for PowerPC Book-E chips, such as the Freescale MPC85xx SOCs and the IBM PowerPC 440. diff --git a/drivers/watchdog/Makefile b/drivers/watchdog/Makefile index bec86ee..a300b94 100644 --- a/drivers/watchdog/Makefile +++ b/drivers/watchdog/Makefile @@ -48,11 +48,12 @@ obj-$(CONFIG_IOP_WATCHDOG) += iop_wdt.o obj-$(CONFIG_DAVINCI_WATCHDOG) += davinci_wdt.o obj-$(CONFIG_ORION_WATCHDOG) += orion_wdt.o obj-$(CONFIG_COH901327_WATCHDOG) += coh901327_wdt.o -obj-$(CONFIG_STMP3XXX_WATCHDOG) += stmp3xxx_wdt.o +obj-$(CONFIG_STMP3XXX_RTC_WATCHDOG) += stmp3xxx_rtc_wdt.o obj-$(CONFIG_NUC900_WATCHDOG) += nuc900_wdt.o obj-$(CONFIG_TS72XX_WATCHDOG) += ts72xx_wdt.o obj-$(CONFIG_IMX2_WDT) += imx2_wdt.o obj-$(CONFIG_UX500_WATCHDOG) += ux500_wdt.o +obj-$(CONFIG_RETU_WATCHDOG) += retu_wdt.o # AVR32 Architecture obj-$(CONFIG_AT32AP700X_WDT) += at32ap700x_wdt.o diff --git a/drivers/watchdog/at91rm9200_wdt.c b/drivers/watchdog/at91rm9200_wdt.c index 89831ed..1c75260 100644 --- a/drivers/watchdog/at91rm9200_wdt.c +++ b/drivers/watchdog/at91rm9200_wdt.c @@ -24,6 +24,8 @@ #include <linux/types.h> #include <linux/watchdog.h> #include <linux/uaccess.h> +#include <linux/of.h> +#include <linux/of_device.h> #include <mach/at91_st.h> #define WDT_DEFAULT_TIME 5 /* seconds */ @@ -252,6 +254,12 @@ static int at91wdt_resume(struct platform_device *pdev) #define at91wdt_resume NULL #endif +static const struct of_device_id at91_wdt_dt_ids[] = { + { .compatible = "atmel,at91rm9200-wdt" }, + { /* sentinel */ } +}; +MODULE_DEVICE_TABLE(of, at91_wdt_dt_ids); + static struct platform_driver at91wdt_driver = { .probe = at91wdt_probe, .remove = at91wdt_remove, @@ -261,6 +269,7 @@ static struct platform_driver at91wdt_driver = { .driver = { .name = "at91_wdt", .owner = THIS_MODULE, + .of_match_table = of_match_ptr(at91_wdt_dt_ids), }, }; diff --git a/drivers/watchdog/at91sam9_wdt.c b/drivers/watchdog/at91sam9_wdt.c index c08933c..be37dde 100644 --- a/drivers/watchdog/at91sam9_wdt.c +++ b/drivers/watchdog/at91sam9_wdt.c @@ -18,11 +18,9 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/errno.h> -#include <linux/fs.h> #include <linux/init.h> #include <linux/io.h> #include <linux/kernel.h> -#include <linux/miscdevice.h> #include <linux/module.h> #include <linux/moduleparam.h> #include <linux/platform_device.h> @@ -58,7 +56,7 @@ /* User land timeout */ #define WDT_HEARTBEAT 15 -static int heartbeat = WDT_HEARTBEAT; +static int heartbeat; module_param(heartbeat, int, 0); MODULE_PARM_DESC(heartbeat, "Watchdog heartbeats in seconds. " "(default = " __MODULE_STRING(WDT_HEARTBEAT) ")"); @@ -68,19 +66,17 @@ module_param(nowayout, bool, 0); MODULE_PARM_DESC(nowayout, "Watchdog cannot be stopped once started " "(default=" __MODULE_STRING(WATCHDOG_NOWAYOUT) ")"); +static struct watchdog_device at91_wdt_dev; static void at91_ping(unsigned long data); static struct { void __iomem *base; unsigned long next_heartbeat; /* the next_heartbeat for the timer */ - unsigned long open; - char expect_close; struct timer_list timer; /* The timer that pings the watchdog */ } at91wdt_private; /* ......................................................................... */ - /* * Reload the watchdog timer. (ie, pat the watchdog) */ @@ -95,39 +91,37 @@ static inline void at91_wdt_reset(void) static void at91_ping(unsigned long data) { if (time_before(jiffies, at91wdt_private.next_heartbeat) || - (!nowayout && !at91wdt_private.open)) { + (!watchdog_active(&at91_wdt_dev))) { at91_wdt_reset(); mod_timer(&at91wdt_private.timer, jiffies + WDT_TIMEOUT); } else pr_crit("I will reset your machine !\n"); } -/* - * Watchdog device is opened, and watchdog starts running. - */ -static int at91_wdt_open(struct inode *inode, struct file *file) +static int at91_wdt_ping(struct watchdog_device *wdd) { - if (test_and_set_bit(0, &at91wdt_private.open)) - return -EBUSY; + /* calculate when the next userspace timeout will be */ + at91wdt_private.next_heartbeat = jiffies + wdd->timeout * HZ; + return 0; +} - at91wdt_private.next_heartbeat = jiffies + heartbeat * HZ; +static int at91_wdt_start(struct watchdog_device *wdd) +{ + /* calculate the next userspace timeout and modify the timer */ + at91_wdt_ping(wdd); mod_timer(&at91wdt_private.timer, jiffies + WDT_TIMEOUT); - - return nonseekable_open(inode, file); + return 0; } -/* - * Close the watchdog device. - */ -static int at91_wdt_close(struct inode *inode, struct file *file) +static int at91_wdt_stop(struct watchdog_device *wdd) { - clear_bit(0, &at91wdt_private.open); - - /* stop internal ping */ - if (!at91wdt_private.expect_close) - del_timer(&at91wdt_private.timer); + /* The watchdog timer hardware can not be stopped... */ + return 0; +} - at91wdt_private.expect_close = 0; +static int at91_wdt_set_timeout(struct watchdog_device *wdd, unsigned int new_timeout) +{ + wdd->timeout = new_timeout; return 0; } @@ -163,96 +157,28 @@ static int at91_wdt_settimeout(unsigned int timeout) return 0; } +/* ......................................................................... */ + static const struct watchdog_info at91_wdt_info = { .identity = DRV_NAME, .options = WDIOF_SETTIMEOUT | WDIOF_KEEPALIVEPING | WDIOF_MAGICCLOSE, }; -/* - * Handle commands from user-space. - */ -static long at91_wdt_ioctl(struct file *file, - unsigned int cmd, unsigned long arg) -{ - void __user *argp = (void __user *)arg; - int __user *p = argp; - int new_value; - - switch (cmd) { - case WDIOC_GETSUPPORT: - return copy_to_user(argp, &at91_wdt_info, - sizeof(at91_wdt_info)) ? -EFAULT : 0; - - case WDIOC_GETSTATUS: - case WDIOC_GETBOOTSTATUS: - return put_user(0, p); - - case WDIOC_KEEPALIVE: - at91wdt_private.next_heartbeat = jiffies + heartbeat * HZ; - return 0; - - case WDIOC_SETTIMEOUT: - if (get_user(new_value, p)) - return -EFAULT; - - heartbeat = new_value; - at91wdt_private.next_heartbeat = jiffies + heartbeat * HZ; - - return put_user(new_value, p); /* return current value */ - - case WDIOC_GETTIMEOUT: - return put_user(heartbeat, p); - } - return -ENOTTY; -} - -/* - * Pat the watchdog whenever device is written to. - */ -static ssize_t at91_wdt_write(struct file *file, const char *data, size_t len, - loff_t *ppos) -{ - if (!len) - return 0; - - /* Scan for magic character */ - if (!nowayout) { - size_t i; - - at91wdt_private.expect_close = 0; - - for (i = 0; i < len; i++) { - char c; - if (get_user(c, data + i)) - return -EFAULT; - if (c == 'V') { - at91wdt_private.expect_close = 42; - break; - } - } - } - - at91wdt_private.next_heartbeat = jiffies + heartbeat * HZ; - - return len; -} - -/* ......................................................................... */ - -static const struct file_operations at91wdt_fops = { - .owner = THIS_MODULE, - .llseek = no_llseek, - .unlocked_ioctl = at91_wdt_ioctl, - .open = at91_wdt_open, - .release = at91_wdt_close, - .write = at91_wdt_write, +static const struct watchdog_ops at91_wdt_ops = { + .owner = THIS_MODULE, + .start = at91_wdt_start, + .stop = at91_wdt_stop, + .ping = at91_wdt_ping, + .set_timeout = at91_wdt_set_timeout, }; -static struct miscdevice at91wdt_miscdev = { - .minor = WATCHDOG_MINOR, - .name = "watchdog", - .fops = &at91wdt_fops, +static struct watchdog_device at91_wdt_dev = { + .info = &at91_wdt_info, + .ops = &at91_wdt_ops, + .timeout = WDT_HEARTBEAT, + .min_timeout = 1, + .max_timeout = 0xFFFF, }; static int __init at91wdt_probe(struct platform_device *pdev) @@ -260,10 +186,6 @@ static int __init at91wdt_probe(struct platform_device *pdev) struct resource *r; int res; - if (at91wdt_miscdev.parent) - return -EBUSY; - at91wdt_miscdev.parent = &pdev->dev; - r = platform_get_resource(pdev, IORESOURCE_MEM, 0); if (!r) return -ENODEV; @@ -273,38 +195,41 @@ static int __init at91wdt_probe(struct platform_device *pdev) return -ENOMEM; } + at91_wdt_dev.parent = &pdev->dev; + watchdog_init_timeout(&at91_wdt_dev, heartbeat, &pdev->dev); + watchdog_set_nowayout(&at91_wdt_dev, nowayout); + /* Set watchdog */ res = at91_wdt_settimeout(ms_to_ticks(WDT_HW_TIMEOUT * 1000)); if (res) return res; - res = misc_register(&at91wdt_miscdev); + res = watchdog_register_device(&at91_wdt_dev); if (res) return res; - at91wdt_private.next_heartbeat = jiffies + heartbeat * HZ; + at91wdt_private.next_heartbeat = jiffies + at91_wdt_dev.timeout * HZ; setup_timer(&at91wdt_private.timer, at91_ping, 0); mod_timer(&at91wdt_private.timer, jiffies + WDT_TIMEOUT); pr_info("enabled (heartbeat=%d sec, nowayout=%d)\n", - heartbeat, nowayout); + at91_wdt_dev.timeout, nowayout); return 0; } static int __exit at91wdt_remove(struct platform_device *pdev) { - int res; + watchdog_unregister_device(&at91_wdt_dev); - res = misc_deregister(&at91wdt_miscdev); - if (!res) - at91wdt_miscdev.parent = NULL; + pr_warn("I quit now, hardware will probably reboot!\n"); + del_timer(&at91wdt_private.timer); - return res; + return 0; } #if defined(CONFIG_OF) -static const struct of_device_id at91_wdt_dt_ids[] __initconst = { +static const struct of_device_id at91_wdt_dt_ids[] = { { .compatible = "atmel,at91sam9260-wdt" }, { /* sentinel */ } }; @@ -326,4 +251,3 @@ module_platform_driver_probe(at91wdt_driver, at91wdt_probe); MODULE_AUTHOR("Renaud CERRATO <r.cerrato@til-technologies.fr>"); MODULE_DESCRIPTION("Watchdog driver for Atmel AT91SAM9x processors"); MODULE_LICENSE("GPL"); -MODULE_ALIAS_MISCDEV(WATCHDOG_MINOR); diff --git a/drivers/watchdog/ath79_wdt.c b/drivers/watchdog/ath79_wdt.c index 38a999e..8987990 100644 --- a/drivers/watchdog/ath79_wdt.c +++ b/drivers/watchdog/ath79_wdt.c @@ -23,6 +23,7 @@ #include <linux/errno.h> #include <linux/fs.h> #include <linux/init.h> +#include <linux/io.h> #include <linux/kernel.h> #include <linux/miscdevice.h> #include <linux/module.h> @@ -32,14 +33,16 @@ #include <linux/watchdog.h> #include <linux/clk.h> #include <linux/err.h> - -#include <asm/mach-ath79/ath79.h> -#include <asm/mach-ath79/ar71xx_regs.h> +#include <linux/of.h> +#include <linux/of_platform.h> #define DRIVER_NAME "ath79-wdt" #define WDT_TIMEOUT 15 /* seconds */ +#define WDOG_REG_CTRL 0x00 +#define WDOG_REG_TIMER 0x04 + #define WDOG_CTRL_LAST_RESET BIT(31) #define WDOG_CTRL_ACTION_MASK 3 #define WDOG_CTRL_ACTION_NONE 0 /* no action */ @@ -66,27 +69,38 @@ static struct clk *wdt_clk; static unsigned long wdt_freq; static int boot_status; static int max_timeout; +static void __iomem *wdt_base; + +static inline void ath79_wdt_wr(unsigned reg, u32 val) +{ + iowrite32(val, wdt_base + reg); +} + +static inline u32 ath79_wdt_rr(unsigned reg) +{ + return ioread32(wdt_base + reg); +} static inline void ath79_wdt_keepalive(void) { - ath79_reset_wr(AR71XX_RESET_REG_WDOG, wdt_freq * timeout); + ath79_wdt_wr(WDOG_REG_TIMER, wdt_freq * timeout); /* flush write */ - ath79_reset_rr(AR71XX_RESET_REG_WDOG); + ath79_wdt_rr(WDOG_REG_TIMER); } static inline void ath79_wdt_enable(void) { ath79_wdt_keepalive(); - ath79_reset_wr(AR71XX_RESET_REG_WDOG_CTRL, WDOG_CTRL_ACTION_FCR); + ath79_wdt_wr(WDOG_REG_CTRL, WDOG_CTRL_ACTION_FCR); /* flush write */ - ath79_reset_rr(AR71XX_RESET_REG_WDOG_CTRL); + ath79_wdt_rr(WDOG_REG_CTRL); } static inline void ath79_wdt_disable(void) { - ath79_reset_wr(AR71XX_RESET_REG_WDOG_CTRL, WDOG_CTRL_ACTION_NONE); + ath79_wdt_wr(WDOG_REG_CTRL, WDOG_CTRL_ACTION_NONE); /* flush write */ - ath79_reset_rr(AR71XX_RESET_REG_WDOG_CTRL); + ath79_wdt_rr(WDOG_REG_CTRL); } static int ath79_wdt_set_timeout(int val) @@ -226,16 +240,32 @@ static struct miscdevice ath79_wdt_miscdev = { static int ath79_wdt_probe(struct platform_device *pdev) { + struct resource *res; u32 ctrl; int err; - wdt_clk = clk_get(&pdev->dev, "wdt"); + if (wdt_base) + return -EBUSY; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!res) { + dev_err(&pdev->dev, "no memory resource found\n"); + return -EINVAL; + } + + wdt_base = devm_request_and_ioremap(&pdev->dev, res); + if (!wdt_base) { + dev_err(&pdev->dev, "unable to remap memory region\n"); + return -ENOMEM; + } + + wdt_clk = devm_clk_get(&pdev->dev, "wdt"); if (IS_ERR(wdt_clk)) return PTR_ERR(wdt_clk); err = clk_enable(wdt_clk); if (err) - goto err_clk_put; + return err; wdt_freq = clk_get_rate(wdt_clk); if (!wdt_freq) { @@ -251,7 +281,7 @@ static int ath79_wdt_probe(struct platform_device *pdev) max_timeout, timeout); } - ctrl = ath79_reset_rr(AR71XX_RESET_REG_WDOG_CTRL); + ctrl = ath79_wdt_rr(WDOG_REG_CTRL); boot_status = (ctrl & WDOG_CTRL_LAST_RESET) ? WDIOF_CARDRESET : 0; err = misc_register(&ath79_wdt_miscdev); @@ -265,8 +295,6 @@ static int ath79_wdt_probe(struct platform_device *pdev) err_clk_disable: clk_disable(wdt_clk); -err_clk_put: - clk_put(wdt_clk); return err; } @@ -274,7 +302,6 @@ static int ath79_wdt_remove(struct platform_device *pdev) { misc_deregister(&ath79_wdt_miscdev); clk_disable(wdt_clk); - clk_put(wdt_clk); return 0; } @@ -283,6 +310,14 @@ static void ath97_wdt_shutdown(struct platform_device *pdev) ath79_wdt_disable(); } +#ifdef CONFIG_OF +static const struct of_device_id ath79_wdt_match[] = { + { .compatible = "qca,ar7130-wdt" }, + {}, +}; +MODULE_DEVICE_TABLE(of, ath79_wdt_match); +#endif + static struct platform_driver ath79_wdt_driver = { .probe = ath79_wdt_probe, .remove = ath79_wdt_remove, @@ -290,6 +325,7 @@ static struct platform_driver ath79_wdt_driver = { .driver = { .name = DRIVER_NAME, .owner = THIS_MODULE, + .of_match_table = of_match_ptr(ath79_wdt_match), }, }; diff --git a/drivers/watchdog/bcm47xx_wdt.c b/drivers/watchdog/bcm47xx_wdt.c index bc0e91e..b4021a2b 100644 --- a/drivers/watchdog/bcm47xx_wdt.c +++ b/drivers/watchdog/bcm47xx_wdt.c @@ -3,6 +3,7 @@ * * Copyright (C) 2008 Aleksandar Radovanovic <biblbroks@sezampro.rs> * Copyright (C) 2009 Matthieu CASTET <castet.matthieu@free.fr> + * Copyright (C) 2012-2013 Hauke Mehrtens <hauke@hauke-m.de> * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -12,165 +13,143 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include <linux/bcm47xx_wdt.h> #include <linux/bitops.h> #include <linux/errno.h> -#include <linux/fs.h> #include <linux/init.h> #include <linux/kernel.h> -#include <linux/miscdevice.h> #include <linux/module.h> #include <linux/moduleparam.h> +#include <linux/platform_device.h> #include <linux/reboot.h> #include <linux/types.h> -#include <linux/uaccess.h> #include <linux/watchdog.h> #include <linux/timer.h> #include <linux/jiffies.h> -#include <linux/ssb/ssb_embedded.h> -#include <asm/mach-bcm47xx/bcm47xx.h> #define DRV_NAME "bcm47xx_wdt" #define WDT_DEFAULT_TIME 30 /* seconds */ -#define WDT_MAX_TIME 255 /* seconds */ +#define WDT_SOFTTIMER_MAX 255 /* seconds */ +#define WDT_SOFTTIMER_THRESHOLD 60 /* seconds */ -static int wdt_time = WDT_DEFAULT_TIME; +static int timeout = WDT_DEFAULT_TIME; static bool nowayout = WATCHDOG_NOWAYOUT; -module_param(wdt_time, int, 0); -MODULE_PARM_DESC(wdt_time, "Watchdog time in seconds. (default=" +module_param(timeout, int, 0); +MODULE_PARM_DESC(timeout, "Watchdog time in seconds. (default=" __MODULE_STRING(WDT_DEFAULT_TIME) ")"); -#ifdef CONFIG_WATCHDOG_NOWAYOUT module_param(nowayout, bool, 0); MODULE_PARM_DESC(nowayout, "Watchdog cannot be stopped once started (default=" __MODULE_STRING(WATCHDOG_NOWAYOUT) ")"); -#endif -static unsigned long bcm47xx_wdt_busy; -static char expect_release; -static struct timer_list wdt_timer; -static atomic_t ticks; - -static inline void bcm47xx_wdt_hw_start(void) +static inline struct bcm47xx_wdt *bcm47xx_wdt_get(struct watchdog_device *wdd) { - /* this is 2,5s on 100Mhz clock and 2s on 133 Mhz */ - switch (bcm47xx_bus_type) { -#ifdef CONFIG_BCM47XX_SSB - case BCM47XX_BUS_TYPE_SSB: - ssb_watchdog_timer_set(&bcm47xx_bus.ssb, 0xfffffff); - break; -#endif -#ifdef CONFIG_BCM47XX_BCMA - case BCM47XX_BUS_TYPE_BCMA: - bcma_chipco_watchdog_timer_set(&bcm47xx_bus.bcma.bus.drv_cc, - 0xfffffff); - break; -#endif - } + return container_of(wdd, struct bcm47xx_wdt, wdd); } -static inline int bcm47xx_wdt_hw_stop(void) +static int bcm47xx_wdt_hard_keepalive(struct watchdog_device *wdd) { - switch (bcm47xx_bus_type) { -#ifdef CONFIG_BCM47XX_SSB - case BCM47XX_BUS_TYPE_SSB: - return ssb_watchdog_timer_set(&bcm47xx_bus.ssb, 0); -#endif -#ifdef CONFIG_BCM47XX_BCMA - case BCM47XX_BUS_TYPE_BCMA: - bcma_chipco_watchdog_timer_set(&bcm47xx_bus.bcma.bus.drv_cc, 0); - return 0; -#endif - } - return -EINVAL; -} + struct bcm47xx_wdt *wdt = bcm47xx_wdt_get(wdd); -static void bcm47xx_timer_tick(unsigned long unused) -{ - if (!atomic_dec_and_test(&ticks)) { - bcm47xx_wdt_hw_start(); - mod_timer(&wdt_timer, jiffies + HZ); - } else { - pr_crit("Watchdog will fire soon!!!\n"); - } + wdt->timer_set_ms(wdt, wdd->timeout * 1000); + + return 0; } -static inline void bcm47xx_wdt_pet(void) +static int bcm47xx_wdt_hard_start(struct watchdog_device *wdd) { - atomic_set(&ticks, wdt_time); + return 0; } -static void bcm47xx_wdt_start(void) +static int bcm47xx_wdt_hard_stop(struct watchdog_device *wdd) { - bcm47xx_wdt_pet(); - bcm47xx_timer_tick(0); + struct bcm47xx_wdt *wdt = bcm47xx_wdt_get(wdd); + + wdt->timer_set(wdt, 0); + + return 0; } -static void bcm47xx_wdt_pause(void) +static int bcm47xx_wdt_hard_set_timeout(struct watchdog_device *wdd, + unsigned int new_time) { - del_timer_sync(&wdt_timer); - bcm47xx_wdt_hw_stop(); + struct bcm47xx_wdt *wdt = bcm47xx_wdt_get(wdd); + u32 max_timer = wdt->max_timer_ms; + + if (new_time < 1 || new_time > max_timer / 1000) { + pr_warn("timeout value must be 1<=x<=%d, using %d\n", + max_timer / 1000, new_time); + return -EINVAL; + } + + wdd->timeout = new_time; + return 0; } -static void bcm47xx_wdt_stop(void) +static struct watchdog_ops bcm47xx_wdt_hard_ops = { + .owner = THIS_MODULE, + .start = bcm47xx_wdt_hard_start, + .stop = bcm47xx_wdt_hard_stop, + .ping = bcm47xx_wdt_hard_keepalive, + .set_timeout = bcm47xx_wdt_hard_set_timeout, +}; + +static void bcm47xx_wdt_soft_timer_tick(unsigned long data) { - bcm47xx_wdt_pause(); + struct bcm47xx_wdt *wdt = (struct bcm47xx_wdt *)data; + u32 next_tick = min(wdt->wdd.timeout * 1000, wdt->max_timer_ms); + + if (!atomic_dec_and_test(&wdt->soft_ticks)) { + wdt->timer_set_ms(wdt, next_tick); + mod_timer(&wdt->soft_timer, jiffies + HZ); + } else { + pr_crit("Watchdog will fire soon!!!\n"); + } } -static int bcm47xx_wdt_settimeout(int new_time) +static int bcm47xx_wdt_soft_keepalive(struct watchdog_device *wdd) { - if ((new_time <= 0) || (new_time > WDT_MAX_TIME)) - return -EINVAL; + struct bcm47xx_wdt *wdt = bcm47xx_wdt_get(wdd); + + atomic_set(&wdt->soft_ticks, wdd->timeout); - wdt_time = new_time; return 0; } -static int bcm47xx_wdt_open(struct inode *inode, struct file *file) +static int bcm47xx_wdt_soft_start(struct watchdog_device *wdd) { - if (test_and_set_bit(0, &bcm47xx_wdt_busy)) - return -EBUSY; + struct bcm47xx_wdt *wdt = bcm47xx_wdt_get(wdd); + + bcm47xx_wdt_soft_keepalive(wdd); + bcm47xx_wdt_soft_timer_tick((unsigned long)wdt); - bcm47xx_wdt_start(); - return nonseekable_open(inode, file); + return 0; } -static int bcm47xx_wdt_release(struct inode *inode, struct file *file) +static int bcm47xx_wdt_soft_stop(struct watchdog_device *wdd) { - if (expect_release == 42) { - bcm47xx_wdt_stop(); - } else { - pr_crit("Unexpected close, not stopping watchdog!\n"); - bcm47xx_wdt_start(); - } + struct bcm47xx_wdt *wdt = bcm47xx_wdt_get(wdd); + + del_timer_sync(&wdt->soft_timer); + wdt->timer_set(wdt, 0); - clear_bit(0, &bcm47xx_wdt_busy); - expect_release = 0; return 0; } -static ssize_t bcm47xx_wdt_write(struct file *file, const char __user *data, - size_t len, loff_t *ppos) +static int bcm47xx_wdt_soft_set_timeout(struct watchdog_device *wdd, + unsigned int new_time) { - if (len) { - if (!nowayout) { - size_t i; - - expect_release = 0; - - for (i = 0; i != len; i++) { - char c; - if (get_user(c, data + i)) - return -EFAULT; - if (c == 'V') - expect_release = 42; - } - } - bcm47xx_wdt_pet(); + if (new_time < 1 || new_time > WDT_SOFTTIMER_MAX) { + pr_warn("timeout value must be 1<=x<=%d, using %d\n", + WDT_SOFTTIMER_MAX, new_time); + return -EINVAL; } - return len; + + wdd->timeout = new_time; + return 0; } static const struct watchdog_info bcm47xx_wdt_info = { @@ -180,130 +159,100 @@ static const struct watchdog_info bcm47xx_wdt_info = { WDIOF_MAGICCLOSE, }; -static long bcm47xx_wdt_ioctl(struct file *file, - unsigned int cmd, unsigned long arg) -{ - void __user *argp = (void __user *)arg; - int __user *p = argp; - int new_value, retval = -EINVAL; - - switch (cmd) { - case WDIOC_GETSUPPORT: - return copy_to_user(argp, &bcm47xx_wdt_info, - sizeof(bcm47xx_wdt_info)) ? -EFAULT : 0; - - case WDIOC_GETSTATUS: - case WDIOC_GETBOOTSTATUS: - return put_user(0, p); - - case WDIOC_SETOPTIONS: - if (get_user(new_value, p)) - return -EFAULT; - - if (new_value & WDIOS_DISABLECARD) { - bcm47xx_wdt_stop(); - retval = 0; - } - - if (new_value & WDIOS_ENABLECARD) { - bcm47xx_wdt_start(); - retval = 0; - } - - return retval; - - case WDIOC_KEEPALIVE: - bcm47xx_wdt_pet(); - return 0; - - case WDIOC_SETTIMEOUT: - if (get_user(new_value, p)) - return -EFAULT; - - if (bcm47xx_wdt_settimeout(new_value)) - return -EINVAL; - - bcm47xx_wdt_pet(); - - case WDIOC_GETTIMEOUT: - return put_user(wdt_time, p); - - default: - return -ENOTTY; - } -} - static int bcm47xx_wdt_notify_sys(struct notifier_block *this, - unsigned long code, void *unused) + unsigned long code, void *unused) { + struct bcm47xx_wdt *wdt; + + wdt = container_of(this, struct bcm47xx_wdt, notifier); if (code == SYS_DOWN || code == SYS_HALT) - bcm47xx_wdt_stop(); + wdt->wdd.ops->stop(&wdt->wdd); return NOTIFY_DONE; } -static const struct file_operations bcm47xx_wdt_fops = { +static struct watchdog_ops bcm47xx_wdt_soft_ops = { .owner = THIS_MODULE, - .llseek = no_llseek, - .unlocked_ioctl = bcm47xx_wdt_ioctl, - .open = bcm47xx_wdt_open, - .release = bcm47xx_wdt_release, - .write = bcm47xx_wdt_write, -}; - -static struct miscdevice bcm47xx_wdt_miscdev = { - .minor = WATCHDOG_MINOR, - .name = "watchdog", - .fops = &bcm47xx_wdt_fops, -}; - -static struct notifier_block bcm47xx_wdt_notifier = { - .notifier_call = bcm47xx_wdt_notify_sys, + .start = bcm47xx_wdt_soft_start, + .stop = bcm47xx_wdt_soft_stop, + .ping = bcm47xx_wdt_soft_keepalive, + .set_timeout = bcm47xx_wdt_soft_set_timeout, }; -static int __init bcm47xx_wdt_init(void) +static int bcm47xx_wdt_probe(struct platform_device *pdev) { int ret; + bool soft; + struct bcm47xx_wdt *wdt = dev_get_platdata(&pdev->dev); - if (bcm47xx_wdt_hw_stop() < 0) - return -ENODEV; + if (!wdt) + return -ENXIO; - setup_timer(&wdt_timer, bcm47xx_timer_tick, 0L); + soft = wdt->max_timer_ms < WDT_SOFTTIMER_THRESHOLD * 1000; - if (bcm47xx_wdt_settimeout(wdt_time)) { - bcm47xx_wdt_settimeout(WDT_DEFAULT_TIME); - pr_info("wdt_time value must be 0 < wdt_time < %d, using %d\n", - (WDT_MAX_TIME + 1), wdt_time); + if (soft) { + wdt->wdd.ops = &bcm47xx_wdt_soft_ops; + setup_timer(&wdt->soft_timer, bcm47xx_wdt_soft_timer_tick, + (long unsigned int)wdt); + } else { + wdt->wdd.ops = &bcm47xx_wdt_hard_ops; } - ret = register_reboot_notifier(&bcm47xx_wdt_notifier); + wdt->wdd.info = &bcm47xx_wdt_info; + wdt->wdd.timeout = WDT_DEFAULT_TIME; + ret = wdt->wdd.ops->set_timeout(&wdt->wdd, timeout); if (ret) - return ret; + goto err_timer; + watchdog_set_nowayout(&wdt->wdd, nowayout); - ret = misc_register(&bcm47xx_wdt_miscdev); - if (ret) { - unregister_reboot_notifier(&bcm47xx_wdt_notifier); - return ret; - } + wdt->notifier.notifier_call = &bcm47xx_wdt_notify_sys; + + ret = register_reboot_notifier(&wdt->notifier); + if (ret) + goto err_timer; - pr_info("BCM47xx Watchdog Timer enabled (%d seconds%s)\n", - wdt_time, nowayout ? ", nowayout" : ""); + ret = watchdog_register_device(&wdt->wdd); + if (ret) + goto err_notifier; + + dev_info(&pdev->dev, "BCM47xx Watchdog Timer enabled (%d seconds%s%s)\n", + timeout, nowayout ? ", nowayout" : "", + soft ? ", Software Timer" : ""); return 0; + +err_notifier: + unregister_reboot_notifier(&wdt->notifier); +err_timer: + if (soft) + del_timer_sync(&wdt->soft_timer); + + return ret; } -static void __exit bcm47xx_wdt_exit(void) +static int bcm47xx_wdt_remove(struct platform_device *pdev) { - if (!nowayout) - bcm47xx_wdt_stop(); + struct bcm47xx_wdt *wdt = dev_get_platdata(&pdev->dev); - misc_deregister(&bcm47xx_wdt_miscdev); + if (!wdt) + return -ENXIO; - unregister_reboot_notifier(&bcm47xx_wdt_notifier); + watchdog_unregister_device(&wdt->wdd); + unregister_reboot_notifier(&wdt->notifier); + + return 0; } -module_init(bcm47xx_wdt_init); -module_exit(bcm47xx_wdt_exit); +static struct platform_driver bcm47xx_wdt_driver = { + .driver = { + .owner = THIS_MODULE, + .name = "bcm47xx-wdt", + }, + .probe = bcm47xx_wdt_probe, + .remove = bcm47xx_wdt_remove, +}; + +module_platform_driver(bcm47xx_wdt_driver); MODULE_AUTHOR("Aleksandar Radovanovic"); +MODULE_AUTHOR("Hauke Mehrtens <hauke@hauke-m.de>"); MODULE_DESCRIPTION("Watchdog driver for Broadcom BCM47xx"); MODULE_LICENSE("GPL"); -MODULE_ALIAS_MISCDEV(WATCHDOG_MINOR); diff --git a/drivers/watchdog/booke_wdt.c b/drivers/watchdog/booke_wdt.c index c0bc92d..a8dbceb3 100644 --- a/drivers/watchdog/booke_wdt.c +++ b/drivers/watchdog/booke_wdt.c @@ -15,12 +15,8 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/module.h> -#include <linux/fs.h> #include <linux/smp.h> -#include <linux/miscdevice.h> -#include <linux/notifier.h> #include <linux/watchdog.h> -#include <linux/uaccess.h> #include <asm/reg_booke.h> #include <asm/time.h> @@ -45,7 +41,7 @@ u32 booke_wdt_period = CONFIG_BOOKE_WDT_DEFAULT_TIMEOUT; #define WDTP_MASK (TCR_WP_MASK) #endif -static DEFINE_SPINLOCK(booke_wdt_lock); +#ifdef CONFIG_PPC_FSL_BOOK3E /* For the specified period, determine the number of seconds * corresponding to the reset time. There will be a watchdog @@ -86,6 +82,24 @@ static unsigned int sec_to_period(unsigned int secs) return 0; } +#define MAX_WDT_TIMEOUT period_to_sec(1) + +#else /* CONFIG_PPC_FSL_BOOK3E */ + +static unsigned long long period_to_sec(unsigned int period) +{ + return period; +} + +static unsigned int sec_to_period(unsigned int secs) +{ + return secs; +} + +#define MAX_WDT_TIMEOUT 3 /* from Kconfig */ + +#endif /* !CONFIG_PPC_FSL_BOOK3E */ + static void __booke_wdt_set(void *data) { u32 val; @@ -107,9 +121,11 @@ static void __booke_wdt_ping(void *data) mtspr(SPRN_TSR, TSR_ENW|TSR_WIS); } -static void booke_wdt_ping(void) +static int booke_wdt_ping(struct watchdog_device *wdog) { on_each_cpu(__booke_wdt_ping, NULL, 0); + + return 0; } static void __booke_wdt_enable(void *data) @@ -146,152 +162,81 @@ static void __booke_wdt_disable(void *data) } -static ssize_t booke_wdt_write(struct file *file, const char __user *buf, - size_t count, loff_t *ppos) +static void __booke_wdt_start(struct watchdog_device *wdog) { - booke_wdt_ping(); - return count; + on_each_cpu(__booke_wdt_enable, NULL, 0); + pr_debug("watchdog enabled (timeout = %u sec)\n", wdog->timeout); } -static struct watchdog_info ident = { - .options = WDIOF_SETTIMEOUT | WDIOF_KEEPALIVEPING, - .identity = "PowerPC Book-E Watchdog", -}; - -static long booke_wdt_ioctl(struct file *file, - unsigned int cmd, unsigned long arg) +static int booke_wdt_start(struct watchdog_device *wdog) { - u32 tmp = 0; - u32 __user *p = (u32 __user *)arg; - - switch (cmd) { - case WDIOC_GETSUPPORT: - return copy_to_user(p, &ident, sizeof(ident)) ? -EFAULT : 0; - case WDIOC_GETSTATUS: - return put_user(0, p); - case WDIOC_GETBOOTSTATUS: - /* XXX: something is clearing TSR */ - tmp = mfspr(SPRN_TSR) & TSR_WRS(3); - /* returns CARDRESET if last reset was caused by the WDT */ - return put_user((tmp ? WDIOF_CARDRESET : 0), p); - case WDIOC_SETOPTIONS: - if (get_user(tmp, p)) - return -EFAULT; - if (tmp == WDIOS_ENABLECARD) { - booke_wdt_ping(); - break; - } else - return -EINVAL; - return 0; - case WDIOC_KEEPALIVE: - booke_wdt_ping(); - return 0; - case WDIOC_SETTIMEOUT: - if (get_user(tmp, p)) - return -EFAULT; -#ifdef CONFIG_PPC_FSL_BOOK3E - /* period of 1 gives the largest possible timeout */ - if (tmp > period_to_sec(1)) - return -EINVAL; - booke_wdt_period = sec_to_period(tmp); -#else - booke_wdt_period = tmp; -#endif - booke_wdt_set(); - /* Fall */ - case WDIOC_GETTIMEOUT: -#ifdef CONFIG_FSL_BOOKE - return put_user(period_to_sec(booke_wdt_period), p); -#else - return put_user(booke_wdt_period, p); -#endif - default: - return -ENOTTY; - } - - return 0; -} - -/* wdt_is_active stores whether or not the /dev/watchdog device is opened */ -static unsigned long wdt_is_active; - -static int booke_wdt_open(struct inode *inode, struct file *file) -{ - /* /dev/watchdog can only be opened once */ - if (test_and_set_bit(0, &wdt_is_active)) - return -EBUSY; - - spin_lock(&booke_wdt_lock); if (booke_wdt_enabled == 0) { booke_wdt_enabled = 1; - on_each_cpu(__booke_wdt_enable, NULL, 0); - pr_debug("watchdog enabled (timeout = %llu sec)\n", - period_to_sec(booke_wdt_period)); + __booke_wdt_start(wdog); } - spin_unlock(&booke_wdt_lock); - - return nonseekable_open(inode, file); + return 0; } -static int booke_wdt_release(struct inode *inode, struct file *file) +static int booke_wdt_stop(struct watchdog_device *wdog) { -#ifndef CONFIG_WATCHDOG_NOWAYOUT - /* Normally, the watchdog is disabled when /dev/watchdog is closed, but - * if CONFIG_WATCHDOG_NOWAYOUT is defined, then it means that the - * watchdog should remain enabled. So we disable it only if - * CONFIG_WATCHDOG_NOWAYOUT is not defined. - */ on_each_cpu(__booke_wdt_disable, NULL, 0); booke_wdt_enabled = 0; pr_debug("watchdog disabled\n"); -#endif - clear_bit(0, &wdt_is_active); + return 0; +} + +static int booke_wdt_set_timeout(struct watchdog_device *wdt_dev, + unsigned int timeout) +{ + if (timeout > MAX_WDT_TIMEOUT) + return -EINVAL; + booke_wdt_period = sec_to_period(timeout); + wdt_dev->timeout = timeout; + booke_wdt_set(); return 0; } -static const struct file_operations booke_wdt_fops = { +static struct watchdog_info booke_wdt_info = { + .options = WDIOF_SETTIMEOUT | WDIOF_KEEPALIVEPING, + .identity = "PowerPC Book-E Watchdog", +}; + +static struct watchdog_ops booke_wdt_ops = { .owner = THIS_MODULE, - .llseek = no_llseek, - .write = booke_wdt_write, - .unlocked_ioctl = booke_wdt_ioctl, - .open = booke_wdt_open, - .release = booke_wdt_release, + .start = booke_wdt_start, + .stop = booke_wdt_stop, + .ping = booke_wdt_ping, + .set_timeout = booke_wdt_set_timeout, }; -static struct miscdevice booke_wdt_miscdev = { - .minor = WATCHDOG_MINOR, - .name = "watchdog", - .fops = &booke_wdt_fops, +static struct watchdog_device booke_wdt_dev = { + .info = &booke_wdt_info, + .ops = &booke_wdt_ops, + .min_timeout = 1, + .max_timeout = 0xFFFF }; static void __exit booke_wdt_exit(void) { - misc_deregister(&booke_wdt_miscdev); + watchdog_unregister_device(&booke_wdt_dev); } static int __init booke_wdt_init(void) { int ret = 0; + bool nowayout = WATCHDOG_NOWAYOUT; pr_info("powerpc book-e watchdog driver loaded\n"); - ident.firmware_version = cur_cpu_spec->pvr_value; - - ret = misc_register(&booke_wdt_miscdev); - if (ret) { - pr_err("cannot register device (minor=%u, ret=%i)\n", - WATCHDOG_MINOR, ret); - return ret; - } - - spin_lock(&booke_wdt_lock); - if (booke_wdt_enabled == 1) { - pr_info("watchdog enabled (timeout = %llu sec)\n", - period_to_sec(booke_wdt_period)); - on_each_cpu(__booke_wdt_enable, NULL, 0); - } - spin_unlock(&booke_wdt_lock); + booke_wdt_info.firmware_version = cur_cpu_spec->pvr_value; + booke_wdt_set_timeout(&booke_wdt_dev, + period_to_sec(CONFIG_BOOKE_WDT_DEFAULT_TIMEOUT)); + watchdog_set_nowayout(&booke_wdt_dev, nowayout); + if (booke_wdt_enabled) + __booke_wdt_start(&booke_wdt_dev); + + ret = watchdog_register_device(&booke_wdt_dev); return ret; } diff --git a/drivers/watchdog/davinci_wdt.c b/drivers/watchdog/davinci_wdt.c index e8e8724..7df1fdc 100644 --- a/drivers/watchdog/davinci_wdt.c +++ b/drivers/watchdog/davinci_wdt.c @@ -69,7 +69,6 @@ static unsigned long wdt_status; #define WDT_REGION_INITED 2 #define WDT_DEVICE_INITED 3 -static struct resource *wdt_mem; static void __iomem *wdt_base; struct clk *wdt_clk; @@ -201,10 +200,11 @@ static struct miscdevice davinci_wdt_miscdev = { static int davinci_wdt_probe(struct platform_device *pdev) { - int ret = 0, size; + int ret = 0; struct device *dev = &pdev->dev; + struct resource *wdt_mem; - wdt_clk = clk_get(dev, NULL); + wdt_clk = devm_clk_get(dev, NULL); if (WARN_ON(IS_ERR(wdt_clk))) return PTR_ERR(wdt_clk); @@ -221,43 +221,26 @@ static int davinci_wdt_probe(struct platform_device *pdev) return -ENOENT; } - size = resource_size(wdt_mem); - if (!request_mem_region(wdt_mem->start, size, pdev->name)) { - dev_err(dev, "failed to get memory region\n"); - return -ENOENT; - } - - wdt_base = ioremap(wdt_mem->start, size); + wdt_base = devm_request_and_ioremap(dev, wdt_mem); if (!wdt_base) { - dev_err(dev, "failed to map memory region\n"); - release_mem_region(wdt_mem->start, size); - wdt_mem = NULL; - return -ENOMEM; + dev_err(dev, "ioremap failed\n"); + return -EADDRNOTAVAIL; } ret = misc_register(&davinci_wdt_miscdev); if (ret < 0) { dev_err(dev, "cannot register misc device\n"); - release_mem_region(wdt_mem->start, size); - wdt_mem = NULL; } else { set_bit(WDT_DEVICE_INITED, &wdt_status); } - iounmap(wdt_base); return ret; } static int davinci_wdt_remove(struct platform_device *pdev) { misc_deregister(&davinci_wdt_miscdev); - if (wdt_mem) { - release_mem_region(wdt_mem->start, resource_size(wdt_mem)); - wdt_mem = NULL; - } - clk_disable_unprepare(wdt_clk); - clk_put(wdt_clk); return 0; } diff --git a/drivers/watchdog/gef_wdt.c b/drivers/watchdog/gef_wdt.c index b9c5b58..257cfba 100644 --- a/drivers/watchdog/gef_wdt.c +++ b/drivers/watchdog/gef_wdt.c @@ -310,6 +310,7 @@ static struct platform_driver gef_wdt_driver = { .of_match_table = gef_wdt_ids, }, .probe = gef_wdt_probe, + .remove = gef_wdt_remove, }; static int __init gef_wdt_init(void) diff --git a/drivers/watchdog/omap_wdt.c b/drivers/watchdog/omap_wdt.c index b0e541d..af88ffd 100644 --- a/drivers/watchdog/omap_wdt.c +++ b/drivers/watchdog/omap_wdt.c @@ -45,6 +45,11 @@ #include "omap_wdt.h" +static bool nowayout = WATCHDOG_NOWAYOUT; +module_param(nowayout, bool, 0); +MODULE_PARM_DESC(nowayout, "Watchdog cannot be stopped once started " + "(default=" __MODULE_STRING(WATCHDOG_NOWAYOUT) ")"); + static unsigned timer_margin; module_param(timer_margin, uint, 0); MODULE_PARM_DESC(timer_margin, "initial watchdog timeout (in seconds)"); @@ -201,7 +206,6 @@ static const struct watchdog_ops omap_wdt_ops = { static int omap_wdt_probe(struct platform_device *pdev) { struct omap_wd_timer_platform_data *pdata = pdev->dev.platform_data; - bool nowayout = WATCHDOG_NOWAYOUT; struct watchdog_device *omap_wdt; struct resource *res, *mem; struct omap_wdt_dev *wdev; diff --git a/drivers/watchdog/orion_wdt.c b/drivers/watchdog/orion_wdt.c index 7c18b3b..da57798 100644 --- a/drivers/watchdog/orion_wdt.c +++ b/drivers/watchdog/orion_wdt.c @@ -140,6 +140,7 @@ static const struct watchdog_ops orion_wdt_ops = { static struct watchdog_device orion_wdt = { .info = &orion_wdt_info, .ops = &orion_wdt_ops, + .min_timeout = 1, }; static int orion_wdt_probe(struct platform_device *pdev) @@ -164,12 +165,9 @@ static int orion_wdt_probe(struct platform_device *pdev) wdt_max_duration = WDT_MAX_CYCLE_COUNT / wdt_tclk; - if ((heartbeat < 1) || (heartbeat > wdt_max_duration)) - heartbeat = wdt_max_duration; - - orion_wdt.timeout = heartbeat; - orion_wdt.min_timeout = 1; + orion_wdt.timeout = wdt_max_duration; orion_wdt.max_timeout = wdt_max_duration; + watchdog_init_timeout(&orion_wdt, heartbeat, &pdev->dev); watchdog_set_nowayout(&orion_wdt, nowayout); ret = watchdog_register_device(&orion_wdt); @@ -179,7 +177,7 @@ static int orion_wdt_probe(struct platform_device *pdev) } pr_info("Initial timeout %d sec%s\n", - heartbeat, nowayout ? ", nowayout" : ""); + orion_wdt.timeout, nowayout ? ", nowayout" : ""); return 0; } @@ -225,4 +223,5 @@ MODULE_PARM_DESC(nowayout, "Watchdog cannot be stopped once started (default=" __MODULE_STRING(WATCHDOG_NOWAYOUT) ")"); MODULE_LICENSE("GPL"); +MODULE_ALIAS("platform:orion_wdt"); MODULE_ALIAS_MISCDEV(WATCHDOG_MINOR); diff --git a/drivers/watchdog/pnx4008_wdt.c b/drivers/watchdog/pnx4008_wdt.c index de1f3fa..a3684a3 100644 --- a/drivers/watchdog/pnx4008_wdt.c +++ b/drivers/watchdog/pnx4008_wdt.c @@ -142,6 +142,7 @@ static const struct watchdog_ops pnx4008_wdt_ops = { static struct watchdog_device pnx4008_wdd = { .info = &pnx4008_wdt_ident, .ops = &pnx4008_wdt_ops, + .timeout = DEFAULT_HEARTBEAT, .min_timeout = 1, .max_timeout = MAX_HEARTBEAT, }; @@ -151,8 +152,7 @@ static int pnx4008_wdt_probe(struct platform_device *pdev) struct resource *r; int ret = 0; - if (heartbeat < 1 || heartbeat > MAX_HEARTBEAT) - heartbeat = DEFAULT_HEARTBEAT; + watchdog_init_timeout(&pnx4008_wdd, heartbeat, &pdev->dev); r = platform_get_resource(pdev, IORESOURCE_MEM, 0); wdt_base = devm_ioremap_resource(&pdev->dev, r); @@ -167,7 +167,6 @@ static int pnx4008_wdt_probe(struct platform_device *pdev) if (ret) goto out; - pnx4008_wdd.timeout = heartbeat; pnx4008_wdd.bootstatus = (readl(WDTIM_RES(wdt_base)) & WDOG_RESET) ? WDIOF_CARDRESET : 0; watchdog_set_nowayout(&pnx4008_wdd, nowayout); @@ -181,7 +180,7 @@ static int pnx4008_wdt_probe(struct platform_device *pdev) } dev_info(&pdev->dev, "PNX4008 Watchdog Timer: heartbeat %d sec\n", - heartbeat); + pnx4008_wdd.timeout); return 0; diff --git a/drivers/watchdog/retu_wdt.c b/drivers/watchdog/retu_wdt.c new file mode 100644 index 0000000..f53615d --- /dev/null +++ b/drivers/watchdog/retu_wdt.c @@ -0,0 +1,178 @@ +/* + * Retu watchdog driver + * + * Copyright (C) 2004, 2005 Nokia Corporation + * + * Based on code written by Amit Kucheria and Michael Buesch. + * Rewritten by Aaro Koskinen. + * + * This file is subject to the terms and conditions of the GNU General + * Public License. See the file "COPYING" in the main directory of this + * archive for more details. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include <linux/init.h> +#include <linux/slab.h> +#include <linux/errno.h> +#include <linux/device.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/mfd/retu.h> +#include <linux/watchdog.h> +#include <linux/platform_device.h> + +/* Watchdog timer values in seconds */ +#define RETU_WDT_MAX_TIMER 63 + +struct retu_wdt_dev { + struct retu_dev *rdev; + struct device *dev; + struct delayed_work ping_work; +}; + +/* + * Since Retu watchdog cannot be disabled in hardware, we must kick it + * with a timer until userspace watchdog software takes over. If + * CONFIG_WATCHDOG_NOWAYOUT is set, we never start the feeding. + */ +static void retu_wdt_ping_enable(struct retu_wdt_dev *wdev) +{ + retu_write(wdev->rdev, RETU_REG_WATCHDOG, RETU_WDT_MAX_TIMER); + schedule_delayed_work(&wdev->ping_work, + round_jiffies_relative(RETU_WDT_MAX_TIMER * HZ / 2)); +} + +static void retu_wdt_ping_disable(struct retu_wdt_dev *wdev) +{ + retu_write(wdev->rdev, RETU_REG_WATCHDOG, RETU_WDT_MAX_TIMER); + cancel_delayed_work_sync(&wdev->ping_work); +} + +static void retu_wdt_ping_work(struct work_struct *work) +{ + struct retu_wdt_dev *wdev = container_of(to_delayed_work(work), + struct retu_wdt_dev, ping_work); + retu_wdt_ping_enable(wdev); +} + +static int retu_wdt_start(struct watchdog_device *wdog) +{ + struct retu_wdt_dev *wdev = watchdog_get_drvdata(wdog); + + retu_wdt_ping_disable(wdev); + + return retu_write(wdev->rdev, RETU_REG_WATCHDOG, wdog->timeout); +} + +static int retu_wdt_stop(struct watchdog_device *wdog) +{ + struct retu_wdt_dev *wdev = watchdog_get_drvdata(wdog); + + retu_wdt_ping_enable(wdev); + + return 0; +} + +static int retu_wdt_ping(struct watchdog_device *wdog) +{ + struct retu_wdt_dev *wdev = watchdog_get_drvdata(wdog); + + return retu_write(wdev->rdev, RETU_REG_WATCHDOG, wdog->timeout); +} + +static int retu_wdt_set_timeout(struct watchdog_device *wdog, + unsigned int timeout) +{ + struct retu_wdt_dev *wdev = watchdog_get_drvdata(wdog); + + wdog->timeout = timeout; + return retu_write(wdev->rdev, RETU_REG_WATCHDOG, wdog->timeout); +} + +static const struct watchdog_info retu_wdt_info = { + .options = WDIOF_SETTIMEOUT | WDIOF_KEEPALIVEPING, + .identity = "Retu watchdog", +}; + +static const struct watchdog_ops retu_wdt_ops = { + .owner = THIS_MODULE, + .start = retu_wdt_start, + .stop = retu_wdt_stop, + .ping = retu_wdt_ping, + .set_timeout = retu_wdt_set_timeout, +}; + +static int retu_wdt_probe(struct platform_device *pdev) +{ + struct retu_dev *rdev = dev_get_drvdata(pdev->dev.parent); + bool nowayout = WATCHDOG_NOWAYOUT; + struct watchdog_device *retu_wdt; + struct retu_wdt_dev *wdev; + int ret; + + retu_wdt = devm_kzalloc(&pdev->dev, sizeof(*retu_wdt), GFP_KERNEL); + if (!retu_wdt) + return -ENOMEM; + + wdev = devm_kzalloc(&pdev->dev, sizeof(*wdev), GFP_KERNEL); + if (!wdev) + return -ENOMEM; + + retu_wdt->info = &retu_wdt_info; + retu_wdt->ops = &retu_wdt_ops; + retu_wdt->timeout = RETU_WDT_MAX_TIMER; + retu_wdt->min_timeout = 0; + retu_wdt->max_timeout = RETU_WDT_MAX_TIMER; + + watchdog_set_drvdata(retu_wdt, wdev); + watchdog_set_nowayout(retu_wdt, nowayout); + + wdev->rdev = rdev; + wdev->dev = &pdev->dev; + + INIT_DELAYED_WORK(&wdev->ping_work, retu_wdt_ping_work); + + ret = watchdog_register_device(retu_wdt); + if (ret < 0) + return ret; + + if (nowayout) + retu_wdt_ping(retu_wdt); + else + retu_wdt_ping_enable(wdev); + + platform_set_drvdata(pdev, retu_wdt); + + return 0; +} + +static int retu_wdt_remove(struct platform_device *pdev) +{ + struct watchdog_device *wdog = platform_get_drvdata(pdev); + struct retu_wdt_dev *wdev = watchdog_get_drvdata(wdog); + + watchdog_unregister_device(wdog); + cancel_delayed_work_sync(&wdev->ping_work); + + return 0; +} + +static struct platform_driver retu_wdt_driver = { + .probe = retu_wdt_probe, + .remove = retu_wdt_remove, + .driver = { + .name = "retu-wdt", + }, +}; +module_platform_driver(retu_wdt_driver); + +MODULE_ALIAS("platform:retu-wdt"); +MODULE_DESCRIPTION("Retu watchdog"); +MODULE_AUTHOR("Amit Kucheria"); +MODULE_AUTHOR("Aaro Koskinen <aaro.koskinen@iki.fi>"); +MODULE_LICENSE("GPL"); diff --git a/drivers/watchdog/s3c2410_wdt.c b/drivers/watchdog/s3c2410_wdt.c index 27bcd4e..c1a221c 100644 --- a/drivers/watchdog/s3c2410_wdt.c +++ b/drivers/watchdog/s3c2410_wdt.c @@ -53,7 +53,7 @@ #define CONFIG_S3C2410_WATCHDOG_DEFAULT_TIME (15) static bool nowayout = WATCHDOG_NOWAYOUT; -static int tmr_margin = CONFIG_S3C2410_WATCHDOG_DEFAULT_TIME; +static int tmr_margin; static int tmr_atboot = CONFIG_S3C2410_WATCHDOG_ATBOOT; static int soft_noboot; static int debug; @@ -226,6 +226,7 @@ static struct watchdog_ops s3c2410wdt_ops = { static struct watchdog_device s3c2410_wdd = { .info = &s3c2410_wdt_ident, .ops = &s3c2410wdt_ops, + .timeout = CONFIG_S3C2410_WATCHDOG_DEFAULT_TIME, }; /* interrupt handler code */ @@ -309,7 +310,6 @@ static int s3c2410wdt_probe(struct platform_device *pdev) unsigned int wtcon; int started = 0; int ret; - int size; DBG("%s: probe=%p\n", __func__, pdev); @@ -330,28 +330,20 @@ static int s3c2410wdt_probe(struct platform_device *pdev) } /* get the memory region for the watchdog timer */ - - size = resource_size(wdt_mem); - if (!request_mem_region(wdt_mem->start, size, pdev->name)) { - dev_err(dev, "failed to get memory region\n"); - ret = -EBUSY; - goto err; - } - - wdt_base = ioremap(wdt_mem->start, size); + wdt_base = devm_request_and_ioremap(dev, wdt_mem); if (wdt_base == NULL) { - dev_err(dev, "failed to ioremap() region\n"); - ret = -EINVAL; - goto err_req; + dev_err(dev, "failed to devm_request_and_ioremap() region\n"); + ret = -ENOMEM; + goto err; } DBG("probe: mapped wdt_base=%p\n", wdt_base); - wdt_clock = clk_get(&pdev->dev, "watchdog"); + wdt_clock = devm_clk_get(dev, "watchdog"); if (IS_ERR(wdt_clock)) { dev_err(dev, "failed to find watchdog clock source\n"); ret = PTR_ERR(wdt_clock); - goto err_map; + goto err; } clk_prepare_enable(wdt_clock); @@ -365,7 +357,8 @@ static int s3c2410wdt_probe(struct platform_device *pdev) /* see if we can actually set the requested timer margin, and if * not, try the default value */ - if (s3c2410wdt_set_heartbeat(&s3c2410_wdd, tmr_margin)) { + watchdog_init_timeout(&s3c2410_wdd, tmr_margin, &pdev->dev); + if (s3c2410wdt_set_heartbeat(&s3c2410_wdd, s3c2410_wdd.timeout)) { started = s3c2410wdt_set_heartbeat(&s3c2410_wdd, CONFIG_S3C2410_WATCHDOG_DEFAULT_TIME); @@ -378,7 +371,8 @@ static int s3c2410wdt_probe(struct platform_device *pdev) "cannot start\n"); } - ret = request_irq(wdt_irq->start, s3c2410wdt_irq, 0, pdev->name, pdev); + ret = devm_request_irq(dev, wdt_irq->start, s3c2410wdt_irq, 0, + pdev->name, pdev); if (ret != 0) { dev_err(dev, "failed to install irq (%d)\n", ret); goto err_cpufreq; @@ -389,7 +383,7 @@ static int s3c2410wdt_probe(struct platform_device *pdev) ret = watchdog_register_device(&s3c2410_wdd); if (ret) { dev_err(dev, "cannot register watchdog (%d)\n", ret); - goto err_irq; + goto err_cpufreq; } if (tmr_atboot && started == 0) { @@ -414,23 +408,13 @@ static int s3c2410wdt_probe(struct platform_device *pdev) return 0; - err_irq: - free_irq(wdt_irq->start, pdev); - err_cpufreq: s3c2410wdt_cpufreq_deregister(); err_clk: clk_disable_unprepare(wdt_clock); - clk_put(wdt_clock); wdt_clock = NULL; - err_map: - iounmap(wdt_base); - - err_req: - release_mem_region(wdt_mem->start, size); - err: wdt_irq = NULL; wdt_mem = NULL; @@ -441,17 +425,11 @@ static int s3c2410wdt_remove(struct platform_device *dev) { watchdog_unregister_device(&s3c2410_wdd); - free_irq(wdt_irq->start, dev); - s3c2410wdt_cpufreq_deregister(); clk_disable_unprepare(wdt_clock); - clk_put(wdt_clock); wdt_clock = NULL; - iounmap(wdt_base); - - release_mem_region(wdt_mem->start, resource_size(wdt_mem)); wdt_irq = NULL; wdt_mem = NULL; return 0; diff --git a/drivers/watchdog/sp5100_tco.c b/drivers/watchdog/sp5100_tco.c index 2b0e000..e3b8f75 100644 --- a/drivers/watchdog/sp5100_tco.c +++ b/drivers/watchdog/sp5100_tco.c @@ -361,7 +361,7 @@ static unsigned char sp5100_tco_setupdevice(void) { struct pci_dev *dev = NULL; const char *dev_name = NULL; - u32 val; + u32 val, tmp_val; u32 index_reg, data_reg, base_addr; /* Match the PCI device */ @@ -497,30 +497,19 @@ static unsigned char sp5100_tco_setupdevice(void) pr_debug("Got 0x%04x from resource tree\n", val); } - /* Restore to the low three bits, if chipset is SB8x0(or later) */ - if (sp5100_tco_pci->revision >= 0x40) { - u8 reserved_bit; - reserved_bit = inb(base_addr) & 0x7; - val |= (u32)reserved_bit; - } + /* Restore to the low three bits */ + outb(base_addr+0, index_reg); + tmp_val = val | (inb(data_reg) & 0x7); /* Re-programming the watchdog timer base address */ outb(base_addr+0, index_reg); - /* Low three bits of BASE are reserved */ - outb((val >> 0) & 0xf8, data_reg); + outb((tmp_val >> 0) & 0xff, data_reg); outb(base_addr+1, index_reg); - outb((val >> 8) & 0xff, data_reg); + outb((tmp_val >> 8) & 0xff, data_reg); outb(base_addr+2, index_reg); - outb((val >> 16) & 0xff, data_reg); + outb((tmp_val >> 16) & 0xff, data_reg); outb(base_addr+3, index_reg); - outb((val >> 24) & 0xff, data_reg); - - /* - * Clear unnecessary the low three bits, - * if chipset is SB8x0(or later) - */ - if (sp5100_tco_pci->revision >= 0x40) - val &= ~0x7; + outb((tmp_val >> 24) & 0xff, data_reg); if (!request_mem_region_exclusive(val, SP5100_WDT_MEM_MAP_SIZE, dev_name)) { diff --git a/drivers/watchdog/stmp3xxx_rtc_wdt.c b/drivers/watchdog/stmp3xxx_rtc_wdt.c new file mode 100644 index 0000000..c97e98d --- /dev/null +++ b/drivers/watchdog/stmp3xxx_rtc_wdt.c @@ -0,0 +1,111 @@ +/* + * Watchdog driver for the RTC based watchdog in STMP3xxx and i.MX23/28 + * + * Author: Wolfram Sang <w.sang@pengutronix.de> + * + * Copyright (C) 2011-12 Wolfram Sang, Pengutronix + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + */ +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/miscdevice.h> +#include <linux/watchdog.h> +#include <linux/platform_device.h> +#include <linux/stmp3xxx_rtc_wdt.h> + +#define WDOG_TICK_RATE 1000 /* 1 kHz clock */ +#define STMP3XXX_DEFAULT_TIMEOUT 19 +#define STMP3XXX_MAX_TIMEOUT (UINT_MAX / WDOG_TICK_RATE) + +static int heartbeat = STMP3XXX_DEFAULT_TIMEOUT; +module_param(heartbeat, uint, 0); +MODULE_PARM_DESC(heartbeat, "Watchdog heartbeat period in seconds from 1 to " + __MODULE_STRING(STMP3XXX_MAX_TIMEOUT) ", default " + __MODULE_STRING(STMP3XXX_DEFAULT_TIMEOUT)); + +static int wdt_start(struct watchdog_device *wdd) +{ + struct device *dev = watchdog_get_drvdata(wdd); + struct stmp3xxx_wdt_pdata *pdata = dev->platform_data; + + pdata->wdt_set_timeout(dev->parent, wdd->timeout * WDOG_TICK_RATE); + return 0; +} + +static int wdt_stop(struct watchdog_device *wdd) +{ + struct device *dev = watchdog_get_drvdata(wdd); + struct stmp3xxx_wdt_pdata *pdata = dev->platform_data; + + pdata->wdt_set_timeout(dev->parent, 0); + return 0; +} + +static int wdt_set_timeout(struct watchdog_device *wdd, unsigned new_timeout) +{ + wdd->timeout = new_timeout; + return wdt_start(wdd); +} + +static const struct watchdog_info stmp3xxx_wdt_ident = { + .options = WDIOF_MAGICCLOSE | WDIOF_SETTIMEOUT | WDIOF_KEEPALIVEPING, + .identity = "STMP3XXX RTC Watchdog", +}; + +static const struct watchdog_ops stmp3xxx_wdt_ops = { + .owner = THIS_MODULE, + .start = wdt_start, + .stop = wdt_stop, + .set_timeout = wdt_set_timeout, +}; + +static struct watchdog_device stmp3xxx_wdd = { + .info = &stmp3xxx_wdt_ident, + .ops = &stmp3xxx_wdt_ops, + .min_timeout = 1, + .max_timeout = STMP3XXX_MAX_TIMEOUT, + .status = WATCHDOG_NOWAYOUT_INIT_STATUS, +}; + +static int stmp3xxx_wdt_probe(struct platform_device *pdev) +{ + int ret; + + watchdog_set_drvdata(&stmp3xxx_wdd, &pdev->dev); + + stmp3xxx_wdd.timeout = clamp_t(unsigned, heartbeat, 1, STMP3XXX_MAX_TIMEOUT); + + ret = watchdog_register_device(&stmp3xxx_wdd); + if (ret < 0) { + dev_err(&pdev->dev, "cannot register watchdog device\n"); + return ret; + } + + dev_info(&pdev->dev, "initialized watchdog with heartbeat %ds\n", + stmp3xxx_wdd.timeout); + return 0; +} + +static int stmp3xxx_wdt_remove(struct platform_device *pdev) +{ + watchdog_unregister_device(&stmp3xxx_wdd); + return 0; +} + +static struct platform_driver stmp3xxx_wdt_driver = { + .driver = { + .name = "stmp3xxx_rtc_wdt", + }, + .probe = stmp3xxx_wdt_probe, + .remove = stmp3xxx_wdt_remove, +}; +module_platform_driver(stmp3xxx_wdt_driver); + +MODULE_DESCRIPTION("STMP3XXX RTC Watchdog Driver"); +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Wolfram Sang <w.sang@pengutronix.de>"); +MODULE_ALIAS_MISCDEV(WATCHDOG_MINOR); diff --git a/drivers/watchdog/stmp3xxx_wdt.c b/drivers/watchdog/stmp3xxx_wdt.c deleted file mode 100644 index 1f4f697..0000000 --- a/drivers/watchdog/stmp3xxx_wdt.c +++ /dev/null @@ -1,288 +0,0 @@ -/* - * Watchdog driver for Freescale STMP37XX/STMP378X - * - * Author: Vitaly Wool <vital@embeddedalley.com> - * - * Copyright 2008 Freescale Semiconductor, Inc. All Rights Reserved. - * Copyright 2008 Embedded Alley Solutions, Inc All Rights Reserved. - */ - -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - -#include <linux/init.h> -#include <linux/kernel.h> -#include <linux/fs.h> -#include <linux/miscdevice.h> -#include <linux/watchdog.h> -#include <linux/platform_device.h> -#include <linux/spinlock.h> -#include <linux/uaccess.h> -#include <linux/module.h> - -#include <mach/platform.h> -#include <mach/regs-rtc.h> - -#define DEFAULT_HEARTBEAT 19 -#define MAX_HEARTBEAT (0x10000000 >> 6) - -/* missing bitmask in headers */ -#define BV_RTC_PERSISTENT1_GENERAL__RTC_FORCE_UPDATER 0x80000000 - -#define WDT_IN_USE 0 -#define WDT_OK_TO_CLOSE 1 - -#define WDOG_COUNTER_RATE 1000 /* 1 kHz clock */ - -static DEFINE_SPINLOCK(stmp3xxx_wdt_io_lock); -static unsigned long wdt_status; -static const bool nowayout = WATCHDOG_NOWAYOUT; -static int heartbeat = DEFAULT_HEARTBEAT; -static unsigned long boot_status; - -static void wdt_enable(u32 value) -{ - spin_lock(&stmp3xxx_wdt_io_lock); - __raw_writel(value, REGS_RTC_BASE + HW_RTC_WATCHDOG); - stmp3xxx_setl(BM_RTC_CTRL_WATCHDOGEN, REGS_RTC_BASE + HW_RTC_CTRL); - stmp3xxx_setl(BV_RTC_PERSISTENT1_GENERAL__RTC_FORCE_UPDATER, - REGS_RTC_BASE + HW_RTC_PERSISTENT1); - spin_unlock(&stmp3xxx_wdt_io_lock); -} - -static void wdt_disable(void) -{ - spin_lock(&stmp3xxx_wdt_io_lock); - stmp3xxx_clearl(BV_RTC_PERSISTENT1_GENERAL__RTC_FORCE_UPDATER, - REGS_RTC_BASE + HW_RTC_PERSISTENT1); - stmp3xxx_clearl(BM_RTC_CTRL_WATCHDOGEN, REGS_RTC_BASE + HW_RTC_CTRL); - spin_unlock(&stmp3xxx_wdt_io_lock); -} - -static void wdt_ping(void) -{ - wdt_enable(heartbeat * WDOG_COUNTER_RATE); -} - -static int stmp3xxx_wdt_open(struct inode *inode, struct file *file) -{ - if (test_and_set_bit(WDT_IN_USE, &wdt_status)) - return -EBUSY; - - clear_bit(WDT_OK_TO_CLOSE, &wdt_status); - wdt_ping(); - - return nonseekable_open(inode, file); -} - -static ssize_t stmp3xxx_wdt_write(struct file *file, const char __user *data, - size_t len, loff_t *ppos) -{ - if (len) { - if (!nowayout) { - size_t i; - - clear_bit(WDT_OK_TO_CLOSE, &wdt_status); - - for (i = 0; i != len; i++) { - char c; - - if (get_user(c, data + i)) - return -EFAULT; - if (c == 'V') - set_bit(WDT_OK_TO_CLOSE, &wdt_status); - } - } - wdt_ping(); - } - - return len; -} - -static const struct watchdog_info ident = { - .options = WDIOF_CARDRESET | - WDIOF_MAGICCLOSE | - WDIOF_SETTIMEOUT | - WDIOF_KEEPALIVEPING, - .identity = "STMP3XXX Watchdog", -}; - -static long stmp3xxx_wdt_ioctl(struct file *file, unsigned int cmd, - unsigned long arg) -{ - void __user *argp = (void __user *)arg; - int __user *p = argp; - int new_heartbeat, opts; - int ret = -ENOTTY; - - switch (cmd) { - case WDIOC_GETSUPPORT: - ret = copy_to_user(argp, &ident, sizeof(ident)) ? -EFAULT : 0; - break; - - case WDIOC_GETSTATUS: - ret = put_user(0, p); - break; - - case WDIOC_GETBOOTSTATUS: - ret = put_user(boot_status, p); - break; - - case WDIOC_SETOPTIONS: - if (get_user(opts, p)) { - ret = -EFAULT; - break; - } - if (opts & WDIOS_DISABLECARD) - wdt_disable(); - else if (opts & WDIOS_ENABLECARD) - wdt_ping(); - else { - pr_debug("%s: unknown option 0x%x\n", __func__, opts); - ret = -EINVAL; - break; - } - ret = 0; - break; - - case WDIOC_KEEPALIVE: - wdt_ping(); - ret = 0; - break; - - case WDIOC_SETTIMEOUT: - if (get_user(new_heartbeat, p)) { - ret = -EFAULT; - break; - } - if (new_heartbeat <= 0 || new_heartbeat > MAX_HEARTBEAT) { - ret = -EINVAL; - break; - } - - heartbeat = new_heartbeat; - wdt_ping(); - /* Fall through */ - - case WDIOC_GETTIMEOUT: - ret = put_user(heartbeat, p); - break; - } - return ret; -} - -static int stmp3xxx_wdt_release(struct inode *inode, struct file *file) -{ - int ret = 0; - - if (!nowayout) { - if (!test_bit(WDT_OK_TO_CLOSE, &wdt_status)) { - wdt_ping(); - pr_debug("%s: Device closed unexpectedly\n", __func__); - ret = -EINVAL; - } else { - wdt_disable(); - clear_bit(WDT_OK_TO_CLOSE, &wdt_status); - } - } - clear_bit(WDT_IN_USE, &wdt_status); - - return ret; -} - -static const struct file_operations stmp3xxx_wdt_fops = { - .owner = THIS_MODULE, - .llseek = no_llseek, - .write = stmp3xxx_wdt_write, - .unlocked_ioctl = stmp3xxx_wdt_ioctl, - .open = stmp3xxx_wdt_open, - .release = stmp3xxx_wdt_release, -}; - -static struct miscdevice stmp3xxx_wdt_miscdev = { - .minor = WATCHDOG_MINOR, - .name = "watchdog", - .fops = &stmp3xxx_wdt_fops, -}; - -static int stmp3xxx_wdt_probe(struct platform_device *pdev) -{ - int ret = 0; - - if (heartbeat < 1 || heartbeat > MAX_HEARTBEAT) - heartbeat = DEFAULT_HEARTBEAT; - - boot_status = __raw_readl(REGS_RTC_BASE + HW_RTC_PERSISTENT1) & - BV_RTC_PERSISTENT1_GENERAL__RTC_FORCE_UPDATER; - boot_status = !!boot_status; - stmp3xxx_clearl(BV_RTC_PERSISTENT1_GENERAL__RTC_FORCE_UPDATER, - REGS_RTC_BASE + HW_RTC_PERSISTENT1); - wdt_disable(); /* disable for now */ - - ret = misc_register(&stmp3xxx_wdt_miscdev); - if (ret < 0) { - dev_err(&pdev->dev, "cannot register misc device\n"); - return ret; - } - - pr_info("initialized, heartbeat %d sec\n", heartbeat); - - return ret; -} - -static int stmp3xxx_wdt_remove(struct platform_device *pdev) -{ - misc_deregister(&stmp3xxx_wdt_miscdev); - return 0; -} - -#ifdef CONFIG_PM -static int wdt_suspended; -static u32 wdt_saved_time; - -static int stmp3xxx_wdt_suspend(struct platform_device *pdev, - pm_message_t state) -{ - if (__raw_readl(REGS_RTC_BASE + HW_RTC_CTRL) & - BM_RTC_CTRL_WATCHDOGEN) { - wdt_suspended = 1; - wdt_saved_time = __raw_readl(REGS_RTC_BASE + HW_RTC_WATCHDOG); - wdt_disable(); - } - return 0; -} - -static int stmp3xxx_wdt_resume(struct platform_device *pdev) -{ - if (wdt_suspended) { - wdt_enable(wdt_saved_time); - wdt_suspended = 0; - } - return 0; -} -#else -#define stmp3xxx_wdt_suspend NULL -#define stmp3xxx_wdt_resume NULL -#endif - -static struct platform_driver platform_wdt_driver = { - .driver = { - .name = "stmp3xxx_wdt", - }, - .probe = stmp3xxx_wdt_probe, - .remove = stmp3xxx_wdt_remove, - .suspend = stmp3xxx_wdt_suspend, - .resume = stmp3xxx_wdt_resume, -}; - -module_platform_driver(platform_wdt_driver); - -MODULE_DESCRIPTION("STMP3XXX Watchdog Driver"); -MODULE_LICENSE("GPL"); - -module_param(heartbeat, int, 0); -MODULE_PARM_DESC(heartbeat, - "Watchdog heartbeat period in seconds from 1 to " - __MODULE_STRING(MAX_HEARTBEAT) ", default " - __MODULE_STRING(DEFAULT_HEARTBEAT)); - -MODULE_ALIAS_MISCDEV(WATCHDOG_MINOR); diff --git a/drivers/watchdog/watchdog_core.c b/drivers/watchdog/watchdog_core.c index 3796434..05d18b4 100644 --- a/drivers/watchdog/watchdog_core.c +++ b/drivers/watchdog/watchdog_core.c @@ -36,12 +36,68 @@ #include <linux/init.h> /* For __init/__exit/... */ #include <linux/idr.h> /* For ida_* macros */ #include <linux/err.h> /* For IS_ERR macros */ +#include <linux/of.h> /* For of_get_timeout_sec */ #include "watchdog_core.h" /* For watchdog_dev_register/... */ static DEFINE_IDA(watchdog_ida); static struct class *watchdog_class; +static void watchdog_check_min_max_timeout(struct watchdog_device *wdd) +{ + /* + * Check that we have valid min and max timeout values, if + * not reset them both to 0 (=not used or unknown) + */ + if (wdd->min_timeout > wdd->max_timeout) { + pr_info("Invalid min and max timeout values, resetting to 0!\n"); + wdd->min_timeout = 0; + wdd->max_timeout = 0; + } +} + +/** + * watchdog_init_timeout() - initialize the timeout field + * @timeout_parm: timeout module parameter + * @dev: Device that stores the timeout-sec property + * + * Initialize the timeout field of the watchdog_device struct with either the + * timeout module parameter (if it is valid value) or the timeout-sec property + * (only if it is a valid value and the timeout_parm is out of bounds). + * If none of them are valid then we keep the old value (which should normally + * be the default timeout value. + * + * A zero is returned on success and -EINVAL for failure. + */ +int watchdog_init_timeout(struct watchdog_device *wdd, + unsigned int timeout_parm, struct device *dev) +{ + unsigned int t = 0; + int ret = 0; + + watchdog_check_min_max_timeout(wdd); + + /* try to get the tiemout module parameter first */ + if (!watchdog_timeout_invalid(wdd, timeout_parm)) { + wdd->timeout = timeout_parm; + return ret; + } + if (timeout_parm) + ret = -EINVAL; + + /* try to get the timeout_sec property */ + if (dev == NULL || dev->of_node == NULL) + return ret; + of_property_read_u32(dev->of_node, "timeout-sec", &t); + if (!watchdog_timeout_invalid(wdd, t)) + wdd->timeout = t; + else + ret = -EINVAL; + + return ret; +} +EXPORT_SYMBOL_GPL(watchdog_init_timeout); + /** * watchdog_register_device() - register a watchdog device * @wdd: watchdog device @@ -63,15 +119,7 @@ int watchdog_register_device(struct watchdog_device *wdd) if (wdd->ops->start == NULL || wdd->ops->stop == NULL) return -EINVAL; - /* - * Check that we have valid min and max timeout values, if - * not reset them both to 0 (=not used or unknown) - */ - if (wdd->min_timeout > wdd->max_timeout) { - pr_info("Invalid min and max timeout values, resetting to 0!\n"); - wdd->min_timeout = 0; - wdd->max_timeout = 0; - } + watchdog_check_min_max_timeout(wdd); /* * Note: now that all watchdog_device data has been verified, we diff --git a/drivers/watchdog/watchdog_dev.c b/drivers/watchdog/watchdog_dev.c index ef8edec..08b48bb 100644 --- a/drivers/watchdog/watchdog_dev.c +++ b/drivers/watchdog/watchdog_dev.c @@ -200,8 +200,7 @@ static int watchdog_set_timeout(struct watchdog_device *wddev, !(wddev->info->options & WDIOF_SETTIMEOUT)) return -EOPNOTSUPP; - if ((wddev->max_timeout != 0) && - (timeout < wddev->min_timeout || timeout > wddev->max_timeout)) + if (watchdog_timeout_invalid(wddev, timeout)) return -EINVAL; mutex_lock(&wddev->lock); |