| author | Linus Torvalds <torvalds@linux-foundation.org> | 2015-06-24 08:46:32 -0700 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2015-06-24 08:46:32 -0700 |
| commit | 08d183e3c1f650b4db1d07d764502116861542fa (patch) | |
| tree | f868a813f36744597bc7a8260c63cd37a3a94338 /drivers | |
| parent | 4b1f2af6752a4cc9acc1c22ddf3842478965f113 (diff) | |
| parent | 6096f884515466f400864ad23d16f20b731a7ce7 (diff) | |
Merge tag 'powerpc-4.2-1' of git://git.kernel.org/pub/scm/linux/kernel/git/mpe/linux
Pull powerpc updates from Michael Ellerman:
- disable the 32-bit vdso when building LE, so we can build with a
64-bit only toolchain.
- EEH fixes from Gavin & Richard.
- enable the sys_kcmp syscall from Laurent.
- sysfs control for fastsleep workaround from Shreyas.
- expose OPAL events as an irq chip by Alistair.
- MSI ops moved to pci_controller_ops by Daniel.
- fix for kernel to userspace backtraces for perf from Anton.
- merge pseries and pseries_le defconfigs from Cyril.
- CXL in-kernel API from Mikey (see the usage sketch after this list).
- OPAL prd driver from Jeremy.
- fix for DSCR handling & tests from Anshuman.
- Powernv flash mtd driver from Cyril.
- dynamic DMA Window support on powernv from Alexey.
- LLVM clang fixes & workarounds from Anton.
- reworked version of the patch to abort syscalls when transactional.
- fix the swap encoding to support 4TB, from Aneesh.
- various fixes as usual.
- Freescale updates from Scott: Highlights include more 8xx
optimizations, an e6500 hugetlb optimization, QMan device tree nodes,
t1024/t1023 support, and various fixes and cleanup.
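The diff below only adds the exporting side of the new in-kernel CXL API (drivers/misc/cxl/api.c); no consumer is included in this merge. The following is a rough sketch of how a kernel driver might drive that API, assuming hypothetical `my_afu_*` names and work element descriptor (WED) value — it is not code from this merge:

```c
/*
 * Hypothetical consumer of the in-kernel CXL API exported by
 * drivers/misc/cxl/api.c. The my_afu_* names and the WED value are
 * illustrative only.
 */
#include <linux/err.h>
#include <linux/errno.h>
#include <linux/interrupt.h>
#include <linux/pci.h>
#include <misc/cxl.h>

static char my_afu_irq_name[] = "my-afu";

static irqreturn_t my_afu_irq(int irq, void *cookie)
{
	/* cookie is the cxl_context passed to cxl_map_afu_irq() below */
	return IRQ_HANDLED;
}

static struct cxl_context *my_afu_attach(struct pci_dev *pdev, u64 wed)
{
	struct cxl_context *ctx;
	int rc;

	/* allocate a (slave) context on the AFU behind this device */
	ctx = cxl_dev_context_init(pdev);
	if (IS_ERR(ctx))
		return ctx;

	/* 0 means "use the AFU's default per-process interrupt count" */
	rc = cxl_allocate_afu_irqs(ctx, 0);
	if (rc)
		return ERR_PTR(rc);	/* error unwinding elided for brevity */

	/* AFU interrupt numbers start at 1; returns the mapped Linux irq */
	rc = cxl_map_afu_irq(ctx, 1, my_afu_irq, ctx, my_afu_irq_name);
	if (rc <= 0)
		return ERR_PTR(-ENODEV);

	/* a NULL task attaches this as a kernel context */
	rc = cxl_start_context(ctx, wed, NULL);
	if (rc)
		return ERR_PTR(rc);

	return ctx;
}

static void my_afu_detach(struct cxl_context *ctx)
{
	/* tear down in the reverse order of my_afu_attach() */
	cxl_stop_context(ctx);
	cxl_unmap_afu_irq(ctx, 1, ctx);
	cxl_free_afu_irqs(ctx);
	cxl_release_context(ctx);
}
```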
* tag 'powerpc-4.2-1' of git://git.kernel.org/pub/scm/linux/kernel/git/mpe/linux: (180 commits)
cxl: Fix typo in debug print
cxl: Add CXL_KERNEL_API config option
powerpc/powernv: Fix wrong IOMMU table in pnv_ioda_setup_bus_dma()
powerpc/mm: Change the swap encoding in pte.
powerpc/mm: PTE_RPN_MAX is not used, remove the same
powerpc/tm: Abort syscalls in active transactions
powerpc/iommu/ioda2: Enable compile with IOV=on and IOMMU_API=off
powerpc/include: Add opal-prd to installed uapi headers
powerpc/powernv: fix construction of opal PRD messages
powerpc/powernv: Increase opal-irqchip initcall priority
powerpc: Make doorbell check preemption safe
powerpc/powernv: pnv_init_idle_states() should only run on powernv
macintosh/nvram: Remove as unused
powerpc: Don't use gcc specific options on clang
powerpc: Don't use -mno-strict-align on clang
powerpc: Only use -mtraceback=no, -mno-string and -msoft-float if toolchain supports it
powerpc: Only use -mabi=altivec if toolchain supports it
powerpc: Fix duplicate const clang warning in user access code
vfio: powerpc/spapr: Support Dynamic DMA windows
vfio: powerpc/spapr: Register memory and define IOMMU v2
...
Diffstat (limited to 'drivers')

| Mode | File | Lines changed |
|---|---|---|
| -rw-r--r-- | drivers/char/ipmi/ipmi_powernv.c | 39 |
| -rw-r--r-- | drivers/macintosh/nvram.c | 130 |
| -rw-r--r-- | drivers/misc/cxl/Kconfig | 5 |
| -rw-r--r-- | drivers/misc/cxl/Makefile | 4 |
| -rw-r--r-- | drivers/misc/cxl/api.c | 331 |
| -rw-r--r-- | drivers/misc/cxl/base.c | 2 |
| -rw-r--r-- | drivers/misc/cxl/context.c | 36 |
| -rw-r--r-- | drivers/misc/cxl/cxl.h | 38 |
| -rw-r--r-- | drivers/misc/cxl/fault.c | 34 |
| -rw-r--r-- | drivers/misc/cxl/file.c | 48 |
| -rw-r--r-- | drivers/misc/cxl/irq.c | 37 |
| -rw-r--r-- | drivers/misc/cxl/main.c | 2 |
| -rw-r--r-- | drivers/misc/cxl/native.c | 83 |
| -rw-r--r-- | drivers/misc/cxl/pci.c | 129 |
| -rw-r--r-- | drivers/misc/cxl/sysfs.c | 35 |
| -rw-r--r-- | drivers/misc/cxl/vphb.c | 270 |
| -rw-r--r-- | drivers/mtd/devices/Kconfig | 8 |
| -rw-r--r-- | drivers/mtd/devices/Makefile | 1 |
| -rw-r--r-- | drivers/mtd/devices/powernv_flash.c | 285 |
| -rw-r--r-- | drivers/tty/hvc/hvc_opal.c | 33 |
| -rw-r--r-- | drivers/vfio/vfio_iommu_spapr_tce.c | 1101 |
| -rw-r--r-- | drivers/vfio/vfio_spapr_eeh.c | 10 |

22 files changed, 2270 insertions, 391 deletions
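Several hunks below (ipmi_powernv, hvc_opal) replace the old opal_notifier callbacks with interrupts delivered by the new OPAL event irqchip. A minimal sketch of the resulting consumer pattern, assuming the console-input event and hypothetical `my_*` names; the flags mirror the ipmi_powernv conversion shown below:

```c
/*
 * Sketch of the new OPAL event consumer pattern (handler and names are
 * illustrative): ask the OPAL event irqchip for a Linux interrupt and
 * attach an ordinary handler instead of registering an opal notifier.
 */
#include <linux/errno.h>
#include <linux/interrupt.h>
#include <linux/irqdomain.h>
#include <linux/log2.h>
#include <asm/opal.h>

static irqreturn_t my_opal_event_irq(int irq, void *data)
{
	/* the OPAL event is pending; kick the driver's bottom half here */
	return IRQ_HANDLED;
}

static int my_opal_event_register(void *priv)
{
	unsigned int irq;

	/* translate an OPAL event bit into a virq via the new irqchip */
	irq = opal_event_request(ilog2(OPAL_EVENT_CONSOLE_INPUT));
	if (!irq)
		return -ENODEV;

	if (request_irq(irq, my_opal_event_irq, IRQ_TYPE_LEVEL_HIGH,
			"my-opal-event", priv)) {
		irq_dispose_mapping(irq);
		return -EBUSY;
	}

	return 0;
}
```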
diff --git a/drivers/char/ipmi/ipmi_powernv.c b/drivers/char/ipmi/ipmi_powernv.c index 8753b0f..9b409c0 100644 --- a/drivers/char/ipmi/ipmi_powernv.c +++ b/drivers/char/ipmi/ipmi_powernv.c @@ -15,6 +15,8 @@ #include <linux/list.h> #include <linux/module.h> #include <linux/of.h> +#include <linux/of_irq.h> +#include <linux/interrupt.h> #include <asm/opal.h> @@ -23,8 +25,7 @@ struct ipmi_smi_powernv { u64 interface_id; struct ipmi_device_id ipmi_id; ipmi_smi_t intf; - u64 event; - struct notifier_block event_nb; + unsigned int irq; /** * We assume that there can only be one outstanding request, so @@ -197,15 +198,12 @@ static struct ipmi_smi_handlers ipmi_powernv_smi_handlers = { .poll = ipmi_powernv_poll, }; -static int ipmi_opal_event(struct notifier_block *nb, - unsigned long events, void *change) +static irqreturn_t ipmi_opal_event(int irq, void *data) { - struct ipmi_smi_powernv *smi = container_of(nb, - struct ipmi_smi_powernv, event_nb); + struct ipmi_smi_powernv *smi = data; - if (events & smi->event) - ipmi_powernv_recv(smi); - return 0; + ipmi_powernv_recv(smi); + return IRQ_HANDLED; } static int ipmi_powernv_probe(struct platform_device *pdev) @@ -240,13 +238,16 @@ static int ipmi_powernv_probe(struct platform_device *pdev) goto err_free; } - ipmi->event = 1ull << prop; - ipmi->event_nb.notifier_call = ipmi_opal_event; + ipmi->irq = irq_of_parse_and_map(dev->of_node, 0); + if (!ipmi->irq) { + dev_info(dev, "Unable to map irq from device tree\n"); + ipmi->irq = opal_event_request(prop); + } - rc = opal_notifier_register(&ipmi->event_nb); - if (rc) { - dev_warn(dev, "OPAL notifier registration failed (%d)\n", rc); - goto err_free; + if (request_irq(ipmi->irq, ipmi_opal_event, IRQ_TYPE_LEVEL_HIGH, + "opal-ipmi", ipmi)) { + dev_warn(dev, "Unable to request irq\n"); + goto err_dispose; } ipmi->opal_msg = devm_kmalloc(dev, @@ -271,7 +272,9 @@ static int ipmi_powernv_probe(struct platform_device *pdev) err_free_msg: devm_kfree(dev, ipmi->opal_msg); err_unregister: - opal_notifier_unregister(&ipmi->event_nb); + free_irq(ipmi->irq, ipmi); +err_dispose: + irq_dispose_mapping(ipmi->irq); err_free: devm_kfree(dev, ipmi); return rc; @@ -282,7 +285,9 @@ static int ipmi_powernv_remove(struct platform_device *pdev) struct ipmi_smi_powernv *smi = dev_get_drvdata(&pdev->dev); ipmi_unregister_smi(smi->intf); - opal_notifier_unregister(&smi->event_nb); + free_irq(smi->irq, smi); + irq_dispose_mapping(smi->irq); + return 0; } diff --git a/drivers/macintosh/nvram.c b/drivers/macintosh/nvram.c deleted file mode 100644 index f0e03e7..0000000 --- a/drivers/macintosh/nvram.c +++ /dev/null @@ -1,130 +0,0 @@ -/* - * /dev/nvram driver for Power Macintosh. 
- */ - -#define NVRAM_VERSION "1.0" - -#include <linux/module.h> - -#include <linux/types.h> -#include <linux/errno.h> -#include <linux/fs.h> -#include <linux/miscdevice.h> -#include <linux/fcntl.h> -#include <linux/nvram.h> -#include <linux/init.h> -#include <asm/uaccess.h> -#include <asm/nvram.h> - -#define NVRAM_SIZE 8192 - -static loff_t nvram_llseek(struct file *file, loff_t offset, int origin) -{ - switch (origin) { - case 0: - break; - case 1: - offset += file->f_pos; - break; - case 2: - offset += NVRAM_SIZE; - break; - default: - offset = -1; - } - if (offset < 0) - return -EINVAL; - - file->f_pos = offset; - return file->f_pos; -} - -static ssize_t read_nvram(struct file *file, char __user *buf, - size_t count, loff_t *ppos) -{ - unsigned int i; - char __user *p = buf; - - if (!access_ok(VERIFY_WRITE, buf, count)) - return -EFAULT; - if (*ppos >= NVRAM_SIZE) - return 0; - for (i = *ppos; count > 0 && i < NVRAM_SIZE; ++i, ++p, --count) - if (__put_user(nvram_read_byte(i), p)) - return -EFAULT; - *ppos = i; - return p - buf; -} - -static ssize_t write_nvram(struct file *file, const char __user *buf, - size_t count, loff_t *ppos) -{ - unsigned int i; - const char __user *p = buf; - char c; - - if (!access_ok(VERIFY_READ, buf, count)) - return -EFAULT; - if (*ppos >= NVRAM_SIZE) - return 0; - for (i = *ppos; count > 0 && i < NVRAM_SIZE; ++i, ++p, --count) { - if (__get_user(c, p)) - return -EFAULT; - nvram_write_byte(c, i); - } - *ppos = i; - return p - buf; -} - -static long nvram_ioctl(struct file *file, unsigned int cmd, unsigned long arg) -{ - switch(cmd) { - case PMAC_NVRAM_GET_OFFSET: - { - int part, offset; - if (copy_from_user(&part, (void __user*)arg, sizeof(part)) != 0) - return -EFAULT; - if (part < pmac_nvram_OF || part > pmac_nvram_NR) - return -EINVAL; - offset = pmac_get_partition(part); - if (copy_to_user((void __user*)arg, &offset, sizeof(offset)) != 0) - return -EFAULT; - break; - } - - default: - return -EINVAL; - } - - return 0; -} - -const struct file_operations nvram_fops = { - .owner = THIS_MODULE, - .llseek = nvram_llseek, - .read = read_nvram, - .write = write_nvram, - .unlocked_ioctl = nvram_ioctl, -}; - -static struct miscdevice nvram_dev = { - NVRAM_MINOR, - "nvram", - &nvram_fops -}; - -int __init nvram_init(void) -{ - printk(KERN_INFO "Macintosh non-volatile memory driver v%s\n", - NVRAM_VERSION); - return misc_register(&nvram_dev); -} - -void __exit nvram_cleanup(void) -{ - misc_deregister( &nvram_dev ); -} - -module_init(nvram_init); -module_exit(nvram_cleanup); -MODULE_LICENSE("GPL"); diff --git a/drivers/misc/cxl/Kconfig b/drivers/misc/cxl/Kconfig index a990b39..b6db9eb 100644 --- a/drivers/misc/cxl/Kconfig +++ b/drivers/misc/cxl/Kconfig @@ -7,10 +7,15 @@ config CXL_BASE default n select PPC_COPRO_BASE +config CXL_KERNEL_API + bool + default n + config CXL tristate "Support for IBM Coherent Accelerators (CXL)" depends on PPC_POWERNV && PCI_MSI select CXL_BASE + select CXL_KERNEL_API default m help Select this option to enable driver support for IBM Coherent diff --git a/drivers/misc/cxl/Makefile b/drivers/misc/cxl/Makefile index edb494d..14e3f82 100644 --- a/drivers/misc/cxl/Makefile +++ b/drivers/misc/cxl/Makefile @@ -1,4 +1,6 @@ -cxl-y += main.o file.o irq.o fault.o native.o context.o sysfs.o debugfs.o pci.o trace.o +cxl-y += main.o file.o irq.o fault.o native.o +cxl-y += context.o sysfs.o debugfs.o pci.o trace.o +cxl-y += vphb.o api.o obj-$(CONFIG_CXL) += cxl.o obj-$(CONFIG_CXL_BASE) += base.o diff --git a/drivers/misc/cxl/api.c 
b/drivers/misc/cxl/api.c new file mode 100644 index 0000000..0c77240 --- /dev/null +++ b/drivers/misc/cxl/api.c @@ -0,0 +1,331 @@ +/* + * Copyright 2014 IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/pci.h> +#include <linux/slab.h> +#include <linux/anon_inodes.h> +#include <linux/file.h> +#include <misc/cxl.h> + +#include "cxl.h" + +struct cxl_context *cxl_dev_context_init(struct pci_dev *dev) +{ + struct cxl_afu *afu; + struct cxl_context *ctx; + int rc; + + afu = cxl_pci_to_afu(dev); + + ctx = cxl_context_alloc(); + if (IS_ERR(ctx)) + return ctx; + + /* Make it a slave context. We can promote it later? */ + rc = cxl_context_init(ctx, afu, false, NULL); + if (rc) { + kfree(ctx); + return ERR_PTR(-ENOMEM); + } + cxl_assign_psn_space(ctx); + + return ctx; +} +EXPORT_SYMBOL_GPL(cxl_dev_context_init); + +struct cxl_context *cxl_get_context(struct pci_dev *dev) +{ + return dev->dev.archdata.cxl_ctx; +} +EXPORT_SYMBOL_GPL(cxl_get_context); + +struct device *cxl_get_phys_dev(struct pci_dev *dev) +{ + struct cxl_afu *afu; + + afu = cxl_pci_to_afu(dev); + + return afu->adapter->dev.parent; +} +EXPORT_SYMBOL_GPL(cxl_get_phys_dev); + +int cxl_release_context(struct cxl_context *ctx) +{ + if (ctx->status != CLOSED) + return -EBUSY; + + cxl_context_free(ctx); + + return 0; +} +EXPORT_SYMBOL_GPL(cxl_release_context); + +int cxl_allocate_afu_irqs(struct cxl_context *ctx, int num) +{ + if (num == 0) + num = ctx->afu->pp_irqs; + return afu_allocate_irqs(ctx, num); +} +EXPORT_SYMBOL_GPL(cxl_allocate_afu_irqs); + +void cxl_free_afu_irqs(struct cxl_context *ctx) +{ + cxl_release_irq_ranges(&ctx->irqs, ctx->afu->adapter); +} +EXPORT_SYMBOL_GPL(cxl_free_afu_irqs); + +static irq_hw_number_t cxl_find_afu_irq(struct cxl_context *ctx, int num) +{ + __u16 range; + int r; + + WARN_ON(num == 0); + + for (r = 0; r < CXL_IRQ_RANGES; r++) { + range = ctx->irqs.range[r]; + if (num < range) { + return ctx->irqs.offset[r] + num; + } + num -= range; + } + return 0; +} + +int cxl_map_afu_irq(struct cxl_context *ctx, int num, + irq_handler_t handler, void *cookie, char *name) +{ + irq_hw_number_t hwirq; + + /* + * Find interrupt we are to register. + */ + hwirq = cxl_find_afu_irq(ctx, num); + if (!hwirq) + return -ENOENT; + + return cxl_map_irq(ctx->afu->adapter, hwirq, handler, cookie, name); +} +EXPORT_SYMBOL_GPL(cxl_map_afu_irq); + +void cxl_unmap_afu_irq(struct cxl_context *ctx, int num, void *cookie) +{ + irq_hw_number_t hwirq; + unsigned int virq; + + hwirq = cxl_find_afu_irq(ctx, num); + if (!hwirq) + return; + + virq = irq_find_mapping(NULL, hwirq); + if (virq) + cxl_unmap_irq(virq, cookie); +} +EXPORT_SYMBOL_GPL(cxl_unmap_afu_irq); + +/* + * Start a context + * Code here similar to afu_ioctl_start_work(). 
+ */ +int cxl_start_context(struct cxl_context *ctx, u64 wed, + struct task_struct *task) +{ + int rc = 0; + bool kernel = true; + + pr_devel("%s: pe: %i\n", __func__, ctx->pe); + + mutex_lock(&ctx->status_mutex); + if (ctx->status == STARTED) + goto out; /* already started */ + + if (task) { + ctx->pid = get_task_pid(task, PIDTYPE_PID); + get_pid(ctx->pid); + kernel = false; + } + + cxl_ctx_get(); + + if ((rc = cxl_attach_process(ctx, kernel, wed , 0))) { + put_pid(ctx->pid); + cxl_ctx_put(); + goto out; + } + + ctx->status = STARTED; + get_device(&ctx->afu->dev); +out: + mutex_unlock(&ctx->status_mutex); + return rc; +} +EXPORT_SYMBOL_GPL(cxl_start_context); + +int cxl_process_element(struct cxl_context *ctx) +{ + return ctx->pe; +} +EXPORT_SYMBOL_GPL(cxl_process_element); + +/* Stop a context. Returns 0 on success, otherwise -Errno */ +int cxl_stop_context(struct cxl_context *ctx) +{ + int rc; + + rc = __detach_context(ctx); + if (!rc) + put_device(&ctx->afu->dev); + return rc; +} +EXPORT_SYMBOL_GPL(cxl_stop_context); + +void cxl_set_master(struct cxl_context *ctx) +{ + ctx->master = true; + cxl_assign_psn_space(ctx); +} +EXPORT_SYMBOL_GPL(cxl_set_master); + +/* wrappers around afu_* file ops which are EXPORTED */ +int cxl_fd_open(struct inode *inode, struct file *file) +{ + return afu_open(inode, file); +} +EXPORT_SYMBOL_GPL(cxl_fd_open); +int cxl_fd_release(struct inode *inode, struct file *file) +{ + return afu_release(inode, file); +} +EXPORT_SYMBOL_GPL(cxl_fd_release); +long cxl_fd_ioctl(struct file *file, unsigned int cmd, unsigned long arg) +{ + return afu_ioctl(file, cmd, arg); +} +EXPORT_SYMBOL_GPL(cxl_fd_ioctl); +int cxl_fd_mmap(struct file *file, struct vm_area_struct *vm) +{ + return afu_mmap(file, vm); +} +EXPORT_SYMBOL_GPL(cxl_fd_mmap); +unsigned int cxl_fd_poll(struct file *file, struct poll_table_struct *poll) +{ + return afu_poll(file, poll); +} +EXPORT_SYMBOL_GPL(cxl_fd_poll); +ssize_t cxl_fd_read(struct file *file, char __user *buf, size_t count, + loff_t *off) +{ + return afu_read(file, buf, count, off); +} +EXPORT_SYMBOL_GPL(cxl_fd_read); + +#define PATCH_FOPS(NAME) if (!fops->NAME) fops->NAME = afu_fops.NAME + +/* Get a struct file and fd for a context and attach the ops */ +struct file *cxl_get_fd(struct cxl_context *ctx, struct file_operations *fops, + int *fd) +{ + struct file *file; + int rc, flags, fdtmp; + + flags = O_RDWR | O_CLOEXEC; + + /* This code is similar to anon_inode_getfd() */ + rc = get_unused_fd_flags(flags); + if (rc < 0) + return ERR_PTR(rc); + fdtmp = rc; + + /* + * Patch the file ops. Needs to be careful that this is rentrant safe. 
+ */ + if (fops) { + PATCH_FOPS(open); + PATCH_FOPS(poll); + PATCH_FOPS(read); + PATCH_FOPS(release); + PATCH_FOPS(unlocked_ioctl); + PATCH_FOPS(compat_ioctl); + PATCH_FOPS(mmap); + } else /* use default ops */ + fops = (struct file_operations *)&afu_fops; + + file = anon_inode_getfile("cxl", fops, ctx, flags); + if (IS_ERR(file)) + put_unused_fd(fdtmp); + *fd = fdtmp; + return file; +} +EXPORT_SYMBOL_GPL(cxl_get_fd); + +struct cxl_context *cxl_fops_get_context(struct file *file) +{ + return file->private_data; +} +EXPORT_SYMBOL_GPL(cxl_fops_get_context); + +int cxl_start_work(struct cxl_context *ctx, + struct cxl_ioctl_start_work *work) +{ + int rc; + + /* code taken from afu_ioctl_start_work */ + if (!(work->flags & CXL_START_WORK_NUM_IRQS)) + work->num_interrupts = ctx->afu->pp_irqs; + else if ((work->num_interrupts < ctx->afu->pp_irqs) || + (work->num_interrupts > ctx->afu->irqs_max)) { + return -EINVAL; + } + + rc = afu_register_irqs(ctx, work->num_interrupts); + if (rc) + return rc; + + rc = cxl_start_context(ctx, work->work_element_descriptor, current); + if (rc < 0) { + afu_release_irqs(ctx, ctx); + return rc; + } + + return 0; +} +EXPORT_SYMBOL_GPL(cxl_start_work); + +void __iomem *cxl_psa_map(struct cxl_context *ctx) +{ + struct cxl_afu *afu = ctx->afu; + int rc; + + rc = cxl_afu_check_and_enable(afu); + if (rc) + return NULL; + + pr_devel("%s: psn_phys%llx size:%llx\n", + __func__, afu->psn_phys, afu->adapter->ps_size); + return ioremap(ctx->psn_phys, ctx->psn_size); +} +EXPORT_SYMBOL_GPL(cxl_psa_map); + +void cxl_psa_unmap(void __iomem *addr) +{ + iounmap(addr); +} +EXPORT_SYMBOL_GPL(cxl_psa_unmap); + +int cxl_afu_reset(struct cxl_context *ctx) +{ + struct cxl_afu *afu = ctx->afu; + int rc; + + rc = __cxl_afu_reset(afu); + if (rc) + return rc; + + return cxl_afu_check_and_enable(afu); +} +EXPORT_SYMBOL_GPL(cxl_afu_reset); diff --git a/drivers/misc/cxl/base.c b/drivers/misc/cxl/base.c index 0654ad8..a9f0dd3 100644 --- a/drivers/misc/cxl/base.c +++ b/drivers/misc/cxl/base.c @@ -10,7 +10,7 @@ #include <linux/module.h> #include <linux/rcupdate.h> #include <asm/errno.h> -#include <misc/cxl.h> +#include <misc/cxl-base.h> #include "cxl.h" /* protected by rcu */ diff --git a/drivers/misc/cxl/context.c b/drivers/misc/cxl/context.c index d1b55fe..2a4c80a 100644 --- a/drivers/misc/cxl/context.c +++ b/drivers/misc/cxl/context.c @@ -174,7 +174,7 @@ int cxl_context_iomap(struct cxl_context *ctx, struct vm_area_struct *vma) * return until all outstanding interrupts for this context have completed. The * hardware should no longer access *ctx after this has returned. 
*/ -static void __detach_context(struct cxl_context *ctx) +int __detach_context(struct cxl_context *ctx) { enum cxl_context_status status; @@ -183,12 +183,13 @@ static void __detach_context(struct cxl_context *ctx) ctx->status = CLOSED; mutex_unlock(&ctx->status_mutex); if (status != STARTED) - return; + return -EBUSY; WARN_ON(cxl_detach_process(ctx)); - afu_release_irqs(ctx); flush_work(&ctx->fault_work); /* Only needed for dedicated process */ - wake_up_all(&ctx->wq); + put_pid(ctx->pid); + cxl_ctx_put(); + return 0; } /* @@ -199,7 +200,14 @@ static void __detach_context(struct cxl_context *ctx) */ void cxl_context_detach(struct cxl_context *ctx) { - __detach_context(ctx); + int rc; + + rc = __detach_context(ctx); + if (rc) + return; + + afu_release_irqs(ctx, ctx); + wake_up_all(&ctx->wq); } /* @@ -216,7 +224,7 @@ void cxl_context_detach_all(struct cxl_afu *afu) * Anything done in here needs to be setup before the IDR is * created and torn down after the IDR removed */ - __detach_context(ctx); + cxl_context_detach(ctx); /* * We are force detaching - remove any active PSA mappings so @@ -232,16 +240,20 @@ void cxl_context_detach_all(struct cxl_afu *afu) mutex_unlock(&afu->contexts_lock); } -void cxl_context_free(struct cxl_context *ctx) +static void reclaim_ctx(struct rcu_head *rcu) { - mutex_lock(&ctx->afu->contexts_lock); - idr_remove(&ctx->afu->contexts_idr, ctx->pe); - mutex_unlock(&ctx->afu->contexts_lock); - synchronize_rcu(); + struct cxl_context *ctx = container_of(rcu, struct cxl_context, rcu); free_page((u64)ctx->sstp); ctx->sstp = NULL; - put_pid(ctx->pid); kfree(ctx); } + +void cxl_context_free(struct cxl_context *ctx) +{ + mutex_lock(&ctx->afu->contexts_lock); + idr_remove(&ctx->afu->contexts_idr, ctx->pe); + mutex_unlock(&ctx->afu->contexts_lock); + call_rcu(&ctx->rcu, reclaim_ctx); +} diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h index a1cee47..4fd66ca 100644 --- a/drivers/misc/cxl/cxl.h +++ b/drivers/misc/cxl/cxl.h @@ -18,10 +18,11 @@ #include <linux/pid.h> #include <linux/io.h> #include <linux/pci.h> +#include <linux/fs.h> #include <asm/cputable.h> #include <asm/mmu.h> #include <asm/reg.h> -#include <misc/cxl.h> +#include <misc/cxl-base.h> #include <uapi/misc/cxl.h> @@ -315,8 +316,6 @@ static const cxl_p2n_reg_t CXL_PSL_WED_An = {0x0A0}; #define CXL_MAX_SLICES 4 #define MAX_AFU_MMIO_REGS 3 -#define CXL_MODE_DEDICATED 0x1 -#define CXL_MODE_DIRECTED 0x2 #define CXL_MODE_TIME_SLICED 0x4 #define CXL_SUPPORTED_MODES (CXL_MODE_DEDICATED | CXL_MODE_DIRECTED) @@ -362,6 +361,10 @@ struct cxl_afu { struct mutex spa_mutex; spinlock_t afu_cntl_lock; + /* AFU error buffer fields and bin attribute for sysfs */ + u64 eb_len, eb_offset; + struct bin_attribute attr_eb; + /* * Only the first part of the SPA is used for the process element * linked list. 
The only other part that software needs to worry about @@ -375,6 +378,9 @@ struct cxl_afu { int spa_max_procs; unsigned int psl_virq; + /* pointer to the vphb */ + struct pci_controller *phb; + int pp_irqs; int irqs_max; int num_procs; @@ -455,6 +461,8 @@ struct cxl_context { bool pending_irq; bool pending_fault; bool pending_afu_err; + + struct rcu_head rcu; }; struct cxl { @@ -563,6 +571,9 @@ static inline void __iomem *_cxl_p2n_addr(struct cxl_afu *afu, cxl_p2n_reg_t reg u16 cxl_afu_cr_read16(struct cxl_afu *afu, int cr, u64 off); u8 cxl_afu_cr_read8(struct cxl_afu *afu, int cr, u64 off); +ssize_t cxl_afu_read_err_buffer(struct cxl_afu *afu, char *buf, + loff_t off, size_t count); + struct cxl_calls { void (*cxl_slbia)(struct mm_struct *mm); @@ -606,7 +617,7 @@ void cxl_release_psl_err_irq(struct cxl *adapter); int cxl_register_serr_irq(struct cxl_afu *afu); void cxl_release_serr_irq(struct cxl_afu *afu); int afu_register_irqs(struct cxl_context *ctx, u32 count); -void afu_release_irqs(struct cxl_context *ctx); +void afu_release_irqs(struct cxl_context *ctx, void *cookie); irqreturn_t cxl_slice_irq_err(int irq, void *data); int cxl_debugfs_init(void); @@ -629,6 +640,10 @@ int cxl_context_init(struct cxl_context *ctx, struct cxl_afu *afu, bool master, struct address_space *mapping); void cxl_context_free(struct cxl_context *ctx); int cxl_context_iomap(struct cxl_context *ctx, struct vm_area_struct *vma); +unsigned int cxl_map_irq(struct cxl *adapter, irq_hw_number_t hwirq, + irq_handler_t handler, void *cookie, const char *name); +void cxl_unmap_irq(unsigned int virq, void *cookie); +int __detach_context(struct cxl_context *ctx); /* This matches the layout of the H_COLLECT_CA_INT_INFO retbuf */ struct cxl_irq_info { @@ -642,6 +657,7 @@ struct cxl_irq_info { u64 padding[3]; /* to match the expected retbuf size for plpar_hcall9 */ }; +void cxl_assign_psn_space(struct cxl_context *ctx); int cxl_attach_process(struct cxl_context *ctx, bool kernel, u64 wed, u64 amr); int cxl_detach_process(struct cxl_context *ctx); @@ -653,11 +669,23 @@ int cxl_check_error(struct cxl_afu *afu); int cxl_afu_slbia(struct cxl_afu *afu); int cxl_tlb_slb_invalidate(struct cxl *adapter); int cxl_afu_disable(struct cxl_afu *afu); -int cxl_afu_reset(struct cxl_afu *afu); +int __cxl_afu_reset(struct cxl_afu *afu); +int cxl_afu_check_and_enable(struct cxl_afu *afu); int cxl_psl_purge(struct cxl_afu *afu); void cxl_stop_trace(struct cxl *cxl); +int cxl_pci_vphb_add(struct cxl_afu *afu); +void cxl_pci_vphb_remove(struct cxl_afu *afu); extern struct pci_driver cxl_pci_driver; +int afu_allocate_irqs(struct cxl_context *ctx, u32 count); + +int afu_open(struct inode *inode, struct file *file); +int afu_release(struct inode *inode, struct file *file); +long afu_ioctl(struct file *file, unsigned int cmd, unsigned long arg); +int afu_mmap(struct file *file, struct vm_area_struct *vm); +unsigned int afu_poll(struct file *file, struct poll_table_struct *poll); +ssize_t afu_read(struct file *file, char __user *buf, size_t count, loff_t *off); +extern const struct file_operations afu_fops; #endif diff --git a/drivers/misc/cxl/fault.c b/drivers/misc/cxl/fault.c index 5286b8b..25a5418 100644 --- a/drivers/misc/cxl/fault.c +++ b/drivers/misc/cxl/fault.c @@ -172,8 +172,8 @@ void cxl_handle_fault(struct work_struct *fault_work) container_of(fault_work, struct cxl_context, fault_work); u64 dsisr = ctx->dsisr; u64 dar = ctx->dar; - struct task_struct *task; - struct mm_struct *mm; + struct task_struct *task = NULL; + struct mm_struct *mm 
= NULL; if (cxl_p2n_read(ctx->afu, CXL_PSL_DSISR_An) != dsisr || cxl_p2n_read(ctx->afu, CXL_PSL_DAR_An) != dar || @@ -194,17 +194,19 @@ void cxl_handle_fault(struct work_struct *fault_work) pr_devel("CXL BOTTOM HALF handling fault for afu pe: %i. " "DSISR: %#llx DAR: %#llx\n", ctx->pe, dsisr, dar); - if (!(task = get_pid_task(ctx->pid, PIDTYPE_PID))) { - pr_devel("cxl_handle_fault unable to get task %i\n", - pid_nr(ctx->pid)); - cxl_ack_ae(ctx); - return; - } - if (!(mm = get_task_mm(task))) { - pr_devel("cxl_handle_fault unable to get mm %i\n", - pid_nr(ctx->pid)); - cxl_ack_ae(ctx); - goto out; + if (!ctx->kernel) { + if (!(task = get_pid_task(ctx->pid, PIDTYPE_PID))) { + pr_devel("cxl_handle_fault unable to get task %i\n", + pid_nr(ctx->pid)); + cxl_ack_ae(ctx); + return; + } + if (!(mm = get_task_mm(task))) { + pr_devel("cxl_handle_fault unable to get mm %i\n", + pid_nr(ctx->pid)); + cxl_ack_ae(ctx); + goto out; + } } if (dsisr & CXL_PSL_DSISR_An_DS) @@ -214,9 +216,11 @@ void cxl_handle_fault(struct work_struct *fault_work) else WARN(1, "cxl_handle_fault has nothing to handle\n"); - mmput(mm); + if (mm) + mmput(mm); out: - put_task_struct(task); + if (task) + put_task_struct(task); } static void cxl_prefault_one(struct cxl_context *ctx, u64 ea) diff --git a/drivers/misc/cxl/file.c b/drivers/misc/cxl/file.c index 2364bca..e3f4b69 100644 --- a/drivers/misc/cxl/file.c +++ b/drivers/misc/cxl/file.c @@ -96,7 +96,8 @@ err_put_adapter: put_device(&adapter->dev); return rc; } -static int afu_open(struct inode *inode, struct file *file) + +int afu_open(struct inode *inode, struct file *file) { return __afu_open(inode, file, false); } @@ -106,7 +107,7 @@ static int afu_master_open(struct inode *inode, struct file *file) return __afu_open(inode, file, true); } -static int afu_release(struct inode *inode, struct file *file) +int afu_release(struct inode *inode, struct file *file) { struct cxl_context *ctx = file->private_data; @@ -128,7 +129,6 @@ static int afu_release(struct inode *inode, struct file *file) */ cxl_context_free(ctx); - cxl_ctx_put(); return 0; } @@ -191,7 +191,7 @@ static long afu_ioctl_start_work(struct cxl_context *ctx, if ((rc = cxl_attach_process(ctx, false, work.work_element_descriptor, amr))) { - afu_release_irqs(ctx); + afu_release_irqs(ctx, ctx); goto out; } @@ -212,7 +212,26 @@ static long afu_ioctl_process_element(struct cxl_context *ctx, return 0; } -static long afu_ioctl(struct file *file, unsigned int cmd, unsigned long arg) +static long afu_ioctl_get_afu_id(struct cxl_context *ctx, + struct cxl_afu_id __user *upafuid) +{ + struct cxl_afu_id afuid = { 0 }; + + afuid.card_id = ctx->afu->adapter->adapter_num; + afuid.afu_offset = ctx->afu->slice; + afuid.afu_mode = ctx->afu->current_mode; + + /* set the flag bit in case the afu is a slave */ + if (ctx->afu->current_mode == CXL_MODE_DIRECTED && !ctx->master) + afuid.flags |= CXL_AFUID_FLAG_SLAVE; + + if (copy_to_user(upafuid, &afuid, sizeof(afuid))) + return -EFAULT; + + return 0; +} + +long afu_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { struct cxl_context *ctx = file->private_data; @@ -225,17 +244,20 @@ static long afu_ioctl(struct file *file, unsigned int cmd, unsigned long arg) return afu_ioctl_start_work(ctx, (struct cxl_ioctl_start_work __user *)arg); case CXL_IOCTL_GET_PROCESS_ELEMENT: return afu_ioctl_process_element(ctx, (__u32 __user *)arg); + case CXL_IOCTL_GET_AFU_ID: + return afu_ioctl_get_afu_id(ctx, (struct cxl_afu_id __user *) + arg); } return -EINVAL; } -static long 
afu_compat_ioctl(struct file *file, unsigned int cmd, +long afu_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { return afu_ioctl(file, cmd, arg); } -static int afu_mmap(struct file *file, struct vm_area_struct *vm) +int afu_mmap(struct file *file, struct vm_area_struct *vm) { struct cxl_context *ctx = file->private_data; @@ -246,7 +268,7 @@ static int afu_mmap(struct file *file, struct vm_area_struct *vm) return cxl_context_iomap(ctx, vm); } -static unsigned int afu_poll(struct file *file, struct poll_table_struct *poll) +unsigned int afu_poll(struct file *file, struct poll_table_struct *poll) { struct cxl_context *ctx = file->private_data; int mask = 0; @@ -278,7 +300,7 @@ static inline int ctx_event_pending(struct cxl_context *ctx) ctx->pending_afu_err || (ctx->status == CLOSED)); } -static ssize_t afu_read(struct file *file, char __user *buf, size_t count, +ssize_t afu_read(struct file *file, char __user *buf, size_t count, loff_t *off) { struct cxl_context *ctx = file->private_data; @@ -359,7 +381,11 @@ out: return rc; } -static const struct file_operations afu_fops = { +/* + * Note: if this is updated, we need to update api.c to patch the new ones in + * too + */ +const struct file_operations afu_fops = { .owner = THIS_MODULE, .open = afu_open, .poll = afu_poll, @@ -370,7 +396,7 @@ static const struct file_operations afu_fops = { .mmap = afu_mmap, }; -static const struct file_operations afu_master_fops = { +const struct file_operations afu_master_fops = { .owner = THIS_MODULE, .open = afu_master_open, .poll = afu_poll, diff --git a/drivers/misc/cxl/irq.c b/drivers/misc/cxl/irq.c index c8929c5..680cd26 100644 --- a/drivers/misc/cxl/irq.c +++ b/drivers/misc/cxl/irq.c @@ -14,7 +14,7 @@ #include <linux/slab.h> #include <linux/pid.h> #include <asm/cputable.h> -#include <misc/cxl.h> +#include <misc/cxl-base.h> #include "cxl.h" #include "trace.h" @@ -416,9 +416,8 @@ void afu_irq_name_free(struct cxl_context *ctx) } } -int afu_register_irqs(struct cxl_context *ctx, u32 count) +int afu_allocate_irqs(struct cxl_context *ctx, u32 count) { - irq_hw_number_t hwirq; int rc, r, i, j = 1; struct cxl_irq_name *irq_name; @@ -458,6 +457,18 @@ int afu_register_irqs(struct cxl_context *ctx, u32 count) j++; } } + return 0; + +out: + afu_irq_name_free(ctx); + return -ENOMEM; +} + +void afu_register_hwirqs(struct cxl_context *ctx) +{ + irq_hw_number_t hwirq; + struct cxl_irq_name *irq_name; + int r,i; /* We've allocated all memory now, so let's do the irq allocations */ irq_name = list_first_entry(&ctx->irq_names, struct cxl_irq_name, list); @@ -469,15 +480,21 @@ int afu_register_irqs(struct cxl_context *ctx, u32 count) irq_name = list_next_entry(irq_name, list); } } +} - return 0; +int afu_register_irqs(struct cxl_context *ctx, u32 count) +{ + int rc; -out: - afu_irq_name_free(ctx); - return -ENOMEM; -} + rc = afu_allocate_irqs(ctx, count); + if (rc) + return rc; + + afu_register_hwirqs(ctx); + return 0; + } -void afu_release_irqs(struct cxl_context *ctx) +void afu_release_irqs(struct cxl_context *ctx, void *cookie) { irq_hw_number_t hwirq; unsigned int virq; @@ -488,7 +505,7 @@ void afu_release_irqs(struct cxl_context *ctx) for (i = 0; i < ctx->irqs.range[r]; hwirq++, i++) { virq = irq_find_mapping(NULL, hwirq); if (virq) - cxl_unmap_irq(virq, ctx); + cxl_unmap_irq(virq, cookie); } } diff --git a/drivers/misc/cxl/main.c b/drivers/misc/cxl/main.c index 8ccddce..833348e 100644 --- a/drivers/misc/cxl/main.c +++ b/drivers/misc/cxl/main.c @@ -20,7 +20,7 @@ #include <linux/idr.h> #include 
<linux/pci.h> #include <asm/cputable.h> -#include <misc/cxl.h> +#include <misc/cxl-base.h> #include "cxl.h" #include "trace.h" diff --git a/drivers/misc/cxl/native.c b/drivers/misc/cxl/native.c index 29185fc..10567f2 100644 --- a/drivers/misc/cxl/native.c +++ b/drivers/misc/cxl/native.c @@ -15,7 +15,7 @@ #include <linux/mm.h> #include <linux/uaccess.h> #include <asm/synch.h> -#include <misc/cxl.h> +#include <misc/cxl-base.h> #include "cxl.h" #include "trace.h" @@ -73,7 +73,7 @@ int cxl_afu_disable(struct cxl_afu *afu) } /* This will disable as well as reset */ -int cxl_afu_reset(struct cxl_afu *afu) +int __cxl_afu_reset(struct cxl_afu *afu) { pr_devel("AFU reset request\n"); @@ -83,7 +83,7 @@ int cxl_afu_reset(struct cxl_afu *afu) false); } -static int afu_check_and_enable(struct cxl_afu *afu) +int cxl_afu_check_and_enable(struct cxl_afu *afu) { if (afu->enabled) return 0; @@ -379,7 +379,7 @@ static int remove_process_element(struct cxl_context *ctx) } -static void assign_psn_space(struct cxl_context *ctx) +void cxl_assign_psn_space(struct cxl_context *ctx) { if (!ctx->afu->pp_size || ctx->master) { ctx->psn_phys = ctx->afu->psn_phys; @@ -430,34 +430,46 @@ err: #define set_endian(sr) ((sr) &= ~(CXL_PSL_SR_An_LE)) #endif +static u64 calculate_sr(struct cxl_context *ctx) +{ + u64 sr = 0; + + if (ctx->master) + sr |= CXL_PSL_SR_An_MP; + if (mfspr(SPRN_LPCR) & LPCR_TC) + sr |= CXL_PSL_SR_An_TC; + if (ctx->kernel) { + sr |= CXL_PSL_SR_An_R | (mfmsr() & MSR_SF); + sr |= CXL_PSL_SR_An_HV; + } else { + sr |= CXL_PSL_SR_An_PR | CXL_PSL_SR_An_R; + set_endian(sr); + sr &= ~(CXL_PSL_SR_An_HV); + if (!test_tsk_thread_flag(current, TIF_32BIT)) + sr |= CXL_PSL_SR_An_SF; + } + return sr; +} + static int attach_afu_directed(struct cxl_context *ctx, u64 wed, u64 amr) { - u64 sr; + u32 pid; int r, result; - assign_psn_space(ctx); + cxl_assign_psn_space(ctx); ctx->elem->ctxtime = 0; /* disable */ ctx->elem->lpid = cpu_to_be32(mfspr(SPRN_LPID)); ctx->elem->haurp = 0; /* disable */ ctx->elem->sdr = cpu_to_be64(mfspr(SPRN_SDR1)); - sr = 0; - if (ctx->master) - sr |= CXL_PSL_SR_An_MP; - if (mfspr(SPRN_LPCR) & LPCR_TC) - sr |= CXL_PSL_SR_An_TC; - /* HV=0, PR=1, R=1 for userspace - * For kernel contexts: this would need to change - */ - sr |= CXL_PSL_SR_An_PR | CXL_PSL_SR_An_R; - set_endian(sr); - sr &= ~(CXL_PSL_SR_An_HV); - if (!test_tsk_thread_flag(current, TIF_32BIT)) - sr |= CXL_PSL_SR_An_SF; - ctx->elem->common.pid = cpu_to_be32(current->pid); + pid = current->pid; + if (ctx->kernel) + pid = 0; ctx->elem->common.tid = 0; - ctx->elem->sr = cpu_to_be64(sr); + ctx->elem->common.pid = cpu_to_be32(pid); + + ctx->elem->sr = cpu_to_be64(calculate_sr(ctx)); ctx->elem->common.csrp = 0; /* disable */ ctx->elem->common.aurp0 = 0; /* disable */ @@ -477,7 +489,7 @@ static int attach_afu_directed(struct cxl_context *ctx, u64 wed, u64 amr) ctx->elem->common.wed = cpu_to_be64(wed); /* first guy needs to enable */ - if ((result = afu_check_and_enable(ctx->afu))) + if ((result = cxl_afu_check_and_enable(ctx->afu))) return result; add_process_element(ctx); @@ -495,7 +507,7 @@ static int deactivate_afu_directed(struct cxl_afu *afu) cxl_sysfs_afu_m_remove(afu); cxl_chardev_afu_remove(afu); - cxl_afu_reset(afu); + __cxl_afu_reset(afu); cxl_afu_disable(afu); cxl_psl_purge(afu); @@ -530,20 +542,15 @@ static int activate_dedicated_process(struct cxl_afu *afu) static int attach_dedicated(struct cxl_context *ctx, u64 wed, u64 amr) { struct cxl_afu *afu = ctx->afu; - u64 sr; + u64 pid; int rc; - sr = 0; - set_endian(sr); - if 
(ctx->master) - sr |= CXL_PSL_SR_An_MP; - if (mfspr(SPRN_LPCR) & LPCR_TC) - sr |= CXL_PSL_SR_An_TC; - sr |= CXL_PSL_SR_An_PR | CXL_PSL_SR_An_R; - if (!test_tsk_thread_flag(current, TIF_32BIT)) - sr |= CXL_PSL_SR_An_SF; - cxl_p2n_write(afu, CXL_PSL_PID_TID_An, (u64)current->pid << 32); - cxl_p1n_write(afu, CXL_PSL_SR_An, sr); + pid = (u64)current->pid << 32; + if (ctx->kernel) + pid = 0; + cxl_p2n_write(afu, CXL_PSL_PID_TID_An, pid); + + cxl_p1n_write(afu, CXL_PSL_SR_An, calculate_sr(ctx)); if ((rc = cxl_write_sstp(afu, ctx->sstp0, ctx->sstp1))) return rc; @@ -564,9 +571,9 @@ static int attach_dedicated(struct cxl_context *ctx, u64 wed, u64 amr) cxl_p2n_write(afu, CXL_PSL_AMR_An, amr); /* master only context for dedicated */ - assign_psn_space(ctx); + cxl_assign_psn_space(ctx); - if ((rc = cxl_afu_reset(afu))) + if ((rc = __cxl_afu_reset(afu))) return rc; cxl_p2n_write(afu, CXL_PSL_WED_An, wed); @@ -629,7 +636,7 @@ int cxl_attach_process(struct cxl_context *ctx, bool kernel, u64 wed, u64 amr) static inline int detach_process_native_dedicated(struct cxl_context *ctx) { - cxl_afu_reset(ctx->afu); + __cxl_afu_reset(ctx->afu); cxl_afu_disable(ctx->afu); cxl_psl_purge(ctx->afu); return 0; diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c index 1ef0164..c68ef58 100644 --- a/drivers/misc/cxl/pci.c +++ b/drivers/misc/cxl/pci.c @@ -90,6 +90,7 @@ /* This works a little different than the p1/p2 register accesses to make it * easier to pull out individual fields */ #define AFUD_READ(afu, off) in_be64(afu->afu_desc_mmio + off) +#define AFUD_READ_LE(afu, off) in_le64(afu->afu_desc_mmio + off) #define EXTRACT_PPC_BIT(val, bit) (!!(val & PPC_BIT(bit))) #define EXTRACT_PPC_BITS(val, bs, be) ((val & PPC_BITMASK(bs, be)) >> PPC_BITLSHIFT(be)) @@ -204,7 +205,7 @@ static void dump_cxl_config_space(struct pci_dev *dev) dev_info(&dev->dev, "p1 regs: %#llx, len: %#llx\n", p1_base(dev), p1_size(dev)); dev_info(&dev->dev, "p2 regs: %#llx, len: %#llx\n", - p1_base(dev), p2_size(dev)); + p2_base(dev), p2_size(dev)); dev_info(&dev->dev, "BAR 4/5: %#llx, len: %#llx\n", pci_resource_start(dev, 4), pci_resource_len(dev, 4)); @@ -286,7 +287,8 @@ static void dump_cxl_config_space(struct pci_dev *dev) static void dump_afu_descriptor(struct cxl_afu *afu) { - u64 val; + u64 val, afu_cr_num, afu_cr_off, afu_cr_len; + int i; #define show_reg(name, what) \ dev_info(&afu->dev, "afu desc: %30s: %#llx\n", name, what) @@ -296,6 +298,7 @@ static void dump_afu_descriptor(struct cxl_afu *afu) show_reg("num_of_processes", AFUD_NUM_PROCS(val)); show_reg("num_of_afu_CRs", AFUD_NUM_CRS(val)); show_reg("req_prog_mode", val & 0xffffULL); + afu_cr_num = AFUD_NUM_CRS(val); val = AFUD_READ(afu, 0x8); show_reg("Reserved", val); @@ -307,8 +310,10 @@ static void dump_afu_descriptor(struct cxl_afu *afu) val = AFUD_READ_CR(afu); show_reg("Reserved", (val >> (63-7)) & 0xff); show_reg("AFU_CR_len", AFUD_CR_LEN(val)); + afu_cr_len = AFUD_CR_LEN(val) * 256; val = AFUD_READ_CR_OFF(afu); + afu_cr_off = val; show_reg("AFU_CR_offset", val); val = AFUD_READ_PPPSA(afu); @@ -325,6 +330,11 @@ static void dump_afu_descriptor(struct cxl_afu *afu) val = AFUD_READ_EB_OFF(afu); show_reg("AFU_EB_offset", val); + for (i = 0; i < afu_cr_num; i++) { + val = AFUD_READ_LE(afu, afu_cr_off + i * afu_cr_len); + show_reg("CR Vendor", val & 0xffff); + show_reg("CR Device", (val >> 16) & 0xffff); + } #undef show_reg } @@ -593,6 +603,22 @@ static int cxl_read_afu_descriptor(struct cxl_afu *afu) afu->crs_len = AFUD_CR_LEN(val) * 256; afu->crs_offset = 
AFUD_READ_CR_OFF(afu); + + /* eb_len is in multiple of 4K */ + afu->eb_len = AFUD_EB_LEN(AFUD_READ_EB(afu)) * 4096; + afu->eb_offset = AFUD_READ_EB_OFF(afu); + + /* eb_off is 4K aligned so lower 12 bits are always zero */ + if (EXTRACT_PPC_BITS(afu->eb_offset, 0, 11) != 0) { + dev_warn(&afu->dev, + "Invalid AFU error buffer offset %Lx\n", + afu->eb_offset); + dev_info(&afu->dev, + "Ignoring AFU error buffer in the descriptor\n"); + /* indicate that no afu buffer exists */ + afu->eb_len = 0; + } + return 0; } @@ -631,7 +657,7 @@ static int sanitise_afu_regs(struct cxl_afu *afu) reg = cxl_p2n_read(afu, CXL_AFU_Cntl_An); if ((reg & CXL_AFU_Cntl_An_ES_MASK) != CXL_AFU_Cntl_An_ES_Disabled) { dev_warn(&afu->dev, "WARNING: AFU was not disabled: %#.16llx\n", reg); - if (cxl_afu_reset(afu)) + if (__cxl_afu_reset(afu)) return -EIO; if (cxl_afu_disable(afu)) return -EIO; @@ -672,6 +698,50 @@ static int sanitise_afu_regs(struct cxl_afu *afu) return 0; } +#define ERR_BUFF_MAX_COPY_SIZE PAGE_SIZE +/* + * afu_eb_read: + * Called from sysfs and reads the afu error info buffer. The h/w only supports + * 4/8 bytes aligned access. So in case the requested offset/count arent 8 byte + * aligned the function uses a bounce buffer which can be max PAGE_SIZE. + */ +ssize_t cxl_afu_read_err_buffer(struct cxl_afu *afu, char *buf, + loff_t off, size_t count) +{ + loff_t aligned_start, aligned_end; + size_t aligned_length; + void *tbuf; + const void __iomem *ebuf = afu->afu_desc_mmio + afu->eb_offset; + + if (count == 0 || off < 0 || (size_t)off >= afu->eb_len) + return 0; + + /* calculate aligned read window */ + count = min((size_t)(afu->eb_len - off), count); + aligned_start = round_down(off, 8); + aligned_end = round_up(off + count, 8); + aligned_length = aligned_end - aligned_start; + + /* max we can copy in one read is PAGE_SIZE */ + if (aligned_length > ERR_BUFF_MAX_COPY_SIZE) { + aligned_length = ERR_BUFF_MAX_COPY_SIZE; + count = ERR_BUFF_MAX_COPY_SIZE - (off & 0x7); + } + + /* use bounce buffer for copy */ + tbuf = (void *)__get_free_page(GFP_TEMPORARY); + if (!tbuf) + return -ENOMEM; + + /* perform aligned read from the mmio region */ + memcpy_fromio(tbuf, ebuf + aligned_start, aligned_length); + memcpy(buf, tbuf + (off & 0x7), count); + + free_page((unsigned long)tbuf); + + return count; +} + static int cxl_init_afu(struct cxl *adapter, int slice, struct pci_dev *dev) { struct cxl_afu *afu; @@ -691,7 +761,7 @@ static int cxl_init_afu(struct cxl *adapter, int slice, struct pci_dev *dev) goto err2; /* We need to reset the AFU before we can read the AFU descriptor */ - if ((rc = cxl_afu_reset(afu))) + if ((rc = __cxl_afu_reset(afu))) goto err2; if (cxl_verbose) @@ -731,6 +801,9 @@ static int cxl_init_afu(struct cxl *adapter, int slice, struct pci_dev *dev) adapter->afu[afu->slice] = afu; + if ((rc = cxl_pci_vphb_add(afu))) + dev_info(&afu->dev, "Can't register vPHB\n"); + return 0; err_put2: @@ -783,8 +856,10 @@ int cxl_reset(struct cxl *adapter) dev_info(&dev->dev, "CXL reset\n"); - for (i = 0; i < adapter->slices; i++) + for (i = 0; i < adapter->slices; i++) { + cxl_pci_vphb_remove(adapter->afu[i]); cxl_remove_afu(adapter->afu[i]); + } /* pcie_warm_reset requests a fundamental pci reset which includes a * PERST assert/deassert. 
PERST triggers a loading of the image @@ -857,13 +932,13 @@ static int cxl_read_vsec(struct cxl *adapter, struct pci_dev *dev) u8 image_state; if (!(vsec = find_cxl_vsec(dev))) { - dev_err(&adapter->dev, "ABORTING: CXL VSEC not found!\n"); + dev_err(&dev->dev, "ABORTING: CXL VSEC not found!\n"); return -ENODEV; } CXL_READ_VSEC_LENGTH(dev, vsec, &vseclen); if (vseclen < CXL_VSEC_MIN_SIZE) { - pr_err("ABORTING: CXL VSEC too short\n"); + dev_err(&dev->dev, "ABORTING: CXL VSEC too short\n"); return -EINVAL; } @@ -902,24 +977,24 @@ static int cxl_vsec_looks_ok(struct cxl *adapter, struct pci_dev *dev) return -EBUSY; if (adapter->vsec_status & CXL_UNSUPPORTED_FEATURES) { - dev_err(&adapter->dev, "ABORTING: CXL requires unsupported features\n"); + dev_err(&dev->dev, "ABORTING: CXL requires unsupported features\n"); return -EINVAL; } if (!adapter->slices) { /* Once we support dynamic reprogramming we can use the card if * it supports loadable AFUs */ - dev_err(&adapter->dev, "ABORTING: Device has no AFUs\n"); + dev_err(&dev->dev, "ABORTING: Device has no AFUs\n"); return -EINVAL; } if (!adapter->afu_desc_off || !adapter->afu_desc_size) { - dev_err(&adapter->dev, "ABORTING: VSEC shows no AFU descriptors\n"); + dev_err(&dev->dev, "ABORTING: VSEC shows no AFU descriptors\n"); return -EINVAL; } if (adapter->ps_size > p2_size(dev) - adapter->ps_off) { - dev_err(&adapter->dev, "ABORTING: Problem state size larger than " + dev_err(&dev->dev, "ABORTING: Problem state size larger than " "available in BAR2: 0x%llx > 0x%llx\n", adapter->ps_size, p2_size(dev) - adapter->ps_off); return -EINVAL; @@ -968,6 +1043,15 @@ static struct cxl *cxl_init_adapter(struct pci_dev *dev) if (!(adapter = cxl_alloc_adapter(dev))) return ERR_PTR(-ENOMEM); + if ((rc = cxl_read_vsec(adapter, dev))) + goto err1; + + if ((rc = cxl_vsec_looks_ok(adapter, dev))) + goto err1; + + if ((rc = setup_cxl_bars(dev))) + goto err1; + if ((rc = switch_card_to_cxl(dev))) goto err1; @@ -977,12 +1061,6 @@ static struct cxl *cxl_init_adapter(struct pci_dev *dev) if ((rc = dev_set_name(&adapter->dev, "card%i", adapter->adapter_num))) goto err2; - if ((rc = cxl_read_vsec(adapter, dev))) - goto err2; - - if ((rc = cxl_vsec_looks_ok(adapter, dev))) - goto err2; - if ((rc = cxl_update_image_control(adapter))) goto err2; @@ -1067,9 +1145,6 @@ static int cxl_probe(struct pci_dev *dev, const struct pci_device_id *id) if (cxl_verbose) dump_cxl_config_space(dev); - if ((rc = setup_cxl_bars(dev))) - return rc; - if ((rc = pci_enable_device(dev))) { dev_err(&dev->dev, "pci_enable_device failed: %i\n", rc); return rc; @@ -1078,6 +1153,7 @@ static int cxl_probe(struct pci_dev *dev, const struct pci_device_id *id) adapter = cxl_init_adapter(dev); if (IS_ERR(adapter)) { dev_err(&dev->dev, "cxl_init_adapter failed: %li\n", PTR_ERR(adapter)); + pci_disable_device(dev); return PTR_ERR(adapter); } @@ -1092,16 +1168,18 @@ static int cxl_probe(struct pci_dev *dev, const struct pci_device_id *id) static void cxl_remove(struct pci_dev *dev) { struct cxl *adapter = pci_get_drvdata(dev); - int afu; - - dev_warn(&dev->dev, "pci remove\n"); + struct cxl_afu *afu; + int i; /* * Lock to prevent someone grabbing a ref through the adapter list as * we are removing it */ - for (afu = 0; afu < adapter->slices; afu++) - cxl_remove_afu(adapter->afu[afu]); + for (i = 0; i < adapter->slices; i++) { + afu = adapter->afu[i]; + cxl_pci_vphb_remove(afu); + cxl_remove_afu(afu); + } cxl_remove_adapter(adapter); } @@ -1110,4 +1188,5 @@ struct pci_driver cxl_pci_driver = { .id_table = 
cxl_pci_tbl, .probe = cxl_probe, .remove = cxl_remove, + .shutdown = cxl_remove, }; diff --git a/drivers/misc/cxl/sysfs.c b/drivers/misc/cxl/sysfs.c index d0c38c7..31f38bc 100644 --- a/drivers/misc/cxl/sysfs.c +++ b/drivers/misc/cxl/sysfs.c @@ -185,7 +185,7 @@ static ssize_t reset_store_afu(struct device *device, goto err; } - if ((rc = cxl_afu_reset(afu))) + if ((rc = __cxl_afu_reset(afu))) goto err; rc = count; @@ -356,6 +356,16 @@ static ssize_t api_version_compatible_show(struct device *device, return scnprintf(buf, PAGE_SIZE, "%i\n", CXL_API_VERSION_COMPATIBLE); } +static ssize_t afu_eb_read(struct file *filp, struct kobject *kobj, + struct bin_attribute *bin_attr, char *buf, + loff_t off, size_t count) +{ + struct cxl_afu *afu = to_cxl_afu(container_of(kobj, + struct device, kobj)); + + return cxl_afu_read_err_buffer(afu, buf, off, count); +} + static struct device_attribute afu_attrs[] = { __ATTR_RO(mmio_size), __ATTR_RO(irqs_min), @@ -534,6 +544,10 @@ void cxl_sysfs_afu_remove(struct cxl_afu *afu) struct afu_config_record *cr, *tmp; int i; + /* remove the err buffer bin attribute */ + if (afu->eb_len) + device_remove_bin_file(&afu->dev, &afu->attr_eb); + for (i = 0; i < ARRAY_SIZE(afu_attrs); i++) device_remove_file(&afu->dev, &afu_attrs[i]); @@ -555,6 +569,22 @@ int cxl_sysfs_afu_add(struct cxl_afu *afu) goto err; } + /* conditionally create the add the binary file for error info buffer */ + if (afu->eb_len) { + afu->attr_eb.attr.name = "afu_err_buff"; + afu->attr_eb.attr.mode = S_IRUGO; + afu->attr_eb.size = afu->eb_len; + afu->attr_eb.read = afu_eb_read; + + rc = device_create_bin_file(&afu->dev, &afu->attr_eb); + if (rc) { + dev_err(&afu->dev, + "Unable to create eb attr for the afu. Err(%d)\n", + rc); + goto err; + } + } + for (i = 0; i < afu->crs_num; i++) { cr = cxl_sysfs_afu_new_cr(afu, i); if (IS_ERR(cr)) { @@ -570,6 +600,9 @@ err1: cxl_sysfs_afu_remove(afu); return rc; err: + /* reset the eb_len as we havent created the bin attr */ + afu->eb_len = 0; + for (i--; i >= 0; i--) device_remove_file(&afu->dev, &afu_attrs[i]); return rc; diff --git a/drivers/misc/cxl/vphb.c b/drivers/misc/cxl/vphb.c new file mode 100644 index 0000000..b1d1983a --- /dev/null +++ b/drivers/misc/cxl/vphb.c @@ -0,0 +1,270 @@ +/* + * Copyright 2014 IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/pci.h> +#include <misc/cxl.h> +#include "cxl.h" + +static int cxl_dma_set_mask(struct pci_dev *pdev, u64 dma_mask) +{ + if (dma_mask < DMA_BIT_MASK(64)) { + pr_info("%s only 64bit DMA supported on CXL", __func__); + return -EIO; + } + + *(pdev->dev.dma_mask) = dma_mask; + return 0; +} + +static int cxl_pci_probe_mode(struct pci_bus *bus) +{ + return PCI_PROBE_NORMAL; +} + +static int cxl_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) +{ + return -ENODEV; +} + +static void cxl_teardown_msi_irqs(struct pci_dev *pdev) +{ + /* + * MSI should never be set but need still need to provide this call + * back. 
+ */ +} + +static bool cxl_pci_enable_device_hook(struct pci_dev *dev) +{ + struct pci_controller *phb; + struct cxl_afu *afu; + struct cxl_context *ctx; + + phb = pci_bus_to_host(dev->bus); + afu = (struct cxl_afu *)phb->private_data; + set_dma_ops(&dev->dev, &dma_direct_ops); + set_dma_offset(&dev->dev, PAGE_OFFSET); + + /* + * Allocate a context to do cxl things too. If we eventually do real + * DMA ops, we'll need a default context to attach them to + */ + ctx = cxl_dev_context_init(dev); + if (!ctx) + return false; + dev->dev.archdata.cxl_ctx = ctx; + + return (cxl_afu_check_and_enable(afu) == 0); +} + +static void cxl_pci_disable_device(struct pci_dev *dev) +{ + struct cxl_context *ctx = cxl_get_context(dev); + + if (ctx) { + if (ctx->status == STARTED) { + dev_err(&dev->dev, "Default context started\n"); + return; + } + dev->dev.archdata.cxl_ctx = NULL; + cxl_release_context(ctx); + } +} + +static resource_size_t cxl_pci_window_alignment(struct pci_bus *bus, + unsigned long type) +{ + return 1; +} + +static void cxl_pci_reset_secondary_bus(struct pci_dev *dev) +{ + /* Should we do an AFU reset here ? */ +} + +static int cxl_pcie_cfg_record(u8 bus, u8 devfn) +{ + return (bus << 8) + devfn; +} + +static unsigned long cxl_pcie_cfg_addr(struct pci_controller* phb, + u8 bus, u8 devfn, int offset) +{ + int record = cxl_pcie_cfg_record(bus, devfn); + + return (unsigned long)phb->cfg_addr + ((unsigned long)phb->cfg_data * record) + offset; +} + + +static int cxl_pcie_config_info(struct pci_bus *bus, unsigned int devfn, + int offset, int len, + volatile void __iomem **ioaddr, + u32 *mask, int *shift) +{ + struct pci_controller *phb; + struct cxl_afu *afu; + unsigned long addr; + + phb = pci_bus_to_host(bus); + afu = (struct cxl_afu *)phb->private_data; + if (phb == NULL) + return PCIBIOS_DEVICE_NOT_FOUND; + if (cxl_pcie_cfg_record(bus->number, devfn) > afu->crs_num) + return PCIBIOS_DEVICE_NOT_FOUND; + if (offset >= (unsigned long)phb->cfg_data) + return PCIBIOS_BAD_REGISTER_NUMBER; + addr = cxl_pcie_cfg_addr(phb, bus->number, devfn, offset); + + *ioaddr = (void *)(addr & ~0x3ULL); + *shift = ((addr & 0x3) * 8); + switch (len) { + case 1: + *mask = 0xff; + break; + case 2: + *mask = 0xffff; + break; + default: + *mask = 0xffffffff; + break; + } + return 0; +} + +static int cxl_pcie_read_config(struct pci_bus *bus, unsigned int devfn, + int offset, int len, u32 *val) +{ + volatile void __iomem *ioaddr; + int shift, rc; + u32 mask; + + rc = cxl_pcie_config_info(bus, devfn, offset, len, &ioaddr, + &mask, &shift); + if (rc) + return rc; + + /* Can only read 32 bits */ + *val = (in_le32(ioaddr) >> shift) & mask; + return PCIBIOS_SUCCESSFUL; +} + +static int cxl_pcie_write_config(struct pci_bus *bus, unsigned int devfn, + int offset, int len, u32 val) +{ + volatile void __iomem *ioaddr; + u32 v, mask; + int shift, rc; + + rc = cxl_pcie_config_info(bus, devfn, offset, len, &ioaddr, + &mask, &shift); + if (rc) + return rc; + + /* Can only write 32 bits so do read-modify-write */ + mask <<= shift; + val <<= shift; + + v = (in_le32(ioaddr) & ~mask) || (val & mask); + + out_le32(ioaddr, v); + return PCIBIOS_SUCCESSFUL; +} + +static struct pci_ops cxl_pcie_pci_ops = +{ + .read = cxl_pcie_read_config, + .write = cxl_pcie_write_config, +}; + + +static struct pci_controller_ops cxl_pci_controller_ops = +{ + .probe_mode = cxl_pci_probe_mode, + .enable_device_hook = cxl_pci_enable_device_hook, + .disable_device = cxl_pci_disable_device, + .release_device = cxl_pci_disable_device, + .window_alignment = 
cxl_pci_window_alignment, + .reset_secondary_bus = cxl_pci_reset_secondary_bus, + .setup_msi_irqs = cxl_setup_msi_irqs, + .teardown_msi_irqs = cxl_teardown_msi_irqs, + .dma_set_mask = cxl_dma_set_mask, +}; + +int cxl_pci_vphb_add(struct cxl_afu *afu) +{ + struct pci_dev *phys_dev; + struct pci_controller *phb, *phys_phb; + + phys_dev = to_pci_dev(afu->adapter->dev.parent); + phys_phb = pci_bus_to_host(phys_dev->bus); + + /* Alloc and setup PHB data structure */ + phb = pcibios_alloc_controller(phys_phb->dn); + + if (!phb) + return -ENODEV; + + /* Setup parent in sysfs */ + phb->parent = &phys_dev->dev; + + /* Setup the PHB using arch provided callback */ + phb->ops = &cxl_pcie_pci_ops; + phb->cfg_addr = afu->afu_desc_mmio + afu->crs_offset; + phb->cfg_data = (void *)(u64)afu->crs_len; + phb->private_data = afu; + phb->controller_ops = cxl_pci_controller_ops; + + /* Scan the bus */ + pcibios_scan_phb(phb); + if (phb->bus == NULL) + return -ENXIO; + + /* Claim resources. This might need some rework as well depending + * whether we are doing probe-only or not, like assigning unassigned + * resources etc... + */ + pcibios_claim_one_bus(phb->bus); + + /* Add probed PCI devices to the device model */ + pci_bus_add_devices(phb->bus); + + afu->phb = phb; + + return 0; +} + + +void cxl_pci_vphb_remove(struct cxl_afu *afu) +{ + struct pci_controller *phb; + + /* If there is no configuration record we won't have one of these */ + if (!afu || !afu->phb) + return; + + phb = afu->phb; + + pci_remove_root_bus(phb->bus); +} + +struct cxl_afu *cxl_pci_to_afu(struct pci_dev *dev) +{ + struct pci_controller *phb; + + phb = pci_bus_to_host(dev->bus); + + return (struct cxl_afu *)phb->private_data; +} +EXPORT_SYMBOL_GPL(cxl_pci_to_afu); + +unsigned int cxl_pci_to_cfg_record(struct pci_dev *dev) +{ + return cxl_pcie_cfg_record(dev->bus->number, dev->devfn); +} +EXPORT_SYMBOL_GPL(cxl_pci_to_cfg_record); diff --git a/drivers/mtd/devices/Kconfig b/drivers/mtd/devices/Kconfig index c49d0b1..f73c416 100644 --- a/drivers/mtd/devices/Kconfig +++ b/drivers/mtd/devices/Kconfig @@ -195,6 +195,14 @@ config MTD_BLOCK2MTD Testing MTD users (eg JFFS2) on large media and media that might be removed during a write (using the floppy drive). +config MTD_POWERNV_FLASH + tristate "powernv flash MTD driver" + depends on PPC_POWERNV + help + This provides an MTD device to access flash on powernv OPAL + platforms from Linux. This device abstracts away the + firmware interface for flash access. + comment "Disk-On-Chip Device Drivers" config MTD_DOCG3 diff --git a/drivers/mtd/devices/Makefile b/drivers/mtd/devices/Makefile index f0b0e61..7912d3a 100644 --- a/drivers/mtd/devices/Makefile +++ b/drivers/mtd/devices/Makefile @@ -16,6 +16,7 @@ obj-$(CONFIG_MTD_SPEAR_SMI) += spear_smi.o obj-$(CONFIG_MTD_SST25L) += sst25l.o obj-$(CONFIG_MTD_BCM47XXSFLASH) += bcm47xxsflash.o obj-$(CONFIG_MTD_ST_SPI_FSM) += st_spi_fsm.o +obj-$(CONFIG_MTD_POWERNV_FLASH) += powernv_flash.o CFLAGS_docg3.o += -I$(src) diff --git a/drivers/mtd/devices/powernv_flash.c b/drivers/mtd/devices/powernv_flash.c new file mode 100644 index 0000000..d5b870b --- /dev/null +++ b/drivers/mtd/devices/powernv_flash.c @@ -0,0 +1,285 @@ +/* + * OPAL PNOR flash MTD abstraction + * + * Copyright IBM 2015 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/errno.h> +#include <linux/of.h> +#include <linux/of_address.h> +#include <linux/platform_device.h> +#include <linux/string.h> +#include <linux/slab.h> +#include <linux/mtd/mtd.h> +#include <linux/mtd/partitions.h> + +#include <linux/debugfs.h> +#include <linux/seq_file.h> + +#include <asm/opal.h> + + +/* + * This driver creates the a Linux MTD abstraction for platform PNOR flash + * backed by OPAL calls + */ + +struct powernv_flash { + struct mtd_info mtd; + u32 id; +}; + +enum flash_op { + FLASH_OP_READ, + FLASH_OP_WRITE, + FLASH_OP_ERASE, +}; + +static int powernv_flash_async_op(struct mtd_info *mtd, enum flash_op op, + loff_t offset, size_t len, size_t *retlen, u_char *buf) +{ + struct powernv_flash *info = (struct powernv_flash *)mtd->priv; + struct device *dev = &mtd->dev; + int token; + struct opal_msg msg; + int rc; + + dev_dbg(dev, "%s(op=%d, offset=0x%llx, len=%zu)\n", + __func__, op, offset, len); + + token = opal_async_get_token_interruptible(); + if (token < 0) { + if (token != -ERESTARTSYS) + dev_err(dev, "Failed to get an async token\n"); + + return token; + } + + switch (op) { + case FLASH_OP_READ: + rc = opal_flash_read(info->id, offset, __pa(buf), len, token); + break; + case FLASH_OP_WRITE: + rc = opal_flash_write(info->id, offset, __pa(buf), len, token); + break; + case FLASH_OP_ERASE: + rc = opal_flash_erase(info->id, offset, len, token); + break; + default: + BUG_ON(1); + } + + if (rc != OPAL_ASYNC_COMPLETION) { + dev_err(dev, "opal_flash_async_op(op=%d) failed (rc %d)\n", + op, rc); + opal_async_release_token(token); + return -EIO; + } + + rc = opal_async_wait_response(token, &msg); + opal_async_release_token(token); + if (rc) { + dev_err(dev, "opal async wait failed (rc %d)\n", rc); + return -EIO; + } + + rc = be64_to_cpu(msg.params[1]); + if (rc == OPAL_SUCCESS) { + rc = 0; + if (retlen) + *retlen = len; + } else { + rc = -EIO; + } + + return rc; +} + +/** + * @mtd: the device + * @from: the offset to read from + * @len: the number of bytes to read + * @retlen: the number of bytes actually read + * @buf: the filled in buffer + * + * Returns 0 if read successful, or -ERRNO if an error occurred + */ +static int powernv_flash_read(struct mtd_info *mtd, loff_t from, size_t len, + size_t *retlen, u_char *buf) +{ + return powernv_flash_async_op(mtd, FLASH_OP_READ, from, + len, retlen, buf); +} + +/** + * @mtd: the device + * @to: the offset to write to + * @len: the number of bytes to write + * @retlen: the number of bytes actually written + * @buf: the buffer to get bytes from + * + * Returns 0 if write successful, -ERRNO if error occurred + */ +static int powernv_flash_write(struct mtd_info *mtd, loff_t to, size_t len, + size_t *retlen, const u_char *buf) +{ + return powernv_flash_async_op(mtd, FLASH_OP_WRITE, to, + len, retlen, (u_char *)buf); +} + +/** + * @mtd: the device + * @erase: the erase info + * Returns 0 if erase successful or -ERRNO if an error occurred + */ +static int powernv_flash_erase(struct mtd_info *mtd, struct erase_info *erase) +{ + int rc; + + erase->state = MTD_ERASING; + + /* todo: register our own notifier to do a true async implementation */ + rc = powernv_flash_async_op(mtd, FLASH_OP_ERASE, erase->addr, + erase->len, 
NULL, NULL); + + if (rc) { + erase->fail_addr = erase->addr; + erase->state = MTD_ERASE_FAILED; + } else { + erase->state = MTD_ERASE_DONE; + } + mtd_erase_callback(erase); + return rc; +} + +/** + * powernv_flash_set_driver_info - Fill the mtd_info structure and docg3 + * structure @pdev: The platform device + * @mtd: The structure to fill + */ +static int powernv_flash_set_driver_info(struct device *dev, + struct mtd_info *mtd) +{ + u64 size; + u32 erase_size; + int rc; + + rc = of_property_read_u32(dev->of_node, "ibm,flash-block-size", + &erase_size); + if (rc) { + dev_err(dev, "couldn't get resource block size information\n"); + return rc; + } + + rc = of_property_read_u64(dev->of_node, "reg", &size); + if (rc) { + dev_err(dev, "couldn't get resource size information\n"); + return rc; + } + + /* + * Going to have to check what details I need to set and how to + * get them + */ + mtd->name = of_get_property(dev->of_node, "name", NULL); + mtd->type = MTD_NORFLASH; + mtd->flags = MTD_WRITEABLE; + mtd->size = size; + mtd->erasesize = erase_size; + mtd->writebufsize = mtd->writesize = 1; + mtd->owner = THIS_MODULE; + mtd->_erase = powernv_flash_erase; + mtd->_read = powernv_flash_read; + mtd->_write = powernv_flash_write; + mtd->dev.parent = dev; + return 0; +} + +/** + * powernv_flash_probe + * @pdev: platform device + * + * Returns 0 on success, -ENOMEM, -ENXIO on error + */ +static int powernv_flash_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct powernv_flash *data; + int ret; + + data = devm_kzalloc(dev, sizeof(*data), GFP_KERNEL); + if (!data) { + ret = -ENOMEM; + goto out; + } + data->mtd.priv = data; + + ret = of_property_read_u32(dev->of_node, "ibm,opal-id", &(data->id)); + if (ret) { + dev_err(dev, "no device property 'ibm,opal-id'\n"); + goto out; + } + + ret = powernv_flash_set_driver_info(dev, &data->mtd); + if (ret) + goto out; + + dev_set_drvdata(dev, data); + + /* + * The current flash that skiboot exposes is one contiguous flash chip + * with an ffs partition at the start, it should prove easier for users + * to deal with partitions or not as they see fit + */ + ret = mtd_device_register(&data->mtd, NULL, 0); + +out: + return ret; +} + +/** + * op_release - Release the driver + * @pdev: the platform device + * + * Returns 0 + */ +static int powernv_flash_release(struct platform_device *pdev) +{ + struct powernv_flash *data = dev_get_drvdata(&(pdev->dev)); + + /* All resources should be freed automatically */ + return mtd_device_unregister(&(data->mtd)); +} + +static const struct of_device_id powernv_flash_match[] = { + { .compatible = "ibm,opal-flash" }, + {} +}; + +static struct platform_driver powernv_flash_driver = { + .driver = { + .name = "powernv_flash", + .of_match_table = powernv_flash_match, + }, + .remove = powernv_flash_release, + .probe = powernv_flash_probe, +}; + +module_platform_driver(powernv_flash_driver); + +MODULE_DEVICE_TABLE(of, powernv_flash_match); +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Cyril Bur <cyril.bur@au1.ibm.com>"); +MODULE_DESCRIPTION("MTD abstraction for OPAL flash"); diff --git a/drivers/tty/hvc/hvc_opal.c b/drivers/tty/hvc/hvc_opal.c index 543b234..47b54c6 100644 --- a/drivers/tty/hvc/hvc_opal.c +++ b/drivers/tty/hvc/hvc_opal.c @@ -29,6 +29,7 @@ #include <linux/of.h> #include <linux/of_platform.h> #include <linux/export.h> +#include <linux/interrupt.h> #include <asm/hvconsole.h> #include <asm/prom.h> @@ -61,7 +62,6 @@ static struct hvc_opal_priv *hvc_opal_privs[MAX_NR_HVC_CONSOLES]; /* For early boot 
console */ static struct hvc_opal_priv hvc_opal_boot_priv; static u32 hvc_opal_boot_termno; -static bool hvc_opal_event_registered; static const struct hv_ops hvc_opal_raw_ops = { .get_chars = opal_get_chars, @@ -162,28 +162,15 @@ static const struct hv_ops hvc_opal_hvsi_ops = { .tiocmset = hvc_opal_hvsi_tiocmset, }; -static int hvc_opal_console_event(struct notifier_block *nb, - unsigned long events, void *change) -{ - if (events & OPAL_EVENT_CONSOLE_INPUT) - hvc_kick(); - return 0; -} - -static struct notifier_block hvc_opal_console_nb = { - .notifier_call = hvc_opal_console_event, -}; - static int hvc_opal_probe(struct platform_device *dev) { const struct hv_ops *ops; struct hvc_struct *hp; struct hvc_opal_priv *pv; hv_protocol_t proto; - unsigned int termno, boot = 0; + unsigned int termno, irq, boot = 0; const __be32 *reg; - if (of_device_is_compatible(dev->dev.of_node, "ibm,opal-console-raw")) { proto = HV_PROTOCOL_RAW; ops = &hvc_opal_raw_ops; @@ -227,18 +214,18 @@ static int hvc_opal_probe(struct platform_device *dev) dev->dev.of_node->full_name, boot ? " (boot console)" : ""); - /* We don't do IRQ ... */ - hp = hvc_alloc(termno, 0, ops, MAX_VIO_PUT_CHARS); + irq = opal_event_request(ilog2(OPAL_EVENT_CONSOLE_INPUT)); + if (!irq) { + pr_err("hvc_opal: Unable to map interrupt for device %s\n", + dev->dev.of_node->full_name); + return irq; + } + + hp = hvc_alloc(termno, irq, ops, MAX_VIO_PUT_CHARS); if (IS_ERR(hp)) return PTR_ERR(hp); dev_set_drvdata(&dev->dev, hp); - /* ... but we use OPAL event to kick the console */ - if (!hvc_opal_event_registered) { - opal_notifier_register(&hvc_opal_console_nb); - hvc_opal_event_registered = true; - } - return 0; } diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c index 730b4ef..0582b72 100644 --- a/drivers/vfio/vfio_iommu_spapr_tce.c +++ b/drivers/vfio/vfio_iommu_spapr_tce.c @@ -19,8 +19,10 @@ #include <linux/uaccess.h> #include <linux/err.h> #include <linux/vfio.h> +#include <linux/vmalloc.h> #include <asm/iommu.h> #include <asm/tce.h> +#include <asm/mmu_context.h> #define DRIVER_VERSION "0.1" #define DRIVER_AUTHOR "aik@ozlabs.ru" @@ -29,6 +31,51 @@ static void tce_iommu_detach_group(void *iommu_data, struct iommu_group *iommu_group); +static long try_increment_locked_vm(long npages) +{ + long ret = 0, locked, lock_limit; + + if (!current || !current->mm) + return -ESRCH; /* process exited */ + + if (!npages) + return 0; + + down_write(¤t->mm->mmap_sem); + locked = current->mm->locked_vm + npages; + lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; + if (locked > lock_limit && !capable(CAP_IPC_LOCK)) + ret = -ENOMEM; + else + current->mm->locked_vm += npages; + + pr_debug("[%d] RLIMIT_MEMLOCK +%ld %ld/%ld%s\n", current->pid, + npages << PAGE_SHIFT, + current->mm->locked_vm << PAGE_SHIFT, + rlimit(RLIMIT_MEMLOCK), + ret ? 
" - exceeded" : ""); + + up_write(¤t->mm->mmap_sem); + + return ret; +} + +static void decrement_locked_vm(long npages) +{ + if (!current || !current->mm || !npages) + return; /* process exited */ + + down_write(¤t->mm->mmap_sem); + if (WARN_ON_ONCE(npages > current->mm->locked_vm)) + npages = current->mm->locked_vm; + current->mm->locked_vm -= npages; + pr_debug("[%d] RLIMIT_MEMLOCK -%ld %ld/%ld\n", current->pid, + npages << PAGE_SHIFT, + current->mm->locked_vm << PAGE_SHIFT, + rlimit(RLIMIT_MEMLOCK)); + up_write(¤t->mm->mmap_sem); +} + /* * VFIO IOMMU fd for SPAPR_TCE IOMMU implementation * @@ -36,6 +83,11 @@ static void tce_iommu_detach_group(void *iommu_data, * into DMA'ble space using the IOMMU */ +struct tce_iommu_group { + struct list_head next; + struct iommu_group *grp; +}; + /* * The container descriptor supports only a single group per container. * Required by the API as the container is not supplied with the IOMMU group @@ -43,18 +95,140 @@ static void tce_iommu_detach_group(void *iommu_data, */ struct tce_container { struct mutex lock; - struct iommu_table *tbl; bool enabled; + bool v2; + unsigned long locked_pages; + struct iommu_table *tables[IOMMU_TABLE_GROUP_MAX_TABLES]; + struct list_head group_list; }; +static long tce_iommu_unregister_pages(struct tce_container *container, + __u64 vaddr, __u64 size) +{ + struct mm_iommu_table_group_mem_t *mem; + + if ((vaddr & ~PAGE_MASK) || (size & ~PAGE_MASK)) + return -EINVAL; + + mem = mm_iommu_find(vaddr, size >> PAGE_SHIFT); + if (!mem) + return -ENOENT; + + return mm_iommu_put(mem); +} + +static long tce_iommu_register_pages(struct tce_container *container, + __u64 vaddr, __u64 size) +{ + long ret = 0; + struct mm_iommu_table_group_mem_t *mem = NULL; + unsigned long entries = size >> PAGE_SHIFT; + + if ((vaddr & ~PAGE_MASK) || (size & ~PAGE_MASK) || + ((vaddr + size) < vaddr)) + return -EINVAL; + + ret = mm_iommu_get(vaddr, entries, &mem); + if (ret) + return ret; + + container->enabled = true; + + return 0; +} + +static long tce_iommu_userspace_view_alloc(struct iommu_table *tbl) +{ + unsigned long cb = _ALIGN_UP(sizeof(tbl->it_userspace[0]) * + tbl->it_size, PAGE_SIZE); + unsigned long *uas; + long ret; + + BUG_ON(tbl->it_userspace); + + ret = try_increment_locked_vm(cb >> PAGE_SHIFT); + if (ret) + return ret; + + uas = vzalloc(cb); + if (!uas) { + decrement_locked_vm(cb >> PAGE_SHIFT); + return -ENOMEM; + } + tbl->it_userspace = uas; + + return 0; +} + +static void tce_iommu_userspace_view_free(struct iommu_table *tbl) +{ + unsigned long cb = _ALIGN_UP(sizeof(tbl->it_userspace[0]) * + tbl->it_size, PAGE_SIZE); + + if (!tbl->it_userspace) + return; + + vfree(tbl->it_userspace); + tbl->it_userspace = NULL; + decrement_locked_vm(cb >> PAGE_SHIFT); +} + +static bool tce_page_is_contained(struct page *page, unsigned page_shift) +{ + /* + * Check that the TCE table granularity is not bigger than the size of + * a page we just found. Otherwise the hardware can get access to + * a bigger memory chunk that it should. 
+ */ + return (PAGE_SHIFT + compound_order(compound_head(page))) >= page_shift; +} + +static inline bool tce_groups_attached(struct tce_container *container) +{ + return !list_empty(&container->group_list); +} + +static long tce_iommu_find_table(struct tce_container *container, + phys_addr_t ioba, struct iommu_table **ptbl) +{ + long i; + + for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) { + struct iommu_table *tbl = container->tables[i]; + + if (tbl) { + unsigned long entry = ioba >> tbl->it_page_shift; + unsigned long start = tbl->it_offset; + unsigned long end = start + tbl->it_size; + + if ((start <= entry) && (entry < end)) { + *ptbl = tbl; + return i; + } + } + } + + return -1; +} + +static int tce_iommu_find_free_table(struct tce_container *container) +{ + int i; + + for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) { + if (!container->tables[i]) + return i; + } + + return -ENOSPC; +} + static int tce_iommu_enable(struct tce_container *container) { int ret = 0; - unsigned long locked, lock_limit, npages; - struct iommu_table *tbl = container->tbl; - - if (!container->tbl) - return -ENXIO; + unsigned long locked; + struct iommu_table_group *table_group; + struct tce_iommu_group *tcegrp; if (!current->mm) return -ESRCH; /* process exited */ @@ -79,21 +253,38 @@ static int tce_iommu_enable(struct tce_container *container) * Also we don't have a nice way to fail on H_PUT_TCE due to ulimits, * that would effectively kill the guest at random points, much better * enforcing the limit based on the max that the guest can map. + * + * Unfortunately at the moment it counts whole tables, no matter how + * much memory the guest has. I.e. for 4GB guest and 4 IOMMU groups + * each with 2GB DMA window, 8GB will be counted here. The reason for + * this is that we cannot tell here the amount of RAM used by the guest + * as this information is only available from KVM and VFIO is + * KVM agnostic. + * + * So we do not allow enabling a container without a group attached + * as there is no way to know how much we should increment + * the locked_vm counter. 
*/ - down_write(¤t->mm->mmap_sem); - npages = (tbl->it_size << IOMMU_PAGE_SHIFT_4K) >> PAGE_SHIFT; - locked = current->mm->locked_vm + npages; - lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; - if (locked > lock_limit && !capable(CAP_IPC_LOCK)) { - pr_warn("RLIMIT_MEMLOCK (%ld) exceeded\n", - rlimit(RLIMIT_MEMLOCK)); - ret = -ENOMEM; - } else { + if (!tce_groups_attached(container)) + return -ENODEV; - current->mm->locked_vm += npages; - container->enabled = true; - } - up_write(¤t->mm->mmap_sem); + tcegrp = list_first_entry(&container->group_list, + struct tce_iommu_group, next); + table_group = iommu_group_get_iommudata(tcegrp->grp); + if (!table_group) + return -ENODEV; + + if (!table_group->tce32_size) + return -EPERM; + + locked = table_group->tce32_size >> PAGE_SHIFT; + ret = try_increment_locked_vm(locked); + if (ret) + return ret; + + container->locked_pages = locked; + + container->enabled = true; return ret; } @@ -105,20 +296,17 @@ static void tce_iommu_disable(struct tce_container *container) container->enabled = false; - if (!container->tbl || !current->mm) + if (!current->mm) return; - down_write(¤t->mm->mmap_sem); - current->mm->locked_vm -= (container->tbl->it_size << - IOMMU_PAGE_SHIFT_4K) >> PAGE_SHIFT; - up_write(¤t->mm->mmap_sem); + decrement_locked_vm(container->locked_pages); } static void *tce_iommu_open(unsigned long arg) { struct tce_container *container; - if (arg != VFIO_SPAPR_TCE_IOMMU) { + if ((arg != VFIO_SPAPR_TCE_IOMMU) && (arg != VFIO_SPAPR_TCE_v2_IOMMU)) { pr_err("tce_vfio: Wrong IOMMU type\n"); return ERR_PTR(-EINVAL); } @@ -128,36 +316,411 @@ static void *tce_iommu_open(unsigned long arg) return ERR_PTR(-ENOMEM); mutex_init(&container->lock); + INIT_LIST_HEAD_RCU(&container->group_list); + + container->v2 = arg == VFIO_SPAPR_TCE_v2_IOMMU; return container; } +static int tce_iommu_clear(struct tce_container *container, + struct iommu_table *tbl, + unsigned long entry, unsigned long pages); +static void tce_iommu_free_table(struct iommu_table *tbl); + static void tce_iommu_release(void *iommu_data) { struct tce_container *container = iommu_data; + struct iommu_table_group *table_group; + struct tce_iommu_group *tcegrp; + long i; + + while (tce_groups_attached(container)) { + tcegrp = list_first_entry(&container->group_list, + struct tce_iommu_group, next); + table_group = iommu_group_get_iommudata(tcegrp->grp); + tce_iommu_detach_group(iommu_data, tcegrp->grp); + } - WARN_ON(container->tbl && !container->tbl->it_group); - tce_iommu_disable(container); + /* + * If VFIO created a table, it was not disposed + * by tce_iommu_detach_group() so do it now. 
+ */ + for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) { + struct iommu_table *tbl = container->tables[i]; + + if (!tbl) + continue; - if (container->tbl && container->tbl->it_group) - tce_iommu_detach_group(iommu_data, container->tbl->it_group); + tce_iommu_clear(container, tbl, tbl->it_offset, tbl->it_size); + tce_iommu_free_table(tbl); + } + tce_iommu_disable(container); mutex_destroy(&container->lock); kfree(container); } +static void tce_iommu_unuse_page(struct tce_container *container, + unsigned long hpa) +{ + struct page *page; + + page = pfn_to_page(hpa >> PAGE_SHIFT); + put_page(page); +} + +static int tce_iommu_prereg_ua_to_hpa(unsigned long tce, unsigned long size, + unsigned long *phpa, struct mm_iommu_table_group_mem_t **pmem) +{ + long ret = 0; + struct mm_iommu_table_group_mem_t *mem; + + mem = mm_iommu_lookup(tce, size); + if (!mem) + return -EINVAL; + + ret = mm_iommu_ua_to_hpa(mem, tce, phpa); + if (ret) + return -EINVAL; + + *pmem = mem; + + return 0; +} + +static void tce_iommu_unuse_page_v2(struct iommu_table *tbl, + unsigned long entry) +{ + struct mm_iommu_table_group_mem_t *mem = NULL; + int ret; + unsigned long hpa = 0; + unsigned long *pua = IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry); + + if (!pua || !current || !current->mm) + return; + + ret = tce_iommu_prereg_ua_to_hpa(*pua, IOMMU_PAGE_SIZE(tbl), + &hpa, &mem); + if (ret) + pr_debug("%s: tce %lx at #%lx was not cached, ret=%d\n", + __func__, *pua, entry, ret); + if (mem) + mm_iommu_mapped_dec(mem); + + *pua = 0; +} + +static int tce_iommu_clear(struct tce_container *container, + struct iommu_table *tbl, + unsigned long entry, unsigned long pages) +{ + unsigned long oldhpa; + long ret; + enum dma_data_direction direction; + + for ( ; pages; --pages, ++entry) { + direction = DMA_NONE; + oldhpa = 0; + ret = iommu_tce_xchg(tbl, entry, &oldhpa, &direction); + if (ret) + continue; + + if (direction == DMA_NONE) + continue; + + if (container->v2) { + tce_iommu_unuse_page_v2(tbl, entry); + continue; + } + + tce_iommu_unuse_page(container, oldhpa); + } + + return 0; +} + +static int tce_iommu_use_page(unsigned long tce, unsigned long *hpa) +{ + struct page *page = NULL; + enum dma_data_direction direction = iommu_tce_direction(tce); + + if (get_user_pages_fast(tce & PAGE_MASK, 1, + direction != DMA_TO_DEVICE, &page) != 1) + return -EFAULT; + + *hpa = __pa((unsigned long) page_address(page)); + + return 0; +} + +static long tce_iommu_build(struct tce_container *container, + struct iommu_table *tbl, + unsigned long entry, unsigned long tce, unsigned long pages, + enum dma_data_direction direction) +{ + long i, ret = 0; + struct page *page; + unsigned long hpa; + enum dma_data_direction dirtmp; + + for (i = 0; i < pages; ++i) { + unsigned long offset = tce & IOMMU_PAGE_MASK(tbl) & ~PAGE_MASK; + + ret = tce_iommu_use_page(tce, &hpa); + if (ret) + break; + + page = pfn_to_page(hpa >> PAGE_SHIFT); + if (!tce_page_is_contained(page, tbl->it_page_shift)) { + ret = -EPERM; + break; + } + + hpa |= offset; + dirtmp = direction; + ret = iommu_tce_xchg(tbl, entry + i, &hpa, &dirtmp); + if (ret) { + tce_iommu_unuse_page(container, hpa); + pr_err("iommu_tce: %s failed ioba=%lx, tce=%lx, ret=%ld\n", + __func__, entry << tbl->it_page_shift, + tce, ret); + break; + } + + if (dirtmp != DMA_NONE) + tce_iommu_unuse_page(container, hpa); + + tce += IOMMU_PAGE_SIZE(tbl); + } + + if (ret) + tce_iommu_clear(container, tbl, entry, i); + + return ret; +} + +static long tce_iommu_build_v2(struct tce_container *container, + struct iommu_table 
*tbl, + unsigned long entry, unsigned long tce, unsigned long pages, + enum dma_data_direction direction) +{ + long i, ret = 0; + struct page *page; + unsigned long hpa; + enum dma_data_direction dirtmp; + + for (i = 0; i < pages; ++i) { + struct mm_iommu_table_group_mem_t *mem = NULL; + unsigned long *pua = IOMMU_TABLE_USERSPACE_ENTRY(tbl, + entry + i); + + ret = tce_iommu_prereg_ua_to_hpa(tce, IOMMU_PAGE_SIZE(tbl), + &hpa, &mem); + if (ret) + break; + + page = pfn_to_page(hpa >> PAGE_SHIFT); + if (!tce_page_is_contained(page, tbl->it_page_shift)) { + ret = -EPERM; + break; + } + + /* Preserve offset within IOMMU page */ + hpa |= tce & IOMMU_PAGE_MASK(tbl) & ~PAGE_MASK; + dirtmp = direction; + + /* The registered region is being unregistered */ + if (mm_iommu_mapped_inc(mem)) + break; + + ret = iommu_tce_xchg(tbl, entry + i, &hpa, &dirtmp); + if (ret) { + /* dirtmp cannot be DMA_NONE here */ + tce_iommu_unuse_page_v2(tbl, entry + i); + pr_err("iommu_tce: %s failed ioba=%lx, tce=%lx, ret=%ld\n", + __func__, entry << tbl->it_page_shift, + tce, ret); + break; + } + + if (dirtmp != DMA_NONE) + tce_iommu_unuse_page_v2(tbl, entry + i); + + *pua = tce; + + tce += IOMMU_PAGE_SIZE(tbl); + } + + if (ret) + tce_iommu_clear(container, tbl, entry, i); + + return ret; +} + +static long tce_iommu_create_table(struct tce_container *container, + struct iommu_table_group *table_group, + int num, + __u32 page_shift, + __u64 window_size, + __u32 levels, + struct iommu_table **ptbl) +{ + long ret, table_size; + + table_size = table_group->ops->get_table_size(page_shift, window_size, + levels); + if (!table_size) + return -EINVAL; + + ret = try_increment_locked_vm(table_size >> PAGE_SHIFT); + if (ret) + return ret; + + ret = table_group->ops->create_table(table_group, num, + page_shift, window_size, levels, ptbl); + + WARN_ON(!ret && !(*ptbl)->it_ops->free); + WARN_ON(!ret && ((*ptbl)->it_allocated_size != table_size)); + + if (!ret && container->v2) { + ret = tce_iommu_userspace_view_alloc(*ptbl); + if (ret) + (*ptbl)->it_ops->free(*ptbl); + } + + if (ret) + decrement_locked_vm(table_size >> PAGE_SHIFT); + + return ret; +} + +static void tce_iommu_free_table(struct iommu_table *tbl) +{ + unsigned long pages = tbl->it_allocated_size >> PAGE_SHIFT; + + tce_iommu_userspace_view_free(tbl); + tbl->it_ops->free(tbl); + decrement_locked_vm(pages); +} + +static long tce_iommu_create_window(struct tce_container *container, + __u32 page_shift, __u64 window_size, __u32 levels, + __u64 *start_addr) +{ + struct tce_iommu_group *tcegrp; + struct iommu_table_group *table_group; + struct iommu_table *tbl = NULL; + long ret, num; + + num = tce_iommu_find_free_table(container); + if (num < 0) + return num; + + /* Get the first group for ops::create_table */ + tcegrp = list_first_entry(&container->group_list, + struct tce_iommu_group, next); + table_group = iommu_group_get_iommudata(tcegrp->grp); + if (!table_group) + return -EFAULT; + + if (!(table_group->pgsizes & (1ULL << page_shift))) + return -EINVAL; + + if (!table_group->ops->set_window || !table_group->ops->unset_window || + !table_group->ops->get_table_size || + !table_group->ops->create_table) + return -EPERM; + + /* Create TCE table */ + ret = tce_iommu_create_table(container, table_group, num, + page_shift, window_size, levels, &tbl); + if (ret) + return ret; + + BUG_ON(!tbl->it_ops->free); + + /* + * Program the table to every group. + * Groups have been tested for compatibility at the attach time. 
+ */ + list_for_each_entry(tcegrp, &container->group_list, next) { + table_group = iommu_group_get_iommudata(tcegrp->grp); + + ret = table_group->ops->set_window(table_group, num, tbl); + if (ret) + goto unset_exit; + } + + container->tables[num] = tbl; + + /* Return start address assigned by platform in create_table() */ + *start_addr = tbl->it_offset << tbl->it_page_shift; + + return 0; + +unset_exit: + list_for_each_entry(tcegrp, &container->group_list, next) { + table_group = iommu_group_get_iommudata(tcegrp->grp); + table_group->ops->unset_window(table_group, num); + } + tce_iommu_free_table(tbl); + + return ret; +} + +static long tce_iommu_remove_window(struct tce_container *container, + __u64 start_addr) +{ + struct iommu_table_group *table_group = NULL; + struct iommu_table *tbl; + struct tce_iommu_group *tcegrp; + int num; + + num = tce_iommu_find_table(container, start_addr, &tbl); + if (num < 0) + return -EINVAL; + + BUG_ON(!tbl->it_size); + + /* Detach groups from IOMMUs */ + list_for_each_entry(tcegrp, &container->group_list, next) { + table_group = iommu_group_get_iommudata(tcegrp->grp); + + /* + * SPAPR TCE IOMMU exposes the default DMA window to + * the guest via dma32_window_start/size of + * VFIO_IOMMU_SPAPR_TCE_GET_INFO. Some platforms allow + * the userspace to remove this window, some do not so + * here we check for the platform capability. + */ + if (!table_group->ops || !table_group->ops->unset_window) + return -EPERM; + + table_group->ops->unset_window(table_group, num); + } + + /* Free table */ + tce_iommu_clear(container, tbl, tbl->it_offset, tbl->it_size); + tce_iommu_free_table(tbl); + container->tables[num] = NULL; + + return 0; +} + static long tce_iommu_ioctl(void *iommu_data, unsigned int cmd, unsigned long arg) { struct tce_container *container = iommu_data; - unsigned long minsz; + unsigned long minsz, ddwsz; long ret; switch (cmd) { case VFIO_CHECK_EXTENSION: switch (arg) { case VFIO_SPAPR_TCE_IOMMU: + case VFIO_SPAPR_TCE_v2_IOMMU: ret = 1; break; default: @@ -169,9 +732,17 @@ static long tce_iommu_ioctl(void *iommu_data, case VFIO_IOMMU_SPAPR_TCE_GET_INFO: { struct vfio_iommu_spapr_tce_info info; - struct iommu_table *tbl = container->tbl; + struct tce_iommu_group *tcegrp; + struct iommu_table_group *table_group; + + if (!tce_groups_attached(container)) + return -ENXIO; + + tcegrp = list_first_entry(&container->group_list, + struct tce_iommu_group, next); + table_group = iommu_group_get_iommudata(tcegrp->grp); - if (WARN_ON(!tbl)) + if (!table_group) return -ENXIO; minsz = offsetofend(struct vfio_iommu_spapr_tce_info, @@ -183,9 +754,24 @@ static long tce_iommu_ioctl(void *iommu_data, if (info.argsz < minsz) return -EINVAL; - info.dma32_window_start = tbl->it_offset << IOMMU_PAGE_SHIFT_4K; - info.dma32_window_size = tbl->it_size << IOMMU_PAGE_SHIFT_4K; + info.dma32_window_start = table_group->tce32_start; + info.dma32_window_size = table_group->tce32_size; info.flags = 0; + memset(&info.ddw, 0, sizeof(info.ddw)); + + if (table_group->max_dynamic_windows_supported && + container->v2) { + info.flags |= VFIO_IOMMU_SPAPR_INFO_DDW; + info.ddw.pgsizes = table_group->pgsizes; + info.ddw.max_dynamic_windows_supported = + table_group->max_dynamic_windows_supported; + info.ddw.levels = table_group->max_levels; + } + + ddwsz = offsetofend(struct vfio_iommu_spapr_tce_info, ddw); + + if (info.argsz >= ddwsz) + minsz = ddwsz; if (copy_to_user((void __user *)arg, &info, minsz)) return -EFAULT; @@ -194,13 +780,12 @@ static long tce_iommu_ioctl(void *iommu_data, } case 
VFIO_IOMMU_MAP_DMA: { struct vfio_iommu_type1_dma_map param; - struct iommu_table *tbl = container->tbl; - unsigned long tce, i; + struct iommu_table *tbl = NULL; + long num; + enum dma_data_direction direction; - if (!tbl) - return -ENXIO; - - BUG_ON(!tbl->it_group); + if (!container->enabled) + return -EPERM; minsz = offsetofend(struct vfio_iommu_type1_dma_map, size); @@ -214,32 +799,43 @@ static long tce_iommu_ioctl(void *iommu_data, VFIO_DMA_MAP_FLAG_WRITE)) return -EINVAL; - if ((param.size & ~IOMMU_PAGE_MASK_4K) || - (param.vaddr & ~IOMMU_PAGE_MASK_4K)) + num = tce_iommu_find_table(container, param.iova, &tbl); + if (num < 0) + return -ENXIO; + + if ((param.size & ~IOMMU_PAGE_MASK(tbl)) || + (param.vaddr & ~IOMMU_PAGE_MASK(tbl))) return -EINVAL; /* iova is checked by the IOMMU API */ - tce = param.vaddr; - if (param.flags & VFIO_DMA_MAP_FLAG_READ) - tce |= TCE_PCI_READ; - if (param.flags & VFIO_DMA_MAP_FLAG_WRITE) - tce |= TCE_PCI_WRITE; + if (param.flags & VFIO_DMA_MAP_FLAG_READ) { + if (param.flags & VFIO_DMA_MAP_FLAG_WRITE) + direction = DMA_BIDIRECTIONAL; + else + direction = DMA_TO_DEVICE; + } else { + if (param.flags & VFIO_DMA_MAP_FLAG_WRITE) + direction = DMA_FROM_DEVICE; + else + return -EINVAL; + } - ret = iommu_tce_put_param_check(tbl, param.iova, tce); + ret = iommu_tce_put_param_check(tbl, param.iova, param.vaddr); if (ret) return ret; - for (i = 0; i < (param.size >> IOMMU_PAGE_SHIFT_4K); ++i) { - ret = iommu_put_tce_user_mode(tbl, - (param.iova >> IOMMU_PAGE_SHIFT_4K) + i, - tce); - if (ret) - break; - tce += IOMMU_PAGE_SIZE_4K; - } - if (ret) - iommu_clear_tces_and_put_pages(tbl, - param.iova >> IOMMU_PAGE_SHIFT_4K, i); + if (container->v2) + ret = tce_iommu_build_v2(container, tbl, + param.iova >> tbl->it_page_shift, + param.vaddr, + param.size >> tbl->it_page_shift, + direction); + else + ret = tce_iommu_build(container, tbl, + param.iova >> tbl->it_page_shift, + param.vaddr, + param.size >> tbl->it_page_shift, + direction); iommu_flush_tce(tbl); @@ -247,10 +843,11 @@ static long tce_iommu_ioctl(void *iommu_data, } case VFIO_IOMMU_UNMAP_DMA: { struct vfio_iommu_type1_dma_unmap param; - struct iommu_table *tbl = container->tbl; + struct iommu_table *tbl = NULL; + long num; - if (WARN_ON(!tbl)) - return -ENXIO; + if (!container->enabled) + return -EPERM; minsz = offsetofend(struct vfio_iommu_type1_dma_unmap, size); @@ -265,22 +862,81 @@ static long tce_iommu_ioctl(void *iommu_data, if (param.flags) return -EINVAL; - if (param.size & ~IOMMU_PAGE_MASK_4K) + num = tce_iommu_find_table(container, param.iova, &tbl); + if (num < 0) + return -ENXIO; + + if (param.size & ~IOMMU_PAGE_MASK(tbl)) return -EINVAL; ret = iommu_tce_clear_param_check(tbl, param.iova, 0, - param.size >> IOMMU_PAGE_SHIFT_4K); + param.size >> tbl->it_page_shift); if (ret) return ret; - ret = iommu_clear_tces_and_put_pages(tbl, - param.iova >> IOMMU_PAGE_SHIFT_4K, - param.size >> IOMMU_PAGE_SHIFT_4K); + ret = tce_iommu_clear(container, tbl, + param.iova >> tbl->it_page_shift, + param.size >> tbl->it_page_shift); iommu_flush_tce(tbl); return ret; } + case VFIO_IOMMU_SPAPR_REGISTER_MEMORY: { + struct vfio_iommu_spapr_register_memory param; + + if (!container->v2) + break; + + minsz = offsetofend(struct vfio_iommu_spapr_register_memory, + size); + + if (copy_from_user(¶m, (void __user *)arg, minsz)) + return -EFAULT; + + if (param.argsz < minsz) + return -EINVAL; + + /* No flag is supported now */ + if (param.flags) + return -EINVAL; + + mutex_lock(&container->lock); + ret = 
tce_iommu_register_pages(container, param.vaddr, + param.size); + mutex_unlock(&container->lock); + + return ret; + } + case VFIO_IOMMU_SPAPR_UNREGISTER_MEMORY: { + struct vfio_iommu_spapr_register_memory param; + + if (!container->v2) + break; + + minsz = offsetofend(struct vfio_iommu_spapr_register_memory, + size); + + if (copy_from_user(¶m, (void __user *)arg, minsz)) + return -EFAULT; + + if (param.argsz < minsz) + return -EINVAL; + + /* No flag is supported now */ + if (param.flags) + return -EINVAL; + + mutex_lock(&container->lock); + ret = tce_iommu_unregister_pages(container, param.vaddr, + param.size); + mutex_unlock(&container->lock); + + return ret; + } case VFIO_IOMMU_ENABLE: + if (container->v2) + break; + mutex_lock(&container->lock); ret = tce_iommu_enable(container); mutex_unlock(&container->lock); @@ -288,48 +944,280 @@ static long tce_iommu_ioctl(void *iommu_data, case VFIO_IOMMU_DISABLE: + if (container->v2) + break; + mutex_lock(&container->lock); tce_iommu_disable(container); mutex_unlock(&container->lock); return 0; - case VFIO_EEH_PE_OP: - if (!container->tbl || !container->tbl->it_group) - return -ENODEV; - return vfio_spapr_iommu_eeh_ioctl(container->tbl->it_group, - cmd, arg); + case VFIO_EEH_PE_OP: { + struct tce_iommu_group *tcegrp; + + ret = 0; + list_for_each_entry(tcegrp, &container->group_list, next) { + ret = vfio_spapr_iommu_eeh_ioctl(tcegrp->grp, + cmd, arg); + if (ret) + return ret; + } + return ret; + } + + case VFIO_IOMMU_SPAPR_TCE_CREATE: { + struct vfio_iommu_spapr_tce_create create; + + if (!container->v2) + break; + + if (!tce_groups_attached(container)) + return -ENXIO; + + minsz = offsetofend(struct vfio_iommu_spapr_tce_create, + start_addr); + + if (copy_from_user(&create, (void __user *)arg, minsz)) + return -EFAULT; + + if (create.argsz < minsz) + return -EINVAL; + + if (create.flags) + return -EINVAL; + + mutex_lock(&container->lock); + + ret = tce_iommu_create_window(container, create.page_shift, + create.window_size, create.levels, + &create.start_addr); + + mutex_unlock(&container->lock); + + if (!ret && copy_to_user((void __user *)arg, &create, minsz)) + ret = -EFAULT; + + return ret; + } + case VFIO_IOMMU_SPAPR_TCE_REMOVE: { + struct vfio_iommu_spapr_tce_remove remove; + + if (!container->v2) + break; + + if (!tce_groups_attached(container)) + return -ENXIO; + + minsz = offsetofend(struct vfio_iommu_spapr_tce_remove, + start_addr); + + if (copy_from_user(&remove, (void __user *)arg, minsz)) + return -EFAULT; + + if (remove.argsz < minsz) + return -EINVAL; + + if (remove.flags) + return -EINVAL; + + mutex_lock(&container->lock); + + ret = tce_iommu_remove_window(container, remove.start_addr); + + mutex_unlock(&container->lock); + + return ret; + } } return -ENOTTY; } +static void tce_iommu_release_ownership(struct tce_container *container, + struct iommu_table_group *table_group) +{ + int i; + + for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) { + struct iommu_table *tbl = container->tables[i]; + + if (!tbl) + continue; + + tce_iommu_clear(container, tbl, tbl->it_offset, tbl->it_size); + tce_iommu_userspace_view_free(tbl); + if (tbl->it_map) + iommu_release_ownership(tbl); + + container->tables[i] = NULL; + } +} + +static int tce_iommu_take_ownership(struct tce_container *container, + struct iommu_table_group *table_group) +{ + int i, j, rc = 0; + + for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) { + struct iommu_table *tbl = table_group->tables[i]; + + if (!tbl || !tbl->it_map) + continue; + + rc = 
tce_iommu_userspace_view_alloc(tbl); + if (!rc) + rc = iommu_take_ownership(tbl); + + if (rc) { + for (j = 0; j < i; ++j) + iommu_release_ownership( + table_group->tables[j]); + + return rc; + } + } + + for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) + container->tables[i] = table_group->tables[i]; + + return 0; +} + +static void tce_iommu_release_ownership_ddw(struct tce_container *container, + struct iommu_table_group *table_group) +{ + long i; + + if (!table_group->ops->unset_window) { + WARN_ON_ONCE(1); + return; + } + + for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) + table_group->ops->unset_window(table_group, i); + + table_group->ops->release_ownership(table_group); +} + +static long tce_iommu_take_ownership_ddw(struct tce_container *container, + struct iommu_table_group *table_group) +{ + long i, ret = 0; + struct iommu_table *tbl = NULL; + + if (!table_group->ops->create_table || !table_group->ops->set_window || + !table_group->ops->release_ownership) { + WARN_ON_ONCE(1); + return -EFAULT; + } + + table_group->ops->take_ownership(table_group); + + /* + * If it the first group attached, check if there is + * a default DMA window and create one if none as + * the userspace expects it to exist. + */ + if (!tce_groups_attached(container) && !container->tables[0]) { + ret = tce_iommu_create_table(container, + table_group, + 0, /* window number */ + IOMMU_PAGE_SHIFT_4K, + table_group->tce32_size, + 1, /* default levels */ + &tbl); + if (ret) + goto release_exit; + else + container->tables[0] = tbl; + } + + /* Set all windows to the new group */ + for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) { + tbl = container->tables[i]; + + if (!tbl) + continue; + + /* Set the default window to a new group */ + ret = table_group->ops->set_window(table_group, i, tbl); + if (ret) + goto release_exit; + } + + return 0; + +release_exit: + for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) + table_group->ops->unset_window(table_group, i); + + table_group->ops->release_ownership(table_group); + + return ret; +} + static int tce_iommu_attach_group(void *iommu_data, struct iommu_group *iommu_group) { int ret; struct tce_container *container = iommu_data; - struct iommu_table *tbl = iommu_group_get_iommudata(iommu_group); + struct iommu_table_group *table_group; + struct tce_iommu_group *tcegrp = NULL; - BUG_ON(!tbl); mutex_lock(&container->lock); /* pr_debug("tce_vfio: Attaching group #%u to iommu %p\n", iommu_group_id(iommu_group), iommu_group); */ - if (container->tbl) { - pr_warn("tce_vfio: Only one group per IOMMU container is allowed, existing id=%d, attaching id=%d\n", - iommu_group_id(container->tbl->it_group), - iommu_group_id(iommu_group)); - ret = -EBUSY; - } else if (container->enabled) { - pr_err("tce_vfio: attaching group #%u to enabled container\n", - iommu_group_id(iommu_group)); + table_group = iommu_group_get_iommudata(iommu_group); + + if (tce_groups_attached(container) && (!table_group->ops || + !table_group->ops->take_ownership || + !table_group->ops->release_ownership)) { ret = -EBUSY; - } else { - ret = iommu_take_ownership(tbl); - if (!ret) - container->tbl = tbl; + goto unlock_exit; + } + + /* Check if new group has the same iommu_ops (i.e. 
compatible) */ + list_for_each_entry(tcegrp, &container->group_list, next) { + struct iommu_table_group *table_group_tmp; + + if (tcegrp->grp == iommu_group) { + pr_warn("tce_vfio: Group %d is already attached\n", + iommu_group_id(iommu_group)); + ret = -EBUSY; + goto unlock_exit; + } + table_group_tmp = iommu_group_get_iommudata(tcegrp->grp); + if (table_group_tmp->ops != table_group->ops) { + pr_warn("tce_vfio: Group %d is incompatible with group %d\n", + iommu_group_id(iommu_group), + iommu_group_id(tcegrp->grp)); + ret = -EPERM; + goto unlock_exit; + } + } + + tcegrp = kzalloc(sizeof(*tcegrp), GFP_KERNEL); + if (!tcegrp) { + ret = -ENOMEM; + goto unlock_exit; } + if (!table_group->ops || !table_group->ops->take_ownership || + !table_group->ops->release_ownership) + ret = tce_iommu_take_ownership(container, table_group); + else + ret = tce_iommu_take_ownership_ddw(container, table_group); + + if (!ret) { + tcegrp->grp = iommu_group; + list_add(&tcegrp->next, &container->group_list); + } + +unlock_exit: + if (ret && tcegrp) + kfree(tcegrp); + mutex_unlock(&container->lock); return ret; @@ -339,26 +1227,37 @@ static void tce_iommu_detach_group(void *iommu_data, struct iommu_group *iommu_group) { struct tce_container *container = iommu_data; - struct iommu_table *tbl = iommu_group_get_iommudata(iommu_group); + struct iommu_table_group *table_group; + bool found = false; + struct tce_iommu_group *tcegrp; - BUG_ON(!tbl); mutex_lock(&container->lock); - if (tbl != container->tbl) { - pr_warn("tce_vfio: detaching group #%u, expected group is #%u\n", - iommu_group_id(iommu_group), - iommu_group_id(tbl->it_group)); - } else { - if (container->enabled) { - pr_warn("tce_vfio: detaching group #%u from enabled container, forcing disable\n", - iommu_group_id(tbl->it_group)); - tce_iommu_disable(container); + + list_for_each_entry(tcegrp, &container->group_list, next) { + if (tcegrp->grp == iommu_group) { + found = true; + break; } + } - /* pr_debug("tce_vfio: detaching group #%u from iommu %p\n", - iommu_group_id(iommu_group), iommu_group); */ - container->tbl = NULL; - iommu_release_ownership(tbl); + if (!found) { + pr_warn("tce_vfio: detaching unattached group #%u\n", + iommu_group_id(iommu_group)); + goto unlock_exit; } + + list_del(&tcegrp->next); + kfree(tcegrp); + + table_group = iommu_group_get_iommudata(iommu_group); + BUG_ON(!table_group); + + if (!table_group->ops || !table_group->ops->release_ownership) + tce_iommu_release_ownership(container, table_group); + else + tce_iommu_release_ownership_ddw(container, table_group); + +unlock_exit: mutex_unlock(&container->lock); } diff --git a/drivers/vfio/vfio_spapr_eeh.c b/drivers/vfio/vfio_spapr_eeh.c index 5fa42db..38edeb4 100644 --- a/drivers/vfio/vfio_spapr_eeh.c +++ b/drivers/vfio/vfio_spapr_eeh.c @@ -85,6 +85,16 @@ long vfio_spapr_iommu_eeh_ioctl(struct iommu_group *group, case VFIO_EEH_PE_CONFIGURE: ret = eeh_pe_configure(pe); break; + case VFIO_EEH_PE_INJECT_ERR: + minsz = offsetofend(struct vfio_eeh_pe_op, err.mask); + if (op.argsz < minsz) + return -EINVAL; + if (copy_from_user(&op, (void __user *)arg, minsz)) + return -EFAULT; + + ret = eeh_pe_inject_err(pe, op.err.type, op.err.func, + op.err.addr, op.err.mask); + break; default: ret = -EINVAL; } |
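The powernv_flash driver added above exposes the OPAL-backed PNOR as an ordinary MTD device, so stock userspace tooling works unchanged. The following is a rough illustration only, not part of the series: the /dev/mtd0 node name is an assumption that depends on probe order, and the program simply reads the geometry and the first erase block.

/*
 * Hedged sketch: reading the OPAL PNOR through the MTD character device
 * registered by powernv_flash. /dev/mtd0 is an assumed node name.
 */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <mtd/mtd-user.h>

int main(void)
{
	struct mtd_info_user info;
	unsigned char buf[4096];
	int fd = open("/dev/mtd0", O_RDONLY);

	if (fd < 0)
		return 1;

	/*
	 * erasesize comes from the "ibm,flash-block-size" property and
	 * size from the "reg" property parsed in powernv_flash_set_driver_info().
	 */
	if (ioctl(fd, MEMGETINFO, &info))
		return 1;
	printf("PNOR: %u bytes, erase block %u bytes\n",
	       info.size, info.erasesize);

	/* Reads end up in powernv_flash_read() -> opal_flash_read() */
	if (read(fd, buf, sizeof(buf)) != (ssize_t)sizeof(buf))
		return 1;

	close(fd);
	return 0;
}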
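The vfio_iommu_spapr_tce.c rework introduces a v2 container type with memory preregistration and dynamic DMA windows. The sketch below is an assumption-laden illustration rather than anything shipped with the series: the IOMMU group number, buffer size, page shift and window geometry are invented, and it relies only on ioctl and field names visible in the handlers above (VFIO_IOMMU_SPAPR_REGISTER_MEMORY with vaddr/size, VFIO_IOMMU_SPAPR_TCE_CREATE with page_shift/window_size/levels/start_addr) plus the long-standing container/group setup ioctls.

/*
 * Hedged sketch: driving the new SPAPR TCE v2 container from userspace.
 * Group number and window parameters are hypothetical.
 */
#include <fcntl.h>
#include <stdio.h>
#include <stdint.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <linux/vfio.h>

int main(void)
{
	size_t sz = 1 << 24;	/* 16MB of DMA-able memory, page aligned via mmap */
	struct vfio_iommu_spapr_register_memory reg = { .argsz = sizeof(reg) };
	struct vfio_iommu_spapr_tce_create create = { .argsz = sizeof(create) };
	int container = open("/dev/vfio/vfio", O_RDWR);
	int group = open("/dev/vfio/4", O_RDWR);	/* hypothetical IOMMU group */
	void *buf;

	if (container < 0 || group < 0)
		return 1;

	/* Attach the group, then select the v2 sPAPR TCE backend */
	if (ioctl(group, VFIO_GROUP_SET_CONTAINER, &container) ||
	    ioctl(container, VFIO_SET_IOMMU, VFIO_SPAPR_TCE_v2_IOMMU))
		return 1;

	/* Preregister user memory; vaddr and size must be page aligned */
	buf = mmap(NULL, sz, PROT_READ | PROT_WRITE,
		   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (buf == MAP_FAILED)
		return 1;
	reg.vaddr = (uintptr_t)buf;
	reg.size = sz;
	if (ioctl(container, VFIO_IOMMU_SPAPR_REGISTER_MEMORY, &reg))
		return 1;

	/* Ask the platform for an additional (dynamic) DMA window */
	create.page_shift = 16;		/* 64K IOMMU pages, if the platform supports them */
	create.window_size = sz;
	create.levels = 1;
	if (ioctl(container, VFIO_IOMMU_SPAPR_TCE_CREATE, &create))
		return 1;

	printf("new DMA window starts at 0x%llx\n",
	       (unsigned long long)create.start_addr);
	return 0;
}

Preregistration is the key design point of v2: the pages are pinned and accounted when the region is registered (in mm_iommu_get(), outside this diff), so the map path in tce_iommu_build_v2() only translates userspace addresses through the registered region instead of calling get_user_pages_fast() for every TCE as the v1 tce_iommu_build() path does.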