summaryrefslogtreecommitdiffstats
path: root/drivers/iommu/intel-iommu.c
Commit message (Collapse)AuthorAgeFilesLines
* treewide: kzalloc() -> kcalloc()Kees Cook2018-06-121-2/+2
| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | The kzalloc() function has a 2-factor argument form, kcalloc(). This patch replaces cases of: kzalloc(a * b, gfp) with: kcalloc(a * b, gfp) as well as handling cases of: kzalloc(a * b * c, gfp) with: kzalloc(array3_size(a, b, c), gfp) as it's slightly less ugly than: kzalloc_array(array_size(a, b), c, gfp) This does, however, attempt to ignore constant size factors like: kzalloc(4 * 1024, gfp) though any constants defined via macros get caught up in the conversion. Any factors with a sizeof() of "unsigned char", "char", and "u8" were dropped, since they're redundant. The Coccinelle script used for this was: // Fix redundant parens around sizeof(). @@ type TYPE; expression THING, E; @@ ( kzalloc( - (sizeof(TYPE)) * E + sizeof(TYPE) * E , ...) | kzalloc( - (sizeof(THING)) * E + sizeof(THING) * E , ...) ) // Drop single-byte sizes and redundant parens. @@ expression COUNT; typedef u8; typedef __u8; @@ ( kzalloc( - sizeof(u8) * (COUNT) + COUNT , ...) | kzalloc( - sizeof(__u8) * (COUNT) + COUNT , ...) | kzalloc( - sizeof(char) * (COUNT) + COUNT , ...) | kzalloc( - sizeof(unsigned char) * (COUNT) + COUNT , ...) | kzalloc( - sizeof(u8) * COUNT + COUNT , ...) | kzalloc( - sizeof(__u8) * COUNT + COUNT , ...) | kzalloc( - sizeof(char) * COUNT + COUNT , ...) | kzalloc( - sizeof(unsigned char) * COUNT + COUNT , ...) ) // 2-factor product with sizeof(type/expression) and identifier or constant. @@ type TYPE; expression THING; identifier COUNT_ID; constant COUNT_CONST; @@ ( - kzalloc + kcalloc ( - sizeof(TYPE) * (COUNT_ID) + COUNT_ID, sizeof(TYPE) , ...) | - kzalloc + kcalloc ( - sizeof(TYPE) * COUNT_ID + COUNT_ID, sizeof(TYPE) , ...) | - kzalloc + kcalloc ( - sizeof(TYPE) * (COUNT_CONST) + COUNT_CONST, sizeof(TYPE) , ...) | - kzalloc + kcalloc ( - sizeof(TYPE) * COUNT_CONST + COUNT_CONST, sizeof(TYPE) , ...) | - kzalloc + kcalloc ( - sizeof(THING) * (COUNT_ID) + COUNT_ID, sizeof(THING) , ...) | - kzalloc + kcalloc ( - sizeof(THING) * COUNT_ID + COUNT_ID, sizeof(THING) , ...) | - kzalloc + kcalloc ( - sizeof(THING) * (COUNT_CONST) + COUNT_CONST, sizeof(THING) , ...) | - kzalloc + kcalloc ( - sizeof(THING) * COUNT_CONST + COUNT_CONST, sizeof(THING) , ...) ) // 2-factor product, only identifiers. @@ identifier SIZE, COUNT; @@ - kzalloc + kcalloc ( - SIZE * COUNT + COUNT, SIZE , ...) // 3-factor product with 1 sizeof(type) or sizeof(expression), with // redundant parens removed. @@ expression THING; identifier STRIDE, COUNT; type TYPE; @@ ( kzalloc( - sizeof(TYPE) * (COUNT) * (STRIDE) + array3_size(COUNT, STRIDE, sizeof(TYPE)) , ...) | kzalloc( - sizeof(TYPE) * (COUNT) * STRIDE + array3_size(COUNT, STRIDE, sizeof(TYPE)) , ...) | kzalloc( - sizeof(TYPE) * COUNT * (STRIDE) + array3_size(COUNT, STRIDE, sizeof(TYPE)) , ...) | kzalloc( - sizeof(TYPE) * COUNT * STRIDE + array3_size(COUNT, STRIDE, sizeof(TYPE)) , ...) | kzalloc( - sizeof(THING) * (COUNT) * (STRIDE) + array3_size(COUNT, STRIDE, sizeof(THING)) , ...) | kzalloc( - sizeof(THING) * (COUNT) * STRIDE + array3_size(COUNT, STRIDE, sizeof(THING)) , ...) | kzalloc( - sizeof(THING) * COUNT * (STRIDE) + array3_size(COUNT, STRIDE, sizeof(THING)) , ...) | kzalloc( - sizeof(THING) * COUNT * STRIDE + array3_size(COUNT, STRIDE, sizeof(THING)) , ...) ) // 3-factor product with 2 sizeof(variable), with redundant parens removed. @@ expression THING1, THING2; identifier COUNT; type TYPE1, TYPE2; @@ ( kzalloc( - sizeof(TYPE1) * sizeof(TYPE2) * COUNT + array3_size(COUNT, sizeof(TYPE1), sizeof(TYPE2)) , ...) | kzalloc( - sizeof(TYPE1) * sizeof(THING2) * (COUNT) + array3_size(COUNT, sizeof(TYPE1), sizeof(TYPE2)) , ...) | kzalloc( - sizeof(THING1) * sizeof(THING2) * COUNT + array3_size(COUNT, sizeof(THING1), sizeof(THING2)) , ...) | kzalloc( - sizeof(THING1) * sizeof(THING2) * (COUNT) + array3_size(COUNT, sizeof(THING1), sizeof(THING2)) , ...) | kzalloc( - sizeof(TYPE1) * sizeof(THING2) * COUNT + array3_size(COUNT, sizeof(TYPE1), sizeof(THING2)) , ...) | kzalloc( - sizeof(TYPE1) * sizeof(THING2) * (COUNT) + array3_size(COUNT, sizeof(TYPE1), sizeof(THING2)) , ...) ) // 3-factor product, only identifiers, with redundant parens removed. @@ identifier STRIDE, SIZE, COUNT; @@ ( kzalloc( - (COUNT) * STRIDE * SIZE + array3_size(COUNT, STRIDE, SIZE) , ...) | kzalloc( - COUNT * (STRIDE) * SIZE + array3_size(COUNT, STRIDE, SIZE) , ...) | kzalloc( - COUNT * STRIDE * (SIZE) + array3_size(COUNT, STRIDE, SIZE) , ...) | kzalloc( - (COUNT) * (STRIDE) * SIZE + array3_size(COUNT, STRIDE, SIZE) , ...) | kzalloc( - COUNT * (STRIDE) * (SIZE) + array3_size(COUNT, STRIDE, SIZE) , ...) | kzalloc( - (COUNT) * STRIDE * (SIZE) + array3_size(COUNT, STRIDE, SIZE) , ...) | kzalloc( - (COUNT) * (STRIDE) * (SIZE) + array3_size(COUNT, STRIDE, SIZE) , ...) | kzalloc( - COUNT * STRIDE * SIZE + array3_size(COUNT, STRIDE, SIZE) , ...) ) // Any remaining multi-factor products, first at least 3-factor products, // when they're not all constants... @@ expression E1, E2, E3; constant C1, C2, C3; @@ ( kzalloc(C1 * C2 * C3, ...) | kzalloc( - (E1) * E2 * E3 + array3_size(E1, E2, E3) , ...) | kzalloc( - (E1) * (E2) * E3 + array3_size(E1, E2, E3) , ...) | kzalloc( - (E1) * (E2) * (E3) + array3_size(E1, E2, E3) , ...) | kzalloc( - E1 * E2 * E3 + array3_size(E1, E2, E3) , ...) ) // And then all remaining 2 factors products when they're not all constants, // keeping sizeof() as the second factor argument. @@ expression THING, E1, E2; type TYPE; constant C1, C2, C3; @@ ( kzalloc(sizeof(THING) * C2, ...) | kzalloc(sizeof(TYPE) * C2, ...) | kzalloc(C1 * C2 * C3, ...) | kzalloc(C1 * C2, ...) | - kzalloc + kcalloc ( - sizeof(TYPE) * (E2) + E2, sizeof(TYPE) , ...) | - kzalloc + kcalloc ( - sizeof(TYPE) * E2 + E2, sizeof(TYPE) , ...) | - kzalloc + kcalloc ( - sizeof(THING) * (E2) + E2, sizeof(THING) , ...) | - kzalloc + kcalloc ( - sizeof(THING) * E2 + E2, sizeof(THING) , ...) | - kzalloc + kcalloc ( - (E1) * E2 + E1, E2 , ...) | - kzalloc + kcalloc ( - (E1) * (E2) + E1, E2 , ...) | - kzalloc + kcalloc ( - E1 * E2 + E1, E2 , ...) ) Signed-off-by: Kees Cook <keescook@chromium.org>
* Merge tag 'iommu-updates-v4.18' of ↵Linus Torvalds2018-06-081-52/+49
|\ | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu Pull IOMMU updates from Joerg Roedel: "Nothing big this time. In particular: - Debugging code for Tegra-GART - Improvement in Intel VT-d fault printing to prevent soft-lockups when on fault storms - Improvements in AMD IOMMU event reporting - NUMA aware allocation in io-pgtable code for ARM - Various other small fixes and cleanups all over the place" * tag 'iommu-updates-v4.18' of git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu: iommu/io-pgtable-arm: Make allocations NUMA-aware iommu/amd: Prevent possible null pointer dereference and infinite loop iommu/amd: Fix grammar of comments iommu: Clean up the comments for iommu_group_alloc iommu/vt-d: Remove unnecessary parentheses iommu/vt-d: Clean up pasid quirk for pre-production devices iommu/vt-d: Clean up unused variable in find_or_alloc_domain iommu/vt-d: Fix iotlb psi missing for mappings iommu/vt-d: Introduce __mapping_notify_one() iommu: Remove extra NULL check when call strtobool() iommu/amd: Update logging information for new event type iommu/amd: Update the PASID information printed to the system log iommu/tegra: gart: Fix gart_iommu_unmap() iommu/tegra: gart: Add debugging facility iommu/io-pgtable-arm: Use for_each_set_bit to simplify code iommu/qcom: Simplify getting .drvdata iommu: Remove depends on HAS_DMA in case of platform dependency iommu/vt-d: Ratelimit each dmar fault printing
| * iommu/vt-d: Clean up pasid quirk for pre-production devicesLu Baolu2018-05-151-30/+2
| | | | | | | | | | | | | | | | | | The pasid28 quirk is needed only for some pre-production devices. Remove it to make the code concise. Signed-off-by: Ashok Raj <ashok.raj@intel.com> Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com> Signed-off-by: Joerg Roedel <jroedel@suse.de>
| * iommu/vt-d: Clean up unused variable in find_or_alloc_domainLu Baolu2018-05-151-3/+1
| | | | | | | | | | | | | | Remove it to make the code more concise. Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com> Signed-off-by: Joerg Roedel <jroedel@suse.de>
| * iommu/vt-d: Fix iotlb psi missing for mappingsPeter Xu2018-05-151-9/+34
| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | When caching mode is enabled for IOMMU, we should send explicit IOTLB PSIs even for newly created mappings. However these events are missing for all intel_iommu_map() callers, e.g., iommu_map(). One direct user is the vfio-pci driver. To make sure we'll send the PSIs always when necessary, this patch firstly introduced domain_mapping() helper for page mappings, then fixed the problem by generalizing the explicit map IOTLB PSI logic into that new helper. With that, we let iommu_domain_identity_map() to use the simplified version to avoid sending the notifications, while for all the rest of cases we send the notifications always. For VM case, we send the PSIs to all the backend IOMMUs for the domain. This patch allows the nested device assignment to work with QEMU (assign device firstly to L1 guest, then assign it again to L2 guest). Signed-off-by: Peter Xu <peterx@redhat.com> Signed-off-by: Joerg Roedel <jroedel@suse.de>
| * iommu/vt-d: Introduce __mapping_notify_one()Peter Xu2018-05-151-12/+14
| | | | | | | | | | | | | | | | Introduce this new helper to notify one newly created mapping on one single IOMMU. We can further leverage this helper in the next patch. Signed-off-by: Peter Xu <peterx@redhat.com> Signed-off-by: Joerg Roedel <jroedel@suse.de>
* | PCI: Add "pci=noats" boot parameterGil Kupfer2018-05-101-1/+2
|/ | | | | | | | | | | | | | | | | | | Adds a "pci=noats" boot parameter. When supplied, all ATS related functions fail immediately and the IOMMU is configured to not use device-IOTLB. Any function that checks for ATS capabilities directly against the devices should also check this flag. Currently, such functions exist only in IOMMU drivers, and they are covered by this patch. The motivation behind this patch is the existence of malicious devices. Lots of research has been done about how to use the IOMMU as protection from such devices. When ATS is supported, any I/O device can access any physical address by faking device-IOTLB entries. Adding the ability to ignore these entries lets sysadmins enhance system security. Signed-off-by: Gil Kupfer <gilkup@cs.technion.ac.il> Signed-off-by: Bjorn Helgaas <bhelgaas@google.com> Acked-by: Joerg Roedel <jroedel@suse.de>
* Merge tag 'iommu-updates-v4.17' of ↵Linus Torvalds2018-04-111-5/+1
|\ | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu Pull IOMMU updates from Joerg Roedel: - OF_IOMMU support for the Rockchip iommu driver so that it can use generic DT bindings - rework of locking in the AMD IOMMU interrupt remapping code to make it work better in RT kernels - support for improved iotlb flushing in the AMD IOMMU driver - support for 52-bit physical and virtual addressing in the ARM-SMMU - various other small fixes and cleanups * tag 'iommu-updates-v4.17' of git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu: (53 commits) iommu/io-pgtable-arm: Avoid warning with 32-bit phys_addr_t iommu/rockchip: Support sharing IOMMU between masters iommu/rockchip: Add runtime PM support iommu/rockchip: Fix error handling in init iommu/rockchip: Use OF_IOMMU to attach devices automatically iommu/rockchip: Use IOMMU device for dma mapping operations dt-bindings: iommu/rockchip: Add clock property iommu/rockchip: Control clocks needed to access the IOMMU iommu/rockchip: Fix TLB flush of secondary IOMMUs iommu/rockchip: Use iopoll helpers to wait for hardware iommu/rockchip: Fix error handling in attach iommu/rockchip: Request irqs in rk_iommu_probe() iommu/rockchip: Fix error handling in probe iommu/rockchip: Prohibit unbind and remove iommu/amd: Return proper error code in irq_remapping_alloc() iommu/amd: Make amd_iommu_devtable_lock a spin_lock iommu/amd: Drop the lock while allocating new irq remap table iommu/amd: Factor out setting the remap table for a devid iommu/amd: Use `table' instead `irt' as variable name in amd_iommu_update_ga() iommu/amd: Remove the special case from alloc_irq_table() ...
| * iommu/vt-d:Remove unused variableShaokun Zhang2018-03-291-5/+1
| | | | | | | | | | | | | | | | | | | | Unused after commit <42e8c186b595> ("iommu/vt-d: Simplify io/tlb flushing in intel_iommu_unmap"), cleanup it. Cc: David Woodhouse <dwmw2@infradead.org> Cc: Joerg Roedel <joro@8bytes.org> Signed-off-by: Shaokun Zhang <zhangshaokun@hisilicon.com> Signed-off-by: Joerg Roedel <jroedel@suse.de>
* | iommu/intel-iommu: Enable CONFIG_DMA_DIRECT_OPS=y and clean up ↵Christoph Hellwig2018-03-201-46/+16
| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | intel_{alloc,free}_coherent() Use the dma_direct_*() helpers and clean up the code flow. Tested-by: Tom Lendacky <thomas.lendacky@amd.com> Signed-off-by: Christoph Hellwig <hch@lst.de> Reviewed-by: Thomas Gleixner <tglx@linutronix.de> Cc: David Woodhouse <dwmw2@infradead.org> Cc: Joerg Roedel <joro@8bytes.org> Cc: Jon Mason <jdmason@kudzu.us> Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Muli Ben-Yehuda <mulix@mulix.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: iommu@lists.linux-foundation.org Link: http://lkml.kernel.org/r/20180319103826.12853-9-hch@lst.de Signed-off-by: Ingo Molnar <mingo@kernel.org>
* | x86/dma: Use DMA-direct (CONFIG_DMA_DIRECT_OPS=y)Christoph Hellwig2018-03-201-1/+2
|/ | | | | | | | | | | | | | | | | | | | | | | | The generic DMA-direct (CONFIG_DMA_DIRECT_OPS=y) implementation is now functionally equivalent to the x86 nommu dma_map implementation, so switch over to using it. That includes switching from using x86_dma_supported in various IOMMU drivers to use dma_direct_supported instead, which provides the same functionality. Tested-by: Tom Lendacky <thomas.lendacky@amd.com> Signed-off-by: Christoph Hellwig <hch@lst.de> Reviewed-by: Thomas Gleixner <tglx@linutronix.de> Cc: David Woodhouse <dwmw2@infradead.org> Cc: Joerg Roedel <joro@8bytes.org> Cc: Jon Mason <jdmason@kudzu.us> Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Muli Ben-Yehuda <mulix@mulix.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: iommu@lists.linux-foundation.org Link: http://lkml.kernel.org/r/20180319103826.12853-4-hch@lst.de Signed-off-by: Ingo Molnar <mingo@kernel.org>
* Merge tag 'iommu-updates-v4.16' of ↵Linus Torvalds2018-02-081-3/+2
|\ | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu Pull IOMMU updates from Joerg Roedel: "This time there are not a lot of changes coming from the IOMMU side. That is partly because I returned from my parental leave late in the development process and probably partly because everyone was busy with Spectre and Meltdown mitigation work and didn't find the time for IOMMU work. So here are the few changes that queued up for this merge window: - 5-level page-table support for the Intel IOMMU. - error reporting improvements for the AMD IOMMU driver - additional DT bindings for ipmmu-vmsa (Renesas) - small fixes and cleanups" * tag 'iommu-updates-v4.16' of git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu: iommu: Clean up of_iommu_init_fn iommu/ipmmu-vmsa: Remove redundant of_iommu_init_fn hook iommu/msm: Claim bus ops on probe iommu/vt-d: Enable 5-level paging mode in the PASID entry iommu/vt-d: Add a check for 5-level paging support iommu/vt-d: Add a check for 1GB page support iommu/vt-d: Enable upto 57 bits of domain address width iommu/vt-d: Use domain instead of cache fetching iommu/exynos: Don't unconditionally steal bus ops iommu/omap: Fix debugfs_create_*() usage iommu/vt-d: clean up pr_irq if request_threaded_irq fails iommu: Check the result of iommu_group_get() for NULL iommu/ipmmu-vmsa: Add r8a779(70|95) DT bindings iommu/ipmmu-vmsa: Add r8a7796 DT binding iommu/amd: Set the device table entry PPR bit for IOMMU V2 devices iommu/amd - Record more information about unknown events
| * iommu/vt-d: Enable upto 57 bits of domain address widthSohil Mehta2018-01-171-1/+1
| | | | | | | | | | | | | | | | | | | | | | | | | | Update the IOMMU default domain address width to 57 bits. This would enable the IOMMU to do upto 5-levels of paging for second level translations - IOVA translation requests without PASID. Even though the maximum supported address width is being increased to 57, __iommu_calculate_agaw() would set the actual supported address width to the maximum support available in IOMMU hardware. Signed-off-by: Sohil Mehta <sohil.mehta@intel.com> Signed-off-by: Joerg Roedel <jroedel@suse.de>
| * iommu/vt-d: Use domain instead of cache fetchingPeter Xu2018-01-171-2/+1
| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | after commit a1ddcbe93010 ("iommu/vt-d: Pass dmar_domain directly into iommu_flush_iotlb_psi", 2015-08-12), we have domain pointer as parameter to iommu_flush_iotlb_psi(), so no need to fetch it from cache again. More importantly, a NULL reference pointer bug is reported on RHEL7 (and it can be reproduced on some old upstream kernels too, e.g., v4.13) by unplugging an 40g nic from a VM (hard to test unplug on real host, but it should be the same): https://bugzilla.redhat.com/show_bug.cgi?id=1531367 [ 24.391863] pciehp 0000:00:03.0:pcie004: Slot(0): Attention button pressed [ 24.393442] pciehp 0000:00:03.0:pcie004: Slot(0): Powering off due to button press [ 29.721068] i40evf 0000:01:00.0: Unable to send opcode 2 to PF, err I40E_ERR_QUEUE_EMPTY, aq_err OK [ 29.783557] iommu: Removing device 0000:01:00.0 from group 3 [ 29.784662] BUG: unable to handle kernel NULL pointer dereference at 0000000000000304 [ 29.785817] IP: iommu_flush_iotlb_psi+0xcf/0x120 [ 29.786486] PGD 0 [ 29.786487] P4D 0 [ 29.786812] [ 29.787390] Oops: 0000 [#1] SMP [ 29.787876] Modules linked in: ip6t_rpfilter ip6t_REJECT nf_reject_ipv6 xt_conntrack ip_set nfnetlink ebtable_nat ebtable_broute bridge stp llc ip6table_ng [ 29.795371] CPU: 0 PID: 156 Comm: kworker/0:2 Not tainted 4.13.0 #14 [ 29.796366] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.11.0-1.el7 04/01/2014 [ 29.797593] Workqueue: pciehp-0 pciehp_power_thread [ 29.798328] task: ffff94f5745b4a00 task.stack: ffffb326805ac000 [ 29.799178] RIP: 0010:iommu_flush_iotlb_psi+0xcf/0x120 [ 29.799919] RSP: 0018:ffffb326805afbd0 EFLAGS: 00010086 [ 29.800666] RAX: ffff94f5bc56e800 RBX: 0000000000000000 RCX: 0000000200000025 [ 29.801667] RDX: ffff94f5bc56e000 RSI: 0000000000000082 RDI: 0000000000000000 [ 29.802755] RBP: ffffb326805afbf8 R08: 0000000000000000 R09: ffff94f5bc86bbf0 [ 29.803772] R10: ffffb326805afba8 R11: 00000000000ffdc4 R12: ffff94f5bc86a400 [ 29.804789] R13: 0000000000000000 R14: 00000000ffdc4000 R15: 0000000000000000 [ 29.805792] FS: 0000000000000000(0000) GS:ffff94f5bfc00000(0000) knlGS:0000000000000000 [ 29.806923] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 29.807736] CR2: 0000000000000304 CR3: 000000003499d000 CR4: 00000000000006f0 [ 29.808747] Call Trace: [ 29.809156] flush_unmaps_timeout+0x126/0x1c0 [ 29.809800] domain_exit+0xd6/0x100 [ 29.810322] device_notifier+0x6b/0x70 [ 29.810902] notifier_call_chain+0x4a/0x70 [ 29.812822] __blocking_notifier_call_chain+0x47/0x60 [ 29.814499] blocking_notifier_call_chain+0x16/0x20 [ 29.816137] device_del+0x233/0x320 [ 29.817588] pci_remove_bus_device+0x6f/0x110 [ 29.819133] pci_stop_and_remove_bus_device+0x1a/0x20 [ 29.820817] pciehp_unconfigure_device+0x7a/0x1d0 [ 29.822434] pciehp_disable_slot+0x52/0xe0 [ 29.823931] pciehp_power_thread+0x8a/0xa0 [ 29.825411] process_one_work+0x18c/0x3a0 [ 29.826875] worker_thread+0x4e/0x3b0 [ 29.828263] kthread+0x109/0x140 [ 29.829564] ? process_one_work+0x3a0/0x3a0 [ 29.831081] ? kthread_park+0x60/0x60 [ 29.832464] ret_from_fork+0x25/0x30 [ 29.833794] Code: 85 ed 74 0b 5b 41 5c 41 5d 41 5e 41 5f 5d c3 49 8b 54 24 60 44 89 f8 0f b6 c4 48 8b 04 c2 48 85 c0 74 49 45 0f b6 ff 4a 8b 3c f8 <80> bf [ 29.838514] RIP: iommu_flush_iotlb_psi+0xcf/0x120 RSP: ffffb326805afbd0 [ 29.840362] CR2: 0000000000000304 [ 29.841716] ---[ end trace b10ec0d6900868d3 ]--- This patch fixes that problem if applied to v4.13 kernel. The bug does not exist on latest upstream kernel since it's fixed as a side effect of commit 13cf01744608 ("iommu/vt-d: Make use of iova deferred flushing", 2017-08-15). But IMHO it's still good to have this patch upstream. CC: Alex Williamson <alex.williamson@redhat.com> Signed-off-by: Peter Xu <peterx@redhat.com> Fixes: a1ddcbe93010 ("iommu/vt-d: Pass dmar_domain directly into iommu_flush_iotlb_psi") Reviewed-by: Alex Williamson <alex.williamson@redhat.com> Signed-off-by: Joerg Roedel <jroedel@suse.de>
* | ia64: clean up swiotlb supportChristoph Hellwig2018-01-151-1/+1
|/ | | | | | | | Move the few remaining bits of swiotlb glue towards their callers, and remove the pointless on ia64 swiotlb variable. Signed-off-by: Christoph Hellwig <hch@lst.de> Acked-by: Christian König <christian.koenig@amd.com>
* iommu/vt-d: Fix scatterlist offset handlingRobin Murphy2017-11-171-3/+5
| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | The intel-iommu DMA ops fail to correctly handle scatterlists where sg->offset is greater than PAGE_SIZE - the IOVA allocation is computed appropriately based on the page-aligned portion of the offset, but the mapping is set up relative to sg->page, which means it fails to actually cover the whole buffer (and in the worst case doesn't cover it at all): (sg->dma_address + sg->dma_len) ----+ sg->dma_address ---------+ | iov_pfn------+ | | | | | v v v iova: a b c d e f |--------|--------|--------|--------|--------| <...calculated....> [_____mapped______] pfn: 0 1 2 3 4 5 |--------|--------|--------|--------|--------| ^ ^ ^ | | | sg->page ----+ | | sg->offset --------------+ | (sg->offset + sg->length) ----------+ As a result, the caller ends up overrunning the mapping into whatever lies beyond, which usually goes badly: [ 429.645492] DMAR: DRHD: handling fault status reg 2 [ 429.650847] DMAR: [DMA Write] Request device [02:00.4] fault addr f2682000 ... Whilst this is a fairly rare occurrence, it can happen from the result of intermediate scatterlist processing such as scatterwalk_ffwd() in the crypto layer. Whilst that particular site could be fixed up, it still seems worthwhile to bring intel-iommu in line with other DMA API implementations in handling this robustly. To that end, fix the intel_map_sg() path to line up the mapping correctly (in units of MM pages rather than VT-d pages to match the aligned_nrpages() calculation) regardless of the offset, and use sg_phys() consistently for clarity. Reported-by: Harsh Jain <Harsh@chelsio.com> Signed-off-by: Robin Murphy <robin.murphy@arm.com> Reviewed by: Ashok Raj <ashok.raj@intel.com> Tested by: Jacob Pan <jacob.jun.pan@intel.com> Cc: stable@vger.kernel.org Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
*-. Merge branches 'iommu/fixes', 'arm/omap', 'arm/exynos', 'x86/amd', ↵Joerg Roedel2017-10-131-11/+17
|\ \ | | | | | | | | | 'x86/vt-d' and 'core' into next
| | * iommu/iova: Make rcache flush optional on IOVA allocation failureTomasz Nowicki2017-10-121-2/+3
| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | Since IOVA allocation failure is not unusual case we need to flush CPUs' rcache in hope we will succeed in next round. However, it is useful to decide whether we need rcache flush step because of two reasons: - Not scalability. On large system with ~100 CPUs iterating and flushing rcache for each CPU becomes serious bottleneck so we may want to defer it. - free_cpu_cached_iovas() does not care about max PFN we are interested in. Thus we may flush our rcaches and still get no new IOVA like in the commonly used scenario: if (dma_limit > DMA_BIT_MASK(32) && dev_is_pci(dev)) iova = alloc_iova_fast(iovad, iova_len, DMA_BIT_MASK(32) >> shift); if (!iova) iova = alloc_iova_fast(iovad, iova_len, dma_limit >> shift); 1. First alloc_iova_fast() call is limited to DMA_BIT_MASK(32) to get PCI devices a SAC address 2. alloc_iova() fails due to full 32-bit space 3. rcaches contain PFNs out of 32-bit space so free_cpu_cached_iovas() throws entries away for nothing and alloc_iova() fails again 4. Next alloc_iova_fast() call cannot take advantage of rcache since we have just defeated caches. In this case we pick the slowest option to proceed. This patch reworks flushed_rcache local flag to be additional function argument instead and control rcache flush step. Also, it updates all users to do the flush as the last chance. Signed-off-by: Tomasz Nowicki <Tomasz.Nowicki@caviumnetworks.com> Reviewed-by: Robin Murphy <robin.murphy@arm.com> Tested-by: Nate Watterson <nwatters@codeaurora.org> Signed-off-by: Joerg Roedel <jroedel@suse.de>
| | * iommu/iova: Make dma_32bit_pfn implicitZhen Lei2017-09-271-8/+3
| |/ |/| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | Now that the cached node optimisation can apply to all allocations, the couple of users which were playing tricks with dma_32bit_pfn in order to benefit from it can stop doing so. Conversely, there is also no need for all the other users to explicitly calculate a 'real' 32-bit PFN, when init_iova_domain() can happily do that itself from the page granularity. CC: Thierry Reding <thierry.reding@gmail.com> CC: Jonathan Hunter <jonathanh@nvidia.com> CC: David Airlie <airlied@linux.ie> CC: Sudeep Dutt <sudeep.dutt@intel.com> CC: Ashutosh Dixit <ashutosh.dixit@intel.com> Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com> Tested-by: Ard Biesheuvel <ard.biesheuvel@linaro.org> Tested-by: Zhen Lei <thunder.leizhen@huawei.com> Tested-by: Nate Watterson <nwatters@codeaurora.org> [rm: use iova_shift(), rewrote commit message] Signed-off-by: Robin Murphy <robin.murphy@arm.com> Signed-off-by: Joerg Roedel <jroedel@suse.de>
| * iommu/vt-d: Delete unnecessary check in domain_context_mapping_one()Christos Gkekas2017-10-101-1/+1
| | | | | | | | | | | | | | | | Variable did_old is unsigned so checking whether it is greater or equal to zero is not necessary. Signed-off-by: Christos Gkekas <chris.gekas@gmail.com> Signed-off-by: Joerg Roedel <jroedel@suse.de>
| * iommu/vt-d: Don't register bus-notifier under dmar_global_lockJoerg Roedel2017-10-061-0/+10
|/ | | | | | | | | | The notifier function will take the dmar_global_lock too, so lockdep complains about inverse locking order when the notifier is registered under the dmar_global_lock. Reported-by: Jan Kiszka <jan.kiszka@siemens.com> Fixes: 59ce0515cdaf ('iommu/vt-d: Update DRHD/RMRR/ATSR device scope caches when PCI hotplug happens') Signed-off-by: Joerg Roedel <jroedel@suse.de>
* Merge tag 'iommu-updates-v4.14' of ↵Linus Torvalds2017-09-091-190/+90
|\ | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu Pull IOMMU updates from Joerg Roedel: "Slightly more changes than usual this time: - KDump Kernel IOMMU take-over code for AMD IOMMU. The code now tries to preserve the mappings of the kernel so that master aborts for devices are avoided. Master aborts cause some devices to fail in the kdump kernel, so this code makes the dump more likely to succeed when AMD IOMMU is enabled. - common flush queue implementation for IOVA code users. The code is still optional, but AMD and Intel IOMMU drivers had their own implementation which is now unified. - finish support for iommu-groups. All drivers implement this feature now so that IOMMU core code can rely on it. - finish support for 'struct iommu_device' in iommu drivers. All drivers now use the interface. - new functions in the IOMMU-API for explicit IO/TLB flushing. This will help to reduce the number of IO/TLB flushes when IOMMU drivers support this interface. - support for mt2712 in the Mediatek IOMMU driver - new IOMMU driver for QCOM hardware - system PM support for ARM-SMMU - shutdown method for ARM-SMMU-v3 - some constification patches - various other small improvements and fixes" * tag 'iommu-updates-v4.14' of git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu: (87 commits) iommu/vt-d: Don't be too aggressive when clearing one context entry iommu: Introduce Interface for IOMMU TLB Flushing iommu/s390: Constify iommu_ops iommu/vt-d: Avoid calling virt_to_phys() on null pointer iommu/vt-d: IOMMU Page Request needs to check if address is canonical. arm/tegra: Call bus_set_iommu() after iommu_device_register() iommu/exynos: Constify iommu_ops iommu/ipmmu-vmsa: Make ipmmu_gather_ops const iommu/ipmmu-vmsa: Rereserving a free context before setting up a pagetable iommu/amd: Rename a few flush functions iommu/amd: Check if domain is NULL in get_domain() and return -EBUSY iommu/mediatek: Fix a build warning of BIT(32) in ARM iommu/mediatek: Fix a build fail of m4u_type iommu: qcom: annotate PM functions as __maybe_unused iommu/pamu: Fix PAMU boot crash memory: mtk-smi: Degrade SMI init to module_init iommu/mediatek: Enlarge the validate PA range for 4GB mode iommu/mediatek: Disable iommu clock when system suspend iommu/mediatek: Move pgtable allocation into domain_alloc iommu/mediatek: Merge 2 M4U HWs into one iommu domain ...
| *---. Merge branches 'arm/exynos', 'arm/renesas', 'arm/rockchip', 'arm/omap', ↵Joerg Roedel2017-09-011-190/+90
| |\ \ \ | | | | | | | | | | | | | | | 'arm/mediatek', 'arm/tegra', 'arm/qcom', 'arm/smmu', 'ppc/pamu', 'x86/vt-d', 'x86/amd', 's390' and 'core' into next
| | | | * iommu/vt-d: Don't be too aggressive when clearing one context entryFilippo Sironi2017-09-011-18/+24
| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | Previously, we were invalidating context cache and IOTLB globally when clearing one context entry. This is a tad too aggressive. Invalidate the context cache and IOTLB for the interested device only. Signed-off-by: Filippo Sironi <sironi@amazon.de> Cc: David Woodhouse <dwmw@amazon.co.uk> Cc: David Woodhouse <dwmw2@infradead.org> Cc: Joerg Roedel <joro@8bytes.org> Cc: Jacob Pan <jacob.jun.pan@linux.intel.com> Cc: iommu@lists.linux-foundation.org Cc: linux-kernel@vger.kernel.org Acked-by: David Woodhouse <dwmw@amazon.co.uk> Signed-off-by: Joerg Roedel <jroedel@suse.de>
| | | | * iommu/vt-d: Avoid calling virt_to_phys() on null pointerAshok Raj2017-08-301-1/+2
| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | New kernels with debug show panic() from __phys_addr() checks. Avoid calling virt_to_phys() when pasid_state_tbl pointer is null To: Joerg Roedel <joro@8bytes.org> To: linux-kernel@vger.kernel.org> Cc: iommu@lists.linux-foundation.org Cc: David Woodhouse <dwmw2@infradead.org> Cc: Jacob Pan <jacob.jun.pan@intel.com> Cc: Ashok Raj <ashok.raj@intel.com> Fixes: 2f26e0a9c9860 ('iommu/vt-d: Add basic SVM PASID support') Signed-off-by: Ashok Raj <ashok.raj@intel.com> Acked-by: David Woodhouse <dwmw@amazon.co.uk> Signed-off-by: Joerg Roedel <jroedel@suse.de>
| | | | * iommu/vt-d: Don't free parent pagetable of the PTE we're addingDavid Dillow2017-07-261-12/+26
| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | When adding a large scatterlist entry that covers more than the L3 superpage size (1GB) but has an alignment such that we must use L2 superpages (2MB) , we give dma_pte_free_level() a range that causes it to free the L3 pagetable we're about to populate. We fix this by telling dma_pte_free_pagetable() about the pagetable level we're about to populate to prevent freeing it. For example, mapping a scatterlist with entry lengths 854MB and 1194MB at IOVA 0xffff80000000 would, when processing the 2MB-aligned second entry, cause pfn_to_dma_pte() to create a L3 directory to hold L2 superpages for the mapping at IOVA 0xffffc0000000. We would previously call dma_pte_free_pagetable(domain, 0xffffc0000, 0xfffffffff), which would free the L3 directory pfn_to_dma_pte() just created for IO PFN 0xffffc0000. Telling dma_pte_free_pagetable() to retain the L3 directories while using L2 superpages avoids the erroneous free. Signed-off-by: David Dillow <dillow@google.com> Signed-off-by: Joerg Roedel <jroedel@suse.de>
| | | * | iommu/vt-d: Make use of iova deferred flushingJoerg Roedel2017-08-151-159/+38
| | |/ / | | | | | | | | | | | | | | | | | | | | | | | | | | | | Remove the deferred flushing implementation in the Intel VT-d driver and use the one from the common iova code instead. Signed-off-by: Joerg Roedel <jroedel@suse.de>
* | | | Merge tag 'pci-v4.14-changes' of ↵Linus Torvalds2017-09-081-0/+7
|\ \ \ \ | |/ / / |/| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | git://git.kernel.org/pub/scm/linux/kernel/git/helgaas/pci Pull PCI updates from Bjorn Helgaas: - add enhanced Downstream Port Containment support, which prints more details about Root Port Programmed I/O errors (Dongdong Liu) - add Layerscape ls1088a and ls2088a support (Hou Zhiqiang) - add MediaTek MT2712 and MT7622 support (Ryder Lee) - add MediaTek MT2712 and MT7622 MSI support (Honghui Zhang) - add Qualcom IPQ8074 support (Varadarajan Narayanan) - add R-Car r8a7743/5 device tree support (Biju Das) - add Rockchip per-lane PHY support for better power management (Shawn Lin) - fix IRQ mapping for hot-added devices by replacing the pci_fixup_irqs() boot-time design with a host bridge hook called at probe-time (Lorenzo Pieralisi, Matthew Minter) - fix race when enabling two devices that results in upstream bridge not being enabled correctly (Srinath Mannam) - fix pciehp power fault infinite loop (Keith Busch) - fix SHPC bridge MSI hotplug events by enabling bus mastering (Aleksandr Bezzubikov) - fix a VFIO issue by correcting PCIe capability sizes (Alex Williamson) - fix an INTD issue on Xilinx and possibly other drivers by unifying INTx IRQ domain support (Paul Burton) - avoid IOMMU stalls by marking AMD Stoney GPU ATS as broken (Joerg Roedel) - allow APM X-Gene device assignment to guests by adding an ACS quirk (Feng Kan) - fix driver crashes by disabling Extended Tags on Broadcom HT2100 (Extended Tags support is required for PCIe Receivers but not Requesters, and we now enable them by default when Requesters support them) (Sinan Kaya) - fix MSIs for devices that use phantom RIDs for DMA by assuming MSIs use the real Requester ID (not a phantom RID) (Robin Murphy) - prevent assignment of Intel VMD children to guests (which may be supported eventually, but isn't yet) by not associating an IOMMU with them (Jon Derrick) - fix Intel VMD suspend/resume by releasing IRQs on suspend (Scott Bauer) - fix a Function-Level Reset issue with Intel 750 NVMe by waiting longer (up to 60sec instead of 1sec) for device to become ready (Sinan Kaya) - fix a Function-Level Reset issue on iProc Stingray by working around hardware defects in the CRS implementation (Oza Pawandeep) - fix an issue with Intel NVMe P3700 after an iProc reset by adding a delay during shutdown (Oza Pawandeep) - fix a Microsoft Hyper-V lockdep issue by polling instead of blocking in compose_msi_msg() (Stephen Hemminger) - fix a wireless LAN driver timeout by clearing DesignWare MSI interrupt status after it is handled, not before (Faiz Abbas) - fix DesignWare ATU enable checking (Jisheng Zhang) - reduce Layerscape dependencies on the bootloader by doing more initialization in the driver (Hou Zhiqiang) - improve Intel VMD performance allowing allocation of more IRQ vectors than present CPUs (Keith Busch) - improve endpoint framework support for initial DMA mask, different BAR sizes, configurable page sizes, MSI, test driver, etc (Kishon Vijay Abraham I, Stan Drozd) - rework CRS support to add periodic messages while we poll during enumeration and after Function-Level Reset and prepare for possible other uses of CRS (Sinan Kaya) - clean up Root Port AER handling by removing unnecessary code and moving error handler methods to struct pcie_port_service_driver (Christoph Hellwig) - clean up error handling paths in various drivers (Bjorn Andersson, Fabio Estevam, Gustavo A. R. Silva, Harunobu Kurokawa, Jeffy Chen, Lorenzo Pieralisi, Sergei Shtylyov) - clean up SR-IOV resource handling by disabling VF decoding before updating the corresponding resource structs (Gavin Shan) - clean up DesignWare-based drivers by unifying quirks to update Class Code and Interrupt Pin and related handling of write-protected registers (Hou Zhiqiang) - clean up by adding empty generic pcibios_align_resource() and pcibios_fixup_bus() and removing empty arch-specific implementations (Palmer Dabbelt) - request exclusive reset control for several drivers to allow cleanup elsewhere (Philipp Zabel) - constify various structures (Arvind Yadav, Bhumika Goyal) - convert from full_name() to %pOF (Rob Herring) - remove unused variables from iProc, HiSi, Altera, Keystone (Shawn Lin) * tag 'pci-v4.14-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/helgaas/pci: (170 commits) PCI: xgene: Clean up whitespace PCI: xgene: Define XGENE_PCI_EXP_CAP and use generic PCI_EXP_RTCTL offset PCI: xgene: Fix platform_get_irq() error handling PCI: xilinx-nwl: Fix platform_get_irq() error handling PCI: rockchip: Fix platform_get_irq() error handling PCI: altera: Fix platform_get_irq() error handling PCI: spear13xx: Fix platform_get_irq() error handling PCI: artpec6: Fix platform_get_irq() error handling PCI: armada8k: Fix platform_get_irq() error handling PCI: dra7xx: Fix platform_get_irq() error handling PCI: exynos: Fix platform_get_irq() error handling PCI: iproc: Clean up whitespace PCI: iproc: Rename PCI_EXP_CAP to IPROC_PCI_EXP_CAP PCI: iproc: Add 500ms delay during device shutdown PCI: Fix typos and whitespace errors PCI: Remove unused "res" variable from pci_resource_io() PCI: Correct kernel-doc of pci_vpd_srdt_size(), pci_vpd_srdt_tag() PCI/AER: Reformat AER register definitions iommu/vt-d: Prevent VMD child devices from being remapping targets x86/PCI: Use is_vmd() rather than relying on the domain number ...
| * | | iommu/vt-d: Prevent VMD child devices from being remapping targetsJon Derrick2017-08-301-0/+7
| |/ / | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | VMD child devices must use the VMD endpoint's ID as the requester. Because of this, there needs to be a way to link the parent VMD endpoint's IOMMU group and associated mappings to the VMD child devices such that attaching and detaching child devices modify the endpoint's mappings, while preventing early detaching on a singular device removal or unbinding. The reassignment of individual VMD child devices devices to VMs is outside the scope of VMD, but may be implemented in the future. For now it is best to prevent any such attempts. Prevent VMD child devices from returning an IOMMU, which prevents it from exposing an iommu_group sysfs directory and allowing subsequent binding by userspace-access drivers such as VFIO. Signed-off-by: Jon Derrick <jonathan.derrick@intel.com> Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
* | | iommu: Fix wrong freeing of iommu_device->devJoerg Roedel2017-08-151-1/+3
|/ / | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | The struct iommu_device has a 'struct device' embedded into it, not as a pointer, but the whole struct. In the conversion of the iommu drivers to use struct iommu_device it was forgotten that the relase function for that struct device simply calls kfree() on the pointer. This frees memory that was never allocated and causes memory corruption. To fix this issue, use a pointer to struct device instead of embedding the whole struct. This needs some updates in the iommu sysfs code as well as the Intel VT-d and AMD IOMMU driver. Reported-by: Sebastian Ott <sebott@linux.vnet.ibm.com> Fixes: 39ab9555c241 ('iommu: Add sysfs bindings for struct iommu_device') Cc: stable@vger.kernel.org # >= v4.11 Signed-off-by: Joerg Roedel <jroedel@suse.de>
* | Merge tag 'iommu-updates-v4.13' of ↵Linus Torvalds2017-07-121-21/+5
|\ \ | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu Pull IOMMU updates from Joerg Roedel: "This update comes with: - Support for lockless operation in the ARM io-pgtable code. This is an important step to solve the scalability problems in the common dma-iommu code for ARM - Some Errata workarounds for ARM SMMU implemenations - Rewrite of the deferred IO/TLB flush code in the AMD IOMMU driver. The code suffered from very high flush rates, with the new implementation the flush rate is down to ~1% of what it was before - Support for amd_iommu=off when booting with kexec. The problem here was that the IOMMU driver bailed out early without disabling the iommu hardware, if it was enabled in the old kernel - The Rockchip IOMMU driver is now available on ARM64 - Align the return value of the iommu_ops->device_group call-backs to not miss error values - Preempt-disable optimizations in the Intel VT-d and common IOVA code to help Linux-RT - Various other small cleanups and fixes" * tag 'iommu-updates-v4.13' of git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu: (60 commits) iommu/vt-d: Constify intel_dma_ops iommu: Warn once when device_group callback returns NULL iommu/omap: Return ERR_PTR in device_group call-back iommu: Return ERR_PTR() values from device_group call-backs iommu/s390: Use iommu_group_get_for_dev() in s390_iommu_add_device() iommu/vt-d: Don't disable preemption while accessing deferred_flush() iommu/iova: Don't disable preempt around this_cpu_ptr() iommu/arm-smmu-v3: Add workaround for Cavium ThunderX2 erratum #126 iommu/arm-smmu-v3: Enable ACPI based HiSilicon CMD_PREFETCH quirk(erratum 161010701) iommu/arm-smmu-v3: Add workaround for Cavium ThunderX2 erratum #74 ACPI/IORT: Fixup SMMUv3 resource size for Cavium ThunderX2 SMMUv3 model iommu/arm-smmu-v3, acpi: Add temporary Cavium SMMU-V3 IORT model number definitions iommu/io-pgtable-arm: Use dma_wmb() instead of wmb() when publishing table iommu/io-pgtable: depend on !GENERIC_ATOMIC64 when using COMPILE_TEST with LPAE iommu/arm-smmu-v3: Remove io-pgtable spinlock iommu/arm-smmu: Remove io-pgtable spinlock iommu/io-pgtable-arm-v7s: Support lockless operation iommu/io-pgtable-arm: Support lockless operation iommu/io-pgtable: Introduce explicit coherency iommu/io-pgtable-arm-v7s: Refactor split_blk_unmap ...
| | \
| | \
| *-. \ Merge branches 'iommu/fixes', 'arm/rockchip', 'arm/renesas', 'arm/smmu', ↵Joerg Roedel2017-06-281-21/+5
| |\ \ \ | | | |/ | | | | | | | | 'arm/core', 'x86/vt-d', 'x86/amd', 's390' and 'core' into next
| | | * iommu/vt-d: Constify intel_dma_opsArvind Yadav2017-06-281-1/+1
| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | Most dma_map_ops structures are never modified. Constify these structures such that these can be write-protected. Signed-off-by: Arvind Yadav <arvind.yadav.cs@gmail.com> Signed-off-by: Joerg Roedel <jroedel@suse.de>
| | | * iommu/vt-d: Don't disable preemption while accessing deferred_flush()Sebastian Andrzej Siewior2017-06-281-6/+2
| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | get_cpu() disables preemption and returns the current CPU number. The CPU number is only used once while retrieving the address of the local's CPU deferred_flush pointer. We can instead use raw_cpu_ptr() while we remain preemptible. The worst thing that can happen is that flush_unmaps_timeout() is invoked multiple times: once by taskA after seeing HIGH_WATER_MARK and then preempted to another CPU and then by taskB which saw HIGH_WATER_MARK on the same CPU as taskA. It is also likely that ->size got from HIGH_WATER_MARK to 0 right after its read because another CPU invoked flush_unmaps_timeout() for this CPU. The access to flush_data is protected by a spinlock so even if we get migrated to another CPU or preempted - the data structure is protected. While at it, I marked deferred_flush static since I can't find a reference to it outside of this file. Cc: David Woodhouse <dwmw2@infradead.org> Cc: Joerg Roedel <joro@8bytes.org> Cc: iommu@lists.linux-foundation.org Cc: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> Signed-off-by: Joerg Roedel <jroedel@suse.de>
| | | * iommu/vt-d: Unwrap __get_valid_domain_for_dev()Peter Xu2017-05-301-14/+2
| | |/ | | | | | | | | | | | | | | | | | | | | | We do find_domain() in __get_valid_domain_for_dev(), while we do the same thing in get_valid_domain_for_dev(). No need to do it twice. Signed-off-by: Peter Xu <peterx@redhat.com> Signed-off-by: Joerg Roedel <jroedel@suse.de>
* | | Merge tag 'dma-mapping-4.13' of git://git.infradead.org/users/hch/dma-mappingLinus Torvalds2017-07-061-0/+3
|\ \ \ | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | Pull dma-mapping infrastructure from Christoph Hellwig: "This is the first pull request for the new dma-mapping subsystem In this new subsystem we'll try to properly maintain all the generic code related to dma-mapping, and will further consolidate arch code into common helpers. This pull request contains: - removal of the DMA_ERROR_CODE macro, replacing it with calls to ->mapping_error so that the dma_map_ops instances are more self contained and can be shared across architectures (me) - removal of the ->set_dma_mask method, which duplicates the ->dma_capable one in terms of functionality, but requires more duplicate code. - various updates for the coherent dma pool and related arm code (Vladimir) - various smaller cleanups (me)" * tag 'dma-mapping-4.13' of git://git.infradead.org/users/hch/dma-mapping: (56 commits) ARM: dma-mapping: Remove traces of NOMMU code ARM: NOMMU: Set ARM_DMA_MEM_BUFFERABLE for M-class cpus ARM: NOMMU: Introduce dma operations for noMMU drivers: dma-mapping: allow dma_common_mmap() for NOMMU drivers: dma-coherent: Introduce default DMA pool drivers: dma-coherent: Account dma_pfn_offset when used with device tree dma: Take into account dma_pfn_offset dma-mapping: replace dmam_alloc_noncoherent with dmam_alloc_attrs dma-mapping: remove dmam_free_noncoherent crypto: qat - avoid an uninitialized variable warning au1100fb: remove a bogus dma_free_nonconsistent call MAINTAINERS: add entry for dma mapping helpers powerpc: merge __dma_set_mask into dma_set_mask dma-mapping: remove the set_dma_mask method powerpc/cell: use the dma_supported method for ops switching powerpc/cell: clean up fixed mapping dma_ops initialization tile: remove dma_supported and mapping_error methods xen-swiotlb: remove xen_swiotlb_set_dma_mask arm: implement ->dma_supported instead of ->set_dma_mask mips/loongson64: implement ->dma_supported instead of ->set_dma_mask ...
| * | | x86: remove arch specific dma_supported implementationChristoph Hellwig2017-06-281-0/+3
| |/ / | | | | | | | | | | | | | | | | | | | | | | | | And instead wire it up as method for all the dma_map_ops instances. Note that this also means the arch specific check will be fully instead of partially applied in the AMD iommu driver. Signed-off-by: Christoph Hellwig <hch@lst.de>
* | | iommu/vt-d: Adjust system_state checksThomas Gleixner2017-05-231-2/+2
|/ / | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | To enable smp_processor_id() and might_sleep() debug checks earlier, it's required to add system states between SYSTEM_BOOTING and SYSTEM_RUNNING. Adjust the system_state checks in dmar_parse_one_atsr() and dmar_iommu_notify_scope_dev() to handle the extra states. Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Acked-by: Joerg Roedel <joro@8bytes.org> Cc: David Woodhouse <dwmw2@infradead.org> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Mark Rutland <mark.rutland@arm.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Steven Rostedt <rostedt@goodmis.org> Cc: iommu@lists.linux-foundation.org Link: http://lkml.kernel.org/r/20170516184735.712365947@linutronix.de Signed-off-by: Ingo Molnar <mingo@kernel.org>
* | iommu/vt-d: Flush the IOTLB to get rid of the initial kdump mappingsKarimAllah Ahmed2017-05-171-1/+4
|/ | | | | | | | | | | | | | | | | | | | | | | | | | | | | | Ever since commit 091d42e43d ("iommu/vt-d: Copy translation tables from old kernel") the kdump kernel copies the IOMMU context tables from the previous kernel. Each device mappings will be destroyed once the driver for the respective device takes over. This unfortunately breaks the workflow of mapping and unmapping a new context to the IOMMU. The mapping function assumes that either: 1) Unmapping did the proper IOMMU flushing and it only ever flush if the IOMMU unit supports caching invalid entries. 2) The system just booted and the initialization code took care of flushing all IOMMU caches. This assumption is not true for the kdump kernel since the context tables have been copied from the previous kernel and translations could have been cached ever since. So make sure to flush the IOTLB as well when we destroy these old copied mappings. Cc: Joerg Roedel <joro@8bytes.org> Cc: David Woodhouse <dwmw2@infradead.org> Cc: David Woodhouse <dwmw@amazon.co.uk> Cc: Anthony Liguori <aliguori@amazon.com> Signed-off-by: KarimAllah Ahmed <karahmed@amazon.de> Acked-by: David Woodhouse <dwmw@amazon.co.uk> Cc: stable@vger.kernel.org v4.2+ Fixes: 091d42e43d ("iommu/vt-d: Copy translation tables from old kernel") Signed-off-by: Joerg Roedel <jroedel@suse.de>
*-. Merge branches 'arm/exynos', 'arm/omap', 'arm/rockchip', 'arm/mediatek', ↵Joerg Roedel2017-05-041-3/+37
|\ \ | | | | | | | | | 'arm/smmu', 'arm/core', 'x86/vt-d', 'x86/amd' and 'core' into next
| | * x86, iommu/vt-d: Add an option to disable Intel IOMMU force onShaohua Li2017-04-261-0/+18
| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | IOMMU harms performance signficantly when we run very fast networking workloads. It's 40GB networking doing XDP test. Software overhead is almost unaware, but it's the IOTLB miss (based on our analysis) which kills the performance. We observed the same performance issue even with software passthrough (identity mapping), only the hardware passthrough survives. The pps with iommu (with software passthrough) is only about ~30% of that without it. This is a limitation in hardware based on our observation, so we'd like to disable the IOMMU force on, but we do want to use TBOOT and we can sacrifice the DMA security bought by IOMMU. I must admit I know nothing about TBOOT, but TBOOT guys (cc-ed) think not eabling IOMMU is totally ok. So introduce a new boot option to disable the force on. It's kind of silly we need to run into intel_iommu_init even without force on, but we need to disable TBOOT PMR registers. For system without the boot option, nothing is changed. Signed-off-by: Shaohua Li <shli@fb.com> Signed-off-by: Joerg Roedel <jroedel@suse.de>
| | * iommu/vt-d: Make sure IOMMUs are off when intel_iommu=offJoerg Roedel2017-03-291-1/+17
| |/ |/| | | | | | | | | | | | | | | | | | | | | When booting into a kexec kernel with intel_iommu=off, and the previous kernel had intel_iommu=on, the IOMMU hardware is still enabled and gets not disabled by the new kernel. This causes the boot to fail because DMA is blocked by the hardware. Disable the IOMMUs when we find it enabled in the kexec kernel and boot with intel_iommu=off. Signed-off-by: Joerg Roedel <jroedel@suse.de>
| * iommu: Disambiguate MSI region typesRobin Murphy2017-03-221-1/+1
| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | The introduction of reserved regions has left a couple of rough edges which we could do with sorting out sooner rather than later. Since we are not yet addressing the potential dynamic aspect of software-managed reservations and presenting them at arbitrary fixed addresses, it is incongruous that we end up displaying hardware vs. software-managed MSI regions to userspace differently, especially since ARM-based systems may actually require one or the other, or even potentially both at once, (which iommu-dma currently has no hope of dealing with at all). Let's resolve the former user-visible inconsistency ASAP before the ABI has been baked into a kernel release, in a way that also lays the groundwork for the latter shortcoming to be addressed by follow-up patches. For clarity, rename the software-managed type to IOMMU_RESV_SW_MSI, use IOMMU_RESV_MSI to describe the hardware type, and document everything a little bit. Since the x86 MSI remapping hardware falls squarely under this meaning of IOMMU_RESV_MSI, apply that type to their regions as well, so that we tell the same story to userspace across all platforms. Secondly, as the various region types require quite different handling, and it really makes little sense to ever try combining them, convert the bitfield-esque #defines to a plain enum in the process before anyone gets the wrong impression. Fixes: d30ddcaa7b02 ("iommu: Add a new type field in iommu_resv_region") Reviewed-by: Eric Auger <eric.auger@redhat.com> CC: Alex Williamson <alex.williamson@redhat.com> CC: David Woodhouse <dwmw2@infradead.org> CC: kvm@vger.kernel.org Signed-off-by: Robin Murphy <robin.murphy@arm.com> Signed-off-by: Joerg Roedel <jroedel@suse.de>
| * iommu/vt-d: Fix NULL pointer dereference in device_to_iommuKoos Vriezen2017-03-221-1/+1
|/ | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | The function device_to_iommu() in the Intel VT-d driver lacks a NULL-ptr check, resulting in this oops at boot on some platforms: BUG: unable to handle kernel NULL pointer dereference at 00000000000007ab IP: [<ffffffff8132234a>] device_to_iommu+0x11a/0x1a0 PGD 0 [...] Call Trace: ? find_or_alloc_domain.constprop.29+0x1a/0x300 ? dw_dma_probe+0x561/0x580 [dw_dmac_core] ? __get_valid_domain_for_dev+0x39/0x120 ? __intel_map_single+0x138/0x180 ? intel_alloc_coherent+0xb6/0x120 ? sst_hsw_dsp_init+0x173/0x420 [snd_soc_sst_haswell_pcm] ? mutex_lock+0x9/0x30 ? kernfs_add_one+0xdb/0x130 ? devres_add+0x19/0x60 ? hsw_pcm_dev_probe+0x46/0xd0 [snd_soc_sst_haswell_pcm] ? platform_drv_probe+0x30/0x90 ? driver_probe_device+0x1ed/0x2b0 ? __driver_attach+0x8f/0xa0 ? driver_probe_device+0x2b0/0x2b0 ? bus_for_each_dev+0x55/0x90 ? bus_add_driver+0x110/0x210 ? 0xffffffffa11ea000 ? driver_register+0x52/0xc0 ? 0xffffffffa11ea000 ? do_one_initcall+0x32/0x130 ? free_vmap_area_noflush+0x37/0x70 ? kmem_cache_alloc+0x88/0xd0 ? do_init_module+0x51/0x1c4 ? load_module+0x1ee9/0x2430 ? show_taint+0x20/0x20 ? kernel_read_file+0xfd/0x190 ? SyS_finit_module+0xa3/0xb0 ? do_syscall_64+0x4a/0xb0 ? entry_SYSCALL64_slow_path+0x25/0x25 Code: 78 ff ff ff 4d 85 c0 74 ee 49 8b 5a 10 0f b6 9b e0 00 00 00 41 38 98 e0 00 00 00 77 da 0f b6 eb 49 39 a8 88 00 00 00 72 ce eb 8f <41> f6 82 ab 07 00 00 04 0f 85 76 ff ff ff 0f b6 4d 08 88 0e 49 RIP [<ffffffff8132234a>] device_to_iommu+0x11a/0x1a0 RSP <ffffc90001457a78> CR2: 00000000000007ab ---[ end trace 16f974b6d58d0aad ]--- Add the missing pointer check. Fixes: 1c387188c60f53b338c20eee32db055dfe022a9b ("iommu/vt-d: Fix IOMMU lookup for SR-IOV Virtual Functions") Signed-off-by: Koos Vriezen <koos.vriezen@gmail.com> Cc: stable@vger.kernel.org # 4.8.15+ Signed-off-by: Joerg Roedel <jroedel@suse.de>
* iommu/vt-d: Fix crash when accessing VT-d sysfs entriesJoerg Roedel2017-02-281-6/+11
| | | | | | | | | | The link between the iommu sysfs-device and the struct intel_iommu is no longer stored as driver-data. Update the code to use the new access method. Reported-by: Dave Jones <davej@codemonkey.org.uk> Fixes: 39ab9555c241 ('iommu: Add sysfs bindings for struct iommu_device') Signed-off-by: Joerg Roedel <jroedel@suse.de>
* mm: wire up GFP flag passing in dma_alloc_from_contiguousLucas Stach2017-02-241-1/+1
| | | | | | | | | | | | | | | | | | | | | The callers of the DMA alloc functions already provide the proper context GFP flags. Make sure to pass them through to the CMA allocator, to make the CMA compaction context aware. Link: http://lkml.kernel.org/r/20170127172328.18574-3-l.stach@pengutronix.de Signed-off-by: Lucas Stach <l.stach@pengutronix.de> Acked-by: Vlastimil Babka <vbabka@suse.cz> Acked-by: Michal Hocko <mhocko@suse.com> Cc: Radim Krcmar <rkrcmar@redhat.com> Cc: Catalin Marinas <catalin.marinas@arm.com> Cc: Will Deacon <will.deacon@arm.com> Cc: Chris Zankel <chris@zankel.net> Cc: Ralf Baechle <ralf@linux-mips.org> Cc: Paolo Bonzini <pbonzini@redhat.com> Cc: Alexander Graf <agraf@suse.com> Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
*-----. Merge branches 'iommu/fixes', 'arm/exynos', 'arm/renesas', 'arm/smmu', ↵Joerg Roedel2017-02-101-28/+88
|\ \ \ \ | | | | | | | | | | | | | | | 'arm/mediatek', 'arm/core', 'x86/vt-d' and 'core' into next
| | | | * iommu: Make iommu_device_link/unlink take a struct iommu_deviceJoerg Roedel2017-02-101-2/+2
| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | This makes the interface more consistent with iommu_device_sysfs_add/remove. Signed-off-by: Joerg Roedel <jroedel@suse.de>
| | | | * iommu: Add sysfs bindings for struct iommu_deviceJoerg Roedel2017-02-101-6/+9
| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | There is currently support for iommu sysfs bindings, but those need to be implemented in the IOMMU drivers. Add a more generic version of this by adding a struct device to struct iommu_device and use that for the sysfs bindings. Also convert the AMD and Intel IOMMU driver to make use of it. Signed-off-by: Joerg Roedel <jroedel@suse.de>
| | | | * iommu: Introduce new 'struct iommu_device'Joerg Roedel2017-02-101-2/+2
| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | This struct represents one hardware iommu in the iommu core code. For now it only has the iommu-ops associated with it, but that will be extended soon. The register/unregister interface is also added, as well as making use of it in the Intel and AMD IOMMU drivers. Signed-off-by: Joerg Roedel <jroedel@suse.de>
OpenPOWER on IntegriCloud