From d14053b3c714178525f22660e6aaf41263d00056 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Thu, 15 Oct 2015 09:28:06 +0100 Subject: iommu/vt-d: Fix ATSR handling for Root-Complex integrated endpoints MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The VT-d specification says that "Software must enable ATS on endpoint devices behind a Root Port only if the Root Port is reported as supporting ATS transactions." We walk up the tree to find a Root Port, but for integrated devices we don't find one — we get to the host bridge. In that case we *should* allow ATS. Currently we don't, which means that we are incorrectly failing to use ATS for the integrated graphics. Fix that. We should never break out of this loop "naturally" with bus==NULL, since we'll always find bridge==NULL in that case (and now return 1). So remove the check for (!bridge) after the loop, since it can never happen. If it did, it would be worthy of a BUG_ON(!bridge). But since it'll oops anyway in that case, that'll do just as well. Cc: stable@vger.kernel.org Signed-off-by: David Woodhouse --- drivers/iommu/intel-iommu.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'drivers/iommu') diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index df53855..17b4744 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -4193,14 +4193,17 @@ int dmar_find_matched_atsr_unit(struct pci_dev *dev) dev = pci_physfn(dev); for (bus = dev->bus; bus; bus = bus->parent) { bridge = bus->self; - if (!bridge || !pci_is_pcie(bridge) || + /* If it's an integrated device, allow ATS */ + if (!bridge) + return 1; + /* Connected via non-PCIe: no ATS */ + if (!pci_is_pcie(bridge) || pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE) return 0; + /* If we found the root port, look it up in the ATSR */ if (pci_pcie_type(bridge) == PCI_EXP_TYPE_ROOT_PORT) break; } - if (!bridge) - return 0; rcu_read_lock(); list_for_each_entry_rcu(atsru, &dmar_atsr_units, list) { -- cgit v1.1 From ae853ddb9ad5e7c01cad3fbf016040acd961f407 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Wed, 9 Sep 2015 11:58:59 +0100 Subject: iommu/vt-d: Introduce intel_iommu=pasid28, and pasid_enabled() macro As long as we use an identity mapping to work around the worst of the hardware bugs which caused us to defeature it and change the definition of the capability bit, we *can* use PASID support on the devices which advertised it in bit 28 of the Extended Capability Register. Allow people to do so with 'intel_iommu=pasid28' on the command line. Signed-off-by: David Woodhouse --- drivers/iommu/intel-iommu.c | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) (limited to 'drivers/iommu') diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 17b4744..6a01735 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -497,13 +497,21 @@ static int dmar_forcedac; static int intel_iommu_strict; static int intel_iommu_superpage = 1; static int intel_iommu_ecs = 1; +static int intel_iommu_pasid28; +static int iommu_identity_mapping; + +#define IDENTMAP_ALL 1 +#define IDENTMAP_GFX 2 +#define IDENTMAP_AZALIA 4 /* We only actually use ECS when PASID support (on the new bit 40) * is also advertised. Some early implementations — the ones with * PASID support on bit 28 — have issues even when we *only* use * extended root/context tables. 
*/ +#define pasid_enabled(iommu) (ecap_pasid(iommu->ecap) || \ + (intel_iommu_pasid28 && ecap_broken_pasid(iommu->ecap))) #define ecs_enabled(iommu) (intel_iommu_ecs && ecap_ecs(iommu->ecap) && \ - ecap_pasid(iommu->ecap)) + pasid_enabled(iommu)) int intel_iommu_gfx_mapped; EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped); @@ -566,6 +574,11 @@ static int __init intel_iommu_setup(char *str) printk(KERN_INFO "Intel-IOMMU: disable extended context table support\n"); intel_iommu_ecs = 0; + } else if (!strncmp(str, "pasid28", 7)) { + printk(KERN_INFO + "Intel-IOMMU: enable pre-production PASID support\n"); + intel_iommu_pasid28 = 1; + iommu_identity_mapping |= IDENTMAP_GFX; } str += strcspn(str, ","); @@ -2403,11 +2416,6 @@ found_domain: return domain; } -static int iommu_identity_mapping; -#define IDENTMAP_ALL 1 -#define IDENTMAP_GFX 2 -#define IDENTMAP_AZALIA 4 - static int iommu_domain_identity_map(struct dmar_domain *domain, unsigned long long start, unsigned long long end) -- cgit v1.1 From 8a94ade4ce6df22006b96c5c9a8d6d12fce67585 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Tue, 24 Mar 2015 14:54:56 +0000 Subject: iommu/vt-d: Add initial support for PASID tables Add CONFIG_INTEL_IOMMU_SVM, and allocate PASID tables on supported hardware. Signed-off-by: David Woodhouse --- drivers/iommu/Kconfig | 8 ++++++ drivers/iommu/Makefile | 1 + drivers/iommu/intel-iommu.c | 14 ++++++++++ drivers/iommu/intel-svm.c | 65 +++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 88 insertions(+) create mode 100644 drivers/iommu/intel-svm.c (limited to 'drivers/iommu') diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index d9da766..e3b2c2e 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -135,6 +135,14 @@ config INTEL_IOMMU and include PCI device scope covered by these DMA remapping devices. +config INTEL_IOMMU_SVM + bool "Support for Shared Virtual Memory with Intel IOMMU" + depends on INTEL_IOMMU && X86 + help + Shared Virtual Memory (SVM) provides a facility for devices + to access DMA resources through process address space by + means of a Process Address Space ID (PASID). 
+ config INTEL_IOMMU_DEFAULT_ON def_bool y prompt "Enable Intel DMA Remapping Devices by default" diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile index c6dcc51..dc6f511 100644 --- a/drivers/iommu/Makefile +++ b/drivers/iommu/Makefile @@ -12,6 +12,7 @@ obj-$(CONFIG_ARM_SMMU) += arm-smmu.o obj-$(CONFIG_ARM_SMMU_V3) += arm-smmu-v3.o obj-$(CONFIG_DMAR_TABLE) += dmar.o obj-$(CONFIG_INTEL_IOMMU) += intel-iommu.o +obj-$(CONFIG_INTEL_IOMMU_SVM) += intel-svm.o obj-$(CONFIG_IPMMU_VMSA) += ipmmu-vmsa.o obj-$(CONFIG_IRQ_REMAP) += intel_irq_remapping.o irq_remapping.o obj-$(CONFIG_OMAP_IOMMU) += omap-iommu.o diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 6a01735..ab99fcf 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -1680,6 +1680,11 @@ static void free_dmar_iommu(struct intel_iommu *iommu) /* free context mapping */ free_context_table(iommu); + +#ifdef CONFIG_INTEL_IOMMU_SVM + if (pasid_enabled(iommu)) + intel_svm_free_pasid_tables(iommu); +#endif } static struct dmar_domain *alloc_domain(int flags) @@ -3107,6 +3112,10 @@ static int __init init_dmars(void) if (!ecap_pass_through(iommu->ecap)) hw_pass_through = 0; +#ifdef CONFIG_INTEL_IOMMU_SVM + if (pasid_enabled(iommu)) + intel_svm_alloc_pasid_tables(iommu); +#endif } if (iommu_pass_through) @@ -4122,6 +4131,11 @@ static int intel_iommu_add(struct dmar_drhd_unit *dmaru) if (ret) goto out; +#ifdef CONFIG_INTEL_IOMMU_SVM + if (pasid_enabled(iommu)) + intel_svm_alloc_pasid_tables(iommu); +#endif + if (dmaru->ignored) { /* * we always have to disable PMRs or DMA may fail on this device diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c new file mode 100644 index 0000000..5b42a95 --- /dev/null +++ b/drivers/iommu/intel-svm.c @@ -0,0 +1,65 @@ +/* + * Copyright © 2015 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ * + * Authors: David Woodhouse + */ + +#include + +int intel_svm_alloc_pasid_tables(struct intel_iommu *iommu) +{ + struct page *pages; + int order; + + order = ecap_pss(iommu->ecap) + 7 - PAGE_SHIFT; + if (order < 0) + order = 0; + + pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, order); + if (!pages) { + pr_warn("IOMMU: %s: Failed to allocate PASID table\n", + iommu->name); + return -ENOMEM; + } + iommu->pasid_table = page_address(pages); + pr_info("%s: Allocated order %d PASID table.\n", iommu->name, order); + + if (ecap_dis(iommu->ecap)) { + pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, order); + if (pages) + iommu->pasid_state_table = page_address(pages); + else + pr_warn("IOMMU: %s: Failed to allocate PASID state table\n", + iommu->name); + } + + return 0; +} + +int intel_svm_free_pasid_tables(struct intel_iommu *iommu) +{ + int order; + + order = ecap_pss(iommu->ecap) + 7 - PAGE_SHIFT; + if (order < 0) + order = 0; + + if (iommu->pasid_table) { + free_pages((unsigned long)iommu->pasid_table, order); + iommu->pasid_table = NULL; + } + if (iommu->pasid_state_table) { + free_pages((unsigned long)iommu->pasid_state_table, order); + iommu->pasid_state_table = NULL; + } + return 0; +} -- cgit v1.1 From b16d0cb9e2fc5c311948c660dd6f4b59a9ccd333 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Mon, 12 Oct 2015 14:17:37 +0100 Subject: iommu/vt-d: Always enable PASID/PRI PCI capabilities before ATS The behaviour if you enable PASID support after ATS is undefined. So we have to enable it first, even if we don't know whether we'll need it. This is safe enough; unless we set up a context that permits it, the device can't actually *do* anything with it. Also shift the feature detction to dmar_insert_one_dev_info() as it only needs to happen once. Signed-off-by: David Woodhouse --- drivers/iommu/Kconfig | 1 + drivers/iommu/intel-iommu.c | 110 ++++++++++++++++++++++++++++++-------------- 2 files changed, 76 insertions(+), 35 deletions(-) (limited to 'drivers/iommu') diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index e3b2c2e..8d23f5e 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -138,6 +138,7 @@ config INTEL_IOMMU config INTEL_IOMMU_SVM bool "Support for Shared Virtual Memory with Intel IOMMU" depends on INTEL_IOMMU && X86 + select PCI_PASID help Shared Virtual Memory (SVM) provides a facility for devices to access DMA resources through process address space by diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index ab99fcf..9995ea8 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -418,10 +418,13 @@ struct device_domain_info { struct list_head global; /* link to global list */ u8 bus; /* PCI bus number */ u8 devfn; /* PCI devfn number */ - struct { - u8 enabled:1; - u8 qdep; - } ats; /* ATS state */ + u8 pasid_supported:3; + u8 pasid_enabled:1; + u8 pri_supported:1; + u8 pri_enabled:1; + u8 ats_supported:1; + u8 ats_enabled:1; + u8 ats_qdep; struct device *dev; /* it's NULL for PCIe-to-PCI bridge */ struct intel_iommu *iommu; /* IOMMU used by this device */ struct dmar_domain *domain; /* pointer to domain */ @@ -1420,37 +1423,22 @@ static struct device_domain_info * iommu_support_dev_iotlb (struct dmar_domain *domain, struct intel_iommu *iommu, u8 bus, u8 devfn) { - bool found = false; struct device_domain_info *info; - struct pci_dev *pdev; assert_spin_locked(&device_domain_lock); - if (!ecap_dev_iotlb_support(iommu->ecap)) - return NULL; - if (!iommu->qi) return NULL; list_for_each_entry(info, &domain->devices, 
link) if (info->iommu == iommu && info->bus == bus && info->devfn == devfn) { - found = true; + if (info->ats_supported && info->dev) + return info; break; } - if (!found || !info->dev || !dev_is_pci(info->dev)) - return NULL; - - pdev = to_pci_dev(info->dev); - - if (!pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ATS)) - return NULL; - - if (!dmar_find_matched_atsr_unit(pdev)) - return NULL; - - return info; + return NULL; } static void iommu_enable_dev_iotlb(struct device_domain_info *info) @@ -1461,20 +1449,48 @@ static void iommu_enable_dev_iotlb(struct device_domain_info *info) return; pdev = to_pci_dev(info->dev); - if (pci_enable_ats(pdev, VTD_PAGE_SHIFT)) - return; - info->ats.enabled = 1; - info->ats.qdep = pci_ats_queue_depth(pdev); +#ifdef CONFIG_INTEL_IOMMU_SVM + /* The PCIe spec, in its wisdom, declares that the behaviour of + the device if you enable PASID support after ATS support is + undefined. So always enable PASID support on devices which + have it, even if we can't yet know if we're ever going to + use it. */ + if (info->pasid_supported && !pci_enable_pasid(pdev, info->pasid_supported & ~1)) + info->pasid_enabled = 1; + + if (info->pri_supported && !pci_reset_pri(pdev) && !pci_enable_pri(pdev, 32)) + info->pri_enabled = 1; +#endif + if (info->ats_supported && !pci_enable_ats(pdev, VTD_PAGE_SHIFT)) { + info->ats_enabled = 1; + info->ats_qdep = pci_ats_queue_depth(pdev); + } } static void iommu_disable_dev_iotlb(struct device_domain_info *info) { - if (!info->ats.enabled) + struct pci_dev *pdev; + + if (dev_is_pci(info->dev)) return; - pci_disable_ats(to_pci_dev(info->dev)); - info->ats.enabled = 0; + pdev = to_pci_dev(info->dev); + + if (info->ats_enabled) { + pci_disable_ats(pdev); + info->ats_enabled = 0; + } +#ifdef CONFIG_INTEL_IOMMU_SVM + if (info->pri_enabled) { + pci_disable_pri(pdev); + info->pri_enabled = 0; + } + if (info->pasid_enabled) { + pci_disable_pasid(pdev); + info->pasid_enabled = 0; + } +#endif } static void iommu_flush_dev_iotlb(struct dmar_domain *domain, @@ -1486,11 +1502,11 @@ static void iommu_flush_dev_iotlb(struct dmar_domain *domain, spin_lock_irqsave(&device_domain_lock, flags); list_for_each_entry(info, &domain->devices, link) { - if (!info->ats.enabled) + if (!info->ats_enabled) continue; sid = info->bus << 8 | info->devfn; - qdep = info->ats.qdep; + qdep = info->ats_qdep; qi_flush_dev_iotlb(info->iommu, sid, qdep, addr, mask); } spin_unlock_irqrestore(&device_domain_lock, flags); @@ -1952,8 +1968,10 @@ static int domain_context_mapping_one(struct dmar_domain *domain, } info = iommu_support_dev_iotlb(domain, iommu, bus, devfn); - translation = info ? 
CONTEXT_TT_DEV_IOTLB : - CONTEXT_TT_MULTI_LEVEL; + if (info && info->ats_supported) + translation = CONTEXT_TT_DEV_IOTLB; + else + translation = CONTEXT_TT_MULTI_LEVEL; context_set_address_root(context, virt_to_phys(pgd)); context_set_address_width(context, iommu->agaw); @@ -2291,12 +2309,34 @@ static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu, info->bus = bus; info->devfn = devfn; - info->ats.enabled = 0; - info->ats.qdep = 0; + info->ats_supported = info->pasid_supported = info->pri_supported = 0; + info->ats_enabled = info->pasid_enabled = info->pri_enabled = 0; + info->ats_qdep = 0; info->dev = dev; info->domain = domain; info->iommu = iommu; + if (dev && dev_is_pci(dev)) { + struct pci_dev *pdev = to_pci_dev(info->dev); + + if (ecap_dev_iotlb_support(iommu->ecap) && + pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ATS) && + dmar_find_matched_atsr_unit(pdev)) + info->ats_supported = 1; + + if (ecs_enabled(iommu)) { + if (pasid_enabled(iommu)) { + int features = pci_pasid_features(pdev); + if (features >= 0) + info->pasid_supported = features | 1; + } + + if (info->ats_supported && ecap_prs(iommu->ecap) && + pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI)) + info->pri_supported = 1; + } + } + spin_lock_irqsave(&device_domain_lock, flags); if (dev) found = find_domain(dev); -- cgit v1.1 From 2f26e0a9c9860db290d63e9d85c2c8c09813677f Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Wed, 9 Sep 2015 11:40:47 +0100 Subject: iommu/vt-d: Add basic SVM PASID support This provides basic PASID support for endpoint devices, tested with a version of the i915 driver. Signed-off-by: David Woodhouse --- drivers/iommu/Kconfig | 1 + drivers/iommu/intel-iommu.c | 104 ++++++++++++++++ drivers/iommu/intel-svm.c | 291 ++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 396 insertions(+) (limited to 'drivers/iommu') diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index 8d23f5e..be18b21 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -139,6 +139,7 @@ config INTEL_IOMMU_SVM bool "Support for Shared Virtual Memory with Intel IOMMU" depends on INTEL_IOMMU && X86 select PCI_PASID + select MMU_NOTIFIER help Shared Virtual Memory (SVM) provides a facility for devices to access DMA resources through process address space by diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 9995ea8..60b66d2 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -4929,6 +4929,110 @@ static void intel_iommu_remove_device(struct device *dev) iommu_device_unlink(iommu->iommu_dev, dev); } +#ifdef CONFIG_INTEL_IOMMU_SVM +int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct intel_svm_dev *sdev) +{ + struct device_domain_info *info; + struct context_entry *context; + struct dmar_domain *domain; + unsigned long flags; + u64 ctx_lo; + int ret; + + domain = get_valid_domain_for_dev(sdev->dev); + if (!domain) + return -EINVAL; + + spin_lock_irqsave(&device_domain_lock, flags); + spin_lock(&iommu->lock); + + ret = -EINVAL; + info = sdev->dev->archdata.iommu; + if (!info || !info->pasid_supported) + goto out; + + context = iommu_context_addr(iommu, info->bus, info->devfn, 0); + if (WARN_ON(!context)) + goto out; + + ctx_lo = context[0].lo; + + sdev->did = domain->iommu_did[iommu->seq_id]; + sdev->sid = PCI_DEVID(info->bus, info->devfn); + + if (!(ctx_lo & CONTEXT_PASIDE)) { + context[1].hi = (u64)virt_to_phys(iommu->pasid_state_table); + context[1].lo = (u64)virt_to_phys(iommu->pasid_table) | ecap_pss(iommu->ecap); + 
wmb(); + /* CONTEXT_TT_MULTI_LEVEL and CONTEXT_TT_DEV_IOTLB are both + * extended to permit requests-with-PASID if the PASIDE bit + * is set. which makes sense. For CONTEXT_TT_PASS_THROUGH, + * however, the PASIDE bit is ignored and requests-with-PASID + * are unconditionally blocked. Which makes less sense. + * So convert from CONTEXT_TT_PASS_THROUGH to one of the new + * "guest mode" translation types depending on whether ATS + * is available or not. Annoyingly, we can't use the new + * modes *unless* PASIDE is set. */ + if ((ctx_lo & CONTEXT_TT_MASK) == (CONTEXT_TT_PASS_THROUGH << 2)) { + ctx_lo &= ~CONTEXT_TT_MASK; + if (info->ats_supported) + ctx_lo |= CONTEXT_TT_PT_PASID_DEV_IOTLB << 2; + else + ctx_lo |= CONTEXT_TT_PT_PASID << 2; + } + ctx_lo |= CONTEXT_PASIDE; + context[0].lo = ctx_lo; + wmb(); + iommu->flush.flush_context(iommu, sdev->did, sdev->sid, + DMA_CCMD_MASK_NOBIT, + DMA_CCMD_DEVICE_INVL); + } + + /* Enable PASID support in the device, if it wasn't already */ + if (!info->pasid_enabled) + iommu_enable_dev_iotlb(info); + + if (info->ats_enabled) { + sdev->dev_iotlb = 1; + sdev->qdep = info->ats_qdep; + if (sdev->qdep >= QI_DEV_EIOTLB_MAX_INVS) + sdev->qdep = 0; + } + ret = 0; + + out: + spin_unlock(&iommu->lock); + spin_unlock_irqrestore(&device_domain_lock, flags); + + return ret; +} + +struct intel_iommu *intel_svm_device_to_iommu(struct device *dev) +{ + struct intel_iommu *iommu; + u8 bus, devfn; + + if (iommu_dummy(dev)) { + dev_warn(dev, + "No IOMMU translation for device; cannot enable SVM\n"); + return NULL; + } + + iommu = device_to_iommu(dev, &bus, &devfn); + if ((!iommu)) { + dev_dbg(dev, "No IOMMU for device; cannot enable SVM\n"); + return NULL; + } + + if (!iommu->pasid_table) { + dev_dbg(dev, "PASID not enabled on IOMMU; cannot enable SVM\n"); + return NULL; + } + + return iommu; +} +#endif /* CONFIG_INTEL_IOMMU_SVM */ + static const struct iommu_ops intel_iommu_ops = { .capable = intel_iommu_capable, .domain_alloc = intel_iommu_domain_alloc, diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c index 5b42a95..82d53e1 100644 --- a/drivers/iommu/intel-svm.c +++ b/drivers/iommu/intel-svm.c @@ -14,6 +14,17 @@ */ #include +#include +#include +#include +#include +#include +#include +#include + +struct pasid_entry { + u64 val; +}; int intel_svm_alloc_pasid_tables(struct intel_iommu *iommu) { @@ -42,6 +53,8 @@ int intel_svm_alloc_pasid_tables(struct intel_iommu *iommu) iommu->name); } + idr_init(&iommu->pasid_idr); + return 0; } @@ -61,5 +74,283 @@ int intel_svm_free_pasid_tables(struct intel_iommu *iommu) free_pages((unsigned long)iommu->pasid_state_table, order); iommu->pasid_state_table = NULL; } + idr_destroy(&iommu->pasid_idr); return 0; } + +static void intel_flush_svm_range_dev (struct intel_svm *svm, struct intel_svm_dev *sdev, + unsigned long address, int pages, int ih) +{ + struct qi_desc desc; + int mask = ilog2(__roundup_pow_of_two(pages)); + + if (pages == -1 || !cap_pgsel_inv(svm->iommu->cap) || + mask > cap_max_amask_val(svm->iommu->cap)) { + desc.low = QI_EIOTLB_PASID(svm->pasid) | QI_EIOTLB_DID(sdev->did) | + QI_EIOTLB_GRAN(QI_GRAN_NONG_PASID) | QI_EIOTLB_TYPE; + desc.high = 0; + } else { + desc.low = QI_EIOTLB_PASID(svm->pasid) | QI_EIOTLB_DID(sdev->did) | + QI_EIOTLB_GRAN(QI_GRAN_PSI_PASID) | QI_EIOTLB_TYPE; + desc.high = QI_EIOTLB_ADDR(address) | QI_EIOTLB_GL(1) | + QI_EIOTLB_IH(ih) | QI_EIOTLB_AM(mask); + } + + qi_submit_sync(&desc, svm->iommu); + + if (sdev->dev_iotlb) { + desc.low = QI_DEV_EIOTLB_PASID(svm->pasid) | 
QI_DEV_EIOTLB_SID(sdev->sid) | + QI_DEV_EIOTLB_QDEP(sdev->qdep) | QI_DEIOTLB_TYPE; + if (mask) { + unsigned long adr, delta; + + /* Least significant zero bits in the address indicate the + * range of the request. So mask them out according to the + * size. */ + adr = address & ((1<<(VTD_PAGE_SHIFT + mask)) - 1); + + /* Now ensure that we round down further if the original + * request was not aligned w.r.t. its size */ + delta = address - adr; + if (delta + (pages << VTD_PAGE_SHIFT) >= (1 << (VTD_PAGE_SHIFT + mask))) + adr &= ~(1 << (VTD_PAGE_SHIFT + mask)); + desc.high = QI_DEV_EIOTLB_ADDR(adr) | QI_DEV_EIOTLB_SIZE; + } else { + desc.high = QI_DEV_EIOTLB_ADDR(address); + } + qi_submit_sync(&desc, svm->iommu); + } +} + +static void intel_flush_svm_range(struct intel_svm *svm, unsigned long address, + int pages, int ih) +{ + struct intel_svm_dev *sdev; + + rcu_read_lock(); + list_for_each_entry_rcu(sdev, &svm->devs, list) + intel_flush_svm_range_dev(svm, sdev, address, pages, ih); + rcu_read_unlock(); +} + +static void intel_change_pte(struct mmu_notifier *mn, struct mm_struct *mm, + unsigned long address, pte_t pte) +{ + struct intel_svm *svm = container_of(mn, struct intel_svm, notifier); + + intel_flush_svm_range(svm, address, 1, 1); +} + +static void intel_invalidate_page(struct mmu_notifier *mn, struct mm_struct *mm, + unsigned long address) +{ + struct intel_svm *svm = container_of(mn, struct intel_svm, notifier); + + intel_flush_svm_range(svm, address, 1, 1); +} + +/* Pages have been freed at this point */ +static void intel_invalidate_range(struct mmu_notifier *mn, + struct mm_struct *mm, + unsigned long start, unsigned long end) +{ + struct intel_svm *svm = container_of(mn, struct intel_svm, notifier); + + intel_flush_svm_range(svm, start, + (end - start + PAGE_SIZE - 1) >> VTD_PAGE_SHIFT , 0); +} + + +static void intel_flush_pasid_dev(struct intel_svm *svm, struct intel_svm_dev *sdev) +{ + struct qi_desc desc; + + desc.high = 0; + desc.low = QI_PC_TYPE | QI_PC_DID(sdev->did) | QI_PC_PASID_SEL | QI_PC_PASID(svm->pasid); + + qi_submit_sync(&desc, svm->iommu); +} + +static void intel_mm_release(struct mmu_notifier *mn, struct mm_struct *mm) +{ + struct intel_svm *svm = container_of(mn, struct intel_svm, notifier); + + svm->iommu->pasid_table[svm->pasid].val = 0; + + /* There's no need to do any flush because we can't get here if there + * are any devices left anyway. */ + WARN_ON(!list_empty(&svm->devs)); +} + +static const struct mmu_notifier_ops intel_mmuops = { + .release = intel_mm_release, + .change_pte = intel_change_pte, + .invalidate_page = intel_invalidate_page, + .invalidate_range = intel_invalidate_range, +}; + +static DEFINE_MUTEX(pasid_mutex); + +int intel_svm_bind_mm(struct device *dev, int *pasid) +{ + struct intel_iommu *iommu = intel_svm_device_to_iommu(dev); + struct intel_svm_dev *sdev; + struct intel_svm *svm = NULL; + int pasid_max; + int ret; + + BUG_ON(pasid && !current->mm); + + if (WARN_ON(!iommu)) + return -EINVAL; + + if (dev_is_pci(dev)) { + pasid_max = pci_max_pasids(to_pci_dev(dev)); + if (pasid_max < 0) + return -EINVAL; + } else + pasid_max = 1 << 20; + + mutex_lock(&pasid_mutex); + if (pasid) { + int i; + + idr_for_each_entry(&iommu->pasid_idr, svm, i) { + if (svm->mm != current->mm) + continue; + + if (svm->pasid >= pasid_max) { + dev_warn(dev, + "Limited PASID width. 
Cannot use existing PASID %d\n", + svm->pasid); + ret = -ENOSPC; + goto out; + } + + list_for_each_entry(sdev, &svm->devs, list) { + if (dev == sdev->dev) { + sdev->users++; + goto success; + } + } + + break; + } + } + + sdev = kzalloc(sizeof(*sdev), GFP_KERNEL); + if (!sdev) { + ret = -ENOMEM; + goto out; + } + sdev->dev = dev; + + ret = intel_iommu_enable_pasid(iommu, sdev); + if (ret || !pasid) { + /* If they don't actually want to assign a PASID, this is + * just an enabling check/preparation. */ + kfree(sdev); + goto out; + } + /* Finish the setup now we know we're keeping it */ + sdev->users = 1; + init_rcu_head(&sdev->rcu); + + if (!svm) { + svm = kzalloc(sizeof(*svm), GFP_KERNEL); + if (!svm) { + ret = -ENOMEM; + kfree(sdev); + goto out; + } + svm->iommu = iommu; + + if (pasid_max > 2 << ecap_pss(iommu->ecap)) + pasid_max = 2 << ecap_pss(iommu->ecap); + + ret = idr_alloc(&iommu->pasid_idr, svm, 0, pasid_max - 1, + GFP_KERNEL); + if (ret < 0) { + kfree(svm); + goto out; + } + svm->pasid = ret; + svm->notifier.ops = &intel_mmuops; + svm->mm = get_task_mm(current); + INIT_LIST_HEAD_RCU(&svm->devs); + ret = -ENOMEM; + if (!svm->mm || (ret = mmu_notifier_register(&svm->notifier, svm->mm))) { + idr_remove(&svm->iommu->pasid_idr, svm->pasid); + kfree(svm); + kfree(sdev); + goto out; + } + iommu->pasid_table[svm->pasid].val = (u64)__pa(svm->mm->pgd) | 1; + wmb(); + } + list_add_rcu(&sdev->list, &svm->devs); + + success: + *pasid = svm->pasid; + ret = 0; + out: + mutex_unlock(&pasid_mutex); + return ret; +} +EXPORT_SYMBOL_GPL(intel_svm_bind_mm); + +int intel_svm_unbind_mm(struct device *dev, int pasid) +{ + struct intel_svm_dev *sdev; + struct intel_iommu *iommu; + struct intel_svm *svm; + int ret = -EINVAL; + + mutex_lock(&pasid_mutex); + iommu = intel_svm_device_to_iommu(dev); + if (!iommu || !iommu->pasid_table) + goto out; + + svm = idr_find(&iommu->pasid_idr, pasid); + if (!svm) + goto out; + + list_for_each_entry(sdev, &svm->devs, list) { + if (dev == sdev->dev) { + ret = 0; + sdev->users--; + if (!sdev->users) { + list_del_rcu(&sdev->list); + /* Flush the PASID cache and IOTLB for this device. + * Note that we do depend on the hardware *not* using + * the PASID any more. Just as we depend on other + * devices never using PASIDs that they have no right + * to use. We have a *shared* PASID table, because it's + * large and has to be physically contiguous. So it's + * hard to be as defensive as we might like. */ + intel_flush_pasid_dev(svm, sdev); + intel_flush_svm_range_dev(svm, sdev, 0, -1, 0); + kfree_rcu(sdev, rcu); + + if (list_empty(&svm->devs)) { + mmu_notifier_unregister(&svm->notifier, svm->mm); + + idr_remove(&svm->iommu->pasid_idr, svm->pasid); + mmput(svm->mm); + /* We mandate that no page faults may be outstanding + * for the PASID when intel_svm_unbind_mm() is called. + * If that is not obeyed, subtle errors will happen. + * Let's make them less subtle... 
*/ + memset(svm, 0x6b, sizeof(*svm)); + kfree(svm); + } + } + break; + } + } + out: + mutex_unlock(&pasid_mutex); + + return ret; +} +EXPORT_SYMBOL_GPL(intel_svm_unbind_mm); -- cgit v1.1 From 907fea3491d52063bb37b1f1ce0cf8a4ae70944c Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Tue, 13 Oct 2015 14:11:13 +0100 Subject: iommu/vt-d: Implement deferred invalidate for SVM Signed-off-by: David Woodhouse --- drivers/iommu/intel-iommu.c | 2 ++ drivers/iommu/intel-svm.c | 9 +++++++++ 2 files changed, 11 insertions(+) (limited to 'drivers/iommu') diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 60b66d2..58ecd52 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -4981,6 +4981,8 @@ int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct intel_svm_dev *sd ctx_lo |= CONTEXT_TT_PT_PASID << 2; } ctx_lo |= CONTEXT_PASIDE; + if (iommu->pasid_state_table) + ctx_lo |= CONTEXT_DINVE; context[0].lo = ctx_lo; wmb(); iommu->flush.flush_context(iommu, sdev->did, sdev->sid, diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c index 82d53e1..a64720b 100644 --- a/drivers/iommu/intel-svm.c +++ b/drivers/iommu/intel-svm.c @@ -26,6 +26,10 @@ struct pasid_entry { u64 val; }; +struct pasid_state_entry { + u64 val; +}; + int intel_svm_alloc_pasid_tables(struct intel_iommu *iommu) { struct page *pages; @@ -127,6 +131,11 @@ static void intel_flush_svm_range(struct intel_svm *svm, unsigned long address, { struct intel_svm_dev *sdev; + /* Try deferred invalidate if available */ + if (svm->iommu->pasid_state_table && + !cmpxchg64(&svm->iommu->pasid_state_table[svm->pasid].val, 0, 1ULL << 63)) + return; + rcu_read_lock(); list_for_each_entry_rcu(sdev, &svm->devs, list) intel_flush_svm_range_dev(svm, sdev, address, pages, ih); -- cgit v1.1 From 1208225cf48fa3b170b6dfe7369f15c295260755 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Wed, 7 Oct 2015 15:37:03 +0100 Subject: iommu/vt-d: Generalise DMAR MSI setup to allow for page request events Signed-off-by: David Woodhouse --- drivers/iommu/dmar.c | 42 +++++++++++++++++++++++++++++++----------- 1 file changed, 31 insertions(+), 11 deletions(-) (limited to 'drivers/iommu') diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c index 8757f8d..80e3c17 100644 --- a/drivers/iommu/dmar.c +++ b/drivers/iommu/dmar.c @@ -1086,6 +1086,11 @@ static void free_iommu(struct intel_iommu *iommu) iommu_device_destroy(iommu->iommu_dev); if (iommu->irq) { + if (iommu->pr_irq) { + free_irq(iommu->pr_irq, iommu); + dmar_free_hwirq(iommu->pr_irq); + iommu->pr_irq = 0; + } free_irq(iommu->irq, iommu); dmar_free_hwirq(iommu->irq); iommu->irq = 0; @@ -1493,53 +1498,68 @@ static const char *dmar_get_fault_reason(u8 fault_reason, int *fault_type) } } + +static inline int dmar_msi_reg(struct intel_iommu *iommu, int irq) +{ + if (iommu->irq == irq) + return DMAR_FECTL_REG; + else if (iommu->pr_irq == irq) + return DMAR_PECTL_REG; + else + BUG(); +} + void dmar_msi_unmask(struct irq_data *data) { struct intel_iommu *iommu = irq_data_get_irq_handler_data(data); + int reg = dmar_msi_reg(iommu, data->irq); unsigned long flag; /* unmask it */ raw_spin_lock_irqsave(&iommu->register_lock, flag); - writel(0, iommu->reg + DMAR_FECTL_REG); + writel(0, iommu->reg + reg); /* Read a reg to force flush the post write */ - readl(iommu->reg + DMAR_FECTL_REG); + readl(iommu->reg + reg); raw_spin_unlock_irqrestore(&iommu->register_lock, flag); } void dmar_msi_mask(struct irq_data *data) { - unsigned long flag; struct intel_iommu *iommu = 
irq_data_get_irq_handler_data(data); + int reg = dmar_msi_reg(iommu, data->irq); + unsigned long flag; /* mask it */ raw_spin_lock_irqsave(&iommu->register_lock, flag); - writel(DMA_FECTL_IM, iommu->reg + DMAR_FECTL_REG); + writel(DMA_FECTL_IM, iommu->reg + reg); /* Read a reg to force flush the post write */ - readl(iommu->reg + DMAR_FECTL_REG); + readl(iommu->reg + reg); raw_spin_unlock_irqrestore(&iommu->register_lock, flag); } void dmar_msi_write(int irq, struct msi_msg *msg) { struct intel_iommu *iommu = irq_get_handler_data(irq); + int reg = dmar_msi_reg(iommu, irq); unsigned long flag; raw_spin_lock_irqsave(&iommu->register_lock, flag); - writel(msg->data, iommu->reg + DMAR_FEDATA_REG); - writel(msg->address_lo, iommu->reg + DMAR_FEADDR_REG); - writel(msg->address_hi, iommu->reg + DMAR_FEUADDR_REG); + writel(msg->data, iommu->reg + reg + 4); + writel(msg->address_lo, iommu->reg + reg + 8); + writel(msg->address_hi, iommu->reg + reg + 12); raw_spin_unlock_irqrestore(&iommu->register_lock, flag); } void dmar_msi_read(int irq, struct msi_msg *msg) { struct intel_iommu *iommu = irq_get_handler_data(irq); + int reg = dmar_msi_reg(iommu, irq); unsigned long flag; raw_spin_lock_irqsave(&iommu->register_lock, flag); - msg->data = readl(iommu->reg + DMAR_FEDATA_REG); - msg->address_lo = readl(iommu->reg + DMAR_FEADDR_REG); - msg->address_hi = readl(iommu->reg + DMAR_FEUADDR_REG); + msg->data = readl(iommu->reg + reg + 4); + msg->address_lo = readl(iommu->reg + reg + 8); + msg->address_hi = readl(iommu->reg + reg + 12); raw_spin_unlock_irqrestore(&iommu->register_lock, flag); } -- cgit v1.1 From a222a7f0bb6c94c31cc9c755110593656f19de89 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Wed, 7 Oct 2015 23:35:18 +0100 Subject: iommu/vt-d: Implement page request handling Largely based on the driver-mode implementation by Jesse Barnes. 
Signed-off-by: David Woodhouse --- drivers/iommu/intel-iommu.c | 22 +++++- drivers/iommu/intel-svm.c | 173 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 194 insertions(+), 1 deletion(-) (limited to 'drivers/iommu') diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 58ecd52..68d71f8 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -1698,8 +1698,11 @@ static void free_dmar_iommu(struct intel_iommu *iommu) free_context_table(iommu); #ifdef CONFIG_INTEL_IOMMU_SVM - if (pasid_enabled(iommu)) + if (pasid_enabled(iommu)) { + if (ecap_prs(iommu->ecap)) + intel_svm_finish_prq(iommu); intel_svm_free_pasid_tables(iommu); + } #endif } @@ -3243,6 +3246,13 @@ domains_done: iommu_flush_write_buffer(iommu); +#ifdef CONFIG_INTEL_IOMMU_SVM + if (pasid_enabled(iommu) && ecap_prs(iommu->ecap)) { + ret = intel_svm_enable_prq(iommu); + if (ret) + goto free_iommu; + } +#endif ret = dmar_set_interrupt(iommu); if (ret) goto free_iommu; @@ -4187,6 +4197,14 @@ static int intel_iommu_add(struct dmar_drhd_unit *dmaru) intel_iommu_init_qi(iommu); iommu_flush_write_buffer(iommu); + +#ifdef CONFIG_INTEL_IOMMU_SVM + if (pasid_enabled(iommu) && ecap_prs(iommu->ecap)) { + ret = intel_svm_enable_prq(iommu); + if (ret) + goto disable_iommu; + } +#endif ret = dmar_set_interrupt(iommu); if (ret) goto disable_iommu; @@ -4983,6 +5001,8 @@ int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct intel_svm_dev *sd ctx_lo |= CONTEXT_PASIDE; if (iommu->pasid_state_table) ctx_lo |= CONTEXT_DINVE; + if (info->pri_supported) + ctx_lo |= CONTEXT_PRS; context[0].lo = ctx_lo; wmb(); iommu->flush.flush_context(iommu, sdev->did, sdev->sid, diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c index a64720b..0e86542 100644 --- a/drivers/iommu/intel-svm.c +++ b/drivers/iommu/intel-svm.c @@ -21,6 +21,10 @@ #include #include #include +#include +#include + +static irqreturn_t prq_event_thread(int irq, void *d); struct pasid_entry { u64 val; @@ -82,6 +86,66 @@ int intel_svm_free_pasid_tables(struct intel_iommu *iommu) return 0; } +#define PRQ_ORDER 0 + +int intel_svm_enable_prq(struct intel_iommu *iommu) +{ + struct page *pages; + int irq, ret; + + pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, PRQ_ORDER); + if (!pages) { + pr_warn("IOMMU: %s: Failed to allocate page request queue\n", + iommu->name); + return -ENOMEM; + } + iommu->prq = page_address(pages); + + irq = dmar_alloc_hwirq(DMAR_UNITS_SUPPORTED + iommu->seq_id, iommu->node, iommu); + if (irq <= 0) { + pr_err("IOMMU: %s: Failed to create IRQ vector for page request queue\n", + iommu->name); + ret = -EINVAL; + err: + free_pages((unsigned long)iommu->prq, PRQ_ORDER); + iommu->prq = NULL; + return ret; + } + iommu->pr_irq = irq; + + snprintf(iommu->prq_name, sizeof(iommu->prq_name), "dmar%d-prq", iommu->seq_id); + + ret = request_threaded_irq(irq, NULL, prq_event_thread, IRQF_ONESHOT, + iommu->prq_name, iommu); + if (ret) { + pr_err("IOMMU: %s: Failed to request IRQ for page request queue\n", + iommu->name); + dmar_free_hwirq(irq); + goto err; + } + dmar_writeq(iommu->reg + DMAR_PQH_REG, 0ULL); + dmar_writeq(iommu->reg + DMAR_PQT_REG, 0ULL); + dmar_writeq(iommu->reg + DMAR_PQA_REG, virt_to_phys(iommu->prq) | PRQ_ORDER); + + return 0; +} + +int intel_svm_finish_prq(struct intel_iommu *iommu) +{ + dmar_writeq(iommu->reg + DMAR_PQH_REG, 0ULL); + dmar_writeq(iommu->reg + DMAR_PQT_REG, 0ULL); + dmar_writeq(iommu->reg + DMAR_PQA_REG, 0ULL); + + free_irq(iommu->pr_irq, iommu); + dmar_free_hwirq(iommu->pr_irq); + 
iommu->pr_irq = 0; + + free_pages((unsigned long)iommu->prq, PRQ_ORDER); + iommu->prq = NULL; + + return 0; +} + static void intel_flush_svm_range_dev (struct intel_svm *svm, struct intel_svm_dev *sdev, unsigned long address, int pages, int ih) { @@ -363,3 +427,112 @@ int intel_svm_unbind_mm(struct device *dev, int pasid) return ret; } EXPORT_SYMBOL_GPL(intel_svm_unbind_mm); + +/* Page request queue descriptor */ +struct page_req_dsc { + u64 srr:1; + u64 bof:1; + u64 pasid_present:1; + u64 lpig:1; + u64 pasid:20; + u64 bus:8; + u64 private:23; + u64 prg_index:9; + u64 rd_req:1; + u64 wr_req:1; + u64 exe_req:1; + u64 priv_req:1; + u64 devfn:8; + u64 addr:52; +}; + +#define PRQ_RING_MASK ((0x1000 << PRQ_ORDER) - 0x10) +static irqreturn_t prq_event_thread(int irq, void *d) +{ + struct intel_iommu *iommu = d; + struct intel_svm *svm = NULL; + int head, tail, handled = 0; + + tail = dmar_readq(iommu->reg + DMAR_PQT_REG) & PRQ_RING_MASK; + head = dmar_readq(iommu->reg + DMAR_PQH_REG) & PRQ_RING_MASK; + while (head != tail) { + struct vm_area_struct *vma; + struct page_req_dsc *req; + struct qi_desc resp; + int ret, result; + u64 address; + + handled = 1; + + req = &iommu->prq[head / sizeof(*req)]; + + result = QI_RESP_FAILURE; + address = req->addr << PAGE_SHIFT; + if (!req->pasid_present) { + pr_err("%s: Page request without PASID: %08llx %08llx\n", + iommu->name, ((unsigned long long *)req)[0], + ((unsigned long long *)req)[1]); + goto bad_req; + } + + if (!svm || svm->pasid != req->pasid) { + rcu_read_lock(); + svm = idr_find(&iommu->pasid_idr, req->pasid); + /* It *can't* go away, because the driver is not permitted + * to unbind the mm while any page faults are outstanding. + * So we only need RCU to protect the internal idr code. */ + rcu_read_unlock(); + + if (!svm) { + pr_err("%s: Page request for invalid PASID %d: %08llx %08llx\n", + iommu->name, req->pasid, ((unsigned long long *)req)[0], + ((unsigned long long *)req)[1]); + goto bad_req; + } + } + + result = QI_RESP_INVALID; + down_read(&svm->mm->mmap_sem); + vma = find_extend_vma(svm->mm, address); + if (!vma || address < vma->vm_start) + goto invalid; + + ret = handle_mm_fault(svm->mm, vma, address, + req->wr_req ? FAULT_FLAG_WRITE : 0); + if (ret & VM_FAULT_ERROR) + goto invalid; + + result = QI_RESP_SUCCESS; + invalid: + up_read(&svm->mm->mmap_sem); + bad_req: + /* Accounting for major/minor faults? 
*/ + + if (req->lpig) { + /* Page Group Response */ + resp.low = QI_PGRP_PASID(req->pasid) | + QI_PGRP_DID((req->bus << 8) | req->devfn) | + QI_PGRP_PASID_P(req->pasid_present) | + QI_PGRP_RESP_TYPE; + resp.high = QI_PGRP_IDX(req->prg_index) | + QI_PGRP_PRIV(req->private) | QI_PGRP_RESP_CODE(result); + + qi_submit_sync(&resp, svm->iommu); + } else if (req->srr) { + /* Page Stream Response */ + resp.low = QI_PSTRM_IDX(req->prg_index) | + QI_PSTRM_PRIV(req->private) | QI_PSTRM_BUS(req->bus) | + QI_PSTRM_PASID(req->pasid) | QI_PSTRM_RESP_TYPE; + resp.high = QI_PSTRM_ADDR(address) | QI_PSTRM_DEVFN(req->devfn) | + QI_PSTRM_RESP_CODE(result); + + qi_submit_sync(&resp, svm->iommu); + } + + head = (head + sizeof(*req)) & PRQ_RING_MASK; + } + + dmar_writeq(iommu->reg + DMAR_PQH_REG, tail); + + return IRQ_RETVAL(handled); +} -- cgit v1.1 From 0204a49609824163092c32a8aeb073f7e9acc76d Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Tue, 13 Oct 2015 17:18:10 +0100 Subject: iommu/vt-d: Add callback to device driver on page faults Signed-off-by: David Woodhouse --- drivers/iommu/intel-svm.c | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) (limited to 'drivers/iommu') diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c index 0e86542..006e95d 100644 --- a/drivers/iommu/intel-svm.c +++ b/drivers/iommu/intel-svm.c @@ -264,7 +264,7 @@ static const struct mmu_notifier_ops intel_mmuops = { static DEFINE_MUTEX(pasid_mutex); -int intel_svm_bind_mm(struct device *dev, int *pasid) +int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_ops *ops) { struct intel_iommu *iommu = intel_svm_device_to_iommu(dev); struct intel_svm_dev *sdev; @@ -302,6 +302,10 @@ int intel_svm_bind_mm(struct device *dev, int *pasid) list_for_each_entry(sdev, &svm->devs, list) { if (dev == sdev->dev) { + if (sdev->ops != ops) { + ret = -EBUSY; + goto out; + } sdev->users++; goto success; } @@ -327,6 +331,7 @@ int intel_svm_bind_mm(struct device *dev, int *pasid) } /* Finish the setup now we know we're keeping it */ sdev->users = 1; + sdev->ops = ops; init_rcu_head(&sdev->rcu); if (!svm) { @@ -456,6 +461,7 @@ static irqreturn_t prq_event_thread(int irq, void *d) tail = dmar_readq(iommu->reg + DMAR_PQT_REG) & PRQ_RING_MASK; head = dmar_readq(iommu->reg + DMAR_PQH_REG) & PRQ_RING_MASK; while (head != tail) { + struct intel_svm_dev *sdev; struct vm_area_struct *vma; struct page_req_dsc *req; struct qi_desc resp; @@ -507,6 +513,24 @@ static irqreturn_t prq_event_thread(int irq, void *d) up_read(&svm->mm->mmap_sem); bad_req: /* Accounting for major/minor faults? */ + rcu_read_lock(); + list_for_each_entry_rcu(sdev, &svm->devs, list) { + if (sdev->sid == PCI_DEVID(req->bus, req->devfn)); + break; + } + /* Other devices can go away, but the drivers are not permitted + * to unbind while any page faults might be in flight. So it's + * OK to drop the 'lock' here now we have it. 
*/ + rcu_read_unlock(); + + if (WARN_ON(&sdev->list == &svm->devs)) + sdev = NULL; + + if (sdev && sdev->ops && sdev->ops->fault_cb) { + int rwxp = (req->rd_req << 3) | (req->wr_req << 2) | + (req->wr_req << 1) | (req->exe_req); + sdev->ops->fault_cb(sdev->dev, req->pasid, req->addr, req->private, rwxp, result); + } if (req->lpig) { /* Page Group Response */ -- cgit v1.1 From 569e4f7782fb92d0e1b395b5fb01de642dd74dcf Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Thu, 15 Oct 2015 13:59:14 +0100 Subject: iommu/vt-d: Implement SVM_FLAG_PRIVATE_PASID to allocate unique PASIDs Signed-off-by: David Woodhouse --- drivers/iommu/intel-svm.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'drivers/iommu') diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c index 006e95d..89d4d47 100644 --- a/drivers/iommu/intel-svm.c +++ b/drivers/iommu/intel-svm.c @@ -285,11 +285,12 @@ int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_ pasid_max = 1 << 20; mutex_lock(&pasid_mutex); - if (pasid) { + if (pasid && !(flags & SVM_FLAG_PRIVATE_PASID)) { int i; idr_for_each_entry(&iommu->pasid_idr, svm, i) { - if (svm->mm != current->mm) + if (svm->mm != current->mm || + (svm->flags & SVM_FLAG_PRIVATE_PASID)) continue; if (svm->pasid >= pasid_max) { @@ -355,6 +356,7 @@ int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_ svm->pasid = ret; svm->notifier.ops = &intel_mmuops; svm->mm = get_task_mm(current); + svm->flags = flags; INIT_LIST_HEAD_RCU(&svm->devs); ret = -ENOMEM; if (!svm->mm || (ret = mmu_notifier_register(&svm->notifier, svm->mm))) { -- cgit v1.1 From 5cec753709adf1a20c8b15edf8e5245cf4fd4e82 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Thu, 15 Oct 2015 15:52:15 +0100 Subject: iommu/vt-d: Implement SVM_FLAG_SUPERVISOR_MODE for kernel access MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is only usable for the static 1:1 mapping of physical memory. Any access to vmalloc or module regions will require some way of doing an IOTLB flush. It's theoretically possible to hook into the tlb_flush_kernel_range() function, but that seems like overkill — most of the addresses accessed through a kernel PASID *will* be in the 1:1 mapping. If we really need to allow access to more interesting kernel regions, then the answer will probably be an explicit IOTLB flush call after use, akin to the DMA API's unmap function. In fact, it might be worth introducing that sooner rather than later, and making it just BUG() if the address isn't in the static 1:1 mapping. 
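As an illustrative aside, not part of the patch itself: a driver wanting such a kernel PASID would use the interface added earlier in this series roughly as sketched below. The function name, device pointer and error handling are placeholder assumptions; only intel_svm_bind_mm(), intel_svm_unbind_mm() and SVM_FLAG_SUPERVISOR_MODE come from the series.

static int example_bind_kernel_pasid(struct device *dev)
{
	int pasid, ret;

	/* No svm_dev_ops: this sketch registers no page-fault callback */
	ret = intel_svm_bind_mm(dev, &pasid, SVM_FLAG_SUPERVISOR_MODE, NULL);
	if (ret)
		return ret;

	/*
	 * Program 'pasid' into the device here. As noted above, only
	 * addresses in the static 1:1 mapping of physical memory are
	 * safe for the device to access through this PASID.
	 */

	intel_svm_unbind_mm(dev, pasid);
	return 0;
}
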
Signed-off-by: David Woodhouse --- drivers/iommu/intel-svm.c | 43 +++++++++++++++++++++++++++++++------------ 1 file changed, 31 insertions(+), 12 deletions(-) (limited to 'drivers/iommu') diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c index 89d4d47..817be76 100644 --- a/drivers/iommu/intel-svm.c +++ b/drivers/iommu/intel-svm.c @@ -269,11 +269,10 @@ int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_ struct intel_iommu *iommu = intel_svm_device_to_iommu(dev); struct intel_svm_dev *sdev; struct intel_svm *svm = NULL; + struct mm_struct *mm = NULL; int pasid_max; int ret; - BUG_ON(pasid && !current->mm); - if (WARN_ON(!iommu)) return -EINVAL; @@ -284,12 +283,20 @@ int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_ } else pasid_max = 1 << 20; + if ((flags & SVM_FLAG_SUPERVISOR_MODE)) { + if (!ecap_srs(iommu->ecap)) + return -EINVAL; + } else if (pasid) { + mm = get_task_mm(current); + BUG_ON(!mm); + } + mutex_lock(&pasid_mutex); if (pasid && !(flags & SVM_FLAG_PRIVATE_PASID)) { int i; idr_for_each_entry(&iommu->pasid_idr, svm, i) { - if (svm->mm != current->mm || + if (svm->mm != mm || (svm->flags & SVM_FLAG_PRIVATE_PASID)) continue; @@ -355,17 +362,22 @@ int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_ } svm->pasid = ret; svm->notifier.ops = &intel_mmuops; - svm->mm = get_task_mm(current); + svm->mm = mm; svm->flags = flags; INIT_LIST_HEAD_RCU(&svm->devs); ret = -ENOMEM; - if (!svm->mm || (ret = mmu_notifier_register(&svm->notifier, svm->mm))) { - idr_remove(&svm->iommu->pasid_idr, svm->pasid); - kfree(svm); - kfree(sdev); - goto out; - } - iommu->pasid_table[svm->pasid].val = (u64)__pa(svm->mm->pgd) | 1; + if (mm) { + ret = mmu_notifier_register(&svm->notifier, mm); + if (ret) { + idr_remove(&svm->iommu->pasid_idr, svm->pasid); + kfree(svm); + kfree(sdev); + goto out; + } + iommu->pasid_table[svm->pasid].val = (u64)__pa(mm->pgd) | 1; + mm = NULL; + } else + iommu->pasid_table[svm->pasid].val = (u64)__pa(init_mm.pgd) | 1 | (1ULL << 11); wmb(); } list_add_rcu(&sdev->list, &svm->devs); @@ -375,6 +387,8 @@ int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_ ret = 0; out: mutex_unlock(&pasid_mutex); + if (mm) + mmput(mm); return ret; } EXPORT_SYMBOL_GPL(intel_svm_bind_mm); @@ -416,7 +430,8 @@ int intel_svm_unbind_mm(struct device *dev, int pasid) mmu_notifier_unregister(&svm->notifier, svm->mm); idr_remove(&svm->iommu->pasid_idr, svm->pasid); - mmput(svm->mm); + if (svm->mm) + mmput(svm->mm); /* We mandate that no page faults may be outstanding * for the PASID when intel_svm_unbind_mm() is called. * If that is not obeyed, subtle errors will happen. @@ -500,6 +515,10 @@ static irqreturn_t prq_event_thread(int irq, void *d) } result = QI_RESP_INVALID; + /* Since we're using init_mm.pgd directly, we should never take + * any faults on kernel addresses. */ + if (!svm->mm) + goto bad_req; down_read(&svm->mm->mmap_sem); vma = find_extend_vma(svm->mm, address); if (!vma || address < vma->vm_start) -- cgit v1.1 From 26322ab55aa90717c7e4bdbd8cf60a70854636f5 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Thu, 15 Oct 2015 21:12:56 +0100 Subject: iommu/vt-d: Fix NULL pointer dereference in page request error case Dan Carpenter pointed out an error path which could lead to us dereferencing the 'svm' pointer after we know it to be NULL because the PASID lookup failed. Fix that, and make it less likely to happen again. 
Fixes: a222a7f0bb6c ('iommu/vt-d: Implement page request handling') Signed-off-by: David Woodhouse --- drivers/iommu/intel-svm.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'drivers/iommu') diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c index 817be76..b7e923a 100644 --- a/drivers/iommu/intel-svm.c +++ b/drivers/iommu/intel-svm.c @@ -510,7 +510,7 @@ static irqreturn_t prq_event_thread(int irq, void *d) pr_err("%s: Page request for invalid PASID %d: %08llx %08llx\n", iommu->name, req->pasid, ((unsigned long long *)req)[0], ((unsigned long long *)req)[1]); - goto bad_req; + goto no_pasid; } } @@ -552,7 +552,11 @@ static irqreturn_t prq_event_thread(int irq, void *d) (req->wr_req << 1) | (req->exe_req); sdev->ops->fault_cb(sdev->dev, req->pasid, req->addr, req->private, rwxp, result); } - + /* We get here in the error case where the PASID lookup failed, + and these can be NULL. Do not use them below this point! */ + sdev = NULL; + svm = NULL; + no_pasid: if (req->lpig) { /* Page Group Response */ resp.low = QI_PGRP_PASID(req->pasid) | @@ -562,7 +566,7 @@ static irqreturn_t prq_event_thread(int irq, void *d) resp.high = QI_PGRP_IDX(req->prg_index) | QI_PGRP_PRIV(req->private) | QI_PGRP_RESP_CODE(result); - qi_submit_sync(&resp, svm->iommu); + qi_submit_sync(&resp, iommu); } else if (req->srr) { /* Page Stream Response */ resp.low = QI_PSTRM_IDX(req->prg_index) | @@ -571,7 +575,7 @@ static irqreturn_t prq_event_thread(int irq, void *d) resp.high = QI_PSTRM_ADDR(address) | QI_PSTRM_DEVFN(req->devfn) | QI_PSTRM_RESP_CODE(result); - qi_submit_sync(&resp, svm->iommu); + qi_submit_sync(&resp, iommu); } head = (head + sizeof(*req)) & PRQ_RING_MASK; -- cgit v1.1 From 95fb6144bb2222b4c0189e76c1aae006b0a02bff Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 15 Oct 2015 21:25:15 +0300 Subject: iommu/vt-d: shift wrapping bug in prq_event_thread() The "req->addr" variable is a bit field declared as "u64 addr:52;". The "address" variable is a u64. We need to cast "req->addr" to a u64 before the shift or the result is truncated to 52 bits. Fixes: a222a7f0bb6c ('iommu/vt-d: Implement page request handling') Signed-off-by: Dan Carpenter Signed-off-by: David Woodhouse --- drivers/iommu/intel-svm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/iommu') diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c index b7e923a..99a7803 100644 --- a/drivers/iommu/intel-svm.c +++ b/drivers/iommu/intel-svm.c @@ -490,7 +490,7 @@ static irqreturn_t prq_event_thread(int irq, void *d) req = &iommu->prq[head / sizeof(*req)]; result = QI_RESP_FAILURE; - address = req->addr << PAGE_SHIFT; + address = (u64)req->addr << PAGE_SHIFT; if (!req->pasid_present) { pr_err("%s: Page request without PASID: %08llx %08llx\n", iommu->name, ((unsigned long long *)req)[0], -- cgit v1.1 From 7f92a2e9107c2cf870a11bb77738207daa94fee3 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Fri, 16 Oct 2015 17:22:31 +0100 Subject: iommu/vt-d: Fix address shifting in page request handler This really should be VTD_PAGE_SHIFT, not PAGE_SHIFT. Not that we ever really anticipate seeing this used on IA64, but we should get it right anyway. 
Signed-off-by: David Woodhouse --- drivers/iommu/intel-svm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/iommu') diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c index 99a7803..24ee57c 100644 --- a/drivers/iommu/intel-svm.c +++ b/drivers/iommu/intel-svm.c @@ -490,7 +490,7 @@ static irqreturn_t prq_event_thread(int irq, void *d) req = &iommu->prq[head / sizeof(*req)]; result = QI_RESP_FAILURE; - address = (u64)req->addr << PAGE_SHIFT; + address = (u64)req->addr << VTD_PAGE_SHIFT; if (!req->pasid_present) { pr_err("%s: Page request without PASID: %08llx %08llx\n", iommu->name, ((unsigned long long *)req)[0], -- cgit v1.1 From e03499216023fe41d4ba1b9fcb45332a5a169643 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Fri, 16 Oct 2015 19:36:53 +0100 Subject: iommu/vt-d: Fix IOTLB flushing for global pages When flushing kernel-mode PASIDs, we need to flush global pages too. Signed-off-by: David Woodhouse --- drivers/iommu/intel-svm.c | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) (limited to 'drivers/iommu') diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c index 24ee57c..da73d2b 100644 --- a/drivers/iommu/intel-svm.c +++ b/drivers/iommu/intel-svm.c @@ -147,20 +147,27 @@ int intel_svm_finish_prq(struct intel_iommu *iommu) } static void intel_flush_svm_range_dev (struct intel_svm *svm, struct intel_svm_dev *sdev, - unsigned long address, int pages, int ih) + unsigned long address, int pages, int ih, int gl) { struct qi_desc desc; int mask = ilog2(__roundup_pow_of_two(pages)); if (pages == -1 || !cap_pgsel_inv(svm->iommu->cap) || mask > cap_max_amask_val(svm->iommu->cap)) { - desc.low = QI_EIOTLB_PASID(svm->pasid) | QI_EIOTLB_DID(sdev->did) | - QI_EIOTLB_GRAN(QI_GRAN_NONG_PASID) | QI_EIOTLB_TYPE; + /* For global kernel pages we have to flush them in *all* PASIDs + * because that's the only option the hardware gives us. Despite + * the fact that they are actually only accessible through one. 
*/ + if (gl) + desc.low = QI_EIOTLB_PASID(svm->pasid) | QI_EIOTLB_DID(sdev->did) | + QI_EIOTLB_GRAN(QI_GRAN_ALL_ALL) | QI_EIOTLB_TYPE; + else + desc.low = QI_EIOTLB_PASID(svm->pasid) | QI_EIOTLB_DID(sdev->did) | + QI_EIOTLB_GRAN(QI_GRAN_NONG_PASID) | QI_EIOTLB_TYPE; desc.high = 0; } else { desc.low = QI_EIOTLB_PASID(svm->pasid) | QI_EIOTLB_DID(sdev->did) | QI_EIOTLB_GRAN(QI_GRAN_PSI_PASID) | QI_EIOTLB_TYPE; - desc.high = QI_EIOTLB_ADDR(address) | QI_EIOTLB_GL(1) | + desc.high = QI_EIOTLB_ADDR(address) | QI_EIOTLB_GL(gl) | QI_EIOTLB_IH(ih) | QI_EIOTLB_AM(mask); } @@ -191,7 +198,7 @@ static void intel_flush_svm_range_dev (struct intel_svm *svm, struct intel_svm_d } static void intel_flush_svm_range(struct intel_svm *svm, unsigned long address, - int pages, int ih) + int pages, int ih, int gl) { struct intel_svm_dev *sdev; @@ -202,7 +209,7 @@ static void intel_flush_svm_range(struct intel_svm *svm, unsigned long address, rcu_read_lock(); list_for_each_entry_rcu(sdev, &svm->devs, list) - intel_flush_svm_range_dev(svm, sdev, address, pages, ih); + intel_flush_svm_range_dev(svm, sdev, address, pages, ih, gl); rcu_read_unlock(); } @@ -211,7 +218,7 @@ static void intel_change_pte(struct mmu_notifier *mn, struct mm_struct *mm, { struct intel_svm *svm = container_of(mn, struct intel_svm, notifier); - intel_flush_svm_range(svm, address, 1, 1); + intel_flush_svm_range(svm, address, 1, 1, 0); } static void intel_invalidate_page(struct mmu_notifier *mn, struct mm_struct *mm, @@ -219,7 +226,7 @@ static void intel_invalidate_page(struct mmu_notifier *mn, struct mm_struct *mm, { struct intel_svm *svm = container_of(mn, struct intel_svm, notifier); - intel_flush_svm_range(svm, address, 1, 1); + intel_flush_svm_range(svm, address, 1, 1, 0); } /* Pages have been freed at this point */ @@ -230,7 +237,7 @@ static void intel_invalidate_range(struct mmu_notifier *mn, struct intel_svm *svm = container_of(mn, struct intel_svm, notifier); intel_flush_svm_range(svm, start, - (end - start + PAGE_SIZE - 1) >> VTD_PAGE_SHIFT , 0); + (end - start + PAGE_SIZE - 1) >> VTD_PAGE_SHIFT, 0, 0); } @@ -423,7 +430,7 @@ int intel_svm_unbind_mm(struct device *dev, int pasid) * large and has to be physically contiguous. So it's * hard to be as defensive as we might like. */ intel_flush_pasid_dev(svm, sdev); - intel_flush_svm_range_dev(svm, sdev, 0, -1, 0); + intel_flush_svm_range_dev(svm, sdev, 0, -1, 0, !svm->mm); kfree_rcu(sdev, rcu); if (list_empty(&svm->devs)) { -- cgit v1.1 From 3c7c2f32884214e5b05da6426c6f737d3cd4efcb Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Sat, 17 Oct 2015 08:18:47 +0300 Subject: iommu/vt-d: fix a loop in prq_event_thread() There is an extra semi-colon on this if statement so we always break on the first iteration. Fixes: 0204a4960982 ('iommu/vt-d: Add callback to device driver on page faults') Signed-off-by: Dan Carpenter Signed-off-by: David Woodhouse --- drivers/iommu/intel-svm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/iommu') diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c index da73d2b..dd871d5 100644 --- a/drivers/iommu/intel-svm.c +++ b/drivers/iommu/intel-svm.c @@ -543,7 +543,7 @@ static irqreturn_t prq_event_thread(int irq, void *d) /* Accounting for major/minor faults? 
*/ rcu_read_lock(); list_for_each_entry_rcu(sdev, &svm->devs, list) { - if (sdev->sid == PCI_DEVID(req->bus, req->devfn)); + if (sdev->sid == PCI_DEVID(req->bus, req->devfn)) break; } /* Other devices can go away, but the drivers are not permitted -- cgit v1.1 From b9997e385eb2b9004d989e3710bd9001532410c0 Mon Sep 17 00:00:00 2001 From: Sudeep Dutt Date: Sun, 18 Oct 2015 20:54:37 -0700 Subject: iommu/vt-d: Use dev_err(..) in intel_svm_device_to_iommu(..) This will give a little bit of assistance to those developing drivers using SVM. It might cause a slight annoyance to end-users whose kernel disables the IOMMU when drivers are trying to use it. But the fix there is to fix the kernel to enable the IOMMU. Signed-off-by: Sudeep Dutt Signed-off-by: David Woodhouse --- drivers/iommu/intel-iommu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/iommu') diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 68d71f8..7a1c15d 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -5042,12 +5042,12 @@ struct intel_iommu *intel_svm_device_to_iommu(struct device *dev) iommu = device_to_iommu(dev, &bus, &devfn); if ((!iommu)) { - dev_dbg(dev, "No IOMMU for device; cannot enable SVM\n"); + dev_err(dev, "No IOMMU for device; cannot enable SVM\n"); return NULL; } if (!iommu->pasid_table) { - dev_dbg(dev, "PASID not enabled on IOMMU; cannot enable SVM\n"); + dev_err(dev, "PASID not enabled on IOMMU; cannot enable SVM\n"); return NULL; } -- cgit v1.1 From 5d52f482ebb7d0845e84cb235700061bc5682ada Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Tue, 20 Oct 2015 15:52:13 +0100 Subject: iommu/vt-d: Fix SVM IOTLB flush handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Change the 'pages' parameter to 'unsigned long' to avoid overflow. Fix the device-IOTLB flush parameter calculation — the size of the IOTLB flush is indicated by the position of the least significant zero bit in the address field. For example, a value of 0x12345f000 will flush from 0x123440000 to 0x12347ffff (256KiB). Finally, the cap_pgsel_inv() is not relevant to SVM; the spec says that *all* implementations must support page-selective invaliation for "first-level" translations. So don't check for it. Signed-off-by: David Woodhouse --- drivers/iommu/intel-svm.c | 35 +++++++++++++++-------------------- 1 file changed, 15 insertions(+), 20 deletions(-) (limited to 'drivers/iommu') diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c index dd871d5..a584df0 100644 --- a/drivers/iommu/intel-svm.c +++ b/drivers/iommu/intel-svm.c @@ -147,13 +147,11 @@ int intel_svm_finish_prq(struct intel_iommu *iommu) } static void intel_flush_svm_range_dev (struct intel_svm *svm, struct intel_svm_dev *sdev, - unsigned long address, int pages, int ih, int gl) + unsigned long address, unsigned long pages, int ih, int gl) { struct qi_desc desc; - int mask = ilog2(__roundup_pow_of_two(pages)); - if (pages == -1 || !cap_pgsel_inv(svm->iommu->cap) || - mask > cap_max_amask_val(svm->iommu->cap)) { + if (pages == -1) { /* For global kernel pages we have to flush them in *all* PASIDs * because that's the only option the hardware gives us. Despite * the fact that they are actually only accessible through one. 
 		 */
@@ -165,31 +163,28 @@ static void intel_flush_svm_range_dev (struct intel_svm *svm, struct intel_svm_d
 			QI_EIOTLB_GRAN(QI_GRAN_NONG_PASID) | QI_EIOTLB_TYPE;
 		desc.high = 0;
 	} else {
+		int mask = ilog2(__roundup_pow_of_two(pages));
+
 		desc.low = QI_EIOTLB_PASID(svm->pasid) | QI_EIOTLB_DID(sdev->did) |
 			QI_EIOTLB_GRAN(QI_GRAN_PSI_PASID) | QI_EIOTLB_TYPE;
 		desc.high = QI_EIOTLB_ADDR(address) | QI_EIOTLB_GL(gl) |
 			QI_EIOTLB_IH(ih) | QI_EIOTLB_AM(mask);
 	}
-
 	qi_submit_sync(&desc, svm->iommu);
 
 	if (sdev->dev_iotlb) {
 		desc.low = QI_DEV_EIOTLB_PASID(svm->pasid) | QI_DEV_EIOTLB_SID(sdev->sid) |
 			QI_DEV_EIOTLB_QDEP(sdev->qdep) | QI_DEIOTLB_TYPE;
-		if (mask) {
-			unsigned long adr, delta;
-
-			/* Least significant zero bits in the address indicate the
-			 * range of the request. So mask them out according to the
-			 * size. */
-			adr = address & ((1<<(VTD_PAGE_SHIFT + mask)) - 1);
-
-			/* Now ensure that we round down further if the original
-			 * request was not aligned w.r.t. its size */
-			delta = address - adr;
-			if (delta + (pages << VTD_PAGE_SHIFT) >= (1 << (VTD_PAGE_SHIFT + mask)))
-				adr &= ~(1 << (VTD_PAGE_SHIFT + mask));
-			desc.high = QI_DEV_EIOTLB_ADDR(adr) | QI_DEV_EIOTLB_SIZE;
+		if (pages == -1) {
+			desc.high = QI_DEV_EIOTLB_ADDR(-1ULL >> 1) | QI_DEV_EIOTLB_SIZE;
+		} else if (pages > 1) {
+			/* The least significant zero bit indicates the size. So,
+			 * for example, an "address" value of 0x12345f000 will
+			 * flush from 0x123440000 to 0x12347ffff (256KiB). */
+			unsigned long last = address + ((unsigned long)(pages - 1) << VTD_PAGE_SHIFT);
+			unsigned long mask = __rounddown_pow_of_two(address ^ last);
+
+			desc.high = QI_DEV_EIOTLB_ADDR((address & ~mask) | (mask - 1)) | QI_DEV_EIOTLB_SIZE;
 		} else {
 			desc.high = QI_DEV_EIOTLB_ADDR(address);
 		}
@@ -198,7 +193,7 @@ static void intel_flush_svm_range_dev (struct intel_svm *svm, struct intel_svm_d
 }
 
 static void intel_flush_svm_range(struct intel_svm *svm, unsigned long address,
-			int pages, int ih, int gl)
+			unsigned long pages, int ih, int gl)
 {
 	struct intel_svm_dev *sdev;
-- cgit v1.1

From 5a10ba27d963bc79d6ac2e4996cdbb012195c306 Mon Sep 17 00:00:00 2001
From: David Woodhouse
Date: Sat, 24 Oct 2015 21:06:39 +0200
Subject: iommu/vt-d: Handle Caching Mode implementations of SVM

Not entirely clear why, but it seems we need to reserve PASID zero and
flush it when we make a PASID entry present. Quite why we couldn't use
the true PASID value isn't clear.
Signed-off-by: David Woodhouse
---
 drivers/iommu/intel-svm.c | 23 ++++++++++++++++++-----
 1 file changed, 18 insertions(+), 5 deletions(-)

(limited to 'drivers/iommu')

diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c
index a584df0..48a3d4a 100644
--- a/drivers/iommu/intel-svm.c
+++ b/drivers/iommu/intel-svm.c
@@ -236,12 +236,12 @@ static void intel_invalidate_range(struct mmu_notifier *mn,
 }
 
-static void intel_flush_pasid_dev(struct intel_svm *svm, struct intel_svm_dev *sdev)
+static void intel_flush_pasid_dev(struct intel_svm *svm, struct intel_svm_dev *sdev, int pasid)
 {
 	struct qi_desc desc;
 
 	desc.high = 0;
-	desc.low = QI_PC_TYPE | QI_PC_DID(sdev->did) | QI_PC_PASID_SEL | QI_PC_PASID(svm->pasid);
+	desc.low = QI_PC_TYPE | QI_PC_DID(sdev->did) | QI_PC_PASID_SEL | QI_PC_PASID(pasid);
 
 	qi_submit_sync(&desc, svm->iommu);
 }
@@ -356,8 +356,10 @@ int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_
 		if (pasid_max > 2 << ecap_pss(iommu->ecap))
 			pasid_max = 2 << ecap_pss(iommu->ecap);
 
-		ret = idr_alloc(&iommu->pasid_idr, svm, 0, pasid_max - 1,
-				GFP_KERNEL);
+		/* Do not use PASID 0 in caching mode (virtualised IOMMU) */
+		ret = idr_alloc(&iommu->pasid_idr, svm,
+				!!cap_caching_mode(iommu->cap),
+				pasid_max - 1, GFP_KERNEL);
 		if (ret < 0) {
 			kfree(svm);
 			goto out;
 		}
@@ -381,6 +383,17 @@ int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_
 		} else
 			iommu->pasid_table[svm->pasid].val = (u64)__pa(init_mm.pgd) | 1 | (1ULL << 11);
 		wmb();
+		/* In caching mode, we still have to flush with PASID 0 when
+		 * a PASID table entry becomes present. Not entirely clear
+		 * *why* that would be the case — surely we could just issue
+		 * a flush with the PASID value that we've changed? The PASID
+		 * is the index into the table, after all. It's not like domain
+		 * IDs in the case of the equivalent context-entry change in
+		 * caching mode. And for that matter it's not entirely clear why
+		 * a VMM would be in the business of caching the PASID table
+		 * anyway. Surely that can be left entirely to the guest? */
+		if (cap_caching_mode(iommu->cap))
+			intel_flush_pasid_dev(svm, sdev, 0);
 	}
 	list_add_rcu(&sdev->list, &svm->devs);
@@ -424,7 +437,7 @@ int intel_svm_unbind_mm(struct device *dev, int pasid)
 			 * to use. We have a *shared* PASID table, because it's
 			 * large and has to be physically contiguous. So it's
 			 * hard to be as defensive as we might like. */
-			intel_flush_pasid_dev(svm, sdev);
+			intel_flush_pasid_dev(svm, sdev, svm->pasid);
 			intel_flush_svm_range_dev(svm, sdev, 0, -1, 0, !svm->mm);
 			kfree_rcu(sdev, rcu);
-- cgit v1.1

From d42fde70849c5ba2f00c37a0666305eb507a47b8 Mon Sep 17 00:00:00 2001
From: David Woodhouse
Date: Sat, 24 Oct 2015 21:33:01 +0200
Subject: iommu/vt-d: Clean up pasid_enabled() and ecs_enabled() dependencies

When booted with intel_iommu=ecs_off we were still allocating the PASID
tables even though we couldn't actually use them.

We really want to make the pasid_enabled() macro depend on ecs_enabled().
Which is unfortunate, because currently they're the other way round to
cope with the Broadwell/Skylake problems with ECS.

Instead of having ecs_enabled() depend on pasid_enabled(), which was never
something that made me happy anyway, make it depend in the normal case on
the "broken PASID" bit 28 *not* being set.

Then pasid_enabled() can depend on ecs_enabled() as it should. And we also
don't need to mess with it if we ever see an implementation that has some
features requiring ECS (like PRI) but which *doesn't* have PASID support.
Signed-off-by: David Woodhouse
---
 drivers/iommu/intel-iommu.c | 30 +++++++++++++++++++++++-------
 1 file changed, 23 insertions(+), 7 deletions(-)

(limited to 'drivers/iommu')

diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index 7a1c15d..2973c09 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -507,14 +507,30 @@ static int iommu_identity_mapping;
 #define IDENTMAP_GFX		2
 #define IDENTMAP_AZALIA		4
 
-/* We only actually use ECS when PASID support (on the new bit 40)
- * is also advertised. Some early implementations — the ones with
- * PASID support on bit 28 — have issues even when we *only* use
- * extended root/context tables. */
-#define pasid_enabled(iommu) (ecap_pasid(iommu->ecap) || \
-			      (intel_iommu_pasid28 && ecap_broken_pasid(iommu->ecap)))
+/* Broadwell and Skylake have broken ECS support — normal so-called "second
+ * level" translation of DMA requests-without-PASID doesn't actually happen
+ * unless you also set the NESTE bit in an extended context-entry. Which of
+ * course means that SVM doesn't work because it's trying to do nested
+ * translation of the physical addresses it finds in the process page tables,
+ * through the IOVA->phys mapping found in the "second level" page tables.
+ *
+ * The VT-d specification was retroactively changed to change the definition
+ * of the capability bits and pretend that Broadwell/Skylake never happened...
+ * but unfortunately the wrong bit was changed. It's ECS which is broken, but
+ * for some reason it was the PASID capability bit which was redefined (from
+ * bit 28 on BDW/SKL to bit 40 in future).
+ *
+ * So our test for ECS needs to eschew those implementations which set the old
+ * PASID capability bit 28, since those are the ones on which ECS is broken.
+ * Unless we are working around the 'pasid28' limitations, that is, by putting
+ * the device into passthrough mode for normal DMA and thus masking the bug.
+ */
 #define ecs_enabled(iommu)	(intel_iommu_ecs && ecap_ecs(iommu->ecap) && \
-				 pasid_enabled(iommu))
+				 (intel_iommu_pasid28 || !ecap_broken_pasid(iommu->ecap)))
+/* PASID support is thus enabled if ECS is enabled and *either* of the old
+ * or new capability bits are set. */
+#define pasid_enabled(iommu)	(ecs_enabled(iommu) && \
+				 (ecap_pasid(iommu->ecap) || ecap_broken_pasid(iommu->ecap)))
 
 int intel_iommu_gfx_mapped;
 EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped);
-- cgit v1.1

From 0bdec95ce52d2705787f813e82c1ff2f1b29af17 Mon Sep 17 00:00:00 2001
From: David Woodhouse
Date: Wed, 28 Oct 2015 15:14:09 +0900
Subject: iommu/vt-d: Fix rwxp flags in SVM device fault callback

This is the downside of using bitfields in the struct definition, rather
than doing all the explicit masking and shifting.

Signed-off-by: David Woodhouse
---
 drivers/iommu/intel-svm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'drivers/iommu')

diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c
index 48a3d4a..c69e3f9 100644
--- a/drivers/iommu/intel-svm.c
+++ b/drivers/iommu/intel-svm.c
@@ -564,7 +564,7 @@ static irqreturn_t prq_event_thread(int irq, void *d)
 		if (sdev && sdev->ops && sdev->ops->fault_cb) {
 			int rwxp = (req->rd_req << 3) | (req->wr_req << 2) |
-				(req->wr_req << 1) | (req->exe_req);
+				(req->exe_req << 1) | (req->priv_req);
 			sdev->ops->fault_cb(sdev->dev, req->pasid, req->addr,
 					    req->private, rwxp, result);
 		}
 		/* We get here in the error case where the PASID lookup failed,
-- cgit v1.1
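
[Editorial sketch, not part of the patch series] The size-in-address encoding used by the
"Fix SVM IOTLB flush handling" change above is easy to misread, so here is a minimal,
self-contained user-space sketch of the arithmetic. It assumes VTD_PAGE_SHIFT is 12, uses a
local rounddown_pow_of_two() stand-in for the kernel helper, and picks a hypothetical request
(start 0x123458000, 16 pages) chosen so that it reproduces the 0x12345f000 value quoted in
the commit message.

/* Sketch of the device-IOTLB "size encoded in the address" calculation.
 * Assumptions: VTD_PAGE_SHIFT == 12; the descriptor's address field drops
 * the low 12 bits; rounddown_pow_of_two() is a local stand-in. */
#include <stdio.h>
#include <stdint.h>

#define VTD_PAGE_SHIFT 12

static uint64_t rounddown_pow_of_two(uint64_t x)
{
	/* Clear the lowest set bit until only the highest remains. */
	while (x & (x - 1))
		x &= x - 1;
	return x;
}

int main(void)
{
	uint64_t address = 0x123458000ULL;	/* hypothetical request start */
	uint64_t pages   = 16;			/* 64KiB request */

	uint64_t last = address + ((pages - 1) << VTD_PAGE_SHIFT);
	uint64_t mask = rounddown_pow_of_two(address ^ last);
	uint64_t encoded = ((address & ~mask) | (mask - 1)) &
			   ~((1ULL << VTD_PAGE_SHIFT) - 1);

	/* The least significant zero bit of 'encoded' gives the flush size:
	 * 0x12345f000 has bit 17 clear, so the hardware flushes a 256KiB
	 * window, 0x123440000-0x12347ffff, which covers the request. */
	printf("encoded address field: 0x%llx\n", (unsigned long long)encoded);
	printf("requested range: 0x%llx-0x%llx\n", (unsigned long long)address,
	       (unsigned long long)(last + (1ULL << VTD_PAGE_SHIFT) - 1));
	return 0;
}

Run as-is, this prints 0x12345f000, matching the example in the commit message. The design
trade-off in the kernel change is to over-flush to the next naturally aligned power-of-two
window rather than split the request into multiple invalidation descriptors.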