summaryrefslogtreecommitdiffstats
path: root/drivers/pci/intel-iommu.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/pci/intel-iommu.c')
-rw-r--r--drivers/pci/intel-iommu.c217
1 files changed, 168 insertions, 49 deletions
diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index 855dd7c..4173125 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -48,6 +48,7 @@
#define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
#define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
+#define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e)
#define IOAPIC_RANGE_START (0xfee00000)
#define IOAPIC_RANGE_END (0xfeefffff)
@@ -94,6 +95,7 @@ static inline unsigned long virt_to_dma_pfn(void *p)
/* global iommu list, set NULL for ignored DMAR units */
static struct intel_iommu **g_iommus;
+static void __init check_tylersburg_isoch(void);
static int rwbf_quirk;
/*
@@ -275,6 +277,7 @@ static int hw_pass_through = 1;
struct dmar_domain {
int id; /* domain id */
+ int nid; /* node id */
unsigned long iommu_bmp; /* bitmap of iommus this domain uses*/
struct list_head devices; /* all devices' list */
@@ -302,7 +305,7 @@ struct device_domain_info {
int segment; /* PCI domain */
u8 bus; /* PCI bus number */
u8 devfn; /* PCI devfn number */
- struct pci_dev *dev; /* it's NULL for PCIE-to-PCI bridge */
+ struct pci_dev *dev; /* it's NULL for PCIe-to-PCI bridge */
struct intel_iommu *iommu; /* IOMMU used by this device */
struct dmar_domain *domain; /* pointer to domain */
};
@@ -384,30 +387,14 @@ static struct kmem_cache *iommu_domain_cache;
static struct kmem_cache *iommu_devinfo_cache;
static struct kmem_cache *iommu_iova_cache;
-static inline void *iommu_kmem_cache_alloc(struct kmem_cache *cachep)
+static inline void *alloc_pgtable_page(int node)
{
- unsigned int flags;
- void *vaddr;
-
- /* trying to avoid low memory issues */
- flags = current->flags & PF_MEMALLOC;
- current->flags |= PF_MEMALLOC;
- vaddr = kmem_cache_alloc(cachep, GFP_ATOMIC);
- current->flags &= (~PF_MEMALLOC | flags);
- return vaddr;
-}
-
+ struct page *page;
+ void *vaddr = NULL;
-static inline void *alloc_pgtable_page(void)
-{
- unsigned int flags;
- void *vaddr;
-
- /* trying to avoid low memory issues */
- flags = current->flags & PF_MEMALLOC;
- current->flags |= PF_MEMALLOC;
- vaddr = (void *)get_zeroed_page(GFP_ATOMIC);
- current->flags &= (~PF_MEMALLOC | flags);
+ page = alloc_pages_node(node, GFP_ATOMIC | __GFP_ZERO, 0);
+ if (page)
+ vaddr = page_address(page);
return vaddr;
}
@@ -418,7 +405,7 @@ static inline void free_pgtable_page(void *vaddr)
static inline void *alloc_domain_mem(void)
{
- return iommu_kmem_cache_alloc(iommu_domain_cache);
+ return kmem_cache_alloc(iommu_domain_cache, GFP_ATOMIC);
}
static void free_domain_mem(void *vaddr)
@@ -428,7 +415,7 @@ static void free_domain_mem(void *vaddr)
static inline void * alloc_devinfo_mem(void)
{
- return iommu_kmem_cache_alloc(iommu_devinfo_cache);
+ return kmem_cache_alloc(iommu_devinfo_cache, GFP_ATOMIC);
}
static inline void free_devinfo_mem(void *vaddr)
@@ -438,7 +425,7 @@ static inline void free_devinfo_mem(void *vaddr)
struct iova *alloc_iova_mem(void)
{
- return iommu_kmem_cache_alloc(iommu_iova_cache);
+ return kmem_cache_alloc(iommu_iova_cache, GFP_ATOMIC);
}
void free_iova_mem(struct iova *iova)
@@ -587,7 +574,8 @@ static struct context_entry * device_to_context_entry(struct intel_iommu *iommu,
root = &iommu->root_entry[bus];
context = get_context_addr_from_root(root);
if (!context) {
- context = (struct context_entry *)alloc_pgtable_page();
+ context = (struct context_entry *)
+ alloc_pgtable_page(iommu->node);
if (!context) {
spin_unlock_irqrestore(&iommu->lock, flags);
return NULL;
@@ -730,7 +718,7 @@ static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
if (!dma_pte_present(pte)) {
uint64_t pteval;
- tmp_page = alloc_pgtable_page();
+ tmp_page = alloc_pgtable_page(domain->nid);
if (!tmp_page)
return NULL;
@@ -866,7 +854,7 @@ static int iommu_alloc_root_entry(struct intel_iommu *iommu)
struct root_entry *root;
unsigned long flags;
- root = (struct root_entry *)alloc_pgtable_page();
+ root = (struct root_entry *)alloc_pgtable_page(iommu->node);
if (!root)
return -ENOMEM;
@@ -1261,6 +1249,7 @@ static struct dmar_domain *alloc_domain(void)
if (!domain)
return NULL;
+ domain->nid = -1;
memset(&domain->iommu_bmp, 0, sizeof(unsigned long));
domain->flags = 0;
@@ -1418,9 +1407,10 @@ static int domain_init(struct dmar_domain *domain, int guest_width)
domain->iommu_snooping = 0;
domain->iommu_count = 1;
+ domain->nid = iommu->node;
/* always allocate the top pgd */
- domain->pgd = (struct dma_pte *)alloc_pgtable_page();
+ domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
if (!domain->pgd)
return -ENOMEM;
__iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE);
@@ -1521,12 +1511,15 @@ static int domain_context_mapping_one(struct dmar_domain *domain, int segment,
/* Skip top levels of page tables for
* iommu which has less agaw than default.
+ * Unnecessary for PT mode.
*/
- for (agaw = domain->agaw; agaw != iommu->agaw; agaw--) {
- pgd = phys_to_virt(dma_pte_addr(pgd));
- if (!dma_pte_present(pgd)) {
- spin_unlock_irqrestore(&iommu->lock, flags);
- return -ENOMEM;
+ if (translation != CONTEXT_TT_PASS_THROUGH) {
+ for (agaw = domain->agaw; agaw != iommu->agaw; agaw--) {
+ pgd = phys_to_virt(dma_pte_addr(pgd));
+ if (!dma_pte_present(pgd)) {
+ spin_unlock_irqrestore(&iommu->lock, flags);
+ return -ENOMEM;
+ }
}
}
}
@@ -1575,6 +1568,8 @@ static int domain_context_mapping_one(struct dmar_domain *domain, int segment,
spin_lock_irqsave(&domain->iommu_lock, flags);
if (!test_and_set_bit(iommu->seq_id, &domain->iommu_bmp)) {
domain->iommu_count++;
+ if (domain->iommu_count == 1)
+ domain->nid = iommu->node;
domain_update_iommu_cap(domain);
}
spin_unlock_irqrestore(&domain->iommu_lock, flags);
@@ -1609,7 +1604,7 @@ domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev,
return ret;
parent = parent->bus->self;
}
- if (tmp->is_pcie) /* this is a PCIE-to-PCI bridge */
+ if (pci_is_pcie(tmp)) /* this is a PCIe-to-PCI bridge */
return domain_context_mapping_one(domain,
pci_domain_nr(tmp->subordinate),
tmp->subordinate->number, 0,
@@ -1649,7 +1644,7 @@ static int domain_context_mapped(struct pci_dev *pdev)
return ret;
parent = parent->bus->self;
}
- if (tmp->is_pcie)
+ if (pci_is_pcie(tmp))
return device_context_mapped(iommu, tmp->subordinate->number,
0);
else
@@ -1819,7 +1814,7 @@ static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw)
dev_tmp = pci_find_upstream_pcie_bridge(pdev);
if (dev_tmp) {
- if (dev_tmp->is_pcie) {
+ if (pci_is_pcie(dev_tmp)) {
bus = dev_tmp->subordinate->number;
devfn = 0;
} else {
@@ -1934,6 +1929,9 @@ error:
}
static int iommu_identity_mapping;
+#define IDENTMAP_ALL 1
+#define IDENTMAP_GFX 2
+#define IDENTMAP_AZALIA 4
static int iommu_domain_identity_map(struct dmar_domain *domain,
unsigned long long start,
@@ -1986,6 +1984,16 @@ static int iommu_prepare_identity_map(struct pci_dev *pdev,
"IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n",
pci_name(pdev), start, end);
+ if (end < start) {
+ WARN(1, "Your BIOS is broken; RMRR ends before it starts!\n"
+ "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
+ dmi_get_system_info(DMI_BIOS_VENDOR),
+ dmi_get_system_info(DMI_BIOS_VERSION),
+ dmi_get_system_info(DMI_PRODUCT_VERSION));
+ ret = -EIO;
+ goto error;
+ }
+
if (end >> agaw_to_width(domain->agaw)) {
WARN(1, "Your BIOS is broken; RMRR exceeds permitted address width (%d bits)\n"
"BIOS vendor: %s; Ver: %s; Product Version: %s\n",
@@ -2151,8 +2159,14 @@ static int domain_add_dev_info(struct dmar_domain *domain,
static int iommu_should_identity_map(struct pci_dev *pdev, int startup)
{
- if (iommu_identity_mapping == 2)
- return IS_GFX_DEVICE(pdev);
+ if ((iommu_identity_mapping & IDENTMAP_AZALIA) && IS_AZALIA(pdev))
+ return 1;
+
+ if ((iommu_identity_mapping & IDENTMAP_GFX) && IS_GFX_DEVICE(pdev))
+ return 1;
+
+ if (!(iommu_identity_mapping & IDENTMAP_ALL))
+ return 0;
/*
* We want to start off with all devices in the 1:1 domain, and
@@ -2171,7 +2185,7 @@ static int iommu_should_identity_map(struct pci_dev *pdev, int startup)
* the 1:1 domain, just in _case_ one of their siblings turns out
* not to be able to map all of memory.
*/
- if (!pdev->is_pcie) {
+ if (!pci_is_pcie(pdev)) {
if (!pci_is_root_bus(pdev->bus))
return 0;
if (pdev->class >> 8 == PCI_CLASS_BRIDGE_PCI)
@@ -2332,11 +2346,14 @@ int __init init_dmars(void)
}
if (iommu_pass_through)
- iommu_identity_mapping = 1;
+ iommu_identity_mapping |= IDENTMAP_ALL;
+
#ifdef CONFIG_DMAR_BROKEN_GFX_WA
- else
- iommu_identity_mapping = 2;
+ iommu_identity_mapping |= IDENTMAP_GFX;
#endif
+
+ check_tylersburg_isoch();
+
/*
* If pass through is not set or not enabled, setup context entries for
* identity mappings for rmrr, gfx, and isa and may fall back to static
@@ -2753,7 +2770,15 @@ static void *intel_alloc_coherent(struct device *hwdev, size_t size,
size = PAGE_ALIGN(size);
order = get_order(size);
- flags &= ~(GFP_DMA | GFP_DMA32);
+
+ if (!iommu_no_mapping(hwdev))
+ flags &= ~(GFP_DMA | GFP_DMA32);
+ else if (hwdev->coherent_dma_mask < dma_get_required_mask(hwdev)) {
+ if (hwdev->coherent_dma_mask < DMA_BIT_MASK(32))
+ flags |= GFP_DMA;
+ else
+ flags |= GFP_DMA32;
+ }
vaddr = (void *)__get_free_pages(flags, order);
if (!vaddr)
@@ -3193,6 +3218,36 @@ static int __init init_iommu_sysfs(void)
}
#endif /* CONFIG_PM */
+/*
+ * Here we only respond to action of unbound device from driver.
+ *
+ * Added device is not attached to its DMAR domain here yet. That will happen
+ * when mapping the device to iova.
+ */
+static int device_notifier(struct notifier_block *nb,
+ unsigned long action, void *data)
+{
+ struct device *dev = data;
+ struct pci_dev *pdev = to_pci_dev(dev);
+ struct dmar_domain *domain;
+
+ if (iommu_no_mapping(dev))
+ return 0;
+
+ domain = find_domain(pdev);
+ if (!domain)
+ return 0;
+
+ if (action == BUS_NOTIFY_UNBOUND_DRIVER && !iommu_pass_through)
+ domain_remove_one_dev_info(domain, pdev);
+
+ return 0;
+}
+
+static struct notifier_block device_nb = {
+ .notifier_call = device_notifier,
+};
+
int __init intel_iommu_init(void)
{
int ret = 0;
@@ -3217,7 +3272,7 @@ int __init intel_iommu_init(void)
* Check the need for DMA-remapping initialization now.
* Above initialization will also be used by Interrupt-remapping.
*/
- if (no_iommu || swiotlb || dmar_disabled)
+ if (no_iommu || dmar_disabled)
return -ENODEV;
iommu_init_mempool();
@@ -3238,13 +3293,17 @@ int __init intel_iommu_init(void)
"PCI-DMA: Intel(R) Virtualization Technology for Directed I/O\n");
init_timer(&unmap_timer);
- force_iommu = 1;
+#ifdef CONFIG_SWIOTLB
+ swiotlb = 0;
+#endif
dma_ops = &intel_dma_ops;
init_iommu_sysfs();
register_iommu(&intel_iommu_ops);
+ bus_register_notifier(&pci_bus_type, &device_nb);
+
return 0;
}
@@ -3266,7 +3325,7 @@ static void iommu_detach_dependent_devices(struct intel_iommu *iommu,
parent->devfn);
parent = parent->bus->self;
}
- if (tmp->is_pcie) /* this is a PCIE-to-PCI bridge */
+ if (pci_is_pcie(tmp)) /* this is a PCIe-to-PCI bridge */
iommu_detach_dev(iommu,
tmp->subordinate->number, 0);
else /* this is a legacy PCI bridge */
@@ -3402,6 +3461,7 @@ static struct dmar_domain *iommu_alloc_vm_domain(void)
return NULL;
domain->id = vm_domid++;
+ domain->nid = -1;
memset(&domain->iommu_bmp, 0, sizeof(unsigned long));
domain->flags = DOMAIN_FLAG_VIRTUAL_MACHINE;
@@ -3428,9 +3488,10 @@ static int md_domain_init(struct dmar_domain *domain, int guest_width)
domain->iommu_coherency = 0;
domain->iommu_snooping = 0;
domain->max_addr = 0;
+ domain->nid = -1;
/* always allocate the top pgd */
- domain->pgd = (struct dma_pte *)alloc_pgtable_page();
+ domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
if (!domain->pgd)
return -ENOMEM;
domain_flush_cache(domain, domain->pgd, PAGE_SIZE);
@@ -3670,3 +3731,61 @@ static void __devinit quirk_iommu_rwbf(struct pci_dev *dev)
}
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_rwbf);
+
+/* On Tylersburg chipsets, some BIOSes have been known to enable the
+ ISOCH DMAR unit for the Azalia sound device, but not give it any
+ TLB entries, which causes it to deadlock. Check for that. We do
+ this in a function called from init_dmars(), instead of in a PCI
+ quirk, because we don't want to print the obnoxious "BIOS broken"
+ message if VT-d is actually disabled.
+*/
+static void __init check_tylersburg_isoch(void)
+{
+ struct pci_dev *pdev;
+ uint32_t vtisochctrl;
+
+ /* If there's no Azalia in the system anyway, forget it. */
+ pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x3a3e, NULL);
+ if (!pdev)
+ return;
+ pci_dev_put(pdev);
+
+ /* System Management Registers. Might be hidden, in which case
+ we can't do the sanity check. But that's OK, because the
+ known-broken BIOSes _don't_ actually hide it, so far. */
+ pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x342e, NULL);
+ if (!pdev)
+ return;
+
+ if (pci_read_config_dword(pdev, 0x188, &vtisochctrl)) {
+ pci_dev_put(pdev);
+ return;
+ }
+
+ pci_dev_put(pdev);
+
+ /* If Azalia DMA is routed to the non-isoch DMAR unit, fine. */
+ if (vtisochctrl & 1)
+ return;
+
+ /* Drop all bits other than the number of TLB entries */
+ vtisochctrl &= 0x1c;
+
+ /* If we have the recommended number of TLB entries (16), fine. */
+ if (vtisochctrl == 0x10)
+ return;
+
+ /* Zero TLB entries? You get to ride the short bus to school. */
+ if (!vtisochctrl) {
+ WARN(1, "Your BIOS is broken; DMA routed to ISOCH DMAR unit but no TLB space.\n"
+ "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
+ dmi_get_system_info(DMI_BIOS_VENDOR),
+ dmi_get_system_info(DMI_BIOS_VERSION),
+ dmi_get_system_info(DMI_PRODUCT_VERSION));
+ iommu_identity_mapping |= IDENTMAP_AZALIA;
+ return;
+ }
+
+ printk(KERN_WARNING "DMAR: Recommended TLB entries for ISOCH unit is 16; your BIOS set %d\n",
+ vtisochctrl);
+}
OpenPOWER on IntegriCloud