diff options
Diffstat (limited to 'drivers/pci/intel-iommu.c')
-rw-r--r-- | drivers/pci/intel-iommu.c | 323 |
1 files changed, 161 insertions, 162 deletions
diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 562221e..855dd7c 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -38,6 +38,7 @@ #include <linux/intel-iommu.h> #include <linux/sysdev.h> #include <linux/tboot.h> +#include <linux/dmi.h> #include <asm/cacheflush.h> #include <asm/iommu.h> #include "pci.h" @@ -56,8 +57,14 @@ #define MAX_AGAW_WIDTH 64 -#define DOMAIN_MAX_ADDR(gaw) ((((u64)1) << gaw) - 1) -#define DOMAIN_MAX_PFN(gaw) ((((u64)1) << (gaw-VTD_PAGE_SHIFT)) - 1) +#define __DOMAIN_MAX_PFN(gaw) ((((uint64_t)1) << (gaw-VTD_PAGE_SHIFT)) - 1) +#define __DOMAIN_MAX_ADDR(gaw) ((((uint64_t)1) << gaw) - 1) + +/* We limit DOMAIN_MAX_PFN to fit in an unsigned long, and DOMAIN_MAX_ADDR + to match. That way, we can use 'unsigned long' for PFNs with impunity. */ +#define DOMAIN_MAX_PFN(gaw) ((unsigned long) min_t(uint64_t, \ + __DOMAIN_MAX_PFN(gaw), (unsigned long)-1)) +#define DOMAIN_MAX_ADDR(gaw) (((uint64_t)__DOMAIN_MAX_PFN(gaw)) << VTD_PAGE_SHIFT) #define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT) #define DMA_32BIT_PFN IOVA_PFN(DMA_BIT_MASK(32)) @@ -252,7 +259,8 @@ static inline int first_pte_in_page(struct dma_pte *pte) * 2. It maps to each iommu if successful. * 3. Each iommu mapps to this domain if successful. */ -struct dmar_domain *si_domain; +static struct dmar_domain *si_domain; +static int hw_pass_through = 1; /* devices under the same p2p bridge are owned in one domain */ #define DOMAIN_FLAG_P2P_MULTIPLE_DEVICES (1 << 0) @@ -728,7 +736,7 @@ static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain, return NULL; domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE); - pteval = (virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE; + pteval = ((uint64_t)virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE; if (cmpxchg64(&pte->val, 0ULL, pteval)) { /* Someone else set it while we were thinking; use theirs. */ free_pgtable_page(tmp_page); @@ -778,9 +786,10 @@ static void dma_pte_clear_range(struct dmar_domain *domain, BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width); BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width); + BUG_ON(start_pfn > last_pfn); /* we don't need lock here; nobody else touches the iova range */ - while (start_pfn <= last_pfn) { + do { first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1); if (!pte) { start_pfn = align_to_level(start_pfn + 1, 2); @@ -794,7 +803,8 @@ static void dma_pte_clear_range(struct dmar_domain *domain, domain_flush_cache(domain, first_pte, (void *)pte - (void *)first_pte); - } + + } while (start_pfn && start_pfn <= last_pfn); } /* free page table pages. last level pte should already be cleared */ @@ -810,6 +820,7 @@ static void dma_pte_free_pagetable(struct dmar_domain *domain, BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width); BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width); + BUG_ON(start_pfn > last_pfn); /* We don't need lock here; nobody else touches the iova range */ level = 2; @@ -820,7 +831,7 @@ static void dma_pte_free_pagetable(struct dmar_domain *domain, if (tmp + level_size(level) - 1 > last_pfn) return; - while (tmp + level_size(level) - 1 <= last_pfn) { + do { first_pte = pte = dma_pfn_level_pte(domain, tmp, level); if (!pte) { tmp = align_to_level(tmp + 1, level + 1); @@ -839,7 +850,7 @@ static void dma_pte_free_pagetable(struct dmar_domain *domain, domain_flush_cache(domain, first_pte, (void *)pte - (void *)first_pte); - } + } while (tmp && tmp + level_size(level) - 1 <= last_pfn); level++; } /* free pgd */ @@ -1158,6 +1169,8 @@ static int iommu_init_domains(struct intel_iommu *iommu) pr_debug("Number of Domains supportd <%ld>\n", ndomains); nlongs = BITS_TO_LONGS(ndomains); + spin_lock_init(&iommu->lock); + /* TBD: there might be 64K domains, * consider other allocation for future chip */ @@ -1170,12 +1183,9 @@ static int iommu_init_domains(struct intel_iommu *iommu) GFP_KERNEL); if (!iommu->domains) { printk(KERN_ERR "Allocating domain array failed\n"); - kfree(iommu->domain_ids); return -ENOMEM; } - spin_lock_init(&iommu->lock); - /* * if Caching mode is set, then invalid translations are tagged * with domainid 0. Hence we need to pre-allocate it. @@ -1195,22 +1205,24 @@ void free_dmar_iommu(struct intel_iommu *iommu) int i; unsigned long flags; - i = find_first_bit(iommu->domain_ids, cap_ndoms(iommu->cap)); - for (; i < cap_ndoms(iommu->cap); ) { - domain = iommu->domains[i]; - clear_bit(i, iommu->domain_ids); + if ((iommu->domains) && (iommu->domain_ids)) { + i = find_first_bit(iommu->domain_ids, cap_ndoms(iommu->cap)); + for (; i < cap_ndoms(iommu->cap); ) { + domain = iommu->domains[i]; + clear_bit(i, iommu->domain_ids); + + spin_lock_irqsave(&domain->iommu_lock, flags); + if (--domain->iommu_count == 0) { + if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) + vm_domain_exit(domain); + else + domain_exit(domain); + } + spin_unlock_irqrestore(&domain->iommu_lock, flags); - spin_lock_irqsave(&domain->iommu_lock, flags); - if (--domain->iommu_count == 0) { - if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) - vm_domain_exit(domain); - else - domain_exit(domain); + i = find_next_bit(iommu->domain_ids, + cap_ndoms(iommu->cap), i+1); } - spin_unlock_irqrestore(&domain->iommu_lock, flags); - - i = find_next_bit(iommu->domain_ids, - cap_ndoms(iommu->cap), i+1); } if (iommu->gcmd & DMA_GCMD_TE) @@ -1310,7 +1322,6 @@ static void iommu_detach_domain(struct dmar_domain *domain, } static struct iova_domain reserved_iova_list; -static struct lock_class_key reserved_alloc_key; static struct lock_class_key reserved_rbtree_key; static void dmar_init_reserved_ranges(void) @@ -1321,8 +1332,6 @@ static void dmar_init_reserved_ranges(void) init_iova_domain(&reserved_iova_list, DMA_32BIT_PFN); - lockdep_set_class(&reserved_iova_list.iova_alloc_lock, - &reserved_alloc_key); lockdep_set_class(&reserved_iova_list.iova_rbtree_lock, &reserved_rbtree_key); @@ -1959,14 +1968,35 @@ static int iommu_prepare_identity_map(struct pci_dev *pdev, struct dmar_domain *domain; int ret; - printk(KERN_INFO - "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n", - pci_name(pdev), start, end); - domain = get_domain_for_dev(pdev, DEFAULT_DOMAIN_ADDRESS_WIDTH); if (!domain) return -ENOMEM; + /* For _hardware_ passthrough, don't bother. But for software + passthrough, we do it anyway -- it may indicate a memory + range which is reserved in E820, so which didn't get set + up to start with in si_domain */ + if (domain == si_domain && hw_pass_through) { + printk("Ignoring identity map for HW passthrough device %s [0x%Lx - 0x%Lx]\n", + pci_name(pdev), start, end); + return 0; + } + + printk(KERN_INFO + "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n", + pci_name(pdev), start, end); + + if (end >> agaw_to_width(domain->agaw)) { + WARN(1, "Your BIOS is broken; RMRR exceeds permitted address width (%d bits)\n" + "BIOS vendor: %s; Ver: %s; Product Version: %s\n", + agaw_to_width(domain->agaw), + dmi_get_system_info(DMI_BIOS_VENDOR), + dmi_get_system_info(DMI_BIOS_VERSION), + dmi_get_system_info(DMI_PRODUCT_VERSION)); + ret = -EIO; + goto error; + } + ret = iommu_domain_identity_map(domain, start, end); if (ret) goto error; @@ -2017,23 +2047,6 @@ static inline void iommu_prepare_isa(void) } #endif /* !CONFIG_DMAR_FLPY_WA */ -/* Initialize each context entry as pass through.*/ -static int __init init_context_pass_through(void) -{ - struct pci_dev *pdev = NULL; - struct dmar_domain *domain; - int ret; - - for_each_pci_dev(pdev) { - domain = get_domain_for_dev(pdev, DEFAULT_DOMAIN_ADDRESS_WIDTH); - ret = domain_context_mapping(domain, pdev, - CONTEXT_TT_PASS_THROUGH); - if (ret) - return ret; - } - return 0; -} - static int md_domain_init(struct dmar_domain *domain, int guest_width); static int __init si_domain_work_fn(unsigned long start_pfn, @@ -2048,7 +2061,7 @@ static int __init si_domain_work_fn(unsigned long start_pfn, } -static int si_domain_init(void) +static int __init si_domain_init(int hw) { struct dmar_drhd_unit *drhd; struct intel_iommu *iommu; @@ -2075,6 +2088,9 @@ static int si_domain_init(void) si_domain->flags = DOMAIN_FLAG_STATIC_IDENTITY; + if (hw) + return 0; + for_each_online_node(nid) { work_with_active_regions(nid, si_domain_work_fn, &ret); if (ret) @@ -2101,15 +2117,23 @@ static int identity_mapping(struct pci_dev *pdev) } static int domain_add_dev_info(struct dmar_domain *domain, - struct pci_dev *pdev) + struct pci_dev *pdev, + int translation) { struct device_domain_info *info; unsigned long flags; + int ret; info = alloc_devinfo_mem(); if (!info) return -ENOMEM; + ret = domain_context_mapping(domain, pdev, translation); + if (ret) { + free_devinfo_mem(info); + return ret; + } + info->segment = pci_domain_nr(pdev->bus); info->bus = pdev->bus->number; info->devfn = pdev->devfn; @@ -2166,27 +2190,25 @@ static int iommu_should_identity_map(struct pci_dev *pdev, int startup) return 1; } -static int iommu_prepare_static_identity_mapping(void) +static int __init iommu_prepare_static_identity_mapping(int hw) { struct pci_dev *pdev = NULL; int ret; - ret = si_domain_init(); + ret = si_domain_init(hw); if (ret) return -EFAULT; for_each_pci_dev(pdev) { if (iommu_should_identity_map(pdev, 1)) { - printk(KERN_INFO "IOMMU: identity mapping for device %s\n", - pci_name(pdev)); + printk(KERN_INFO "IOMMU: %s identity mapping for device %s\n", + hw ? "hardware" : "software", pci_name(pdev)); - ret = domain_context_mapping(si_domain, pdev, + ret = domain_add_dev_info(si_domain, pdev, + hw ? CONTEXT_TT_PASS_THROUGH : CONTEXT_TT_MULTI_LEVEL); if (ret) return ret; - ret = domain_add_dev_info(si_domain, pdev); - if (ret) - return ret; } } @@ -2200,14 +2222,6 @@ int __init init_dmars(void) struct pci_dev *pdev; struct intel_iommu *iommu; int i, ret; - int pass_through = 1; - - /* - * In case pass through can not be enabled, iommu tries to use identity - * mapping. - */ - if (iommu_pass_through) - iommu_identity_mapping = 1; /* * for each drhd @@ -2235,7 +2249,6 @@ int __init init_dmars(void) deferred_flush = kzalloc(g_num_of_iommus * sizeof(struct deferred_flush_tables), GFP_KERNEL); if (!deferred_flush) { - kfree(g_iommus); ret = -ENOMEM; goto error; } @@ -2262,14 +2275,8 @@ int __init init_dmars(void) goto error; } if (!ecap_pass_through(iommu->ecap)) - pass_through = 0; + hw_pass_through = 0; } - if (iommu_pass_through) - if (!pass_through) { - printk(KERN_INFO - "Pass Through is not supported by hardware.\n"); - iommu_pass_through = 0; - } /* * Start from the sane iommu hardware state. @@ -2324,64 +2331,57 @@ int __init init_dmars(void) } } + if (iommu_pass_through) + iommu_identity_mapping = 1; +#ifdef CONFIG_DMAR_BROKEN_GFX_WA + else + iommu_identity_mapping = 2; +#endif /* - * If pass through is set and enabled, context entries of all pci - * devices are intialized by pass through translation type. + * If pass through is not set or not enabled, setup context entries for + * identity mappings for rmrr, gfx, and isa and may fall back to static + * identity mapping if iommu_identity_mapping is set. */ - if (iommu_pass_through) { - ret = init_context_pass_through(); + if (iommu_identity_mapping) { + ret = iommu_prepare_static_identity_mapping(hw_pass_through); if (ret) { - printk(KERN_ERR "IOMMU: Pass through init failed.\n"); - iommu_pass_through = 0; + printk(KERN_CRIT "Failed to setup IOMMU pass-through\n"); + goto error; } } - /* - * If pass through is not set or not enabled, setup context entries for - * identity mappings for rmrr, gfx, and isa and may fall back to static - * identity mapping if iommu_identity_mapping is set. + * For each rmrr + * for each dev attached to rmrr + * do + * locate drhd for dev, alloc domain for dev + * allocate free domain + * allocate page table entries for rmrr + * if context not allocated for bus + * allocate and init context + * set present in root table for this bus + * init context with domain, translation etc + * endfor + * endfor */ - if (!iommu_pass_through) { -#ifdef CONFIG_DMAR_BROKEN_GFX_WA - if (!iommu_identity_mapping) - iommu_identity_mapping = 2; -#endif - if (iommu_identity_mapping) - iommu_prepare_static_identity_mapping(); - /* - * For each rmrr - * for each dev attached to rmrr - * do - * locate drhd for dev, alloc domain for dev - * allocate free domain - * allocate page table entries for rmrr - * if context not allocated for bus - * allocate and init context - * set present in root table for this bus - * init context with domain, translation etc - * endfor - * endfor - */ - printk(KERN_INFO "IOMMU: Setting RMRR:\n"); - for_each_rmrr_units(rmrr) { - for (i = 0; i < rmrr->devices_cnt; i++) { - pdev = rmrr->devices[i]; - /* - * some BIOS lists non-exist devices in DMAR - * table. - */ - if (!pdev) - continue; - ret = iommu_prepare_rmrr_dev(rmrr, pdev); - if (ret) - printk(KERN_ERR - "IOMMU: mapping reserved region failed\n"); - } + printk(KERN_INFO "IOMMU: Setting RMRR:\n"); + for_each_rmrr_units(rmrr) { + for (i = 0; i < rmrr->devices_cnt; i++) { + pdev = rmrr->devices[i]; + /* + * some BIOS lists non-exist devices in DMAR + * table. + */ + if (!pdev) + continue; + ret = iommu_prepare_rmrr_dev(rmrr, pdev); + if (ret) + printk(KERN_ERR + "IOMMU: mapping reserved region failed\n"); } - - iommu_prepare_isa(); } + iommu_prepare_isa(); + /* * for each drhd * enable fault log @@ -2404,11 +2404,12 @@ int __init init_dmars(void) iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL); iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH); - iommu_disable_protect_mem_regions(iommu); ret = iommu_enable_translation(iommu); if (ret) goto error; + + iommu_disable_protect_mem_regions(iommu); } return 0; @@ -2455,8 +2456,7 @@ static struct iova *intel_alloc_iova(struct device *dev, return iova; } -static struct dmar_domain * -get_valid_domain_for_dev(struct pci_dev *pdev) +static struct dmar_domain *__get_valid_domain_for_dev(struct pci_dev *pdev) { struct dmar_domain *domain; int ret; @@ -2484,6 +2484,18 @@ get_valid_domain_for_dev(struct pci_dev *pdev) return domain; } +static inline struct dmar_domain *get_valid_domain_for_dev(struct pci_dev *dev) +{ + struct device_domain_info *info; + + /* No lock here, assumes no domain exit in normal case */ + info = dev->dev.archdata.iommu; + if (likely(info)) + return info->domain; + + return __get_valid_domain_for_dev(dev); +} + static int iommu_dummy(struct pci_dev *pdev) { return pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO; @@ -2526,10 +2538,10 @@ static int iommu_no_mapping(struct device *dev) */ if (iommu_should_identity_map(pdev, 0)) { int ret; - ret = domain_add_dev_info(si_domain, pdev); - if (ret) - return 0; - ret = domain_context_mapping(si_domain, pdev, CONTEXT_TT_MULTI_LEVEL); + ret = domain_add_dev_info(si_domain, pdev, + hw_pass_through ? + CONTEXT_TT_PASS_THROUGH : + CONTEXT_TT_MULTI_LEVEL); if (!ret) { printk(KERN_INFO "64bit %s uses identity mapping\n", pci_name(pdev)); @@ -2638,10 +2650,9 @@ static void flush_unmaps(void) unsigned long mask; struct iova *iova = deferred_flush[i].iova[j]; - mask = (iova->pfn_hi - iova->pfn_lo + 1) << PAGE_SHIFT; - mask = ilog2(mask >> VTD_PAGE_SHIFT); + mask = ilog2(mm_to_dma_pfn(iova->pfn_hi - iova->pfn_lo + 1)); iommu_flush_dev_iotlb(deferred_flush[i].domain[j], - iova->pfn_lo << PAGE_SHIFT, mask); + (uint64_t)iova->pfn_lo << PAGE_SHIFT, mask); __free_iova(&deferred_flush[i].domain[j]->iovad, iova); } deferred_flush[i].next = 0; @@ -2734,12 +2745,6 @@ static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr, } } -static void intel_unmap_single(struct device *dev, dma_addr_t dev_addr, size_t size, - int dir) -{ - intel_unmap_page(dev, dev_addr, size, dir, NULL); -} - static void *intel_alloc_coherent(struct device *hwdev, size_t size, dma_addr_t *dma_handle, gfp_t flags) { @@ -2772,7 +2777,7 @@ static void intel_free_coherent(struct device *hwdev, size_t size, void *vaddr, size = PAGE_ALIGN(size); order = get_order(size); - intel_unmap_single(hwdev, dma_handle, size, DMA_BIDIRECTIONAL); + intel_unmap_page(hwdev, dma_handle, size, DMA_BIDIRECTIONAL, NULL); free_pages((unsigned long)vaddr, order); } @@ -2808,11 +2813,18 @@ static void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist, /* free page tables */ dma_pte_free_pagetable(domain, start_pfn, last_pfn); - iommu_flush_iotlb_psi(iommu, domain->id, start_pfn, - (last_pfn - start_pfn + 1)); - - /* free iova */ - __free_iova(&domain->iovad, iova); + if (intel_iommu_strict) { + iommu_flush_iotlb_psi(iommu, domain->id, start_pfn, + last_pfn - start_pfn + 1); + /* free iova */ + __free_iova(&domain->iovad, iova); + } else { + add_unmap(domain, iova); + /* + * queue up the release of the unmap to save the 1/6th of the + * cpu used up by the iotlb flush operation... + */ + } } static int intel_nontranslate_map_sg(struct device *hddev, @@ -3056,8 +3068,8 @@ static int init_iommu_hw(void) DMA_CCMD_GLOBAL_INVL); iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH); - iommu_disable_protect_mem_regions(iommu); iommu_enable_translation(iommu); + iommu_disable_protect_mem_regions(iommu); } return 0; @@ -3205,7 +3217,7 @@ int __init intel_iommu_init(void) * Check the need for DMA-remapping initialization now. * Above initialization will also be used by Interrupt-remapping. */ - if (no_iommu || (swiotlb && !iommu_pass_through) || dmar_disabled) + if (no_iommu || swiotlb || dmar_disabled) return -ENODEV; iommu_init_mempool(); @@ -3227,14 +3239,7 @@ int __init intel_iommu_init(void) init_timer(&unmap_timer); force_iommu = 1; - - if (!iommu_pass_through) { - printk(KERN_INFO - "Multi-level page-table translation for DMAR.\n"); - dma_ops = &intel_dma_ops; - } else - printk(KERN_INFO - "DMAR: Pass through translation for DMAR.\n"); + dma_ops = &intel_dma_ops; init_iommu_sysfs(); @@ -3517,7 +3522,6 @@ static int intel_iommu_attach_device(struct iommu_domain *domain, struct intel_iommu *iommu; int addr_width; u64 end; - int ret; /* normally pdev is not mapped */ if (unlikely(domain_context_mapped(pdev))) { @@ -3549,12 +3553,7 @@ static int intel_iommu_attach_device(struct iommu_domain *domain, return -EFAULT; } - ret = domain_add_dev_info(dmar_domain, pdev); - if (ret) - return ret; - - ret = domain_context_mapping(dmar_domain, pdev, CONTEXT_TT_MULTI_LEVEL); - return ret; + return domain_add_dev_info(dmar_domain, pdev, CONTEXT_TT_MULTI_LEVEL); } static void intel_iommu_detach_device(struct iommu_domain *domain, |