diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2016-01-19 09:35:06 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2016-01-19 09:35:06 -0800 |
commit | 99e38df892234aa985185fc776647bad6f9bd7a7 (patch) | |
tree | eee5520bebf558016aac75f50e853c45cd6b2964 /drivers/iommu | |
parent | a200dcb34693084e56496960d855afdeaaf9578f (diff) | |
parent | 32704253dc008dfedead71da016b00d10cd1854f (diff) | |
download | op-kernel-dev-99e38df892234aa985185fc776647bad6f9bd7a7.zip op-kernel-dev-99e38df892234aa985185fc776647bad6f9bd7a7.tar.gz |
Merge tag 'iommu-updates-v4.5' of git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu
Pull IOMMU updates from Joerg Roedel:
"The updates include:
- Small code cleanups in the AMD IOMMUv2 driver
- Scalability improvements for the DMA-API implementation of the AMD
IOMMU driver. This is just a starting point, but already showed
some good improvements in my tests.
- Removal of the unused Renesas IPMMU/IPMMUI driver
- Updates for ARM-SMMU include:
* Some fixes to get the driver working nicely on Broadcom hardware
* A change to the io-pgtable API to indicate the unit in which to
flush (all callers converted, with Ack from Laurent)
* Use of devm_* for allocating/freeing the SMMUv3 buffers
- Some other small fixes and improvements for other drivers"
* tag 'iommu-updates-v4.5' of git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu: (46 commits)
iommu/vt-d: Fix up error handling in alloc_iommu
iommu/vt-d: Check the return value of iommu_device_create()
iommu/amd: Remove an unneeded condition
iommu/amd: Preallocate dma_ops apertures based on dma_mask
iommu/amd: Use trylock to aquire bitmap_lock
iommu/amd: Make dma_ops_domain->next_index percpu
iommu/amd: Relax locking in dma_ops path
iommu/amd: Initialize new aperture range before making it visible
iommu/amd: Build io page-tables with cmpxchg64
iommu/amd: Allocate new aperture ranges in dma_ops_alloc_addresses
iommu/amd: Optimize dma_ops_free_addresses
iommu/amd: Remove need_flush from struct dma_ops_domain
iommu/amd: Iterate over all aperture ranges in dma_ops_area_alloc
iommu/amd: Flush iommu tlb in dma_ops_free_addresses
iommu/amd: Rename dma_ops_domain->next_address to next_index
iommu/amd: Remove 'start' parameter from dma_ops_area_alloc
iommu/amd: Flush iommu tlb in dma_ops_aperture_alloc()
iommu/amd: Retry address allocation within one aperture
iommu/amd: Move aperture_range.offset to another cache-line
iommu/amd: Add dma_ops_aperture_alloc() function
...
Diffstat (limited to 'drivers/iommu')
-rw-r--r-- | drivers/iommu/Kconfig | 75 | ||||
-rw-r--r-- | drivers/iommu/Makefile | 2 | ||||
-rw-r--r-- | drivers/iommu/amd_iommu.c | 396 | ||||
-rw-r--r-- | drivers/iommu/amd_iommu_types.h | 40 | ||||
-rw-r--r-- | drivers/iommu/amd_iommu_v2.c | 38 | ||||
-rw-r--r-- | drivers/iommu/arm-smmu-v3.c | 210 | ||||
-rw-r--r-- | drivers/iommu/arm-smmu.c | 23 | ||||
-rw-r--r-- | drivers/iommu/dmar.c | 12 | ||||
-rw-r--r-- | drivers/iommu/io-pgtable-arm.c | 49 | ||||
-rw-r--r-- | drivers/iommu/io-pgtable.h | 6 | ||||
-rw-r--r-- | drivers/iommu/ipmmu-vmsa.c | 4 | ||||
-rw-r--r-- | drivers/iommu/msm_iommu_dev.c | 25 | ||||
-rw-r--r-- | drivers/iommu/s390-iommu.c | 4 | ||||
-rw-r--r-- | drivers/iommu/shmobile-iommu.c | 402 | ||||
-rw-r--r-- | drivers/iommu/shmobile-ipmmu.c | 129 | ||||
-rw-r--r-- | drivers/iommu/shmobile-ipmmu.h | 34 |
16 files changed, 401 insertions, 1048 deletions
diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index b9094e9..a1e75cb 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -263,81 +263,6 @@ config EXYNOS_IOMMU_DEBUG Say N unless you need kernel log message for IOMMU debugging. -config SHMOBILE_IPMMU - bool - -config SHMOBILE_IPMMU_TLB - bool - -config SHMOBILE_IOMMU - bool "IOMMU for Renesas IPMMU/IPMMUI" - default n - depends on ARM && MMU - depends on ARCH_SHMOBILE || COMPILE_TEST - select IOMMU_API - select ARM_DMA_USE_IOMMU - select SHMOBILE_IPMMU - select SHMOBILE_IPMMU_TLB - help - Support for Renesas IPMMU/IPMMUI. This option enables - remapping of DMA memory accesses from all of the IP blocks - on the ICB. - - Warning: Drivers (including userspace drivers of UIO - devices) of the IP blocks on the ICB *must* use addresses - allocated from the IPMMU (iova) for DMA with this option - enabled. - - If unsure, say N. - -choice - prompt "IPMMU/IPMMUI address space size" - default SHMOBILE_IOMMU_ADDRSIZE_2048MB - depends on SHMOBILE_IOMMU - help - This option sets IPMMU/IPMMUI address space size by - adjusting the 1st level page table size. The page table size - is calculated as follows: - - page table size = number of page table entries * 4 bytes - number of page table entries = address space size / 1 MiB - - For example, when the address space size is 2048 MiB, the - 1st level page table size is 8192 bytes. - - config SHMOBILE_IOMMU_ADDRSIZE_2048MB - bool "2 GiB" - - config SHMOBILE_IOMMU_ADDRSIZE_1024MB - bool "1 GiB" - - config SHMOBILE_IOMMU_ADDRSIZE_512MB - bool "512 MiB" - - config SHMOBILE_IOMMU_ADDRSIZE_256MB - bool "256 MiB" - - config SHMOBILE_IOMMU_ADDRSIZE_128MB - bool "128 MiB" - - config SHMOBILE_IOMMU_ADDRSIZE_64MB - bool "64 MiB" - - config SHMOBILE_IOMMU_ADDRSIZE_32MB - bool "32 MiB" - -endchoice - -config SHMOBILE_IOMMU_L1SIZE - int - default 8192 if SHMOBILE_IOMMU_ADDRSIZE_2048MB - default 4096 if SHMOBILE_IOMMU_ADDRSIZE_1024MB - default 2048 if SHMOBILE_IOMMU_ADDRSIZE_512MB - default 1024 if SHMOBILE_IOMMU_ADDRSIZE_256MB - default 512 if SHMOBILE_IOMMU_ADDRSIZE_128MB - default 256 if SHMOBILE_IOMMU_ADDRSIZE_64MB - default 128 if SHMOBILE_IOMMU_ADDRSIZE_32MB - config IPMMU_VMSA bool "Renesas VMSA-compatible IPMMU" depends on ARM_LPAE diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile index 68faca02..42fc0c2 100644 --- a/drivers/iommu/Makefile +++ b/drivers/iommu/Makefile @@ -22,7 +22,5 @@ obj-$(CONFIG_ROCKCHIP_IOMMU) += rockchip-iommu.o obj-$(CONFIG_TEGRA_IOMMU_GART) += tegra-gart.o obj-$(CONFIG_TEGRA_IOMMU_SMMU) += tegra-smmu.o obj-$(CONFIG_EXYNOS_IOMMU) += exynos-iommu.o -obj-$(CONFIG_SHMOBILE_IOMMU) += shmobile-iommu.o -obj-$(CONFIG_SHMOBILE_IPMMU) += shmobile-ipmmu.o obj-$(CONFIG_FSL_PAMU) += fsl_pamu.o fsl_pamu_domain.o obj-$(CONFIG_S390_IOMMU) += s390-iommu.o diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 8b2be1e..539b0de 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -35,6 +35,7 @@ #include <linux/msi.h> #include <linux/dma-contiguous.h> #include <linux/irqdomain.h> +#include <linux/percpu.h> #include <asm/irq_remapping.h> #include <asm/io_apic.h> #include <asm/apic.h> @@ -114,6 +115,45 @@ struct kmem_cache *amd_iommu_irq_cache; static void update_domain(struct protection_domain *domain); static int protection_domain_init(struct protection_domain *domain); +/* + * For dynamic growth the aperture size is split into ranges of 128MB of + * DMA address space each. This struct represents one such range. + */ +struct aperture_range { + + spinlock_t bitmap_lock; + + /* address allocation bitmap */ + unsigned long *bitmap; + unsigned long offset; + unsigned long next_bit; + + /* + * Array of PTE pages for the aperture. In this array we save all the + * leaf pages of the domain page table used for the aperture. This way + * we don't need to walk the page table to find a specific PTE. We can + * just calculate its address in constant time. + */ + u64 *pte_pages[64]; +}; + +/* + * Data container for a dma_ops specific protection domain + */ +struct dma_ops_domain { + /* generic protection domain information */ + struct protection_domain domain; + + /* size of the aperture for the mappings */ + unsigned long aperture_size; + + /* aperture index we start searching for free addresses */ + u32 __percpu *next_index; + + /* address space relevant data */ + struct aperture_range *aperture[APERTURE_MAX_RANGES]; +}; + /**************************************************************************** * * Helper functions @@ -1167,11 +1207,21 @@ static u64 *alloc_pte(struct protection_domain *domain, end_lvl = PAGE_SIZE_LEVEL(page_size); while (level > end_lvl) { - if (!IOMMU_PTE_PRESENT(*pte)) { + u64 __pte, __npte; + + __pte = *pte; + + if (!IOMMU_PTE_PRESENT(__pte)) { page = (u64 *)get_zeroed_page(gfp); if (!page) return NULL; - *pte = PM_LEVEL_PDE(level, virt_to_phys(page)); + + __npte = PM_LEVEL_PDE(level, virt_to_phys(page)); + + if (cmpxchg64(pte, __pte, __npte)) { + free_page((unsigned long)page); + continue; + } } /* No level skipping support yet */ @@ -1376,8 +1426,10 @@ static int alloc_new_range(struct dma_ops_domain *dma_dom, bool populate, gfp_t gfp) { int index = dma_dom->aperture_size >> APERTURE_RANGE_SHIFT; - struct amd_iommu *iommu; unsigned long i, old_size, pte_pgsize; + struct aperture_range *range; + struct amd_iommu *iommu; + unsigned long flags; #ifdef CONFIG_IOMMU_STRESS populate = false; @@ -1386,15 +1438,17 @@ static int alloc_new_range(struct dma_ops_domain *dma_dom, if (index >= APERTURE_MAX_RANGES) return -ENOMEM; - dma_dom->aperture[index] = kzalloc(sizeof(struct aperture_range), gfp); - if (!dma_dom->aperture[index]) + range = kzalloc(sizeof(struct aperture_range), gfp); + if (!range) return -ENOMEM; - dma_dom->aperture[index]->bitmap = (void *)get_zeroed_page(gfp); - if (!dma_dom->aperture[index]->bitmap) + range->bitmap = (void *)get_zeroed_page(gfp); + if (!range->bitmap) goto out_free; - dma_dom->aperture[index]->offset = dma_dom->aperture_size; + range->offset = dma_dom->aperture_size; + + spin_lock_init(&range->bitmap_lock); if (populate) { unsigned long address = dma_dom->aperture_size; @@ -1407,14 +1461,20 @@ static int alloc_new_range(struct dma_ops_domain *dma_dom, if (!pte) goto out_free; - dma_dom->aperture[index]->pte_pages[i] = pte_page; + range->pte_pages[i] = pte_page; address += APERTURE_RANGE_SIZE / 64; } } - old_size = dma_dom->aperture_size; - dma_dom->aperture_size += APERTURE_RANGE_SIZE; + spin_lock_irqsave(&dma_dom->domain.lock, flags); + + /* First take the bitmap_lock and then publish the range */ + spin_lock(&range->bitmap_lock); + + old_size = dma_dom->aperture_size; + dma_dom->aperture[index] = range; + dma_dom->aperture_size += APERTURE_RANGE_SIZE; /* Reserve address range used for MSI messages */ if (old_size < MSI_ADDR_BASE_LO && @@ -1461,62 +1521,123 @@ static int alloc_new_range(struct dma_ops_domain *dma_dom, update_domain(&dma_dom->domain); + spin_unlock(&range->bitmap_lock); + + spin_unlock_irqrestore(&dma_dom->domain.lock, flags); + return 0; out_free: update_domain(&dma_dom->domain); - free_page((unsigned long)dma_dom->aperture[index]->bitmap); + free_page((unsigned long)range->bitmap); - kfree(dma_dom->aperture[index]); - dma_dom->aperture[index] = NULL; + kfree(range); return -ENOMEM; } +static dma_addr_t dma_ops_aperture_alloc(struct dma_ops_domain *dom, + struct aperture_range *range, + unsigned long pages, + unsigned long dma_mask, + unsigned long boundary_size, + unsigned long align_mask, + bool trylock) +{ + unsigned long offset, limit, flags; + dma_addr_t address; + bool flush = false; + + offset = range->offset >> PAGE_SHIFT; + limit = iommu_device_max_index(APERTURE_RANGE_PAGES, offset, + dma_mask >> PAGE_SHIFT); + + if (trylock) { + if (!spin_trylock_irqsave(&range->bitmap_lock, flags)) + return -1; + } else { + spin_lock_irqsave(&range->bitmap_lock, flags); + } + + address = iommu_area_alloc(range->bitmap, limit, range->next_bit, + pages, offset, boundary_size, align_mask); + if (address == -1) { + /* Nothing found, retry one time */ + address = iommu_area_alloc(range->bitmap, limit, + 0, pages, offset, boundary_size, + align_mask); + flush = true; + } + + if (address != -1) + range->next_bit = address + pages; + + spin_unlock_irqrestore(&range->bitmap_lock, flags); + + if (flush) { + domain_flush_tlb(&dom->domain); + domain_flush_complete(&dom->domain); + } + + return address; +} + static unsigned long dma_ops_area_alloc(struct device *dev, struct dma_ops_domain *dom, unsigned int pages, unsigned long align_mask, - u64 dma_mask, - unsigned long start) + u64 dma_mask) { - unsigned long next_bit = dom->next_address % APERTURE_RANGE_SIZE; - int max_index = dom->aperture_size >> APERTURE_RANGE_SHIFT; - int i = start >> APERTURE_RANGE_SHIFT; unsigned long boundary_size, mask; unsigned long address = -1; - unsigned long limit; + bool first = true; + u32 start, i; - next_bit >>= PAGE_SHIFT; + preempt_disable(); mask = dma_get_seg_boundary(dev); +again: + start = this_cpu_read(*dom->next_index); + + /* Sanity check - is it really necessary? */ + if (unlikely(start > APERTURE_MAX_RANGES)) { + start = 0; + this_cpu_write(*dom->next_index, 0); + } + boundary_size = mask + 1 ? ALIGN(mask + 1, PAGE_SIZE) >> PAGE_SHIFT : 1UL << (BITS_PER_LONG - PAGE_SHIFT); - for (;i < max_index; ++i) { - unsigned long offset = dom->aperture[i]->offset >> PAGE_SHIFT; + for (i = 0; i < APERTURE_MAX_RANGES; ++i) { + struct aperture_range *range; + int index; - if (dom->aperture[i]->offset >= dma_mask) - break; + index = (start + i) % APERTURE_MAX_RANGES; - limit = iommu_device_max_index(APERTURE_RANGE_PAGES, offset, - dma_mask >> PAGE_SHIFT); + range = dom->aperture[index]; - address = iommu_area_alloc(dom->aperture[i]->bitmap, - limit, next_bit, pages, 0, - boundary_size, align_mask); + if (!range || range->offset >= dma_mask) + continue; + + address = dma_ops_aperture_alloc(dom, range, pages, + dma_mask, boundary_size, + align_mask, first); if (address != -1) { - address = dom->aperture[i]->offset + - (address << PAGE_SHIFT); - dom->next_address = address + (pages << PAGE_SHIFT); + address = range->offset + (address << PAGE_SHIFT); + this_cpu_write(*dom->next_index, index); break; } + } - next_bit = 0; + if (address == -1 && first) { + first = false; + goto again; } + preempt_enable(); + return address; } @@ -1526,21 +1647,14 @@ static unsigned long dma_ops_alloc_addresses(struct device *dev, unsigned long align_mask, u64 dma_mask) { - unsigned long address; - -#ifdef CONFIG_IOMMU_STRESS - dom->next_address = 0; - dom->need_flush = true; -#endif + unsigned long address = -1; - address = dma_ops_area_alloc(dev, dom, pages, align_mask, - dma_mask, dom->next_address); + while (address == -1) { + address = dma_ops_area_alloc(dev, dom, pages, + align_mask, dma_mask); - if (address == -1) { - dom->next_address = 0; - address = dma_ops_area_alloc(dev, dom, pages, align_mask, - dma_mask, 0); - dom->need_flush = true; + if (address == -1 && alloc_new_range(dom, false, GFP_ATOMIC)) + break; } if (unlikely(address == -1)) @@ -1562,6 +1676,7 @@ static void dma_ops_free_addresses(struct dma_ops_domain *dom, { unsigned i = address >> APERTURE_RANGE_SHIFT; struct aperture_range *range = dom->aperture[i]; + unsigned long flags; BUG_ON(i >= APERTURE_MAX_RANGES || range == NULL); @@ -1570,12 +1685,18 @@ static void dma_ops_free_addresses(struct dma_ops_domain *dom, return; #endif - if (address >= dom->next_address) - dom->need_flush = true; + if (amd_iommu_unmap_flush) { + domain_flush_tlb(&dom->domain); + domain_flush_complete(&dom->domain); + } address = (address % APERTURE_RANGE_SIZE) >> PAGE_SHIFT; + spin_lock_irqsave(&range->bitmap_lock, flags); + if (address + pages > range->next_bit) + range->next_bit = address + pages; bitmap_clear(range->bitmap, address, pages); + spin_unlock_irqrestore(&range->bitmap_lock, flags); } @@ -1755,6 +1876,8 @@ static void dma_ops_domain_free(struct dma_ops_domain *dom) if (!dom) return; + free_percpu(dom->next_index); + del_domain_from_list(&dom->domain); free_pagetable(&dom->domain); @@ -1769,6 +1892,23 @@ static void dma_ops_domain_free(struct dma_ops_domain *dom) kfree(dom); } +static int dma_ops_domain_alloc_apertures(struct dma_ops_domain *dma_dom, + int max_apertures) +{ + int ret, i, apertures; + + apertures = dma_dom->aperture_size >> APERTURE_RANGE_SHIFT; + ret = 0; + + for (i = apertures; i < max_apertures; ++i) { + ret = alloc_new_range(dma_dom, false, GFP_KERNEL); + if (ret) + break; + } + + return ret; +} + /* * Allocates a new protection domain usable for the dma_ops functions. * It also initializes the page table and the address allocator data @@ -1777,6 +1917,7 @@ static void dma_ops_domain_free(struct dma_ops_domain *dom) static struct dma_ops_domain *dma_ops_domain_alloc(void) { struct dma_ops_domain *dma_dom; + int cpu; dma_dom = kzalloc(sizeof(struct dma_ops_domain), GFP_KERNEL); if (!dma_dom) @@ -1785,6 +1926,10 @@ static struct dma_ops_domain *dma_ops_domain_alloc(void) if (protection_domain_init(&dma_dom->domain)) goto free_dma_dom; + dma_dom->next_index = alloc_percpu(u32); + if (!dma_dom->next_index) + goto free_dma_dom; + dma_dom->domain.mode = PAGE_MODE_2_LEVEL; dma_dom->domain.pt_root = (void *)get_zeroed_page(GFP_KERNEL); dma_dom->domain.flags = PD_DMA_OPS_MASK; @@ -1792,8 +1937,6 @@ static struct dma_ops_domain *dma_ops_domain_alloc(void) if (!dma_dom->domain.pt_root) goto free_dma_dom; - dma_dom->need_flush = false; - add_domain_to_list(&dma_dom->domain); if (alloc_new_range(dma_dom, true, GFP_KERNEL)) @@ -1804,8 +1947,9 @@ static struct dma_ops_domain *dma_ops_domain_alloc(void) * a valid dma-address. So we can use 0 as error value */ dma_dom->aperture[0]->bitmap[0] = 1; - dma_dom->next_address = 0; + for_each_possible_cpu(cpu) + *per_cpu_ptr(dma_dom->next_index, cpu) = 0; return dma_dom; @@ -2328,7 +2472,7 @@ static dma_addr_t dma_ops_domain_map(struct dma_ops_domain *dom, else if (direction == DMA_BIDIRECTIONAL) __pte |= IOMMU_PTE_IR | IOMMU_PTE_IW; - WARN_ON(*pte); + WARN_ON_ONCE(*pte); *pte = __pte; @@ -2357,7 +2501,7 @@ static void dma_ops_domain_unmap(struct dma_ops_domain *dom, pte += PM_LEVEL_INDEX(0, address); - WARN_ON(!*pte); + WARN_ON_ONCE(!*pte); *pte = 0ULL; } @@ -2393,26 +2537,11 @@ static dma_addr_t __map_single(struct device *dev, if (align) align_mask = (1UL << get_order(size)) - 1; -retry: address = dma_ops_alloc_addresses(dev, dma_dom, pages, align_mask, dma_mask); - if (unlikely(address == DMA_ERROR_CODE)) { - /* - * setting next_address here will let the address - * allocator only scan the new allocated range in the - * first run. This is a small optimization. - */ - dma_dom->next_address = dma_dom->aperture_size; - if (alloc_new_range(dma_dom, false, GFP_ATOMIC)) - goto out; - - /* - * aperture was successfully enlarged by 128 MB, try - * allocation again - */ - goto retry; - } + if (address == DMA_ERROR_CODE) + goto out; start = address; for (i = 0; i < pages; ++i) { @@ -2427,11 +2556,10 @@ retry: ADD_STATS_COUNTER(alloced_io_mem, size); - if (unlikely(dma_dom->need_flush && !amd_iommu_unmap_flush)) { - domain_flush_tlb(&dma_dom->domain); - dma_dom->need_flush = false; - } else if (unlikely(amd_iommu_np_cache)) + if (unlikely(amd_iommu_np_cache)) { domain_flush_pages(&dma_dom->domain, address, size); + domain_flush_complete(&dma_dom->domain); + } out: return address; @@ -2478,11 +2606,6 @@ static void __unmap_single(struct dma_ops_domain *dma_dom, SUB_STATS_COUNTER(alloced_io_mem, size); dma_ops_free_addresses(dma_dom, dma_addr, pages); - - if (amd_iommu_unmap_flush || dma_dom->need_flush) { - domain_flush_pages(&dma_dom->domain, flush_addr, size); - dma_dom->need_flush = false; - } } /* @@ -2493,11 +2616,9 @@ static dma_addr_t map_page(struct device *dev, struct page *page, enum dma_data_direction dir, struct dma_attrs *attrs) { - unsigned long flags; + phys_addr_t paddr = page_to_phys(page) + offset; struct protection_domain *domain; - dma_addr_t addr; u64 dma_mask; - phys_addr_t paddr = page_to_phys(page) + offset; INC_STATS_COUNTER(cnt_map_single); @@ -2509,19 +2630,8 @@ static dma_addr_t map_page(struct device *dev, struct page *page, dma_mask = *dev->dma_mask; - spin_lock_irqsave(&domain->lock, flags); - - addr = __map_single(dev, domain->priv, paddr, size, dir, false, + return __map_single(dev, domain->priv, paddr, size, dir, false, dma_mask); - if (addr == DMA_ERROR_CODE) - goto out; - - domain_flush_complete(domain); - -out: - spin_unlock_irqrestore(&domain->lock, flags); - - return addr; } /* @@ -2530,7 +2640,6 @@ out: static void unmap_page(struct device *dev, dma_addr_t dma_addr, size_t size, enum dma_data_direction dir, struct dma_attrs *attrs) { - unsigned long flags; struct protection_domain *domain; INC_STATS_COUNTER(cnt_unmap_single); @@ -2539,13 +2648,7 @@ static void unmap_page(struct device *dev, dma_addr_t dma_addr, size_t size, if (IS_ERR(domain)) return; - spin_lock_irqsave(&domain->lock, flags); - __unmap_single(domain->priv, dma_addr, size, dir); - - domain_flush_complete(domain); - - spin_unlock_irqrestore(&domain->lock, flags); } /* @@ -2556,7 +2659,6 @@ static int map_sg(struct device *dev, struct scatterlist *sglist, int nelems, enum dma_data_direction dir, struct dma_attrs *attrs) { - unsigned long flags; struct protection_domain *domain; int i; struct scatterlist *s; @@ -2572,8 +2674,6 @@ static int map_sg(struct device *dev, struct scatterlist *sglist, dma_mask = *dev->dma_mask; - spin_lock_irqsave(&domain->lock, flags); - for_each_sg(sglist, s, nelems, i) { paddr = sg_phys(s); @@ -2588,12 +2688,8 @@ static int map_sg(struct device *dev, struct scatterlist *sglist, goto unmap; } - domain_flush_complete(domain); - -out: - spin_unlock_irqrestore(&domain->lock, flags); - return mapped_elems; + unmap: for_each_sg(sglist, s, mapped_elems, i) { if (s->dma_address) @@ -2602,9 +2698,7 @@ unmap: s->dma_address = s->dma_length = 0; } - mapped_elems = 0; - - goto out; + return 0; } /* @@ -2615,7 +2709,6 @@ static void unmap_sg(struct device *dev, struct scatterlist *sglist, int nelems, enum dma_data_direction dir, struct dma_attrs *attrs) { - unsigned long flags; struct protection_domain *domain; struct scatterlist *s; int i; @@ -2626,17 +2719,11 @@ static void unmap_sg(struct device *dev, struct scatterlist *sglist, if (IS_ERR(domain)) return; - spin_lock_irqsave(&domain->lock, flags); - for_each_sg(sglist, s, nelems, i) { __unmap_single(domain->priv, s->dma_address, s->dma_length, dir); s->dma_address = s->dma_length = 0; } - - domain_flush_complete(domain); - - spin_unlock_irqrestore(&domain->lock, flags); } /* @@ -2648,7 +2735,6 @@ static void *alloc_coherent(struct device *dev, size_t size, { u64 dma_mask = dev->coherent_dma_mask; struct protection_domain *domain; - unsigned long flags; struct page *page; INC_STATS_COUNTER(cnt_alloc_coherent); @@ -2680,19 +2766,11 @@ static void *alloc_coherent(struct device *dev, size_t size, if (!dma_mask) dma_mask = *dev->dma_mask; - spin_lock_irqsave(&domain->lock, flags); - *dma_addr = __map_single(dev, domain->priv, page_to_phys(page), size, DMA_BIDIRECTIONAL, true, dma_mask); - if (*dma_addr == DMA_ERROR_CODE) { - spin_unlock_irqrestore(&domain->lock, flags); + if (*dma_addr == DMA_ERROR_CODE) goto out_free; - } - - domain_flush_complete(domain); - - spin_unlock_irqrestore(&domain->lock, flags); return page_address(page); @@ -2712,7 +2790,6 @@ static void free_coherent(struct device *dev, size_t size, struct dma_attrs *attrs) { struct protection_domain *domain; - unsigned long flags; struct page *page; INC_STATS_COUNTER(cnt_free_coherent); @@ -2724,14 +2801,8 @@ static void free_coherent(struct device *dev, size_t size, if (IS_ERR(domain)) goto free_mem; - spin_lock_irqsave(&domain->lock, flags); - __unmap_single(domain->priv, dma_addr, size, DMA_BIDIRECTIONAL); - domain_flush_complete(domain); - - spin_unlock_irqrestore(&domain->lock, flags); - free_mem: if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT)) __free_pages(page, get_order(size)); @@ -2746,14 +2817,43 @@ static int amd_iommu_dma_supported(struct device *dev, u64 mask) return check_device(dev); } +static int set_dma_mask(struct device *dev, u64 mask) +{ + struct protection_domain *domain; + int max_apertures = 1; + + domain = get_domain(dev); + if (IS_ERR(domain)) + return PTR_ERR(domain); + + if (mask == DMA_BIT_MASK(64)) + max_apertures = 8; + else if (mask > DMA_BIT_MASK(32)) + max_apertures = 4; + + /* + * To prevent lock contention it doesn't make sense to allocate more + * apertures than online cpus + */ + if (max_apertures > num_online_cpus()) + max_apertures = num_online_cpus(); + + if (dma_ops_domain_alloc_apertures(domain->priv, max_apertures)) + dev_err(dev, "Can't allocate %d iommu apertures\n", + max_apertures); + + return 0; +} + static struct dma_map_ops amd_iommu_dma_ops = { - .alloc = alloc_coherent, - .free = free_coherent, - .map_page = map_page, - .unmap_page = unmap_page, - .map_sg = map_sg, - .unmap_sg = unmap_sg, - .dma_supported = amd_iommu_dma_supported, + .alloc = alloc_coherent, + .free = free_coherent, + .map_page = map_page, + .unmap_page = unmap_page, + .map_sg = map_sg, + .unmap_sg = unmap_sg, + .dma_supported = amd_iommu_dma_supported, + .set_dma_mask = set_dma_mask, }; int __init amd_iommu_init_api(void) @@ -3757,11 +3857,9 @@ static struct irq_domain *get_irq_domain(struct irq_alloc_info *info) case X86_IRQ_ALLOC_TYPE_MSI: case X86_IRQ_ALLOC_TYPE_MSIX: devid = get_device_id(&info->msi_dev->dev); - if (devid >= 0) { - iommu = amd_iommu_rlookup_table[devid]; - if (iommu) - return iommu->msi_domain; - } + iommu = amd_iommu_rlookup_table[devid]; + if (iommu) + return iommu->msi_domain; break; default: break; diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h index b08cf57..9d32b20 100644 --- a/drivers/iommu/amd_iommu_types.h +++ b/drivers/iommu/amd_iommu_types.h @@ -425,46 +425,6 @@ struct protection_domain { }; /* - * For dynamic growth the aperture size is split into ranges of 128MB of - * DMA address space each. This struct represents one such range. - */ -struct aperture_range { - - /* address allocation bitmap */ - unsigned long *bitmap; - - /* - * Array of PTE pages for the aperture. In this array we save all the - * leaf pages of the domain page table used for the aperture. This way - * we don't need to walk the page table to find a specific PTE. We can - * just calculate its address in constant time. - */ - u64 *pte_pages[64]; - - unsigned long offset; -}; - -/* - * Data container for a dma_ops specific protection domain - */ -struct dma_ops_domain { - /* generic protection domain information */ - struct protection_domain domain; - - /* size of the aperture for the mappings */ - unsigned long aperture_size; - - /* address we start to search for free addresses */ - unsigned long next_address; - - /* address space relevant data */ - struct aperture_range *aperture[APERTURE_MAX_RANGES]; - - /* This will be set to true when TLB needs to be flushed */ - bool need_flush; -}; - -/* * Structure where we save information about one hardware AMD IOMMU in the * system. */ diff --git a/drivers/iommu/amd_iommu_v2.c b/drivers/iommu/amd_iommu_v2.c index 7caf2fa..c865737 100644 --- a/drivers/iommu/amd_iommu_v2.c +++ b/drivers/iommu/amd_iommu_v2.c @@ -432,7 +432,7 @@ static void mn_release(struct mmu_notifier *mn, struct mm_struct *mm) unbind_pasid(pasid_state); } -static struct mmu_notifier_ops iommu_mn = { +static const struct mmu_notifier_ops iommu_mn = { .release = mn_release, .clear_flush_young = mn_clear_flush_young, .invalidate_page = mn_invalidate_page, @@ -513,43 +513,39 @@ static bool access_error(struct vm_area_struct *vma, struct fault *fault) static void do_fault(struct work_struct *work) { struct fault *fault = container_of(work, struct fault, work); - struct mm_struct *mm; struct vm_area_struct *vma; + int ret = VM_FAULT_ERROR; + unsigned int flags = 0; + struct mm_struct *mm; u64 address; - int ret, write; - - write = !!(fault->flags & PPR_FAULT_WRITE); mm = fault->state->mm; address = fault->address; + if (fault->flags & PPR_FAULT_USER) + flags |= FAULT_FLAG_USER; + if (fault->flags & PPR_FAULT_WRITE) + flags |= FAULT_FLAG_WRITE; + down_read(&mm->mmap_sem); vma = find_extend_vma(mm, address); - if (!vma || address < vma->vm_start) { + if (!vma || address < vma->vm_start) /* failed to get a vma in the right range */ - up_read(&mm->mmap_sem); - handle_fault_error(fault); goto out; - } /* Check if we have the right permissions on the vma */ - if (access_error(vma, fault)) { - up_read(&mm->mmap_sem); - handle_fault_error(fault); + if (access_error(vma, fault)) goto out; - } - ret = handle_mm_fault(mm, vma, address, write); - if (ret & VM_FAULT_ERROR) { - /* failed to service fault */ - up_read(&mm->mmap_sem); - handle_fault_error(fault); - goto out; - } + ret = handle_mm_fault(mm, vma, address, flags); +out: up_read(&mm->mmap_sem); -out: + if (ret & VM_FAULT_ERROR) + /* failed to service fault */ + handle_fault_error(fault); + finish_pri_tag(fault->dev_state, fault->state, fault->tag); put_pasid_state(fault->state); diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index 4e5118a..2087534 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -40,7 +40,10 @@ #define IDR0_ST_LVL_SHIFT 27 #define IDR0_ST_LVL_MASK 0x3 #define IDR0_ST_LVL_2LVL (1 << IDR0_ST_LVL_SHIFT) -#define IDR0_STALL_MODEL (3 << 24) +#define IDR0_STALL_MODEL_SHIFT 24 +#define IDR0_STALL_MODEL_MASK 0x3 +#define IDR0_STALL_MODEL_STALL (0 << IDR0_STALL_MODEL_SHIFT) +#define IDR0_STALL_MODEL_FORCE (2 << IDR0_STALL_MODEL_SHIFT) #define IDR0_TTENDIAN_SHIFT 21 #define IDR0_TTENDIAN_MASK 0x3 #define IDR0_TTENDIAN_LE (2 << IDR0_TTENDIAN_SHIFT) @@ -253,6 +256,9 @@ #define STRTAB_STE_1_STRW_EL2 2UL #define STRTAB_STE_1_STRW_SHIFT 30 +#define STRTAB_STE_1_SHCFG_INCOMING 1UL +#define STRTAB_STE_1_SHCFG_SHIFT 44 + #define STRTAB_STE_2_S2VMID_SHIFT 0 #define STRTAB_STE_2_S2VMID_MASK 0xffffUL #define STRTAB_STE_2_VTCR_SHIFT 32 @@ -378,7 +384,6 @@ #define PRIQ_0_SID_MASK 0xffffffffUL #define PRIQ_0_SSID_SHIFT 32 #define PRIQ_0_SSID_MASK 0xfffffUL -#define PRIQ_0_OF (1UL << 57) #define PRIQ_0_PERM_PRIV (1UL << 58) #define PRIQ_0_PERM_EXEC (1UL << 59) #define PRIQ_0_PERM_READ (1UL << 60) @@ -855,15 +860,17 @@ static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu) }; dev_err(smmu->dev, "CMDQ error (cons 0x%08x): %s\n", cons, - cerror_str[idx]); + idx < ARRAY_SIZE(cerror_str) ? cerror_str[idx] : "Unknown"); switch (idx) { - case CMDQ_ERR_CERROR_ILL_IDX: - break; case CMDQ_ERR_CERROR_ABT_IDX: dev_err(smmu->dev, "retrying command fetch\n"); case CMDQ_ERR_CERROR_NONE_IDX: return; + case CMDQ_ERR_CERROR_ILL_IDX: + /* Fallthrough */ + default: + break; } /* @@ -1042,6 +1049,8 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_device *smmu, u32 sid, val |= disable_bypass ? STRTAB_STE_0_CFG_ABORT : STRTAB_STE_0_CFG_BYPASS; dst[0] = cpu_to_le64(val); + dst[1] = cpu_to_le64(STRTAB_STE_1_SHCFG_INCOMING + << STRTAB_STE_1_SHCFG_SHIFT); dst[2] = 0; /* Nuke the VMID */ if (ste_live) arm_smmu_sync_ste_for_sid(smmu, sid); @@ -1056,12 +1065,14 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_device *smmu, u32 sid, STRTAB_STE_1_S1C_CACHE_WBRA << STRTAB_STE_1_S1COR_SHIFT | STRTAB_STE_1_S1C_SH_ISH << STRTAB_STE_1_S1CSH_SHIFT | - STRTAB_STE_1_S1STALLD | #ifdef CONFIG_PCI_ATS STRTAB_STE_1_EATS_TRANS << STRTAB_STE_1_EATS_SHIFT | #endif STRTAB_STE_1_STRW_NSEL1 << STRTAB_STE_1_STRW_SHIFT); + if (smmu->features & ARM_SMMU_FEAT_STALLS) + dst[1] |= cpu_to_le64(STRTAB_STE_1_S1STALLD); + val |= (ste->s1_cfg->cdptr_dma & STRTAB_STE_0_S1CTXPTR_MASK << STRTAB_STE_0_S1CTXPTR_SHIFT) | STRTAB_STE_0_CFG_S1_TRANS; @@ -1123,8 +1134,8 @@ static int arm_smmu_init_l2_strtab(struct arm_smmu_device *smmu, u32 sid) strtab = &cfg->strtab[(sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS]; desc->span = STRTAB_SPLIT + 1; - desc->l2ptr = dma_zalloc_coherent(smmu->dev, size, &desc->l2ptr_dma, - GFP_KERNEL); + desc->l2ptr = dmam_alloc_coherent(smmu->dev, size, &desc->l2ptr_dma, + GFP_KERNEL | __GFP_ZERO); if (!desc->l2ptr) { dev_err(smmu->dev, "failed to allocate l2 stream table for SID %u\n", @@ -1250,50 +1261,50 @@ static int arm_smmu_device_disable(struct arm_smmu_device *smmu); static irqreturn_t arm_smmu_gerror_handler(int irq, void *dev) { - u32 gerror, gerrorn; + u32 gerror, gerrorn, active; struct arm_smmu_device *smmu = dev; gerror = readl_relaxed(smmu->base + ARM_SMMU_GERROR); gerrorn = readl_relaxed(smmu->base + ARM_SMMU_GERRORN); - gerror ^= gerrorn; - if (!(gerror & GERROR_ERR_MASK)) + active = gerror ^ gerrorn; + if (!(active & GERROR_ERR_MASK)) return IRQ_NONE; /* No errors pending */ dev_warn(smmu->dev, "unexpected global error reported (0x%08x), this could be serious\n", - gerror); + active); - if (gerror & GERROR_SFM_ERR) { + if (active & GERROR_SFM_ERR) { dev_err(smmu->dev, "device has entered Service Failure Mode!\n"); arm_smmu_device_disable(smmu); } - if (gerror & GERROR_MSI_GERROR_ABT_ERR) + if (active & GERROR_MSI_GERROR_ABT_ERR) dev_warn(smmu->dev, "GERROR MSI write aborted\n"); - if (gerror & GERROR_MSI_PRIQ_ABT_ERR) { + if (active & GERROR_MSI_PRIQ_ABT_ERR) { dev_warn(smmu->dev, "PRIQ MSI write aborted\n"); arm_smmu_priq_handler(irq, smmu->dev); } - if (gerror & GERROR_MSI_EVTQ_ABT_ERR) { + if (active & GERROR_MSI_EVTQ_ABT_ERR) { dev_warn(smmu->dev, "EVTQ MSI write aborted\n"); arm_smmu_evtq_handler(irq, smmu->dev); } - if (gerror & GERROR_MSI_CMDQ_ABT_ERR) { + if (active & GERROR_MSI_CMDQ_ABT_ERR) { dev_warn(smmu->dev, "CMDQ MSI write aborted\n"); arm_smmu_cmdq_sync_handler(irq, smmu->dev); } - if (gerror & GERROR_PRIQ_ABT_ERR) + if (active & GERROR_PRIQ_ABT_ERR) dev_err(smmu->dev, "PRIQ write aborted -- events may have been lost\n"); - if (gerror & GERROR_EVTQ_ABT_ERR) + if (active & GERROR_EVTQ_ABT_ERR) dev_err(smmu->dev, "EVTQ write aborted -- events may have been lost\n"); - if (gerror & GERROR_CMDQ_ERR) + if (active & GERROR_CMDQ_ERR) arm_smmu_cmdq_skip_err(smmu); writel(gerror, smmu->base + ARM_SMMU_GERRORN); @@ -1335,7 +1346,7 @@ static void arm_smmu_tlb_inv_context(void *cookie) } static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size, - bool leaf, void *cookie) + size_t granule, bool leaf, void *cookie) { struct arm_smmu_domain *smmu_domain = cookie; struct arm_smmu_device *smmu = smmu_domain->smmu; @@ -1354,7 +1365,10 @@ static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size, cmd.tlbi.vmid = smmu_domain->s2_cfg.vmid; } - arm_smmu_cmdq_issue_cmd(smmu, &cmd); + do { + arm_smmu_cmdq_issue_cmd(smmu, &cmd); + cmd.tlbi.addr += granule; + } while (size -= granule); } static struct iommu_gather_ops arm_smmu_gather_ops = { @@ -1429,10 +1443,10 @@ static void arm_smmu_domain_free(struct iommu_domain *domain) struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg; if (cfg->cdptr) { - dma_free_coherent(smmu_domain->smmu->dev, - CTXDESC_CD_DWORDS << 3, - cfg->cdptr, - cfg->cdptr_dma); + dmam_free_coherent(smmu_domain->smmu->dev, + CTXDESC_CD_DWORDS << 3, + cfg->cdptr, + cfg->cdptr_dma); arm_smmu_bitmap_free(smmu->asid_map, cfg->cd.asid); } @@ -1457,8 +1471,9 @@ static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain, if (IS_ERR_VALUE(asid)) return asid; - cfg->cdptr = dma_zalloc_coherent(smmu->dev, CTXDESC_CD_DWORDS << 3, - &cfg->cdptr_dma, GFP_KERNEL); + cfg->cdptr = dmam_alloc_coherent(smmu->dev, CTXDESC_CD_DWORDS << 3, + &cfg->cdptr_dma, + GFP_KERNEL | __GFP_ZERO); if (!cfg->cdptr) { dev_warn(smmu->dev, "failed to allocate context descriptor\n"); ret = -ENOMEM; @@ -1804,13 +1819,13 @@ static int arm_smmu_add_device(struct device *dev) smmu = arm_smmu_get_for_pci_dev(pdev); if (!smmu) { ret = -ENOENT; - goto out_put_group; + goto out_remove_dev; } smmu_group = kzalloc(sizeof(*smmu_group), GFP_KERNEL); if (!smmu_group) { ret = -ENOMEM; - goto out_put_group; + goto out_remove_dev; } smmu_group->ste.valid = true; @@ -1826,20 +1841,20 @@ static int arm_smmu_add_device(struct device *dev) for (i = 0; i < smmu_group->num_sids; ++i) { /* If we already know about this SID, then we're done */ if (smmu_group->sids[i] == sid) - return 0; + goto out_put_group; } /* Check the SID is in range of the SMMU and our stream table */ if (!arm_smmu_sid_in_range(smmu, sid)) { ret = -ERANGE; - goto out_put_group; + goto out_remove_dev; } /* Ensure l2 strtab is initialised */ if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) { ret = arm_smmu_init_l2_strtab(smmu, sid); if (ret) - goto out_put_group; + goto out_remove_dev; } /* Resize the SID array for the group */ @@ -1849,16 +1864,20 @@ static int arm_smmu_add_device(struct device *dev) if (!sids) { smmu_group->num_sids--; ret = -ENOMEM; - goto out_put_group; + goto out_remove_dev; } /* Add the new SID */ sids[smmu_group->num_sids - 1] = sid; smmu_group->sids = sids; - return 0; out_put_group: iommu_group_put(group); + return 0; + +out_remove_dev: + iommu_group_remove_device(dev); + iommu_group_put(group); return ret; } @@ -1937,7 +1956,7 @@ static int arm_smmu_init_one_queue(struct arm_smmu_device *smmu, { size_t qsz = ((1 << q->max_n_shift) * dwords) << 3; - q->base = dma_alloc_coherent(smmu->dev, qsz, &q->base_dma, GFP_KERNEL); + q->base = dmam_alloc_coherent(smmu->dev, qsz, &q->base_dma, GFP_KERNEL); if (!q->base) { dev_err(smmu->dev, "failed to allocate queue (0x%zx bytes)\n", qsz); @@ -1957,23 +1976,6 @@ static int arm_smmu_init_one_queue(struct arm_smmu_device *smmu, return 0; } -static void arm_smmu_free_one_queue(struct arm_smmu_device *smmu, - struct arm_smmu_queue *q) -{ - size_t qsz = ((1 << q->max_n_shift) * q->ent_dwords) << 3; - - dma_free_coherent(smmu->dev, qsz, q->base, q->base_dma); -} - -static void arm_smmu_free_queues(struct arm_smmu_device *smmu) -{ - arm_smmu_free_one_queue(smmu, &smmu->cmdq.q); - arm_smmu_free_one_queue(smmu, &smmu->evtq.q); - - if (smmu->features & ARM_SMMU_FEAT_PRI) - arm_smmu_free_one_queue(smmu, &smmu->priq.q); -} - static int arm_smmu_init_queues(struct arm_smmu_device *smmu) { int ret; @@ -1983,49 +1985,20 @@ static int arm_smmu_init_queues(struct arm_smmu_device *smmu) ret = arm_smmu_init_one_queue(smmu, &smmu->cmdq.q, ARM_SMMU_CMDQ_PROD, ARM_SMMU_CMDQ_CONS, CMDQ_ENT_DWORDS); if (ret) - goto out; + return ret; /* evtq */ ret = arm_smmu_init_one_queue(smmu, &smmu->evtq.q, ARM_SMMU_EVTQ_PROD, ARM_SMMU_EVTQ_CONS, EVTQ_ENT_DWORDS); if (ret) - goto out_free_cmdq; + return ret; /* priq */ if (!(smmu->features & ARM_SMMU_FEAT_PRI)) return 0; - ret = arm_smmu_init_one_queue(smmu, &smmu->priq.q, ARM_SMMU_PRIQ_PROD, - ARM_SMMU_PRIQ_CONS, PRIQ_ENT_DWORDS); - if (ret) - goto out_free_evtq; - - return 0; - -out_free_evtq: - arm_smmu_free_one_queue(smmu, &smmu->evtq.q); -out_free_cmdq: - arm_smmu_free_one_queue(smmu, &smmu->cmdq.q); -out: - return ret; -} - -static void arm_smmu_free_l2_strtab(struct arm_smmu_device *smmu) -{ - int i; - size_t size; - struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg; - - size = 1 << (STRTAB_SPLIT + ilog2(STRTAB_STE_DWORDS) + 3); - for (i = 0; i < cfg->num_l1_ents; ++i) { - struct arm_smmu_strtab_l1_desc *desc = &cfg->l1_desc[i]; - - if (!desc->l2ptr) - continue; - - dma_free_coherent(smmu->dev, size, desc->l2ptr, - desc->l2ptr_dma); - } + return arm_smmu_init_one_queue(smmu, &smmu->priq.q, ARM_SMMU_PRIQ_PROD, + ARM_SMMU_PRIQ_CONS, PRIQ_ENT_DWORDS); } static int arm_smmu_init_l1_strtab(struct arm_smmu_device *smmu) @@ -2054,7 +2027,6 @@ static int arm_smmu_init_strtab_2lvl(struct arm_smmu_device *smmu) void *strtab; u64 reg; u32 size, l1size; - int ret; struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg; /* @@ -2077,8 +2049,8 @@ static int arm_smmu_init_strtab_2lvl(struct arm_smmu_device *smmu) size, smmu->sid_bits); l1size = cfg->num_l1_ents * (STRTAB_L1_DESC_DWORDS << 3); - strtab = dma_zalloc_coherent(smmu->dev, l1size, &cfg->strtab_dma, - GFP_KERNEL); + strtab = dmam_alloc_coherent(smmu->dev, l1size, &cfg->strtab_dma, + GFP_KERNEL | __GFP_ZERO); if (!strtab) { dev_err(smmu->dev, "failed to allocate l1 stream table (%u bytes)\n", @@ -2095,13 +2067,7 @@ static int arm_smmu_init_strtab_2lvl(struct arm_smmu_device *smmu) << STRTAB_BASE_CFG_SPLIT_SHIFT; cfg->strtab_base_cfg = reg; - ret = arm_smmu_init_l1_strtab(smmu); - if (ret) - dma_free_coherent(smmu->dev, - l1size, - strtab, - cfg->strtab_dma); - return ret; + return arm_smmu_init_l1_strtab(smmu); } static int arm_smmu_init_strtab_linear(struct arm_smmu_device *smmu) @@ -2112,8 +2078,8 @@ static int arm_smmu_init_strtab_linear(struct arm_smmu_device *smmu) struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg; size = (1 << smmu->sid_bits) * (STRTAB_STE_DWORDS << 3); - strtab = dma_zalloc_coherent(smmu->dev, size, &cfg->strtab_dma, - GFP_KERNEL); + strtab = dmam_alloc_coherent(smmu->dev, size, &cfg->strtab_dma, + GFP_KERNEL | __GFP_ZERO); if (!strtab) { dev_err(smmu->dev, "failed to allocate linear stream table (%u bytes)\n", @@ -2157,21 +2123,6 @@ static int arm_smmu_init_strtab(struct arm_smmu_device *smmu) return 0; } -static void arm_smmu_free_strtab(struct arm_smmu_device *smmu) -{ - struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg; - u32 size = cfg->num_l1_ents; - - if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) { - arm_smmu_free_l2_strtab(smmu); - size *= STRTAB_L1_DESC_DWORDS << 3; - } else { - size *= STRTAB_STE_DWORDS * 3; - } - - dma_free_coherent(smmu->dev, size, cfg->strtab, cfg->strtab_dma); -} - static int arm_smmu_init_structures(struct arm_smmu_device *smmu) { int ret; @@ -2180,21 +2131,7 @@ static int arm_smmu_init_structures(struct arm_smmu_device *smmu) if (ret) return ret; - ret = arm_smmu_init_strtab(smmu); - if (ret) - goto out_free_queues; - - return 0; - -out_free_queues: - arm_smmu_free_queues(smmu); - return ret; -} - -static void arm_smmu_free_structures(struct arm_smmu_device *smmu) -{ - arm_smmu_free_strtab(smmu); - arm_smmu_free_queues(smmu); + return arm_smmu_init_strtab(smmu); } static int arm_smmu_write_reg_sync(struct arm_smmu_device *smmu, u32 val, @@ -2532,8 +2469,12 @@ static int arm_smmu_device_probe(struct arm_smmu_device *smmu) dev_warn(smmu->dev, "IDR0.COHACC overridden by dma-coherent property (%s)\n", coherent ? "true" : "false"); - if (reg & IDR0_STALL_MODEL) + switch (reg & IDR0_STALL_MODEL_MASK << IDR0_STALL_MODEL_SHIFT) { + case IDR0_STALL_MODEL_STALL: + /* Fallthrough */ + case IDR0_STALL_MODEL_FORCE: smmu->features |= ARM_SMMU_FEAT_STALLS; + } if (reg & IDR0_S1P) smmu->features |= ARM_SMMU_FEAT_TRANS_S1; @@ -2699,15 +2640,7 @@ static int arm_smmu_device_dt_probe(struct platform_device *pdev) platform_set_drvdata(pdev, smmu); /* Reset the device */ - ret = arm_smmu_device_reset(smmu); - if (ret) - goto out_free_structures; - - return 0; - -out_free_structures: - arm_smmu_free_structures(smmu); - return ret; + return arm_smmu_device_reset(smmu); } static int arm_smmu_device_remove(struct platform_device *pdev) @@ -2715,7 +2648,6 @@ static int arm_smmu_device_remove(struct platform_device *pdev) struct arm_smmu_device *smmu = platform_get_drvdata(pdev); arm_smmu_device_disable(smmu); - arm_smmu_free_structures(smmu); return 0; } diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index 47dc7a7..59ee4b8 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -582,7 +582,7 @@ static void arm_smmu_tlb_inv_context(void *cookie) } static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size, - bool leaf, void *cookie) + size_t granule, bool leaf, void *cookie) { struct arm_smmu_domain *smmu_domain = cookie; struct arm_smmu_cfg *cfg = &smmu_domain->cfg; @@ -597,12 +597,18 @@ static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size, if (!IS_ENABLED(CONFIG_64BIT) || smmu->version == ARM_SMMU_V1) { iova &= ~12UL; iova |= ARM_SMMU_CB_ASID(cfg); - writel_relaxed(iova, reg); + do { + writel_relaxed(iova, reg); + iova += granule; + } while (size -= granule); #ifdef CONFIG_64BIT } else { iova >>= 12; iova |= (u64)ARM_SMMU_CB_ASID(cfg) << 48; - writeq_relaxed(iova, reg); + do { + writeq_relaxed(iova, reg); + iova += granule >> 12; + } while (size -= granule); #endif } #ifdef CONFIG_64BIT @@ -610,7 +616,11 @@ static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size, reg = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, cfg->cbndx); reg += leaf ? ARM_SMMU_CB_S2_TLBIIPAS2L : ARM_SMMU_CB_S2_TLBIIPAS2; - writeq_relaxed(iova >> 12, reg); + iova >>= 12; + do { + writeq_relaxed(iova, reg); + iova += granule >> 12; + } while (size -= granule); #endif } else { reg = ARM_SMMU_GR0(smmu) + ARM_SMMU_GR0_TLBIVMID; @@ -945,9 +955,7 @@ static void arm_smmu_destroy_domain_context(struct iommu_domain *domain) free_irq(irq, domain); } - if (smmu_domain->pgtbl_ops) - free_io_pgtable_ops(smmu_domain->pgtbl_ops); - + free_io_pgtable_ops(smmu_domain->pgtbl_ops); __arm_smmu_free_bitmap(smmu->context_map, cfg->cbndx); } @@ -1357,6 +1365,7 @@ static int arm_smmu_add_device(struct device *dev) if (IS_ERR(group)) return PTR_ERR(group); + iommu_group_put(group); return 0; } diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c index 80e3c17..62a400c 100644 --- a/drivers/iommu/dmar.c +++ b/drivers/iommu/dmar.c @@ -1063,13 +1063,19 @@ static int alloc_iommu(struct dmar_drhd_unit *drhd) raw_spin_lock_init(&iommu->register_lock); - drhd->iommu = iommu; - - if (intel_iommu_enabled) + if (intel_iommu_enabled) { iommu->iommu_dev = iommu_device_create(NULL, iommu, intel_iommu_groups, "%s", iommu->name); + if (IS_ERR(iommu->iommu_dev)) { + err = PTR_ERR(iommu->iommu_dev); + goto err_unmap; + } + } + + drhd->iommu = iommu; + return 0; err_unmap: diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c index 7df9777..8bbcbfe 100644 --- a/drivers/iommu/io-pgtable-arm.c +++ b/drivers/iommu/io-pgtable-arm.c @@ -38,9 +38,6 @@ #define io_pgtable_to_data(x) \ container_of((x), struct arm_lpae_io_pgtable, iop) -#define io_pgtable_ops_to_pgtable(x) \ - container_of((x), struct io_pgtable, ops) - #define io_pgtable_ops_to_data(x) \ io_pgtable_to_data(io_pgtable_ops_to_pgtable(x)) @@ -58,8 +55,10 @@ ((((d)->levels - ((l) - ARM_LPAE_START_LVL(d) + 1)) \ * (d)->bits_per_level) + (d)->pg_shift) +#define ARM_LPAE_GRANULE(d) (1UL << (d)->pg_shift) + #define ARM_LPAE_PAGES_PER_PGD(d) \ - DIV_ROUND_UP((d)->pgd_size, 1UL << (d)->pg_shift) + DIV_ROUND_UP((d)->pgd_size, ARM_LPAE_GRANULE(d)) /* * Calculate the index at level l used to map virtual address a using the @@ -169,7 +168,7 @@ /* IOPTE accessors */ #define iopte_deref(pte,d) \ (__va((pte) & ((1ULL << ARM_LPAE_MAX_ADDR_BITS) - 1) \ - & ~((1ULL << (d)->pg_shift) - 1))) + & ~(ARM_LPAE_GRANULE(d) - 1ULL))) #define iopte_type(pte,l) \ (((pte) >> ARM_LPAE_PTE_TYPE_SHIFT) & ARM_LPAE_PTE_TYPE_MASK) @@ -326,7 +325,7 @@ static int __arm_lpae_map(struct arm_lpae_io_pgtable *data, unsigned long iova, /* Grab a pointer to the next level */ pte = *ptep; if (!pte) { - cptep = __arm_lpae_alloc_pages(1UL << data->pg_shift, + cptep = __arm_lpae_alloc_pages(ARM_LPAE_GRANULE(data), GFP_ATOMIC, cfg); if (!cptep) return -ENOMEM; @@ -405,17 +404,18 @@ static void __arm_lpae_free_pgtable(struct arm_lpae_io_pgtable *data, int lvl, arm_lpae_iopte *start, *end; unsigned long table_size; - /* Only leaf entries at the last level */ - if (lvl == ARM_LPAE_MAX_LEVELS - 1) - return; - if (lvl == ARM_LPAE_START_LVL(data)) table_size = data->pgd_size; else - table_size = 1UL << data->pg_shift; + table_size = ARM_LPAE_GRANULE(data); start = ptep; - end = (void *)ptep + table_size; + + /* Only leaf entries at the last level */ + if (lvl == ARM_LPAE_MAX_LEVELS - 1) + end = ptep; + else + end = (void *)ptep + table_size; while (ptep != end) { arm_lpae_iopte pte = *ptep++; @@ -473,7 +473,7 @@ static int arm_lpae_split_blk_unmap(struct arm_lpae_io_pgtable *data, __arm_lpae_set_pte(ptep, table, cfg); iova &= ~(blk_size - 1); - cfg->tlb->tlb_add_flush(iova, blk_size, true, data->iop.cookie); + cfg->tlb->tlb_add_flush(iova, blk_size, blk_size, true, data->iop.cookie); return size; } @@ -486,11 +486,13 @@ static int __arm_lpae_unmap(struct arm_lpae_io_pgtable *data, void *cookie = data->iop.cookie; size_t blk_size = ARM_LPAE_BLOCK_SIZE(lvl, data); + /* Something went horribly wrong and we ran out of page table */ + if (WARN_ON(lvl == ARM_LPAE_MAX_LEVELS)) + return 0; + ptep += ARM_LPAE_LVL_IDX(iova, lvl, data); pte = *ptep; - - /* Something went horribly wrong and we ran out of page table */ - if (WARN_ON(!pte || (lvl == ARM_LPAE_MAX_LEVELS))) + if (WARN_ON(!pte)) return 0; /* If the size matches this level, we're in the right place */ @@ -499,12 +501,13 @@ static int __arm_lpae_unmap(struct arm_lpae_io_pgtable *data, if (!iopte_leaf(pte, lvl)) { /* Also flush any partial walks */ - tlb->tlb_add_flush(iova, size, false, cookie); + tlb->tlb_add_flush(iova, size, ARM_LPAE_GRANULE(data), + false, cookie); tlb->tlb_sync(cookie); ptep = iopte_deref(pte, data); __arm_lpae_free_pgtable(data, lvl + 1, ptep); } else { - tlb->tlb_add_flush(iova, size, true, cookie); + tlb->tlb_add_flush(iova, size, size, true, cookie); } return size; @@ -570,7 +573,7 @@ static phys_addr_t arm_lpae_iova_to_phys(struct io_pgtable_ops *ops, return 0; found_translation: - iova &= ((1 << data->pg_shift) - 1); + iova &= (ARM_LPAE_GRANULE(data) - 1); return ((phys_addr_t)iopte_to_pfn(pte,data) << data->pg_shift) | iova; } @@ -668,7 +671,7 @@ arm_64_lpae_alloc_pgtable_s1(struct io_pgtable_cfg *cfg, void *cookie) (ARM_LPAE_TCR_RGN_WBWA << ARM_LPAE_TCR_IRGN0_SHIFT) | (ARM_LPAE_TCR_RGN_WBWA << ARM_LPAE_TCR_ORGN0_SHIFT); - switch (1 << data->pg_shift) { + switch (ARM_LPAE_GRANULE(data)) { case SZ_4K: reg |= ARM_LPAE_TCR_TG0_4K; break; @@ -769,7 +772,7 @@ arm_64_lpae_alloc_pgtable_s2(struct io_pgtable_cfg *cfg, void *cookie) sl = ARM_LPAE_START_LVL(data); - switch (1 << data->pg_shift) { + switch (ARM_LPAE_GRANULE(data)) { case SZ_4K: reg |= ARM_LPAE_TCR_TG0_4K; sl++; /* SL0 format is different for 4K granule size */ @@ -889,8 +892,8 @@ static void dummy_tlb_flush_all(void *cookie) WARN_ON(cookie != cfg_cookie); } -static void dummy_tlb_add_flush(unsigned long iova, size_t size, bool leaf, - void *cookie) +static void dummy_tlb_add_flush(unsigned long iova, size_t size, + size_t granule, bool leaf, void *cookie) { WARN_ON(cookie != cfg_cookie); WARN_ON(!(size & cfg_cookie->pgsize_bitmap)); diff --git a/drivers/iommu/io-pgtable.h b/drivers/iommu/io-pgtable.h index ac9e234..36673c8 100644 --- a/drivers/iommu/io-pgtable.h +++ b/drivers/iommu/io-pgtable.h @@ -26,8 +26,8 @@ enum io_pgtable_fmt { */ struct iommu_gather_ops { void (*tlb_flush_all)(void *cookie); - void (*tlb_add_flush)(unsigned long iova, size_t size, bool leaf, - void *cookie); + void (*tlb_add_flush)(unsigned long iova, size_t size, size_t granule, + bool leaf, void *cookie); void (*tlb_sync)(void *cookie); }; @@ -131,6 +131,8 @@ struct io_pgtable { struct io_pgtable_ops ops; }; +#define io_pgtable_ops_to_pgtable(x) container_of((x), struct io_pgtable, ops) + /** * struct io_pgtable_init_fns - Alloc/free a set of page tables for a * particular format. diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c index dfb868e..2fdbac6 100644 --- a/drivers/iommu/ipmmu-vmsa.c +++ b/drivers/iommu/ipmmu-vmsa.c @@ -277,8 +277,8 @@ static void ipmmu_tlb_flush_all(void *cookie) ipmmu_tlb_invalidate(domain); } -static void ipmmu_tlb_add_flush(unsigned long iova, size_t size, bool leaf, - void *cookie) +static void ipmmu_tlb_add_flush(unsigned long iova, size_t size, + size_t granule, bool leaf, void *cookie) { /* The hardware doesn't support selective TLB flush. */ } diff --git a/drivers/iommu/msm_iommu_dev.c b/drivers/iommu/msm_iommu_dev.c index b6d01f9..4b09e81 100644 --- a/drivers/iommu/msm_iommu_dev.c +++ b/drivers/iommu/msm_iommu_dev.c @@ -359,30 +359,19 @@ static struct platform_driver msm_iommu_ctx_driver = { .remove = msm_iommu_ctx_remove, }; +static struct platform_driver * const drivers[] = { + &msm_iommu_driver, + &msm_iommu_ctx_driver, +}; + static int __init msm_iommu_driver_init(void) { - int ret; - ret = platform_driver_register(&msm_iommu_driver); - if (ret != 0) { - pr_err("Failed to register IOMMU driver\n"); - goto error; - } - - ret = platform_driver_register(&msm_iommu_ctx_driver); - if (ret != 0) { - platform_driver_unregister(&msm_iommu_driver); - pr_err("Failed to register IOMMU context driver\n"); - goto error; - } - -error: - return ret; + return platform_register_drivers(drivers, ARRAY_SIZE(drivers)); } static void __exit msm_iommu_driver_exit(void) { - platform_driver_unregister(&msm_iommu_ctx_driver); - platform_driver_unregister(&msm_iommu_driver); + platform_unregister_drivers(drivers, ARRAY_SIZE(drivers)); } subsys_initcall(msm_iommu_driver_init); diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c index 471ee36..a04d491 100644 --- a/drivers/iommu/s390-iommu.c +++ b/drivers/iommu/s390-iommu.c @@ -49,7 +49,7 @@ static bool s390_iommu_capable(enum iommu_cap cap) } } -struct iommu_domain *s390_domain_alloc(unsigned domain_type) +static struct iommu_domain *s390_domain_alloc(unsigned domain_type) { struct s390_domain *s390_domain; @@ -73,7 +73,7 @@ struct iommu_domain *s390_domain_alloc(unsigned domain_type) return &s390_domain->domain; } -void s390_domain_free(struct iommu_domain *domain) +static void s390_domain_free(struct iommu_domain *domain) { struct s390_domain *s390_domain = to_s390_domain(domain); diff --git a/drivers/iommu/shmobile-iommu.c b/drivers/iommu/shmobile-iommu.c deleted file mode 100644 index a028751..0000000 --- a/drivers/iommu/shmobile-iommu.c +++ /dev/null @@ -1,402 +0,0 @@ -/* - * IOMMU for IPMMU/IPMMUI - * Copyright (C) 2012 Hideki EIRAKU - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; version 2 of the License. - */ - -#include <linux/dma-mapping.h> -#include <linux/io.h> -#include <linux/iommu.h> -#include <linux/platform_device.h> -#include <linux/sizes.h> -#include <linux/slab.h> -#include <asm/dma-iommu.h> -#include "shmobile-ipmmu.h" - -#define L1_SIZE CONFIG_SHMOBILE_IOMMU_L1SIZE -#define L1_LEN (L1_SIZE / 4) -#define L1_ALIGN L1_SIZE -#define L2_SIZE SZ_1K -#define L2_LEN (L2_SIZE / 4) -#define L2_ALIGN L2_SIZE - -struct shmobile_iommu_domain_pgtable { - uint32_t *pgtable; - dma_addr_t handle; -}; - -struct shmobile_iommu_archdata { - struct list_head attached_list; - struct dma_iommu_mapping *iommu_mapping; - spinlock_t attach_lock; - struct shmobile_iommu_domain *attached; - int num_attached_devices; - struct shmobile_ipmmu *ipmmu; -}; - -struct shmobile_iommu_domain { - struct shmobile_iommu_domain_pgtable l1, l2[L1_LEN]; - spinlock_t map_lock; - spinlock_t attached_list_lock; - struct list_head attached_list; - struct iommu_domain domain; -}; - -static struct shmobile_iommu_archdata *ipmmu_archdata; -static struct kmem_cache *l1cache, *l2cache; - -static struct shmobile_iommu_domain *to_sh_domain(struct iommu_domain *dom) -{ - return container_of(dom, struct shmobile_iommu_domain, domain); -} - -static int pgtable_alloc(struct shmobile_iommu_domain_pgtable *pgtable, - struct kmem_cache *cache, size_t size) -{ - pgtable->pgtable = kmem_cache_zalloc(cache, GFP_ATOMIC); - if (!pgtable->pgtable) - return -ENOMEM; - pgtable->handle = dma_map_single(NULL, pgtable->pgtable, size, - DMA_TO_DEVICE); - return 0; -} - -static void pgtable_free(struct shmobile_iommu_domain_pgtable *pgtable, - struct kmem_cache *cache, size_t size) -{ - dma_unmap_single(NULL, pgtable->handle, size, DMA_TO_DEVICE); - kmem_cache_free(cache, pgtable->pgtable); -} - -static uint32_t pgtable_read(struct shmobile_iommu_domain_pgtable *pgtable, - unsigned int index) -{ - return pgtable->pgtable[index]; -} - -static void pgtable_write(struct shmobile_iommu_domain_pgtable *pgtable, - unsigned int index, unsigned int count, uint32_t val) -{ - unsigned int i; - - for (i = 0; i < count; i++) - pgtable->pgtable[index + i] = val; - dma_sync_single_for_device(NULL, pgtable->handle + index * sizeof(val), - sizeof(val) * count, DMA_TO_DEVICE); -} - -static struct iommu_domain *shmobile_iommu_domain_alloc(unsigned type) -{ - struct shmobile_iommu_domain *sh_domain; - int i, ret; - - if (type != IOMMU_DOMAIN_UNMANAGED) - return NULL; - - sh_domain = kzalloc(sizeof(*sh_domain), GFP_KERNEL); - if (!sh_domain) - return NULL; - ret = pgtable_alloc(&sh_domain->l1, l1cache, L1_SIZE); - if (ret < 0) { - kfree(sh_domain); - return NULL; - } - for (i = 0; i < L1_LEN; i++) - sh_domain->l2[i].pgtable = NULL; - spin_lock_init(&sh_domain->map_lock); - spin_lock_init(&sh_domain->attached_list_lock); - INIT_LIST_HEAD(&sh_domain->attached_list); - return &sh_domain->domain; -} - -static void shmobile_iommu_domain_free(struct iommu_domain *domain) -{ - struct shmobile_iommu_domain *sh_domain = to_sh_domain(domain); - int i; - - for (i = 0; i < L1_LEN; i++) { - if (sh_domain->l2[i].pgtable) - pgtable_free(&sh_domain->l2[i], l2cache, L2_SIZE); - } - pgtable_free(&sh_domain->l1, l1cache, L1_SIZE); - kfree(sh_domain); -} - -static int shmobile_iommu_attach_device(struct iommu_domain *domain, - struct device *dev) -{ - struct shmobile_iommu_archdata *archdata = dev->archdata.iommu; - struct shmobile_iommu_domain *sh_domain = to_sh_domain(domain); - int ret = -EBUSY; - - if (!archdata) - return -ENODEV; - spin_lock(&sh_domain->attached_list_lock); - spin_lock(&archdata->attach_lock); - if (archdata->attached != sh_domain) { - if (archdata->attached) - goto err; - ipmmu_tlb_set(archdata->ipmmu, sh_domain->l1.handle, L1_SIZE, - 0); - ipmmu_tlb_flush(archdata->ipmmu); - archdata->attached = sh_domain; - archdata->num_attached_devices = 0; - list_add(&archdata->attached_list, &sh_domain->attached_list); - } - archdata->num_attached_devices++; - ret = 0; -err: - spin_unlock(&archdata->attach_lock); - spin_unlock(&sh_domain->attached_list_lock); - return ret; -} - -static void shmobile_iommu_detach_device(struct iommu_domain *domain, - struct device *dev) -{ - struct shmobile_iommu_archdata *archdata = dev->archdata.iommu; - struct shmobile_iommu_domain *sh_domain = to_sh_domain(domain); - - if (!archdata) - return; - spin_lock(&sh_domain->attached_list_lock); - spin_lock(&archdata->attach_lock); - archdata->num_attached_devices--; - if (!archdata->num_attached_devices) { - ipmmu_tlb_set(archdata->ipmmu, 0, 0, 0); - ipmmu_tlb_flush(archdata->ipmmu); - archdata->attached = NULL; - list_del(&archdata->attached_list); - } - spin_unlock(&archdata->attach_lock); - spin_unlock(&sh_domain->attached_list_lock); -} - -static void domain_tlb_flush(struct shmobile_iommu_domain *sh_domain) -{ - struct shmobile_iommu_archdata *archdata; - - spin_lock(&sh_domain->attached_list_lock); - list_for_each_entry(archdata, &sh_domain->attached_list, attached_list) - ipmmu_tlb_flush(archdata->ipmmu); - spin_unlock(&sh_domain->attached_list_lock); -} - -static int l2alloc(struct shmobile_iommu_domain *sh_domain, - unsigned int l1index) -{ - int ret; - - if (!sh_domain->l2[l1index].pgtable) { - ret = pgtable_alloc(&sh_domain->l2[l1index], l2cache, L2_SIZE); - if (ret < 0) - return ret; - } - pgtable_write(&sh_domain->l1, l1index, 1, - sh_domain->l2[l1index].handle | 0x1); - return 0; -} - -static void l2realfree(struct shmobile_iommu_domain_pgtable *l2) -{ - if (l2->pgtable) - pgtable_free(l2, l2cache, L2_SIZE); -} - -static void l2free(struct shmobile_iommu_domain *sh_domain, - unsigned int l1index, - struct shmobile_iommu_domain_pgtable *l2) -{ - pgtable_write(&sh_domain->l1, l1index, 1, 0); - if (sh_domain->l2[l1index].pgtable) { - *l2 = sh_domain->l2[l1index]; - sh_domain->l2[l1index].pgtable = NULL; - } -} - -static int shmobile_iommu_map(struct iommu_domain *domain, unsigned long iova, - phys_addr_t paddr, size_t size, int prot) -{ - struct shmobile_iommu_domain_pgtable l2 = { .pgtable = NULL }; - struct shmobile_iommu_domain *sh_domain = to_sh_domain(domain); - unsigned int l1index, l2index; - int ret; - - l1index = iova >> 20; - switch (size) { - case SZ_4K: - l2index = (iova >> 12) & 0xff; - spin_lock(&sh_domain->map_lock); - ret = l2alloc(sh_domain, l1index); - if (!ret) - pgtable_write(&sh_domain->l2[l1index], l2index, 1, - paddr | 0xff2); - spin_unlock(&sh_domain->map_lock); - break; - case SZ_64K: - l2index = (iova >> 12) & 0xf0; - spin_lock(&sh_domain->map_lock); - ret = l2alloc(sh_domain, l1index); - if (!ret) - pgtable_write(&sh_domain->l2[l1index], l2index, 0x10, - paddr | 0xff1); - spin_unlock(&sh_domain->map_lock); - break; - case SZ_1M: - spin_lock(&sh_domain->map_lock); - l2free(sh_domain, l1index, &l2); - pgtable_write(&sh_domain->l1, l1index, 1, paddr | 0xc02); - spin_unlock(&sh_domain->map_lock); - ret = 0; - break; - default: - ret = -EINVAL; - } - if (!ret) - domain_tlb_flush(sh_domain); - l2realfree(&l2); - return ret; -} - -static size_t shmobile_iommu_unmap(struct iommu_domain *domain, - unsigned long iova, size_t size) -{ - struct shmobile_iommu_domain_pgtable l2 = { .pgtable = NULL }; - struct shmobile_iommu_domain *sh_domain = to_sh_domain(domain); - unsigned int l1index, l2index; - uint32_t l2entry = 0; - size_t ret = 0; - - l1index = iova >> 20; - if (!(iova & 0xfffff) && size >= SZ_1M) { - spin_lock(&sh_domain->map_lock); - l2free(sh_domain, l1index, &l2); - spin_unlock(&sh_domain->map_lock); - ret = SZ_1M; - goto done; - } - l2index = (iova >> 12) & 0xff; - spin_lock(&sh_domain->map_lock); - if (sh_domain->l2[l1index].pgtable) - l2entry = pgtable_read(&sh_domain->l2[l1index], l2index); - switch (l2entry & 3) { - case 1: - if (l2index & 0xf) - break; - pgtable_write(&sh_domain->l2[l1index], l2index, 0x10, 0); - ret = SZ_64K; - break; - case 2: - pgtable_write(&sh_domain->l2[l1index], l2index, 1, 0); - ret = SZ_4K; - break; - } - spin_unlock(&sh_domain->map_lock); -done: - if (ret) - domain_tlb_flush(sh_domain); - l2realfree(&l2); - return ret; -} - -static phys_addr_t shmobile_iommu_iova_to_phys(struct iommu_domain *domain, - dma_addr_t iova) -{ - struct shmobile_iommu_domain *sh_domain = to_sh_domain(domain); - uint32_t l1entry = 0, l2entry = 0; - unsigned int l1index, l2index; - - l1index = iova >> 20; - l2index = (iova >> 12) & 0xff; - spin_lock(&sh_domain->map_lock); - if (sh_domain->l2[l1index].pgtable) - l2entry = pgtable_read(&sh_domain->l2[l1index], l2index); - else - l1entry = pgtable_read(&sh_domain->l1, l1index); - spin_unlock(&sh_domain->map_lock); - switch (l2entry & 3) { - case 1: - return (l2entry & ~0xffff) | (iova & 0xffff); - case 2: - return (l2entry & ~0xfff) | (iova & 0xfff); - default: - if ((l1entry & 3) == 2) - return (l1entry & ~0xfffff) | (iova & 0xfffff); - return 0; - } -} - -static int find_dev_name(struct shmobile_ipmmu *ipmmu, const char *dev_name) -{ - unsigned int i, n = ipmmu->num_dev_names; - - for (i = 0; i < n; i++) { - if (strcmp(ipmmu->dev_names[i], dev_name) == 0) - return 1; - } - return 0; -} - -static int shmobile_iommu_add_device(struct device *dev) -{ - struct shmobile_iommu_archdata *archdata = ipmmu_archdata; - struct dma_iommu_mapping *mapping; - - if (!find_dev_name(archdata->ipmmu, dev_name(dev))) - return 0; - mapping = archdata->iommu_mapping; - if (!mapping) { - mapping = arm_iommu_create_mapping(&platform_bus_type, 0, - L1_LEN << 20); - if (IS_ERR(mapping)) - return PTR_ERR(mapping); - archdata->iommu_mapping = mapping; - } - dev->archdata.iommu = archdata; - if (arm_iommu_attach_device(dev, mapping)) - pr_err("arm_iommu_attach_device failed\n"); - return 0; -} - -static const struct iommu_ops shmobile_iommu_ops = { - .domain_alloc = shmobile_iommu_domain_alloc, - .domain_free = shmobile_iommu_domain_free, - .attach_dev = shmobile_iommu_attach_device, - .detach_dev = shmobile_iommu_detach_device, - .map = shmobile_iommu_map, - .unmap = shmobile_iommu_unmap, - .map_sg = default_iommu_map_sg, - .iova_to_phys = shmobile_iommu_iova_to_phys, - .add_device = shmobile_iommu_add_device, - .pgsize_bitmap = SZ_1M | SZ_64K | SZ_4K, -}; - -int ipmmu_iommu_init(struct shmobile_ipmmu *ipmmu) -{ - static struct shmobile_iommu_archdata *archdata; - - l1cache = kmem_cache_create("shmobile-iommu-pgtable1", L1_SIZE, - L1_ALIGN, SLAB_HWCACHE_ALIGN, NULL); - if (!l1cache) - return -ENOMEM; - l2cache = kmem_cache_create("shmobile-iommu-pgtable2", L2_SIZE, - L2_ALIGN, SLAB_HWCACHE_ALIGN, NULL); - if (!l2cache) { - kmem_cache_destroy(l1cache); - return -ENOMEM; - } - archdata = kzalloc(sizeof(*archdata), GFP_KERNEL); - if (!archdata) { - kmem_cache_destroy(l1cache); - kmem_cache_destroy(l2cache); - return -ENOMEM; - } - spin_lock_init(&archdata->attach_lock); - archdata->ipmmu = ipmmu; - ipmmu_archdata = archdata; - bus_set_iommu(&platform_bus_type, &shmobile_iommu_ops); - return 0; -} diff --git a/drivers/iommu/shmobile-ipmmu.c b/drivers/iommu/shmobile-ipmmu.c deleted file mode 100644 index 951651a..0000000 --- a/drivers/iommu/shmobile-ipmmu.c +++ /dev/null @@ -1,129 +0,0 @@ -/* - * IPMMU/IPMMUI - * Copyright (C) 2012 Hideki EIRAKU - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; version 2 of the License. - */ - -#include <linux/err.h> -#include <linux/export.h> -#include <linux/io.h> -#include <linux/platform_device.h> -#include <linux/slab.h> -#include <linux/platform_data/sh_ipmmu.h> -#include "shmobile-ipmmu.h" - -#define IMCTR1 0x000 -#define IMCTR2 0x004 -#define IMASID 0x010 -#define IMTTBR 0x014 -#define IMTTBCR 0x018 - -#define IMCTR1_TLBEN (1 << 0) -#define IMCTR1_FLUSH (1 << 1) - -static void ipmmu_reg_write(struct shmobile_ipmmu *ipmmu, unsigned long reg_off, - unsigned long data) -{ - iowrite32(data, ipmmu->ipmmu_base + reg_off); -} - -void ipmmu_tlb_flush(struct shmobile_ipmmu *ipmmu) -{ - if (!ipmmu) - return; - - spin_lock(&ipmmu->flush_lock); - if (ipmmu->tlb_enabled) - ipmmu_reg_write(ipmmu, IMCTR1, IMCTR1_FLUSH | IMCTR1_TLBEN); - else - ipmmu_reg_write(ipmmu, IMCTR1, IMCTR1_FLUSH); - spin_unlock(&ipmmu->flush_lock); -} - -void ipmmu_tlb_set(struct shmobile_ipmmu *ipmmu, unsigned long phys, int size, - int asid) -{ - if (!ipmmu) - return; - - spin_lock(&ipmmu->flush_lock); - switch (size) { - default: - ipmmu->tlb_enabled = 0; - break; - case 0x2000: - ipmmu_reg_write(ipmmu, IMTTBCR, 1); - ipmmu->tlb_enabled = 1; - break; - case 0x1000: - ipmmu_reg_write(ipmmu, IMTTBCR, 2); - ipmmu->tlb_enabled = 1; - break; - case 0x800: - ipmmu_reg_write(ipmmu, IMTTBCR, 3); - ipmmu->tlb_enabled = 1; - break; - case 0x400: - ipmmu_reg_write(ipmmu, IMTTBCR, 4); - ipmmu->tlb_enabled = 1; - break; - case 0x200: - ipmmu_reg_write(ipmmu, IMTTBCR, 5); - ipmmu->tlb_enabled = 1; - break; - case 0x100: - ipmmu_reg_write(ipmmu, IMTTBCR, 6); - ipmmu->tlb_enabled = 1; - break; - case 0x80: - ipmmu_reg_write(ipmmu, IMTTBCR, 7); - ipmmu->tlb_enabled = 1; - break; - } - ipmmu_reg_write(ipmmu, IMTTBR, phys); - ipmmu_reg_write(ipmmu, IMASID, asid); - spin_unlock(&ipmmu->flush_lock); -} - -static int ipmmu_probe(struct platform_device *pdev) -{ - struct shmobile_ipmmu *ipmmu; - struct resource *res; - struct shmobile_ipmmu_platform_data *pdata = pdev->dev.platform_data; - - ipmmu = devm_kzalloc(&pdev->dev, sizeof(*ipmmu), GFP_KERNEL); - if (!ipmmu) { - dev_err(&pdev->dev, "cannot allocate device data\n"); - return -ENOMEM; - } - spin_lock_init(&ipmmu->flush_lock); - ipmmu->dev = &pdev->dev; - - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - ipmmu->ipmmu_base = devm_ioremap_resource(&pdev->dev, res); - if (IS_ERR(ipmmu->ipmmu_base)) - return PTR_ERR(ipmmu->ipmmu_base); - - ipmmu->dev_names = pdata->dev_names; - ipmmu->num_dev_names = pdata->num_dev_names; - platform_set_drvdata(pdev, ipmmu); - ipmmu_reg_write(ipmmu, IMCTR1, 0x0); /* disable TLB */ - ipmmu_reg_write(ipmmu, IMCTR2, 0x0); /* disable PMB */ - return ipmmu_iommu_init(ipmmu); -} - -static struct platform_driver ipmmu_driver = { - .probe = ipmmu_probe, - .driver = { - .name = "ipmmu", - }, -}; - -static int __init ipmmu_init(void) -{ - return platform_driver_register(&ipmmu_driver); -} -subsys_initcall(ipmmu_init); diff --git a/drivers/iommu/shmobile-ipmmu.h b/drivers/iommu/shmobile-ipmmu.h deleted file mode 100644 index 9524743..0000000 --- a/drivers/iommu/shmobile-ipmmu.h +++ /dev/null @@ -1,34 +0,0 @@ -/* shmobile-ipmmu.h - * - * Copyright (C) 2012 Hideki EIRAKU - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; version 2 of the License. - */ - -#ifndef __SHMOBILE_IPMMU_H__ -#define __SHMOBILE_IPMMU_H__ - -struct shmobile_ipmmu { - struct device *dev; - void __iomem *ipmmu_base; - int tlb_enabled; - spinlock_t flush_lock; - const char * const *dev_names; - unsigned int num_dev_names; -}; - -#ifdef CONFIG_SHMOBILE_IPMMU_TLB -void ipmmu_tlb_flush(struct shmobile_ipmmu *ipmmu); -void ipmmu_tlb_set(struct shmobile_ipmmu *ipmmu, unsigned long phys, int size, - int asid); -int ipmmu_iommu_init(struct shmobile_ipmmu *ipmmu); -#else -static inline int ipmmu_iommu_init(struct shmobile_ipmmu *ipmmu) -{ - return -EINVAL; -} -#endif - -#endif /* __SHMOBILE_IPMMU_H__ */ |