diff options
author | Russell King <rmk+kernel@arm.linux.org.uk> | 2012-10-11 10:55:04 +0100 |
---|---|---|
committer | Russell King <rmk+kernel@arm.linux.org.uk> | 2012-10-11 10:55:04 +0100 |
commit | a0f0dd57f4a85310d9936f1770a0424b49fef876 (patch) | |
tree | 2f85b8b67dda13d19b02ca39e0fbef921cb1cf8b /arch/arm/mm | |
parent | 2a552d5e63d7fa602c9a9a0717008737f55625a6 (diff) | |
parent | 846a136881b8f73c1f74250bf6acfaa309cab1f2 (diff) | |
download | op-kernel-dev-a0f0dd57f4a85310d9936f1770a0424b49fef876.zip op-kernel-dev-a0f0dd57f4a85310d9936f1770a0424b49fef876.tar.gz |
Merge branch 'fixes' into for-linus
Conflicts:
arch/arm/kernel/smp.c
Diffstat (limited to 'arch/arm/mm')
-rw-r--r-- | arch/arm/mm/alignment.c | 6 | ||||
-rw-r--r-- | arch/arm/mm/cache-l2x0.c | 10 | ||||
-rw-r--r-- | arch/arm/mm/cache-tauros2.c | 83 | ||||
-rw-r--r-- | arch/arm/mm/cache-v7.S | 3 | ||||
-rw-r--r-- | arch/arm/mm/dma-mapping.c | 266 | ||||
-rw-r--r-- | arch/arm/mm/init.c | 2 | ||||
-rw-r--r-- | arch/arm/mm/ioremap.c | 15 | ||||
-rw-r--r-- | arch/arm/mm/mmu.c | 65 |
8 files changed, 343 insertions, 107 deletions
diff --git a/arch/arm/mm/alignment.c b/arch/arm/mm/alignment.c index 9107231..b9f60eb 100644 --- a/arch/arm/mm/alignment.c +++ b/arch/arm/mm/alignment.c @@ -699,7 +699,6 @@ do_alignment_t32_to_handler(unsigned long *pinstr, struct pt_regs *regs, unsigned long instr = *pinstr; u16 tinst1 = (instr >> 16) & 0xffff; u16 tinst2 = instr & 0xffff; - poffset->un = 0; switch (tinst1 & 0xffe0) { /* A6.3.5 Load/Store multiple */ @@ -854,9 +853,10 @@ do_alignment(unsigned long addr, unsigned int fsr, struct pt_regs *regs) break; case 0x08000000: /* ldm or stm, or thumb-2 32bit instruction */ - if (thumb2_32b) + if (thumb2_32b) { + offset.un = 0; handler = do_alignment_t32_to_handler(&instr, regs, &offset); - else + } else handler = do_alignment_ldmstm; break; diff --git a/arch/arm/mm/cache-l2x0.c b/arch/arm/mm/cache-l2x0.c index 2a8e380..8a97e64 100644 --- a/arch/arm/mm/cache-l2x0.c +++ b/arch/arm/mm/cache-l2x0.c @@ -368,14 +368,18 @@ void __init l2x0_init(void __iomem *base, u32 aux_val, u32 aux_mask) /* l2x0 controller is disabled */ writel_relaxed(aux, l2x0_base + L2X0_AUX_CTRL); - l2x0_saved_regs.aux_ctrl = aux; - l2x0_inv_all(); /* enable L2X0 */ writel_relaxed(1, l2x0_base + L2X0_CTRL); } + /* Re-read it in case some bits are reserved. */ + aux = readl_relaxed(l2x0_base + L2X0_AUX_CTRL); + + /* Save the value for resuming. */ + l2x0_saved_regs.aux_ctrl = aux; + outer_cache.inv_range = l2x0_inv_range; outer_cache.clean_range = l2x0_clean_range; outer_cache.flush_range = l2x0_flush_range; @@ -554,7 +558,7 @@ static const struct of_device_id l2x0_ids[] __initconst = { int __init l2x0_of_init(u32 aux_val, u32 aux_mask) { struct device_node *np; - struct l2x0_of_data *data; + const struct l2x0_of_data *data; struct resource res; np = of_find_matching_node(NULL, l2x0_ids); diff --git a/arch/arm/mm/cache-tauros2.c b/arch/arm/mm/cache-tauros2.c index 23a7643..1be0f4e 100644 --- a/arch/arm/mm/cache-tauros2.c +++ b/arch/arm/mm/cache-tauros2.c @@ -15,8 +15,11 @@ */ #include <linux/init.h> +#include <linux/of.h> +#include <linux/of_address.h> #include <asm/cacheflush.h> #include <asm/cp15.h> +#include <asm/cputype.h> #include <asm/hardware/cache-tauros2.h> @@ -144,25 +147,8 @@ static inline void __init write_extra_features(u32 u) __asm__("mcr p15, 1, %0, c15, c1, 0" : : "r" (u)); } -static void __init disable_l2_prefetch(void) -{ - u32 u; - - /* - * Read the CPU Extra Features register and verify that the - * Disable L2 Prefetch bit is set. - */ - u = read_extra_features(); - if (!(u & 0x01000000)) { - printk(KERN_INFO "Tauros2: Disabling L2 prefetch.\n"); - write_extra_features(u | 0x01000000); - } -} - static inline int __init cpuid_scheme(void) { - extern int processor_id; - return !!((processor_id & 0x000f0000) == 0x000f0000); } @@ -189,12 +175,36 @@ static inline void __init write_actlr(u32 actlr) __asm__("mcr p15, 0, %0, c1, c0, 1\n" : : "r" (actlr)); } -void __init tauros2_init(void) +static void enable_extra_feature(unsigned int features) +{ + u32 u; + + u = read_extra_features(); + + if (features & CACHE_TAUROS2_PREFETCH_ON) + u &= ~0x01000000; + else + u |= 0x01000000; + printk(KERN_INFO "Tauros2: %s L2 prefetch.\n", + (features & CACHE_TAUROS2_PREFETCH_ON) + ? "Enabling" : "Disabling"); + + if (features & CACHE_TAUROS2_LINEFILL_BURST8) + u |= 0x00100000; + else + u &= ~0x00100000; + printk(KERN_INFO "Tauros2: %s line fill burt8.\n", + (features & CACHE_TAUROS2_LINEFILL_BURST8) + ? "Enabling" : "Disabling"); + + write_extra_features(u); +} + +static void __init tauros2_internal_init(unsigned int features) { - extern int processor_id; - char *mode; + char *mode = NULL; - disable_l2_prefetch(); + enable_extra_feature(features); #ifdef CONFIG_CPU_32v5 if ((processor_id & 0xff0f0000) == 0x56050000) { @@ -286,3 +296,34 @@ void __init tauros2_init(void) printk(KERN_INFO "Tauros2: L2 cache support initialised " "in %s mode.\n", mode); } + +#ifdef CONFIG_OF +static const struct of_device_id tauros2_ids[] __initconst = { + { .compatible = "marvell,tauros2-cache"}, + {} +}; +#endif + +void __init tauros2_init(unsigned int features) +{ +#ifdef CONFIG_OF + struct device_node *node; + int ret; + unsigned int f; + + node = of_find_matching_node(NULL, tauros2_ids); + if (!node) { + pr_info("Not found marvell,tauros2-cache, disable it\n"); + return; + } + + ret = of_property_read_u32(node, "marvell,tauros2-cache-features", &f); + if (ret) { + pr_info("Not found marvell,tauros-cache-features property, " + "disable extra features\n"); + features = 0; + } else + features = f; +#endif + tauros2_internal_init(features); +} diff --git a/arch/arm/mm/cache-v7.S b/arch/arm/mm/cache-v7.S index 140b294..cd95664 100644 --- a/arch/arm/mm/cache-v7.S +++ b/arch/arm/mm/cache-v7.S @@ -247,6 +247,9 @@ ENTRY(v7_coherent_user_range) * isn't mapped, fail with -EFAULT. */ 9001: +#ifdef CONFIG_ARM_ERRATA_775420 + dsb +#endif mov r0, #-EFAULT mov pc, lr UNWIND(.fnend ) diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index e59c4ab..477a2d2 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -73,11 +73,18 @@ static dma_addr_t arm_dma_map_page(struct device *dev, struct page *page, unsigned long offset, size_t size, enum dma_data_direction dir, struct dma_attrs *attrs) { - if (!arch_is_coherent() && !dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs)) + if (!dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs)) __dma_page_cpu_to_dev(page, offset, size, dir); return pfn_to_dma(dev, page_to_pfn(page)) + offset; } +static dma_addr_t arm_coherent_dma_map_page(struct device *dev, struct page *page, + unsigned long offset, size_t size, enum dma_data_direction dir, + struct dma_attrs *attrs) +{ + return pfn_to_dma(dev, page_to_pfn(page)) + offset; +} + /** * arm_dma_unmap_page - unmap a buffer previously mapped through dma_map_page() * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices @@ -96,7 +103,7 @@ static void arm_dma_unmap_page(struct device *dev, dma_addr_t handle, size_t size, enum dma_data_direction dir, struct dma_attrs *attrs) { - if (!arch_is_coherent() && !dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs)) + if (!dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs)) __dma_page_dev_to_cpu(pfn_to_page(dma_to_pfn(dev, handle)), handle & ~PAGE_MASK, size, dir); } @@ -106,8 +113,7 @@ static void arm_dma_sync_single_for_cpu(struct device *dev, { unsigned int offset = handle & (PAGE_SIZE - 1); struct page *page = pfn_to_page(dma_to_pfn(dev, handle-offset)); - if (!arch_is_coherent()) - __dma_page_dev_to_cpu(page, offset, size, dir); + __dma_page_dev_to_cpu(page, offset, size, dir); } static void arm_dma_sync_single_for_device(struct device *dev, @@ -115,8 +121,7 @@ static void arm_dma_sync_single_for_device(struct device *dev, { unsigned int offset = handle & (PAGE_SIZE - 1); struct page *page = pfn_to_page(dma_to_pfn(dev, handle-offset)); - if (!arch_is_coherent()) - __dma_page_cpu_to_dev(page, offset, size, dir); + __dma_page_cpu_to_dev(page, offset, size, dir); } static int arm_dma_set_mask(struct device *dev, u64 dma_mask); @@ -138,6 +143,22 @@ struct dma_map_ops arm_dma_ops = { }; EXPORT_SYMBOL(arm_dma_ops); +static void *arm_coherent_dma_alloc(struct device *dev, size_t size, + dma_addr_t *handle, gfp_t gfp, struct dma_attrs *attrs); +static void arm_coherent_dma_free(struct device *dev, size_t size, void *cpu_addr, + dma_addr_t handle, struct dma_attrs *attrs); + +struct dma_map_ops arm_coherent_dma_ops = { + .alloc = arm_coherent_dma_alloc, + .free = arm_coherent_dma_free, + .mmap = arm_dma_mmap, + .get_sgtable = arm_dma_get_sgtable, + .map_page = arm_coherent_dma_map_page, + .map_sg = arm_dma_map_sg, + .set_dma_mask = arm_dma_set_mask, +}; +EXPORT_SYMBOL(arm_coherent_dma_ops); + static u64 get_coherent_dma_mask(struct device *dev) { u64 mask = (u64)arm_dma_limit; @@ -346,6 +367,8 @@ static int __init atomic_pool_init(void) (unsigned)pool->size / 1024); return 0; } + + kfree(pages); no_pages: kfree(bitmap); no_bitmap: @@ -584,7 +607,7 @@ static void *__alloc_simple_buffer(struct device *dev, size_t size, gfp_t gfp, static void *__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, - gfp_t gfp, pgprot_t prot, const void *caller) + gfp_t gfp, pgprot_t prot, bool is_coherent, const void *caller) { u64 mask = get_coherent_dma_mask(dev); struct page *page; @@ -617,7 +640,7 @@ static void *__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, *handle = DMA_ERROR_CODE; size = PAGE_ALIGN(size); - if (arch_is_coherent() || nommu()) + if (is_coherent || nommu()) addr = __alloc_simple_buffer(dev, size, gfp, &page); else if (gfp & GFP_ATOMIC) addr = __alloc_from_pool(size, &page); @@ -645,7 +668,20 @@ void *arm_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, if (dma_alloc_from_coherent(dev, size, handle, &memory)) return memory; - return __dma_alloc(dev, size, handle, gfp, prot, + return __dma_alloc(dev, size, handle, gfp, prot, false, + __builtin_return_address(0)); +} + +static void *arm_coherent_dma_alloc(struct device *dev, size_t size, + dma_addr_t *handle, gfp_t gfp, struct dma_attrs *attrs) +{ + pgprot_t prot = __get_dma_pgprot(attrs, pgprot_kernel); + void *memory; + + if (dma_alloc_from_coherent(dev, size, handle, &memory)) + return memory; + + return __dma_alloc(dev, size, handle, gfp, prot, true, __builtin_return_address(0)); } @@ -682,8 +718,9 @@ int arm_dma_mmap(struct device *dev, struct vm_area_struct *vma, /* * Free a buffer as defined by the above mapping. */ -void arm_dma_free(struct device *dev, size_t size, void *cpu_addr, - dma_addr_t handle, struct dma_attrs *attrs) +static void __arm_dma_free(struct device *dev, size_t size, void *cpu_addr, + dma_addr_t handle, struct dma_attrs *attrs, + bool is_coherent) { struct page *page = pfn_to_page(dma_to_pfn(dev, handle)); @@ -692,7 +729,7 @@ void arm_dma_free(struct device *dev, size_t size, void *cpu_addr, size = PAGE_ALIGN(size); - if (arch_is_coherent() || nommu()) { + if (is_coherent || nommu()) { __dma_free_buffer(page, size); } else if (__free_from_pool(cpu_addr, size)) { return; @@ -708,6 +745,18 @@ void arm_dma_free(struct device *dev, size_t size, void *cpu_addr, } } +void arm_dma_free(struct device *dev, size_t size, void *cpu_addr, + dma_addr_t handle, struct dma_attrs *attrs) +{ + __arm_dma_free(dev, size, cpu_addr, handle, attrs, false); +} + +static void arm_coherent_dma_free(struct device *dev, size_t size, void *cpu_addr, + dma_addr_t handle, struct dma_attrs *attrs) +{ + __arm_dma_free(dev, size, cpu_addr, handle, attrs, true); +} + int arm_dma_get_sgtable(struct device *dev, struct sg_table *sgt, void *cpu_addr, dma_addr_t handle, size_t size, struct dma_attrs *attrs) @@ -1010,11 +1059,12 @@ static struct page **__iommu_alloc_buffer(struct device *dev, size_t size, gfp_t if (!pages[i]) goto error; - if (order) + if (order) { split_page(pages[i], order); - j = 1 << order; - while (--j) - pages[i + j] = pages[i] + j; + j = 1 << order; + while (--j) + pages[i + j] = pages[i] + j; + } __dma_clear_buffer(pages[i], PAGE_SIZE << order); i += 1 << order; @@ -1301,7 +1351,8 @@ static int arm_iommu_get_sgtable(struct device *dev, struct sg_table *sgt, */ static int __map_sg_chunk(struct device *dev, struct scatterlist *sg, size_t size, dma_addr_t *handle, - enum dma_data_direction dir, struct dma_attrs *attrs) + enum dma_data_direction dir, struct dma_attrs *attrs, + bool is_coherent) { struct dma_iommu_mapping *mapping = dev->archdata.mapping; dma_addr_t iova, iova_base; @@ -1320,8 +1371,8 @@ static int __map_sg_chunk(struct device *dev, struct scatterlist *sg, phys_addr_t phys = page_to_phys(sg_page(s)); unsigned int len = PAGE_ALIGN(s->offset + s->length); - if (!arch_is_coherent() && - !dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs)) + if (!is_coherent && + !dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs)) __dma_page_cpu_to_dev(sg_page(s), s->offset, s->length, dir); ret = iommu_map(mapping->domain, iova, phys, len, 0); @@ -1339,20 +1390,9 @@ fail: return ret; } -/** - * arm_iommu_map_sg - map a set of SG buffers for streaming mode DMA - * @dev: valid struct device pointer - * @sg: list of buffers - * @nents: number of buffers to map - * @dir: DMA transfer direction - * - * Map a set of buffers described by scatterlist in streaming mode for DMA. - * The scatter gather list elements are merged together (if possible) and - * tagged with the appropriate dma address and length. They are obtained via - * sg_dma_{address,length}. - */ -int arm_iommu_map_sg(struct device *dev, struct scatterlist *sg, int nents, - enum dma_data_direction dir, struct dma_attrs *attrs) +static int __iommu_map_sg(struct device *dev, struct scatterlist *sg, int nents, + enum dma_data_direction dir, struct dma_attrs *attrs, + bool is_coherent) { struct scatterlist *s = sg, *dma = sg, *start = sg; int i, count = 0; @@ -1368,7 +1408,7 @@ int arm_iommu_map_sg(struct device *dev, struct scatterlist *sg, int nents, if (s->offset || (size & ~PAGE_MASK) || size + s->length > max) { if (__map_sg_chunk(dev, start, size, &dma->dma_address, - dir, attrs) < 0) + dir, attrs, is_coherent) < 0) goto bad_mapping; dma->dma_address += offset; @@ -1381,7 +1421,8 @@ int arm_iommu_map_sg(struct device *dev, struct scatterlist *sg, int nents, } size += s->length; } - if (__map_sg_chunk(dev, start, size, &dma->dma_address, dir, attrs) < 0) + if (__map_sg_chunk(dev, start, size, &dma->dma_address, dir, attrs, + is_coherent) < 0) goto bad_mapping; dma->dma_address += offset; @@ -1396,17 +1437,44 @@ bad_mapping: } /** - * arm_iommu_unmap_sg - unmap a set of SG buffers mapped by dma_map_sg + * arm_coherent_iommu_map_sg - map a set of SG buffers for streaming mode DMA * @dev: valid struct device pointer * @sg: list of buffers - * @nents: number of buffers to unmap (same as was passed to dma_map_sg) - * @dir: DMA transfer direction (same as was passed to dma_map_sg) + * @nents: number of buffers to map + * @dir: DMA transfer direction * - * Unmap a set of streaming mode DMA translations. Again, CPU access - * rules concerning calls here are the same as for dma_unmap_single(). + * Map a set of i/o coherent buffers described by scatterlist in streaming + * mode for DMA. The scatter gather list elements are merged together (if + * possible) and tagged with the appropriate dma address and length. They are + * obtained via sg_dma_{address,length}. */ -void arm_iommu_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, - enum dma_data_direction dir, struct dma_attrs *attrs) +int arm_coherent_iommu_map_sg(struct device *dev, struct scatterlist *sg, + int nents, enum dma_data_direction dir, struct dma_attrs *attrs) +{ + return __iommu_map_sg(dev, sg, nents, dir, attrs, true); +} + +/** + * arm_iommu_map_sg - map a set of SG buffers for streaming mode DMA + * @dev: valid struct device pointer + * @sg: list of buffers + * @nents: number of buffers to map + * @dir: DMA transfer direction + * + * Map a set of buffers described by scatterlist in streaming mode for DMA. + * The scatter gather list elements are merged together (if possible) and + * tagged with the appropriate dma address and length. They are obtained via + * sg_dma_{address,length}. + */ +int arm_iommu_map_sg(struct device *dev, struct scatterlist *sg, + int nents, enum dma_data_direction dir, struct dma_attrs *attrs) +{ + return __iommu_map_sg(dev, sg, nents, dir, attrs, false); +} + +static void __iommu_unmap_sg(struct device *dev, struct scatterlist *sg, + int nents, enum dma_data_direction dir, struct dma_attrs *attrs, + bool is_coherent) { struct scatterlist *s; int i; @@ -1415,7 +1483,7 @@ void arm_iommu_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, if (sg_dma_len(s)) __iommu_remove_mapping(dev, sg_dma_address(s), sg_dma_len(s)); - if (!arch_is_coherent() && + if (!is_coherent && !dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs)) __dma_page_dev_to_cpu(sg_page(s), s->offset, s->length, dir); @@ -1423,6 +1491,38 @@ void arm_iommu_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, } /** + * arm_coherent_iommu_unmap_sg - unmap a set of SG buffers mapped by dma_map_sg + * @dev: valid struct device pointer + * @sg: list of buffers + * @nents: number of buffers to unmap (same as was passed to dma_map_sg) + * @dir: DMA transfer direction (same as was passed to dma_map_sg) + * + * Unmap a set of streaming mode DMA translations. Again, CPU access + * rules concerning calls here are the same as for dma_unmap_single(). + */ +void arm_coherent_iommu_unmap_sg(struct device *dev, struct scatterlist *sg, + int nents, enum dma_data_direction dir, struct dma_attrs *attrs) +{ + __iommu_unmap_sg(dev, sg, nents, dir, attrs, true); +} + +/** + * arm_iommu_unmap_sg - unmap a set of SG buffers mapped by dma_map_sg + * @dev: valid struct device pointer + * @sg: list of buffers + * @nents: number of buffers to unmap (same as was passed to dma_map_sg) + * @dir: DMA transfer direction (same as was passed to dma_map_sg) + * + * Unmap a set of streaming mode DMA translations. Again, CPU access + * rules concerning calls here are the same as for dma_unmap_single(). + */ +void arm_iommu_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, + enum dma_data_direction dir, struct dma_attrs *attrs) +{ + __iommu_unmap_sg(dev, sg, nents, dir, attrs, false); +} + +/** * arm_iommu_sync_sg_for_cpu * @dev: valid struct device pointer * @sg: list of buffers @@ -1436,8 +1536,7 @@ void arm_iommu_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, int i; for_each_sg(sg, s, nents, i) - if (!arch_is_coherent()) - __dma_page_dev_to_cpu(sg_page(s), s->offset, s->length, dir); + __dma_page_dev_to_cpu(sg_page(s), s->offset, s->length, dir); } @@ -1455,22 +1554,21 @@ void arm_iommu_sync_sg_for_device(struct device *dev, struct scatterlist *sg, int i; for_each_sg(sg, s, nents, i) - if (!arch_is_coherent()) - __dma_page_cpu_to_dev(sg_page(s), s->offset, s->length, dir); + __dma_page_cpu_to_dev(sg_page(s), s->offset, s->length, dir); } /** - * arm_iommu_map_page + * arm_coherent_iommu_map_page * @dev: valid struct device pointer * @page: page that buffer resides in * @offset: offset into page for start of buffer * @size: size of buffer to map * @dir: DMA transfer direction * - * IOMMU aware version of arm_dma_map_page() + * Coherent IOMMU aware version of arm_dma_map_page() */ -static dma_addr_t arm_iommu_map_page(struct device *dev, struct page *page, +static dma_addr_t arm_coherent_iommu_map_page(struct device *dev, struct page *page, unsigned long offset, size_t size, enum dma_data_direction dir, struct dma_attrs *attrs) { @@ -1478,9 +1576,6 @@ static dma_addr_t arm_iommu_map_page(struct device *dev, struct page *page, dma_addr_t dma_addr; int ret, len = PAGE_ALIGN(size + offset); - if (!arch_is_coherent() && !dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs)) - __dma_page_cpu_to_dev(page, offset, size, dir); - dma_addr = __alloc_iova(mapping, len); if (dma_addr == DMA_ERROR_CODE) return dma_addr; @@ -1496,6 +1591,51 @@ fail: } /** + * arm_iommu_map_page + * @dev: valid struct device pointer + * @page: page that buffer resides in + * @offset: offset into page for start of buffer + * @size: size of buffer to map + * @dir: DMA transfer direction + * + * IOMMU aware version of arm_dma_map_page() + */ +static dma_addr_t arm_iommu_map_page(struct device *dev, struct page *page, + unsigned long offset, size_t size, enum dma_data_direction dir, + struct dma_attrs *attrs) +{ + if (!dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs)) + __dma_page_cpu_to_dev(page, offset, size, dir); + + return arm_coherent_iommu_map_page(dev, page, offset, size, dir, attrs); +} + +/** + * arm_coherent_iommu_unmap_page + * @dev: valid struct device pointer + * @handle: DMA address of buffer + * @size: size of buffer (same as passed to dma_map_page) + * @dir: DMA transfer direction (same as passed to dma_map_page) + * + * Coherent IOMMU aware version of arm_dma_unmap_page() + */ +static void arm_coherent_iommu_unmap_page(struct device *dev, dma_addr_t handle, + size_t size, enum dma_data_direction dir, + struct dma_attrs *attrs) +{ + struct dma_iommu_mapping *mapping = dev->archdata.mapping; + dma_addr_t iova = handle & PAGE_MASK; + int offset = handle & ~PAGE_MASK; + int len = PAGE_ALIGN(size + offset); + + if (!iova) + return; + + iommu_unmap(mapping->domain, iova, len); + __free_iova(mapping, iova, len); +} + +/** * arm_iommu_unmap_page * @dev: valid struct device pointer * @handle: DMA address of buffer @@ -1517,7 +1657,7 @@ static void arm_iommu_unmap_page(struct device *dev, dma_addr_t handle, if (!iova) return; - if (!arch_is_coherent() && !dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs)) + if (!dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs)) __dma_page_dev_to_cpu(page, offset, size, dir); iommu_unmap(mapping->domain, iova, len); @@ -1535,8 +1675,7 @@ static void arm_iommu_sync_single_for_cpu(struct device *dev, if (!iova) return; - if (!arch_is_coherent()) - __dma_page_dev_to_cpu(page, offset, size, dir); + __dma_page_dev_to_cpu(page, offset, size, dir); } static void arm_iommu_sync_single_for_device(struct device *dev, @@ -1570,6 +1709,19 @@ struct dma_map_ops iommu_ops = { .sync_sg_for_device = arm_iommu_sync_sg_for_device, }; +struct dma_map_ops iommu_coherent_ops = { + .alloc = arm_iommu_alloc_attrs, + .free = arm_iommu_free_attrs, + .mmap = arm_iommu_mmap_attrs, + .get_sgtable = arm_iommu_get_sgtable, + + .map_page = arm_coherent_iommu_map_page, + .unmap_page = arm_coherent_iommu_unmap_page, + + .map_sg = arm_coherent_iommu_map_sg, + .unmap_sg = arm_coherent_iommu_unmap_sg, +}; + /** * arm_iommu_create_mapping * @bus: pointer to the bus holding the client device (for IOMMU calls) @@ -1663,7 +1815,7 @@ int arm_iommu_attach_device(struct device *dev, dev->archdata.mapping = mapping; set_dma_ops(dev, &iommu_ops); - pr_info("Attached IOMMU controller to %s device.\n", dev_name(dev)); + pr_debug("Attached IOMMU controller to %s device.\n", dev_name(dev)); return 0; } diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c index 9aec41f..ad722f1 100644 --- a/arch/arm/mm/init.c +++ b/arch/arm/mm/init.c @@ -324,7 +324,7 @@ phys_addr_t __init arm_memblock_steal(phys_addr_t size, phys_addr_t align) BUG_ON(!arm_memblock_steal_permitted); - phys = memblock_alloc(size, align); + phys = memblock_alloc_base(size, align, MEMBLOCK_ALLOC_ANYWHERE); memblock_free(phys, size); memblock_remove(phys, size); diff --git a/arch/arm/mm/ioremap.c b/arch/arm/mm/ioremap.c index 566750f..5dcc2fd 100644 --- a/arch/arm/mm/ioremap.c +++ b/arch/arm/mm/ioremap.c @@ -36,6 +36,7 @@ #include <asm/system_info.h> #include <asm/mach/map.h> +#include <asm/mach/pci.h> #include "mm.h" int ioremap_page(unsigned long virt, unsigned long phys, @@ -247,6 +248,7 @@ void __iomem * __arm_ioremap_pfn_caller(unsigned long pfn, if (!area) return NULL; addr = (unsigned long)area->addr; + area->phys_addr = __pfn_to_phys(pfn); #if !defined(CONFIG_SMP) && !defined(CONFIG_ARM_LPAE) if (DOMAIN_IO == 0 && @@ -383,3 +385,16 @@ void __arm_iounmap(volatile void __iomem *io_addr) arch_iounmap(io_addr); } EXPORT_SYMBOL(__arm_iounmap); + +#ifdef CONFIG_PCI +int pci_ioremap_io(unsigned int offset, phys_addr_t phys_addr) +{ + BUG_ON(offset + SZ_64K > IO_SPACE_LIMIT); + + return ioremap_page_range(PCI_IO_VIRT_BASE + offset, + PCI_IO_VIRT_BASE + offset + SZ_64K, + phys_addr, + __pgprot(get_mem_type(MT_DEVICE)->prot_pte)); +} +EXPORT_SYMBOL_GPL(pci_ioremap_io); +#endif diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index c2fa21d..941dfb9 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -31,6 +31,7 @@ #include <asm/mach/arch.h> #include <asm/mach/map.h> +#include <asm/mach/pci.h> #include "mm.h" @@ -216,7 +217,7 @@ static struct mem_type mem_types[] = { .prot_l1 = PMD_TYPE_TABLE, .prot_sect = PROT_SECT_DEVICE | PMD_SECT_WB, .domain = DOMAIN_IO, - }, + }, [MT_DEVICE_WC] = { /* ioremap_wc */ .prot_pte = PROT_PTE_DEVICE | L_PTE_MT_DEV_WC, .prot_l1 = PMD_TYPE_TABLE, @@ -422,17 +423,6 @@ static void __init build_mem_type_table(void) vecs_pgprot = kern_pgprot = user_pgprot = cp->pte; /* - * Enable CPU-specific coherency if supported. - * (Only available on XSC3 at the moment.) - */ - if (arch_is_coherent() && cpu_is_xsc3()) { - mem_types[MT_MEMORY].prot_sect |= PMD_SECT_S; - mem_types[MT_MEMORY].prot_pte |= L_PTE_SHARED; - mem_types[MT_MEMORY_DMA_READY].prot_pte |= L_PTE_SHARED; - mem_types[MT_MEMORY_NONCACHED].prot_sect |= PMD_SECT_S; - mem_types[MT_MEMORY_NONCACHED].prot_pte |= L_PTE_SHARED; - } - /* * ARMv6 and above have extended page tables. */ if (cpu_arch >= CPU_ARCH_ARMv6 && (cr & CR_XP)) { @@ -777,14 +767,27 @@ void __init iotable_init(struct map_desc *io_desc, int nr) create_mapping(md); vm->addr = (void *)(md->virtual & PAGE_MASK); vm->size = PAGE_ALIGN(md->length + (md->virtual & ~PAGE_MASK)); - vm->phys_addr = __pfn_to_phys(md->pfn); - vm->flags = VM_IOREMAP | VM_ARM_STATIC_MAPPING; + vm->phys_addr = __pfn_to_phys(md->pfn); + vm->flags = VM_IOREMAP | VM_ARM_STATIC_MAPPING; vm->flags |= VM_ARM_MTYPE(md->type); vm->caller = iotable_init; vm_area_add_early(vm++); } } +void __init vm_reserve_area_early(unsigned long addr, unsigned long size, + void *caller) +{ + struct vm_struct *vm; + + vm = early_alloc_aligned(sizeof(*vm), __alignof__(*vm)); + vm->addr = (void *)addr; + vm->size = size; + vm->flags = VM_IOREMAP | VM_ARM_EMPTY_MAPPING; + vm->caller = caller; + vm_area_add_early(vm); +} + #ifndef CONFIG_ARM_LPAE /* @@ -802,14 +805,7 @@ void __init iotable_init(struct map_desc *io_desc, int nr) static void __init pmd_empty_section_gap(unsigned long addr) { - struct vm_struct *vm; - - vm = early_alloc_aligned(sizeof(*vm), __alignof__(*vm)); - vm->addr = (void *)addr; - vm->size = SECTION_SIZE; - vm->flags = VM_IOREMAP | VM_ARM_EMPTY_MAPPING; - vm->caller = pmd_empty_section_gap; - vm_area_add_early(vm); + vm_reserve_area_early(addr, SECTION_SIZE, pmd_empty_section_gap); } static void __init fill_pmd_gaps(void) @@ -858,6 +854,28 @@ static void __init fill_pmd_gaps(void) #define fill_pmd_gaps() do { } while (0) #endif +#if defined(CONFIG_PCI) && !defined(CONFIG_NEED_MACH_IO_H) +static void __init pci_reserve_io(void) +{ + struct vm_struct *vm; + unsigned long addr; + + /* we're still single threaded hence no lock needed here */ + for (vm = vmlist; vm; vm = vm->next) { + if (!(vm->flags & VM_ARM_STATIC_MAPPING)) + continue; + addr = (unsigned long)vm->addr; + addr &= ~(SZ_2M - 1); + if (addr == PCI_IO_VIRT_BASE) + return; + + } + vm_reserve_area_early(PCI_IO_VIRT_BASE, SZ_2M, pci_reserve_io); +} +#else +#define pci_reserve_io() do { } while (0) +#endif + static void * __initdata vmalloc_min = (void *)(VMALLOC_END - (240 << 20) - VMALLOC_OFFSET); @@ -1141,6 +1159,9 @@ static void __init devicemaps_init(struct machine_desc *mdesc) mdesc->map_io(); fill_pmd_gaps(); + /* Reserve fixed i/o space in VMALLOC region */ + pci_reserve_io(); + /* * Finally flush the caches and tlb to ensure that we're in a * consistent state wrt the writebuffer. This also ensures that |