From 09f3f326cda6b6e2dfc6471c7f8ac77696c87419 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Mon, 1 Jun 2015 21:11:35 +1000 Subject: powerpc/mm: Fix build break with STRICT_MM_TYPECHECKS && DEBUG_PAGEALLOC If both STRICT_MM_TYPECHECKS and DEBUG_PAGEALLOC are enabled, the code in kernel_map_linear_page() is built, and so we fail with: arch/powerpc/mm/hash_utils_64.c:1478:2: error: incompatible type for argument 1 of 'htab_convert_pte_flags' Fix it by using pgprot_val(). Reported-by: Geert Uytterhoeven Signed-off-by: Michael Ellerman --- arch/powerpc/mm/hash_utils_64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index fda236f..a04ca92 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -1475,7 +1475,7 @@ static void kernel_map_linear_page(unsigned long vaddr, unsigned long lmi) unsigned long hash; unsigned long vsid = get_kernel_vsid(vaddr, mmu_kernel_ssize); unsigned long vpn = hpt_vpn(vaddr, vsid, mmu_kernel_ssize); - unsigned long mode = htab_convert_pte_flags(PAGE_KERNEL); + unsigned long mode = htab_convert_pte_flags(pgprot_val(PAGE_KERNEL)); long ret; hash = hpt_hash(vpn, PAGE_SHIFT, mmu_kernel_ssize); -- cgit v1.1 From c89ca8ab7457d891abdffca7e6f5e74cef670d6d Mon Sep 17 00:00:00 2001 From: Scott Wood Date: Thu, 2 Apr 2015 22:14:11 -0500 Subject: powerpc/e6500: Optimize hugepage TLB misses Some workloads take a lot of TLB misses despite using traditional hugepages. Handle these TLB misses in the asm fastpath rather than going through a bunch of C code. With this patch I measured around a 5x speedup in handling hugepage TLB misses. Signed-off-by: Scott Wood --- arch/powerpc/mm/tlb_low_64e.S | 51 ++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 48 insertions(+), 3 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/tlb_low_64e.S b/arch/powerpc/mm/tlb_low_64e.S index 89bf95b..765b419 100644 --- a/arch/powerpc/mm/tlb_low_64e.S +++ b/arch/powerpc/mm/tlb_low_64e.S @@ -398,18 +398,18 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_SMT) rldicl r15,r16,64-PUD_SHIFT+3,64-PUD_INDEX_SIZE-3 clrrdi r15,r15,3 cmpdi cr0,r14,0 - bge tlb_miss_fault_e6500 /* Bad pgd entry or hugepage; bail */ + bge tlb_miss_huge_e6500 /* Bad pgd entry or hugepage; bail */ ldx r14,r14,r15 /* grab pud entry */ rldicl r15,r16,64-PMD_SHIFT+3,64-PMD_INDEX_SIZE-3 clrrdi r15,r15,3 cmpdi cr0,r14,0 - bge tlb_miss_fault_e6500 + bge tlb_miss_huge_e6500 ldx r14,r14,r15 /* Grab pmd entry */ mfspr r10,SPRN_MAS0 cmpdi cr0,r14,0 - bge tlb_miss_fault_e6500 + bge tlb_miss_huge_e6500 /* Now we build the MAS for a 2M indirect page: * @@ -428,6 +428,7 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_SMT) clrrdi r15,r16,21 /* make EA 2M-aligned */ mtspr SPRN_MAS2,r15 +tlb_miss_huge_done_e6500: lbz r15,TCD_ESEL_NEXT(r11) lbz r16,TCD_ESEL_MAX(r11) lbz r14,TCD_ESEL_FIRST(r11) @@ -456,6 +457,50 @@ END_FTR_SECTION_IFSET(CPU_FTR_SMT) tlb_epilog_bolted rfi +tlb_miss_huge_e6500: + beq tlb_miss_fault_e6500 + li r10,1 + andi. r15,r14,HUGEPD_SHIFT_MASK@l /* r15 = psize */ + rldimi r14,r10,63,0 /* Set PD_HUGE */ + xor r14,r14,r15 /* Clear size bits */ + ldx r14,0,r14 + + /* + * Now we build the MAS for a huge page. + * + * MAS 0 : ESEL needs to be filled by software round-robin + * - can be handled by indirect code + * MAS 1 : Need to clear IND and set TSIZE + * MAS 2,3+7: Needs to be redone similar to non-tablewalk handler + */ + + subi r15,r15,10 /* Convert psize to tsize */ + mfspr r10,SPRN_MAS1 + rlwinm r10,r10,0,~MAS1_IND + rlwimi r10,r15,MAS1_TSIZE_SHIFT,MAS1_TSIZE_MASK + mtspr SPRN_MAS1,r10 + + li r10,-0x400 + sld r15,r10,r15 /* Generate mask based on size */ + and r10,r16,r15 + rldicr r15,r14,64-(PTE_RPN_SHIFT-PAGE_SHIFT),63-PAGE_SHIFT + rlwimi r10,r14,32-19,27,31 /* Insert WIMGE */ + clrldi r15,r15,PAGE_SHIFT /* Clear crap at the top */ + rlwimi r15,r14,32-8,22,25 /* Move in U bits */ + mtspr SPRN_MAS2,r10 + andi. r10,r14,_PAGE_DIRTY + rlwimi r15,r14,32-2,26,31 /* Move in BAP bits */ + + /* Mask out SW and UW if !DIRTY (XXX optimize this !) */ + bne 1f + li r10,MAS3_SW|MAS3_UW + andc r15,r15,r10 +1: + mtspr SPRN_MAS7_MAS3,r15 + + mfspr r10,SPRN_MAS0 + b tlb_miss_huge_done_e6500 + tlb_miss_kernel_e6500: ld r14,PACA_KERNELPGD(r13) cmpldi cr1,r15,8 /* Check for vmalloc region */ -- cgit v1.1 From 6c0cc62715bfd0b26f7d1a79e6e8143085950ca7 Mon Sep 17 00:00:00 2001 From: Scott Wood Date: Fri, 17 Apr 2015 16:17:14 -0500 Subject: powerpc/mm: Use PFN_PHYS() in devmem_is_allowed() This function can run on systems where physical addresses don't fit in unsigned long, so make sure to use the macro that contains the proper cast. Signed-off-by: Scott Wood --- arch/powerpc/mm/mem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 45fda71..0f11819 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -560,7 +560,7 @@ subsys_initcall(add_system_ram_resources); */ int devmem_is_allowed(unsigned long pfn) { - if (iomem_is_exclusive(pfn << PAGE_SHIFT)) + if (iomem_is_exclusive(PFN_PHYS(pfn))) return 0; if (!page_is_ram(pfn)) return 1; -- cgit v1.1 From 85a97da9584868b6cd0c54b93c5f65319f615b08 Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Wed, 27 May 2015 16:06:55 +1000 Subject: powerpc/copro: Fix faulting kernel segments This fixes calculating the key bits (KP and KS) in the SLB VSID for kernel mappings. I'm not CCing this to stable as there are no uses of this currently. Signed-off-by: Michael Neuling Signed-off-by: Michael Ellerman --- arch/powerpc/mm/copro_fault.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/copro_fault.c b/arch/powerpc/mm/copro_fault.c index f031a47..7a71d7f 100644 --- a/arch/powerpc/mm/copro_fault.c +++ b/arch/powerpc/mm/copro_fault.c @@ -100,7 +100,7 @@ EXPORT_SYMBOL_GPL(copro_handle_mm_fault); int copro_calculate_slb(struct mm_struct *mm, u64 ea, struct copro_slb *slb) { - u64 vsid; + u64 vsid, vsidkey; int psize, ssize; switch (REGION_ID(ea)) { @@ -109,6 +109,7 @@ int copro_calculate_slb(struct mm_struct *mm, u64 ea, struct copro_slb *slb) psize = get_slice_psize(mm, ea); ssize = user_segment_size(ea); vsid = get_vsid(mm->context.id, ea, ssize); + vsidkey = SLB_VSID_USER; break; case VMALLOC_REGION_ID: pr_devel("%s: 0x%llx -- VMALLOC_REGION_ID\n", __func__, ea); @@ -118,19 +119,21 @@ int copro_calculate_slb(struct mm_struct *mm, u64 ea, struct copro_slb *slb) psize = mmu_io_psize; ssize = mmu_kernel_ssize; vsid = get_kernel_vsid(ea, mmu_kernel_ssize); + vsidkey = SLB_VSID_KERNEL; break; case KERNEL_REGION_ID: pr_devel("%s: 0x%llx -- KERNEL_REGION_ID\n", __func__, ea); psize = mmu_linear_psize; ssize = mmu_kernel_ssize; vsid = get_kernel_vsid(ea, mmu_kernel_ssize); + vsidkey = SLB_VSID_KERNEL; break; default: pr_debug("%s: invalid region access at %016llx\n", __func__, ea); return 1; } - vsid = (vsid << slb_vsid_shift(ssize)) | SLB_VSID_USER; + vsid = (vsid << slb_vsid_shift(ssize)) | vsidkey; vsid |= mmu_psize_defs[psize].sllp | ((ssize == MMU_SEGSIZE_1T) ? SLB_VSID_B_1T : 0); -- cgit v1.1 From ec249dd860ed88e15b3e2bd363cbfc76ba8c1884 Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Wed, 27 May 2015 16:07:16 +1000 Subject: cxl: Move include file cxl.h -> cxl-base.h This moves the current include file from cxl.h -> cxl-base.h. This current include file is used only to pass information between the base driver that needs to be built into the kernel and the cxl module. This is to make way for a new include/misc/cxl.h which will contain just the kernel API for other driver to use Signed-off-by: Michael Neuling Acked-by: Ian Munsie Signed-off-by: Michael Ellerman --- arch/powerpc/mm/copro_fault.c | 2 +- arch/powerpc/mm/hash_native_64.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/copro_fault.c b/arch/powerpc/mm/copro_fault.c index 7a71d7f..6527882 100644 --- a/arch/powerpc/mm/copro_fault.c +++ b/arch/powerpc/mm/copro_fault.c @@ -26,7 +26,7 @@ #include #include #include -#include +#include /* * This ought to be kept in sync with the powerpc specific do_page_fault diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c index 9c4880d..13befa35 100644 --- a/arch/powerpc/mm/hash_native_64.c +++ b/arch/powerpc/mm/hash_native_64.c @@ -29,7 +29,7 @@ #include #include -#include +#include #ifdef DEBUG_LOW #define DBG_LOW(fmt...) udbg_printf(fmt) -- cgit v1.1 From cfcb3d80a28380ba027331eb548ba309c4b66559 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Tue, 14 Apr 2015 13:05:57 +0530 Subject: powerpc/mm: Add trace point for tracking hash pte fault This enables us to understand how many hash fault we are taking when running benchmarks. For ex: -bash-4.2# ./perf stat -e powerpc:hash_fault -e page-faults /tmp/ebizzy.ppc64 -S 30 -P -n 1000 ... Performance counter stats for '/tmp/ebizzy.ppc64 -S 30 -P -n 1000': 1,10,04,075 powerpc:hash_fault 1,10,03,429 page-faults 30.865978991 seconds time elapsed NOTE: The impact of the tracepoint was not noticeable when running test. It was within the run-time variance of the test. For ex: without-patch: -------------- Performance counter stats for './a.out 3000 300': 643 page-faults # 0.089 M/sec 7.236562 task-clock (msec) # 0.928 CPUs utilized 2,179,213 stalled-cycles-frontend # 0.00% frontend cycles idle 17,174,367 stalled-cycles-backend # 0.00% backend cycles idle 0 context-switches # 0.000 K/sec 0.007794658 seconds time elapsed And with-patch: --------------- Performance counter stats for './a.out 3000 300': 643 page-faults # 0.089 M/sec 7.233746 task-clock (msec) # 0.921 CPUs utilized 0 context-switches # 0.000 K/sec 0.007854876 seconds time elapsed Performance counter stats for './a.out 3000 300': 643 page-faults # 0.087 M/sec 649 powerpc:hash_fault # 0.087 M/sec 7.430376 task-clock (msec) # 0.938 CPUs utilized 2,347,174 stalled-cycles-frontend # 0.00% frontend cycles idle 17,524,282 stalled-cycles-backend # 0.00% backend cycles idle 0 context-switches # 0.000 K/sec 0.007920284 seconds time elapsed Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- arch/powerpc/mm/hash_utils_64.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index a04ca92..5ec987f 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -57,6 +57,7 @@ #include #include #include +#include #ifdef DEBUG #define DBG(fmt...) udbg_printf(fmt) @@ -1004,6 +1005,7 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea, DBG_LOW("hash_page(ea=%016lx, access=%lx, trap=%lx\n", ea, access, trap); + trace_hash_fault(ea, access, trap); /* Get region & vsid */ switch (REGION_ID(ea)) { -- cgit v1.1 From 15b244a88e1b2895605be4300b40b575345bcf50 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Fri, 5 Jun 2015 16:35:24 +1000 Subject: powerpc/mmu: Add userspace-to-physical addresses translation cache We are adding support for DMA memory pre-registration to be used in conjunction with VFIO. The idea is that the userspace which is going to run a guest may want to pre-register a user space memory region so it all gets pinned once and never goes away. Having this done, a hypervisor will not have to pin/unpin pages on every DMA map/unmap request. This is going to help with multiple pinning of the same memory. Another use of it is in-kernel real mode (mmu off) acceleration of DMA requests where real time translation of guest physical to host physical addresses is non-trivial and may fail as linux ptes may be temporarily invalid. Also, having cached host physical addresses (compared to just pinning at the start and then walking the page table again on every H_PUT_TCE), we can be sure that the addresses which we put into TCE table are the ones we already pinned. This adds a list of memory regions to mm_context_t. Each region consists of a header and a list of physical addresses. This adds API to: 1. register/unregister memory regions; 2. do final cleanup (which puts all pre-registered pages); 3. do userspace to physical address translation; 4. manage usage counters; multiple registration of the same memory is allowed (once per container). This implements 2 counters per registered memory region: - @mapped: incremented on every DMA mapping; decremented on unmapping; initialized to 1 when a region is just registered; once it becomes zero, no more mappings allowe; - @used: incremented on every "register" ioctl; decremented on "unregister"; unregistration is allowed for DMA mapped regions unless it is the very last reference. For the very last reference this checks that the region is still mapped and returns -EBUSY so the userspace gets to know that memory is still pinned and unregistration needs to be retried; @used remains 1. Host physical addresses are stored in vmalloc'ed array. In order to access these in the real mode (mmu off), there is a real_vmalloc_addr() helper. In-kernel acceleration patchset will move it from KVM to MMU code. Signed-off-by: Alexey Kardashevskiy Reviewed-by: David Gibson Reviewed-by: David Gibson Signed-off-by: Michael Ellerman --- arch/powerpc/mm/Makefile | 1 + arch/powerpc/mm/mmu_context_hash64.c | 6 + arch/powerpc/mm/mmu_context_iommu.c | 316 +++++++++++++++++++++++++++++++++++ 3 files changed, 323 insertions(+) create mode 100644 arch/powerpc/mm/mmu_context_iommu.c (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile index 9c8770b..3eb73a3 100644 --- a/arch/powerpc/mm/Makefile +++ b/arch/powerpc/mm/Makefile @@ -36,3 +36,4 @@ obj-$(CONFIG_PPC_SUBPAGE_PROT) += subpage-prot.o obj-$(CONFIG_NOT_COHERENT_CACHE) += dma-noncoherent.o obj-$(CONFIG_HIGHMEM) += highmem.o obj-$(CONFIG_PPC_COPRO_BASE) += copro_fault.o +obj-$(CONFIG_SPAPR_TCE_IOMMU) += mmu_context_iommu.o diff --git a/arch/powerpc/mm/mmu_context_hash64.c b/arch/powerpc/mm/mmu_context_hash64.c index 178876ae..4e4efbc 100644 --- a/arch/powerpc/mm/mmu_context_hash64.c +++ b/arch/powerpc/mm/mmu_context_hash64.c @@ -89,6 +89,9 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm) #ifdef CONFIG_PPC_64K_PAGES mm->context.pte_frag = NULL; #endif +#ifdef CONFIG_SPAPR_TCE_IOMMU + mm_iommu_init(&mm->context); +#endif return 0; } @@ -132,6 +135,9 @@ static inline void destroy_pagetable_page(struct mm_struct *mm) void destroy_context(struct mm_struct *mm) { +#ifdef CONFIG_SPAPR_TCE_IOMMU + mm_iommu_cleanup(&mm->context); +#endif #ifdef CONFIG_PPC_ICSWX drop_cop(mm->context.acop, mm); diff --git a/arch/powerpc/mm/mmu_context_iommu.c b/arch/powerpc/mm/mmu_context_iommu.c new file mode 100644 index 0000000..da6a216 --- /dev/null +++ b/arch/powerpc/mm/mmu_context_iommu.c @@ -0,0 +1,316 @@ +/* + * IOMMU helpers in MMU context. + * + * Copyright (C) 2015 IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#include +#include +#include +#include +#include +#include + +static DEFINE_MUTEX(mem_list_mutex); + +struct mm_iommu_table_group_mem_t { + struct list_head next; + struct rcu_head rcu; + unsigned long used; + atomic64_t mapped; + u64 ua; /* userspace address */ + u64 entries; /* number of entries in hpas[] */ + u64 *hpas; /* vmalloc'ed */ +}; + +static long mm_iommu_adjust_locked_vm(struct mm_struct *mm, + unsigned long npages, bool incr) +{ + long ret = 0, locked, lock_limit; + + if (!npages) + return 0; + + down_write(&mm->mmap_sem); + + if (incr) { + locked = mm->locked_vm + npages; + lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; + if (locked > lock_limit && !capable(CAP_IPC_LOCK)) + ret = -ENOMEM; + else + mm->locked_vm += npages; + } else { + if (WARN_ON_ONCE(npages > mm->locked_vm)) + npages = mm->locked_vm; + mm->locked_vm -= npages; + } + + pr_debug("[%d] RLIMIT_MEMLOCK HASH64 %c%ld %ld/%ld\n", + current->pid, + incr ? '+' : '-', + npages << PAGE_SHIFT, + mm->locked_vm << PAGE_SHIFT, + rlimit(RLIMIT_MEMLOCK)); + up_write(&mm->mmap_sem); + + return ret; +} + +bool mm_iommu_preregistered(void) +{ + if (!current || !current->mm) + return false; + + return !list_empty(¤t->mm->context.iommu_group_mem_list); +} +EXPORT_SYMBOL_GPL(mm_iommu_preregistered); + +long mm_iommu_get(unsigned long ua, unsigned long entries, + struct mm_iommu_table_group_mem_t **pmem) +{ + struct mm_iommu_table_group_mem_t *mem; + long i, j, ret = 0, locked_entries = 0; + struct page *page = NULL; + + if (!current || !current->mm) + return -ESRCH; /* process exited */ + + mutex_lock(&mem_list_mutex); + + list_for_each_entry_rcu(mem, ¤t->mm->context.iommu_group_mem_list, + next) { + if ((mem->ua == ua) && (mem->entries == entries)) { + ++mem->used; + *pmem = mem; + goto unlock_exit; + } + + /* Overlap? */ + if ((mem->ua < (ua + (entries << PAGE_SHIFT))) && + (ua < (mem->ua + + (mem->entries << PAGE_SHIFT)))) { + ret = -EINVAL; + goto unlock_exit; + } + + } + + ret = mm_iommu_adjust_locked_vm(current->mm, entries, true); + if (ret) + goto unlock_exit; + + locked_entries = entries; + + mem = kzalloc(sizeof(*mem), GFP_KERNEL); + if (!mem) { + ret = -ENOMEM; + goto unlock_exit; + } + + mem->hpas = vzalloc(entries * sizeof(mem->hpas[0])); + if (!mem->hpas) { + kfree(mem); + ret = -ENOMEM; + goto unlock_exit; + } + + for (i = 0; i < entries; ++i) { + if (1 != get_user_pages_fast(ua + (i << PAGE_SHIFT), + 1/* pages */, 1/* iswrite */, &page)) { + for (j = 0; j < i; ++j) + put_page(pfn_to_page( + mem->hpas[j] >> PAGE_SHIFT)); + vfree(mem->hpas); + kfree(mem); + ret = -EFAULT; + goto unlock_exit; + } + + mem->hpas[i] = page_to_pfn(page) << PAGE_SHIFT; + } + + atomic64_set(&mem->mapped, 1); + mem->used = 1; + mem->ua = ua; + mem->entries = entries; + *pmem = mem; + + list_add_rcu(&mem->next, ¤t->mm->context.iommu_group_mem_list); + +unlock_exit: + if (locked_entries && ret) + mm_iommu_adjust_locked_vm(current->mm, locked_entries, false); + + mutex_unlock(&mem_list_mutex); + + return ret; +} +EXPORT_SYMBOL_GPL(mm_iommu_get); + +static void mm_iommu_unpin(struct mm_iommu_table_group_mem_t *mem) +{ + long i; + struct page *page = NULL; + + for (i = 0; i < mem->entries; ++i) { + if (!mem->hpas[i]) + continue; + + page = pfn_to_page(mem->hpas[i] >> PAGE_SHIFT); + if (!page) + continue; + + put_page(page); + mem->hpas[i] = 0; + } +} + +static void mm_iommu_do_free(struct mm_iommu_table_group_mem_t *mem) +{ + + mm_iommu_unpin(mem); + vfree(mem->hpas); + kfree(mem); +} + +static void mm_iommu_free(struct rcu_head *head) +{ + struct mm_iommu_table_group_mem_t *mem = container_of(head, + struct mm_iommu_table_group_mem_t, rcu); + + mm_iommu_do_free(mem); +} + +static void mm_iommu_release(struct mm_iommu_table_group_mem_t *mem) +{ + list_del_rcu(&mem->next); + mm_iommu_adjust_locked_vm(current->mm, mem->entries, false); + call_rcu(&mem->rcu, mm_iommu_free); +} + +long mm_iommu_put(struct mm_iommu_table_group_mem_t *mem) +{ + long ret = 0; + + if (!current || !current->mm) + return -ESRCH; /* process exited */ + + mutex_lock(&mem_list_mutex); + + if (mem->used == 0) { + ret = -ENOENT; + goto unlock_exit; + } + + --mem->used; + /* There are still users, exit */ + if (mem->used) + goto unlock_exit; + + /* Are there still mappings? */ + if (atomic_cmpxchg(&mem->mapped, 1, 0) != 1) { + ++mem->used; + ret = -EBUSY; + goto unlock_exit; + } + + /* @mapped became 0 so now mappings are disabled, release the region */ + mm_iommu_release(mem); + +unlock_exit: + mutex_unlock(&mem_list_mutex); + + return ret; +} +EXPORT_SYMBOL_GPL(mm_iommu_put); + +struct mm_iommu_table_group_mem_t *mm_iommu_lookup(unsigned long ua, + unsigned long size) +{ + struct mm_iommu_table_group_mem_t *mem, *ret = NULL; + + list_for_each_entry_rcu(mem, + ¤t->mm->context.iommu_group_mem_list, + next) { + if ((mem->ua <= ua) && + (ua + size <= mem->ua + + (mem->entries << PAGE_SHIFT))) { + ret = mem; + break; + } + } + + return ret; +} +EXPORT_SYMBOL_GPL(mm_iommu_lookup); + +struct mm_iommu_table_group_mem_t *mm_iommu_find(unsigned long ua, + unsigned long entries) +{ + struct mm_iommu_table_group_mem_t *mem, *ret = NULL; + + list_for_each_entry_rcu(mem, + ¤t->mm->context.iommu_group_mem_list, + next) { + if ((mem->ua == ua) && (mem->entries == entries)) { + ret = mem; + break; + } + } + + return ret; +} +EXPORT_SYMBOL_GPL(mm_iommu_find); + +long mm_iommu_ua_to_hpa(struct mm_iommu_table_group_mem_t *mem, + unsigned long ua, unsigned long *hpa) +{ + const long entry = (ua - mem->ua) >> PAGE_SHIFT; + u64 *va = &mem->hpas[entry]; + + if (entry >= mem->entries) + return -EFAULT; + + *hpa = *va | (ua & ~PAGE_MASK); + + return 0; +} +EXPORT_SYMBOL_GPL(mm_iommu_ua_to_hpa); + +long mm_iommu_mapped_inc(struct mm_iommu_table_group_mem_t *mem) +{ + if (atomic64_inc_not_zero(&mem->mapped)) + return 0; + + /* Last mm_iommu_put() has been called, no more mappings allowed() */ + return -ENXIO; +} +EXPORT_SYMBOL_GPL(mm_iommu_mapped_inc); + +void mm_iommu_mapped_dec(struct mm_iommu_table_group_mem_t *mem) +{ + atomic64_add_unless(&mem->mapped, -1, 1); +} +EXPORT_SYMBOL_GPL(mm_iommu_mapped_dec); + +void mm_iommu_init(mm_context_t *ctx) +{ + INIT_LIST_HEAD_RCU(&ctx->iommu_group_mem_list); +} + +void mm_iommu_cleanup(mm_context_t *ctx) +{ + struct mm_iommu_table_group_mem_t *mem, *tmp; + + list_for_each_entry_safe(mem, tmp, &ctx->iommu_group_mem_list, next) { + list_del_rcu(&mem->next); + mm_iommu_do_free(mem); + } +} -- cgit v1.1