diff options
Diffstat (limited to 'arch/arm/kvm/mmu.c')
-rw-r--r-- | arch/arm/kvm/mmu.c | 455 |
1 files changed, 257 insertions, 198 deletions
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index 2f12e40..9657065 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -32,8 +32,15 @@ extern char __hyp_idmap_text_start[], __hyp_idmap_text_end[]; +static pgd_t *boot_hyp_pgd; +static pgd_t *hyp_pgd; static DEFINE_MUTEX(kvm_hyp_pgd_mutex); +static void *init_bounce_page; +static unsigned long hyp_idmap_start; +static unsigned long hyp_idmap_end; +static phys_addr_t hyp_idmap_vector; + static void kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa) { kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, kvm, ipa); @@ -71,172 +78,224 @@ static void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc) return p; } -static void free_ptes(pmd_t *pmd, unsigned long addr) +static void clear_pud_entry(pud_t *pud) { - pte_t *pte; - unsigned int i; + pmd_t *pmd_table = pmd_offset(pud, 0); + pud_clear(pud); + pmd_free(NULL, pmd_table); + put_page(virt_to_page(pud)); +} - for (i = 0; i < PTRS_PER_PMD; i++, addr += PMD_SIZE) { - if (!pmd_none(*pmd) && pmd_table(*pmd)) { - pte = pte_offset_kernel(pmd, addr); - pte_free_kernel(NULL, pte); - } - pmd++; +static void clear_pmd_entry(pmd_t *pmd) +{ + pte_t *pte_table = pte_offset_kernel(pmd, 0); + pmd_clear(pmd); + pte_free_kernel(NULL, pte_table); + put_page(virt_to_page(pmd)); +} + +static bool pmd_empty(pmd_t *pmd) +{ + struct page *pmd_page = virt_to_page(pmd); + return page_count(pmd_page) == 1; +} + +static void clear_pte_entry(pte_t *pte) +{ + if (pte_present(*pte)) { + kvm_set_pte(pte, __pte(0)); + put_page(virt_to_page(pte)); } } -static void free_hyp_pgd_entry(unsigned long addr) +static bool pte_empty(pte_t *pte) +{ + struct page *pte_page = virt_to_page(pte); + return page_count(pte_page) == 1; +} + +static void unmap_range(pgd_t *pgdp, unsigned long long start, u64 size) { pgd_t *pgd; pud_t *pud; pmd_t *pmd; - unsigned long hyp_addr = KERN_TO_HYP(addr); + pte_t *pte; + unsigned long long addr = start, end = start + size; + u64 range; + + while (addr < end) { + pgd = pgdp + pgd_index(addr); + pud = pud_offset(pgd, addr); + if (pud_none(*pud)) { + addr += PUD_SIZE; + continue; + } - pgd = hyp_pgd + pgd_index(hyp_addr); - pud = pud_offset(pgd, hyp_addr); + pmd = pmd_offset(pud, addr); + if (pmd_none(*pmd)) { + addr += PMD_SIZE; + continue; + } - if (pud_none(*pud)) - return; - BUG_ON(pud_bad(*pud)); + pte = pte_offset_kernel(pmd, addr); + clear_pte_entry(pte); + range = PAGE_SIZE; - pmd = pmd_offset(pud, hyp_addr); - free_ptes(pmd, addr); - pmd_free(NULL, pmd); - pud_clear(pud); + /* If we emptied the pte, walk back up the ladder */ + if (pte_empty(pte)) { + clear_pmd_entry(pmd); + range = PMD_SIZE; + if (pmd_empty(pmd)) { + clear_pud_entry(pud); + range = PUD_SIZE; + } + } + + addr += range; + } } /** - * free_hyp_pmds - free a Hyp-mode level-2 tables and child level-3 tables + * free_boot_hyp_pgd - free HYP boot page tables * - * Assumes this is a page table used strictly in Hyp-mode and therefore contains - * either mappings in the kernel memory area (above PAGE_OFFSET), or - * device mappings in the vmalloc range (from VMALLOC_START to VMALLOC_END). + * Free the HYP boot page tables. The bounce page is also freed. */ -void free_hyp_pmds(void) +void free_boot_hyp_pgd(void) { - unsigned long addr; - mutex_lock(&kvm_hyp_pgd_mutex); - for (addr = PAGE_OFFSET; virt_addr_valid(addr); addr += PGDIR_SIZE) - free_hyp_pgd_entry(addr); - for (addr = VMALLOC_START; is_vmalloc_addr((void*)addr); addr += PGDIR_SIZE) - free_hyp_pgd_entry(addr); + + if (boot_hyp_pgd) { + unmap_range(boot_hyp_pgd, hyp_idmap_start, PAGE_SIZE); + unmap_range(boot_hyp_pgd, TRAMPOLINE_VA, PAGE_SIZE); + kfree(boot_hyp_pgd); + boot_hyp_pgd = NULL; + } + + if (hyp_pgd) + unmap_range(hyp_pgd, TRAMPOLINE_VA, PAGE_SIZE); + + kfree(init_bounce_page); + init_bounce_page = NULL; + mutex_unlock(&kvm_hyp_pgd_mutex); } -static void create_hyp_pte_mappings(pmd_t *pmd, unsigned long start, - unsigned long end) +/** + * free_hyp_pgds - free Hyp-mode page tables + * + * Assumes hyp_pgd is a page table used strictly in Hyp-mode and + * therefore contains either mappings in the kernel memory area (above + * PAGE_OFFSET), or device mappings in the vmalloc range (from + * VMALLOC_START to VMALLOC_END). + * + * boot_hyp_pgd should only map two pages for the init code. + */ +void free_hyp_pgds(void) { - pte_t *pte; unsigned long addr; - struct page *page; - for (addr = start & PAGE_MASK; addr < end; addr += PAGE_SIZE) { - unsigned long hyp_addr = KERN_TO_HYP(addr); + free_boot_hyp_pgd(); + + mutex_lock(&kvm_hyp_pgd_mutex); - pte = pte_offset_kernel(pmd, hyp_addr); - BUG_ON(!virt_addr_valid(addr)); - page = virt_to_page(addr); - kvm_set_pte(pte, mk_pte(page, PAGE_HYP)); + if (hyp_pgd) { + for (addr = PAGE_OFFSET; virt_addr_valid(addr); addr += PGDIR_SIZE) + unmap_range(hyp_pgd, KERN_TO_HYP(addr), PGDIR_SIZE); + for (addr = VMALLOC_START; is_vmalloc_addr((void*)addr); addr += PGDIR_SIZE) + unmap_range(hyp_pgd, KERN_TO_HYP(addr), PGDIR_SIZE); + kfree(hyp_pgd); + hyp_pgd = NULL; } + + mutex_unlock(&kvm_hyp_pgd_mutex); } -static void create_hyp_io_pte_mappings(pmd_t *pmd, unsigned long start, - unsigned long end, - unsigned long *pfn_base) +static void create_hyp_pte_mappings(pmd_t *pmd, unsigned long start, + unsigned long end, unsigned long pfn, + pgprot_t prot) { pte_t *pte; unsigned long addr; - for (addr = start & PAGE_MASK; addr < end; addr += PAGE_SIZE) { - unsigned long hyp_addr = KERN_TO_HYP(addr); - - pte = pte_offset_kernel(pmd, hyp_addr); - BUG_ON(pfn_valid(*pfn_base)); - kvm_set_pte(pte, pfn_pte(*pfn_base, PAGE_HYP_DEVICE)); - (*pfn_base)++; - } + addr = start; + do { + pte = pte_offset_kernel(pmd, addr); + kvm_set_pte(pte, pfn_pte(pfn, prot)); + get_page(virt_to_page(pte)); + kvm_flush_dcache_to_poc(pte, sizeof(*pte)); + pfn++; + } while (addr += PAGE_SIZE, addr != end); } static int create_hyp_pmd_mappings(pud_t *pud, unsigned long start, - unsigned long end, unsigned long *pfn_base) + unsigned long end, unsigned long pfn, + pgprot_t prot) { pmd_t *pmd; pte_t *pte; unsigned long addr, next; - for (addr = start; addr < end; addr = next) { - unsigned long hyp_addr = KERN_TO_HYP(addr); - pmd = pmd_offset(pud, hyp_addr); + addr = start; + do { + pmd = pmd_offset(pud, addr); BUG_ON(pmd_sect(*pmd)); if (pmd_none(*pmd)) { - pte = pte_alloc_one_kernel(NULL, hyp_addr); + pte = pte_alloc_one_kernel(NULL, addr); if (!pte) { kvm_err("Cannot allocate Hyp pte\n"); return -ENOMEM; } pmd_populate_kernel(NULL, pmd, pte); + get_page(virt_to_page(pmd)); + kvm_flush_dcache_to_poc(pmd, sizeof(*pmd)); } next = pmd_addr_end(addr, end); - /* - * If pfn_base is NULL, we map kernel pages into HYP with the - * virtual address. Otherwise, this is considered an I/O - * mapping and we map the physical region starting at - * *pfn_base to [start, end[. - */ - if (!pfn_base) - create_hyp_pte_mappings(pmd, addr, next); - else - create_hyp_io_pte_mappings(pmd, addr, next, pfn_base); - } + create_hyp_pte_mappings(pmd, addr, next, pfn, prot); + pfn += (next - addr) >> PAGE_SHIFT; + } while (addr = next, addr != end); return 0; } -static int __create_hyp_mappings(void *from, void *to, unsigned long *pfn_base) +static int __create_hyp_mappings(pgd_t *pgdp, + unsigned long start, unsigned long end, + unsigned long pfn, pgprot_t prot) { - unsigned long start = (unsigned long)from; - unsigned long end = (unsigned long)to; pgd_t *pgd; pud_t *pud; pmd_t *pmd; unsigned long addr, next; int err = 0; - if (start >= end) - return -EINVAL; - /* Check for a valid kernel memory mapping */ - if (!pfn_base && (!virt_addr_valid(from) || !virt_addr_valid(to - 1))) - return -EINVAL; - /* Check for a valid kernel IO mapping */ - if (pfn_base && (!is_vmalloc_addr(from) || !is_vmalloc_addr(to - 1))) - return -EINVAL; - mutex_lock(&kvm_hyp_pgd_mutex); - for (addr = start; addr < end; addr = next) { - unsigned long hyp_addr = KERN_TO_HYP(addr); - pgd = hyp_pgd + pgd_index(hyp_addr); - pud = pud_offset(pgd, hyp_addr); + addr = start & PAGE_MASK; + end = PAGE_ALIGN(end); + do { + pgd = pgdp + pgd_index(addr); + pud = pud_offset(pgd, addr); if (pud_none_or_clear_bad(pud)) { - pmd = pmd_alloc_one(NULL, hyp_addr); + pmd = pmd_alloc_one(NULL, addr); if (!pmd) { kvm_err("Cannot allocate Hyp pmd\n"); err = -ENOMEM; goto out; } pud_populate(NULL, pud, pmd); + get_page(virt_to_page(pud)); + kvm_flush_dcache_to_poc(pud, sizeof(*pud)); } next = pgd_addr_end(addr, end); - err = create_hyp_pmd_mappings(pud, addr, next, pfn_base); + err = create_hyp_pmd_mappings(pud, addr, next, pfn, prot); if (err) goto out; - } + pfn += (next - addr) >> PAGE_SHIFT; + } while (addr = next, addr != end); out: mutex_unlock(&kvm_hyp_pgd_mutex); return err; @@ -250,27 +309,41 @@ out: * The same virtual address as the kernel virtual address is also used * in Hyp-mode mapping (modulo HYP_PAGE_OFFSET) to the same underlying * physical pages. - * - * Note: Wrapping around zero in the "to" address is not supported. */ int create_hyp_mappings(void *from, void *to) { - return __create_hyp_mappings(from, to, NULL); + unsigned long phys_addr = virt_to_phys(from); + unsigned long start = KERN_TO_HYP((unsigned long)from); + unsigned long end = KERN_TO_HYP((unsigned long)to); + + /* Check for a valid kernel memory mapping */ + if (!virt_addr_valid(from) || !virt_addr_valid(to - 1)) + return -EINVAL; + + return __create_hyp_mappings(hyp_pgd, start, end, + __phys_to_pfn(phys_addr), PAGE_HYP); } /** * create_hyp_io_mappings - duplicate a kernel IO mapping into Hyp mode * @from: The kernel start VA of the range * @to: The kernel end VA of the range (exclusive) - * @addr: The physical start address which gets mapped + * @phys_addr: The physical start address which gets mapped * * The resulting HYP VA is the same as the kernel VA, modulo * HYP_PAGE_OFFSET. */ -int create_hyp_io_mappings(void *from, void *to, phys_addr_t addr) +int create_hyp_io_mappings(void *from, void *to, phys_addr_t phys_addr) { - unsigned long pfn = __phys_to_pfn(addr); - return __create_hyp_mappings(from, to, &pfn); + unsigned long start = KERN_TO_HYP((unsigned long)from); + unsigned long end = KERN_TO_HYP((unsigned long)to); + + /* Check for a valid kernel IO mapping */ + if (!is_vmalloc_addr(from) || !is_vmalloc_addr(to - 1)) + return -EINVAL; + + return __create_hyp_mappings(hyp_pgd, start, end, + __phys_to_pfn(phys_addr), PAGE_HYP_DEVICE); } /** @@ -307,42 +380,6 @@ int kvm_alloc_stage2_pgd(struct kvm *kvm) return 0; } -static void clear_pud_entry(pud_t *pud) -{ - pmd_t *pmd_table = pmd_offset(pud, 0); - pud_clear(pud); - pmd_free(NULL, pmd_table); - put_page(virt_to_page(pud)); -} - -static void clear_pmd_entry(pmd_t *pmd) -{ - pte_t *pte_table = pte_offset_kernel(pmd, 0); - pmd_clear(pmd); - pte_free_kernel(NULL, pte_table); - put_page(virt_to_page(pmd)); -} - -static bool pmd_empty(pmd_t *pmd) -{ - struct page *pmd_page = virt_to_page(pmd); - return page_count(pmd_page) == 1; -} - -static void clear_pte_entry(pte_t *pte) -{ - if (pte_present(*pte)) { - kvm_set_pte(pte, __pte(0)); - put_page(virt_to_page(pte)); - } -} - -static bool pte_empty(pte_t *pte) -{ - struct page *pte_page = virt_to_page(pte); - return page_count(pte_page) == 1; -} - /** * unmap_stage2_range -- Clear stage2 page table entries to unmap a range * @kvm: The VM pointer @@ -356,43 +393,7 @@ static bool pte_empty(pte_t *pte) */ static void unmap_stage2_range(struct kvm *kvm, phys_addr_t start, u64 size) { - pgd_t *pgd; - pud_t *pud; - pmd_t *pmd; - pte_t *pte; - phys_addr_t addr = start, end = start + size; - u64 range; - - while (addr < end) { - pgd = kvm->arch.pgd + pgd_index(addr); - pud = pud_offset(pgd, addr); - if (pud_none(*pud)) { - addr += PUD_SIZE; - continue; - } - - pmd = pmd_offset(pud, addr); - if (pmd_none(*pmd)) { - addr += PMD_SIZE; - continue; - } - - pte = pte_offset_kernel(pmd, addr); - clear_pte_entry(pte); - range = PAGE_SIZE; - - /* If we emptied the pte, walk back up the ladder */ - if (pte_empty(pte)) { - clear_pmd_entry(pmd); - range = PMD_SIZE; - if (pmd_empty(pmd)) { - clear_pud_entry(pud); - range = PUD_SIZE; - } - } - - addr += range; - } + unmap_range(kvm->arch.pgd, start, size); } /** @@ -728,47 +729,105 @@ void kvm_mmu_free_memory_caches(struct kvm_vcpu *vcpu) phys_addr_t kvm_mmu_get_httbr(void) { - VM_BUG_ON(!virt_addr_valid(hyp_pgd)); return virt_to_phys(hyp_pgd); } +phys_addr_t kvm_mmu_get_boot_httbr(void) +{ + return virt_to_phys(boot_hyp_pgd); +} + +phys_addr_t kvm_get_idmap_vector(void) +{ + return hyp_idmap_vector; +} + int kvm_mmu_init(void) { - if (!hyp_pgd) { + int err; + + hyp_idmap_start = virt_to_phys(__hyp_idmap_text_start); + hyp_idmap_end = virt_to_phys(__hyp_idmap_text_end); + hyp_idmap_vector = virt_to_phys(__kvm_hyp_init); + + if ((hyp_idmap_start ^ hyp_idmap_end) & PAGE_MASK) { + /* + * Our init code is crossing a page boundary. Allocate + * a bounce page, copy the code over and use that. + */ + size_t len = __hyp_idmap_text_end - __hyp_idmap_text_start; + phys_addr_t phys_base; + + init_bounce_page = kmalloc(PAGE_SIZE, GFP_KERNEL); + if (!init_bounce_page) { + kvm_err("Couldn't allocate HYP init bounce page\n"); + err = -ENOMEM; + goto out; + } + + memcpy(init_bounce_page, __hyp_idmap_text_start, len); + /* + * Warning: the code we just copied to the bounce page + * must be flushed to the point of coherency. + * Otherwise, the data may be sitting in L2, and HYP + * mode won't be able to observe it as it runs with + * caches off at that point. + */ + kvm_flush_dcache_to_poc(init_bounce_page, len); + + phys_base = virt_to_phys(init_bounce_page); + hyp_idmap_vector += phys_base - hyp_idmap_start; + hyp_idmap_start = phys_base; + hyp_idmap_end = phys_base + len; + + kvm_info("Using HYP init bounce page @%lx\n", + (unsigned long)phys_base); + } + + hyp_pgd = kzalloc(PTRS_PER_PGD * sizeof(pgd_t), GFP_KERNEL); + boot_hyp_pgd = kzalloc(PTRS_PER_PGD * sizeof(pgd_t), GFP_KERNEL); + if (!hyp_pgd || !boot_hyp_pgd) { kvm_err("Hyp mode PGD not allocated\n"); - return -ENOMEM; + err = -ENOMEM; + goto out; } - return 0; -} + /* Create the idmap in the boot page tables */ + err = __create_hyp_mappings(boot_hyp_pgd, + hyp_idmap_start, hyp_idmap_end, + __phys_to_pfn(hyp_idmap_start), + PAGE_HYP); -/** - * kvm_clear_idmap - remove all idmaps from the hyp pgd - * - * Free the underlying pmds for all pgds in range and clear the pgds (but - * don't free them) afterwards. - */ -void kvm_clear_hyp_idmap(void) -{ - unsigned long addr, end; - unsigned long next; - pgd_t *pgd = hyp_pgd; - pud_t *pud; - pmd_t *pmd; + if (err) { + kvm_err("Failed to idmap %lx-%lx\n", + hyp_idmap_start, hyp_idmap_end); + goto out; + } - addr = virt_to_phys(__hyp_idmap_text_start); - end = virt_to_phys(__hyp_idmap_text_end); + /* Map the very same page at the trampoline VA */ + err = __create_hyp_mappings(boot_hyp_pgd, + TRAMPOLINE_VA, TRAMPOLINE_VA + PAGE_SIZE, + __phys_to_pfn(hyp_idmap_start), + PAGE_HYP); + if (err) { + kvm_err("Failed to map trampoline @%lx into boot HYP pgd\n", + TRAMPOLINE_VA); + goto out; + } - pgd += pgd_index(addr); - do { - next = pgd_addr_end(addr, end); - if (pgd_none_or_clear_bad(pgd)) - continue; - pud = pud_offset(pgd, addr); - pmd = pmd_offset(pud, addr); + /* Map the same page again into the runtime page tables */ + err = __create_hyp_mappings(hyp_pgd, + TRAMPOLINE_VA, TRAMPOLINE_VA + PAGE_SIZE, + __phys_to_pfn(hyp_idmap_start), + PAGE_HYP); + if (err) { + kvm_err("Failed to map trampoline @%lx into runtime HYP pgd\n", + TRAMPOLINE_VA); + goto out; + } - pud_clear(pud); - kvm_clean_pmd_entry(pmd); - pmd_free(NULL, (pmd_t *)((unsigned long)pmd & PAGE_MASK)); - } while (pgd++, addr = next, addr < end); + return 0; +out: + free_hyp_pgds(); + return err; } |