diff options
-rw-r--r-- | arch/powerpc/include/asm/kvm_host.h | 2 | ||||
-rw-r--r-- | arch/powerpc/include/asm/kvm_ppc.h | 7 | ||||
-rw-r--r-- | arch/powerpc/kernel/setup-common.c | 3 | ||||
-rw-r--r-- | arch/powerpc/kernel/setup.h | 6 | ||||
-rw-r--r-- | arch/powerpc/kvm/book3s_64_mmu_hv.c | 25 | ||||
-rw-r--r-- | arch/powerpc/kvm/book3s_hv_builtin.c | 105 |
6 files changed, 136 insertions(+), 12 deletions(-)
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index fac6f63..b526f1a 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -260,6 +260,8 @@ struct kvm_hpt_info { struct revmap_entry *rev; /* Guest HPT size is 2**(order) bytes */ u32 order; + /* 1 if HPT allocated from reserved region, 0 otherwise */ + int resv; /* 1 if HPT allocated with CMA, 0 otherwise */ int cma; }; diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 9b89b19..f150067 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -208,6 +208,8 @@ extern long kvmppc_h_stuff_tce(struct kvm_vcpu *vcpu, unsigned long tce_value, unsigned long npages); extern long kvmppc_h_get_tce(struct kvm_vcpu *vcpu, unsigned long liobn, unsigned long ioba); +extern unsigned long kvmhv_alloc_resv_hpt(u32 order); +extern void kvmhv_release_resv_hpt(unsigned long hpt, u32 order); extern struct page *kvm_alloc_hpt_cma(unsigned long nr_pages); extern void kvm_free_hpt_cma(struct page *page, unsigned long nr_pages); extern int kvmppc_core_init_vm(struct kvm *kvm); @@ -435,6 +437,8 @@ struct openpic; #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE extern void kvm_cma_reserve(void) __init; +extern void kvm_resv_hpt_init(void); + static inline void kvmppc_set_xics_phys(int cpu, unsigned long addr) { paca_ptrs[cpu]->kvm_hstate.xics_phys = (void __iomem *)addr; @@ -475,6 +479,9 @@ extern bool kvm_hv_mode_active(void); static inline void __init kvm_cma_reserve(void) {} +static inline void kvm_resv_hpt_init(void) +{} + static inline void kvmppc_set_xics_phys(int cpu, unsigned long addr) {} diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index 9ca9db7..6949cdb 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -982,6 +982,9 @@ void __init setup_arch(char **cmdline_p) /* Initialize the MMU context management stuff. 
*/ mmu_context_init(); + /* Reserve memory for KVM HPTs */ + kvm_resv_hpt_init(); + #ifdef CONFIG_PPC64 /* Interrupt code needs to be 64K-aligned. */ if ((unsigned long)_stext & 0xffff) diff --git a/arch/powerpc/kernel/setup.h b/arch/powerpc/kernel/setup.h index c6a592b..6de1fac 100644 --- a/arch/powerpc/kernel/setup.h +++ b/arch/powerpc/kernel/setup.h @@ -53,13 +53,15 @@ extern unsigned long spr_default_dscr; #endif /* - * Having this in kvm_ppc.h makes include dependencies too - * tricky to solve for setup-common.c so have it here. + * Having these in kvm_ppc.h makes include dependencies too + * tricky to solve for setup-common.c so have them here. */ #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE void kvm_cma_reserve(void); +void kvm_resv_hpt_init(void); #else static inline void kvm_cma_reserve(void) { }; +static inline void kvm_resv_hpt_init(void) { } #endif #ifdef CONFIG_TAU diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index c615617..efd5a6b 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -81,7 +81,7 @@ struct kvm_resize_hpt { int kvmppc_allocate_hpt(struct kvm_hpt_info *info, u32 order) { unsigned long hpt = 0; - int cma = 0; + int resv = 0, cma = 0; struct page *page = NULL; struct revmap_entry *rev; unsigned long npte; @@ -89,11 +89,17 @@ int kvmppc_allocate_hpt(struct kvm_hpt_info *info, u32 order) if ((order < PPC_MIN_HPT_ORDER) || (order > PPC_MAX_HPT_ORDER)) return -EINVAL; - page = kvm_alloc_hpt_cma(1ul << (order - PAGE_SHIFT)); - if (page) { - hpt = (unsigned long)pfn_to_kaddr(page_to_pfn(page)); + hpt = kvmhv_alloc_resv_hpt(order); + if (hpt) { memset((void *)hpt, 0, (1ul << order)); - cma = 1; + resv = 1; + } else { + page = kvm_alloc_hpt_cma(1ul << (order - PAGE_SHIFT)); + if (page) { + hpt = (unsigned long)pfn_to_kaddr(page_to_pfn(page)); + memset((void *)hpt, 0, (1ul << order)); + cma = 1; + } } if (!hpt) @@ -109,7 +115,9 @@ int kvmppc_allocate_hpt(struct kvm_hpt_info 
*info, u32 order) /* Allocate reverse map array */ rev = vmalloc(array_size(npte, sizeof(struct revmap_entry))); if (!rev) { - if (cma) + if (resv) + kvmhv_release_resv_hpt(hpt, order); + else if (cma) kvm_free_hpt_cma(page, 1 << (order - PAGE_SHIFT)); else free_pages(hpt, order - PAGE_SHIFT); @@ -118,6 +126,7 @@ int kvmppc_allocate_hpt(struct kvm_hpt_info *info, u32 order) info->order = order; info->virt = hpt; + info->resv = resv; info->cma = cma; info->rev = rev; @@ -191,7 +200,9 @@ void kvmppc_free_hpt(struct kvm_hpt_info *info) { vfree(info->rev); info->rev = NULL; - if (info->cma) + if (info->resv) + kvmhv_release_resv_hpt(info->virt, info->order); + else if (info->cma) kvm_free_hpt_cma(virt_to_page(info->virt), 1 << (info->order - PAGE_SHIFT)); else if (info->virt) diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c index a71e2fc..18afe65 100644 --- a/arch/powerpc/kvm/book3s_hv_builtin.c +++ b/arch/powerpc/kvm/book3s_hv_builtin.c @@ -53,11 +53,109 @@ EXPORT_SYMBOL_GPL(__xive_vm_h_eoi); /* * Hash page table alignment on newer cpus(CPU_FTR_ARCH_206) - * should be power of 2. + * only needs to be 256kB. */ -#define HPT_ALIGN_PAGES ((1 << 18) >> PAGE_SHIFT) /* 256k */ +#define HPT_ALIGN_ORDER 18 /* 256k */ +#define HPT_ALIGN_PAGES ((1 << HPT_ALIGN_ORDER) >> PAGE_SHIFT) + +#define KVM_RESV_CHUNK_ORDER HPT_ALIGN_ORDER + /* - * By default we reserve 5% of memory for hash pagetable allocation. + * By default we reserve 2% of memory exclusively for guest HPT + * allocations, plus another 3% in the CMA zone which can be used + * either for HPTs or for movable page allocations. + * Each guest's HPT will be sized at between 1/128 and 1/64 of its + * memory, i.e. up to 1.56%, and allowing for about a 3x memory + * overcommit factor gets us to about 5%. 
+ */ +static unsigned long kvm_hpt_resv_ratio = 2; + +static int __init early_parse_kvm_hpt_resv(char *p) +{ + pr_debug("%s(%s)\n", __func__, p); + if (!p) + return -EINVAL; + return kstrtoul(p, 0, &kvm_hpt_resv_ratio); +} +early_param("kvm_hpt_resv_ratio", early_parse_kvm_hpt_resv); + +static unsigned long kvm_resv_addr; +static unsigned long *kvm_resv_bitmap; +static unsigned long kvm_resv_chunks; +static DEFINE_MUTEX(kvm_resv_lock); + +void kvm_resv_hpt_init(void) +{ + unsigned long align = 1ul << KVM_RESV_CHUNK_ORDER; + unsigned long size, bm_size; + unsigned long addr, bm; + unsigned long *bmp; + + if (!cpu_has_feature(CPU_FTR_HVMODE)) + return; + + size = memblock_phys_mem_size() * kvm_hpt_resv_ratio / 100; + size = ALIGN(size, align); + if (!size) + return; + + pr_info("KVM: Allocating %lu MiB for hashed page tables\n", + size >> 20); + + addr = __memblock_alloc_base(size, align, MEMBLOCK_ALLOC_ACCESSIBLE); + if (!addr) { + pr_err("KVM: Allocation of reserved memory for HPTs failed\n"); + return; + } + pr_info("KVM: %lu MiB reserved for HPTs at %lx\n", size >> 20, addr); + + bm_size = BITS_TO_LONGS(size >> KVM_RESV_CHUNK_ORDER) * sizeof(long); + bm = __memblock_alloc_base(bm_size, sizeof(long), + MEMBLOCK_ALLOC_ACCESSIBLE); + if (!bm) { + pr_err("KVM: Allocation of reserved memory bitmap failed\n"); + return; + } + bmp = __va(bm); + memset(bmp, 0, bm_size); + + kvm_resv_addr = (unsigned long) __va(addr); + kvm_resv_chunks = size >> KVM_RESV_CHUNK_ORDER; + kvm_resv_bitmap = bmp; +} + +unsigned long kvmhv_alloc_resv_hpt(u32 order) +{ + unsigned long nr_chunks = 1ul << (order - KVM_RESV_CHUNK_ORDER); + unsigned long chunk; + + mutex_lock(&kvm_resv_lock); + chunk = bitmap_find_next_zero_area(kvm_resv_bitmap, kvm_resv_chunks, + 0, nr_chunks, 0); + if (chunk < kvm_resv_chunks) + bitmap_set(kvm_resv_bitmap, chunk, nr_chunks); + mutex_unlock(&kvm_resv_lock); + + if (chunk < kvm_resv_chunks) + return kvm_resv_addr + (chunk << KVM_RESV_CHUNK_ORDER); + return 0; +} 
+EXPORT_SYMBOL_GPL(kvmhv_alloc_resv_hpt); + +void kvmhv_release_resv_hpt(unsigned long addr, u32 order) +{ + unsigned long nr_chunks = 1ul << (order - KVM_RESV_CHUNK_ORDER); + unsigned long chunk = (addr - kvm_resv_addr) >> KVM_RESV_CHUNK_ORDER; + + mutex_lock(&kvm_resv_lock); + if (chunk + nr_chunks <= kvm_resv_chunks) + bitmap_clear(kvm_resv_bitmap, chunk, nr_chunks); + mutex_unlock(&kvm_resv_lock); +} +EXPORT_SYMBOL_GPL(kvmhv_release_resv_hpt); + +/* + * By default we reserve 3% of memory for the CMA zone. */ static unsigned long kvm_cma_resv_ratio = 5; @@ -106,6 +204,7 @@ void __init kvm_cma_reserve(void) */ if (!cpu_has_feature(CPU_FTR_HVMODE)) return; + /* * We cannot use memblock_phys_mem_size() here, because * memblock_analyze() has not been called yet. |