author    Paul Mackerras <paulus@ozlabs.org>    2018-09-11 17:00:30 +1000
committer root <root@rcs-power9-talos>          2018-10-27 02:51:26 -0400
commit    9d8bed4d5db2e83b862d4c750e51dab3ce6d6bfc (patch)
tree      9562002702c2c1edcd9f0d0c44289c41e92e4eb8
parent    f531b4c031ad6f366fa74aa8bfbc0ee69a21543c (diff)
KVM: PPC: Book3S HV: Allocate a memory area exclusively for HPTs
Currently we allocate HPTs (hashed page tables) for guests using the
CMA (contiguous memory allocator) facility.  However, there are
situations where the CMA region can get fragmented, notably when lots
of guest pages get pinned for PCI pass-through, which then causes HPT
allocations to fail even if there is sufficient CMA memory available
overall.

This commit adds the capability to reserve some memory at boot time
exclusively for HPTs for KVM guests.  The amount is controlled with
the kvm_hpt_resv_ratio=N kernel command-line option, where N is the
percentage of system memory to reserve.  This reserved memory will be
used first, and only when a guest HPT can't be allocated from this
reserved memory will the CMA region be used.

Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
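To make the scheme concrete, here is a minimal stand-alone C sketch of
the same idea: a region carved out once up front, handed out in
power-of-two chunks tracked by a bitmap, with a fallback allocator when
the region is exhausted.  All names (resv_alloc, resv_free, CHUNK_ORDER
and so on) are hypothetical illustrations, not the kernel symbols from
the patch below, and the demo falls back to malloc() where the kernel
falls back to CMA.

    /*
     * Sketch of the reservation scheme: a boot-time region split into
     * fixed-order chunks tracked by an in-use map; allocations take
     * contiguous runs of chunks, and callers fall back to another
     * allocator when this one fails.  (The real code also guarantees
     * 256 KiB alignment of the region itself; omitted here.)
     */
    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    #define CHUNK_ORDER 18              /* 256 KiB chunks, the HPT alignment */
    #define NCHUNKS     64              /* 16 MiB region for the demo */

    static unsigned char resv_base[NCHUNKS << CHUNK_ORDER];
    static uint8_t resv_map[NCHUNKS];   /* 1 = chunk in use */

    /* Allocate 2^(order) bytes from the reserved region, or NULL. */
    static void *resv_alloc(unsigned order)
    {
        unsigned nr = 1u << (order - CHUNK_ORDER);

        for (unsigned c = 0; c + nr <= NCHUNKS; c++) {
            unsigned i;
            for (i = 0; i < nr && !resv_map[c + i]; i++)
                ;
            if (i == nr) {              /* found a free run of chunks */
                memset(&resv_map[c], 1, nr);
                return resv_base + ((size_t)c << CHUNK_ORDER);
            }
            c += i;                     /* skip past the busy chunk */
        }
        return NULL;
    }

    static void resv_free(void *p, unsigned order)
    {
        size_t c = ((unsigned char *)p - resv_base) >> CHUNK_ORDER;

        memset(&resv_map[c], 0, 1u << (order - CHUNK_ORDER));
    }

    int main(void)
    {
        /* Try the reserved region first; fall back the way the patch
         * falls back to CMA when the region is full. */
        unsigned order = 20;            /* 1 MiB "HPT" */
        void *hpt = resv_alloc(order);
        int from_resv = hpt != NULL;

        if (!hpt)
            hpt = malloc(1ul << order);
        printf("HPT at %p (%s)\n", hpt, from_resv ? "reserved" : "fallback");

        if (from_resv)
            resv_free(hpt, order);
        else
            free(hpt);
        return 0;
    }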
-rw-r--r--  arch/powerpc/include/asm/kvm_host.h   |   2
-rw-r--r--  arch/powerpc/include/asm/kvm_ppc.h    |   7
-rw-r--r--  arch/powerpc/kernel/setup-common.c    |   3
-rw-r--r--  arch/powerpc/kernel/setup.h           |   6
-rw-r--r--  arch/powerpc/kvm/book3s_64_mmu_hv.c   |  25
-rw-r--r--  arch/powerpc/kvm/book3s_hv_builtin.c  | 105
6 files changed, 136 insertions(+), 12 deletions(-)
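As a usage sketch, assuming a GRUB-style boot entry (the paths and root
device are placeholders), reserving 4% of memory instead of the default
2% would look like:

    linux /boot/vmlinux root=/dev/sda2 kvm_hpt_resv_ratio=4

The reservation is taken from memblock in setup_arch(), before the page
allocator is up, so the ratio can only be changed with a reboot.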
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index fac6f63..b526f1a 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -260,6 +260,8 @@ struct kvm_hpt_info {
struct revmap_entry *rev;
/* Guest HPT size is 2**(order) bytes */
u32 order;
+ /* 1 if HPT allocated from reserved region, 0 otherwise */
+ int resv;
/* 1 if HPT allocated with CMA, 0 otherwise */
int cma;
};
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index 9b89b19..f150067 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -208,6 +208,8 @@ extern long kvmppc_h_stuff_tce(struct kvm_vcpu *vcpu,
unsigned long tce_value, unsigned long npages);
extern long kvmppc_h_get_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
unsigned long ioba);
+extern unsigned long kvmhv_alloc_resv_hpt(u32 order);
+extern void kvmhv_release_resv_hpt(unsigned long hpt, u32 order);
extern struct page *kvm_alloc_hpt_cma(unsigned long nr_pages);
extern void kvm_free_hpt_cma(struct page *page, unsigned long nr_pages);
extern int kvmppc_core_init_vm(struct kvm *kvm);
@@ -435,6 +437,8 @@ struct openpic;
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
extern void kvm_cma_reserve(void) __init;
+extern void kvm_resv_hpt_init(void);
+
static inline void kvmppc_set_xics_phys(int cpu, unsigned long addr)
{
paca_ptrs[cpu]->kvm_hstate.xics_phys = (void __iomem *)addr;
@@ -475,6 +479,9 @@ extern bool kvm_hv_mode_active(void);
static inline void __init kvm_cma_reserve(void)
{}
+static inline void kvm_resv_hpt_init(void)
+{}
+
static inline void kvmppc_set_xics_phys(int cpu, unsigned long addr)
{}
diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
index 9ca9db7..6949cdb 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -982,6 +982,9 @@ void __init setup_arch(char **cmdline_p)
/* Initialize the MMU context management stuff. */
mmu_context_init();
+ /* Reserve memory for KVM HPTs */
+ kvm_resv_hpt_init();
+
#ifdef CONFIG_PPC64
/* Interrupt code needs to be 64K-aligned. */
if ((unsigned long)_stext & 0xffff)
diff --git a/arch/powerpc/kernel/setup.h b/arch/powerpc/kernel/setup.h
index c6a592b..6de1fac 100644
--- a/arch/powerpc/kernel/setup.h
+++ b/arch/powerpc/kernel/setup.h
@@ -53,13 +53,15 @@ extern unsigned long spr_default_dscr;
#endif
/*
- * Having this in kvm_ppc.h makes include dependencies too
- * tricky to solve for setup-common.c so have it here.
+ * Having these in kvm_ppc.h makes include dependencies too
+ * tricky to solve for setup-common.c so have them here.
*/
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
void kvm_cma_reserve(void);
+void kvm_resv_hpt_init(void);
#else
static inline void kvm_cma_reserve(void) { };
+static inline void kvm_resv_hpt_init(void) { }
#endif
#ifdef CONFIG_TAU
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index c615617..efd5a6b 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -81,7 +81,7 @@ struct kvm_resize_hpt {
int kvmppc_allocate_hpt(struct kvm_hpt_info *info, u32 order)
{
unsigned long hpt = 0;
- int cma = 0;
+ int resv = 0, cma = 0;
struct page *page = NULL;
struct revmap_entry *rev;
unsigned long npte;
@@ -89,11 +89,17 @@ int kvmppc_allocate_hpt(struct kvm_hpt_info *info, u32 order)
if ((order < PPC_MIN_HPT_ORDER) || (order > PPC_MAX_HPT_ORDER))
return -EINVAL;
- page = kvm_alloc_hpt_cma(1ul << (order - PAGE_SHIFT));
- if (page) {
- hpt = (unsigned long)pfn_to_kaddr(page_to_pfn(page));
+ hpt = kvmhv_alloc_resv_hpt(order);
+ if (hpt) {
memset((void *)hpt, 0, (1ul << order));
- cma = 1;
+ resv = 1;
+ } else {
+ page = kvm_alloc_hpt_cma(1ul << (order - PAGE_SHIFT));
+ if (page) {
+ hpt = (unsigned long)pfn_to_kaddr(page_to_pfn(page));
+ memset((void *)hpt, 0, (1ul << order));
+ cma = 1;
+ }
}
if (!hpt)
@@ -109,7 +115,9 @@ int kvmppc_allocate_hpt(struct kvm_hpt_info *info, u32 order)
/* Allocate reverse map array */
rev = vmalloc(array_size(npte, sizeof(struct revmap_entry)));
if (!rev) {
- if (cma)
+ if (resv)
+ kvmhv_release_resv_hpt(hpt, order);
+ else if (cma)
kvm_free_hpt_cma(page, 1 << (order - PAGE_SHIFT));
else
free_pages(hpt, order - PAGE_SHIFT);
@@ -118,6 +126,7 @@ int kvmppc_allocate_hpt(struct kvm_hpt_info *info, u32 order)
info->order = order;
info->virt = hpt;
+ info->resv = resv;
info->cma = cma;
info->rev = rev;
@@ -191,7 +200,9 @@ void kvmppc_free_hpt(struct kvm_hpt_info *info)
{
vfree(info->rev);
info->rev = NULL;
- if (info->cma)
+ if (info->resv)
+ kvmhv_release_resv_hpt(info->virt, info->order);
+ else if (info->cma)
kvm_free_hpt_cma(virt_to_page(info->virt),
1 << (info->order - PAGE_SHIFT));
else if (info->virt)
diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c
index a71e2fc..18afe65 100644
--- a/arch/powerpc/kvm/book3s_hv_builtin.c
+++ b/arch/powerpc/kvm/book3s_hv_builtin.c
@@ -53,11 +53,109 @@ EXPORT_SYMBOL_GPL(__xive_vm_h_eoi);
/*
* Hash page table alignment on newer cpus(CPU_FTR_ARCH_206)
- * should be power of 2.
+ * only needs to be 256kB.
*/
-#define HPT_ALIGN_PAGES ((1 << 18) >> PAGE_SHIFT) /* 256k */
+#define HPT_ALIGN_ORDER 18 /* 256k */
+#define HPT_ALIGN_PAGES ((1 << HPT_ALIGN_ORDER) >> PAGE_SHIFT)
+
+#define KVM_RESV_CHUNK_ORDER HPT_ALIGN_ORDER
+
/*
- * By default we reserve 5% of memory for hash pagetable allocation.
+ * By default we reserve 2% of memory exclusively for guest HPT
+ * allocations, plus another 3% in the CMA zone which can be used
+ * either for HPTs or for movable page allocations.
+ * Each guest's HPT will be sized at between 1/128 and 1/64 of its
+ * memory, i.e. up to 1.56%, and allowing for about a 3x memory
+ * overcommit factor gets us to about 5%.
+ */
+static unsigned long kvm_hpt_resv_ratio = 2;
+
+static int __init early_parse_kvm_hpt_resv(char *p)
+{
+ pr_debug("%s(%s)\n", __func__, p);
+ if (!p)
+ return -EINVAL;
+ return kstrtoul(p, 0, &kvm_hpt_resv_ratio);
+}
+early_param("kvm_hpt_resv_ratio", early_parse_kvm_hpt_resv);
+
+static unsigned long kvm_resv_addr;
+static unsigned long *kvm_resv_bitmap;
+static unsigned long kvm_resv_chunks;
+static DEFINE_MUTEX(kvm_resv_lock);
+
+void kvm_resv_hpt_init(void)
+{
+ unsigned long align = 1ul << KVM_RESV_CHUNK_ORDER;
+ unsigned long size, bm_size;
+ unsigned long addr, bm;
+ unsigned long *bmp;
+
+ if (!cpu_has_feature(CPU_FTR_HVMODE))
+ return;
+
+ size = memblock_phys_mem_size() * kvm_hpt_resv_ratio / 100;
+ size = ALIGN(size, align);
+ if (!size)
+ return;
+
+ pr_info("KVM: Allocating %lu MiB for hashed page tables\n",
+ size >> 20);
+
+ addr = __memblock_alloc_base(size, align, MEMBLOCK_ALLOC_ACCESSIBLE);
+ if (!addr) {
+ pr_err("KVM: Allocation of reserved memory for HPTs failed\n");
+ return;
+ }
+ pr_info("KVM: %lu MiB reserved for HPTs at %lx\n", size >> 20, addr);
+
+ bm_size = BITS_TO_LONGS(size >> KVM_RESV_CHUNK_ORDER) * sizeof(long);
+ bm = __memblock_alloc_base(bm_size, sizeof(long),
+ MEMBLOCK_ALLOC_ACCESSIBLE);
+ if (!bm) {
+ pr_err("KVM: Allocation of reserved memory bitmap failed\n");
+ return;
+ }
+ bmp = __va(bm);
+ memset(bmp, 0, bm_size);
+
+ kvm_resv_addr = (unsigned long) __va(addr);
+ kvm_resv_chunks = size >> KVM_RESV_CHUNK_ORDER;
+ kvm_resv_bitmap = bmp;
+}
+
+unsigned long kvmhv_alloc_resv_hpt(u32 order)
+{
+ unsigned long nr_chunks = 1ul << (order - KVM_RESV_CHUNK_ORDER);
+ unsigned long chunk;
+
+ mutex_lock(&kvm_resv_lock);
+ chunk = bitmap_find_next_zero_area(kvm_resv_bitmap, kvm_resv_chunks,
+ 0, nr_chunks, 0);
+ if (chunk < kvm_resv_chunks)
+ bitmap_set(kvm_resv_bitmap, chunk, nr_chunks);
+ mutex_unlock(&kvm_resv_lock);
+
+ if (chunk < kvm_resv_chunks)
+ return kvm_resv_addr + (chunk << KVM_RESV_CHUNK_ORDER);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(kvmhv_alloc_resv_hpt);
+
+void kvmhv_release_resv_hpt(unsigned long addr, u32 order)
+{
+ unsigned long nr_chunks = 1ul << (order - KVM_RESV_CHUNK_ORDER);
+ unsigned long chunk = (addr - kvm_resv_addr) >> KVM_RESV_CHUNK_ORDER;
+
+ mutex_lock(&kvm_resv_lock);
+ if (chunk + nr_chunks <= kvm_resv_chunks)
+ bitmap_clear(kvm_resv_bitmap, chunk, nr_chunks);
+ mutex_unlock(&kvm_resv_lock);
+}
+EXPORT_SYMBOL_GPL(kvmhv_release_resv_hpt);
+
+/*
+ * By default we reserve 3% of memory for the CMA zone.
*/
-static unsigned long kvm_cma_resv_ratio = 5;
+static unsigned long kvm_cma_resv_ratio = 3;
@@ -106,6 +204,7 @@ void __init kvm_cma_reserve(void)
*/
if (!cpu_has_feature(CPU_FTR_HVMODE))
return;
+
/*
* We cannot use memblock_phys_mem_size() here, because
* memblock_analyze() has not been called yet.
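To see where the default ratios come from, a worked example using the
numbers in the comment added by this patch: a guest's HPT is sized at
between 1/128 and 1/64 of its memory, so a 64 GiB guest needs at most
64 GiB / 64 = 1 GiB of HPT.  At a 3x memory overcommit, worst-case HPT
demand is about 3 x 1.56% ~= 4.7% of host memory, which the defaults
cover as 2% exclusively reserved plus 3% in CMA.  On a 256 GiB host
that works out to a 5.12 GiB boot-time carve-out plus about 7.68 GiB
of CMA that HPTs share with movable allocations.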