From e7a3481c0246c8e45e79c629efd63b168e91fcda Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge
Date: Mon, 25 Oct 2010 16:53:46 -0700
Subject: x86/pvclock: Zero last_value on resume

If the guest domain has been suspend/resumed or migrated, then the
system clock backing the pvclock clocksource may revert to a smaller
value (ie, it can be non-monotonic across the migration/save-restore).
Make sure we zero last_value in that case so that the domain
continues to see clock updates.

Signed-off-by: Jeremy Fitzhardinge
Signed-off-by: Ingo Molnar
---
 arch/x86/include/asm/pvclock.h | 1 +
 arch/x86/kernel/pvclock.c      | 5 +++++
 arch/x86/xen/time.c            | 2 ++
 3 files changed, 8 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/pvclock.h b/arch/x86/include/asm/pvclock.h
index 7f7e577..31d84ac 100644
--- a/arch/x86/include/asm/pvclock.h
+++ b/arch/x86/include/asm/pvclock.h
@@ -11,6 +11,7 @@ unsigned long pvclock_tsc_khz(struct pvclock_vcpu_time_info *src);
 void pvclock_read_wallclock(struct pvclock_wall_clock *wall,
                             struct pvclock_vcpu_time_info *vcpu,
                             struct timespec *ts);
+void pvclock_resume(void);
 
 /*
  * Scale a 64-bit delta by scaling and multiplying by a 32-bit fraction,
diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c
index 008b91e..42eb330 100644
--- a/arch/x86/kernel/pvclock.c
+++ b/arch/x86/kernel/pvclock.c
@@ -83,6 +83,11 @@ unsigned long pvclock_tsc_khz(struct pvclock_vcpu_time_info *src)
 
 static atomic64_t last_value = ATOMIC64_INIT(0);
 
+void pvclock_resume(void)
+{
+       atomic64_set(&last_value, 0);
+}
+
 cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src)
 {
        struct pvclock_shadow_time shadow;
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
index b2bb5aa3..5da5e53 100644
--- a/arch/x86/xen/time.c
+++ b/arch/x86/xen/time.c
@@ -426,6 +426,8 @@ void xen_timer_resume(void)
 {
        int cpu;
 
+       pvclock_resume();
+
        if (xen_clockevent != &xen_vcpuop_clockevent)
                return;
--
cgit v1.1

From 52f6c5ad430e41736133acac179607b224eaaa11 Mon Sep 17 00:00:00 2001
From: Randy Dunlap
Date: Wed, 15 Dec 2010 17:58:57 +0800
Subject: crypto: ghash-intel - ghash-clmulni-intel_glue needs err.h

Add missing header file:

arch/x86/crypto/ghash-clmulni-intel_glue.c:256: error: implicit declaration of function 'IS_ERR'
arch/x86/crypto/ghash-clmulni-intel_glue.c:257: error: implicit declaration of function 'PTR_ERR'

Signed-off-by: Randy Dunlap
Signed-off-by: Herbert Xu
---
 arch/x86/crypto/ghash-clmulni-intel_glue.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch/x86')

diff --git a/arch/x86/crypto/ghash-clmulni-intel_glue.c b/arch/x86/crypto/ghash-clmulni-intel_glue.c
index cbcc8d8..7a6e68e 100644
--- a/arch/x86/crypto/ghash-clmulni-intel_glue.c
+++ b/arch/x86/crypto/ghash-clmulni-intel_glue.c
@@ -10,6 +10,7 @@
  * by the Free Software Foundation.
  */
 
+#include <linux/err.h>
 #include
 #include
 #include
--
cgit v1.1

From bb6f1d9a99f1947d91693de62ed54ac3bf1e2dfe Mon Sep 17 00:00:00 2001
From: Rusty Russell
Date: Thu, 16 Dec 2010 17:03:13 -0600
Subject: lguest: fix crash in lguest_time_init

fe25c7fc2e "x86: lguest: Convert to new irq chip functions" converted
enable_lguest_irq() to take a struct irq_data *, but didn't fix the
one internal caller.

Signed-off-by: Rusty Russell
To: x86@kernel.org
---
 arch/x86/lguest/boot.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index 73b1e1a..45e64b3 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -1002,7 +1002,7 @@ static void lguest_time_init(void)
        clockevents_register_device(&lguest_clockevent);
 
        /* Finally, we unblock the timer interrupt. */
-       enable_lguest_irq(0);
+       clear_bit(0, lguest_data.blocked_interrupts);
 }
 
 /*
--
cgit v1.1
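The pvclock patch above works because pvclock_clocksource_read() clamps every
reading against the shared last_value, so the clocksource never appears to run
backwards even when the per-vCPU time infos disagree slightly. After a
save/restore or migration the backing clock may legitimately restart at a
smaller value, and zeroing last_value is what lets reads advance again instead
of staying pinned at the stale pre-migration value. A minimal user-space sketch
of that guard follows; it is an illustration only, not the kernel code, and
read_raw_system_time() plus the toy backend are hypothetical stand-ins for the
real per-vCPU time computation.

/* Sketch of a pvclock-style monotonicity guard (illustrative, not kernel code). */
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

static _Atomic uint64_t last_value;      /* plays the role of pvclock's last_value */
static uint64_t fake_backend = 1000;     /* toy clock we can wind backwards */

static uint64_t read_raw_system_time(void)   /* hypothetical backend read */
{
        return fake_backend += 10;
}

static uint64_t clock_read(void)         /* never returns a smaller value than before */
{
        uint64_t now = read_raw_system_time();
        uint64_t last = atomic_load(&last_value);

        while (now > last) {
                /* Publish the newer value; retry if another reader raced us. */
                if (atomic_compare_exchange_weak(&last_value, &last, now))
                        return now;
        }
        return last;                     /* backend went backwards: clamp */
}

static void clock_resume(void)           /* what pvclock_resume() does to the guard */
{
        atomic_store(&last_value, 0);
}

int main(void)
{
        printf("%llu\n", (unsigned long long)clock_read());
        fake_backend = 0;                /* simulate migration to a younger host clock */
        printf("%llu (stuck without resume)\n", (unsigned long long)clock_read());
        clock_resume();
        printf("%llu (advancing again)\n", (unsigned long long)clock_read());
        return 0;
}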
From bb4093deb259ea9c92415796a6a139e35272f8a8 Mon Sep 17 00:00:00 2001
From: Rusty Russell
Date: Thu, 16 Dec 2010 17:03:15 -0600
Subject: lguest: restore boot speed

lguest is dumb and drops *all* the pagetables for set_pte (which is only
used for kernel mapping manipulation, so it's OK without highmem).  But
it's used a lot in boot, too.  As a guest optimization, we suppressed this
flushing until the first page table switch.

Now we have initial_page_table, that happens much earlier, so extend the
heuristic to wait until we switch to something other than the
swapper_pg_dir or initial_page_table.

As measured on my laptop under kvm, this dropped the time-to-mount-root
from 48 seconds to 4.3 seconds.

Signed-off-by: Rusty Russell
---
 arch/x86/lguest/boot.c | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index 45e64b3..24e4973 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -531,7 +531,10 @@ static void lguest_write_cr3(unsigned long cr3)
 {
        lguest_data.pgdir = cr3;
        lazy_hcall1(LHCALL_NEW_PGTABLE, cr3);
-       cr3_changed = true;
+
+       /* These two page tables are simple, linear, and used during boot */
+       if (cr3 != __pa(swapper_pg_dir) && cr3 != __pa(initial_page_table))
+               cr3_changed = true;
 }
 
 static unsigned long lguest_read_cr3(void)
@@ -703,9 +706,9 @@ static void lguest_set_pmd(pmd_t *pmdp, pmd_t pmdval)
  * to forget all of them.  Fortunately, this is very rare.
  *
  * ... except in early boot when the kernel sets up the initial pagetables,
- * which makes booting astonishingly slow: 1.83 seconds!  So we don't even tell
- * the Host anything changed until we've done the first page table switch,
- * which brings boot back to 0.25 seconds.
+ * which makes booting astonishingly slow: 48 seconds!  So we don't even tell
+ * the Host anything changed until we've done the first real page table switch,
+ * which brings boot back to 4.3 seconds.
  */
 static void lguest_set_pte(pte_t *ptep, pte_t pteval)
 {
--
cgit v1.1
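The boot-speed patch above rests on a simple guest-side heuristic: while the
guest is still running on one of the two simple, linear boot page tables, a
kernel set_pte() is not worth reporting to the Host (each report makes the
Host throw away all of its shadow page tables), so lguest stays quiet until
the first switch to a real page table. Below is a self-contained C model of
that heuristic; hypercall(), the call numbers and the addresses are
placeholders, not the real lguest interface.

/* Toy model of the lazy-flush heuristic; all names and numbers are placeholders. */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

enum { NEW_PGTABLE, FLUSH_TLB };         /* placeholder call numbers */

static uintptr_t boot_pgdir[2] = { 0x1000, 0x2000 }; /* swapper_pg_dir, initial_page_table */
static bool cr3_changed;                 /* flipped after the first real switch */
static unsigned long hypercalls;

static void hypercall(int call, uintptr_t arg)
{
        (void)call; (void)arg;
        hypercalls++;                    /* just count the host round trips */
}

static void guest_write_cr3(uintptr_t cr3)
{
        hypercall(NEW_PGTABLE, cr3);
        /* The two boot page tables are simple and linear: stay lazy on them. */
        if (cr3 != boot_pgdir[0] && cr3 != boot_pgdir[1])
                cr3_changed = true;
}

static void guest_set_pte(void)
{
        /* Reporting one kernel PTE would make the host drop every shadow
         * page table, so keep quiet until we have left the boot tables. */
        if (cr3_changed)
                hypercall(FLUSH_TLB, 1);
}

int main(void)
{
        for (int i = 0; i < 10000; i++)  /* early-boot PTE churn: free */
                guest_set_pte();
        guest_write_cr3(boot_pgdir[1]);  /* switching between boot tables stays lazy */
        guest_write_cr3(0x3000);         /* first real page table */
        guest_set_pte();                 /* now each change costs a call */
        printf("hypercalls: %lu\n", hypercalls);  /* prints 3, not 10003 */
        return 0;
}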
From da32dac101263fb5b155407507c548e3ac2a6a2a Mon Sep 17 00:00:00 2001
From: Rusty Russell
Date: Thu, 16 Dec 2010 17:03:15 -0600
Subject: lguest: populate initial_page_table

Two x86 patches broke lguest:

1) v2.6.35-492-g72d7c3b, which changed x86 to use the memblock allocator.

In lguest, the host places linear page tables at the top of mem, which
used to be enough to get us up to the swapper_pg_dir page tables.  With
the first patch, the direct mapping tables used that memory:

Before: kernel direct mapping tables up to 4000000 @ 7000-1a000
After:  kernel direct mapping tables up to 4000000 @ 3fed000-4000000

I initially fixed this by lying about the amount of memory we had, so the
kernel wouldn't blatt the lguest boot pagetables (yuk!), but then...

2) v2.6.36-rc8-54-gb40827f, which made x86 boot use initial_page_table.

This was initialized in a part of head_32.S which isn't executed by
lguest; it is then copied into swapper_pg_dir.  So we have to initialize
it; and anyway we switch to it before we blatt the old tables, so that
fixes the previous damage as well.

For the moment, I cut & pasted the code into lguest's boot code, but next
merge window I will merge them.

Signed-off-by: Rusty Russell
Cc: Jeremy Fitzhardinge
Cc: Konrad Rzeszutek Wilk
To: x86@kernel.org
---
 arch/x86/kernel/head_32.S   |   4 +-
 arch/x86/lguest/boot.c      |   3 --
 arch/x86/lguest/i386_head.S | 105 ++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 107 insertions(+), 5 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index bcece91..f0bea76 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -620,13 +620,13 @@ ENTRY(initial_code)
 __PAGE_ALIGNED_BSS
        .align PAGE_SIZE_asm
 #ifdef CONFIG_X86_PAE
-initial_pg_pmd:
+ENTRY(initial_pg_pmd)
        .fill 1024*KPMDS,4,0
 #else
 ENTRY(initial_page_table)
        .fill 1024,4,0
 #endif
-initial_pg_fixmap:
+ENTRY(initial_pg_fixmap)
        .fill 1024,4,0
 ENTRY(empty_zero_page)
        .fill 4096,1,0
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index 24e4973..4996cf5 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -1352,9 +1352,6 @@ __init void lguest_init(void)
         */
        switch_to_new_gdt(0);
 
-       /* We actually boot with all memory mapped, but let's say 128MB. */
-       max_pfn_mapped = (128*1024*1024) >> PAGE_SHIFT;
-
        /*
         * The Host<->Guest Switcher lives at the top of our address space, and
         * the Host told us how big it is when we made LGUEST_INIT hypercall:
diff --git a/arch/x86/lguest/i386_head.S b/arch/x86/lguest/i386_head.S
index 4f420c2f..e7d5382 100644
--- a/arch/x86/lguest/i386_head.S
+++ b/arch/x86/lguest/i386_head.S
@@ -4,6 +4,7 @@
 #include
 #include
 #include
+#include <asm/pgtable.h>
 
 /*G:020
  * Our story starts with the kernel booting into startup_32 in
@@ -37,9 +38,113 @@ ENTRY(lguest_entry)
        /* Set up the initial stack so we can run C code. */
        movl $(init_thread_union+THREAD_SIZE),%esp
 
+       call init_pagetables
+
        /* Jumps are relative: we're running __PAGE_OFFSET too low. */
        jmp lguest_init+__PAGE_OFFSET
 
+/*
+ * Initialize page tables.  This creates a PDE and a set of page
+ * tables, which are located immediately beyond __brk_base.  The variable
+ * _brk_end is set up to point to the first "safe" location.
+ * Mappings are created both at virtual address 0 (identity mapping)
+ * and PAGE_OFFSET for up to _end.
+ *
+ * FIXME: This code is taken verbatim from arch/x86/kernel/head_32.S: they
+ * don't have a stack at this point, so we can't just use call and ret.
+ */
+init_pagetables:
+#if PTRS_PER_PMD > 1
+#define PAGE_TABLE_SIZE(pages) (((pages) / PTRS_PER_PMD) + PTRS_PER_PGD)
+#else
+#define PAGE_TABLE_SIZE(pages) ((pages) / PTRS_PER_PGD)
+#endif
+#define pa(X) ((X) - __PAGE_OFFSET)
+
+/* Enough space to fit pagetables for the low memory linear map */
+MAPPING_BEYOND_END = \
+       PAGE_TABLE_SIZE(((1<<32) - __PAGE_OFFSET) >> PAGE_SHIFT) << PAGE_SHIFT
+#ifdef CONFIG_X86_PAE
+
+       /*
+        * In PAE mode initial_page_table is statically defined to contain
+        * enough entries to cover the VMSPLIT option (that is the top 1, 2 or 3
+        * entries).  The identity mapping is handled by pointing two PGD entries
+        * to the first kernel PMD.
+        *
+        * Note the upper half of each PMD or PTE is always zero at this stage.
+        */
+
+#define KPMDS (((-__PAGE_OFFSET) >> 30) & 3) /* Number of kernel PMDs */
+
+       xorl %ebx,%ebx                          /* %ebx is kept at zero */
+
+       movl $pa(__brk_base), %edi
+       movl $pa(initial_pg_pmd), %edx
+       movl $PTE_IDENT_ATTR, %eax
+10:
+       leal PDE_IDENT_ATTR(%edi),%ecx          /* Create PMD entry */
+       movl %ecx,(%edx)                        /* Store PMD entry */
+                                               /* Upper half already zero */
+       addl $8,%edx
+       movl $512,%ecx
+11:
+       stosl
+       xchgl %eax,%ebx
+       stosl
+       xchgl %eax,%ebx
+       addl $0x1000,%eax
+       loop 11b
+
+       /*
+        * End condition: we must map up to the end + MAPPING_BEYOND_END.
+        */
+       movl $pa(_end) + MAPPING_BEYOND_END + PTE_IDENT_ATTR, %ebp
+       cmpl %ebp,%eax
+       jb 10b
+1:
+       addl $__PAGE_OFFSET, %edi
+       movl %edi, pa(_brk_end)
+       shrl $12, %eax
+       movl %eax, pa(max_pfn_mapped)
+
+       /* Do early initialization of the fixmap area */
+       movl $pa(initial_pg_fixmap)+PDE_IDENT_ATTR,%eax
+       movl %eax,pa(initial_pg_pmd+0x1000*KPMDS-8)
+#else  /* Not PAE */
+
+page_pde_offset = (__PAGE_OFFSET >> 20);
+
+       movl $pa(__brk_base), %edi
+       movl $pa(initial_page_table), %edx
+       movl $PTE_IDENT_ATTR, %eax
+10:
+       leal PDE_IDENT_ATTR(%edi),%ecx          /* Create PDE entry */
+       movl %ecx,(%edx)                        /* Store identity PDE entry */
+       movl %ecx,page_pde_offset(%edx)         /* Store kernel PDE entry */
+       addl $4,%edx
+       movl $1024, %ecx
+11:
+       stosl
+       addl $0x1000,%eax
+       loop 11b
+       /*
+        * End condition: we must map up to the end + MAPPING_BEYOND_END.
+        */
+       movl $pa(_end) + MAPPING_BEYOND_END + PTE_IDENT_ATTR, %ebp
+       cmpl %ebp,%eax
+       jb 10b
+       addl $__PAGE_OFFSET, %edi
+       movl %edi, pa(_brk_end)
+       shrl $12, %eax
+       movl %eax, pa(max_pfn_mapped)
+
+       /* Do early initialization of the fixmap area */
+       movl $pa(initial_pg_fixmap)+PDE_IDENT_ATTR,%eax
+       movl %eax,pa(initial_page_table+0xffc)
+#endif
+       ret
+
 /*G:055
  * We create a macro which puts the assembler code between lgstart_ and lgend_
  * markers.  These templates are put in the .text section: they can't be
--
cgit v1.1
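The init_pagetables code above is easier to follow in C: each pass of the
outer loop takes one page-table page from the brk area, points both an
identity-mapping and a kernel (PAGE_OFFSET) directory entry at it, fills its
1024 slots with consecutive physical pages, and stops once everything up to
_end plus MAPPING_BEYOND_END is covered. The sketch below mirrors only the
non-PAE branch; the 3G/1G split, the attribute bits, the pretend brk base and
the toy arrays are illustrative assumptions rather than values taken from any
particular configuration.

/* Illustrative C rendering of the non-PAE init_pagetables loop above.
 * PAGE_OFFSET, the attribute bits and BRK_BASE_PA are assumed values. */
#include <stdint.h>

#define PAGE_SHIFT      12
#define PAGE_SIZE       (1u << PAGE_SHIFT)
#define PTRS_PER_PGD    1024u
#define PAGE_OFFSET     0xC0000000u     /* assumed 3G/1G split */
#define PTE_IDENT_ATTR  0x003u          /* present | writable (sketch) */
#define PDE_IDENT_ATTR  0x067u          /* sketch of the PDE attribute bits */
#define BRK_BASE_PA     0x00100000u     /* pretend __brk_base sits at 1 MB */

static uint32_t pgd[PTRS_PER_PGD];      /* stands in for initial_page_table */
static uint32_t pt_pages[64][1024];     /* toy brk area: enough for 256 MB */

/* Map [0, limit) at virtual 0 (identity) and again at PAGE_OFFSET. */
static uint32_t build_boot_mappings(uint32_t limit)
{
        uint32_t kernel_slot = PAGE_OFFSET >> 22;   /* first kernel PGD index */
        uint32_t paddr = 0;
        unsigned int n = 0;

        while (paddr < limit) {          /* the "cmpl %ebp,%eax; jb 10b" test */
                uint32_t table_pa = BRK_BASE_PA + n * PAGE_SIZE;
                uint32_t pde = table_pa | PDE_IDENT_ATTR;

                pgd[n] = pde;                 /* identity PDE */
                pgd[kernel_slot + n] = pde;   /* PAGE_OFFSET PDE */

                /* "movl $1024,%ecx; 11: stosl ... loop 11b": fill a whole table */
                for (unsigned int i = 0; i < 1024; i++, paddr += PAGE_SIZE)
                        pt_pages[n][i] = paddr | PTE_IDENT_ATTR;
                n++;
        }
        return paddr >> PAGE_SHIFT;      /* what the asm stores in max_pfn_mapped */
}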
From 8f1d97c79eb65de1d05799d6b81d79cd94169114 Mon Sep 17 00:00:00 2001
From: Christoph Lameter
Date: Mon, 6 Dec 2010 11:40:00 -0600
Subject: x86: Support for this_cpu_add, sub, dec, inc_return

Supply an implementation for x86 in order to generate more efficient code.

V2->V3:
- Cleanup
- Remove strange type checking from percpu_add_return_op.

tj:
- Dropped unused typedef from percpu_add_return_op().
- Renamed ret__ to paro_ret__ in percpu_add_return_op().
- Minor indentation adjustments.

Acked-by: H. Peter Anvin
Signed-off-by: Christoph Lameter
Signed-off-by: Tejun Heo
---
 arch/x86/include/asm/percpu.h | 43 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 43 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index f899e01..38f9e96 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -177,6 +177,39 @@ do {                                                                   \
        }                                                               \
 } while (0)
 
+/*
+ * Add return operation
+ */
+#define percpu_add_return_op(var, val)                                 \
+({                                                                     \
+       typeof(var) paro_ret__ = val;                                   \
+       switch (sizeof(var)) {                                          \
+       case 1:                                                         \
+               asm("xaddb %0, "__percpu_arg(1)                         \
+                           : "+q" (paro_ret__), "+m" (var)             \
+                           : : "memory");                              \
+               break;                                                  \
+       case 2:                                                         \
+               asm("xaddw %0, "__percpu_arg(1)                         \
+                           : "+r" (paro_ret__), "+m" (var)             \
+                           : : "memory");                              \
+               break;                                                  \
+       case 4:                                                         \
+               asm("xaddl %0, "__percpu_arg(1)                         \
+                           : "+r" (paro_ret__), "+m" (var)             \
+                           : : "memory");                              \
+               break;                                                  \
+       case 8:                                                         \
+               asm("xaddq %0, "__percpu_arg(1)                         \
+                           : "+re" (paro_ret__), "+m" (var)            \
+                           : : "memory");                              \
+               break;                                                  \
+       default: __bad_percpu_size();                                   \
+       }                                                               \
+       paro_ret__ += val;                                              \
+       paro_ret__;                                                     \
+})
+
 #define percpu_from_op(op, var, constraint)            \
 ({                                                     \
        typeof(var) pfo_ret__;                          \
@@ -300,6 +333,14 @@ do {                                                                  \
 #define irqsafe_cpu_xor_2(pcp, val)    percpu_to_op("xor", (pcp), val)
 #define irqsafe_cpu_xor_4(pcp, val)    percpu_to_op("xor", (pcp), val)
 
+#ifndef CONFIG_M386
+#define __this_cpu_add_return_1(pcp, val)      percpu_add_return_op(pcp, val)
+#define __this_cpu_add_return_2(pcp, val)      percpu_add_return_op(pcp, val)
+#define __this_cpu_add_return_4(pcp, val)      percpu_add_return_op(pcp, val)
+#define this_cpu_add_return_1(pcp, val)        percpu_add_return_op(pcp, val)
+#define this_cpu_add_return_2(pcp, val)        percpu_add_return_op(pcp, val)
+#define this_cpu_add_return_4(pcp, val)        percpu_add_return_op(pcp, val)
+#endif
 /*
  * Per cpu atomic 64 bit operations are only available under 64 bit.
  * 32 bit must fall back to generic operations.
@@ -324,6 +365,8 @@ do {                                                                   \
 #define irqsafe_cpu_or_8(pcp, val)     percpu_to_op("or", (pcp), val)
 #define irqsafe_cpu_xor_8(pcp, val)    percpu_to_op("xor", (pcp), val)
 
+#define __this_cpu_add_return_8(pcp, val)      percpu_add_return_op(pcp, val)
+#define this_cpu_add_return_8(pcp, val)        percpu_add_return_op(pcp, val)
 #endif
 
 /* This is not atomic against other CPUs -- CPU preemption needs to be off */
--
cgit v1.1
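What the new percpu_add_return_op() buys its callers: this_cpu_add_return()
and the inc/dec/sub variants layered on it return the post-update value of a
per-cpu variable in a single xadd on the per-cpu segment, with no lock prefix
and no explicit preempt_disable()/read/modify/write/enable sequence. The
snippet below is an illustrative, hypothetical consumer, not part of this
patch; it only assumes the generic this_cpu_inc_return() wrapper from
linux/percpu.h that this implementation accelerates. On a !CONFIG_M386 build
the 1-, 2- and 4-byte cases use the xadd forms added above; other sizes and
architectures fall back to the generic implementation.

/* Illustrative kernel-style consumer of this_cpu_add_return(); the batching
 * scheme itself is hypothetical. */
#include <linux/percpu.h>
#include <linux/types.h>

static DEFINE_PER_CPU(unsigned int, queued_items);

/*
 * Count an item queued on this CPU and report whether that completed a
 * batch of 64.  Needing the *new* value is what makes add_return useful:
 * with the patch above this compiles to one xadd on %gs:queued_items,
 * instead of a preempt_disable()/read/modify/write/preempt_enable() pair.
 */
static bool queue_item_filled_batch(void)
{
        return (this_cpu_inc_return(queued_items) % 64) == 0;
}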