diff options
-rw-r--r-- | arch/x86/kernel/Makefile | 4 | ||||
-rw-r--r-- | arch/x86/kernel/ldt.c | 9 | ||||
-rw-r--r-- | arch/x86/kernel/paravirt-spinlocks.c | 31 | ||||
-rw-r--r-- | arch/x86/kernel/paravirt.c | 27 | ||||
-rw-r--r-- | arch/x86/xen/Makefile | 11 | ||||
-rw-r--r-- | arch/x86/xen/enlighten.c | 173 | ||||
-rw-r--r-- | arch/x86/xen/irq.c | 143 | ||||
-rw-r--r-- | arch/x86/xen/smp.c | 167 | ||||
-rw-r--r-- | arch/x86/xen/spinlock.c | 183 | ||||
-rw-r--r-- | arch/x86/xen/xen-asm_32.S | 2 | ||||
-rw-r--r-- | arch/x86/xen/xen-asm_64.S | 2 | ||||
-rw-r--r-- | arch/x86/xen/xen-ops.h | 4 | ||||
-rw-r--r-- | drivers/xen/balloon.c | 174 | ||||
-rw-r--r-- | drivers/xen/events.c | 11 | ||||
-rw-r--r-- | include/asm-x86/desc.h | 15 | ||||
-rw-r--r-- | include/asm-x86/paravirt.h | 13 | ||||
-rw-r--r-- | include/linux/kernel.h | 2 | ||||
-rw-r--r-- | lib/cmdline.c | 2 |
18 files changed, 491 insertions, 482 deletions
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 3db651f..d679cb2 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -10,7 +10,7 @@ ifdef CONFIG_FTRACE # Do not profile debug and lowlevel utilities CFLAGS_REMOVE_tsc.o = -pg CFLAGS_REMOVE_rtc.o = -pg -CFLAGS_REMOVE_paravirt.o = -pg +CFLAGS_REMOVE_paravirt-spinlocks.o = -pg endif # @@ -89,7 +89,7 @@ obj-$(CONFIG_DEBUG_NX_TEST) += test_nx.o obj-$(CONFIG_VMI) += vmi_32.o vmiclock_32.o obj-$(CONFIG_KVM_GUEST) += kvm.o obj-$(CONFIG_KVM_CLOCK) += kvmclock.o -obj-$(CONFIG_PARAVIRT) += paravirt.o paravirt_patch_$(BITS).o +obj-$(CONFIG_PARAVIRT) += paravirt.o paravirt_patch_$(BITS).o paravirt-spinlocks.o obj-$(CONFIG_PARAVIRT_CLOCK) += pvclock.o obj-$(CONFIG_PCSPKR_PLATFORM) += pcspeaker.o diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c index b68e21f..6e38841 100644 --- a/arch/x86/kernel/ldt.c +++ b/arch/x86/kernel/ldt.c @@ -51,6 +51,8 @@ static int alloc_ldt(mm_context_t *pc, int mincount, int reload) memset(newldt + oldsize * LDT_ENTRY_SIZE, 0, (mincount - oldsize) * LDT_ENTRY_SIZE); + paravirt_alloc_ldt(newldt, mincount); + #ifdef CONFIG_X86_64 /* CHECKME: Do we really need this ? */ wmb(); @@ -73,6 +75,7 @@ static int alloc_ldt(mm_context_t *pc, int mincount, int reload) #endif } if (oldsize) { + paravirt_free_ldt(oldldt, oldsize); if (oldsize * LDT_ENTRY_SIZE > PAGE_SIZE) vfree(oldldt); else @@ -84,10 +87,13 @@ static int alloc_ldt(mm_context_t *pc, int mincount, int reload) static inline int copy_ldt(mm_context_t *new, mm_context_t *old) { int err = alloc_ldt(new, old->size, 0); + int i; if (err < 0) return err; - memcpy(new->ldt, old->ldt, old->size * LDT_ENTRY_SIZE); + + for(i = 0; i < old->size; i++) + write_ldt_entry(new->ldt, i, old->ldt + i * LDT_ENTRY_SIZE); return 0; } @@ -124,6 +130,7 @@ void destroy_context(struct mm_struct *mm) if (mm == current->active_mm) clear_LDT(); #endif + paravirt_free_ldt(mm->context.ldt, mm->context.size); if (mm->context.size * LDT_ENTRY_SIZE > PAGE_SIZE) vfree(mm->context.ldt); else diff --git a/arch/x86/kernel/paravirt-spinlocks.c b/arch/x86/kernel/paravirt-spinlocks.c new file mode 100644 index 0000000..38d7f7f --- /dev/null +++ b/arch/x86/kernel/paravirt-spinlocks.c @@ -0,0 +1,31 @@ +/* + * Split spinlock implementation out into its own file, so it can be + * compiled in a FTRACE-compatible way. + */ +#include <linux/spinlock.h> +#include <linux/module.h> + +#include <asm/paravirt.h> + +struct pv_lock_ops pv_lock_ops = { +#ifdef CONFIG_SMP + .spin_is_locked = __ticket_spin_is_locked, + .spin_is_contended = __ticket_spin_is_contended, + + .spin_lock = __ticket_spin_lock, + .spin_trylock = __ticket_spin_trylock, + .spin_unlock = __ticket_spin_unlock, +#endif +}; +EXPORT_SYMBOL_GPL(pv_lock_ops); + +void __init paravirt_use_bytelocks(void) +{ +#ifdef CONFIG_SMP + pv_lock_ops.spin_is_locked = __byte_spin_is_locked; + pv_lock_ops.spin_is_contended = __byte_spin_is_contended; + pv_lock_ops.spin_lock = __byte_spin_lock; + pv_lock_ops.spin_trylock = __byte_spin_trylock; + pv_lock_ops.spin_unlock = __byte_spin_unlock; +#endif +} diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 94da4d52..7faea18 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -268,17 +268,6 @@ enum paravirt_lazy_mode paravirt_get_lazy_mode(void) return __get_cpu_var(paravirt_lazy_mode); } -void __init paravirt_use_bytelocks(void) -{ -#ifdef CONFIG_SMP - pv_lock_ops.spin_is_locked = __byte_spin_is_locked; - pv_lock_ops.spin_is_contended = __byte_spin_is_contended; - pv_lock_ops.spin_lock = __byte_spin_lock; - pv_lock_ops.spin_trylock = __byte_spin_trylock; - pv_lock_ops.spin_unlock = __byte_spin_unlock; -#endif -} - struct pv_info pv_info = { .name = "bare hardware", .paravirt_enabled = 0, @@ -348,6 +337,10 @@ struct pv_cpu_ops pv_cpu_ops = { .write_ldt_entry = native_write_ldt_entry, .write_gdt_entry = native_write_gdt_entry, .write_idt_entry = native_write_idt_entry, + + .alloc_ldt = paravirt_nop, + .free_ldt = paravirt_nop, + .load_sp0 = native_load_sp0, #if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION) @@ -461,18 +454,6 @@ struct pv_mmu_ops pv_mmu_ops = { .set_fixmap = native_set_fixmap, }; -struct pv_lock_ops pv_lock_ops = { -#ifdef CONFIG_SMP - .spin_is_locked = __ticket_spin_is_locked, - .spin_is_contended = __ticket_spin_is_contended, - - .spin_lock = __ticket_spin_lock, - .spin_trylock = __ticket_spin_trylock, - .spin_unlock = __ticket_spin_unlock, -#endif -}; -EXPORT_SYMBOL_GPL(pv_lock_ops); - EXPORT_SYMBOL_GPL(pv_time_ops); EXPORT_SYMBOL (pv_cpu_ops); EXPORT_SYMBOL (pv_mmu_ops); diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile index 59c1e53..9ee745f 100644 --- a/arch/x86/xen/Makefile +++ b/arch/x86/xen/Makefile @@ -1,4 +1,11 @@ -obj-y := enlighten.o setup.o multicalls.o mmu.o \ +ifdef CONFIG_FTRACE +# Do not profile debug and lowlevel utilities +CFLAGS_REMOVE_spinlock.o = -pg +CFLAGS_REMOVE_time.o = -pg +CFLAGS_REMOVE_irq.o = -pg +endif + +obj-y := enlighten.o setup.o multicalls.o mmu.o irq.o \ time.o xen-asm_$(BITS).o grant-table.o suspend.o -obj-$(CONFIG_SMP) += smp.o +obj-$(CONFIG_SMP) += smp.o spinlock.o diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 9ff6e3c..cf8b3a9 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -30,7 +30,6 @@ #include <xen/interface/xen.h> #include <xen/interface/physdev.h> #include <xen/interface/vcpu.h> -#include <xen/interface/sched.h> #include <xen/features.h> #include <xen/page.h> #include <xen/hvc-console.h> @@ -226,103 +225,66 @@ static unsigned long xen_get_debugreg(int reg) return HYPERVISOR_get_debugreg(reg); } -static unsigned long xen_save_fl(void) +static void xen_leave_lazy(void) { - struct vcpu_info *vcpu; - unsigned long flags; - - vcpu = x86_read_percpu(xen_vcpu); - - /* flag has opposite sense of mask */ - flags = !vcpu->evtchn_upcall_mask; - - /* convert to IF type flag - -0 -> 0x00000000 - -1 -> 0xffffffff - */ - return (-flags) & X86_EFLAGS_IF; + paravirt_leave_lazy(paravirt_get_lazy_mode()); + xen_mc_flush(); } -static void xen_restore_fl(unsigned long flags) +static unsigned long xen_store_tr(void) { - struct vcpu_info *vcpu; - - /* convert from IF type flag */ - flags = !(flags & X86_EFLAGS_IF); - - /* There's a one instruction preempt window here. We need to - make sure we're don't switch CPUs between getting the vcpu - pointer and updating the mask. */ - preempt_disable(); - vcpu = x86_read_percpu(xen_vcpu); - vcpu->evtchn_upcall_mask = flags; - preempt_enable_no_resched(); - - /* Doesn't matter if we get preempted here, because any - pending event will get dealt with anyway. */ - - if (flags == 0) { - preempt_check_resched(); - barrier(); /* unmask then check (avoid races) */ - if (unlikely(vcpu->evtchn_upcall_pending)) - force_evtchn_callback(); - } + return 0; } -static void xen_irq_disable(void) +/* + * If 'v' is a vmalloc mapping, then find the linear mapping of the + * page (if any) and also set its protections to match: + */ +static void set_aliased_prot(void *v, pgprot_t prot) { - /* There's a one instruction preempt window here. We need to - make sure we're don't switch CPUs between getting the vcpu - pointer and updating the mask. */ - preempt_disable(); - x86_read_percpu(xen_vcpu)->evtchn_upcall_mask = 1; - preempt_enable_no_resched(); -} + int level; + pte_t *ptep; + pte_t pte; + unsigned long pfn; + struct page *page; -static void xen_irq_enable(void) -{ - struct vcpu_info *vcpu; + ptep = lookup_address((unsigned long)v, &level); + BUG_ON(ptep == NULL); - /* We don't need to worry about being preempted here, since - either a) interrupts are disabled, so no preemption, or b) - the caller is confused and is trying to re-enable interrupts - on an indeterminate processor. */ + pfn = pte_pfn(*ptep); + page = pfn_to_page(pfn); - vcpu = x86_read_percpu(xen_vcpu); - vcpu->evtchn_upcall_mask = 0; + pte = pfn_pte(pfn, prot); - /* Doesn't matter if we get preempted here, because any - pending event will get dealt with anyway. */ + if (HYPERVISOR_update_va_mapping((unsigned long)v, pte, 0)) + BUG(); - barrier(); /* unmask then check (avoid races) */ - if (unlikely(vcpu->evtchn_upcall_pending)) - force_evtchn_callback(); -} + if (!PageHighMem(page)) { + void *av = __va(PFN_PHYS(pfn)); -static void xen_safe_halt(void) -{ - /* Blocking includes an implicit local_irq_enable(). */ - if (HYPERVISOR_sched_op(SCHEDOP_block, NULL) != 0) - BUG(); + if (av != v) + if (HYPERVISOR_update_va_mapping((unsigned long)av, pte, 0)) + BUG(); + } else + kmap_flush_unused(); } -static void xen_halt(void) +static void xen_alloc_ldt(struct desc_struct *ldt, unsigned entries) { - if (irqs_disabled()) - HYPERVISOR_vcpu_op(VCPUOP_down, smp_processor_id(), NULL); - else - xen_safe_halt(); -} + const unsigned entries_per_page = PAGE_SIZE / LDT_ENTRY_SIZE; + int i; -static void xen_leave_lazy(void) -{ - paravirt_leave_lazy(paravirt_get_lazy_mode()); - xen_mc_flush(); + for(i = 0; i < entries; i += entries_per_page) + set_aliased_prot(ldt + i, PAGE_KERNEL_RO); } -static unsigned long xen_store_tr(void) +static void xen_free_ldt(struct desc_struct *ldt, unsigned entries) { - return 0; + const unsigned entries_per_page = PAGE_SIZE / LDT_ENTRY_SIZE; + int i; + + for(i = 0; i < entries; i += entries_per_page) + set_aliased_prot(ldt + i, PAGE_KERNEL); } static void xen_set_ldt(const void *addr, unsigned entries) @@ -426,7 +388,7 @@ static void xen_write_ldt_entry(struct desc_struct *dt, int entrynum, const void *ptr) { unsigned long lp = (unsigned long)&dt[entrynum]; - xmaddr_t mach_lp = virt_to_machine(lp); + xmaddr_t mach_lp = arbitrary_virt_to_machine(lp); u64 entry = *(u64 *)ptr; preempt_disable(); @@ -559,7 +521,7 @@ static void xen_write_gdt_entry(struct desc_struct *dt, int entry, } static void xen_load_sp0(struct tss_struct *tss, - struct thread_struct *thread) + struct thread_struct *thread) { struct multicall_space mcs = xen_mc_entry(0); MULTI_stack_switch(mcs.mc, __KERNEL_DS, thread->sp0); @@ -803,6 +765,19 @@ static int xen_write_msr_safe(unsigned int msr, unsigned low, unsigned high) ret = -EFAULT; break; #endif + + case MSR_STAR: + case MSR_CSTAR: + case MSR_LSTAR: + case MSR_SYSCALL_MASK: + case MSR_IA32_SYSENTER_CS: + case MSR_IA32_SYSENTER_ESP: + case MSR_IA32_SYSENTER_EIP: + /* Fast syscall setup is all done in hypercalls, so + these are all ignored. Stub them out here to stop + Xen console noise. */ + break; + default: ret = native_write_msr_safe(msr, low, high); } @@ -1220,6 +1195,9 @@ static const struct pv_cpu_ops xen_cpu_ops __initdata = { .load_gs_index = xen_load_gs_index, #endif + .alloc_ldt = xen_alloc_ldt, + .free_ldt = xen_free_ldt, + .store_gdt = native_store_gdt, .store_idt = native_store_idt, .store_tr = xen_store_tr, @@ -1241,36 +1219,6 @@ static const struct pv_cpu_ops xen_cpu_ops __initdata = { }, }; -static void __init __xen_init_IRQ(void) -{ -#ifdef CONFIG_X86_64 - int i; - - /* Create identity vector->irq map */ - for(i = 0; i < NR_VECTORS; i++) { - int cpu; - - for_each_possible_cpu(cpu) - per_cpu(vector_irq, cpu)[i] = i; - } -#endif /* CONFIG_X86_64 */ - - xen_init_IRQ(); -} - -static const struct pv_irq_ops xen_irq_ops __initdata = { - .init_IRQ = __xen_init_IRQ, - .save_fl = xen_save_fl, - .restore_fl = xen_restore_fl, - .irq_disable = xen_irq_disable, - .irq_enable = xen_irq_enable, - .safe_halt = xen_safe_halt, - .halt = xen_halt, -#ifdef CONFIG_X86_64 - .adjust_exception_frame = xen_adjust_exception_frame, -#endif -}; - static const struct pv_apic_ops xen_apic_ops __initdata = { #ifdef CONFIG_X86_LOCAL_APIC .apic_write = xen_apic_write, @@ -1324,7 +1272,7 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = { .ptep_modify_prot_commit = __ptep_modify_prot_commit, .pte_val = xen_pte_val, - .pte_flags = native_pte_val, + .pte_flags = native_pte_flags, .pgd_val = xen_pgd_val, .make_pte = xen_make_pte, @@ -1673,10 +1621,11 @@ asmlinkage void __init xen_start_kernel(void) pv_init_ops = xen_init_ops; pv_time_ops = xen_time_ops; pv_cpu_ops = xen_cpu_ops; - pv_irq_ops = xen_irq_ops; pv_apic_ops = xen_apic_ops; pv_mmu_ops = xen_mmu_ops; + xen_init_irq_ops(); + if (xen_feature(XENFEAT_mmu_pt_update_preserve_ad)) { pv_mmu_ops.ptep_modify_prot_start = xen_ptep_modify_prot_start; pv_mmu_ops.ptep_modify_prot_commit = xen_ptep_modify_prot_commit; diff --git a/arch/x86/xen/irq.c b/arch/x86/xen/irq.c new file mode 100644 index 0000000..28b85ab --- /dev/null +++ b/arch/x86/xen/irq.c @@ -0,0 +1,143 @@ +#include <linux/hardirq.h> + +#include <xen/interface/xen.h> +#include <xen/interface/sched.h> +#include <xen/interface/vcpu.h> + +#include <asm/xen/hypercall.h> +#include <asm/xen/hypervisor.h> + +#include "xen-ops.h" + +/* + * Force a proper event-channel callback from Xen after clearing the + * callback mask. We do this in a very simple manner, by making a call + * down into Xen. The pending flag will be checked by Xen on return. + */ +void xen_force_evtchn_callback(void) +{ + (void)HYPERVISOR_xen_version(0, NULL); +} + +static void __init __xen_init_IRQ(void) +{ +#ifdef CONFIG_X86_64 + int i; + + /* Create identity vector->irq map */ + for(i = 0; i < NR_VECTORS; i++) { + int cpu; + + for_each_possible_cpu(cpu) + per_cpu(vector_irq, cpu)[i] = i; + } +#endif /* CONFIG_X86_64 */ + + xen_init_IRQ(); +} + +static unsigned long xen_save_fl(void) +{ + struct vcpu_info *vcpu; + unsigned long flags; + + vcpu = x86_read_percpu(xen_vcpu); + + /* flag has opposite sense of mask */ + flags = !vcpu->evtchn_upcall_mask; + + /* convert to IF type flag + -0 -> 0x00000000 + -1 -> 0xffffffff + */ + return (-flags) & X86_EFLAGS_IF; +} + +static void xen_restore_fl(unsigned long flags) +{ + struct vcpu_info *vcpu; + + /* convert from IF type flag */ + flags = !(flags & X86_EFLAGS_IF); + + /* There's a one instruction preempt window here. We need to + make sure we're don't switch CPUs between getting the vcpu + pointer and updating the mask. */ + preempt_disable(); + vcpu = x86_read_percpu(xen_vcpu); + vcpu->evtchn_upcall_mask = flags; + preempt_enable_no_resched(); + + /* Doesn't matter if we get preempted here, because any + pending event will get dealt with anyway. */ + + if (flags == 0) { + preempt_check_resched(); + barrier(); /* unmask then check (avoid races) */ + if (unlikely(vcpu->evtchn_upcall_pending)) + xen_force_evtchn_callback(); + } +} + +static void xen_irq_disable(void) +{ + /* There's a one instruction preempt window here. We need to + make sure we're don't switch CPUs between getting the vcpu + pointer and updating the mask. */ + preempt_disable(); + x86_read_percpu(xen_vcpu)->evtchn_upcall_mask = 1; + preempt_enable_no_resched(); +} + +static void xen_irq_enable(void) +{ + struct vcpu_info *vcpu; + + /* We don't need to worry about being preempted here, since + either a) interrupts are disabled, so no preemption, or b) + the caller is confused and is trying to re-enable interrupts + on an indeterminate processor. */ + + vcpu = x86_read_percpu(xen_vcpu); + vcpu->evtchn_upcall_mask = 0; + + /* Doesn't matter if we get preempted here, because any + pending event will get dealt with anyway. */ + + barrier(); /* unmask then check (avoid races) */ + if (unlikely(vcpu->evtchn_upcall_pending)) + xen_force_evtchn_callback(); +} + +static void xen_safe_halt(void) +{ + /* Blocking includes an implicit local_irq_enable(). */ + if (HYPERVISOR_sched_op(SCHEDOP_block, NULL) != 0) + BUG(); +} + +static void xen_halt(void) +{ + if (irqs_disabled()) + HYPERVISOR_vcpu_op(VCPUOP_down, smp_processor_id(), NULL); + else + xen_safe_halt(); +} + +static const struct pv_irq_ops xen_irq_ops __initdata = { + .init_IRQ = __xen_init_IRQ, + .save_fl = xen_save_fl, + .restore_fl = xen_restore_fl, + .irq_disable = xen_irq_disable, + .irq_enable = xen_irq_enable, + .safe_halt = xen_safe_halt, + .halt = xen_halt, +#ifdef CONFIG_X86_64 + .adjust_exception_frame = xen_adjust_exception_frame, +#endif +}; + +void __init xen_init_irq_ops() +{ + pv_irq_ops = xen_irq_ops; +} diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index d8faf79..baca7f2 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -15,7 +15,6 @@ * This does not handle HOTPLUG_CPU yet. */ #include <linux/sched.h> -#include <linux/kernel_stat.h> #include <linux/err.h> #include <linux/smp.h> @@ -36,8 +35,6 @@ #include "xen-ops.h" #include "mmu.h" -static void __cpuinit xen_init_lock_cpu(int cpu); - cpumask_t xen_cpu_initialized_map; static DEFINE_PER_CPU(int, resched_irq); @@ -419,170 +416,6 @@ static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id) return IRQ_HANDLED; } -struct xen_spinlock { - unsigned char lock; /* 0 -> free; 1 -> locked */ - unsigned short spinners; /* count of waiting cpus */ -}; - -static int xen_spin_is_locked(struct raw_spinlock *lock) -{ - struct xen_spinlock *xl = (struct xen_spinlock *)lock; - - return xl->lock != 0; -} - -static int xen_spin_is_contended(struct raw_spinlock *lock) -{ - struct xen_spinlock *xl = (struct xen_spinlock *)lock; - - /* Not strictly true; this is only the count of contended - lock-takers entering the slow path. */ - return xl->spinners != 0; -} - -static int xen_spin_trylock(struct raw_spinlock *lock) -{ - struct xen_spinlock *xl = (struct xen_spinlock *)lock; - u8 old = 1; - - asm("xchgb %b0,%1" - : "+q" (old), "+m" (xl->lock) : : "memory"); - - return old == 0; -} - -static DEFINE_PER_CPU(int, lock_kicker_irq) = -1; -static DEFINE_PER_CPU(struct xen_spinlock *, lock_spinners); - -static inline void spinning_lock(struct xen_spinlock *xl) -{ - __get_cpu_var(lock_spinners) = xl; - wmb(); /* set lock of interest before count */ - asm(LOCK_PREFIX " incw %0" - : "+m" (xl->spinners) : : "memory"); -} - -static inline void unspinning_lock(struct xen_spinlock *xl) -{ - asm(LOCK_PREFIX " decw %0" - : "+m" (xl->spinners) : : "memory"); - wmb(); /* decrement count before clearing lock */ - __get_cpu_var(lock_spinners) = NULL; -} - -static noinline int xen_spin_lock_slow(struct raw_spinlock *lock) -{ - struct xen_spinlock *xl = (struct xen_spinlock *)lock; - int irq = __get_cpu_var(lock_kicker_irq); - int ret; - - /* If kicker interrupts not initialized yet, just spin */ - if (irq == -1) - return 0; - - /* announce we're spinning */ - spinning_lock(xl); - - /* clear pending */ - xen_clear_irq_pending(irq); - - /* check again make sure it didn't become free while - we weren't looking */ - ret = xen_spin_trylock(lock); - if (ret) - goto out; - - /* block until irq becomes pending */ - xen_poll_irq(irq); - kstat_this_cpu.irqs[irq]++; - -out: - unspinning_lock(xl); - return ret; -} - -static void xen_spin_lock(struct raw_spinlock *lock) -{ - struct xen_spinlock *xl = (struct xen_spinlock *)lock; - int timeout; - u8 oldval; - - do { - timeout = 1 << 10; - - asm("1: xchgb %1,%0\n" - " testb %1,%1\n" - " jz 3f\n" - "2: rep;nop\n" - " cmpb $0,%0\n" - " je 1b\n" - " dec %2\n" - " jnz 2b\n" - "3:\n" - : "+m" (xl->lock), "=q" (oldval), "+r" (timeout) - : "1" (1) - : "memory"); - - } while (unlikely(oldval != 0 && !xen_spin_lock_slow(lock))); -} - -static noinline void xen_spin_unlock_slow(struct xen_spinlock *xl) -{ - int cpu; - - for_each_online_cpu(cpu) { - /* XXX should mix up next cpu selection */ - if (per_cpu(lock_spinners, cpu) == xl) { - xen_send_IPI_one(cpu, XEN_SPIN_UNLOCK_VECTOR); - break; - } - } -} - -static void xen_spin_unlock(struct raw_spinlock *lock) -{ - struct xen_spinlock *xl = (struct xen_spinlock *)lock; - - smp_wmb(); /* make sure no writes get moved after unlock */ - xl->lock = 0; /* release lock */ - - /* make sure unlock happens before kick */ - barrier(); - - if (unlikely(xl->spinners)) - xen_spin_unlock_slow(xl); -} - -static __cpuinit void xen_init_lock_cpu(int cpu) -{ - int irq; - const char *name; - - name = kasprintf(GFP_KERNEL, "spinlock%d", cpu); - irq = bind_ipi_to_irqhandler(XEN_SPIN_UNLOCK_VECTOR, - cpu, - xen_reschedule_interrupt, - IRQF_DISABLED|IRQF_PERCPU|IRQF_NOBALANCING, - name, - NULL); - - if (irq >= 0) { - disable_irq(irq); /* make sure it's never delivered */ - per_cpu(lock_kicker_irq, cpu) = irq; - } - - printk("cpu %d spinlock event irq %d\n", cpu, irq); -} - -static void __init xen_init_spinlocks(void) -{ - pv_lock_ops.spin_is_locked = xen_spin_is_locked; - pv_lock_ops.spin_is_contended = xen_spin_is_contended; - pv_lock_ops.spin_lock = xen_spin_lock; - pv_lock_ops.spin_trylock = xen_spin_trylock; - pv_lock_ops.spin_unlock = xen_spin_unlock; -} - static const struct smp_ops xen_smp_ops __initdata = { .smp_prepare_boot_cpu = xen_smp_prepare_boot_cpu, .smp_prepare_cpus = xen_smp_prepare_cpus, diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c new file mode 100644 index 0000000..8dc4d31 --- /dev/null +++ b/arch/x86/xen/spinlock.c @@ -0,0 +1,183 @@ +/* + * Split spinlock implementation out into its own file, so it can be + * compiled in a FTRACE-compatible way. + */ +#include <linux/kernel_stat.h> +#include <linux/spinlock.h> + +#include <asm/paravirt.h> + +#include <xen/interface/xen.h> +#include <xen/events.h> + +#include "xen-ops.h" + +struct xen_spinlock { + unsigned char lock; /* 0 -> free; 1 -> locked */ + unsigned short spinners; /* count of waiting cpus */ +}; + +static int xen_spin_is_locked(struct raw_spinlock *lock) +{ + struct xen_spinlock *xl = (struct xen_spinlock *)lock; + + return xl->lock != 0; +} + +static int xen_spin_is_contended(struct raw_spinlock *lock) +{ + struct xen_spinlock *xl = (struct xen_spinlock *)lock; + + /* Not strictly true; this is only the count of contended + lock-takers entering the slow path. */ + return xl->spinners != 0; +} + +static int xen_spin_trylock(struct raw_spinlock *lock) +{ + struct xen_spinlock *xl = (struct xen_spinlock *)lock; + u8 old = 1; + + asm("xchgb %b0,%1" + : "+q" (old), "+m" (xl->lock) : : "memory"); + + return old == 0; +} + +static DEFINE_PER_CPU(int, lock_kicker_irq) = -1; +static DEFINE_PER_CPU(struct xen_spinlock *, lock_spinners); + +static inline void spinning_lock(struct xen_spinlock *xl) +{ + __get_cpu_var(lock_spinners) = xl; + wmb(); /* set lock of interest before count */ + asm(LOCK_PREFIX " incw %0" + : "+m" (xl->spinners) : : "memory"); +} + +static inline void unspinning_lock(struct xen_spinlock *xl) +{ + asm(LOCK_PREFIX " decw %0" + : "+m" (xl->spinners) : : "memory"); + wmb(); /* decrement count before clearing lock */ + __get_cpu_var(lock_spinners) = NULL; +} + +static noinline int xen_spin_lock_slow(struct raw_spinlock *lock) +{ + struct xen_spinlock *xl = (struct xen_spinlock *)lock; + int irq = __get_cpu_var(lock_kicker_irq); + int ret; + + /* If kicker interrupts not initialized yet, just spin */ + if (irq == -1) + return 0; + + /* announce we're spinning */ + spinning_lock(xl); + + /* clear pending */ + xen_clear_irq_pending(irq); + + /* check again make sure it didn't become free while + we weren't looking */ + ret = xen_spin_trylock(lock); + if (ret) + goto out; + + /* block until irq becomes pending */ + xen_poll_irq(irq); + kstat_this_cpu.irqs[irq]++; + +out: + unspinning_lock(xl); + return ret; +} + +static void xen_spin_lock(struct raw_spinlock *lock) +{ + struct xen_spinlock *xl = (struct xen_spinlock *)lock; + int timeout; + u8 oldval; + + do { + timeout = 1 << 10; + + asm("1: xchgb %1,%0\n" + " testb %1,%1\n" + " jz 3f\n" + "2: rep;nop\n" + " cmpb $0,%0\n" + " je 1b\n" + " dec %2\n" + " jnz 2b\n" + "3:\n" + : "+m" (xl->lock), "=q" (oldval), "+r" (timeout) + : "1" (1) + : "memory"); + + } while (unlikely(oldval != 0 && !xen_spin_lock_slow(lock))); +} + +static noinline void xen_spin_unlock_slow(struct xen_spinlock *xl) +{ + int cpu; + + for_each_online_cpu(cpu) { + /* XXX should mix up next cpu selection */ + if (per_cpu(lock_spinners, cpu) == xl) { + xen_send_IPI_one(cpu, XEN_SPIN_UNLOCK_VECTOR); + break; + } + } +} + +static void xen_spin_unlock(struct raw_spinlock *lock) +{ + struct xen_spinlock *xl = (struct xen_spinlock *)lock; + + smp_wmb(); /* make sure no writes get moved after unlock */ + xl->lock = 0; /* release lock */ + + /* make sure unlock happens before kick */ + barrier(); + + if (unlikely(xl->spinners)) + xen_spin_unlock_slow(xl); +} + +static irqreturn_t dummy_handler(int irq, void *dev_id) +{ + BUG(); + return IRQ_HANDLED; +} + +void __cpuinit xen_init_lock_cpu(int cpu) +{ + int irq; + const char *name; + + name = kasprintf(GFP_KERNEL, "spinlock%d", cpu); + irq = bind_ipi_to_irqhandler(XEN_SPIN_UNLOCK_VECTOR, + cpu, + dummy_handler, + IRQF_DISABLED|IRQF_PERCPU|IRQF_NOBALANCING, + name, + NULL); + + if (irq >= 0) { + disable_irq(irq); /* make sure it's never delivered */ + per_cpu(lock_kicker_irq, cpu) = irq; + } + + printk("cpu %d spinlock event irq %d\n", cpu, irq); +} + +void __init xen_init_spinlocks(void) +{ + pv_lock_ops.spin_is_locked = xen_spin_is_locked; + pv_lock_ops.spin_is_contended = xen_spin_is_contended; + pv_lock_ops.spin_lock = xen_spin_lock; + pv_lock_ops.spin_trylock = xen_spin_trylock; + pv_lock_ops.spin_unlock = xen_spin_unlock; +} diff --git a/arch/x86/xen/xen-asm_32.S b/arch/x86/xen/xen-asm_32.S index 2497a30..42786f5 100644 --- a/arch/x86/xen/xen-asm_32.S +++ b/arch/x86/xen/xen-asm_32.S @@ -298,7 +298,7 @@ check_events: push %eax push %ecx push %edx - call force_evtchn_callback + call xen_force_evtchn_callback pop %edx pop %ecx pop %eax diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S index 7f58304..3b9bda4 100644 --- a/arch/x86/xen/xen-asm_64.S +++ b/arch/x86/xen/xen-asm_64.S @@ -122,7 +122,7 @@ check_events: push %r9 push %r10 push %r11 - call force_evtchn_callback + call xen_force_evtchn_callback pop %r11 pop %r10 pop %r9 diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index dd3c231..3c70ebc 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -31,6 +31,7 @@ void xen_vcpu_restore(void); void __init xen_build_dynamic_phys_to_machine(void); +void xen_init_irq_ops(void); void xen_setup_timer(int cpu); void xen_setup_cpu_clockevents(void); unsigned long xen_tsc_khz(void); @@ -50,6 +51,9 @@ void __init xen_setup_vcpu_info_placement(void); #ifdef CONFIG_SMP void xen_smp_init(void); +void __init xen_init_spinlocks(void); +__cpuinit void xen_init_lock_cpu(int cpu); + extern cpumask_t xen_cpu_initialized_map; #else static inline void xen_smp_init(void) {} diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c index d4427cb..fff987b 100644 --- a/drivers/xen/balloon.c +++ b/drivers/xen/balloon.c @@ -60,7 +60,7 @@ #define PAGES2KB(_p) ((_p)<<(PAGE_SHIFT-10)) -#define BALLOON_CLASS_NAME "memory" +#define BALLOON_CLASS_NAME "xen_memory" struct balloon_stats { /* We aim for 'current allocation' == 'target allocation'. */ @@ -226,9 +226,8 @@ static int increase_reservation(unsigned long nr_pages) } set_xen_guest_handle(reservation.extent_start, frame_list); - reservation.nr_extents = nr_pages; - rc = HYPERVISOR_memory_op( - XENMEM_populate_physmap, &reservation); + reservation.nr_extents = nr_pages; + rc = HYPERVISOR_memory_op(XENMEM_populate_physmap, &reservation); if (rc < nr_pages) { if (rc > 0) { int ret; @@ -236,7 +235,7 @@ static int increase_reservation(unsigned long nr_pages) /* We hit the Xen hard limit: reprobe. */ reservation.nr_extents = rc; ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, - &reservation); + &reservation); BUG_ON(ret != rc); } if (rc >= 0) @@ -464,136 +463,13 @@ static void balloon_exit(void) module_exit(balloon_exit); -static void balloon_update_driver_allowance(long delta) -{ - unsigned long flags; - - spin_lock_irqsave(&balloon_lock, flags); - balloon_stats.driver_pages += delta; - spin_unlock_irqrestore(&balloon_lock, flags); -} - -static int dealloc_pte_fn( - pte_t *pte, struct page *pmd_page, unsigned long addr, void *data) -{ - unsigned long mfn = pte_mfn(*pte); - int ret; - struct xen_memory_reservation reservation = { - .nr_extents = 1, - .extent_order = 0, - .domid = DOMID_SELF - }; - set_xen_guest_handle(reservation.extent_start, &mfn); - set_pte_at(&init_mm, addr, pte, __pte_ma(0ull)); - set_phys_to_machine(__pa(addr) >> PAGE_SHIFT, INVALID_P2M_ENTRY); - ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation); - BUG_ON(ret != 1); - return 0; -} - -static struct page **alloc_empty_pages_and_pagevec(int nr_pages) -{ - unsigned long vaddr, flags; - struct page *page, **pagevec; - int i, ret; - - pagevec = kmalloc(sizeof(page) * nr_pages, GFP_KERNEL); - if (pagevec == NULL) - return NULL; - - for (i = 0; i < nr_pages; i++) { - page = pagevec[i] = alloc_page(GFP_KERNEL); - if (page == NULL) - goto err; - - vaddr = (unsigned long)page_address(page); - - scrub_page(page); - - spin_lock_irqsave(&balloon_lock, flags); - - if (xen_feature(XENFEAT_auto_translated_physmap)) { - unsigned long gmfn = page_to_pfn(page); - struct xen_memory_reservation reservation = { - .nr_extents = 1, - .extent_order = 0, - .domid = DOMID_SELF - }; - set_xen_guest_handle(reservation.extent_start, &gmfn); - ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, - &reservation); - if (ret == 1) - ret = 0; /* success */ - } else { - ret = apply_to_page_range(&init_mm, vaddr, PAGE_SIZE, - dealloc_pte_fn, NULL); - } - - if (ret != 0) { - spin_unlock_irqrestore(&balloon_lock, flags); - __free_page(page); - goto err; - } - - totalram_pages = --balloon_stats.current_pages; - - spin_unlock_irqrestore(&balloon_lock, flags); - } - - out: - schedule_work(&balloon_worker); - flush_tlb_all(); - return pagevec; - - err: - spin_lock_irqsave(&balloon_lock, flags); - while (--i >= 0) - balloon_append(pagevec[i]); - spin_unlock_irqrestore(&balloon_lock, flags); - kfree(pagevec); - pagevec = NULL; - goto out; -} - -static void free_empty_pages_and_pagevec(struct page **pagevec, int nr_pages) -{ - unsigned long flags; - int i; - - if (pagevec == NULL) - return; - - spin_lock_irqsave(&balloon_lock, flags); - for (i = 0; i < nr_pages; i++) { - BUG_ON(page_count(pagevec[i]) != 1); - balloon_append(pagevec[i]); - } - spin_unlock_irqrestore(&balloon_lock, flags); - - kfree(pagevec); - - schedule_work(&balloon_worker); -} - -static void balloon_release_driver_page(struct page *page) -{ - unsigned long flags; - - spin_lock_irqsave(&balloon_lock, flags); - balloon_append(page); - balloon_stats.driver_pages--; - spin_unlock_irqrestore(&balloon_lock, flags); - - schedule_work(&balloon_worker); -} - - -#define BALLOON_SHOW(name, format, args...) \ - static ssize_t show_##name(struct sys_device *dev, \ - char *buf) \ - { \ - return sprintf(buf, format, ##args); \ - } \ +#define BALLOON_SHOW(name, format, args...) \ + static ssize_t show_##name(struct sys_device *dev, \ + struct sysdev_attribute *attr, \ + char *buf) \ + { \ + return sprintf(buf, format, ##args); \ + } \ static SYSDEV_ATTR(name, S_IRUGO, show_##name, NULL) BALLOON_SHOW(current_kb, "%lu\n", PAGES2KB(balloon_stats.current_pages)); @@ -604,7 +480,8 @@ BALLOON_SHOW(hard_limit_kb, (balloon_stats.hard_limit!=~0UL) ? PAGES2KB(balloon_stats.hard_limit) : 0); BALLOON_SHOW(driver_kb, "%lu\n", PAGES2KB(balloon_stats.driver_pages)); -static ssize_t show_target_kb(struct sys_device *dev, char *buf) +static ssize_t show_target_kb(struct sys_device *dev, struct sysdev_attribute *attr, + char *buf) { return sprintf(buf, "%lu\n", PAGES2KB(balloon_stats.target_pages)); } @@ -614,19 +491,14 @@ static ssize_t store_target_kb(struct sys_device *dev, const char *buf, size_t count) { - char memstring[64], *endchar; + char *endchar; unsigned long long target_bytes; if (!capable(CAP_SYS_ADMIN)) return -EPERM; - if (count <= 1) - return -EBADMSG; /* runt */ - if (count > sizeof(memstring)) - return -EFBIG; /* too long */ - strcpy(memstring, buf); + target_bytes = memparse(buf, &endchar); - target_bytes = memparse(memstring, &endchar); balloon_set_new_target(target_bytes >> PAGE_SHIFT); return count; @@ -694,20 +566,4 @@ static int register_balloon(struct sys_device *sysdev) return error; } -static void unregister_balloon(struct sys_device *sysdev) -{ - int i; - - sysfs_remove_group(&sysdev->kobj, &balloon_info_group); - for (i = 0; i < ARRAY_SIZE(balloon_attrs); i++) - sysdev_remove_file(sysdev, balloon_attrs[i]); - sysdev_unregister(sysdev); - sysdev_class_unregister(&balloon_sysdev_class); -} - -static void balloon_sysfs_exit(void) -{ - unregister_balloon(&balloon_sysdev); -} - MODULE_LICENSE("GPL"); diff --git a/drivers/xen/events.c b/drivers/xen/events.c index 0e0c285..a083703 100644 --- a/drivers/xen/events.c +++ b/drivers/xen/events.c @@ -84,17 +84,6 @@ static int irq_bindcount[NR_IRQS]; /* Xen will never allocate port zero for any purpose. */ #define VALID_EVTCHN(chn) ((chn) != 0) -/* - * Force a proper event-channel callback from Xen after clearing the - * callback mask. We do this in a very simple manner, by making a call - * down into Xen. The pending flag will be checked by Xen on return. - */ -void force_evtchn_callback(void) -{ - (void)HYPERVISOR_xen_version(0, NULL); -} -EXPORT_SYMBOL_GPL(force_evtchn_callback); - static struct irq_chip xen_dynamic_chip; /* Constructor for packed IRQ information. */ diff --git a/include/asm-x86/desc.h b/include/asm-x86/desc.h index a44c4dc..06f786f 100644 --- a/include/asm-x86/desc.h +++ b/include/asm-x86/desc.h @@ -24,6 +24,11 @@ static inline void fill_ldt(struct desc_struct *desc, desc->d = info->seg_32bit; desc->g = info->limit_in_pages; desc->base2 = (info->base_addr & 0xff000000) >> 24; + /* + * Don't allow setting of the lm bit. It is useless anyway + * because 64bit system calls require __USER_CS: + */ + desc->l = 0; } extern struct desc_ptr idt_descr; @@ -97,7 +102,15 @@ static inline int desc_empty(const void *ptr) native_write_gdt_entry(dt, entry, desc, type) #define write_idt_entry(dt, entry, g) \ native_write_idt_entry(dt, entry, g) -#endif + +static inline void paravirt_alloc_ldt(struct desc_struct *ldt, unsigned entries) +{ +} + +static inline void paravirt_free_ldt(struct desc_struct *ldt, unsigned entries) +{ +} +#endif /* CONFIG_PARAVIRT */ static inline void native_write_idt_entry(gate_desc *idt, int entry, const gate_desc *gate) diff --git a/include/asm-x86/paravirt.h b/include/asm-x86/paravirt.h index fbbde93..db9b064 100644 --- a/include/asm-x86/paravirt.h +++ b/include/asm-x86/paravirt.h @@ -124,6 +124,9 @@ struct pv_cpu_ops { int entrynum, const void *desc, int size); void (*write_idt_entry)(gate_desc *, int entrynum, const gate_desc *gate); + void (*alloc_ldt)(struct desc_struct *ldt, unsigned entries); + void (*free_ldt)(struct desc_struct *ldt, unsigned entries); + void (*load_sp0)(struct tss_struct *tss, struct thread_struct *t); void (*set_iopl_mask)(unsigned mask); @@ -824,6 +827,16 @@ do { \ (aux) = __aux; \ } while (0) +static inline void paravirt_alloc_ldt(struct desc_struct *ldt, unsigned entries) +{ + PVOP_VCALL2(pv_cpu_ops.alloc_ldt, ldt, entries); +} + +static inline void paravirt_free_ldt(struct desc_struct *ldt, unsigned entries) +{ + PVOP_VCALL2(pv_cpu_ops.free_ldt, ldt, entries); +} + static inline void load_TR_desc(void) { PVOP_VCALL0(pv_cpu_ops.load_tr_desc); diff --git a/include/linux/kernel.h b/include/linux/kernel.h index fdbbf72..7889c2f 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -176,7 +176,7 @@ extern int vsscanf(const char *, const char *, va_list) extern int get_option(char **str, int *pint); extern char *get_options(const char *str, int nints, int *ints); -extern unsigned long long memparse(char *ptr, char **retptr); +extern unsigned long long memparse(const char *ptr, char **retptr); extern int core_kernel_text(unsigned long addr); extern int __kernel_text_address(unsigned long addr); diff --git a/lib/cmdline.c b/lib/cmdline.c index 5ba8a94..f5f3ad8 100644 --- a/lib/cmdline.c +++ b/lib/cmdline.c @@ -126,7 +126,7 @@ char *get_options(const char *str, int nints, int *ints) * megabyte, or one gigabyte, respectively. */ -unsigned long long memparse(char *ptr, char **retptr) +unsigned long long memparse(const char *ptr, char **retptr) { char *endptr; /* local pointer to end of parsed string */ |