diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2010-01-25 19:02:31 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2010-01-25 19:02:31 -0800 |
commit | 486d35e2220acfe45d85131c557d94fe889184a2 (patch) | |
tree | 6be42a8a0d82e7e09bb4ac05edcbdb96adf650dc | |
parent | a8d0b6666ecfe14226f1e46d693d5e2cde072337 (diff) | |
parent | 443c39bc9ef7d8f648408d74c97e943f3bb3f48a (diff) | |
download | op-kernel-dev-486d35e2220acfe45d85131c557d94fe889184a2.zip op-kernel-dev-486d35e2220acfe45d85131c557d94fe889184a2.tar.gz |
Merge branch 'kvm-updates/2.6.33' of git://git.kernel.org/pub/scm/virt/kvm/kvm
* 'kvm-updates/2.6.33' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
KVM: x86: Fix leak of free lapic date in kvm_arch_vcpu_init()
KVM: x86: Fix probable memory leak of vcpu->arch.mce_banks
KVM: S390: fix potential array overrun in intercept handling
KVM: fix spurious interrupt with irqfd
eventfd - allow atomic read and waitqueue remove
KVM: MMU: bail out pagewalk on kvm_read_guest error
KVM: properly check max PIC pin in irq route setup
KVM: only allow one gsi per fd
KVM: x86: Fix host_mapping_level()
KVM: powerpc: Show timing option only on embedded
KVM: Fix race between APIC TMR and IRR
-rw-r--r-- | arch/powerpc/kvm/Kconfig | 2 | ||||
-rw-r--r-- | arch/s390/kvm/intercept.c | 4 | ||||
-rw-r--r-- | arch/x86/kvm/lapic.c | 11 | ||||
-rw-r--r-- | arch/x86/kvm/mmu.c | 6 | ||||
-rw-r--r-- | arch/x86/kvm/paging_tmpl.h | 4 | ||||
-rw-r--r-- | arch/x86/kvm/x86.c | 6 | ||||
-rw-r--r-- | fs/eventfd.c | 89 | ||||
-rw-r--r-- | include/linux/eventfd.h | 16 | ||||
-rw-r--r-- | virt/kvm/eventfd.c | 18 | ||||
-rw-r--r-- | virt/kvm/irq_comm.c | 6 |
10 files changed, 128 insertions, 34 deletions
diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig index 07703f7..6fb6e8a 100644 --- a/arch/powerpc/kvm/Kconfig +++ b/arch/powerpc/kvm/Kconfig @@ -53,7 +53,7 @@ config KVM_440 config KVM_EXIT_TIMING bool "Detailed exit timing" - depends on KVM + depends on KVM_440 || KVM_E500 ---help--- Calculate elapsed time for every exit/enter cycle. A per-vcpu report is available in debugfs kvm/vm#_vcpu#_timing. diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c index ba9d8a7..b400964 100644 --- a/arch/s390/kvm/intercept.c +++ b/arch/s390/kvm/intercept.c @@ -213,7 +213,7 @@ static int handle_instruction_and_prog(struct kvm_vcpu *vcpu) return rc2; } -static const intercept_handler_t intercept_funcs[0x48 >> 2] = { +static const intercept_handler_t intercept_funcs[] = { [0x00 >> 2] = handle_noop, [0x04 >> 2] = handle_instruction, [0x08 >> 2] = handle_prog, @@ -230,7 +230,7 @@ int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu) intercept_handler_t func; u8 code = vcpu->arch.sie_block->icptcode; - if (code & 3 || code > 0x48) + if (code & 3 || (code >> 2) >= ARRAY_SIZE(intercept_funcs)) return -ENOTSUPP; func = intercept_funcs[code >> 2]; if (func) diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 3063a0c..ba8c045 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -373,6 +373,12 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, if (unlikely(!apic_enabled(apic))) break; + if (trig_mode) { + apic_debug("level trig mode for vector %d", vector); + apic_set_vector(vector, apic->regs + APIC_TMR); + } else + apic_clear_vector(vector, apic->regs + APIC_TMR); + result = !apic_test_and_set_irr(vector, apic); trace_kvm_apic_accept_irq(vcpu->vcpu_id, delivery_mode, trig_mode, vector, !result); @@ -383,11 +389,6 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, break; } - if (trig_mode) { - apic_debug("level trig mode for vector %d", vector); - apic_set_vector(vector, apic->regs + APIC_TMR); - } else - apic_clear_vector(vector, apic->regs + APIC_TMR); kvm_vcpu_kick(vcpu); break; diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 4c3e5b2..89a49fb 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -477,7 +477,7 @@ static int host_mapping_level(struct kvm *kvm, gfn_t gfn) addr = gfn_to_hva(kvm, gfn); if (kvm_is_error_hva(addr)) - return page_size; + return PT_PAGE_TABLE_LEVEL; down_read(¤t->mm->mmap_sem); vma = find_vma(current->mm, addr); @@ -515,11 +515,9 @@ static int mapping_level(struct kvm_vcpu *vcpu, gfn_t large_gfn) if (host_level == PT_PAGE_TABLE_LEVEL) return host_level; - for (level = PT_DIRECTORY_LEVEL; level <= host_level; ++level) { - + for (level = PT_DIRECTORY_LEVEL; level <= host_level; ++level) if (has_wrprotected_page(vcpu->kvm, large_gfn, level)) break; - } return level - 1; } diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 58a0f1e..ede2131 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -150,7 +150,9 @@ walk: walker->table_gfn[walker->level - 1] = table_gfn; walker->pte_gpa[walker->level - 1] = pte_gpa; - kvm_read_guest(vcpu->kvm, pte_gpa, &pte, sizeof(pte)); + if (kvm_read_guest(vcpu->kvm, pte_gpa, &pte, sizeof(pte))) + goto not_present; + trace_kvm_mmu_paging_element(pte, walker->level); if (!is_present_gpte(pte)) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 6651dbf..1ddcad4 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -5072,12 +5072,13 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) GFP_KERNEL); if (!vcpu->arch.mce_banks) { r = -ENOMEM; - goto fail_mmu_destroy; + goto fail_free_lapic; } vcpu->arch.mcg_cap = KVM_MAX_MCE_BANKS; return 0; - +fail_free_lapic: + kvm_free_lapic(vcpu); fail_mmu_destroy: kvm_mmu_destroy(vcpu); fail_free_pio_data: @@ -5088,6 +5089,7 @@ fail: void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) { + kfree(vcpu->arch.mce_banks); kvm_free_lapic(vcpu); down_read(&vcpu->kvm->slots_lock); kvm_mmu_destroy(vcpu); diff --git a/fs/eventfd.c b/fs/eventfd.c index d26402f..7758cc3 100644 --- a/fs/eventfd.c +++ b/fs/eventfd.c @@ -135,26 +135,71 @@ static unsigned int eventfd_poll(struct file *file, poll_table *wait) return events; } -static ssize_t eventfd_read(struct file *file, char __user *buf, size_t count, - loff_t *ppos) +static void eventfd_ctx_do_read(struct eventfd_ctx *ctx, __u64 *cnt) +{ + *cnt = (ctx->flags & EFD_SEMAPHORE) ? 1 : ctx->count; + ctx->count -= *cnt; +} + +/** + * eventfd_ctx_remove_wait_queue - Read the current counter and removes wait queue. + * @ctx: [in] Pointer to eventfd context. + * @wait: [in] Wait queue to be removed. + * @cnt: [out] Pointer to the 64bit conter value. + * + * Returns zero if successful, or the following error codes: + * + * -EAGAIN : The operation would have blocked. + * + * This is used to atomically remove a wait queue entry from the eventfd wait + * queue head, and read/reset the counter value. + */ +int eventfd_ctx_remove_wait_queue(struct eventfd_ctx *ctx, wait_queue_t *wait, + __u64 *cnt) +{ + unsigned long flags; + + spin_lock_irqsave(&ctx->wqh.lock, flags); + eventfd_ctx_do_read(ctx, cnt); + __remove_wait_queue(&ctx->wqh, wait); + if (*cnt != 0 && waitqueue_active(&ctx->wqh)) + wake_up_locked_poll(&ctx->wqh, POLLOUT); + spin_unlock_irqrestore(&ctx->wqh.lock, flags); + + return *cnt != 0 ? 0 : -EAGAIN; +} +EXPORT_SYMBOL_GPL(eventfd_ctx_remove_wait_queue); + +/** + * eventfd_ctx_read - Reads the eventfd counter or wait if it is zero. + * @ctx: [in] Pointer to eventfd context. + * @no_wait: [in] Different from zero if the operation should not block. + * @cnt: [out] Pointer to the 64bit conter value. + * + * Returns zero if successful, or the following error codes: + * + * -EAGAIN : The operation would have blocked but @no_wait was nonzero. + * -ERESTARTSYS : A signal interrupted the wait operation. + * + * If @no_wait is zero, the function might sleep until the eventfd internal + * counter becomes greater than zero. + */ +ssize_t eventfd_ctx_read(struct eventfd_ctx *ctx, int no_wait, __u64 *cnt) { - struct eventfd_ctx *ctx = file->private_data; ssize_t res; - __u64 ucnt = 0; DECLARE_WAITQUEUE(wait, current); - if (count < sizeof(ucnt)) - return -EINVAL; spin_lock_irq(&ctx->wqh.lock); + *cnt = 0; res = -EAGAIN; if (ctx->count > 0) - res = sizeof(ucnt); - else if (!(file->f_flags & O_NONBLOCK)) { + res = 0; + else if (!no_wait) { __add_wait_queue(&ctx->wqh, &wait); - for (res = 0;;) { + for (;;) { set_current_state(TASK_INTERRUPTIBLE); if (ctx->count > 0) { - res = sizeof(ucnt); + res = 0; break; } if (signal_pending(current)) { @@ -168,18 +213,32 @@ static ssize_t eventfd_read(struct file *file, char __user *buf, size_t count, __remove_wait_queue(&ctx->wqh, &wait); __set_current_state(TASK_RUNNING); } - if (likely(res > 0)) { - ucnt = (ctx->flags & EFD_SEMAPHORE) ? 1 : ctx->count; - ctx->count -= ucnt; + if (likely(res == 0)) { + eventfd_ctx_do_read(ctx, cnt); if (waitqueue_active(&ctx->wqh)) wake_up_locked_poll(&ctx->wqh, POLLOUT); } spin_unlock_irq(&ctx->wqh.lock); - if (res > 0 && put_user(ucnt, (__u64 __user *) buf)) - return -EFAULT; return res; } +EXPORT_SYMBOL_GPL(eventfd_ctx_read); + +static ssize_t eventfd_read(struct file *file, char __user *buf, size_t count, + loff_t *ppos) +{ + struct eventfd_ctx *ctx = file->private_data; + ssize_t res; + __u64 cnt; + + if (count < sizeof(cnt)) + return -EINVAL; + res = eventfd_ctx_read(ctx, file->f_flags & O_NONBLOCK, &cnt); + if (res < 0) + return res; + + return put_user(cnt, (__u64 __user *) buf) ? -EFAULT : sizeof(cnt); +} static ssize_t eventfd_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) diff --git a/include/linux/eventfd.h b/include/linux/eventfd.h index 94dd103..91bb4f2 100644 --- a/include/linux/eventfd.h +++ b/include/linux/eventfd.h @@ -10,6 +10,7 @@ #include <linux/fcntl.h> #include <linux/file.h> +#include <linux/wait.h> /* * CAREFUL: Check include/asm-generic/fcntl.h when defining @@ -34,6 +35,9 @@ struct file *eventfd_fget(int fd); struct eventfd_ctx *eventfd_ctx_fdget(int fd); struct eventfd_ctx *eventfd_ctx_fileget(struct file *file); int eventfd_signal(struct eventfd_ctx *ctx, int n); +ssize_t eventfd_ctx_read(struct eventfd_ctx *ctx, int no_wait, __u64 *cnt); +int eventfd_ctx_remove_wait_queue(struct eventfd_ctx *ctx, wait_queue_t *wait, + __u64 *cnt); #else /* CONFIG_EVENTFD */ @@ -61,6 +65,18 @@ static inline void eventfd_ctx_put(struct eventfd_ctx *ctx) } +static inline ssize_t eventfd_ctx_read(struct eventfd_ctx *ctx, int no_wait, + __u64 *cnt) +{ + return -ENOSYS; +} + +static inline int eventfd_ctx_remove_wait_queue(struct eventfd_ctx *ctx, + wait_queue_t *wait, __u64 *cnt) +{ + return -ENOSYS; +} + #endif #endif /* _LINUX_EVENTFD_H */ diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index 30f70fd..a9d3fc6 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -72,12 +72,13 @@ static void irqfd_shutdown(struct work_struct *work) { struct _irqfd *irqfd = container_of(work, struct _irqfd, shutdown); + u64 cnt; /* * Synchronize with the wait-queue and unhook ourselves to prevent * further events. */ - remove_wait_queue(irqfd->wqh, &irqfd->wait); + eventfd_ctx_remove_wait_queue(irqfd->eventfd, &irqfd->wait, &cnt); /* * We know no new events will be scheduled at this point, so block @@ -166,7 +167,7 @@ irqfd_ptable_queue_proc(struct file *file, wait_queue_head_t *wqh, static int kvm_irqfd_assign(struct kvm *kvm, int fd, int gsi) { - struct _irqfd *irqfd; + struct _irqfd *irqfd, *tmp; struct file *file = NULL; struct eventfd_ctx *eventfd = NULL; int ret; @@ -203,9 +204,20 @@ kvm_irqfd_assign(struct kvm *kvm, int fd, int gsi) init_waitqueue_func_entry(&irqfd->wait, irqfd_wakeup); init_poll_funcptr(&irqfd->pt, irqfd_ptable_queue_proc); + spin_lock_irq(&kvm->irqfds.lock); + + ret = 0; + list_for_each_entry(tmp, &kvm->irqfds.items, list) { + if (irqfd->eventfd != tmp->eventfd) + continue; + /* This fd is used for another irq already. */ + ret = -EBUSY; + spin_unlock_irq(&kvm->irqfds.lock); + goto fail; + } + events = file->f_op->poll(file, &irqfd->pt); - spin_lock_irq(&kvm->irqfds.lock); list_add_tail(&irqfd->list, &kvm->irqfds.items); spin_unlock_irq(&kvm->irqfds.lock); diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c index 9b07734..9fd5b3e 100644 --- a/virt/kvm/irq_comm.c +++ b/virt/kvm/irq_comm.c @@ -302,6 +302,7 @@ static int setup_routing_entry(struct kvm_irq_routing_table *rt, { int r = -EINVAL; int delta; + unsigned max_pin; struct kvm_kernel_irq_routing_entry *ei; struct hlist_node *n; @@ -322,12 +323,15 @@ static int setup_routing_entry(struct kvm_irq_routing_table *rt, switch (ue->u.irqchip.irqchip) { case KVM_IRQCHIP_PIC_MASTER: e->set = kvm_set_pic_irq; + max_pin = 16; break; case KVM_IRQCHIP_PIC_SLAVE: e->set = kvm_set_pic_irq; + max_pin = 16; delta = 8; break; case KVM_IRQCHIP_IOAPIC: + max_pin = KVM_IOAPIC_NUM_PINS; e->set = kvm_set_ioapic_irq; break; default: @@ -335,7 +339,7 @@ static int setup_routing_entry(struct kvm_irq_routing_table *rt, } e->irqchip.irqchip = ue->u.irqchip.irqchip; e->irqchip.pin = ue->u.irqchip.pin + delta; - if (e->irqchip.pin >= KVM_IOAPIC_NUM_PINS) + if (e->irqchip.pin >= max_pin) goto out; rt->chip[ue->u.irqchip.irqchip][e->irqchip.pin] = ue->gsi; break; |