diff options
Diffstat (limited to 'virt')
-rw-r--r-- | virt/kvm/coalesced_mmio.c | 156 | ||||
-rw-r--r-- | virt/kvm/coalesced_mmio.h | 23 | ||||
-rw-r--r-- | virt/kvm/ioapic.c | 23 | ||||
-rw-r--r-- | virt/kvm/iodev.h | 8 | ||||
-rw-r--r-- | virt/kvm/kvm_main.c | 153 | ||||
-rw-r--r-- | virt/kvm/kvm_trace.c | 18 |
6 files changed, 346 insertions, 35 deletions
diff --git a/virt/kvm/coalesced_mmio.c b/virt/kvm/coalesced_mmio.c new file mode 100644 index 0000000..5ae620d --- /dev/null +++ b/virt/kvm/coalesced_mmio.c @@ -0,0 +1,156 @@ +/* + * KVM coalesced MMIO + * + * Copyright (c) 2008 Bull S.A.S. + * + * Author: Laurent Vivier <Laurent.Vivier@bull.net> + * + */ + +#include "iodev.h" + +#include <linux/kvm_host.h> +#include <linux/kvm.h> + +#include "coalesced_mmio.h" + +static int coalesced_mmio_in_range(struct kvm_io_device *this, + gpa_t addr, int len, int is_write) +{ + struct kvm_coalesced_mmio_dev *dev = + (struct kvm_coalesced_mmio_dev*)this->private; + struct kvm_coalesced_mmio_zone *zone; + int next; + int i; + + if (!is_write) + return 0; + + /* kvm->lock is taken by the caller and must be not released before + * dev.read/write + */ + + /* Are we able to batch it ? */ + + /* last is the first free entry + * check if we don't meet the first used entry + * there is always one unused entry in the buffer + */ + + next = (dev->kvm->coalesced_mmio_ring->last + 1) % + KVM_COALESCED_MMIO_MAX; + if (next == dev->kvm->coalesced_mmio_ring->first) { + /* full */ + return 0; + } + + /* is it in a batchable area ? */ + + for (i = 0; i < dev->nb_zones; i++) { + zone = &dev->zone[i]; + + /* (addr,len) is fully included in + * (zone->addr, zone->size) + */ + + if (zone->addr <= addr && + addr + len <= zone->addr + zone->size) + return 1; + } + return 0; +} + +static void coalesced_mmio_write(struct kvm_io_device *this, + gpa_t addr, int len, const void *val) +{ + struct kvm_coalesced_mmio_dev *dev = + (struct kvm_coalesced_mmio_dev*)this->private; + struct kvm_coalesced_mmio_ring *ring = dev->kvm->coalesced_mmio_ring; + + /* kvm->lock must be taken by caller before call to in_range()*/ + + /* copy data in first free entry of the ring */ + + ring->coalesced_mmio[ring->last].phys_addr = addr; + ring->coalesced_mmio[ring->last].len = len; + memcpy(ring->coalesced_mmio[ring->last].data, val, len); + smp_wmb(); + ring->last = (ring->last + 1) % KVM_COALESCED_MMIO_MAX; +} + +static void coalesced_mmio_destructor(struct kvm_io_device *this) +{ + kfree(this); +} + +int kvm_coalesced_mmio_init(struct kvm *kvm) +{ + struct kvm_coalesced_mmio_dev *dev; + + dev = kzalloc(sizeof(struct kvm_coalesced_mmio_dev), GFP_KERNEL); + if (!dev) + return -ENOMEM; + dev->dev.write = coalesced_mmio_write; + dev->dev.in_range = coalesced_mmio_in_range; + dev->dev.destructor = coalesced_mmio_destructor; + dev->dev.private = dev; + dev->kvm = kvm; + kvm->coalesced_mmio_dev = dev; + kvm_io_bus_register_dev(&kvm->mmio_bus, &dev->dev); + + return 0; +} + +int kvm_vm_ioctl_register_coalesced_mmio(struct kvm *kvm, + struct kvm_coalesced_mmio_zone *zone) +{ + struct kvm_coalesced_mmio_dev *dev = kvm->coalesced_mmio_dev; + + if (dev == NULL) + return -EINVAL; + + mutex_lock(&kvm->lock); + if (dev->nb_zones >= KVM_COALESCED_MMIO_ZONE_MAX) { + mutex_unlock(&kvm->lock); + return -ENOBUFS; + } + + dev->zone[dev->nb_zones] = *zone; + dev->nb_zones++; + + mutex_unlock(&kvm->lock); + return 0; +} + +int kvm_vm_ioctl_unregister_coalesced_mmio(struct kvm *kvm, + struct kvm_coalesced_mmio_zone *zone) +{ + int i; + struct kvm_coalesced_mmio_dev *dev = kvm->coalesced_mmio_dev; + struct kvm_coalesced_mmio_zone *z; + + if (dev == NULL) + return -EINVAL; + + mutex_lock(&kvm->lock); + + i = dev->nb_zones; + while(i) { + z = &dev->zone[i - 1]; + + /* unregister all zones + * included in (zone->addr, zone->size) + */ + + if (zone->addr <= z->addr && + z->addr + z->size <= zone->addr + zone->size) { + dev->nb_zones--; + *z = dev->zone[dev->nb_zones]; + } + i--; + } + + mutex_unlock(&kvm->lock); + + return 0; +} diff --git a/virt/kvm/coalesced_mmio.h b/virt/kvm/coalesced_mmio.h new file mode 100644 index 0000000..5ac0ec6 --- /dev/null +++ b/virt/kvm/coalesced_mmio.h @@ -0,0 +1,23 @@ +/* + * KVM coalesced MMIO + * + * Copyright (c) 2008 Bull S.A.S. + * + * Author: Laurent Vivier <Laurent.Vivier@bull.net> + * + */ + +#define KVM_COALESCED_MMIO_ZONE_MAX 100 + +struct kvm_coalesced_mmio_dev { + struct kvm_io_device dev; + struct kvm *kvm; + int nb_zones; + struct kvm_coalesced_mmio_zone zone[KVM_COALESCED_MMIO_ZONE_MAX]; +}; + +int kvm_coalesced_mmio_init(struct kvm *kvm); +int kvm_vm_ioctl_register_coalesced_mmio(struct kvm *kvm, + struct kvm_coalesced_mmio_zone *zone); +int kvm_vm_ioctl_unregister_coalesced_mmio(struct kvm *kvm, + struct kvm_coalesced_mmio_zone *zone); diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c index 4458908..c0d2287 100644 --- a/virt/kvm/ioapic.c +++ b/virt/kvm/ioapic.c @@ -146,6 +146,11 @@ static int ioapic_inj_irq(struct kvm_ioapic *ioapic, return kvm_apic_set_irq(vcpu, vector, trig_mode); } +static void ioapic_inj_nmi(struct kvm_vcpu *vcpu) +{ + kvm_inject_nmi(vcpu); +} + static u32 ioapic_get_delivery_bitmask(struct kvm_ioapic *ioapic, u8 dest, u8 dest_mode) { @@ -239,8 +244,19 @@ static int ioapic_deliver(struct kvm_ioapic *ioapic, int irq) } } break; - - /* TODO: NMI */ + case IOAPIC_NMI: + for (vcpu_id = 0; deliver_bitmask != 0; vcpu_id++) { + if (!(deliver_bitmask & (1 << vcpu_id))) + continue; + deliver_bitmask &= ~(1 << vcpu_id); + vcpu = ioapic->kvm->vcpus[vcpu_id]; + if (vcpu) + ioapic_inj_nmi(vcpu); + else + ioapic_debug("NMI to vcpu %d failed\n", + vcpu->vcpu_id); + } + break; default: printk(KERN_WARNING "Unsupported delivery mode %d\n", delivery_mode); @@ -291,7 +307,8 @@ void kvm_ioapic_update_eoi(struct kvm *kvm, int vector) __kvm_ioapic_update_eoi(ioapic, i); } -static int ioapic_in_range(struct kvm_io_device *this, gpa_t addr) +static int ioapic_in_range(struct kvm_io_device *this, gpa_t addr, + int len, int is_write) { struct kvm_ioapic *ioapic = (struct kvm_ioapic *)this->private; diff --git a/virt/kvm/iodev.h b/virt/kvm/iodev.h index c14e642..55e8846 100644 --- a/virt/kvm/iodev.h +++ b/virt/kvm/iodev.h @@ -27,7 +27,8 @@ struct kvm_io_device { gpa_t addr, int len, const void *val); - int (*in_range)(struct kvm_io_device *this, gpa_t addr); + int (*in_range)(struct kvm_io_device *this, gpa_t addr, int len, + int is_write); void (*destructor)(struct kvm_io_device *this); void *private; @@ -49,9 +50,10 @@ static inline void kvm_iodevice_write(struct kvm_io_device *dev, dev->write(dev, addr, len, val); } -static inline int kvm_iodevice_inrange(struct kvm_io_device *dev, gpa_t addr) +static inline int kvm_iodevice_inrange(struct kvm_io_device *dev, + gpa_t addr, int len, int is_write) { - return dev->in_range(dev, addr); + return dev->in_range(dev, addr, len, is_write); } static inline void kvm_iodevice_destructor(struct kvm_io_device *dev) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index d4eae6a..904d7b7 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -47,6 +47,10 @@ #include <asm/uaccess.h> #include <asm/pgtable.h> +#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET +#include "coalesced_mmio.h" +#endif + MODULE_AUTHOR("Qumranet"); MODULE_LICENSE("GPL"); @@ -65,6 +69,8 @@ struct dentry *kvm_debugfs_dir; static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl, unsigned long arg); +bool kvm_rebooting; + static inline int valid_vcpu(int n) { return likely(n >= 0 && n < KVM_MAX_VCPUS); @@ -99,10 +105,11 @@ static void ack_flush(void *_completed) void kvm_flush_remote_tlbs(struct kvm *kvm) { - int i, cpu; + int i, cpu, me; cpumask_t cpus; struct kvm_vcpu *vcpu; + me = get_cpu(); cpus_clear(cpus); for (i = 0; i < KVM_MAX_VCPUS; ++i) { vcpu = kvm->vcpus[i]; @@ -111,21 +118,24 @@ void kvm_flush_remote_tlbs(struct kvm *kvm) if (test_and_set_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests)) continue; cpu = vcpu->cpu; - if (cpu != -1 && cpu != raw_smp_processor_id()) + if (cpu != -1 && cpu != me) cpu_set(cpu, cpus); } if (cpus_empty(cpus)) - return; + goto out; ++kvm->stat.remote_tlb_flush; smp_call_function_mask(cpus, ack_flush, NULL, 1); +out: + put_cpu(); } void kvm_reload_remote_mmus(struct kvm *kvm) { - int i, cpu; + int i, cpu, me; cpumask_t cpus; struct kvm_vcpu *vcpu; + me = get_cpu(); cpus_clear(cpus); for (i = 0; i < KVM_MAX_VCPUS; ++i) { vcpu = kvm->vcpus[i]; @@ -134,12 +144,14 @@ void kvm_reload_remote_mmus(struct kvm *kvm) if (test_and_set_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests)) continue; cpu = vcpu->cpu; - if (cpu != -1 && cpu != raw_smp_processor_id()) + if (cpu != -1 && cpu != me) cpu_set(cpu, cpus); } if (cpus_empty(cpus)) - return; + goto out; smp_call_function_mask(cpus, ack_flush, NULL, 1); +out: + put_cpu(); } @@ -183,10 +195,23 @@ EXPORT_SYMBOL_GPL(kvm_vcpu_uninit); static struct kvm *kvm_create_vm(void) { struct kvm *kvm = kvm_arch_create_vm(); +#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET + struct page *page; +#endif if (IS_ERR(kvm)) goto out; +#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET + page = alloc_page(GFP_KERNEL | __GFP_ZERO); + if (!page) { + kfree(kvm); + return ERR_PTR(-ENOMEM); + } + kvm->coalesced_mmio_ring = + (struct kvm_coalesced_mmio_ring *)page_address(page); +#endif + kvm->mm = current->mm; atomic_inc(&kvm->mm->mm_count); spin_lock_init(&kvm->mmu_lock); @@ -198,6 +223,9 @@ static struct kvm *kvm_create_vm(void) spin_lock(&kvm_lock); list_add(&kvm->vm_list, &vm_list); spin_unlock(&kvm_lock); +#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET + kvm_coalesced_mmio_init(kvm); +#endif out: return kvm; } @@ -240,6 +268,10 @@ static void kvm_destroy_vm(struct kvm *kvm) spin_unlock(&kvm_lock); kvm_io_bus_destroy(&kvm->pio_bus); kvm_io_bus_destroy(&kvm->mmio_bus); +#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET + if (kvm->coalesced_mmio_ring != NULL) + free_page((unsigned long)kvm->coalesced_mmio_ring); +#endif kvm_arch_destroy_vm(kvm); mmdrop(mm); } @@ -333,6 +365,7 @@ int __kvm_set_memory_region(struct kvm *kvm, r = -ENOMEM; /* Allocate if a slot is being created */ +#ifndef CONFIG_S390 if (npages && !new.rmap) { new.rmap = vmalloc(npages * sizeof(struct page *)); @@ -373,10 +406,14 @@ int __kvm_set_memory_region(struct kvm *kvm, goto out_free; memset(new.dirty_bitmap, 0, dirty_bytes); } +#endif /* not defined CONFIG_S390 */ if (mem->slot >= kvm->nmemslots) kvm->nmemslots = mem->slot + 1; + if (!npages) + kvm_arch_flush_shadow(kvm); + *memslot = new; r = kvm_arch_set_memory_region(kvm, mem, old, user_alloc); @@ -532,6 +569,7 @@ pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn) struct page *page[1]; unsigned long addr; int npages; + pfn_t pfn; might_sleep(); @@ -544,19 +582,38 @@ pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn) npages = get_user_pages(current, current->mm, addr, 1, 1, 1, page, NULL); - if (npages != 1) { - get_page(bad_page); - return page_to_pfn(bad_page); - } + if (unlikely(npages != 1)) { + struct vm_area_struct *vma; - return page_to_pfn(page[0]); + vma = find_vma(current->mm, addr); + if (vma == NULL || addr < vma->vm_start || + !(vma->vm_flags & VM_PFNMAP)) { + get_page(bad_page); + return page_to_pfn(bad_page); + } + + pfn = ((addr - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; + BUG_ON(pfn_valid(pfn)); + } else + pfn = page_to_pfn(page[0]); + + return pfn; } EXPORT_SYMBOL_GPL(gfn_to_pfn); struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn) { - return pfn_to_page(gfn_to_pfn(kvm, gfn)); + pfn_t pfn; + + pfn = gfn_to_pfn(kvm, gfn); + if (pfn_valid(pfn)) + return pfn_to_page(pfn); + + WARN_ON(!pfn_valid(pfn)); + + get_page(bad_page); + return bad_page; } EXPORT_SYMBOL_GPL(gfn_to_page); @@ -569,7 +626,8 @@ EXPORT_SYMBOL_GPL(kvm_release_page_clean); void kvm_release_pfn_clean(pfn_t pfn) { - put_page(pfn_to_page(pfn)); + if (pfn_valid(pfn)) + put_page(pfn_to_page(pfn)); } EXPORT_SYMBOL_GPL(kvm_release_pfn_clean); @@ -594,21 +652,25 @@ EXPORT_SYMBOL_GPL(kvm_set_page_dirty); void kvm_set_pfn_dirty(pfn_t pfn) { - struct page *page = pfn_to_page(pfn); - if (!PageReserved(page)) - SetPageDirty(page); + if (pfn_valid(pfn)) { + struct page *page = pfn_to_page(pfn); + if (!PageReserved(page)) + SetPageDirty(page); + } } EXPORT_SYMBOL_GPL(kvm_set_pfn_dirty); void kvm_set_pfn_accessed(pfn_t pfn) { - mark_page_accessed(pfn_to_page(pfn)); + if (pfn_valid(pfn)) + mark_page_accessed(pfn_to_page(pfn)); } EXPORT_SYMBOL_GPL(kvm_set_pfn_accessed); void kvm_get_pfn(pfn_t pfn) { - get_page(pfn_to_page(pfn)); + if (pfn_valid(pfn)) + get_page(pfn_to_page(pfn)); } EXPORT_SYMBOL_GPL(kvm_get_pfn); @@ -799,6 +861,10 @@ static int kvm_vcpu_fault(struct vm_area_struct *vma, struct vm_fault *vmf) else if (vmf->pgoff == KVM_PIO_PAGE_OFFSET) page = virt_to_page(vcpu->arch.pio_data); #endif +#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET + else if (vmf->pgoff == KVM_COALESCED_MMIO_PAGE_OFFSET) + page = virt_to_page(vcpu->kvm->coalesced_mmio_ring); +#endif else return VM_FAULT_SIGBUS; get_page(page); @@ -1121,6 +1187,32 @@ static long kvm_vm_ioctl(struct file *filp, goto out; break; } +#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET + case KVM_REGISTER_COALESCED_MMIO: { + struct kvm_coalesced_mmio_zone zone; + r = -EFAULT; + if (copy_from_user(&zone, argp, sizeof zone)) + goto out; + r = -ENXIO; + r = kvm_vm_ioctl_register_coalesced_mmio(kvm, &zone); + if (r) + goto out; + r = 0; + break; + } + case KVM_UNREGISTER_COALESCED_MMIO: { + struct kvm_coalesced_mmio_zone zone; + r = -EFAULT; + if (copy_from_user(&zone, argp, sizeof zone)) + goto out; + r = -ENXIO; + r = kvm_vm_ioctl_unregister_coalesced_mmio(kvm, &zone); + if (r) + goto out; + r = 0; + break; + } +#endif default: r = kvm_arch_vm_ioctl(filp, ioctl, arg); } @@ -1179,7 +1271,6 @@ static int kvm_dev_ioctl_create_vm(void) static long kvm_dev_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { - void __user *argp = (void __user *)arg; long r = -EINVAL; switch (ioctl) { @@ -1196,7 +1287,7 @@ static long kvm_dev_ioctl(struct file *filp, r = kvm_dev_ioctl_create_vm(); break; case KVM_CHECK_EXTENSION: - r = kvm_dev_ioctl_check_extension((long)argp); + r = kvm_dev_ioctl_check_extension(arg); break; case KVM_GET_VCPU_MMAP_SIZE: r = -EINVAL; @@ -1206,6 +1297,9 @@ static long kvm_dev_ioctl(struct file *filp, #ifdef CONFIG_X86 r += PAGE_SIZE; /* pio data page */ #endif +#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET + r += PAGE_SIZE; /* coalesced mmio ring page */ +#endif break; case KVM_TRACE_ENABLE: case KVM_TRACE_PAUSE: @@ -1247,7 +1341,6 @@ static void hardware_disable(void *junk) if (!cpu_isset(cpu, cpus_hardware_enabled)) return; cpu_clear(cpu, cpus_hardware_enabled); - decache_vcpus_on_cpu(cpu); kvm_arch_hardware_disable(NULL); } @@ -1277,6 +1370,18 @@ static int kvm_cpu_hotplug(struct notifier_block *notifier, unsigned long val, return NOTIFY_OK; } + +asmlinkage void kvm_handle_fault_on_reboot(void) +{ + if (kvm_rebooting) + /* spin while reset goes on */ + while (true) + ; + /* Fault while not rebooting. We want the trace. */ + BUG(); +} +EXPORT_SYMBOL_GPL(kvm_handle_fault_on_reboot); + static int kvm_reboot(struct notifier_block *notifier, unsigned long val, void *v) { @@ -1286,6 +1391,7 @@ static int kvm_reboot(struct notifier_block *notifier, unsigned long val, * in vmx root mode. */ printk(KERN_INFO "kvm: exiting hardware virtualization\n"); + kvm_rebooting = true; on_each_cpu(hardware_disable, NULL, 1); } return NOTIFY_OK; @@ -1312,14 +1418,15 @@ void kvm_io_bus_destroy(struct kvm_io_bus *bus) } } -struct kvm_io_device *kvm_io_bus_find_dev(struct kvm_io_bus *bus, gpa_t addr) +struct kvm_io_device *kvm_io_bus_find_dev(struct kvm_io_bus *bus, + gpa_t addr, int len, int is_write) { int i; for (i = 0; i < bus->dev_count; i++) { struct kvm_io_device *pos = bus->devs[i]; - if (pos->in_range(pos, addr)) + if (pos->in_range(pos, addr, len, is_write)) return pos; } diff --git a/virt/kvm/kvm_trace.c b/virt/kvm/kvm_trace.c index 0e49547..58141f3 100644 --- a/virt/kvm/kvm_trace.c +++ b/virt/kvm/kvm_trace.c @@ -72,11 +72,7 @@ static void kvm_add_trace(void *probe_private, void *call_data, rec.cycle_in = p->cycle_in; if (rec.cycle_in) { - u64 cycle = 0; - - cycle = get_cycles(); - rec.u.cycle.cycle_lo = (u32)cycle; - rec.u.cycle.cycle_hi = (u32)(cycle >> 32); + rec.u.cycle.cycle_u64 = get_cycles(); for (i = 0; i < rec.extra_u32; i++) rec.u.cycle.extra_u32[i] = va_arg(*args, u32); @@ -114,8 +110,18 @@ static int kvm_subbuf_start_callback(struct rchan_buf *buf, void *subbuf, { struct kvm_trace *kt; - if (!relay_buf_full(buf)) + if (!relay_buf_full(buf)) { + if (!prev_subbuf) { + /* + * executed only once when the channel is opened + * save metadata as first record + */ + subbuf_start_reserve(buf, sizeof(u32)); + *(u32 *)subbuf = 0x12345678; + } + return 1; + } kt = buf->chan->private_data; atomic_inc(&kt->lost_records); |