From af585b921e5d1e919947c4b1164b59507fe7cd7b Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Thu, 14 Oct 2010 11:22:46 +0200 Subject: KVM: Halt vcpu if page it tries to access is swapped out If a guest accesses swapped out memory, do not swap it in from vcpu thread context. Schedule work to do the swapping and put the vcpu into halted state instead. Interrupts will still be delivered to the guest, and if an interrupt causes a reschedule the guest will continue to run another task. [avi: remove call to get_user_pages_noio(), nacked by Linus; this makes everything synchronous again] Acked-by: Rik van Riel Signed-off-by: Gleb Natapov Signed-off-by: Marcelo Tosatti --- arch/x86/include/asm/kvm_host.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'arch/x86/include/asm') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index f702f82..b5f4c1a 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -83,11 +83,14 @@ #define KVM_NR_FIXED_MTRR_REGION 88 #define KVM_NR_VAR_MTRR 8 +#define ASYNC_PF_PER_VCPU 64 + extern spinlock_t kvm_lock; extern struct list_head vm_list; struct kvm_vcpu; struct kvm; +struct kvm_async_pf; enum kvm_reg { VCPU_REGS_RAX = 0, @@ -412,6 +415,11 @@ struct kvm_vcpu_arch { u64 hv_vapic; cpumask_var_t wbinvd_dirty_mask; + + struct { + bool halted; + gfn_t gfns[roundup_pow_of_two(ASYNC_PF_PER_VCPU)]; + } apf; }; struct kvm_arch { @@ -585,6 +593,10 @@ struct kvm_x86_ops { const struct trace_print_flags *exit_reasons_str; }; +struct kvm_arch_async_pf { + gfn_t gfn; +}; + extern struct kvm_x86_ops *kvm_x86_ops; int kvm_mmu_module_init(void); @@ -799,4 +811,10 @@ void kvm_set_shared_msr(unsigned index, u64 val, u64 mask); bool kvm_is_linear_rip(struct kvm_vcpu *vcpu, unsigned long linear_rip); +void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu, + struct kvm_async_pf *work); +void kvm_arch_async_page_present(struct kvm_vcpu *vcpu, + struct kvm_async_pf *work); +extern bool kvm_find_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn); + #endif /* _ASM_X86_KVM_HOST_H */ -- cgit v1.1 From 56028d0861e48f7cc9c573d79f2d8a0a933a2bba Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Sun, 17 Oct 2010 18:13:42 +0200 Subject: KVM: Retry fault before vmentry When a page is swapped in it is mapped into guest memory only after the guest tries to access it again and generates another fault. To save this fault we can map it immediately, since we know that the guest is going to access the page. Do it only when tdp is enabled for now. The shadow paging case is more complicated: CR[034] and EFER registers would have to be switched before doing the mapping and then switched back.
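As a rough illustration of the retry, a minimal sketch of the kvm_arch_async_page_ready() hook (declared in the hunk that follows) could look like this; the work->gva field and the exact call site are assumptions, not part of the patch:

void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, struct kvm_async_pf *work)
{
	/* only the tdp (direct-map) case is prefaulted for now */
	if (!vcpu->arch.mmu.direct_map)
		return;

	/* retry the fault synchronously so the page is mapped before vmentry;
	 * work->gva is assumed to hold the original faulting address */
	vcpu->arch.mmu.page_fault(vcpu, work->gva, 0, true /* no_apf */);
}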
Acked-by: Rik van Riel Signed-off-by: Gleb Natapov Signed-off-by: Marcelo Tosatti --- arch/x86/include/asm/kvm_host.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'arch/x86/include/asm') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index b5f4c1a..c3076bc 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -241,7 +241,7 @@ struct kvm_mmu { void (*new_cr3)(struct kvm_vcpu *vcpu); void (*set_cr3)(struct kvm_vcpu *vcpu, unsigned long root); unsigned long (*get_cr3)(struct kvm_vcpu *vcpu); - int (*page_fault)(struct kvm_vcpu *vcpu, gva_t gva, u32 err); + int (*page_fault)(struct kvm_vcpu *vcpu, gva_t gva, u32 err, bool no_apf); void (*inject_page_fault)(struct kvm_vcpu *vcpu); void (*free)(struct kvm_vcpu *vcpu); gpa_t (*gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t gva, u32 access, @@ -815,6 +815,8 @@ void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu, struct kvm_async_pf *work); void kvm_arch_async_page_present(struct kvm_vcpu *vcpu, struct kvm_async_pf *work); +void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, + struct kvm_async_pf *work); extern bool kvm_find_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn); #endif /* _ASM_X86_KVM_HOST_H */ -- cgit v1.1 From ca3f10172eea9b95bbb66487656f3c3e93855702 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Thu, 14 Oct 2010 11:22:49 +0200 Subject: KVM paravirt: Move kvm_smp_prepare_boot_cpu() from kvmclock.c to kvm.c. Async PF also needs to hook into smp_prepare_boot_cpu so move the hook into generic code. Acked-by: Rik van Riel Signed-off-by: Gleb Natapov Signed-off-by: Marcelo Tosatti --- arch/x86/include/asm/kvm_para.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/include/asm') diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h index 7b562b6..e3faaaf 100644 --- a/arch/x86/include/asm/kvm_para.h +++ b/arch/x86/include/asm/kvm_para.h @@ -65,6 +65,7 @@ struct kvm_mmu_op_release_pt { #include extern void kvmclock_init(void); +extern int kvm_register_clock(char *txt); /* This instruction is vmcall. On non-VT architectures, it will generate a -- cgit v1.1 From 344d9588a9df06182684168be4f1408b55c7da3e Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Thu, 14 Oct 2010 11:22:50 +0200 Subject: KVM: Add PV MSR to enable asynchronous page faults delivery. Guest enables async PF vcpu functionality using this MSR. Reviewed-by: Rik van Riel Signed-off-by: Gleb Natapov Signed-off-by: Marcelo Tosatti --- arch/x86/include/asm/kvm_host.h | 2 ++ arch/x86/include/asm/kvm_para.h | 4 ++++ 2 files changed, 6 insertions(+) (limited to 'arch/x86/include/asm') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index c3076bc..0d70398 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -419,6 +419,8 @@ struct kvm_vcpu_arch { struct { bool halted; gfn_t gfns[roundup_pow_of_two(ASYNC_PF_PER_VCPU)]; + struct gfn_to_hva_cache data; + u64 msr_val; } apf; }; diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h index e3faaaf..8662ae0 100644 --- a/arch/x86/include/asm/kvm_para.h +++ b/arch/x86/include/asm/kvm_para.h @@ -20,6 +20,7 @@ * are available. The use of 0x11 and 0x12 is deprecated */ #define KVM_FEATURE_CLOCKSOURCE2 3 +#define KVM_FEATURE_ASYNC_PF 4 /* The last 8 bits are used to indicate how to interpret the flags field * in pvclock structure. If no bits are set, all flags are ignored. 
@@ -32,9 +33,12 @@ /* Custom MSRs falls in the range 0x4b564d00-0x4b564dff */ #define MSR_KVM_WALL_CLOCK_NEW 0x4b564d00 #define MSR_KVM_SYSTEM_TIME_NEW 0x4b564d01 +#define MSR_KVM_ASYNC_PF_EN 0x4b564d02 #define KVM_MAX_MMU_OP_BATCH 32 +#define KVM_ASYNC_PF_ENABLED (1 << 0) + /* Operations for KVM_HC_MMU_OP */ #define KVM_MMU_OP_WRITE_PTE 1 #define KVM_MMU_OP_FLUSH_TLB 2 -- cgit v1.1 From fd10cde9294f73eeccbc16f3fec1ae6cde7b800c Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Thu, 14 Oct 2010 11:22:51 +0200 Subject: KVM paravirt: Add async PF initialization to PV guest. Enable async PF in a guest if async PF capability is discovered. Acked-by: Rik van Riel Signed-off-by: Gleb Natapov Signed-off-by: Marcelo Tosatti --- arch/x86/include/asm/kvm_para.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'arch/x86/include/asm') diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h index 8662ae0..2315398 100644 --- a/arch/x86/include/asm/kvm_para.h +++ b/arch/x86/include/asm/kvm_para.h @@ -65,6 +65,12 @@ struct kvm_mmu_op_release_pt { __u64 pt_phys; }; +struct kvm_vcpu_pv_apf_data { + __u32 reason; + __u8 pad[60]; + __u32 enabled; +}; + #ifdef __KERNEL__ #include -- cgit v1.1 From 631bc4878220932fe67fc46fc7cf7cccdb1ec597 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Thu, 14 Oct 2010 11:22:52 +0200 Subject: KVM: Handle async PF in a guest. When async PF capability is detected hook up special page fault handler that will handle async page fault events and bypass other page faults to regular page fault handler. Also add async PF handling to nested SVM emulation. Async PF always generates exit to L1 where vcpu thread will be scheduled out until page is available. Acked-by: Rik van Riel Signed-off-by: Gleb Natapov Signed-off-by: Marcelo Tosatti --- arch/x86/include/asm/kvm_para.h | 12 ++++++++++++ arch/x86/include/asm/traps.h | 1 + 2 files changed, 13 insertions(+) (limited to 'arch/x86/include/asm') diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h index 2315398..fbfd367 100644 --- a/arch/x86/include/asm/kvm_para.h +++ b/arch/x86/include/asm/kvm_para.h @@ -65,6 +65,9 @@ struct kvm_mmu_op_release_pt { __u64 pt_phys; }; +#define KVM_PV_REASON_PAGE_NOT_PRESENT 1 +#define KVM_PV_REASON_PAGE_READY 2 + struct kvm_vcpu_pv_apf_data { __u32 reason; __u8 pad[60]; @@ -171,8 +174,17 @@ static inline unsigned int kvm_arch_para_features(void) #ifdef CONFIG_KVM_GUEST void __init kvm_guest_init(void); +void kvm_async_pf_task_wait(u32 token); +void kvm_async_pf_task_wake(u32 token); +u32 kvm_read_and_reset_pf_reason(void); #else #define kvm_guest_init() do { } while (0) +#define kvm_async_pf_task_wait(T) do {} while(0) +#define kvm_async_pf_task_wake(T) do {} while(0) +static u32 kvm_read_and_reset_pf_reason(void) +{ + return 0; +} #endif #endif /* __KERNEL__ */ diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h index f66cda5..0310da6 100644 --- a/arch/x86/include/asm/traps.h +++ b/arch/x86/include/asm/traps.h @@ -30,6 +30,7 @@ asmlinkage void segment_not_present(void); asmlinkage void stack_segment(void); asmlinkage void general_protection(void); asmlinkage void page_fault(void); +asmlinkage void async_page_fault(void); asmlinkage void spurious_interrupt_bug(void); asmlinkage void coprocessor_error(void); asmlinkage void alignment_check(void); -- cgit v1.1 From 7c90705bf2a373aa238661bdb6446f27299ef489 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Thu, 14 Oct 2010 11:22:53 +0200 Subject: KVM: Inject asynchronous page fault into 
a PV guest if page is swapped out. Send an async page fault to a PV guest if it accesses swapped out memory. The guest will choose another task to run upon receiving the fault. Allow async page fault injection only when the guest is in user mode, since otherwise the guest may be in a non-sleepable context and will not be able to reschedule. The vcpu will be halted if the guest faults on the same page again or if the vcpu executes kernel code. Acked-by: Rik van Riel Signed-off-by: Gleb Natapov Signed-off-by: Marcelo Tosatti --- arch/x86/include/asm/kvm_host.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/x86/include/asm') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 0d70398..167375c 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -421,6 +421,7 @@ struct kvm_vcpu_arch { gfn_t gfns[roundup_pow_of_two(ASYNC_PF_PER_VCPU)]; struct gfn_to_hva_cache data; u64 msr_val; + u32 id; } apf; }; @@ -596,6 +597,7 @@ struct kvm_x86_ops { }; struct kvm_arch_async_pf { + u32 token; gfn_t gfn; }; @@ -819,6 +821,7 @@ void kvm_arch_async_page_present(struct kvm_vcpu *vcpu, struct kvm_async_pf *work); void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, struct kvm_async_pf *work); +bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu); extern bool kvm_find_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn); #endif /* _ASM_X86_KVM_HOST_H */ -- cgit v1.1 From 6adba527420651b6cacaf392541c09fb108711a2 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Thu, 14 Oct 2010 11:22:55 +0200 Subject: KVM: Let host know whether the guest can handle async PF in non-userspace context. If the guest can detect that it runs in a non-preemptible context it can handle async PFs at any time, so let the host know that it can send async PFs even if the guest cpu is not in userspace. Acked-by: Rik van Riel Signed-off-by: Gleb Natapov Signed-off-by: Marcelo Tosatti --- arch/x86/include/asm/kvm_host.h | 1 + arch/x86/include/asm/kvm_para.h | 1 + 2 files changed, 2 insertions(+) (limited to 'arch/x86/include/asm') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 167375c..b2ea428 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -422,6 +422,7 @@ struct kvm_vcpu_arch { struct gfn_to_hva_cache data; u64 msr_val; u32 id; + bool send_user_only; } apf; }; diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h index fbfd367..d3a1a48 100644 --- a/arch/x86/include/asm/kvm_para.h +++ b/arch/x86/include/asm/kvm_para.h @@ -38,6 +38,7 @@ #define KVM_MAX_MMU_OP_BATCH 32 #define KVM_ASYNC_PF_ENABLED (1 << 0) +#define KVM_ASYNC_PF_SEND_ALWAYS (1 << 1) /* Operations for KVM_HC_MMU_OP */ #define KVM_MMU_OP_WRITE_PTE 1 -- cgit v1.1 From d4c90b0043bdb40a6f340d34b2ac1861040de88c Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Wed, 20 Oct 2010 18:34:54 +0200 Subject: KVM: x86: Add missing inline tag to kvm_read_and_reset_pf_reason May otherwise generate build warnings about unused kvm_read_and_reset_pf_reason if included without CONFIG_KVM_GUEST enabled.
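Taken together, the guest-side pieces above (the apf data block, MSR_KVM_ASYNC_PF_EN with KVM_ASYNC_PF_ENABLED, and now KVM_ASYNC_PF_SEND_ALWAYS) would be wired up roughly as follows; this is a sketch, with the per-cpu variable name and init hook assumed rather than taken from kvm.c:

static DEFINE_PER_CPU(struct kvm_vcpu_pv_apf_data, apf_reason) __aligned(64);

static void kvm_async_pf_guest_init(void)	/* assumed init hook */
{
	u64 pa;

	if (!kvm_para_has_feature(KVM_FEATURE_ASYNC_PF))
		return;

	pa = __pa(&__get_cpu_var(apf_reason)) | KVM_ASYNC_PF_ENABLED;
#ifdef CONFIG_PREEMPT
	/* a preemptible guest can take async PFs even while in kernel mode */
	pa |= KVM_ASYNC_PF_SEND_ALWAYS;
#endif
	wrmsrl(MSR_KVM_ASYNC_PF_EN, pa);
}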
Signed-off-by: Jan Kiszka Signed-off-by: Marcelo Tosatti --- arch/x86/include/asm/kvm_para.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/include/asm') diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h index d3a1a48..a427bf7 100644 --- a/arch/x86/include/asm/kvm_para.h +++ b/arch/x86/include/asm/kvm_para.h @@ -182,7 +182,7 @@ u32 kvm_read_and_reset_pf_reason(void); #define kvm_guest_init() do { } while (0) #define kvm_async_pf_task_wait(T) do {} while(0) #define kvm_async_pf_task_wake(T) do {} while(0) -static u32 kvm_read_and_reset_pf_reason(void) +static inline u32 kvm_read_and_reset_pf_reason(void) { return 0; } -- cgit v1.1 From ec25d5e66ee152e371fd7046f3f8441859579aea Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Mon, 1 Nov 2010 15:35:01 +0200 Subject: KVM: handle exit due to INVD in VMX Currently the exit is unhandled, so the guest halts with an error if it tries to execute the INVD instruction. Call into the emulator when the INVD instruction is executed by a guest instead. This instruction is not needed by ordinary guests, but firmware (like OpenBIOS) uses it and fails. Signed-off-by: Gleb Natapov Signed-off-by: Marcelo Tosatti --- arch/x86/include/asm/vmx.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/include/asm') diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index 9f0cbd9..42d95905 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -239,6 +239,7 @@ enum vmcs_field { #define EXIT_REASON_TASK_SWITCH 9 #define EXIT_REASON_CPUID 10 #define EXIT_REASON_HLT 12 +#define EXIT_REASON_INVD 13 #define EXIT_REASON_INVLPG 14 #define EXIT_REASON_RDPMC 15 #define EXIT_REASON_RDTSC 16 -- cgit v1.1 From 2a126faafb840e9a1e46514127cdb88ed998bd64 Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Thu, 4 Nov 2010 18:29:42 +0800 Subject: KVM: remove unused function declaration Remove the declaration of kvm_mmu_set_base_ptes() Signed-off-by: Xiao Guangrong Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_host.h | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/x86/include/asm') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index b2ea428..116dac5 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -611,7 +611,6 @@ void kvm_mmu_destroy(struct kvm_vcpu *vcpu); int kvm_mmu_create(struct kvm_vcpu *vcpu); int kvm_mmu_setup(struct kvm_vcpu *vcpu); void kvm_mmu_set_nonpresent_ptes(u64 trap_pte, u64 notrap_pte); -void kvm_mmu_set_base_ptes(u64 base_pte); void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask, u64 dirty_mask, u64 nx_mask, u64 x_mask); -- cgit v1.1 From c4806acdcec020fe5bbb054ce9dc75aaecaf29dd Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Fri, 12 Nov 2010 14:49:55 +0800 Subject: KVM: MMU: fix apf prefault if nested guest is enabled If an apf is generated in the L2 guest and is completed in the L1 guest, it will prefault this apf in the L1 guest's mmu context.
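The fix boils down to remembering which mmu context generated the apf and refusing to prefault if the context has changed by completion time; a sketch, with the helper name assumed and the direct_map flag coming from the hunk below:

static bool apf_prefault_allowed(struct kvm_vcpu *vcpu, struct kvm_async_pf *work)
{
	/* prefault only when tdp is active and the apf was generated in the
	 * same direct-map mmu context that is current now */
	return vcpu->arch.mmu.direct_map &&
	       work->arch.direct_map == vcpu->arch.mmu.direct_map;
}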
Signed-off-by: Xiao Guangrong Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_host.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/include/asm') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 116dac5..f1e8d5b 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -600,6 +600,7 @@ struct kvm_x86_ops { struct kvm_arch_async_pf { u32 token; gfn_t gfn; + bool direct_map; }; extern struct kvm_x86_ops *kvm_x86_ops; -- cgit v1.1 From 90de84f50b425805bf7ddc430143ed2e224ebd8e Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Wed, 17 Nov 2010 15:28:21 +0200 Subject: KVM: x86 emulator: preserve an operand's segment identity Currently the x86 emulator converts the segment register associated with an operand into a segment base which is added into the operand address. This loss of information results in us not doing segment limit checks properly. Replace struct operand's addr.mem field by a segmented_address structure which holds both the effective address and segment. This will allow us to do the limit check at the point of access. Signed-off-by: Avi Kivity Signed-off-by: Marcelo Tosatti --- arch/x86/include/asm/kvm_emulate.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'arch/x86/include/asm') diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index b36c6b3..b48c133 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h @@ -159,7 +159,10 @@ struct operand { }; union { unsigned long *reg; - unsigned long mem; + struct segmented_address { + ulong ea; + unsigned seg; + } mem; } addr; union { unsigned long val; -- cgit v1.1 From 586f9607962cd982293759a4e95ff06e75be5225 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Thu, 18 Nov 2010 13:09:54 +0200 Subject: KVM: Add instruction-set-specific exit qualifications to kvm_exit trace The exit reason alone is insufficient to understand exactly why an exit occurred; add ISA-specific trace parameters for additional information. Because fetching these parameters is expensive on vmx, and because these parameters are fetched even if tracing is disabled, we fetch the parameters via a callback instead of as traditional trace arguments.
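On the VMX side the new callback (declared in the hunk that follows) plausibly reduces to two VMCS reads; the field choices here are an assumption about what the ISA-specific qualifications mean for VMX:

static void vmx_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2)
{
	*info1 = vmcs_readl(EXIT_QUALIFICATION);
	*info2 = vmcs_read32(VM_EXIT_INTR_INFO);
}

/* the kvm_exit tracepoint would then call
 *	kvm_x86_ops->get_exit_info(vcpu, &info1, &info2);
 * from its assign step, so the VMCS is only read when tracing is enabled */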
Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_host.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/include/asm') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index f1e8d5b..3cc80c4 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -594,6 +594,7 @@ struct kvm_x86_ops { void (*write_tsc_offset)(struct kvm_vcpu *vcpu, u64 offset); + void (*get_exit_info)(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2); const struct trace_print_flags *exit_reasons_str; }; -- cgit v1.1 From a4a8e6f76ecf963fa7e4d74b3635655a2033a27b Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Fri, 19 Nov 2010 17:04:03 +0800 Subject: KVM: MMU: remove 'clear_unsync' parameter Remove it since we can judge it by using sp->unsync Signed-off-by: Xiao Guangrong Signed-off-by: Marcelo Tosatti --- arch/x86/include/asm/kvm_host.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/include/asm') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 3cc80c4..1452478 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -250,7 +250,7 @@ struct kvm_mmu { void (*prefetch_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page); int (*sync_page)(struct kvm_vcpu *vcpu, - struct kvm_mmu_page *sp, bool clear_unsync); + struct kvm_mmu_page *sp); void (*invlpg)(struct kvm_vcpu *vcpu, gva_t gva); hpa_t root_hpa; int root_level; -- cgit v1.1 From da9cb575b1127f84984b8ad6d973dcc05ac036dd Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Mon, 22 Nov 2010 17:53:21 +0200 Subject: KVM: x86 emulator: introduce struct x86_exception to communicate faults Introduce a structure that can contain an exception to be passed back to main kvm code. Signed-off-by: Avi Kivity Signed-off-by: Marcelo Tosatti --- arch/x86/include/asm/kvm_emulate.h | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'arch/x86/include/asm') diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index b48c133..b7c1127 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h @@ -15,6 +15,12 @@ struct x86_emulate_ctxt; +struct x86_exception { + u8 vector; + bool error_code_valid; + u16 error_code; +}; + /* * x86_emulate_ops: * @@ -229,9 +235,8 @@ struct x86_emulate_ctxt { bool perm_ok; /* do not check permissions if true */ - int exception; /* exception that happens during emulation or -1 */ - u32 error_code; /* error code for exception */ - bool error_code_valid; + bool have_exception; + struct x86_exception exception; /* decode cache */ struct decode_cache decode; -- cgit v1.1 From bcc55cba9f1fcda68412c8c3d8579c56d90b16f2 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Mon, 22 Nov 2010 17:53:22 +0200 Subject: KVM: x86 emulator: make emulator memory callbacks return full exception This way, they can return #GP, not just #PF. Signed-off-by: Avi Kivity Signed-off-by: Marcelo Tosatti --- arch/x86/include/asm/kvm_emulate.h | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) (limited to 'arch/x86/include/asm') diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index b7c1127..87d017e 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h @@ -70,7 +70,8 @@ struct x86_emulate_ops { * @bytes: [IN ] Number of bytes to read from memory. 
*/ int (*read_std)(unsigned long addr, void *val, - unsigned int bytes, struct kvm_vcpu *vcpu, u32 *error); + unsigned int bytes, struct kvm_vcpu *vcpu, + struct x86_exception *fault); /* * write_std: Write bytes of standard (non-emulated/special) memory. @@ -80,7 +81,8 @@ struct x86_emulate_ops { * @bytes: [IN ] Number of bytes to write to memory. */ int (*write_std)(unsigned long addr, void *val, - unsigned int bytes, struct kvm_vcpu *vcpu, u32 *error); + unsigned int bytes, struct kvm_vcpu *vcpu, + struct x86_exception *fault); /* * fetch: Read bytes of standard (non-emulated/special) memory. * Used for instruction fetch. @@ -89,7 +91,8 @@ struct x86_emulate_ops { * @bytes: [IN ] Number of bytes to read from memory. */ int (*fetch)(unsigned long addr, void *val, - unsigned int bytes, struct kvm_vcpu *vcpu, u32 *error); + unsigned int bytes, struct kvm_vcpu *vcpu, + struct x86_exception *fault); /* * read_emulated: Read bytes from emulated/special memory area. @@ -100,7 +103,7 @@ struct x86_emulate_ops { int (*read_emulated)(unsigned long addr, void *val, unsigned int bytes, - unsigned int *error, + struct x86_exception *fault, struct kvm_vcpu *vcpu); /* @@ -113,7 +116,7 @@ struct x86_emulate_ops { int (*write_emulated)(unsigned long addr, const void *val, unsigned int bytes, - unsigned int *error, + struct x86_exception *fault, struct kvm_vcpu *vcpu); /* @@ -128,7 +131,7 @@ struct x86_emulate_ops { const void *old, const void *new, unsigned int bytes, - unsigned int *error, + struct x86_exception *fault, struct kvm_vcpu *vcpu); int (*pio_in_emulated)(int size, unsigned short port, void *val, -- cgit v1.1 From ab9ae3138789afacd133a9c4b3d7a3f1578e25c7 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Mon, 22 Nov 2010 17:53:26 +0200 Subject: KVM: Push struct x86_exception info the various gva_to_gpa variants Signed-off-by: Avi Kivity Signed-off-by: Marcelo Tosatti --- arch/x86/include/asm/kvm_host.h | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) (limited to 'arch/x86/include/asm') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 1452478..9980a24 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -245,7 +245,7 @@ struct kvm_mmu { void (*inject_page_fault)(struct kvm_vcpu *vcpu); void (*free)(struct kvm_vcpu *vcpu); gpa_t (*gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t gva, u32 access, - u32 *error); + struct x86_exception *exception); gpa_t (*translate_gpa)(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access); void (*prefetch_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page); @@ -708,10 +708,14 @@ void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu); int kvm_mmu_load(struct kvm_vcpu *vcpu); void kvm_mmu_unload(struct kvm_vcpu *vcpu); void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu); -gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva, u32 *error); -gpa_t kvm_mmu_gva_to_gpa_fetch(struct kvm_vcpu *vcpu, gva_t gva, u32 *error); -gpa_t kvm_mmu_gva_to_gpa_write(struct kvm_vcpu *vcpu, gva_t gva, u32 *error); -gpa_t kvm_mmu_gva_to_gpa_system(struct kvm_vcpu *vcpu, gva_t gva, u32 *error); +gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva, + struct x86_exception *exception); +gpa_t kvm_mmu_gva_to_gpa_fetch(struct kvm_vcpu *vcpu, gva_t gva, + struct x86_exception *exception); +gpa_t kvm_mmu_gva_to_gpa_write(struct kvm_vcpu *vcpu, gva_t gva, + struct x86_exception *exception); +gpa_t kvm_mmu_gva_to_gpa_system(struct kvm_vcpu *vcpu, gva_t gva, + struct x86_exception *exception); int 
kvm_emulate_hypercall(struct kvm_vcpu *vcpu); -- cgit v1.1 From 6389ee946303cb4313dba0a49865e495a53351ff Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Mon, 29 Nov 2010 16:12:30 +0200 Subject: KVM: Pull extra page fault information into struct x86_exception Currently, page fault cr2 and nesting information are carried outside the fault data structure. Instead they are placed in the vcpu struct, which results in confusion as global variables are manipulated instead of passing parameters. Fix this issue by adding address and nested fields to struct x86_exception, so this struct can carry all information associated with a fault. Signed-off-by: Avi Kivity Tested-by: Joerg Roedel Tested-by: Gleb Natapov Signed-off-by: Marcelo Tosatti --- arch/x86/include/asm/kvm_emulate.h | 2 ++ arch/x86/include/asm/kvm_host.h | 17 ++++------------- 2 files changed, 6 insertions(+), 13 deletions(-) (limited to 'arch/x86/include/asm') diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index 87d017e..bf70ece 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h @@ -19,6 +19,8 @@ struct x86_exception { u8 vector; bool error_code_valid; u16 error_code; + bool nested_page_fault; + u64 address; /* cr2 or nested page fault gpa */ }; /* diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 9980a24..0c0941d 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -242,7 +242,8 @@ struct kvm_mmu { void (*set_cr3)(struct kvm_vcpu *vcpu, unsigned long root); unsigned long (*get_cr3)(struct kvm_vcpu *vcpu); int (*page_fault)(struct kvm_vcpu *vcpu, gva_t gva, u32 err, bool no_apf); - void (*inject_page_fault)(struct kvm_vcpu *vcpu); + void (*inject_page_fault)(struct kvm_vcpu *vcpu, + struct x86_exception *fault); void (*free)(struct kvm_vcpu *vcpu); gpa_t (*gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t gva, u32 access, struct x86_exception *exception); @@ -318,16 +319,6 @@ struct kvm_vcpu_arch { */ struct kvm_mmu *walk_mmu; - /* - * This struct is filled with the necessary information to propagate a - * page fault into the guest - */ - struct { - u64 address; - unsigned error_code; - bool nested; - } fault; - /* only needed in kvm_pv_mmu_op() path, but it's hot so * put it here to avoid allocation */ struct kvm_pv_mmu_op_buffer mmu_op_buffer; @@ -686,11 +677,11 @@ void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr); void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code); void kvm_requeue_exception(struct kvm_vcpu *vcpu, unsigned nr); void kvm_requeue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code); -void kvm_inject_page_fault(struct kvm_vcpu *vcpu); +void kvm_inject_page_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault); int kvm_read_guest_page_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, gfn_t gfn, void *data, int offset, int len, u32 access); -void kvm_propagate_fault(struct kvm_vcpu *vcpu); +void kvm_propagate_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault); bool kvm_require_cpl(struct kvm_vcpu *vcpu, int required_cpl); int kvm_pic_set_irq(void *opaque, int irq, int level); -- cgit v1.1 From ec9e60b21977007e3dfacc2b8fe3a8fbb9276b51 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 29 Nov 2010 17:51:47 +0100 Subject: KVM: X86: Introduce generic guest-mode representation This patch introduces a generic representation of guest-mode for a vcpu. This currently only exists in the SVM code.
Having this representation generic will help make the non-svm code aware of nesting when this is necessary. Signed-off-by: Joerg Roedel Signed-off-by: Marcelo Tosatti --- arch/x86/include/asm/kvm_host.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/include/asm') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 0c0941d..56e45a2 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -779,6 +779,7 @@ enum { #define HF_VINTR_MASK (1 << 2) #define HF_NMI_MASK (1 << 3) #define HF_IRET_MASK (1 << 4) +#define HF_GUEST_MASK (1 << 5) /* VCPU is in guest-mode */ /* * Hardware virtualization extension instructions may fault if a -- cgit v1.1 From 4ee546b434504a618eac40421e595c68e494da9f Mon Sep 17 00:00:00 2001 From: "Roedel, Joerg" Date: Fri, 3 Dec 2010 10:50:51 +0100 Subject: KVM: SVM: Add manipulation functions for CRx intercepts This patch wraps changes to the CRx intercepts of SVM into separate functions to abstract nested-svm better and prepare the implementation of the vmcb-clean-bits feature. Signed-off-by: Joerg Roedel Signed-off-by: Marcelo Tosatti --- arch/x86/include/asm/svm.h | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) (limited to 'arch/x86/include/asm') diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h index 0e83105..39f9ddf 100644 --- a/arch/x86/include/asm/svm.h +++ b/arch/x86/include/asm/svm.h @@ -51,8 +51,7 @@ enum { struct __attribute__ ((__packed__)) vmcb_control_area { - u16 intercept_cr_read; - u16 intercept_cr_write; + u32 intercept_cr; u16 intercept_dr_read; u16 intercept_dr_write; u32 intercept_exceptions; u64 intercept; u8 reserved_1[42]; @@ -204,10 +203,14 @@ struct __attribute__ ((__packed__)) vmcb { #define SVM_SELECTOR_READ_MASK SVM_SELECTOR_WRITE_MASK #define SVM_SELECTOR_CODE_MASK (1 << 3) -#define INTERCEPT_CR0_MASK 1 -#define INTERCEPT_CR3_MASK (1 << 3) -#define INTERCEPT_CR4_MASK (1 << 4) -#define INTERCEPT_CR8_MASK (1 << 8) +#define INTERCEPT_CR0_READ 0 +#define INTERCEPT_CR3_READ 3 +#define INTERCEPT_CR4_READ 4 +#define INTERCEPT_CR8_READ 8 +#define INTERCEPT_CR0_WRITE (16 + 0) +#define INTERCEPT_CR3_WRITE (16 + 3) +#define INTERCEPT_CR4_WRITE (16 + 4) +#define INTERCEPT_CR8_WRITE (16 + 8) #define INTERCEPT_DR0_MASK 1 #define INTERCEPT_DR1_MASK (1 << 1) -- cgit v1.1 From 3aed041a4c1b78cac87db76cf264b081df64dd37 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 30 Nov 2010 18:03:58 +0100 Subject: KVM: SVM: Add manipulation functions for DRx intercepts This patch wraps changes to the DRx intercepts of SVM into separate functions to abstract nested-svm better and prepare the implementation of the vmcb-clean-bits feature.
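A sketch of the manipulation helpers these two patches introduce on top of the widened intercept_cr/intercept_dr words; get_host_vmcb() and recalc_intercepts() are assumed nested-SVM helpers, not part of this header change:

static inline void set_cr_intercept(struct vcpu_svm *svm, int bit)
{
	struct vmcb *vmcb = get_host_vmcb(svm);

	vmcb->control.intercept_cr |= (1U << bit);
	recalc_intercepts(svm);
}

static inline void clr_dr_intercept(struct vcpu_svm *svm, int bit)
{
	struct vmcb *vmcb = get_host_vmcb(svm);

	vmcb->control.intercept_dr &= ~(1U << bit);
	recalc_intercepts(svm);
}

/* e.g. set_cr_intercept(svm, INTERCEPT_CR8_WRITE); */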
Signed-off-by: Joerg Roedel Signed-off-by: Marcelo Tosatti --- arch/x86/include/asm/svm.h | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) (limited to 'arch/x86/include/asm') diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h index 39f9ddf..11dbca7 100644 --- a/arch/x86/include/asm/svm.h +++ b/arch/x86/include/asm/svm.h @@ -52,8 +52,7 @@ enum { struct __attribute__ ((__packed__)) vmcb_control_area { u32 intercept_cr; - u16 intercept_dr_read; - u16 intercept_dr_write; + u32 intercept_dr; u32 intercept_exceptions; u64 intercept; u8 reserved_1[42]; @@ -212,14 +211,22 @@ struct __attribute__ ((__packed__)) vmcb { #define INTERCEPT_CR4_WRITE (16 + 4) #define INTERCEPT_CR8_WRITE (16 + 8) -#define INTERCEPT_DR0_MASK 1 -#define INTERCEPT_DR1_MASK (1 << 1) -#define INTERCEPT_DR2_MASK (1 << 2) -#define INTERCEPT_DR3_MASK (1 << 3) -#define INTERCEPT_DR4_MASK (1 << 4) -#define INTERCEPT_DR5_MASK (1 << 5) -#define INTERCEPT_DR6_MASK (1 << 6) -#define INTERCEPT_DR7_MASK (1 << 7) +#define INTERCEPT_DR0_READ 0 +#define INTERCEPT_DR1_READ 1 +#define INTERCEPT_DR2_READ 2 +#define INTERCEPT_DR3_READ 3 +#define INTERCEPT_DR4_READ 4 +#define INTERCEPT_DR5_READ 5 +#define INTERCEPT_DR6_READ 6 +#define INTERCEPT_DR7_READ 7 +#define INTERCEPT_DR0_WRITE (16 + 0) +#define INTERCEPT_DR1_WRITE (16 + 1) +#define INTERCEPT_DR2_WRITE (16 + 2) +#define INTERCEPT_DR3_WRITE (16 + 3) +#define INTERCEPT_DR4_WRITE (16 + 4) +#define INTERCEPT_DR5_WRITE (16 + 5) +#define INTERCEPT_DR6_WRITE (16 + 6) +#define INTERCEPT_DR7_WRITE (16 + 7) #define SVM_EVTINJ_VEC_MASK 0xff -- cgit v1.1 From b7c4145ba2eb0717db0ddac1b5f7f48012189c53 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Thu, 2 Dec 2010 17:52:50 +0200 Subject: KVM: Don't spin on virt instruction faults during reboot Since vmx blocks INIT signals, we disable virtualization extensions during reboot. This leads to virtualization instructions faulting; we trap these faults and spin while the reboot continues. Unfortunately spinning on a non-preemptible kernel may block a task that reboot depends on; this causes the reboot to hang. Fix by skipping over the instruction and hoping for the best. Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_host.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'arch/x86/include/asm') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 56e45a2..d968cc5 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -786,14 +786,18 @@ enum { * reboot turns off virtualization while processes are running. * Trap the fault and ignore the instruction if that happens. */ -asmlinkage void kvm_handle_fault_on_reboot(void); +asmlinkage void kvm_spurious_fault(void); +extern bool kvm_rebooting; #define __kvm_handle_fault_on_reboot(insn) \ "666: " insn "\n\t" \ + "668: \n\t" \ ".pushsection .fixup, \"ax\" \n" \ "667: \n\t" \ + "cmpb $0, kvm_rebooting \n\t" \ + "jne 668b \n\t" \ __ASM_SIZE(push) " $666b \n\t" \ - "jmp kvm_handle_fault_on_reboot \n\t" \ + "call kvm_spurious_fault \n\t" \ ".popsection \n\t" \ ".pushsection __ex_table, \"a\" \n\t" \ _ASM_PTR " 666b, 667b \n\t" \ -- cgit v1.1 From 8d28fec406e4d5ce6c109fe12699976e72e9748e Mon Sep 17 00:00:00 2001 From: "Roedel, Joerg" Date: Fri, 3 Dec 2010 13:15:21 +0100 Subject: KVM: SVM: Add clean-bits infrastructure code This patch adds the infrastructure for the implementation of the individual clean-bits. 
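As an illustration of how the new clean field is meant to be used, a sketch follows; the bit assignment and helper names are assumptions, only the u32 clean member itself comes from the patch:

#define VMCB_CLEAN_INTERCEPTS	0		/* assumed bit assignment */
#define VMCB_CLEAN_ALL_MASK	((1U << 31) - 1)

static inline void mark_dirty(struct vmcb *vmcb, int bit)
{
	vmcb->control.clean &= ~(1U << bit);	/* force hardware to reload this state */
}

static inline void mark_all_dirty(struct vmcb *vmcb)
{
	vmcb->control.clean = 0;
}

static inline void mark_all_clean(struct vmcb *vmcb)
{
	vmcb->control.clean = VMCB_CLEAN_ALL_MASK;	/* typically right after vmexit */
}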
Signed-off-by: Joerg Roedel Signed-off-by: Avi Kivity --- arch/x86/include/asm/svm.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86/include/asm') diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h index 11dbca7..235dd73 100644 --- a/arch/x86/include/asm/svm.h +++ b/arch/x86/include/asm/svm.h @@ -79,7 +79,8 @@ struct __attribute__ ((__packed__)) vmcb_control_area { u32 event_inj_err; u64 nested_cr3; u64 lbr_ctl; - u64 reserved_5; + u32 clean; + u32 reserved_5; u64 next_rip; u8 reserved_6[816]; }; -- cgit v1.1 From 78b2c54aa4a7e9e4257d2b8e3a4b96d2d0c6e636 Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Tue, 7 Dec 2010 10:48:06 +0800 Subject: KVM: MMU: rename 'no_apf' to 'prefault' It's the speculative path if 'no_apf = 1' and we will specially handle this speculative path in the later patch, so 'prefault' is better to fit the sense. Signed-off-by: Xiao Guangrong Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_host.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86/include/asm') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index d968cc5..aa1518d 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -241,7 +241,8 @@ struct kvm_mmu { void (*new_cr3)(struct kvm_vcpu *vcpu); void (*set_cr3)(struct kvm_vcpu *vcpu, unsigned long root); unsigned long (*get_cr3)(struct kvm_vcpu *vcpu); - int (*page_fault)(struct kvm_vcpu *vcpu, gva_t gva, u32 err, bool no_apf); + int (*page_fault)(struct kvm_vcpu *vcpu, gva_t gva, u32 err, + bool prefault); void (*inject_page_fault)(struct kvm_vcpu *vcpu, struct x86_exception *fault); void (*free)(struct kvm_vcpu *vcpu); -- cgit v1.1 From fb67e14fc90f18250259faf61a269320ea8e4d8f Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Tue, 7 Dec 2010 10:35:25 +0800 Subject: KVM: MMU: retry #PF for softmmu Retry #PF for softmmu only when the current vcpu has the same cr3 as the time when #PF occurs Signed-off-by: Xiao Guangrong Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_host.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/include/asm') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index aa1518d..4461429 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -593,6 +593,7 @@ struct kvm_x86_ops { struct kvm_arch_async_pf { u32 token; gfn_t gfn; + unsigned long cr3; bool direct_map; }; -- cgit v1.1 From 38e5e92fe8c02a8766459d505423b855caf9af1f Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 3 Dec 2010 15:25:16 +0100 Subject: KVM: SVM: Implement Flush-By-Asid feature This patch adds the new flush-by-asid of upcoming AMD processors to the KVM-AMD module. 
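The two new TLB_CONTROL values (added in the hunk that follows) let the host request a per-ASID flush instead of flushing every ASID; a sketch of how the flush path might pick between them, with the capability check assumed:

static void svm_flush_tlb(struct kvm_vcpu *vcpu)
{
	struct vcpu_svm *svm = to_svm(vcpu);

	/* prefer the cheaper per-ASID flush when the hardware offers it */
	if (flush_by_asid_supported)		/* assumed capability flag */
		svm->vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ASID;
	else
		svm->vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ALL_ASID;
}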
Signed-off-by: Joerg Roedel Signed-off-by: Avi Kivity --- arch/x86/include/asm/svm.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86/include/asm') diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h index 235dd73..82ecaa32 100644 --- a/arch/x86/include/asm/svm.h +++ b/arch/x86/include/asm/svm.h @@ -88,6 +88,8 @@ struct __attribute__ ((__packed__)) vmcb_control_area { #define TLB_CONTROL_DO_NOTHING 0 #define TLB_CONTROL_FLUSH_ALL_ASID 1 +#define TLB_CONTROL_FLUSH_ASID 3 +#define TLB_CONTROL_FLUSH_ASID_LOCAL 7 #define V_TPR_MASK 0x0f -- cgit v1.1 From 443381a828910efa3d71ba4491d180f2d0bb4212 Mon Sep 17 00:00:00 2001 From: Anthony Liguori Date: Mon, 6 Dec 2010 10:53:38 -0600 Subject: KVM: VMX: add module parameter to avoid trapping HLT instructions (v5) In certain use-cases, we want to allocate guests fixed time slices where idle guest cycles leave the machine idling. There are many approaches to achieve this but the most direct is to simply avoid trapping the HLT instruction, which lets the guest directly execute the instruction putting the processor to sleep. Introduce this as a module-level option for kvm-vmx.ko since if you do this for one guest, you probably want to do it for all. Signed-off-by: Anthony Liguori Signed-off-by: Avi Kivity --- arch/x86/include/asm/vmx.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'arch/x86/include/asm') diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index 42d95905..9642c22 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -297,6 +297,12 @@ enum vmcs_field { #define GUEST_INTR_STATE_SMI 0x00000004 #define GUEST_INTR_STATE_NMI 0x00000008 +/* GUEST_ACTIVITY_STATE flags */ +#define GUEST_ACTIVITY_ACTIVE 0 +#define GUEST_ACTIVITY_HLT 1 +#define GUEST_ACTIVITY_SHUTDOWN 2 +#define GUEST_ACTIVITY_WAIT_SIPI 3 + /* * Exit Qualifications for MOV for Control Register Access */ -- cgit v1.1 From 81dd35d42c9aef3c1f7ae6ce4cf6a0d382661db5 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 7 Dec 2010 17:15:06 +0100 Subject: KVM: SVM: Add xsetbv intercept This patch implements the xsetbv intercept to the AMD part of KVM. This makes AVX usable in a safe way for the guest on AVX-capable AMD hardware. The patch is tested by using AVX in the guest and host in parallel and checking for data corruption. I also used the KVM xsave unit-tests and they all pass. Signed-off-by: Joerg Roedel Signed-off-by: Avi Kivity --- arch/x86/include/asm/svm.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86/include/asm') diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h index 82ecaa32..f7087bf 100644 --- a/arch/x86/include/asm/svm.h +++ b/arch/x86/include/asm/svm.h @@ -47,6 +47,7 @@ enum { INTERCEPT_MONITOR, INTERCEPT_MWAIT, INTERCEPT_MWAIT_COND, + INTERCEPT_XSETBV, }; @@ -329,6 +330,7 @@ struct __attribute__ ((__packed__)) vmcb { #define SVM_EXIT_MONITOR 0x08a #define SVM_EXIT_MWAIT 0x08b #define SVM_EXIT_MWAIT_COND 0x08c +#define SVM_EXIT_XSETBV 0x08d #define SVM_EXIT_NPF 0x400 #define SVM_EXIT_ERR -1 -- cgit v1.1 From eea1cff9ab732ea56358ff5e1bd8b99db2e8402d Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Tue, 21 Dec 2010 11:12:00 +0100 Subject: KVM: x86: fix CR8 handling The handling of CR8 writes in KVM is currently somewhat cumbersome. This patch makes it look like the other CR register handlers and fixes a possible issue in VMX, where the RIP would be incremented despite an injected #GP.
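The essence of the CR8 change is that the handler now reports success or failure so the caller can decide whether to advance RIP; a sketch, with the handler name and skip helper assumed:

static int handle_cr8_write(struct kvm_vcpu *vcpu, unsigned long val)
{
	int err = kvm_set_cr8(vcpu, val);	/* now returns a result */

	if (err)
		kvm_inject_gp(vcpu, 0);		/* do not advance RIP past a faulting MOV */
	else
		skip_emulated_instruction(vcpu);
	return 1;
}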
Signed-off-by: Andre Przywara Signed-off-by: Marcelo Tosatti --- arch/x86/include/asm/kvm_host.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/include/asm') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 4461429..cb5cad2 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -661,7 +661,7 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason, int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0); int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3); int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4); -void kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8); +int kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8); int kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val); int kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val); unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu); -- cgit v1.1 From db8fcefaa704ccb40b6dcd24e3b75bad3ce7dde3 Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Tue, 21 Dec 2010 11:12:01 +0100 Subject: KVM: move complete_insn_gp() into x86.c Move the complete_insn_gp() helper function out of the VMX part into the generic x86 part to make it usable by SVM. Signed-off-by: Andre Przywara Signed-off-by: Marcelo Tosatti --- arch/x86/include/asm/kvm_host.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86/include/asm') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index cb5cad2..cd4a990 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -828,4 +828,6 @@ void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu); extern bool kvm_find_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn); +void kvm_complete_insn_gp(struct kvm_vcpu *vcpu, int err); + #endif /* _ASM_X86_KVM_HOST_H */ -- cgit v1.1 From 51d8b66199e94284e7725a79eae4a38de4b80d54 Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Tue, 21 Dec 2010 11:12:02 +0100 Subject: KVM: cleanup emulate_instruction emulate_instruction had many callers, but only one used all parameters. One parameter was unused, another one is now hidden by a wrapper function (required for a future addition anyway), so most callers now use a shorter parameter list.
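Typical call sites after the cleanup shrink to the following shape (call-site fragments only, surrounding handlers assumed; the wrapper itself is visible in the hunk below):

	/* a typical intercept handler, no CR2 and no special flags: */
	if (emulate_instruction(vcpu, 0) != EMULATE_DONE)
		return 0;

	/* a #UD trap that should be offered to the emulator first: */
	int er = emulate_instruction(vcpu, EMULTYPE_TRAP_UD);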
Signed-off-by: Andre Przywara Signed-off-by: Marcelo Tosatti --- arch/x86/include/asm/kvm_host.h | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'arch/x86/include/asm') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index cd4a990..de00b60 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -634,8 +634,15 @@ enum emulation_result { #define EMULTYPE_NO_DECODE (1 << 0) #define EMULTYPE_TRAP_UD (1 << 1) #define EMULTYPE_SKIP (1 << 2) -int emulate_instruction(struct kvm_vcpu *vcpu, - unsigned long cr2, u16 error_code, int emulation_type); +int x86_emulate_instruction(struct kvm_vcpu *vcpu, + unsigned long cr2, int emulation_type); + +static inline int emulate_instruction(struct kvm_vcpu *vcpu, + int emulation_type) +{ + return x86_emulate_instruction(vcpu, 0, emulation_type); +} + void realmode_lgdt(struct kvm_vcpu *vcpu, u16 size, unsigned long address); void realmode_lidt(struct kvm_vcpu *vcpu, u16 size, unsigned long address); -- cgit v1.1 From 7ff76d58a9dc03a38b86d283abcaae2ac3c74fe3 Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Tue, 21 Dec 2010 11:12:04 +0100 Subject: KVM: SVM: enhance MOV CR intercept handler Newer SVM implementations provide the GPR number in the VMCB, so that the emulation path is no longer necessary to handle CR register access intercepts. Implement the handling in svm.c and use it when the info is provided. Signed-off-by: Andre Przywara Signed-off-by: Marcelo Tosatti --- arch/x86/include/asm/svm.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86/include/asm') diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h index f7087bf..f0ffb81 100644 --- a/arch/x86/include/asm/svm.h +++ b/arch/x86/include/asm/svm.h @@ -260,6 +260,8 @@ struct __attribute__ ((__packed__)) vmcb { #define SVM_EXITINFOSHIFT_TS_REASON_JMP 38 #define SVM_EXITINFOSHIFT_TS_HAS_ERROR_CODE 44 +#define SVM_EXITINFO_REG_MASK 0x0F + #define SVM_EXIT_READ_CR0 0x000 #define SVM_EXIT_READ_CR3 0x003 #define SVM_EXIT_READ_CR4 0x004 -- cgit v1.1 From dc25e89e07d5ef31c476117d2c76b34dbb22196c Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Tue, 21 Dec 2010 11:12:07 +0100 Subject: KVM: SVM: copy instruction bytes from VMCB In case of a nested page fault or an intercepted #PF, newer SVM implementations provide a copy of the faulting instruction bytes in the VMCB. Use these bytes to feed the instruction emulator and avoid the costly guest instruction fetch in this case.
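A sketch of the SVM #PF intercept once the bytes are available, assuming the usual exit_info_1/exit_info_2 encoding of error code and fault address; the handler name is illustrative:

static int pf_interception(struct vcpu_svm *svm)
{
	u64 fault_address = svm->vmcb->control.exit_info_2;
	u32 error_code    = svm->vmcb->control.exit_info_1;

	/* hand the pre-fetched bytes to the emulator; hardware without the
	 * feature leaves insn_len at 0 and the emulator fetches from the guest */
	return kvm_mmu_page_fault(&svm->vcpu, fault_address, error_code,
				  svm->vmcb->control.insn_bytes,
				  svm->vmcb->control.insn_len);
}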
Signed-off-by: Andre Przywara Signed-off-by: Marcelo Tosatti --- arch/x86/include/asm/kvm_emulate.h | 2 +- arch/x86/include/asm/kvm_host.h | 9 +++++---- arch/x86/include/asm/svm.h | 4 +++- 3 files changed, 9 insertions(+), 6 deletions(-) (limited to 'arch/x86/include/asm') diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index bf70ece..8e37deb 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h @@ -265,7 +265,7 @@ struct x86_emulate_ctxt { #define X86EMUL_MODE_HOST X86EMUL_MODE_PROT64 #endif -int x86_decode_insn(struct x86_emulate_ctxt *ctxt); +int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len); #define EMULATION_FAILED -1 #define EMULATION_OK 0 #define EMULATION_RESTART 1 diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index de00b60..6268f6c 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -634,13 +634,13 @@ enum emulation_result { #define EMULTYPE_NO_DECODE (1 << 0) #define EMULTYPE_TRAP_UD (1 << 1) #define EMULTYPE_SKIP (1 << 2) -int x86_emulate_instruction(struct kvm_vcpu *vcpu, - unsigned long cr2, int emulation_type); +int x86_emulate_instruction(struct kvm_vcpu *vcpu, unsigned long cr2, + int emulation_type, void *insn, int insn_len); static inline int emulate_instruction(struct kvm_vcpu *vcpu, int emulation_type) { - return x86_emulate_instruction(vcpu, 0, emulation_type); + return x86_emulate_instruction(vcpu, 0, emulation_type, NULL, 0); } void realmode_lgdt(struct kvm_vcpu *vcpu, u16 size, unsigned long address); @@ -721,7 +721,8 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu); int kvm_fix_hypercall(struct kvm_vcpu *vcpu); -int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t gva, u32 error_code); +int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t gva, u32 error_code, + void *insn, int insn_len); void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva); void kvm_enable_tdp(void); diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h index f0ffb81..f2b83bc 100644 --- a/arch/x86/include/asm/svm.h +++ b/arch/x86/include/asm/svm.h @@ -83,7 +83,9 @@ struct __attribute__ ((__packed__)) vmcb_control_area { u32 clean; u32 reserved_5; u64 next_rip; - u8 reserved_6[816]; + u8 insn_len; + u8 insn_bytes[15]; + u8 reserved_6[800]; }; -- cgit v1.1 From 07c116d2f53d721377fe428d067e2cecf80c906e Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Tue, 21 Dec 2010 12:54:19 +0200 Subject: KVM: VMX: Add definitions for more vm entry/exit control bits Signed-off-by: Avi Kivity Signed-off-by: Marcelo Tosatti --- arch/x86/include/asm/vmx.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'arch/x86/include/asm') diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index 9642c22..84471b8 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -66,15 +66,23 @@ #define PIN_BASED_NMI_EXITING 0x00000008 #define PIN_BASED_VIRTUAL_NMIS 0x00000020 +#define VM_EXIT_SAVE_DEBUG_CONTROLS 0x00000002 #define VM_EXIT_HOST_ADDR_SPACE_SIZE 0x00000200 +#define VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL 0x00001000 #define VM_EXIT_ACK_INTR_ON_EXIT 0x00008000 #define VM_EXIT_SAVE_IA32_PAT 0x00040000 #define VM_EXIT_LOAD_IA32_PAT 0x00080000 +#define VM_EXIT_SAVE_IA32_EFER 0x00100000 +#define VM_EXIT_LOAD_IA32_EFER 0x00200000 +#define VM_EXIT_SAVE_VMX_PREEMPTION_TIMER 0x00400000 +#define VM_ENTRY_LOAD_DEBUG_CONTROLS 0x00000002 #define VM_ENTRY_IA32E_MODE 0x00000200 #define VM_ENTRY_SMM 0x00000400 #define 
VM_ENTRY_DEACT_DUAL_MONITOR 0x00000800 +#define VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL 0x00002000 #define VM_ENTRY_LOAD_IA32_PAT 0x00004000 +#define VM_ENTRY_LOAD_IA32_EFER 0x00008000 /* VMCS Encodings */ enum vmcs_field { -- cgit v1.1 From aff48baa34c033318ad322ecbf2e4bcd891b29ca Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Sun, 5 Dec 2010 18:56:11 +0200 Subject: KVM: Fetch guest cr3 from hardware on demand Instead of syncing the guest cr3 every exit, which is expensive on vmx with ept enabled, sync it only on demand. [sheng: fix incorrect cr3 seen by Windows XP] Signed-off-by: Sheng Yang Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_host.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86/include/asm') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 6268f6c..95f026b 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -117,6 +117,7 @@ enum kvm_reg { enum kvm_reg_ex { VCPU_EXREG_PDPTR = NR_VCPU_REGS, + VCPU_EXREG_CR3, }; enum { @@ -533,6 +534,7 @@ struct kvm_x86_ops { struct kvm_segment *var, int seg); void (*get_cs_db_l_bits)(struct kvm_vcpu *vcpu, int *db, int *l); void (*decache_cr0_guest_bits)(struct kvm_vcpu *vcpu); + void (*decache_cr3)(struct kvm_vcpu *vcpu); void (*decache_cr4_guest_bits)(struct kvm_vcpu *vcpu); void (*set_cr0)(struct kvm_vcpu *vcpu, unsigned long cr0); void (*set_cr3)(struct kvm_vcpu *vcpu, unsigned long cr3); -- cgit v1.1 From b034cf0105235e65ee1b0161dbe8fef0338d06e7 Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Thu, 23 Dec 2010 16:08:35 +0800 Subject: KVM: MMU: audit: allow audit more guests at the same time It only allows auditing one guest in the system, since: - 'audit_point' is a global variable - mmu_audit_disable() is called in kvm_mmu_destroy(), so audit is disabled after a guest exits. This patch fixes those issues and allows auditing more guests at the same time. Signed-off-by: Xiao Guangrong Signed-off-by: Avi Kivity --- arch/x86/include/asm/kvm_host.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch/x86/include/asm') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 95f026b..aa75f21 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -461,6 +461,10 @@ struct kvm_arch { /* fields used by HYPER-V emulation */ u64 hv_guest_os_id; u64 hv_hypercall; + + #ifdef CONFIG_KVM_MMU_AUDIT + int audit_point; + #endif }; struct kvm_vm_stat { -- cgit v1.1
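With audit_point now living in struct kvm_arch, each VM records its own position in the audit cycle; conceptually the audit entry point becomes something like the following sketch (helper name and the walk step are assumptions):

static void kvm_mmu_audit(struct kvm_vcpu *vcpu, int point)
{
	vcpu->kvm->arch.audit_point = point;	/* per-VM now, not a global */
	/* ... walk this VM's shadow pages and validate sptes/gfns ... */
}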