Diffstat (limited to 'sys/amd64')
-rw-r--r--   sys/amd64/amd64/gdb_machdep.c          |    4
-rw-r--r--   sys/amd64/amd64/initcpu.c              |    1
-rw-r--r--   sys/amd64/amd64/pmap.c                 |    7
-rw-r--r--   sys/amd64/amd64/trap.c                 |   14
-rw-r--r--   sys/amd64/amd64/vm_machdep.c           |    2
-rw-r--r--   sys/amd64/conf/NOTES                   |   17
-rw-r--r--   sys/amd64/ia32/ia32_reg.c              |    6
-rw-r--r--   sys/amd64/include/md_var.h             |    1
-rw-r--r--   sys/amd64/include/vmm.h                |   26
-rw-r--r--   sys/amd64/include/vmparam.h            |    5
-rw-r--r--   sys/amd64/linux/linux_machdep.c        |    2
-rw-r--r--   sys/amd64/linux/linux_support.s        |   48
-rw-r--r--   sys/amd64/linux32/linux32_machdep.c    |    2
-rw-r--r--   sys/amd64/vmm/amd/svm.c                |   27
-rw-r--r--   sys/amd64/vmm/intel/vmx.c              |   37
-rw-r--r--   sys/amd64/vmm/intel/vmx.h              |    2
-rw-r--r--   sys/amd64/vmm/intel/vmx_msr.c          |    3
-rw-r--r--   sys/amd64/vmm/vmm.c                    |  114
-rw-r--r--   sys/amd64/vmm/vmm_instruction_emul.c   |    2
-rw-r--r--   sys/amd64/vmm/vmm_stat.c               |    1
-rw-r--r--   sys/amd64/vmm/vmm_stat.h               |    1
21 files changed, 228 insertions, 94 deletions
diff --git a/sys/amd64/amd64/gdb_machdep.c b/sys/amd64/amd64/gdb_machdep.c
index 5775c8f..61ffad6 100644
--- a/sys/amd64/amd64/gdb_machdep.c
+++ b/sys/amd64/amd64/gdb_machdep.c
@@ -48,6 +48,8 @@ __FBSDID("$FreeBSD$");
 void *
 gdb_cpu_getreg(int regnum, size_t *regsz)
 {
+	static uint32_t _kcodesel = GSEL(GCODE_SEL, SEL_KPL);
+	static uint32_t _kdatasel = GSEL(GDATA_SEL, SEL_KPL);
 
 	*regsz = gdb_cpu_regsz(regnum);
@@ -76,6 +78,8 @@ gdb_cpu_getreg(int regnum, size_t *regsz)
 	case 14: return (&kdb_thrctx->pcb_r14);
 	case 15: return (&kdb_thrctx->pcb_r15);
 	case 16: return (&kdb_thrctx->pcb_rip);
+	case 18: return (&_kcodesel);
+	case 19: return (&_kdatasel);
 	}
 	return (NULL);
 }
diff --git a/sys/amd64/amd64/initcpu.c b/sys/amd64/amd64/initcpu.c
index 8551d27..36f2d0f 100644
--- a/sys/amd64/amd64/initcpu.c
+++ b/sys/amd64/amd64/initcpu.c
@@ -74,6 +74,7 @@ u_int	cpu_fxsr;		/* SSE enabled */
 u_int	cpu_mxcsr_mask;		/* Valid bits in mxcsr */
 u_int	cpu_clflush_line_size = 32;
 u_int	cpu_stdext_feature;
+u_int	cpu_stdext_feature2;
 u_int	cpu_max_ext_state_size;
 u_int	cpu_mon_mwait_flags;	/* MONITOR/MWAIT flags (CPUID.05H.ECX) */
 u_int	cpu_mon_min_size;	/* MONITOR minimum range size, bytes */
diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c
index 5f9ee91..6a3de60 100644
--- a/sys/amd64/amd64/pmap.c
+++ b/sys/amd64/amd64/pmap.c
@@ -3935,7 +3935,6 @@ pmap_promote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va,
 	pd_entry_t newpde;
 	pt_entry_t *firstpte, oldpte, pa, *pte;
 	pt_entry_t PG_G, PG_A, PG_M, PG_RW, PG_V;
-	vm_offset_t oldpteva;
 	vm_page_t mpte;
 	int PG_PTE_CACHE;
@@ -3995,10 +3994,9 @@ setpte:
 		if (!atomic_cmpset_long(pte, oldpte, oldpte & ~PG_RW))
 			goto setpte;
 		oldpte &= ~PG_RW;
-		oldpteva = (oldpte & PG_FRAME & PDRMASK) |
-		    (va & ~PDRMASK);
 		CTR2(KTR_PMAP, "pmap_promote_pde: protect for va %#lx"
-		    " in pmap %p", oldpteva, pmap);
+		    " in pmap %p", (oldpte & PG_FRAME & PDRMASK) |
+		    (va & ~PDRMASK), pmap);
 	}
 	if ((oldpte & PG_PTE_PROMOTE) != (newpde & PG_PTE_PROMOTE)) {
 		atomic_add_long(&pmap_pde_p_failures, 1);
@@ -4846,6 +4844,7 @@ pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len,
 		    PG_PS_FRAME, &lock))) {
 			*pde = srcptepaddr & ~PG_W;
 			pmap_resident_count_inc(dst_pmap, NBPDR / PAGE_SIZE);
+			atomic_add_long(&pmap_pde_mappings, 1);
 		} else
 			dstmpde->wire_count--;
 		continue;
diff --git a/sys/amd64/amd64/trap.c b/sys/amd64/amd64/trap.c
index 193d207..fa74eb2 100644
--- a/sys/amd64/amd64/trap.c
+++ b/sys/amd64/amd64/trap.c
@@ -257,8 +257,8 @@ trap(struct trapframe *frame)
 	td->td_pticks = 0;
 	td->td_frame = frame;
 	addr = frame->tf_rip;
-	if (td->td_ucred != p->p_ucred)
-		cred_update_thread(td);
+	if (td->td_cowgen != p->p_cowgen)
+		thread_cow_update(td);
 
 	switch (type) {
 	case T_PRIVINFLT:	/* privileged instruction fault */
@@ -840,14 +840,8 @@ trap_fatal(frame, eva)
 	if (frame->tf_rflags & PSL_RF)
 		printf("resume, ");
 	printf("IOPL = %ld\n", (frame->tf_rflags & PSL_IOPL) >> 12);
-	printf("current process		= ");
-	if (curproc) {
-		printf("%lu (%s)\n",
-		    (u_long)curproc->p_pid, curthread->td_name ?
-		    curthread->td_name : "");
-	} else {
-		printf("Idle\n");
-	}
+	printf("current process		= %d (%s)\n",
+	    curproc->p_pid, curthread->td_name);
 
 #ifdef KDB
 	if (debugger_on_panic || kdb_active)
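The trap.c hunk above replaces a per-field credential comparison with a generation-count check: p_cowgen is bumped whenever the process's copy-on-write data (such as its credentials) is replaced, so the trap path only compares two integers. Below is a minimal, runnable userland sketch of that pattern; the struct fields are illustrative stand-ins, not the kernel's real proc/thread layout.

#include <stdio.h>

struct proc   { int cowgen; const char *ucred; };
struct thread { int cowgen; const char *ucred; };

/* Refresh the thread's cached copy only when the generation moved. */
static void
thread_cow_update(struct thread *td, const struct proc *p)
{
	td->ucred = p->ucred;	/* in the kernel: take new references */
	td->cowgen = p->cowgen;
}

int
main(void)
{
	struct proc p = { 1, "cred-v1" };
	struct thread td = { 1, "cred-v1" };

	p.ucred = "cred-v2";	/* e.g. setuid() installed new credentials */
	p.cowgen++;		/* one counter bump instead of field compares */

	if (td.cowgen != p.cowgen)	/* the cheap check trap() now does */
		thread_cow_update(&td, &p);
	printf("%s\n", td.ucred);	/* prints cred-v2 */
	return 0;
}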
diff --git a/sys/amd64/amd64/vm_machdep.c b/sys/amd64/amd64/vm_machdep.c
index 0d03ed6..1fb7016 100644
--- a/sys/amd64/amd64/vm_machdep.c
+++ b/sys/amd64/amd64/vm_machdep.c
@@ -155,7 +155,6 @@ cpu_fork(td1, p2, td2, flags)
 	struct pcb *pcb2;
 	struct mdproc *mdp1, *mdp2;
 	struct proc_ldt *pldt;
-	pmap_t pmap2;
 
 	p1 = td1->td_proc;
 	if ((flags & RFPROC) == 0) {
@@ -218,7 +217,6 @@ cpu_fork(td1, p2, td2, flags)
 	 * Set registers for trampoline to user mode.  Leave space for the
 	 * return address on stack.  These are the kernel mode register values.
 	 */
-	pmap2 = vmspace_pmap(p2->p_vmspace);
 	pcb2->pcb_r12 = (register_t)fork_return;	/* fork_trampoline argument */
 	pcb2->pcb_rbp = 0;
 	pcb2->pcb_rsp = (register_t)td2->td_frame - sizeof(void *);
diff --git a/sys/amd64/conf/NOTES b/sys/amd64/conf/NOTES
index e0fe465..97eefbf 100644
--- a/sys/amd64/conf/NOTES
+++ b/sys/amd64/conf/NOTES
@@ -17,6 +17,23 @@ profile         2
 #
 options 	KDTRACE_HOOKS
 
+# DTrace core
+# NOTE: introduces CDDL-licensed components into the kernel
+#device		dtrace
+
+# DTrace modules
+#device		dtrace_lockstat
+#device		dtrace_profile
+#device		dtrace_sdt
+#device		dtrace_fbt
+#device		dtrace_systrace
+#device		dtrace_prototype
+#device		dtnfscl
+#device		dtmalloc
+
+# Alternatively include all the DTrace modules
+#device		dtraceall
+
 
 #####################################################################
 # SMP OPTIONS:
diff --git a/sys/amd64/ia32/ia32_reg.c b/sys/amd64/ia32/ia32_reg.c
index 5bc18f1..d0e6bfe 100644
--- a/sys/amd64/ia32/ia32_reg.c
+++ b/sys/amd64/ia32/ia32_reg.c
@@ -79,11 +79,9 @@ __FBSDID("$FreeBSD$");
 int
 fill_regs32(struct thread *td, struct reg32 *regs)
 {
-	struct pcb *pcb;
 	struct trapframe *tp;
 
 	tp = td->td_frame;
-	pcb = td->td_pcb;
 	if (tp->tf_flags & TF_HASSEGS) {
 		regs->r_gs = tp->tf_gs;
 		regs->r_fs = tp->tf_fs;
@@ -113,18 +111,16 @@ fill_regs32(struct thread *td, struct reg32 *regs)
 int
 set_regs32(struct thread *td, struct reg32 *regs)
 {
-	struct pcb *pcb;
 	struct trapframe *tp;
 
 	tp = td->td_frame;
 	if (!EFL_SECURE(regs->r_eflags, tp->tf_rflags) || !CS_SECURE(regs->r_cs))
 		return (EINVAL);
-	pcb = td->td_pcb;
 	tp->tf_gs = regs->r_gs;
 	tp->tf_fs = regs->r_fs;
 	tp->tf_es = regs->r_es;
 	tp->tf_ds = regs->r_ds;
-	set_pcb_flags(pcb, PCB_FULL_IRET);
+	set_pcb_flags(td->td_pcb, PCB_FULL_IRET);
 	tp->tf_flags = TF_HASSEGS;
 	tp->tf_rdi = regs->r_edi;
 	tp->tf_rsi = regs->r_esi;
diff --git a/sys/amd64/include/md_var.h b/sys/amd64/include/md_var.h
index 0813e5f..954df3e 100644
--- a/sys/amd64/include/md_var.h
+++ b/sys/amd64/include/md_var.h
@@ -49,6 +49,7 @@ extern	u_int	via_feature_rng;
 extern	u_int	via_feature_xcrypt;
 extern	u_int	cpu_clflush_line_size;
 extern	u_int	cpu_stdext_feature;
+extern	u_int	cpu_stdext_feature2;
 extern	u_int	cpu_fxsr;
 extern	u_int	cpu_high;
 extern	u_int	cpu_id;
diff --git a/sys/amd64/include/vmm.h b/sys/amd64/include/vmm.h
index d3798bc..1a4e5ab 100644
--- a/sys/amd64/include/vmm.h
+++ b/sys/amd64/include/vmm.h
@@ -120,13 +120,18 @@ struct vm_object;
 struct vm_guest_paging;
 struct pmap;
 
+struct vm_eventinfo {
+	void	*rptr;		/* rendezvous cookie */
+	int	*sptr;		/* suspend cookie */
+	int	*iptr;		/* reqidle cookie */
+};
+
 typedef int	(*vmm_init_func_t)(int ipinum);
 typedef int	(*vmm_cleanup_func_t)(void);
 typedef void	(*vmm_resume_func_t)(void);
 typedef void *	(*vmi_init_func_t)(struct vm *vm, struct pmap *pmap);
 typedef int	(*vmi_run_func_t)(void *vmi, int vcpu, register_t rip,
-		    struct pmap *pmap, void *rendezvous_cookie,
-		    void *suspend_cookie);
+		    struct pmap *pmap, struct vm_eventinfo *info);
 typedef void	(*vmi_cleanup_func_t)(void *vmi);
 typedef int	(*vmi_get_register_t)(void *vmi, int vcpu, int num,
 		    uint64_t *retval);
@@ -208,6 +213,7 @@ struct vm_exit *vm_exitinfo(struct vm *vm, int vcpuid);
 void vm_exit_suspended(struct vm *vm, int vcpuid, uint64_t rip);
 void vm_exit_rendezvous(struct vm *vm, int vcpuid, uint64_t rip);
 void vm_exit_astpending(struct vm *vm, int vcpuid, uint64_t rip);
+void vm_exit_reqidle(struct vm *vm, int vcpuid, uint64_t rip);
 
 #ifdef _SYS__CPUSET_H_
 /*
@@ -232,17 +238,24 @@ cpuset_t vm_suspended_cpus(struct vm *vm);
 #endif	/* _SYS__CPUSET_H_ */
 
 static __inline int
-vcpu_rendezvous_pending(void *rendezvous_cookie)
+vcpu_rendezvous_pending(struct vm_eventinfo *info)
+{
+
+	return (*((uintptr_t *)(info->rptr)) != 0);
+}
+
+static __inline int
+vcpu_suspended(struct vm_eventinfo *info)
 {
 
-	return (*(uintptr_t *)rendezvous_cookie != 0);
+	return (*info->sptr);
 }
 
 static __inline int
-vcpu_suspended(void *suspend_cookie)
+vcpu_reqidle(struct vm_eventinfo *info)
 {
 
-	return (*(int *)suspend_cookie);
+	return (*info->iptr);
 }
 
 /*
@@ -506,6 +519,7 @@ enum vm_exitcode {
 	VM_EXITCODE_MONITOR,
 	VM_EXITCODE_MWAIT,
 	VM_EXITCODE_SVM,
+	VM_EXITCODE_REQIDLE,
 	VM_EXITCODE_MAX
 };
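struct vm_eventinfo, added above, bundles the three conditions that can force a guest exit (rendezvous, suspend, and the new reqidle) into one argument, replacing the two loose cookie pointers that vmi_run_func_t used to take; the inline helpers reduce each check to a plain load. A runnable userland sketch of the same polling pattern follows; the names and printf scaffolding are illustrative, not bhyve code.

#include <stdint.h>
#include <stdio.h>

/* Userland analogue of vm_eventinfo: one struct carries pointers to
 * every condition the inner run loop must poll before guest entry. */
struct eventinfo {
	void *rptr;	/* rendezvous: non-NULL function pointer pending */
	int  *sptr;	/* VM-wide suspend flag */
	int  *iptr;	/* per-vcpu reqidle flag */
};

static int rendezvous_pending(struct eventinfo *ei) { return (*(uintptr_t *)ei->rptr != 0); }
static int suspended(struct eventinfo *ei)          { return (*ei->sptr); }
static int reqidle(struct eventinfo *ei)            { return (*ei->iptr); }

int
main(void)
{
	void *rendezvous_func = NULL;
	int suspend = 0, req = 1;
	struct eventinfo ei = { &rendezvous_func, &suspend, &req };

	/* The run loop performs these cheap loads before each guest entry. */
	if (suspended(&ei))
		printf("exit: suspended\n");
	else if (rendezvous_pending(&ei))
		printf("exit: rendezvous\n");
	else if (reqidle(&ei))
		printf("exit: reqidle\n");	/* prints this */
	return 0;
}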
diff --git a/sys/amd64/include/vmparam.h b/sys/amd64/include/vmparam.h
index a92dd4e..07158e8 100644
--- a/sys/amd64/include/vmparam.h
+++ b/sys/amd64/include/vmparam.h
@@ -90,13 +90,12 @@
 #define	VM_PHYSSEG_MAX		63
 
 /*
- * Create three free page pools: VM_FREEPOOL_DEFAULT is the default pool
+ * Create two free page pools: VM_FREEPOOL_DEFAULT is the default pool
  * from which physical pages are allocated and VM_FREEPOOL_DIRECT is
 * the pool from which physical pages for page tables and small UMA
 * objects are allocated.
 */
-#define	VM_NFREEPOOL		3
-#define	VM_FREEPOOL_CACHE	2
+#define	VM_NFREEPOOL		2
 #define	VM_FREEPOOL_DEFAULT	0
 #define	VM_FREEPOOL_DIRECT	1
diff --git a/sys/amd64/linux/linux_machdep.c b/sys/amd64/linux/linux_machdep.c
index d6174e6..451e4b4 100644
--- a/sys/amd64/linux/linux_machdep.c
+++ b/sys/amd64/linux/linux_machdep.c
@@ -251,7 +251,7 @@ linux_mmap2(struct thread *td, struct linux_mmap2_args *args)
 		 */
 		PROC_LOCK(p);
 		p->p_vmspace->vm_maxsaddr = (char *)USRSTACK -
-		    lim_cur(p, RLIMIT_STACK);
+		    lim_cur_proc(p, RLIMIT_STACK);
 		PROC_UNLOCK(p);
 	}
diff --git a/sys/amd64/linux/linux_support.s b/sys/amd64/linux/linux_support.s
index f809d11..2a3ba1a 100644
--- a/sys/amd64/linux/linux_support.s
+++ b/sys/amd64/linux/linux_support.s
@@ -45,9 +45,9 @@ ENTRY(futex_xchgl)
 	movq	$VM_MAXUSER_ADDRESS-4,%rax
 	cmpq	%rax,%rsi
 	ja	futex_fault
-	xchgq	%rdi,(%rsi)
-	movq	%rdi,(%rdx)
-	xorq	%rax,%rax
+	xchgl	%edi,(%rsi)
+	movl	%edi,(%rdx)
+	xorl	%eax,%eax
 	movq	%rax,PCB_ONFAULT(%r8)
 	ret
 
@@ -60,9 +60,9 @@ ENTRY(futex_addl)
 #ifdef SMP
 	lock
 #endif
-	xaddq	%rdi,(%rsi)
-	movq	%rdi,(%rdx)
-	xorq	%rax,%rax
+	xaddl	%edi,(%rsi)
+	movl	%edi,(%rdx)
+	xorl	%eax,%eax
 	movq	%rax,PCB_ONFAULT(%r8)
 	ret
 
@@ -72,16 +72,16 @@ ENTRY(futex_orl)
 	movq	$VM_MAXUSER_ADDRESS-4,%rax
 	cmpq	%rax,%rsi
 	ja	futex_fault
-	movq	(%rsi),%rax
-1:	movq	%rax,%rcx
-	orq	%rdi,%rcx
+	movl	(%rsi),%eax
+1:	movl	%eax,%ecx
+	orl	%edi,%ecx
 #ifdef SMP
 	lock
 #endif
-	cmpxchgq %rcx,(%rsi)
+	cmpxchgl %ecx,(%rsi)
 	jnz	1b
-	movq	%rax,(%rdx)
-	xorq	%rax,%rax
+	movl	%eax,(%rdx)
+	xorl	%eax,%eax
 	movq	%rax,PCB_ONFAULT(%r8)
 	ret
 
@@ -91,16 +91,16 @@ ENTRY(futex_andl)
 	movq	$VM_MAXUSER_ADDRESS-4,%rax
 	cmpq	%rax,%rsi
 	ja	futex_fault
-	movq	(%rsi),%rax
-1:	movq	%rax,%rcx
-	andq	%rdi,%rcx
+	movl	(%rsi),%eax
+1:	movl	%eax,%ecx
+	andl	%edi,%ecx
#ifdef SMP
 	lock
 #endif
-	cmpxchgq %rcx,(%rsi)
+	cmpxchgl %ecx,(%rsi)
 	jnz	1b
-	movq	%rax,(%rdx)
-	xorq	%rax,%rax
+	movl	%eax,(%rdx)
+	xorl	%eax,%eax
 	movq	%rax,PCB_ONFAULT(%r8)
 	ret
 
@@ -110,15 +110,15 @@ ENTRY(futex_xorl)
 	movq	$VM_MAXUSER_ADDRESS-4,%rax
 	cmpq	%rax,%rsi
 	ja	futex_fault
-	movq	(%rsi),%rax
-1:	movq	%rax,%rcx
-	xorq	%rdi,%rcx
+	movl	(%rsi),%eax
+1:	movl	%eax,%ecx
+	xorl	%edi,%ecx
 #ifdef SMP
 	lock
 #endif
-	cmpxchgq %rcx,(%rsi)
+	cmpxchgl %ecx,(%rsi)
 	jnz	1b
-	movq	%rax,(%rdx)
-	xorq	%rax,%rax
+	movl	%eax,(%rdx)
+	xorl	%eax,%eax
 	movq	%rax,PCB_ONFAULT(%r8)
 	ret
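The linux_support.s change fixes the futex primitives to operate on 32-bit quantities: a Linux futex word is always 32 bits, so the old 64-bit xchgq/xaddq/cmpxchgq forms read and wrote four bytes past the futex word and produced results at the wrong width. As a rough C11 analogue (not the kernel code, which must also catch user-address faults via PCB_ONFAULT), the fixed futex_orl loop corresponds to:

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

/* All accesses are 32-bit, matching the size of a Linux futex word.
 * Returns the old value, as the assembly does through *%rdx. */
static uint32_t
futex_orl(uint32_t val, _Atomic uint32_t *uaddr)
{
	uint32_t old = atomic_load(uaddr);

	/* Retry until no other thread modified the word in between. */
	while (!atomic_compare_exchange_weak(uaddr, &old, old | val))
		;
	return (old);
}

int
main(void)
{
	_Atomic uint32_t word = 0x0f;
	uint32_t old = futex_orl(0xf0, &word);

	printf("old=%#x new=%#x\n", old, atomic_load(&word));
	/* prints: old=0xf new=0xff */
	return 0;
}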
diff --git a/sys/amd64/linux32/linux32_machdep.c b/sys/amd64/linux32/linux32_machdep.c
index f9c11ca..1c54c5c 100644
--- a/sys/amd64/linux32/linux32_machdep.c
+++ b/sys/amd64/linux32/linux32_machdep.c
@@ -615,7 +615,7 @@ linux_mmap_common(struct thread *td, l_uintptr_t addr, l_size_t len, l_int prot,
 		 */
 		PROC_LOCK(p);
 		p->p_vmspace->vm_maxsaddr = (char *)LINUX32_USRSTACK -
-		    lim_cur(p, RLIMIT_STACK);
+		    lim_cur_proc(p, RLIMIT_STACK);
 		PROC_UNLOCK(p);
 	}
diff --git a/sys/amd64/vmm/amd/svm.c b/sys/amd64/vmm/amd/svm.c
index 20e8f76..b25d69d 100644
--- a/sys/amd64/vmm/amd/svm.c
+++ b/sys/amd64/vmm/amd/svm.c
@@ -102,8 +102,8 @@ static MALLOC_DEFINE(M_SVM_VLAPIC, "svm-vlapic", "svm-vlapic");
 /* Per-CPU context area. */
 extern struct pcpu __pcpu[];
 
-static uint32_t svm_feature;	/* AMD SVM features. */
-SYSCTL_UINT(_hw_vmm_svm, OID_AUTO, features, CTLFLAG_RD, &svm_feature, 0,
+static uint32_t svm_feature = ~0U;	/* AMD SVM features. */
+SYSCTL_UINT(_hw_vmm_svm, OID_AUTO, features, CTLFLAG_RDTUN, &svm_feature, 0,
     "SVM features advertised by CPUID.8000000AH:EDX");
 
 static int disable_npf_assist;
@@ -112,7 +112,7 @@ SYSCTL_INT(_hw_vmm_svm, OID_AUTO, disable_npf_assist, CTLFLAG_RWTUN,
 
 /* Maximum ASIDs supported by the processor */
 static uint32_t nasid;
-SYSCTL_UINT(_hw_vmm_svm, OID_AUTO, num_asids, CTLFLAG_RD, &nasid, 0,
+SYSCTL_UINT(_hw_vmm_svm, OID_AUTO, num_asids, CTLFLAG_RDTUN, &nasid, 0,
     "Number of ASIDs supported by this processor");
 
 /* Current ASID generation for each host cpu */
@@ -174,9 +174,14 @@ check_svm_features(void)
 
 	/* CPUID Fn8000_000A is for SVM */
 	do_cpuid(0x8000000A, regs);
-	svm_feature = regs[3];
+	svm_feature &= regs[3];
 
-	nasid = regs[1];
+	/*
+	 * The number of ASIDs can be configured to be less than what is
+	 * supported by the hardware but not more.
+	 */
+	if (nasid == 0 || nasid > regs[1])
+		nasid = regs[1];
 	KASSERT(nasid > 1, ("Insufficient ASIDs for guests: %#x", nasid));
 
 	/* bhyve requires the Nested Paging feature */
@@ -1900,7 +1905,7 @@ enable_gintr(void)
  */
 static int
 svm_vmrun(void *arg, int vcpu, register_t rip, pmap_t pmap,
-	void *rend_cookie, void *suspended_cookie)
+	struct vm_eventinfo *evinfo)
 {
 	struct svm_regctx *gctx;
 	struct svm_softc *svm_sc;
@@ -1975,18 +1980,24 @@ svm_vmrun(void *arg, int vcpu, register_t rip, pmap_t pmap,
 		 */
 		disable_gintr();
 
-		if (vcpu_suspended(suspended_cookie)) {
+		if (vcpu_suspended(evinfo)) {
 			enable_gintr();
 			vm_exit_suspended(vm, vcpu, state->rip);
 			break;
 		}
 
-		if (vcpu_rendezvous_pending(rend_cookie)) {
+		if (vcpu_rendezvous_pending(evinfo)) {
 			enable_gintr();
 			vm_exit_rendezvous(vm, vcpu, state->rip);
 			break;
 		}
 
+		if (vcpu_reqidle(evinfo)) {
+			enable_gintr();
+			vm_exit_reqidle(vm, vcpu, state->rip);
+			break;
+		}
+
 		/* We are asked to give the cpu by scheduler. */
 		if (vcpu_should_yield(vm, vcpu)) {
 			enable_gintr();
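Switching the svm sysctls from CTLFLAG_RD to CTLFLAG_RDTUN makes them loader tunables: hw.vmm.svm.features and hw.vmm.svm.num_asids can be preset (for example in loader.conf) before vmm.ko initializes, and check_svm_features() then treats the preset as a mask or upper bound instead of overwriting it. A standalone sketch of the clamp logic; the CPUID values in main() are invented for illustration:

#include <stdint.h>
#include <stdio.h>

static uint32_t svm_feature = ~0U;	/* ~0 = "everything", until probed */
static uint32_t nasid;			/* 0 = "use the hardware value" */

static void
check_svm_features(uint32_t cpuid_edx, uint32_t cpuid_ebx)
{
	svm_feature &= cpuid_edx;	/* keep only bits the CPU really has */
	if (nasid == 0 || nasid > cpuid_ebx)
		nasid = cpuid_ebx;	/* clamp to the hardware ASID count */
}

int
main(void)
{
	nasid = 16;				/* as if hw.vmm.svm.num_asids=16 was set */
	check_svm_features(0x1ff, 32768);	/* pretend Fn8000_000Ah EDX/EBX */
	printf("features=%#x nasid=%u\n", svm_feature, nasid);	/* 0x1ff, 16 */
	return 0;
}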
diff --git a/sys/amd64/vmm/intel/vmx.c b/sys/amd64/vmm/intel/vmx.c
index 4c3f20d..f590586 100644
--- a/sys/amd64/vmm/intel/vmx.c
+++ b/sys/amd64/vmm/intel/vmx.c
@@ -856,10 +856,11 @@ vmx_vminit(struct vm *vm, pmap_t pmap)
 	 * VM exit and entry respectively. It is also restored from the
 	 * host VMCS area on a VM exit.
 	 *
-	 * The TSC MSR is exposed read-only. Writes are disallowed as that
-	 * will impact the host TSC.
-	 * XXX Writes would be implemented with a wrmsr trap, and
-	 * then modifying the TSC offset in the VMCS.
+	 * The TSC MSR is exposed read-only. Writes are disallowed as
+	 * that will impact the host TSC.  If the guest does a write
+	 * the "use TSC offsetting" execution control is enabled and the
+	 * difference between the host TSC and the guest TSC is written
+	 * into the TSC offset in the VMCS.
 	 */
 	if (guest_msr_rw(vmx, MSR_GSBASE) ||
 	    guest_msr_rw(vmx, MSR_FSBASE) ||
@@ -1130,6 +1131,22 @@ vmx_clear_nmi_window_exiting(struct vmx *vmx, int vcpu)
 	VCPU_CTR0(vmx->vm, vcpu, "Disabling NMI window exiting");
 }
 
+int
+vmx_set_tsc_offset(struct vmx *vmx, int vcpu, uint64_t offset)
+{
+	int error;
+
+	if ((vmx->cap[vcpu].proc_ctls & PROCBASED_TSC_OFFSET) == 0) {
+		vmx->cap[vcpu].proc_ctls |= PROCBASED_TSC_OFFSET;
+		vmcs_write(VMCS_PRI_PROC_BASED_CTLS, vmx->cap[vcpu].proc_ctls);
+		VCPU_CTR0(vmx->vm, vcpu, "Enabling TSC offsetting");
+	}
+
+	error = vmwrite(VMCS_TSC_OFFSET, offset);
+
+	return (error);
+}
+
 #define	NMI_BLOCKING	(VMCS_INTERRUPTIBILITY_NMI_BLOCKING |		\
 			 VMCS_INTERRUPTIBILITY_MOVSS_BLOCKING)
 #define	HWINTR_BLOCKING	(VMCS_INTERRUPTIBILITY_STI_BLOCKING |		\
@@ -2554,7 +2571,7 @@ vmx_exit_handle_nmi(struct vmx *vmx, int vcpuid, struct vm_exit *vmexit)
 
 static int
 vmx_run(void *arg, int vcpu, register_t rip, pmap_t pmap,
-    void *rendezvous_cookie, void *suspend_cookie)
+    struct vm_eventinfo *evinfo)
 {
 	int rc, handled, launched;
 	struct vmx *vmx;
@@ -2623,18 +2640,24 @@ vmx_run(void *arg, int vcpu, register_t rip, pmap_t pmap,
 		 * vmx_inject_interrupts() can suspend the vcpu due to a
 		 * triple fault.
 		 */
-		if (vcpu_suspended(suspend_cookie)) {
+		if (vcpu_suspended(evinfo)) {
 			enable_intr();
 			vm_exit_suspended(vmx->vm, vcpu, rip);
 			break;
 		}
 
-		if (vcpu_rendezvous_pending(rendezvous_cookie)) {
+		if (vcpu_rendezvous_pending(evinfo)) {
 			enable_intr();
 			vm_exit_rendezvous(vmx->vm, vcpu, rip);
 			break;
 		}
 
+		if (vcpu_reqidle(evinfo)) {
+			enable_intr();
+			vm_exit_reqidle(vmx->vm, vcpu, rip);
+			break;
+		}
+
 		if (vcpu_should_yield(vm, vcpu)) {
 			enable_intr();
 			vm_exit_astpending(vmx->vm, vcpu, rip);
diff --git a/sys/amd64/vmm/intel/vmx.h b/sys/amd64/vmm/intel/vmx.h
index bc48861..57f5b28 100644
--- a/sys/amd64/vmm/intel/vmx.h
+++ b/sys/amd64/vmm/intel/vmx.h
@@ -135,6 +135,8 @@ void	vmx_call_isr(uintptr_t entry);
 u_long	vmx_fix_cr0(u_long cr0);
 u_long	vmx_fix_cr4(u_long cr4);
 
+int	vmx_set_tsc_offset(struct vmx *vmx, int vcpu, uint64_t offset);
+
 extern char	vmx_exit_guest[];
 
 #endif
diff --git a/sys/amd64/vmm/intel/vmx_msr.c b/sys/amd64/vmm/intel/vmx_msr.c
index 3091f68..91b2c01 100644
--- a/sys/amd64/vmm/intel/vmx_msr.c
+++ b/sys/amd64/vmm/intel/vmx_msr.c
@@ -474,6 +474,9 @@ vmx_wrmsr(struct vmx *vmx, int vcpuid, u_int num, uint64_t val, bool *retu)
 		else
 			vm_inject_gp(vmx->vm, vcpuid);
 		break;
+	case MSR_TSC:
+		error = vmx_set_tsc_offset(vmx, vcpuid, val - rdtsc());
+		break;
 	default:
 		error = EINVAL;
 		break;
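The vmx changes above let a guest write MSR_TSC: vmx_wrmsr() converts the written value into a VMCS TSC offset, and vmx_set_tsc_offset() enables the "use TSC offsetting" processor-based control the first time it is needed. With offsetting enabled the guest reads host_tsc + offset, so writing offset = guest_value - rdtsc() makes the guest observe the value it wrote. A tiny runnable sketch of the arithmetic; the constants are arbitrary:

#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	uint64_t host_tsc = 1000000;	/* stand-in for rdtsc() */
	uint64_t guest_val = 5000;	/* value the guest writes to MSR_TSC */

	/* Unsigned subtraction wraps mod 2^64, i.e. this acts as a
	 * signed (here negative) offset, exactly as the VMCS field does. */
	uint64_t offset = guest_val - host_tsc;

	printf("guest sees %llu\n",
	    (unsigned long long)(host_tsc + offset));	/* prints 5000 */
	return 0;
}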
diff --git a/sys/amd64/vmm/vmm.c b/sys/amd64/vmm/vmm.c
index 2671295..2c37a1a 100644
--- a/sys/amd64/vmm/vmm.c
+++ b/sys/amd64/vmm/vmm.c
@@ -95,6 +95,7 @@ struct vcpu {
 	struct mtx 	mtx;		/* (o) protects 'state' and 'hostcpu' */
 	enum vcpu_state	state;		/* (o) vcpu state */
 	int		hostcpu;	/* (o) vcpu's host cpu */
+	int		reqidle;	/* (i) request vcpu to idle */
 	struct vlapic	*vlapic;	/* (i) APIC device model */
 	enum x2apic_state x2apic_state;	/* (i) APIC mode */
 	uint64_t	exitintinfo;	/* (i) events pending at VM exit */
@@ -164,8 +165,8 @@ static struct vmm_ops *ops;
 #define	VMM_RESUME()	(ops != NULL ? (*ops->resume)() : 0)
 
 #define	VMINIT(vm, pmap) (ops != NULL ? (*ops->vminit)(vm, pmap): NULL)
-#define	VMRUN(vmi, vcpu, rip, pmap, rptr, sptr) \
-	(ops != NULL ? (*ops->vmrun)(vmi, vcpu, rip, pmap, rptr, sptr) : ENXIO)
+#define	VMRUN(vmi, vcpu, rip, pmap, evinfo) \
+	(ops != NULL ? (*ops->vmrun)(vmi, vcpu, rip, pmap, evinfo) : ENXIO)
 #define	VMCLEANUP(vmi)	(ops != NULL ? (*ops->vmcleanup)(vmi) : NULL)
 #define	VMSPACE_ALLOC(min, max) \
 	(ops != NULL ? (*ops->vmspace_alloc)(min, max) : NULL)
@@ -221,6 +222,28 @@ TUNABLE_INT("hw.vmm.force_iommu", &vmm_force_iommu);
 SYSCTL_INT(_hw_vmm, OID_AUTO, force_iommu, CTLFLAG_RDTUN, &vmm_force_iommu, 0,
     "Force use of I/O MMU even if no passthrough devices were found.");
 
+static void vcpu_notify_event_locked(struct vcpu *vcpu, bool lapic_intr);
+
+#ifdef KTR
+static const char *
+vcpu_state2str(enum vcpu_state state)
+{
+
+	switch (state) {
+	case VCPU_IDLE:
+		return ("idle");
+	case VCPU_FROZEN:
+		return ("frozen");
+	case VCPU_RUNNING:
+		return ("running");
+	case VCPU_SLEEPING:
+		return ("sleeping");
+	default:
+		return ("unknown");
+	}
+}
+#endif
+
 static void
 vcpu_cleanup(struct vm *vm, int i, bool destroy)
 {
@@ -255,6 +278,7 @@ vcpu_init(struct vm *vm, int vcpu_id, bool create)
 
 	vcpu->vlapic = VLAPIC_INIT(vm->cookie, vcpu_id);
 	vm_set_x2apic_state(vm, vcpu_id, X2APIC_DISABLED);
+	vcpu->reqidle = 0;
 	vcpu->exitintinfo = 0;
 	vcpu->nmi_pending = 0;
 	vcpu->extint_pending = 0;
@@ -980,11 +1004,13 @@ save_guest_fpustate(struct vcpu *vcpu)
 static VMM_STAT(VCPU_IDLE_TICKS, "number of ticks vcpu was idle");
 
 static int
-vcpu_set_state_locked(struct vcpu *vcpu, enum vcpu_state newstate,
+vcpu_set_state_locked(struct vm *vm, int vcpuid, enum vcpu_state newstate,
     bool from_idle)
 {
+	struct vcpu *vcpu;
 	int error;
 
+	vcpu = &vm->vcpu[vcpuid];
 	vcpu_assert_locked(vcpu);
 
 	/*
@@ -993,8 +1019,13 @@ vcpu_set_state_locked(struct vcpu *vcpu, enum vcpu_state newstate,
 	 * ioctl() operating on a vcpu at any point.
 	 */
 	if (from_idle) {
-		while (vcpu->state != VCPU_IDLE)
+		while (vcpu->state != VCPU_IDLE) {
+			vcpu->reqidle = 1;
+			vcpu_notify_event_locked(vcpu, false);
+			VCPU_CTR1(vm, vcpuid, "vcpu state change from %s to "
+			    "idle requested", vcpu_state2str(vcpu->state));
 			msleep_spin(&vcpu->state, &vcpu->mtx, "vmstat", hz);
+		}
 	} else {
 		KASSERT(vcpu->state != VCPU_IDLE, ("invalid transition from "
 		    "vcpu idle state"));
@@ -1031,6 +1062,9 @@ vcpu_set_state_locked(struct vcpu *vcpu, enum vcpu_state newstate,
 	if (error)
 		return (EBUSY);
 
+	VCPU_CTR2(vm, vcpuid, "vcpu state changed from %s to %s",
+	    vcpu_state2str(vcpu->state), vcpu_state2str(newstate));
+
 	vcpu->state = newstate;
 	if (newstate == VCPU_RUNNING)
 		vcpu->hostcpu = curcpu;
@@ -1053,11 +1087,11 @@ vcpu_require_state(struct vm *vm, int vcpuid, enum vcpu_state newstate)
 }
 
 static void
-vcpu_require_state_locked(struct vcpu *vcpu, enum vcpu_state newstate)
+vcpu_require_state_locked(struct vm *vm, int vcpuid, enum vcpu_state newstate)
 {
 	int error;
 
-	if ((error = vcpu_set_state_locked(vcpu, newstate, false)) != 0)
+	if ((error = vcpu_set_state_locked(vm, vcpuid, newstate, false)) != 0)
 		panic("Error %d setting state to %d", error, newstate);
 }
 
@@ -1145,7 +1179,7 @@ vm_handle_hlt(struct vm *vm, int vcpuid, bool intr_disabled, bool *retu)
 		 * vcpu returned from VMRUN() and before it acquired the
 		 * vcpu lock above.
 		 */
-		if (vm->rendezvous_func != NULL || vm->suspend)
+		if (vm->rendezvous_func != NULL || vm->suspend || vcpu->reqidle)
 			break;
 		if (vm_nmi_pending(vm, vcpuid))
 			break;
@@ -1182,13 +1216,13 @@ vm_handle_hlt(struct vm *vm, int vcpuid, bool intr_disabled, bool *retu)
 		}
 
 		t = ticks;
-		vcpu_require_state_locked(vcpu, VCPU_SLEEPING);
+		vcpu_require_state_locked(vm, vcpuid, VCPU_SLEEPING);
 		/*
 		 * XXX msleep_spin() cannot be interrupted by signals so
 		 * wake up periodically to check pending signals.
 		 */
 		msleep_spin(vcpu, &vcpu->mtx, wmesg, hz);
-		vcpu_require_state_locked(vcpu, VCPU_FROZEN);
+		vcpu_require_state_locked(vm, vcpuid, VCPU_FROZEN);
 		vmm_stat_incr(vm, vcpuid, VCPU_IDLE_TICKS, ticks - t);
 	}
@@ -1350,9 +1384,9 @@ vm_handle_suspend(struct vm *vm, int vcpuid, bool *retu)
 
 		if (vm->rendezvous_func == NULL) {
 			VCPU_CTR0(vm, vcpuid, "Sleeping during suspend");
-			vcpu_require_state_locked(vcpu, VCPU_SLEEPING);
+			vcpu_require_state_locked(vm, vcpuid, VCPU_SLEEPING);
 			msleep_spin(vcpu, &vcpu->mtx, "vmsusp", hz);
-			vcpu_require_state_locked(vcpu, VCPU_FROZEN);
+			vcpu_require_state_locked(vm, vcpuid, VCPU_FROZEN);
 		} else {
 			VCPU_CTR0(vm, vcpuid, "Rendezvous during suspend");
 			vcpu_unlock(vcpu);
@@ -1375,6 +1409,19 @@ vm_handle_suspend(struct vm *vm, int vcpuid, bool *retu)
 	return (0);
 }
 
+static int
+vm_handle_reqidle(struct vm *vm, int vcpuid, bool *retu)
+{
+	struct vcpu *vcpu = &vm->vcpu[vcpuid];
+
+	vcpu_lock(vcpu);
+	KASSERT(vcpu->reqidle, ("invalid vcpu reqidle %d", vcpu->reqidle));
+	vcpu->reqidle = 0;
+	vcpu_unlock(vcpu);
+	*retu = true;
+	return (0);
+}
+
 int
 vm_suspend(struct vm *vm, enum vm_suspend_how how)
 {
@@ -1432,6 +1479,18 @@ vm_exit_rendezvous(struct vm *vm, int vcpuid, uint64_t rip)
 }
 
 void
+vm_exit_reqidle(struct vm *vm, int vcpuid, uint64_t rip)
+{
+	struct vm_exit *vmexit;
+
+	vmexit = vm_exitinfo(vm, vcpuid);
+	vmexit->rip = rip;
+	vmexit->inst_length = 0;
+	vmexit->exitcode = VM_EXITCODE_REQIDLE;
+	vmm_stat_incr(vm, vcpuid, VMEXIT_REQIDLE, 1);
+}
+
+void
 vm_exit_astpending(struct vm *vm, int vcpuid, uint64_t rip)
 {
 	struct vm_exit *vmexit;
@@ -1446,6 +1505,7 @@ vm_exit_astpending(struct vm *vm, int vcpuid, uint64_t rip)
 int
 vm_run(struct vm *vm, struct vm_run *vmrun)
 {
+	struct vm_eventinfo evinfo;
 	int error, vcpuid;
 	struct vcpu *vcpu;
 	struct pcb *pcb;
@@ -1453,7 +1513,6 @@ vm_run(struct vm *vm, struct vm_run *vmrun)
 	struct vm_exit *vme;
 	bool retu, intr_disabled;
 	pmap_t pmap;
-	void *rptr, *sptr;
 
 	vcpuid = vmrun->cpuid;
 
@@ -1466,11 +1525,12 @@ vm_run(struct vm *vm, struct vm_run *vmrun)
 	if (CPU_ISSET(vcpuid, &vm->suspended_cpus))
 		return (EINVAL);
 
-	rptr = &vm->rendezvous_func;
-	sptr = &vm->suspend;
 	pmap = vmspace_pmap(vm->vmspace);
 	vcpu = &vm->vcpu[vcpuid];
 	vme = &vcpu->exitinfo;
+	evinfo.rptr = &vm->rendezvous_func;
+	evinfo.sptr = &vm->suspend;
+	evinfo.iptr = &vcpu->reqidle;
 
 restart:
 	critical_enter();
@@ -1485,7 +1545,7 @@ restart:
 	restore_guest_fpustate(vcpu);
 
 	vcpu_require_state(vm, vcpuid, VCPU_RUNNING);
-	error = VMRUN(vm->cookie, vcpuid, vcpu->nextrip, pmap, rptr, sptr);
+	error = VMRUN(vm->cookie, vcpuid, vcpu->nextrip, pmap, &evinfo);
 	vcpu_require_state(vm, vcpuid, VCPU_FROZEN);
 
 	save_guest_fpustate(vcpu);
@@ -1498,6 +1558,9 @@ restart:
 		retu = false;
 		vcpu->nextrip = vme->rip + vme->inst_length;
 		switch (vme->exitcode) {
+		case VM_EXITCODE_REQIDLE:
+			error = vm_handle_reqidle(vm, vcpuid, &retu);
+			break;
 		case VM_EXITCODE_SUSPENDED:
 			error = vm_handle_suspend(vm, vcpuid, &retu);
 			break;
@@ -1536,6 +1599,8 @@ restart:
 	if (error == 0 && retu == false)
 		goto restart;
 
+	VCPU_CTR2(vm, vcpuid, "retu %d/%d", error, vme->exitcode);
+
 	/* copy the exit information */
 	bcopy(vme, &vmrun->vm_exit, sizeof(struct vm_exit));
 	return (error);
@@ -2072,7 +2137,7 @@ vcpu_set_state(struct vm *vm, int vcpuid, enum vcpu_state newstate,
 	vcpu = &vm->vcpu[vcpuid];
 	vcpu_lock(vcpu);
-	error = vcpu_set_state_locked(vcpu, newstate, from_idle);
+	error = vcpu_set_state_locked(vm, vcpuid, newstate, from_idle);
 	vcpu_unlock(vcpu);
 
 	return (error);
@@ -2168,15 +2233,11 @@ vm_set_x2apic_state(struct vm *vm, int vcpuid, enum x2apic_state state)
  * - If the vcpu is running on a different host_cpu then an IPI will be directed
  *   to the host_cpu to cause the vcpu to trap into the hypervisor.
  */
-void
-vcpu_notify_event(struct vm *vm, int vcpuid, bool lapic_intr)
+static void
+vcpu_notify_event_locked(struct vcpu *vcpu, bool lapic_intr)
 {
 	int hostcpu;
-	struct vcpu *vcpu;
-
-	vcpu = &vm->vcpu[vcpuid];
-
-	vcpu_lock(vcpu);
+
 	hostcpu = vcpu->hostcpu;
 	if (vcpu->state == VCPU_RUNNING) {
 		KASSERT(hostcpu != NOCPU, ("vcpu running on invalid hostcpu"));
@@ -2201,6 +2262,15 @@ vcpu_notify_event(struct vm *vm, int vcpuid, bool lapic_intr)
 		if (vcpu->state == VCPU_SLEEPING)
 			wakeup_one(vcpu);
 	}
+}
+
+void
+vcpu_notify_event(struct vm *vm, int vcpuid, bool lapic_intr)
+{
+	struct vcpu *vcpu = &vm->vcpu[vcpuid];
+
+	vcpu_lock(vcpu);
+	vcpu_notify_event_locked(vcpu, lapic_intr);
 	vcpu_unlock(vcpu);
 }
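Taken together, the vmm.c changes close a window in vcpu_set_state_locked(): a thread trying to move a vcpu to VCPU_IDLE now sets vcpu->reqidle and kicks the vcpu (IPI or wakeup), the backend run loops turn that flag into a synthetic VM_EXITCODE_REQIDLE exit, and vm_handle_reqidle() clears the flag and bounces the vcpu out to userspace so it can actually reach the idle state. Below is a runnable pthread analogue of the handshake; condition variables stand in for msleep_spin()/wakeup_one(), so this is a sketch of the protocol, not the kernel's locking.

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t mtx = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t cv = PTHREAD_COND_INITIALIZER;
static int reqidle, idle;

static void *
vcpu_loop(void *arg)
{
	pthread_mutex_lock(&mtx);
	while (!reqidle)		/* run-loop poll, cf. vcpu_reqidle() */
		pthread_cond_wait(&cv, &mtx);
	reqidle = 0;			/* cf. vm_handle_reqidle() */
	idle = 1;			/* vcpu reaches VCPU_IDLE */
	pthread_cond_broadcast(&cv);
	pthread_mutex_unlock(&mtx);
	return NULL;
}

int
main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, vcpu_loop, NULL);
	pthread_mutex_lock(&mtx);
	reqidle = 1;			/* cf. vcpu_set_state_locked() */
	pthread_cond_broadcast(&cv);	/* cf. vcpu_notify_event_locked() */
	while (!idle)
		pthread_cond_wait(&cv, &mtx);	/* cf. msleep_spin() */
	pthread_mutex_unlock(&mtx);
	pthread_join(t, NULL);
	printf("vcpu idled on request\n");
	return 0;
}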
diff --git a/sys/amd64/vmm/vmm_instruction_emul.c b/sys/amd64/vmm/vmm_instruction_emul.c
index 9b5713d..758b7e8 100644
--- a/sys/amd64/vmm/vmm_instruction_emul.c
+++ b/sys/amd64/vmm/vmm_instruction_emul.c
@@ -2342,7 +2342,7 @@ verify_gla(struct vm *vm, int cpuid, uint64_t gla, struct vie *vie)
 		 * instruction
 		 */
 		if (vie->base_register == VM_REG_GUEST_RIP)
-			base += vie->num_valid;
+			base += vie->num_processed;
 	}
 
 	idx = 0;
diff --git a/sys/amd64/vmm/vmm_stat.c b/sys/amd64/vmm/vmm_stat.c
index 4ae5fb9..7e2f64d 100644
--- a/sys/amd64/vmm/vmm_stat.c
+++ b/sys/amd64/vmm/vmm_stat.c
@@ -164,6 +164,7 @@ VMM_STAT(VMEXIT_NESTED_FAULT, "vm exits due to nested page fault");
 VMM_STAT(VMEXIT_INST_EMUL, "vm exits for instruction emulation");
 VMM_STAT(VMEXIT_UNKNOWN, "number of vm exits for unknown reason");
 VMM_STAT(VMEXIT_ASTPENDING, "number of times astpending at exit");
+VMM_STAT(VMEXIT_REQIDLE, "number of times idle requested at exit");
 VMM_STAT(VMEXIT_USERSPACE, "number of vm exits handled in userspace");
 VMM_STAT(VMEXIT_RENDEZVOUS, "number of times rendezvous pending at exit");
 VMM_STAT(VMEXIT_EXCEPTION, "number of vm exits due to exceptions");
diff --git a/sys/amd64/vmm/vmm_stat.h b/sys/amd64/vmm/vmm_stat.h
index 1640ba3..c695840 100644
--- a/sys/amd64/vmm/vmm_stat.h
+++ b/sys/amd64/vmm/vmm_stat.h
@@ -157,4 +157,5 @@ VMM_STAT_DECLARE(VMEXIT_ASTPENDING);
 VMM_STAT_DECLARE(VMEXIT_USERSPACE);
 VMM_STAT_DECLARE(VMEXIT_RENDEZVOUS);
 VMM_STAT_DECLARE(VMEXIT_EXCEPTION);
+VMM_STAT_DECLARE(VMEXIT_REQIDLE);
 #endif