Diffstat (limited to 'sys/amd64')
-rw-r--r--  sys/amd64/amd64/gdb_machdep.c         |   4
-rw-r--r--  sys/amd64/amd64/initcpu.c             |   1
-rw-r--r--  sys/amd64/amd64/pmap.c                |   7
-rw-r--r--  sys/amd64/amd64/trap.c                |  14
-rw-r--r--  sys/amd64/amd64/vm_machdep.c          |   2
-rw-r--r--  sys/amd64/conf/NOTES                  |  17
-rw-r--r--  sys/amd64/ia32/ia32_reg.c             |   6
-rw-r--r--  sys/amd64/include/md_var.h            |   1
-rw-r--r--  sys/amd64/include/vmm.h               |  26
-rw-r--r--  sys/amd64/include/vmparam.h           |   5
-rw-r--r--  sys/amd64/linux/linux_machdep.c       |   2
-rw-r--r--  sys/amd64/linux/linux_support.s       |  48
-rw-r--r--  sys/amd64/linux32/linux32_machdep.c   |   2
-rw-r--r--  sys/amd64/vmm/amd/svm.c               |  27
-rw-r--r--  sys/amd64/vmm/intel/vmx.c             |  37
-rw-r--r--  sys/amd64/vmm/intel/vmx.h             |   2
-rw-r--r--  sys/amd64/vmm/intel/vmx_msr.c         |   3
-rw-r--r--  sys/amd64/vmm/vmm.c                   | 114
-rw-r--r--  sys/amd64/vmm/vmm_instruction_emul.c  |   2
-rw-r--r--  sys/amd64/vmm/vmm_stat.c              |   1
-rw-r--r--  sys/amd64/vmm/vmm_stat.h              |   1
21 files changed, 228 insertions, 94 deletions
diff --git a/sys/amd64/amd64/gdb_machdep.c b/sys/amd64/amd64/gdb_machdep.c
index 5775c8f..61ffad6 100644
--- a/sys/amd64/amd64/gdb_machdep.c
+++ b/sys/amd64/amd64/gdb_machdep.c
@@ -48,6 +48,8 @@ __FBSDID("$FreeBSD$");
void *
gdb_cpu_getreg(int regnum, size_t *regsz)
{
+ static uint32_t _kcodesel = GSEL(GCODE_SEL, SEL_KPL);
+ static uint32_t _kdatasel = GSEL(GDATA_SEL, SEL_KPL);
*regsz = gdb_cpu_regsz(regnum);
@@ -76,6 +78,8 @@ gdb_cpu_getreg(int regnum, size_t *regsz)
case 14: return (&kdb_thrctx->pcb_r14);
case 15: return (&kdb_thrctx->pcb_r15);
case 16: return (&kdb_thrctx->pcb_rip);
+ case 18: return (&_kcodesel);
+ case 19: return (&_kdatasel);
}
return (NULL);
}
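
Note on the gdb_machdep.c hunk: in GDB's amd64 register numbering,
registers 18 and 19 are %cs and %ss, which the kernel pcb does not
save, so the debugger is answered with static kernel selectors. As a
minimal sketch of how GSEL() composes such a selector (standard x86
selector layout; the real macro lives in machine/segments.h, values
here are illustrative):

	#define	SEL_KPL		0	/* kernel privilege level (RPL) */
	#define	GSEL(s, r)	(((s) << 3) | (r))	/* GDT index << 3, TI = 0, RPL */

GSEL(GCODE_SEL, SEL_KPL) is thus the GDT slot of the kernel code
descriptor shifted into selector form, with ring-0 RPL.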
diff --git a/sys/amd64/amd64/initcpu.c b/sys/amd64/amd64/initcpu.c
index 8551d27..36f2d0f 100644
--- a/sys/amd64/amd64/initcpu.c
+++ b/sys/amd64/amd64/initcpu.c
@@ -74,6 +74,7 @@ u_int cpu_fxsr; /* SSE enabled */
u_int cpu_mxcsr_mask; /* Valid bits in mxcsr */
u_int cpu_clflush_line_size = 32;
u_int cpu_stdext_feature;
+u_int cpu_stdext_feature2;
u_int cpu_max_ext_state_size;
u_int cpu_mon_mwait_flags; /* MONITOR/MWAIT flags (CPUID.05H.ECX) */
u_int cpu_mon_min_size; /* MONITOR minimum range size, bytes */
diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c
index 5f9ee91..6a3de60 100644
--- a/sys/amd64/amd64/pmap.c
+++ b/sys/amd64/amd64/pmap.c
@@ -3935,7 +3935,6 @@ pmap_promote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va,
pd_entry_t newpde;
pt_entry_t *firstpte, oldpte, pa, *pte;
pt_entry_t PG_G, PG_A, PG_M, PG_RW, PG_V;
- vm_offset_t oldpteva;
vm_page_t mpte;
int PG_PTE_CACHE;
@@ -3995,10 +3994,9 @@ setpte:
if (!atomic_cmpset_long(pte, oldpte, oldpte & ~PG_RW))
goto setpte;
oldpte &= ~PG_RW;
- oldpteva = (oldpte & PG_FRAME & PDRMASK) |
- (va & ~PDRMASK);
CTR2(KTR_PMAP, "pmap_promote_pde: protect for va %#lx"
- " in pmap %p", oldpteva, pmap);
+ " in pmap %p", (oldpte & PG_FRAME & PDRMASK) |
+ (va & ~PDRMASK), pmap);
}
if ((oldpte & PG_PTE_PROMOTE) != (newpde & PG_PTE_PROMOTE)) {
atomic_add_long(&pmap_pde_p_failures, 1);
@@ -4846,6 +4844,7 @@ pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len,
PG_PS_FRAME, &lock))) {
*pde = srcptepaddr & ~PG_W;
pmap_resident_count_inc(dst_pmap, NBPDR / PAGE_SIZE);
+ atomic_add_long(&pmap_pde_mappings, 1);
} else
dstmpde->wire_count--;
continue;
diff --git a/sys/amd64/amd64/trap.c b/sys/amd64/amd64/trap.c
index 193d207..fa74eb2 100644
--- a/sys/amd64/amd64/trap.c
+++ b/sys/amd64/amd64/trap.c
@@ -257,8 +257,8 @@ trap(struct trapframe *frame)
td->td_pticks = 0;
td->td_frame = frame;
addr = frame->tf_rip;
- if (td->td_ucred != p->p_ucred)
- cred_update_thread(td);
+ if (td->td_cowgen != p->p_cowgen)
+ thread_cow_update(td);
switch (type) {
case T_PRIVINFLT: /* privileged instruction fault */
@@ -840,14 +840,8 @@ trap_fatal(frame, eva)
if (frame->tf_rflags & PSL_RF)
printf("resume, ");
printf("IOPL = %ld\n", (frame->tf_rflags & PSL_IOPL) >> 12);
- printf("current process = ");
- if (curproc) {
- printf("%lu (%s)\n",
- (u_long)curproc->p_pid, curthread->td_name ?
- curthread->td_name : "");
- } else {
- printf("Idle\n");
- }
+ printf("current process = %d (%s)\n",
+ curproc->p_pid, curthread->td_name);
#ifdef KDB
if (debugger_on_panic || kdb_active)
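
Note on the trap.c hunk: the per-thread ucred pointer comparison is
replaced by a copy-on-write generation check; thread_cow_update()
refreshes the thread's snapshot of shared process data (credentials,
and in this scheme resource limits as well). A minimal sketch of the
generation-count pattern, with illustrative names rather than the
kernel's real structures:

	struct pshared { u_int gen; };	/* bumped on every change */
	struct tcache  { u_int gen; };	/* generation last copied */

	static void
	cow_sync(struct tcache *tc, struct pshared *ps)
	{
		if (tc->gen != ps->gen) {	/* snapshot stale? */
			/* ... re-copy credentials, limits, ... */
			tc->gen = ps->gen;
		}
	}

The check on kernel entry is now a single integer comparison, and the
costly copy happens only when the process-wide generation has moved.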
diff --git a/sys/amd64/amd64/vm_machdep.c b/sys/amd64/amd64/vm_machdep.c
index 0d03ed6..1fb7016 100644
--- a/sys/amd64/amd64/vm_machdep.c
+++ b/sys/amd64/amd64/vm_machdep.c
@@ -155,7 +155,6 @@ cpu_fork(td1, p2, td2, flags)
struct pcb *pcb2;
struct mdproc *mdp1, *mdp2;
struct proc_ldt *pldt;
- pmap_t pmap2;
p1 = td1->td_proc;
if ((flags & RFPROC) == 0) {
@@ -218,7 +217,6 @@ cpu_fork(td1, p2, td2, flags)
* Set registers for trampoline to user mode. Leave space for the
* return address on stack. These are the kernel mode register values.
*/
- pmap2 = vmspace_pmap(p2->p_vmspace);
pcb2->pcb_r12 = (register_t)fork_return; /* fork_trampoline argument */
pcb2->pcb_rbp = 0;
pcb2->pcb_rsp = (register_t)td2->td_frame - sizeof(void *);
diff --git a/sys/amd64/conf/NOTES b/sys/amd64/conf/NOTES
index e0fe465..97eefbf 100644
--- a/sys/amd64/conf/NOTES
+++ b/sys/amd64/conf/NOTES
@@ -17,6 +17,23 @@ profile 2
#
options KDTRACE_HOOKS
+# DTrace core
+# NOTE: introduces CDDL-licensed components into the kernel
+#device dtrace
+
+# DTrace modules
+#device dtrace_lockstat
+#device dtrace_profile
+#device dtrace_sdt
+#device dtrace_fbt
+#device dtrace_systrace
+#device dtrace_prototype
+#device dtnfscl
+#device dtmalloc
+
+# Alternatively include all the DTrace modules
+#device dtraceall
+
#####################################################################
# SMP OPTIONS:
diff --git a/sys/amd64/ia32/ia32_reg.c b/sys/amd64/ia32/ia32_reg.c
index 5bc18f1..d0e6bfe 100644
--- a/sys/amd64/ia32/ia32_reg.c
+++ b/sys/amd64/ia32/ia32_reg.c
@@ -79,11 +79,9 @@ __FBSDID("$FreeBSD$");
int
fill_regs32(struct thread *td, struct reg32 *regs)
{
- struct pcb *pcb;
struct trapframe *tp;
tp = td->td_frame;
- pcb = td->td_pcb;
if (tp->tf_flags & TF_HASSEGS) {
regs->r_gs = tp->tf_gs;
regs->r_fs = tp->tf_fs;
@@ -113,18 +111,16 @@ fill_regs32(struct thread *td, struct reg32 *regs)
int
set_regs32(struct thread *td, struct reg32 *regs)
{
- struct pcb *pcb;
struct trapframe *tp;
tp = td->td_frame;
if (!EFL_SECURE(regs->r_eflags, tp->tf_rflags) || !CS_SECURE(regs->r_cs))
return (EINVAL);
- pcb = td->td_pcb;
tp->tf_gs = regs->r_gs;
tp->tf_fs = regs->r_fs;
tp->tf_es = regs->r_es;
tp->tf_ds = regs->r_ds;
- set_pcb_flags(pcb, PCB_FULL_IRET);
+ set_pcb_flags(td->td_pcb, PCB_FULL_IRET);
tp->tf_flags = TF_HASSEGS;
tp->tf_rdi = regs->r_edi;
tp->tf_rsi = regs->r_esi;
diff --git a/sys/amd64/include/md_var.h b/sys/amd64/include/md_var.h
index 0813e5f..954df3e 100644
--- a/sys/amd64/include/md_var.h
+++ b/sys/amd64/include/md_var.h
@@ -49,6 +49,7 @@ extern u_int via_feature_rng;
extern u_int via_feature_xcrypt;
extern u_int cpu_clflush_line_size;
extern u_int cpu_stdext_feature;
+extern u_int cpu_stdext_feature2;
extern u_int cpu_fxsr;
extern u_int cpu_high;
extern u_int cpu_id;
diff --git a/sys/amd64/include/vmm.h b/sys/amd64/include/vmm.h
index d3798bc..1a4e5ab 100644
--- a/sys/amd64/include/vmm.h
+++ b/sys/amd64/include/vmm.h
@@ -120,13 +120,18 @@ struct vm_object;
struct vm_guest_paging;
struct pmap;
+struct vm_eventinfo {
+ void *rptr; /* rendezvous cookie */
+ int *sptr; /* suspend cookie */
+ int *iptr; /* reqidle cookie */
+};
+
typedef int (*vmm_init_func_t)(int ipinum);
typedef int (*vmm_cleanup_func_t)(void);
typedef void (*vmm_resume_func_t)(void);
typedef void * (*vmi_init_func_t)(struct vm *vm, struct pmap *pmap);
typedef int (*vmi_run_func_t)(void *vmi, int vcpu, register_t rip,
- struct pmap *pmap, void *rendezvous_cookie,
- void *suspend_cookie);
+ struct pmap *pmap, struct vm_eventinfo *info);
typedef void (*vmi_cleanup_func_t)(void *vmi);
typedef int (*vmi_get_register_t)(void *vmi, int vcpu, int num,
uint64_t *retval);
@@ -208,6 +213,7 @@ struct vm_exit *vm_exitinfo(struct vm *vm, int vcpuid);
void vm_exit_suspended(struct vm *vm, int vcpuid, uint64_t rip);
void vm_exit_rendezvous(struct vm *vm, int vcpuid, uint64_t rip);
void vm_exit_astpending(struct vm *vm, int vcpuid, uint64_t rip);
+void vm_exit_reqidle(struct vm *vm, int vcpuid, uint64_t rip);
#ifdef _SYS__CPUSET_H_
/*
@@ -232,17 +238,24 @@ cpuset_t vm_suspended_cpus(struct vm *vm);
#endif /* _SYS__CPUSET_H_ */
static __inline int
-vcpu_rendezvous_pending(void *rendezvous_cookie)
+vcpu_rendezvous_pending(struct vm_eventinfo *info)
+{
+
+ return (*((uintptr_t *)(info->rptr)) != 0);
+}
+
+static __inline int
+vcpu_suspended(struct vm_eventinfo *info)
{
- return (*(uintptr_t *)rendezvous_cookie != 0);
+ return (*info->sptr);
}
static __inline int
-vcpu_suspended(void *suspend_cookie)
+vcpu_reqidle(struct vm_eventinfo *info)
{
- return (*(int *)suspend_cookie);
+ return (*info->iptr);
}
/*
@@ -506,6 +519,7 @@ enum vm_exitcode {
VM_EXITCODE_MONITOR,
VM_EXITCODE_MWAIT,
VM_EXITCODE_SVM,
+ VM_EXITCODE_REQIDLE,
VM_EXITCODE_MAX
};
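
Note on the vmm.h hunk: folding the rendezvous and suspend cookies
into struct vm_eventinfo lets the new reqidle pointer ride along
without widening vmi_run_func_t a third time, and future events can
be added without touching every backend signature. A condensed sketch
of how a backend's inner loop consumes it (the svm.c and vmx.c hunks
below show the real, per-event versions):

	if (vcpu_suspended(evinfo) || vcpu_rendezvous_pending(evinfo) ||
	    vcpu_reqidle(evinfo)) {
		/* re-enable interrupts, report the matching
		 * vm_exit_*() and break out of the run loop */
	}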
diff --git a/sys/amd64/include/vmparam.h b/sys/amd64/include/vmparam.h
index a92dd4e..07158e8 100644
--- a/sys/amd64/include/vmparam.h
+++ b/sys/amd64/include/vmparam.h
@@ -90,13 +90,12 @@
#define VM_PHYSSEG_MAX 63
/*
- * Create three free page pools: VM_FREEPOOL_DEFAULT is the default pool
+ * Create two free page pools: VM_FREEPOOL_DEFAULT is the default pool
* from which physical pages are allocated and VM_FREEPOOL_DIRECT is
* the pool from which physical pages for page tables and small UMA
* objects are allocated.
*/
-#define VM_NFREEPOOL 3
-#define VM_FREEPOOL_CACHE 2
+#define VM_NFREEPOOL 2
#define VM_FREEPOOL_DEFAULT 0
#define VM_FREEPOOL_DIRECT 1
diff --git a/sys/amd64/linux/linux_machdep.c b/sys/amd64/linux/linux_machdep.c
index d6174e6..451e4b4 100644
--- a/sys/amd64/linux/linux_machdep.c
+++ b/sys/amd64/linux/linux_machdep.c
@@ -251,7 +251,7 @@ linux_mmap2(struct thread *td, struct linux_mmap2_args *args)
*/
PROC_LOCK(p);
p->p_vmspace->vm_maxsaddr = (char *)USRSTACK -
- lim_cur(p, RLIMIT_STACK);
+ lim_cur_proc(p, RLIMIT_STACK);
PROC_UNLOCK(p);
}
diff --git a/sys/amd64/linux/linux_support.s b/sys/amd64/linux/linux_support.s
index f809d11..2a3ba1a 100644
--- a/sys/amd64/linux/linux_support.s
+++ b/sys/amd64/linux/linux_support.s
@@ -45,9 +45,9 @@ ENTRY(futex_xchgl)
movq $VM_MAXUSER_ADDRESS-4,%rax
cmpq %rax,%rsi
ja futex_fault
- xchgq %rdi,(%rsi)
- movq %rdi,(%rdx)
- xorq %rax,%rax
+ xchgl %edi,(%rsi)
+ movl %edi,(%rdx)
+ xorl %eax,%eax
movq %rax,PCB_ONFAULT(%r8)
ret
@@ -60,9 +60,9 @@ ENTRY(futex_addl)
#ifdef SMP
lock
#endif
- xaddq %rdi,(%rsi)
- movq %rdi,(%rdx)
- xorq %rax,%rax
+ xaddl %edi,(%rsi)
+ movl %edi,(%rdx)
+ xorl %eax,%eax
movq %rax,PCB_ONFAULT(%r8)
ret
@@ -72,16 +72,16 @@ ENTRY(futex_orl)
movq $VM_MAXUSER_ADDRESS-4,%rax
cmpq %rax,%rsi
ja futex_fault
- movq (%rsi),%rax
-1: movq %rax,%rcx
- orq %rdi,%rcx
+ movl (%rsi),%eax
+1: movl %eax,%ecx
+ orl %edi,%ecx
#ifdef SMP
lock
#endif
- cmpxchgq %rcx,(%rsi)
+ cmpxchgl %ecx,(%rsi)
jnz 1b
- movq %rax,(%rdx)
- xorq %rax,%rax
+ movl %eax,(%rdx)
+ xorl %eax,%eax
movq %rax,PCB_ONFAULT(%r8)
ret
@@ -91,16 +91,16 @@ ENTRY(futex_andl)
movq $VM_MAXUSER_ADDRESS-4,%rax
cmpq %rax,%rsi
ja futex_fault
- movq (%rsi),%rax
-1: movq %rax,%rcx
- andq %rdi,%rcx
+ movl (%rsi),%eax
+1: movl %eax,%ecx
+ andl %edi,%ecx
#ifdef SMP
lock
#endif
- cmpxchgq %rcx,(%rsi)
+ cmpxchgl %ecx,(%rsi)
jnz 1b
- movq %rax,(%rdx)
- xorq %rax,%rax
+ movl %eax,(%rdx)
+ xorl %eax,%eax
movq %rax,PCB_ONFAULT(%r8)
ret
@@ -110,15 +110,15 @@ ENTRY(futex_xorl)
movq $VM_MAXUSER_ADDRESS-4,%rax
cmpq %rax,%rsi
ja futex_fault
- movq (%rsi),%rax
-1: movq %rax,%rcx
- xorq %rdi,%rcx
+ movl (%rsi),%eax
+1: movl %eax,%ecx
+ xorl %edi,%ecx
#ifdef SMP
lock
#endif
- cmpxchgq %rcx,(%rsi)
+ cmpxchgl %ecx,(%rsi)
jnz 1b
- movq %rax,(%rdx)
- xorq %rax,%rax
+ movl %eax,(%rdx)
+ xorl %eax,%eax
movq %rax,PCB_ONFAULT(%r8)
ret
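
Note on the linux_support.s hunks: a Linux futex is a 32-bit word, so
the old 64-bit forms (xchgq, xaddq, cmpxchgq) accessed 8 bytes at the
user address, clobbering the word adjacent to the futex and computing
wrong old values; narrowing every operation to its 32-bit form fixes
that. A C-level equivalent of the corrected futex_orl loop, as a
sketch only (the real routine runs with PCB_ONFAULT set so a bad user
address unwinds through futex_fault; atomic_cmpset_32 is FreeBSD's
compare-and-set, returning non-zero on success):

	static int
	futex_orl_sketch(int oparg, uint32_t *uaddr, int *oldval)
	{
		uint32_t old;

		do {
			old = *uaddr;		/* movl (%rsi),%eax */
		} while (!atomic_cmpset_32(uaddr, old, old | oparg));
						/* lock cmpxchgl %ecx,(%rsi) */
		*oldval = (int)old;		/* movl %eax,(%rdx) */
		return (0);			/* xorl %eax,%eax */
	}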
diff --git a/sys/amd64/linux32/linux32_machdep.c b/sys/amd64/linux32/linux32_machdep.c
index f9c11ca..1c54c5c 100644
--- a/sys/amd64/linux32/linux32_machdep.c
+++ b/sys/amd64/linux32/linux32_machdep.c
@@ -615,7 +615,7 @@ linux_mmap_common(struct thread *td, l_uintptr_t addr, l_size_t len, l_int prot,
*/
PROC_LOCK(p);
p->p_vmspace->vm_maxsaddr = (char *)LINUX32_USRSTACK -
- lim_cur(p, RLIMIT_STACK);
+ lim_cur_proc(p, RLIMIT_STACK);
PROC_UNLOCK(p);
}
diff --git a/sys/amd64/vmm/amd/svm.c b/sys/amd64/vmm/amd/svm.c
index 20e8f76..b25d69d 100644
--- a/sys/amd64/vmm/amd/svm.c
+++ b/sys/amd64/vmm/amd/svm.c
@@ -102,8 +102,8 @@ static MALLOC_DEFINE(M_SVM_VLAPIC, "svm-vlapic", "svm-vlapic");
/* Per-CPU context area. */
extern struct pcpu __pcpu[];
-static uint32_t svm_feature; /* AMD SVM features. */
-SYSCTL_UINT(_hw_vmm_svm, OID_AUTO, features, CTLFLAG_RD, &svm_feature, 0,
+static uint32_t svm_feature = ~0U; /* AMD SVM features. */
+SYSCTL_UINT(_hw_vmm_svm, OID_AUTO, features, CTLFLAG_RDTUN, &svm_feature, 0,
"SVM features advertised by CPUID.8000000AH:EDX");
static int disable_npf_assist;
@@ -112,7 +112,7 @@ SYSCTL_INT(_hw_vmm_svm, OID_AUTO, disable_npf_assist, CTLFLAG_RWTUN,
/* Maximum ASIDs supported by the processor */
static uint32_t nasid;
-SYSCTL_UINT(_hw_vmm_svm, OID_AUTO, num_asids, CTLFLAG_RD, &nasid, 0,
+SYSCTL_UINT(_hw_vmm_svm, OID_AUTO, num_asids, CTLFLAG_RDTUN, &nasid, 0,
"Number of ASIDs supported by this processor");
/* Current ASID generation for each host cpu */
@@ -174,9 +174,14 @@ check_svm_features(void)
/* CPUID Fn8000_000A is for SVM */
do_cpuid(0x8000000A, regs);
- svm_feature = regs[3];
+ svm_feature &= regs[3];
- nasid = regs[1];
+ /*
+ * The number of ASIDs can be configured to be less than what is
+ * supported by the hardware but not more.
+ */
+ if (nasid == 0 || nasid > regs[1])
+ nasid = regs[1];
KASSERT(nasid > 1, ("Insufficient ASIDs for guests: %#x", nasid));
/* bhyve requires the Nested Paging feature */
@@ -1900,7 +1905,7 @@ enable_gintr(void)
*/
static int
svm_vmrun(void *arg, int vcpu, register_t rip, pmap_t pmap,
- void *rend_cookie, void *suspended_cookie)
+ struct vm_eventinfo *evinfo)
{
struct svm_regctx *gctx;
struct svm_softc *svm_sc;
@@ -1975,18 +1980,24 @@ svm_vmrun(void *arg, int vcpu, register_t rip, pmap_t pmap,
*/
disable_gintr();
- if (vcpu_suspended(suspended_cookie)) {
+ if (vcpu_suspended(evinfo)) {
enable_gintr();
vm_exit_suspended(vm, vcpu, state->rip);
break;
}
- if (vcpu_rendezvous_pending(rend_cookie)) {
+ if (vcpu_rendezvous_pending(evinfo)) {
enable_gintr();
vm_exit_rendezvous(vm, vcpu, state->rip);
break;
}
+ if (vcpu_reqidle(evinfo)) {
+ enable_gintr();
+ vm_exit_reqidle(vm, vcpu, state->rip);
+ break;
+ }
+
/* We are asked to give the cpu by scheduler. */
if (vcpu_should_yield(vm, vcpu)) {
enable_gintr();
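
Note on the svm.c sysctl hunks: CTLFLAG_RDTUN makes both knobs loader
tunables as well as read-only sysctls. svm_feature now starts as ~0U
and is ANDed with CPUID.8000000AH:EDX, so a tunable can only mask
features off, never advertise ones the CPU lacks; nasid is likewise
clamped to the hardware ASID count from CPUID.8000000AH:EBX. For
example, a loader.conf line such as hw.vmm.svm.num_asids="64" caps
the ASID space, with check_svm_features() enforcing the hardware
limit.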
diff --git a/sys/amd64/vmm/intel/vmx.c b/sys/amd64/vmm/intel/vmx.c
index 4c3f20d..f590586 100644
--- a/sys/amd64/vmm/intel/vmx.c
+++ b/sys/amd64/vmm/intel/vmx.c
@@ -856,10 +856,11 @@ vmx_vminit(struct vm *vm, pmap_t pmap)
* VM exit and entry respectively. It is also restored from the
* host VMCS area on a VM exit.
*
- * The TSC MSR is exposed read-only. Writes are disallowed as that
- * will impact the host TSC.
- * XXX Writes would be implemented with a wrmsr trap, and
- * then modifying the TSC offset in the VMCS.
+ * The TSC MSR is exposed read-only. Writes are disallowed as
+ * that will impact the host TSC. If the guest does a write,
+ * the "use TSC offsetting" execution control is enabled and the
+ * difference between the host TSC and the guest TSC is written
+ * into the TSC offset in the VMCS.
*/
if (guest_msr_rw(vmx, MSR_GSBASE) ||
guest_msr_rw(vmx, MSR_FSBASE) ||
@@ -1130,6 +1131,22 @@ vmx_clear_nmi_window_exiting(struct vmx *vmx, int vcpu)
VCPU_CTR0(vmx->vm, vcpu, "Disabling NMI window exiting");
}
+int
+vmx_set_tsc_offset(struct vmx *vmx, int vcpu, uint64_t offset)
+{
+ int error;
+
+ if ((vmx->cap[vcpu].proc_ctls & PROCBASED_TSC_OFFSET) == 0) {
+ vmx->cap[vcpu].proc_ctls |= PROCBASED_TSC_OFFSET;
+ vmcs_write(VMCS_PRI_PROC_BASED_CTLS, vmx->cap[vcpu].proc_ctls);
+ VCPU_CTR0(vmx->vm, vcpu, "Enabling TSC offsetting");
+ }
+
+ error = vmwrite(VMCS_TSC_OFFSET, offset);
+
+ return (error);
+}
+
#define NMI_BLOCKING (VMCS_INTERRUPTIBILITY_NMI_BLOCKING | \
VMCS_INTERRUPTIBILITY_MOVSS_BLOCKING)
#define HWINTR_BLOCKING (VMCS_INTERRUPTIBILITY_STI_BLOCKING | \
@@ -2554,7 +2571,7 @@ vmx_exit_handle_nmi(struct vmx *vmx, int vcpuid, struct vm_exit *vmexit)
static int
vmx_run(void *arg, int vcpu, register_t rip, pmap_t pmap,
- void *rendezvous_cookie, void *suspend_cookie)
+ struct vm_eventinfo *evinfo)
{
int rc, handled, launched;
struct vmx *vmx;
@@ -2623,18 +2640,24 @@ vmx_run(void *arg, int vcpu, register_t rip, pmap_t pmap,
* vmx_inject_interrupts() can suspend the vcpu due to a
* triple fault.
*/
- if (vcpu_suspended(suspend_cookie)) {
+ if (vcpu_suspended(evinfo)) {
enable_intr();
vm_exit_suspended(vmx->vm, vcpu, rip);
break;
}
- if (vcpu_rendezvous_pending(rendezvous_cookie)) {
+ if (vcpu_rendezvous_pending(evinfo)) {
enable_intr();
vm_exit_rendezvous(vmx->vm, vcpu, rip);
break;
}
+ if (vcpu_reqidle(evinfo)) {
+ enable_intr();
+ vm_exit_reqidle(vmx->vm, vcpu, rip);
+ break;
+ }
+
if (vcpu_should_yield(vm, vcpu)) {
enable_intr();
vm_exit_astpending(vmx->vm, vcpu, rip);
diff --git a/sys/amd64/vmm/intel/vmx.h b/sys/amd64/vmm/intel/vmx.h
index bc48861..57f5b28 100644
--- a/sys/amd64/vmm/intel/vmx.h
+++ b/sys/amd64/vmm/intel/vmx.h
@@ -135,6 +135,8 @@ void vmx_call_isr(uintptr_t entry);
u_long vmx_fix_cr0(u_long cr0);
u_long vmx_fix_cr4(u_long cr4);
+int vmx_set_tsc_offset(struct vmx *vmx, int vcpu, uint64_t offset);
+
extern char vmx_exit_guest[];
#endif
diff --git a/sys/amd64/vmm/intel/vmx_msr.c b/sys/amd64/vmm/intel/vmx_msr.c
index 3091f68..91b2c01 100644
--- a/sys/amd64/vmm/intel/vmx_msr.c
+++ b/sys/amd64/vmm/intel/vmx_msr.c
@@ -474,6 +474,9 @@ vmx_wrmsr(struct vmx *vmx, int vcpuid, u_int num, uint64_t val, bool *retu)
else
vm_inject_gp(vmx->vm, vcpuid);
break;
+ case MSR_TSC:
+ error = vmx_set_tsc_offset(vmx, vcpuid, val - rdtsc());
+ break;
default:
error = EINVAL;
break;
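
Note on the TSC hunks (vmx.c, vmx.h, vmx_msr.c): with the "use TSC
offsetting" control enabled, the guest observes guest_tsc = host_tsc
+ offset, so emulating a guest WRMSR to MSR_TSC reduces to storing
offset = val - rdtsc(). A worked example with invented numbers:

	/*
	 * host rdtsc()               = 1000000
	 * guest writes MSR_TSC val   =   40000
	 * offset = 40000 - 1000000   = -960000
	 *
	 * hardware then reports host_tsc + (-960000) to the guest,
	 * i.e. roughly 40000 immediately after the write, counting
	 * up at the host rate from there.
	 */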
diff --git a/sys/amd64/vmm/vmm.c b/sys/amd64/vmm/vmm.c
index 2671295..2c37a1a 100644
--- a/sys/amd64/vmm/vmm.c
+++ b/sys/amd64/vmm/vmm.c
@@ -95,6 +95,7 @@ struct vcpu {
struct mtx mtx; /* (o) protects 'state' and 'hostcpu' */
enum vcpu_state state; /* (o) vcpu state */
int hostcpu; /* (o) vcpu's host cpu */
+ int reqidle; /* (i) request vcpu to idle */
struct vlapic *vlapic; /* (i) APIC device model */
enum x2apic_state x2apic_state; /* (i) APIC mode */
uint64_t exitintinfo; /* (i) events pending at VM exit */
@@ -164,8 +165,8 @@ static struct vmm_ops *ops;
#define VMM_RESUME() (ops != NULL ? (*ops->resume)() : 0)
#define VMINIT(vm, pmap) (ops != NULL ? (*ops->vminit)(vm, pmap): NULL)
-#define VMRUN(vmi, vcpu, rip, pmap, rptr, sptr) \
- (ops != NULL ? (*ops->vmrun)(vmi, vcpu, rip, pmap, rptr, sptr) : ENXIO)
+#define VMRUN(vmi, vcpu, rip, pmap, evinfo) \
+ (ops != NULL ? (*ops->vmrun)(vmi, vcpu, rip, pmap, evinfo) : ENXIO)
#define VMCLEANUP(vmi) (ops != NULL ? (*ops->vmcleanup)(vmi) : NULL)
#define VMSPACE_ALLOC(min, max) \
(ops != NULL ? (*ops->vmspace_alloc)(min, max) : NULL)
@@ -221,6 +222,28 @@ TUNABLE_INT("hw.vmm.force_iommu", &vmm_force_iommu);
SYSCTL_INT(_hw_vmm, OID_AUTO, force_iommu, CTLFLAG_RDTUN, &vmm_force_iommu, 0,
"Force use of I/O MMU even if no passthrough devices were found.");
+static void vcpu_notify_event_locked(struct vcpu *vcpu, bool lapic_intr);
+
+#ifdef KTR
+static const char *
+vcpu_state2str(enum vcpu_state state)
+{
+
+ switch (state) {
+ case VCPU_IDLE:
+ return ("idle");
+ case VCPU_FROZEN:
+ return ("frozen");
+ case VCPU_RUNNING:
+ return ("running");
+ case VCPU_SLEEPING:
+ return ("sleeping");
+ default:
+ return ("unknown");
+ }
+}
+#endif
+
static void
vcpu_cleanup(struct vm *vm, int i, bool destroy)
{
@@ -255,6 +278,7 @@ vcpu_init(struct vm *vm, int vcpu_id, bool create)
vcpu->vlapic = VLAPIC_INIT(vm->cookie, vcpu_id);
vm_set_x2apic_state(vm, vcpu_id, X2APIC_DISABLED);
+ vcpu->reqidle = 0;
vcpu->exitintinfo = 0;
vcpu->nmi_pending = 0;
vcpu->extint_pending = 0;
@@ -980,11 +1004,13 @@ save_guest_fpustate(struct vcpu *vcpu)
static VMM_STAT(VCPU_IDLE_TICKS, "number of ticks vcpu was idle");
static int
-vcpu_set_state_locked(struct vcpu *vcpu, enum vcpu_state newstate,
+vcpu_set_state_locked(struct vm *vm, int vcpuid, enum vcpu_state newstate,
bool from_idle)
{
+ struct vcpu *vcpu;
int error;
+ vcpu = &vm->vcpu[vcpuid];
vcpu_assert_locked(vcpu);
/*
@@ -993,8 +1019,13 @@ vcpu_set_state_locked(struct vcpu *vcpu, enum vcpu_state newstate,
* ioctl() operating on a vcpu at any point.
*/
if (from_idle) {
- while (vcpu->state != VCPU_IDLE)
+ while (vcpu->state != VCPU_IDLE) {
+ vcpu->reqidle = 1;
+ vcpu_notify_event_locked(vcpu, false);
+ VCPU_CTR1(vm, vcpuid, "vcpu state change from %s to "
+ "idle requested", vcpu_state2str(vcpu->state));
msleep_spin(&vcpu->state, &vcpu->mtx, "vmstat", hz);
+ }
} else {
KASSERT(vcpu->state != VCPU_IDLE, ("invalid transition from "
"vcpu idle state"));
@@ -1031,6 +1062,9 @@ vcpu_set_state_locked(struct vcpu *vcpu, enum vcpu_state newstate,
if (error)
return (EBUSY);
+ VCPU_CTR2(vm, vcpuid, "vcpu state changed from %s to %s",
+ vcpu_state2str(vcpu->state), vcpu_state2str(newstate));
+
vcpu->state = newstate;
if (newstate == VCPU_RUNNING)
vcpu->hostcpu = curcpu;
@@ -1053,11 +1087,11 @@ vcpu_require_state(struct vm *vm, int vcpuid, enum vcpu_state newstate)
}
static void
-vcpu_require_state_locked(struct vcpu *vcpu, enum vcpu_state newstate)
+vcpu_require_state_locked(struct vm *vm, int vcpuid, enum vcpu_state newstate)
{
int error;
- if ((error = vcpu_set_state_locked(vcpu, newstate, false)) != 0)
+ if ((error = vcpu_set_state_locked(vm, vcpuid, newstate, false)) != 0)
panic("Error %d setting state to %d", error, newstate);
}
@@ -1145,7 +1179,7 @@ vm_handle_hlt(struct vm *vm, int vcpuid, bool intr_disabled, bool *retu)
* vcpu returned from VMRUN() and before it acquired the
* vcpu lock above.
*/
- if (vm->rendezvous_func != NULL || vm->suspend)
+ if (vm->rendezvous_func != NULL || vm->suspend || vcpu->reqidle)
break;
if (vm_nmi_pending(vm, vcpuid))
break;
@@ -1182,13 +1216,13 @@ vm_handle_hlt(struct vm *vm, int vcpuid, bool intr_disabled, bool *retu)
}
t = ticks;
- vcpu_require_state_locked(vcpu, VCPU_SLEEPING);
+ vcpu_require_state_locked(vm, vcpuid, VCPU_SLEEPING);
/*
* XXX msleep_spin() cannot be interrupted by signals so
* wake up periodically to check pending signals.
*/
msleep_spin(vcpu, &vcpu->mtx, wmesg, hz);
- vcpu_require_state_locked(vcpu, VCPU_FROZEN);
+ vcpu_require_state_locked(vm, vcpuid, VCPU_FROZEN);
vmm_stat_incr(vm, vcpuid, VCPU_IDLE_TICKS, ticks - t);
}
@@ -1350,9 +1384,9 @@ vm_handle_suspend(struct vm *vm, int vcpuid, bool *retu)
if (vm->rendezvous_func == NULL) {
VCPU_CTR0(vm, vcpuid, "Sleeping during suspend");
- vcpu_require_state_locked(vcpu, VCPU_SLEEPING);
+ vcpu_require_state_locked(vm, vcpuid, VCPU_SLEEPING);
msleep_spin(vcpu, &vcpu->mtx, "vmsusp", hz);
- vcpu_require_state_locked(vcpu, VCPU_FROZEN);
+ vcpu_require_state_locked(vm, vcpuid, VCPU_FROZEN);
} else {
VCPU_CTR0(vm, vcpuid, "Rendezvous during suspend");
vcpu_unlock(vcpu);
@@ -1375,6 +1409,19 @@ vm_handle_suspend(struct vm *vm, int vcpuid, bool *retu)
return (0);
}
+static int
+vm_handle_reqidle(struct vm *vm, int vcpuid, bool *retu)
+{
+ struct vcpu *vcpu = &vm->vcpu[vcpuid];
+
+ vcpu_lock(vcpu);
+ KASSERT(vcpu->reqidle, ("invalid vcpu reqidle %d", vcpu->reqidle));
+ vcpu->reqidle = 0;
+ vcpu_unlock(vcpu);
+ *retu = true;
+ return (0);
+}
+
int
vm_suspend(struct vm *vm, enum vm_suspend_how how)
{
@@ -1432,6 +1479,18 @@ vm_exit_rendezvous(struct vm *vm, int vcpuid, uint64_t rip)
}
void
+vm_exit_reqidle(struct vm *vm, int vcpuid, uint64_t rip)
+{
+ struct vm_exit *vmexit;
+
+ vmexit = vm_exitinfo(vm, vcpuid);
+ vmexit->rip = rip;
+ vmexit->inst_length = 0;
+ vmexit->exitcode = VM_EXITCODE_REQIDLE;
+ vmm_stat_incr(vm, vcpuid, VMEXIT_REQIDLE, 1);
+}
+
+void
vm_exit_astpending(struct vm *vm, int vcpuid, uint64_t rip)
{
struct vm_exit *vmexit;
@@ -1446,6 +1505,7 @@ vm_exit_astpending(struct vm *vm, int vcpuid, uint64_t rip)
int
vm_run(struct vm *vm, struct vm_run *vmrun)
{
+ struct vm_eventinfo evinfo;
int error, vcpuid;
struct vcpu *vcpu;
struct pcb *pcb;
@@ -1453,7 +1513,6 @@ vm_run(struct vm *vm, struct vm_run *vmrun)
struct vm_exit *vme;
bool retu, intr_disabled;
pmap_t pmap;
- void *rptr, *sptr;
vcpuid = vmrun->cpuid;
@@ -1466,11 +1525,12 @@ vm_run(struct vm *vm, struct vm_run *vmrun)
if (CPU_ISSET(vcpuid, &vm->suspended_cpus))
return (EINVAL);
- rptr = &vm->rendezvous_func;
- sptr = &vm->suspend;
pmap = vmspace_pmap(vm->vmspace);
vcpu = &vm->vcpu[vcpuid];
vme = &vcpu->exitinfo;
+ evinfo.rptr = &vm->rendezvous_func;
+ evinfo.sptr = &vm->suspend;
+ evinfo.iptr = &vcpu->reqidle;
restart:
critical_enter();
@@ -1485,7 +1545,7 @@ restart:
restore_guest_fpustate(vcpu);
vcpu_require_state(vm, vcpuid, VCPU_RUNNING);
- error = VMRUN(vm->cookie, vcpuid, vcpu->nextrip, pmap, rptr, sptr);
+ error = VMRUN(vm->cookie, vcpuid, vcpu->nextrip, pmap, &evinfo);
vcpu_require_state(vm, vcpuid, VCPU_FROZEN);
save_guest_fpustate(vcpu);
@@ -1498,6 +1558,9 @@ restart:
retu = false;
vcpu->nextrip = vme->rip + vme->inst_length;
switch (vme->exitcode) {
+ case VM_EXITCODE_REQIDLE:
+ error = vm_handle_reqidle(vm, vcpuid, &retu);
+ break;
case VM_EXITCODE_SUSPENDED:
error = vm_handle_suspend(vm, vcpuid, &retu);
break;
@@ -1536,6 +1599,8 @@ restart:
if (error == 0 && retu == false)
goto restart;
+ VCPU_CTR2(vm, vcpuid, "retu %d/%d", error, vme->exitcode);
+
/* copy the exit information */
bcopy(vme, &vmrun->vm_exit, sizeof(struct vm_exit));
return (error);
@@ -2072,7 +2137,7 @@ vcpu_set_state(struct vm *vm, int vcpuid, enum vcpu_state newstate,
vcpu = &vm->vcpu[vcpuid];
vcpu_lock(vcpu);
- error = vcpu_set_state_locked(vcpu, newstate, from_idle);
+ error = vcpu_set_state_locked(vm, vcpuid, newstate, from_idle);
vcpu_unlock(vcpu);
return (error);
@@ -2168,15 +2233,11 @@ vm_set_x2apic_state(struct vm *vm, int vcpuid, enum x2apic_state state)
* - If the vcpu is running on a different host_cpu then an IPI will be directed
* to the host_cpu to cause the vcpu to trap into the hypervisor.
*/
-void
-vcpu_notify_event(struct vm *vm, int vcpuid, bool lapic_intr)
+static void
+vcpu_notify_event_locked(struct vcpu *vcpu, bool lapic_intr)
{
int hostcpu;
- struct vcpu *vcpu;
-
- vcpu = &vm->vcpu[vcpuid];
- vcpu_lock(vcpu);
hostcpu = vcpu->hostcpu;
if (vcpu->state == VCPU_RUNNING) {
KASSERT(hostcpu != NOCPU, ("vcpu running on invalid hostcpu"));
@@ -2201,6 +2262,15 @@ vcpu_notify_event(struct vm *vm, int vcpuid, bool lapic_intr)
if (vcpu->state == VCPU_SLEEPING)
wakeup_one(vcpu);
}
+}
+
+void
+vcpu_notify_event(struct vm *vm, int vcpuid, bool lapic_intr)
+{
+ struct vcpu *vcpu = &vm->vcpu[vcpuid];
+
+ vcpu_lock(vcpu);
+ vcpu_notify_event_locked(vcpu, lapic_intr);
vcpu_unlock(vcpu);
}
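
Note on the vmm.c hunks: previously vcpu_set_state_locked() simply
slept waiting for the vcpu to become idle, with nothing forcing a
vcpu that was executing in the guest to come back, so an ioctl could
stall indefinitely. The reqidle handshake added above closes that
window. Condensed from the hunks:

	/* requesting thread (from_idle case): */
	vcpu->reqidle = 1;
	vcpu_notify_event_locked(vcpu, false);	/* IPI if running,
						   wakeup if sleeping */
	msleep_spin(&vcpu->state, &vcpu->mtx, "vmstat", hz);

	/* vcpu thread: the VMRUN loop sees vcpu_reqidle(evinfo) and
	 * exits with VM_EXITCODE_REQIDLE; vm_run() dispatches to
	 * vm_handle_reqidle(), which clears reqidle and sets retu,
	 * so the vcpu returns to userspace and can drop to
	 * VCPU_IDLE. */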
diff --git a/sys/amd64/vmm/vmm_instruction_emul.c b/sys/amd64/vmm/vmm_instruction_emul.c
index 9b5713d..758b7e8 100644
--- a/sys/amd64/vmm/vmm_instruction_emul.c
+++ b/sys/amd64/vmm/vmm_instruction_emul.c
@@ -2342,7 +2342,7 @@ verify_gla(struct vm *vm, int cpuid, uint64_t gla, struct vie *vie)
* instruction
*/
if (vie->base_register == VM_REG_GUEST_RIP)
- base += vie->num_valid;
+ base += vie->num_processed;
}
idx = 0;
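
Note on the vmm_instruction_emul.c hunk: x86 computes RIP-relative
addresses from the address of the *next* instruction. vie->num_valid
counts the bytes fetched into the buffer (up to the 15-byte maximum),
while num_processed counts the bytes the decoder actually consumed,
i.e. the true instruction length, so only the latter yields the right
base. Hypothetical example: a 7-byte "orq %rax,0x10(%rip)" fetched
with 15 valid bytes must use base = rip + 7, not rip + 15.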
diff --git a/sys/amd64/vmm/vmm_stat.c b/sys/amd64/vmm/vmm_stat.c
index 4ae5fb9..7e2f64d 100644
--- a/sys/amd64/vmm/vmm_stat.c
+++ b/sys/amd64/vmm/vmm_stat.c
@@ -164,6 +164,7 @@ VMM_STAT(VMEXIT_NESTED_FAULT, "vm exits due to nested page fault");
VMM_STAT(VMEXIT_INST_EMUL, "vm exits for instruction emulation");
VMM_STAT(VMEXIT_UNKNOWN, "number of vm exits for unknown reason");
VMM_STAT(VMEXIT_ASTPENDING, "number of times astpending at exit");
+VMM_STAT(VMEXIT_REQIDLE, "number of times idle requested at exit");
VMM_STAT(VMEXIT_USERSPACE, "number of vm exits handled in userspace");
VMM_STAT(VMEXIT_RENDEZVOUS, "number of times rendezvous pending at exit");
VMM_STAT(VMEXIT_EXCEPTION, "number of vm exits due to exceptions");
diff --git a/sys/amd64/vmm/vmm_stat.h b/sys/amd64/vmm/vmm_stat.h
index 1640ba3..c695840 100644
--- a/sys/amd64/vmm/vmm_stat.h
+++ b/sys/amd64/vmm/vmm_stat.h
@@ -157,4 +157,5 @@ VMM_STAT_DECLARE(VMEXIT_ASTPENDING);
VMM_STAT_DECLARE(VMEXIT_USERSPACE);
VMM_STAT_DECLARE(VMEXIT_RENDEZVOUS);
VMM_STAT_DECLARE(VMEXIT_EXCEPTION);
+VMM_STAT_DECLARE(VMEXIT_REQIDLE);
#endif