author     jhb <jhb@FreeBSD.org>  2014-07-21 19:08:02 +0000
committer  jhb <jhb@FreeBSD.org>  2014-07-21 19:08:02 +0000
commit     e6b48465b7c368666e10a2bd8f4f500483497b24 (patch)
tree       7e529f938e1d777df1f54045191fa3003056dbda /sys/amd64/vmm
parent     b164bf591711a4c455ca47e1f58b0bb91e5e904c (diff)
MFC 264353, 264509, 264768, 264770, 264825, 264846, 264988, 265114, 265165,
265365, 265941, 265951, 266390, 266550, 266910:

Various bhyve fixes:
- Don't save the host's return address in 'struct vmxctx'.
- Permit non-32-bit accesses to local APIC registers.
- Factor out common ioport handler code.
- Use calloc() in favor of malloc + memset.
- Change the vlapic timer frequency to be in the ballpark of contemporary
  hardware.
- Allow the guest to read the TSC via MSR 0x10.
- A VMCS is always inactive when it exits the vmx_run() loop. Remove
  redundant code and the misleading comment that suggests otherwise.
- Ignore writes to the microcode update MSR. This MSR is accessed by RHEL7
  guests. Add KTR tracepoints to annotate wrmsr and rdmsr VM exits.
- Provide an alias for the userboot console and name it 'comconsole'.
- Use EV_ADD to create an mevent and EV_ENABLE to enable it.
- abort(3) the process in response to a VMEXIT_ABORT.
- Don't include the guest memory segments in the bhyve(8) process core dump.
- Make the vmx asm code dtrace-fbt-friendly.
- Allow vmx_getdesc() and vmx_setdesc() to be called for a vcpu that is in
  the VCPU_RUNNING state.
- Enable VMX in the IA32_FEATURE_CONTROL MSR if it is not enabled and the
  MSR isn't locked.
Diffstat (limited to 'sys/amd64/vmm')
-rw-r--r--  sys/amd64/vmm/intel/vmcs.c          16
-rw-r--r--  sys/amd64/vmm/intel/vmcs.h           4
-rw-r--r--  sys/amd64/vmm/intel/vmx.c           45
-rw-r--r--  sys/amd64/vmm/intel/vmx.h            6
-rw-r--r--  sys/amd64/vmm/intel/vmx_genassym.c   1
-rw-r--r--  sys/amd64/vmm/intel/vmx_support.S   35
-rw-r--r--  sys/amd64/vmm/io/vlapic.c            7
-rw-r--r--  sys/amd64/vmm/vmm_ioport.c          34
-rw-r--r--  sys/amd64/vmm/vmm_lapic.c            8
9 files changed, 92 insertions, 64 deletions
diff --git a/sys/amd64/vmm/intel/vmcs.c b/sys/amd64/vmm/intel/vmcs.c
index 1ddefe0..cc97d95 100644
--- a/sys/amd64/vmm/intel/vmcs.c
+++ b/sys/amd64/vmm/intel/vmcs.c
@@ -231,7 +231,7 @@ vmcs_setreg(struct vmcs *vmcs, int running, int ident, uint64_t val)
}
int
-vmcs_setdesc(struct vmcs *vmcs, int seg, struct seg_desc *desc)
+vmcs_setdesc(struct vmcs *vmcs, int running, int seg, struct seg_desc *desc)
{
int error;
uint32_t base, limit, access;
@@ -240,7 +240,8 @@ vmcs_setdesc(struct vmcs *vmcs, int seg, struct seg_desc *desc)
if (error != 0)
panic("vmcs_setdesc: invalid segment register %d", seg);
- VMPTRLD(vmcs);
+ if (!running)
+ VMPTRLD(vmcs);
if ((error = vmwrite(base, desc->base)) != 0)
goto done;
@@ -252,12 +253,13 @@ vmcs_setdesc(struct vmcs *vmcs, int seg, struct seg_desc *desc)
goto done;
}
done:
- VMCLEAR(vmcs);
+ if (!running)
+ VMCLEAR(vmcs);
return (error);
}
int
-vmcs_getdesc(struct vmcs *vmcs, int seg, struct seg_desc *desc)
+vmcs_getdesc(struct vmcs *vmcs, int running, int seg, struct seg_desc *desc)
{
int error;
uint32_t base, limit, access;
@@ -267,7 +269,8 @@ vmcs_getdesc(struct vmcs *vmcs, int seg, struct seg_desc *desc)
if (error != 0)
panic("vmcs_getdesc: invalid segment register %d", seg);
- VMPTRLD(vmcs);
+ if (!running)
+ VMPTRLD(vmcs);
if ((error = vmread(base, &u64)) != 0)
goto done;
desc->base = u64;
@@ -282,7 +285,8 @@ vmcs_getdesc(struct vmcs *vmcs, int seg, struct seg_desc *desc)
desc->access = u64;
}
done:
- VMCLEAR(vmcs);
+ if (!running)
+ VMCLEAR(vmcs);
return (error);
}
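
The conditional VMPTRLD/VMCLEAR added above is the heart of this change: when
the vcpu is running on the current CPU, its VMCS is already the current one,
so vmread/vmwrite work directly, and a VMCLEAR on the way out would wrongly
move a launched VMCS back to the inactive state. A condensed sketch of the
pattern (illustrative only, using the VMPTRLD/VMCLEAR macros and vmwrite()
already used in vmcs.c):

static int
vmcs_write_one(struct vmcs *vmcs, int running, uint32_t encoding, uint64_t val)
{
        int error;

        if (!running)
                VMPTRLD(vmcs);          /* make this VMCS current */
        error = vmwrite(encoding, val);
        if (!running)
                VMCLEAR(vmcs);          /* return it to the inactive state */
        return (error);
}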
diff --git a/sys/amd64/vmm/intel/vmcs.h b/sys/amd64/vmm/intel/vmcs.h
index 9cde999..657d5b0 100644
--- a/sys/amd64/vmm/intel/vmcs.h
+++ b/sys/amd64/vmm/intel/vmcs.h
@@ -49,9 +49,9 @@ int vmcs_set_msr_save(struct vmcs *vmcs, u_long g_area, u_int g_count);
int vmcs_init(struct vmcs *vmcs);
int vmcs_getreg(struct vmcs *vmcs, int running, int ident, uint64_t *rv);
int vmcs_setreg(struct vmcs *vmcs, int running, int ident, uint64_t val);
-int vmcs_getdesc(struct vmcs *vmcs, int ident,
+int vmcs_getdesc(struct vmcs *vmcs, int running, int ident,
struct seg_desc *desc);
-int vmcs_setdesc(struct vmcs *vmcs, int ident,
+int vmcs_setdesc(struct vmcs *vmcs, int running, int ident,
struct seg_desc *desc);
static __inline uint64_t
diff --git a/sys/amd64/vmm/intel/vmx.c b/sys/amd64/vmm/intel/vmx.c
index 1c39552..e85e5e4 100644
--- a/sys/amd64/vmm/intel/vmx.c
+++ b/sys/amd64/vmm/intel/vmx.c
@@ -509,6 +509,15 @@ static void
vmx_enable(void *arg __unused)
{
int error;
+ uint64_t feature_control;
+
+ feature_control = rdmsr(MSR_IA32_FEATURE_CONTROL);
+ if ((feature_control & IA32_FEATURE_CONTROL_LOCK) == 0 ||
+ (feature_control & IA32_FEATURE_CONTROL_VMX_EN) == 0) {
+ wrmsr(MSR_IA32_FEATURE_CONTROL,
+ feature_control | IA32_FEATURE_CONTROL_VMX_EN |
+ IA32_FEATURE_CONTROL_LOCK);
+ }
load_cr4(rcr4() | CR4_VMXE);
@@ -544,7 +553,7 @@ vmx_init(int ipinum)
* are set (bits 0 and 2 respectively).
*/
feature_control = rdmsr(MSR_IA32_FEATURE_CONTROL);
- if ((feature_control & IA32_FEATURE_CONTROL_LOCK) == 0 ||
+ if ((feature_control & IA32_FEATURE_CONTROL_LOCK) == 1 &&
(feature_control & IA32_FEATURE_CONTROL_VMX_EN) == 0) {
printf("vmx_init: VMX operation disabled by BIOS\n");
return (ENXIO);
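
Taken together with the vmx_enable() hunk above: an unlocked
IA32_FEATURE_CONTROL is now fixed up at enable time, so the only fatal BIOS
configuration left is a locked MSR with VMX disabled. A minimal predicate
capturing the combined logic (a sketch, not a function in the tree; constants
as used in the hunks above):

static bool
vmx_allowed_by_bios(void)
{
        uint64_t fc = rdmsr(MSR_IA32_FEATURE_CONTROL);

        /* Unlocked: vmx_enable() will set LOCK and VMX_EN itself. */
        return ((fc & IA32_FEATURE_CONTROL_LOCK) == 0 ||
            (fc & IA32_FEATURE_CONTROL_VMX_EN) != 0);
}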
@@ -863,6 +872,11 @@ vmx_vminit(struct vm *vm, pmap_t pmap)
* MSR_EFER is saved and restored in the guest VMCS area on a
* VM exit and entry respectively. It is also restored from the
* host VMCS area on a VM exit.
+ *
+ * The TSC MSR is exposed read-only. Writes are disallowed as that
+ * will impact the host TSC.
+ * XXX Writes would be implemented with a wrmsr trap, and
+ * then modifying the TSC offset in the VMCS.
*/
if (guest_msr_rw(vmx, MSR_GSBASE) ||
guest_msr_rw(vmx, MSR_FSBASE) ||
@@ -870,7 +884,8 @@ vmx_vminit(struct vm *vm, pmap_t pmap)
guest_msr_rw(vmx, MSR_SYSENTER_ESP_MSR) ||
guest_msr_rw(vmx, MSR_SYSENTER_EIP_MSR) ||
guest_msr_rw(vmx, MSR_KGSBASE) ||
- guest_msr_rw(vmx, MSR_EFER))
+ guest_msr_rw(vmx, MSR_EFER) ||
+ guest_msr_ro(vmx, MSR_TSC))
panic("vmx_vminit: error setting guest msr access");
/*
@@ -1829,6 +1844,7 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit)
vmm_stat_incr(vmx->vm, vcpu, VMEXIT_RDMSR, 1);
retu = false;
ecx = vmxctx->guest_rcx;
+ VCPU_CTR1(vmx->vm, vcpu, "rdmsr 0x%08x", ecx);
error = emulate_rdmsr(vmx->vm, vcpu, ecx, &retu);
if (error) {
vmexit->exitcode = VM_EXITCODE_RDMSR;
@@ -1847,6 +1863,8 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit)
eax = vmxctx->guest_rax;
ecx = vmxctx->guest_rcx;
edx = vmxctx->guest_rdx;
+ VCPU_CTR2(vmx->vm, vcpu, "wrmsr 0x%08x value 0x%016lx",
+ ecx, (uint64_t)edx << 32 | eax);
error = emulate_wrmsr(vmx->vm, vcpu, ecx,
(uint64_t)edx << 32 | eax, &retu);
if (error) {
@@ -2257,7 +2275,7 @@ vmx_run(void *arg, int vcpu, register_t startrip, pmap_t pmap,
static void
vmx_vmcleanup(void *arg)
{
- int i, error;
+ int i;
struct vmx *vmx = arg;
if (apic_access_virtualization(vmx, 0))
@@ -2266,13 +2284,6 @@ vmx_vmcleanup(void *arg)
for (i = 0; i < VM_MAXCPU; i++)
vpid_free(vmx->state[i].vpid);
- /*
- * XXXSMP we also need to clear the VMCS active on the other vcpus.
- */
- error = vmclear(&vmx->vmcs[0]);
- if (error != 0)
- panic("vmx_vmcleanup: vmclear error %d on vcpu 0", error);
-
free(vmx, M_VMX);
return;
@@ -2430,17 +2441,27 @@ vmx_setreg(void *arg, int vcpu, int reg, uint64_t val)
static int
vmx_getdesc(void *arg, int vcpu, int reg, struct seg_desc *desc)
{
+ int hostcpu, running;
struct vmx *vmx = arg;
- return (vmcs_getdesc(&vmx->vmcs[vcpu], reg, desc));
+ running = vcpu_is_running(vmx->vm, vcpu, &hostcpu);
+ if (running && hostcpu != curcpu)
+ panic("vmx_getdesc: %s%d is running", vm_name(vmx->vm), vcpu);
+
+ return (vmcs_getdesc(&vmx->vmcs[vcpu], running, reg, desc));
}
static int
vmx_setdesc(void *arg, int vcpu, int reg, struct seg_desc *desc)
{
+ int hostcpu, running;
struct vmx *vmx = arg;
- return (vmcs_setdesc(&vmx->vmcs[vcpu], reg, desc));
+ running = vcpu_is_running(vmx->vm, vcpu, &hostcpu);
+ if (running && hostcpu != curcpu)
+ panic("vmx_setdesc: %s%d is running", vm_name(vmx->vm), vcpu);
+
+ return (vmcs_setdesc(&vmx->vmcs[vcpu], running, reg, desc));
}
static int
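
The XXX note in the vmx_vminit() comment above outlines how guest TSC writes
could eventually be handled. A hypothetical sketch of that path (not
implemented by this commit, and assuming the VMCS_IDENT() and VMCS_TSC_OFFSET
definitions from vmcs.h):

static int
vmx_wrmsr_tsc(struct vmx *vmx, int vcpu, int running, uint64_t wval)
{
        /* Guest TSC = host TSC + offset, so solve for the new offset. */
        return (vmcs_setreg(&vmx->vmcs[vcpu], running,
            VMCS_IDENT(VMCS_TSC_OFFSET), wval - rdtsc()));
}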
diff --git a/sys/amd64/vmm/intel/vmx.h b/sys/amd64/vmm/intel/vmx.h
index 80bfd72..208fcee 100644
--- a/sys/amd64/vmm/intel/vmx.h
+++ b/sys/amd64/vmm/intel/vmx.h
@@ -60,7 +60,6 @@ struct vmxctx {
register_t host_rbp;
register_t host_rsp;
register_t host_rbx;
- register_t host_rip;
/*
* XXX todo debug registers and fpu state
*/
@@ -68,7 +67,7 @@ struct vmxctx {
int inst_fail_status;
/*
- * The pmap needs to be deactivated in vmx_exit_guest()
+ * The pmap needs to be deactivated in vmx_enter_guest()
* so keep a copy of the 'pmap' in each vmxctx.
*/
struct pmap *pmap;
@@ -122,10 +121,11 @@ CTASSERT((offsetof(struct vmx, pir_desc[0]) & 63) == 0);
#define VMX_VMLAUNCH_ERROR 2
#define VMX_INVEPT_ERROR 3
int vmx_enter_guest(struct vmxctx *ctx, struct vmx *vmx, int launched);
-void vmx_exit_guest(void);
void vmx_call_isr(uintptr_t entry);
u_long vmx_fix_cr0(u_long cr0);
u_long vmx_fix_cr4(u_long cr4);
+extern char vmx_exit_guest[];
+
#endif
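
vmx_exit_guest is now a bare label inside vmx_enter_guest() (see
vmx_support.S below), so C code only ever takes its address; declaring it as
an incomplete char array makes an accidental call a compile-time error while
keeping the address-of use legal. A usage sketch (the host-RIP field is what
the CPU loads on every VM exit; VMCS_HOST_RIP as defined in vmcs.h):

        /* Point every VM exit at the label inside vmx_enter_guest(). */
        error = vmwrite(VMCS_HOST_RIP, (uint64_t)vmx_exit_guest);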
diff --git a/sys/amd64/vmm/intel/vmx_genassym.c b/sys/amd64/vmm/intel/vmx_genassym.c
index 5c91fec..e1b98d6 100644
--- a/sys/amd64/vmm/intel/vmx_genassym.c
+++ b/sys/amd64/vmm/intel/vmx_genassym.c
@@ -65,7 +65,6 @@ ASSYM(VMXCTX_HOST_R12, offsetof(struct vmxctx, host_r12));
ASSYM(VMXCTX_HOST_RBP, offsetof(struct vmxctx, host_rbp));
ASSYM(VMXCTX_HOST_RSP, offsetof(struct vmxctx, host_rsp));
ASSYM(VMXCTX_HOST_RBX, offsetof(struct vmxctx, host_rbx));
-ASSYM(VMXCTX_HOST_RIP, offsetof(struct vmxctx, host_rip));
ASSYM(VMXCTX_INST_FAIL_STATUS, offsetof(struct vmxctx, inst_fail_status));
ASSYM(VMXCTX_PMAP, offsetof(struct vmxctx, pmap));
diff --git a/sys/amd64/vmm/intel/vmx_support.S b/sys/amd64/vmm/intel/vmx_support.S
index 9e8cf2d..840b7e0 100644
--- a/sys/amd64/vmm/intel/vmx_support.S
+++ b/sys/amd64/vmm/intel/vmx_support.S
@@ -37,6 +37,10 @@
#define LK
#endif
+/* Be friendly to DTrace FBT's prologue/epilogue pattern matching */
+#define VENTER push %rbp ; mov %rsp,%rbp
+#define VLEAVE pop %rbp
+
/*
* Assumes that %rdi holds a pointer to the 'vmxctx'.
*
@@ -72,8 +76,7 @@
*
* Assumes that %rdi holds a pointer to the 'vmxctx'.
*/
-#define VMX_HOST_SAVE(tmpreg) \
- movq (%rsp), tmpreg; /* return address */ \
+#define VMX_HOST_SAVE \
movq %r15, VMXCTX_HOST_R15(%rdi); \
movq %r14, VMXCTX_HOST_R14(%rdi); \
movq %r13, VMXCTX_HOST_R13(%rdi); \
@@ -81,9 +84,8 @@
movq %rbp, VMXCTX_HOST_RBP(%rdi); \
movq %rsp, VMXCTX_HOST_RSP(%rdi); \
movq %rbx, VMXCTX_HOST_RBX(%rdi); \
- movq tmpreg, VMXCTX_HOST_RIP(%rdi)
-#define VMX_HOST_RESTORE(tmpreg) \
+#define VMX_HOST_RESTORE \
movq VMXCTX_HOST_R15(%rdi), %r15; \
movq VMXCTX_HOST_R14(%rdi), %r14; \
movq VMXCTX_HOST_R13(%rdi), %r13; \
@@ -91,8 +93,6 @@
movq VMXCTX_HOST_RBP(%rdi), %rbp; \
movq VMXCTX_HOST_RSP(%rdi), %rsp; \
movq VMXCTX_HOST_RBX(%rdi), %rbx; \
- movq VMXCTX_HOST_RIP(%rdi), tmpreg; \
- movq tmpreg, (%rsp) /* return address */
/*
* vmx_enter_guest(struct vmxctx *vmxctx, int launched)
@@ -102,10 +102,11 @@
* Interrupts must be disabled on entry.
*/
ENTRY(vmx_enter_guest)
+ VENTER
/*
* Save host state before doing anything else.
*/
- VMX_HOST_SAVE(%r10)
+ VMX_HOST_SAVE
/*
* Activate guest pmap on this cpu.
@@ -186,15 +187,18 @@ inst_error:
movl PCPU(CPUID), %r10d
LK btrl %r10d, PM_ACTIVE(%r11)
- VMX_HOST_RESTORE(%r10)
+ VMX_HOST_RESTORE
+ VLEAVE
ret
-END(vmx_enter_guest)
/*
- * void vmx_exit_guest(void)
- * %rsp points to the struct vmxctx
+ * Non-error VM-exit from the guest. Make this a label so it can
+ * be used by C code when setting up the VMCS.
+ * The VMCS-restored %rsp points to the struct vmxctx
*/
-ENTRY(vmx_exit_guest)
+ ALIGN_TEXT
+ .globl vmx_exit_guest
+vmx_exit_guest:
/*
* Save guest state that is not automatically saved in the vmcs.
*/
@@ -226,15 +230,16 @@ ENTRY(vmx_exit_guest)
movl PCPU(CPUID), %r10d
LK btrl %r10d, PM_ACTIVE(%r11)
- VMX_HOST_RESTORE(%r10)
+ VMX_HOST_RESTORE
/*
* This will return to the caller of 'vmx_enter_guest()' with a return
* value of VMX_GUEST_VMEXIT.
*/
movl $VMX_GUEST_VMEXIT, %eax
+ VLEAVE
ret
-END(vmx_exit_guest)
+END(vmx_enter_guest)
/*
* %rdi = interrupt handler entry point
@@ -243,6 +248,7 @@ END(vmx_exit_guest)
* instruction in Intel SDM, Vol 2.
*/
ENTRY(vmx_call_isr)
+ VENTER
mov %rsp, %r11 /* save %rsp */
and $~0xf, %rsp /* align on 16-byte boundary */
pushq $KERNEL_SS /* %ss */
@@ -251,5 +257,6 @@ ENTRY(vmx_call_isr)
pushq $KERNEL_CS /* %cs */
cli /* disable interrupts */
callq *%rdi /* push %rip and call isr */
+ VLEAVE
ret
END(vmx_call_isr)
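
The VENTER/VLEAVE pairs give each entry point the push %rbp; mov %rsp,%rbp
prologue and matching epilogue that DTrace's fbt provider pattern-matches, so
these routines become traceable, for example with a one-liner such as
dtrace -n 'fbt::vmx_enter_guest:entry { @[probefunc] = count(); }' (a usage
sketch; run on a host with an active bhyve guest).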
diff --git a/sys/amd64/vmm/io/vlapic.c b/sys/amd64/vmm/io/vlapic.c
index 2e0a575..d93641c 100644
--- a/sys/amd64/vmm/io/vlapic.c
+++ b/sys/amd64/vmm/io/vlapic.c
@@ -70,7 +70,12 @@ __FBSDID("$FreeBSD$");
#define VLAPIC_TIMER_UNLOCK(vlapic) mtx_unlock_spin(&((vlapic)->timer_mtx))
#define VLAPIC_TIMER_LOCKED(vlapic) mtx_owned(&((vlapic)->timer_mtx))
-#define VLAPIC_BUS_FREQ tsc_freq
+/*
+ * APIC timer frequency:
+ * - arbitrary but chosen to be in the ballpark of contemporary hardware.
+ * - power-of-two to avoid loss of precision when converted to a bintime.
+ */
+#define VLAPIC_BUS_FREQ (128 * 1024 * 1024)
static __inline uint32_t
vlapic_get_id(struct vlapic *vlapic)
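
The power-of-two choice matters because a bintime fraction is a 64-bit binary
fraction of a second: the per-tick period 2^64/freq is exact only when freq
divides 2^64. A small sketch of the conversion, assuming the FREQ2BT() macro
from sys/time.h, which is the usual way the timer code turns a frequency into
a period:

#include <sys/time.h>

static void
vlapic_bus_period(struct bintime *bt)
{
        /* 2^64 / 2^27 = 2^37 exactly; an odd tsc_freq would truncate. */
        FREQ2BT(128 * 1024 * 1024, bt);
}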
diff --git a/sys/amd64/vmm/vmm_ioport.c b/sys/amd64/vmm/vmm_ioport.c
index ed17e40..eae45cc 100644
--- a/sys/amd64/vmm/vmm_ioport.c
+++ b/sys/amd64/vmm/vmm_ioport.c
@@ -69,18 +69,19 @@ emulate_ioport(struct vm *vm, int vcpuid, struct vm_exit *vmexit)
if (handler == NULL)
return (-1);
+ switch (vmexit->u.inout.bytes) {
+ case 1:
+ mask = 0xff;
+ break;
+ case 2:
+ mask = 0xffff;
+ break;
+ default:
+ mask = 0xffffffff;
+ break;
+ }
+
if (!vmexit->u.inout.in) {
- switch (vmexit->u.inout.bytes) {
- case 1:
- mask = 0xff;
- break;
- case 2:
- mask = 0xffff;
- break;
- default:
- mask = 0xffffffff;
- break;
- }
val = vmexit->u.inout.eax & mask;
}
@@ -88,17 +89,6 @@ emulate_ioport(struct vm *vm, int vcpuid, struct vm_exit *vmexit)
vmexit->u.inout.port, vmexit->u.inout.bytes, &val);
if (!error && vmexit->u.inout.in) {
- switch (vmexit->u.inout.bytes) {
- case 1:
- mask = 0xff;
- break;
- case 2:
- mask = 0xffff;
- break;
- default:
- mask = 0xffffffff;
- break;
- }
vmexit->u.inout.eax &= ~mask;
vmexit->u.inout.eax |= val & mask;
}
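
Hoisting the switch lets one mask serve both the IN and OUT paths. For
reference, an equivalent closed form (a stylistic alternative, not what the
tree uses):

        /* bytes is 1, 2, or 4 */
        mask = 0xffffffff >> (32 - vmexit->u.inout.bytes * 8);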
diff --git a/sys/amd64/vmm/vmm_lapic.c b/sys/amd64/vmm/vmm_lapic.c
index 640c779..fa9832e 100644
--- a/sys/amd64/vmm/vmm_lapic.c
+++ b/sys/amd64/vmm/vmm_lapic.c
@@ -230,10 +230,12 @@ lapic_mmio_read(void *vm, int cpu, uint64_t gpa, uint64_t *rval, int size,
off = gpa - DEFAULT_APIC_BASE;
/*
- * Memory mapped local apic accesses must be 4 bytes wide and
- * aligned on a 16-byte boundary.
+ * Memory mapped local apic accesses should be aligned on a
+ * 16-byte boundary. They are also suggested to be 4 bytes
+ * wide, alas not all OSes follow suggestions.
*/
- if (size != 4 || off & 0xf)
+ off &= ~3;
+ if (off & 0xf)
return (EINVAL);
vlapic = vm_lapic(vm, cpu);
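
A worked example of the relaxed check: a 1-byte read at gpa 0xfee00302 yields
off = 0x302; clearing the low two bits gives 0x300 (the low half of the ICR,
16-byte aligned), so the access is now accepted, while an access at
0xfee00304 still fails because 0x304 & 0xf is nonzero. The rule in isolation
(a standalone restatement, not a function in the tree):

static int
lapic_mmio_offset_valid(uint64_t off)
{
        off &= ~3;                      /* tolerate sub-dword offsets */
        return ((off & 0xf) == 0);      /* registers sit on 16-byte slots */
}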