Diffstat
-rw-r--r--   sys/amd64/include/vmm.h              |   4
-rw-r--r--   sys/amd64/vmm/intel/ept.c            |   1
-rw-r--r--   sys/amd64/vmm/intel/vmcs.h           |   5
-rw-r--r--   sys/amd64/vmm/intel/vmx.c            | 224
-rw-r--r--   sys/amd64/vmm/intel/vmx.h            |  15
-rw-r--r--   sys/amd64/vmm/intel/vmx_msr.c        | 215
-rw-r--r--   sys/amd64/vmm/intel/vmx_msr.h        |  15
-rw-r--r--   sys/amd64/vmm/io/vatpic.c            |  10
-rw-r--r--   sys/amd64/vmm/io/vlapic.c            |   1
-rw-r--r--   sys/amd64/vmm/vmm.c                  |  43
-rw-r--r--   sys/amd64/vmm/vmm_instruction_emul.c | 205
-rw-r--r--   sys/amd64/vmm/vmm_msr.c              | 273
-rw-r--r--   sys/amd64/vmm/vmm_msr.h              |  44
-rw-r--r--   sys/amd64/vmm/x86.c                  | 103
-rw-r--r--   sys/modules/vmm/Makefile             |   1
-rw-r--r--   sys/x86/include/specialreg.h         |   8
-rw-r--r--   usr.sbin/bhyve/acpi.c                |   4
-rw-r--r--   usr.sbin/bhyve/bhyverun.c            |   6
-rw-r--r--   usr.sbin/bhyve/block_if.c            |  28
-rw-r--r--   usr.sbin/bhyve/pci_ahci.c            | 146
-rw-r--r--   usr.sbin/bhyve/pci_virtio_block.c    |   6
-rw-r--r--   usr.sbin/bhyve/task_switch.c         |  18
-rw-r--r--   usr.sbin/bhyve/virtio.c              |   8
-rw-r--r--   usr.sbin/bhyve/xmsr.c                |  78
-rw-r--r--   usr.sbin/bhyve/xmsr.h                |   1
-rw-r--r--   usr.sbin/bhyvectl/bhyvectl.c         |   2
26 files changed, 896 insertions(+), 568 deletions(-)
diff --git a/sys/amd64/include/vmm.h b/sys/amd64/include/vmm.h
index 58af2a5..0879ba2 100644
--- a/sys/amd64/include/vmm.h
+++ b/sys/amd64/include/vmm.h
@@ -82,6 +82,7 @@ enum vm_reg_name {
VM_REG_GUEST_PDPTE1,
VM_REG_GUEST_PDPTE2,
VM_REG_GUEST_PDPTE3,
+ VM_REG_GUEST_INTR_SHADOW,
VM_REG_LAST
};
@@ -194,7 +195,6 @@ void vm_nmi_clear(struct vm *vm, int vcpuid);
int vm_inject_extint(struct vm *vm, int vcpu);
int vm_extint_pending(struct vm *vm, int vcpuid);
void vm_extint_clear(struct vm *vm, int vcpuid);
-uint64_t *vm_guest_msrs(struct vm *vm, int cpu);
struct vlapic *vm_lapic(struct vm *vm, int cpu);
struct vioapic *vm_ioapic(struct vm *vm);
struct vhpet *vm_hpet(struct vm *vm);
@@ -485,6 +485,8 @@ enum vm_exitcode {
VM_EXITCODE_SUSPENDED,
VM_EXITCODE_INOUT_STR,
VM_EXITCODE_TASK_SWITCH,
+ VM_EXITCODE_MONITOR,
+ VM_EXITCODE_MWAIT,
VM_EXITCODE_MAX
};
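For illustration, the new VM_REG_GUEST_INTR_SHADOW register is consumed later in this patch by vm_handle_hlt(); a minimal sketch of the calling convention (mirroring that hunk, not additional code in the commit):

/*
 * Writing 0 clears any "sti; hlt" interrupt shadow so that a pending
 * interrupt can be injected on the next VM entry.  Forcing a shadow by
 * writing a non-zero value is rejected with EINVAL by the VT-x backend.
 */
error = vm_set_register(vm, vcpuid, VM_REG_GUEST_INTR_SHADOW, 0);
KASSERT(error == 0, ("error %d clearing interrupt shadow", error));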
diff --git a/sys/amd64/vmm/intel/ept.c b/sys/amd64/vmm/intel/ept.c
index 5f6c4d0..13c9788 100644
--- a/sys/amd64/vmm/intel/ept.c
+++ b/sys/amd64/vmm/intel/ept.c
@@ -44,7 +44,6 @@ __FBSDID("$FreeBSD$");
#include "vmx_cpufunc.h"
#include "vmm_ipi.h"
-#include "vmx_msr.h"
#include "ept.h"
#define EPT_SUPPORTS_EXEC_ONLY(cap) ((cap) & (1UL << 0))
diff --git a/sys/amd64/vmm/intel/vmcs.h b/sys/amd64/vmm/intel/vmcs.h
index 4e9557c..6122de5 100644
--- a/sys/amd64/vmm/intel/vmcs.h
+++ b/sys/amd64/vmm/intel/vmcs.h
@@ -54,6 +54,10 @@ int vmcs_getdesc(struct vmcs *vmcs, int running, int ident,
int vmcs_setdesc(struct vmcs *vmcs, int running, int ident,
struct seg_desc *desc);
+/*
+ * Avoid header pollution caused by inline use of 'vtophys()' in vmx_cpufunc.h
+ */
+#ifdef _VMX_CPUFUNC_H_
static __inline uint64_t
vmcs_read(uint32_t encoding)
{
@@ -73,6 +77,7 @@ vmcs_write(uint32_t encoding, uint64_t val)
error = vmwrite(encoding, val);
KASSERT(error == 0, ("vmcs_write(%u) error %d", encoding, error));
}
+#endif /* _VMX_CPUFUNC_H_ */
#define vmexit_instruction_length() vmcs_read(VMCS_EXIT_INSTRUCTION_LENGTH)
#define vmcs_guest_rip() vmcs_read(VMCS_GUEST_RIP)
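A minimal consumer sketch (illustrative only, not part of the commit): with the new guard the inline accessors are visible only to files that include vmx_cpufunc.h first, so headers that merely need the vmcs declarations no longer pull in vtophys().

#include "vmx_cpufunc.h"	/* defines _VMX_CPUFUNC_H_ and vmread()/vmwrite() */
#include "vmcs.h"		/* inline vmcs_read()/vmcs_write() now visible */

static uint64_t
sample_guest_rip(void)		/* hypothetical helper, for illustration */
{
	return (vmcs_read(VMCS_GUEST_RIP));
}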
diff --git a/sys/amd64/vmm/intel/vmx.c b/sys/amd64/vmm/intel/vmx.c
index b2c5702..2fe5a27 100644
--- a/sys/amd64/vmm/intel/vmx.c
+++ b/sys/amd64/vmm/intel/vmx.c
@@ -52,20 +52,20 @@ __FBSDID("$FreeBSD$");
#include <machine/vmm.h>
#include <machine/vmm_dev.h>
#include <machine/vmm_instruction_emul.h>
+#include "vmm_lapic.h"
#include "vmm_host.h"
#include "vmm_ioport.h"
#include "vmm_ipi.h"
-#include "vmm_msr.h"
#include "vmm_ktr.h"
#include "vmm_stat.h"
#include "vatpic.h"
#include "vlapic.h"
#include "vlapic_priv.h"
-#include "vmx_msr.h"
#include "ept.h"
#include "vmx_cpufunc.h"
#include "vmx.h"
+#include "vmx_msr.h"
#include "x86.h"
#include "vmx_controls.h"
@@ -81,6 +81,8 @@ __FBSDID("$FreeBSD$");
#define PROCBASED_CTLS_ONE_SETTING \
(PROCBASED_SECONDARY_CONTROLS | \
+ PROCBASED_MWAIT_EXITING | \
+ PROCBASED_MONITOR_EXITING | \
PROCBASED_IO_EXITING | \
PROCBASED_MSR_BITMAPS | \
PROCBASED_CTLS_WINDOW_SETTING | \
@@ -94,34 +96,23 @@ __FBSDID("$FreeBSD$");
#define PROCBASED_CTLS2_ONE_SETTING PROCBASED2_ENABLE_EPT
#define PROCBASED_CTLS2_ZERO_SETTING 0
-#define VM_EXIT_CTLS_ONE_SETTING_NO_PAT \
+#define VM_EXIT_CTLS_ONE_SETTING \
(VM_EXIT_HOST_LMA | \
VM_EXIT_SAVE_EFER | \
- VM_EXIT_LOAD_EFER)
-
-#define VM_EXIT_CTLS_ONE_SETTING \
- (VM_EXIT_CTLS_ONE_SETTING_NO_PAT | \
+ VM_EXIT_LOAD_EFER | \
VM_EXIT_ACKNOWLEDGE_INTERRUPT | \
VM_EXIT_SAVE_PAT | \
VM_EXIT_LOAD_PAT)
+
#define VM_EXIT_CTLS_ZERO_SETTING VM_EXIT_SAVE_DEBUG_CONTROLS
-#define VM_ENTRY_CTLS_ONE_SETTING_NO_PAT VM_ENTRY_LOAD_EFER
+#define VM_ENTRY_CTLS_ONE_SETTING (VM_ENTRY_LOAD_EFER | VM_ENTRY_LOAD_PAT)
-#define VM_ENTRY_CTLS_ONE_SETTING \
- (VM_ENTRY_CTLS_ONE_SETTING_NO_PAT | \
- VM_ENTRY_LOAD_PAT)
#define VM_ENTRY_CTLS_ZERO_SETTING \
(VM_ENTRY_LOAD_DEBUG_CONTROLS | \
VM_ENTRY_INTO_SMM | \
VM_ENTRY_DEACTIVATE_DUAL_MONITOR)
-#define guest_msr_rw(vmx, msr) \
- msr_bitmap_change_access((vmx)->msr_bitmap, (msr), MSR_BITMAP_ACCESS_RW)
-
-#define guest_msr_ro(vmx, msr) \
- msr_bitmap_change_access((vmx)->msr_bitmap, (msr), MSR_BITMAP_ACCESS_READ)
-
#define HANDLED 1
#define UNHANDLED 0
@@ -158,10 +149,6 @@ SYSCTL_INT(_hw_vmm_vmx, OID_AUTO, initialized, CTLFLAG_RD,
*/
static SYSCTL_NODE(_hw_vmm_vmx, OID_AUTO, cap, CTLFLAG_RW, NULL, NULL);
-static int vmx_patmsr;
-SYSCTL_INT(_hw_vmm_vmx_cap, OID_AUTO, patmsr, CTLFLAG_RD, &vmx_patmsr, 0,
- "PAT MSR saved and restored in VCMS");
-
static int cap_halt_exit;
SYSCTL_INT(_hw_vmm_vmx_cap, OID_AUTO, halt_exit, CTLFLAG_RD, &cap_halt_exit, 0,
"HLT triggers a VM-exit");
@@ -208,6 +195,7 @@ SYSCTL_UINT(_hw_vmm_vmx, OID_AUTO, vpid_alloc_failed, CTLFLAG_RD,
static int vmx_getdesc(void *arg, int vcpu, int reg, struct seg_desc *desc);
static int vmx_getreg(void *arg, int vcpu, int reg, uint64_t *retval);
+static int vmxctx_setreg(struct vmxctx *vmxctx, int reg, uint64_t val);
static void vmx_inject_pir(struct vlapic *vlapic);
#ifdef KTR
@@ -475,22 +463,6 @@ vpid_init(void)
}
static void
-msr_save_area_init(struct msr_entry *g_area, int *g_count)
-{
- int cnt;
-
- static struct msr_entry guest_msrs[] = {
- { MSR_KGSBASE, 0, 0 },
- };
-
- cnt = sizeof(guest_msrs) / sizeof(guest_msrs[0]);
- if (cnt > GUEST_MSR_MAX_ENTRIES)
- panic("guest msr save area overrun");
- bcopy(guest_msrs, g_area, sizeof(guest_msrs));
- *g_count = cnt;
-}
-
-static void
vmx_disable(void *arg __unused)
{
struct invvpid_desc invvpid_desc = { 0 };
@@ -636,49 +608,24 @@ vmx_init(int ipinum)
}
/* Check support for VM-exit controls */
- vmx_patmsr = 1;
error = vmx_set_ctlreg(MSR_VMX_EXIT_CTLS, MSR_VMX_TRUE_EXIT_CTLS,
VM_EXIT_CTLS_ONE_SETTING,
VM_EXIT_CTLS_ZERO_SETTING,
&exit_ctls);
if (error) {
- /* Try again without the PAT MSR bits */
- error = vmx_set_ctlreg(MSR_VMX_EXIT_CTLS,
- MSR_VMX_TRUE_EXIT_CTLS,
- VM_EXIT_CTLS_ONE_SETTING_NO_PAT,
- VM_EXIT_CTLS_ZERO_SETTING,
- &exit_ctls);
- if (error) {
- printf("vmx_init: processor does not support desired "
- "exit controls\n");
- return (error);
- } else {
- if (bootverbose)
- printf("vmm: PAT MSR access not supported\n");
- guest_msr_valid(MSR_PAT);
- vmx_patmsr = 0;
- }
+ printf("vmx_init: processor does not support desired "
+ "exit controls\n");
+ return (error);
}
/* Check support for VM-entry controls */
- if (vmx_patmsr) {
- error = vmx_set_ctlreg(MSR_VMX_ENTRY_CTLS,
- MSR_VMX_TRUE_ENTRY_CTLS,
- VM_ENTRY_CTLS_ONE_SETTING,
- VM_ENTRY_CTLS_ZERO_SETTING,
- &entry_ctls);
- } else {
- error = vmx_set_ctlreg(MSR_VMX_ENTRY_CTLS,
- MSR_VMX_TRUE_ENTRY_CTLS,
- VM_ENTRY_CTLS_ONE_SETTING_NO_PAT,
- VM_ENTRY_CTLS_ZERO_SETTING,
- &entry_ctls);
- }
-
+ error = vmx_set_ctlreg(MSR_VMX_ENTRY_CTLS, MSR_VMX_TRUE_ENTRY_CTLS,
+ VM_ENTRY_CTLS_ONE_SETTING, VM_ENTRY_CTLS_ZERO_SETTING,
+ &entry_ctls);
if (error) {
printf("vmx_init: processor does not support desired "
- "entry controls\n");
- return (error);
+ "entry controls\n");
+ return (error);
}
/*
@@ -800,6 +747,8 @@ vmx_init(int ipinum)
vpid_init();
+ vmx_msr_init();
+
/* enable VMX operation */
smp_rendezvous(NULL, vmx_enable, NULL, NULL);
@@ -869,7 +818,7 @@ static void *
vmx_vminit(struct vm *vm, pmap_t pmap)
{
uint16_t vpid[VM_MAXCPU];
- int i, error, guest_msr_count;
+ int i, error;
struct vmx *vmx;
struct vmcs *vmcs;
@@ -905,16 +854,14 @@ vmx_vminit(struct vm *vm, pmap_t pmap)
* how they are saved/restored so can be directly accessed by the
* guest.
*
- * Guest KGSBASE is saved and restored in the guest MSR save area.
- * Host KGSBASE is restored before returning to userland from the pcb.
- * There will be a window of time when we are executing in the host
- * kernel context with a value of KGSBASE from the guest. This is ok
- * because the value of KGSBASE is inconsequential in kernel context.
- *
* MSR_EFER is saved and restored in the guest VMCS area on a
* VM exit and entry respectively. It is also restored from the
* host VMCS area on a VM exit.
*
+ * MSR_PAT is saved and restored in the guest VMCS area on a VM exit
+ * and entry respectively. It is also restored from the host VMCS
+ * area on a VM exit.
+ *
* The TSC MSR is exposed read-only. Writes are disallowed as that
* will impact the host TSC.
* XXX Writes would be implemented with a wrmsr trap, and
@@ -925,21 +872,11 @@ vmx_vminit(struct vm *vm, pmap_t pmap)
guest_msr_rw(vmx, MSR_SYSENTER_CS_MSR) ||
guest_msr_rw(vmx, MSR_SYSENTER_ESP_MSR) ||
guest_msr_rw(vmx, MSR_SYSENTER_EIP_MSR) ||
- guest_msr_rw(vmx, MSR_KGSBASE) ||
guest_msr_rw(vmx, MSR_EFER) ||
+ guest_msr_rw(vmx, MSR_PAT) ||
guest_msr_ro(vmx, MSR_TSC))
panic("vmx_vminit: error setting guest msr access");
- /*
- * MSR_PAT is saved and restored in the guest VMCS are on a VM exit
- * and entry respectively. It is also restored from the host VMCS
- * area on a VM exit. However, if running on a system with no
- * MSR_PAT save/restore support, leave access disabled so accesses
- * will be trapped.
- */
- if (vmx_patmsr && guest_msr_rw(vmx, MSR_PAT))
- panic("vmx_vminit: error setting guest pat msr access");
-
vpid_alloc(vpid, VM_MAXCPU);
if (virtual_interrupt_delivery) {
@@ -958,6 +895,8 @@ vmx_vminit(struct vm *vm, pmap_t pmap)
error, i);
}
+ vmx_msr_guest_init(vmx, i);
+
error = vmcs_init(vmcs);
KASSERT(error == 0, ("vmcs_init error %d", error));
@@ -996,13 +935,6 @@ vmx_vminit(struct vm *vm, pmap_t pmap)
vmx->state[i].lastcpu = NOCPU;
vmx->state[i].vpid = vpid[i];
- msr_save_area_init(vmx->guest_msrs[i], &guest_msr_count);
-
- error = vmcs_set_msr_save(vmcs, vtophys(vmx->guest_msrs[i]),
- guest_msr_count);
- if (error != 0)
- panic("vmcs_set_msr_save error %d", error);
-
/*
* Set up the CR0/4 shadows, and init the read shadow
* to the power-on register value from the Intel Sys Arch.
@@ -2078,6 +2010,46 @@ vmx_task_switch_reason(uint64_t qual)
}
static int
+emulate_wrmsr(struct vmx *vmx, int vcpuid, u_int num, uint64_t val, bool *retu)
+{
+ int error;
+
+ if (lapic_msr(num))
+ error = lapic_wrmsr(vmx->vm, vcpuid, num, val, retu);
+ else
+ error = vmx_wrmsr(vmx, vcpuid, num, val, retu);
+
+ return (error);
+}
+
+static int
+emulate_rdmsr(struct vmx *vmx, int vcpuid, u_int num, bool *retu)
+{
+ struct vmxctx *vmxctx;
+ uint64_t result;
+ uint32_t eax, edx;
+ int error;
+
+ if (lapic_msr(num))
+ error = lapic_rdmsr(vmx->vm, vcpuid, num, &result, retu);
+ else
+ error = vmx_rdmsr(vmx, vcpuid, num, &result, retu);
+
+ if (error == 0) {
+ eax = result;
+ vmxctx = &vmx->ctx[vcpuid];
+ error = vmxctx_setreg(vmxctx, VM_REG_GUEST_RAX, eax);
+ KASSERT(error == 0, ("vmxctx_setreg(rax) error %d", error));
+
+ edx = result >> 32;
+ error = vmxctx_setreg(vmxctx, VM_REG_GUEST_RDX, edx);
+ KASSERT(error == 0, ("vmxctx_setreg(rdx) error %d", error));
+ }
+
+ return (error);
+}
+
+static int
vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit)
{
int error, handled, in;
@@ -2215,7 +2187,7 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit)
retu = false;
ecx = vmxctx->guest_rcx;
VCPU_CTR1(vmx->vm, vcpu, "rdmsr 0x%08x", ecx);
- error = emulate_rdmsr(vmx->vm, vcpu, ecx, &retu);
+ error = emulate_rdmsr(vmx, vcpu, ecx, &retu);
if (error) {
vmexit->exitcode = VM_EXITCODE_RDMSR;
vmexit->u.msr.code = ecx;
@@ -2224,7 +2196,7 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit)
} else {
/* Return to userspace with a valid exitcode */
KASSERT(vmexit->exitcode != VM_EXITCODE_BOGUS,
- ("emulate_wrmsr retu with bogus exitcode"));
+ ("emulate_rdmsr retu with bogus exitcode"));
}
break;
case EXIT_REASON_WRMSR:
@@ -2235,7 +2207,7 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit)
edx = vmxctx->guest_rdx;
VCPU_CTR2(vmx->vm, vcpu, "wrmsr 0x%08x value 0x%016lx",
ecx, (uint64_t)edx << 32 | eax);
- error = emulate_wrmsr(vmx->vm, vcpu, ecx,
+ error = emulate_wrmsr(vmx, vcpu, ecx,
(uint64_t)edx << 32 | eax, &retu);
if (error) {
vmexit->exitcode = VM_EXITCODE_WRMSR;
@@ -2403,6 +2375,12 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit)
case EXIT_REASON_XSETBV:
handled = vmx_emulate_xsetbv(vmx, vcpu, vmexit);
break;
+ case EXIT_REASON_MONITOR:
+ vmexit->exitcode = VM_EXITCODE_MONITOR;
+ break;
+ case EXIT_REASON_MWAIT:
+ vmexit->exitcode = VM_EXITCODE_MWAIT;
+ break;
default:
vmm_stat_incr(vmx->vm, vcpu, VMEXIT_UNKNOWN, 1);
break;
@@ -2523,6 +2501,8 @@ vmx_run(void *arg, int vcpu, register_t startrip, pmap_t pmap,
KASSERT(vmxctx->pmap == pmap,
("pmap %p different than ctx pmap %p", pmap, vmxctx->pmap));
+ vmx_msr_guest_enter(vmx, vcpu);
+
VMPTRLD(vmcs);
/*
@@ -2624,6 +2604,8 @@ vmx_run(void *arg, int vcpu, register_t startrip, pmap_t pmap,
vmexit->exitcode);
VMCLEAR(vmcs);
+ vmx_msr_guest_exit(vmx, vcpu);
+
return (0);
}
@@ -2712,6 +2694,46 @@ vmxctx_setreg(struct vmxctx *vmxctx, int reg, uint64_t val)
}
static int
+vmx_get_intr_shadow(struct vmx *vmx, int vcpu, int running, uint64_t *retval)
+{
+ uint64_t gi;
+ int error;
+
+ error = vmcs_getreg(&vmx->vmcs[vcpu], running,
+ VMCS_IDENT(VMCS_GUEST_INTERRUPTIBILITY), &gi);
+ *retval = (gi & HWINTR_BLOCKING) ? 1 : 0;
+ return (error);
+}
+
+static int
+vmx_modify_intr_shadow(struct vmx *vmx, int vcpu, int running, uint64_t val)
+{
+ struct vmcs *vmcs;
+ uint64_t gi;
+ int error, ident;
+
+ /*
+ * Forcing the vcpu into an interrupt shadow is not supported.
+ */
+ if (val) {
+ error = EINVAL;
+ goto done;
+ }
+
+ vmcs = &vmx->vmcs[vcpu];
+ ident = VMCS_IDENT(VMCS_GUEST_INTERRUPTIBILITY);
+ error = vmcs_getreg(vmcs, running, ident, &gi);
+ if (error == 0) {
+ gi &= ~HWINTR_BLOCKING;
+ error = vmcs_setreg(vmcs, running, ident, gi);
+ }
+done:
+ VCPU_CTR2(vmx->vm, vcpu, "Setting intr_shadow to %#lx %s", val,
+ error ? "failed" : "succeeded");
+ return (error);
+}
+
+static int
vmx_shadow_reg(int reg)
{
int shreg;
@@ -2742,6 +2764,9 @@ vmx_getreg(void *arg, int vcpu, int reg, uint64_t *retval)
if (running && hostcpu != curcpu)
panic("vmx_getreg: %s%d is running", vm_name(vmx->vm), vcpu);
+ if (reg == VM_REG_GUEST_INTR_SHADOW)
+ return (vmx_get_intr_shadow(vmx, vcpu, running, retval));
+
if (vmxctx_getreg(&vmx->ctx[vcpu], reg, retval) == 0)
return (0);
@@ -2760,6 +2785,9 @@ vmx_setreg(void *arg, int vcpu, int reg, uint64_t val)
if (running && hostcpu != curcpu)
panic("vmx_setreg: %s%d is running", vm_name(vmx->vm), vcpu);
+ if (reg == VM_REG_GUEST_INTR_SHADOW)
+ return (vmx_modify_intr_shadow(vmx, vcpu, running, val));
+
if (vmxctx_setreg(&vmx->ctx[vcpu], reg, val) == 0)
return (0);
diff --git a/sys/amd64/vmm/intel/vmx.h b/sys/amd64/vmm/intel/vmx.h
index 208fcee..2124554 100644
--- a/sys/amd64/vmm/intel/vmx.h
+++ b/sys/amd64/vmm/intel/vmx.h
@@ -33,8 +33,6 @@
struct pmap;
-#define GUEST_MSR_MAX_ENTRIES 64 /* arbitrary */
-
struct vmxctx {
register_t guest_rdi; /* Guest state */
register_t guest_rsi;
@@ -97,13 +95,23 @@ struct pir_desc {
} __aligned(64);
CTASSERT(sizeof(struct pir_desc) == 64);
+/* Index into the 'guest_msrs[]' array */
+enum {
+ IDX_MSR_LSTAR,
+ IDX_MSR_CSTAR,
+ IDX_MSR_STAR,
+ IDX_MSR_SF_MASK,
+ IDX_MSR_KGSBASE,
+ GUEST_MSR_NUM /* must be the last enumeration */
+};
+
/* virtual machine softc */
struct vmx {
struct vmcs vmcs[VM_MAXCPU]; /* one vmcs per virtual cpu */
struct apic_page apic_page[VM_MAXCPU]; /* one apic page per vcpu */
char msr_bitmap[PAGE_SIZE];
struct pir_desc pir_desc[VM_MAXCPU];
- struct msr_entry guest_msrs[VM_MAXCPU][GUEST_MSR_MAX_ENTRIES];
+ uint64_t guest_msrs[VM_MAXCPU][GUEST_MSR_NUM];
struct vmxctx ctx[VM_MAXCPU];
struct vmxcap cap[VM_MAXCPU];
struct vmxstate state[VM_MAXCPU];
@@ -113,7 +121,6 @@ struct vmx {
};
CTASSERT((offsetof(struct vmx, vmcs) & PAGE_MASK) == 0);
CTASSERT((offsetof(struct vmx, msr_bitmap) & PAGE_MASK) == 0);
-CTASSERT((offsetof(struct vmx, guest_msrs) & 15) == 0);
CTASSERT((offsetof(struct vmx, pir_desc[0]) & 63) == 0);
#define VMX_GUEST_VMEXIT 0
diff --git a/sys/amd64/vmm/intel/vmx_msr.c b/sys/amd64/vmm/intel/vmx_msr.c
index a3428db..746ca73 100644
--- a/sys/amd64/vmm/intel/vmx_msr.c
+++ b/sys/amd64/vmm/intel/vmx_msr.c
@@ -31,10 +31,15 @@ __FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/systm.h>
+#include <sys/cpuset.h>
+#include <machine/clock.h>
#include <machine/cpufunc.h>
+#include <machine/md_var.h>
#include <machine/specialreg.h>
+#include <machine/vmm.h>
+#include "vmx.h"
#include "vmx_msr.h"
static boolean_t
@@ -171,3 +176,213 @@ msr_bitmap_change_access(char *bitmap, u_int msr, int access)
return (0);
}
+
+static uint64_t misc_enable;
+static uint64_t platform_info;
+static uint64_t turbo_ratio_limit;
+static uint64_t host_msrs[GUEST_MSR_NUM];
+
+static bool
+nehalem_cpu(void)
+{
+ u_int family, model;
+
+ /*
+ * The family:model numbers belonging to the Nehalem microarchitecture
+ * are documented in Section 35.5, Intel SDM dated Feb 2014.
+ */
+ family = CPUID_TO_FAMILY(cpu_id);
+ model = CPUID_TO_MODEL(cpu_id);
+ if (family == 0x6) {
+ switch (model) {
+ case 0x1A:
+ case 0x1E:
+ case 0x1F:
+ case 0x2E:
+ return (true);
+ default:
+ break;
+ }
+ }
+ return (false);
+}
+
+static bool
+westmere_cpu(void)
+{
+ u_int family, model;
+
+ /*
+ * The family:model numbers belonging to the Westmere microarchitecture
+ * are documented in Section 35.6, Intel SDM dated Feb 2014.
+ */
+ family = CPUID_TO_FAMILY(cpu_id);
+ model = CPUID_TO_MODEL(cpu_id);
+ if (family == 0x6) {
+ switch (model) {
+ case 0x25:
+ case 0x2C:
+ return (true);
+ default:
+ break;
+ }
+ }
+ return (false);
+}
+
+void
+vmx_msr_init(void)
+{
+ uint64_t bus_freq, ratio;
+ int i;
+
+ /*
+ * It is safe to cache the values of the following MSRs because
+ * they don't change based on curcpu, curproc or curthread.
+ */
+ host_msrs[IDX_MSR_LSTAR] = rdmsr(MSR_LSTAR);
+ host_msrs[IDX_MSR_CSTAR] = rdmsr(MSR_CSTAR);
+ host_msrs[IDX_MSR_STAR] = rdmsr(MSR_STAR);
+ host_msrs[IDX_MSR_SF_MASK] = rdmsr(MSR_SF_MASK);
+
+ /*
+ * Initialize emulated MSRs
+ */
+ misc_enable = rdmsr(MSR_IA32_MISC_ENABLE);
+ /*
+ * Set mandatory bits
+ * 11: branch trace disabled
+ * 12: PEBS unavailable
+ * Clear unsupported features
+ * 16: SpeedStep enable
+ * 18: enable MONITOR FSM
+ */
+ misc_enable |= (1 << 12) | (1 << 11);
+ misc_enable &= ~((1 << 18) | (1 << 16));
+
+ if (nehalem_cpu() || westmere_cpu())
+ bus_freq = 133330000; /* 133 MHz */
+ else
+ bus_freq = 100000000; /* 100 MHz */
+
+ /*
+ * XXXtime
+ * The ratio should really be based on the virtual TSC frequency as
+ * opposed to the host TSC.
+ */
+ ratio = (tsc_freq / bus_freq) & 0xff;
+
+ /*
+ * The register definition is based on the micro-architecture
+ * but the following bits are always the same:
+ * [15:8] Maximum Non-Turbo Ratio
+ * [28] Programmable Ratio Limit for Turbo Mode
+ * [29] Programmable TDC-TDP Limit for Turbo Mode
+ * [47:40] Maximum Efficiency Ratio
+ *
+ * The other bits can be safely set to 0 on all
+ * micro-architectures up to Haswell.
+ */
+ platform_info = (ratio << 8) | (ratio << 40);
+
+ /*
+ * The number of valid bits in the MSR_TURBO_RATIO_LIMITx register is
+ * dependent on the maximum cores per package supported by the micro-
+ * architecture. For e.g., Westmere supports 6 cores per package and
+ * uses the low 48 bits. Sandybridge support 8 cores per package and
+ * uses up all 64 bits.
+ *
+ * However, the unused bits are reserved so we pretend that all bits
+ * in this MSR are valid.
+ */
+ for (i = 0; i < 8; i++)
+ turbo_ratio_limit = (turbo_ratio_limit << 8) | ratio;
+}
+
+void
+vmx_msr_guest_init(struct vmx *vmx, int vcpuid)
+{
+ /*
+ * The permissions bitmap is shared between all vcpus so initialize it
+ * once when initializing the vBSP.
+ */
+ if (vcpuid == 0) {
+ guest_msr_rw(vmx, MSR_LSTAR);
+ guest_msr_rw(vmx, MSR_CSTAR);
+ guest_msr_rw(vmx, MSR_STAR);
+ guest_msr_rw(vmx, MSR_SF_MASK);
+ guest_msr_rw(vmx, MSR_KGSBASE);
+ }
+ return;
+}
+
+void
+vmx_msr_guest_enter(struct vmx *vmx, int vcpuid)
+{
+ uint64_t *guest_msrs = vmx->guest_msrs[vcpuid];
+
+ /* Save host MSRs (if any) and restore guest MSRs */
+ wrmsr(MSR_LSTAR, guest_msrs[IDX_MSR_LSTAR]);
+ wrmsr(MSR_CSTAR, guest_msrs[IDX_MSR_CSTAR]);
+ wrmsr(MSR_STAR, guest_msrs[IDX_MSR_STAR]);
+ wrmsr(MSR_SF_MASK, guest_msrs[IDX_MSR_SF_MASK]);
+ wrmsr(MSR_KGSBASE, guest_msrs[IDX_MSR_KGSBASE]);
+}
+
+void
+vmx_msr_guest_exit(struct vmx *vmx, int vcpuid)
+{
+ uint64_t *guest_msrs = vmx->guest_msrs[vcpuid];
+
+ /* Save guest MSRs */
+ guest_msrs[IDX_MSR_LSTAR] = rdmsr(MSR_LSTAR);
+ guest_msrs[IDX_MSR_CSTAR] = rdmsr(MSR_CSTAR);
+ guest_msrs[IDX_MSR_STAR] = rdmsr(MSR_STAR);
+ guest_msrs[IDX_MSR_SF_MASK] = rdmsr(MSR_SF_MASK);
+ guest_msrs[IDX_MSR_KGSBASE] = rdmsr(MSR_KGSBASE);
+
+ /* Restore host MSRs */
+ wrmsr(MSR_LSTAR, host_msrs[IDX_MSR_LSTAR]);
+ wrmsr(MSR_CSTAR, host_msrs[IDX_MSR_CSTAR]);
+ wrmsr(MSR_STAR, host_msrs[IDX_MSR_STAR]);
+ wrmsr(MSR_SF_MASK, host_msrs[IDX_MSR_SF_MASK]);
+
+ /* MSR_KGSBASE will be restored on the way back to userspace */
+}
+
+int
+vmx_rdmsr(struct vmx *vmx, int vcpuid, u_int num, uint64_t *val, bool *retu)
+{
+ int error = 0;
+
+ switch (num) {
+ case MSR_IA32_MISC_ENABLE:
+ *val = misc_enable;
+ break;
+ case MSR_PLATFORM_INFO:
+ *val = platform_info;
+ break;
+ case MSR_TURBO_RATIO_LIMIT:
+ case MSR_TURBO_RATIO_LIMIT1:
+ *val = turbo_ratio_limit;
+ break;
+ default:
+ error = EINVAL;
+ break;
+ }
+ return (error);
+}
+
+int
+vmx_wrmsr(struct vmx *vmx, int vcpuid, u_int num, uint64_t val, bool *retu)
+{
+ int error = 0;
+
+ switch (num) {
+ default:
+ error = EINVAL;
+ break;
+ }
+
+ return (error);
+}
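A worked example of the values synthesized above (illustrative; assumes a hypothetical 2.4 GHz host TSC on a non-Nehalem/Westmere part, so bus_freq is 100 MHz):

/*
 * ratio             = (2400000000 / 100000000) & 0xff  = 0x18 (24)
 * platform_info     = (0x18 << 8) | (0x18UL << 40)     = 0x0000180000001800
 * turbo_ratio_limit = 0x18 replicated into all 8 bytes = 0x1818181818181818
 *
 * i.e. the guest sees a maximum non-turbo ratio, a maximum efficiency ratio
 * and per-core turbo limits that all equal 24.
 */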
diff --git a/sys/amd64/vmm/intel/vmx_msr.h b/sys/amd64/vmm/intel/vmx_msr.h
index 340b0f7..e77881c 100644
--- a/sys/amd64/vmm/intel/vmx_msr.h
+++ b/sys/amd64/vmm/intel/vmx_msr.h
@@ -29,6 +29,15 @@
#ifndef _VMX_MSR_H_
#define _VMX_MSR_H_
+struct vmx;
+
+void vmx_msr_init(void);
+void vmx_msr_guest_init(struct vmx *vmx, int vcpuid);
+void vmx_msr_guest_enter(struct vmx *vmx, int vcpuid);
+void vmx_msr_guest_exit(struct vmx *vmx, int vcpuid);
+int vmx_rdmsr(struct vmx *, int vcpuid, u_int num, uint64_t *val, bool *retu);
+int vmx_wrmsr(struct vmx *, int vcpuid, u_int num, uint64_t val, bool *retu);
+
uint32_t vmx_revision(void);
int vmx_set_ctlreg(int ctl_reg, int true_ctl_reg, uint32_t ones_mask,
@@ -52,4 +61,10 @@ int vmx_set_ctlreg(int ctl_reg, int true_ctl_reg, uint32_t ones_mask,
void msr_bitmap_initialize(char *bitmap);
int msr_bitmap_change_access(char *bitmap, u_int msr, int access);
+#define guest_msr_rw(vmx, msr) \
+ msr_bitmap_change_access((vmx)->msr_bitmap, (msr), MSR_BITMAP_ACCESS_RW)
+
+#define guest_msr_ro(vmx, msr) \
+ msr_bitmap_change_access((vmx)->msr_bitmap, (msr), MSR_BITMAP_ACCESS_READ)
+
#endif
diff --git a/sys/amd64/vmm/io/vatpic.c b/sys/amd64/vmm/io/vatpic.c
index 15620d5..b710a84 100644
--- a/sys/amd64/vmm/io/vatpic.c
+++ b/sys/amd64/vmm/io/vatpic.c
@@ -500,13 +500,19 @@ vatpic_pending_intr(struct vm *vm, int *vecptr)
VATPIC_LOCK(vatpic);
pin = vatpic_get_highest_irrpin(atpic);
- if (pin == -1)
- pin = 7;
if (pin == 2) {
atpic = &vatpic->atpic[1];
pin = vatpic_get_highest_irrpin(atpic);
}
+ /*
+ * If there are no pins active at this moment then return the spurious
+ * interrupt vector instead.
+ */
+ if (pin == -1)
+ pin = 7;
+
+ KASSERT(pin >= 0 && pin <= 7, ("%s: invalid pin %d", __func__, pin));
*vecptr = atpic->irq_base + pin;
VATPIC_UNLOCK(vatpic);
diff --git a/sys/amd64/vmm/io/vlapic.c b/sys/amd64/vmm/io/vlapic.c
index 3c93463..d684dba 100644
--- a/sys/amd64/vmm/io/vlapic.c
+++ b/sys/amd64/vmm/io/vlapic.c
@@ -633,6 +633,7 @@ vlapic_fire_timer(struct vlapic *vlapic)
// The timer LVT always uses the fixed delivery mode.
lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_TIMER_LVT);
if (vlapic_fire_lvt(vlapic, lvt | APIC_LVT_DM_FIXED)) {
+ VLAPIC_CTR0(vlapic, "vlapic timer fired");
vmm_stat_incr(vlapic->vm, vlapic->vcpuid, VLAPIC_INTR_TIMER, 1);
}
}
diff --git a/sys/amd64/vmm/vmm.c b/sys/amd64/vmm/vmm.c
index fa0200e..ddf875b 100644
--- a/sys/amd64/vmm/vmm.c
+++ b/sys/amd64/vmm/vmm.c
@@ -74,7 +74,6 @@ __FBSDID("$FreeBSD$");
#include "vhpet.h"
#include "vioapic.h"
#include "vlapic.h"
-#include "vmm_msr.h"
#include "vmm_ipi.h"
#include "vmm_stat.h"
#include "vmm_lapic.h"
@@ -105,7 +104,6 @@ struct vcpu {
struct savefpu *guestfpu; /* (a,i) guest fpu state */
uint64_t guest_xcr0; /* (i) guest %xcr0 register */
void *stats; /* (a,i) statistics */
- uint64_t guest_msrs[VMM_MSR_NUM]; /* (i) emulated MSRs */
struct vm_exit exitinfo; /* (x) exit reason and collateral */
};
@@ -188,7 +186,6 @@ static struct vmm_ops *ops;
#define fpu_stop_emulating() clts()
static MALLOC_DEFINE(M_VM, "vm", "vm");
-CTASSERT(VMM_MSR_NUM <= 64); /* msr_mask can keep track of up to 64 msrs */
/* statistics */
static VMM_STAT(VCPU_TOTAL_RUNTIME, "vcpu total runtime");
@@ -250,7 +247,6 @@ vcpu_init(struct vm *vm, int vcpu_id, bool create)
vcpu->guest_xcr0 = XFEATURE_ENABLED_X87;
fpu_save_area_reset(vcpu->guestfpu);
vmm_stat_init(vcpu->stats);
- guest_msrs_init(vm, vcpu_id);
}
struct vm_exit *
@@ -294,7 +290,6 @@ vmm_init(void)
else
return (ENXIO);
- vmm_msr_init();
vmm_resume_p = vmm_resume;
return (VMM_INIT(vmm_ipinum));
@@ -1091,7 +1086,7 @@ vm_handle_hlt(struct vm *vm, int vcpuid, bool intr_disabled, bool *retu)
{
struct vcpu *vcpu;
const char *wmesg;
- int t, vcpu_halted, vm_halted;
+ int error, t, vcpu_halted, vm_halted;
KASSERT(!CPU_ISSET(vcpuid, &vm->halted_cpus), ("vcpu already halted"));
@@ -1099,6 +1094,22 @@ vm_handle_hlt(struct vm *vm, int vcpuid, bool intr_disabled, bool *retu)
vcpu_halted = 0;
vm_halted = 0;
+ /*
+ * The typical way to halt a cpu is to execute: "sti; hlt"
+ *
+ * STI sets RFLAGS.IF to enable interrupts. However, the processor
+ * remains in an "interrupt shadow" for an additional instruction
+ * following the STI. This guarantees that "sti; hlt" sequence is
+ * atomic and a pending interrupt will be recognized after the HLT.
+ *
+ * After the HLT emulation is done the vcpu is no longer in an
+ * interrupt shadow and a pending interrupt can be injected on
+ * the next entry into the guest.
+ */
+ error = vm_set_register(vm, vcpuid, VM_REG_GUEST_INTR_SHADOW, 0);
+ KASSERT(error == 0, ("%s: error %d clearing interrupt shadow",
+ __func__, error));
+
vcpu_lock(vcpu);
while (1) {
/*
@@ -1187,8 +1198,12 @@ vm_handle_paging(struct vm *vm, int vcpuid, bool *retu)
if (ftype == VM_PROT_READ || ftype == VM_PROT_WRITE) {
rv = pmap_emulate_accessed_dirty(vmspace_pmap(vm->vmspace),
vme->u.paging.gpa, ftype);
- if (rv == 0)
+ if (rv == 0) {
+ VCPU_CTR2(vm, vcpuid, "%s bit emulation for gpa %#lx",
+ ftype == VM_PROT_READ ? "accessed" : "dirty",
+ vme->u.paging.gpa);
goto done;
+ }
}
map = &vm->vmspace->vm_map;
@@ -1229,6 +1244,8 @@ vm_handle_inst_emul(struct vm *vm, int vcpuid, bool *retu)
paging = &vme->u.inst_emul.paging;
cpu_mode = paging->cpu_mode;
+ VCPU_CTR1(vm, vcpuid, "inst_emul fault accessing gpa %#lx", gpa);
+
vie_init(vie);
/* Fetch, decode and emulate the faulting instruction */
@@ -1425,7 +1442,6 @@ restart:
pcb = PCPU_GET(curpcb);
set_pcb_flags(pcb, PCB_FULL_IRET);
- restore_guest_msrs(vm, vcpuid);
restore_guest_fpustate(vcpu);
vcpu_require_state(vm, vcpuid, VCPU_RUNNING);
@@ -1433,7 +1449,6 @@ restart:
vcpu_require_state(vm, vcpuid, VCPU_FROZEN);
save_guest_fpustate(vcpu);
- restore_host_msrs(vm, vcpuid);
vmm_stat_incr(vm, vcpuid, VCPU_TOTAL_RUNTIME, rdtsc() - tscval);
@@ -1467,6 +1482,10 @@ restart:
case VM_EXITCODE_INOUT_STR:
error = vm_handle_inout(vm, vcpuid, vme, &retu);
break;
+ case VM_EXITCODE_MONITOR:
+ case VM_EXITCODE_MWAIT:
+ vm_inject_ud(vm, vcpuid);
+ break;
default:
retu = true; /* handled in userland */
break;
@@ -1875,12 +1894,6 @@ vm_set_capability(struct vm *vm, int vcpu, int type, int val)
return (VMSETCAP(vm->cookie, vcpu, type, val));
}
-uint64_t *
-vm_guest_msrs(struct vm *vm, int cpu)
-{
- return (vm->vcpu[cpu].guest_msrs);
-}
-
struct vlapic *
vm_lapic(struct vm *vm, int cpu)
{
diff --git a/sys/amd64/vmm/vmm_instruction_emul.c b/sys/amd64/vmm/vmm_instruction_emul.c
index 09453a2..c6ba01e 100644
--- a/sys/amd64/vmm/vmm_instruction_emul.c
+++ b/sys/amd64/vmm/vmm_instruction_emul.c
@@ -69,6 +69,7 @@ enum {
VIE_OP_TYPE_TWO_BYTE,
VIE_OP_TYPE_PUSH,
VIE_OP_TYPE_CMP,
+ VIE_OP_TYPE_POP,
VIE_OP_TYPE_LAST
};
@@ -159,6 +160,11 @@ static const struct vie_op one_byte_opcodes[256] = {
.op_type = VIE_OP_TYPE_OR,
.op_flags = VIE_OP_F_IMM8,
},
+ [0x8F] = {
+ /* XXX Group 1A extended opcode - not just POP */
+ .op_byte = 0x8F,
+ .op_type = VIE_OP_TYPE_POP,
+ },
[0xFF] = {
/* XXX Group 5 extended opcode - not just PUSH */
.op_byte = 0xFF,
@@ -316,46 +322,36 @@ vie_update_register(void *vm, int vcpuid, enum vm_reg_name reg,
return (error);
}
+#define RFLAGS_STATUS_BITS (PSL_C | PSL_PF | PSL_AF | PSL_Z | PSL_N | PSL_V)
+
/*
* Return the status flags that would result from doing (x - y).
*/
-static u_long
-getcc16(uint16_t x, uint16_t y)
-{
- u_long rflags;
-
- __asm __volatile("sub %1,%2; pushfq; popq %0" :
- "=r" (rflags) : "m" (y), "r" (x));
- return (rflags);
-}
-
-static u_long
-getcc32(uint32_t x, uint32_t y)
-{
- u_long rflags;
-
- __asm __volatile("sub %1,%2; pushfq; popq %0" :
- "=r" (rflags) : "m" (y), "r" (x));
- return (rflags);
-}
-
-static u_long
-getcc64(uint64_t x, uint64_t y)
-{
- u_long rflags;
-
- __asm __volatile("sub %1,%2; pushfq; popq %0" :
- "=r" (rflags) : "m" (y), "r" (x));
- return (rflags);
-}
+#define GETCC(sz) \
+static u_long \
+getcc##sz(uint##sz##_t x, uint##sz##_t y) \
+{ \
+ u_long rflags; \
+ \
+ __asm __volatile("sub %2,%1; pushfq; popq %0" : \
+ "=r" (rflags), "+r" (x) : "m" (y)); \
+ return (rflags); \
+} struct __hack
+
+GETCC(8);
+GETCC(16);
+GETCC(32);
+GETCC(64);
static u_long
getcc(int opsize, uint64_t x, uint64_t y)
{
- KASSERT(opsize == 2 || opsize == 4 || opsize == 8,
+ KASSERT(opsize == 1 || opsize == 2 || opsize == 4 || opsize == 8,
("getcc: invalid operand size %d", opsize));
- if (opsize == 2)
+ if (opsize == 1)
+ return (getcc8(x, y));
+ else if (opsize == 2)
return (getcc16(x, y));
else if (opsize == 4)
return (getcc32(x, y));
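For reference, GETCC(32) above expands to the function below; it is equivalent to the getcc32() it replaces except that 'x', the destination of the subtraction, is now correctly declared as a read/write ("+r") operand (illustrative expansion, not extra code in the commit):

static u_long
getcc32(uint32_t x, uint32_t y)
{
	u_long rflags;

	/* compute x - y, then capture the resulting RFLAGS via pushfq/popq */
	__asm __volatile("sub %2,%1; pushfq; popq %0" :
	    "=r" (rflags), "+r" (x) : "m" (y));
	return (rflags);
}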
@@ -569,7 +565,7 @@ emulate_and(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
{
int error, size;
enum vm_reg_name reg;
- uint64_t val1, val2;
+ uint64_t result, rflags, rflags2, val1, val2;
size = vie->opsize;
error = EINVAL;
@@ -597,8 +593,8 @@ emulate_and(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
break;
/* perform the operation and write the result */
- val1 &= val2;
- error = vie_update_register(vm, vcpuid, reg, val1, size);
+ result = val1 & val2;
+ error = vie_update_register(vm, vcpuid, reg, result, size);
break;
case 0x81:
/*
@@ -625,11 +621,11 @@ emulate_and(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
switch (vie->reg & 7) {
case 0x4:
/* modrm:reg == b100, AND */
- val1 &= vie->immediate;
+ result = val1 & vie->immediate;
break;
case 0x1:
/* modrm:reg == b001, OR */
- val1 |= vie->immediate;
+ result = val1 | vie->immediate;
break;
default:
error = EINVAL;
@@ -638,11 +634,29 @@ emulate_and(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
if (error)
break;
- error = memwrite(vm, vcpuid, gpa, val1, size, arg);
+ error = memwrite(vm, vcpuid, gpa, result, size, arg);
break;
default:
break;
}
+ if (error)
+ return (error);
+
+ error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, &rflags);
+ if (error)
+ return (error);
+
+ /*
+ * OF and CF are cleared; the SF, ZF and PF flags are set according
+ * to the result; AF is undefined.
+ *
+ * The updated status flags are obtained by subtracting 0 from 'result'.
+ */
+ rflags2 = getcc(size, result, 0);
+ rflags &= ~RFLAGS_STATUS_BITS;
+ rflags |= rflags2 & (PSL_PF | PSL_Z | PSL_N);
+
+ error = vie_update_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, rflags, 8);
return (error);
}
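A quick illustration of the new flag handling (hypothetical operands, not from the commit):

/*
 * "and $0x0f, <mem>" with a previous value of 0xf0 yields result 0.
 * getcc(size, 0, 0) computes 0 - 0, so ZF and PF are set in rflags2; the
 * emulation keeps only PSL_PF|PSL_Z|PSL_N from that and leaves CF/OF
 * cleared, matching the architected behaviour of AND and OR.
 */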
@@ -651,7 +665,7 @@ emulate_or(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
mem_region_read_t memread, mem_region_write_t memwrite, void *arg)
{
int error, size;
- uint64_t val1;
+ uint64_t val1, result, rflags, rflags2;
size = vie->opsize;
error = EINVAL;
@@ -681,17 +695,33 @@ emulate_or(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
* perform the operation with the pre-fetched immediate
* operand and write the result
*/
- val1 |= vie->immediate;
- error = memwrite(vm, vcpuid, gpa, val1, size, arg);
+ result = val1 | vie->immediate;
+ error = memwrite(vm, vcpuid, gpa, result, size, arg);
break;
default:
break;
}
+ if (error)
+ return (error);
+
+ error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, &rflags);
+ if (error)
+ return (error);
+
+ /*
+ * OF and CF are cleared; the SF, ZF and PF flags are set according
+ * to the result; AF is undefined.
+ *
+ * The updated status flags are obtained by subtracting 0 from 'result'.
+ */
+ rflags2 = getcc(size, result, 0);
+ rflags &= ~RFLAGS_STATUS_BITS;
+ rflags |= rflags2 & (PSL_PF | PSL_Z | PSL_N);
+
+ error = vie_update_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, rflags, 8);
return (error);
}
-#define RFLAGS_STATUS_BITS (PSL_C | PSL_PF | PSL_AF | PSL_Z | PSL_N | PSL_V)
-
static int
emulate_cmp(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
mem_region_read_t memread, mem_region_write_t memwrite, void *arg)
@@ -797,7 +827,7 @@ emulate_sub(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
}
static int
-emulate_push(void *vm, int vcpuid, uint64_t mmio_gpa, struct vie *vie,
+emulate_stack_op(void *vm, int vcpuid, uint64_t mmio_gpa, struct vie *vie,
struct vm_guest_paging *paging, mem_region_read_t memread,
mem_region_write_t memwrite, void *arg)
{
@@ -808,18 +838,12 @@ emulate_push(void *vm, int vcpuid, uint64_t mmio_gpa, struct vie *vie,
#endif
struct seg_desc ss_desc;
uint64_t cr0, rflags, rsp, stack_gla, val;
- int error, size, stackaddrsize;
-
- /*
- * Table A-6, "Opcode Extensions", Intel SDM, Vol 2.
- *
- * PUSH is part of the group 5 extended opcodes and is identified
- * by ModRM:reg = b110.
- */
- if ((vie->reg & 7) != 6)
- return (EINVAL);
+ int error, size, stackaddrsize, pushop;
+ val = 0;
size = vie->opsize;
+ pushop = (vie->op.op_type == VIE_OP_TYPE_PUSH) ? 1 : 0;
+
/*
* From "Address-Size Attributes for Stack Accesses", Intel SDL, Vol 1
*/
@@ -858,10 +882,13 @@ emulate_push(void *vm, int vcpuid, uint64_t mmio_gpa, struct vie *vie,
error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RSP, &rsp);
KASSERT(error == 0, ("%s: error %d getting rsp", __func__, error));
+ if (pushop) {
+ rsp -= size;
+ }
- rsp -= size;
if (vie_calculate_gla(paging->cpu_mode, VM_REG_GUEST_SS, &ss_desc,
- rsp, size, stackaddrsize, PROT_WRITE, &stack_gla)) {
+ rsp, size, stackaddrsize, pushop ? PROT_WRITE : PROT_READ,
+ &stack_gla)) {
vm_inject_ss(vm, vcpuid, 0);
return (0);
}
@@ -876,8 +903,8 @@ emulate_push(void *vm, int vcpuid, uint64_t mmio_gpa, struct vie *vie,
return (0);
}
- error = vm_copy_setup(vm, vcpuid, paging, stack_gla, size, PROT_WRITE,
- copyinfo, nitems(copyinfo));
+ error = vm_copy_setup(vm, vcpuid, paging, stack_gla, size,
+ pushop ? PROT_WRITE : PROT_READ, copyinfo, nitems(copyinfo));
if (error == -1) {
/*
* XXX cannot return a negative error value here because it
@@ -890,16 +917,66 @@ emulate_push(void *vm, int vcpuid, uint64_t mmio_gpa, struct vie *vie,
return (0);
}
- error = memread(vm, vcpuid, mmio_gpa, &val, size, arg);
+ if (pushop) {
+ error = memread(vm, vcpuid, mmio_gpa, &val, size, arg);
+ if (error == 0)
+ vm_copyout(vm, vcpuid, &val, copyinfo, size);
+ } else {
+ vm_copyin(vm, vcpuid, copyinfo, &val, size);
+ error = memwrite(vm, vcpuid, mmio_gpa, val, size, arg);
+ rsp += size;
+ }
+#ifdef _KERNEL
+ vm_copy_teardown(vm, vcpuid, copyinfo, nitems(copyinfo));
+#endif
+
if (error == 0) {
- vm_copyout(vm, vcpuid, &val, copyinfo, size);
error = vie_update_register(vm, vcpuid, VM_REG_GUEST_RSP, rsp,
stackaddrsize);
KASSERT(error == 0, ("error %d updating rsp", error));
}
-#ifdef _KERNEL
- vm_copy_teardown(vm, vcpuid, copyinfo, nitems(copyinfo));
-#endif
+ return (error);
+}
+
+static int
+emulate_push(void *vm, int vcpuid, uint64_t mmio_gpa, struct vie *vie,
+ struct vm_guest_paging *paging, mem_region_read_t memread,
+ mem_region_write_t memwrite, void *arg)
+{
+ int error;
+
+ /*
+ * Table A-6, "Opcode Extensions", Intel SDM, Vol 2.
+ *
+ * PUSH is part of the group 5 extended opcodes and is identified
+ * by ModRM:reg = b110.
+ */
+ if ((vie->reg & 7) != 6)
+ return (EINVAL);
+
+ error = emulate_stack_op(vm, vcpuid, mmio_gpa, vie, paging, memread,
+ memwrite, arg);
+ return (error);
+}
+
+static int
+emulate_pop(void *vm, int vcpuid, uint64_t mmio_gpa, struct vie *vie,
+ struct vm_guest_paging *paging, mem_region_read_t memread,
+ mem_region_write_t memwrite, void *arg)
+{
+ int error;
+
+ /*
+ * Table A-6, "Opcode Extensions", Intel SDM, Vol 2.
+ *
+ * POP is part of the group 1A extended opcodes and is identified
+ * by ModRM:reg = b000.
+ */
+ if ((vie->reg & 7) != 0)
+ return (EINVAL);
+
+ error = emulate_stack_op(vm, vcpuid, mmio_gpa, vie, paging, memread,
+ memwrite, arg);
return (error);
}
@@ -914,6 +991,10 @@ vmm_emulate_instruction(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
return (EINVAL);
switch (vie->op.op_type) {
+ case VIE_OP_TYPE_POP:
+ error = emulate_pop(vm, vcpuid, gpa, vie, paging, memread,
+ memwrite, memarg);
+ break;
case VIE_OP_TYPE_PUSH:
error = emulate_push(vm, vcpuid, gpa, vie, paging, memread,
memwrite, memarg);
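As an illustration of the new decode path (hypothetical guest code, not from the commit):

/*
 *	movq	$0xfed00070, %rax	; some MMIO register (hypothetical)
 *	popq	(%rax)			; opcode 0x8F, ModRM reg field = b000
 *
 * The EPT fault on (%rax) reaches vmm_emulate_instruction(), which now
 * routes 0x8F /0 to emulate_pop(): the value is copied in from the guest
 * stack with vm_copyin(), written to the MMIO region via memwrite(), and
 * %rsp is then advanced by the operand size.
 */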
diff --git a/sys/amd64/vmm/vmm_msr.c b/sys/amd64/vmm/vmm_msr.c
deleted file mode 100644
index 03e0071..0000000
--- a/sys/amd64/vmm/vmm_msr.c
+++ /dev/null
@@ -1,273 +0,0 @@
-/*-
- * Copyright (c) 2011 NetApp, Inc.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * $FreeBSD$
- */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-
-#include <sys/param.h>
-#include <sys/systm.h>
-#include <sys/smp.h>
-
-#include <machine/specialreg.h>
-
-#include <machine/vmm.h>
-#include "vmm_lapic.h"
-#include "vmm_msr.h"
-
-#define VMM_MSR_F_EMULATE 0x01
-#define VMM_MSR_F_READONLY 0x02
-#define VMM_MSR_F_INVALID 0x04 /* guest_msr_valid() can override this */
-
-struct vmm_msr {
- int num;
- int flags;
- uint64_t hostval;
-};
-
-static struct vmm_msr vmm_msr[] = {
- { MSR_LSTAR, 0 },
- { MSR_CSTAR, 0 },
- { MSR_STAR, 0 },
- { MSR_SF_MASK, 0 },
- { MSR_PAT, VMM_MSR_F_EMULATE | VMM_MSR_F_INVALID },
- { MSR_BIOS_SIGN,VMM_MSR_F_EMULATE },
- { MSR_MCG_CAP, VMM_MSR_F_EMULATE | VMM_MSR_F_READONLY },
- { MSR_IA32_PLATFORM_ID, VMM_MSR_F_EMULATE | VMM_MSR_F_READONLY },
- { MSR_IA32_MISC_ENABLE, VMM_MSR_F_EMULATE | VMM_MSR_F_READONLY },
-};
-
-#define vmm_msr_num (sizeof(vmm_msr) / sizeof(vmm_msr[0]))
-CTASSERT(VMM_MSR_NUM >= vmm_msr_num);
-
-#define readonly_msr(idx) \
- ((vmm_msr[(idx)].flags & VMM_MSR_F_READONLY) != 0)
-
-#define emulated_msr(idx) \
- ((vmm_msr[(idx)].flags & VMM_MSR_F_EMULATE) != 0)
-
-#define invalid_msr(idx) \
- ((vmm_msr[(idx)].flags & VMM_MSR_F_INVALID) != 0)
-
-void
-vmm_msr_init(void)
-{
- int i;
-
- for (i = 0; i < vmm_msr_num; i++) {
- if (emulated_msr(i))
- continue;
- /*
- * XXX this assumes that the value of the host msr does not
- * change after we have cached it.
- */
- vmm_msr[i].hostval = rdmsr(vmm_msr[i].num);
- }
-}
-
-void
-guest_msrs_init(struct vm *vm, int cpu)
-{
- int i;
- uint64_t *guest_msrs, misc;
-
- guest_msrs = vm_guest_msrs(vm, cpu);
-
- for (i = 0; i < vmm_msr_num; i++) {
- switch (vmm_msr[i].num) {
- case MSR_LSTAR:
- case MSR_CSTAR:
- case MSR_STAR:
- case MSR_SF_MASK:
- case MSR_BIOS_SIGN:
- case MSR_MCG_CAP:
- guest_msrs[i] = 0;
- break;
- case MSR_PAT:
- guest_msrs[i] = PAT_VALUE(0, PAT_WRITE_BACK) |
- PAT_VALUE(1, PAT_WRITE_THROUGH) |
- PAT_VALUE(2, PAT_UNCACHED) |
- PAT_VALUE(3, PAT_UNCACHEABLE) |
- PAT_VALUE(4, PAT_WRITE_BACK) |
- PAT_VALUE(5, PAT_WRITE_THROUGH) |
- PAT_VALUE(6, PAT_UNCACHED) |
- PAT_VALUE(7, PAT_UNCACHEABLE);
- break;
- case MSR_IA32_MISC_ENABLE:
- misc = rdmsr(MSR_IA32_MISC_ENABLE);
- /*
- * Set mandatory bits
- * 11: branch trace disabled
- * 12: PEBS unavailable
- * Clear unsupported features
- * 16: SpeedStep enable
- * 18: enable MONITOR FSM
- */
- misc |= (1 << 12) | (1 << 11);
- misc &= ~((1 << 18) | (1 << 16));
- guest_msrs[i] = misc;
- break;
- case MSR_IA32_PLATFORM_ID:
- guest_msrs[i] = 0;
- break;
- default:
- panic("guest_msrs_init: missing initialization for msr "
- "0x%0x", vmm_msr[i].num);
- }
- }
-}
-
-static int
-msr_num_to_idx(u_int num)
-{
- int i;
-
- for (i = 0; i < vmm_msr_num; i++)
- if (vmm_msr[i].num == num)
- return (i);
-
- return (-1);
-}
-
-int
-emulate_wrmsr(struct vm *vm, int cpu, u_int num, uint64_t val, bool *retu)
-{
- int idx;
- uint64_t *guest_msrs;
-
- if (lapic_msr(num))
- return (lapic_wrmsr(vm, cpu, num, val, retu));
-
- idx = msr_num_to_idx(num);
- if (idx < 0 || invalid_msr(idx))
- return (EINVAL);
-
- if (!readonly_msr(idx)) {
- guest_msrs = vm_guest_msrs(vm, cpu);
-
- /* Stash the value */
- guest_msrs[idx] = val;
-
- /* Update processor state for non-emulated MSRs */
- if (!emulated_msr(idx))
- wrmsr(vmm_msr[idx].num, val);
- }
-
- return (0);
-}
-
-int
-emulate_rdmsr(struct vm *vm, int cpu, u_int num, bool *retu)
-{
- int error, idx;
- uint32_t eax, edx;
- uint64_t result, *guest_msrs;
-
- if (lapic_msr(num)) {
- error = lapic_rdmsr(vm, cpu, num, &result, retu);
- goto done;
- }
-
- idx = msr_num_to_idx(num);
- if (idx < 0 || invalid_msr(idx)) {
- error = EINVAL;
- goto done;
- }
-
- guest_msrs = vm_guest_msrs(vm, cpu);
- result = guest_msrs[idx];
-
- /*
- * If this is not an emulated msr register make sure that the processor
- * state matches our cached state.
- */
- if (!emulated_msr(idx) && (rdmsr(num) != result)) {
- panic("emulate_rdmsr: msr 0x%0x has inconsistent cached "
- "(0x%016lx) and actual (0x%016lx) values", num,
- result, rdmsr(num));
- }
-
- error = 0;
-
-done:
- if (error == 0) {
- eax = result;
- edx = result >> 32;
- error = vm_set_register(vm, cpu, VM_REG_GUEST_RAX, eax);
- if (error)
- panic("vm_set_register(rax) error %d", error);
- error = vm_set_register(vm, cpu, VM_REG_GUEST_RDX, edx);
- if (error)
- panic("vm_set_register(rdx) error %d", error);
- }
- return (error);
-}
-
-void
-restore_guest_msrs(struct vm *vm, int cpu)
-{
- int i;
- uint64_t *guest_msrs;
-
- guest_msrs = vm_guest_msrs(vm, cpu);
-
- for (i = 0; i < vmm_msr_num; i++) {
- if (emulated_msr(i))
- continue;
- else
- wrmsr(vmm_msr[i].num, guest_msrs[i]);
- }
-}
-
-void
-restore_host_msrs(struct vm *vm, int cpu)
-{
- int i;
-
- for (i = 0; i < vmm_msr_num; i++) {
- if (emulated_msr(i))
- continue;
- else
- wrmsr(vmm_msr[i].num, vmm_msr[i].hostval);
- }
-}
-
-/*
- * Must be called by the CPU-specific code before any guests are
- * created
- */
-void
-guest_msr_valid(int msr)
-{
- int i;
-
- for (i = 0; i < vmm_msr_num; i++) {
- if (vmm_msr[i].num == msr && invalid_msr(i)) {
- vmm_msr[i].flags &= ~VMM_MSR_F_INVALID;
- }
- }
-}
diff --git a/sys/amd64/vmm/vmm_msr.h b/sys/amd64/vmm/vmm_msr.h
deleted file mode 100644
index e070037..0000000
--- a/sys/amd64/vmm/vmm_msr.h
+++ /dev/null
@@ -1,44 +0,0 @@
-/*-
- * Copyright (c) 2011 NetApp, Inc.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * $FreeBSD$
- */
-
-#ifndef _VMM_MSR_H_
-#define _VMM_MSR_H_
-
-#define VMM_MSR_NUM 16
-struct vm;
-
-void vmm_msr_init(void);
-int emulate_wrmsr(struct vm *vm, int vcpu, u_int msr, uint64_t val,
- bool *retu);
-int emulate_rdmsr(struct vm *vm, int vcpu, u_int msr, bool *retu);
-void guest_msrs_init(struct vm *vm, int cpu);
-void guest_msr_valid(int msr);
-void restore_host_msrs(struct vm *vm, int cpu);
-void restore_guest_msrs(struct vm *vm, int cpu);
-
-#endif
diff --git a/sys/amd64/vmm/x86.c b/sys/amd64/vmm/x86.c
index ef1557f..c7515cf 100644
--- a/sys/amd64/vmm/x86.c
+++ b/sys/amd64/vmm/x86.c
@@ -33,6 +33,7 @@ __FBSDID("$FreeBSD$");
#include <sys/pcpu.h>
#include <sys/systm.h>
#include <sys/cpuset.h>
+#include <sys/sysctl.h>
#include <machine/clock.h>
#include <machine/cpufunc.h>
@@ -45,20 +46,49 @@ __FBSDID("$FreeBSD$");
#include "vmm_host.h"
#include "x86.h"
+SYSCTL_DECL(_hw_vmm);
+static SYSCTL_NODE(_hw_vmm, OID_AUTO, topology, CTLFLAG_RD, 0, NULL);
+
#define CPUID_VM_HIGH 0x40000000
static const char bhyve_id[12] = "bhyve bhyve ";
static uint64_t bhyve_xcpuids;
+/*
+ * The default CPU topology is a single thread per package.
+ */
+static u_int threads_per_core = 1;
+SYSCTL_UINT(_hw_vmm_topology, OID_AUTO, threads_per_core, CTLFLAG_RDTUN,
+ &threads_per_core, 0, NULL);
+
+static u_int cores_per_package = 1;
+SYSCTL_UINT(_hw_vmm_topology, OID_AUTO, cores_per_package, CTLFLAG_RDTUN,
+ &cores_per_package, 0, NULL);
+
+static int cpuid_leaf_b = 1;
+SYSCTL_INT(_hw_vmm_topology, OID_AUTO, cpuid_leaf_b, CTLFLAG_RDTUN,
+ &cpuid_leaf_b, 0, NULL);
+
+/*
+ * Round up to the next power of two, if necessary, and then take log2.
+ * Returns -1 if argument is zero.
+ */
+static __inline int
+log2(u_int x)
+{
+
+ return (fls(x << (1 - powerof2(x))) - 1);
+}
+
int
x86_emulate_cpuid(struct vm *vm, int vcpu_id,
uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx)
{
const struct xsave_limits *limits;
uint64_t cr4;
- int error, enable_invpcid;
- unsigned int func, regs[4];
+ int error, enable_invpcid, level, width, x2apic_id;
+ unsigned int func, regs[4], logical_cpus;
enum x2apic_state x2apic_state;
/*
@@ -207,30 +237,31 @@ x86_emulate_cpuid(struct vm *vm, int vcpu_id,
*/
regs[3] &= ~CPUID_DS;
- /*
- * Disable multi-core.
- */
+ logical_cpus = threads_per_core * cores_per_package;
regs[1] &= ~CPUID_HTT_CORES;
- regs[3] &= ~CPUID_HTT;
+ regs[1] |= (logical_cpus & 0xff) << 16;
+ regs[3] |= CPUID_HTT;
break;
case CPUID_0000_0004:
- do_cpuid(4, regs);
+ cpuid_count(*eax, *ecx, regs);
- /*
- * Do not expose topology.
- *
- * The maximum number of processor cores in
- * this physical processor package and the
- * maximum number of threads sharing this
- * cache are encoded with "plus 1" encoding.
- * Adding one to the value in this register
- * field to obtains the actual value.
- *
- * Therefore 0 for both indicates 1 core per
- * package and no cache sharing.
- */
- regs[0] &= 0xffff8000;
+ if (regs[0] || regs[1] || regs[2] || regs[3]) {
+ regs[0] &= 0x3ff;
+ regs[0] |= (cores_per_package - 1) << 26;
+ /*
+ * Cache topology:
+ * - L1 and L2 are shared only by the logical
+ * processors in a single core.
+ * - L3 and above are shared by all logical
+ * processors in the package.
+ */
+ logical_cpus = threads_per_core;
+ level = (regs[0] >> 5) & 0x7;
+ if (level >= 3)
+ logical_cpus *= cores_per_package;
+ regs[0] |= (logical_cpus - 1) << 14;
+ }
break;
case CPUID_0000_0007:
@@ -284,10 +315,32 @@ x86_emulate_cpuid(struct vm *vm, int vcpu_id,
/*
* Processor topology enumeration
*/
- regs[0] = 0;
- regs[1] = 0;
- regs[2] = *ecx & 0xff;
- regs[3] = vcpu_id;
+ if (*ecx == 0) {
+ logical_cpus = threads_per_core;
+ width = log2(logical_cpus);
+ level = CPUID_TYPE_SMT;
+ x2apic_id = vcpu_id;
+ }
+
+ if (*ecx == 1) {
+ logical_cpus = threads_per_core *
+ cores_per_package;
+ width = log2(logical_cpus);
+ level = CPUID_TYPE_CORE;
+ x2apic_id = vcpu_id;
+ }
+
+ if (!cpuid_leaf_b || *ecx >= 2) {
+ width = 0;
+ logical_cpus = 0;
+ level = 0;
+ x2apic_id = 0;
+ }
+
+ regs[0] = width & 0x1f;
+ regs[1] = logical_cpus & 0xffff;
+ regs[2] = (level << 8) | (*ecx & 0xff);
+ regs[3] = x2apic_id;
break;
case CPUID_0000_000D:
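A worked example of the new topology leaves (illustrative; assumes the tunables hw.vmm.topology.threads_per_core=2 and hw.vmm.topology.cores_per_package=4, and vcpu_id 5):

/*
 * CPUID leaf 0xB on vcpu 5 would then report:
 *
 *	ECX=0 (SMT level):   EAX=1 (shift width), EBX=2, ECX=0x0100, EDX=5
 *	ECX=1 (core level):  EAX=3 (shift width), EBX=8, ECX=0x0201, EDX=5
 *	ECX>=2 (or cpuid_leaf_b=0):  EAX=0, EBX=0, ECX=*ecx & 0xff, EDX=0
 *
 * while leaf 0x1 now reports 8 logical processors (CPUID_HTT set,
 * EBX[23:16] = 8) instead of hiding multi-core entirely.
 */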
diff --git a/sys/modules/vmm/Makefile b/sys/modules/vmm/Makefile
index 76f9364..702587b 100644
--- a/sys/modules/vmm/Makefile
+++ b/sys/modules/vmm/Makefile
@@ -19,7 +19,6 @@ SRCS+= vmm.c \
vmm_ipi.c \
vmm_lapic.c \
vmm_mem.c \
- vmm_msr.c \
vmm_stat.c \
vmm_util.c \
x86.c \
diff --git a/sys/x86/include/specialreg.h b/sys/x86/include/specialreg.h
index c920e82..7298a2e 100644
--- a/sys/x86/include/specialreg.h
+++ b/sys/x86/include/specialreg.h
@@ -381,6 +381,7 @@
#define MSR_BIOS_SIGN 0x08b
#define MSR_PERFCTR0 0x0c1
#define MSR_PERFCTR1 0x0c2
+#define MSR_PLATFORM_INFO 0x0ce
#define MSR_MPERF 0x0e7
#define MSR_APERF 0x0e8
#define MSR_IA32_EXT_CONFIG 0x0ee /* Undocumented. Core Solo/Duo only */
@@ -404,6 +405,8 @@
#define MSR_THERM_STATUS 0x19c
#define MSR_IA32_MISC_ENABLE 0x1a0
#define MSR_IA32_TEMPERATURE_TARGET 0x1a2
+#define MSR_TURBO_RATIO_LIMIT 0x1ad
+#define MSR_TURBO_RATIO_LIMIT1 0x1ae
#define MSR_DEBUGCTLMSR 0x1d9
#define MSR_LASTBRANCHFROMIP 0x1db
#define MSR_LASTBRANCHTOIP 0x1dc
@@ -437,6 +440,11 @@
#define MSR_MC4_STATUS 0x411
#define MSR_MC4_ADDR 0x412
#define MSR_MC4_MISC 0x413
+#define MSR_RAPL_POWER_UNIT 0x606
+#define MSR_PKG_ENERGY_STATUS 0x611
+#define MSR_DRAM_ENERGY_STATUS 0x619
+#define MSR_PP0_ENERGY_STATUS 0x639
+#define MSR_PP1_ENERGY_STATUS 0x641
/*
* VMX MSRs
diff --git a/usr.sbin/bhyve/acpi.c b/usr.sbin/bhyve/acpi.c
index 5dea300..c1f5f13 100644
--- a/usr.sbin/bhyve/acpi.c
+++ b/usr.sbin/bhyve/acpi.c
@@ -489,7 +489,7 @@ basl_fwrite_fadt(FILE *fp)
EFPRINTF(fp,
"[0012]\t\tPM Timer Block : [Generic Address Structure]\n");
EFPRINTF(fp, "[0001]\t\tSpace ID : 01 [SystemIO]\n");
- EFPRINTF(fp, "[0001]\t\tBit Width : 32\n");
+ EFPRINTF(fp, "[0001]\t\tBit Width : 20\n");
EFPRINTF(fp, "[0001]\t\tBit Offset : 00\n");
EFPRINTF(fp,
"[0001]\t\tEncoded Access Width : 03 [DWord Access:32]\n");
@@ -499,7 +499,7 @@ basl_fwrite_fadt(FILE *fp)
EFPRINTF(fp, "[0012]\t\tGPE0 Block : [Generic Address Structure]\n");
EFPRINTF(fp, "[0001]\t\tSpace ID : 01 [SystemIO]\n");
- EFPRINTF(fp, "[0001]\t\tBit Width : 80\n");
+ EFPRINTF(fp, "[0001]\t\tBit Width : 00\n");
EFPRINTF(fp, "[0001]\t\tBit Offset : 00\n");
EFPRINTF(fp, "[0001]\t\tEncoded Access Width : 01 [Byte Access:8]\n");
EFPRINTF(fp, "[0008]\t\tAddress : 0000000000000000\n");
diff --git a/usr.sbin/bhyve/bhyverun.c b/usr.sbin/bhyve/bhyverun.c
index 7dcf6d0..b2b36bb 100644
--- a/usr.sbin/bhyve/bhyverun.c
+++ b/usr.sbin/bhyve/bhyverun.c
@@ -803,6 +803,12 @@ main(int argc, char *argv[])
exit(1);
}
+ error = init_msr();
+ if (error) {
+ fprintf(stderr, "init_msr error %d", error);
+ exit(1);
+ }
+
init_mem();
init_inout();
pci_irq_init(ctx);
diff --git a/usr.sbin/bhyve/block_if.c b/usr.sbin/bhyve/block_if.c
index 1ec0344..cbe5ac3 100644
--- a/usr.sbin/bhyve/block_if.c
+++ b/usr.sbin/bhyve/block_if.c
@@ -55,8 +55,7 @@ __FBSDID("$FreeBSD$");
enum blockop {
BOP_READ,
BOP_WRITE,
- BOP_FLUSH,
- BOP_CANCEL
+ BOP_FLUSH
};
enum blockstat {
@@ -159,9 +158,6 @@ blockif_proc(struct blockif_ctxt *bc, struct blockif_elem *be)
break;
case BOP_FLUSH:
break;
- case BOP_CANCEL:
- err = EINTR;
- break;
default:
err = EINVAL;
break;
@@ -278,6 +274,7 @@ blockif_open(const char *optstr, const char *ident)
bc->bc_magic = BLOCKIF_SIG;
bc->bc_fd = fd;
+ bc->bc_rdonly = ro;
bc->bc_size = size;
bc->bc_sectsz = sectsz;
pthread_mutex_init(&bc->bc_mtx, NULL);
@@ -355,9 +352,28 @@ blockif_flush(struct blockif_ctxt *bc, struct blockif_req *breq)
int
blockif_cancel(struct blockif_ctxt *bc, struct blockif_req *breq)
{
+ struct blockif_elem *be;
assert(bc->bc_magic == BLOCKIF_SIG);
- return (blockif_request(bc, breq, BOP_CANCEL));
+
+ pthread_mutex_lock(&bc->bc_mtx);
+ TAILQ_FOREACH(be, &bc->bc_inuseq, be_link) {
+ if (be->be_req == breq)
+ break;
+ }
+ if (be == NULL) {
+ pthread_mutex_unlock(&bc->bc_mtx);
+ return (EINVAL);
+ }
+
+ TAILQ_REMOVE(&bc->bc_inuseq, be, be_link);
+ be->be_status = BST_FREE;
+ be->be_req = NULL;
+ TAILQ_INSERT_TAIL(&bc->bc_freeq, be, be_link);
+ bc->bc_req_count--;
+ pthread_mutex_unlock(&bc->bc_mtx);
+
+ return (0);
}
int
diff --git a/usr.sbin/bhyve/pci_ahci.c b/usr.sbin/bhyve/pci_ahci.c
index 214237d..42aa0b3 100644
--- a/usr.sbin/bhyve/pci_ahci.c
+++ b/usr.sbin/bhyve/pci_ahci.c
@@ -48,6 +48,7 @@ __FBSDID("$FreeBSD$");
#include <unistd.h>
#include <assert.h>
#include <pthread.h>
+#include <pthread_np.h>
#include <inttypes.h>
#include "bhyverun.h"
@@ -115,7 +116,8 @@ static FILE *dbg;
struct ahci_ioreq {
struct blockif_req io_req;
struct ahci_port *io_pr;
- STAILQ_ENTRY(ahci_ioreq) io_list;
+ STAILQ_ENTRY(ahci_ioreq) io_flist;
+ TAILQ_ENTRY(ahci_ioreq) io_blist;
uint8_t *cfis;
uint32_t len;
uint32_t done;
@@ -160,6 +162,7 @@ struct ahci_port {
struct ahci_ioreq *ioreq;
int ioqsz;
STAILQ_HEAD(ahci_fhead, ahci_ioreq) iofhd;
+ TAILQ_HEAD(ahci_bhead, ahci_ioreq) iobhd;
};
struct ahci_cmd_hdr {
@@ -360,6 +363,68 @@ ahci_write_reset_fis_d2h(struct ahci_port *p)
}
static void
+ahci_check_stopped(struct ahci_port *p)
+{
+ /*
+ * If we are no longer processing the command list and nothing
+ * is in-flight, clear the running bit.
+ */
+ if (!(p->cmd & AHCI_P_CMD_ST)) {
+ if (p->pending == 0)
+ p->cmd &= ~(AHCI_P_CMD_CR | AHCI_P_CMD_CCS_MASK);
+ }
+}
+
+static void
+ahci_port_stop(struct ahci_port *p)
+{
+ struct ahci_ioreq *aior;
+ uint8_t *cfis;
+ int slot;
+ int ncq;
+ int error;
+
+ assert(pthread_mutex_isowned_np(&p->pr_sc->mtx));
+
+ TAILQ_FOREACH(aior, &p->iobhd, io_blist) {
+ /*
+ * Try to cancel the outstanding blockif request.
+ */
+ error = blockif_cancel(p->bctx, &aior->io_req);
+ if (error != 0)
+ continue;
+
+ slot = aior->slot;
+ cfis = aior->cfis;
+ ncq = 0;
+ if (cfis[2] == ATA_WRITE_FPDMA_QUEUED ||
+ cfis[2] == ATA_READ_FPDMA_QUEUED)
+ ncq = 1;
+
+ if (ncq)
+ p->sact &= ~(1 << slot);
+ else
+ p->ci &= ~(1 << slot);
+
+ /*
+ * This command is now done.
+ */
+ p->pending &= ~(1 << slot);
+
+ /*
+ * Delete the blockif request from the busy list
+ */
+ TAILQ_REMOVE(&p->iobhd, aior, io_blist);
+
+ /*
+ * Move the blockif request back to the free list
+ */
+ STAILQ_INSERT_TAIL(&p->iofhd, aior, io_flist);
+ }
+
+ ahci_check_stopped(p);
+}
+
+static void
ahci_port_reset(struct ahci_port *pr)
{
pr->sctl = 0;
@@ -492,7 +557,7 @@ ahci_handle_dma(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t done,
*/
aior = STAILQ_FIRST(&p->iofhd);
assert(aior != NULL);
- STAILQ_REMOVE_HEAD(&p->iofhd, io_list);
+ STAILQ_REMOVE_HEAD(&p->iofhd, io_flist);
aior->cfis = cfis;
aior->slot = slot;
aior->len = len;
@@ -503,15 +568,21 @@ ahci_handle_dma(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t done,
if (iovcnt > BLOCKIF_IOV_MAX) {
aior->prdtl = iovcnt - BLOCKIF_IOV_MAX;
iovcnt = BLOCKIF_IOV_MAX;
- /*
- * Mark this command in-flight.
- */
- p->pending |= 1 << slot;
} else
aior->prdtl = 0;
breq->br_iovcnt = iovcnt;
/*
+ * Mark this command in-flight.
+ */
+ p->pending |= 1 << slot;
+
+ /*
+ * Stuff request onto busy list
+ */
+ TAILQ_INSERT_HEAD(&p->iobhd, aior, io_blist);
+
+ /*
* Build up the iovec based on the prdt
*/
for (i = 0; i < iovcnt; i++) {
@@ -546,7 +617,7 @@ ahci_handle_flush(struct ahci_port *p, int slot, uint8_t *cfis)
*/
aior = STAILQ_FIRST(&p->iofhd);
assert(aior != NULL);
- STAILQ_REMOVE_HEAD(&p->iofhd, io_list);
+ STAILQ_REMOVE_HEAD(&p->iofhd, io_flist);
aior->cfis = cfis;
aior->slot = slot;
aior->len = 0;
@@ -554,6 +625,16 @@ ahci_handle_flush(struct ahci_port *p, int slot, uint8_t *cfis)
aior->prdtl = 0;
breq = &aior->io_req;
+ /*
+ * Mark this command in-flight.
+ */
+ p->pending |= 1 << slot;
+
+ /*
+ * Stuff request onto busy list
+ */
+ TAILQ_INSERT_HEAD(&p->iobhd, aior, io_blist);
+
err = blockif_flush(p->bctx, breq);
assert(err == 0);
}
@@ -961,7 +1042,7 @@ atapi_read(struct ahci_port *p, int slot, uint8_t *cfis,
*/
aior = STAILQ_FIRST(&p->iofhd);
assert(aior != NULL);
- STAILQ_REMOVE_HEAD(&p->iofhd, io_list);
+ STAILQ_REMOVE_HEAD(&p->iofhd, io_flist);
aior->cfis = cfis;
aior->slot = slot;
aior->len = len;
@@ -977,6 +1058,16 @@ atapi_read(struct ahci_port *p, int slot, uint8_t *cfis,
breq->br_iovcnt = iovcnt;
/*
+ * Mark this command in-flight.
+ */
+ p->pending |= 1 << slot;
+
+ /*
+ * Stuff request onto busy list
+ */
+ TAILQ_INSERT_HEAD(&p->iobhd, aior, io_blist);
+
+ /*
* Build up the iovec based on the prdt
*/
for (i = 0; i < iovcnt; i++) {
@@ -1415,9 +1506,14 @@ ata_ioreq_cb(struct blockif_req *br, int err)
pthread_mutex_lock(&sc->mtx);
/*
+ * Delete the blockif request from the busy list
+ */
+ TAILQ_REMOVE(&p->iobhd, aior, io_blist);
+
+ /*
* Move the blockif request back to the free list
*/
- STAILQ_INSERT_TAIL(&p->iofhd, aior, io_list);
+ STAILQ_INSERT_TAIL(&p->iofhd, aior, io_flist);
if (pending && !err) {
ahci_handle_dma(p, slot, cfis, aior->done,
@@ -1438,17 +1534,18 @@ ata_ioreq_cb(struct blockif_req *br, int err)
p->serr |= (1 << slot);
}
- /*
- * This command is now complete.
- */
- p->pending &= ~(1 << slot);
-
if (ncq) {
p->sact &= ~(1 << slot);
ahci_write_fis_sdb(p, slot, tfd);
} else
ahci_write_fis_d2h(p, slot, cfis, tfd);
+ /*
+ * This command is now complete.
+ */
+ p->pending &= ~(1 << slot);
+
+ ahci_check_stopped(p);
out:
pthread_mutex_unlock(&sc->mtx);
DPRINTF("%s exit\n", __func__);
@@ -1478,9 +1575,14 @@ atapi_ioreq_cb(struct blockif_req *br, int err)
pthread_mutex_lock(&sc->mtx);
/*
+ * Delete the blockif request from the busy list
+ */
+ TAILQ_REMOVE(&p->iobhd, aior, io_blist);
+
+ /*
* Move the blockif request back to the free list
*/
- STAILQ_INSERT_TAIL(&p->iofhd, aior, io_list);
+ STAILQ_INSERT_TAIL(&p->iofhd, aior, io_flist);
if (pending && !err) {
atapi_read(p, slot, cfis, aior->done, hdr->prdtl - pending);
@@ -1500,6 +1602,12 @@ atapi_ioreq_cb(struct blockif_req *br, int err)
cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
ahci_write_fis_d2h(p, slot, cfis, tfd);
+ /*
+ * This command is now complete.
+ */
+ p->pending &= ~(1 << slot);
+
+ ahci_check_stopped(p);
out:
pthread_mutex_unlock(&sc->mtx);
DPRINTF("%s exit\n", __func__);
@@ -1526,8 +1634,10 @@ pci_ahci_ioreq_init(struct ahci_port *pr)
else
vr->io_req.br_callback = atapi_ioreq_cb;
vr->io_req.br_param = vr;
- STAILQ_INSERT_TAIL(&pr->iofhd, vr, io_list);
+ STAILQ_INSERT_TAIL(&pr->iofhd, vr, io_flist);
}
+
+ TAILQ_INIT(&pr->iobhd);
}
static void
@@ -1565,9 +1675,7 @@ pci_ahci_port_write(struct pci_ahci_softc *sc, uint64_t offset, uint64_t value)
p->cmd = value;
if (!(value & AHCI_P_CMD_ST)) {
- p->cmd &= ~(AHCI_P_CMD_CR | AHCI_P_CMD_CCS_MASK);
- p->ci = 0;
- p->sact = 0;
+ ahci_port_stop(p);
} else {
uint64_t clb;
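
The pci_ahci.c changes track every outstanding blockif request on a per-port busy list and in the 'pending' slot bitmap, so ahci_port_stop() can cancel in-flight commands and ahci_check_stopped() can clear the command-list-running bit once the port drains. Below is a small self-contained sketch of just the slot-bitmap bookkeeping, assuming simplified CMD_ST/CMD_CR stand-ins for AHCI_P_CMD_ST and AHCI_P_CMD_CR (the real code also clears AHCI_P_CMD_CCS_MASK).

    #include <stdint.h>
    #include <stdio.h>

    #define CMD_ST 0x0001u    /* stand-in for AHCI_P_CMD_ST: software wants the port running */
    #define CMD_CR 0x8000u    /* stand-in for AHCI_P_CMD_CR: command engine is running */

    struct port {
        uint32_t cmd;         /* PxCMD-like register */
        uint32_t pending;     /* one bit per command slot with an outstanding request */
    };

    /* Mark a slot busy when its request is handed to the block layer. */
    static void
    slot_start(struct port *p, int slot)
    {
        p->pending |= 1u << slot;
    }

    /*
     * Clear CR only once software has cleared ST *and* no slot is still in
     * flight -- the same condition ahci_check_stopped() tests above.
     */
    static void
    check_stopped(struct port *p)
    {
        if ((p->cmd & CMD_ST) == 0 && p->pending == 0)
            p->cmd &= ~CMD_CR;
    }

    /* Completion path: retire the slot, then see whether the port went idle. */
    static void
    slot_done(struct port *p, int slot)
    {
        p->pending &= ~(1u << slot);
        check_stopped(p);
    }

    int
    main(void)
    {
        struct port p = { .cmd = CMD_ST | CMD_CR, .pending = 0 };

        slot_start(&p, 3);
        p.cmd &= ~CMD_ST;        /* guest stops the port while slot 3 is busy */
        check_stopped(&p);
        printf("CR after stop request: %d\n", (p.cmd & CMD_CR) != 0);  /* 1 */

        slot_done(&p, 3);
        printf("CR after completion:   %d\n", (p.cmd & CMD_CR) != 0);  /* 0 */
        return (0);
    }

This is why both completion callbacks above clear the pending bit only after writing the completion FIS and then call ahci_check_stopped().
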
diff --git a/usr.sbin/bhyve/pci_virtio_block.c b/usr.sbin/bhyve/pci_virtio_block.c
index 394b116..c66ad68 100644
--- a/usr.sbin/bhyve/pci_virtio_block.c
+++ b/usr.sbin/bhyve/pci_virtio_block.c
@@ -94,6 +94,8 @@ struct vtblk_config {
struct virtio_blk_hdr {
#define VBH_OP_READ 0
#define VBH_OP_WRITE 1
+#define VBH_OP_FLUSH 4
+#define VBH_OP_FLUSH_OUT 5
#define VBH_OP_IDENT 8
#define VBH_FLAG_BARRIER 0x80000000 /* OR'ed into vbh_type */
uint32_t vbh_type;
@@ -217,6 +219,10 @@ pci_vtblk_proc(struct pci_vtblk_softc *sc, struct vqueue_info *vq)
MIN(iov[1].iov_len, sizeof(sc->vbsc_ident)));
err = 0;
break;
+ case VBH_OP_FLUSH:
+ case VBH_OP_FLUSH_OUT:
+ err = fsync(sc->vbsc_fd);
+ break;
default:
err = -ENOSYS;
break;
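
The new VBH_OP_FLUSH/VBH_OP_FLUSH_OUT cases satisfy a guest flush by calling fsync(2) on the backing descriptor. A minimal sketch of a flush handler in that shape follows; handle_flush() and the demo file path are hypothetical, and the negative-errno return convention is only an assumption modeled on the -ENOSYS default above.

    #include <errno.h>
    #include <fcntl.h>
    #include <stdio.h>
    #include <unistd.h>

    /*
     * Flush the backing file to stable storage, as the VBH_OP_FLUSH cases do
     * with fsync(2).  Returns 0 on success or a negative errno (assumed
     * convention).
     */
    static int
    handle_flush(int backing_fd)
    {
        if (fsync(backing_fd) == -1)
            return (-errno);
        return (0);
    }

    int
    main(void)
    {
        int fd = open("/tmp/vtblk-demo.img", O_RDWR | O_CREAT, 0644);

        if (fd == -1) {
            perror("open");
            return (1);
        }
        (void) write(fd, "data", 4);
        printf("flush: %d\n", handle_flush(fd));
        close(fd);
        return (0);
    }
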
diff --git a/usr.sbin/bhyve/task_switch.c b/usr.sbin/bhyve/task_switch.c
index 0002da8..b939c1a 100644
--- a/usr.sbin/bhyve/task_switch.c
+++ b/usr.sbin/bhyve/task_switch.c
@@ -725,6 +725,21 @@ vmexit_task_switch(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
assert(paging->cpu_mode == CPU_MODE_PROTECTED);
/*
+ * Calculate the %eip to store in the old TSS before modifying the
+ * 'inst_length'.
+ */
+ eip = vmexit->rip + vmexit->inst_length;
+
+ /*
+ * Set the 'inst_length' to '0'.
+ *
+ * If an exception is triggered during emulation of the task switch
+ * then the exception handler should return to the instruction that
+ * caused the task switch as opposed to the subsequent instruction.
+ */
+ vmexit->inst_length = 0;
+
+ /*
* Section 4.6, "Access Rights" in Intel SDM Vol 3.
* The following page table accesses are implicitly supervisor mode:
* - accesses to GDT or LDT to load segment descriptors
@@ -839,7 +854,6 @@ vmexit_task_switch(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
}
/* Save processor state in old TSS */
- eip = vmexit->rip + vmexit->inst_length;
tss32_save(ctx, vcpu, task_switch, eip, &oldtss, ot_iov);
/*
@@ -870,7 +884,7 @@ vmexit_task_switch(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
* the saved instruction pointer will belong to the new task.
*/
vmexit->rip = newtss.tss_eip;
- vmexit->inst_length = 0;
+ assert(vmexit->inst_length == 0);
/* Load processor state from new TSS */
error = tss32_restore(ctx, vcpu, task_switch, ot_sel, &newtss, nt_iov);
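
The task_switch.c change is about ordering: the %eip stored in the old TSS must point past the instruction that triggered the switch, so it is computed from rip + inst_length before inst_length is zeroed, while the zeroed inst_length makes any exception raised during emulation resume at the task-switch instruction itself. The toy sketch below, with a hypothetical struct exitinfo, just demonstrates that ordering.

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Hypothetical stand-in for the vm_exit state used above. */
    struct exitinfo {
        uint64_t rip;          /* address of the instruction that caused the exit */
        int      inst_length;  /* its length in bytes */
    };

    int
    main(void)
    {
        struct exitinfo vmexit = { .rip = 0x1000, .inst_length = 2 };
        uint64_t eip;

        /* 1. Compute the return address to store in the old TSS first... */
        eip = vmexit.rip + vmexit.inst_length;

        /*
         * 2. ...then zero inst_length so that a fault taken while emulating
         *    the task switch resumes at the task-switch instruction itself.
         */
        vmexit.inst_length = 0;

        assert(eip == 0x1002);
        printf("old TSS %%eip = %#lx, faults re-run %#lx\n",
            (unsigned long)eip, (unsigned long)vmexit.rip);
        return (0);
    }
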
diff --git a/usr.sbin/bhyve/virtio.c b/usr.sbin/bhyve/virtio.c
index 9581fb0..19c0d47 100644
--- a/usr.sbin/bhyve/virtio.c
+++ b/usr.sbin/bhyve/virtio.c
@@ -33,6 +33,7 @@ __FBSDID("$FreeBSD$");
#include <stdio.h>
#include <stdint.h>
#include <pthread.h>
+#include <pthread_np.h>
#include "bhyverun.h"
#include "pci_emul.h"
@@ -89,6 +90,9 @@ vi_reset_dev(struct virtio_softc *vs)
struct vqueue_info *vq;
int i, nvq;
+ if (vs->vs_mtx)
+ assert(pthread_mutex_isowned_np(vs->vs_mtx));
+
nvq = vs->vs_vc->vc_nvq;
for (vq = vs->vs_queues, i = 0; i < nvq; vq++, i++) {
vq->vq_flags = 0;
@@ -99,11 +103,9 @@ vi_reset_dev(struct virtio_softc *vs)
vs->vs_negotiated_caps = 0;
vs->vs_curq = 0;
/* vs->vs_status = 0; -- redundant */
- VS_LOCK(vs);
if (vs->vs_isr)
pci_lintr_deassert(vs->vs_pi);
vs->vs_isr = 0;
- VS_UNLOCK(vs);
vs->vs_msix_cfg_idx = VIRTIO_MSI_NO_VECTOR;
}
@@ -137,7 +139,9 @@ vi_intr_init(struct virtio_softc *vs, int barnum, int use_msix)
if (use_msix) {
vs->vs_flags |= VIRTIO_USE_MSIX;
+ VS_LOCK(vs);
vi_reset_dev(vs); /* set all vectors to NO_VECTOR */
+ VS_UNLOCK(vs);
nvec = vs->vs_vc->vc_nvq + 1;
if (pci_emul_add_msixcap(vs->vs_pi, nvec, barnum))
return (1);
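
vi_reset_dev() now asserts (when a mutex exists) that the caller already holds the softc lock, and vi_intr_init() takes the lock around its call rather than locking inside the reset path. A minimal FreeBSD-specific sketch of that convention is below, using pthread_np.h's pthread_mutex_isowned_np(); device_reset_locked() and device_init() are illustrative names, not virtio.c's.

    #include <assert.h>
    #include <pthread.h>
    #include <pthread_np.h>    /* FreeBSD: pthread_mutex_isowned_np() */
    #include <stdio.h>

    static pthread_mutex_t dev_mtx = PTHREAD_MUTEX_INITIALIZER;

    /*
     * Internal helper: documents (and, in debug builds, enforces) that the
     * caller already holds dev_mtx rather than taking the lock itself.
     */
    static void
    device_reset_locked(void)
    {
        assert(pthread_mutex_isowned_np(&dev_mtx));
        /* ... reset state protected by dev_mtx ... */
    }

    /* Entry point: acquires the lock, then calls the locked variant. */
    static void
    device_init(void)
    {
        pthread_mutex_lock(&dev_mtx);
        device_reset_locked();
        pthread_mutex_unlock(&dev_mtx);
    }

    int
    main(void)
    {
        device_init();
        printf("reset ran with the lock held\n");
        return (0);
    }
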
diff --git a/usr.sbin/bhyve/xmsr.c b/usr.sbin/bhyve/xmsr.c
index 63522bf..1ed1ea1 100644
--- a/usr.sbin/bhyve/xmsr.c
+++ b/usr.sbin/bhyve/xmsr.c
@@ -31,33 +31,91 @@ __FBSDID("$FreeBSD$");
#include <sys/types.h>
+#include <machine/cpufunc.h>
#include <machine/vmm.h>
+#include <machine/specialreg.h>
+
#include <vmmapi.h>
#include <stdio.h>
#include <stdlib.h>
+#include <string.h>
#include "xmsr.h"
+static int cpu_vendor_intel, cpu_vendor_amd;
+
int
emulate_wrmsr(struct vmctx *ctx, int vcpu, uint32_t code, uint64_t val)
{
- switch (code) {
- case 0xd04: /* Sandy Bridge uncore PMC MSRs */
- case 0xc24:
- return (0);
- case 0x79:
- return (0); /* IA32_BIOS_UPDT_TRIG MSR */
- default:
- break;
+ if (cpu_vendor_intel) {
+ switch (code) {
+ case 0xd04: /* Sandy Bridge uncore PMCs */
+ case 0xc24:
+ return (0);
+ case MSR_BIOS_UPDT_TRIG:
+ return (0);
+ case MSR_BIOS_SIGN:
+ return (0);
+ default:
+ break;
+ }
}
return (-1);
}
int
-emulate_rdmsr(struct vmctx *ctx, int vcpu, uint32_t code, uint64_t *val)
+emulate_rdmsr(struct vmctx *ctx, int vcpu, uint32_t num, uint64_t *val)
{
+ int error = 0;
- return (-1);
+ if (cpu_vendor_intel) {
+ switch (num) {
+ case MSR_BIOS_SIGN:
+ case MSR_IA32_PLATFORM_ID:
+ case MSR_PKG_ENERGY_STATUS:
+ case MSR_PP0_ENERGY_STATUS:
+ case MSR_PP1_ENERGY_STATUS:
+ case MSR_DRAM_ENERGY_STATUS:
+ *val = 0;
+ break;
+ case MSR_RAPL_POWER_UNIT:
+ /*
+ * Use the default value documented in section
+ * "RAPL Interfaces" in Intel SDM vol3.
+ */
+ *val = 0x000a1003;
+ break;
+ default:
+ error = -1;
+ break;
+ }
+ }
+ return (error);
+}
+
+int
+init_msr(void)
+{
+ int error;
+ u_int regs[4];
+ char cpu_vendor[13];
+
+ do_cpuid(0, regs);
+ ((u_int *)&cpu_vendor)[0] = regs[1];
+ ((u_int *)&cpu_vendor)[1] = regs[3];
+ ((u_int *)&cpu_vendor)[2] = regs[2];
+ cpu_vendor[12] = '\0';
+
+ error = 0;
+ if (strcmp(cpu_vendor, "AuthenticAMD") == 0) {
+ cpu_vendor_amd = 1;
+ } else if (strcmp(cpu_vendor, "GenuineIntel") == 0) {
+ cpu_vendor_intel = 1;
+ } else {
+ fprintf(stderr, "Unknown cpu vendor \"%s\"\n", cpu_vendor);
+ error = -1;
+ }
+ return (error);
}
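
init_msr() above identifies the host CPU by assembling the 12-byte CPUID leaf-0 vendor string from EBX, EDX and ECX, in that order, before comparing against "GenuineIntel" and "AuthenticAMD". The sketch below shows the same byte ordering using the compiler-provided <cpuid.h> helper instead of FreeBSD's do_cpuid(), so it builds outside the bhyve tree; it illustrates the technique rather than reproducing the code above.

    #include <cpuid.h>      /* GCC/Clang __get_cpuid() */
    #include <stdio.h>
    #include <string.h>

    int
    main(void)
    {
        unsigned int eax, ebx, ecx, edx;
        char vendor[13];

        if (!__get_cpuid(0, &eax, &ebx, &ecx, &edx)) {
            fprintf(stderr, "CPUID not supported\n");
            return (1);
        }

        /* The vendor string is the concatenation of EBX, EDX, ECX. */
        memcpy(&vendor[0], &ebx, 4);
        memcpy(&vendor[4], &edx, 4);
        memcpy(&vendor[8], &ecx, 4);
        vendor[12] = '\0';

        if (strcmp(vendor, "GenuineIntel") == 0)
            printf("Intel host: %s\n", vendor);
        else if (strcmp(vendor, "AuthenticAMD") == 0)
            printf("AMD host: %s\n", vendor);
        else
            printf("Unknown cpu vendor \"%s\"\n", vendor);
        return (0);
    }
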
diff --git a/usr.sbin/bhyve/xmsr.h b/usr.sbin/bhyve/xmsr.h
index b097cf8..bcf65b7 100644
--- a/usr.sbin/bhyve/xmsr.h
+++ b/usr.sbin/bhyve/xmsr.h
@@ -29,6 +29,7 @@
#ifndef _XMSR_H_
#define _XMSR_H_
+int init_msr(void);
int emulate_wrmsr(struct vmctx *ctx, int vcpu, uint32_t code, uint64_t val);
int emulate_rdmsr(struct vmctx *ctx, int vcpu, uint32_t code, uint64_t *val);
diff --git a/usr.sbin/bhyvectl/bhyvectl.c b/usr.sbin/bhyvectl/bhyvectl.c
index b6006b7..f5e50d3 100644
--- a/usr.sbin/bhyvectl/bhyvectl.c
+++ b/usr.sbin/bhyvectl/bhyvectl.c
@@ -309,7 +309,7 @@ dump_vmcs_msr_bitmap(int vcpu, u_long addr)
if (fd < 0)
goto done;
- bitmap = mmap(NULL, PAGE_SIZE, PROT_READ, 0, fd, addr);
+ bitmap = mmap(NULL, PAGE_SIZE, PROT_READ, MAP_SHARED, fd, addr);
if (bitmap == MAP_FAILED)
goto done;