Diffstat
-rw-r--r--   sys/amd64/include/vmm.h              |   4
-rw-r--r--   sys/amd64/vmm/intel/ept.c            |   1
-rw-r--r--   sys/amd64/vmm/intel/vmcs.h           |   5
-rw-r--r--   sys/amd64/vmm/intel/vmx.c            | 224
-rw-r--r--   sys/amd64/vmm/intel/vmx.h            |  15
-rw-r--r--   sys/amd64/vmm/intel/vmx_msr.c        | 215
-rw-r--r--   sys/amd64/vmm/intel/vmx_msr.h        |  15
-rw-r--r--   sys/amd64/vmm/io/vatpic.c            |  10
-rw-r--r--   sys/amd64/vmm/io/vlapic.c            |   1
-rw-r--r--   sys/amd64/vmm/vmm.c                  |  43
-rw-r--r--   sys/amd64/vmm/vmm_instruction_emul.c | 205
-rw-r--r--   sys/amd64/vmm/vmm_msr.c              | 273
-rw-r--r--   sys/amd64/vmm/vmm_msr.h              |  44
-rw-r--r--   sys/amd64/vmm/x86.c                  | 103
-rw-r--r--   sys/modules/vmm/Makefile             |   1
-rw-r--r--   sys/x86/include/specialreg.h         |   8
-rw-r--r--   usr.sbin/bhyve/acpi.c                |   4
-rw-r--r--   usr.sbin/bhyve/bhyverun.c            |   6
-rw-r--r--   usr.sbin/bhyve/block_if.c            |  28
-rw-r--r--   usr.sbin/bhyve/pci_ahci.c            | 146
-rw-r--r--   usr.sbin/bhyve/pci_virtio_block.c    |   6
-rw-r--r--   usr.sbin/bhyve/task_switch.c         |  18
-rw-r--r--   usr.sbin/bhyve/virtio.c              |   8
-rw-r--r--   usr.sbin/bhyve/xmsr.c                |  78
-rw-r--r--   usr.sbin/bhyve/xmsr.h                |   1
-rw-r--r--   usr.sbin/bhyvectl/bhyvectl.c         |   2
26 files changed, 896 insertions(+), 568 deletions(-)
diff --git a/sys/amd64/include/vmm.h b/sys/amd64/include/vmm.h
index 58af2a5..0879ba2 100644
--- a/sys/amd64/include/vmm.h
+++ b/sys/amd64/include/vmm.h
@@ -82,6 +82,7 @@ enum vm_reg_name {
VM_REG_GUEST_PDPTE1,
VM_REG_GUEST_PDPTE2,
VM_REG_GUEST_PDPTE3,
+ VM_REG_GUEST_INTR_SHADOW,
VM_REG_LAST
};
@@ -194,7 +195,6 @@ void vm_nmi_clear(struct vm *vm, int vcpuid);
int vm_inject_extint(struct vm *vm, int vcpu);
int vm_extint_pending(struct vm *vm, int vcpuid);
void vm_extint_clear(struct vm *vm, int vcpuid);
-uint64_t *vm_guest_msrs(struct vm *vm, int cpu);
struct vlapic *vm_lapic(struct vm *vm, int cpu);
struct vioapic *vm_ioapic(struct vm *vm);
struct vhpet *vm_hpet(struct vm *vm);
@@ -485,6 +485,8 @@ enum vm_exitcode {
VM_EXITCODE_SUSPENDED,
VM_EXITCODE_INOUT_STR,
VM_EXITCODE_TASK_SWITCH,
+ VM_EXITCODE_MONITOR,
+ VM_EXITCODE_MWAIT,
VM_EXITCODE_MAX
};
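For illustration, the new VM_REG_GUEST_INTR_SHADOW register is consumed later in this patch by vm_handle_hlt(); a minimal sketch of the calling convention (mirroring that hunk, not additional code in the commit):

/*
 * Writing 0 clears any "sti; hlt" interrupt shadow so that a pending
 * interrupt can be injected on the next VM entry.  Forcing a shadow by
 * writing a non-zero value is rejected with EINVAL by the VT-x backend.
 */
error = vm_set_register(vm, vcpuid, VM_REG_GUEST_INTR_SHADOW, 0);
KASSERT(error == 0, ("error %d clearing interrupt shadow", error));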
diff --git a/sys/amd64/vmm/intel/ept.c b/sys/amd64/vmm/intel/ept.c
index 5f6c4d0..13c9788 100644
--- a/sys/amd64/vmm/intel/ept.c
+++ b/sys/amd64/vmm/intel/ept.c
@@ -44,7 +44,6 @@ __FBSDID("$FreeBSD$");
#include "vmx_cpufunc.h"
#include "vmm_ipi.h"
-#include "vmx_msr.h"
#include "ept.h"
#define EPT_SUPPORTS_EXEC_ONLY(cap) ((cap) & (1UL << 0))
diff --git a/sys/amd64/vmm/intel/vmcs.h b/sys/amd64/vmm/intel/vmcs.h
index 4e9557c..6122de5 100644
--- a/sys/amd64/vmm/intel/vmcs.h
+++ b/sys/amd64/vmm/intel/vmcs.h
@@ -54,6 +54,10 @@ int vmcs_getdesc(struct vmcs *vmcs, int running, int ident,
int vmcs_setdesc(struct vmcs *vmcs, int running, int ident,
struct seg_desc *desc);
+/*
+ * Avoid header pollution caused by inline use of 'vtophys()' in vmx_cpufunc.h
+ */
+#ifdef _VMX_CPUFUNC_H_
static __inline uint64_t
vmcs_read(uint32_t encoding)
{
@@ -73,6 +77,7 @@ vmcs_write(uint32_t encoding, uint64_t val)
error = vmwrite(encoding, val);
KASSERT(error == 0, ("vmcs_write(%u) error %d", encoding, error));
}
+#endif /* _VMX_CPUFUNC_H_ */
#define vmexit_instruction_length() vmcs_read(VMCS_EXIT_INSTRUCTION_LENGTH)
#define vmcs_guest_rip() vmcs_read(VMCS_GUEST_RIP)
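A minimal consumer sketch (illustrative only, not part of the commit): with the new guard the inline accessors are visible only to files that include vmx_cpufunc.h first, so headers that merely need the vmcs declarations no longer pull in vtophys().

#include "vmx_cpufunc.h"	/* defines _VMX_CPUFUNC_H_ and vmread()/vmwrite() */
#include "vmcs.h"		/* inline vmcs_read()/vmcs_write() now visible */

static uint64_t
sample_guest_rip(void)		/* hypothetical helper, for illustration */
{
	return (vmcs_read(VMCS_GUEST_RIP));
}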
diff --git a/sys/amd64/vmm/intel/vmx.c b/sys/amd64/vmm/intel/vmx.c
index b2c5702..2fe5a27 100644
--- a/sys/amd64/vmm/intel/vmx.c
+++ b/sys/amd64/vmm/intel/vmx.c
@@ -52,20 +52,20 @@ __FBSDID("$FreeBSD$");
#include <machine/vmm.h>
#include <machine/vmm_dev.h>
#include <machine/vmm_instruction_emul.h>
+#include "vmm_lapic.h"
#include "vmm_host.h"
#include "vmm_ioport.h"
#include "vmm_ipi.h"
-#include "vmm_msr.h"
#include "vmm_ktr.h"
#include "vmm_stat.h"
#include "vatpic.h"
#include "vlapic.h"
#include "vlapic_priv.h"
-#include "vmx_msr.h"
#include "ept.h"
#include "vmx_cpufunc.h"
#include "vmx.h"
+#include "vmx_msr.h"
#include "x86.h"
#include "vmx_controls.h"
@@ -81,6 +81,8 @@ __FBSDID("$FreeBSD$");
#define PROCBASED_CTLS_ONE_SETTING \
(PROCBASED_SECONDARY_CONTROLS | \
+ PROCBASED_MWAIT_EXITING | \
+ PROCBASED_MONITOR_EXITING | \
PROCBASED_IO_EXITING | \
PROCBASED_MSR_BITMAPS | \
PROCBASED_CTLS_WINDOW_SETTING | \
@@ -94,34 +96,23 @@ __FBSDID("$FreeBSD$");
#define PROCBASED_CTLS2_ONE_SETTING PROCBASED2_ENABLE_EPT
#define PROCBASED_CTLS2_ZERO_SETTING 0
-#define VM_EXIT_CTLS_ONE_SETTING_NO_PAT \
+#define VM_EXIT_CTLS_ONE_SETTING \
(VM_EXIT_HOST_LMA | \
VM_EXIT_SAVE_EFER | \
- VM_EXIT_LOAD_EFER)
-
-#define VM_EXIT_CTLS_ONE_SETTING \
- (VM_EXIT_CTLS_ONE_SETTING_NO_PAT | \
+ VM_EXIT_LOAD_EFER | \
VM_EXIT_ACKNOWLEDGE_INTERRUPT | \
VM_EXIT_SAVE_PAT | \
VM_EXIT_LOAD_PAT)
+
#define VM_EXIT_CTLS_ZERO_SETTING VM_EXIT_SAVE_DEBUG_CONTROLS
-#define VM_ENTRY_CTLS_ONE_SETTING_NO_PAT VM_ENTRY_LOAD_EFER
+#define VM_ENTRY_CTLS_ONE_SETTING (VM_ENTRY_LOAD_EFER | VM_ENTRY_LOAD_PAT)
-#define VM_ENTRY_CTLS_ONE_SETTING \
- (VM_ENTRY_CTLS_ONE_SETTING_NO_PAT | \
- VM_ENTRY_LOAD_PAT)
#define VM_ENTRY_CTLS_ZERO_SETTING \
(VM_ENTRY_LOAD_DEBUG_CONTROLS | \
VM_ENTRY_INTO_SMM | \
VM_ENTRY_DEACTIVATE_DUAL_MONITOR)
-#define guest_msr_rw(vmx, msr) \
- msr_bitmap_change_access((vmx)->msr_bitmap, (msr), MSR_BITMAP_ACCESS_RW)
-
-#define guest_msr_ro(vmx, msr) \
- msr_bitmap_change_access((vmx)->msr_bitmap, (msr), MSR_BITMAP_ACCESS_READ)
-
#define HANDLED 1
#define UNHANDLED 0
@@ -158,10 +149,6 @@ SYSCTL_INT(_hw_vmm_vmx, OID_AUTO, initialized, CTLFLAG_RD,
*/
static SYSCTL_NODE(_hw_vmm_vmx, OID_AUTO, cap, CTLFLAG_RW, NULL, NULL);
-static int vmx_patmsr;
-SYSCTL_INT(_hw_vmm_vmx_cap, OID_AUTO, patmsr, CTLFLAG_RD, &vmx_patmsr, 0,
- "PAT MSR saved and restored in VCMS");
-
static int cap_halt_exit;
SYSCTL_INT(_hw_vmm_vmx_cap, OID_AUTO, halt_exit, CTLFLAG_RD, &cap_halt_exit, 0,
"HLT triggers a VM-exit");
@@ -208,6 +195,7 @@ SYSCTL_UINT(_hw_vmm_vmx, OID_AUTO, vpid_alloc_failed, CTLFLAG_RD,
static int vmx_getdesc(void *arg, int vcpu, int reg, struct seg_desc *desc);
static int vmx_getreg(void *arg, int vcpu, int reg, uint64_t *retval);
+static int vmxctx_setreg(struct vmxctx *vmxctx, int reg, uint64_t val);
static void vmx_inject_pir(struct vlapic *vlapic);
#ifdef KTR
@@ -475,22 +463,6 @@ vpid_init(void)
}
static void
-msr_save_area_init(struct msr_entry *g_area, int *g_count)
-{
- int cnt;
-
- static struct msr_entry guest_msrs[] = {
- { MSR_KGSBASE, 0, 0 },
- };
-
- cnt = sizeof(guest_msrs) / sizeof(guest_msrs[0]);
- if (cnt > GUEST_MSR_MAX_ENTRIES)
- panic("guest msr save area overrun");
- bcopy(guest_msrs, g_area, sizeof(guest_msrs));
- *g_count = cnt;
-}
-
-static void
vmx_disable(void *arg __unused)
{
struct invvpid_desc invvpid_desc = { 0 };
@@ -636,49 +608,24 @@ vmx_init(int ipinum)
}
/* Check support for VM-exit controls */
- vmx_patmsr = 1;
error = vmx_set_ctlreg(MSR_VMX_EXIT_CTLS, MSR_VMX_TRUE_EXIT_CTLS,
VM_EXIT_CTLS_ONE_SETTING,
VM_EXIT_CTLS_ZERO_SETTING,
&exit_ctls);
if (error) {
- /* Try again without the PAT MSR bits */
- error = vmx_set_ctlreg(MSR_VMX_EXIT_CTLS,
- MSR_VMX_TRUE_EXIT_CTLS,
- VM_EXIT_CTLS_ONE_SETTING_NO_PAT,
- VM_EXIT_CTLS_ZERO_SETTING,
- &exit_ctls);
- if (error) {
- printf("vmx_init: processor does not support desired "
- "exit controls\n");
- return (error);
- } else {
- if (bootverbose)
- printf("vmm: PAT MSR access not supported\n");
- guest_msr_valid(MSR_PAT);
- vmx_patmsr = 0;
- }
+ printf("vmx_init: processor does not support desired "
+ "exit controls\n");
+ return (error);
}
/* Check support for VM-entry controls */
- if (vmx_patmsr) {
- error = vmx_set_ctlreg(MSR_VMX_ENTRY_CTLS,
- MSR_VMX_TRUE_ENTRY_CTLS,
- VM_ENTRY_CTLS_ONE_SETTING,
- VM_ENTRY_CTLS_ZERO_SETTING,
- &entry_ctls);
- } else {
- error = vmx_set_ctlreg(MSR_VMX_ENTRY_CTLS,
- MSR_VMX_TRUE_ENTRY_CTLS,
- VM_ENTRY_CTLS_ONE_SETTING_NO_PAT,
- VM_ENTRY_CTLS_ZERO_SETTING,
- &entry_ctls);
- }
-
+ error = vmx_set_ctlreg(MSR_VMX_ENTRY_CTLS, MSR_VMX_TRUE_ENTRY_CTLS,
+ VM_ENTRY_CTLS_ONE_SETTING, VM_ENTRY_CTLS_ZERO_SETTING,
+ &entry_ctls);
if (error) {
printf("vmx_init: processor does not support desired "
- "entry controls\n");
- return (error);
+ "entry controls\n");
+ return (error);
}
/*
@@ -800,6 +747,8 @@ vmx_init(int ipinum)
vpid_init();
+ vmx_msr_init();
+
/* enable VMX operation */
smp_rendezvous(NULL, vmx_enable, NULL, NULL);
@@ -869,7 +818,7 @@ static void *
vmx_vminit(struct vm *vm, pmap_t pmap)
{
uint16_t vpid[VM_MAXCPU];
- int i, error, guest_msr_count;
+ int i, error;
struct vmx *vmx;
struct vmcs *vmcs;
@@ -905,16 +854,14 @@ vmx_vminit(struct vm *vm, pmap_t pmap)
* how they are saved/restored so can be directly accessed by the
* guest.
*
- * Guest KGSBASE is saved and restored in the guest MSR save area.
- * Host KGSBASE is restored before returning to userland from the pcb.
- * There will be a window of time when we are executing in the host
- * kernel context with a value of KGSBASE from the guest. This is ok
- * because the value of KGSBASE is inconsequential in kernel context.
- *
* MSR_EFER is saved and restored in the guest VMCS area on a
* VM exit and entry respectively. It is also restored from the
* host VMCS area on a VM exit.
*
+ * MSR_PAT is saved and restored in the guest VMCS area on a VM exit
+ * and entry respectively. It is also restored from the host VMCS
+ * area on a VM exit.
+ *
* The TSC MSR is exposed read-only. Writes are disallowed as that
* will impact the host TSC.
* XXX Writes would be implemented with a wrmsr trap, and
@@ -925,21 +872,11 @@ vmx_vminit(struct vm *vm, pmap_t pmap)
guest_msr_rw(vmx, MSR_SYSENTER_CS_MSR) ||
guest_msr_rw(vmx, MSR_SYSENTER_ESP_MSR) ||
guest_msr_rw(vmx, MSR_SYSENTER_EIP_MSR) ||
- guest_msr_rw(vmx, MSR_KGSBASE) ||
guest_msr_rw(vmx, MSR_EFER) ||
+ guest_msr_rw(vmx, MSR_PAT) ||
guest_msr_ro(vmx, MSR_TSC))
panic("vmx_vminit: error setting guest msr access");
- /*
- * MSR_PAT is saved and restored in the guest VMCS are on a VM exit
- * and entry respectively. It is also restored from the host VMCS
- * area on a VM exit. However, if running on a system with no
- * MSR_PAT save/restore support, leave access disabled so accesses
- * will be trapped.
- */
- if (vmx_patmsr && guest_msr_rw(vmx, MSR_PAT))
- panic("vmx_vminit: error setting guest pat msr access");
-
vpid_alloc(vpid, VM_MAXCPU);
if (virtual_interrupt_delivery) {
@@ -958,6 +895,8 @@ vmx_vminit(struct vm *vm, pmap_t pmap)
error, i);
}
+ vmx_msr_guest_init(vmx, i);
+
error = vmcs_init(vmcs);
KASSERT(error == 0, ("vmcs_init error %d", error));
@@ -996,13 +935,6 @@ vmx_vminit(struct vm *vm, pmap_t pmap)
vmx->state[i].lastcpu = NOCPU;
vmx->state[i].vpid = vpid[i];
- msr_save_area_init(vmx->guest_msrs[i], &guest_msr_count);
-
- error = vmcs_set_msr_save(vmcs, vtophys(vmx->guest_msrs[i]),
- guest_msr_count);
- if (error != 0)
- panic("vmcs_set_msr_save error %d", error);
-
/*
* Set up the CR0/4 shadows, and init the read shadow
* to the power-on register value from the Intel Sys Arch.
@@ -2078,6 +2010,46 @@ vmx_task_switch_reason(uint64_t qual)
}
static int
+emulate_wrmsr(struct vmx *vmx, int vcpuid, u_int num, uint64_t val, bool *retu)
+{
+ int error;
+
+ if (lapic_msr(num))
+ error = lapic_wrmsr(vmx->vm, vcpuid, num, val, retu);
+ else
+ error = vmx_wrmsr(vmx, vcpuid, num, val, retu);
+
+ return (error);
+}
+
+static int
+emulate_rdmsr(struct vmx *vmx, int vcpuid, u_int num, bool *retu)
+{
+ struct vmxctx *vmxctx;
+ uint64_t result;
+ uint32_t eax, edx;
+ int error;
+
+ if (lapic_msr(num))
+ error = lapic_rdmsr(vmx->vm, vcpuid, num, &result, retu);
+ else
+ error = vmx_rdmsr(vmx, vcpuid, num, &result, retu);
+
+ if (error == 0) {
+ eax = result;
+ vmxctx = &vmx->ctx[vcpuid];
+ error = vmxctx_setreg(vmxctx, VM_REG_GUEST_RAX, eax);
+ KASSERT(error == 0, ("vmxctx_setreg(rax) error %d", error));
+
+ edx = result >> 32;
+ error = vmxctx_setreg(vmxctx, VM_REG_GUEST_RDX, edx);
+ KASSERT(error == 0, ("vmxctx_setreg(rdx) error %d", error));
+ }
+
+ return (error);
+}
+
+static int
vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit)
{
int error, handled, in;
@@ -2215,7 +2187,7 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit)
retu = false;
ecx = vmxctx->guest_rcx;
VCPU_CTR1(vmx->vm, vcpu, "rdmsr 0x%08x", ecx);
- error = emulate_rdmsr(vmx->vm, vcpu, ecx, &retu);
+ error = emulate_rdmsr(vmx, vcpu, ecx, &retu);
if (error) {
vmexit->exitcode = VM_EXITCODE_RDMSR;
vmexit->u.msr.code = ecx;
@@ -2224,7 +2196,7 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit)
} else {
/* Return to userspace with a valid exitcode */
KASSERT(vmexit->exitcode != VM_EXITCODE_BOGUS,
- ("emulate_wrmsr retu with bogus exitcode"));
+ ("emulate_rdmsr retu with bogus exitcode"));
}
break;
case EXIT_REASON_WRMSR:
@@ -2235,7 +2207,7 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit)
edx = vmxctx->guest_rdx;
VCPU_CTR2(vmx->vm, vcpu, "wrmsr 0x%08x value 0x%016lx",
ecx, (uint64_t)edx << 32 | eax);
- error = emulate_wrmsr(vmx->vm, vcpu, ecx,
+ error = emulate_wrmsr(vmx, vcpu, ecx,
(uint64_t)edx << 32 | eax, &retu);
if (error) {
vmexit->exitcode = VM_EXITCODE_WRMSR;
@@ -2403,6 +2375,12 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit)
case EXIT_REASON_XSETBV:
handled = vmx_emulate_xsetbv(vmx, vcpu, vmexit);
break;
+ case EXIT_REASON_MONITOR:
+ vmexit->exitcode = VM_EXITCODE_MONITOR;
+ break;
+ case EXIT_REASON_MWAIT:
+ vmexit->exitcode = VM_EXITCODE_MWAIT;
+ break;
default:
vmm_stat_incr(vmx->vm, vcpu, VMEXIT_UNKNOWN, 1);
break;
@@ -2523,6 +2501,8 @@ vmx_run(void *arg, int vcpu, register_t startrip, pmap_t pmap,
KASSERT(vmxctx->pmap == pmap,
("pmap %p different than ctx pmap %p", pmap, vmxctx->pmap));
+ vmx_msr_guest_enter(vmx, vcpu);
+
VMPTRLD(vmcs);
/*
@@ -2624,6 +2604,8 @@ vmx_run(void *arg, int vcpu, register_t startrip, pmap_t pmap,
vmexit->exitcode);
VMCLEAR(vmcs);
+ vmx_msr_guest_exit(vmx, vcpu);
+
return (0);
}
@@ -2712,6 +2694,46 @@ vmxctx_setreg(struct vmxctx *vmxctx, int reg, uint64_t val)
}
static int
+vmx_get_intr_shadow(struct vmx *vmx, int vcpu, int running, uint64_t *retval)
+{
+ uint64_t gi;
+ int error;
+
+ error = vmcs_getreg(&vmx->vmcs[vcpu], running,
+ VMCS_IDENT(VMCS_GUEST_INTERRUPTIBILITY), &gi);
+ *retval = (gi & HWINTR_BLOCKING) ? 1 : 0;
+ return (error);
+}
+
+static int
+vmx_modify_intr_shadow(struct vmx *vmx, int vcpu, int running, uint64_t val)
+{
+ struct vmcs *vmcs;
+ uint64_t gi;
+ int error, ident;
+
+ /*
+ * Forcing the vcpu into an interrupt shadow is not supported.
+ */
+ if (val) {
+ error = EINVAL;
+ goto done;
+ }
+
+ vmcs = &vmx->vmcs[vcpu];
+ ident = VMCS_IDENT(VMCS_GUEST_INTERRUPTIBILITY);
+ error = vmcs_getreg(vmcs, running, ident, &gi);
+ if (error == 0) {
+ gi &= ~HWINTR_BLOCKING;
+ error = vmcs_setreg(vmcs, running, ident, gi);
+ }
+done:
+ VCPU_CTR2(vmx->vm, vcpu, "Setting intr_shadow to %#lx %s", val,
+ error ? "failed" : "succeeded");
+ return (error);
+}
+
+static int
vmx_shadow_reg(int reg)
{
int shreg;
@@ -2742,6 +2764,9 @@ vmx_getreg(void *arg, int vcpu, int reg, uint64_t *retval)
if (running && hostcpu != curcpu)
panic("vmx_getreg: %s%d is running", vm_name(vmx->vm), vcpu);
+ if (reg == VM_REG_GUEST_INTR_SHADOW)
+ return (vmx_get_intr_shadow(vmx, vcpu, running, retval));
+
if (vmxctx_getreg(&vmx->ctx[vcpu], reg, retval) == 0)
return (0);
@@ -2760,6 +2785,9 @@ vmx_setreg(void *arg, int vcpu, int reg, uint64_t val)
if (running && hostcpu != curcpu)
panic("vmx_setreg: %s%d is running", vm_name(vmx->vm), vcpu);
+ if (reg == VM_REG_GUEST_INTR_SHADOW)
+ return (vmx_modify_intr_shadow(vmx, vcpu, running, val));
+
if (vmxctx_setreg(&vmx->ctx[vcpu], reg, val) == 0)
return (0);
diff --git a/sys/amd64/vmm/intel/vmx.h b/sys/amd64/vmm/intel/vmx.h
index 208fcee..2124554 100644
--- a/sys/amd64/vmm/intel/vmx.h
+++ b/sys/amd64/vmm/intel/vmx.h
@@ -33,8 +33,6 @@
struct pmap;
-#define GUEST_MSR_MAX_ENTRIES 64 /* arbitrary */
-
struct vmxctx {
register_t guest_rdi; /* Guest state */
register_t guest_rsi;
@@ -97,13 +95,23 @@ struct pir_desc {
} __aligned(64);
CTASSERT(sizeof(struct pir_desc) == 64);
+/* Index into the 'guest_msrs[]' array */
+enum {
+ IDX_MSR_LSTAR,
+ IDX_MSR_CSTAR,
+ IDX_MSR_STAR,
+ IDX_MSR_SF_MASK,
+ IDX_MSR_KGSBASE,
+ GUEST_MSR_NUM /* must be the last enumeration */
+};
+
/* virtual machine softc */
struct vmx {
struct vmcs vmcs[VM_MAXCPU]; /* one vmcs per virtual cpu */
struct apic_page apic_page[VM_MAXCPU]; /* one apic page per vcpu */
char msr_bitmap[PAGE_SIZE];
struct pir_desc pir_desc[VM_MAXCPU];
- struct msr_entry guest_msrs[VM_MAXCPU][GUEST_MSR_MAX_ENTRIES];
+ uint64_t guest_msrs[VM_MAXCPU][GUEST_MSR_NUM];
struct vmxctx ctx[VM_MAXCPU];
struct vmxcap cap[VM_MAXCPU];
struct vmxstate state[VM_MAXCPU];
@@ -113,7 +121,6 @@ struct vmx {
};
CTASSERT((offsetof(struct vmx, vmcs) & PAGE_MASK) == 0);
CTASSERT((offsetof(struct vmx, msr_bitmap) & PAGE_MASK) == 0);
-CTASSERT((offsetof(struct vmx, guest_msrs) & 15) == 0);
CTASSERT((offsetof(struct vmx, pir_desc[0]) & 63) == 0);
#define VMX_GUEST_VMEXIT 0
diff --git a/sys/amd64/vmm/intel/vmx_msr.c b/sys/amd64/vmm/intel/vmx_msr.c
index a3428db..746ca73 100644
--- a/sys/amd64/vmm/intel/vmx_msr.c
+++ b/sys/amd64/vmm/intel/vmx_msr.c
@@ -31,10 +31,15 @@ __FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/systm.h>
+#include <sys/cpuset.h>
+#include <machine/clock.h>
#include <machine/cpufunc.h>
+#include <machine/md_var.h>
#include <machine/specialreg.h>
+#include <machine/vmm.h>
+#include "vmx.h"
#include "vmx_msr.h"
static boolean_t
@@ -171,3 +176,213 @@ msr_bitmap_change_access(char *bitmap, u_int msr, int access)
return (0);
}
+
+static uint64_t misc_enable;
+static uint64_t platform_info;
+static uint64_t turbo_ratio_limit;
+static uint64_t host_msrs[GUEST_MSR_NUM];
+
+static bool
+nehalem_cpu(void)
+{
+ u_int family, model;
+
+ /*
+ * The family:model numbers belonging to the Nehalem microarchitecture
+ * are documented in Section 35.5, Intel SDM dated Feb 2014.
+ */
+ family = CPUID_TO_FAMILY(cpu_id);
+ model = CPUID_TO_MODEL(cpu_id);
+ if (family == 0x6) {
+ switch (model) {
+ case 0x1A:
+ case 0x1E:
+ case 0x1F:
+ case 0x2E:
+ return (true);
+ default:
+ break;
+ }
+ }
+ return (false);
+}
+
+static bool
+westmere_cpu(void)
+{
+ u_int family, model;
+
+ /*
+ * The family:model numbers belonging to the Westmere microarchitecture
+ * are documented in Section 35.6, Intel SDM dated Feb 2014.
+ */
+ family = CPUID_TO_FAMILY(cpu_id);
+ model = CPUID_TO_MODEL(cpu_id);
+ if (family == 0x6) {
+ switch (model) {
+ case 0x25:
+ case 0x2C:
+ return (true);
+ default:
+ break;
+ }
+ }
+ return (false);
+}
+
+void
+vmx_msr_init(void)
+{
+ uint64_t bus_freq, ratio;
+ int i;
+
+ /*
+ * It is safe to cache the values of the following MSRs because
+ * they don't change based on curcpu, curproc or curthread.
+ */
+ host_msrs[IDX_MSR_LSTAR] = rdmsr(MSR_LSTAR);
+ host_msrs[IDX_MSR_CSTAR] = rdmsr(MSR_CSTAR);
+ host_msrs[IDX_MSR_STAR] = rdmsr(MSR_STAR);
+ host_msrs[IDX_MSR_SF_MASK] = rdmsr(MSR_SF_MASK);
+
+ /*
+ * Initialize emulated MSRs
+ */
+ misc_enable = rdmsr(MSR_IA32_MISC_ENABLE);
+ /*
+ * Set mandatory bits
+ * 11: branch trace disabled
+ * 12: PEBS unavailable
+ * Clear unsupported features
+ * 16: SpeedStep enable
+ * 18: enable MONITOR FSM
+ */
+ misc_enable |= (1 << 12) | (1 << 11);
+ misc_enable &= ~((1 << 18) | (1 << 16));
+
+ if (nehalem_cpu() || westmere_cpu())
+ bus_freq = 133330000; /* 133 MHz */
+ else
+ bus_freq = 100000000; /* 100 MHz */
+
+ /*
+ * XXXtime
+ * The ratio should really be based on the virtual TSC frequency as
+ * opposed to the host TSC.
+ */
+ ratio = (tsc_freq / bus_freq) & 0xff;
+
+ /*
+ * The register definition is based on the micro-architecture
+ * but the following bits are always the same:
+ * [15:8] Maximum Non-Turbo Ratio
+ * [28] Programmable Ratio Limit for Turbo Mode
+ * [29] Programmable TDC-TDP Limit for Turbo Mode
+ * [47:40] Maximum Efficiency Ratio
+ *
+ * The other bits can be safely set to 0 on all
+ * micro-architectures up to Haswell.
+ */
+ platform_info = (ratio << 8) | (ratio << 40);
+
+ /*
+ * The number of valid bits in the MSR_TURBO_RATIO_LIMITx register is
+ * dependent on the maximum cores per package supported by the micro-
+ * architecture. For e.g., Westmere supports 6 cores per package and
+ * uses the low 48 bits. Sandybridge support 8 cores per package and
+ * uses up all 64 bits.
+ *
+ * However, the unused bits are reserved so we pretend that all bits
+ * in this MSR are valid.
+ */
+ for (i = 0; i < 8; i++)
+ turbo_ratio_limit = (turbo_ratio_limit << 8) | ratio;
+}
+
+void
+vmx_msr_guest_init(struct vmx *vmx, int vcpuid)
+{
+ /*
+ * The permissions bitmap is shared between all vcpus so initialize it
+ * once when initializing the vBSP.
+ */
+ if (vcpuid == 0) {
+ guest_msr_rw(vmx, MSR_LSTAR);
+ guest_msr_rw(vmx, MSR_CSTAR);
+ guest_msr_rw(vmx, MSR_STAR);
+ guest_msr_rw(vmx, MSR_SF_MASK);
+ guest_msr_rw(vmx, MSR_KGSBASE);
+ }
+ return;
+}
+
+void
+vmx_msr_guest_enter(struct vmx *vmx, int vcpuid)
+{
+ uint64_t *guest_msrs = vmx->guest_msrs[vcpuid];
+
+ /* Save host MSRs (if any) and restore guest MSRs */
+ wrmsr(MSR_LSTAR, guest_msrs[IDX_MSR_LSTAR]);
+ wrmsr(MSR_CSTAR, guest_msrs[IDX_MSR_CSTAR]);
+ wrmsr(MSR_STAR, guest_msrs[IDX_MSR_STAR]);
+ wrmsr(MSR_SF_MASK, guest_msrs[IDX_MSR_SF_MASK]);
+ wrmsr(MSR_KGSBASE, guest_msrs[IDX_MSR_KGSBASE]);
+}
+
+void
+vmx_msr_guest_exit(struct vmx *vmx, int vcpuid)
+{
+ uint64_t *guest_msrs = vmx->guest_msrs[vcpuid];
+
+ /* Save guest MSRs */
+ guest_msrs[IDX_MSR_LSTAR] = rdmsr(MSR_LSTAR);
+ guest_msrs[IDX_MSR_CSTAR] = rdmsr(MSR_CSTAR);
+ guest_msrs[IDX_MSR_STAR] = rdmsr(MSR_STAR);
+ guest_msrs[IDX_MSR_SF_MASK] = rdmsr(MSR_SF_MASK);
+ guest_msrs[IDX_MSR_KGSBASE] = rdmsr(MSR_KGSBASE);
+
+ /* Restore host MSRs */
+ wrmsr(MSR_LSTAR, host_msrs[IDX_MSR_LSTAR]);
+ wrmsr(MSR_CSTAR, host_msrs[IDX_MSR_CSTAR]);
+ wrmsr(MSR_STAR, host_msrs[IDX_MSR_STAR]);
+ wrmsr(MSR_SF_MASK, host_msrs[IDX_MSR_SF_MASK]);
+
+ /* MSR_KGSBASE will be restored on the way back to userspace */
+}
+
+int
+vmx_rdmsr(struct vmx *vmx, int vcpuid, u_int num, uint64_t *val, bool *retu)
+{
+ int error = 0;
+
+ switch (num) {
+ case MSR_IA32_MISC_ENABLE:
+ *val = misc_enable;
+ break;
+ case MSR_PLATFORM_INFO:
+ *val = platform_info;
+ break;
+ case MSR_TURBO_RATIO_LIMIT:
+ case MSR_TURBO_RATIO_LIMIT1:
+ *val = turbo_ratio_limit;
+ break;
+ default:
+ error = EINVAL;
+ break;
+ }
+ return (error);
+}
+
+int
+vmx_wrmsr(struct vmx *vmx, int vcpuid, u_int num, uint64_t val, bool *retu)
+{
+ int error = 0;
+
+ switch (num) {
+ default:
+ error = EINVAL;
+ break;
+ }
+
+ return (error);
+}
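A worked example of the values synthesized above (illustrative; assumes a hypothetical 2.4 GHz host TSC on a non-Nehalem/Westmere part, so bus_freq is 100 MHz):

/*
 * ratio             = (2400000000 / 100000000) & 0xff  = 0x18 (24)
 * platform_info     = (0x18 << 8) | (0x18UL << 40)     = 0x0000180000001800
 * turbo_ratio_limit = 0x18 replicated into all 8 bytes = 0x1818181818181818
 *
 * i.e. the guest sees a maximum non-turbo ratio, a maximum efficiency ratio
 * and per-core turbo limits that all equal 24.
 */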
diff --git a/sys/amd64/vmm/intel/vmx_msr.h b/sys/amd64/vmm/intel/vmx_msr.h
index 340b0f7..e77881c 100644
--- a/sys/amd64/vmm/intel/vmx_msr.h
+++ b/sys/amd64/vmm/intel/vmx_msr.h
@@ -29,6 +29,15 @@
#ifndef _VMX_MSR_H_
#define _VMX_MSR_H_
+struct vmx;
+
+void vmx_msr_init(void);
+void vmx_msr_guest_init(struct vmx *vmx, int vcpuid);
+void vmx_msr_guest_enter(struct vmx *vmx, int vcpuid);
+void vmx_msr_guest_exit(struct vmx *vmx, int vcpuid);
+int vmx_rdmsr(struct vmx *, int vcpuid, u_int num, uint64_t *val, bool *retu);
+int vmx_wrmsr(struct vmx *, int vcpuid, u_int num, uint64_t val, bool *retu);
+
uint32_t vmx_revision(void);
int vmx_set_ctlreg(int ctl_reg, int true_ctl_reg, uint32_t ones_mask,
@@ -52,4 +61,10 @@ int vmx_set_ctlreg(int ctl_reg, int true_ctl_reg, uint32_t ones_mask,
void msr_bitmap_initialize(char *bitmap);
int msr_bitmap_change_access(char *bitmap, u_int msr, int access);
+#define guest_msr_rw(vmx, msr) \
+ msr_bitmap_change_access((vmx)->msr_bitmap, (msr), MSR_BITMAP_ACCESS_RW)
+
+#define guest_msr_ro(vmx, msr) \
+ msr_bitmap_change_access((vmx)->msr_bitmap, (msr), MSR_BITMAP_ACCESS_READ)
+
#endif
diff --git a/sys/amd64/vmm/io/vatpic.c b/sys/amd64/vmm/io/vatpic.c
index 15620d5..b710a84 100644
--- a/sys/amd64/vmm/io/vatpic.c
+++ b/sys/amd64/vmm/io/vatpic.c
@@ -500,13 +500,19 @@ vatpic_pending_intr(struct vm *vm, int *vecptr)
VATPIC_LOCK(vatpic);
pin = vatpic_get_highest_irrpin(atpic);
- if (pin == -1)
- pin = 7;
if (pin == 2) {
atpic = &vatpic->atpic[1];
pin = vatpic_get_highest_irrpin(atpic);
}
+ /*
+ * If there are no pins active at this moment then return the spurious
+ * interrupt vector instead.
+ */
+ if (pin == -1)
+ pin = 7;
+
+ KASSERT(pin >= 0 && pin <= 7, ("%s: invalid pin %d", __func__, pin));
*vecptr = atpic->irq_base + pin;
VATPIC_UNLOCK(vatpic);
diff --git a/sys/amd64/vmm/io/vlapic.c b/sys/amd64/vmm/io/vlapic.c
index 3c93463..d684dba 100644
--- a/sys/amd64/vmm/io/vlapic.c
+++ b/sys/amd64/vmm/io/vlapic.c
@@ -633,6 +633,7 @@ vlapic_fire_timer(struct vlapic *vlapic)
// The timer LVT always uses the fixed delivery mode.
lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_TIMER_LVT);
if (vlapic_fire_lvt(vlapic, lvt | APIC_LVT_DM_FIXED)) {
+ VLAPIC_CTR0(vlapic, "vlapic timer fired");
vmm_stat_incr(vlapic->vm, vlapic->vcpuid, VLAPIC_INTR_TIMER, 1);
}
}
diff --git a/sys/amd64/vmm/vmm.c b/sys/amd64/vmm/vmm.c
index fa0200e..ddf875b 100644
--- a/sys/amd64/vmm/vmm.c
+++ b/sys/amd64/vmm/vmm.c
@@ -74,7 +74,6 @@ __FBSDID("$FreeBSD$");
#include "vhpet.h"
#include "vioapic.h"
#include "vlapic.h"
-#include "vmm_msr.h"
#include "vmm_ipi.h"
#include "vmm_stat.h"
#include "vmm_lapic.h"
@@ -105,7 +104,6 @@ struct vcpu {
struct savefpu *guestfpu; /* (a,i) guest fpu state */
uint64_t guest_xcr0; /* (i) guest %xcr0 register */
void *stats; /* (a,i) statistics */
- uint64_t guest_msrs[VMM_MSR_NUM]; /* (i) emulated MSRs */
struct vm_exit exitinfo; /* (x) exit reason and collateral */
};
@@ -188,7 +186,6 @@ static struct vmm_ops *ops;
#define fpu_stop_emulating() clts()
static MALLOC_DEFINE(M_VM, "vm", "vm");
-CTASSERT(VMM_MSR_NUM <= 64); /* msr_mask can keep track of up to 64 msrs */
/* statistics */
static VMM_STAT(VCPU_TOTAL_RUNTIME, "vcpu total runtime");
@@ -250,7 +247,6 @@ vcpu_init(struct vm *vm, int vcpu_id, bool create)
vcpu->guest_xcr0 = XFEATURE_ENABLED_X87;
fpu_save_area_reset(vcpu->guestfpu);
vmm_stat_init(vcpu->stats);
- guest_msrs_init(vm, vcpu_id);
}
struct vm_exit *
@@ -294,7 +290,6 @@ vmm_init(void)
else
return (ENXIO);
- vmm_msr_init();
vmm_resume_p = vmm_resume;
return (VMM_INIT(vmm_ipinum));
@@ -1091,7 +1086,7 @@ vm_handle_hlt(struct vm *vm, int vcpuid, bool intr_disabled, bool *retu)
{
struct vcpu *vcpu;
const char *wmesg;
- int t, vcpu_halted, vm_halted;
+ int error, t, vcpu_halted, vm_halted;
KASSERT(!CPU_ISSET(vcpuid, &vm->halted_cpus), ("vcpu already halted"));
@@ -1099,6 +1094,22 @@ vm_handle_hlt(struct vm *vm, int vcpuid, bool intr_disabled, bool *retu)
vcpu_halted = 0;
vm_halted = 0;
+ /*
+ * The typical way to halt a cpu is to execute: "sti; hlt"
+ *
+ * STI sets RFLAGS.IF to enable interrupts. However, the processor
+ * remains in an "interrupt shadow" for an additional instruction
+ * following the STI. This guarantees that "sti; hlt" sequence is
+ * atomic and a pending interrupt will be recognized after the HLT.
+ *
+ * After the HLT emulation is done the vcpu is no longer in an
+ * interrupt shadow and a pending interrupt can be injected on
+ * the next entry into the guest.
+ */
+ error = vm_set_register(vm, vcpuid, VM_REG_GUEST_INTR_SHADOW, 0);
+ KASSERT(error == 0, ("%s: error %d clearing interrupt shadow",
+ __func__, error));
+
vcpu_lock(vcpu);
while (1) {
/*
@@ -1187,8 +1198,12 @@ vm_handle_paging(struct vm *vm, int vcpuid, bool *retu)
if (ftype == VM_PROT_READ || ftype == VM_PROT_WRITE) {
rv = pmap_emulate_accessed_dirty(vmspace_pmap(vm->vmspace),
vme->u.paging.gpa, ftype);
- if (rv == 0)
+ if (rv == 0) {
+ VCPU_CTR2(vm, vcpuid, "%s bit emulation for gpa %#lx",
+ ftype == VM_PROT_READ ? "accessed" : "dirty",
+ vme->u.paging.gpa);
goto done;
+ }
}
map = &vm->vmspace->vm_map;
@@ -1229,6 +1244,8 @@ vm_handle_inst_emul(struct vm *vm, int vcpuid, bool *retu)
paging = &vme->u.inst_emul.paging;
cpu_mode = paging->cpu_mode;
+ VCPU_CTR1(vm, vcpuid, "inst_emul fault accessing gpa %#lx", gpa);
+
vie_init(vie);
/* Fetch, decode and emulate the faulting instruction */
@@ -1425,7 +1442,6 @@ restart:
pcb = PCPU_GET(curpcb);
set_pcb_flags(pcb, PCB_FULL_IRET);
- restore_guest_msrs(vm, vcpuid);
restore_guest_fpustate(vcpu);
vcpu_require_state(vm, vcpuid, VCPU_RUNNING);
@@ -1433,7 +1449,6 @@ restart:
vcpu_require_state(vm, vcpuid, VCPU_FROZEN);
save_guest_fpustate(vcpu);
- restore_host_msrs(vm, vcpuid);
vmm_stat_incr(vm, vcpuid, VCPU_TOTAL_RUNTIME, rdtsc() - tscval);
@@ -1467,6 +1482,10 @@ restart:
case VM_EXITCODE_INOUT_STR:
error = vm_handle_inout(vm, vcpuid, vme, &retu);
break;
+ case VM_EXITCODE_MONITOR:
+ case VM_EXITCODE_MWAIT:
+ vm_inject_ud(vm, vcpuid);
+ break;
default:
retu = true; /* handled in userland */
break;
@@ -1875,12 +1894,6 @@ vm_set_capability(struct vm *vm, int vcpu, int type, int val)
return (VMSETCAP(vm->cookie, vcpu, type, val));
}
-uint64_t *
-vm_guest_msrs(struct vm *vm, int cpu)
-{
- return (vm->vcpu[cpu].guest_msrs);
-}
-
struct vlapic *
vm_lapic(struct vm *vm, int cpu)
{
diff --git a/sys/amd64/vmm/vmm_instruction_emul.c b/sys/amd64/vmm/vmm_instruction_emul.c
index 09453a2..c6ba01e 100644
--- a/sys/amd64/vmm/vmm_instruction_emul.c
+++ b/sys/amd64/vmm/vmm_instruction_emul.c
@@ -69,6 +69,7 @@ enum {
VIE_OP_TYPE_TWO_BYTE,
VIE_OP_TYPE_PUSH,
VIE_OP_TYPE_CMP,
+ VIE_OP_TYPE_POP,
VIE_OP_TYPE_LAST
};
@@ -159,6 +160,11 @@ static const struct vie_op one_byte_opcodes[256] = {
.op_type = VIE_OP_TYPE_OR,
.op_flags = VIE_OP_F_IMM8,
},
+ [0x8F] = {
+ /* XXX Group 1A extended opcode - not just POP */
+ .op_byte = 0x8F,
+ .op_type = VIE_OP_TYPE_POP,
+ },
[0xFF] = {
/* XXX Group 5 extended opcode - not just PUSH */
.op_byte = 0xFF,
@@ -316,46 +322,36 @@ vie_update_register(void *vm, int vcpuid, enum vm_reg_name reg,
return (error);
}
+#define RFLAGS_STATUS_BITS (PSL_C | PSL_PF | PSL_AF | PSL_Z | PSL_N | PSL_V)
+
/*
* Return the status flags that would result from doing (x - y).
*/
-static u_long
-getcc16(uint16_t x, uint16_t y)
-{
- u_long rflags;
-
- __asm __volatile("sub %1,%2; pushfq; popq %0" :
- "=r" (rflags) : "m" (y), "r" (x));
- return (rflags);
-}
-
-static u_long
-getcc32(uint32_t x, uint32_t y)
-{
- u_long rflags;
-
- __asm __volatile("sub %1,%2; pushfq; popq %0" :
- "=r" (rflags) : "m" (y), "r" (x));
- return (rflags);
-}
-
-static u_long
-getcc64(uint64_t x, uint64_t y)
-{
- u_long rflags;
-
- __asm __volatile("sub %1,%2; pushfq; popq %0" :
- "=r" (rflags) : "m" (y), "r" (x));
- return (rflags);
-}
+#define GETCC(sz) \
+static u_long \
+getcc##sz(uint##sz##_t x, uint##sz##_t y) \
+{ \
+ u_long rflags; \
+ \
+ __asm __volatile("sub %2,%1; pushfq; popq %0" : \
+ "=r" (rflags), "+r" (x) : "m" (y)); \
+ return (rflags); \
+} struct __hack
+
+GETCC(8);
+GETCC(16);
+GETCC(32);
+GETCC(64);
static u_long
getcc(int opsize, uint64_t x, uint64_t y)
{
- KASSERT(opsize == 2 || opsize == 4 || opsize == 8,
+ KASSERT(opsize == 1 || opsize == 2 || opsize == 4 || opsize == 8,
("getcc: invalid operand size %d", opsize));
- if (opsize == 2)
+ if (opsize == 1)
+ return (getcc8(x, y));
+ else if (opsize == 2)
return (getcc16(x, y));
else if (opsize == 4)
return (getcc32(x, y));
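For reference, GETCC(32) above expands to the function below; it is equivalent to the getcc32() it replaces except that 'x', the destination of the subtraction, is now correctly declared as a read/write ("+r") operand (illustrative expansion, not extra code in the commit):

static u_long
getcc32(uint32_t x, uint32_t y)
{
	u_long rflags;

	/* compute x - y, then capture the resulting RFLAGS via pushfq/popq */
	__asm __volatile("sub %2,%1; pushfq; popq %0" :
	    "=r" (rflags), "+r" (x) : "m" (y));
	return (rflags);
}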
@@ -569,7 +565,7 @@ emulate_and(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
{
int error, size;
enum vm_reg_name reg;
- uint64_t val1, val2;
+ uint64_t result, rflags, rflags2, val1, val2;
size = vie->opsize;
error = EINVAL;
@@ -597,8 +593,8 @@ emulate_and(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
break;
/* perform the operation and write the result */
- val1 &= val2;
- error = vie_update_register(vm, vcpuid, reg, val1, size);
+ result = val1 & val2;
+ error = vie_update_register(vm, vcpuid, reg, result, size);
break;
case 0x81:
/*
@@ -625,11 +621,11 @@ emulate_and(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
switch (vie->reg & 7) {
case 0x4:
/* modrm:reg == b100, AND */
- val1 &= vie->immediate;
+ result = val1 & vie->immediate;
break;
case 0x1:
/* modrm:reg == b001, OR */
- val1 |= vie->immediate;
+ result = val1 | vie->immediate;
break;
default:
error = EINVAL;
@@ -638,11 +634,29 @@ emulate_and(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
if (error)
break;
- error = memwrite(vm, vcpuid, gpa, val1, size, arg);
+ error = memwrite(vm, vcpuid, gpa, result, size, arg);
break;
default:
break;
}
+ if (error)
+ return (error);
+
+ error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, &rflags);
+ if (error)
+ return (error);
+
+ /*
+ * OF and CF are cleared; the SF, ZF and PF flags are set according
+ * to the result; AF is undefined.
+ *
+ * The updated status flags are obtained by subtracting 0 from 'result'.
+ */
+ rflags2 = getcc(size, result, 0);
+ rflags &= ~RFLAGS_STATUS_BITS;
+ rflags |= rflags2 & (PSL_PF | PSL_Z | PSL_N);
+
+ error = vie_update_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, rflags, 8);
return (error);
}
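A quick illustration of the new flag handling (hypothetical operands, not from the commit):

/*
 * "and $0x0f, <mem>" with a previous value of 0xf0 yields result 0.
 * getcc(size, 0, 0) computes 0 - 0, so ZF and PF are set in rflags2; the
 * emulation keeps only PSL_PF|PSL_Z|PSL_N from that and leaves CF/OF
 * cleared, matching the architected behaviour of AND and OR.
 */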
@@ -651,7 +665,7 @@ emulate_or(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
mem_region_read_t memread, mem_region_write_t memwrite, void *arg)
{
int error, size;
- uint64_t val1;
+ uint64_t val1, result, rflags, rflags2;
size = vie->opsize;
error = EINVAL;
@@ -681,17 +695,33 @@ emulate_or(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
* perform the operation with the pre-fetched immediate
* operand and write the result
*/
- val1 |= vie->immediate;
- error = memwrite(vm, vcpuid, gpa, val1, size, arg);
+ result = val1 | vie->immediate;
+ error = memwrite(vm, vcpuid, gpa, result, size, arg);
break;
default:
break;
}
+ if (error)
+ return (error);
+
+ error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, &rflags);
+ if (error)
+ return (error);
+
+ /*
+ * OF and CF are cleared; the SF, ZF and PF flags are set according
+ * to the result; AF is undefined.
+ *
+ * The updated status flags are obtained by subtracting 0 from 'result'.
+ */
+ rflags2 = getcc(size, result, 0);
+ rflags &= ~RFLAGS_STATUS_BITS;
+ rflags |= rflags2 & (PSL_PF | PSL_Z | PSL_N);
+
+ error = vie_update_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, rflags, 8);
return (error);
}
-#define RFLAGS_STATUS_BITS (PSL_C | PSL_PF | PSL_AF | PSL_Z | PSL_N | PSL_V)
-
static int
emulate_cmp(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
mem_region_read_t memread, mem_region_write_t memwrite, void *arg)
@@ -797,7 +827,7 @@ emulate_sub(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
}
static int
-emulate_push(void *vm, int vcpuid, uint64_t mmio_gpa, struct vie *vie,
+emulate_stack_op(void *vm, int vcpuid, uint64_t mmio_gpa, struct vie *vie,
struct vm_guest_paging *paging, mem_region_read_t memread,
mem_region_write_t memwrite, void *arg)
{
@@ -808,18 +838,12 @@ emulate_push(void *vm, int vcpuid, uint64_t mmio_gpa, struct vie *vie,
#endif
struct seg_desc ss_desc;
uint64_t cr0, rflags, rsp, stack_gla, val;
- int error, size, stackaddrsize;
-
- /*
- * Table A-6, "Opcode Extensions", Intel SDM, Vol 2.
- *
- * PUSH is part of the group 5 extended opcodes and is identified
- * by ModRM:reg = b110.
- */
- if ((vie->reg & 7) != 6)
- return (EINVAL);
+ int error, size, stackaddrsize, pushop;
+ val = 0;
size = vie->opsize;
+ pushop = (vie->op.op_type == VIE_OP_TYPE_PUSH) ? 1 : 0;
+
/*
* From "Address-Size Attributes for Stack Accesses", Intel SDL, Vol 1
*/
@@ -858,10 +882,13 @@ emulate_push(void *vm, int vcpuid, uint64_t mmio_gpa, struct vie *vie,
error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RSP, &rsp);
KASSERT(error == 0, ("%s: error %d getting rsp", __func__, error));
+ if (pushop) {
+ rsp -= size;
+ }
- rsp -= size;
if (vie_calculate_gla(paging->cpu_mode, VM_REG_GUEST_SS, &ss_desc,
- rsp, size, stackaddrsize, PROT_WRITE, &stack_gla)) {
+ rsp, size, stackaddrsize, pushop ? PROT_WRITE : PROT_READ,
+ &stack_gla)) {
vm_inject_ss(vm, vcpuid, 0);
return (0);
}
@@ -876,8 +903,8 @@ emulate_push(void *vm, int vcpuid, uint64_t mmio_gpa, struct vie *vie,
return (0);
}
- error = vm_copy_setup(vm, vcpuid, paging, stack_gla, size, PROT_WRITE,
- copyinfo, nitems(copyinfo));
+ error = vm_copy_setup(vm, vcpuid, paging, stack_gla, size,
+ pushop ? PROT_WRITE : PROT_READ, copyinfo, nitems(copyinfo));
if (error == -1) {
/*
* XXX cannot return a negative error value here because it
@@ -890,16 +917,66 @@ emulate_push(void *vm, int vcpuid, uint64_t mmio_gpa, struct vie *vie,
return (0);
}
- error = memread(vm, vcpuid, mmio_gpa, &val, size, arg);
+ if (pushop) {
+ error = memread(vm, vcpuid, mmio_gpa, &val, size, arg);
+ if (error == 0)
+ vm_copyout(vm, vcpuid, &val, copyinfo, size);
+ } else {
+ vm_copyin(vm, vcpuid, copyinfo, &val, size);
+ error = memwrite(vm, vcpuid, mmio_gpa, val, size, arg);
+ rsp += size;
+ }
+#ifdef _KERNEL
+ vm_copy_teardown(vm, vcpuid, copyinfo, nitems(copyinfo));
+#endif
+
if (error == 0) {
- vm_copyout(vm, vcpuid, &val, copyinfo, size);
error = vie_update_register(vm, vcpuid, VM_REG_GUEST_RSP, rsp,
stackaddrsize);
KASSERT(error == 0, ("error %d updating rsp", error));
}
-#ifdef _KERNEL
- vm_copy_teardown(vm, vcpuid, copyinfo, nitems(copyinfo));
-#endif
+ return (error);
+}
+
+static int
+emulate_push(void *vm, int vcpuid, uint64_t mmio_gpa, struct vie *vie,
+ struct vm_guest_paging *paging, mem_region_read_t memread,
+ mem_region_write_t memwrite, void *arg)
+{
+ int error;
+
+ /*
+ * Table A-6, "Opcode Extensions", Intel SDM, Vol 2.
+ *
+ * PUSH is part of the group 5 extended opcodes and is identified
+ * by ModRM:reg = b110.
+ */
+ if ((vie->reg & 7) != 6)
+ return (EINVAL);
+
+ error = emulate_stack_op(vm, vcpuid, mmio_gpa, vie, paging, memread,
+ memwrite, arg);
+ return (error);
+}
+
+static int
+emulate_pop(void *vm, int vcpuid, uint64_t mmio_gpa, struct vie *vie,
+ struct vm_guest_paging *paging, mem_region_read_t memread,
+ mem_region_write_t memwrite, void *arg)
+{
+ int error;
+
+ /*
+ * Table A-6, "Opcode Extensions", Intel SDM, Vol 2.
+ *
+ * POP is part of the group 1A extended opcodes and is identified
+ * by ModRM:reg = b000.
+ */
+ if ((vie->reg & 7) != 0)
+ return (EINVAL);
+
+ error = emulate_stack_op(vm, vcpuid, mmio_gpa, vie, paging, memread,
+ memwrite, arg);
return (error);
}
@@ -914,6 +991,10 @@ vmm_emulate_instruction(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
return (EINVAL);
switch (vie->op.op_type) {
+ case VIE_OP_TYPE_POP:
+ error = emulate_pop(vm, vcpuid, gpa, vie, paging, memread,
+ memwrite, memarg);
+ break;
case VIE_OP_TYPE_PUSH:
error = emulate_push(vm, vcpuid, gpa, vie, paging, memread,
memwrite, memarg);
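As an illustration of the new decode path (hypothetical guest code, not from the commit):

/*
 *	movq	$0xfed00070, %rax	; some MMIO register (hypothetical)
 *	popq	(%rax)			; opcode 0x8F, ModRM reg field = b000
 *
 * The EPT fault on (%rax) reaches vmm_emulate_instruction(), which now
 * routes 0x8F /0 to emulate_pop(): the value is copied in from the guest
 * stack with vm_copyin(), written to the MMIO region via memwrite(), and
 * %rsp is then advanced by the operand size.
 */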
diff --git a/sys/amd64/vmm/vmm_msr.c b/sys/amd64/vmm/vmm_msr.c
deleted file mode 100644
index 03e0071..0000000
--- a/sys/amd64/vmm/vmm_msr.c
+++ /dev/null
@@ -1,273 +0,0 @@
-/*-
- * Copyright (c) 2011 NetApp, Inc.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * $FreeBSD$
- */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-
-#include <sys/param.h>
-#include <sys/systm.h>
-#include <sys/smp.h>
-
-#include <machine/specialreg.h>
-
-#include <machine/vmm.h>
-#include "vmm_lapic.h"
-#include "vmm_msr.h"
-
-#define VMM_MSR_F_EMULATE 0x01
-#define VMM_MSR_F_READONLY 0x02
-#define VMM_MSR_F_INVALID 0x04 /* guest_msr_valid() can override this */
-
-struct vmm_msr {
- int num;
- int flags;
- uint64_t hostval;
-};
-
-static struct vmm_msr vmm_msr[] = {
- { MSR_LSTAR, 0 },
- { MSR_CSTAR, 0 },
- { MSR_STAR, 0 },
- { MSR_SF_MASK, 0 },
- { MSR_PAT, VMM_MSR_F_EMULATE | VMM_MSR_F_INVALID },
- { MSR_BIOS_SIGN,VMM_MSR_F_EMULATE },
- { MSR_MCG_CAP, VMM_MSR_F_EMULATE | VMM_MSR_F_READONLY },
- { MSR_IA32_PLATFORM_ID, VMM_MSR_F_EMULATE | VMM_MSR_F_READONLY },
- { MSR_IA32_MISC_ENABLE, VMM_MSR_F_EMULATE | VMM_MSR_F_READONLY },
-};
-
-#define vmm_msr_num (sizeof(vmm_msr) / sizeof(vmm_msr[0]))
-CTASSERT(VMM_MSR_NUM >= vmm_msr_num);
-
-#define readonly_msr(idx) \
- ((vmm_msr[(idx)].flags & VMM_MSR_F_READONLY) != 0)
-
-#define emulated_msr(idx) \
- ((vmm_msr[(idx)].flags & VMM_MSR_F_EMULATE) != 0)
-
-#define invalid_msr(idx) \
- ((vmm_msr[(idx)].flags & VMM_MSR_F_INVALID) != 0)
-
-void
-vmm_msr_init(void)
-{
- int i;
-
- for (i = 0; i < vmm_msr_num; i++) {
- if (emulated_msr(i))
- continue;
- /*
- * XXX this assumes that the value of the host msr does not
- * change after we have cached it.
- */
- vmm_msr[i].hostval = rdmsr(vmm_msr[i].num);
- }
-}
-
-void
-guest_msrs_init(struct vm *vm, int cpu)
-{
- int i;
- uint64_t *guest_msrs, misc;
-
- guest_msrs = vm_guest_msrs(vm, cpu);
-
- for (i = 0; i < vmm_msr_num; i++) {
- switch (vmm_msr[i].num) {
- case MSR_LSTAR:
- case MSR_CSTAR:
- case MSR_STAR:
- case MSR_SF_MASK:
- case MSR_BIOS_SIGN:
- case MSR_MCG_CAP:
- guest_msrs[i] = 0;
- break;
- case MSR_PAT:
- guest_msrs[i] = PAT_VALUE(0, PAT_WRITE_BACK) |
- PAT_VALUE(1, PAT_WRITE_THROUGH) |
- PAT_VALUE(2, PAT_UNCACHED) |
- PAT_VALUE(3, PAT_UNCACHEABLE) |
- PAT_VALUE(4, PAT_WRITE_BACK) |
- PAT_VALUE(5, PAT_WRITE_THROUGH) |
- PAT_VALUE(6, PAT_UNCACHED) |
- PAT_VALUE(7, PAT_UNCACHEABLE);
- break;
- case MSR_IA32_MISC_ENABLE:
- misc = rdmsr(MSR_IA32_MISC_ENABLE);
- /*
- * Set mandatory bits
- * 11: branch trace disabled
- * 12: PEBS unavailable
- * Clear unsupported features
- * 16: SpeedStep enable
- * 18: enable MONITOR FSM
- */
- misc |= (1 << 12) | (1 << 11);
- misc &= ~((1 << 18) | (1 << 16));
- guest_msrs[i] = misc;
- break;
- case MSR_IA32_PLATFORM_ID:
- guest_msrs[i] = 0;
- break;
- default:
- panic("guest_msrs_init: missing initialization for msr "
- "0x%0x", vmm_msr[i].num);
- }
- }
-}
-
-static int
-msr_num_to_idx(u_int num)
-{
- int i;
-
- for (i = 0; i < vmm_msr_num; i++)
- if (vmm_msr[i].num == num)
- return (i);
-
- return (-1);
-}
-
-int
-emulate_wrmsr(struct vm *vm, int cpu, u_int num, uint64_t val, bool *retu)
-{
- int idx;
- uint64_t *guest_msrs;
-
- if (lapic_msr(num))
- return (lapic_wrmsr(vm, cpu, num, val, retu));
-
- idx = msr_num_to_idx(num);
- if (idx < 0 || invalid_msr(idx))
- return (EINVAL);
-
- if (!readonly_msr(idx)) {
- guest_msrs = vm_guest_msrs(vm, cpu);
-
- /* Stash the value */
- guest_msrs[idx] = val;
-
- /* Update processor state for non-emulated MSRs */
- if (!emulated_msr(idx))
- wrmsr(vmm_msr[idx].num, val);
- }
-
- return (0);
-}
-
-int
-emulate_rdmsr(struct vm *vm, int cpu, u_int num, bool *retu)
-{
- int error, idx;
- uint32_t eax, edx;
- uint64_t result, *guest_msrs;
-
- if (lapic_msr(num)) {
- error = lapic_rdmsr(vm, cpu, num, &result, retu);
- goto done;
- }
-
- idx = msr_num_to_idx(num);
- if (idx < 0 || invalid_msr(idx)) {
- error = EINVAL;
- goto done;
- }
-
- guest_msrs = vm_guest_msrs(vm, cpu);
- result = guest_msrs[idx];
-
- /*
- * If this is not an emulated msr register make sure that the processor
- * state matches our cached state.
- */
- if (!emulated_msr(idx) && (rdmsr(num) != result)) {
- panic("emulate_rdmsr: msr 0x%0x has inconsistent cached "
- "(0x%016lx) and actual (0x%016lx) values", num,
- result, rdmsr(num));
- }
-
- error = 0;
-
-done:
- if (error == 0) {
- eax = result;
- edx = result >> 32;
- error = vm_set_register(vm, cpu, VM_REG_GUEST_RAX, eax);
- if (error)
- panic("vm_set_register(rax) error %d", error);
- error = vm_set_register(vm, cpu, VM_REG_GUEST_RDX, edx);
- if (error)
- panic("vm_set_register(rdx) error %d", error);
- }
- return (error);
-}
-
-void
-restore_guest_msrs(struct vm *vm, int cpu)
-{
- int i;
- uint64_t *guest_msrs;
-
- guest_msrs = vm_guest_msrs(vm, cpu);
-
- for (i = 0; i < vmm_msr_num; i++) {
- if (emulated_msr(i))
- continue;
- else
- wrmsr(vmm_msr[i].num, guest_msrs[i]);
- }
-}
-
-void
-restore_host_msrs(struct vm *vm, int cpu)
-{
- int i;
-
- for (i = 0; i < vmm_msr_num; i++) {
- if (emulated_msr(i))
- continue;
- else
- wrmsr(vmm_msr[i].num, vmm_msr[i].hostval);
- }
-}
-
-/*
- * Must be called by the CPU-specific code before any guests are
- * created
- */
-void
-guest_msr_valid(int msr)
-{
- int i;
-
- for (i = 0; i < vmm_msr_num; i++) {
- if (vmm_msr[i].num == msr && invalid_msr(i)) {
- vmm_msr[i].flags &= ~VMM_MSR_F_INVALID;
- }
- }
-}
diff --git a/sys/amd64/vmm/vmm_msr.h b/sys/amd64/vmm/vmm_msr.h
deleted file mode 100644
index e070037..0000000
--- a/sys/amd64/vmm/vmm_msr.h
+++ /dev/null
@@ -1,44 +0,0 @@
-/*-
- * Copyright (c) 2011 NetApp, Inc.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * $FreeBSD$
- */
-
-#ifndef _VMM_MSR_H_
-#define _VMM_MSR_H_
-
-#define VMM_MSR_NUM 16
-struct vm;
-
-void vmm_msr_init(void);
-int emulate_wrmsr(struct vm *vm, int vcpu, u_int msr, uint64_t val,
- bool *retu);
-int emulate_rdmsr(struct vm *vm, int vcpu, u_int msr, bool *retu);
-void guest_msrs_init(struct vm *vm, int cpu);
-void guest_msr_valid(int msr);
-void restore_host_msrs(struct vm *vm, int cpu);
-void restore_guest_msrs(struct vm *vm, int cpu);
-
-#endif
diff --git a/sys/amd64/vmm/x86.c b/sys/amd64/vmm/x86.c
index ef1557f..c7515cf 100644
--- a/sys/amd64/vmm/x86.c
+++ b/sys/amd64/vmm/x86.c
@@ -33,6 +33,7 @@ __FBSDID("$FreeBSD$");
#include <sys/pcpu.h>
#include <sys/systm.h>
#include <sys/cpuset.h>
+#include <sys/sysctl.h>
#include <machine/clock.h>
#include <machine/cpufunc.h>
@@ -45,20 +46,49 @@ __FBSDID("$FreeBSD$");
#include "vmm_host.h"
#include "x86.h"
+SYSCTL_DECL(_hw_vmm);
+static SYSCTL_NODE(_hw_vmm, OID_AUTO, topology, CTLFLAG_RD, 0, NULL);
+
#define CPUID_VM_HIGH 0x40000000
static const char bhyve_id[12] = "bhyve bhyve ";
static uint64_t bhyve_xcpuids;
+/*
+ * The default CPU topology is a single thread per package.
+ */
+static u_int threads_per_core = 1;
+SYSCTL_UINT(_hw_vmm_topology, OID_AUTO, threads_per_core, CTLFLAG_RDTUN,
+ &threads_per_core, 0, NULL);
+
+static u_int cores_per_package = 1;
+SYSCTL_UINT(_hw_vmm_topology, OID_AUTO, cores_per_package, CTLFLAG_RDTUN,
+ &cores_per_package, 0, NULL);
+
+static int cpuid_leaf_b = 1;
+SYSCTL_INT(_hw_vmm_topology, OID_AUTO, cpuid_leaf_b, CTLFLAG_RDTUN,
+ &cpuid_leaf_b, 0, NULL);
+
+/*
+ * Round up to the next power of two, if necessary, and then take log2.
+ * Returns -1 if argument is zero.
+ */
+static __inline int
+log2(u_int x)
+{
+
+ return (fls(x << (1 - powerof2(x))) - 1);
+}
+
int
x86_emulate_cpuid(struct vm *vm, int vcpu_id,
uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx)
{
const struct xsave_limits *limits;
uint64_t cr4;
- int error, enable_invpcid;
- unsigned int func, regs[4];
+ int error, enable_invpcid, level, width, x2apic_id;
+ unsigned int func, regs[4], logical_cpus;
enum x2apic_state x2apic_state;
/*
@@ -207,30 +237,31 @@ x86_emulate_cpuid(struct vm *vm, int vcpu_id,
*/
regs[3] &= ~CPUID_DS;
- /*
- * Disable multi-core.
- */
+ logical_cpus = threads_per_core * cores_per_package;
regs[1] &= ~CPUID_HTT_CORES;
- regs[3] &= ~CPUID_HTT;
+ regs[1] |= (logical_cpus & 0xff) << 16;
+ regs[3] |= CPUID_HTT;
break;
case CPUID_0000_0004:
- do_cpuid(4, regs);
+ cpuid_count(*eax, *ecx, regs);
- /*
- * Do not expose topology.
- *
- * The maximum number of processor cores in
- * this physical processor package and the
- * maximum number of threads sharing this
- * cache are encoded with "plus 1" encoding.
- * Adding one to the value in this register
- * field to obtains the actual value.
- *
- * Therefore 0 for both indicates 1 core per
- * package and no cache sharing.
- */
- regs[0] &= 0xffff8000;
+ if (regs[0] || regs[1] || regs[2] || regs[3]) {
+ regs[0] &= 0x3ff;
+ regs[0] |= (cores_per_package - 1) << 26;
+ /*
+ * Cache topology:
+ * - L1 and L2 are shared only by the logical
+ * processors in a single core.
+ * - L3 and above are shared by all logical
+ * processors in the package.
+ */
+ logical_cpus = threads_per_core;
+ level = (regs[0] >> 5) & 0x7;
+ if (level >= 3)
+ logical_cpus *= cores_per_package;
+ regs[0] |= (logical_cpus - 1) << 14;
+ }
break;
case CPUID_0000_0007:
@@ -284,10 +315,32 @@ x86_emulate_cpuid(struct vm *vm, int vcpu_id,
/*
* Processor topology enumeration
*/
- regs[0] = 0;
- regs[1] = 0;
- regs[2] = *ecx & 0xff;
- regs[3] = vcpu_id;
+ if (*ecx == 0) {
+ logical_cpus = threads_per_core;
+ width = log2(logical_cpus);
+ level = CPUID_TYPE_SMT;
+ x2apic_id = vcpu_id;
+ }
+
+ if (*ecx == 1) {
+ logical_cpus = threads_per_core *
+ cores_per_package;
+ width = log2(logical_cpus);
+ level = CPUID_TYPE_CORE;
+ x2apic_id = vcpu_id;
+ }
+
+ if (!cpuid_leaf_b || *ecx >= 2) {
+ width = 0;
+ logical_cpus = 0;
+ level = 0;
+ x2apic_id = 0;
+ }
+
+ regs[0] = width & 0x1f;
+ regs[1] = logical_cpus & 0xffff;
+ regs[2] = (level << 8) | (*ecx & 0xff);
+ regs[3] = x2apic_id;
break;
case CPUID_0000_000D:
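A worked example of the new topology leaves (illustrative; assumes the tunables hw.vmm.topology.threads_per_core=2 and hw.vmm.topology.cores_per_package=4, and vcpu_id 5):

/*
 * CPUID leaf 0xB on vcpu 5 would then report:
 *
 *	ECX=0 (SMT level):   EAX=1 (shift width), EBX=2, ECX=0x0100, EDX=5
 *	ECX=1 (core level):  EAX=3 (shift width), EBX=8, ECX=0x0201, EDX=5
 *	ECX>=2 (or cpuid_leaf_b=0):  EAX=0, EBX=0, ECX=*ecx & 0xff, EDX=0
 *
 * while leaf 0x1 now reports 8 logical processors (CPUID_HTT set,
 * EBX[23:16] = 8) instead of hiding multi-core entirely.
 */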
diff --git a/sys/modules/vmm/Makefile b/sys/modules/vmm/Makefile
index 76f9364..702587b 100644
--- a/sys/modules/vmm/Makefile
+++ b/sys/modules/vmm/Makefile
@@ -19,7 +19,6 @@ SRCS+= vmm.c \
vmm_ipi.c \
vmm_lapic.c \
vmm_mem.c \
- vmm_msr.c \
vmm_stat.c \
vmm_util.c \
x86.c \
diff --git a/sys/x86/include/specialreg.h b/sys/x86/include/specialreg.h
index c920e82..7298a2e 100644
--- a/sys/x86/include/specialreg.h
+++ b/sys/x86/include/specialreg.h
@@ -381,6 +381,7 @@
#define MSR_BIOS_SIGN 0x08b
#define MSR_PERFCTR0 0x0c1
#define MSR_PERFCTR1 0x0c2
+#define MSR_PLATFORM_INFO 0x0ce
#define MSR_MPERF 0x0e7
#define MSR_APERF 0x0e8
#define MSR_IA32_EXT_CONFIG 0x0ee /* Undocumented. Core Solo/Duo only */
@@ -404,6 +405,8 @@
#define MSR_THERM_STATUS 0x19c
#define MSR_IA32_MISC_ENABLE 0x1a0
#define MSR_IA32_TEMPERATURE_TARGET 0x1a2
+#define MSR_TURBO_RATIO_LIMIT 0x1ad
+#define MSR_TURBO_RATIO_LIMIT1 0x1ae
#define MSR_DEBUGCTLMSR 0x1d9
#define MSR_LASTBRANCHFROMIP 0x1db
#define MSR_LASTBRANCHTOIP 0x1dc
@@ -437,6 +440,11 @@
#define MSR_MC4_STATUS 0x411
#define MSR_MC4_ADDR 0x412
#define MSR_MC4_MISC 0x413
+#define MSR_RAPL_POWER_UNIT 0x606
+#define MSR_PKG_ENERGY_STATUS 0x611
+#define MSR_DRAM_ENERGY_STATUS 0x619
+#define MSR_PP0_ENERGY_STATUS 0x639
+#define MSR_PP1_ENERGY_STATUS 0x641
/*
* VMX MSRs
diff --git a/usr.sbin/bhyve/acpi.c b/usr.sbin/bhyve/acpi.c
index 5dea300..c1f5f13 100644
--- a/usr.sbin/bhyve/acpi.c
+++ b/usr.sbin/bhyve/acpi.c
@@ -489,7 +489,7 @@ basl_fwrite_fadt(FILE *fp)
EFPRINTF(fp,
"[0012]\t\tPM Timer Block : [Generic Address Structure]\n");
EFPRINTF(fp, "[0001]\t\tSpace ID : 01 [SystemIO]\n");
- EFPRINTF(fp, "[0001]\t\tBit Width : 32\n");
+ EFPRINTF(fp, "[0001]\t\tBit Width : 20\n");
EFPRINTF(fp, "[0001]\t\tBit Offset : 00\n");
EFPRINTF(fp,
"[0001]\t\tEncoded Access Width : 03 [DWord Access:32]\n");
@@ -499,7 +499,7 @@ basl_fwrite_fadt(FILE *fp)
EFPRINTF(fp, "[0012]\t\tGPE0 Block : [Generic Address Structure]\n");
EFPRINTF(fp, "[0001]\t\tSpace ID : 01 [SystemIO]\n");
- EFPRINTF(fp, "[0001]\t\tBit Width : 80\n");
+ EFPRINTF(fp, "[0001]\t\tBit Width : 00\n");
EFPRINTF(fp, "[0001]\t\tBit Offset : 00\n");
EFPRINTF(fp, "[0001]\t\tEncoded Access Width : 01 [Byte Access:8]\n");
EFPRINTF(fp, "[0008]\t\tAddress : 0000000000000000\n");
diff --git a/usr.sbin/bhyve/bhyverun.c b/usr.sbin/bhyve/bhyverun.c
index 7dcf6d0..b2b36bb 100644
--- a/usr.sbin/bhyve/bhyverun.c
+++ b/usr.sbin/bhyve/bhyverun.c
@@ -803,6 +803,12 @@ main(int argc, char *argv[])
exit(1);
}
+ error = init_msr();
+ if (error) {
+ fprintf(stderr, "init_msr error %d", error);
+ exit(1);
+ }
+
init_mem();
init_inout();
pci_irq_init(ctx);
diff --git a/usr.sbin/bhyve/block_if.c b/usr.sbin/bhyve/block_if.c
index 1ec0344..cbe5ac3 100644
--- a/usr.sbin/bhyve/block_if.c
+++ b/usr.sbin/bhyve/block_if.c
@@ -55,8 +55,7 @@ __FBSDID("$FreeBSD$");
enum blockop {
BOP_READ,
BOP_WRITE,
- BOP_FLUSH,
- BOP_CANCEL
+ BOP_FLUSH
};
enum blockstat {
@@ -159,9 +158,6 @@ blockif_proc(struct blockif_ctxt *bc, struct blockif_elem *be)
break;
case BOP_FLUSH:
break;
- case BOP_CANCEL:
- err = EINTR;
- break;
default:
err = EINVAL;
break;
@@ -278,6 +274,7 @@ blockif_open(const char *optstr, const char *ident)
bc->bc_magic = BLOCKIF_SIG;
bc->bc_fd = fd;
+ bc->bc_rdonly = ro;
bc->bc_size = size;
bc->bc_sectsz = sectsz;
pthread_mutex_init(&bc->bc_mtx, NULL);
@@ -355,9 +352,28 @@ blockif_flush(struct blockif_ctxt *bc, struct blockif_req *breq)
int
blockif_cancel(struct blockif_ctxt *bc, struct blockif_req *breq)
{
+ struct blockif_elem *be;
assert(bc->bc_magic == BLOCKIF_SIG);
- return (blockif_request(bc, breq, BOP_CANCEL));
+
+ pthread_mutex_lock(&bc->bc_mtx);
+ TAILQ_FOREACH(be, &bc->bc_inuseq, be_link) {
+ if (be->be_req == breq)
+ break;
+ }
+ if (be == NULL) {
+ pthread_mutex_unlock(&bc->bc_mtx);
+ return (EINVAL);
+ }
+
+ TAILQ_REMOVE(&bc->bc_inuseq, be, be_link);
+ be->be_status = BST_FREE;
+ be->be_req = NULL;
+ TAILQ_INSERT_TAIL(&bc->bc_freeq, be, be_link);
+ bc->bc_req_count--;
+ pthread_mutex_unlock(&bc->bc_mtx);
+
+ return (0);
}
int
diff --git a/usr.sbin/bhyve/pci_ahci.c b/usr.sbin/bhyve/pci_ahci.c
index 214237d..42aa0b3 100644
--- a/usr.sbin/bhyve/pci_ahci.c
+++ b/usr.sbin/bhyve/pci_ahci.c
@@ -48,6 +48,7 @@ __FBSDID("$FreeBSD$");
#include <unistd.h>
#include <assert.h>
#include <pthread.h>
+#include <pthread_np.h>
#include <inttypes.h>
#include "bhyverun.h"
@@ -115,7 +116,8 @@ static FILE *dbg;
struct ahci_ioreq {
struct blockif_req io_req;
struct ahci_port *io_pr;
- STAILQ_ENTRY(ahci_ioreq) io_list;
+ STAILQ_ENTRY(ahci_ioreq) io_flist;
+ TAILQ_ENTRY(ahci_ioreq) io_blist;
uint8_t *cfis;
uint32_t len;
uint32_t done;
@@ -160,6 +162,7 @@ struct ahci_port {
struct ahci_ioreq *ioreq;
int ioqsz;
STAILQ_HEAD(ahci_fhead, ahci_ioreq) iofhd;
+ TAILQ_HEAD(ahci_bhead, ahci_ioreq) iobhd;
};
struct ahci_cmd_hdr {
@@ -360,6 +363,68 @@ ahci_write_reset_fis_d2h(struct ahci_port *p)
}
static void
+ahci_check_stopped(struct ahci_port *p)
+{
+ /*
+ * If we are no longer processing the command list and nothing
+ * is in-flight, clear the running bit.
+ */
+ if (!(p->cmd & AHCI_P_CMD_ST)) {
+ if (p->pending == 0)
+ p->cmd &= ~(AHCI_P_CMD_CR | AHCI_P_CMD_CCS_MASK);
+ }
+}
+
+static void
+ahci_port_stop(struct ahci_port *p)
+{
+ struct ahci_ioreq *aior;
+ uint8_t *cfis;
+ int slot;
+ int ncq;
+ int error;
+
+ assert(pthread_mutex_isowned_np(&p->pr_sc->mtx));
+
+ TAILQ_FOREACH(aior, &p->iobhd, io_blist) {
+ /*
+ * Try to cancel the outstanding blockif request.
+ */
+ error = blockif_cancel(p->bctx, &aior->io_req);
+ if (error != 0)
+ continue;
+
+ slot = aior->slot;
+ cfis = aior->cfis;
+ ncq = 0;
+ if (cfis[2] == ATA_WRITE_FPDMA_QUEUED ||
+ cfis[2] == ATA_READ_FPDMA_QUEUED)
+ ncq = 1;
+
+ if (ncq)
+ p->sact &= ~(1 << slot);
+ else
+ p->ci &= ~(1 << slot);
+
+ /*
+ * This command is now done.
+ */
+ p->pending &= ~(1 << slot);
+
+ /*
+ * Delete the blockif request from the busy list
+ */
+ TAILQ_REMOVE(&p->iobhd, aior, io_blist);
+
+ /*
+ * Move the blockif request back to the free list
+ */
+ STAILQ_INSERT_TAIL(&p->iofhd, aior, io_flist);
+ }
+
+ ahci_check_stopped(p);
+}
+
+static void
ahci_port_reset(struct ahci_port *pr)
{
pr->sctl = 0;
@@ -492,7 +557,7 @@ ahci_handle_dma(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t done,
*/
aior = STAILQ_FIRST(&p->iofhd);
assert(aior != NULL);
- STAILQ_REMOVE_HEAD(&p->iofhd, io_list);
+ STAILQ_REMOVE_HEAD(&p->iofhd, io_flist);
aior->cfis = cfis;
aior->slot = slot;
aior->len = len;
@@ -503,15 +568,21 @@ ahci_handle_dma(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t done,
if (iovcnt > BLOCKIF_IOV_MAX) {
aior->prdtl = iovcnt - BLOCKIF_IOV_MAX;
iovcnt = BLOCKIF_IOV_MAX;
- /*
- * Mark this command in-flight.
- */
- p->pending |= 1 << slot;
} else
aior->prdtl = 0;
breq->br_iovcnt = iovcnt;
/*
+ * Mark this command in-flight.
+ */
+ p->pending |= 1 << slot;
+
+ /*
+ * Stuff request onto busy list
+ */
+ TAILQ_INSERT_HEAD(&p->iobhd, aior, io_blist);
+
+ /*
* Build up the iovec based on the prdt
*/
for (i = 0; i < iovcnt; i++) {
@@ -546,7 +617,7 @@ ahci_handle_flush(struct ahci_port *p, int slot, uint8_t *cfis)
*/
aior = STAILQ_FIRST(&p->iofhd);
assert(aior != NULL);
- STAILQ_REMOVE_HEAD(&p->iofhd, io_list);
+ STAILQ_REMOVE_HEAD(&p->iofhd, io_flist);
aior->cfis = cfis;
aior->slot = slot;
aior->len = 0;
@@ -554,6 +625,16 @@ ahci_handle_flush(struct ahci_port *p, int slot, uint8_t *cfis)
aior->prdtl = 0;
breq = &aior->io_req;
+ /*
+ * Mark this command in-flight.
+ */
+ p->pending |= 1 << slot;
+
+ /*
+ * Stuff request onto busy list
+ */
+ TAILQ_INSERT_HEAD(&p->iobhd, aior, io_blist);
+
err = blockif_flush(p->bctx, breq);
assert(err == 0);
}
@@ -961,7 +1042,7 @@ atapi_read(struct ahci_port *p, int slot, uint8_t *cfis,
*/
aior = STAILQ_FIRST(&p->iofhd);
assert(aior != NULL);
- STAILQ_REMOVE_HEAD(&p->iofhd, io_list);
+ STAILQ_REMOVE_HEAD(&p->iofhd, io_flist);
aior->cfis = cfis;
aior->slot = slot;
aior->len = len;
@@ -977,6 +1058,16 @@ atapi_read(struct ahci_port *p, int slot, uint8_t *cfis,
breq->br_iovcnt = iovcnt;
/*
+ * Mark this command in-flight.
+ */
+ p->pending |= 1 << slot;
+
+ /*
+ * Stuff request onto busy list
+ */
+ TAILQ_INSERT_HEAD(&p->iobhd, aior, io_blist);
+
+ /*
* Build up the iovec based on the prdt
*/
for (i = 0; i < iovcnt; i++) {
@@ -1415,9 +1506,14 @@ ata_ioreq_cb(struct blockif_req *br, int err)
pthread_mutex_lock(&sc->mtx);
/*
+ * Delete the blockif request from the busy list
+ */
+ TAILQ_REMOVE(&p->iobhd, aior, io_blist);
+
+ /*
* Move the blockif request back to the free list
*/
- STAILQ_INSERT_TAIL(&p->iofhd, aior, io_list);
+ STAILQ_INSERT_TAIL(&p->iofhd, aior, io_flist);
if (pending && !err) {
ahci_handle_dma(p, slot, cfis, aior->done,
@@ -1438,17 +1534,18 @@ ata_ioreq_cb(struct blockif_req *br, int err)
p->serr |= (1 << slot);
}
- /*
- * This command is now complete.
- */
- p->pending &= ~(1 << slot);
-
if (ncq) {
p->sact &= ~(1 << slot);
ahci_write_fis_sdb(p, slot, tfd);
} else
ahci_write_fis_d2h(p, slot, cfis, tfd);
+ /*
+ * This command is now complete.
+ */
+ p->pending &= ~(1 << slot);
+
+ ahci_check_stopped(p);
out:
pthread_mutex_unlock(&sc->mtx);
DPRINTF("%s exit\n", __func__);
@@ -1478,9 +1575,14 @@ atapi_ioreq_cb(struct blockif_req *br, int err)
pthread_mutex_lock(&sc->mtx);
/*
+ * Delete the blockif request from the busy list
+ */
+ TAILQ_REMOVE(&p->iobhd, aior, io_blist);
+
+ /*
* Move the blockif request back to the free list
*/
- STAILQ_INSERT_TAIL(&p->iofhd, aior, io_list);
+ STAILQ_INSERT_TAIL(&p->iofhd, aior, io_flist);
if (pending && !err) {
atapi_read(p, slot, cfis, aior->done, hdr->prdtl - pending);
@@ -1500,6 +1602,12 @@ atapi_ioreq_cb(struct blockif_req *br, int err)
cfis[4] = (cfis[4] & ~7) | ATA_I_CMD | ATA_I_IN;
ahci_write_fis_d2h(p, slot, cfis, tfd);
+ /*
+ * This command is now complete.
+ */
+ p->pending &= ~(1 << slot);
+
+ ahci_check_stopped(p);
out:
pthread_mutex_unlock(&sc->mtx);
DPRINTF("%s exit\n", __func__);
@@ -1526,8 +1634,10 @@ pci_ahci_ioreq_init(struct ahci_port *pr)
else
vr->io_req.br_callback = atapi_ioreq_cb;
vr->io_req.br_param = vr;
- STAILQ_INSERT_TAIL(&pr->iofhd, vr, io_list);
+ STAILQ_INSERT_TAIL(&pr->iofhd, vr, io_flist);
}
+
+ TAILQ_INIT(&pr->iobhd);
}
static void
@@ -1565,9 +1675,7 @@ pci_ahci_port_write(struct pci_ahci_softc *sc, uint64_t offset, uint64_t value)
p->cmd = value;
if (!(value & AHCI_P_CMD_ST)) {
- p->cmd &= ~(AHCI_P_CMD_CR | AHCI_P_CMD_CCS_MASK);
- p->ci = 0;
- p->sact = 0;
+ ahci_port_stop(p);
} else {
uint64_t clb;
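
The pci_ahci.c changes track every outstanding blockif request on a per-port busy list and in the 'pending' slot bitmap, so ahci_port_stop() can cancel in-flight commands and ahci_check_stopped() can clear the command-list-running bit once the port drains. Below is a small self-contained sketch of just the slot-bitmap bookkeeping, assuming simplified CMD_ST/CMD_CR stand-ins for AHCI_P_CMD_ST and AHCI_P_CMD_CR (the real code also clears AHCI_P_CMD_CCS_MASK).

    #include <stdint.h>
    #include <stdio.h>

    #define CMD_ST 0x0001u    /* stand-in for AHCI_P_CMD_ST: software wants the port running */
    #define CMD_CR 0x8000u    /* stand-in for AHCI_P_CMD_CR: command engine is running */

    struct port {
        uint32_t cmd;         /* PxCMD-like register */
        uint32_t pending;     /* one bit per command slot with an outstanding request */
    };

    /* Mark a slot busy when its request is handed to the block layer. */
    static void
    slot_start(struct port *p, int slot)
    {
        p->pending |= 1u << slot;
    }

    /*
     * Clear CR only once software has cleared ST *and* no slot is still in
     * flight -- the same condition ahci_check_stopped() tests above.
     */
    static void
    check_stopped(struct port *p)
    {
        if ((p->cmd & CMD_ST) == 0 && p->pending == 0)
            p->cmd &= ~CMD_CR;
    }

    /* Completion path: retire the slot, then see whether the port went idle. */
    static void
    slot_done(struct port *p, int slot)
    {
        p->pending &= ~(1u << slot);
        check_stopped(p);
    }

    int
    main(void)
    {
        struct port p = { .cmd = CMD_ST | CMD_CR, .pending = 0 };

        slot_start(&p, 3);
        p.cmd &= ~CMD_ST;        /* guest stops the port while slot 3 is busy */
        check_stopped(&p);
        printf("CR after stop request: %d\n", (p.cmd & CMD_CR) != 0);  /* 1 */

        slot_done(&p, 3);
        printf("CR after completion:   %d\n", (p.cmd & CMD_CR) != 0);  /* 0 */
        return (0);
    }

This is why both completion callbacks above clear the pending bit only after writing the completion FIS and then call ahci_check_stopped().
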
diff --git a/usr.sbin/bhyve/pci_virtio_block.c b/usr.sbin/bhyve/pci_virtio_block.c
index 394b116..c66ad68 100644
--- a/usr.sbin/bhyve/pci_virtio_block.c
+++ b/usr.sbin/bhyve/pci_virtio_block.c
@@ -94,6 +94,8 @@ struct vtblk_config {
struct virtio_blk_hdr {
#define VBH_OP_READ 0
#define VBH_OP_WRITE 1
+#define VBH_OP_FLUSH 4
+#define VBH_OP_FLUSH_OUT 5
#define VBH_OP_IDENT 8
#define VBH_FLAG_BARRIER 0x80000000 /* OR'ed into vbh_type */
uint32_t vbh_type;
@@ -217,6 +219,10 @@ pci_vtblk_proc(struct pci_vtblk_softc *sc, struct vqueue_info *vq)
MIN(iov[1].iov_len, sizeof(sc->vbsc_ident)));
err = 0;
break;
+ case VBH_OP_FLUSH:
+ case VBH_OP_FLUSH_OUT:
+ err = fsync(sc->vbsc_fd);
+ break;
default:
err = -ENOSYS;
break;
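
The new VBH_OP_FLUSH/VBH_OP_FLUSH_OUT cases satisfy a guest flush by calling fsync(2) on the backing descriptor. A minimal sketch of a flush handler in that shape follows; handle_flush() and the demo file path are hypothetical, and the negative-errno return convention is only an assumption modeled on the -ENOSYS default above.

    #include <errno.h>
    #include <fcntl.h>
    #include <stdio.h>
    #include <unistd.h>

    /*
     * Flush the backing file to stable storage, as the VBH_OP_FLUSH cases do
     * with fsync(2).  Returns 0 on success or a negative errno (assumed
     * convention).
     */
    static int
    handle_flush(int backing_fd)
    {
        if (fsync(backing_fd) == -1)
            return (-errno);
        return (0);
    }

    int
    main(void)
    {
        int fd = open("/tmp/vtblk-demo.img", O_RDWR | O_CREAT, 0644);

        if (fd == -1) {
            perror("open");
            return (1);
        }
        (void) write(fd, "data", 4);
        printf("flush: %d\n", handle_flush(fd));
        close(fd);
        return (0);
    }
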
diff --git a/usr.sbin/bhyve/task_switch.c b/usr.sbin/bhyve/task_switch.c
index 0002da8..b939c1a 100644
--- a/usr.sbin/bhyve/task_switch.c
+++ b/usr.sbin/bhyve/task_switch.c
@@ -725,6 +725,21 @@ vmexit_task_switch(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
assert(paging->cpu_mode == CPU_MODE_PROTECTED);
/*
+ * Calculate the %eip to store in the old TSS before modifying the
+ * 'inst_length'.
+ */
+ eip = vmexit->rip + vmexit->inst_length;
+
+ /*
+ * Set the 'inst_length' to '0'.
+ *
+ * If an exception is triggered during emulation of the task switch
+ * then the exception handler should return to the instruction that
+ * caused the task switch as opposed to the subsequent instruction.
+ */
+ vmexit->inst_length = 0;
+
+ /*
* Section 4.6, "Access Rights" in Intel SDM Vol 3.
* The following page table accesses are implicitly supervisor mode:
* - accesses to GDT or LDT to load segment descriptors
@@ -839,7 +854,6 @@ vmexit_task_switch(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
}
/* Save processor state in old TSS */
- eip = vmexit->rip + vmexit->inst_length;
tss32_save(ctx, vcpu, task_switch, eip, &oldtss, ot_iov);
/*
@@ -870,7 +884,7 @@ vmexit_task_switch(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
* the saved instruction pointer will belong to the new task.
*/
vmexit->rip = newtss.tss_eip;
- vmexit->inst_length = 0;
+ assert(vmexit->inst_length == 0);
/* Load processor state from new TSS */
error = tss32_restore(ctx, vcpu, task_switch, ot_sel, &newtss, nt_iov);
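
The task_switch.c change is about ordering: the %eip stored in the old TSS must point past the instruction that triggered the switch, so it is computed from rip + inst_length before inst_length is zeroed, while the zeroed inst_length makes any exception raised during emulation resume at the task-switch instruction itself. The toy sketch below, with a hypothetical struct exitinfo, just demonstrates that ordering.

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Hypothetical stand-in for the vm_exit state used above. */
    struct exitinfo {
        uint64_t rip;          /* address of the instruction that caused the exit */
        int      inst_length;  /* its length in bytes */
    };

    int
    main(void)
    {
        struct exitinfo vmexit = { .rip = 0x1000, .inst_length = 2 };
        uint64_t eip;

        /* 1. Compute the return address to store in the old TSS first... */
        eip = vmexit.rip + vmexit.inst_length;

        /*
         * 2. ...then zero inst_length so that a fault taken while emulating
         *    the task switch resumes at the task-switch instruction itself.
         */
        vmexit.inst_length = 0;

        assert(eip == 0x1002);
        printf("old TSS %%eip = %#lx, faults re-run %#lx\n",
            (unsigned long)eip, (unsigned long)vmexit.rip);
        return (0);
    }
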
diff --git a/usr.sbin/bhyve/virtio.c b/usr.sbin/bhyve/virtio.c
index 9581fb0..19c0d47 100644
--- a/usr.sbin/bhyve/virtio.c
+++ b/usr.sbin/bhyve/virtio.c
@@ -33,6 +33,7 @@ __FBSDID("$FreeBSD$");
#include <stdio.h>
#include <stdint.h>
#include <pthread.h>
+#include <pthread_np.h>
#include "bhyverun.h"
#include "pci_emul.h"
@@ -89,6 +90,9 @@ vi_reset_dev(struct virtio_softc *vs)
struct vqueue_info *vq;
int i, nvq;
+ if (vs->vs_mtx)
+ assert(pthread_mutex_isowned_np(vs->vs_mtx));
+
nvq = vs->vs_vc->vc_nvq;
for (vq = vs->vs_queues, i = 0; i < nvq; vq++, i++) {
vq->vq_flags = 0;
@@ -99,11 +103,9 @@ vi_reset_dev(struct virtio_softc *vs)
vs->vs_negotiated_caps = 0;
vs->vs_curq = 0;
/* vs->vs_status = 0; -- redundant */
- VS_LOCK(vs);
if (vs->vs_isr)
pci_lintr_deassert(vs->vs_pi);
vs->vs_isr = 0;
- VS_UNLOCK(vs);
vs->vs_msix_cfg_idx = VIRTIO_MSI_NO_VECTOR;
}
@@ -137,7 +139,9 @@ vi_intr_init(struct virtio_softc *vs, int barnum, int use_msix)
if (use_msix) {
vs->vs_flags |= VIRTIO_USE_MSIX;
+ VS_LOCK(vs);
vi_reset_dev(vs); /* set all vectors to NO_VECTOR */
+ VS_UNLOCK(vs);
nvec = vs->vs_vc->vc_nvq + 1;
if (pci_emul_add_msixcap(vs->vs_pi, nvec, barnum))
return (1);
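
vi_reset_dev() now asserts (when a mutex exists) that the caller already holds the softc lock, and vi_intr_init() takes the lock around its call rather than locking inside the reset path. A minimal FreeBSD-specific sketch of that convention is below, using pthread_np.h's pthread_mutex_isowned_np(); device_reset_locked() and device_init() are illustrative names, not virtio.c's.

    #include <assert.h>
    #include <pthread.h>
    #include <pthread_np.h>    /* FreeBSD: pthread_mutex_isowned_np() */
    #include <stdio.h>

    static pthread_mutex_t dev_mtx = PTHREAD_MUTEX_INITIALIZER;

    /*
     * Internal helper: documents (and, in debug builds, enforces) that the
     * caller already holds dev_mtx rather than taking the lock itself.
     */
    static void
    device_reset_locked(void)
    {
        assert(pthread_mutex_isowned_np(&dev_mtx));
        /* ... reset state protected by dev_mtx ... */
    }

    /* Entry point: acquires the lock, then calls the locked variant. */
    static void
    device_init(void)
    {
        pthread_mutex_lock(&dev_mtx);
        device_reset_locked();
        pthread_mutex_unlock(&dev_mtx);
    }

    int
    main(void)
    {
        device_init();
        printf("reset ran with the lock held\n");
        return (0);
    }
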
diff --git a/usr.sbin/bhyve/xmsr.c b/usr.sbin/bhyve/xmsr.c
index 63522bf..1ed1ea1 100644
--- a/usr.sbin/bhyve/xmsr.c
+++ b/usr.sbin/bhyve/xmsr.c
@@ -31,33 +31,91 @@ __FBSDID("$FreeBSD$");
#include <sys/types.h>
+#include <machine/cpufunc.h>
#include <machine/vmm.h>
+#include <machine/specialreg.h>
+
#include <vmmapi.h>
#include <stdio.h>
#include <stdlib.h>
+#include <string.h>
#include "xmsr.h"
+static int cpu_vendor_intel, cpu_vendor_amd;
+
int
emulate_wrmsr(struct vmctx *ctx, int vcpu, uint32_t code, uint64_t val)
{
- switch (code) {
- case 0xd04: /* Sandy Bridge uncore PMC MSRs */
- case 0xc24:
- return (0);
- case 0x79:
- return (0); /* IA32_BIOS_UPDT_TRIG MSR */
- default:
- break;
+ if (cpu_vendor_intel) {
+ switch (code) {
+ case 0xd04: /* Sandy Bridge uncore PMCs */
+ case 0xc24:
+ return (0);
+ case MSR_BIOS_UPDT_TRIG:
+ return (0);
+ case MSR_BIOS_SIGN:
+ return (0);
+ default:
+ break;
+ }
}
return (-1);
}
int
-emulate_rdmsr(struct vmctx *ctx, int vcpu, uint32_t code, uint64_t *val)
+emulate_rdmsr(struct vmctx *ctx, int vcpu, uint32_t num, uint64_t *val)
{
+ int error = 0;
- return (-1);
+ if (cpu_vendor_intel) {
+ switch (num) {
+ case MSR_BIOS_SIGN:
+ case MSR_IA32_PLATFORM_ID:
+ case MSR_PKG_ENERGY_STATUS:
+ case MSR_PP0_ENERGY_STATUS:
+ case MSR_PP1_ENERGY_STATUS:
+ case MSR_DRAM_ENERGY_STATUS:
+ *val = 0;
+ break;
+ case MSR_RAPL_POWER_UNIT:
+ /*
+ * Use the default value documented in section
+ * "RAPL Interfaces" in Intel SDM vol3.
+ */
+ *val = 0x000a1003;
+ break;
+ default:
+ error = -1;
+ break;
+ }
+ }
+ return (error);
+}
+
+int
+init_msr(void)
+{
+ int error;
+ u_int regs[4];
+ char cpu_vendor[13];
+
+ do_cpuid(0, regs);
+ ((u_int *)&cpu_vendor)[0] = regs[1];
+ ((u_int *)&cpu_vendor)[1] = regs[3];
+ ((u_int *)&cpu_vendor)[2] = regs[2];
+ cpu_vendor[12] = '\0';
+
+ error = 0;
+ if (strcmp(cpu_vendor, "AuthenticAMD") == 0) {
+ cpu_vendor_amd = 1;
+ } else if (strcmp(cpu_vendor, "GenuineIntel") == 0) {
+ cpu_vendor_intel = 1;
+ } else {
+ fprintf(stderr, "Unknown cpu vendor \"%s\"\n", cpu_vendor);
+ error = -1;
+ }
+ return (error);
}
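
init_msr() above identifies the host CPU by assembling the 12-byte CPUID leaf-0 vendor string from EBX, EDX and ECX, in that order, before comparing against "GenuineIntel" and "AuthenticAMD". The sketch below shows the same byte ordering using the compiler-provided <cpuid.h> helper instead of FreeBSD's do_cpuid(), so it builds outside the bhyve tree; it illustrates the technique rather than reproducing the code above.

    #include <cpuid.h>      /* GCC/Clang __get_cpuid() */
    #include <stdio.h>
    #include <string.h>

    int
    main(void)
    {
        unsigned int eax, ebx, ecx, edx;
        char vendor[13];

        if (!__get_cpuid(0, &eax, &ebx, &ecx, &edx)) {
            fprintf(stderr, "CPUID not supported\n");
            return (1);
        }

        /* The vendor string is the concatenation of EBX, EDX, ECX. */
        memcpy(&vendor[0], &ebx, 4);
        memcpy(&vendor[4], &edx, 4);
        memcpy(&vendor[8], &ecx, 4);
        vendor[12] = '\0';

        if (strcmp(vendor, "GenuineIntel") == 0)
            printf("Intel host: %s\n", vendor);
        else if (strcmp(vendor, "AuthenticAMD") == 0)
            printf("AMD host: %s\n", vendor);
        else
            printf("Unknown cpu vendor \"%s\"\n", vendor);
        return (0);
    }
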
diff --git a/usr.sbin/bhyve/xmsr.h b/usr.sbin/bhyve/xmsr.h
index b097cf8..bcf65b7 100644
--- a/usr.sbin/bhyve/xmsr.h
+++ b/usr.sbin/bhyve/xmsr.h
@@ -29,6 +29,7 @@
#ifndef _XMSR_H_
#define _XMSR_H_
+int init_msr(void);
int emulate_wrmsr(struct vmctx *ctx, int vcpu, uint32_t code, uint64_t val);
int emulate_rdmsr(struct vmctx *ctx, int vcpu, uint32_t code, uint64_t *val);
diff --git a/usr.sbin/bhyvectl/bhyvectl.c b/usr.sbin/bhyvectl/bhyvectl.c
index b6006b7..f5e50d3 100644
--- a/usr.sbin/bhyvectl/bhyvectl.c
+++ b/usr.sbin/bhyvectl/bhyvectl.c
@@ -309,7 +309,7 @@ dump_vmcs_msr_bitmap(int vcpu, u_long addr)
if (fd < 0)
goto done;
- bitmap = mmap(NULL, PAGE_SIZE, PROT_READ, 0, fd, addr);
+ bitmap = mmap(NULL, PAGE_SIZE, PROT_READ, MAP_SHARED, fd, addr);
if (bitmap == MAP_FAILED)
goto done;