author     neel <neel@FreeBSD.org>    2015-06-27 22:48:22 +0000
committer  neel <neel@FreeBSD.org>    2015-06-27 22:48:22 +0000
commit     115742fae3f7a7c52d6d5f4894f37e68dff4fd5c (patch)
tree       cb845c4cb2d3a3b67b3e1134742c3c5b250ae954 /sys/amd64/vmm
parent     02efaba1d135756ed65855bdc99e7d83f46cc4a2 (diff)
MFC r276428:
Replace bhyve's minimal RTC emulation with a fully featured one in vmm.ko.

MFC r276432:
Initialize all fields of 'struct vm_exception exception' before passing it to vm_inject_exception().

MFC r276763:
Clear blocking due to STI or MOV SS in the hypervisor when an instruction is emulated or when the vcpu incurs an exception.

MFC r277149:
Clean up usage of 'struct vm_exception' to only communicate information from userspace to vmm.ko when injecting an exception.

MFC r277168:
Fix typo (missing comma).

MFC r277309:
Make the error message explicit instead of just printing the usage if the virtual machine name is not specified.

MFC r277310:
Simplify instruction restart logic in bhyve.

MFC r277359:
Fix a bug in libvmmapi 'vm_copy_setup()' where it would return success even if the 'gpa' was in the guest MMIO region.

MFC r277360:
MOVS instruction emulation.

MFC r277626:
Add macro to identify AVIC capability (advanced virtual interrupt controller) in AMD processors.

MFC r279220:
Don't close a block context if it couldn't be opened, avoiding a null deref.

MFC r279225:
Add "-u" option to bhyve(8) to indicate that the RTC should maintain UTC time.

MFC r279227:
Emulate MSR 0xC0011024 when running on AMD processors.

MFC r279228:
Always emulate MSR_PAT on Intel processors and don't rely on the PAT save/restore capability of VT-x. This lets bhyve run nested in older VMware versions that don't support the PAT save/restore capability.

MFC r279540:
Fix warnings/errors when building vmm.ko with gcc.
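A rough standalone sketch of the IA32_PAT validity check underlying the MSR_PAT emulation described above (it mirrors the pat_valid() helper added to vmx_msr.c in this diff; the main() harness and sample values are illustrative only and are not part of the change):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/*
 * Each 8-bit PAn field of IA32_PAT must encode a defined memory type.
 * Encodings 2, 3 and anything >= 8 are reserved, so a guest WRMSR with
 * such a value is rejected (the hypervisor injects #GP instead of
 * accepting the write).
 */
static bool
pat_valid(uint64_t val)
{
	int i, pa;

	for (i = 0; i < 8; i++) {
		pa = (val >> (i * 8)) & 0xff;
		if (pa == 2 || pa == 3 || pa >= 8)
			return (false);
	}
	return (true);
}

int
main(void)
{
	/* Power-on default PAT: WB, WT, UC-, UC repeated twice. */
	uint64_t def_pat = 0x0007040600070406ULL;

	printf("default PAT valid:  %d\n", pat_valid(def_pat));
	printf("reserved PA0 valid: %d\n", pat_valid(0x0000000000000002ULL));
	return (0);
}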
Diffstat (limited to 'sys/amd64/vmm')
-rw-r--r--  sys/amd64/vmm/amd/svm.c                  32
-rw-r--r--  sys/amd64/vmm/amd/svm_softc.h             1
-rw-r--r--  sys/amd64/vmm/amd/svm_support.S          12
-rw-r--r--  sys/amd64/vmm/intel/vmcs.c               12
-rw-r--r--  sys/amd64/vmm/intel/vmx.c                68
-rw-r--r--  sys/amd64/vmm/intel/vmx.h                 2
-rw-r--r--  sys/amd64/vmm/intel/vmx_msr.c            54
-rw-r--r--  sys/amd64/vmm/io/vhpet.c                 70
-rw-r--r--  sys/amd64/vmm/io/vrtc.c                 952
-rw-r--r--  sys/amd64/vmm/io/vrtc.h                  50
-rw-r--r--  sys/amd64/vmm/vmm.c                     179
-rw-r--r--  sys/amd64/vmm/vmm_dev.c                  30
-rw-r--r--  sys/amd64/vmm/vmm_instruction_emul.c    271
-rw-r--r--  sys/amd64/vmm/vmm_ioport.c                5
14 files changed, 1556 insertions, 182 deletions
diff --git a/sys/amd64/vmm/amd/svm.c b/sys/amd64/vmm/amd/svm.c
index ab47041..88a846d 100644
--- a/sys/amd64/vmm/amd/svm.c
+++ b/sys/amd64/vmm/amd/svm.c
@@ -80,6 +80,7 @@ SYSCTL_NODE(_hw_vmm, OID_AUTO, svm, CTLFLAG_RW, NULL, NULL);
#define AMD_CPUID_SVM_DECODE_ASSIST BIT(7) /* Decode assist */
#define AMD_CPUID_SVM_PAUSE_INC BIT(10) /* Pause intercept filter. */
#define AMD_CPUID_SVM_PAUSE_FTH BIT(12) /* Pause filter threshold */
+#define AMD_CPUID_SVM_AVIC BIT(13) /* AVIC present */
#define VMCB_CACHE_DEFAULT (VMCB_CACHE_ASID | \
VMCB_CACHE_IOPM | \
@@ -554,6 +555,7 @@ svm_vminit(struct vm *vm, pmap_t pmap)
pml4_pa = svm_sc->nptp;
for (i = 0; i < VM_MAXCPU; i++) {
vcpu = svm_get_vcpu(svm_sc, i);
+ vcpu->nextrip = ~0;
vcpu->lastcpu = NOCPU;
vcpu->vmcb_pa = vtophys(&vcpu->vmcb);
vmcb_init(svm_sc, i, iopm_pa, msrpm_pa, pml4_pa);
@@ -1200,7 +1202,6 @@ svm_vmexit(struct svm_softc *svm_sc, int vcpu, struct vm_exit *vmexit)
struct vmcb_state *state;
struct vmcb_ctrl *ctrl;
struct svm_regctx *ctx;
- struct vm_exception exception;
uint64_t code, info1, info2, val;
uint32_t eax, ecx, edx;
int error, errcode_valid, handled, idtvec, reflect;
@@ -1314,6 +1315,7 @@ svm_vmexit(struct svm_softc *svm_sc, int vcpu, struct vm_exit *vmexit)
/* fallthru */
default:
errcode_valid = 0;
+ info1 = 0;
break;
}
KASSERT(vmexit->inst_length == 0, ("invalid inst_length (%d) "
@@ -1322,14 +1324,10 @@ svm_vmexit(struct svm_softc *svm_sc, int vcpu, struct vm_exit *vmexit)
if (reflect) {
/* Reflect the exception back into the guest */
- exception.vector = idtvec;
- exception.error_code_valid = errcode_valid;
- exception.error_code = errcode_valid ? info1 : 0;
VCPU_CTR2(svm_sc->vm, vcpu, "Reflecting exception "
- "%d/%#x into the guest", exception.vector,
- exception.error_code);
- error = vm_inject_exception(svm_sc->vm, vcpu,
- &exception);
+ "%d/%#x into the guest", idtvec, (int)info1);
+ error = vm_inject_exception(svm_sc->vm, vcpu, idtvec,
+ errcode_valid, info1, 0);
KASSERT(error == 0, ("%s: vm_inject_exception error %d",
__func__, error));
}
@@ -1476,15 +1474,24 @@ svm_inj_interrupts(struct svm_softc *sc, int vcpu, struct vlapic *vlapic)
{
struct vmcb_ctrl *ctrl;
struct vmcb_state *state;
+ struct svm_vcpu *vcpustate;
uint8_t v_tpr;
int vector, need_intr_window, pending_apic_vector;
state = svm_get_vmcb_state(sc, vcpu);
ctrl = svm_get_vmcb_ctrl(sc, vcpu);
+ vcpustate = svm_get_vcpu(sc, vcpu);
need_intr_window = 0;
pending_apic_vector = 0;
+ if (vcpustate->nextrip != state->rip) {
+ ctrl->intr_shadow = 0;
+ VCPU_CTR2(sc->vm, vcpu, "Guest interrupt blocking "
+ "cleared due to rip change: %#lx/%#lx",
+ vcpustate->nextrip, state->rip);
+ }
+
/*
* Inject pending events or exceptions for this vcpu.
*
@@ -1634,7 +1641,7 @@ done:
* VMRUN.
*/
v_tpr = vlapic_get_cr8(vlapic);
- KASSERT(v_tpr >= 0 && v_tpr <= 15, ("invalid v_tpr %#x", v_tpr));
+ KASSERT(v_tpr <= 15, ("invalid v_tpr %#x", v_tpr));
if (ctrl->v_tpr != v_tpr) {
VCPU_CTR2(sc->vm, vcpu, "VMCB V_TPR changed from %#x to %#x",
ctrl->v_tpr, v_tpr);
@@ -1801,14 +1808,14 @@ static __inline void
disable_gintr(void)
{
- __asm __volatile("clgi" : : :);
+ __asm __volatile("clgi");
}
static __inline void
enable_gintr(void)
{
- __asm __volatile("stgi" : : :);
+ __asm __volatile("stgi");
}
/*
@@ -1955,6 +1962,9 @@ svm_vmrun(void *arg, int vcpu, register_t rip, pmap_t pmap,
/* #VMEXIT disables interrupts so re-enable them here. */
enable_gintr();
+ /* Update 'nextrip' */
+ vcpustate->nextrip = state->rip;
+
/* Handle #VMEXIT and if required return to user space. */
handled = svm_vmexit(svm_sc, vcpu, vmexit);
} while (handled);
diff --git a/sys/amd64/vmm/amd/svm_softc.h b/sys/amd64/vmm/amd/svm_softc.h
index a5bb57c..de0c3f7 100644
--- a/sys/amd64/vmm/amd/svm_softc.h
+++ b/sys/amd64/vmm/amd/svm_softc.h
@@ -45,6 +45,7 @@ struct svm_vcpu {
struct vmcb vmcb; /* hardware saved vcpu context */
struct svm_regctx swctx; /* software saved vcpu context */
uint64_t vmcb_pa; /* VMCB physical address */
+ uint64_t nextrip; /* next instruction to be executed by guest */
int lastcpu; /* host cpu that the vcpu last ran on */
uint32_t dirty; /* state cache bits that must be cleared */
long eptgen; /* pmap->pm_eptgen when the vcpu last ran */
diff --git a/sys/amd64/vmm/amd/svm_support.S b/sys/amd64/vmm/amd/svm_support.S
index 72327bd..b363101 100644
--- a/sys/amd64/vmm/amd/svm_support.S
+++ b/sys/amd64/vmm/amd/svm_support.S
@@ -22,6 +22,8 @@
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $FreeBSD$
*/
#include <machine/asmacros.h>
@@ -35,6 +37,10 @@
#define VENTER push %rbp ; mov %rsp,%rbp
#define VLEAVE pop %rbp
+#define VMLOAD .byte 0x0f, 0x01, 0xda
+#define VMRUN .byte 0x0f, 0x01, 0xd8
+#define VMSAVE .byte 0x0f, 0x01, 0xdb
+
/*
* svm_launch(uint64_t vmcb, struct svm_regctx *gctx)
* %rdi: physical address of VMCB
@@ -79,9 +85,9 @@ ENTRY(svm_launch)
movq SCTX_RDI(%rsi), %rdi
movq SCTX_RSI(%rsi), %rsi /* %rsi must be restored last */
- vmload %rax
- vmrun %rax
- vmsave %rax
+ VMLOAD
+ VMRUN
+ VMSAVE
pop %rax /* pop guest context pointer from the stack */
diff --git a/sys/amd64/vmm/intel/vmcs.c b/sys/amd64/vmm/intel/vmcs.c
index ae4d9db..5962526 100644
--- a/sys/amd64/vmm/intel/vmcs.c
+++ b/sys/amd64/vmm/intel/vmcs.c
@@ -342,18 +342,6 @@ vmcs_init(struct vmcs *vmcs)
*/
VMPTRLD(vmcs);
- /* Initialize guest IA32_PAT MSR with the default value */
- pat = PAT_VALUE(0, PAT_WRITE_BACK) |
- PAT_VALUE(1, PAT_WRITE_THROUGH) |
- PAT_VALUE(2, PAT_UNCACHED) |
- PAT_VALUE(3, PAT_UNCACHEABLE) |
- PAT_VALUE(4, PAT_WRITE_BACK) |
- PAT_VALUE(5, PAT_WRITE_THROUGH) |
- PAT_VALUE(6, PAT_UNCACHED) |
- PAT_VALUE(7, PAT_UNCACHEABLE);
- if ((error = vmwrite(VMCS_GUEST_IA32_PAT, pat)) != 0)
- goto done;
-
/* Host state */
/* Initialize host IA32_PAT MSR */
diff --git a/sys/amd64/vmm/intel/vmx.c b/sys/amd64/vmm/intel/vmx.c
index c3dd04e..b81e48b 100644
--- a/sys/amd64/vmm/intel/vmx.c
+++ b/sys/amd64/vmm/intel/vmx.c
@@ -100,13 +100,11 @@ __FBSDID("$FreeBSD$");
(VM_EXIT_HOST_LMA | \
VM_EXIT_SAVE_EFER | \
VM_EXIT_LOAD_EFER | \
- VM_EXIT_ACKNOWLEDGE_INTERRUPT | \
- VM_EXIT_SAVE_PAT | \
- VM_EXIT_LOAD_PAT)
+ VM_EXIT_ACKNOWLEDGE_INTERRUPT)
#define VM_EXIT_CTLS_ZERO_SETTING VM_EXIT_SAVE_DEBUG_CONTROLS
-#define VM_ENTRY_CTLS_ONE_SETTING (VM_ENTRY_LOAD_EFER | VM_ENTRY_LOAD_PAT)
+#define VM_ENTRY_CTLS_ONE_SETTING (VM_ENTRY_LOAD_EFER)
#define VM_ENTRY_CTLS_ZERO_SETTING \
(VM_ENTRY_LOAD_DEBUG_CONTROLS | \
@@ -859,10 +857,6 @@ vmx_vminit(struct vm *vm, pmap_t pmap)
* VM exit and entry respectively. It is also restored from the
* host VMCS area on a VM exit.
*
- * MSR_PAT is saved and restored in the guest VMCS are on a VM exit
- * and entry respectively. It is also restored from the host VMCS
- * area on a VM exit.
- *
* The TSC MSR is exposed read-only. Writes are disallowed as that
* will impact the host TSC.
* XXX Writes would be implemented with a wrmsr trap, and
@@ -874,7 +868,6 @@ vmx_vminit(struct vm *vm, pmap_t pmap)
guest_msr_rw(vmx, MSR_SYSENTER_ESP_MSR) ||
guest_msr_rw(vmx, MSR_SYSENTER_EIP_MSR) ||
guest_msr_rw(vmx, MSR_EFER) ||
- guest_msr_rw(vmx, MSR_PAT) ||
guest_msr_ro(vmx, MSR_TSC))
panic("vmx_vminit: error setting guest msr access");
@@ -941,6 +934,7 @@ vmx_vminit(struct vm *vm, pmap_t pmap)
vmx->cap[i].proc_ctls = procbased_ctls;
vmx->cap[i].proc_ctls2 = procbased_ctls2;
+ vmx->state[i].nextrip = ~0;
vmx->state[i].lastcpu = NOCPU;
vmx->state[i].vpid = vpid[i];
@@ -1169,12 +1163,24 @@ vmx_inject_nmi(struct vmx *vmx, int vcpu)
}
static void
-vmx_inject_interrupts(struct vmx *vmx, int vcpu, struct vlapic *vlapic)
+vmx_inject_interrupts(struct vmx *vmx, int vcpu, struct vlapic *vlapic,
+ uint64_t guestrip)
{
int vector, need_nmi_exiting, extint_pending;
uint64_t rflags, entryinfo;
uint32_t gi, info;
+ if (vmx->state[vcpu].nextrip != guestrip) {
+ gi = vmcs_read(VMCS_GUEST_INTERRUPTIBILITY);
+ if (gi & HWINTR_BLOCKING) {
+ VCPU_CTR2(vmx->vm, vcpu, "Guest interrupt blocking "
+ "cleared due to rip change: %#lx/%#lx",
+ vmx->state[vcpu].nextrip, guestrip);
+ gi &= ~HWINTR_BLOCKING;
+ vmcs_write(VMCS_GUEST_INTERRUPTIBILITY, gi);
+ }
+ }
+
if (vm_entry_intinfo(vmx->vm, vcpu, &entryinfo)) {
KASSERT((entryinfo & VMCS_INTR_VALID) != 0, ("%s: entry "
"intinfo is not valid: %#lx", __func__, entryinfo));
@@ -1771,7 +1777,7 @@ vmexit_inst_emul(struct vm_exit *vmexit, uint64_t gpa, uint64_t gla)
{
struct vm_guest_paging *paging;
uint32_t csar;
-
+
paging = &vmexit->u.inst_emul.paging;
vmexit->exitcode = VM_EXITCODE_INST_EMUL;
@@ -2060,12 +2066,11 @@ emulate_rdmsr(struct vmx *vmx, int vcpuid, u_int num, bool *retu)
static int
vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit)
{
- int error, handled, in;
+ int error, errcode, errcode_valid, handled, in;
struct vmxctx *vmxctx;
struct vlapic *vlapic;
struct vm_inout_str *vis;
struct vm_task_switch *ts;
- struct vm_exception vmexc;
uint32_t eax, ecx, edx, idtvec_info, idtvec_err, intr_info, inst_info;
uint32_t intr_type, intr_vec, reason;
uint64_t exitintinfo, qual, gpa;
@@ -2250,6 +2255,7 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit)
case EXIT_REASON_MTF:
vmm_stat_incr(vmx->vm, vcpu, VMEXIT_MTRAP, 1);
vmexit->exitcode = VM_EXITCODE_MTRAP;
+ vmexit->inst_length = 0;
break;
case EXIT_REASON_PAUSE:
vmm_stat_incr(vmx->vm, vcpu, VMEXIT_PAUSE, 1);
@@ -2376,15 +2382,15 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit)
vmcs_write(VMCS_ENTRY_INST_LENGTH, vmexit->inst_length);
/* Reflect all other exceptions back into the guest */
- bzero(&vmexc, sizeof(struct vm_exception));
- vmexc.vector = intr_vec;
+ errcode_valid = errcode = 0;
if (intr_info & VMCS_INTR_DEL_ERRCODE) {
- vmexc.error_code_valid = 1;
- vmexc.error_code = vmcs_read(VMCS_EXIT_INTR_ERRCODE);
+ errcode_valid = 1;
+ errcode = vmcs_read(VMCS_EXIT_INTR_ERRCODE);
}
VCPU_CTR2(vmx->vm, vcpu, "Reflecting exception %d/%#x into "
- "the guest", vmexc.vector, vmexc.error_code);
- error = vm_inject_exception(vmx->vm, vcpu, &vmexc);
+ "the guest", intr_vec, errcode);
+ error = vm_inject_exception(vmx->vm, vcpu, intr_vec,
+ errcode_valid, errcode, 0);
KASSERT(error == 0, ("%s: vm_inject_exception error %d",
__func__, error));
return (1);
@@ -2399,6 +2405,7 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit)
if (vm_mem_allocated(vmx->vm, gpa) ||
apic_access_fault(vmx, vcpu, gpa)) {
vmexit->exitcode = VM_EXITCODE_PAGING;
+ vmexit->inst_length = 0;
vmexit->u.paging.gpa = gpa;
vmexit->u.paging.fault_type = ept_fault_type(qual);
vmm_stat_incr(vmx->vm, vcpu, VMEXIT_NESTED_FAULT, 1);
@@ -2540,7 +2547,7 @@ vmx_exit_handle_nmi(struct vmx *vmx, int vcpuid, struct vm_exit *vmexit)
}
static int
-vmx_run(void *arg, int vcpu, register_t startrip, pmap_t pmap,
+vmx_run(void *arg, int vcpu, register_t rip, pmap_t pmap,
void *rendezvous_cookie, void *suspend_cookie)
{
int rc, handled, launched;
@@ -2550,7 +2557,6 @@ vmx_run(void *arg, int vcpu, register_t startrip, pmap_t pmap,
struct vmcs *vmcs;
struct vm_exit *vmexit;
struct vlapic *vlapic;
- uint64_t rip;
uint32_t exit_reason;
vmx = arg;
@@ -2578,11 +2584,13 @@ vmx_run(void *arg, int vcpu, register_t startrip, pmap_t pmap,
*/
vmcs_write(VMCS_HOST_CR3, rcr3());
- vmcs_write(VMCS_GUEST_RIP, startrip);
+ vmcs_write(VMCS_GUEST_RIP, rip);
vmx_set_pcpu_defaults(vmx, vcpu, pmap);
do {
- handled = UNHANDLED;
+ KASSERT(vmcs_guest_rip() == rip, ("%s: vmcs guest rip mismatch "
+ "%#lx/%#lx", __func__, vmcs_guest_rip(), rip));
+ handled = UNHANDLED;
/*
* Interrupts are disabled from this point on until the
* guest starts executing. This is done for the following
@@ -2602,7 +2610,7 @@ vmx_run(void *arg, int vcpu, register_t startrip, pmap_t pmap,
* pmap_invalidate_ept().
*/
disable_intr();
- vmx_inject_interrupts(vmx, vcpu, vlapic);
+ vmx_inject_interrupts(vmx, vcpu, vlapic, rip);
/*
* Check for vcpu suspension after injecting events because
@@ -2611,20 +2619,20 @@ vmx_run(void *arg, int vcpu, register_t startrip, pmap_t pmap,
*/
if (vcpu_suspended(suspend_cookie)) {
enable_intr();
- vm_exit_suspended(vmx->vm, vcpu, vmcs_guest_rip());
+ vm_exit_suspended(vmx->vm, vcpu, rip);
break;
}
if (vcpu_rendezvous_pending(rendezvous_cookie)) {
enable_intr();
- vm_exit_rendezvous(vmx->vm, vcpu, vmcs_guest_rip());
+ vm_exit_rendezvous(vmx->vm, vcpu, rip);
break;
}
if (vcpu_should_yield(vm, vcpu)) {
enable_intr();
- vm_exit_astpending(vmx->vm, vcpu, vmcs_guest_rip());
- vmx_astpending_trace(vmx, vcpu, vmexit->rip);
+ vm_exit_astpending(vmx->vm, vcpu, rip);
+ vmx_astpending_trace(vmx, vcpu, rip);
handled = HANDLED;
break;
}
@@ -2638,6 +2646,9 @@ vmx_run(void *arg, int vcpu, register_t startrip, pmap_t pmap,
vmexit->u.vmx.exit_reason = exit_reason = vmcs_exit_reason();
vmexit->u.vmx.exit_qualification = vmcs_exit_qualification();
+ /* Update 'nextrip' */
+ vmx->state[vcpu].nextrip = rip;
+
if (rc == VMX_GUEST_VMEXIT) {
vmx_exit_handle_nmi(vmx, vcpu, vmexit);
enable_intr();
@@ -2648,6 +2659,7 @@ vmx_run(void *arg, int vcpu, register_t startrip, pmap_t pmap,
}
launched = 1;
vmx_exit_trace(vmx, vcpu, rip, exit_reason, handled);
+ rip = vmexit->rip;
} while (handled);
/*
diff --git a/sys/amd64/vmm/intel/vmx.h b/sys/amd64/vmm/intel/vmx.h
index 2124554..bc48861 100644
--- a/sys/amd64/vmm/intel/vmx.h
+++ b/sys/amd64/vmm/intel/vmx.h
@@ -78,6 +78,7 @@ struct vmxcap {
};
struct vmxstate {
+ uint64_t nextrip; /* next instruction to be executed by guest */
int lastcpu; /* host cpu that this 'vcpu' last ran on */
uint16_t vpid;
};
@@ -102,6 +103,7 @@ enum {
IDX_MSR_STAR,
IDX_MSR_SF_MASK,
IDX_MSR_KGSBASE,
+ IDX_MSR_PAT,
GUEST_MSR_NUM /* must be the last enumeration */
};
diff --git a/sys/amd64/vmm/intel/vmx_msr.c b/sys/amd64/vmm/intel/vmx_msr.c
index f6bbf2a..e517778 100644
--- a/sys/amd64/vmm/intel/vmx_msr.c
+++ b/sys/amd64/vmm/intel/vmx_msr.c
@@ -230,6 +230,25 @@ westmere_cpu(void)
return (false);
}
+static bool
+pat_valid(uint64_t val)
+{
+ int i, pa;
+
+ /*
+ * From Intel SDM: Table "Memory Types That Can Be Encoded With PAT"
+ *
+ * Extract PA0 through PA7 and validate that each one encodes a
+ * valid memory type.
+ */
+ for (i = 0; i < 8; i++) {
+ pa = (val >> (i * 8)) & 0xff;
+ if (pa == 2 || pa == 3 || pa >= 8)
+ return (false);
+ }
+ return (true);
+}
+
void
vmx_msr_init(void)
{
@@ -302,6 +321,10 @@ vmx_msr_init(void)
void
vmx_msr_guest_init(struct vmx *vmx, int vcpuid)
{
+ uint64_t *guest_msrs;
+
+ guest_msrs = vmx->guest_msrs[vcpuid];
+
/*
* The permissions bitmap is shared between all vcpus so initialize it
* once when initializing the vBSP.
@@ -313,6 +336,19 @@ vmx_msr_guest_init(struct vmx *vmx, int vcpuid)
guest_msr_rw(vmx, MSR_SF_MASK);
guest_msr_rw(vmx, MSR_KGSBASE);
}
+
+ /*
+ * Initialize guest IA32_PAT MSR with default value after reset.
+ */
+ guest_msrs[IDX_MSR_PAT] = PAT_VALUE(0, PAT_WRITE_BACK) |
+ PAT_VALUE(1, PAT_WRITE_THROUGH) |
+ PAT_VALUE(2, PAT_UNCACHED) |
+ PAT_VALUE(3, PAT_UNCACHEABLE) |
+ PAT_VALUE(4, PAT_WRITE_BACK) |
+ PAT_VALUE(5, PAT_WRITE_THROUGH) |
+ PAT_VALUE(6, PAT_UNCACHED) |
+ PAT_VALUE(7, PAT_UNCACHEABLE);
+
return;
}
@@ -353,7 +389,11 @@ vmx_msr_guest_exit(struct vmx *vmx, int vcpuid)
int
vmx_rdmsr(struct vmx *vmx, int vcpuid, u_int num, uint64_t *val, bool *retu)
{
- int error = 0;
+ const uint64_t *guest_msrs;
+ int error;
+
+ guest_msrs = vmx->guest_msrs[vcpuid];
+ error = 0;
switch (num) {
case MSR_IA32_MISC_ENABLE:
@@ -366,6 +406,9 @@ vmx_rdmsr(struct vmx *vmx, int vcpuid, u_int num, uint64_t *val, bool *retu)
case MSR_TURBO_RATIO_LIMIT1:
*val = turbo_ratio_limit;
break;
+ case MSR_PAT:
+ *val = guest_msrs[IDX_MSR_PAT];
+ break;
default:
error = EINVAL;
break;
@@ -376,10 +419,13 @@ vmx_rdmsr(struct vmx *vmx, int vcpuid, u_int num, uint64_t *val, bool *retu)
int
vmx_wrmsr(struct vmx *vmx, int vcpuid, u_int num, uint64_t val, bool *retu)
{
+ uint64_t *guest_msrs;
uint64_t changed;
int error;
+ guest_msrs = vmx->guest_msrs[vcpuid];
error = 0;
+
switch (num) {
case MSR_IA32_MISC_ENABLE:
changed = val ^ misc_enable;
@@ -401,6 +447,12 @@ vmx_wrmsr(struct vmx *vmx, int vcpuid, u_int num, uint64_t val, bool *retu)
error = EINVAL;
break;
+ case MSR_PAT:
+ if (pat_valid(val))
+ guest_msrs[IDX_MSR_PAT] = val;
+ else
+ vm_inject_gp(vmx->vm, vcpuid);
+ break;
default:
error = EINVAL;
break;
diff --git a/sys/amd64/vmm/io/vhpet.c b/sys/amd64/vmm/io/vhpet.c
index 46e5ca7..a4c96cd 100644
--- a/sys/amd64/vmm/io/vhpet.c
+++ b/sys/amd64/vmm/io/vhpet.c
@@ -104,7 +104,6 @@ vhpet_capabilities(void)
uint64_t cap = 0;
cap |= 0x8086 << 16; /* vendor id */
- cap |= HPET_CAP_LEG_RT; /* legacy routing capable */
cap |= (VHPET_NUM_TIMERS - 1) << 8; /* number of timers */
cap |= 1; /* revision */
cap &= ~HPET_CAP_COUNT_SIZE; /* 32-bit timer */
@@ -127,15 +126,6 @@ vhpet_timer_msi_enabled(struct vhpet *vhpet, int n)
{
const uint64_t msi_enable = HPET_TCAP_FSB_INT_DEL | HPET_TCNF_FSB_EN;
- /*
- * LegacyReplacement Route configuration takes precedence over MSI
- * for timers 0 and 1.
- */
- if (n == 0 || n == 1) {
- if (vhpet->config & HPET_CNF_LEG_RT)
- return (false);
- }
-
if ((vhpet->timer[n].cap_config & msi_enable) == msi_enable)
return (true);
else
@@ -152,41 +142,9 @@ vhpet_timer_ioapic_pin(struct vhpet *vhpet, int n)
if (vhpet_timer_msi_enabled(vhpet, n))
return (0);
- if (vhpet->config & HPET_CNF_LEG_RT) {
- /*
- * In "legacy routing" timers 0 and 1 are connected to
- * ioapic pins 2 and 8 respectively.
- */
- switch (n) {
- case 0:
- return (2);
- case 1:
- return (8);
- }
- }
-
return ((vhpet->timer[n].cap_config & HPET_TCNF_INT_ROUTE) >> 9);
}
-static __inline int
-vhpet_timer_atpic_pin(struct vhpet *vhpet, int n)
-{
- if (vhpet->config & HPET_CNF_LEG_RT) {
- /*
- * In "legacy routing" timers 0 and 1 are connected to
- * 8259 master pin 0 and slave pin 0 respectively.
- */
- switch (n) {
- case 0:
- return (0);
- case 1:
- return (8);
- }
- }
-
- return (-1);
-}
-
static uint32_t
vhpet_counter(struct vhpet *vhpet, sbintime_t *nowptr)
{
@@ -216,17 +174,12 @@ vhpet_counter(struct vhpet *vhpet, sbintime_t *nowptr)
static void
vhpet_timer_clear_isr(struct vhpet *vhpet, int n)
{
- int pin, legacy_pin;
+ int pin;
if (vhpet->isr & (1 << n)) {
pin = vhpet_timer_ioapic_pin(vhpet, n);
KASSERT(pin != 0, ("vhpet timer %d irq incorrectly routed", n));
vioapic_deassert_irq(vhpet->vm, pin);
-
- legacy_pin = vhpet_timer_atpic_pin(vhpet, n);
- if (legacy_pin != -1)
- vatpic_deassert_irq(vhpet->vm, legacy_pin);
-
vhpet->isr &= ~(1 << n);
}
}
@@ -252,12 +205,6 @@ vhpet_timer_edge_trig(struct vhpet *vhpet, int n)
KASSERT(!vhpet_timer_msi_enabled(vhpet, n), ("vhpet_timer_edge_trig: "
"timer %d is using MSI", n));
- /* The legacy replacement interrupts are always edge triggered */
- if (vhpet->config & HPET_CNF_LEG_RT) {
- if (n == 0 || n == 1)
- return (true);
- }
-
if ((vhpet->timer[n].cap_config & HPET_TCNF_INT_TYPE) == 0)
return (true);
else
@@ -267,7 +214,7 @@ vhpet_timer_edge_trig(struct vhpet *vhpet, int n)
static void
vhpet_timer_interrupt(struct vhpet *vhpet, int n)
{
- int pin, legacy_pin;
+ int pin;
/* If interrupts are not enabled for this timer then just return. */
if (!vhpet_timer_interrupt_enabled(vhpet, n))
@@ -293,17 +240,11 @@ vhpet_timer_interrupt(struct vhpet *vhpet, int n)
return;
}
- legacy_pin = vhpet_timer_atpic_pin(vhpet, n);
-
if (vhpet_timer_edge_trig(vhpet, n)) {
vioapic_pulse_irq(vhpet->vm, pin);
- if (legacy_pin != -1)
- vatpic_pulse_irq(vhpet->vm, legacy_pin);
} else {
vhpet->isr |= 1 << n;
vioapic_assert_irq(vhpet->vm, pin);
- if (legacy_pin != -1)
- vatpic_assert_irq(vhpet->vm, legacy_pin);
}
}
@@ -579,6 +520,13 @@ vhpet_mmio_write(void *vm, int vcpuid, uint64_t gpa, uint64_t val, int size,
counter = vhpet_counter(vhpet, nowptr);
oldval = vhpet->config;
update_register(&vhpet->config, data, mask);
+
+ /*
+ * LegacyReplacement Routing is not supported so clear the
+ * bit explicitly.
+ */
+ vhpet->config &= ~HPET_CNF_LEG_RT;
+
if ((oldval ^ vhpet->config) & HPET_CNF_ENABLE) {
if (vhpet_counter_enabled(vhpet)) {
vhpet_start_counting(vhpet);
diff --git a/sys/amd64/vmm/io/vrtc.c b/sys/amd64/vmm/io/vrtc.c
new file mode 100644
index 0000000..d5e93dc
--- /dev/null
+++ b/sys/amd64/vmm/io/vrtc.c
@@ -0,0 +1,952 @@
+/*-
+ * Copyright (c) 2014, Neel Natu (neel@freebsd.org)
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice unmodified, this list of conditions, and the following
+ * disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/queue.h>
+#include <sys/cpuset.h>
+#include <sys/kernel.h>
+#include <sys/malloc.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/clock.h>
+#include <sys/sysctl.h>
+
+#include <machine/vmm.h>
+
+#include <isa/rtc.h>
+
+#include "vmm_ktr.h"
+#include "vatpic.h"
+#include "vioapic.h"
+#include "vrtc.h"
+
+/* Register layout of the RTC */
+struct rtcdev {
+ uint8_t sec;
+ uint8_t alarm_sec;
+ uint8_t min;
+ uint8_t alarm_min;
+ uint8_t hour;
+ uint8_t alarm_hour;
+ uint8_t day_of_week;
+ uint8_t day_of_month;
+ uint8_t month;
+ uint8_t year;
+ uint8_t reg_a;
+ uint8_t reg_b;
+ uint8_t reg_c;
+ uint8_t reg_d;
+ uint8_t nvram[128 - 14];
+} __packed;
+CTASSERT(sizeof(struct rtcdev) == 128);
+
+struct vrtc {
+ struct vm *vm;
+ struct mtx mtx;
+ struct callout callout;
+ u_int addr; /* RTC register to read or write */
+ sbintime_t base_uptime;
+ time_t base_rtctime;
+ struct rtcdev rtcdev;
+};
+
+#define VRTC_LOCK(vrtc) mtx_lock(&((vrtc)->mtx))
+#define VRTC_UNLOCK(vrtc) mtx_unlock(&((vrtc)->mtx))
+#define VRTC_LOCKED(vrtc) mtx_owned(&((vrtc)->mtx))
+
+/*
+ * RTC time is considered "broken" if:
+ * - RTC updates are halted by the guest
+ * - RTC date/time fields have invalid values
+ */
+#define VRTC_BROKEN_TIME ((time_t)-1)
+
+#define RTC_IRQ 8
+#define RTCSB_BIN 0x04
+#define RTCSB_ALL_INTRS (RTCSB_UINTR | RTCSB_AINTR | RTCSB_PINTR)
+#define rtc_halted(vrtc) ((vrtc->rtcdev.reg_b & RTCSB_HALT) != 0)
+#define aintr_enabled(vrtc) (((vrtc)->rtcdev.reg_b & RTCSB_AINTR) != 0)
+#define pintr_enabled(vrtc) (((vrtc)->rtcdev.reg_b & RTCSB_PINTR) != 0)
+#define uintr_enabled(vrtc) (((vrtc)->rtcdev.reg_b & RTCSB_UINTR) != 0)
+
+static void vrtc_callout_handler(void *arg);
+static void vrtc_set_reg_c(struct vrtc *vrtc, uint8_t newval);
+
+static MALLOC_DEFINE(M_VRTC, "vrtc", "bhyve virtual rtc");
+
+SYSCTL_DECL(_hw_vmm);
+SYSCTL_NODE(_hw_vmm, OID_AUTO, vrtc, CTLFLAG_RW, NULL, NULL);
+
+static int rtc_flag_broken_time = 1;
+SYSCTL_INT(_hw_vmm_vrtc, OID_AUTO, flag_broken_time, CTLFLAG_RDTUN,
+ &rtc_flag_broken_time, 0, "Stop guest when invalid RTC time is detected");
+
+static __inline bool
+divider_enabled(int reg_a)
+{
+ /*
+ * The RTC is counting only when dividers are not held in reset.
+ */
+ return ((reg_a & 0x70) == 0x20);
+}
+
+static __inline bool
+update_enabled(struct vrtc *vrtc)
+{
+ /*
+ * RTC date/time can be updated only if:
+ * - divider is not held in reset
+ * - guest has not disabled updates
+ * - the date/time fields have valid contents
+ */
+ if (!divider_enabled(vrtc->rtcdev.reg_a))
+ return (false);
+
+ if (rtc_halted(vrtc))
+ return (false);
+
+ if (vrtc->base_rtctime == VRTC_BROKEN_TIME)
+ return (false);
+
+ return (true);
+}
+
+static time_t
+vrtc_curtime(struct vrtc *vrtc)
+{
+ sbintime_t now, delta;
+ time_t t;
+
+ KASSERT(VRTC_LOCKED(vrtc), ("%s: vrtc not locked", __func__));
+
+ t = vrtc->base_rtctime;
+ if (update_enabled(vrtc)) {
+ now = sbinuptime();
+ delta = now - vrtc->base_uptime;
+ KASSERT(delta >= 0, ("vrtc_curtime: uptime went backwards: "
+ "%#lx to %#lx", vrtc->base_uptime, now));
+ t += delta / SBT_1S;
+ }
+ return (t);
+}
+
+static __inline uint8_t
+rtcset(struct rtcdev *rtc, int val)
+{
+
+ KASSERT(val >= 0 && val < 100, ("%s: invalid bin2bcd index %d",
+ __func__, val));
+
+ return ((rtc->reg_b & RTCSB_BIN) ? val : bin2bcd_data[val]);
+}
+
+static void
+secs_to_rtc(time_t rtctime, struct vrtc *vrtc, int force_update)
+{
+ struct clocktime ct;
+ struct timespec ts;
+ struct rtcdev *rtc;
+ int hour;
+
+ KASSERT(VRTC_LOCKED(vrtc), ("%s: vrtc not locked", __func__));
+
+ if (rtctime < 0) {
+ KASSERT(rtctime == VRTC_BROKEN_TIME,
+ ("%s: invalid vrtc time %#lx", __func__, rtctime));
+ return;
+ }
+
+ /*
+ * If the RTC is halted then the guest has "ownership" of the
+ * date/time fields. Don't update the RTC date/time fields in
+ * this case (unless forced).
+ */
+ if (rtc_halted(vrtc) && !force_update)
+ return;
+
+ ts.tv_sec = rtctime;
+ ts.tv_nsec = 0;
+ clock_ts_to_ct(&ts, &ct);
+
+ KASSERT(ct.sec >= 0 && ct.sec <= 59, ("invalid clocktime sec %d",
+ ct.sec));
+ KASSERT(ct.min >= 0 && ct.min <= 59, ("invalid clocktime min %d",
+ ct.min));
+ KASSERT(ct.hour >= 0 && ct.hour <= 23, ("invalid clocktime hour %d",
+ ct.hour));
+ KASSERT(ct.dow >= 0 && ct.dow <= 6, ("invalid clocktime wday %d",
+ ct.dow));
+ KASSERT(ct.day >= 1 && ct.day <= 31, ("invalid clocktime mday %d",
+ ct.day));
+ KASSERT(ct.mon >= 1 && ct.mon <= 12, ("invalid clocktime month %d",
+ ct.mon));
+ KASSERT(ct.year >= POSIX_BASE_YEAR, ("invalid clocktime year %d",
+ ct.year));
+
+ rtc = &vrtc->rtcdev;
+ rtc->sec = rtcset(rtc, ct.sec);
+ rtc->min = rtcset(rtc, ct.min);
+
+ hour = ct.hour;
+ if ((rtc->reg_b & RTCSB_24HR) == 0)
+ hour = (hour % 12) + 1; /* convert to a 12-hour format */
+
+ rtc->hour = rtcset(rtc, hour);
+
+ if ((rtc->reg_b & RTCSB_24HR) == 0 && ct.hour >= 12)
+ rtc->hour |= 0x80; /* set MSB to indicate PM */
+
+ rtc->day_of_week = rtcset(rtc, ct.dow + 1);
+ rtc->day_of_month = rtcset(rtc, ct.day);
+ rtc->month = rtcset(rtc, ct.mon);
+ rtc->year = rtcset(rtc, ct.year % 100);
+}
+
+static int
+rtcget(struct rtcdev *rtc, int val, int *retval)
+{
+ uint8_t upper, lower;
+
+ if (rtc->reg_b & RTCSB_BIN) {
+ *retval = val;
+ return (0);
+ }
+
+ lower = val & 0xf;
+ upper = (val >> 4) & 0xf;
+
+ if (lower > 9 || upper > 9)
+ return (-1);
+
+ *retval = upper * 10 + lower;
+ return (0);
+}
+
+static time_t
+rtc_to_secs(struct vrtc *vrtc)
+{
+ struct clocktime ct;
+ struct timespec ts;
+ struct rtcdev *rtc;
+ struct vm *vm;
+ int error, hour, pm, year;
+
+ KASSERT(VRTC_LOCKED(vrtc), ("%s: vrtc not locked", __func__));
+
+ vm = vrtc->vm;
+ rtc = &vrtc->rtcdev;
+
+ bzero(&ct, sizeof(struct clocktime));
+
+ error = rtcget(rtc, rtc->sec, &ct.sec);
+ if (error || ct.sec < 0 || ct.sec > 59) {
+ VM_CTR2(vm, "Invalid RTC sec %#x/%d", rtc->sec, ct.sec);
+ goto fail;
+ }
+
+ error = rtcget(rtc, rtc->min, &ct.min);
+ if (error || ct.min < 0 || ct.min > 59) {
+ VM_CTR2(vm, "Invalid RTC min %#x/%d", rtc->min, ct.min);
+ goto fail;
+ }
+
+ pm = 0;
+ hour = rtc->hour;
+ if ((rtc->reg_b & RTCSB_24HR) == 0) {
+ if (hour & 0x80) {
+ hour &= ~0x80;
+ pm = 1;
+ }
+ }
+ error = rtcget(rtc, hour, &ct.hour);
+ if ((rtc->reg_b & RTCSB_24HR) == 0) {
+ ct.hour -= 1;
+ if (pm)
+ ct.hour += 12;
+ }
+
+ if (error || ct.hour < 0 || ct.hour > 23) {
+ VM_CTR2(vm, "Invalid RTC hour %#x/%d", rtc->hour, ct.hour);
+ goto fail;
+ }
+
+ /*
+ * Ignore 'rtc->dow' because some guests like Linux don't bother
+ * setting it at all while others like OpenBSD/i386 set it incorrectly.
+ *
+ * clock_ct_to_ts() does not depend on 'ct.dow' anyways so ignore it.
+ */
+ ct.dow = -1;
+
+ error = rtcget(rtc, rtc->day_of_month, &ct.day);
+ if (error || ct.day < 1 || ct.day > 31) {
+ VM_CTR2(vm, "Invalid RTC mday %#x/%d", rtc->day_of_month,
+ ct.day);
+ goto fail;
+ }
+
+ error = rtcget(rtc, rtc->month, &ct.mon);
+ if (error || ct.mon < 1 || ct.mon > 12) {
+ VM_CTR2(vm, "Invalid RTC month %#x/%d", rtc->month, ct.mon);
+ goto fail;
+ }
+
+ error = rtcget(rtc, rtc->year, &year);
+ if (error || year < 0 || year > 99) {
+ VM_CTR2(vm, "Invalid RTC year %#x/%d", rtc->year, year);
+ goto fail;
+ }
+ if (year >= 70)
+ ct.year = 1900 + year;
+ else
+ ct.year = 2000 + year;
+
+ error = clock_ct_to_ts(&ct, &ts);
+ if (error || ts.tv_sec < 0) {
+ VM_CTR3(vm, "Invalid RTC clocktime.date %04d-%02d-%02d",
+ ct.year, ct.mon, ct.day);
+ VM_CTR3(vm, "Invalid RTC clocktime.time %02d:%02d:%02d",
+ ct.hour, ct.min, ct.sec);
+ goto fail;
+ }
+ return (ts.tv_sec); /* success */
+fail:
+ return (VRTC_BROKEN_TIME); /* failure */
+}
+
+static int
+vrtc_time_update(struct vrtc *vrtc, time_t newtime)
+{
+ struct rtcdev *rtc;
+ time_t oldtime;
+ uint8_t alarm_sec, alarm_min, alarm_hour;
+
+ KASSERT(VRTC_LOCKED(vrtc), ("%s: vrtc not locked", __func__));
+
+ rtc = &vrtc->rtcdev;
+ alarm_sec = rtc->alarm_sec;
+ alarm_min = rtc->alarm_min;
+ alarm_hour = rtc->alarm_hour;
+
+ oldtime = vrtc->base_rtctime;
+ VM_CTR2(vrtc->vm, "Updating RTC time from %#lx to %#lx",
+ oldtime, newtime);
+
+ if (newtime == oldtime)
+ return (0);
+
+ /*
+ * If 'newtime' indicates that RTC updates are disabled then just
+ * record that and return. There is no need to do alarm interrupt
+ * processing or update 'base_uptime' in this case.
+ */
+ if (newtime == VRTC_BROKEN_TIME) {
+ vrtc->base_rtctime = VRTC_BROKEN_TIME;
+ return (0);
+ }
+
+ /*
+ * Return an error if RTC updates are halted by the guest.
+ */
+ if (rtc_halted(vrtc)) {
+ VM_CTR0(vrtc->vm, "RTC update halted by guest");
+ return (EBUSY);
+ }
+
+ do {
+ /*
+ * If the alarm interrupt is enabled and 'oldtime' is valid
+ * then visit all the seconds between 'oldtime' and 'newtime'
+ * to check for the alarm condition.
+ *
+ * Otherwise move the RTC time forward directly to 'newtime'.
+ */
+ if (aintr_enabled(vrtc) && oldtime != VRTC_BROKEN_TIME)
+ vrtc->base_rtctime++;
+ else
+ vrtc->base_rtctime = newtime;
+
+ if (aintr_enabled(vrtc)) {
+ /*
+ * Update the RTC date/time fields before checking
+ * if the alarm conditions are satisfied.
+ */
+ secs_to_rtc(vrtc->base_rtctime, vrtc, 0);
+
+ if ((alarm_sec >= 0xC0 || alarm_sec == rtc->sec) &&
+ (alarm_min >= 0xC0 || alarm_min == rtc->min) &&
+ (alarm_hour >= 0xC0 || alarm_hour == rtc->hour)) {
+ vrtc_set_reg_c(vrtc, rtc->reg_c | RTCIR_ALARM);
+ }
+ }
+ } while (vrtc->base_rtctime != newtime);
+
+ if (uintr_enabled(vrtc))
+ vrtc_set_reg_c(vrtc, rtc->reg_c | RTCIR_UPDATE);
+
+ vrtc->base_uptime = sbinuptime();
+
+ return (0);
+}
+
+static sbintime_t
+vrtc_freq(struct vrtc *vrtc)
+{
+ int ratesel;
+
+ static sbintime_t pf[16] = {
+ 0,
+ SBT_1S / 256,
+ SBT_1S / 128,
+ SBT_1S / 8192,
+ SBT_1S / 4096,
+ SBT_1S / 2048,
+ SBT_1S / 1024,
+ SBT_1S / 512,
+ SBT_1S / 256,
+ SBT_1S / 128,
+ SBT_1S / 64,
+ SBT_1S / 32,
+ SBT_1S / 16,
+ SBT_1S / 8,
+ SBT_1S / 4,
+ SBT_1S / 2,
+ };
+
+ KASSERT(VRTC_LOCKED(vrtc), ("%s: vrtc not locked", __func__));
+
+ /*
+ * If both periodic and alarm interrupts are enabled then use the
+ * periodic frequency to drive the callout. The minimum periodic
+ * frequency (2 Hz) is higher than the alarm frequency (1 Hz) so
+ * piggyback the alarm on top of it. The same argument applies to
+ * the update interrupt.
+ */
+ if (pintr_enabled(vrtc) && divider_enabled(vrtc->rtcdev.reg_a)) {
+ ratesel = vrtc->rtcdev.reg_a & 0xf;
+ return (pf[ratesel]);
+ } else if (aintr_enabled(vrtc) && update_enabled(vrtc)) {
+ return (SBT_1S);
+ } else if (uintr_enabled(vrtc) && update_enabled(vrtc)) {
+ return (SBT_1S);
+ } else {
+ return (0);
+ }
+}
+
+static void
+vrtc_callout_reset(struct vrtc *vrtc, sbintime_t freqsbt)
+{
+
+ KASSERT(VRTC_LOCKED(vrtc), ("%s: vrtc not locked", __func__));
+
+ if (freqsbt == 0) {
+ if (callout_active(&vrtc->callout)) {
+ VM_CTR0(vrtc->vm, "RTC callout stopped");
+ callout_stop(&vrtc->callout);
+ }
+ return;
+ }
+ VM_CTR1(vrtc->vm, "RTC callout frequency %d hz", SBT_1S / freqsbt);
+ callout_reset_sbt(&vrtc->callout, freqsbt, 0, vrtc_callout_handler,
+ vrtc, 0);
+}
+
+static void
+vrtc_callout_handler(void *arg)
+{
+ struct vrtc *vrtc = arg;
+ sbintime_t freqsbt;
+ time_t rtctime;
+ int error;
+
+ VM_CTR0(vrtc->vm, "vrtc callout fired");
+
+ VRTC_LOCK(vrtc);
+ if (callout_pending(&vrtc->callout)) /* callout was reset */
+ goto done;
+
+ if (!callout_active(&vrtc->callout)) /* callout was stopped */
+ goto done;
+
+ callout_deactivate(&vrtc->callout);
+
+ KASSERT((vrtc->rtcdev.reg_b & RTCSB_ALL_INTRS) != 0,
+ ("gratuitous vrtc callout"));
+
+ if (pintr_enabled(vrtc))
+ vrtc_set_reg_c(vrtc, vrtc->rtcdev.reg_c | RTCIR_PERIOD);
+
+ if (aintr_enabled(vrtc) || uintr_enabled(vrtc)) {
+ rtctime = vrtc_curtime(vrtc);
+ error = vrtc_time_update(vrtc, rtctime);
+ KASSERT(error == 0, ("%s: vrtc_time_update error %d",
+ __func__, error));
+ }
+
+ freqsbt = vrtc_freq(vrtc);
+ KASSERT(freqsbt != 0, ("%s: vrtc frequency cannot be zero", __func__));
+ vrtc_callout_reset(vrtc, freqsbt);
+done:
+ VRTC_UNLOCK(vrtc);
+}
+
+static __inline void
+vrtc_callout_check(struct vrtc *vrtc, sbintime_t freq)
+{
+ int active;
+
+ active = callout_active(&vrtc->callout) ? 1 : 0;
+ KASSERT((freq == 0 && !active) || (freq != 0 && active),
+ ("vrtc callout %s with frequency %#lx",
+ active ? "active" : "inactive", freq));
+}
+
+static void
+vrtc_set_reg_c(struct vrtc *vrtc, uint8_t newval)
+{
+ struct rtcdev *rtc;
+ int oldirqf, newirqf;
+ uint8_t oldval, changed;
+
+ KASSERT(VRTC_LOCKED(vrtc), ("%s: vrtc not locked", __func__));
+
+ rtc = &vrtc->rtcdev;
+ newval &= RTCIR_ALARM | RTCIR_PERIOD | RTCIR_UPDATE;
+
+ oldirqf = rtc->reg_c & RTCIR_INT;
+ if ((aintr_enabled(vrtc) && (newval & RTCIR_ALARM) != 0) ||
+ (pintr_enabled(vrtc) && (newval & RTCIR_PERIOD) != 0) ||
+ (uintr_enabled(vrtc) && (newval & RTCIR_UPDATE) != 0)) {
+ newirqf = RTCIR_INT;
+ } else {
+ newirqf = 0;
+ }
+
+ oldval = rtc->reg_c;
+ rtc->reg_c = newirqf | newval;
+ changed = oldval ^ rtc->reg_c;
+ if (changed) {
+ VM_CTR2(vrtc->vm, "RTC reg_c changed from %#x to %#x",
+ oldval, rtc->reg_c);
+ }
+
+ if (!oldirqf && newirqf) {
+ VM_CTR1(vrtc->vm, "RTC irq %d asserted", RTC_IRQ);
+ vatpic_pulse_irq(vrtc->vm, RTC_IRQ);
+ vioapic_pulse_irq(vrtc->vm, RTC_IRQ);
+ } else if (oldirqf && !newirqf) {
+ VM_CTR1(vrtc->vm, "RTC irq %d deasserted", RTC_IRQ);
+ }
+}
+
+static int
+vrtc_set_reg_b(struct vrtc *vrtc, uint8_t newval)
+{
+ struct rtcdev *rtc;
+ sbintime_t oldfreq, newfreq;
+ time_t curtime, rtctime;
+ int error;
+ uint8_t oldval, changed;
+
+ KASSERT(VRTC_LOCKED(vrtc), ("%s: vrtc not locked", __func__));
+
+ rtc = &vrtc->rtcdev;
+ oldval = rtc->reg_b;
+ oldfreq = vrtc_freq(vrtc);
+
+ rtc->reg_b = newval;
+ changed = oldval ^ newval;
+ if (changed) {
+ VM_CTR2(vrtc->vm, "RTC reg_b changed from %#x to %#x",
+ oldval, newval);
+ }
+
+ if (changed & RTCSB_HALT) {
+ if ((newval & RTCSB_HALT) == 0) {
+ rtctime = rtc_to_secs(vrtc);
+ if (rtctime == VRTC_BROKEN_TIME) {
+ /*
+ * Stop updating the RTC if the date/time
+ * programmed by the guest is not correct.
+ */
+ VM_CTR0(vrtc->vm, "Invalid RTC date/time "
+ "programming detected");
+
+ if (rtc_flag_broken_time)
+ return (-1);
+ }
+ } else {
+ curtime = vrtc_curtime(vrtc);
+ KASSERT(curtime == vrtc->base_rtctime, ("%s: mismatch "
+ "between vrtc basetime (%#lx) and curtime (%#lx)",
+ __func__, vrtc->base_rtctime, curtime));
+
+ /*
+ * Force a refresh of the RTC date/time fields so
+ * they reflect the time right before the guest set
+ * the HALT bit.
+ */
+ secs_to_rtc(curtime, vrtc, 1);
+
+ /*
+ * Updates are halted so mark 'base_rtctime' to denote
+ * that the RTC date/time is in flux.
+ */
+ rtctime = VRTC_BROKEN_TIME;
+ rtc->reg_b &= ~RTCSB_UINTR;
+ }
+ error = vrtc_time_update(vrtc, rtctime);
+ KASSERT(error == 0, ("vrtc_time_update error %d", error));
+ }
+
+ /*
+ * Side effect of changes to the interrupt enable bits.
+ */
+ if (changed & RTCSB_ALL_INTRS)
+ vrtc_set_reg_c(vrtc, vrtc->rtcdev.reg_c);
+
+ /*
+ * Change the callout frequency if it has changed.
+ */
+ newfreq = vrtc_freq(vrtc);
+ if (newfreq != oldfreq)
+ vrtc_callout_reset(vrtc, newfreq);
+ else
+ vrtc_callout_check(vrtc, newfreq);
+
+ /*
+ * The side effect of bits that control the RTC date/time format
+ * is handled lazily when those fields are actually read.
+ */
+ return (0);
+}
+
+static void
+vrtc_set_reg_a(struct vrtc *vrtc, uint8_t newval)
+{
+ sbintime_t oldfreq, newfreq;
+ uint8_t oldval, changed;
+
+ KASSERT(VRTC_LOCKED(vrtc), ("%s: vrtc not locked", __func__));
+
+ newval &= ~RTCSA_TUP;
+ oldval = vrtc->rtcdev.reg_a;
+ oldfreq = vrtc_freq(vrtc);
+
+ if (divider_enabled(oldval) && !divider_enabled(newval)) {
+ VM_CTR2(vrtc->vm, "RTC divider held in reset at %#lx/%#lx",
+ vrtc->base_rtctime, vrtc->base_uptime);
+ } else if (!divider_enabled(oldval) && divider_enabled(newval)) {
+ /*
+ * If the dividers are coming out of reset then update
+ * 'base_uptime' before this happens. This is done to
+ * maintain the illusion that the RTC date/time was frozen
+ * while the dividers were disabled.
+ */
+ vrtc->base_uptime = sbinuptime();
+ VM_CTR2(vrtc->vm, "RTC divider out of reset at %#lx/%#lx",
+ vrtc->base_rtctime, vrtc->base_uptime);
+ } else {
+ /* NOTHING */
+ }
+
+ vrtc->rtcdev.reg_a = newval;
+ changed = oldval ^ newval;
+ if (changed) {
+ VM_CTR2(vrtc->vm, "RTC reg_a changed from %#x to %#x",
+ oldval, newval);
+ }
+
+ /*
+ * Side effect of changes to rate select and divider enable bits.
+ */
+ newfreq = vrtc_freq(vrtc);
+ if (newfreq != oldfreq)
+ vrtc_callout_reset(vrtc, newfreq);
+ else
+ vrtc_callout_check(vrtc, newfreq);
+}
+
+int
+vrtc_set_time(struct vm *vm, time_t secs)
+{
+ struct vrtc *vrtc;
+ int error;
+
+ vrtc = vm_rtc(vm);
+ VRTC_LOCK(vrtc);
+ error = vrtc_time_update(vrtc, secs);
+ VRTC_UNLOCK(vrtc);
+
+ if (error) {
+ VM_CTR2(vrtc->vm, "Error %d setting RTC time to %#lx", error,
+ secs);
+ } else {
+ VM_CTR1(vrtc->vm, "RTC time set to %#lx", secs);
+ }
+
+ return (error);
+}
+
+time_t
+vrtc_get_time(struct vm *vm)
+{
+ struct vrtc *vrtc;
+ time_t t;
+
+ vrtc = vm_rtc(vm);
+ VRTC_LOCK(vrtc);
+ t = vrtc_curtime(vrtc);
+ VRTC_UNLOCK(vrtc);
+
+ return (t);
+}
+
+int
+vrtc_nvram_write(struct vm *vm, int offset, uint8_t value)
+{
+ struct vrtc *vrtc;
+ uint8_t *ptr;
+
+ vrtc = vm_rtc(vm);
+
+ /*
+ * Don't allow writes to RTC control registers or the date/time fields.
+ */
+ if (offset < offsetof(struct rtcdev, nvram[0]) ||
+ offset >= sizeof(struct rtcdev)) {
+ VM_CTR1(vrtc->vm, "RTC nvram write to invalid offset %d",
+ offset);
+ return (EINVAL);
+ }
+
+ VRTC_LOCK(vrtc);
+ ptr = (uint8_t *)(&vrtc->rtcdev);
+ ptr[offset] = value;
+ VM_CTR2(vrtc->vm, "RTC nvram write %#x to offset %#x", value, offset);
+ VRTC_UNLOCK(vrtc);
+
+ return (0);
+}
+
+int
+vrtc_nvram_read(struct vm *vm, int offset, uint8_t *retval)
+{
+ struct vrtc *vrtc;
+ time_t curtime;
+ uint8_t *ptr;
+
+ /*
+ * Allow all offsets in the RTC to be read.
+ */
+ if (offset < 0 || offset >= sizeof(struct rtcdev))
+ return (EINVAL);
+
+ vrtc = vm_rtc(vm);
+ VRTC_LOCK(vrtc);
+
+ /*
+ * Update RTC date/time fields if necessary.
+ */
+ if (offset < 10) {
+ curtime = vrtc_curtime(vrtc);
+ secs_to_rtc(curtime, vrtc, 0);
+ }
+
+ ptr = (uint8_t *)(&vrtc->rtcdev);
+ *retval = ptr[offset];
+
+ VRTC_UNLOCK(vrtc);
+ return (0);
+}
+
+int
+vrtc_addr_handler(struct vm *vm, int vcpuid, bool in, int port, int bytes,
+ uint32_t *val)
+{
+ struct vrtc *vrtc;
+
+ vrtc = vm_rtc(vm);
+
+ if (bytes != 1)
+ return (-1);
+
+ if (in) {
+ *val = 0xff;
+ return (0);
+ }
+
+ VRTC_LOCK(vrtc);
+ vrtc->addr = *val & 0x7f;
+ VRTC_UNLOCK(vrtc);
+
+ return (0);
+}
+
+int
+vrtc_data_handler(struct vm *vm, int vcpuid, bool in, int port, int bytes,
+ uint32_t *val)
+{
+ struct vrtc *vrtc;
+ struct rtcdev *rtc;
+ time_t curtime;
+ int error, offset;
+
+ vrtc = vm_rtc(vm);
+ rtc = &vrtc->rtcdev;
+
+ if (bytes != 1)
+ return (-1);
+
+ VRTC_LOCK(vrtc);
+ offset = vrtc->addr;
+ if (offset >= sizeof(struct rtcdev)) {
+ VRTC_UNLOCK(vrtc);
+ return (-1);
+ }
+
+ error = 0;
+ curtime = vrtc_curtime(vrtc);
+ vrtc_time_update(vrtc, curtime);
+
+ if (in) {
+ /*
+ * Update RTC date/time fields if necessary.
+ */
+ if (offset < 10)
+ secs_to_rtc(curtime, vrtc, 0);
+
+ if (offset == 12) {
+ /*
+ * XXX
+ * reg_c interrupt flags are updated only if the
+ * corresponding interrupt enable bit in reg_b is set.
+ */
+ *val = vrtc->rtcdev.reg_c;
+ vrtc_set_reg_c(vrtc, 0);
+ } else {
+ *val = *((uint8_t *)rtc + offset);
+ }
+ VCPU_CTR2(vm, vcpuid, "Read value %#x from RTC offset %#x",
+ *val, offset);
+ } else {
+ switch (offset) {
+ case 10:
+ VCPU_CTR1(vm, vcpuid, "RTC reg_a set to %#x", *val);
+ vrtc_set_reg_a(vrtc, *val);
+ break;
+ case 11:
+ VCPU_CTR1(vm, vcpuid, "RTC reg_b set to %#x", *val);
+ error = vrtc_set_reg_b(vrtc, *val);
+ break;
+ case 12:
+ VCPU_CTR1(vm, vcpuid, "RTC reg_c set to %#x (ignored)",
+ *val);
+ break;
+ case 13:
+ VCPU_CTR1(vm, vcpuid, "RTC reg_d set to %#x (ignored)",
+ *val);
+ break;
+ case 0:
+ /*
+ * High order bit of 'seconds' is readonly.
+ */
+ *val &= 0x7f;
+ /* FALLTHRU */
+ default:
+ VCPU_CTR2(vm, vcpuid, "RTC offset %#x set to %#x",
+ offset, *val);
+ *((uint8_t *)rtc + offset) = *val;
+ break;
+ }
+ }
+ VRTC_UNLOCK(vrtc);
+ return (error);
+}
+
+void
+vrtc_reset(struct vrtc *vrtc)
+{
+ struct rtcdev *rtc;
+
+ VRTC_LOCK(vrtc);
+
+ rtc = &vrtc->rtcdev;
+ vrtc_set_reg_b(vrtc, rtc->reg_b & ~(RTCSB_ALL_INTRS | RTCSB_SQWE));
+ vrtc_set_reg_c(vrtc, 0);
+ KASSERT(!callout_active(&vrtc->callout), ("rtc callout still active"));
+
+ VRTC_UNLOCK(vrtc);
+}
+
+struct vrtc *
+vrtc_init(struct vm *vm)
+{
+ struct vrtc *vrtc;
+ struct rtcdev *rtc;
+ time_t curtime;
+
+ vrtc = malloc(sizeof(struct vrtc), M_VRTC, M_WAITOK | M_ZERO);
+ vrtc->vm = vm;
+ mtx_init(&vrtc->mtx, "vrtc lock", NULL, MTX_DEF);
+ callout_init(&vrtc->callout, 1);
+
+ /* Allow dividers to keep time but disable everything else */
+ rtc = &vrtc->rtcdev;
+ rtc->reg_a = 0x20;
+ rtc->reg_b = RTCSB_24HR;
+ rtc->reg_c = 0;
+ rtc->reg_d = RTCSD_PWR;
+
+ /* Reset the index register to a safe value. */
+ vrtc->addr = RTC_STATUSD;
+
+ /*
+ * Initialize RTC time to 00:00:00 Jan 1, 1970.
+ */
+ curtime = 0;
+
+ VRTC_LOCK(vrtc);
+ vrtc->base_rtctime = VRTC_BROKEN_TIME;
+ vrtc_time_update(vrtc, curtime);
+ secs_to_rtc(curtime, vrtc, 0);
+ VRTC_UNLOCK(vrtc);
+
+ return (vrtc);
+}
+
+void
+vrtc_cleanup(struct vrtc *vrtc)
+{
+
+ callout_drain(&vrtc->callout);
+ free(vrtc, M_VRTC);
+}
diff --git a/sys/amd64/vmm/io/vrtc.h b/sys/amd64/vmm/io/vrtc.h
new file mode 100644
index 0000000..6fbbc9c
--- /dev/null
+++ b/sys/amd64/vmm/io/vrtc.h
@@ -0,0 +1,50 @@
+/*-
+ * Copyright (c) 2014 Neel Natu (neel@freebsd.org)
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice unmodified, this list of conditions, and the following
+ * disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _VRTC_H_
+#define _VRTC_H_
+
+#include <isa/isareg.h>
+
+struct vrtc;
+
+struct vrtc *vrtc_init(struct vm *vm);
+void vrtc_cleanup(struct vrtc *vrtc);
+void vrtc_reset(struct vrtc *vrtc);
+
+time_t vrtc_get_time(struct vm *vm);
+int vrtc_set_time(struct vm *vm, time_t secs);
+int vrtc_nvram_write(struct vm *vm, int offset, uint8_t value);
+int vrtc_nvram_read(struct vm *vm, int offset, uint8_t *retval);
+
+int vrtc_addr_handler(struct vm *vm, int vcpuid, bool in, int port, int bytes,
+ uint32_t *val);
+int vrtc_data_handler(struct vm *vm, int vcpuid, bool in, int port, int bytes,
+ uint32_t *val);
+
+#endif
diff --git a/sys/amd64/vmm/vmm.c b/sys/amd64/vmm/vmm.c
index 4739a86..7f90c61 100644
--- a/sys/amd64/vmm/vmm.c
+++ b/sys/amd64/vmm/vmm.c
@@ -75,6 +75,7 @@ __FBSDID("$FreeBSD$");
#include "vioapic.h"
#include "vlapic.h"
#include "vpmtmr.h"
+#include "vrtc.h"
#include "vmm_ipi.h"
#include "vmm_stat.h"
#include "vmm_lapic.h"
@@ -100,12 +101,15 @@ struct vcpu {
uint64_t exitintinfo; /* (i) events pending at VM exit */
int nmi_pending; /* (i) NMI pending */
int extint_pending; /* (i) INTR pending */
- struct vm_exception exception; /* (x) exception collateral */
int exception_pending; /* (i) exception pending */
+ int exc_vector; /* (x) exception collateral */
+ int exc_errcode_valid;
+ uint32_t exc_errcode;
struct savefpu *guestfpu; /* (a,i) guest fpu state */
uint64_t guest_xcr0; /* (i) guest %xcr0 register */
void *stats; /* (a,i) statistics */
struct vm_exit exitinfo; /* (x) exit reason and collateral */
+ uint64_t nextrip; /* (x) next instruction to execute */
};
#define vcpu_lock_initialized(v) mtx_initialized(&((v)->mtx))
@@ -136,6 +140,7 @@ struct vm {
struct vatpic *vatpic; /* (i) virtual atpic */
struct vatpit *vatpit; /* (i) virtual atpit */
struct vpmtmr *vpmtmr; /* (i) virtual ACPI PM timer */
+ struct vrtc *vrtc; /* (o) virtual RTC */
volatile cpuset_t active_cpus; /* (i) active vcpus */
int suspend; /* (i) stop VM execution */
volatile cpuset_t suspended_cpus; /* (i) suspended vcpus */
@@ -376,6 +381,8 @@ vm_init(struct vm *vm, bool create)
vm->vatpic = vatpic_init(vm);
vm->vatpit = vatpit_init(vm);
vm->vpmtmr = vpmtmr_init(vm);
+ if (create)
+ vm->vrtc = vrtc_init(vm);
CPU_ZERO(&vm->active_cpus);
@@ -438,6 +445,10 @@ vm_cleanup(struct vm *vm, bool destroy)
if (vm->iommu != NULL)
iommu_destroy_domain(vm->iommu);
+ if (destroy)
+ vrtc_cleanup(vm->vrtc);
+ else
+ vrtc_reset(vm->vrtc);
vpmtmr_cleanup(vm->vpmtmr);
vatpit_cleanup(vm->vatpit);
vhpet_cleanup(vm->vhpet);
@@ -841,16 +852,26 @@ vm_get_register(struct vm *vm, int vcpu, int reg, uint64_t *retval)
}
int
-vm_set_register(struct vm *vm, int vcpu, int reg, uint64_t val)
+vm_set_register(struct vm *vm, int vcpuid, int reg, uint64_t val)
{
+ struct vcpu *vcpu;
+ int error;
- if (vcpu < 0 || vcpu >= VM_MAXCPU)
+ if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
return (EINVAL);
if (reg >= VM_REG_LAST)
return (EINVAL);
- return (VMSETREG(vm->cookie, vcpu, reg, val));
+ error = VMSETREG(vm->cookie, vcpuid, reg, val);
+ if (error || reg != VM_REG_GUEST_RIP)
+ return (error);
+
+ /* Set 'nextrip' to match the value of %rip */
+ VCPU_CTR1(vm, vcpuid, "Setting nextrip to %#lx", val);
+ vcpu = &vm->vcpu[vcpuid];
+ vcpu->nextrip = val;
+ return (0);
}
static boolean_t
@@ -1102,7 +1123,7 @@ vm_handle_hlt(struct vm *vm, int vcpuid, bool intr_disabled, bool *retu)
{
struct vcpu *vcpu;
const char *wmesg;
- int error, t, vcpu_halted, vm_halted;
+ int t, vcpu_halted, vm_halted;
KASSERT(!CPU_ISSET(vcpuid, &vm->halted_cpus), ("vcpu already halted"));
@@ -1110,22 +1131,6 @@ vm_handle_hlt(struct vm *vm, int vcpuid, bool intr_disabled, bool *retu)
vcpu_halted = 0;
vm_halted = 0;
- /*
- * The typical way to halt a cpu is to execute: "sti; hlt"
- *
- * STI sets RFLAGS.IF to enable interrupts. However, the processor
- * remains in an "interrupt shadow" for an additional instruction
- * following the STI. This guarantees that "sti; hlt" sequence is
- * atomic and a pending interrupt will be recognized after the HLT.
- *
- * After the HLT emulation is done the vcpu is no longer in an
- * interrupt shadow and a pending interrupt can be injected on
- * the next entry into the guest.
- */
- error = vm_set_register(vm, vcpuid, VM_REG_GUEST_INTR_SHADOW, 0);
- KASSERT(error == 0, ("%s: error %d clearing interrupt shadow",
- __func__, error));
-
vcpu_lock(vcpu);
while (1) {
/*
@@ -1206,6 +1211,9 @@ vm_handle_paging(struct vm *vm, int vcpuid, bool *retu)
vcpu = &vm->vcpu[vcpuid];
vme = &vcpu->exitinfo;
+ KASSERT(vme->inst_length == 0, ("%s: invalid inst_length %d",
+ __func__, vme->inst_length));
+
ftype = vme->u.paging.fault_type;
KASSERT(ftype == VM_PROT_READ ||
ftype == VM_PROT_WRITE || ftype == VM_PROT_EXECUTE,
@@ -1231,9 +1239,6 @@ vm_handle_paging(struct vm *vm, int vcpuid, bool *retu)
if (rv != KERN_SUCCESS)
return (EFAULT);
done:
- /* restart execution at the faulting instruction */
- vme->inst_length = 0;
-
return (0);
}
@@ -1288,10 +1293,13 @@ vm_handle_inst_emul(struct vm *vm, int vcpuid, bool *retu)
return (EFAULT);
/*
- * If the instruction length is not specified the update it now.
+ * If the instruction length was not specified then update it now
+ * along with 'nextrip'.
*/
- if (vme->inst_length == 0)
+ if (vme->inst_length == 0) {
vme->inst_length = vie->num_processed;
+ vcpu->nextrip += vie->num_processed;
+ }
/* return to userland unless this is an in-kernel emulated device */
if (gpa >= DEFAULT_APIC_BASE && gpa < DEFAULT_APIC_BASE + PAGE_SIZE) {
@@ -1440,7 +1448,7 @@ vm_run(struct vm *vm, struct vm_run *vmrun)
int error, vcpuid;
struct vcpu *vcpu;
struct pcb *pcb;
- uint64_t tscval, rip;
+ uint64_t tscval;
struct vm_exit *vme;
bool retu, intr_disabled;
pmap_t pmap;
@@ -1462,7 +1470,6 @@ vm_run(struct vm *vm, struct vm_run *vmrun)
pmap = vmspace_pmap(vm->vmspace);
vcpu = &vm->vcpu[vcpuid];
vme = &vcpu->exitinfo;
- rip = vmrun->rip;
restart:
critical_enter();
@@ -1477,7 +1484,7 @@ restart:
restore_guest_fpustate(vcpu);
vcpu_require_state(vm, vcpuid, VCPU_RUNNING);
- error = VMRUN(vm->cookie, vcpuid, rip, pmap, rptr, sptr);
+ error = VMRUN(vm->cookie, vcpuid, vcpu->nextrip, pmap, rptr, sptr);
vcpu_require_state(vm, vcpuid, VCPU_FROZEN);
save_guest_fpustate(vcpu);
@@ -1488,6 +1495,7 @@ restart:
if (error == 0) {
retu = false;
+ vcpu->nextrip = vme->rip + vme->inst_length;
switch (vme->exitcode) {
case VM_EXITCODE_SUSPENDED:
error = vm_handle_suspend(vm, vcpuid, &retu);
@@ -1524,10 +1532,8 @@ restart:
}
}
- if (error == 0 && retu == false) {
- rip = vme->rip + vme->inst_length;
+ if (error == 0 && retu == false)
goto restart;
- }
/* copy the exit information */
bcopy(vme, &vmrun->vm_exit, sizeof(struct vm_exit));
@@ -1535,6 +1541,49 @@ restart:
}
int
+vm_restart_instruction(void *arg, int vcpuid)
+{
+ struct vm *vm;
+ struct vcpu *vcpu;
+ enum vcpu_state state;
+ uint64_t rip;
+ int error;
+
+ vm = arg;
+ if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
+ return (EINVAL);
+
+ vcpu = &vm->vcpu[vcpuid];
+ state = vcpu_get_state(vm, vcpuid, NULL);
+ if (state == VCPU_RUNNING) {
+ /*
+ * When a vcpu is "running" the next instruction is determined
+ * by adding 'rip' and 'inst_length' in the vcpu's 'exitinfo'.
+ * Thus setting 'inst_length' to zero will cause the current
+ * instruction to be restarted.
+ */
+ vcpu->exitinfo.inst_length = 0;
+ VCPU_CTR1(vm, vcpuid, "restarting instruction at %#lx by "
+ "setting inst_length to zero", vcpu->exitinfo.rip);
+ } else if (state == VCPU_FROZEN) {
+ /*
+ * When a vcpu is "frozen" it is outside the critical section
+ * around VMRUN() and 'nextrip' points to the next instruction.
+ * Thus instruction restart is achieved by setting 'nextrip'
+ * to the vcpu's %rip.
+ */
+ error = vm_get_register(vm, vcpuid, VM_REG_GUEST_RIP, &rip);
+ KASSERT(!error, ("%s: error %d getting rip", __func__, error));
+ VCPU_CTR2(vm, vcpuid, "restarting instruction by updating "
+ "nextrip from %#lx to %#lx", vcpu->nextrip, rip);
+ vcpu->nextrip = rip;
+ } else {
+ panic("%s: invalid state %d", __func__, state);
+ }
+ return (0);
+}
+
+int
vm_exit_intinfo(struct vm *vm, int vcpuid, uint64_t info)
{
struct vcpu *vcpu;
@@ -1664,11 +1713,11 @@ vcpu_exception_intinfo(struct vcpu *vcpu)
uint64_t info = 0;
if (vcpu->exception_pending) {
- info = vcpu->exception.vector & 0xff;
+ info = vcpu->exc_vector & 0xff;
info |= VM_INTINFO_VALID | VM_INTINFO_HWEXCEPTION;
- if (vcpu->exception.error_code_valid) {
+ if (vcpu->exc_errcode_valid) {
info |= VM_INTINFO_DEL_ERRCODE;
- info |= (uint64_t)vcpu->exception.error_code << 32;
+ info |= (uint64_t)vcpu->exc_errcode << 32;
}
}
return (info);
@@ -1693,7 +1742,7 @@ vm_entry_intinfo(struct vm *vm, int vcpuid, uint64_t *retinfo)
info2 = vcpu_exception_intinfo(vcpu);
vcpu->exception_pending = 0;
VCPU_CTR2(vm, vcpuid, "Exception %d delivered: %#lx",
- vcpu->exception.vector, info2);
+ vcpu->exc_vector, info2);
}
if ((info1 & VM_INTINFO_VALID) && (info2 & VM_INTINFO_VALID)) {
@@ -1731,14 +1780,16 @@ vm_get_intinfo(struct vm *vm, int vcpuid, uint64_t *info1, uint64_t *info2)
}
int
-vm_inject_exception(struct vm *vm, int vcpuid, struct vm_exception *exception)
+vm_inject_exception(struct vm *vm, int vcpuid, int vector, int errcode_valid,
+ uint32_t errcode, int restart_instruction)
{
struct vcpu *vcpu;
+ int error;
if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
return (EINVAL);
- if (exception->vector < 0 || exception->vector >= 32)
+ if (vector < 0 || vector >= 32)
return (EINVAL);
/*
@@ -1746,21 +1797,35 @@ vm_inject_exception(struct vm *vm, int vcpuid, struct vm_exception *exception)
* the guest. It is a derived exception that results from specific
* combinations of nested faults.
*/
- if (exception->vector == IDT_DF)
+ if (vector == IDT_DF)
return (EINVAL);
vcpu = &vm->vcpu[vcpuid];
if (vcpu->exception_pending) {
VCPU_CTR2(vm, vcpuid, "Unable to inject exception %d due to "
- "pending exception %d", exception->vector,
- vcpu->exception.vector);
+ "pending exception %d", vector, vcpu->exc_vector);
return (EBUSY);
}
+ /*
+ * From section 26.6.1 "Interruptibility State" in Intel SDM:
+ *
+ * Event blocking by "STI" or "MOV SS" is cleared after guest executes
+ * one instruction or incurs an exception.
+ */
+ error = vm_set_register(vm, vcpuid, VM_REG_GUEST_INTR_SHADOW, 0);
+ KASSERT(error == 0, ("%s: error %d clearing interrupt shadow",
+ __func__, error));
+
+ if (restart_instruction)
+ vm_restart_instruction(vm, vcpuid);
+
vcpu->exception_pending = 1;
- vcpu->exception = *exception;
- VCPU_CTR1(vm, vcpuid, "Exception %d pending", exception->vector);
+ vcpu->exc_vector = vector;
+ vcpu->exc_errcode = errcode;
+ vcpu->exc_errcode_valid = errcode_valid;
+ VCPU_CTR1(vm, vcpuid, "Exception %d pending", vector);
return (0);
}
@@ -1768,28 +1833,15 @@ void
vm_inject_fault(void *vmarg, int vcpuid, int vector, int errcode_valid,
int errcode)
{
- struct vm_exception exception;
- struct vm_exit *vmexit;
struct vm *vm;
- int error;
+ int error, restart_instruction;
vm = vmarg;
+ restart_instruction = 1;
- exception.vector = vector;
- exception.error_code = errcode;
- exception.error_code_valid = errcode_valid;
- error = vm_inject_exception(vm, vcpuid, &exception);
+ error = vm_inject_exception(vm, vcpuid, vector, errcode_valid,
+ errcode, restart_instruction);
KASSERT(error == 0, ("vm_inject_exception error %d", error));
-
- /*
- * A fault-like exception allows the instruction to be restarted
- * after the exception handler returns.
- *
- * By setting the inst_length to 0 we ensure that the instruction
- * pointer remains at the faulting instruction.
- */
- vmexit = vm_exitinfo(vm, vcpuid);
- vmexit->inst_length = 0;
}
void
@@ -2223,6 +2275,13 @@ vm_pmtmr(struct vm *vm)
return (vm->vpmtmr);
}
+struct vrtc *
+vm_rtc(struct vm *vm)
+{
+
+ return (vm->vrtc);
+}
+
enum vm_reg_name
vm_segment_name(int seg)
{
diff --git a/sys/amd64/vmm/vmm_dev.c b/sys/amd64/vmm/vmm_dev.c
index a85109e..0293d191 100644
--- a/sys/amd64/vmm/vmm_dev.c
+++ b/sys/amd64/vmm/vmm_dev.c
@@ -58,6 +58,7 @@ __FBSDID("$FreeBSD$");
#include "io/vatpic.h"
#include "io/vioapic.h"
#include "io/vhpet.h"
+#include "io/vrtc.h"
struct vmmdev_softc {
struct vm *vm; /* vm instance cookie */
@@ -174,6 +175,8 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
struct vm_activate_cpu *vac;
struct vm_cpuset *vm_cpuset;
struct vm_intinfo *vmii;
+ struct vm_rtc_time *rtctime;
+ struct vm_rtc_data *rtcdata;
sc = vmmdev_lookup2(cdev);
if (sc == NULL)
@@ -202,6 +205,7 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
case VM_ACTIVATE_CPU:
case VM_SET_INTINFO:
case VM_GET_INTINFO:
+ case VM_RESTART_INSTRUCTION:
/*
* XXX fragile, handle with care
* Assumes that the first field of the ioctl data is the vcpu.
@@ -307,7 +311,9 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
break;
case VM_INJECT_EXCEPTION:
vmexc = (struct vm_exception *)data;
- error = vm_inject_exception(sc->vm, vmexc->cpuid, vmexc);
+ error = vm_inject_exception(sc->vm, vmexc->cpuid,
+ vmexc->vector, vmexc->error_code_valid, vmexc->error_code,
+ vmexc->restart_instruction);
break;
case VM_INJECT_NMI:
vmnmi = (struct vm_nmi *)data;
@@ -482,6 +488,28 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
error = vm_get_intinfo(sc->vm, vmii->vcpuid, &vmii->info1,
&vmii->info2);
break;
+ case VM_RTC_WRITE:
+ rtcdata = (struct vm_rtc_data *)data;
+ error = vrtc_nvram_write(sc->vm, rtcdata->offset,
+ rtcdata->value);
+ break;
+ case VM_RTC_READ:
+ rtcdata = (struct vm_rtc_data *)data;
+ error = vrtc_nvram_read(sc->vm, rtcdata->offset,
+ &rtcdata->value);
+ break;
+ case VM_RTC_SETTIME:
+ rtctime = (struct vm_rtc_time *)data;
+ error = vrtc_set_time(sc->vm, rtctime->secs);
+ break;
+ case VM_RTC_GETTIME:
+ error = 0;
+ rtctime = (struct vm_rtc_time *)data;
+ rtctime->secs = vrtc_get_time(sc->vm);
+ break;
+ case VM_RESTART_INSTRUCTION:
+ error = vm_restart_instruction(sc->vm, vcpu);
+ break;
default:
error = ENOTTY;
break;
diff --git a/sys/amd64/vmm/vmm_instruction_emul.c b/sys/amd64/vmm/vmm_instruction_emul.c
index d1d7173..3db890e 100644
--- a/sys/amd64/vmm/vmm_instruction_emul.c
+++ b/sys/amd64/vmm/vmm_instruction_emul.c
@@ -70,6 +70,7 @@ enum {
VIE_OP_TYPE_PUSH,
VIE_OP_TYPE_CMP,
VIE_OP_TYPE_POP,
+ VIE_OP_TYPE_MOVS,
VIE_OP_TYPE_LAST
};
@@ -78,6 +79,7 @@ enum {
#define VIE_OP_F_IMM8 (1 << 1) /* 8-bit immediate operand */
#define VIE_OP_F_MOFFSET (1 << 2) /* 16/32/64-bit immediate moffset */
#define VIE_OP_F_NO_MODRM (1 << 3)
+#define VIE_OP_F_NO_GLA_VERIFICATION (1 << 4)
static const struct vie_op two_byte_opcodes[256] = {
[0xB6] = {
@@ -133,6 +135,16 @@ static const struct vie_op one_byte_opcodes[256] = {
.op_type = VIE_OP_TYPE_MOV,
.op_flags = VIE_OP_F_MOFFSET | VIE_OP_F_NO_MODRM,
},
+ [0xA4] = {
+ .op_byte = 0xA4,
+ .op_type = VIE_OP_TYPE_MOVS,
+ .op_flags = VIE_OP_F_NO_MODRM | VIE_OP_F_NO_GLA_VERIFICATION
+ },
+ [0xA5] = {
+ .op_byte = 0xA5,
+ .op_type = VIE_OP_TYPE_MOVS,
+ .op_flags = VIE_OP_F_NO_MODRM | VIE_OP_F_NO_GLA_VERIFICATION
+ },
[0xC6] = {
/* XXX Group 11 extended opcode - not just MOV */
.op_byte = 0xC6,
@@ -559,6 +571,217 @@ emulate_movx(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
return (error);
}
+/*
+ * Helper function to calculate and validate a linear address.
+ *
+ * Returns 0 on success and 1 if an exception was injected into the guest.
+ */
+static int
+get_gla(void *vm, int vcpuid, struct vie *vie, struct vm_guest_paging *paging,
+ int opsize, int addrsize, int prot, enum vm_reg_name seg,
+ enum vm_reg_name gpr, uint64_t *gla)
+{
+ struct seg_desc desc;
+ uint64_t cr0, val, rflags;
+ int error;
+
+ error = vie_read_register(vm, vcpuid, VM_REG_GUEST_CR0, &cr0);
+ KASSERT(error == 0, ("%s: error %d getting cr0", __func__, error));
+
+ error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, &rflags);
+ KASSERT(error == 0, ("%s: error %d getting rflags", __func__, error));
+
+ error = vm_get_seg_desc(vm, vcpuid, seg, &desc);
+ KASSERT(error == 0, ("%s: error %d getting segment descriptor %d",
+ __func__, error, seg));
+
+ error = vie_read_register(vm, vcpuid, gpr, &val);
+ KASSERT(error == 0, ("%s: error %d getting register %d", __func__,
+ error, gpr));
+
+ if (vie_calculate_gla(paging->cpu_mode, seg, &desc, val, opsize,
+ addrsize, prot, gla)) {
+ if (seg == VM_REG_GUEST_SS)
+ vm_inject_ss(vm, vcpuid, 0);
+ else
+ vm_inject_gp(vm, vcpuid);
+ return (1);
+ }
+
+ if (vie_canonical_check(paging->cpu_mode, *gla)) {
+ if (seg == VM_REG_GUEST_SS)
+ vm_inject_ss(vm, vcpuid, 0);
+ else
+ vm_inject_gp(vm, vcpuid);
+ return (1);
+ }
+
+ if (vie_alignment_check(paging->cpl, opsize, cr0, rflags, *gla)) {
+ vm_inject_ac(vm, vcpuid, 0);
+ return (1);
+ }
+
+ return (0);
+}
+
+static int
+emulate_movs(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
+ struct vm_guest_paging *paging, mem_region_read_t memread,
+ mem_region_write_t memwrite, void *arg)
+{
+#ifdef _KERNEL
+ struct vm_copyinfo copyinfo[2];
+#else
+ struct iovec copyinfo[2];
+#endif
+ uint64_t dstaddr, srcaddr, val;
+ uint64_t rcx, rdi, rsi, rflags;
+ int error, opsize, seg, repeat;
+
+ opsize = (vie->op.op_byte == 0xA4) ? 1 : vie->opsize;
+ val = 0;
+ error = 0;
+
+ /*
+ * XXX although the MOVS instruction is only supposed to be used with
+ * the "rep" prefix some guests like FreeBSD will use "repnz" instead.
+ *
+ * Empirically the "repnz" prefix has identical behavior to "rep"
+ * and the zero flag does not make a difference.
+ */
+ repeat = vie->repz_present | vie->repnz_present;
+
+ if (repeat) {
+ error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RCX, &rcx);
+ KASSERT(!error, ("%s: error %d getting rcx", __func__, error));
+
+ /*
+ * The count register is %rcx, %ecx or %cx depending on the
+ * address size of the instruction.
+ */
+ if ((rcx & vie_size2mask(vie->addrsize)) == 0)
+ return (0);
+ }
+
+ /*
+ * Source Destination Comments
+ * --------------------------------------------
+ * (1) memory memory n/a
+ * (2) memory mmio emulated
+ * (3) mmio memory emulated
+ * (4) mmio mmio not emulated
+ *
+ * At this point we don't have sufficient information to distinguish
+ * between (2), (3) and (4). We use 'vm_copy_setup()' to tease this
+ * out because it will succeed only when operating on regular memory.
+ *
+ * XXX the emulation doesn't properly handle the case where 'gpa'
+ * is straddling the boundary between the normal memory and MMIO.
+ */
+
+ seg = vie->segment_override ? vie->segment_register : VM_REG_GUEST_DS;
+ error = get_gla(vm, vcpuid, vie, paging, opsize, vie->addrsize,
+ PROT_READ, seg, VM_REG_GUEST_RSI, &srcaddr);
+ if (error)
+ goto done;
+
+ error = vm_copy_setup(vm, vcpuid, paging, srcaddr, opsize, PROT_READ,
+ copyinfo, nitems(copyinfo));
+ if (error == 0) {
+ /*
+ * case (2): read from system memory and write to mmio.
+ */
+ vm_copyin(vm, vcpuid, copyinfo, &val, opsize);
+ vm_copy_teardown(vm, vcpuid, copyinfo, nitems(copyinfo));
+ error = memwrite(vm, vcpuid, gpa, val, opsize, arg);
+ goto done;
+ } else if (error > 0) {
+ /*
+ * Resume guest execution to handle fault.
+ */
+ goto done;
+ } else {
+ /*
+ * 'vm_copy_setup()' is expected to fail for cases (3) and (4)
+ * if 'srcaddr' is in the mmio space.
+ */
+ }
+
+ error = get_gla(vm, vcpuid, vie, paging, opsize, vie->addrsize,
+ PROT_WRITE, VM_REG_GUEST_ES, VM_REG_GUEST_RDI, &dstaddr);
+ if (error)
+ goto done;
+
+ error = vm_copy_setup(vm, vcpuid, paging, dstaddr, opsize,
+ PROT_WRITE, copyinfo, nitems(copyinfo));
+ if (error == 0) {
+ /*
+ * case (3): read from MMIO and write to system memory.
+ *
+ * A MMIO read can have side-effects so we commit to it
+ * only after vm_copy_setup() is successful. If a page-fault
+ * needs to be injected into the guest then it will happen
+ * before the MMIO read is attempted.
+ */
+ error = memread(vm, vcpuid, gpa, &val, opsize, arg);
+ if (error)
+ goto done;
+
+ vm_copyout(vm, vcpuid, &val, copyinfo, opsize);
+ vm_copy_teardown(vm, vcpuid, copyinfo, nitems(copyinfo));
+ } else if (error > 0) {
+ /*
+ * Resume guest execution to handle fault.
+ */
+ goto done;
+ } else {
+ goto done;
+ }
+
+ error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RSI, &rsi);
+ KASSERT(error == 0, ("%s: error %d getting rsi", __func__, error));
+
+ error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RDI, &rdi);
+ KASSERT(error == 0, ("%s: error %d getting rdi", __func__, error));
+
+ error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, &rflags);
+ KASSERT(error == 0, ("%s: error %d getting rflags", __func__, error));
+
+ if (rflags & PSL_D) {
+ rsi -= opsize;
+ rdi -= opsize;
+ } else {
+ rsi += opsize;
+ rdi += opsize;
+ }
+
+ error = vie_update_register(vm, vcpuid, VM_REG_GUEST_RSI, rsi,
+ vie->addrsize);
+ KASSERT(error == 0, ("%s: error %d updating rsi", __func__, error));
+
+ error = vie_update_register(vm, vcpuid, VM_REG_GUEST_RDI, rdi,
+ vie->addrsize);
+ KASSERT(error == 0, ("%s: error %d updating rdi", __func__, error));
+
+ if (repeat) {
+ rcx = rcx - 1;
+ error = vie_update_register(vm, vcpuid, VM_REG_GUEST_RCX,
+ rcx, vie->addrsize);
+ KASSERT(!error, ("%s: error %d updating rcx", __func__, error));
+
+ /*
+ * Repeat the instruction if the count register is not zero.
+ */
+ if ((rcx & vie_size2mask(vie->addrsize)) != 0)
+ vm_restart_instruction(vm, vcpuid);
+ }
+done:
+ if (error < 0)
+ return (EFAULT);
+ else
+ return (0);
+}
+
static int
emulate_and(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
mem_region_read_t memread, mem_region_write_t memwrite, void *arg)
@@ -926,9 +1149,7 @@ emulate_stack_op(void *vm, int vcpuid, uint64_t mmio_gpa, struct vie *vie,
error = memwrite(vm, vcpuid, mmio_gpa, val, size, arg);
rsp += size;
}
-#ifdef _KERNEL
vm_copy_teardown(vm, vcpuid, copyinfo, nitems(copyinfo));
-#endif
if (error == 0) {
error = vie_update_register(vm, vcpuid, VM_REG_GUEST_RSP, rsp,
@@ -1012,6 +1233,10 @@ vmm_emulate_instruction(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
error = emulate_movx(vm, vcpuid, gpa, vie,
memread, memwrite, memarg);
break;
+ case VIE_OP_TYPE_MOVS:
+ error = emulate_movs(vm, vcpuid, gpa, vie, paging, memread,
+ memwrite, memarg);
+ break;
case VIE_OP_TYPE_AND:
error = emulate_and(vm, vcpuid, gpa, vie,
memread, memwrite, memarg);
@@ -1193,6 +1418,7 @@ vie_init(struct vie *vie, const char *inst_bytes, int inst_length)
vie->base_register = VM_REG_LAST;
vie->index_register = VM_REG_LAST;
+ vie->segment_register = VM_REG_LAST;
if (inst_length) {
bcopy(inst_bytes, vie->inst, inst_length);
@@ -1458,6 +1684,35 @@ vie_advance(struct vie *vie)
vie->num_processed++;
}
+static bool
+segment_override(uint8_t x, int *seg)
+{
+
+ switch (x) {
+ case 0x2E:
+ *seg = VM_REG_GUEST_CS;
+ break;
+ case 0x36:
+ *seg = VM_REG_GUEST_SS;
+ break;
+ case 0x3E:
+ *seg = VM_REG_GUEST_DS;
+ break;
+ case 0x26:
+ *seg = VM_REG_GUEST_ES;
+ break;
+ case 0x64:
+ *seg = VM_REG_GUEST_FS;
+ break;
+ case 0x65:
+ *seg = VM_REG_GUEST_GS;
+ break;
+ default:
+ return (false);
+ }
+ return (true);
+}
+
static int
decode_prefixes(struct vie *vie, enum vm_cpu_mode cpu_mode, int cs_d)
{
@@ -1471,6 +1726,12 @@ decode_prefixes(struct vie *vie, enum vm_cpu_mode cpu_mode, int cs_d)
vie->opsize_override = 1;
else if (x == 0x67)
vie->addrsize_override = 1;
+ else if (x == 0xF3)
+ vie->repz_present = 1;
+ else if (x == 0xF2)
+ vie->repnz_present = 1;
+ else if (segment_override(x, &vie->segment_register))
+ vie->segment_override = 1;
else
break;
@@ -1923,8 +2184,10 @@ vmm_decode_instruction(struct vm *vm, int cpuid, uint64_t gla,
if (verify_inst_length(vie))
return (-1);
- if (verify_gla(vm, cpuid, gla, vie))
- return (-1);
+ if ((vie->op.op_flags & VIE_OP_F_NO_GLA_VERIFICATION) == 0) {
+ if (verify_gla(vm, cpuid, gla, vie))
+ return (-1);
+ }
vie->decoded = 1; /* success */
diff --git a/sys/amd64/vmm/vmm_ioport.c b/sys/amd64/vmm/vmm_ioport.c
index e553599..fc68a61 100644
--- a/sys/amd64/vmm/vmm_ioport.c
+++ b/sys/amd64/vmm/vmm_ioport.c
@@ -42,6 +42,7 @@ __FBSDID("$FreeBSD$");
#include "vatpic.h"
#include "vatpit.h"
#include "vpmtmr.h"
+#include "vrtc.h"
#include "vmm_ioport.h"
#include "vmm_ktr.h"
@@ -60,6 +61,8 @@ ioport_handler_func_t ioport_handler[MAX_IOPORTS] = {
[IO_ELCR1] = vatpic_elc_handler,
[IO_ELCR2] = vatpic_elc_handler,
[IO_PMTMR] = vpmtmr_handler,
+ [IO_RTC] = vrtc_addr_handler,
+ [IO_RTC + 1] = vrtc_data_handler,
};
#ifdef KTR
@@ -71,7 +74,7 @@ inout_instruction(struct vm_exit *vmexit)
static const char *iodesc[] = {
"outb", "outw", "outl",
"inb", "inw", "inl",
- "outsb", "outsw", "outsd"
+ "outsb", "outsw", "outsd",
"insb", "insw", "insd",
};