summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--lib/libvmmapi/vmmapi.c111
-rw-r--r--lib/libvmmapi/vmmapi.h21
-rwxr-xr-xshare/examples/bhyve/vmrun.sh16
-rw-r--r--sys/amd64/include/vmm.h14
-rw-r--r--sys/amd64/include/vmm_dev.h28
-rw-r--r--sys/amd64/vmm/amd/svm.c32
-rw-r--r--sys/amd64/vmm/amd/svm_softc.h1
-rw-r--r--sys/amd64/vmm/amd/svm_support.S12
-rw-r--r--sys/amd64/vmm/intel/vmcs.c12
-rw-r--r--sys/amd64/vmm/intel/vmx.c68
-rw-r--r--sys/amd64/vmm/intel/vmx.h2
-rw-r--r--sys/amd64/vmm/intel/vmx_msr.c54
-rw-r--r--sys/amd64/vmm/io/vhpet.c70
-rw-r--r--sys/amd64/vmm/io/vrtc.c952
-rw-r--r--sys/amd64/vmm/io/vrtc.h50
-rw-r--r--sys/amd64/vmm/vmm.c179
-rw-r--r--sys/amd64/vmm/vmm_dev.c30
-rw-r--r--sys/amd64/vmm/vmm_instruction_emul.c271
-rw-r--r--sys/amd64/vmm/vmm_ioport.c5
-rw-r--r--sys/modules/vmm/Makefile3
-rw-r--r--usr.sbin/bhyve/bhyve.84
-rw-r--r--usr.sbin/bhyve/bhyverun.c57
-rw-r--r--usr.sbin/bhyve/bhyverun.h5
-rw-r--r--usr.sbin/bhyve/inout.c20
-rw-r--r--usr.sbin/bhyve/pci_ahci.c3
-rw-r--r--usr.sbin/bhyve/rtc.c303
-rw-r--r--usr.sbin/bhyve/rtc.h2
-rw-r--r--usr.sbin/bhyve/task_switch.c16
-rw-r--r--usr.sbin/bhyve/xmsr.c9
-rw-r--r--usr.sbin/bhyvectl/bhyvectl.c89
30 files changed, 1875 insertions, 564 deletions
diff --git a/lib/libvmmapi/vmmapi.c b/lib/libvmmapi/vmmapi.c
index 93955c7..9828876 100644
--- a/lib/libvmmapi/vmmapi.c
+++ b/lib/libvmmapi/vmmapi.c
@@ -368,14 +368,13 @@ vm_get_register(struct vmctx *ctx, int vcpu, int reg, uint64_t *ret_val)
}
int
-vm_run(struct vmctx *ctx, int vcpu, uint64_t rip, struct vm_exit *vmexit)
+vm_run(struct vmctx *ctx, int vcpu, struct vm_exit *vmexit)
{
int error;
struct vm_run vmrun;
bzero(&vmrun, sizeof(vmrun));
vmrun.cpuid = vcpu;
- vmrun.rip = rip;
error = ioctl(ctx->fd, VM_RUN, &vmrun);
bcopy(&vmrun.vm_exit, vmexit, sizeof(struct vm_exit));
@@ -399,36 +398,22 @@ vm_reinit(struct vmctx *ctx)
return (ioctl(ctx->fd, VM_REINIT, 0));
}
-static int
-vm_inject_exception_real(struct vmctx *ctx, int vcpu, int vector,
- int error_code, int error_code_valid)
+int
+vm_inject_exception(struct vmctx *ctx, int vcpu, int vector, int errcode_valid,
+ uint32_t errcode, int restart_instruction)
{
struct vm_exception exc;
- bzero(&exc, sizeof(exc));
exc.cpuid = vcpu;
exc.vector = vector;
- exc.error_code = error_code;
- exc.error_code_valid = error_code_valid;
+ exc.error_code = errcode;
+ exc.error_code_valid = errcode_valid;
+ exc.restart_instruction = restart_instruction;
return (ioctl(ctx->fd, VM_INJECT_EXCEPTION, &exc));
}
int
-vm_inject_exception(struct vmctx *ctx, int vcpu, int vector)
-{
-
- return (vm_inject_exception_real(ctx, vcpu, vector, 0, 0));
-}
-
-int
-vm_inject_exception2(struct vmctx *ctx, int vcpu, int vector, int errcode)
-{
-
- return (vm_inject_exception_real(ctx, vcpu, vector, errcode, 1));
-}
-
-int
vm_apicid2vcpu(struct vmctx *ctx, int apicid)
{
/*
@@ -1002,6 +987,7 @@ int
vm_copy_setup(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging,
uint64_t gla, size_t len, int prot, struct iovec *iov, int iovcnt)
{
+ void *va;
uint64_t gpa;
int error, fault, i, n, off;
@@ -1021,7 +1007,11 @@ vm_copy_setup(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging,
off = gpa & PAGE_MASK;
n = min(len, PAGE_SIZE - off);
- iov->iov_base = (void *)gpa;
+ va = vm_map_gpa(ctx, gpa, n);
+ if (va == NULL)
+ return (-1);
+
+ iov->iov_base = va;
iov->iov_len = n;
iov++;
iovcnt--;
@@ -1033,19 +1023,24 @@ vm_copy_setup(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging,
}
void
+vm_copy_teardown(struct vmctx *ctx, int vcpu, struct iovec *iov, int iovcnt)
+{
+
+ return;
+}
+
+void
vm_copyin(struct vmctx *ctx, int vcpu, struct iovec *iov, void *vp, size_t len)
{
const char *src;
char *dst;
- uint64_t gpa;
size_t n;
dst = vp;
while (len) {
assert(iov->iov_len);
- gpa = (uint64_t)iov->iov_base;
n = min(len, iov->iov_len);
- src = vm_map_gpa(ctx, gpa, n);
+ src = iov->iov_base;
bcopy(src, dst, n);
iov++;
@@ -1060,15 +1055,13 @@ vm_copyout(struct vmctx *ctx, int vcpu, const void *vp, struct iovec *iov,
{
const char *src;
char *dst;
- uint64_t gpa;
size_t n;
src = vp;
while (len) {
assert(iov->iov_len);
- gpa = (uint64_t)iov->iov_base;
n = min(len, iov->iov_len);
- dst = vm_map_gpa(ctx, gpa, n);
+ dst = iov->iov_base;
bcopy(src, dst, n);
iov++;
@@ -1146,3 +1139,63 @@ vm_set_intinfo(struct vmctx *ctx, int vcpu, uint64_t info1)
error = ioctl(ctx->fd, VM_SET_INTINFO, &vmii);
return (error);
}
+
+int
+vm_rtc_write(struct vmctx *ctx, int offset, uint8_t value)
+{
+ struct vm_rtc_data rtcdata;
+ int error;
+
+ bzero(&rtcdata, sizeof(struct vm_rtc_data));
+ rtcdata.offset = offset;
+ rtcdata.value = value;
+ error = ioctl(ctx->fd, VM_RTC_WRITE, &rtcdata);
+ return (error);
+}
+
+int
+vm_rtc_read(struct vmctx *ctx, int offset, uint8_t *retval)
+{
+ struct vm_rtc_data rtcdata;
+ int error;
+
+ bzero(&rtcdata, sizeof(struct vm_rtc_data));
+ rtcdata.offset = offset;
+ error = ioctl(ctx->fd, VM_RTC_READ, &rtcdata);
+ if (error == 0)
+ *retval = rtcdata.value;
+ return (error);
+}
+
+int
+vm_rtc_settime(struct vmctx *ctx, time_t secs)
+{
+ struct vm_rtc_time rtctime;
+ int error;
+
+ bzero(&rtctime, sizeof(struct vm_rtc_time));
+ rtctime.secs = secs;
+ error = ioctl(ctx->fd, VM_RTC_SETTIME, &rtctime);
+ return (error);
+}
+
+int
+vm_rtc_gettime(struct vmctx *ctx, time_t *secs)
+{
+ struct vm_rtc_time rtctime;
+ int error;
+
+ bzero(&rtctime, sizeof(struct vm_rtc_time));
+ error = ioctl(ctx->fd, VM_RTC_GETTIME, &rtctime);
+ if (error == 0)
+ *secs = rtctime.secs;
+ return (error);
+}
+
+int
+vm_restart_instruction(void *arg, int vcpu)
+{
+ struct vmctx *ctx = arg;
+
+ return (ioctl(ctx->fd, VM_RESTART_INSTRUCTION, &vcpu));
+}
diff --git a/lib/libvmmapi/vmmapi.h b/lib/libvmmapi/vmmapi.h
index fbb6ddd..06b2930 100644
--- a/lib/libvmmapi/vmmapi.h
+++ b/lib/libvmmapi/vmmapi.h
@@ -32,6 +32,12 @@
#include <sys/param.h>
#include <sys/cpuset.h>
+/*
+ * API version for out-of-tree consumers like grub-bhyve for making compile
+ * time decisions.
+ */
+#define VMMAPI_VERSION 0101 /* 2 digit major followed by 2 digit minor */
+
struct iovec;
struct vmctx;
enum x2apic_state;
@@ -70,13 +76,12 @@ int vm_get_seg_desc(struct vmctx *ctx, int vcpu, int reg,
struct seg_desc *seg_desc);
int vm_set_register(struct vmctx *ctx, int vcpu, int reg, uint64_t val);
int vm_get_register(struct vmctx *ctx, int vcpu, int reg, uint64_t *retval);
-int vm_run(struct vmctx *ctx, int vcpu, uint64_t rip,
- struct vm_exit *ret_vmexit);
+int vm_run(struct vmctx *ctx, int vcpu, struct vm_exit *ret_vmexit);
int vm_suspend(struct vmctx *ctx, enum vm_suspend_how how);
int vm_reinit(struct vmctx *ctx);
int vm_apicid2vcpu(struct vmctx *ctx, int apicid);
-int vm_inject_exception(struct vmctx *ctx, int vcpu, int vec);
-int vm_inject_exception2(struct vmctx *ctx, int vcpu, int vec, int errcode);
+int vm_inject_exception(struct vmctx *ctx, int vcpu, int vector,
+ int errcode_valid, uint32_t errcode, int restart_instruction);
int vm_lapic_irq(struct vmctx *ctx, int vcpu, int vector);
int vm_lapic_local_irq(struct vmctx *ctx, int vcpu, int vector);
int vm_lapic_msi(struct vmctx *ctx, uint64_t addr, uint64_t msg);
@@ -132,6 +137,14 @@ void vm_copyin(struct vmctx *ctx, int vcpu, struct iovec *guest_iov,
void *host_dst, size_t len);
void vm_copyout(struct vmctx *ctx, int vcpu, const void *host_src,
struct iovec *guest_iov, size_t len);
+void vm_copy_teardown(struct vmctx *ctx, int vcpu, struct iovec *iov,
+ int iovcnt);
+
+/* RTC */
+int vm_rtc_write(struct vmctx *ctx, int offset, uint8_t value);
+int vm_rtc_read(struct vmctx *ctx, int offset, uint8_t *retval);
+int vm_rtc_settime(struct vmctx *ctx, time_t secs);
+int vm_rtc_gettime(struct vmctx *ctx, time_t *secs);
/* Reset vcpu register state */
int vcpu_reset(struct vmctx *ctx, int vcpu);
diff --git a/share/examples/bhyve/vmrun.sh b/share/examples/bhyve/vmrun.sh
index e1cbfcf..d3d5cdf 100755
--- a/share/examples/bhyve/vmrun.sh
+++ b/share/examples/bhyve/vmrun.sh
@@ -39,7 +39,13 @@ DEFAULT_CONSOLE=stdio
DEFAULT_VIRTIO_DISK="./diskdev"
DEFAULT_ISOFILE="./release.iso"
+errmsg() {
+ echo "*** $1"
+}
+
usage() {
+ local msg=$1
+
echo "Usage: vmrun.sh [-ahi] [-c <CPUs>] [-C <console>] [-d <disk file>]"
echo " [-e <name=value>] [-g <gdbport> ] [-H <directory>]"
echo " [-I <location of installation iso>] [-m <memsize>]"
@@ -58,18 +64,18 @@ usage() {
echo " -m: memory size (default is ${DEFAULT_MEMSIZE})"
echo " -t: tap device for virtio-net (default is $DEFAULT_TAPDEV)"
echo ""
- echo " This script needs to be executed with superuser privileges"
- echo ""
+ [ -n "$msg" ] && errmsg "$msg"
exit 1
}
if [ `id -u` -ne 0 ]; then
- usage
+ errmsg "This script must be executed with superuser privileges"
+ exit 1
fi
kldstat -n vmm > /dev/null 2>&1
if [ $? -ne 0 ]; then
- echo "vmm.ko is not loaded!"
+ errmsg "vmm.ko is not loaded"
exit 1
fi
@@ -143,7 +149,7 @@ fi
shift $((${OPTIND} - 1))
if [ $# -ne 1 ]; then
- usage
+ usage "virtual machine name not specified"
fi
vmname="$1"
diff --git a/sys/amd64/include/vmm.h b/sys/amd64/include/vmm.h
index 8a8c3f4..cf7f5bc 100644
--- a/sys/amd64/include/vmm.h
+++ b/sys/amd64/include/vmm.h
@@ -286,9 +286,10 @@ int vm_unassign_pptdev(struct vm *vm, int bus, int slot, int func);
struct vatpic *vm_atpic(struct vm *vm);
struct vatpit *vm_atpit(struct vm *vm);
struct vpmtmr *vm_pmtmr(struct vm *vm);
+struct vrtc *vm_rtc(struct vm *vm);
/*
- * Inject exception 'vme' into the guest vcpu. This function returns 0 on
+ * Inject exception 'vector' into the guest vcpu. This function returns 0 on
* success and non-zero on failure.
*
* Wrapper functions like 'vm_inject_gp()' should be preferred to calling
@@ -298,7 +299,8 @@ struct vpmtmr *vm_pmtmr(struct vm *vm);
* This function should only be called in the context of the thread that is
* executing this vcpu.
*/
-int vm_inject_exception(struct vm *vm, int vcpuid, struct vm_exception *vme);
+int vm_inject_exception(struct vm *vm, int vcpuid, int vector, int err_valid,
+ uint32_t errcode, int restart_instruction);
/*
* This function is called after a VM-exit that occurred during exception or
@@ -444,8 +446,11 @@ struct vie {
rex_x:1,
rex_b:1,
rex_present:1,
+ repz_present:1, /* REP/REPE/REPZ prefix */
+ repnz_present:1, /* REPNE/REPNZ prefix */
opsize_override:1, /* Operand size override */
- addrsize_override:1; /* Address size override */
+ addrsize_override:1, /* Address size override */
+ segment_override:1; /* Segment override */
uint8_t mod:2, /* ModRM byte */
reg:4,
@@ -461,6 +466,7 @@ struct vie {
uint8_t scale;
int base_register; /* VM_REG_GUEST_xyz */
int index_register; /* VM_REG_GUEST_xyz */
+ int segment_register; /* VM_REG_GUEST_xyz */
int64_t displacement; /* optional addr displacement */
int64_t immediate; /* optional immediate operand */
@@ -627,4 +633,6 @@ vm_inject_ss(void *vm, int vcpuid, int errcode)
void vm_inject_pf(void *vm, int vcpuid, int error_code, uint64_t cr2);
+int vm_restart_instruction(void *vm, int vcpuid);
+
#endif /* _VMM_H_ */
diff --git a/sys/amd64/include/vmm_dev.h b/sys/amd64/include/vmm_dev.h
index e4d839e..9d031a9 100644
--- a/sys/amd64/include/vmm_dev.h
+++ b/sys/amd64/include/vmm_dev.h
@@ -54,7 +54,6 @@ struct vm_seg_desc { /* data or code segment */
struct vm_run {
int cpuid;
- uint64_t rip; /* start running here */
struct vm_exit vm_exit;
};
@@ -63,6 +62,7 @@ struct vm_exception {
int vector;
uint32_t error_code;
int error_code_valid;
+ int restart_instruction;
};
struct vm_lapic_msi {
@@ -195,6 +195,15 @@ struct vm_intinfo {
uint64_t info2;
};
+struct vm_rtc_time {
+ time_t secs;
+};
+
+struct vm_rtc_data {
+ int offset;
+ uint8_t value;
+};
+
enum {
/* general routines */
IOCNUM_ABIVERS = 0,
@@ -228,6 +237,7 @@ enum {
IOCNUM_LAPIC_MSI = 36,
IOCNUM_LAPIC_LOCAL_IRQ = 37,
IOCNUM_IOAPIC_PINCOUNT = 38,
+ IOCNUM_RESTART_INSTRUCTION = 39,
/* PCI pass-thru */
IOCNUM_BIND_PPTDEV = 40,
@@ -254,6 +264,12 @@ enum {
/* vm_cpuset */
IOCNUM_ACTIVATE_CPU = 90,
IOCNUM_GET_CPUSET = 91,
+
+ /* RTC */
+ IOCNUM_RTC_READ = 100,
+ IOCNUM_RTC_WRITE = 101,
+ IOCNUM_RTC_SETTIME = 102,
+ IOCNUM_RTC_GETTIME = 103,
};
#define VM_RUN \
@@ -336,4 +352,14 @@ enum {
_IOW('v', IOCNUM_SET_INTINFO, struct vm_intinfo)
#define VM_GET_INTINFO \
_IOWR('v', IOCNUM_GET_INTINFO, struct vm_intinfo)
+#define VM_RTC_WRITE \
+ _IOW('v', IOCNUM_RTC_WRITE, struct vm_rtc_data)
+#define VM_RTC_READ \
+ _IOWR('v', IOCNUM_RTC_READ, struct vm_rtc_data)
+#define VM_RTC_SETTIME \
+ _IOW('v', IOCNUM_RTC_SETTIME, struct vm_rtc_time)
+#define VM_RTC_GETTIME \
+ _IOR('v', IOCNUM_RTC_GETTIME, struct vm_rtc_time)
+#define VM_RESTART_INSTRUCTION \
+ _IOW('v', IOCNUM_RESTART_INSTRUCTION, int)
#endif
diff --git a/sys/amd64/vmm/amd/svm.c b/sys/amd64/vmm/amd/svm.c
index ab47041..88a846d 100644
--- a/sys/amd64/vmm/amd/svm.c
+++ b/sys/amd64/vmm/amd/svm.c
@@ -80,6 +80,7 @@ SYSCTL_NODE(_hw_vmm, OID_AUTO, svm, CTLFLAG_RW, NULL, NULL);
#define AMD_CPUID_SVM_DECODE_ASSIST BIT(7) /* Decode assist */
#define AMD_CPUID_SVM_PAUSE_INC BIT(10) /* Pause intercept filter. */
#define AMD_CPUID_SVM_PAUSE_FTH BIT(12) /* Pause filter threshold */
+#define AMD_CPUID_SVM_AVIC BIT(13) /* AVIC present */
#define VMCB_CACHE_DEFAULT (VMCB_CACHE_ASID | \
VMCB_CACHE_IOPM | \
@@ -554,6 +555,7 @@ svm_vminit(struct vm *vm, pmap_t pmap)
pml4_pa = svm_sc->nptp;
for (i = 0; i < VM_MAXCPU; i++) {
vcpu = svm_get_vcpu(svm_sc, i);
+ vcpu->nextrip = ~0;
vcpu->lastcpu = NOCPU;
vcpu->vmcb_pa = vtophys(&vcpu->vmcb);
vmcb_init(svm_sc, i, iopm_pa, msrpm_pa, pml4_pa);
@@ -1200,7 +1202,6 @@ svm_vmexit(struct svm_softc *svm_sc, int vcpu, struct vm_exit *vmexit)
struct vmcb_state *state;
struct vmcb_ctrl *ctrl;
struct svm_regctx *ctx;
- struct vm_exception exception;
uint64_t code, info1, info2, val;
uint32_t eax, ecx, edx;
int error, errcode_valid, handled, idtvec, reflect;
@@ -1314,6 +1315,7 @@ svm_vmexit(struct svm_softc *svm_sc, int vcpu, struct vm_exit *vmexit)
/* fallthru */
default:
errcode_valid = 0;
+ info1 = 0;
break;
}
KASSERT(vmexit->inst_length == 0, ("invalid inst_length (%d) "
@@ -1322,14 +1324,10 @@ svm_vmexit(struct svm_softc *svm_sc, int vcpu, struct vm_exit *vmexit)
if (reflect) {
/* Reflect the exception back into the guest */
- exception.vector = idtvec;
- exception.error_code_valid = errcode_valid;
- exception.error_code = errcode_valid ? info1 : 0;
VCPU_CTR2(svm_sc->vm, vcpu, "Reflecting exception "
- "%d/%#x into the guest", exception.vector,
- exception.error_code);
- error = vm_inject_exception(svm_sc->vm, vcpu,
- &exception);
+ "%d/%#x into the guest", idtvec, (int)info1);
+ error = vm_inject_exception(svm_sc->vm, vcpu, idtvec,
+ errcode_valid, info1, 0);
KASSERT(error == 0, ("%s: vm_inject_exception error %d",
__func__, error));
}
@@ -1476,15 +1474,24 @@ svm_inj_interrupts(struct svm_softc *sc, int vcpu, struct vlapic *vlapic)
{
struct vmcb_ctrl *ctrl;
struct vmcb_state *state;
+ struct svm_vcpu *vcpustate;
uint8_t v_tpr;
int vector, need_intr_window, pending_apic_vector;
state = svm_get_vmcb_state(sc, vcpu);
ctrl = svm_get_vmcb_ctrl(sc, vcpu);
+ vcpustate = svm_get_vcpu(sc, vcpu);
need_intr_window = 0;
pending_apic_vector = 0;
+ if (vcpustate->nextrip != state->rip) {
+ ctrl->intr_shadow = 0;
+ VCPU_CTR2(sc->vm, vcpu, "Guest interrupt blocking "
+ "cleared due to rip change: %#lx/%#lx",
+ vcpustate->nextrip, state->rip);
+ }
+
/*
* Inject pending events or exceptions for this vcpu.
*
@@ -1634,7 +1641,7 @@ done:
* VMRUN.
*/
v_tpr = vlapic_get_cr8(vlapic);
- KASSERT(v_tpr >= 0 && v_tpr <= 15, ("invalid v_tpr %#x", v_tpr));
+ KASSERT(v_tpr <= 15, ("invalid v_tpr %#x", v_tpr));
if (ctrl->v_tpr != v_tpr) {
VCPU_CTR2(sc->vm, vcpu, "VMCB V_TPR changed from %#x to %#x",
ctrl->v_tpr, v_tpr);
@@ -1801,14 +1808,14 @@ static __inline void
disable_gintr(void)
{
- __asm __volatile("clgi" : : :);
+ __asm __volatile("clgi");
}
static __inline void
enable_gintr(void)
{
- __asm __volatile("stgi" : : :);
+ __asm __volatile("stgi");
}
/*
@@ -1955,6 +1962,9 @@ svm_vmrun(void *arg, int vcpu, register_t rip, pmap_t pmap,
/* #VMEXIT disables interrupts so re-enable them here. */
enable_gintr();
+ /* Update 'nextrip' */
+ vcpustate->nextrip = state->rip;
+
/* Handle #VMEXIT and if required return to user space. */
handled = svm_vmexit(svm_sc, vcpu, vmexit);
} while (handled);
diff --git a/sys/amd64/vmm/amd/svm_softc.h b/sys/amd64/vmm/amd/svm_softc.h
index a5bb57c..de0c3f7 100644
--- a/sys/amd64/vmm/amd/svm_softc.h
+++ b/sys/amd64/vmm/amd/svm_softc.h
@@ -45,6 +45,7 @@ struct svm_vcpu {
struct vmcb vmcb; /* hardware saved vcpu context */
struct svm_regctx swctx; /* software saved vcpu context */
uint64_t vmcb_pa; /* VMCB physical address */
+ uint64_t nextrip; /* next instruction to be executed by guest */
int lastcpu; /* host cpu that the vcpu last ran on */
uint32_t dirty; /* state cache bits that must be cleared */
long eptgen; /* pmap->pm_eptgen when the vcpu last ran */
diff --git a/sys/amd64/vmm/amd/svm_support.S b/sys/amd64/vmm/amd/svm_support.S
index 72327bd..b363101 100644
--- a/sys/amd64/vmm/amd/svm_support.S
+++ b/sys/amd64/vmm/amd/svm_support.S
@@ -22,6 +22,8 @@
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $FreeBSD$
*/
#include <machine/asmacros.h>
@@ -35,6 +37,10 @@
#define VENTER push %rbp ; mov %rsp,%rbp
#define VLEAVE pop %rbp
+#define VMLOAD .byte 0x0f, 0x01, 0xda
+#define VMRUN .byte 0x0f, 0x01, 0xd8
+#define VMSAVE .byte 0x0f, 0x01, 0xdb
+
/*
* svm_launch(uint64_t vmcb, struct svm_regctx *gctx)
* %rdi: physical address of VMCB
@@ -79,9 +85,9 @@ ENTRY(svm_launch)
movq SCTX_RDI(%rsi), %rdi
movq SCTX_RSI(%rsi), %rsi /* %rsi must be restored last */
- vmload %rax
- vmrun %rax
- vmsave %rax
+ VMLOAD
+ VMRUN
+ VMSAVE
pop %rax /* pop guest context pointer from the stack */
diff --git a/sys/amd64/vmm/intel/vmcs.c b/sys/amd64/vmm/intel/vmcs.c
index ae4d9db..5962526 100644
--- a/sys/amd64/vmm/intel/vmcs.c
+++ b/sys/amd64/vmm/intel/vmcs.c
@@ -342,18 +342,6 @@ vmcs_init(struct vmcs *vmcs)
*/
VMPTRLD(vmcs);
- /* Initialize guest IA32_PAT MSR with the default value */
- pat = PAT_VALUE(0, PAT_WRITE_BACK) |
- PAT_VALUE(1, PAT_WRITE_THROUGH) |
- PAT_VALUE(2, PAT_UNCACHED) |
- PAT_VALUE(3, PAT_UNCACHEABLE) |
- PAT_VALUE(4, PAT_WRITE_BACK) |
- PAT_VALUE(5, PAT_WRITE_THROUGH) |
- PAT_VALUE(6, PAT_UNCACHED) |
- PAT_VALUE(7, PAT_UNCACHEABLE);
- if ((error = vmwrite(VMCS_GUEST_IA32_PAT, pat)) != 0)
- goto done;
-
/* Host state */
/* Initialize host IA32_PAT MSR */
diff --git a/sys/amd64/vmm/intel/vmx.c b/sys/amd64/vmm/intel/vmx.c
index c3dd04e..b81e48b 100644
--- a/sys/amd64/vmm/intel/vmx.c
+++ b/sys/amd64/vmm/intel/vmx.c
@@ -100,13 +100,11 @@ __FBSDID("$FreeBSD$");
(VM_EXIT_HOST_LMA | \
VM_EXIT_SAVE_EFER | \
VM_EXIT_LOAD_EFER | \
- VM_EXIT_ACKNOWLEDGE_INTERRUPT | \
- VM_EXIT_SAVE_PAT | \
- VM_EXIT_LOAD_PAT)
+ VM_EXIT_ACKNOWLEDGE_INTERRUPT)
#define VM_EXIT_CTLS_ZERO_SETTING VM_EXIT_SAVE_DEBUG_CONTROLS
-#define VM_ENTRY_CTLS_ONE_SETTING (VM_ENTRY_LOAD_EFER | VM_ENTRY_LOAD_PAT)
+#define VM_ENTRY_CTLS_ONE_SETTING (VM_ENTRY_LOAD_EFER)
#define VM_ENTRY_CTLS_ZERO_SETTING \
(VM_ENTRY_LOAD_DEBUG_CONTROLS | \
@@ -859,10 +857,6 @@ vmx_vminit(struct vm *vm, pmap_t pmap)
* VM exit and entry respectively. It is also restored from the
* host VMCS area on a VM exit.
*
- * MSR_PAT is saved and restored in the guest VMCS are on a VM exit
- * and entry respectively. It is also restored from the host VMCS
- * area on a VM exit.
- *
* The TSC MSR is exposed read-only. Writes are disallowed as that
* will impact the host TSC.
* XXX Writes would be implemented with a wrmsr trap, and
@@ -874,7 +868,6 @@ vmx_vminit(struct vm *vm, pmap_t pmap)
guest_msr_rw(vmx, MSR_SYSENTER_ESP_MSR) ||
guest_msr_rw(vmx, MSR_SYSENTER_EIP_MSR) ||
guest_msr_rw(vmx, MSR_EFER) ||
- guest_msr_rw(vmx, MSR_PAT) ||
guest_msr_ro(vmx, MSR_TSC))
panic("vmx_vminit: error setting guest msr access");
@@ -941,6 +934,7 @@ vmx_vminit(struct vm *vm, pmap_t pmap)
vmx->cap[i].proc_ctls = procbased_ctls;
vmx->cap[i].proc_ctls2 = procbased_ctls2;
+ vmx->state[i].nextrip = ~0;
vmx->state[i].lastcpu = NOCPU;
vmx->state[i].vpid = vpid[i];
@@ -1169,12 +1163,24 @@ vmx_inject_nmi(struct vmx *vmx, int vcpu)
}
static void
-vmx_inject_interrupts(struct vmx *vmx, int vcpu, struct vlapic *vlapic)
+vmx_inject_interrupts(struct vmx *vmx, int vcpu, struct vlapic *vlapic,
+ uint64_t guestrip)
{
int vector, need_nmi_exiting, extint_pending;
uint64_t rflags, entryinfo;
uint32_t gi, info;
+ if (vmx->state[vcpu].nextrip != guestrip) {
+ gi = vmcs_read(VMCS_GUEST_INTERRUPTIBILITY);
+ if (gi & HWINTR_BLOCKING) {
+ VCPU_CTR2(vmx->vm, vcpu, "Guest interrupt blocking "
+ "cleared due to rip change: %#lx/%#lx",
+ vmx->state[vcpu].nextrip, guestrip);
+ gi &= ~HWINTR_BLOCKING;
+ vmcs_write(VMCS_GUEST_INTERRUPTIBILITY, gi);
+ }
+ }
+
if (vm_entry_intinfo(vmx->vm, vcpu, &entryinfo)) {
KASSERT((entryinfo & VMCS_INTR_VALID) != 0, ("%s: entry "
"intinfo is not valid: %#lx", __func__, entryinfo));
@@ -1771,7 +1777,7 @@ vmexit_inst_emul(struct vm_exit *vmexit, uint64_t gpa, uint64_t gla)
{
struct vm_guest_paging *paging;
uint32_t csar;
-
+
paging = &vmexit->u.inst_emul.paging;
vmexit->exitcode = VM_EXITCODE_INST_EMUL;
@@ -2060,12 +2066,11 @@ emulate_rdmsr(struct vmx *vmx, int vcpuid, u_int num, bool *retu)
static int
vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit)
{
- int error, handled, in;
+ int error, errcode, errcode_valid, handled, in;
struct vmxctx *vmxctx;
struct vlapic *vlapic;
struct vm_inout_str *vis;
struct vm_task_switch *ts;
- struct vm_exception vmexc;
uint32_t eax, ecx, edx, idtvec_info, idtvec_err, intr_info, inst_info;
uint32_t intr_type, intr_vec, reason;
uint64_t exitintinfo, qual, gpa;
@@ -2250,6 +2255,7 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit)
case EXIT_REASON_MTF:
vmm_stat_incr(vmx->vm, vcpu, VMEXIT_MTRAP, 1);
vmexit->exitcode = VM_EXITCODE_MTRAP;
+ vmexit->inst_length = 0;
break;
case EXIT_REASON_PAUSE:
vmm_stat_incr(vmx->vm, vcpu, VMEXIT_PAUSE, 1);
@@ -2376,15 +2382,15 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit)
vmcs_write(VMCS_ENTRY_INST_LENGTH, vmexit->inst_length);
/* Reflect all other exceptions back into the guest */
- bzero(&vmexc, sizeof(struct vm_exception));
- vmexc.vector = intr_vec;
+ errcode_valid = errcode = 0;
if (intr_info & VMCS_INTR_DEL_ERRCODE) {
- vmexc.error_code_valid = 1;
- vmexc.error_code = vmcs_read(VMCS_EXIT_INTR_ERRCODE);
+ errcode_valid = 1;
+ errcode = vmcs_read(VMCS_EXIT_INTR_ERRCODE);
}
VCPU_CTR2(vmx->vm, vcpu, "Reflecting exception %d/%#x into "
- "the guest", vmexc.vector, vmexc.error_code);
- error = vm_inject_exception(vmx->vm, vcpu, &vmexc);
+ "the guest", intr_vec, errcode);
+ error = vm_inject_exception(vmx->vm, vcpu, intr_vec,
+ errcode_valid, errcode, 0);
KASSERT(error == 0, ("%s: vm_inject_exception error %d",
__func__, error));
return (1);
@@ -2399,6 +2405,7 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit)
if (vm_mem_allocated(vmx->vm, gpa) ||
apic_access_fault(vmx, vcpu, gpa)) {
vmexit->exitcode = VM_EXITCODE_PAGING;
+ vmexit->inst_length = 0;
vmexit->u.paging.gpa = gpa;
vmexit->u.paging.fault_type = ept_fault_type(qual);
vmm_stat_incr(vmx->vm, vcpu, VMEXIT_NESTED_FAULT, 1);
@@ -2540,7 +2547,7 @@ vmx_exit_handle_nmi(struct vmx *vmx, int vcpuid, struct vm_exit *vmexit)
}
static int
-vmx_run(void *arg, int vcpu, register_t startrip, pmap_t pmap,
+vmx_run(void *arg, int vcpu, register_t rip, pmap_t pmap,
void *rendezvous_cookie, void *suspend_cookie)
{
int rc, handled, launched;
@@ -2550,7 +2557,6 @@ vmx_run(void *arg, int vcpu, register_t startrip, pmap_t pmap,
struct vmcs *vmcs;
struct vm_exit *vmexit;
struct vlapic *vlapic;
- uint64_t rip;
uint32_t exit_reason;
vmx = arg;
@@ -2578,11 +2584,13 @@ vmx_run(void *arg, int vcpu, register_t startrip, pmap_t pmap,
*/
vmcs_write(VMCS_HOST_CR3, rcr3());
- vmcs_write(VMCS_GUEST_RIP, startrip);
+ vmcs_write(VMCS_GUEST_RIP, rip);
vmx_set_pcpu_defaults(vmx, vcpu, pmap);
do {
- handled = UNHANDLED;
+ KASSERT(vmcs_guest_rip() == rip, ("%s: vmcs guest rip mismatch "
+ "%#lx/%#lx", __func__, vmcs_guest_rip(), rip));
+ handled = UNHANDLED;
/*
* Interrupts are disabled from this point on until the
* guest starts executing. This is done for the following
@@ -2602,7 +2610,7 @@ vmx_run(void *arg, int vcpu, register_t startrip, pmap_t pmap,
* pmap_invalidate_ept().
*/
disable_intr();
- vmx_inject_interrupts(vmx, vcpu, vlapic);
+ vmx_inject_interrupts(vmx, vcpu, vlapic, rip);
/*
* Check for vcpu suspension after injecting events because
@@ -2611,20 +2619,20 @@ vmx_run(void *arg, int vcpu, register_t startrip, pmap_t pmap,
*/
if (vcpu_suspended(suspend_cookie)) {
enable_intr();
- vm_exit_suspended(vmx->vm, vcpu, vmcs_guest_rip());
+ vm_exit_suspended(vmx->vm, vcpu, rip);
break;
}
if (vcpu_rendezvous_pending(rendezvous_cookie)) {
enable_intr();
- vm_exit_rendezvous(vmx->vm, vcpu, vmcs_guest_rip());
+ vm_exit_rendezvous(vmx->vm, vcpu, rip);
break;
}
if (vcpu_should_yield(vm, vcpu)) {
enable_intr();
- vm_exit_astpending(vmx->vm, vcpu, vmcs_guest_rip());
- vmx_astpending_trace(vmx, vcpu, vmexit->rip);
+ vm_exit_astpending(vmx->vm, vcpu, rip);
+ vmx_astpending_trace(vmx, vcpu, rip);
handled = HANDLED;
break;
}
@@ -2638,6 +2646,9 @@ vmx_run(void *arg, int vcpu, register_t startrip, pmap_t pmap,
vmexit->u.vmx.exit_reason = exit_reason = vmcs_exit_reason();
vmexit->u.vmx.exit_qualification = vmcs_exit_qualification();
+ /* Update 'nextrip' */
+ vmx->state[vcpu].nextrip = rip;
+
if (rc == VMX_GUEST_VMEXIT) {
vmx_exit_handle_nmi(vmx, vcpu, vmexit);
enable_intr();
@@ -2648,6 +2659,7 @@ vmx_run(void *arg, int vcpu, register_t startrip, pmap_t pmap,
}
launched = 1;
vmx_exit_trace(vmx, vcpu, rip, exit_reason, handled);
+ rip = vmexit->rip;
} while (handled);
/*
diff --git a/sys/amd64/vmm/intel/vmx.h b/sys/amd64/vmm/intel/vmx.h
index 2124554..bc48861 100644
--- a/sys/amd64/vmm/intel/vmx.h
+++ b/sys/amd64/vmm/intel/vmx.h
@@ -78,6 +78,7 @@ struct vmxcap {
};
struct vmxstate {
+ uint64_t nextrip; /* next instruction to be executed by guest */
int lastcpu; /* host cpu that this 'vcpu' last ran on */
uint16_t vpid;
};
@@ -102,6 +103,7 @@ enum {
IDX_MSR_STAR,
IDX_MSR_SF_MASK,
IDX_MSR_KGSBASE,
+ IDX_MSR_PAT,
GUEST_MSR_NUM /* must be the last enumeration */
};
diff --git a/sys/amd64/vmm/intel/vmx_msr.c b/sys/amd64/vmm/intel/vmx_msr.c
index f6bbf2a..e517778 100644
--- a/sys/amd64/vmm/intel/vmx_msr.c
+++ b/sys/amd64/vmm/intel/vmx_msr.c
@@ -230,6 +230,25 @@ westmere_cpu(void)
return (false);
}
+static bool
+pat_valid(uint64_t val)
+{
+ int i, pa;
+
+ /*
+ * From Intel SDM: Table "Memory Types That Can Be Encoded With PAT"
+ *
+ * Extract PA0 through PA7 and validate that each one encodes a
+ * valid memory type.
+ */
+ for (i = 0; i < 8; i++) {
+ pa = (val >> (i * 8)) & 0xff;
+ if (pa == 2 || pa == 3 || pa >= 8)
+ return (false);
+ }
+ return (true);
+}
+
void
vmx_msr_init(void)
{
@@ -302,6 +321,10 @@ vmx_msr_init(void)
void
vmx_msr_guest_init(struct vmx *vmx, int vcpuid)
{
+ uint64_t *guest_msrs;
+
+ guest_msrs = vmx->guest_msrs[vcpuid];
+
/*
* The permissions bitmap is shared between all vcpus so initialize it
* once when initializing the vBSP.
@@ -313,6 +336,19 @@ vmx_msr_guest_init(struct vmx *vmx, int vcpuid)
guest_msr_rw(vmx, MSR_SF_MASK);
guest_msr_rw(vmx, MSR_KGSBASE);
}
+
+ /*
+ * Initialize guest IA32_PAT MSR with default value after reset.
+ */
+ guest_msrs[IDX_MSR_PAT] = PAT_VALUE(0, PAT_WRITE_BACK) |
+ PAT_VALUE(1, PAT_WRITE_THROUGH) |
+ PAT_VALUE(2, PAT_UNCACHED) |
+ PAT_VALUE(3, PAT_UNCACHEABLE) |
+ PAT_VALUE(4, PAT_WRITE_BACK) |
+ PAT_VALUE(5, PAT_WRITE_THROUGH) |
+ PAT_VALUE(6, PAT_UNCACHED) |
+ PAT_VALUE(7, PAT_UNCACHEABLE);
+
return;
}
@@ -353,7 +389,11 @@ vmx_msr_guest_exit(struct vmx *vmx, int vcpuid)
int
vmx_rdmsr(struct vmx *vmx, int vcpuid, u_int num, uint64_t *val, bool *retu)
{
- int error = 0;
+ const uint64_t *guest_msrs;
+ int error;
+
+ guest_msrs = vmx->guest_msrs[vcpuid];
+ error = 0;
switch (num) {
case MSR_IA32_MISC_ENABLE:
@@ -366,6 +406,9 @@ vmx_rdmsr(struct vmx *vmx, int vcpuid, u_int num, uint64_t *val, bool *retu)
case MSR_TURBO_RATIO_LIMIT1:
*val = turbo_ratio_limit;
break;
+ case MSR_PAT:
+ *val = guest_msrs[IDX_MSR_PAT];
+ break;
default:
error = EINVAL;
break;
@@ -376,10 +419,13 @@ vmx_rdmsr(struct vmx *vmx, int vcpuid, u_int num, uint64_t *val, bool *retu)
int
vmx_wrmsr(struct vmx *vmx, int vcpuid, u_int num, uint64_t val, bool *retu)
{
+ uint64_t *guest_msrs;
uint64_t changed;
int error;
+ guest_msrs = vmx->guest_msrs[vcpuid];
error = 0;
+
switch (num) {
case MSR_IA32_MISC_ENABLE:
changed = val ^ misc_enable;
@@ -401,6 +447,12 @@ vmx_wrmsr(struct vmx *vmx, int vcpuid, u_int num, uint64_t val, bool *retu)
error = EINVAL;
break;
+ case MSR_PAT:
+ if (pat_valid(val))
+ guest_msrs[IDX_MSR_PAT] = val;
+ else
+ vm_inject_gp(vmx->vm, vcpuid);
+ break;
default:
error = EINVAL;
break;
diff --git a/sys/amd64/vmm/io/vhpet.c b/sys/amd64/vmm/io/vhpet.c
index 46e5ca7..a4c96cd 100644
--- a/sys/amd64/vmm/io/vhpet.c
+++ b/sys/amd64/vmm/io/vhpet.c
@@ -104,7 +104,6 @@ vhpet_capabilities(void)
uint64_t cap = 0;
cap |= 0x8086 << 16; /* vendor id */
- cap |= HPET_CAP_LEG_RT; /* legacy routing capable */
cap |= (VHPET_NUM_TIMERS - 1) << 8; /* number of timers */
cap |= 1; /* revision */
cap &= ~HPET_CAP_COUNT_SIZE; /* 32-bit timer */
@@ -127,15 +126,6 @@ vhpet_timer_msi_enabled(struct vhpet *vhpet, int n)
{
const uint64_t msi_enable = HPET_TCAP_FSB_INT_DEL | HPET_TCNF_FSB_EN;
- /*
- * LegacyReplacement Route configuration takes precedence over MSI
- * for timers 0 and 1.
- */
- if (n == 0 || n == 1) {
- if (vhpet->config & HPET_CNF_LEG_RT)
- return (false);
- }
-
if ((vhpet->timer[n].cap_config & msi_enable) == msi_enable)
return (true);
else
@@ -152,41 +142,9 @@ vhpet_timer_ioapic_pin(struct vhpet *vhpet, int n)
if (vhpet_timer_msi_enabled(vhpet, n))
return (0);
- if (vhpet->config & HPET_CNF_LEG_RT) {
- /*
- * In "legacy routing" timers 0 and 1 are connected to
- * ioapic pins 2 and 8 respectively.
- */
- switch (n) {
- case 0:
- return (2);
- case 1:
- return (8);
- }
- }
-
return ((vhpet->timer[n].cap_config & HPET_TCNF_INT_ROUTE) >> 9);
}
-static __inline int
-vhpet_timer_atpic_pin(struct vhpet *vhpet, int n)
-{
- if (vhpet->config & HPET_CNF_LEG_RT) {
- /*
- * In "legacy routing" timers 0 and 1 are connected to
- * 8259 master pin 0 and slave pin 0 respectively.
- */
- switch (n) {
- case 0:
- return (0);
- case 1:
- return (8);
- }
- }
-
- return (-1);
-}
-
static uint32_t
vhpet_counter(struct vhpet *vhpet, sbintime_t *nowptr)
{
@@ -216,17 +174,12 @@ vhpet_counter(struct vhpet *vhpet, sbintime_t *nowptr)
static void
vhpet_timer_clear_isr(struct vhpet *vhpet, int n)
{
- int pin, legacy_pin;
+ int pin;
if (vhpet->isr & (1 << n)) {
pin = vhpet_timer_ioapic_pin(vhpet, n);
KASSERT(pin != 0, ("vhpet timer %d irq incorrectly routed", n));
vioapic_deassert_irq(vhpet->vm, pin);
-
- legacy_pin = vhpet_timer_atpic_pin(vhpet, n);
- if (legacy_pin != -1)
- vatpic_deassert_irq(vhpet->vm, legacy_pin);
-
vhpet->isr &= ~(1 << n);
}
}
@@ -252,12 +205,6 @@ vhpet_timer_edge_trig(struct vhpet *vhpet, int n)
KASSERT(!vhpet_timer_msi_enabled(vhpet, n), ("vhpet_timer_edge_trig: "
"timer %d is using MSI", n));
- /* The legacy replacement interrupts are always edge triggered */
- if (vhpet->config & HPET_CNF_LEG_RT) {
- if (n == 0 || n == 1)
- return (true);
- }
-
if ((vhpet->timer[n].cap_config & HPET_TCNF_INT_TYPE) == 0)
return (true);
else
@@ -267,7 +214,7 @@ vhpet_timer_edge_trig(struct vhpet *vhpet, int n)
static void
vhpet_timer_interrupt(struct vhpet *vhpet, int n)
{
- int pin, legacy_pin;
+ int pin;
/* If interrupts are not enabled for this timer then just return. */
if (!vhpet_timer_interrupt_enabled(vhpet, n))
@@ -293,17 +240,11 @@ vhpet_timer_interrupt(struct vhpet *vhpet, int n)
return;
}
- legacy_pin = vhpet_timer_atpic_pin(vhpet, n);
-
if (vhpet_timer_edge_trig(vhpet, n)) {
vioapic_pulse_irq(vhpet->vm, pin);
- if (legacy_pin != -1)
- vatpic_pulse_irq(vhpet->vm, legacy_pin);
} else {
vhpet->isr |= 1 << n;
vioapic_assert_irq(vhpet->vm, pin);
- if (legacy_pin != -1)
- vatpic_assert_irq(vhpet->vm, legacy_pin);
}
}
@@ -579,6 +520,13 @@ vhpet_mmio_write(void *vm, int vcpuid, uint64_t gpa, uint64_t val, int size,
counter = vhpet_counter(vhpet, nowptr);
oldval = vhpet->config;
update_register(&vhpet->config, data, mask);
+
+ /*
+ * LegacyReplacement Routing is not supported so clear the
+ * bit explicitly.
+ */
+ vhpet->config &= ~HPET_CNF_LEG_RT;
+
if ((oldval ^ vhpet->config) & HPET_CNF_ENABLE) {
if (vhpet_counter_enabled(vhpet)) {
vhpet_start_counting(vhpet);
diff --git a/sys/amd64/vmm/io/vrtc.c b/sys/amd64/vmm/io/vrtc.c
new file mode 100644
index 0000000..d5e93dc
--- /dev/null
+++ b/sys/amd64/vmm/io/vrtc.c
@@ -0,0 +1,952 @@
+/*-
+ * Copyright (c) 2014, Neel Natu (neel@freebsd.org)
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice unmodified, this list of conditions, and the following
+ * disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/queue.h>
+#include <sys/cpuset.h>
+#include <sys/kernel.h>
+#include <sys/malloc.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/clock.h>
+#include <sys/sysctl.h>
+
+#include <machine/vmm.h>
+
+#include <isa/rtc.h>
+
+#include "vmm_ktr.h"
+#include "vatpic.h"
+#include "vioapic.h"
+#include "vrtc.h"
+
+/* Register layout of the RTC */
+struct rtcdev {
+ uint8_t sec;
+ uint8_t alarm_sec;
+ uint8_t min;
+ uint8_t alarm_min;
+ uint8_t hour;
+ uint8_t alarm_hour;
+ uint8_t day_of_week;
+ uint8_t day_of_month;
+ uint8_t month;
+ uint8_t year;
+ uint8_t reg_a;
+ uint8_t reg_b;
+ uint8_t reg_c;
+ uint8_t reg_d;
+ uint8_t nvram[128 - 14];
+} __packed;
+CTASSERT(sizeof(struct rtcdev) == 128);
+
+struct vrtc {
+ struct vm *vm;
+ struct mtx mtx;
+ struct callout callout;
+ u_int addr; /* RTC register to read or write */
+ sbintime_t base_uptime;
+ time_t base_rtctime;
+ struct rtcdev rtcdev;
+};
+
+#define VRTC_LOCK(vrtc) mtx_lock(&((vrtc)->mtx))
+#define VRTC_UNLOCK(vrtc) mtx_unlock(&((vrtc)->mtx))
+#define VRTC_LOCKED(vrtc) mtx_owned(&((vrtc)->mtx))
+
+/*
+ * RTC time is considered "broken" if:
+ * - RTC updates are halted by the guest
+ * - RTC date/time fields have invalid values
+ */
+#define VRTC_BROKEN_TIME ((time_t)-1)
+
+#define RTC_IRQ 8
+#define RTCSB_BIN 0x04
+#define RTCSB_ALL_INTRS (RTCSB_UINTR | RTCSB_AINTR | RTCSB_PINTR)
+#define rtc_halted(vrtc) ((vrtc->rtcdev.reg_b & RTCSB_HALT) != 0)
+#define aintr_enabled(vrtc) (((vrtc)->rtcdev.reg_b & RTCSB_AINTR) != 0)
+#define pintr_enabled(vrtc) (((vrtc)->rtcdev.reg_b & RTCSB_PINTR) != 0)
+#define uintr_enabled(vrtc) (((vrtc)->rtcdev.reg_b & RTCSB_UINTR) != 0)
+
+static void vrtc_callout_handler(void *arg);
+static void vrtc_set_reg_c(struct vrtc *vrtc, uint8_t newval);
+
+static MALLOC_DEFINE(M_VRTC, "vrtc", "bhyve virtual rtc");
+
+SYSCTL_DECL(_hw_vmm);
+SYSCTL_NODE(_hw_vmm, OID_AUTO, vrtc, CTLFLAG_RW, NULL, NULL);
+
+static int rtc_flag_broken_time = 1;
+SYSCTL_INT(_hw_vmm_vrtc, OID_AUTO, flag_broken_time, CTLFLAG_RDTUN,
+ &rtc_flag_broken_time, 0, "Stop guest when invalid RTC time is detected");
+
+static __inline bool
+divider_enabled(int reg_a)
+{
+ /*
+ * The RTC is counting only when dividers are not held in reset.
+ */
+ return ((reg_a & 0x70) == 0x20);
+}
+
+static __inline bool
+update_enabled(struct vrtc *vrtc)
+{
+ /*
+ * RTC date/time can be updated only if:
+ * - divider is not held in reset
+ * - guest has not disabled updates
+ * - the date/time fields have valid contents
+ */
+ if (!divider_enabled(vrtc->rtcdev.reg_a))
+ return (false);
+
+ if (rtc_halted(vrtc))
+ return (false);
+
+ if (vrtc->base_rtctime == VRTC_BROKEN_TIME)
+ return (false);
+
+ return (true);
+}
+
+static time_t
+vrtc_curtime(struct vrtc *vrtc)
+{
+ sbintime_t now, delta;
+ time_t t;
+
+ KASSERT(VRTC_LOCKED(vrtc), ("%s: vrtc not locked", __func__));
+
+ t = vrtc->base_rtctime;
+ if (update_enabled(vrtc)) {
+ now = sbinuptime();
+ delta = now - vrtc->base_uptime;
+ KASSERT(delta >= 0, ("vrtc_curtime: uptime went backwards: "
+ "%#lx to %#lx", vrtc->base_uptime, now));
+ t += delta / SBT_1S;
+ }
+ return (t);
+}
+
+static __inline uint8_t
+rtcset(struct rtcdev *rtc, int val)
+{
+
+ KASSERT(val >= 0 && val < 100, ("%s: invalid bin2bcd index %d",
+ __func__, val));
+
+ return ((rtc->reg_b & RTCSB_BIN) ? val : bin2bcd_data[val]);
+}
+
+static void
+secs_to_rtc(time_t rtctime, struct vrtc *vrtc, int force_update)
+{
+ struct clocktime ct;
+ struct timespec ts;
+ struct rtcdev *rtc;
+ int hour;
+
+ KASSERT(VRTC_LOCKED(vrtc), ("%s: vrtc not locked", __func__));
+
+ if (rtctime < 0) {
+ KASSERT(rtctime == VRTC_BROKEN_TIME,
+ ("%s: invalid vrtc time %#lx", __func__, rtctime));
+ return;
+ }
+
+ /*
+ * If the RTC is halted then the guest has "ownership" of the
+ * date/time fields. Don't update the RTC date/time fields in
+ * this case (unless forced).
+ */
+ if (rtc_halted(vrtc) && !force_update)
+ return;
+
+ ts.tv_sec = rtctime;
+ ts.tv_nsec = 0;
+ clock_ts_to_ct(&ts, &ct);
+
+ KASSERT(ct.sec >= 0 && ct.sec <= 59, ("invalid clocktime sec %d",
+ ct.sec));
+ KASSERT(ct.min >= 0 && ct.min <= 59, ("invalid clocktime min %d",
+ ct.min));
+ KASSERT(ct.hour >= 0 && ct.hour <= 23, ("invalid clocktime hour %d",
+ ct.hour));
+ KASSERT(ct.dow >= 0 && ct.dow <= 6, ("invalid clocktime wday %d",
+ ct.dow));
+ KASSERT(ct.day >= 1 && ct.day <= 31, ("invalid clocktime mday %d",
+ ct.day));
+ KASSERT(ct.mon >= 1 && ct.mon <= 12, ("invalid clocktime month %d",
+ ct.mon));
+ KASSERT(ct.year >= POSIX_BASE_YEAR, ("invalid clocktime year %d",
+ ct.year));
+
+ rtc = &vrtc->rtcdev;
+ rtc->sec = rtcset(rtc, ct.sec);
+ rtc->min = rtcset(rtc, ct.min);
+
+ hour = ct.hour;
+ if ((rtc->reg_b & RTCSB_24HR) == 0)
+ hour = (hour % 12) + 1; /* convert to a 12-hour format */
+
+ rtc->hour = rtcset(rtc, hour);
+
+ if ((rtc->reg_b & RTCSB_24HR) == 0 && ct.hour >= 12)
+ rtc->hour |= 0x80; /* set MSB to indicate PM */
+
+ rtc->day_of_week = rtcset(rtc, ct.dow + 1);
+ rtc->day_of_month = rtcset(rtc, ct.day);
+ rtc->month = rtcset(rtc, ct.mon);
+ rtc->year = rtcset(rtc, ct.year % 100);
+}
+
+static int
+rtcget(struct rtcdev *rtc, int val, int *retval)
+{
+ uint8_t upper, lower;
+
+ if (rtc->reg_b & RTCSB_BIN) {
+ *retval = val;
+ return (0);
+ }
+
+ lower = val & 0xf;
+ upper = (val >> 4) & 0xf;
+
+ if (lower > 9 || upper > 9)
+ return (-1);
+
+ *retval = upper * 10 + lower;
+ return (0);
+}
+
+static time_t
+rtc_to_secs(struct vrtc *vrtc)
+{
+ struct clocktime ct;
+ struct timespec ts;
+ struct rtcdev *rtc;
+ struct vm *vm;
+ int error, hour, pm, year;
+
+ KASSERT(VRTC_LOCKED(vrtc), ("%s: vrtc not locked", __func__));
+
+ vm = vrtc->vm;
+ rtc = &vrtc->rtcdev;
+
+ bzero(&ct, sizeof(struct clocktime));
+
+ error = rtcget(rtc, rtc->sec, &ct.sec);
+ if (error || ct.sec < 0 || ct.sec > 59) {
+ VM_CTR2(vm, "Invalid RTC sec %#x/%d", rtc->sec, ct.sec);
+ goto fail;
+ }
+
+ error = rtcget(rtc, rtc->min, &ct.min);
+ if (error || ct.min < 0 || ct.min > 59) {
+ VM_CTR2(vm, "Invalid RTC min %#x/%d", rtc->min, ct.min);
+ goto fail;
+ }
+
+ pm = 0;
+ hour = rtc->hour;
+ if ((rtc->reg_b & RTCSB_24HR) == 0) {
+ if (hour & 0x80) {
+ hour &= ~0x80;
+ pm = 1;
+ }
+ }
+ error = rtcget(rtc, hour, &ct.hour);
+ if ((rtc->reg_b & RTCSB_24HR) == 0) {
+ ct.hour -= 1;
+ if (pm)
+ ct.hour += 12;
+ }
+
+ if (error || ct.hour < 0 || ct.hour > 23) {
+ VM_CTR2(vm, "Invalid RTC hour %#x/%d", rtc->hour, ct.hour);
+ goto fail;
+ }
+
+ /*
+	 * Ignore 'rtc->day_of_week' since guests like Linux don't bother
+	 * setting it at all while others like OpenBSD/i386 set it incorrectly.
+	 *
+	 * clock_ct_to_ts() does not depend on 'ct.dow' anyway so ignore it.
+ */
+ ct.dow = -1;
+
+ error = rtcget(rtc, rtc->day_of_month, &ct.day);
+ if (error || ct.day < 1 || ct.day > 31) {
+ VM_CTR2(vm, "Invalid RTC mday %#x/%d", rtc->day_of_month,
+ ct.day);
+ goto fail;
+ }
+
+ error = rtcget(rtc, rtc->month, &ct.mon);
+ if (error || ct.mon < 1 || ct.mon > 12) {
+ VM_CTR2(vm, "Invalid RTC month %#x/%d", rtc->month, ct.mon);
+ goto fail;
+ }
+
+ error = rtcget(rtc, rtc->year, &year);
+ if (error || year < 0 || year > 99) {
+ VM_CTR2(vm, "Invalid RTC year %#x/%d", rtc->year, year);
+ goto fail;
+ }
+ if (year >= 70)
+ ct.year = 1900 + year;
+ else
+ ct.year = 2000 + year;
+
+ error = clock_ct_to_ts(&ct, &ts);
+ if (error || ts.tv_sec < 0) {
+ VM_CTR3(vm, "Invalid RTC clocktime.date %04d-%02d-%02d",
+ ct.year, ct.mon, ct.day);
+ VM_CTR3(vm, "Invalid RTC clocktime.time %02d:%02d:%02d",
+ ct.hour, ct.min, ct.sec);
+ goto fail;
+ }
+ return (ts.tv_sec); /* success */
+fail:
+ return (VRTC_BROKEN_TIME); /* failure */
+}
+
+static int
+vrtc_time_update(struct vrtc *vrtc, time_t newtime)
+{
+ struct rtcdev *rtc;
+ time_t oldtime;
+ uint8_t alarm_sec, alarm_min, alarm_hour;
+
+ KASSERT(VRTC_LOCKED(vrtc), ("%s: vrtc not locked", __func__));
+
+ rtc = &vrtc->rtcdev;
+ alarm_sec = rtc->alarm_sec;
+ alarm_min = rtc->alarm_min;
+ alarm_hour = rtc->alarm_hour;
+
+ oldtime = vrtc->base_rtctime;
+ VM_CTR2(vrtc->vm, "Updating RTC time from %#lx to %#lx",
+ oldtime, newtime);
+
+ if (newtime == oldtime)
+ return (0);
+
+ /*
+ * If 'newtime' indicates that RTC updates are disabled then just
+ * record that and return. There is no need to do alarm interrupt
+ * processing or update 'base_uptime' in this case.
+ */
+ if (newtime == VRTC_BROKEN_TIME) {
+ vrtc->base_rtctime = VRTC_BROKEN_TIME;
+ return (0);
+ }
+
+ /*
+ * Return an error if RTC updates are halted by the guest.
+ */
+ if (rtc_halted(vrtc)) {
+ VM_CTR0(vrtc->vm, "RTC update halted by guest");
+ return (EBUSY);
+ }
+
+ do {
+ /*
+ * If the alarm interrupt is enabled and 'oldtime' is valid
+ * then visit all the seconds between 'oldtime' and 'newtime'
+ * to check for the alarm condition.
+ *
+ * Otherwise move the RTC time forward directly to 'newtime'.
+ */
+ if (aintr_enabled(vrtc) && oldtime != VRTC_BROKEN_TIME)
+ vrtc->base_rtctime++;
+ else
+ vrtc->base_rtctime = newtime;
+
+ if (aintr_enabled(vrtc)) {
+ /*
+ * Update the RTC date/time fields before checking
+ * if the alarm conditions are satisfied.
+ */
+ secs_to_rtc(vrtc->base_rtctime, vrtc, 0);
+
+ if ((alarm_sec >= 0xC0 || alarm_sec == rtc->sec) &&
+ (alarm_min >= 0xC0 || alarm_min == rtc->min) &&
+ (alarm_hour >= 0xC0 || alarm_hour == rtc->hour)) {
+ vrtc_set_reg_c(vrtc, rtc->reg_c | RTCIR_ALARM);
+ }
+ }
+ } while (vrtc->base_rtctime != newtime);
+
+ if (uintr_enabled(vrtc))
+ vrtc_set_reg_c(vrtc, rtc->reg_c | RTCIR_UPDATE);
+
+ vrtc->base_uptime = sbinuptime();
+
+ return (0);
+}
+
+static sbintime_t
+vrtc_freq(struct vrtc *vrtc)
+{
+ int ratesel;
+
+ static sbintime_t pf[16] = {
+ 0,
+ SBT_1S / 256,
+ SBT_1S / 128,
+ SBT_1S / 8192,
+ SBT_1S / 4096,
+ SBT_1S / 2048,
+ SBT_1S / 1024,
+ SBT_1S / 512,
+ SBT_1S / 256,
+ SBT_1S / 128,
+ SBT_1S / 64,
+ SBT_1S / 32,
+ SBT_1S / 16,
+ SBT_1S / 8,
+ SBT_1S / 4,
+ SBT_1S / 2,
+ };
+
+ KASSERT(VRTC_LOCKED(vrtc), ("%s: vrtc not locked", __func__));
+
+ /*
+ * If both periodic and alarm interrupts are enabled then use the
+ * periodic frequency to drive the callout. The minimum periodic
+ * frequency (2 Hz) is higher than the alarm frequency (1 Hz) so
+ * piggyback the alarm on top of it. The same argument applies to
+ * the update interrupt.
+ */
+ if (pintr_enabled(vrtc) && divider_enabled(vrtc->rtcdev.reg_a)) {
+ ratesel = vrtc->rtcdev.reg_a & 0xf;
+ return (pf[ratesel]);
+ } else if (aintr_enabled(vrtc) && update_enabled(vrtc)) {
+ return (SBT_1S);
+ } else if (uintr_enabled(vrtc) && update_enabled(vrtc)) {
+ return (SBT_1S);
+ } else {
+ return (0);
+ }
+}
+
+static void
+vrtc_callout_reset(struct vrtc *vrtc, sbintime_t freqsbt)
+{
+
+ KASSERT(VRTC_LOCKED(vrtc), ("%s: vrtc not locked", __func__));
+
+ if (freqsbt == 0) {
+ if (callout_active(&vrtc->callout)) {
+ VM_CTR0(vrtc->vm, "RTC callout stopped");
+ callout_stop(&vrtc->callout);
+ }
+ return;
+ }
+ VM_CTR1(vrtc->vm, "RTC callout frequency %d hz", SBT_1S / freqsbt);
+ callout_reset_sbt(&vrtc->callout, freqsbt, 0, vrtc_callout_handler,
+ vrtc, 0);
+}
+
+static void
+vrtc_callout_handler(void *arg)
+{
+ struct vrtc *vrtc = arg;
+ sbintime_t freqsbt;
+ time_t rtctime;
+ int error;
+
+ VM_CTR0(vrtc->vm, "vrtc callout fired");
+
+ VRTC_LOCK(vrtc);
+ if (callout_pending(&vrtc->callout)) /* callout was reset */
+ goto done;
+
+ if (!callout_active(&vrtc->callout)) /* callout was stopped */
+ goto done;
+
+ callout_deactivate(&vrtc->callout);
+
+ KASSERT((vrtc->rtcdev.reg_b & RTCSB_ALL_INTRS) != 0,
+ ("gratuitous vrtc callout"));
+
+ if (pintr_enabled(vrtc))
+ vrtc_set_reg_c(vrtc, vrtc->rtcdev.reg_c | RTCIR_PERIOD);
+
+ if (aintr_enabled(vrtc) || uintr_enabled(vrtc)) {
+ rtctime = vrtc_curtime(vrtc);
+ error = vrtc_time_update(vrtc, rtctime);
+ KASSERT(error == 0, ("%s: vrtc_time_update error %d",
+ __func__, error));
+ }
+
+ freqsbt = vrtc_freq(vrtc);
+ KASSERT(freqsbt != 0, ("%s: vrtc frequency cannot be zero", __func__));
+ vrtc_callout_reset(vrtc, freqsbt);
+done:
+ VRTC_UNLOCK(vrtc);
+}
+
+static __inline void
+vrtc_callout_check(struct vrtc *vrtc, sbintime_t freq)
+{
+ int active;
+
+ active = callout_active(&vrtc->callout) ? 1 : 0;
+ KASSERT((freq == 0 && !active) || (freq != 0 && active),
+ ("vrtc callout %s with frequency %#lx",
+ active ? "active" : "inactive", freq));
+}
+
+static void
+vrtc_set_reg_c(struct vrtc *vrtc, uint8_t newval)
+{
+ struct rtcdev *rtc;
+ int oldirqf, newirqf;
+ uint8_t oldval, changed;
+
+ KASSERT(VRTC_LOCKED(vrtc), ("%s: vrtc not locked", __func__));
+
+ rtc = &vrtc->rtcdev;
+ newval &= RTCIR_ALARM | RTCIR_PERIOD | RTCIR_UPDATE;
+
+ oldirqf = rtc->reg_c & RTCIR_INT;
+ if ((aintr_enabled(vrtc) && (newval & RTCIR_ALARM) != 0) ||
+ (pintr_enabled(vrtc) && (newval & RTCIR_PERIOD) != 0) ||
+ (uintr_enabled(vrtc) && (newval & RTCIR_UPDATE) != 0)) {
+ newirqf = RTCIR_INT;
+ } else {
+ newirqf = 0;
+ }
+
+ oldval = rtc->reg_c;
+ rtc->reg_c = newirqf | newval;
+ changed = oldval ^ rtc->reg_c;
+ if (changed) {
+ VM_CTR2(vrtc->vm, "RTC reg_c changed from %#x to %#x",
+ oldval, rtc->reg_c);
+ }
+
+ if (!oldirqf && newirqf) {
+ VM_CTR1(vrtc->vm, "RTC irq %d asserted", RTC_IRQ);
+ vatpic_pulse_irq(vrtc->vm, RTC_IRQ);
+ vioapic_pulse_irq(vrtc->vm, RTC_IRQ);
+ } else if (oldirqf && !newirqf) {
+ VM_CTR1(vrtc->vm, "RTC irq %d deasserted", RTC_IRQ);
+ }
+}
+
+static int
+vrtc_set_reg_b(struct vrtc *vrtc, uint8_t newval)
+{
+ struct rtcdev *rtc;
+ sbintime_t oldfreq, newfreq;
+ time_t curtime, rtctime;
+ int error;
+ uint8_t oldval, changed;
+
+ KASSERT(VRTC_LOCKED(vrtc), ("%s: vrtc not locked", __func__));
+
+ rtc = &vrtc->rtcdev;
+ oldval = rtc->reg_b;
+ oldfreq = vrtc_freq(vrtc);
+
+ rtc->reg_b = newval;
+ changed = oldval ^ newval;
+ if (changed) {
+ VM_CTR2(vrtc->vm, "RTC reg_b changed from %#x to %#x",
+ oldval, newval);
+ }
+
+ if (changed & RTCSB_HALT) {
+ if ((newval & RTCSB_HALT) == 0) {
+ rtctime = rtc_to_secs(vrtc);
+ if (rtctime == VRTC_BROKEN_TIME) {
+ /*
+ * Stop updating the RTC if the date/time
+ * programmed by the guest is not correct.
+ */
+ VM_CTR0(vrtc->vm, "Invalid RTC date/time "
+ "programming detected");
+
+ if (rtc_flag_broken_time)
+ return (-1);
+ }
+ } else {
+ curtime = vrtc_curtime(vrtc);
+ KASSERT(curtime == vrtc->base_rtctime, ("%s: mismatch "
+ "between vrtc basetime (%#lx) and curtime (%#lx)",
+ __func__, vrtc->base_rtctime, curtime));
+
+ /*
+ * Force a refresh of the RTC date/time fields so
+ * they reflect the time right before the guest set
+ * the HALT bit.
+ */
+ secs_to_rtc(curtime, vrtc, 1);
+
+ /*
+ * Updates are halted so mark 'base_rtctime' to denote
+ * that the RTC date/time is in flux.
+ */
+ rtctime = VRTC_BROKEN_TIME;
+ rtc->reg_b &= ~RTCSB_UINTR;
+ }
+ error = vrtc_time_update(vrtc, rtctime);
+ KASSERT(error == 0, ("vrtc_time_update error %d", error));
+ }
+
+ /*
+ * Side effect of changes to the interrupt enable bits.
+ */
+ if (changed & RTCSB_ALL_INTRS)
+ vrtc_set_reg_c(vrtc, vrtc->rtcdev.reg_c);
+
+ /*
+	 * Reset the callout if its frequency has changed.
+ */
+ newfreq = vrtc_freq(vrtc);
+ if (newfreq != oldfreq)
+ vrtc_callout_reset(vrtc, newfreq);
+ else
+ vrtc_callout_check(vrtc, newfreq);
+
+ /*
+ * The side effect of bits that control the RTC date/time format
+ * is handled lazily when those fields are actually read.
+ */
+ return (0);
+}
+
+static void
+vrtc_set_reg_a(struct vrtc *vrtc, uint8_t newval)
+{
+ sbintime_t oldfreq, newfreq;
+ uint8_t oldval, changed;
+
+ KASSERT(VRTC_LOCKED(vrtc), ("%s: vrtc not locked", __func__));
+
+ newval &= ~RTCSA_TUP;
+ oldval = vrtc->rtcdev.reg_a;
+ oldfreq = vrtc_freq(vrtc);
+
+ if (divider_enabled(oldval) && !divider_enabled(newval)) {
+ VM_CTR2(vrtc->vm, "RTC divider held in reset at %#lx/%#lx",
+ vrtc->base_rtctime, vrtc->base_uptime);
+ } else if (!divider_enabled(oldval) && divider_enabled(newval)) {
+ /*
+ * If the dividers are coming out of reset then update
+ * 'base_uptime' before this happens. This is done to
+ * maintain the illusion that the RTC date/time was frozen
+ * while the dividers were disabled.
+ */
+ vrtc->base_uptime = sbinuptime();
+ VM_CTR2(vrtc->vm, "RTC divider out of reset at %#lx/%#lx",
+ vrtc->base_rtctime, vrtc->base_uptime);
+ } else {
+ /* NOTHING */
+ }
+
+ vrtc->rtcdev.reg_a = newval;
+ changed = oldval ^ newval;
+ if (changed) {
+ VM_CTR2(vrtc->vm, "RTC reg_a changed from %#x to %#x",
+ oldval, newval);
+ }
+
+ /*
+ * Side effect of changes to rate select and divider enable bits.
+ */
+ newfreq = vrtc_freq(vrtc);
+ if (newfreq != oldfreq)
+ vrtc_callout_reset(vrtc, newfreq);
+ else
+ vrtc_callout_check(vrtc, newfreq);
+}
+
+int
+vrtc_set_time(struct vm *vm, time_t secs)
+{
+ struct vrtc *vrtc;
+ int error;
+
+ vrtc = vm_rtc(vm);
+ VRTC_LOCK(vrtc);
+ error = vrtc_time_update(vrtc, secs);
+ VRTC_UNLOCK(vrtc);
+
+ if (error) {
+ VM_CTR2(vrtc->vm, "Error %d setting RTC time to %#lx", error,
+ secs);
+ } else {
+ VM_CTR1(vrtc->vm, "RTC time set to %#lx", secs);
+ }
+
+ return (error);
+}
+
+time_t
+vrtc_get_time(struct vm *vm)
+{
+ struct vrtc *vrtc;
+ time_t t;
+
+ vrtc = vm_rtc(vm);
+ VRTC_LOCK(vrtc);
+ t = vrtc_curtime(vrtc);
+ VRTC_UNLOCK(vrtc);
+
+ return (t);
+}
+
+int
+vrtc_nvram_write(struct vm *vm, int offset, uint8_t value)
+{
+ struct vrtc *vrtc;
+ uint8_t *ptr;
+
+ vrtc = vm_rtc(vm);
+
+ /*
+ * Don't allow writes to RTC control registers or the date/time fields.
+ */
+ if (offset < offsetof(struct rtcdev, nvram[0]) ||
+ offset >= sizeof(struct rtcdev)) {
+ VM_CTR1(vrtc->vm, "RTC nvram write to invalid offset %d",
+ offset);
+ return (EINVAL);
+ }
+
+ VRTC_LOCK(vrtc);
+ ptr = (uint8_t *)(&vrtc->rtcdev);
+ ptr[offset] = value;
+ VM_CTR2(vrtc->vm, "RTC nvram write %#x to offset %#x", value, offset);
+ VRTC_UNLOCK(vrtc);
+
+ return (0);
+}
+
+int
+vrtc_nvram_read(struct vm *vm, int offset, uint8_t *retval)
+{
+ struct vrtc *vrtc;
+ time_t curtime;
+ uint8_t *ptr;
+
+ /*
+ * Allow all offsets in the RTC to be read.
+ */
+ if (offset < 0 || offset >= sizeof(struct rtcdev))
+ return (EINVAL);
+
+ vrtc = vm_rtc(vm);
+ VRTC_LOCK(vrtc);
+
+ /*
+ * Update RTC date/time fields if necessary.
+ */
+ if (offset < 10) {
+ curtime = vrtc_curtime(vrtc);
+ secs_to_rtc(curtime, vrtc, 0);
+ }
+
+ ptr = (uint8_t *)(&vrtc->rtcdev);
+ *retval = ptr[offset];
+
+ VRTC_UNLOCK(vrtc);
+ return (0);
+}
+
+int
+vrtc_addr_handler(struct vm *vm, int vcpuid, bool in, int port, int bytes,
+ uint32_t *val)
+{
+ struct vrtc *vrtc;
+
+ vrtc = vm_rtc(vm);
+
+ if (bytes != 1)
+ return (-1);
+
+ if (in) {
+ *val = 0xff;
+ return (0);
+ }
+
+ VRTC_LOCK(vrtc);
+ vrtc->addr = *val & 0x7f;
+ VRTC_UNLOCK(vrtc);
+
+ return (0);
+}
+
+int
+vrtc_data_handler(struct vm *vm, int vcpuid, bool in, int port, int bytes,
+ uint32_t *val)
+{
+ struct vrtc *vrtc;
+ struct rtcdev *rtc;
+ time_t curtime;
+ int error, offset;
+
+ vrtc = vm_rtc(vm);
+ rtc = &vrtc->rtcdev;
+
+ if (bytes != 1)
+ return (-1);
+
+ VRTC_LOCK(vrtc);
+ offset = vrtc->addr;
+ if (offset >= sizeof(struct rtcdev)) {
+ VRTC_UNLOCK(vrtc);
+ return (-1);
+ }
+
+ error = 0;
+ curtime = vrtc_curtime(vrtc);
+ vrtc_time_update(vrtc, curtime);
+
+ if (in) {
+ /*
+ * Update RTC date/time fields if necessary.
+ */
+ if (offset < 10)
+ secs_to_rtc(curtime, vrtc, 0);
+
+ if (offset == 12) {
+ /*
+ * XXX
+ * reg_c interrupt flags are updated only if the
+ * corresponding interrupt enable bit in reg_b is set.
+ */
+ *val = vrtc->rtcdev.reg_c;
+ vrtc_set_reg_c(vrtc, 0);
+ } else {
+ *val = *((uint8_t *)rtc + offset);
+ }
+ VCPU_CTR2(vm, vcpuid, "Read value %#x from RTC offset %#x",
+ *val, offset);
+ } else {
+ switch (offset) {
+ case 10:
+ VCPU_CTR1(vm, vcpuid, "RTC reg_a set to %#x", *val);
+ vrtc_set_reg_a(vrtc, *val);
+ break;
+ case 11:
+ VCPU_CTR1(vm, vcpuid, "RTC reg_b set to %#x", *val);
+ error = vrtc_set_reg_b(vrtc, *val);
+ break;
+ case 12:
+ VCPU_CTR1(vm, vcpuid, "RTC reg_c set to %#x (ignored)",
+ *val);
+ break;
+ case 13:
+ VCPU_CTR1(vm, vcpuid, "RTC reg_d set to %#x (ignored)",
+ *val);
+ break;
+ case 0:
+ /*
+ * High order bit of 'seconds' is readonly.
+ */
+ *val &= 0x7f;
+ /* FALLTHRU */
+ default:
+ VCPU_CTR2(vm, vcpuid, "RTC offset %#x set to %#x",
+ offset, *val);
+ *((uint8_t *)rtc + offset) = *val;
+ break;
+ }
+ }
+ VRTC_UNLOCK(vrtc);
+ return (error);
+}
+
+void
+vrtc_reset(struct vrtc *vrtc)
+{
+ struct rtcdev *rtc;
+
+ VRTC_LOCK(vrtc);
+
+ rtc = &vrtc->rtcdev;
+ vrtc_set_reg_b(vrtc, rtc->reg_b & ~(RTCSB_ALL_INTRS | RTCSB_SQWE));
+ vrtc_set_reg_c(vrtc, 0);
+ KASSERT(!callout_active(&vrtc->callout), ("rtc callout still active"));
+
+ VRTC_UNLOCK(vrtc);
+}
+
+struct vrtc *
+vrtc_init(struct vm *vm)
+{
+ struct vrtc *vrtc;
+ struct rtcdev *rtc;
+ time_t curtime;
+
+ vrtc = malloc(sizeof(struct vrtc), M_VRTC, M_WAITOK | M_ZERO);
+ vrtc->vm = vm;
+ mtx_init(&vrtc->mtx, "vrtc lock", NULL, MTX_DEF);
+ callout_init(&vrtc->callout, 1);
+
+ /* Allow dividers to keep time but disable everything else */
+ rtc = &vrtc->rtcdev;
+ rtc->reg_a = 0x20;
+ rtc->reg_b = RTCSB_24HR;
+ rtc->reg_c = 0;
+ rtc->reg_d = RTCSD_PWR;
+
+ /* Reset the index register to a safe value. */
+ vrtc->addr = RTC_STATUSD;
+
+ /*
+ * Initialize RTC time to 00:00:00 Jan 1, 1970.
+ */
+ curtime = 0;
+
+ VRTC_LOCK(vrtc);
+ vrtc->base_rtctime = VRTC_BROKEN_TIME;
+ vrtc_time_update(vrtc, curtime);
+ secs_to_rtc(curtime, vrtc, 0);
+ VRTC_UNLOCK(vrtc);
+
+ return (vrtc);
+}
+
+void
+vrtc_cleanup(struct vrtc *vrtc)
+{
+
+ callout_drain(&vrtc->callout);
+ free(vrtc, M_VRTC);
+}
diff --git a/sys/amd64/vmm/io/vrtc.h b/sys/amd64/vmm/io/vrtc.h
new file mode 100644
index 0000000..6fbbc9c
--- /dev/null
+++ b/sys/amd64/vmm/io/vrtc.h
@@ -0,0 +1,50 @@
+/*-
+ * Copyright (c) 2014 Neel Natu (neel@freebsd.org)
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice unmodified, this list of conditions, and the following
+ * disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _VRTC_H_
+#define _VRTC_H_
+
+#include <isa/isareg.h>
+
+struct vrtc;
+
+struct vrtc *vrtc_init(struct vm *vm);
+void vrtc_cleanup(struct vrtc *vrtc);
+void vrtc_reset(struct vrtc *vrtc);
+
+time_t vrtc_get_time(struct vm *vm);
+int vrtc_set_time(struct vm *vm, time_t secs);
+int vrtc_nvram_write(struct vm *vm, int offset, uint8_t value);
+int vrtc_nvram_read(struct vm *vm, int offset, uint8_t *retval);
+
+int vrtc_addr_handler(struct vm *vm, int vcpuid, bool in, int port, int bytes,
+ uint32_t *val);
+int vrtc_data_handler(struct vm *vm, int vcpuid, bool in, int port, int bytes,
+ uint32_t *val);
+
+#endif
diff --git a/sys/amd64/vmm/vmm.c b/sys/amd64/vmm/vmm.c
index 4739a86..7f90c61 100644
--- a/sys/amd64/vmm/vmm.c
+++ b/sys/amd64/vmm/vmm.c
@@ -75,6 +75,7 @@ __FBSDID("$FreeBSD$");
#include "vioapic.h"
#include "vlapic.h"
#include "vpmtmr.h"
+#include "vrtc.h"
#include "vmm_ipi.h"
#include "vmm_stat.h"
#include "vmm_lapic.h"
@@ -100,12 +101,15 @@ struct vcpu {
uint64_t exitintinfo; /* (i) events pending at VM exit */
int nmi_pending; /* (i) NMI pending */
int extint_pending; /* (i) INTR pending */
- struct vm_exception exception; /* (x) exception collateral */
int exception_pending; /* (i) exception pending */
+ int exc_vector; /* (x) exception collateral */
+ int exc_errcode_valid;
+ uint32_t exc_errcode;
struct savefpu *guestfpu; /* (a,i) guest fpu state */
uint64_t guest_xcr0; /* (i) guest %xcr0 register */
void *stats; /* (a,i) statistics */
struct vm_exit exitinfo; /* (x) exit reason and collateral */
+ uint64_t nextrip; /* (x) next instruction to execute */
};
#define vcpu_lock_initialized(v) mtx_initialized(&((v)->mtx))
@@ -136,6 +140,7 @@ struct vm {
struct vatpic *vatpic; /* (i) virtual atpic */
struct vatpit *vatpit; /* (i) virtual atpit */
struct vpmtmr *vpmtmr; /* (i) virtual ACPI PM timer */
+ struct vrtc *vrtc; /* (o) virtual RTC */
volatile cpuset_t active_cpus; /* (i) active vcpus */
int suspend; /* (i) stop VM execution */
volatile cpuset_t suspended_cpus; /* (i) suspended vcpus */
@@ -376,6 +381,8 @@ vm_init(struct vm *vm, bool create)
vm->vatpic = vatpic_init(vm);
vm->vatpit = vatpit_init(vm);
vm->vpmtmr = vpmtmr_init(vm);
+ if (create)
+ vm->vrtc = vrtc_init(vm);
CPU_ZERO(&vm->active_cpus);
@@ -438,6 +445,10 @@ vm_cleanup(struct vm *vm, bool destroy)
if (vm->iommu != NULL)
iommu_destroy_domain(vm->iommu);
+ if (destroy)
+ vrtc_cleanup(vm->vrtc);
+ else
+ vrtc_reset(vm->vrtc);
vpmtmr_cleanup(vm->vpmtmr);
vatpit_cleanup(vm->vatpit);
vhpet_cleanup(vm->vhpet);
@@ -841,16 +852,26 @@ vm_get_register(struct vm *vm, int vcpu, int reg, uint64_t *retval)
}
int
-vm_set_register(struct vm *vm, int vcpu, int reg, uint64_t val)
+vm_set_register(struct vm *vm, int vcpuid, int reg, uint64_t val)
{
+ struct vcpu *vcpu;
+ int error;
- if (vcpu < 0 || vcpu >= VM_MAXCPU)
+ if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
return (EINVAL);
if (reg >= VM_REG_LAST)
return (EINVAL);
- return (VMSETREG(vm->cookie, vcpu, reg, val));
+ error = VMSETREG(vm->cookie, vcpuid, reg, val);
+ if (error || reg != VM_REG_GUEST_RIP)
+ return (error);
+
+ /* Set 'nextrip' to match the value of %rip */
+ VCPU_CTR1(vm, vcpuid, "Setting nextrip to %#lx", val);
+ vcpu = &vm->vcpu[vcpuid];
+ vcpu->nextrip = val;
+ return (0);
}
static boolean_t
@@ -1102,7 +1123,7 @@ vm_handle_hlt(struct vm *vm, int vcpuid, bool intr_disabled, bool *retu)
{
struct vcpu *vcpu;
const char *wmesg;
- int error, t, vcpu_halted, vm_halted;
+ int t, vcpu_halted, vm_halted;
KASSERT(!CPU_ISSET(vcpuid, &vm->halted_cpus), ("vcpu already halted"));
@@ -1110,22 +1131,6 @@ vm_handle_hlt(struct vm *vm, int vcpuid, bool intr_disabled, bool *retu)
vcpu_halted = 0;
vm_halted = 0;
- /*
- * The typical way to halt a cpu is to execute: "sti; hlt"
- *
- * STI sets RFLAGS.IF to enable interrupts. However, the processor
- * remains in an "interrupt shadow" for an additional instruction
- * following the STI. This guarantees that "sti; hlt" sequence is
- * atomic and a pending interrupt will be recognized after the HLT.
- *
- * After the HLT emulation is done the vcpu is no longer in an
- * interrupt shadow and a pending interrupt can be injected on
- * the next entry into the guest.
- */
- error = vm_set_register(vm, vcpuid, VM_REG_GUEST_INTR_SHADOW, 0);
- KASSERT(error == 0, ("%s: error %d clearing interrupt shadow",
- __func__, error));
-
vcpu_lock(vcpu);
while (1) {
/*
@@ -1206,6 +1211,9 @@ vm_handle_paging(struct vm *vm, int vcpuid, bool *retu)
vcpu = &vm->vcpu[vcpuid];
vme = &vcpu->exitinfo;
+ KASSERT(vme->inst_length == 0, ("%s: invalid inst_length %d",
+ __func__, vme->inst_length));
+
ftype = vme->u.paging.fault_type;
KASSERT(ftype == VM_PROT_READ ||
ftype == VM_PROT_WRITE || ftype == VM_PROT_EXECUTE,
@@ -1231,9 +1239,6 @@ vm_handle_paging(struct vm *vm, int vcpuid, bool *retu)
if (rv != KERN_SUCCESS)
return (EFAULT);
done:
- /* restart execution at the faulting instruction */
- vme->inst_length = 0;
-
return (0);
}
@@ -1288,10 +1293,13 @@ vm_handle_inst_emul(struct vm *vm, int vcpuid, bool *retu)
return (EFAULT);
/*
- * If the instruction length is not specified the update it now.
+ * If the instruction length was not specified then update it now
+ * along with 'nextrip'.
*/
- if (vme->inst_length == 0)
+ if (vme->inst_length == 0) {
vme->inst_length = vie->num_processed;
+ vcpu->nextrip += vie->num_processed;
+ }
/* return to userland unless this is an in-kernel emulated device */
if (gpa >= DEFAULT_APIC_BASE && gpa < DEFAULT_APIC_BASE + PAGE_SIZE) {
@@ -1440,7 +1448,7 @@ vm_run(struct vm *vm, struct vm_run *vmrun)
int error, vcpuid;
struct vcpu *vcpu;
struct pcb *pcb;
- uint64_t tscval, rip;
+ uint64_t tscval;
struct vm_exit *vme;
bool retu, intr_disabled;
pmap_t pmap;
@@ -1462,7 +1470,6 @@ vm_run(struct vm *vm, struct vm_run *vmrun)
pmap = vmspace_pmap(vm->vmspace);
vcpu = &vm->vcpu[vcpuid];
vme = &vcpu->exitinfo;
- rip = vmrun->rip;
restart:
critical_enter();
@@ -1477,7 +1484,7 @@ restart:
restore_guest_fpustate(vcpu);
vcpu_require_state(vm, vcpuid, VCPU_RUNNING);
- error = VMRUN(vm->cookie, vcpuid, rip, pmap, rptr, sptr);
+ error = VMRUN(vm->cookie, vcpuid, vcpu->nextrip, pmap, rptr, sptr);
vcpu_require_state(vm, vcpuid, VCPU_FROZEN);
save_guest_fpustate(vcpu);
@@ -1488,6 +1495,7 @@ restart:
if (error == 0) {
retu = false;
+ vcpu->nextrip = vme->rip + vme->inst_length;
switch (vme->exitcode) {
case VM_EXITCODE_SUSPENDED:
error = vm_handle_suspend(vm, vcpuid, &retu);
@@ -1524,10 +1532,8 @@ restart:
}
}
- if (error == 0 && retu == false) {
- rip = vme->rip + vme->inst_length;
+ if (error == 0 && retu == false)
goto restart;
- }
/* copy the exit information */
bcopy(vme, &vmrun->vm_exit, sizeof(struct vm_exit));
@@ -1535,6 +1541,49 @@ restart:
}
int
+vm_restart_instruction(void *arg, int vcpuid)
+{
+ struct vm *vm;
+ struct vcpu *vcpu;
+ enum vcpu_state state;
+ uint64_t rip;
+ int error;
+
+ vm = arg;
+ if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
+ return (EINVAL);
+
+ vcpu = &vm->vcpu[vcpuid];
+ state = vcpu_get_state(vm, vcpuid, NULL);
+ if (state == VCPU_RUNNING) {
+ /*
+ * When a vcpu is "running" the next instruction is determined
+ * by adding 'rip' and 'inst_length' in the vcpu's 'exitinfo'.
+ * Thus setting 'inst_length' to zero will cause the current
+ * instruction to be restarted.
+ */
+ vcpu->exitinfo.inst_length = 0;
+ VCPU_CTR1(vm, vcpuid, "restarting instruction at %#lx by "
+ "setting inst_length to zero", vcpu->exitinfo.rip);
+ } else if (state == VCPU_FROZEN) {
+ /*
+ * When a vcpu is "frozen" it is outside the critical section
+ * around VMRUN() and 'nextrip' points to the next instruction.
+ * Thus instruction restart is achieved by setting 'nextrip'
+ * to the vcpu's %rip.
+ */
+ error = vm_get_register(vm, vcpuid, VM_REG_GUEST_RIP, &rip);
+ KASSERT(!error, ("%s: error %d getting rip", __func__, error));
+ VCPU_CTR2(vm, vcpuid, "restarting instruction by updating "
+ "nextrip from %#lx to %#lx", vcpu->nextrip, rip);
+ vcpu->nextrip = rip;
+ } else {
+ panic("%s: invalid state %d", __func__, state);
+ }
+ return (0);
+}
+
+int
vm_exit_intinfo(struct vm *vm, int vcpuid, uint64_t info)
{
struct vcpu *vcpu;
@@ -1664,11 +1713,11 @@ vcpu_exception_intinfo(struct vcpu *vcpu)
uint64_t info = 0;
if (vcpu->exception_pending) {
- info = vcpu->exception.vector & 0xff;
+ info = vcpu->exc_vector & 0xff;
info |= VM_INTINFO_VALID | VM_INTINFO_HWEXCEPTION;
- if (vcpu->exception.error_code_valid) {
+ if (vcpu->exc_errcode_valid) {
info |= VM_INTINFO_DEL_ERRCODE;
- info |= (uint64_t)vcpu->exception.error_code << 32;
+ info |= (uint64_t)vcpu->exc_errcode << 32;
}
}
return (info);
@@ -1693,7 +1742,7 @@ vm_entry_intinfo(struct vm *vm, int vcpuid, uint64_t *retinfo)
info2 = vcpu_exception_intinfo(vcpu);
vcpu->exception_pending = 0;
VCPU_CTR2(vm, vcpuid, "Exception %d delivered: %#lx",
- vcpu->exception.vector, info2);
+ vcpu->exc_vector, info2);
}
if ((info1 & VM_INTINFO_VALID) && (info2 & VM_INTINFO_VALID)) {
@@ -1731,14 +1780,16 @@ vm_get_intinfo(struct vm *vm, int vcpuid, uint64_t *info1, uint64_t *info2)
}
int
-vm_inject_exception(struct vm *vm, int vcpuid, struct vm_exception *exception)
+vm_inject_exception(struct vm *vm, int vcpuid, int vector, int errcode_valid,
+ uint32_t errcode, int restart_instruction)
{
struct vcpu *vcpu;
+ int error;
if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
return (EINVAL);
- if (exception->vector < 0 || exception->vector >= 32)
+ if (vector < 0 || vector >= 32)
return (EINVAL);
/*
@@ -1746,21 +1797,35 @@ vm_inject_exception(struct vm *vm, int vcpuid, struct vm_exception *exception)
* the guest. It is a derived exception that results from specific
* combinations of nested faults.
*/
- if (exception->vector == IDT_DF)
+ if (vector == IDT_DF)
return (EINVAL);
vcpu = &vm->vcpu[vcpuid];
if (vcpu->exception_pending) {
VCPU_CTR2(vm, vcpuid, "Unable to inject exception %d due to "
- "pending exception %d", exception->vector,
- vcpu->exception.vector);
+ "pending exception %d", vector, vcpu->exc_vector);
return (EBUSY);
}
+ /*
+ * From section 26.6.1 "Interruptibility State" in Intel SDM:
+ *
+ * Event blocking by "STI" or "MOV SS" is cleared after guest executes
+ * one instruction or incurs an exception.
+ */
+ error = vm_set_register(vm, vcpuid, VM_REG_GUEST_INTR_SHADOW, 0);
+ KASSERT(error == 0, ("%s: error %d clearing interrupt shadow",
+ __func__, error));
+
+ if (restart_instruction)
+ vm_restart_instruction(vm, vcpuid);
+
vcpu->exception_pending = 1;
- vcpu->exception = *exception;
- VCPU_CTR1(vm, vcpuid, "Exception %d pending", exception->vector);
+ vcpu->exc_vector = vector;
+ vcpu->exc_errcode = errcode;
+ vcpu->exc_errcode_valid = errcode_valid;
+ VCPU_CTR1(vm, vcpuid, "Exception %d pending", vector);
return (0);
}
@@ -1768,28 +1833,15 @@ void
vm_inject_fault(void *vmarg, int vcpuid, int vector, int errcode_valid,
int errcode)
{
- struct vm_exception exception;
- struct vm_exit *vmexit;
struct vm *vm;
- int error;
+ int error, restart_instruction;
vm = vmarg;
+ restart_instruction = 1;
- exception.vector = vector;
- exception.error_code = errcode;
- exception.error_code_valid = errcode_valid;
- error = vm_inject_exception(vm, vcpuid, &exception);
+ error = vm_inject_exception(vm, vcpuid, vector, errcode_valid,
+ errcode, restart_instruction);
KASSERT(error == 0, ("vm_inject_exception error %d", error));
-
- /*
- * A fault-like exception allows the instruction to be restarted
- * after the exception handler returns.
- *
- * By setting the inst_length to 0 we ensure that the instruction
- * pointer remains at the faulting instruction.
- */
- vmexit = vm_exitinfo(vm, vcpuid);
- vmexit->inst_length = 0;
}
void
@@ -2223,6 +2275,13 @@ vm_pmtmr(struct vm *vm)
return (vm->vpmtmr);
}
+struct vrtc *
+vm_rtc(struct vm *vm)
+{
+
+ return (vm->vrtc);
+}
+
enum vm_reg_name
vm_segment_name(int seg)
{
diff --git a/sys/amd64/vmm/vmm_dev.c b/sys/amd64/vmm/vmm_dev.c
index a85109e..0293d191 100644
--- a/sys/amd64/vmm/vmm_dev.c
+++ b/sys/amd64/vmm/vmm_dev.c
@@ -58,6 +58,7 @@ __FBSDID("$FreeBSD$");
#include "io/vatpic.h"
#include "io/vioapic.h"
#include "io/vhpet.h"
+#include "io/vrtc.h"
struct vmmdev_softc {
struct vm *vm; /* vm instance cookie */
@@ -174,6 +175,8 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
struct vm_activate_cpu *vac;
struct vm_cpuset *vm_cpuset;
struct vm_intinfo *vmii;
+ struct vm_rtc_time *rtctime;
+ struct vm_rtc_data *rtcdata;
sc = vmmdev_lookup2(cdev);
if (sc == NULL)
@@ -202,6 +205,7 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
case VM_ACTIVATE_CPU:
case VM_SET_INTINFO:
case VM_GET_INTINFO:
+ case VM_RESTART_INSTRUCTION:
/*
* XXX fragile, handle with care
* Assumes that the first field of the ioctl data is the vcpu.
@@ -307,7 +311,9 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
break;
case VM_INJECT_EXCEPTION:
vmexc = (struct vm_exception *)data;
- error = vm_inject_exception(sc->vm, vmexc->cpuid, vmexc);
+ error = vm_inject_exception(sc->vm, vmexc->cpuid,
+ vmexc->vector, vmexc->error_code_valid, vmexc->error_code,
+ vmexc->restart_instruction);
break;
case VM_INJECT_NMI:
vmnmi = (struct vm_nmi *)data;
@@ -482,6 +488,28 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
error = vm_get_intinfo(sc->vm, vmii->vcpuid, &vmii->info1,
&vmii->info2);
break;
+ case VM_RTC_WRITE:
+ rtcdata = (struct vm_rtc_data *)data;
+ error = vrtc_nvram_write(sc->vm, rtcdata->offset,
+ rtcdata->value);
+ break;
+ case VM_RTC_READ:
+ rtcdata = (struct vm_rtc_data *)data;
+ error = vrtc_nvram_read(sc->vm, rtcdata->offset,
+ &rtcdata->value);
+ break;
+ case VM_RTC_SETTIME:
+ rtctime = (struct vm_rtc_time *)data;
+ error = vrtc_set_time(sc->vm, rtctime->secs);
+ break;
+ case VM_RTC_GETTIME:
+ error = 0;
+ rtctime = (struct vm_rtc_time *)data;
+ rtctime->secs = vrtc_get_time(sc->vm);
+ break;
+ case VM_RESTART_INSTRUCTION:
+ error = vm_restart_instruction(sc->vm, vcpu);
+ break;
default:
error = ENOTTY;
break;
diff --git a/sys/amd64/vmm/vmm_instruction_emul.c b/sys/amd64/vmm/vmm_instruction_emul.c
index d1d7173..3db890e 100644
--- a/sys/amd64/vmm/vmm_instruction_emul.c
+++ b/sys/amd64/vmm/vmm_instruction_emul.c
@@ -70,6 +70,7 @@ enum {
VIE_OP_TYPE_PUSH,
VIE_OP_TYPE_CMP,
VIE_OP_TYPE_POP,
+ VIE_OP_TYPE_MOVS,
VIE_OP_TYPE_LAST
};
@@ -78,6 +79,7 @@ enum {
#define VIE_OP_F_IMM8 (1 << 1) /* 8-bit immediate operand */
#define VIE_OP_F_MOFFSET (1 << 2) /* 16/32/64-bit immediate moffset */
#define VIE_OP_F_NO_MODRM (1 << 3)
+#define VIE_OP_F_NO_GLA_VERIFICATION (1 << 4)
static const struct vie_op two_byte_opcodes[256] = {
[0xB6] = {
@@ -133,6 +135,16 @@ static const struct vie_op one_byte_opcodes[256] = {
.op_type = VIE_OP_TYPE_MOV,
.op_flags = VIE_OP_F_MOFFSET | VIE_OP_F_NO_MODRM,
},
+ [0xA4] = {
+ .op_byte = 0xA4,
+ .op_type = VIE_OP_TYPE_MOVS,
+ .op_flags = VIE_OP_F_NO_MODRM | VIE_OP_F_NO_GLA_VERIFICATION
+ },
+ [0xA5] = {
+ .op_byte = 0xA5,
+ .op_type = VIE_OP_TYPE_MOVS,
+ .op_flags = VIE_OP_F_NO_MODRM | VIE_OP_F_NO_GLA_VERIFICATION
+ },
[0xC6] = {
/* XXX Group 11 extended opcode - not just MOV */
.op_byte = 0xC6,
@@ -559,6 +571,217 @@ emulate_movx(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
return (error);
}
+/*
+ * Helper function to calculate and validate a linear address.
+ *
+ * Returns 0 on success and 1 if an exception was injected into the guest.
+ */
+static int
+get_gla(void *vm, int vcpuid, struct vie *vie, struct vm_guest_paging *paging,
+ int opsize, int addrsize, int prot, enum vm_reg_name seg,
+ enum vm_reg_name gpr, uint64_t *gla)
+{
+ struct seg_desc desc;
+ uint64_t cr0, val, rflags;
+ int error;
+
+ error = vie_read_register(vm, vcpuid, VM_REG_GUEST_CR0, &cr0);
+ KASSERT(error == 0, ("%s: error %d getting cr0", __func__, error));
+
+ error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, &rflags);
+ KASSERT(error == 0, ("%s: error %d getting rflags", __func__, error));
+
+ error = vm_get_seg_desc(vm, vcpuid, seg, &desc);
+ KASSERT(error == 0, ("%s: error %d getting segment descriptor %d",
+ __func__, error, seg));
+
+ error = vie_read_register(vm, vcpuid, gpr, &val);
+ KASSERT(error == 0, ("%s: error %d getting register %d", __func__,
+ error, gpr));
+
+ if (vie_calculate_gla(paging->cpu_mode, seg, &desc, val, opsize,
+ addrsize, prot, gla)) {
+ if (seg == VM_REG_GUEST_SS)
+ vm_inject_ss(vm, vcpuid, 0);
+ else
+ vm_inject_gp(vm, vcpuid);
+ return (1);
+ }
+
+ if (vie_canonical_check(paging->cpu_mode, *gla)) {
+ if (seg == VM_REG_GUEST_SS)
+ vm_inject_ss(vm, vcpuid, 0);
+ else
+ vm_inject_gp(vm, vcpuid);
+ return (1);
+ }
+
+ if (vie_alignment_check(paging->cpl, opsize, cr0, rflags, *gla)) {
+ vm_inject_ac(vm, vcpuid, 0);
+ return (1);
+ }
+
+ return (0);
+}
+
+static int
+emulate_movs(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
+ struct vm_guest_paging *paging, mem_region_read_t memread,
+ mem_region_write_t memwrite, void *arg)
+{
+#ifdef _KERNEL
+ struct vm_copyinfo copyinfo[2];
+#else
+ struct iovec copyinfo[2];
+#endif
+ uint64_t dstaddr, srcaddr, val;
+ uint64_t rcx, rdi, rsi, rflags;
+ int error, opsize, seg, repeat;
+
+ opsize = (vie->op.op_byte == 0xA4) ? 1 : vie->opsize;
+ val = 0;
+ error = 0;
+
+ /*
+ * XXX although the MOVS instruction is only supposed to be used with
+	 * the "rep" prefix, some guests like FreeBSD will use "repnz" instead.
+ *
+ * Empirically the "repnz" prefix has identical behavior to "rep"
+ * and the zero flag does not make a difference.
+ */
+ repeat = vie->repz_present | vie->repnz_present;
+
+ if (repeat) {
+ error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RCX, &rcx);
+ KASSERT(!error, ("%s: error %d getting rcx", __func__, error));
+
+ /*
+ * The count register is %rcx, %ecx or %cx depending on the
+ * address size of the instruction.
+ */
+ if ((rcx & vie_size2mask(vie->addrsize)) == 0)
+ return (0);
+ }
+
+ /*
+ * Source Destination Comments
+ * --------------------------------------------
+ * (1) memory memory n/a
+ * (2) memory mmio emulated
+ * (3) mmio memory emulated
+ * (4) mmio mmio not emulated
+ *
+ * At this point we don't have sufficient information to distinguish
+ * between (2), (3) and (4). We use 'vm_copy_setup()' to tease this
+ * out because it will succeed only when operating on regular memory.
+ *
+ * XXX the emulation doesn't properly handle the case where 'gpa'
+ * is straddling the boundary between the normal memory and MMIO.
+ */
+
+ seg = vie->segment_override ? vie->segment_register : VM_REG_GUEST_DS;
+ error = get_gla(vm, vcpuid, vie, paging, opsize, vie->addrsize,
+ PROT_READ, seg, VM_REG_GUEST_RSI, &srcaddr);
+ if (error)
+ goto done;
+
+ error = vm_copy_setup(vm, vcpuid, paging, srcaddr, opsize, PROT_READ,
+ copyinfo, nitems(copyinfo));
+ if (error == 0) {
+ /*
+ * case (2): read from system memory and write to mmio.
+ */
+ vm_copyin(vm, vcpuid, copyinfo, &val, opsize);
+ vm_copy_teardown(vm, vcpuid, copyinfo, nitems(copyinfo));
+ error = memwrite(vm, vcpuid, gpa, val, opsize, arg);
+ goto done;
+ } else if (error > 0) {
+ /*
+ * Resume guest execution to handle fault.
+ */
+ goto done;
+ } else {
+ /*
+ * 'vm_copy_setup()' is expected to fail for cases (3) and (4)
+ * if 'srcaddr' is in the mmio space.
+ */
+ }
+
+ error = get_gla(vm, vcpuid, vie, paging, opsize, vie->addrsize,
+ PROT_WRITE, VM_REG_GUEST_ES, VM_REG_GUEST_RDI, &dstaddr);
+ if (error)
+ goto done;
+
+ error = vm_copy_setup(vm, vcpuid, paging, dstaddr, opsize,
+ PROT_WRITE, copyinfo, nitems(copyinfo));
+ if (error == 0) {
+ /*
+ * case (3): read from MMIO and write to system memory.
+ *
+ * A MMIO read can have side-effects so we commit to it
+ * only after vm_copy_setup() is successful. If a page-fault
+ * needs to be injected into the guest then it will happen
+ * before the MMIO read is attempted.
+ */
+ error = memread(vm, vcpuid, gpa, &val, opsize, arg);
+ if (error)
+ goto done;
+
+ vm_copyout(vm, vcpuid, &val, copyinfo, opsize);
+ vm_copy_teardown(vm, vcpuid, copyinfo, nitems(copyinfo));
+ } else if (error > 0) {
+ /*
+ * Resume guest execution to handle fault.
+ */
+ goto done;
+ } else {
+ goto done;
+ }
+
+ error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RSI, &rsi);
+ KASSERT(error == 0, ("%s: error %d getting rsi", __func__, error));
+
+ error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RDI, &rdi);
+ KASSERT(error == 0, ("%s: error %d getting rdi", __func__, error));
+
+ error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, &rflags);
+ KASSERT(error == 0, ("%s: error %d getting rflags", __func__, error));
+
+ if (rflags & PSL_D) {
+ rsi -= opsize;
+ rdi -= opsize;
+ } else {
+ rsi += opsize;
+ rdi += opsize;
+ }
+
+ error = vie_update_register(vm, vcpuid, VM_REG_GUEST_RSI, rsi,
+ vie->addrsize);
+ KASSERT(error == 0, ("%s: error %d updating rsi", __func__, error));
+
+ error = vie_update_register(vm, vcpuid, VM_REG_GUEST_RDI, rdi,
+ vie->addrsize);
+ KASSERT(error == 0, ("%s: error %d updating rdi", __func__, error));
+
+ if (repeat) {
+ rcx = rcx - 1;
+ error = vie_update_register(vm, vcpuid, VM_REG_GUEST_RCX,
+ rcx, vie->addrsize);
+ KASSERT(!error, ("%s: error %d updating rcx", __func__, error));
+
+ /*
+ * Repeat the instruction if the count register is not zero.
+ */
+ if ((rcx & vie_size2mask(vie->addrsize)) != 0)
+ vm_restart_instruction(vm, vcpuid);
+ }
+done:
+ if (error < 0)
+ return (EFAULT);
+ else
+ return (0);
+}
+
static int
emulate_and(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
mem_region_read_t memread, mem_region_write_t memwrite, void *arg)
@@ -926,9 +1149,7 @@ emulate_stack_op(void *vm, int vcpuid, uint64_t mmio_gpa, struct vie *vie,
error = memwrite(vm, vcpuid, mmio_gpa, val, size, arg);
rsp += size;
}
-#ifdef _KERNEL
vm_copy_teardown(vm, vcpuid, copyinfo, nitems(copyinfo));
-#endif
if (error == 0) {
error = vie_update_register(vm, vcpuid, VM_REG_GUEST_RSP, rsp,
@@ -1012,6 +1233,10 @@ vmm_emulate_instruction(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
error = emulate_movx(vm, vcpuid, gpa, vie,
memread, memwrite, memarg);
break;
+ case VIE_OP_TYPE_MOVS:
+ error = emulate_movs(vm, vcpuid, gpa, vie, paging, memread,
+ memwrite, memarg);
+ break;
case VIE_OP_TYPE_AND:
error = emulate_and(vm, vcpuid, gpa, vie,
memread, memwrite, memarg);
@@ -1193,6 +1418,7 @@ vie_init(struct vie *vie, const char *inst_bytes, int inst_length)
vie->base_register = VM_REG_LAST;
vie->index_register = VM_REG_LAST;
+ vie->segment_register = VM_REG_LAST;
if (inst_length) {
bcopy(inst_bytes, vie->inst, inst_length);
@@ -1458,6 +1684,35 @@ vie_advance(struct vie *vie)
vie->num_processed++;
}
+static bool
+segment_override(uint8_t x, int *seg)
+{
+
+ switch (x) {
+ case 0x2E:
+ *seg = VM_REG_GUEST_CS;
+ break;
+ case 0x36:
+ *seg = VM_REG_GUEST_SS;
+ break;
+ case 0x3E:
+ *seg = VM_REG_GUEST_DS;
+ break;
+ case 0x26:
+ *seg = VM_REG_GUEST_ES;
+ break;
+ case 0x64:
+ *seg = VM_REG_GUEST_FS;
+ break;
+ case 0x65:
+ *seg = VM_REG_GUEST_GS;
+ break;
+ default:
+ return (false);
+ }
+ return (true);
+}
+
static int
decode_prefixes(struct vie *vie, enum vm_cpu_mode cpu_mode, int cs_d)
{
@@ -1471,6 +1726,12 @@ decode_prefixes(struct vie *vie, enum vm_cpu_mode cpu_mode, int cs_d)
vie->opsize_override = 1;
else if (x == 0x67)
vie->addrsize_override = 1;
+ else if (x == 0xF3)
+ vie->repz_present = 1;
+ else if (x == 0xF2)
+ vie->repnz_present = 1;
+ else if (segment_override(x, &vie->segment_register))
+ vie->segment_override = 1;
else
break;
@@ -1923,8 +2184,10 @@ vmm_decode_instruction(struct vm *vm, int cpuid, uint64_t gla,
if (verify_inst_length(vie))
return (-1);
- if (verify_gla(vm, cpuid, gla, vie))
- return (-1);
+ if ((vie->op.op_flags & VIE_OP_F_NO_GLA_VERIFICATION) == 0) {
+ if (verify_gla(vm, cpuid, gla, vie))
+ return (-1);
+ }
vie->decoded = 1; /* success */
diff --git a/sys/amd64/vmm/vmm_ioport.c b/sys/amd64/vmm/vmm_ioport.c
index e553599..fc68a61 100644
--- a/sys/amd64/vmm/vmm_ioport.c
+++ b/sys/amd64/vmm/vmm_ioport.c
@@ -42,6 +42,7 @@ __FBSDID("$FreeBSD$");
#include "vatpic.h"
#include "vatpit.h"
#include "vpmtmr.h"
+#include "vrtc.h"
#include "vmm_ioport.h"
#include "vmm_ktr.h"
@@ -60,6 +61,8 @@ ioport_handler_func_t ioport_handler[MAX_IOPORTS] = {
[IO_ELCR1] = vatpic_elc_handler,
[IO_ELCR2] = vatpic_elc_handler,
[IO_PMTMR] = vpmtmr_handler,
+ [IO_RTC] = vrtc_addr_handler,
+ [IO_RTC + 1] = vrtc_data_handler,
};
#ifdef KTR
@@ -71,7 +74,7 @@ inout_instruction(struct vm_exit *vmexit)
static const char *iodesc[] = {
"outb", "outw", "outl",
"inb", "inw", "inl",
- "outsb", "outsw", "outsd"
+ "outsb", "outsw", "outsd",
"insb", "insw", "insd",
};
diff --git a/sys/modules/vmm/Makefile b/sys/modules/vmm/Makefile
index 6aeaf80..6e1cf7f 100644
--- a/sys/modules/vmm/Makefile
+++ b/sys/modules/vmm/Makefile
@@ -35,7 +35,8 @@ SRCS+= iommu.c \
vhpet.c \
vioapic.c \
vlapic.c \
- vpmtmr.c
+ vpmtmr.c \
+ vrtc.c
# intel-specific files
.PATH: ${.CURDIR}/../../amd64/vmm/intel
diff --git a/usr.sbin/bhyve/bhyve.8 b/usr.sbin/bhyve/bhyve.8
index 8de0989..e15f9ac 100644
--- a/usr.sbin/bhyve/bhyve.8
+++ b/usr.sbin/bhyve/bhyve.8
@@ -32,7 +32,7 @@
.Nd "run a guest operating system inside a virtual machine"
.Sh SYNOPSIS
.Nm
-.Op Fl abehwxACHPWY
+.Op Fl abehuwxACHPWY
.Op Fl c Ar numcpus
.Op Fl g Ar gdbport
.Op Fl l Ar lpcdev Ns Op , Ns Ar conf
@@ -239,6 +239,8 @@ The host device must have been reserved at boot-time using the
loader variable as described in
.Xr vmm 4 .
.El
+.It Fl u
+RTC keeps UTC time.
.It Fl U Ar uuid
Set the universally unique identifier
.Pq UUID
diff --git a/usr.sbin/bhyve/bhyverun.c b/usr.sbin/bhyve/bhyverun.c
index 5971993..97ed046 100644
--- a/usr.sbin/bhyve/bhyverun.c
+++ b/usr.sbin/bhyve/bhyverun.c
@@ -122,7 +122,7 @@ usage(int code)
{
fprintf(stderr,
- "Usage: %s [-abehwxACHPWY] [-c vcpus] [-g <gdb port>] [-l <lpc>]\n"
+ "Usage: %s [-abehuwxACHPWY] [-c vcpus] [-g <gdb port>] [-l <lpc>]\n"
" %*s [-m mem] [-p vcpu:hostcpu] [-s <pci>] [-U uuid] <vm>\n"
" -a: local apic is in xAPIC mode (deprecated)\n"
" -A: create ACPI tables\n"
@@ -137,6 +137,7 @@ usage(int code)
" -p: pin 'vcpu' to 'hostcpu'\n"
" -P: vmexit from the guest on pause\n"
" -s: <slot,driver,configinfo> PCI slot config\n"
+ " -u: RTC keeps UTC time\n"
" -U: uuid\n"
" -w: ignore unimplemented MSRs\n"
" -W: force virtio to use single-vector MSI\n"
@@ -185,20 +186,14 @@ vm_inject_fault(void *arg, int vcpu, int vector, int errcode_valid,
int errcode)
{
struct vmctx *ctx;
- int error;
+ int error, restart_instruction;
ctx = arg;
- if (errcode_valid)
- error = vm_inject_exception2(ctx, vcpu, vector, errcode);
- else
- error = vm_inject_exception(ctx, vcpu, vector);
- assert(error == 0);
+ restart_instruction = 1;
- /*
- * Set the instruction length to 0 to ensure that the instruction is
- * restarted when the fault handler returns.
- */
- vmexit[vcpu].inst_length = 0;
+ error = vm_inject_exception(ctx, vcpu, vector, errcode_valid, errcode,
+ restart_instruction);
+ assert(error == 0);
}
void *
@@ -329,12 +324,6 @@ vmexit_inout(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu)
}
error = emulate_inout(ctx, vcpu, vme, strictio);
- if (!error && in && !string) {
- error = vm_set_register(ctx, vcpu, VM_REG_GUEST_RAX,
- vme->u.inout.eax);
- assert(error == 0);
- }
-
if (error) {
fprintf(stderr, "Unhandled %s%c 0x%04x\n", in ? "in" : "out",
bytes == 1 ? 'b' : (bytes == 2 ? 'w' : 'l'), port);
@@ -358,7 +347,7 @@ vmexit_rdmsr(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu)
vme->u.msr.code, *pvcpu);
if (strictmsr) {
vm_inject_gp(ctx, *pvcpu);
- return (VMEXIT_RESTART);
+ return (VMEXIT_CONTINUE);
}
}
@@ -384,7 +373,7 @@ vmexit_wrmsr(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu)
vme->u.msr.code, vme->u.msr.wval, *pvcpu);
if (strictmsr) {
vm_inject_gp(ctx, *pvcpu);
- return (VMEXIT_RESTART);
+ return (VMEXIT_CONTINUE);
}
}
return (VMEXIT_CONTINUE);
@@ -462,9 +451,11 @@ static int
vmexit_bogus(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
{
+ assert(vmexit->inst_length == 0);
+
stats.vmexit_bogus++;
- return (VMEXIT_RESTART);
+ return (VMEXIT_CONTINUE);
}
static int
@@ -494,9 +485,11 @@ static int
vmexit_mtrap(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
{
+ assert(vmexit->inst_length == 0);
+
stats.vmexit_mtrap++;
- return (VMEXIT_RESTART);
+ return (VMEXIT_CONTINUE);
}
static int
@@ -581,7 +574,7 @@ static vmexit_handler_t handler[VM_EXITCODE_MAX] = {
};
static void
-vm_loop(struct vmctx *ctx, int vcpu, uint64_t rip)
+vm_loop(struct vmctx *ctx, int vcpu, uint64_t startrip)
{
int error, rc, prevcpu;
enum vm_exitcode exitcode;
@@ -596,8 +589,11 @@ vm_loop(struct vmctx *ctx, int vcpu, uint64_t rip)
error = vm_active_cpus(ctx, &active_cpus);
assert(CPU_ISSET(vcpu, &active_cpus));
+ error = vm_set_register(ctx, vcpu, VM_REG_GUEST_RIP, startrip);
+ assert(error == 0);
+
while (1) {
- error = vm_run(ctx, vcpu, rip, &vmexit[vcpu]);
+ error = vm_run(ctx, vcpu, &vmexit[vcpu]);
if (error != 0)
break;
@@ -614,10 +610,6 @@ vm_loop(struct vmctx *ctx, int vcpu, uint64_t rip)
switch (rc) {
case VMEXIT_CONTINUE:
- rip = vmexit[vcpu].rip + vmexit[vcpu].inst_length;
- break;
- case VMEXIT_RESTART:
- rip = vmexit[vcpu].rip;
break;
case VMEXIT_ABORT:
abort();
@@ -694,6 +686,7 @@ main(int argc, char *argv[])
{
int c, error, gdb_port, err, bvmcons;
int dump_guest_memory, max_vcpus, mptgen;
+ int rtc_localtime;
struct vmctx *ctx;
uint64_t rip;
size_t memsize;
@@ -705,8 +698,9 @@ main(int argc, char *argv[])
guest_ncpus = 1;
memsize = 256 * MB;
mptgen = 1;
+ rtc_localtime = 1;
- while ((c = getopt(argc, argv, "abehwxACHIPWYp:g:c:s:m:l:U:")) != -1) {
+ while ((c = getopt(argc, argv, "abehuwxACHIPWYp:g:c:s:m:l:U:")) != -1) {
switch (c) {
case 'a':
x2apic_mode = 0;
@@ -766,6 +760,9 @@ main(int argc, char *argv[])
case 'e':
strictio = 1;
break;
+ case 'u':
+ rtc_localtime = 0;
+ break;
case 'U':
guest_uuid_str = optarg;
break;
@@ -829,7 +826,7 @@ main(int argc, char *argv[])
pci_irq_init(ctx);
ioapic_init(ctx);
- rtc_init(ctx);
+ rtc_init(ctx, rtc_localtime);
sci_init(ctx);
/*
diff --git a/usr.sbin/bhyve/bhyverun.h b/usr.sbin/bhyve/bhyverun.h
index 87824ef..c51bf48 100644
--- a/usr.sbin/bhyve/bhyverun.h
+++ b/usr.sbin/bhyve/bhyverun.h
@@ -35,9 +35,8 @@
#define __CTASSERT(x, y) typedef char __assert ## y[(x) ? 1 : -1]
#endif
-#define VMEXIT_CONTINUE 1 /* continue from next instruction */
-#define VMEXIT_RESTART 2 /* restart current instruction */
-#define VMEXIT_ABORT 3 /* abort the vm run loop */
+#define VMEXIT_CONTINUE (0)
+#define VMEXIT_ABORT (-1)
struct vmctx;
extern int guest_ncpus;
diff --git a/usr.sbin/bhyve/inout.c b/usr.sbin/bhyve/inout.c
index 1041a59..402b953 100644
--- a/usr.sbin/bhyve/inout.c
+++ b/usr.sbin/bhyve/inout.c
@@ -104,7 +104,7 @@ int
emulate_inout(struct vmctx *ctx, int vcpu, struct vm_exit *vmexit, int strict)
{
int addrsize, bytes, flags, in, port, prot, rep;
- uint32_t val;
+ uint32_t eax, val;
inout_func_t handler;
void *arg;
int error, retval;
@@ -214,16 +214,20 @@ emulate_inout(struct vmctx *ctx, int vcpu, struct vm_exit *vmexit, int strict)
}
/* Restart the instruction if more iterations remain */
- if (retval == 0 && count != 0)
- vmexit->inst_length = 0;
- } else {
- if (!in) {
- val = vmexit->u.inout.eax & vie_size2mask(bytes);
+ if (retval == 0 && count != 0) {
+ error = vm_restart_instruction(ctx, vcpu);
+ assert(error == 0);
}
+ } else {
+ eax = vmexit->u.inout.eax;
+ val = eax & vie_size2mask(bytes);
retval = handler(ctx, vcpu, in, port, bytes, &val, arg);
if (retval == 0 && in) {
- vmexit->u.inout.eax &= ~vie_size2mask(bytes);
- vmexit->u.inout.eax |= val & vie_size2mask(bytes);
+ eax &= ~vie_size2mask(bytes);
+ eax |= val & vie_size2mask(bytes);
+ error = vm_set_register(ctx, vcpu, VM_REG_GUEST_RAX,
+ eax);
+ assert(error == 0);
}
}
return (retval);
diff --git a/usr.sbin/bhyve/pci_ahci.c b/usr.sbin/bhyve/pci_ahci.c
index 9dbabcd..31e02f8 100644
--- a/usr.sbin/bhyve/pci_ahci.c
+++ b/usr.sbin/bhyve/pci_ahci.c
@@ -2299,7 +2299,8 @@ pci_ahci_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts, int atapi)
open_fail:
if (ret) {
- blockif_close(sc->port[0].bctx);
+ if (sc->port[0].bctx != NULL)
+ blockif_close(sc->port[0].bctx);
free(sc);
}
diff --git a/usr.sbin/bhyve/rtc.c b/usr.sbin/bhyve/rtc.c
index 459c900..5c70154 100644
--- a/usr.sbin/bhyve/rtc.c
+++ b/usr.sbin/bhyve/rtc.c
@@ -30,10 +30,7 @@
__FBSDID("$FreeBSD$");
#include <sys/types.h>
-#include <sys/time.h>
-#include <stdio.h>
-#include <string.h>
#include <time.h>
#include <assert.h>
@@ -41,47 +38,11 @@ __FBSDID("$FreeBSD$");
#include <vmmapi.h>
#include "acpi.h"
-#include "inout.h"
#include "pci_lpc.h"
#include "rtc.h"
-#define IO_RTC 0x70
+#define IO_RTC 0x70
-#define RTC_SEC 0x00 /* seconds */
-#define RTC_SEC_ALARM 0x01
-#define RTC_MIN 0x02
-#define RTC_MIN_ALARM 0x03
-#define RTC_HRS 0x04
-#define RTC_HRS_ALARM 0x05
-#define RTC_WDAY 0x06
-#define RTC_DAY 0x07
-#define RTC_MONTH 0x08
-#define RTC_YEAR 0x09
-#define RTC_CENTURY 0x32 /* current century */
-
-#define RTC_STATUSA 0xA
-#define RTCSA_TUP 0x80 /* time update, don't look now */
-
-#define RTC_STATUSB 0xB
-#define RTCSB_DST 0x01
-#define RTCSB_24HR 0x02
-#define RTCSB_BIN 0x04 /* 0 = BCD, 1 = Binary */
-#define RTCSB_PINTR 0x40 /* 1 = enable periodic clock interrupt */
-#define RTCSB_HALT 0x80 /* stop clock updates */
-
-#define RTC_INTR 0x0c /* status register C (R) interrupt source */
-
-#define RTC_STATUSD 0x0d /* status register D (R) Lost Power */
-#define RTCSD_PWR 0x80 /* clock power OK */
-
-#define RTC_NVRAM_START 0x0e
-#define RTC_NVRAM_END 0x7f
-#define RTC_NVRAM_SZ (128 - RTC_NVRAM_START)
-#define nvoff(x) ((x) - RTC_NVRAM_START)
-
-#define RTC_DIAG 0x0e
-#define RTC_RSTCODE 0x0f
-#define RTC_EQUIPMENT 0x14
#define RTC_LMEM_LSB 0x34
#define RTC_LMEM_MSB 0x35
#define RTC_HMEM_LSB 0x5b
@@ -92,249 +53,30 @@ __FBSDID("$FreeBSD$");
#define m_16MB (16*1024*1024)
#define m_4GB (4ULL*1024*1024*1024)
-static int addr;
-
-static uint8_t rtc_nvram[RTC_NVRAM_SZ];
-
-/* XXX initialize these to default values as they would be from BIOS */
-static uint8_t status_a, status_b;
-
-static struct {
- uint8_t hours;
- uint8_t mins;
- uint8_t secs;
-} rtc_alarm;
-
-static u_char const bin2bcd_data[] = {
- 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09,
- 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19,
- 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29,
- 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39,
- 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49,
- 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59,
- 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69,
- 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79,
- 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89,
- 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99
-};
-#define bin2bcd(bin) (bin2bcd_data[bin])
-
-#define rtcout(val) ((status_b & RTCSB_BIN) ? (val) : bin2bcd((val)))
-
-static void
-timevalfix(struct timeval *t1)
-{
-
- if (t1->tv_usec < 0) {
- t1->tv_sec--;
- t1->tv_usec += 1000000;
- }
- if (t1->tv_usec >= 1000000) {
- t1->tv_sec++;
- t1->tv_usec -= 1000000;
- }
-}
-
-static void
-timevalsub(struct timeval *t1, const struct timeval *t2)
-{
-
- t1->tv_sec -= t2->tv_sec;
- t1->tv_usec -= t2->tv_usec;
- timevalfix(t1);
-}
-
-static int
-rtc_addr_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes,
- uint32_t *eax, void *arg)
-{
- if (bytes != 1)
- return (-1);
-
- if (in) {
- /* straight read of this register will return 0xFF */
- *eax = 0xff;
- return (0);
- }
-
- switch (*eax & 0x7f) {
- case RTC_SEC:
- case RTC_SEC_ALARM:
- case RTC_MIN:
- case RTC_MIN_ALARM:
- case RTC_HRS:
- case RTC_HRS_ALARM:
- case RTC_WDAY:
- case RTC_DAY:
- case RTC_MONTH:
- case RTC_YEAR:
- case RTC_STATUSA:
- case RTC_STATUSB:
- case RTC_INTR:
- case RTC_STATUSD:
- case RTC_NVRAM_START ... RTC_NVRAM_END:
- break;
- default:
- return (-1);
- }
-
- addr = *eax & 0x7f;
- return (0);
-}
-
-static int
-rtc_data_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes,
- uint32_t *eax, void *arg)
+/*
+ * Returns the current RTC time as number of seconds since 00:00:00 Jan 1, 1970
+ */
+static time_t
+rtc_time(struct vmctx *ctx, int use_localtime)
{
- int hour;
+ struct tm tm;
time_t t;
- struct timeval cur, delta;
-
- static struct timeval last;
- static struct tm tm;
-
- if (bytes != 1)
- return (-1);
-
- gettimeofday(&cur, NULL);
- /*
- * Increment the cached time only once per second so we can guarantee
- * that the guest has at least one second to read the hour:min:sec
- * separately and still get a coherent view of the time.
- */
- delta = cur;
- timevalsub(&delta, &last);
- if (delta.tv_sec >= 1 && (status_b & RTCSB_HALT) == 0) {
- t = cur.tv_sec;
+ time(&t);
+ if (use_localtime) {
localtime_r(&t, &tm);
- last = cur;
+ t = timegm(&tm);
}
-
- if (in) {
- switch (addr) {
- case RTC_SEC_ALARM:
- *eax = rtc_alarm.secs;
- break;
- case RTC_MIN_ALARM:
- *eax = rtc_alarm.mins;
- break;
- case RTC_HRS_ALARM:
- *eax = rtc_alarm.hours;
- break;
- case RTC_SEC:
- *eax = rtcout(tm.tm_sec);
- return (0);
- case RTC_MIN:
- *eax = rtcout(tm.tm_min);
- return (0);
- case RTC_HRS:
- if (status_b & RTCSB_24HR)
- hour = tm.tm_hour;
- else
- hour = (tm.tm_hour % 12) + 1;
-
- *eax = rtcout(hour);
-
- /*
- * If we are representing time in the 12-hour format
- * then set the MSB to indicate PM.
- */
- if ((status_b & RTCSB_24HR) == 0 && tm.tm_hour >= 12)
- *eax |= 0x80;
-
- return (0);
- case RTC_WDAY:
- *eax = rtcout(tm.tm_wday + 1);
- return (0);
- case RTC_DAY:
- *eax = rtcout(tm.tm_mday);
- return (0);
- case RTC_MONTH:
- *eax = rtcout(tm.tm_mon + 1);
- return (0);
- case RTC_YEAR:
- *eax = rtcout(tm.tm_year % 100);
- return (0);
- case RTC_STATUSA:
- *eax = status_a;
- return (0);
- case RTC_STATUSB:
- *eax = status_b;
- return (0);
- case RTC_INTR:
- *eax = 0;
- return (0);
- case RTC_STATUSD:
- *eax = RTCSD_PWR;
- return (0);
- case RTC_NVRAM_START ... RTC_NVRAM_END:
- *eax = rtc_nvram[addr - RTC_NVRAM_START];
- return (0);
- default:
- return (-1);
- }
- }
-
- switch (addr) {
- case RTC_STATUSA:
- status_a = *eax & ~RTCSA_TUP;
- break;
- case RTC_STATUSB:
- /* XXX not implemented yet XXX */
- if (*eax & RTCSB_PINTR)
- return (-1);
- status_b = *eax;
- break;
- case RTC_STATUSD:
- /* ignore write */
- break;
- case RTC_SEC_ALARM:
- rtc_alarm.secs = *eax;
- break;
- case RTC_MIN_ALARM:
- rtc_alarm.mins = *eax;
- break;
- case RTC_HRS_ALARM:
- rtc_alarm.hours = *eax;
- break;
- case RTC_SEC:
- case RTC_MIN:
- case RTC_HRS:
- case RTC_WDAY:
- case RTC_DAY:
- case RTC_MONTH:
- case RTC_YEAR:
- /*
- * Ignore writes to the time of day registers
- */
- break;
- case RTC_NVRAM_START ... RTC_NVRAM_END:
- rtc_nvram[addr - RTC_NVRAM_START] = *eax;
- break;
- default:
- return (-1);
- }
- return (0);
+ return (t);
}
void
-rtc_init(struct vmctx *ctx)
+rtc_init(struct vmctx *ctx, int use_localtime)
{
- struct timeval cur;
- struct tm tm;
size_t himem;
size_t lomem;
int err;
- err = gettimeofday(&cur, NULL);
- assert(err == 0);
- (void) localtime_r(&cur.tv_sec, &tm);
-
- memset(rtc_nvram, 0, sizeof(rtc_nvram));
-
- rtc_nvram[nvoff(RTC_CENTURY)] = bin2bcd((tm.tm_year + 1900) / 100);
-
/* XXX init diag/reset code/equipment/checksum ? */
/*
@@ -344,17 +86,22 @@ rtc_init(struct vmctx *ctx)
* 0x5b/0x5c/0x5d - 64KB chunks above 4GB
*/
lomem = (vm_get_lowmem_size(ctx) - m_16MB) / m_64KB;
- rtc_nvram[nvoff(RTC_LMEM_LSB)] = lomem;
- rtc_nvram[nvoff(RTC_LMEM_MSB)] = lomem >> 8;
+ err = vm_rtc_write(ctx, RTC_LMEM_LSB, lomem);
+ assert(err == 0);
+ err = vm_rtc_write(ctx, RTC_LMEM_MSB, lomem >> 8);
+ assert(err == 0);
himem = vm_get_highmem_size(ctx) / m_64KB;
- rtc_nvram[nvoff(RTC_HMEM_LSB)] = himem;
- rtc_nvram[nvoff(RTC_HMEM_SB)] = himem >> 8;
- rtc_nvram[nvoff(RTC_HMEM_MSB)] = himem >> 16;
-}
+ err = vm_rtc_write(ctx, RTC_HMEM_LSB, himem);
+ assert(err == 0);
+ err = vm_rtc_write(ctx, RTC_HMEM_SB, himem >> 8);
+ assert(err == 0);
+ err = vm_rtc_write(ctx, RTC_HMEM_MSB, himem >> 16);
+ assert(err == 0);
-INOUT_PORT(rtc, IO_RTC, IOPORT_F_INOUT, rtc_addr_handler);
-INOUT_PORT(rtc, IO_RTC + 1, IOPORT_F_INOUT, rtc_data_handler);
+ err = vm_rtc_settime(ctx, rtc_time(ctx, use_localtime));
+ assert(err == 0);
+}
static void
rtc_dsdt(void)
diff --git a/usr.sbin/bhyve/rtc.h b/usr.sbin/bhyve/rtc.h
index 72cffb3..5b08ca3 100644
--- a/usr.sbin/bhyve/rtc.h
+++ b/usr.sbin/bhyve/rtc.h
@@ -29,6 +29,6 @@
#ifndef _RTC_H_
#define _RTC_H_
-void rtc_init(struct vmctx *ctx);
+void rtc_init(struct vmctx *ctx, int use_localtime);
#endif /* _RTC_H_ */
diff --git a/usr.sbin/bhyve/task_switch.c b/usr.sbin/bhyve/task_switch.c
index b939c1a..ba6a9d2 100644
--- a/usr.sbin/bhyve/task_switch.c
+++ b/usr.sbin/bhyve/task_switch.c
@@ -725,21 +725,11 @@ vmexit_task_switch(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
assert(paging->cpu_mode == CPU_MODE_PROTECTED);
/*
- * Calculate the %eip to store in the old TSS before modifying the
- * 'inst_length'.
+ * Calculate the instruction pointer to store in the old TSS.
*/
eip = vmexit->rip + vmexit->inst_length;
/*
- * Set the 'inst_length' to '0'.
- *
- * If an exception is triggered during emulation of the task switch
- * then the exception handler should return to the instruction that
- * caused the task switch as opposed to the subsequent instruction.
- */
- vmexit->inst_length = 0;
-
- /*
* Section 4.6, "Access Rights" in Intel SDM Vol 3.
* The following page table accesses are implicitly supervisor mode:
* - accesses to GDT or LDT to load segment descriptors
@@ -883,8 +873,8 @@ vmexit_task_switch(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
* after this point will be handled in the context of the new task and
* the saved instruction pointer will belong to the new task.
*/
- vmexit->rip = newtss.tss_eip;
- assert(vmexit->inst_length == 0);
+ error = vm_set_register(ctx, vcpu, VM_REG_GUEST_RIP, newtss.tss_eip);
+ assert(error == 0);
/* Load processor state from new TSS */
error = tss32_restore(ctx, vcpu, task_switch, ot_sel, &newtss, nt_iov);
diff --git a/usr.sbin/bhyve/xmsr.c b/usr.sbin/bhyve/xmsr.c
index d50a939..5b7bfbb 100644
--- a/usr.sbin/bhyve/xmsr.c
+++ b/usr.sbin/bhyve/xmsr.c
@@ -185,6 +185,15 @@ emulate_rdmsr(struct vmctx *ctx, int vcpu, uint32_t num, uint64_t *val)
*val = 0;
break;
+ /*
+ * OpenBSD guests test bit 0 of this MSR to detect if the
+ * workaround for erratum 721 is already applied.
+ * http://support.amd.com/TechDocs/41322_10h_Rev_Gd.pdf
+ */
+ case 0xC0011029:
+ *val = 1;
+ break;
+
default:
error = -1;
break;
diff --git a/usr.sbin/bhyvectl/bhyvectl.c b/usr.sbin/bhyvectl/bhyvectl.c
index 0c4457e..e2b514d 100644
--- a/usr.sbin/bhyvectl/bhyvectl.c
+++ b/usr.sbin/bhyvectl/bhyvectl.c
@@ -45,6 +45,7 @@ __FBSDID("$FreeBSD$");
#include <fcntl.h>
#include <string.h>
#include <getopt.h>
+#include <time.h>
#include <assert.h>
#include <machine/cpufunc.h>
@@ -157,6 +158,11 @@ usage(bool cpu_intel)
" [--inject-nmi]\n"
" [--force-reset]\n"
" [--force-poweroff]\n"
+ " [--get-rtc-time]\n"
+ " [--set-rtc-time=<secs>]\n"
+ " [--get-rtc-nvram]\n"
+ " [--set-rtc-nvram=<val>]\n"
+ " [--rtc-nvram-offset=<offset>]\n"
" [--get-active-cpus]\n"
" [--get-suspended-cpus]\n"
" [--get-intinfo]\n"
@@ -220,6 +226,12 @@ usage(bool cpu_intel)
exit(1);
}
+static int get_rtc_time, set_rtc_time;
+static int get_rtc_nvram, set_rtc_nvram;
+static int rtc_nvram_offset;
+static uint8_t rtc_nvram_value;
+static time_t rtc_secs;
+
static int get_stats, getcap, setcap, capval, get_gpa_pmap;
static int inject_nmi, assert_lapic_lvt;
static int force_reset, force_poweroff;
@@ -545,6 +557,9 @@ enum {
UNASSIGN_PPTDEV,
GET_GPA_PMAP,
ASSERT_LAPIC_LVT,
+ SET_RTC_TIME,
+ SET_RTC_NVRAM,
+ RTC_NVRAM_OFFSET,
};
static void
@@ -1269,6 +1284,11 @@ setup_options(bool cpu_intel)
{ "setcap", REQ_ARG, 0, SET_CAP },
{ "get-gpa-pmap", REQ_ARG, 0, GET_GPA_PMAP },
{ "assert-lapic-lvt", REQ_ARG, 0, ASSERT_LAPIC_LVT },
+ { "get-rtc-time", NO_ARG, &get_rtc_time, 1 },
+ { "set-rtc-time", REQ_ARG, 0, SET_RTC_TIME },
+ { "rtc-nvram-offset", REQ_ARG, 0, RTC_NVRAM_OFFSET },
+ { "get-rtc-nvram", NO_ARG, &get_rtc_nvram, 1 },
+ { "set-rtc-nvram", REQ_ARG, 0, SET_RTC_NVRAM },
{ "getcap", NO_ARG, &getcap, 1 },
{ "get-stats", NO_ARG, &get_stats, 1 },
{ "get-desc-ds",NO_ARG, &get_desc_ds, 1 },
@@ -1462,6 +1482,33 @@ setup_options(bool cpu_intel)
return (all_opts);
}
+static const char *
+wday_str(int idx)
+{
+ static const char *weekdays[] = {
+ "Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"
+ };
+
+ if (idx >= 0 && idx < 7)
+ return (weekdays[idx]);
+ else
+ return ("UNK");
+}
+
+static const char *
+mon_str(int idx)
+{
+ static const char *months[] = {
+ "Jan", "Feb", "Mar", "Apr", "May", "Jun",
+ "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
+ };
+
+ if (idx >= 0 && idx < 12)
+ return (months[idx]);
+ else
+ return ("UNK");
+}
+
int
main(int argc, char *argv[])
{
@@ -1477,6 +1524,7 @@ main(int argc, char *argv[])
cpuset_t cpus;
bool cpu_intel;
uint64_t cs, ds, es, fs, gs, ss, tr, ldtr;
+ struct tm tm;
struct option *opts;
cpu_intel = cpu_vendor_intel();
@@ -1594,6 +1642,17 @@ main(int argc, char *argv[])
capval = strtoul(optarg, NULL, 0);
setcap = 1;
break;
+ case SET_RTC_TIME:
+ rtc_secs = strtoul(optarg, NULL, 0);
+ set_rtc_time = 1;
+ break;
+ case SET_RTC_NVRAM:
+ rtc_nvram_value = (uint8_t)strtoul(optarg, NULL, 0);
+ set_rtc_nvram = 1;
+ break;
+ case RTC_NVRAM_OFFSET:
+ rtc_nvram_offset = strtoul(optarg, NULL, 0);
+ break;
case GET_GPA_PMAP:
gpa_pmap = strtoul(optarg, NULL, 0);
get_gpa_pmap = 1;
@@ -1971,6 +2030,31 @@ main(int argc, char *argv[])
}
}
+ if (!error && set_rtc_nvram)
+ error = vm_rtc_write(ctx, rtc_nvram_offset, rtc_nvram_value);
+
+ if (!error && (get_rtc_nvram || get_all)) {
+ error = vm_rtc_read(ctx, rtc_nvram_offset, &rtc_nvram_value);
+ if (error == 0) {
+ printf("rtc nvram[%03d]: 0x%02x\n", rtc_nvram_offset,
+ rtc_nvram_value);
+ }
+ }
+
+ if (!error && set_rtc_time)
+ error = vm_rtc_settime(ctx, rtc_secs);
+
+ if (!error && (get_rtc_time || get_all)) {
+ error = vm_rtc_gettime(ctx, &rtc_secs);
+ if (error == 0) {
+ gmtime_r(&rtc_secs, &tm);
+ printf("rtc time %#lx: %s %s %02d %02d:%02d:%02d %d\n",
+ rtc_secs, wday_str(tm.tm_wday), mon_str(tm.tm_mon),
+ tm.tm_mday, tm.tm_hour, tm.tm_min, tm.tm_sec,
+ 1900 + tm.tm_year);
+ }
+ }
+
if (!error && (getcap || get_all)) {
int captype, val, getcaptype;
@@ -2034,10 +2118,7 @@ main(int argc, char *argv[])
}
if (!error && run) {
- error = vm_get_register(ctx, vcpu, VM_REG_GUEST_RIP, &rip);
- assert(error == 0);
-
- error = vm_run(ctx, vcpu, rip, &vmexit);
+ error = vm_run(ctx, vcpu, &vmexit);
if (error == 0)
dump_vm_run_exitcode(&vmexit, vcpu);
else
OpenPOWER on IntegriCloud