summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorgrehan <grehan@FreeBSD.org>2016-02-01 14:56:11 +0000
committergrehan <grehan@FreeBSD.org>2016-02-01 14:56:11 +0000
commit83c1d10f0ce6085d288212702d5b72432e3a184b (patch)
tree221f5a6c07cf26f275b8166b1d46c75f5afdaa8b
parent9c4de571a758cb693a9c36eefd957350944f11b2 (diff)
downloadFreeBSD-src-83c1d10f0ce6085d288212702d5b72432e3a184b.zip
FreeBSD-src-83c1d10f0ce6085d288212702d5b72432e3a184b.tar.gz
MFC r284539, r284630, r284688, r284877, r285217, r285218,
r286837, r286838, r288470, r288522, r288524, r288826, r289001 Pull in bhyve bug fixes and changes to allow UEFI booting. This provides Windows support. Tested on Intel and AMD with: - Arch Linux i386+amd64 (kernel 4.3.3) - Ubuntu 15.10 server 64-bit - FreeBSD-CURRENT/amd64 20160127 snap - FreeBSD 10.2 i386+amd64 - OpenBSD 5.8 i386+amd64 - SmartOS latest - Windows 10 build 1511' Huge thanks to Yamagi Burmeister who submitted the patch and did the majority of the testing. r284539 - bootrom mem allocation support r284630 - Add SO_REUSEADDR when starting debug port r284688 - Fix a regression in "movs" emulation r284877 - verify_gla() non-zero segment base fix r285217 - Always assert DCD and DSR in the uart r285218 - devmem nodes moved to /dev/vmm.io/ r286837 - Add define for SATA Check-Power-Mode r286838 - Add simple (no-op) SATA cmd emulations r288470 - Increase virtio-blk indirect descs r288522 - Firmware guest query interface r288524 - Fix post-test typo r288826 - Clean up SATA unimplemented cmd msg r289001 - Add -l option to specify userboot path Submitted by: Yamagi Burmeister Approved by: re (kib)
-rw-r--r--lib/libvmmapi/vmmapi.c323
-rw-r--r--lib/libvmmapi/vmmapi.h52
-rwxr-xr-xshare/examples/bhyve/vmrun.sh22
-rw-r--r--sys/amd64/include/vmm.h35
-rw-r--r--sys/amd64/include/vmm_dev.h38
-rw-r--r--sys/amd64/vmm/amd/svm.c2
-rw-r--r--sys/amd64/vmm/intel/vmx.c2
-rw-r--r--sys/amd64/vmm/io/ppt.c16
-rw-r--r--sys/amd64/vmm/vmm.c471
-rw-r--r--sys/amd64/vmm/vmm_dev.c398
-rw-r--r--sys/amd64/vmm/vmm_instruction_emul.c63
-rw-r--r--sys/amd64/vmm/vmm_mem.c32
-rw-r--r--sys/amd64/vmm/vmm_mem.h2
-rw-r--r--sys/sys/ata.h1
-rw-r--r--usr.sbin/bhyve/Makefile2
-rw-r--r--usr.sbin/bhyve/bhyve.828
-rw-r--r--usr.sbin/bhyve/bhyverun.c97
-rw-r--r--usr.sbin/bhyve/bootrom.c111
-rw-r--r--usr.sbin/bhyve/bootrom.h38
-rw-r--r--usr.sbin/bhyve/dbgport.c9
-rw-r--r--usr.sbin/bhyve/fwctl.c549
-rw-r--r--usr.sbin/bhyve/fwctl.h54
-rw-r--r--usr.sbin/bhyve/pci_ahci.c10
-rw-r--r--usr.sbin/bhyve/pci_lpc.c27
-rw-r--r--usr.sbin/bhyve/pci_lpc.h1
-rw-r--r--usr.sbin/bhyve/pci_passthru.c8
-rw-r--r--usr.sbin/bhyve/pci_virtio_net.c4
-rw-r--r--usr.sbin/bhyve/uart_emul.c51
-rw-r--r--usr.sbin/bhyvectl/bhyvectl.c111
-rw-r--r--usr.sbin/bhyveload/bhyveload.822
-rw-r--r--usr.sbin/bhyveload/bhyveload.c48
31 files changed, 2170 insertions, 457 deletions
diff --git a/lib/libvmmapi/vmmapi.c b/lib/libvmmapi/vmmapi.c
index 1e6e627..fb8eb78 100644
--- a/lib/libvmmapi/vmmapi.c
+++ b/lib/libvmmapi/vmmapi.c
@@ -58,15 +58,23 @@ __FBSDID("$FreeBSD$");
#define MB (1024 * 1024UL)
#define GB (1024 * 1024 * 1024UL)
+/*
+ * Size of the guard region before and after the virtual address space
+ * mapping the guest physical memory. This must be a multiple of the
+ * superpage size for performance reasons.
+ */
+#define VM_MMAP_GUARD_SIZE (4 * MB)
+
+#define PROT_RW (PROT_READ | PROT_WRITE)
+#define PROT_ALL (PROT_READ | PROT_WRITE | PROT_EXEC)
+
struct vmctx {
int fd;
uint32_t lowmem_limit;
- enum vm_mmap_style vms;
int memflags;
size_t lowmem;
- char *lowmem_addr;
size_t highmem;
- char *highmem_addr;
+ char *baseaddr;
char *name;
};
@@ -157,22 +165,6 @@ vm_parse_memsize(const char *optarg, size_t *ret_memsize)
return (error);
}
-int
-vm_get_memory_seg(struct vmctx *ctx, vm_paddr_t gpa, size_t *ret_len,
- int *wired)
-{
- int error;
- struct vm_memory_segment seg;
-
- bzero(&seg, sizeof(seg));
- seg.gpa = gpa;
- error = ioctl(ctx->fd, VM_GET_MEMORY_SEG, &seg);
- *ret_len = seg.len;
- if (wired != NULL)
- *wired = seg.wired;
- return (error);
-}
-
uint32_t
vm_get_lowmem_limit(struct vmctx *ctx)
{
@@ -194,39 +186,184 @@ vm_set_memflags(struct vmctx *ctx, int flags)
ctx->memflags = flags;
}
+int
+vm_get_memflags(struct vmctx *ctx)
+{
+
+ return (ctx->memflags);
+}
+
+/*
+ * Map segment 'segid' starting at 'off' into guest address range [gpa,gpa+len).
+ */
+int
+vm_mmap_memseg(struct vmctx *ctx, vm_paddr_t gpa, int segid, vm_ooffset_t off,
+ size_t len, int prot)
+{
+ struct vm_memmap memmap;
+ int error, flags;
+
+ memmap.gpa = gpa;
+ memmap.segid = segid;
+ memmap.segoff = off;
+ memmap.len = len;
+ memmap.prot = prot;
+ memmap.flags = 0;
+
+ if (ctx->memflags & VM_MEM_F_WIRED)
+ memmap.flags |= VM_MEMMAP_F_WIRED;
+
+ /*
+ * If this mapping already exists then don't create it again. This
+ * is the common case for SYSMEM mappings created by bhyveload(8).
+ */
+ error = vm_mmap_getnext(ctx, &gpa, &segid, &off, &len, &prot, &flags);
+ if (error == 0 && gpa == memmap.gpa) {
+ if (segid != memmap.segid || off != memmap.segoff ||
+ prot != memmap.prot || flags != memmap.flags) {
+ errno = EEXIST;
+ return (-1);
+ } else {
+ return (0);
+ }
+ }
+
+ error = ioctl(ctx->fd, VM_MMAP_MEMSEG, &memmap);
+ return (error);
+}
+
+int
+vm_mmap_getnext(struct vmctx *ctx, vm_paddr_t *gpa, int *segid,
+ vm_ooffset_t *segoff, size_t *len, int *prot, int *flags)
+{
+ struct vm_memmap memmap;
+ int error;
+
+ bzero(&memmap, sizeof(struct vm_memmap));
+ memmap.gpa = *gpa;
+ error = ioctl(ctx->fd, VM_MMAP_GETNEXT, &memmap);
+ if (error == 0) {
+ *gpa = memmap.gpa;
+ *segid = memmap.segid;
+ *segoff = memmap.segoff;
+ *len = memmap.len;
+ *prot = memmap.prot;
+ *flags = memmap.flags;
+ }
+ return (error);
+}
+
+/*
+ * Return 0 if the segments are identical and non-zero otherwise.
+ *
+ * This is slightly complicated by the fact that only device memory segments
+ * are named.
+ */
+static int
+cmpseg(size_t len, const char *str, size_t len2, const char *str2)
+{
+
+ if (len == len2) {
+ if ((!str && !str2) || (str && str2 && !strcmp(str, str2)))
+ return (0);
+ }
+ return (-1);
+}
+
static int
-setup_memory_segment(struct vmctx *ctx, vm_paddr_t gpa, size_t len, char **addr)
+vm_alloc_memseg(struct vmctx *ctx, int segid, size_t len, const char *name)
{
- int error, mmap_flags;
- struct vm_memory_segment seg;
+ struct vm_memseg memseg;
+ size_t n;
+ int error;
/*
- * Create and optionally map 'len' bytes of memory at guest
- * physical address 'gpa'
+ * If the memory segment has already been created then just return.
+ * This is the usual case for the SYSMEM segment created by userspace
+ * loaders like bhyveload(8).
*/
- bzero(&seg, sizeof(seg));
- seg.gpa = gpa;
- seg.len = len;
- error = ioctl(ctx->fd, VM_MAP_MEMORY, &seg);
- if (error == 0 && addr != NULL) {
- mmap_flags = MAP_SHARED;
- if ((ctx->memflags & VM_MEM_F_INCORE) == 0)
- mmap_flags |= MAP_NOCORE;
- *addr = mmap(NULL, len, PROT_READ | PROT_WRITE, mmap_flags,
- ctx->fd, gpa);
+ error = vm_get_memseg(ctx, segid, &memseg.len, memseg.name,
+ sizeof(memseg.name));
+ if (error)
+ return (error);
+
+ if (memseg.len != 0) {
+ if (cmpseg(len, name, memseg.len, VM_MEMSEG_NAME(&memseg))) {
+ errno = EINVAL;
+ return (-1);
+ } else {
+ return (0);
+ }
+ }
+
+ bzero(&memseg, sizeof(struct vm_memseg));
+ memseg.segid = segid;
+ memseg.len = len;
+ if (name != NULL) {
+ n = strlcpy(memseg.name, name, sizeof(memseg.name));
+ if (n >= sizeof(memseg.name)) {
+ errno = ENAMETOOLONG;
+ return (-1);
+ }
}
+
+ error = ioctl(ctx->fd, VM_ALLOC_MEMSEG, &memseg);
return (error);
}
int
-vm_setup_memory(struct vmctx *ctx, size_t memsize, enum vm_mmap_style vms)
+vm_get_memseg(struct vmctx *ctx, int segid, size_t *lenp, char *namebuf,
+ size_t bufsize)
{
- char **addr;
+ struct vm_memseg memseg;
+ size_t n;
int error;
- /* XXX VM_MMAP_SPARSE not implemented yet */
- assert(vms == VM_MMAP_NONE || vms == VM_MMAP_ALL);
- ctx->vms = vms;
+ memseg.segid = segid;
+ error = ioctl(ctx->fd, VM_GET_MEMSEG, &memseg);
+ if (error == 0) {
+ *lenp = memseg.len;
+ n = strlcpy(namebuf, memseg.name, bufsize);
+ if (n >= bufsize) {
+ errno = ENAMETOOLONG;
+ error = -1;
+ }
+ }
+ return (error);
+}
+
+static int
+setup_memory_segment(struct vmctx *ctx, vm_paddr_t gpa, size_t len, char *base)
+{
+ char *ptr;
+ int error, flags;
+
+ /* Map 'len' bytes starting at 'gpa' in the guest address space */
+ error = vm_mmap_memseg(ctx, gpa, VM_SYSMEM, gpa, len, PROT_ALL);
+ if (error)
+ return (error);
+
+ flags = MAP_SHARED | MAP_FIXED;
+ if ((ctx->memflags & VM_MEM_F_INCORE) == 0)
+ flags |= MAP_NOCORE;
+
+ /* mmap into the process address space on the host */
+ ptr = mmap(base + gpa, len, PROT_RW, flags, ctx->fd, gpa);
+ if (ptr == MAP_FAILED)
+ return (-1);
+
+ return (0);
+}
+
+int
+vm_setup_memory(struct vmctx *ctx, size_t memsize, enum vm_mmap_style vms)
+{
+ size_t objsize, len;
+ vm_paddr_t gpa;
+ char *baseaddr, *ptr;
+ int error, flags;
+
+ assert(vms == VM_MMAP_ALL);
/*
* If 'memsize' cannot fit entirely in the 'lowmem' segment then
@@ -234,43 +371,69 @@ vm_setup_memory(struct vmctx *ctx, size_t memsize, enum vm_mmap_style vms)
*/
if (memsize > ctx->lowmem_limit) {
ctx->lowmem = ctx->lowmem_limit;
- ctx->highmem = memsize - ctx->lowmem;
+ ctx->highmem = memsize - ctx->lowmem_limit;
+ objsize = 4*GB + ctx->highmem;
} else {
ctx->lowmem = memsize;
ctx->highmem = 0;
+ objsize = ctx->lowmem;
}
- if (ctx->lowmem > 0) {
- addr = (vms == VM_MMAP_ALL) ? &ctx->lowmem_addr : NULL;
- error = setup_memory_segment(ctx, 0, ctx->lowmem, addr);
+ error = vm_alloc_memseg(ctx, VM_SYSMEM, objsize, NULL);
+ if (error)
+ return (error);
+
+ /*
+ * Stake out a contiguous region covering the guest physical memory
+ * and the adjoining guard regions.
+ */
+ len = VM_MMAP_GUARD_SIZE + objsize + VM_MMAP_GUARD_SIZE;
+ flags = MAP_PRIVATE | MAP_ANON | MAP_NOCORE | MAP_ALIGNED_SUPER;
+ ptr = mmap(NULL, len, PROT_NONE, flags, -1, 0);
+ if (ptr == MAP_FAILED)
+ return (-1);
+
+ baseaddr = ptr + VM_MMAP_GUARD_SIZE;
+ if (ctx->highmem > 0) {
+ gpa = 4*GB;
+ len = ctx->highmem;
+ error = setup_memory_segment(ctx, gpa, len, baseaddr);
if (error)
return (error);
}
- if (ctx->highmem > 0) {
- addr = (vms == VM_MMAP_ALL) ? &ctx->highmem_addr : NULL;
- error = setup_memory_segment(ctx, 4*GB, ctx->highmem, addr);
+ if (ctx->lowmem > 0) {
+ gpa = 0;
+ len = ctx->lowmem;
+ error = setup_memory_segment(ctx, gpa, len, baseaddr);
if (error)
return (error);
}
+ ctx->baseaddr = baseaddr;
+
return (0);
}
+/*
+ * Returns a non-NULL pointer if [gaddr, gaddr+len) is entirely contained in
+ * the lowmem or highmem regions.
+ *
+ * In particular return NULL if [gaddr, gaddr+len) falls in guest MMIO region.
+ * The instruction emulation code depends on this behavior.
+ */
void *
vm_map_gpa(struct vmctx *ctx, vm_paddr_t gaddr, size_t len)
{
- /* XXX VM_MMAP_SPARSE not implemented yet */
- assert(ctx->vms == VM_MMAP_ALL);
-
- if (gaddr < ctx->lowmem && gaddr + len <= ctx->lowmem)
- return ((void *)(ctx->lowmem_addr + gaddr));
+ if (ctx->lowmem > 0) {
+ if (gaddr < ctx->lowmem && gaddr + len <= ctx->lowmem)
+ return (ctx->baseaddr + gaddr);
+ }
- if (gaddr >= 4*GB) {
- gaddr -= 4*GB;
- if (gaddr < ctx->highmem && gaddr + len <= ctx->highmem)
- return ((void *)(ctx->highmem_addr + gaddr));
+ if (ctx->highmem > 0) {
+ if (gaddr >= 4*GB && gaddr + len <= 4*GB + ctx->highmem)
+ return (ctx->baseaddr + gaddr);
}
return (NULL);
@@ -290,6 +453,56 @@ vm_get_highmem_size(struct vmctx *ctx)
return (ctx->highmem);
}
+void *
+vm_create_devmem(struct vmctx *ctx, int segid, const char *name, size_t len)
+{
+ char pathname[MAXPATHLEN];
+ size_t len2;
+ char *base, *ptr;
+ int fd, error, flags;
+
+ fd = -1;
+ ptr = MAP_FAILED;
+ if (name == NULL || strlen(name) == 0) {
+ errno = EINVAL;
+ goto done;
+ }
+
+ error = vm_alloc_memseg(ctx, segid, len, name);
+ if (error)
+ goto done;
+
+ strlcpy(pathname, "/dev/vmm.io/", sizeof(pathname));
+ strlcat(pathname, ctx->name, sizeof(pathname));
+ strlcat(pathname, ".", sizeof(pathname));
+ strlcat(pathname, name, sizeof(pathname));
+
+ fd = open(pathname, O_RDWR);
+ if (fd < 0)
+ goto done;
+
+ /*
+ * Stake out a contiguous region covering the device memory and the
+ * adjoining guard regions.
+ */
+ len2 = VM_MMAP_GUARD_SIZE + len + VM_MMAP_GUARD_SIZE;
+ flags = MAP_PRIVATE | MAP_ANON | MAP_NOCORE | MAP_ALIGNED_SUPER;
+ base = mmap(NULL, len2, PROT_NONE, flags, -1, 0);
+ if (base == MAP_FAILED)
+ goto done;
+
+ flags = MAP_SHARED | MAP_FIXED;
+ if ((ctx->memflags & VM_MEM_F_INCORE) == 0)
+ flags |= MAP_NOCORE;
+
+ /* mmap the devmem region in the host address space */
+ ptr = mmap(base + VM_MMAP_GUARD_SIZE, len, PROT_RW, flags, fd, 0);
+done:
+ if (fd >= 0)
+ close(fd);
+ return (ptr);
+}
+
int
vm_set_desc(struct vmctx *ctx, int vcpu, int reg,
uint64_t base, uint32_t limit, uint32_t access)
diff --git a/lib/libvmmapi/vmmapi.h b/lib/libvmmapi/vmmapi.h
index d3ecdc4..57f8c56 100644
--- a/lib/libvmmapi/vmmapi.h
+++ b/lib/libvmmapi/vmmapi.h
@@ -36,7 +36,7 @@
* API version for out-of-tree consumers like grub-bhyve for making compile
* time decisions.
*/
-#define VMMAPI_VERSION 0101 /* 2 digit major followed by 2 digit minor */
+#define VMMAPI_VERSION 0102 /* 2 digit major followed by 2 digit minor */
struct iovec;
struct vmctx;
@@ -52,14 +52,59 @@ enum vm_mmap_style {
VM_MMAP_SPARSE, /* mappings created on-demand */
};
+/*
+ * 'flags' value passed to 'vm_set_memflags()'.
+ */
#define VM_MEM_F_INCORE 0x01 /* include guest memory in core file */
+#define VM_MEM_F_WIRED 0x02 /* guest memory is wired */
+
+/*
+ * Identifiers for memory segments:
+ * - vm_setup_memory() uses VM_SYSMEM for the system memory segment.
+ * - the remaining identifiers can be used to create devmem segments.
+ */
+enum {
+ VM_SYSMEM,
+ VM_BOOTROM,
+ VM_FRAMEBUFFER,
+};
+
+/*
+ * Get the length and name of the memory segment identified by 'segid'.
+ * Note that system memory segments are identified with a nul name.
+ *
+ * Returns 0 on success and non-zero otherwise.
+ */
+int vm_get_memseg(struct vmctx *ctx, int ident, size_t *lenp, char *name,
+ size_t namesiz);
+
+/*
+ * Iterate over the guest address space. This function finds an address range
+ * that starts at an address >= *gpa.
+ *
+ * Returns 0 if the next address range was found and non-zero otherwise.
+ */
+int vm_mmap_getnext(struct vmctx *ctx, vm_paddr_t *gpa, int *segid,
+ vm_ooffset_t *segoff, size_t *len, int *prot, int *flags);
+/*
+ * Create a device memory segment identified by 'segid'.
+ *
+ * Returns a pointer to the memory segment on success and MAP_FAILED otherwise.
+ */
+void *vm_create_devmem(struct vmctx *ctx, int segid, const char *name,
+ size_t len);
+
+/*
+ * Map the memory segment identified by 'segid' into the guest address space
+ * at [gpa,gpa+len) with protection 'prot'.
+ */
+int vm_mmap_memseg(struct vmctx *ctx, vm_paddr_t gpa, int segid,
+ vm_ooffset_t segoff, size_t len, int prot);
int vm_create(const char *name);
struct vmctx *vm_open(const char *name);
void vm_destroy(struct vmctx *ctx);
int vm_parse_memsize(const char *optarg, size_t *memsize);
-int vm_get_memory_seg(struct vmctx *ctx, vm_paddr_t gpa, size_t *ret_len,
- int *wired);
int vm_setup_memory(struct vmctx *ctx, size_t len, enum vm_mmap_style s);
void *vm_map_gpa(struct vmctx *ctx, vm_paddr_t gaddr, size_t len);
int vm_get_gpa_pmap(struct vmctx *, uint64_t gpa, uint64_t *pte, int *num);
@@ -68,6 +113,7 @@ int vm_gla2gpa(struct vmctx *, int vcpuid, struct vm_guest_paging *paging,
uint32_t vm_get_lowmem_limit(struct vmctx *ctx);
void vm_set_lowmem_limit(struct vmctx *ctx, uint32_t limit);
void vm_set_memflags(struct vmctx *ctx, int flags);
+int vm_get_memflags(struct vmctx *ctx);
size_t vm_get_lowmem_size(struct vmctx *ctx);
size_t vm_get_highmem_size(struct vmctx *ctx);
int vm_set_desc(struct vmctx *ctx, int vcpu, int reg,
diff --git a/share/examples/bhyve/vmrun.sh b/share/examples/bhyve/vmrun.sh
index eeaf281..72193de 100755
--- a/share/examples/bhyve/vmrun.sh
+++ b/share/examples/bhyve/vmrun.sh
@@ -48,8 +48,8 @@ usage() {
echo "Usage: vmrun.sh [-ahi] [-c <CPUs>] [-C <console>] [-d <disk file>]"
echo " [-e <name=value>] [-g <gdbport> ] [-H <directory>]"
- echo " [-I <location of installation iso>] [-m <memsize>]"
- echo " [-t <tapdev>] <vmname>"
+ echo " [-I <location of installation iso>] [-l <loader>]"
+ echo " [-m <memsize>] [-t <tapdev>] <vmname>"
echo ""
echo " -h: display this help message"
echo " -a: force memory mapped local APIC access"
@@ -61,6 +61,7 @@ usage() {
echo " -H: host filesystem to export to the loader"
echo " -i: force boot of the Installation CDROM image"
echo " -I: Installation CDROM image location (default is ${DEFAULT_ISOFILE})"
+ echo " -l: the OS loader to use (default is /boot/userboot.so)"
echo " -m: memory size (default is ${DEFAULT_MEMSIZE})"
echo " -p: pass-through a host PCI device at bus/slot/func (e.g. 10/0/0)"
echo " -t: tap device for virtio-net (default is $DEFAULT_TAPDEV)"
@@ -87,15 +88,15 @@ console=${DEFAULT_CONSOLE}
cpus=${DEFAULT_CPUS}
tap_total=0
disk_total=0
-apic_opt=""
gdbport=0
loader_opt=""
+bhyverun_opt="-H -A -P"
pass_total=0
-while getopts ac:C:d:e:g:hH:iI:m:p:t: c ; do
+while getopts ac:C:d:e:g:hH:iI:l:m:p:t: c ; do
case $c in
a)
- apic_opt="-a"
+ bhyverun_opt="${bhyverun_opt} -a"
;;
c)
cpus=${OPTARG}
@@ -125,6 +126,9 @@ while getopts ac:C:d:e:g:hH:iI:m:p:t: c ; do
I)
isofile=${OPTARG}
;;
+ l)
+ loader_opt="${loader_opt} -l ${OPTARG}"
+ ;;
m)
memsize=${OPTARG}
;;
@@ -163,6 +167,12 @@ if [ -n "${host_base}" ]; then
loader_opt="${loader_opt} -h ${host_base}"
fi
+# If PCI passthru devices are configured then guest memory must be wired
+if [ ${pass_total} -gt 0 ]; then
+ loader_opt="${loader_opt} -S"
+ bhyverun_opt="${bhyverun_opt} -S"
+fi
+
make_and_check_diskdev()
{
local virtio_diskdev="$1"
@@ -263,7 +273,7 @@ while [ 1 ]; do
i=$(($i + 1))
done
- ${FBSDRUN} -c ${cpus} -m ${memsize} ${apic_opt} -A -H -P \
+ ${FBSDRUN} -c ${cpus} -m ${memsize} ${bhyverun_opt} \
-g ${gdbport} \
-s 0:0,hostbridge \
-s 1:0,lpc \
diff --git a/sys/amd64/include/vmm.h b/sys/amd64/include/vmm.h
index 1a4e5ab..f2de960 100644
--- a/sys/amd64/include/vmm.h
+++ b/sys/amd64/include/vmm.h
@@ -108,7 +108,6 @@ enum x2apic_state {
struct vm;
struct vm_exception;
-struct vm_memory_segment;
struct seg_desc;
struct vm_exit;
struct vm_run;
@@ -175,17 +174,33 @@ int vm_create(const char *name, struct vm **retvm);
void vm_destroy(struct vm *vm);
int vm_reinit(struct vm *vm);
const char *vm_name(struct vm *vm);
-int vm_malloc(struct vm *vm, vm_paddr_t gpa, size_t len);
+
+/*
+ * APIs that modify the guest memory map require all vcpus to be frozen.
+ */
+int vm_mmap_memseg(struct vm *vm, vm_paddr_t gpa, int segid, vm_ooffset_t off,
+ size_t len, int prot, int flags);
+int vm_alloc_memseg(struct vm *vm, int ident, size_t len, bool sysmem);
+void vm_free_memseg(struct vm *vm, int ident);
int vm_map_mmio(struct vm *vm, vm_paddr_t gpa, size_t len, vm_paddr_t hpa);
int vm_unmap_mmio(struct vm *vm, vm_paddr_t gpa, size_t len);
-void *vm_gpa_hold(struct vm *, vm_paddr_t gpa, size_t len, int prot,
- void **cookie);
+int vm_assign_pptdev(struct vm *vm, int bus, int slot, int func);
+int vm_unassign_pptdev(struct vm *vm, int bus, int slot, int func);
+
+/*
+ * APIs that inspect the guest memory map require only a *single* vcpu to
+ * be frozen. This acts like a read lock on the guest memory map since any
+ * modification requires *all* vcpus to be frozen.
+ */
+int vm_mmap_getnext(struct vm *vm, vm_paddr_t *gpa, int *segid,
+ vm_ooffset_t *segoff, size_t *len, int *prot, int *flags);
+int vm_get_memseg(struct vm *vm, int ident, size_t *len, bool *sysmem,
+ struct vm_object **objptr);
+void *vm_gpa_hold(struct vm *, int vcpuid, vm_paddr_t gpa, size_t len,
+ int prot, void **cookie);
void vm_gpa_release(void *cookie);
-int vm_gpabase2memseg(struct vm *vm, vm_paddr_t gpabase,
- struct vm_memory_segment *seg);
-int vm_get_memobj(struct vm *vm, vm_paddr_t gpa, size_t len,
- vm_offset_t *offset, struct vm_object **object);
-boolean_t vm_mem_allocated(struct vm *vm, vm_paddr_t gpa);
+bool vm_mem_allocated(struct vm *vm, int vcpuid, vm_paddr_t gpa);
+
int vm_get_register(struct vm *vm, int vcpu, int reg, uint64_t *retval);
int vm_set_register(struct vm *vm, int vcpu, int reg, uint64_t val);
int vm_get_seg_desc(struct vm *vm, int vcpu, int reg,
@@ -302,8 +317,6 @@ vcpu_should_yield(struct vm *vm, int vcpu)
void *vcpu_stats(struct vm *vm, int vcpu);
void vcpu_notify_event(struct vm *vm, int vcpuid, bool lapic_intr);
struct vmspace *vm_get_vmspace(struct vm *vm);
-int vm_assign_pptdev(struct vm *vm, int bus, int slot, int func);
-int vm_unassign_pptdev(struct vm *vm, int bus, int slot, int func);
struct vatpic *vm_atpic(struct vm *vm);
struct vatpit *vm_atpit(struct vm *vm);
struct vpmtmr *vm_pmtmr(struct vm *vm);
diff --git a/sys/amd64/include/vmm_dev.h b/sys/amd64/include/vmm_dev.h
index 9d031a9..1af75a3 100644
--- a/sys/amd64/include/vmm_dev.h
+++ b/sys/amd64/include/vmm_dev.h
@@ -34,10 +34,22 @@ void vmmdev_init(void);
int vmmdev_cleanup(void);
#endif
-struct vm_memory_segment {
- vm_paddr_t gpa; /* in */
+struct vm_memmap {
+ vm_paddr_t gpa;
+ int segid; /* memory segment */
+ vm_ooffset_t segoff; /* offset into memory segment */
+ size_t len; /* mmap length */
+ int prot; /* RWX */
+ int flags;
+};
+#define VM_MEMMAP_F_WIRED 0x01
+#define VM_MEMMAP_F_IOMMU 0x02
+
+#define VM_MEMSEG_NAME(m) ((m)->name[0] != '\0' ? (m)->name : NULL)
+struct vm_memseg {
+ int segid;
size_t len;
- int wired;
+ char name[SPECNAMELEN + 1];
};
struct vm_register {
@@ -214,10 +226,14 @@ enum {
IOCNUM_REINIT = 5,
/* memory apis */
- IOCNUM_MAP_MEMORY = 10,
- IOCNUM_GET_MEMORY_SEG = 11,
+ IOCNUM_MAP_MEMORY = 10, /* deprecated */
+ IOCNUM_GET_MEMORY_SEG = 11, /* deprecated */
IOCNUM_GET_GPA_PMAP = 12,
IOCNUM_GLA2GPA = 13,
+ IOCNUM_ALLOC_MEMSEG = 14,
+ IOCNUM_GET_MEMSEG = 15,
+ IOCNUM_MMAP_MEMSEG = 16,
+ IOCNUM_MMAP_GETNEXT = 17,
/* register/state accessors */
IOCNUM_SET_REGISTER = 20,
@@ -278,10 +294,14 @@ enum {
_IOW('v', IOCNUM_SUSPEND, struct vm_suspend)
#define VM_REINIT \
_IO('v', IOCNUM_REINIT)
-#define VM_MAP_MEMORY \
- _IOWR('v', IOCNUM_MAP_MEMORY, struct vm_memory_segment)
-#define VM_GET_MEMORY_SEG \
- _IOWR('v', IOCNUM_GET_MEMORY_SEG, struct vm_memory_segment)
+#define VM_ALLOC_MEMSEG \
+ _IOW('v', IOCNUM_ALLOC_MEMSEG, struct vm_memseg)
+#define VM_GET_MEMSEG \
+ _IOWR('v', IOCNUM_GET_MEMSEG, struct vm_memseg)
+#define VM_MMAP_MEMSEG \
+ _IOW('v', IOCNUM_MMAP_MEMSEG, struct vm_memmap)
+#define VM_MMAP_GETNEXT \
+ _IOWR('v', IOCNUM_MMAP_GETNEXT, struct vm_memmap)
#define VM_SET_REGISTER \
_IOW('v', IOCNUM_SET_REGISTER, struct vm_register)
#define VM_GET_REGISTER \
diff --git a/sys/amd64/vmm/amd/svm.c b/sys/amd64/vmm/amd/svm.c
index abca613..ca5141a 100644
--- a/sys/amd64/vmm/amd/svm.c
+++ b/sys/amd64/vmm/amd/svm.c
@@ -1477,7 +1477,7 @@ svm_vmexit(struct svm_softc *svm_sc, int vcpu, struct vm_exit *vmexit)
VCPU_CTR2(svm_sc->vm, vcpu, "nested page fault with "
"reserved bits set: info1(%#lx) info2(%#lx)",
info1, info2);
- } else if (vm_mem_allocated(svm_sc->vm, info2)) {
+ } else if (vm_mem_allocated(svm_sc->vm, vcpu, info2)) {
vmexit->exitcode = VM_EXITCODE_PAGING;
vmexit->u.paging.gpa = info2;
vmexit->u.paging.fault_type = npf_fault_type(info1);
diff --git a/sys/amd64/vmm/intel/vmx.c b/sys/amd64/vmm/intel/vmx.c
index 5bbfe99..c5cafa8 100644
--- a/sys/amd64/vmm/intel/vmx.c
+++ b/sys/amd64/vmm/intel/vmx.c
@@ -2426,7 +2426,7 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit)
* this must be an instruction that accesses MMIO space.
*/
gpa = vmcs_gpa();
- if (vm_mem_allocated(vmx->vm, gpa) ||
+ if (vm_mem_allocated(vmx->vm, vcpu, gpa) ||
apic_access_fault(vmx, vcpu, gpa)) {
vmexit->exitcode = VM_EXITCODE_PAGING;
vmexit->inst_length = 0;
diff --git a/sys/amd64/vmm/io/ppt.c b/sys/amd64/vmm/io/ppt.c
index b789f77..692190a 100644
--- a/sys/amd64/vmm/io/ppt.c
+++ b/sys/amd64/vmm/io/ppt.c
@@ -76,11 +76,17 @@ struct pptintr_arg { /* pptintr(pptintr_arg) */
uint64_t msg_data;
};
+struct pptseg {
+ vm_paddr_t gpa;
+ size_t len;
+ int wired;
+};
+
struct pptdev {
device_t dev;
struct vm *vm; /* owner of this device */
TAILQ_ENTRY(pptdev) next;
- struct vm_memory_segment mmio[MAX_MMIOSEGS];
+ struct pptseg mmio[MAX_MMIOSEGS];
struct {
int num_msgs; /* guest state */
@@ -207,14 +213,14 @@ static void
ppt_unmap_mmio(struct vm *vm, struct pptdev *ppt)
{
int i;
- struct vm_memory_segment *seg;
+ struct pptseg *seg;
for (i = 0; i < MAX_MMIOSEGS; i++) {
seg = &ppt->mmio[i];
if (seg->len == 0)
continue;
(void)vm_unmap_mmio(vm, seg->gpa, seg->len);
- bzero(seg, sizeof(struct vm_memory_segment));
+ bzero(seg, sizeof(struct pptseg));
}
}
@@ -324,7 +330,7 @@ ppt_is_mmio(struct vm *vm, vm_paddr_t gpa)
{
int i;
struct pptdev *ppt;
- struct vm_memory_segment *seg;
+ struct pptseg *seg;
TAILQ_FOREACH(ppt, &pptdev_list, next) {
if (ppt->vm != vm)
@@ -410,7 +416,7 @@ ppt_map_mmio(struct vm *vm, int bus, int slot, int func,
vm_paddr_t gpa, size_t len, vm_paddr_t hpa)
{
int i, error;
- struct vm_memory_segment *seg;
+ struct pptseg *seg;
struct pptdev *ppt;
ppt = ppt_find(bus, slot, func);
diff --git a/sys/amd64/vmm/vmm.c b/sys/amd64/vmm/vmm.c
index c3c7bb1..31e35d2 100644
--- a/sys/amd64/vmm/vmm.c
+++ b/sys/amd64/vmm/vmm.c
@@ -120,12 +120,21 @@ struct vcpu {
#define vcpu_assert_locked(v) mtx_assert(&((v)->mtx), MA_OWNED)
struct mem_seg {
+ size_t len;
+ bool sysmem;
+ struct vm_object *object;
+};
+#define VM_MAX_MEMSEGS 2
+
+struct mem_map {
vm_paddr_t gpa;
size_t len;
- boolean_t wired;
- vm_object_t object;
+ vm_ooffset_t segoff;
+ int segid;
+ int prot;
+ int flags;
};
-#define VM_MAX_MEMORY_SEGMENTS 2
+#define VM_MAX_MEMMAPS 4
/*
* Initialization:
@@ -151,8 +160,8 @@ struct vm {
void *rendezvous_arg; /* (x) rendezvous func/arg */
vm_rendezvous_func_t rendezvous_func;
struct mtx rendezvous_mtx; /* (o) rendezvous lock */
- int num_mem_segs; /* (o) guest memory segments */
- struct mem_seg mem_segs[VM_MAX_MEMORY_SEGMENTS];
+ struct mem_map mem_maps[VM_MAX_MEMMAPS]; /* (i) guest address space */
+ struct mem_seg mem_segs[VM_MAX_MEMSEGS]; /* (o) guest memory regions */
struct vmspace *vmspace; /* (o) guest's address space */
char name[VM_MAX_NAMELEN]; /* (o) virtual machine name */
struct vcpu vcpu[VM_MAXCPU]; /* (i) guest vcpus */
@@ -224,6 +233,8 @@ TUNABLE_INT("hw.vmm.force_iommu", &vmm_force_iommu);
SYSCTL_INT(_hw_vmm, OID_AUTO, force_iommu, CTLFLAG_RDTUN, &vmm_force_iommu, 0,
"Force use of I/O MMU even if no passthrough devices were found.");
+static void vm_free_memmap(struct vm *vm, int ident);
+static bool sysmem_mapping(struct vm *vm, struct mem_map *mm);
static void vcpu_notify_event_locked(struct vcpu *vcpu, bool lapic_intr);
#ifdef KTR
@@ -444,7 +455,6 @@ vm_create(const char *name, struct vm **retvm)
vm = malloc(sizeof(struct vm), M_VM, M_WAITOK | M_ZERO);
strcpy(vm->name, name);
- vm->num_mem_segs = 0;
vm->vmspace = vmspace;
mtx_init(&vm->rendezvous_mtx, "vm rendezvous lock", 0, MTX_DEF);
@@ -455,18 +465,9 @@ vm_create(const char *name, struct vm **retvm)
}
static void
-vm_free_mem_seg(struct vm *vm, struct mem_seg *seg)
-{
-
- if (seg->object != NULL)
- vmm_mem_free(vm->vmspace, seg->gpa, seg->len);
-
- bzero(seg, sizeof(*seg));
-}
-
-static void
vm_cleanup(struct vm *vm, bool destroy)
{
+ struct mem_map *mm;
int i;
ppt_unassign_all(vm);
@@ -489,11 +490,23 @@ vm_cleanup(struct vm *vm, bool destroy)
VMCLEANUP(vm->cookie);
- if (destroy) {
- for (i = 0; i < vm->num_mem_segs; i++)
- vm_free_mem_seg(vm, &vm->mem_segs[i]);
+ /*
+ * System memory is removed from the guest address space only when
+ * the VM is destroyed. This is because the mapping remains the same
+ * across VM reset.
+ *
+ * Device memory can be relocated by the guest (e.g. using PCI BARs)
+ * so those mappings are removed on a VM reset.
+ */
+ for (i = 0; i < VM_MAX_MEMMAPS; i++) {
+ mm = &vm->mem_maps[i];
+ if (destroy || !sysmem_mapping(vm, mm))
+ vm_free_memmap(vm, i);
+ }
- vm->num_mem_segs = 0;
+ if (destroy) {
+ for (i = 0; i < VM_MAX_MEMSEGS; i++)
+ vm_free_memseg(vm, i);
VMSPACE_FREE(vm->vmspace);
vm->vmspace = NULL;
@@ -551,146 +564,243 @@ vm_unmap_mmio(struct vm *vm, vm_paddr_t gpa, size_t len)
return (0);
}
-boolean_t
-vm_mem_allocated(struct vm *vm, vm_paddr_t gpa)
+/*
+ * Return 'true' if 'gpa' is allocated in the guest address space.
+ *
+ * This function is called in the context of a running vcpu which acts as
+ * an implicit lock on 'vm->mem_maps[]'.
+ */
+bool
+vm_mem_allocated(struct vm *vm, int vcpuid, vm_paddr_t gpa)
{
+ struct mem_map *mm;
int i;
- vm_paddr_t gpabase, gpalimit;
- for (i = 0; i < vm->num_mem_segs; i++) {
- gpabase = vm->mem_segs[i].gpa;
- gpalimit = gpabase + vm->mem_segs[i].len;
- if (gpa >= gpabase && gpa < gpalimit)
- return (TRUE); /* 'gpa' is regular memory */
+#ifdef INVARIANTS
+ int hostcpu, state;
+ state = vcpu_get_state(vm, vcpuid, &hostcpu);
+ KASSERT(state == VCPU_RUNNING && hostcpu == curcpu,
+ ("%s: invalid vcpu state %d/%d", __func__, state, hostcpu));
+#endif
+
+ for (i = 0; i < VM_MAX_MEMMAPS; i++) {
+ mm = &vm->mem_maps[i];
+ if (mm->len != 0 && gpa >= mm->gpa && gpa < mm->gpa + mm->len)
+ return (true); /* 'gpa' is sysmem or devmem */
}
if (ppt_is_mmio(vm, gpa))
- return (TRUE); /* 'gpa' is pci passthru mmio */
+ return (true); /* 'gpa' is pci passthru mmio */
- return (FALSE);
+ return (false);
}
int
-vm_malloc(struct vm *vm, vm_paddr_t gpa, size_t len)
+vm_alloc_memseg(struct vm *vm, int ident, size_t len, bool sysmem)
{
- int available, allocated;
struct mem_seg *seg;
- vm_object_t object;
- vm_paddr_t g;
+ vm_object_t obj;
- if ((gpa & PAGE_MASK) || (len & PAGE_MASK) || len == 0)
+ if (ident < 0 || ident >= VM_MAX_MEMSEGS)
return (EINVAL);
-
- available = allocated = 0;
- g = gpa;
- while (g < gpa + len) {
- if (vm_mem_allocated(vm, g))
- allocated++;
- else
- available++;
- g += PAGE_SIZE;
- }
-
- /*
- * If there are some allocated and some available pages in the address
- * range then it is an error.
- */
- if (allocated && available)
+ if (len == 0 || (len & PAGE_MASK))
return (EINVAL);
- /*
- * If the entire address range being requested has already been
- * allocated then there isn't anything more to do.
- */
- if (allocated && available == 0)
- return (0);
-
- if (vm->num_mem_segs >= VM_MAX_MEMORY_SEGMENTS)
- return (E2BIG);
-
- seg = &vm->mem_segs[vm->num_mem_segs];
+ seg = &vm->mem_segs[ident];
+ if (seg->object != NULL) {
+ if (seg->len == len && seg->sysmem == sysmem)
+ return (EEXIST);
+ else
+ return (EINVAL);
+ }
- if ((object = vmm_mem_alloc(vm->vmspace, gpa, len)) == NULL)
+ obj = vm_object_allocate(OBJT_DEFAULT, len >> PAGE_SHIFT);
+ if (obj == NULL)
return (ENOMEM);
- seg->gpa = gpa;
seg->len = len;
- seg->object = object;
- seg->wired = FALSE;
+ seg->object = obj;
+ seg->sysmem = sysmem;
+ return (0);
+}
- vm->num_mem_segs++;
+int
+vm_get_memseg(struct vm *vm, int ident, size_t *len, bool *sysmem,
+ vm_object_t *objptr)
+{
+ struct mem_seg *seg;
+
+ if (ident < 0 || ident >= VM_MAX_MEMSEGS)
+ return (EINVAL);
+ seg = &vm->mem_segs[ident];
+ if (len)
+ *len = seg->len;
+ if (sysmem)
+ *sysmem = seg->sysmem;
+ if (objptr)
+ *objptr = seg->object;
return (0);
}
-static vm_paddr_t
-vm_maxmem(struct vm *vm)
+void
+vm_free_memseg(struct vm *vm, int ident)
{
- int i;
- vm_paddr_t gpa, maxmem;
+ struct mem_seg *seg;
- maxmem = 0;
- for (i = 0; i < vm->num_mem_segs; i++) {
- gpa = vm->mem_segs[i].gpa + vm->mem_segs[i].len;
- if (gpa > maxmem)
- maxmem = gpa;
+ KASSERT(ident >= 0 && ident < VM_MAX_MEMSEGS,
+ ("%s: invalid memseg ident %d", __func__, ident));
+
+ seg = &vm->mem_segs[ident];
+ if (seg->object != NULL) {
+ vm_object_deallocate(seg->object);
+ bzero(seg, sizeof(struct mem_seg));
}
- return (maxmem);
}
-static void
-vm_gpa_unwire(struct vm *vm)
+int
+vm_mmap_memseg(struct vm *vm, vm_paddr_t gpa, int segid, vm_ooffset_t first,
+ size_t len, int prot, int flags)
{
- int i, rv;
struct mem_seg *seg;
+ struct mem_map *m, *map;
+ vm_ooffset_t last;
+ int i, error;
- for (i = 0; i < vm->num_mem_segs; i++) {
- seg = &vm->mem_segs[i];
- if (!seg->wired)
- continue;
+ if (prot == 0 || (prot & ~(VM_PROT_ALL)) != 0)
+ return (EINVAL);
+
+ if (flags & ~VM_MEMMAP_F_WIRED)
+ return (EINVAL);
+
+ if (segid < 0 || segid >= VM_MAX_MEMSEGS)
+ return (EINVAL);
- rv = vm_map_unwire(&vm->vmspace->vm_map,
- seg->gpa, seg->gpa + seg->len,
- VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES);
- KASSERT(rv == KERN_SUCCESS, ("vm(%s) memory segment "
- "%#lx/%ld could not be unwired: %d",
- vm_name(vm), seg->gpa, seg->len, rv));
+ seg = &vm->mem_segs[segid];
+ if (seg->object == NULL)
+ return (EINVAL);
+
+ last = first + len;
+ if (first < 0 || first >= last || last > seg->len)
+ return (EINVAL);
+
+ if ((gpa | first | last) & PAGE_MASK)
+ return (EINVAL);
+
+ map = NULL;
+ for (i = 0; i < VM_MAX_MEMMAPS; i++) {
+ m = &vm->mem_maps[i];
+ if (m->len == 0) {
+ map = m;
+ break;
+ }
+ }
- seg->wired = FALSE;
+ if (map == NULL)
+ return (ENOSPC);
+
+ error = vm_map_find(&vm->vmspace->vm_map, seg->object, first, &gpa,
+ len, 0, VMFS_NO_SPACE, prot, prot, 0);
+ if (error != KERN_SUCCESS)
+ return (EFAULT);
+
+ vm_object_reference(seg->object);
+
+ if (flags & VM_MEMMAP_F_WIRED) {
+ error = vm_map_wire(&vm->vmspace->vm_map, gpa, gpa + len,
+ VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES);
+ if (error != KERN_SUCCESS) {
+ vm_map_remove(&vm->vmspace->vm_map, gpa, gpa + len);
+ return (EFAULT);
+ }
}
+
+ map->gpa = gpa;
+ map->len = len;
+ map->segoff = first;
+ map->segid = segid;
+ map->prot = prot;
+ map->flags = flags;
+ return (0);
}
-static int
-vm_gpa_wire(struct vm *vm)
+int
+vm_mmap_getnext(struct vm *vm, vm_paddr_t *gpa, int *segid,
+ vm_ooffset_t *segoff, size_t *len, int *prot, int *flags)
{
- int i, rv;
- struct mem_seg *seg;
+ struct mem_map *mm, *mmnext;
+ int i;
- for (i = 0; i < vm->num_mem_segs; i++) {
- seg = &vm->mem_segs[i];
- if (seg->wired)
+ mmnext = NULL;
+ for (i = 0; i < VM_MAX_MEMMAPS; i++) {
+ mm = &vm->mem_maps[i];
+ if (mm->len == 0 || mm->gpa < *gpa)
continue;
+ if (mmnext == NULL || mm->gpa < mmnext->gpa)
+ mmnext = mm;
+ }
- /* XXX rlimits? */
- rv = vm_map_wire(&vm->vmspace->vm_map,
- seg->gpa, seg->gpa + seg->len,
- VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES);
- if (rv != KERN_SUCCESS)
- break;
-
- seg->wired = TRUE;
+ if (mmnext != NULL) {
+ *gpa = mmnext->gpa;
+ if (segid)
+ *segid = mmnext->segid;
+ if (segoff)
+ *segoff = mmnext->segoff;
+ if (len)
+ *len = mmnext->len;
+ if (prot)
+ *prot = mmnext->prot;
+ if (flags)
+ *flags = mmnext->flags;
+ return (0);
+ } else {
+ return (ENOENT);
}
+}
- if (i < vm->num_mem_segs) {
- /*
- * Undo the wiring before returning an error.
- */
- vm_gpa_unwire(vm);
- return (EAGAIN);
+static void
+vm_free_memmap(struct vm *vm, int ident)
+{
+ struct mem_map *mm;
+ int error;
+
+ mm = &vm->mem_maps[ident];
+ if (mm->len) {
+ error = vm_map_remove(&vm->vmspace->vm_map, mm->gpa,
+ mm->gpa + mm->len);
+ KASSERT(error == KERN_SUCCESS, ("%s: vm_map_remove error %d",
+ __func__, error));
+ bzero(mm, sizeof(struct mem_map));
}
+}
- return (0);
+static __inline bool
+sysmem_mapping(struct vm *vm, struct mem_map *mm)
+{
+
+ if (mm->len != 0 && vm->mem_segs[mm->segid].sysmem)
+ return (true);
+ else
+ return (false);
+}
+
+static vm_paddr_t
+sysmem_maxaddr(struct vm *vm)
+{
+ struct mem_map *mm;
+ vm_paddr_t maxaddr;
+ int i;
+
+ maxaddr = 0;
+ for (i = 0; i < VM_MAX_MEMMAPS; i++) {
+ mm = &vm->mem_maps[i];
+ if (sysmem_mapping(vm, mm)) {
+ if (maxaddr < mm->gpa + mm->len)
+ maxaddr = mm->gpa + mm->len;
+ }
+ }
+ return (maxaddr);
}
static void
@@ -698,20 +808,36 @@ vm_iommu_modify(struct vm *vm, boolean_t map)
{
int i, sz;
vm_paddr_t gpa, hpa;
- struct mem_seg *seg;
+ struct mem_map *mm;
void *vp, *cookie, *host_domain;
sz = PAGE_SIZE;
host_domain = iommu_host_domain();
- for (i = 0; i < vm->num_mem_segs; i++) {
- seg = &vm->mem_segs[i];
- KASSERT(seg->wired, ("vm(%s) memory segment %#lx/%ld not wired",
- vm_name(vm), seg->gpa, seg->len));
+ for (i = 0; i < VM_MAX_MEMMAPS; i++) {
+ mm = &vm->mem_maps[i];
+ if (!sysmem_mapping(vm, mm))
+ continue;
- gpa = seg->gpa;
- while (gpa < seg->gpa + seg->len) {
- vp = vm_gpa_hold(vm, gpa, PAGE_SIZE, VM_PROT_WRITE,
+ if (map) {
+ KASSERT((mm->flags & VM_MEMMAP_F_IOMMU) == 0,
+ ("iommu map found invalid memmap %#lx/%#lx/%#x",
+ mm->gpa, mm->len, mm->flags));
+ if ((mm->flags & VM_MEMMAP_F_WIRED) == 0)
+ continue;
+ mm->flags |= VM_MEMMAP_F_IOMMU;
+ } else {
+ if ((mm->flags & VM_MEMMAP_F_IOMMU) == 0)
+ continue;
+ mm->flags &= ~VM_MEMMAP_F_IOMMU;
+ KASSERT((mm->flags & VM_MEMMAP_F_WIRED) != 0,
+ ("iommu unmap found invalid memmap %#lx/%#lx/%#x",
+ mm->gpa, mm->len, mm->flags));
+ }
+
+ gpa = mm->gpa;
+ while (gpa < mm->gpa + mm->len) {
+ vp = vm_gpa_hold(vm, -1, gpa, PAGE_SIZE, VM_PROT_WRITE,
&cookie);
KASSERT(vp != NULL, ("vm(%s) could not map gpa %#lx",
vm_name(vm), gpa));
@@ -753,10 +879,9 @@ vm_unassign_pptdev(struct vm *vm, int bus, int slot, int func)
if (error)
return (error);
- if (ppt_assigned_devices(vm) == 0) {
+ if (ppt_assigned_devices(vm) == 0)
vm_iommu_unmap(vm);
- vm_gpa_unwire(vm);
- }
+
return (0);
}
@@ -766,23 +891,12 @@ vm_assign_pptdev(struct vm *vm, int bus, int slot, int func)
int error;
vm_paddr_t maxaddr;
- /*
- * Virtual machines with pci passthru devices get special treatment:
- * - the guest physical memory is wired
- * - the iommu is programmed to do the 'gpa' to 'hpa' translation
- *
- * We need to do this before the first pci passthru device is attached.
- */
+ /* Set up the IOMMU to do the 'gpa' to 'hpa' translation */
if (ppt_assigned_devices(vm) == 0) {
KASSERT(vm->iommu == NULL,
("vm_assign_pptdev: iommu must be NULL"));
- maxaddr = vm_maxmem(vm);
+ maxaddr = sysmem_maxaddr(vm);
vm->iommu = iommu_create_domain(maxaddr);
-
- error = vm_gpa_wire(vm);
- if (error)
- return (error);
-
vm_iommu_map(vm);
}
@@ -791,18 +905,43 @@ vm_assign_pptdev(struct vm *vm, int bus, int slot, int func)
}
void *
-vm_gpa_hold(struct vm *vm, vm_paddr_t gpa, size_t len, int reqprot,
+vm_gpa_hold(struct vm *vm, int vcpuid, vm_paddr_t gpa, size_t len, int reqprot,
void **cookie)
{
- int count, pageoff;
+ int i, count, pageoff;
+ struct mem_map *mm;
vm_page_t m;
-
+#ifdef INVARIANTS
+ /*
+ * All vcpus are frozen by ioctls that modify the memory map
+ * (e.g. VM_MMAP_MEMSEG). Therefore 'vm->memmap[]' stability is
+ * guaranteed if at least one vcpu is in the VCPU_FROZEN state.
+ */
+ int state;
+ KASSERT(vcpuid >= -1 || vcpuid < VM_MAXCPU, ("%s: invalid vcpuid %d",
+ __func__, vcpuid));
+ for (i = 0; i < VM_MAXCPU; i++) {
+ if (vcpuid != -1 && vcpuid != i)
+ continue;
+ state = vcpu_get_state(vm, i, NULL);
+ KASSERT(state == VCPU_FROZEN, ("%s: invalid vcpu state %d",
+ __func__, state));
+ }
+#endif
pageoff = gpa & PAGE_MASK;
if (len > PAGE_SIZE - pageoff)
panic("vm_gpa_hold: invalid gpa/len: 0x%016lx/%lu", gpa, len);
- count = vm_fault_quick_hold_pages(&vm->vmspace->vm_map,
- trunc_page(gpa), PAGE_SIZE, reqprot, &m, 1);
+ count = 0;
+ for (i = 0; i < VM_MAX_MEMMAPS; i++) {
+ mm = &vm->mem_maps[i];
+ if (sysmem_mapping(vm, mm) && gpa >= mm->gpa &&
+ gpa < mm->gpa + mm->len) {
+ count = vm_fault_quick_hold_pages(&vm->vmspace->vm_map,
+ trunc_page(gpa), PAGE_SIZE, reqprot, &m, 1);
+ break;
+ }
+ }
if (count == 1) {
*cookie = m;
@@ -824,50 +963,6 @@ vm_gpa_release(void *cookie)
}
int
-vm_gpabase2memseg(struct vm *vm, vm_paddr_t gpabase,
- struct vm_memory_segment *seg)
-{
- int i;
-
- for (i = 0; i < vm->num_mem_segs; i++) {
- if (gpabase == vm->mem_segs[i].gpa) {
- seg->gpa = vm->mem_segs[i].gpa;
- seg->len = vm->mem_segs[i].len;
- seg->wired = vm->mem_segs[i].wired;
- return (0);
- }
- }
- return (-1);
-}
-
-int
-vm_get_memobj(struct vm *vm, vm_paddr_t gpa, size_t len,
- vm_offset_t *offset, struct vm_object **object)
-{
- int i;
- size_t seg_len;
- vm_paddr_t seg_gpa;
- vm_object_t seg_obj;
-
- for (i = 0; i < vm->num_mem_segs; i++) {
- if ((seg_obj = vm->mem_segs[i].object) == NULL)
- continue;
-
- seg_gpa = vm->mem_segs[i].gpa;
- seg_len = vm->mem_segs[i].len;
-
- if (gpa >= seg_gpa && gpa < seg_gpa + seg_len) {
- *offset = gpa - seg_gpa;
- *object = seg_obj;
- vm_object_reference(seg_obj);
- return (0);
- }
- }
-
- return (EINVAL);
-}
-
-int
vm_get_register(struct vm *vm, int vcpu, int reg, uint64_t *retval)
{
@@ -2425,8 +2520,8 @@ vm_copy_setup(struct vm *vm, int vcpuid, struct vm_guest_paging *paging,
}
for (idx = 0; idx < nused; idx++) {
- hva = vm_gpa_hold(vm, copyinfo[idx].gpa, copyinfo[idx].len,
- prot, &cookie);
+ hva = vm_gpa_hold(vm, vcpuid, copyinfo[idx].gpa,
+ copyinfo[idx].len, prot, &cookie);
if (hva == NULL)
break;
copyinfo[idx].hva = hva;
diff --git a/sys/amd64/vmm/vmm_dev.c b/sys/amd64/vmm/vmm_dev.c
index e3e140a..5cb4150 100644
--- a/sys/amd64/vmm/vmm_dev.c
+++ b/sys/amd64/vmm/vmm_dev.c
@@ -45,6 +45,7 @@ __FBSDID("$FreeBSD$");
#include <vm/vm.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
+#include <vm/vm_object.h>
#include <machine/vmparam.h>
#include <machine/vmm.h>
@@ -60,10 +61,19 @@ __FBSDID("$FreeBSD$");
#include "io/vhpet.h"
#include "io/vrtc.h"
+struct devmem_softc {
+ int segid;
+ char *name;
+ struct cdev *cdev;
+ struct vmmdev_softc *sc;
+ SLIST_ENTRY(devmem_softc) link;
+};
+
struct vmmdev_softc {
struct vm *vm; /* vm instance cookie */
struct cdev *cdev;
SLIST_ENTRY(vmmdev_softc) link;
+ SLIST_HEAD(, devmem_softc) devmem;
int flags;
};
#define VSC_LINKED 0x01
@@ -76,6 +86,63 @@ static MALLOC_DEFINE(M_VMMDEV, "vmmdev", "vmmdev");
SYSCTL_DECL(_hw_vmm);
+static int devmem_create_cdev(const char *vmname, int id, char *devmem);
+static void devmem_destroy(void *arg);
+
+static int
+vcpu_lock_one(struct vmmdev_softc *sc, int vcpu)
+{
+ int error;
+
+ if (vcpu < 0 || vcpu >= VM_MAXCPU)
+ return (EINVAL);
+
+ error = vcpu_set_state(sc->vm, vcpu, VCPU_FROZEN, true);
+ return (error);
+}
+
+static void
+vcpu_unlock_one(struct vmmdev_softc *sc, int vcpu)
+{
+ enum vcpu_state state;
+
+ state = vcpu_get_state(sc->vm, vcpu, NULL);
+ if (state != VCPU_FROZEN) {
+ panic("vcpu %s(%d) has invalid state %d", vm_name(sc->vm),
+ vcpu, state);
+ }
+
+ vcpu_set_state(sc->vm, vcpu, VCPU_IDLE, false);
+}
+
+static int
+vcpu_lock_all(struct vmmdev_softc *sc)
+{
+ int error, vcpu;
+
+ for (vcpu = 0; vcpu < VM_MAXCPU; vcpu++) {
+ error = vcpu_lock_one(sc, vcpu);
+ if (error)
+ break;
+ }
+
+ if (error) {
+ while (--vcpu >= 0)
+ vcpu_unlock_one(sc, vcpu);
+ }
+
+ return (error);
+}
+
+static void
+vcpu_unlock_all(struct vmmdev_softc *sc)
+{
+ int vcpu;
+
+ for (vcpu = 0; vcpu < VM_MAXCPU; vcpu++)
+ vcpu_unlock_one(sc, vcpu);
+}
+
static struct vmmdev_softc *
vmmdev_lookup(const char *name)
{
@@ -108,12 +175,16 @@ vmmdev_rw(struct cdev *cdev, struct uio *uio, int flags)
void *hpa, *cookie;
struct vmmdev_softc *sc;
- static char zerobuf[PAGE_SIZE];
-
- error = 0;
sc = vmmdev_lookup2(cdev);
if (sc == NULL)
- error = ENXIO;
+ return (ENXIO);
+
+ /*
+ * Get a read lock on the guest memory map by freezing any vcpu.
+ */
+ error = vcpu_lock_one(sc, VM_MAXCPU - 1);
+ if (error)
+ return (error);
prot = (uio->uio_rw == UIO_WRITE ? VM_PROT_WRITE : VM_PROT_READ);
while (uio->uio_resid > 0 && error == 0) {
@@ -129,10 +200,11 @@ vmmdev_rw(struct cdev *cdev, struct uio *uio, int flags)
* Since this device does not support lseek(2), dd(1) will
* read(2) blocks of data to simulate the lseek(2).
*/
- hpa = vm_gpa_hold(sc->vm, gpa, c, prot, &cookie);
+ hpa = vm_gpa_hold(sc->vm, VM_MAXCPU - 1, gpa, c, prot, &cookie);
if (hpa == NULL) {
if (uio->uio_rw == UIO_READ)
- error = uiomove(zerobuf, c, uio);
+ error = uiomove(__DECONST(void *, zero_region),
+ c, uio);
else
error = EFAULT;
} else {
@@ -140,6 +212,70 @@ vmmdev_rw(struct cdev *cdev, struct uio *uio, int flags)
vm_gpa_release(cookie);
}
}
+ vcpu_unlock_one(sc, VM_MAXCPU - 1);
+ return (error);
+}
+
+CTASSERT(sizeof(((struct vm_memseg *)0)->name) >= SPECNAMELEN + 1);
+
+static int
+get_memseg(struct vmmdev_softc *sc, struct vm_memseg *mseg)
+{
+ struct devmem_softc *dsc;
+ int error;
+ bool sysmem;
+
+ error = vm_get_memseg(sc->vm, mseg->segid, &mseg->len, &sysmem, NULL);
+ if (error || mseg->len == 0)
+ return (error);
+
+ if (!sysmem) {
+ SLIST_FOREACH(dsc, &sc->devmem, link) {
+ if (dsc->segid == mseg->segid)
+ break;
+ }
+ KASSERT(dsc != NULL, ("%s: devmem segment %d not found",
+ __func__, mseg->segid));
+ error = copystr(dsc->name, mseg->name, SPECNAMELEN + 1, NULL);
+ } else {
+ bzero(mseg->name, sizeof(mseg->name));
+ }
+
+ return (error);
+}
+
+static int
+alloc_memseg(struct vmmdev_softc *sc, struct vm_memseg *mseg)
+{
+ char *name;
+ int error;
+ bool sysmem;
+
+ error = 0;
+ name = NULL;
+ sysmem = true;
+
+ if (VM_MEMSEG_NAME(mseg)) {
+ sysmem = false;
+ name = malloc(SPECNAMELEN + 1, M_VMMDEV, M_WAITOK);
+ error = copystr(VM_MEMSEG_NAME(mseg), name, SPECNAMELEN + 1, 0);
+ if (error)
+ goto done;
+ }
+
+ error = vm_alloc_memseg(sc->vm, mseg->segid, mseg->len, sysmem);
+ if (error)
+ goto done;
+
+ if (VM_MEMSEG_NAME(mseg)) {
+ error = devmem_create_cdev(vm_name(sc->vm), mseg->segid, name);
+ if (error)
+ vm_free_memseg(sc->vm, mseg->segid);
+ else
+ name = NULL; /* freed when 'cdev' is destroyed */
+ }
+done:
+ free(name, M_VMMDEV);
return (error);
}
@@ -150,7 +286,6 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
int error, vcpu, state_changed, size;
cpuset_t *cpuset;
struct vmmdev_softc *sc;
- struct vm_memory_segment *seg;
struct vm_register *vmreg;
struct vm_seg_desc *vmsegdesc;
struct vm_run *vmrun;
@@ -177,6 +312,7 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
struct vm_intinfo *vmii;
struct vm_rtc_time *rtctime;
struct vm_rtc_data *rtcdata;
+ struct vm_memmap *mm;
sc = vmmdev_lookup2(cdev);
if (sc == NULL)
@@ -211,43 +347,41 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
* Assumes that the first field of the ioctl data is the vcpu.
*/
vcpu = *(int *)data;
- if (vcpu < 0 || vcpu >= VM_MAXCPU) {
- error = EINVAL;
- goto done;
- }
-
- error = vcpu_set_state(sc->vm, vcpu, VCPU_FROZEN, true);
+ error = vcpu_lock_one(sc, vcpu);
if (error)
goto done;
-
state_changed = 1;
break;
case VM_MAP_PPTDEV_MMIO:
case VM_BIND_PPTDEV:
case VM_UNBIND_PPTDEV:
- case VM_MAP_MEMORY:
+ case VM_ALLOC_MEMSEG:
+ case VM_MMAP_MEMSEG:
case VM_REINIT:
/*
* ioctls that operate on the entire virtual machine must
* prevent all vcpus from running.
*/
- error = 0;
- for (vcpu = 0; vcpu < VM_MAXCPU; vcpu++) {
- error = vcpu_set_state(sc->vm, vcpu, VCPU_FROZEN, true);
- if (error)
- break;
- }
-
- if (error) {
- while (--vcpu >= 0)
- vcpu_set_state(sc->vm, vcpu, VCPU_IDLE, false);
+ error = vcpu_lock_all(sc);
+ if (error)
goto done;
- }
-
state_changed = 2;
break;
+ case VM_GET_MEMSEG:
+ case VM_MMAP_GETNEXT:
+ /*
+ * Lock a vcpu to make sure that the memory map cannot be
+ * modified while it is being inspected.
+ */
+ vcpu = VM_MAXCPU - 1;
+ error = vcpu_lock_one(sc, vcpu);
+ if (error)
+ goto done;
+ state_changed = 1;
+ break;
+
default:
break;
}
@@ -372,15 +506,21 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
error = vatpic_set_irq_trigger(sc->vm,
isa_irq_trigger->atpic_irq, isa_irq_trigger->trigger);
break;
- case VM_MAP_MEMORY:
- seg = (struct vm_memory_segment *)data;
- error = vm_malloc(sc->vm, seg->gpa, seg->len);
+ case VM_MMAP_GETNEXT:
+ mm = (struct vm_memmap *)data;
+ error = vm_mmap_getnext(sc->vm, &mm->gpa, &mm->segid,
+ &mm->segoff, &mm->len, &mm->prot, &mm->flags);
break;
- case VM_GET_MEMORY_SEG:
- seg = (struct vm_memory_segment *)data;
- seg->len = 0;
- (void)vm_gpabase2memseg(sc->vm, seg->gpa, seg);
- error = 0;
+ case VM_MMAP_MEMSEG:
+ mm = (struct vm_memmap *)data;
+ error = vm_mmap_memseg(sc->vm, mm->gpa, mm->segid, mm->segoff,
+ mm->len, mm->prot, mm->flags);
+ break;
+ case VM_ALLOC_MEMSEG:
+ error = alloc_memseg(sc, (struct vm_memseg *)data);
+ break;
+ case VM_GET_MEMSEG:
+ error = get_memseg(sc, (struct vm_memseg *)data);
break;
case VM_GET_REGISTER:
vmreg = (struct vm_register *)data;
@@ -505,12 +645,10 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
break;
}
- if (state_changed == 1) {
- vcpu_set_state(sc->vm, vcpu, VCPU_IDLE, false);
- } else if (state_changed == 2) {
- for (vcpu = 0; vcpu < VM_MAXCPU; vcpu++)
- vcpu_set_state(sc->vm, vcpu, VCPU_IDLE, false);
- }
+ if (state_changed == 1)
+ vcpu_unlock_one(sc, vcpu);
+ else if (state_changed == 2)
+ vcpu_unlock_all(sc);
done:
/* Make sure that no handler returns a bogus value like ERESTART */
@@ -519,26 +657,79 @@ done:
}
static int
-vmmdev_mmap_single(struct cdev *cdev, vm_ooffset_t *offset,
- vm_size_t size, struct vm_object **object, int nprot)
+vmmdev_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t mapsize,
+ struct vm_object **objp, int nprot)
{
- int error;
struct vmmdev_softc *sc;
+ vm_paddr_t gpa;
+ size_t len;
+ vm_ooffset_t segoff, first, last;
+ int error, found, segid;
+ bool sysmem;
+
+ first = *offset;
+ last = first + mapsize;
+ if ((nprot & PROT_EXEC) || first < 0 || first >= last)
+ return (EINVAL);
sc = vmmdev_lookup2(cdev);
- if (sc != NULL && (nprot & PROT_EXEC) == 0)
- error = vm_get_memobj(sc->vm, *offset, size, offset, object);
- else
- error = EINVAL;
+ if (sc == NULL) {
+ /* virtual machine is in the process of being created */
+ return (EINVAL);
+ }
+ /*
+ * Get a read lock on the guest memory map by freezing any vcpu.
+ */
+ error = vcpu_lock_one(sc, VM_MAXCPU - 1);
+ if (error)
+ return (error);
+
+ gpa = 0;
+ found = 0;
+ while (!found) {
+ error = vm_mmap_getnext(sc->vm, &gpa, &segid, &segoff, &len,
+ NULL, NULL);
+ if (error)
+ break;
+
+ if (first >= gpa && last <= gpa + len)
+ found = 1;
+ else
+ gpa += len;
+ }
+
+ if (found) {
+ error = vm_get_memseg(sc->vm, segid, &len, &sysmem, objp);
+ KASSERT(error == 0 && *objp != NULL,
+ ("%s: invalid memory segment %d", __func__, segid));
+ if (sysmem) {
+ vm_object_reference(*objp);
+ *offset = segoff + (first - gpa);
+ } else {
+ error = EINVAL;
+ }
+ }
+ vcpu_unlock_one(sc, VM_MAXCPU - 1);
return (error);
}
static void
vmmdev_destroy(void *arg)
{
-
struct vmmdev_softc *sc = arg;
+ struct devmem_softc *dsc;
+ int error;
+
+ error = vcpu_lock_all(sc);
+ KASSERT(error == 0, ("%s: error %d freezing vcpus", __func__, error));
+
+ while ((dsc = SLIST_FIRST(&sc->devmem)) != NULL) {
+ KASSERT(dsc->cdev == NULL, ("%s: devmem not free", __func__));
+ SLIST_REMOVE_HEAD(&sc->devmem, link);
+ free(dsc->name, M_VMMDEV);
+ free(dsc, M_VMMDEV);
+ }
if (sc->cdev != NULL)
destroy_dev(sc->cdev);
@@ -560,6 +751,7 @@ sysctl_vmm_destroy(SYSCTL_HANDLER_ARGS)
{
int error;
char buf[VM_MAX_NAMELEN];
+ struct devmem_softc *dsc;
struct vmmdev_softc *sc;
struct cdev *cdev;
@@ -578,22 +770,30 @@ sysctl_vmm_destroy(SYSCTL_HANDLER_ARGS)
/*
* The 'cdev' will be destroyed asynchronously when 'si_threadcount'
* goes down to 0 so we should not do it again in the callback.
+ *
+ * Setting 'sc->cdev' to NULL is also used to indicate that the VM
+ * is scheduled for destruction.
*/
cdev = sc->cdev;
sc->cdev = NULL;
mtx_unlock(&vmmdev_mtx);
/*
- * Schedule the 'cdev' to be destroyed:
+ * Schedule all cdevs to be destroyed:
*
- * - any new operations on this 'cdev' will return an error (ENXIO).
+ * - any new operations on the 'cdev' will return an error (ENXIO).
*
* - when the 'si_threadcount' dwindles down to zero the 'cdev' will
* be destroyed and the callback will be invoked in a taskqueue
* context.
+ *
+ * - the 'devmem' cdevs are destroyed before the virtual machine 'cdev'
*/
+ SLIST_FOREACH(dsc, &sc->devmem, link) {
+ KASSERT(dsc->cdev != NULL, ("devmem cdev already destroyed"));
+ destroy_dev_sched_cb(dsc->cdev, devmem_destroy, dsc);
+ }
destroy_dev_sched_cb(cdev, vmmdev_destroy, sc);
-
return (0);
}
SYSCTL_PROC(_hw_vmm, OID_AUTO, destroy, CTLTYPE_STRING | CTLFLAG_RW,
@@ -634,6 +834,7 @@ sysctl_vmm_create(SYSCTL_HANDLER_ARGS)
sc = malloc(sizeof(struct vmmdev_softc), M_VMMDEV, M_WAITOK | M_ZERO);
sc->vm = vm;
+ SLIST_INIT(&sc->devmem);
/*
* Lookup the name again just in case somebody sneaked in when we
@@ -687,3 +888,96 @@ vmmdev_cleanup(void)
return (error);
}
+
+static int
+devmem_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t len,
+ struct vm_object **objp, int nprot)
+{
+ struct devmem_softc *dsc;
+ vm_ooffset_t first, last;
+ size_t seglen;
+ int error;
+ bool sysmem;
+
+ dsc = cdev->si_drv1;
+ if (dsc == NULL) {
+ /* 'cdev' has been created but is not ready for use */
+ return (ENXIO);
+ }
+
+ first = *offset;
+ last = *offset + len;
+ if ((nprot & PROT_EXEC) || first < 0 || first >= last)
+ return (EINVAL);
+
+ error = vcpu_lock_one(dsc->sc, VM_MAXCPU - 1);
+ if (error)
+ return (error);
+
+ error = vm_get_memseg(dsc->sc->vm, dsc->segid, &seglen, &sysmem, objp);
+ KASSERT(error == 0 && !sysmem && *objp != NULL,
+ ("%s: invalid devmem segment %d", __func__, dsc->segid));
+
+ vcpu_unlock_one(dsc->sc, VM_MAXCPU - 1);
+
+ if (seglen >= last) {
+ vm_object_reference(*objp);
+ return (0);
+ } else {
+ return (EINVAL);
+ }
+}
+
+static struct cdevsw devmemsw = {
+ .d_name = "devmem",
+ .d_version = D_VERSION,
+ .d_mmap_single = devmem_mmap_single,
+};
+
+static int
+devmem_create_cdev(const char *vmname, int segid, char *devname)
+{
+ struct devmem_softc *dsc;
+ struct vmmdev_softc *sc;
+ struct cdev *cdev;
+ int error;
+
+ error = make_dev_p(MAKEDEV_CHECKNAME, &cdev, &devmemsw, NULL,
+ UID_ROOT, GID_WHEEL, 0600, "vmm.io/%s.%s", vmname, devname);
+ if (error)
+ return (error);
+
+ dsc = malloc(sizeof(struct devmem_softc), M_VMMDEV, M_WAITOK | M_ZERO);
+
+ mtx_lock(&vmmdev_mtx);
+ sc = vmmdev_lookup(vmname);
+ KASSERT(sc != NULL, ("%s: vm %s softc not found", __func__, vmname));
+ if (sc->cdev == NULL) {
+ /* virtual machine is being created or destroyed */
+ mtx_unlock(&vmmdev_mtx);
+ free(dsc, M_VMMDEV);
+ destroy_dev_sched_cb(cdev, NULL, 0);
+ return (ENODEV);
+ }
+
+ dsc->segid = segid;
+ dsc->name = devname;
+ dsc->cdev = cdev;
+ dsc->sc = sc;
+ SLIST_INSERT_HEAD(&sc->devmem, dsc, link);
+ mtx_unlock(&vmmdev_mtx);
+
+ /* The 'cdev' is ready for use after 'si_drv1' is initialized */
+ cdev->si_drv1 = dsc;
+ return (0);
+}
+
+static void
+devmem_destroy(void *arg)
+{
+ struct devmem_softc *dsc = arg;
+
+ KASSERT(dsc->cdev, ("%s: devmem cdev already destroyed", __func__));
+ dsc->cdev = NULL;
+ dsc->sc = NULL;
+}
diff --git a/sys/amd64/vmm/vmm_instruction_emul.c b/sys/amd64/vmm/vmm_instruction_emul.c
index 758b7e8..ae5330f 100644
--- a/sys/amd64/vmm/vmm_instruction_emul.c
+++ b/sys/amd64/vmm/vmm_instruction_emul.c
@@ -1677,12 +1677,12 @@ ptp_release(void **cookie)
}
static void *
-ptp_hold(struct vm *vm, vm_paddr_t ptpphys, size_t len, void **cookie)
+ptp_hold(struct vm *vm, int vcpu, vm_paddr_t ptpphys, size_t len, void **cookie)
{
void *ptr;
ptp_release(cookie);
- ptr = vm_gpa_hold(vm, ptpphys, len, VM_PROT_RW, cookie);
+ ptr = vm_gpa_hold(vm, vcpu, ptpphys, len, VM_PROT_RW, cookie);
return (ptr);
}
@@ -1729,7 +1729,8 @@ restart:
/* Zero out the lower 12 bits. */
ptpphys &= ~0xfff;
- ptpbase32 = ptp_hold(vm, ptpphys, PAGE_SIZE, &cookie);
+ ptpbase32 = ptp_hold(vm, vcpuid, ptpphys, PAGE_SIZE,
+ &cookie);
if (ptpbase32 == NULL)
goto error;
@@ -1788,7 +1789,8 @@ restart:
/* Zero out the lower 5 bits and the upper 32 bits */
ptpphys &= 0xffffffe0UL;
- ptpbase = ptp_hold(vm, ptpphys, sizeof(*ptpbase) * 4, &cookie);
+ ptpbase = ptp_hold(vm, vcpuid, ptpphys, sizeof(*ptpbase) * 4,
+ &cookie);
if (ptpbase == NULL)
goto error;
@@ -1811,7 +1813,7 @@ restart:
/* Zero out the lower 12 bits and the upper 12 bits */
ptpphys >>= 12; ptpphys <<= 24; ptpphys >>= 12;
- ptpbase = ptp_hold(vm, ptpphys, PAGE_SIZE, &cookie);
+ ptpbase = ptp_hold(vm, vcpuid, ptpphys, PAGE_SIZE, &cookie);
if (ptpbase == NULL)
goto error;
@@ -2319,10 +2321,13 @@ decode_moffset(struct vie *vie)
* page table fault matches with our instruction decoding.
*/
static int
-verify_gla(struct vm *vm, int cpuid, uint64_t gla, struct vie *vie)
+verify_gla(struct vm *vm, int cpuid, uint64_t gla, struct vie *vie,
+ enum vm_cpu_mode cpu_mode)
{
int error;
- uint64_t base, idx, gla2;
+ uint64_t base, segbase, idx, gla2;
+ enum vm_reg_name seg;
+ struct seg_desc desc;
/* Skip 'gla' verification */
if (gla == VIE_INVALID_GLA)
@@ -2355,14 +2360,48 @@ verify_gla(struct vm *vm, int cpuid, uint64_t gla, struct vie *vie)
}
}
- /* XXX assuming that the base address of the segment is 0 */
- gla2 = base + vie->scale * idx + vie->displacement;
+ /*
+ * From "Specifying a Segment Selector", Intel SDM, Vol 1
+ *
+ * In 64-bit mode, segmentation is generally (but not
+ * completely) disabled. The exceptions are the FS and GS
+ * segments.
+ *
+ * In legacy IA-32 mode, when the ESP or EBP register is used
+ * as the base, the SS segment is the default segment. For
+ * other data references, except when relative to stack or
+ * string destination the DS segment is the default. These
+ * can be overridden to allow other segments to be accessed.
+ */
+ if (vie->segment_override)
+ seg = vie->segment_register;
+ else if (vie->base_register == VM_REG_GUEST_RSP ||
+ vie->base_register == VM_REG_GUEST_RBP)
+ seg = VM_REG_GUEST_SS;
+ else
+ seg = VM_REG_GUEST_DS;
+ if (cpu_mode == CPU_MODE_64BIT && seg != VM_REG_GUEST_FS &&
+ seg != VM_REG_GUEST_GS) {
+ segbase = 0;
+ } else {
+ error = vm_get_seg_desc(vm, cpuid, seg, &desc);
+ if (error) {
+ printf("verify_gla: error %d getting segment"
+ " descriptor %d", error,
+ vie->segment_register);
+ return (-1);
+ }
+ segbase = desc.base;
+ }
+
+ gla2 = segbase + base + vie->scale * idx + vie->displacement;
gla2 &= size2mask[vie->addrsize];
if (gla != gla2) {
- printf("verify_gla mismatch: "
+ printf("verify_gla mismatch: segbase(0x%0lx)"
"base(0x%0lx), scale(%d), index(0x%0lx), "
"disp(0x%0lx), gla(0x%0lx), gla2(0x%0lx)\n",
- base, vie->scale, idx, vie->displacement, gla, gla2);
+ segbase, base, vie->scale, idx, vie->displacement,
+ gla, gla2);
return (-1);
}
@@ -2396,7 +2435,7 @@ vmm_decode_instruction(struct vm *vm, int cpuid, uint64_t gla,
return (-1);
if ((vie->op.op_flags & VIE_OP_F_NO_GLA_VERIFICATION) == 0) {
- if (verify_gla(vm, cpuid, gla, vie))
+ if (verify_gla(vm, cpuid, gla, vie, cpu_mode))
return (-1);
}
diff --git a/sys/amd64/vmm/vmm_mem.c b/sys/amd64/vmm/vmm_mem.c
index 1019f2b..c9be6c9 100644
--- a/sys/amd64/vmm/vmm_mem.c
+++ b/sys/amd64/vmm/vmm_mem.c
@@ -114,38 +114,6 @@ vmm_mmio_free(struct vmspace *vmspace, vm_paddr_t gpa, size_t len)
vm_map_remove(&vmspace->vm_map, gpa, gpa + len);
}
-vm_object_t
-vmm_mem_alloc(struct vmspace *vmspace, vm_paddr_t gpa, size_t len)
-{
- int error;
- vm_object_t obj;
-
- if (gpa & PAGE_MASK)
- panic("vmm_mem_alloc: invalid gpa %#lx", gpa);
-
- if (len == 0 || (len & PAGE_MASK) != 0)
- panic("vmm_mem_alloc: invalid allocation size %lu", len);
-
- obj = vm_object_allocate(OBJT_DEFAULT, len >> PAGE_SHIFT);
- if (obj != NULL) {
- error = vm_map_find(&vmspace->vm_map, obj, 0, &gpa, len, 0,
- VMFS_NO_SPACE, VM_PROT_ALL, VM_PROT_ALL, 0);
- if (error != KERN_SUCCESS) {
- vm_object_deallocate(obj);
- obj = NULL;
- }
- }
-
- return (obj);
-}
-
-void
-vmm_mem_free(struct vmspace *vmspace, vm_paddr_t gpa, size_t len)
-{
-
- vm_map_remove(&vmspace->vm_map, gpa, gpa + len);
-}
-
vm_paddr_t
vmm_mem_maxaddr(void)
{
diff --git a/sys/amd64/vmm/vmm_mem.h b/sys/amd64/vmm/vmm_mem.h
index a375070..7773faa 100644
--- a/sys/amd64/vmm/vmm_mem.h
+++ b/sys/amd64/vmm/vmm_mem.h
@@ -33,10 +33,8 @@ struct vmspace;
struct vm_object;
int vmm_mem_init(void);
-struct vm_object *vmm_mem_alloc(struct vmspace *, vm_paddr_t gpa, size_t size);
struct vm_object *vmm_mmio_alloc(struct vmspace *, vm_paddr_t gpa, size_t len,
vm_paddr_t hpa);
-void vmm_mem_free(struct vmspace *, vm_paddr_t gpa, size_t size);
void vmm_mmio_free(struct vmspace *, vm_paddr_t gpa, size_t size);
vm_paddr_t vmm_mem_maxaddr(void);
diff --git a/sys/sys/ata.h b/sys/sys/ata.h
index 863f0e8..272b46a 100644
--- a/sys/sys/ata.h
+++ b/sys/sys/ata.h
@@ -399,6 +399,7 @@ struct ata_params {
#define ATA_IDLE_CMD 0xe3 /* idle */
#define ATA_READ_BUFFER 0xe4 /* read buffer */
#define ATA_READ_PM 0xe4 /* read portmultiplier */
+#define ATA_CHECK_POWER_MODE 0xe5 /* device power mode */
#define ATA_SLEEP 0xe6 /* sleep */
#define ATA_FLUSHCACHE 0xe7 /* flush cache to disk */
#define ATA_WRITE_PM 0xe8 /* write portmultiplier */
diff --git a/usr.sbin/bhyve/Makefile b/usr.sbin/bhyve/Makefile
index 377a2e6..33635f5 100644
--- a/usr.sbin/bhyve/Makefile
+++ b/usr.sbin/bhyve/Makefile
@@ -13,8 +13,10 @@ SRCS= \
acpi.c \
bhyverun.c \
block_if.c \
+ bootrom.c \
consport.c \
dbgport.c \
+ fwctl.c \
inout.c \
ioapic.c \
mem.c \
diff --git a/usr.sbin/bhyve/bhyve.8 b/usr.sbin/bhyve/bhyve.8
index 37d1b86..6509fe7 100644
--- a/usr.sbin/bhyve/bhyve.8
+++ b/usr.sbin/bhyve/bhyve.8
@@ -32,7 +32,7 @@
.Nd "run a guest operating system inside a virtual machine"
.Sh SYNOPSIS
.Nm
-.Op Fl abehuwxACHPWY
+.Op Fl abehuwxACHPSWY
.Op Fl c Ar numcpus
.Op Fl g Ar gdbport
.Op Fl l Ar lpcdev Ns Op , Ns Ar conf
@@ -99,10 +99,12 @@ Yield the virtual CPU thread when a HLT instruction is detected.
If this option is not specified, virtual CPUs will use 100% of a host CPU.
.It Fl l Ar lpcdev Ns Op , Ns Ar conf
Allow devices behind the LPC PCI-ISA bridge to be configured.
-The only supported devices are the TTY-class devices,
-.Li com1
+The only supported devices are the TTY-class devices
+.Ar com1
and
-.Li com2 .
+.Ar com2
+and the boot ROM device
+.Ar bootrom .
.It Fl m Ar size Ns Op Ar K|k|M|m|G|g|T|t
Guest physical memory size in bytes.
This must be the same size that was given to
@@ -173,8 +175,8 @@ AHCI controller attached to a SATA hard-drive.
.It Li uart
PCI 16550 serial device.
.It Li lpc
-LPC PCI-ISA bridge with COM1 and COM2 16550 serial ports. The LPC bridge
-emulation can only be configured on bus 0.
+LPC PCI-ISA bridge with COM1 and COM2 16550 serial ports and a boot ROM.
+The LPC bridge emulation can only be configured on bus 0.
.El
.It Op Ar conf
This optional parameter describes the backend for device emulations.
@@ -234,6 +236,14 @@ process.
Use the host TTY device for serial port I/O.
.El
.Pp
+Boot ROM device:
+.Bl -tag -width 10n
+.It Pa romfile
+Map
+.Ar romfile
+in the guest address space reserved for boot firmware.
+.El
+.Pp
Pass-through devices:
.Bl -tag -width 10n
.It Ns Ar slot Ns / Ns Ar bus Ns / Ns Ar function
@@ -245,11 +255,17 @@ and
numbers.
.El
.Pp
+Guest memory must be wired using the
+.Fl S
+option when a pass-through device is configured.
+.Pp
The host device must have been reserved at boot-time using the
.Va pptdev
loader variable as described in
.Xr vmm 4 .
.El
+.It Fl S
+Wire guest memory.
.It Fl u
RTC keeps UTC time.
.It Fl U Ar uuid
diff --git a/usr.sbin/bhyve/bhyverun.c b/usr.sbin/bhyve/bhyverun.c
index d281dc5..e46b81c 100644
--- a/usr.sbin/bhyve/bhyverun.c
+++ b/usr.sbin/bhyve/bhyverun.c
@@ -47,6 +47,7 @@ __FBSDID("$FreeBSD$");
#include <pthread.h>
#include <pthread_np.h>
#include <sysexits.h>
+#include <stdbool.h>
#include <machine/vmm.h>
#include <vmmapi.h>
@@ -55,6 +56,7 @@ __FBSDID("$FreeBSD$");
#include "acpi.h"
#include "inout.h"
#include "dbgport.h"
+#include "fwctl.h"
#include "ioapic.h"
#include "mem.h"
#include "mevent.h"
@@ -122,7 +124,7 @@ usage(int code)
{
fprintf(stderr,
- "Usage: %s [-abehuwxACHPWY] [-c vcpus] [-g <gdb port>] [-l <lpc>]\n"
+ "Usage: %s [-abehuwxACHPSWY] [-c vcpus] [-g <gdb port>] [-l <lpc>]\n"
" %*s [-m mem] [-p vcpu:hostcpu] [-s <pci>] [-U uuid] <vm>\n"
" -a: local apic is in xAPIC mode (deprecated)\n"
" -A: create ACPI tables\n"
@@ -137,6 +139,7 @@ usage(int code)
" -p: pin 'vcpu' to 'hostcpu'\n"
" -P: vmexit from the guest on pause\n"
" -s: <slot,driver,configinfo> PCI slot config\n"
+ " -S: guest memory cannot be swapped\n"
" -u: RTC keeps UTC time\n"
" -U: uuid\n"
" -w: ignore unimplemented MSRs\n"
@@ -698,26 +701,82 @@ fbsdrun_set_capabilities(struct vmctx *ctx, int cpu)
vm_set_capability(ctx, cpu, VM_CAP_ENABLE_INVPCID, 1);
}
+static struct vmctx *
+do_open(const char *vmname)
+{
+ struct vmctx *ctx;
+ int error;
+ bool reinit, romboot;
+
+ reinit = romboot = false;
+
+ if (lpc_bootrom())
+ romboot = true;
+
+ error = vm_create(vmname);
+ if (error) {
+ if (errno == EEXIST) {
+ if (romboot) {
+ reinit = true;
+ } else {
+ /*
+ * The virtual machine has been setup by the
+ * userspace bootloader.
+ */
+ }
+ } else {
+ perror("vm_create");
+ exit(1);
+ }
+ } else {
+ if (!romboot) {
+ /*
+ * If the virtual machine was just created then a
+ * bootrom must be configured to boot it.
+ */
+ fprintf(stderr, "virtual machine cannot be booted\n");
+ exit(1);
+ }
+ }
+
+ ctx = vm_open(vmname);
+ if (ctx == NULL) {
+ perror("vm_open");
+ exit(1);
+ }
+
+ if (reinit) {
+ error = vm_reinit(ctx);
+ if (error) {
+ perror("vm_reinit");
+ exit(1);
+ }
+ }
+ return (ctx);
+}
+
int
main(int argc, char *argv[])
{
int c, error, gdb_port, err, bvmcons;
- int dump_guest_memory, max_vcpus, mptgen;
+ int max_vcpus, mptgen, memflags;
int rtc_localtime;
struct vmctx *ctx;
uint64_t rip;
size_t memsize;
+ char *optstr;
bvmcons = 0;
- dump_guest_memory = 0;
progname = basename(argv[0]);
gdb_port = 0;
guest_ncpus = 1;
memsize = 256 * MB;
mptgen = 1;
rtc_localtime = 1;
+ memflags = 0;
- while ((c = getopt(argc, argv, "abehuwxACHIPWYp:g:c:s:m:l:U:")) != -1) {
+ optstr = "abehuwxACHIPSWYp:g:c:s:m:l:U:";
+ while ((c = getopt(argc, argv, optstr)) != -1) {
switch (c) {
case 'a':
x2apic_mode = 0;
@@ -738,7 +797,7 @@ main(int argc, char *argv[])
guest_ncpus = atoi(optarg);
break;
case 'C':
- dump_guest_memory = 1;
+ memflags |= VM_MEM_F_INCORE;
break;
case 'g':
gdb_port = atoi(optarg);
@@ -754,6 +813,9 @@ main(int argc, char *argv[])
exit(1);
else
break;
+ case 'S':
+ memflags |= VM_MEM_F_WIRED;
+ break;
case 'm':
error = vm_parse_memsize(optarg, &memsize);
if (error)
@@ -808,12 +870,7 @@ main(int argc, char *argv[])
usage(1);
vmname = argv[0];
-
- ctx = vm_open(vmname);
- if (ctx == NULL) {
- perror("vm_open");
- exit(1);
- }
+ ctx = do_open(vmname);
if (guest_ncpus < 1) {
fprintf(stderr, "Invalid guest vCPUs (%d)\n", guest_ncpus);
@@ -829,11 +886,10 @@ main(int argc, char *argv[])
fbsdrun_set_capabilities(ctx, BSP);
- if (dump_guest_memory)
- vm_set_memflags(ctx, VM_MEM_F_INCORE);
+ vm_set_memflags(ctx, memflags);
err = vm_setup_memory(ctx, memsize, VM_MMAP_ALL);
if (err) {
- fprintf(stderr, "Unable to setup memory (%d)\n", err);
+ fprintf(stderr, "Unable to setup memory (%d)\n", errno);
exit(1);
}
@@ -863,6 +919,16 @@ main(int argc, char *argv[])
if (bvmcons)
init_bvmcons();
+ if (lpc_bootrom()) {
+ if (vm_set_capability(ctx, BSP, VM_CAP_UNRESTRICTED_GUEST, 1)) {
+ fprintf(stderr, "ROM boot failed: unrestricted guest "
+ "capability not available\n");
+ exit(1);
+ }
+ error = vcpu_reset(ctx, BSP);
+ assert(error == 0);
+ }
+
error = vm_get_register(ctx, BSP, VM_REG_GUEST_RIP, &rip);
assert(error == 0);
@@ -883,6 +949,9 @@ main(int argc, char *argv[])
assert(error == 0);
}
+ if (lpc_bootrom())
+ fwctl_init();
+
/*
* Change the proc title to include the VM name.
*/
diff --git a/usr.sbin/bhyve/bootrom.c b/usr.sbin/bhyve/bootrom.c
new file mode 100644
index 0000000..5e4e0e9
--- /dev/null
+++ b/usr.sbin/bhyve/bootrom.c
@@ -0,0 +1,111 @@
+/*-
+ * Copyright (c) 2015 Neel Natu <neel@freebsd.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/param.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/types.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+
+#include <machine/vmm.h>
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+#include <stdbool.h>
+
+#include <vmmapi.h>
+#include "bhyverun.h"
+#include "bootrom.h"
+
+#define MAX_BOOTROM_SIZE (16 * 1024 * 1024) /* 16 MB */
+
+int
+bootrom_init(struct vmctx *ctx, const char *romfile)
+{
+ struct stat sbuf;
+ vm_paddr_t gpa;
+ ssize_t rlen;
+ char *ptr;
+ int fd, i, rv, prot;
+
+ rv = -1;
+ fd = open(romfile, O_RDONLY);
+ if (fd < 0) {
+ fprintf(stderr, "Error opening bootrom \"%s\": %s\n",
+ romfile, strerror(errno));
+ goto done;
+ }
+
+ if (fstat(fd, &sbuf) < 0) {
+ fprintf(stderr, "Could not fstat bootrom file \"%s\": %s\n",
+ romfile, strerror(errno));
+ goto done;
+ }
+
+ /*
+ * Limit bootrom size to 16MB so it doesn't encroach into reserved
+ * MMIO space (e.g. APIC, HPET, MSI).
+ */
+ if (sbuf.st_size > MAX_BOOTROM_SIZE || sbuf.st_size < PAGE_SIZE) {
+ fprintf(stderr, "Invalid bootrom size %ld\n", sbuf.st_size);
+ goto done;
+ }
+
+ if (sbuf.st_size & PAGE_MASK) {
+ fprintf(stderr, "Bootrom size %ld is not a multiple of the "
+ "page size\n", sbuf.st_size);
+ goto done;
+ }
+
+ ptr = vm_create_devmem(ctx, VM_BOOTROM, "bootrom", sbuf.st_size);
+ if (ptr == MAP_FAILED)
+ goto done;
+
+ /* Map the bootrom into the guest address space */
+ prot = PROT_READ | PROT_EXEC;
+ gpa = (1ULL << 32) - sbuf.st_size;
+ if (vm_mmap_memseg(ctx, gpa, VM_BOOTROM, 0, sbuf.st_size, prot) != 0)
+ goto done;
+
+ /* Read 'romfile' into the guest address space */
+ for (i = 0; i < sbuf.st_size / PAGE_SIZE; i++) {
+ rlen = read(fd, ptr + i * PAGE_SIZE, PAGE_SIZE);
+ if (rlen != PAGE_SIZE) {
+ fprintf(stderr, "Incomplete read of page %d of bootrom "
+ "file %s: %ld bytes\n", i, romfile, rlen);
+ goto done;
+ }
+ }
+ rv = 0;
+done:
+ if (fd >= 0)
+ close(fd);
+ return (rv);
+}
diff --git a/usr.sbin/bhyve/bootrom.h b/usr.sbin/bhyve/bootrom.h
new file mode 100644
index 0000000..af150d3
--- /dev/null
+++ b/usr.sbin/bhyve/bootrom.h
@@ -0,0 +1,38 @@
+/*-
+ * Copyright (c) 2015 Neel Natu <neel@freebsd.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _BOOTROM_H_
+#define _BOOTROM_H_
+
+#include <stdbool.h>
+
+struct vmctx;
+
+int bootrom_init(struct vmctx *ctx, const char *romfile);
+
+#endif
diff --git a/usr.sbin/bhyve/dbgport.c b/usr.sbin/bhyve/dbgport.c
index 534ae65..5be0ceb 100644
--- a/usr.sbin/bhyve/dbgport.c
+++ b/usr.sbin/bhyve/dbgport.c
@@ -116,6 +116,8 @@ SYSRES_IO(BVM_DBG_PORT, 4);
void
init_dbgport(int sport)
{
+ int reuse;
+
conn_fd = -1;
if ((listen_fd = socket(AF_INET, SOCK_STREAM, 0)) < 0) {
@@ -128,6 +130,13 @@ init_dbgport(int sport)
sin.sin_addr.s_addr = htonl(INADDR_ANY);
sin.sin_port = htons(sport);
+ reuse = 1;
+ if (setsockopt(listen_fd, SOL_SOCKET, SO_REUSEADDR, &reuse,
+ sizeof(reuse)) < 0) {
+ perror("setsockopt");
+ exit(1);
+ }
+
if (bind(listen_fd, (struct sockaddr *)&sin, sizeof(sin)) < 0) {
perror("bind");
exit(1);
diff --git a/usr.sbin/bhyve/fwctl.c b/usr.sbin/bhyve/fwctl.c
new file mode 100644
index 0000000..4b6164b
--- /dev/null
+++ b/usr.sbin/bhyve/fwctl.c
@@ -0,0 +1,549 @@
+/*-
+ * Copyright (c) 2015 Peter Grehan <grehan@freebsd.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+/*
+ * Guest firmware interface. Uses i/o ports x510/x511 as Qemu does,
+ * but with a request/response messaging protocol.
+ */
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/types.h>
+#include <sys/errno.h>
+#include <sys/uio.h>
+
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "bhyverun.h"
+#include "inout.h"
+#include "fwctl.h"
+
+/*
+ * Messaging protocol base operations
+ */
+#define OP_NULL 1
+#define OP_ECHO 2
+#define OP_GET 3
+#define OP_GET_LEN 4
+#define OP_SET 5
+#define OP_MAX OP_SET
+
+/* I/O ports */
+#define FWCTL_OUT 0x510
+#define FWCTL_IN 0x511
+
+/*
+ * Back-end state-machine
+ */
+enum state {
+ DORMANT,
+ IDENT_WAIT,
+ IDENT_SEND,
+ REQ,
+ RESP
+} be_state = DORMANT;
+
+static uint8_t sig[] = { 'B', 'H', 'Y', 'V' };
+static u_int ident_idx;
+
+struct op_info {
+ int op;
+ int (*op_start)(int len);
+ void (*op_data)(uint32_t data, int len);
+ int (*op_result)(struct iovec **data);
+ void (*op_done)(struct iovec *data);
+};
+static struct op_info *ops[OP_MAX+1];
+
+/* Return 0-padded uint32_t */
+static uint32_t
+fwctl_send_rest(uint32_t *data, size_t len)
+{
+ union {
+ uint8_t c[4];
+ uint32_t w;
+ } u;
+ uint8_t *cdata;
+ int i;
+
+ cdata = (uint8_t *) data;
+ u.w = 0;
+
+ for (i = 0, u.w = 0; i < len; i++)
+ u.c[i] = *cdata++;
+
+ return (u.w);
+}
+
+/*
+ * error op dummy proto - drop all data sent and return an error
+*/
+static int errop_code;
+
+static void
+errop_set(int err)
+{
+
+ errop_code = err;
+}
+
+static int
+errop_start(int len)
+{
+ errop_code = ENOENT;
+
+ /* accept any length */
+ return (errop_code);
+}
+
+static void
+errop_data(uint32_t data, int len)
+{
+
+ /* ignore */
+}
+
+static int
+errop_result(struct iovec **data)
+{
+
+ /* no data to send back; always successful */
+ *data = NULL;
+ return (errop_code);
+}
+
+static void
+errop_done(struct iovec *data)
+{
+
+ /* assert data is NULL */
+}
+
+static struct op_info errop_info = {
+ .op_start = errop_start,
+ .op_data = errop_data,
+ .op_result = errop_result,
+ .op_done = errop_done
+};
+
+/* OID search */
+SET_DECLARE(ctl_set, struct ctl);
+
+CTL_NODE("hw.ncpu", &guest_ncpus, sizeof(guest_ncpus));
+
+static struct ctl *
+ctl_locate(const char *str, int maxlen)
+{
+ struct ctl *cp, **cpp;
+
+ SET_FOREACH(cpp, ctl_set) {
+ cp = *cpp;
+ if (!strncmp(str, cp->c_oid, maxlen))
+ return (cp);
+ }
+
+ return (NULL);
+}
+
+/* uefi-sysctl get-len */
+#define FGET_STRSZ 80
+static struct iovec fget_biov[2];
+static char fget_str[FGET_STRSZ];
+static struct {
+ size_t f_sz;
+ uint32_t f_data[1024];
+} fget_buf;
+static int fget_cnt;
+static size_t fget_size;
+
+static int
+fget_start(int len)
+{
+
+ if (len > FGET_STRSZ)
+ return(E2BIG);
+
+ fget_cnt = 0;
+
+ return (0);
+}
+
+static void
+fget_data(uint32_t data, int len)
+{
+
+ *((uint32_t *) &fget_str[fget_cnt]) = data;
+ fget_cnt += sizeof(uint32_t);
+}
+
+static int
+fget_result(struct iovec **data, int val)
+{
+ struct ctl *cp;
+ int err;
+
+ err = 0;
+
+ /* Locate the OID */
+ cp = ctl_locate(fget_str, fget_cnt);
+ if (cp == NULL) {
+ *data = NULL;
+ err = ENOENT;
+ } else {
+ if (val) {
+ /* For now, copy the len/data into a buffer */
+ memset(&fget_buf, 0, sizeof(fget_buf));
+ fget_buf.f_sz = cp->c_len;
+ memcpy(fget_buf.f_data, cp->c_data, cp->c_len);
+ fget_biov[0].iov_base = (char *)&fget_buf;
+ fget_biov[0].iov_len = sizeof(fget_buf.f_sz) +
+ cp->c_len;
+ } else {
+ fget_size = cp->c_len;
+ fget_biov[0].iov_base = (char *)&fget_size;
+ fget_biov[0].iov_len = sizeof(fget_size);
+ }
+
+ fget_biov[1].iov_base = NULL;
+ fget_biov[1].iov_len = 0;
+ *data = fget_biov;
+ }
+
+ return (err);
+}
+
+static void
+fget_done(struct iovec *data)
+{
+
+ /* nothing needs to be freed */
+}
+
+static int
+fget_len_result(struct iovec **data)
+{
+ return (fget_result(data, 0));
+}
+
+static int
+fget_val_result(struct iovec **data)
+{
+ return (fget_result(data, 1));
+}
+
+static struct op_info fgetlen_info = {
+ .op_start = fget_start,
+ .op_data = fget_data,
+ .op_result = fget_len_result,
+ .op_done = fget_done
+};
+
+static struct op_info fgetval_info = {
+ .op_start = fget_start,
+ .op_data = fget_data,
+ .op_result = fget_val_result,
+ .op_done = fget_done
+};
+
+static struct req_info {
+ int req_error;
+ u_int req_count;
+ uint32_t req_size;
+ uint32_t req_type;
+ uint32_t req_txid;
+ struct op_info *req_op;
+ int resp_error;
+ int resp_count;
+ int resp_size;
+ int resp_off;
+ struct iovec *resp_biov;
+} rinfo;
+
+static void
+fwctl_response_done(void)
+{
+
+ (*rinfo.req_op->op_done)(rinfo.resp_biov);
+
+ /* reinit the req data struct */
+ memset(&rinfo, 0, sizeof(rinfo));
+}
+
+static void
+fwctl_request_done(void)
+{
+
+ rinfo.resp_error = (*rinfo.req_op->op_result)(&rinfo.resp_biov);
+
+ /* XXX only a single vector supported at the moment */
+ rinfo.resp_off = 0;
+ if (rinfo.resp_biov == NULL) {
+ rinfo.resp_size = 0;
+ } else {
+ rinfo.resp_size = rinfo.resp_biov[0].iov_len;
+ }
+}
+
+static int
+fwctl_request_start(void)
+{
+ int err;
+
+ /* Data size doesn't include header */
+ rinfo.req_size -= 12;
+
+ rinfo.req_op = &errop_info;
+ if (rinfo.req_type <= OP_MAX && ops[rinfo.req_type] != NULL)
+ rinfo.req_op = ops[rinfo.req_type];
+
+ err = (*rinfo.req_op->op_start)(rinfo.req_size);
+
+ if (err) {
+ errop_set(err);
+ rinfo.req_op = &errop_info;
+ }
+
+ /* Catch case of zero-length message here */
+ if (rinfo.req_size == 0) {
+ fwctl_request_done();
+ return (1);
+ }
+
+ return (0);
+}
+
+static int
+fwctl_request_data(uint32_t value)
+{
+ int remlen;
+
+ /* Make sure remaining size is >= 0 */
+ rinfo.req_size -= sizeof(uint32_t);
+ remlen = (rinfo.req_size > 0) ? rinfo.req_size: 0;
+
+ (*rinfo.req_op->op_data)(value, remlen);
+
+ if (rinfo.req_size < sizeof(uint32_t)) {
+ fwctl_request_done();
+ return (1);
+ }
+
+ return (0);
+}
+
+static int
+fwctl_request(uint32_t value)
+{
+
+ int ret;
+
+ ret = 0;
+
+ switch (rinfo.req_count) {
+ case 0:
+ /* Verify size */
+ if (value < 12) {
+ printf("msg size error");
+ exit(1);
+ }
+ rinfo.req_size = value;
+ rinfo.req_count = 1;
+ break;
+ case 1:
+ rinfo.req_type = value;
+ rinfo.req_count++;
+ break;
+ case 2:
+ rinfo.req_txid = value;
+ rinfo.req_count++;
+ ret = fwctl_request_start();
+ break;
+ default:
+ ret = fwctl_request_data(value);
+ break;
+ }
+
+ return (ret);
+}
+
+static int
+fwctl_response(uint32_t *retval)
+{
+ uint32_t *dp;
+ int remlen;
+
+ switch(rinfo.resp_count) {
+ case 0:
+ /* 4 x u32 header len + data */
+ *retval = 4*sizeof(uint32_t) +
+ roundup(rinfo.resp_size, sizeof(uint32_t));
+ rinfo.resp_count++;
+ break;
+ case 1:
+ *retval = rinfo.req_type;
+ rinfo.resp_count++;
+ break;
+ case 2:
+ *retval = rinfo.req_txid;
+ rinfo.resp_count++;
+ break;
+ case 3:
+ *retval = rinfo.resp_error;
+ rinfo.resp_count++;
+ break;
+ default:
+ remlen = rinfo.resp_size - rinfo.resp_off;
+ dp = (uint32_t *)
+ ((uint8_t *)rinfo.resp_biov->iov_base + rinfo.resp_off);
+ if (remlen >= sizeof(uint32_t)) {
+ *retval = *dp;
+ } else if (remlen > 0) {
+ *retval = fwctl_send_rest(dp, remlen);
+ }
+ rinfo.resp_off += sizeof(uint32_t);
+ break;
+ }
+
+ if (rinfo.resp_count > 3 &&
+ rinfo.resp_size - rinfo.resp_off <= 0) {
+ fwctl_response_done();
+ return (1);
+ }
+
+ return (0);
+}
+
+
+/*
+ * i/o port handling.
+ */
+static uint8_t
+fwctl_inb(void)
+{
+ uint8_t retval;
+
+ retval = 0xff;
+
+ switch (be_state) {
+ case IDENT_SEND:
+ retval = sig[ident_idx++];
+ if (ident_idx >= sizeof(sig))
+ be_state = REQ;
+ break;
+ default:
+ break;
+ }
+
+ return (retval);
+}
+
+static void
+fwctl_outw(uint16_t val)
+{
+ switch (be_state) {
+ case IDENT_WAIT:
+ if (val == 0) {
+ be_state = IDENT_SEND;
+ ident_idx = 0;
+ }
+ break;
+ default:
+ /* ignore */
+ break;
+ }
+}
+
+static uint32_t
+fwctl_inl(void)
+{
+ uint32_t retval;
+
+ switch (be_state) {
+ case RESP:
+ if (fwctl_response(&retval))
+ be_state = REQ;
+ break;
+ default:
+ retval = 0xffffffff;
+ break;
+ }
+
+ return (retval);
+}
+
+static void
+fwctl_outl(uint32_t val)
+{
+
+ switch (be_state) {
+ case REQ:
+ if (fwctl_request(val))
+ be_state = RESP;
+ default:
+ break;
+ }
+
+}
+
+static int
+fwctl_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes,
+ uint32_t *eax, void *arg)
+{
+
+ if (in) {
+ if (bytes == 1)
+ *eax = fwctl_inb();
+ else if (bytes == 4)
+ *eax = fwctl_inl();
+ else
+ *eax = 0xffff;
+ } else {
+ if (bytes == 2)
+ fwctl_outw(*eax);
+ else if (bytes == 4)
+ fwctl_outl(*eax);
+ }
+
+ return (0);
+}
+INOUT_PORT(fwctl_wreg, FWCTL_OUT, IOPORT_F_INOUT, fwctl_handler);
+INOUT_PORT(fwctl_rreg, FWCTL_IN, IOPORT_F_IN, fwctl_handler);
+
+void
+fwctl_init(void)
+{
+
+ ops[OP_GET_LEN] = &fgetlen_info;
+ ops[OP_GET] = &fgetval_info;
+
+ be_state = IDENT_WAIT;
+}
diff --git a/usr.sbin/bhyve/fwctl.h b/usr.sbin/bhyve/fwctl.h
new file mode 100644
index 0000000..f5f8d13
--- /dev/null
+++ b/usr.sbin/bhyve/fwctl.h
@@ -0,0 +1,54 @@
+/*-
+ * Copyright (c) 2015 Peter Grehan <grehan@freebsd.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _FWCTL_H_
+#define _FWCTL_H_
+
+#include <sys/linker_set.h>
+
+/*
+ * Linker set api for export of information to guest firmware via
+ * a sysctl-like OID interface
+ */
+struct ctl {
+ const char *c_oid;
+ const void *c_data;
+ const int c_len;
+};
+
+#define CTL_NODE(oid, data, len) \
+ static struct ctl __CONCAT(__ctl, __LINE__) = { \
+ oid, \
+ (data), \
+ (len), \
+ }; \
+ DATA_SET(ctl_set, __CONCAT(__ctl, __LINE__))
+
+void fwctl_init(void);
+
+#endif /* _FWCTL_H_ */
diff --git a/usr.sbin/bhyve/pci_ahci.c b/usr.sbin/bhyve/pci_ahci.c
index 35a0859..5c4743c 100644
--- a/usr.sbin/bhyve/pci_ahci.c
+++ b/usr.sbin/bhyve/pci_ahci.c
@@ -926,7 +926,7 @@ handle_identify(struct ahci_port *p, int slot, uint8_t *cfis)
ata_string((uint8_t *)(buf+23), "001", 8);
ata_string((uint8_t *)(buf+27), "BHYVE SATA DISK", 40);
buf[47] = (0x8000 | 128);
- buf[48] = 0x1;
+ buf[48] = 0;
buf[49] = (1 << 8 | 1 << 9 | 1 << 11);
buf[50] = (1 << 14);
buf[53] = (1 << 1 | 1 << 2);
@@ -1683,15 +1683,23 @@ ahci_handle_cmd(struct ahci_port *p, int slot, uint8_t *cfis)
case ATA_READ_LOG_DMA_EXT:
ahci_handle_read_log(p, slot, cfis);
break;
+ case ATA_SECURITY_FREEZE_LOCK:
+ case ATA_SMART_CMD:
case ATA_NOP:
ahci_write_fis_d2h(p, slot, cfis,
(ATA_E_ABORT << 8) | ATA_S_READY | ATA_S_ERROR);
break;
+ case ATA_CHECK_POWER_MODE:
+ cfis[12] = 0xff; /* always on */
+ ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
+ break;
case ATA_STANDBY_CMD:
case ATA_STANDBY_IMMEDIATE:
case ATA_IDLE_CMD:
case ATA_IDLE_IMMEDIATE:
case ATA_SLEEP:
+ case ATA_READ_VERIFY:
+ case ATA_READ_VERIFY48:
ahci_write_fis_d2h(p, slot, cfis, ATA_S_READY | ATA_S_DSC);
break;
case ATA_ATAPI_IDENTIFY:
diff --git a/usr.sbin/bhyve/pci_lpc.c b/usr.sbin/bhyve/pci_lpc.c
index e98b141..2203a00 100644
--- a/usr.sbin/bhyve/pci_lpc.c
+++ b/usr.sbin/bhyve/pci_lpc.c
@@ -40,6 +40,7 @@ __FBSDID("$FreeBSD$");
#include <vmmapi.h>
#include "acpi.h"
+#include "bootrom.h"
#include "inout.h"
#include "pci_emul.h"
#include "pci_irq.h"
@@ -62,6 +63,8 @@ SYSRES_IO(NMISC_PORT, 1);
static struct pci_devinst *lpc_bridge;
+static const char *romfile;
+
#define LPC_UART_NUM 2
static struct lpc_uart_softc {
struct uart_softc *uart_softc;
@@ -76,7 +79,7 @@ static const char *lpc_uart_names[LPC_UART_NUM] = { "COM1", "COM2" };
/*
* LPC device configuration is in the following form:
* <lpc_device_name>[,<options>]
- * For e.g. "com1,stdio"
+ * For e.g. "com1,stdio" or "bootrom,/var/romfile"
*/
int
lpc_device_parse(const char *opts)
@@ -88,6 +91,11 @@ lpc_device_parse(const char *opts)
str = cpy = strdup(opts);
lpcdev = strsep(&str, ",");
if (lpcdev != NULL) {
+ if (strcasecmp(lpcdev, "bootrom") == 0) {
+ romfile = str;
+ error = 0;
+ goto done;
+ }
for (unit = 0; unit < LPC_UART_NUM; unit++) {
if (strcasecmp(lpcdev, lpc_uart_names[unit]) == 0) {
lpc_uart_softc[unit].opts = str;
@@ -104,6 +112,13 @@ done:
return (error);
}
+const char *
+lpc_bootrom(void)
+{
+
+ return (romfile);
+}
+
static void
lpc_uart_intr_assert(void *arg)
{
@@ -156,13 +171,19 @@ lpc_uart_io_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes,
}
static int
-lpc_init(void)
+lpc_init(struct vmctx *ctx)
{
struct lpc_uart_softc *sc;
struct inout_port iop;
const char *name;
int unit, error;
+ if (romfile != NULL) {
+ error = bootrom_init(ctx, romfile);
+ if (error)
+ return (error);
+ }
+
/* COM1 and COM2 */
for (unit = 0; unit < LPC_UART_NUM; unit++) {
sc = &lpc_uart_softc[unit];
@@ -379,7 +400,7 @@ pci_lpc_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
return (-1);
}
- if (lpc_init() != 0)
+ if (lpc_init(ctx) != 0)
return (-1);
/* initialize config space */
diff --git a/usr.sbin/bhyve/pci_lpc.h b/usr.sbin/bhyve/pci_lpc.h
index 55a5865..431f5cf 100644
--- a/usr.sbin/bhyve/pci_lpc.h
+++ b/usr.sbin/bhyve/pci_lpc.h
@@ -68,5 +68,6 @@ struct lpc_sysres {
int lpc_device_parse(const char *opt);
char *lpc_pirq_name(int pin);
void lpc_pirq_routed(void);
+const char *lpc_bootrom(void);
#endif
diff --git a/usr.sbin/bhyve/pci_passthru.c b/usr.sbin/bhyve/pci_passthru.c
index 04d68c4..5b52b05 100644
--- a/usr.sbin/bhyve/pci_passthru.c
+++ b/usr.sbin/bhyve/pci_passthru.c
@@ -548,12 +548,18 @@ done:
static int
passthru_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
{
- int bus, slot, func, error;
+ int bus, slot, func, error, memflags;
struct passthru_softc *sc;
sc = NULL;
error = 1;
+ memflags = vm_get_memflags(ctx);
+ if (!(memflags & VM_MEM_F_WIRED)) {
+ fprintf(stderr, "passthru requires guest memory to be wired\n");
+ goto done;
+ }
+
if (pcifd < 0) {
pcifd = open(_PATH_DEVPCI, O_RDWR, 0);
if (pcifd < 0)
diff --git a/usr.sbin/bhyve/pci_virtio_net.c b/usr.sbin/bhyve/pci_virtio_net.c
index c13940b..18ff908 100644
--- a/usr.sbin/bhyve/pci_virtio_net.c
+++ b/usr.sbin/bhyve/pci_virtio_net.c
@@ -61,7 +61,7 @@ __FBSDID("$FreeBSD$");
#define VTNET_RINGSZ 1024
-#define VTNET_MAXSEGS 32
+#define VTNET_MAXSEGS 256
/*
* Host capabilities. Note that we only offer a few of these.
@@ -88,7 +88,7 @@ __FBSDID("$FreeBSD$");
#define VTNET_S_HOSTCAPS \
( VIRTIO_NET_F_MAC | VIRTIO_NET_F_MRG_RXBUF | VIRTIO_NET_F_STATUS | \
- VIRTIO_F_NOTIFY_ON_EMPTY)
+ VIRTIO_F_NOTIFY_ON_EMPTY | VIRTIO_RING_F_INDIRECT_DESC)
/*
* PCI config-space "registers"
diff --git a/usr.sbin/bhyve/uart_emul.c b/usr.sbin/bhyve/uart_emul.c
index 4242e5c..538bf58 100644
--- a/usr.sbin/bhyve/uart_emul.c
+++ b/usr.sbin/bhyve/uart_emul.c
@@ -272,6 +272,37 @@ uart_opentty(struct uart_softc *sc)
assert(sc->mev != NULL);
}
+static uint8_t
+modem_status(uint8_t mcr)
+{
+ uint8_t msr;
+
+ if (mcr & MCR_LOOPBACK) {
+ /*
+ * In the loopback mode certain bits from the MCR are
+ * reflected back into MSR.
+ */
+ msr = 0;
+ if (mcr & MCR_RTS)
+ msr |= MSR_CTS;
+ if (mcr & MCR_DTR)
+ msr |= MSR_DSR;
+ if (mcr & MCR_OUT1)
+ msr |= MSR_RI;
+ if (mcr & MCR_OUT2)
+ msr |= MSR_DCD;
+ } else {
+ /*
+ * Always assert DCD and DSR so tty open doesn't block
+ * even if CLOCAL is turned off.
+ */
+ msr = MSR_DCD | MSR_DSR;
+ }
+ assert((msr & MSR_DELTA_MASK) == 0);
+
+ return (msr);
+}
+
/*
* The IIR returns a prioritized interrupt reason:
* - receive data available
@@ -304,6 +335,7 @@ uart_reset(struct uart_softc *sc)
divisor = DEFAULT_RCLK / DEFAULT_BAUD / 16;
sc->dll = divisor;
sc->dlh = divisor >> 16;
+ sc->msr = modem_status(sc->mcr);
rxfifo_reset(sc, 1); /* no fifo until enabled by software */
}
@@ -363,7 +395,7 @@ uart_write(struct uart_softc *sc, int offset, uint8_t value)
uint8_t msr;
pthread_mutex_lock(&sc->mtx);
-
+
/*
* Take care of the special case DLAB accesses first
*/
@@ -426,22 +458,7 @@ uart_write(struct uart_softc *sc, int offset, uint8_t value)
case REG_MCR:
/* Apply mask so that bits 5-7 are 0 */
sc->mcr = value & 0x1F;
-
- msr = 0;
- if (sc->mcr & MCR_LOOPBACK) {
- /*
- * In the loopback mode certain bits from the
- * MCR are reflected back into MSR
- */
- if (sc->mcr & MCR_RTS)
- msr |= MSR_CTS;
- if (sc->mcr & MCR_DTR)
- msr |= MSR_DSR;
- if (sc->mcr & MCR_OUT1)
- msr |= MSR_RI;
- if (sc->mcr & MCR_OUT2)
- msr |= MSR_DCD;
- }
+ msr = modem_status(sc->mcr);
/*
* Detect if there has been any change between the
diff --git a/usr.sbin/bhyvectl/bhyvectl.c b/usr.sbin/bhyvectl/bhyvectl.c
index 7d3017f..51bade1 100644
--- a/usr.sbin/bhyvectl/bhyvectl.c
+++ b/usr.sbin/bhyvectl/bhyvectl.c
@@ -34,6 +34,7 @@ __FBSDID("$FreeBSD$");
#include <sys/sysctl.h>
#include <sys/errno.h>
#include <sys/mman.h>
+#include <sys/cpuset.h>
#include <stdio.h>
#include <stdlib.h>
@@ -47,10 +48,12 @@ __FBSDID("$FreeBSD$");
#include <getopt.h>
#include <time.h>
#include <assert.h>
+#include <libutil.h>
#include <machine/cpufunc.h>
-#include <machine/vmm.h>
#include <machine/specialreg.h>
+#include <machine/vmm.h>
+#include <machine/vmm_dev.h>
#include <vmmapi.h>
#include "amd/vmcb.h"
@@ -236,7 +239,7 @@ static int get_stats, getcap, setcap, capval, get_gpa_pmap;
static int inject_nmi, assert_lapic_lvt;
static int force_reset, force_poweroff;
static const char *capname;
-static int create, destroy, get_lowmem, get_highmem;
+static int create, destroy, get_memmap, get_memseg;
static int get_intinfo;
static int get_active_cpus, get_suspended_cpus;
static uint64_t memsize;
@@ -1320,8 +1323,8 @@ setup_options(bool cpu_intel)
{ "get-desc-gdtr", NO_ARG, &get_desc_gdtr, 1 },
{ "set-desc-idtr", NO_ARG, &set_desc_idtr, 1 },
{ "get-desc-idtr", NO_ARG, &get_desc_idtr, 1 },
- { "get-lowmem", NO_ARG, &get_lowmem, 1 },
- { "get-highmem",NO_ARG, &get_highmem, 1 },
+ { "get-memmap", NO_ARG, &get_memmap, 1 },
+ { "get-memseg", NO_ARG, &get_memseg, 1 },
{ "get-efer", NO_ARG, &get_efer, 1 },
{ "get-cr0", NO_ARG, &get_cr0, 1 },
{ "get-cr3", NO_ARG, &get_cr3, 1 },
@@ -1520,18 +1523,92 @@ mon_str(int idx)
return ("UNK");
}
+static int
+show_memmap(struct vmctx *ctx)
+{
+ char name[SPECNAMELEN + 1], numbuf[8];
+ vm_ooffset_t segoff;
+ vm_paddr_t gpa;
+ size_t maplen, seglen;
+ int error, flags, prot, segid, delim;
+
+ printf("Address Length Segment Offset ");
+ printf("Prot Flags\n");
+
+ gpa = 0;
+ while (1) {
+ error = vm_mmap_getnext(ctx, &gpa, &segid, &segoff, &maplen,
+ &prot, &flags);
+ if (error)
+ return (errno == ENOENT ? 0 : error);
+
+ error = vm_get_memseg(ctx, segid, &seglen, name, sizeof(name));
+ if (error)
+ return (error);
+
+ printf("%-12lX", gpa);
+ humanize_number(numbuf, sizeof(numbuf), maplen, "B",
+ HN_AUTOSCALE, HN_NOSPACE);
+ printf("%-12s", numbuf);
+
+ printf("%-12s", name[0] ? name : "sysmem");
+ printf("%-12lX", segoff);
+ printf("%c%c%c ", prot & PROT_READ ? 'R' : '-',
+ prot & PROT_WRITE ? 'W' : '-',
+ prot & PROT_EXEC ? 'X' : '-');
+
+ delim = '\0';
+ if (flags & VM_MEMMAP_F_WIRED) {
+ printf("%cwired", delim);
+ delim = '/';
+ }
+ if (flags & VM_MEMMAP_F_IOMMU) {
+ printf("%ciommu", delim);
+ delim = '/';
+ }
+ printf("\n");
+
+ gpa += maplen;
+ }
+}
+
+static int
+show_memseg(struct vmctx *ctx)
+{
+ char name[SPECNAMELEN + 1], numbuf[8];
+ size_t seglen;
+ int error, segid;
+
+ printf("ID Length Name\n");
+
+ segid = 0;
+ while (1) {
+ error = vm_get_memseg(ctx, segid, &seglen, name, sizeof(name));
+ if (error)
+ return (errno == EINVAL ? 0 : error);
+
+ if (seglen) {
+ printf("%-4d", segid);
+ humanize_number(numbuf, sizeof(numbuf), seglen, "B",
+ HN_AUTOSCALE, HN_NOSPACE);
+ printf("%-12s", numbuf);
+ printf("%s", name[0] ? name : "sysmem");
+ printf("\n");
+ }
+ segid++;
+ }
+}
+
int
main(int argc, char *argv[])
{
char *vmname;
int error, ch, vcpu, ptenum;
- vm_paddr_t gpa, gpa_pmap;
- size_t len;
+ vm_paddr_t gpa_pmap;
struct vm_exit vmexit;
uint64_t rax, cr0, cr3, cr4, dr7, rsp, rip, rflags, efer, pat;
uint64_t eptp, bm, addr, u64, pteval[4], *pte, info[2];
struct vmctx *ctx;
- int wired;
cpuset_t cpus;
bool cpu_intel;
uint64_t cs, ds, es, fs, gs, ss, tr, ldtr;
@@ -1703,7 +1780,7 @@ main(int argc, char *argv[])
}
if (!error && memsize)
- error = vm_setup_memory(ctx, memsize, VM_MMAP_NONE);
+ error = vm_setup_memory(ctx, memsize, VM_MMAP_ALL);
if (!error && set_efer)
error = vm_set_register(ctx, vcpu, VM_REG_GUEST_EFER, efer);
@@ -1838,21 +1915,11 @@ main(int argc, char *argv[])
error = vm_lapic_local_irq(ctx, vcpu, assert_lapic_lvt);
}
- if (!error && (get_lowmem || get_all)) {
- gpa = 0;
- error = vm_get_memory_seg(ctx, gpa, &len, &wired);
- if (error == 0)
- printf("lowmem\t\t0x%016lx/%ld%s\n", gpa, len,
- wired ? " wired" : "");
- }
+ if (!error && (get_memseg || get_all))
+ error = show_memseg(ctx);
- if (!error && (get_highmem || get_all)) {
- gpa = 4 * GB;
- error = vm_get_memory_seg(ctx, gpa, &len, &wired);
- if (error == 0)
- printf("highmem\t\t0x%016lx/%ld%s\n", gpa, len,
- wired ? " wired" : "");
- }
+ if (!error && (get_memmap || get_all))
+ error = show_memmap(ctx);
if (!error)
error = get_all_registers(ctx, vcpu);
diff --git a/usr.sbin/bhyveload/bhyveload.8 b/usr.sbin/bhyveload/bhyveload.8
index 0bdf151..62b6b27 100644
--- a/usr.sbin/bhyveload/bhyveload.8
+++ b/usr.sbin/bhyveload/bhyveload.8
@@ -25,7 +25,7 @@
.\"
.\" $FreeBSD$
.\"
-.Dd January 7, 2012
+.Dd October 7, 2015
.Dt BHYVELOAD 8
.Os
.Sh NAME
@@ -35,10 +35,12 @@
guest inside a bhyve virtual machine
.Sh SYNOPSIS
.Nm
+.Op Fl S
.Op Fl c Ar cons-dev
.Op Fl d Ar disk-path
.Op Fl e Ar name=value
.Op Fl h Ar host-path
+.Op Fl l Ar os-loader
.Op Fl m Ar mem-size
.Ar vmname
.Sh DESCRIPTION
@@ -55,6 +57,7 @@ is based on
and will present an interface identical to the
.Fx
loader on the user's terminal.
+This behavior can be changed by specifying a different OS loader.
.Pp
The virtual machine is identified as
.Ar vmname
@@ -77,7 +80,9 @@ The
.Ar disk-path
is the pathname of the guest's boot disk image.
.It Fl e Ar name=value
-Set the FreeBSD loader environment variable
+Set the
+.Fx
+loader environment variable
.Ar name
to
.Ar value .
@@ -88,6 +93,15 @@ variable.
The
.Ar host-path
is the directory at the top of the guest's boot filesystem.
+.It Fl l Ar os-loader
+Specify a different OS loader.
+By default
+.Nm
+will use
+.Pa /boot/userboot.so ,
+which presents a standard
+.Fx
+loader.
.It Fl m Ar mem-size Xo
.Sm off
.Op Cm K | k | M | m | G | g | T | t
@@ -111,8 +125,10 @@ respectively.
The default value of
.Ar mem-size
is 256M.
-.El
+.It Fl S
+Wire guest memory.
.Sh EXAMPLES
+.El
To create a virtual machine named
.Ar freebsd-vm
that boots off the ISO image
diff --git a/usr.sbin/bhyveload/bhyveload.c b/usr.sbin/bhyveload/bhyveload.c
index 8ebf116..badf5f4 100644
--- a/usr.sbin/bhyveload/bhyveload.c
+++ b/usr.sbin/bhyveload/bhyveload.c
@@ -629,7 +629,7 @@ usage(void)
{
fprintf(stderr,
- "usage: %s [-c <console-device>] [-d <disk-path>] [-e <name=value>]\n"
+ "usage: %s [-S][-c <console-device>] [-d <disk-path>] [-e <name=value>]\n"
" %*s [-h <host-path>] [-m mem-size] <vmname>\n",
progname,
(int)strlen(progname), "");
@@ -639,19 +639,23 @@ usage(void)
int
main(int argc, char** argv)
{
+ char *loader;
void *h;
void (*func)(struct loader_callbacks *, void *, int, int);
uint64_t mem_size;
- int opt, error, need_reinit;
+ int opt, error, need_reinit, memflags;
progname = basename(argv[0]);
+ loader = NULL;
+
+ memflags = 0;
mem_size = 256 * MB;
consin_fd = STDIN_FILENO;
consout_fd = STDOUT_FILENO;
- while ((opt = getopt(argc, argv, "c:d:e:h:m:")) != -1) {
+ while ((opt = getopt(argc, argv, "Sc:d:e:h:l:m:")) != -1) {
switch (opt) {
case 'c':
error = altcons_open(optarg);
@@ -673,11 +677,22 @@ main(int argc, char** argv)
host_base = optarg;
break;
+ case 'l':
+ if (loader != NULL)
+ errx(EX_USAGE, "-l can only be given once");
+ loader = strdup(optarg);
+ if (loader == NULL)
+ err(EX_OSERR, "malloc");
+ break;
+
case 'm':
error = vm_parse_memsize(optarg, &mem_size);
if (error != 0)
errx(EX_USAGE, "Invalid memsize '%s'", optarg);
break;
+ case 'S':
+ memflags |= VM_MEM_F_WIRED;
+ break;
case '?':
usage();
}
@@ -715,32 +730,43 @@ main(int argc, char** argv)
}
}
+ vm_set_memflags(ctx, memflags);
error = vm_setup_memory(ctx, mem_size, VM_MMAP_ALL);
if (error) {
perror("vm_setup_memory");
exit(1);
}
- tcgetattr(consout_fd, &term);
- oldterm = term;
- cfmakeraw(&term);
- term.c_cflag |= CLOCAL;
-
- tcsetattr(consout_fd, TCSAFLUSH, &term);
-
- h = dlopen("/boot/userboot.so", RTLD_LOCAL);
+ if (loader == NULL) {
+ loader = strdup("/boot/userboot.so");
+ if (loader == NULL)
+ err(EX_OSERR, "malloc");
+ }
+ h = dlopen(loader, RTLD_LOCAL);
if (!h) {
printf("%s\n", dlerror());
+ free(loader);
return (1);
}
func = dlsym(h, "loader_main");
if (!func) {
printf("%s\n", dlerror());
+ free(loader);
return (1);
}
+ tcgetattr(consout_fd, &term);
+ oldterm = term;
+ cfmakeraw(&term);
+ term.c_cflag |= CLOCAL;
+
+ tcsetattr(consout_fd, TCSAFLUSH, &term);
+
addenv("smbios.bios.vendor=BHYVE");
addenv("boot_serial=1");
func(&cb, NULL, USERBOOT_VERSION_3, ndisks);
+
+ free(loader);
+ return (0);
}
OpenPOWER on IntegriCloud