Diffstat (limited to 'lib')
-rw-r--r--  lib/libvmmapi/vmmapi.c | 323
-rw-r--r--  lib/libvmmapi/vmmapi.h |  52
2 files changed, 317 insertions(+), 58 deletions(-)
diff --git a/lib/libvmmapi/vmmapi.c b/lib/libvmmapi/vmmapi.c
index 1e6e627..fb8eb78 100644
--- a/lib/libvmmapi/vmmapi.c
+++ b/lib/libvmmapi/vmmapi.c
@@ -58,15 +58,23 @@ __FBSDID("$FreeBSD$");
#define MB (1024 * 1024UL)
#define GB (1024 * 1024 * 1024UL)
+/*
+ * Size of the guard region before and after the virtual address space
+ * mapping the guest physical memory. This must be a multiple of the
+ * superpage size for performance reasons.
+ */
+#define VM_MMAP_GUARD_SIZE (4 * MB)
+
+#define PROT_RW (PROT_READ | PROT_WRITE)
+#define PROT_ALL (PROT_READ | PROT_WRITE | PROT_EXEC)
+
struct vmctx {
int fd;
uint32_t lowmem_limit;
- enum vm_mmap_style vms;
int memflags;
size_t lowmem;
- char *lowmem_addr;
size_t highmem;
- char *highmem_addr;
+ char *baseaddr;
char *name;
};
@@ -157,22 +165,6 @@ vm_parse_memsize(const char *optarg, size_t *ret_memsize)
return (error);
}
-int
-vm_get_memory_seg(struct vmctx *ctx, vm_paddr_t gpa, size_t *ret_len,
- int *wired)
-{
- int error;
- struct vm_memory_segment seg;
-
- bzero(&seg, sizeof(seg));
- seg.gpa = gpa;
- error = ioctl(ctx->fd, VM_GET_MEMORY_SEG, &seg);
- *ret_len = seg.len;
- if (wired != NULL)
- *wired = seg.wired;
- return (error);
-}
-
uint32_t
vm_get_lowmem_limit(struct vmctx *ctx)
{
@@ -194,39 +186,184 @@ vm_set_memflags(struct vmctx *ctx, int flags)
ctx->memflags = flags;
}
+int
+vm_get_memflags(struct vmctx *ctx)
+{
+
+ return (ctx->memflags);
+}
+
+/*
+ * Map segment 'segid' starting at 'off' into guest address range [gpa,gpa+len).
+ */
+int
+vm_mmap_memseg(struct vmctx *ctx, vm_paddr_t gpa, int segid, vm_ooffset_t off,
+ size_t len, int prot)
+{
+ struct vm_memmap memmap;
+ int error, flags;
+
+ memmap.gpa = gpa;
+ memmap.segid = segid;
+ memmap.segoff = off;
+ memmap.len = len;
+ memmap.prot = prot;
+ memmap.flags = 0;
+
+ if (ctx->memflags & VM_MEM_F_WIRED)
+ memmap.flags |= VM_MEMMAP_F_WIRED;
+
+ /*
+ * If this mapping already exists then don't create it again. This
+ * is the common case for SYSMEM mappings created by bhyveload(8).
+ */
+ error = vm_mmap_getnext(ctx, &gpa, &segid, &off, &len, &prot, &flags);
+ if (error == 0 && gpa == memmap.gpa) {
+ if (segid != memmap.segid || off != memmap.segoff ||
+ prot != memmap.prot || flags != memmap.flags) {
+ errno = EEXIST;
+ return (-1);
+ } else {
+ return (0);
+ }
+ }
+
+ error = ioctl(ctx->fd, VM_MMAP_MEMSEG, &memmap);
+ return (error);
+}
+
+int
+vm_mmap_getnext(struct vmctx *ctx, vm_paddr_t *gpa, int *segid,
+ vm_ooffset_t *segoff, size_t *len, int *prot, int *flags)
+{
+ struct vm_memmap memmap;
+ int error;
+
+ bzero(&memmap, sizeof(struct vm_memmap));
+ memmap.gpa = *gpa;
+ error = ioctl(ctx->fd, VM_MMAP_GETNEXT, &memmap);
+ if (error == 0) {
+ *gpa = memmap.gpa;
+ *segid = memmap.segid;
+ *segoff = memmap.segoff;
+ *len = memmap.len;
+ *prot = memmap.prot;
+ *flags = memmap.flags;
+ }
+ return (error);
+}
+
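The getnext call above is built for cursor-style walks: seed *gpa with zero and, after each hit, restart the search just past the returned range. A minimal sketch of such a walk, not part of this diff (the function name is hypothetical; assumes <stdio.h>, <stdint.h> and <vmmapi.h> are included):

/*
 * Illustrative sketch: dump every mapping in the guest address space
 * using the vm_mmap_getnext() iterator added above.
 */
static void
dump_guest_memmap(struct vmctx *ctx)
{
	vm_paddr_t gpa;
	vm_ooffset_t segoff;
	size_t len;
	int segid, prot, flags;

	gpa = 0;
	while (vm_mmap_getnext(ctx, &gpa, &segid, &segoff, &len,
	    &prot, &flags) == 0) {
		printf("gpa %#jx len %#zx segid %d prot %#x\n",
		    (uintmax_t)gpa, len, segid, prot);
		gpa += len;	/* resume the search past this range */
	}
}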
+/*
+ * Return 0 if the segments are identical and non-zero otherwise.
+ *
+ * This is slightly complicated by the fact that only device memory segments
+ * are named.
+ */
+static int
+cmpseg(size_t len, const char *str, size_t len2, const char *str2)
+{
+
+ if (len == len2) {
+ if ((!str && !str2) || (str && str2 && !strcmp(str, str2)))
+ return (0);
+ }
+ return (-1);
+}
+
static int
-setup_memory_segment(struct vmctx *ctx, vm_paddr_t gpa, size_t len, char **addr)
+vm_alloc_memseg(struct vmctx *ctx, int segid, size_t len, const char *name)
{
- int error, mmap_flags;
- struct vm_memory_segment seg;
+ struct vm_memseg memseg;
+ size_t n;
+ int error;
/*
- * Create and optionally map 'len' bytes of memory at guest
- * physical address 'gpa'
+ * If the memory segment has already been created then just return.
+ * This is the usual case for the SYSMEM segment created by userspace
+ * loaders like bhyveload(8).
*/
- bzero(&seg, sizeof(seg));
- seg.gpa = gpa;
- seg.len = len;
- error = ioctl(ctx->fd, VM_MAP_MEMORY, &seg);
- if (error == 0 && addr != NULL) {
- mmap_flags = MAP_SHARED;
- if ((ctx->memflags & VM_MEM_F_INCORE) == 0)
- mmap_flags |= MAP_NOCORE;
- *addr = mmap(NULL, len, PROT_READ | PROT_WRITE, mmap_flags,
- ctx->fd, gpa);
+ error = vm_get_memseg(ctx, segid, &memseg.len, memseg.name,
+ sizeof(memseg.name));
+ if (error)
+ return (error);
+
+ if (memseg.len != 0) {
+ if (cmpseg(len, name, memseg.len, VM_MEMSEG_NAME(&memseg))) {
+ errno = EINVAL;
+ return (-1);
+ } else {
+ return (0);
+ }
+ }
+
+ bzero(&memseg, sizeof(struct vm_memseg));
+ memseg.segid = segid;
+ memseg.len = len;
+ if (name != NULL) {
+ n = strlcpy(memseg.name, name, sizeof(memseg.name));
+ if (n >= sizeof(memseg.name)) {
+ errno = ENAMETOOLONG;
+ return (-1);
+ }
}
+
+ error = ioctl(ctx->fd, VM_ALLOC_MEMSEG, &memseg);
return (error);
}
int
-vm_setup_memory(struct vmctx *ctx, size_t memsize, enum vm_mmap_style vms)
+vm_get_memseg(struct vmctx *ctx, int segid, size_t *lenp, char *namebuf,
+ size_t bufsize)
{
- char **addr;
+ struct vm_memseg memseg;
+ size_t n;
int error;
- /* XXX VM_MMAP_SPARSE not implemented yet */
- assert(vms == VM_MMAP_NONE || vms == VM_MMAP_ALL);
- ctx->vms = vms;
+ memseg.segid = segid;
+ error = ioctl(ctx->fd, VM_GET_MEMSEG, &memseg);
+ if (error == 0) {
+ *lenp = memseg.len;
+ n = strlcpy(namebuf, memseg.name, bufsize);
+ if (n >= bufsize) {
+ errno = ENAMETOOLONG;
+ error = -1;
+ }
+ }
+ return (error);
+}
+
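Because vm_get_memseg() reports a zero length for a segment that has not been created yet (the same property vm_alloc_memseg() relies on above), a caller can use it to probe for a segment's existence. A hedged sketch, not part of this diff (the function name and the 64-byte name buffer are assumptions of the example):

/*
 * Illustrative sketch: test whether the bootrom devmem segment was
 * already created, e.g. by a userspace loader.
 */
static int
bootrom_present(struct vmctx *ctx)
{
	char name[64];
	size_t len;

	if (vm_get_memseg(ctx, VM_BOOTROM, &len, name, sizeof(name)) != 0)
		return (0);
	return (len != 0);
}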
+static int
+setup_memory_segment(struct vmctx *ctx, vm_paddr_t gpa, size_t len, char *base)
+{
+ char *ptr;
+ int error, flags;
+
+ /* Map 'len' bytes starting at 'gpa' in the guest address space */
+ error = vm_mmap_memseg(ctx, gpa, VM_SYSMEM, gpa, len, PROT_ALL);
+ if (error)
+ return (error);
+
+ flags = MAP_SHARED | MAP_FIXED;
+ if ((ctx->memflags & VM_MEM_F_INCORE) == 0)
+ flags |= MAP_NOCORE;
+
+ /* mmap into the process address space on the host */
+ ptr = mmap(base + gpa, len, PROT_RW, flags, ctx->fd, gpa);
+ if (ptr == MAP_FAILED)
+ return (-1);
+
+ return (0);
+}
+
+int
+vm_setup_memory(struct vmctx *ctx, size_t memsize, enum vm_mmap_style vms)
+{
+ size_t objsize, len;
+ vm_paddr_t gpa;
+ char *baseaddr, *ptr;
+ int error, flags;
+
+ assert(vms == VM_MMAP_ALL);
/*
* If 'memsize' cannot fit entirely in the 'lowmem' segment then
@@ -234,43 +371,69 @@ vm_setup_memory(struct vmctx *ctx, size_t memsize, enum vm_mmap_style vms)
*/
if (memsize > ctx->lowmem_limit) {
ctx->lowmem = ctx->lowmem_limit;
- ctx->highmem = memsize - ctx->lowmem;
+ ctx->highmem = memsize - ctx->lowmem_limit;
+ objsize = 4*GB + ctx->highmem;
} else {
ctx->lowmem = memsize;
ctx->highmem = 0;
+ objsize = ctx->lowmem;
}
- if (ctx->lowmem > 0) {
- addr = (vms == VM_MMAP_ALL) ? &ctx->lowmem_addr : NULL;
- error = setup_memory_segment(ctx, 0, ctx->lowmem, addr);
+ error = vm_alloc_memseg(ctx, VM_SYSMEM, objsize, NULL);
+ if (error)
+ return (error);
+
+ /*
+ * Stake out a contiguous region covering the guest physical memory
+ * and the adjoining guard regions.
+ */
+ len = VM_MMAP_GUARD_SIZE + objsize + VM_MMAP_GUARD_SIZE;
+ flags = MAP_PRIVATE | MAP_ANON | MAP_NOCORE | MAP_ALIGNED_SUPER;
+ ptr = mmap(NULL, len, PROT_NONE, flags, -1, 0);
+ if (ptr == MAP_FAILED)
+ return (-1);
+
+ baseaddr = ptr + VM_MMAP_GUARD_SIZE;
+ if (ctx->highmem > 0) {
+ gpa = 4*GB;
+ len = ctx->highmem;
+ error = setup_memory_segment(ctx, gpa, len, baseaddr);
if (error)
return (error);
}
- if (ctx->highmem > 0) {
- addr = (vms == VM_MMAP_ALL) ? &ctx->highmem_addr : NULL;
- error = setup_memory_segment(ctx, 4*GB, ctx->highmem, addr);
+ if (ctx->lowmem > 0) {
+ gpa = 0;
+ len = ctx->lowmem;
+ error = setup_memory_segment(ctx, gpa, len, baseaddr);
if (error)
return (error);
}
+ ctx->baseaddr = baseaddr;
+
return (0);
}
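After vm_setup_memory() returns, the whole of guest RAM is backed by a single contiguous host reservation, so gpa-to-host translation is one addition. A sketch of the layout created above, not part of this diff, for a guest larger than the lowmem limit:

/*
 * Host virtual address layout reserved by vm_setup_memory() when
 * memsize exceeds the lowmem limit:
 *
 *  ptr           baseaddr = ptr + VM_MMAP_GUARD_SIZE
 *   |<-- guard -->|<-- lowmem -->|<-- gap -->|<-- highmem -->|<-- guard -->|
 *  gpa:           0           lowmem       4GB         4GB+highmem
 *
 * A gpa inside lowmem or highmem maps to host address (baseaddr + gpa);
 * the guard regions and the MMIO gap stay PROT_NONE so stray accesses
 * fault instead of corrupting adjacent mappings.
 */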
+/*
+ * Returns a non-NULL pointer if [gaddr, gaddr+len) is entirely contained in
+ * the lowmem or highmem regions.
+ *
+ * In particular, return NULL if [gaddr, gaddr+len) falls in the guest MMIO
+ * region. The instruction emulation code depends on this behavior.
+ */
void *
vm_map_gpa(struct vmctx *ctx, vm_paddr_t gaddr, size_t len)
{
- /* XXX VM_MMAP_SPARSE not implemented yet */
- assert(ctx->vms == VM_MMAP_ALL);
-
- if (gaddr < ctx->lowmem && gaddr + len <= ctx->lowmem)
- return ((void *)(ctx->lowmem_addr + gaddr));
+ if (ctx->lowmem > 0) {
+ if (gaddr < ctx->lowmem && gaddr + len <= ctx->lowmem)
+ return (ctx->baseaddr + gaddr);
+ }
- if (gaddr >= 4*GB) {
- gaddr -= 4*GB;
- if (gaddr < ctx->highmem && gaddr + len <= ctx->highmem)
- return ((void *)(ctx->highmem_addr + gaddr));
+ if (ctx->highmem > 0) {
+ if (gaddr >= 4*GB && gaddr + len <= 4*GB + ctx->highmem)
+ return (ctx->baseaddr + gaddr);
}
return (NULL);
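A typical consumer obtains a host pointer from vm_map_gpa() before touching guest memory, relying on the NULL return to reject ranges that overlap the MMIO gap. A hedged sketch, not part of this diff (the helper name is illustrative; assumes <string.h> and <vmmapi.h> are included):

/*
 * Illustrative sketch: copy a buffer into guest RAM, letting
 * vm_map_gpa() reject ranges outside lowmem/highmem.
 */
static int
copy_to_guest(struct vmctx *ctx, vm_paddr_t gpa, const void *buf, size_t len)
{
	void *hva;

	hva = vm_map_gpa(ctx, gpa, len);
	if (hva == NULL)
		return (-1);	/* range overlaps the MMIO gap */
	memcpy(hva, buf, len);
	return (0);
}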
@@ -290,6 +453,56 @@ vm_get_highmem_size(struct vmctx *ctx)
return (ctx->highmem);
}
+void *
+vm_create_devmem(struct vmctx *ctx, int segid, const char *name, size_t len)
+{
+ char pathname[MAXPATHLEN];
+ size_t len2;
+ char *base, *ptr;
+ int fd, error, flags;
+
+ fd = -1;
+ ptr = MAP_FAILED;
+ if (name == NULL || strlen(name) == 0) {
+ errno = EINVAL;
+ goto done;
+ }
+
+ error = vm_alloc_memseg(ctx, segid, len, name);
+ if (error)
+ goto done;
+
+ strlcpy(pathname, "/dev/vmm.io/", sizeof(pathname));
+ strlcat(pathname, ctx->name, sizeof(pathname));
+ strlcat(pathname, ".", sizeof(pathname));
+ strlcat(pathname, name, sizeof(pathname));
+
+ fd = open(pathname, O_RDWR);
+ if (fd < 0)
+ goto done;
+
+ /*
+ * Stake out a contiguous region covering the device memory and the
+ * adjoining guard regions.
+ */
+ len2 = VM_MMAP_GUARD_SIZE + len + VM_MMAP_GUARD_SIZE;
+ flags = MAP_PRIVATE | MAP_ANON | MAP_NOCORE | MAP_ALIGNED_SUPER;
+ base = mmap(NULL, len2, PROT_NONE, flags, -1, 0);
+ if (base == MAP_FAILED)
+ goto done;
+
+ flags = MAP_SHARED | MAP_FIXED;
+ if ((ctx->memflags & VM_MEM_F_INCORE) == 0)
+ flags |= MAP_NOCORE;
+
+ /* mmap the devmem region in the host address space */
+ ptr = mmap(base + VM_MMAP_GUARD_SIZE, len, PROT_RW, flags, fd, 0);
+done:
+ if (fd >= 0)
+ close(fd);
+ return (ptr);
+}
+
int
vm_set_desc(struct vmctx *ctx, int vcpu, int reg,
uint64_t base, uint32_t limit, uint32_t access)
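Taken together, the devmem path gives consumers a two-step recipe: vm_create_devmem() allocates the named segment and maps it into the host, and vm_mmap_memseg() exposes it to the guest. A hedged sketch, not part of this diff (the segment name, 16MB size, and guest address are illustrative; assumes <sys/mman.h> and <vmmapi.h> are included):

/*
 * Illustrative sketch: create a 16MB framebuffer devmem segment and
 * map it into the guest address space at 'fb_gpa'.
 */
static void *
setup_framebuffer(struct vmctx *ctx, vm_paddr_t fb_gpa)
{
	const size_t fb_size = 16 * 1024 * 1024;
	void *fb;

	fb = vm_create_devmem(ctx, VM_FRAMEBUFFER, "framebuf", fb_size);
	if (fb == MAP_FAILED)
		return (NULL);
	if (vm_mmap_memseg(ctx, fb_gpa, VM_FRAMEBUFFER, 0, fb_size,
	    PROT_READ | PROT_WRITE) != 0)
		return (NULL);
	return (fb);
}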
diff --git a/lib/libvmmapi/vmmapi.h b/lib/libvmmapi/vmmapi.h
index d3ecdc4..57f8c56 100644
--- a/lib/libvmmapi/vmmapi.h
+++ b/lib/libvmmapi/vmmapi.h
@@ -36,7 +36,7 @@
* API version for out-of-tree consumers like grub-bhyve for making compile
* time decisions.
*/
-#define VMMAPI_VERSION 0101 /* 2 digit major followed by 2 digit minor */
+#define VMMAPI_VERSION 0102 /* 2 digit major followed by 2 digit minor */
struct iovec;
struct vmctx;
@@ -52,14 +52,59 @@ enum vm_mmap_style {
VM_MMAP_SPARSE, /* mappings created on-demand */
};
+/*
+ * 'flags' value passed to 'vm_set_memflags()'.
+ */
#define VM_MEM_F_INCORE 0x01 /* include guest memory in core file */
+#define VM_MEM_F_WIRED 0x02 /* guest memory is wired */
+
+/*
+ * Identifiers for memory segments:
+ * - vm_setup_memory() uses VM_SYSMEM for the system memory segment.
+ * - the remaining identifiers can be used to create devmem segments.
+ */
+enum {
+ VM_SYSMEM,
+ VM_BOOTROM,
+ VM_FRAMEBUFFER,
+};
+
+/*
+ * Get the length and name of the memory segment identified by 'segid'.
+ * Note that system memory segments are identified with a nul name.
+ *
+ * Returns 0 on success and non-zero otherwise.
+ */
+int vm_get_memseg(struct vmctx *ctx, int ident, size_t *lenp, char *name,
+ size_t namesiz);
+
+/*
+ * Iterate over the guest address space. This function finds an address range
+ * that starts at an address >= *gpa.
+ *
+ * Returns 0 if the next address range was found and non-zero otherwise.
+ */
+int vm_mmap_getnext(struct vmctx *ctx, vm_paddr_t *gpa, int *segid,
+ vm_ooffset_t *segoff, size_t *len, int *prot, int *flags);
+/*
+ * Create a device memory segment identified by 'segid'.
+ *
+ * Returns a pointer to the memory segment on success and MAP_FAILED otherwise.
+ */
+void *vm_create_devmem(struct vmctx *ctx, int segid, const char *name,
+ size_t len);
+
+/*
+ * Map the memory segment identified by 'segid' into the guest address space
+ * at [gpa,gpa+len) with protection 'prot'.
+ */
+int vm_mmap_memseg(struct vmctx *ctx, vm_paddr_t gpa, int segid,
+ vm_ooffset_t segoff, size_t len, int prot);
int vm_create(const char *name);
struct vmctx *vm_open(const char *name);
void vm_destroy(struct vmctx *ctx);
int vm_parse_memsize(const char *optarg, size_t *memsize);
-int vm_get_memory_seg(struct vmctx *ctx, vm_paddr_t gpa, size_t *ret_len,
- int *wired);
int vm_setup_memory(struct vmctx *ctx, size_t len, enum vm_mmap_style s);
void *vm_map_gpa(struct vmctx *ctx, vm_paddr_t gaddr, size_t len);
int vm_get_gpa_pmap(struct vmctx *, uint64_t gpa, uint64_t *pte, int *num);
@@ -68,6 +113,7 @@ int vm_gla2gpa(struct vmctx *, int vcpuid, struct vm_guest_paging *paging,
uint32_t vm_get_lowmem_limit(struct vmctx *ctx);
void vm_set_lowmem_limit(struct vmctx *ctx, uint32_t limit);
void vm_set_memflags(struct vmctx *ctx, int flags);
+int vm_get_memflags(struct vmctx *ctx);
size_t vm_get_lowmem_size(struct vmctx *ctx);
size_t vm_get_highmem_size(struct vmctx *ctx);
int vm_set_desc(struct vmctx *ctx, int vcpu, int reg,
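For ordinary RAM, callers see little change beyond the removed vm_get_memory_seg(): setup is still a single vm_setup_memory() call, optionally preceded by vm_set_memflags(). A hedged end-to-end sketch, not part of this diff (the wrapper name is hypothetical; the include pair follows the usual FreeBSD consumer pattern):

#include <machine/vmm.h>
#include <vmmapi.h>

/*
 * Illustrative sketch: typical guest RAM setup with the revised API,
 * optionally wiring the memory.
 */
static int
guest_mem_init(struct vmctx *ctx, size_t memsize, int wire)
{
	if (wire)
		vm_set_memflags(ctx, vm_get_memflags(ctx) | VM_MEM_F_WIRED);

	/* memflags must be set before the mappings are created */
	return (vm_setup_memory(ctx, memsize, VM_MMAP_ALL));
}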