diff options
Diffstat (limited to 'lib/libvmmapi/vmmapi.c')
-rw-r--r-- | lib/libvmmapi/vmmapi.c | 320 |
1 files changed, 262 insertions, 58 deletions
diff --git a/lib/libvmmapi/vmmapi.c b/lib/libvmmapi/vmmapi.c index 1e6e627..d5f387a 100644 --- a/lib/libvmmapi/vmmapi.c +++ b/lib/libvmmapi/vmmapi.c @@ -58,15 +58,23 @@ __FBSDID("$FreeBSD$"); #define MB (1024 * 1024UL) #define GB (1024 * 1024 * 1024UL) +/* + * Size of the guard region before and after the virtual address space + * mapping the guest physical memory. This must be a multiple of the + * superpage size for performance reasons. + */ +#define VM_MMAP_GUARD_SIZE (4 * MB) + +#define PROT_RW (PROT_READ | PROT_WRITE) +#define PROT_ALL (PROT_READ | PROT_WRITE | PROT_EXEC) + struct vmctx { int fd; uint32_t lowmem_limit; - enum vm_mmap_style vms; int memflags; size_t lowmem; - char *lowmem_addr; size_t highmem; - char *highmem_addr; + char *baseaddr; char *name; }; @@ -157,22 +165,6 @@ vm_parse_memsize(const char *optarg, size_t *ret_memsize) return (error); } -int -vm_get_memory_seg(struct vmctx *ctx, vm_paddr_t gpa, size_t *ret_len, - int *wired) -{ - int error; - struct vm_memory_segment seg; - - bzero(&seg, sizeof(seg)); - seg.gpa = gpa; - error = ioctl(ctx->fd, VM_GET_MEMORY_SEG, &seg); - *ret_len = seg.len; - if (wired != NULL) - *wired = seg.wired; - return (error); -} - uint32_t vm_get_lowmem_limit(struct vmctx *ctx) { @@ -194,39 +186,184 @@ vm_set_memflags(struct vmctx *ctx, int flags) ctx->memflags = flags; } +int +vm_get_memflags(struct vmctx *ctx) +{ + + return (ctx->memflags); +} + +/* + * Map segment 'segid' starting at 'off' into guest address range [gpa,gpa+len). + */ +int +vm_mmap_memseg(struct vmctx *ctx, vm_paddr_t gpa, int segid, vm_ooffset_t off, + size_t len, int prot) +{ + struct vm_memmap memmap; + int error, flags; + + memmap.gpa = gpa; + memmap.segid = segid; + memmap.segoff = off; + memmap.len = len; + memmap.prot = prot; + memmap.flags = 0; + + if (ctx->memflags & VM_MEM_F_WIRED) + memmap.flags |= VM_MEMMAP_F_WIRED; + + /* + * If this mapping already exists then don't create it again. This + * is the common case for SYSMEM mappings created by bhyveload(8). + */ + error = vm_mmap_getnext(ctx, &gpa, &segid, &off, &len, &prot, &flags); + if (error == 0 && gpa == memmap.gpa) { + if (segid != memmap.segid || off != memmap.segoff || + prot != memmap.prot || flags != memmap.flags) { + errno = EEXIST; + return (-1); + } else { + return (0); + } + } + + error = ioctl(ctx->fd, VM_MMAP_MEMSEG, &memmap); + return (error); +} + +int +vm_mmap_getnext(struct vmctx *ctx, vm_paddr_t *gpa, int *segid, + vm_ooffset_t *segoff, size_t *len, int *prot, int *flags) +{ + struct vm_memmap memmap; + int error; + + bzero(&memmap, sizeof(struct vm_memmap)); + memmap.gpa = *gpa; + error = ioctl(ctx->fd, VM_MMAP_GETNEXT, &memmap); + if (error == 0) { + *gpa = memmap.gpa; + *segid = memmap.segid; + *segoff = memmap.segoff; + *len = memmap.len; + *prot = memmap.prot; + *flags = memmap.flags; + } + return (error); +} + +/* + * Return 0 if the segments are identical and non-zero otherwise. + * + * This is slightly complicated by the fact that only device memory segments + * are named. + */ +static int +cmpseg(size_t len, const char *str, size_t len2, const char *str2) +{ + + if (len == len2) { + if ((!str && !str2) || (str && str2 && !strcmp(str, str2))) + return (0); + } + return (-1); +} + static int -setup_memory_segment(struct vmctx *ctx, vm_paddr_t gpa, size_t len, char **addr) +vm_alloc_memseg(struct vmctx *ctx, int segid, size_t len, const char *name) { - int error, mmap_flags; - struct vm_memory_segment seg; + struct vm_memseg memseg; + size_t n; + int error; /* - * Create and optionally map 'len' bytes of memory at guest - * physical address 'gpa' + * If the memory segment has already been created then just return. + * This is the usual case for the SYSMEM segment created by userspace + * loaders like bhyveload(8). */ - bzero(&seg, sizeof(seg)); - seg.gpa = gpa; - seg.len = len; - error = ioctl(ctx->fd, VM_MAP_MEMORY, &seg); - if (error == 0 && addr != NULL) { - mmap_flags = MAP_SHARED; - if ((ctx->memflags & VM_MEM_F_INCORE) == 0) - mmap_flags |= MAP_NOCORE; - *addr = mmap(NULL, len, PROT_READ | PROT_WRITE, mmap_flags, - ctx->fd, gpa); + error = vm_get_memseg(ctx, segid, &memseg.len, memseg.name, + sizeof(memseg.name)); + if (error) + return (error); + + if (memseg.len != 0) { + if (cmpseg(len, name, memseg.len, VM_MEMSEG_NAME(&memseg))) { + errno = EINVAL; + return (-1); + } else { + return (0); + } + } + + bzero(&memseg, sizeof(struct vm_memseg)); + memseg.segid = segid; + memseg.len = len; + if (name != NULL) { + n = strlcpy(memseg.name, name, sizeof(memseg.name)); + if (n >= sizeof(memseg.name)) { + errno = ENAMETOOLONG; + return (-1); + } } + + error = ioctl(ctx->fd, VM_ALLOC_MEMSEG, &memseg); return (error); } int -vm_setup_memory(struct vmctx *ctx, size_t memsize, enum vm_mmap_style vms) +vm_get_memseg(struct vmctx *ctx, int segid, size_t *lenp, char *namebuf, + size_t bufsize) { - char **addr; + struct vm_memseg memseg; + size_t n; int error; - /* XXX VM_MMAP_SPARSE not implemented yet */ - assert(vms == VM_MMAP_NONE || vms == VM_MMAP_ALL); - ctx->vms = vms; + memseg.segid = segid; + error = ioctl(ctx->fd, VM_GET_MEMSEG, &memseg); + if (error == 0) { + *lenp = memseg.len; + n = strlcpy(namebuf, memseg.name, bufsize); + if (n >= bufsize) { + errno = ENAMETOOLONG; + error = -1; + } + } + return (error); +} + +static int +setup_memory_segment(struct vmctx *ctx, vm_paddr_t gpa, size_t len, char *base) +{ + char *ptr; + int error, flags; + + /* Map 'len' bytes starting at 'gpa' in the guest address space */ + error = vm_mmap_memseg(ctx, gpa, VM_SYSMEM, gpa, len, PROT_ALL); + if (error) + return (error); + + flags = MAP_SHARED | MAP_FIXED; + if ((ctx->memflags & VM_MEM_F_INCORE) == 0) + flags |= MAP_NOCORE; + + /* mmap into the process address space on the host */ + ptr = mmap(base + gpa, len, PROT_RW, flags, ctx->fd, gpa); + if (ptr == MAP_FAILED) + return (-1); + + return (0); +} + +int +vm_setup_memory(struct vmctx *ctx, size_t memsize, enum vm_mmap_style vms) +{ + size_t objsize, len; + vm_paddr_t gpa; + char *baseaddr, *ptr; + int error, flags; + + assert(vms == VM_MMAP_ALL); /* * If 'memsize' cannot fit entirely in the 'lowmem' segment then @@ -234,46 +371,63 @@ vm_setup_memory(struct vmctx *ctx, size_t memsize, enum vm_mmap_style vms) */ if (memsize > ctx->lowmem_limit) { ctx->lowmem = ctx->lowmem_limit; - ctx->highmem = memsize - ctx->lowmem; + ctx->highmem = memsize - ctx->lowmem_limit; + objsize = 4*GB + ctx->highmem; } else { ctx->lowmem = memsize; ctx->highmem = 0; + objsize = ctx->lowmem; } - if (ctx->lowmem > 0) { - addr = (vms == VM_MMAP_ALL) ? &ctx->lowmem_addr : NULL; - error = setup_memory_segment(ctx, 0, ctx->lowmem, addr); + error = vm_alloc_memseg(ctx, VM_SYSMEM, objsize, NULL); + if (error) + return (error); + + /* + * Stake out a contiguous region covering the guest physical memory + * and the adjoining guard regions. + */ + len = VM_MMAP_GUARD_SIZE + objsize + VM_MMAP_GUARD_SIZE; + flags = MAP_PRIVATE | MAP_ANON | MAP_NOCORE | MAP_ALIGNED_SUPER; + ptr = mmap(NULL, len, PROT_NONE, flags, -1, 0); + if (ptr == MAP_FAILED) + return (-1); + + baseaddr = ptr + VM_MMAP_GUARD_SIZE; + if (ctx->highmem > 0) { + gpa = 4*GB; + len = ctx->highmem; + error = setup_memory_segment(ctx, gpa, len, baseaddr); if (error) return (error); } - if (ctx->highmem > 0) { - addr = (vms == VM_MMAP_ALL) ? &ctx->highmem_addr : NULL; - error = setup_memory_segment(ctx, 4*GB, ctx->highmem, addr); + if (ctx->lowmem > 0) { + gpa = 0; + len = ctx->lowmem; + error = setup_memory_segment(ctx, gpa, len, baseaddr); if (error) return (error); } + ctx->baseaddr = baseaddr; + return (0); } void * vm_map_gpa(struct vmctx *ctx, vm_paddr_t gaddr, size_t len) { + vm_paddr_t start, end, mapend; - /* XXX VM_MMAP_SPARSE not implemented yet */ - assert(ctx->vms == VM_MMAP_ALL); - - if (gaddr < ctx->lowmem && gaddr + len <= ctx->lowmem) - return ((void *)(ctx->lowmem_addr + gaddr)); - - if (gaddr >= 4*GB) { - gaddr -= 4*GB; - if (gaddr < ctx->highmem && gaddr + len <= ctx->highmem) - return ((void *)(ctx->highmem_addr + gaddr)); - } + start = gaddr; + end = gaddr + len; + mapend = ctx->highmem ? 4*GB + ctx->highmem : ctx->lowmem; - return (NULL); + if (start <= end && end <= mapend) + return (ctx->baseaddr + start); + else + return (NULL); } size_t @@ -290,6 +444,56 @@ vm_get_highmem_size(struct vmctx *ctx) return (ctx->highmem); } +void * +vm_create_devmem(struct vmctx *ctx, int segid, const char *name, size_t len) +{ + char pathname[MAXPATHLEN]; + size_t len2; + char *base, *ptr; + int fd, error, flags; + + fd = -1; + ptr = MAP_FAILED; + if (name == NULL || strlen(name) == 0) { + errno = EINVAL; + goto done; + } + + error = vm_alloc_memseg(ctx, segid, len, name); + if (error) + goto done; + + strlcpy(pathname, "/dev/vmm/", sizeof(pathname)); + strlcat(pathname, ctx->name, sizeof(pathname)); + strlcat(pathname, ".", sizeof(pathname)); + strlcat(pathname, name, sizeof(pathname)); + + fd = open(pathname, O_RDWR); + if (fd < 0) + goto done; + + /* + * Stake out a contiguous region covering the device memory and the + * adjoining guard regions. + */ + len2 = VM_MMAP_GUARD_SIZE + len + VM_MMAP_GUARD_SIZE; + flags = MAP_PRIVATE | MAP_ANON | MAP_NOCORE | MAP_ALIGNED_SUPER; + base = mmap(NULL, len2, PROT_NONE, flags, -1, 0); + if (base == MAP_FAILED) + goto done; + + flags = MAP_SHARED | MAP_FIXED; + if ((ctx->memflags & VM_MEM_F_INCORE) == 0) + flags |= MAP_NOCORE; + + /* mmap the devmem region in the host address space */ + ptr = mmap(base + VM_MMAP_GUARD_SIZE, len, PROT_RW, flags, fd, 0); +done: + if (fd >= 0) + close(fd); + return (ptr); +} + int vm_set_desc(struct vmctx *ctx, int vcpu, int reg, uint64_t base, uint32_t limit, uint32_t access) |