Diffstat (limited to 'sys/vm/vm_mmap.c')
-rw-r--r--	sys/vm/vm_mmap.c | 331
1 file changed, 105 insertions(+), 226 deletions(-)
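In short, this change moves sys_mmap()'s per-file-type dispatch behind the new fo_mmap() file operation: the static vm_mmap_vnode()/vm_mmap_cdev()/vm_mmap_shm() prototypes and the DTYPE_SHM/DTYPE_VNODE special cases leave the syscall (along with the hwpmc(4) map-in notification), vm_mmap_shm() is removed outright, and vm_mmap_cdev() now takes the cdevsw from its caller instead of calling dev_refthread() itself. vm_mmap() survives as a wrapper for kernel callers (exec, SysV shared memory, device drivers): it resolves a vnode or cdev handle to a VM object and hands off to the new vm_mmap_object(), which performs the actual map insertion.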
diff --git a/sys/vm/vm_mmap.c b/sys/vm/vm_mmap.c
index 1dd2479..489a987 100644
--- a/sys/vm/vm_mmap.c
+++ b/sys/vm/vm_mmap.c
@@ -100,13 +100,6 @@ SYSCTL_INT(_vm, OID_AUTO, old_mlock, CTLFLAG_RWTUN, &old_mlock, 0,
#define MAP_32BIT_MAX_ADDR ((vm_offset_t)1 << 31)
#endif
-static int vm_mmap_vnode(struct thread *, vm_size_t, vm_prot_t, vm_prot_t *,
- int *, struct vnode *, vm_ooffset_t *, vm_object_t *, boolean_t *);
-static int vm_mmap_cdev(struct thread *, vm_size_t, vm_prot_t, vm_prot_t *,
- int *, struct cdev *, vm_ooffset_t *, vm_object_t *);
-static int vm_mmap_shm(struct thread *, vm_size_t, vm_prot_t, vm_prot_t *,
- int *, struct shmfd *, vm_ooffset_t, vm_object_t *);
-
#ifndef _SYS_SYSPROTO_H_
struct sbrk_args {
int incr;
@@ -197,16 +190,10 @@ sys_mmap(td, uap)
struct thread *td;
struct mmap_args *uap;
{
-#ifdef HWPMC_HOOKS
- struct pmckern_map_in pkm;
-#endif
struct file *fp;
- struct vnode *vp;
vm_offset_t addr;
vm_size_t size, pageoff;
- vm_prot_t cap_maxprot, maxprot;
- void *handle;
- objtype_t handle_type;
+ vm_prot_t cap_maxprot;
int align, error, flags, prot;
off_t pos;
struct vmspace *vms = td->td_proc->p_vmspace;
@@ -334,14 +321,22 @@ sys_mmap(td, uap)
lim_max(td->td_proc, RLIMIT_DATA));
PROC_UNLOCK(td->td_proc);
}
- if (flags & MAP_ANON) {
+ if (size == 0) {
+ /*
+ * Return success without mapping anything for old
+ * binaries that request a page-aligned mapping of
+ * length 0. For modern binaries, this function
+ * returns an error earlier.
+ */
+ error = 0;
+ } else if (flags & MAP_ANON) {
/*
* Mapping blank space is trivial.
+ *
+ * This relies on VM_PROT_* matching PROT_*.
*/
- handle = NULL;
- handle_type = OBJT_DEFAULT;
- maxprot = VM_PROT_ALL;
- cap_maxprot = VM_PROT_ALL;
+ error = vm_mmap_object(&vms->vm_map, &addr, size, prot,
+ VM_PROT_ALL, flags, NULL, pos, FALSE, td);
} else {
/*
* Mapping file, get fp for validation and don't let the
@@ -366,93 +361,12 @@ sys_mmap(td, uap)
error = EINVAL;
goto done;
}
- if (fp->f_type == DTYPE_SHM) {
- handle = fp->f_data;
- handle_type = OBJT_SWAP;
- maxprot = VM_PROT_NONE;
-
- /* FREAD should always be set. */
- if (fp->f_flag & FREAD)
- maxprot |= VM_PROT_EXECUTE | VM_PROT_READ;
- if (fp->f_flag & FWRITE)
- maxprot |= VM_PROT_WRITE;
- goto map;
- }
- if (fp->f_type != DTYPE_VNODE) {
- error = ENODEV;
- goto done;
- }
-#if defined(COMPAT_FREEBSD7) || defined(COMPAT_FREEBSD6) || \
- defined(COMPAT_FREEBSD5) || defined(COMPAT_FREEBSD4)
- /*
- * POSIX shared-memory objects are defined to have
- * kernel persistence, and are not defined to support
- * read(2)/write(2) -- or even open(2). Thus, we can
- * use MAP_ASYNC to trade on-disk coherence for speed.
- * The shm_open(3) library routine turns on the FPOSIXSHM
- * flag to request this behavior.
- */
- if (fp->f_flag & FPOSIXSHM)
- flags |= MAP_NOSYNC;
-#endif
- vp = fp->f_vnode;
- /*
- * Ensure that file and memory protections are
- * compatible. Note that we only worry about
- * writability if mapping is shared; in this case,
- * current and max prot are dictated by the open file.
- * XXX use the vnode instead? Problem is: what
- * credentials do we use for determination? What if
- * proc does a setuid?
- */
- if (vp->v_mount != NULL && vp->v_mount->mnt_flag & MNT_NOEXEC)
- maxprot = VM_PROT_NONE;
- else
- maxprot = VM_PROT_EXECUTE;
- if (fp->f_flag & FREAD) {
- maxprot |= VM_PROT_READ;
- } else if (prot & PROT_READ) {
- error = EACCES;
- goto done;
- }
- /*
- * If we are sharing potential changes (either via
- * MAP_SHARED or via the implicit sharing of character
- * device mappings), and we are trying to get write
- * permission although we opened it without asking
- * for it, bail out.
- */
- if ((flags & MAP_SHARED) != 0) {
- if ((fp->f_flag & FWRITE) != 0) {
- maxprot |= VM_PROT_WRITE;
- } else if ((prot & PROT_WRITE) != 0) {
- error = EACCES;
- goto done;
- }
- } else if (vp->v_type != VCHR || (fp->f_flag & FWRITE) != 0) {
- maxprot |= VM_PROT_WRITE;
- cap_maxprot |= VM_PROT_WRITE;
- }
- handle = (void *)vp;
- handle_type = OBJT_VNODE;
- }
-map:
- td->td_fpop = fp;
- maxprot &= cap_maxprot;
- /* This relies on VM_PROT_* matching PROT_*. */
- error = vm_mmap(&vms->vm_map, &addr, size, prot, maxprot,
- flags, handle_type, handle, pos);
- td->td_fpop = NULL;
-#ifdef HWPMC_HOOKS
- /* inform hwpmc(4) if an executable is being mapped */
- if (error == 0 && handle_type == OBJT_VNODE &&
- (prot & PROT_EXEC)) {
- pkm.pm_file = handle;
- pkm.pm_address = (uintptr_t) addr;
- PMC_CALL_HOOK(td, PMC_FN_MMAP, (void *) &pkm);
+ /* This relies on VM_PROT_* matching PROT_*. */
+ error = fo_mmap(fp, &vms->vm_map, &addr, size, prot,
+ cap_maxprot, flags, pos, td);
}
-#endif
+
if (error == 0)
td->td_retval[0] = (register_t) (addr + pageoff);
done:
@@ -1311,9 +1225,6 @@ sys_munlock(td, uap)
*
* Helper function for vm_mmap. Perform sanity check specific for mmap
* operations on vnodes.
- *
- * For VCHR vnodes, the vnode lock is held over the call to
- * vm_mmap_cdev() to keep vp->v_rdev valid.
*/
int
vm_mmap_vnode(struct thread *td, vm_size_t objsize,
@@ -1360,12 +1271,6 @@ vm_mmap_vnode(struct thread *td, vm_size_t objsize,
*writecounted = TRUE;
vnode_pager_update_writecount(obj, 0, objsize);
}
- } else if (vp->v_type == VCHR) {
- error = vm_mmap_cdev(td, objsize, prot, maxprotp, flagsp,
- vp->v_rdev, foffp, objp);
- if (error == 0)
- goto mark_atime;
- goto done;
} else {
error = EINVAL;
goto done;
@@ -1373,13 +1278,14 @@ vm_mmap_vnode(struct thread *td, vm_size_t objsize,
if ((error = VOP_GETATTR(vp, &va, cred)))
goto done;
#ifdef MAC
- error = mac_vnode_check_mmap(cred, vp, prot, flags);
+ /* This relies on VM_PROT_* matching PROT_*. */
+ error = mac_vnode_check_mmap(cred, vp, (int)prot, flags);
if (error != 0)
goto done;
#endif
if ((flags & MAP_SHARED) != 0) {
if ((va.va_flags & (SF_SNAPSHOT|IMMUTABLE|APPEND)) != 0) {
- if (prot & PROT_WRITE) {
+ if (prot & VM_PROT_WRITE) {
error = EPERM;
goto done;
}
@@ -1414,7 +1320,6 @@ vm_mmap_vnode(struct thread *td, vm_size_t objsize,
*objp = obj;
*flagsp = flags;
-mark_atime:
vfs_mark_atime(vp, cred);
done:
@@ -1435,21 +1340,18 @@ done:
* operations on cdevs.
*/
int
-vm_mmap_cdev(struct thread *td, vm_size_t objsize,
- vm_prot_t prot, vm_prot_t *maxprotp, int *flagsp,
- struct cdev *cdev, vm_ooffset_t *foff, vm_object_t *objp)
+vm_mmap_cdev(struct thread *td, vm_size_t objsize, vm_prot_t prot,
+ vm_prot_t *maxprotp, int *flagsp, struct cdev *cdev, struct cdevsw *dsw,
+ vm_ooffset_t *foff, vm_object_t *objp)
{
vm_object_t obj;
- struct cdevsw *dsw;
- int error, flags, ref;
+ int error, flags;
flags = *flagsp;
- dsw = dev_refthread(cdev, &ref);
- if (dsw == NULL)
- return (ENXIO);
if (dsw->d_flags & D_MMAP_ANON) {
- dev_relthread(cdev, ref);
+ *objp = NULL;
+ *foff = 0;
*maxprotp = VM_PROT_ALL;
*flagsp |= MAP_ANON;
return (0);
@@ -1458,24 +1360,18 @@ vm_mmap_cdev(struct thread *td, vm_size_t objsize,
* cdevs do not provide private mappings of any kind.
*/
if ((*maxprotp & VM_PROT_WRITE) == 0 &&
- (prot & PROT_WRITE) != 0) {
- dev_relthread(cdev, ref);
+ (prot & VM_PROT_WRITE) != 0)
return (EACCES);
- }
- if (flags & (MAP_PRIVATE|MAP_COPY)) {
- dev_relthread(cdev, ref);
+ if (flags & (MAP_PRIVATE|MAP_COPY))
return (EINVAL);
- }
/*
* Force device mappings to be shared.
*/
flags |= MAP_SHARED;
#ifdef MAC_XXX
- error = mac_cdev_check_mmap(td->td_ucred, cdev, prot);
- if (error != 0) {
- dev_relthread(cdev, ref);
+ error = mac_cdev_check_mmap(td->td_ucred, cdev, (int)prot);
+ if (error != 0)
return (error);
- }
#endif
/*
* First, try d_mmap_single(). If that is not implemented
@@ -1487,7 +1383,6 @@ vm_mmap_cdev(struct thread *td, vm_size_t objsize,
* XXX assumes VM_PROT_* == PROT_*
*/
error = dsw->d_mmap_single(cdev, foff, objsize, objp, (int)prot);
- dev_relthread(cdev, ref);
if (error != ENODEV)
return (error);
obj = vm_pager_allocate(OBJT_DEVICE, cdev, objsize, prot, *foff,
@@ -1500,59 +1395,89 @@ vm_mmap_cdev(struct thread *td, vm_size_t objsize,
}
/*
- * vm_mmap_shm()
- *
- * MPSAFE
+ * vm_mmap()
*
- * Helper function for vm_mmap. Perform sanity check specific for mmap
- * operations on shm file descriptors.
+ * Internal version of mmap used by exec, sys5 shared memory, and
+ * various device drivers. Handle is either a vnode pointer, a
+ * character device, or NULL for MAP_ANON.
*/
int
-vm_mmap_shm(struct thread *td, vm_size_t objsize,
- vm_prot_t prot, vm_prot_t *maxprotp, int *flagsp,
- struct shmfd *shmfd, vm_ooffset_t foff, vm_object_t *objp)
+vm_mmap(vm_map_t map, vm_offset_t *addr, vm_size_t size, vm_prot_t prot,
+ vm_prot_t maxprot, int flags,
+ objtype_t handle_type, void *handle,
+ vm_ooffset_t foff)
{
+ vm_object_t object;
+ struct thread *td = curthread;
int error;
+ boolean_t writecounted;
- if ((*flagsp & MAP_SHARED) != 0 &&
- (*maxprotp & VM_PROT_WRITE) == 0 &&
- (prot & PROT_WRITE) != 0)
- return (EACCES);
-#ifdef MAC
- error = mac_posixshm_check_mmap(td->td_ucred, shmfd, prot, *flagsp);
- if (error != 0)
- return (error);
-#endif
- error = shm_mmap(shmfd, objsize, foff, objp);
+ if (size == 0)
+ return (EINVAL);
+
+ size = round_page(size);
+ writecounted = FALSE;
+
+ /*
+ * Lookup/allocate object.
+ */
+ switch (handle_type) {
+ case OBJT_DEVICE: {
+ struct cdevsw *dsw;
+ struct cdev *cdev;
+ int ref;
+
+ cdev = handle;
+ dsw = dev_refthread(cdev, &ref);
+ if (dsw == NULL)
+ return (ENXIO);
+ error = vm_mmap_cdev(td, size, prot, &maxprot, &flags, cdev,
+ dsw, &foff, &object);
+ dev_relthread(cdev, ref);
+ break;
+ }
+ case OBJT_VNODE:
+ error = vm_mmap_vnode(td, size, prot, &maxprot, &flags,
+ handle, &foff, &object, &writecounted);
+ break;
+ case OBJT_DEFAULT:
+ if (handle == NULL) {
+ error = 0;
+ break;
+ }
+ /* FALLTHROUGH */
+ default:
+ error = EINVAL;
+ break;
+ }
if (error)
return (error);
- return (0);
+
+ error = vm_mmap_object(map, addr, size, prot, maxprot, flags, object,
+ foff, writecounted, td);
+ if (error != 0 && object != NULL) {
+ /*
+ * If this mapping was accounted for in the vnode's
+ * writecount, then undo that now.
+ */
+ if (writecounted)
+ vnode_pager_release_writecount(object, 0, size);
+ vm_object_deallocate(object);
+ }
+ return (error);
}
/*
- * vm_mmap()
- *
- * MPSAFE
- *
- * Internal version of mmap. Currently used by mmap, exec, and sys5
- * shared memory. Handle is either a vnode pointer or NULL for MAP_ANON.
+ * Internal version of mmap that maps a specific VM object into a
+ * map. Called by mmap for MAP_ANON, vm_mmap, shm_mmap, and vn_mmap.
*/
int
-vm_mmap(vm_map_t map, vm_offset_t *addr, vm_size_t size, vm_prot_t prot,
- vm_prot_t maxprot, int flags,
- objtype_t handle_type, void *handle,
- vm_ooffset_t foff)
+vm_mmap_object(vm_map_t map, vm_offset_t *addr, vm_size_t size, vm_prot_t prot,
+ vm_prot_t maxprot, int flags, vm_object_t object, vm_ooffset_t foff,
+ boolean_t writecounted, struct thread *td)
{
boolean_t fitit;
- vm_object_t object = NULL;
- struct thread *td = curthread;
int docow, error, findspace, rv;
- boolean_t writecounted;
-
- if (size == 0)
- return (0);
-
- size = round_page(size);
if (map == &td->td_proc->p_vmspace->vm_map) {
PROC_LOCK(td->td_proc);
@@ -1586,11 +1511,11 @@ vm_mmap(vm_map_t map, vm_offset_t *addr, vm_size_t size, vm_prot_t prot,
/*
* We currently can only deal with page aligned file offsets.
- * The check is here rather than in the syscall because the
- * kernel calls this function internally for other mmaping
- * operations (such as in exec) and non-aligned offsets will
- * cause pmap inconsistencies...so we want to be sure to
- * disallow this in all cases.
+ * The mmap() system call already enforces this by subtracting
+ * the page offset from the file offset, but checking here
+ * catches errors in device drivers (e.g. d_mmap_single()
+ * callbacks) and other internal mapping requests (such as in
+ * exec).
*/
if (foff & PAGE_MASK)
return (EINVAL);
@@ -1603,44 +1528,11 @@ vm_mmap(vm_map_t map, vm_offset_t *addr, vm_size_t size, vm_prot_t prot,
return (EINVAL);
fitit = FALSE;
}
- writecounted = FALSE;
- /*
- * Lookup/allocate object.
- */
- switch (handle_type) {
- case OBJT_DEVICE:
- error = vm_mmap_cdev(td, size, prot, &maxprot, &flags,
- handle, &foff, &object);
- break;
- case OBJT_VNODE:
- error = vm_mmap_vnode(td, size, prot, &maxprot, &flags,
- handle, &foff, &object, &writecounted);
- break;
- case OBJT_SWAP:
- error = vm_mmap_shm(td, size, prot, &maxprot, &flags,
- handle, foff, &object);
- break;
- case OBJT_DEFAULT:
- if (handle == NULL) {
- error = 0;
- break;
- }
- /* FALLTHROUGH */
- default:
- error = EINVAL;
- break;
- }
- if (error)
- return (error);
if (flags & MAP_ANON) {
- object = NULL;
+ if (object != NULL || foff != 0)
+ return (EINVAL);
docow = 0;
- /*
- * Unnamed anonymous regions always start at 0.
- */
- if (handle == 0)
- foff = 0;
} else if (flags & MAP_PREFAULT_READ)
docow = MAP_PREFAULT;
else
@@ -1693,19 +1585,6 @@ vm_mmap(vm_map_t map, vm_offset_t *addr, vm_size_t size, vm_prot_t prot,
VM_MAP_WIRE_USER | ((flags & MAP_STACK) ?
VM_MAP_WIRE_HOLESOK : VM_MAP_WIRE_NOHOLES));
}
- } else {
- /*
- * If this mapping was accounted for in the vnode's
- * writecount, then undo that now.
- */
- if (writecounted)
- vnode_pager_release_writecount(object, 0, size);
- /*
- * Lose the object reference. Will destroy the
- * object if it's an unnamed anonymous mapping
- * or named anonymous without other references.
- */
- vm_object_deallocate(object);
}
return (vm_mmap_to_errno(rv));
}
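
As a usage note, here is a minimal sketch of the new split from a kernel caller's point of view, assuming the post-change signatures shown above. The helper example_map_object(), its include list, and its ownership convention are illustrative assumptions, not part of this diff; the error path mirrors vm_mmap() above, which suggests vm_mmap_object() leaves the object reference with the caller on failure and lets the new map entry consume it on success.

/*
 * Illustrative only: map a caller-owned VM object into a process's
 * address space via vm_mmap_object().  The helper name and include
 * list are assumptions for this sketch, not part of the commit.
 */
#include <sys/param.h>
#include <sys/mman.h>
#include <sys/proc.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_extern.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>

static int
example_map_object(struct thread *td, vm_object_t obj, vm_size_t len,
    vm_offset_t *addrp)
{
	vm_map_t map;
	int error;

	map = &td->td_proc->p_vmspace->vm_map;
	*addrp = 0;			/* no hint: let the map choose */

	/* The mapping consumes one object reference on success. */
	vm_object_reference(obj);
	error = vm_mmap_object(map, addrp, round_page(len),
	    VM_PROT_READ | VM_PROT_WRITE,	/* prot */
	    VM_PROT_ALL,			/* maxprot */
	    MAP_SHARED,				/* flags */
	    obj, 0,				/* object, foff */
	    FALSE,				/* writecounted */
	    td);
	if (error != 0)
		/* Failure leaves the reference with the caller. */
		vm_object_deallocate(obj);
	return (error);
}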