8 files changed, 258 insertions, 66 deletions
diff --git a/sys/kern/imgact_elf.c b/sys/kern/imgact_elf.c
index a98a15d..c3953fe 100644
--- a/sys/kern/imgact_elf.c
+++ b/sys/kern/imgact_elf.c
@@ -732,7 +732,7 @@ __CONCAT(exec_, __elfN(imgact))(struct image_params *imgp)
 	u_long addr, baddr, et_dyn_addr, entry = 0, proghdr = 0;
 	int32_t osrel = 0;
 	int error = 0, i, n, interp_name_len = 0;
-	const char *interp = NULL, *newinterp = NULL;
+	const char *err_str = NULL, *interp = NULL, *newinterp = NULL;
 	Elf_Brandinfo *brand_info;
 	char *path;
 	struct sysentvec *sv;
@@ -755,11 +755,14 @@ __CONCAT(exec_, __elfN(imgact))(struct image_params *imgp)
 	if ((hdr->e_phoff > PAGE_SIZE) ||
 	    (u_int)hdr->e_phentsize * hdr->e_phnum > PAGE_SIZE - hdr->e_phoff) {
 		/* Only support headers in first page for now */
+		uprintf("Program headers not in the first page\n");
 		return (ENOEXEC);
 	}
-	phdr = (const Elf_Phdr *)(imgp->image_header + hdr->e_phoff);
-	if (!aligned(phdr, Elf_Addr))
+	phdr = (const Elf_Phdr *)(imgp->image_header + hdr->e_phoff); 
+	if (!aligned(phdr, Elf_Addr)) {
+		uprintf("Unaligned program headers\n");
 		return (ENOEXEC);
+	}
 	n = 0;
 	baddr = 0;
 	for (i = 0; i < hdr->e_phnum; i++) {
@@ -773,8 +776,10 @@ __CONCAT(exec_, __elfN(imgact))(struct image_params *imgp)
 			/* Path to interpreter */
 			if (phdr[i].p_filesz > MAXPATHLEN ||
 			    phdr[i].p_offset > PAGE_SIZE ||
-			    phdr[i].p_filesz > PAGE_SIZE - phdr[i].p_offset)
+			    phdr[i].p_filesz > PAGE_SIZE - phdr[i].p_offset) {
+				uprintf("Invalid PT_INTERP\n");
 				return (ENOEXEC);
+			}
 			interp = imgp->image_header + phdr[i].p_offset;
 			interp_name_len = phdr[i].p_filesz;
 			break;
@@ -795,8 +800,10 @@ __CONCAT(exec_, __elfN(imgact))(struct image_params *imgp)
 		return (ENOEXEC);
 	}
 	if (hdr->e_type == ET_DYN) {
-		if ((brand_info->flags & BI_CAN_EXEC_DYN) == 0)
+		if ((brand_info->flags & BI_CAN_EXEC_DYN) == 0) {
+			uprintf("Cannot execute shared object\n");
 			return (ENOEXEC);
+		}
 		/*
 		 * Honour the base load address from the dso if it is
 		 * non-zero for some reason.
@@ -901,12 +908,19 @@ __CONCAT(exec_, __elfN(imgact))(struct image_params *imgp)
 	 * not actually fault in all the segments pages.
 	 */
 	PROC_LOCK(imgp->proc);
-	if (data_size > lim_cur(imgp->proc, RLIMIT_DATA) ||
-	    text_size > maxtsiz ||
-	    total_size > lim_cur(imgp->proc, RLIMIT_VMEM) ||
-	    racct_set(imgp->proc, RACCT_DATA, data_size) != 0 ||
-	    racct_set(imgp->proc, RACCT_VMEM, total_size) != 0) {
+	if (data_size > lim_cur(imgp->proc, RLIMIT_DATA))
+		err_str = "Data segment size exceeds process limit";
+	else if (text_size > maxtsiz)
+		err_str = "Text segment size exceeds system limit";
+	else if (total_size > lim_cur(imgp->proc, RLIMIT_VMEM))
+		err_str = "Total segment size exceeds process limit";
+	else if (racct_set(imgp->proc, RACCT_DATA, data_size) != 0)
+		err_str = "Data segment size exceeds resource limit";
+	else if (racct_set(imgp->proc, RACCT_VMEM, total_size) != 0)
+		err_str = "Total segment size exceeds resource limit";
+	if (err_str != NULL) {
 		PROC_UNLOCK(imgp->proc);
+		uprintf("%s\n", err_str);
 		return (ENOMEM);
 	}
 
diff --git a/sys/kern/kern_proc.c b/sys/kern/kern_proc.c
index 6618c08..fc33feb 100644
--- a/sys/kern/kern_proc.c
+++ b/sys/kern/kern_proc.c
@@ -2113,7 +2113,15 @@ sysctl_kern_proc_ovmmap(SYSCTL_HANDLER_ARGS)
 				vref(vp);
 				break;
 			case OBJT_SWAP:
-				kve->kve_type = KVME_TYPE_SWAP;
+				if ((lobj->flags & OBJ_TMPFS_NODE) != 0) {
+					kve->kve_type = KVME_TYPE_VNODE;
+					if ((lobj->flags & OBJ_TMPFS) != 0) {
+						vp = lobj->un_pager.swp.swp_tmpfs;
+						vref(vp);
+					}
+				} else {
+					kve->kve_type = KVME_TYPE_SWAP;
+				}
 				break;
 			case OBJT_DEVICE:
 				kve->kve_type = KVME_TYPE_DEVICE;
@@ -2339,7 +2347,15 @@ kern_proc_vmmap_out(struct proc *p, struct sbuf *sb)
 				vref(vp);
 				break;
 			case OBJT_SWAP:
-				kve->kve_type = KVME_TYPE_SWAP;
+				if ((lobj->flags & OBJ_TMPFS_NODE) != 0) {
+					kve->kve_type = KVME_TYPE_VNODE;
+					if ((lobj->flags & OBJ_TMPFS) != 0) {
+						vp = lobj->un_pager.swp.swp_tmpfs;
+						vref(vp);
+					}
+				} else {
+					kve->kve_type = KVME_TYPE_SWAP;
+				}
 				break;
 			case OBJT_DEVICE:
 				kve->kve_type = KVME_TYPE_DEVICE;
diff --git a/sys/kern/kern_tc.c b/sys/kern/kern_tc.c
index 9dca0e8..01c61bd 100644
--- a/sys/kern/kern_tc.c
+++ b/sys/kern/kern_tc.c
@@ -34,6 +34,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/timetc.h>
 #include <sys/timex.h>
 #include <sys/vdso.h>
+#include <machine/atomic.h>
 
 /*
  * A large step happens on boot.  This constant detects such steps.
@@ -71,7 +72,7 @@ struct timehands {
 	struct timeval		th_microtime;
 	struct timespec		th_nanotime;
 	/* Fields not to be copied in tc_windup start with th_generation. */
-	volatile u_int		th_generation;
+	u_int			th_generation;
 	struct timehands	*th_next;
 };
 
@@ -189,6 +190,33 @@ tc_delta(struct timehands *th)
 	    tc->tc_counter_mask);
 }
 
+static u_int
+tc_getgen(struct timehands *th)
+{
+
+#ifdef SMP
+	return (atomic_load_acq_int(&th->th_generation));
+#else
+	u_int gen;
+
+	gen = th->th_generation;
+	__compiler_membar();
+	return (gen);
+#endif
+}
+
+static void
+tc_setgen(struct timehands *th, u_int newgen)
+{
+
+#ifdef SMP
+	atomic_store_rel_int(&th->th_generation, newgen);
+#else
+	__compiler_membar();
+	th->th_generation = newgen;
+#endif
+}
+
 /*
  * Functions for reading the time.  We have to loop until we are sure that
  * the timehands that we operated on was not updated under our feet.  See
@@ -204,10 +232,10 @@ fbclock_binuptime(struct bintime *bt)
 
 	do {
 		th = timehands;
-		gen = th->th_generation;
+		gen = tc_getgen(th);
 		*bt = th->th_offset;
 		bintime_addx(bt, th->th_scale * tc_delta(th));
-	} while (gen == 0 || gen != th->th_generation);
+	} while (gen == 0 || gen != tc_getgen(th));
 }
 
 void
@@ -262,9 +290,9 @@ fbclock_getbinuptime(struct bintime *bt)
 
 	do {
 		th = timehands;
-		gen = th->th_generation;
+		gen = tc_getgen(th);
 		*bt = th->th_offset;
-	} while (gen == 0 || gen != th->th_generation);
+	} while (gen == 0 || gen != tc_getgen(th));
 }
 
 void
@@ -275,9 +303,9 @@ fbclock_getnanouptime(struct timespec *tsp)
 
 	do {
 		th = timehands;
-		gen = th->th_generation;
+		gen = tc_getgen(th);
 		bintime2timespec(&th->th_offset, tsp);
-	} while (gen == 0 || gen != th->th_generation);
+	} while (gen == 0 || gen != tc_getgen(th));
 }
 
 void
@@ -288,9 +316,9 @@ fbclock_getmicrouptime(struct timeval *tvp)
 
 	do {
 		th = timehands;
-		gen = th->th_generation;
+		gen = tc_getgen(th);
 		bintime2timeval(&th->th_offset, tvp);
-	} while (gen == 0 || gen != th->th_generation);
+	} while (gen == 0 || gen != tc_getgen(th));
 }
 
 void
@@ -301,9 +329,9 @@ fbclock_getbintime(struct bintime *bt)
 
 	do {
 		th = timehands;
-		gen = th->th_generation;
+		gen = tc_getgen(th);
 		*bt = th->th_offset;
-	} while (gen == 0 || gen != th->th_generation);
+	} while (gen == 0 || gen != tc_getgen(th));
 	bintime_add(bt, &boottimebin);
 }
 
@@ -315,9 +343,9 @@ fbclock_getnanotime(struct timespec *tsp)
 
 	do {
 		th = timehands;
-		gen = th->th_generation;
+		gen = tc_getgen(th);
 		*tsp = th->th_nanotime;
-	} while (gen == 0 || gen != th->th_generation);
+	} while (gen == 0 || gen != tc_getgen(th));
 }
 
 void
@@ -328,9 +356,9 @@ fbclock_getmicrotime(struct timeval *tvp)
 
 	do {
 		th = timehands;
-		gen = th->th_generation;
+		gen = tc_getgen(th);
 		*tvp = th->th_microtime;
-	} while (gen == 0 || gen != th->th_generation);
+	} while (gen == 0 || gen != tc_getgen(th));
 }
 #else /* !FFCLOCK */
 void
@@ -341,10 +369,10 @@ binuptime(struct bintime *bt)
 
 	do {
 		th = timehands;
-		gen = th->th_generation;
+		gen = tc_getgen(th);
 		*bt = th->th_offset;
 		bintime_addx(bt, th->th_scale * tc_delta(th));
-	} while (gen == 0 || gen != th->th_generation);
+	} while (gen == 0 || gen != tc_getgen(th));
 }
 
 void
@@ -399,9 +427,9 @@ getbinuptime(struct bintime *bt)
 
 	do {
 		th = timehands;
-		gen = th->th_generation;
+		gen = tc_getgen(th);
 		*bt = th->th_offset;
-	} while (gen == 0 || gen != th->th_generation);
+	} while (gen == 0 || gen != tc_getgen(th));
 }
 
 void
@@ -412,9 +440,9 @@ getnanouptime(struct timespec *tsp)
 
 	do {
 		th = timehands;
-		gen = th->th_generation;
+		gen = tc_getgen(th);
 		bintime2timespec(&th->th_offset, tsp);
-	} while (gen == 0 || gen != th->th_generation);
+	} while (gen == 0 || gen != tc_getgen(th));
 }
 
 void
@@ -425,9 +453,9 @@ getmicrouptime(struct timeval *tvp)
 
 	do {
 		th = timehands;
-		gen = th->th_generation;
+		gen = tc_getgen(th);
 		bintime2timeval(&th->th_offset, tvp);
-	} while (gen == 0 || gen != th->th_generation);
+	} while (gen == 0 || gen != tc_getgen(th));
 }
 
 void
@@ -438,9 +466,9 @@ getbintime(struct bintime *bt)
 
 	do {
 		th = timehands;
-		gen = th->th_generation;
+		gen = tc_getgen(th);
 		*bt = th->th_offset;
-	} while (gen == 0 || gen != th->th_generation);
+	} while (gen == 0 || gen != tc_getgen(th));
 	bintime_add(bt, &boottimebin);
 }
 
@@ -452,9 +480,9 @@ getnanotime(struct timespec *tsp)
 
 	do {
 		th = timehands;
-		gen = th->th_generation;
+		gen = tc_getgen(th);
 		*tsp = th->th_nanotime;
-	} while (gen == 0 || gen != th->th_generation);
+	} while (gen == 0 || gen != tc_getgen(th));
 }
 
 void
@@ -465,9 +493,9 @@ getmicrotime(struct timeval *tvp)
 
 	do {
 		th = timehands;
-		gen = th->th_generation;
+		gen = tc_getgen(th);
 		*tvp = th->th_microtime;
-	} while (gen == 0 || gen != th->th_generation);
+	} while (gen == 0 || gen != tc_getgen(th));
 }
 #endif /* FFCLOCK */
 
@@ -880,11 +908,11 @@ ffclock_read_counter(ffcounter *ffcount)
 	 */
 	do {
 		th = timehands;
-		gen = th->th_generation;
+		gen = tc_getgen(th);
 		ffth = fftimehands;
 		delta = tc_delta(th);
 		*ffcount = ffth->tick_ffcount;
-	} while (gen == 0 || gen != th->th_generation);
+	} while (gen == 0 || gen != tc_getgen(th));
 
 	*ffcount += delta;
 }
@@ -988,9 +1016,9 @@ dtrace_getnanotime(struct timespec *tsp)
 
 	do {
 		th = timehands;
-		gen = th->th_generation;
+		gen = tc_getgen(th);
 		*tsp = th->th_nanotime;
-	} while (gen == 0 || gen != th->th_generation);
+	} while (gen == 0 || gen != tc_getgen(th));
 }
 
 /*
@@ -1028,7 +1056,7 @@ sysclock_getsnapshot(struct sysclock_snap *clock_snap, int fast)
 
 	do {
 		th = timehands;
-		gen = th->th_generation;
+		gen = tc_getgen(th);
 		fbi->th_scale = th->th_scale;
 		fbi->tick_time = th->th_offset;
 #ifdef FFCLOCK
@@ -1042,7 +1070,7 @@ sysclock_getsnapshot(struct sysclock_snap *clock_snap, int fast)
 #endif
 		if (!fast)
 			delta = tc_delta(th);
-	} while (gen == 0 || gen != th->th_generation);
+	} while (gen == 0 || gen != tc_getgen(th));
 
 	clock_snap->delta = delta;
 	clock_snap->sysclock_active = sysclock_active;
@@ -1260,7 +1288,7 @@ tc_windup(void)
 	tho = timehands;
 	th = tho->th_next;
 	ogen = th->th_generation;
-	th->th_generation = 0;
+	tc_setgen(th, 0);
 	bcopy(tho, th, offsetof(struct timehands, th_generation));
 
 	/*
@@ -1377,7 +1405,7 @@ tc_windup(void)
 	 */
 	if (++ogen == 0)
 		ogen = 1;
-	th->th_generation = ogen;
+	tc_setgen(th, ogen);
 
 	/* Go live with the new struct timehands. */
 #ifdef FFCLOCK
@@ -1651,13 +1679,13 @@ pps_capture(struct pps_state *pps)
 
 	KASSERT(pps != NULL, ("NULL pps pointer in pps_capture"));
 	th = timehands;
-	pps->capgen = th->th_generation;
+	pps->capgen = tc_getgen(th);
 	pps->capth = th;
 #ifdef FFCLOCK
 	pps->capffth = fftimehands;
 #endif
 	pps->capcount = th->th_counter->tc_get_timecount(th->th_counter);
-	if (pps->capgen != th->th_generation)
+	if (pps->capgen != tc_getgen(th))
 		pps->capgen = 0;
 }
 
@@ -1677,7 +1705,7 @@ pps_event(struct pps_state *pps, int event)
 
 	KASSERT(pps != NULL, ("NULL pps pointer in pps_event"));
 	/* If the timecounter was wound up underneath us, bail out. */
-	if (pps->capgen == 0 || pps->capgen != pps->capth->th_generation)
+	if (pps->capgen == 0 || pps->capgen != tc_getgen(pps->capth))
 		return;
 
 	/* Things would be easier with arrays. */
@@ -1727,7 +1755,7 @@ pps_event(struct pps_state *pps, int event)
 	bintime2timespec(&bt, &ts);
 
 	/* If the timecounter was wound up underneath us, bail out. */
-	if (pps->capgen != pps->capth->th_generation)
+	if (pps->capgen != tc_getgen(pps->capth))
 		return;
 
 	*pcount = pps->capcount;
diff --git a/sys/kern/subr_uio.c b/sys/kern/subr_uio.c
index 87892fd..410085e 100644
--- a/sys/kern/subr_uio.c
+++ b/sys/kern/subr_uio.c
@@ -417,7 +417,7 @@ copyout_map(struct thread *td, vm_offset_t *addr, size_t sz)
 	/* round size up to page boundry */
 	size = (vm_size_t)round_page(sz);
 
-	error = vm_mmap(&vms->vm_map, addr, size, PROT_READ | PROT_WRITE,
+	error = vm_mmap(&vms->vm_map, addr, size, VM_PROT_READ | VM_PROT_WRITE,
 	    VM_PROT_ALL, MAP_PRIVATE | MAP_ANON, OBJT_DEFAULT, NULL, 0);
 
 	return (error);
diff --git a/sys/kern/subr_witness.c b/sys/kern/subr_witness.c
index dc343d0..1280807 100644
--- a/sys/kern/subr_witness.c
+++ b/sys/kern/subr_witness.c
@@ -1170,19 +1170,25 @@ witness_checkorder(struct lock_object *lock, int flags, const char *file,
 	
 	/*
 	 * Try to perform most checks without a lock.  If this succeeds we
-	 * can skip acquiring the lock and return success.
+	 * can skip acquiring the lock and return success.  Otherwise we redo
+	 * the check with the lock held to handle races with concurrent updates.
 	 */
 	w1 = plock->li_lock->lo_witness;
 	if (witness_lock_order_check(w1, w))
 		return;
 
+	mtx_lock_spin(&w_mtx);
+	if (witness_lock_order_check(w1, w)) {
+		mtx_unlock_spin(&w_mtx);
+		return;
+	}
+	witness_lock_order_add(w1, w);
+
 	/*
 	 * Check for duplicate locks of the same type.  Note that we only
 	 * have to check for this on the last lock we just acquired.  Any
 	 * other cases will be caught as lock order violations.
 	 */
-	mtx_lock_spin(&w_mtx);
-	witness_lock_order_add(w1, w);
 	if (w1 == w) {
 		i = w->w_index;
 		if (!(lock->lo_flags & LO_DUPOK) && !(flags & LOP_DUPOK) &&
@@ -1996,7 +2002,10 @@ _isitmyx(struct witness *w1, struct witness *w2, int rmask, const char *fname)
 
 	/* The flags on one better be the inverse of the flags on the other */
 	if (!((WITNESS_ATOD(r1) == r2 && WITNESS_DTOA(r2) == r1) ||
-		(WITNESS_DTOA(r1) == r2 && WITNESS_ATOD(r2) == r1))) {
+	    (WITNESS_DTOA(r1) == r2 && WITNESS_ATOD(r2) == r1))) {
+		/* Don't squawk if we're potentially racing with an update. */
+		if (!mtx_owned(&w_mtx))
+			return (0);
 		printf("%s: rmatrix mismatch between %s (index %d) and %s "
 		    "(index %d): w_rmatrix[%d][%d] == %hhx but "
 		    "w_rmatrix[%d][%d] == %hhx\n",
diff --git a/sys/kern/sys_process.c b/sys/kern/sys_process.c
index 3bf2db8..d82fda0 100644
--- a/sys/kern/sys_process.c
+++ b/sys/kern/sys_process.c
@@ -402,7 +402,7 @@ ptrace_vm_entry(struct thread *td, struct proc *p, struct ptrace_vm_entry *pve)
 			lobj = tobj;
 			pve->pve_offset += tobj->backing_object_offset;
 		}
-		vp = (lobj->type == OBJT_VNODE) ? lobj->handle : NULL;
+		vp = vm_object_vnode(lobj);
 		if (vp != NULL)
 			vref(vp);
 		if (lobj != obj)
@@ -963,6 +963,7 @@ kern_ptrace(struct thread *td, int req, pid_t pid, void *addr, int data)
 				CTR1(KTR_PTRACE, "PT_DETACH: pid %d", p->p_pid);
 			p->p_oppid = 0;
 			p->p_flag &= ~(P_TRACED | P_WAITED | P_FOLLOWFORK);
+			p->p_stops = 0;
 
 			/* should we send SIGCHLD? */
 			/* childproc_continued(p); */
diff --git a/sys/kern/uipc_shm.c b/sys/kern/uipc_shm.c
index 93c7ed1..3419c51 100644
--- a/sys/kern/uipc_shm.c
+++ b/sys/kern/uipc_shm.c
@@ -127,6 +127,7 @@ static fo_chmod_t	shm_chmod;
 static fo_chown_t	shm_chown;
 static fo_seek_t	shm_seek;
 static fo_fill_kinfo_t	shm_fill_kinfo;
+static fo_mmap_t	shm_mmap;
 
 /* File descriptor operations. */
 static struct fileops shm_ops = {
@@ -143,6 +144,7 @@ static struct fileops shm_ops = {
 	.fo_sendfile = vn_sendfile,
 	.fo_seek = shm_seek,
 	.fo_fill_kinfo = shm_fill_kinfo,
+	.fo_mmap = shm_mmap,
 	.fo_flags = DFLAG_PASSABLE | DFLAG_SEEKABLE
 };
 
@@ -851,15 +853,37 @@ sys_shm_unlink(struct thread *td, struct shm_unlink_args *uap)
 	return (error);
 }
 
-/*
- * mmap() helper to validate mmap() requests against shm object state
- * and give mmap() the vm_object to use for the mapping.
- */
 int
-shm_mmap(struct shmfd *shmfd, vm_size_t objsize, vm_ooffset_t foff,
-    vm_object_t *obj)
+shm_mmap(struct file *fp, vm_map_t map, vm_offset_t *addr, vm_size_t objsize,
+    vm_prot_t prot, vm_prot_t cap_maxprot, int flags,
+    vm_ooffset_t foff, struct thread *td)
 {
+	struct shmfd *shmfd;
+	vm_prot_t maxprot;
+	int error;
+
+	shmfd = fp->f_data;
+	maxprot = VM_PROT_NONE;
+
+	/* FREAD should always be set. */
+	if ((fp->f_flag & FREAD) != 0)
+		maxprot |= VM_PROT_EXECUTE | VM_PROT_READ;
+	if ((fp->f_flag & FWRITE) != 0)
+		maxprot |= VM_PROT_WRITE;
+
+	/* Don't permit shared writable mappings on read-only descriptors. */
+	if ((flags & MAP_SHARED) != 0 &&
+	    (maxprot & VM_PROT_WRITE) == 0 &&
+	    (prot & VM_PROT_WRITE) != 0)
+		return (EACCES);
+	maxprot &= cap_maxprot;
 
+#ifdef MAC
+	error = mac_posixshm_check_mmap(td->td_ucred, shmfd, prot, flags);
+	if (error != 0)
+		return (error);
+#endif
+	
 	/*
 	 * XXXRW: This validation is probably insufficient, and subject to
 	 * sign errors.  It should be fixed.
@@ -872,7 +896,11 @@ shm_mmap(struct shmfd *shmfd, vm_size_t objsize, vm_ooffset_t foff,
 	vfs_timestamp(&shmfd->shm_atime);
 	mtx_unlock(&shm_timestamp_lock);
 	vm_object_reference(shmfd->shm_object);
-	*obj = shmfd->shm_object;
+
+	error = vm_mmap_object(map, addr, objsize, prot, maxprot, flags,
+	    shmfd->shm_object, foff, FALSE, td);
+	if (error != 0)
+		vm_object_deallocate(shmfd->shm_object);
 	return (0);
 }
 
diff --git a/sys/kern/vfs_vnops.c b/sys/kern/vfs_vnops.c
index a00da51..573d009 100644
--- a/sys/kern/vfs_vnops.c
+++ b/sys/kern/vfs_vnops.c
@@ -54,6 +54,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/proc.h>
 #include <sys/limits.h>
 #include <sys/lock.h>
+#include <sys/mman.h>
 #include <sys/mount.h>
 #include <sys/mutex.h>
 #include <sys/namei.h>
@@ -80,6 +81,7 @@ __FBSDID("$FreeBSD$");
 #include <vm/vm_map.h>
 #include <vm/vm_object.h>
 #include <vm/vm_page.h>
+#include <vm/vnode_pager.h>
 
 static fo_rdwr_t	vn_read;
 static fo_rdwr_t	vn_write;
@@ -90,6 +92,7 @@ static fo_poll_t	vn_poll;
 static fo_kqfilter_t	vn_kqfilter;
 static fo_stat_t	vn_statfile;
 static fo_close_t	vn_closefile;
+static fo_mmap_t	vn_mmap;
 
 struct 	fileops vnops = {
 	.fo_read = vn_io_fault,
@@ -105,6 +108,7 @@ struct 	fileops vnops = {
 	.fo_sendfile = vn_sendfile,
 	.fo_seek = vn_seek,
 	.fo_fill_kinfo = vn_fill_kinfo,
+	.fo_mmap = vn_mmap,
 	.fo_flags = DFLAG_PASSABLE | DFLAG_SEEKABLE
 };
 
@@ -2362,3 +2366,95 @@ vn_fill_kinfo_vnode(struct vnode *vp, struct kinfo_file *kif)
 	kif->kf_un.kf_file.kf_file_rdev = va.va_rdev;
 	return (0);
 }
+
+int
+vn_mmap(struct file *fp, vm_map_t map, vm_offset_t *addr, vm_size_t size,
+    vm_prot_t prot, vm_prot_t cap_maxprot, int flags, vm_ooffset_t foff,
+    struct thread *td)
+{
+#ifdef HWPMC_HOOKS
+	struct pmckern_map_in pkm;
+#endif
+	struct mount *mp;
+	struct vnode *vp;
+	vm_object_t object;
+	vm_prot_t maxprot;
+	boolean_t writecounted;
+	int error;
+
+#if defined(COMPAT_FREEBSD7) || defined(COMPAT_FREEBSD6) || \
+    defined(COMPAT_FREEBSD5) || defined(COMPAT_FREEBSD4)
+	/*
+	 * POSIX shared-memory objects are defined to have
+	 * kernel persistence, and are not defined to support
+	 * read(2)/write(2) -- or even open(2).  Thus, we can
+	 * use MAP_ASYNC to trade on-disk coherence for speed.
+	 * The shm_open(3) library routine turns on the FPOSIXSHM
+	 * flag to request this behavior.
+	 */
+	if ((fp->f_flag & FPOSIXSHM) != 0)
+		flags |= MAP_NOSYNC;
+#endif
+	vp = fp->f_vnode;
+
+	/*
+	 * Ensure that file and memory protections are
+	 * compatible.  Note that we only worry about
+	 * writability if mapping is shared; in this case,
+	 * current and max prot are dictated by the open file.
+	 * XXX use the vnode instead?  Problem is: what
+	 * credentials do we use for determination? What if
+	 * proc does a setuid?
+	 */
+	mp = vp->v_mount;
+	if (mp != NULL && (mp->mnt_flag & MNT_NOEXEC) != 0)
+		maxprot = VM_PROT_NONE;
+	else
+		maxprot = VM_PROT_EXECUTE;
+	if ((fp->f_flag & FREAD) != 0)
+		maxprot |= VM_PROT_READ;
+	else if ((prot & VM_PROT_READ) != 0)
+		return (EACCES);
+
+	/*
+	 * If we are sharing potential changes via MAP_SHARED and we
+	 * are trying to get write permission although we opened it
+	 * without asking for it, bail out.
+	 */
+	if ((flags & MAP_SHARED) != 0) {
+		if ((fp->f_flag & FWRITE) != 0)
+			maxprot |= VM_PROT_WRITE;
+		else if ((prot & VM_PROT_WRITE) != 0)
+			return (EACCES);
+	} else {
+		maxprot |= VM_PROT_WRITE;
+		cap_maxprot |= VM_PROT_WRITE;
+	}
+	maxprot &= cap_maxprot;
+
+	writecounted = FALSE;
+	error = vm_mmap_vnode(td, size, prot, &maxprot, &flags, vp,
+	    &foff, &object, &writecounted);
+	if (error != 0)
+		return (error);
+	error = vm_mmap_object(map, addr, size, prot, maxprot, flags, object,
+	    foff, writecounted, td);
+	if (error != 0) {
+		/*
+		 * If this mapping was accounted for in the vnode's
+		 * writecount, then undo that now.
+		 */
+		if (writecounted)
+			vnode_pager_release_writecount(object, 0, size);
+		vm_object_deallocate(object);
+	}
+#ifdef HWPMC_HOOKS
+	/* Inform hwpmc(4) if an executable is being mapped. */
+	if (error == 0 && (prot & VM_PROT_EXECUTE) != 0) {
+		pkm.pm_file = vp;
+		pkm.pm_address = (uintptr_t) addr;
+		PMC_CALL_HOOK(td, PMC_FN_MMAP, (void *) &pkm);
+	}
+#endif
+	return (error);
+}