author    | Renato Botelho <renato@netgate.com> | 2016-07-19 11:28:18 -0300
committer | Renato Botelho <renato@netgate.com> | 2016-07-19 11:28:18 -0300
commit    | a3c20a378f7a9fa76e9301a43ac64ed07057d01e (patch)
tree      | d14045c61b471f327f487431e05b0c4fe0cd8d76 /sys
parent    | 3a4027cfafa37c1a0c0b05987c0edb1452c7bd2b (diff)
parent    | 3f1f4f0e73b6d12c01e8cad4791d23e8e56127db (diff)
download  | FreeBSD-src-a3c20a378f7a9fa76e9301a43ac64ed07057d01e.zip, FreeBSD-src-a3c20a378f7a9fa76e9301a43ac64ed07057d01e.tar.gz
Merge remote-tracking branch 'origin/stable/10' into devel
Diffstat (limited to 'sys')
63 files changed, 1075 insertions, 1082 deletions
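Two of the more user-visible changes merged here are the reworked Linux personality(2) handling (sys/compat/linux/linux_misc.c plus the new sys/compat/linux/linux_mmap.c) and the ZFS ARC limits becoming runtime-tunable sysctls (sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c). The two sketches below are illustrative only; they are not part of the diff.

The personality handler now keeps the persona per process in struct linux_pemuldata and follows the Linux query convention: passing 0xffffffff returns the current persona without changing it. A minimal check from a Linux binary, assuming glibc's personality(2) wrapper and its READ_IMPLIES_EXEC constant:

```c
#include <stdio.h>
#include <sys/personality.h>

int
main(void)
{
	/*
	 * 0xffffffff is the Linux "query" value: the handler returns the
	 * old persona and only stores a new one when per != 0xffffffff.
	 */
	int old = personality(0xffffffff);

	if (old == -1) {
		perror("personality");
		return (1);
	}
	printf("current persona: 0x%x\n", old);
	if (old & READ_IMPLIES_EXEC)
		printf("READ_IMPLIES_EXEC set: PROT_READ mappings "
		    "are also mapped executable\n");
	return (0);
}
```

On the ZFS side, vfs.zfs.arc_max and vfs.zfs.arc_min change from read-only loader tunables to CTLTYPE_U64 sysctl handlers, so they can be read and adjusted on a running system. A hedged example of reading the current limit from FreeBSD userland via sysctlbyname(3):

```c
#include <sys/types.h>
#include <sys/sysctl.h>
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	uint64_t arc_max;
	size_t len = sizeof(arc_max);

	/* The new handler exposes the value as a 64-bit quantity ("QU"). */
	if (sysctlbyname("vfs.zfs.arc_max", &arc_max, &len, NULL, 0) == -1) {
		perror("sysctlbyname");
		return (1);
	}
	printf("vfs.zfs.arc_max = %ju bytes\n", (uintmax_t)arc_max);

	/*
	 * Writes are now accepted as well (subject to the sanity checks in
	 * sysctl_vfs_zfs_arc_max): pass a new uint64_t via the newp/newlen
	 * arguments instead of NULL/0.
	 */
	return (0);
}
```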
diff --git a/sys/amd64/linux/linux.h b/sys/amd64/linux/linux.h index 639499a..b30568f 100644 --- a/sys/amd64/linux/linux.h +++ b/sys/amd64/linux/linux.h @@ -139,13 +139,6 @@ struct l_rlimit { l_ulong rlim_max; }; -/* mmap options */ -#define LINUX_MAP_SHARED 0x0001 -#define LINUX_MAP_PRIVATE 0x0002 -#define LINUX_MAP_FIXED 0x0010 -#define LINUX_MAP_ANON 0x0020 -#define LINUX_MAP_GROWSDOWN 0x0100 - /* * stat family of syscalls */ diff --git a/sys/amd64/linux/linux_machdep.c b/sys/amd64/linux/linux_machdep.c index 9664dac..80c48aa 100644 --- a/sys/amd64/linux/linux_machdep.c +++ b/sys/amd64/linux/linux_machdep.c @@ -83,6 +83,7 @@ __FBSDID("$FreeBSD$"); #include <compat/linux/linux_ipc.h> #include <compat/linux/linux_file.h> #include <compat/linux/linux_misc.h> +#include <compat/linux/linux_mmap.h> #include <compat/linux/linux_signal.h> #include <compat/linux/linux_util.h> #include <compat/linux/linux_emul.h> @@ -122,181 +123,19 @@ linux_set_upcall_kse(struct thread *td, register_t stack) return (0); } -#define STACK_SIZE (2 * 1024 * 1024) -#define GUARD_SIZE (4 * PAGE_SIZE) - int linux_mmap2(struct thread *td, struct linux_mmap2_args *args) { - struct proc *p = td->td_proc; - struct mmap_args /* { - caddr_t addr; - size_t len; - int prot; - int flags; - int fd; - long pad; - off_t pos; - } */ bsd_args; - int error; - struct file *fp; - cap_rights_t rights; - - LINUX_CTR6(mmap2, "0x%lx, %ld, %ld, 0x%08lx, %ld, 0x%lx", - args->addr, args->len, args->prot, - args->flags, args->fd, args->pgoff); - - error = 0; - bsd_args.flags = 0; - fp = NULL; - - /* - * Linux mmap(2): - * You must specify exactly one of MAP_SHARED and MAP_PRIVATE - */ - if (! ((args->flags & LINUX_MAP_SHARED) ^ - (args->flags & LINUX_MAP_PRIVATE))) - return (EINVAL); - - if (args->flags & LINUX_MAP_SHARED) - bsd_args.flags |= MAP_SHARED; - if (args->flags & LINUX_MAP_PRIVATE) - bsd_args.flags |= MAP_PRIVATE; - if (args->flags & LINUX_MAP_FIXED) - bsd_args.flags |= MAP_FIXED; - if (args->flags & LINUX_MAP_ANON) - bsd_args.flags |= MAP_ANON; - else - bsd_args.flags |= MAP_NOSYNC; - if (args->flags & LINUX_MAP_GROWSDOWN) - bsd_args.flags |= MAP_STACK; - /* - * PROT_READ, PROT_WRITE, or PROT_EXEC implies PROT_READ and PROT_EXEC - * on Linux/i386. We do this to ensure maximum compatibility. - * Linux/ia64 does the same in i386 emulation mode. - */ - bsd_args.prot = args->prot; - if (bsd_args.prot & (PROT_READ | PROT_WRITE | PROT_EXEC)) - bsd_args.prot |= PROT_READ | PROT_EXEC; - - /* Linux does not check file descriptor when MAP_ANONYMOUS is set. */ - bsd_args.fd = (bsd_args.flags & MAP_ANON) ? -1 : args->fd; - if (bsd_args.fd != -1) { - /* - * Linux follows Solaris mmap(2) description: - * The file descriptor fildes is opened with - * read permission, regardless of the - * protection options specified. - */ - - error = fget(td, bsd_args.fd, - cap_rights_init(&rights, CAP_MMAP), &fp); - if (error != 0 ) - return (error); - if (fp->f_type != DTYPE_VNODE) { - fdrop(fp, td); - return (EINVAL); - } - - /* Linux mmap() just fails for O_WRONLY files */ - if (!(fp->f_flag & FREAD)) { - fdrop(fp, td); - return (EACCES); - } - - fdrop(fp, td); - } - - if (args->flags & LINUX_MAP_GROWSDOWN) { - /* - * The Linux MAP_GROWSDOWN option does not limit auto - * growth of the region. Linux mmap with this option - * takes as addr the inital BOS, and as len, the initial - * region size. It can then grow down from addr without - * limit. However, Linux threads has an implicit internal - * limit to stack size of STACK_SIZE. 
Its just not - * enforced explicitly in Linux. But, here we impose - * a limit of (STACK_SIZE - GUARD_SIZE) on the stack - * region, since we can do this with our mmap. - * - * Our mmap with MAP_STACK takes addr as the maximum - * downsize limit on BOS, and as len the max size of - * the region. It then maps the top SGROWSIZ bytes, - * and auto grows the region down, up to the limit - * in addr. - * - * If we don't use the MAP_STACK option, the effect - * of this code is to allocate a stack region of a - * fixed size of (STACK_SIZE - GUARD_SIZE). - */ - - if ((caddr_t)PTRIN(args->addr) + args->len > - p->p_vmspace->vm_maxsaddr) { - /* - * Some Linux apps will attempt to mmap - * thread stacks near the top of their - * address space. If their TOS is greater - * than vm_maxsaddr, vm_map_growstack() - * will confuse the thread stack with the - * process stack and deliver a SEGV if they - * attempt to grow the thread stack past their - * current stacksize rlimit. To avoid this, - * adjust vm_maxsaddr upwards to reflect - * the current stacksize rlimit rather - * than the maximum possible stacksize. - * It would be better to adjust the - * mmap'ed region, but some apps do not check - * mmap's return value. - */ - PROC_LOCK(p); - p->p_vmspace->vm_maxsaddr = (char *)USRSTACK - - lim_cur(p, RLIMIT_STACK); - PROC_UNLOCK(p); - } - - /* - * This gives us our maximum stack size and a new BOS. - * If we're using VM_STACK, then mmap will just map - * the top SGROWSIZ bytes, and let the stack grow down - * to the limit at BOS. If we're not using VM_STACK - * we map the full stack, since we don't have a way - * to autogrow it. - */ - if (args->len > STACK_SIZE - GUARD_SIZE) { - bsd_args.addr = (caddr_t)PTRIN(args->addr); - bsd_args.len = args->len; - } else { - bsd_args.addr = (caddr_t)PTRIN(args->addr) - - (STACK_SIZE - GUARD_SIZE - args->len); - bsd_args.len = STACK_SIZE - GUARD_SIZE; - } - } else { - bsd_args.addr = (caddr_t)PTRIN(args->addr); - bsd_args.len = args->len; - } - bsd_args.pos = (off_t)args->pgoff; - - error = sys_mmap(td, &bsd_args); - - LINUX_CTR2(mmap2, "return: %d (%p)", - error, td->td_retval[0]); - return (error); + return (linux_mmap_common(td, PTROUT(args->addr), args->len, args->prot, + args->flags, args->fd, args->pgoff)); } int linux_mprotect(struct thread *td, struct linux_mprotect_args *uap) { - struct mprotect_args bsd_args; - - LINUX_CTR(mprotect); - bsd_args.addr = uap->addr; - bsd_args.len = uap->len; - bsd_args.prot = uap->prot; - if (bsd_args.prot & (PROT_READ | PROT_WRITE | PROT_EXEC)) - bsd_args.prot |= PROT_READ | PROT_EXEC; - return (sys_mprotect(td, &bsd_args)); + return (linux_mprotect_common(td, PTROUT(uap->addr), uap->len, uap->prot)); } int diff --git a/sys/amd64/linux/linux_proto.h b/sys/amd64/linux/linux_proto.h index 4946805..4651788 100644 --- a/sys/amd64/linux/linux_proto.h +++ b/sys/amd64/linux/linux_proto.h @@ -3,7 +3,7 @@ * * DO NOT EDIT-- this file is automatically generated. 
* $FreeBSD$ - * created from FreeBSD: stable/10/sys/amd64/linux/syscalls.master 294368 2016-01-20 01:09:53Z jhb + * created from FreeBSD: stable/10/sys/amd64/linux/syscalls.master 302962 2016-07-17 15:07:33Z dchagin */ #ifndef _LINUX_SYSPROTO_H_ @@ -499,7 +499,7 @@ struct linux_mknod_args { char dev_l_[PADL_(l_dev_t)]; l_dev_t dev; char dev_r_[PADR_(l_dev_t)]; }; struct linux_personality_args { - char per_l_[PADL_(l_ulong)]; l_ulong per; char per_r_[PADR_(l_ulong)]; + char per_l_[PADL_(l_uint)]; l_uint per; char per_r_[PADR_(l_uint)]; }; struct linux_ustat_args { char dev_l_[PADL_(l_dev_t)]; l_dev_t dev; char dev_r_[PADR_(l_dev_t)]; diff --git a/sys/amd64/linux/linux_syscall.h b/sys/amd64/linux/linux_syscall.h index 874651c..88fa022 100644 --- a/sys/amd64/linux/linux_syscall.h +++ b/sys/amd64/linux/linux_syscall.h @@ -3,7 +3,7 @@ * * DO NOT EDIT-- this file is automatically generated. * $FreeBSD$ - * created from FreeBSD: stable/10/sys/amd64/linux/syscalls.master 294368 2016-01-20 01:09:53Z jhb + * created from FreeBSD: stable/10/sys/amd64/linux/syscalls.master 302962 2016-07-17 15:07:33Z dchagin */ #define LINUX_SYS_read 0 diff --git a/sys/amd64/linux/linux_syscalls.c b/sys/amd64/linux/linux_syscalls.c index fcea91e..1b71749 100644 --- a/sys/amd64/linux/linux_syscalls.c +++ b/sys/amd64/linux/linux_syscalls.c @@ -3,7 +3,7 @@ * * DO NOT EDIT-- this file is automatically generated. * $FreeBSD$ - * created from FreeBSD: stable/10/sys/amd64/linux/syscalls.master 294368 2016-01-20 01:09:53Z jhb + * created from FreeBSD: stable/10/sys/amd64/linux/syscalls.master 302962 2016-07-17 15:07:33Z dchagin */ const char *linux_syscallnames[] = { diff --git a/sys/amd64/linux/linux_sysent.c b/sys/amd64/linux/linux_sysent.c index 14d5c87..50302e8 100644 --- a/sys/amd64/linux/linux_sysent.c +++ b/sys/amd64/linux/linux_sysent.c @@ -3,7 +3,7 @@ * * DO NOT EDIT-- this file is automatically generated. 
* $FreeBSD$ - * created from FreeBSD: stable/10/sys/amd64/linux/syscalls.master 294368 2016-01-20 01:09:53Z jhb + * created from FreeBSD: stable/10/sys/amd64/linux/syscalls.master 302962 2016-07-17 15:07:33Z dchagin */ #include <sys/param.h> diff --git a/sys/amd64/linux/linux_systrace_args.c b/sys/amd64/linux/linux_systrace_args.c index 5dcdd55..be99518 100644 --- a/sys/amd64/linux/linux_systrace_args.c +++ b/sys/amd64/linux/linux_systrace_args.c @@ -1120,7 +1120,7 @@ systrace_args(int sysnum, void *params, uint64_t *uarg, int *n_args) /* linux_personality */ case 135: { struct linux_personality_args *p = params; - iarg[0] = p->per; /* l_ulong */ + iarg[0] = p->per; /* l_uint */ *n_args = 1; break; } @@ -4112,7 +4112,7 @@ systrace_entry_setargdesc(int sysnum, int ndx, char *desc, size_t descsz) case 135: switch(ndx) { case 0: - p = "l_ulong"; + p = "l_uint"; break; default: break; diff --git a/sys/amd64/linux/syscalls.master b/sys/amd64/linux/syscalls.master index 50ddd92..a2c4a0b 100644 --- a/sys/amd64/linux/syscalls.master +++ b/sys/amd64/linux/syscalls.master @@ -270,7 +270,7 @@ 133 AUE_MKNOD STD { int linux_mknod(char *path, l_int mode, \ l_dev_t dev); } 134 AUE_USELIB UNIMPL uselib -135 AUE_PERSONALITY STD { int linux_personality(l_ulong per); } +135 AUE_PERSONALITY STD { int linux_personality(l_uint per); } 136 AUE_NULL STD { int linux_ustat(l_dev_t dev, \ struct l_ustat *ubuf); } 137 AUE_STATFS STD { int linux_statfs(char *path, \ diff --git a/sys/amd64/linux32/linux.h b/sys/amd64/linux32/linux.h index 02d12f5..97da878 100644 --- a/sys/amd64/linux32/linux.h +++ b/sys/amd64/linux32/linux.h @@ -165,13 +165,6 @@ struct l_rusage { l_long ru_nivcsw; } __packed; -/* mmap options */ -#define LINUX_MAP_SHARED 0x0001 -#define LINUX_MAP_PRIVATE 0x0002 -#define LINUX_MAP_FIXED 0x0010 -#define LINUX_MAP_ANON 0x0020 -#define LINUX_MAP_GROWSDOWN 0x0100 - struct l_mmap_argv { l_uintptr_t addr; l_size_t len; diff --git a/sys/amd64/linux32/linux32_machdep.c b/sys/amd64/linux32/linux32_machdep.c index b38afb3..187ec15 100644 --- a/sys/amd64/linux32/linux32_machdep.c +++ b/sys/amd64/linux32/linux32_machdep.c @@ -70,6 +70,7 @@ __FBSDID("$FreeBSD$"); #include <amd64/linux32/linux32_proto.h> #include <compat/linux/linux_ipc.h> #include <compat/linux/linux_misc.h> +#include <compat/linux/linux_mmap.h> #include <compat/linux/linux_signal.h> #include <compat/linux/linux_util.h> #include <compat/linux/linux_emul.h> @@ -84,9 +85,6 @@ struct l_old_select_argv { l_uintptr_t timeout; } __packed; -static int linux_mmap_common(struct thread *td, l_uintptr_t addr, - l_size_t len, l_int prot, l_int flags, l_int fd, - l_loff_t pos); static void bsd_to_linux_rusage(struct rusage *ru, struct l_rusage *lru) @@ -448,9 +446,6 @@ linux_set_upcall_kse(struct thread *td, register_t stack) return (0); } -#define STACK_SIZE (2 * 1024 * 1024) -#define GUARD_SIZE (4 * PAGE_SIZE) - int linux_mmap2(struct thread *td, struct linux_mmap2_args *args) { @@ -489,184 +484,11 @@ linux_mmap(struct thread *td, struct linux_mmap_args *args) (uint32_t)linux_args.pgoff)); } -static int -linux_mmap_common(struct thread *td, l_uintptr_t addr, l_size_t len, l_int prot, - l_int flags, l_int fd, l_loff_t pos) -{ - struct proc *p = td->td_proc; - struct mmap_args /* { - caddr_t addr; - size_t len; - int prot; - int flags; - int fd; - long pad; - off_t pos; - } */ bsd_args; - int error; - struct file *fp; - cap_rights_t rights; - - error = 0; - bsd_args.flags = 0; - fp = NULL; - - /* - * Linux mmap(2): - * You must specify exactly one of MAP_SHARED 
and MAP_PRIVATE - */ - if (!((flags & LINUX_MAP_SHARED) ^ (flags & LINUX_MAP_PRIVATE))) - return (EINVAL); - - if (flags & LINUX_MAP_SHARED) - bsd_args.flags |= MAP_SHARED; - if (flags & LINUX_MAP_PRIVATE) - bsd_args.flags |= MAP_PRIVATE; - if (flags & LINUX_MAP_FIXED) - bsd_args.flags |= MAP_FIXED; - if (flags & LINUX_MAP_ANON) { - /* Enforce pos to be on page boundary, then ignore. */ - if ((pos & PAGE_MASK) != 0) - return (EINVAL); - pos = 0; - bsd_args.flags |= MAP_ANON; - } else - bsd_args.flags |= MAP_NOSYNC; - if (flags & LINUX_MAP_GROWSDOWN) - bsd_args.flags |= MAP_STACK; - - /* - * PROT_READ, PROT_WRITE, or PROT_EXEC implies PROT_READ and PROT_EXEC - * on Linux/i386. We do this to ensure maximum compatibility. - * Linux/ia64 does the same in i386 emulation mode. - */ - bsd_args.prot = prot; - if (bsd_args.prot & (PROT_READ | PROT_WRITE | PROT_EXEC)) - bsd_args.prot |= PROT_READ | PROT_EXEC; - - /* Linux does not check file descriptor when MAP_ANONYMOUS is set. */ - bsd_args.fd = (bsd_args.flags & MAP_ANON) ? -1 : fd; - if (bsd_args.fd != -1) { - /* - * Linux follows Solaris mmap(2) description: - * The file descriptor fildes is opened with - * read permission, regardless of the - * protection options specified. - */ - - error = fget(td, bsd_args.fd, - cap_rights_init(&rights, CAP_MMAP), &fp); - if (error != 0) - return (error); - if (fp->f_type != DTYPE_VNODE) { - fdrop(fp, td); - return (EINVAL); - } - - /* Linux mmap() just fails for O_WRONLY files */ - if (!(fp->f_flag & FREAD)) { - fdrop(fp, td); - return (EACCES); - } - - fdrop(fp, td); - } - - if (flags & LINUX_MAP_GROWSDOWN) { - /* - * The Linux MAP_GROWSDOWN option does not limit auto - * growth of the region. Linux mmap with this option - * takes as addr the inital BOS, and as len, the initial - * region size. It can then grow down from addr without - * limit. However, Linux threads has an implicit internal - * limit to stack size of STACK_SIZE. Its just not - * enforced explicitly in Linux. But, here we impose - * a limit of (STACK_SIZE - GUARD_SIZE) on the stack - * region, since we can do this with our mmap. - * - * Our mmap with MAP_STACK takes addr as the maximum - * downsize limit on BOS, and as len the max size of - * the region. It then maps the top SGROWSIZ bytes, - * and auto grows the region down, up to the limit - * in addr. - * - * If we don't use the MAP_STACK option, the effect - * of this code is to allocate a stack region of a - * fixed size of (STACK_SIZE - GUARD_SIZE). - */ - - if ((caddr_t)PTRIN(addr) + len > p->p_vmspace->vm_maxsaddr) { - /* - * Some Linux apps will attempt to mmap - * thread stacks near the top of their - * address space. If their TOS is greater - * than vm_maxsaddr, vm_map_growstack() - * will confuse the thread stack with the - * process stack and deliver a SEGV if they - * attempt to grow the thread stack past their - * current stacksize rlimit. To avoid this, - * adjust vm_maxsaddr upwards to reflect - * the current stacksize rlimit rather - * than the maximum possible stacksize. - * It would be better to adjust the - * mmap'ed region, but some apps do not check - * mmap's return value. - */ - PROC_LOCK(p); - p->p_vmspace->vm_maxsaddr = (char *)LINUX32_USRSTACK - - lim_cur(p, RLIMIT_STACK); - PROC_UNLOCK(p); - } - - /* - * This gives us our maximum stack size and a new BOS. - * If we're using VM_STACK, then mmap will just map - * the top SGROWSIZ bytes, and let the stack grow down - * to the limit at BOS. 
If we're not using VM_STACK - * we map the full stack, since we don't have a way - * to autogrow it. - */ - if (len > STACK_SIZE - GUARD_SIZE) { - bsd_args.addr = (caddr_t)PTRIN(addr); - bsd_args.len = len; - } else { - bsd_args.addr = (caddr_t)PTRIN(addr) - - (STACK_SIZE - GUARD_SIZE - len); - bsd_args.len = STACK_SIZE - GUARD_SIZE; - } - } else { - bsd_args.addr = (caddr_t)PTRIN(addr); - bsd_args.len = len; - } - bsd_args.pos = pos; - -#ifdef DEBUG - if (ldebug(mmap)) - printf("-> %s(%p, %d, %d, 0x%08x, %d, 0x%x)\n", - __func__, - (void *)bsd_args.addr, (int)bsd_args.len, bsd_args.prot, - bsd_args.flags, bsd_args.fd, (int)bsd_args.pos); -#endif - error = sys_mmap(td, &bsd_args); -#ifdef DEBUG - if (ldebug(mmap)) - printf("-> %s() return: 0x%x (0x%08x)\n", - __func__, error, (u_int)td->td_retval[0]); -#endif - return (error); -} - int linux_mprotect(struct thread *td, struct linux_mprotect_args *uap) { - struct mprotect_args bsd_args; - - bsd_args.addr = uap->addr; - bsd_args.len = uap->len; - bsd_args.prot = uap->prot; - if (bsd_args.prot & (PROT_READ | PROT_WRITE | PROT_EXEC)) - bsd_args.prot |= PROT_READ | PROT_EXEC; - return (sys_mprotect(td, &bsd_args)); + + return (linux_mprotect_common(td, PTROUT(uap->addr), uap->len, uap->prot)); } int diff --git a/sys/amd64/linux32/linux32_proto.h b/sys/amd64/linux32/linux32_proto.h index a960c09..b2d1a7e 100644 --- a/sys/amd64/linux32/linux32_proto.h +++ b/sys/amd64/linux32/linux32_proto.h @@ -3,7 +3,7 @@ * * DO NOT EDIT-- this file is automatically generated. * $FreeBSD$ - * created from FreeBSD: stable/10/sys/amd64/linux32/syscalls.master 297300 2016-03-27 06:10:51Z dchagin + * created from FreeBSD: stable/10/sys/amd64/linux32/syscalls.master 302962 2016-07-17 15:07:33Z dchagin */ #ifndef _LINUX32_SYSPROTO_H_ @@ -427,7 +427,7 @@ struct linux_sysfs_args { char arg2_l_[PADL_(l_ulong)]; l_ulong arg2; char arg2_r_[PADR_(l_ulong)]; }; struct linux_personality_args { - char per_l_[PADL_(l_ulong)]; l_ulong per; char per_r_[PADR_(l_ulong)]; + char per_l_[PADL_(l_uint)]; l_uint per; char per_r_[PADR_(l_uint)]; }; struct linux_setfsuid16_args { char uid_l_[PADL_(l_uid16_t)]; l_uid16_t uid; char uid_r_[PADR_(l_uid16_t)]; diff --git a/sys/amd64/linux32/linux32_syscall.h b/sys/amd64/linux32/linux32_syscall.h index 25747e4..82764d0 100644 --- a/sys/amd64/linux32/linux32_syscall.h +++ b/sys/amd64/linux32/linux32_syscall.h @@ -3,7 +3,7 @@ * * DO NOT EDIT-- this file is automatically generated. * $FreeBSD$ - * created from FreeBSD: stable/10/sys/amd64/linux32/syscalls.master 297300 2016-03-27 06:10:51Z dchagin + * created from FreeBSD: stable/10/sys/amd64/linux32/syscalls.master 302962 2016-07-17 15:07:33Z dchagin */ #define LINUX32_SYS_linux_exit 1 diff --git a/sys/amd64/linux32/linux32_syscalls.c b/sys/amd64/linux32/linux32_syscalls.c index ef965d7..0d995c3 100644 --- a/sys/amd64/linux32/linux32_syscalls.c +++ b/sys/amd64/linux32/linux32_syscalls.c @@ -3,7 +3,7 @@ * * DO NOT EDIT-- this file is automatically generated. * $FreeBSD$ - * created from FreeBSD: stable/10/sys/amd64/linux32/syscalls.master 297300 2016-03-27 06:10:51Z dchagin + * created from FreeBSD: stable/10/sys/amd64/linux32/syscalls.master 302962 2016-07-17 15:07:33Z dchagin */ const char *linux32_syscallnames[] = { diff --git a/sys/amd64/linux32/linux32_sysent.c b/sys/amd64/linux32/linux32_sysent.c index 6eb215e..ee766d7 100644 --- a/sys/amd64/linux32/linux32_sysent.c +++ b/sys/amd64/linux32/linux32_sysent.c @@ -3,7 +3,7 @@ * * DO NOT EDIT-- this file is automatically generated. 
* $FreeBSD$ - * created from FreeBSD: stable/10/sys/amd64/linux32/syscalls.master 297300 2016-03-27 06:10:51Z dchagin + * created from FreeBSD: stable/10/sys/amd64/linux32/syscalls.master 302962 2016-07-17 15:07:33Z dchagin */ #include "opt_compat.h" diff --git a/sys/amd64/linux32/linux32_systrace_args.c b/sys/amd64/linux32/linux32_systrace_args.c index cabfab7..f3bde85 100644 --- a/sys/amd64/linux32/linux32_systrace_args.c +++ b/sys/amd64/linux32/linux32_systrace_args.c @@ -910,7 +910,7 @@ systrace_args(int sysnum, void *params, uint64_t *uarg, int *n_args) /* linux_personality */ case 136: { struct linux_personality_args *p = params; - iarg[0] = p->per; /* l_ulong */ + iarg[0] = p->per; /* l_uint */ *n_args = 1; break; } @@ -3715,7 +3715,7 @@ systrace_entry_setargdesc(int sysnum, int ndx, char *desc, size_t descsz) case 136: switch(ndx) { case 0: - p = "l_ulong"; + p = "l_uint"; break; default: break; diff --git a/sys/amd64/linux32/syscalls.master b/sys/amd64/linux32/syscalls.master index 79cd2c8..20aa3c4 100644 --- a/sys/amd64/linux32/syscalls.master +++ b/sys/amd64/linux32/syscalls.master @@ -238,7 +238,7 @@ 134 AUE_BDFLUSH STD { int linux_bdflush(void); } 135 AUE_NULL STD { int linux_sysfs(l_int option, \ l_ulong arg1, l_ulong arg2); } -136 AUE_PERSONALITY STD { int linux_personality(l_ulong per); } +136 AUE_PERSONALITY STD { int linux_personality(l_uint per); } 137 AUE_NULL UNIMPL afs_syscall 138 AUE_SETFSUID STD { int linux_setfsuid16(l_uid16_t uid); } 139 AUE_SETFSGID STD { int linux_setfsgid16(l_gid16_t gid); } diff --git a/sys/cddl/compat/opensolaris/kern/opensolaris_lookup.c b/sys/cddl/compat/opensolaris/kern/opensolaris_lookup.c index 848007e..4214ca8 100644 --- a/sys/cddl/compat/opensolaris/kern/opensolaris_lookup.c +++ b/sys/cddl/compat/opensolaris/kern/opensolaris_lookup.c @@ -89,13 +89,14 @@ traverse(vnode_t **cvpp, int lktype) if (vfsp == NULL) break; error = vfs_busy(vfsp, 0); + /* * tvp is NULL for *cvpp vnode, which we can't unlock. - * At least some callers expect the reference to be - * maintained to the original *cvpp */ if (tvp != NULL) vput(cvp); + else + vrele(cvp); if (error) return (error); diff --git a/sys/cddl/compat/opensolaris/kern/opensolaris_vfs.c b/sys/cddl/compat/opensolaris/kern/opensolaris_vfs.c index 49becbc..6af1e8b 100644 --- a/sys/cddl/compat/opensolaris/kern/opensolaris_vfs.c +++ b/sys/cddl/compat/opensolaris/kern/opensolaris_vfs.c @@ -121,34 +121,39 @@ mount_snapshot(kthread_t *td, vnode_t **vpp, const char *fstype, char *fspath, struct ucred *cr; int error; + ASSERT_VOP_ELOCKED(*vpp, "mount_snapshot"); + + vp = *vpp; + *vpp = NULL; + error = 0; + /* * Be ultra-paranoid about making sure the type and fspath * variables will fit in our mp buffers, including the * terminating NUL. */ if (strlen(fstype) >= MFSNAMELEN || strlen(fspath) >= MNAMELEN) - return (ENAMETOOLONG); - - vfsp = vfs_byname_kld(fstype, td, &error); - if (vfsp == NULL) - return (ENODEV); - - vp = *vpp; - if (vp->v_type != VDIR) - return (ENOTDIR); + error = ENAMETOOLONG; + if (error == 0 && (vfsp = vfs_byname_kld(fstype, td, &error)) == NULL) + error = ENODEV; + if (error == 0 && vp->v_type != VDIR) + error = ENOTDIR; /* * We need vnode lock to protect v_mountedhere and vnode interlock * to protect v_iflag. 
*/ - vn_lock(vp, LK_SHARED | LK_RETRY); - VI_LOCK(vp); - if ((vp->v_iflag & VI_MOUNT) != 0 || vp->v_mountedhere != NULL) { + if (error == 0) { + VI_LOCK(vp); + if ((vp->v_iflag & VI_MOUNT) == 0 && vp->v_mountedhere == NULL) + vp->v_iflag |= VI_MOUNT; + else + error = EBUSY; VI_UNLOCK(vp); - VOP_UNLOCK(vp, 0); - return (EBUSY); } - vp->v_iflag |= VI_MOUNT; - VI_UNLOCK(vp); + if (error != 0) { + vput(vp); + return (error); + } VOP_UNLOCK(vp, 0); /* @@ -198,7 +203,6 @@ mount_snapshot(kthread_t *td, vnode_t **vpp, const char *fstype, char *fspath, vfs_unbusy(mp); vfs_freeopts(mp->mnt_optnew); vfs_mount_destroy(mp); - *vpp = NULL; return (error); } @@ -229,7 +233,7 @@ mount_snapshot(kthread_t *td, vnode_t **vpp, const char *fstype, char *fspath, vfs_event_signal(NULL, VQ_MOUNT, 0); if (VFS_ROOT(mp, LK_EXCLUSIVE, &mvp)) panic("mount: lost mount"); - vput(vp); + VOP_UNLOCK(vp, 0); vfs_unbusy(mp); *vpp = mvp; return (0); diff --git a/sys/cddl/contrib/opensolaris/uts/common/dtrace/dtrace.c b/sys/cddl/contrib/opensolaris/uts/common/dtrace/dtrace.c index ed99c4b..a087e6e 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/dtrace/dtrace.c +++ b/sys/cddl/contrib/opensolaris/uts/common/dtrace/dtrace.c @@ -7288,7 +7288,7 @@ dtrace_probe(dtrace_id_t id, uintptr_t arg0, uintptr_t arg1, if (pred != NULL) { dtrace_difo_t *dp = pred->dtp_difo; - int rval; + uint64_t rval; rval = dtrace_dif_emulate(dp, &mstate, vstate, state); diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/gfs.c b/sys/cddl/contrib/opensolaris/uts/common/fs/gfs.c index c5c5c00..27d259d 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/gfs.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/gfs.c @@ -442,19 +442,9 @@ gfs_lookup_dot(vnode_t **vpp, vnode_t *dvp, vnode_t *pvp, const char *nm) *vpp = dvp; return (0); } else if (strcmp(nm, "..") == 0) { - if (pvp == NULL) { - ASSERT(dvp->v_flag & VROOT); - VN_HOLD(dvp); - *vpp = dvp; - ASSERT_VOP_ELOCKED(dvp, "gfs_lookup_dot: non-locked dvp"); - } else { - ltype = VOP_ISLOCKED(dvp); - VOP_UNLOCK(dvp, 0); - VN_HOLD(pvp); - *vpp = pvp; - vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY); - vn_lock(dvp, ltype | LK_RETRY); - } + ASSERT(pvp != NULL); + VN_HOLD(pvp); + *vpp = pvp; return (0); } diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c index f6d19fe..9430037 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c @@ -233,10 +233,15 @@ int zfs_disable_dup_eviction = 0; uint64_t zfs_arc_average_blocksize = 8 * 1024; /* 8KB */ u_int zfs_arc_free_target = 0; +/* Absolute min for arc min / max is 16MB. 
*/ +static uint64_t arc_abs_min = 16 << 20; + static int sysctl_vfs_zfs_arc_free_target(SYSCTL_HANDLER_ARGS); static int sysctl_vfs_zfs_arc_meta_limit(SYSCTL_HANDLER_ARGS); +static int sysctl_vfs_zfs_arc_max(SYSCTL_HANDLER_ARGS); +static int sysctl_vfs_zfs_arc_min(SYSCTL_HANDLER_ARGS); -#ifdef _KERNEL +#if defined(__FreeBSD__) && defined(_KERNEL) static void arc_free_target_init(void *unused __unused) { @@ -253,10 +258,10 @@ TUNABLE_QUAD("vfs.zfs.arc_meta_min", &zfs_arc_meta_min); TUNABLE_QUAD("vfs.zfs.arc_average_blocksize", &zfs_arc_average_blocksize); TUNABLE_INT("vfs.zfs.arc_shrink_shift", &zfs_arc_shrink_shift); SYSCTL_DECL(_vfs_zfs); -SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, arc_max, CTLFLAG_RDTUN, &zfs_arc_max, 0, - "Maximum ARC size"); -SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, arc_min, CTLFLAG_RDTUN, &zfs_arc_min, 0, - "Minimum ARC size"); +SYSCTL_PROC(_vfs_zfs, OID_AUTO, arc_max, CTLTYPE_U64 | CTLFLAG_RWTUN, + 0, sizeof(uint64_t), sysctl_vfs_zfs_arc_max, "QU", "Maximum ARC size"); +SYSCTL_PROC(_vfs_zfs, OID_AUTO, arc_min, CTLTYPE_U64 | CTLFLAG_RWTUN, + 0, sizeof(uint64_t), sysctl_vfs_zfs_arc_min, "QU", "Minimum ARC size"); SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, arc_average_blocksize, CTLFLAG_RDTUN, &zfs_arc_average_blocksize, 0, "ARC average blocksize"); @@ -882,7 +887,7 @@ struct arc_buf_hdr { l1arc_buf_hdr_t b_l1hdr; }; -#ifdef _KERNEL +#if defined(__FreeBSD__) && defined(_KERNEL) static int sysctl_vfs_zfs_arc_meta_limit(SYSCTL_HANDLER_ARGS) { @@ -900,6 +905,82 @@ sysctl_vfs_zfs_arc_meta_limit(SYSCTL_HANDLER_ARGS) arc_meta_limit = val; return (0); } + +static int +sysctl_vfs_zfs_arc_max(SYSCTL_HANDLER_ARGS) +{ + uint64_t val; + int err; + + val = zfs_arc_max; + err = sysctl_handle_64(oidp, &val, 0, req); + if (err != 0 || req->newptr == NULL) + return (err); + + if (zfs_arc_max == 0) { + /* Loader tunable so blindly set */ + zfs_arc_max = val; + return (0); + } + + if (val < arc_abs_min || val > kmem_size()) + return (EINVAL); + if (val < arc_c_min) + return (EINVAL); + if (zfs_arc_meta_limit > 0 && val < zfs_arc_meta_limit) + return (EINVAL); + + arc_c_max = val; + + arc_c = arc_c_max; + arc_p = (arc_c >> 1); + + if (zfs_arc_meta_limit == 0) { + /* limit meta-data to 1/4 of the arc capacity */ + arc_meta_limit = arc_c_max / 4; + } + + /* if kmem_flags are set, lets try to use less memory */ + if (kmem_debugging()) + arc_c = arc_c / 2; + + zfs_arc_max = arc_c; + + return (0); +} + +static int +sysctl_vfs_zfs_arc_min(SYSCTL_HANDLER_ARGS) +{ + uint64_t val; + int err; + + val = zfs_arc_min; + err = sysctl_handle_64(oidp, &val, 0, req); + if (err != 0 || req->newptr == NULL) + return (err); + + if (zfs_arc_min == 0) { + /* Loader tunable so blindly set */ + zfs_arc_min = val; + return (0); + } + + if (val < arc_abs_min || val > arc_c_max) + return (EINVAL); + + arc_c_min = val; + + if (zfs_arc_meta_min == 0) + arc_meta_min = arc_c_min / 2; + + if (arc_c < arc_c_min) + arc_c = arc_c_min; + + zfs_arc_min = arc_c_min; + + return (0); +} #endif static arc_buf_t *arc_eviction_list; @@ -2251,6 +2332,7 @@ arc_buf_l2_cdata_free(arc_buf_hdr_t *hdr) ASSERT3P(hdr->b_l1hdr.b_tmp_cdata, ==, hdr->b_l1hdr.b_buf->b_data); ASSERT3U(hdr->b_l2hdr.b_compress, ==, ZIO_COMPRESS_OFF); + hdr->b_l1hdr.b_tmp_cdata = NULL; return; } @@ -5320,8 +5402,8 @@ arc_init(void) arc_c = MIN(arc_c, vmem_size(heap_arena, VMEM_ALLOC | VMEM_FREE) / 8); #endif #endif /* illumos */ - /* set min cache to 1/32 of all memory, or 16MB, whichever is more */ - arc_c_min = MAX(arc_c / 4, 16 << 20); + /* set min cache to 1/32 of all memory, or 
arc_abs_min, whichever is more */ + arc_c_min = MAX(arc_c / 4, arc_abs_min); /* set max to 1/2 of all memory, or all but 1GB, whichever is more */ if (arc_c * 8 >= 1 << 30) arc_c_max = (arc_c * 8) - (1 << 30); @@ -5342,11 +5424,11 @@ arc_init(void) #ifdef _KERNEL /* * Allow the tunables to override our calculations if they are - * reasonable (ie. over 16MB) + * reasonable. */ - if (zfs_arc_max > 16 << 20 && zfs_arc_max < kmem_size()) + if (zfs_arc_max > arc_abs_min && zfs_arc_max < kmem_size()) arc_c_max = zfs_arc_max; - if (zfs_arc_min > 16 << 20 && zfs_arc_min <= arc_c_max) + if (zfs_arc_min > arc_abs_min && zfs_arc_min <= arc_c_max) arc_c_min = zfs_arc_min; #endif diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c index 8e14a97..d8d186c 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c @@ -1083,8 +1083,13 @@ dmu_read_uio_dnode(dnode_t *dn, uio_t *uio, uint64_t size) else XUIOSTAT_BUMP(xuiostat_rbuf_copied); } else { +#ifdef illumos err = uiomove((char *)db->db_data + bufoff, tocpy, UIO_READ, uio); +#else + err = vn_io_fault_uiomove((char *)db->db_data + bufoff, + tocpy, uio); +#endif } if (err) break; @@ -1178,6 +1183,7 @@ dmu_write_uio_dnode(dnode_t *dn, uio_t *uio, uint64_t size, dmu_tx_t *tx) else dmu_buf_will_dirty(db, tx); +#ifdef illumos /* * XXX uiomove could block forever (eg. nfs-backed * pages). There needs to be a uiolockdown() function @@ -1186,6 +1192,10 @@ dmu_write_uio_dnode(dnode_t *dn, uio_t *uio, uint64_t size, dmu_tx_t *tx) */ err = uiomove((char *)db->db_data + bufoff, tocpy, UIO_WRITE, uio); +#else + err = vn_io_fault_uiomove((char *)db->db_data + bufoff, tocpy, + uio); +#endif if (tocpy == db->db_size) dmu_buf_fill_done(db, tx); diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_pool.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_pool.c index d1b8002..9c34f45 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_pool.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_pool.c @@ -49,8 +49,8 @@ #include <sys/dsl_userhold.h> #ifdef __FreeBSD__ -#include <sys/sysctl.h> #include <sys/types.h> +#include <sys/sysctl.h> #endif /* diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_ctldir.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_ctldir.h index 1945686..8849003 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_ctldir.h +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_ctldir.h @@ -61,7 +61,6 @@ int zfsctl_lookup_objset(vfs_t *vfsp, uint64_t objsetid, zfsvfs_t **zfsvfsp); #define ZFSCTL_INO_ROOT 0x1 #define ZFSCTL_INO_SNAPDIR 0x2 -#define ZFSCTL_INO_SHARES 0x3 #ifdef __cplusplus } diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ctldir.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ctldir.c index 693ba41..cbafa03 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ctldir.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ctldir.c @@ -223,7 +223,7 @@ zfsctl_root_inode_cb(vnode_t *vp, int index) { zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data; - ASSERT(index <= 2); + ASSERT(index < 2); if (index == 0) return (ZFSCTL_INO_SNAPDIR); @@ -294,6 +294,22 @@ zfsctl_root(znode_t *zp) return (zp->z_zfsvfs->z_ctldir); } +static int +zfsctl_common_print(ap) + struct vop_print_args /* { + struct vnode *a_vp; + } */ *ap; +{ + vnode_t *vp = ap->a_vp; + gfs_file_t *fp = vp->v_data; + + printf(" parent = %p\n", 
fp->gfs_parent); + printf(" type = %d\n", fp->gfs_type); + printf(" index = %d\n", fp->gfs_index); + printf(" ino = %ju\n", (uintmax_t)fp->gfs_ino); + return (0); +} + /* * Common open routine. Disallow any write access. */ @@ -404,8 +420,6 @@ zfsctl_common_fid(ap) ZFS_EXIT(zfsvfs); return (SET_ERROR(ENOSPC)); } -#else - fidp->fid_len = SHORT_FID_LEN; #endif zfid = (zfid_short_t *)fidp; @@ -454,25 +468,6 @@ zfsctl_shares_fid(ap) return (error); } -static int -zfsctl_common_reclaim(ap) - struct vop_reclaim_args /* { - struct vnode *a_vp; - struct thread *a_td; - } */ *ap; -{ - vnode_t *vp = ap->a_vp; - - /* - * Destroy the vm object and flush associated pages. - */ - vnode_destroy_vobject(vp); - VI_LOCK(vp); - vp->v_data = NULL; - VI_UNLOCK(vp); - return (0); -} - /* * .zfs inode namespace * @@ -537,9 +532,20 @@ zfsctl_root_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, pathname_t *pnp, ZFS_ENTER(zfsvfs); if (strcmp(nm, "..") == 0) { +#ifdef illumos err = VFS_ROOT(dvp->v_vfsp, LK_EXCLUSIVE, vpp); +#else + /* + * NB: can not use VFS_ROOT here as it would acquire + * the vnode lock of the parent (root) vnode while + * holding the child's (.zfs) lock. + */ + znode_t *rootzp; + + err = zfs_zget(zfsvfs, zfsvfs->z_root, &rootzp); if (err == 0) - VOP_UNLOCK(*vpp, 0); + *vpp = ZTOV(rootzp); +#endif } else { err = gfs_vop_lookup(dvp, nm, vpp, pnp, flags, rdir, cr, ct, direntflags, realpnp); @@ -550,6 +556,61 @@ zfsctl_root_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, pathname_t *pnp, return (err); } +static int +zfsctl_freebsd_root_lookup(ap) + struct vop_lookup_args /* { + struct vnode *a_dvp; + struct vnode **a_vpp; + struct componentname *a_cnp; + } */ *ap; +{ + vnode_t *dvp = ap->a_dvp; + vnode_t **vpp = ap->a_vpp; + cred_t *cr = ap->a_cnp->cn_cred; + int flags = ap->a_cnp->cn_flags; + int lkflags = ap->a_cnp->cn_lkflags; + int nameiop = ap->a_cnp->cn_nameiop; + char nm[NAME_MAX + 1]; + int err; + + if ((flags & ISLASTCN) && (nameiop == RENAME || nameiop == CREATE)) + return (EOPNOTSUPP); + + ASSERT(ap->a_cnp->cn_namelen < sizeof(nm)); + strlcpy(nm, ap->a_cnp->cn_nameptr, ap->a_cnp->cn_namelen + 1); +relookup: + err = zfsctl_root_lookup(dvp, nm, vpp, NULL, 0, NULL, cr, NULL, NULL, NULL); + if (err == 0 && (nm[0] != '.' || nm[1] != '\0')) { + if (flags & ISDOTDOT) { + VOP_UNLOCK(dvp, 0); + err = vn_lock(*vpp, lkflags); + if (err != 0) { + vrele(*vpp); + *vpp = NULL; + } + vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY); + } else { + err = vn_lock(*vpp, LK_EXCLUSIVE); + if (err != 0) { + VERIFY3S(err, ==, ENOENT); + goto relookup; + } + } + } + return (err); +} + +static int +zfsctl_root_print(ap) + struct vop_print_args /* { + struct vnode *a_vp; + } */ *ap; +{ + printf(" .zfs node\n"); + zfsctl_common_print(ap); + return (0); +} + #ifdef illumos static int zfsctl_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr, @@ -585,49 +646,6 @@ static const fs_operation_def_t zfsctl_tops_root[] = { }; #endif /* illumos */ -/* - * Special case the handling of "..". 
- */ -/* ARGSUSED */ -int -zfsctl_freebsd_root_lookup(ap) - struct vop_lookup_args /* { - struct vnode *a_dvp; - struct vnode **a_vpp; - struct componentname *a_cnp; - } */ *ap; -{ - vnode_t *dvp = ap->a_dvp; - vnode_t **vpp = ap->a_vpp; - cred_t *cr = ap->a_cnp->cn_cred; - int flags = ap->a_cnp->cn_flags; - int nameiop = ap->a_cnp->cn_nameiop; - char nm[NAME_MAX + 1]; - int err; - int ltype; - - if ((flags & ISLASTCN) && (nameiop == RENAME || nameiop == CREATE)) - return (EOPNOTSUPP); - - ASSERT(ap->a_cnp->cn_namelen < sizeof(nm)); - strlcpy(nm, ap->a_cnp->cn_nameptr, ap->a_cnp->cn_namelen + 1); - err = zfsctl_root_lookup(dvp, nm, vpp, NULL, 0, NULL, cr, NULL, NULL, NULL); - if (err == 0 && (nm[0] != '.' || nm[1] != '\0')) { - ltype = VOP_ISLOCKED(dvp); - if (flags & ISDOTDOT) { - VN_HOLD(*vpp); - VOP_UNLOCK(dvp, 0); - } - vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY); - if (flags & ISDOTDOT) { - VN_RELE(*vpp); - vn_lock(dvp, ltype| LK_RETRY); - } - } - - return (err); -} - static struct vop_vector zfsctl_ops_root = { .vop_default = &default_vnodeops, .vop_open = zfsctl_common_open, @@ -643,6 +661,7 @@ static struct vop_vector zfsctl_ops_root = { .vop_pathconf = zfsctl_pathconf, #endif .vop_fid = zfsctl_common_fid, + .vop_print = zfsctl_root_print, }; /* @@ -987,6 +1006,11 @@ zfsctl_snapdir_lookup(ap) ZFS_ENTER(zfsvfs); if (gfs_lookup_dot(vpp, dvp, zfsvfs->z_ctldir, nm) == 0) { + if (nm[0] == '.' && nm[1] == '.' && nm[2] =='\0') { + VOP_UNLOCK(dvp, 0); + VERIFY0(vn_lock(*vpp, LK_EXCLUSIVE)); + VERIFY0(vn_lock(dvp, LK_EXCLUSIVE)); + } ZFS_EXIT(zfsvfs); return (0); } @@ -1011,6 +1035,7 @@ zfsctl_snapdir_lookup(ap) #endif } +relookup: mutex_enter(&sdp->sd_lock); search.se_name = (char *)nm; if ((sep = avl_find(&sdp->sd_snaps, &search, &where)) != NULL) { @@ -1018,7 +1043,6 @@ zfsctl_snapdir_lookup(ap) VN_HOLD(*vpp); err = traverse(vpp, LK_EXCLUSIVE | LK_RETRY); if (err != 0) { - VN_RELE(*vpp); *vpp = NULL; } else if (*vpp == sep->se_root) { /* @@ -1027,13 +1051,6 @@ zfsctl_snapdir_lookup(ap) */ VERIFY(zfsctl_snapshot_zname(dvp, nm, MAXNAMELEN, snapname) == 0); goto domount; - } else { - /* - * VROOT was set during the traverse call. We need - * to clear it since we're pretending to be part - * of our parent's vfs. - */ - (*vpp)->v_flag &= ~VROOT; } mutex_exit(&sdp->sd_lock); ZFS_EXIT(zfsvfs); @@ -1076,7 +1093,6 @@ zfsctl_snapdir_lookup(ap) sep->se_name = kmem_alloc(strlen(nm) + 1, KM_SLEEP); (void) strcpy(sep->se_name, nm); *vpp = sep->se_root = zfsctl_snapshot_mknode(dvp, dmu_objset_id(snap)); - VN_HOLD(*vpp); avl_insert(&sdp->sd_snaps, sep, where); dmu_objset_rele(snap, FTAG); @@ -1087,6 +1103,16 @@ domount: (void) snprintf(mountpoint, mountpoint_len, "%s/" ZFS_CTLDIR_NAME "/snapshot/%s", dvp->v_vfsp->mnt_stat.f_mntonname, nm); + mutex_exit(&sdp->sd_lock); + + /* + * The vnode may get reclaimed between dropping sd_lock and + * getting the vnode lock. + * */ + err = vn_lock(*vpp, LK_EXCLUSIVE); + if (err == ENOENT) + goto relookup; + VERIFY0(err); err = mount_snapshot(curthread, vpp, "zfs", mountpoint, snapname, 0); kmem_free(mountpoint, mountpoint_len); if (err == 0) { @@ -1099,8 +1125,8 @@ domount: */ ASSERT(VTOZ(*vpp)->z_zfsvfs != zfsvfs); VTOZ(*vpp)->z_zfsvfs->z_parent = zfsvfs; + (*vpp)->v_flag &= ~VROOT; } - mutex_exit(&sdp->sd_lock); ZFS_EXIT(zfsvfs); #ifdef illumos @@ -1142,6 +1168,11 @@ zfsctl_shares_lookup(ap) strlcpy(nm, cnp->cn_nameptr, cnp->cn_namelen + 1); if (gfs_lookup_dot(vpp, dvp, zfsvfs->z_ctldir, nm) == 0) { + if (nm[0] == '.' && nm[1] == '.' 
&& nm[2] =='\0') { + VOP_UNLOCK(dvp, 0); + VERIFY0(vn_lock(*vpp, LK_EXCLUSIVE)); + VERIFY0(vn_lock(dvp, LK_EXCLUSIVE)); + } ZFS_EXIT(zfsvfs); return (0); } @@ -1348,8 +1379,8 @@ zfsctl_snapdir_getattr(ap) /* ARGSUSED */ static int -zfsctl_snapdir_inactive(ap) - struct vop_inactive_args /* { +zfsctl_snapdir_reclaim(ap) + struct vop_reclaim_args /* { struct vnode *a_vp; struct thread *a_td; } */ *ap; @@ -1358,22 +1389,37 @@ zfsctl_snapdir_inactive(ap) zfsctl_snapdir_t *sdp = vp->v_data; zfs_snapentry_t *sep; - /* - * On forced unmount we have to free snapshots from here. - */ - mutex_enter(&sdp->sd_lock); - while ((sep = avl_first(&sdp->sd_snaps)) != NULL) { - avl_remove(&sdp->sd_snaps, sep); - kmem_free(sep->se_name, strlen(sep->se_name) + 1); - kmem_free(sep, sizeof (zfs_snapentry_t)); - } - mutex_exit(&sdp->sd_lock); - gfs_dir_inactive(vp); ASSERT(avl_numnodes(&sdp->sd_snaps) == 0); mutex_destroy(&sdp->sd_lock); avl_destroy(&sdp->sd_snaps); - kmem_free(sdp, sizeof (zfsctl_snapdir_t)); + gfs_vop_reclaim(ap); + + return (0); +} + +static int +zfsctl_shares_print(ap) + struct vop_print_args /* { + struct vnode *a_vp; + } */ *ap; +{ + printf(" .zfs/shares node\n"); + zfsctl_common_print(ap); + return (0); +} +static int +zfsctl_snapdir_print(ap) + struct vop_print_args /* { + struct vnode *a_vp; + } */ *ap; +{ + vnode_t *vp = ap->a_vp; + zfsctl_snapdir_t *sdp = vp->v_data; + + printf(" .zfs/snapshot node\n"); + printf(" number of children = %lu\n", avl_numnodes(&sdp->sd_snaps)); + zfsctl_common_print(ap); return (0); } @@ -1419,9 +1465,10 @@ static struct vop_vector zfsctl_ops_snapdir = { .vop_mkdir = zfsctl_freebsd_snapdir_mkdir, .vop_readdir = gfs_vop_readdir, .vop_lookup = zfsctl_snapdir_lookup, - .vop_inactive = zfsctl_snapdir_inactive, - .vop_reclaim = zfsctl_common_reclaim, + .vop_inactive = VOP_NULL, + .vop_reclaim = zfsctl_snapdir_reclaim, .vop_fid = zfsctl_common_fid, + .vop_print = zfsctl_snapdir_print, }; static struct vop_vector zfsctl_ops_shares = { @@ -1436,6 +1483,7 @@ static struct vop_vector zfsctl_ops_shares = { .vop_inactive = VOP_NULL, .vop_reclaim = gfs_vop_reclaim, .vop_fid = zfsctl_shares_fid, + .vop_print = zfsctl_shares_print, }; #endif /* illumos */ @@ -1454,7 +1502,6 @@ zfsctl_snapshot_mknode(vnode_t *pvp, uint64_t objset) vp = gfs_dir_create(sizeof (zfsctl_node_t), pvp, pvp->v_vfsp, &zfsctl_ops_snapshot, NULL, NULL, MAXNAMELEN, NULL, NULL); - VN_HOLD(vp); zcp = vp->v_data; zcp->zc_id = objset; VOP_UNLOCK(vp, 0); @@ -1462,17 +1509,28 @@ zfsctl_snapshot_mknode(vnode_t *pvp, uint64_t objset) return (vp); } +static int +zfsctl_snapshot_inactive(ap) + struct vop_inactive_args /* { + struct vnode *a_vp; + struct thread *a_td; + } */ *ap; +{ + vnode_t *vp = ap->a_vp; + + vrecycle(vp); + return (0); +} static int zfsctl_snapshot_reclaim(ap) - struct vop_inactive_args /* { + struct vop_reclaim_args /* { struct vnode *a_vp; struct thread *a_td; } */ *ap; { vnode_t *vp = ap->a_vp; cred_t *cr = ap->a_td->td_ucred; - struct vop_reclaim_args iap; zfsctl_snapdir_t *sdp; zfs_snapentry_t *sep, *next; int locked; @@ -1480,7 +1538,6 @@ zfsctl_snapshot_reclaim(ap) VERIFY(gfs_dir_lookup(vp, "..", &dvp, cr, 0, NULL, NULL) == 0); sdp = dvp->v_data; - VOP_UNLOCK(dvp, 0); /* this may already have been unmounted */ if (sdp == NULL) { VN_RELE(dvp); @@ -1516,106 +1573,12 @@ zfsctl_snapshot_reclaim(ap) * "active". If we lookup the same name again we will end up * creating a new vnode. 
*/ - iap.a_vp = vp; - gfs_vop_reclaim(&iap); + gfs_vop_reclaim(ap); return (0); } static int -zfsctl_traverse_begin(vnode_t **vpp, int lktype) -{ - - VN_HOLD(*vpp); - /* Snapshot should be already mounted, but just in case. */ - if (vn_mountedvfs(*vpp) == NULL) - return (ENOENT); - return (traverse(vpp, lktype)); -} - -static void -zfsctl_traverse_end(vnode_t *vp, int err) -{ - - if (err == 0) - vput(vp); - else - VN_RELE(vp); -} - -static int -zfsctl_snapshot_getattr(ap) - struct vop_getattr_args /* { - struct vnode *a_vp; - struct vattr *a_vap; - struct ucred *a_cred; - } */ *ap; -{ - vnode_t *vp = ap->a_vp; - int err; - - err = zfsctl_traverse_begin(&vp, LK_SHARED | LK_RETRY); - if (err == 0) - err = VOP_GETATTR(vp, ap->a_vap, ap->a_cred); - zfsctl_traverse_end(vp, err); - return (err); -} - -static int -zfsctl_snapshot_fid(ap) - struct vop_fid_args /* { - struct vnode *a_vp; - struct fid *a_fid; - } */ *ap; -{ - vnode_t *vp = ap->a_vp; - int err; - - err = zfsctl_traverse_begin(&vp, LK_SHARED | LK_RETRY); - if (err == 0) - err = VOP_VPTOFH(vp, (void *)ap->a_fid); - zfsctl_traverse_end(vp, err); - return (err); -} - -static int -zfsctl_snapshot_lookup(ap) - struct vop_lookup_args /* { - struct vnode *a_dvp; - struct vnode **a_vpp; - struct componentname *a_cnp; - } */ *ap; -{ - vnode_t *dvp = ap->a_dvp; - vnode_t **vpp = ap->a_vpp; - struct componentname *cnp = ap->a_cnp; - cred_t *cr = ap->a_cnp->cn_cred; - zfsvfs_t *zfsvfs = dvp->v_vfsp->vfs_data; - int error; - - if (cnp->cn_namelen != 2 || cnp->cn_nameptr[0] != '.' || - cnp->cn_nameptr[1] != '.') { - return (ENOENT); - } - - ASSERT(dvp->v_type == VDIR); - ASSERT(zfsvfs->z_ctldir != NULL); - - error = zfsctl_root_lookup(zfsvfs->z_ctldir, "snapshot", vpp, - NULL, 0, NULL, cr, NULL, NULL, NULL); - if (error == 0) { - int ltype = VOP_ISLOCKED(dvp); - VN_HOLD(*vpp); - VOP_UNLOCK(dvp, 0); - vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY); - VN_RELE(*vpp); - vn_lock(dvp, ltype | LK_RETRY); - } - - return (error); -} - -static int zfsctl_snapshot_vptocnp(struct vop_vptocnp_args *ap) { zfsvfs_t *zfsvfs = ap->a_vp->v_vfsp->vfs_data; @@ -1657,18 +1620,31 @@ zfsctl_snapshot_vptocnp(struct vop_vptocnp_args *ap) return (error); } +static int +zfsctl_snaphot_print(ap) + struct vop_print_args /* { + struct vnode *a_vp; + } */ *ap; +{ + vnode_t *vp = ap->a_vp; + zfsctl_node_t *zcp = vp->v_data; + + printf(" .zfs/snapshot/<snap> node\n"); + printf(" id = %ju\n", (uintmax_t)zcp->zc_id); + zfsctl_common_print(ap); + return (0); +} + /* * These VP's should never see the light of day. They should always * be covered. 
*/ static struct vop_vector zfsctl_ops_snapshot = { .vop_default = &default_vnodeops, - .vop_inactive = VOP_NULL, - .vop_lookup = zfsctl_snapshot_lookup, + .vop_inactive = zfsctl_snapshot_inactive, .vop_reclaim = zfsctl_snapshot_reclaim, - .vop_getattr = zfsctl_snapshot_getattr, - .vop_fid = zfsctl_snapshot_fid, .vop_vptocnp = zfsctl_snapshot_vptocnp, + .vop_print = zfsctl_snaphot_print, }; int @@ -1709,16 +1685,15 @@ zfsctl_lookup_objset(vfs_t *vfsp, uint64_t objsetid, zfsvfs_t **zfsvfsp) */ error = traverse(&vp, LK_SHARED | LK_RETRY); if (error == 0) { - if (vp == sep->se_root) + if (vp == sep->se_root) { + VN_RELE(vp); /* release covered vp */ error = SET_ERROR(EINVAL); - else + } else { *zfsvfsp = VTOZ(vp)->z_zfsvfs; + VN_URELE(vp); /* put snapshot's root vp */ + } } mutex_exit(&sdp->sd_lock); - if (error == 0) - VN_URELE(vp); - else - VN_RELE(vp); } else { error = SET_ERROR(EINVAL); mutex_exit(&sdp->sd_lock); diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ioctl.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ioctl.c index 651c596..4d6be94 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ioctl.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ioctl.c @@ -1429,6 +1429,7 @@ static int getzfsvfs(const char *dsname, zfsvfs_t **zfvp) { objset_t *os; + vfs_t *vfsp; int error; error = dmu_objset_hold(dsname, FTAG, &os); @@ -1442,19 +1443,21 @@ getzfsvfs(const char *dsname, zfsvfs_t **zfvp) mutex_enter(&os->os_user_ptr_lock); *zfvp = dmu_objset_get_user(os); if (*zfvp) { -#ifdef illumos - VFS_HOLD((*zfvp)->z_vfs); -#else - if (vfs_busy((*zfvp)->z_vfs, 0) != 0) { - *zfvp = NULL; - error = SET_ERROR(ESRCH); - } -#endif + vfsp = (*zfvp)->z_vfs; + vfs_ref(vfsp); } else { error = SET_ERROR(ESRCH); } mutex_exit(&os->os_user_ptr_lock); dmu_objset_rele(os, FTAG); + if (error == 0) { + error = vfs_busy(vfsp, 0); + vfs_rel(vfsp); + if (error != 0) { + *zfvp = NULL; + error = SET_ERROR(ESRCH); + } + } return (error); } diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c index fa6eac7..9bb6091 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c @@ -1171,6 +1171,7 @@ zfs_domount(vfs_t *vfsp, char *osname) vfsp->mnt_kern_flag |= MNTK_LOOKUP_SHARED; vfsp->mnt_kern_flag |= MNTK_SHARED_WRITES; vfsp->mnt_kern_flag |= MNTK_EXTENDED_SHARED; + vfsp->mnt_kern_flag |= MNTK_NO_IOPF; /* vn_io_fault can be used */ /* * The fsid is 64 bits, composed of an 8-bit fs type, which @@ -1782,12 +1783,11 @@ zfs_root(vfs_t *vfsp, int flags, vnode_t **vpp) if (error == 0) { error = vn_lock(*vpp, flags); - if (error == 0) - (*vpp)->v_vflag |= VV_ROOT; + if (error != 0) { + VN_RELE(*vpp); + *vpp = NULL; + } } - if (error != 0) - *vpp = NULL; - return (error); } @@ -2005,12 +2005,6 @@ zfs_umount(vfs_t *vfsp, int fflag) */ if (zfsvfs->z_ctldir != NULL) zfsctl_destroy(zfsvfs); - if (zfsvfs->z_issnap) { - vnode_t *svp = vfsp->mnt_vnodecovered; - - if (svp->v_count >= 2) - VN_RELE(svp); - } zfs_freevfs(vfsp); return (0); diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c index 2a15cdf..b0f11ac 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c @@ -655,7 +655,11 @@ mappedread(vnode_t *vp, int nbytes, uio_t *uio) zfs_vmobject_wunlock(obj); va = 
zfs_map_page(pp, &sf); +#ifdef illumos error = uiomove(va + off, bytes, UIO_READ, uio); +#else + error = vn_io_fault_uiomove(va + off, bytes, uio); +#endif zfs_unmap_page(sf); zfs_vmobject_wlock(obj); page_unhold(pp); @@ -1033,18 +1037,31 @@ zfs_write(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct) * holding up the transaction if the data copy hangs * up on a pagefault (e.g., from an NFS server mapping). */ +#ifdef illumos size_t cbytes; +#endif abuf = dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl), max_blksz); ASSERT(abuf != NULL); ASSERT(arc_buf_size(abuf) == max_blksz); +#ifdef illumos if (error = uiocopy(abuf->b_data, max_blksz, UIO_WRITE, uio, &cbytes)) { dmu_return_arcbuf(abuf); break; } ASSERT(cbytes == max_blksz); +#else + ssize_t resid = uio->uio_resid; + error = vn_io_fault_uiomove(abuf->b_data, max_blksz, uio); + if (error != 0) { + uio->uio_offset -= resid - uio->uio_resid; + uio->uio_resid = resid; + dmu_return_arcbuf(abuf); + break; + } +#endif } /* @@ -1122,8 +1139,10 @@ zfs_write(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct) dmu_assign_arcbuf(sa_get_db(zp->z_sa_hdl), woff, abuf, tx); } +#ifdef illumos ASSERT(tx_bytes <= uio->uio_resid); uioskip(uio, tx_bytes); +#endif } if (tx_bytes && vn_has_cached_data(vp)) { update_pages(vp, woff, tx_bytes, zfsvfs->z_os, @@ -1177,7 +1196,11 @@ zfs_write(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct) while ((end_size = zp->z_size) < uio->uio_loffset) { (void) atomic_cas_64(&zp->z_size, end_size, uio->uio_loffset); +#ifdef illumos ASSERT(error == 0); +#else + ASSERT(error == 0 || error == EFAULT); +#endif } /* * If we are replaying and eof is non zero then force @@ -1187,7 +1210,10 @@ zfs_write(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct) if (zfsvfs->z_replay && zfsvfs->z_replay_eof != 0) zp->z_size = zfsvfs->z_replay_eof; - error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx); + if (error == 0) + error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx); + else + (void) sa_bulk_update(zp->z_sa_hdl, bulk, count, tx); zfs_log_write(zilog, tx, TX_WRITE, zp, woff, tx_bytes, ioflag); dmu_tx_commit(tx); @@ -1214,6 +1240,17 @@ zfs_write(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct) return (error); } +#ifdef __FreeBSD__ + /* + * EFAULT means that at least one page of the source buffer was not + * available. VFS will re-try remaining I/O upon this error. + */ + if (error == EFAULT) { + ZFS_EXIT(zfsvfs); + return (error); + } +#endif + if (ioflag & (FSYNC | FDSYNC) || zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) zil_commit(zilog, zp->z_id); @@ -7165,6 +7202,53 @@ zfs_freebsd_aclcheck(ap) return (EOPNOTSUPP); } +static int +zfs_vptocnp(struct vop_vptocnp_args *ap) +{ + vnode_t *covered_vp; + vnode_t *vp = ap->a_vp;; + zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data; + znode_t *zp = VTOZ(vp); + uint64_t parent; + int ltype; + int error; + + ZFS_ENTER(zfsvfs); + ZFS_VERIFY_ZP(zp); + + /* + * If we are a snapshot mounted under .zfs, run the operation + * on the covered vnode. 
+ */ + if ((error = sa_lookup(zp->z_sa_hdl, + SA_ZPL_PARENT(zfsvfs), &parent, sizeof (parent))) != 0) { + ZFS_EXIT(zfsvfs); + return (error); + } + + if (zp->z_id != parent || zfsvfs->z_parent == zfsvfs) { + ZFS_EXIT(zfsvfs); + return (vop_stdvptocnp(ap)); + } + ZFS_EXIT(zfsvfs); + + covered_vp = vp->v_mount->mnt_vnodecovered; + vhold(covered_vp); + ltype = VOP_ISLOCKED(vp); + VOP_UNLOCK(vp, 0); + error = vget(covered_vp, LK_EXCLUSIVE, curthread); + vdrop(covered_vp); + if (error == 0) { + error = VOP_VPTOCNP(covered_vp, ap->a_vpp, ap->a_cred, + ap->a_buf, ap->a_buflen); + vput(covered_vp); + } + vn_lock(vp, ltype | LK_RETRY); + if ((vp->v_iflag & VI_DOOMED) != 0) + error = SET_ERROR(ENOENT); + return (error); +} + struct vop_vector zfs_vnodeops; struct vop_vector zfs_fifoops; struct vop_vector zfs_shareops; @@ -7210,6 +7294,7 @@ struct vop_vector zfs_vnodeops = { .vop_aclcheck = zfs_freebsd_aclcheck, .vop_getpages = zfs_freebsd_getpages, .vop_putpages = zfs_freebsd_putpages, + .vop_vptocnp = zfs_vptocnp, }; struct vop_vector zfs_fifoops = { diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_znode.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_znode.c index 431a05d..3853838 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_znode.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_znode.c @@ -574,9 +574,10 @@ zfs_znode_sa_init(zfsvfs_t *zfsvfs, znode_t *zp, zp->z_is_sa = (obj_type == DMU_OT_SA) ? B_TRUE : B_FALSE; /* - * Slap on VROOT if we are the root znode + * Slap on VROOT if we are the root znode unless we are the root + * node of a snapshot mounted under .zfs. */ - if (zp->z_id == zfsvfs->z_root) + if (zp->z_id == zfsvfs->z_root && zfsvfs->z_parent == zfsvfs) ZTOV(zp)->v_flag |= VROOT; mutex_exit(&zp->z_lock); diff --git a/sys/compat/linux/linux_emul.c b/sys/compat/linux/linux_emul.c index c2bf3ae..b244eea 100644 --- a/sys/compat/linux/linux_emul.c +++ b/sys/compat/linux/linux_emul.c @@ -45,6 +45,7 @@ __FBSDID("$FreeBSD$"); #include <compat/linux/linux_emul.h> #include <compat/linux/linux_misc.h> +#include <compat/linux/linux_persona.h> #include <compat/linux/linux_util.h> @@ -127,7 +128,7 @@ linux_proc_init(struct thread *td, struct thread *newtd, int flags) /* epoll should be destroyed in a case of exec. */ pem = pem_find(p); KASSERT(pem != NULL, ("proc_exit: proc emuldata not found.\n")); - + pem->persona = 0; if (pem->epoll != NULL) { emd = pem->epoll; pem->epoll = NULL; @@ -220,6 +221,9 @@ linux_proc_exec(void *arg __unused, struct proc *p, struct image_params *imgp) { struct thread *td = curthread; struct thread *othertd; +#if defined(__amd64__) + struct linux_pemuldata *pem; +#endif /* * In a case of execing from linux binary properly detach @@ -243,6 +247,17 @@ linux_proc_exec(void *arg __unused, struct proc *p, struct image_params *imgp) linux_proc_init(td, NULL, 0); else linux_proc_init(td, td, 0); +#if defined(__amd64__) + /* + * An IA32 executable which has executable stack will have the + * READ_IMPLIES_EXEC personality flag set automatically. 
+ */ + if (SV_PROC_FLAG(td->td_proc, SV_ILP32) && + imgp->stack_prot & VM_PROT_EXECUTE) { + pem = pem_find(p); + pem->persona |= LINUX_READ_IMPLIES_EXEC; + } +#endif } } diff --git a/sys/compat/linux/linux_emul.h b/sys/compat/linux/linux_emul.h index 7262093..9a5a667 100644 --- a/sys/compat/linux/linux_emul.h +++ b/sys/compat/linux/linux_emul.h @@ -67,6 +67,7 @@ struct linux_pemuldata { uint32_t flags; /* process emuldata flags */ struct sx pem_sx; /* lock for this struct */ void *epoll; /* epoll data */ + uint32_t persona; /* process execution domain */ }; #define LINUX_PEM_XLOCK(p) sx_xlock(&(p)->pem_sx) diff --git a/sys/compat/linux/linux_misc.c b/sys/compat/linux/linux_misc.c index cb2379a5..2765dfd 100644 --- a/sys/compat/linux/linux_misc.c +++ b/sys/compat/linux/linux_misc.c @@ -1198,15 +1198,23 @@ linux_mknodat(struct thread *td, struct linux_mknodat_args *args) int linux_personality(struct thread *td, struct linux_personality_args *args) { + struct linux_pemuldata *pem; + struct proc *p = td->td_proc; + uint32_t old; + #ifdef DEBUG if (ldebug(personality)) - printf(ARGS(personality, "%lu"), (unsigned long)args->per); + printf(ARGS(personality, "%u"), args->per); #endif - if (args->per != 0) - return (EINVAL); - /* Yes Jim, it's still a Linux... */ - td->td_retval[0] = 0; + PROC_LOCK(p); + pem = pem_find(p); + old = pem->persona; + if (args->per != 0xffffffff) + pem->persona = args->per; + PROC_UNLOCK(p); + + td->td_retval[0] = old; return (0); } diff --git a/sys/compat/linux/linux_mmap.c b/sys/compat/linux/linux_mmap.c new file mode 100644 index 0000000..0a5557b --- /dev/null +++ b/sys/compat/linux/linux_mmap.c @@ -0,0 +1,257 @@ +/*- + * Copyright (c) 2004 Tim J. Robbins + * Copyright (c) 2002 Doug Rabson + * Copyright (c) 2000 Marcel Moolenaar + * Copyright (c) 1994-1995 Søren Schmidt + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer + * in this position and unchanged. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ *
+ * $FreeBSD$
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/capsicum.h>
+#include <sys/file.h>
+#include <sys/imgact.h>
+#include <sys/ktr.h>
+#include <sys/mman.h>
+#include <sys/proc.h>
+#include <sys/resourcevar.h>
+#include <sys/sysent.h>
+#include <sys/sysproto.h>
+
+#include <vm/pmap.h>
+#include <vm/vm_map.h>
+
+#include <compat/linux/linux_emul.h>
+#include <compat/linux/linux_mmap.h>
+#include <compat/linux/linux_persona.h>
+#include <compat/linux/linux_util.h>
+
+
+#define STACK_SIZE (2 * 1024 * 1024)
+#define GUARD_SIZE (4 * PAGE_SIZE)
+
+#if defined(__amd64__)
+static void linux_fixup_prot(struct thread *td, int *prot);
+#endif
+
+
+int
+linux_mmap_common(struct thread *td, uintptr_t addr, size_t len, int prot,
+ int flags, int fd, off_t pos)
+{
+ struct proc *p = td->td_proc;
+ struct vmspace *vms = td->td_proc->p_vmspace;
+ struct mmap_args /* {
+ caddr_t addr;
+ size_t len;
+ int prot;
+ int flags;
+ int fd;
+ off_t pos;
+ } */ bsd_args;
+ int error;
+ struct file *fp;
+
+ cap_rights_t rights;
+ LINUX_CTR6(mmap2, "0x%lx, %ld, %ld, 0x%08lx, %ld, 0x%lx",
+ addr, len, prot, flags, fd, pos);
+
+ error = 0;
+ bsd_args.flags = 0;
+ fp = NULL;
+
+ /*
+ * Linux mmap(2):
+ * You must specify exactly one of MAP_SHARED and MAP_PRIVATE
+ */
+ if (!((flags & LINUX_MAP_SHARED) ^ (flags & LINUX_MAP_PRIVATE)))
+ return (EINVAL);
+
+ if (flags & LINUX_MAP_SHARED)
+ bsd_args.flags |= MAP_SHARED;
+ if (flags & LINUX_MAP_PRIVATE)
+ bsd_args.flags |= MAP_PRIVATE;
+ if (flags & LINUX_MAP_FIXED)
+ bsd_args.flags |= MAP_FIXED;
+ if (flags & LINUX_MAP_ANON) {
+ /* Enforce pos to be on page boundary, then ignore. */
+ if ((pos & PAGE_MASK) != 0)
+ return (EINVAL);
+ pos = 0;
+ bsd_args.flags |= MAP_ANON;
+ } else
+ bsd_args.flags |= MAP_NOSYNC;
+ if (flags & LINUX_MAP_GROWSDOWN)
+ bsd_args.flags |= MAP_STACK;
+
+ /*
+ * PROT_READ, PROT_WRITE, or PROT_EXEC implies PROT_READ and PROT_EXEC
+ * on Linux/i386 if the binary requires an executable stack.
+ * We do this only for IA32 emulation, as on native i386 this does not
+ * make sense without PAE.
+ *
+ * XXX. Linux checks that the file system is not mounted with noexec.
+ */
+ bsd_args.prot = prot;
+#if defined(__amd64__)
+ linux_fixup_prot(td, &bsd_args.prot);
+#endif
+
+ /* Linux does not check file descriptor when MAP_ANONYMOUS is set. */
+ bsd_args.fd = (bsd_args.flags & MAP_ANON) ? -1 : fd;
+ if (bsd_args.fd != -1) {
+ /*
+ * Linux follows Solaris mmap(2) description:
+ * The file descriptor fildes is opened with
+ * read permission, regardless of the
+ * protection options specified.
+ */
+
+ error = fget(td, bsd_args.fd,
+ cap_rights_init(&rights, CAP_MMAP), &fp);
+ if (error != 0)
+ return (error);
+ if (fp->f_type != DTYPE_VNODE) {
+ fdrop(fp, td);
+ return (EINVAL);
+ }
+
+ /* Linux mmap() just fails for O_WRONLY files */
+ if (!(fp->f_flag & FREAD)) {
+ fdrop(fp, td);
+ return (EACCES);
+ }
+
+ fdrop(fp, td);
+ }
+
+ if (flags & LINUX_MAP_GROWSDOWN) {
+ /*
+ * The Linux MAP_GROWSDOWN option does not limit auto
+ * growth of the region. Linux mmap with this option
+ * takes as addr the initial BOS, and as len, the initial
+ * region size. It can then grow down from addr without
+ * limit. However, Linux threads have an implicit internal
+ * limit on stack size of STACK_SIZE. It's just not
+ * enforced explicitly in Linux. But, here we impose
+ * a limit of (STACK_SIZE - GUARD_SIZE) on the stack
+ * region, since we can do this with our mmap.
+ * + * Our mmap with MAP_STACK takes addr as the maximum + * downsize limit on BOS, and as len the max size of + * the region. It then maps the top SGROWSIZ bytes, + * and auto grows the region down, up to the limit + * in addr. + * + * If we don't use the MAP_STACK option, the effect + * of this code is to allocate a stack region of a + * fixed size of (STACK_SIZE - GUARD_SIZE). + */ + + if ((caddr_t)addr + len > vms->vm_maxsaddr) { + /* + * Some Linux apps will attempt to mmap + * thread stacks near the top of their + * address space. If their TOS is greater + * than vm_maxsaddr, vm_map_growstack() + * will confuse the thread stack with the + * process stack and deliver a SEGV if they + * attempt to grow the thread stack past their + * current stacksize rlimit. To avoid this, + * adjust vm_maxsaddr upwards to reflect + * the current stacksize rlimit rather + * than the maximum possible stacksize. + * It would be better to adjust the + * mmap'ed region, but some apps do not check + * mmap's return value. + */ + PROC_LOCK(p); + vms->vm_maxsaddr = (char *)p->p_sysent->sv_usrstack - + lim_cur(p, RLIMIT_STACK); + PROC_UNLOCK(p); + } + + /* + * This gives us our maximum stack size and a new BOS. + * If we're using VM_STACK, then mmap will just map + * the top SGROWSIZ bytes, and let the stack grow down + * to the limit at BOS. If we're not using VM_STACK + * we map the full stack, since we don't have a way + * to autogrow it. + */ + if (len > STACK_SIZE - GUARD_SIZE) { + bsd_args.addr = (caddr_t)addr; + bsd_args.len = len; + } else { + bsd_args.addr = (caddr_t)addr - + (STACK_SIZE - GUARD_SIZE - len); + bsd_args.len = STACK_SIZE - GUARD_SIZE; + } + } else { + bsd_args.addr = (caddr_t)addr; + bsd_args.len = len; + } + bsd_args.pos = pos; + + error = sys_mmap(td, &bsd_args); + + LINUX_CTR2(mmap2, "return: %d (%p)", error, td->td_retval[0]); + + return (error); +} + +int +linux_mprotect_common(struct thread *td, uintptr_t addr, size_t len, int prot) +{ + struct mprotect_args bsd_args; + + bsd_args.addr = (void *)addr; + bsd_args.len = len; + bsd_args.prot = prot; + +#if defined(__amd64__) + linux_fixup_prot(td, &bsd_args.prot); +#endif + return (sys_mprotect(td, &bsd_args)); +} + +#if defined(__amd64__) +static void +linux_fixup_prot(struct thread *td, int *prot) +{ + struct linux_pemuldata *pem; + + if (SV_PROC_FLAG(td->td_proc, SV_ILP32) && *prot & PROT_READ) { + pem = pem_find(td->td_proc); + if (pem->persona & LINUX_READ_IMPLIES_EXEC) + *prot |= PROT_EXEC; + } + +} +#endif diff --git a/sys/compat/linux/linux_mmap.h b/sys/compat/linux/linux_mmap.h new file mode 100644 index 0000000..a27d99d --- /dev/null +++ b/sys/compat/linux/linux_mmap.h @@ -0,0 +1,49 @@ +/*- + * Copyright (c) 2004 Tim J. Robbins + * Copyright (c) 2002 Doug Rabson + * Copyright (c) 2000 Marcel Moolenaar + * Copyright (c) 1994-1995 Søren Schmidt + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer + * in this position and unchanged. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. 
The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef _LINUX_MMAP_H_ +#define _LINUX_MMAP_H_ + +/* mmap options */ +#define LINUX_MAP_SHARED 0x0001 +#define LINUX_MAP_PRIVATE 0x0002 +#define LINUX_MAP_FIXED 0x0010 +#define LINUX_MAP_ANON 0x0020 +#define LINUX_MAP_GROWSDOWN 0x0100 + + +int linux_mmap_common(struct thread *, uintptr_t, size_t, int, int, + int, off_t); +int linux_mprotect_common(struct thread *, uintptr_t, size_t, int); + +#endif /* _LINUX_MMAP_H_ */ diff --git a/sys/compat/linux/linux_persona.h b/sys/compat/linux/linux_persona.h new file mode 100644 index 0000000..8d84134 --- /dev/null +++ b/sys/compat/linux/linux_persona.h @@ -0,0 +1,56 @@ +/* + * $FreeBSD$ + */ + +#ifndef LINUX_PERSONALITY_H +#define LINUX_PERSONALITY_H + +/* + * Flags for bug emulation. + * + * These occupy the top three bytes. + */ +enum { + LINUX_UNAME26 = 0x0020000, + LINUX_ADDR_NO_RANDOMIZE = 0x0040000, /* disable randomization + * of VA space + */ + LINUX_FDPIC_FUNCPTRS = 0x0080000, /* userspace function + * ptrs point to descriptors + * (signal handling) + */ + LINUX_MMAP_PAGE_ZERO = 0x0100000, + LINUX_ADDR_COMPAT_LAYOUT = 0x0200000, + LINUX_READ_IMPLIES_EXEC = 0x0400000, + LINUX_ADDR_LIMIT_32BIT = 0x0800000, + LINUX_SHORT_INODE = 0x1000000, + LINUX_WHOLE_SECONDS = 0x2000000, + LINUX_STICKY_TIMEOUTS = 0x4000000, + LINUX_ADDR_LIMIT_3GB = 0x8000000, +}; + +/* + * Security-relevant compatibility flags that must be + * cleared upon setuid or setgid exec: + */ +#define LINUX_PER_CLEAR_ON_SETID (LINUX_READ_IMPLIES_EXEC | \ + LINUX_ADDR_NO_RANDOMIZE | \ + LINUX_ADDR_COMPAT_LAYOUT | \ + LINUX_MMAP_PAGE_ZERO) + +/* + * Personality types. + * + * These go in the low byte. Avoid using the top bit, it will + * conflict with error returns. 
+ */ +enum { + LINUX_PER_LINUX = 0x0000, + LINUX_PER_LINUX_32BIT = 0x0000 | LINUX_ADDR_LIMIT_32BIT, + LINUX_PER_LINUX_FDPIC = 0x0000 | LINUX_FDPIC_FUNCPTRS, + LINUX_PER_LINUX32 = 0x0008, + LINUX_PER_LINUX32_3GB = 0x0008 | LINUX_ADDR_LIMIT_3GB, + LINUX_PER_MASK = 0x00ff, +}; + +#endif /* LINUX_PERSONALITY_H */ diff --git a/sys/conf/files.amd64 b/sys/conf/files.amd64 index 798d105..e8d9c7d 100644 --- a/sys/conf/files.amd64 +++ b/sys/conf/files.amd64 @@ -549,6 +549,7 @@ compat/linux/linux_ioctl.c optional compat_linux32 compat/linux/linux_ipc.c optional compat_linux32 compat/linux/linux_mib.c optional compat_linux32 compat/linux/linux_misc.c optional compat_linux32 +compat/linux/linux_mmap.c optional compat_linux32 compat/linux/linux_signal.c optional compat_linux32 compat/linux/linux_socket.c optional compat_linux32 compat/linux/linux_stats.c optional compat_linux32 diff --git a/sys/conf/files.i386 b/sys/conf/files.i386 index 72686d9..19b44ef 100644 --- a/sys/conf/files.i386 +++ b/sys/conf/files.i386 @@ -102,6 +102,7 @@ compat/linux/linux_ioctl.c optional compat_linux compat/linux/linux_ipc.c optional compat_linux compat/linux/linux_mib.c optional compat_linux compat/linux/linux_misc.c optional compat_linux +compat/linux/linux_mmap.c optional compat_linux compat/linux/linux_signal.c optional compat_linux compat/linux/linux_socket.c optional compat_linux compat/linux/linux_stats.c optional compat_linux diff --git a/sys/conf/files.pc98 b/sys/conf/files.pc98 index 1af0721..598743a 100644 --- a/sys/conf/files.pc98 +++ b/sys/conf/files.pc98 @@ -63,6 +63,7 @@ compat/linux/linux_ioctl.c optional compat_linux compat/linux/linux_ipc.c optional compat_linux compat/linux/linux_mib.c optional compat_linux compat/linux/linux_misc.c optional compat_linux +compat/linux/linux_mmap.c optional compat_linux compat/linux/linux_signal.c optional compat_linux compat/linux/linux_socket.c optional compat_linux compat/linux/linux_stats.c optional compat_linux diff --git a/sys/dev/ahci/ahci.c b/sys/dev/ahci/ahci.c index 821682a..9db1c44 100644 --- a/sys/dev/ahci/ahci.c +++ b/sys/dev/ahci/ahci.c @@ -154,8 +154,8 @@ int ahci_attach(device_t dev) { struct ahci_controller *ctlr = device_get_softc(dev); - int error, i, u, speed, unit; - u_int32_t version; + int error, i, speed, unit; + uint32_t u, version; device_t child; ctlr->dev = dev; diff --git a/sys/dev/ahci/ahci.h b/sys/dev/ahci/ahci.h index a243387..61643c1 100644 --- a/sys/dev/ahci/ahci.h +++ b/sys/dev/ahci/ahci.h @@ -472,7 +472,7 @@ struct ahci_enclosure { uint8_t status[AHCI_MAX_PORTS][4]; /* ArrayDev statuses */ int quirks; int channels; - int ichannels; + uint32_t ichannels; }; /* structure describing a AHCI controller */ @@ -503,7 +503,7 @@ struct ahci_controller { int quirks; int numirqs; int channels; - int ichannels; + uint32_t ichannels; int ccc; /* CCC timeout */ int cccv; /* CCC vector */ int direct; /* Direct command completion */ diff --git a/sys/dev/e1000/if_igb.c b/sys/dev/e1000/if_igb.c index ab445f6..48ef637 100644 --- a/sys/dev/e1000/if_igb.c +++ b/sys/dev/e1000/if_igb.c @@ -1161,10 +1161,27 @@ igb_ioctl(struct ifnet *ifp, u_long command, caddr_t data) } } #endif +#if __FreeBSD_version >= 1000000 + /* HW cannot turn these on/off separately */ + if (mask & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6)) { + ifp->if_capenable ^= IFCAP_RXCSUM; + ifp->if_capenable ^= IFCAP_RXCSUM_IPV6; + reinit = 1; + } + if (mask & IFCAP_TXCSUM) { + ifp->if_capenable ^= IFCAP_TXCSUM; + reinit = 1; + } + if (mask & IFCAP_TXCSUM_IPV6) { + ifp->if_capenable ^= IFCAP_TXCSUM_IPV6; 
+ reinit = 1; + } +#else if (mask & IFCAP_HWCSUM) { ifp->if_capenable ^= IFCAP_HWCSUM; reinit = 1; } +#endif if (mask & IFCAP_TSO4) { ifp->if_capenable ^= IFCAP_TSO4; reinit = 1; @@ -1243,14 +1260,26 @@ igb_init_locked(struct adapter *adapter) /* Set hardware offload abilities */ ifp->if_hwassist = 0; if (ifp->if_capenable & IFCAP_TXCSUM) { +#if __FreeBSD_version >= 1000000 + ifp->if_hwassist |= (CSUM_IP_TCP | CSUM_IP_UDP); + if (adapter->hw.mac.type != e1000_82575) + ifp->if_hwassist |= CSUM_IP_SCTP; +#else ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP); #if __FreeBSD_version >= 800000 - if ((adapter->hw.mac.type == e1000_82576) || - (adapter->hw.mac.type == e1000_82580)) + if (adapter->hw.mac.type != e1000_82575) ifp->if_hwassist |= CSUM_SCTP; #endif +#endif } +#if __FreeBSD_version >= 1000000 + if (ifp->if_capenable & IFCAP_TXCSUM_IPV6) { + ifp->if_hwassist |= (CSUM_IP6_TCP | CSUM_IP6_UDP); + if (adapter->hw.mac.type != e1000_82575) + ifp->if_hwassist |= CSUM_IP6_SCTP; + } +#endif if (ifp->if_capenable & IFCAP_TSO) ifp->if_hwassist |= CSUM_TSO; @@ -3044,6 +3073,9 @@ igb_setup_interface(device_t dev, struct adapter *adapter) ifp->if_capabilities = ifp->if_capenable = 0; ifp->if_capabilities = IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM; +#if __FreeBSD_version >= 1000000 + ifp->if_capabilities |= IFCAP_HWCSUM_IPV6; +#endif ifp->if_capabilities |= IFCAP_TSO; ifp->if_capabilities |= IFCAP_JUMBO_MTU; ifp->if_capenable = ifp->if_capabilities; @@ -3817,17 +3849,29 @@ igb_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp, switch (ipproto) { case IPPROTO_TCP: +#if __FreeBSD_version >= 1000000 + if (mp->m_pkthdr.csum_flags & (CSUM_IP_TCP | CSUM_IP6_TCP)) +#else if (mp->m_pkthdr.csum_flags & CSUM_TCP) +#endif type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP; break; case IPPROTO_UDP: +#if __FreeBSD_version >= 1000000 + if (mp->m_pkthdr.csum_flags & (CSUM_IP_UDP | CSUM_IP6_UDP)) +#else if (mp->m_pkthdr.csum_flags & CSUM_UDP) +#endif type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_UDP; break; #if __FreeBSD_version >= 800000 case IPPROTO_SCTP: +#if __FreeBSD_version >= 1000000 + if (mp->m_pkthdr.csum_flags & (CSUM_IP_SCTP | CSUM_IP6_SCTP)) +#else if (mp->m_pkthdr.csum_flags & CSUM_SCTP) +#endif type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_SCTP; break; #endif @@ -4514,8 +4558,7 @@ igb_initialize_receive_units(struct adapter *adapter) rxcsum |= E1000_RXCSUM_PCSD; #if __FreeBSD_version >= 800000 /* For SCTP Offload */ - if (((hw->mac.type == e1000_82576) || - (hw->mac.type == e1000_82580)) && + if ((hw->mac.type != e1000_82575) && (ifp->if_capenable & IFCAP_RXCSUM)) rxcsum |= E1000_RXCSUM_CRCOFL; #endif @@ -4524,8 +4567,7 @@ igb_initialize_receive_units(struct adapter *adapter) if (ifp->if_capenable & IFCAP_RXCSUM) { rxcsum |= E1000_RXCSUM_IPPCSE; #if __FreeBSD_version >= 800000 - if ((adapter->hw.mac.type == e1000_82576) || - (adapter->hw.mac.type == e1000_82580)) + if (adapter->hw.mac.type != e1000_82575) rxcsum |= E1000_RXCSUM_CRCOFL; #endif } else diff --git a/sys/dev/e1000/if_igb.h b/sys/dev/e1000/if_igb.h index 431437d..f23bb0f 100644 --- a/sys/dev/e1000/if_igb.h +++ b/sys/dev/e1000/if_igb.h @@ -291,7 +291,11 @@ #define ETH_ADDR_LEN 6 /* Offload bits in mbuf flag */ -#if __FreeBSD_version >= 800000 +#if __FreeBSD_version >= 1000000 +#define CSUM_OFFLOAD_IPV4 (CSUM_IP|CSUM_IP_TCP|CSUM_IP_UDP|CSUM_IP_SCTP) +#define CSUM_OFFLOAD_IPV6 (CSUM_IP6_TCP|CSUM_IP6_UDP|CSUM_IP6_SCTP) +#define CSUM_OFFLOAD (CSUM_OFFLOAD_IPV4|CSUM_OFFLOAD_IPV6) +#elif __FreeBSD_version >= 800000 #define CSUM_OFFLOAD (CSUM_IP|CSUM_TCP|CSUM_UDP|CSUM_SCTP) 
#else #define CSUM_OFFLOAD (CSUM_IP|CSUM_TCP|CSUM_UDP) diff --git a/sys/dev/hyperv/storvsc/hv_storvsc_drv_freebsd.c b/sys/dev/hyperv/storvsc/hv_storvsc_drv_freebsd.c index a89a762..d9c29e3 100644 --- a/sys/dev/hyperv/storvsc/hv_storvsc_drv_freebsd.c +++ b/sys/dev/hyperv/storvsc/hv_storvsc_drv_freebsd.c @@ -805,6 +805,13 @@ hv_storvsc_on_iocompletion(struct storvsc_softc *sc, vm_srb = &vstor_packet->u.vm_srb; + /* + * Copy some fields of the host's response into the request structure, + * because the fields will be used later in storvsc_io_done(). + */ + request->vstor_packet.u.vm_srb.scsi_status = vm_srb->scsi_status; + request->vstor_packet.u.vm_srb.transfer_len = vm_srb->transfer_len; + if (((vm_srb->scsi_status & 0xFF) == SCSI_STATUS_CHECK_COND) && (vm_srb->srb_status & SRB_STATUS_AUTOSENSE_VALID)) { /* Autosense data available */ @@ -1939,62 +1946,24 @@ create_storvsc_request(union ccb *ccb, struct hv_storvsc_request *reqp) } /* - * Modified based on scsi_print_inquiry which is responsible to - * print the detail information for scsi_inquiry_data. - * + * SCSI Inquiry checks qualifier and type. + * If qualifier is 011b, means the device server is not capable + * of supporting a peripheral device on this logical unit, and + * the type should be set to 1Fh. + * * Return 1 if it is valid, 0 otherwise. */ static inline int is_inquiry_valid(const struct scsi_inquiry_data *inq_data) { uint8_t type; - char vendor[16], product[48], revision[16]; - - /* - * Check device type and qualifier - */ - if (!(SID_QUAL_IS_VENDOR_UNIQUE(inq_data) || - SID_QUAL(inq_data) == SID_QUAL_LU_CONNECTED)) + if (SID_QUAL(inq_data) != SID_QUAL_LU_CONNECTED) { return (0); - + } type = SID_TYPE(inq_data); - switch (type) { - case T_DIRECT: - case T_SEQUENTIAL: - case T_PRINTER: - case T_PROCESSOR: - case T_WORM: - case T_CDROM: - case T_SCANNER: - case T_OPTICAL: - case T_CHANGER: - case T_COMM: - case T_STORARRAY: - case T_ENCLOSURE: - case T_RBC: - case T_OCRW: - case T_OSD: - case T_ADC: - break; - case T_NODEVICE: - default: + if (type == T_NODEVICE) { return (0); } - - /* - * Check vendor, product, and revision - */ - cam_strvis(vendor, inq_data->vendor, sizeof(inq_data->vendor), - sizeof(vendor)); - cam_strvis(product, inq_data->product, sizeof(inq_data->product), - sizeof(product)); - cam_strvis(revision, inq_data->revision, sizeof(inq_data->revision), - sizeof(revision)); - if (strlen(vendor) == 0 || - strlen(product) == 0 || - strlen(revision) == 0) - return (0); - return (1); } @@ -2071,7 +2040,6 @@ storvsc_io_done(struct hv_storvsc_request *reqp) ccb->ccb_h.status &= ~CAM_STATUS_MASK; if (vm_srb->scsi_status == SCSI_STATUS_OK) { const struct scsi_generic *cmd; - /* * Check whether the data for INQUIRY cmd is valid or * not. Windows 10 and Windows 2016 send all zero @@ -2080,23 +2048,59 @@ storvsc_io_done(struct hv_storvsc_request *reqp) cmd = (const struct scsi_generic *) ((ccb->ccb_h.flags & CAM_CDB_POINTER) ? csio->cdb_io.cdb_ptr : csio->cdb_io.cdb_bytes); - if (cmd->opcode == INQUIRY && - /* - * XXX: Temporary work around disk hot plugin on win2k12r2, - * only filtering the invalid disk on win10 or 2016 server. - * So, the hot plugin on win10 and 2016 server needs - * to be fixed. + if (cmd->opcode == INQUIRY) { + /* + * The host of Windows 10 or 2016 server will response + * the inquiry request with invalid data for unexisted device: + [0x7f 0x0 0x5 0x2 0x1f ... ] + * But on windows 2012 R2, the response is: + [0x7f 0x0 0x0 0x0 0x0 ] + * That is why here wants to validate the inquiry response. 
+ * The validation will skip the INQUIRY whose response is short, + * which is less than SHORT_INQUIRY_LENGTH (36). + * + * For more information about INQUIRY, please refer to: + * ftp://ftp.avc-pioneer.com/Mtfuji_7/Proposal/Jun09/INQUIRY.pdf */ - vmstor_proto_version == VMSTOR_PROTOCOL_VERSION_WIN10 && - is_inquiry_valid( - (const struct scsi_inquiry_data *)csio->data_ptr) == 0) { + const struct scsi_inquiry_data *inq_data = + (const struct scsi_inquiry_data *)csio->data_ptr; + uint8_t* resp_buf = (uint8_t*)csio->data_ptr; + /* Get the buffer length reported by host */ + int resp_xfer_len = vm_srb->transfer_len; + /* Get the available buffer length */ + int resp_buf_len = resp_xfer_len >= 5 ? resp_buf[4] + 5 : 0; + int data_len = (resp_buf_len < resp_xfer_len) ? resp_buf_len : resp_xfer_len; + if (data_len < SHORT_INQUIRY_LENGTH) { + ccb->ccb_h.status |= CAM_REQ_CMP; + if (bootverbose && data_len >= 5) { + mtx_lock(&sc->hs_lock); + xpt_print(ccb->ccb_h.path, + "storvsc skips the validation for short inquiry (%d)" + " [%x %x %x %x %x]\n", + data_len,resp_buf[0],resp_buf[1],resp_buf[2], + resp_buf[3],resp_buf[4]); + mtx_unlock(&sc->hs_lock); + } + } else if (is_inquiry_valid(inq_data) == 0) { ccb->ccb_h.status |= CAM_DEV_NOT_THERE; + if (bootverbose && data_len >= 5) { + mtx_lock(&sc->hs_lock); + xpt_print(ccb->ccb_h.path, + "storvsc uninstalled invalid device" + " [%x %x %x %x %x]\n", + resp_buf[0],resp_buf[1],resp_buf[2],resp_buf[3],resp_buf[4]); + mtx_unlock(&sc->hs_lock); + } + } else { + ccb->ccb_h.status |= CAM_REQ_CMP; if (bootverbose) { mtx_lock(&sc->hs_lock); xpt_print(ccb->ccb_h.path, - "storvsc uninstalled device\n"); + "storvsc has passed inquiry response (%d) validation\n", + data_len); mtx_unlock(&sc->hs_lock); } + } } else { ccb->ccb_h.status |= CAM_REQ_CMP; } diff --git a/sys/i386/i386/exception.s b/sys/i386/i386/exception.s index 5949f02..a3674c7 100644 --- a/sys/i386/i386/exception.s +++ b/sys/i386/i386/exception.s @@ -235,7 +235,7 @@ IDTVEC(lcall_syscall) pushfl /* save eflags */ popl 8(%esp) /* shuffle into tf_eflags */ pushl $7 /* sizeof "lcall 7,0" */ - subl $4,%esp /* skip over tf_trapno */ + pushl $0 /* tf_trapno */ pushal pushl $0 movw %ds,(%esp) @@ -264,7 +264,7 @@ IDTVEC(lcall_syscall) SUPERALIGN_TEXT IDTVEC(int0x80_syscall) pushl $2 /* sizeof "int 0x80" */ - subl $4,%esp /* skip over tf_trapno */ + pushl $0 /* tf_trapno */ pushal pushl $0 movw %ds,(%esp) diff --git a/sys/i386/linux/linux.h b/sys/i386/linux/linux.h index 36b2084..42e836c 100644 --- a/sys/i386/linux/linux.h +++ b/sys/i386/linux/linux.h @@ -140,13 +140,6 @@ struct l_rlimit { l_ulong rlim_max; }; -/* mmap options */ -#define LINUX_MAP_SHARED 0x0001 -#define LINUX_MAP_PRIVATE 0x0002 -#define LINUX_MAP_FIXED 0x0010 -#define LINUX_MAP_ANON 0x0020 -#define LINUX_MAP_GROWSDOWN 0x0100 - struct l_mmap_argv { l_uintptr_t addr; l_size_t len; diff --git a/sys/i386/linux/linux_machdep.c b/sys/i386/linux/linux_machdep.c index c9f969b..4b4b886 100644 --- a/sys/i386/linux/linux_machdep.c +++ b/sys/i386/linux/linux_machdep.c @@ -65,6 +65,7 @@ __FBSDID("$FreeBSD$"); #include <i386/linux/linux_proto.h> #include <compat/linux/linux_ipc.h> #include <compat/linux/linux_misc.h> +#include <compat/linux/linux_mmap.h> #include <compat/linux/linux_signal.h> #include <compat/linux/linux_util.h> #include <compat/linux/linux_emul.h> @@ -95,10 +96,6 @@ struct l_old_select_argv { struct l_timeval *timeout; }; -static int linux_mmap_common(struct thread *td, l_uintptr_t addr, - l_size_t len, l_int prot, l_int flags, l_int fd, - 
l_loff_t pos); - int linux_execve(struct thread *td, struct linux_execve_args *args) @@ -340,9 +337,6 @@ linux_set_upcall_kse(struct thread *td, register_t stack) return (0); } -#define STACK_SIZE (2 * 1024 * 1024) -#define GUARD_SIZE (4 * PAGE_SIZE) - int linux_mmap2(struct thread *td, struct linux_mmap2_args *args) { @@ -381,187 +375,11 @@ linux_mmap(struct thread *td, struct linux_mmap_args *args) (uint32_t)linux_args.pgoff)); } -static int -linux_mmap_common(struct thread *td, l_uintptr_t addr, l_size_t len, l_int prot, - l_int flags, l_int fd, l_loff_t pos) -{ - struct proc *p = td->td_proc; - struct mmap_args /* { - caddr_t addr; - size_t len; - int prot; - int flags; - int fd; - long pad; - off_t pos; - } */ bsd_args; - int error; - struct file *fp; - cap_rights_t rights; - - error = 0; - bsd_args.flags = 0; - fp = NULL; - - /* - * Linux mmap(2): - * You must specify exactly one of MAP_SHARED and MAP_PRIVATE - */ - if (!((flags & LINUX_MAP_SHARED) ^ (flags & LINUX_MAP_PRIVATE))) - return (EINVAL); - - if (flags & LINUX_MAP_SHARED) - bsd_args.flags |= MAP_SHARED; - if (flags & LINUX_MAP_PRIVATE) - bsd_args.flags |= MAP_PRIVATE; - if (flags & LINUX_MAP_FIXED) - bsd_args.flags |= MAP_FIXED; - if (flags & LINUX_MAP_ANON) { - /* Enforce pos to be on page boundary, then ignore. */ - if ((pos & PAGE_MASK) != 0) - return (EINVAL); - pos = 0; - bsd_args.flags |= MAP_ANON; - } else - bsd_args.flags |= MAP_NOSYNC; - if (flags & LINUX_MAP_GROWSDOWN) - bsd_args.flags |= MAP_STACK; - - /* - * PROT_READ, PROT_WRITE, or PROT_EXEC implies PROT_READ and PROT_EXEC - * on Linux/i386. We do this to ensure maximum compatibility. - * Linux/ia64 does the same in i386 emulation mode. - */ - bsd_args.prot = prot; - if (bsd_args.prot & (PROT_READ | PROT_WRITE | PROT_EXEC)) - bsd_args.prot |= PROT_READ | PROT_EXEC; - - /* Linux does not check file descriptor when MAP_ANONYMOUS is set. */ - bsd_args.fd = (bsd_args.flags & MAP_ANON) ? -1 : fd; - if (bsd_args.fd != -1) { - /* - * Linux follows Solaris mmap(2) description: - * The file descriptor fildes is opened with - * read permission, regardless of the - * protection options specified. - * - * Checking just CAP_MMAP is fine here, since the real work - * is done in the FreeBSD mmap(). - */ - - error = fget(td, bsd_args.fd, - cap_rights_init(&rights, CAP_MMAP), &fp); - if (error != 0) - return (error); - if (fp->f_type != DTYPE_VNODE) { - fdrop(fp, td); - return (EINVAL); - } - - /* Linux mmap() just fails for O_WRONLY files */ - if (!(fp->f_flag & FREAD)) { - fdrop(fp, td); - return (EACCES); - } - - fdrop(fp, td); - } - - if (flags & LINUX_MAP_GROWSDOWN) { - /* - * The Linux MAP_GROWSDOWN option does not limit auto - * growth of the region. Linux mmap with this option - * takes as addr the inital BOS, and as len, the initial - * region size. It can then grow down from addr without - * limit. However, linux threads has an implicit internal - * limit to stack size of STACK_SIZE. Its just not - * enforced explicitly in linux. But, here we impose - * a limit of (STACK_SIZE - GUARD_SIZE) on the stack - * region, since we can do this with our mmap. - * - * Our mmap with MAP_STACK takes addr as the maximum - * downsize limit on BOS, and as len the max size of - * the region. It them maps the top SGROWSIZ bytes, - * and auto grows the region down, up to the limit - * in addr. - * - * If we don't use the MAP_STACK option, the effect - * of this code is to allocate a stack region of a - * fixed size of (STACK_SIZE - GUARD_SIZE). 
- */ - - if ((caddr_t)PTRIN(addr) + len > p->p_vmspace->vm_maxsaddr) { - /* - * Some linux apps will attempt to mmap - * thread stacks near the top of their - * address space. If their TOS is greater - * than vm_maxsaddr, vm_map_growstack() - * will confuse the thread stack with the - * process stack and deliver a SEGV if they - * attempt to grow the thread stack past their - * current stacksize rlimit. To avoid this, - * adjust vm_maxsaddr upwards to reflect - * the current stacksize rlimit rather - * than the maximum possible stacksize. - * It would be better to adjust the - * mmap'ed region, but some apps do not check - * mmap's return value. - */ - PROC_LOCK(p); - p->p_vmspace->vm_maxsaddr = (char *)USRSTACK - - lim_cur(p, RLIMIT_STACK); - PROC_UNLOCK(p); - } - - /* - * This gives us our maximum stack size and a new BOS. - * If we're using VM_STACK, then mmap will just map - * the top SGROWSIZ bytes, and let the stack grow down - * to the limit at BOS. If we're not using VM_STACK - * we map the full stack, since we don't have a way - * to autogrow it. - */ - if (len > STACK_SIZE - GUARD_SIZE) { - bsd_args.addr = (caddr_t)PTRIN(addr); - bsd_args.len = len; - } else { - bsd_args.addr = (caddr_t)PTRIN(addr) - - (STACK_SIZE - GUARD_SIZE - len); - bsd_args.len = STACK_SIZE - GUARD_SIZE; - } - } else { - bsd_args.addr = (caddr_t)PTRIN(addr); - bsd_args.len = len; - } - bsd_args.pos = pos; - -#ifdef DEBUG - if (ldebug(mmap)) - printf("-> %s(%p, %d, %d, 0x%08x, %d, 0x%x)\n", - __func__, - (void *)bsd_args.addr, bsd_args.len, bsd_args.prot, - bsd_args.flags, bsd_args.fd, (int)bsd_args.pos); -#endif - error = sys_mmap(td, &bsd_args); -#ifdef DEBUG - if (ldebug(mmap)) - printf("-> %s() return: 0x%x (0x%08x)\n", - __func__, error, (u_int)td->td_retval[0]); -#endif - return (error); -} - int linux_mprotect(struct thread *td, struct linux_mprotect_args *uap) { - struct mprotect_args bsd_args; - - bsd_args.addr = uap->addr; - bsd_args.len = uap->len; - bsd_args.prot = uap->prot; - if (bsd_args.prot & (PROT_READ | PROT_WRITE | PROT_EXEC)) - bsd_args.prot |= PROT_READ | PROT_EXEC; - return (sys_mprotect(td, &bsd_args)); + + return (linux_mprotect_common(td, PTROUT(uap->addr), uap->len, uap->prot)); } int diff --git a/sys/i386/linux/linux_proto.h b/sys/i386/linux/linux_proto.h index 3717c9f..1b848cd 100644 --- a/sys/i386/linux/linux_proto.h +++ b/sys/i386/linux/linux_proto.h @@ -3,7 +3,7 @@ * * DO NOT EDIT-- this file is automatically generated. * $FreeBSD$ - * created from FreeBSD: stable/10/sys/i386/linux/syscalls.master 297300 2016-03-27 06:10:51Z dchagin + * created from FreeBSD: stable/10/sys/i386/linux/syscalls.master 302962 2016-07-17 15:07:33Z dchagin */ #ifndef _LINUX_SYSPROTO_H_ @@ -435,7 +435,7 @@ struct linux_sysfs_args { char arg2_l_[PADL_(l_ulong)]; l_ulong arg2; char arg2_r_[PADR_(l_ulong)]; }; struct linux_personality_args { - char per_l_[PADL_(l_ulong)]; l_ulong per; char per_r_[PADR_(l_ulong)]; + char per_l_[PADL_(l_uint)]; l_uint per; char per_r_[PADR_(l_uint)]; }; struct linux_setfsuid16_args { char uid_l_[PADL_(l_uid16_t)]; l_uid16_t uid; char uid_r_[PADR_(l_uid16_t)]; diff --git a/sys/i386/linux/linux_syscall.h b/sys/i386/linux/linux_syscall.h index e9d8ef7..aa9e4a0 100644 --- a/sys/i386/linux/linux_syscall.h +++ b/sys/i386/linux/linux_syscall.h @@ -3,7 +3,7 @@ * * DO NOT EDIT-- this file is automatically generated. 
* $FreeBSD$ - * created from FreeBSD: stable/10/sys/i386/linux/syscalls.master 297300 2016-03-27 06:10:51Z dchagin + * created from FreeBSD: stable/10/sys/i386/linux/syscalls.master 302962 2016-07-17 15:07:33Z dchagin */ #define LINUX_SYS_linux_exit 1 diff --git a/sys/i386/linux/linux_syscalls.c b/sys/i386/linux/linux_syscalls.c index 54d8423..8b43796 100644 --- a/sys/i386/linux/linux_syscalls.c +++ b/sys/i386/linux/linux_syscalls.c @@ -3,7 +3,7 @@ * * DO NOT EDIT-- this file is automatically generated. * $FreeBSD$ - * created from FreeBSD: stable/10/sys/i386/linux/syscalls.master 297300 2016-03-27 06:10:51Z dchagin + * created from FreeBSD: stable/10/sys/i386/linux/syscalls.master 302962 2016-07-17 15:07:33Z dchagin */ const char *linux_syscallnames[] = { diff --git a/sys/i386/linux/linux_sysent.c b/sys/i386/linux/linux_sysent.c index ce765b5..7218375 100644 --- a/sys/i386/linux/linux_sysent.c +++ b/sys/i386/linux/linux_sysent.c @@ -3,7 +3,7 @@ * * DO NOT EDIT-- this file is automatically generated. * $FreeBSD$ - * created from FreeBSD: stable/10/sys/i386/linux/syscalls.master 297300 2016-03-27 06:10:51Z dchagin + * created from FreeBSD: stable/10/sys/i386/linux/syscalls.master 302962 2016-07-17 15:07:33Z dchagin */ #include <sys/param.h> diff --git a/sys/i386/linux/linux_systrace_args.c b/sys/i386/linux/linux_systrace_args.c index f02f34f..0a0bf42 100644 --- a/sys/i386/linux/linux_systrace_args.c +++ b/sys/i386/linux/linux_systrace_args.c @@ -948,7 +948,7 @@ systrace_args(int sysnum, void *params, uint64_t *uarg, int *n_args) /* linux_personality */ case 136: { struct linux_personality_args *p = params; - iarg[0] = p->per; /* l_ulong */ + iarg[0] = p->per; /* l_uint */ *n_args = 1; break; } @@ -3849,7 +3849,7 @@ systrace_entry_setargdesc(int sysnum, int ndx, char *desc, size_t descsz) case 136: switch(ndx) { case 0: - p = "l_ulong"; + p = "l_uint"; break; default: break; diff --git a/sys/i386/linux/syscalls.master b/sys/i386/linux/syscalls.master index 7ec3154..5899adb 100644 --- a/sys/i386/linux/syscalls.master +++ b/sys/i386/linux/syscalls.master @@ -240,7 +240,7 @@ 134 AUE_BDFLUSH STD { int linux_bdflush(void); } 135 AUE_NULL STD { int linux_sysfs(l_int option, \ l_ulong arg1, l_ulong arg2); } -136 AUE_PERSONALITY STD { int linux_personality(l_ulong per); } +136 AUE_PERSONALITY STD { int linux_personality(l_uint per); } 137 AUE_NULL UNIMPL afs_syscall 138 AUE_SETFSUID STD { int linux_setfsuid16(l_uid16_t uid); } 139 AUE_SETFSGID STD { int linux_setfsgid16(l_gid16_t gid); } diff --git a/sys/kern/kern_mutex.c b/sys/kern/kern_mutex.c index fb0b313..908f99a 100644 --- a/sys/kern/kern_mutex.c +++ b/sys/kern/kern_mutex.c @@ -658,8 +658,15 @@ thread_lock_flags_(struct thread *td, int opts, const char *file, int line) i = 0; tid = (uintptr_t)curthread; - if (SCHEDULER_STOPPED()) + if (SCHEDULER_STOPPED()) { + /* + * Ensure that spinlock sections are balanced even when the + * scheduler is stopped, since we may otherwise inadvertently + * re-enable interrupts while dumping core. 
+ */ + spinlock_enter(); return; + } #ifdef KDTRACE_HOOKS spin_time -= lockstat_nsecs(&td->td_lock->lock_object); diff --git a/sys/kern/vfs_mount.c b/sys/kern/vfs_mount.c index f5f522e..496b852 100644 --- a/sys/kern/vfs_mount.c +++ b/sys/kern/vfs_mount.c @@ -1315,7 +1315,8 @@ dounmount(struct mount *mp, int flags, struct thread *td) */ if ((flags & MNT_FORCE) && VFS_ROOT(mp, LK_EXCLUSIVE, &fsrootvp) == 0) { - if (mp->mnt_vnodecovered != NULL) + if (mp->mnt_vnodecovered != NULL && + (mp->mnt_flag & MNT_IGNORE) == 0) mountcheckdirs(fsrootvp, mp->mnt_vnodecovered); if (fsrootvp == rootvnode) { vrele(rootvnode); @@ -1336,7 +1337,8 @@ dounmount(struct mount *mp, int flags, struct thread *td) if (error && error != ENXIO) { if ((flags & MNT_FORCE) && VFS_ROOT(mp, LK_EXCLUSIVE, &fsrootvp) == 0) { - if (mp->mnt_vnodecovered != NULL) + if (mp->mnt_vnodecovered != NULL && + (mp->mnt_flag & MNT_IGNORE) == 0) mountcheckdirs(mp->mnt_vnodecovered, fsrootvp); if (rootvnode == NULL) { rootvnode = fsrootvp; diff --git a/sys/modules/linux/Makefile b/sys/modules/linux/Makefile index 0e70ce6..124227e 100644 --- a/sys/modules/linux/Makefile +++ b/sys/modules/linux/Makefile @@ -30,7 +30,7 @@ SRCS+= opt_apic.h OBJS= ${VDSO}.so .if ${MACHINE_CPUARCH} == "i386" -SRCS+= linux_ptrace.c imgact_linux.c linux_util.c linux_mib.c \ +SRCS+= linux_ptrace.c imgact_linux.c linux_util.c linux_mib.c linux_mmap.c \ linux_emul.c opt_cpu.h linux.c .endif diff --git a/sys/modules/linux_common/Makefile b/sys/modules/linux_common/Makefile index 91449f7..2301796 100644 --- a/sys/modules/linux_common/Makefile +++ b/sys/modules/linux_common/Makefile @@ -3,7 +3,7 @@ .PATH: ${.CURDIR}/../../compat/linux KMOD= linux_common -SRCS= linux_common.c linux_mib.c linux_util.c linux_emul.c \ +SRCS= linux_common.c linux_mib.c linux_mmap.c linux_util.c linux_emul.c \ linux.c opt_compat.h device_if.h vnode_if.h bus_if.h EXPORT_SYMS= diff --git a/sys/net/mppcc.c b/sys/net/mppcc.c index 01ce3ff..293ce46 100644 --- a/sys/net/mppcc.c +++ b/sys/net/mppcc.c @@ -233,7 +233,7 @@ int MPPC_Compress(u_char **src, u_char **dst, u_long *srcCnt, u_long *dstCnt, ch putbits16(*dst, 0xc000|(off-320), 16, &olen, &l); } else { /* NOTREACHED */ rtn &= ~MPPC_OK; - return rtn; + return (rtn); } /* Encode length of match. */ diff --git a/sys/net/mppcd.c b/sys/net/mppcd.c index c1730e5..d8e663c 100644 --- a/sys/net/mppcd.c +++ b/sys/net/mppcd.c @@ -170,7 +170,7 @@ int MPPC_Decompress(u_char **src, u_char **dst, u_long *srcCnt, u_long *dstCnt, rtn &= ~MPPC_OK; return (rtn); } - } else { /* NOTREACHED */ + } else { /* This shouldn't happen. */ rtn &= ~MPPC_OK; return (rtn); } diff --git a/sys/netinet/tcp_syncache.c b/sys/netinet/tcp_syncache.c index 0398b03..03e8c72 100644 --- a/sys/netinet/tcp_syncache.c +++ b/sys/netinet/tcp_syncache.c @@ -683,7 +683,7 @@ syncache_socket(struct syncache *sc, struct socket *lso, struct mbuf *m) * connection when the SYN arrived. If we can't create * the connection, abort it. */ - so = sonewconn(lso, SS_ISCONNECTED); + so = sonewconn(lso, 0); if (so == NULL) { /* * Drop the connection; we will either send a RST or @@ -922,6 +922,8 @@ syncache_socket(struct syncache *sc, struct socket *lso, struct mbuf *m) INP_WUNLOCK(inp); + soisconnected(so); + TCPSTAT_INC(tcps_accepts); return (so); diff --git a/sys/netinet/tcp_usrreq.c b/sys/netinet/tcp_usrreq.c index 1ccbf9a..8d7e9a0 100644 --- a/sys/netinet/tcp_usrreq.c +++ b/sys/netinet/tcp_usrreq.c @@ -643,13 +643,6 @@ out: /* * Accept a connection. 
Essentially all the work is done at higher levels;
 * just return the address of the peer, storing through addr.
- *
- * The rationale for acquiring the tcbinfo lock here is somewhat complicated,
- * and is described in detail in the commit log entry for r175612. Acquiring
- * it delays an accept(2) racing with sonewconn(), which inserts the socket
- * before the inpcb address/port fields are initialized. A better fix would
- * prevent the socket from being placed in the listen queue until all fields
- * are fully initialized.
 */
 static int
 tcp_usr_accept(struct socket *so, struct sockaddr **nam)
@@ -666,7 +659,6 @@ tcp_usr_accept(struct socket *so, struct sockaddr **nam)
 inp = sotoinpcb(so);
 KASSERT(inp != NULL, ("tcp_usr_accept: inp == NULL"));
- INP_INFO_RLOCK(&V_tcbinfo);
 INP_WLOCK(inp);
 if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
 error = ECONNABORTED;
@@ -686,7 +678,6 @@ tcp_usr_accept(struct socket *so, struct sockaddr **nam)
 out:
 TCPDEBUG2(PRU_ACCEPT);
 INP_WUNLOCK(inp);
- INP_INFO_RUNLOCK(&V_tcbinfo);
 if (error == 0)
 *nam = in_sockaddr(port, &addr);
 return error;
diff --git a/sys/netpfil/ipfw/dn_sched_fq_pie.c b/sys/netpfil/ipfw/dn_sched_fq_pie.c
index 2883cf8..bfcd6c5 100644
--- a/sys/netpfil/ipfw/dn_sched_fq_pie.c
+++ b/sys/netpfil/ipfw/dn_sched_fq_pie.c
@@ -111,7 +111,7 @@ struct fq_pie_flow {
 int deficit;
 int active; /* 1: flow is active (in a list) */
 struct pie_status pst; /* pie status variables */
- struct fq_pie_si *psi; /* parent scheduler instance */
+ struct fq_pie_si_extra *psi_extra;
 STAILQ_ENTRY(fq_pie_flow) flowchain;
 };

@@ -120,23 +120,30 @@ struct fq_pie_schk {
 struct dn_sch_fq_pie_parms cfg;
 };

+
+/* fq_pie scheduler instance extra state vars.
+ * The purpose of separating this structure is to preserve the number of
+ * active sub-queues and the flows array pointer even after the scheduler
+ * instance is destroyed.
+ * Preserving these variables allows the allocated memory to be freed by
+ * fqpie_callout_cleanup() independently of fq_pie_free_sched().
+ */
+struct fq_pie_si_extra {
+ uint32_t nr_active_q; /* number of active queues */
+ struct fq_pie_flow *flows; /* array of flows (queues) */
+ };
+
 /* fq_pie scheduler instance */
 struct fq_pie_si {
- struct dn_sch_inst _si; /* standard scheduler instance */
+ struct dn_sch_inst _si; /* standard scheduler instance.
SHOULD BE FIRST */ struct dn_queue main_q; /* main queue is after si directly */ - uint32_t nr_active_q; - struct fq_pie_flow *flows; /* array of flows (queues) */ uint32_t perturbation; /* random value */ struct fq_pie_list newflows; /* list of new queues */ struct fq_pie_list oldflows; /* list of old queues */ + struct fq_pie_si_extra *si_extra; /* extra state vars*/ }; -struct mem_to_free { - void *mem_flows; - void *mem_callout; -}; -static struct mtx freemem_mtx; static struct dn_alg fq_pie_desc; /* Default FQ-PIE parameters including PIE */ @@ -371,22 +378,6 @@ fq_calculate_drop_prob(void *x) int64_t p, prob, oldprob; aqm_time_t now; - /* dealing with race condition */ - if (callout_pending(&pst->aqm_pie_callout)) { - /* callout was reset */ - mtx_unlock(&pst->lock_mtx); - return; - } - - if (!callout_active(&pst->aqm_pie_callout)) { - /* callout was stopped */ - mtx_unlock(&pst->lock_mtx); - mtx_destroy(&pst->lock_mtx); - q->psi->nr_active_q--; - return; - } - callout_deactivate(&pst->aqm_pie_callout); - now = AQM_UNOW; pprms = pst->parms; prob = pst->drop_prob; @@ -524,20 +515,17 @@ fq_deactivate_pie(struct pie_status *pst) * Initialize PIE for sub-queue 'q' */ static int -pie_init(struct fq_pie_flow *q) +pie_init(struct fq_pie_flow *q, struct fq_pie_schk *fqpie_schk) { struct pie_status *pst=&q->pst; struct dn_aqm_pie_parms *pprms = pst->parms; - struct fq_pie_schk *fqpie_schk; - - fqpie_schk = (struct fq_pie_schk *)(q->psi->_si.sched+1); - int err = 0; + int err = 0; if (!pprms){ D("AQM_PIE is not configured"); err = EINVAL; } else { - q->psi->nr_active_q++; + q->psi_extra->nr_active_q++; /* For speed optimization, we caculate 1/3 queue size once here */ // XXX limit divided by number of queues divided by 3 ??? @@ -553,8 +541,36 @@ pie_init(struct fq_pie_flow *q) } /* + * callout function to destroy PIE lock, and free fq_pie flows and fq_pie si + * extra memory when number of active sub-queues reaches zero. + * 'x' is a fq_pie_flow to be destroyed + */ +static void +fqpie_callout_cleanup(void *x) +{ + struct fq_pie_flow *q = x; + struct pie_status *pst = &q->pst; + struct fq_pie_si_extra *psi_extra; + + mtx_unlock(&pst->lock_mtx); + mtx_destroy(&pst->lock_mtx); + psi_extra = q->psi_extra; + + DN_BH_WLOCK(); + psi_extra->nr_active_q--; + + /* when all sub-queues are destroyed, free flows fq_pie extra vars memory */ + if (!psi_extra->nr_active_q) { + free(psi_extra->flows, M_DUMMYNET); + free(psi_extra, M_DUMMYNET); + fq_pie_desc.ref_count--; + } + DN_BH_WUNLOCK(); +} + +/* * Clean up PIE status for sub-queue 'q' - * Stop callout timer and destroy mtx + * Stop callout timer and destroy mtx using fqpie_callout_cleanup() callout. 
*/ static int pie_cleanup(struct fq_pie_flow *q) @@ -562,14 +578,9 @@ pie_cleanup(struct fq_pie_flow *q) struct pie_status *pst = &q->pst; mtx_lock(&pst->lock_mtx); - if (callout_stop(&pst->aqm_pie_callout) || !(pst->sflags & PIE_ACTIVE)) { - mtx_unlock(&pst->lock_mtx); - mtx_destroy(&pst->lock_mtx); - q->psi->nr_active_q--; - } else { - mtx_unlock(&pst->lock_mtx); - return EBUSY; - } + callout_reset_sbt(&pst->aqm_pie_callout, + SBT_1US, 0, fqpie_callout_cleanup, q, 0); + mtx_unlock(&pst->lock_mtx); return 0; } @@ -831,10 +842,12 @@ fq_pie_enqueue(struct dn_sch_inst *_si, struct dn_queue *_q, struct fq_pie_schk *schk; struct dn_sch_fq_pie_parms *param; struct dn_queue *mainq; + struct fq_pie_flow *flows; int idx, drop, i, maxidx; mainq = (struct dn_queue *)(_si + 1); si = (struct fq_pie_si *)_si; + flows = si->si_extra->flows; schk = (struct fq_pie_schk *)(si->_si.sched+1); param = &schk->cfg; @@ -844,7 +857,7 @@ fq_pie_enqueue(struct dn_sch_inst *_si, struct dn_queue *_q, /* enqueue packet into appropriate queue using PIE AQM. * Note: 'pie_enqueue' function returns 1 only when it unable to * add timestamp to packet (no limit check)*/ - drop = pie_enqueue(&si->flows[idx], m, si); + drop = pie_enqueue(&flows[idx], m, si); /* pie unable to timestamp a packet */ if (drop) @@ -853,11 +866,11 @@ fq_pie_enqueue(struct dn_sch_inst *_si, struct dn_queue *_q, /* If the flow (sub-queue) is not active ,then add it to tail of * new flows list, initialize and activate it. */ - if (!si->flows[idx].active) { - STAILQ_INSERT_TAIL(&si->newflows, &si->flows[idx], flowchain); - si->flows[idx].deficit = param->quantum; - fq_activate_pie(&si->flows[idx]); - si->flows[idx].active = 1; + if (!flows[idx].active) { + STAILQ_INSERT_TAIL(&si->newflows, &flows[idx], flowchain); + flows[idx].deficit = param->quantum; + fq_activate_pie(&flows[idx]); + flows[idx].active = 1; } /* check the limit for all queues and remove a packet from the @@ -866,15 +879,15 @@ fq_pie_enqueue(struct dn_sch_inst *_si, struct dn_queue *_q, if (mainq->ni.length > schk->cfg.limit) { /* find first active flow */ for (maxidx = 0; maxidx < schk->cfg.flows_cnt; maxidx++) - if (si->flows[maxidx].active) + if (flows[maxidx].active) break; if (maxidx < schk->cfg.flows_cnt) { /* find the largest sub- queue */ for (i = maxidx + 1; i < schk->cfg.flows_cnt; i++) - if (si->flows[i].active && si->flows[i].stats.length > - si->flows[maxidx].stats.length) + if (flows[i].active && flows[i].stats.length > + flows[maxidx].stats.length) maxidx = i; - pie_drop_head(&si->flows[maxidx], si); + pie_drop_head(&flows[maxidx], si); drop = 1; } } @@ -974,12 +987,13 @@ fq_pie_new_sched(struct dn_sch_inst *_si) struct fq_pie_si *si; struct dn_queue *q; struct fq_pie_schk *schk; + struct fq_pie_flow *flows; int i; si = (struct fq_pie_si *)_si; schk = (struct fq_pie_schk *)(_si->sched+1); - if(si->flows) { + if(si->si_extra) { D("si already configured!"); return 0; } @@ -990,17 +1004,27 @@ fq_pie_new_sched(struct dn_sch_inst *_si) q->_si = _si; q->fs = _si->sched->fs; + /* allocate memory for scheduler instance extra vars */ + si->si_extra = malloc(sizeof(struct fq_pie_si_extra), + M_DUMMYNET, M_NOWAIT | M_ZERO); + if (si->si_extra == NULL) { + D("cannot allocate memory for fq_pie si extra vars"); + return ENOMEM ; + } /* allocate memory for flows array */ - si->flows = malloc(schk->cfg.flows_cnt * sizeof(struct fq_pie_flow), + si->si_extra->flows = malloc(schk->cfg.flows_cnt * sizeof(struct fq_pie_flow), M_DUMMYNET, M_NOWAIT | M_ZERO); - if (si->flows == NULL) { - 
D("cannot allocate memory for fq_pie configuration parameters"); + flows = si->si_extra->flows; + if (flows == NULL) { + free(si->si_extra, M_DUMMYNET); + si->si_extra = NULL; + D("cannot allocate memory for fq_pie flows"); return ENOMEM ; } /* init perturbation for this si */ si->perturbation = random(); - si->nr_active_q = 0; + si->si_extra->nr_active_q = 0; /* init the old and new flows lists */ STAILQ_INIT(&si->newflows); @@ -1008,45 +1032,16 @@ fq_pie_new_sched(struct dn_sch_inst *_si) /* init the flows (sub-queues) */ for (i = 0; i < schk->cfg.flows_cnt; i++) { - si->flows[i].pst.parms = &schk->cfg.pcfg; - si->flows[i].psi = si; - pie_init(&si->flows[i]); - } - - /* init mtx lock and callout function for free memory */ - if (!fq_pie_desc.ref_count) { - mtx_init(&freemem_mtx, "mtx_pie", NULL, MTX_DEF); + flows[i].pst.parms = &schk->cfg.pcfg; + flows[i].psi_extra = si->si_extra; + pie_init(&flows[i], schk); } - mtx_lock(&freemem_mtx); fq_pie_desc.ref_count++; - mtx_unlock(&freemem_mtx); return 0; } -/* - * Free FQ-PIE flows memory callout function. - * This function is scheduled when a flow or more still active and - * the scheduer is about to be destroyed, to prevent memory leak. - */ -static void -free_flows(void *_mem) -{ - struct mem_to_free *mem = _mem; - - free(mem->mem_flows, M_DUMMYNET); - free(mem->mem_callout, M_DUMMYNET); - free(_mem, M_DUMMYNET); - - fq_pie_desc.ref_count--; - if (!fq_pie_desc.ref_count) { - mtx_unlock(&freemem_mtx); - mtx_destroy(&freemem_mtx); - } else - mtx_unlock(&freemem_mtx); - //D("mem freed ok!"); -} /* * Free fq_pie scheduler instance. @@ -1056,61 +1051,17 @@ fq_pie_free_sched(struct dn_sch_inst *_si) { struct fq_pie_si *si; struct fq_pie_schk *schk; + struct fq_pie_flow *flows; int i; si = (struct fq_pie_si *)_si; schk = (struct fq_pie_schk *)(_si->sched+1); - + flows = si->si_extra->flows; for (i = 0; i < schk->cfg.flows_cnt; i++) { - pie_cleanup(&si->flows[i]); - } - - /* if there are still some queues have a callout going to start, - * we cannot free flows memory. If we do so, a panic can happen - * as prob calculate callout function uses flows memory. - */ - if (!si->nr_active_q) { - /* free the flows array */ - free(si->flows , M_DUMMYNET); - si->flows = NULL; - mtx_lock(&freemem_mtx); - fq_pie_desc.ref_count--; - if (!fq_pie_desc.ref_count) { - mtx_unlock(&freemem_mtx); - mtx_destroy(&freemem_mtx); - } else - mtx_unlock(&freemem_mtx); - //D("ok!"); - return 0; - } else { - /* memory leak happens here. So, we register a callout function to free - * flows memory later. 
- */ - D("unable to stop all fq_pie sub-queues!"); - mtx_lock(&freemem_mtx); - - struct callout *mem_callout; - struct mem_to_free *mem; - - mem = malloc(sizeof(*mem), M_DUMMYNET, - M_NOWAIT | M_ZERO); - mem_callout = malloc(sizeof(*mem_callout), M_DUMMYNET, - M_NOWAIT | M_ZERO); - - callout_init_mtx(mem_callout, &freemem_mtx, - CALLOUT_RETURNUNLOCKED); - - mem->mem_flows = si->flows; - mem->mem_callout = mem_callout; - callout_reset_sbt(mem_callout, - (uint64_t)(si->flows[0].pst.parms->tupdate + 1000) * SBT_1US, - 0, free_flows, mem, 0); - - si->flows = NULL; - mtx_unlock(&freemem_mtx); - - return EBUSY; + pie_cleanup(&flows[i]); } + si->si_extra = NULL; + return 0; } /* diff --git a/sys/ofed/include/linux/linux_idr.c b/sys/ofed/include/linux/linux_idr.c index 8eb7949..c47903e 100644 --- a/sys/ofed/include/linux/linux_idr.c +++ b/sys/ofed/include/linux/linux_idr.c @@ -267,7 +267,8 @@ idr_get(struct idr *idr) return (il); } il = malloc(sizeof(*il), M_IDR, M_ZERO | M_NOWAIT); - bitmap_fill(&il->bitmap, IDR_SIZE); + if (il != NULL) + bitmap_fill(&il->bitmap, IDR_SIZE); return (il); } diff --git a/sys/vm/vm_page.c b/sys/vm/vm_page.c index c250c5d..7010580 100644 --- a/sys/vm/vm_page.c +++ b/sys/vm/vm_page.c @@ -981,8 +981,6 @@ static int vm_page_insert_after(vm_page_t m, vm_object_t object, vm_pindex_t pindex, vm_page_t mpred) { - vm_pindex_t sidx; - vm_object_t sobj; vm_page_t msucc; VM_OBJECT_ASSERT_WLOCKED(object); @@ -1003,8 +1001,6 @@ vm_page_insert_after(vm_page_t m, vm_object_t object, vm_pindex_t pindex, /* * Record the object/offset pair in this page */ - sobj = m->object; - sidx = m->pindex; m->object = object; m->pindex = pindex; @@ -1012,8 +1008,8 @@ vm_page_insert_after(vm_page_t m, vm_object_t object, vm_pindex_t pindex, * Now link into the object's ordered list of backed pages. */ if (vm_radix_insert(&object->rtree, m)) { - m->object = sobj; - m->pindex = sidx; + m->object = NULL; + m->pindex = 0; return (1); } vm_page_insert_radixdone(m, object, mpred); @@ -1640,6 +1636,7 @@ vm_page_alloc(vm_object_t object, vm_pindex_t pindex, int req) } m->object = NULL; m->oflags = VPO_UNMANAGED; + m->busy_lock = VPB_UNBUSIED; vm_page_free(m); return (NULL); } @@ -1842,6 +1839,7 @@ retry: m->object = NULL; m->oflags |= VPO_UNMANAGED; } + m->busy_lock = VPB_UNBUSIED; vm_page_free(m); } return (NULL); |
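
For illustration, a minimal user-space sketch of the Linux personality(2) behaviour that the reworked linux_personality() handler and the new persona field emulate: passing 0xffffffff queries the current persona without changing it, and the previous value is always returned. This sketch is not part of the commit and assumes a Linux host (or a Linuxulator environment) where <sys/personality.h> and READ_IMPLIES_EXEC are available.

#include <stdio.h>
#include <sys/personality.h>

int
main(void)
{
    /* 0xffffffff only queries the current persona; the patched
     * linux_personality() mirrors this by updating pem->persona
     * only when the argument differs from 0xffffffff. */
    int old = personality(0xffffffff);

    printf("old persona: %#x%s\n", (unsigned int)old,
        (old & READ_IMPLIES_EXEC) ? " (READ_IMPLIES_EXEC)" : "");

    /* With READ_IMPLIES_EXEC set, later PROT_READ mappings become
     * implicitly executable; linux_fixup_prot() emulates that for
     * 32-bit Linux processes on amd64. */
    if (personality(old | READ_IMPLIES_EXEC) == -1)
        perror("personality");
    return (0);
}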
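The MAP_GROWSDOWN handling that moved into linux_mmap_common() can likewise be shown with a small stand-alone sketch. The helper name growsdown_fixup() is hypothetical and PAGE_SIZE is assumed to be 4 KiB; the arithmetic mirrors the (STACK_SIZE - GUARD_SIZE) placement in the patch, where Linux passes the initial bottom-of-stack address and length while the BSD MAP_STACK mapping wants the lowest address the region may grow down to plus the maximum region size.

#include <stdint.h>
#include <stdio.h>

/* Constants as in linux_mmap.c; PAGE_SIZE is assumed to be 4 KiB here. */
#define PAGE_SIZE  4096UL
#define STACK_SIZE (2 * 1024 * 1024)
#define GUARD_SIZE (4 * PAGE_SIZE)

/*
 * Hypothetical helper mirroring the LINUX_MAP_GROWSDOWN branch of
 * linux_mmap_common(): small requests are widened to STACK_SIZE - GUARD_SIZE
 * and the base is moved down accordingly, so the MAP_STACK region can
 * auto-grow down to that base.
 */
static void
growsdown_fixup(uintptr_t addr, size_t len, uintptr_t *bsd_addr, size_t *bsd_len)
{
    if (len > STACK_SIZE - GUARD_SIZE) {
        *bsd_addr = addr;
        *bsd_len = len;
    } else {
        *bsd_addr = addr - (STACK_SIZE - GUARD_SIZE - len);
        *bsd_len = STACK_SIZE - GUARD_SIZE;
    }
}

int
main(void)
{
    uintptr_t bsd_addr;
    size_t bsd_len;

    /* A 64 KiB thread stack whose initial bottom-of-stack is 0xbfbf0000. */
    growsdown_fixup(0xbfbf0000UL, 64 * 1024, &bsd_addr, &bsd_len);
    printf("addr %#lx len %#zx\n", (unsigned long)bsd_addr, bsd_len);
    return (0);
}

Running it prints the widened base address and the STACK_SIZE - GUARD_SIZE length, which is what sys_mmap() ultimately receives from the compat layer for small thread stacks.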
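Finally, the INQUIRY response validation added to storvsc_io_done() reduces to a length computation that is easy to show in isolation. The helper below is a hypothetical stand-alone rendition (SHORT_INQUIRY_LENGTH is redefined locally; in the driver it comes from CAM): the usable payload is the smaller of transfer_len and the "additional length" byte 4 plus 5, and responses shorter than 36 bytes skip the qualifier/type check entirely.

#include <stdio.h>

#define SHORT_INQUIRY_LENGTH 36 /* minimum useful INQUIRY payload */

/* Hypothetical helper mirroring the length math in storvsc_io_done(). */
static int
inquiry_resp_is_short(const unsigned char *resp_buf, int resp_xfer_len)
{
    int resp_buf_len, data_len;

    resp_buf_len = resp_xfer_len >= 5 ? resp_buf[4] + 5 : 0;
    data_len = resp_buf_len < resp_xfer_len ? resp_buf_len : resp_xfer_len;
    return (data_len < SHORT_INQUIRY_LENGTH);
}

int
main(void)
{
    /* The 5-byte response the patch comment describes for a nonexistent
     * device on Windows 2012 R2: [0x7f 0x00 0x00 0x00 0x00]. */
    unsigned char resp[5] = { 0x7f, 0x00, 0x00, 0x00, 0x00 };

    printf("short inquiry: %d\n", inquiry_resp_is_short(resp, (int)sizeof(resp)));
    return (0);
}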