path: root/sys
author    Renato Botelho <renato@netgate.com>  2016-07-19 11:28:18 -0300
committer Renato Botelho <renato@netgate.com>  2016-07-19 11:28:18 -0300
commit    a3c20a378f7a9fa76e9301a43ac64ed07057d01e (patch)
tree      d14045c61b471f327f487431e05b0c4fe0cd8d76 /sys
parent    3a4027cfafa37c1a0c0b05987c0edb1452c7bd2b (diff)
parent    3f1f4f0e73b6d12c01e8cad4791d23e8e56127db (diff)
download  FreeBSD-src-a3c20a378f7a9fa76e9301a43ac64ed07057d01e.zip
          FreeBSD-src-a3c20a378f7a9fa76e9301a43ac64ed07057d01e.tar.gz
Merge remote-tracking branch 'origin/stable/10' into devel
Diffstat (limited to 'sys')
-rw-r--r--  sys/amd64/linux/linux.h | 7
-rw-r--r--  sys/amd64/linux/linux_machdep.c | 169
-rw-r--r--  sys/amd64/linux/linux_proto.h | 4
-rw-r--r--  sys/amd64/linux/linux_syscall.h | 2
-rw-r--r--  sys/amd64/linux/linux_syscalls.c | 2
-rw-r--r--  sys/amd64/linux/linux_sysent.c | 2
-rw-r--r--  sys/amd64/linux/linux_systrace_args.c | 4
-rw-r--r--  sys/amd64/linux/syscalls.master | 2
-rw-r--r--  sys/amd64/linux32/linux.h | 7
-rw-r--r--  sys/amd64/linux32/linux32_machdep.c | 184
-rw-r--r--  sys/amd64/linux32/linux32_proto.h | 4
-rw-r--r--  sys/amd64/linux32/linux32_syscall.h | 2
-rw-r--r--  sys/amd64/linux32/linux32_syscalls.c | 2
-rw-r--r--  sys/amd64/linux32/linux32_sysent.c | 2
-rw-r--r--  sys/amd64/linux32/linux32_systrace_args.c | 4
-rw-r--r--  sys/amd64/linux32/syscalls.master | 2
-rw-r--r--  sys/cddl/compat/opensolaris/kern/opensolaris_lookup.c | 5
-rw-r--r--  sys/cddl/compat/opensolaris/kern/opensolaris_vfs.c | 40
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/dtrace/dtrace.c | 2
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/gfs.c | 16
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c | 104
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c | 10
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_pool.c | 2
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_ctldir.h | 1
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ctldir.c | 377
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ioctl.c | 19
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c | 16
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c | 87
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_znode.c | 5
-rw-r--r--  sys/compat/linux/linux_emul.c | 17
-rw-r--r--  sys/compat/linux/linux_emul.h | 1
-rw-r--r--  sys/compat/linux/linux_misc.c | 18
-rw-r--r--  sys/compat/linux/linux_mmap.c | 257
-rw-r--r--  sys/compat/linux/linux_mmap.h | 49
-rw-r--r--  sys/compat/linux/linux_persona.h | 56
-rw-r--r--  sys/conf/files.amd64 | 1
-rw-r--r--  sys/conf/files.i386 | 1
-rw-r--r--  sys/conf/files.pc98 | 1
-rw-r--r--  sys/dev/ahci/ahci.c | 4
-rw-r--r--  sys/dev/ahci/ahci.h | 4
-rw-r--r--  sys/dev/e1000/if_igb.c | 54
-rw-r--r--  sys/dev/e1000/if_igb.h | 6
-rw-r--r--  sys/dev/hyperv/storvsc/hv_storvsc_drv_freebsd.c | 118
-rw-r--r--  sys/i386/i386/exception.s | 4
-rw-r--r--  sys/i386/linux/linux.h | 7
-rw-r--r--  sys/i386/linux/linux_machdep.c | 188
-rw-r--r--  sys/i386/linux/linux_proto.h | 4
-rw-r--r--  sys/i386/linux/linux_syscall.h | 2
-rw-r--r--  sys/i386/linux/linux_syscalls.c | 2
-rw-r--r--  sys/i386/linux/linux_sysent.c | 2
-rw-r--r--  sys/i386/linux/linux_systrace_args.c | 4
-rw-r--r--  sys/i386/linux/syscalls.master | 2
-rw-r--r--  sys/kern/kern_mutex.c | 9
-rw-r--r--  sys/kern/vfs_mount.c | 6
-rw-r--r--  sys/modules/linux/Makefile | 2
-rw-r--r--  sys/modules/linux_common/Makefile | 2
-rw-r--r--  sys/net/mppcc.c | 2
-rw-r--r--  sys/net/mppcd.c | 2
-rw-r--r--  sys/netinet/tcp_syncache.c | 4
-rw-r--r--  sys/netinet/tcp_usrreq.c | 9
-rw-r--r--  sys/netpfil/ipfw/dn_sched_fq_pie.c | 223
-rw-r--r--  sys/ofed/include/linux/linux_idr.c | 3
-rw-r--r--  sys/vm/vm_page.c | 10
63 files changed, 1075 insertions(+), 1082 deletions(-)
diff --git a/sys/amd64/linux/linux.h b/sys/amd64/linux/linux.h
index 639499a..b30568f 100644
--- a/sys/amd64/linux/linux.h
+++ b/sys/amd64/linux/linux.h
@@ -139,13 +139,6 @@ struct l_rlimit {
l_ulong rlim_max;
};
-/* mmap options */
-#define LINUX_MAP_SHARED 0x0001
-#define LINUX_MAP_PRIVATE 0x0002
-#define LINUX_MAP_FIXED 0x0010
-#define LINUX_MAP_ANON 0x0020
-#define LINUX_MAP_GROWSDOWN 0x0100
-
/*
* stat family of syscalls
*/
diff --git a/sys/amd64/linux/linux_machdep.c b/sys/amd64/linux/linux_machdep.c
index 9664dac..80c48aa 100644
--- a/sys/amd64/linux/linux_machdep.c
+++ b/sys/amd64/linux/linux_machdep.c
@@ -83,6 +83,7 @@ __FBSDID("$FreeBSD$");
#include <compat/linux/linux_ipc.h>
#include <compat/linux/linux_file.h>
#include <compat/linux/linux_misc.h>
+#include <compat/linux/linux_mmap.h>
#include <compat/linux/linux_signal.h>
#include <compat/linux/linux_util.h>
#include <compat/linux/linux_emul.h>
@@ -122,181 +123,19 @@ linux_set_upcall_kse(struct thread *td, register_t stack)
return (0);
}
-#define STACK_SIZE (2 * 1024 * 1024)
-#define GUARD_SIZE (4 * PAGE_SIZE)
-
int
linux_mmap2(struct thread *td, struct linux_mmap2_args *args)
{
- struct proc *p = td->td_proc;
- struct mmap_args /* {
- caddr_t addr;
- size_t len;
- int prot;
- int flags;
- int fd;
- long pad;
- off_t pos;
- } */ bsd_args;
- int error;
- struct file *fp;
- cap_rights_t rights;
-
- LINUX_CTR6(mmap2, "0x%lx, %ld, %ld, 0x%08lx, %ld, 0x%lx",
- args->addr, args->len, args->prot,
- args->flags, args->fd, args->pgoff);
-
- error = 0;
- bsd_args.flags = 0;
- fp = NULL;
-
- /*
- * Linux mmap(2):
- * You must specify exactly one of MAP_SHARED and MAP_PRIVATE
- */
- if (! ((args->flags & LINUX_MAP_SHARED) ^
- (args->flags & LINUX_MAP_PRIVATE)))
- return (EINVAL);
-
- if (args->flags & LINUX_MAP_SHARED)
- bsd_args.flags |= MAP_SHARED;
- if (args->flags & LINUX_MAP_PRIVATE)
- bsd_args.flags |= MAP_PRIVATE;
- if (args->flags & LINUX_MAP_FIXED)
- bsd_args.flags |= MAP_FIXED;
- if (args->flags & LINUX_MAP_ANON)
- bsd_args.flags |= MAP_ANON;
- else
- bsd_args.flags |= MAP_NOSYNC;
- if (args->flags & LINUX_MAP_GROWSDOWN)
- bsd_args.flags |= MAP_STACK;
- /*
- * PROT_READ, PROT_WRITE, or PROT_EXEC implies PROT_READ and PROT_EXEC
- * on Linux/i386. We do this to ensure maximum compatibility.
- * Linux/ia64 does the same in i386 emulation mode.
- */
- bsd_args.prot = args->prot;
- if (bsd_args.prot & (PROT_READ | PROT_WRITE | PROT_EXEC))
- bsd_args.prot |= PROT_READ | PROT_EXEC;
-
- /* Linux does not check file descriptor when MAP_ANONYMOUS is set. */
- bsd_args.fd = (bsd_args.flags & MAP_ANON) ? -1 : args->fd;
- if (bsd_args.fd != -1) {
- /*
- * Linux follows Solaris mmap(2) description:
- * The file descriptor fildes is opened with
- * read permission, regardless of the
- * protection options specified.
- */
-
- error = fget(td, bsd_args.fd,
- cap_rights_init(&rights, CAP_MMAP), &fp);
- if (error != 0 )
- return (error);
- if (fp->f_type != DTYPE_VNODE) {
- fdrop(fp, td);
- return (EINVAL);
- }
-
- /* Linux mmap() just fails for O_WRONLY files */
- if (!(fp->f_flag & FREAD)) {
- fdrop(fp, td);
- return (EACCES);
- }
-
- fdrop(fp, td);
- }
-
- if (args->flags & LINUX_MAP_GROWSDOWN) {
- /*
- * The Linux MAP_GROWSDOWN option does not limit auto
- * growth of the region. Linux mmap with this option
- * takes as addr the inital BOS, and as len, the initial
- * region size. It can then grow down from addr without
- * limit. However, Linux threads has an implicit internal
- * limit to stack size of STACK_SIZE. Its just not
- * enforced explicitly in Linux. But, here we impose
- * a limit of (STACK_SIZE - GUARD_SIZE) on the stack
- * region, since we can do this with our mmap.
- *
- * Our mmap with MAP_STACK takes addr as the maximum
- * downsize limit on BOS, and as len the max size of
- * the region. It then maps the top SGROWSIZ bytes,
- * and auto grows the region down, up to the limit
- * in addr.
- *
- * If we don't use the MAP_STACK option, the effect
- * of this code is to allocate a stack region of a
- * fixed size of (STACK_SIZE - GUARD_SIZE).
- */
-
- if ((caddr_t)PTRIN(args->addr) + args->len >
- p->p_vmspace->vm_maxsaddr) {
- /*
- * Some Linux apps will attempt to mmap
- * thread stacks near the top of their
- * address space. If their TOS is greater
- * than vm_maxsaddr, vm_map_growstack()
- * will confuse the thread stack with the
- * process stack and deliver a SEGV if they
- * attempt to grow the thread stack past their
- * current stacksize rlimit. To avoid this,
- * adjust vm_maxsaddr upwards to reflect
- * the current stacksize rlimit rather
- * than the maximum possible stacksize.
- * It would be better to adjust the
- * mmap'ed region, but some apps do not check
- * mmap's return value.
- */
- PROC_LOCK(p);
- p->p_vmspace->vm_maxsaddr = (char *)USRSTACK -
- lim_cur(p, RLIMIT_STACK);
- PROC_UNLOCK(p);
- }
-
- /*
- * This gives us our maximum stack size and a new BOS.
- * If we're using VM_STACK, then mmap will just map
- * the top SGROWSIZ bytes, and let the stack grow down
- * to the limit at BOS. If we're not using VM_STACK
- * we map the full stack, since we don't have a way
- * to autogrow it.
- */
- if (args->len > STACK_SIZE - GUARD_SIZE) {
- bsd_args.addr = (caddr_t)PTRIN(args->addr);
- bsd_args.len = args->len;
- } else {
- bsd_args.addr = (caddr_t)PTRIN(args->addr) -
- (STACK_SIZE - GUARD_SIZE - args->len);
- bsd_args.len = STACK_SIZE - GUARD_SIZE;
- }
- } else {
- bsd_args.addr = (caddr_t)PTRIN(args->addr);
- bsd_args.len = args->len;
- }
- bsd_args.pos = (off_t)args->pgoff;
-
- error = sys_mmap(td, &bsd_args);
-
- LINUX_CTR2(mmap2, "return: %d (%p)",
- error, td->td_retval[0]);
- return (error);
+ return (linux_mmap_common(td, PTROUT(args->addr), args->len, args->prot,
+ args->flags, args->fd, args->pgoff));
}
int
linux_mprotect(struct thread *td, struct linux_mprotect_args *uap)
{
- struct mprotect_args bsd_args;
-
- LINUX_CTR(mprotect);
- bsd_args.addr = uap->addr;
- bsd_args.len = uap->len;
- bsd_args.prot = uap->prot;
- if (bsd_args.prot & (PROT_READ | PROT_WRITE | PROT_EXEC))
- bsd_args.prot |= PROT_READ | PROT_EXEC;
- return (sys_mprotect(td, &bsd_args));
+ return (linux_mprotect_common(td, PTROUT(uap->addr), uap->len, uap->prot));
}
int
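The machine-dependent linux_mmap2() and linux_mprotect() wrappers above now delegate to linux_mmap_common() and linux_mprotect_common() in the new sys/compat/linux/linux_mmap.c, so the translation that used to live here is shared by the amd64, linux32 and i386 Linuxulators; the MAP_GROWSDOWN stack-sizing logic presumably moves there as well. As a rough, self-contained userspace sketch of the flag mapping the removed block performed (LINUX_MAP_* values copied from the deleted defines, helper name invented for the example, intended to compile on FreeBSD since MAP_NOSYNC is FreeBSD-specific):

#include <errno.h>
#include <stdio.h>
#include <sys/mman.h>

#define LINUX_MAP_SHARED	0x0001
#define LINUX_MAP_PRIVATE	0x0002
#define LINUX_MAP_FIXED		0x0010
#define LINUX_MAP_ANON		0x0020
#define LINUX_MAP_GROWSDOWN	0x0100

/* Translate Linux mmap flags to their FreeBSD counterparts. */
static int
linux_to_bsd_mmap_flags(int lflags, int *bsdflagsp)
{
	int flags = 0;

	/* The removed code rejected a request with neither flavour set. */
	if (!((lflags & LINUX_MAP_SHARED) ^ (lflags & LINUX_MAP_PRIVATE)))
		return (EINVAL);
	if (lflags & LINUX_MAP_SHARED)
		flags |= MAP_SHARED;
	if (lflags & LINUX_MAP_PRIVATE)
		flags |= MAP_PRIVATE;
	if (lflags & LINUX_MAP_FIXED)
		flags |= MAP_FIXED;
	if (lflags & LINUX_MAP_ANON)
		flags |= MAP_ANON;
	else
		flags |= MAP_NOSYNC;	/* FreeBSD-specific, as in the old code */
	if (lflags & LINUX_MAP_GROWSDOWN)
		flags |= MAP_STACK;
	*bsdflagsp = flags;
	return (0);
}

int
main(void)
{
	int bsd, error;

	/* Neither MAP_SHARED nor MAP_PRIVATE: rejected. */
	printf("%d\n", linux_to_bsd_mmap_flags(LINUX_MAP_FIXED, &bsd));
	error = linux_to_bsd_mmap_flags(LINUX_MAP_PRIVATE | LINUX_MAP_ANON, &bsd);
	printf("%d %#x\n", error, bsd);	/* 0 and MAP_PRIVATE|MAP_ANON */
	return (0);
}
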
diff --git a/sys/amd64/linux/linux_proto.h b/sys/amd64/linux/linux_proto.h
index 4946805..4651788 100644
--- a/sys/amd64/linux/linux_proto.h
+++ b/sys/amd64/linux/linux_proto.h
@@ -3,7 +3,7 @@
*
* DO NOT EDIT-- this file is automatically generated.
* $FreeBSD$
- * created from FreeBSD: stable/10/sys/amd64/linux/syscalls.master 294368 2016-01-20 01:09:53Z jhb
+ * created from FreeBSD: stable/10/sys/amd64/linux/syscalls.master 302962 2016-07-17 15:07:33Z dchagin
*/
#ifndef _LINUX_SYSPROTO_H_
@@ -499,7 +499,7 @@ struct linux_mknod_args {
char dev_l_[PADL_(l_dev_t)]; l_dev_t dev; char dev_r_[PADR_(l_dev_t)];
};
struct linux_personality_args {
- char per_l_[PADL_(l_ulong)]; l_ulong per; char per_r_[PADR_(l_ulong)];
+ char per_l_[PADL_(l_uint)]; l_uint per; char per_r_[PADR_(l_uint)];
};
struct linux_ustat_args {
char dev_l_[PADL_(l_dev_t)]; l_dev_t dev; char dev_r_[PADR_(l_dev_t)];
diff --git a/sys/amd64/linux/linux_syscall.h b/sys/amd64/linux/linux_syscall.h
index 874651c..88fa022 100644
--- a/sys/amd64/linux/linux_syscall.h
+++ b/sys/amd64/linux/linux_syscall.h
@@ -3,7 +3,7 @@
*
* DO NOT EDIT-- this file is automatically generated.
* $FreeBSD$
- * created from FreeBSD: stable/10/sys/amd64/linux/syscalls.master 294368 2016-01-20 01:09:53Z jhb
+ * created from FreeBSD: stable/10/sys/amd64/linux/syscalls.master 302962 2016-07-17 15:07:33Z dchagin
*/
#define LINUX_SYS_read 0
diff --git a/sys/amd64/linux/linux_syscalls.c b/sys/amd64/linux/linux_syscalls.c
index fcea91e..1b71749 100644
--- a/sys/amd64/linux/linux_syscalls.c
+++ b/sys/amd64/linux/linux_syscalls.c
@@ -3,7 +3,7 @@
*
* DO NOT EDIT-- this file is automatically generated.
* $FreeBSD$
- * created from FreeBSD: stable/10/sys/amd64/linux/syscalls.master 294368 2016-01-20 01:09:53Z jhb
+ * created from FreeBSD: stable/10/sys/amd64/linux/syscalls.master 302962 2016-07-17 15:07:33Z dchagin
*/
const char *linux_syscallnames[] = {
diff --git a/sys/amd64/linux/linux_sysent.c b/sys/amd64/linux/linux_sysent.c
index 14d5c87..50302e8 100644
--- a/sys/amd64/linux/linux_sysent.c
+++ b/sys/amd64/linux/linux_sysent.c
@@ -3,7 +3,7 @@
*
* DO NOT EDIT-- this file is automatically generated.
* $FreeBSD$
- * created from FreeBSD: stable/10/sys/amd64/linux/syscalls.master 294368 2016-01-20 01:09:53Z jhb
+ * created from FreeBSD: stable/10/sys/amd64/linux/syscalls.master 302962 2016-07-17 15:07:33Z dchagin
*/
#include <sys/param.h>
diff --git a/sys/amd64/linux/linux_systrace_args.c b/sys/amd64/linux/linux_systrace_args.c
index 5dcdd55..be99518 100644
--- a/sys/amd64/linux/linux_systrace_args.c
+++ b/sys/amd64/linux/linux_systrace_args.c
@@ -1120,7 +1120,7 @@ systrace_args(int sysnum, void *params, uint64_t *uarg, int *n_args)
/* linux_personality */
case 135: {
struct linux_personality_args *p = params;
- iarg[0] = p->per; /* l_ulong */
+ iarg[0] = p->per; /* l_uint */
*n_args = 1;
break;
}
@@ -4112,7 +4112,7 @@ systrace_entry_setargdesc(int sysnum, int ndx, char *desc, size_t descsz)
case 135:
switch(ndx) {
case 0:
- p = "l_ulong";
+ p = "l_uint";
break;
default:
break;
diff --git a/sys/amd64/linux/syscalls.master b/sys/amd64/linux/syscalls.master
index 50ddd92..a2c4a0b 100644
--- a/sys/amd64/linux/syscalls.master
+++ b/sys/amd64/linux/syscalls.master
@@ -270,7 +270,7 @@
133 AUE_MKNOD STD { int linux_mknod(char *path, l_int mode, \
l_dev_t dev); }
134 AUE_USELIB UNIMPL uselib
-135 AUE_PERSONALITY STD { int linux_personality(l_ulong per); }
+135 AUE_PERSONALITY STD { int linux_personality(l_uint per); }
136 AUE_NULL STD { int linux_ustat(l_dev_t dev, \
struct l_ustat *ubuf); }
137 AUE_STATFS STD { int linux_statfs(char *path, \
diff --git a/sys/amd64/linux32/linux.h b/sys/amd64/linux32/linux.h
index 02d12f5..97da878 100644
--- a/sys/amd64/linux32/linux.h
+++ b/sys/amd64/linux32/linux.h
@@ -165,13 +165,6 @@ struct l_rusage {
l_long ru_nivcsw;
} __packed;
-/* mmap options */
-#define LINUX_MAP_SHARED 0x0001
-#define LINUX_MAP_PRIVATE 0x0002
-#define LINUX_MAP_FIXED 0x0010
-#define LINUX_MAP_ANON 0x0020
-#define LINUX_MAP_GROWSDOWN 0x0100
-
struct l_mmap_argv {
l_uintptr_t addr;
l_size_t len;
diff --git a/sys/amd64/linux32/linux32_machdep.c b/sys/amd64/linux32/linux32_machdep.c
index b38afb3..187ec15 100644
--- a/sys/amd64/linux32/linux32_machdep.c
+++ b/sys/amd64/linux32/linux32_machdep.c
@@ -70,6 +70,7 @@ __FBSDID("$FreeBSD$");
#include <amd64/linux32/linux32_proto.h>
#include <compat/linux/linux_ipc.h>
#include <compat/linux/linux_misc.h>
+#include <compat/linux/linux_mmap.h>
#include <compat/linux/linux_signal.h>
#include <compat/linux/linux_util.h>
#include <compat/linux/linux_emul.h>
@@ -84,9 +85,6 @@ struct l_old_select_argv {
l_uintptr_t timeout;
} __packed;
-static int linux_mmap_common(struct thread *td, l_uintptr_t addr,
- l_size_t len, l_int prot, l_int flags, l_int fd,
- l_loff_t pos);
static void
bsd_to_linux_rusage(struct rusage *ru, struct l_rusage *lru)
@@ -448,9 +446,6 @@ linux_set_upcall_kse(struct thread *td, register_t stack)
return (0);
}
-#define STACK_SIZE (2 * 1024 * 1024)
-#define GUARD_SIZE (4 * PAGE_SIZE)
-
int
linux_mmap2(struct thread *td, struct linux_mmap2_args *args)
{
@@ -489,184 +484,11 @@ linux_mmap(struct thread *td, struct linux_mmap_args *args)
(uint32_t)linux_args.pgoff));
}
-static int
-linux_mmap_common(struct thread *td, l_uintptr_t addr, l_size_t len, l_int prot,
- l_int flags, l_int fd, l_loff_t pos)
-{
- struct proc *p = td->td_proc;
- struct mmap_args /* {
- caddr_t addr;
- size_t len;
- int prot;
- int flags;
- int fd;
- long pad;
- off_t pos;
- } */ bsd_args;
- int error;
- struct file *fp;
- cap_rights_t rights;
-
- error = 0;
- bsd_args.flags = 0;
- fp = NULL;
-
- /*
- * Linux mmap(2):
- * You must specify exactly one of MAP_SHARED and MAP_PRIVATE
- */
- if (!((flags & LINUX_MAP_SHARED) ^ (flags & LINUX_MAP_PRIVATE)))
- return (EINVAL);
-
- if (flags & LINUX_MAP_SHARED)
- bsd_args.flags |= MAP_SHARED;
- if (flags & LINUX_MAP_PRIVATE)
- bsd_args.flags |= MAP_PRIVATE;
- if (flags & LINUX_MAP_FIXED)
- bsd_args.flags |= MAP_FIXED;
- if (flags & LINUX_MAP_ANON) {
- /* Enforce pos to be on page boundary, then ignore. */
- if ((pos & PAGE_MASK) != 0)
- return (EINVAL);
- pos = 0;
- bsd_args.flags |= MAP_ANON;
- } else
- bsd_args.flags |= MAP_NOSYNC;
- if (flags & LINUX_MAP_GROWSDOWN)
- bsd_args.flags |= MAP_STACK;
-
- /*
- * PROT_READ, PROT_WRITE, or PROT_EXEC implies PROT_READ and PROT_EXEC
- * on Linux/i386. We do this to ensure maximum compatibility.
- * Linux/ia64 does the same in i386 emulation mode.
- */
- bsd_args.prot = prot;
- if (bsd_args.prot & (PROT_READ | PROT_WRITE | PROT_EXEC))
- bsd_args.prot |= PROT_READ | PROT_EXEC;
-
- /* Linux does not check file descriptor when MAP_ANONYMOUS is set. */
- bsd_args.fd = (bsd_args.flags & MAP_ANON) ? -1 : fd;
- if (bsd_args.fd != -1) {
- /*
- * Linux follows Solaris mmap(2) description:
- * The file descriptor fildes is opened with
- * read permission, regardless of the
- * protection options specified.
- */
-
- error = fget(td, bsd_args.fd,
- cap_rights_init(&rights, CAP_MMAP), &fp);
- if (error != 0)
- return (error);
- if (fp->f_type != DTYPE_VNODE) {
- fdrop(fp, td);
- return (EINVAL);
- }
-
- /* Linux mmap() just fails for O_WRONLY files */
- if (!(fp->f_flag & FREAD)) {
- fdrop(fp, td);
- return (EACCES);
- }
-
- fdrop(fp, td);
- }
-
- if (flags & LINUX_MAP_GROWSDOWN) {
- /*
- * The Linux MAP_GROWSDOWN option does not limit auto
- * growth of the region. Linux mmap with this option
- * takes as addr the inital BOS, and as len, the initial
- * region size. It can then grow down from addr without
- * limit. However, Linux threads has an implicit internal
- * limit to stack size of STACK_SIZE. Its just not
- * enforced explicitly in Linux. But, here we impose
- * a limit of (STACK_SIZE - GUARD_SIZE) on the stack
- * region, since we can do this with our mmap.
- *
- * Our mmap with MAP_STACK takes addr as the maximum
- * downsize limit on BOS, and as len the max size of
- * the region. It then maps the top SGROWSIZ bytes,
- * and auto grows the region down, up to the limit
- * in addr.
- *
- * If we don't use the MAP_STACK option, the effect
- * of this code is to allocate a stack region of a
- * fixed size of (STACK_SIZE - GUARD_SIZE).
- */
-
- if ((caddr_t)PTRIN(addr) + len > p->p_vmspace->vm_maxsaddr) {
- /*
- * Some Linux apps will attempt to mmap
- * thread stacks near the top of their
- * address space. If their TOS is greater
- * than vm_maxsaddr, vm_map_growstack()
- * will confuse the thread stack with the
- * process stack and deliver a SEGV if they
- * attempt to grow the thread stack past their
- * current stacksize rlimit. To avoid this,
- * adjust vm_maxsaddr upwards to reflect
- * the current stacksize rlimit rather
- * than the maximum possible stacksize.
- * It would be better to adjust the
- * mmap'ed region, but some apps do not check
- * mmap's return value.
- */
- PROC_LOCK(p);
- p->p_vmspace->vm_maxsaddr = (char *)LINUX32_USRSTACK -
- lim_cur(p, RLIMIT_STACK);
- PROC_UNLOCK(p);
- }
-
- /*
- * This gives us our maximum stack size and a new BOS.
- * If we're using VM_STACK, then mmap will just map
- * the top SGROWSIZ bytes, and let the stack grow down
- * to the limit at BOS. If we're not using VM_STACK
- * we map the full stack, since we don't have a way
- * to autogrow it.
- */
- if (len > STACK_SIZE - GUARD_SIZE) {
- bsd_args.addr = (caddr_t)PTRIN(addr);
- bsd_args.len = len;
- } else {
- bsd_args.addr = (caddr_t)PTRIN(addr) -
- (STACK_SIZE - GUARD_SIZE - len);
- bsd_args.len = STACK_SIZE - GUARD_SIZE;
- }
- } else {
- bsd_args.addr = (caddr_t)PTRIN(addr);
- bsd_args.len = len;
- }
- bsd_args.pos = pos;
-
-#ifdef DEBUG
- if (ldebug(mmap))
- printf("-> %s(%p, %d, %d, 0x%08x, %d, 0x%x)\n",
- __func__,
- (void *)bsd_args.addr, (int)bsd_args.len, bsd_args.prot,
- bsd_args.flags, bsd_args.fd, (int)bsd_args.pos);
-#endif
- error = sys_mmap(td, &bsd_args);
-#ifdef DEBUG
- if (ldebug(mmap))
- printf("-> %s() return: 0x%x (0x%08x)\n",
- __func__, error, (u_int)td->td_retval[0]);
-#endif
- return (error);
-}
-
int
linux_mprotect(struct thread *td, struct linux_mprotect_args *uap)
{
- struct mprotect_args bsd_args;
-
- bsd_args.addr = uap->addr;
- bsd_args.len = uap->len;
- bsd_args.prot = uap->prot;
- if (bsd_args.prot & (PROT_READ | PROT_WRITE | PROT_EXEC))
- bsd_args.prot |= PROT_READ | PROT_EXEC;
- return (sys_mprotect(td, &bsd_args));
+
+ return (linux_mprotect_common(td, PTROUT(uap->addr), uap->len, uap->prot));
}
int
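As on amd64, the linux32 wrappers now call into the shared sys/compat/linux/linux_mmap.c. One detail worth noting from the removed mprotect path: requesting any protection implied PROT_READ | PROT_EXEC, for compatibility with Linux/i386 behaviour. A minimal userspace sketch of that widening (illustrative only; the helper name is invented):

#include <stdio.h>
#include <sys/mman.h>

/* Any of PROT_READ/WRITE/EXEC implies PROT_READ | PROT_EXEC. */
static int
linux_widen_prot(int prot)
{
	if (prot & (PROT_READ | PROT_WRITE | PROT_EXEC))
		prot |= PROT_READ | PROT_EXEC;
	return (prot);
}

int
main(void)
{
	printf("%#x\n", linux_widen_prot(PROT_WRITE));	/* read|write|exec */
	printf("%#x\n", linux_widen_prot(PROT_NONE));	/* stays 0 */
	return (0);
}
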
diff --git a/sys/amd64/linux32/linux32_proto.h b/sys/amd64/linux32/linux32_proto.h
index a960c09..b2d1a7e 100644
--- a/sys/amd64/linux32/linux32_proto.h
+++ b/sys/amd64/linux32/linux32_proto.h
@@ -3,7 +3,7 @@
*
* DO NOT EDIT-- this file is automatically generated.
* $FreeBSD$
- * created from FreeBSD: stable/10/sys/amd64/linux32/syscalls.master 297300 2016-03-27 06:10:51Z dchagin
+ * created from FreeBSD: stable/10/sys/amd64/linux32/syscalls.master 302962 2016-07-17 15:07:33Z dchagin
*/
#ifndef _LINUX32_SYSPROTO_H_
@@ -427,7 +427,7 @@ struct linux_sysfs_args {
char arg2_l_[PADL_(l_ulong)]; l_ulong arg2; char arg2_r_[PADR_(l_ulong)];
};
struct linux_personality_args {
- char per_l_[PADL_(l_ulong)]; l_ulong per; char per_r_[PADR_(l_ulong)];
+ char per_l_[PADL_(l_uint)]; l_uint per; char per_r_[PADR_(l_uint)];
};
struct linux_setfsuid16_args {
char uid_l_[PADL_(l_uid16_t)]; l_uid16_t uid; char uid_r_[PADR_(l_uid16_t)];
diff --git a/sys/amd64/linux32/linux32_syscall.h b/sys/amd64/linux32/linux32_syscall.h
index 25747e4..82764d0 100644
--- a/sys/amd64/linux32/linux32_syscall.h
+++ b/sys/amd64/linux32/linux32_syscall.h
@@ -3,7 +3,7 @@
*
* DO NOT EDIT-- this file is automatically generated.
* $FreeBSD$
- * created from FreeBSD: stable/10/sys/amd64/linux32/syscalls.master 297300 2016-03-27 06:10:51Z dchagin
+ * created from FreeBSD: stable/10/sys/amd64/linux32/syscalls.master 302962 2016-07-17 15:07:33Z dchagin
*/
#define LINUX32_SYS_linux_exit 1
diff --git a/sys/amd64/linux32/linux32_syscalls.c b/sys/amd64/linux32/linux32_syscalls.c
index ef965d7..0d995c3 100644
--- a/sys/amd64/linux32/linux32_syscalls.c
+++ b/sys/amd64/linux32/linux32_syscalls.c
@@ -3,7 +3,7 @@
*
* DO NOT EDIT-- this file is automatically generated.
* $FreeBSD$
- * created from FreeBSD: stable/10/sys/amd64/linux32/syscalls.master 297300 2016-03-27 06:10:51Z dchagin
+ * created from FreeBSD: stable/10/sys/amd64/linux32/syscalls.master 302962 2016-07-17 15:07:33Z dchagin
*/
const char *linux32_syscallnames[] = {
diff --git a/sys/amd64/linux32/linux32_sysent.c b/sys/amd64/linux32/linux32_sysent.c
index 6eb215e..ee766d7 100644
--- a/sys/amd64/linux32/linux32_sysent.c
+++ b/sys/amd64/linux32/linux32_sysent.c
@@ -3,7 +3,7 @@
*
* DO NOT EDIT-- this file is automatically generated.
* $FreeBSD$
- * created from FreeBSD: stable/10/sys/amd64/linux32/syscalls.master 297300 2016-03-27 06:10:51Z dchagin
+ * created from FreeBSD: stable/10/sys/amd64/linux32/syscalls.master 302962 2016-07-17 15:07:33Z dchagin
*/
#include "opt_compat.h"
diff --git a/sys/amd64/linux32/linux32_systrace_args.c b/sys/amd64/linux32/linux32_systrace_args.c
index cabfab7..f3bde85 100644
--- a/sys/amd64/linux32/linux32_systrace_args.c
+++ b/sys/amd64/linux32/linux32_systrace_args.c
@@ -910,7 +910,7 @@ systrace_args(int sysnum, void *params, uint64_t *uarg, int *n_args)
/* linux_personality */
case 136: {
struct linux_personality_args *p = params;
- iarg[0] = p->per; /* l_ulong */
+ iarg[0] = p->per; /* l_uint */
*n_args = 1;
break;
}
@@ -3715,7 +3715,7 @@ systrace_entry_setargdesc(int sysnum, int ndx, char *desc, size_t descsz)
case 136:
switch(ndx) {
case 0:
- p = "l_ulong";
+ p = "l_uint";
break;
default:
break;
diff --git a/sys/amd64/linux32/syscalls.master b/sys/amd64/linux32/syscalls.master
index 79cd2c8..20aa3c4 100644
--- a/sys/amd64/linux32/syscalls.master
+++ b/sys/amd64/linux32/syscalls.master
@@ -238,7 +238,7 @@
134 AUE_BDFLUSH STD { int linux_bdflush(void); }
135 AUE_NULL STD { int linux_sysfs(l_int option, \
l_ulong arg1, l_ulong arg2); }
-136 AUE_PERSONALITY STD { int linux_personality(l_ulong per); }
+136 AUE_PERSONALITY STD { int linux_personality(l_uint per); }
137 AUE_NULL UNIMPL afs_syscall
138 AUE_SETFSUID STD { int linux_setfsuid16(l_uid16_t uid); }
139 AUE_SETFSGID STD { int linux_setfsgid16(l_gid16_t gid); }
diff --git a/sys/cddl/compat/opensolaris/kern/opensolaris_lookup.c b/sys/cddl/compat/opensolaris/kern/opensolaris_lookup.c
index 848007e..4214ca8 100644
--- a/sys/cddl/compat/opensolaris/kern/opensolaris_lookup.c
+++ b/sys/cddl/compat/opensolaris/kern/opensolaris_lookup.c
@@ -89,13 +89,14 @@ traverse(vnode_t **cvpp, int lktype)
if (vfsp == NULL)
break;
error = vfs_busy(vfsp, 0);
+
/*
* tvp is NULL for *cvpp vnode, which we can't unlock.
- * At least some callers expect the reference to be
- * maintained to the original *cvpp
*/
if (tvp != NULL)
vput(cvp);
+ else
+ vrele(cvp);
if (error)
return (error);
diff --git a/sys/cddl/compat/opensolaris/kern/opensolaris_vfs.c b/sys/cddl/compat/opensolaris/kern/opensolaris_vfs.c
index 49becbc..6af1e8b 100644
--- a/sys/cddl/compat/opensolaris/kern/opensolaris_vfs.c
+++ b/sys/cddl/compat/opensolaris/kern/opensolaris_vfs.c
@@ -121,34 +121,39 @@ mount_snapshot(kthread_t *td, vnode_t **vpp, const char *fstype, char *fspath,
struct ucred *cr;
int error;
+ ASSERT_VOP_ELOCKED(*vpp, "mount_snapshot");
+
+ vp = *vpp;
+ *vpp = NULL;
+ error = 0;
+
/*
* Be ultra-paranoid about making sure the type and fspath
* variables will fit in our mp buffers, including the
* terminating NUL.
*/
if (strlen(fstype) >= MFSNAMELEN || strlen(fspath) >= MNAMELEN)
- return (ENAMETOOLONG);
-
- vfsp = vfs_byname_kld(fstype, td, &error);
- if (vfsp == NULL)
- return (ENODEV);
-
- vp = *vpp;
- if (vp->v_type != VDIR)
- return (ENOTDIR);
+ error = ENAMETOOLONG;
+ if (error == 0 && (vfsp = vfs_byname_kld(fstype, td, &error)) == NULL)
+ error = ENODEV;
+ if (error == 0 && vp->v_type != VDIR)
+ error = ENOTDIR;
/*
* We need vnode lock to protect v_mountedhere and vnode interlock
* to protect v_iflag.
*/
- vn_lock(vp, LK_SHARED | LK_RETRY);
- VI_LOCK(vp);
- if ((vp->v_iflag & VI_MOUNT) != 0 || vp->v_mountedhere != NULL) {
+ if (error == 0) {
+ VI_LOCK(vp);
+ if ((vp->v_iflag & VI_MOUNT) == 0 && vp->v_mountedhere == NULL)
+ vp->v_iflag |= VI_MOUNT;
+ else
+ error = EBUSY;
VI_UNLOCK(vp);
- VOP_UNLOCK(vp, 0);
- return (EBUSY);
}
- vp->v_iflag |= VI_MOUNT;
- VI_UNLOCK(vp);
+ if (error != 0) {
+ vput(vp);
+ return (error);
+ }
VOP_UNLOCK(vp, 0);
/*
@@ -198,7 +203,6 @@ mount_snapshot(kthread_t *td, vnode_t **vpp, const char *fstype, char *fspath,
vfs_unbusy(mp);
vfs_freeopts(mp->mnt_optnew);
vfs_mount_destroy(mp);
- *vpp = NULL;
return (error);
}
@@ -229,7 +233,7 @@ mount_snapshot(kthread_t *td, vnode_t **vpp, const char *fstype, char *fspath,
vfs_event_signal(NULL, VQ_MOUNT, 0);
if (VFS_ROOT(mp, LK_EXCLUSIVE, &mvp))
panic("mount: lost mount");
- vput(vp);
+ VOP_UNLOCK(vp, 0);
vfs_unbusy(mp);
*vpp = mvp;
return (0);
diff --git a/sys/cddl/contrib/opensolaris/uts/common/dtrace/dtrace.c b/sys/cddl/contrib/opensolaris/uts/common/dtrace/dtrace.c
index ed99c4b..a087e6e 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/dtrace/dtrace.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/dtrace/dtrace.c
@@ -7288,7 +7288,7 @@ dtrace_probe(dtrace_id_t id, uintptr_t arg0, uintptr_t arg1,
if (pred != NULL) {
dtrace_difo_t *dp = pred->dtp_difo;
- int rval;
+ uint64_t rval;
rval = dtrace_dif_emulate(dp, &mstate, vstate, state);
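The one-line dtrace.c change above widens the predicate result from int to uint64_t: dtrace_dif_emulate() produces a 64-bit value, and truncating it to int can turn a non-zero ("true") predicate result into zero and wrongly suppress the probe. A tiny standalone illustration of the truncation hazard (not DTrace code):

#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	uint64_t rval64 = 0x100000000ULL;	/* non-zero: predicate is true */
	int rval32 = (int)rval64;		/* keeps low 32 bits only: 0 */

	printf("64-bit says %s, 32-bit says %s\n",
	    rval64 != 0 ? "fire" : "skip",
	    rval32 != 0 ? "fire" : "skip");
	return (0);
}
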
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/gfs.c b/sys/cddl/contrib/opensolaris/uts/common/fs/gfs.c
index c5c5c00..27d259d 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/gfs.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/gfs.c
@@ -442,19 +442,9 @@ gfs_lookup_dot(vnode_t **vpp, vnode_t *dvp, vnode_t *pvp, const char *nm)
*vpp = dvp;
return (0);
} else if (strcmp(nm, "..") == 0) {
- if (pvp == NULL) {
- ASSERT(dvp->v_flag & VROOT);
- VN_HOLD(dvp);
- *vpp = dvp;
- ASSERT_VOP_ELOCKED(dvp, "gfs_lookup_dot: non-locked dvp");
- } else {
- ltype = VOP_ISLOCKED(dvp);
- VOP_UNLOCK(dvp, 0);
- VN_HOLD(pvp);
- *vpp = pvp;
- vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY);
- vn_lock(dvp, ltype | LK_RETRY);
- }
+ ASSERT(pvp != NULL);
+ VN_HOLD(pvp);
+ *vpp = pvp;
return (0);
}
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c
index f6d19fe..9430037 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c
@@ -233,10 +233,15 @@ int zfs_disable_dup_eviction = 0;
uint64_t zfs_arc_average_blocksize = 8 * 1024; /* 8KB */
u_int zfs_arc_free_target = 0;
+/* Absolute min for arc min / max is 16MB. */
+static uint64_t arc_abs_min = 16 << 20;
+
static int sysctl_vfs_zfs_arc_free_target(SYSCTL_HANDLER_ARGS);
static int sysctl_vfs_zfs_arc_meta_limit(SYSCTL_HANDLER_ARGS);
+static int sysctl_vfs_zfs_arc_max(SYSCTL_HANDLER_ARGS);
+static int sysctl_vfs_zfs_arc_min(SYSCTL_HANDLER_ARGS);
-#ifdef _KERNEL
+#if defined(__FreeBSD__) && defined(_KERNEL)
static void
arc_free_target_init(void *unused __unused)
{
@@ -253,10 +258,10 @@ TUNABLE_QUAD("vfs.zfs.arc_meta_min", &zfs_arc_meta_min);
TUNABLE_QUAD("vfs.zfs.arc_average_blocksize", &zfs_arc_average_blocksize);
TUNABLE_INT("vfs.zfs.arc_shrink_shift", &zfs_arc_shrink_shift);
SYSCTL_DECL(_vfs_zfs);
-SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, arc_max, CTLFLAG_RDTUN, &zfs_arc_max, 0,
- "Maximum ARC size");
-SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, arc_min, CTLFLAG_RDTUN, &zfs_arc_min, 0,
- "Minimum ARC size");
+SYSCTL_PROC(_vfs_zfs, OID_AUTO, arc_max, CTLTYPE_U64 | CTLFLAG_RWTUN,
+ 0, sizeof(uint64_t), sysctl_vfs_zfs_arc_max, "QU", "Maximum ARC size");
+SYSCTL_PROC(_vfs_zfs, OID_AUTO, arc_min, CTLTYPE_U64 | CTLFLAG_RWTUN,
+ 0, sizeof(uint64_t), sysctl_vfs_zfs_arc_min, "QU", "Minimum ARC size");
SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, arc_average_blocksize, CTLFLAG_RDTUN,
&zfs_arc_average_blocksize, 0,
"ARC average blocksize");
@@ -882,7 +887,7 @@ struct arc_buf_hdr {
l1arc_buf_hdr_t b_l1hdr;
};
-#ifdef _KERNEL
+#if defined(__FreeBSD__) && defined(_KERNEL)
static int
sysctl_vfs_zfs_arc_meta_limit(SYSCTL_HANDLER_ARGS)
{
@@ -900,6 +905,82 @@ sysctl_vfs_zfs_arc_meta_limit(SYSCTL_HANDLER_ARGS)
arc_meta_limit = val;
return (0);
}
+
+static int
+sysctl_vfs_zfs_arc_max(SYSCTL_HANDLER_ARGS)
+{
+ uint64_t val;
+ int err;
+
+ val = zfs_arc_max;
+ err = sysctl_handle_64(oidp, &val, 0, req);
+ if (err != 0 || req->newptr == NULL)
+ return (err);
+
+ if (zfs_arc_max == 0) {
+ /* Loader tunable so blindly set */
+ zfs_arc_max = val;
+ return (0);
+ }
+
+ if (val < arc_abs_min || val > kmem_size())
+ return (EINVAL);
+ if (val < arc_c_min)
+ return (EINVAL);
+ if (zfs_arc_meta_limit > 0 && val < zfs_arc_meta_limit)
+ return (EINVAL);
+
+ arc_c_max = val;
+
+ arc_c = arc_c_max;
+ arc_p = (arc_c >> 1);
+
+ if (zfs_arc_meta_limit == 0) {
+ /* limit meta-data to 1/4 of the arc capacity */
+ arc_meta_limit = arc_c_max / 4;
+ }
+
+ /* if kmem_flags are set, lets try to use less memory */
+ if (kmem_debugging())
+ arc_c = arc_c / 2;
+
+ zfs_arc_max = arc_c;
+
+ return (0);
+}
+
+static int
+sysctl_vfs_zfs_arc_min(SYSCTL_HANDLER_ARGS)
+{
+ uint64_t val;
+ int err;
+
+ val = zfs_arc_min;
+ err = sysctl_handle_64(oidp, &val, 0, req);
+ if (err != 0 || req->newptr == NULL)
+ return (err);
+
+ if (zfs_arc_min == 0) {
+ /* Loader tunable so blindly set */
+ zfs_arc_min = val;
+ return (0);
+ }
+
+ if (val < arc_abs_min || val > arc_c_max)
+ return (EINVAL);
+
+ arc_c_min = val;
+
+ if (zfs_arc_meta_min == 0)
+ arc_meta_min = arc_c_min / 2;
+
+ if (arc_c < arc_c_min)
+ arc_c = arc_c_min;
+
+ zfs_arc_min = arc_c_min;
+
+ return (0);
+}
#endif
static arc_buf_t *arc_eviction_list;
@@ -2251,6 +2332,7 @@ arc_buf_l2_cdata_free(arc_buf_hdr_t *hdr)
ASSERT3P(hdr->b_l1hdr.b_tmp_cdata, ==,
hdr->b_l1hdr.b_buf->b_data);
ASSERT3U(hdr->b_l2hdr.b_compress, ==, ZIO_COMPRESS_OFF);
+ hdr->b_l1hdr.b_tmp_cdata = NULL;
return;
}
@@ -5320,8 +5402,8 @@ arc_init(void)
arc_c = MIN(arc_c, vmem_size(heap_arena, VMEM_ALLOC | VMEM_FREE) / 8);
#endif
#endif /* illumos */
- /* set min cache to 1/32 of all memory, or 16MB, whichever is more */
- arc_c_min = MAX(arc_c / 4, 16 << 20);
+ /* set min cache to 1/32 of all memory, or arc_abs_min, whichever is more */
+ arc_c_min = MAX(arc_c / 4, arc_abs_min);
/* set max to 1/2 of all memory, or all but 1GB, whichever is more */
if (arc_c * 8 >= 1 << 30)
arc_c_max = (arc_c * 8) - (1 << 30);
@@ -5342,11 +5424,11 @@ arc_init(void)
#ifdef _KERNEL
/*
* Allow the tunables to override our calculations if they are
- * reasonable (ie. over 16MB)
+ * reasonable.
*/
- if (zfs_arc_max > 16 << 20 && zfs_arc_max < kmem_size())
+ if (zfs_arc_max > arc_abs_min && zfs_arc_max < kmem_size())
arc_c_max = zfs_arc_max;
- if (zfs_arc_min > 16 << 20 && zfs_arc_min <= arc_c_max)
+ if (zfs_arc_min > arc_abs_min && zfs_arc_min <= arc_c_max)
arc_c_min = zfs_arc_min;
#endif
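With the change above, vfs.zfs.arc_max and vfs.zfs.arc_min become read-write sysctls backed by handler functions rather than read-only loader tunables, so the ARC limits can be adjusted on a running system (for example with sysctl vfs.zfs.arc_max=<bytes>), subject to the sanity checks in the handlers. A standalone sketch of the arc_max validation rules, with the kernel quantities passed in as plain parameters (illustrative only, not the kernel code):

#include <errno.h>
#include <stdint.h>
#include <stdio.h>

#define ARC_ABS_MIN	(16ULL << 20)	/* 16MB floor, as in the patch */

static int
validate_arc_max(uint64_t val, uint64_t kmem_size, uint64_t arc_c_min,
    uint64_t zfs_arc_meta_limit)
{
	if (val < ARC_ABS_MIN || val > kmem_size)
		return (EINVAL);
	if (val < arc_c_min)
		return (EINVAL);
	if (zfs_arc_meta_limit > 0 && val < zfs_arc_meta_limit)
		return (EINVAL);
	return (0);
}

int
main(void)
{
	/* 8MB is below the absolute floor; 1GB inside a 4GB arena passes. */
	printf("%d\n", validate_arc_max(8ULL << 20, 4ULL << 30, 64ULL << 20, 0));
	printf("%d\n", validate_arc_max(1ULL << 30, 4ULL << 30, 64ULL << 20, 0));
	return (0);
}
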
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c
index 8e14a97..d8d186c 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c
@@ -1083,8 +1083,13 @@ dmu_read_uio_dnode(dnode_t *dn, uio_t *uio, uint64_t size)
else
XUIOSTAT_BUMP(xuiostat_rbuf_copied);
} else {
+#ifdef illumos
err = uiomove((char *)db->db_data + bufoff, tocpy,
UIO_READ, uio);
+#else
+ err = vn_io_fault_uiomove((char *)db->db_data + bufoff,
+ tocpy, uio);
+#endif
}
if (err)
break;
@@ -1178,6 +1183,7 @@ dmu_write_uio_dnode(dnode_t *dn, uio_t *uio, uint64_t size, dmu_tx_t *tx)
else
dmu_buf_will_dirty(db, tx);
+#ifdef illumos
/*
* XXX uiomove could block forever (eg. nfs-backed
* pages). There needs to be a uiolockdown() function
@@ -1186,6 +1192,10 @@ dmu_write_uio_dnode(dnode_t *dn, uio_t *uio, uint64_t size, dmu_tx_t *tx)
*/
err = uiomove((char *)db->db_data + bufoff, tocpy,
UIO_WRITE, uio);
+#else
+ err = vn_io_fault_uiomove((char *)db->db_data + bufoff, tocpy,
+ uio);
+#endif
if (tocpy == db->db_size)
dmu_buf_fill_done(db, tx);
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_pool.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_pool.c
index d1b8002..9c34f45 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_pool.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_pool.c
@@ -49,8 +49,8 @@
#include <sys/dsl_userhold.h>
#ifdef __FreeBSD__
-#include <sys/sysctl.h>
#include <sys/types.h>
+#include <sys/sysctl.h>
#endif
/*
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_ctldir.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_ctldir.h
index 1945686..8849003 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_ctldir.h
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_ctldir.h
@@ -61,7 +61,6 @@ int zfsctl_lookup_objset(vfs_t *vfsp, uint64_t objsetid, zfsvfs_t **zfsvfsp);
#define ZFSCTL_INO_ROOT 0x1
#define ZFSCTL_INO_SNAPDIR 0x2
-#define ZFSCTL_INO_SHARES 0x3
#ifdef __cplusplus
}
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ctldir.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ctldir.c
index 693ba41..cbafa03 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ctldir.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ctldir.c
@@ -223,7 +223,7 @@ zfsctl_root_inode_cb(vnode_t *vp, int index)
{
zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data;
- ASSERT(index <= 2);
+ ASSERT(index < 2);
if (index == 0)
return (ZFSCTL_INO_SNAPDIR);
@@ -294,6 +294,22 @@ zfsctl_root(znode_t *zp)
return (zp->z_zfsvfs->z_ctldir);
}
+static int
+zfsctl_common_print(ap)
+ struct vop_print_args /* {
+ struct vnode *a_vp;
+ } */ *ap;
+{
+ vnode_t *vp = ap->a_vp;
+ gfs_file_t *fp = vp->v_data;
+
+ printf(" parent = %p\n", fp->gfs_parent);
+ printf(" type = %d\n", fp->gfs_type);
+ printf(" index = %d\n", fp->gfs_index);
+ printf(" ino = %ju\n", (uintmax_t)fp->gfs_ino);
+ return (0);
+}
+
/*
* Common open routine. Disallow any write access.
*/
@@ -404,8 +420,6 @@ zfsctl_common_fid(ap)
ZFS_EXIT(zfsvfs);
return (SET_ERROR(ENOSPC));
}
-#else
- fidp->fid_len = SHORT_FID_LEN;
#endif
zfid = (zfid_short_t *)fidp;
@@ -454,25 +468,6 @@ zfsctl_shares_fid(ap)
return (error);
}
-static int
-zfsctl_common_reclaim(ap)
- struct vop_reclaim_args /* {
- struct vnode *a_vp;
- struct thread *a_td;
- } */ *ap;
-{
- vnode_t *vp = ap->a_vp;
-
- /*
- * Destroy the vm object and flush associated pages.
- */
- vnode_destroy_vobject(vp);
- VI_LOCK(vp);
- vp->v_data = NULL;
- VI_UNLOCK(vp);
- return (0);
-}
-
/*
* .zfs inode namespace
*
@@ -537,9 +532,20 @@ zfsctl_root_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, pathname_t *pnp,
ZFS_ENTER(zfsvfs);
if (strcmp(nm, "..") == 0) {
+#ifdef illumos
err = VFS_ROOT(dvp->v_vfsp, LK_EXCLUSIVE, vpp);
+#else
+ /*
+ * NB: can not use VFS_ROOT here as it would acquire
+ * the vnode lock of the parent (root) vnode while
+ * holding the child's (.zfs) lock.
+ */
+ znode_t *rootzp;
+
+ err = zfs_zget(zfsvfs, zfsvfs->z_root, &rootzp);
if (err == 0)
- VOP_UNLOCK(*vpp, 0);
+ *vpp = ZTOV(rootzp);
+#endif
} else {
err = gfs_vop_lookup(dvp, nm, vpp, pnp, flags, rdir,
cr, ct, direntflags, realpnp);
@@ -550,6 +556,61 @@ zfsctl_root_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, pathname_t *pnp,
return (err);
}
+static int
+zfsctl_freebsd_root_lookup(ap)
+ struct vop_lookup_args /* {
+ struct vnode *a_dvp;
+ struct vnode **a_vpp;
+ struct componentname *a_cnp;
+ } */ *ap;
+{
+ vnode_t *dvp = ap->a_dvp;
+ vnode_t **vpp = ap->a_vpp;
+ cred_t *cr = ap->a_cnp->cn_cred;
+ int flags = ap->a_cnp->cn_flags;
+ int lkflags = ap->a_cnp->cn_lkflags;
+ int nameiop = ap->a_cnp->cn_nameiop;
+ char nm[NAME_MAX + 1];
+ int err;
+
+ if ((flags & ISLASTCN) && (nameiop == RENAME || nameiop == CREATE))
+ return (EOPNOTSUPP);
+
+ ASSERT(ap->a_cnp->cn_namelen < sizeof(nm));
+ strlcpy(nm, ap->a_cnp->cn_nameptr, ap->a_cnp->cn_namelen + 1);
+relookup:
+ err = zfsctl_root_lookup(dvp, nm, vpp, NULL, 0, NULL, cr, NULL, NULL, NULL);
+ if (err == 0 && (nm[0] != '.' || nm[1] != '\0')) {
+ if (flags & ISDOTDOT) {
+ VOP_UNLOCK(dvp, 0);
+ err = vn_lock(*vpp, lkflags);
+ if (err != 0) {
+ vrele(*vpp);
+ *vpp = NULL;
+ }
+ vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
+ } else {
+ err = vn_lock(*vpp, LK_EXCLUSIVE);
+ if (err != 0) {
+ VERIFY3S(err, ==, ENOENT);
+ goto relookup;
+ }
+ }
+ }
+ return (err);
+}
+
+static int
+zfsctl_root_print(ap)
+ struct vop_print_args /* {
+ struct vnode *a_vp;
+ } */ *ap;
+{
+ printf(" .zfs node\n");
+ zfsctl_common_print(ap);
+ return (0);
+}
+
#ifdef illumos
static int
zfsctl_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr,
@@ -585,49 +646,6 @@ static const fs_operation_def_t zfsctl_tops_root[] = {
};
#endif /* illumos */
-/*
- * Special case the handling of "..".
- */
-/* ARGSUSED */
-int
-zfsctl_freebsd_root_lookup(ap)
- struct vop_lookup_args /* {
- struct vnode *a_dvp;
- struct vnode **a_vpp;
- struct componentname *a_cnp;
- } */ *ap;
-{
- vnode_t *dvp = ap->a_dvp;
- vnode_t **vpp = ap->a_vpp;
- cred_t *cr = ap->a_cnp->cn_cred;
- int flags = ap->a_cnp->cn_flags;
- int nameiop = ap->a_cnp->cn_nameiop;
- char nm[NAME_MAX + 1];
- int err;
- int ltype;
-
- if ((flags & ISLASTCN) && (nameiop == RENAME || nameiop == CREATE))
- return (EOPNOTSUPP);
-
- ASSERT(ap->a_cnp->cn_namelen < sizeof(nm));
- strlcpy(nm, ap->a_cnp->cn_nameptr, ap->a_cnp->cn_namelen + 1);
- err = zfsctl_root_lookup(dvp, nm, vpp, NULL, 0, NULL, cr, NULL, NULL, NULL);
- if (err == 0 && (nm[0] != '.' || nm[1] != '\0')) {
- ltype = VOP_ISLOCKED(dvp);
- if (flags & ISDOTDOT) {
- VN_HOLD(*vpp);
- VOP_UNLOCK(dvp, 0);
- }
- vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY);
- if (flags & ISDOTDOT) {
- VN_RELE(*vpp);
- vn_lock(dvp, ltype| LK_RETRY);
- }
- }
-
- return (err);
-}
-
static struct vop_vector zfsctl_ops_root = {
.vop_default = &default_vnodeops,
.vop_open = zfsctl_common_open,
@@ -643,6 +661,7 @@ static struct vop_vector zfsctl_ops_root = {
.vop_pathconf = zfsctl_pathconf,
#endif
.vop_fid = zfsctl_common_fid,
+ .vop_print = zfsctl_root_print,
};
/*
@@ -987,6 +1006,11 @@ zfsctl_snapdir_lookup(ap)
ZFS_ENTER(zfsvfs);
if (gfs_lookup_dot(vpp, dvp, zfsvfs->z_ctldir, nm) == 0) {
+ if (nm[0] == '.' && nm[1] == '.' && nm[2] =='\0') {
+ VOP_UNLOCK(dvp, 0);
+ VERIFY0(vn_lock(*vpp, LK_EXCLUSIVE));
+ VERIFY0(vn_lock(dvp, LK_EXCLUSIVE));
+ }
ZFS_EXIT(zfsvfs);
return (0);
}
@@ -1011,6 +1035,7 @@ zfsctl_snapdir_lookup(ap)
#endif
}
+relookup:
mutex_enter(&sdp->sd_lock);
search.se_name = (char *)nm;
if ((sep = avl_find(&sdp->sd_snaps, &search, &where)) != NULL) {
@@ -1018,7 +1043,6 @@ zfsctl_snapdir_lookup(ap)
VN_HOLD(*vpp);
err = traverse(vpp, LK_EXCLUSIVE | LK_RETRY);
if (err != 0) {
- VN_RELE(*vpp);
*vpp = NULL;
} else if (*vpp == sep->se_root) {
/*
@@ -1027,13 +1051,6 @@ zfsctl_snapdir_lookup(ap)
*/
VERIFY(zfsctl_snapshot_zname(dvp, nm, MAXNAMELEN, snapname) == 0);
goto domount;
- } else {
- /*
- * VROOT was set during the traverse call. We need
- * to clear it since we're pretending to be part
- * of our parent's vfs.
- */
- (*vpp)->v_flag &= ~VROOT;
}
mutex_exit(&sdp->sd_lock);
ZFS_EXIT(zfsvfs);
@@ -1076,7 +1093,6 @@ zfsctl_snapdir_lookup(ap)
sep->se_name = kmem_alloc(strlen(nm) + 1, KM_SLEEP);
(void) strcpy(sep->se_name, nm);
*vpp = sep->se_root = zfsctl_snapshot_mknode(dvp, dmu_objset_id(snap));
- VN_HOLD(*vpp);
avl_insert(&sdp->sd_snaps, sep, where);
dmu_objset_rele(snap, FTAG);
@@ -1087,6 +1103,16 @@ domount:
(void) snprintf(mountpoint, mountpoint_len,
"%s/" ZFS_CTLDIR_NAME "/snapshot/%s",
dvp->v_vfsp->mnt_stat.f_mntonname, nm);
+ mutex_exit(&sdp->sd_lock);
+
+ /*
+ * The vnode may get reclaimed between dropping sd_lock and
+ * getting the vnode lock.
+ * */
+ err = vn_lock(*vpp, LK_EXCLUSIVE);
+ if (err == ENOENT)
+ goto relookup;
+ VERIFY0(err);
err = mount_snapshot(curthread, vpp, "zfs", mountpoint, snapname, 0);
kmem_free(mountpoint, mountpoint_len);
if (err == 0) {
@@ -1099,8 +1125,8 @@ domount:
*/
ASSERT(VTOZ(*vpp)->z_zfsvfs != zfsvfs);
VTOZ(*vpp)->z_zfsvfs->z_parent = zfsvfs;
+ (*vpp)->v_flag &= ~VROOT;
}
- mutex_exit(&sdp->sd_lock);
ZFS_EXIT(zfsvfs);
#ifdef illumos
@@ -1142,6 +1168,11 @@ zfsctl_shares_lookup(ap)
strlcpy(nm, cnp->cn_nameptr, cnp->cn_namelen + 1);
if (gfs_lookup_dot(vpp, dvp, zfsvfs->z_ctldir, nm) == 0) {
+ if (nm[0] == '.' && nm[1] == '.' && nm[2] =='\0') {
+ VOP_UNLOCK(dvp, 0);
+ VERIFY0(vn_lock(*vpp, LK_EXCLUSIVE));
+ VERIFY0(vn_lock(dvp, LK_EXCLUSIVE));
+ }
ZFS_EXIT(zfsvfs);
return (0);
}
@@ -1348,8 +1379,8 @@ zfsctl_snapdir_getattr(ap)
/* ARGSUSED */
static int
-zfsctl_snapdir_inactive(ap)
- struct vop_inactive_args /* {
+zfsctl_snapdir_reclaim(ap)
+ struct vop_reclaim_args /* {
struct vnode *a_vp;
struct thread *a_td;
} */ *ap;
@@ -1358,22 +1389,37 @@ zfsctl_snapdir_inactive(ap)
zfsctl_snapdir_t *sdp = vp->v_data;
zfs_snapentry_t *sep;
- /*
- * On forced unmount we have to free snapshots from here.
- */
- mutex_enter(&sdp->sd_lock);
- while ((sep = avl_first(&sdp->sd_snaps)) != NULL) {
- avl_remove(&sdp->sd_snaps, sep);
- kmem_free(sep->se_name, strlen(sep->se_name) + 1);
- kmem_free(sep, sizeof (zfs_snapentry_t));
- }
- mutex_exit(&sdp->sd_lock);
- gfs_dir_inactive(vp);
ASSERT(avl_numnodes(&sdp->sd_snaps) == 0);
mutex_destroy(&sdp->sd_lock);
avl_destroy(&sdp->sd_snaps);
- kmem_free(sdp, sizeof (zfsctl_snapdir_t));
+ gfs_vop_reclaim(ap);
+
+ return (0);
+}
+
+static int
+zfsctl_shares_print(ap)
+ struct vop_print_args /* {
+ struct vnode *a_vp;
+ } */ *ap;
+{
+ printf(" .zfs/shares node\n");
+ zfsctl_common_print(ap);
+ return (0);
+}
+static int
+zfsctl_snapdir_print(ap)
+ struct vop_print_args /* {
+ struct vnode *a_vp;
+ } */ *ap;
+{
+ vnode_t *vp = ap->a_vp;
+ zfsctl_snapdir_t *sdp = vp->v_data;
+
+ printf(" .zfs/snapshot node\n");
+ printf(" number of children = %lu\n", avl_numnodes(&sdp->sd_snaps));
+ zfsctl_common_print(ap);
return (0);
}
@@ -1419,9 +1465,10 @@ static struct vop_vector zfsctl_ops_snapdir = {
.vop_mkdir = zfsctl_freebsd_snapdir_mkdir,
.vop_readdir = gfs_vop_readdir,
.vop_lookup = zfsctl_snapdir_lookup,
- .vop_inactive = zfsctl_snapdir_inactive,
- .vop_reclaim = zfsctl_common_reclaim,
+ .vop_inactive = VOP_NULL,
+ .vop_reclaim = zfsctl_snapdir_reclaim,
.vop_fid = zfsctl_common_fid,
+ .vop_print = zfsctl_snapdir_print,
};
static struct vop_vector zfsctl_ops_shares = {
@@ -1436,6 +1483,7 @@ static struct vop_vector zfsctl_ops_shares = {
.vop_inactive = VOP_NULL,
.vop_reclaim = gfs_vop_reclaim,
.vop_fid = zfsctl_shares_fid,
+ .vop_print = zfsctl_shares_print,
};
#endif /* illumos */
@@ -1454,7 +1502,6 @@ zfsctl_snapshot_mknode(vnode_t *pvp, uint64_t objset)
vp = gfs_dir_create(sizeof (zfsctl_node_t), pvp, pvp->v_vfsp,
&zfsctl_ops_snapshot, NULL, NULL, MAXNAMELEN, NULL, NULL);
- VN_HOLD(vp);
zcp = vp->v_data;
zcp->zc_id = objset;
VOP_UNLOCK(vp, 0);
@@ -1462,17 +1509,28 @@ zfsctl_snapshot_mknode(vnode_t *pvp, uint64_t objset)
return (vp);
}
+static int
+zfsctl_snapshot_inactive(ap)
+ struct vop_inactive_args /* {
+ struct vnode *a_vp;
+ struct thread *a_td;
+ } */ *ap;
+{
+ vnode_t *vp = ap->a_vp;
+
+ vrecycle(vp);
+ return (0);
+}
static int
zfsctl_snapshot_reclaim(ap)
- struct vop_inactive_args /* {
+ struct vop_reclaim_args /* {
struct vnode *a_vp;
struct thread *a_td;
} */ *ap;
{
vnode_t *vp = ap->a_vp;
cred_t *cr = ap->a_td->td_ucred;
- struct vop_reclaim_args iap;
zfsctl_snapdir_t *sdp;
zfs_snapentry_t *sep, *next;
int locked;
@@ -1480,7 +1538,6 @@ zfsctl_snapshot_reclaim(ap)
VERIFY(gfs_dir_lookup(vp, "..", &dvp, cr, 0, NULL, NULL) == 0);
sdp = dvp->v_data;
- VOP_UNLOCK(dvp, 0);
/* this may already have been unmounted */
if (sdp == NULL) {
VN_RELE(dvp);
@@ -1516,106 +1573,12 @@ zfsctl_snapshot_reclaim(ap)
* "active". If we lookup the same name again we will end up
* creating a new vnode.
*/
- iap.a_vp = vp;
- gfs_vop_reclaim(&iap);
+ gfs_vop_reclaim(ap);
return (0);
}
static int
-zfsctl_traverse_begin(vnode_t **vpp, int lktype)
-{
-
- VN_HOLD(*vpp);
- /* Snapshot should be already mounted, but just in case. */
- if (vn_mountedvfs(*vpp) == NULL)
- return (ENOENT);
- return (traverse(vpp, lktype));
-}
-
-static void
-zfsctl_traverse_end(vnode_t *vp, int err)
-{
-
- if (err == 0)
- vput(vp);
- else
- VN_RELE(vp);
-}
-
-static int
-zfsctl_snapshot_getattr(ap)
- struct vop_getattr_args /* {
- struct vnode *a_vp;
- struct vattr *a_vap;
- struct ucred *a_cred;
- } */ *ap;
-{
- vnode_t *vp = ap->a_vp;
- int err;
-
- err = zfsctl_traverse_begin(&vp, LK_SHARED | LK_RETRY);
- if (err == 0)
- err = VOP_GETATTR(vp, ap->a_vap, ap->a_cred);
- zfsctl_traverse_end(vp, err);
- return (err);
-}
-
-static int
-zfsctl_snapshot_fid(ap)
- struct vop_fid_args /* {
- struct vnode *a_vp;
- struct fid *a_fid;
- } */ *ap;
-{
- vnode_t *vp = ap->a_vp;
- int err;
-
- err = zfsctl_traverse_begin(&vp, LK_SHARED | LK_RETRY);
- if (err == 0)
- err = VOP_VPTOFH(vp, (void *)ap->a_fid);
- zfsctl_traverse_end(vp, err);
- return (err);
-}
-
-static int
-zfsctl_snapshot_lookup(ap)
- struct vop_lookup_args /* {
- struct vnode *a_dvp;
- struct vnode **a_vpp;
- struct componentname *a_cnp;
- } */ *ap;
-{
- vnode_t *dvp = ap->a_dvp;
- vnode_t **vpp = ap->a_vpp;
- struct componentname *cnp = ap->a_cnp;
- cred_t *cr = ap->a_cnp->cn_cred;
- zfsvfs_t *zfsvfs = dvp->v_vfsp->vfs_data;
- int error;
-
- if (cnp->cn_namelen != 2 || cnp->cn_nameptr[0] != '.' ||
- cnp->cn_nameptr[1] != '.') {
- return (ENOENT);
- }
-
- ASSERT(dvp->v_type == VDIR);
- ASSERT(zfsvfs->z_ctldir != NULL);
-
- error = zfsctl_root_lookup(zfsvfs->z_ctldir, "snapshot", vpp,
- NULL, 0, NULL, cr, NULL, NULL, NULL);
- if (error == 0) {
- int ltype = VOP_ISLOCKED(dvp);
- VN_HOLD(*vpp);
- VOP_UNLOCK(dvp, 0);
- vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY);
- VN_RELE(*vpp);
- vn_lock(dvp, ltype | LK_RETRY);
- }
-
- return (error);
-}
-
-static int
zfsctl_snapshot_vptocnp(struct vop_vptocnp_args *ap)
{
zfsvfs_t *zfsvfs = ap->a_vp->v_vfsp->vfs_data;
@@ -1657,18 +1620,31 @@ zfsctl_snapshot_vptocnp(struct vop_vptocnp_args *ap)
return (error);
}
+static int
+zfsctl_snaphot_print(ap)
+ struct vop_print_args /* {
+ struct vnode *a_vp;
+ } */ *ap;
+{
+ vnode_t *vp = ap->a_vp;
+ zfsctl_node_t *zcp = vp->v_data;
+
+ printf(" .zfs/snapshot/<snap> node\n");
+ printf(" id = %ju\n", (uintmax_t)zcp->zc_id);
+ zfsctl_common_print(ap);
+ return (0);
+}
+
/*
* These VP's should never see the light of day. They should always
* be covered.
*/
static struct vop_vector zfsctl_ops_snapshot = {
.vop_default = &default_vnodeops,
- .vop_inactive = VOP_NULL,
- .vop_lookup = zfsctl_snapshot_lookup,
+ .vop_inactive = zfsctl_snapshot_inactive,
.vop_reclaim = zfsctl_snapshot_reclaim,
- .vop_getattr = zfsctl_snapshot_getattr,
- .vop_fid = zfsctl_snapshot_fid,
.vop_vptocnp = zfsctl_snapshot_vptocnp,
+ .vop_print = zfsctl_snaphot_print,
};
int
@@ -1709,16 +1685,15 @@ zfsctl_lookup_objset(vfs_t *vfsp, uint64_t objsetid, zfsvfs_t **zfsvfsp)
*/
error = traverse(&vp, LK_SHARED | LK_RETRY);
if (error == 0) {
- if (vp == sep->se_root)
+ if (vp == sep->se_root) {
+ VN_RELE(vp); /* release covered vp */
error = SET_ERROR(EINVAL);
- else
+ } else {
*zfsvfsp = VTOZ(vp)->z_zfsvfs;
+ VN_URELE(vp); /* put snapshot's root vp */
+ }
}
mutex_exit(&sdp->sd_lock);
- if (error == 0)
- VN_URELE(vp);
- else
- VN_RELE(vp);
} else {
error = SET_ERROR(EINVAL);
mutex_exit(&sdp->sd_lock);
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ioctl.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ioctl.c
index 651c596..4d6be94 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ioctl.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ioctl.c
@@ -1429,6 +1429,7 @@ static int
getzfsvfs(const char *dsname, zfsvfs_t **zfvp)
{
objset_t *os;
+ vfs_t *vfsp;
int error;
error = dmu_objset_hold(dsname, FTAG, &os);
@@ -1442,19 +1443,21 @@ getzfsvfs(const char *dsname, zfsvfs_t **zfvp)
mutex_enter(&os->os_user_ptr_lock);
*zfvp = dmu_objset_get_user(os);
if (*zfvp) {
-#ifdef illumos
- VFS_HOLD((*zfvp)->z_vfs);
-#else
- if (vfs_busy((*zfvp)->z_vfs, 0) != 0) {
- *zfvp = NULL;
- error = SET_ERROR(ESRCH);
- }
-#endif
+ vfsp = (*zfvp)->z_vfs;
+ vfs_ref(vfsp);
} else {
error = SET_ERROR(ESRCH);
}
mutex_exit(&os->os_user_ptr_lock);
dmu_objset_rele(os, FTAG);
+ if (error == 0) {
+ error = vfs_busy(vfsp, 0);
+ vfs_rel(vfsp);
+ if (error != 0) {
+ *zfvp = NULL;
+ error = SET_ERROR(ESRCH);
+ }
+ }
return (error);
}
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c
index fa6eac7..9bb6091 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c
@@ -1171,6 +1171,7 @@ zfs_domount(vfs_t *vfsp, char *osname)
vfsp->mnt_kern_flag |= MNTK_LOOKUP_SHARED;
vfsp->mnt_kern_flag |= MNTK_SHARED_WRITES;
vfsp->mnt_kern_flag |= MNTK_EXTENDED_SHARED;
+ vfsp->mnt_kern_flag |= MNTK_NO_IOPF; /* vn_io_fault can be used */
/*
* The fsid is 64 bits, composed of an 8-bit fs type, which
@@ -1782,12 +1783,11 @@ zfs_root(vfs_t *vfsp, int flags, vnode_t **vpp)
if (error == 0) {
error = vn_lock(*vpp, flags);
- if (error == 0)
- (*vpp)->v_vflag |= VV_ROOT;
+ if (error != 0) {
+ VN_RELE(*vpp);
+ *vpp = NULL;
+ }
}
- if (error != 0)
- *vpp = NULL;
-
return (error);
}
@@ -2005,12 +2005,6 @@ zfs_umount(vfs_t *vfsp, int fflag)
*/
if (zfsvfs->z_ctldir != NULL)
zfsctl_destroy(zfsvfs);
- if (zfsvfs->z_issnap) {
- vnode_t *svp = vfsp->mnt_vnodecovered;
-
- if (svp->v_count >= 2)
- VN_RELE(svp);
- }
zfs_freevfs(vfsp);
return (0);
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c
index 2a15cdf..b0f11ac 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c
@@ -655,7 +655,11 @@ mappedread(vnode_t *vp, int nbytes, uio_t *uio)
zfs_vmobject_wunlock(obj);
va = zfs_map_page(pp, &sf);
+#ifdef illumos
error = uiomove(va + off, bytes, UIO_READ, uio);
+#else
+ error = vn_io_fault_uiomove(va + off, bytes, uio);
+#endif
zfs_unmap_page(sf);
zfs_vmobject_wlock(obj);
page_unhold(pp);
@@ -1033,18 +1037,31 @@ zfs_write(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct)
* holding up the transaction if the data copy hangs
* up on a pagefault (e.g., from an NFS server mapping).
*/
+#ifdef illumos
size_t cbytes;
+#endif
abuf = dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl),
max_blksz);
ASSERT(abuf != NULL);
ASSERT(arc_buf_size(abuf) == max_blksz);
+#ifdef illumos
if (error = uiocopy(abuf->b_data, max_blksz,
UIO_WRITE, uio, &cbytes)) {
dmu_return_arcbuf(abuf);
break;
}
ASSERT(cbytes == max_blksz);
+#else
+ ssize_t resid = uio->uio_resid;
+ error = vn_io_fault_uiomove(abuf->b_data, max_blksz, uio);
+ if (error != 0) {
+ uio->uio_offset -= resid - uio->uio_resid;
+ uio->uio_resid = resid;
+ dmu_return_arcbuf(abuf);
+ break;
+ }
+#endif
}
/*
@@ -1122,8 +1139,10 @@ zfs_write(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct)
dmu_assign_arcbuf(sa_get_db(zp->z_sa_hdl),
woff, abuf, tx);
}
+#ifdef illumos
ASSERT(tx_bytes <= uio->uio_resid);
uioskip(uio, tx_bytes);
+#endif
}
if (tx_bytes && vn_has_cached_data(vp)) {
update_pages(vp, woff, tx_bytes, zfsvfs->z_os,
@@ -1177,7 +1196,11 @@ zfs_write(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct)
while ((end_size = zp->z_size) < uio->uio_loffset) {
(void) atomic_cas_64(&zp->z_size, end_size,
uio->uio_loffset);
+#ifdef illumos
ASSERT(error == 0);
+#else
+ ASSERT(error == 0 || error == EFAULT);
+#endif
}
/*
* If we are replaying and eof is non zero then force
@@ -1187,7 +1210,10 @@ zfs_write(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct)
if (zfsvfs->z_replay && zfsvfs->z_replay_eof != 0)
zp->z_size = zfsvfs->z_replay_eof;
- error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);
+ if (error == 0)
+ error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);
+ else
+ (void) sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);
zfs_log_write(zilog, tx, TX_WRITE, zp, woff, tx_bytes, ioflag);
dmu_tx_commit(tx);
@@ -1214,6 +1240,17 @@ zfs_write(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct)
return (error);
}
+#ifdef __FreeBSD__
+ /*
+ * EFAULT means that at least one page of the source buffer was not
+ * available. VFS will re-try remaining I/O upon this error.
+ */
+ if (error == EFAULT) {
+ ZFS_EXIT(zfsvfs);
+ return (error);
+ }
+#endif
+
if (ioflag & (FSYNC | FDSYNC) ||
zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
zil_commit(zilog, zp->z_id);
@@ -7165,6 +7202,53 @@ zfs_freebsd_aclcheck(ap)
return (EOPNOTSUPP);
}
+static int
+zfs_vptocnp(struct vop_vptocnp_args *ap)
+{
+ vnode_t *covered_vp;
+ vnode_t *vp = ap->a_vp;;
+ zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data;
+ znode_t *zp = VTOZ(vp);
+ uint64_t parent;
+ int ltype;
+ int error;
+
+ ZFS_ENTER(zfsvfs);
+ ZFS_VERIFY_ZP(zp);
+
+ /*
+ * If we are a snapshot mounted under .zfs, run the operation
+ * on the covered vnode.
+ */
+ if ((error = sa_lookup(zp->z_sa_hdl,
+ SA_ZPL_PARENT(zfsvfs), &parent, sizeof (parent))) != 0) {
+ ZFS_EXIT(zfsvfs);
+ return (error);
+ }
+
+ if (zp->z_id != parent || zfsvfs->z_parent == zfsvfs) {
+ ZFS_EXIT(zfsvfs);
+ return (vop_stdvptocnp(ap));
+ }
+ ZFS_EXIT(zfsvfs);
+
+ covered_vp = vp->v_mount->mnt_vnodecovered;
+ vhold(covered_vp);
+ ltype = VOP_ISLOCKED(vp);
+ VOP_UNLOCK(vp, 0);
+ error = vget(covered_vp, LK_EXCLUSIVE, curthread);
+ vdrop(covered_vp);
+ if (error == 0) {
+ error = VOP_VPTOCNP(covered_vp, ap->a_vpp, ap->a_cred,
+ ap->a_buf, ap->a_buflen);
+ vput(covered_vp);
+ }
+ vn_lock(vp, ltype | LK_RETRY);
+ if ((vp->v_iflag & VI_DOOMED) != 0)
+ error = SET_ERROR(ENOENT);
+ return (error);
+}
+
struct vop_vector zfs_vnodeops;
struct vop_vector zfs_fifoops;
struct vop_vector zfs_shareops;
@@ -7210,6 +7294,7 @@ struct vop_vector zfs_vnodeops = {
.vop_aclcheck = zfs_freebsd_aclcheck,
.vop_getpages = zfs_freebsd_getpages,
.vop_putpages = zfs_freebsd_putpages,
+ .vop_vptocnp = zfs_vptocnp,
};
struct vop_vector zfs_fifoops = {
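Because zfs_write() now copies user data with vn_io_fault_uiomove(), the copy can fail part-way with EFAULT when a source page is not resident; the new code rewinds the uio to its pre-copy position and returns EFAULT so the vn_io_fault layer in VFS (enabled by the MNTK_NO_IOPF flag set in zfs_vfsops.c above) can fault the pages in and retry the remaining I/O. A minimal userspace illustration of the rewind arithmetic (struct uio reduced to the two fields involved):

#include <stdint.h>
#include <stdio.h>
#include <sys/types.h>

struct miniuio {
	off_t	uio_offset;
	ssize_t	uio_resid;
};

int
main(void)
{
	struct miniuio uio = { .uio_offset = 4096, .uio_resid = 131072 };
	ssize_t resid = uio.uio_resid;		/* saved before the copy */

	/* Pretend the copy moved 8192 bytes before hitting EFAULT. */
	uio.uio_offset += 8192;
	uio.uio_resid -= 8192;

	/* The rewind from the patch: undo the partial progress. */
	uio.uio_offset -= resid - uio.uio_resid;
	uio.uio_resid = resid;

	printf("offset=%jd resid=%zd\n",
	    (intmax_t)uio.uio_offset, uio.uio_resid);	/* 4096, 131072 */
	return (0);
}
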
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_znode.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_znode.c
index 431a05d..3853838 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_znode.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_znode.c
@@ -574,9 +574,10 @@ zfs_znode_sa_init(zfsvfs_t *zfsvfs, znode_t *zp,
zp->z_is_sa = (obj_type == DMU_OT_SA) ? B_TRUE : B_FALSE;
/*
- * Slap on VROOT if we are the root znode
+ * Slap on VROOT if we are the root znode unless we are the root
+ * node of a snapshot mounted under .zfs.
*/
- if (zp->z_id == zfsvfs->z_root)
+ if (zp->z_id == zfsvfs->z_root && zfsvfs->z_parent == zfsvfs)
ZTOV(zp)->v_flag |= VROOT;
mutex_exit(&zp->z_lock);
diff --git a/sys/compat/linux/linux_emul.c b/sys/compat/linux/linux_emul.c
index c2bf3ae..b244eea 100644
--- a/sys/compat/linux/linux_emul.c
+++ b/sys/compat/linux/linux_emul.c
@@ -45,6 +45,7 @@ __FBSDID("$FreeBSD$");
#include <compat/linux/linux_emul.h>
#include <compat/linux/linux_misc.h>
+#include <compat/linux/linux_persona.h>
#include <compat/linux/linux_util.h>
@@ -127,7 +128,7 @@ linux_proc_init(struct thread *td, struct thread *newtd, int flags)
/* epoll should be destroyed in a case of exec. */
pem = pem_find(p);
KASSERT(pem != NULL, ("proc_exit: proc emuldata not found.\n"));
-
+ pem->persona = 0;
if (pem->epoll != NULL) {
emd = pem->epoll;
pem->epoll = NULL;
@@ -220,6 +221,9 @@ linux_proc_exec(void *arg __unused, struct proc *p, struct image_params *imgp)
{
struct thread *td = curthread;
struct thread *othertd;
+#if defined(__amd64__)
+ struct linux_pemuldata *pem;
+#endif
/*
* In a case of execing from linux binary properly detach
@@ -243,6 +247,17 @@ linux_proc_exec(void *arg __unused, struct proc *p, struct image_params *imgp)
linux_proc_init(td, NULL, 0);
else
linux_proc_init(td, td, 0);
+#if defined(__amd64__)
+ /*
+	 * An IA32 executable which has an executable stack will have the
+ * READ_IMPLIES_EXEC personality flag set automatically.
+ */
+ if (SV_PROC_FLAG(td->td_proc, SV_ILP32) &&
+ imgp->stack_prot & VM_PROT_EXECUTE) {
+ pem = pem_find(p);
+ pem->persona |= LINUX_READ_IMPLIES_EXEC;
+ }
+#endif
}
}
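
The READ_IMPLIES_EXEC flag set here takes effect later when mappings are created: linux_fixup_prot() (added in linux_mmap.c further down in this diff) upgrades any readable mapping to be executable as well. The following stand-alone user-space sketch is illustrative only and not part of this commit; the flag value is copied from linux_persona.h and the PROT_* constants come from <sys/mman.h>:

    #include <stdio.h>
    #include <sys/mman.h>

    #define LINUX_READ_IMPLIES_EXEC 0x0400000   /* value from linux_persona.h */

    /* Model of linux_fixup_prot(): a readable mapping also becomes executable. */
    static int
    fixup_prot(unsigned int persona, int prot)
    {
            if ((persona & LINUX_READ_IMPLIES_EXEC) && (prot & PROT_READ))
                    prot |= PROT_EXEC;
            return (prot);
    }

    int
    main(void)
    {
            printf("PROT_READ %#x -> %#x\n", (unsigned)PROT_READ,
                (unsigned)fixup_prot(LINUX_READ_IMPLIES_EXEC, PROT_READ));
            return (0);
    }
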
diff --git a/sys/compat/linux/linux_emul.h b/sys/compat/linux/linux_emul.h
index 7262093..9a5a667 100644
--- a/sys/compat/linux/linux_emul.h
+++ b/sys/compat/linux/linux_emul.h
@@ -67,6 +67,7 @@ struct linux_pemuldata {
uint32_t flags; /* process emuldata flags */
struct sx pem_sx; /* lock for this struct */
void *epoll; /* epoll data */
+ uint32_t persona; /* process execution domain */
};
#define LINUX_PEM_XLOCK(p) sx_xlock(&(p)->pem_sx)
diff --git a/sys/compat/linux/linux_misc.c b/sys/compat/linux/linux_misc.c
index cb2379a5..2765dfd 100644
--- a/sys/compat/linux/linux_misc.c
+++ b/sys/compat/linux/linux_misc.c
@@ -1198,15 +1198,23 @@ linux_mknodat(struct thread *td, struct linux_mknodat_args *args)
int
linux_personality(struct thread *td, struct linux_personality_args *args)
{
+ struct linux_pemuldata *pem;
+ struct proc *p = td->td_proc;
+ uint32_t old;
+
#ifdef DEBUG
if (ldebug(personality))
- printf(ARGS(personality, "%lu"), (unsigned long)args->per);
+ printf(ARGS(personality, "%u"), args->per);
#endif
- if (args->per != 0)
- return (EINVAL);
- /* Yes Jim, it's still a Linux... */
- td->td_retval[0] = 0;
+ PROC_LOCK(p);
+ pem = pem_find(p);
+ old = pem->persona;
+ if (args->per != 0xffffffff)
+ pem->persona = args->per;
+ PROC_UNLOCK(p);
+
+ td->td_retval[0] = old;
return (0);
}
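
With this change linux_personality() follows the Linux contract: the previous persona is returned, and the special value 0xffffffff only queries the persona without modifying it. A minimal stand-alone model of that contract (hypothetical user-space sketch, not part of the commit; the persona variable stands in for pem->persona):

    #include <stdint.h>
    #include <stdio.h>

    static uint32_t persona;                /* models the per-process pem->persona */

    /* Model of the new linux_personality(): returns the old persona. */
    static uint32_t
    personality_model(uint32_t per)
    {
            uint32_t old = persona;

            if (per != 0xffffffffU)         /* 0xffffffff is "query only" */
                    persona = per;
            return (old);
    }

    int
    main(void)
    {
            personality_model(0x0400000);   /* set READ_IMPLIES_EXEC */
            printf("current persona: %#x\n",
                (unsigned)personality_model(0xffffffffU));
            return (0);
    }
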
diff --git a/sys/compat/linux/linux_mmap.c b/sys/compat/linux/linux_mmap.c
new file mode 100644
index 0000000..0a5557b
--- /dev/null
+++ b/sys/compat/linux/linux_mmap.c
@@ -0,0 +1,257 @@
+/*-
+ * Copyright (c) 2004 Tim J. Robbins
+ * Copyright (c) 2002 Doug Rabson
+ * Copyright (c) 2000 Marcel Moolenaar
+ * Copyright (c) 1994-1995 Søren Schmidt
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer
+ * in this position and unchanged.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/capsicum.h>
+#include <sys/file.h>
+#include <sys/imgact.h>
+#include <sys/ktr.h>
+#include <sys/mman.h>
+#include <sys/proc.h>
+#include <sys/resourcevar.h>
+#include <sys/sysent.h>
+#include <sys/sysproto.h>
+
+#include <vm/pmap.h>
+#include <vm/vm_map.h>
+
+#include <compat/linux/linux_emul.h>
+#include <compat/linux/linux_mmap.h>
+#include <compat/linux/linux_persona.h>
+#include <compat/linux/linux_util.h>
+
+
+#define STACK_SIZE (2 * 1024 * 1024)
+#define GUARD_SIZE (4 * PAGE_SIZE)
+
+#if defined(__amd64__)
+static void linux_fixup_prot(struct thread *td, int *prot);
+#endif
+
+
+int
+linux_mmap_common(struct thread *td, uintptr_t addr, size_t len, int prot,
+ int flags, int fd, off_t pos)
+{
+ struct proc *p = td->td_proc;
+ struct vmspace *vms = td->td_proc->p_vmspace;
+ struct mmap_args /* {
+ caddr_t addr;
+ size_t len;
+ int prot;
+ int flags;
+ int fd;
+ off_t pos;
+ } */ bsd_args;
+ int error;
+ struct file *fp;
+
+ cap_rights_t rights;
+ LINUX_CTR6(mmap2, "0x%lx, %ld, %ld, 0x%08lx, %ld, 0x%lx",
+ addr, len, prot, flags, fd, pos);
+
+ error = 0;
+ bsd_args.flags = 0;
+ fp = NULL;
+
+ /*
+ * Linux mmap(2):
+ * You must specify exactly one of MAP_SHARED and MAP_PRIVATE
+ */
+ if (!((flags & LINUX_MAP_SHARED) ^ (flags & LINUX_MAP_PRIVATE)))
+ return (EINVAL);
+
+ if (flags & LINUX_MAP_SHARED)
+ bsd_args.flags |= MAP_SHARED;
+ if (flags & LINUX_MAP_PRIVATE)
+ bsd_args.flags |= MAP_PRIVATE;
+ if (flags & LINUX_MAP_FIXED)
+ bsd_args.flags |= MAP_FIXED;
+ if (flags & LINUX_MAP_ANON) {
+ /* Enforce pos to be on page boundary, then ignore. */
+ if ((pos & PAGE_MASK) != 0)
+ return (EINVAL);
+ pos = 0;
+ bsd_args.flags |= MAP_ANON;
+ } else
+ bsd_args.flags |= MAP_NOSYNC;
+ if (flags & LINUX_MAP_GROWSDOWN)
+ bsd_args.flags |= MAP_STACK;
+
+ /*
+ * PROT_READ, PROT_WRITE, or PROT_EXEC implies PROT_READ and PROT_EXEC
+	 * on Linux/i386 if the binary requires an executable stack.
+	 * We do this only for IA32 emulation as on native i386 this does not
+	 * make sense without PAE.
+ *
+ * XXX. Linux checks that the file system is not mounted with noexec.
+ */
+ bsd_args.prot = prot;
+#if defined(__amd64__)
+ linux_fixup_prot(td, &bsd_args.prot);
+#endif
+
+ /* Linux does not check file descriptor when MAP_ANONYMOUS is set. */
+ bsd_args.fd = (bsd_args.flags & MAP_ANON) ? -1 : fd;
+ if (bsd_args.fd != -1) {
+ /*
+ * Linux follows Solaris mmap(2) description:
+ * The file descriptor fildes is opened with
+ * read permission, regardless of the
+ * protection options specified.
+ */
+
+ error = fget(td, bsd_args.fd,
+ cap_rights_init(&rights, CAP_MMAP), &fp);
+ if (error != 0)
+ return (error);
+ if (fp->f_type != DTYPE_VNODE) {
+ fdrop(fp, td);
+ return (EINVAL);
+ }
+
+ /* Linux mmap() just fails for O_WRONLY files */
+ if (!(fp->f_flag & FREAD)) {
+ fdrop(fp, td);
+ return (EACCES);
+ }
+
+ fdrop(fp, td);
+ }
+
+ if (flags & LINUX_MAP_GROWSDOWN) {
+ /*
+ * The Linux MAP_GROWSDOWN option does not limit auto
+ * growth of the region. Linux mmap with this option
+ * takes as addr the initial BOS, and as len, the initial
+ * region size. It can then grow down from addr without
+		 * limit. However, Linux threads have an implicit internal
+		 * limit to stack size of STACK_SIZE. It's just not
+		 * enforced explicitly in Linux. But here we impose
+ * a limit of (STACK_SIZE - GUARD_SIZE) on the stack
+ * region, since we can do this with our mmap.
+ *
+ * Our mmap with MAP_STACK takes addr as the maximum
+ * downsize limit on BOS, and as len the max size of
+ * the region. It then maps the top SGROWSIZ bytes,
+ * and auto grows the region down, up to the limit
+ * in addr.
+ *
+ * If we don't use the MAP_STACK option, the effect
+ * of this code is to allocate a stack region of a
+ * fixed size of (STACK_SIZE - GUARD_SIZE).
+ */
+
+ if ((caddr_t)addr + len > vms->vm_maxsaddr) {
+ /*
+ * Some Linux apps will attempt to mmap
+ * thread stacks near the top of their
+ * address space. If their TOS is greater
+ * than vm_maxsaddr, vm_map_growstack()
+ * will confuse the thread stack with the
+ * process stack and deliver a SEGV if they
+ * attempt to grow the thread stack past their
+ * current stacksize rlimit. To avoid this,
+ * adjust vm_maxsaddr upwards to reflect
+ * the current stacksize rlimit rather
+ * than the maximum possible stacksize.
+ * It would be better to adjust the
+ * mmap'ed region, but some apps do not check
+ * mmap's return value.
+ */
+ PROC_LOCK(p);
+ vms->vm_maxsaddr = (char *)p->p_sysent->sv_usrstack -
+ lim_cur(p, RLIMIT_STACK);
+ PROC_UNLOCK(p);
+ }
+
+ /*
+ * This gives us our maximum stack size and a new BOS.
+ * If we're using VM_STACK, then mmap will just map
+ * the top SGROWSIZ bytes, and let the stack grow down
+ * to the limit at BOS. If we're not using VM_STACK
+ * we map the full stack, since we don't have a way
+ * to autogrow it.
+ */
+ if (len > STACK_SIZE - GUARD_SIZE) {
+ bsd_args.addr = (caddr_t)addr;
+ bsd_args.len = len;
+ } else {
+ bsd_args.addr = (caddr_t)addr -
+ (STACK_SIZE - GUARD_SIZE - len);
+ bsd_args.len = STACK_SIZE - GUARD_SIZE;
+ }
+ } else {
+ bsd_args.addr = (caddr_t)addr;
+ bsd_args.len = len;
+ }
+ bsd_args.pos = pos;
+
+ error = sys_mmap(td, &bsd_args);
+
+ LINUX_CTR2(mmap2, "return: %d (%p)", error, td->td_retval[0]);
+
+ return (error);
+}
+
+int
+linux_mprotect_common(struct thread *td, uintptr_t addr, size_t len, int prot)
+{
+ struct mprotect_args bsd_args;
+
+ bsd_args.addr = (void *)addr;
+ bsd_args.len = len;
+ bsd_args.prot = prot;
+
+#if defined(__amd64__)
+ linux_fixup_prot(td, &bsd_args.prot);
+#endif
+ return (sys_mprotect(td, &bsd_args));
+}
+
+#if defined(__amd64__)
+static void
+linux_fixup_prot(struct thread *td, int *prot)
+{
+ struct linux_pemuldata *pem;
+
+ if (SV_PROC_FLAG(td->td_proc, SV_ILP32) && *prot & PROT_READ) {
+ pem = pem_find(td->td_proc);
+ if (pem->persona & LINUX_READ_IMPLIES_EXEC)
+ *prot |= PROT_EXEC;
+ }
+
+}
+#endif
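
To make the MAP_GROWSDOWN sizing in linux_mmap_common() above concrete, this stand-alone sketch (not part of the commit) reproduces just the address arithmetic: requests smaller than STACK_SIZE - GUARD_SIZE are widened to that size with the base moved down accordingly, larger requests pass through unchanged. The example addr and len are arbitrary and a 4 KB page size is assumed:

    #include <stdint.h>
    #include <stdio.h>

    #define PAGE_SIZE  4096UL                   /* assumed 4 KB pages */
    #define STACK_SIZE (2 * 1024 * 1024)        /* as in linux_mmap.c */
    #define GUARD_SIZE (4 * PAGE_SIZE)

    int
    main(void)
    {
            uintptr_t addr = 0xbfff0000UL;      /* arbitrary requested initial BOS */
            size_t len = 128 * 1024;            /* requested region size */
            uintptr_t bsd_addr;
            size_t bsd_len;

            if (len > STACK_SIZE - GUARD_SIZE) {
                    bsd_addr = addr;            /* too large: pass through */
                    bsd_len = len;
            } else {
                    /* Widen to the implicit Linux stack limit, moving BOS down. */
                    bsd_addr = addr - (STACK_SIZE - GUARD_SIZE - len);
                    bsd_len = STACK_SIZE - GUARD_SIZE;
            }
            printf("map %#lx + %zu bytes\n", (unsigned long)bsd_addr, bsd_len);
            return (0);
    }
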
diff --git a/sys/compat/linux/linux_mmap.h b/sys/compat/linux/linux_mmap.h
new file mode 100644
index 0000000..a27d99d
--- /dev/null
+++ b/sys/compat/linux/linux_mmap.h
@@ -0,0 +1,49 @@
+/*-
+ * Copyright (c) 2004 Tim J. Robbins
+ * Copyright (c) 2002 Doug Rabson
+ * Copyright (c) 2000 Marcel Moolenaar
+ * Copyright (c) 1994-1995 Søren Schmidt
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer
+ * in this position and unchanged.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _LINUX_MMAP_H_
+#define _LINUX_MMAP_H_
+
+/* mmap options */
+#define LINUX_MAP_SHARED 0x0001
+#define LINUX_MAP_PRIVATE 0x0002
+#define LINUX_MAP_FIXED 0x0010
+#define LINUX_MAP_ANON 0x0020
+#define LINUX_MAP_GROWSDOWN 0x0100
+
+
+int linux_mmap_common(struct thread *, uintptr_t, size_t, int, int,
+ int, off_t);
+int linux_mprotect_common(struct thread *, uintptr_t, size_t, int);
+
+#endif /* _LINUX_MMAP_H_ */
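
The comment in linux_mmap_common() states that exactly one of MAP_SHARED and MAP_PRIVATE must be given. That stated rule can be written as a small stand-alone predicate using the flag values defined above (hypothetical sketch, not part of the commit):

    #include <stdio.h>

    #define LINUX_MAP_SHARED   0x0001
    #define LINUX_MAP_PRIVATE  0x0002

    /* True when exactly one of the two sharing flags is present. */
    static int
    sharing_flags_valid(int flags)
    {
            return (((flags & LINUX_MAP_SHARED) != 0) ^
                ((flags & LINUX_MAP_PRIVATE) != 0));
    }

    int
    main(void)
    {
            printf("%d %d %d\n",
                sharing_flags_valid(LINUX_MAP_SHARED),                          /* 1 */
                sharing_flags_valid(0),                                         /* 0 */
                sharing_flags_valid(LINUX_MAP_SHARED | LINUX_MAP_PRIVATE));     /* 0 */
            return (0);
    }
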
diff --git a/sys/compat/linux/linux_persona.h b/sys/compat/linux/linux_persona.h
new file mode 100644
index 0000000..8d84134
--- /dev/null
+++ b/sys/compat/linux/linux_persona.h
@@ -0,0 +1,56 @@
+/*
+ * $FreeBSD$
+ */
+
+#ifndef LINUX_PERSONALITY_H
+#define LINUX_PERSONALITY_H
+
+/*
+ * Flags for bug emulation.
+ *
+ * These occupy the top three bytes.
+ */
+enum {
+ LINUX_UNAME26 = 0x0020000,
+ LINUX_ADDR_NO_RANDOMIZE = 0x0040000, /* disable randomization
+ * of VA space
+ */
+ LINUX_FDPIC_FUNCPTRS = 0x0080000, /* userspace function
+ * ptrs point to descriptors
+ * (signal handling)
+ */
+ LINUX_MMAP_PAGE_ZERO = 0x0100000,
+ LINUX_ADDR_COMPAT_LAYOUT = 0x0200000,
+ LINUX_READ_IMPLIES_EXEC = 0x0400000,
+ LINUX_ADDR_LIMIT_32BIT = 0x0800000,
+ LINUX_SHORT_INODE = 0x1000000,
+ LINUX_WHOLE_SECONDS = 0x2000000,
+ LINUX_STICKY_TIMEOUTS = 0x4000000,
+ LINUX_ADDR_LIMIT_3GB = 0x8000000,
+};
+
+/*
+ * Security-relevant compatibility flags that must be
+ * cleared upon setuid or setgid exec:
+ */
+#define LINUX_PER_CLEAR_ON_SETID (LINUX_READ_IMPLIES_EXEC | \
+ LINUX_ADDR_NO_RANDOMIZE | \
+ LINUX_ADDR_COMPAT_LAYOUT | \
+ LINUX_MMAP_PAGE_ZERO)
+
+/*
+ * Personality types.
+ *
+ * These go in the low byte. Avoid using the top bit, it will
+ * conflict with error returns.
+ */
+enum {
+ LINUX_PER_LINUX = 0x0000,
+ LINUX_PER_LINUX_32BIT = 0x0000 | LINUX_ADDR_LIMIT_32BIT,
+ LINUX_PER_LINUX_FDPIC = 0x0000 | LINUX_FDPIC_FUNCPTRS,
+ LINUX_PER_LINUX32 = 0x0008,
+ LINUX_PER_LINUX32_3GB = 0x0008 | LINUX_ADDR_LIMIT_3GB,
+ LINUX_PER_MASK = 0x00ff,
+};
+
+#endif /* LINUX_PERSONALITY_H */
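
Since a persona value packs the personality type into the low byte (LINUX_PER_MASK) and the bug-emulation flags into the top three bytes, decomposing a value looks roughly like this stand-alone sketch (not part of the commit; constants copied from the header above):

    #include <stdint.h>
    #include <stdio.h>

    #define LINUX_READ_IMPLIES_EXEC 0x0400000
    #define LINUX_PER_MASK          0x00ff

    int
    main(void)
    {
            /* LINUX_PER_LINUX32 with READ_IMPLIES_EXEC set. */
            uint32_t persona = 0x0008 | LINUX_READ_IMPLIES_EXEC;

            printf("type %#x, READ_IMPLIES_EXEC %s\n",
                (unsigned)(persona & LINUX_PER_MASK),
                (persona & LINUX_READ_IMPLIES_EXEC) ? "set" : "clear");
            return (0);
    }
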
diff --git a/sys/conf/files.amd64 b/sys/conf/files.amd64
index 798d105..e8d9c7d 100644
--- a/sys/conf/files.amd64
+++ b/sys/conf/files.amd64
@@ -549,6 +549,7 @@ compat/linux/linux_ioctl.c optional compat_linux32
compat/linux/linux_ipc.c optional compat_linux32
compat/linux/linux_mib.c optional compat_linux32
compat/linux/linux_misc.c optional compat_linux32
+compat/linux/linux_mmap.c optional compat_linux32
compat/linux/linux_signal.c optional compat_linux32
compat/linux/linux_socket.c optional compat_linux32
compat/linux/linux_stats.c optional compat_linux32
diff --git a/sys/conf/files.i386 b/sys/conf/files.i386
index 72686d9..19b44ef 100644
--- a/sys/conf/files.i386
+++ b/sys/conf/files.i386
@@ -102,6 +102,7 @@ compat/linux/linux_ioctl.c optional compat_linux
compat/linux/linux_ipc.c optional compat_linux
compat/linux/linux_mib.c optional compat_linux
compat/linux/linux_misc.c optional compat_linux
+compat/linux/linux_mmap.c optional compat_linux
compat/linux/linux_signal.c optional compat_linux
compat/linux/linux_socket.c optional compat_linux
compat/linux/linux_stats.c optional compat_linux
diff --git a/sys/conf/files.pc98 b/sys/conf/files.pc98
index 1af0721..598743a 100644
--- a/sys/conf/files.pc98
+++ b/sys/conf/files.pc98
@@ -63,6 +63,7 @@ compat/linux/linux_ioctl.c optional compat_linux
compat/linux/linux_ipc.c optional compat_linux
compat/linux/linux_mib.c optional compat_linux
compat/linux/linux_misc.c optional compat_linux
+compat/linux/linux_mmap.c optional compat_linux
compat/linux/linux_signal.c optional compat_linux
compat/linux/linux_socket.c optional compat_linux
compat/linux/linux_stats.c optional compat_linux
diff --git a/sys/dev/ahci/ahci.c b/sys/dev/ahci/ahci.c
index 821682a..9db1c44 100644
--- a/sys/dev/ahci/ahci.c
+++ b/sys/dev/ahci/ahci.c
@@ -154,8 +154,8 @@ int
ahci_attach(device_t dev)
{
struct ahci_controller *ctlr = device_get_softc(dev);
- int error, i, u, speed, unit;
- u_int32_t version;
+ int error, i, speed, unit;
+ uint32_t u, version;
device_t child;
ctlr->dev = dev;
diff --git a/sys/dev/ahci/ahci.h b/sys/dev/ahci/ahci.h
index a243387..61643c1 100644
--- a/sys/dev/ahci/ahci.h
+++ b/sys/dev/ahci/ahci.h
@@ -472,7 +472,7 @@ struct ahci_enclosure {
uint8_t status[AHCI_MAX_PORTS][4]; /* ArrayDev statuses */
int quirks;
int channels;
- int ichannels;
+ uint32_t ichannels;
};
/* structure describing a AHCI controller */
@@ -503,7 +503,7 @@ struct ahci_controller {
int quirks;
int numirqs;
int channels;
- int ichannels;
+ uint32_t ichannels;
int ccc; /* CCC timeout */
int cccv; /* CCC vector */
int direct; /* Direct command completion */
diff --git a/sys/dev/e1000/if_igb.c b/sys/dev/e1000/if_igb.c
index ab445f6..48ef637 100644
--- a/sys/dev/e1000/if_igb.c
+++ b/sys/dev/e1000/if_igb.c
@@ -1161,10 +1161,27 @@ igb_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
}
}
#endif
+#if __FreeBSD_version >= 1000000
+ /* HW cannot turn these on/off separately */
+ if (mask & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6)) {
+ ifp->if_capenable ^= IFCAP_RXCSUM;
+ ifp->if_capenable ^= IFCAP_RXCSUM_IPV6;
+ reinit = 1;
+ }
+ if (mask & IFCAP_TXCSUM) {
+ ifp->if_capenable ^= IFCAP_TXCSUM;
+ reinit = 1;
+ }
+ if (mask & IFCAP_TXCSUM_IPV6) {
+ ifp->if_capenable ^= IFCAP_TXCSUM_IPV6;
+ reinit = 1;
+ }
+#else
if (mask & IFCAP_HWCSUM) {
ifp->if_capenable ^= IFCAP_HWCSUM;
reinit = 1;
}
+#endif
if (mask & IFCAP_TSO4) {
ifp->if_capenable ^= IFCAP_TSO4;
reinit = 1;
@@ -1243,14 +1260,26 @@ igb_init_locked(struct adapter *adapter)
/* Set hardware offload abilities */
ifp->if_hwassist = 0;
if (ifp->if_capenable & IFCAP_TXCSUM) {
+#if __FreeBSD_version >= 1000000
+ ifp->if_hwassist |= (CSUM_IP_TCP | CSUM_IP_UDP);
+ if (adapter->hw.mac.type != e1000_82575)
+ ifp->if_hwassist |= CSUM_IP_SCTP;
+#else
ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
#if __FreeBSD_version >= 800000
- if ((adapter->hw.mac.type == e1000_82576) ||
- (adapter->hw.mac.type == e1000_82580))
+ if (adapter->hw.mac.type != e1000_82575)
ifp->if_hwassist |= CSUM_SCTP;
#endif
+#endif
}
+#if __FreeBSD_version >= 1000000
+ if (ifp->if_capenable & IFCAP_TXCSUM_IPV6) {
+ ifp->if_hwassist |= (CSUM_IP6_TCP | CSUM_IP6_UDP);
+ if (adapter->hw.mac.type != e1000_82575)
+ ifp->if_hwassist |= CSUM_IP6_SCTP;
+ }
+#endif
if (ifp->if_capenable & IFCAP_TSO)
ifp->if_hwassist |= CSUM_TSO;
@@ -3044,6 +3073,9 @@ igb_setup_interface(device_t dev, struct adapter *adapter)
ifp->if_capabilities = ifp->if_capenable = 0;
ifp->if_capabilities = IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
+#if __FreeBSD_version >= 1000000
+ ifp->if_capabilities |= IFCAP_HWCSUM_IPV6;
+#endif
ifp->if_capabilities |= IFCAP_TSO;
ifp->if_capabilities |= IFCAP_JUMBO_MTU;
ifp->if_capenable = ifp->if_capabilities;
@@ -3817,17 +3849,29 @@ igb_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp,
switch (ipproto) {
case IPPROTO_TCP:
+#if __FreeBSD_version >= 1000000
+ if (mp->m_pkthdr.csum_flags & (CSUM_IP_TCP | CSUM_IP6_TCP))
+#else
if (mp->m_pkthdr.csum_flags & CSUM_TCP)
+#endif
type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
break;
case IPPROTO_UDP:
+#if __FreeBSD_version >= 1000000
+ if (mp->m_pkthdr.csum_flags & (CSUM_IP_UDP | CSUM_IP6_UDP))
+#else
if (mp->m_pkthdr.csum_flags & CSUM_UDP)
+#endif
type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_UDP;
break;
#if __FreeBSD_version >= 800000
case IPPROTO_SCTP:
+#if __FreeBSD_version >= 1000000
+ if (mp->m_pkthdr.csum_flags & (CSUM_IP_SCTP | CSUM_IP6_SCTP))
+#else
if (mp->m_pkthdr.csum_flags & CSUM_SCTP)
+#endif
type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_SCTP;
break;
#endif
@@ -4514,8 +4558,7 @@ igb_initialize_receive_units(struct adapter *adapter)
rxcsum |= E1000_RXCSUM_PCSD;
#if __FreeBSD_version >= 800000
/* For SCTP Offload */
- if (((hw->mac.type == e1000_82576) ||
- (hw->mac.type == e1000_82580)) &&
+ if ((hw->mac.type != e1000_82575) &&
(ifp->if_capenable & IFCAP_RXCSUM))
rxcsum |= E1000_RXCSUM_CRCOFL;
#endif
@@ -4524,8 +4567,7 @@ igb_initialize_receive_units(struct adapter *adapter)
if (ifp->if_capenable & IFCAP_RXCSUM) {
rxcsum |= E1000_RXCSUM_IPPCSE;
#if __FreeBSD_version >= 800000
- if ((adapter->hw.mac.type == e1000_82576) ||
- (adapter->hw.mac.type == e1000_82580))
+ if (adapter->hw.mac.type != e1000_82575)
rxcsum |= E1000_RXCSUM_CRCOFL;
#endif
} else
diff --git a/sys/dev/e1000/if_igb.h b/sys/dev/e1000/if_igb.h
index 431437d..f23bb0f 100644
--- a/sys/dev/e1000/if_igb.h
+++ b/sys/dev/e1000/if_igb.h
@@ -291,7 +291,11 @@
#define ETH_ADDR_LEN 6
/* Offload bits in mbuf flag */
-#if __FreeBSD_version >= 800000
+#if __FreeBSD_version >= 1000000
+#define CSUM_OFFLOAD_IPV4 (CSUM_IP|CSUM_IP_TCP|CSUM_IP_UDP|CSUM_IP_SCTP)
+#define CSUM_OFFLOAD_IPV6 (CSUM_IP6_TCP|CSUM_IP6_UDP|CSUM_IP6_SCTP)
+#define CSUM_OFFLOAD (CSUM_OFFLOAD_IPV4|CSUM_OFFLOAD_IPV6)
+#elif __FreeBSD_version >= 800000
#define CSUM_OFFLOAD (CSUM_IP|CSUM_TCP|CSUM_UDP|CSUM_SCTP)
#else
#define CSUM_OFFLOAD (CSUM_IP|CSUM_TCP|CSUM_UDP)
diff --git a/sys/dev/hyperv/storvsc/hv_storvsc_drv_freebsd.c b/sys/dev/hyperv/storvsc/hv_storvsc_drv_freebsd.c
index a89a762..d9c29e3 100644
--- a/sys/dev/hyperv/storvsc/hv_storvsc_drv_freebsd.c
+++ b/sys/dev/hyperv/storvsc/hv_storvsc_drv_freebsd.c
@@ -805,6 +805,13 @@ hv_storvsc_on_iocompletion(struct storvsc_softc *sc,
vm_srb = &vstor_packet->u.vm_srb;
+ /*
+ * Copy some fields of the host's response into the request structure,
+ * because the fields will be used later in storvsc_io_done().
+ */
+ request->vstor_packet.u.vm_srb.scsi_status = vm_srb->scsi_status;
+ request->vstor_packet.u.vm_srb.transfer_len = vm_srb->transfer_len;
+
if (((vm_srb->scsi_status & 0xFF) == SCSI_STATUS_CHECK_COND) &&
(vm_srb->srb_status & SRB_STATUS_AUTOSENSE_VALID)) {
/* Autosense data available */
@@ -1939,62 +1946,24 @@ create_storvsc_request(union ccb *ccb, struct hv_storvsc_request *reqp)
}
/*
- * Modified based on scsi_print_inquiry which is responsible to
- * print the detail information for scsi_inquiry_data.
- *
+ * SCSI INQUIRY validation checks the qualifier and type.
+ * If the qualifier is 011b, the device server is not capable
+ * of supporting a peripheral device on this logical unit, and
+ * the type should be set to 1Fh.
+ *
* Return 1 if it is valid, 0 otherwise.
*/
static inline int
is_inquiry_valid(const struct scsi_inquiry_data *inq_data)
{
uint8_t type;
- char vendor[16], product[48], revision[16];
-
- /*
- * Check device type and qualifier
- */
- if (!(SID_QUAL_IS_VENDOR_UNIQUE(inq_data) ||
- SID_QUAL(inq_data) == SID_QUAL_LU_CONNECTED))
+ if (SID_QUAL(inq_data) != SID_QUAL_LU_CONNECTED) {
return (0);
-
+ }
type = SID_TYPE(inq_data);
- switch (type) {
- case T_DIRECT:
- case T_SEQUENTIAL:
- case T_PRINTER:
- case T_PROCESSOR:
- case T_WORM:
- case T_CDROM:
- case T_SCANNER:
- case T_OPTICAL:
- case T_CHANGER:
- case T_COMM:
- case T_STORARRAY:
- case T_ENCLOSURE:
- case T_RBC:
- case T_OCRW:
- case T_OSD:
- case T_ADC:
- break;
- case T_NODEVICE:
- default:
+ if (type == T_NODEVICE) {
return (0);
}
-
- /*
- * Check vendor, product, and revision
- */
- cam_strvis(vendor, inq_data->vendor, sizeof(inq_data->vendor),
- sizeof(vendor));
- cam_strvis(product, inq_data->product, sizeof(inq_data->product),
- sizeof(product));
- cam_strvis(revision, inq_data->revision, sizeof(inq_data->revision),
- sizeof(revision));
- if (strlen(vendor) == 0 ||
- strlen(product) == 0 ||
- strlen(revision) == 0)
- return (0);
-
return (1);
}
@@ -2071,7 +2040,6 @@ storvsc_io_done(struct hv_storvsc_request *reqp)
ccb->ccb_h.status &= ~CAM_STATUS_MASK;
if (vm_srb->scsi_status == SCSI_STATUS_OK) {
const struct scsi_generic *cmd;
-
/*
* Check whether the data for INQUIRY cmd is valid or
* not. Windows 10 and Windows 2016 send all zero
@@ -2080,23 +2048,59 @@ storvsc_io_done(struct hv_storvsc_request *reqp)
cmd = (const struct scsi_generic *)
((ccb->ccb_h.flags & CAM_CDB_POINTER) ?
csio->cdb_io.cdb_ptr : csio->cdb_io.cdb_bytes);
- if (cmd->opcode == INQUIRY &&
- /*
- * XXX: Temporary work around disk hot plugin on win2k12r2,
- * only filtering the invalid disk on win10 or 2016 server.
- * So, the hot plugin on win10 and 2016 server needs
- * to be fixed.
+ if (cmd->opcode == INQUIRY) {
+ /*
+			 * A Windows 10 or Windows Server 2016 host responds to the
+			 * inquiry request with invalid data for a nonexistent device:
+			 *   [0x7f 0x0 0x5 0x2 0x1f ... ]
+			 * But on Windows 2012 R2, the response is:
+			 *   [0x7f 0x0 0x0 0x0 0x0 ]
+			 * That is why the inquiry response is validated here.
+			 * The validation skips INQUIRY responses that are short,
+			 * i.e. less than SHORT_INQUIRY_LENGTH (36).
+ *
+ * For more information about INQUIRY, please refer to:
+ * ftp://ftp.avc-pioneer.com/Mtfuji_7/Proposal/Jun09/INQUIRY.pdf
*/
- vmstor_proto_version == VMSTOR_PROTOCOL_VERSION_WIN10 &&
- is_inquiry_valid(
- (const struct scsi_inquiry_data *)csio->data_ptr) == 0) {
+ const struct scsi_inquiry_data *inq_data =
+ (const struct scsi_inquiry_data *)csio->data_ptr;
+ uint8_t* resp_buf = (uint8_t*)csio->data_ptr;
+ /* Get the buffer length reported by host */
+ int resp_xfer_len = vm_srb->transfer_len;
+ /* Get the available buffer length */
+ int resp_buf_len = resp_xfer_len >= 5 ? resp_buf[4] + 5 : 0;
+ int data_len = (resp_buf_len < resp_xfer_len) ? resp_buf_len : resp_xfer_len;
+ if (data_len < SHORT_INQUIRY_LENGTH) {
+ ccb->ccb_h.status |= CAM_REQ_CMP;
+ if (bootverbose && data_len >= 5) {
+ mtx_lock(&sc->hs_lock);
+ xpt_print(ccb->ccb_h.path,
+ "storvsc skips the validation for short inquiry (%d)"
+ " [%x %x %x %x %x]\n",
+ data_len,resp_buf[0],resp_buf[1],resp_buf[2],
+ resp_buf[3],resp_buf[4]);
+ mtx_unlock(&sc->hs_lock);
+ }
+ } else if (is_inquiry_valid(inq_data) == 0) {
ccb->ccb_h.status |= CAM_DEV_NOT_THERE;
+ if (bootverbose && data_len >= 5) {
+ mtx_lock(&sc->hs_lock);
+ xpt_print(ccb->ccb_h.path,
+ "storvsc uninstalled invalid device"
+ " [%x %x %x %x %x]\n",
+ resp_buf[0],resp_buf[1],resp_buf[2],resp_buf[3],resp_buf[4]);
+ mtx_unlock(&sc->hs_lock);
+ }
+ } else {
+ ccb->ccb_h.status |= CAM_REQ_CMP;
if (bootverbose) {
mtx_lock(&sc->hs_lock);
xpt_print(ccb->ccb_h.path,
- "storvsc uninstalled device\n");
+ "storvsc has passed inquiry response (%d) validation\n",
+ data_len);
mtx_unlock(&sc->hs_lock);
}
+ }
} else {
ccb->ccb_h.status |= CAM_REQ_CMP;
}
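
The length clamping used in the INQUIRY validation above (the additional-length byte at offset 4 plus the 5-byte header, bounded by the host-reported transfer length) can be exercised in isolation. The sketch below is illustrative only and not part of the commit; the sample buffers mirror the Windows responses quoted in the comment:

    #include <stdio.h>

    #define SHORT_INQUIRY_LENGTH 36

    /* Usable INQUIRY data length, mirroring the computation in storvsc_io_done(). */
    static int
    inquiry_data_len(const unsigned char *resp_buf, int resp_xfer_len)
    {
            int resp_buf_len;

            resp_buf_len = resp_xfer_len >= 5 ? resp_buf[4] + 5 : 0;
            return (resp_buf_len < resp_xfer_len ? resp_buf_len : resp_xfer_len);
    }

    int
    main(void)
    {
            unsigned char win10[36] = { 0x7f, 0x00, 0x05, 0x02, 0x1f };
            unsigned char w2012r2[5] = { 0x7f, 0x00, 0x00, 0x00, 0x00 };

            /* 36 goes on to validation; 5 < SHORT_INQUIRY_LENGTH, so it is skipped. */
            printf("%d %d\n",
                inquiry_data_len(win10, (int)sizeof(win10)),
                inquiry_data_len(w2012r2, (int)sizeof(w2012r2)));
            return (0);
    }
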
diff --git a/sys/i386/i386/exception.s b/sys/i386/i386/exception.s
index 5949f02..a3674c7 100644
--- a/sys/i386/i386/exception.s
+++ b/sys/i386/i386/exception.s
@@ -235,7 +235,7 @@ IDTVEC(lcall_syscall)
pushfl /* save eflags */
popl 8(%esp) /* shuffle into tf_eflags */
pushl $7 /* sizeof "lcall 7,0" */
- subl $4,%esp /* skip over tf_trapno */
+ pushl $0 /* tf_trapno */
pushal
pushl $0
movw %ds,(%esp)
@@ -264,7 +264,7 @@ IDTVEC(lcall_syscall)
SUPERALIGN_TEXT
IDTVEC(int0x80_syscall)
pushl $2 /* sizeof "int 0x80" */
- subl $4,%esp /* skip over tf_trapno */
+ pushl $0 /* tf_trapno */
pushal
pushl $0
movw %ds,(%esp)
diff --git a/sys/i386/linux/linux.h b/sys/i386/linux/linux.h
index 36b2084..42e836c 100644
--- a/sys/i386/linux/linux.h
+++ b/sys/i386/linux/linux.h
@@ -140,13 +140,6 @@ struct l_rlimit {
l_ulong rlim_max;
};
-/* mmap options */
-#define LINUX_MAP_SHARED 0x0001
-#define LINUX_MAP_PRIVATE 0x0002
-#define LINUX_MAP_FIXED 0x0010
-#define LINUX_MAP_ANON 0x0020
-#define LINUX_MAP_GROWSDOWN 0x0100
-
struct l_mmap_argv {
l_uintptr_t addr;
l_size_t len;
diff --git a/sys/i386/linux/linux_machdep.c b/sys/i386/linux/linux_machdep.c
index c9f969b..4b4b886 100644
--- a/sys/i386/linux/linux_machdep.c
+++ b/sys/i386/linux/linux_machdep.c
@@ -65,6 +65,7 @@ __FBSDID("$FreeBSD$");
#include <i386/linux/linux_proto.h>
#include <compat/linux/linux_ipc.h>
#include <compat/linux/linux_misc.h>
+#include <compat/linux/linux_mmap.h>
#include <compat/linux/linux_signal.h>
#include <compat/linux/linux_util.h>
#include <compat/linux/linux_emul.h>
@@ -95,10 +96,6 @@ struct l_old_select_argv {
struct l_timeval *timeout;
};
-static int linux_mmap_common(struct thread *td, l_uintptr_t addr,
- l_size_t len, l_int prot, l_int flags, l_int fd,
- l_loff_t pos);
-
int
linux_execve(struct thread *td, struct linux_execve_args *args)
@@ -340,9 +337,6 @@ linux_set_upcall_kse(struct thread *td, register_t stack)
return (0);
}
-#define STACK_SIZE (2 * 1024 * 1024)
-#define GUARD_SIZE (4 * PAGE_SIZE)
-
int
linux_mmap2(struct thread *td, struct linux_mmap2_args *args)
{
@@ -381,187 +375,11 @@ linux_mmap(struct thread *td, struct linux_mmap_args *args)
(uint32_t)linux_args.pgoff));
}
-static int
-linux_mmap_common(struct thread *td, l_uintptr_t addr, l_size_t len, l_int prot,
- l_int flags, l_int fd, l_loff_t pos)
-{
- struct proc *p = td->td_proc;
- struct mmap_args /* {
- caddr_t addr;
- size_t len;
- int prot;
- int flags;
- int fd;
- long pad;
- off_t pos;
- } */ bsd_args;
- int error;
- struct file *fp;
- cap_rights_t rights;
-
- error = 0;
- bsd_args.flags = 0;
- fp = NULL;
-
- /*
- * Linux mmap(2):
- * You must specify exactly one of MAP_SHARED and MAP_PRIVATE
- */
- if (!((flags & LINUX_MAP_SHARED) ^ (flags & LINUX_MAP_PRIVATE)))
- return (EINVAL);
-
- if (flags & LINUX_MAP_SHARED)
- bsd_args.flags |= MAP_SHARED;
- if (flags & LINUX_MAP_PRIVATE)
- bsd_args.flags |= MAP_PRIVATE;
- if (flags & LINUX_MAP_FIXED)
- bsd_args.flags |= MAP_FIXED;
- if (flags & LINUX_MAP_ANON) {
- /* Enforce pos to be on page boundary, then ignore. */
- if ((pos & PAGE_MASK) != 0)
- return (EINVAL);
- pos = 0;
- bsd_args.flags |= MAP_ANON;
- } else
- bsd_args.flags |= MAP_NOSYNC;
- if (flags & LINUX_MAP_GROWSDOWN)
- bsd_args.flags |= MAP_STACK;
-
- /*
- * PROT_READ, PROT_WRITE, or PROT_EXEC implies PROT_READ and PROT_EXEC
- * on Linux/i386. We do this to ensure maximum compatibility.
- * Linux/ia64 does the same in i386 emulation mode.
- */
- bsd_args.prot = prot;
- if (bsd_args.prot & (PROT_READ | PROT_WRITE | PROT_EXEC))
- bsd_args.prot |= PROT_READ | PROT_EXEC;
-
- /* Linux does not check file descriptor when MAP_ANONYMOUS is set. */
- bsd_args.fd = (bsd_args.flags & MAP_ANON) ? -1 : fd;
- if (bsd_args.fd != -1) {
- /*
- * Linux follows Solaris mmap(2) description:
- * The file descriptor fildes is opened with
- * read permission, regardless of the
- * protection options specified.
- *
- * Checking just CAP_MMAP is fine here, since the real work
- * is done in the FreeBSD mmap().
- */
-
- error = fget(td, bsd_args.fd,
- cap_rights_init(&rights, CAP_MMAP), &fp);
- if (error != 0)
- return (error);
- if (fp->f_type != DTYPE_VNODE) {
- fdrop(fp, td);
- return (EINVAL);
- }
-
- /* Linux mmap() just fails for O_WRONLY files */
- if (!(fp->f_flag & FREAD)) {
- fdrop(fp, td);
- return (EACCES);
- }
-
- fdrop(fp, td);
- }
-
- if (flags & LINUX_MAP_GROWSDOWN) {
- /*
- * The Linux MAP_GROWSDOWN option does not limit auto
- * growth of the region. Linux mmap with this option
- * takes as addr the inital BOS, and as len, the initial
- * region size. It can then grow down from addr without
- * limit. However, linux threads has an implicit internal
- * limit to stack size of STACK_SIZE. Its just not
- * enforced explicitly in linux. But, here we impose
- * a limit of (STACK_SIZE - GUARD_SIZE) on the stack
- * region, since we can do this with our mmap.
- *
- * Our mmap with MAP_STACK takes addr as the maximum
- * downsize limit on BOS, and as len the max size of
- * the region. It them maps the top SGROWSIZ bytes,
- * and auto grows the region down, up to the limit
- * in addr.
- *
- * If we don't use the MAP_STACK option, the effect
- * of this code is to allocate a stack region of a
- * fixed size of (STACK_SIZE - GUARD_SIZE).
- */
-
- if ((caddr_t)PTRIN(addr) + len > p->p_vmspace->vm_maxsaddr) {
- /*
- * Some linux apps will attempt to mmap
- * thread stacks near the top of their
- * address space. If their TOS is greater
- * than vm_maxsaddr, vm_map_growstack()
- * will confuse the thread stack with the
- * process stack and deliver a SEGV if they
- * attempt to grow the thread stack past their
- * current stacksize rlimit. To avoid this,
- * adjust vm_maxsaddr upwards to reflect
- * the current stacksize rlimit rather
- * than the maximum possible stacksize.
- * It would be better to adjust the
- * mmap'ed region, but some apps do not check
- * mmap's return value.
- */
- PROC_LOCK(p);
- p->p_vmspace->vm_maxsaddr = (char *)USRSTACK -
- lim_cur(p, RLIMIT_STACK);
- PROC_UNLOCK(p);
- }
-
- /*
- * This gives us our maximum stack size and a new BOS.
- * If we're using VM_STACK, then mmap will just map
- * the top SGROWSIZ bytes, and let the stack grow down
- * to the limit at BOS. If we're not using VM_STACK
- * we map the full stack, since we don't have a way
- * to autogrow it.
- */
- if (len > STACK_SIZE - GUARD_SIZE) {
- bsd_args.addr = (caddr_t)PTRIN(addr);
- bsd_args.len = len;
- } else {
- bsd_args.addr = (caddr_t)PTRIN(addr) -
- (STACK_SIZE - GUARD_SIZE - len);
- bsd_args.len = STACK_SIZE - GUARD_SIZE;
- }
- } else {
- bsd_args.addr = (caddr_t)PTRIN(addr);
- bsd_args.len = len;
- }
- bsd_args.pos = pos;
-
-#ifdef DEBUG
- if (ldebug(mmap))
- printf("-> %s(%p, %d, %d, 0x%08x, %d, 0x%x)\n",
- __func__,
- (void *)bsd_args.addr, bsd_args.len, bsd_args.prot,
- bsd_args.flags, bsd_args.fd, (int)bsd_args.pos);
-#endif
- error = sys_mmap(td, &bsd_args);
-#ifdef DEBUG
- if (ldebug(mmap))
- printf("-> %s() return: 0x%x (0x%08x)\n",
- __func__, error, (u_int)td->td_retval[0]);
-#endif
- return (error);
-}
-
int
linux_mprotect(struct thread *td, struct linux_mprotect_args *uap)
{
- struct mprotect_args bsd_args;
-
- bsd_args.addr = uap->addr;
- bsd_args.len = uap->len;
- bsd_args.prot = uap->prot;
- if (bsd_args.prot & (PROT_READ | PROT_WRITE | PROT_EXEC))
- bsd_args.prot |= PROT_READ | PROT_EXEC;
- return (sys_mprotect(td, &bsd_args));
+
+ return (linux_mprotect_common(td, PTROUT(uap->addr), uap->len, uap->prot));
}
int
diff --git a/sys/i386/linux/linux_proto.h b/sys/i386/linux/linux_proto.h
index 3717c9f..1b848cd 100644
--- a/sys/i386/linux/linux_proto.h
+++ b/sys/i386/linux/linux_proto.h
@@ -3,7 +3,7 @@
*
* DO NOT EDIT-- this file is automatically generated.
* $FreeBSD$
- * created from FreeBSD: stable/10/sys/i386/linux/syscalls.master 297300 2016-03-27 06:10:51Z dchagin
+ * created from FreeBSD: stable/10/sys/i386/linux/syscalls.master 302962 2016-07-17 15:07:33Z dchagin
*/
#ifndef _LINUX_SYSPROTO_H_
@@ -435,7 +435,7 @@ struct linux_sysfs_args {
char arg2_l_[PADL_(l_ulong)]; l_ulong arg2; char arg2_r_[PADR_(l_ulong)];
};
struct linux_personality_args {
- char per_l_[PADL_(l_ulong)]; l_ulong per; char per_r_[PADR_(l_ulong)];
+ char per_l_[PADL_(l_uint)]; l_uint per; char per_r_[PADR_(l_uint)];
};
struct linux_setfsuid16_args {
char uid_l_[PADL_(l_uid16_t)]; l_uid16_t uid; char uid_r_[PADR_(l_uid16_t)];
diff --git a/sys/i386/linux/linux_syscall.h b/sys/i386/linux/linux_syscall.h
index e9d8ef7..aa9e4a0 100644
--- a/sys/i386/linux/linux_syscall.h
+++ b/sys/i386/linux/linux_syscall.h
@@ -3,7 +3,7 @@
*
* DO NOT EDIT-- this file is automatically generated.
* $FreeBSD$
- * created from FreeBSD: stable/10/sys/i386/linux/syscalls.master 297300 2016-03-27 06:10:51Z dchagin
+ * created from FreeBSD: stable/10/sys/i386/linux/syscalls.master 302962 2016-07-17 15:07:33Z dchagin
*/
#define LINUX_SYS_linux_exit 1
diff --git a/sys/i386/linux/linux_syscalls.c b/sys/i386/linux/linux_syscalls.c
index 54d8423..8b43796 100644
--- a/sys/i386/linux/linux_syscalls.c
+++ b/sys/i386/linux/linux_syscalls.c
@@ -3,7 +3,7 @@
*
* DO NOT EDIT-- this file is automatically generated.
* $FreeBSD$
- * created from FreeBSD: stable/10/sys/i386/linux/syscalls.master 297300 2016-03-27 06:10:51Z dchagin
+ * created from FreeBSD: stable/10/sys/i386/linux/syscalls.master 302962 2016-07-17 15:07:33Z dchagin
*/
const char *linux_syscallnames[] = {
diff --git a/sys/i386/linux/linux_sysent.c b/sys/i386/linux/linux_sysent.c
index ce765b5..7218375 100644
--- a/sys/i386/linux/linux_sysent.c
+++ b/sys/i386/linux/linux_sysent.c
@@ -3,7 +3,7 @@
*
* DO NOT EDIT-- this file is automatically generated.
* $FreeBSD$
- * created from FreeBSD: stable/10/sys/i386/linux/syscalls.master 297300 2016-03-27 06:10:51Z dchagin
+ * created from FreeBSD: stable/10/sys/i386/linux/syscalls.master 302962 2016-07-17 15:07:33Z dchagin
*/
#include <sys/param.h>
diff --git a/sys/i386/linux/linux_systrace_args.c b/sys/i386/linux/linux_systrace_args.c
index f02f34f..0a0bf42 100644
--- a/sys/i386/linux/linux_systrace_args.c
+++ b/sys/i386/linux/linux_systrace_args.c
@@ -948,7 +948,7 @@ systrace_args(int sysnum, void *params, uint64_t *uarg, int *n_args)
/* linux_personality */
case 136: {
struct linux_personality_args *p = params;
- iarg[0] = p->per; /* l_ulong */
+ iarg[0] = p->per; /* l_uint */
*n_args = 1;
break;
}
@@ -3849,7 +3849,7 @@ systrace_entry_setargdesc(int sysnum, int ndx, char *desc, size_t descsz)
case 136:
switch(ndx) {
case 0:
- p = "l_ulong";
+ p = "l_uint";
break;
default:
break;
diff --git a/sys/i386/linux/syscalls.master b/sys/i386/linux/syscalls.master
index 7ec3154..5899adb 100644
--- a/sys/i386/linux/syscalls.master
+++ b/sys/i386/linux/syscalls.master
@@ -240,7 +240,7 @@
134 AUE_BDFLUSH STD { int linux_bdflush(void); }
135 AUE_NULL STD { int linux_sysfs(l_int option, \
l_ulong arg1, l_ulong arg2); }
-136 AUE_PERSONALITY STD { int linux_personality(l_ulong per); }
+136 AUE_PERSONALITY STD { int linux_personality(l_uint per); }
137 AUE_NULL UNIMPL afs_syscall
138 AUE_SETFSUID STD { int linux_setfsuid16(l_uid16_t uid); }
139 AUE_SETFSGID STD { int linux_setfsgid16(l_gid16_t gid); }
diff --git a/sys/kern/kern_mutex.c b/sys/kern/kern_mutex.c
index fb0b313..908f99a 100644
--- a/sys/kern/kern_mutex.c
+++ b/sys/kern/kern_mutex.c
@@ -658,8 +658,15 @@ thread_lock_flags_(struct thread *td, int opts, const char *file, int line)
i = 0;
tid = (uintptr_t)curthread;
- if (SCHEDULER_STOPPED())
+ if (SCHEDULER_STOPPED()) {
+ /*
+ * Ensure that spinlock sections are balanced even when the
+ * scheduler is stopped, since we may otherwise inadvertently
+ * re-enable interrupts while dumping core.
+ */
+ spinlock_enter();
return;
+ }
#ifdef KDTRACE_HOOKS
spin_time -= lockstat_nsecs(&td->td_lock->lock_object);
diff --git a/sys/kern/vfs_mount.c b/sys/kern/vfs_mount.c
index f5f522e..496b852 100644
--- a/sys/kern/vfs_mount.c
+++ b/sys/kern/vfs_mount.c
@@ -1315,7 +1315,8 @@ dounmount(struct mount *mp, int flags, struct thread *td)
*/
if ((flags & MNT_FORCE) &&
VFS_ROOT(mp, LK_EXCLUSIVE, &fsrootvp) == 0) {
- if (mp->mnt_vnodecovered != NULL)
+ if (mp->mnt_vnodecovered != NULL &&
+ (mp->mnt_flag & MNT_IGNORE) == 0)
mountcheckdirs(fsrootvp, mp->mnt_vnodecovered);
if (fsrootvp == rootvnode) {
vrele(rootvnode);
@@ -1336,7 +1337,8 @@ dounmount(struct mount *mp, int flags, struct thread *td)
if (error && error != ENXIO) {
if ((flags & MNT_FORCE) &&
VFS_ROOT(mp, LK_EXCLUSIVE, &fsrootvp) == 0) {
- if (mp->mnt_vnodecovered != NULL)
+ if (mp->mnt_vnodecovered != NULL &&
+ (mp->mnt_flag & MNT_IGNORE) == 0)
mountcheckdirs(mp->mnt_vnodecovered, fsrootvp);
if (rootvnode == NULL) {
rootvnode = fsrootvp;
diff --git a/sys/modules/linux/Makefile b/sys/modules/linux/Makefile
index 0e70ce6..124227e 100644
--- a/sys/modules/linux/Makefile
+++ b/sys/modules/linux/Makefile
@@ -30,7 +30,7 @@ SRCS+= opt_apic.h
OBJS= ${VDSO}.so
.if ${MACHINE_CPUARCH} == "i386"
-SRCS+= linux_ptrace.c imgact_linux.c linux_util.c linux_mib.c \
+SRCS+= linux_ptrace.c imgact_linux.c linux_util.c linux_mib.c linux_mmap.c \
linux_emul.c opt_cpu.h linux.c
.endif
diff --git a/sys/modules/linux_common/Makefile b/sys/modules/linux_common/Makefile
index 91449f7..2301796 100644
--- a/sys/modules/linux_common/Makefile
+++ b/sys/modules/linux_common/Makefile
@@ -3,7 +3,7 @@
.PATH: ${.CURDIR}/../../compat/linux
KMOD= linux_common
-SRCS= linux_common.c linux_mib.c linux_util.c linux_emul.c \
+SRCS= linux_common.c linux_mib.c linux_mmap.c linux_util.c linux_emul.c \
linux.c opt_compat.h device_if.h vnode_if.h bus_if.h
EXPORT_SYMS=
diff --git a/sys/net/mppcc.c b/sys/net/mppcc.c
index 01ce3ff..293ce46 100644
--- a/sys/net/mppcc.c
+++ b/sys/net/mppcc.c
@@ -233,7 +233,7 @@ int MPPC_Compress(u_char **src, u_char **dst, u_long *srcCnt, u_long *dstCnt, ch
putbits16(*dst, 0xc000|(off-320), 16, &olen, &l);
} else { /* NOTREACHED */
rtn &= ~MPPC_OK;
- return rtn;
+ return (rtn);
}
/* Encode length of match. */
diff --git a/sys/net/mppcd.c b/sys/net/mppcd.c
index c1730e5..d8e663c 100644
--- a/sys/net/mppcd.c
+++ b/sys/net/mppcd.c
@@ -170,7 +170,7 @@ int MPPC_Decompress(u_char **src, u_char **dst, u_long *srcCnt, u_long *dstCnt,
rtn &= ~MPPC_OK;
return (rtn);
}
- } else { /* NOTREACHED */
+ } else { /* This shouldn't happen. */
rtn &= ~MPPC_OK;
return (rtn);
}
diff --git a/sys/netinet/tcp_syncache.c b/sys/netinet/tcp_syncache.c
index 0398b03..03e8c72 100644
--- a/sys/netinet/tcp_syncache.c
+++ b/sys/netinet/tcp_syncache.c
@@ -683,7 +683,7 @@ syncache_socket(struct syncache *sc, struct socket *lso, struct mbuf *m)
* connection when the SYN arrived. If we can't create
* the connection, abort it.
*/
- so = sonewconn(lso, SS_ISCONNECTED);
+ so = sonewconn(lso, 0);
if (so == NULL) {
/*
* Drop the connection; we will either send a RST or
@@ -922,6 +922,8 @@ syncache_socket(struct syncache *sc, struct socket *lso, struct mbuf *m)
INP_WUNLOCK(inp);
+ soisconnected(so);
+
TCPSTAT_INC(tcps_accepts);
return (so);
diff --git a/sys/netinet/tcp_usrreq.c b/sys/netinet/tcp_usrreq.c
index 1ccbf9a..8d7e9a0 100644
--- a/sys/netinet/tcp_usrreq.c
+++ b/sys/netinet/tcp_usrreq.c
@@ -643,13 +643,6 @@ out:
/*
* Accept a connection. Essentially all the work is done at higher levels;
* just return the address of the peer, storing through addr.
- *
- * The rationale for acquiring the tcbinfo lock here is somewhat complicated,
- * and is described in detail in the commit log entry for r175612. Acquiring
- * it delays an accept(2) racing with sonewconn(), which inserts the socket
- * before the inpcb address/port fields are initialized. A better fix would
- * prevent the socket from being placed in the listen queue until all fields
- * are fully initialized.
*/
static int
tcp_usr_accept(struct socket *so, struct sockaddr **nam)
@@ -666,7 +659,6 @@ tcp_usr_accept(struct socket *so, struct sockaddr **nam)
inp = sotoinpcb(so);
KASSERT(inp != NULL, ("tcp_usr_accept: inp == NULL"));
- INP_INFO_RLOCK(&V_tcbinfo);
INP_WLOCK(inp);
if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
error = ECONNABORTED;
@@ -686,7 +678,6 @@ tcp_usr_accept(struct socket *so, struct sockaddr **nam)
out:
TCPDEBUG2(PRU_ACCEPT);
INP_WUNLOCK(inp);
- INP_INFO_RUNLOCK(&V_tcbinfo);
if (error == 0)
*nam = in_sockaddr(port, &addr);
return error;
diff --git a/sys/netpfil/ipfw/dn_sched_fq_pie.c b/sys/netpfil/ipfw/dn_sched_fq_pie.c
index 2883cf8..bfcd6c5 100644
--- a/sys/netpfil/ipfw/dn_sched_fq_pie.c
+++ b/sys/netpfil/ipfw/dn_sched_fq_pie.c
@@ -111,7 +111,7 @@ struct fq_pie_flow {
int deficit;
int active; /* 1: flow is active (in a list) */
struct pie_status pst; /* pie status variables */
- struct fq_pie_si *psi; /* parent scheduler instance */
+ struct fq_pie_si_extra *psi_extra;
STAILQ_ENTRY(fq_pie_flow) flowchain;
};
@@ -120,23 +120,30 @@ struct fq_pie_schk {
struct dn_sch_fq_pie_parms cfg;
};
+
+/* fq_pie scheduler instance extra state vars.
+ * The purpose of separating this structure is to preserve the number of
+ * active sub-queues and the flows array pointer even after the scheduler
+ * instance is destroyed.
+ * Preserving these variables allows the allocated memory to be freed by
+ * fqpie_callout_cleanup() independently of fq_pie_free_sched().
+ */
+struct fq_pie_si_extra {
+ uint32_t nr_active_q; /* number of active queues */
+ struct fq_pie_flow *flows; /* array of flows (queues) */
+ };
+
/* fq_pie scheduler instance */
struct fq_pie_si {
- struct dn_sch_inst _si; /* standard scheduler instance */
+ struct dn_sch_inst _si; /* standard scheduler instance. SHOULD BE FIRST */
struct dn_queue main_q; /* main queue is after si directly */
- uint32_t nr_active_q;
- struct fq_pie_flow *flows; /* array of flows (queues) */
uint32_t perturbation; /* random value */
struct fq_pie_list newflows; /* list of new queues */
struct fq_pie_list oldflows; /* list of old queues */
+ struct fq_pie_si_extra *si_extra; /* extra state vars*/
};
-struct mem_to_free {
- void *mem_flows;
- void *mem_callout;
-};
-static struct mtx freemem_mtx;
static struct dn_alg fq_pie_desc;
/* Default FQ-PIE parameters including PIE */
@@ -371,22 +378,6 @@ fq_calculate_drop_prob(void *x)
int64_t p, prob, oldprob;
aqm_time_t now;
- /* dealing with race condition */
- if (callout_pending(&pst->aqm_pie_callout)) {
- /* callout was reset */
- mtx_unlock(&pst->lock_mtx);
- return;
- }
-
- if (!callout_active(&pst->aqm_pie_callout)) {
- /* callout was stopped */
- mtx_unlock(&pst->lock_mtx);
- mtx_destroy(&pst->lock_mtx);
- q->psi->nr_active_q--;
- return;
- }
- callout_deactivate(&pst->aqm_pie_callout);
-
now = AQM_UNOW;
pprms = pst->parms;
prob = pst->drop_prob;
@@ -524,20 +515,17 @@ fq_deactivate_pie(struct pie_status *pst)
* Initialize PIE for sub-queue 'q'
*/
static int
-pie_init(struct fq_pie_flow *q)
+pie_init(struct fq_pie_flow *q, struct fq_pie_schk *fqpie_schk)
{
struct pie_status *pst=&q->pst;
struct dn_aqm_pie_parms *pprms = pst->parms;
- struct fq_pie_schk *fqpie_schk;
-
- fqpie_schk = (struct fq_pie_schk *)(q->psi->_si.sched+1);
- int err = 0;
+ int err = 0;
if (!pprms){
D("AQM_PIE is not configured");
err = EINVAL;
} else {
- q->psi->nr_active_q++;
+ q->psi_extra->nr_active_q++;
	/* For speed optimization, we calculate 1/3 of the queue size once here */
// XXX limit divided by number of queues divided by 3 ???
@@ -553,8 +541,36 @@ pie_init(struct fq_pie_flow *q)
}
/*
+ * Callout function to destroy the PIE lock and free the fq_pie flows and
+ * fq_pie si extra memory when the number of active sub-queues reaches zero.
+ * 'x' is the fq_pie_flow to be destroyed.
+ */
+static void
+fqpie_callout_cleanup(void *x)
+{
+ struct fq_pie_flow *q = x;
+ struct pie_status *pst = &q->pst;
+ struct fq_pie_si_extra *psi_extra;
+
+ mtx_unlock(&pst->lock_mtx);
+ mtx_destroy(&pst->lock_mtx);
+ psi_extra = q->psi_extra;
+
+ DN_BH_WLOCK();
+ psi_extra->nr_active_q--;
+
+	/* When all sub-queues are destroyed, free the flows array and the fq_pie extra vars memory. */
+ if (!psi_extra->nr_active_q) {
+ free(psi_extra->flows, M_DUMMYNET);
+ free(psi_extra, M_DUMMYNET);
+ fq_pie_desc.ref_count--;
+ }
+ DN_BH_WUNLOCK();
+}
+
+/*
* Clean up PIE status for sub-queue 'q'
- * Stop callout timer and destroy mtx
+ * Stop the callout timer and destroy the mtx via the fqpie_callout_cleanup() callout.
*/
static int
pie_cleanup(struct fq_pie_flow *q)
@@ -562,14 +578,9 @@ pie_cleanup(struct fq_pie_flow *q)
struct pie_status *pst = &q->pst;
mtx_lock(&pst->lock_mtx);
- if (callout_stop(&pst->aqm_pie_callout) || !(pst->sflags & PIE_ACTIVE)) {
- mtx_unlock(&pst->lock_mtx);
- mtx_destroy(&pst->lock_mtx);
- q->psi->nr_active_q--;
- } else {
- mtx_unlock(&pst->lock_mtx);
- return EBUSY;
- }
+ callout_reset_sbt(&pst->aqm_pie_callout,
+ SBT_1US, 0, fqpie_callout_cleanup, q, 0);
+ mtx_unlock(&pst->lock_mtx);
return 0;
}
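
The teardown above defers freeing the shared fq_pie_si_extra state to fqpie_callout_cleanup(), which runs once per sub-queue and releases the memory when the last active sub-queue goes away. A simplified stand-alone model of that last-one-out pattern (no callouts or locking; illustrative only, not part of the commit):

    #include <stdio.h>
    #include <stdlib.h>

    struct si_extra {
            unsigned nr_active_q;   /* sub-queues whose cleanup has not run yet */
            void *flows;            /* shared array, freed by the last one out */
    };

    /* Models fqpie_callout_cleanup(): runs once per sub-queue. */
    static void
    queue_cleanup(struct si_extra *x)
    {
            if (--x->nr_active_q == 0) {
                    free(x->flows);
                    free(x);
                    printf("shared state freed\n");
            }
    }

    int
    main(void)
    {
            struct si_extra *x = calloc(1, sizeof(*x));
            int i;

            x->nr_active_q = 3;
            x->flows = calloc(3, 64);
            for (i = 0; i < 3; i++)         /* each sub-queue cleans up once */
                    queue_cleanup(x);
            return (0);
    }
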
@@ -831,10 +842,12 @@ fq_pie_enqueue(struct dn_sch_inst *_si, struct dn_queue *_q,
struct fq_pie_schk *schk;
struct dn_sch_fq_pie_parms *param;
struct dn_queue *mainq;
+ struct fq_pie_flow *flows;
int idx, drop, i, maxidx;
mainq = (struct dn_queue *)(_si + 1);
si = (struct fq_pie_si *)_si;
+ flows = si->si_extra->flows;
schk = (struct fq_pie_schk *)(si->_si.sched+1);
param = &schk->cfg;
@@ -844,7 +857,7 @@ fq_pie_enqueue(struct dn_sch_inst *_si, struct dn_queue *_q,
/* enqueue packet into appropriate queue using PIE AQM.
* Note: 'pie_enqueue' function returns 1 only when it unable to
* add timestamp to packet (no limit check)*/
- drop = pie_enqueue(&si->flows[idx], m, si);
+ drop = pie_enqueue(&flows[idx], m, si);
/* pie unable to timestamp a packet */
if (drop)
@@ -853,11 +866,11 @@ fq_pie_enqueue(struct dn_sch_inst *_si, struct dn_queue *_q,
/* If the flow (sub-queue) is not active ,then add it to tail of
* new flows list, initialize and activate it.
*/
- if (!si->flows[idx].active) {
- STAILQ_INSERT_TAIL(&si->newflows, &si->flows[idx], flowchain);
- si->flows[idx].deficit = param->quantum;
- fq_activate_pie(&si->flows[idx]);
- si->flows[idx].active = 1;
+ if (!flows[idx].active) {
+ STAILQ_INSERT_TAIL(&si->newflows, &flows[idx], flowchain);
+ flows[idx].deficit = param->quantum;
+ fq_activate_pie(&flows[idx]);
+ flows[idx].active = 1;
}
/* check the limit for all queues and remove a packet from the
@@ -866,15 +879,15 @@ fq_pie_enqueue(struct dn_sch_inst *_si, struct dn_queue *_q,
if (mainq->ni.length > schk->cfg.limit) {
/* find first active flow */
for (maxidx = 0; maxidx < schk->cfg.flows_cnt; maxidx++)
- if (si->flows[maxidx].active)
+ if (flows[maxidx].active)
break;
if (maxidx < schk->cfg.flows_cnt) {
/* find the largest sub- queue */
for (i = maxidx + 1; i < schk->cfg.flows_cnt; i++)
- if (si->flows[i].active && si->flows[i].stats.length >
- si->flows[maxidx].stats.length)
+ if (flows[i].active && flows[i].stats.length >
+ flows[maxidx].stats.length)
maxidx = i;
- pie_drop_head(&si->flows[maxidx], si);
+ pie_drop_head(&flows[maxidx], si);
drop = 1;
}
}
@@ -974,12 +987,13 @@ fq_pie_new_sched(struct dn_sch_inst *_si)
struct fq_pie_si *si;
struct dn_queue *q;
struct fq_pie_schk *schk;
+ struct fq_pie_flow *flows;
int i;
si = (struct fq_pie_si *)_si;
schk = (struct fq_pie_schk *)(_si->sched+1);
- if(si->flows) {
+ if(si->si_extra) {
D("si already configured!");
return 0;
}
@@ -990,17 +1004,27 @@ fq_pie_new_sched(struct dn_sch_inst *_si)
q->_si = _si;
q->fs = _si->sched->fs;
+ /* allocate memory for scheduler instance extra vars */
+ si->si_extra = malloc(sizeof(struct fq_pie_si_extra),
+ M_DUMMYNET, M_NOWAIT | M_ZERO);
+ if (si->si_extra == NULL) {
+ D("cannot allocate memory for fq_pie si extra vars");
+ return ENOMEM ;
+ }
/* allocate memory for flows array */
- si->flows = malloc(schk->cfg.flows_cnt * sizeof(struct fq_pie_flow),
+ si->si_extra->flows = malloc(schk->cfg.flows_cnt * sizeof(struct fq_pie_flow),
M_DUMMYNET, M_NOWAIT | M_ZERO);
- if (si->flows == NULL) {
- D("cannot allocate memory for fq_pie configuration parameters");
+ flows = si->si_extra->flows;
+ if (flows == NULL) {
+ free(si->si_extra, M_DUMMYNET);
+ si->si_extra = NULL;
+ D("cannot allocate memory for fq_pie flows");
return ENOMEM ;
}
/* init perturbation for this si */
si->perturbation = random();
- si->nr_active_q = 0;
+ si->si_extra->nr_active_q = 0;
/* init the old and new flows lists */
STAILQ_INIT(&si->newflows);
@@ -1008,45 +1032,16 @@ fq_pie_new_sched(struct dn_sch_inst *_si)
/* init the flows (sub-queues) */
for (i = 0; i < schk->cfg.flows_cnt; i++) {
- si->flows[i].pst.parms = &schk->cfg.pcfg;
- si->flows[i].psi = si;
- pie_init(&si->flows[i]);
- }
-
- /* init mtx lock and callout function for free memory */
- if (!fq_pie_desc.ref_count) {
- mtx_init(&freemem_mtx, "mtx_pie", NULL, MTX_DEF);
+ flows[i].pst.parms = &schk->cfg.pcfg;
+ flows[i].psi_extra = si->si_extra;
+ pie_init(&flows[i], schk);
}
- mtx_lock(&freemem_mtx);
fq_pie_desc.ref_count++;
- mtx_unlock(&freemem_mtx);
return 0;
}
-/*
- * Free FQ-PIE flows memory callout function.
- * This function is scheduled when a flow or more still active and
- * the scheduer is about to be destroyed, to prevent memory leak.
- */
-static void
-free_flows(void *_mem)
-{
- struct mem_to_free *mem = _mem;
-
- free(mem->mem_flows, M_DUMMYNET);
- free(mem->mem_callout, M_DUMMYNET);
- free(_mem, M_DUMMYNET);
-
- fq_pie_desc.ref_count--;
- if (!fq_pie_desc.ref_count) {
- mtx_unlock(&freemem_mtx);
- mtx_destroy(&freemem_mtx);
- } else
- mtx_unlock(&freemem_mtx);
- //D("mem freed ok!");
-}
/*
* Free fq_pie scheduler instance.
@@ -1056,61 +1051,17 @@ fq_pie_free_sched(struct dn_sch_inst *_si)
{
struct fq_pie_si *si;
struct fq_pie_schk *schk;
+ struct fq_pie_flow *flows;
int i;
si = (struct fq_pie_si *)_si;
schk = (struct fq_pie_schk *)(_si->sched+1);
-
+ flows = si->si_extra->flows;
for (i = 0; i < schk->cfg.flows_cnt; i++) {
- pie_cleanup(&si->flows[i]);
- }
-
- /* if there are still some queues have a callout going to start,
- * we cannot free flows memory. If we do so, a panic can happen
- * as prob calculate callout function uses flows memory.
- */
- if (!si->nr_active_q) {
- /* free the flows array */
- free(si->flows , M_DUMMYNET);
- si->flows = NULL;
- mtx_lock(&freemem_mtx);
- fq_pie_desc.ref_count--;
- if (!fq_pie_desc.ref_count) {
- mtx_unlock(&freemem_mtx);
- mtx_destroy(&freemem_mtx);
- } else
- mtx_unlock(&freemem_mtx);
- //D("ok!");
- return 0;
- } else {
- /* memory leak happens here. So, we register a callout function to free
- * flows memory later.
- */
- D("unable to stop all fq_pie sub-queues!");
- mtx_lock(&freemem_mtx);
-
- struct callout *mem_callout;
- struct mem_to_free *mem;
-
- mem = malloc(sizeof(*mem), M_DUMMYNET,
- M_NOWAIT | M_ZERO);
- mem_callout = malloc(sizeof(*mem_callout), M_DUMMYNET,
- M_NOWAIT | M_ZERO);
-
- callout_init_mtx(mem_callout, &freemem_mtx,
- CALLOUT_RETURNUNLOCKED);
-
- mem->mem_flows = si->flows;
- mem->mem_callout = mem_callout;
- callout_reset_sbt(mem_callout,
- (uint64_t)(si->flows[0].pst.parms->tupdate + 1000) * SBT_1US,
- 0, free_flows, mem, 0);
-
- si->flows = NULL;
- mtx_unlock(&freemem_mtx);
-
- return EBUSY;
+ pie_cleanup(&flows[i]);
}
+ si->si_extra = NULL;
+ return 0;
}
/*
diff --git a/sys/ofed/include/linux/linux_idr.c b/sys/ofed/include/linux/linux_idr.c
index 8eb7949..c47903e 100644
--- a/sys/ofed/include/linux/linux_idr.c
+++ b/sys/ofed/include/linux/linux_idr.c
@@ -267,7 +267,8 @@ idr_get(struct idr *idr)
return (il);
}
il = malloc(sizeof(*il), M_IDR, M_ZERO | M_NOWAIT);
- bitmap_fill(&il->bitmap, IDR_SIZE);
+ if (il != NULL)
+ bitmap_fill(&il->bitmap, IDR_SIZE);
return (il);
}
diff --git a/sys/vm/vm_page.c b/sys/vm/vm_page.c
index c250c5d..7010580 100644
--- a/sys/vm/vm_page.c
+++ b/sys/vm/vm_page.c
@@ -981,8 +981,6 @@ static int
vm_page_insert_after(vm_page_t m, vm_object_t object, vm_pindex_t pindex,
vm_page_t mpred)
{
- vm_pindex_t sidx;
- vm_object_t sobj;
vm_page_t msucc;
VM_OBJECT_ASSERT_WLOCKED(object);
@@ -1003,8 +1001,6 @@ vm_page_insert_after(vm_page_t m, vm_object_t object, vm_pindex_t pindex,
/*
* Record the object/offset pair in this page
*/
- sobj = m->object;
- sidx = m->pindex;
m->object = object;
m->pindex = pindex;
@@ -1012,8 +1008,8 @@ vm_page_insert_after(vm_page_t m, vm_object_t object, vm_pindex_t pindex,
* Now link into the object's ordered list of backed pages.
*/
if (vm_radix_insert(&object->rtree, m)) {
- m->object = sobj;
- m->pindex = sidx;
+ m->object = NULL;
+ m->pindex = 0;
return (1);
}
vm_page_insert_radixdone(m, object, mpred);
@@ -1640,6 +1636,7 @@ vm_page_alloc(vm_object_t object, vm_pindex_t pindex, int req)
}
m->object = NULL;
m->oflags = VPO_UNMANAGED;
+ m->busy_lock = VPB_UNBUSIED;
vm_page_free(m);
return (NULL);
}
@@ -1842,6 +1839,7 @@ retry:
m->object = NULL;
m->oflags |= VPO_UNMANAGED;
}
+ m->busy_lock = VPB_UNBUSIED;
vm_page_free(m);
}
return (NULL);