summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--contrib/openbsm/etc/audit_event7
-rw-r--r--lib/libc/include/compat.h2
-rw-r--r--lib/libc/sys/Makefile.inc8
-rw-r--r--lib/libc/sys/Symbol.map7
-rw-r--r--lib/libc/sys/cap_enter.238
-rw-r--r--lib/libc/sys/cap_fcntls_limit.2127
-rw-r--r--lib/libc/sys/cap_ioctls_limit.2158
-rw-r--r--lib/libc/sys/cap_rights_limit.2 (renamed from lib/libc/sys/cap_new.2)382
-rw-r--r--lib/libc/sys/dup.26
-rw-r--r--lib/libprocstat/libprocstat.c1
-rw-r--r--lib/libprocstat/libprocstat.h1
-rw-r--r--sys/bsm/audit_kevents.h7
-rw-r--r--sys/cddl/compat/opensolaris/sys/file.h8
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ioctl.c6
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_onexit.c2
-rw-r--r--sys/compat/freebsd32/syscalls.master12
-rw-r--r--sys/compat/linux/linux_file.c9
-rw-r--r--sys/compat/svr4/svr4_fcntl.c6
-rw-r--r--sys/compat/svr4/svr4_filio.c10
-rw-r--r--sys/compat/svr4/svr4_misc.c7
-rw-r--r--sys/compat/svr4/svr4_stream.c4
-rw-r--r--sys/dev/iscsi/initiator/iscsi.c4
-rw-r--r--sys/fs/fdescfs/fdesc_vfsops.c2
-rw-r--r--sys/fs/fdescfs/fdesc_vnops.c2
-rw-r--r--sys/fs/nfs/nfsdport.h2
-rw-r--r--sys/fs/nfsclient/nfs_clport.c2
-rw-r--r--sys/fs/nfsserver/nfs_nfsdport.c8
-rw-r--r--sys/i386/ibcs2/ibcs2_misc.c7
-rw-r--r--sys/kern/capabilities.conf18
-rw-r--r--sys/kern/kern_descrip.c663
-rw-r--r--sys/kern/kern_exec.c3
-rw-r--r--sys/kern/kern_exit.c2
-rw-r--r--sys/kern/kern_fork.c2
-rw-r--r--sys/kern/sys_capability.c645
-rw-r--r--sys/kern/sys_generic.c131
-rw-r--r--sys/kern/syscalls.master12
-rw-r--r--sys/kern/tty.c16
-rw-r--r--sys/kern/uipc_mqueue.c9
-rw-r--r--sys/kern/uipc_sem.c4
-rw-r--r--sys/kern/uipc_shm.c4
-rw-r--r--sys/kern/uipc_syscalls.c46
-rw-r--r--sys/kern/uipc_usrreq.c89
-rw-r--r--sys/kern/vfs_aio.c6
-rw-r--r--sys/kern/vfs_lookup.c13
-rw-r--r--sys/kern/vfs_syscalls.c88
-rw-r--r--sys/netsmb/smb_dev.c4
-rw-r--r--sys/nfsserver/nfs_srvkrpc.c3
-rw-r--r--sys/ofed/include/linux/file.h9
-rw-r--r--sys/security/audit/audit.h7
-rw-r--r--sys/security/audit/audit_arg.c13
-rw-r--r--sys/security/audit/audit_bsm.c15
-rw-r--r--sys/security/audit/audit_private.h2
-rw-r--r--sys/sys/capability.h197
-rw-r--r--sys/sys/file.h7
-rw-r--r--sys/sys/filedesc.h45
-rw-r--r--sys/sys/namei.h5
-rw-r--r--sys/sys/user.h4
-rw-r--r--sys/vm/vm_mmap.c6
-rw-r--r--usr.bin/kdump/kdump.c9
-rw-r--r--usr.bin/kdump/mksubr1
-rw-r--r--usr.bin/procstat/procstat_files.c51
61 files changed, 1840 insertions, 1124 deletions
diff --git a/contrib/openbsm/etc/audit_event b/contrib/openbsm/etc/audit_event
index 0350389..f82841a 100644
--- a/contrib/openbsm/etc/audit_event
+++ b/contrib/openbsm/etc/audit_event
@@ -548,7 +548,7 @@
43184:AUE_OPENAT:openat(2) - attr only:fa
43185:AUE_POSIX_OPENPT:posix_openpt(2):ip
43186:AUE_CAP_NEW:cap_new(2):fm
-43187:AUE_CAP_GETRIGHTS:cap_getrights(2):fm
+43187:AUE_CAP_RIGHTS_GET:cap_rights_get(2):fm
43188:AUE_CAP_ENTER:cap_enter(2):pc
43189:AUE_CAP_GETMODE:cap_getmode(2):pc
43190:AUE_POSIX_SPAWN:posix_spawn(2):pc
@@ -563,6 +563,11 @@
43199:AUE_PDGETPID:pdgetpid(2):pc
43200:AUE_PDWAIT:pdwait(2):pc
43201:AUE_WAIT6:wait6(2):pc
+43202:AUE_CAP_RIGHTS_LIMIT:cap_rights_limit(2):fm
+43203:AUE_CAP_IOCTLS_LIMIT:cap_ioctls_limit(2):fm
+43204:AUE_CAP_IOCTLS_GET:cap_ioctls_get(2):fm
+43205:AUE_CAP_FCNTLS_LIMIT:cap_fcntls_limit(2):fm
+43206:AUE_CAP_FCNTLS_GET:cap_fcntls_get(2):fm
#
# Solaris userspace events.
#
diff --git a/lib/libc/include/compat.h b/lib/libc/include/compat.h
index 7694540..3739fe1 100644
--- a/lib/libc/include/compat.h
+++ b/lib/libc/include/compat.h
@@ -42,6 +42,8 @@ __sym_compat(__semctl, freebsd7___semctl, FBSD_1.0);
__sym_compat(msgctl, freebsd7_msgctl, FBSD_1.0);
__sym_compat(shmctl, freebsd7_shmctl, FBSD_1.0);
+__sym_compat(cap_getrights, cap_rights_get, FBSD_1.2);
+
#undef __sym_compat
#endif /* __LIBC_COMPAT_H__ */
diff --git a/lib/libc/sys/Makefile.inc b/lib/libc/sys/Makefile.inc
index 9f216dc..03c0090 100644
--- a/lib/libc/sys/Makefile.inc
+++ b/lib/libc/sys/Makefile.inc
@@ -93,7 +93,9 @@ MAN+= abort2.2 \
bind.2 \
brk.2 \
cap_enter.2 \
- cap_new.2 \
+ cap_fcntls_limit.2 \
+ cap_ioctls_limit.2 \
+ cap_rights_limit.2 \
chdir.2 \
chflags.2 \
chmod.2 \
@@ -270,7 +272,9 @@ MLINKS+=access.2 eaccess.2 \
access.2 faccessat.2
MLINKS+=brk.2 sbrk.2
MLINKS+=cap_enter.2 cap_getmode.2
-MLINKS+=cap_new.2 cap_getrights.2
+MLINKS+=cap_fcntls_limit.2 cap_fcntls_get.2
+MLINKS+=cap_ioctls_limit.2 cap_ioctls_get.2
+MLINKS+=cap_rights_limit.2 cap_rights_get.2
MLINKS+=chdir.2 fchdir.2
MLINKS+=chflags.2 fchflags.2 \
chflags.2 lchflags.2
diff --git a/lib/libc/sys/Symbol.map b/lib/libc/sys/Symbol.map
index d126255..7738e46 100644
--- a/lib/libc/sys/Symbol.map
+++ b/lib/libc/sys/Symbol.map
@@ -364,7 +364,6 @@ FBSD_1.2 {
cap_enter;
cap_getmode;
cap_new;
- cap_getrights;
getloginclass;
pdfork;
pdgetpid;
@@ -379,6 +378,12 @@ FBSD_1.2 {
};
FBSD_1.3 {
+ cap_fcntls_get;
+ cap_fcntls_limit;
+ cap_ioctls_get;
+ cap_ioctls_limit;
+ cap_rights_get;
+ cap_rights_limit;
cap_sandboxed;
clock_getcpuclockid2;
ffclock_getcounter;
diff --git a/lib/libc/sys/cap_enter.2 b/lib/libc/sys/cap_enter.2
index c3cefe8..3369669 100644
--- a/lib/libc/sys/cap_enter.2
+++ b/lib/libc/sys/cap_enter.2
@@ -58,8 +58,10 @@ or
.Xr pdfork 2
will be placed in capability mode from inception.
.Pp
-When combined with capabilities created with
-.Xr cap_new 2 ,
+When combined with
+.Xr cap_rights_limit 2 ,
+.Xr cap_ioctls_limit 2 ,
+.Xr cap_fcntls_limit 2 ,
.Fn cap_enter
may be used to create kernel-enforced sandboxes in which
appropriately-crafted applications or application components may be run.
@@ -71,11 +73,6 @@ sandbox.
Creating effective process sandboxes is a tricky process that involves
identifying the least possible rights required by the process and then
passing those rights into the process in a safe manner.
-See the CAVEAT
-section of
-.Xr cap_new 2
-for why this is particularly tricky with UNIX file descriptors as the
-canonical representation of a right.
Consumers of
.Fn cap_enter
should also be aware of other inherited rights, such as access to VM
@@ -87,8 +84,33 @@ to create a runtime environment inside the sandbox that has as few implicitly
acquired rights as possible.
.Sh RETURN VALUES
.Rv -std cap_enter cap_getmode
+.Sh ERRORS
+The
+.Fn cap_enter
+and
+.Fn cap_getmode
+system calls
+will fail if:
+.Bl -tag -width Er
+.It Bq Er ENOSYS
+The kernel is compiled without:
+.Pp
+.Cd "options CAPABILITY_MODE"
+.El
+.Pp
+The
+.Fn cap_getmode
+system call may also return the following error:
+.Bl -tag -width Er
+.It Bq Er EFAULT
+Pointer
+.Fa modep
+points outside the process's allocated address space.
+.El
.Sh SEE ALSO
-.Xr cap_new 2 ,
+.Xr cap_fcntls_limit 2 ,
+.Xr cap_ioctls_limit 2 ,
+.Xr cap_rights_limit 2 ,
.Xr fexecve 2 ,
.Xr cap_sandboxed 3 ,
.Xr capsicum 4
diff --git a/lib/libc/sys/cap_fcntls_limit.2 b/lib/libc/sys/cap_fcntls_limit.2
new file mode 100644
index 0000000..8fa7463
--- /dev/null
+++ b/lib/libc/sys/cap_fcntls_limit.2
@@ -0,0 +1,127 @@
+.\"
+.\" Copyright (c) 2012 The FreeBSD Foundation
+.\" All rights reserved.
+.\"
+.\" This documentation was written by Pawel Jakub Dawidek under sponsorship
+.\" from the FreeBSD Foundation.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" $FreeBSD$
+.\"
+.Dd September 20, 2012
+.Dt CAP_FCNTLS_LIMIT 2
+.Os
+.Sh NAME
+.Nm cap_fcntls_limit ,
+.Nm cap_fcntls_get
+.Nd manage allowed fcntl commands
+.Sh LIBRARY
+.Lb libc
+.Sh SYNOPSIS
+.In sys/capability.h
+.Ft int
+.Fn cap_fcntls_limit "int fd" "uint32_t fcntlrights"
+.Ft int
+.Fn cap_fcntls_get "int fd" "uint32_t *fcntlrightsp"
+.Sh DESCRIPTION
+If a file descriptor is granted the
+.Dv CAP_FCNTL
+capability right, the list of allowed
+.Xr fcntl 2
+commands can be selectively reduced (but never expanded) with the
+.Fn cap_fcntls_limit
+system call.
+.Pp
+A bitmask of allowed fcntl commands for a given file descriptor can be obtained
+with the
+.Fn cap_fcntls_get
+system call.
+.Sh FLAGS
+The following flags may be specified in the
+.Fa fcntlrights
+argument or returned in the
+.Fa fcntlrightsp
+argument:
+.Bl -tag -width CAP_FCNTL_GETOWN
+.It Dv CAP_FCNTL_GETFL
+Permit
+.Dv F_GETFL
+command.
+.It Dv CAP_FCNTL_SETFL
+Permit
+.Dv F_SETFL
+command.
+.It Dv CAP_FCNTL_GETOWN
+Permit
+.Dv F_GETOWN
+command.
+.It Dv CAP_FCNTL_SETOWN
+Permit
+.Dv F_SETOWN
+command.
+.El
+.Sh RETURN VALUES
+.Rv -std
+.Sh ERRORS
+.Fn cap_fcntls_limit
+succeeds unless:
+.Bl -tag -width Er
+.It Bq Er EBADF
+The
+.Fa fd
+argument is not a valid descriptor.
+.It Bq Er EINVAL
+An invalid flag has been passed in
+.Fa fcntlrights .
+.It Bq Er ENOTCAPABLE
+.Fa fcntlrights
+would expand the list of allowed
+.Xr fcntl 2
+commands.
+.El
+.Pp
+.Fn cap_fcntls_get
+succeeds unless:
+.Bl -tag -width Er
+.It Bq Er EBADF
+The
+.Fa fd
+argument is not a valid descriptor.
+.It Bq Er EFAULT
+The
+.Fa fcntlrightsp
+argument points at an invalid address.
+.El
+.Sh SEE ALSO
+.Xr cap_ioctls_limit 2 ,
+.Xr cap_rights_limit 2 ,
+.Xr fcntl 2
+.Sh HISTORY
+Support for capabilities and capabilities mode was developed as part of the
+.Tn TrustedBSD
+Project.
+.Pp
+.Sh AUTHORS
+This function was created by
+.An Pawel Jakub Dawidek Aq pawel@dawidek.net
+under sponsorship of the FreeBSD Foundation.
diff --git a/lib/libc/sys/cap_ioctls_limit.2 b/lib/libc/sys/cap_ioctls_limit.2
new file mode 100644
index 0000000..5eca18c
--- /dev/null
+++ b/lib/libc/sys/cap_ioctls_limit.2
@@ -0,0 +1,158 @@
+.\"
+.\" Copyright (c) 2012 The FreeBSD Foundation
+.\" All rights reserved.
+.\"
+.\" This documentation was written by Pawel Jakub Dawidek under sponsorship
+.\" from the FreeBSD Foundation.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" $FreeBSD$
+.\"
+.Dd September 20, 2012
+.Dt CAP_IOCTLS_LIMIT 2
+.Os
+.Sh NAME
+.Nm cap_ioctls_limit ,
+.Nm cap_ioctls_get
+.Nd manage allowed ioctl commands
+.Sh LIBRARY
+.Lb libc
+.Sh SYNOPSIS
+.In sys/capability.h
+.Ft int
+.Fn cap_ioctls_limit "int fd" "const unsigned long *cmds" "size_t ncmds"
+.Ft ssize_t
+.Fn cap_ioctls_get "int fd" "unsigned long *cmds" "size_t maxcmds"
+.Sh DESCRIPTION
+If a file descriptor is granted the
+.Dv CAP_IOCTL
+capability right, the list of allowed
+.Xr ioctl 2
+commands can be selectively reduced (but never expanded) with the
+.Fn cap_ioctls_limit
+system call.
+The
+.Fa cmds
+argument is an array of
+.Xr ioctl 2
+commands and the
+.Fa ncmds
+argument specifies the number of elements in the array.
+There might be up to
+.Va 256
+elements in the array.
+.Pp
+The list of allowed ioctl commands for a given file descriptor can be obtained
+with the
+.Fn cap_ioctls_get
+system call.
+The
+.Fa cmds
+argument points at memory that can hold up to
+.Fa maxcmds
+values.
+The function populates the provided buffer with up to
+.Fa maxcmds
+elements, but always returns the total number of ioctl commands allowed for the
+given file descriptor.
+The total number of ioctl commands for the given file descriptor can be
+obtained by passing
+.Dv NULL
+as the
+.Fa cmds
+argument and
+.Va 0
+as the
+.Fa maxcmds
+argument.
+If all ioctl commands are allowed
+.Dv ( CAP_IOCTL
+capability right is assigned to the file descriptor and the
+.Fn cap_ioctls_limit
+system call was never called for this file descriptor), the
+.Fn cap_ioctls_get
+system call will return
+.Dv CAP_IOCTLS_ALL
+and won't modify the buffer pointed to by the
+.Fa cmds
+argument.
+.Sh RETURN VALUES
+.Rv -std cap_ioctls_limit
+.Pp
+The
+.Fn cap_ioctls_get
+function, if successful, returns the total number of allowed ioctl commands or
+the value
+.Dv CAP_IOCTLS_ALL
+if all ioctl commands are allowed.
+On failure the value
+.Va -1
+is returned and the global variable errno is set to indicate the error.
+.Sh ERRORS
+.Fn cap_ioctls_limit
+succeeds unless:
+.Bl -tag -width Er
+.It Bq Er EBADF
+The
+.Fa fd
+argument is not a valid descriptor.
+.It Bq Er EFAULT
+The
+.Fa cmds
+argument points at an invalid address.
+.It Bq Er EINVAL
+The
+.Fa ncmds
+argument is greater than
+.Va 256 .
+.It Bq Er ENOTCAPABLE
+.Fa cmds
+would expand the list of allowed
+.Xr ioctl 2
+commands.
+.El
+.Pp
+.Fn cap_ioctls_get
+succeeds unless:
+.Bl -tag -width Er
+.It Bq Er EBADF
+The
+.Fa fd
+argument is not a valid descriptor.
+.It Bq Er EFAULT
+The
+.Fa cmds
+argument points at an invalid address.
+.El
+.Sh SEE ALSO
+.Xr cap_fcntls_limit 2 ,
+.Xr cap_rights_limit 2 ,
+.Xr ioctl 2
+.Sh HISTORY
+Support for capabilities and capabilities mode was developed as part of the
+.Tn TrustedBSD
+Project.
+.Pp
+.Sh AUTHORS
+This function was created by
+.An Pawel Jakub Dawidek Aq pawel@dawidek.net
+under sponsorship of the FreeBSD Foundation.
diff --git a/lib/libc/sys/cap_new.2 b/lib/libc/sys/cap_rights_limit.2
index a18fd3b..d8d8777 100644
--- a/lib/libc/sys/cap_new.2
+++ b/lib/libc/sys/cap_rights_limit.2
@@ -1,10 +1,14 @@
.\"
.\" Copyright (c) 2008-2010 Robert N. M. Watson
+.\" Copyright (c) 2012-2013 The FreeBSD Foundation
.\" All rights reserved.
.\"
.\" This software was developed at the University of Cambridge Computer
.\" Laboratory with support from a grant from Google, Inc.
.\"
+.\" Portions of this documentation were written by Pawel Jakub Dawidek
+.\" under sponsorship from the FreeBSD Foundation.
+.\"
.\" Redistribution and use in source and binary forms, with or without
.\" modification, are permitted provided that the following conditions
.\" are met:
@@ -28,76 +32,48 @@
.\"
.\" $FreeBSD$
.\"
-.Dd July 20, 2011
-.Dt CAP_NEW 2
+.Dd February 23, 2013
+.Dt CAP_RIGHTS_LIMIT 2
.Os
.Sh NAME
-.Nm cap_new ,
-.Nm cap_getrights
-.Nd System calls to manipulate capabilities
+.Nm cap_rights_limit ,
+.Nm cap_rights_get
+.Nd manage capability rights
.Sh LIBRARY
.Lb libc
.Sh SYNOPSIS
.In sys/capability.h
.Ft int
-.Fn cap_new "int fd" "cap_rights_t rights"
+.Fn cap_rights_limit "int fd" "cap_rights_t rights"
.Ft int
-.Fn cap_getrights "int fd" "cap_rights_t *rightsp"
+.Fn cap_rights_get "int fd" "cap_rights_t *rightsp"
.Sh DESCRIPTION
-Capabilities are special file descriptors derived from an existing file
-descriptor, such as one returned by
+When a file descriptor is created by a function such as
.Xr fhopen 2 ,
.Xr kqueue 2 ,
.Xr mq_open 2 ,
.Xr open 2 ,
+.Xr openat 2 ,
+.Xr pdfork 2 ,
.Xr pipe 2 ,
.Xr shm_open 2 ,
.Xr socket 2 ,
or
.Xr socketpair 2 ,
-but with a restricted set of permitted operations determined by a rights
-mask set when the capability is created.
-These restricted rights cannot be changed after the capability is created,
-although further capabilities with yet more restricted rights may be created
-from an existing capability.
-In every other sense, a capability behaves in the same way as the file
-descriptor it was created from.
-.Pp
-.Fn cap_new
-creates a new capability for the existing file descriptor
-.Fa fd ,
-and returns a file descriptor for it.
-Operations on the capability will be limited to those permitted by
-.Fa rights ,
-which is static for the lifetime of the capability.
-If
-.Fa fd
-refers to an existing capability, then
-.Fa rights
-must be equal to or a subset of the rights on that capability.
-As with
-.Xr dup 2
-and
-.Xr dup2 2 ,
-many properties are shared between the new capability and the existing file
-descriptor, including open file flags, blocking disposition, and file offset.
-Many applications will prefer to use the
-.Xr cap_limitfd 3
-library call, part of
-.Xr libcapsicum 3 ,
-as it offers a more convenient interface.
-.Pp
-.Fn cap_getrights
-queries the rights associated with the capability referred to by file
-descriptor
-.Fa fd .
+it is assigned all capability rights.
+Those rights can be reduced (but never expanded) by using the
+.Fn cap_rights_limit
+system call.
+Once capability rights are reduced, operations on the file descriptor will be
+limited to those permitted by
+.Fa rights .
.Pp
-These system calls, when combined with
-.Xr cap_enter 2 ,
-may be used to construct process sandboxes with highly granular rights
-assignment.
+A bitmask of capability rights assigned to a file descriptor can be obtained with
+the
+.Fn cap_rights_get
+system call.
.Sh RIGHTS
-The following rights may be specified in a new capability rights mask:
+The following rights may be specified in a rights mask:
.Bl -tag -width CAP_EXTATTR_DELETE
.It Dv CAP_ACCEPT
Permit
@@ -134,6 +110,13 @@ Permit
also required for
.Xr sendto 2
with a non-NULL destination address.
+.It Dv CAP_CREATE
+Permit
+.Xr openat 2
+with the
+.Dv O_CREAT
+flag.
+.\" XXXPJD: Doesn't exist anymore.
.It Dv CAP_EVENT
Permit
.Xr select 2 ,
@@ -143,7 +126,12 @@ and
to be used in monitoring the file descriptor for events.
.It Dv CAP_FEXECVE
Permit
-.Xr fexecve 2 ;
+.Xr fexecve 2
+and
+.Xr openat 2
+with the
+.Dv O_EXEC
+flag;
.Dv CAP_READ
will also be required.
.It Dv CAP_EXTATTR_DELETE
@@ -166,19 +154,49 @@ Permit
.Xr fchflags 2 .
.It Dv CAP_FCHMOD
Permit
-.Xr fchmod 2 .
+.Xr fchmod 2
+and
+.Xr fchmodat 2 .
+.It Dv CAP_FCHMODAT
+An alias to
+.Dv CAP_FCHMOD .
.It Dv CAP_FCHOWN
Permit
-.Xr fchown 2 .
+.Xr fchown 2
+and
+.Xr fchownat 2 .
+.It Dv CAP_FCHOWNAT
+An alias to
+.Dv CAP_FCHOWN .
.It Dv CAP_FCNTL
Permit
-.Xr fcntl 2 ;
-be aware that this call provides indirect access to other operations, such as
-.Xr flock 2 .
+.Xr fcntl 2 .
+Note that only the
+.Dv F_GETFL ,
+.Dv F_SETFL ,
+.Dv F_GETOWN
+and
+.Dv F_SETOWN
+commands require this capability right.
+Also note that the list of permitted commands can be further limited with the
+.Xr cap_fcntls_limit 2
+system call.
.It Dv CAP_FLOCK
Permit
-.Xr flock 2
-and related calls.
+.Xr flock 2 ,
+.Xr fcntl 2
+(with
+.Dv F_GETLK ,
+.Dv F_SETLK
+or
+.Dv F_SETLKW
+flag) and
+.Xr openat 2
+(with
+.Dv O_EXLOCK
+or
+.Dv O_SHLOCK
+flag).
.It Dv CAP_FPATHCONF
Permit
.Xr fpathconf 2 .
@@ -186,22 +204,42 @@ Permit
Permit UFS background-fsck operations on the descriptor.
.It Dv CAP_FSTAT
Permit
-.Xr fstat 2 .
+.Xr fstat 2
+and
+.Xr fstatat 2 .
+.It Dv CAP_FSTATAT
+An alias to
+.Dv CAP_FSTAT .
.It Dv CAP_FSTATFS
Permit
.Xr fstatfs 2 .
.It Dv CAP_FSYNC
Permit
-.Xr aio_fsync 2
+.Xr aio_fsync 2 ,
+.Xr fsync 2
and
-.Xr fsync 2 .
-.Pp
+.Xr openat 2
+with
+.Dv O_FSYNC
+or
+.Dv O_SYNC
+flag.
.It Dv CAP_FTRUNCATE
Permit
-.Xr ftruncate 2 .
+.Xr ftruncate 2
+and
+.Xr openat 2
+with the
+.Dv O_TRUNC
+flag.
.It Dv CAP_FUTIMES
Permit
-.Xr futimes 2 .
+.Xr futimes 2
+and
+.Xr futimesat 2 .
+.It Dv CAP_FUTIMESAT
+An alias to
+.Dv CAP_FUTIMES .
.It Dv CAP_GETPEERNAME
Permit
.Xr getpeername 2 .
@@ -216,42 +254,106 @@ Permit
.Xr ioctl 2 .
Be aware that this system call has enormous scope, including potentially
global scope for some objects.
+The list of permitted ioctl commands can be further limited with the
+.Xr cap_ioctls_limit 2
+system call.
+.\" XXXPJD: Doesn't exist anymore.
.It Dv CAP_KEVENT
Permit
.Xr kevent 2 ;
.Dv CAP_EVENT
is also required on file descriptors that will be monitored using
.Xr kevent 2 .
+.It Dv CAP_LINKAT
+Permit
+.Xr linkat 2
+and
+.Xr renameat 2 .
+This right is required for the destination directory descriptor.
.It Dv CAP_LISTEN
Permit
.Xr listen 2 ;
not much use (generally) without
.Dv CAP_BIND .
.It Dv CAP_LOOKUP
-Permit the file descriptor to be used as a starting directory for calls such
-as
+Permit the file descriptor to be used as a starting directory for calls such as
.Xr linkat 2 ,
.Xr openat 2 ,
and
.Xr unlinkat 2 .
-Note that these calls are not available in capability mode as they manipulate
-a global name space; see
-.Xr cap_enter 2
-for details.
.It Dv CAP_MAC_GET
Permit
.Xr mac_get_fd 3 .
.It Dv CAP_MAC_SET
Permit
.Xr mac_set_fd 3 .
+.It Dv CAP_MKDIRAT
+Permit
+.Xr mkdirat 2 .
+.It Dv CAP_MKFIFOAT
+Permit
+.Xr mkfifoat 2 .
+.It Dv CAP_MKNODAT
+Permit
+.Xr mknodat 2 .
.It Dv CAP_MMAP
Permit
-.Xr mmap 2 ;
-specific invocations may also require
+.Xr mmap 2
+with the
+.Dv PROT_NONE
+protection.
+.It Dv CAP_MMAP_R
+Permit
+.Xr mmap 2
+with the
+.Dv PROT_READ
+protection.
+This also implies
.Dv CAP_READ
-or
-.Dv CAP_WRITE .
-.Pp
+and
+.Dv CAP_SEEK
+rights.
+.It Dv CAP_MMAP_W
+Permit
+.Xr mmap 2
+with the
+.Dv PROT_WRITE
+protection.
+This also implies
+.Dv CAP_WRITE
+and
+.Dv CAP_SEEK
+rights.
+.It Dv CAP_MMAP_X
+Permit
+.Xr mmap 2
+with the
+.Dv PROT_EXEC
+protection.
+This also implies
+.Dv CAP_SEEK
+right.
+.It Dv CAP_MMAP_RW
+Implies
+.Dv CAP_MMAP_R
+and
+.Dv CAP_MMAP_W .
+.It Dv CAP_MMAP_RX
+Implies
+.Dv CAP_MMAP_R
+and
+.Dv CAP_MMAP_X .
+.It Dv CAP_MMAP_WX
+Implies
+.Dv CAP_MMAP_W
+and
+.Dv CAP_MMAP_X .
+.It Dv CAP_MMAP_RWX
+Implies
+.Dv CAP_MMAP_R ,
+.Dv CAP_MMAP_W
+and
+.Dv CAP_MMAP_X .
.It Dv CAP_PDGETPID
Permit
.Xr pdgetpid 2 .
@@ -264,30 +366,46 @@ Permit
.It Dv CAP_PEELOFF
Permit
.Xr sctp_peeloff 2 .
+.\" XXXPJD: Not documented.
+.It Dv CAP_POLL_EVENT
+.\" XXXPJD: Not documented.
+.It Dv CAP_POST_EVENT
+.It Dv CAP_PREAD
+Implies
+.Dv CAP_SEEK
+and
+.Dv CAP_READ .
+.It Dv CAP_PWRITE
+Implies
+.Dv CAP_SEEK
+and
+.Dv CAP_WRITE .
.It Dv CAP_READ
Allow
.Xr aio_read 2 ,
-.Xr pread 2 ,
+.Xr openat 2
+with the
+.Dv O_RDONLY
+flag,
.Xr read 2 ,
.Xr recv 2 ,
.Xr recvfrom 2 ,
-.Xr recvmsg 2 ,
+.Xr recvmsg 2
and related system calls.
-.Pp
-For files and other seekable objects,
-.Dv CAP_SEEK
-may also be required.
-.It Dv CAP_REVOKE
+.It Dv CAP_RECV
+An alias to
+.Dv CAP_READ .
+.It Dv CAP_RENAMEAT
Permit
-.Xr frevoke 2
-in certain ABI compatibility modes that support this system call.
+.Xr renameat 2 .
+This right is required for the source directory descriptor.
.It Dv CAP_SEEK
Permit operations that seek on the file descriptor, such as
.Xr lseek 2 ,
-but also required for I/O system calls that modify the file offset, such as
-.Xr read 2
+but also required for I/O system calls that can read or write at any position
+in the file, such as
+.Xr pread 2
and
-.Xr write 2 .
+.Xr pwrite 2 .
.It Dv CAP_SEM_GETVALUE
Permit
.Xr sem_getvalue 3 .
@@ -299,6 +417,9 @@ Permit
.Xr sem_wait 3
and
.Xr sem_trywait 3 .
+.It Dv CAP_SEND
+An alias to
+.Dv CAP_WRITE .
.It Dv CAP_SETSOCKOPT
Permit
.Xr setsockopt 2 ;
@@ -308,49 +429,56 @@ connecting, and other behaviors with global scope.
Permit explicit
.Xr shutdown 2 ;
closing the socket will also generally shut down any connections on it.
+.It Dv CAP_SYMLINKAT
+Permit
+.Xr symlinkat 2 .
.It Dv CAP_TTYHOOK
Allow configuration of TTY hooks, such as
.Xr snp 4 ,
on the file descriptor.
+.It Dv CAP_UNLINKAT
+Permit
+.Xr unlinkat 2
+and
+.Xr renameat 2 .
+This right is only required for
+.Xr renameat 2
+on the destination directory descriptor if the destination object already
+exists and will be removed by the rename.
.It Dv CAP_WRITE
Allow
.Xr aio_write 2 ,
-.Xr pwrite 2 ,
+.Xr openat 2
+with
+.Dv O_WRONLY
+and
+.Dv O_APPEND
+flags,
.Xr send 2 ,
.Xr sendmsg 2 ,
.Xr sendto 2 ,
.Xr write 2 ,
and related system calls.
-.Pp
-For files and other seekable objects,
-.Dv CAP_SEEK
-may also be required.
-.Pp
For
.Xr sendto 2
with a non-NULL connection address,
.Dv CAP_CONNECT
is also required.
+For
+.Xr openat 2
+with the
+.Dv O_WRONLY
+flag, but without the
+.Dv O_APPEND
+flag,
+.Dv CAP_SEEK
+is also required.
.El
-.Sh CAVEAT
-The
-.Fn cap_new
-system call and the capabilities it creates may be used to assign
-fine-grained rights to sandboxed processes running in capability mode.
-However, the semantics of objects accessed via file descriptors are complex,
-so caution should be exercised in passing object capabilities into sandboxes.
.Sh RETURN VALUES
-If successful,
-.Fn cap_new
-returns a non-negative integer, termed a file descriptor.
-It returns -1 on failure, and sets
-.Va errno
-to indicate the error.
-.Pp
-.Rv -std cap_getrights
+.Rv -std
.Sh ERRORS
-.Fn cap_new
-may return the following errors:
+.Fn cap_rights_limit
+succeeds unless:
.Bl -tag -width Er
.It Bq Er EBADF
The
@@ -359,29 +487,23 @@ argument is not a valid active descriptor.
.It Bq Er EINVAL
An invalid right has been requested in
.Fa rights .
-.It Bq Er EMFILE
-The process has already reached its limit for open file descriptors.
-.It Bq Er ENFILE
-The system file table is full.
-.It Bq Er EPERM
+.It Bq Er ENOTCAPABLE
.Fa rights
contains requested rights not present in the current rights mask associated
-with the capability referenced by
-.Fa fd ,
-if any.
+with the given file descriptor.
.El
.Pp
-.Fn cap_getrights
-may return the following errors:
+.Fn cap_rights_get
+succeeds unless:
.Bl -tag -width Er
.It Bq Er EBADF
The
.Fa fd
argument is not a valid active descriptor.
-.It Bq Er EINVAL
+.It Bq Er EFAULT
The
-.Fa fd
-argument is not a capability.
+.Fa rightsp
+argument points at an invalid address.
.El
.Sh SEE ALSO
.Xr accept 2 ,
@@ -390,6 +512,9 @@ argument is not a capability.
.Xr aio_write 2 ,
.Xr bind 2 ,
.Xr cap_enter 2 ,
+.Xr cap_fcntls_limit 2 ,
+.Xr cap_ioctls_limit 2 ,
+.Xr cap_rights_limit 2 ,
.Xr connect 2 ,
.Xr dup 2 ,
.Xr dup2 2 ,
@@ -421,6 +546,7 @@ argument is not a capability.
.Xr mq_open 2 ,
.Xr open 2 ,
.Xr openat 2 ,
+.Xr pdfork 2 ,
.Xr pdgetpid 2 ,
.Xr pdkill 2 ,
.Xr pdwait4 2 ,
@@ -432,6 +558,7 @@ argument is not a capability.
.Xr recv 2 ,
.Xr recvfrom 2 ,
.Xr recvmsg 2 ,
+.Xr renameat 2 ,
.Xr sctp_peeloff 2 ,
.Xr select 2 ,
.Xr send 2 ,
@@ -442,6 +569,7 @@ argument is not a capability.
.Xr shutdown 2 ,
.Xr socket 2 ,
.Xr socketpair 2 ,
+.Xr symlinkat 2 ,
.Xr unlinkat 2 ,
.Xr write 2 ,
.Xr acl_delete_fd_np 3 ,
@@ -462,11 +590,11 @@ argument is not a capability.
Support for capabilities and capabilities mode was developed as part of the
.Tn TrustedBSD
Project.
+.Pp
.Sh AUTHORS
-These functions and the capability facility were created by
-.An "Robert N. M. Watson"
-at the University of Cambridge Computer Laboratory with support from a grant
-from Google, Inc.
+This function was created by
+.An Pawel Jakub Dawidek Aq pawel@dawidek.net
+under sponsorship of the FreeBSD Foundation.
.Sh BUGS
This man page should list the set of permitted system calls more specifically
for each capability right.
diff --git a/lib/libc/sys/dup.2 b/lib/libc/sys/dup.2
index 7a07c21..6e1de20 100644
--- a/lib/libc/sys/dup.2
+++ b/lib/libc/sys/dup.2
@@ -115,11 +115,6 @@ and
is a valid descriptor, then
.Fn dup2
is successful, and does nothing.
-.Pp
-The related
-.Xr cap_new 2
-system call allows file descriptors to be duplicated with restrictions on
-their use.
.Sh RETURN VALUES
The value -1 is returned if an error occurs in either call.
The external variable
@@ -152,7 +147,6 @@ argument is negative or exceeds the maximum allowable descriptor number
.El
.Sh SEE ALSO
.Xr accept 2 ,
-.Xr cap_new 2 ,
.Xr close 2 ,
.Xr fcntl 2 ,
.Xr getdtablesize 2 ,
diff --git a/lib/libprocstat/libprocstat.c b/lib/libprocstat/libprocstat.c
index 9d9c111..f23ec96 100644
--- a/lib/libprocstat/libprocstat.c
+++ b/lib/libprocstat/libprocstat.c
@@ -600,7 +600,6 @@ kinfo_fflags2fst(int kfflags)
} kfflags2fst[] = {
{ KF_FLAG_APPEND, PS_FST_FFLAG_APPEND },
{ KF_FLAG_ASYNC, PS_FST_FFLAG_ASYNC },
- { KF_FLAG_CAPABILITY, PS_FST_FFLAG_CAPABILITY },
{ KF_FLAG_CREAT, PS_FST_FFLAG_CREAT },
{ KF_FLAG_DIRECT, PS_FST_FFLAG_DIRECT },
{ KF_FLAG_EXCL, PS_FST_FFLAG_EXCL },
diff --git a/lib/libprocstat/libprocstat.h b/lib/libprocstat/libprocstat.h
index 662ea37..1c55aa7 100644
--- a/lib/libprocstat/libprocstat.h
+++ b/lib/libprocstat/libprocstat.h
@@ -88,7 +88,6 @@
#define PS_FST_FFLAG_DIRECT 0x1000
#define PS_FST_FFLAG_EXEC 0x2000
#define PS_FST_FFLAG_HASLOCK 0x4000
-#define PS_FST_FFLAG_CAPABILITY 0x8000
struct procstat;
struct filestat {
diff --git a/sys/bsm/audit_kevents.h b/sys/bsm/audit_kevents.h
index d227981..9d71fa2 100644
--- a/sys/bsm/audit_kevents.h
+++ b/sys/bsm/audit_kevents.h
@@ -588,7 +588,7 @@
#define AUE_OPENAT 43184 /* FreeBSD. */
#define AUE_POSIX_OPENPT 43185 /* FreeBSD. */
#define AUE_CAP_NEW 43186 /* TrustedBSD. */
-#define AUE_CAP_GETRIGHTS 43187 /* TrustedBSD. */
+#define AUE_CAP_RIGHTS_GET 43187 /* TrustedBSD. */
#define AUE_CAP_ENTER 43188 /* TrustedBSD. */
#define AUE_CAP_GETMODE 43189 /* TrustedBSD. */
#define AUE_POSIX_SPAWN 43190 /* Darwin. */
@@ -603,6 +603,11 @@
#define AUE_PDGETPID 43199 /* FreeBSD. */
#define AUE_PDWAIT 43200 /* FreeBSD. */
#define AUE_WAIT6 43201 /* FreeBSD. */
+#define AUE_CAP_RIGHTS_LIMIT 43202 /* TrustedBSD. */
+#define AUE_CAP_IOCTLS_LIMIT 43203 /* TrustedBSD. */
+#define AUE_CAP_IOCTLS_GET 43204 /* TrustedBSD. */
+#define AUE_CAP_FCNTLS_LIMIT 43205 /* TrustedBSD. */
+#define AUE_CAP_FCNTLS_GET 43206 /* TrustedBSD. */
/*
* Darwin BSM uses a number of AUE_O_* definitions, which are aliased to the
diff --git a/sys/cddl/compat/opensolaris/sys/file.h b/sys/cddl/compat/opensolaris/sys/file.h
index 7a3df36..0b8f875 100644
--- a/sys/cddl/compat/opensolaris/sys/file.h
+++ b/sys/cddl/compat/opensolaris/sys/file.h
@@ -39,15 +39,11 @@ typedef struct file file_t;
#include <sys/capability.h>
static __inline file_t *
-getf(int fd)
+getf(int fd, cap_rights_t rights)
{
struct file *fp;
- /*
- * We wouldn't need all of these rights on every invocation
- * if we had more information about intent.
- */
- if (fget(curthread, fd, CAP_READ | CAP_WRITE | CAP_SEEK, &fp) == 0)
+ if (fget(curthread, fd, rights, &fp) == 0)
return (fp);
return (NULL);
}
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ioctl.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ioctl.c
index af0c9f7..fce4bb5 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ioctl.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ioctl.c
@@ -3822,7 +3822,7 @@ zfs_ioc_recv(zfs_cmd_t *zc)
return (error);
fd = zc->zc_cookie;
- fp = getf(fd);
+ fp = getf(fd, CAP_PREAD);
if (fp == NULL) {
nvlist_free(props);
return (EBADF);
@@ -4079,7 +4079,7 @@ zfs_ioc_send(zfs_cmd_t *zc)
error = dmu_send_estimate(tosnap, fromsnap, zc->zc_obj,
&zc->zc_objset_type);
} else {
- file_t *fp = getf(zc->zc_cookie);
+ file_t *fp = getf(zc->zc_cookie, CAP_WRITE);
if (fp == NULL) {
dsl_dataset_rele(ds, FTAG);
if (dsfrom)
@@ -4675,7 +4675,7 @@ zfs_ioc_diff(zfs_cmd_t *zc)
return (error);
}
- fp = getf(zc->zc_cookie);
+ fp = getf(zc->zc_cookie, CAP_WRITE);
if (fp == NULL) {
dmu_objset_rele(fromsnap, FTAG);
dmu_objset_rele(tosnap, FTAG);
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_onexit.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_onexit.c
index ca0acfd..c12826f 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_onexit.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_onexit.c
@@ -124,7 +124,7 @@ zfs_onexit_fd_hold(int fd, minor_t *minorp)
void *data;
int error;
- fp = getf(fd);
+ fp = getf(fd, CAP_NONE);
if (fp == NULL)
return (EBADF);
diff --git a/sys/compat/freebsd32/syscalls.master b/sys/compat/freebsd32/syscalls.master
index 4106447..6552d13 100644
--- a/sys/compat/freebsd32/syscalls.master
+++ b/sys/compat/freebsd32/syscalls.master
@@ -963,7 +963,7 @@
struct shmid_ds32 *buf); }
513 AUE_LPATHCONF NOPROTO { int lpathconf(char *path, int name); }
514 AUE_CAP_NEW NOPROTO { int cap_new(int fd, uint64_t rights); }
-515 AUE_CAP_GETRIGHTS NOPROTO { int cap_getrights(int fd, \
+515 AUE_CAP_RIGHTS_GET NOPROTO { int cap_rights_get(int fd, \
uint64_t *rightsp); }
516 AUE_CAP_ENTER NOPROTO { int cap_enter(void); }
517 AUE_CAP_GETMODE NOPROTO { int cap_getmode(u_int *modep); }
@@ -1005,3 +1005,13 @@
struct wrusage32 *wrusage, \
siginfo_t *info); }
+533 AUE_CAP_RIGHTS_LIMIT NOPROTO { int cap_rights_limit(int fd, \
+ uint64_t rights); }
+534 AUE_CAP_IOCTLS_LIMIT NOPROTO { int cap_ioctls_limit(int fd, \
+ const u_long *cmds, size_t ncmds); }
+535 AUE_CAP_IOCTLS_GET NOPROTO { ssize_t cap_ioctls_get(int fd, \
+ u_long *cmds, size_t maxcmds); }
+536 AUE_CAP_FCNTLS_LIMIT NOPROTO { int cap_fcntls_limit(int fd, \
+ uint32_t fcntlrights); }
+537 AUE_CAP_FCNTLS_GET NOPROTO { int cap_fcntls_get(int fd, \
+ uint32_t *fcntlrightsp); }
diff --git a/sys/compat/linux/linux_file.c b/sys/compat/linux/linux_file.c
index 0318a5c..346d178 100644
--- a/sys/compat/linux/linux_file.c
+++ b/sys/compat/linux/linux_file.c
@@ -154,6 +154,7 @@ linux_common_open(struct thread *td, int dirfd, char *path, int l_flags, int mod
SESS_LEADER(p) && !(p->p_flag & P_CONTROLT)) {
PROC_UNLOCK(p);
sx_unlock(&proctree_lock);
+ /* XXXPJD: Verify if TIOCSCTTY is allowed. */
if (fp->f_type == DTYPE_VNODE)
(void) fo_ioctl(fp, TIOCSCTTY, (caddr_t) 0,
td->td_ucred, td);
@@ -1038,11 +1039,11 @@ linux_pread(td, uap)
error = sys_pread(td, &bsd);
if (error == 0) {
- /* This seems to violate POSIX but linux does it */
- if ((error = fgetvp(td, uap->fd, CAP_READ, &vp)) != 0)
- return (error);
+ /* This seems to violate POSIX but linux does it */
+ if ((error = fgetvp(td, uap->fd, CAP_PREAD, &vp)) != 0)
+ return (error);
if (vp->v_type == VDIR) {
- vrele(vp);
+ vrele(vp);
return (EISDIR);
}
vrele(vp);
diff --git a/sys/compat/svr4/svr4_fcntl.c b/sys/compat/svr4/svr4_fcntl.c
index b9d3ace..86fab78 100644
--- a/sys/compat/svr4/svr4_fcntl.c
+++ b/sys/compat/svr4/svr4_fcntl.c
@@ -265,14 +265,14 @@ fd_revoke(td, fd)
/*
* If we ever want to support Capsicum on SVR4 processes (unlikely)
* or FreeBSD grows a native frevoke() (more likely), we will need a
- * CAP_REVOKE here.
+ * CAP_FREVOKE here.
*
- * In the meantime, use CAP_MASK_VALID: if a SVR4 process wants to
+ * In the meantime, use CAP_ALL: if a SVR4 process wants to
* do an frevoke(), it needs to do it on either a regular file
* descriptor or a fully-privileged capability (which is effectively
* the same as a non-capability-restricted file descriptor).
*/
- if ((error = fgetvp(td, fd, CAP_MASK_VALID, &vp)) != 0)
+ if ((error = fgetvp(td, fd, CAP_ALL, &vp)) != 0)
return (error);
if (vp->v_type != VCHR && vp->v_type != VBLK) {
diff --git a/sys/compat/svr4/svr4_filio.c b/sys/compat/svr4/svr4_filio.c
index 967169b..0fbba07 100644
--- a/sys/compat/svr4/svr4_filio.c
+++ b/sys/compat/svr4/svr4_filio.c
@@ -197,22 +197,24 @@ svr4_fil_ioctl(fp, td, retval, fd, cmd, data)
u_long cmd;
caddr_t data;
{
- int error;
- int num;
struct filedesc *fdp = td->td_proc->p_fd;
+ struct filedescent *fde;
+ int error, num;
*retval = 0;
switch (cmd) {
case SVR4_FIOCLEX:
FILEDESC_XLOCK(fdp);
- fdp->fd_ofileflags[fd] |= UF_EXCLOSE;
+ fde = &fdp->fd_ofiles[fd];
+ fde->fde_flags |= UF_EXCLOSE;
FILEDESC_XUNLOCK(fdp);
return 0;
case SVR4_FIONCLEX:
FILEDESC_XLOCK(fdp);
- fdp->fd_ofileflags[fd] &= ~UF_EXCLOSE;
+ fde = &fdp->fd_ofiles[fd];
+ fde->fde_flags &= ~UF_EXCLOSE;
FILEDESC_XUNLOCK(fdp);
return 0;
diff --git a/sys/compat/svr4/svr4_misc.c b/sys/compat/svr4/svr4_misc.c
index d6bc4eb..0cfaeae 100644
--- a/sys/compat/svr4/svr4_misc.c
+++ b/sys/compat/svr4/svr4_misc.c
@@ -247,10 +247,8 @@ svr4_sys_getdents64(td, uap)
DPRINTF(("svr4_sys_getdents64(%d, *, %d)\n",
uap->fd, uap->nbytes));
- if ((error = getvnode(td->td_proc->p_fd, uap->fd,
- CAP_READ | CAP_SEEK, &fp)) != 0) {
+ if ((error = getvnode(td->td_proc->p_fd, uap->fd, CAP_READ, &fp)) != 0)
return (error);
- }
if ((fp->f_flag & FREAD) == 0) {
fdrop(fp, td);
@@ -426,8 +424,7 @@ svr4_sys_getdents(td, uap)
if (uap->nbytes < 0)
return (EINVAL);
- if ((error = getvnode(td->td_proc->p_fd, uap->fd,
- CAP_READ | CAP_SEEK, &fp)) != 0)
+ if ((error = getvnode(td->td_proc->p_fd, uap->fd, CAP_READ, &fp)) != 0)
return (error);
if ((fp->f_flag & FREAD) == 0) {
diff --git a/sys/compat/svr4/svr4_stream.c b/sys/compat/svr4/svr4_stream.c
index 27014e3..1c7e83e 100644
--- a/sys/compat/svr4/svr4_stream.c
+++ b/sys/compat/svr4/svr4_stream.c
@@ -1449,7 +1449,7 @@ svr4_sys_putmsg(td, uap)
struct file *fp;
int error;
- if ((error = fget(td, uap->fd, CAP_WRITE, &fp)) != 0) {
+ if ((error = fget(td, uap->fd, CAP_SEND, &fp)) != 0) {
#ifdef DEBUG_SVR4
uprintf("putmsg: bad fp\n");
#endif
@@ -1621,7 +1621,7 @@ svr4_sys_getmsg(td, uap)
struct file *fp;
int error;
- if ((error = fget(td, uap->fd, CAP_READ, &fp)) != 0) {
+ if ((error = fget(td, uap->fd, CAP_RECV, &fp)) != 0) {
#ifdef DEBUG_SVR4
uprintf("getmsg: bad fp\n");
#endif
diff --git a/sys/dev/iscsi/initiator/iscsi.c b/sys/dev/iscsi/initiator/iscsi.c
index a93a685..3737b7f 100644
--- a/sys/dev/iscsi/initiator/iscsi.c
+++ b/sys/dev/iscsi/initiator/iscsi.c
@@ -387,11 +387,11 @@ i_setsoc(isc_session_t *sp, int fd, struct thread *td)
if(sp->soc != NULL)
isc_stop_receiver(sp);
- error = fget(td, fd, CAP_SOCK_ALL, &sp->fp);
+ error = fget(td, fd, CAP_SOCK_CLIENT, &sp->fp);
if(error)
return error;
- if((error = fgetsock(td, fd, CAP_SOCK_ALL, &sp->soc, 0)) == 0) {
+ if((error = fgetsock(td, fd, CAP_SOCK_CLIENT, &sp->soc, 0)) == 0) {
sp->td = td;
isc_start_receiver(sp);
}
diff --git a/sys/fs/fdescfs/fdesc_vfsops.c b/sys/fs/fdescfs/fdesc_vfsops.c
index c3dbccf..cb5e3c0 100644
--- a/sys/fs/fdescfs/fdesc_vfsops.c
+++ b/sys/fs/fdescfs/fdesc_vfsops.c
@@ -205,7 +205,7 @@ fdesc_statfs(mp, sbp)
last = min(fdp->fd_nfiles, lim);
freefd = 0;
for (i = fdp->fd_freefile; i < last; i++)
- if (fdp->fd_ofiles[i] == NULL)
+ if (fdp->fd_ofiles[i].fde_file == NULL)
freefd++;
/*
diff --git a/sys/fs/fdescfs/fdesc_vnops.c b/sys/fs/fdescfs/fdesc_vnops.c
index 3c4f44d..7923fc6 100644
--- a/sys/fs/fdescfs/fdesc_vnops.c
+++ b/sys/fs/fdescfs/fdesc_vnops.c
@@ -534,7 +534,7 @@ fdesc_readdir(ap)
dp->d_type = DT_DIR;
break;
default:
- if (fdp->fd_ofiles[fcnt] == NULL)
+ if (fdp->fd_ofiles[fcnt].fde_file == NULL)
break;
dp->d_namlen = sprintf(dp->d_name, "%d", fcnt);
dp->d_reclen = UIO_MX;
diff --git a/sys/fs/nfs/nfsdport.h b/sys/fs/nfs/nfsdport.h
index 529ada2..a09a6dd 100644
--- a/sys/fs/nfs/nfsdport.h
+++ b/sys/fs/nfs/nfsdport.h
@@ -94,8 +94,6 @@ struct nfsexstuff {
#define NFSFPCRED(f) ((f)->f_cred)
#define NFSFPFLAG(f) ((f)->f_flag)
-int fp_getfvp(NFSPROC_T *, int, struct file **, struct vnode **);
-
#define NFSNAMEICNDSET(n, c, o, f) do { \
(n)->cn_cred = (c); \
(n)->cn_nameiop = (o); \
diff --git a/sys/fs/nfsclient/nfs_clport.c b/sys/fs/nfsclient/nfs_clport.c
index 1179eb5..a07a67f 100644
--- a/sys/fs/nfsclient/nfs_clport.c
+++ b/sys/fs/nfsclient/nfs_clport.c
@@ -1215,7 +1215,7 @@ nfssvc_nfscl(struct thread *td, struct nfssvc_args *uap)
* pretend that we need them all. It is better to be too
* careful than too reckless.
*/
- if ((error = fget(td, nfscbdarg.sock, CAP_SOCK_ALL, &fp))
+ if ((error = fget(td, nfscbdarg.sock, CAP_SOCK_CLIENT, &fp))
!= 0) {
return (error);
}
diff --git a/sys/fs/nfsserver/nfs_nfsdport.c b/sys/fs/nfsserver/nfs_nfsdport.c
index 1731c72..ef98e2b 100644
--- a/sys/fs/nfsserver/nfs_nfsdport.c
+++ b/sys/fs/nfsserver/nfs_nfsdport.c
@@ -2767,7 +2767,7 @@ out:
/*
* glue for fp.
*/
-int
+static int
fp_getfvp(struct thread *p, int fd, struct file **fpp, struct vnode **vpp)
{
struct filedesc *fdp;
@@ -2775,8 +2775,8 @@ fp_getfvp(struct thread *p, int fd, struct file **fpp, struct vnode **vpp)
int error = 0;
fdp = p->td_proc->p_fd;
- if (fd >= fdp->fd_nfiles ||
- (fp = fdp->fd_ofiles[fd]) == NULL) {
+ if (fd < 0 || fd >= fdp->fd_nfiles ||
+ (fp = fdp->fd_ofiles[fd].fde_file) == NULL) {
error = EBADF;
goto out;
}
@@ -3041,7 +3041,7 @@ nfssvc_nfsd(struct thread *td, struct nfssvc_args *uap)
* pretend that we need them all. It is better to be too
* careful than too reckless.
*/
- if ((error = fget(td, sockarg.sock, CAP_SOCK_ALL, &fp)) != 0)
+ if ((error = fget(td, sockarg.sock, CAP_SOCK_SERVER, &fp)) != 0)
goto out;
if (fp->f_type != DTYPE_SOCKET) {
fdrop(fp, td);
diff --git a/sys/i386/ibcs2/ibcs2_misc.c b/sys/i386/ibcs2/ibcs2_misc.c
index 0692122..9f382aa 100644
--- a/sys/i386/ibcs2/ibcs2_misc.c
+++ b/sys/i386/ibcs2/ibcs2_misc.c
@@ -337,8 +337,7 @@ ibcs2_getdents(td, uap)
#define BSD_DIRENT(cp) ((struct dirent *)(cp))
#define IBCS2_RECLEN(reclen) (reclen + sizeof(u_short))
- if ((error = getvnode(td->td_proc->p_fd, uap->fd,
- CAP_READ | CAP_SEEK, &fp)) != 0)
+ if ((error = getvnode(td->td_proc->p_fd, uap->fd, CAP_READ, &fp)) != 0)
return (error);
if ((fp->f_flag & FREAD) == 0) {
fdrop(fp, td);
@@ -491,8 +490,8 @@ ibcs2_read(td, uap)
u_long *cookies = NULL, *cookiep;
int ncookies;
- if ((error = getvnode(td->td_proc->p_fd, uap->fd,
- CAP_READ | CAP_SEEK, &fp)) != 0) {
+ if ((error = getvnode(td->td_proc->p_fd, uap->fd, CAP_READ,
+ &fp)) != 0) {
if (error == EINVAL)
return sys_read(td, (struct read_args *)uap);
else
diff --git a/sys/kern/capabilities.conf b/sys/kern/capabilities.conf
index 11aad16..3c08782 100644
--- a/sys/kern/capabilities.conf
+++ b/sys/kern/capabilities.conf
@@ -110,9 +110,14 @@ aio_write
## Allow capability mode and capability system calls.
##
cap_enter
+cap_fcntls_get
+cap_fcntls_limit
cap_getmode
-cap_getrights
+cap_ioctls_get
+cap_ioctls_limit
cap_new
+cap_rights_get
+cap_rights_limit
##
## Allow read-only clock operations.
@@ -239,7 +244,7 @@ getcontext
## Allow directory I/O on a file descriptor, subject to capability rights.
## Originally we had separate capabilities for directory-specific read
## operations, but on BSD we allow reading the raw directory data, so we just
-## rely on CAP_READ and CAP_SEEK now.
+## rely on CAP_READ now.
##
getdents
getdirentries
@@ -317,13 +322,10 @@ gettimeofday
getuid
##
-## Disallow ioctl(2) for now, as frequently ioctl(2) operations have global
-## scope, but this is a tricky one as it is also required for tty control.
-## We do have a capability right for this operation.
+## Allow ioctl(2), which hopefully will be limited by applications only to
+## required commands with cap_ioctls_limit(2) syscall.
##
-## XXXRW: This needs to be revisited.
-##
-#ioctl
+ioctl
##
## Allow querying current process credential state.
diff --git a/sys/kern/kern_descrip.c b/sys/kern/kern_descrip.c
index acdea40..b146bab 100644
--- a/sys/kern/kern_descrip.c
+++ b/sys/kern/kern_descrip.c
@@ -110,15 +110,8 @@ MALLOC_DECLARE(M_FADVISE);
static uma_zone_t file_zone;
-/* Flags for do_dup() */
-#define DUP_FIXED 0x1 /* Force fixed allocation */
-#define DUP_FCNTL 0x2 /* fcntl()-style errors */
-#define DUP_CLOEXEC 0x4 /* Atomically set FD_CLOEXEC. */
-
static int closefp(struct filedesc *fdp, int fd, struct file *fp,
struct thread *td, int holdleaders);
-static int do_dup(struct thread *td, int flags, int old, int new,
- register_t *retval);
static int fd_first_free(struct filedesc *fdp, int low, int size);
static int fd_last_used(struct filedesc *fdp, int size);
static void fdgrowtable(struct filedesc *fdp, int nfd);
@@ -166,7 +159,7 @@ static int fill_vnode_info(struct vnode *vp, struct kinfo_file *kif);
* the process exits.
*/
struct freetable {
- struct file **ft_table;
+ struct filedescent *ft_table;
SLIST_ENTRY(freetable) ft_next;
};
@@ -177,8 +170,7 @@ struct freetable {
struct filedesc0 {
struct filedesc fd_fd;
SLIST_HEAD(, freetable) fd_free;
- struct file *fd_dfiles[NDFILE];
- char fd_dfileflags[NDFILE];
+ struct filedescent fd_dfiles[NDFILE];
NDSLOTTYPE fd_dmap[NDSLOTS(NDFILE)];
};
@@ -284,7 +276,8 @@ fdunused(struct filedesc *fdp, int fd)
FILEDESC_XLOCK_ASSERT(fdp);
KASSERT(fdisused(fdp, fd), ("fd=%d is already unused", fd));
- KASSERT(fdp->fd_ofiles[fd] == NULL, ("fd=%d is still in use", fd));
+ KASSERT(fdp->fd_ofiles[fd].fde_file == NULL,
+ ("fd=%d is still in use", fd));
fdp->fd_map[NDSLOT(fd)] &= ~NDBIT(fd);
if (fd < fdp->fd_freefile)
@@ -294,6 +287,20 @@ fdunused(struct filedesc *fdp, int fd)
}
/*
+ * Free descriptor fd: release its filecaps, zero the entry, mark it unused.
+ */
+static inline void
+fdfree(struct filedesc *fdp, int fd)
+{
+ struct filedescent *fde;
+
+ fde = &fdp->fd_ofiles[fd];
+ filecaps_free(&fde->fde_caps);
+ bzero(fde, sizeof(*fde));
+ fdunused(fdp, fd);
+}
+
+/*
* System calls on descriptors.
*/
#ifndef _SYS_SYSPROTO_H_
@@ -434,36 +441,14 @@ sys_fcntl(struct thread *td, struct fcntl_args *uap)
return (error);
}
-static inline int
-fdunwrap(int fd, cap_rights_t rights, struct filedesc *fdp, struct file **fpp)
-{
-
- FILEDESC_LOCK_ASSERT(fdp);
-
- *fpp = fget_locked(fdp, fd);
- if (*fpp == NULL)
- return (EBADF);
-
-#ifdef CAPABILITIES
- if ((*fpp)->f_type == DTYPE_CAPABILITY) {
- int err = cap_funwrap(*fpp, rights, fpp);
- if (err != 0) {
- *fpp = NULL;
- return (err);
- }
- }
-#endif /* CAPABILITIES */
- return (0);
-}
-
int
kern_fcntl(struct thread *td, int fd, int cmd, intptr_t arg)
{
struct filedesc *fdp;
struct flock *flp;
- struct file *fp;
+ struct file *fp, *fp2;
+ struct filedescent *fde;
struct proc *p;
- char *pop;
struct vnode *vp;
int error, flg, tmp;
u_int old, new;
@@ -505,8 +490,9 @@ kern_fcntl(struct thread *td, int fd, int cmd, intptr_t arg)
error = EBADF;
break;
}
- pop = &fdp->fd_ofileflags[fd];
- td->td_retval[0] = (*pop & UF_EXCLOSE) ? FD_CLOEXEC : 0;
+ fde = &fdp->fd_ofiles[fd];
+ td->td_retval[0] =
+ (fde->fde_flags & UF_EXCLOSE) ? FD_CLOEXEC : 0;
FILEDESC_SUNLOCK(fdp);
break;
@@ -517,32 +503,24 @@ kern_fcntl(struct thread *td, int fd, int cmd, intptr_t arg)
error = EBADF;
break;
}
- pop = &fdp->fd_ofileflags[fd];
- *pop = (*pop &~ UF_EXCLOSE) |
+ fde = &fdp->fd_ofiles[fd];
+ fde->fde_flags = (fde->fde_flags & ~UF_EXCLOSE) |
(arg & FD_CLOEXEC ? UF_EXCLOSE : 0);
FILEDESC_XUNLOCK(fdp);
break;
case F_GETFL:
- FILEDESC_SLOCK(fdp);
- error = fdunwrap(fd, CAP_FCNTL, fdp, &fp);
- if (error != 0) {
- FILEDESC_SUNLOCK(fdp);
+ error = fget_unlocked(fdp, fd, CAP_FCNTL, F_GETFL, &fp, NULL);
+ if (error != 0)
break;
- }
td->td_retval[0] = OFLAGS(fp->f_flag);
- FILEDESC_SUNLOCK(fdp);
+ fdrop(fp, td);
break;
case F_SETFL:
- FILEDESC_SLOCK(fdp);
- error = fdunwrap(fd, CAP_FCNTL, fdp, &fp);
- if (error != 0) {
- FILEDESC_SUNLOCK(fdp);
+ error = fget_unlocked(fdp, fd, CAP_FCNTL, F_SETFL, &fp, NULL);
+ if (error != 0)
break;
- }
- fhold(fp);
- FILEDESC_SUNLOCK(fdp);
do {
tmp = flg = fp->f_flag;
tmp &= ~FCNTLFLAGS;
@@ -550,7 +528,7 @@ kern_fcntl(struct thread *td, int fd, int cmd, intptr_t arg)
} while(atomic_cmpset_int(&fp->f_flag, flg, tmp) == 0);
tmp = fp->f_flag & FNONBLOCK;
error = fo_ioctl(fp, FIONBIO, &tmp, td->td_ucred, td);
- if (error) {
+ if (error != 0) {
fdrop(fp, td);
break;
}
@@ -567,14 +545,9 @@ kern_fcntl(struct thread *td, int fd, int cmd, intptr_t arg)
break;
case F_GETOWN:
- FILEDESC_SLOCK(fdp);
- error = fdunwrap(fd, CAP_FCNTL, fdp, &fp);
- if (error != 0) {
- FILEDESC_SUNLOCK(fdp);
+ error = fget_unlocked(fdp, fd, CAP_FCNTL, F_GETOWN, &fp, NULL);
+ if (error != 0)
break;
- }
- fhold(fp);
- FILEDESC_SUNLOCK(fdp);
error = fo_ioctl(fp, FIOGETOWN, &tmp, td->td_ucred, td);
if (error == 0)
td->td_retval[0] = tmp;
@@ -582,14 +555,9 @@ kern_fcntl(struct thread *td, int fd, int cmd, intptr_t arg)
break;
case F_SETOWN:
- FILEDESC_SLOCK(fdp);
- error = fdunwrap(fd, CAP_FCNTL, fdp, &fp);
- if (error != 0) {
- FILEDESC_SUNLOCK(fdp);
+ error = fget_unlocked(fdp, fd, CAP_FCNTL, F_SETOWN, &fp, NULL);
+ if (error != 0)
break;
- }
- fhold(fp);
- FILEDESC_SUNLOCK(fdp);
tmp = arg;
error = fo_ioctl(fp, FIOSETOWN, &tmp, td->td_ucred, td);
fdrop(fp, td);
@@ -608,17 +576,15 @@ kern_fcntl(struct thread *td, int fd, int cmd, intptr_t arg)
case F_SETLK:
do_setlk:
- FILEDESC_SLOCK(fdp);
- error = fdunwrap(fd, CAP_FLOCK, fdp, &fp);
- if (error != 0) {
- FILEDESC_SUNLOCK(fdp);
+ error = fget_unlocked(fdp, fd, CAP_FLOCK, 0, &fp, NULL);
+ if (error != 0)
break;
- }
if (fp->f_type != DTYPE_VNODE) {
- FILEDESC_SUNLOCK(fdp);
error = EBADF;
+ fdrop(fp, td);
break;
}
+
flp = (struct flock *)arg;
if (flp->l_whence == SEEK_CUR) {
foffset = foffset_get(fp);
@@ -627,16 +593,12 @@ kern_fcntl(struct thread *td, int fd, int cmd, intptr_t arg)
foffset > OFF_MAX - flp->l_start)) {
- FILEDESC_SUNLOCK(fdp);
+ /* fp came from fget_unlocked(); no filedesc lock is held here. */
error = EOVERFLOW;
+ fdrop(fp, td);
break;
}
flp->l_start += foffset;
}
- /*
- * VOP_ADVLOCK() may block.
- */
- fhold(fp);
- FILEDESC_SUNLOCK(fdp);
vp = fp->f_vnode;
switch (flp->l_type) {
case F_RDLCK:
@@ -703,37 +665,37 @@ kern_fcntl(struct thread *td, int fd, int cmd, intptr_t arg)
* that the closing thread was a bit slower and that the
* advisory lock succeeded before the close.
*/
- FILEDESC_SLOCK(fdp);
- if (fget_locked(fdp, fd) != fp) {
- FILEDESC_SUNLOCK(fdp);
+ error = fget_unlocked(fdp, fd, 0, 0, &fp2, NULL);
+ if (error != 0) {
+ fdrop(fp, td);
+ break;
+ }
+ if (fp != fp2) {
flp->l_whence = SEEK_SET;
flp->l_start = 0;
flp->l_len = 0;
flp->l_type = F_UNLCK;
(void) VOP_ADVLOCK(vp, (caddr_t)p->p_leader,
F_UNLCK, flp, F_POSIX);
- } else
- FILEDESC_SUNLOCK(fdp);
+ }
fdrop(fp, td);
+ fdrop(fp2, td);
break;
case F_GETLK:
- FILEDESC_SLOCK(fdp);
- error = fdunwrap(fd, CAP_FLOCK, fdp, &fp);
- if (error != 0) {
- FILEDESC_SUNLOCK(fdp);
+ error = fget_unlocked(fdp, fd, CAP_FLOCK, 0, &fp, NULL);
+ if (error != 0)
break;
- }
if (fp->f_type != DTYPE_VNODE) {
- FILEDESC_SUNLOCK(fdp);
error = EBADF;
+ fdrop(fp, td);
break;
}
flp = (struct flock *)arg;
if (flp->l_type != F_RDLCK && flp->l_type != F_WRLCK &&
flp->l_type != F_UNLCK) {
- FILEDESC_SUNLOCK(fdp);
error = EINVAL;
+ fdrop(fp, td);
break;
}
if (flp->l_whence == SEEK_CUR) {
@@ -744,15 +706,11 @@ kern_fcntl(struct thread *td, int fd, int cmd, intptr_t arg)
foffset < OFF_MIN - flp->l_start)) {
- FILEDESC_SUNLOCK(fdp);
+ /* fp came from fget_unlocked(); no filedesc lock is held here. */
error = EOVERFLOW;
+ fdrop(fp, td);
break;
}
flp->l_start += foffset;
}
- /*
- * VOP_ADVLOCK() may block.
- */
- fhold(fp);
- FILEDESC_SUNLOCK(fdp);
vp = fp->f_vnode;
error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_GETLK, flp,
F_POSIX);
@@ -763,19 +721,14 @@ kern_fcntl(struct thread *td, int fd, int cmd, intptr_t arg)
arg = arg ? 128 * 1024: 0;
/* FALLTHROUGH */
case F_READAHEAD:
- FILEDESC_SLOCK(fdp);
- if ((fp = fget_locked(fdp, fd)) == NULL) {
- FILEDESC_SUNLOCK(fdp);
- error = EBADF;
+ error = fget_unlocked(fdp, fd, 0, 0, &fp, NULL);
+ if (error != 0)
break;
- }
if (fp->f_type != DTYPE_VNODE) {
- FILEDESC_SUNLOCK(fdp);
+ fdrop(fp, td);
error = EBADF;
break;
}
- fhold(fp);
- FILEDESC_SUNLOCK(fdp);
if (arg >= 0) {
vp = fp->f_vnode;
error = vn_lock(vp, LK_SHARED);
@@ -809,11 +762,12 @@ kern_fcntl(struct thread *td, int fd, int cmd, intptr_t arg)
/*
* Common code for dup, dup2, fcntl(F_DUPFD) and fcntl(F_DUP2FD).
*/
-static int
+int
do_dup(struct thread *td, int flags, int old, int new,
register_t *retval)
{
struct filedesc *fdp;
+ struct filedescent *oldfde, *newfde;
struct proc *p;
struct file *fp;
struct file *delfp;
@@ -842,14 +796,15 @@ do_dup(struct thread *td, int flags, int old, int new,
FILEDESC_XUNLOCK(fdp);
return (EBADF);
}
+ oldfde = &fdp->fd_ofiles[old];
if (flags & DUP_FIXED && old == new) {
*retval = new;
if (flags & DUP_CLOEXEC)
- fdp->fd_ofileflags[new] |= UF_EXCLOSE;
+ fdp->fd_ofiles[new].fde_flags |= UF_EXCLOSE;
FILEDESC_XUNLOCK(fdp);
return (0);
}
- fp = fdp->fd_ofiles[old];
+ fp = oldfde->fde_file;
fhold(fp);
/*
@@ -878,8 +833,10 @@ do_dup(struct thread *td, int flags, int old, int new,
}
#endif
fdgrowtable(fdp, new + 1);
+ oldfde = &fdp->fd_ofiles[old];
}
- if (fdp->fd_ofiles[new] == NULL)
+ newfde = &fdp->fd_ofiles[new];
+ if (newfde->fde_file == NULL)
fdused(fdp, new);
} else {
if ((error = fdalloc(td, new, &new)) != 0) {
@@ -887,20 +844,26 @@ do_dup(struct thread *td, int flags, int old, int new,
fdrop(fp, td);
return (error);
}
+ newfde = &fdp->fd_ofiles[new];
}
- KASSERT(fp == fdp->fd_ofiles[old], ("old fd has been modified"));
+ KASSERT(fp == oldfde->fde_file, ("old fd has been modified"));
KASSERT(old != new, ("new fd is same as old"));
- delfp = fdp->fd_ofiles[new];
+ delfp = newfde->fde_file;
+
/*
* Duplicate the source descriptor.
+ * Release capability data still attached to the target slot first;
+ * the structure assignment below would otherwise overwrite it.
*/
- fdp->fd_ofiles[new] = fp;
+ filecaps_free(&newfde->fde_caps);
+ *newfde = *oldfde;
+ filecaps_copy(&oldfde->fde_caps, &newfde->fde_caps);
if ((flags & DUP_CLOEXEC) != 0)
- fdp->fd_ofileflags[new] = fdp->fd_ofileflags[old] | UF_EXCLOSE;
+ newfde->fde_flags = oldfde->fde_flags | UF_EXCLOSE;
else
- fdp->fd_ofileflags[new] = fdp->fd_ofileflags[old] & ~UF_EXCLOSE;
+ newfde->fde_flags = oldfde->fde_flags & ~UF_EXCLOSE;
if (new > fdp->fd_lastfile)
fdp->fd_lastfile = new;
*retval = new;
@@ -1141,7 +1101,6 @@ static int
closefp(struct filedesc *fdp, int fd, struct file *fp, struct thread *td,
int holdleaders)
{
- struct file *fp_object;
int error;
FILEDESC_XLOCK_ASSERT(fdp);
@@ -1167,12 +1126,10 @@ closefp(struct filedesc *fdp, int fd, struct file *fp, struct thread *td,
knote_fdclose(td, fd);
/*
- * When we're closing an fd with a capability, we need to notify
- * mqueue if the underlying object is of type mqueue.
+ * We need to notify mqueue if the object is of type mqueue.
*/
- (void)cap_funwrap(fp, 0, &fp_object);
- if (fp_object->f_type == DTYPE_MQUEUE)
- mq_fdclose(td, fd, fp_object);
+ if (fp->f_type == DTYPE_MQUEUE)
+ mq_fdclose(td, fd, fp);
FILEDESC_XUNLOCK(fdp);
error = closef(fp, td);
@@ -1224,9 +1181,7 @@ kern_close(td, fd)
FILEDESC_XUNLOCK(fdp);
return (EBADF);
}
- fdp->fd_ofiles[fd] = NULL;
- fdp->fd_ofileflags[fd] = 0;
- fdunused(fdp, fd);
+ fdfree(fdp, fd);
/* closefp() drops the FILEDESC lock for us. */
return (closefp(fdp, fd, fp, td, 1));
@@ -1258,7 +1213,7 @@ sys_closefrom(struct thread *td, struct closefrom_args *uap)
uap->lowfd = 0;
FILEDESC_SLOCK(fdp);
for (fd = uap->lowfd; fd < fdp->fd_nfiles; fd++) {
- if (fdp->fd_ofiles[fd] != NULL) {
+ if (fdp->fd_ofiles[fd].fde_file != NULL) {
FILEDESC_SUNLOCK(fdp);
(void)kern_close(td, fd);
FILEDESC_SLOCK(fdp);
@@ -1410,6 +1365,91 @@ out:
}
/*
+ * Initialize filecaps structure: all fields zeroed, fc_nioctls set to -1.
+ */
+void
+filecaps_init(struct filecaps *fcaps)
+{
+
+ bzero(fcaps, sizeof(*fcaps));
+ fcaps->fc_nioctls = -1;
+}
+
+/*
+ * Copy filecaps; a non-NULL fc_ioctls array is duplicated with malloc(M_WAITOK).
+ */
+void
+filecaps_copy(const struct filecaps *src, struct filecaps *dst)
+{
+ size_t size;
+
+ *dst = *src;
+ if (src->fc_ioctls != NULL) {
+ KASSERT(src->fc_nioctls > 0,
+ ("fc_ioctls != NULL, but fc_nioctls=%hd", src->fc_nioctls));
+
+ size = sizeof(src->fc_ioctls[0]) * src->fc_nioctls;
+ dst->fc_ioctls = malloc(size, M_TEMP, M_WAITOK);
+ bcopy(src->fc_ioctls, dst->fc_ioctls, size);
+ }
+}
+
+/*
+ * Move filecaps to dst by structure copy, then zero src (nothing is allocated).
+ */
+static void
+filecaps_move(struct filecaps *src, struct filecaps *dst)
+{
+
+ *dst = *src;
+ bzero(src, sizeof(*src));
+}
+
+/*
+ * Fill filecaps with full rights: CAP_ALL, CAP_FCNTL_ALL, no ioctls list (-1).
+ */
+static void
+filecaps_fill(struct filecaps *fcaps)
+{
+
+ fcaps->fc_rights = CAP_ALL;
+ fcaps->fc_ioctls = NULL;
+ fcaps->fc_nioctls = -1;
+ fcaps->fc_fcntls = CAP_FCNTL_ALL;
+}
+
+/*
+ * Free the fc_ioctls array (M_TEMP) and zero the whole filecaps structure.
+ */
+void
+filecaps_free(struct filecaps *fcaps)
+{
+
+ free(fcaps->fc_ioctls, M_TEMP);
+ bzero(fcaps, sizeof(*fcaps));
+}
+
+/*
+ * Validate the given filecaps with KASSERTs; func is printed in the message.
+ */
+static void
+filecaps_validate(const struct filecaps *fcaps, const char *func)
+{
+
+ KASSERT((fcaps->fc_rights & ~CAP_MASK_VALID) == 0,
+ ("%s: invalid rights", func));
+ KASSERT((fcaps->fc_fcntls & ~CAP_FCNTL_ALL) == 0,
+ ("%s: invalid fcntls", func));
+ KASSERT(fcaps->fc_fcntls == 0 || (fcaps->fc_rights & CAP_FCNTL) != 0,
+ ("%s: fcntls without CAP_FCNTL", func));
+ KASSERT(fcaps->fc_ioctls != NULL ? fcaps->fc_nioctls > 0 :
+ (fcaps->fc_nioctls == -1 || fcaps->fc_nioctls == 0),
+ ("%s: invalid ioctls", func));
+ KASSERT(fcaps->fc_nioctls == 0 || (fcaps->fc_rights & CAP_IOCTL) != 0,
+ ("%s: ioctls without CAP_IOCTL", func));
+}
+
+/*
* Grow the file table to accomodate (at least) nfd descriptors.
*/
static void
@@ -1417,9 +1457,8 @@ fdgrowtable(struct filedesc *fdp, int nfd)
{
struct filedesc0 *fdp0;
struct freetable *ft;
- struct file **ntable;
- struct file **otable;
- char *nfileflags, *ofileflags;
+ struct filedescent *ntable;
+ struct filedescent *otable;
int nnfiles, onfiles;
NDSLOTTYPE *nmap, *omap;
@@ -1430,7 +1469,6 @@ fdgrowtable(struct filedesc *fdp, int nfd)
/* save old values */
onfiles = fdp->fd_nfiles;
otable = fdp->fd_ofiles;
- ofileflags = fdp->fd_ofileflags;
omap = fdp->fd_map;
/* compute the size of the new table */
@@ -1440,27 +1478,24 @@ fdgrowtable(struct filedesc *fdp, int nfd)
return;
/*
- * Allocate a new table and map. We need enough space for a) the
- * file entries themselves, b) the file flags, and c) the struct
- * freetable we will use when we decommission the table and place
- * it on the freelist. We place the struct freetable in the
- * middle so we don't have to worry about padding.
+ * Allocate a new table and map. We need enough space for the
+ * file entries themselves and the struct freetable we will use
+ * when we decommission the table and place it on the freelist.
+ * We place the struct freetable in the middle so we don't have
+ * to worry about padding.
*/
- ntable = malloc(nnfiles * sizeof(*ntable) + sizeof(struct freetable) +
- nnfiles * sizeof(*nfileflags), M_FILEDESC, M_ZERO | M_WAITOK);
- nfileflags = (char *)&ntable[nnfiles] + sizeof(struct freetable);
+ ntable = malloc(nnfiles * sizeof(ntable[0]) + sizeof(struct freetable),
+ M_FILEDESC, M_ZERO | M_WAITOK);
nmap = malloc(NDSLOTS(nnfiles) * NDSLOTSIZE, M_FILEDESC,
M_ZERO | M_WAITOK);
/* copy the old data over and point at the new tables */
memcpy(ntable, otable, onfiles * sizeof(*otable));
- memcpy(nfileflags, ofileflags, onfiles * sizeof(*ofileflags));
memcpy(nmap, omap, NDSLOTS(onfiles) * sizeof(*omap));
/* update the pointers and counters */
fdp->fd_nfiles = nnfiles;
fdp->fd_ofiles = ntable;
- fdp->fd_ofileflags = nfileflags;
fdp->fd_map = nmap;
/*
@@ -1536,8 +1572,9 @@ fdalloc(struct thread *td, int minfd, int *result)
("invalid descriptor %d", fd));
KASSERT(!fdisused(fdp, fd),
("fd_first_free() returned non-free descriptor"));
- KASSERT(fdp->fd_ofiles[fd] == NULL, ("file descriptor isn't free"));
- KASSERT(fdp->fd_ofileflags[fd] == 0, ("file flags are set"));
+ KASSERT(fdp->fd_ofiles[fd].fde_file == NULL,
+ ("file descriptor isn't free"));
+ KASSERT(fdp->fd_ofiles[fd].fde_flags == 0, ("file flags are set"));
fdused(fdp, fd);
*result = fd;
return (0);
@@ -1568,7 +1605,7 @@ fdavail(struct thread *td, int n)
return (1);
last = min(fdp->fd_nfiles, lim);
for (i = fdp->fd_freefile; i < last; i++) {
- if (fdp->fd_ofiles[i] == NULL && --n <= 0)
+ if (fdp->fd_ofiles[i].fde_file == NULL && --n <= 0)
return (1);
}
return (0);
@@ -1591,7 +1628,7 @@ falloc(struct thread *td, struct file **resultfp, int *resultfd, int flags)
if (error)
return (error); /* no reference held on error */
- error = finstall(td, fp, &fd, flags);
+ error = finstall(td, fp, &fd, flags, NULL);
if (error) {
fdrop(fp, td); /* one reference (fp only) */
return (error);
@@ -1645,13 +1682,17 @@ falloc_noinstall(struct thread *td, struct file **resultfp)
* Install a file in a file descriptor table.
*/
int
-finstall(struct thread *td, struct file *fp, int *fd, int flags)
+finstall(struct thread *td, struct file *fp, int *fd, int flags,
+ struct filecaps *fcaps)
{
struct filedesc *fdp = td->td_proc->p_fd;
+ struct filedescent *fde;
int error;
KASSERT(fd != NULL, ("%s: fd == NULL", __func__));
KASSERT(fp != NULL, ("%s: fp == NULL", __func__));
+ if (fcaps != NULL)
+ filecaps_validate(fcaps, __func__);
FILEDESC_XLOCK(fdp);
if ((error = fdalloc(td, 0, fd))) {
@@ -1659,9 +1700,14 @@ finstall(struct thread *td, struct file *fp, int *fd, int flags)
return (error);
}
fhold(fp);
- fdp->fd_ofiles[*fd] = fp;
+ fde = &fdp->fd_ofiles[*fd];
+ fde->fde_file = fp;
if ((flags & O_CLOEXEC) != 0)
- fdp->fd_ofileflags[*fd] |= UF_EXCLOSE;
+ fde->fde_flags |= UF_EXCLOSE;
+ if (fcaps != NULL)
+ filecaps_move(fcaps, &fde->fde_caps);
+ else
+ filecaps_fill(&fde->fde_caps);
FILEDESC_XUNLOCK(fdp);
return (0);
}
@@ -1696,7 +1742,6 @@ fdinit(struct filedesc *fdp)
newfdp->fd_fd.fd_holdcnt = 1;
newfdp->fd_fd.fd_cmask = CMASK;
newfdp->fd_fd.fd_ofiles = newfdp->fd_dfiles;
- newfdp->fd_fd.fd_ofileflags = newfdp->fd_dfileflags;
newfdp->fd_fd.fd_nfiles = NDFILE;
newfdp->fd_fd.fd_map = newfdp->fd_dmap;
newfdp->fd_fd.fd_lastfile = -1;
@@ -1764,7 +1809,7 @@ fdunshare(struct proc *p, struct thread *td)
FILEDESC_XUNLOCK(p->p_fd);
tmp = fdcopy(p->p_fd);
- fdfree(td);
+ fdescfree(td);
p->p_fd = tmp;
} else
FILEDESC_XUNLOCK(p->p_fd);
@@ -1778,6 +1823,7 @@ struct filedesc *
fdcopy(struct filedesc *fdp)
{
struct filedesc *newfdp;
+ struct filedescent *nfde, *ofde;
int i;
/* Certain daemons might not have file descriptors. */
@@ -1796,12 +1842,14 @@ fdcopy(struct filedesc *fdp)
/* copy all passable descriptors (i.e. not kqueue) */
newfdp->fd_freefile = -1;
for (i = 0; i <= fdp->fd_lastfile; ++i) {
+ ofde = &fdp->fd_ofiles[i];
if (fdisused(fdp, i) &&
- (fdp->fd_ofiles[i]->f_ops->fo_flags & DFLAG_PASSABLE) &&
- fdp->fd_ofiles[i]->f_ops != &badfileops) {
- newfdp->fd_ofiles[i] = fdp->fd_ofiles[i];
- newfdp->fd_ofileflags[i] = fdp->fd_ofileflags[i];
- fhold(newfdp->fd_ofiles[i]);
+ (ofde->fde_file->f_ops->fo_flags & DFLAG_PASSABLE) &&
+ ofde->fde_file->f_ops != &badfileops) {
+ nfde = &newfdp->fd_ofiles[i];
+ *nfde = *ofde;
+ filecaps_copy(&ofde->fde_caps, &nfde->fde_caps);
+ fhold(nfde->fde_file);
newfdp->fd_lastfile = i;
} else {
if (newfdp->fd_freefile == -1)
@@ -1811,9 +1859,10 @@ fdcopy(struct filedesc *fdp)
newfdp->fd_cmask = fdp->fd_cmask;
FILEDESC_SUNLOCK(fdp);
FILEDESC_XLOCK(newfdp);
- for (i = 0; i <= newfdp->fd_lastfile; ++i)
- if (newfdp->fd_ofiles[i] != NULL)
+ for (i = 0; i <= newfdp->fd_lastfile; ++i) {
+ if (newfdp->fd_ofiles[i].fde_file != NULL)
fdused(newfdp, i);
+ }
if (newfdp->fd_freefile == -1)
newfdp->fd_freefile = i;
FILEDESC_XUNLOCK(newfdp);
@@ -1824,7 +1873,7 @@ fdcopy(struct filedesc *fdp)
* Release a filedesc structure.
*/
void
-fdfree(struct thread *td)
+fdescfree(struct thread *td)
{
struct filedesc *fdp;
int i;
@@ -1849,12 +1898,12 @@ fdfree(struct thread *td)
if (fdtol != NULL) {
FILEDESC_XLOCK(fdp);
KASSERT(fdtol->fdl_refcount > 0,
- ("filedesc_to_refcount botch: fdl_refcount=%d",
- fdtol->fdl_refcount));
+ ("filedesc_to_refcount botch: fdl_refcount=%d",
+ fdtol->fdl_refcount));
if (fdtol->fdl_refcount == 1 &&
(td->td_proc->p_leader->p_flag & P_ADVLOCK) != 0) {
for (i = 0; i <= fdp->fd_lastfile; i++) {
- fp = fdp->fd_ofiles[i];
+ fp = fdp->fd_ofiles[i].fde_file;
if (fp == NULL || fp->f_type != DTYPE_VNODE)
continue;
fhold(fp);
@@ -1914,10 +1963,10 @@ fdfree(struct thread *td)
return;
for (i = 0; i <= fdp->fd_lastfile; i++) {
- fp = fdp->fd_ofiles[i];
+ fp = fdp->fd_ofiles[i].fde_file;
if (fp != NULL) {
FILEDESC_XLOCK(fdp);
- fdp->fd_ofiles[i] = NULL;
+ fdfree(fdp, i);
FILEDESC_XUNLOCK(fdp);
(void) closef(fp, td);
}
@@ -1982,6 +2031,7 @@ void
setugidsafety(struct thread *td)
{
struct filedesc *fdp;
+ struct file *fp;
int i;
/* Certain daemons might not have file descriptors. */
@@ -1997,18 +2047,14 @@ setugidsafety(struct thread *td)
for (i = 0; i <= fdp->fd_lastfile; i++) {
if (i > 2)
break;
- if (fdp->fd_ofiles[i] && is_unsafe(fdp->fd_ofiles[i])) {
- struct file *fp;
-
+ fp = fdp->fd_ofiles[i].fde_file;
+ if (fp != NULL && is_unsafe(fp)) {
knote_fdclose(td, i);
/*
* NULL-out descriptor prior to close to avoid
* a race while close blocks.
*/
- fp = fdp->fd_ofiles[i];
- fdp->fd_ofiles[i] = NULL;
- fdp->fd_ofileflags[i] = 0;
- fdunused(fdp, i);
+ fdfree(fdp, i);
FILEDESC_XUNLOCK(fdp);
(void) closef(fp, td);
FILEDESC_XLOCK(fdp);
@@ -2029,9 +2075,8 @@ fdclose(struct filedesc *fdp, struct file *fp, int idx, struct thread *td)
{
FILEDESC_XLOCK(fdp);
- if (fdp->fd_ofiles[idx] == fp) {
- fdp->fd_ofiles[idx] = NULL;
- fdunused(fdp, idx);
+ if (fdp->fd_ofiles[idx].fde_file == fp) {
+ fdfree(fdp, idx);
FILEDESC_XUNLOCK(fdp);
fdrop(fp, td);
} else
@@ -2045,6 +2090,7 @@ void
fdcloseexec(struct thread *td)
{
struct filedesc *fdp;
+ struct filedescent *fde;
struct file *fp;
int i;
@@ -2054,17 +2100,16 @@ fdcloseexec(struct thread *td)
return;
/*
- * We cannot cache fd_ofiles or fd_ofileflags since operations
+ * We cannot cache fd_ofiles since operations
* may block and rip them out from under us.
*/
FILEDESC_XLOCK(fdp);
for (i = 0; i <= fdp->fd_lastfile; i++) {
- fp = fdp->fd_ofiles[i];
+ fde = &fdp->fd_ofiles[i];
+ fp = fde->fde_file;
if (fp != NULL && (fp->f_type == DTYPE_MQUEUE ||
- (fdp->fd_ofileflags[i] & UF_EXCLOSE))) {
- fdp->fd_ofiles[i] = NULL;
- fdp->fd_ofileflags[i] = 0;
- fdunused(fdp, i);
+ (fde->fde_flags & UF_EXCLOSE))) {
+ fdfree(fdp, i);
(void) closefp(fdp, i, fp, td, 0);
/* closefp() drops the FILEDESC lock. */
FILEDESC_XLOCK(fdp);
@@ -2094,7 +2139,7 @@ fdcheckstd(struct thread *td)
devnull = -1;
error = 0;
for (i = 0; i < 3; i++) {
- if (fdp->fd_ofiles[i] != NULL)
+ if (fdp->fd_ofiles[i].fde_file != NULL)
continue;
if (devnull < 0) {
save = td->td_retval[0];
@@ -2129,7 +2174,6 @@ closef(struct file *fp, struct thread *td)
struct flock lf;
struct filedesc_to_leader *fdtol;
struct filedesc *fdp;
- struct file *fp_object;
/*
* POSIX record locking dictates that any close releases ALL
@@ -2142,13 +2186,9 @@ closef(struct file *fp, struct thread *td)
* NULL thread pointer when there really is no owning
* context that might have locks, or the locks will be
* leaked.
- *
- * If this is a capability, we do lock processing under the underlying
- * node, not the capability itself.
*/
- (void)cap_funwrap(fp, 0, &fp_object);
- if (fp_object->f_type == DTYPE_VNODE && td != NULL) {
- vp = fp_object->f_vnode;
+ if (fp->f_type == DTYPE_VNODE && td != NULL) {
+ vp = fp->f_vnode;
if ((td->td_proc->p_leader->p_flag & P_ADVLOCK) != 0) {
lf.l_whence = SEEK_SET;
lf.l_start = 0;
@@ -2177,7 +2217,7 @@ closef(struct file *fp, struct thread *td)
lf.l_start = 0;
lf.l_len = 0;
lf.l_type = F_UNLCK;
- vp = fp_object->f_vnode;
+ vp = fp->f_vnode;
(void) VOP_ADVLOCK(vp,
(caddr_t)fdtol->fdl_leader, F_UNLCK, &lf,
F_POSIX);
@@ -2211,14 +2251,19 @@ finit(struct file *fp, u_int flag, short type, void *data, struct fileops *ops)
atomic_store_rel_ptr((volatile uintptr_t *)&fp->f_ops, (uintptr_t)ops);
}
-struct file *
-fget_unlocked(struct filedesc *fdp, int fd)
+int
+fget_unlocked(struct filedesc *fdp, int fd, cap_rights_t needrights,
+ int needfcntl, struct file **fpp, cap_rights_t *haverightsp)
{
struct file *fp;
u_int count;
+#ifdef CAPABILITIES
+ cap_rights_t haverights;
+ int error;
+#endif
if (fd < 0 || fd >= fdp->fd_nfiles)
- return (NULL);
+ return (EBADF);
/*
* Fetch the descriptor locklessly. We avoid fdrop() races by
* never raising a refcount above 0. To accomplish this we have
@@ -2228,9 +2273,20 @@ fget_unlocked(struct filedesc *fdp, int fd)
* due to preemption.
*/
for (;;) {
- fp = fdp->fd_ofiles[fd];
+ fp = fdp->fd_ofiles[fd].fde_file;
if (fp == NULL)
- break;
+ return (EBADF);
+#ifdef CAPABILITIES
+ haverights = cap_rights(fdp, fd);
+ error = cap_check(haverights, needrights);
+ if (error != 0)
+ return (error);
+ if ((needrights & CAP_FCNTL) != 0) {
+ error = cap_fcntl_check(fdp, fd, needfcntl);
+ if (error != 0)
+ return (error);
+ }
+#endif
count = fp->f_count;
if (count == 0)
continue;
@@ -2240,12 +2296,19 @@ fget_unlocked(struct filedesc *fdp, int fd)
*/
if (atomic_cmpset_acq_int(&fp->f_count, count, count + 1) != 1)
continue;
- if (fp == fdp->fd_ofiles[fd])
+ if (fp == fdp->fd_ofiles[fd].fde_file)
break;
fdrop(fp, curthread);
}
-
- return (fp);
+ *fpp = fp;
+ if (haverightsp != NULL) {
+#ifdef CAPABILITIES
+ *haverightsp = haverights;
+#else
+ *haverightsp = CAP_ALL;
+#endif
+ }
+ return (0);
}
/*
@@ -2255,33 +2318,29 @@ fget_unlocked(struct filedesc *fdp, int fd)
* If the descriptor doesn't exist or doesn't match 'flags', EBADF is
* returned.
*
- * If the FGET_GETCAP flag is set, the capability itself will be returned.
- * Calling _fget() with FGET_GETCAP on a non-capability will return EINVAL.
- * Otherwise, if the file is a capability, its rights will be checked against
- * the capability rights mask, and if successful, the object will be unwrapped.
+ * File's rights will be checked against the capability rights mask.
*
* If an error occured the non-zero error is returned and *fpp is set to
* NULL. Otherwise *fpp is held and set and zero is returned. Caller is
* responsible for fdrop().
*/
-#define FGET_GETCAP 0x00000001
static __inline int
_fget(struct thread *td, int fd, struct file **fpp, int flags,
- cap_rights_t needrights, cap_rights_t *haverightsp, u_char *maxprotp,
- int fget_flags)
+ cap_rights_t needrights, u_char *maxprotp)
{
struct filedesc *fdp;
struct file *fp;
-#ifdef CAPABILITIES
- struct file *fp_fromcap;
-#endif
+ cap_rights_t haverights;
int error;
*fpp = NULL;
if (td == NULL || (fdp = td->td_proc->p_fd) == NULL)
return (EBADF);
- if ((fp = fget_unlocked(fdp, fd)) == NULL)
- return (EBADF);
+ if (maxprotp != NULL)
+ needrights |= CAP_MMAP;
+ error = fget_unlocked(fdp, fd, needrights, 0, &fp, &haverights);
+ if (error != 0)
+ return (error);
if (fp->f_ops == &badfileops) {
fdrop(fp, td);
return (EBADF);
@@ -2289,50 +2348,11 @@ _fget(struct thread *td, int fd, struct file **fpp, int flags,
#ifdef CAPABILITIES
/*
- * If this is a capability, what rights does it have?
+ * If requested, convert capability rights to access flags.
*/
- if (haverightsp != NULL) {
- if (fp->f_type == DTYPE_CAPABILITY)
- *haverightsp = cap_rights(fp);
- else
- *haverightsp = CAP_MASK_VALID;
- }
-
- /*
- * If a capability has been requested, return the capability directly.
- * Otherwise, check capability rights, extract the underlying object,
- * and check its access flags.
- */
- if (fget_flags & FGET_GETCAP) {
- if (fp->f_type != DTYPE_CAPABILITY) {
- fdrop(fp, td);
- return (EINVAL);
- }
- } else {
- if (maxprotp == NULL)
- error = cap_funwrap(fp, needrights, &fp_fromcap);
- else
- error = cap_funwrap_mmap(fp, needrights, maxprotp,
- &fp_fromcap);
- if (error != 0) {
- fdrop(fp, td);
- return (error);
- }
-
- /*
- * If we've unwrapped a file, drop the original capability
- * and hold the new descriptor. fp after this point refers to
- * the actual (unwrapped) object, not the capability.
- */
- if (fp != fp_fromcap) {
- fhold(fp_fromcap);
- fdrop(fp, td);
- fp = fp_fromcap;
- }
- }
+ if (maxprotp != NULL)
+ *maxprotp = cap_rights_to_vmprot(haverights);
#else /* !CAPABILITIES */
- KASSERT(fp->f_type != DTYPE_CAPABILITY,
- ("%s: saw capability", __func__));
if (maxprotp != NULL)
*maxprotp = VM_PROT_ALL;
#endif /* CAPABILITIES */
@@ -2371,7 +2391,7 @@ int
fget(struct thread *td, int fd, cap_rights_t rights, struct file **fpp)
{
- return(_fget(td, fd, fpp, 0, rights, NULL, NULL, 0));
+ return(_fget(td, fd, fpp, 0, rights, NULL));
}
int
@@ -2379,37 +2399,24 @@ fget_mmap(struct thread *td, int fd, cap_rights_t rights, u_char *maxprotp,
struct file **fpp)
{
- return (_fget(td, fd, fpp, 0, rights, NULL, maxprotp, 0));
+ return (_fget(td, fd, fpp, 0, rights, maxprotp));
}
int
fget_read(struct thread *td, int fd, cap_rights_t rights, struct file **fpp)
{
- return(_fget(td, fd, fpp, FREAD, rights, NULL, NULL, 0));
+ return(_fget(td, fd, fpp, FREAD, rights, NULL));
}
int
fget_write(struct thread *td, int fd, cap_rights_t rights, struct file **fpp)
{
- return (_fget(td, fd, fpp, FWRITE, rights, NULL, NULL, 0));
+ return (_fget(td, fd, fpp, FWRITE, rights, NULL));
}
/*
- * Unlike the other fget() calls, which accept and check capability rights
- * but never return capabilities, fgetcap() returns the capability but doesn't
- * check capability rights.
- */
-int
-fgetcap(struct thread *td, int fd, struct file **fpp)
-{
-
- return (_fget(td, fd, fpp, 0, 0, NULL, NULL, FGET_GETCAP));
-}
-
-
-/*
* Like fget() but loads the underlying vnode, or returns an error if the
* descriptor does not represent a vnode. Note that pipes use vnodes but
* never have VM objects. The returned vnode will be vref()'d.
@@ -2418,14 +2425,14 @@ fgetcap(struct thread *td, int fd, struct file **fpp)
*/
static __inline int
_fgetvp(struct thread *td, int fd, int flags, cap_rights_t needrights,
- cap_rights_t *haverightsp, struct vnode **vpp)
+ struct vnode **vpp)
{
struct file *fp;
int error;
*vpp = NULL;
- if ((error = _fget(td, fd, &fp, flags, needrights, haverightsp,
- NULL, 0)) != 0)
+ error = _fget(td, fd, &fp, flags, needrights, NULL);
+ if (error)
return (error);
if (fp->f_vnode == NULL) {
error = EINVAL;
@@ -2442,28 +2449,54 @@ int
fgetvp(struct thread *td, int fd, cap_rights_t rights, struct vnode **vpp)
{
- return (_fgetvp(td, fd, 0, rights, NULL, vpp));
+ return (_fgetvp(td, fd, 0, rights, vpp));
}
int
-fgetvp_rights(struct thread *td, int fd, cap_rights_t need, cap_rights_t *have,
- struct vnode **vpp)
+fgetvp_rights(struct thread *td, int fd, cap_rights_t need,
+ struct filecaps *havecaps, struct vnode **vpp)
{
- return (_fgetvp(td, fd, 0, need, have, vpp));
+ struct filedesc *fdp;
+ struct file *fp;
+#ifdef CAPABILITIES
+ int error;
+#endif
+
+ if (td == NULL || (fdp = td->td_proc->p_fd) == NULL)
+ return (EBADF);
+
+ fp = fget_locked(fdp, fd);
+ if (fp == NULL || fp->f_ops == &badfileops)
+ return (EBADF);
+
+#ifdef CAPABILITIES
+ error = cap_check(cap_rights(fdp, fd), need);
+ if (error != 0)
+ return (error);
+#endif
+
+ if (fp->f_vnode == NULL)
+ return (EINVAL);
+
+ *vpp = fp->f_vnode;
+ vref(*vpp);
+ filecaps_copy(&fdp->fd_ofiles[fd].fde_caps, havecaps);
+
+ return (0);
}
int
fgetvp_read(struct thread *td, int fd, cap_rights_t rights, struct vnode **vpp)
{
- return (_fgetvp(td, fd, FREAD, rights, NULL, vpp));
+ return (_fgetvp(td, fd, FREAD, rights, vpp));
}
int
fgetvp_exec(struct thread *td, int fd, cap_rights_t rights, struct vnode **vpp)
{
- return (_fgetvp(td, fd, FEXEC, rights, NULL, vpp));
+ return (_fgetvp(td, fd, FEXEC, rights, vpp));
}
#ifdef notyet
@@ -2472,7 +2505,7 @@ fgetvp_write(struct thread *td, int fd, cap_rights_t rights,
struct vnode **vpp)
{
- return (_fgetvp(td, fd, FWRITE, rights, NULL, vpp));
+ return (_fgetvp(td, fd, FWRITE, rights, vpp));
}
#endif
@@ -2497,7 +2530,7 @@ fgetsock(struct thread *td, int fd, cap_rights_t rights, struct socket **spp,
*spp = NULL;
if (fflagp != NULL)
*fflagp = 0;
- if ((error = _fget(td, fd, &fp, 0, rights, NULL, NULL, 0)) != 0)
+ if ((error = _fget(td, fd, &fp, 0, rights, NULL)) != 0)
return (error);
if (fp->f_type != DTYPE_SOCKET) {
error = ENOTSOCK;
@@ -2533,9 +2566,6 @@ fputsock(struct socket *so)
/*
* Handle the last reference to a file being closed.
- *
- * No special capability handling here, as the capability's fo_close will run
- * instead of the object here, and perform any necessary drop on the object.
*/
int
_fdrop(struct file *fp, struct thread *td)
@@ -2612,7 +2642,8 @@ done2:
* Duplicate the specified descriptor to a free descriptor.
*/
int
-dupfdopen(struct thread *td, struct filedesc *fdp, int dfd, int mode, int openerror, int *indxp)
+dupfdopen(struct thread *td, struct filedesc *fdp, int dfd, int mode,
+ int openerror, int *indxp)
{
struct file *fp;
int error, indx;
@@ -2656,18 +2687,17 @@ dupfdopen(struct thread *td, struct filedesc *fdp, int dfd, int mode, int opener
FILEDESC_XUNLOCK(fdp);
return (EACCES);
}
- fdp->fd_ofiles[indx] = fp;
- fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd];
fhold(fp);
+ fdp->fd_ofiles[indx] = fdp->fd_ofiles[dfd];
+ filecaps_copy(&fdp->fd_ofiles[dfd].fde_caps,
+ &fdp->fd_ofiles[indx].fde_caps);
break;
case ENXIO:
/*
* Steal away the file pointer from dfd and stuff it into indx.
*/
- fdp->fd_ofiles[indx] = fp;
- fdp->fd_ofiles[dfd] = NULL;
- fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd];
- fdp->fd_ofileflags[dfd] = 0;
+ fdp->fd_ofiles[indx] = fdp->fd_ofiles[dfd];
+ bzero(&fdp->fd_ofiles[dfd], sizeof(fdp->fd_ofiles[dfd]));
fdunused(fdp, dfd);
break;
}
@@ -2823,7 +2853,7 @@ sysctl_kern_file(SYSCTL_HANDLER_ARGS)
continue;
FILEDESC_SLOCK(fdp);
for (n = 0; fdp->fd_refcnt > 0 && n < fdp->fd_nfiles; ++n) {
- if ((fp = fdp->fd_ofiles[n]) == NULL)
+ if ((fp = fdp->fd_ofiles[n].fde_file) == NULL)
continue;
xf.xf_fd = n;
xf.xf_file = fp;
@@ -2935,7 +2965,7 @@ sysctl_kern_proc_ofiledesc(SYSCTL_HANDLER_ARGS)
export_vnode_for_osysctl(fdp->fd_jdir, KF_FD_TYPE_JAIL, kif,
fdp, req);
for (i = 0; i < fdp->fd_nfiles; i++) {
- if ((fp = fdp->fd_ofiles[i]) == NULL)
+ if ((fp = fdp->fd_ofiles[i].fde_file) == NULL)
continue;
bzero(kif, sizeof(*kif));
kif->kf_structsize = sizeof(*kif);
@@ -2945,21 +2975,6 @@ sysctl_kern_proc_ofiledesc(SYSCTL_HANDLER_ARGS)
shmfd = NULL;
kif->kf_fd = i;
-#ifdef CAPABILITIES
- /*
- * When reporting a capability, most fields will be from the
- * underlying object, but do mark as a capability. With
- * ofiledesc, we don't have a field to export the cap_rights_t,
- * but we do with the new filedesc.
- */
- if (fp->f_type == DTYPE_CAPABILITY) {
- kif->kf_flags |= KF_FLAG_CAPABILITY;
- (void)cap_funwrap(fp, 0, &fp);
- }
-#else
- KASSERT(fp->f_type != DTYPE_CAPABILITY,
- ("sysctl_kern_proc_ofiledesc: saw capability"));
-#endif
switch (fp->f_type) {
case DTYPE_VNODE:
kif->kf_type = KF_TYPE_VNODE;
@@ -3128,8 +3143,8 @@ CTASSERT(sizeof(struct kinfo_file) == KINFO_FILE_SIZE);
static int
export_fd_for_sysctl(void *data, int type, int fd, int fflags, int refcnt,
- int64_t offset, int fd_is_cap, cap_rights_t fd_cap_rights,
- struct kinfo_file *kif, struct sysctl_req *req)
+ int64_t offset, cap_rights_t fd_cap_rights, struct kinfo_file *kif,
+ struct sysctl_req *req)
{
struct {
int fflag;
@@ -3191,10 +3206,7 @@ export_fd_for_sysctl(void *data, int type, int fd, int fflags, int refcnt,
for (i = 0; i < NFFLAGS; i++)
if (fflags & fflags_table[i].fflag)
kif->kf_flags |= fflags_table[i].kf_fflag;
- if (fd_is_cap)
- kif->kf_flags |= KF_FLAG_CAPABILITY;
- if (fd_is_cap)
- kif->kf_cap_rights = fd_cap_rights;
+ kif->kf_cap_rights = fd_cap_rights;
kif->kf_fd = fd;
kif->kf_type = type;
kif->kf_ref_count = refcnt;
@@ -3222,7 +3234,7 @@ sysctl_kern_proc_filedesc(SYSCTL_HANDLER_ARGS)
int64_t offset;
void *data;
int error, i, *name;
- int fd_is_cap, type, refcnt, fflags;
+ int type, refcnt, fflags;
cap_rights_t fd_cap_rights;
name = (int *)arg1;
@@ -3252,13 +3264,13 @@ sysctl_kern_proc_filedesc(SYSCTL_HANDLER_ARGS)
kif = malloc(sizeof(*kif), M_TEMP, M_WAITOK);
if (tracevp != NULL)
export_fd_for_sysctl(tracevp, KF_TYPE_VNODE, KF_FD_TYPE_TRACE,
- FREAD | FWRITE, -1, -1, 0, 0, kif, req);
+ FREAD | FWRITE, -1, -1, 0, kif, req);
if (textvp != NULL)
export_fd_for_sysctl(textvp, KF_TYPE_VNODE, KF_FD_TYPE_TEXT,
- FREAD, -1, -1, 0, 0, kif, req);
+ FREAD, -1, -1, 0, kif, req);
if (cttyvp != NULL)
export_fd_for_sysctl(cttyvp, KF_TYPE_VNODE, KF_FD_TYPE_CTTY,
- FREAD | FWRITE, -1, -1, 0, 0, kif, req);
+ FREAD | FWRITE, -1, -1, 0, kif, req);
if (fdp == NULL)
goto fail;
FILEDESC_SLOCK(fdp);
@@ -3268,7 +3280,7 @@ sysctl_kern_proc_filedesc(SYSCTL_HANDLER_ARGS)
data = fdp->fd_cdir;
FILEDESC_SUNLOCK(fdp);
export_fd_for_sysctl(data, KF_TYPE_VNODE, KF_FD_TYPE_CWD,
- FREAD, -1, -1, 0, 0, kif, req);
+ FREAD, -1, -1, 0, kif, req);
FILEDESC_SLOCK(fdp);
}
/* root directory */
@@ -3277,7 +3289,7 @@ sysctl_kern_proc_filedesc(SYSCTL_HANDLER_ARGS)
data = fdp->fd_rdir;
FILEDESC_SUNLOCK(fdp);
export_fd_for_sysctl(data, KF_TYPE_VNODE, KF_FD_TYPE_ROOT,
- FREAD, -1, -1, 0, 0, kif, req);
+ FREAD, -1, -1, 0, kif, req);
FILEDESC_SLOCK(fdp);
}
/* jail directory */
@@ -3286,30 +3298,17 @@ sysctl_kern_proc_filedesc(SYSCTL_HANDLER_ARGS)
data = fdp->fd_jdir;
FILEDESC_SUNLOCK(fdp);
export_fd_for_sysctl(data, KF_TYPE_VNODE, KF_FD_TYPE_JAIL,
- FREAD, -1, -1, 0, 0, kif, req);
+ FREAD, -1, -1, 0, kif, req);
FILEDESC_SLOCK(fdp);
}
for (i = 0; i < fdp->fd_nfiles; i++) {
- if ((fp = fdp->fd_ofiles[i]) == NULL)
+ if ((fp = fdp->fd_ofiles[i].fde_file) == NULL)
continue;
data = NULL;
- fd_is_cap = 0;
- fd_cap_rights = 0;
-
#ifdef CAPABILITIES
- /*
- * When reporting a capability, most fields will be from the
- * underlying object, but do mark as a capability and export
- * the capability rights mask.
- */
- if (fp->f_type == DTYPE_CAPABILITY) {
- fd_is_cap = 1;
- fd_cap_rights = cap_rights(fp);
- (void)cap_funwrap(fp, 0, &fp);
- }
+ fd_cap_rights = cap_rights(fdp, i);
#else /* !CAPABILITIES */
- KASSERT(fp->f_type != DTYPE_CAPABILITY,
- ("sysctl_kern_proc_filedesc: saw capability"));
+ fd_cap_rights = 0;
#endif
switch (fp->f_type) {
case DTYPE_VNODE:
@@ -3385,7 +3384,7 @@ sysctl_kern_proc_filedesc(SYSCTL_HANDLER_ARGS)
if (type == KF_TYPE_VNODE || type == KF_TYPE_FIFO)
FILEDESC_SUNLOCK(fdp);
error = export_fd_for_sysctl(data, type, i, fflags, refcnt,
- offset, fd_is_cap, fd_cap_rights, kif, req);
+ offset, fd_cap_rights, kif, req);
if (type == KF_TYPE_VNODE || type == KF_TYPE_FIFO)
FILEDESC_SLOCK(fdp);
if (error) {
@@ -3644,7 +3643,7 @@ file_to_first_proc(struct file *fp)
if (fdp == NULL)
continue;
for (n = 0; n < fdp->fd_nfiles; n++) {
- if (fp == fdp->fd_ofiles[n])
+ if (fp == fdp->fd_ofiles[n].fde_file)
return (p);
}
}
@@ -3694,7 +3693,7 @@ DB_SHOW_COMMAND(files, db_show_files)
if ((fdp = p->p_fd) == NULL)
continue;
for (n = 0; n < fdp->fd_nfiles; ++n) {
- if ((fp = fdp->fd_ofiles[n]) == NULL)
+ if ((fp = fdp->fd_ofiles[n].fde_file) == NULL)
continue;
db_print_file(fp, header);
header = 0;
diff --git a/sys/kern/kern_exec.c b/sys/kern/kern_exec.c
index 965ce31..7c0d2d6 100644
--- a/sys/kern/kern_exec.c
+++ b/sys/kern/kern_exec.c
@@ -438,9 +438,6 @@ interpret:
} else {
AUDIT_ARG_FD(args->fd);
/*
- * Some might argue that CAP_READ and/or CAP_MMAP should also
- * be required here; such arguments will be entertained.
- *
* Descriptors opened only with O_EXEC or O_RDONLY are allowed.
*/
error = fgetvp_exec(td, args->fd, CAP_FEXECVE, &binvp);
diff --git a/sys/kern/kern_exit.c b/sys/kern/kern_exit.c
index 82f0344..5bd2daa 100644
--- a/sys/kern/kern_exit.c
+++ b/sys/kern/kern_exit.c
@@ -297,7 +297,7 @@ exit1(struct thread *td, int rv)
* Close open files and release open-file table.
* This may block!
*/
- fdfree(td);
+ fdescfree(td);
/*
* If this thread tickled GEOM, we need to wait for the giggling to
diff --git a/sys/kern/kern_fork.c b/sys/kern/kern_fork.c
index 287d202..b5a4934 100644
--- a/sys/kern/kern_fork.c
+++ b/sys/kern/kern_fork.c
@@ -342,7 +342,7 @@ fork_norfproc(struct thread *td, int flags)
if (flags & RFCFDG) {
struct filedesc *fdtmp;
fdtmp = fdinit(td->td_proc->p_fd);
- fdfree(td);
+ fdescfree(td);
p1->p_fd = fdtmp;
}
diff --git a/sys/kern/sys_capability.c b/sys/kern/sys_capability.c
index 6fb4fee..ba168e9 100644
--- a/sys/kern/sys_capability.c
+++ b/sys/kern/sys_capability.c
@@ -1,11 +1,15 @@
/*-
* Copyright (c) 2008-2011 Robert N. M. Watson
* Copyright (c) 2010-2011 Jonathan Anderson
+ * Copyright (c) 2012 FreeBSD Foundation
* All rights reserved.
*
* This software was developed at the University of Cambridge Computer
* Laboratory with support from a grant from Google, Inc.
*
+ * Portions of this software were developed by Pawel Jakub Dawidek under
+ * sponsorship from the FreeBSD Foundation.
+ *
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@@ -62,6 +66,7 @@ __FBSDID("$FreeBSD$");
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/kernel.h>
+#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
@@ -139,90 +144,48 @@ sys_cap_getmode(struct thread *td, struct cap_getmode_args *uap)
FEATURE(security_capabilities, "Capsicum Capabilities");
-/*
- * struct capability describes a capability, and is hung off of its struct
- * file f_data field. cap_file and cap_rightss are static once hooked up, as
- * neither the object it references nor the rights it encapsulates are
- * permitted to change.
- */
-struct capability {
- struct file *cap_object; /* Underlying object's file. */
- struct file *cap_file; /* Back-pointer to cap's file. */
- cap_rights_t cap_rights; /* Mask of rights on object. */
-};
+static inline int
+_cap_check(cap_rights_t have, cap_rights_t need, enum ktr_cap_fail_type type)
+{
+
+
+ if ((need & ~have) != 0) {
+#ifdef KTRACE
+ if (KTRPOINT(curthread, KTR_CAPFAIL))
+ ktrcapfail(type, need, have);
+#endif
+ return (ENOTCAPABLE);
+ }
+ return (0);
+}
/*
- * Capabilities have a fileops vector, but in practice none should ever be
- * called except for fo_close, as the capability will normally not be
- * returned during a file descriptor lookup in the system call code.
+ * Test whether a capability grants the requested rights.
*/
-static fo_rdwr_t capability_read;
-static fo_rdwr_t capability_write;
-static fo_truncate_t capability_truncate;
-static fo_ioctl_t capability_ioctl;
-static fo_poll_t capability_poll;
-static fo_kqfilter_t capability_kqfilter;
-static fo_stat_t capability_stat;
-static fo_close_t capability_close;
-static fo_chmod_t capability_chmod;
-static fo_chown_t capability_chown;
-
-static struct fileops capability_ops = {
- .fo_read = capability_read,
- .fo_write = capability_write,
- .fo_truncate = capability_truncate,
- .fo_ioctl = capability_ioctl,
- .fo_poll = capability_poll,
- .fo_kqfilter = capability_kqfilter,
- .fo_stat = capability_stat,
- .fo_close = capability_close,
- .fo_chmod = capability_chmod,
- .fo_chown = capability_chown,
- .fo_flags = DFLAG_PASSABLE,
-};
-
-static struct fileops capability_ops_unpassable = {
- .fo_read = capability_read,
- .fo_write = capability_write,
- .fo_truncate = capability_truncate,
- .fo_ioctl = capability_ioctl,
- .fo_poll = capability_poll,
- .fo_kqfilter = capability_kqfilter,
- .fo_stat = capability_stat,
- .fo_close = capability_close,
- .fo_chmod = capability_chmod,
- .fo_chown = capability_chown,
- .fo_flags = 0,
-};
-
-static uma_zone_t capability_zone;
-
-static void
-capability_init(void *dummy __unused)
+int
+cap_check(cap_rights_t have, cap_rights_t need)
{
- capability_zone = uma_zcreate("capability", sizeof(struct capability),
- NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
- if (capability_zone == NULL)
- panic("capability_init: capability_zone not initialized");
+ return (_cap_check(have, need, CAPFAIL_NOTCAPABLE));
}
-SYSINIT(vfs, SI_SUB_VFS, SI_ORDER_ANY, capability_init, NULL);
/*
- * Test whether a capability grants the requested rights.
+ * Convert capability rights into VM access flags.
*/
-static int
-cap_check(struct capability *c, cap_rights_t rights)
+u_char
+cap_rights_to_vmprot(cap_rights_t have)
{
+ u_char maxprot;
- if ((c->cap_rights | rights) != c->cap_rights) {
-#ifdef KTRACE
- if (KTRPOINT(curthread, KTR_CAPFAIL))
- ktrcapfail(CAPFAIL_NOTCAPABLE, rights, c->cap_rights);
-#endif
- return (ENOTCAPABLE);
- }
- return (0);
+ maxprot = VM_PROT_NONE;
+ if (have & CAP_MMAP_R)
+ maxprot |= VM_PROT_READ;
+ if (have & CAP_MMAP_W)
+ maxprot |= VM_PROT_WRITE;
+ if (have & CAP_MMAP_X)
+ maxprot |= VM_PROT_EXECUTE;
+
+ return (maxprot);
}
/*
@@ -231,43 +194,49 @@ cap_check(struct capability *c, cap_rights_t rights)
* this one file.
*/
cap_rights_t
-cap_rights(struct file *fp_cap)
+cap_rights(struct filedesc *fdp, int fd)
{
- struct capability *c;
-
- KASSERT(fp_cap->f_type == DTYPE_CAPABILITY,
- ("cap_rights: !capability"));
- c = fp_cap->f_data;
- return (c->cap_rights);
+ return (fdp->fd_ofiles[fd].fde_rights);
}
/*
- * System call to create a new capability reference to either an existing
- * file object or an an existing capability.
+ * System call to limit rights of the given capability.
*/
int
-sys_cap_new(struct thread *td, struct cap_new_args *uap)
+sys_cap_rights_limit(struct thread *td, struct cap_rights_limit_args *uap)
{
- int error, capfd;
- int fd = uap->fd;
- struct file *fp;
- cap_rights_t rights = uap->rights;
+ struct filedesc *fdp;
+ cap_rights_t rights;
+ int error, fd;
+
+ fd = uap->fd;
+ rights = uap->rights;
AUDIT_ARG_FD(fd);
AUDIT_ARG_RIGHTS(rights);
- error = fget(td, fd, rights, &fp);
- if (error)
- return (error);
- AUDIT_ARG_FILE(td->td_proc, fp);
- error = kern_capwrap(td, fp, rights, &capfd);
- /*
- * Release our reference to the file (kern_capwrap has held a reference
- * for the filedesc array).
- */
- fdrop(fp, td);
- if (error == 0)
- td->td_retval[0] = capfd;
+
+ if ((rights & ~CAP_ALL) != 0)
+ return (EINVAL);
+
+ fdp = td->td_proc->p_fd;
+ FILEDESC_XLOCK(fdp);
+ if (fget_locked(fdp, fd) == NULL) {
+ FILEDESC_XUNLOCK(fdp);
+ return (EBADF);
+ }
+ error = _cap_check(cap_rights(fdp, fd), rights, CAPFAIL_INCREASE);
+ if (error == 0) {
+ fdp->fd_ofiles[fd].fde_rights = rights;
+ if ((rights & CAP_IOCTL) == 0) {
+ free(fdp->fd_ofiles[fd].fde_ioctls, M_TEMP);
+ fdp->fd_ofiles[fd].fde_ioctls = NULL;
+ fdp->fd_ofiles[fd].fde_nioctls = 0;
+ }
+ if ((rights & CAP_FCNTL) == 0)
+ fdp->fd_ofiles[fd].fde_fcntls = 0;
+ }
+ FILEDESC_XUNLOCK(fdp);
return (error);
}
@@ -275,247 +244,321 @@ sys_cap_new(struct thread *td, struct cap_new_args *uap)
* System call to query the rights mask associated with a capability.
*/
int
-sys_cap_getrights(struct thread *td, struct cap_getrights_args *uap)
+sys_cap_rights_get(struct thread *td, struct cap_rights_get_args *uap) /* Copies the rights mask of uap->fd out to uap->rightsp; EBADF if the descriptor is not open. */
{
- struct capability *cp;
- struct file *fp;
- int error;
+ struct filedesc *fdp;
+ cap_rights_t rights;
+ int fd;
- AUDIT_ARG_FD(uap->fd);
- error = fgetcap(td, uap->fd, &fp);
- if (error)
- return (error);
- cp = fp->f_data;
- error = copyout(&cp->cap_rights, uap->rightsp, sizeof(*uap->rightsp));
- fdrop(fp, td);
- return (error);
+ fd = uap->fd;
+
+ AUDIT_ARG_FD(fd);
+
+ fdp = td->td_proc->p_fd;
+ FILEDESC_SLOCK(fdp); /* Shared lock: the entry is only read. */
+ if (fget_locked(fdp, fd) == NULL) {
+ FILEDESC_SUNLOCK(fdp);
+ return (EBADF);
+ }
+ rights = cap_rights(fdp, fd); /* Snapshot into a local so copyout() can run after the lock is dropped. */
+ FILEDESC_SUNLOCK(fdp);
+ return (copyout(&rights, uap->rightsp, sizeof(*uap->rightsp)));
}
/*
- * Create a capability to wrap around an existing file.
+ * Test whether a capability grants the given ioctl command.
+ * If descriptor doesn't have CAP_IOCTL, then ioctls list is empty and
+ * ENOTCAPABLE will be returned. Returns 0 when cmd is allowed.
*/
int
-kern_capwrap(struct thread *td, struct file *fp, cap_rights_t rights,
- int *capfdp)
+cap_ioctl_check(struct filedesc *fdp, int fd, u_long cmd)
{
- struct capability *cp, *cp_old;
- struct file *fp_object, *fcapp;
- int error;
-
- if ((rights | CAP_MASK_VALID) != CAP_MASK_VALID)
- return (EINVAL);
+ u_long *cmds;
+ ssize_t ncmds;
+ long i;
- /*
- * If a new capability is being derived from an existing capability,
- * then the new capability rights must be a subset of the existing
- * rights.
- */
- if (fp->f_type == DTYPE_CAPABILITY) {
- cp_old = fp->f_data;
- if ((cp_old->cap_rights | rights) != cp_old->cap_rights) {
-#ifdef KTRACE
- if (KTRPOINT(curthread, KTR_CAPFAIL))
- ktrcapfail(CAPFAIL_INCREASE,
- rights, cp_old->cap_rights);
-#endif
- return (ENOTCAPABLE);
- }
- }
+ FILEDESC_LOCK_ASSERT(fdp); /* Caller is expected to hold the filedesc lock; kern_ioctl() takes it shared or exclusive before calling. */
+ KASSERT(fd >= 0 && fd < fdp->fd_nfiles,
+ ("%s: invalid fd=%d", __func__, fd));
- /*
- * Allocate a new file descriptor to hang the capability off of.
- */
- error = falloc(td, &fcapp, capfdp, fp->f_flag);
- if (error)
- return (error);
+ ncmds = fdp->fd_ofiles[fd].fde_nioctls;
+ if (ncmds == -1) /* -1 marks a descriptor on which every ioctl is allowed. */
+ return (0);
- /*
- * Rather than nesting capabilities, directly reference the object an
- * existing capability references. There's nothing else interesting
- * to preserve for future use, as we've incorporated the previous
- * rights mask into the new one. This prevents us from having to
- * deal with capability chains.
- */
- if (fp->f_type == DTYPE_CAPABILITY)
- fp_object = ((struct capability *)fp->f_data)->cap_object;
- else
- fp_object = fp;
- fhold(fp_object);
- cp = uma_zalloc(capability_zone, M_WAITOK | M_ZERO);
- cp->cap_rights = rights;
- cp->cap_object = fp_object;
- cp->cap_file = fcapp;
- if (fp->f_flag & DFLAG_PASSABLE)
- finit(fcapp, fp->f_flag, DTYPE_CAPABILITY, cp,
- &capability_ops);
- else
- finit(fcapp, fp->f_flag, DTYPE_CAPABILITY, cp,
- &capability_ops_unpassable);
+ cmds = fdp->fd_ofiles[fd].fde_ioctls;
+ for (i = 0; i < ncmds; i++) { /* Linear scan of the allowed-command list. */
+ if (cmds[i] == cmd)
+ return (0);
+ }
- /*
- * Release our private reference (the proc filedesc still has one).
- */
- fdrop(fcapp, td);
- return (0);
+ return (ENOTCAPABLE); /* cmd is not listed, or the list is empty. */
}
/*
- * Given a file descriptor, test it against a capability rights mask and then
- * return the file descriptor on which to actually perform the requested
- * operation. As long as the reference to fp_cap remains valid, the returned
- * pointer in *fp will remain valid, so no extra reference management is
- * required, and the caller should fdrop() fp_cap as normal when done with
- * both.
+ * Check if the current ioctls list can be replaced by the new one. Every entry of cmds[0..ncmds) must already be in the current list unless the descriptor allows all ioctls; returns 0 if so, ENOTCAPABLE otherwise.
*/
-int
-cap_funwrap(struct file *fp_cap, cap_rights_t rights, struct file **fpp)
+static int
+cap_ioctl_limit_check(struct filedesc *fdp, int fd, const u_long *cmds,
+ size_t ncmds)
{
- struct capability *c;
- int error;
+ u_long *ocmds;
+ ssize_t oncmds;
+ u_long i;
+ long j;
- if (fp_cap->f_type != DTYPE_CAPABILITY) {
- *fpp = fp_cap;
+ oncmds = fdp->fd_ofiles[fd].fde_nioctls;
+ if (oncmds == -1) /* All ioctls currently allowed: any new list is acceptable. */
return (0);
+ if (oncmds < (ssize_t)ncmds) /* Size pre-check. NOTE(review): a new list with duplicate entries can be rejected here even if each command is already allowed. */
+ return (ENOTCAPABLE);
+
+ ocmds = fdp->fd_ofiles[fd].fde_ioctls;
+ for (i = 0; i < ncmds; i++) {
+ for (j = 0; j < oncmds; j++) {
+ if (cmds[i] == ocmds[j])
+ break;
+ }
+ if (j == oncmds) /* Inner loop ran to the end: cmds[i] is not in the current list. */
+ return (ENOTCAPABLE);
}
- c = fp_cap->f_data;
- error = cap_check(c, rights);
- if (error)
- return (error);
- *fpp = c->cap_object;
+
return (0);
}
-/*
- * Slightly different routine for memory mapping file descriptors: unwrap the
- * capability and check CAP_MMAP, but also return a bitmask representing the
- * maximum mapping rights the capability allows on the object.
- */
int
-cap_funwrap_mmap(struct file *fp_cap, cap_rights_t rights, u_char *maxprotp,
- struct file **fpp)
+sys_cap_ioctls_limit(struct thread *td, struct cap_ioctls_limit_args *uap) /* System call: replace the allowed-ioctl list of uap->fd with the uap->ncmds commands read from uap->cmds. */
{
- struct capability *c;
- u_char maxprot;
- int error;
+ struct filedesc *fdp;
+ u_long *cmds, *ocmds;
+ size_t ncmds;
+ int error, fd;
- if (fp_cap->f_type != DTYPE_CAPABILITY) {
- *fpp = fp_cap;
- *maxprotp = VM_PROT_ALL;
- return (0);
+ fd = uap->fd;
+ ncmds = uap->ncmds;
+
+ AUDIT_ARG_FD(fd);
+
+ if (ncmds > 256) /* XXX: Is 256 sane? */
+ return (EINVAL);
+
+ if (ncmds == 0) {
+ cmds = NULL; /* An empty list is represented by a NULL pointer. */
+ } else {
+ cmds = malloc(sizeof(cmds[0]) * ncmds, M_TEMP, M_WAITOK); /* Allocation and copyin() happen before the filedesc lock is taken. */
+ error = copyin(uap->cmds, cmds, sizeof(cmds[0]) * ncmds);
+ if (error != 0) {
+ free(cmds, M_TEMP);
+ return (error);
+ }
}
- c = fp_cap->f_data;
- error = cap_check(c, rights | CAP_MMAP);
- if (error)
- return (error);
- *fpp = c->cap_object;
- maxprot = 0;
- if (c->cap_rights & CAP_READ)
- maxprot |= VM_PROT_READ;
- if (c->cap_rights & CAP_WRITE)
- maxprot |= VM_PROT_WRITE;
- if (c->cap_rights & CAP_MAPEXEC)
- maxprot |= VM_PROT_EXECUTE;
- *maxprotp = maxprot;
- return (0);
+
+ fdp = td->td_proc->p_fd;
+ FILEDESC_XLOCK(fdp);
+
+ if (fget_locked(fdp, fd) == NULL) {
+ error = EBADF;
+ goto out;
+ }
+
+ error = cap_ioctl_limit_check(fdp, fd, cmds, ncmds); /* The new list must be contained in the current one. */
+ if (error != 0)
+ goto out;
+
+ ocmds = fdp->fd_ofiles[fd].fde_ioctls; /* Keep the old list so it can be freed after the swap. */
+ fdp->fd_ofiles[fd].fde_ioctls = cmds;
+ fdp->fd_ofiles[fd].fde_nioctls = ncmds;
+
+ cmds = ocmds; /* On success cmds now names the old list; on the error paths it still names the new buffer. */
+ error = 0;
+out:
+ FILEDESC_XUNLOCK(fdp);
+ free(cmds, M_TEMP); /* Frees whichever list is no longer installed, after unlocking. */
+ return (error);
}
-/*
- * When a capability is closed, simply drop the reference on the underlying
- * object and free the capability. fdrop() will handle the case where the
- * underlying object also needs to close, and the caller will have already
- * performed any object-specific lock or mqueue handling.
- */
-static int
-capability_close(struct file *fp, struct thread *td)
+int
+sys_cap_ioctls_get(struct thread *td, struct cap_ioctls_get_args *uap) /* System call: report the allowed-ioctl list of uap->fd, copying at most uap->maxcmds entries to uap->cmds. */
{
- struct capability *c;
- struct file *fp_object;
-
- KASSERT(fp->f_type == DTYPE_CAPABILITY,
- ("capability_close: !capability"));
-
- c = fp->f_data;
- fp->f_ops = &badfileops;
- fp->f_data = NULL;
- fp_object = c->cap_object;
- uma_zfree(capability_zone, c);
- return (fdrop(fp_object, td));
+ struct filedesc *fdp;
+ struct filedescent *fdep;
+ u_long *cmds;
+ size_t maxcmds;
+ int error, fd;
+
+ fd = uap->fd;
+ cmds = uap->cmds;
+ maxcmds = uap->maxcmds;
+
+ AUDIT_ARG_FD(fd);
+
+ fdp = td->td_proc->p_fd;
+ FILEDESC_SLOCK(fdp);
+
+ if (fget_locked(fdp, fd) == NULL) {
+ error = EBADF;
+ goto out;
+ }
+
+ /*
+ * If all ioctls are allowed (fde_nioctls == -1 && fde_ioctls == NULL)
+ * the only sane thing we can do is to not populate the given array and
+ * return CAP_IOCTLS_ALL.
+ */
+
+ fdep = &fdp->fd_ofiles[fd];
+ if (cmds != NULL && fdep->fde_ioctls != NULL) { /* Nothing is copied when the caller passed no buffer or no list is stored. */
+ error = copyout(fdep->fde_ioctls, cmds,
+ sizeof(cmds[0]) * MIN(fdep->fde_nioctls, maxcmds)); /* NOTE(review): copyout() runs with the filedesc lock held; confirm that is acceptable. */
+ if (error != 0)
+ goto out;
+ }
+ if (fdep->fde_nioctls == -1)
+ td->td_retval[0] = CAP_IOCTLS_ALL;
+ else
+ td->td_retval[0] = fdep->fde_nioctls; /* The stored count is returned even when fewer than that many entries were copied. */
+
+ error = 0;
+out:
+ FILEDESC_SUNLOCK(fdp);
+ return (error);
}
/*
- * In general, file descriptor operations should never make it to the
- * capability, only the underlying file descriptor operation vector, so panic
- * if we do turn up here.
+ * Test whether a capability grants the given fcntl command. Returns 0 when the bit for cmd is set in fde_fcntls of slot fd, ENOTCAPABLE otherwise.
*/
-static int
-capability_read(struct file *fp, struct uio *uio, struct ucred *active_cred,
- int flags, struct thread *td)
+int
+cap_fcntl_check(struct filedesc *fdp, int fd, int cmd)
{
+ uint32_t fcntlcap;
- panic("capability_read");
-}
-
-static int
-capability_write(struct file *fp, struct uio *uio, struct ucred *active_cred,
- int flags, struct thread *td)
-{
+ KASSERT(fd >= 0 && fd < fdp->fd_nfiles,
+ ("%s: invalid fd=%d", __func__, fd));
- panic("capability_write");
-}
+ fcntlcap = (1 << cmd); /* Each command is represented by bit number cmd. NOTE(review): the shift is done on a signed int and cmd is not range-checked before it; confirm callers only pass small command numbers. */
+ KASSERT((CAP_FCNTL_ALL & fcntlcap) != 0,
+ ("Unsupported fcntl=%d.", cmd));
-static int
-capability_truncate(struct file *fp, off_t length, struct ucred *active_cred,
- struct thread *td)
-{
+ if ((fdp->fd_ofiles[fd].fde_fcntls & fcntlcap) != 0)
+ return (0);
- panic("capability_truncate");
+ return (ENOTCAPABLE);
}
-static int
-capability_ioctl(struct file *fp, u_long com, void *data,
- struct ucred *active_cred, struct thread *td)
+int
+sys_cap_fcntls_limit(struct thread *td, struct cap_fcntls_limit_args *uap) /* System call: replace the allowed-fcntl mask of uap->fd with uap->fcntlrights, which may only clear bits. */
{
+ struct filedesc *fdp;
+ uint32_t fcntlrights;
+ int fd;
- panic("capability_ioctl");
-}
+ fd = uap->fd;
+ fcntlrights = uap->fcntlrights;
-static int
-capability_poll(struct file *fp, int events, struct ucred *active_cred,
- struct thread *td)
-{
+ AUDIT_ARG_FD(fd);
+ AUDIT_ARG_FCNTL_RIGHTS(fcntlrights);
- panic("capability_poll");
-}
+ if ((fcntlrights & ~CAP_FCNTL_ALL) != 0) /* Reject any bit outside CAP_FCNTL_ALL. */
+ return (EINVAL);
-static int
-capability_kqfilter(struct file *fp, struct knote *kn)
-{
+ fdp = td->td_proc->p_fd;
+ FILEDESC_XLOCK(fdp); /* Exclusive lock: the descriptor entry is modified below. */
- panic("capability_kqfilter");
-}
+ if (fget_locked(fdp, fd) == NULL) {
+ FILEDESC_XUNLOCK(fdp);
+ return (EBADF);
+ }
-static int
-capability_stat(struct file *fp, struct stat *sb, struct ucred *active_cred,
- struct thread *td)
-{
+ if ((fcntlrights & ~fdp->fd_ofiles[fd].fde_fcntls) != 0) { /* The request contains a bit that is currently cleared: refuse to widen. */
+ FILEDESC_XUNLOCK(fdp);
+ return (ENOTCAPABLE);
+ }
- panic("capability_stat");
+ fdp->fd_ofiles[fd].fde_fcntls = fcntlrights;
+ FILEDESC_XUNLOCK(fdp);
+
+ return (0);
}
int
-capability_chmod(struct file *fp, mode_t mode, struct ucred *active_cred,
- struct thread *td)
+sys_cap_fcntls_get(struct thread *td, struct cap_fcntls_get_args *uap) /* System call: copy the allowed-fcntl mask of uap->fd out to uap->fcntlrightsp; EBADF if the descriptor is not open. */
{
+ struct filedesc *fdp;
+ uint32_t rights;
+ int fd;
+
+ fd = uap->fd;
- panic("capability_chmod");
+ AUDIT_ARG_FD(fd);
+
+ fdp = td->td_proc->p_fd;
+ FILEDESC_SLOCK(fdp); /* Shared lock: the entry is only read. */
+ if (fget_locked(fdp, fd) == NULL) {
+ FILEDESC_SUNLOCK(fdp);
+ return (EBADF);
+ }
+ rights = fdp->fd_ofiles[fd].fde_fcntls; /* Snapshot into a local so copyout() can run after the lock is dropped. */
+ FILEDESC_SUNLOCK(fdp);
+
+ return (copyout(&rights, uap->fcntlrightsp, sizeof(rights)));
}
+/*
+ * For backward compatibility. Duplicates uap->fd with do_dup() and stores uap->rights on the new descriptor, whose number is returned in td_retval[0].
+ */
int
-capability_chown(struct file *fp, uid_t uid, gid_t gid,
- struct ucred *active_cred, struct thread *td)
+sys_cap_new(struct thread *td, struct cap_new_args *uap)
{
+ struct filedesc *fdp;
+ cap_rights_t rights;
+ register_t newfd;
+ int error, fd;
+
+ fd = uap->fd;
+ rights = uap->rights;
+
+ AUDIT_ARG_FD(fd);
+ AUDIT_ARG_RIGHTS(rights);
+
+ if ((rights & ~CAP_ALL) != 0) /* Reject any bit outside CAP_ALL. */
+ return (EINVAL);
+
+ fdp = td->td_proc->p_fd;
+ FILEDESC_SLOCK(fdp);
+ if (fget_locked(fdp, fd) == NULL) {
+ FILEDESC_SUNLOCK(fdp);
+ return (EBADF);
+ }
+ error = _cap_check(cap_rights(fdp, fd), rights, CAPFAIL_INCREASE); /* Presumably fails when rights is not covered by the mask on fd; confirm in _cap_check(). */
+ FILEDESC_SUNLOCK(fdp); /* The lock is dropped before do_dup() and retaken exclusively afterwards. */
+ if (error != 0)
+ return (error);
+
+ error = do_dup(td, 0, fd, 0, &newfd); /* Duplicate fd; the new descriptor number comes back in newfd (argument meaning per do_dup(), not shown here). */
+ if (error != 0)
+ return (error);
- panic("capability_chown");
+ FILEDESC_XLOCK(fdp);
+ /*
+ * We don't really care about the race between checking capability
+ * rights for the source descriptor and now. If capability rights
+ * were ok at that earlier point, the process had this descriptor
+ * with those rights, so we don't increase them in security sense,
+ * the process might have done the cap_new(2) a bit earlier to get
+ * the same effect. NOTE(review): newfd itself is not re-validated under this lock; if another thread closed or reused it after do_dup(), the stores below would land on a different entry. Confirm whether that can happen.
+ */
+ fdp->fd_ofiles[newfd].fde_rights = rights;
+ if ((rights & CAP_IOCTL) == 0) { /* Without CAP_IOCTL the ioctl list is released and left empty. */
+ free(fdp->fd_ofiles[newfd].fde_ioctls, M_TEMP);
+ fdp->fd_ofiles[newfd].fde_ioctls = NULL;
+ fdp->fd_ofiles[newfd].fde_nioctls = 0;
+ }
+ if ((rights & CAP_FCNTL) == 0) /* Without CAP_FCNTL no fcntl command bit stays set. */
+ fdp->fd_ofiles[newfd].fde_fcntls = 0;
+ FILEDESC_XUNLOCK(fdp);
+
+ td->td_retval[0] = newfd;
+
+ return (0);
}
#else /* !CAPABILITIES */
@@ -524,42 +567,54 @@ capability_chown(struct file *fp, uid_t uid, gid_t gid,
* Stub Capability functions for when options CAPABILITIES isn't compiled
* into the kernel.
*/
+
int
-sys_cap_new(struct thread *td, struct cap_new_args *uap)
+sys_cap_rights_limit(struct thread *td, struct cap_rights_limit_args *uap) /* Stub for kernels built without CAPABILITIES: always ENOSYS. */
{
return (ENOSYS);
}
int
-sys_cap_getrights(struct thread *td, struct cap_getrights_args *uap)
+sys_cap_rights_get(struct thread *td, struct cap_rights_get_args *uap) /* Stub: always ENOSYS. */
{
return (ENOSYS);
}
int
-cap_funwrap(struct file *fp_cap, cap_rights_t rights, struct file **fpp)
+sys_cap_ioctls_limit(struct thread *td, struct cap_ioctls_limit_args *uap) /* Stub: always ENOSYS. */
{
- KASSERT(fp_cap->f_type != DTYPE_CAPABILITY,
- ("cap_funwrap: saw capability"));
+ return (ENOSYS);
+}
- *fpp = fp_cap;
- return (0);
+int
+sys_cap_ioctls_get(struct thread *td, struct cap_ioctls_get_args *uap) /* Stub: always ENOSYS. */
+{
+
+ return (ENOSYS);
}
int
-cap_funwrap_mmap(struct file *fp_cap, cap_rights_t rights, u_char *maxprotp,
- struct file **fpp)
+sys_cap_fcntls_limit(struct thread *td, struct cap_fcntls_limit_args *uap) /* Stub: always ENOSYS. */
{
- KASSERT(fp_cap->f_type != DTYPE_CAPABILITY,
- ("cap_funwrap_mmap: saw capability"));
+ return (ENOSYS);
+}
- *fpp = fp_cap;
- *maxprotp = VM_PROT_ALL;
- return (0);
+int
+sys_cap_fcntls_get(struct thread *td, struct cap_fcntls_get_args *uap) /* Stub: always ENOSYS. */
+{
+
+ return (ENOSYS);
+}
+
+int
+sys_cap_new(struct thread *td, struct cap_new_args *uap) /* Stub: always ENOSYS. */
+{
+
+ return (ENOSYS);
}
#endif /* CAPABILITIES */
diff --git a/sys/kern/sys_generic.c b/sys/kern/sys_generic.c
index b97ff7f..39f33f3 100644
--- a/sys/kern/sys_generic.c
+++ b/sys/kern/sys_generic.c
@@ -49,6 +49,7 @@ __FBSDID("$FreeBSD$");
#include <sys/filio.h>
#include <sys/fcntl.h>
#include <sys/file.h>
+#include <sys/lock.h>
#include <sys/proc.h>
#include <sys/signalvar.h>
#include <sys/socketvar.h>
@@ -244,7 +245,7 @@ kern_readv(struct thread *td, int fd, struct uio *auio)
struct file *fp;
int error;
- error = fget_read(td, fd, CAP_READ | CAP_SEEK, &fp);
+ error = fget_read(td, fd, CAP_READ, &fp);
if (error)
return (error);
error = dofileread(td, fd, fp, auio, (off_t)-1, 0);
@@ -287,7 +288,7 @@ kern_preadv(td, fd, auio, offset)
struct file *fp;
int error;
- error = fget_read(td, fd, CAP_READ, &fp);
+ error = fget_read(td, fd, CAP_PREAD, &fp);
if (error)
return (error);
if (!(fp->f_ops->fo_flags & DFLAG_SEEKABLE))
@@ -453,7 +454,7 @@ kern_writev(struct thread *td, int fd, struct uio *auio)
struct file *fp;
int error;
- error = fget_write(td, fd, CAP_WRITE | CAP_SEEK, &fp);
+ error = fget_write(td, fd, CAP_WRITE, &fp);
if (error)
return (error);
error = dofilewrite(td, fd, fp, auio, (off_t)-1, 0);
@@ -496,7 +497,7 @@ kern_pwritev(td, fd, auio, offset)
struct file *fp;
int error;
- error = fget_write(td, fd, CAP_WRITE, &fp);
+ error = fget_write(td, fd, CAP_PWRITE, &fp);
if (error)
return (error);
if (!(fp->f_ops->fo_flags & DFLAG_SEEKABLE))
@@ -704,28 +705,60 @@ kern_ioctl(struct thread *td, int fd, u_long com, caddr_t data)
{
struct file *fp;
struct filedesc *fdp;
- int error;
- int tmp;
+ int error, tmp, locked;
AUDIT_ARG_FD(fd);
AUDIT_ARG_CMD(com);
- if ((error = fget(td, fd, CAP_IOCTL, &fp)) != 0)
- return (error);
- if ((fp->f_flag & (FREAD | FWRITE)) == 0) {
- fdrop(fp, td);
- return (EBADF);
- }
+
fdp = td->td_proc->p_fd;
+
switch (com) {
case FIONCLEX:
+ case FIOCLEX:
FILEDESC_XLOCK(fdp);
- fdp->fd_ofileflags[fd] &= ~UF_EXCLOSE;
- FILEDESC_XUNLOCK(fdp);
+ locked = LA_XLOCKED;
+ break;
+ default:
+#ifdef CAPABILITIES
+ FILEDESC_SLOCK(fdp);
+ locked = LA_SLOCKED;
+#else
+ locked = LA_UNLOCKED;
+#endif
+ break;
+ }
+
+#ifdef CAPABILITIES
+ if ((fp = fget_locked(fdp, fd)) == NULL) {
+ error = EBADF;
+ goto out;
+ }
+ if ((error = cap_ioctl_check(fdp, fd, com)) != 0) {
+ fp = NULL; /* fhold() was not called yet */
+ goto out;
+ }
+ fhold(fp);
+ if (locked == LA_SLOCKED) {
+ FILEDESC_SUNLOCK(fdp);
+ locked = LA_UNLOCKED;
+ }
+#else
+ if ((error = fget(td, fd, CAP_IOCTL, &fp)) != 0) {
+ fp = NULL;
+ goto out;
+ }
+#endif
+ if ((fp->f_flag & (FREAD | FWRITE)) == 0) {
+ error = EBADF;
+ goto out;
+ }
+
+ switch (com) {
+ case FIONCLEX:
+ fdp->fd_ofiles[fd].fde_flags &= ~UF_EXCLOSE;
goto out;
case FIOCLEX:
- FILEDESC_XLOCK(fdp);
- fdp->fd_ofileflags[fd] |= UF_EXCLOSE;
- FILEDESC_XUNLOCK(fdp);
+ fdp->fd_ofiles[fd].fde_flags |= UF_EXCLOSE;
goto out;
case FIONBIO:
if ((tmp = *(int *)data))
@@ -745,7 +778,21 @@ kern_ioctl(struct thread *td, int fd, u_long com, caddr_t data)
error = fo_ioctl(fp, com, data, td->td_ucred, td);
out:
- fdrop(fp, td);
+ switch (locked) {
+ case LA_XLOCKED:
+ FILEDESC_XUNLOCK(fdp);
+ break;
+#ifdef CAPABILITIES
+ case LA_SLOCKED:
+ FILEDESC_SUNLOCK(fdp);
+ break;
+#endif
+ default:
+ FILEDESC_UNLOCK_ASSERT(fdp);
+ break;
+ }
+ if (fp != NULL)
+ fdrop(fp, td);
return (error);
}
@@ -1130,32 +1177,8 @@ selsetbits(fd_mask **ibits, fd_mask **obits, int idx, fd_mask bit, int events)
static __inline int
getselfd_cap(struct filedesc *fdp, int fd, struct file **fpp)
{
- struct file *fp;
-#ifdef CAPABILITIES
- struct file *fp_fromcap;
- int error;
-#endif
- if ((fp = fget_unlocked(fdp, fd)) == NULL)
- return (EBADF);
-#ifdef CAPABILITIES
- /*
- * If the file descriptor is for a capability, test rights and use
- * the file descriptor references by the capability.
- */
- error = cap_funwrap(fp, CAP_POLL_EVENT, &fp_fromcap);
- if (error) {
- fdrop(fp, curthread);
- return (error);
- }
- if (fp != fp_fromcap) {
- fhold(fp_fromcap);
- fdrop(fp, curthread);
- fp = fp_fromcap;
- }
-#endif /* CAPABILITIES */
- *fpp = fp;
- return (0);
+ return (fget_unlocked(fdp, fd, CAP_POLL_EVENT, 0, fpp, NULL)); /* Look up fd requiring CAP_POLL_EVENT; the file goes to *fpp and the fget_unlocked() error code is passed through. Presumably a reference is held on success; confirm in fget_unlocked(). */
}
/*
@@ -1349,13 +1372,14 @@ pollrescan(struct thread *td)
/* If the selinfo wasn't cleared the event didn't fire. */
if (si != NULL)
continue;
- fp = fdp->fd_ofiles[fd->fd];
+ fp = fdp->fd_ofiles[fd->fd].fde_file;
#ifdef CAPABILITIES
- if ((fp == NULL)
- || (cap_funwrap(fp, CAP_POLL_EVENT, &fp) != 0)) {
+ if (fp == NULL ||
+ cap_check(cap_rights(fdp, fd->fd), CAP_POLL_EVENT) != 0)
#else
- if (fp == NULL) {
+ if (fp == NULL)
#endif
+ {
fd->revents = POLLNVAL;
n++;
continue;
@@ -1408,9 +1432,8 @@ pollscan(td, fds, nfd)
u_int nfd;
{
struct filedesc *fdp = td->td_proc->p_fd;
- int i;
struct file *fp;
- int n = 0;
+ int i, n = 0;
FILEDESC_SLOCK(fdp);
for (i = 0; i < nfd; i++, fds++) {
@@ -1420,13 +1443,15 @@ pollscan(td, fds, nfd)
} else if (fds->fd < 0) {
fds->revents = 0;
} else {
- fp = fdp->fd_ofiles[fds->fd];
+ fp = fdp->fd_ofiles[fds->fd].fde_file;
#ifdef CAPABILITIES
- if ((fp == NULL)
- || (cap_funwrap(fp, CAP_POLL_EVENT, &fp) != 0)) {
+ if (fp == NULL ||
+ cap_check(cap_rights(fdp, fds->fd),
+ CAP_POLL_EVENT) != 0)
#else
- if (fp == NULL) {
+ if (fp == NULL)
#endif
+ {
fds->revents = POLLNVAL;
n++;
} else {
diff --git a/sys/kern/syscalls.master b/sys/kern/syscalls.master
index 148dea3..1a89010 100644
--- a/sys/kern/syscalls.master
+++ b/sys/kern/syscalls.master
@@ -917,7 +917,7 @@
struct shmid_ds *buf); }
513 AUE_LPATHCONF STD { int lpathconf(char *path, int name); }
514 AUE_CAP_NEW STD { int cap_new(int fd, uint64_t rights); }
-515 AUE_CAP_GETRIGHTS STD { int cap_getrights(int fd, \
+515 AUE_CAP_RIGHTS_GET STD { int cap_rights_get(int fd, \
uint64_t *rightsp); }
516 AUE_CAP_ENTER STD { int cap_enter(void); }
517 AUE_CAP_GETMODE STD { int cap_getmode(u_int *modep); }
@@ -955,5 +955,15 @@
int *status, int options, \
struct __wrusage *wrusage, \
siginfo_t *info); }
+533 AUE_CAP_RIGHTS_LIMIT STD { int cap_rights_limit(int fd, \
+ uint64_t rights); }
+534 AUE_CAP_IOCTLS_LIMIT STD { int cap_ioctls_limit(int fd, \
+ const u_long *cmds, size_t ncmds); }
+535 AUE_CAP_IOCTLS_GET STD { ssize_t cap_ioctls_get(int fd, \
+ u_long *cmds, size_t maxcmds); }
+536 AUE_CAP_FCNTLS_LIMIT STD { int cap_fcntls_limit(int fd, \
+ uint32_t fcntlrights); }
+537 AUE_CAP_FCNTLS_GET STD { int cap_fcntls_get(int fd, \
+ uint32_t *fcntlrightsp); }
; Please copy any additions and changes to the following compatability tables:
; sys/compat/freebsd32/syscalls.master
diff --git a/sys/kern/tty.c b/sys/kern/tty.c
index 5c7b753..02eccd7 100644
--- a/sys/kern/tty.c
+++ b/sys/kern/tty.c
@@ -1840,23 +1840,15 @@ ttyhook_register(struct tty **rtp, struct proc *p, int fd,
int error, ref;
/* Validate the file descriptor. */
- if ((fdp = p->p_fd) == NULL)
- return (EBADF);
-
- fp = fget_unlocked(fdp, fd);
- if (fp == NULL)
- return (EBADF);
+ fdp = p->p_fd;
+ error = fget_unlocked(fdp, fd, CAP_TTYHOOK, 0, &fp, NULL);
+ if (error != 0)
+ return (error);
if (fp->f_ops == &badfileops) {
error = EBADF;
goto done1;
}
-#ifdef CAPABILITIES
- error = cap_funwrap(fp, CAP_TTYHOOK, &fp);
- if (error)
- goto done1;
-#endif
-
/*
* Make sure the vnode is bound to a character device.
* Unlocked check for the vnode type is ok there, because we
diff --git a/sys/kern/uipc_mqueue.c b/sys/kern/uipc_mqueue.c
index 9da464c..2d18e77 100644
--- a/sys/kern/uipc_mqueue.c
+++ b/sys/kern/uipc_mqueue.c
@@ -45,6 +45,7 @@
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
+#include "opt_capsicum.h"
#include "opt_compat.h"
#include <sys/param.h>
@@ -2032,8 +2033,8 @@ kern_kmq_open(struct thread *td, const char *upath, int flags, mode_t mode,
&mqueueops);
FILEDESC_XLOCK(fdp);
- if (fdp->fd_ofiles[fd] == fp)
- fdp->fd_ofileflags[fd] |= UF_EXCLOSE;
+ if (fdp->fd_ofiles[fd].fde_file == fp)
+ fdp->fd_ofiles[fd].fde_flags |= UF_EXCLOSE;
FILEDESC_XUNLOCK(fdp);
td->td_retval[0] = fd;
fdrop(fp, td);
@@ -2275,11 +2276,13 @@ again:
error = EBADF;
goto out;
}
- error = cap_funwrap(fp2, CAP_POLL_EVENT, &fp2);
+#ifdef CAPABILITIES
+ error = cap_check(cap_rights(fdp, uap->mqd), CAP_POLL_EVENT);
if (error) {
FILEDESC_SUNLOCK(fdp);
goto out;
}
+#endif
if (fp2 != fp) {
FILEDESC_SUNLOCK(fdp);
error = EBADF;
diff --git a/sys/kern/uipc_sem.c b/sys/kern/uipc_sem.c
index c219844..2de3409 100644
--- a/sys/kern/uipc_sem.c
+++ b/sys/kern/uipc_sem.c
@@ -579,8 +579,8 @@ ksem_create(struct thread *td, const char *name, semid_t *semidp, mode_t mode,
finit(fp, FREAD | FWRITE, DTYPE_SEM, ks, &ksem_ops);
FILEDESC_XLOCK(fdp);
- if (fdp->fd_ofiles[fd] == fp)
- fdp->fd_ofileflags[fd] |= UF_EXCLOSE;
+ if (fdp->fd_ofiles[fd].fde_file == fp)
+ fdp->fd_ofiles[fd].fde_flags |= UF_EXCLOSE;
FILEDESC_XUNLOCK(fdp);
fdrop(fp, td);
diff --git a/sys/kern/uipc_shm.c b/sys/kern/uipc_shm.c
index 7f75bdc..0cbb8b3 100644
--- a/sys/kern/uipc_shm.c
+++ b/sys/kern/uipc_shm.c
@@ -629,8 +629,8 @@ sys_shm_open(struct thread *td, struct shm_open_args *uap)
finit(fp, FFLAGS(uap->flags & O_ACCMODE), DTYPE_SHM, shmfd, &shm_ops);
FILEDESC_XLOCK(fdp);
- if (fdp->fd_ofiles[fd] == fp)
- fdp->fd_ofileflags[fd] |= UF_EXCLOSE;
+ if (fdp->fd_ofiles[fd].fde_file == fp)
+ fdp->fd_ofiles[fd].fde_flags |= UF_EXCLOSE;
FILEDESC_XUNLOCK(fdp);
td->td_retval[0] = fd;
fdrop(fp, td);
diff --git a/sys/kern/uipc_syscalls.c b/sys/kern/uipc_syscalls.c
index 665eb6d..847db35 100644
--- a/sys/kern/uipc_syscalls.c
+++ b/sys/kern/uipc_syscalls.c
@@ -121,38 +121,20 @@ SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufsused, CTLFLAG_RD, &nsfbufsused, 0,
"Number of sendfile(2) sf_bufs in use");
/*
- * Convert a user file descriptor to a kernel file entry and check that, if
- * it is a capability, the right rights are present. A reference on the file
- * entry is held upon returning.
+ * Convert a user file descriptor to a kernel file entry and check if required
+ * capability rights are present.
+ * A reference on the file entry is held upon returning.
*/
static int
getsock_cap(struct filedesc *fdp, int fd, cap_rights_t rights,
struct file **fpp, u_int *fflagp)
{
struct file *fp;
-#ifdef CAPABILITIES
- struct file *fp_fromcap;
int error;
-#endif
- if (fdp == NULL || (fp = fget_unlocked(fdp, fd)) == NULL)
- return (EBADF);
-#ifdef CAPABILITIES
- /*
- * If the file descriptor is for a capability, test rights and use
- * the file descriptor referenced by the capability.
- */
- error = cap_funwrap(fp, rights, &fp_fromcap);
- if (error) {
- fdrop(fp, curthread);
+ error = fget_unlocked(fdp, fd, rights, 0, &fp, NULL);
+ if (error != 0)
return (error);
- }
- if (fp != fp_fromcap) {
- fhold(fp_fromcap);
- fdrop(fp, curthread);
- fp = fp_fromcap;
- }
-#endif /* CAPABILITIES */
if (fp->f_type != DTYPE_SOCKET) {
fdrop(fp, curthread);
return (ENOTSOCK);
@@ -765,7 +747,7 @@ kern_sendit(td, s, mp, flags, control, segflg)
#endif
AUDIT_ARG_FD(s);
- rights = CAP_WRITE;
+ rights = CAP_SEND;
if (mp->msg_name != NULL) {
AUDIT_ARG_SOCKADDR(td, mp->msg_name);
rights |= CAP_CONNECT;
@@ -974,7 +956,7 @@ kern_recvit(td, s, mp, fromseg, controlp)
*controlp = NULL;
AUDIT_ARG_FD(s);
- error = getsock_cap(td->td_proc->p_fd, s, CAP_READ, &fp, NULL);
+ error = getsock_cap(td->td_proc->p_fd, s, CAP_RECV, &fp, NULL);
if (error)
return (error);
so = fp->f_data;
@@ -1850,7 +1832,11 @@ kern_sendfile(struct thread *td, struct sendfile_args *uap,
* we send only the header/trailer and no payload data.
*/
AUDIT_ARG_FD(uap->fd);
- if ((error = fgetvp_read(td, uap->fd, CAP_READ, &vp)) != 0)
+ /*
+ * sendfile(2) can start at any offset within a file so we require
+ * CAP_READ+CAP_SEEK = CAP_PREAD.
+ */
+ if ((error = fgetvp_read(td, uap->fd, CAP_PREAD, &vp)) != 0)
goto out;
vn_lock(vp, LK_SHARED | LK_RETRY);
if (vp->v_type == VREG) {
@@ -1886,7 +1872,7 @@ kern_sendfile(struct thread *td, struct sendfile_args *uap,
* The socket must be a stream socket and connected.
* Remember if it a blocking or non-blocking socket.
*/
- if ((error = getsock_cap(td->td_proc->p_fd, uap->s, CAP_WRITE,
+ if ((error = getsock_cap(td->td_proc->p_fd, uap->s, CAP_SEND,
&sock_fp, NULL)) != 0)
goto out;
so = sock_fp->f_data;
@@ -2423,7 +2409,7 @@ sys_sctp_generic_sendmsg (td, uap)
u_sinfo = &sinfo;
}
- rights = CAP_WRITE;
+ rights = CAP_SEND;
if (uap->tolen) {
error = getsockaddr(&to, uap->to, uap->tolen);
if (error) {
@@ -2534,7 +2520,7 @@ sys_sctp_generic_sendmsg_iov(td, uap)
return (error);
u_sinfo = &sinfo;
}
- rights = CAP_WRITE;
+ rights = CAP_SEND;
if (uap->tolen) {
error = getsockaddr(&to, uap->to, uap->tolen);
if (error) {
@@ -2658,7 +2644,7 @@ sys_sctp_generic_recvmsg(td, uap)
#endif
AUDIT_ARG_FD(uap->sd);
- error = getsock_cap(td->td_proc->p_fd, uap->sd, CAP_READ, &fp, NULL);
+ error = getsock_cap(td->td_proc->p_fd, uap->sd, CAP_RECV, &fp, NULL);
if (error) {
return (error);
}
diff --git a/sys/kern/uipc_usrreq.c b/sys/kern/uipc_usrreq.c
index a6c308f..dcfd009 100644
--- a/sys/kern/uipc_usrreq.c
+++ b/sys/kern/uipc_usrreq.c
@@ -279,7 +279,7 @@ static void unp_drop(struct unpcb *, int);
static void unp_gc(__unused void *, int);
static void unp_scan(struct mbuf *, void (*)(struct file *));
static void unp_discard(struct file *);
-static void unp_freerights(struct file **, int);
+static void unp_freerights(struct filedescent *, int);
static void unp_init(void);
static int unp_internalize(struct mbuf **, struct thread *);
static void unp_internalize_fp(struct file *);
@@ -1642,14 +1642,14 @@ unp_drop(struct unpcb *unp, int errno)
}
static void
-unp_freerights(struct file **rp, int fdcount)
+unp_freerights(struct filedescent *fde, int fdcount) /* Discard fdcount descriptor entries starting at fde: each entry is zeroed and its file handed to unp_discard(). */
{
- int i;
struct file *fp;
+ int i;
- for (i = 0; i < fdcount; i++) {
- fp = *rp;
- *rp++ = NULL;
+ for (i = 0; i < fdcount; i++, fde++) {
+ fp = fde->fde_file; /* Save the file pointer before the entry is wiped. */
+ bzero(fde, sizeof(*fde)); /* Clears the whole entry, not just the file pointer. NOTE(review): if the entry carries an allocated ioctl list (filled by filecaps_copy() in unp_internalize()), confirm it is released elsewhere. */
unp_discard(fp);
}
}
@@ -1661,8 +1661,8 @@ unp_externalize(struct mbuf *control, struct mbuf **controlp)
struct cmsghdr *cm = mtod(control, struct cmsghdr *);
int i;
int *fdp;
- struct file **rp;
- struct file *fp;
+ struct filedesc *fdesc = td->td_proc->p_fd;
+ struct filedescent *fde, *fdep;
void *data;
socklen_t clen = control->m_len, datalen;
int error, newfds;
@@ -1683,20 +1683,20 @@ unp_externalize(struct mbuf *control, struct mbuf **controlp)
datalen = (caddr_t)cm + cm->cmsg_len - (caddr_t)data;
if (cm->cmsg_level == SOL_SOCKET
&& cm->cmsg_type == SCM_RIGHTS) {
- newfds = datalen / sizeof(struct file *);
- rp = data;
+ newfds = datalen / sizeof(*fdep);
+ fdep = data;
/* If we're not outputting the descriptors free them. */
if (error || controlp == NULL) {
- unp_freerights(rp, newfds);
+ unp_freerights(fdep, newfds);
goto next;
}
- FILEDESC_XLOCK(td->td_proc->p_fd);
+ FILEDESC_XLOCK(fdesc);
/* if the new FD's will not fit free them. */
if (!fdavail(td, newfds)) {
- FILEDESC_XUNLOCK(td->td_proc->p_fd);
+ FILEDESC_XUNLOCK(fdesc);
error = EMSGSIZE;
- unp_freerights(rp, newfds);
+ unp_freerights(fdep, newfds);
goto next;
}
@@ -1710,23 +1710,24 @@ unp_externalize(struct mbuf *control, struct mbuf **controlp)
*controlp = sbcreatecontrol(NULL, newlen,
SCM_RIGHTS, SOL_SOCKET);
if (*controlp == NULL) {
- FILEDESC_XUNLOCK(td->td_proc->p_fd);
+ FILEDESC_XUNLOCK(fdesc);
error = E2BIG;
- unp_freerights(rp, newfds);
+ unp_freerights(fdep, newfds);
goto next;
}
fdp = (int *)
CMSG_DATA(mtod(*controlp, struct cmsghdr *));
- for (i = 0; i < newfds; i++) {
+ for (i = 0; i < newfds; i++, fdep++, fdp++) {
if (fdalloc(td, 0, &f))
panic("unp_externalize fdalloc failed");
- fp = *rp++;
- td->td_proc->p_fd->fd_ofiles[f] = fp;
- unp_externalize_fp(fp);
- *fdp++ = f;
+ fde = &fdesc->fd_ofiles[f];
+ fde->fde_file = fdep->fde_file;
+ filecaps_copy(&fdep->fde_caps, &fde->fde_caps);
+ unp_externalize_fp(fde->fde_file);
+ *fdp = f;
}
- FILEDESC_XUNLOCK(td->td_proc->p_fd);
+ FILEDESC_XUNLOCK(fdesc);
} else {
/* We can just copy anything else across. */
if (error || controlp == NULL)
@@ -1797,11 +1798,11 @@ unp_internalize(struct mbuf **controlp, struct thread *td)
{
struct mbuf *control = *controlp;
struct proc *p = td->td_proc;
- struct filedesc *fdescp = p->p_fd;
+ struct filedesc *fdesc = p->p_fd;
struct bintime *bt;
struct cmsghdr *cm = mtod(control, struct cmsghdr *);
struct cmsgcred *cmcred;
- struct file **rp;
+ struct filedescent *fde, *fdep;
struct file *fp;
struct timeval *tv;
int i, fd, *fdp;
@@ -1854,18 +1855,17 @@ unp_internalize(struct mbuf **controlp, struct thread *td)
* files. If not, reject the entire operation.
*/
fdp = data;
- FILEDESC_SLOCK(fdescp);
+ FILEDESC_SLOCK(fdesc);
for (i = 0; i < oldfds; i++) {
fd = *fdp++;
- if (fd < 0 || fd >= fdescp->fd_nfiles ||
- fdescp->fd_ofiles[fd] == NULL) {
- FILEDESC_SUNLOCK(fdescp);
+ if (fget_locked(fdesc, fd) == NULL) {
+ FILEDESC_SUNLOCK(fdesc);
error = EBADF;
goto out;
}
- fp = fdescp->fd_ofiles[fd];
+ fp = fdesc->fd_ofiles[fd].fde_file;
if (!(fp->f_ops->fo_flags & DFLAG_PASSABLE)) {
- FILEDESC_SUNLOCK(fdescp);
+ FILEDESC_SUNLOCK(fdesc);
error = EOPNOTSUPP;
goto out;
}
@@ -1874,25 +1874,26 @@ unp_internalize(struct mbuf **controlp, struct thread *td)
/*
* Now replace the integer FDs with pointers to the
- * associated global file table entry..
+ * file structure and capability rights.
*/
- newlen = oldfds * sizeof(struct file *);
+ newlen = oldfds * sizeof(*fdep);
*controlp = sbcreatecontrol(NULL, newlen,
SCM_RIGHTS, SOL_SOCKET);
if (*controlp == NULL) {
- FILEDESC_SUNLOCK(fdescp);
+ FILEDESC_SUNLOCK(fdesc);
error = E2BIG;
goto out;
}
fdp = data;
- rp = (struct file **)
+ fdep = (struct filedescent *)
CMSG_DATA(mtod(*controlp, struct cmsghdr *));
- for (i = 0; i < oldfds; i++) {
- fp = fdescp->fd_ofiles[*fdp++];
- *rp++ = fp;
- unp_internalize_fp(fp);
+ for (i = 0; i < oldfds; i++, fdep++, fdp++) {
+ fde = &fdesc->fd_ofiles[*fdp];
+ fdep->fde_file = fde->fde_file;
+ filecaps_copy(&fde->fde_caps, &fdep->fde_caps);
+ unp_internalize_fp(fdep->fde_file);
}
- FILEDESC_SUNLOCK(fdescp);
+ FILEDESC_SUNLOCK(fdesc);
break;
case SCM_TIMESTAMP:
@@ -2252,7 +2253,7 @@ static void
unp_scan(struct mbuf *m0, void (*op)(struct file *))
{
struct mbuf *m;
- struct file **rp;
+ struct filedescent *fdep;
struct cmsghdr *cm;
void *data;
int i;
@@ -2277,10 +2278,10 @@ unp_scan(struct mbuf *m0, void (*op)(struct file *))
if (cm->cmsg_level == SOL_SOCKET &&
cm->cmsg_type == SCM_RIGHTS) {
- qfds = datalen / sizeof (struct file *);
- rp = data;
- for (i = 0; i < qfds; i++)
- (*op)(*rp++);
+ qfds = datalen / sizeof(*fdep);
+ fdep = data;
+ for (i = 0; i < qfds; i++, fdep++)
+ (*op)(fdep->fde_file);
}
if (CMSG_SPACE(datalen) < clen) {
diff --git a/sys/kern/vfs_aio.c b/sys/kern/vfs_aio.c
index 99b0197..cba1638 100644
--- a/sys/kern/vfs_aio.c
+++ b/sys/kern/vfs_aio.c
@@ -1593,16 +1593,16 @@ aio_aqueue(struct thread *td, struct aiocb *job, struct aioliojob *lj,
fd = aiocbe->uaiocb.aio_fildes;
switch (opcode) {
case LIO_WRITE:
- error = fget_write(td, fd, CAP_WRITE | CAP_SEEK, &fp);
+ error = fget_write(td, fd, CAP_PWRITE, &fp);
break;
case LIO_READ:
- error = fget_read(td, fd, CAP_READ | CAP_SEEK, &fp);
+ error = fget_read(td, fd, CAP_PREAD, &fp);
break;
case LIO_SYNC:
error = fget(td, fd, CAP_FSYNC, &fp);
break;
case LIO_NOP:
- error = fget(td, fd, 0, &fp);
+ error = fget(td, fd, CAP_NONE, &fp);
break;
default:
error = EINVAL;
diff --git a/sys/kern/vfs_lookup.c b/sys/kern/vfs_lookup.c
index fbde152..94d11f2 100644
--- a/sys/kern/vfs_lookup.c
+++ b/sys/kern/vfs_lookup.c
@@ -227,17 +227,18 @@ namei(struct nameidata *ndp)
AUDIT_ARG_ATFD2(ndp->ni_dirfd);
error = fgetvp_rights(td, ndp->ni_dirfd,
ndp->ni_rightsneeded | CAP_LOOKUP,
- &(ndp->ni_baserights), &dp);
+ &ndp->ni_filecaps, &dp);
#ifdef CAPABILITIES
/*
- * Lookups relative to a capability must also be
+ * If file descriptor doesn't have all rights,
+ * all lookups relative to it must also be
* strictly relative.
- *
- * Note that a capability with rights CAP_MASK_VALID
- * is treated exactly like a regular file descriptor.
*/
- if (ndp->ni_baserights != CAP_MASK_VALID)
+ if (ndp->ni_filecaps.fc_rights != CAP_ALL ||
+ ndp->ni_filecaps.fc_fcntls != CAP_FCNTL_ALL ||
+ ndp->ni_filecaps.fc_nioctls != -1) {
ndp->ni_strictrelative = 1;
+ }
#endif
}
if (error != 0 || dp != NULL) {
diff --git a/sys/kern/vfs_syscalls.c b/sys/kern/vfs_syscalls.c
index bd44a3a..787399a 100644
--- a/sys/kern/vfs_syscalls.c
+++ b/sys/kern/vfs_syscalls.c
@@ -970,6 +970,8 @@ flags_to_rights(int flags)
/* FALLTHROUGH */
case O_WRONLY:
rights |= CAP_WRITE;
+ if (!(flags & O_APPEND))
+ rights |= CAP_SEEK;
break;
}
}
@@ -1143,19 +1145,22 @@ success:
* If we haven't already installed the FD (for dupfdopen), do so now.
*/
if (indx == -1) {
+ struct filecaps *fcaps;
+
#ifdef CAPABILITIES
- if (nd.ni_strictrelative == 1) {
- /*
- * We are doing a strict relative lookup; wrap the
- * result in a capability.
- */
- if ((error = kern_capwrap(td, fp, nd.ni_baserights,
- &indx)) != 0)
- goto bad;
- } else
+ if (nd.ni_strictrelative == 1)
+ fcaps = &nd.ni_filecaps;
+ else
#endif
- if ((error = finstall(td, fp, &indx, flags)) != 0)
- goto bad;
+ fcaps = NULL;
+ error = finstall(td, fp, &indx, flags, fcaps);
+ /* On success finstall() consumes fcaps. */
+ if (error != 0) {
+ filecaps_free(&nd.ni_filecaps);
+ goto bad;
+ }
+ } else {
+ filecaps_free(&nd.ni_filecaps);
}
/*
@@ -1279,7 +1284,7 @@ kern_mknodat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
restart:
bwillwrite();
NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1,
- pathseg, path, fd, CAP_MKNOD, td);
+ pathseg, path, fd, CAP_MKNODAT, td);
if ((error = namei(&nd)) != 0)
return (error);
vp = nd.ni_vp;
@@ -1399,7 +1404,7 @@ kern_mkfifoat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
restart:
bwillwrite();
NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1,
- pathseg, path, fd, CAP_MKFIFO, td);
+ pathseg, path, fd, CAP_MKFIFOAT, td);
if ((error = namei(&nd)) != 0)
return (error);
if (nd.ni_vp != NULL) {
@@ -1553,7 +1558,7 @@ kern_linkat(struct thread *td, int fd1, int fd2, char *path1, char *path2,
return (error);
}
NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE2,
- segflg, path2, fd2, CAP_CREATE, td);
+ segflg, path2, fd2, CAP_LINKAT, td);
if ((error = namei(&nd)) == 0) {
if (nd.ni_vp != NULL) {
if (nd.ni_dvp == nd.ni_vp)
@@ -1646,7 +1651,7 @@ kern_symlinkat(struct thread *td, char *path1, int fd, char *path2,
restart:
bwillwrite();
NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1,
- segflg, path2, fd, CAP_CREATE, td);
+ segflg, path2, fd, CAP_SYMLINKAT, td);
if ((error = namei(&nd)) != 0)
goto out;
if (nd.ni_vp) {
@@ -1798,7 +1803,7 @@ kern_unlinkat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
restart:
bwillwrite();
NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1,
- pathseg, path, fd, CAP_DELETE, td);
+ pathseg, path, fd, CAP_UNLINKAT, td);
if ((error = namei(&nd)) != 0)
return (error == EINVAL ? EPERM : error);
vp = nd.ni_vp;
@@ -3502,10 +3507,10 @@ kern_renameat(struct thread *td, int oldfd, char *old, int newfd, char *new,
bwillwrite();
#ifdef MAC
NDINIT_ATRIGHTS(&fromnd, DELETE, LOCKPARENT | LOCKLEAF | SAVESTART |
- AUDITVNODE1, pathseg, old, oldfd, CAP_DELETE, td);
+ AUDITVNODE1, pathseg, old, oldfd, CAP_RENAMEAT, td);
#else
NDINIT_ATRIGHTS(&fromnd, DELETE, WANTPARENT | SAVESTART | AUDITVNODE1,
- pathseg, old, oldfd, CAP_DELETE, td);
+ pathseg, old, oldfd, CAP_RENAMEAT, td);
#endif
if ((error = namei(&fromnd)) != 0)
@@ -3527,7 +3532,7 @@ kern_renameat(struct thread *td, int oldfd, char *old, int newfd, char *new,
goto out1;
}
NDINIT_ATRIGHTS(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE |
- SAVESTART | AUDITVNODE2, pathseg, new, newfd, CAP_CREATE, td);
+ SAVESTART | AUDITVNODE2, pathseg, new, newfd, CAP_LINKAT, td);
if (fromnd.ni_vp->v_type == VDIR)
tond.ni_cnd.cn_flags |= WILLBEDIR;
if ((error = namei(&tond)) != 0) {
@@ -3550,6 +3555,15 @@ kern_renameat(struct thread *td, int oldfd, char *old, int newfd, char *new,
error = EISDIR;
goto out;
}
+#ifdef CAPABILITIES
+ /*
+ * If the target already exists we require CAP_UNLINKAT
+ * from 'newfd'.
+ */
+ error = cap_check(tond.ni_filecaps.fc_rights, CAP_UNLINKAT);
+ if (error != 0)
+ goto out;
+#endif
}
if (fvp == tdvp) {
error = EINVAL;
@@ -3650,7 +3664,7 @@ kern_mkdirat(struct thread *td, int fd, char *path, enum uio_seg segflg,
restart:
bwillwrite();
NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1,
- segflg, path, fd, CAP_MKDIR, td);
+ segflg, path, fd, CAP_MKDIRAT, td);
nd.ni_cnd.cn_flags |= WILLBEDIR;
if ((error = namei(&nd)) != 0)
return (error);
@@ -3734,7 +3748,7 @@ kern_rmdirat(struct thread *td, int fd, char *path, enum uio_seg pathseg)
restart:
bwillwrite();
NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1,
- pathseg, path, fd, CAP_RMDIR, td);
+ pathseg, path, fd, CAP_UNLINKAT, td);
if ((error = namei(&nd)) != 0)
return (error);
vp = nd.ni_vp;
@@ -3987,8 +4001,7 @@ kern_getdirentries(struct thread *td, int fd, char *buf, u_int count,
if (count > IOSIZE_MAX)
return (EINVAL);
auio.uio_resid = count;
- if ((error = getvnode(td->td_proc->p_fd, fd, CAP_READ | CAP_SEEK,
- &fp)) != 0)
+ if ((error = getvnode(td->td_proc->p_fd, fd, CAP_READ, &fp)) != 0)
return (error);
if ((fp->f_flag & FREAD) == 0) {
fdrop(fp, td);
@@ -4151,33 +4164,14 @@ out:
* entry is held upon returning.
*/
int
-getvnode(struct filedesc *fdp, int fd, cap_rights_t rights,
- struct file **fpp)
+getvnode(struct filedesc *fdp, int fd, cap_rights_t rights, struct file **fpp)
{
struct file *fp;
-#ifdef CAPABILITIES
- struct file *fp_fromcap;
int error;
-#endif
- if (fdp == NULL || (fp = fget_unlocked(fdp, fd)) == NULL)
- return (EBADF);
-#ifdef CAPABILITIES
- /*
- * If the file descriptor is for a capability, test rights and use the
- * file descriptor referenced by the capability.
- */
- error = cap_funwrap(fp, rights, &fp_fromcap);
- if (error) {
- fdrop(fp, curthread);
+ error = fget_unlocked(fdp, fd, rights, 0, &fp, NULL);
+ if (error != 0)
return (error);
- }
- if (fp != fp_fromcap) {
- fhold(fp_fromcap);
- fdrop(fp, curthread);
- fp = fp_fromcap;
- }
-#endif /* CAPABILITIES */
/*
* The file could be not of the vnode type, or it may be not
@@ -4361,7 +4355,7 @@ sys_fhopen(td, uap)
goto bad;
}
- error = finstall(td, fp, &indx, fmode);
+ error = finstall(td, fp, &indx, fmode, NULL);
bad:
fdrop(fp, td);
td->td_retval[0] = indx;
@@ -4614,7 +4608,7 @@ kern_posix_fadvise(struct thread *td, int fd, off_t offset, off_t len,
return (EINVAL);
}
/* XXX: CAP_POSIX_FADVISE? */
- error = fget(td, fd, 0, &fp);
+ error = fget(td, fd, CAP_NONE, &fp);
if (error != 0)
goto out;
diff --git a/sys/netsmb/smb_dev.c b/sys/netsmb/smb_dev.c
index dfedd88..a09d74d 100644
--- a/sys/netsmb/smb_dev.c
+++ b/sys/netsmb/smb_dev.c
@@ -399,9 +399,7 @@ nsmb_getfp(struct filedesc* fdp, int fd, int flag)
struct file* fp;
FILEDESC_SLOCK(fdp);
- if (fd < 0 || fd >= fdp->fd_nfiles ||
- (fp = fdp->fd_ofiles[fd]) == NULL ||
- (fp->f_flag & flag) == 0) {
+ if ((fp = fget_locked(fdp, fd)) == NULL || (fp->f_flag & flag) == 0) {
FILEDESC_SUNLOCK(fdp);
return (NULL);
}
diff --git a/sys/nfsserver/nfs_srvkrpc.c b/sys/nfsserver/nfs_srvkrpc.c
index 64f2aaa..6b3a6b7 100644
--- a/sys/nfsserver/nfs_srvkrpc.c
+++ b/sys/nfsserver/nfs_srvkrpc.c
@@ -174,7 +174,8 @@ nfssvc_nfsserver(struct thread *td, struct nfssvc_args *uap)
sizeof(addsockarg));
if (error)
return (error);
- if ((error = fget(td, addsockarg.sock, CAP_SOCK_ALL, &fp)) != 0)
+ error = fget(td, addsockarg.sock, CAP_SOCK_SERVER, &fp);
+ if (error)
return (error);
if (fp->f_type != DTYPE_SOCKET) {
fdrop(fp, td);
diff --git a/sys/ofed/include/linux/file.h b/sys/ofed/include/linux/file.h
index cbeec39..b9bd8b1 100644
--- a/sys/ofed/include/linux/file.h
+++ b/sys/ofed/include/linux/file.h
@@ -47,7 +47,8 @@ linux_fget(unsigned int fd)
{
struct file *file;
- file = fget_unlocked(curthread->td_proc->p_fd, fd);
+ if (fget_unlocked(curthread->td_proc->p_fd, fd, 0, 0, &file, NULL) != 0)
+ return (NULL);
return (struct linux_file *)file->f_data;
}
@@ -69,8 +70,7 @@ put_unused_fd(unsigned int fd)
{
struct file *file;
- file = fget_unlocked(curthread->td_proc->p_fd, fd);
- if (file == NULL)
+ if (fget_unlocked(curthread->td_proc->p_fd, fd, 0, 0, &file, NULL) != 0)
return;
fdclose(curthread->td_proc->p_fd, file, fd, curthread);
}
@@ -80,7 +80,8 @@ fd_install(unsigned int fd, struct linux_file *filp)
{
struct file *file;
- file = fget_unlocked(curthread->td_proc->p_fd, fd);
+ if (fget_unlocked(curthread->td_proc->p_fd, fd, 0, 0, &file, NULL) != 0)
+ file = NULL;
filp->_file = file;
finit(file, filp->f_mode, DTYPE_DEV, filp, &linuxfileops);
}
diff --git a/sys/security/audit/audit.h b/sys/security/audit/audit.h
index f43f6c8..733a3c7 100644
--- a/sys/security/audit/audit.h
+++ b/sys/security/audit/audit.h
@@ -115,6 +115,7 @@ void audit_arg_file(struct proc *p, struct file *fp);
void audit_arg_argv(char *argv, int argc, int length);
void audit_arg_envv(char *envv, int envc, int length);
void audit_arg_rights(cap_rights_t rights);
+void audit_arg_fcntl_rights(uint32_t fcntlrights);
void audit_sysclose(struct thread *td, int fd);
void audit_cred_copy(struct ucred *src, struct ucred *dest);
void audit_cred_destroy(struct ucred *cred);
@@ -241,6 +242,11 @@ void audit_thread_free(struct thread *td);
audit_arg_rights((rights)); \
} while (0)
+#define AUDIT_ARG_FCNTL_RIGHTS(fcntlrights) do { \
+ if (AUDITING_TD(curthread)) \
+ audit_arg_fcntl_rights((fcntlrights)); \
+} while (0)
+
#define AUDIT_ARG_RUID(ruid) do { \
if (AUDITING_TD(curthread)) \
audit_arg_ruid((ruid)); \
@@ -354,6 +360,7 @@ void audit_thread_free(struct thread *td);
#define AUDIT_ARG_PROCESS(p)
#define AUDIT_ARG_RGID(rgid)
#define AUDIT_ARG_RIGHTS(rights)
+#define AUDIT_ARG_FCNTL_RIGHTS(fcntlrights)
#define AUDIT_ARG_RUID(ruid)
#define AUDIT_ARG_SIGNUM(signum)
#define AUDIT_ARG_SGID(sgid)
diff --git a/sys/security/audit/audit_arg.c b/sys/security/audit/audit_arg.c
index 41d6b42..ec04b8b 100644
--- a/sys/security/audit/audit_arg.c
+++ b/sys/security/audit/audit_arg.c
@@ -871,6 +871,19 @@ audit_arg_rights(cap_rights_t rights)
ARG_SET_VALID(ar, ARG_RIGHTS);
}
+void
+audit_arg_fcntl_rights(uint32_t fcntlrights)
+{ /* Save fcntlrights in the record returned by currecord(), if any. */
+ struct kaudit_record *ar;
+
+ ar = currecord();
+ if (ar == NULL)
+ return; /* no record to fill in */
+
+ ar->k_ar.ar_arg_fcntl_rights = fcntlrights;
+ ARG_SET_VALID(ar, ARG_FCNTL_RIGHTS); /* kaudit_to_bsm() tests this via ARG_IS_VALID() */
+}
+
/*
* The close() system call uses it's own audit call to capture the path/vnode
* information because those pieces are not easily obtained within the system
diff --git a/sys/security/audit/audit_bsm.c b/sys/security/audit/audit_bsm.c
index 8881cea..9c69b1e 100644
--- a/sys/security/audit/audit_bsm.c
+++ b/sys/security/audit/audit_bsm.c
@@ -1597,6 +1597,7 @@ kaudit_to_bsm(struct kaudit_record *kar, struct au_record **pau)
break;
case AUE_CAP_NEW:
+ case AUE_CAP_RIGHTS_LIMIT:
/*
* XXXRW/XXXJA: Would be nice to audit socket/etc information.
*/
@@ -1607,13 +1608,25 @@ kaudit_to_bsm(struct kaudit_record *kar, struct au_record **pau)
}
break;
- case AUE_CAP_GETRIGHTS:
+ case AUE_CAP_FCNTLS_GET:
+ case AUE_CAP_IOCTLS_GET:
+ case AUE_CAP_IOCTLS_LIMIT:
+ case AUE_CAP_RIGHTS_GET:
if (ARG_IS_VALID(kar, ARG_FD)) {
tok = au_to_arg32(1, "fd", ar->ar_arg_fd);
kau_write(rec, tok);
}
break;
+ case AUE_CAP_FCNTLS_LIMIT:
+ FD_VNODE1_TOKENS;
+ if (ARG_IS_VALID(kar, ARG_FCNTL_RIGHTS)) {
+ tok = au_to_arg32(2, "fcntlrights",
+ ar->ar_arg_fcntl_rights);
+ kau_write(rec, tok);
+ }
+ break;
+
case AUE_CAP_ENTER:
case AUE_CAP_GETMODE:
break;
diff --git a/sys/security/audit/audit_private.h b/sys/security/audit/audit_private.h
index 10ccd5b..e23ba08 100644
--- a/sys/security/audit/audit_private.h
+++ b/sys/security/audit/audit_private.h
@@ -230,6 +230,7 @@ struct audit_record {
int ar_arg_exitretval;
struct sockaddr_storage ar_arg_sockaddr;
cap_rights_t ar_arg_rights;
+ uint32_t ar_arg_fcntl_rights;
char ar_jailname[MAXHOSTNAMELEN];
};
@@ -291,6 +292,7 @@ struct audit_record {
#define ARG_ATFD1 0x0004000000000000ULL
#define ARG_ATFD2 0x0008000000000000ULL
#define ARG_RIGHTS 0x0010000000000000ULL
+#define ARG_FCNTL_RIGHTS 0x0020000000000000ULL
#define ARG_NONE 0x0000000000000000ULL
#define ARG_ALL 0xFFFFFFFFFFFFFFFFULL
diff --git a/sys/sys/capability.h b/sys/sys/capability.h
index a163c4c..27e56c2 100644
--- a/sys/sys/capability.h
+++ b/sys/sys/capability.h
@@ -1,10 +1,14 @@
/*-
* Copyright (c) 2008-2010 Robert N. M. Watson
+ * Copyright (c) 2012 FreeBSD Foundation
* All rights reserved.
*
* This software was developed at the University of Cambridge Computer
* Laboratory with support from a grant from Google, Inc.
*
+ * Portions of this software were developed by Pawel Jakub Dawidek under
+ * sponsorship from the FreeBSD Foundation.
+ *
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@@ -36,9 +40,10 @@
#define _SYS_CAPABILITY_H_
#include <sys/cdefs.h>
-#include <sys/types.h>
+#include <sys/param.h>
#include <sys/file.h>
+#include <sys/fcntl.h>
/*
* Possible rights on capabilities.
@@ -54,34 +59,69 @@
* involve reads or writes depending a great deal on context.
*/
-/* General file I/O. */
-#define CAP_READ 0x0000000000000001ULL /* read/recv */
-#define CAP_WRITE 0x0000000000000002ULL /* write/send */
-#define CAP_MMAP 0x0000000000000004ULL /* mmap */
-#define CAP_MAPEXEC 0x0000000000000008ULL /* mmap(2) as exec */
+#define CAP_NONE 0x0000000000000000ULL
+
+/*
+ * General file I/O.
+ */
+/* Allows for openat(O_RDONLY), read(2), readv(2). */
+#define CAP_READ 0x0000000000000001ULL
+/* Allows for openat(O_WRONLY | O_APPEND), write(2), writev(2). */
+#define CAP_WRITE 0x0000000000000002ULL
+/* Allows for lseek(2). */
+#define CAP_SEEK 0x0000000000000080ULL
+/* Allows for pread(2), preadv(2). */
+#define CAP_PREAD (CAP_SEEK | CAP_READ)
+/* Allows for openat(O_WRONLY) (without O_APPEND), pwrite(2), pwritev(2). */
+#define CAP_PWRITE (CAP_SEEK | CAP_WRITE)
+/* Allows for mmap(PROT_NONE). */
+#define CAP_MMAP 0x0000000000000004ULL
+/* Allows for mmap(PROT_READ). */
+#define CAP_MMAP_R (CAP_MMAP | CAP_SEEK | CAP_READ)
+/* Allows for mmap(PROT_WRITE). */
+#define CAP_MMAP_W (CAP_MMAP | CAP_SEEK | CAP_WRITE)
+/* Allows for mmap(PROT_EXEC). */
+#define CAP_MMAP_X (CAP_MMAP | CAP_SEEK | 0x0000000000000008ULL)
+/* Allows for mmap(PROT_READ | PROT_WRITE). */
+#define CAP_MMAP_RW (CAP_MMAP_R | CAP_MMAP_W)
+/* Allows for mmap(PROT_READ | PROT_EXEC). */
+#define CAP_MMAP_RX (CAP_MMAP_R | CAP_MMAP_X)
+/* Allows for mmap(PROT_WRITE | PROT_EXEC). */
+#define CAP_MMAP_WX (CAP_MMAP_W | CAP_MMAP_X)
+/* Allows for mmap(PROT_READ | PROT_WRITE | PROT_EXEC). */
+#define CAP_MMAP_RWX (CAP_MMAP_R | CAP_MMAP_W | CAP_MMAP_X)
+/* Allows for openat(O_CREAT). */
+#define CAP_CREATE 0x0000000000080000ULL
+/* Allows for openat(O_EXEC) and fexecve(2) in turn. */
#define CAP_FEXECVE 0x0000000000000010ULL
+/* Allows for openat(O_SYNC), openat(O_FSYNC), fsync(2). */
#define CAP_FSYNC 0x0000000000000020ULL
+/* Allows for openat(O_TRUNC), ftruncate(2). */
#define CAP_FTRUNCATE 0x0000000000000040ULL
-#define CAP_SEEK 0x0000000000000080ULL
/* VFS methods. */
-#define CAP_FCHFLAGS 0x0000000000000100ULL
#define CAP_FCHDIR 0x0000000000000200ULL
+#define CAP_FCHFLAGS 0x0000000000000100ULL
#define CAP_FCHMOD 0x0000000000000400ULL
+#define CAP_FCHMODAT CAP_FCHMOD
#define CAP_FCHOWN 0x0000000000000800ULL
+#define CAP_FCHOWNAT CAP_FCHOWN
#define CAP_FCNTL 0x0000000000001000ULL
-#define CAP_FPATHCONF 0x0000000000002000ULL
#define CAP_FLOCK 0x0000000000004000ULL
+#define CAP_FPATHCONF 0x0000000000002000ULL
#define CAP_FSCK 0x0000000000008000ULL
#define CAP_FSTAT 0x0000000000010000ULL
+#define CAP_FSTATAT CAP_FSTAT
#define CAP_FSTATFS 0x0000000000020000ULL
#define CAP_FUTIMES 0x0000000000040000ULL
-#define CAP_CREATE 0x0000000000080000ULL
-#define CAP_DELETE 0x0000000000100000ULL
-#define CAP_MKDIR 0x0000000000200000ULL
-#define CAP_RMDIR 0x0000000000400000ULL
-#define CAP_MKFIFO 0x0000000000800000ULL
-#define CAP_MKNOD 0x0080000000000000ULL
+#define CAP_FUTIMESAT CAP_FUTIMES
+#define CAP_LINKAT 0x0000000000400000ULL
+#define CAP_MKDIRAT 0x0000000000200000ULL
+#define CAP_MKFIFOAT 0x0000000000800000ULL
+#define CAP_MKNODAT 0x0080000000000000ULL
+#define CAP_RENAMEAT 0x0200000000000000ULL
+#define CAP_SYMLINKAT 0x0100000000000000ULL
+#define CAP_UNLINKAT 0x0000000000100000ULL
/* Lookups - used to constrain *at() calls. */
#define CAP_LOOKUP 0x0000000001000000ULL
@@ -107,13 +147,18 @@
#define CAP_GETSOCKOPT 0x0000004000000000ULL
#define CAP_LISTEN 0x0000008000000000ULL
#define CAP_PEELOFF 0x0000010000000000ULL
+#define CAP_RECV CAP_READ
+#define CAP_SEND CAP_WRITE
#define CAP_SETSOCKOPT 0x0000020000000000ULL
#define CAP_SHUTDOWN 0x0000040000000000ULL
-#define CAP_SOCK_ALL \
- (CAP_ACCEPT | CAP_BIND | CAP_CONNECT \
- | CAP_GETPEERNAME | CAP_GETSOCKNAME | CAP_GETSOCKOPT \
- | CAP_LISTEN | CAP_PEELOFF | CAP_SETSOCKOPT | CAP_SHUTDOWN)
+#define CAP_SOCK_CLIENT \
+ (CAP_CONNECT | CAP_GETPEERNAME | CAP_GETSOCKNAME | CAP_GETSOCKOPT | \
+ CAP_PEELOFF | CAP_RECV | CAP_SEND | CAP_SETSOCKOPT | CAP_SHUTDOWN)
+#define CAP_SOCK_SERVER \
+ (CAP_ACCEPT | CAP_BIND | CAP_GETPEERNAME | CAP_GETSOCKNAME | \
+ CAP_GETSOCKOPT | CAP_LISTEN | CAP_PEELOFF | CAP_RECV | CAP_SEND | \
+ CAP_SETSOCKOPT | CAP_SHUTDOWN)
/* Mandatory Access Control. */
#define CAP_MAC_GET 0x0000080000000000ULL
@@ -138,40 +183,77 @@
#define CAP_PDKILL 0x0040000000000000ULL
/* The mask of all valid method rights. */
-#define CAP_MASK_VALID 0x00ffffffffffffffULL
+#define CAP_MASK_VALID 0x03ffffffffffffffULL
+#define CAP_ALL CAP_MASK_VALID
-#ifdef _KERNEL
+/* Available bits. */
+#define CAP_UNUSED5 0x0400000000000000ULL
+#define CAP_UNUSED4 0x0800000000000000ULL
+#define CAP_UNUSED3 0x1000000000000000ULL
+#define CAP_UNUSED2 0x2000000000000000ULL
+#define CAP_UNUSED1 0x4000000000000000ULL
+#define CAP_UNUSED0 0x8000000000000000ULL
-#define IN_CAPABILITY_MODE(td) (td->td_ucred->cr_flags & CRED_FLAG_CAPMODE)
+/*
+ * The following defines are provided for backward API compatibility and
+ * should not be used in new code.
+ */
+#define CAP_MAPEXEC CAP_MMAP_X
+#define CAP_DELETE CAP_UNLINKAT
+#define CAP_MKDIR CAP_MKDIRAT
+#define CAP_RMDIR CAP_UNLINKAT
+#define CAP_MKFIFO CAP_MKFIFOAT
+#define CAP_MKNOD CAP_MKNODAT
+#define CAP_SOCK_ALL (CAP_SOCK_CLIENT | CAP_SOCK_SERVER)
/*
- * Create a capability to wrap a file object.
+ * Allowed fcntl(2) commands.
*/
-int kern_capwrap(struct thread *td, struct file *fp, cap_rights_t rights,
- int *capfd);
+#define CAP_FCNTL_GETFL (1 << F_GETFL)
+#define CAP_FCNTL_SETFL (1 << F_SETFL)
+#if __BSD_VISIBLE || __XSI_VISIBLE || __POSIX_VISIBLE >= 200112
+#define CAP_FCNTL_GETOWN (1 << F_GETOWN)
+#define CAP_FCNTL_SETOWN (1 << F_SETOWN)
+#endif
+#if __BSD_VISIBLE || __XSI_VISIBLE || __POSIX_VISIBLE >= 200112
+#define CAP_FCNTL_ALL (CAP_FCNTL_GETFL | CAP_FCNTL_SETFL | \
+ CAP_FCNTL_GETOWN | CAP_FCNTL_SETOWN)
+#else
+#define CAP_FCNTL_ALL (CAP_FCNTL_GETFL | CAP_FCNTL_SETFL)
+#endif
+
+#define CAP_IOCTLS_ALL SSIZE_MAX
+
+#ifdef _KERNEL
+
+#include <sys/systm.h>
+
+#define IN_CAPABILITY_MODE(td) (((td)->td_ucred->cr_flags & CRED_FLAG_CAPMODE) != 0)
+
+struct filedesc;
/*
- * Unwrap a capability if its rights mask is a superset of 'rights'.
- *
- * Unwrapping a non-capability is effectively a no-op; the value of fp_cap
- * is simply copied into fpp.
+ * Test whether a capability grants the requested rights.
+ */
+int cap_check(cap_rights_t have, cap_rights_t need);
+/*
+ * Convert capability rights into VM access flags.
*/
-int cap_funwrap(struct file *fp_cap, cap_rights_t rights,
- struct file **fpp);
-int cap_funwrap_mmap(struct file *fp_cap, cap_rights_t rights,
- u_char *maxprotp, struct file **fpp);
+u_char cap_rights_to_vmprot(cap_rights_t have);
/*
* For the purposes of procstat(1) and similar tools, allow kern_descrip.c to
- * extract the rights from a capability. However, this should not be used by
- * kernel code generally, instead cap_funwrap() should be used in order to
- * keep all access control in one place.
+ * extract the rights from a capability.
*/
-cap_rights_t cap_rights(struct file *fp_cap);
+cap_rights_t cap_rights(struct filedesc *fdp, int fd);
+
+int cap_ioctl_check(struct filedesc *fdp, int fd, u_long cmd);
+int cap_fcntl_check(struct filedesc *fdp, int fd, int cmd);
#else /* !_KERNEL */
__BEGIN_DECLS
+#include <stdbool.h>
/*
* cap_enter(): Cause the process to enter capability mode, which will
@@ -187,21 +269,46 @@ __BEGIN_DECLS
int cap_enter(void);
/*
- * cap_getmode(): Are we in capability mode?
+ * Are we sandboxed (in capability mode)?
+ * This is a libc wrapper around the cap_getmode(2) system call.
*/
-int cap_getmode(u_int* modep);
+bool cap_sandboxed(void);
/*
- * cap_new(): Create a new capability derived from an existing file
- * descriptor with the specified rights. If the existing file descriptor is
- * a capability, then the new rights must be a subset of the existing rights.
+ * cap_getmode(): Are we in capability mode?
*/
-int cap_new(int fd, cap_rights_t rights);
+int cap_getmode(u_int *modep);
/*
- * cap_getrights(): Query the rights on a capability.
+ * Limits capability rights for the given descriptor (CAP_*).
+ */
+int cap_rights_limit(int fd, cap_rights_t rights);
+/*
+ * Returns bitmask of capability rights for the given descriptor.
*/
-int cap_getrights(int fd, cap_rights_t *rightsp);
+int cap_rights_get(int fd, cap_rights_t *rightsp);
+/*
+ * Limits allowed ioctls for the given descriptor.
+ */
+int cap_ioctls_limit(int fd, const unsigned long *cmds, size_t ncmds);
+/*
+ * Returns array of allowed ioctls for the given descriptor.
+ * If all ioctls are allowed, the cmds array is not populated and
+ * the function returns CAP_IOCTLS_ALL.
+ */
+ssize_t cap_ioctls_get(int fd, unsigned long *cmds, size_t maxcmds);
+/*
+ * Limits allowed fcntls for the given descriptor (CAP_FCNTL_*).
+ */
+int cap_fcntls_limit(int fd, uint32_t fcntlrights);
+/*
+ * Returns bitmask of allowed fcntls for the given descriptor.
+ */
+int cap_fcntls_get(int fd, uint32_t *fcntlrightsp);
+
+/* For backward compatibility. */
+int cap_new(int fd, cap_rights_t rights);
+#define cap_getrights(fd, rightsp) cap_rights_get((fd), (rightsp))
__END_DECLS
diff --git a/sys/sys/file.h b/sys/sys/file.h
index cf5f1ea..cfdc1d8 100644
--- a/sys/sys/file.h
+++ b/sys/sys/file.h
@@ -64,12 +64,12 @@ struct socket;
#define DTYPE_SEM 9 /* posix semaphore */
#define DTYPE_PTS 10 /* pseudo teletype master device */
#define DTYPE_DEV 11 /* Device specific fd type */
-#define DTYPE_CAPABILITY 12 /* capability */
-#define DTYPE_PROCDESC 13 /* process descriptor */
+#define DTYPE_PROCDESC 12 /* process descriptor */
#ifdef _KERNEL
struct file;
+struct filecaps;
struct ucred;
#define FOF_OFFSET 0x01 /* Use the offset in uio argument */
@@ -217,7 +217,6 @@ int fget_read(struct thread *td, int fd, cap_rights_t rights,
struct file **fpp);
int fget_write(struct thread *td, int fd, cap_rights_t rights,
struct file **fpp);
-int fgetcap(struct thread *td, int fd, struct file **fpp);
int _fdrop(struct file *fp, struct thread *td);
/*
@@ -242,7 +241,7 @@ int fgetvp(struct thread *td, int fd, cap_rights_t rights, struct vnode **vpp);
int fgetvp_exec(struct thread *td, int fd, cap_rights_t rights,
struct vnode **vpp);
int fgetvp_rights(struct thread *td, int fd, cap_rights_t need,
- cap_rights_t *have, struct vnode **vpp);
+ struct filecaps *havecaps, struct vnode **vpp);
int fgetvp_read(struct thread *td, int fd, cap_rights_t rights,
struct vnode **vpp);
int fgetvp_write(struct thread *td, int fd, cap_rights_t rights,
diff --git a/sys/sys/filedesc.h b/sys/sys/filedesc.h
index 07cfb1f..f3e3a09 100644
--- a/sys/sys/filedesc.h
+++ b/sys/sys/filedesc.h
@@ -41,6 +41,23 @@
#include <machine/_limits.h>
+struct filecaps {
+ cap_rights_t fc_rights; /* per-descriptor capability rights (CAP_* bitmask) */
+ uint32_t fc_fcntls; /* per-descriptor allowed fcntls (CAP_FCNTL_* bitmask) */
+ u_long *fc_ioctls; /* per-descriptor allowed ioctls */
+ int16_t fc_nioctls; /* fc_ioctls array size; namei() treats -1 as "ioctls not limited" */
+};
+
+struct filedescent {
+ struct file *fde_file; /* file structure for open file */
+ struct filecaps fde_caps; /* per-descriptor rights */
+ uint8_t fde_flags; /* per-process open file flags */
+};
+#define fde_rights fde_caps.fc_rights
+#define fde_fcntls fde_caps.fc_fcntls
+#define fde_ioctls fde_caps.fc_ioctls
+#define fde_nioctls fde_caps.fc_nioctls
+
/*
* This structure is used for the management of descriptors. It may be
* shared by multiple processes.
@@ -48,8 +65,7 @@
#define NDSLOTTYPE u_long
struct filedesc {
- struct file **fd_ofiles; /* file structures for open files */
- char *fd_ofileflags; /* per-process open file flags */
+ struct filedescent *fd_ofiles; /* open files */
struct vnode *fd_cdir; /* current directory */
struct vnode *fd_rdir; /* root directory */
struct vnode *fd_jdir; /* jail root directory */
@@ -92,6 +108,15 @@ struct filedesc_to_leader {
#ifdef _KERNEL
+#include <sys/systm.h> /* CTASSERT() */
+
+CTASSERT(sizeof(cap_rights_t) == sizeof(uint64_t));
+
+/* Flags for do_dup() */
+#define DUP_FIXED 0x1 /* Force fixed allocation. */
+#define DUP_FCNTL 0x2 /* fcntl()-style errors. */
+#define DUP_CLOEXEC 0x4 /* Atomically set FD_CLOEXEC. */
+
/* Lock a file descriptor table. */
#define FILEDESC_LOCK_INIT(fdp) sx_init(&(fdp)->fd_sx, "filedesc structure")
#define FILEDESC_LOCK_DESTROY(fdp) sx_destroy(&(fdp)->fd_sx)
@@ -109,13 +134,20 @@ struct filedesc_to_leader {
struct thread;
+void filecaps_init(struct filecaps *fcaps);
+void filecaps_copy(const struct filecaps *src, struct filecaps *dst);
+void filecaps_free(struct filecaps *fcaps);
+
int closef(struct file *fp, struct thread *td);
+int do_dup(struct thread *td, int flags, int old, int new,
+ register_t *retval);
int dupfdopen(struct thread *td, struct filedesc *fdp, int dfd, int mode,
int openerror, int *indxp);
int falloc(struct thread *td, struct file **resultfp, int *resultfd,
int flags);
int falloc_noinstall(struct thread *td, struct file **resultfp);
-int finstall(struct thread *td, struct file *fp, int *resultfp, int flags);
+int finstall(struct thread *td, struct file *fp, int *resultfp, int flags,
+ struct filecaps *fcaps);
int fdalloc(struct thread *td, int minfd, int *result);
int fdavail(struct thread *td, int n);
int fdcheckstd(struct thread *td);
@@ -123,7 +155,7 @@ void fdclose(struct filedesc *fdp, struct file *fp, int idx, struct thread *td);
void fdcloseexec(struct thread *td);
struct filedesc *fdcopy(struct filedesc *fdp);
void fdunshare(struct proc *p, struct thread *td);
-void fdfree(struct thread *td);
+void fdescfree(struct thread *td);
struct filedesc *fdinit(struct filedesc *fdp);
struct filedesc *fdshare(struct filedesc *fdp);
struct filedesc_to_leader *
@@ -135,7 +167,8 @@ void mountcheckdirs(struct vnode *olddp, struct vnode *newdp);
void setugidsafety(struct thread *td);
/* Return a referenced file from an unlocked descriptor. */
-struct file *fget_unlocked(struct filedesc *fdp, int fd);
+int fget_unlocked(struct filedesc *fdp, int fd, cap_rights_t needrights,
+ int needfcntl, struct file **fpp, cap_rights_t *haverightsp);
/* Requires a FILEDESC_{S,X}LOCK held and returns without a ref. */
static __inline struct file *
@@ -147,7 +180,7 @@ fget_locked(struct filedesc *fdp, int fd)
if (fd < 0 || fd >= fdp->fd_nfiles)
return (NULL);
- return (fdp->fd_ofiles[fd]);
+ return (fdp->fd_ofiles[fd].fde_file);
}
#endif /* _KERNEL */
diff --git a/sys/sys/namei.h b/sys/sys/namei.h
index 3b43916..a9992f4 100644
--- a/sys/sys/namei.h
+++ b/sys/sys/namei.h
@@ -33,6 +33,7 @@
#ifndef _SYS_NAMEI_H_
#define _SYS_NAMEI_H_
+#include <sys/filedesc.h>
#include <sys/queue.h>
#include <sys/uio.h>
@@ -75,7 +76,7 @@ struct nameidata {
/*
* Results: returned from namei
*/
- cap_rights_t ni_baserights; /* rights the *at base has (or -1) */
+ struct filecaps ni_filecaps; /* rights the *at base has */
/*
* Results: returned from/manipulated by lookup
*/
@@ -180,7 +181,7 @@ NDINIT_ALL(struct nameidata *ndp,
ndp->ni_startdir = startdir;
ndp->ni_strictrelative = 0;
ndp->ni_rightsneeded = rights;
- ndp->ni_baserights = 0;
+ filecaps_init(&ndp->ni_filecaps);
ndp->ni_cnd.cn_thread = td;
}
diff --git a/sys/sys/user.h b/sys/sys/user.h
index ddaccb8..5de76ac 100644
--- a/sys/sys/user.h
+++ b/sys/sys/user.h
@@ -251,8 +251,7 @@ struct user {
#define KF_TYPE_SHM 8
#define KF_TYPE_SEM 9
#define KF_TYPE_PTS 10
-/* no KF_TYPE_CAPABILITY (11), since capabilities wrap other file objects */
-#define KF_TYPE_PROCDESC 12
+#define KF_TYPE_PROCDESC 11
#define KF_TYPE_UNKNOWN 255
#define KF_VTYPE_VNON 0
@@ -288,7 +287,6 @@ struct user {
#define KF_FLAG_TRUNC 0x00001000
#define KF_FLAG_EXCL 0x00002000
#define KF_FLAG_EXEC 0x00004000
-#define KF_FLAG_CAPABILITY 0x00008000
/*
* Old format. Has variable hidden padding due to alignment.
diff --git a/sys/vm/vm_mmap.c b/sys/vm/vm_mmap.c
index 05bb8ae..cf94fe5 100644
--- a/sys/vm/vm_mmap.c
+++ b/sys/vm/vm_mmap.c
@@ -305,13 +305,13 @@ sys_mmap(td, uap)
*/
rights = CAP_MMAP;
if (prot & PROT_READ)
- rights |= CAP_READ;
+ rights |= CAP_MMAP_R;
if ((flags & MAP_SHARED) != 0) {
if (prot & PROT_WRITE)
- rights |= CAP_WRITE;
+ rights |= CAP_MMAP_W;
}
if (prot & PROT_EXEC)
- rights |= CAP_MAPEXEC;
+ rights |= CAP_MMAP_X;
if ((error = fget_mmap(td, uap->fd, rights, &cap_maxprot,
&fp)) != 0)
goto done;
diff --git a/usr.bin/kdump/kdump.c b/usr.bin/kdump/kdump.c
index 2769832..82317f2 100644
--- a/usr.bin/kdump/kdump.c
+++ b/usr.bin/kdump/kdump.c
@@ -1008,6 +1008,7 @@ ktrsyscall(struct ktr_syscall *ktr, u_int flags)
narg--;
break;
case SYS_cap_new:
+ case SYS_cap_rights_limit:
print_number(ip, narg, c);
putchar(',');
arg = *ip;
@@ -1035,6 +1036,14 @@ ktrsyscall(struct ktr_syscall *ktr, u_int flags)
}
capname(arg);
break;
+ case SYS_cap_fcntls_limit:
+ print_number(ip, narg, c);
+ putchar(',');
+ arg = *ip;
+ ip++;
+ narg--;
+ capfcntlname(arg);
+ break;
case SYS_posix_fadvise:
print_number(ip, narg, c);
print_number(ip, narg, c);
diff --git a/usr.bin/kdump/mksubr b/usr.bin/kdump/mksubr
index d56f030..aed8291 100644
--- a/usr.bin/kdump/mksubr
+++ b/usr.bin/kdump/mksubr
@@ -361,6 +361,7 @@ _EOF_
auto_or_type "accessmodename" "[A-Z]_OK[[:space:]]+0?x?[0-9A-Fa-f]+" "sys/unistd.h"
auto_switch_type "acltypename" "ACL_TYPE_[A-Z4_]+[[:space:]]+0x[0-9]+" "sys/acl.h"
auto_or_type "capname" "CAP_[A-Z]+[[:space:]]+0x[01248]{16}ULL" "sys/capability.h"
+auto_or_type "capfcntlname" "CAP_FCNTL_[A-Z]+[[:space:]]+\(1" "sys/capability.h"
auto_switch_type "extattrctlname" "EXTATTR_NAMESPACE_[A-Z]+[[:space:]]+0x[0-9]+" "sys/extattr.h"
auto_switch_type "fadvisebehavname" "POSIX_FADV_[A-Z]+[[:space:]]+[0-9]+" "sys/fcntl.h"
auto_or_type "flagsname" "O_[A-Z]+[[:space:]]+0x[0-9A-Fa-f]+" "sys/fcntl.h"
diff --git a/usr.bin/procstat/procstat_files.c b/usr.bin/procstat/procstat_files.c
index 3773900..030bba1 100644
--- a/usr.bin/procstat/procstat_files.c
+++ b/usr.bin/procstat/procstat_files.c
@@ -139,33 +139,34 @@ static struct cap_desc {
/* General file I/O. */
{ CAP_READ, "rd" },
{ CAP_WRITE, "wr" },
+ { CAP_SEEK, "se" },
{ CAP_MMAP, "mm" },
- { CAP_MAPEXEC, "me" },
+ { CAP_CREATE, "cr" },
{ CAP_FEXECVE, "fe" },
{ CAP_FSYNC, "fy" },
{ CAP_FTRUNCATE, "ft" },
- { CAP_SEEK, "se" },
/* VFS methods. */
- { CAP_FCHFLAGS, "cf" },
{ CAP_FCHDIR, "cd" },
+ { CAP_FCHFLAGS, "cf" },
{ CAP_FCHMOD, "cm" },
{ CAP_FCHOWN, "cn" },
{ CAP_FCNTL, "fc" },
- { CAP_FPATHCONF, "fp" },
{ CAP_FLOCK, "fl" },
+ { CAP_FPATHCONF, "fp" },
{ CAP_FSCK, "fk" },
{ CAP_FSTAT, "fs" },
{ CAP_FSTATFS, "sf" },
{ CAP_FUTIMES, "fu" },
- { CAP_CREATE, "cr" },
- { CAP_DELETE, "de" },
- { CAP_MKDIR, "md" },
- { CAP_RMDIR, "rm" },
- { CAP_MKFIFO, "mf" },
- { CAP_MKNOD, "mn" },
-
- /* Lookups - used to constraint *at() calls. */
+ { CAP_LINKAT, "li" },
+ { CAP_MKDIRAT, "md" },
+ { CAP_MKFIFOAT, "mf" },
+ { CAP_MKNODAT, "mn" },
+ { CAP_RENAMEAT, "rn" },
+ { CAP_SYMLINKAT, "sl" },
+ { CAP_UNLINKAT, "un" },
+
+ /* Lookups - used to constrain *at() calls. */
{ CAP_LOOKUP, "lo" },
/* Extended attributes. */
@@ -213,6 +214,24 @@ static struct cap_desc {
{ CAP_PDGETPID, "pg" },
{ CAP_PDWAIT, "pw" },
{ CAP_PDKILL, "pk" },
+
+ /* Aliases and defines that combine multiple rights. */
+ { CAP_PREAD, "prd" },
+ { CAP_PWRITE, "pwr" },
+
+ { CAP_MMAP_R, "mmr" },
+ { CAP_MMAP_W, "mmw" },
+ { CAP_MMAP_X, "mmx" },
+ { CAP_MMAP_RW, "mrw" },
+ { CAP_MMAP_RX, "mrx" },
+ { CAP_MMAP_WX, "mwx" },
+ { CAP_MMAP_RWX, "mma" },
+
+ { CAP_RECV, "re" },
+ { CAP_SEND, "sd" },
+
+ { CAP_SOCK_CLIENT, "scl" },
+ { CAP_SOCK_SERVER, "ssr" },
};
static const u_int cap_desc_count = sizeof(cap_desc) /
sizeof(cap_desc[0]);
@@ -225,7 +244,7 @@ width_capability(cap_rights_t rights)
count = 0;
width = 0;
for (i = 0; i < cap_desc_count; i++) {
- if (rights & cap_desc[i].cd_right) {
+ if ((cap_desc[i].cd_right & ~rights) == 0) {
width += strlen(cap_desc[i].cd_desc);
if (count)
width++;
@@ -249,7 +268,7 @@ print_capability(cap_rights_t rights, u_int capwidth)
printf("-");
}
for (i = 0; i < cap_desc_count; i++) {
- if (rights & cap_desc[i].cd_right) {
+ if ((cap_desc[i].cd_right & ~rights) == 0) {
printf("%s%s", count ? "," : "", cap_desc[i].cd_desc);
width += strlen(cap_desc[i].cd_desc);
if (count)
@@ -261,7 +280,7 @@ print_capability(cap_rights_t rights, u_int capwidth)
void
procstat_files(struct procstat *procstat, struct kinfo_proc *kipp)
-{
+{
struct sockstat sock;
struct filestat_list *head;
struct filestat *fst;
@@ -423,8 +442,6 @@ procstat_files(struct procstat *procstat, struct kinfo_proc *kipp)
printf("%s", fst->fs_fflags & PS_FST_FFLAG_NONBLOCK ? "n" : "-");
printf("%s", fst->fs_fflags & PS_FST_FFLAG_DIRECT ? "d" : "-");
printf("%s", fst->fs_fflags & PS_FST_FFLAG_HASLOCK ? "l" : "-");
- printf("%s ", fst->fs_fflags & PS_FST_FFLAG_CAPABILITY ?
- "c" : "-");
if (!Cflag) {
if (fst->fs_ref_count > -1)
printf("%3d ", fst->fs_ref_count);
OpenPOWER on IntegriCloud