diff options
author | pjd <pjd@FreeBSD.org> | 2013-03-02 00:53:12 +0000 |
---|---|---|
committer | pjd <pjd@FreeBSD.org> | 2013-03-02 00:53:12 +0000 |
commit | f07ebb8888ea42f744890a727e8f6799a1086915 (patch) | |
tree | 90495ae13fcc0dd621d97fc1b788f43780023c0a | |
parent | dd15932a159ec60641cd20e4fb689fa28d75465d (diff) | |
download | FreeBSD-src-f07ebb8888ea42f744890a727e8f6799a1086915.zip FreeBSD-src-f07ebb8888ea42f744890a727e8f6799a1086915.tar.gz |
Merge Capsicum overhaul:
- A capability is no longer a separate descriptor type. Now every descriptor
has its own set of capability rights.
- The cap_new(2) system call is left, but it is no longer documented and
should not be used in new code.
- The new syscall cap_rights_limit(2) should be used instead of
cap_new(2), which limits capability rights of the given descriptor
without creating a new one.
- The cap_getrights(2) syscall is renamed to cap_rights_get(2).
- If the CAP_IOCTL capability right is present we can further reduce the list
of allowed ioctls with the new cap_ioctls_limit(2) syscall. The list of allowed
ioctls can be retrieved with the cap_ioctls_get(2) syscall.
- If the CAP_FCNTL capability right is present we can further reduce the fcntls
that can be used with the new cap_fcntls_limit(2) syscall and retrieve
them with cap_fcntls_get(2).
- To support ioctl and fcntl white-listing the filedesc structure was
heavily modified.
- The audit subsystem, kdump and procstat tools were updated to
recognize new syscalls.
- Capability rights were revised and even though I tried hard to provide
backward API and ABI compatibility there are some incompatible changes
that are described in detail below:
CAP_CREATE old behaviour:
- Allow for openat(2)+O_CREAT.
- Allow for linkat(2).
- Allow for symlinkat(2).
CAP_CREATE new behaviour:
- Allow for openat(2)+O_CREAT.
Added CAP_LINKAT:
- Allow for linkat(2). ABI: Reuses CAP_RMDIR bit.
- Allow to be target for renameat(2).
Added CAP_SYMLINKAT:
- Allow for symlinkat(2).
Removed CAP_DELETE. Old behaviour:
- Allow for unlinkat(2) when removing non-directory object.
- Allow to be source for renameat(2).
Removed CAP_RMDIR. Old behaviour:
- Allow for unlinkat(2) when removing directory.
Added CAP_RENAMEAT:
- Required for source directory for the renameat(2) syscall.
Added CAP_UNLINKAT (effectively it replaces CAP_DELETE and CAP_RMDIR):
- Allow for unlinkat(2) on any object.
- Required if target of renameat(2) exists and will be removed by this
call.
Removed CAP_MAPEXEC.
CAP_MMAP old behaviour:
- Allow for mmap(2) with any combination of PROT_NONE, PROT_READ and
PROT_WRITE.
CAP_MMAP new behaviour:
- Allow for mmap(2)+PROT_NONE.
Added CAP_MMAP_R:
- Allow for mmap(PROT_READ).
Added CAP_MMAP_W:
- Allow for mmap(PROT_WRITE).
Added CAP_MMAP_X:
- Allow for mmap(PROT_EXEC).
Added CAP_MMAP_RW:
- Allow for mmap(PROT_READ | PROT_WRITE).
Added CAP_MMAP_RX:
- Allow for mmap(PROT_READ | PROT_EXEC).
Added CAP_MMAP_WX:
- Allow for mmap(PROT_WRITE | PROT_EXEC).
Added CAP_MMAP_RWX:
- Allow for mmap(PROT_READ | PROT_WRITE | PROT_EXEC).
Renamed CAP_MKDIR to CAP_MKDIRAT.
Renamed CAP_MKFIFO to CAP_MKFIFOAT.
Renamed CAP_MKNOD to CAP_MKNODAT.
CAP_READ old behaviour:
- Allow pread(2).
- Disallow read(2), readv(2) (if there is no CAP_SEEK).
CAP_READ new behaviour:
- Allow read(2), readv(2).
- Disallow pread(2) (CAP_SEEK is also required).
CAP_WRITE old behaviour:
- Allow pwrite(2).
- Disallow write(2), writev(2) (if there is no CAP_SEEK).
CAP_WRITE new behaviour:
- Allow write(2), writev(2).
- Disallow pwrite(2) (CAP_SEEK is also required).
Added convenient defines:
#define CAP_PREAD (CAP_SEEK | CAP_READ)
#define CAP_PWRITE (CAP_SEEK | CAP_WRITE)
#define CAP_MMAP_R (CAP_MMAP | CAP_SEEK | CAP_READ)
#define CAP_MMAP_W (CAP_MMAP | CAP_SEEK | CAP_WRITE)
#define CAP_MMAP_X (CAP_MMAP | CAP_SEEK | 0x0000000000000008ULL)
#define CAP_MMAP_RW (CAP_MMAP_R | CAP_MMAP_W)
#define CAP_MMAP_RX (CAP_MMAP_R | CAP_MMAP_X)
#define CAP_MMAP_WX (CAP_MMAP_W | CAP_MMAP_X)
#define CAP_MMAP_RWX (CAP_MMAP_R | CAP_MMAP_W | CAP_MMAP_X)
#define CAP_RECV CAP_READ
#define CAP_SEND CAP_WRITE
#define CAP_SOCK_CLIENT \
(CAP_CONNECT | CAP_GETPEERNAME | CAP_GETSOCKNAME | CAP_GETSOCKOPT | \
CAP_PEELOFF | CAP_RECV | CAP_SEND | CAP_SETSOCKOPT | CAP_SHUTDOWN)
#define CAP_SOCK_SERVER \
(CAP_ACCEPT | CAP_BIND | CAP_GETPEERNAME | CAP_GETSOCKNAME | \
CAP_GETSOCKOPT | CAP_LISTEN | CAP_PEELOFF | CAP_RECV | CAP_SEND | \
CAP_SETSOCKOPT | CAP_SHUTDOWN)
Added defines for backward API compatibility:
#define CAP_MAPEXEC CAP_MMAP_X
#define CAP_DELETE CAP_UNLINKAT
#define CAP_MKDIR CAP_MKDIRAT
#define CAP_RMDIR CAP_UNLINKAT
#define CAP_MKFIFO CAP_MKFIFOAT
#define CAP_MKNOD CAP_MKNODAT
#define CAP_SOCK_ALL (CAP_SOCK_CLIENT | CAP_SOCK_SERVER)
Sponsored by: The FreeBSD Foundation
Reviewed by: Christoph Mallon <christoph.mallon@gmx.de>
Many aspects discussed with: rwatson, benl, jonathan
ABI compatibility discussed with: kib
61 files changed, 1840 insertions, 1124 deletions
diff --git a/contrib/openbsm/etc/audit_event b/contrib/openbsm/etc/audit_event index 0350389..f82841a 100644 --- a/contrib/openbsm/etc/audit_event +++ b/contrib/openbsm/etc/audit_event @@ -548,7 +548,7 @@ 43184:AUE_OPENAT:openat(2) - attr only:fa 43185:AUE_POSIX_OPENPT:posix_openpt(2):ip 43186:AUE_CAP_NEW:cap_new(2):fm -43187:AUE_CAP_GETRIGHTS:cap_getrights(2):fm +43187:AUE_CAP_RIGHTS_GET:cap_rights_get(2):fm 43188:AUE_CAP_ENTER:cap_enter(2):pc 43189:AUE_CAP_GETMODE:cap_getmode(2):pc 43190:AUE_POSIX_SPAWN:posix_spawn(2):pc @@ -563,6 +563,11 @@ 43199:AUE_PDGETPID:pdgetpid(2):pc 43200:AUE_PDWAIT:pdwait(2):pc 43201:AUE_WAIT6:wait6(2):pc +43202:AUE_CAP_RIGHTS_LIMIT:cap_rights_limit(2):fm +43203:AUE_CAP_IOCTLS_LIMIT:cap_ioctls_limit(2):fm +43204:AUE_CAP_IOCTLS_GET:cap_ioctls_get(2):fm +43205:AUE_CAP_FCNTLS_LIMIT:cap_fcntls_limit(2):fm +43206:AUE_CAP_FCNTLS_GET:cap_fcntls_get(2):fm # # Solaris userspace events. # diff --git a/lib/libc/include/compat.h b/lib/libc/include/compat.h index 7694540..3739fe1 100644 --- a/lib/libc/include/compat.h +++ b/lib/libc/include/compat.h @@ -42,6 +42,8 @@ __sym_compat(__semctl, freebsd7___semctl, FBSD_1.0); __sym_compat(msgctl, freebsd7_msgctl, FBSD_1.0); __sym_compat(shmctl, freebsd7_shmctl, FBSD_1.0); +__sym_compat(cap_getrights, cap_rights_get, FBSD_1.2); + #undef __sym_compat #endif /* __LIBC_COMPAT_H__ */ diff --git a/lib/libc/sys/Makefile.inc b/lib/libc/sys/Makefile.inc index 9f216dc..03c0090 100644 --- a/lib/libc/sys/Makefile.inc +++ b/lib/libc/sys/Makefile.inc @@ -93,7 +93,9 @@ MAN+= abort2.2 \ bind.2 \ brk.2 \ cap_enter.2 \ - cap_new.2 \ + cap_fcntls_limit.2 \ + cap_ioctls_limit.2 \ + cap_rights_limit.2 \ chdir.2 \ chflags.2 \ chmod.2 \ @@ -270,7 +272,9 @@ MLINKS+=access.2 eaccess.2 \ access.2 faccessat.2 MLINKS+=brk.2 sbrk.2 MLINKS+=cap_enter.2 cap_getmode.2 -MLINKS+=cap_new.2 cap_getrights.2 +MLINKS+=cap_fcntls_limit.2 cap_fcntls_get.2 +MLINKS+=cap_ioctls_limit.2 cap_ioctls_get.2 +MLINKS+=cap_rights_limit.2 cap_rights_get.2 
MLINKS+=chdir.2 fchdir.2 MLINKS+=chflags.2 fchflags.2 \ chflags.2 lchflags.2 diff --git a/lib/libc/sys/Symbol.map b/lib/libc/sys/Symbol.map index d126255..7738e46 100644 --- a/lib/libc/sys/Symbol.map +++ b/lib/libc/sys/Symbol.map @@ -364,7 +364,6 @@ FBSD_1.2 { cap_enter; cap_getmode; cap_new; - cap_getrights; getloginclass; pdfork; pdgetpid; @@ -379,6 +378,12 @@ FBSD_1.2 { }; FBSD_1.3 { + cap_fcntls_get; + cap_fcntls_limit; + cap_ioctls_get; + cap_ioctls_limit; + cap_rights_get; + cap_rights_limit; cap_sandboxed; clock_getcpuclockid2; ffclock_getcounter; diff --git a/lib/libc/sys/cap_enter.2 b/lib/libc/sys/cap_enter.2 index c3cefe8..3369669 100644 --- a/lib/libc/sys/cap_enter.2 +++ b/lib/libc/sys/cap_enter.2 @@ -58,8 +58,10 @@ or .Xr pdfork 2 will be placed in capability mode from inception. .Pp -When combined with capabilities created with -.Xr cap_new 2 , +When combined with +.Xr cap_rights_limit 2 , +.Xr cap_ioctls_limit 2 , +.Xr cap_fcntls_limit 2 , .Fn cap_enter may be used to create kernel-enforced sandboxes in which appropriately-crafted applications or application components may be run. @@ -71,11 +73,6 @@ sandbox. Creating effective process sandboxes is a tricky process that involves identifying the least possible rights required by the process and then passing those rights into the process in a safe manner. -See the CAVEAT -section of -.Xr cap_new 2 -for why this is particularly tricky with UNIX file descriptors as the -canonical representation of a right. Consumers of .Fn cap_enter should also be aware of other inherited rights, such as access to VM @@ -87,8 +84,33 @@ to create a runtime environment inside the sandbox that has as few implicitly acquired rights as possible. 
.Sh RETURN VALUES .Rv -std cap_enter cap_getmode +.Sh ERRORS +The +.Fn cap_enter +and +.Fn cap_getmode +system calls +will fail if: +.Bl -tag -width Er +.It Bq Er ENOSYS +The kernel is compiled without: +.Pp +.Cd "options CAPABILITY_MODE" +.El +.Pp +The +.Fn cap_getmode +system call may also return the following error: +.Bl -tag -width Er +.It Bq Er EFAULT +Pointer +.Fa modep +points outside the process's allocated address space. +.El .Sh SEE ALSO -.Xr cap_new 2 , +.Xr cap_fcntls_limit 2 , +.Xr cap_ioctls_limit 2 , +.Xr cap_rights_limit 2 , .Xr fexecve 2 , .Xr cap_sandboxed 3 , .Xr capsicum 4 diff --git a/lib/libc/sys/cap_fcntls_limit.2 b/lib/libc/sys/cap_fcntls_limit.2 new file mode 100644 index 0000000..8fa7463 --- /dev/null +++ b/lib/libc/sys/cap_fcntls_limit.2 @@ -0,0 +1,127 @@ +.\" +.\" Copyright (c) 2012 The FreeBSD Foundation +.\" All rights reserved. +.\" +.\" This documentation was written by Pawel Jakub Dawidek under sponsorship +.\" the FreeBSD Foundation. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.\" $FreeBSD$ +.\" +.Dd September 20, 2012 +.Dt CAP_FCNTLS_LIMIT 2 +.Os +.Sh NAME +.Nm cap_fcntls_limit , +.Nm cap_fcntls_get +.Nd manage allowed fcntl commands +.Sh LIBRARY +.Lb libc +.Sh SYNOPSIS +.In sys/capability.h +.Ft int +.Fn cap_fcntls_limit "int fd" "uint32_t fcntlrights" +.Ft int +.Fn cap_fcntls_get "int fd" "uint32_t *fcntlrightsp" +.Sh DESCRIPTION +If a file descriptor is granted the +.Dv CAP_FCNTL +capability right, the list of allowed +.Xr fcntl 2 +commands can be selectively reduced (but never expanded) with the +.Fn cap_fcntls_limit +system call. +.Pp +A bitmask of allowed fcntls commands for a given file descriptor can be obtained +with the +.Fn cap_fcntls_get +system call. +.Sh FLAGS +The following flags may be specified in the +.Fa fcntlrights +argument or returned in the +.Fa fcntlrightsp +argument: +.Bl -tag -width CAP_FCNTL_GETOWN +.It Dv CAP_FCNTL_GETFL +Permit +.Dv F_GETFL +command. +.It Dv CAP_FCNTL_SETFL +Permit +.Dv F_SETFL +command. +.It Dv CAP_FCNTL_GETOWN +Permit +.Dv F_GETOWN +command. +.It Dv CAP_FCNTL_SETOWN +Permit +.Dv F_SETOWN +command. +.El +.Sh RETURN VALUES +.Rv -std +.Sh ERRORS +.Fn cap_fcntls_limit +succeeds unless: +.Bl -tag -width Er +.It Bq Er EBADF +The +.Fa fd +argument is not a valid descriptor. +.It Bq Er EINVAL +An invalid flag has been passed in +.Fa fcntlrights . +.It Bq Er ENOTCAPABLE +.Fa fcntlrights +would expand the list of allowed +.Xr fcntl 2 +commands. 
+.El +.Pp +.Fn cap_fcntls_get +succeeds unless: +.Bl -tag -width Er +.It Bq Er EBADF +The +.Fa fd +argument is not a valid descriptor. +.It Bq Er EFAULT +The +.Fa fcntlrightsp +argument points at an invalid address. +.El +.Sh SEE ALSO +.Xr cap_ioctls_limit 2 , +.Xr cap_rights_limit 2 , +.Xr fcntl 2 +.Sh HISTORY +Support for capabilities and capabilities mode was developed as part of the +.Tn TrustedBSD +Project. +.Pp +.Sh AUTHORS +This function was created by +.An Pawel Jakub Dawidek Aq pawel@dawidek.net +under sponsorship of the FreeBSD Foundation. diff --git a/lib/libc/sys/cap_ioctls_limit.2 b/lib/libc/sys/cap_ioctls_limit.2 new file mode 100644 index 0000000..5eca18c --- /dev/null +++ b/lib/libc/sys/cap_ioctls_limit.2 @@ -0,0 +1,158 @@ +.\" +.\" Copyright (c) 2012 The FreeBSD Foundation +.\" All rights reserved. +.\" +.\" This documentation was written by Pawel Jakub Dawidek under sponsorship +.\" the FreeBSD Foundation. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.\" $FreeBSD$ +.\" +.Dd September 20, 2012 +.Dt CAP_IOCTLS_LIMIT 2 +.Os +.Sh NAME +.Nm cap_ioctls_limit , +.Nm cap_ioctls_get +.Nd manage allowed ioctl commands +.Sh LIBRARY +.Lb libc +.Sh SYNOPSIS +.In sys/capability.h +.Ft int +.Fn cap_ioctls_limit "int fd" "const unsigned long *cmds" "size_t ncmds" +.Ft ssize_t +.Fn cap_ioctls_get "int fd" "unsigned long *cmds" "size_t maxcmds" +.Sh DESCRIPTION +If a file descriptor is granted the +.Dv CAP_IOCTL +capability right, the list of allowed +.Xr ioctl 2 +commands can be selectively reduced (but never expanded) with the +.Fn cap_ioctls_limit +system call. +The +.Fa cmds +argument is an array of +.Xr ioctl 2 +commands and the +.Fa ncmds +argument specifies the number of elements in the array. +There might be up to +.Va 256 +elements in the array. +.Pp +The list of allowed ioctl commands for a given file descriptor can be obtained +with the +.Fn cap_ioctls_get +system call. +The +.Fa cmds +argument points at memory that can hold up to +.Fa maxcmds +values. +The function populates the provided buffer with up to +.Fa maxcmds +elements, but always returns the total number of ioctl commands allowed for the +given file descriptor. +The total number of ioctls commands for the given file descriptor can be +obtained by passing +.Dv NULL as the +.Fa cmds +argument and +.Va 0 +as the +.Fa maxcmds +argument. 
+If all ioctl commands are allowed +.Dv ( CAP_IOCTL +capability right is assigned to the file descriptor and the +.Fn cap_ioctls_limit +system call was never called for this file descriptor), the +.Fn cap_ioctls_get +system call will return +.Dv CAP_IOCTLS_ALL +and won't modify the buffer pointed out by the +.Fa cmds +argument. +.Sh RETURN VALUES +.Rv -std cap_ioctls_limit +.Pp +The +.Fn cap_ioctls_limit +function, if successfull, returns the total number of allowed ioctl commands or +the value +.Dv INT_MAX +if all ioctls commands are allowed. +On failure the value +.Va -1 +is returned and the global variable errno is set to indicate the error. +.Sh ERRORS +.Fn cap_ioctls_limit +succeeds unless: +.Bl -tag -width Er +.It Bq Er EBADF +The +.Fa fd +argument is not a valid descriptor. +.It Bq Er EFAULT +The +.Fa cmds +argument points at an invalid address. +.It Bq Er EINVAL +The +.Fa ncmds +argument is greater than +.Va 256 . +.It Bq Er ENOTCAPABLE +.Fa cmds +would expand the list of allowed +.Xr ioctl 2 +commands. +.El +.Pp +.Fn cap_ioctls_get +succeeds unless: +.Bl -tag -width Er +.It Bq Er EBADF +The +.Fa fd +argument is not a valid descriptor. +.It Bq Er EFAULT +The +.Fa cmds +argument points at invalid address. +.El +.Sh SEE ALSO +.Xr cap_fcntls_limit 2 , +.Xr cap_rights_limit 2 , +.Xr ioctl 2 +.Sh HISTORY +Support for capabilities and capabilities mode was developed as part of the +.Tn TrustedBSD +Project. +.Pp +.Sh AUTHORS +This function was created by +.An Pawel Jakub Dawidek Aq pawel@dawidek.net +under sponsorship of the FreeBSD Foundation. diff --git a/lib/libc/sys/cap_new.2 b/lib/libc/sys/cap_rights_limit.2 index a18fd3b..d8d8777 100644 --- a/lib/libc/sys/cap_new.2 +++ b/lib/libc/sys/cap_rights_limit.2 @@ -1,10 +1,14 @@ .\" .\" Copyright (c) 2008-2010 Robert N. M. Watson +.\" Copyright (c) 2012-2013 The FreeBSD Foundation .\" All rights reserved. 
.\" .\" This software was developed at the University of Cambridge Computer .\" Laboratory with support from a grant from Google, Inc. .\" +.\" Portions of this documentation were written by Pawel Jakub Dawidek +.\" under sponsorship from the FreeBSD Foundation. +.\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: @@ -28,76 +32,48 @@ .\" .\" $FreeBSD$ .\" -.Dd July 20, 2011 -.Dt CAP_NEW 2 +.Dd February 23, 2013 +.Dt CAP_RIGHTS_LIMIT 2 .Os .Sh NAME -.Nm cap_new , -.Nm cap_getrights -.Nd System calls to manipulate capabilities +.Nm cap_rights_limit , +.Nm cap_rights_get +.Nd manage capability rights .Sh LIBRARY .Lb libc .Sh SYNOPSIS .In sys/capability.h .Ft int -.Fn cap_new "int fd" "cap_rights_t rights" +.Fn cap_rights_limit "int fd" "cap_rights_t rights" .Ft int -.Fn cap_getrights "int fd" "cap_rights_t *rightsp" +.Fn cap_rights_get "int fd" "cap_rights_t *rightsp" .Sh DESCRIPTION -Capabilities are special file descriptors derived from an existing file -descriptor, such as one returned by +When a file descriptor is created by a function such as .Xr fhopen 2 , .Xr kqueue 2 , .Xr mq_open 2 , .Xr open 2 , +.Xr openat 2 , +.Xr pdfork 2 , .Xr pipe 2 , .Xr shm_open 2 , .Xr socket 2 , or .Xr socketpair 2 , -but with a restricted set of permitted operations determined by a rights -mask set when the capability is created. -These restricted rights cannot be changed after the capability is created, -although further capabilities with yet more restricted rights may be created -from an existing capability. -In every other sense, a capability behaves in the same way as the file -descriptor it was created from. -.Pp -.Fn cap_new -creates a new capability for the existing file descriptor -.Fa fd , -and returns a file descriptor for it. -Operations on the capability will be limited to those permitted by -.Fa rights , -which is static for the lifetime of the capability. 
-If -.Fa fd -refers to an existing capability, then -.Fa rights -must be equal to or a subset of the rights on that capability. -As with -.Xr dup 2 -and -.Xr dup2 2 , -many properties are shared between the new capability and the existing file -descriptor, including open file flags, blocking disposition, and file offset. -Many applications will prefer to use the -.Xr cap_limitfd 3 -library call, part of -.Xr libcapsicum 3 , -as it offers a more convenient interface. -.Pp -.Fn cap_getrights -queries the rights associated with the capability referred to by file -descriptor -.Fa fd . +it is assigned all capability rights. +Those rights can be reduced (but never expanded) by using the +.Fn cap_rights_limit +system call. +Once capability rights are reduced, operations on the file descriptor will be +limited to those permitted by +.Fa rights . .Pp -These system calls, when combined with -.Xr cap_enter 2 , -may be used to construct process sandboxes with highly granular rights -assignment. +A bitmask of capability rights assigned to a file descriptor can be obtained with +the +.Fn cap_rights_get +system call. .Sh RIGHTS -The following rights may be specified in a new capability rights mask: +The following rights may be specified in a rights mask: .Bl -tag -width CAP_EXTATTR_DELETE .It Dv CAP_ACCEPT Permit @@ -134,6 +110,13 @@ Permit also required for .Xr sendto 2 with a non-NULL destination address. +.It Dv CAP_CREATE +Permit +.Xr openat 2 +with the +.Dv O_CREAT +flag. +.\" XXXPJD: Doesn't exist anymore. .It Dv CAP_EVENT Permit .Xr select 2 , @@ -143,7 +126,12 @@ and to be used in monitoring the file descriptor for events. .It Dv CAP_FEXECVE Permit -.Xr fexecve 2 ; +.Xr fexecve 2 +and +.Xr openat 2 +with the +.Dv O_EXEC +flag; .Dv CAP_READ will also be required. .It Dv CAP_EXTATTR_DELETE @@ -166,19 +154,49 @@ Permit .Xr fchflags 2 . .It Dv CAP_FCHMOD Permit -.Xr fchmod 2 . +.Xr fchmod 2 +and +.Xr fchmodat 2 . +.It Dv CAP_FCHMODAT +An alias to +.Dv CAP_FCHMOD . 
.It Dv CAP_FCHOWN Permit -.Xr fchown 2 . +.Xr fchown 2 +and +.Xr fchownat 2 . +.It Dv CAP_FCHOWNAT +An alias to +.Dv CAP_FCHOWN . .It Dv CAP_FCNTL Permit -.Xr fcntl 2 ; -be aware that this call provides indirect access to other operations, such as -.Xr flock 2 . +.Xr fcntl 2 . +Note that only the +.Dv F_GETFL , +.Dv F_SETFL , +.Dv F_GETOWN +and +.Dv F_SETOWN +commands require this capability right. +Also note that the list of permitted commands can be further limited with the +.Xr cap_fcntls_limit 2 +system call. .It Dv CAP_FLOCK Permit -.Xr flock 2 -and related calls. +.Xr flock 2 , +.Xr fcntl 2 +(with +.Dv F_GETLK , +.Dv F_SETLK +or +.Dv F_SETLKW +flag) and +.Xr openat 2 +(with +.Dv O_EXLOCK +or +.Dv O_SHLOCK +flag). .It Dv CAP_FPATHCONF Permit .Xr fpathconf 2 . @@ -186,22 +204,42 @@ Permit Permit UFS background-fsck operations on the descriptor. .It Dv CAP_FSTAT Permit -.Xr fstat 2 . +.Xr fstat 2 +and +.Xr fstatat 2 . +.It Dv CAP_FSTATAT +An alias to +.Dv CAP_FSTAT . .It Dv CAP_FSTATFS Permit .Xr fstatfs 2 . .It Dv CAP_FSYNC Permit -.Xr aio_fsync 2 +.Xr aio_fsync 2 , +.Xr fsync 2 and -.Xr fsync 2 . -.Pp +.Xr openat 2 +with +.Dv O_FSYNC +or +.Dv O_SYNC +flag. .It Dv CAP_FTRUNCATE Permit -.Xr ftruncate 2 . +.Xr ftruncate 2 +and +.Xr openat 2 +with the +.Dv O_TRUNC +flag. .It Dv CAP_FUTIMES Permit -.Xr futimes 2 . +.Xr futimes 2 +and +.Xr futimesat 2 . +.It Dv CAP_FUTIMESAT +An alias to +.Dv CAP_FUTIMES . .It Dv CAP_GETPEERNAME Permit .Xr getpeername 2 . @@ -216,42 +254,106 @@ Permit .Xr ioctl 2 . Be aware that this system call has enormous scope, including potentially global scope for some objects. +The list of permitted ioctl commands can be further limited with the +.Xr cap_ioctls_limit 2 +system call. +.\" XXXPJD: Doesn't exist anymore. .It Dv CAP_KEVENT Permit .Xr kevent 2 ; .Dv CAP_EVENT is also required on file descriptors that will be monitored using .Xr kevent 2 . +.It Dv CAP_LINKAT +Permit +.Xr linkat 2 +and +.Xr renameat 2 . 
+This right is required for the destination directory descriptor. .It Dv CAP_LISTEN Permit .Xr listen 2 ; not much use (generally) without .Dv CAP_BIND . .It Dv CAP_LOOKUP -Permit the file descriptor to be used as a starting directory for calls such -as +Permit the file descriptor to be used as a starting directory for calls such as .Xr linkat 2 , .Xr openat 2 , and .Xr unlinkat 2 . -Note that these calls are not available in capability mode as they manipulate -a global name space; see -.Xr cap_enter 2 -for details. .It Dv CAP_MAC_GET Permit .Xr mac_get_fd 3 . .It Dv CAP_MAC_SET Permit .Xr mac_set_fd 3 . +.It Dv CAP_MKDIRAT +Permit +.Xr mkdirat 2 . +.It Dv CAP_MKFIFOAT +Permit +.Xr mkfifoat 2 . +.It Dv CAP_MKNODAT +Permit +.Xr mknodat 2 . .It Dv CAP_MMAP Permit -.Xr mmap 2 ; -specific invocations may also require +.Xr mmap 2 +with the +.Dv PROT_NONE +protection. +.It Dv CAP_MMAP_R +Permit +.Xr mmap 2 +with the +.Dv PROT_READ +protection. +This also implies .Dv CAP_READ -or -.Dv CAP_WRITE . -.Pp +and +.Dv CAP_SEEK +rights. +.It Dv CAP_MMAP_W +Permit +.Xr mmap 2 +with the +.Dv PROT_WRITE +protection. +This also implies +.Dv CAP_WRITE +and +.Dv CAP_SEEK +rights. +.It Dv CAP_MMAP_X +Permit +.Xr mmap 2 +with the +.Dv PROT_EXEC +protection. +This also implies +.Dv CAP_SEEK +right. +.It Dv CAP_MMAP_RW +Implies +.Dv CAP_MMAP_R +and +.Dv CAP_MMAP_W . +.It Dv CAP_MMAP_RX +Implies +.Dv CAP_MMAP_R +and +.Dv CAP_MMAP_X . +.It Dv CAP_MMAP_WX +Implies +.Dv CAP_MMAP_W +and +.Dv CAP_MMAP_X . +.It Dv CAP_MMAP_RWX +Implies +.Dv CAP_MMAP_R , +.Dv CAP_MMAP_W +and +.Dv CAP_MMAP_X . .It Dv CAP_PDGETPID Permit .Xr pdgetpid 2 . @@ -264,30 +366,46 @@ Permit .It Dv CAP_PEELOFF Permit .Xr sctp_peeloff 2 . +.\" XXXPJD: Not documented. +.It Dv CAP_POLL_EVENT +.\" XXXPJD: Not documented. +.It Dv CAP_POST_EVENT +.It Dv CAP_PREAD +Implies +.Dv CAP_SEEK +and +.Dv CAP_READ . +.It Dv CAP_PWRITE +Implies +.Dv CAP_SEEK +and +.Dv CAP_WRITE . 
.It Dv CAP_READ Allow .Xr aio_read 2 , -.Xr pread 2 , +.Xr openat +with the +.Dv O_RDONLY flag, .Xr read 2 , .Xr recv 2 , .Xr recvfrom 2 , -.Xr recvmsg 2 , +.Xr recvmsg 2 and related system calls. -.Pp -For files and other seekable objects, -.Dv CAP_SEEK -may also be required. -.It Dv CAP_REVOKE +.It Dv CAP_RECV +An alias to +.Dv CAP_READ . +.It Dv CAP_RENAMEAT Permit -.Xr frevoke 2 -in certain ABI compatibility modes that support this system call. +.Xr renameat 2 . +This right is required for the source directory descriptor. .It Dv CAP_SEEK Permit operations that seek on the file descriptor, such as .Xr lseek 2 , -but also required for I/O system calls that modify the file offset, such as -.Xr read 2 +but also required for I/O system calls that can read or write at any position +in the file, such as +.Xr pread 2 and -.Xr write 2 . +.Xr pwrite 2 . .It Dv CAP_SEM_GETVALUE Permit .Xr sem_getvalue 3 . @@ -299,6 +417,9 @@ Permit .Xr sem_wait 3 and .Xr sem_trywait 3 . +.It Dv CAP_SEND +An alias to +.Dv CAP_WRITE . .It Dv CAP_SETSOCKOPT Permit .Xr setsockopt 2 ; @@ -308,49 +429,56 @@ connecting, and other behaviors with global scope. Permit explicit .Xr shutdown 2 ; closing the socket will also generally shut down any connections on it. +.It Dv CAP_SYMLINKAT +Permit +.Xr symlinkat 2 . .It Dv CAP_TTYHOOK Allow configuration of TTY hooks, such as .Xr snp 4 , on the file descriptor. +.It Dv CAP_UNLINKAT +Permit +.Xr unlinkat 2 +and +.Xr renameat 2 . +This right is only required for +.Xr renameat 2 +on the destination directory descriptor if the destination object already +exists and will be removed by the rename. .It Dv CAP_WRITE Allow .Xr aio_write 2 , -.Xr pwrite 2 , +.Xr openat 2 +with +.Dv O_WRONLY +and +.Dv O_APPEND +flags, .Xr send 2 , .Xr sendmsg 2 , .Xr sendto 2 , .Xr write 2 , and related system calls. -.Pp -For files and other seekable objects, -.Dv CAP_SEEK -may also be required. 
-.Pp For .Xr sendto 2 with a non-NULL connection address, .Dv CAP_CONNECT is also required. +For +.Xr openat 2 +with the +.Dv O_WRONLY +flag, but without the +.Dv O_APPEND +flag, +.Dv CAP_SEEK +is also required. .El -.Sh CAVEAT -The -.Fn cap_new -system call and the capabilities it creates may be used to assign -fine-grained rights to sandboxed processes running in capability mode. -However, the semantics of objects accessed via file descriptors are complex, -so caution should be exercised in passing object capabilities into sandboxes. .Sh RETURN VALUES -If successful, -.Fn cap_new -returns a non-negative integer, termed a file descriptor. -It returns -1 on failure, and sets -.Va errno -to indicate the error. -.Pp -.Rv -std cap_getrights +.Rv -std .Sh ERRORS -.Fn cap_new -may return the following errors: +.Fn cap_rights_limit +succeeds unless: .Bl -tag -width Er .It Bq Er EBADF The @@ -359,29 +487,23 @@ argument is not a valid active descriptor. .It Bq Er EINVAL An invalid right has been requested in .Fa rights . -.It Bq Er EMFILE -The process has already reached its limit for open file descriptors. -.It Bq Er ENFILE -The system file table is full. -.It Bq Er EPERM +.It Bq Er ENOTCAPABLE .Fa rights contains requested rights not present in the current rights mask associated -with the capability referenced by -.Fa fd , -if any. +with the given file descriptor. .El .Pp -.Fn cap_getrights -may return the following errors: +.Fn cap_rights_get +succeeds unless: .Bl -tag -width Er .It Bq Er EBADF The .Fa fd argument is not a valid active descriptor. -.It Bq Er EINVAL +.It Bq Er EFAULT The -.Fa fd -argument is not a capability. +.Fa rightsp +argument points at an invalid address. .El .Sh SEE ALSO .Xr accept 2 , @@ -390,6 +512,9 @@ argument is not a capability. 
.Xr aio_write 2 , .Xr bind 2 , .Xr cap_enter 2 , +.Xr cap_fcntls_limit 2 , +.Xr cap_ioctls_limit 2 , +.Xr cap_rights_limit 2 , .Xr connect 2 , .Xr dup 2 , .Xr dup2 2 , @@ -421,6 +546,7 @@ argument is not a capability. .Xr mq_open 2 , .Xr open 2 , .Xr openat 2 , +.Xr pdfork 2 , .Xr pdgetpid 2 , .Xr pdkill 2 , .Xr pdwait4 2 , @@ -432,6 +558,7 @@ argument is not a capability. .Xr recv 2 , .Xr recvfrom 2 , .Xr recvmsg 2 , +.Xr renameat 2 , .Xr sctp_peeloff 2 , .Xr select 2 , .Xr send 2 , @@ -442,6 +569,7 @@ argument is not a capability. .Xr shutdown 2 , .Xr socket 2 , .Xr socketpair 2 , +.Xr symlinkat 2 , .Xr unlinkat 2 , .Xr write 2 , .Xr acl_delete_fd_np 3 , @@ -462,11 +590,11 @@ argument is not a capability. Support for capabilities and capabilities mode was developed as part of the .Tn TrustedBSD Project. +.Pp .Sh AUTHORS -These functions and the capability facility were created by -.An "Robert N. M. Watson" -at the University of Cambridge Computer Laboratory with support from a grant -from Google, Inc. +This function was created by +.An Pawel Jakub Dawidek Aq pawel@dawidek.net +under sponsorship of the FreeBSD Foundation. .Sh BUGS This man page should list the set of permitted system calls more specifically for each capability right. diff --git a/lib/libc/sys/dup.2 b/lib/libc/sys/dup.2 index 7a07c21..6e1de20 100644 --- a/lib/libc/sys/dup.2 +++ b/lib/libc/sys/dup.2 @@ -115,11 +115,6 @@ and is a valid descriptor, then .Fn dup2 is successful, and does nothing. -.Pp -The related -.Xr cap_new 2 -system call allows file descriptors to be duplicated with restrictions on -their use. .Sh RETURN VALUES The value -1 is returned if an error occurs in either call. 
The external variable @@ -152,7 +147,6 @@ argument is negative or exceeds the maximum allowable descriptor number .El .Sh SEE ALSO .Xr accept 2 , -.Xr cap_new 2 , .Xr close 2 , .Xr fcntl 2 , .Xr getdtablesize 2 , diff --git a/lib/libprocstat/libprocstat.c b/lib/libprocstat/libprocstat.c index 9d9c111..f23ec96 100644 --- a/lib/libprocstat/libprocstat.c +++ b/lib/libprocstat/libprocstat.c @@ -600,7 +600,6 @@ kinfo_fflags2fst(int kfflags) } kfflags2fst[] = { { KF_FLAG_APPEND, PS_FST_FFLAG_APPEND }, { KF_FLAG_ASYNC, PS_FST_FFLAG_ASYNC }, - { KF_FLAG_CAPABILITY, PS_FST_FFLAG_CAPABILITY }, { KF_FLAG_CREAT, PS_FST_FFLAG_CREAT }, { KF_FLAG_DIRECT, PS_FST_FFLAG_DIRECT }, { KF_FLAG_EXCL, PS_FST_FFLAG_EXCL }, diff --git a/lib/libprocstat/libprocstat.h b/lib/libprocstat/libprocstat.h index 662ea37..1c55aa7 100644 --- a/lib/libprocstat/libprocstat.h +++ b/lib/libprocstat/libprocstat.h @@ -88,7 +88,6 @@ #define PS_FST_FFLAG_DIRECT 0x1000 #define PS_FST_FFLAG_EXEC 0x2000 #define PS_FST_FFLAG_HASLOCK 0x4000 -#define PS_FST_FFLAG_CAPABILITY 0x8000 struct procstat; struct filestat { diff --git a/sys/bsm/audit_kevents.h b/sys/bsm/audit_kevents.h index d227981..9d71fa2 100644 --- a/sys/bsm/audit_kevents.h +++ b/sys/bsm/audit_kevents.h @@ -588,7 +588,7 @@ #define AUE_OPENAT 43184 /* FreeBSD. */ #define AUE_POSIX_OPENPT 43185 /* FreeBSD. */ #define AUE_CAP_NEW 43186 /* TrustedBSD. */ -#define AUE_CAP_GETRIGHTS 43187 /* TrustedBSD. */ +#define AUE_CAP_RIGHTS_GET 43187 /* TrustedBSD. */ #define AUE_CAP_ENTER 43188 /* TrustedBSD. */ #define AUE_CAP_GETMODE 43189 /* TrustedBSD. */ #define AUE_POSIX_SPAWN 43190 /* Darwin. */ @@ -603,6 +603,11 @@ #define AUE_PDGETPID 43199 /* FreeBSD. */ #define AUE_PDWAIT 43200 /* FreeBSD. */ #define AUE_WAIT6 43201 /* FreeBSD. */ +#define AUE_CAP_RIGHTS_LIMIT 43202 /* TrustedBSD. */ +#define AUE_CAP_IOCTLS_LIMIT 43203 /* TrustedBSD. */ +#define AUE_CAP_IOCTLS_GET 43204 /* TrustedBSD. */ +#define AUE_CAP_FCNTLS_LIMIT 43205 /* TrustedBSD. 
*/ +#define AUE_CAP_FCNTLS_GET 43206 /* TrustedBSD. */ /* * Darwin BSM uses a number of AUE_O_* definitions, which are aliased to the diff --git a/sys/cddl/compat/opensolaris/sys/file.h b/sys/cddl/compat/opensolaris/sys/file.h index 7a3df36..0b8f875 100644 --- a/sys/cddl/compat/opensolaris/sys/file.h +++ b/sys/cddl/compat/opensolaris/sys/file.h @@ -39,15 +39,11 @@ typedef struct file file_t; #include <sys/capability.h> static __inline file_t * -getf(int fd) +getf(int fd, cap_rights_t rights) { struct file *fp; - /* - * We wouldn't need all of these rights on every invocation - * if we had more information about intent. - */ - if (fget(curthread, fd, CAP_READ | CAP_WRITE | CAP_SEEK, &fp) == 0) + if (fget(curthread, fd, rights, &fp) == 0) return (fp); return (NULL); } diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ioctl.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ioctl.c index af0c9f7..fce4bb5 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ioctl.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ioctl.c @@ -3822,7 +3822,7 @@ zfs_ioc_recv(zfs_cmd_t *zc) return (error); fd = zc->zc_cookie; - fp = getf(fd); + fp = getf(fd, CAP_PREAD); if (fp == NULL) { nvlist_free(props); return (EBADF); @@ -4079,7 +4079,7 @@ zfs_ioc_send(zfs_cmd_t *zc) error = dmu_send_estimate(tosnap, fromsnap, zc->zc_obj, &zc->zc_objset_type); } else { - file_t *fp = getf(zc->zc_cookie); + file_t *fp = getf(zc->zc_cookie, CAP_WRITE); if (fp == NULL) { dsl_dataset_rele(ds, FTAG); if (dsfrom) @@ -4675,7 +4675,7 @@ zfs_ioc_diff(zfs_cmd_t *zc) return (error); } - fp = getf(zc->zc_cookie); + fp = getf(zc->zc_cookie, CAP_WRITE); if (fp == NULL) { dmu_objset_rele(fromsnap, FTAG); dmu_objset_rele(tosnap, FTAG); diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_onexit.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_onexit.c index ca0acfd..c12826f 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_onexit.c +++ 
b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_onexit.c @@ -124,7 +124,7 @@ zfs_onexit_fd_hold(int fd, minor_t *minorp) void *data; int error; - fp = getf(fd); + fp = getf(fd, CAP_NONE); if (fp == NULL) return (EBADF); diff --git a/sys/compat/freebsd32/syscalls.master b/sys/compat/freebsd32/syscalls.master index 4106447..6552d13 100644 --- a/sys/compat/freebsd32/syscalls.master +++ b/sys/compat/freebsd32/syscalls.master @@ -963,7 +963,7 @@ struct shmid_ds32 *buf); } 513 AUE_LPATHCONF NOPROTO { int lpathconf(char *path, int name); } 514 AUE_CAP_NEW NOPROTO { int cap_new(int fd, uint64_t rights); } -515 AUE_CAP_GETRIGHTS NOPROTO { int cap_getrights(int fd, \ +515 AUE_CAP_RIGHTS_GET NOPROTO { int cap_rights_get(int fd, \ uint64_t *rightsp); } 516 AUE_CAP_ENTER NOPROTO { int cap_enter(void); } 517 AUE_CAP_GETMODE NOPROTO { int cap_getmode(u_int *modep); } @@ -1005,3 +1005,13 @@ struct wrusage32 *wrusage, \ siginfo_t *info); } +533 AUE_CAP_RIGHTS_LIMIT NOPROTO { int cap_rights_limit(int fd, \ + uint64_t rights); } +534 AUE_CAP_IOCTLS_LIMIT NOPROTO { int cap_ioctls_limit(int fd, \ + const u_long *cmds, size_t ncmds); } +535 AUE_CAP_IOCTLS_GET NOPROTO { ssize_t cap_ioctls_get(int fd, \ + u_long *cmds, size_t maxcmds); } +536 AUE_CAP_FCNTLS_LIMIT NOPROTO { int cap_fcntls_limit(int fd, \ + uint32_t fcntlrights); } +537 AUE_CAP_FCNTLS_GET NOPROTO { int cap_fcntls_get(int fd, \ + uint32_t *fcntlrightsp); } diff --git a/sys/compat/linux/linux_file.c b/sys/compat/linux/linux_file.c index 0318a5c..346d178 100644 --- a/sys/compat/linux/linux_file.c +++ b/sys/compat/linux/linux_file.c @@ -154,6 +154,7 @@ linux_common_open(struct thread *td, int dirfd, char *path, int l_flags, int mod SESS_LEADER(p) && !(p->p_flag & P_CONTROLT)) { PROC_UNLOCK(p); sx_unlock(&proctree_lock); + /* XXXPJD: Verify if TIOCSCTTY is allowed. 
*/ if (fp->f_type == DTYPE_VNODE) (void) fo_ioctl(fp, TIOCSCTTY, (caddr_t) 0, td->td_ucred, td); @@ -1038,11 +1039,11 @@ linux_pread(td, uap) error = sys_pread(td, &bsd); if (error == 0) { - /* This seems to violate POSIX but linux does it */ - if ((error = fgetvp(td, uap->fd, CAP_READ, &vp)) != 0) - return (error); + /* This seems to violate POSIX but linux does it */ + if ((error = fgetvp(td, uap->fd, CAP_PREAD, &vp)) != 0) + return (error); if (vp->v_type == VDIR) { - vrele(vp); + vrele(vp); return (EISDIR); } vrele(vp); diff --git a/sys/compat/svr4/svr4_fcntl.c b/sys/compat/svr4/svr4_fcntl.c index b9d3ace..86fab78 100644 --- a/sys/compat/svr4/svr4_fcntl.c +++ b/sys/compat/svr4/svr4_fcntl.c @@ -265,14 +265,14 @@ fd_revoke(td, fd) /* * If we ever want to support Capsicum on SVR4 processes (unlikely) * or FreeBSD grows a native frevoke() (more likely), we will need a - * CAP_REVOKE here. + * CAP_FREVOKE here. * - * In the meantime, use CAP_MASK_VALID: if a SVR4 process wants to + * In the meantime, use CAP_ALL: if a SVR4 process wants to * do an frevoke(), it needs to do it on either a regular file * descriptor or a fully-privileged capability (which is effectively * the same as a non-capability-restricted file descriptor). 
*/ - if ((error = fgetvp(td, fd, CAP_MASK_VALID, &vp)) != 0) + if ((error = fgetvp(td, fd, CAP_ALL, &vp)) != 0) return (error); if (vp->v_type != VCHR && vp->v_type != VBLK) { diff --git a/sys/compat/svr4/svr4_filio.c b/sys/compat/svr4/svr4_filio.c index 967169b..0fbba07 100644 --- a/sys/compat/svr4/svr4_filio.c +++ b/sys/compat/svr4/svr4_filio.c @@ -197,22 +197,24 @@ svr4_fil_ioctl(fp, td, retval, fd, cmd, data) u_long cmd; caddr_t data; { - int error; - int num; struct filedesc *fdp = td->td_proc->p_fd; + struct filedescent *fde; + int error, num; *retval = 0; switch (cmd) { case SVR4_FIOCLEX: FILEDESC_XLOCK(fdp); - fdp->fd_ofileflags[fd] |= UF_EXCLOSE; + fde = &fdp->fd_ofiles[fd]; + fde->fde_flags |= UF_EXCLOSE; FILEDESC_XUNLOCK(fdp); return 0; case SVR4_FIONCLEX: FILEDESC_XLOCK(fdp); - fdp->fd_ofileflags[fd] &= ~UF_EXCLOSE; + fde = &fdp->fd_ofiles[fd]; + fde->fde_flags &= ~UF_EXCLOSE; FILEDESC_XUNLOCK(fdp); return 0; diff --git a/sys/compat/svr4/svr4_misc.c b/sys/compat/svr4/svr4_misc.c index d6bc4eb..0cfaeae 100644 --- a/sys/compat/svr4/svr4_misc.c +++ b/sys/compat/svr4/svr4_misc.c @@ -247,10 +247,8 @@ svr4_sys_getdents64(td, uap) DPRINTF(("svr4_sys_getdents64(%d, *, %d)\n", uap->fd, uap->nbytes)); - if ((error = getvnode(td->td_proc->p_fd, uap->fd, - CAP_READ | CAP_SEEK, &fp)) != 0) { + if ((error = getvnode(td->td_proc->p_fd, uap->fd, CAP_READ, &fp)) != 0) return (error); - } if ((fp->f_flag & FREAD) == 0) { fdrop(fp, td); @@ -426,8 +424,7 @@ svr4_sys_getdents(td, uap) if (uap->nbytes < 0) return (EINVAL); - if ((error = getvnode(td->td_proc->p_fd, uap->fd, - CAP_READ | CAP_SEEK, &fp)) != 0) + if ((error = getvnode(td->td_proc->p_fd, uap->fd, CAP_READ, &fp)) != 0) return (error); if ((fp->f_flag & FREAD) == 0) { diff --git a/sys/compat/svr4/svr4_stream.c b/sys/compat/svr4/svr4_stream.c index 27014e3..1c7e83e 100644 --- a/sys/compat/svr4/svr4_stream.c +++ b/sys/compat/svr4/svr4_stream.c @@ -1449,7 +1449,7 @@ svr4_sys_putmsg(td, uap) struct file *fp; int 
error; - if ((error = fget(td, uap->fd, CAP_WRITE, &fp)) != 0) { + if ((error = fget(td, uap->fd, CAP_SEND, &fp)) != 0) { #ifdef DEBUG_SVR4 uprintf("putmsg: bad fp\n"); #endif @@ -1621,7 +1621,7 @@ svr4_sys_getmsg(td, uap) struct file *fp; int error; - if ((error = fget(td, uap->fd, CAP_READ, &fp)) != 0) { + if ((error = fget(td, uap->fd, CAP_RECV, &fp)) != 0) { #ifdef DEBUG_SVR4 uprintf("getmsg: bad fp\n"); #endif diff --git a/sys/dev/iscsi/initiator/iscsi.c b/sys/dev/iscsi/initiator/iscsi.c index a93a685..3737b7f 100644 --- a/sys/dev/iscsi/initiator/iscsi.c +++ b/sys/dev/iscsi/initiator/iscsi.c @@ -387,11 +387,11 @@ i_setsoc(isc_session_t *sp, int fd, struct thread *td) if(sp->soc != NULL) isc_stop_receiver(sp); - error = fget(td, fd, CAP_SOCK_ALL, &sp->fp); + error = fget(td, fd, CAP_SOCK_CLIENT, &sp->fp); if(error) return error; - if((error = fgetsock(td, fd, CAP_SOCK_ALL, &sp->soc, 0)) == 0) { + if((error = fgetsock(td, fd, CAP_SOCK_CLIENT, &sp->soc, 0)) == 0) { sp->td = td; isc_start_receiver(sp); } diff --git a/sys/fs/fdescfs/fdesc_vfsops.c b/sys/fs/fdescfs/fdesc_vfsops.c index c3dbccf..cb5e3c0 100644 --- a/sys/fs/fdescfs/fdesc_vfsops.c +++ b/sys/fs/fdescfs/fdesc_vfsops.c @@ -205,7 +205,7 @@ fdesc_statfs(mp, sbp) last = min(fdp->fd_nfiles, lim); freefd = 0; for (i = fdp->fd_freefile; i < last; i++) - if (fdp->fd_ofiles[i] == NULL) + if (fdp->fd_ofiles[i].fde_file == NULL) freefd++; /* diff --git a/sys/fs/fdescfs/fdesc_vnops.c b/sys/fs/fdescfs/fdesc_vnops.c index 3c4f44d..7923fc6 100644 --- a/sys/fs/fdescfs/fdesc_vnops.c +++ b/sys/fs/fdescfs/fdesc_vnops.c @@ -534,7 +534,7 @@ fdesc_readdir(ap) dp->d_type = DT_DIR; break; default: - if (fdp->fd_ofiles[fcnt] == NULL) + if (fdp->fd_ofiles[fcnt].fde_file == NULL) break; dp->d_namlen = sprintf(dp->d_name, "%d", fcnt); dp->d_reclen = UIO_MX; diff --git a/sys/fs/nfs/nfsdport.h b/sys/fs/nfs/nfsdport.h index 529ada2..a09a6dd 100644 --- a/sys/fs/nfs/nfsdport.h +++ b/sys/fs/nfs/nfsdport.h @@ -94,8 +94,6 @@ struct 
nfsexstuff { #define NFSFPCRED(f) ((f)->f_cred) #define NFSFPFLAG(f) ((f)->f_flag) -int fp_getfvp(NFSPROC_T *, int, struct file **, struct vnode **); - #define NFSNAMEICNDSET(n, c, o, f) do { \ (n)->cn_cred = (c); \ (n)->cn_nameiop = (o); \ diff --git a/sys/fs/nfsclient/nfs_clport.c b/sys/fs/nfsclient/nfs_clport.c index 1179eb5..a07a67f 100644 --- a/sys/fs/nfsclient/nfs_clport.c +++ b/sys/fs/nfsclient/nfs_clport.c @@ -1215,7 +1215,7 @@ nfssvc_nfscl(struct thread *td, struct nfssvc_args *uap) * pretend that we need them all. It is better to be too * careful than too reckless. */ - if ((error = fget(td, nfscbdarg.sock, CAP_SOCK_ALL, &fp)) + if ((error = fget(td, nfscbdarg.sock, CAP_SOCK_CLIENT, &fp)) != 0) { return (error); } diff --git a/sys/fs/nfsserver/nfs_nfsdport.c b/sys/fs/nfsserver/nfs_nfsdport.c index 1731c72..ef98e2b 100644 --- a/sys/fs/nfsserver/nfs_nfsdport.c +++ b/sys/fs/nfsserver/nfs_nfsdport.c @@ -2767,7 +2767,7 @@ out: /* * glue for fp. */ -int +static int fp_getfvp(struct thread *p, int fd, struct file **fpp, struct vnode **vpp) { struct filedesc *fdp; @@ -2775,8 +2775,8 @@ fp_getfvp(struct thread *p, int fd, struct file **fpp, struct vnode **vpp) int error = 0; fdp = p->td_proc->p_fd; - if (fd >= fdp->fd_nfiles || - (fp = fdp->fd_ofiles[fd]) == NULL) { + if (fd < 0 || fd >= fdp->fd_nfiles || + (fp = fdp->fd_ofiles[fd].fde_file) == NULL) { error = EBADF; goto out; } @@ -3041,7 +3041,7 @@ nfssvc_nfsd(struct thread *td, struct nfssvc_args *uap) * pretend that we need them all. It is better to be too * careful than too reckless. 
*/ - if ((error = fget(td, sockarg.sock, CAP_SOCK_ALL, &fp)) != 0) + if ((error = fget(td, sockarg.sock, CAP_SOCK_SERVER, &fp)) != 0) goto out; if (fp->f_type != DTYPE_SOCKET) { fdrop(fp, td); diff --git a/sys/i386/ibcs2/ibcs2_misc.c b/sys/i386/ibcs2/ibcs2_misc.c index 0692122..9f382aa 100644 --- a/sys/i386/ibcs2/ibcs2_misc.c +++ b/sys/i386/ibcs2/ibcs2_misc.c @@ -337,8 +337,7 @@ ibcs2_getdents(td, uap) #define BSD_DIRENT(cp) ((struct dirent *)(cp)) #define IBCS2_RECLEN(reclen) (reclen + sizeof(u_short)) - if ((error = getvnode(td->td_proc->p_fd, uap->fd, - CAP_READ | CAP_SEEK, &fp)) != 0) + if ((error = getvnode(td->td_proc->p_fd, uap->fd, CAP_READ, &fp)) != 0) return (error); if ((fp->f_flag & FREAD) == 0) { fdrop(fp, td); @@ -491,8 +490,8 @@ ibcs2_read(td, uap) u_long *cookies = NULL, *cookiep; int ncookies; - if ((error = getvnode(td->td_proc->p_fd, uap->fd, - CAP_READ | CAP_SEEK, &fp)) != 0) { + if ((error = getvnode(td->td_proc->p_fd, uap->fd, CAP_READ, + &fp)) != 0) { if (error == EINVAL) return sys_read(td, (struct read_args *)uap); else diff --git a/sys/kern/capabilities.conf b/sys/kern/capabilities.conf index 11aad16..3c08782 100644 --- a/sys/kern/capabilities.conf +++ b/sys/kern/capabilities.conf @@ -110,9 +110,14 @@ aio_write ## Allow capability mode and capability system calls. ## cap_enter +cap_fcntls_get +cap_fcntls_limit cap_getmode -cap_getrights +cap_ioctls_get +cap_ioctls_limit cap_new +cap_rights_get +cap_rights_limit ## ## Allow read-only clock operations. @@ -239,7 +244,7 @@ getcontext ## Allow directory I/O on a file descriptor, subject to capability rights. ## Originally we had separate capabilities for directory-specific read ## operations, but on BSD we allow reading the raw directory data, so we just -## rely on CAP_READ and CAP_SEEK now. +## rely on CAP_READ now. 
## getdents getdirentries @@ -317,13 +322,10 @@ gettimeofday getuid ## -## Disallow ioctl(2) for now, as frequently ioctl(2) operations have global -## scope, but this is a tricky one as it is also required for tty control. -## We do have a capability right for this operation. +## Allow ioctl(2), which hopefully will be limited by applications only to +## required commands with cap_ioctls_limit(2) syscall. ## -## XXXRW: This needs to be revisited. -## -#ioctl +ioctl ## ## Allow querying current process credential state. diff --git a/sys/kern/kern_descrip.c b/sys/kern/kern_descrip.c index acdea40..b146bab 100644 --- a/sys/kern/kern_descrip.c +++ b/sys/kern/kern_descrip.c @@ -110,15 +110,8 @@ MALLOC_DECLARE(M_FADVISE); static uma_zone_t file_zone; -/* Flags for do_dup() */ -#define DUP_FIXED 0x1 /* Force fixed allocation */ -#define DUP_FCNTL 0x2 /* fcntl()-style errors */ -#define DUP_CLOEXEC 0x4 /* Atomically set FD_CLOEXEC. */ - static int closefp(struct filedesc *fdp, int fd, struct file *fp, struct thread *td, int holdleaders); -static int do_dup(struct thread *td, int flags, int old, int new, - register_t *retval); static int fd_first_free(struct filedesc *fdp, int low, int size); static int fd_last_used(struct filedesc *fdp, int size); static void fdgrowtable(struct filedesc *fdp, int nfd); @@ -166,7 +159,7 @@ static int fill_vnode_info(struct vnode *vp, struct kinfo_file *kif); * the process exits. 
*/ struct freetable { - struct file **ft_table; + struct filedescent *ft_table; SLIST_ENTRY(freetable) ft_next; }; @@ -177,8 +170,7 @@ struct freetable { struct filedesc0 { struct filedesc fd_fd; SLIST_HEAD(, freetable) fd_free; - struct file *fd_dfiles[NDFILE]; - char fd_dfileflags[NDFILE]; + struct filedescent fd_dfiles[NDFILE]; NDSLOTTYPE fd_dmap[NDSLOTS(NDFILE)]; }; @@ -284,7 +276,8 @@ fdunused(struct filedesc *fdp, int fd) FILEDESC_XLOCK_ASSERT(fdp); KASSERT(fdisused(fdp, fd), ("fd=%d is already unused", fd)); - KASSERT(fdp->fd_ofiles[fd] == NULL, ("fd=%d is still in use", fd)); + KASSERT(fdp->fd_ofiles[fd].fde_file == NULL, + ("fd=%d is still in use", fd)); fdp->fd_map[NDSLOT(fd)] &= ~NDBIT(fd); if (fd < fdp->fd_freefile) @@ -294,6 +287,20 @@ fdunused(struct filedesc *fdp, int fd) } /* + * Free a file descriptor. + */ +static inline void +fdfree(struct filedesc *fdp, int fd) +{ + struct filedescent *fde; + + fde = &fdp->fd_ofiles[fd]; + filecaps_free(&fde->fde_caps); + bzero(fde, sizeof(*fde)); + fdunused(fdp, fd); +} + +/* * System calls on descriptors. 
*/ #ifndef _SYS_SYSPROTO_H_ @@ -434,36 +441,14 @@ sys_fcntl(struct thread *td, struct fcntl_args *uap) return (error); } -static inline int -fdunwrap(int fd, cap_rights_t rights, struct filedesc *fdp, struct file **fpp) -{ - - FILEDESC_LOCK_ASSERT(fdp); - - *fpp = fget_locked(fdp, fd); - if (*fpp == NULL) - return (EBADF); - -#ifdef CAPABILITIES - if ((*fpp)->f_type == DTYPE_CAPABILITY) { - int err = cap_funwrap(*fpp, rights, fpp); - if (err != 0) { - *fpp = NULL; - return (err); - } - } -#endif /* CAPABILITIES */ - return (0); -} - int kern_fcntl(struct thread *td, int fd, int cmd, intptr_t arg) { struct filedesc *fdp; struct flock *flp; - struct file *fp; + struct file *fp, *fp2; + struct filedescent *fde; struct proc *p; - char *pop; struct vnode *vp; int error, flg, tmp; u_int old, new; @@ -505,8 +490,9 @@ kern_fcntl(struct thread *td, int fd, int cmd, intptr_t arg) error = EBADF; break; } - pop = &fdp->fd_ofileflags[fd]; - td->td_retval[0] = (*pop & UF_EXCLOSE) ? FD_CLOEXEC : 0; + fde = &fdp->fd_ofiles[fd]; + td->td_retval[0] = + (fde->fde_flags & UF_EXCLOSE) ? FD_CLOEXEC : 0; FILEDESC_SUNLOCK(fdp); break; @@ -517,32 +503,24 @@ kern_fcntl(struct thread *td, int fd, int cmd, intptr_t arg) error = EBADF; break; } - pop = &fdp->fd_ofileflags[fd]; - *pop = (*pop &~ UF_EXCLOSE) | + fde = &fdp->fd_ofiles[fd]; + fde->fde_flags = (fde->fde_flags & ~UF_EXCLOSE) | (arg & FD_CLOEXEC ? 
UF_EXCLOSE : 0); FILEDESC_XUNLOCK(fdp); break; case F_GETFL: - FILEDESC_SLOCK(fdp); - error = fdunwrap(fd, CAP_FCNTL, fdp, &fp); - if (error != 0) { - FILEDESC_SUNLOCK(fdp); + error = fget_unlocked(fdp, fd, CAP_FCNTL, F_GETFL, &fp, NULL); + if (error != 0) break; - } td->td_retval[0] = OFLAGS(fp->f_flag); - FILEDESC_SUNLOCK(fdp); + fdrop(fp, td); break; case F_SETFL: - FILEDESC_SLOCK(fdp); - error = fdunwrap(fd, CAP_FCNTL, fdp, &fp); - if (error != 0) { - FILEDESC_SUNLOCK(fdp); + error = fget_unlocked(fdp, fd, CAP_FCNTL, F_SETFL, &fp, NULL); + if (error != 0) break; - } - fhold(fp); - FILEDESC_SUNLOCK(fdp); do { tmp = flg = fp->f_flag; tmp &= ~FCNTLFLAGS; @@ -550,7 +528,7 @@ kern_fcntl(struct thread *td, int fd, int cmd, intptr_t arg) } while(atomic_cmpset_int(&fp->f_flag, flg, tmp) == 0); tmp = fp->f_flag & FNONBLOCK; error = fo_ioctl(fp, FIONBIO, &tmp, td->td_ucred, td); - if (error) { + if (error != 0) { fdrop(fp, td); break; } @@ -567,14 +545,9 @@ kern_fcntl(struct thread *td, int fd, int cmd, intptr_t arg) break; case F_GETOWN: - FILEDESC_SLOCK(fdp); - error = fdunwrap(fd, CAP_FCNTL, fdp, &fp); - if (error != 0) { - FILEDESC_SUNLOCK(fdp); + error = fget_unlocked(fdp, fd, CAP_FCNTL, F_GETOWN, &fp, NULL); + if (error != 0) break; - } - fhold(fp); - FILEDESC_SUNLOCK(fdp); error = fo_ioctl(fp, FIOGETOWN, &tmp, td->td_ucred, td); if (error == 0) td->td_retval[0] = tmp; @@ -582,14 +555,9 @@ kern_fcntl(struct thread *td, int fd, int cmd, intptr_t arg) break; case F_SETOWN: - FILEDESC_SLOCK(fdp); - error = fdunwrap(fd, CAP_FCNTL, fdp, &fp); - if (error != 0) { - FILEDESC_SUNLOCK(fdp); + error = fget_unlocked(fdp, fd, CAP_FCNTL, F_SETOWN, &fp, NULL); + if (error != 0) break; - } - fhold(fp); - FILEDESC_SUNLOCK(fdp); tmp = arg; error = fo_ioctl(fp, FIOSETOWN, &tmp, td->td_ucred, td); fdrop(fp, td); @@ -608,17 +576,15 @@ kern_fcntl(struct thread *td, int fd, int cmd, intptr_t arg) case F_SETLK: do_setlk: - FILEDESC_SLOCK(fdp); - error = fdunwrap(fd, CAP_FLOCK, fdp, &fp); 
- if (error != 0) { - FILEDESC_SUNLOCK(fdp); + error = fget_unlocked(fdp, fd, CAP_FLOCK, 0, &fp, NULL); + if (error != 0) break; - } if (fp->f_type != DTYPE_VNODE) { - FILEDESC_SUNLOCK(fdp); error = EBADF; + fdrop(fp, td); break; } + flp = (struct flock *)arg; if (flp->l_whence == SEEK_CUR) { foffset = foffset_get(fp); @@ -627,16 +593,12 @@ kern_fcntl(struct thread *td, int fd, int cmd, intptr_t arg) foffset > OFF_MAX - flp->l_start)) { FILEDESC_SUNLOCK(fdp); error = EOVERFLOW; + fdrop(fp, td); break; } flp->l_start += foffset; } - /* - * VOP_ADVLOCK() may block. - */ - fhold(fp); - FILEDESC_SUNLOCK(fdp); vp = fp->f_vnode; switch (flp->l_type) { case F_RDLCK: @@ -703,37 +665,37 @@ kern_fcntl(struct thread *td, int fd, int cmd, intptr_t arg) * that the closing thread was a bit slower and that the * advisory lock succeeded before the close. */ - FILEDESC_SLOCK(fdp); - if (fget_locked(fdp, fd) != fp) { - FILEDESC_SUNLOCK(fdp); + error = fget_unlocked(fdp, fd, 0, 0, &fp2, NULL); + if (error != 0) { + fdrop(fp, td); + break; + } + if (fp != fp2) { flp->l_whence = SEEK_SET; flp->l_start = 0; flp->l_len = 0; flp->l_type = F_UNLCK; (void) VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_UNLCK, flp, F_POSIX); - } else - FILEDESC_SUNLOCK(fdp); + } fdrop(fp, td); + fdrop(fp2, td); break; case F_GETLK: - FILEDESC_SLOCK(fdp); - error = fdunwrap(fd, CAP_FLOCK, fdp, &fp); - if (error != 0) { - FILEDESC_SUNLOCK(fdp); + error = fget_unlocked(fdp, fd, CAP_FLOCK, 0, &fp, NULL); + if (error != 0) break; - } if (fp->f_type != DTYPE_VNODE) { - FILEDESC_SUNLOCK(fdp); error = EBADF; + fdrop(fp, td); break; } flp = (struct flock *)arg; if (flp->l_type != F_RDLCK && flp->l_type != F_WRLCK && flp->l_type != F_UNLCK) { - FILEDESC_SUNLOCK(fdp); error = EINVAL; + fdrop(fp, td); break; } if (flp->l_whence == SEEK_CUR) { @@ -744,15 +706,11 @@ kern_fcntl(struct thread *td, int fd, int cmd, intptr_t arg) foffset < OFF_MIN - flp->l_start)) { FILEDESC_SUNLOCK(fdp); error = EOVERFLOW; + fdrop(fp, td); break; } 
flp->l_start += foffset; } - /* - * VOP_ADVLOCK() may block. - */ - fhold(fp); - FILEDESC_SUNLOCK(fdp); vp = fp->f_vnode; error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_GETLK, flp, F_POSIX); @@ -763,19 +721,14 @@ kern_fcntl(struct thread *td, int fd, int cmd, intptr_t arg) arg = arg ? 128 * 1024: 0; /* FALLTHROUGH */ case F_READAHEAD: - FILEDESC_SLOCK(fdp); - if ((fp = fget_locked(fdp, fd)) == NULL) { - FILEDESC_SUNLOCK(fdp); - error = EBADF; + error = fget_unlocked(fdp, fd, 0, 0, &fp, NULL); + if (error != 0) break; - } if (fp->f_type != DTYPE_VNODE) { - FILEDESC_SUNLOCK(fdp); + fdrop(fp, td); error = EBADF; break; } - fhold(fp); - FILEDESC_SUNLOCK(fdp); if (arg >= 0) { vp = fp->f_vnode; error = vn_lock(vp, LK_SHARED); @@ -809,11 +762,12 @@ kern_fcntl(struct thread *td, int fd, int cmd, intptr_t arg) /* * Common code for dup, dup2, fcntl(F_DUPFD) and fcntl(F_DUP2FD). */ -static int +int do_dup(struct thread *td, int flags, int old, int new, register_t *retval) { struct filedesc *fdp; + struct filedescent *oldfde, *newfde; struct proc *p; struct file *fp; struct file *delfp; @@ -842,14 +796,15 @@ do_dup(struct thread *td, int flags, int old, int new, FILEDESC_XUNLOCK(fdp); return (EBADF); } + oldfde = &fdp->fd_ofiles[old]; if (flags & DUP_FIXED && old == new) { *retval = new; if (flags & DUP_CLOEXEC) - fdp->fd_ofileflags[new] |= UF_EXCLOSE; + fdp->fd_ofiles[new].fde_flags |= UF_EXCLOSE; FILEDESC_XUNLOCK(fdp); return (0); } - fp = fdp->fd_ofiles[old]; + fp = oldfde->fde_file; fhold(fp); /* @@ -878,8 +833,10 @@ do_dup(struct thread *td, int flags, int old, int new, } #endif fdgrowtable(fdp, new + 1); + oldfde = &fdp->fd_ofiles[old]; } - if (fdp->fd_ofiles[new] == NULL) + newfde = &fdp->fd_ofiles[new]; + if (newfde->fde_file == NULL) fdused(fdp, new); } else { if ((error = fdalloc(td, new, &new)) != 0) { @@ -887,20 +844,23 @@ do_dup(struct thread *td, int flags, int old, int new, fdrop(fp, td); return (error); } + newfde = &fdp->fd_ofiles[new]; } - KASSERT(fp == 
fdp->fd_ofiles[old], ("old fd has been modified")); + KASSERT(fp == oldfde->fde_file, ("old fd has been modified")); KASSERT(old != new, ("new fd is same as old")); - delfp = fdp->fd_ofiles[new]; + delfp = newfde->fde_file; + /* * Duplicate the source descriptor. */ - fdp->fd_ofiles[new] = fp; + *newfde = *oldfde; + filecaps_copy(&oldfde->fde_caps, &newfde->fde_caps); if ((flags & DUP_CLOEXEC) != 0) - fdp->fd_ofileflags[new] = fdp->fd_ofileflags[old] | UF_EXCLOSE; + newfde->fde_flags = oldfde->fde_flags | UF_EXCLOSE; else - fdp->fd_ofileflags[new] = fdp->fd_ofileflags[old] & ~UF_EXCLOSE; + newfde->fde_flags = oldfde->fde_flags & ~UF_EXCLOSE; if (new > fdp->fd_lastfile) fdp->fd_lastfile = new; *retval = new; @@ -1141,7 +1101,6 @@ static int closefp(struct filedesc *fdp, int fd, struct file *fp, struct thread *td, int holdleaders) { - struct file *fp_object; int error; FILEDESC_XLOCK_ASSERT(fdp); @@ -1167,12 +1126,10 @@ closefp(struct filedesc *fdp, int fd, struct file *fp, struct thread *td, knote_fdclose(td, fd); /* - * When we're closing an fd with a capability, we need to notify - * mqueue if the underlying object is of type mqueue. + * We need to notify mqueue if the object is of type mqueue. */ - (void)cap_funwrap(fp, 0, &fp_object); - if (fp_object->f_type == DTYPE_MQUEUE) - mq_fdclose(td, fd, fp_object); + if (fp->f_type == DTYPE_MQUEUE) + mq_fdclose(td, fd, fp); FILEDESC_XUNLOCK(fdp); error = closef(fp, td); @@ -1224,9 +1181,7 @@ kern_close(td, fd) FILEDESC_XUNLOCK(fdp); return (EBADF); } - fdp->fd_ofiles[fd] = NULL; - fdp->fd_ofileflags[fd] = 0; - fdunused(fdp, fd); + fdfree(fdp, fd); /* closefp() drops the FILEDESC lock for us. 
*/ return (closefp(fdp, fd, fp, td, 1)); @@ -1258,7 +1213,7 @@ sys_closefrom(struct thread *td, struct closefrom_args *uap) uap->lowfd = 0; FILEDESC_SLOCK(fdp); for (fd = uap->lowfd; fd < fdp->fd_nfiles; fd++) { - if (fdp->fd_ofiles[fd] != NULL) { + if (fdp->fd_ofiles[fd].fde_file != NULL) { FILEDESC_SUNLOCK(fdp); (void)kern_close(td, fd); FILEDESC_SLOCK(fdp); @@ -1410,6 +1365,91 @@ out: } /* + * Initialize filecaps structure. + */ +void +filecaps_init(struct filecaps *fcaps) +{ + + bzero(fcaps, sizeof(*fcaps)); + fcaps->fc_nioctls = -1; +} + +/* + * Copy filecaps structure allocating memory for ioctls array if needed. + */ +void +filecaps_copy(const struct filecaps *src, struct filecaps *dst) +{ + size_t size; + + *dst = *src; + if (src->fc_ioctls != NULL) { + KASSERT(src->fc_nioctls > 0, + ("fc_ioctls != NULL, but fc_nioctls=%hd", src->fc_nioctls)); + + size = sizeof(src->fc_ioctls[0]) * src->fc_nioctls; + dst->fc_ioctls = malloc(size, M_TEMP, M_WAITOK); + bcopy(src->fc_ioctls, dst->fc_ioctls, size); + } +} + +/* + * Move filecaps structure to the new place and clear the old place. + */ +static void +filecaps_move(struct filecaps *src, struct filecaps *dst) +{ + + *dst = *src; + bzero(src, sizeof(*src)); +} + +/* + * Fill the given filecaps structure with full rights. + */ +static void +filecaps_fill(struct filecaps *fcaps) +{ + + fcaps->fc_rights = CAP_ALL; + fcaps->fc_ioctls = NULL; + fcaps->fc_nioctls = -1; + fcaps->fc_fcntls = CAP_FCNTL_ALL; +} + +/* + * Free memory allocated within filecaps structure. + */ +void +filecaps_free(struct filecaps *fcaps) +{ + + free(fcaps->fc_ioctls, M_TEMP); + bzero(fcaps, sizeof(*fcaps)); +} + +/* + * Validate the given filecaps structure. 
+ */ +static void +filecaps_validate(const struct filecaps *fcaps, const char *func) +{ + + KASSERT((fcaps->fc_rights & ~CAP_MASK_VALID) == 0, + ("%s: invalid rights", func)); + KASSERT((fcaps->fc_fcntls & ~CAP_FCNTL_ALL) == 0, + ("%s: invalid fcntls", func)); + KASSERT(fcaps->fc_fcntls == 0 || (fcaps->fc_rights & CAP_FCNTL) != 0, + ("%s: fcntls without CAP_FCNTL", func)); + KASSERT(fcaps->fc_ioctls != NULL ? fcaps->fc_nioctls > 0 : + (fcaps->fc_nioctls == -1 || fcaps->fc_nioctls == 0), + ("%s: invalid ioctls", func)); + KASSERT(fcaps->fc_nioctls == 0 || (fcaps->fc_rights & CAP_IOCTL) != 0, + ("%s: ioctls without CAP_IOCTL", func)); +} + +/* * Grow the file table to accomodate (at least) nfd descriptors. */ static void @@ -1417,9 +1457,8 @@ fdgrowtable(struct filedesc *fdp, int nfd) { struct filedesc0 *fdp0; struct freetable *ft; - struct file **ntable; - struct file **otable; - char *nfileflags, *ofileflags; + struct filedescent *ntable; + struct filedescent *otable; int nnfiles, onfiles; NDSLOTTYPE *nmap, *omap; @@ -1430,7 +1469,6 @@ fdgrowtable(struct filedesc *fdp, int nfd) /* save old values */ onfiles = fdp->fd_nfiles; otable = fdp->fd_ofiles; - ofileflags = fdp->fd_ofileflags; omap = fdp->fd_map; /* compute the size of the new table */ @@ -1440,27 +1478,25 @@ fdgrowtable(struct filedesc *fdp, int nfd) return; /* - * Allocate a new table and map. We need enough space for a) the - * file entries themselves, b) the file flags, and c) the struct - * freetable we will use when we decommission the table and place - * it on the freelist. We place the struct freetable in the - * middle so we don't have to worry about padding. + * Allocate a new table and map. We need enough space for the + * file entries themselves and the struct freetable we will use + * when we decommission the table and place it on the freelist. + * We place the struct freetable in the middle so we don't have + * to worry about padding. 
*/ - ntable = malloc(nnfiles * sizeof(*ntable) + sizeof(struct freetable) + - nnfiles * sizeof(*nfileflags), M_FILEDESC, M_ZERO | M_WAITOK); - nfileflags = (char *)&ntable[nnfiles] + sizeof(struct freetable); + ntable = malloc(nnfiles * sizeof(ntable[0]) + sizeof(struct freetable), + M_FILEDESC, M_ZERO | M_WAITOK); nmap = malloc(NDSLOTS(nnfiles) * NDSLOTSIZE, M_FILEDESC, M_ZERO | M_WAITOK); /* copy the old data over and point at the new tables */ memcpy(ntable, otable, onfiles * sizeof(*otable)); - memcpy(nfileflags, ofileflags, onfiles * sizeof(*ofileflags)); memcpy(nmap, omap, NDSLOTS(onfiles) * sizeof(*omap)); /* update the pointers and counters */ fdp->fd_nfiles = nnfiles; + memcpy(ntable, otable, onfiles * sizeof(ntable[0])); fdp->fd_ofiles = ntable; - fdp->fd_ofileflags = nfileflags; fdp->fd_map = nmap; /* @@ -1536,8 +1572,9 @@ fdalloc(struct thread *td, int minfd, int *result) ("invalid descriptor %d", fd)); KASSERT(!fdisused(fdp, fd), ("fd_first_free() returned non-free descriptor")); - KASSERT(fdp->fd_ofiles[fd] == NULL, ("file descriptor isn't free")); - KASSERT(fdp->fd_ofileflags[fd] == 0, ("file flags are set")); + KASSERT(fdp->fd_ofiles[fd].fde_file == NULL, + ("file descriptor isn't free")); + KASSERT(fdp->fd_ofiles[fd].fde_flags == 0, ("file flags are set")); fdused(fdp, fd); *result = fd; return (0); @@ -1568,7 +1605,7 @@ fdavail(struct thread *td, int n) return (1); last = min(fdp->fd_nfiles, lim); for (i = fdp->fd_freefile; i < last; i++) { - if (fdp->fd_ofiles[i] == NULL && --n <= 0) + if (fdp->fd_ofiles[i].fde_file == NULL && --n <= 0) return (1); } return (0); @@ -1591,7 +1628,7 @@ falloc(struct thread *td, struct file **resultfp, int *resultfd, int flags) if (error) return (error); /* no reference held on error */ - error = finstall(td, fp, &fd, flags); + error = finstall(td, fp, &fd, flags, NULL); if (error) { fdrop(fp, td); /* one reference (fp only) */ return (error); @@ -1645,13 +1682,17 @@ falloc_noinstall(struct thread *td, struct file 
**resultfp) * Install a file in a file descriptor table. */ int -finstall(struct thread *td, struct file *fp, int *fd, int flags) +finstall(struct thread *td, struct file *fp, int *fd, int flags, + struct filecaps *fcaps) { struct filedesc *fdp = td->td_proc->p_fd; + struct filedescent *fde; int error; KASSERT(fd != NULL, ("%s: fd == NULL", __func__)); KASSERT(fp != NULL, ("%s: fp == NULL", __func__)); + if (fcaps != NULL) + filecaps_validate(fcaps, __func__); FILEDESC_XLOCK(fdp); if ((error = fdalloc(td, 0, fd))) { @@ -1659,9 +1700,14 @@ finstall(struct thread *td, struct file *fp, int *fd, int flags) return (error); } fhold(fp); - fdp->fd_ofiles[*fd] = fp; + fde = &fdp->fd_ofiles[*fd]; + fde->fde_file = fp; if ((flags & O_CLOEXEC) != 0) - fdp->fd_ofileflags[*fd] |= UF_EXCLOSE; + fde->fde_flags |= UF_EXCLOSE; + if (fcaps != NULL) + filecaps_move(fcaps, &fde->fde_caps); + else + filecaps_fill(&fde->fde_caps); FILEDESC_XUNLOCK(fdp); return (0); } @@ -1696,7 +1742,6 @@ fdinit(struct filedesc *fdp) newfdp->fd_fd.fd_holdcnt = 1; newfdp->fd_fd.fd_cmask = CMASK; newfdp->fd_fd.fd_ofiles = newfdp->fd_dfiles; - newfdp->fd_fd.fd_ofileflags = newfdp->fd_dfileflags; newfdp->fd_fd.fd_nfiles = NDFILE; newfdp->fd_fd.fd_map = newfdp->fd_dmap; newfdp->fd_fd.fd_lastfile = -1; @@ -1764,7 +1809,7 @@ fdunshare(struct proc *p, struct thread *td) FILEDESC_XUNLOCK(p->p_fd); tmp = fdcopy(p->p_fd); - fdfree(td); + fdescfree(td); p->p_fd = tmp; } else FILEDESC_XUNLOCK(p->p_fd); @@ -1778,6 +1823,7 @@ struct filedesc * fdcopy(struct filedesc *fdp) { struct filedesc *newfdp; + struct filedescent *nfde, *ofde; int i; /* Certain daemons might not have file descriptors. */ @@ -1796,12 +1842,14 @@ fdcopy(struct filedesc *fdp) /* copy all passable descriptors (i.e. 
not kqueue) */ newfdp->fd_freefile = -1; for (i = 0; i <= fdp->fd_lastfile; ++i) { + ofde = &fdp->fd_ofiles[i]; if (fdisused(fdp, i) && - (fdp->fd_ofiles[i]->f_ops->fo_flags & DFLAG_PASSABLE) && - fdp->fd_ofiles[i]->f_ops != &badfileops) { - newfdp->fd_ofiles[i] = fdp->fd_ofiles[i]; - newfdp->fd_ofileflags[i] = fdp->fd_ofileflags[i]; - fhold(newfdp->fd_ofiles[i]); + (ofde->fde_file->f_ops->fo_flags & DFLAG_PASSABLE) && + ofde->fde_file->f_ops != &badfileops) { + nfde = &newfdp->fd_ofiles[i]; + *nfde = *ofde; + filecaps_copy(&ofde->fde_caps, &nfde->fde_caps); + fhold(nfde->fde_file); newfdp->fd_lastfile = i; } else { if (newfdp->fd_freefile == -1) @@ -1811,9 +1859,10 @@ fdcopy(struct filedesc *fdp) newfdp->fd_cmask = fdp->fd_cmask; FILEDESC_SUNLOCK(fdp); FILEDESC_XLOCK(newfdp); - for (i = 0; i <= newfdp->fd_lastfile; ++i) - if (newfdp->fd_ofiles[i] != NULL) + for (i = 0; i <= newfdp->fd_lastfile; ++i) { + if (newfdp->fd_ofiles[i].fde_file != NULL) fdused(newfdp, i); + } if (newfdp->fd_freefile == -1) newfdp->fd_freefile = i; FILEDESC_XUNLOCK(newfdp); @@ -1824,7 +1873,7 @@ fdcopy(struct filedesc *fdp) * Release a filedesc structure. 
*/ void -fdfree(struct thread *td) +fdescfree(struct thread *td) { struct filedesc *fdp; int i; @@ -1849,12 +1898,12 @@ fdfree(struct thread *td) if (fdtol != NULL) { FILEDESC_XLOCK(fdp); KASSERT(fdtol->fdl_refcount > 0, - ("filedesc_to_refcount botch: fdl_refcount=%d", - fdtol->fdl_refcount)); + ("filedesc_to_refcount botch: fdl_refcount=%d", + fdtol->fdl_refcount)); if (fdtol->fdl_refcount == 1 && (td->td_proc->p_leader->p_flag & P_ADVLOCK) != 0) { for (i = 0; i <= fdp->fd_lastfile; i++) { - fp = fdp->fd_ofiles[i]; + fp = fdp->fd_ofiles[i].fde_file; if (fp == NULL || fp->f_type != DTYPE_VNODE) continue; fhold(fp); @@ -1914,10 +1963,10 @@ fdfree(struct thread *td) return; for (i = 0; i <= fdp->fd_lastfile; i++) { - fp = fdp->fd_ofiles[i]; + fp = fdp->fd_ofiles[i].fde_file; if (fp != NULL) { FILEDESC_XLOCK(fdp); - fdp->fd_ofiles[i] = NULL; + fdfree(fdp, i); FILEDESC_XUNLOCK(fdp); (void) closef(fp, td); } @@ -1982,6 +2031,7 @@ void setugidsafety(struct thread *td) { struct filedesc *fdp; + struct file *fp; int i; /* Certain daemons might not have file descriptors. */ @@ -1997,18 +2047,14 @@ setugidsafety(struct thread *td) for (i = 0; i <= fdp->fd_lastfile; i++) { if (i > 2) break; - if (fdp->fd_ofiles[i] && is_unsafe(fdp->fd_ofiles[i])) { - struct file *fp; - + fp = fdp->fd_ofiles[i].fde_file; + if (fp != NULL && is_unsafe(fp)) { knote_fdclose(td, i); /* * NULL-out descriptor prior to close to avoid * a race while close blocks. 
*/ - fp = fdp->fd_ofiles[i]; - fdp->fd_ofiles[i] = NULL; - fdp->fd_ofileflags[i] = 0; - fdunused(fdp, i); + fdfree(fdp, i); FILEDESC_XUNLOCK(fdp); (void) closef(fp, td); FILEDESC_XLOCK(fdp); @@ -2029,9 +2075,8 @@ fdclose(struct filedesc *fdp, struct file *fp, int idx, struct thread *td) { FILEDESC_XLOCK(fdp); - if (fdp->fd_ofiles[idx] == fp) { - fdp->fd_ofiles[idx] = NULL; - fdunused(fdp, idx); + if (fdp->fd_ofiles[idx].fde_file == fp) { + fdfree(fdp, idx); FILEDESC_XUNLOCK(fdp); fdrop(fp, td); } else @@ -2045,6 +2090,7 @@ void fdcloseexec(struct thread *td) { struct filedesc *fdp; + struct filedescent *fde; struct file *fp; int i; @@ -2054,17 +2100,16 @@ fdcloseexec(struct thread *td) return; /* - * We cannot cache fd_ofiles or fd_ofileflags since operations + * We cannot cache fd_ofiles since operations * may block and rip them out from under us. */ FILEDESC_XLOCK(fdp); for (i = 0; i <= fdp->fd_lastfile; i++) { - fp = fdp->fd_ofiles[i]; + fde = &fdp->fd_ofiles[i]; + fp = fde->fde_file; if (fp != NULL && (fp->f_type == DTYPE_MQUEUE || - (fdp->fd_ofileflags[i] & UF_EXCLOSE))) { - fdp->fd_ofiles[i] = NULL; - fdp->fd_ofileflags[i] = 0; - fdunused(fdp, i); + (fde->fde_flags & UF_EXCLOSE))) { + fdfree(fdp, i); (void) closefp(fdp, i, fp, td, 0); /* closefp() drops the FILEDESC lock. */ FILEDESC_XLOCK(fdp); @@ -2094,7 +2139,7 @@ fdcheckstd(struct thread *td) devnull = -1; error = 0; for (i = 0; i < 3; i++) { - if (fdp->fd_ofiles[i] != NULL) + if (fdp->fd_ofiles[i].fde_file != NULL) continue; if (devnull < 0) { save = td->td_retval[0]; @@ -2129,7 +2174,6 @@ closef(struct file *fp, struct thread *td) struct flock lf; struct filedesc_to_leader *fdtol; struct filedesc *fdp; - struct file *fp_object; /* * POSIX record locking dictates that any close releases ALL @@ -2142,13 +2186,9 @@ closef(struct file *fp, struct thread *td) * NULL thread pointer when there really is no owning * context that might have locks, or the locks will be * leaked. 
- * - * If this is a capability, we do lock processing under the underlying - * node, not the capability itself. */ - (void)cap_funwrap(fp, 0, &fp_object); - if (fp_object->f_type == DTYPE_VNODE && td != NULL) { - vp = fp_object->f_vnode; + if (fp->f_type == DTYPE_VNODE && td != NULL) { + vp = fp->f_vnode; if ((td->td_proc->p_leader->p_flag & P_ADVLOCK) != 0) { lf.l_whence = SEEK_SET; lf.l_start = 0; @@ -2177,7 +2217,7 @@ closef(struct file *fp, struct thread *td) lf.l_start = 0; lf.l_len = 0; lf.l_type = F_UNLCK; - vp = fp_object->f_vnode; + vp = fp->f_vnode; (void) VOP_ADVLOCK(vp, (caddr_t)fdtol->fdl_leader, F_UNLCK, &lf, F_POSIX); @@ -2211,14 +2251,19 @@ finit(struct file *fp, u_int flag, short type, void *data, struct fileops *ops) atomic_store_rel_ptr((volatile uintptr_t *)&fp->f_ops, (uintptr_t)ops); } -struct file * -fget_unlocked(struct filedesc *fdp, int fd) +int +fget_unlocked(struct filedesc *fdp, int fd, cap_rights_t needrights, + int needfcntl, struct file **fpp, cap_rights_t *haverightsp) { struct file *fp; u_int count; +#ifdef CAPABILITIES + cap_rights_t haverights; + int error; +#endif if (fd < 0 || fd >= fdp->fd_nfiles) - return (NULL); + return (EBADF); /* * Fetch the descriptor locklessly. We avoid fdrop() races by * never raising a refcount above 0. To accomplish this we have @@ -2228,9 +2273,20 @@ fget_unlocked(struct filedesc *fdp, int fd) * due to preemption. 
*/ for (;;) { - fp = fdp->fd_ofiles[fd]; + fp = fdp->fd_ofiles[fd].fde_file; if (fp == NULL) - break; + return (EBADF); +#ifdef CAPABILITIES + haverights = cap_rights(fdp, fd); + error = cap_check(haverights, needrights); + if (error != 0) + return (error); + if ((needrights & CAP_FCNTL) != 0) { + error = cap_fcntl_check(fdp, fd, needfcntl); + if (error != 0) + return (error); + } +#endif count = fp->f_count; if (count == 0) continue; @@ -2240,12 +2296,19 @@ fget_unlocked(struct filedesc *fdp, int fd) */ if (atomic_cmpset_acq_int(&fp->f_count, count, count + 1) != 1) continue; - if (fp == fdp->fd_ofiles[fd]) + if (fp == fdp->fd_ofiles[fd].fde_file) break; fdrop(fp, curthread); } - - return (fp); + *fpp = fp; + if (haverightsp != NULL) { +#ifdef CAPABILITIES + *haverightsp = haverights; +#else + *haverightsp = CAP_ALL; +#endif + } + return (0); } /* @@ -2255,33 +2318,29 @@ fget_unlocked(struct filedesc *fdp, int fd) * If the descriptor doesn't exist or doesn't match 'flags', EBADF is * returned. * - * If the FGET_GETCAP flag is set, the capability itself will be returned. - * Calling _fget() with FGET_GETCAP on a non-capability will return EINVAL. - * Otherwise, if the file is a capability, its rights will be checked against - * the capability rights mask, and if successful, the object will be unwrapped. + * File's rights will be checked against the capability rights mask. * * If an error occured the non-zero error is returned and *fpp is set to * NULL. Otherwise *fpp is held and set and zero is returned. Caller is * responsible for fdrop(). 
*/ -#define FGET_GETCAP 0x00000001 static __inline int _fget(struct thread *td, int fd, struct file **fpp, int flags, - cap_rights_t needrights, cap_rights_t *haverightsp, u_char *maxprotp, - int fget_flags) + cap_rights_t needrights, u_char *maxprotp) { struct filedesc *fdp; struct file *fp; -#ifdef CAPABILITIES - struct file *fp_fromcap; -#endif + cap_rights_t haverights; int error; *fpp = NULL; if (td == NULL || (fdp = td->td_proc->p_fd) == NULL) return (EBADF); - if ((fp = fget_unlocked(fdp, fd)) == NULL) - return (EBADF); + if (maxprotp != NULL) + needrights |= CAP_MMAP; + error = fget_unlocked(fdp, fd, needrights, 0, &fp, &haverights); + if (error != 0) + return (error); if (fp->f_ops == &badfileops) { fdrop(fp, td); return (EBADF); @@ -2289,50 +2348,11 @@ _fget(struct thread *td, int fd, struct file **fpp, int flags, #ifdef CAPABILITIES /* - * If this is a capability, what rights does it have? + * If requested, convert capability rights to access flags. */ - if (haverightsp != NULL) { - if (fp->f_type == DTYPE_CAPABILITY) - *haverightsp = cap_rights(fp); - else - *haverightsp = CAP_MASK_VALID; - } - - /* - * If a capability has been requested, return the capability directly. - * Otherwise, check capability rights, extract the underlying object, - * and check its access flags. - */ - if (fget_flags & FGET_GETCAP) { - if (fp->f_type != DTYPE_CAPABILITY) { - fdrop(fp, td); - return (EINVAL); - } - } else { - if (maxprotp == NULL) - error = cap_funwrap(fp, needrights, &fp_fromcap); - else - error = cap_funwrap_mmap(fp, needrights, maxprotp, - &fp_fromcap); - if (error != 0) { - fdrop(fp, td); - return (error); - } - - /* - * If we've unwrapped a file, drop the original capability - * and hold the new descriptor. fp after this point refers to - * the actual (unwrapped) object, not the capability. 
- */ - if (fp != fp_fromcap) { - fhold(fp_fromcap); - fdrop(fp, td); - fp = fp_fromcap; - } - } + if (maxprotp != NULL) + *maxprotp = cap_rights_to_vmprot(haverights); #else /* !CAPABILITIES */ - KASSERT(fp->f_type != DTYPE_CAPABILITY, - ("%s: saw capability", __func__)); if (maxprotp != NULL) *maxprotp = VM_PROT_ALL; #endif /* CAPABILITIES */ @@ -2371,7 +2391,7 @@ int fget(struct thread *td, int fd, cap_rights_t rights, struct file **fpp) { - return(_fget(td, fd, fpp, 0, rights, NULL, NULL, 0)); + return(_fget(td, fd, fpp, 0, rights, NULL)); } int @@ -2379,37 +2399,24 @@ fget_mmap(struct thread *td, int fd, cap_rights_t rights, u_char *maxprotp, struct file **fpp) { - return (_fget(td, fd, fpp, 0, rights, NULL, maxprotp, 0)); + return (_fget(td, fd, fpp, 0, rights, maxprotp)); } int fget_read(struct thread *td, int fd, cap_rights_t rights, struct file **fpp) { - return(_fget(td, fd, fpp, FREAD, rights, NULL, NULL, 0)); + return(_fget(td, fd, fpp, FREAD, rights, NULL)); } int fget_write(struct thread *td, int fd, cap_rights_t rights, struct file **fpp) { - return (_fget(td, fd, fpp, FWRITE, rights, NULL, NULL, 0)); + return (_fget(td, fd, fpp, FWRITE, rights, NULL)); } /* - * Unlike the other fget() calls, which accept and check capability rights - * but never return capabilities, fgetcap() returns the capability but doesn't - * check capability rights. - */ -int -fgetcap(struct thread *td, int fd, struct file **fpp) -{ - - return (_fget(td, fd, fpp, 0, 0, NULL, NULL, FGET_GETCAP)); -} - - -/* * Like fget() but loads the underlying vnode, or returns an error if the * descriptor does not represent a vnode. Note that pipes use vnodes but * never have VM objects. The returned vnode will be vref()'d. 
@@ -2418,14 +2425,14 @@ fgetcap(struct thread *td, int fd, struct file **fpp) */ static __inline int _fgetvp(struct thread *td, int fd, int flags, cap_rights_t needrights, - cap_rights_t *haverightsp, struct vnode **vpp) + struct vnode **vpp) { struct file *fp; int error; *vpp = NULL; - if ((error = _fget(td, fd, &fp, flags, needrights, haverightsp, - NULL, 0)) != 0) + error = _fget(td, fd, &fp, flags, needrights, NULL); + if (error) return (error); if (fp->f_vnode == NULL) { error = EINVAL; @@ -2442,28 +2449,54 @@ int fgetvp(struct thread *td, int fd, cap_rights_t rights, struct vnode **vpp) { - return (_fgetvp(td, fd, 0, rights, NULL, vpp)); + return (_fgetvp(td, fd, 0, rights, vpp)); } int -fgetvp_rights(struct thread *td, int fd, cap_rights_t need, cap_rights_t *have, - struct vnode **vpp) +fgetvp_rights(struct thread *td, int fd, cap_rights_t need, + struct filecaps *havecaps, struct vnode **vpp) { - return (_fgetvp(td, fd, 0, need, have, vpp)); + struct filedesc *fdp; + struct file *fp; +#ifdef CAPABILITIES + int error; +#endif + + if (td == NULL || (fdp = td->td_proc->p_fd) == NULL) + return (EBADF); + + fp = fget_locked(fdp, fd); + if (fp == NULL || fp->f_ops == &badfileops) + return (EBADF); + +#ifdef CAPABILITIES + error = cap_check(cap_rights(fdp, fd), need); + if (error != 0) + return (error); +#endif + + if (fp->f_vnode == NULL) + return (EINVAL); + + *vpp = fp->f_vnode; + vref(*vpp); + filecaps_copy(&fdp->fd_ofiles[fd].fde_caps, havecaps); + + return (0); } int fgetvp_read(struct thread *td, int fd, cap_rights_t rights, struct vnode **vpp) { - return (_fgetvp(td, fd, FREAD, rights, NULL, vpp)); + return (_fgetvp(td, fd, FREAD, rights, vpp)); } int fgetvp_exec(struct thread *td, int fd, cap_rights_t rights, struct vnode **vpp) { - return (_fgetvp(td, fd, FEXEC, rights, NULL, vpp)); + return (_fgetvp(td, fd, FEXEC, rights, vpp)); } #ifdef notyet @@ -2472,7 +2505,7 @@ fgetvp_write(struct thread *td, int fd, cap_rights_t rights, struct vnode **vpp) { - 
return (_fgetvp(td, fd, FWRITE, rights, NULL, vpp)); + return (_fgetvp(td, fd, FWRITE, rights, vpp)); } #endif @@ -2497,7 +2530,7 @@ fgetsock(struct thread *td, int fd, cap_rights_t rights, struct socket **spp, *spp = NULL; if (fflagp != NULL) *fflagp = 0; - if ((error = _fget(td, fd, &fp, 0, rights, NULL, NULL, 0)) != 0) + if ((error = _fget(td, fd, &fp, 0, rights, NULL)) != 0) return (error); if (fp->f_type != DTYPE_SOCKET) { error = ENOTSOCK; @@ -2533,9 +2566,6 @@ fputsock(struct socket *so) /* * Handle the last reference to a file being closed. - * - * No special capability handling here, as the capability's fo_close will run - * instead of the object here, and perform any necessary drop on the object. */ int _fdrop(struct file *fp, struct thread *td) @@ -2612,7 +2642,8 @@ done2: * Duplicate the specified descriptor to a free descriptor. */ int -dupfdopen(struct thread *td, struct filedesc *fdp, int dfd, int mode, int openerror, int *indxp) +dupfdopen(struct thread *td, struct filedesc *fdp, int dfd, int mode, + int openerror, int *indxp) { struct file *fp; int error, indx; @@ -2656,18 +2687,17 @@ dupfdopen(struct thread *td, struct filedesc *fdp, int dfd, int mode, int opener FILEDESC_XUNLOCK(fdp); return (EACCES); } - fdp->fd_ofiles[indx] = fp; - fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd]; fhold(fp); + fdp->fd_ofiles[indx] = fdp->fd_ofiles[dfd]; + filecaps_copy(&fdp->fd_ofiles[dfd].fde_caps, + &fdp->fd_ofiles[indx].fde_caps); break; case ENXIO: /* * Steal away the file pointer from dfd and stuff it into indx. 
*/ - fdp->fd_ofiles[indx] = fp; - fdp->fd_ofiles[dfd] = NULL; - fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd]; - fdp->fd_ofileflags[dfd] = 0; + fdp->fd_ofiles[indx] = fdp->fd_ofiles[dfd]; + bzero(&fdp->fd_ofiles[dfd], sizeof(fdp->fd_ofiles[dfd])); fdunused(fdp, dfd); break; } @@ -2823,7 +2853,7 @@ sysctl_kern_file(SYSCTL_HANDLER_ARGS) continue; FILEDESC_SLOCK(fdp); for (n = 0; fdp->fd_refcnt > 0 && n < fdp->fd_nfiles; ++n) { - if ((fp = fdp->fd_ofiles[n]) == NULL) + if ((fp = fdp->fd_ofiles[n].fde_file) == NULL) continue; xf.xf_fd = n; xf.xf_file = fp; @@ -2935,7 +2965,7 @@ sysctl_kern_proc_ofiledesc(SYSCTL_HANDLER_ARGS) export_vnode_for_osysctl(fdp->fd_jdir, KF_FD_TYPE_JAIL, kif, fdp, req); for (i = 0; i < fdp->fd_nfiles; i++) { - if ((fp = fdp->fd_ofiles[i]) == NULL) + if ((fp = fdp->fd_ofiles[i].fde_file) == NULL) continue; bzero(kif, sizeof(*kif)); kif->kf_structsize = sizeof(*kif); @@ -2945,21 +2975,6 @@ sysctl_kern_proc_ofiledesc(SYSCTL_HANDLER_ARGS) shmfd = NULL; kif->kf_fd = i; -#ifdef CAPABILITIES - /* - * When reporting a capability, most fields will be from the - * underlying object, but do mark as a capability. With - * ofiledesc, we don't have a field to export the cap_rights_t, - * but we do with the new filedesc. 
- */ - if (fp->f_type == DTYPE_CAPABILITY) { - kif->kf_flags |= KF_FLAG_CAPABILITY; - (void)cap_funwrap(fp, 0, &fp); - } -#else - KASSERT(fp->f_type != DTYPE_CAPABILITY, - ("sysctl_kern_proc_ofiledesc: saw capability")); -#endif switch (fp->f_type) { case DTYPE_VNODE: kif->kf_type = KF_TYPE_VNODE; @@ -3128,8 +3143,8 @@ CTASSERT(sizeof(struct kinfo_file) == KINFO_FILE_SIZE); static int export_fd_for_sysctl(void *data, int type, int fd, int fflags, int refcnt, - int64_t offset, int fd_is_cap, cap_rights_t fd_cap_rights, - struct kinfo_file *kif, struct sysctl_req *req) + int64_t offset, cap_rights_t fd_cap_rights, struct kinfo_file *kif, + struct sysctl_req *req) { struct { int fflag; @@ -3191,10 +3206,7 @@ export_fd_for_sysctl(void *data, int type, int fd, int fflags, int refcnt, for (i = 0; i < NFFLAGS; i++) if (fflags & fflags_table[i].fflag) kif->kf_flags |= fflags_table[i].kf_fflag; - if (fd_is_cap) - kif->kf_flags |= KF_FLAG_CAPABILITY; - if (fd_is_cap) - kif->kf_cap_rights = fd_cap_rights; + kif->kf_cap_rights = fd_cap_rights; kif->kf_fd = fd; kif->kf_type = type; kif->kf_ref_count = refcnt; @@ -3222,7 +3234,7 @@ sysctl_kern_proc_filedesc(SYSCTL_HANDLER_ARGS) int64_t offset; void *data; int error, i, *name; - int fd_is_cap, type, refcnt, fflags; + int type, refcnt, fflags; cap_rights_t fd_cap_rights; name = (int *)arg1; @@ -3252,13 +3264,13 @@ sysctl_kern_proc_filedesc(SYSCTL_HANDLER_ARGS) kif = malloc(sizeof(*kif), M_TEMP, M_WAITOK); if (tracevp != NULL) export_fd_for_sysctl(tracevp, KF_TYPE_VNODE, KF_FD_TYPE_TRACE, - FREAD | FWRITE, -1, -1, 0, 0, kif, req); + FREAD | FWRITE, -1, -1, 0, kif, req); if (textvp != NULL) export_fd_for_sysctl(textvp, KF_TYPE_VNODE, KF_FD_TYPE_TEXT, - FREAD, -1, -1, 0, 0, kif, req); + FREAD, -1, -1, 0, kif, req); if (cttyvp != NULL) export_fd_for_sysctl(cttyvp, KF_TYPE_VNODE, KF_FD_TYPE_CTTY, - FREAD | FWRITE, -1, -1, 0, 0, kif, req); + FREAD | FWRITE, -1, -1, 0, kif, req); if (fdp == NULL) goto fail; FILEDESC_SLOCK(fdp); @@ 
-3268,7 +3280,7 @@ sysctl_kern_proc_filedesc(SYSCTL_HANDLER_ARGS) data = fdp->fd_cdir; FILEDESC_SUNLOCK(fdp); export_fd_for_sysctl(data, KF_TYPE_VNODE, KF_FD_TYPE_CWD, - FREAD, -1, -1, 0, 0, kif, req); + FREAD, -1, -1, 0, kif, req); FILEDESC_SLOCK(fdp); } /* root directory */ @@ -3277,7 +3289,7 @@ sysctl_kern_proc_filedesc(SYSCTL_HANDLER_ARGS) data = fdp->fd_rdir; FILEDESC_SUNLOCK(fdp); export_fd_for_sysctl(data, KF_TYPE_VNODE, KF_FD_TYPE_ROOT, - FREAD, -1, -1, 0, 0, kif, req); + FREAD, -1, -1, 0, kif, req); FILEDESC_SLOCK(fdp); } /* jail directory */ @@ -3286,30 +3298,17 @@ sysctl_kern_proc_filedesc(SYSCTL_HANDLER_ARGS) data = fdp->fd_jdir; FILEDESC_SUNLOCK(fdp); export_fd_for_sysctl(data, KF_TYPE_VNODE, KF_FD_TYPE_JAIL, - FREAD, -1, -1, 0, 0, kif, req); + FREAD, -1, -1, 0, kif, req); FILEDESC_SLOCK(fdp); } for (i = 0; i < fdp->fd_nfiles; i++) { - if ((fp = fdp->fd_ofiles[i]) == NULL) + if ((fp = fdp->fd_ofiles[i].fde_file) == NULL) continue; data = NULL; - fd_is_cap = 0; - fd_cap_rights = 0; - #ifdef CAPABILITIES - /* - * When reporting a capability, most fields will be from the - * underlying object, but do mark as a capability and export - * the capability rights mask. 
- */ - if (fp->f_type == DTYPE_CAPABILITY) { - fd_is_cap = 1; - fd_cap_rights = cap_rights(fp); - (void)cap_funwrap(fp, 0, &fp); - } + fd_cap_rights = cap_rights(fdp, i); #else /* !CAPABILITIES */ - KASSERT(fp->f_type != DTYPE_CAPABILITY, - ("sysctl_kern_proc_filedesc: saw capability")); + fd_cap_rights = 0; #endif switch (fp->f_type) { case DTYPE_VNODE: @@ -3385,7 +3384,7 @@ sysctl_kern_proc_filedesc(SYSCTL_HANDLER_ARGS) if (type == KF_TYPE_VNODE || type == KF_TYPE_FIFO) FILEDESC_SUNLOCK(fdp); error = export_fd_for_sysctl(data, type, i, fflags, refcnt, - offset, fd_is_cap, fd_cap_rights, kif, req); + offset, fd_cap_rights, kif, req); if (type == KF_TYPE_VNODE || type == KF_TYPE_FIFO) FILEDESC_SLOCK(fdp); if (error) { @@ -3644,7 +3643,7 @@ file_to_first_proc(struct file *fp) if (fdp == NULL) continue; for (n = 0; n < fdp->fd_nfiles; n++) { - if (fp == fdp->fd_ofiles[n]) + if (fp == fdp->fd_ofiles[n].fde_file) return (p); } } @@ -3694,7 +3693,7 @@ DB_SHOW_COMMAND(files, db_show_files) if ((fdp = p->p_fd) == NULL) continue; for (n = 0; n < fdp->fd_nfiles; ++n) { - if ((fp = fdp->fd_ofiles[n]) == NULL) + if ((fp = fdp->fd_ofiles[n].fde_file) == NULL) continue; db_print_file(fp, header); header = 0; diff --git a/sys/kern/kern_exec.c b/sys/kern/kern_exec.c index 965ce31..7c0d2d6 100644 --- a/sys/kern/kern_exec.c +++ b/sys/kern/kern_exec.c @@ -438,9 +438,6 @@ interpret: } else { AUDIT_ARG_FD(args->fd); /* - * Some might argue that CAP_READ and/or CAP_MMAP should also - * be required here; such arguments will be entertained. - * * Descriptors opened only with O_EXEC or O_RDONLY are allowed. */ error = fgetvp_exec(td, args->fd, CAP_FEXECVE, &binvp); diff --git a/sys/kern/kern_exit.c b/sys/kern/kern_exit.c index 82f0344..5bd2daa 100644 --- a/sys/kern/kern_exit.c +++ b/sys/kern/kern_exit.c @@ -297,7 +297,7 @@ exit1(struct thread *td, int rv) * Close open files and release open-file table. * This may block! 
*/ - fdfree(td); + fdescfree(td); /* * If this thread tickled GEOM, we need to wait for the giggling to diff --git a/sys/kern/kern_fork.c b/sys/kern/kern_fork.c index 287d202..b5a4934 100644 --- a/sys/kern/kern_fork.c +++ b/sys/kern/kern_fork.c @@ -342,7 +342,7 @@ fork_norfproc(struct thread *td, int flags) if (flags & RFCFDG) { struct filedesc *fdtmp; fdtmp = fdinit(td->td_proc->p_fd); - fdfree(td); + fdescfree(td); p1->p_fd = fdtmp; } diff --git a/sys/kern/sys_capability.c b/sys/kern/sys_capability.c index 6fb4fee..ba168e9 100644 --- a/sys/kern/sys_capability.c +++ b/sys/kern/sys_capability.c @@ -1,11 +1,15 @@ /*- * Copyright (c) 2008-2011 Robert N. M. Watson * Copyright (c) 2010-2011 Jonathan Anderson + * Copyright (c) 2012 FreeBSD Foundation * All rights reserved. * * This software was developed at the University of Cambridge Computer * Laboratory with support from a grant from Google, Inc. * + * Portions of this software were developed by Pawel Jakub Dawidek under + * sponsorship from the FreeBSD Foundation. + * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: @@ -62,6 +66,7 @@ __FBSDID("$FreeBSD$"); #include <sys/file.h> #include <sys/filedesc.h> #include <sys/kernel.h> +#include <sys/limits.h> #include <sys/lock.h> #include <sys/mutex.h> #include <sys/proc.h> @@ -139,90 +144,48 @@ sys_cap_getmode(struct thread *td, struct cap_getmode_args *uap) FEATURE(security_capabilities, "Capsicum Capabilities"); -/* - * struct capability describes a capability, and is hung off of its struct - * file f_data field. cap_file and cap_rightss are static once hooked up, as - * neither the object it references nor the rights it encapsulates are - * permitted to change. - */ -struct capability { - struct file *cap_object; /* Underlying object's file. */ - struct file *cap_file; /* Back-pointer to cap's file. */ - cap_rights_t cap_rights; /* Mask of rights on object. 
*/ -}; +static inline int +_cap_check(cap_rights_t have, cap_rights_t need, enum ktr_cap_fail_type type) +{ + + + if ((need & ~have) != 0) { +#ifdef KTRACE + if (KTRPOINT(curthread, KTR_CAPFAIL)) + ktrcapfail(type, need, have); +#endif + return (ENOTCAPABLE); + } + return (0); +} /* - * Capabilities have a fileops vector, but in practice none should ever be - * called except for fo_close, as the capability will normally not be - * returned during a file descriptor lookup in the system call code. + * Test whether a capability grants the requested rights. */ -static fo_rdwr_t capability_read; -static fo_rdwr_t capability_write; -static fo_truncate_t capability_truncate; -static fo_ioctl_t capability_ioctl; -static fo_poll_t capability_poll; -static fo_kqfilter_t capability_kqfilter; -static fo_stat_t capability_stat; -static fo_close_t capability_close; -static fo_chmod_t capability_chmod; -static fo_chown_t capability_chown; - -static struct fileops capability_ops = { - .fo_read = capability_read, - .fo_write = capability_write, - .fo_truncate = capability_truncate, - .fo_ioctl = capability_ioctl, - .fo_poll = capability_poll, - .fo_kqfilter = capability_kqfilter, - .fo_stat = capability_stat, - .fo_close = capability_close, - .fo_chmod = capability_chmod, - .fo_chown = capability_chown, - .fo_flags = DFLAG_PASSABLE, -}; - -static struct fileops capability_ops_unpassable = { - .fo_read = capability_read, - .fo_write = capability_write, - .fo_truncate = capability_truncate, - .fo_ioctl = capability_ioctl, - .fo_poll = capability_poll, - .fo_kqfilter = capability_kqfilter, - .fo_stat = capability_stat, - .fo_close = capability_close, - .fo_chmod = capability_chmod, - .fo_chown = capability_chown, - .fo_flags = 0, -}; - -static uma_zone_t capability_zone; - -static void -capability_init(void *dummy __unused) +int +cap_check(cap_rights_t have, cap_rights_t need) { - capability_zone = uma_zcreate("capability", sizeof(struct capability), - NULL, NULL, NULL, NULL, 
UMA_ALIGN_PTR, 0); - if (capability_zone == NULL) - panic("capability_init: capability_zone not initialized"); + return (_cap_check(have, need, CAPFAIL_NOTCAPABLE)); } -SYSINIT(vfs, SI_SUB_VFS, SI_ORDER_ANY, capability_init, NULL); /* - * Test whether a capability grants the requested rights. + * Convert capability rights into VM access flags. */ -static int -cap_check(struct capability *c, cap_rights_t rights) +u_char +cap_rights_to_vmprot(cap_rights_t have) { + u_char maxprot; - if ((c->cap_rights | rights) != c->cap_rights) { -#ifdef KTRACE - if (KTRPOINT(curthread, KTR_CAPFAIL)) - ktrcapfail(CAPFAIL_NOTCAPABLE, rights, c->cap_rights); -#endif - return (ENOTCAPABLE); - } - return (0); + maxprot = VM_PROT_NONE; + if (have & CAP_MMAP_R) + maxprot |= VM_PROT_READ; + if (have & CAP_MMAP_W) + maxprot |= VM_PROT_WRITE; + if (have & CAP_MMAP_X) + maxprot |= VM_PROT_EXECUTE; + + return (maxprot); } /* @@ -231,43 +194,49 @@ cap_check(struct capability *c, cap_rights_t rights) * this one file. */ cap_rights_t -cap_rights(struct file *fp_cap) +cap_rights(struct filedesc *fdp, int fd) { - struct capability *c; - - KASSERT(fp_cap->f_type == DTYPE_CAPABILITY, - ("cap_rights: !capability")); - c = fp_cap->f_data; - return (c->cap_rights); + return (fdp->fd_ofiles[fd].fde_rights); } /* - * System call to create a new capability reference to either an existing - * file object or an an existing capability. + * System call to limit rights of the given capability. 
*/ int -sys_cap_new(struct thread *td, struct cap_new_args *uap) +sys_cap_rights_limit(struct thread *td, struct cap_rights_limit_args *uap) { - int error, capfd; - int fd = uap->fd; - struct file *fp; - cap_rights_t rights = uap->rights; + struct filedesc *fdp; + cap_rights_t rights; + int error, fd; + + fd = uap->fd; + rights = uap->rights; AUDIT_ARG_FD(fd); AUDIT_ARG_RIGHTS(rights); - error = fget(td, fd, rights, &fp); - if (error) - return (error); - AUDIT_ARG_FILE(td->td_proc, fp); - error = kern_capwrap(td, fp, rights, &capfd); - /* - * Release our reference to the file (kern_capwrap has held a reference - * for the filedesc array). - */ - fdrop(fp, td); - if (error == 0) - td->td_retval[0] = capfd; + + if ((rights & ~CAP_ALL) != 0) + return (EINVAL); + + fdp = td->td_proc->p_fd; + FILEDESC_XLOCK(fdp); + if (fget_locked(fdp, fd) == NULL) { + FILEDESC_XUNLOCK(fdp); + return (EBADF); + } + error = _cap_check(cap_rights(fdp, fd), rights, CAPFAIL_INCREASE); + if (error == 0) { + fdp->fd_ofiles[fd].fde_rights = rights; + if ((rights & CAP_IOCTL) == 0) { + free(fdp->fd_ofiles[fd].fde_ioctls, M_TEMP); + fdp->fd_ofiles[fd].fde_ioctls = NULL; + fdp->fd_ofiles[fd].fde_nioctls = 0; + } + if ((rights & CAP_FCNTL) == 0) + fdp->fd_ofiles[fd].fde_fcntls = 0; + } + FILEDESC_XUNLOCK(fdp); return (error); } @@ -275,247 +244,321 @@ sys_cap_new(struct thread *td, struct cap_new_args *uap) * System call to query the rights mask associated with a capability. 
*/ int -sys_cap_getrights(struct thread *td, struct cap_getrights_args *uap) +sys_cap_rights_get(struct thread *td, struct cap_rights_get_args *uap) { - struct capability *cp; - struct file *fp; - int error; + struct filedesc *fdp; + cap_rights_t rights; + int fd; - AUDIT_ARG_FD(uap->fd); - error = fgetcap(td, uap->fd, &fp); - if (error) - return (error); - cp = fp->f_data; - error = copyout(&cp->cap_rights, uap->rightsp, sizeof(*uap->rightsp)); - fdrop(fp, td); - return (error); + fd = uap->fd; + + AUDIT_ARG_FD(fd); + + fdp = td->td_proc->p_fd; + FILEDESC_SLOCK(fdp); + if (fget_locked(fdp, fd) == NULL) { + FILEDESC_SUNLOCK(fdp); + return (EBADF); + } + rights = cap_rights(fdp, fd); + FILEDESC_SUNLOCK(fdp); + return (copyout(&rights, uap->rightsp, sizeof(*uap->rightsp))); } /* - * Create a capability to wrap around an existing file. + * Test whether a capability grants the given ioctl command. + * If descriptor doesn't have CAP_IOCTL, then ioctls list is empty and + * ENOTCAPABLE will be returned. */ int -kern_capwrap(struct thread *td, struct file *fp, cap_rights_t rights, - int *capfdp) +cap_ioctl_check(struct filedesc *fdp, int fd, u_long cmd) { - struct capability *cp, *cp_old; - struct file *fp_object, *fcapp; - int error; - - if ((rights | CAP_MASK_VALID) != CAP_MASK_VALID) - return (EINVAL); + u_long *cmds; + ssize_t ncmds; + long i; - /* - * If a new capability is being derived from an existing capability, - * then the new capability rights must be a subset of the existing - * rights. - */ - if (fp->f_type == DTYPE_CAPABILITY) { - cp_old = fp->f_data; - if ((cp_old->cap_rights | rights) != cp_old->cap_rights) { -#ifdef KTRACE - if (KTRPOINT(curthread, KTR_CAPFAIL)) - ktrcapfail(CAPFAIL_INCREASE, - rights, cp_old->cap_rights); -#endif - return (ENOTCAPABLE); - } - } + FILEDESC_LOCK_ASSERT(fdp); + KASSERT(fd >= 0 && fd < fdp->fd_nfiles, + ("%s: invalid fd=%d", __func__, fd)); - /* - * Allocate a new file descriptor to hang the capability off of. 
- */ - error = falloc(td, &fcapp, capfdp, fp->f_flag); - if (error) - return (error); + ncmds = fdp->fd_ofiles[fd].fde_nioctls; + if (ncmds == -1) + return (0); - /* - * Rather than nesting capabilities, directly reference the object an - * existing capability references. There's nothing else interesting - * to preserve for future use, as we've incorporated the previous - * rights mask into the new one. This prevents us from having to - * deal with capability chains. - */ - if (fp->f_type == DTYPE_CAPABILITY) - fp_object = ((struct capability *)fp->f_data)->cap_object; - else - fp_object = fp; - fhold(fp_object); - cp = uma_zalloc(capability_zone, M_WAITOK | M_ZERO); - cp->cap_rights = rights; - cp->cap_object = fp_object; - cp->cap_file = fcapp; - if (fp->f_flag & DFLAG_PASSABLE) - finit(fcapp, fp->f_flag, DTYPE_CAPABILITY, cp, - &capability_ops); - else - finit(fcapp, fp->f_flag, DTYPE_CAPABILITY, cp, - &capability_ops_unpassable); + cmds = fdp->fd_ofiles[fd].fde_ioctls; + for (i = 0; i < ncmds; i++) { + if (cmds[i] == cmd) + return (0); + } - /* - * Release our private reference (the proc filedesc still has one). - */ - fdrop(fcapp, td); - return (0); + return (ENOTCAPABLE); } /* - * Given a file descriptor, test it against a capability rights mask and then - * return the file descriptor on which to actually perform the requested - * operation. As long as the reference to fp_cap remains valid, the returned - * pointer in *fp will remain valid, so no extra reference management is - * required, and the caller should fdrop() fp_cap as normal when done with - * both. + * Check if the current ioctls list can be replaced by the new one. 
*/ -int -cap_funwrap(struct file *fp_cap, cap_rights_t rights, struct file **fpp) +static int +cap_ioctl_limit_check(struct filedesc *fdp, int fd, const u_long *cmds, + size_t ncmds) { - struct capability *c; - int error; + u_long *ocmds; + ssize_t oncmds; + u_long i; + long j; - if (fp_cap->f_type != DTYPE_CAPABILITY) { - *fpp = fp_cap; + oncmds = fdp->fd_ofiles[fd].fde_nioctls; + if (oncmds == -1) return (0); + if (oncmds < (ssize_t)ncmds) + return (ENOTCAPABLE); + + ocmds = fdp->fd_ofiles[fd].fde_ioctls; + for (i = 0; i < ncmds; i++) { + for (j = 0; j < oncmds; j++) { + if (cmds[i] == ocmds[j]) + break; + } + if (j == oncmds) + return (ENOTCAPABLE); } - c = fp_cap->f_data; - error = cap_check(c, rights); - if (error) - return (error); - *fpp = c->cap_object; + return (0); } -/* - * Slightly different routine for memory mapping file descriptors: unwrap the - * capability and check CAP_MMAP, but also return a bitmask representing the - * maximum mapping rights the capability allows on the object. - */ int -cap_funwrap_mmap(struct file *fp_cap, cap_rights_t rights, u_char *maxprotp, - struct file **fpp) +sys_cap_ioctls_limit(struct thread *td, struct cap_ioctls_limit_args *uap) { - struct capability *c; - u_char maxprot; - int error; + struct filedesc *fdp; + u_long *cmds, *ocmds; + size_t ncmds; + int error, fd; - if (fp_cap->f_type != DTYPE_CAPABILITY) { - *fpp = fp_cap; - *maxprotp = VM_PROT_ALL; - return (0); + fd = uap->fd; + ncmds = uap->ncmds; + + AUDIT_ARG_FD(fd); + + if (ncmds > 256) /* XXX: Is 256 sane? 
*/ + return (EINVAL); + + if (ncmds == 0) { + cmds = NULL; + } else { + cmds = malloc(sizeof(cmds[0]) * ncmds, M_TEMP, M_WAITOK); + error = copyin(uap->cmds, cmds, sizeof(cmds[0]) * ncmds); + if (error != 0) { + free(cmds, M_TEMP); + return (error); + } } - c = fp_cap->f_data; - error = cap_check(c, rights | CAP_MMAP); - if (error) - return (error); - *fpp = c->cap_object; - maxprot = 0; - if (c->cap_rights & CAP_READ) - maxprot |= VM_PROT_READ; - if (c->cap_rights & CAP_WRITE) - maxprot |= VM_PROT_WRITE; - if (c->cap_rights & CAP_MAPEXEC) - maxprot |= VM_PROT_EXECUTE; - *maxprotp = maxprot; - return (0); + + fdp = td->td_proc->p_fd; + FILEDESC_XLOCK(fdp); + + if (fget_locked(fdp, fd) == NULL) { + error = EBADF; + goto out; + } + + error = cap_ioctl_limit_check(fdp, fd, cmds, ncmds); + if (error != 0) + goto out; + + ocmds = fdp->fd_ofiles[fd].fde_ioctls; + fdp->fd_ofiles[fd].fde_ioctls = cmds; + fdp->fd_ofiles[fd].fde_nioctls = ncmds; + + cmds = ocmds; + error = 0; +out: + FILEDESC_XUNLOCK(fdp); + free(cmds, M_TEMP); + return (error); } -/* - * When a capability is closed, simply drop the reference on the underlying - * object and free the capability. fdrop() will handle the case where the - * underlying object also needs to close, and the caller will have already - * performed any object-specific lock or mqueue handling. 
- */ -static int -capability_close(struct file *fp, struct thread *td) +int +sys_cap_ioctls_get(struct thread *td, struct cap_ioctls_get_args *uap) { - struct capability *c; - struct file *fp_object; - - KASSERT(fp->f_type == DTYPE_CAPABILITY, - ("capability_close: !capability")); - - c = fp->f_data; - fp->f_ops = &badfileops; - fp->f_data = NULL; - fp_object = c->cap_object; - uma_zfree(capability_zone, c); - return (fdrop(fp_object, td)); + struct filedesc *fdp; + struct filedescent *fdep; + u_long *cmds; + size_t maxcmds; + int error, fd; + + fd = uap->fd; + cmds = uap->cmds; + maxcmds = uap->maxcmds; + + AUDIT_ARG_FD(fd); + + fdp = td->td_proc->p_fd; + FILEDESC_SLOCK(fdp); + + if (fget_locked(fdp, fd) == NULL) { + error = EBADF; + goto out; + } + + /* + * If all ioctls are allowed (fde_nioctls == -1 && fde_ioctls == NULL) + * the only sane thing we can do is to not populate the given array and + * return CAP_IOCTLS_ALL. + */ + + fdep = &fdp->fd_ofiles[fd]; + if (cmds != NULL && fdep->fde_ioctls != NULL) { + error = copyout(fdep->fde_ioctls, cmds, + sizeof(cmds[0]) * MIN(fdep->fde_nioctls, maxcmds)); + if (error != 0) + goto out; + } + if (fdep->fde_nioctls == -1) + td->td_retval[0] = CAP_IOCTLS_ALL; + else + td->td_retval[0] = fdep->fde_nioctls; + + error = 0; +out: + FILEDESC_SUNLOCK(fdp); + return (error); } /* - * In general, file descriptor operations should never make it to the - * capability, only the underlying file descriptor operation vector, so panic - * if we do turn up here. + * Test whether a capability grants the given fcntl command. 
*/ -static int -capability_read(struct file *fp, struct uio *uio, struct ucred *active_cred, - int flags, struct thread *td) +int +cap_fcntl_check(struct filedesc *fdp, int fd, int cmd) { + uint32_t fcntlcap; - panic("capability_read"); -} - -static int -capability_write(struct file *fp, struct uio *uio, struct ucred *active_cred, - int flags, struct thread *td) -{ + KASSERT(fd >= 0 && fd < fdp->fd_nfiles, + ("%s: invalid fd=%d", __func__, fd)); - panic("capability_write"); -} + fcntlcap = (1 << cmd); + KASSERT((CAP_FCNTL_ALL & fcntlcap) != 0, + ("Unsupported fcntl=%d.", cmd)); -static int -capability_truncate(struct file *fp, off_t length, struct ucred *active_cred, - struct thread *td) -{ + if ((fdp->fd_ofiles[fd].fde_fcntls & fcntlcap) != 0) + return (0); - panic("capability_truncate"); + return (ENOTCAPABLE); } -static int -capability_ioctl(struct file *fp, u_long com, void *data, - struct ucred *active_cred, struct thread *td) +int +sys_cap_fcntls_limit(struct thread *td, struct cap_fcntls_limit_args *uap) { + struct filedesc *fdp; + uint32_t fcntlrights; + int fd; - panic("capability_ioctl"); -} + fd = uap->fd; + fcntlrights = uap->fcntlrights; -static int -capability_poll(struct file *fp, int events, struct ucred *active_cred, - struct thread *td) -{ + AUDIT_ARG_FD(fd); + AUDIT_ARG_FCNTL_RIGHTS(fcntlrights); - panic("capability_poll"); -} + if ((fcntlrights & ~CAP_FCNTL_ALL) != 0) + return (EINVAL); -static int -capability_kqfilter(struct file *fp, struct knote *kn) -{ + fdp = td->td_proc->p_fd; + FILEDESC_XLOCK(fdp); - panic("capability_kqfilter"); -} + if (fget_locked(fdp, fd) == NULL) { + FILEDESC_XUNLOCK(fdp); + return (EBADF); + } -static int -capability_stat(struct file *fp, struct stat *sb, struct ucred *active_cred, - struct thread *td) -{ + if ((fcntlrights & ~fdp->fd_ofiles[fd].fde_fcntls) != 0) { + FILEDESC_XUNLOCK(fdp); + return (ENOTCAPABLE); + } - panic("capability_stat"); + fdp->fd_ofiles[fd].fde_fcntls = fcntlrights; + FILEDESC_XUNLOCK(fdp); 
+ + return (0); } int -capability_chmod(struct file *fp, mode_t mode, struct ucred *active_cred, - struct thread *td) +sys_cap_fcntls_get(struct thread *td, struct cap_fcntls_get_args *uap) { + struct filedesc *fdp; + uint32_t rights; + int fd; + + fd = uap->fd; - panic("capability_chmod"); + AUDIT_ARG_FD(fd); + + fdp = td->td_proc->p_fd; + FILEDESC_SLOCK(fdp); + if (fget_locked(fdp, fd) == NULL) { + FILEDESC_SUNLOCK(fdp); + return (EBADF); + } + rights = fdp->fd_ofiles[fd].fde_fcntls; + FILEDESC_SUNLOCK(fdp); + + return (copyout(&rights, uap->fcntlrightsp, sizeof(rights))); } +/* + * For backward compatibility. + */ int -capability_chown(struct file *fp, uid_t uid, gid_t gid, - struct ucred *active_cred, struct thread *td) +sys_cap_new(struct thread *td, struct cap_new_args *uap) { + struct filedesc *fdp; + cap_rights_t rights; + register_t newfd; + int error, fd; + + fd = uap->fd; + rights = uap->rights; + + AUDIT_ARG_FD(fd); + AUDIT_ARG_RIGHTS(rights); + + if ((rights & ~CAP_ALL) != 0) + return (EINVAL); + + fdp = td->td_proc->p_fd; + FILEDESC_SLOCK(fdp); + if (fget_locked(fdp, fd) == NULL) { + FILEDESC_SUNLOCK(fdp); + return (EBADF); + } + error = _cap_check(cap_rights(fdp, fd), rights, CAPFAIL_INCREASE); + FILEDESC_SUNLOCK(fdp); + if (error != 0) + return (error); + + error = do_dup(td, 0, fd, 0, &newfd); + if (error != 0) + return (error); - panic("capability_chown"); + FILEDESC_XLOCK(fdp); + /* + * We don't really care about the race between checking capability + * rights for the source descriptor and now. If capability rights + * were ok at that earlier point, the process had this descriptor + * with those rights, so we don't increase them in security sense, + * the process might have done the cap_new(2) a bit earlier to get + * the same effect. 
+ */ + fdp->fd_ofiles[newfd].fde_rights = rights; + if ((rights & CAP_IOCTL) == 0) { + free(fdp->fd_ofiles[newfd].fde_ioctls, M_TEMP); + fdp->fd_ofiles[newfd].fde_ioctls = NULL; + fdp->fd_ofiles[newfd].fde_nioctls = 0; + } + if ((rights & CAP_FCNTL) == 0) + fdp->fd_ofiles[newfd].fde_fcntls = 0; + FILEDESC_XUNLOCK(fdp); + + td->td_retval[0] = newfd; + + return (0); } #else /* !CAPABILITIES */ @@ -524,42 +567,54 @@ capability_chown(struct file *fp, uid_t uid, gid_t gid, * Stub Capability functions for when options CAPABILITIES isn't compiled * into the kernel. */ + int -sys_cap_new(struct thread *td, struct cap_new_args *uap) +sys_cap_rights_limit(struct thread *td, struct cap_rights_limit_args *uap) { return (ENOSYS); } int -sys_cap_getrights(struct thread *td, struct cap_getrights_args *uap) +sys_cap_rights_get(struct thread *td, struct cap_rights_get_args *uap) { return (ENOSYS); } int -cap_funwrap(struct file *fp_cap, cap_rights_t rights, struct file **fpp) +sys_cap_ioctls_limit(struct thread *td, struct cap_ioctls_limit_args *uap) { - KASSERT(fp_cap->f_type != DTYPE_CAPABILITY, - ("cap_funwrap: saw capability")); + return (ENOSYS); +} - *fpp = fp_cap; - return (0); +int +sys_cap_ioctls_get(struct thread *td, struct cap_ioctls_get_args *uap) +{ + + return (ENOSYS); } int -cap_funwrap_mmap(struct file *fp_cap, cap_rights_t rights, u_char *maxprotp, - struct file **fpp) +sys_cap_fcntls_limit(struct thread *td, struct cap_fcntls_limit_args *uap) { - KASSERT(fp_cap->f_type != DTYPE_CAPABILITY, - ("cap_funwrap_mmap: saw capability")); + return (ENOSYS); +} - *fpp = fp_cap; - *maxprotp = VM_PROT_ALL; - return (0); +int +sys_cap_fcntls_get(struct thread *td, struct cap_fcntls_get_args *uap) +{ + + return (ENOSYS); +} + +int +sys_cap_new(struct thread *td, struct cap_new_args *uap) +{ + + return (ENOSYS); } #endif /* CAPABILITIES */ diff --git a/sys/kern/sys_generic.c b/sys/kern/sys_generic.c index b97ff7f..39f33f3 100644 --- a/sys/kern/sys_generic.c +++ 
b/sys/kern/sys_generic.c @@ -49,6 +49,7 @@ __FBSDID("$FreeBSD$"); #include <sys/filio.h> #include <sys/fcntl.h> #include <sys/file.h> +#include <sys/lock.h> #include <sys/proc.h> #include <sys/signalvar.h> #include <sys/socketvar.h> @@ -244,7 +245,7 @@ kern_readv(struct thread *td, int fd, struct uio *auio) struct file *fp; int error; - error = fget_read(td, fd, CAP_READ | CAP_SEEK, &fp); + error = fget_read(td, fd, CAP_READ, &fp); if (error) return (error); error = dofileread(td, fd, fp, auio, (off_t)-1, 0); @@ -287,7 +288,7 @@ kern_preadv(td, fd, auio, offset) struct file *fp; int error; - error = fget_read(td, fd, CAP_READ, &fp); + error = fget_read(td, fd, CAP_PREAD, &fp); if (error) return (error); if (!(fp->f_ops->fo_flags & DFLAG_SEEKABLE)) @@ -453,7 +454,7 @@ kern_writev(struct thread *td, int fd, struct uio *auio) struct file *fp; int error; - error = fget_write(td, fd, CAP_WRITE | CAP_SEEK, &fp); + error = fget_write(td, fd, CAP_WRITE, &fp); if (error) return (error); error = dofilewrite(td, fd, fp, auio, (off_t)-1, 0); @@ -496,7 +497,7 @@ kern_pwritev(td, fd, auio, offset) struct file *fp; int error; - error = fget_write(td, fd, CAP_WRITE, &fp); + error = fget_write(td, fd, CAP_PWRITE, &fp); if (error) return (error); if (!(fp->f_ops->fo_flags & DFLAG_SEEKABLE)) @@ -704,28 +705,60 @@ kern_ioctl(struct thread *td, int fd, u_long com, caddr_t data) { struct file *fp; struct filedesc *fdp; - int error; - int tmp; + int error, tmp, locked; AUDIT_ARG_FD(fd); AUDIT_ARG_CMD(com); - if ((error = fget(td, fd, CAP_IOCTL, &fp)) != 0) - return (error); - if ((fp->f_flag & (FREAD | FWRITE)) == 0) { - fdrop(fp, td); - return (EBADF); - } + fdp = td->td_proc->p_fd; + switch (com) { case FIONCLEX: + case FIOCLEX: FILEDESC_XLOCK(fdp); - fdp->fd_ofileflags[fd] &= ~UF_EXCLOSE; - FILEDESC_XUNLOCK(fdp); + locked = LA_XLOCKED; + break; + default: +#ifdef CAPABILITIES + FILEDESC_SLOCK(fdp); + locked = LA_SLOCKED; +#else + locked = LA_UNLOCKED; +#endif + break; + } + +#ifdef 
CAPABILITIES + if ((fp = fget_locked(fdp, fd)) == NULL) { + error = EBADF; + goto out; + } + if ((error = cap_ioctl_check(fdp, fd, com)) != 0) { + fp = NULL; /* fhold() was not called yet */ + goto out; + } + fhold(fp); + if (locked == LA_SLOCKED) { + FILEDESC_SUNLOCK(fdp); + locked = LA_UNLOCKED; + } +#else + if ((error = fget(td, fd, CAP_IOCTL, &fp)) != 0) { + fp = NULL; + goto out; + } +#endif + if ((fp->f_flag & (FREAD | FWRITE)) == 0) { + error = EBADF; + goto out; + } + + switch (com) { + case FIONCLEX: + fdp->fd_ofiles[fd].fde_flags &= ~UF_EXCLOSE; goto out; case FIOCLEX: - FILEDESC_XLOCK(fdp); - fdp->fd_ofileflags[fd] |= UF_EXCLOSE; - FILEDESC_XUNLOCK(fdp); + fdp->fd_ofiles[fd].fde_flags |= UF_EXCLOSE; goto out; case FIONBIO: if ((tmp = *(int *)data)) @@ -745,7 +778,21 @@ kern_ioctl(struct thread *td, int fd, u_long com, caddr_t data) error = fo_ioctl(fp, com, data, td->td_ucred, td); out: - fdrop(fp, td); + switch (locked) { + case LA_XLOCKED: + FILEDESC_XUNLOCK(fdp); + break; +#ifdef CAPABILITIES + case LA_SLOCKED: + FILEDESC_SUNLOCK(fdp); + break; +#endif + default: + FILEDESC_UNLOCK_ASSERT(fdp); + break; + } + if (fp != NULL) + fdrop(fp, td); return (error); } @@ -1130,32 +1177,8 @@ selsetbits(fd_mask **ibits, fd_mask **obits, int idx, fd_mask bit, int events) static __inline int getselfd_cap(struct filedesc *fdp, int fd, struct file **fpp) { - struct file *fp; -#ifdef CAPABILITIES - struct file *fp_fromcap; - int error; -#endif - if ((fp = fget_unlocked(fdp, fd)) == NULL) - return (EBADF); -#ifdef CAPABILITIES - /* - * If the file descriptor is for a capability, test rights and use - * the file descriptor references by the capability. 
- */ - error = cap_funwrap(fp, CAP_POLL_EVENT, &fp_fromcap); - if (error) { - fdrop(fp, curthread); - return (error); - } - if (fp != fp_fromcap) { - fhold(fp_fromcap); - fdrop(fp, curthread); - fp = fp_fromcap; - } -#endif /* CAPABILITIES */ - *fpp = fp; - return (0); + return (fget_unlocked(fdp, fd, CAP_POLL_EVENT, 0, fpp, NULL)); } /* @@ -1349,13 +1372,14 @@ pollrescan(struct thread *td) /* If the selinfo wasn't cleared the event didn't fire. */ if (si != NULL) continue; - fp = fdp->fd_ofiles[fd->fd]; + fp = fdp->fd_ofiles[fd->fd].fde_file; #ifdef CAPABILITIES - if ((fp == NULL) - || (cap_funwrap(fp, CAP_POLL_EVENT, &fp) != 0)) { + if (fp == NULL || + cap_check(cap_rights(fdp, fd->fd), CAP_POLL_EVENT) != 0) #else - if (fp == NULL) { + if (fp == NULL) #endif + { fd->revents = POLLNVAL; n++; continue; @@ -1408,9 +1432,8 @@ pollscan(td, fds, nfd) u_int nfd; { struct filedesc *fdp = td->td_proc->p_fd; - int i; struct file *fp; - int n = 0; + int i, n = 0; FILEDESC_SLOCK(fdp); for (i = 0; i < nfd; i++, fds++) { @@ -1420,13 +1443,15 @@ pollscan(td, fds, nfd) } else if (fds->fd < 0) { fds->revents = 0; } else { - fp = fdp->fd_ofiles[fds->fd]; + fp = fdp->fd_ofiles[fds->fd].fde_file; #ifdef CAPABILITIES - if ((fp == NULL) - || (cap_funwrap(fp, CAP_POLL_EVENT, &fp) != 0)) { + if (fp == NULL || + cap_check(cap_rights(fdp, fds->fd), + CAP_POLL_EVENT) != 0) #else - if (fp == NULL) { + if (fp == NULL) #endif + { fds->revents = POLLNVAL; n++; } else { diff --git a/sys/kern/syscalls.master b/sys/kern/syscalls.master index 148dea3..1a89010 100644 --- a/sys/kern/syscalls.master +++ b/sys/kern/syscalls.master @@ -917,7 +917,7 @@ struct shmid_ds *buf); } 513 AUE_LPATHCONF STD { int lpathconf(char *path, int name); } 514 AUE_CAP_NEW STD { int cap_new(int fd, uint64_t rights); } -515 AUE_CAP_GETRIGHTS STD { int cap_getrights(int fd, \ +515 AUE_CAP_RIGHTS_GET STD { int cap_rights_get(int fd, \ uint64_t *rightsp); } 516 AUE_CAP_ENTER STD { int cap_enter(void); } 517 AUE_CAP_GETMODE 
STD { int cap_getmode(u_int *modep); } @@ -955,5 +955,15 @@ int *status, int options, \ struct __wrusage *wrusage, \ siginfo_t *info); } +533 AUE_CAP_RIGHTS_LIMIT STD { int cap_rights_limit(int fd, \ + uint64_t rights); } +534 AUE_CAP_IOCTLS_LIMIT STD { int cap_ioctls_limit(int fd, \ + const u_long *cmds, size_t ncmds); } +535 AUE_CAP_IOCTLS_GET STD { ssize_t cap_ioctls_get(int fd, \ + u_long *cmds, size_t maxcmds); } +536 AUE_CAP_FCNTLS_LIMIT STD { int cap_fcntls_limit(int fd, \ + uint32_t fcntlrights); } +537 AUE_CAP_FCNTLS_GET STD { int cap_fcntls_get(int fd, \ + uint32_t *fcntlrightsp); } ; Please copy any additions and changes to the following compatability tables: ; sys/compat/freebsd32/syscalls.master diff --git a/sys/kern/tty.c b/sys/kern/tty.c index 5c7b753..02eccd7 100644 --- a/sys/kern/tty.c +++ b/sys/kern/tty.c @@ -1840,23 +1840,15 @@ ttyhook_register(struct tty **rtp, struct proc *p, int fd, int error, ref; /* Validate the file descriptor. */ - if ((fdp = p->p_fd) == NULL) - return (EBADF); - - fp = fget_unlocked(fdp, fd); - if (fp == NULL) - return (EBADF); + fdp = p->p_fd; + error = fget_unlocked(fdp, fd, CAP_TTYHOOK, 0, &fp, NULL); + if (error != 0) + return (error); if (fp->f_ops == &badfileops) { error = EBADF; goto done1; } -#ifdef CAPABILITIES - error = cap_funwrap(fp, CAP_TTYHOOK, &fp); - if (error) - goto done1; -#endif - /* * Make sure the vnode is bound to a character device. 
* Unlocked check for the vnode type is ok there, because we diff --git a/sys/kern/uipc_mqueue.c b/sys/kern/uipc_mqueue.c index 9da464c..2d18e77 100644 --- a/sys/kern/uipc_mqueue.c +++ b/sys/kern/uipc_mqueue.c @@ -45,6 +45,7 @@ #include <sys/cdefs.h> __FBSDID("$FreeBSD$"); +#include "opt_capsicum.h" #include "opt_compat.h" #include <sys/param.h> @@ -2032,8 +2033,8 @@ kern_kmq_open(struct thread *td, const char *upath, int flags, mode_t mode, &mqueueops); FILEDESC_XLOCK(fdp); - if (fdp->fd_ofiles[fd] == fp) - fdp->fd_ofileflags[fd] |= UF_EXCLOSE; + if (fdp->fd_ofiles[fd].fde_file == fp) + fdp->fd_ofiles[fd].fde_flags |= UF_EXCLOSE; FILEDESC_XUNLOCK(fdp); td->td_retval[0] = fd; fdrop(fp, td); @@ -2275,11 +2276,13 @@ again: error = EBADF; goto out; } - error = cap_funwrap(fp2, CAP_POLL_EVENT, &fp2); +#ifdef CAPABILITIES + error = cap_check(cap_rights(fdp, uap->mqd), CAP_POLL_EVENT); if (error) { FILEDESC_SUNLOCK(fdp); goto out; } +#endif if (fp2 != fp) { FILEDESC_SUNLOCK(fdp); error = EBADF; diff --git a/sys/kern/uipc_sem.c b/sys/kern/uipc_sem.c index c219844..2de3409 100644 --- a/sys/kern/uipc_sem.c +++ b/sys/kern/uipc_sem.c @@ -579,8 +579,8 @@ ksem_create(struct thread *td, const char *name, semid_t *semidp, mode_t mode, finit(fp, FREAD | FWRITE, DTYPE_SEM, ks, &ksem_ops); FILEDESC_XLOCK(fdp); - if (fdp->fd_ofiles[fd] == fp) - fdp->fd_ofileflags[fd] |= UF_EXCLOSE; + if (fdp->fd_ofiles[fd].fde_file == fp) + fdp->fd_ofiles[fd].fde_flags |= UF_EXCLOSE; FILEDESC_XUNLOCK(fdp); fdrop(fp, td); diff --git a/sys/kern/uipc_shm.c b/sys/kern/uipc_shm.c index 7f75bdc..0cbb8b3 100644 --- a/sys/kern/uipc_shm.c +++ b/sys/kern/uipc_shm.c @@ -629,8 +629,8 @@ sys_shm_open(struct thread *td, struct shm_open_args *uap) finit(fp, FFLAGS(uap->flags & O_ACCMODE), DTYPE_SHM, shmfd, &shm_ops); FILEDESC_XLOCK(fdp); - if (fdp->fd_ofiles[fd] == fp) - fdp->fd_ofileflags[fd] |= UF_EXCLOSE; + if (fdp->fd_ofiles[fd].fde_file == fp) + fdp->fd_ofiles[fd].fde_flags |= UF_EXCLOSE; FILEDESC_XUNLOCK(fdp); 
td->td_retval[0] = fd; fdrop(fp, td); diff --git a/sys/kern/uipc_syscalls.c b/sys/kern/uipc_syscalls.c index 665eb6d..847db35 100644 --- a/sys/kern/uipc_syscalls.c +++ b/sys/kern/uipc_syscalls.c @@ -121,38 +121,20 @@ SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufsused, CTLFLAG_RD, &nsfbufsused, 0, "Number of sendfile(2) sf_bufs in use"); /* - * Convert a user file descriptor to a kernel file entry and check that, if - * it is a capability, the right rights are present. A reference on the file - * entry is held upon returning. + * Convert a user file descriptor to a kernel file entry and check if required + * capability rights are present. + * A reference on the file entry is held upon returning. */ static int getsock_cap(struct filedesc *fdp, int fd, cap_rights_t rights, struct file **fpp, u_int *fflagp) { struct file *fp; -#ifdef CAPABILITIES - struct file *fp_fromcap; int error; -#endif - if (fdp == NULL || (fp = fget_unlocked(fdp, fd)) == NULL) - return (EBADF); -#ifdef CAPABILITIES - /* - * If the file descriptor is for a capability, test rights and use - * the file descriptor referenced by the capability. 
- */ - error = cap_funwrap(fp, rights, &fp_fromcap); - if (error) { - fdrop(fp, curthread); + error = fget_unlocked(fdp, fd, rights, 0, &fp, NULL); + if (error != 0) return (error); - } - if (fp != fp_fromcap) { - fhold(fp_fromcap); - fdrop(fp, curthread); - fp = fp_fromcap; - } -#endif /* CAPABILITIES */ if (fp->f_type != DTYPE_SOCKET) { fdrop(fp, curthread); return (ENOTSOCK); @@ -765,7 +747,7 @@ kern_sendit(td, s, mp, flags, control, segflg) #endif AUDIT_ARG_FD(s); - rights = CAP_WRITE; + rights = CAP_SEND; if (mp->msg_name != NULL) { AUDIT_ARG_SOCKADDR(td, mp->msg_name); rights |= CAP_CONNECT; @@ -974,7 +956,7 @@ kern_recvit(td, s, mp, fromseg, controlp) *controlp = NULL; AUDIT_ARG_FD(s); - error = getsock_cap(td->td_proc->p_fd, s, CAP_READ, &fp, NULL); + error = getsock_cap(td->td_proc->p_fd, s, CAP_RECV, &fp, NULL); if (error) return (error); so = fp->f_data; @@ -1850,7 +1832,11 @@ kern_sendfile(struct thread *td, struct sendfile_args *uap, * we send only the header/trailer and no payload data. */ AUDIT_ARG_FD(uap->fd); - if ((error = fgetvp_read(td, uap->fd, CAP_READ, &vp)) != 0) + /* + * sendfile(2) can start at any offset within a file so we require + * CAP_READ+CAP_SEEK = CAP_PREAD. + */ + if ((error = fgetvp_read(td, uap->fd, CAP_PREAD, &vp)) != 0) goto out; vn_lock(vp, LK_SHARED | LK_RETRY); if (vp->v_type == VREG) { @@ -1886,7 +1872,7 @@ kern_sendfile(struct thread *td, struct sendfile_args *uap, * The socket must be a stream socket and connected. * Remember if it a blocking or non-blocking socket. 
*/ - if ((error = getsock_cap(td->td_proc->p_fd, uap->s, CAP_WRITE, + if ((error = getsock_cap(td->td_proc->p_fd, uap->s, CAP_SEND, &sock_fp, NULL)) != 0) goto out; so = sock_fp->f_data; @@ -2423,7 +2409,7 @@ sys_sctp_generic_sendmsg (td, uap) u_sinfo = &sinfo; } - rights = CAP_WRITE; + rights = CAP_SEND; if (uap->tolen) { error = getsockaddr(&to, uap->to, uap->tolen); if (error) { @@ -2534,7 +2520,7 @@ sys_sctp_generic_sendmsg_iov(td, uap) return (error); u_sinfo = &sinfo; } - rights = CAP_WRITE; + rights = CAP_SEND; if (uap->tolen) { error = getsockaddr(&to, uap->to, uap->tolen); if (error) { @@ -2658,7 +2644,7 @@ sys_sctp_generic_recvmsg(td, uap) #endif AUDIT_ARG_FD(uap->sd); - error = getsock_cap(td->td_proc->p_fd, uap->sd, CAP_READ, &fp, NULL); + error = getsock_cap(td->td_proc->p_fd, uap->sd, CAP_RECV, &fp, NULL); if (error) { return (error); } diff --git a/sys/kern/uipc_usrreq.c b/sys/kern/uipc_usrreq.c index a6c308f..dcfd009 100644 --- a/sys/kern/uipc_usrreq.c +++ b/sys/kern/uipc_usrreq.c @@ -279,7 +279,7 @@ static void unp_drop(struct unpcb *, int); static void unp_gc(__unused void *, int); static void unp_scan(struct mbuf *, void (*)(struct file *)); static void unp_discard(struct file *); -static void unp_freerights(struct file **, int); +static void unp_freerights(struct filedescent *, int); static void unp_init(void); static int unp_internalize(struct mbuf **, struct thread *); static void unp_internalize_fp(struct file *); @@ -1642,14 +1642,14 @@ unp_drop(struct unpcb *unp, int errno) } static void -unp_freerights(struct file **rp, int fdcount) +unp_freerights(struct filedescent *fde, int fdcount) { - int i; struct file *fp; + int i; - for (i = 0; i < fdcount; i++) { - fp = *rp; - *rp++ = NULL; + for (i = 0; i < fdcount; i++, fde++) { + fp = fde->fde_file; + bzero(fde, sizeof(*fde)); unp_discard(fp); } } @@ -1661,8 +1661,8 @@ unp_externalize(struct mbuf *control, struct mbuf **controlp) struct cmsghdr *cm = mtod(control, struct cmsghdr *); int i; int 
*fdp; - struct file **rp; - struct file *fp; + struct filedesc *fdesc = td->td_proc->p_fd; + struct filedescent *fde, *fdep; void *data; socklen_t clen = control->m_len, datalen; int error, newfds; @@ -1683,20 +1683,20 @@ unp_externalize(struct mbuf *control, struct mbuf **controlp) datalen = (caddr_t)cm + cm->cmsg_len - (caddr_t)data; if (cm->cmsg_level == SOL_SOCKET && cm->cmsg_type == SCM_RIGHTS) { - newfds = datalen / sizeof(struct file *); - rp = data; + newfds = datalen / sizeof(*fdep); + fdep = data; /* If we're not outputting the descriptors free them. */ if (error || controlp == NULL) { - unp_freerights(rp, newfds); + unp_freerights(fdep, newfds); goto next; } - FILEDESC_XLOCK(td->td_proc->p_fd); + FILEDESC_XLOCK(fdesc); /* if the new FD's will not fit free them. */ if (!fdavail(td, newfds)) { - FILEDESC_XUNLOCK(td->td_proc->p_fd); + FILEDESC_XUNLOCK(fdesc); error = EMSGSIZE; - unp_freerights(rp, newfds); + unp_freerights(fdep, newfds); goto next; } @@ -1710,23 +1710,24 @@ unp_externalize(struct mbuf *control, struct mbuf **controlp) *controlp = sbcreatecontrol(NULL, newlen, SCM_RIGHTS, SOL_SOCKET); if (*controlp == NULL) { - FILEDESC_XUNLOCK(td->td_proc->p_fd); + FILEDESC_XUNLOCK(fdesc); error = E2BIG; - unp_freerights(rp, newfds); + unp_freerights(fdep, newfds); goto next; } fdp = (int *) CMSG_DATA(mtod(*controlp, struct cmsghdr *)); - for (i = 0; i < newfds; i++) { + for (i = 0; i < newfds; i++, fdep++, fdp++) { if (fdalloc(td, 0, &f)) panic("unp_externalize fdalloc failed"); - fp = *rp++; - td->td_proc->p_fd->fd_ofiles[f] = fp; - unp_externalize_fp(fp); - *fdp++ = f; + fde = &fdesc->fd_ofiles[f]; + fde->fde_file = fdep->fde_file; + filecaps_copy(&fdep->fde_caps, &fde->fde_caps); + unp_externalize_fp(fde->fde_file); + *fdp = f; } - FILEDESC_XUNLOCK(td->td_proc->p_fd); + FILEDESC_XUNLOCK(fdesc); } else { /* We can just copy anything else across. 
*/ if (error || controlp == NULL) @@ -1797,11 +1798,11 @@ unp_internalize(struct mbuf **controlp, struct thread *td) { struct mbuf *control = *controlp; struct proc *p = td->td_proc; - struct filedesc *fdescp = p->p_fd; + struct filedesc *fdesc = p->p_fd; struct bintime *bt; struct cmsghdr *cm = mtod(control, struct cmsghdr *); struct cmsgcred *cmcred; - struct file **rp; + struct filedescent *fde, *fdep; struct file *fp; struct timeval *tv; int i, fd, *fdp; @@ -1854,18 +1855,17 @@ unp_internalize(struct mbuf **controlp, struct thread *td) * files. If not, reject the entire operation. */ fdp = data; - FILEDESC_SLOCK(fdescp); + FILEDESC_SLOCK(fdesc); for (i = 0; i < oldfds; i++) { fd = *fdp++; - if (fd < 0 || fd >= fdescp->fd_nfiles || - fdescp->fd_ofiles[fd] == NULL) { - FILEDESC_SUNLOCK(fdescp); + if (fget_locked(fdesc, fd) == NULL) { + FILEDESC_SUNLOCK(fdesc); error = EBADF; goto out; } - fp = fdescp->fd_ofiles[fd]; + fp = fdesc->fd_ofiles[fd].fde_file; if (!(fp->f_ops->fo_flags & DFLAG_PASSABLE)) { - FILEDESC_SUNLOCK(fdescp); + FILEDESC_SUNLOCK(fdesc); error = EOPNOTSUPP; goto out; } @@ -1874,25 +1874,26 @@ unp_internalize(struct mbuf **controlp, struct thread *td) /* * Now replace the integer FDs with pointers to the - * associated global file table entry.. + * file structure and capability rights. 
*/ - newlen = oldfds * sizeof(struct file *); + newlen = oldfds * sizeof(*fdep); *controlp = sbcreatecontrol(NULL, newlen, SCM_RIGHTS, SOL_SOCKET); if (*controlp == NULL) { - FILEDESC_SUNLOCK(fdescp); + FILEDESC_SUNLOCK(fdesc); error = E2BIG; goto out; } fdp = data; - rp = (struct file **) + fdep = (struct filedescent *) CMSG_DATA(mtod(*controlp, struct cmsghdr *)); - for (i = 0; i < oldfds; i++) { - fp = fdescp->fd_ofiles[*fdp++]; - *rp++ = fp; - unp_internalize_fp(fp); + for (i = 0; i < oldfds; i++, fdep++, fdp++) { + fde = &fdesc->fd_ofiles[*fdp]; + fdep->fde_file = fde->fde_file; + filecaps_copy(&fde->fde_caps, &fdep->fde_caps); + unp_internalize_fp(fdep->fde_file); } - FILEDESC_SUNLOCK(fdescp); + FILEDESC_SUNLOCK(fdesc); break; case SCM_TIMESTAMP: @@ -2252,7 +2253,7 @@ static void unp_scan(struct mbuf *m0, void (*op)(struct file *)) { struct mbuf *m; - struct file **rp; + struct filedescent *fdep; struct cmsghdr *cm; void *data; int i; @@ -2277,10 +2278,10 @@ unp_scan(struct mbuf *m0, void (*op)(struct file *)) if (cm->cmsg_level == SOL_SOCKET && cm->cmsg_type == SCM_RIGHTS) { - qfds = datalen / sizeof (struct file *); - rp = data; - for (i = 0; i < qfds; i++) - (*op)(*rp++); + qfds = datalen / sizeof(*fdep); + fdep = data; + for (i = 0; i < qfds; i++, fdep++) + (*op)(fdep->fde_file); } if (CMSG_SPACE(datalen) < clen) { diff --git a/sys/kern/vfs_aio.c b/sys/kern/vfs_aio.c index 99b0197..cba1638 100644 --- a/sys/kern/vfs_aio.c +++ b/sys/kern/vfs_aio.c @@ -1593,16 +1593,16 @@ aio_aqueue(struct thread *td, struct aiocb *job, struct aioliojob *lj, fd = aiocbe->uaiocb.aio_fildes; switch (opcode) { case LIO_WRITE: - error = fget_write(td, fd, CAP_WRITE | CAP_SEEK, &fp); + error = fget_write(td, fd, CAP_PWRITE, &fp); break; case LIO_READ: - error = fget_read(td, fd, CAP_READ | CAP_SEEK, &fp); + error = fget_read(td, fd, CAP_PREAD, &fp); break; case LIO_SYNC: error = fget(td, fd, CAP_FSYNC, &fp); break; case LIO_NOP: - error = fget(td, fd, 0, &fp); + error = fget(td, 
fd, CAP_NONE, &fp); break; default: error = EINVAL; diff --git a/sys/kern/vfs_lookup.c b/sys/kern/vfs_lookup.c index fbde152..94d11f2 100644 --- a/sys/kern/vfs_lookup.c +++ b/sys/kern/vfs_lookup.c @@ -227,17 +227,18 @@ namei(struct nameidata *ndp) AUDIT_ARG_ATFD2(ndp->ni_dirfd); error = fgetvp_rights(td, ndp->ni_dirfd, ndp->ni_rightsneeded | CAP_LOOKUP, - &(ndp->ni_baserights), &dp); + &ndp->ni_filecaps, &dp); #ifdef CAPABILITIES /* - * Lookups relative to a capability must also be + * If file descriptor doesn't have all rights, + * all lookups relative to it must also be * strictly relative. - * - * Note that a capability with rights CAP_MASK_VALID - * is treated exactly like a regular file descriptor. */ - if (ndp->ni_baserights != CAP_MASK_VALID) + if (ndp->ni_filecaps.fc_rights != CAP_ALL || + ndp->ni_filecaps.fc_fcntls != CAP_FCNTL_ALL || + ndp->ni_filecaps.fc_nioctls != -1) { ndp->ni_strictrelative = 1; + } #endif } if (error != 0 || dp != NULL) { diff --git a/sys/kern/vfs_syscalls.c b/sys/kern/vfs_syscalls.c index bd44a3a..787399a 100644 --- a/sys/kern/vfs_syscalls.c +++ b/sys/kern/vfs_syscalls.c @@ -970,6 +970,8 @@ flags_to_rights(int flags) /* FALLTHROUGH */ case O_WRONLY: rights |= CAP_WRITE; + if (!(flags & O_APPEND)) + rights |= CAP_SEEK; break; } } @@ -1143,19 +1145,22 @@ success: * If we haven't already installed the FD (for dupfdopen), do so now. */ if (indx == -1) { + struct filecaps *fcaps; + #ifdef CAPABILITIES - if (nd.ni_strictrelative == 1) { - /* - * We are doing a strict relative lookup; wrap the - * result in a capability. - */ - if ((error = kern_capwrap(td, fp, nd.ni_baserights, - &indx)) != 0) - goto bad; - } else + if (nd.ni_strictrelative == 1) + fcaps = &nd.ni_filecaps; + else #endif - if ((error = finstall(td, fp, &indx, flags)) != 0) - goto bad; + fcaps = NULL; + error = finstall(td, fp, &indx, flags, fcaps); + /* On success finstall() consumes fcaps. 
*/ + if (error != 0) { + filecaps_free(&nd.ni_filecaps); + goto bad; + } + } else { + filecaps_free(&nd.ni_filecaps); } /* @@ -1279,7 +1284,7 @@ kern_mknodat(struct thread *td, int fd, char *path, enum uio_seg pathseg, restart: bwillwrite(); NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1, - pathseg, path, fd, CAP_MKNOD, td); + pathseg, path, fd, CAP_MKNODAT, td); if ((error = namei(&nd)) != 0) return (error); vp = nd.ni_vp; @@ -1399,7 +1404,7 @@ kern_mkfifoat(struct thread *td, int fd, char *path, enum uio_seg pathseg, restart: bwillwrite(); NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1, - pathseg, path, fd, CAP_MKFIFO, td); + pathseg, path, fd, CAP_MKFIFOAT, td); if ((error = namei(&nd)) != 0) return (error); if (nd.ni_vp != NULL) { @@ -1553,7 +1558,7 @@ kern_linkat(struct thread *td, int fd1, int fd2, char *path1, char *path2, return (error); } NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE2, - segflg, path2, fd2, CAP_CREATE, td); + segflg, path2, fd2, CAP_LINKAT, td); if ((error = namei(&nd)) == 0) { if (nd.ni_vp != NULL) { if (nd.ni_dvp == nd.ni_vp) @@ -1646,7 +1651,7 @@ kern_symlinkat(struct thread *td, char *path1, int fd, char *path2, restart: bwillwrite(); NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1, - segflg, path2, fd, CAP_CREATE, td); + segflg, path2, fd, CAP_SYMLINKAT, td); if ((error = namei(&nd)) != 0) goto out; if (nd.ni_vp) { @@ -1798,7 +1803,7 @@ kern_unlinkat(struct thread *td, int fd, char *path, enum uio_seg pathseg, restart: bwillwrite(); NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1, - pathseg, path, fd, CAP_DELETE, td); + pathseg, path, fd, CAP_UNLINKAT, td); if ((error = namei(&nd)) != 0) return (error == EINVAL ? 
EPERM : error); vp = nd.ni_vp; @@ -3502,10 +3507,10 @@ kern_renameat(struct thread *td, int oldfd, char *old, int newfd, char *new, bwillwrite(); #ifdef MAC NDINIT_ATRIGHTS(&fromnd, DELETE, LOCKPARENT | LOCKLEAF | SAVESTART | - AUDITVNODE1, pathseg, old, oldfd, CAP_DELETE, td); + AUDITVNODE1, pathseg, old, oldfd, CAP_RENAMEAT, td); #else NDINIT_ATRIGHTS(&fromnd, DELETE, WANTPARENT | SAVESTART | AUDITVNODE1, - pathseg, old, oldfd, CAP_DELETE, td); + pathseg, old, oldfd, CAP_RENAMEAT, td); #endif if ((error = namei(&fromnd)) != 0) @@ -3527,7 +3532,7 @@ kern_renameat(struct thread *td, int oldfd, char *old, int newfd, char *new, goto out1; } NDINIT_ATRIGHTS(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | - SAVESTART | AUDITVNODE2, pathseg, new, newfd, CAP_CREATE, td); + SAVESTART | AUDITVNODE2, pathseg, new, newfd, CAP_LINKAT, td); if (fromnd.ni_vp->v_type == VDIR) tond.ni_cnd.cn_flags |= WILLBEDIR; if ((error = namei(&tond)) != 0) { @@ -3550,6 +3555,15 @@ kern_renameat(struct thread *td, int oldfd, char *old, int newfd, char *new, error = EISDIR; goto out; } +#ifdef CAPABILITIES + /* + * If the target already exists we require CAP_UNLINKAT + * from 'newfd'. 
+ */ + error = cap_check(tond.ni_filecaps.fc_rights, CAP_UNLINKAT); + if (error != 0) + goto out; +#endif } if (fvp == tdvp) { error = EINVAL; @@ -3650,7 +3664,7 @@ kern_mkdirat(struct thread *td, int fd, char *path, enum uio_seg segflg, restart: bwillwrite(); NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1, - segflg, path, fd, CAP_MKDIR, td); + segflg, path, fd, CAP_MKDIRAT, td); nd.ni_cnd.cn_flags |= WILLBEDIR; if ((error = namei(&nd)) != 0) return (error); @@ -3734,7 +3748,7 @@ kern_rmdirat(struct thread *td, int fd, char *path, enum uio_seg pathseg) restart: bwillwrite(); NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1, - pathseg, path, fd, CAP_RMDIR, td); + pathseg, path, fd, CAP_UNLINKAT, td); if ((error = namei(&nd)) != 0) return (error); vp = nd.ni_vp; @@ -3987,8 +4001,7 @@ kern_getdirentries(struct thread *td, int fd, char *buf, u_int count, if (count > IOSIZE_MAX) return (EINVAL); auio.uio_resid = count; - if ((error = getvnode(td->td_proc->p_fd, fd, CAP_READ | CAP_SEEK, - &fp)) != 0) + if ((error = getvnode(td->td_proc->p_fd, fd, CAP_READ, &fp)) != 0) return (error); if ((fp->f_flag & FREAD) == 0) { fdrop(fp, td); @@ -4151,33 +4164,14 @@ out: * entry is held upon returning. */ int -getvnode(struct filedesc *fdp, int fd, cap_rights_t rights, - struct file **fpp) +getvnode(struct filedesc *fdp, int fd, cap_rights_t rights, struct file **fpp) { struct file *fp; -#ifdef CAPABILITIES - struct file *fp_fromcap; int error; -#endif - if (fdp == NULL || (fp = fget_unlocked(fdp, fd)) == NULL) - return (EBADF); -#ifdef CAPABILITIES - /* - * If the file descriptor is for a capability, test rights and use the - * file descriptor referenced by the capability. 
- */ - error = cap_funwrap(fp, rights, &fp_fromcap); - if (error) { - fdrop(fp, curthread); + error = fget_unlocked(fdp, fd, rights, 0, &fp, NULL); + if (error != 0) return (error); - } - if (fp != fp_fromcap) { - fhold(fp_fromcap); - fdrop(fp, curthread); - fp = fp_fromcap; - } -#endif /* CAPABILITIES */ /* * The file could be not of the vnode type, or it may be not @@ -4361,7 +4355,7 @@ sys_fhopen(td, uap) goto bad; } - error = finstall(td, fp, &indx, fmode); + error = finstall(td, fp, &indx, fmode, NULL); bad: fdrop(fp, td); td->td_retval[0] = indx; @@ -4614,7 +4608,7 @@ kern_posix_fadvise(struct thread *td, int fd, off_t offset, off_t len, return (EINVAL); } /* XXX: CAP_POSIX_FADVISE? */ - error = fget(td, fd, 0, &fp); + error = fget(td, fd, CAP_NONE, &fp); if (error != 0) goto out; diff --git a/sys/netsmb/smb_dev.c b/sys/netsmb/smb_dev.c index dfedd88..a09d74d 100644 --- a/sys/netsmb/smb_dev.c +++ b/sys/netsmb/smb_dev.c @@ -399,9 +399,7 @@ nsmb_getfp(struct filedesc* fdp, int fd, int flag) struct file* fp; FILEDESC_SLOCK(fdp); - if (fd < 0 || fd >= fdp->fd_nfiles || - (fp = fdp->fd_ofiles[fd]) == NULL || - (fp->f_flag & flag) == 0) { + if ((fp = fget_locked(fdp, fd)) == NULL || (fp->f_flag & flag) == 0) { FILEDESC_SUNLOCK(fdp); return (NULL); } diff --git a/sys/nfsserver/nfs_srvkrpc.c b/sys/nfsserver/nfs_srvkrpc.c index 64f2aaa..6b3a6b7 100644 --- a/sys/nfsserver/nfs_srvkrpc.c +++ b/sys/nfsserver/nfs_srvkrpc.c @@ -174,7 +174,8 @@ nfssvc_nfsserver(struct thread *td, struct nfssvc_args *uap) sizeof(addsockarg)); if (error) return (error); - if ((error = fget(td, addsockarg.sock, CAP_SOCK_ALL, &fp)) != 0) + error = fget(td, addsockarg.sock, CAP_SOCK_SERVER, &fp); + if (error) return (error); if (fp->f_type != DTYPE_SOCKET) { fdrop(fp, td); diff --git a/sys/ofed/include/linux/file.h b/sys/ofed/include/linux/file.h index cbeec39..b9bd8b1 100644 --- a/sys/ofed/include/linux/file.h +++ b/sys/ofed/include/linux/file.h @@ -47,7 +47,8 @@ linux_fget(unsigned int fd) { 
struct file *file; - file = fget_unlocked(curthread->td_proc->p_fd, fd); + if (fget_unlocked(curthread->td_proc->p_fd, fd, 0, 0, &file, NULL) != 0) + return (NULL); return (struct linux_file *)file->f_data; } @@ -69,8 +70,7 @@ put_unused_fd(unsigned int fd) { struct file *file; - file = fget_unlocked(curthread->td_proc->p_fd, fd); - if (file == NULL) + if (fget_unlocked(curthread->td_proc->p_fd, fd, 0, 0, &file, NULL) != 0) return; fdclose(curthread->td_proc->p_fd, file, fd, curthread); } @@ -80,7 +80,8 @@ fd_install(unsigned int fd, struct linux_file *filp) { struct file *file; - file = fget_unlocked(curthread->td_proc->p_fd, fd); + if (fget_unlocked(curthread->td_proc->p_fd, fd, 0, 0, &file, NULL) != 0) + file = NULL; filp->_file = file; finit(file, filp->f_mode, DTYPE_DEV, filp, &linuxfileops); } diff --git a/sys/security/audit/audit.h b/sys/security/audit/audit.h index f43f6c8..733a3c7 100644 --- a/sys/security/audit/audit.h +++ b/sys/security/audit/audit.h @@ -115,6 +115,7 @@ void audit_arg_file(struct proc *p, struct file *fp); void audit_arg_argv(char *argv, int argc, int length); void audit_arg_envv(char *envv, int envc, int length); void audit_arg_rights(cap_rights_t rights); +void audit_arg_fcntl_rights(uint32_t fcntlrights); void audit_sysclose(struct thread *td, int fd); void audit_cred_copy(struct ucred *src, struct ucred *dest); void audit_cred_destroy(struct ucred *cred); @@ -241,6 +242,11 @@ void audit_thread_free(struct thread *td); audit_arg_rights((rights)); \ } while (0) +#define AUDIT_ARG_FCNTL_RIGHTS(fcntlrights) do { \ + if (AUDITING_TD(curthread)) \ + audit_arg_fcntl_rights((fcntlrights)); \ +} while (0) + #define AUDIT_ARG_RUID(ruid) do { \ if (AUDITING_TD(curthread)) \ audit_arg_ruid((ruid)); \ @@ -354,6 +360,7 @@ void audit_thread_free(struct thread *td); #define AUDIT_ARG_PROCESS(p) #define AUDIT_ARG_RGID(rgid) #define AUDIT_ARG_RIGHTS(rights) +#define AUDIT_ARG_FCNTL_RIGHTS(fcntlrights) #define AUDIT_ARG_RUID(ruid) #define 
AUDIT_ARG_SIGNUM(signum) #define AUDIT_ARG_SGID(sgid) diff --git a/sys/security/audit/audit_arg.c b/sys/security/audit/audit_arg.c index 41d6b42..ec04b8b 100644 --- a/sys/security/audit/audit_arg.c +++ b/sys/security/audit/audit_arg.c @@ -871,6 +871,19 @@ audit_arg_rights(cap_rights_t rights) ARG_SET_VALID(ar, ARG_RIGHTS); } +void +audit_arg_fcntl_rights(uint32_t fcntlrights) +{ + struct kaudit_record *ar; + + ar = currecord(); + if (ar == NULL) + return; + + ar->k_ar.ar_arg_fcntl_rights = fcntlrights; + ARG_SET_VALID(ar, ARG_FCNTL_RIGHTS); +} + /* * The close() system call uses it's own audit call to capture the path/vnode * information because those pieces are not easily obtained within the system diff --git a/sys/security/audit/audit_bsm.c b/sys/security/audit/audit_bsm.c index 8881cea..9c69b1e 100644 --- a/sys/security/audit/audit_bsm.c +++ b/sys/security/audit/audit_bsm.c @@ -1597,6 +1597,7 @@ kaudit_to_bsm(struct kaudit_record *kar, struct au_record **pau) break; case AUE_CAP_NEW: + case AUE_CAP_RIGHTS_LIMIT: /* * XXXRW/XXXJA: Would be nice to audit socket/etc information. 
*/ @@ -1607,13 +1608,25 @@ kaudit_to_bsm(struct kaudit_record *kar, struct au_record **pau) } break; - case AUE_CAP_GETRIGHTS: + case AUE_CAP_FCNTLS_GET: + case AUE_CAP_IOCTLS_GET: + case AUE_CAP_IOCTLS_LIMIT: + case AUE_CAP_RIGHTS_GET: if (ARG_IS_VALID(kar, ARG_FD)) { tok = au_to_arg32(1, "fd", ar->ar_arg_fd); kau_write(rec, tok); } break; + case AUE_CAP_FCNTLS_LIMIT: + FD_VNODE1_TOKENS; + if (ARG_IS_VALID(kar, ARG_FCNTL_RIGHTS)) { + tok = au_to_arg32(2, "fcntlrights", + ar->ar_arg_fcntl_rights); + kau_write(rec, tok); + } + break; + case AUE_CAP_ENTER: case AUE_CAP_GETMODE: break; diff --git a/sys/security/audit/audit_private.h b/sys/security/audit/audit_private.h index 10ccd5b..e23ba08 100644 --- a/sys/security/audit/audit_private.h +++ b/sys/security/audit/audit_private.h @@ -230,6 +230,7 @@ struct audit_record { int ar_arg_exitretval; struct sockaddr_storage ar_arg_sockaddr; cap_rights_t ar_arg_rights; + uint32_t ar_arg_fcntl_rights; char ar_jailname[MAXHOSTNAMELEN]; }; @@ -291,6 +292,7 @@ struct audit_record { #define ARG_ATFD1 0x0004000000000000ULL #define ARG_ATFD2 0x0008000000000000ULL #define ARG_RIGHTS 0x0010000000000000ULL +#define ARG_FCNTL_RIGHTS 0x0020000000000000ULL #define ARG_NONE 0x0000000000000000ULL #define ARG_ALL 0xFFFFFFFFFFFFFFFFULL diff --git a/sys/sys/capability.h b/sys/sys/capability.h index a163c4c..27e56c2 100644 --- a/sys/sys/capability.h +++ b/sys/sys/capability.h @@ -1,10 +1,14 @@ /*- * Copyright (c) 2008-2010 Robert N. M. Watson + * Copyright (c) 2012 FreeBSD Foundation * All rights reserved. * * This software was developed at the University of Cambridge Computer * Laboratory with support from a grant from Google, Inc. * + * Portions of this software were developed by Pawel Jakub Dawidek under + * sponsorship from the FreeBSD Foundation. 
+ * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: @@ -36,9 +40,10 @@ #define _SYS_CAPABILITY_H_ #include <sys/cdefs.h> -#include <sys/types.h> +#include <sys/param.h> #include <sys/file.h> +#include <sys/fcntl.h> /* * Possible rights on capabilities. @@ -54,34 +59,69 @@ * involve reads or writes depending a great deal on context. */ -/* General file I/O. */ -#define CAP_READ 0x0000000000000001ULL /* read/recv */ -#define CAP_WRITE 0x0000000000000002ULL /* write/send */ -#define CAP_MMAP 0x0000000000000004ULL /* mmap */ -#define CAP_MAPEXEC 0x0000000000000008ULL /* mmap(2) as exec */ +#define CAP_NONE 0x0000000000000000ULL + +/* + * General file I/O. + */ +/* Allows for openat(O_RDONLY), read(2), readv(2). */ +#define CAP_READ 0x0000000000000001ULL +/* Allows for openat(O_WRONLY | O_APPEND), write(2), writev(2). */ +#define CAP_WRITE 0x0000000000000002ULL +/* Allows for lseek(2). */ +#define CAP_SEEK 0x0000000000000080ULL +/* Allows for pread(2), preadv(2). */ +#define CAP_PREAD (CAP_SEEK | CAP_READ) +/* Allows for openat(O_WRONLY) (without O_APPEND), pwrite(2), pwritev(2). */ +#define CAP_PWRITE (CAP_SEEK | CAP_WRITE) +/* Allows for mmap(PROT_NONE). */ +#define CAP_MMAP 0x0000000000000004ULL +/* Allows for mmap(PROT_READ). */ +#define CAP_MMAP_R (CAP_MMAP | CAP_SEEK | CAP_READ) +/* Allows for mmap(PROT_WRITE). */ +#define CAP_MMAP_W (CAP_MMAP | CAP_SEEK | CAP_WRITE) +/* Allows for mmap(PROT_EXEC). */ +#define CAP_MMAP_X (CAP_MMAP | CAP_SEEK | 0x0000000000000008ULL) +/* Allows for mmap(PROT_READ | PROT_WRITE). */ +#define CAP_MMAP_RW (CAP_MMAP_R | CAP_MMAP_W) +/* Allows for mmap(PROT_READ | PROT_EXEC). */ +#define CAP_MMAP_RX (CAP_MMAP_R | CAP_MMAP_X) +/* Allows for mmap(PROT_WRITE | PROT_EXEC). */ +#define CAP_MMAP_WX (CAP_MMAP_W | CAP_MMAP_X) +/* Allows for mmap(PROT_READ | PROT_WRITE | PROT_EXEC). 
*/ +#define CAP_MMAP_RWX (CAP_MMAP_R | CAP_MMAP_W | CAP_MMAP_X) +/* Allows for openat(O_CREAT). */ +#define CAP_CREATE 0x0000000000080000ULL +/* Allows for openat(O_EXEC) and fexecve(2) in turn. */ #define CAP_FEXECVE 0x0000000000000010ULL +/* Allows for openat(O_SYNC), openat(O_FSYNC), fsync(2). */ #define CAP_FSYNC 0x0000000000000020ULL +/* Allows for openat(O_TRUNC), ftruncate(2). */ #define CAP_FTRUNCATE 0x0000000000000040ULL -#define CAP_SEEK 0x0000000000000080ULL /* VFS methods. */ -#define CAP_FCHFLAGS 0x0000000000000100ULL #define CAP_FCHDIR 0x0000000000000200ULL +#define CAP_FCHFLAGS 0x0000000000000100ULL #define CAP_FCHMOD 0x0000000000000400ULL +#define CAP_FCHMODAT CAP_FCHMOD #define CAP_FCHOWN 0x0000000000000800ULL +#define CAP_FCHOWNAT CAP_FCHOWN #define CAP_FCNTL 0x0000000000001000ULL -#define CAP_FPATHCONF 0x0000000000002000ULL #define CAP_FLOCK 0x0000000000004000ULL +#define CAP_FPATHCONF 0x0000000000002000ULL #define CAP_FSCK 0x0000000000008000ULL #define CAP_FSTAT 0x0000000000010000ULL +#define CAP_FSTATAT CAP_FSTAT #define CAP_FSTATFS 0x0000000000020000ULL #define CAP_FUTIMES 0x0000000000040000ULL -#define CAP_CREATE 0x0000000000080000ULL -#define CAP_DELETE 0x0000000000100000ULL -#define CAP_MKDIR 0x0000000000200000ULL -#define CAP_RMDIR 0x0000000000400000ULL -#define CAP_MKFIFO 0x0000000000800000ULL -#define CAP_MKNOD 0x0080000000000000ULL +#define CAP_FUTIMESAT CAP_FUTIMES +#define CAP_LINKAT 0x0000000000400000ULL +#define CAP_MKDIRAT 0x0000000000200000ULL +#define CAP_MKFIFOAT 0x0000000000800000ULL +#define CAP_MKNODAT 0x0080000000000000ULL +#define CAP_RENAMEAT 0x0200000000000000ULL +#define CAP_SYMLINKAT 0x0100000000000000ULL +#define CAP_UNLINKAT 0x0000000000100000ULL /* Lookups - used to constrain *at() calls. 
*/ #define CAP_LOOKUP 0x0000000001000000ULL @@ -107,13 +147,18 @@ #define CAP_GETSOCKOPT 0x0000004000000000ULL #define CAP_LISTEN 0x0000008000000000ULL #define CAP_PEELOFF 0x0000010000000000ULL +#define CAP_RECV CAP_READ +#define CAP_SEND CAP_WRITE #define CAP_SETSOCKOPT 0x0000020000000000ULL #define CAP_SHUTDOWN 0x0000040000000000ULL -#define CAP_SOCK_ALL \ - (CAP_ACCEPT | CAP_BIND | CAP_CONNECT \ - | CAP_GETPEERNAME | CAP_GETSOCKNAME | CAP_GETSOCKOPT \ - | CAP_LISTEN | CAP_PEELOFF | CAP_SETSOCKOPT | CAP_SHUTDOWN) +#define CAP_SOCK_CLIENT \ + (CAP_CONNECT | CAP_GETPEERNAME | CAP_GETSOCKNAME | CAP_GETSOCKOPT | \ + CAP_PEELOFF | CAP_RECV | CAP_SEND | CAP_SETSOCKOPT | CAP_SHUTDOWN) +#define CAP_SOCK_SERVER \ + (CAP_ACCEPT | CAP_BIND | CAP_GETPEERNAME | CAP_GETSOCKNAME | \ + CAP_GETSOCKOPT | CAP_LISTEN | CAP_PEELOFF | CAP_RECV | CAP_SEND | \ + CAP_SETSOCKOPT | CAP_SHUTDOWN) /* Mandatory Access Control. */ #define CAP_MAC_GET 0x0000080000000000ULL @@ -138,40 +183,77 @@ #define CAP_PDKILL 0x0040000000000000ULL /* The mask of all valid method rights. */ -#define CAP_MASK_VALID 0x00ffffffffffffffULL +#define CAP_MASK_VALID 0x03ffffffffffffffULL +#define CAP_ALL CAP_MASK_VALID -#ifdef _KERNEL +/* Available bits. */ +#define CAP_UNUSED5 0x0400000000000000ULL +#define CAP_UNUSED4 0x0800000000000000ULL +#define CAP_UNUSED3 0x1000000000000000ULL +#define CAP_UNUSED2 0x2000000000000000ULL +#define CAP_UNUSED1 0x4000000000000000ULL +#define CAP_UNUSED0 0x8000000000000000ULL -#define IN_CAPABILITY_MODE(td) (td->td_ucred->cr_flags & CRED_FLAG_CAPMODE) +/* + * The following defines are provided for backward API compatibility and + * should not be used in new code. + */ +#define CAP_MAPEXEC CAP_MMAP_X +#define CAP_DELETE CAP_UNLINKAT +#define CAP_MKDIR CAP_MKDIRAT +#define CAP_RMDIR CAP_UNLINKAT +#define CAP_MKFIFO CAP_MKFIFOAT +#define CAP_MKNOD CAP_MKNODAT +#define CAP_SOCK_ALL (CAP_SOCK_CLIENT | CAP_SOCK_SERVER) /* - * Create a capability to wrap a file object. 
+ * Allowed fcntl(2) commands. */ -int kern_capwrap(struct thread *td, struct file *fp, cap_rights_t rights, - int *capfd); +#define CAP_FCNTL_GETFL (1 << F_GETFL) +#define CAP_FCNTL_SETFL (1 << F_SETFL) +#if __BSD_VISIBLE || __XSI_VISIBLE || __POSIX_VISIBLE >= 200112 +#define CAP_FCNTL_GETOWN (1 << F_GETOWN) +#define CAP_FCNTL_SETOWN (1 << F_SETOWN) +#endif +#if __BSD_VISIBLE || __XSI_VISIBLE || __POSIX_VISIBLE >= 200112 +#define CAP_FCNTL_ALL (CAP_FCNTL_GETFL | CAP_FCNTL_SETFL | \ + CAP_FCNTL_GETOWN | CAP_FCNTL_SETOWN) +#else +#define CAP_FCNTL_ALL (CAP_FCNTL_GETFL | CAP_FCNTL_SETFL) +#endif + +#define CAP_IOCTLS_ALL SSIZE_MAX + +#ifdef _KERNEL + +#include <sys/systm.h> + +#define IN_CAPABILITY_MODE(td) ((td->td_ucred->cr_flags & CRED_FLAG_CAPMODE) != 0) + +struct filedesc; /* - * Unwrap a capability if its rights mask is a superset of 'rights'. - * - * Unwrapping a non-capability is effectively a no-op; the value of fp_cap - * is simply copied into fpp. + * Test whether a capability grants the requested rights. + */ +int cap_check(cap_rights_t have, cap_rights_t need); +/* + * Convert capability rights into VM access flags. */ -int cap_funwrap(struct file *fp_cap, cap_rights_t rights, - struct file **fpp); -int cap_funwrap_mmap(struct file *fp_cap, cap_rights_t rights, - u_char *maxprotp, struct file **fpp); +u_char cap_rights_to_vmprot(cap_rights_t have); /* * For the purposes of procstat(1) and similar tools, allow kern_descrip.c to - * extract the rights from a capability. However, this should not be used by - * kernel code generally, instead cap_funwrap() should be used in order to - * keep all access control in one place. + * extract the rights from a capability. 
*/ -cap_rights_t cap_rights(struct file *fp_cap); +cap_rights_t cap_rights(struct filedesc *fdp, int fd); + +int cap_ioctl_check(struct filedesc *fdp, int fd, u_long cmd); +int cap_fcntl_check(struct filedesc *fdp, int fd, int cmd); #else /* !_KERNEL */ __BEGIN_DECLS +#include <stdbool.h> /* * cap_enter(): Cause the process to enter capability mode, which will @@ -187,21 +269,46 @@ __BEGIN_DECLS int cap_enter(void); /* - * cap_getmode(): Are we in capability mode? + * Are we sandboxed (in capability mode)? + * This is a libc wrapper around the cap_getmode(2) system call. */ -int cap_getmode(u_int* modep); +bool cap_sandboxed(void); /* - * cap_new(): Create a new capability derived from an existing file - * descriptor with the specified rights. If the existing file descriptor is - * a capability, then the new rights must be a subset of the existing rights. + * cap_getmode(): Are we in capability mode? */ -int cap_new(int fd, cap_rights_t rights); +int cap_getmode(u_int *modep); /* - * cap_getrights(): Query the rights on a capability. + * Limits capability rights for the given descriptor (CAP_*). + */ +int cap_rights_limit(int fd, cap_rights_t rights); +/* + * Returns bitmask of capability rights for the given descriptor. */ -int cap_getrights(int fd, cap_rights_t *rightsp); +int cap_rights_get(int fd, cap_rights_t *rightsp); +/* + * Limits allowed ioctls for the given descriptor. + */ +int cap_ioctls_limit(int fd, const unsigned long *cmds, size_t ncmds); +/* + * Returns array of allowed ioctls for the given descriptor. + * If all ioctls are allowed, the cmds array is not populated and + * the function returns CAP_IOCTLS_ALL. + */ +ssize_t cap_ioctls_get(int fd, unsigned long *cmds, size_t maxcmds); +/* + * Limits allowed fcntls for the given descriptor (CAP_FCNTL_*). + */ +int cap_fcntls_limit(int fd, uint32_t fcntlrights); +/* + * Returns bitmask of allowed fcntls for the given descriptor. 
+ */ +int cap_fcntls_get(int fd, uint32_t *fcntlrightsp); + +/* For backward compatibility. */ +int cap_new(int fd, cap_rights_t rights); +#define cap_getrights(fd, rightsp) cap_rights_get((fd), (rightsp)) __END_DECLS diff --git a/sys/sys/file.h b/sys/sys/file.h index cf5f1ea..cfdc1d8 100644 --- a/sys/sys/file.h +++ b/sys/sys/file.h @@ -64,12 +64,12 @@ struct socket; #define DTYPE_SEM 9 /* posix semaphore */ #define DTYPE_PTS 10 /* pseudo teletype master device */ #define DTYPE_DEV 11 /* Device specific fd type */ -#define DTYPE_CAPABILITY 12 /* capability */ -#define DTYPE_PROCDESC 13 /* process descriptor */ +#define DTYPE_PROCDESC 12 /* process descriptor */ #ifdef _KERNEL struct file; +struct filecaps; struct ucred; #define FOF_OFFSET 0x01 /* Use the offset in uio argument */ @@ -217,7 +217,6 @@ int fget_read(struct thread *td, int fd, cap_rights_t rights, struct file **fpp); int fget_write(struct thread *td, int fd, cap_rights_t rights, struct file **fpp); -int fgetcap(struct thread *td, int fd, struct file **fpp); int _fdrop(struct file *fp, struct thread *td); /* @@ -242,7 +241,7 @@ int fgetvp(struct thread *td, int fd, cap_rights_t rights, struct vnode **vpp); int fgetvp_exec(struct thread *td, int fd, cap_rights_t rights, struct vnode **vpp); int fgetvp_rights(struct thread *td, int fd, cap_rights_t need, - cap_rights_t *have, struct vnode **vpp); + struct filecaps *havecaps, struct vnode **vpp); int fgetvp_read(struct thread *td, int fd, cap_rights_t rights, struct vnode **vpp); int fgetvp_write(struct thread *td, int fd, cap_rights_t rights, diff --git a/sys/sys/filedesc.h b/sys/sys/filedesc.h index 07cfb1f..f3e3a09 100644 --- a/sys/sys/filedesc.h +++ b/sys/sys/filedesc.h @@ -41,6 +41,23 @@ #include <machine/_limits.h> +struct filecaps { + cap_rights_t fc_rights; /* per-descriptor capability rights */ + uint32_t fc_fcntls; /* per-descriptor allowed fcntls */ + u_long *fc_ioctls; /* per-descriptor allowed ioctls */ + int16_t fc_nioctls; /* fc_ioctls array 
size */ +}; + +struct filedescent { + struct file *fde_file; /* file structure for open file */ + struct filecaps fde_caps; /* per-descriptor rights */ + uint8_t fde_flags; /* per-process open file flags */ +}; +#define fde_rights fde_caps.fc_rights +#define fde_fcntls fde_caps.fc_fcntls +#define fde_ioctls fde_caps.fc_ioctls +#define fde_nioctls fde_caps.fc_nioctls + /* * This structure is used for the management of descriptors. It may be * shared by multiple processes. @@ -48,8 +65,7 @@ #define NDSLOTTYPE u_long struct filedesc { - struct file **fd_ofiles; /* file structures for open files */ - char *fd_ofileflags; /* per-process open file flags */ + struct filedescent *fd_ofiles; /* open files */ struct vnode *fd_cdir; /* current directory */ struct vnode *fd_rdir; /* root directory */ struct vnode *fd_jdir; /* jail root directory */ @@ -92,6 +108,15 @@ struct filedesc_to_leader { #ifdef _KERNEL +#include <sys/systm.h> /* CTASSERT() */ + +CTASSERT(sizeof(cap_rights_t) == sizeof(uint64_t)); + +/* Flags for do_dup() */ +#define DUP_FIXED 0x1 /* Force fixed allocation. */ +#define DUP_FCNTL 0x2 /* fcntl()-style errors. */ +#define DUP_CLOEXEC 0x4 /* Atomically set FD_CLOEXEC. */ + /* Lock a file descriptor table. 
*/ #define FILEDESC_LOCK_INIT(fdp) sx_init(&(fdp)->fd_sx, "filedesc structure") #define FILEDESC_LOCK_DESTROY(fdp) sx_destroy(&(fdp)->fd_sx) @@ -109,13 +134,20 @@ struct filedesc_to_leader { struct thread; +void filecaps_init(struct filecaps *fcaps); +void filecaps_copy(const struct filecaps *src, struct filecaps *dst); +void filecaps_free(struct filecaps *fcaps); + int closef(struct file *fp, struct thread *td); +int do_dup(struct thread *td, int flags, int old, int new, + register_t *retval); int dupfdopen(struct thread *td, struct filedesc *fdp, int dfd, int mode, int openerror, int *indxp); int falloc(struct thread *td, struct file **resultfp, int *resultfd, int flags); int falloc_noinstall(struct thread *td, struct file **resultfp); -int finstall(struct thread *td, struct file *fp, int *resultfp, int flags); +int finstall(struct thread *td, struct file *fp, int *resultfp, int flags, + struct filecaps *fcaps); int fdalloc(struct thread *td, int minfd, int *result); int fdavail(struct thread *td, int n); int fdcheckstd(struct thread *td); @@ -123,7 +155,7 @@ void fdclose(struct filedesc *fdp, struct file *fp, int idx, struct thread *td); void fdcloseexec(struct thread *td); struct filedesc *fdcopy(struct filedesc *fdp); void fdunshare(struct proc *p, struct thread *td); -void fdfree(struct thread *td); +void fdescfree(struct thread *td); struct filedesc *fdinit(struct filedesc *fdp); struct filedesc *fdshare(struct filedesc *fdp); struct filedesc_to_leader * @@ -135,7 +167,8 @@ void mountcheckdirs(struct vnode *olddp, struct vnode *newdp); void setugidsafety(struct thread *td); /* Return a referenced file from an unlocked descriptor. */ -struct file *fget_unlocked(struct filedesc *fdp, int fd); +int fget_unlocked(struct filedesc *fdp, int fd, cap_rights_t needrights, + int needfcntl, struct file **fpp, cap_rights_t *haverightsp); /* Requires a FILEDESC_{S,X}LOCK held and returns without a ref. 
*/ static __inline struct file * @@ -147,7 +180,7 @@ fget_locked(struct filedesc *fdp, int fd) if (fd < 0 || fd >= fdp->fd_nfiles) return (NULL); - return (fdp->fd_ofiles[fd]); + return (fdp->fd_ofiles[fd].fde_file); } #endif /* _KERNEL */ diff --git a/sys/sys/namei.h b/sys/sys/namei.h index 3b43916..a9992f4 100644 --- a/sys/sys/namei.h +++ b/sys/sys/namei.h @@ -33,6 +33,7 @@ #ifndef _SYS_NAMEI_H_ #define _SYS_NAMEI_H_ +#include <sys/filedesc.h> #include <sys/queue.h> #include <sys/uio.h> @@ -75,7 +76,7 @@ struct nameidata { /* * Results: returned from namei */ - cap_rights_t ni_baserights; /* rights the *at base has (or -1) */ + struct filecaps ni_filecaps; /* rights the *at base has */ /* * Results: returned from/manipulated by lookup */ @@ -180,7 +181,7 @@ NDINIT_ALL(struct nameidata *ndp, ndp->ni_startdir = startdir; ndp->ni_strictrelative = 0; ndp->ni_rightsneeded = rights; - ndp->ni_baserights = 0; + filecaps_init(&ndp->ni_filecaps); ndp->ni_cnd.cn_thread = td; } diff --git a/sys/sys/user.h b/sys/sys/user.h index ddaccb8..5de76ac 100644 --- a/sys/sys/user.h +++ b/sys/sys/user.h @@ -251,8 +251,7 @@ struct user { #define KF_TYPE_SHM 8 #define KF_TYPE_SEM 9 #define KF_TYPE_PTS 10 -/* no KF_TYPE_CAPABILITY (11), since capabilities wrap other file objects */ -#define KF_TYPE_PROCDESC 12 +#define KF_TYPE_PROCDESC 11 #define KF_TYPE_UNKNOWN 255 #define KF_VTYPE_VNON 0 @@ -288,7 +287,6 @@ struct user { #define KF_FLAG_TRUNC 0x00001000 #define KF_FLAG_EXCL 0x00002000 #define KF_FLAG_EXEC 0x00004000 -#define KF_FLAG_CAPABILITY 0x00008000 /* * Old format. Has variable hidden padding due to alignment. 
diff --git a/sys/vm/vm_mmap.c b/sys/vm/vm_mmap.c index 05bb8ae..cf94fe5 100644 --- a/sys/vm/vm_mmap.c +++ b/sys/vm/vm_mmap.c @@ -305,13 +305,13 @@ sys_mmap(td, uap) */ rights = CAP_MMAP; if (prot & PROT_READ) - rights |= CAP_READ; + rights |= CAP_MMAP_R; if ((flags & MAP_SHARED) != 0) { if (prot & PROT_WRITE) - rights |= CAP_WRITE; + rights |= CAP_MMAP_W; } if (prot & PROT_EXEC) - rights |= CAP_MAPEXEC; + rights |= CAP_MMAP_X; if ((error = fget_mmap(td, uap->fd, rights, &cap_maxprot, &fp)) != 0) goto done; diff --git a/usr.bin/kdump/kdump.c b/usr.bin/kdump/kdump.c index 2769832..82317f2 100644 --- a/usr.bin/kdump/kdump.c +++ b/usr.bin/kdump/kdump.c @@ -1008,6 +1008,7 @@ ktrsyscall(struct ktr_syscall *ktr, u_int flags) narg--; break; case SYS_cap_new: + case SYS_cap_rights_limit: print_number(ip, narg, c); putchar(','); arg = *ip; @@ -1035,6 +1036,14 @@ ktrsyscall(struct ktr_syscall *ktr, u_int flags) } capname(arg); break; + case SYS_cap_fcntls_limit: + print_number(ip, narg, c); + putchar(','); + arg = *ip; + ip++; + narg--; + capfcntlname(arg); + break; case SYS_posix_fadvise: print_number(ip, narg, c); print_number(ip, narg, c); diff --git a/usr.bin/kdump/mksubr b/usr.bin/kdump/mksubr index d56f030..aed8291 100644 --- a/usr.bin/kdump/mksubr +++ b/usr.bin/kdump/mksubr @@ -361,6 +361,7 @@ _EOF_ auto_or_type "accessmodename" "[A-Z]_OK[[:space:]]+0?x?[0-9A-Fa-f]+" "sys/unistd.h" auto_switch_type "acltypename" "ACL_TYPE_[A-Z4_]+[[:space:]]+0x[0-9]+" "sys/acl.h" auto_or_type "capname" "CAP_[A-Z]+[[:space:]]+0x[01248]{16}ULL" "sys/capability.h" +auto_or_type "capfcntlname" "CAP_FCNTL_[A-Z]+[[:space:]]+\(1" "sys/capability.h" auto_switch_type "extattrctlname" "EXTATTR_NAMESPACE_[A-Z]+[[:space:]]+0x[0-9]+" "sys/extattr.h" auto_switch_type "fadvisebehavname" "POSIX_FADV_[A-Z]+[[:space:]]+[0-9]+" "sys/fcntl.h" auto_or_type "flagsname" "O_[A-Z]+[[:space:]]+0x[0-9A-Fa-f]+" "sys/fcntl.h" diff --git a/usr.bin/procstat/procstat_files.c b/usr.bin/procstat/procstat_files.c index 
3773900..030bba1 100644 --- a/usr.bin/procstat/procstat_files.c +++ b/usr.bin/procstat/procstat_files.c @@ -139,33 +139,34 @@ static struct cap_desc { /* General file I/O. */ { CAP_READ, "rd" }, { CAP_WRITE, "wr" }, + { CAP_SEEK, "se" }, { CAP_MMAP, "mm" }, - { CAP_MAPEXEC, "me" }, + { CAP_CREATE, "cr" }, { CAP_FEXECVE, "fe" }, { CAP_FSYNC, "fy" }, { CAP_FTRUNCATE, "ft" }, - { CAP_SEEK, "se" }, /* VFS methods. */ - { CAP_FCHFLAGS, "cf" }, { CAP_FCHDIR, "cd" }, + { CAP_FCHFLAGS, "cf" }, { CAP_FCHMOD, "cm" }, { CAP_FCHOWN, "cn" }, { CAP_FCNTL, "fc" }, - { CAP_FPATHCONF, "fp" }, { CAP_FLOCK, "fl" }, + { CAP_FPATHCONF, "fp" }, { CAP_FSCK, "fk" }, { CAP_FSTAT, "fs" }, { CAP_FSTATFS, "sf" }, { CAP_FUTIMES, "fu" }, - { CAP_CREATE, "cr" }, - { CAP_DELETE, "de" }, - { CAP_MKDIR, "md" }, - { CAP_RMDIR, "rm" }, - { CAP_MKFIFO, "mf" }, - { CAP_MKNOD, "mn" }, - - /* Lookups - used to constraint *at() calls. */ + { CAP_LINKAT, "li" }, + { CAP_MKDIRAT, "md" }, + { CAP_MKFIFOAT, "mf" }, + { CAP_MKNODAT, "mn" }, + { CAP_RENAMEAT, "rn" }, + { CAP_SYMLINKAT, "sl" }, + { CAP_UNLINKAT, "un" }, + + /* Lookups - used to constrain *at() calls. */ { CAP_LOOKUP, "lo" }, /* Extended attributes. */ @@ -213,6 +214,24 @@ static struct cap_desc { { CAP_PDGETPID, "pg" }, { CAP_PDWAIT, "pw" }, { CAP_PDKILL, "pk" }, + + /* Aliases and defines that combine multiple rights. 
*/ + { CAP_PREAD, "prd" }, + { CAP_PWRITE, "pwr" }, + + { CAP_MMAP_R, "mmr" }, + { CAP_MMAP_W, "mmw" }, + { CAP_MMAP_X, "mmx" }, + { CAP_MMAP_RW, "mrw" }, + { CAP_MMAP_RX, "mrx" }, + { CAP_MMAP_WX, "mwx" }, + { CAP_MMAP_RWX, "mma" }, + + { CAP_RECV, "re" }, + { CAP_SEND, "sd" }, + + { CAP_SOCK_CLIENT, "scl" }, + { CAP_SOCK_SERVER, "ssr" }, }; static const u_int cap_desc_count = sizeof(cap_desc) / sizeof(cap_desc[0]); @@ -225,7 +244,7 @@ width_capability(cap_rights_t rights) count = 0; width = 0; for (i = 0; i < cap_desc_count; i++) { - if (rights & cap_desc[i].cd_right) { + if ((cap_desc[i].cd_right & ~rights) == 0) { width += strlen(cap_desc[i].cd_desc); if (count) width++; @@ -249,7 +268,7 @@ print_capability(cap_rights_t rights, u_int capwidth) printf("-"); } for (i = 0; i < cap_desc_count; i++) { - if (rights & cap_desc[i].cd_right) { + if ((cap_desc[i].cd_right & ~rights) == 0) { printf("%s%s", count ? "," : "", cap_desc[i].cd_desc); width += strlen(cap_desc[i].cd_desc); if (count) @@ -261,7 +280,7 @@ print_capability(cap_rights_t rights, u_int capwidth) void procstat_files(struct procstat *procstat, struct kinfo_proc *kipp) -{ +{ struct sockstat sock; struct filestat_list *head; struct filestat *fst; @@ -423,8 +442,6 @@ procstat_files(struct procstat *procstat, struct kinfo_proc *kipp) printf("%s", fst->fs_fflags & PS_FST_FFLAG_NONBLOCK ? "n" : "-"); printf("%s", fst->fs_fflags & PS_FST_FFLAG_DIRECT ? "d" : "-"); printf("%s", fst->fs_fflags & PS_FST_FFLAG_HASLOCK ? "l" : "-"); - printf("%s ", fst->fs_fflags & PS_FST_FFLAG_CAPABILITY ? - "c" : "-"); if (!Cflag) { if (fst->fs_ref_count > -1) printf("%3d ", fst->fs_ref_count); |