summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorjhb <jhb@FreeBSD.org>2008-01-08 21:58:16 +0000
committerjhb <jhb@FreeBSD.org>2008-01-08 21:58:16 +0000
commit8cd9437636744162d1427275b2fe66cf8ccef25c (patch)
tree49b07dc757aae71e0a64eb4939cde4037af60a24
parent23d78439c96372baa4a3c2847df65f8e11455ae7 (diff)
downloadFreeBSD-src-8cd9437636744162d1427275b2fe66cf8ccef25c.zip
FreeBSD-src-8cd9437636744162d1427275b2fe66cf8ccef25c.tar.gz
Add a new file descriptor type for IPC shared memory objects and use it to
implement shm_open(2) and shm_unlink(2) in the kernel: - Each shared memory file descriptor is associated with a swap-backed vm object which provides the backing store. Each descriptor starts off with a size of zero, but the size can be altered via ftruncate(2). The shared memory file descriptors also support fstat(2). read(2), write(2), ioctl(2), select(2), poll(2), and kevent(2) are not supported on shared memory file descriptors. - shm_open(2) and shm_unlink(2) are now implemented as system calls that manage shared memory file descriptors. The virtual namespace that maps pathnames to shared memory file descriptors is implemented as a hash table where the hash key is generated via the 32-bit Fowler/Noll/Vo hash of the pathname. - As an extension, the constant 'SHM_ANON' may be specified in place of the path argument to shm_open(2). In this case, an unnamed shared memory file descriptor will be created similar to the IPC_PRIVATE key for shmget(2). Note that the shared memory object can still be shared among processes by sharing the file descriptor via fork(2) or sendmsg(2), but it is unnamed. This effectively serves to implement the getmemfd() idea bandied about the lists several times over the years. - The backing store for shared memory file descriptors are garbage collected when they are not referenced by any open file descriptors or the shm_open(2) virtual namespace. Submitted by: dillon, peter (previous versions) Submitted by: rwatson (I based this on his version) Reviewed by: alc (suggested converting getmemfd() to shm_open())
-rw-r--r--lib/libc/gen/Makefile.inc5
-rw-r--r--lib/libc/gen/posixshm.c72
-rw-r--r--lib/libc/gen/shm_open.3192
-rw-r--r--lib/libc/sys/Makefile.inc3
-rw-r--r--lib/libc/sys/shm_open.2224
-rw-r--r--sys/compat/freebsd32/syscalls.master3
-rw-r--r--sys/conf/files2
-rw-r--r--sys/kern/kern_descrip.c2
-rw-r--r--sys/kern/syscalls.master3
-rw-r--r--sys/kern/uipc_shm.c608
-rw-r--r--sys/security/mac/mac_framework.h13
-rw-r--r--sys/security/mac/mac_policy.h28
-rw-r--r--sys/security/mac/mac_posix_shm.c146
-rw-r--r--sys/security/mac_stub/mac_stub.c56
-rw-r--r--sys/security/mac_test/mac_test.c96
-rw-r--r--sys/sys/fcntl.h19
-rw-r--r--sys/sys/file.h1
-rw-r--r--sys/sys/mman.h33
-rw-r--r--sys/vm/vm_mmap.c56
19 files changed, 1216 insertions, 346 deletions
diff --git a/lib/libc/gen/Makefile.inc b/lib/libc/gen/Makefile.inc
index 68258da..8ea422d 100644
--- a/lib/libc/gen/Makefile.inc
+++ b/lib/libc/gen/Makefile.inc
@@ -21,7 +21,7 @@ SRCS+= __getosreldate.c __xuname.c \
initgroups.c isatty.c isinf.c isnan.c jrand48.c lcong48.c \
lockf.c lrand48.c mrand48.c nftw.c nice.c \
nlist.c nrand48.c opendir.c \
- pause.c pmadvise.c popen.c posixshm.c pselect.c \
+ pause.c pmadvise.c popen.c pselect.c \
psignal.c pw_scan.c pwcache.c \
raise.c readdir.c readpassphrase.c rewinddir.c \
scandir.c seed48.c seekdir.c sem.c semctl.c \
@@ -59,7 +59,7 @@ MAN+= alarm.3 arc4random.3 \
raise.3 rand48.3 readpassphrase.3 rfork_thread.3 \
scandir.3 sem_destroy.3 sem_getvalue.3 sem_init.3 \
sem_open.3 sem_post.3 sem_wait.3 \
- setjmp.3 setmode.3 setproctitle.3 shm_open.3 \
+ setjmp.3 setmode.3 setproctitle.3 \
siginterrupt.3 signal.3 sigsetops.3 sleep.3 \
statvfs.3 stringlist.3 \
strtofflags.3 sysconf.3 sysctl.3 syslog.3 tcgetpgrp.3 \
@@ -133,7 +133,6 @@ MLINKS+=setjmp.3 _longjmp.3 setjmp.3 _setjmp.3 setjmp.3 longjmp.3 \
setjmp.3 longjmperr.3 setjmp.3 longjmperror.3 \
setjmp.3 siglongjmp.3 setjmp.3 sigsetjmp.3
MLINKS+=setmode.3 getmode.3
-MLINKS+=shm_open.3 shm_unlink.3
MLINKS+=sigsetops.3 sigaddset.3 sigsetops.3 sigdelset.3 \
sigsetops.3 sigemptyset.3 sigsetops.3 sigfillset.3 \
sigsetops.3 sigismember.3
diff --git a/lib/libc/gen/posixshm.c b/lib/libc/gen/posixshm.c
deleted file mode 100644
index 05fc1c9..0000000
--- a/lib/libc/gen/posixshm.c
+++ /dev/null
@@ -1,72 +0,0 @@
-/*
- * Copyright 2000 Massachusetts Institute of Technology
- *
- * Permission to use, copy, modify, and distribute this software and
- * its documentation for any purpose and without fee is hereby
- * granted, provided that both the above copyright notice and this
- * permission notice appear in all copies, that both the above
- * copyright notice and this permission notice appear in all
- * supporting documentation, and that the name of M.I.T. not be used
- * in advertising or publicity pertaining to distribution of the
- * software without specific, written prior permission. M.I.T. makes
- * no representations about the suitability of this software for any
- * purpose. It is provided "as is" without express or implied
- * warranty.
- *
- * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''. M.I.T. DISCLAIMS
- * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE,
- * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT
- * SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
- * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
- * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
- * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
- * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-
-#include "namespace.h"
-#include <sys/types.h>
-#include <sys/fcntl.h>
-#include <sys/mman.h>
-#include <sys/stat.h>
-
-#include <errno.h>
-#include <unistd.h>
-#include "un-namespace.h"
-
-int
-shm_open(const char *path, int flags, mode_t mode)
-{
- int fd;
- struct stat stab;
-
- if ((flags & O_ACCMODE) == O_WRONLY)
- return (EINVAL);
-
- fd = _open(path, flags, mode);
- if (fd != -1) {
- if (_fstat(fd, &stab) != 0 || !S_ISREG(stab.st_mode)) {
- _close(fd);
- errno = EINVAL;
- return (-1);
- }
-
- if (_fcntl(fd, F_SETFL, (int)FPOSIXSHM) != 0) {
- _close(fd);
- return (-1);
- }
- }
- return (fd);
-}
-
-int
-shm_unlink(const char *path)
-{
- return (unlink(path));
-}
diff --git a/lib/libc/gen/shm_open.3 b/lib/libc/gen/shm_open.3
deleted file mode 100644
index b68e85b..0000000
--- a/lib/libc/gen/shm_open.3
+++ /dev/null
@@ -1,192 +0,0 @@
-.\"
-.\" Copyright 2000 Massachusetts Institute of Technology
-.\"
-.\" Permission to use, copy, modify, and distribute this software and
-.\" its documentation for any purpose and without fee is hereby
-.\" granted, provided that both the above copyright notice and this
-.\" permission notice appear in all copies, that both the above
-.\" copyright notice and this permission notice appear in all
-.\" supporting documentation, and that the name of M.I.T. not be used
-.\" in advertising or publicity pertaining to distribution of the
-.\" software without specific, written prior permission. M.I.T. makes
-.\" no representations about the suitability of this software for any
-.\" purpose. It is provided "as is" without express or implied
-.\" warranty.
-.\"
-.\" THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''. M.I.T. DISCLAIMS
-.\" ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE,
-.\" INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
-.\" MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT
-.\" SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-.\" SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-.\" LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
-.\" USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
-.\" ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
-.\" OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
-.\" OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-.\" SUCH DAMAGE.
-.\"
-.\" $FreeBSD$
-.\"
-.Dd March 24, 2000
-.Dt SHM_OPEN 3
-.Os
-.Sh NAME
-.Nm shm_open , shm_unlink
-.Nd "shared memory object operations"
-.Sh LIBRARY
-.Lb libc
-.Sh SYNOPSIS
-.In sys/types.h
-.In sys/mman.h
-.Ft int
-.Fn shm_open "const char *path" "int flags" "mode_t mode"
-.Ft int
-.Fn shm_unlink "const char *path"
-.Sh DESCRIPTION
-The
-.Fn shm_open
-function opens (or optionally creates) a
-.Tn POSIX
-shared memory object named
-.Fa path .
-The
-.Fn shm_unlink
-function removes a shared memory object named
-.Fa path .
-.Pp
-In the
-.Fx
-implementation,
-.Tn POSIX
-shared memory objects are implemented as ordinary files.
-The
-.Fn shm_open
-and
-.Fn shm_unlink
-act as wrappers around the
-.Xr open 2
-and
-.Xr unlink 2
-routines, and
-.Fa path ,
-.Fa flags ,
-and
-.Fa mode
-arguments are as specified for those functions.
-The
-.Fa flags
-argument is checked to ensure that the access mode specified is not
-.Dv O_WRONLY
-(which is not defined for shared memory objects).
-.Pp
-In addition, the
-.Fx
-implementation causes
-.Fn mmap
-of a descriptor returned by
-.Fn shm_open
-to behave as if the
-.Dv MAP_NOSYNC
-flag had been specified to
-.Xr mmap 2 .
-(It does so by setting a special file flag using
-.Xr fcntl 2 . )
-.Pp
-The
-.Fn shm_unlink
-function makes no effort to ensure that
-.Fa path
-refers to a shared memory object.
-.Sh RETURN VALUES
-If successful,
-.Fn shm_open
-returns a non-negative integer;
-.Fn shm_unlink
-returns zero.
-Both functions return -1 on failure, and set
-.Va errno
-to indicate the error.
-.Sh COMPATIBILITY
-The
-.Fa path
-argument does not necessarily represent a pathname (although it does in this
-and most other implementations).
-Two processes opening the same
-.Fa path
-are guaranteed to access the same shared memory object if and only if
-.Fa path
-begins with a slash
-.Pq Ql \&/
-character.
-.Pp
-Only the
-.Dv O_RDONLY ,
-.Dv O_RDWR ,
-.Dv O_CREAT ,
-.Dv O_EXCL ,
-and
-.Dv O_TRUNC
-flags may be used in portable programs.
-.Pp
-The result of using
-.Xr open 2 ,
-.Xr read 2 ,
-or
-.Xr write 2
-on a shared memory object, or on the descriptor returned by
-.Fn shm_open ,
-is undefined.
-It is also undefined whether the shared memory object itself, or its
-contents, persist across reboots.
-.Sh ERRORS
-The
-.Fn shm_open
-and
-.Fn shm_unlink
-functions can fail with any error defined for
-.Fn open
-and
-.Fn unlink ,
-respectively.
-In addition, the following errors are defined for
-.Fn shm_open :
-.Bl -tag -width Er
-.It Bq Er EINVAL
-The object named by
-.Fa path
-is not a shared memory object
-(i.e., it is not a regular file).
-.It Bq Er EINVAL
-The
-.Fa flags
-argument to
-.Fn shm_open
-specifies an access mode of
-.Dv O_WRONLY .
-.El
-.Sh SEE ALSO
-.Xr mmap 2 ,
-.Xr munmap 2 ,
-.Xr open 2 ,
-.Xr unlink 2
-.Sh STANDARDS
-The
-.Fn shm_open
-and
-.Fn shm_unlink
-functions are believed to conform to
-.St -p1003.1b-93 .
-.Sh HISTORY
-The
-.Fn shm_open
-and
-.Fn shm_unlink
-functions first appeared in
-.Fx 4.3 .
-.Sh AUTHORS
-.An Garrett A. Wollman Aq wollman@FreeBSD.org
-(C library support and this manual page)
-.Pp
-.An Matthew Dillon Aq dillon@FreeBSD.org
-.Pq Dv MAP_NOSYNC
diff --git a/lib/libc/sys/Makefile.inc b/lib/libc/sys/Makefile.inc
index 75ffcfc..fe17d7d 100644
--- a/lib/libc/sys/Makefile.inc
+++ b/lib/libc/sys/Makefile.inc
@@ -83,7 +83,7 @@ MAN+= _exit.2 abort2.2 accept.2 access.2 acct.2 adjtime.2 \
read.2 readlink.2 reboot.2 recv.2 rename.2 revoke.2 rfork.2 rmdir.2 \
rtprio.2 select.2 semctl.2 semget.2 semop.2 send.2 sendfile.2 \
setgroups.2 setpgid.2 setregid.2 setresuid.2 setreuid.2 setsid.2 \
- setuid.2 shmat.2 shmctl.2 shmget.2 shutdown.2 \
+ setuid.2 shmat.2 shmctl.2 shmget.2 shm_open.2 shutdown.2 \
sigaction.2 sigaltstack.2 sigpending.2 sigprocmask.2 sigqueue.2 \
sigreturn.2 sigstack.2 sigsuspend.2 sigwait.2 sigwaitinfo.2 \
socket.2 socketpair.2 stat.2 statfs.2 \
@@ -154,6 +154,7 @@ MLINKS+=setpgid.2 setpgrp.2
MLINKS+=setresuid.2 setresgid.2 setresuid.2 getresuid.2 setresuid.2 getresgid.2
MLINKS+=setuid.2 setegid.2 setuid.2 seteuid.2 setuid.2 setgid.2
MLINKS+=shmat.2 shmdt.2
+MLINKS+=shm_open.2 shm_unlink.2
MLINKS+=sigwaitinfo.2 sigtimedwait.2
MLINKS+=stat.2 fstat.2 stat.2 lstat.2
MLINKS+=statfs.2 fstatfs.2
diff --git a/lib/libc/sys/shm_open.2 b/lib/libc/sys/shm_open.2
index b68e85b..5c5d694 100644
--- a/lib/libc/sys/shm_open.2
+++ b/lib/libc/sys/shm_open.2
@@ -28,8 +28,8 @@
.\"
.\" $FreeBSD$
.\"
-.Dd March 24, 2000
-.Dt SHM_OPEN 3
+.Dd March 20, 2007
+.Dt SHM_OPEN 2
.Os
.Sh NAME
.Nm shm_open , shm_unlink
@@ -46,62 +46,104 @@
.Sh DESCRIPTION
The
.Fn shm_open
-function opens (or optionally creates) a
+system call opens (or optionally creates) a
.Tn POSIX
shared memory object named
.Fa path .
The
-.Fn shm_unlink
-function removes a shared memory object named
-.Fa path .
-.Pp
-In the
-.Fx
-implementation,
-.Tn POSIX
-shared memory objects are implemented as ordinary files.
-The
-.Fn shm_open
+.Fa flags
+argument contains a subset of the flags used by
+.Xr open 2 .
+An access mode of either
+.Dv O_RDONLY
+or
+.Dv O_RDWR
+must be included in
+.Fa flags .
+The optional flags
+.Dv O_CREAT ,
+.Dv O_EXCL ,
and
-.Fn shm_unlink
-act as wrappers around the
-.Xr open 2
+.Dv O_TRUNC
+may also be specified.
+.Pp
+If
+.Dv O_CREAT
+is specified,
+then a new shared memory object named
+.Fa path
+will be created if it does not exist.
+In this case,
+the shared memory object is created with mode
+.Fa mode
+subject to the process' umask value.
+If both the
+.Dv O_CREAT
and
-.Xr unlink 2
-routines, and
-.Fa path ,
-.Fa flags ,
+.Dv O_EXCL
+flags are specified and a shared memory object named
+.Fa path
+already exists,
+then
+.Fn shm_open
+will fail with
+.Er EEXIST .
+.Pp
+Newly created objects start off with a size of zero.
+If an existing shared memory object is opened with
+.Dv O_RDWR
+and the
+.Dv O_TRUNC
+flag is specified,
+then the shared memory object will be truncated to a size of zero.
+The size of the object can be adjusted via
+.Xr ftruncate 2
+and queried via
+.Xr fstat 2 .
+.Pp
+The new descriptor is set to close during
+.Xr execve 2
+system calls;
+see
+.Xr close 2
and
-.Fa mode
-arguments are as specified for those functions.
-The
-.Fa flags
-argument is checked to ensure that the access mode specified is not
-.Dv O_WRONLY
-(which is not defined for shared memory objects).
+.Xr fcntl 2 .
.Pp
-In addition, the
-.Fx
-implementation causes
-.Fn mmap
-of a descriptor returned by
-.Fn shm_open
-to behave as if the
-.Dv MAP_NOSYNC
-flag had been specified to
-.Xr mmap 2 .
-(It does so by setting a special file flag using
-.Xr fcntl 2 . )
+As a FreeBSD extension,
+the constant
+.Dv SHM_ANON
+may be used for the
+.Fa path
+argument to
+.Fn shm_open .
+In this case, an anonymous, unnamed shared memory object is created.
+Since the object has no name,
+it cannot be removed via a subsequent call to
+.Fn shm_unlink .
+Instead,
+the shared memory object will be garbage collected when the last reference to
+the shared memory object is removed.
+The shared memory object may be shared with other processes by sharing the
+file descriptor via
+.Xr fork 2
+or
+.Xr sendmsg 2 .
+Attempting to open an anonymous shared memory object with
+.Dv O_RDONLY
+will fail with
+.Er EINVAL .
+All other flags are ignored.
.Pp
The
.Fn shm_unlink
-function makes no effort to ensure that
-.Fa path
-refers to a shared memory object.
+system call removes a shared memory object named
+.Fa path .
+.Pp
.Sh RETURN VALUES
If successful,
.Fn shm_open
-returns a non-negative integer;
+returns a non-negative integer,
+and
.Fn shm_unlink
returns zero.
Both functions return -1 on failure, and set
@@ -110,8 +152,8 @@ to indicate the error.
.Sh COMPATIBILITY
The
.Fa path
-argument does not necessarily represent a pathname (although it does in this
-and most other implementations).
+argument does not necessarily represent a pathname (although it does in
+most other implementations).
Two processes opening the same
.Fa path
are guaranteed to access the same shared memory object if and only if
@@ -139,37 +181,82 @@ on a shared memory object, or on the descriptor returned by
is undefined.
It is also undefined whether the shared memory object itself, or its
contents, persist across reboots.
-.Sh ERRORS
-The
-.Fn shm_open
-and
-.Fn shm_unlink
-functions can fail with any error defined for
-.Fn open
+.Pp
+In FreeBSD,
+.Xr read 2
and
-.Fn unlink ,
-respectively.
-In addition, the following errors are defined for
+.Xr write 2
+on a shared memory object will fail with
+.Er EOPNOTSUPP
+and neither shared memory objects nor their contents persist across reboots.
+.Sh ERRORS
+The following errors are defined for
.Fn shm_open :
.Bl -tag -width Er
.It Bq Er EINVAL
-The object named by
+A flag other than
+.Dv O_RDONLY ,
+.Dv O_RDWR ,
+.Dv O_CREAT ,
+.Dv O_EXCL ,
+or
+.Dv O_TRUNC
+was included in
+.Fa flags .
+.It Bq Er EMFILE
+The process has already reached its limit for open file descriptors.
+.It Bq Er ENFILE
+The system file table is full.
+.It Bq Er EINVAL
+.Dv O_RDONLY
+was specified while creating an anonymous shared memory object via
+.Dv SHM_ANON .
+.It Bq Er EFAULT
+The
.Fa path
-is not a shared memory object
-(i.e., it is not a regular file).
+argument points outside the process' allocated address space.
+.It Bq Er ENAMETOOLONG
+The entire pathname exceeded 1023 characters.
.It Bq Er EINVAL
The
-.Fa flags
-argument to
-.Fn shm_open
-specifies an access mode of
-.Dv O_WRONLY .
+.Fa path
+does not begin with a slash
+.Pq Ql \&/
+character.
+.It Bq Er ENOENT
+.Dv O_CREAT
+is not specified and the named shared memory object does not exist.
+.It Bq Er EEXIST
+.Dv O_CREAT
+and
+.Dv O_EXCL
+are specified and the named shared memory object already exists.
+.It Bq Er EACCES
+The required permissions (for reading or reading and writing) are denied.
+.El
+.Pp
+The following errors are defined for
+.Fn shm_unlink :
+.Bl -tag -width Er
+.It Bq Er EFAULT
+The
+.Fa path
+argument points outside the process' allocated address space.
+.It Bq Er ENAMETOOLONG
+The entire pathname exceeded 1023 characters.
+.It Bq Er ENOENT
+The named shared memory object does not exist.
+.It Bq Er EACCES
+The required permissions are denied.
+.Fn shm_unlink
+requires read and write permission to the shared memory object.
.El
.Sh SEE ALSO
+.Xr close 2 ,
+.Xr ftruncate 2 ,
+.Xr fstat 2 ,
.Xr mmap 2 ,
-.Xr munmap 2 ,
-.Xr open 2 ,
-.Xr unlink 2
+.Xr munmap 2
.Sh STANDARDS
The
.Fn shm_open
@@ -184,6 +271,9 @@ and
.Fn shm_unlink
functions first appeared in
.Fx 4.3 .
+The functions were reimplemented as system calls using shared memory objects
+directly rather than files in
+.Fx 7.0 .
.Sh AUTHORS
.An Garrett A. Wollman Aq wollman@FreeBSD.org
(C library support and this manual page)
diff --git a/sys/compat/freebsd32/syscalls.master b/sys/compat/freebsd32/syscalls.master
index 04c86f7..923ab26 100644
--- a/sys/compat/freebsd32/syscalls.master
+++ b/sys/compat/freebsd32/syscalls.master
@@ -796,3 +796,6 @@
480 AUE_FTRUNCATE STD { int freebsd32_ftruncate(int fd, \
u_int32_t lengthlo, u_int32_t lengthhi); }
481 AUE_KILL NOPROTO { int thr_kill2(pid_t pid, long id, int sig); }
+482 AUE_NULL NOPROTO { int shm_open(const char *path, int flags, \
+ mode_t mode); }
+483 AUE_NULL NOPROTO { int shm_unlink(const char *path); }
diff --git a/sys/conf/files b/sys/conf/files
index 558c6d1..8d5b3a5 100644
--- a/sys/conf/files
+++ b/sys/conf/files
@@ -1540,6 +1540,7 @@ kern/uipc_mbuf.c standard
kern/uipc_mbuf2.c standard
kern/uipc_mqueue.c optional p1003_1b_mqueue
kern/uipc_sem.c optional p1003_1b_semaphores
+kern/uipc_shm.c standard
kern/uipc_sockbuf.c standard
kern/uipc_socket.c standard
kern/uipc_syscalls.c standard
@@ -2046,6 +2047,7 @@ security/mac/mac_label.c optional mac
security/mac/mac_net.c optional mac
security/mac/mac_pipe.c optional mac
security/mac/mac_posix_sem.c optional mac
+security/mac/mac_posix_shm.c optional mac
security/mac/mac_priv.c optional mac
security/mac/mac_process.c optional mac
security/mac/mac_socket.c optional mac
diff --git a/sys/kern/kern_descrip.c b/sys/kern/kern_descrip.c
index 24438dd..3b4c0f4 100644
--- a/sys/kern/kern_descrip.c
+++ b/sys/kern/kern_descrip.c
@@ -2646,6 +2646,8 @@ file_type_to_name(short type)
return ("crpt");
case DTYPE_MQUEUE:
return ("mque");
+ case DTYPE_SHM:
+ return ("shm");
default:
return ("unkn");
}
diff --git a/sys/kern/syscalls.master b/sys/kern/syscalls.master
index 9538e4f..e51d7b1 100644
--- a/sys/kern/syscalls.master
+++ b/sys/kern/syscalls.master
@@ -847,5 +847,8 @@
479 AUE_TRUNCATE STD { int truncate(char *path, off_t length); }
480 AUE_FTRUNCATE STD { int ftruncate(int fd, off_t length); }
481 AUE_KILL STD { int thr_kill2(pid_t pid, long id, int sig); }
+482 AUE_NULL STD { int shm_open(const char *path, int flags, \
+ mode_t mode); }
+483 AUE_NULL STD { int shm_unlink(const char *path); }
; Please copy any additions and changes to the following compatability tables:
; sys/compat/freebsd32/syscalls.master
diff --git a/sys/kern/uipc_shm.c b/sys/kern/uipc_shm.c
new file mode 100644
index 0000000..0d7b741
--- /dev/null
+++ b/sys/kern/uipc_shm.c
@@ -0,0 +1,608 @@
+/*-
+ * Copyright (c) 2006 Robert N. M. Watson
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * Support for shared swap-backed anonymous memory objects via
+ * shm_open(2) and shm_unlink(2). While most of the implementation is
+ * here, vm_mmap.c contains mapping logic changes.
+ *
+ * TODO:
+ *
+ * (1) Convert test utilities into regression tests and import them into
+ * src/tools/regression.
+ *
+ * (2) Need to export data to a userland tool via a sysctl. Should ipcs(1)
+ * and ipcrm(1) be expanded or should new tools to manage both POSIX
+ * kernel semaphores and POSIX shared memory be written?
+ *
+ * (3) Add support for this file type to fstat(1).
+ *
+ * (4) Resource limits? Does this need its own resource limits or are the
+ * existing limits in mmap(2) sufficient?
+ *
+ * (5) Partial page truncation. vnode_pager_setsize() will zero any parts
+ * of a partially mapped page as a result of ftruncate(2)/truncate(2).
+ * We can do the same (with the same pmap evil), but do we need to
+ * worry about the bits on disk if the page is swapped out or will the
+ * swapper zero the parts of a page that are invalid if the page is
+ * swapped back in for us?
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include "opt_mac.h"
+
+#include <sys/param.h>
+#include <sys/fcntl.h>
+#include <sys/file.h>
+#include <sys/filedesc.h>
+#include <sys/fnv_hash.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/mman.h>
+#include <sys/mutex.h>
+#include <sys/proc.h>
+#include <sys/refcount.h>
+#include <sys/resourcevar.h>
+#include <sys/stat.h>
+#include <sys/sysctl.h>
+#include <sys/sysproto.h>
+#include <sys/systm.h>
+#include <sys/sx.h>
+#include <sys/time.h>
+#include <sys/vnode.h>
+
+#include <security/mac/mac_framework.h>
+
+#include <vm/vm.h>
+#include <vm/vm_param.h>
+#include <vm/pmap.h>
+#include <vm/vm_map.h>
+#include <vm/vm_object.h>
+#include <vm/vm_page.h>
+#include <vm/vm_pager.h>
+#include <vm/swap_pager.h>
+
+struct shm_mapping {
+ char *sm_path;
+ Fnv32_t sm_fnv;
+ struct shmfd *sm_shmfd;
+ LIST_ENTRY(shm_mapping) sm_link;
+};
+
+static MALLOC_DEFINE(M_SHMFD, "shmfd", "shared memory file descriptor");
+static LIST_HEAD(, shm_mapping) *shm_dictionary;
+static struct sx shm_dict_lock;
+static struct mtx shm_timestamp_lock;
+static u_long shm_hash;
+
+#define SHM_HASH(fnv) (&shm_dictionary[(fnv) & shm_hash])
+
+static int shm_access(struct shmfd *shmfd, struct ucred *ucred, int flags);
+static struct shmfd *shm_alloc(struct ucred *ucred, mode_t mode);
+static void shm_dict_init(void *arg);
+static void shm_drop(struct shmfd *shmfd);
+static struct shmfd *shm_hold(struct shmfd *shmfd);
+static void shm_insert(char *path, Fnv32_t fnv, struct shmfd *shmfd);
+static struct shmfd *shm_lookup(char *path, Fnv32_t fnv);
+static int shm_remove(char *path, Fnv32_t fnv, struct ucred *ucred);
+static void shm_dotruncate(struct shmfd *shmfd, off_t length);
+
+static fo_rdwr_t shm_read;
+static fo_rdwr_t shm_write;
+static fo_truncate_t shm_truncate;
+static fo_ioctl_t shm_ioctl;
+static fo_poll_t shm_poll;
+static fo_kqfilter_t shm_kqfilter;
+static fo_stat_t shm_stat;
+static fo_close_t shm_close;
+
+/* File descriptor operations. */
+static struct fileops shm_ops = {
+ .fo_read = shm_read,
+ .fo_write = shm_write,
+ .fo_truncate = shm_truncate,
+ .fo_ioctl = shm_ioctl,
+ .fo_poll = shm_poll,
+ .fo_kqfilter = shm_kqfilter,
+ .fo_stat = shm_stat,
+ .fo_close = shm_close,
+ .fo_flags = DFLAG_PASSABLE
+};
+
+FEATURE(posix_shm, "POSIX shared memory");
+
+static int
+shm_read(struct file *fp, struct uio *uio, struct ucred *active_cred,
+ int flags, struct thread *td)
+{
+
+ return (EOPNOTSUPP);
+}
+
+static int
+shm_write(struct file *fp, struct uio *uio, struct ucred *active_cred,
+ int flags, struct thread *td)
+{
+
+ return (EOPNOTSUPP);
+}
+
+static int
+shm_truncate(struct file *fp, off_t length, struct ucred *active_cred,
+ struct thread *td)
+{
+ struct shmfd *shmfd;
+#ifdef MAC
+ int error;
+#endif
+
+ shmfd = fp->f_data;
+#ifdef MAC
+ error = mac_posixshm_check_truncate(active_cred, fp->f_cred, shmfd);
+ if (error)
+ return (error);
+#endif
+ shm_dotruncate(shmfd, length);
+ return (0);
+}
+
+static int
+shm_ioctl(struct file *fp, u_long com, void *data,
+ struct ucred *active_cred, struct thread *td)
+{
+
+ return (EOPNOTSUPP);
+}
+
+static int
+shm_poll(struct file *fp, int events, struct ucred *active_cred,
+ struct thread *td)
+{
+
+ return (EOPNOTSUPP);
+}
+
+static int
+shm_kqfilter(struct file *fp, struct knote *kn)
+{
+
+ return (EOPNOTSUPP);
+}
+
+static int
+shm_stat(struct file *fp, struct stat *sb, struct ucred *active_cred,
+ struct thread *td)
+{
+ struct shmfd *shmfd;
+#ifdef MAC
+ int error;
+#endif
+
+ shmfd = fp->f_data;
+
+#ifdef MAC
+ error = mac_posixshm_check_stat(active_cred, fp->f_cred, shmfd);
+ if (error)
+ return (error);
+#endif
+
+ /*
+ * Attempt to return sanish values for fstat() on a memory file
+ * descriptor.
+ */
+ bzero(sb, sizeof(*sb));
+ sb->st_mode = S_IFREG | shmfd->shm_mode; /* XXX */
+ sb->st_blksize = PAGE_SIZE;
+ sb->st_size = shmfd->shm_size;
+ sb->st_blocks = (sb->st_size + sb->st_blksize - 1) / sb->st_blksize;
+ sb->st_atimespec = shmfd->shm_atime;
+ sb->st_ctimespec = shmfd->shm_ctime;
+ sb->st_mtimespec = shmfd->shm_mtime;
+ sb->st_birthtimespec = shmfd->shm_birthtime;
+ sb->st_uid = shmfd->shm_uid;
+ sb->st_gid = shmfd->shm_gid;
+
+ return (0);
+}
+
+static int
+shm_close(struct file *fp, struct thread *td)
+{
+ struct shmfd *shmfd;
+
+ shmfd = fp->f_data;
+ fp->f_data = NULL;
+ shm_drop(shmfd);
+
+ return (0);
+}
+
+static void
+shm_dotruncate(struct shmfd *shmfd, off_t length)
+{
+ vm_object_t object;
+ vm_page_t m;
+ vm_pindex_t nobjsize;
+
+ object = shmfd->shm_object;
+ VM_OBJECT_LOCK(object);
+ if (length == shmfd->shm_size) {
+ VM_OBJECT_UNLOCK(object);
+ return;
+ }
+ nobjsize = OFF_TO_IDX(length + PAGE_MASK);
+
+ /* Are we shrinking? If so, trim the end. */
+ if (length < shmfd->shm_size) {
+ /* Toss in memory pages. */
+ if (nobjsize < object->size)
+ vm_object_page_remove(object, nobjsize, object->size,
+ FALSE);
+
+ /* Toss pages from swap. */
+ if (object->type == OBJT_SWAP)
+ swap_pager_freespace(object, nobjsize,
+ object->size - nobjsize);
+
+ /*
+ * If the last page is partially mapped, then zero out
+ * the garbage at the end of the page. See comments
+ * in vnode_pager_setsize() for more details.
+ *
+ * XXXJHB: This handles in memory pages, but what about
+ * a page swapped out to disk?
+ */
+ if ((length & PAGE_MASK) &&
+ (m = vm_page_lookup(object, OFF_TO_IDX(length))) != NULL &&
+ m->valid != 0) {
+ int base = (int)length & PAGE_MASK;
+ int size = PAGE_SIZE - base;
+
+ pmap_zero_page_area(m, base, size);
+ vm_page_lock_queues();
+ vm_page_set_validclean(m, base, size);
+ if (m->dirty != 0)
+ m->dirty = VM_PAGE_BITS_ALL;
+ vm_page_unlock_queues();
+ }
+ }
+ shmfd->shm_size = length;
+ mtx_lock(&shm_timestamp_lock);
+ vfs_timestamp(&shmfd->shm_ctime);
+ shmfd->shm_mtime = shmfd->shm_ctime;
+ mtx_unlock(&shm_timestamp_lock);
+ object->size = nobjsize;
+ VM_OBJECT_UNLOCK(object);
+}
+
+/*
+ * shmfd object management including creation and reference counting
+ * routines.
+ */
+static struct shmfd *
+shm_alloc(struct ucred *ucred, mode_t mode)
+{
+ struct shmfd *shmfd;
+
+ shmfd = malloc(sizeof(*shmfd), M_SHMFD, M_WAITOK | M_ZERO);
+ shmfd->shm_size = 0;
+ shmfd->shm_uid = ucred->cr_uid;
+ shmfd->shm_gid = ucred->cr_gid;
+ shmfd->shm_mode = mode;
+ shmfd->shm_object = vm_pager_allocate(OBJT_DEFAULT, NULL,
+ shmfd->shm_size, VM_PROT_DEFAULT, 0);
+ KASSERT(shmfd->shm_object != NULL, ("shm_create: vm_pager_allocate"));
+ vfs_timestamp(&shmfd->shm_birthtime);
+ shmfd->shm_atime = shmfd->shm_mtime = shmfd->shm_ctime =
+ shmfd->shm_birthtime;
+ refcount_init(&shmfd->shm_refs, 1);
+#ifdef MAC
+ mac_posixshm_init(shmfd);
+ mac_posixshm_create(ucred, shmfd);
+#endif
+
+ return (shmfd);
+}
+
+static struct shmfd *
+shm_hold(struct shmfd *shmfd)
+{
+
+ refcount_acquire(&shmfd->shm_refs);
+ return (shmfd);
+}
+
+static void
+shm_drop(struct shmfd *shmfd)
+{
+
+ if (refcount_release(&shmfd->shm_refs)) {
+#ifdef MAC
+ mac_posixshm_destroy(shmfd);
+#endif
+ vm_object_deallocate(shmfd->shm_object);
+ free(shmfd, M_SHMFD);
+ }
+}
+
+/*
+ * Determine if the credentials have sufficient permissions for a
+ * specified combination of FREAD and FWRITE.
+ */
+static int
+shm_access(struct shmfd *shmfd, struct ucred *ucred, int flags)
+{
+ int acc_mode;
+
+ acc_mode = 0;
+ if (flags & FREAD)
+ acc_mode |= VREAD;
+ if (flags & FWRITE)
+ acc_mode |= VWRITE;
+ return (vaccess(VREG, shmfd->shm_mode, shmfd->shm_uid, shmfd->shm_gid,
+ acc_mode, ucred, NULL));
+}
+
+/*
+ * Dictionary management. We maintain an in-kernel dictionary to map
+ * paths to shmfd objects. We use the FNV hash on the path to store
+ * the mappings in a hash table.
+ */
+static void
+shm_dict_init(void *arg)
+{
+
+ mtx_init(&shm_timestamp_lock, "shm timestamps", NULL, MTX_DEF);
+ sx_init(&shm_dict_lock, "shm dictionary");
+ shm_dictionary = hashinit(1024, M_SHMFD, &shm_hash);
+}
+SYSINIT(shm_dict_init, SI_SUB_SYSV_SHM, SI_ORDER_ANY, shm_dict_init, NULL);
+
+/*
+ * Find the object registered under 'path'.  Callers pass the FNV hash
+ * of the path in 'fnv'; it selects the bucket and acts as a cheap
+ * filter before the string comparison.  The object is returned without
+ * a new reference being taken; NULL means no mapping matched.
+ */
+static struct shmfd *
+shm_lookup(char *path, Fnv32_t fnv)
+{
+	struct shm_mapping *ent;
+
+	LIST_FOREACH(ent, SHM_HASH(fnv), sm_link) {
+		if (ent->sm_fnv == fnv && strcmp(ent->sm_path, path) == 0)
+			return (ent->sm_shmfd);
+	}
+
+	return (NULL);
+}
+
+/*
+ * Register 'shmfd' in the dictionary under 'path'.  The mapping takes
+ * its own reference on the object and stores the 'path' pointer itself
+ * (no copy is made), so the caller must not free the buffer afterwards.
+ */
+static void
+shm_insert(char *path, Fnv32_t fnv, struct shmfd *shmfd)
+{
+	struct shm_mapping *ent;
+
+	ent = malloc(sizeof(*ent), M_SHMFD, M_WAITOK);
+	ent->sm_shmfd = shm_hold(shmfd);
+	ent->sm_fnv = fnv;
+	ent->sm_path = path;
+	LIST_INSERT_HEAD(SHM_HASH(fnv), ent, sm_link);
+}
+
+/*
+ * Remove the dictionary entry for 'path' on behalf of 'ucred'.
+ *
+ * The caller must pass the MAC unlink check (when MAC is configured)
+ * and have both read and write access to the object; the first failing
+ * check's error is returned and the entry is left in place.  On success
+ * the mapping is unlinked, its object reference dropped, and both the
+ * stored path and the mapping are freed.  Returns ENOENT when no entry
+ * matches.
+ */
+static int
+shm_remove(char *path, Fnv32_t fnv, struct ucred *ucred)
+{
+	struct shm_mapping *ent;
+	int error;
+
+	LIST_FOREACH(ent, SHM_HASH(fnv), sm_link) {
+		/* Skip entries that differ in hash or in path. */
+		if (ent->sm_fnv != fnv || strcmp(ent->sm_path, path) != 0)
+			continue;
+
+#ifdef MAC
+		error = mac_posixshm_check_unlink(ucred, ent->sm_shmfd);
+		if (error)
+			return (error);
+#endif
+		error = shm_access(ent->sm_shmfd, ucred, FREAD | FWRITE);
+		if (error)
+			return (error);
+
+		LIST_REMOVE(ent, sm_link);
+		shm_drop(ent->sm_shmfd);
+		free(ent->sm_path, M_SHMFD);
+		free(ent, M_SHMFD);
+		return (0);
+	}
+
+	return (ENOENT);
+}
+
+/*
+ * System calls.
+ *
+ * shm_open() validates uap->flags, reserves a file/descriptor pair with
+ * falloc(), and then attaches a shmfd to it: a fresh unnamed object
+ * when uap->path is SHM_ANON, otherwise the object found (or, with
+ * O_CREAT, created) under the copied-in path in the dictionary.  On
+ * success the descriptor number is stored in td->td_retval[0] and 0 is
+ * returned; on failure the reserved descriptor is closed again and an
+ * errno value is returned.
+ */
+int
+shm_open(struct thread *td, struct shm_open_args *uap)
+{
+ struct filedesc *fdp;
+ struct shmfd *shmfd;
+ struct file *fp;
+ char *path;
+ Fnv32_t fnv;
+ mode_t cmode;
+ int fd, error;
+ /* Only the O_RDONLY and O_RDWR access modes are accepted. */
+ if ((uap->flags & O_ACCMODE) != O_RDONLY &&
+ (uap->flags & O_ACCMODE) != O_RDWR)
+ return (EINVAL);
+ /* No flags beyond the access mode, O_CREAT, O_EXCL and O_TRUNC. */
+ if ((uap->flags & ~(O_ACCMODE | O_CREAT | O_EXCL | O_TRUNC)) != 0)
+ return (EINVAL);
+ /* Creation mode: clear the bits set in fd_cmask, keep ACCESSPERMS only. */
+ fdp = td->td_proc->p_fd;
+ cmode = (uap->mode & ~fdp->fd_cmask) & ACCESSPERMS;
+ /*
+ * Reserve the file and descriptor first; every failure path
+ * below undoes this with fdclose() followed by fdrop().
+ */
+ error = falloc(td, &fp, &fd);
+ if (error)
+ return (error);
+
+ /* A SHM_ANON path pointer creates an anonymous object. */
+ if (uap->path == SHM_ANON) {
+ /* A read-only anonymous object is pointless. */
+ if ((uap->flags & O_ACCMODE) == O_RDONLY) {
+ fdclose(fdp, fp, fd, td);
+ fdrop(fp, td);
+ return (EINVAL);
+ }
+ shmfd = shm_alloc(td->td_ucred, cmode);
+ } else {
+ path = malloc(MAXPATHLEN, M_SHMFD, M_WAITOK);
+ error = copyinstr(uap->path, path, MAXPATHLEN, NULL);
+
+ /* Require paths to start with a '/' character. */
+ if (error == 0 && path[0] != '/')
+ error = EINVAL;
+ if (error) {
+ fdclose(fdp, fp, fd, td);
+ fdrop(fp, td);
+ free(path, M_SHMFD);
+ return (error);
+ }
+ /* Hash the path and search the dictionary under the exclusive lock. */
+ fnv = fnv_32_str(path, FNV1_32_INIT);
+ sx_xlock(&shm_dict_lock);
+ shmfd = shm_lookup(path, fnv);
+ if (shmfd == NULL) {
+ /*
+ * Object does not yet exist, create it if requested.
+ * shm_insert() takes its own reference for the
+ * dictionary and stores the path pointer, so the
+ * path buffer is only freed on the ENOENT branch.
+ */
+ if (uap->flags & O_CREAT) {
+ shmfd = shm_alloc(td->td_ucred, cmode);
+ shm_insert(path, fnv, shmfd);
+ } else {
+ free(path, M_SHMFD);
+ error = ENOENT;
+ }
+ } else {
+ /*
+ * Object already exists, obtain a new
+ * reference if requested and permitted.
+ */
+ free(path, M_SHMFD);
+ if ((uap->flags & (O_CREAT | O_EXCL)) ==
+ (O_CREAT | O_EXCL))
+ error = EEXIST;
+ else {
+#ifdef MAC
+ error = mac_posixshm_check_open(td->td_ucred,
+ shmfd);
+ if (error == 0)
+#endif
+ error = shm_access(shmfd, td->td_ucred,
+ FFLAGS(uap->flags & O_ACCMODE));
+ }
+
+ /*
+ * Truncate the file back to zero length if
+ * O_TRUNC was specified and the object was
+ * opened with read/write.
+ */
+ if (error == 0 &&
+ (uap->flags & (O_ACCMODE | O_TRUNC)) ==
+ (O_RDWR | O_TRUNC)) {
+#ifdef MAC
+ error = mac_posixshm_check_truncate(
+ td->td_ucred, fp->f_cred, shmfd);
+ if (error == 0)
+#endif
+ shm_dotruncate(shmfd, 0);
+ }
+ if (error == 0)
+ shm_hold(shmfd); /* extra reference, handed to finit() below */
+ }
+ sx_xunlock(&shm_dict_lock);
+
+ if (error) {
+ fdclose(fdp, fp, fd, td);
+ fdrop(fp, td);
+ return (error);
+ }
+ }
+ /* Attach the object to the file with the requested access mode. */
+ finit(fp, FFLAGS(uap->flags & O_ACCMODE), DTYPE_SHM, shmfd, &shm_ops);
+ /*
+ * Set UF_EXCLOSE (presumably close-on-exec -- confirm) on the
+ * descriptor, but only if the slot still refers to this file.
+ */
+ FILEDESC_XLOCK(fdp);
+ if (fdp->fd_ofiles[fd] == fp)
+ fdp->fd_ofileflags[fd] |= UF_EXCLOSE;
+ FILEDESC_XUNLOCK(fdp);
+ td->td_retval[0] = fd;
+ fdrop(fp, td);
+
+ return (0);
+}
+
+/*
+ * shm_unlink() system call: copy the path in from userland and remove
+ * the matching dictionary entry while holding the dictionary lock
+ * exclusively.  Returns the copyinstr() error if the copy fails,
+ * otherwise whatever shm_remove() returns.  The temporary path buffer
+ * is always released before returning.
+ */
+int
+shm_unlink(struct thread *td, struct shm_unlink_args *uap)
+{
+	char *path;
+	Fnv32_t fnv;
+	int error;
+
+	path = malloc(MAXPATHLEN, M_TEMP, M_WAITOK);
+	error = copyinstr(uap->path, path, MAXPATHLEN, NULL);
+	if (error == 0) {
+		fnv = fnv_32_str(path, FNV1_32_INIT);
+		sx_xlock(&shm_dict_lock);
+		error = shm_remove(path, fnv, td->td_ucred);
+		sx_xunlock(&shm_dict_lock);
+	}
+	free(path, M_TEMP);
+
+	return (error);
+}
+
+/*
+ * mmap() helper to validate mmap() requests against shm object state
+ * and give mmap() the vm_object to use for the mapping.
+ *
+ * The requested window [foff, foff + objsize) must start inside the
+ * object and must not extend past shm_size; otherwise EINVAL is
+ * returned.  On success the access time is refreshed under
+ * shm_timestamp_lock, a reference is taken on the backing VM object,
+ * the object is stored in *obj, and 0 is returned.
+ */
+int
+shm_mmap(struct shmfd *shmfd, vm_size_t objsize, vm_ooffset_t foff,
+    vm_object_t *obj)
+{
+
+	/*
+	 * Reject negative offsets explicitly, and compare the length
+	 * against the room left after foff instead of computing
+	 * foff + objsize: that sum can wrap, or cancel out against a
+	 * negative foff, and slip past the bound.
+	 *
+	 * NOTE(review): assumes vm_ooffset_t is signed, as the earlier
+	 * XXXRW remark about sign errors suggested -- confirm.  Whether
+	 * callers pass a page-rounded objsize, and hence whether
+	 * shm_size should be rounded up before this comparison, cannot
+	 * be determined from this function.
+	 */
+	if (foff < 0 || foff >= shmfd->shm_size ||
+	    objsize > shmfd->shm_size - foff)
+		return (EINVAL);
+
+	mtx_lock(&shm_timestamp_lock);
+	vfs_timestamp(&shmfd->shm_atime);
+	mtx_unlock(&shm_timestamp_lock);
+
+	/* The reference taken here belongs to the caller's mapping. */
+	vm_object_reference(shmfd->shm_object);
+	*obj = shmfd->shm_object;
+	return (0);
+}
diff --git a/sys/security/mac/mac_framework.h b/sys/security/mac/mac_framework.h
index f3d41df..e607d32 100644
--- a/sys/security/mac/mac_framework.h
+++ b/sys/security/mac/mac_framework.h
@@ -71,6 +71,7 @@ struct msg;
struct msqid_kernel;
struct proc;
struct semid_kernel;
+struct shmfd;
struct shmid_kernel;
struct sockaddr;
struct socket;
@@ -198,6 +199,18 @@ void mac_posixsem_create(struct ucred *cred, struct ksem *ks);
void mac_posixsem_destroy(struct ksem *);
void mac_posixsem_init(struct ksem *);
+int mac_posixshm_check_mmap(struct ucred *cred, struct shmfd *shmfd,
+ int prot, int flags);
+int mac_posixshm_check_open(struct ucred *cred, struct shmfd *shmfd);
+int mac_posixshm_check_stat(struct ucred *active_cred,
+ struct ucred *file_cred, struct shmfd *shmfd);
+int mac_posixshm_check_truncate(struct ucred *active_cred,
+ struct ucred *file_cred, struct shmfd *shmfd);
+int mac_posixshm_check_unlink(struct ucred *cred, struct shmfd *shmfd);
+void mac_posixshm_create(struct ucred *cred, struct shmfd *shmfd);
+void mac_posixshm_destroy(struct shmfd *);
+void mac_posixshm_init(struct shmfd *);
+
int mac_priv_check(struct ucred *cred, int priv);
int mac_priv_grant(struct ucred *cred, int priv);
diff --git a/sys/security/mac/mac_policy.h b/sys/security/mac/mac_policy.h
index 3d494db..c7aef52 100644
--- a/sys/security/mac/mac_policy.h
+++ b/sys/security/mac/mac_policy.h
@@ -83,6 +83,7 @@ struct pipepair;
struct proc;
struct sbuf;
struct semid_kernel;
+struct shmfd;
struct shmid_kernel;
struct sockaddr;
struct socket;
@@ -305,6 +306,24 @@ typedef void (*mpo_posixsem_create_t)(struct ucred *cred,
typedef void (*mpo_posixsem_destroy_label_t)(struct label *label);
typedef void (*mpo_posixsem_init_label_t)(struct label *label);
+typedef int (*mpo_posixshm_check_mmap_t)(struct ucred *cred,
+ struct shmfd *shmfd, struct label *shmlabel, int prot,
+ int flags);
+typedef int (*mpo_posixshm_check_open_t)(struct ucred *cred,
+ struct shmfd *shmfd, struct label *shmlabel);
+typedef int (*mpo_posixshm_check_stat_t)(struct ucred *active_cred,
+ struct ucred *file_cred, struct shmfd *shmfd,
+ struct label *shmlabel);
+typedef int (*mpo_posixshm_check_truncate_t)(struct ucred *active_cred,
+ struct ucred *file_cred, struct shmfd *shmfd,
+ struct label *shmlabel);
+typedef int (*mpo_posixshm_check_unlink_t)(struct ucred *cred,
+ struct shmfd *shmfd, struct label *shmlabel);
+typedef void (*mpo_posixshm_create_t)(struct ucred *cred,
+ struct shmfd *shmfd, struct label *shmlabel);
+typedef void (*mpo_posixshm_destroy_label_t)(struct label *label);
+typedef void (*mpo_posixshm_init_label_t)(struct label *label);
+
typedef int (*mpo_priv_check_t)(struct ucred *cred, int priv);
typedef int (*mpo_priv_grant_t)(struct ucred *cred, int priv);
@@ -733,6 +752,15 @@ struct mac_policy_ops {
mpo_posixsem_destroy_label_t mpo_posixsem_destroy_label;
mpo_posixsem_init_label_t mpo_posixsem_init_label;
+ mpo_posixshm_check_mmap_t mpo_posixshm_check_mmap;
+ mpo_posixshm_check_open_t mpo_posixshm_check_open;
+ mpo_posixshm_check_stat_t mpo_posixshm_check_stat;
+ mpo_posixshm_check_truncate_t mpo_posixshm_check_truncate;
+ mpo_posixshm_check_unlink_t mpo_posixshm_check_unlink;
+ mpo_posixshm_create_t mpo_posixshm_create;
+ mpo_posixshm_destroy_label_t mpo_posixshm_destroy_label;
+ mpo_posixshm_init_label_t mpo_posixshm_init_label;
+
mpo_priv_check_t mpo_priv_check;
mpo_priv_grant_t mpo_priv_grant;
diff --git a/sys/security/mac/mac_posix_shm.c b/sys/security/mac/mac_posix_shm.c
new file mode 100644
index 0000000..b9da7b3
--- /dev/null
+++ b/sys/security/mac/mac_posix_shm.c
@@ -0,0 +1,146 @@
+/*-
+ * Copyright (c) 2003-2006 SPARTA, Inc.
+ * All rights reserved.
+ *
+ * This software was developed for the FreeBSD Project in part by Network
+ * Associates Laboratories, the Security Research Division of Network
+ * Associates, Inc. under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"),
+ * as part of the DARPA CHATS research program.
+ *
+ * This software was enhanced by SPARTA ISSO under SPAWAR contract
+ * N66001-04-C-6019 ("SEFOS").
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include "opt_mac.h"
+
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/mman.h>
+#include <sys/malloc.h>
+#include <sys/module.h>
+#include <sys/systm.h>
+#include <sys/sysctl.h>
+
+#include <security/mac/mac_framework.h>
+#include <security/mac/mac_internal.h>
+#include <security/mac/mac_policy.h>
+
+static struct label *
+mac_posixshm_label_alloc(void)
+{
+ struct label *label;
+ /*
+ * Allocate a label with mac_labelzone_alloc(M_WAITOK), pass it to
+ * the posixshm_init_label entry point via MAC_PERFORM, and return it.
+ */
+ label = mac_labelzone_alloc(M_WAITOK);
+ MAC_PERFORM(posixshm_init_label, label);
+ return (label);
+}
+
+void
+mac_posixshm_init(struct shmfd *shmfd)
+{
+ /* Attach a freshly allocated MAC label to the object. */
+ shmfd->shm_label = mac_posixshm_label_alloc();
+}
+
+static void
+mac_posixshm_label_free(struct label *label)
+{
+ /*
+ * Pass the label to the posixshm_destroy_label entry point via
+ * MAC_PERFORM, then return it with mac_labelzone_free().
+ */
+ MAC_PERFORM(posixshm_destroy_label, label);
+ mac_labelzone_free(label);
+}
+
+void
+mac_posixshm_destroy(struct shmfd *shmfd)
+{
+ /* Free the object's MAC label and clear the now-stale pointer. */
+ mac_posixshm_label_free(shmfd->shm_label);
+ shmfd->shm_label = NULL;
+}
+
+void
+mac_posixshm_create(struct ucred *cred, struct shmfd *shmfd)
+{
+ /*
+ * Invoke the posixshm_create entry point via MAC_PERFORM with the
+ * creating credential, the object and the object's label.
+ */
+ MAC_PERFORM(posixshm_create, cred, shmfd, shmfd->shm_label);
+}
+
+int
+mac_posixshm_check_mmap(struct ucred *cred, struct shmfd *shmfd, int prot,
+ int flags)
+{
+ int error;
+ /*
+ * Run the posixshm_check_mmap entry point via MAC_CHECK for a
+ * mapping request with the given prot and flags.  'error' is never
+ * assigned explicitly here; presumably MAC_CHECK stores the result
+ * in it -- confirm against the macro definition.
+ */
+ MAC_CHECK(posixshm_check_mmap, cred, shmfd, shmfd->shm_label, prot,
+ flags);
+
+ return (error);
+}
+
+int
+mac_posixshm_check_open(struct ucred *cred, struct shmfd *shmfd)
+{
+ int error;
+ /*
+ * Run the posixshm_check_open entry point via MAC_CHECK.  'error'
+ * is presumably set by the macro -- confirm against its definition.
+ */
+ MAC_CHECK(posixshm_check_open, cred, shmfd, shmfd->shm_label);
+
+ return (error);
+}
+
+int
+mac_posixshm_check_stat(struct ucred *active_cred, struct ucred *file_cred,
+ struct shmfd *shmfd)
+{
+ int error;
+ /*
+ * Run the posixshm_check_stat entry point via MAC_CHECK with both
+ * the active and the file credential.  'error' is presumably set by
+ * the macro -- confirm against its definition.
+ */
+ MAC_CHECK(posixshm_check_stat, active_cred, file_cred, shmfd,
+ shmfd->shm_label);
+
+ return (error);
+}
+
+int
+mac_posixshm_check_truncate(struct ucred *active_cred, struct ucred *file_cred,
+ struct shmfd *shmfd)
+{
+ int error;
+ /*
+ * Run the posixshm_check_truncate entry point via MAC_CHECK with
+ * both the active and the file credential.  'error' is presumably
+ * set by the macro -- confirm against its definition.
+ */
+ MAC_CHECK(posixshm_check_truncate, active_cred, file_cred, shmfd,
+ shmfd->shm_label);
+
+ return (error);
+}
+
+int
+mac_posixshm_check_unlink(struct ucred *cred, struct shmfd *shmfd)
+{
+ int error;
+ /*
+ * Run the posixshm_check_unlink entry point via MAC_CHECK.  'error'
+ * is presumably set by the macro -- confirm against its definition.
+ */
+ MAC_CHECK(posixshm_check_unlink, cred, shmfd, shmfd->shm_label);
+
+ return (error);
+}
diff --git a/sys/security/mac_stub/mac_stub.c b/sys/security/mac_stub/mac_stub.c
index 50463a0..165e7bb 100644
--- a/sys/security/mac_stub/mac_stub.c
+++ b/sys/security/mac_stub/mac_stub.c
@@ -578,6 +578,53 @@ stub_posixsem_create(struct ucred *cred, struct ksem *ks,
}
static int
+stub_posixshm_check_mmap(struct ucred *cred, struct shmfd *shmfd,
+ struct label *shmlabel, int prot, int flags)
+{
+ /* Stub: inspects nothing and always returns 0. */
+ return (0);
+}
+
+static int
+stub_posixshm_check_open(struct ucred *cred, struct shmfd *shmfd,
+ struct label *shmlabel)
+{
+ /* Stub: inspects nothing and always returns 0. */
+ return (0);
+}
+
+static int
+stub_posixshm_check_stat(struct ucred *active_cred, struct ucred *file_cred,
+ struct shmfd *shmfd, struct label *shmlabel)
+{
+ /* Stub: inspects nothing and always returns 0. */
+ return (0);
+}
+
+static int
+stub_posixshm_check_truncate(struct ucred *active_cred,
+ struct ucred *file_cred, struct shmfd *shmfd, struct label *shmlabel)
+{
+ /* Stub: inspects nothing and always returns 0. */
+ return (0);
+}
+
+static int
+stub_posixshm_check_unlink(struct ucred *cred, struct shmfd *shmfd,
+ struct label *shmlabel)
+{
+ /* Stub: inspects nothing and always returns 0. */
+ return (0);
+}
+
+static void
+stub_posixshm_create(struct ucred *cred, struct shmfd *shmfd,
+ struct label *shmlabel)
+{
+ /* Stub: intentionally empty. */
+}
+
+static int
stub_priv_check(struct ucred *cred, int priv)
{
@@ -1550,6 +1597,15 @@ static struct mac_policy_ops stub_ops =
.mpo_posixsem_destroy_label = stub_destroy_label,
.mpo_posixsem_init_label = stub_init_label,
+ .mpo_posixshm_check_mmap = stub_posixshm_check_mmap,
+ .mpo_posixshm_check_open = stub_posixshm_check_open,
+ .mpo_posixshm_check_stat = stub_posixshm_check_stat,
+ .mpo_posixshm_check_truncate = stub_posixshm_check_truncate,
+ .mpo_posixshm_check_unlink = stub_posixshm_check_unlink,
+ .mpo_posixshm_create = stub_posixshm_create,
+ .mpo_posixshm_destroy_label = stub_destroy_label,
+ .mpo_posixshm_init_label = stub_init_label,
+
.mpo_priv_check = stub_priv_check,
.mpo_priv_grant = stub_priv_grant,
diff --git a/sys/security/mac_test/mac_test.c b/sys/security/mac_test/mac_test.c
index e28e4c3..14d3b80 100644
--- a/sys/security/mac_test/mac_test.c
+++ b/sys/security/mac_test/mac_test.c
@@ -94,6 +94,7 @@ SYSCTL_NODE(_security_mac, OID_AUTO, test, CTLFLAG_RW, 0,
#define MAGIC_SYSV_SHM 0x76119ab0
#define MAGIC_PIPE 0xdc6c9919
#define MAGIC_POSIX_SEM 0x78ae980c
+#define MAGIC_POSIX_SHM 0x4e853fc9
#define MAGIC_PROC 0x3b4be98f
#define MAGIC_CRED 0x9a5a4987
#define MAGIC_VNODE 0x1a67a45c
@@ -1116,6 +1117,92 @@ test_posixsem_init_label(struct label *label)
COUNTER_INC(posixsem_init_label);
}
+/*
+ * mac_test entry point for posixshm_check_mmap.  Runs LABEL_CHECK on
+ * the credential label and the shm label against their expected magic
+ * values, bumps the hook's counter, and returns 0.  'shmfd', 'prot'
+ * and 'flags' are not examined.
+ */
+COUNTER_DECL(posixshm_check_mmap);
+static int
+test_posixshm_check_mmap(struct ucred *cred, struct shmfd *shmfd,
+    struct label *shmfdlabel, int prot, int flags)
+{
+
+	LABEL_CHECK(cred->cr_label, MAGIC_CRED);
+	LABEL_CHECK(shmfdlabel, MAGIC_POSIX_SHM);
+	/* Count the call, as the other posixshm entry points do. */
+	COUNTER_INC(posixshm_check_mmap);
+
+	return (0);
+}
+
+/*
+ * mac_test entry point for posixshm_check_open.  Runs LABEL_CHECK on
+ * the credential label and the shm label against their expected magic
+ * values, bumps the hook's counter, and returns 0.  'shmfd' is not
+ * examined.
+ */
+COUNTER_DECL(posixshm_check_open);
+static int
+test_posixshm_check_open(struct ucred *cred, struct shmfd *shmfd,
+    struct label *shmfdlabel)
+{
+
+	LABEL_CHECK(cred->cr_label, MAGIC_CRED);
+	LABEL_CHECK(shmfdlabel, MAGIC_POSIX_SHM);
+	/* Count the call, as the other posixshm entry points do. */
+	COUNTER_INC(posixshm_check_open);
+
+	return (0);
+}
+
+/*
+ * mac_test entry point for posixshm_check_stat.  Runs LABEL_CHECK on
+ * the active and file credential labels and on the shm label against
+ * their expected magic values, bumps the hook's counter, and returns 0.
+ * 'shmfd' is not examined.
+ */
+COUNTER_DECL(posixshm_check_stat);
+static int
+test_posixshm_check_stat(struct ucred *active_cred,
+    struct ucred *file_cred, struct shmfd *shmfd, struct label *shmfdlabel)
+{
+
+	LABEL_CHECK(active_cred->cr_label, MAGIC_CRED);
+	LABEL_CHECK(file_cred->cr_label, MAGIC_CRED);
+	LABEL_CHECK(shmfdlabel, MAGIC_POSIX_SHM);
+	/* Count the call, as the other posixshm entry points do. */
+	COUNTER_INC(posixshm_check_stat);
+
+	return (0);
+}
+
+/*
+ * mac_test entry point for posixshm_check_truncate.  Runs LABEL_CHECK
+ * on the active and file credential labels and on the shm label against
+ * their expected magic values, bumps the hook's counter, and returns 0.
+ * 'shmfd' is not examined.
+ */
+COUNTER_DECL(posixshm_check_truncate);
+static int
+test_posixshm_check_truncate(struct ucred *active_cred,
+    struct ucred *file_cred, struct shmfd *shmfd, struct label *shmfdlabel)
+{
+
+	LABEL_CHECK(active_cred->cr_label, MAGIC_CRED);
+	LABEL_CHECK(file_cred->cr_label, MAGIC_CRED);
+	LABEL_CHECK(shmfdlabel, MAGIC_POSIX_SHM);
+	/* Count the call, as the other posixshm entry points do. */
+	COUNTER_INC(posixshm_check_truncate);
+
+	return (0);
+}
+
+/*
+ * mac_test entry point for posixshm_check_unlink.  Runs LABEL_CHECK on
+ * the credential label and the shm label against their expected magic
+ * values, bumps the hook's counter, and returns 0.  'shmfd' is not
+ * examined.
+ */
+COUNTER_DECL(posixshm_check_unlink);
+static int
+test_posixshm_check_unlink(struct ucred *cred, struct shmfd *shmfd,
+    struct label *shmfdlabel)
+{
+
+	LABEL_CHECK(cred->cr_label, MAGIC_CRED);
+	LABEL_CHECK(shmfdlabel, MAGIC_POSIX_SHM);
+	/* Count the call, as the other posixshm entry points do. */
+	COUNTER_INC(posixshm_check_unlink);
+
+	return (0);
+}
+
+COUNTER_DECL(posixshm_create);
+static void
+test_posixshm_create(struct ucred *cred, struct shmfd *shmfd,
+ struct label *shmfdlabel)
+{
+ /*
+ * Run LABEL_CHECK on the credential and shm labels against their
+ * expected magic values, then count the call.  'shmfd' is unused.
+ */
+ LABEL_CHECK(cred->cr_label, MAGIC_CRED);
+ LABEL_CHECK(shmfdlabel, MAGIC_POSIX_SHM);
+ COUNTER_INC(posixshm_create);
+}
+
+COUNTER_DECL(posixshm_destroy_label);
+static void
+test_posixshm_destroy_label(struct label *label)
+{
+ /* Apply LABEL_DESTROY with the POSIX shm magic, then count the call. */
+ LABEL_DESTROY(label, MAGIC_POSIX_SHM);
+ COUNTER_INC(posixshm_destroy_label);
+}
+
+COUNTER_DECL(posixshm_init_label);
+static void
+test_posixshm_init_label(struct label *label)
+{
+ /* Apply LABEL_INIT with the POSIX shm magic, then count the call. */
+ LABEL_INIT(label, MAGIC_POSIX_SHM);
+ COUNTER_INC(posixshm_init_label);
+}
+
COUNTER_DECL(proc_check_debug);
static int
test_proc_check_debug(struct ucred *cred, struct proc *p)
@@ -2809,6 +2896,15 @@ static struct mac_policy_ops test_ops =
.mpo_posixsem_destroy_label = test_posixsem_destroy_label,
.mpo_posixsem_init_label = test_posixsem_init_label,
+ .mpo_posixshm_check_mmap = test_posixshm_check_mmap,
+ .mpo_posixshm_check_open = test_posixshm_check_open,
+ .mpo_posixshm_check_stat = test_posixshm_check_stat,
+ .mpo_posixshm_check_truncate = test_posixshm_check_truncate,
+ .mpo_posixshm_check_unlink = test_posixshm_check_unlink,
+ .mpo_posixshm_create = test_posixshm_create,
+ .mpo_posixshm_destroy_label = test_posixshm_destroy_label,
+ .mpo_posixshm_init_label = test_posixshm_init_label,
+
.mpo_proc_check_debug = test_proc_check_debug,
.mpo_proc_check_sched = test_proc_check_sched,
.mpo_proc_check_setaudit = test_proc_check_setaudit,
diff --git a/sys/sys/fcntl.h b/sys/sys/fcntl.h
index 4b7b511..8ec5862 100644
--- a/sys/sys/fcntl.h
+++ b/sys/sys/fcntl.h
@@ -126,8 +126,20 @@ typedef __pid_t pid_t;
/* bits to save after open */
#define FMASK (FREAD|FWRITE|FAPPEND|FASYNC|FFSYNC|FNONBLOCK|O_DIRECT)
/* bits settable by fcntl(F_SETFL, ...) */
+#define FCNTLFLAGS (FAPPEND|FASYNC|FFSYNC|FNONBLOCK|O_DIRECT)
+
+#if defined(COMPAT_FREEBSD7) || defined(COMPAT_FREEBSD6) || \
+ defined(COMPAT_FREEBSD5) || defined(COMPAT_FREEBSD4)
+/*
+ * Set by shm_open(3) in older libc's to get automatic MAP_ASYNC
+ * behavior for POSIX shared memory objects (which are otherwise
+ * implemented as plain files).
+ */
+#define FPOSIXSHM O_NOFOLLOW
+#undef FCNTLFLAGS
#define FCNTLFLAGS (FAPPEND|FASYNC|FFSYNC|FNONBLOCK|FPOSIXSHM|O_DIRECT)
#endif
+#endif
/*
* The O_* flags used to have only F* names, which were used in the kernel
@@ -150,13 +162,6 @@ typedef __pid_t pid_t;
* different meaning for fcntl(2).
*/
#if __BSD_VISIBLE
-
-/*
- * Set by shm_open(3) to get automatic MAP_ASYNC behavior
- * for POSIX shared memory objects (which are otherwise
- * implemented as plain files).
- */
-#define FPOSIXSHM O_NOFOLLOW
#endif
/*
diff --git a/sys/sys/file.h b/sys/sys/file.h
index 757c884..0281d21 100644
--- a/sys/sys/file.h
+++ b/sys/sys/file.h
@@ -59,6 +59,7 @@ struct socket;
#define DTYPE_KQUEUE 5 /* event queue */
#define DTYPE_CRYPTO 6 /* crypto */
#define DTYPE_MQUEUE 7 /* posix message queue */
+#define DTYPE_SHM 8 /* swap-backed shared memory */
#ifdef _KERNEL
diff --git a/sys/sys/mman.h b/sys/sys/mman.h
index 0f7416f..c6b8d42 100644
--- a/sys/sys/mman.h
+++ b/sys/sys/mman.h
@@ -139,6 +139,11 @@
#define MINCORE_MODIFIED 0x4 /* Page has been modified by us */
#define MINCORE_REFERENCED_OTHER 0x8 /* Page has been referenced */
#define MINCORE_MODIFIED_OTHER 0x10 /* Page has been modified */
+
+/*
+ * Anonymous object constant for shm_open().
+ */
+#define SHM_ANON ((char *)1)
#endif /* __BSD_VISIBLE */
/*
@@ -168,7 +173,33 @@ typedef __size_t size_t;
#define _SIZE_T_DECLARED
#endif
-#ifndef _KERNEL
+#ifdef _KERNEL
+#include <vm/vm.h>
+
+struct shmfd { /* kernel state of one POSIX shared memory object */
+ size_t shm_size; /* object size; 0 at creation, bounds shm_mmap() requests */
+ vm_object_t shm_object; /* backing VM object; released in shm_drop() */
+ int shm_refs; /* reference count (refcount_init/acquire/release) */
+ uid_t shm_uid; /* owner uid, taken from the creating credential */
+ gid_t shm_gid; /* owner gid, taken from the creating credential */
+ mode_t shm_mode; /* permission bits checked by shm_access() */
+
+ /*
+ * Values maintained solely to make this a better-behaved file
+ * descriptor for fstat() to run on.
+ */
+ struct timespec shm_atime; /* refreshed by shm_mmap() */
+ struct timespec shm_mtime;
+ struct timespec shm_ctime;
+ struct timespec shm_birthtime; /* set at creation; initial value of the other three */
+
+ struct label *shm_label; /* MAC label */
+};
+
+int shm_mmap(struct shmfd *shmfd, vm_size_t objsize, vm_ooffset_t foff,
+ vm_object_t *obj);
+
+#else /* !_KERNEL */
__BEGIN_DECLS
/*
diff --git a/sys/vm/vm_mmap.c b/sys/vm/vm_mmap.c
index 4820a14..e6c82dc 100644
--- a/sys/vm/vm_mmap.c
+++ b/sys/vm/vm_mmap.c
@@ -118,6 +118,8 @@ static int vm_mmap_vnode(struct thread *, vm_size_t, vm_prot_t, vm_prot_t *,
int *, struct vnode *, vm_ooffset_t, vm_object_t *);
static int vm_mmap_cdev(struct thread *, vm_size_t, vm_prot_t, vm_prot_t *,
int *, struct cdev *, vm_ooffset_t, vm_object_t *);
+static int vm_mmap_shm(struct thread *, vm_size_t, vm_prot_t, vm_prot_t *,
+ int *, struct shmfd *, vm_ooffset_t, vm_object_t *);
/*
* MPSAFE
@@ -300,16 +302,29 @@ mmap(td, uap)
pos = 0;
} else {
/*
- * Mapping file, get fp for validation. Obtain vnode and make
- * sure it is of appropriate type.
- * don't let the descriptor disappear on us if we block
+ * Mapping file, get fp for validation and
+ * don't let the descriptor disappear on us if we block.
*/
if ((error = fget(td, uap->fd, &fp)) != 0)
goto done;
+ if (fp->f_type == DTYPE_SHM) {
+ handle = fp->f_data;
+ handle_type = OBJT_SWAP;
+ maxprot = VM_PROT_NONE;
+
+ /* FREAD should always be set. */
+ if (fp->f_flag & FREAD)
+ maxprot |= VM_PROT_EXECUTE | VM_PROT_READ;
+ if (fp->f_flag & FWRITE)
+ maxprot |= VM_PROT_WRITE;
+ goto map;
+ }
if (fp->f_type != DTYPE_VNODE) {
error = ENODEV;
goto done;
}
+#if defined(COMPAT_FREEBSD7) || defined(COMPAT_FREEBSD6) || \
+ defined(COMPAT_FREEBSD5) || defined(COMPAT_FREEBSD4)
/*
* POSIX shared-memory objects are defined to have
* kernel persistence, and are not defined to support
@@ -320,6 +335,7 @@ mmap(td, uap)
*/
if (fp->f_flag & FPOSIXSHM)
flags |= MAP_NOSYNC;
+#endif
vp = fp->f_vnode;
/*
* Ensure that file and memory protections are
@@ -360,6 +376,7 @@ mmap(td, uap)
handle = (void *)vp;
handle_type = OBJT_VNODE;
}
+map:
/*
* Do not allow more then a certain number of vm_map_entry structures
@@ -1291,6 +1308,35 @@ vm_mmap_cdev(struct thread *td, vm_size_t objsize,
}
/*
+ * vm_mmap_shm()
+ *
+ * MPSAFE
+ *
+ * Helper function for vm_mmap. Perform sanity checks specific to mmap
+ * operations on shm file descriptors.
+ */
+int
+vm_mmap_shm(struct thread *td, vm_size_t objsize,
+    vm_prot_t prot, vm_prot_t *maxprotp, int *flagsp,
+    struct shmfd *shmfd, vm_ooffset_t foff, vm_object_t *objp)
+{
+#ifdef MAC
+	int error;
+#endif
+
+	/* A writable mapping needs write permission in the maximum protection. */
+	if ((prot & PROT_WRITE) != 0 &&
+	    (*maxprotp & VM_PROT_WRITE) == 0)
+		return (EACCES);
+#ifdef MAC
+	error = mac_posixshm_check_mmap(td->td_ucred, shmfd, prot, *flagsp);
+	if (error != 0)
+		return (error);
+#endif
+	/* Range validation and the object reference are left to shm_mmap(). */
+	return (shm_mmap(shmfd, objsize, foff, objp));
+}
+
+/*
* vm_mmap()
*
* MPSAFE
@@ -1354,6 +1400,10 @@ vm_mmap(vm_map_t map, vm_offset_t *addr, vm_size_t size, vm_prot_t prot,
error = vm_mmap_vnode(td, size, prot, &maxprot, &flags,
handle, foff, &object);
break;
+ case OBJT_SWAP:
+ error = vm_mmap_shm(td, size, prot, &maxprot, &flags,
+ handle, foff, &object);
+ break;
case OBJT_DEFAULT:
if (handle == NULL) {
error = 0;
OpenPOWER on IntegriCloud