summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--sys/amd64/linux32/linux32_dummy.c4
-rw-r--r--sys/amd64/linux32/syscalls.master10
-rw-r--r--sys/compat/linux/linux_epoll.c554
-rw-r--r--sys/compat/linux/linux_epoll.h68
-rw-r--r--sys/conf/files.amd641
-rw-r--r--sys/conf/files.i3861
-rw-r--r--sys/conf/files.pc981
-rw-r--r--sys/i386/linux/linux_dummy.c4
-rw-r--r--sys/i386/linux/syscalls.master10
-rw-r--r--sys/kern/kern_event.c123
-rw-r--r--sys/modules/linux/Makefile2
-rw-r--r--sys/sys/event.h18
-rw-r--r--sys/sys/file.h2
-rw-r--r--sys/sys/syscallsubr.h7
14 files changed, 736 insertions, 69 deletions
diff --git a/sys/amd64/linux32/linux32_dummy.c b/sys/amd64/linux32/linux32_dummy.c
index 95bf3ec..1ae64bb 100644
--- a/sys/amd64/linux32/linux32_dummy.c
+++ b/sys/amd64/linux32/linux32_dummy.c
@@ -70,9 +70,6 @@ DUMMY(pivot_root);
DUMMY(mincore);
DUMMY(ptrace);
DUMMY(lookup_dcookie);
-DUMMY(epoll_create);
-DUMMY(epoll_ctl);
-DUMMY(epoll_wait);
DUMMY(remap_file_pages);
DUMMY(timer_create);
DUMMY(timer_settime);
@@ -129,7 +126,6 @@ DUMMY(timerfd_gettime);
/* linux 2.6.27: */
DUMMY(signalfd4);
DUMMY(eventfd2);
-DUMMY(epoll_create1);
DUMMY(dup3);
DUMMY(inotify_init1);
/* linux 2.6.30: */
diff --git a/sys/amd64/linux32/syscalls.master b/sys/amd64/linux32/syscalls.master
index c3a10af..b9a0829 100644
--- a/sys/amd64/linux32/syscalls.master
+++ b/sys/amd64/linux32/syscalls.master
@@ -430,9 +430,11 @@
251 AUE_NULL UNIMPL
252 AUE_EXIT STD { int linux_exit_group(int error_code); }
253 AUE_NULL STD { int linux_lookup_dcookie(void); }
-254 AUE_NULL STD { int linux_epoll_create(void); }
-255 AUE_NULL STD { int linux_epoll_ctl(void); }
-256 AUE_NULL STD { int linux_epoll_wait(void); }
+254 AUE_NULL STD { int linux_epoll_create(l_int size); }
+255 AUE_NULL STD { int linux_epoll_ctl(l_int epfd, l_int op, l_int fd, \
+ struct linux_epoll_event *event); }
+256 AUE_NULL STD { int linux_epoll_wait(l_int epfd, struct linux_epoll_event *events, \
+ l_int maxevents, l_int timeout); }
257 AUE_NULL STD { int linux_remap_file_pages(void); }
258 AUE_NULL STD { int linux_set_tid_address(int *tidptr); }
259 AUE_NULL STD { int linux_timer_create(void); }
@@ -534,7 +536,7 @@
; linux 2.6.27:
327 AUE_NULL STD { int linux_signalfd4(void); }
328 AUE_NULL STD { int linux_eventfd2(void); }
-329 AUE_NULL STD { int linux_epoll_create1(void); }
+329 AUE_NULL STD { int linux_epoll_create1(l_int flags); }
330 AUE_NULL STD { int linux_dup3(void); }
331 AUE_NULL STD { int linux_pipe2(l_int *pipefds, l_int flags); }
332 AUE_NULL STD { int linux_inotify_init1(void); }
diff --git a/sys/compat/linux/linux_epoll.c b/sys/compat/linux/linux_epoll.c
new file mode 100644
index 0000000..b9e1f2b
--- /dev/null
+++ b/sys/compat/linux/linux_epoll.c
@@ -0,0 +1,554 @@
+/*-
+ * Copyright (c) 2007 Roman Divacky
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include "opt_compat.h"
+#include "opt_ktrace.h"
+
+#include <sys/limits.h>
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/capability.h>
+#include <sys/types.h>
+#include <sys/systm.h>
+#include <sys/file.h>
+#include <sys/filedesc.h>
+#include <sys/errno.h>
+#include <sys/event.h>
+#include <sys/proc.h>
+#include <sys/sysproto.h>
+#include <sys/syscallsubr.h>
+#include <sys/timespec.h>
+#include <compat/linux/linux_epoll.h>
+#include <compat/linux/linux_util.h>
+#ifdef KTRACE
+#include <sys/ktrace.h>
+#endif
+
+#ifdef COMPAT_LINUX32
+#include <machine/../linux32/linux.h>
+#include <machine/../linux32/linux32_proto.h>
+#else
+#include <machine/../linux/linux.h>
+#include <machine/../linux/linux_proto.h>
+#endif
+
+/*
+ * Emit `count' epoll events starting at `evt' as one ktrace struct record.
+ * Both arguments are parenthesized so that expressions such as `n + 1'
+ * or `&ev' expand with the intended precedence.
+ */
+#define ktrepoll_events(evt, count) \
+	ktrstruct("linux_epoll_event", (evt), (count) * sizeof(*(evt)))
+
+/*
+ * epoll defines 'struct epoll_event' with the field 'data' as 64 bits
+ * on all architectures. But on 32-bit architectures the BSD 'struct kevent'
+ * only has a 32-bit opaque pointer as its 'udata' field, so we can't pass
+ * the epoll-supplied data verbatim. Therefore on 32-bit architectures we
+ * keep a separate 64-bit slot for every file descriptor to hold the
+ * user-supplied data.
+ */
+typedef uint64_t epoll_udata_t;
+#if defined(__i386__)
+#define EPOLL_WIDE_USER_DATA 1
+#else
+#define EPOLL_WIDE_USER_DATA 0
+#endif
+
+#if EPOLL_WIDE_USER_DATA
+
+/*
+ * Approach similar to epoll_user_data could also be used to
+ * keep track of event bits per file descriptor for all architectures.
+ * However, it isn't obvious that such tracking would be beneficial
+ * in practice.
+ */
+
+/*
+ * Per-epoll-descriptor vector of 64-bit user data values, indexed by the
+ * registered file descriptor number.  It is allocated with
+ * EPOLL_USER_DATA_SIZE(n), so `data' really holds `sz' elements even though
+ * it is declared with a single one.
+ */
+struct epoll_user_data {
+ unsigned sz;	/* number of slots available in data[] */
+ epoll_udata_t data[1];	/* data[fd] is the user data registered for fd */
+};
+static MALLOC_DEFINE(M_LINUX_EPOLL, "epoll", "memory for epoll system");
+#define EPOLL_USER_DATA_SIZE(ndata) \
+ (sizeof(struct epoll_user_data)+((ndata)-1)*sizeof(epoll_udata_t))
+#define EPOLL_USER_DATA_MARGIN 16
+
+static void epoll_init_user_data(struct thread *td, struct file *epfp);
+static void epoll_set_user_data(struct thread *td, struct file *epfp, int fd, epoll_udata_t user_data);
+static epoll_udata_t epoll_get_user_data(struct thread *td, struct file *epfp, int fd);
+static fo_close_t epoll_close;
+
+/*
+ * File operations for epoll descriptors on EPOLL_WIDE_USER_DATA builds.
+ * Everything is forwarded to the kqueue implementation except close, which
+ * goes through epoll_close() so the user data vector can be released.
+ */
+static struct fileops epollops = {
+ .fo_read = kqueue_read,
+ .fo_write = kqueue_write,
+ .fo_truncate = kqueue_truncate,
+ .fo_ioctl = kqueue_ioctl,
+ .fo_poll = kqueue_poll,
+ .fo_kqfilter = kqueue_kqfilter,
+ .fo_stat = kqueue_stat,
+ .fo_close = epoll_close,
+ .fo_chmod = invfo_chmod,
+ .fo_chown = invfo_chown,
+ .fo_sendfile = invfo_sendfile,
+};
+#endif
+
+static struct file* epoll_fget(struct thread *td, int epfd);
+
+/*
+ * Cursor handed to epoll_kev_copyin(): points at the next already converted
+ * kevent (in kernel memory) that has not yet been given to the kevent code.
+ */
+struct epoll_copyin_args {
+ struct kevent *changelist;
+};
+
+/*
+ * State shared between linux_epoll_wait() and epoll_kev_copyout().
+ *
+ * The `td' member is guarded with defined(KTRACE), the same test used at
+ * the places that read and write it, so the member exists exactly when
+ * those users are compiled in.
+ */
+struct epoll_copyout_args {
+	struct linux_epoll_event *leventlist;	/* next user slot to fill */
+	int count;				/* events copied out so far */
+	int error;				/* first copyout error, or 0 */
+#if defined(KTRACE) || EPOLL_WIDE_USER_DATA
+	struct thread *td;
+#endif
+#if EPOLL_WIDE_USER_DATA
+	struct file *epfp;			/* epoll file owning the user data */
+#endif
+};
+
+
+/*
+ * Create a new epoll file descriptor, backed by a kqueue.
+ *
+ * kern_kqueue_locked() stores the new descriptor number in td->td_retval[0]
+ * and hands the file back with a reference still held.  That reference has
+ * to be dropped on every architecture, not only when EPOLL_WIDE_USER_DATA
+ * is set, otherwise it is leaked.
+ *
+ * Returns 0 on success or the error reported by kern_kqueue_locked().
+ */
+static int
+linux_epoll_create_common(struct thread *td)
+{
+	struct file *fp;
+	int error;
+
+	error = kern_kqueue_locked(td, &fp);
+	if (error != 0)
+		return (error);
+
+#if EPOLL_WIDE_USER_DATA
+	epoll_init_user_data(td, fp);
+#endif
+	fdrop(fp, td);
+	return (0);
+}
+
+/*
+ * epoll_create(2).  The size argument only has to be positive; it is
+ * checked and then ignored, which is what Linux does with it as well.
+ */
+int
+linux_epoll_create(struct thread *td, struct linux_epoll_create_args *args)
+{
+	if (args->size > 0)
+		return (linux_epoll_create_common(td));
+
+	return (EINVAL);
+}
+
+/*
+ * epoll_create1(2): like epoll_create() but takes creation flags.
+ *
+ * Flags other than LINUX_EPOLL_CLOEXEC and LINUX_EPOLL_NONBLOCK are
+ * rejected with EINVAL before any descriptor is allocated, so a bad call
+ * does not leave a stray descriptor behind.  LINUX_EPOLL_NONBLOCK is
+ * accepted but only logged, as it is not implemented.
+ *
+ * Returns 0 with the new descriptor in td->td_retval[0], or an errno.
+ */
+int
+linux_epoll_create1(struct thread *td, struct linux_epoll_create1_args *args)
+{
+	struct filedesc *fdp;
+	int error;
+
+	if ((args->flags & ~(LINUX_EPOLL_CLOEXEC | LINUX_EPOLL_NONBLOCK)) != 0)
+		return (EINVAL);
+
+	error = linux_epoll_create_common(td);
+	if (error != 0)
+		return (error);
+
+	if (args->flags & LINUX_EPOLL_CLOEXEC) {
+		/*
+		 * The descriptor table is shared with other threads of the
+		 * process; touch fd_ofiles[] only with the filedesc lock held.
+		 */
+		fdp = td->td_proc->p_fd;
+		FILEDESC_XLOCK(fdp);
+		fdp->fd_ofiles[td->td_retval[0]].fde_flags |= UF_EXCLOSE;
+		FILEDESC_XUNLOCK(fdp);
+	}
+	if (args->flags & LINUX_EPOLL_NONBLOCK)
+		linux_msg(td, "epoll_create1 doesn't yet support EPOLL_NONBLOCK flag\n");
+
+	return (0);
+}
+
+/*
+ * Convert one epoll event description into kevent change entries.
+ *
+ * td        - calling thread (used for logging and user data storage).
+ * epfp      - epoll file, only present on EPOLL_WIDE_USER_DATA builds.
+ * fd        - descriptor the event is being registered for.
+ * l_event   - epoll event as supplied by the caller.
+ * kev_flags - base kevent flags (e.g. EV_ADD | EV_ENABLE) to apply.
+ * kevent    - output array; at most two entries are written.
+ * nkevents  - incremented once for every entry written to `kevent'.
+ *
+ * Returns 0 on success or EINVAL if an unsupported event flag was
+ * requested.  This function only fills in the caller's array; it does not
+ * register anything itself.
+ */
+static int
+linux_epoll_to_kevent(struct thread *td,
+#if EPOLL_WIDE_USER_DATA
+ struct file *epfp,
+#endif
+ int fd, struct linux_epoll_event *l_event, int kev_flags, struct kevent *kevent, int *nkevents)
+{
+ /* flags related to how event is registered */
+ if (l_event->events & LINUX_EPOLLONESHOT)
+ kev_flags |= EV_ONESHOT;
+ if (l_event->events & LINUX_EPOLLET) {
+ kev_flags |= EV_CLEAR;
+ }
+
+ /* flags related to what event is registered */
+ /* any read-side flag maps onto a single EVFILT_READ filter */
+ if (l_event->events & LINUX_EPOLLIN ||
+ l_event->events & LINUX_EPOLLRDNORM ||
+ l_event->events & LINUX_EPOLLPRI ||
+ l_event->events & LINUX_EPOLLRDHUP) {
+ EV_SET(kevent++, fd, EVFILT_READ, kev_flags, 0, 0,
+ (void*)(EPOLL_WIDE_USER_DATA ? 0 : l_event->data));
+ ++*nkevents;
+ }
+ /* any write-side flag maps onto a single EVFILT_WRITE filter */
+ if (l_event->events & LINUX_EPOLLOUT ||
+ l_event->events & LINUX_EPOLLWRNORM) {
+ EV_SET(kevent++, fd, EVFILT_WRITE, kev_flags, 0, 0,
+ (void*)(EPOLL_WIDE_USER_DATA ? 0 : l_event->data));
+ ++*nkevents;
+ }
+ /*
+ * Everything else is refused outright.
+ * NOTE(review): this includes EPOLLERR and EPOLLHUP; confirm that
+ * rejecting them (rather than ignoring them) is what callers expect.
+ */
+ if (l_event->events & LINUX_EPOLLRDBAND ||
+ l_event->events & LINUX_EPOLLWRBAND ||
+ l_event->events & LINUX_EPOLLHUP ||
+ l_event->events & LINUX_EPOLLMSG ||
+ l_event->events & LINUX_EPOLLWAKEUP ||
+ l_event->events & LINUX_EPOLLERR) {
+ linux_msg(td, "epoll_ctl doesn't yet support some event flags supplied: 0x%x\n",
+ l_event->events);
+ return (EINVAL);
+ }
+
+#if EPOLL_WIDE_USER_DATA
+ /* udata was left as 0 above; the 64-bit value is kept per descriptor */
+ epoll_set_user_data(td, epfp, fd, l_event->data);
+#endif
+ return (0);
+}
+
+/*
+ * Convert one kevent into an epoll event.
+ *
+ * For a kevent without EV_ERROR the filter is translated into the matching
+ * epoll event bits (EVFILT_READ and EVFILT_WRITE only; any other filter
+ * leaves l_event->events untouched).  For a kevent carrying EV_ERROR the
+ * events field is not written at all, so it keeps whatever the caller
+ * initialized it to.  The user data is filled in for every kevent: from
+ * the per-descriptor vector on EPOLL_WIDE_USER_DATA builds, from
+ * kevent->udata otherwise.
+ */
+static void
+linux_kevent_to_epoll(
+#if EPOLL_WIDE_USER_DATA
+ struct thread *td, struct file *epfp,
+#endif
+ struct kevent *kevent, struct linux_epoll_event *l_event)
+{
+ if ((kevent->flags & EV_ERROR) == 0)
+ switch (kevent->filter) {
+ case EVFILT_READ:
+ l_event->events = LINUX_EPOLLIN|LINUX_EPOLLRDNORM|LINUX_EPOLLPRI;
+ break;
+ case EVFILT_WRITE:
+ l_event->events = LINUX_EPOLLOUT|LINUX_EPOLLWRNORM;
+ break;
+ }
+#if EPOLL_WIDE_USER_DATA
+ l_event->data = epoll_get_user_data(td, epfp, kevent->ident);
+#else
+ l_event->data = (epoll_udata_t)kevent->udata;
+#endif
+}
+
+/*
+ * kevent copyout callback.  Translates a batch of `count' kevents into
+ * epoll events in a temporary buffer and copies the batch to the user
+ * supplied array.  On success the output cursor and the running event
+ * count are advanced; on failure the first error is remembered in the
+ * callback arguments and also returned.
+ */
+static int
+epoll_kev_copyout(void *arg, struct kevent *kevp, int count)
+{
+	struct epoll_copyout_args *coargs = arg;
+	struct linux_epoll_event *levents;
+	int error, idx;
+
+	levents = malloc(sizeof(*levents) * count, M_TEMP, M_WAITOK | M_ZERO);
+
+	for (idx = 0; idx < count; idx++) {
+		linux_kevent_to_epoll(
+#if EPOLL_WIDE_USER_DATA
+		    coargs->td, coargs->epfp,
+#endif
+		    &kevp[idx], &levents[idx]);
+	}
+
+	error = copyout(levents, coargs->leventlist, count * sizeof(*levents));
+	if (error == 0) {
+		coargs->leventlist += count;
+		coargs->count += count;
+	} else if (coargs->error == 0) {
+		coargs->error = error;
+	}
+
+#ifdef KTRACE
+	if (KTRPOINT(coargs->td, KTR_STRUCT))
+		ktrepoll_events(levents, count);
+#endif
+
+	free(levents, M_TEMP);
+	return (error);
+}
+
+/*
+ * kevent copyin callback.  The change list was already converted and
+ * lives in kernel memory, so the next `count' entries are handed over
+ * with a plain memcpy() rather than copyin(), and the cursor is advanced.
+ */
+static int
+epoll_kev_copyin(void *arg, struct kevent *kevp, int count)
+{
+	struct epoll_copyin_args *ciargs = arg;
+	size_t nbytes = count * sizeof(*kevp);
+
+	memcpy(kevp, ciargs->changelist, nbytes);
+	ciargs->changelist += count;
+
+	return (0);
+}
+
+/* Map ENOENT to success; every other value is passed through unchanged. */
+static int
+ignore_enoent(int error)
+{
+	return (error == ENOENT ? 0 : error);
+}
+
+/*
+ * Ask the kqueue behind `epfp' to remove the (fd, filter) registration.
+ * Returns whatever kern_kevent_locked() reports for that single change.
+ */
+static int
+delete_event(struct thread *td, struct file *epfp, int fd, int filter)
+{
+	struct kevent change;
+	struct epoll_copyin_args ciargs = { &change };
+	struct kevent_copyops k_ops = { &ciargs, NULL, epoll_kev_copyin };
+
+	EV_SET(&change, fd, filter, EV_DELETE | EV_DISABLE, 0, 0, 0);
+
+	return (kern_kevent_locked(td, epfp, 1, 0, &k_ops, NULL));
+}
+
+/*
+ * Remove both filters this layer may have registered for `fd'.  ENOENT is
+ * ignored because we do not keep track of which of the two was actually
+ * set.  The read-filter error takes precedence when both deletions fail.
+ */
+static int
+delete_all_events(struct thread *td, struct file *epfp, int fd)
+{
+	int rd_error, wr_error;
+
+	rd_error = ignore_enoent(delete_event(td, epfp, fd, EVFILT_READ));
+	wr_error = ignore_enoent(delete_event(td, epfp, fd, EVFILT_WRITE));
+
+	return (rd_error != 0 ? rd_error : wr_error);
+}
+
+/*
+ * epoll_ctl(2): load an epoll filter, convert it to kevent filters and
+ * load them into the kqueue that backs the epoll descriptor.
+ *
+ * Returns 0 on success or an errno: EINVAL for epfd == fd, an unknown op
+ * or unsupported event flags; EBADF for an out-of-range fd or an epfd that
+ * cannot be used as an epoll descriptor; otherwise whatever copyin() or
+ * the kevent layer reports.
+ */
+int
+linux_epoll_ctl(struct thread *td, struct linux_epoll_ctl_args *args)
+{
+	struct file *epfp;
+	struct epoll_copyin_args ciargs;
+	struct kevent kev[2];
+	struct kevent_copyops k_ops = { &ciargs,
+					NULL,
+					epoll_kev_copyin};
+	struct linux_epoll_event le;
+	int kev_flags;
+	int nchanges = 0;
+	int error;
+
+	if (args->epfd == args->fd)
+		return (EINVAL);
+
+	/*
+	 * A descriptor outside the process' table can never be registered.
+	 * Reject it up front: on EPOLL_WIDE_USER_DATA builds the value both
+	 * indexes and sizes the per-epoll user data vector before the kevent
+	 * layer gets a chance to validate it.
+	 */
+	if (args->fd < 0 || args->fd >= td->td_proc->p_fd->fd_nfiles)
+		return (EBADF);
+
+	/* `le' is not copied in for CTL_DEL but is still printed under DEBUG. */
+	memset(&le, 0, sizeof(le));
+	if (args->op != LINUX_EPOLL_CTL_DEL) {
+		error = copyin(args->event, &le, sizeof(le));
+		if (error)
+			return (error);
+	}
+#ifdef DEBUG
+	if (ldebug(epoll_ctl))
+		printf(ARGS(epoll_ctl,"%i, %i, %i, %u"), args->epfd, args->op,
+			args->fd, le.events);
+#endif
+#ifdef KTRACE
+	if (KTRPOINT(td, KTR_STRUCT) && args->op != LINUX_EPOLL_CTL_DEL)
+		ktrepoll_events(&le, 1);
+#endif
+	/* epfd comes straight from userland: fail the call, do not panic. */
+	epfp = epoll_fget(td, args->epfd);
+	if (epfp == NULL)
+		return (EBADF);
+
+	ciargs.changelist = kev;
+
+	switch (args->op) {
+	case LINUX_EPOLL_CTL_MOD:
+		/*
+		 * We don't memorize which events were set for this FD
+		 * on this level, so just delete all we could have set:
+		 * EVFILT_READ and EVFILT_WRITE, ignoring ENOENT.
+		 */
+		error = delete_all_events(td, epfp, args->fd);
+		if (error)
+			goto leave;
+		/* FALLTHROUGH */
+	case LINUX_EPOLL_CTL_ADD:
+		kev_flags = EV_ADD | EV_ENABLE;
+		break;
+	case LINUX_EPOLL_CTL_DEL:
+		/* CTL_DEL means unregister this fd with this epoll */
+		error = delete_all_events(td, epfp, args->fd);
+		goto leave;
+	default:
+		error = EINVAL;
+		goto leave;
+	}
+
+	error = linux_epoll_to_kevent(td,
+#if EPOLL_WIDE_USER_DATA
+		epfp,
+#endif
+		args->fd, &le, kev_flags, kev, &nchanges);
+	if (error)
+		goto leave;
+
+	error = kern_kevent_locked(td, epfp, nchanges, 0, &k_ops, NULL);
+leave:
+	fdrop(epfp, td);
+	return (error);
+}
+
+/*
+ * epoll_wait(2): wait for a filter to be triggered on the epoll file
+ * descriptor.
+ *
+ * At most args->maxevents events are copied to args->events and their
+ * number is returned in td->td_retval[0].  A timeout of -1 blocks without
+ * limit; any other negative timeout is rejected with EINVAL.  A bad epfd
+ * yields EBADF.
+ */
+int
+linux_epoll_wait(struct thread *td, struct linux_epoll_wait_args *args)
+{
+	struct file *epfp;
+	struct timespec ts, *tsp;
+	struct epoll_copyout_args coargs;
+	struct kevent_copyops k_ops = { &coargs,
+					epoll_kev_copyout,
+					NULL};
+	int error;
+
+	if (args->maxevents <= 0 || args->maxevents > LINUX_MAX_EVENTS)
+		return (EINVAL);
+
+	/* epfd comes straight from userland: fail the call, do not panic. */
+	epfp = epoll_fget(td, args->epfd);
+	if (epfp == NULL)
+		return (EBADF);
+
+	coargs.leventlist = args->events;
+	coargs.count = 0;
+	coargs.error = 0;
+#if defined(KTRACE) || EPOLL_WIDE_USER_DATA
+	coargs.td = td;
+#endif
+#if EPOLL_WIDE_USER_DATA
+	coargs.epfp = epfp;
+#endif
+
+	if (args->timeout != -1) {
+		if (args->timeout < 0) {
+			error = EINVAL;
+			goto leave;
+		}
+		/* Convert from milliseconds to timespec. */
+		ts.tv_sec = args->timeout / 1000;
+		ts.tv_nsec = (args->timeout % 1000) * 1000000;
+		tsp = &ts;
+	} else {
+		tsp = NULL;
+	}
+
+	error = kern_kevent_locked(td, epfp, 0, args->maxevents, &k_ops, tsp);
+	if (!error && coargs.error)
+		error = coargs.error;
+
+	/*
+	 * kern_kevent might return ENOMEM which is not expected from epoll_wait.
+	 * Maybe we should translate that but I don't think it matters at all.
+	 */
+
+	if (!error)
+		td->td_retval[0] = coargs.count;
+leave:
+	fdrop(epfp, td);
+	return (error);
+}
+
+#if EPOLL_WIDE_USER_DATA
+/*
+ * We store the user data vector in fvn_epollpriv, a member of struct file
+ * that is otherwise unused for kqueue descriptors.
+ *
+ * NOTE(review): nothing visible here serializes access to the vector
+ * between threads calling epoll_ctl()/epoll_wait() on the same epoll
+ * descriptor; confirm whether a lock is needed around realloc().
+ */
+#define EPOLL_USER_DATA_GET(epfp) \
+	((struct epoll_user_data*)(epfp)->f_vnun.fvn_epollpriv)
+#define EPOLL_USER_DATA_SET(epfp, udv) \
+	(epfp)->f_vnun.fvn_epollpriv = (udv)
+
+/*
+ * Turn a freshly created kqueue file into an epoll file: install the
+ * epoll file operations and attach an initial user data vector.
+ */
+static void
+epoll_init_user_data(struct thread *td, struct file *epfp)
+{
+	struct epoll_user_data *udv;
+
+	/* override file ops to have our close operation */
+	atomic_store_rel_ptr((volatile uintptr_t *)&epfp->f_ops, (uintptr_t)&epollops);
+
+	/* allocate epoll_user_data initially for up to 16 file descriptor values */
+	udv = malloc(EPOLL_USER_DATA_SIZE(EPOLL_USER_DATA_MARGIN), M_LINUX_EPOLL, M_WAITOK);
+	udv->sz = EPOLL_USER_DATA_MARGIN;
+	EPOLL_USER_DATA_SET(epfp, udv);
+}
+
+/*
+ * Remember `user_data' for descriptor `fd', growing the vector when the
+ * descriptor number does not fit yet.
+ */
+static void
+epoll_set_user_data(struct thread *td, struct file *epfp, int fd, epoll_udata_t user_data)
+{
+	struct epoll_user_data *udv = EPOLL_USER_DATA_GET(epfp);
+
+	/* A negative index would write in front of the vector. */
+	if (fd < 0)
+		return;
+
+	if ((unsigned)fd >= udv->sz) {
+		udv = realloc(udv, EPOLL_USER_DATA_SIZE(fd + EPOLL_USER_DATA_MARGIN), M_LINUX_EPOLL, M_WAITOK);
+		udv->sz = fd + EPOLL_USER_DATA_MARGIN;
+		EPOLL_USER_DATA_SET(epfp, udv);
+	}
+	udv->data[fd] = user_data;
+}
+
+/*
+ * Fetch the user data remembered for descriptor `fd'.  Asking for a slot
+ * outside the vector is treated as a fatal inconsistency.
+ */
+static epoll_udata_t
+epoll_get_user_data(struct thread *td, struct file *epfp, int fd)
+{
+	struct epoll_user_data *udv = EPOLL_USER_DATA_GET(epfp);
+
+	if (fd < 0 || (unsigned)fd >= udv->sz)
+		panic("epoll: user data vector is too small");
+
+	return (udv->data[fd]);
+}
+
+/*ARGSUSED*/
+static int
+epoll_close(struct file *epfp, struct thread *td)
+{
+	/* free user data vector */
+	free(EPOLL_USER_DATA_GET(epfp), M_LINUX_EPOLL);
+	/* over to kqueue parent */
+	return (kqueue_close(epfp, td));
+}
+#endif
+
+/*
+ * Look up the file behind an epoll descriptor.
+ *
+ * Returns the file with a reference held (release it with fdrop()), or
+ * NULL when `epfd' is not an open descriptor with the required rights or
+ * does not refer to an epoll object.  On EPOLL_WIDE_USER_DATA builds only
+ * files carrying the epoll file operations are accepted, because only
+ * those have a user data vector attached.
+ */
+static struct file*
+epoll_fget(struct thread *td, int epfd)
+{
+	struct file *fp;
+	cap_rights_t rights;
+
+	if (fget(td, epfd, cap_rights_init(&rights, CAP_POLL_EVENT), &fp) != 0)
+		return (NULL);
+
+	if (fp->f_type != DTYPE_KQUEUE
+#if EPOLL_WIDE_USER_DATA
+	    || fp->f_ops != &epollops
+#endif
+	    ) {
+		fdrop(fp, td);
+		return (NULL);
+	}
+
+	return (fp);
+}
+
diff --git a/sys/compat/linux/linux_epoll.h b/sys/compat/linux/linux_epoll.h
new file mode 100644
index 0000000..aea4185
--- /dev/null
+++ b/sys/compat/linux/linux_epoll.h
@@ -0,0 +1,68 @@
+/*-
+ * Copyright (c) 2007 Roman Divacky
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _LINUX_EPOLL_H_
+#define _LINUX_EPOLL_H_
+
+#ifdef __amd64__
+#define EPOLL_PACKED __packed
+#else
+#define EPOLL_PACKED
+#endif
+
+struct linux_epoll_event {
+ uint32_t events;
+ uint64_t data;
+} EPOLL_PACKED;
+
+/*
+ * Event bits of the Linux epoll interface.  The shifted values are
+ * parenthesized so the macros can be combined with any operator (e.g.
+ * `~LINUX_EPOLLET' or `x == LINUX_EPOLLONESHOT') without precedence
+ * surprises.
+ */
+#define LINUX_EPOLLIN 0x001
+#define LINUX_EPOLLPRI 0x002
+#define LINUX_EPOLLOUT 0x004
+#define LINUX_EPOLLRDNORM 0x040
+#define LINUX_EPOLLRDBAND 0x080
+#define LINUX_EPOLLWRNORM 0x100
+#define LINUX_EPOLLWRBAND 0x200
+#define LINUX_EPOLLMSG 0x400
+#define LINUX_EPOLLERR 0x008
+#define LINUX_EPOLLHUP 0x010
+#define LINUX_EPOLLRDHUP 0x2000
+#define LINUX_EPOLLWAKEUP (1u<<29)
+#define LINUX_EPOLLONESHOT (1u<<30)
+#define LINUX_EPOLLET (1u<<31)
+
+#define LINUX_EPOLL_CTL_ADD 1
+#define LINUX_EPOLL_CTL_DEL 2
+#define LINUX_EPOLL_CTL_MOD 3
+
+#define LINUX_EPOLL_CLOEXEC 02000000
+#define LINUX_EPOLL_NONBLOCK 00004000
+
+#define LINUX_MAX_EVENTS (INT_MAX / sizeof(struct linux_epoll_event))
+
+#endif /* !_LINUX_EPOLL_H_ */
+
diff --git a/sys/conf/files.amd64 b/sys/conf/files.amd64
index c1647d3..babfcab 100644
--- a/sys/conf/files.amd64
+++ b/sys/conf/files.amd64
@@ -467,6 +467,7 @@ amd64/linux32/linux32_support.s optional compat_linux32 \
dependency "linux32_assym.h"
amd64/linux32/linux32_sysent.c optional compat_linux32
amd64/linux32/linux32_sysvec.c optional compat_linux32
+compat/linux/linux_epoll.c optional compat_linux32
compat/linux/linux_emul.c optional compat_linux32
compat/linux/linux_file.c optional compat_linux32
compat/linux/linux_fork.c optional compat_linux32
diff --git a/sys/conf/files.i386 b/sys/conf/files.i386
index 24dac5f..17791a6 100644
--- a/sys/conf/files.i386
+++ b/sys/conf/files.i386
@@ -80,6 +80,7 @@ hptrr_lib.o optional hptrr \
cddl/contrib/opensolaris/common/atomic/i386/opensolaris_atomic.S optional zfs compile-with "${ZFS_S}"
compat/linprocfs/linprocfs.c optional linprocfs
compat/linsysfs/linsysfs.c optional linsysfs
+compat/linux/linux_epoll.c optional compat_linux
compat/linux/linux_emul.c optional compat_linux
compat/linux/linux_file.c optional compat_linux
compat/linux/linux_fork.c optional compat_linux
diff --git a/sys/conf/files.pc98 b/sys/conf/files.pc98
index a8e60b6..ee91501 100644
--- a/sys/conf/files.pc98
+++ b/sys/conf/files.pc98
@@ -41,6 +41,7 @@ ukbdmap.h optional ukbd_dflt_keymap \
cddl/contrib/opensolaris/common/atomic/i386/opensolaris_atomic.S optional zfs compile-with "${ZFS_S}"
compat/linprocfs/linprocfs.c optional linprocfs
compat/linsysfs/linsysfs.c optional linsysfs
+compat/linux/linux_epoll.c optional compat_linux
compat/linux/linux_emul.c optional compat_linux
compat/linux/linux_file.c optional compat_linux
compat/linux/linux_fork.c optional compat_linux
diff --git a/sys/i386/linux/linux_dummy.c b/sys/i386/linux/linux_dummy.c
index ab77790..f8526e1 100644
--- a/sys/i386/linux/linux_dummy.c
+++ b/sys/i386/linux/linux_dummy.c
@@ -72,9 +72,6 @@ DUMMY(setfsgid);
DUMMY(pivot_root);
DUMMY(mincore);
DUMMY(lookup_dcookie);
-DUMMY(epoll_create);
-DUMMY(epoll_ctl);
-DUMMY(epoll_wait);
DUMMY(remap_file_pages);
DUMMY(fstatfs64);
DUMMY(mbind);
@@ -120,7 +117,6 @@ DUMMY(timerfd_gettime);
/* linux 2.6.27: */
DUMMY(signalfd4);
DUMMY(eventfd2);
-DUMMY(epoll_create1);
DUMMY(dup3);
DUMMY(inotify_init1);
/* linux 2.6.30: */
diff --git a/sys/i386/linux/syscalls.master b/sys/i386/linux/syscalls.master
index bb17166..1f260bd 100644
--- a/sys/i386/linux/syscalls.master
+++ b/sys/i386/linux/syscalls.master
@@ -432,9 +432,11 @@
251 AUE_NULL UNIMPL
252 AUE_EXIT STD { int linux_exit_group(int error_code); }
253 AUE_NULL STD { int linux_lookup_dcookie(void); }
-254 AUE_NULL STD { int linux_epoll_create(void); }
-255 AUE_NULL STD { int linux_epoll_ctl(void); }
-256 AUE_NULL STD { int linux_epoll_wait(void); }
+254 AUE_NULL STD { int linux_epoll_create(l_int size); }
+255 AUE_NULL STD { int linux_epoll_ctl(l_int epfd, l_int op, l_int fd, \
+ struct linux_epoll_event *event); }
+256 AUE_NULL STD { int linux_epoll_wait(l_int epfd, struct linux_epoll_event *events, \
+ l_int maxevents, l_int timeout); }
257 AUE_NULL STD { int linux_remap_file_pages(void); }
258 AUE_NULL STD { int linux_set_tid_address(int *tidptr); }
259 AUE_NULL STD { int linux_timer_create(clockid_t clock_id, \
@@ -544,7 +546,7 @@
; linux 2.6.27:
327 AUE_NULL STD { int linux_signalfd4(void); }
328 AUE_NULL STD { int linux_eventfd2(void); }
-329 AUE_NULL STD { int linux_epoll_create1(void); }
+329 AUE_NULL STD { int linux_epoll_create1(l_int flags); }
330 AUE_NULL STD { int linux_dup3(void); }
331 AUE_NULL STD { int linux_pipe2(l_int *pipefds, l_int flags); }
332 AUE_NULL STD { int linux_inotify_init1(void); }
diff --git a/sys/kern/kern_event.c b/sys/kern/kern_event.c
index 85ea78c..f4b6c19 100644
--- a/sys/kern/kern_event.c
+++ b/sys/kern/kern_event.c
@@ -107,16 +107,7 @@ static void kqueue_wakeup(struct kqueue *kq);
static struct filterops *kqueue_fo_find(int filt);
static void kqueue_fo_release(int filt);
-static fo_rdwr_t kqueue_read;
-static fo_rdwr_t kqueue_write;
-static fo_truncate_t kqueue_truncate;
-static fo_ioctl_t kqueue_ioctl;
-static fo_poll_t kqueue_poll;
-static fo_kqfilter_t kqueue_kqfilter;
-static fo_stat_t kqueue_stat;
-static fo_close_t kqueue_close;
-
-static struct fileops kqueueops = {
+struct fileops kqueueops = {
.fo_read = kqueue_read,
.fo_write = kqueue_write,
.fo_truncate = kqueue_truncate,
@@ -303,7 +294,7 @@ filt_fileattach(struct knote *kn)
}
/*ARGSUSED*/
-static int
+int
kqueue_kqfilter(struct file *fp, struct knote *kn)
{
struct kqueue *kq = kn->kn_fp->f_data;
@@ -688,34 +679,7 @@ filt_usertouch(struct knote *kn, struct kevent *kev, u_long type)
int
sys_kqueue(struct thread *td, struct kqueue_args *uap)
{
- struct filedesc *fdp;
- struct kqueue *kq;
- struct file *fp;
- int fd, error;
-
- fdp = td->td_proc->p_fd;
- error = falloc(td, &fp, &fd, 0);
- if (error)
- goto done2;
-
- /* An extra reference on `fp' has been held for us by falloc(). */
- kq = malloc(sizeof *kq, M_KQUEUE, M_WAITOK | M_ZERO);
- mtx_init(&kq->kq_lock, "kqueue", NULL, MTX_DEF|MTX_DUPOK);
- TAILQ_INIT(&kq->kq_head);
- kq->kq_fdp = fdp;
- knlist_init_mtx(&kq->kq_sel.si_note, &kq->kq_lock);
- TASK_INIT(&kq->kq_task, 0, kqueue_task, kq);
-
- FILEDESC_XLOCK(fdp);
- TAILQ_INSERT_HEAD(&fdp->fd_kqlist, kq, kq_list);
- FILEDESC_XUNLOCK(fdp);
-
- finit(fp, FREAD | FWRITE, DTYPE_KQUEUE, kq, &kqueueops);
- fdrop(fp, td);
-
- td->td_retval[0] = fd;
-done2:
- return (error);
+ return (kern_kqueue(td));
}
#ifndef _SYS_SYSPROTO_H_
@@ -817,19 +781,75 @@ kevent_copyin(void *arg, struct kevent *kevp, int count)
}
+/*
+ * Create a kqueue and return its descriptor in td->td_retval[0].
+ *
+ * kern_kqueue_locked() only stores a file pointer in `fp' when it
+ * succeeds, so the reference is dropped on the success path only; on
+ * failure `fp' is left uninitialized and must not be touched.
+ */
int
+kern_kqueue(struct thread *td)
+{
+	struct file *fp;
+	int error;
+
+	error = kern_kqueue_locked(td, &fp);
+	if (error != 0)
+		return (error);
+
+	fdrop(fp, td);
+	return (0);
+}
+
+/*
+ * Allocate a new kqueue together with a file and a descriptor for it.
+ *
+ * On success the descriptor number is stored in td->td_retval[0], the file
+ * is returned through `fpp' and 0 is returned.  The reference obtained
+ * from falloc() is not dropped here, so the caller has to fdrop() the file
+ * when done with it.  No filedesc lock is held on return.
+ *
+ * On failure the falloc() error is returned and `*fpp' is not written.
+ */
+int
+kern_kqueue_locked(struct thread *td, struct file **fpp)
+{
+ struct filedesc *fdp;
+ struct kqueue *kq;
+ struct file *fp;
+ int fd, error;
+
+ fdp = td->td_proc->p_fd;
+ error = falloc(td, &fp, &fd, 0);
+ if (error)
+ return (error);
+
+ /* An extra reference on `fp' has been held for us by falloc(). */
+ kq = malloc(sizeof *kq, M_KQUEUE, M_WAITOK | M_ZERO);
+ mtx_init(&kq->kq_lock, "kqueue", NULL, MTX_DEF|MTX_DUPOK);
+ TAILQ_INIT(&kq->kq_head);
+ kq->kq_fdp = fdp;
+ knlist_init_mtx(&kq->kq_sel.si_note, &kq->kq_lock);
+ TASK_INIT(&kq->kq_task, 0, kqueue_task, kq);
+
+ /* link the kqueue into the per-process list under the filedesc lock */
+ FILEDESC_XLOCK(fdp);
+ TAILQ_INSERT_HEAD(&fdp->fd_kqlist, kq, kq_list);
+ FILEDESC_XUNLOCK(fdp);
+
+ finit(fp, FREAD | FWRITE, DTYPE_KQUEUE, kq, &kqueueops);
+
+ td->td_retval[0] = fd;
+ *fpp = fp;
+ return (0);
+}
+
+/*
+ * Descriptor based front end for kern_kevent_locked(): look up the file
+ * for `fd' (requiring CAP_POST_EVENT), run the kevent operation on it and
+ * drop the file reference again.  Returns the fget() error if the lookup
+ * fails, otherwise the result of kern_kevent_locked().
+ */
+int
kern_kevent(struct thread *td, int fd, int nchanges, int nevents,
struct kevent_copyops *k_ops, const struct timespec *timeout)
{
+ struct file *fp;
+ cap_rights_t rights;
+ int error;
+
+ if ((error = fget(td, fd, cap_rights_init(&rights, CAP_POST_EVENT), &fp)) != 0)
+ return (error);
+
+ error = kern_kevent_locked(td, fp, nchanges, nevents, k_ops, timeout);
+
+ fdrop(fp, td);
+ return (error);
+}
+
+int
+kern_kevent_locked(struct thread *td, struct file *fp, int nchanges, int nevents,
+ struct kevent_copyops *k_ops, const struct timespec *timeout)
+{
struct kevent keva[KQ_NEVENTS];
struct kevent *kevp, *changes;
struct kqueue *kq;
- struct file *fp;
- cap_rights_t rights;
int i, n, nerrors, error;
- error = fget(td, fd, cap_rights_init(&rights, CAP_POST_EVENT), &fp);
- if (error != 0)
- return (error);
if ((error = kqueue_acquire(fp, &kq)) != 0)
goto done_norel;
@@ -872,7 +892,6 @@ kern_kevent(struct thread *td, int fd, int nchanges, int nevents,
done:
kqueue_release(kq, 0);
done_norel:
- fdrop(fp, td);
return (error);
}
@@ -1526,7 +1545,7 @@ done_nl:
* This could be expanded to call kqueue_scan, if desired.
*/
/*ARGSUSED*/
-static int
+int
kqueue_read(struct file *fp, struct uio *uio, struct ucred *active_cred,
int flags, struct thread *td)
{
@@ -1534,7 +1553,7 @@ kqueue_read(struct file *fp, struct uio *uio, struct ucred *active_cred,
}
/*ARGSUSED*/
-static int
+int
kqueue_write(struct file *fp, struct uio *uio, struct ucred *active_cred,
int flags, struct thread *td)
{
@@ -1542,7 +1561,7 @@ kqueue_write(struct file *fp, struct uio *uio, struct ucred *active_cred,
}
/*ARGSUSED*/
-static int
+int
kqueue_truncate(struct file *fp, off_t length, struct ucred *active_cred,
struct thread *td)
{
@@ -1551,7 +1570,7 @@ kqueue_truncate(struct file *fp, off_t length, struct ucred *active_cred,
}
/*ARGSUSED*/
-static int
+int
kqueue_ioctl(struct file *fp, u_long cmd, void *data,
struct ucred *active_cred, struct thread *td)
{
@@ -1599,7 +1618,7 @@ kqueue_ioctl(struct file *fp, u_long cmd, void *data,
}
/*ARGSUSED*/
-static int
+int
kqueue_poll(struct file *fp, int events, struct ucred *active_cred,
struct thread *td)
{
@@ -1626,7 +1645,7 @@ kqueue_poll(struct file *fp, int events, struct ucred *active_cred,
}
/*ARGSUSED*/
-static int
+int
kqueue_stat(struct file *fp, struct stat *st, struct ucred *active_cred,
struct thread *td)
{
@@ -1644,7 +1663,7 @@ kqueue_stat(struct file *fp, struct stat *st, struct ucred *active_cred,
}
/*ARGSUSED*/
-static int
+int
kqueue_close(struct file *fp, struct thread *td)
{
struct kqueue *kq = fp->f_data;
diff --git a/sys/modules/linux/Makefile b/sys/modules/linux/Makefile
index 7ed6e98..ce46aa8 100644
--- a/sys/modules/linux/Makefile
+++ b/sys/modules/linux/Makefile
@@ -9,7 +9,7 @@ CFLAGS+=-DCOMPAT_FREEBSD32 -DCOMPAT_LINUX32
KMOD= linux
SRCS= linux_fork.c linux${SFX}_dummy.c linux_emul.c linux_file.c \
- linux_futex.c linux_getcwd.c linux_ioctl.c linux_ipc.c \
+ linux_futex.c linux_getcwd.c linux_ioctl.c linux_ipc.c linux_epoll.c \
linux${SFX}_machdep.c linux_mib.c linux_misc.c linux_signal.c \
linux_socket.c linux_stats.c linux_sysctl.c linux${SFX}_sysent.c \
linux${SFX}_sysvec.c linux_uid16.c linux_util.c linux_time.c \
diff --git a/sys/sys/event.h b/sys/sys/event.h
index 03bd7b9..60bced7 100644
--- a/sys/sys/event.h
+++ b/sys/sys/event.h
@@ -236,6 +236,9 @@ struct proc;
struct knlist;
struct mtx;
struct rwlock;
+struct uio;
+struct stat;
+struct ucred;
extern void knote(struct knlist *list, long hint, int lockflags);
extern void knote_fork(struct knlist *list, int pid);
@@ -261,6 +264,21 @@ extern int kqfd_register(int fd, struct kevent *kev, struct thread *p,
extern int kqueue_add_filteropts(int filt, struct filterops *filtops);
extern int kqueue_del_filteropts(int filt);
+int kqueue_read(struct file *fp, struct uio *uio, struct ucred *active_cred,
+ int flags, struct thread *td);
+int kqueue_write(struct file *fp, struct uio *uio, struct ucred *active_cred,
+ int flags, struct thread *td);
+int kqueue_truncate(struct file *fp, off_t length, struct ucred *active_cred,
+ struct thread *td);
+int kqueue_ioctl(struct file *fp, u_long cmd, void *data,
+ struct ucred *active_cred, struct thread *td);
+int kqueue_poll(struct file *fp, int events, struct ucred *active_cred,
+ struct thread *td);
+int kqueue_kqfilter(struct file *fp, struct knote *kn);
+int kqueue_stat(struct file *fp, struct stat *st, struct ucred *active_cred,
+ struct thread *td);
+int kqueue_close(struct file *fp, struct thread *td);
+
#else /* !_KERNEL */
#include <sys/cdefs.h>
diff --git a/sys/sys/file.h b/sys/sys/file.h
index 7b373f0..b4c1ad4 100644
--- a/sys/sys/file.h
+++ b/sys/sys/file.h
@@ -169,6 +169,8 @@ struct file {
union {
struct cdev_privdata *fvn_cdevpriv;
/* (d) Private data for the cdev. */
+ void *fvn_epollpriv;
+ /* (d) Private data for the epoll. */
struct fadvise_info *fvn_advice;
} f_vnun;
/*
diff --git a/sys/sys/syscallsubr.h b/sys/sys/syscallsubr.h
index 17f2b97..92dd8be 100644
--- a/sys/sys/syscallsubr.h
+++ b/sys/sys/syscallsubr.h
@@ -121,8 +121,13 @@ int kern_ioctl(struct thread *td, int fd, u_long com, caddr_t data);
int kern_jail(struct thread *td, struct jail *j);
int kern_jail_get(struct thread *td, struct uio *options, int flags);
int kern_jail_set(struct thread *td, struct uio *options, int flags);
+int kern_kqueue(struct thread *td);
+int kern_kqueue_locked(struct thread *td, struct file **fpp);
int kern_kevent(struct thread *td, int fd, int nchanges, int nevents,
struct kevent_copyops *k_ops, const struct timespec *timeout);
+int kern_kevent_locked(struct thread *td, struct file *fp, int nchanges,
+ int nevents,
+ struct kevent_copyops *k_ops, const struct timespec *timeout);
int kern_kldload(struct thread *td, const char *file, int *fileid);
int kern_kldstat(struct thread *td, int fileid, struct kld_file_stat *stat);
int kern_kldunload(struct thread *td, int fileid, int flags);
@@ -248,6 +253,8 @@ int kern_utimes(struct thread *td, char *path, enum uio_seg pathseg,
struct timeval *tptr, enum uio_seg tptrseg);
int kern_utimesat(struct thread *td, int fd, char *path,
enum uio_seg pathseg, struct timeval *tptr, enum uio_seg tptrseg);
+int kern_utimensat(struct thread *td, int fd, char *path,
+ enum uio_seg pathseg, struct timespec *tptr, enum uio_seg tptrseg);
int kern_wait(struct thread *td, pid_t pid, int *status, int options,
struct rusage *rup);
int kern_wait6(struct thread *td, enum idtype idtype, id_t id, int *status,
OpenPOWER on IntegriCloud