summaryrefslogtreecommitdiffstats
path: root/sys/compat
diff options
context:
space:
mode:
authorRenato Botelho <renato@netgate.com>2016-01-13 17:56:30 -0200
committerRenato Botelho <renato@netgate.com>2016-01-13 17:56:30 -0200
commit3e0bf52f358eb969d165c4b1e54942ee94cf2c8d (patch)
tree440bb9907871a5bc578d65b32f0c4aa339096175 /sys/compat
parent4b4ac714f11471e43f18410bcc86da8f9dc3b88c (diff)
parente357bdb742b2696dcb81404917b6247f9e840232 (diff)
downloadFreeBSD-src-3e0bf52f358eb969d165c4b1e54942ee94cf2c8d.zip
FreeBSD-src-3e0bf52f358eb969d165c4b1e54942ee94cf2c8d.tar.gz
Merge remote-tracking branch 'origin/stable/10' into devel
Diffstat (limited to 'sys/compat')
-rw-r--r--sys/compat/freebsd32/freebsd32_misc.c43
-rw-r--r--sys/compat/freebsd32/freebsd32_proto.h16
-rw-r--r--sys/compat/freebsd32/freebsd32_syscall.h6
-rw-r--r--sys/compat/freebsd32/freebsd32_syscalls.c4
-rw-r--r--sys/compat/freebsd32/freebsd32_sysent.c4
-rw-r--r--sys/compat/freebsd32/freebsd32_systrace_args.c60
-rw-r--r--sys/compat/freebsd32/syscalls.master5
-rw-r--r--sys/compat/ia32/ia32_sysvec.c1
-rw-r--r--sys/compat/linprocfs/linprocfs.c167
-rw-r--r--sys/compat/linsysfs/linsysfs.c10
-rw-r--r--sys/compat/linux/check_error.d4
-rw-r--r--sys/compat/linux/check_internal_locks.d35
-rw-r--r--sys/compat/linux/linux.c205
-rw-r--r--sys/compat/linux/linux.h95
-rw-r--r--sys/compat/linux/linux_common.c93
-rw-r--r--sys/compat/linux/linux_emul.c513
-rw-r--r--sys/compat/linux/linux_emul.h92
-rw-r--r--sys/compat/linux/linux_event.c882
-rw-r--r--sys/compat/linux/linux_event.h60
-rw-r--r--sys/compat/linux/linux_file.c75
-rw-r--r--sys/compat/linux/linux_file.h71
-rw-r--r--sys/compat/linux/linux_fork.c323
-rw-r--r--sys/compat/linux/linux_futex.c218
-rw-r--r--sys/compat/linux/linux_futex.h3
-rw-r--r--sys/compat/linux/linux_getcwd.c4
-rw-r--r--sys/compat/linux/linux_ioctl.c59
-rw-r--r--sys/compat/linux/linux_ioctl.h13
-rw-r--r--sys/compat/linux/linux_ipc.c10
-rw-r--r--sys/compat/linux/linux_ipc.h4
-rw-r--r--sys/compat/linux/linux_mib.c367
-rw-r--r--sys/compat/linux/linux_mib.h19
-rw-r--r--sys/compat/linux/linux_misc.c816
-rw-r--r--sys/compat/linux/linux_misc.h42
-rw-r--r--sys/compat/linux/linux_signal.c348
-rw-r--r--sys/compat/linux/linux_signal.h26
-rw-r--r--sys/compat/linux/linux_socket.c524
-rw-r--r--sys/compat/linux/linux_socket.h155
-rw-r--r--sys/compat/linux/linux_stats.c107
-rw-r--r--sys/compat/linux/linux_sysctl.c8
-rw-r--r--sys/compat/linux/linux_time.c277
-rw-r--r--sys/compat/linux/linux_timer.c27
-rw-r--r--sys/compat/linux/linux_timer.h23
-rw-r--r--sys/compat/linux/linux_uid16.c10
-rw-r--r--sys/compat/linux/linux_util.c115
-rw-r--r--sys/compat/linux/linux_util.h6
-rw-r--r--sys/compat/linux/linux_vdso.c244
-rw-r--r--sys/compat/linux/linux_vdso.h65
-rw-r--r--sys/compat/linux/stats_timing.d1
-rw-r--r--sys/compat/svr4/svr4_misc.c16
-rw-r--r--sys/compat/svr4/svr4_sysvec.c1
50 files changed, 4443 insertions, 1829 deletions
diff --git a/sys/compat/freebsd32/freebsd32_misc.c b/sys/compat/freebsd32/freebsd32_misc.c
index afcef0d..2f60c13 100644
--- a/sys/compat/freebsd32/freebsd32_misc.c
+++ b/sys/compat/freebsd32/freebsd32_misc.c
@@ -1429,6 +1429,49 @@ freebsd32_futimesat(struct thread *td, struct freebsd32_futimesat_args *uap)
}
+/*
+ * freebsd32 variant of futimens: set the timestamps of the file referenced
+ * by uap->fd.
+ *
+ * When uap->times is non-NULL, two struct timespec32 values are copied in
+ * and transferred field by field (tv_sec, tv_nsec) with CP() into a local
+ * struct timespec pair, which is then handed to kern_futimens(). A NULL
+ * uap->times is passed through as a NULL timespec pointer.
+ *
+ * Returns the copyin() error if the copy fails, otherwise the result of
+ * kern_futimens().
+ */
int
+freebsd32_futimens(struct thread *td, struct freebsd32_futimens_args *uap)
+{
+ struct timespec32 ts32[2];
+ struct timespec ts[2], *tsp;
+ int error;
+
+ if (uap->times != NULL) {
+ error = copyin(uap->times, ts32, sizeof(ts32));
+ if (error)
+ return (error);
+ CP(ts32[0], ts[0], tv_sec);
+ CP(ts32[0], ts[0], tv_nsec);
+ CP(ts32[1], ts[1], tv_sec);
+ CP(ts32[1], ts[1], tv_nsec);
+ tsp = ts;
+ } else
+ tsp = NULL;
+ /* tsp is either NULL or the local ts[] copy, hence UIO_SYSSPACE. */
+ return (kern_futimens(td, uap->fd, tsp, UIO_SYSSPACE));
+}
+
+/*
+ * freebsd32 variant of utimensat: set the timestamps of uap->path,
+ * passing uap->fd and uap->flag through to kern_utimensat().
+ *
+ * When uap->times is non-NULL, two struct timespec32 values are copied in
+ * and transferred field by field (tv_sec, tv_nsec) with CP() into a local
+ * struct timespec pair. A NULL uap->times is passed through as a NULL
+ * timespec pointer.
+ *
+ * Returns the copyin() error if the copy fails, otherwise the result of
+ * kern_utimensat().
+ */
+int
+freebsd32_utimensat(struct thread *td, struct freebsd32_utimensat_args *uap)
+{
+ struct timespec32 ts32[2];
+ struct timespec ts[2], *tsp;
+ int error;
+
+ if (uap->times != NULL) {
+ error = copyin(uap->times, ts32, sizeof(ts32));
+ if (error)
+ return (error);
+ CP(ts32[0], ts[0], tv_sec);
+ CP(ts32[0], ts[0], tv_nsec);
+ CP(ts32[1], ts[1], tv_sec);
+ CP(ts32[1], ts[1], tv_nsec);
+ tsp = ts;
+ } else
+ tsp = NULL;
+ /*
+ * The path is handed over untouched (UIO_USERSPACE); tsp is either
+ * NULL or the local ts[] copy (UIO_SYSSPACE).
+ */
+ return (kern_utimensat(td, uap->fd, uap->path, UIO_USERSPACE,
+ tsp, UIO_SYSSPACE, uap->flag));
+}
+
+int
freebsd32_adjtime(struct thread *td, struct freebsd32_adjtime_args *uap)
{
struct timeval32 tv32;
diff --git a/sys/compat/freebsd32/freebsd32_proto.h b/sys/compat/freebsd32/freebsd32_proto.h
index c4a1e30..16dc17e 100644
--- a/sys/compat/freebsd32/freebsd32_proto.h
+++ b/sys/compat/freebsd32/freebsd32_proto.h
@@ -3,7 +3,7 @@
*
* DO NOT EDIT-- this file is automatically generated.
* $FreeBSD$
- * created from FreeBSD: stable/10/sys/compat/freebsd32/syscalls.master 276955 2015-01-11 07:02:03Z dchagin
+ * created from FreeBSD: stable/10/sys/compat/freebsd32/syscalls.master 293474 2016-01-09 14:20:23Z dchagin
*/
#ifndef _FREEBSD32_SYSPROTO_H_
@@ -699,6 +699,16 @@ struct freebsd32_ppoll_args {
char ts_l_[PADL_(const struct timespec32 *)]; const struct timespec32 * ts; char ts_r_[PADR_(const struct timespec32 *)];
char set_l_[PADL_(const sigset_t *)]; const sigset_t * set; char set_r_[PADR_(const sigset_t *)];
};
+struct freebsd32_futimens_args {
+ char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)];
+ char times_l_[PADL_(struct timespec *)]; struct timespec * times; char times_r_[PADR_(struct timespec *)];
+};
+struct freebsd32_utimensat_args {
+ char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)];
+ char path_l_[PADL_(char *)]; char * path; char path_r_[PADR_(char *)];
+ char times_l_[PADL_(struct timespec *)]; struct timespec * times; char times_r_[PADR_(struct timespec *)];
+ char flag_l_[PADL_(int)]; int flag; char flag_r_[PADR_(int)];
+};
#if !defined(PAD64_REQUIRED) && (defined(__powerpc__) || defined(__mips__))
#define PAD64_REQUIRED
#endif
@@ -833,6 +843,8 @@ int freebsd32_procctl(struct thread *, struct freebsd32_procctl_args *);
int freebsd32_procctl(struct thread *, struct freebsd32_procctl_args *);
#endif
int freebsd32_ppoll(struct thread *, struct freebsd32_ppoll_args *);
+int freebsd32_futimens(struct thread *, struct freebsd32_futimens_args *);
+int freebsd32_utimensat(struct thread *, struct freebsd32_utimensat_args *);
#ifdef COMPAT_43
@@ -1250,6 +1262,8 @@ int freebsd7_freebsd32_shmctl(struct thread *, struct freebsd7_freebsd32_shmctl_
#define FREEBSD32_SYS_AUE_freebsd32_procctl AUE_NULL
#define FREEBSD32_SYS_AUE_freebsd32_procctl AUE_NULL
#define FREEBSD32_SYS_AUE_freebsd32_ppoll AUE_POLL
+#define FREEBSD32_SYS_AUE_freebsd32_futimens AUE_FUTIMES
+#define FREEBSD32_SYS_AUE_freebsd32_utimensat AUE_FUTIMESAT
#undef PAD_
#undef PADL_
diff --git a/sys/compat/freebsd32/freebsd32_syscall.h b/sys/compat/freebsd32/freebsd32_syscall.h
index 92af89b..cf4e492 100644
--- a/sys/compat/freebsd32/freebsd32_syscall.h
+++ b/sys/compat/freebsd32/freebsd32_syscall.h
@@ -3,7 +3,7 @@
*
* DO NOT EDIT-- this file is automatically generated.
* $FreeBSD$
- * created from FreeBSD: stable/10/sys/compat/freebsd32/syscalls.master 276955 2015-01-11 07:02:03Z dchagin
+ * created from FreeBSD: stable/10/sys/compat/freebsd32/syscalls.master 293474 2016-01-09 14:20:23Z dchagin
*/
#define FREEBSD32_SYS_syscall 0
@@ -455,4 +455,6 @@
#define FREEBSD32_SYS_freebsd32_procctl 544
#define FREEBSD32_SYS_freebsd32_procctl 544
#define FREEBSD32_SYS_freebsd32_ppoll 545
-#define FREEBSD32_SYS_MAXSYSCALL 546
+#define FREEBSD32_SYS_freebsd32_futimens 546
+#define FREEBSD32_SYS_freebsd32_utimensat 547
+#define FREEBSD32_SYS_MAXSYSCALL 548
diff --git a/sys/compat/freebsd32/freebsd32_syscalls.c b/sys/compat/freebsd32/freebsd32_syscalls.c
index 01a1201..5b442c3 100644
--- a/sys/compat/freebsd32/freebsd32_syscalls.c
+++ b/sys/compat/freebsd32/freebsd32_syscalls.c
@@ -3,7 +3,7 @@
*
* DO NOT EDIT-- this file is automatically generated.
* $FreeBSD$
- * created from FreeBSD: stable/10/sys/compat/freebsd32/syscalls.master 276955 2015-01-11 07:02:03Z dchagin
+ * created from FreeBSD: stable/10/sys/compat/freebsd32/syscalls.master 293474 2016-01-09 14:20:23Z dchagin
*/
const char *freebsd32_syscallnames[] = {
@@ -579,4 +579,6 @@ const char *freebsd32_syscallnames[] = {
"freebsd32_procctl", /* 544 = freebsd32_procctl */
#endif
"freebsd32_ppoll", /* 545 = freebsd32_ppoll */
+ "freebsd32_futimens", /* 546 = freebsd32_futimens */
+ "freebsd32_utimensat", /* 547 = freebsd32_utimensat */
};
diff --git a/sys/compat/freebsd32/freebsd32_sysent.c b/sys/compat/freebsd32/freebsd32_sysent.c
index 3b0b3fe..f3321a0 100644
--- a/sys/compat/freebsd32/freebsd32_sysent.c
+++ b/sys/compat/freebsd32/freebsd32_sysent.c
@@ -3,7 +3,7 @@
*
* DO NOT EDIT-- this file is automatically generated.
* $FreeBSD$
- * created from FreeBSD: stable/10/sys/compat/freebsd32/syscalls.master 276955 2015-01-11 07:02:03Z dchagin
+ * created from FreeBSD: stable/10/sys/compat/freebsd32/syscalls.master 293474 2016-01-09 14:20:23Z dchagin
*/
#include "opt_compat.h"
@@ -616,4 +616,6 @@ struct sysent freebsd32_sysent[] = {
{ AS(freebsd32_procctl_args), (sy_call_t *)freebsd32_procctl, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 544 = freebsd32_procctl */
#endif
{ AS(freebsd32_ppoll_args), (sy_call_t *)freebsd32_ppoll, AUE_POLL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 545 = freebsd32_ppoll */
+ { AS(freebsd32_futimens_args), (sy_call_t *)freebsd32_futimens, AUE_FUTIMES, NULL, 0, 0, 0, SY_THR_STATIC }, /* 546 = freebsd32_futimens */
+ { AS(freebsd32_utimensat_args), (sy_call_t *)freebsd32_utimensat, AUE_FUTIMESAT, NULL, 0, 0, 0, SY_THR_STATIC }, /* 547 = freebsd32_utimensat */
};
diff --git a/sys/compat/freebsd32/freebsd32_systrace_args.c b/sys/compat/freebsd32/freebsd32_systrace_args.c
index 6d9ab1c..87cf3b3 100644
--- a/sys/compat/freebsd32/freebsd32_systrace_args.c
+++ b/sys/compat/freebsd32/freebsd32_systrace_args.c
@@ -3323,6 +3323,24 @@ systrace_args(int sysnum, void *params, uint64_t *uarg, int *n_args)
*n_args = 4;
break;
}
+ /* freebsd32_futimens */
+ case 546: {
+ struct freebsd32_futimens_args *p = params;
+ iarg[0] = p->fd; /* int */
+ uarg[1] = (intptr_t) p->times; /* struct timespec * */
+ *n_args = 2;
+ break;
+ }
+ /* freebsd32_utimensat */
+ case 547: {
+ struct freebsd32_utimensat_args *p = params;
+ iarg[0] = p->fd; /* int */
+ uarg[1] = (intptr_t) p->path; /* char * */
+ uarg[2] = (intptr_t) p->times; /* struct timespec * */
+ iarg[3] = p->flag; /* int */
+ *n_args = 4;
+ break;
+ }
default:
*n_args = 0;
break;
@@ -8907,6 +8925,38 @@ systrace_entry_setargdesc(int sysnum, int ndx, char *desc, size_t descsz)
break;
};
break;
+ /* freebsd32_futimens */
+ case 546:
+ switch(ndx) {
+ case 0:
+ p = "int";
+ break;
+ case 1:
+ p = "struct timespec *";
+ break;
+ default:
+ break;
+ };
+ break;
+ /* freebsd32_utimensat */
+ case 547:
+ switch(ndx) {
+ case 0:
+ p = "int";
+ break;
+ case 1:
+ p = "char *";
+ break;
+ case 2:
+ p = "struct timespec *";
+ break;
+ case 3:
+ p = "int";
+ break;
+ default:
+ break;
+ };
+ break;
default:
break;
};
@@ -10795,6 +10845,16 @@ systrace_return_setargdesc(int sysnum, int ndx, char *desc, size_t descsz)
if (ndx == 0 || ndx == 1)
p = "int";
break;
+ /* freebsd32_futimens */
+ case 546:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
+ /* freebsd32_utimensat */
+ case 547:
+ if (ndx == 0 || ndx == 1)
+ p = "int";
+ break;
default:
break;
};
diff --git a/sys/compat/freebsd32/syscalls.master b/sys/compat/freebsd32/syscalls.master
index 48c2f3e..6c167bf 100644
--- a/sys/compat/freebsd32/syscalls.master
+++ b/sys/compat/freebsd32/syscalls.master
@@ -1069,3 +1069,8 @@
545 AUE_POLL STD { int freebsd32_ppoll(struct pollfd *fds, \
u_int nfds, const struct timespec32 *ts, \
const sigset_t *set); }
+546 AUE_FUTIMES STD { int freebsd32_futimens(int fd, \
+ struct timespec *times); }
+547 AUE_FUTIMESAT STD { int freebsd32_utimensat(int fd, \
+ char *path, \
+ struct timespec *times, int flag); }
diff --git a/sys/compat/ia32/ia32_sysvec.c b/sys/compat/ia32/ia32_sysvec.c
index bfc17d6..206935a 100644
--- a/sys/compat/ia32/ia32_sysvec.c
+++ b/sys/compat/ia32/ia32_sysvec.c
@@ -139,6 +139,7 @@ struct sysentvec ia32_freebsd_sysvec = {
.sv_shared_page_base = FREEBSD32_SHAREDPAGE,
.sv_shared_page_len = PAGE_SIZE,
.sv_schedtail = NULL,
+ .sv_thread_detach = NULL,
};
INIT_SYSENTVEC(elf_ia32_sysvec, &ia32_freebsd_sysvec);
diff --git a/sys/compat/linprocfs/linprocfs.c b/sys/compat/linprocfs/linprocfs.c
index 4aae77e..6e591e9 100644
--- a/sys/compat/linprocfs/linprocfs.c
+++ b/sys/compat/linprocfs/linprocfs.c
@@ -39,13 +39,12 @@
* @(#)procfs_status.c 8.4 (Berkeley) 6/15/94
*/
-#include "opt_compat.h"
-
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/queue.h>
+#include <sys/systm.h>
#include <sys/blist.h>
#include <sys/conf.h>
#include <sys/exec.h>
@@ -53,6 +52,7 @@ __FBSDID("$FreeBSD$");
#include <sys/filedesc.h>
#include <sys/jail.h>
#include <sys/kernel.h>
+#include <sys/limits.h>
#include <sys/linker.h>
#include <sys/lock.h>
#include <sys/malloc.h>
@@ -68,6 +68,7 @@ __FBSDID("$FreeBSD$");
#include <sys/smp.h>
#include <sys/socket.h>
#include <sys/sysctl.h>
+#include <sys/sysent.h>
#include <sys/systm.h>
#include <sys/time.h>
#include <sys/tty.h>
@@ -78,7 +79,7 @@ __FBSDID("$FreeBSD$");
#include <sys/bus.h>
#include <net/if.h>
-#include <net/vnet.h>
+#include <net/if_types.h>
#include <vm/vm.h>
#include <vm/vm_extern.h>
@@ -98,11 +99,7 @@ __FBSDID("$FreeBSD$");
#include <machine/md_var.h>
#endif /* __i386__ || __amd64__ */
-#ifdef COMPAT_FREEBSD32
-#include <compat/freebsd32/freebsd32_util.h>
-#endif
-
-#include <compat/linux/linux_ioctl.h>
+#include <compat/linux/linux.h>
#include <compat/linux/linux_mib.h>
#include <compat/linux/linux_misc.h>
#include <compat/linux/linux_util.h>
@@ -734,6 +731,7 @@ linprocfs_doprocstatus(PFS_FILL_ARGS)
segsz_t lsize;
struct thread *td2;
struct sigacts *ps;
+ l_sigset_t siglist, sigignore, sigcatch;
int i;
PROC_LOCK(p);
@@ -822,29 +820,25 @@ linprocfs_doprocstatus(PFS_FILL_ARGS)
/*
* Signal masks
- *
- * We support up to 128 signals, while Linux supports 32,
- * but we only define 32 (the same 32 as Linux, to boot), so
- * just show the lower 32 bits of each mask. XXX hack.
- *
- * NB: on certain platforms (Sparc at least) Linux actually
- * supports 64 signals, but this code is a long way from
- * running on anything but i386, so ignore that for now.
*/
PROC_LOCK(p);
- sbuf_printf(sb, "SigPnd:\t%08x\n", p->p_siglist.__bits[0]);
- /*
- * I can't seem to find out where the signal mask is in
- * relation to struct proc, so SigBlk is left unimplemented.
- */
- sbuf_printf(sb, "SigBlk:\t%08x\n", 0); /* XXX */
+ bsd_to_linux_sigset(&p->p_siglist, &siglist);
ps = p->p_sigacts;
mtx_lock(&ps->ps_mtx);
- sbuf_printf(sb, "SigIgn:\t%08x\n", ps->ps_sigignore.__bits[0]);
- sbuf_printf(sb, "SigCgt:\t%08x\n", ps->ps_sigcatch.__bits[0]);
+ bsd_to_linux_sigset(&ps->ps_sigignore, &sigignore);
+ bsd_to_linux_sigset(&ps->ps_sigcatch, &sigcatch);
mtx_unlock(&ps->ps_mtx);
PROC_UNLOCK(p);
+ sbuf_printf(sb, "SigPnd:\t%016jx\n", siglist.__mask);
+ /*
+ * XXX. SigBlk - target thread's signal mask, td_sigmask.
+ * To implement SigBlk pseudofs should support proc/tid dir entries.
+ */
+ sbuf_printf(sb, "SigBlk:\t%016x\n", 0);
+ sbuf_printf(sb, "SigIgn:\t%016jx\n", sigignore.__mask);
+ sbuf_printf(sb, "SigCgt:\t%016jx\n", sigcatch.__mask);
+
/*
* Linux also prints the capability masks, but we don't have
* capabilities yet, and when we do get them they're likely to
@@ -937,34 +931,22 @@ linprocfs_doproccmdline(PFS_FILL_ARGS)
static int
linprocfs_doprocenviron(PFS_FILL_ARGS)
{
- int ret;
-
- PROC_LOCK(p);
- if ((ret = p_candebug(td, p)) != 0) {
- PROC_UNLOCK(p);
- return (ret);
- }
/*
* Mimic linux behavior and pass only processes with usermode
* address space as valid. Return zero silently otherwize.
*/
- if (p->p_vmspace == &vmspace0) {
- PROC_UNLOCK(p);
+ if (p->p_vmspace == &vmspace0)
return (0);
- }
- if ((p->p_flag & P_SYSTEM) != 0) {
- PROC_UNLOCK(p);
- return (0);
- }
-
- PROC_UNLOCK(p);
-
- ret = proc_getenvv(td, p, sb);
- return (ret);
+ return (proc_getenvv(td, p, sb));
}
+static char l32_map_str[] = "%08lx-%08lx %s%s%s%s %08lx %02x:%02x %lu%s%s\n";
+static char l64_map_str[] = "%016lx-%016lx %s%s%s%s %08lx %02x:%02x %lu%s%s\n";
+static char vdso_str[] = " [vdso]";
+static char stack_str[] = " [stack]";
+
/*
* Filler function for proc/pid/maps
*/
@@ -980,6 +962,7 @@ linprocfs_doprocmaps(PFS_FILL_ARGS)
vm_prot_t e_prot;
unsigned int last_timestamp;
char *name = "", *freename = NULL;
+ const char *l_map_str;
ino_t ino;
int ref_count, shadow_count, flags;
int error;
@@ -999,6 +982,11 @@ linprocfs_doprocmaps(PFS_FILL_ARGS)
vm = vmspace_acquire_ref(p);
if (vm == NULL)
return (ESRCH);
+
+ if (SV_CURPROC_FLAG(SV_LP64))
+ l_map_str = l64_map_str;
+ else
+ l_map_str = l32_map_str;
map = &vm->vm_map;
vm_map_lock_read(map);
for (entry = map->header.next; entry != &map->header;
@@ -1037,6 +1025,11 @@ linprocfs_doprocmaps(PFS_FILL_ARGS)
VOP_GETATTR(vp, &vat, td->td_ucred);
ino = vat.va_fileid;
vput(vp);
+ } else if (SV_PROC_ABI(p) == SV_ABI_LINUX) {
+ if (e_start == p->p_sysent->sv_shared_page_base)
+ name = vdso_str;
+ if (e_end == p->p_sysent->sv_usrstack)
+ name = stack_str;
}
} else {
flags = 0;
@@ -1048,8 +1041,7 @@ linprocfs_doprocmaps(PFS_FILL_ARGS)
* format:
* start, end, access, offset, major, minor, inode, name.
*/
- error = sbuf_printf(sb,
- "%08lx-%08lx %s%s%s%s %08lx %02x:%02x %lu%s%s\n",
+ error = sbuf_printf(sb, l_map_str,
(u_long)e_start, (u_long)e_end,
(e_prot & VM_PROT_READ)?"r":"-",
(e_prot & VM_PROT_WRITE)?"w":"-",
@@ -1086,6 +1078,35 @@ linprocfs_doprocmaps(PFS_FILL_ARGS)
}
/*
+ * Criteria for interface name translation
+ */
+#define IFP_IS_ETH(ifp) (ifp->if_type == IFT_ETHER)
+
+/*
+ * Write the Linux-style name for ifp into buffer (at most buflen bytes).
+ *
+ * Interfaces that are not IFT_ETHER keep their if_xname. An ethernet
+ * interface is named "eth<N>", where N is the number of IFT_ETHER
+ * interfaces that precede it on the V_ifnet list.
+ *
+ * Returns the strlcpy()/snprintf() result, or 0 when ifp is an ethernet
+ * interface that is not found on V_ifnet (buffer is left unmodified then).
+ * IFNET_RLOCK_ASSERT() is evaluated on entry.
+ */
+static int
+linux_ifname(struct ifnet *ifp, char *buffer, size_t buflen)
+{
+ struct ifnet *ifscan;
+ int ethno;
+
+ IFNET_RLOCK_ASSERT();
+
+ /* Short-circuit non ethernet interfaces */
+ if (!IFP_IS_ETH(ifp))
+ return (strlcpy(buffer, ifp->if_xname, buflen));
+
+ /* Determine the (relative) unit number for ethernet interfaces */
+ ethno = 0;
+ TAILQ_FOREACH(ifscan, &V_ifnet, if_link) {
+ if (ifscan == ifp)
+ return (snprintf(buffer, buflen, "eth%d", ethno));
+ if (IFP_IS_ETH(ifscan))
+ ethno++;
+ }
+
+ return (0);
+}
+
+/*
* Filler function for proc/net/dev
*/
static int
@@ -1232,8 +1253,6 @@ linprocfs_doscsiscsi(PFS_FILL_ARGS)
return (0);
}
-extern struct cdevsw *cdevsw[];
-
/*
* Filler function for proc/devices
*/
@@ -1328,6 +1347,52 @@ linprocfs_douuid(PFS_FILL_ARGS)
return(0);
}
+/*
+ * Filler function for proc/pid/auxv
+ */
+static int
+linprocfs_doauxv(PFS_FILL_ARGS)
+{
+	struct sbuf *asb;
+	off_t buflen, resid;
+	int error;
+
+	/*
+	 * Mimic linux behavior and pass only processes with usermode
+	 * address space as valid. Return zero silently otherwise.
+	 */
+	if (p->p_vmspace == &vmspace0)
+		return (0);
+
+	if (uio->uio_resid == 0)
+		return (0);
+	if (uio->uio_offset < 0 || uio->uio_resid < 0)
+		return (EINVAL);
+
+	/*
+	 * Build the auxv image in a temporary auto-sized sbuf, then copy
+	 * out the window [uio_offset, uio_offset + buflen) requested by uio.
+	 */
+	asb = sbuf_new_auto();
+	if (asb == NULL)
+		return (ENOMEM);
+	error = proc_getauxv(td, p, asb);
+	if (error == 0)
+		error = sbuf_finish(asb);
+
+	resid = sbuf_len(asb) - uio->uio_offset;
+	if (resid > uio->uio_resid)
+		buflen = uio->uio_resid;
+	else
+		buflen = resid;
+	if (buflen > IOSIZE_MAX) {
+		error = EINVAL;
+		goto out;
+	}
+	if (buflen > MAXPHYS)
+		buflen = MAXPHYS;
+	if (resid <= 0) {
+		/* Offset at or past the end of the data: report EOF. */
+		error = 0;
+		goto out;
+	}
+
+	if (error == 0)
+		error = uiomove(sbuf_data(asb) + uio->uio_offset, buflen, uio);
+out:
+	/*
+	 * Single exit once asb exists, so the sbuf is released on every
+	 * path; the EINVAL and EOF paths above used to return without
+	 * deleting it.
+	 */
+	sbuf_delete(asb);
+	return (error);
+}
/*
* Constructor
@@ -1386,7 +1451,7 @@ linprocfs_init(PFS_INIT_ARGS)
pfs_create_link(dir, "cwd", &linprocfs_doproccwd,
NULL, NULL, NULL, 0);
pfs_create_file(dir, "environ", &linprocfs_doprocenviron,
- NULL, NULL, NULL, PFS_RD);
+ NULL, &procfs_candebug, NULL, PFS_RD);
pfs_create_link(dir, "exe", &procfs_doprocfile,
NULL, &procfs_notsystem, NULL, 0);
pfs_create_file(dir, "maps", &linprocfs_doprocmaps,
@@ -1403,6 +1468,8 @@ linprocfs_init(PFS_INIT_ARGS)
NULL, NULL, NULL, PFS_RD);
pfs_create_link(dir, "fd", &linprocfs_dofdescfs,
NULL, NULL, NULL, 0);
+ pfs_create_file(dir, "auxv", &linprocfs_doauxv,
+ NULL, &procfs_candebug, NULL, PFS_RD|PFS_RAWRD);
/* /proc/scsi/... */
dir = pfs_create_dir(root, "scsi", NULL, NULL, NULL, 0);
@@ -1448,7 +1515,11 @@ linprocfs_uninit(PFS_INIT_ARGS)
}
PSEUDOFS(linprocfs, 1, 0);
+#if defined(__amd64__)
+MODULE_DEPEND(linprocfs, linux_common, 1, 1, 1);
+#else
MODULE_DEPEND(linprocfs, linux, 1, 1, 1);
+#endif
MODULE_DEPEND(linprocfs, procfs, 1, 1, 1);
MODULE_DEPEND(linprocfs, sysvmsg, 1, 1, 1);
MODULE_DEPEND(linprocfs, sysvsem, 1, 1, 1);
diff --git a/sys/compat/linsysfs/linsysfs.c b/sys/compat/linsysfs/linsysfs.c
index 45f44af..8b5f9b5 100644
--- a/sys/compat/linsysfs/linsysfs.c
+++ b/sys/compat/linsysfs/linsysfs.c
@@ -61,12 +61,6 @@ __FBSDID("$FreeBSD$");
#include <machine/bus.h>
-#include "opt_compat.h"
-#ifdef COMPAT_LINUX32 /* XXX */
-#include <machine/../linux32/linux.h>
-#else
-#include <machine/../linux/linux.h>
-#endif
#include <compat/linux/linux_ioctl.h>
#include <compat/linux/linux_mib.h>
#include <compat/linux/linux_util.h>
@@ -281,4 +275,8 @@ linsysfs_uninit(PFS_INIT_ARGS)
}
PSEUDOFS(linsysfs, 1, 0);
+#if defined(__amd64__)
+MODULE_DEPEND(linsysfs, linux_common, 1, 1, 1);
+#else
MODULE_DEPEND(linsysfs, linux, 1, 1, 1);
+#endif
diff --git a/sys/compat/linux/check_error.d b/sys/compat/linux/check_error.d
index 9e3c00a..389e768 100644
--- a/sys/compat/linux/check_error.d
+++ b/sys/compat/linux/check_error.d
@@ -36,8 +36,8 @@
*/
linuxulator*:dummy::not_implemented,
-linuxulator*:emul:proc_exit:child_clear_tid_error,
-linuxulator*:emul:proc_exit:futex_failed,
+linuxulator*:emul:linux_thread_detach:child_clear_tid_error,
+linuxulator*:emul:linux_thread_detach:futex_failed,
linuxulator*:emul:linux_schedtail:copyout_error,
linuxulator*:futex:futex_get:error,
linuxulator*:futex:futex_sleep:requeue_error,
diff --git a/sys/compat/linux/check_internal_locks.d b/sys/compat/linux/check_internal_locks.d
index 2bdef68..b9d7c61 100644
--- a/sys/compat/linux/check_internal_locks.d
+++ b/sys/compat/linux/check_internal_locks.d
@@ -41,14 +41,9 @@
BEGIN
{
- check["emul_lock"] = 0;
- check["emul_shared_rlock"] = 0;
- check["emul_shared_wlock"] = 0;
check["futex_mtx"] = 0;
}
-linuxulator*:locks:emul_lock:locked,
-linuxulator*:locks:emul_shared_wlock:locked,
linuxulator*:locks:futex_mtx:locked
/check[probefunc] > 0/
{
@@ -57,9 +52,6 @@ linuxulator*:locks:futex_mtx:locked
stack();
}
-linuxulator*:locks:emul_lock:locked,
-linuxulator*:locks:emul_shared_rlock:locked,
-linuxulator*:locks:emul_shared_wlock:locked,
linuxulator*:locks:futex_mtx:locked
{
++check[probefunc];
@@ -69,9 +61,6 @@ linuxulator*:locks:futex_mtx:locked
spec[probefunc] = speculation();
}
-linuxulator*:locks:emul_lock:unlock,
-linuxulator*:locks:emul_shared_rlock:unlock,
-linuxulator*:locks:emul_shared_wlock:unlock,
linuxulator*:locks:futex_mtx:unlock
/check[probefunc] == 0/
{
@@ -82,9 +71,6 @@ linuxulator*:locks:futex_mtx:unlock
stack();
}
-linuxulator*:locks:emul_lock:unlock,
-linuxulator*:locks:emul_shared_rlock:unlock,
-linuxulator*:locks:emul_shared_wlock:unlock,
linuxulator*:locks:futex_mtx:unlock
{
discard(spec[probefunc]);
@@ -95,27 +81,6 @@ linuxulator*:locks:futex_mtx:unlock
/* Timeout handling */
tick-10s
-/spec["emul_lock"] != 0 && timestamp - ts["emul_lock"] >= 9999999000/
-{
- commit(spec["emul_lock"]);
- spec["emul_lock"] = 0;
-}
-
-tick-10s
-/spec["emul_shared_wlock"] != 0 && timestamp - ts["emul_shared_wlock"] >= 9999999000/
-{
- commit(spec["emul_shared_wlock"]);
- spec["emul_shared_wlock"] = 0;
-}
-
-tick-10s
-/spec["emul_shared_rlock"] != 0 && timestamp - ts["emul_shared_rlock"] >= 9999999000/
-{
- commit(spec["emul_shared_rlock"]);
- spec["emul_shared_rlock"] = 0;
-}
-
-tick-10s
/spec["futex_mtx"] != 0 && timestamp - ts["futex_mtx"] >= 9999999000/
{
commit(spec["futex_mtx"]);
diff --git a/sys/compat/linux/linux.c b/sys/compat/linux/linux.c
new file mode 100644
index 0000000..d1d7877
--- /dev/null
+++ b/sys/compat/linux/linux.c
@@ -0,0 +1,205 @@
+/*-
+ * Copyright (c) 2015 Dmitry Chagin
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/signalvar.h>
+
+#include <compat/linux/linux.h>
+
+
+static int bsd_to_linux_sigtbl[LINUX_SIGTBLSZ] = {
+ LINUX_SIGHUP, /* SIGHUP */
+ LINUX_SIGINT, /* SIGINT */
+ LINUX_SIGQUIT, /* SIGQUIT */
+ LINUX_SIGILL, /* SIGILL */
+ LINUX_SIGTRAP, /* SIGTRAP */
+ LINUX_SIGABRT, /* SIGABRT */
+ 0, /* SIGEMT */
+ LINUX_SIGFPE, /* SIGFPE */
+ LINUX_SIGKILL, /* SIGKILL */
+ LINUX_SIGBUS, /* SIGBUS */
+ LINUX_SIGSEGV, /* SIGSEGV */
+ LINUX_SIGSYS, /* SIGSYS */
+ LINUX_SIGPIPE, /* SIGPIPE */
+ LINUX_SIGALRM, /* SIGALRM */
+ LINUX_SIGTERM, /* SIGTERM */
+ LINUX_SIGURG, /* SIGURG */
+ LINUX_SIGSTOP, /* SIGSTOP */
+ LINUX_SIGTSTP, /* SIGTSTP */
+ LINUX_SIGCONT, /* SIGCONT */
+ LINUX_SIGCHLD, /* SIGCHLD */
+ LINUX_SIGTTIN, /* SIGTTIN */
+ LINUX_SIGTTOU, /* SIGTTOU */
+ LINUX_SIGIO, /* SIGIO */
+ LINUX_SIGXCPU, /* SIGXCPU */
+ LINUX_SIGXFSZ, /* SIGXFSZ */
+ LINUX_SIGVTALRM,/* SIGVTALRM */
+ LINUX_SIGPROF, /* SIGPROF */
+ LINUX_SIGWINCH, /* SIGWINCH */
+ 0, /* SIGINFO */
+ LINUX_SIGUSR1, /* SIGUSR1 */
+ LINUX_SIGUSR2 /* SIGUSR2 */
+};
+
+static int linux_to_bsd_sigtbl[LINUX_SIGTBLSZ] = {
+ SIGHUP, /* LINUX_SIGHUP */
+ SIGINT, /* LINUX_SIGINT */
+ SIGQUIT, /* LINUX_SIGQUIT */
+ SIGILL, /* LINUX_SIGILL */
+ SIGTRAP, /* LINUX_SIGTRAP */
+ SIGABRT, /* LINUX_SIGABRT */
+ SIGBUS, /* LINUX_SIGBUS */
+ SIGFPE, /* LINUX_SIGFPE */
+ SIGKILL, /* LINUX_SIGKILL */
+ SIGUSR1, /* LINUX_SIGUSR1 */
+ SIGSEGV, /* LINUX_SIGSEGV */
+ SIGUSR2, /* LINUX_SIGUSR2 */
+ SIGPIPE, /* LINUX_SIGPIPE */
+ SIGALRM, /* LINUX_SIGALRM */
+ SIGTERM, /* LINUX_SIGTERM */
+ SIGBUS, /* LINUX_SIGSTKFLT */
+ SIGCHLD, /* LINUX_SIGCHLD */
+ SIGCONT, /* LINUX_SIGCONT */
+ SIGSTOP, /* LINUX_SIGSTOP */
+ SIGTSTP, /* LINUX_SIGTSTP */
+ SIGTTIN, /* LINUX_SIGTTIN */
+ SIGTTOU, /* LINUX_SIGTTOU */
+ SIGURG, /* LINUX_SIGURG */
+ SIGXCPU, /* LINUX_SIGXCPU */
+ SIGXFSZ, /* LINUX_SIGXFSZ */
+ SIGVTALRM, /* LINUX_SIGVTALRM */
+ SIGPROF, /* LINUX_SIGPROF */
+ SIGWINCH, /* LINUX_SIGWINCH */
+ SIGIO, /* LINUX_SIGIO */
+ /*
+ * FreeBSD does not have SIGPWR signal, map Linux SIGPWR signal
+ * to the first unused FreeBSD signal number. Since Linux supports
+ * signals from 1 to 64 we are ok here as our SIGRTMIN = 65.
+ */
+ SIGRTMIN, /* LINUX_SIGPWR */
+ SIGSYS /* LINUX_SIGSYS */
+};
+
+/*
+ * Map Linux RT signals to the FreeBSD RT signals.
+ */
+static inline int
+linux_to_bsd_rt_signal(int sig)
+{
+
+ /*
+ * LINUX_SIGRTMIN lands on SIGRTMIN + 1 because SIGRTMIN itself is
+ * taken by LINUX_SIGPWR in linux_to_bsd_sigtbl. No range check is
+ * done on sig here.
+ */
+ return (SIGRTMIN + 1 + sig - LINUX_SIGRTMIN);
+}
+
+/*
+ * Inverse of linux_to_bsd_rt_signal(): SIGRTMIN + 1 maps back to
+ * LINUX_SIGRTMIN. The arithmetic is applied to any sig without a range
+ * check, so the result is only a valid Linux RT signal when sig came
+ * from the forward mapping's range.
+ */
+static inline int
+bsd_to_linux_rt_signal(int sig)
+{
+
+ return (sig - SIGRTMIN - 1 + LINUX_SIGRTMIN);
+}
+
+/*
+ * Translate a Linux signal number to the FreeBSD one.
+ *
+ * Signals below LINUX_SIGRTMIN are looked up in linux_to_bsd_sigtbl;
+ * the rest go through linux_to_bsd_rt_signal(). The argument is only
+ * validated by the KASSERT, so callers are expected to pass a number in
+ * 1..LINUX_SIGRTMAX.
+ */
+int
+linux_to_bsd_signal(int sig)
+{
+
+ KASSERT(sig > 0 && sig <= LINUX_SIGRTMAX, ("Invalid Linux signal\n"));
+
+ if (sig < LINUX_SIGRTMIN)
+ return (linux_to_bsd_sigtbl[_SIG_IDX(sig)]);
+
+ return (linux_to_bsd_rt_signal(sig));
+}
+
+/*
+ * Translate a FreeBSD signal number to the Linux one.
+ *
+ * Returns 0 when the signal has no Linux counterpart, the same convention
+ * bsd_to_linux_sigtbl uses for SIGEMT and SIGINFO. That covers:
+ *  - non-positive numbers, which would index the table out of bounds;
+ *  - FreeBSD signals above the table but below SIGRTMIN, for which the RT
+ *    formula would produce negative or already-assigned Linux numbers;
+ *  - FreeBSD RT signals that fall beyond LINUX_SIGRTMAX.
+ */
+int
+bsd_to_linux_signal(int sig)
+{
+	int lsig;
+
+	if (sig <= 0)
+		return (0);
+	if (sig <= LINUX_SIGTBLSZ)
+		return (bsd_to_linux_sigtbl[_SIG_IDX(sig)]);
+	if (sig == SIGRTMIN)
+		return (LINUX_SIGPWR);
+
+	/* Only accept results inside the Linux RT signal range. */
+	lsig = bsd_to_linux_rt_signal(sig);
+	if (lsig < LINUX_SIGRTMIN || lsig > LINUX_SIGRTMAX)
+		return (0);
+	return (lsig);
+}
+
+/*
+ * Convert Linux sigaltstack flags to FreeBSD flags. Only
+ * LINUX_SS_DISABLE is carried over (as SS_DISABLE); every other bit,
+ * including LINUX_SS_ONSTACK, is dropped.
+ */
+int
+linux_to_bsd_sigaltstack(int lsa)
+{
+ int bsa = 0;
+
+ if (lsa & LINUX_SS_DISABLE)
+ bsa |= SS_DISABLE;
+ /*
+ * Linux ignores SS_ONSTACK flag for ss
+ * parameter while FreeBSD prohibits it.
+ */
+ return (bsa);
+}
+
+/*
+ * Convert FreeBSD sigaltstack flags to Linux flags: SS_DISABLE becomes
+ * LINUX_SS_DISABLE and SS_ONSTACK becomes LINUX_SS_ONSTACK. Any other
+ * bits in bsa are dropped.
+ */
+int
+bsd_to_linux_sigaltstack(int bsa)
+{
+ int lsa = 0;
+
+ if (bsa & SS_DISABLE)
+ lsa |= LINUX_SS_DISABLE;
+ if (bsa & SS_ONSTACK)
+ lsa |= LINUX_SS_ONSTACK;
+ return (lsa);
+}
+
+/*
+ * Convert a Linux signal set to a FreeBSD one.
+ *
+ * *bss is emptied first; then every Linux signal 1..LINUX_SIGRTMAX that
+ * is a member of *lss is translated with linux_to_bsd_signal() and added
+ * to *bss, unless the translation yields 0.
+ */
+void
+linux_to_bsd_sigset(l_sigset_t *lss, sigset_t *bss)
+{
+ int b, l;
+
+ SIGEMPTYSET(*bss);
+ for (l = 1; l <= LINUX_SIGRTMAX; l++) {
+ if (LINUX_SIGISMEMBER(*lss, l)) {
+ b = linux_to_bsd_signal(l);
+ if (b)
+ SIGADDSET(*bss, b);
+ }
+ }
+}
+
+/*
+ * Convert a FreeBSD signal set to a Linux one.
+ *
+ * *lss is emptied first; then every FreeBSD signal 1..SIGRTMAX that is a
+ * member of *bss is translated with bsd_to_linux_signal() and added to
+ * *lss. A translation is only added when it is a valid Linux signal
+ * number (1..LINUX_SIGRTMAX): l_sigset_t is a single 64-bit mask, and
+ * LINUX_SIGADDSET() with a value outside that range would shift by a
+ * negative amount or by 64 or more bits, which is undefined.
+ */
+void
+bsd_to_linux_sigset(sigset_t *bss, l_sigset_t *lss)
+{
+	int b, l;
+
+	LINUX_SIGEMPTYSET(*lss);
+	for (b = 1; b <= SIGRTMAX; b++) {
+		if (SIGISMEMBER(*bss, b)) {
+			l = bsd_to_linux_signal(b);
+			if (LINUX_SIG_VALID(l))
+				LINUX_SIGADDSET(*lss, l);
+		}
+	}
+}
diff --git a/sys/compat/linux/linux.h b/sys/compat/linux/linux.h
new file mode 100644
index 0000000..974440f
--- /dev/null
+++ b/sys/compat/linux/linux.h
@@ -0,0 +1,95 @@
+/*-
+ * Copyright (c) 2015 Dmitry Chagin
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _LINUX_MI_H_
+#define _LINUX_MI_H_
+
+/* sigaltstack */
+#define LINUX_SS_ONSTACK 1
+#define LINUX_SS_DISABLE 2
+
+int linux_to_bsd_sigaltstack(int lsa);
+int bsd_to_linux_sigaltstack(int bsa);
+
+/* sigset */
+typedef struct {
+ uint64_t __mask;
+} l_sigset_t;
+
+/*
+ * primitives to manipulate sigset_t
+ *
+ * __mask is 64 bits wide, so the bit constant is 1ULL: with 1UL a shift by
+ * 32 or more is undefined where unsigned long is 32 bits. Each expansion is
+ * fully parenthesized so it can be used inside larger expressions.
+ */
+#define LINUX_SIGEMPTYSET(set) ((set).__mask = 0)
+#define LINUX_SIGISMEMBER(set, sig) (1ULL & ((set).__mask >> _SIG_IDX(sig)))
+#define LINUX_SIGADDSET(set, sig) ((set).__mask |= 1ULL << _SIG_IDX(sig))
+
+void linux_to_bsd_sigset(l_sigset_t *, sigset_t *);
+void bsd_to_linux_sigset(sigset_t *, l_sigset_t *);
+
+/* signaling */
+#define LINUX_SIGHUP 1
+#define LINUX_SIGINT 2
+#define LINUX_SIGQUIT 3
+#define LINUX_SIGILL 4
+#define LINUX_SIGTRAP 5
+#define LINUX_SIGABRT 6
+#define LINUX_SIGIOT LINUX_SIGABRT
+#define LINUX_SIGBUS 7
+#define LINUX_SIGFPE 8
+#define LINUX_SIGKILL 9
+#define LINUX_SIGUSR1 10
+#define LINUX_SIGSEGV 11
+#define LINUX_SIGUSR2 12
+#define LINUX_SIGPIPE 13
+#define LINUX_SIGALRM 14
+#define LINUX_SIGTERM 15
+#define LINUX_SIGSTKFLT 16
+#define LINUX_SIGCHLD 17
+#define LINUX_SIGCONT 18
+#define LINUX_SIGSTOP 19
+#define LINUX_SIGTSTP 20
+#define LINUX_SIGTTIN 21
+#define LINUX_SIGTTOU 22
+#define LINUX_SIGURG 23
+#define LINUX_SIGXCPU 24
+#define LINUX_SIGXFSZ 25
+#define LINUX_SIGVTALRM 26
+#define LINUX_SIGPROF 27
+#define LINUX_SIGWINCH 28
+#define LINUX_SIGIO 29
+#define LINUX_SIGPOLL LINUX_SIGIO
+#define LINUX_SIGPWR 30
+#define LINUX_SIGSYS 31
+#define LINUX_SIGTBLSZ 31
+#define LINUX_SIGRTMIN 32
+#define LINUX_SIGRTMAX 64
+
+#define LINUX_SIG_VALID(sig) ((sig) <= LINUX_SIGRTMAX && (sig) > 0)
+
+int linux_to_bsd_signal(int sig);
+int bsd_to_linux_signal(int sig);
+
+#endif /* _LINUX_MI_H_ */
diff --git a/sys/compat/linux/linux_common.c b/sys/compat/linux/linux_common.c
new file mode 100644
index 0000000..b9e3531
--- /dev/null
+++ b/sys/compat/linux/linux_common.c
@@ -0,0 +1,93 @@
+/*-
+ * Copyright (c) 2014 Vassilis Laganakos
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/exec.h>
+#include <sys/imgact.h>
+#include <sys/imgact_elf.h>
+#include <sys/kernel.h>
+#include <sys/malloc.h>
+#include <sys/eventhandler.h>
+#include <sys/sysctl.h>
+
+#include <compat/linux/linux_emul.h>
+#include <compat/linux/linux_mib.h>
+#include <compat/linux/linux_util.h>
+
+FEATURE(linuxulator_v4l, "V4L ioctl wrapper support in the linuxulator");
+FEATURE(linuxulator_v4l2, "V4L2 ioctl wrapper support in the linuxulator");
+
+MODULE_VERSION(linux_common, 1);
+
+SET_DECLARE(linux_device_handler_set, struct linux_device_handler);
+
+static eventhandler_tag linux_exec_tag;
+static eventhandler_tag linux_thread_dtor_tag;
+static eventhandler_tag linux_exit_tag;
+
+
+/*
+ * Module event handler for the "linuxcommon" module.
+ *
+ * MOD_LOAD: calls linux_osd_jail_register(), hooks linux_proc_exit,
+ * linux_proc_exec and linux_thread_dtor onto the process_exit,
+ * process_exec and thread_dtor events (the tags are kept in the file-scope
+ * linux_*_tag variables) and registers every entry of
+ * linux_device_handler_set.
+ *
+ * MOD_UNLOAD: calls linux_osd_jail_deregister(), unregisters the device
+ * handlers and deregisters the three event handlers using the saved tags.
+ *
+ * Returns 0 for the two events above and EOPNOTSUPP for any other type.
+ * The mod and data arguments are not used.
+ */
+static int
+linux_common_modevent(module_t mod, int type, void *data)
+{
+ struct linux_device_handler **ldhp;
+
+ switch(type) {
+ case MOD_LOAD:
+ linux_osd_jail_register();
+ linux_exit_tag = EVENTHANDLER_REGISTER(process_exit,
+ linux_proc_exit, NULL, 1000);
+ linux_exec_tag = EVENTHANDLER_REGISTER(process_exec,
+ linux_proc_exec, NULL, 1000);
+ linux_thread_dtor_tag = EVENTHANDLER_REGISTER(thread_dtor,
+ linux_thread_dtor, NULL, EVENTHANDLER_PRI_ANY);
+ /* Register all handlers collected in the linker set. */
+ SET_FOREACH(ldhp, linux_device_handler_set)
+ linux_device_register_handler(*ldhp);
+ break;
+ case MOD_UNLOAD:
+ linux_osd_jail_deregister();
+ SET_FOREACH(ldhp, linux_device_handler_set)
+ linux_device_unregister_handler(*ldhp);
+ EVENTHANDLER_DEREGISTER(process_exit, linux_exit_tag);
+ EVENTHANDLER_DEREGISTER(process_exec, linux_exec_tag);
+ EVENTHANDLER_DEREGISTER(thread_dtor, linux_thread_dtor_tag);
+ break;
+ default:
+ return (EOPNOTSUPP);
+ }
+ return (0);
+}
+
+static moduledata_t linux_common_mod = {
+ "linuxcommon",
+ linux_common_modevent,
+ 0
+};
+
+DECLARE_MODULE(linuxcommon, linux_common_mod, SI_SUB_EXEC, SI_ORDER_ANY);
diff --git a/sys/compat/linux/linux_emul.c b/sys/compat/linux/linux_emul.c
index 61156ba..c2bf3ae 100644
--- a/sys/compat/linux/linux_emul.c
+++ b/sys/compat/linux/linux_emul.c
@@ -1,5 +1,6 @@
/*-
* Copyright (c) 2006 Roman Divacky
+ * Copyright (c) 2013 Dmitry Chagin
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -29,364 +30,235 @@
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
-#include "opt_compat.h"
-#include "opt_kdtrace.h"
-
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/imgact.h>
#include <sys/kernel.h>
+#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
-#include <sys/sdt.h>
#include <sys/sx.h>
#include <sys/proc.h>
#include <sys/syscallsubr.h>
#include <sys/sysent.h>
-#include <sys/sysproto.h>
-#include <sys/unistd.h>
-
-#ifdef COMPAT_LINUX32
-#include <machine/../linux32/linux.h>
-#include <machine/../linux32/linux32_proto.h>
-#else
-#include <machine/../linux/linux.h>
-#include <machine/../linux/linux_proto.h>
-#endif
-
-#include <compat/linux/linux_dtrace.h>
+
#include <compat/linux/linux_emul.h>
-#include <compat/linux/linux_futex.h>
#include <compat/linux/linux_misc.h>
+#include <compat/linux/linux_util.h>
-/**
- * Special DTrace provider for the linuxulator.
- *
- * In this file we define the provider for the entire linuxulator. All
- * modules (= files of the linuxulator) use it.
- *
- * We define a different name depending on the emulated bitsize, see
- * ../../<ARCH>/linux{,32}/linux.h, e.g.:
- * native bitsize = linuxulator
- * amd64, 32bit emulation = linuxulator32
- */
-LIN_SDT_PROVIDER_DEFINE(LINUX_DTRACE);
-/**
- * Special DTrace module "locks", it covers some linuxulator internal
- * locks.
- */
-LIN_SDT_PROBE_DEFINE1(locks, emul_lock, locked, "struct mtx *");
-LIN_SDT_PROBE_DEFINE1(locks, emul_lock, unlock, "struct mtx *");
-LIN_SDT_PROBE_DEFINE1(locks, emul_shared_rlock, locked, "struct sx *");
-LIN_SDT_PROBE_DEFINE1(locks, emul_shared_rlock, unlock, "struct sx *");
-LIN_SDT_PROBE_DEFINE1(locks, emul_shared_wlock, locked, "struct sx *");
-LIN_SDT_PROBE_DEFINE1(locks, emul_shared_wlock, unlock, "struct sx *");
-
-/**
- * DTrace probes in this module.
+/*
+ * This returns reference to the thread emuldata entry (if found)
+ *
+ * Hold PROC_LOCK when referencing emuldata from other threads.
*/
-LIN_SDT_PROBE_DEFINE2(emul, em_find, entry, "struct proc *", "int");
-LIN_SDT_PROBE_DEFINE0(emul, em_find, return);
-LIN_SDT_PROBE_DEFINE3(emul, proc_init, entry, "struct thread *", "pid_t",
- "int");
-LIN_SDT_PROBE_DEFINE0(emul, proc_init, create_thread);
-LIN_SDT_PROBE_DEFINE0(emul, proc_init, fork);
-LIN_SDT_PROBE_DEFINE0(emul, proc_init, exec);
-LIN_SDT_PROBE_DEFINE0(emul, proc_init, return);
-LIN_SDT_PROBE_DEFINE1(emul, proc_exit, entry, "struct proc *");
-LIN_SDT_PROBE_DEFINE0(emul, proc_exit, futex_failed);
-LIN_SDT_PROBE_DEFINE3(emul, proc_exit, reparent, "pid_t", "pid_t",
- "struct proc *");
-LIN_SDT_PROBE_DEFINE1(emul, proc_exit, child_clear_tid_error, "int");
-LIN_SDT_PROBE_DEFINE0(emul, proc_exit, return);
-LIN_SDT_PROBE_DEFINE2(emul, proc_exec, entry, "struct proc *",
- "struct image_params *");
-LIN_SDT_PROBE_DEFINE0(emul, proc_exec, return);
-LIN_SDT_PROBE_DEFINE0(emul, linux_schedtail, entry);
-LIN_SDT_PROBE_DEFINE1(emul, linux_schedtail, copyout_error, "int");
-LIN_SDT_PROBE_DEFINE0(emul, linux_schedtail, return);
-LIN_SDT_PROBE_DEFINE1(emul, linux_set_tid_address, entry, "int *");
-LIN_SDT_PROBE_DEFINE0(emul, linux_set_tid_address, return);
-LIN_SDT_PROBE_DEFINE2(emul, linux_kill_threads, entry, "struct thread *",
- "int");
-LIN_SDT_PROBE_DEFINE1(emul, linux_kill_threads, kill, "pid_t");
-LIN_SDT_PROBE_DEFINE0(emul, linux_kill_threads, return);
-
-struct sx emul_shared_lock;
-struct mtx emul_lock;
-
-/* this returns locked reference to the emuldata entry (if found) */
struct linux_emuldata *
-em_find(struct proc *p, int locked)
+em_find(struct thread *td)
{
struct linux_emuldata *em;
- LIN_SDT_PROBE2(emul, em_find, entry, p, locked);
+ em = td->td_emuldata;
- if (locked == EMUL_DOLOCK)
- EMUL_LOCK(&emul_lock);
+ return (em);
+}
- em = p->p_emuldata;
+/*
+ * This returns reference to the proc pemuldata entry (if found)
+ *
+ * Hold PROC_LOCK when referencing proc pemuldata from other threads.
+ * Hold LINUX_PEM_LOCK when referencing pemuldata members.
+ */
+struct linux_pemuldata *
+pem_find(struct proc *p)
+{
+ struct linux_pemuldata *pem;
- if (em == NULL && locked == EMUL_DOLOCK)
- EMUL_UNLOCK(&emul_lock);
+ pem = p->p_emuldata;
- LIN_SDT_PROBE1(emul, em_find, return, em);
- return (em);
+ return (pem);
}
-int
-linux_proc_init(struct thread *td, pid_t child, int flags)
+void
+linux_proc_init(struct thread *td, struct thread *newtd, int flags)
{
- struct linux_emuldata *em, *p_em;
+ struct linux_emuldata *em;
+ struct linux_pemuldata *pem;
+ struct epoll_emuldata *emd;
struct proc *p;
- LIN_SDT_PROBE3(emul, proc_init, entry, td, child, flags);
+ if (newtd != NULL) {
+ p = newtd->td_proc;
- if (child != 0) {
- /* fork or create a thread */
- em = malloc(sizeof *em, M_LINUX, M_WAITOK | M_ZERO);
- em->pid = child;
- em->pdeath_signal = 0;
- em->flags = 0;
- em->robust_futexes = NULL;
+ /* non-exec call */
+ em = malloc(sizeof(*em), M_TEMP, M_WAITOK | M_ZERO);
if (flags & LINUX_CLONE_THREAD) {
- /* handled later in the code */
- LIN_SDT_PROBE0(emul, proc_init, create_thread);
- } else {
- struct linux_emuldata_shared *s;
+ LINUX_CTR1(proc_init, "thread newtd(%d)",
+ newtd->td_tid);
- LIN_SDT_PROBE0(emul, proc_init, fork);
+ em->em_tid = newtd->td_tid;
+ } else {
+ LINUX_CTR1(proc_init, "fork newtd(%d)", p->p_pid);
- s = malloc(sizeof *s, M_LINUX, M_WAITOK | M_ZERO);
- s->refs = 1;
- s->group_pid = child;
+ em->em_tid = p->p_pid;
- LIST_INIT(&s->threads);
- em->shared = s;
+ pem = malloc(sizeof(*pem), M_LINUX, M_WAITOK | M_ZERO);
+ sx_init(&pem->pem_sx, "lpemlk");
+ p->p_emuldata = pem;
}
+ newtd->td_emuldata = em;
} else {
+ p = td->td_proc;
+
/* exec */
- LIN_SDT_PROBE0(emul, proc_init, exec);
+ LINUX_CTR1(proc_init, "exec newtd(%d)", p->p_pid);
/* lookup the old one */
- em = em_find(td->td_proc, EMUL_DOLOCK);
+ em = em_find(td);
KASSERT(em != NULL, ("proc_init: emuldata not found in exec case.\n"));
- }
-
- em->child_clear_tid = NULL;
- em->child_set_tid = NULL;
- /*
- * allocate the shared struct only in clone()/fork cases in the case
- * of clone() td = calling proc and child = pid of the newly created
- * proc
- */
- if (child != 0) {
- if (flags & LINUX_CLONE_THREAD) {
- /* lookup the parent */
- /*
- * we dont have to lock the p_em because
- * its waiting for us in linux_clone so
- * there is no chance of it changing the
- * p_em->shared address
- */
- p_em = em_find(td->td_proc, EMUL_DONTLOCK);
- KASSERT(p_em != NULL, ("proc_init: parent emuldata not found for CLONE_THREAD\n"));
- em->shared = p_em->shared;
- EMUL_SHARED_WLOCK(&emul_shared_lock);
- em->shared->refs++;
- EMUL_SHARED_WUNLOCK(&emul_shared_lock);
- } else {
- /*
- * handled earlier to avoid malloc(M_WAITOK) with
- * rwlock held
- */
- }
+ em->em_tid = p->p_pid;
+ em->flags = 0;
+ em->pdeath_signal = 0;
+ em->robust_futexes = NULL;
+ em->child_clear_tid = NULL;
+ em->child_set_tid = NULL;
- EMUL_SHARED_WLOCK(&emul_shared_lock);
- LIST_INSERT_HEAD(&em->shared->threads, em, threads);
- EMUL_SHARED_WUNLOCK(&emul_shared_lock);
+ /* epoll should be destroyed in a case of exec. */
+ pem = pem_find(p);
+ KASSERT(pem != NULL, ("proc_exit: proc emuldata not found.\n"));
- p = pfind(child);
- KASSERT(p != NULL, ("process not found in proc_init\n"));
- p->p_emuldata = em;
- PROC_UNLOCK(p);
- } else
- EMUL_UNLOCK(&emul_lock);
+ if (pem->epoll != NULL) {
+ emd = pem->epoll;
+ pem->epoll = NULL;
+ free(emd, M_EPOLL);
+ }
+ }
- LIN_SDT_PROBE0(emul, proc_init, return);
- return (0);
}
-void
+void
linux_proc_exit(void *arg __unused, struct proc *p)
{
- struct linux_emuldata *em;
- int error, shared_flags, shared_xstat;
- struct thread *td = FIRST_THREAD_IN_PROC(p);
- int *child_clear_tid;
- struct proc *q, *nq;
+ struct linux_pemuldata *pem;
+ struct epoll_emuldata *emd;
+ struct thread *td = curthread;
- if (__predict_true(p->p_sysent != &elf_linux_sysvec))
+ if (__predict_false(SV_CURPROC_ABI() != SV_ABI_LINUX))
return;
- LIN_SDT_PROBE1(emul, proc_exit, entry, p);
-
- release_futexes(p);
+ LINUX_CTR3(proc_exit, "thread(%d) proc(%d) p %p",
+ td->td_tid, p->p_pid, p);
- /* find the emuldata */
- em = em_find(p, EMUL_DOLOCK);
+ pem = pem_find(p);
+ if (pem == NULL)
+ return;
+ (p->p_sysent->sv_thread_detach)(td);
- KASSERT(em != NULL, ("proc_exit: emuldata not found.\n"));
+ p->p_emuldata = NULL;
- /* reparent all procs that are not a thread leader to initproc */
- if (em->shared->group_pid != p->p_pid) {
- LIN_SDT_PROBE3(emul, proc_exit, reparent,
- em->shared->group_pid, p->p_pid, p);
-
- child_clear_tid = em->child_clear_tid;
- EMUL_UNLOCK(&emul_lock);
- sx_xlock(&proctree_lock);
- wakeup(initproc);
- PROC_LOCK(p);
- proc_reparent(p, initproc);
- p->p_sigparent = SIGCHLD;
- PROC_UNLOCK(p);
- sx_xunlock(&proctree_lock);
- } else {
- child_clear_tid = em->child_clear_tid;
- EMUL_UNLOCK(&emul_lock);
+ if (pem->epoll != NULL) {
+ emd = pem->epoll;
+ pem->epoll = NULL;
+ free(emd, M_EPOLL);
}
- EMUL_SHARED_WLOCK(&emul_shared_lock);
- shared_flags = em->shared->flags;
- shared_xstat = em->shared->xstat;
- LIST_REMOVE(em, threads);
+ sx_destroy(&pem->pem_sx);
+ free(pem, M_LINUX);
+}
- em->shared->refs--;
- if (em->shared->refs == 0) {
- EMUL_SHARED_WUNLOCK(&emul_shared_lock);
- free(em->shared, M_LINUX);
- } else
- EMUL_SHARED_WUNLOCK(&emul_shared_lock);
+int
+linux_common_execve(struct thread *td, struct image_args *eargs)
+{
+ struct linux_pemuldata *pem;
+ struct epoll_emuldata *emd;
+ struct vmspace *oldvmspace;
+ struct linux_emuldata *em;
+ struct proc *p;
+ int error;
- if ((shared_flags & EMUL_SHARED_HASXSTAT) != 0)
- p->p_xstat = shared_xstat;
+ p = td->td_proc;
- if (child_clear_tid != NULL) {
- struct linux_sys_futex_args cup;
- int null = 0;
+ error = pre_execve(td, &oldvmspace);
+ if (error != 0)
+ return (error);
- error = copyout(&null, child_clear_tid, sizeof(null));
- if (error) {
- LIN_SDT_PROBE1(emul, proc_exit,
- child_clear_tid_error, error);
+ error = kern_execve(td, eargs, NULL);
+ post_execve(td, error, oldvmspace);
+ if (error != 0)
+ return (error);
- free(em, M_LINUX);
+ /*
+ * In a case of transition from Linux binary execing to
+ * FreeBSD binary we destroy linux emuldata thread & proc entries.
+ */
+ if (SV_CURPROC_ABI() != SV_ABI_LINUX) {
+ PROC_LOCK(p);
+ em = em_find(td);
+ KASSERT(em != NULL, ("proc_exec: thread emuldata not found.\n"));
+ td->td_emuldata = NULL;
- LIN_SDT_PROBE0(emul, proc_exit, return);
- return;
- }
+ pem = pem_find(p);
+ KASSERT(pem != NULL, ("proc_exec: proc pemuldata not found.\n"));
+ p->p_emuldata = NULL;
+ PROC_UNLOCK(p);
- /* futexes stuff */
- cup.uaddr = child_clear_tid;
- cup.op = LINUX_FUTEX_WAKE;
- cup.val = 0x7fffffff; /* Awake everyone */
- cup.timeout = NULL;
- cup.uaddr2 = NULL;
- cup.val3 = 0;
- error = linux_sys_futex(FIRST_THREAD_IN_PROC(p), &cup);
- /*
- * this cannot happen at the moment and if this happens it
- * probably means there is a user space bug
- */
- if (error) {
- LIN_SDT_PROBE0(emul, proc_exit, futex_failed);
- printf(LMSG("futex stuff in proc_exit failed.\n"));
+ if (pem->epoll != NULL) {
+ emd = pem->epoll;
+ pem->epoll = NULL;
+ free(emd, M_EPOLL);
}
- }
- /* clean the stuff up */
- free(em, M_LINUX);
-
- /* this is a little weird but rewritten from exit1() */
- sx_xlock(&proctree_lock);
- q = LIST_FIRST(&p->p_children);
- for (; q != NULL; q = nq) {
- nq = LIST_NEXT(q, p_sibling);
- if (q->p_flag & P_WEXIT)
- continue;
- if (__predict_false(q->p_sysent != &elf_linux_sysvec))
- continue;
- em = em_find(q, EMUL_DOLOCK);
- KASSERT(em != NULL, ("linux_reparent: emuldata not found: %i\n", q->p_pid));
- PROC_LOCK(q);
- if ((q->p_flag & P_WEXIT) == 0 && em->pdeath_signal != 0) {
- kern_psignal(q, em->pdeath_signal);
- }
- PROC_UNLOCK(q);
- EMUL_UNLOCK(&emul_lock);
+ free(em, M_TEMP);
+ free(pem, M_LINUX);
}
- sx_xunlock(&proctree_lock);
-
- LIN_SDT_PROBE0(emul, proc_exit, return);
+ return (0);
}
-/*
- * This is used in a case of transition from FreeBSD binary execing to linux binary
- * in this case we create linux emuldata proc entry with the pid of the currently running
- * process.
- */
void
linux_proc_exec(void *arg __unused, struct proc *p, struct image_params *imgp)
{
- if (__predict_false(imgp->sysent == &elf_linux_sysvec)) {
- LIN_SDT_PROBE2(emul, proc_exec, entry, p, imgp);
- }
- if (__predict_false(imgp->sysent == &elf_linux_sysvec
- && p->p_sysent != &elf_linux_sysvec))
- linux_proc_init(FIRST_THREAD_IN_PROC(p), p->p_pid, 0);
- if (__predict_false((p->p_sysent->sv_flags & SV_ABI_MASK) ==
- SV_ABI_LINUX))
- /* Kill threads regardless of imgp->sysent value */
- linux_kill_threads(FIRST_THREAD_IN_PROC(p), SIGKILL);
- if (__predict_false(imgp->sysent != &elf_linux_sysvec
- && p->p_sysent == &elf_linux_sysvec)) {
- struct linux_emuldata *em;
-
- /*
- * XXX:There's a race because here we assign p->p_emuldata NULL
- * but the process is still counted as linux one for a short
- * time so some other process might reference it and try to
- * access its p->p_emuldata and panicing on a NULL reference.
- */
- em = em_find(p, EMUL_DONTLOCK);
-
- KASSERT(em != NULL, ("proc_exec: emuldata not found.\n"));
-
- EMUL_SHARED_WLOCK(&emul_shared_lock);
- LIST_REMOVE(em, threads);
+ struct thread *td = curthread;
+ struct thread *othertd;
- PROC_LOCK(p);
- p->p_emuldata = NULL;
- PROC_UNLOCK(p);
+ /*
+ * In a case of execing from linux binary properly detach
+ * other threads from the user space.
+ */
+ if (__predict_false(SV_PROC_ABI(p) == SV_ABI_LINUX)) {
+ FOREACH_THREAD_IN_PROC(p, othertd) {
+ if (td != othertd)
+ (p->p_sysent->sv_thread_detach)(othertd);
+ }
+ }
- em->shared->refs--;
- if (em->shared->refs == 0) {
- EMUL_SHARED_WUNLOCK(&emul_shared_lock);
- free(em->shared, M_LINUX);
- } else
- EMUL_SHARED_WUNLOCK(&emul_shared_lock);
+ /*
+ * In a case of execing to linux binary we create linux
+ * emuldata thread entry.
+ */
+ if (__predict_false((imgp->sysent->sv_flags & SV_ABI_MASK) ==
+ SV_ABI_LINUX)) {
- free(em, M_LINUX);
+ if (SV_PROC_ABI(p) == SV_ABI_LINUX)
+ linux_proc_init(td, NULL, 0);
+ else
+ linux_proc_init(td, td, 0);
}
+}
- if (__predict_false(imgp->sysent == &elf_linux_sysvec)) {
- LIN_SDT_PROBE0(emul, proc_exec, return);
- }
+void
+linux_thread_dtor(void *arg __unused, struct thread *td)
+{
+ struct linux_emuldata *em;
+
+ em = em_find(td);
+ if (em == NULL)
+ return;
+ td->td_emuldata = NULL;
+
+ LINUX_CTR1(thread_dtor, "thread(%d)", em->em_tid);
+
+ free(em, M_TEMP);
}
void
@@ -399,76 +271,15 @@ linux_schedtail(struct thread *td)
p = td->td_proc;
- LIN_SDT_PROBE1(emul, linux_schedtail, entry, p);
-
- /* find the emuldata */
- em = em_find(p, EMUL_DOLOCK);
-
- KASSERT(em != NULL, ("linux_schedtail: emuldata not found.\n"));
+ em = em_find(td);
+ KASSERT(em != NULL, ("linux_schedtail: thread emuldata not found.\n"));
child_set_tid = em->child_set_tid;
- EMUL_UNLOCK(&emul_lock);
if (child_set_tid != NULL) {
- error = copyout(&p->p_pid, (int *)child_set_tid,
- sizeof(p->p_pid));
-
- if (error != 0) {
- LIN_SDT_PROBE1(emul, linux_schedtail, copyout_error,
- error);
- }
- }
-
- LIN_SDT_PROBE0(emul, linux_schedtail, return);
-
- return;
-}
-
-int
-linux_set_tid_address(struct thread *td, struct linux_set_tid_address_args *args)
-{
- struct linux_emuldata *em;
-
- LIN_SDT_PROBE1(emul, linux_set_tid_address, entry, args->tidptr);
-
- /* find the emuldata */
- em = em_find(td->td_proc, EMUL_DOLOCK);
-
- KASSERT(em != NULL, ("set_tid_address: emuldata not found.\n"));
-
- em->child_clear_tid = args->tidptr;
- td->td_retval[0] = td->td_proc->p_pid;
-
- EMUL_UNLOCK(&emul_lock);
-
- LIN_SDT_PROBE0(emul, linux_set_tid_address, return);
- return 0;
-}
-
-void
-linux_kill_threads(struct thread *td, int sig)
-{
- struct linux_emuldata *em, *td_em, *tmp_em;
- struct proc *sp;
-
- LIN_SDT_PROBE2(emul, linux_kill_threads, entry, td, sig);
-
- td_em = em_find(td->td_proc, EMUL_DONTLOCK);
-
- KASSERT(td_em != NULL, ("linux_kill_threads: emuldata not found.\n"));
-
- EMUL_SHARED_RLOCK(&emul_shared_lock);
- LIST_FOREACH_SAFE(em, &td_em->shared->threads, threads, tmp_em) {
- if (em->pid == td_em->pid)
- continue;
-
- sp = pfind(em->pid);
- if ((sp->p_flag & P_WEXIT) == 0)
- kern_psignal(sp, sig);
- PROC_UNLOCK(sp);
-
- LIN_SDT_PROBE1(emul, linux_kill_threads, kill, em->pid);
- }
- EMUL_SHARED_RUNLOCK(&emul_shared_lock);
-
- LIN_SDT_PROBE0(emul, linux_kill_threads, return);
+ error = copyout(&em->em_tid, child_set_tid,
+ sizeof(em->em_tid));
+ LINUX_CTR4(schedtail, "thread(%d) %p stored %d error %d",
+ td->td_tid, child_set_tid, em->em_tid, error);
+ } else
+ LINUX_CTR1(schedtail, "thread(%d)", em->em_tid);
}
diff --git a/sys/compat/linux/linux_emul.h b/sys/compat/linux/linux_emul.h
index f409a34..7262093 100644
--- a/sys/compat/linux/linux_emul.h
+++ b/sys/compat/linux/linux_emul.h
@@ -1,5 +1,6 @@
/*-
* Copyright (c) 2006 Roman Divacky
+ * Copyright (c) 2013 Dmitry Chagin
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -31,91 +32,48 @@
#ifndef _LINUX_EMUL_H_
#define _LINUX_EMUL_H_
-#define EMUL_SHARED_HASXSTAT 0x01
-
-struct linux_emuldata_shared {
- int refs;
- int flags;
- int xstat;
- pid_t group_pid;
-
- LIST_HEAD(, linux_emuldata) threads; /* head of list of linux threads */
-};
-
/*
* modeled after similar structure in NetBSD
* this will be extended as we need more functionality
*/
struct linux_emuldata {
- pid_t pid;
-
int *child_set_tid; /* in clone(): Child's TID to set on clone */
int *child_clear_tid;/* in clone(): Child's TID to clear on exit */
- struct linux_emuldata_shared *shared;
-
int pdeath_signal; /* parent death signal */
- int flags; /* different emuldata flags */
+ int flags; /* thread emuldata flags */
+ int em_tid; /* thread id */
struct linux_robust_list_head *robust_futexes;
-
- LIST_ENTRY(linux_emuldata) threads; /* list of linux threads */
};
-struct linux_emuldata *em_find(struct proc *, int locked);
-
-/*
- * DTrace probes for locks should be fired after locking and before releasing
- * to prevent races (to provide data/function stability in dtrace, see the
- * output of "dtrace -v ..." and the corresponding dtrace docs).
- */
-#define EMUL_LOCK(l) do { \
- mtx_lock(l); \
- LIN_SDT_PROBE1(locks, emul_lock, \
- locked, l); \
- } while (0)
-#define EMUL_UNLOCK(l) do { \
- LIN_SDT_PROBE1(locks, emul_lock, \
- unlock, l); \
- mtx_unlock(l); \
- } while (0)
+struct linux_emuldata *em_find(struct thread *);
-#define EMUL_SHARED_RLOCK(l) do { \
- sx_slock(l); \
- LIN_SDT_PROBE1(locks, emul_shared_rlock, \
- locked, l); \
- } while (0)
-#define EMUL_SHARED_RUNLOCK(l) do { \
- LIN_SDT_PROBE1(locks, emul_shared_rlock, \
- unlock, l); \
- sx_sunlock(l); \
- } while (0)
-#define EMUL_SHARED_WLOCK(l) do { \
- sx_xlock(l); \
- LIN_SDT_PROBE1(locks, emul_shared_wlock, \
- locked, l); \
- } while (0)
-#define EMUL_SHARED_WUNLOCK(l) do { \
- LIN_SDT_PROBE1(locks, emul_shared_wlock, \
- unlock, l); \
- sx_xunlock(l); \
- } while (0)
-
-/* for em_find use */
-#define EMUL_DOLOCK 1
-#define EMUL_DONTLOCK 0
+void linux_proc_init(struct thread *, struct thread *, int);
+void linux_proc_exit(void *, struct proc *);
+void linux_schedtail(struct thread *);
+void linux_proc_exec(void *, struct proc *, struct image_params *);
+void linux_thread_dtor(void *arg __unused, struct thread *);
+void linux_thread_detach(struct thread *);
+int linux_common_execve(struct thread *, struct image_args *);
-/* emuldata flags */
+/* process emuldata flags */
#define LINUX_XDEPR_REQUEUEOP 0x00000001 /* uses deprecated
futex REQUEUE op*/
+#define LINUX_XUNSUP_EPOLL 0x00000002 /* unsupported epoll events */
+#define LINUX_XUNSUP_FUTEXPIOP 0x00000004 /* uses unsupported pi futex */
-int linux_proc_init(struct thread *, pid_t, int);
-void linux_proc_exit(void *, struct proc *);
-void linux_schedtail(struct thread *);
-void linux_proc_exec(void *, struct proc *, struct image_params *);
-void linux_kill_threads(struct thread *, int);
+struct linux_pemuldata {
+ uint32_t flags; /* process emuldata flags */
+ struct sx pem_sx; /* lock for this struct */
+ void *epoll; /* epoll data */
+};
+
+#define LINUX_PEM_XLOCK(p) sx_xlock(&(p)->pem_sx)
+#define LINUX_PEM_XUNLOCK(p) sx_xunlock(&(p)->pem_sx)
+#define LINUX_PEM_SLOCK(p) sx_slock(&(p)->pem_sx)
+#define LINUX_PEM_SUNLOCK(p) sx_sunlock(&(p)->pem_sx)
-extern struct sx emul_shared_lock;
-extern struct mtx emul_lock;
+struct linux_pemuldata *pem_find(struct proc *);
#endif /* !_LINUX_EMUL_H_ */
diff --git a/sys/compat/linux/linux_event.c b/sys/compat/linux/linux_event.c
new file mode 100644
index 0000000..1fe3445
--- /dev/null
+++ b/sys/compat/linux/linux_event.c
@@ -0,0 +1,882 @@
+/*-
+ * Copyright (c) 2007 Roman Divacky
+ * Copyright (c) 2014 Dmitry Chagin
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include "opt_compat.h"
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/imgact.h>
+#include <sys/kernel.h>
+#include <sys/limits.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/capability.h>
+#include <sys/types.h>
+#include <sys/file.h>
+#include <sys/filedesc.h>
+#include <sys/errno.h>
+#include <sys/event.h>
+#include <sys/poll.h>
+#include <sys/proc.h>
+#include <sys/selinfo.h>
+#include <sys/sx.h>
+#include <sys/syscallsubr.h>
+#include <sys/timespec.h>
+
+#ifdef COMPAT_LINUX32
+#include <machine/../linux32/linux.h>
+#include <machine/../linux32/linux32_proto.h>
+#else
+#include <machine/../linux/linux.h>
+#include <machine/../linux/linux_proto.h>
+#endif
+
+#include <compat/linux/linux_emul.h>
+#include <compat/linux/linux_event.h>
+#include <compat/linux/linux_file.h>
+#include <compat/linux/linux_util.h>
+
+/*
+ * epoll defines 'struct epoll_event' with the field 'data' as 64 bits
+ * on all architectures. But on 32 bit architectures BSD 'struct kevent' only
+ * has 32 bit opaque pointer as 'udata' field. So we can't pass epoll supplied
+ * data verbatim. Therefore we allocate 64-bit memory block to pass
+ * user supplied data for every file descriptor.
+ */
+
+typedef uint64_t epoll_udata_t;
+
+struct epoll_emuldata {
+ uint32_t fdc; /* epoll udata max index */
+ epoll_udata_t udata[1]; /* epoll user data vector */
+};
+
+#define EPOLL_DEF_SZ 16
+#define EPOLL_SIZE(fdn) \
+ (sizeof(struct epoll_emuldata)+(fdn) * sizeof(epoll_udata_t))
+
+struct epoll_event {
+ uint32_t events;
+ epoll_udata_t data;
+}
+#if defined(__amd64__)
+__attribute__((packed))
+#endif
+;
+
+#define LINUX_MAX_EVENTS (INT_MAX / sizeof(struct epoll_event))
+
+static void epoll_fd_install(struct thread *td, int fd, epoll_udata_t udata);
+static int epoll_to_kevent(struct thread *td, struct file *epfp,
+ int fd, struct epoll_event *l_event, int *kev_flags,
+ struct kevent *kevent, int *nkevents);
+static void kevent_to_epoll(struct kevent *kevent, struct epoll_event *l_event);
+static int epoll_kev_copyout(void *arg, struct kevent *kevp, int count);
+static int epoll_kev_copyin(void *arg, struct kevent *kevp, int count);
+static int epoll_delete_event(struct thread *td, struct file *epfp,
+ int fd, int filter);
+static int epoll_delete_all_events(struct thread *td, struct file *epfp,
+ int fd);
+
+struct epoll_copyin_args {
+ struct kevent *changelist;
+};
+
+struct epoll_copyout_args {
+ struct epoll_event *leventlist;
+ struct proc *p;
+ uint32_t count;
+ int error;
+};
+
+/* eventfd */
+typedef uint64_t eventfd_t;
+
+static fo_rdwr_t eventfd_read;
+static fo_rdwr_t eventfd_write;
+static fo_truncate_t eventfd_truncate;
+static fo_ioctl_t eventfd_ioctl;
+static fo_poll_t eventfd_poll;
+static fo_kqfilter_t eventfd_kqfilter;
+static fo_stat_t eventfd_stat;
+static fo_close_t eventfd_close;
+
+static struct fileops eventfdops = {
+ .fo_read = eventfd_read,
+ .fo_write = eventfd_write,
+ .fo_truncate = eventfd_truncate,
+ .fo_ioctl = eventfd_ioctl,
+ .fo_poll = eventfd_poll,
+ .fo_kqfilter = eventfd_kqfilter,
+ .fo_stat = eventfd_stat,
+ .fo_close = eventfd_close,
+ .fo_chmod = invfo_chmod,
+ .fo_chown = invfo_chown,
+ .fo_sendfile = invfo_sendfile,
+ .fo_flags = DFLAG_PASSABLE
+};
+
+static void filt_eventfddetach(struct knote *kn);
+static int filt_eventfdread(struct knote *kn, long hint);
+static int filt_eventfdwrite(struct knote *kn, long hint);
+
+static struct filterops eventfd_rfiltops = {
+ .f_isfd = 1,
+ .f_detach = filt_eventfddetach,
+ .f_event = filt_eventfdread
+};
+static struct filterops eventfd_wfiltops = {
+ .f_isfd = 1,
+ .f_detach = filt_eventfddetach,
+ .f_event = filt_eventfdwrite
+};
+
+struct eventfd {
+ eventfd_t efd_count;
+ uint32_t efd_flags;
+ struct selinfo efd_sel;
+ struct mtx efd_lock;
+};
+
+static int eventfd_create(struct thread *td, uint32_t initval, int flags);
+
+
+/*
+ * Record the epoll user data 'udata' for descriptor 'fd' in the per-process
+ * epoll user data vector (pem->epoll) of td's process.
+ *
+ * The vector is allocated on first use and grown with realloc() when fd is
+ * larger than the current maximum index (emd->fdc).  EPOLL_SIZE(fd) adds
+ * fd elements to a structure that already holds udata[1], so indices
+ * 0..fd are covered.  Neither malloc() nor realloc() is asked to zero the
+ * memory here (only M_WAITOK is passed), so slots other than udata[fd] are
+ * not initialized by this function.
+ *
+ * The whole update is done with the pemuldata lock held exclusively.
+ */
+static void
+epoll_fd_install(struct thread *td, int fd, epoll_udata_t udata)
+{
+ struct linux_pemuldata *pem;
+ struct epoll_emuldata *emd;
+ struct proc *p;
+
+ p = td->td_proc;
+
+ pem = pem_find(p);
+ KASSERT(pem != NULL, ("epoll proc emuldata not found.\n"));
+
+ LINUX_PEM_XLOCK(pem);
+ if (pem->epoll == NULL) {
+ /* First use: allocate a vector large enough for index fd. */
+ emd = malloc(EPOLL_SIZE(fd), M_EPOLL, M_WAITOK);
+ emd->fdc = fd;
+ pem->epoll = emd;
+ } else {
+ emd = pem->epoll;
+ if (fd > emd->fdc) {
+ /* Grow the vector; realloc() may move it, so republish. */
+ emd = realloc(emd, EPOLL_SIZE(fd), M_EPOLL, M_WAITOK);
+ emd->fdc = fd;
+ pem->epoll = emd;
+ }
+ }
+ emd->udata[fd] = udata;
+ LINUX_PEM_XUNLOCK(pem);
+}
+
+/*
+ * Common backend of linux_epoll_create() and linux_epoll_create1().
+ *
+ * Calls kern_kqueue(td, flags) and returns its error if it fails.  On
+ * success it calls epoll_fd_install() with index EPOLL_DEF_SZ and user
+ * data 0, which makes sure the process' epoll user data vector exists and
+ * covers at least indices 0..EPOLL_DEF_SZ.  Returns 0 on success.
+ */
+static int
+epoll_create_common(struct thread *td, int flags)
+{
+ int error;
+
+ error = kern_kqueue(td, flags);
+ if (error)
+ return (error);
+
+ epoll_fd_install(td, EPOLL_DEF_SZ, 0);
+
+ return (0);
+}
+
+/*
+ * epoll_create() entry point.
+ *
+ * Returns EINVAL when args->size is not positive; otherwise returns the
+ * result of epoll_create_common(td, 0).
+ */
+int
+linux_epoll_create(struct thread *td, struct linux_epoll_create_args *args)
+{
+
+ /*
+ * args->size is unused. Linux just tests it
+ * and then forgets it as well.
+ */
+ if (args->size <= 0)
+ return (EINVAL);
+
+ return (epoll_create_common(td, 0));
+}
+
+/*
+ * epoll_create1() entry point.
+ *
+ * The only flag accepted is LINUX_O_CLOEXEC, which is translated to
+ * O_CLOEXEC; any other bit in args->flags yields EINVAL.  Otherwise
+ * returns the result of epoll_create_common() with the translated flags.
+ */
+int
+linux_epoll_create1(struct thread *td, struct linux_epoll_create1_args *args)
+{
+ int flags;
+
+ /* Reject every flag bit other than LINUX_O_CLOEXEC. */
+ if ((args->flags & ~(LINUX_O_CLOEXEC)) != 0)
+ return (EINVAL);
+
+ flags = 0;
+ if ((args->flags & LINUX_O_CLOEXEC) != 0)
+ flags |= O_CLOEXEC;
+
+ return (epoll_create_common(td, flags));
+}
+
+/*
+ * Structure converting function from epoll to kevent.
+ *
+ * Translates the event mask in l_event->events for descriptor fd:
+ *  - LINUX_EPOLLONESHOT, LINUX_EPOLLET, LINUX_EPOLLERR and LINUX_EPOLLRDHUP
+ *    are OR-ed into *kev_flags as EV_ONESHOT, EV_CLEAR, EV_ERROR and EV_EOF;
+ *  - an EVFILT_READ entry is written to kevent when any LINUX_EPOLL_EVRD bit
+ *    is set and an EVFILT_WRITE entry when any LINUX_EPOLL_EVWR bit is set;
+ *    *nkevents is incremented once per entry written.
+ *
+ * Returns 0, or EINVAL when the mask contains bits outside
+ * LINUX_EPOLL_EVSUP.  In the EINVAL case the kevent entries and *nkevents
+ * may already have been updated.  The unsupported mask is reported through
+ * linux_msg() only the first time for a process; LINUX_XUNSUP_EPOLL in
+ * pem->flags remembers that it was reported.
+ *
+ * The epfp argument is not used in this function.
+ */
+static int
+epoll_to_kevent(struct thread *td, struct file *epfp,
+ int fd, struct epoll_event *l_event, int *kev_flags,
+ struct kevent *kevent, int *nkevents)
+{
+ uint32_t levents = l_event->events;
+ struct linux_pemuldata *pem;
+ struct proc *p;
+
+ /* flags related to how event is registered */
+ if ((levents & LINUX_EPOLLONESHOT) != 0)
+ *kev_flags |= EV_ONESHOT;
+ if ((levents & LINUX_EPOLLET) != 0)
+ *kev_flags |= EV_CLEAR;
+ if ((levents & LINUX_EPOLLERR) != 0)
+ *kev_flags |= EV_ERROR;
+ if ((levents & LINUX_EPOLLRDHUP) != 0)
+ *kev_flags |= EV_EOF;
+
+ /* flags related to what event is registered */
+ if ((levents & LINUX_EPOLL_EVRD) != 0) {
+ EV_SET(kevent++, fd, EVFILT_READ, *kev_flags, 0, 0, 0);
+ ++(*nkevents);
+ }
+ if ((levents & LINUX_EPOLL_EVWR) != 0) {
+ EV_SET(kevent++, fd, EVFILT_WRITE, *kev_flags, 0, 0, 0);
+ ++(*nkevents);
+ }
+
+ /* Anything outside the supported set is rejected, logging once. */
+ if ((levents & ~(LINUX_EPOLL_EVSUP)) != 0) {
+ p = td->td_proc;
+
+ pem = pem_find(p);
+ KASSERT(pem != NULL, ("epoll proc emuldata not found.\n"));
+ KASSERT(pem->epoll != NULL, ("epoll proc epolldata not found.\n"));
+
+ LINUX_PEM_XLOCK(pem);
+ if ((pem->flags & LINUX_XUNSUP_EPOLL) == 0) {
+ pem->flags |= LINUX_XUNSUP_EPOLL;
+ /* Drop the lock before emitting the message. */
+ LINUX_PEM_XUNLOCK(pem);
+ linux_msg(td, "epoll_ctl unsupported flags: 0x%x\n",
+ levents);
+ } else
+ LINUX_PEM_XUNLOCK(pem);
+ return (EINVAL);
+ }
+
+ return (0);
+}
+
+/*
+ * Translate a kevent into the `events` mask of a Linux epoll_event.
+ * A kevent flagged EV_ERROR yields LINUX_EPOLLERR alone.  Otherwise
+ * the mask is chosen by filter: read filters report IN/RDNORM/PRI
+ * (plus RDHUP when EV_EOF is set), write filters report OUT/WRNORM.
+ * Any other filter leaves l_event->events untouched.
+ */
+static void
+kevent_to_epoll(struct kevent *kevent, struct epoll_event *l_event)
+{
+
+	if ((kevent->flags & EV_ERROR) != 0) {
+		l_event->events = LINUX_EPOLLERR;
+		return;
+	}
+
+	if (kevent->filter == EVFILT_READ) {
+		l_event->events = LINUX_EPOLLIN|LINUX_EPOLLRDNORM|LINUX_EPOLLPRI;
+		if ((kevent->flags & EV_EOF) != 0)
+			l_event->events |= LINUX_EPOLLRDHUP;
+	} else if (kevent->filter == EVFILT_WRITE) {
+		l_event->events = LINUX_EPOLLOUT|LINUX_EPOLLWRNORM;
+	}
+}
+
+/*
+ * kevent copyout callback.  Converts `count` kevents to epoll events
+ * in a temporary zeroed buffer, fills in each event's user data from
+ * the per-process udata vector (indexed by the kevent ident, read
+ * under the shared pem lock), and copies the batch out to the user
+ * buffer tracked in the epoll_copyout_args.  On success the output
+ * cursor and running count are advanced; on failure the first error
+ * seen is latched in args->error.  Returns the copyout() result.
+ */
+static int
+epoll_kev_copyout(void *arg, struct kevent *kevp, int count)
+{
+	struct epoll_copyout_args *coa = arg;
+	struct linux_pemuldata *pem;
+	struct epoll_emuldata *emd;
+	struct epoll_event *out;
+	int ident, n, rv;
+
+	out = malloc(sizeof(*out) * count, M_EPOLL, M_WAITOK | M_ZERO);
+
+	pem = pem_find(coa->p);
+	KASSERT(pem != NULL, ("epoll proc emuldata not found.\n"));
+	LINUX_PEM_SLOCK(pem);
+	emd = pem->epoll;
+	KASSERT(emd != NULL, ("epoll proc epolldata not found.\n"));
+
+	for (n = 0; n < count; n++) {
+		kevent_to_epoll(&kevp[n], &out[n]);
+
+		ident = kevp[n].ident;
+		KASSERT(ident <= emd->fdc, ("epoll user data vector"
+		    " is too small.\n"));
+		out[n].data = emd->udata[ident];
+	}
+	LINUX_PEM_SUNLOCK(pem);
+
+	rv = copyout(out, coa->leventlist, count * sizeof(*out));
+	if (rv != 0) {
+		if (coa->error == 0)
+			coa->error = rv;
+	} else {
+		coa->leventlist += count;
+		coa->count += count;
+	}
+
+	free(out, M_EPOLL);
+	return (rv);
+}
+
+/*
+ * kevent copyin callback.  The change list was already built in
+ * kernel memory, so it is transferred with memcpy() rather than
+ * copyin(); the source cursor is advanced past the entries consumed.
+ * Always returns 0.
+ */
+static int
+epoll_kev_copyin(void *arg, struct kevent *kevp, int count)
+{
+	struct epoll_copyin_args *cia = arg;
+
+	memcpy(kevp, cia->changelist, sizeof(*kevp) * count);
+	cia->changelist += count;
+
+	return (0);
+}
+
+/*
+ * epoll_ctl(2): add, modify or remove interest in `fd` on the epoll
+ * instance `epfd`.
+ *
+ * The Linux epoll_event is converted to at most two kevents (read
+ * and write filter) which are submitted to the kqueue behind epfd.
+ * Returns 0 or an errno value.  EINVAL is returned when epfd is not
+ * a kqueue, when fd and epfd refer to the same file, for an unknown
+ * op, and for unsupported event bits.
+ */
+int
+linux_epoll_ctl(struct thread *td, struct linux_epoll_ctl_args *args)
+{
+	struct file *epfp, *fp;
+	struct epoll_copyin_args ciargs;
+	struct kevent kev[2];
+	struct kevent_copyops k_ops = { &ciargs,
+					NULL,
+					epoll_kev_copyin};
+	struct epoll_event le;
+	cap_rights_t rights;
+	int kev_flags;
+	int nchanges = 0;
+	int error;
+
+	/* The event argument is not read for CTL_DEL. */
+	if (args->op != LINUX_EPOLL_CTL_DEL) {
+		error = copyin(args->event, &le, sizeof(le));
+		if (error != 0)
+			return (error);
+	}
+
+	error = fget(td, args->epfd,
+	    cap_rights_init(&rights, CAP_KQUEUE_CHANGE), &epfp);
+	if (error != 0)
+		return (error);
+	/*
+	 * error is 0 after a successful fget(), so it has to be set
+	 * explicitly here; otherwise a descriptor that is not an epoll
+	 * instance would be reported as success.
+	 */
+	if (epfp->f_type != DTYPE_KQUEUE) {
+		error = EINVAL;
+		goto leave1;
+	}
+
+	/* Protect user data vector from incorrectly supplied fd. */
+	error = fget(td, args->fd, cap_rights_init(&rights, CAP_POLL_EVENT), &fp);
+	if (error != 0)
+		goto leave1;
+
+	/* Linux does not allow an epoll instance to watch itself. */
+	if (epfp == fp) {
+		error = EINVAL;
+		goto leave0;
+	}
+
+	ciargs.changelist = kev;
+
+	switch (args->op) {
+	case LINUX_EPOLL_CTL_MOD:
+		/*
+		 * We don't memorize which events were set for this FD
+		 * on this level, so just delete all we could have set:
+		 * EVFILT_READ and EVFILT_WRITE.  epoll_delete_event()
+		 * already ignores ENOENT; any other error aborts the
+		 * modification.
+		 */
+		error = epoll_delete_all_events(td, epfp, args->fd);
+		if (error)
+			goto leave0;
+		/* FALLTHROUGH */
+
+	case LINUX_EPOLL_CTL_ADD:
+		kev_flags = EV_ADD | EV_ENABLE;
+		break;
+
+	case LINUX_EPOLL_CTL_DEL:
+		/* CTL_DEL means unregister this fd with this epoll */
+		error = epoll_delete_all_events(td, epfp, args->fd);
+		goto leave0;
+
+	default:
+		error = EINVAL;
+		goto leave0;
+	}
+
+	error = epoll_to_kevent(td, epfp, args->fd, &le, &kev_flags,
+	    kev, &nchanges);
+	if (error)
+		goto leave0;
+
+	/* Record the user data before the filters can fire. */
+	epoll_fd_install(td, args->fd, le.data);
+
+	error = kern_kevent_fp(td, epfp, nchanges, 0, &k_ops, NULL);
+
+leave0:
+	fdrop(fp, td);
+
+leave1:
+	fdrop(epfp, td);
+	return (error);
+}
+
+/*
+ * Wait for a filter to be triggered on the epoll file descriptor.
+ *
+ * Shared backend of epoll_wait() and epoll_pwait().  `events` is the
+ * user buffer that receives up to `maxevents` epoll events, `timeout`
+ * is in milliseconds (-1 passes a NULL timespec to kern_kevent_fp())
+ * and `uset`, when not NULL, is a signal mask installed for the
+ * duration of the call.  On success the number of events copied out
+ * is stored in td_retval[0] and 0 is returned; otherwise an errno
+ * value is returned.
+ */
+static int
+linux_epoll_wait_common(struct thread *td, int epfd, struct epoll_event *events,
+ int maxevents, int timeout, sigset_t *uset)
+{
+ struct file *epfp;
+ struct timespec ts, *tsp;
+ cap_rights_t rights;
+ struct epoll_copyout_args coargs;
+ struct kevent_copyops k_ops = { &coargs,
+ epoll_kev_copyout,
+ NULL};
+ int error;
+
+ /* maxevents must lie in 1..LINUX_MAX_EVENTS. */
+ if (maxevents <= 0 || maxevents > LINUX_MAX_EVENTS)
+ return (EINVAL);
+
+ if (uset != NULL) {
+ /* Install the caller's mask, saving the old one in td_oldsigmask. */
+ error = kern_sigprocmask(td, SIG_SETMASK, uset,
+ &td->td_oldsigmask, 0);
+ if (error != 0)
+ return (error);
+ td->td_pflags |= TDP_OLDMASK;
+ /*
+ * Make sure that ast() is called on return to
+ * usermode and TDP_OLDMASK is cleared, restoring old
+ * sigmask.
+ */
+ thread_lock(td);
+ td->td_flags |= TDF_ASTPENDING;
+ thread_unlock(td);
+ }
+
+ /*
+ * NOTE(review): the error returns below happen after the mask may
+ * have been replaced; restoration presumably relies on the
+ * TDP_OLDMASK/ast() path armed above -- confirm.
+ */
+ error = fget(td, epfd,
+ cap_rights_init(&rights, CAP_KQUEUE_EVENT), &epfp);
+ if (error != 0)
+ return (error);
+
+ /* State shared with epoll_kev_copyout(). */
+ coargs.leventlist = events;
+ coargs.p = td->td_proc;
+ coargs.count = 0;
+ coargs.error = 0;
+
+ if (timeout != -1) {
+ /* Negative timeouts other than -1 are rejected. */
+ if (timeout < 0) {
+ error = EINVAL;
+ goto leave;
+ }
+ /* Convert from milliseconds to timespec. */
+ ts.tv_sec = timeout / 1000;
+ ts.tv_nsec = (timeout % 1000) * 1000000;
+ tsp = &ts;
+ } else {
+ tsp = NULL;
+ }
+
+ /* No changes are submitted; only up to maxevents events are collected. */
+ error = kern_kevent_fp(td, epfp, 0, maxevents, &k_ops, tsp);
+ /* Surface an error latched by the copyout callback. */
+ if (error == 0 && coargs.error != 0)
+ error = coargs.error;
+
+ /*
+ * kern_kevent might return ENOMEM which is not expected from epoll_wait.
+ * Maybe we should translate that but I don't think it matters at all.
+ */
+ if (error == 0)
+ td->td_retval[0] = coargs.count;
+leave:
+ fdrop(epfp, td);
+ return (error);
+}
+
+/*
+ * epoll_wait(2): wait without touching the thread's signal mask.
+ */
+int
+linux_epoll_wait(struct thread *td, struct linux_epoll_wait_args *args)
+{
+	int rv;
+
+	rv = linux_epoll_wait_common(td, args->epfd, args->events,
+	    args->maxevents, args->timeout, NULL);
+	return (rv);
+}
+
+/*
+ * epoll_pwait(2): like epoll_wait(), but when a mask pointer is
+ * supplied the Linux signal set is copied in, converted to the
+ * native representation and handed to the common wait routine.
+ */
+int
+linux_epoll_pwait(struct thread *td, struct linux_epoll_pwait_args *args)
+{
+	sigset_t bmask, *maskp = NULL;
+	l_sigset_t lmask;
+	int rv;
+
+	if (args->mask != NULL) {
+		rv = copyin(args->mask, &lmask, sizeof(lmask));
+		if (rv != 0)
+			return (rv);
+		linux_to_bsd_sigset(&lmask, &bmask);
+		maskp = &bmask;
+	}
+
+	return (linux_epoll_wait_common(td, args->epfd, args->events,
+	    args->maxevents, args->timeout, maskp));
+}
+
+/*
+ * Remove the kevent for (fd, filter) from the kqueue behind epfp.
+ * ENOENT is mapped to success because no record is kept here of
+ * which filters were actually registered.
+ */
+static int
+epoll_delete_event(struct thread *td, struct file *epfp, int fd, int filter)
+{
+	struct epoll_copyin_args ciargs;
+	struct kevent del;
+	struct kevent_copyops ops = { &ciargs,
+				      NULL,
+				      epoll_kev_copyin};
+	int rv;
+
+	EV_SET(&del, fd, filter, EV_DELETE | EV_DISABLE, 0, 0, 0);
+	ciargs.changelist = &del;
+
+	rv = kern_kevent_fp(td, epfp, 1, 0, &ops, NULL);
+
+	return (rv == ENOENT ? 0 : rv);
+}
+
+/*
+ * Remove both the read and the write filter for fd.  Both deletions
+ * are always attempted; the read-filter error takes precedence when
+ * both fail.
+ */
+static int
+epoll_delete_all_events(struct thread *td, struct file *epfp, int fd)
+{
+	int rd_err, wr_err;
+
+	rd_err = epoll_delete_event(td, epfp, fd, EVFILT_READ);
+	wr_err = epoll_delete_event(td, epfp, fd, EVFILT_WRITE);
+
+	if (rd_err != 0)
+		return (rd_err);
+	return (wr_err);
+}
+
+/*
+ * Common backend of eventfd() and eventfd2(): allocate a descriptor
+ * backed by a new struct eventfd whose counter starts at `initval`.
+ * `flags` are the Linux flags; LINUX_O_CLOEXEC and LINUX_O_NONBLOCK
+ * are translated for the file, and the whole value is kept in
+ * efd_flags for the read and write paths.  On success the descriptor
+ * number is placed in td_retval[0] and 0 is returned; otherwise the
+ * falloc() error is returned.
+ */
+static int
+eventfd_create(struct thread *td, uint32_t initval, int flags)
+{
+	struct eventfd *efd;
+	struct file *fp;
+	int fflags, fd, error;
+
+	fflags = 0;
+	if ((flags & LINUX_O_CLOEXEC) != 0)
+		fflags |= O_CLOEXEC;
+
+	error = falloc(td, &fp, &fd, fflags);
+	if (error)
+		return (error);
+
+	efd = malloc(sizeof(*efd), M_EPOLL, M_WAITOK | M_ZERO);
+	efd->efd_flags = flags;
+	efd->efd_count = initval;
+	mtx_init(&efd->efd_lock, "eventfd", NULL, MTX_DEF);
+
+	/* The knote list shares the eventfd mutex. */
+	knlist_init_mtx(&efd->efd_sel.si_note, &efd->efd_lock);
+
+	fflags = FREAD | FWRITE;
+	if ((flags & LINUX_O_NONBLOCK) != 0)
+		fflags |= FNONBLOCK;
+
+	finit(fp, fflags, DTYPE_LINUXEFD, efd, &eventfdops);
+	/* Release this function's reference on the new file. */
+	fdrop(fp, td);
+
+	td->td_retval[0] = fd;
+	return (0);
+}
+
+/*
+ * eventfd(2): the flag-less variant; creates an eventfd with the
+ * given initial counter value and no flags.
+ */
+int
+linux_eventfd(struct thread *td, struct linux_eventfd_args *args)
+{
+	int rv;
+
+	rv = eventfd_create(td, args->initval, 0);
+	return (rv);
+}
+
+/*
+ * eventfd2(2): validate the flag word (only CLOEXEC, NONBLOCK and
+ * EFD_SEMAPHORE are accepted) and create the eventfd.
+ */
+int
+linux_eventfd2(struct thread *td, struct linux_eventfd2_args *args)
+{
+	const int allowed =
+	    LINUX_O_CLOEXEC | LINUX_O_NONBLOCK | LINUX_EFD_SEMAPHORE;
+
+	if ((args->flags & ~allowed) != 0)
+		return (EINVAL);
+
+	return (eventfd_create(td, args->initval, args->flags));
+}
+
+/*
+ * fo_close method: drain select/kqueue state, point the file at
+ * badfileops and release the eventfd.  Returns EBADF when the file is
+ * not an eventfd or carries no data.
+ */
+static int
+eventfd_close(struct file *fp, struct thread *td)
+{
+	struct eventfd *efd = fp->f_data;
+
+	if (efd == NULL || fp->f_type != DTYPE_LINUXEFD)
+		return (EBADF);
+
+	seldrain(&efd->efd_sel);
+	knlist_destroy(&efd->efd_sel.si_note);
+
+	fp->f_ops = &badfileops;
+	mtx_destroy(&efd->efd_lock);
+	free(efd, M_EPOLL);
+
+	return (0);
+}
+
+/*
+ * fo_read method for an eventfd.
+ *
+ * Returns EBADF if fp is not an eventfd and EINVAL if the caller's
+ * buffer is smaller than an eventfd_t.  While the counter is zero the
+ * call fails with EAGAIN for a non-blocking eventfd, otherwise it
+ * sleeps interruptibly until woken.  In semaphore mode the value
+ * delivered is 1 and the counter drops by one; otherwise the whole
+ * counter is delivered and reset to zero.
+ */
+static int
+eventfd_read(struct file *fp, struct uio *uio, struct ucred *active_cred,
+ int flags, struct thread *td)
+{
+ struct eventfd *efd;
+ eventfd_t count;
+ int error;
+
+ efd = fp->f_data;
+ if (fp->f_type != DTYPE_LINUXEFD || efd == NULL)
+ return (EBADF);
+
+ if (uio->uio_resid < sizeof(eventfd_t))
+ return (EINVAL);
+
+ error = 0;
+ mtx_lock(&efd->efd_lock);
+retry:
+ if (efd->efd_count == 0) {
+ /* Non-blocking is decided by the flags saved at creation time. */
+ if ((efd->efd_flags & LINUX_O_NONBLOCK) != 0) {
+ mtx_unlock(&efd->efd_lock);
+ return (EAGAIN);
+ }
+ /* Sleep on the counter address; a clean wakeup re-checks the counter. */
+ error = mtx_sleep(&efd->efd_count, &efd->efd_lock, PCATCH, "lefdrd", 0);
+ if (error == 0)
+ goto retry;
+ }
+ if (error == 0) {
+ if ((efd->efd_flags & LINUX_EFD_SEMAPHORE) != 0) {
+ count = 1;
+ --efd->efd_count;
+ } else {
+ count = efd->efd_count;
+ efd->efd_count = 0;
+ }
+ /* Notify kqueue, select and sleeping threads of the counter change. */
+ KNOTE_LOCKED(&efd->efd_sel.si_note, 0);
+ selwakeup(&efd->efd_sel);
+ wakeup(&efd->efd_count);
+ /* The mutex is released before copying the value to the caller. */
+ mtx_unlock(&efd->efd_lock);
+ error = uiomove(&count, sizeof(eventfd_t), uio);
+ } else
+ mtx_unlock(&efd->efd_lock);
+
+ return (error);
+}
+
+/*
+ * fo_write method for an eventfd.
+ *
+ * Returns EBADF if fp is not an eventfd, EINVAL if fewer than
+ * sizeof(eventfd_t) bytes are supplied or the value is UINT64_MAX,
+ * and any uiomove() error.  The value is added to the counter; while
+ * the addition would not leave the counter below UINT64_MAX the call
+ * fails with EAGAIN for a non-blocking eventfd, otherwise it sleeps
+ * interruptibly until woken.
+ */
+static int
+eventfd_write(struct file *fp, struct uio *uio, struct ucred *active_cred,
+ int flags, struct thread *td)
+{
+ struct eventfd *efd;
+ eventfd_t count;
+ int error;
+
+ efd = fp->f_data;
+ if (fp->f_type != DTYPE_LINUXEFD || efd == NULL)
+ return (EBADF);
+
+ if (uio->uio_resid < sizeof(eventfd_t))
+ return (EINVAL);
+
+ /* Fetch the caller's value before taking the mutex. */
+ error = uiomove(&count, sizeof(eventfd_t), uio);
+ if (error)
+ return (error);
+ if (count == UINT64_MAX)
+ return (EINVAL);
+
+ mtx_lock(&efd->efd_lock);
+retry:
+ /* True when efd_count + count would reach or exceed UINT64_MAX. */
+ if (UINT64_MAX - efd->efd_count <= count) {
+ /* Non-blocking is decided by the flags saved at creation time. */
+ if ((efd->efd_flags & LINUX_O_NONBLOCK) != 0) {
+ mtx_unlock(&efd->efd_lock);
+ return (EAGAIN);
+ }
+ /* Sleep on the counter address; a clean wakeup re-checks for room. */
+ error = mtx_sleep(&efd->efd_count, &efd->efd_lock,
+ PCATCH, "lefdwr", 0);
+ if (error == 0)
+ goto retry;
+ }
+ if (error == 0) {
+ efd->efd_count += count;
+ /* Notify kqueue, select and sleeping threads of the counter change. */
+ KNOTE_LOCKED(&efd->efd_sel.si_note, 0);
+ selwakeup(&efd->efd_sel);
+ wakeup(&efd->efd_count);
+ }
+ mtx_unlock(&efd->efd_lock);
+
+ return (error);
+}
+
+/*
+ * fo_poll method.  The eventfd is readable while the counter is
+ * non-zero and writable while the counter is below UINT64_MAX - 1.
+ * If none of the requested events is ready the thread is recorded
+ * for a later selwakeup().  Returns POLLERR for a file that is not
+ * an eventfd.
+ */
+static int
+eventfd_poll(struct file *fp, int events, struct ucred *active_cred,
+    struct thread *td)
+{
+	struct eventfd *efd = fp->f_data;
+	int ready;
+
+	if (efd == NULL || fp->f_type != DTYPE_LINUXEFD)
+		return (POLLERR);
+
+	ready = 0;
+	mtx_lock(&efd->efd_lock);
+	/* Masking with `events` keeps only the bits the caller asked for. */
+	if (efd->efd_count > 0)
+		ready |= events & (POLLIN|POLLRDNORM);
+	if (UINT64_MAX - 1 > efd->efd_count)
+		ready |= events & (POLLOUT|POLLWRNORM);
+	if (ready == 0)
+		selrecord(td, &efd->efd_sel);
+	mtx_unlock(&efd->efd_lock);
+
+	return (ready);
+}
+
+/*
+ * fo_kqfilter method: attach a knote to the eventfd.  Only the read
+ * and write filters are supported; anything else, or a file that is
+ * not an eventfd, yields EINVAL.
+ */
+/*ARGSUSED*/
+static int
+eventfd_kqfilter(struct file *fp, struct knote *kn)
+{
+	struct eventfd *efd = fp->f_data;
+
+	if (efd == NULL || fp->f_type != DTYPE_LINUXEFD)
+		return (EINVAL);
+
+	mtx_lock(&efd->efd_lock);
+	if (kn->kn_filter == EVFILT_READ) {
+		kn->kn_fop = &eventfd_rfiltops;
+	} else if (kn->kn_filter == EVFILT_WRITE) {
+		kn->kn_fop = &eventfd_wfiltops;
+	} else {
+		mtx_unlock(&efd->efd_lock);
+		return (EINVAL);
+	}
+
+	/* The filter routines find the eventfd through kn_hook. */
+	kn->kn_hook = efd;
+	knlist_add(&efd->efd_sel.si_note, kn, 1);
+	mtx_unlock(&efd->efd_lock);
+
+	return (0);
+}
+
+/*
+ * Filter detach routine: unlink the knote from the eventfd's knote
+ * list while holding the eventfd mutex.
+ */
+static void
+filt_eventfddetach(struct knote *kn)
+{
+	struct eventfd *efd;
+
+	efd = kn->kn_hook;
+	mtx_lock(&efd->efd_lock);
+	knlist_remove(&efd->efd_sel.si_note, kn, 1);
+	mtx_unlock(&efd->efd_lock);
+}
+
+/*
+ * Read filter: fires while the counter is non-zero.  The eventfd
+ * mutex must be held by the caller.
+ */
+/*ARGSUSED*/
+static int
+filt_eventfdread(struct knote *kn, long hint)
+{
+	struct eventfd *efd;
+
+	efd = kn->kn_hook;
+	mtx_assert(&efd->efd_lock, MA_OWNED);
+
+	return (efd->efd_count > 0);
+}
+
+/*
+ * Write filter: fires while the counter is below UINT64_MAX - 1.
+ * The eventfd mutex must be held by the caller.
+ */
+/*ARGSUSED*/
+static int
+filt_eventfdwrite(struct knote *kn, long hint)
+{
+	struct eventfd *efd;
+
+	efd = kn->kn_hook;
+	mtx_assert(&efd->efd_lock, MA_OWNED);
+
+	return (UINT64_MAX - 1 > efd->efd_count);
+}
+
+/*
+ * fo_truncate method: not supported on an eventfd; always ENXIO.
+ */
+/*ARGSUSED*/
+static int
+eventfd_truncate(struct file *fp, off_t length, struct ucred *active_cred,
+    struct thread *td)
+{
+
+	return (ENXIO);
+}
+
+/*
+ * fo_ioctl method: no ioctls are implemented for an eventfd; always
+ * ENXIO.
+ */
+/*ARGSUSED*/
+static int
+eventfd_ioctl(struct file *fp, u_long cmd, void *data,
+    struct ucred *active_cred, struct thread *td)
+{
+
+	return (ENXIO);
+}
+
+/*
+ * fo_stat method: not implemented for an eventfd; always ENXIO.
+ */
+/*ARGSUSED*/
+static int
+eventfd_stat(struct file *fp, struct stat *st, struct ucred *active_cred,
+    struct thread *td)
+{
+
+	return (ENXIO);
+}
diff --git a/sys/compat/linux/linux_event.h b/sys/compat/linux/linux_event.h
new file mode 100644
index 0000000..9b7d37b
--- /dev/null
+++ b/sys/compat/linux/linux_event.h
@@ -0,0 +1,60 @@
+/*-
+ * Copyright (c) 2007 Roman Divacky
+ * Copyright (c) 2014 Dmitry Chagin
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _LINUX_EVENT_H_
+#define _LINUX_EVENT_H_
+
+/* Linux epoll event bits, as carried in epoll_event.events. */
+#define	LINUX_EPOLLIN		0x001
+#define	LINUX_EPOLLPRI		0x002
+#define	LINUX_EPOLLOUT		0x004
+#define	LINUX_EPOLLRDNORM	0x040
+#define	LINUX_EPOLLRDBAND	0x080
+#define	LINUX_EPOLLWRNORM	0x100
+#define	LINUX_EPOLLWRBAND	0x200
+#define	LINUX_EPOLLMSG		0x400
+#define	LINUX_EPOLLERR		0x008
+#define	LINUX_EPOLLHUP		0x010
+#define	LINUX_EPOLLRDHUP	0x2000
+/*
+ * The shift expressions are parenthesized so the macros expand as a
+ * single operand next to tighter-binding operators such as ~ or +.
+ */
+#define	LINUX_EPOLLWAKEUP	(1u<<29)
+#define	LINUX_EPOLLONESHOT	(1u<<30)
+#define	LINUX_EPOLLET		(1u<<31)
+
+/* Bits that register the read filter. */
+#define	LINUX_EPOLL_EVRD	(LINUX_EPOLLIN|LINUX_EPOLLRDNORM	\
+		|LINUX_EPOLLHUP|LINUX_EPOLLERR|LINUX_EPOLLPRI)
+/* Bits that register the write filter. */
+#define	LINUX_EPOLL_EVWR	(LINUX_EPOLLOUT|LINUX_EPOLLWRNORM)
+/* Every bit the emulation accepts. */
+#define	LINUX_EPOLL_EVSUP	(LINUX_EPOLLET|LINUX_EPOLLONESHOT	\
+		|LINUX_EPOLL_EVRD|LINUX_EPOLL_EVWR|LINUX_EPOLLRDHUP)
+
+/* epoll_ctl() operation codes. */
+#define	LINUX_EPOLL_CTL_ADD	1
+#define	LINUX_EPOLL_CTL_DEL	2
+#define	LINUX_EPOLL_CTL_MOD	3
+
+/* eventfd2() flag selecting semaphore-style reads. */
+#define	LINUX_EFD_SEMAPHORE	(1 << 0)
+
+#endif	/* !_LINUX_EVENT_H_ */
diff --git a/sys/compat/linux/linux_file.c b/sys/compat/linux/linux_file.c
index 19104a4..ee1d1ba 100644
--- a/sys/compat/linux/linux_file.c
+++ b/sys/compat/linux/linux_file.c
@@ -235,6 +235,7 @@ linux_lseek(struct thread *td, struct linux_lseek_args *args)
return error;
}
+#if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
int
linux_llseek(struct thread *td, struct linux_llseek_args *args)
{
@@ -273,6 +274,7 @@ linux_readdir(struct thread *td, struct linux_readdir_args *args)
lda.count = 1;
return linux_getdents(td, &lda);
}
+#endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */
/*
* Note that linux_getdents(2) and linux_getdents64(2) have the same
@@ -367,8 +369,8 @@ getdents_common(struct thread *td, struct linux_getdents64_args *args,
buflen = max(LINUX_DIRBLKSIZ, nbytes);
buflen = min(buflen, MAXBSIZE);
- buf = malloc(buflen, M_TEMP, M_WAITOK);
- lbuf = malloc(LINUX_MAXRECLEN, M_TEMP, M_WAITOK | M_ZERO);
+ buf = malloc(buflen, M_LINUX, M_WAITOK);
+ lbuf = malloc(LINUX_MAXRECLEN, M_LINUX, M_WAITOK | M_ZERO);
vn_lock(vp, LK_SHARED | LK_RETRY);
aiov.iov_base = buf;
@@ -519,8 +521,8 @@ out:
VOP_UNLOCK(vp, 0);
foffset_unlock(fp, off, 0);
fdrop(fp, td);
- free(buf, M_TEMP);
- free(lbuf, M_TEMP);
+ free(buf, M_LINUX);
+ free(lbuf, M_LINUX);
return (error);
}
@@ -578,10 +580,8 @@ int
linux_faccessat(struct thread *td, struct linux_faccessat_args *args)
{
char *path;
- int error, dfd, flag;
+ int error, dfd;
- if (args->flag & ~LINUX_AT_EACCESS)
- return (EINVAL);
/* linux convention */
if (args->amode & ~(F_OK | X_OK | W_OK | R_OK))
return (EINVAL);
@@ -594,8 +594,7 @@ linux_faccessat(struct thread *td, struct linux_faccessat_args *args)
printf(ARGS(access, "%s, %d"), path, args->amode);
#endif
- flag = (args->flag & LINUX_AT_EACCESS) == 0 ? 0 : AT_EACCESS;
- error = kern_accessat(td, dfd, path, UIO_SYSSPACE, flag, args->amode);
+ error = kern_accessat(td, dfd, path, UIO_SYSSPACE, 0, args->amode);
LFREEPATH(path);
return (error);
@@ -919,6 +918,7 @@ linux_truncate(struct thread *td, struct linux_truncate_args *args)
return (error);
}
+#if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
int
linux_truncate64(struct thread *td, struct linux_truncate64_args *args)
{
@@ -936,6 +936,8 @@ linux_truncate64(struct thread *td, struct linux_truncate64_args *args)
LFREEPATH(path);
return (error);
}
+#endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */
+
int
linux_ftruncate(struct thread *td, struct linux_ftruncate_args *args)
{
@@ -1123,6 +1125,7 @@ linux_mount(struct thread *td, struct linux_mount_args *args)
return (error);
}
+#if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
int
linux_oldumount(struct thread *td, struct linux_oldumount_args *args)
{
@@ -1132,6 +1135,7 @@ linux_oldumount(struct thread *td, struct linux_oldumount_args *args)
args2.flags = 0;
return (linux_umount(td, &args2));
}
+#endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */
int
linux_umount(struct thread *td, struct linux_umount_args *args)
@@ -1262,7 +1266,7 @@ bsd_to_linux_flock64(struct flock *bsd_flock, struct l_flock64 *linux_flock)
#endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */
static int
-fcntl_common(struct thread *td, struct linux_fcntl64_args *args)
+fcntl_common(struct thread *td, struct linux_fcntl_args *args)
{
struct l_flock linux_flock;
struct flock bsd_flock;
@@ -1380,6 +1384,9 @@ fcntl_common(struct thread *td, struct linux_fcntl64_args *args)
fdrop(fp, td);
return (kern_fcntl(td, args->fd, F_SETOWN, args->arg));
+
+ case LINUX_F_DUPFD_CLOEXEC:
+ return (kern_fcntl(td, args->fd, F_DUPFD_CLOEXEC, args->arg));
}
return (EINVAL);
@@ -1388,17 +1395,13 @@ fcntl_common(struct thread *td, struct linux_fcntl64_args *args)
int
linux_fcntl(struct thread *td, struct linux_fcntl_args *args)
{
- struct linux_fcntl64_args args64;
#ifdef DEBUG
if (ldebug(fcntl))
printf(ARGS(fcntl, "%d, %08x, *"), args->fd, args->cmd);
#endif
- args64.fd = args->fd;
- args64.cmd = args->cmd;
- args64.arg = args->arg;
- return (fcntl_common(td, &args64));
+ return (fcntl_common(td, args));
}
#if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
@@ -1407,6 +1410,7 @@ linux_fcntl64(struct thread *td, struct linux_fcntl64_args *args)
{
struct l_flock64 linux_flock;
struct flock bsd_flock;
+ struct linux_fcntl_args fcntl_args;
int error;
#ifdef DEBUG
@@ -1447,7 +1451,10 @@ linux_fcntl64(struct thread *td, struct linux_fcntl64_args *args)
(intptr_t)&bsd_flock));
}
- return (fcntl_common(td, args));
+ fcntl_args.fd = args->fd;
+ fcntl_args.cmd = args->cmd;
+ fcntl_args.arg = args->arg;
+ return (fcntl_common(td, &fcntl_args));
}
#endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */
@@ -1543,6 +1550,7 @@ linux_fadvise64(struct thread *td, struct linux_fadvise64_args *args)
advice));
}
+#if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
int
linux_fadvise64_64(struct thread *td, struct linux_fadvise64_64_args *args)
{
@@ -1554,6 +1562,7 @@ linux_fadvise64_64(struct thread *td, struct linux_fadvise64_64_args *args)
return (kern_posix_fadvise(td, args->fd, args->offset, args->len,
advice));
}
+#endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */
int
linux_pipe(struct thread *td, struct linux_pipe_args *args)
@@ -1600,3 +1609,37 @@ linux_pipe2(struct thread *td, struct linux_pipe2_args *args)
/* XXX: Close descriptors on error. */
return (copyout(fildes, args->pipefds, sizeof(fildes)));
}
+
+/*
+ * dup3(2): duplicate oldfd onto newfd.  The two descriptors must
+ * differ and LINUX_O_CLOEXEC is the only flag accepted; it selects
+ * the close-on-exec variant of the fcntl command.
+ */
+int
+linux_dup3(struct thread *td, struct linux_dup3_args *args)
+{
+	intptr_t target;
+	int op;
+
+	if (args->oldfd == args->newfd)
+		return (EINVAL);
+	if ((args->flags & ~LINUX_O_CLOEXEC) != 0)
+		return (EINVAL);
+
+	op = (args->flags & LINUX_O_CLOEXEC) != 0 ?
+	    F_DUP2FD_CLOEXEC : F_DUP2FD;
+	target = args->newfd;
+
+	return (kern_fcntl(td, args->oldfd, op, target));
+}
+
+/*
+ * fallocate(2).  Only the plain allocation case (mode 0) is emulated,
+ * by way of kern_posix_fallocate().
+ */
+int
+linux_fallocate(struct thread *td, struct linux_fallocate_args *args)
+{
+
+	/*
+	 * We emulate only posix_fallocate system call for which
+	 * mode should be 0.  An unsupported mode is reported as
+	 * EOPNOTSUPP; ENOSYS would tell the caller that the system
+	 * call itself does not exist.
+	 */
+	if (args->mode != 0)
+		return (EOPNOTSUPP);
+
+	return (kern_posix_fallocate(td, args->fd, args->offset,
+	    args->len));
+}
diff --git a/sys/compat/linux/linux_file.h b/sys/compat/linux/linux_file.h
index 2d3106f..f27d5b4 100644
--- a/sys/compat/linux/linux_file.h
+++ b/sys/compat/linux/linux_file.h
@@ -54,4 +54,75 @@
#define LINUX_MS_NOEXEC 0x0008
#define LINUX_MS_REMOUNT 0x0020
+/*
+ * common open/fcntl flags
+ */
+#define LINUX_O_RDONLY 00000000
+#define LINUX_O_WRONLY 00000001
+#define LINUX_O_RDWR 00000002
+#define LINUX_O_ACCMODE 00000003
+#define LINUX_O_CREAT 00000100
+#define LINUX_O_EXCL 00000200
+#define LINUX_O_NOCTTY 00000400
+#define LINUX_O_TRUNC 00001000
+#define LINUX_O_APPEND 00002000
+#define LINUX_O_NONBLOCK 00004000
+#define LINUX_O_NDELAY LINUX_O_NONBLOCK
+#define LINUX_O_SYNC 00010000
+#define LINUX_FASYNC 00020000
+#define LINUX_O_DIRECT 00040000 /* Direct disk access hint */
+#define LINUX_O_LARGEFILE 00100000
+#define LINUX_O_DIRECTORY 00200000 /* Must be a directory */
+#define LINUX_O_NOFOLLOW 00400000 /* Do not follow links */
+#define LINUX_O_NOATIME 01000000
+#define LINUX_O_CLOEXEC 02000000
+
+#define LINUX_F_DUPFD 0
+#define LINUX_F_GETFD 1
+#define LINUX_F_SETFD 2
+#define LINUX_F_GETFL 3
+#define LINUX_F_SETFL 4
+#ifndef LINUX_F_GETLK
+#define LINUX_F_GETLK 5
+#define LINUX_F_SETLK 6
+#define LINUX_F_SETLKW 7
+#endif
+#ifndef LINUX_F_SETOWN
+#define LINUX_F_SETOWN 8
+#define LINUX_F_GETOWN 9
+#endif
+#ifndef LINUX_F_SETSIG
+#define LINUX_F_SETSIG 10
+#define LINUX_F_GETSIG 11
+#endif
+#ifndef LINUX_F_SETOWN_EX
+#define LINUX_F_SETOWN_EX 15
+#define LINUX_F_GETOWN_EX 16
+#define LINUX_F_GETOWNER_UIDS 17
+#endif
+
+#define LINUX_F_SPECIFIC_BASE 1024
+
+#define LINUX_F_SETLEASE (LINUX_F_SPECIFIC_BASE + 0)
+#define LINUX_F_GETLEASE (LINUX_F_SPECIFIC_BASE + 1)
+#define LINUX_F_CANCELLK (LINUX_F_SPECIFIC_BASE + 5)
+#define LINUX_F_DUPFD_CLOEXEC (LINUX_F_SPECIFIC_BASE + 6)
+#define LINUX_F_NOTIFY (LINUX_F_SPECIFIC_BASE + 2)
+#define LINUX_F_SETPIPE_SZ (LINUX_F_SPECIFIC_BASE + 7)
+#define LINUX_F_GETPIPE_SZ (LINUX_F_SPECIFIC_BASE + 8)
+
+#define LINUX_F_GETLKP 36
+#define LINUX_F_SETLKP 37
+#define LINUX_F_SETLKPW 38
+
+#define LINUX_F_OWNER_TID 0
+#define LINUX_F_OWNER_PID 1
+#define LINUX_F_OWNER_PGRP 2
+
+#ifndef LINUX_F_RDLCK
+#define LINUX_F_RDLCK 0
+#define LINUX_F_WRLCK 1
+#define LINUX_F_UNLCK 2
+#endif
+
#endif /* !_LINUX_FILE_H_ */
diff --git a/sys/compat/linux/linux_fork.c b/sys/compat/linux/linux_fork.c
index 0ab7d3a..9e7c71f 100644
--- a/sys/compat/linux/linux_fork.c
+++ b/sys/compat/linux/linux_fork.c
@@ -35,13 +35,20 @@ __FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/imgact.h>
+#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
+#include <sys/racct.h>
#include <sys/sched.h>
-#include <sys/sdt.h>
+#include <sys/syscallsubr.h>
#include <sys/sx.h>
#include <sys/unistd.h>
+#include <sys/wait.h>
+
+#include <vm/vm.h>
+#include <vm/pmap.h>
+#include <vm/vm_map.h>
#ifdef COMPAT_LINUX32
#include <machine/../linux32/linux.h>
@@ -50,18 +57,10 @@ __FBSDID("$FreeBSD$");
#include <machine/../linux/linux.h>
#include <machine/../linux/linux_proto.h>
#endif
-#include <compat/linux/linux_dtrace.h>
-#include <compat/linux/linux_signal.h>
#include <compat/linux/linux_emul.h>
+#include <compat/linux/linux_futex.h>
#include <compat/linux/linux_misc.h>
-
-/* DTrace init */
-LIN_SDT_PROVIDER_DECLARE(LINUX_DTRACE);
-
-/* Linuxulator-global DTrace probes */
-LIN_SDT_PROBE_DECLARE(locks, emul_lock, locked);
-LIN_SDT_PROBE_DECLARE(locks, emul_lock, unlock);
-
+#include <compat/linux/linux_util.h>
int
linux_fork(struct thread *td, struct linux_fork_args *args)
@@ -79,14 +78,11 @@ linux_fork(struct thread *td, struct linux_fork_args *args)
!= 0)
return (error);
- td->td_retval[0] = p2->p_pid;
- td->td_retval[1] = 0;
+ td2 = FIRST_THREAD_IN_PROC(p2);
- error = linux_proc_init(td, td->td_retval[0], 0);
- if (error)
- return (error);
+ linux_proc_init(td, td2, 0);
- td2 = FIRST_THREAD_IN_PROC(p2);
+ td->td_retval[0] = p2->p_pid;
/*
* Make this runnable after we are finished with it.
@@ -115,13 +111,11 @@ linux_vfork(struct thread *td, struct linux_vfork_args *args)
0, &p2, NULL, 0)) != 0)
return (error);
- td->td_retval[0] = p2->p_pid;
+ td2 = FIRST_THREAD_IN_PROC(p2);
- error = linux_proc_init(td, td->td_retval[0], 0);
- if (error)
- return (error);
+ linux_proc_init(td, td2, 0);
- td2 = FIRST_THREAD_IN_PROC(p2);
+ td->td_retval[0] = p2->p_pid;
/*
* Make this runnable after we are finished with it.
@@ -134,8 +128,8 @@ linux_vfork(struct thread *td, struct linux_vfork_args *args)
return (0);
}
-int
-linux_clone(struct thread *td, struct linux_clone_args *args)
+static int
+linux_clone_proc(struct thread *td, struct linux_clone_args *args)
{
int error, ff = RFPROC | RFSTOPPED;
struct proc *p2;
@@ -153,9 +147,7 @@ linux_clone(struct thread *td, struct linux_clone_args *args)
exit_signal = args->flags & 0x000000ff;
if (LINUX_SIG_VALID(exit_signal)) {
- if (exit_signal <= LINUX_SIGTBLSZ)
- exit_signal =
- linux_to_bsd_signal[_SIG_IDX(exit_signal)];
+ exit_signal = linux_to_bsd_signal(exit_signal);
} else if (exit_signal != 0)
return (EINVAL);
@@ -172,22 +164,6 @@ linux_clone(struct thread *td, struct linux_clone_args *args)
if (!(args->flags & (LINUX_CLONE_FILES | LINUX_CLONE_FS)))
ff |= RFFDG;
- /*
- * Attempt to detect when linux_clone(2) is used for creating
- * kernel threads. Unfortunately despite the existence of the
- * CLONE_THREAD flag, version of linuxthreads package used in
- * most popular distros as of beginning of 2005 doesn't make
- * any use of it. Therefore, this detection relies on
- * empirical observation that linuxthreads sets certain
- * combination of flags, so that we can make more or less
- * precise detection and notify the FreeBSD kernel that several
- * processes are in fact part of the same threading group, so
- * that special treatment is necessary for signal delivery
- * between those processes and fd locking.
- */
- if ((args->flags & 0xffffff00) == LINUX_THREADING_FLAGS)
- ff |= RFTHREAD;
-
if (args->flags & LINUX_CLONE_PARENT_SETTID)
if (args->parent_tidptr == NULL)
return (EINVAL);
@@ -199,29 +175,13 @@ linux_clone(struct thread *td, struct linux_clone_args *args)
if (error)
return (error);
- if (args->flags & (LINUX_CLONE_PARENT | LINUX_CLONE_THREAD)) {
- sx_xlock(&proctree_lock);
- PROC_LOCK(p2);
- proc_reparent(p2, td->td_proc->p_pptr);
- PROC_UNLOCK(p2);
- sx_xunlock(&proctree_lock);
- }
+ td2 = FIRST_THREAD_IN_PROC(p2);
/* create the emuldata */
- error = linux_proc_init(td, p2->p_pid, args->flags);
- /* reference it - no need to check this */
- em = em_find(p2, EMUL_DOLOCK);
- KASSERT(em != NULL, ("clone: emuldata not found."));
- /* and adjust it */
-
- if (args->flags & LINUX_CLONE_THREAD) {
-#ifdef notyet
- PROC_LOCK(p2);
- p2->p_pgrp = td->td_proc->p_pgrp;
- PROC_UNLOCK(p2);
-#endif
- exit_signal = 0;
- }
+ linux_proc_init(td, td2, args->flags);
+
+ em = em_find(td2);
+ KASSERT(em != NULL, ("clone_proc: emuldata not found.\n"));
if (args->flags & LINUX_CLONE_CHILD_SETTID)
em->child_set_tid = args->child_tidptr;
@@ -233,8 +193,6 @@ linux_clone(struct thread *td, struct linux_clone_args *args)
else
em->child_clear_tid = NULL;
- EMUL_UNLOCK(&emul_lock);
-
if (args->flags & LINUX_CLONE_PARENT_SETTID) {
error = copyout(&p2->p_pid, args->parent_tidptr,
sizeof(p2->p_pid));
@@ -245,14 +203,12 @@ linux_clone(struct thread *td, struct linux_clone_args *args)
PROC_LOCK(p2);
p2->p_sigparent = exit_signal;
PROC_UNLOCK(p2);
- td2 = FIRST_THREAD_IN_PROC(p2);
/*
* In a case of stack = NULL, we are supposed to COW calling process
* stack. This is what normal fork() does, so we just keep tf_rsp arg
* intact.
*/
- if (args->stack)
- linux_set_upcall_kse(td2, PTROUT(args->stack));
+ linux_set_upcall_kse(td2, PTROUT(args->stack));
if (args->flags & LINUX_CLONE_SETTLS)
linux_set_cloned_tls(td2, args->tls);
@@ -263,6 +219,7 @@ linux_clone(struct thread *td, struct linux_clone_args *args)
"stack %p sig = %d"), (int)p2->p_pid, args->stack,
exit_signal);
#endif
+
/*
* Make this runnable after we are finished with it.
*/
@@ -272,7 +229,233 @@ linux_clone(struct thread *td, struct linux_clone_args *args)
thread_unlock(td2);
td->td_retval[0] = p2->p_pid;
- td->td_retval[1] = 0;
return (0);
}
+
+static int
+linux_clone_thread(struct thread *td, struct linux_clone_args *args)
+{
+ struct linux_emuldata *em;
+ struct thread *newtd;
+ struct proc *p;
+ int error;
+
+#ifdef DEBUG
+ if (ldebug(clone)) {
+ printf(ARGS(clone, "thread: flags %x, stack %p, parent tid: %p, "
+ "child tid: %p"), (unsigned)args->flags,
+ args->stack, args->parent_tidptr, args->child_tidptr);
+ }
+#endif
+
+ LINUX_CTR4(clone_thread, "thread(%d) flags %x ptid %p ctid %p",
+ td->td_tid, (unsigned)args->flags,
+ args->parent_tidptr, args->child_tidptr);
+
+ if (args->flags & LINUX_CLONE_PARENT_SETTID)
+ if (args->parent_tidptr == NULL)
+ return (EINVAL);
+
+ /* Threads should be created with own stack */
+ if (args->stack == NULL)
+ return (EINVAL);
+
+ p = td->td_proc;
+
+#ifdef RACCT
+ if (racct_enable) {
+ PROC_LOCK(p);
+ error = racct_add(p, RACCT_NTHR, 1);
+ PROC_UNLOCK(p);
+ if (error != 0)
+ return (EPROCLIM);
+ }
+#endif
+
+ /* Initialize our td */
+ error = kern_thr_alloc(p, 0, &newtd);
+ if (error)
+ goto fail;
+
+ cpu_set_upcall(newtd, td);
+
+ bzero(&newtd->td_startzero,
+ __rangeof(struct thread, td_startzero, td_endzero));
+ bcopy(&td->td_startcopy, &newtd->td_startcopy,
+ __rangeof(struct thread, td_startcopy, td_endcopy));
+
+ newtd->td_proc = p;
+ newtd->td_ucred = crhold(td->td_ucred);
+
+ /* create the emuldata */
+ linux_proc_init(td, newtd, args->flags);
+
+ em = em_find(newtd);
+ KASSERT(em != NULL, ("clone_thread: emuldata not found.\n"));
+
+ if (args->flags & LINUX_CLONE_SETTLS)
+ linux_set_cloned_tls(newtd, args->tls);
+
+ if (args->flags & LINUX_CLONE_CHILD_SETTID)
+ em->child_set_tid = args->child_tidptr;
+ else
+ em->child_set_tid = NULL;
+
+ if (args->flags & LINUX_CLONE_CHILD_CLEARTID)
+ em->child_clear_tid = args->child_tidptr;
+ else
+ em->child_clear_tid = NULL;
+
+ cpu_thread_clean(newtd);
+
+ linux_set_upcall_kse(newtd, PTROUT(args->stack));
+
+ PROC_LOCK(p);
+ p->p_flag |= P_HADTHREADS;
+ bcopy(p->p_comm, newtd->td_name, sizeof(newtd->td_name));
+
+ if (args->flags & LINUX_CLONE_PARENT)
+ thread_link(newtd, p->p_pptr);
+ else
+ thread_link(newtd, p);
+
+ thread_lock(td);
+ /* let the scheduler know about these things. */
+ sched_fork_thread(td, newtd);
+ thread_unlock(td);
+ if (P_SHOULDSTOP(p))
+ newtd->td_flags |= TDF_ASTPENDING | TDF_NEEDSUSPCHK;
+ PROC_UNLOCK(p);
+
+ tidhash_add(newtd);
+
+#ifdef DEBUG
+ if (ldebug(clone))
+ printf(ARGS(clone, "successful clone to %d, stack %p"),
+ (int)newtd->td_tid, args->stack);
+#endif
+
+ LINUX_CTR2(clone_thread, "thread(%d) successful clone to %d",
+ td->td_tid, newtd->td_tid);
+
+ if (args->flags & LINUX_CLONE_PARENT_SETTID) {
+ error = copyout(&newtd->td_tid, args->parent_tidptr,
+ sizeof(newtd->td_tid));
+ if (error)
+ printf(LMSG("clone_thread: copyout failed!"));
+ }
+
+ /*
+ * Make this runnable after we are finished with it.
+ */
+ thread_lock(newtd);
+ TD_SET_CAN_RUN(newtd);
+ sched_add(newtd, SRQ_BORING);
+ thread_unlock(newtd);
+
+ td->td_retval[0] = newtd->td_tid;
+
+ return (0);
+
+fail:
+#ifdef RACCT
+ if (racct_enable) {
+ PROC_LOCK(p);
+ racct_sub(p, RACCT_NTHR, 1);
+ PROC_UNLOCK(p);
+ }
+#endif
+ return (error);
+}
+
+int
+linux_clone(struct thread *td, struct linux_clone_args *args)
+{
+
+ if (args->flags & LINUX_CLONE_THREAD)
+ return (linux_clone_thread(td, args));
+ else
+ return (linux_clone_proc(td, args));
+}
+
+int
+linux_exit(struct thread *td, struct linux_exit_args *args)
+{
+ struct linux_emuldata *em;
+
+ em = em_find(td);
+ KASSERT(em != NULL, ("exit: emuldata not found.\n"));
+
+ LINUX_CTR2(exit, "thread(%d) (%d)", em->em_tid, args->rval);
+
+ linux_thread_detach(td);
+
+ /*
+ * XXX. When the last two threads of a process
+ * exit via pthread_exit() try thr_exit() first.
+ */
+ kern_thr_exit(td);
+ exit1(td, W_EXITCODE(args->rval, 0));
+ /* NOTREACHED */
+}
+
+int
+linux_set_tid_address(struct thread *td, struct linux_set_tid_address_args *args)
+{
+ struct linux_emuldata *em;
+
+ em = em_find(td);
+ KASSERT(em != NULL, ("set_tid_address: emuldata not found.\n"));
+
+ em->child_clear_tid = args->tidptr;
+
+ td->td_retval[0] = em->em_tid;
+
+ LINUX_CTR3(set_tid_address, "tidptr(%d) %p, returns %d",
+ em->em_tid, args->tidptr, td->td_retval[0]);
+
+ return (0);
+}
+
+void
+linux_thread_detach(struct thread *td)
+{
+ struct linux_sys_futex_args cup;
+ struct linux_emuldata *em;
+ int *child_clear_tid;
+ int error;
+
+ em = em_find(td);
+ KASSERT(em != NULL, ("thread_detach: emuldata not found.\n"));
+
+ LINUX_CTR1(thread_detach, "thread(%d)", em->em_tid);
+
+ release_futexes(td, em);
+
+ child_clear_tid = em->child_clear_tid;
+
+ if (child_clear_tid != NULL) {
+
+ LINUX_CTR2(thread_detach, "thread(%d) %p",
+ em->em_tid, child_clear_tid);
+
+ error = suword32(child_clear_tid, 0);
+ if (error != 0)
+ return;
+
+ cup.uaddr = child_clear_tid;
+ cup.op = LINUX_FUTEX_WAKE;
+ cup.val = 1; /* wake one */
+ cup.timeout = NULL;
+ cup.uaddr2 = NULL;
+ cup.val3 = 0;
+ error = linux_sys_futex(td, &cup);
+ /*
+ * this cannot happen at the moment and if this happens it
+ * probably means there is a user space bug
+ */
+ if (error != 0)
+ linux_msg(td, "futex stuff in thread_detach failed.");
+ }
+}
diff --git a/sys/compat/linux/linux_futex.c b/sys/compat/linux/linux_futex.c
index eb79ad9..4573f73 100644
--- a/sys/compat/linux/linux_futex.c
+++ b/sys/compat/linux/linux_futex.c
@@ -66,15 +66,12 @@ __KERNEL_RCSID(1, "$NetBSD: linux_futex.c,v 1.7 2006/07/24 19:01:49 manu Exp $")
#include <compat/linux/linux_dtrace.h>
#include <compat/linux/linux_emul.h>
#include <compat/linux/linux_futex.h>
+#include <compat/linux/linux_timer.h>
#include <compat/linux/linux_util.h>
/* DTrace init */
LIN_SDT_PROVIDER_DECLARE(LINUX_DTRACE);
-/* Linuxulator-global DTrace probes */
-LIN_SDT_PROBE_DECLARE(locks, emul_lock, locked);
-LIN_SDT_PROBE_DECLARE(locks, emul_lock, unlock);
-
/**
* Futex part for the special DTrace module "locks".
*/
@@ -175,8 +172,8 @@ LIN_SDT_PROBE_DEFINE2(futex, linux_get_robust_list, entry, "struct thread *",
"struct linux_get_robust_list_args *");
LIN_SDT_PROBE_DEFINE1(futex, linux_get_robust_list, copyout_error, "int");
LIN_SDT_PROBE_DEFINE1(futex, linux_get_robust_list, return, "int");
-LIN_SDT_PROBE_DEFINE3(futex, handle_futex_death, entry, "struct proc *",
- "uint32_t *", "unsigned int");
+LIN_SDT_PROBE_DEFINE3(futex, handle_futex_death, entry,
+ "struct linux_emuldata *", "uint32_t *", "unsigned int");
LIN_SDT_PROBE_DEFINE1(futex, handle_futex_death, copyin_error, "int");
LIN_SDT_PROBE_DEFINE1(futex, handle_futex_death, return, "int");
LIN_SDT_PROBE_DEFINE3(futex, fetch_robust_entry, entry,
@@ -184,13 +181,11 @@ LIN_SDT_PROBE_DEFINE3(futex, fetch_robust_entry, entry,
"unsigned int *");
LIN_SDT_PROBE_DEFINE1(futex, fetch_robust_entry, copyin_error, "int");
LIN_SDT_PROBE_DEFINE1(futex, fetch_robust_entry, return, "int");
-LIN_SDT_PROBE_DEFINE1(futex, release_futexes, entry, "struct proc *");
+LIN_SDT_PROBE_DEFINE2(futex, release_futexes, entry, "struct thread *",
+ "struct linux_emuldata *");
LIN_SDT_PROBE_DEFINE1(futex, release_futexes, copyin_error, "int");
LIN_SDT_PROBE_DEFINE0(futex, release_futexes, return);
-static MALLOC_DEFINE(M_FUTEX, "futex", "Linux futexes");
-static MALLOC_DEFINE(M_FUTEX_WP, "futex wp", "Linux futexes wp");
-
struct futex;
struct waiting_proc {
@@ -253,6 +248,21 @@ struct mtx futex_mtx; /* protects the futex list */
* wp_list to prevent double wakeup.
*/
+static void futex_put(struct futex *, struct waiting_proc *);
+static int futex_get0(uint32_t *, struct futex **f, uint32_t);
+static int futex_get(uint32_t *, struct waiting_proc **, struct futex **,
+ uint32_t);
+static int futex_sleep(struct futex *, struct waiting_proc *, int);
+static int futex_wake(struct futex *, int, uint32_t);
+static int futex_requeue(struct futex *, int, struct futex *, int);
+static int futex_wait(struct futex *, struct waiting_proc *, int,
+ uint32_t);
+static int futex_atomic_op(struct thread *, int, uint32_t *);
+static int handle_futex_death(struct linux_emuldata *, uint32_t *,
+ unsigned int);
+static int fetch_robust_entry(struct linux_robust_list **,
+ struct linux_robust_list **, unsigned int *);
+
/* support.s */
int futex_xchgl(int oparg, uint32_t *uaddr, int *oldval);
int futex_addl(int oparg, uint32_t *uaddr, int *oldval);
@@ -260,6 +270,7 @@ int futex_orl(int oparg, uint32_t *uaddr, int *oldval);
int futex_andl(int oparg, uint32_t *uaddr, int *oldval);
int futex_xorl(int oparg, uint32_t *uaddr, int *oldval);
+
static void
futex_put(struct futex *f, struct waiting_proc *wp)
{
@@ -657,10 +668,11 @@ int
linux_sys_futex(struct thread *td, struct linux_sys_futex_args *args)
{
int clockrt, nrwake, op_ret, ret;
- struct linux_emuldata *em;
+ struct linux_pemuldata *pem;
struct waiting_proc *wp;
struct futex *f, *f2;
- struct l_timespec timeout;
+ struct l_timespec ltimeout;
+ struct timespec timeout;
struct timeval utv, ctv;
int timeout_hz;
int error;
@@ -704,6 +716,38 @@ linux_sys_futex(struct thread *td, struct linux_sys_futex_args *args)
LINUX_CTR3(sys_futex, "WAIT uaddr %p val 0x%x bitset 0x%x",
args->uaddr, args->val, args->val3);
+ if (args->timeout != NULL) {
+ error = copyin(args->timeout, &ltimeout, sizeof(ltimeout));
+ if (error) {
+ LIN_SDT_PROBE1(futex, linux_sys_futex, copyin_error,
+ error);
+ LIN_SDT_PROBE1(futex, linux_sys_futex, return, error);
+ return (error);
+ }
+ error = linux_to_native_timespec(&timeout, &ltimeout);
+ if (error)
+ return (error);
+ TIMESPEC_TO_TIMEVAL(&utv, &timeout);
+ error = itimerfix(&utv);
+ if (error) {
+ LIN_SDT_PROBE1(futex, linux_sys_futex, itimerfix_error,
+ error);
+ LIN_SDT_PROBE1(futex, linux_sys_futex, return, error);
+ return (error);
+ }
+ if (clockrt) {
+ microtime(&ctv);
+ timevalsub(&utv, &ctv);
+ } else if (args->op == LINUX_FUTEX_WAIT_BITSET) {
+ microuptime(&ctv);
+ timevalsub(&utv, &ctv);
+ }
+ if (utv.tv_sec < 0)
+ timevalclear(&utv);
+ timeout_hz = tvtohz(&utv);
+ } else
+ timeout_hz = 0;
+
error = futex_get(args->uaddr, &wp, &f,
flags | FUTEX_CREATE_WP);
if (error) {
@@ -736,37 +780,6 @@ linux_sys_futex(struct thread *td, struct linux_sys_futex_args *args)
return (EWOULDBLOCK);
}
- if (args->timeout != NULL) {
- error = copyin(args->timeout, &timeout, sizeof(timeout));
- if (error) {
- LIN_SDT_PROBE1(futex, linux_sys_futex, copyin_error,
- error);
- LIN_SDT_PROBE1(futex, linux_sys_futex, return, error);
- futex_put(f, wp);
- return (error);
- }
- TIMESPEC_TO_TIMEVAL(&utv, &timeout);
- error = itimerfix(&utv);
- if (error) {
- LIN_SDT_PROBE1(futex, linux_sys_futex, itimerfix_error,
- error);
- LIN_SDT_PROBE1(futex, linux_sys_futex, return, error);
- futex_put(f, wp);
- return (error);
- }
- if (clockrt) {
- microtime(&ctv);
- timevalsub(&utv, &ctv);
- } else if (args->op == LINUX_FUTEX_WAIT_BITSET) {
- microuptime(&ctv);
- timevalsub(&utv, &ctv);
- }
- if (utv.tv_sec < 0)
- timevalclear(&utv);
- timeout_hz = tvtohz(&utv);
- } else
- timeout_hz = 0;
-
error = futex_wait(f, wp, timeout_hz, args->val3);
break;
@@ -943,29 +956,43 @@ linux_sys_futex(struct thread *td, struct linux_sys_futex_args *args)
case LINUX_FUTEX_LOCK_PI:
/* not yet implemented */
- linux_msg(td,
- "linux_sys_futex: "
- "op LINUX_FUTEX_LOCK_PI not implemented\n");
- LIN_SDT_PROBE0(futex, linux_sys_futex, unimplemented_lock_pi);
+ pem = pem_find(td->td_proc);
+ if ((pem->flags & LINUX_XUNSUP_FUTEXPIOP) == 0) {
+ linux_msg(td,
+ "linux_sys_futex: "
+ "unsupported futex_pi op\n");
+ pem->flags |= LINUX_XUNSUP_FUTEXPIOP;
+ LIN_SDT_PROBE0(futex, linux_sys_futex,
+ unimplemented_lock_pi);
+ }
LIN_SDT_PROBE1(futex, linux_sys_futex, return, ENOSYS);
return (ENOSYS);
case LINUX_FUTEX_UNLOCK_PI:
/* not yet implemented */
- linux_msg(td,
- "linux_sys_futex: "
- "op LINUX_FUTEX_UNLOCK_PI not implemented\n");
- LIN_SDT_PROBE0(futex, linux_sys_futex, unimplemented_unlock_pi);
+ pem = pem_find(td->td_proc);
+ if ((pem->flags & LINUX_XUNSUP_FUTEXPIOP) == 0) {
+ linux_msg(td,
+ "linux_sys_futex: "
+ "unsupported futex_pi op\n");
+ pem->flags |= LINUX_XUNSUP_FUTEXPIOP;
+ LIN_SDT_PROBE0(futex, linux_sys_futex,
+ unimplemented_unlock_pi);
+ }
LIN_SDT_PROBE1(futex, linux_sys_futex, return, ENOSYS);
return (ENOSYS);
case LINUX_FUTEX_TRYLOCK_PI:
/* not yet implemented */
- linux_msg(td,
- "linux_sys_futex: "
- "op LINUX_FUTEX_TRYLOCK_PI not implemented\n");
- LIN_SDT_PROBE0(futex, linux_sys_futex,
- unimplemented_trylock_pi);
+ pem = pem_find(td->td_proc);
+ if ((pem->flags & LINUX_XUNSUP_FUTEXPIOP) == 0) {
+ linux_msg(td,
+ "linux_sys_futex: "
+ "unsupported futex_pi op\n");
+ pem->flags |= LINUX_XUNSUP_FUTEXPIOP;
+ LIN_SDT_PROBE0(futex, linux_sys_futex,
+ unimplemented_trylock_pi);
+ }
LIN_SDT_PROBE1(futex, linux_sys_futex, return, ENOSYS);
return (ENOSYS);
@@ -977,12 +1004,12 @@ linux_sys_futex(struct thread *td, struct linux_sys_futex_args *args)
* Glibc versions prior to 2.3.3 fall back to FUTEX_WAKE when
* FUTEX_REQUEUE returned EINVAL.
*/
- em = em_find(td->td_proc, EMUL_DONTLOCK);
- if ((em->flags & LINUX_XDEPR_REQUEUEOP) == 0) {
+ pem = pem_find(td->td_proc);
+ if ((pem->flags & LINUX_XDEPR_REQUEUEOP) == 0) {
linux_msg(td,
"linux_sys_futex: "
"unsupported futex_requeue op\n");
- em->flags |= LINUX_XDEPR_REQUEUEOP;
+ pem->flags |= LINUX_XDEPR_REQUEUEOP;
LIN_SDT_PROBE0(futex, linux_sys_futex,
deprecated_requeue);
}
@@ -992,21 +1019,29 @@ linux_sys_futex(struct thread *td, struct linux_sys_futex_args *args)
case LINUX_FUTEX_WAIT_REQUEUE_PI:
/* not yet implemented */
- linux_msg(td,
- "linux_sys_futex: "
- "op FUTEX_WAIT_REQUEUE_PI not implemented\n");
- LIN_SDT_PROBE0(futex, linux_sys_futex,
- unimplemented_wait_requeue_pi);
+ pem = pem_find(td->td_proc);
+ if ((pem->flags & LINUX_XUNSUP_FUTEXPIOP) == 0) {
+ linux_msg(td,
+ "linux_sys_futex: "
+ "unsupported futex_pi op\n");
+ pem->flags |= LINUX_XUNSUP_FUTEXPIOP;
+ LIN_SDT_PROBE0(futex, linux_sys_futex,
+ unimplemented_wait_requeue_pi);
+ }
LIN_SDT_PROBE1(futex, linux_sys_futex, return, ENOSYS);
return (ENOSYS);
case LINUX_FUTEX_CMP_REQUEUE_PI:
/* not yet implemented */
- linux_msg(td,
- "linux_sys_futex: "
- "op LINUX_FUTEX_CMP_REQUEUE_PI not implemented\n");
- LIN_SDT_PROBE0(futex, linux_sys_futex,
- unimplemented_cmp_requeue_pi);
+ pem = pem_find(td->td_proc);
+ if ((pem->flags & LINUX_XUNSUP_FUTEXPIOP) == 0) {
+ linux_msg(td,
+ "linux_sys_futex: "
+ "unsupported futex_pi op\n");
+ pem->flags |= LINUX_XUNSUP_FUTEXPIOP;
+ LIN_SDT_PROBE0(futex, linux_sys_futex,
+ unimplemented_cmp_requeue_pi);
+ }
LIN_SDT_PROBE1(futex, linux_sys_futex, return, ENOSYS);
return (ENOSYS);
@@ -1036,9 +1071,8 @@ linux_set_robust_list(struct thread *td, struct linux_set_robust_list_args *args
return (EINVAL);
}
- em = em_find(td->td_proc, EMUL_DOLOCK);
+ em = em_find(td);
em->robust_futexes = args->head;
- EMUL_UNLOCK(&emul_lock);
LIN_SDT_PROBE1(futex, linux_set_robust_list, return, 0);
return (0);
@@ -1050,29 +1084,36 @@ linux_get_robust_list(struct thread *td, struct linux_get_robust_list_args *args
struct linux_emuldata *em;
struct linux_robust_list_head *head;
l_size_t len = sizeof(struct linux_robust_list_head);
+ struct thread *td2;
int error = 0;
LIN_SDT_PROBE2(futex, linux_get_robust_list, entry, td, args);
if (!args->pid) {
- em = em_find(td->td_proc, EMUL_DONTLOCK);
+ em = em_find(td);
+ KASSERT(em != NULL, ("get_robust_list: emuldata notfound.\n"));
head = em->robust_futexes;
} else {
- struct proc *p;
-
- p = pfind(args->pid);
- if (p == NULL) {
+ td2 = tdfind(args->pid, -1);
+ if (td2 == NULL) {
LIN_SDT_PROBE1(futex, linux_get_robust_list, return,
ESRCH);
return (ESRCH);
}
+ if (SV_PROC_ABI(td2->td_proc) != SV_ABI_LINUX) {
+ LIN_SDT_PROBE1(futex, linux_get_robust_list, return,
+ EPERM);
+ PROC_UNLOCK(td2->td_proc);
+ return (EPERM);
+ }
- em = em_find(p, EMUL_DONTLOCK);
+ em = em_find(td2);
+ KASSERT(em != NULL, ("get_robust_list: emuldata notfound.\n"));
/* XXX: ptrace? */
if (priv_check(td, PRIV_CRED_SETUID) ||
priv_check(td, PRIV_CRED_SETEUID) ||
- p_candebug(td, p)) {
- PROC_UNLOCK(p);
+ p_candebug(td, td2->td_proc)) {
+ PROC_UNLOCK(td2->td_proc);
LIN_SDT_PROBE1(futex, linux_get_robust_list, return,
EPERM);
@@ -1080,7 +1121,7 @@ linux_get_robust_list(struct thread *td, struct linux_get_robust_list_args *args
}
head = em->robust_futexes;
- PROC_UNLOCK(p);
+ PROC_UNLOCK(td2->td_proc);
}
error = copyout(&len, args->len, sizeof(l_size_t));
@@ -1102,13 +1143,14 @@ linux_get_robust_list(struct thread *td, struct linux_get_robust_list_args *args
}
static int
-handle_futex_death(struct proc *p, uint32_t *uaddr, unsigned int pi)
+handle_futex_death(struct linux_emuldata *em, uint32_t *uaddr,
+ unsigned int pi)
{
uint32_t uval, nval, mval;
struct futex *f;
int error;
- LIN_SDT_PROBE3(futex, handle_futex_death, entry, p, uaddr, pi);
+ LIN_SDT_PROBE3(futex, handle_futex_death, entry, em, uaddr, pi);
retry:
error = copyin(uaddr, &uval, 4);
@@ -1117,7 +1159,7 @@ retry:
LIN_SDT_PROBE1(futex, handle_futex_death, return, EFAULT);
return (EFAULT);
}
- if ((uval & FUTEX_TID_MASK) == p->p_pid) {
+ if ((uval & FUTEX_TID_MASK) == em->em_tid) {
mval = (uval & FUTEX_WAITERS) | FUTEX_OWNER_DIED;
nval = casuword32(uaddr, uval, mval);
@@ -1174,18 +1216,16 @@ fetch_robust_entry(struct linux_robust_list **entry,
/* This walks the list of robust futexes releasing them. */
void
-release_futexes(struct proc *p)
+release_futexes(struct thread *td, struct linux_emuldata *em)
{
struct linux_robust_list_head *head = NULL;
struct linux_robust_list *entry, *next_entry, *pending;
unsigned int limit = 2048, pi, next_pi, pip;
- struct linux_emuldata *em;
l_long futex_offset;
int rc, error;
- LIN_SDT_PROBE1(futex, release_futexes, entry, p);
+ LIN_SDT_PROBE2(futex, release_futexes, entry, td, em);
- em = em_find(p, EMUL_DONTLOCK);
head = em->robust_futexes;
if (head == NULL) {
@@ -1215,7 +1255,7 @@ release_futexes(struct proc *p)
rc = fetch_robust_entry(&next_entry, PTRIN(&entry->next), &next_pi);
if (entry != pending)
- if (handle_futex_death(p,
+ if (handle_futex_death(em,
(uint32_t *)((caddr_t)entry + futex_offset), pi)) {
LIN_SDT_PROBE0(futex, release_futexes, return);
return;
@@ -1235,7 +1275,7 @@ release_futexes(struct proc *p)
}
if (pending)
- handle_futex_death(p, (uint32_t *)((caddr_t)pending + futex_offset), pip);
+ handle_futex_death(em, (uint32_t *)((caddr_t)pending + futex_offset), pip);
LIN_SDT_PROBE0(futex, release_futexes, return);
}
diff --git a/sys/compat/linux/linux_futex.h b/sys/compat/linux/linux_futex.h
index 0990daa..7922743 100644
--- a/sys/compat/linux/linux_futex.h
+++ b/sys/compat/linux/linux_futex.h
@@ -76,6 +76,7 @@ extern struct mtx futex_mtx;
#define FUTEX_TID_MASK 0x3fffffff
#define FUTEX_BITSET_MATCH_ANY 0xffffffff
-void release_futexes(struct proc *);
+void release_futexes(struct thread *,
+ struct linux_emuldata *);
#endif /* !_LINUX_FUTEX_H */
diff --git a/sys/compat/linux/linux_getcwd.c b/sys/compat/linux/linux_getcwd.c
index 1c7080d..da1c726 100644
--- a/sys/compat/linux/linux_getcwd.c
+++ b/sys/compat/linux/linux_getcwd.c
@@ -186,7 +186,7 @@ linux_getcwd_scandir(lvpp, uvpp, bpp, bufp, td)
dirbuflen = DIRBLKSIZ;
if (dirbuflen < va.va_blocksize)
dirbuflen = va.va_blocksize;
- dirbuf = malloc(dirbuflen, M_TEMP, M_WAITOK);
+ dirbuf = malloc(dirbuflen, M_LINUX, M_WAITOK);
#if 0
unionread:
@@ -274,7 +274,7 @@ unionread:
out:
vput(lvp);
*lvpp = NULL;
- free(dirbuf, M_TEMP);
+ free(dirbuf, M_LINUX);
return error;
}
diff --git a/sys/compat/linux/linux_ioctl.c b/sys/compat/linux/linux_ioctl.c
index ab95e64..8858e2f 100644
--- a/sys/compat/linux/linux_ioctl.c
+++ b/sys/compat/linux/linux_ioctl.c
@@ -68,7 +68,6 @@ __FBSDID("$FreeBSD$");
#include <net/if.h>
#include <net/if_dl.h>
#include <net/if_types.h>
-#include <net/vnet.h>
#include <dev/usb/usb_ioctl.h>
@@ -95,9 +94,6 @@ __FBSDID("$FreeBSD$");
CTASSERT(LINUX_IFNAMSIZ == IFNAMSIZ);
-FEATURE(linuxulator_v4l, "V4L ioctl wrapper support in the linuxulator");
-FEATURE(linuxulator_v4l2, "V4L2 ioctl wrapper support in the linuxulator");
-
static linux_ioctl_function_t linux_ioctl_cdrom;
static linux_ioctl_function_t linux_ioctl_vfat;
static linux_ioctl_function_t linux_ioctl_console;
@@ -1980,8 +1976,6 @@ linux_ioctl_sound(struct thread *td, struct linux_ioctl_args *args)
* Console related ioctls
*/
-#define ISSIGVALID(sig) ((sig) > 0 && (sig) < NSIG)
-
static int
linux_ioctl_console(struct thread *td, struct linux_ioctl_args *args)
{
@@ -2064,8 +2058,16 @@ linux_ioctl_console(struct thread *td, struct linux_ioctl_args *args)
struct vt_mode mode;
if ((error = copyin((void *)args->arg, &mode, sizeof(mode))))
break;
- if (!ISSIGVALID(mode.frsig) && ISSIGVALID(mode.acqsig))
- mode.frsig = mode.acqsig;
+ if (LINUX_SIG_VALID(mode.relsig))
+ mode.relsig = linux_to_bsd_signal(mode.relsig);
+ else
+ mode.relsig = 0;
+ if (LINUX_SIG_VALID(mode.acqsig))
+ mode.acqsig = linux_to_bsd_signal(mode.acqsig);
+ else
+ mode.acqsig = 0;
+ /* XXX. Linux ignores frsig and set it to 0. */
+ mode.frsig = 0;
if ((error = copyout(&mode, (void *)args->arg, sizeof(mode))))
break;
args->cmd = VT_SETMODE;
@@ -2108,34 +2110,6 @@ linux_ioctl_console(struct thread *td, struct linux_ioctl_args *args)
#define IFP_IS_ETH(ifp) (ifp->if_type == IFT_ETHER)
/*
- * Interface function used by linprocfs (at the time of writing). It's not
- * used by the Linuxulator itself.
- */
-int
-linux_ifname(struct ifnet *ifp, char *buffer, size_t buflen)
-{
- struct ifnet *ifscan;
- int ethno;
-
- IFNET_RLOCK_ASSERT();
-
- /* Short-circuit non ethernet interfaces */
- if (!IFP_IS_ETH(ifp))
- return (strlcpy(buffer, ifp->if_xname, buflen));
-
- /* Determine the (relative) unit number for ethernet interfaces */
- ethno = 0;
- TAILQ_FOREACH(ifscan, &V_ifnet, if_link) {
- if (ifscan == ifp)
- return (snprintf(buffer, buflen, "eth%d", ethno));
- if (IFP_IS_ETH(ifscan))
- ethno++;
- }
-
- return (0);
-}
-
-/*
* Translate a Linux interface name to a FreeBSD interface name,
* and return the associated ifnet structure
* bsdname and lxname need to be least IFNAMSIZ bytes long, but
@@ -3621,9 +3595,16 @@ linux_ioctl(struct thread *td, struct linux_ioctl_args *args)
sx_sunlock(&linux_ioctl_sx);
fdrop(fp, td);
- linux_msg(td, "ioctl fd=%d, cmd=0x%x ('%c',%d) is not implemented",
- args->fd, (int)(args->cmd & 0xffff),
- (int)(args->cmd & 0xff00) >> 8, (int)(args->cmd & 0xff));
+ switch (args->cmd & 0xffff) {
+ case LINUX_BTRFS_IOC_CLONE:
+ return (ENOTSUP);
+
+ default:
+ linux_msg(td, "ioctl fd=%d, cmd=0x%x ('%c',%d) is not implemented",
+ args->fd, (int)(args->cmd & 0xffff),
+ (int)(args->cmd & 0xff00) >> 8, (int)(args->cmd & 0xff));
+ break;
+ }
return (EINVAL);
}
diff --git a/sys/compat/linux/linux_ioctl.h b/sys/compat/linux/linux_ioctl.h
index 3f63b21..873937d 100644
--- a/sys/compat/linux/linux_ioctl.h
+++ b/sys/compat/linux/linux_ioctl.h
@@ -581,13 +581,6 @@
#define LINUX_IOCTL_DRM_MAX 0x64ff
/*
- * This doesn't really belong here, but I can't think of a better
- * place to put it.
- */
-struct ifnet;
-int linux_ifname(struct ifnet *, char *, size_t);
-
-/*
* video
*/
#define LINUX_VIDIOCGCAP 0x7601
@@ -752,6 +745,12 @@ int linux_ifname(struct ifnet *, char *, size_t);
#define FBSD_LUSB_MIN 0xffdd
/*
+ * Linux btrfs clone operation
+ */
+#define LINUX_BTRFS_IOC_CLONE 0x9409 /* 0x40049409 */
+
+
+/*
* Pluggable ioctl handlers
*/
struct linux_ioctl_args;
diff --git a/sys/compat/linux/linux_ipc.c b/sys/compat/linux/linux_ipc.c
index 1237edc..7a92c6a 100644
--- a/sys/compat/linux/linux_ipc.c
+++ b/sys/compat/linux/linux_ipc.c
@@ -117,16 +117,6 @@ bsd_to_linux_shm_info( struct shm_info *bpp, struct l_shm_info *lpp)
lpp->swap_successes = bpp->swap_successes ;
}
-struct l_ipc_perm {
- l_key_t key;
- l_uid16_t uid;
- l_gid16_t gid;
- l_uid16_t cuid;
- l_gid16_t cgid;
- l_ushort mode;
- l_ushort seq;
-};
-
static void
linux_to_bsd_ipc_perm(struct l_ipc_perm *lpp, struct ipc_perm *bpp)
{
diff --git a/sys/compat/linux/linux_ipc.h b/sys/compat/linux/linux_ipc.h
index f1531ba..8e9c050 100644
--- a/sys/compat/linux/linux_ipc.h
+++ b/sys/compat/linux/linux_ipc.h
@@ -82,7 +82,7 @@
#define LINUX_IPC_64 0x0100 /* New version (support 32-bit UIDs, bigger
message sizes, etc. */
-#if defined(__i386__) || defined(__amd64__)
+#if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
struct linux_msgctl_args
{
@@ -177,6 +177,6 @@ int linux_shmctl(struct thread *, struct linux_shmctl_args *);
int linux_shmdt(struct thread *, struct linux_shmdt_args *);
int linux_shmget(struct thread *, struct linux_shmget_args *);
-#endif /* __i386__ || __amd64__ */
+#endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */
#endif /* _LINUX_IPC_H_ */
diff --git a/sys/compat/linux/linux_mib.c b/sys/compat/linux/linux_mib.c
index cf64599..396344b 100644
--- a/sys/compat/linux/linux_mib.c
+++ b/sys/compat/linux/linux_mib.c
@@ -29,9 +29,6 @@
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
-#include "opt_compat.h"
-#include "opt_kdtrace.h"
-
#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/sdt.h>
@@ -42,85 +39,11 @@ __FBSDID("$FreeBSD$");
#include <sys/mount.h>
#include <sys/jail.h>
#include <sys/lock.h>
-#include <sys/mutex.h>
#include <sys/sx.h>
-#ifdef COMPAT_LINUX32
-#include <machine/../linux32/linux.h>
-#else
-#include <machine/../linux/linux.h>
-#endif
-#include <compat/linux/linux_dtrace.h>
#include <compat/linux/linux_mib.h>
#include <compat/linux/linux_misc.h>
-/* DTrace init */
-LIN_SDT_PROVIDER_DECLARE(LINUX_DTRACE);
-
-/**
- * DTrace probes in this module.
- */
-LIN_SDT_PROBE_DEFINE0(mib, linux_sysctl_osname, entry);
-LIN_SDT_PROBE_DEFINE1(mib, linux_sysctl_osname, sysctl_string_error, "int");
-LIN_SDT_PROBE_DEFINE1(mib, linux_sysctl_osname, return, "int");
-
-LIN_SDT_PROBE_DEFINE0(mib, linux_sysctl_osrelease, entry);
-LIN_SDT_PROBE_DEFINE1(mib, linux_sysctl_osrelease, sysctl_string_error, "int");
-LIN_SDT_PROBE_DEFINE1(mib, linux_sysctl_osrelease, return, "int");
-LIN_SDT_PROBE_DEFINE0(mib, linux_sysctl_oss_version, entry);
-LIN_SDT_PROBE_DEFINE1(mib, linux_sysctl_oss_version, sysctl_string_error,
- "int");
-LIN_SDT_PROBE_DEFINE1(mib, linux_sysctl_oss_version, return, "int");
-LIN_SDT_PROBE_DEFINE2(mib, linux_map_osrel, entry, "char *", "int *");
-LIN_SDT_PROBE_DEFINE1(mib, linux_map_osrel, return, "int");
-LIN_SDT_PROBE_DEFINE2(mib, linux_get_prison, entry, "struct prison *",
- "struct prison **");
-LIN_SDT_PROBE_DEFINE1(mib, linux_get_prison, return, "struct linux_prison *");
-LIN_SDT_PROBE_DEFINE2(mib, linux_alloc_prison, entry, "struct prison *",
- "struct linux_prison **");
-LIN_SDT_PROBE_DEFINE1(mib, linux_alloc_prison, return, "int");
-LIN_SDT_PROBE_DEFINE2(mib, linux_prison_create, entry, "void *", "void *");
-LIN_SDT_PROBE_DEFINE1(mib, linux_prison_create, vfs_copyopt_error, "int");
-LIN_SDT_PROBE_DEFINE1(mib, linux_prison_create, return, "int");
-LIN_SDT_PROBE_DEFINE2(mib, linux_prison_check, entry, "void *", "void *");
-LIN_SDT_PROBE_DEFINE1(mib, linux_prison_check, vfs_copyopt_error, "int");
-LIN_SDT_PROBE_DEFINE1(mib, linux_prison_check, vfs_getopt_error, "int");
-LIN_SDT_PROBE_DEFINE1(mib, linux_prison_check, return, "int");
-LIN_SDT_PROBE_DEFINE2(mib, linux_prison_set, entry, "void *", "void *");
-LIN_SDT_PROBE_DEFINE1(mib, linux_prison_set, vfs_copyopt_error, "int");
-LIN_SDT_PROBE_DEFINE1(mib, linux_prison_set, vfs_getopt_error, "int");
-LIN_SDT_PROBE_DEFINE1(mib, linux_prison_set, return, "int");
-LIN_SDT_PROBE_DEFINE2(mib, linux_prison_get, entry, "void *", "void *");
-LIN_SDT_PROBE_DEFINE1(mib, linux_prison_get, vfs_setopt_error, "int");
-LIN_SDT_PROBE_DEFINE1(mib, linux_prison_get, vfs_setopts_error, "int");
-LIN_SDT_PROBE_DEFINE1(mib, linux_prison_get, return, "int");
-LIN_SDT_PROBE_DEFINE1(mib, linux_prison_destructor, entry, "void *");
-LIN_SDT_PROBE_DEFINE0(mib, linux_prison_destructor, return);
-LIN_SDT_PROBE_DEFINE0(mib, linux_osd_jail_register, entry);
-LIN_SDT_PROBE_DEFINE0(mib, linux_osd_jail_register, return);
-LIN_SDT_PROBE_DEFINE0(mib, linux_osd_jail_deregister, entry);
-LIN_SDT_PROBE_DEFINE0(mib, linux_osd_jail_deregister, return);
-LIN_SDT_PROBE_DEFINE2(mib, linux_get_osname, entry, "struct thread *",
- "char *");
-LIN_SDT_PROBE_DEFINE0(mib, linux_get_osname, return);
-LIN_SDT_PROBE_DEFINE2(mib, linux_set_osname, entry, "struct thread *",
- "char *");
-LIN_SDT_PROBE_DEFINE1(mib, linux_set_osname, return, "int");
-LIN_SDT_PROBE_DEFINE2(mib, linux_get_osrelease, entry, "struct thread *",
- "char *");
-LIN_SDT_PROBE_DEFINE0(mib, linux_get_osrelease, return);
-LIN_SDT_PROBE_DEFINE1(mib, linux_kernver, entry, "struct thread *");
-LIN_SDT_PROBE_DEFINE1(mib, linux_kernver, return, "int");
-LIN_SDT_PROBE_DEFINE2(mib, linux_set_osrelease, entry, "struct thread *",
- "char *");
-LIN_SDT_PROBE_DEFINE1(mib, linux_set_osrelease, return, "int");
-LIN_SDT_PROBE_DEFINE1(mib, linux_get_oss_version, entry, "struct thread *");
-LIN_SDT_PROBE_DEFINE1(mib, linux_get_oss_version, return, "int");
-
-LIN_SDT_PROBE_DEFINE2(mib, linux_set_oss_version, entry, "struct thread *",
- "int");
-LIN_SDT_PROBE_DEFINE1(mib, linux_set_oss_version, return, "int");
-
struct linux_prison {
char pr_osname[LINUX_MAX_UTSNAME];
char pr_osrelease[LINUX_MAX_UTSNAME];
@@ -130,15 +53,14 @@ struct linux_prison {
static struct linux_prison lprison0 = {
.pr_osname = "Linux",
- .pr_osrelease = "2.6.18",
+ .pr_osrelease = LINUX_VERSION_STR,
.pr_oss_version = 0x030600,
- .pr_osrel = 2006018
+ .pr_osrel = LINUX_VERSION_CODE
};
static unsigned linux_osd_jail_slot;
-static SYSCTL_NODE(_compat, OID_AUTO, linux, CTLFLAG_RW, 0,
- "Linux mode");
+SYSCTL_NODE(_compat, OID_AUTO, linux, CTLFLAG_RW, 0, "Linux mode");
static int linux_set_osname(struct thread *td, char *osname);
static int linux_set_osrelease(struct thread *td, char *osrelease);
@@ -150,19 +72,12 @@ linux_sysctl_osname(SYSCTL_HANDLER_ARGS)
char osname[LINUX_MAX_UTSNAME];
int error;
- LIN_SDT_PROBE0(mib, linux_sysctl_osname, entry);
-
linux_get_osname(req->td, osname);
error = sysctl_handle_string(oidp, osname, LINUX_MAX_UTSNAME, req);
- if (error != 0 || req->newptr == NULL) {
- LIN_SDT_PROBE1(mib, linux_sysctl_osname, sysctl_string_error,
- error);
- LIN_SDT_PROBE1(mib, linux_sysctl_osname, return, error);
+ if (error != 0 || req->newptr == NULL)
return (error);
- }
error = linux_set_osname(req->td, osname);
- LIN_SDT_PROBE1(mib, linux_sysctl_osname, return, error);
return (error);
}
@@ -177,19 +92,12 @@ linux_sysctl_osrelease(SYSCTL_HANDLER_ARGS)
char osrelease[LINUX_MAX_UTSNAME];
int error;
- LIN_SDT_PROBE0(mib, linux_sysctl_osrelease, entry);
-
linux_get_osrelease(req->td, osrelease);
error = sysctl_handle_string(oidp, osrelease, LINUX_MAX_UTSNAME, req);
- if (error != 0 || req->newptr == NULL) {
- LIN_SDT_PROBE1(mib, linux_sysctl_osrelease, sysctl_string_error,
- error);
- LIN_SDT_PROBE1(mib, linux_sysctl_osrelease, return, error);
+ if (error != 0 || req->newptr == NULL)
return (error);
- }
error = linux_set_osrelease(req->td, osrelease);
- LIN_SDT_PROBE1(mib, linux_sysctl_osrelease, return, error);
return (error);
}
@@ -204,19 +112,12 @@ linux_sysctl_oss_version(SYSCTL_HANDLER_ARGS)
int oss_version;
int error;
- LIN_SDT_PROBE0(mib, linux_sysctl_oss_version, entry);
-
oss_version = linux_get_oss_version(req->td);
error = sysctl_handle_int(oidp, &oss_version, 0, req);
- if (error != 0 || req->newptr == NULL) {
- LIN_SDT_PROBE1(mib, linux_sysctl_oss_version,
- sysctl_string_error, error);
- LIN_SDT_PROBE1(mib, linux_sysctl_oss_version, return, error);
+ if (error != 0 || req->newptr == NULL)
return (error);
- }
error = linux_set_oss_version(req->td, oss_version);
- LIN_SDT_PROBE1(mib, linux_sysctl_oss_version, return, error);
return (error);
}
@@ -234,37 +135,26 @@ linux_map_osrel(char *osrelease, int *osrel)
char *sep, *eosrelease;
int len, v0, v1, v2, v;
- LIN_SDT_PROBE2(mib, linux_map_osrel, entry, osrelease, osrel);
-
len = strlen(osrelease);
eosrelease = osrelease + len;
v0 = strtol(osrelease, &sep, 10);
- if (osrelease == sep || sep + 1 >= eosrelease || *sep != '.') {
- LIN_SDT_PROBE1(mib, linux_map_osrel, return, EINVAL);
+ if (osrelease == sep || sep + 1 >= eosrelease || *sep != '.')
return (EINVAL);
- }
osrelease = sep + 1;
v1 = strtol(osrelease, &sep, 10);
- if (osrelease == sep || sep + 1 >= eosrelease || *sep != '.') {
- LIN_SDT_PROBE1(mib, linux_map_osrel, return, EINVAL);
+ if (osrelease == sep || sep + 1 >= eosrelease || *sep != '.')
return (EINVAL);
- }
osrelease = sep + 1;
v2 = strtol(osrelease, &sep, 10);
- if (osrelease == sep || sep != eosrelease) {
- LIN_SDT_PROBE1(mib, linux_map_osrel, return, EINVAL);
+ if (osrelease == sep || sep != eosrelease)
return (EINVAL);
- }
v = v0 * 1000000 + v1 * 1000 + v2;
- if (v < 1000000) {
- LIN_SDT_PROBE1(mib, linux_map_osrel, return, EINVAL);
+ if (v < 1000000)
return (EINVAL);
- }
*osrel = v;
- LIN_SDT_PROBE1(mib, linux_map_osrel, return, 0);
return (0);
}
@@ -278,8 +168,6 @@ linux_find_prison(struct prison *spr, struct prison **prp)
struct prison *pr;
struct linux_prison *lpr;
- LIN_SDT_PROBE2(mib, linux_get_prison, entry, spr, prp);
-
if (!linux_osd_jail_slot)
/* In case osd_register failed. */
spr = &prison0;
@@ -294,7 +182,6 @@ linux_find_prison(struct prison *spr, struct prison **prp)
}
*prp = pr;
- LIN_SDT_PROBE1(mib, linux_get_prison, return, lpr);
return (lpr);
}
@@ -309,8 +196,6 @@ linux_alloc_prison(struct prison *pr, struct linux_prison **lprp)
struct linux_prison *lpr, *nlpr;
int error;
- LIN_SDT_PROBE2(mib, linux_alloc_prison, entry, pr, lprp);
-
/* If this prison already has Linux info, return that. */
error = 0;
lpr = linux_find_prison(pr, &ppr);
@@ -344,7 +229,6 @@ linux_alloc_prison(struct prison *pr, struct linux_prison **lprp)
else
mtx_unlock(&pr->pr_mtx);
- LIN_SDT_PROBE1(mib, linux_alloc_prison, return, error);
return (error);
}
@@ -356,26 +240,16 @@ linux_prison_create(void *obj, void *data)
{
struct prison *pr = obj;
struct vfsoptlist *opts = data;
- int jsys, error;
-
- LIN_SDT_PROBE2(mib, linux_prison_create, entry, obj, data);
+ int jsys;
- error = vfs_copyopt(opts, "linux", &jsys, sizeof(jsys));
- if (error != 0) {
- LIN_SDT_PROBE1(mib, linux_prison_create, vfs_copyopt_error,
- error);
- } else if (jsys == JAIL_SYS_INHERIT) {
- LIN_SDT_PROBE1(mib, linux_prison_create, return, 0);
+ if (vfs_copyopt(opts, "linux", &jsys, sizeof(jsys)) == 0 &&
+ jsys == JAIL_SYS_INHERIT)
return (0);
- }
/*
* Inherit a prison's initial values from its parent
* (different from JAIL_SYS_INHERIT which also inherits changes).
*/
- error = linux_alloc_prison(pr, NULL);
-
- LIN_SDT_PROBE1(mib, linux_prison_create, return, error);
- return (error);
+ return (linux_alloc_prison(pr, NULL));
}
static int
@@ -385,80 +259,46 @@ linux_prison_check(void *obj __unused, void *data)
char *osname, *osrelease;
int error, jsys, len, osrel, oss_version;
- LIN_SDT_PROBE2(mib, linux_prison_check, entry, obj, data);
-
/* Check that the parameters are correct. */
error = vfs_copyopt(opts, "linux", &jsys, sizeof(jsys));
- if (error != 0) {
- LIN_SDT_PROBE1(mib, linux_prison_check, vfs_copyopt_error,
- error);
- }
if (error != ENOENT) {
- if (error != 0) {
- LIN_SDT_PROBE1(mib, linux_prison_check, return, error);
+ if (error != 0)
return (error);
- }
- if (jsys != JAIL_SYS_NEW && jsys != JAIL_SYS_INHERIT) {
- LIN_SDT_PROBE1(mib, linux_prison_check, return, EINVAL);
+ if (jsys != JAIL_SYS_NEW && jsys != JAIL_SYS_INHERIT)
return (EINVAL);
- }
}
error = vfs_getopt(opts, "linux.osname", (void **)&osname, &len);
- if (error != 0) {
- LIN_SDT_PROBE1(mib, linux_prison_check, vfs_getopt_error,
- error);
- }
if (error != ENOENT) {
- if (error != 0) {
- LIN_SDT_PROBE1(mib, linux_prison_check, return, error);
+ if (error != 0)
return (error);
- }
- if (len == 0 || osname[len - 1] != '\0') {
- LIN_SDT_PROBE1(mib, linux_prison_check, return, EINVAL);
+ if (len == 0 || osname[len - 1] != '\0')
return (EINVAL);
- }
if (len > LINUX_MAX_UTSNAME) {
vfs_opterror(opts, "linux.osname too long");
- LIN_SDT_PROBE1(mib, linux_prison_check, return,
- ENAMETOOLONG);
return (ENAMETOOLONG);
}
}
error = vfs_getopt(opts, "linux.osrelease", (void **)&osrelease, &len);
- if (error != 0) {
- LIN_SDT_PROBE1(mib, linux_prison_check, vfs_getopt_error,
- error);
- }
if (error != ENOENT) {
- if (error != 0) {
- LIN_SDT_PROBE1(mib, linux_prison_check, return, error);
+ if (error != 0)
return (error);
- }
- if (len == 0 || osrelease[len - 1] != '\0') {
- LIN_SDT_PROBE1(mib, linux_prison_check, return, EINVAL);
+ if (len == 0 || osrelease[len - 1] != '\0')
return (EINVAL);
- }
if (len > LINUX_MAX_UTSNAME) {
vfs_opterror(opts, "linux.osrelease too long");
- LIN_SDT_PROBE1(mib, linux_prison_check, return,
- ENAMETOOLONG);
return (ENAMETOOLONG);
}
error = linux_map_osrel(osrelease, &osrel);
if (error != 0) {
vfs_opterror(opts, "linux.osrelease format error");
- LIN_SDT_PROBE1(mib, linux_prison_check, return, error);
return (error);
}
}
error = vfs_copyopt(opts, "linux.oss_version", &oss_version,
sizeof(oss_version));
- if (error != 0)
- LIN_SDT_PROBE1(mib, linux_prison_check, vfs_copyopt_error, error);
if (error == ENOENT)
error = 0;
- LIN_SDT_PROBE1(mib, linux_prison_check, return, error);
return (error);
}
@@ -471,32 +311,22 @@ linux_prison_set(void *obj, void *data)
char *osname, *osrelease;
int error, gotversion, jsys, len, oss_version;
- LIN_SDT_PROBE2(mib, linux_prison_set, entry, obj, data);
-
/* Set the parameters, which should be correct. */
error = vfs_copyopt(opts, "linux", &jsys, sizeof(jsys));
- if (error != 0)
- LIN_SDT_PROBE1(mib, linux_prison_set, vfs_copyopt_error, error);
if (error == ENOENT)
jsys = -1;
error = vfs_getopt(opts, "linux.osname", (void **)&osname, &len);
- if (error != 0)
- LIN_SDT_PROBE1(mib, linux_prison_set, vfs_getopt_error, error);
if (error == ENOENT)
osname = NULL;
else
jsys = JAIL_SYS_NEW;
error = vfs_getopt(opts, "linux.osrelease", (void **)&osrelease, &len);
- if (error != 0)
- LIN_SDT_PROBE1(mib, linux_prison_set, vfs_getopt_error, error);
if (error == ENOENT)
osrelease = NULL;
else
jsys = JAIL_SYS_NEW;
error = vfs_copyopt(opts, "linux.oss_version", &oss_version,
sizeof(oss_version));
- if (error != 0)
- LIN_SDT_PROBE1(mib, linux_prison_set, vfs_copyopt_error, error);
if (error == ENOENT)
gotversion = 0;
else {
@@ -518,15 +348,12 @@ linux_prison_set(void *obj, void *data)
error = linux_alloc_prison(pr, &lpr);
if (error) {
mtx_unlock(&pr->pr_mtx);
- LIN_SDT_PROBE1(mib, linux_prison_set, return, error);
return (error);
}
if (osrelease) {
error = linux_map_osrel(osrelease, &lpr->pr_osrel);
if (error) {
mtx_unlock(&pr->pr_mtx);
- LIN_SDT_PROBE1(mib, linux_prison_set, return,
- error);
return (error);
}
strlcpy(lpr->pr_osrelease, osrelease,
@@ -539,7 +366,6 @@ linux_prison_set(void *obj, void *data)
mtx_unlock(&pr->pr_mtx);
}
- LIN_SDT_PROBE1(mib, linux_prison_set, return, 0);
return (0);
}
@@ -562,74 +388,44 @@ linux_prison_get(void *obj, void *data)
static int version0;
- LIN_SDT_PROBE2(mib, linux_prison_get, entry, obj, data);
-
/* See if this prison is the one with the Linux info. */
lpr = linux_find_prison(pr, &ppr);
i = (ppr == pr) ? JAIL_SYS_NEW : JAIL_SYS_INHERIT;
error = vfs_setopt(opts, "linux", &i, sizeof(i));
- if (error != 0) {
- LIN_SDT_PROBE1(mib, linux_prison_get, vfs_setopt_error, error);
- if (error != ENOENT)
- goto done;
- }
+ if (error != 0 && error != ENOENT)
+ goto done;
if (i) {
error = vfs_setopts(opts, "linux.osname", lpr->pr_osname);
- if (error != 0) {
- LIN_SDT_PROBE1(mib, linux_prison_get, vfs_setopts_error,
- error);
- if (error != ENOENT)
- goto done;
- }
+ if (error != 0 && error != ENOENT)
+ goto done;
error = vfs_setopts(opts, "linux.osrelease", lpr->pr_osrelease);
- if (error != 0) {
- LIN_SDT_PROBE1(mib, linux_prison_get, vfs_setopts_error,
- error);
- if (error != ENOENT)
- goto done;
- }
+ if (error != 0 && error != ENOENT)
+ goto done;
error = vfs_setopt(opts, "linux.oss_version",
&lpr->pr_oss_version, sizeof(lpr->pr_oss_version));
- if (error != 0) {
- LIN_SDT_PROBE1(mib, linux_prison_get, vfs_setopt_error,
- error);
- if(error != ENOENT)
- goto done;
- }
+ if (error != 0 && error != ENOENT)
+ goto done;
} else {
/*
* If this prison is inheriting its Linux info, report
* empty/zero parameters.
*/
error = vfs_setopts(opts, "linux.osname", "");
- if (error != 0) {
- LIN_SDT_PROBE1(mib, linux_prison_get, vfs_setopts_error,
- error);
- if(error != ENOENT)
- goto done;
- }
+ if (error != 0 && error != ENOENT)
+ goto done;
error = vfs_setopts(opts, "linux.osrelease", "");
- if (error != 0) {
- LIN_SDT_PROBE1(mib, linux_prison_get, vfs_setopts_error,
- error);
- if(error != ENOENT)
- goto done;
- }
+ if (error != 0 && error != ENOENT)
+ goto done;
error = vfs_setopt(opts, "linux.oss_version", &version0,
sizeof(lpr->pr_oss_version));
- if (error != 0) {
- LIN_SDT_PROBE1(mib, linux_prison_get, vfs_setopt_error,
- error);
- if(error != ENOENT)
- goto done;
- }
+ if (error != 0 && error != ENOENT)
+ goto done;
}
error = 0;
done:
mtx_unlock(&ppr->pr_mtx);
- LIN_SDT_PROBE1(mib, linux_prison_get, return, error);
return (error);
}
@@ -637,9 +433,7 @@ static void
linux_prison_destructor(void *data)
{
- LIN_SDT_PROBE1(mib, linux_prison_destructor, entry, data);
free(data, M_PRISON);
- LIN_SDT_PROBE0(mib, linux_prison_destructor, return);
}
void
@@ -653,8 +447,6 @@ linux_osd_jail_register(void)
[PR_METHOD_CHECK] = linux_prison_check
};
- LIN_SDT_PROBE0(mib, linux_osd_jail_register, entry);
-
linux_osd_jail_slot =
osd_jail_register(linux_prison_destructor, methods);
if (linux_osd_jail_slot > 0) {
@@ -664,20 +456,14 @@ linux_osd_jail_register(void)
(void)linux_alloc_prison(pr, NULL);
sx_xunlock(&allprison_lock);
}
-
- LIN_SDT_PROBE0(mib, linux_osd_jail_register, return);
}
void
linux_osd_jail_deregister(void)
{
- LIN_SDT_PROBE0(mib, linux_osd_jail_register, entry);
-
if (linux_osd_jail_slot)
osd_jail_deregister(linux_osd_jail_slot);
-
- LIN_SDT_PROBE0(mib, linux_osd_jail_register, return);
}
void
@@ -686,13 +472,9 @@ linux_get_osname(struct thread *td, char *dst)
struct prison *pr;
struct linux_prison *lpr;
- LIN_SDT_PROBE2(mib, linux_get_osname, entry, td, dst);
-
lpr = linux_find_prison(td->td_ucred->cr_prison, &pr);
bcopy(lpr->pr_osname, dst, LINUX_MAX_UTSNAME);
mtx_unlock(&pr->pr_mtx);
-
- LIN_SDT_PROBE0(mib, linux_get_osname, return);
}
static int
@@ -701,13 +483,10 @@ linux_set_osname(struct thread *td, char *osname)
struct prison *pr;
struct linux_prison *lpr;
- LIN_SDT_PROBE2(mib, linux_set_osname, entry, td, osname);
-
lpr = linux_find_prison(td->td_ucred->cr_prison, &pr);
strlcpy(lpr->pr_osname, osname, LINUX_MAX_UTSNAME);
mtx_unlock(&pr->pr_mtx);
- LIN_SDT_PROBE1(mib, linux_set_osname, return, 0);
return (0);
}
@@ -717,13 +496,9 @@ linux_get_osrelease(struct thread *td, char *dst)
struct prison *pr;
struct linux_prison *lpr;
- LIN_SDT_PROBE2(mib, linux_get_osrelease, entry, td, dst);
-
lpr = linux_find_prison(td->td_ucred->cr_prison, &pr);
bcopy(lpr->pr_osrelease, dst, LINUX_MAX_UTSNAME);
mtx_unlock(&pr->pr_mtx);
-
- LIN_SDT_PROBE0(mib, linux_get_osrelease, return);
}
int
@@ -733,13 +508,10 @@ linux_kernver(struct thread *td)
struct linux_prison *lpr;
int osrel;
- LIN_SDT_PROBE1(mib, linux_kernver, entry, td);
-
lpr = linux_find_prison(td->td_ucred->cr_prison, &pr);
osrel = lpr->pr_osrel;
mtx_unlock(&pr->pr_mtx);
- LIN_SDT_PROBE1(mib, linux_kernver, return, osrel);
return (osrel);
}
@@ -750,15 +522,12 @@ linux_set_osrelease(struct thread *td, char *osrelease)
struct linux_prison *lpr;
int error;
- LIN_SDT_PROBE2(mib, linux_set_osrelease, entry, td, osrelease);
-
lpr = linux_find_prison(td->td_ucred->cr_prison, &pr);
error = linux_map_osrel(osrelease, &lpr->pr_osrel);
if (error == 0)
strlcpy(lpr->pr_osrelease, osrelease, LINUX_MAX_UTSNAME);
mtx_unlock(&pr->pr_mtx);
- LIN_SDT_PROBE1(mib, linux_set_osrelease, return, error);
return (error);
}
@@ -769,13 +538,10 @@ linux_get_oss_version(struct thread *td)
struct linux_prison *lpr;
int version;
- LIN_SDT_PROBE1(mib, linux_get_oss_version, entry, td);
-
lpr = linux_find_prison(td->td_ucred->cr_prison, &pr);
version = lpr->pr_oss_version;
mtx_unlock(&pr->pr_mtx);
- LIN_SDT_PROBE1(mib, linux_get_oss_version, return, version);
return (version);
}
@@ -785,74 +551,9 @@ linux_set_oss_version(struct thread *td, int oss_version)
struct prison *pr;
struct linux_prison *lpr;
- LIN_SDT_PROBE2(mib, linux_set_oss_version, entry, td, oss_version);
-
lpr = linux_find_prison(td->td_ucred->cr_prison, &pr);
lpr->pr_oss_version = oss_version;
mtx_unlock(&pr->pr_mtx);
- LIN_SDT_PROBE1(mib, linux_set_oss_version, return, 0);
return (0);
}
-
-#if defined(DEBUG) || defined(KTR)
-/* XXX: can be removed when every ldebug(...) and KTR stuff are removed. */
-
-u_char linux_debug_map[howmany(LINUX_SYS_MAXSYSCALL, sizeof(u_char))];
-
-static int
-linux_debug(int syscall, int toggle, int global)
-{
-
- if (global) {
- char c = toggle ? 0 : 0xff;
-
- memset(linux_debug_map, c, sizeof(linux_debug_map));
- return (0);
- }
- if (syscall < 0 || syscall >= LINUX_SYS_MAXSYSCALL)
- return (EINVAL);
- if (toggle)
- clrbit(linux_debug_map, syscall);
- else
- setbit(linux_debug_map, syscall);
- return (0);
-}
-
-/*
- * Usage: sysctl linux.debug=<syscall_nr>.<0/1>
- *
- * E.g.: sysctl linux.debug=21.0
- *
- * As a special case, syscall "all" will apply to all syscalls globally.
- */
-#define LINUX_MAX_DEBUGSTR 16
-static int
-linux_sysctl_debug(SYSCTL_HANDLER_ARGS)
-{
- char value[LINUX_MAX_DEBUGSTR], *p;
- int error, sysc, toggle;
- int global = 0;
-
- value[0] = '\0';
- error = sysctl_handle_string(oidp, value, LINUX_MAX_DEBUGSTR, req);
- if (error || req->newptr == NULL)
- return (error);
- for (p = value; *p != '\0' && *p != '.'; p++);
- if (*p == '\0')
- return (EINVAL);
- *p++ = '\0';
- sysc = strtol(value, NULL, 0);
- toggle = strtol(p, NULL, 0);
- if (strcmp(value, "all") == 0)
- global = 1;
- error = linux_debug(sysc, toggle, global);
- return (error);
-}
-
-SYSCTL_PROC(_compat_linux, OID_AUTO, debug,
- CTLTYPE_STRING | CTLFLAG_RW,
- 0, 0, linux_sysctl_debug, "A",
- "Linux debugging control");
-
-#endif /* DEBUG || KTR */
diff --git a/sys/compat/linux/linux_mib.h b/sys/compat/linux/linux_mib.h
index e8eedf9..80b6c97 100644
--- a/sys/compat/linux/linux_mib.h
+++ b/sys/compat/linux/linux_mib.h
@@ -31,6 +31,10 @@
#ifndef _LINUX_MIB_H_
#define _LINUX_MIB_H_
+#ifdef SYSCTL_DECL
+SYSCTL_DECL(_compat_linux);
+#endif
+
void linux_osd_jail_register(void);
void linux_osd_jail_deregister(void);
@@ -42,8 +46,19 @@ int linux_get_oss_version(struct thread *td);
int linux_kernver(struct thread *td);
-#define LINUX_KERNVER_2004000 2004000
-#define LINUX_KERNVER_2006000 2006000
+#define LINUX_KVERSION 2
+#define LINUX_KPATCHLEVEL 6
+#define LINUX_KSUBLEVEL 32
+
+#define LINUX_KERNVER(a,b,c) (((a) << 16) + ((b) << 8) + (c))
+#define LINUX_VERSION_CODE LINUX_KERNVER(LINUX_KVERSION, \
+ LINUX_KPATCHLEVEL, LINUX_KSUBLEVEL)
+#define LINUX_KERNVERSTR(x) #x
+#define LINUX_XKERNVERSTR(x) LINUX_KERNVERSTR(x)
+#define LINUX_VERSION_STR LINUX_XKERNVERSTR(LINUX_KVERSION.LINUX_KPATCHLEVEL.LINUX_KSUBLEVEL)
+
+#define LINUX_KERNVER_2004000 LINUX_KERNVER(2,4,0)
+#define LINUX_KERNVER_2006000 LINUX_KERNVER(2,6,0)
#define linux_use26(t) (linux_kernver(t) >= LINUX_KERNVER_2006000)
diff --git a/sys/compat/linux/linux_misc.c b/sys/compat/linux/linux_misc.c
index ac2384c..d87d786 100644
--- a/sys/compat/linux/linux_misc.c
+++ b/sys/compat/linux/linux_misc.c
@@ -89,21 +89,24 @@ __FBSDID("$FreeBSD$");
#include <compat/linux/linux_file.h>
#include <compat/linux/linux_mib.h>
#include <compat/linux/linux_signal.h>
+#include <compat/linux/linux_timer.h>
#include <compat/linux/linux_util.h>
#include <compat/linux/linux_sysproto.h>
#include <compat/linux/linux_emul.h>
#include <compat/linux/linux_misc.h>
-/* DTrace init */
-LIN_SDT_PROVIDER_DECLARE(LINUX_DTRACE);
-
-/* Linuxulator-global DTrace probes */
-LIN_SDT_PROBE_DECLARE(locks, emul_lock, locked);
-LIN_SDT_PROBE_DECLARE(locks, emul_lock, unlock);
-LIN_SDT_PROBE_DECLARE(locks, emul_shared_rlock, locked);
-LIN_SDT_PROBE_DECLARE(locks, emul_shared_rlock, unlock);
-LIN_SDT_PROBE_DECLARE(locks, emul_shared_wlock, locked);
-LIN_SDT_PROBE_DECLARE(locks, emul_shared_wlock, unlock);
+/**
+ * Special DTrace provider for the linuxulator.
+ *
+ * In this file we define the provider for the entire linuxulator. All
+ * modules (= files of the linuxulator) use it.
+ *
+ * We define a different name depending on the emulated bitsize, see
+ * ../../<ARCH>/linux{,32}/linux.h, e.g.:
+ * native bitsize = linuxulator
+ * amd64, 32bit emulation = linuxulator32
+ */
+LIN_SDT_PROVIDER_DEFINE(LINUX_DTRACE);
int stclohz; /* Statistics clock frequency */
@@ -130,6 +133,15 @@ struct l_sysinfo {
l_uint mem_unit;
char _f[20-2*sizeof(l_long)-sizeof(l_int)]; /* padding */
};
+
+struct l_pselect6arg {
+ l_uintptr_t ss;
+ l_size_t ss_len;
+};
+
+static int linux_utimensat_nsec_valid(l_long);
+
+
int
linux_sysinfo(struct thread *td, struct linux_sysinfo_args *args)
{
@@ -524,7 +536,7 @@ linux_select(struct thread *td, struct linux_select_args *args)
tvp = NULL;
error = kern_select(td, args->nfds, args->readfds, args->writefds,
- args->exceptfds, tvp, sizeof(l_int) * 8);
+ args->exceptfds, tvp, LINUX_NFDBITS);
#ifdef DEBUG
if (ldebug(select))
@@ -691,9 +703,9 @@ linux_times(struct thread *td, struct linux_times_args *args)
if (args->buf != NULL) {
p = td->td_proc;
PROC_LOCK(p);
- PROC_SLOCK(p);
+ PROC_STATLOCK(p);
calcru(p, &utime, &stime);
- PROC_SUNLOCK(p);
+ PROC_STATUNLOCK(p);
calccru(p, &cutime, &cstime);
PROC_UNLOCK(p);
@@ -739,12 +751,11 @@ linux_newuname(struct thread *td, struct linux_newuname_args *args)
*p = '\0';
break;
}
- strlcpy(utsname.machine, linux_platform, LINUX_MAX_UTSNAME);
+ strlcpy(utsname.machine, linux_kplatform, LINUX_MAX_UTSNAME);
return (copyout(&utsname, args->buf, sizeof(utsname)));
}
-#if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
struct l_utimbuf {
l_time_t l_actime;
l_time_t l_modtime;
@@ -815,6 +826,98 @@ linux_utimes(struct thread *td, struct linux_utimes_args *args)
return (error);
}
+/* Returns 0 for a valid tv_nsec or a LINUX_UTIME_* marker, 1 otherwise. */
+static int
+linux_utimensat_nsec_valid(l_long nsec)
+{
+	int special, in_range;
+
+	special = (nsec == LINUX_UTIME_OMIT || nsec == LINUX_UTIME_NOW);
+	in_range = (nsec >= 0 && nsec <= 999999999);
+	return ((special || in_range) ? 0 : 1);
+}
+
+/*
+ * Linux utimensat(2).  Copies in and validates the optional times array,
+ * maps LINUX_UTIME_OMIT/LINUX_UTIME_NOW to the native markers, and calls
+ * kern_utimensat(), or kern_futimens() on dfd when no pathname is given.
+ */
+int
+linux_utimensat(struct thread *td, struct linux_utimensat_args *args)
+{
+	struct l_timespec l_times[2];
+	struct timespec times[2], *timesp = NULL;
+	char *path = NULL;
+	int error, dfd, flags = 0, i;
+
+	dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd;
+
+#ifdef DEBUG
+	if (ldebug(utimensat))
+		printf(ARGS(utimensat, "%d, *"), dfd);
+#endif
+
+	if (args->flags & ~LINUX_AT_SYMLINK_NOFOLLOW)
+		return (EINVAL);
+
+	if (args->times != NULL) {
+		error = copyin(args->times, l_times, sizeof(l_times));
+		if (error != 0)
+			return (error);
+
+		if (linux_utimensat_nsec_valid(l_times[0].tv_nsec) != 0 ||
+		    linux_utimensat_nsec_valid(l_times[1].tv_nsec) != 0)
+			return (EINVAL);
+
+		/* Map the Linux special nsec markers to the native ones. */
+		for (i = 0; i < 2; i++) {
+			times[i].tv_sec = l_times[i].tv_sec;
+			switch (l_times[i].tv_nsec) {
+			case LINUX_UTIME_OMIT:
+				times[i].tv_nsec = UTIME_OMIT;
+				break;
+			case LINUX_UTIME_NOW:
+				times[i].tv_nsec = UTIME_NOW;
+				break;
+			default:
+				times[i].tv_nsec = l_times[i].tv_nsec;
+				break;
+			}
+		}
+		timesp = times;
+
+		/*
+		 * This breaks POSIX, but is what the Linux kernel does
+		 * _on purpose_ (documented in the man page for utimensat(2)),
+		 * so we must follow that behaviour.
+		 *
+		 * The test has to stay inside this branch: times[] is only
+		 * initialized when the caller supplied a times array.
+		 */
+		if (times[0].tv_nsec == UTIME_OMIT &&
+		    times[1].tv_nsec == UTIME_OMIT)
+			return (0);
+	}
+
+	if (args->pathname != NULL)
+		LCONVPATHEXIST_AT(td, args->pathname, &path, dfd);
+	else if (args->flags != 0)
+		return (EINVAL);
+
+	if (args->flags & LINUX_AT_SYMLINK_NOFOLLOW)
+		flags |= AT_SYMLINK_NOFOLLOW;
+
+	if (path == NULL)
+		error = kern_futimens(td, dfd, timesp, UIO_SYSSPACE);
+	else {
+		error = kern_utimensat(td, dfd, path, UIO_SYSSPACE, timesp,
+		    UIO_SYSSPACE, flags);
+		LFREEPATH(path);
+	}
+
+	return (error);
+}
+
int
linux_futimesat(struct thread *td, struct linux_futimesat_args *args)
{
@@ -847,7 +950,6 @@ linux_futimesat(struct thread *td, struct linux_futimesat_args *args)
LFREEPATH(fname);
return (error);
}
-#endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */
int
linux_common_wait(struct thread *td, int pid, int *status,
@@ -863,41 +965,131 @@ linux_common_wait(struct thread *td, int pid, int *status,
tmpstat &= 0xffff;
if (WIFSIGNALED(tmpstat))
tmpstat = (tmpstat & 0xffffff80) |
- BSD_TO_LINUX_SIGNAL(WTERMSIG(tmpstat));
+ bsd_to_linux_signal(WTERMSIG(tmpstat));
else if (WIFSTOPPED(tmpstat))
tmpstat = (tmpstat & 0xffff00ff) |
- (BSD_TO_LINUX_SIGNAL(WSTOPSIG(tmpstat)) << 8);
+ (bsd_to_linux_signal(WSTOPSIG(tmpstat)) << 8);
+ else if (WIFCONTINUED(tmpstat))
+ tmpstat = 0xffff;
error = copyout(&tmpstat, status, sizeof(int));
}
return (error);
}
+#if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
int
linux_waitpid(struct thread *td, struct linux_waitpid_args *args)
{
- int options;
-
+ struct linux_wait4_args wait4_args;
+
#ifdef DEBUG
if (ldebug(waitpid))
printf(ARGS(waitpid, "%d, %p, %d"),
args->pid, (void *)args->status, args->options);
#endif
- /*
- * this is necessary because the test in kern_wait doesn't work
- * because we mess with the options here
- */
- if (args->options & ~(WUNTRACED | WNOHANG | WCONTINUED | __WCLONE))
+
+ wait4_args.pid = args->pid;
+ wait4_args.status = args->status;
+ wait4_args.options = args->options;
+ wait4_args.rusage = NULL;
+
+ return (linux_wait4(td, &wait4_args));
+}
+#endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */
+
+int
+linux_wait4(struct thread *td, struct linux_wait4_args *args)
+{
+ int error, options;
+ struct rusage ru, *rup;
+
+#ifdef DEBUG
+ if (ldebug(wait4))
+ printf(ARGS(wait4, "%d, %p, %d, %p"),
+ args->pid, (void *)args->status, args->options,
+ (void *)args->rusage);
+#endif
+ if (args->options & ~(LINUX_WUNTRACED | LINUX_WNOHANG |
+ LINUX_WCONTINUED | __WCLONE | __WNOTHREAD | __WALL))
return (EINVAL);
-
- options = (args->options & (WNOHANG | WUNTRACED));
- /* WLINUXCLONE should be equal to __WCLONE, but we make sure */
- if (args->options & __WCLONE)
- options |= WLINUXCLONE;
- return (linux_common_wait(td, args->pid, args->status, options, NULL));
+ options = WEXITED;
+ linux_to_bsd_waitopts(args->options, &options);
+
+ if (args->rusage != NULL)
+ rup = &ru;
+ else
+ rup = NULL;
+ error = linux_common_wait(td, args->pid, args->status, options, rup);
+ if (error != 0)
+ return (error);
+ if (args->rusage != NULL)
+ error = linux_copyout_rusage(&ru, args->rusage);
+ return (error);
}
+/*
+ * Linux waitid(2): converts the options and idtype, waits via kern_wait6()
+ * and copies out the optional rusage and siginfo results.
+ */
+int
+linux_waitid(struct thread *td, struct linux_waitid_args *args)
+{
+	int error, status, options, sig;
+	struct __wrusage wru;
+	siginfo_t siginfo;
+	l_siginfo_t lsi;
+	idtype_t idtype;
+
+	options = 0;
+	linux_to_bsd_waitopts(args->options, &options);
+
+	if (options & ~(WNOHANG | WNOWAIT | WEXITED | WUNTRACED | WCONTINUED))
+		return (EINVAL);
+	if (!(options & (WEXITED | WUNTRACED | WCONTINUED)))
+		return (EINVAL);
+
+	switch (args->idtype) {
+	case LINUX_P_ALL:
+		idtype = P_ALL;
+		break;
+	case LINUX_P_PID:
+		if (args->id <= 0)
+			return (EINVAL);
+		idtype = P_PID;
+		break;
+	case LINUX_P_PGID:
+		if (args->id <= 0)
+			return (EINVAL);
+		idtype = P_PGID;
+		break;
+	default:
+		return (EINVAL);
+	}
+
+	error = kern_wait6(td, idtype, args->id, &status, options,
+	    &wru, &siginfo);
+	if (error != 0)
+		return (error);
+	if (args->rusage != NULL) {
+		error = linux_copyout_rusage(&wru.wru_children,
+		    args->rusage);
+		if (error != 0)
+			return (error);
+	}
+	if (args->info != NULL) {
+		if (td->td_retval[0] == 0)
+			bzero(&lsi, sizeof(lsi));
+		else {
+			sig = bsd_to_linux_signal(siginfo.si_signo);
+			siginfo_to_lsiginfo(&siginfo, &lsi, sig);
+		}
+		error = copyout(&lsi, args->info, sizeof(lsi));
+	}
+	td->td_retval[0] = 0;
+	return (error);
+}
int
linux_mknod(struct thread *td, struct linux_mknod_args *args)
@@ -909,7 +1101,8 @@ linux_mknod(struct thread *td, struct linux_mknod_args *args)
#ifdef DEBUG
if (ldebug(mknod))
- printf(ARGS(mknod, "%s, %d, %d"), path, args->mode, args->dev);
+ printf(ARGS(mknod, "%s, %d, %ju"), path, args->mode,
+ (uintmax_t)args->dev);
#endif
switch (args->mode & S_IFMT) {
@@ -1079,6 +1272,7 @@ linux_getitimer(struct thread *td, struct linux_getitimer_args *uap)
return (copyout(&ls, uap->itv, sizeof(ls)));
}
+#if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
int
linux_nice(struct thread *td, struct linux_nice_args *args)
{
@@ -1089,6 +1283,7 @@ linux_nice(struct thread *td, struct linux_nice_args *args)
bsd_args.prio = args->inc;
return (sys_setpriority(td, &bsd_args));
}
+#endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */
int
linux_setgroups(struct thread *td, struct linux_setgroups_args *args)
@@ -1102,7 +1297,7 @@ linux_setgroups(struct thread *td, struct linux_setgroups_args *args)
ngrp = args->gidsetsize;
if (ngrp < 0 || ngrp >= ngroups_max + 1)
return (EINVAL);
- linux_gidset = malloc(ngrp * sizeof(*linux_gidset), M_TEMP, M_WAITOK);
+ linux_gidset = malloc(ngrp * sizeof(*linux_gidset), M_LINUX, M_WAITOK);
error = copyin(args->grouplist, linux_gidset, ngrp * sizeof(l_gid_t));
if (error)
goto out;
@@ -1141,7 +1336,7 @@ linux_setgroups(struct thread *td, struct linux_setgroups_args *args)
crfree(oldcred);
error = 0;
out:
- free(linux_gidset, M_TEMP);
+ free(linux_gidset, M_LINUX);
return (error);
}
@@ -1173,14 +1368,14 @@ linux_getgroups(struct thread *td, struct linux_getgroups_args *args)
ngrp = 0;
linux_gidset = malloc(bsd_gidsetsz * sizeof(*linux_gidset),
- M_TEMP, M_WAITOK);
+ M_LINUX, M_WAITOK);
while (ngrp < bsd_gidsetsz) {
linux_gidset[ngrp] = bsd_gidset[ngrp + 1];
ngrp++;
}
error = copyout(linux_gidset, args->grouplist, ngrp * sizeof(l_gid_t));
- free(linux_gidset, M_TEMP);
+ free(linux_gidset, M_LINUX);
if (error)
return (error);
@@ -1218,6 +1413,7 @@ linux_setrlimit(struct thread *td, struct linux_setrlimit_args *args)
return (kern_setrlimit(td, which, &bsd_rlim));
}
+#if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
int
linux_old_getrlimit(struct thread *td, struct linux_old_getrlimit_args *args)
{
@@ -1260,6 +1456,7 @@ linux_old_getrlimit(struct thread *td, struct linux_old_getrlimit_args *args)
#endif
return (copyout(&rlim, args->rlim, sizeof(rlim)));
}
+#endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */
int
linux_getrlimit(struct thread *td, struct linux_getrlimit_args *args)
@@ -1295,7 +1492,9 @@ int
linux_sched_setscheduler(struct thread *td,
struct linux_sched_setscheduler_args *args)
{
- struct sched_setscheduler_args bsd;
+ struct sched_param sched_param;
+ struct thread *tdt;
+ int error, policy;
#ifdef DEBUG
if (ldebug(sched_setscheduler))
@@ -1305,39 +1504,51 @@ linux_sched_setscheduler(struct thread *td,
switch (args->policy) {
case LINUX_SCHED_OTHER:
- bsd.policy = SCHED_OTHER;
+ policy = SCHED_OTHER;
break;
case LINUX_SCHED_FIFO:
- bsd.policy = SCHED_FIFO;
+ policy = SCHED_FIFO;
break;
case LINUX_SCHED_RR:
- bsd.policy = SCHED_RR;
+ policy = SCHED_RR;
break;
default:
return (EINVAL);
}
- bsd.pid = args->pid;
- bsd.param = (struct sched_param *)args->param;
- return (sys_sched_setscheduler(td, &bsd));
+ error = copyin(args->param, &sched_param, sizeof(sched_param));
+ if (error)
+ return (error);
+
+ tdt = linux_tdfind(td, args->pid, -1);
+ if (tdt == NULL)
+ return (ESRCH);
+
+ error = kern_sched_setscheduler(td, tdt, policy, &sched_param);
+ PROC_UNLOCK(tdt->td_proc);
+ return (error);
}
int
linux_sched_getscheduler(struct thread *td,
struct linux_sched_getscheduler_args *args)
{
- struct sched_getscheduler_args bsd;
- int error;
+ struct thread *tdt;
+ int error, policy;
#ifdef DEBUG
if (ldebug(sched_getscheduler))
printf(ARGS(sched_getscheduler, "%d"), args->pid);
#endif
- bsd.pid = args->pid;
- error = sys_sched_getscheduler(td, &bsd);
+ tdt = linux_tdfind(td, args->pid, -1);
+ if (tdt == NULL)
+ return (ESRCH);
+
+ error = kern_sched_getscheduler(td, tdt, &policy);
+ PROC_UNLOCK(tdt->td_proc);
- switch (td->td_retval[0]) {
+ switch (policy) {
case SCHED_OTHER:
td->td_retval[0] = LINUX_SCHED_OTHER;
break;
@@ -1348,7 +1559,6 @@ linux_sched_getscheduler(struct thread *td,
td->td_retval[0] = LINUX_SCHED_RR;
break;
}
-
return (error);
}
@@ -1474,20 +1684,12 @@ linux_reboot(struct thread *td, struct linux_reboot_args *args)
int
linux_getpid(struct thread *td, struct linux_getpid_args *args)
{
- struct linux_emuldata *em;
#ifdef DEBUG
if (ldebug(getpid))
printf(ARGS(getpid, ""));
#endif
-
- if (linux_use26(td)) {
- em = em_find(td->td_proc, EMUL_DONTLOCK);
- KASSERT(em != NULL, ("getpid: emuldata not found.\n"));
- td->td_retval[0] = em->shared->group_pid;
- } else {
- td->td_retval[0] = td->td_proc->p_pid;
- }
+ td->td_retval[0] = td->td_proc->p_pid;
return (0);
}
@@ -1495,13 +1697,18 @@ linux_getpid(struct thread *td, struct linux_getpid_args *args)
int
linux_gettid(struct thread *td, struct linux_gettid_args *args)
{
+ struct linux_emuldata *em;
#ifdef DEBUG
if (ldebug(gettid))
printf(ARGS(gettid, ""));
#endif
- td->td_retval[0] = td->td_proc->p_pid;
+ em = em_find(td);
+ KASSERT(em != NULL, ("gettid: emuldata not found.\n"));
+
+ td->td_retval[0] = em->em_tid;
+
return (0);
}
@@ -1509,50 +1716,15 @@ linux_gettid(struct thread *td, struct linux_gettid_args *args)
int
linux_getppid(struct thread *td, struct linux_getppid_args *args)
{
- struct linux_emuldata *em;
- struct proc *p, *pp;
#ifdef DEBUG
if (ldebug(getppid))
printf(ARGS(getppid, ""));
#endif
- if (!linux_use26(td)) {
- PROC_LOCK(td->td_proc);
- td->td_retval[0] = td->td_proc->p_pptr->p_pid;
- PROC_UNLOCK(td->td_proc);
- return (0);
- }
-
- em = em_find(td->td_proc, EMUL_DONTLOCK);
-
- KASSERT(em != NULL, ("getppid: process emuldata not found.\n"));
-
- /* find the group leader */
- p = pfind(em->shared->group_pid);
-
- if (p == NULL) {
-#ifdef DEBUG
- printf(LMSG("parent process not found.\n"));
-#endif
- return (0);
- }
-
- pp = p->p_pptr; /* switch to parent */
- PROC_LOCK(pp);
- PROC_UNLOCK(p);
-
- /* if its also linux process */
- if (pp->p_sysent == &elf_linux_sysvec) {
- em = em_find(pp, EMUL_DONTLOCK);
- KASSERT(em != NULL, ("getppid: parent emuldata not found.\n"));
-
- td->td_retval[0] = em->shared->group_pid;
- } else
- td->td_retval[0] = pp->p_pid;
-
- PROC_UNLOCK(pp);
-
+ PROC_LOCK(td->td_proc);
+ td->td_retval[0] = td->td_proc->p_pptr->p_pid;
+ PROC_UNLOCK(td->td_proc);
return (0);
}
@@ -1657,22 +1829,14 @@ linux_setdomainname(struct thread *td, struct linux_setdomainname_args *args)
int
linux_exit_group(struct thread *td, struct linux_exit_group_args *args)
{
- struct linux_emuldata *em;
#ifdef DEBUG
if (ldebug(exit_group))
printf(ARGS(exit_group, "%i"), args->error_code);
#endif
- em = em_find(td->td_proc, EMUL_DONTLOCK);
- if (em->shared->refs > 1) {
- EMUL_SHARED_WLOCK(&emul_shared_lock);
- em->shared->flags |= EMUL_SHARED_HASXSTAT;
- em->shared->xstat = W_EXITCODE(args->error_code, 0);
- EMUL_SHARED_WUNLOCK(&emul_shared_lock);
- if (linux_use26(td))
- linux_kill_threads(td, SIGKILL);
- }
+ LINUX_CTR2(exit_group, "thread(%d) (%d)", td->td_tid,
+ args->error_code);
/*
* XXX: we should send a signal to the parent if
@@ -1680,8 +1844,7 @@ linux_exit_group(struct thread *td, struct linux_exit_group_args *args)
* as it doesnt occur often.
*/
exit1(td, W_EXITCODE(args->error_code, 0));
-
- return (0);
+ /* NOTREACHED */
}
#define _LINUX_CAPABILITY_VERSION 0x19980330
@@ -1789,24 +1952,23 @@ linux_prctl(struct thread *td, struct linux_prctl_args *args)
#ifdef DEBUG
if (ldebug(prctl))
- printf(ARGS(prctl, "%d, %d, %d, %d, %d"), args->option,
- args->arg2, args->arg3, args->arg4, args->arg5);
+ printf(ARGS(prctl, "%d, %ju, %ju, %ju, %ju"), args->option,
+ (uintmax_t)args->arg2, (uintmax_t)args->arg3,
+ (uintmax_t)args->arg4, (uintmax_t)args->arg5);
#endif
switch (args->option) {
case LINUX_PR_SET_PDEATHSIG:
if (!LINUX_SIG_VALID(args->arg2))
return (EINVAL);
- em = em_find(p, EMUL_DOLOCK);
+ em = em_find(td);
KASSERT(em != NULL, ("prctl: emuldata not found.\n"));
em->pdeath_signal = args->arg2;
- EMUL_UNLOCK(&emul_lock);
break;
case LINUX_PR_GET_PDEATHSIG:
- em = em_find(p, EMUL_DOLOCK);
+ em = em_find(td);
KASSERT(em != NULL, ("prctl: emuldata not found.\n"));
pdeath_signal = em->pdeath_signal;
- EMUL_UNLOCK(&emul_lock);
error = copyout(&pdeath_signal,
(void *)(register_t)args->arg2,
sizeof(pdeath_signal));
@@ -1871,6 +2033,57 @@ linux_prctl(struct thread *td, struct linux_prctl_args *args)
return (error);
}
+int
+linux_sched_setparam(struct thread *td,
+ struct linux_sched_setparam_args *uap)
+{
+ struct sched_param sched_param;
+ struct thread *tdt;
+ int error;
+
+#ifdef DEBUG
+ if (ldebug(sched_setparam))
+ printf(ARGS(sched_setparam, "%d, *"), uap->pid);
+#endif
+
+ error = copyin(uap->param, &sched_param, sizeof(sched_param));
+ if (error)
+ return (error);
+
+ tdt = linux_tdfind(td, uap->pid, -1);
+ if (tdt == NULL)
+ return (ESRCH);
+
+ error = kern_sched_setparam(td, tdt, &sched_param);
+ PROC_UNLOCK(tdt->td_proc);
+ return (error);
+}
+
+int
+linux_sched_getparam(struct thread *td,
+ struct linux_sched_getparam_args *uap)
+{
+ struct sched_param sched_param;
+ struct thread *tdt;
+ int error;
+
+#ifdef DEBUG
+ if (ldebug(sched_getparam))
+ printf(ARGS(sched_getparam, "%d, *"), uap->pid);
+#endif
+
+ tdt = linux_tdfind(td, uap->pid, -1);
+ if (tdt == NULL)
+ return (ESRCH);
+
+ error = kern_sched_getparam(td, tdt, &sched_param);
+ PROC_UNLOCK(tdt->td_proc);
+ if (error == 0)
+ error = copyout(&sched_param, uap->param,
+ sizeof(sched_param));
+ return (error);
+}
+
/*
* Get affinity of a process.
*/
@@ -1879,6 +2092,7 @@ linux_sched_getaffinity(struct thread *td,
struct linux_sched_getaffinity_args *args)
{
int error;
+ struct thread *tdt;
struct cpuset_getaffinity_args cga;
#ifdef DEBUG
@@ -1889,9 +2103,14 @@ linux_sched_getaffinity(struct thread *td,
if (args->len < sizeof(cpuset_t))
return (EINVAL);
+ tdt = linux_tdfind(td, args->pid, -1);
+ if (tdt == NULL)
+ return (ESRCH);
+
+ PROC_UNLOCK(tdt->td_proc);
cga.level = CPU_LEVEL_WHICH;
- cga.which = CPU_WHICH_PID;
- cga.id = args->pid;
+ cga.which = CPU_WHICH_TID;
+ cga.id = tdt->td_tid;
cga.cpusetsize = sizeof(cpuset_t);
cga.mask = (cpuset_t *) args->user_mask_ptr;
@@ -1909,6 +2128,7 @@ linux_sched_setaffinity(struct thread *td,
struct linux_sched_setaffinity_args *args)
{
struct cpuset_setaffinity_args csa;
+ struct thread *tdt;
#ifdef DEBUG
if (ldebug(sched_setaffinity))
@@ -1918,11 +2138,369 @@ linux_sched_setaffinity(struct thread *td,
if (args->len < sizeof(cpuset_t))
return (EINVAL);
+ tdt = linux_tdfind(td, args->pid, -1);
+ if (tdt == NULL)
+ return (ESRCH);
+
+ PROC_UNLOCK(tdt->td_proc);
csa.level = CPU_LEVEL_WHICH;
- csa.which = CPU_WHICH_PID;
- csa.id = args->pid;
+ csa.which = CPU_WHICH_TID;
+ csa.id = tdt->td_tid;
csa.cpusetsize = sizeof(cpuset_t);
csa.mask = (cpuset_t *) args->user_mask_ptr;
return (sys_cpuset_setaffinity(td, &csa));
}
+
+struct linux_rlimit64 {
+ uint64_t rlim_cur;
+ uint64_t rlim_max;
+};
+
+int
+linux_prlimit64(struct thread *td, struct linux_prlimit64_args *args)
+{
+ struct rlimit rlim, nrlim;
+ struct linux_rlimit64 lrlim;
+ struct proc *p;
+ u_int which;
+ int flags;
+ int error;
+
+#ifdef DEBUG
+ if (ldebug(prlimit64))
+ printf(ARGS(prlimit64, "%d, %d, %p, %p"), args->pid,
+ args->resource, (void *)args->new, (void *)args->old);
+#endif
+
+ if (args->resource >= LINUX_RLIM_NLIMITS)
+ return (EINVAL);
+
+ which = linux_to_bsd_resource[args->resource];
+ if (which == -1)
+ return (EINVAL);
+
+ if (args->new != NULL) {
+ /*
+ * Note: unlike FreeBSD, where rlim is signed 64-bit, Linux
+ * rlim is unsigned 64-bit. FreeBSD treats negative limits
+ * as INFINITY, so no conversion is needed here.
+ */
+ error = copyin(args->new, &nrlim, sizeof(nrlim));
+ if (error != 0)
+ return (error);
+ }
+
+ flags = PGET_HOLD | PGET_NOTWEXIT;
+ if (args->new != NULL)
+ flags |= PGET_CANDEBUG;
+ else
+ flags |= PGET_CANSEE;
+ error = pget(args->pid, flags, &p);
+ if (error != 0)
+ return (error);
+
+ if (args->old != NULL) {
+ PROC_LOCK(p);
+ lim_rlimit(p, which, &rlim);
+ PROC_UNLOCK(p);
+ if (rlim.rlim_cur == RLIM_INFINITY)
+ lrlim.rlim_cur = LINUX_RLIM_INFINITY;
+ else
+ lrlim.rlim_cur = rlim.rlim_cur;
+ if (rlim.rlim_max == RLIM_INFINITY)
+ lrlim.rlim_max = LINUX_RLIM_INFINITY;
+ else
+ lrlim.rlim_max = rlim.rlim_max;
+ error = copyout(&lrlim, args->old, sizeof(lrlim));
+ if (error != 0)
+ goto out;
+ }
+
+ if (args->new != NULL)
+ error = kern_proc_setrlimit(td, p, which, &nrlim);
+
+ out:
+ PRELE(p);
+ return (error);
+}
+
+int
+linux_pselect6(struct thread *td, struct linux_pselect6_args *args)
+{
+ struct timeval utv, tv0, tv1, *tvp;
+ struct l_pselect6arg lpse6;
+ struct l_timespec lts;
+ struct timespec uts;
+ l_sigset_t l_ss;
+ sigset_t *ssp;
+ sigset_t ss;
+ int error;
+
+ ssp = NULL;
+ if (args->sig != NULL) {
+ error = copyin(args->sig, &lpse6, sizeof(lpse6));
+ if (error != 0)
+ return (error);
+ if (lpse6.ss_len != sizeof(l_ss))
+ return (EINVAL);
+ if (lpse6.ss != 0) {
+ error = copyin(PTRIN(lpse6.ss), &l_ss,
+ sizeof(l_ss));
+ if (error != 0)
+ return (error);
+ linux_to_bsd_sigset(&l_ss, &ss);
+ ssp = &ss;
+ }
+ }
+
+ /*
+ * Currently glibc converts the nanosecond value to microseconds.
+ * This means losing precision, but for now it is hardly noticeable.
+ */
+ if (args->tsp != NULL) {
+ error = copyin(args->tsp, &lts, sizeof(lts));
+ if (error != 0)
+ return (error);
+ error = linux_to_native_timespec(&uts, &lts);
+ if (error != 0)
+ return (error);
+
+ TIMESPEC_TO_TIMEVAL(&utv, &uts);
+ if (itimerfix(&utv))
+ return (EINVAL);
+
+ microtime(&tv0);
+ tvp = &utv;
+ } else
+ tvp = NULL;
+
+ error = kern_pselect(td, args->nfds, args->readfds, args->writefds,
+ args->exceptfds, tvp, ssp, LINUX_NFDBITS);
+
+ if (error == 0 && args->tsp != NULL) {
+ if (td->td_retval[0] != 0) {
+ /*
+ * Compute how much time was left of the timeout,
+ * by subtracting the current time and the time
+ * before we started the call, and subtracting
+ * that result from the user-supplied value.
+ */
+
+ microtime(&tv1);
+ timevalsub(&tv1, &tv0);
+ timevalsub(&utv, &tv1);
+ if (utv.tv_sec < 0)
+ timevalclear(&utv);
+ } else
+ timevalclear(&utv);
+
+ TIMEVAL_TO_TIMESPEC(&utv, &uts);
+
+ native_to_linux_timespec(&lts, &uts);
+ error = copyout(&lts, args->tsp, sizeof(lts));
+ }
+
+ return (error);
+}
+
+int
+linux_ppoll(struct thread *td, struct linux_ppoll_args *args)
+{
+ struct timespec ts0, ts1;
+ struct l_timespec lts;
+ struct timespec uts, *tsp;
+ l_sigset_t l_ss;
+ sigset_t *ssp;
+ sigset_t ss;
+ int error;
+
+ if (args->sset != NULL) {
+ if (args->ssize != sizeof(l_ss))
+ return (EINVAL);
+ error = copyin(args->sset, &l_ss, sizeof(l_ss));
+ if (error)
+ return (error);
+ linux_to_bsd_sigset(&l_ss, &ss);
+ ssp = &ss;
+ } else
+ ssp = NULL;
+ if (args->tsp != NULL) {
+ error = copyin(args->tsp, &lts, sizeof(lts));
+ if (error)
+ return (error);
+ error = linux_to_native_timespec(&uts, &lts);
+ if (error != 0)
+ return (error);
+
+ nanotime(&ts0);
+ tsp = &uts;
+ } else
+ tsp = NULL;
+
+ error = kern_poll(td, args->fds, args->nfds, tsp, ssp);
+
+ if (error == 0 && args->tsp != NULL) {
+ if (td->td_retval[0]) {
+ nanotime(&ts1);
+ timespecsub(&ts1, &ts0);
+ timespecsub(&uts, &ts1);
+ if (uts.tv_sec < 0)
+ timespecclear(&uts);
+ } else
+ timespecclear(&uts);
+
+ native_to_linux_timespec(&lts, &uts);
+ error = copyout(&lts, args->tsp, sizeof(lts));
+ }
+
+ return (error);
+}
+
+#if defined(DEBUG) || defined(KTR)
+/* XXX: can be removed when every ldebug(...) and KTR stuff are removed. */
+
+u_char linux_debug_map[howmany(LINUX_SYS_MAXSYSCALL, sizeof(u_char))];
+
+static int
+linux_debug(int syscall, int toggle, int global)
+{
+
+ if (global) {
+ char c = toggle ? 0 : 0xff;
+
+ memset(linux_debug_map, c, sizeof(linux_debug_map));
+ return (0);
+ }
+ if (syscall < 0 || syscall >= LINUX_SYS_MAXSYSCALL)
+ return (EINVAL);
+ if (toggle)
+ clrbit(linux_debug_map, syscall);
+ else
+ setbit(linux_debug_map, syscall);
+ return (0);
+}
+
+/*
+ * Usage: sysctl linux.debug=<syscall_nr>.<0/1>
+ *
+ * E.g.: sysctl linux.debug=21.0
+ *
+ * As a special case, syscall "all" will apply to all syscalls globally.
+ */
+#define LINUX_MAX_DEBUGSTR 16
+int
+linux_sysctl_debug(SYSCTL_HANDLER_ARGS)
+{
+ char value[LINUX_MAX_DEBUGSTR], *p;
+ int error, sysc, toggle;
+ int global = 0;
+
+ value[0] = '\0';
+ error = sysctl_handle_string(oidp, value, LINUX_MAX_DEBUGSTR, req);
+ if (error || req->newptr == NULL)
+ return (error);
+ for (p = value; *p != '\0' && *p != '.'; p++);
+ if (*p == '\0')
+ return (EINVAL);
+ *p++ = '\0';
+ sysc = strtol(value, NULL, 0);
+ toggle = strtol(p, NULL, 0);
+ if (strcmp(value, "all") == 0)
+ global = 1;
+ error = linux_debug(sysc, toggle, global);
+ return (error);
+}
+
+#endif /* DEBUG || KTR */
+
+int
+linux_sched_rr_get_interval(struct thread *td,
+ struct linux_sched_rr_get_interval_args *uap)
+{
+ struct timespec ts;
+ struct l_timespec lts;
+ struct thread *tdt;
+ int error;
+
+ /*
+ * According to the manual page, EINVAL should be returned
+ * when an invalid pid is specified.
+ */
+ if (uap->pid < 0)
+ return (EINVAL);
+
+ tdt = linux_tdfind(td, uap->pid, -1);
+ if (tdt == NULL)
+ return (ESRCH);
+
+ error = kern_sched_rr_get_interval_td(td, tdt, &ts);
+ PROC_UNLOCK(tdt->td_proc);
+ if (error != 0)
+ return (error);
+ native_to_linux_timespec(&lts, &ts);
+ return (copyout(&lts, uap->interval, sizeof(lts)));
+}
+
+/*
+ * When the Linux thread is the initial thread in the thread
+ * group, its thread id is equal to the process id.
+ * Glibc depends on this magic (assert in pthread_getattr_np.c).
+ */
+struct thread *
+linux_tdfind(struct thread *td, lwpid_t tid, pid_t pid)
+{
+ struct linux_emuldata *em;
+ struct thread *tdt;
+ struct proc *p;
+
+ tdt = NULL;
+ if (tid == 0 || tid == td->td_tid) {
+ tdt = td;
+ PROC_LOCK(tdt->td_proc);
+ } else if (tid > PID_MAX)
+ tdt = tdfind(tid, pid);
+ else {
+ /*
+ * Initial thread, where the tid is equal to the pid.
+ */
+ p = pfind(tid);
+ if (p != NULL) {
+ if (SV_PROC_ABI(p) != SV_ABI_LINUX) {
+ /*
+ * p is not a Linuxulator process.
+ */
+ PROC_UNLOCK(p);
+ return (NULL);
+ }
+ FOREACH_THREAD_IN_PROC(p, tdt) {
+ em = em_find(tdt);
+ if (tid == em->em_tid)
+ return (tdt);
+ }
+ PROC_UNLOCK(p);
+ }
+ return (NULL);
+ }
+
+ return (tdt);
+}
+
+void
+linux_to_bsd_waitopts(int options, int *bsdopts)
+{
+
+ if (options & LINUX_WNOHANG)
+ *bsdopts |= WNOHANG;
+ if (options & LINUX_WUNTRACED)
+ *bsdopts |= WUNTRACED;
+ if (options & LINUX_WEXITED)
+ *bsdopts |= WEXITED;
+ if (options & LINUX_WCONTINUED)
+ *bsdopts |= WCONTINUED;
+ if (options & LINUX_WNOWAIT)
+ *bsdopts |= WNOWAIT;
+
+ if (options & __WCLONE)
+ *bsdopts |= WLINUXCLONE;
+}
diff --git a/sys/compat/linux/linux_misc.h b/sys/compat/linux/linux_misc.h
index 154d78f..f969c4d 100644
--- a/sys/compat/linux/linux_misc.h
+++ b/sys/compat/linux/linux_misc.h
@@ -31,6 +31,11 @@
#ifndef _LINUX_MISC_H_
#define _LINUX_MISC_H_
+#include <sys/sysctl.h>
+
+ /* bits per l_fd_mask word; parenthesized so it is safe inside larger expressions */
+#define LINUX_NFDBITS (sizeof(l_fd_mask) * 8)
+
/*
* Miscellaneous
*/
@@ -55,7 +60,7 @@
#define LINUX_MREMAP_MAYMOVE 1
#define LINUX_MREMAP_FIXED 2
-extern const char *linux_platform;
+extern const char *linux_kplatform;
/*
* Non-standard aux entry types used in Linux ELF binaries.
@@ -68,7 +73,12 @@ extern const char *linux_platform;
#define LINUX_AT_BASE_PLATFORM 24 /* string identifying real platform, may
* differ from AT_PLATFORM.
*/
+#define LINUX_AT_RANDOM 25 /* address of random bytes */
#define LINUX_AT_EXECFN 31 /* filename of program */
+#define LINUX_AT_SYSINFO 32 /* vsyscall */
+#define LINUX_AT_SYSINFO_EHDR 33 /* vdso header */
+
+#define LINUX_AT_RANDOM_LEN 16 /* size of random bytes */
/* Linux sets the i387 to extended precision. */
#if defined(__i386__) || defined(__amd64__)
@@ -88,10 +98,6 @@ extern const char *linux_platform;
#define LINUX_CLONE_CHILD_CLEARTID 0x00200000
#define LINUX_CLONE_CHILD_SETTID 0x01000000
-#define LINUX_THREADING_FLAGS \
- (LINUX_CLONE_VM | LINUX_CLONE_FS | LINUX_CLONE_FILES | \
- LINUX_CLONE_SIGHAND | LINUX_CLONE_THREAD)
-
/* Scheduling policies */
#define LINUX_SCHED_OTHER 0
#define LINUX_SCHED_FIFO 1
@@ -113,13 +119,37 @@ struct l_new_utsname {
#define LINUX_CLOCK_REALTIME_HR 4
#define LINUX_CLOCK_MONOTONIC_HR 5
+#define LINUX_UTIME_NOW 0x3FFFFFFF
+#define LINUX_UTIME_OMIT 0x3FFFFFFE
+
extern int stclohz;
-#define __WCLONE 0x80000000
+#define LINUX_WNOHANG 0x00000001
+#define LINUX_WUNTRACED 0x00000002
+#define LINUX_WSTOPPED LINUX_WUNTRACED
+#define LINUX_WEXITED 0x00000004
+#define LINUX_WCONTINUED 0x00000008
+#define LINUX_WNOWAIT 0x01000000
+
+
+#define __WNOTHREAD 0x20000000
+#define __WALL 0x40000000
+#define __WCLONE 0x80000000
+
+/* Linux waitid idtype */
+#define LINUX_P_ALL 0
+#define LINUX_P_PID 1
+#define LINUX_P_PGID 2
+
+#define LINUX_RLIM_INFINITY (~0ULL) /* all-ones in 64 bits even on ILP32; stored in uint64_t rlimit fields */
int linux_common_wait(struct thread *td, int pid, int *status,
int options, struct rusage *ru);
+void linux_to_bsd_waitopts(int options, int *bsdopts);
int linux_set_upcall_kse(struct thread *td, register_t stack);
int linux_set_cloned_tls(struct thread *td, void *desc);
+struct thread *linux_tdfind(struct thread *, lwpid_t, pid_t);
+
+int linux_sysctl_debug(SYSCTL_HANDLER_ARGS);
#endif /* _LINUX_MISC_H_ */
diff --git a/sys/compat/linux/linux_signal.c b/sys/compat/linux/linux_signal.c
index 1c778f9..0ecf537 100644
--- a/sys/compat/linux/linux_signal.c
+++ b/sys/compat/linux/linux_signal.c
@@ -53,40 +53,12 @@ __FBSDID("$FreeBSD$");
#include <compat/linux/linux_signal.h>
#include <compat/linux/linux_util.h>
#include <compat/linux/linux_emul.h>
+#include <compat/linux/linux_misc.h>
-void
-linux_to_bsd_sigset(l_sigset_t *lss, sigset_t *bss)
-{
- int b, l;
-
- SIGEMPTYSET(*bss);
- bss->__bits[0] = lss->__bits[0] & ~((1U << LINUX_SIGTBLSZ) - 1);
- bss->__bits[1] = lss->__bits[1];
- for (l = 1; l <= LINUX_SIGTBLSZ; l++) {
- if (LINUX_SIGISMEMBER(*lss, l)) {
- b = linux_to_bsd_signal[_SIG_IDX(l)];
- if (b)
- SIGADDSET(*bss, b);
- }
- }
-}
+static int linux_do_tkill(struct thread *td, struct thread *tdt,
+ ksiginfo_t *ksi);
+static void sicode_to_lsicode(int si_code, int *lsi_code);
-void
-bsd_to_linux_sigset(sigset_t *bss, l_sigset_t *lss)
-{
- int b, l;
-
- LINUX_SIGEMPTYSET(*lss);
- lss->__bits[0] = bss->__bits[0] & ~((1U << LINUX_SIGTBLSZ) - 1);
- lss->__bits[1] = bss->__bits[1];
- for (b = 1; b <= LINUX_SIGTBLSZ; b++) {
- if (SIGISMEMBER(*bss, b)) {
- l = bsd_to_linux_signal[_SIG_IDX(b)];
- if (l)
- LINUX_SIGADDSET(*lss, l);
- }
- }
-}
static void
linux_to_bsd_sigaction(l_sigaction_t *lsa, struct sigaction *bsa)
@@ -155,11 +127,7 @@ linux_do_sigaction(struct thread *td, int linux_sig, l_sigaction_t *linux_nsa,
linux_to_bsd_sigaction(linux_nsa, nsa);
} else
nsa = NULL;
-
- if (linux_sig <= LINUX_SIGTBLSZ)
- sig = linux_to_bsd_signal[_SIG_IDX(linux_sig)];
- else
- sig = linux_sig;
+ sig = linux_to_bsd_signal(linux_sig);
error = kern_sigaction(td, sig, nsa, osa, 0);
if (error)
@@ -171,7 +139,7 @@ linux_do_sigaction(struct thread *td, int linux_sig, l_sigaction_t *linux_nsa,
return (0);
}
-
+#if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
int
linux_signal(struct thread *td, struct linux_signal_args *args)
{
@@ -193,6 +161,7 @@ linux_signal(struct thread *td, struct linux_signal_args *args)
return (error);
}
+#endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */
int
linux_rt_sigaction(struct thread *td, struct linux_rt_sigaction_args *args)
@@ -262,6 +231,7 @@ linux_do_sigprocmask(struct thread *td, int how, l_sigset_t *new,
return (error);
}
+#if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
int
linux_sigprocmask(struct thread *td, struct linux_sigprocmask_args *args)
{
@@ -279,7 +249,7 @@ linux_sigprocmask(struct thread *td, struct linux_sigprocmask_args *args)
if (error)
return (error);
LINUX_SIGEMPTYSET(set);
- set.__bits[0] = mask;
+ set.__mask = mask;
}
error = linux_do_sigprocmask(td, args->how,
@@ -287,12 +257,13 @@ linux_sigprocmask(struct thread *td, struct linux_sigprocmask_args *args)
args->omask ? &oset : NULL);
if (args->omask != NULL && !error) {
- mask = oset.__bits[0];
+ mask = oset.__mask;
error = copyout(&mask, args->omask, sizeof(l_osigset_t));
}
return (error);
}
+#endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */
int
linux_rt_sigprocmask(struct thread *td, struct linux_rt_sigprocmask_args *args)
@@ -327,6 +298,7 @@ linux_rt_sigprocmask(struct thread *td, struct linux_rt_sigprocmask_args *args)
return (error);
}
+#if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
int
linux_sgetmask(struct thread *td, struct linux_sgetmask_args *args)
{
@@ -341,7 +313,7 @@ linux_sgetmask(struct thread *td, struct linux_sgetmask_args *args)
PROC_LOCK(p);
bsd_to_linux_sigset(&td->td_sigmask, &mask);
PROC_UNLOCK(p);
- td->td_retval[0] = mask.__bits[0];
+ td->td_retval[0] = mask.__mask;
return (0);
}
@@ -359,9 +331,9 @@ linux_ssetmask(struct thread *td, struct linux_ssetmask_args *args)
PROC_LOCK(p);
bsd_to_linux_sigset(&td->td_sigmask, &lset);
- td->td_retval[0] = lset.__bits[0];
+ td->td_retval[0] = lset.__mask;
LINUX_SIGEMPTYSET(lset);
- lset.__bits[0] = args->mask;
+ lset.__mask = args->mask;
linux_to_bsd_sigset(&lset, &bset);
td->td_sigmask = bset;
SIG_CANTMASK(td->td_sigmask);
@@ -370,9 +342,6 @@ linux_ssetmask(struct thread *td, struct linux_ssetmask_args *args)
return (0);
}
-/*
- * MPSAFE
- */
int
linux_sigpending(struct thread *td, struct linux_sigpending_args *args)
{
@@ -392,9 +361,10 @@ linux_sigpending(struct thread *td, struct linux_sigpending_args *args)
SIGSETAND(bset, td->td_sigmask);
PROC_UNLOCK(p);
bsd_to_linux_sigset(&bset, &lset);
- mask = lset.__bits[0];
+ mask = lset.__mask;
return (copyout(&mask, args->mask, sizeof(mask)));
}
+#endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */
/*
* MPSAFE
@@ -458,8 +428,8 @@ linux_rt_sigtimedwait(struct thread *td,
#ifdef DEBUG
if (ldebug(rt_sigtimedwait))
printf(LMSG("linux_rt_sigtimedwait: "
- "incoming timeout (%d/%d)\n"),
- ltv.tv_sec, ltv.tv_usec);
+ "incoming timeout (%jd/%jd)\n"),
+ (intmax_t)ltv.tv_sec, (intmax_t)ltv.tv_usec);
#endif
tv.tv_sec = (long)ltv.tv_sec;
tv.tv_usec = (suseconds_t)ltv.tv_usec;
@@ -495,7 +465,7 @@ linux_rt_sigtimedwait(struct thread *td,
if (error)
return (error);
- sig = BSD_TO_LINUX_SIGNAL(info.ksi_signo);
+ sig = bsd_to_linux_signal(info.ksi_signo);
if (args->ptr) {
memset(&linfo, 0, sizeof(linfo));
@@ -527,66 +497,31 @@ linux_kill(struct thread *td, struct linux_kill_args *args)
if (!LINUX_SIG_VALID(args->signum) && args->signum != 0)
return (EINVAL);
- if (args->signum > 0 && args->signum <= LINUX_SIGTBLSZ)
- tmp.signum = linux_to_bsd_signal[_SIG_IDX(args->signum)];
+ if (args->signum > 0)
+ tmp.signum = linux_to_bsd_signal(args->signum);
else
- tmp.signum = args->signum;
+ tmp.signum = 0;
tmp.pid = args->pid;
return (sys_kill(td, &tmp));
}
static int
-linux_do_tkill(struct thread *td, l_int tgid, l_int pid, l_int signum)
+linux_do_tkill(struct thread *td, struct thread *tdt, ksiginfo_t *ksi)
{
- struct proc *proc = td->td_proc;
- struct linux_emuldata *em;
struct proc *p;
- ksiginfo_t ksi;
int error;
- AUDIT_ARG_SIGNUM(signum);
- AUDIT_ARG_PID(pid);
-
- /*
- * Allow signal 0 as a means to check for privileges
- */
- if (!LINUX_SIG_VALID(signum) && signum != 0)
- return (EINVAL);
-
- if (signum > 0 && signum <= LINUX_SIGTBLSZ)
- signum = linux_to_bsd_signal[_SIG_IDX(signum)];
-
- if ((p = pfind(pid)) == NULL) {
- if ((p = zpfind(pid)) == NULL)
- return (ESRCH);
- }
-
+ p = tdt->td_proc;
+ AUDIT_ARG_SIGNUM(ksi->ksi_signo);
+ AUDIT_ARG_PID(p->p_pid);
AUDIT_ARG_PROCESS(p);
- error = p_cansignal(td, p, signum);
- if (error != 0 || signum == 0)
- goto out;
-
- error = ESRCH;
- em = em_find(p, EMUL_DONTLOCK);
- if (em == NULL) {
-#ifdef DEBUG
- printf("emuldata not found in do_tkill.\n");
-#endif
+ error = p_cansignal(td, p, ksi->ksi_signo);
+ if (error != 0 || ksi->ksi_signo == 0)
goto out;
- }
- if (tgid > 0 && em->shared->group_pid != tgid)
- goto out;
-
- ksiginfo_init(&ksi);
- ksi.ksi_signo = signum;
- ksi.ksi_code = LINUX_SI_TKILL;
- ksi.ksi_errno = 0;
- ksi.ksi_pid = proc->p_pid;
- ksi.ksi_uid = proc->p_ucred->cr_ruid;
- error = pksignal(p, ksi.ksi_signo, &ksi);
+ tdksignal(tdt, ksi->ksi_signo, ksi);
out:
PROC_UNLOCK(p);
@@ -596,20 +531,53 @@ out:
int
linux_tgkill(struct thread *td, struct linux_tgkill_args *args)
{
+ struct thread *tdt;
+ ksiginfo_t ksi;
+ int sig;
#ifdef DEBUG
if (ldebug(tgkill))
- printf(ARGS(tgkill, "%d, %d, %d"), args->tgid, args->pid, args->sig);
+ printf(ARGS(tgkill, "%d, %d, %d"),
+ args->tgid, args->pid, args->sig);
#endif
+
if (args->pid <= 0 || args->tgid <=0)
return (EINVAL);
- return (linux_do_tkill(td, args->tgid, args->pid, args->sig));
+ /*
+ * Allow signal 0 as a means to check for privileges
+ */
+ if (!LINUX_SIG_VALID(args->sig) && args->sig != 0)
+ return (EINVAL);
+
+ if (args->sig > 0)
+ sig = linux_to_bsd_signal(args->sig);
+ else
+ sig = 0;
+
+ tdt = linux_tdfind(td, args->pid, args->tgid);
+ if (tdt == NULL)
+ return (ESRCH);
+
+ ksiginfo_init(&ksi);
+ ksi.ksi_signo = sig;
+ ksi.ksi_code = SI_LWP;
+ ksi.ksi_errno = 0;
+ ksi.ksi_pid = td->td_proc->p_pid;
+ ksi.ksi_uid = td->td_proc->p_ucred->cr_ruid;
+ return (linux_do_tkill(td, tdt, &ksi));
}
+/*
+ * Deprecated since 2.5.75. Replaced by tgkill().
+ */
int
linux_tkill(struct thread *td, struct linux_tkill_args *args)
{
+ struct thread *tdt;
+ ksiginfo_t ksi;
+ int sig;
+
#ifdef DEBUG
if (ldebug(tkill))
printf(ARGS(tkill, "%i, %i"), args->tid, args->sig);
@@ -617,40 +585,182 @@ linux_tkill(struct thread *td, struct linux_tkill_args *args)
if (args->tid <= 0)
return (EINVAL);
- return (linux_do_tkill(td, 0, args->tid, args->sig));
+ if (!LINUX_SIG_VALID(args->sig))
+ return (EINVAL);
+
+ sig = linux_to_bsd_signal(args->sig);
+
+ tdt = linux_tdfind(td, args->tid, -1);
+ if (tdt == NULL)
+ return (ESRCH);
+
+ ksiginfo_init(&ksi);
+ ksi.ksi_signo = sig;
+ ksi.ksi_code = SI_LWP;
+ ksi.ksi_errno = 0;
+ ksi.ksi_pid = td->td_proc->p_pid;
+ ksi.ksi_uid = td->td_proc->p_ucred->cr_ruid;
+ return (linux_do_tkill(td, tdt, &ksi));
+}
+
+void
+ksiginfo_to_lsiginfo(const ksiginfo_t *ksi, l_siginfo_t *lsi, l_int sig)
+{
+
+ siginfo_to_lsiginfo(&ksi->ksi_info, lsi, sig);
+}
+
+static void
+sicode_to_lsicode(int si_code, int *lsi_code)
+{
+
+ switch (si_code) {
+ case SI_USER:
+ *lsi_code = LINUX_SI_USER;
+ break;
+ case SI_KERNEL:
+ *lsi_code = LINUX_SI_KERNEL;
+ break;
+ case SI_QUEUE:
+ *lsi_code = LINUX_SI_QUEUE;
+ break;
+ case SI_TIMER:
+ *lsi_code = LINUX_SI_TIMER;
+ break;
+ case SI_MESGQ:
+ *lsi_code = LINUX_SI_MESGQ;
+ break;
+ case SI_ASYNCIO:
+ *lsi_code = LINUX_SI_ASYNCIO;
+ break;
+ case SI_LWP:
+ *lsi_code = LINUX_SI_TKILL;
+ break;
+ default:
+ *lsi_code = si_code;
+ break;
+ }
}
void
-ksiginfo_to_lsiginfo(ksiginfo_t *ksi, l_siginfo_t *lsi, l_int sig)
+siginfo_to_lsiginfo(const siginfo_t *si, l_siginfo_t *lsi, l_int sig)
{
+ /* sig is already converted */
lsi->lsi_signo = sig;
- lsi->lsi_code = ksi->ksi_code;
+ sicode_to_lsicode(si->si_code, &lsi->lsi_code);
- switch (sig) {
- case LINUX_SIGPOLL:
- /* XXX si_fd? */
- lsi->lsi_band = ksi->ksi_band;
+ switch (si->si_code) {
+ case SI_LWP:
+ lsi->lsi_pid = si->si_pid;
+ lsi->lsi_uid = si->si_uid;
break;
- case LINUX_SIGCHLD:
- lsi->lsi_pid = ksi->ksi_pid;
- lsi->lsi_uid = ksi->ksi_uid;
- lsi->lsi_status = ksi->ksi_status;
+
+ case SI_TIMER:
+ lsi->lsi_int = si->si_value.sival_int;
+ lsi->lsi_ptr = PTROUT(si->si_value.sival_ptr);
+ lsi->lsi_tid = si->si_timerid;
break;
- case LINUX_SIGBUS:
- case LINUX_SIGILL:
- case LINUX_SIGFPE:
- case LINUX_SIGSEGV:
- lsi->lsi_addr = PTROUT(ksi->ksi_addr);
+
+ case SI_QUEUE:
+ lsi->lsi_pid = si->si_pid;
+ lsi->lsi_uid = si->si_uid;
+ lsi->lsi_ptr = PTROUT(si->si_value.sival_ptr);
break;
+
+ case SI_ASYNCIO:
+ lsi->lsi_int = si->si_value.sival_int;
+ lsi->lsi_ptr = PTROUT(si->si_value.sival_ptr);
+ break;
+
default:
- /* XXX SI_TIMER etc... */
- lsi->lsi_pid = ksi->ksi_pid;
- lsi->lsi_uid = ksi->ksi_uid;
+ switch (sig) {
+ case LINUX_SIGPOLL:
+ /* XXX si_fd? */
+ lsi->lsi_band = si->si_band;
+ break;
+
+ case LINUX_SIGCHLD:
+ lsi->lsi_errno = 0;
+ lsi->lsi_pid = si->si_pid;
+ lsi->lsi_uid = si->si_uid;
+
+ if (si->si_code == CLD_STOPPED)
+ lsi->lsi_status = bsd_to_linux_signal(si->si_status);
+ else if (si->si_code == CLD_CONTINUED)
+ lsi->lsi_status = bsd_to_linux_signal(SIGCONT);
+ else
+ lsi->lsi_status = si->si_status;
+ break;
+
+ case LINUX_SIGBUS:
+ case LINUX_SIGILL:
+ case LINUX_SIGFPE:
+ case LINUX_SIGSEGV:
+ lsi->lsi_addr = PTROUT(si->si_addr);
+ break;
+
+ default:
+ lsi->lsi_pid = si->si_pid;
+ lsi->lsi_uid = si->si_uid;
+ if (sig >= LINUX_SIGRTMIN) {
+ lsi->lsi_int = si->si_value.sival_int;
+ lsi->lsi_ptr = PTROUT(si->si_value.sival_ptr);
+ }
+ break;
+ }
break;
}
- if (sig >= LINUX_SIGRTMIN) {
- lsi->lsi_int = ksi->ksi_info.si_value.sival_int;
- lsi->lsi_ptr = PTROUT(ksi->ksi_info.si_value.sival_ptr);
+}
+
+void
+lsiginfo_to_ksiginfo(const l_siginfo_t *lsi, ksiginfo_t *ksi, int sig)
+{
+
+ ksi->ksi_signo = sig;
+ ksi->ksi_code = lsi->lsi_code; /* XXX. Convert. */
+ ksi->ksi_pid = lsi->lsi_pid;
+ ksi->ksi_uid = lsi->lsi_uid;
+ ksi->ksi_status = lsi->lsi_status;
+ ksi->ksi_addr = PTRIN(lsi->lsi_addr);
+ ksi->ksi_info.si_value.sival_int = lsi->lsi_int;
+}
+
+int
+linux_rt_sigqueueinfo(struct thread *td, struct linux_rt_sigqueueinfo_args *args)
+{
+ l_siginfo_t linfo;
+ struct proc *p;
+ ksiginfo_t ksi;
+ int error;
+ int sig;
+
+ if (!LINUX_SIG_VALID(args->sig))
+ return (EINVAL);
+
+ error = copyin(args->info, &linfo, sizeof(linfo));
+ if (error != 0)
+ return (error);
+
+ if (linfo.lsi_code >= 0)
+ return (EPERM);
+
+ sig = linux_to_bsd_signal(args->sig);
+
+ error = ESRCH;
+ if ((p = pfind(args->pid)) != NULL ||
+ (p = zpfind(args->pid)) != NULL) {
+ error = p_cansignal(td, p, sig);
+ if (error != 0) {
+ PROC_UNLOCK(p);
+ return (error);
+ }
+
+ ksiginfo_init(&ksi);
+ lsiginfo_to_ksiginfo(&linfo, &ksi, sig);
+ error = tdsendsignal(p, NULL, sig, &ksi);
+ PROC_UNLOCK(p);
}
+
+ return (error);
}
diff --git a/sys/compat/linux/linux_signal.h b/sys/compat/linux/linux_signal.h
index 426cf43..510bfb3 100644
--- a/sys/compat/linux/linux_signal.h
+++ b/sys/compat/linux/linux_signal.h
@@ -31,19 +31,21 @@
#ifndef _LINUX_SIGNAL_H_
#define _LINUX_SIGNAL_H_
-#define LINUX_SI_TKILL -6;
-
-extern int bsd_to_linux_signal[];
-extern int linux_to_bsd_signal[];
+/*
+ * si_code values
+ */
+#define LINUX_SI_USER 0 /* sent by kill, sigsend, raise */
+#define LINUX_SI_KERNEL 0x80 /* sent by the kernel from somewhere */
+#define LINUX_SI_QUEUE -1 /* sent by sigqueue */
+#define LINUX_SI_TIMER -2 /* sent by timer expiration */
+#define LINUX_SI_MESGQ -3 /* sent by real time mesq state change */
+#define LINUX_SI_ASYNCIO -4 /* sent by AIO completion */
+#define LINUX_SI_SIGIO -5 /* sent by queued SIGIO */
+#define LINUX_SI_TKILL -6 /* sent by tkill system call */
-void linux_to_bsd_sigset(l_sigset_t *, sigset_t *);
-void bsd_to_linux_sigset(sigset_t *, l_sigset_t *);
int linux_do_sigaction(struct thread *, int, l_sigaction_t *, l_sigaction_t *);
-void ksiginfo_to_lsiginfo(ksiginfo_t *ksi, l_siginfo_t *lsi, l_int sig);
-
-#define LINUX_SIG_VALID(sig) ((sig) <= LINUX_NSIG && (sig) > 0)
-
-#define BSD_TO_LINUX_SIGNAL(sig) \
- (((sig) <= LINUX_SIGTBLSZ) ? bsd_to_linux_signal[_SIG_IDX(sig)] : sig)
+void ksiginfo_to_lsiginfo(const ksiginfo_t *ksi, l_siginfo_t *lsi, l_int sig);
+void siginfo_to_lsiginfo(const siginfo_t *si, l_siginfo_t *lsi, l_int sig);
+void lsiginfo_to_ksiginfo(const l_siginfo_t *lsi, ksiginfo_t *ksi, int sig);
#endif /* _LINUX_SIGNAL_H_ */
diff --git a/sys/compat/linux/linux_socket.c b/sys/compat/linux/linux_socket.c
index 4b07b5c..34af9da 100644
--- a/sys/compat/linux/linux_socket.c
+++ b/sys/compat/linux/linux_socket.c
@@ -70,10 +70,17 @@ __FBSDID("$FreeBSD$");
#include <machine/../linux/linux.h>
#include <machine/../linux/linux_proto.h>
#endif
+#include <compat/linux/linux_file.h>
#include <compat/linux/linux_socket.h>
+#include <compat/linux/linux_timer.h>
#include <compat/linux/linux_util.h>
static int linux_to_bsd_domain(int);
+static int linux_sendmsg_common(struct thread *, l_int, struct l_msghdr *,
+ l_uint);
+static int linux_recvmsg_common(struct thread *, l_int, struct l_msghdr *,
+ l_uint, struct msghdr *);
+static int linux_set_socket_flags(int, int *);
/*
* Reads a linux sockaddr and does any necessary translation.
@@ -428,7 +435,6 @@ linux_to_bsd_sockaddr(struct sockaddr *arg, int len)
return (error);
}
-
static int
linux_sa_put(struct osockaddr *osa)
{
@@ -477,6 +483,8 @@ bsd_to_linux_cmsg_type(int cmsg_type)
return (LINUX_SCM_RIGHTS);
case SCM_CREDS:
return (LINUX_SCM_CREDENTIALS);
+ case SCM_TIMESTAMP:
+ return (LINUX_SCM_TIMESTAMP);
}
return (-1);
}
@@ -529,20 +537,15 @@ bsd_to_linux_msghdr(const struct msghdr *bhdr, struct l_msghdr *lhdr)
}
static int
-linux_set_socket_flags(struct thread *td, int s, int flags)
+linux_set_socket_flags(int lflags, int *flags)
{
- int error;
- if (flags & LINUX_SOCK_NONBLOCK) {
- error = kern_fcntl(td, s, F_SETFL, O_NONBLOCK);
- if (error)
- return (error);
- }
- if (flags & LINUX_SOCK_CLOEXEC) {
- error = kern_fcntl(td, s, F_SETFD, FD_CLOEXEC);
- if (error)
- return (error);
- }
+ if (lflags & ~(LINUX_SOCK_CLOEXEC | LINUX_SOCK_NONBLOCK))
+ return (EINVAL);
+ if (lflags & LINUX_SOCK_NONBLOCK)
+ *flags |= SOCK_NONBLOCK;
+ if (lflags & LINUX_SOCK_CLOEXEC)
+ *flags |= SOCK_CLOEXEC;
return (0);
}
@@ -585,15 +588,6 @@ linux_check_hdrincl(struct thread *td, int s)
return (optval == 0);
}
-struct linux_sendto_args {
- int s;
- l_uintptr_t msg;
- int len;
- int flags;
- l_uintptr_t to;
- int tolen;
-};
-
/*
* Updated sendto() when IP_HDRINCL is set:
* tweak endian-dependent fields in the IP packet.
@@ -618,7 +612,7 @@ linux_sendto_hdrincl(struct thread *td, struct linux_sendto_args *linux_args)
linux_args->len > IP_MAXPACKET)
return (EINVAL);
- packet = (struct ip *)malloc(linux_args->len, M_TEMP, M_WAITOK);
+ packet = (struct ip *)malloc(linux_args->len, M_LINUX, M_WAITOK);
/* Make kernel copy of the packet to be sent */
if ((error = copyin(PTRIN(linux_args->msg), packet,
@@ -641,17 +635,11 @@ linux_sendto_hdrincl(struct thread *td, struct linux_sendto_args *linux_args)
error = linux_sendit(td, linux_args->s, &msg, linux_args->flags,
NULL, UIO_SYSSPACE);
goout:
- free(packet, M_TEMP);
+ free(packet, M_LINUX);
return (error);
}
-struct linux_socket_args {
- int domain;
- int type;
- int protocol;
-};
-
-static int
+int
linux_socket(struct thread *td, struct linux_socket_args *args)
{
struct socket_args /* {
@@ -659,15 +647,16 @@ linux_socket(struct thread *td, struct linux_socket_args *args)
int type;
int protocol;
} */ bsd_args;
- int retval_socket, socket_flags;
+ int retval_socket;
bsd_args.protocol = args->protocol;
- socket_flags = args->type & ~LINUX_SOCK_TYPE_MASK;
- if (socket_flags & ~(LINUX_SOCK_CLOEXEC | LINUX_SOCK_NONBLOCK))
- return (EINVAL);
bsd_args.type = args->type & LINUX_SOCK_TYPE_MASK;
if (bsd_args.type < 0 || bsd_args.type > LINUX_SOCK_MAX)
return (EINVAL);
+ retval_socket = linux_set_socket_flags(args->type & ~LINUX_SOCK_TYPE_MASK,
+ &bsd_args.type);
+ if (retval_socket != 0)
+ return (retval_socket);
bsd_args.domain = linux_to_bsd_domain(args->domain);
if (bsd_args.domain == -1)
return (EAFNOSUPPORT);
@@ -676,13 +665,6 @@ linux_socket(struct thread *td, struct linux_socket_args *args)
if (retval_socket)
return (retval_socket);
- retval_socket = linux_set_socket_flags(td, td->td_retval[0],
- socket_flags);
- if (retval_socket) {
- (void)kern_close(td, td->td_retval[0]);
- goto out;
- }
-
if (bsd_args.type == SOCK_RAW
&& (bsd_args.protocol == IPPROTO_RAW || bsd_args.protocol == 0)
&& bsd_args.domain == PF_INET) {
@@ -711,17 +693,10 @@ linux_socket(struct thread *td, struct linux_socket_args *args)
}
#endif
-out:
return (retval_socket);
}
-struct linux_bind_args {
- int s;
- l_uintptr_t name;
- int namelen;
-};
-
-static int
+int
linux_bind(struct thread *td, struct linux_bind_args *args)
{
struct sockaddr *sa;
@@ -739,13 +714,6 @@ linux_bind(struct thread *td, struct linux_bind_args *args)
return (error);
}
-struct linux_connect_args {
- int s;
- l_uintptr_t name;
- int namelen;
-};
-int linux_connect(struct thread *, struct linux_connect_args *);
-
int
linux_connect(struct thread *td, struct linux_connect_args *args)
{
@@ -790,12 +758,7 @@ linux_connect(struct thread *td, struct linux_connect_args *args)
return (error);
}
-struct linux_listen_args {
- int s;
- int backlog;
-};
-
-static int
+int
linux_listen(struct thread *td, struct linux_listen_args *args)
{
struct listen_args /* {
@@ -812,43 +775,31 @@ static int
linux_accept_common(struct thread *td, int s, l_uintptr_t addr,
l_uintptr_t namelen, int flags)
{
- struct accept_args /* {
+ struct accept4_args /* {
int s;
struct sockaddr * __restrict name;
socklen_t * __restrict anamelen;
+ int flags;
} */ bsd_args;
int error;
- if (flags & ~(LINUX_SOCK_CLOEXEC | LINUX_SOCK_NONBLOCK))
- return (EINVAL);
-
bsd_args.s = s;
/* XXX: */
bsd_args.name = (struct sockaddr * __restrict)PTRIN(addr);
bsd_args.anamelen = PTRIN(namelen);/* XXX */
- error = sys_accept(td, &bsd_args);
+ bsd_args.flags = 0;
+ error = linux_set_socket_flags(flags, &bsd_args.flags);
+ if (error != 0)
+ return (error);
+ error = sys_accept4(td, &bsd_args);
bsd_to_linux_sockaddr((struct sockaddr *)bsd_args.name);
if (error) {
if (error == EFAULT && namelen != sizeof(struct sockaddr_in))
return (EINVAL);
return (error);
}
-
- /*
- * linux appears not to copy flags from the parent socket to the
- * accepted one, so we must clear the flags in the new descriptor
- * and apply the requested flags.
- */
- error = kern_fcntl(td, td->td_retval[0], F_SETFL, 0);
- if (error)
- goto out;
- error = linux_set_socket_flags(td, td->td_retval[0], flags);
- if (error)
- goto out;
if (addr)
error = linux_sa_put(PTRIN(addr));
-
-out:
if (error) {
(void)kern_close(td, td->td_retval[0]);
td->td_retval[0] = 0;
@@ -856,13 +807,7 @@ out:
return (error);
}
-struct linux_accept_args {
- int s;
- l_uintptr_t addr;
- l_uintptr_t namelen;
-};
-
-static int
+int
linux_accept(struct thread *td, struct linux_accept_args *args)
{
@@ -870,14 +815,7 @@ linux_accept(struct thread *td, struct linux_accept_args *args)
args->namelen, 0));
}
-struct linux_accept4_args {
- int s;
- l_uintptr_t addr;
- l_uintptr_t namelen;
- int flags;
-};
-
-static int
+int
linux_accept4(struct thread *td, struct linux_accept4_args *args)
{
@@ -885,13 +823,7 @@ linux_accept4(struct thread *td, struct linux_accept4_args *args)
args->namelen, args->flags));
}
-struct linux_getsockname_args {
- int s;
- l_uintptr_t addr;
- l_uintptr_t namelen;
-};
-
-static int
+int
linux_getsockname(struct thread *td, struct linux_getsockname_args *args)
{
struct getsockname_args /* {
@@ -915,13 +847,7 @@ linux_getsockname(struct thread *td, struct linux_getsockname_args *args)
return (0);
}
-struct linux_getpeername_args {
- int s;
- l_uintptr_t addr;
- l_uintptr_t namelen;
-};
-
-static int
+int
linux_getpeername(struct thread *td, struct linux_getpeername_args *args)
{
struct getpeername_args /* {
@@ -944,14 +870,7 @@ linux_getpeername(struct thread *td, struct linux_getpeername_args *args)
return (0);
}
-struct linux_socketpair_args {
- int domain;
- int type;
- int protocol;
- l_uintptr_t rsv;
-};
-
-static int
+int
linux_socketpair(struct thread *td, struct linux_socketpair_args *args)
{
struct socketpair_args /* {
@@ -960,20 +879,18 @@ linux_socketpair(struct thread *td, struct linux_socketpair_args *args)
int protocol;
int *rsv;
} */ bsd_args;
- int error, socket_flags;
- int sv[2];
+ int error;
bsd_args.domain = linux_to_bsd_domain(args->domain);
if (bsd_args.domain != PF_LOCAL)
return (EAFNOSUPPORT);
-
- socket_flags = args->type & ~LINUX_SOCK_TYPE_MASK;
- if (socket_flags & ~(LINUX_SOCK_CLOEXEC | LINUX_SOCK_NONBLOCK))
- return (EINVAL);
bsd_args.type = args->type & LINUX_SOCK_TYPE_MASK;
if (bsd_args.type < 0 || bsd_args.type > LINUX_SOCK_MAX)
return (EINVAL);
-
+ error = linux_set_socket_flags(args->type & ~LINUX_SOCK_TYPE_MASK,
+ &bsd_args.type);
+ if (error != 0)
+ return (error);
if (args->protocol != 0 && args->protocol != PF_UNIX)
/*
@@ -986,27 +903,10 @@ linux_socketpair(struct thread *td, struct linux_socketpair_args *args)
else
bsd_args.protocol = 0;
bsd_args.rsv = (int *)PTRIN(args->rsv);
- error = kern_socketpair(td, bsd_args.domain, bsd_args.type,
- bsd_args.protocol, sv);
- if (error)
- return (error);
- error = linux_set_socket_flags(td, sv[0], socket_flags);
- if (error)
- goto out;
- error = linux_set_socket_flags(td, sv[1], socket_flags);
- if (error)
- goto out;
-
- error = copyout(sv, bsd_args.rsv, 2 * sizeof(int));
-
-out:
- if (error) {
- (void)kern_close(td, sv[0]);
- (void)kern_close(td, sv[1]);
- }
- return (error);
+ return (sys_socketpair(td, &bsd_args));
}
+#if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
struct linux_send_args {
int s;
l_uintptr_t msg;
@@ -1062,8 +962,9 @@ linux_recv(struct thread *td, struct linux_recv_args *args)
bsd_args.fromlenaddr = 0;
return (sys_recvfrom(td, &bsd_args));
}
+#endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */
-static int
+int
linux_sendto(struct thread *td, struct linux_sendto_args *args)
{
struct msghdr msg;
@@ -1087,63 +988,58 @@ linux_sendto(struct thread *td, struct linux_sendto_args *args)
return (error);
}
-struct linux_recvfrom_args {
- int s;
- l_uintptr_t buf;
- int len;
- int flags;
- l_uintptr_t from;
- l_uintptr_t fromlen;
-};
-
+/*
+ * Linux recvfrom(2): receive one message on socket args->s into the user
+ * buffer args->buf (args->len bytes) using kern_recvit().
+ *
+ * When args->fromlen is non-NULL the caller's address length is copied in
+ * first and, after a successful receive, the resulting length is copied
+ * back out.  When args->from is non-NULL the returned address is converted
+ * with bsd_to_linux_sockaddr() and linux_sa_put().
+ *
+ * Returns 0 on success, otherwise the first errno value produced by
+ * copyin(), the sockaddr conversion helpers, kern_recvit() or copyout().
+ */
-static int
+int
linux_recvfrom(struct thread *td, struct linux_recvfrom_args *args)
{
- struct recvfrom_args /* {
- int s;
- caddr_t buf;
- size_t len;
- int flags;
- struct sockaddr * __restrict from;
- socklen_t * __restrict fromlenaddr;
- } */ bsd_args;
- size_t len;
+ struct msghdr msg;
+ struct iovec aiov;
int error;
- if ((error = copyin(PTRIN(args->fromlen), &len, sizeof(size_t))))
- return (error);
+ if (PTRIN(args->fromlen) != NULL) {
+ error = copyin(PTRIN(args->fromlen), &msg.msg_namelen,
+ sizeof(msg.msg_namelen));
+ if (error != 0)
+ return (error);
- bsd_args.s = args->s;
- bsd_args.buf = PTRIN(args->buf);
- bsd_args.len = args->len;
- bsd_args.flags = linux_to_bsd_msg_flags(args->flags);
- /* XXX: */
- bsd_args.from = (struct sockaddr * __restrict)PTRIN(args->from);
- bsd_args.fromlenaddr = PTRIN(args->fromlen);/* XXX */
-
- linux_to_bsd_sockaddr((struct sockaddr *)bsd_args.from, len);
- error = sys_recvfrom(td, &bsd_args);
- bsd_to_linux_sockaddr((struct sockaddr *)bsd_args.from);
-
- if (error)
+ error = linux_to_bsd_sockaddr((struct sockaddr *)PTRIN(args->from),
+ msg.msg_namelen);
+ if (error != 0)
+ return (error);
+ } else
+ msg.msg_namelen = 0;
+
+ /* Describe the single user buffer as a one-element iovec. */
+ msg.msg_name = (struct sockaddr * __restrict)PTRIN(args->from);
+ msg.msg_iov = &aiov;
+ msg.msg_iovlen = 1;
+ aiov.iov_base = PTRIN(args->buf);
+ aiov.iov_len = args->len;
+ msg.msg_control = 0;
+ msg.msg_flags = linux_to_bsd_msg_flags(args->flags);
+
+ error = kern_recvit(td, args->s, &msg, UIO_USERSPACE, NULL);
+ if (error != 0)
return (error);
- if (args->from) {
- error = linux_sa_put((struct osockaddr *)
+
+ if (PTRIN(args->from) != NULL) {
+ error = bsd_to_linux_sockaddr((struct sockaddr *)
PTRIN(args->from));
- if (error)
+ if (error != 0)
return (error);
+
+ error = linux_sa_put((struct osockaddr *)
+ PTRIN(args->from));
}
- return (0);
-}
-struct linux_sendmsg_args {
- int s;
- l_uintptr_t msg;
- int flags;
-};
+ /*
+ * Publish the address length only when everything above succeeded,
+ * so that a failure from linux_sa_put() is not overwritten by the
+ * result of this copyout().
+ */
+ if (error == 0 && PTRIN(args->fromlen) != NULL)
+ error = copyout(&msg.msg_namelen, PTRIN(args->fromlen),
+ sizeof(msg.msg_namelen));
+
+ return (error);
+}
static int
-linux_sendmsg(struct thread *td, struct linux_sendmsg_args *args)
+linux_sendmsg_common(struct thread *td, l_int s, struct l_msghdr *msghdr,
+ l_uint flags)
{
struct cmsghdr *cmsg;
struct cmsgcred cmcred;
@@ -1159,8 +1055,8 @@ linux_sendmsg(struct thread *td, struct linux_sendmsg_args *args)
void *data;
int error;
- error = copyin(PTRIN(args->msg), &linux_msg, sizeof(linux_msg));
- if (error)
+ error = copyin(msghdr, &linux_msg, sizeof(linux_msg));
+ if (error != 0)
return (error);
/*
@@ -1174,7 +1070,7 @@ linux_sendmsg(struct thread *td, struct linux_sendmsg_args *args)
linux_msg.msg_control = PTROUT(NULL);
error = linux_to_bsd_msghdr(&msg, &linux_msg);
- if (error)
+ if (error != 0)
return (error);
#ifdef COMPAT_LINUX32
@@ -1183,29 +1079,27 @@ linux_sendmsg(struct thread *td, struct linux_sendmsg_args *args)
#else
error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
#endif
- if (error)
+ if (error != 0)
return (error);
control = NULL;
cmsg = NULL;
if ((ptr_cmsg = LINUX_CMSG_FIRSTHDR(&linux_msg)) != NULL) {
- error = kern_getsockname(td, args->s, &sa, &datalen);
- if (error)
+ error = kern_getsockname(td, s, &sa, &datalen);
+ if (error != 0)
goto bad;
sa_family = sa->sa_family;
free(sa, M_SONAME);
error = ENOBUFS;
- cmsg = malloc(CMSG_HDRSZ, M_TEMP, M_WAITOK | M_ZERO);
+ cmsg = malloc(CMSG_HDRSZ, M_LINUX, M_WAITOK|M_ZERO);
control = m_get(M_WAITOK, MT_CONTROL);
- if (control == NULL)
- goto bad;
do {
error = copyin(ptr_cmsg, &linux_cmsg,
sizeof(struct l_cmsghdr));
- if (error)
+ if (error != 0)
goto bad;
error = EINVAL;
@@ -1269,28 +1163,60 @@ linux_sendmsg(struct thread *td, struct linux_sendmsg_args *args)
msg.msg_iov = iov;
msg.msg_flags = 0;
- error = linux_sendit(td, args->s, &msg, args->flags, control,
- UIO_USERSPACE);
+ error = linux_sendit(td, s, &msg, flags, control, UIO_USERSPACE);
bad:
+ m_freem(control);
free(iov, M_IOV);
if (cmsg)
- free(cmsg, M_TEMP);
+ free(cmsg, M_LINUX);
return (error);
}
-struct linux_recvmsg_args {
- int s;
- l_uintptr_t msg;
- int flags;
-};
+/*
+ * Linux sendmsg(2) entry point: forwards the socket, the user msghdr
+ * pointer and the flags from the argument block to
+ * linux_sendmsg_common() and returns its result unchanged.
+ */
+int
+linux_sendmsg(struct thread *td, struct linux_sendmsg_args *args)
+{
+ struct l_msghdr *umsg;
+ int error;
+
+ umsg = PTRIN(args->msg);
+ error = linux_sendmsg_common(td, args->s, umsg, args->flags);
+ return (error);
+}
+
+/*
+ * Linux sendmmsg(2): send up to args->vlen messages described by the
+ * l_mmsghdr array at args->msg on socket args->s.
+ *
+ * Each entry is sent through linux_sendmsg_common(); the value that call
+ * leaves in td_retval[0] is copied out to the entry's msg_len.  The loop
+ * stops at the first failure.  When no error occurred td_retval[0] is set
+ * to the number of entries processed and 0 is returned; otherwise the
+ * errno value is returned.
+ */
+int
+linux_sendmmsg(struct thread *td, struct linux_sendmmsg_args *args)
+{
+ struct l_mmsghdr *msg;
+ l_uint retval;
+ int error, datagrams;
+
+ /* Clamp the batch size to at most UIO_MAXIOV entries. */
+ if (args->vlen > UIO_MAXIOV)
+ args->vlen = UIO_MAXIOV;
+
+ msg = PTRIN(args->msg);
+ datagrams = 0;
+ /*
+ * The loop body never runs when vlen is 0, so error has to start
+ * out as 0 instead of being read uninitialized below.
+ */
+ error = 0;
+ while (datagrams < args->vlen) {
+ error = linux_sendmsg_common(td, args->s, &msg->msg_hdr,
+ args->flags);
+ if (error != 0)
+ break;
+
+ /* Report the per-message result in this entry's msg_len. */
+ retval = td->td_retval[0];
+ error = copyout(&retval, &msg->msg_len, sizeof(msg->msg_len));
+ if (error != 0)
+ break;
+ ++msg;
+ ++datagrams;
+ }
+ if (error == 0)
+ td->td_retval[0] = datagrams;
+ return (error);
+}
static int
-linux_recvmsg(struct thread *td, struct linux_recvmsg_args *args)
+linux_recvmsg_common(struct thread *td, l_int s, struct l_msghdr *msghdr,
+ l_uint flags, struct msghdr *msg)
{
struct cmsghdr *cm;
struct cmsgcred *cmcred;
- struct msghdr msg;
struct l_cmsghdr *linux_cmsg = NULL;
struct l_ucred linux_ucred;
socklen_t datalen, outlen;
@@ -1298,55 +1224,57 @@ linux_recvmsg(struct thread *td, struct linux_recvmsg_args *args)
struct iovec *iov, *uiov;
struct mbuf *control = NULL;
struct mbuf **controlp;
+ struct timeval *ftmvl;
+ l_timeval ltmvl;
caddr_t outbuf;
void *data;
int error, i, fd, fds, *fdp;
- error = copyin(PTRIN(args->msg), &linux_msg, sizeof(linux_msg));
- if (error)
+ error = copyin(msghdr, &linux_msg, sizeof(linux_msg));
+ if (error != 0)
return (error);
- error = linux_to_bsd_msghdr(&msg, &linux_msg);
- if (error)
+ error = linux_to_bsd_msghdr(msg, &linux_msg);
+ if (error != 0)
return (error);
#ifdef COMPAT_LINUX32
- error = linux32_copyiniov(PTRIN(msg.msg_iov), msg.msg_iovlen,
+ error = linux32_copyiniov(PTRIN(msg->msg_iov), msg->msg_iovlen,
&iov, EMSGSIZE);
#else
- error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
+ error = copyiniov(msg->msg_iov, msg->msg_iovlen, &iov, EMSGSIZE);
#endif
- if (error)
+ if (error != 0)
return (error);
- if (msg.msg_name) {
- error = linux_to_bsd_sockaddr((struct sockaddr *)msg.msg_name,
- msg.msg_namelen);
- if (error)
+ if (msg->msg_name) {
+ error = linux_to_bsd_sockaddr((struct sockaddr *)msg->msg_name,
+ msg->msg_namelen);
+ if (error != 0)
goto bad;
}
- uiov = msg.msg_iov;
- msg.msg_iov = iov;
- controlp = (msg.msg_control != NULL) ? &control : NULL;
- error = kern_recvit(td, args->s, &msg, UIO_USERSPACE, controlp);
- msg.msg_iov = uiov;
- if (error)
+ uiov = msg->msg_iov;
+ msg->msg_iov = iov;
+ controlp = (msg->msg_control != NULL) ? &control : NULL;
+ error = kern_recvit(td, s, msg, UIO_USERSPACE, controlp);
+ msg->msg_iov = uiov;
+ if (error != 0)
goto bad;
- error = bsd_to_linux_msghdr(&msg, &linux_msg);
- if (error)
+ error = bsd_to_linux_msghdr(msg, &linux_msg);
+ if (error != 0)
goto bad;
if (linux_msg.msg_name) {
error = bsd_to_linux_sockaddr((struct sockaddr *)
PTRIN(linux_msg.msg_name));
- if (error)
+ if (error != 0)
goto bad;
}
if (linux_msg.msg_name && linux_msg.msg_namelen > 2) {
error = linux_sa_put(PTRIN(linux_msg.msg_name));
- if (error)
+ if (error != 0)
goto bad;
}
@@ -1354,12 +1282,12 @@ linux_recvmsg(struct thread *td, struct linux_recvmsg_args *args)
outlen = 0;
if (control) {
- linux_cmsg = malloc(L_CMSG_HDRSZ, M_TEMP, M_WAITOK | M_ZERO);
+ linux_cmsg = malloc(L_CMSG_HDRSZ, M_LINUX, M_WAITOK | M_ZERO);
- msg.msg_control = mtod(control, struct cmsghdr *);
- msg.msg_controllen = control->m_len;
+ msg->msg_control = mtod(control, struct cmsghdr *);
+ msg->msg_controllen = control->m_len;
- cm = CMSG_FIRSTHDR(&msg);
+ cm = CMSG_FIRSTHDR(msg);
while (cm != NULL) {
linux_cmsg->cmsg_type =
@@ -1379,7 +1307,7 @@ linux_recvmsg(struct thread *td, struct linux_recvmsg_args *args)
switch (cm->cmsg_type)
{
case SCM_RIGHTS:
- if (args->flags & LINUX_MSG_CMSG_CLOEXEC) {
+ if (flags & LINUX_MSG_CMSG_CLOEXEC) {
fds = datalen / sizeof(int);
fdp = data;
for (i = 0; i < fds; i++) {
@@ -1408,6 +1336,18 @@ linux_recvmsg(struct thread *td, struct linux_recvmsg_args *args)
data = &linux_ucred;
datalen = sizeof(linux_ucred);
break;
+
+ case SCM_TIMESTAMP:
+ if (datalen != sizeof(struct timeval)) {
+ error = EMSGSIZE;
+ goto bad;
+ }
+ ftmvl = (struct timeval *)data;
+ ltmvl.tv_sec = ftmvl->tv_sec;
+ ltmvl.tv_usec = ftmvl->tv_usec;
+ data = &ltmvl;
+ datalen = sizeof(ltmvl);
+ break;
}
if (outlen + LINUX_CMSG_LEN(datalen) >
@@ -1436,28 +1376,92 @@ linux_recvmsg(struct thread *td, struct linux_recvmsg_args *args)
outbuf += LINUX_CMSG_ALIGN(datalen);
outlen += LINUX_CMSG_LEN(datalen);
- cm = CMSG_NXTHDR(&msg, cm);
+ cm = CMSG_NXTHDR(msg, cm);
}
}
out:
linux_msg.msg_controllen = outlen;
- error = copyout(&linux_msg, PTRIN(args->msg), sizeof(linux_msg));
+ error = copyout(&linux_msg, msghdr, sizeof(linux_msg));
bad:
free(iov, M_IOV);
m_freem(control);
- free(linux_cmsg, M_TEMP);
+ free(linux_cmsg, M_LINUX);
return (error);
}
-struct linux_shutdown_args {
- int s;
- int how;
-};
+int
+linux_recvmsg(struct thread *td, struct linux_recvmsg_args *args)
+{
+ struct msghdr bsd_msg;
-static int
+ return (linux_recvmsg_common(td, args->s, PTRIN(args->msg),
+ args->flags, &bsd_msg));
+}
+
+/*
+ * Linux recvmmsg(2): receive up to args->vlen messages into the l_mmsghdr
+ * array at args->msg from socket args->s.
+ *
+ * Each entry is filled by linux_recvmsg_common(); the value that call
+ * leaves in td_retval[0] is copied out to the entry's msg_len.  The loop
+ * ends early on the first error, once the optional timeout check below
+ * fires, or when a received message carries MSG_OOB.  When no error
+ * occurred td_retval[0] is set to the number of entries processed and 0
+ * is returned; otherwise the errno value is returned.
+ */
+int
+linux_recvmmsg(struct thread *td, struct linux_recvmmsg_args *args)
+{
+ struct l_mmsghdr *msg;
+ struct msghdr bsd_msg;
+ struct l_timespec lts;
+ struct timespec ts, tts;
+ l_uint retval;
+ int error, datagrams;
+
+ /* With a timeout, precompute the absolute deadline in tts. */
+ if (args->timeout) {
+ error = copyin(args->timeout, &lts, sizeof(struct l_timespec));
+ if (error != 0)
+ return (error);
+ error = linux_to_native_timespec(&ts, &lts);
+ if (error != 0)
+ return (error);
+ getnanotime(&tts);
+ timespecadd(&tts, &ts);
+ }
+
+ msg = PTRIN(args->msg);
+ datagrams = 0;
+ /*
+ * Without a timeout and with vlen == 0 nothing above or below
+ * assigns error, so give it a defined value before the final check.
+ */
+ error = 0;
+ while (datagrams < args->vlen) {
+ error = linux_recvmsg_common(td, args->s, &msg->msg_hdr,
+ args->flags & ~LINUX_MSG_WAITFORONE, &bsd_msg);
+ if (error != 0)
+ break;
+
+ /* Report the per-message result in this entry's msg_len. */
+ retval = td->td_retval[0];
+ error = copyout(&retval, &msg->msg_len, sizeof(msg->msg_len));
+ if (error != 0)
+ break;
+ ++msg;
+ ++datagrams;
+
+ /*
+ * MSG_WAITFORONE turns on MSG_DONTWAIT after one packet.
+ */
+ if (args->flags & LINUX_MSG_WAITFORONE)
+ args->flags |= LINUX_MSG_DONTWAIT;
+
+ /*
+ * See BUGS section of recvmmsg(2).
+ *
+ * ts becomes (now - deadline).
+ * NOTE(review): an overshoot of less than one second leaves
+ * tv_sec == 0 with tv_nsec != 0 and does not end the loop;
+ * confirm whether "ts.tv_sec >= 0" was intended.
+ */
+ if (args->timeout) {
+ getnanotime(&ts);
+ timespecsub(&ts, &tts);
+ if (!timespecisset(&ts) || ts.tv_sec > 0)
+ break;
+ }
+ /* Out of band data, return right away. */
+ if (bsd_msg.msg_flags & MSG_OOB)
+ break;
+ }
+ if (error == 0)
+ td->td_retval[0] = datagrams;
+ return (error);
+}
+
+int
linux_shutdown(struct thread *td, struct linux_shutdown_args *args)
{
struct shutdown_args /* {
@@ -1470,15 +1474,7 @@ linux_shutdown(struct thread *td, struct linux_shutdown_args *args)
return (sys_shutdown(td, &bsd_args));
}
-struct linux_setsockopt_args {
- int s;
- int level;
- int optname;
- l_uintptr_t optval;
- int optlen;
-};
-
-static int
+int
linux_setsockopt(struct thread *td, struct linux_setsockopt_args *args)
{
struct setsockopt_args /* {
@@ -1543,15 +1539,7 @@ linux_setsockopt(struct thread *td, struct linux_setsockopt_args *args)
return (error);
}
-struct linux_getsockopt_args {
- int s;
- int level;
- int optname;
- l_uintptr_t optval;
- l_uintptr_t optlen;
-};
-
-static int
+int
linux_getsockopt(struct thread *td, struct linux_getsockopt_args *args)
{
struct getsockopt_args /* {
@@ -1635,6 +1623,8 @@ linux_getsockopt(struct thread *td, struct linux_getsockopt_args *args)
return (error);
}
+#if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
+
/* Argument list sizes for linux_socketcall */
#define LINUX_AL(x) ((x) * sizeof(l_ulong))
@@ -1649,7 +1639,8 @@ static const unsigned char lxs_args[] = {
LINUX_AL(6) /* recvfrom */, LINUX_AL(2) /* shutdown */,
LINUX_AL(5) /* setsockopt */, LINUX_AL(5) /* getsockopt */,
LINUX_AL(3) /* sendmsg */, LINUX_AL(3) /* recvmsg */,
- LINUX_AL(4) /* accept4 */
+ LINUX_AL(4) /* accept4 */, LINUX_AL(5) /* recvmmsg */,
+ LINUX_AL(4) /* sendmmsg */
};
#define LINUX_AL_SIZE sizeof(lxs_args) / sizeof(lxs_args[0]) - 1
@@ -1705,8 +1696,13 @@ linux_socketcall(struct thread *td, struct linux_socketcall_args *args)
return (linux_recvmsg(td, arg));
case LINUX_ACCEPT4:
return (linux_accept4(td, arg));
+ case LINUX_RECVMMSG:
+ return (linux_recvmmsg(td, arg));
+ case LINUX_SENDMMSG:
+ return (linux_sendmmsg(td, arg));
}
uprintf("LINUX: 'socket' typ=%d not implemented\n", args->what);
return (ENOSYS);
}
+#endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */
diff --git a/sys/compat/linux/linux_socket.h b/sys/compat/linux/linux_socket.h
index e6efadb..b32a969 100644
--- a/sys/compat/linux/linux_socket.h
+++ b/sys/compat/linux/linux_socket.h
@@ -48,12 +48,36 @@
#define LINUX_MSG_RST 0x1000
#define LINUX_MSG_ERRQUEUE 0x2000
#define LINUX_MSG_NOSIGNAL 0x4000
+#define LINUX_MSG_WAITFORONE 0x10000
#define LINUX_MSG_CMSG_CLOEXEC 0x40000000
/* Socket-level control message types */
#define LINUX_SCM_RIGHTS 0x01
-#define LINUX_SCM_CREDENTIALS 0x02
+#define LINUX_SCM_CREDENTIALS 0x02
+#define LINUX_SCM_TIMESTAMP 0x1D
+
+/*
+ * Message header in Linux layout, as exchanged with user space by the
+ * sendmsg/recvmsg emulation (pointers are carried as l_uintptr_t).
+ * NOTE(review): field meanings presumably mirror Linux struct msghdr;
+ * confirm against the Linux ABI.
+ */
+struct l_msghdr {
+ l_uintptr_t msg_name; /* user pointer to the socket address */
+ l_int msg_namelen; /* length of the address at msg_name */
+ l_uintptr_t msg_iov; /* user pointer to the iovec array */
+ l_size_t msg_iovlen; /* presumably the iovec entry count */
+ l_uintptr_t msg_control; /* user pointer to control data */
+ l_size_t msg_controllen; /* length of the control data */
+ l_uint msg_flags; /* message flags */
+};
+
+/*
+ * One element of the message vector used by linux_sendmmsg() and
+ * linux_recvmmsg(): a message header plus a per-message result slot.
+ */
+struct l_mmsghdr {
+ struct l_msghdr msg_hdr; /* header handed to the *_common helper */
+ l_uint msg_len; /* receives td_retval[0] for this message */
+
+};
+
+/*
+ * Control-message header in Linux layout, read from and written to the
+ * user control buffer by the sendmsg/recvmsg emulation.
+ * NOTE(review): field meanings presumably mirror Linux struct cmsghdr;
+ * confirm against the Linux ABI.
+ */
+struct l_cmsghdr {
+ l_size_t cmsg_len; /* presumably length including this header */
+ l_int cmsg_level; /* originating protocol level */
+ l_int cmsg_type; /* control message type (LINUX_SCM_*) */
+};
/* Ancilliary data object information macros */
@@ -116,6 +140,133 @@ struct l_ucred {
uint32_t gid;
};
+#if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
+
+struct linux_sendto_args {
+ int s;
+ l_uintptr_t msg;
+ int len;
+ int flags;
+ l_uintptr_t to;
+ int tolen;
+};
+
+struct linux_socket_args {
+ int domain;
+ int type;
+ int protocol;
+};
+
+struct linux_bind_args {
+ int s;
+ l_uintptr_t name;
+ int namelen;
+};
+
+struct linux_connect_args {
+ int s;
+ l_uintptr_t name;
+ int namelen;
+};
+
+struct linux_listen_args {
+ int s;
+ int backlog;
+};
+
+struct linux_accept_args {
+ int s;
+ l_uintptr_t addr;
+ l_uintptr_t namelen;
+};
+
+struct linux_accept4_args {
+ int s;
+ l_uintptr_t addr;
+ l_uintptr_t namelen;
+ int flags;
+};
+
+struct linux_getsockname_args {
+ int s;
+ l_uintptr_t addr;
+ l_uintptr_t namelen;
+};
+
+struct linux_getpeername_args {
+ int s;
+ l_uintptr_t addr;
+ l_uintptr_t namelen;
+};
+
+struct linux_socketpair_args {
+ int domain;
+ int type;
+ int protocol;
+ l_uintptr_t rsv;
+};
+
+struct linux_recvfrom_args {
+ int s;
+ l_uintptr_t buf;
+ int len;
+ int flags;
+ l_uintptr_t from;
+ l_uintptr_t fromlen;
+};
+
+struct linux_sendmsg_args {
+ int s;
+ l_uintptr_t msg;
+ int flags;
+};
+
+struct linux_recvmsg_args {
+ int s;
+ l_uintptr_t msg;
+ int flags;
+};
+
+struct linux_shutdown_args {
+ int s;
+ int how;
+};
+
+struct linux_setsockopt_args {
+ int s;
+ int level;
+ int optname;
+ l_uintptr_t optval;
+ int optlen;
+};
+
+struct linux_getsockopt_args {
+ int s;
+ int level;
+ int optname;
+ l_uintptr_t optval;
+ l_uintptr_t optlen;
+};
+
+int linux_socket(struct thread *td, struct linux_socket_args *args);
+int linux_bind(struct thread *td, struct linux_bind_args *args);
+int linux_connect(struct thread *, struct linux_connect_args *);
+int linux_listen(struct thread *td, struct linux_listen_args *args);
+int linux_accept(struct thread *td, struct linux_accept_args *args);
+int linux_accept4(struct thread *td, struct linux_accept4_args *args);
+int linux_getsockname(struct thread *td, struct linux_getsockname_args *args);
+int linux_getpeername(struct thread *td, struct linux_getpeername_args *args);
+int linux_socketpair(struct thread *td, struct linux_socketpair_args *args);
+int linux_sendto(struct thread *td, struct linux_sendto_args *args);
+int linux_recvfrom(struct thread *td, struct linux_recvfrom_args *args);
+int linux_sendmsg(struct thread *td, struct linux_sendmsg_args *args);
+int linux_recvmsg(struct thread *td, struct linux_recvmsg_args *args);
+int linux_shutdown(struct thread *td, struct linux_shutdown_args *args);
+int linux_setsockopt(struct thread *td, struct linux_setsockopt_args *args);
+int linux_getsockopt(struct thread *td, struct linux_getsockopt_args *args);
+
+#endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */
+
/* Operations for socketcall */
#define LINUX_SOCKET 1
@@ -136,6 +287,8 @@ struct l_ucred {
#define LINUX_SENDMSG 16
#define LINUX_RECVMSG 17
#define LINUX_ACCEPT4 18
+#define LINUX_RECVMMSG 19
+#define LINUX_SENDMMSG 20
/* Socket options */
#define LINUX_IP_TOS 1
diff --git a/sys/compat/linux/linux_stats.c b/sys/compat/linux/linux_stats.c
index 2e05c85..f96acc0 100644
--- a/sys/compat/linux/linux_stats.c
+++ b/sys/compat/linux/linux_stats.c
@@ -32,6 +32,7 @@ __FBSDID("$FreeBSD$");
#include "opt_compat.h"
#include <sys/param.h>
+#include <sys/capsicum.h>
#include <sys/dirent.h>
#include <sys/file.h>
#include <sys/filedesc.h>
@@ -58,7 +59,6 @@ __FBSDID("$FreeBSD$");
#include <compat/linux/linux_util.h>
#include <compat/linux/linux_file.h>
-#define LINUX_SHMFS_MAGIC 0x01021994
static void
translate_vnhook_major_minor(struct vnode *vp, struct stat *sb)
@@ -251,6 +251,7 @@ linux_newfstat(struct thread *td, struct linux_newfstat_args *args)
return (error);
}
+#if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
static int
stat_copyout(struct stat *buf, void *ubuf)
{
@@ -325,19 +326,19 @@ linux_lstat(struct thread *td, struct linux_lstat_args *args)
LFREEPATH(path);
return(stat_copyout(&buf, args->up));
}
+#endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */
-/* XXX - All fields of type l_int are defined as l_long on i386 */
struct l_statfs {
- l_int f_type;
- l_int f_bsize;
- l_int f_blocks;
- l_int f_bfree;
- l_int f_bavail;
- l_int f_files;
- l_int f_ffree;
+ l_long f_type;
+ l_long f_bsize;
+ l_long f_blocks;
+ l_long f_bfree;
+ l_long f_bavail;
+ l_long f_files;
+ l_long f_ffree;
l_fsid_t f_fsid;
- l_int f_namelen;
- l_int f_spare[6];
+ l_long f_namelen;
+ l_long f_spare[6];
};
#define LINUX_CODA_SUPER_MAGIC 0x73757245L
@@ -351,6 +352,7 @@ struct l_statfs {
#define LINUX_PROC_SUPER_MAGIC 0x9fa0L
#define LINUX_UFS_SUPER_MAGIC 0x00011954L /* XXX - UFS_MAGIC in Linux */
#define LINUX_DEVFS_SUPER_MAGIC 0x1373L
+#define LINUX_SHMFS_MAGIC 0x01021994
static long
bsd_to_linux_ftype(const char *fstypename)
@@ -368,6 +370,7 @@ bsd_to_linux_ftype(const char *fstypename)
{"hpfs", LINUX_HPFS_SUPER_MAGIC},
{"coda", LINUX_CODA_SUPER_MAGIC},
{"devfs", LINUX_DEVFS_SUPER_MAGIC},
+ {"tmpfs", LINUX_SHMFS_MAGIC},
{NULL, 0L}};
for (i = 0; b2l_tbl[i].bsd_name != NULL; i++)
@@ -399,7 +402,7 @@ linux_statfs(struct thread *td, struct linux_statfs_args *args)
struct l_statfs linux_statfs;
struct statfs bsd_statfs;
char *path;
- int error, dev_shm;
+ int error;
LCONVPATHEXIST(td, args->path, &path);
@@ -407,20 +410,15 @@ linux_statfs(struct thread *td, struct linux_statfs_args *args)
if (ldebug(statfs))
printf(ARGS(statfs, "%s, *"), path);
#endif
- dev_shm = 0;
error = kern_statfs(td, path, UIO_SYSSPACE, &bsd_statfs);
- if (strncmp(path, "/dev/shm", sizeof("/dev/shm") - 1) == 0)
- dev_shm = (path[8] == '\0'
- || (path[8] == '/' && path[9] == '\0'));
LFREEPATH(path);
if (error)
return (error);
bsd_to_linux_statfs(&bsd_statfs, &linux_statfs);
- if (dev_shm)
- linux_statfs.f_type = LINUX_SHMFS_MAGIC;
return copyout(&linux_statfs, args->buf, sizeof(linux_statfs));
}
+#if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
static void
bsd_to_linux_statfs64(struct statfs *bsd_statfs, struct l_statfs64 *linux_statfs)
{
@@ -461,6 +459,7 @@ linux_statfs64(struct thread *td, struct linux_statfs64_args *args)
bsd_to_linux_statfs64(&bsd_statfs, &linux_statfs);
return copyout(&linux_statfs, args->buf, sizeof(linux_statfs));
}
+#endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */
int
linux_fstatfs(struct thread *td, struct linux_fstatfs_args *args)
@@ -493,7 +492,7 @@ linux_ustat(struct thread *td, struct linux_ustat_args *args)
{
#ifdef DEBUG
if (ldebug(ustat))
- printf(ARGS(ustat, "%d, *"), args->dev);
+ printf(ARGS(ustat, "%ju, *"), (uintmax_t)args->dev);
#endif
return (EOPNOTSUPP);
@@ -624,4 +623,74 @@ linux_fstatat64(struct thread *td, struct linux_fstatat64_args *args)
return (error);
}
+#else /* __amd64__ && !COMPAT_LINUX32 */
+
+/*
+ * Linux newfstatat(2): stat args->pathname relative to args->dfd and copy
+ * the result to args->statbuf via newstat_copyout().
+ *
+ * Only LINUX_AT_SYMLINK_NOFOLLOW is accepted in args->flag; any other bit
+ * yields EINVAL.  LINUX_AT_FDCWD is translated to AT_FDCWD.
+ */
+int
+linux_newfstatat(struct thread *td, struct linux_newfstatat_args *args)
+{
+ struct stat sb;
+ char *path;
+ int bsd_dfd, bsd_flag, error;
+
+ /* Reject every flag bit other than AT_SYMLINK_NOFOLLOW. */
+ if ((args->flag & ~LINUX_AT_SYMLINK_NOFOLLOW) != 0)
+ return (EINVAL);
+ if ((args->flag & LINUX_AT_SYMLINK_NOFOLLOW) != 0)
+ bsd_flag = AT_SYMLINK_NOFOLLOW;
+ else
+ bsd_flag = 0;
+
+ if (args->dfd == LINUX_AT_FDCWD)
+ bsd_dfd = AT_FDCWD;
+ else
+ bsd_dfd = args->dfd;
+ LCONVPATHEXIST_AT(td, args->pathname, &path, bsd_dfd);
+
+#ifdef DEBUG
+ if (ldebug(newfstatat))
+ printf(ARGS(newfstatat, "%i, %s, %i"), args->dfd, path, args->flag);
+#endif
+
+ error = linux_kern_statat(td, bsd_flag, bsd_dfd, path, UIO_SYSSPACE,
+ &sb);
+ /* Copy out only after a successful stat; the path is always freed. */
+ if (error == 0)
+ error = newstat_copyout(&sb, args->statbuf);
+ LFREEPATH(path);
+
+ return (error);
+}
+
#endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */
+
+/*
+ * Linux syncfs(2): start a sync of the filesystem that holds the file
+ * referenced by args->fd.
+ *
+ * The descriptor is resolved to a vnode with fgetvp() (CAP_FSYNC right)
+ * and the vnode's mount point is busied.  Unless the mount is read-only
+ * or vn_start_write() fails, vfs_msync() and VFS_SYNC() are issued with
+ * MNT_NOWAIT while TDP_SYNCIO is set through curthread_pflags_set().
+ *
+ * Returns the error from fgetvp() or vfs_busy(), otherwise 0; the results
+ * of vfs_msync() and VFS_SYNC() are not propagated to the caller.
+ */
+int
+linux_syncfs(struct thread *td, struct linux_syncfs_args *args)
+{
+ cap_rights_t rights;
+ struct mount *mp;
+ struct vnode *vp;
+ int error, save;
+
+ error = fgetvp(td, args->fd, cap_rights_init(&rights, CAP_FSYNC), &vp);
+ if (error != 0)
+ /*
+ * Linux syncfs() only returns EBADF, but fgetvp() can
+ * also return EINVAL when the file descriptor does not
+ * refer to a vnode.  XXX.
+ */
+ return (error);
+
+ mp = vp->v_mount;
+ /*
+ * vfs_busy() is entered with mountlist_mtx held (MBF_MNTLSTLOCK);
+ * only the failure path below releases the mutex explicitly.
+ * NOTE(review): presumably vfs_busy() drops it itself on success;
+ * confirm against vfs_busy(9).
+ */
+ mtx_lock(&mountlist_mtx);
+ error = vfs_busy(mp, MBF_MNTLSTLOCK);
+ if (error != 0) {
+ /* The error is returned as-is; see the comment above. */
+ mtx_unlock(&mountlist_mtx);
+ goto out;
+ }
+ /* Nothing to write back on a read-only mount. */
+ if ((mp->mnt_flag & MNT_RDONLY) == 0 &&
+ vn_start_write(NULL, &mp, V_NOWAIT) == 0) {
+ save = curthread_pflags_set(TDP_SYNCIO);
+ vfs_msync(mp, MNT_NOWAIT);
+ VFS_SYNC(mp, MNT_NOWAIT);
+ curthread_pflags_restore(save);
+ vn_finished_write(mp);
+ }
+ vfs_unbusy(mp);
+
+ out:
+ /* Drop the vnode reference obtained from fgetvp(). */
+ vrele(vp);
+ return (error);
+}
diff --git a/sys/compat/linux/linux_sysctl.c b/sys/compat/linux/linux_sysctl.c
index decd8f8..27b7a3d 100644
--- a/sys/compat/linux/linux_sysctl.c
+++ b/sys/compat/linux/linux_sysctl.c
@@ -141,12 +141,12 @@ linux_sysctl(struct thread *td, struct linux_sysctl_args *args)
return (ENOTDIR);
}
- mib = malloc(la.nlen * sizeof(l_int), M_TEMP, M_WAITOK);
+ mib = malloc(la.nlen * sizeof(l_int), M_LINUX, M_WAITOK);
error = copyin(PTRIN(la.name), mib, la.nlen * sizeof(l_int));
if (error) {
LIN_SDT_PROBE1(sysctl, linux_sysctl, copyin_error, error);
LIN_SDT_PROBE1(sysctl, linux_sysctl, return, error);
- free(mib, M_TEMP);
+ free(mib, M_LINUX);
return (error);
}
@@ -158,7 +158,7 @@ linux_sysctl(struct thread *td, struct linux_sysctl_args *args)
switch (mib[1]) {
case LINUX_KERN_VERSION:
error = handle_string(&la, version);
- free(mib, M_TEMP);
+ free(mib, M_LINUX);
LIN_SDT_PROBE1(sysctl, linux_sysctl, return, error);
return (error);
default:
@@ -187,7 +187,7 @@ linux_sysctl(struct thread *td, struct linux_sysctl_args *args)
sbuf_delete(sb);
}
- free(mib, M_TEMP);
+ free(mib, M_LINUX);
LIN_SDT_PROBE1(sysctl, linux_sysctl, return, ENOTDIR);
return (ENOTDIR);
diff --git a/sys/compat/linux/linux_time.c b/sys/compat/linux/linux_time.c
index e03af00..663ac92 100644
--- a/sys/compat/linux/linux_time.c
+++ b/sys/compat/linux/linux_time.c
@@ -40,8 +40,11 @@ __KERNEL_RCSID(0, "$NetBSD: linux_time.c,v 1.14 2006/05/14 03:40:54 christos Exp
#include <sys/param.h>
#include <sys/kernel.h>
+#include <sys/lock.h>
#include <sys/ucred.h>
#include <sys/mount.h>
+#include <sys/mutex.h>
+#include <sys/resourcevar.h>
#include <sys/sdt.h>
#include <sys/signal.h>
#include <sys/stdint.h>
@@ -60,7 +63,7 @@ __KERNEL_RCSID(0, "$NetBSD: linux_time.c,v 1.14 2006/05/14 03:40:54 christos Exp
#endif
#include <compat/linux/linux_dtrace.h>
-#include <compat/linux/linux_misc.h>
+#include <compat/linux/linux_timer.h>
/* DTrace init */
LIN_SDT_PROVIDER_DECLARE(LINUX_DTRACE);
@@ -103,27 +106,20 @@ LIN_SDT_PROBE_DEFINE1(time, linux_clock_getres, return, "int");
LIN_SDT_PROBE_DEFINE2(time, linux_nanosleep, entry, "const struct l_timespec *",
"struct l_timespec *");
LIN_SDT_PROBE_DEFINE1(time, linux_nanosleep, conversion_error, "int");
-LIN_SDT_PROBE_DEFINE1(time, linux_nanosleep, nanosleep_error, "int");
LIN_SDT_PROBE_DEFINE1(time, linux_nanosleep, copyout_error, "int");
LIN_SDT_PROBE_DEFINE1(time, linux_nanosleep, copyin_error, "int");
LIN_SDT_PROBE_DEFINE1(time, linux_nanosleep, return, "int");
LIN_SDT_PROBE_DEFINE4(time, linux_clock_nanosleep, entry, "clockid_t", "int",
"struct l_timespec *", "struct l_timespec *");
LIN_SDT_PROBE_DEFINE1(time, linux_clock_nanosleep, conversion_error, "int");
-LIN_SDT_PROBE_DEFINE1(time, linux_clock_nanosleep, nanosleep_error, "int");
LIN_SDT_PROBE_DEFINE1(time, linux_clock_nanosleep, copyout_error, "int");
LIN_SDT_PROBE_DEFINE1(time, linux_clock_nanosleep, copyin_error, "int");
LIN_SDT_PROBE_DEFINE1(time, linux_clock_nanosleep, unsupported_flags, "int");
LIN_SDT_PROBE_DEFINE1(time, linux_clock_nanosleep, unsupported_clockid, "int");
LIN_SDT_PROBE_DEFINE1(time, linux_clock_nanosleep, return, "int");
-static void native_to_linux_timespec(struct l_timespec *,
- struct timespec *);
-static int linux_to_native_timespec(struct timespec *,
- struct l_timespec *);
-static int linux_to_native_clockid(clockid_t *, clockid_t);
-static void
+void
native_to_linux_timespec(struct l_timespec *ltp, struct timespec *ntp)
{
@@ -135,7 +131,7 @@ native_to_linux_timespec(struct l_timespec *ltp, struct timespec *ntp)
LIN_SDT_PROBE0(time, native_to_linux_timespec, return);
}
-static int
+int
linux_to_native_timespec(struct timespec *ntp, struct l_timespec *ltp)
{
@@ -152,12 +148,26 @@ linux_to_native_timespec(struct timespec *ntp, struct l_timespec *ltp)
return (0);
}
-static int
+int
linux_to_native_clockid(clockid_t *n, clockid_t l)
{
LIN_SDT_PROBE2(time, linux_to_native_clockid, entry, n, l);
+ if (l < 0) {
+ /* cpu-clock */
+ if ((l & LINUX_CLOCKFD_MASK) == LINUX_CLOCKFD)
+ return (EINVAL);
+ if (LINUX_CPUCLOCK_WHICH(l) >= LINUX_CPUCLOCK_MAX)
+ return (EINVAL);
+
+ if (LINUX_CPUCLOCK_PERTHREAD(l))
+ *n = CLOCK_THREAD_CPUTIME_ID;
+ else
+ *n = CLOCK_PROCESS_CPUTIME_ID;
+ return (0);
+ }
+
switch (l) {
case LINUX_CLOCK_REALTIME:
*n = CLOCK_REALTIME;
@@ -165,21 +175,27 @@ linux_to_native_clockid(clockid_t *n, clockid_t l)
case LINUX_CLOCK_MONOTONIC:
*n = CLOCK_MONOTONIC;
break;
- case LINUX_CLOCK_PROCESS_CPUTIME_ID:
- case LINUX_CLOCK_THREAD_CPUTIME_ID:
- case LINUX_CLOCK_REALTIME_HR:
- case LINUX_CLOCK_MONOTONIC_HR:
+ case LINUX_CLOCK_REALTIME_COARSE:
+ *n = CLOCK_REALTIME_FAST;
+ break;
+ case LINUX_CLOCK_MONOTONIC_COARSE:
+ *n = CLOCK_MONOTONIC_FAST;
+ break;
+ case LINUX_CLOCK_MONOTONIC_RAW:
+ case LINUX_CLOCK_BOOTTIME:
+ case LINUX_CLOCK_REALTIME_ALARM:
+ case LINUX_CLOCK_BOOTTIME_ALARM:
+ case LINUX_CLOCK_SGI_CYCLE:
+ case LINUX_CLOCK_TAI:
LIN_SDT_PROBE1(time, linux_to_native_clockid,
unsupported_clockid, l);
LIN_SDT_PROBE1(time, linux_to_native_clockid, return, EINVAL);
return (EINVAL);
- break;
default:
LIN_SDT_PROBE1(time, linux_to_native_clockid,
unknown_clockid, l);
LIN_SDT_PROBE1(time, linux_to_native_clockid, return, EINVAL);
return (EINVAL);
- break;
}
LIN_SDT_PROBE1(time, linux_to_native_clockid, return, 0);
@@ -190,9 +206,14 @@ int
linux_clock_gettime(struct thread *td, struct linux_clock_gettime_args *args)
{
struct l_timespec lts;
- int error;
- clockid_t nwhich = 0; /* XXX: GCC */
struct timespec tp;
+ struct rusage ru;
+ struct thread *targettd;
+ struct proc *p;
+ int error, clockwhich;
+ clockid_t nwhich = 0; /* XXX: GCC */
+ pid_t pid;
+ lwpid_t tid;
LIN_SDT_PROBE2(time, linux_clock_gettime, entry, args->which, args->tp);
@@ -203,7 +224,100 @@ linux_clock_gettime(struct thread *td, struct linux_clock_gettime_args *args)
LIN_SDT_PROBE1(time, linux_clock_gettime, return, error);
return (error);
}
- error = kern_clock_gettime(td, nwhich, &tp);
+
+ switch (nwhich) {
+ case CLOCK_PROCESS_CPUTIME_ID:
+ clockwhich = LINUX_CPUCLOCK_WHICH(args->which);
+ pid = LINUX_CPUCLOCK_ID(args->which);
+ if (pid == 0) {
+ p = td->td_proc;
+ PROC_LOCK(p);
+ } else {
+ error = pget(pid, PGET_CANSEE, &p);
+ if (error != 0)
+ return (EINVAL);
+ }
+ switch (clockwhich) {
+ case LINUX_CPUCLOCK_PROF:
+ PROC_STATLOCK(p);
+ calcru(p, &ru.ru_utime, &ru.ru_stime);
+ PROC_STATUNLOCK(p);
+ PROC_UNLOCK(p);
+ timevaladd(&ru.ru_utime, &ru.ru_stime);
+ TIMEVAL_TO_TIMESPEC(&ru.ru_utime, &tp);
+ break;
+ case LINUX_CPUCLOCK_VIRT:
+ PROC_STATLOCK(p);
+ calcru(p, &ru.ru_utime, &ru.ru_stime);
+ PROC_STATUNLOCK(p);
+ PROC_UNLOCK(p);
+ TIMEVAL_TO_TIMESPEC(&ru.ru_utime, &tp);
+ break;
+ case LINUX_CPUCLOCK_SCHED:
+ PROC_UNLOCK(p);
+ error = kern_clock_getcpuclockid2(td, pid,
+ CPUCLOCK_WHICH_PID, &nwhich);
+ if (error != 0)
+ return (EINVAL);
+ error = kern_clock_gettime(td, nwhich, &tp);
+ break;
+ default:
+ PROC_UNLOCK(p);
+ return (EINVAL);
+ }
+
+ break;
+
+ case CLOCK_THREAD_CPUTIME_ID:
+ clockwhich = LINUX_CPUCLOCK_WHICH(args->which);
+ p = td->td_proc;
+ tid = LINUX_CPUCLOCK_ID(args->which);
+ if (tid == 0) {
+ targettd = td;
+ PROC_LOCK(p);
+ } else {
+ targettd = tdfind(tid, p->p_pid);
+ if (targettd == NULL)
+ return (EINVAL);
+ }
+ switch (clockwhich) {
+ case LINUX_CPUCLOCK_PROF:
+ PROC_STATLOCK(p);
+ thread_lock(targettd);
+ rufetchtd(targettd, &ru);
+ thread_unlock(targettd);
+ PROC_STATUNLOCK(p);
+ PROC_UNLOCK(p);
+ timevaladd(&ru.ru_utime, &ru.ru_stime);
+ TIMEVAL_TO_TIMESPEC(&ru.ru_utime, &tp);
+ break;
+ case LINUX_CPUCLOCK_VIRT:
+ PROC_STATLOCK(p);
+ thread_lock(targettd);
+ rufetchtd(targettd, &ru);
+ thread_unlock(targettd);
+ PROC_STATUNLOCK(p);
+ PROC_UNLOCK(p);
+ TIMEVAL_TO_TIMESPEC(&ru.ru_utime, &tp);
+ break;
+ case LINUX_CPUCLOCK_SCHED:
+ error = kern_clock_getcpuclockid2(td, tid,
+ CPUCLOCK_WHICH_TID, &nwhich);
+ PROC_UNLOCK(p);
+ if (error != 0)
+ return (EINVAL);
+ error = kern_clock_gettime(td, nwhich, &tp);
+ break;
+ default:
+ PROC_UNLOCK(p);
+ return (EINVAL);
+ }
+ break;
+
+ default:
+ error = kern_clock_gettime(td, nwhich, &tp);
+ break;
+ }
if (error != 0) {
LIN_SDT_PROBE1(time, linux_clock_gettime, gettime_error, error);
LIN_SDT_PROBE1(time, linux_clock_gettime, return, error);
@@ -261,19 +375,16 @@ linux_clock_settime(struct thread *td, struct linux_clock_settime_args *args)
int
linux_clock_getres(struct thread *td, struct linux_clock_getres_args *args)
{
+ struct proc *p;
struct timespec ts;
struct l_timespec lts;
- int error;
+ int error, clockwhich;
clockid_t nwhich = 0; /* XXX: GCC */
+ pid_t pid;
+ lwpid_t tid;
LIN_SDT_PROBE2(time, linux_clock_getres, entry, args->which, args->tp);
- if (args->tp == NULL) {
- LIN_SDT_PROBE0(time, linux_clock_getres, nullcall);
- LIN_SDT_PROBE1(time, linux_clock_getres, return, 0);
- return (0);
- }
-
error = linux_to_native_clockid(&nwhich, args->which);
if (error != 0) {
LIN_SDT_PROBE1(time, linux_clock_getres, conversion_error,
@@ -281,6 +392,59 @@ linux_clock_getres(struct thread *td, struct linux_clock_getres_args *args)
LIN_SDT_PROBE1(time, linux_clock_getres, return, error);
return (error);
}
+
+ /*
+ * Check user supplied clock id in case of per-process
+ * or thread-specific cpu-time clock.
+ */
+ switch (nwhich) {
+ case CLOCK_THREAD_CPUTIME_ID:
+ tid = LINUX_CPUCLOCK_ID(args->which);
+ if (tid != 0) {
+ p = td->td_proc;
+ if (tdfind(tid, p->p_pid) == NULL)
+ return (ESRCH);
+ PROC_UNLOCK(p);
+ }
+ break;
+ case CLOCK_PROCESS_CPUTIME_ID:
+ pid = LINUX_CPUCLOCK_ID(args->which);
+ if (pid != 0) {
+ error = pget(pid, PGET_CANSEE, &p);
+ if (error != 0)
+ return (EINVAL);
+ PROC_UNLOCK(p);
+ }
+ break;
+ }
+
+ if (args->tp == NULL) {
+ LIN_SDT_PROBE0(time, linux_clock_getres, nullcall);
+ LIN_SDT_PROBE1(time, linux_clock_getres, return, 0);
+ return (0);
+ }
+
+ switch (nwhich) {
+ case CLOCK_THREAD_CPUTIME_ID:
+ case CLOCK_PROCESS_CPUTIME_ID:
+ clockwhich = LINUX_CPUCLOCK_WHICH(args->which);
+ switch (clockwhich) {
+ case LINUX_CPUCLOCK_PROF:
+ nwhich = CLOCK_PROF;
+ break;
+ case LINUX_CPUCLOCK_VIRT:
+ nwhich = CLOCK_VIRTUAL;
+ break;
+ case LINUX_CPUCLOCK_SCHED:
+ break;
+ default:
+ return (EINVAL);
+ }
+ break;
+
+ default:
+ break;
+ }
error = kern_clock_getres(td, nwhich, &ts);
if (error != 0) {
LIN_SDT_PROBE1(time, linux_clock_getres, getres_error, error);
@@ -303,7 +467,7 @@ linux_nanosleep(struct thread *td, struct linux_nanosleep_args *args)
struct timespec *rmtp;
struct l_timespec lrqts, lrmts;
struct timespec rqts, rmts;
- int error;
+ int error, error2;
LIN_SDT_PROBE2(time, linux_nanosleep, entry, args->rqtp, args->rmtp);
@@ -315,9 +479,9 @@ linux_nanosleep(struct thread *td, struct linux_nanosleep_args *args)
}
if (args->rmtp != NULL)
- rmtp = &rmts;
+ rmtp = &rmts;
else
- rmtp = NULL;
+ rmtp = NULL;
error = linux_to_native_timespec(&rqts, &lrqts);
if (error != 0) {
@@ -326,25 +490,19 @@ linux_nanosleep(struct thread *td, struct linux_nanosleep_args *args)
return (error);
}
error = kern_nanosleep(td, &rqts, rmtp);
- if (error != 0) {
- LIN_SDT_PROBE1(time, linux_nanosleep, nanosleep_error, error);
- LIN_SDT_PROBE1(time, linux_nanosleep, return, error);
- return (error);
- }
-
if (args->rmtp != NULL) {
- native_to_linux_timespec(&lrmts, rmtp);
- error = copyout(&lrmts, args->rmtp, sizeof(lrmts));
- if (error != 0) {
+ native_to_linux_timespec(&lrmts, rmtp);
+ error2 = copyout(&lrmts, args->rmtp, sizeof(lrmts));
+ if (error2 != 0) {
LIN_SDT_PROBE1(time, linux_nanosleep, copyout_error,
- error);
- LIN_SDT_PROBE1(time, linux_nanosleep, return, error);
- return (error);
+ error2);
+ LIN_SDT_PROBE1(time, linux_nanosleep, return, error2);
+ return (error2);
}
}
- LIN_SDT_PROBE1(time, linux_nanosleep, return, 0);
- return (0);
+ LIN_SDT_PROBE1(time, linux_nanosleep, return, error);
+ return (error);
}
int
@@ -353,7 +511,7 @@ linux_clock_nanosleep(struct thread *td, struct linux_clock_nanosleep_args *args
struct timespec *rmtp;
struct l_timespec lrqts, lrmts;
struct timespec rqts, rmts;
- int error;
+ int error, error2;
LIN_SDT_PROBE4(time, linux_clock_nanosleep, entry, args->which,
args->flags, args->rqtp, args->rmtp);
@@ -373,7 +531,7 @@ linux_clock_nanosleep(struct thread *td, struct linux_clock_nanosleep_args *args
return (EINVAL);
}
- error = copyin(args->rqtp, &lrqts, sizeof lrqts);
+ error = copyin(args->rqtp, &lrqts, sizeof(lrqts));
if (error != 0) {
LIN_SDT_PROBE1(time, linux_clock_nanosleep, copyin_error,
error);
@@ -382,9 +540,9 @@ linux_clock_nanosleep(struct thread *td, struct linux_clock_nanosleep_args *args
}
if (args->rmtp != NULL)
- rmtp = &rmts;
+ rmtp = &rmts;
else
- rmtp = NULL;
+ rmtp = NULL;
error = linux_to_native_timespec(&rqts, &lrqts);
if (error != 0) {
@@ -394,24 +552,19 @@ linux_clock_nanosleep(struct thread *td, struct linux_clock_nanosleep_args *args
return (error);
}
error = kern_nanosleep(td, &rqts, rmtp);
- if (error != 0) {
- LIN_SDT_PROBE1(time, linux_clock_nanosleep, nanosleep_error,
- error);
- LIN_SDT_PROBE1(time, linux_clock_nanosleep, return, error);
- return (error);
- }
-
if (args->rmtp != NULL) {
+ /* XXX. Not for TIMER_ABSTIME */
native_to_linux_timespec(&lrmts, rmtp);
- error = copyout(&lrmts, args->rmtp, sizeof lrmts );
- if (error != 0) {
+ error2 = copyout(&lrmts, args->rmtp, sizeof(lrmts));
+ if (error2 != 0) {
+ LIN_SDT_PROBE1(time, linux_clock_nanosleep,
+ copyout_error, error2);
LIN_SDT_PROBE1(time, linux_clock_nanosleep,
- copyout_error, error);
- LIN_SDT_PROBE1(time, linux_nanosleep, return, error);
- return (error);
+ return, error2);
+ return (error2);
}
}
- LIN_SDT_PROBE1(time, linux_clock_nanosleep, return, 0);
- return (0);
+ LIN_SDT_PROBE1(time, linux_clock_nanosleep, return, error);
+ return (error);
}
diff --git a/sys/compat/linux/linux_timer.c b/sys/compat/linux/linux_timer.c
index 92dae4c..7dbddbe 100644
--- a/sys/compat/linux/linux_timer.c
+++ b/sys/compat/linux/linux_timer.c
@@ -49,23 +49,6 @@ __FBSDID("$FreeBSD$");
#endif
#include <compat/linux/linux_timer.h>
-static int
-linux_convert_l_clockid(clockid_t *clock_id)
-{
-
- switch (*clock_id) {
- case LINUX_CLOCK_REALTIME:
- *clock_id = CLOCK_REALTIME;
- break;
- case LINUX_CLOCK_MONOTONIC:
- *clock_id = CLOCK_MONOTONIC;
- break;
- default:
- return (EINVAL);
- }
-
- return (0);
-}
static int
linux_convert_l_sigevent(struct l_sigevent *l_sig, struct sigevent *sig)
@@ -75,7 +58,7 @@ linux_convert_l_sigevent(struct l_sigevent *l_sig, struct sigevent *sig)
switch (l_sig->sigev_notify) {
case L_SIGEV_SIGNAL:
sig->sigev_notify = SIGEV_SIGNAL;
- CP(*l_sig, *sig, sigev_signo);
+ sig->sigev_signo = linux_to_bsd_signal(l_sig->sigev_signo);
PTRIN_CP(*l_sig, *sig, sigev_value.sival_ptr);
break;
case L_SIGEV_NONE:
@@ -92,7 +75,7 @@ linux_convert_l_sigevent(struct l_sigevent *l_sig, struct sigevent *sig)
case L_SIGEV_THREAD_ID:
sig->sigev_notify = SIGEV_THREAD_ID;
CP2(*l_sig, *sig, _l_sigev_un._tid, sigev_notify_thread_id);
- CP(*l_sig, *sig, sigev_signo);
+ sig->sigev_signo = linux_to_bsd_signal(l_sig->sigev_signo);
PTRIN_CP(*l_sig, *sig, sigev_value.sival_ptr);
break;
default:
@@ -106,6 +89,7 @@ linux_timer_create(struct thread *td, struct linux_timer_create_args *uap)
{
struct l_sigevent l_ev;
struct sigevent ev, *evp;
+ clockid_t nwhich;
int error, id;
if (uap->evp == NULL) {
@@ -119,10 +103,10 @@ linux_timer_create(struct thread *td, struct linux_timer_create_args *uap)
return (error);
evp = &ev;
}
- error = linux_convert_l_clockid(&uap->clock_id);
+ error = linux_to_native_clockid(&nwhich, uap->clock_id);
if (error != 0)
return (error);
- error = kern_ktimer_create(td, uap->clock_id, evp, &id, -1);
+ error = kern_ktimer_create(td, nwhich, evp, &id, -1);
if (error == 0) {
error = copyout(&id, uap->timerid, sizeof(int));
if (error != 0)
@@ -179,4 +163,3 @@ linux_timer_delete(struct thread *td, struct linux_timer_delete_args *uap)
return (kern_ktimer_delete(td, uap->timerid));
}
-
diff --git a/sys/compat/linux/linux_timer.h b/sys/compat/linux/linux_timer.h
index 4f64ee5..c79c08d 100644
--- a/sys/compat/linux/linux_timer.h
+++ b/sys/compat/linux/linux_timer.h
@@ -56,6 +56,23 @@
#define LINUX_CLOCK_SGI_CYCLE 10
#define LINUX_CLOCK_TAI 11
+#define LINUX_CPUCLOCK_PERTHREAD_MASK 4
+#define LINUX_CPUCLOCK_MASK 3
+#define LINUX_CPUCLOCK_WHICH(clock) \
+ ((clock) & (clockid_t) LINUX_CPUCLOCK_MASK)
+#define LINUX_CPUCLOCK_PROF 0
+#define LINUX_CPUCLOCK_VIRT 1
+#define LINUX_CPUCLOCK_SCHED 2
+#define LINUX_CPUCLOCK_MAX 3
+#define LINUX_CLOCKFD LINUX_CPUCLOCK_MAX
+#define LINUX_CLOCKFD_MASK \
+ (LINUX_CPUCLOCK_PERTHREAD_MASK|LINUX_CPUCLOCK_MASK)
+
+#define LINUX_CPUCLOCK_ID(clock) ((pid_t) ~((clock) >> 3))
+#define LINUX_CPUCLOCK_PERTHREAD(clock) \
+ (((clock) & (clockid_t) LINUX_CPUCLOCK_PERTHREAD_MASK) != 0)
+
+
#define L_SIGEV_SIGNAL 0
#define L_SIGEV_NONE 1
#define L_SIGEV_THREAD 2
@@ -94,4 +111,10 @@ struct l_itimerspec {
struct l_timespec it_value;
};
+void native_to_linux_timespec(struct l_timespec *,
+ struct timespec *);
+int linux_to_native_timespec(struct timespec *,
+ struct l_timespec *);
+int linux_to_native_clockid(clockid_t *, clockid_t);
+
#endif /* _LINUX_TIMER_H */
diff --git a/sys/compat/linux/linux_uid16.c b/sys/compat/linux/linux_uid16.c
index a2c3214..9acc047 100644
--- a/sys/compat/linux/linux_uid16.c
+++ b/sys/compat/linux/linux_uid16.c
@@ -172,12 +172,12 @@ linux_setgroups16(struct thread *td, struct linux_setgroups16_args *args)
LIN_SDT_PROBE1(uid16, linux_setgroups16, return, EINVAL);
return (EINVAL);
}
- linux_gidset = malloc(ngrp * sizeof(*linux_gidset), M_TEMP, M_WAITOK);
+ linux_gidset = malloc(ngrp * sizeof(*linux_gidset), M_LINUX, M_WAITOK);
error = copyin(args->gidset, linux_gidset, ngrp * sizeof(l_gid16_t));
if (error) {
LIN_SDT_PROBE1(uid16, linux_setgroups16, copyin_error, error);
LIN_SDT_PROBE1(uid16, linux_setgroups16, return, error);
- free(linux_gidset, M_TEMP);
+ free(linux_gidset, M_LINUX);
return (error);
}
newcred = crget();
@@ -219,7 +219,7 @@ linux_setgroups16(struct thread *td, struct linux_setgroups16_args *args)
crfree(oldcred);
error = 0;
out:
- free(linux_gidset, M_TEMP);
+ free(linux_gidset, M_LINUX);
LIN_SDT_PROBE1(uid16, linux_setgroups16, return, error);
return (error);
@@ -260,14 +260,14 @@ linux_getgroups16(struct thread *td, struct linux_getgroups16_args *args)
ngrp = 0;
linux_gidset = malloc(bsd_gidsetsz * sizeof(*linux_gidset),
- M_TEMP, M_WAITOK);
+ M_LINUX, M_WAITOK);
while (ngrp < bsd_gidsetsz) {
linux_gidset[ngrp] = bsd_gidset[ngrp + 1];
ngrp++;
}
error = copyout(linux_gidset, args->gidset, ngrp * sizeof(l_gid16_t));
- free(linux_gidset, M_TEMP);
+ free(linux_gidset, M_LINUX);
if (error) {
LIN_SDT_PROBE1(uid16, linux_getgroups16, copyout_error, error);
LIN_SDT_PROBE1(uid16, linux_getgroups16, return, error);
diff --git a/sys/compat/linux/linux_util.c b/sys/compat/linux/linux_util.c
index 76c210c..fe49120 100644
--- a/sys/compat/linux/linux_util.c
+++ b/sys/compat/linux/linux_util.c
@@ -53,48 +53,14 @@ __FBSDID("$FreeBSD$");
#include <machine/stdarg.h>
#include <compat/linux/linux_util.h>
-#ifdef COMPAT_LINUX32
-#include <machine/../linux32/linux.h>
-#else
-#include <machine/../linux/linux.h>
-#endif
-#include <compat/linux/linux_dtrace.h>
+MALLOC_DEFINE(M_LINUX, "linux", "Linux mode structures");
+MALLOC_DEFINE(M_EPOLL, "lepoll", "Linux events structures");
+MALLOC_DEFINE(M_FUTEX, "futex", "Linux futexes");
+MALLOC_DEFINE(M_FUTEX_WP, "futex wp", "Linux futex waiting proc");
const char linux_emul_path[] = "/compat/linux";
-/* DTrace init */
-LIN_SDT_PROVIDER_DECLARE(LINUX_DTRACE);
-
-/**
- * DTrace probes in this module.
- */
-LIN_SDT_PROBE_DEFINE5(util, linux_emul_convpath, entry, "const char *",
- "enum uio_seg", "char **", "int", "int");
-LIN_SDT_PROBE_DEFINE1(util, linux_emul_convpath, return, "int");
-LIN_SDT_PROBE_DEFINE1(util, linux_msg, entry, "const char *");
-LIN_SDT_PROBE_DEFINE0(util, linux_msg, return);
-LIN_SDT_PROBE_DEFINE2(util, linux_driver_get_name_dev, entry, "device_t",
- "const char *");
-LIN_SDT_PROBE_DEFINE0(util, linux_driver_get_name_dev, nullcall);
-LIN_SDT_PROBE_DEFINE1(util, linux_driver_get_name_dev, return, "char *");
-LIN_SDT_PROBE_DEFINE3(util, linux_driver_get_major_minor, entry, "char *",
- "int *", "int *");
-LIN_SDT_PROBE_DEFINE0(util, linux_driver_get_major_minor, nullcall);
-LIN_SDT_PROBE_DEFINE1(util, linux_driver_get_major_minor, notfound, "char *");
-LIN_SDT_PROBE_DEFINE3(util, linux_driver_get_major_minor, return, "int",
- "int", "int");
-LIN_SDT_PROBE_DEFINE0(util, linux_get_char_devices, entry);
-LIN_SDT_PROBE_DEFINE1(util, linux_get_char_devices, return, "char *");
-LIN_SDT_PROBE_DEFINE1(util, linux_free_get_char_devices, entry, "char *");
-LIN_SDT_PROBE_DEFINE0(util, linux_free_get_char_devices, return);
-LIN_SDT_PROBE_DEFINE1(util, linux_device_register_handler, entry,
- "struct linux_device_handler *");
-LIN_SDT_PROBE_DEFINE1(util, linux_device_register_handler, return, "int");
-LIN_SDT_PROBE_DEFINE1(util, linux_device_unregister_handler, entry,
- "struct linux_device_handler *");
-LIN_SDT_PROBE_DEFINE1(util, linux_device_unregister_handler, return, "int");
-
/*
* Search an alternate path before passing pathname arguments on to
* system calls. Useful for keeping a separate 'emulation tree'.
@@ -108,13 +74,9 @@ linux_emul_convpath(struct thread *td, const char *path, enum uio_seg pathseg,
{
int retval;
- LIN_SDT_PROBE5(util, linux_emul_convpath, entry, path, pathseg, pbuf,
- cflag, dfd);
-
retval = kern_alternate_path(td, linux_emul_path, path, pathseg, pbuf,
cflag, dfd);
- LIN_SDT_PROBE1(util, linux_emul_convpath, return, retval);
return (retval);
}
@@ -124,16 +86,12 @@ linux_msg(const struct thread *td, const char *fmt, ...)
va_list ap;
struct proc *p;
- LIN_SDT_PROBE1(util, linux_msg, entry, fmt);
-
p = td->td_proc;
printf("linux: pid %d (%s): ", (int)p->p_pid, p->p_comm);
va_start(ap, fmt);
vprintf(fmt, ap);
va_end(ap);
printf("\n");
-
- LIN_SDT_PROBE0(util, linux_msg, return);
}
struct device_element
@@ -156,24 +114,14 @@ linux_driver_get_name_dev(device_t dev)
struct device_element *de;
const char *device_name = device_get_name(dev);
- LIN_SDT_PROBE2(util, linux_driver_get_name_dev, entry, dev,
- device_name);
-
- if (device_name == NULL) {
- LIN_SDT_PROBE0(util, linux_driver_get_name_dev, nullcall);
- LIN_SDT_PROBE1(util, linux_driver_get_name_dev, return, NULL);
+ if (device_name == NULL)
return NULL;
- }
TAILQ_FOREACH(de, &devices, list) {
- if (strcmp(device_name, de->entry.bsd_driver_name) == 0) {
- LIN_SDT_PROBE1(util, linux_driver_get_name_dev, return,
- de->entry.linux_driver_name);
+ if (strcmp(device_name, de->entry.bsd_driver_name) == 0)
return (de->entry.linux_driver_name);
- }
}
- LIN_SDT_PROBE1(util, linux_driver_get_name_dev, return, NULL);
- return NULL;
+ return (NULL);
}
int
@@ -181,15 +129,8 @@ linux_driver_get_major_minor(const char *node, int *major, int *minor)
{
struct device_element *de;
- LIN_SDT_PROBE3(util, linux_driver_get_major_minor, entry, node, major,
- minor);
-
- if (node == NULL || major == NULL || minor == NULL) {
- LIN_SDT_PROBE0(util, linux_driver_get_major_minor, nullcall);
- LIN_SDT_PROBE3(util, linux_driver_get_major_minor, return, 1,
- 0, 0);
+ if (node == NULL || major == NULL || minor == NULL)
return 1;
- }
if (strlen(node) > strlen("pts/") &&
strncmp(node, "pts/", strlen("pts/")) == 0) {
@@ -204,25 +145,18 @@ linux_driver_get_major_minor(const char *node, int *major, int *minor)
*major = 136 + (devno / 256);
*minor = devno % 256;
- LIN_SDT_PROBE3(util, linux_driver_get_major_minor, return, 0,
- *major, *minor);
- return 0;
+ return (0);
}
TAILQ_FOREACH(de, &devices, list) {
if (strcmp(node, de->entry.bsd_device_name) == 0) {
*major = de->entry.linux_major;
*minor = de->entry.linux_minor;
-
- LIN_SDT_PROBE3(util, linux_driver_get_major_minor,
- return, 0, *major, *minor);
- return 0;
+ return (0);
}
}
- LIN_SDT_PROBE1(util, linux_driver_get_major_minor, notfound, node);
- LIN_SDT_PROBE3(util, linux_driver_get_major_minor, return, 1, 0, 0);
- return 1;
+ return (1);
}
char *
@@ -233,8 +167,6 @@ linux_get_char_devices()
char formated[256];
int current_size = 0, string_size = 1024;
- LIN_SDT_PROBE0(util, linux_get_char_devices, entry);
-
string = malloc(string_size, M_LINUX, M_WAITOK);
string[0] = '\000';
last = "";
@@ -261,19 +193,14 @@ linux_get_char_devices()
}
}
- LIN_SDT_PROBE1(util, linux_get_char_devices, return, string);
- return string;
+ return (string);
}
void
linux_free_get_char_devices(char *string)
{
- LIN_SDT_PROBE1(util, linux_get_char_devices, entry, string);
-
free(string, M_LINUX);
-
- LIN_SDT_PROBE0(util, linux_get_char_devices, return);
}
static int linux_major_starting = 200;
@@ -283,13 +210,8 @@ linux_device_register_handler(struct linux_device_handler *d)
{
struct device_element *de;
- LIN_SDT_PROBE1(util, linux_device_register_handler, entry, d);
-
- if (d == NULL) {
- LIN_SDT_PROBE1(util, linux_device_register_handler, return,
- EINVAL);
+ if (d == NULL)
return (EINVAL);
- }
de = malloc(sizeof(*de), M_LINUX, M_WAITOK);
if (d->linux_major < 0) {
@@ -300,7 +222,6 @@ linux_device_register_handler(struct linux_device_handler *d)
/* Add the element to the list, sorted on span. */
TAILQ_INSERT_TAIL(&devices, de, list);
- LIN_SDT_PROBE1(util, linux_device_register_handler, return, 0);
return (0);
}
@@ -309,25 +230,17 @@ linux_device_unregister_handler(struct linux_device_handler *d)
{
struct device_element *de;
- LIN_SDT_PROBE1(util, linux_device_unregister_handler, entry, d);
-
- if (d == NULL) {
- LIN_SDT_PROBE1(util, linux_device_unregister_handler, return,
- EINVAL);
+ if (d == NULL)
return (EINVAL);
- }
TAILQ_FOREACH(de, &devices, list) {
if (bcmp(d, &de->entry, sizeof(*d)) == 0) {
TAILQ_REMOVE(&devices, de, list);
free(de, M_LINUX);
- LIN_SDT_PROBE1(util, linux_device_unregister_handler,
- return, 0);
return (0);
}
}
- LIN_SDT_PROBE1(util, linux_device_unregister_handler, return, EINVAL);
return (EINVAL);
}
diff --git a/sys/compat/linux/linux_util.h b/sys/compat/linux/linux_util.h
index 6be0392..a52a7b9 100644
--- a/sys/compat/linux/linux_util.h
+++ b/sys/compat/linux/linux_util.h
@@ -44,6 +44,11 @@
#include <sys/cdefs.h>
#include <sys/uio.h>
+MALLOC_DECLARE(M_LINUX);
+MALLOC_DECLARE(M_EPOLL);
+MALLOC_DECLARE(M_FUTEX);
+MALLOC_DECLARE(M_FUTEX_WP);
+
extern const char linux_emul_path[];
int linux_emul_convpath(struct thread *, const char *, enum uio_seg, char **, int, int);
@@ -115,7 +120,6 @@ void linux_free_get_char_devices(char *string);
#define LINUX_CTRFMT(nm, fmt) #nm"("fmt")"
#define LINUX_CTR6(f, m, p1, p2, p3, p4, p5, p6) do { \
- if (ldebug(f)) \
CTR6(KTR_LINUX, LINUX_CTRFMT(f, m), \
p1, p2, p3, p4, p5, p6); \
} while (0)
diff --git a/sys/compat/linux/linux_vdso.c b/sys/compat/linux/linux_vdso.c
new file mode 100644
index 0000000..5ab0ee6
--- /dev/null
+++ b/sys/compat/linux/linux_vdso.c
@@ -0,0 +1,244 @@
+/*-
+ * Copyright (c) 2013 Dmitry Chagin
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer
+ * in this position and unchanged.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include "opt_compat.h"
+
+#if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
+#define __ELF_WORD_SIZE 32
+#else
+#define __ELF_WORD_SIZE 64
+#endif
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/elf.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/rwlock.h>
+#include <sys/queue.h>
+#include <sys/sysent.h>
+
+#include <vm/vm.h>
+#include <vm/vm_param.h>
+#include <vm/pmap.h>
+#include <vm/vm_extern.h>
+#include <vm/vm_kern.h>
+#include <vm/vm_map.h>
+#include <vm/vm_object.h>
+#include <vm/vm_page.h>
+#include <vm/vm_pager.h>
+
+#include <compat/linux/linux_vdso.h>
+
+SLIST_HEAD(, linux_vdso_sym) __elfN(linux_vdso_syms) =
+ SLIST_HEAD_INITIALIZER(__elfN(linux_vdso_syms));
+
+static int __elfN(symtabindex);
+static int __elfN(symstrindex);
+
+static void
+__elfN(linux_vdso_lookup)(Elf_Ehdr *, struct linux_vdso_sym *);
+
+
+void
+__elfN(linux_vdso_sym_init)(struct linux_vdso_sym *s)
+{
+
+ SLIST_INSERT_HEAD(&__elfN(linux_vdso_syms), s, sym);
+}
+
+vm_object_t
+__elfN(linux_shared_page_init)(char **mapping)
+{
+ vm_page_t m;
+ vm_object_t obj;
+ vm_offset_t addr;
+
+ obj = vm_pager_allocate(OBJT_PHYS, 0, PAGE_SIZE,
+ VM_PROT_DEFAULT, 0, NULL);
+ VM_OBJECT_WLOCK(obj);
+ m = vm_page_grab(obj, 0, VM_ALLOC_NOBUSY | VM_ALLOC_ZERO);
+ m->valid = VM_PAGE_BITS_ALL;
+ VM_OBJECT_WUNLOCK(obj);
+ addr = kva_alloc(PAGE_SIZE);
+ pmap_qenter(addr, &m, 1);
+ *mapping = (char *)addr;
+ return (obj);
+}
+
+void
+__elfN(linux_shared_page_fini)(vm_object_t obj)
+{
+
+ vm_object_deallocate(obj);
+}
+
+void
+__elfN(linux_vdso_fixup)(struct sysentvec *sv)
+{
+ Elf_Ehdr *ehdr;
+ Elf_Shdr *shdr;
+ int i;
+
+ ehdr = (Elf_Ehdr *) sv->sv_sigcode;
+
+ if (!IS_ELF(*ehdr) ||
+ ehdr->e_ident[EI_CLASS] != ELF_TARG_CLASS ||
+ ehdr->e_ident[EI_DATA] != ELF_TARG_DATA ||
+ ehdr->e_ident[EI_VERSION] != EV_CURRENT ||
+ ehdr->e_shoff == 0 ||
+ ehdr->e_shentsize != sizeof(Elf_Shdr))
+ panic("Linux invalid vdso header.\n");
+
+ if (ehdr->e_type != ET_DYN)
+ panic("Linux invalid vdso header.\n");
+
+ shdr = (Elf_Shdr *) ((caddr_t)ehdr + ehdr->e_shoff);
+
+ __elfN(symtabindex) = -1;
+ __elfN(symstrindex) = -1;
+ for (i = 0; i < ehdr->e_shnum; i++) {
+ if (shdr[i].sh_size == 0)
+ continue;
+ if (shdr[i].sh_type == SHT_DYNSYM) {
+ __elfN(symtabindex) = i;
+ __elfN(symstrindex) = shdr[i].sh_link;
+ }
+ }
+
+ if (__elfN(symtabindex) == -1 || __elfN(symstrindex) == -1)
+ panic("Linux invalid vdso header.\n");
+
+ ehdr->e_ident[EI_OSABI] = ELFOSABI_LINUX;
+}
+
+void
+__elfN(linux_vdso_reloc)(struct sysentvec *sv, long vdso_adjust)
+{
+ struct linux_vdso_sym *lsym;
+ Elf_Ehdr *ehdr;
+ Elf_Phdr *phdr;
+ Elf_Shdr *shdr;
+ Elf_Dyn *dyn;
+ Elf_Sym *sym;
+ int i, symcnt;
+
+ ehdr = (Elf_Ehdr *) sv->sv_sigcode;
+
+ /* Adjust our so relative to the sigcode_base */
+ if (vdso_adjust != 0) {
+ ehdr->e_entry += vdso_adjust;
+ phdr = (Elf_Phdr *)((caddr_t)ehdr + ehdr->e_phoff);
+
+ /* phdrs */
+ for (i = 0; i < ehdr->e_phnum; i++) {
+ phdr[i].p_vaddr += vdso_adjust;
+ if (phdr[i].p_type != PT_DYNAMIC)
+ continue;
+ dyn = (Elf_Dyn *)((caddr_t)ehdr + phdr[i].p_offset);
+ for(; dyn->d_tag != DT_NULL; dyn++) {
+ switch (dyn->d_tag) {
+ case DT_PLTGOT:
+ case DT_HASH:
+ case DT_STRTAB:
+ case DT_SYMTAB:
+ case DT_RELA:
+ case DT_INIT:
+ case DT_FINI:
+ case DT_REL:
+ case DT_DEBUG:
+ case DT_JMPREL:
+ case DT_VERSYM:
+ case DT_VERDEF:
+ case DT_VERNEED:
+ case DT_ADDRRNGLO ... DT_ADDRRNGHI:
+ dyn->d_un.d_ptr += vdso_adjust;
+ break;
+ case DT_ENCODING ... DT_LOOS-1:
+ case DT_LOOS ... DT_HIOS:
+ if (dyn->d_tag >= DT_ENCODING &&
+ (dyn->d_tag & 1) == 0)
+ dyn->d_un.d_ptr += vdso_adjust;
+ break;
+ default:
+ break;
+ }
+ }
+ }
+
+ /* sections */
+ shdr = (Elf_Shdr *)((caddr_t)ehdr + ehdr->e_shoff);
+ for(i = 0; i < ehdr->e_shnum; i++) {
+ if (!(shdr[i].sh_flags & SHF_ALLOC))
+ continue;
+ shdr[i].sh_addr += vdso_adjust;
+ if (shdr[i].sh_type != SHT_SYMTAB &&
+ shdr[i].sh_type != SHT_DYNSYM)
+ continue;
+
+ sym = (Elf_Sym *)((caddr_t)ehdr + shdr[i].sh_offset);
+ symcnt = shdr[i].sh_size / sizeof(*sym);
+
+ for(i = 0; i < symcnt; i++, sym++) {
+ if (sym->st_shndx == SHN_UNDEF ||
+ sym->st_shndx == SHN_ABS)
+ continue;
+ sym->st_value += vdso_adjust;
+ }
+ }
+ }
+
+ SLIST_FOREACH(lsym, &__elfN(linux_vdso_syms), sym)
+ __elfN(linux_vdso_lookup)(ehdr, lsym);
+}
+
+static void
+__elfN(linux_vdso_lookup)(Elf_Ehdr *ehdr, struct linux_vdso_sym *vsym)
+{
+ vm_offset_t strtab, symname;
+ uint32_t symcnt;
+ Elf_Shdr *shdr;
+ int i;
+
+ shdr = (Elf_Shdr *) ((caddr_t)ehdr + ehdr->e_shoff);
+
+ strtab = (vm_offset_t)((caddr_t)ehdr +
+ shdr[__elfN(symstrindex)].sh_offset);
+ Elf_Sym *sym = (Elf_Sym *)((caddr_t)ehdr +
+ shdr[__elfN(symtabindex)].sh_offset);
+ symcnt = shdr[__elfN(symtabindex)].sh_size / sizeof(*sym);
+
+ for (i = 0; i < symcnt; ++i, ++sym) {
+ symname = strtab + sym->st_name;
+ if (strncmp(vsym->symname, (char *)symname, vsym->size) == 0) {
+ *vsym->ptr = (uintptr_t)sym->st_value;
+ break;
+ }
+ }
+}
diff --git a/sys/compat/linux/linux_vdso.h b/sys/compat/linux/linux_vdso.h
new file mode 100644
index 0000000..e11ee8a
--- /dev/null
+++ b/sys/compat/linux/linux_vdso.h
@@ -0,0 +1,65 @@
+/*-
+ * Copyright (c) 2013 Dmitry Chagin
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer
+ * in this position and unchanged.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _LINUX_VDSO_H_
+#define _LINUX_VDSO_H_
+
+#include <sys/types.h>
+
+struct linux_vdso_sym {
+ SLIST_ENTRY(linux_vdso_sym) sym;
+ uint32_t size;
+ uintptr_t * ptr;
+ char symname[];
+};
+
+vm_object_t __elfN(linux_shared_page_init)(char **);
+void __elfN(linux_shared_page_fini)(vm_object_t);
+void __elfN(linux_vdso_fixup)(struct sysentvec *);
+void __elfN(linux_vdso_reloc)(struct sysentvec *, long);
+void __elfN(linux_vdso_sym_init)(struct linux_vdso_sym *);
+
+#define LINUX_VDSO_SYM_INTPTR(name) \
+uintptr_t name; \
+LINUX_VDSO_SYM_DEFINE(name)
+
+#define LINUX_VDSO_SYM_CHAR(name) \
+const char * name; \
+LINUX_VDSO_SYM_DEFINE(name)
+
+#define LINUX_VDSO_SYM_DEFINE(name) \
+static struct linux_vdso_sym name ## sym = { \
+ .symname = #name, \
+ .size = sizeof(#name), \
+ .ptr = (uintptr_t *)&name \
+}; \
+SYSINIT(__elfN(name ## _sym_init), SI_SUB_EXEC, \
+ SI_ORDER_FIRST, __elfN(linux_vdso_sym_init), &name ## sym); \
+struct __hack
+
+#endif /* _LINUX_VDSO_H_ */
diff --git a/sys/compat/linux/stats_timing.d b/sys/compat/linux/stats_timing.d
index d0b6f73..1b60dc9 100644
--- a/sys/compat/linux/stats_timing.d
+++ b/sys/compat/linux/stats_timing.d
@@ -39,7 +39,6 @@
* possible for a given application
* - graph of longest running (CPU-time!) function in total
* - may help finding problem cases in the kernel code
- * - timing statistics for the emul_lock
* - graph of longest held (CPU-time!) locks
*/
diff --git a/sys/compat/svr4/svr4_misc.c b/sys/compat/svr4/svr4_misc.c
index e244700..5d1a409 100644
--- a/sys/compat/svr4/svr4_misc.c
+++ b/sys/compat/svr4/svr4_misc.c
@@ -875,9 +875,9 @@ svr4_sys_times(td, uap)
p = td->td_proc;
PROC_LOCK(p);
- PROC_SLOCK(p);
+ PROC_STATLOCK(p);
calcru(p, &utime, &stime);
- PROC_SUNLOCK(p);
+ PROC_STATUNLOCK(p);
calccru(p, &cutime, &cstime);
PROC_UNLOCK(p);
@@ -1288,9 +1288,9 @@ loop:
pid = p->p_pid;
status = p->p_xstat;
ru = p->p_ru;
- PROC_SLOCK(p);
+ PROC_STATLOCK(p);
calcru(p, &ru.ru_utime, &ru.ru_stime);
- PROC_SUNLOCK(p);
+ PROC_STATUNLOCK(p);
PROC_UNLOCK(p);
sx_sunlock(&proctree_lock);
@@ -1315,9 +1315,9 @@ loop:
pid = p->p_pid;
status = W_STOPCODE(p->p_xstat);
ru = p->p_ru;
- PROC_SLOCK(p);
+ PROC_STATLOCK(p);
calcru(p, &ru.ru_utime, &ru.ru_stime);
- PROC_SUNLOCK(p);
+ PROC_STATUNLOCK(p);
PROC_UNLOCK(p);
if (((uap->options & SVR4_WNOWAIT)) == 0) {
@@ -1339,9 +1339,9 @@ loop:
pid = p->p_pid;
ru = p->p_ru;
status = SIGCONT;
- PROC_SLOCK(p);
+ PROC_STATLOCK(p);
calcru(p, &ru.ru_utime, &ru.ru_stime);
- PROC_SUNLOCK(p);
+ PROC_STATUNLOCK(p);
PROC_UNLOCK(p);
if (((uap->options & SVR4_WNOWAIT)) == 0) {
diff --git a/sys/compat/svr4/svr4_sysvec.c b/sys/compat/svr4/svr4_sysvec.c
index 561a838..125a7d8 100644
--- a/sys/compat/svr4/svr4_sysvec.c
+++ b/sys/compat/svr4/svr4_sysvec.c
@@ -196,6 +196,7 @@ struct sysentvec svr4_sysvec = {
.sv_fetch_syscall_args = cpu_fetch_syscall_args,
.sv_syscallnames = NULL,
.sv_schedtail = NULL,
+ .sv_thread_detach = NULL,
};
const char svr4_emul_path[] = "/compat/svr4";
OpenPOWER on IntegriCloud