diff options
author | Renato Botelho <renato@netgate.com> | 2016-01-13 17:56:30 -0200 |
---|---|---|
committer | Renato Botelho <renato@netgate.com> | 2016-01-13 17:56:30 -0200 |
commit | 3e0bf52f358eb969d165c4b1e54942ee94cf2c8d (patch) | |
tree | 440bb9907871a5bc578d65b32f0c4aa339096175 /sys/compat | |
parent | 4b4ac714f11471e43f18410bcc86da8f9dc3b88c (diff) | |
parent | e357bdb742b2696dcb81404917b6247f9e840232 (diff) | |
download | FreeBSD-src-3e0bf52f358eb969d165c4b1e54942ee94cf2c8d.zip FreeBSD-src-3e0bf52f358eb969d165c4b1e54942ee94cf2c8d.tar.gz |
Merge remote-tracking branch 'origin/stable/10' into devel
Diffstat (limited to 'sys/compat')
50 files changed, 4443 insertions, 1829 deletions
diff --git a/sys/compat/freebsd32/freebsd32_misc.c b/sys/compat/freebsd32/freebsd32_misc.c index afcef0d..2f60c13 100644 --- a/sys/compat/freebsd32/freebsd32_misc.c +++ b/sys/compat/freebsd32/freebsd32_misc.c @@ -1429,6 +1429,49 @@ freebsd32_futimesat(struct thread *td, struct freebsd32_futimesat_args *uap) } int +freebsd32_futimens(struct thread *td, struct freebsd32_futimens_args *uap) +{ + struct timespec32 ts32[2]; + struct timespec ts[2], *tsp; + int error; + + if (uap->times != NULL) { + error = copyin(uap->times, ts32, sizeof(ts32)); + if (error) + return (error); + CP(ts32[0], ts[0], tv_sec); + CP(ts32[0], ts[0], tv_nsec); + CP(ts32[1], ts[1], tv_sec); + CP(ts32[1], ts[1], tv_nsec); + tsp = ts; + } else + tsp = NULL; + return (kern_futimens(td, uap->fd, tsp, UIO_SYSSPACE)); +} + +int +freebsd32_utimensat(struct thread *td, struct freebsd32_utimensat_args *uap) +{ + struct timespec32 ts32[2]; + struct timespec ts[2], *tsp; + int error; + + if (uap->times != NULL) { + error = copyin(uap->times, ts32, sizeof(ts32)); + if (error) + return (error); + CP(ts32[0], ts[0], tv_sec); + CP(ts32[0], ts[0], tv_nsec); + CP(ts32[1], ts[1], tv_sec); + CP(ts32[1], ts[1], tv_nsec); + tsp = ts; + } else + tsp = NULL; + return (kern_utimensat(td, uap->fd, uap->path, UIO_USERSPACE, + tsp, UIO_SYSSPACE, uap->flag)); +} + +int freebsd32_adjtime(struct thread *td, struct freebsd32_adjtime_args *uap) { struct timeval32 tv32; diff --git a/sys/compat/freebsd32/freebsd32_proto.h b/sys/compat/freebsd32/freebsd32_proto.h index c4a1e30..16dc17e 100644 --- a/sys/compat/freebsd32/freebsd32_proto.h +++ b/sys/compat/freebsd32/freebsd32_proto.h @@ -3,7 +3,7 @@ * * DO NOT EDIT-- this file is automatically generated. * $FreeBSD$ - * created from FreeBSD: stable/10/sys/compat/freebsd32/syscalls.master 276955 2015-01-11 07:02:03Z dchagin + * created from FreeBSD: stable/10/sys/compat/freebsd32/syscalls.master 293474 2016-01-09 14:20:23Z dchagin */ #ifndef _FREEBSD32_SYSPROTO_H_ @@ -699,6 +699,16 @@ struct freebsd32_ppoll_args { char ts_l_[PADL_(const struct timespec32 *)]; const struct timespec32 * ts; char ts_r_[PADR_(const struct timespec32 *)]; char set_l_[PADL_(const sigset_t *)]; const sigset_t * set; char set_r_[PADR_(const sigset_t *)]; }; +struct freebsd32_futimens_args { + char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)]; + char times_l_[PADL_(struct timespec *)]; struct timespec * times; char times_r_[PADR_(struct timespec *)]; +}; +struct freebsd32_utimensat_args { + char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)]; + char path_l_[PADL_(char *)]; char * path; char path_r_[PADR_(char *)]; + char times_l_[PADL_(struct timespec *)]; struct timespec * times; char times_r_[PADR_(struct timespec *)]; + char flag_l_[PADL_(int)]; int flag; char flag_r_[PADR_(int)]; +}; #if !defined(PAD64_REQUIRED) && (defined(__powerpc__) || defined(__mips__)) #define PAD64_REQUIRED #endif @@ -833,6 +843,8 @@ int freebsd32_procctl(struct thread *, struct freebsd32_procctl_args *); int freebsd32_procctl(struct thread *, struct freebsd32_procctl_args *); #endif int freebsd32_ppoll(struct thread *, struct freebsd32_ppoll_args *); +int freebsd32_futimens(struct thread *, struct freebsd32_futimens_args *); +int freebsd32_utimensat(struct thread *, struct freebsd32_utimensat_args *); #ifdef COMPAT_43 @@ -1250,6 +1262,8 @@ int freebsd7_freebsd32_shmctl(struct thread *, struct freebsd7_freebsd32_shmctl_ #define FREEBSD32_SYS_AUE_freebsd32_procctl AUE_NULL #define FREEBSD32_SYS_AUE_freebsd32_procctl AUE_NULL #define FREEBSD32_SYS_AUE_freebsd32_ppoll AUE_POLL +#define FREEBSD32_SYS_AUE_freebsd32_futimens AUE_FUTIMES +#define FREEBSD32_SYS_AUE_freebsd32_utimensat AUE_FUTIMESAT #undef PAD_ #undef PADL_ diff --git a/sys/compat/freebsd32/freebsd32_syscall.h b/sys/compat/freebsd32/freebsd32_syscall.h index 92af89b..cf4e492 100644 --- a/sys/compat/freebsd32/freebsd32_syscall.h +++ b/sys/compat/freebsd32/freebsd32_syscall.h @@ -3,7 +3,7 @@ * * DO NOT EDIT-- this file is automatically generated. * $FreeBSD$ - * created from FreeBSD: stable/10/sys/compat/freebsd32/syscalls.master 276955 2015-01-11 07:02:03Z dchagin + * created from FreeBSD: stable/10/sys/compat/freebsd32/syscalls.master 293474 2016-01-09 14:20:23Z dchagin */ #define FREEBSD32_SYS_syscall 0 @@ -455,4 +455,6 @@ #define FREEBSD32_SYS_freebsd32_procctl 544 #define FREEBSD32_SYS_freebsd32_procctl 544 #define FREEBSD32_SYS_freebsd32_ppoll 545 -#define FREEBSD32_SYS_MAXSYSCALL 546 +#define FREEBSD32_SYS_freebsd32_futimens 546 +#define FREEBSD32_SYS_freebsd32_utimensat 547 +#define FREEBSD32_SYS_MAXSYSCALL 548 diff --git a/sys/compat/freebsd32/freebsd32_syscalls.c b/sys/compat/freebsd32/freebsd32_syscalls.c index 01a1201..5b442c3 100644 --- a/sys/compat/freebsd32/freebsd32_syscalls.c +++ b/sys/compat/freebsd32/freebsd32_syscalls.c @@ -3,7 +3,7 @@ * * DO NOT EDIT-- this file is automatically generated. * $FreeBSD$ - * created from FreeBSD: stable/10/sys/compat/freebsd32/syscalls.master 276955 2015-01-11 07:02:03Z dchagin + * created from FreeBSD: stable/10/sys/compat/freebsd32/syscalls.master 293474 2016-01-09 14:20:23Z dchagin */ const char *freebsd32_syscallnames[] = { @@ -579,4 +579,6 @@ const char *freebsd32_syscallnames[] = { "freebsd32_procctl", /* 544 = freebsd32_procctl */ #endif "freebsd32_ppoll", /* 545 = freebsd32_ppoll */ + "freebsd32_futimens", /* 546 = freebsd32_futimens */ + "freebsd32_utimensat", /* 547 = freebsd32_utimensat */ }; diff --git a/sys/compat/freebsd32/freebsd32_sysent.c b/sys/compat/freebsd32/freebsd32_sysent.c index 3b0b3fe..f3321a0 100644 --- a/sys/compat/freebsd32/freebsd32_sysent.c +++ b/sys/compat/freebsd32/freebsd32_sysent.c @@ -3,7 +3,7 @@ * * DO NOT EDIT-- this file is automatically generated. * $FreeBSD$ - * created from FreeBSD: stable/10/sys/compat/freebsd32/syscalls.master 276955 2015-01-11 07:02:03Z dchagin + * created from FreeBSD: stable/10/sys/compat/freebsd32/syscalls.master 293474 2016-01-09 14:20:23Z dchagin */ #include "opt_compat.h" @@ -616,4 +616,6 @@ struct sysent freebsd32_sysent[] = { { AS(freebsd32_procctl_args), (sy_call_t *)freebsd32_procctl, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 544 = freebsd32_procctl */ #endif { AS(freebsd32_ppoll_args), (sy_call_t *)freebsd32_ppoll, AUE_POLL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 545 = freebsd32_ppoll */ + { AS(freebsd32_futimens_args), (sy_call_t *)freebsd32_futimens, AUE_FUTIMES, NULL, 0, 0, 0, SY_THR_STATIC }, /* 546 = freebsd32_futimens */ + { AS(freebsd32_utimensat_args), (sy_call_t *)freebsd32_utimensat, AUE_FUTIMESAT, NULL, 0, 0, 0, SY_THR_STATIC }, /* 547 = freebsd32_utimensat */ }; diff --git a/sys/compat/freebsd32/freebsd32_systrace_args.c b/sys/compat/freebsd32/freebsd32_systrace_args.c index 6d9ab1c..87cf3b3 100644 --- a/sys/compat/freebsd32/freebsd32_systrace_args.c +++ b/sys/compat/freebsd32/freebsd32_systrace_args.c @@ -3323,6 +3323,24 @@ systrace_args(int sysnum, void *params, uint64_t *uarg, int *n_args) *n_args = 4; break; } + /* freebsd32_futimens */ + case 546: { + struct freebsd32_futimens_args *p = params; + iarg[0] = p->fd; /* int */ + uarg[1] = (intptr_t) p->times; /* struct timespec * */ + *n_args = 2; + break; + } + /* freebsd32_utimensat */ + case 547: { + struct freebsd32_utimensat_args *p = params; + iarg[0] = p->fd; /* int */ + uarg[1] = (intptr_t) p->path; /* char * */ + uarg[2] = (intptr_t) p->times; /* struct timespec * */ + iarg[3] = p->flag; /* int */ + *n_args = 4; + break; + } default: *n_args = 0; break; @@ -8907,6 +8925,38 @@ systrace_entry_setargdesc(int sysnum, int ndx, char *desc, size_t descsz) break; }; break; + /* freebsd32_futimens */ + case 546: + switch(ndx) { + case 0: + p = "int"; + break; + case 1: + p = "struct timespec *"; + break; + default: + break; + }; + break; + /* freebsd32_utimensat */ + case 547: + switch(ndx) { + case 0: + p = "int"; + break; + case 1: + p = "char *"; + break; + case 2: + p = "struct timespec *"; + break; + case 3: + p = "int"; + break; + default: + break; + }; + break; default: break; }; @@ -10795,6 +10845,16 @@ systrace_return_setargdesc(int sysnum, int ndx, char *desc, size_t descsz) if (ndx == 0 || ndx == 1) p = "int"; break; + /* freebsd32_futimens */ + case 546: + if (ndx == 0 || ndx == 1) + p = "int"; + break; + /* freebsd32_utimensat */ + case 547: + if (ndx == 0 || ndx == 1) + p = "int"; + break; default: break; }; diff --git a/sys/compat/freebsd32/syscalls.master b/sys/compat/freebsd32/syscalls.master index 48c2f3e..6c167bf 100644 --- a/sys/compat/freebsd32/syscalls.master +++ b/sys/compat/freebsd32/syscalls.master @@ -1069,3 +1069,8 @@ 545 AUE_POLL STD { int freebsd32_ppoll(struct pollfd *fds, \ u_int nfds, const struct timespec32 *ts, \ const sigset_t *set); } +546 AUE_FUTIMES STD { int freebsd32_futimens(int fd, \ + struct timespec *times); } +547 AUE_FUTIMESAT STD { int freebsd32_utimensat(int fd, \ + char *path, \ + struct timespec *times, int flag); } diff --git a/sys/compat/ia32/ia32_sysvec.c b/sys/compat/ia32/ia32_sysvec.c index bfc17d6..206935a 100644 --- a/sys/compat/ia32/ia32_sysvec.c +++ b/sys/compat/ia32/ia32_sysvec.c @@ -139,6 +139,7 @@ struct sysentvec ia32_freebsd_sysvec = { .sv_shared_page_base = FREEBSD32_SHAREDPAGE, .sv_shared_page_len = PAGE_SIZE, .sv_schedtail = NULL, + .sv_thread_detach = NULL, }; INIT_SYSENTVEC(elf_ia32_sysvec, &ia32_freebsd_sysvec); diff --git a/sys/compat/linprocfs/linprocfs.c b/sys/compat/linprocfs/linprocfs.c index 4aae77e..6e591e9 100644 --- a/sys/compat/linprocfs/linprocfs.c +++ b/sys/compat/linprocfs/linprocfs.c @@ -39,13 +39,12 @@ * @(#)procfs_status.c 8.4 (Berkeley) 6/15/94 */ -#include "opt_compat.h" - #include <sys/cdefs.h> __FBSDID("$FreeBSD$"); #include <sys/param.h> #include <sys/queue.h> +#include <sys/systm.h> #include <sys/blist.h> #include <sys/conf.h> #include <sys/exec.h> @@ -53,6 +52,7 @@ __FBSDID("$FreeBSD$"); #include <sys/filedesc.h> #include <sys/jail.h> #include <sys/kernel.h> +#include <sys/limits.h> #include <sys/linker.h> #include <sys/lock.h> #include <sys/malloc.h> @@ -68,6 +68,7 @@ __FBSDID("$FreeBSD$"); #include <sys/smp.h> #include <sys/socket.h> #include <sys/sysctl.h> +#include <sys/sysent.h> #include <sys/systm.h> #include <sys/time.h> #include <sys/tty.h> @@ -78,7 +79,7 @@ __FBSDID("$FreeBSD$"); #include <sys/bus.h> #include <net/if.h> -#include <net/vnet.h> +#include <net/if_types.h> #include <vm/vm.h> #include <vm/vm_extern.h> @@ -98,11 +99,7 @@ __FBSDID("$FreeBSD$"); #include <machine/md_var.h> #endif /* __i386__ || __amd64__ */ -#ifdef COMPAT_FREEBSD32 -#include <compat/freebsd32/freebsd32_util.h> -#endif - -#include <compat/linux/linux_ioctl.h> +#include <compat/linux/linux.h> #include <compat/linux/linux_mib.h> #include <compat/linux/linux_misc.h> #include <compat/linux/linux_util.h> @@ -734,6 +731,7 @@ linprocfs_doprocstatus(PFS_FILL_ARGS) segsz_t lsize; struct thread *td2; struct sigacts *ps; + l_sigset_t siglist, sigignore, sigcatch; int i; PROC_LOCK(p); @@ -822,29 +820,25 @@ linprocfs_doprocstatus(PFS_FILL_ARGS) /* * Signal masks - * - * We support up to 128 signals, while Linux supports 32, - * but we only define 32 (the same 32 as Linux, to boot), so - * just show the lower 32 bits of each mask. XXX hack. - * - * NB: on certain platforms (Sparc at least) Linux actually - * supports 64 signals, but this code is a long way from - * running on anything but i386, so ignore that for now. */ PROC_LOCK(p); - sbuf_printf(sb, "SigPnd:\t%08x\n", p->p_siglist.__bits[0]); - /* - * I can't seem to find out where the signal mask is in - * relation to struct proc, so SigBlk is left unimplemented. - */ - sbuf_printf(sb, "SigBlk:\t%08x\n", 0); /* XXX */ + bsd_to_linux_sigset(&p->p_siglist, &siglist); ps = p->p_sigacts; mtx_lock(&ps->ps_mtx); - sbuf_printf(sb, "SigIgn:\t%08x\n", ps->ps_sigignore.__bits[0]); - sbuf_printf(sb, "SigCgt:\t%08x\n", ps->ps_sigcatch.__bits[0]); + bsd_to_linux_sigset(&ps->ps_sigignore, &sigignore); + bsd_to_linux_sigset(&ps->ps_sigcatch, &sigcatch); mtx_unlock(&ps->ps_mtx); PROC_UNLOCK(p); + sbuf_printf(sb, "SigPnd:\t%016jx\n", siglist.__mask); + /* + * XXX. SigBlk - target thread's signal mask, td_sigmask. + * To implement SigBlk pseudofs should support proc/tid dir entries. + */ + sbuf_printf(sb, "SigBlk:\t%016x\n", 0); + sbuf_printf(sb, "SigIgn:\t%016jx\n", sigignore.__mask); + sbuf_printf(sb, "SigCgt:\t%016jx\n", sigcatch.__mask); + /* * Linux also prints the capability masks, but we don't have * capabilities yet, and when we do get them they're likely to @@ -937,34 +931,22 @@ linprocfs_doproccmdline(PFS_FILL_ARGS) static int linprocfs_doprocenviron(PFS_FILL_ARGS) { - int ret; - - PROC_LOCK(p); - if ((ret = p_candebug(td, p)) != 0) { - PROC_UNLOCK(p); - return (ret); - } /* * Mimic linux behavior and pass only processes with usermode * address space as valid. Return zero silently otherwize. */ - if (p->p_vmspace == &vmspace0) { - PROC_UNLOCK(p); + if (p->p_vmspace == &vmspace0) return (0); - } - if ((p->p_flag & P_SYSTEM) != 0) { - PROC_UNLOCK(p); - return (0); - } - - PROC_UNLOCK(p); - - ret = proc_getenvv(td, p, sb); - return (ret); + return (proc_getenvv(td, p, sb)); } +static char l32_map_str[] = "%08lx-%08lx %s%s%s%s %08lx %02x:%02x %lu%s%s\n"; +static char l64_map_str[] = "%016lx-%016lx %s%s%s%s %08lx %02x:%02x %lu%s%s\n"; +static char vdso_str[] = " [vdso]"; +static char stack_str[] = " [stack]"; + /* * Filler function for proc/pid/maps */ @@ -980,6 +962,7 @@ linprocfs_doprocmaps(PFS_FILL_ARGS) vm_prot_t e_prot; unsigned int last_timestamp; char *name = "", *freename = NULL; + const char *l_map_str; ino_t ino; int ref_count, shadow_count, flags; int error; @@ -999,6 +982,11 @@ linprocfs_doprocmaps(PFS_FILL_ARGS) vm = vmspace_acquire_ref(p); if (vm == NULL) return (ESRCH); + + if (SV_CURPROC_FLAG(SV_LP64)) + l_map_str = l64_map_str; + else + l_map_str = l32_map_str; map = &vm->vm_map; vm_map_lock_read(map); for (entry = map->header.next; entry != &map->header; @@ -1037,6 +1025,11 @@ linprocfs_doprocmaps(PFS_FILL_ARGS) VOP_GETATTR(vp, &vat, td->td_ucred); ino = vat.va_fileid; vput(vp); + } else if (SV_PROC_ABI(p) == SV_ABI_LINUX) { + if (e_start == p->p_sysent->sv_shared_page_base) + name = vdso_str; + if (e_end == p->p_sysent->sv_usrstack) + name = stack_str; } } else { flags = 0; @@ -1048,8 +1041,7 @@ linprocfs_doprocmaps(PFS_FILL_ARGS) * format: * start, end, access, offset, major, minor, inode, name. */ - error = sbuf_printf(sb, - "%08lx-%08lx %s%s%s%s %08lx %02x:%02x %lu%s%s\n", + error = sbuf_printf(sb, l_map_str, (u_long)e_start, (u_long)e_end, (e_prot & VM_PROT_READ)?"r":"-", (e_prot & VM_PROT_WRITE)?"w":"-", @@ -1086,6 +1078,35 @@ linprocfs_doprocmaps(PFS_FILL_ARGS) } /* + * Criteria for interface name translation + */ +#define IFP_IS_ETH(ifp) (ifp->if_type == IFT_ETHER) + +static int +linux_ifname(struct ifnet *ifp, char *buffer, size_t buflen) +{ + struct ifnet *ifscan; + int ethno; + + IFNET_RLOCK_ASSERT(); + + /* Short-circuit non ethernet interfaces */ + if (!IFP_IS_ETH(ifp)) + return (strlcpy(buffer, ifp->if_xname, buflen)); + + /* Determine the (relative) unit number for ethernet interfaces */ + ethno = 0; + TAILQ_FOREACH(ifscan, &V_ifnet, if_link) { + if (ifscan == ifp) + return (snprintf(buffer, buflen, "eth%d", ethno)); + if (IFP_IS_ETH(ifscan)) + ethno++; + } + + return (0); +} + +/* * Filler function for proc/net/dev */ static int @@ -1232,8 +1253,6 @@ linprocfs_doscsiscsi(PFS_FILL_ARGS) return (0); } -extern struct cdevsw *cdevsw[]; - /* * Filler function for proc/devices */ @@ -1328,6 +1347,52 @@ linprocfs_douuid(PFS_FILL_ARGS) return(0); } +/* + * Filler function for proc/pid/auxv + */ +static int +linprocfs_doauxv(PFS_FILL_ARGS) +{ + struct sbuf *asb; + off_t buflen, resid; + int error; + + /* + * Mimic linux behavior and pass only processes with usermode + * address space as valid. Return zero silently otherwise. + */ + if (p->p_vmspace == &vmspace0) + return (0); + + if (uio->uio_resid == 0) + return (0); + if (uio->uio_offset < 0 || uio->uio_resid < 0) + return (EINVAL); + + asb = sbuf_new_auto(); + if (asb == NULL) + return (ENOMEM); + error = proc_getauxv(td, p, asb); + if (error == 0) + error = sbuf_finish(asb); + + resid = sbuf_len(asb) - uio->uio_offset; + if (resid > uio->uio_resid) + buflen = uio->uio_resid; + else + buflen = resid; + if (buflen > IOSIZE_MAX) + return (EINVAL); + if (buflen > MAXPHYS) + buflen = MAXPHYS; + if (resid <= 0) + return (0); + + if (error == 0) + error = uiomove(sbuf_data(asb) + uio->uio_offset, buflen, uio); + sbuf_delete(asb); + return (error); +} /* * Constructor @@ -1386,7 +1451,7 @@ linprocfs_init(PFS_INIT_ARGS) pfs_create_link(dir, "cwd", &linprocfs_doproccwd, NULL, NULL, NULL, 0); pfs_create_file(dir, "environ", &linprocfs_doprocenviron, - NULL, NULL, NULL, PFS_RD); + NULL, &procfs_candebug, NULL, PFS_RD); pfs_create_link(dir, "exe", &procfs_doprocfile, NULL, &procfs_notsystem, NULL, 0); pfs_create_file(dir, "maps", &linprocfs_doprocmaps, @@ -1403,6 +1468,8 @@ linprocfs_init(PFS_INIT_ARGS) NULL, NULL, NULL, PFS_RD); pfs_create_link(dir, "fd", &linprocfs_dofdescfs, NULL, NULL, NULL, 0); + pfs_create_file(dir, "auxv", &linprocfs_doauxv, + NULL, &procfs_candebug, NULL, PFS_RD|PFS_RAWRD); /* /proc/scsi/... */ dir = pfs_create_dir(root, "scsi", NULL, NULL, NULL, 0); @@ -1448,7 +1515,11 @@ linprocfs_uninit(PFS_INIT_ARGS) } PSEUDOFS(linprocfs, 1, 0); +#if defined(__amd64__) +MODULE_DEPEND(linprocfs, linux_common, 1, 1, 1); +#else MODULE_DEPEND(linprocfs, linux, 1, 1, 1); +#endif MODULE_DEPEND(linprocfs, procfs, 1, 1, 1); MODULE_DEPEND(linprocfs, sysvmsg, 1, 1, 1); MODULE_DEPEND(linprocfs, sysvsem, 1, 1, 1); diff --git a/sys/compat/linsysfs/linsysfs.c b/sys/compat/linsysfs/linsysfs.c index 45f44af..8b5f9b5 100644 --- a/sys/compat/linsysfs/linsysfs.c +++ b/sys/compat/linsysfs/linsysfs.c @@ -61,12 +61,6 @@ __FBSDID("$FreeBSD$"); #include <machine/bus.h> -#include "opt_compat.h" -#ifdef COMPAT_LINUX32 /* XXX */ -#include <machine/../linux32/linux.h> -#else -#include <machine/../linux/linux.h> -#endif #include <compat/linux/linux_ioctl.h> #include <compat/linux/linux_mib.h> #include <compat/linux/linux_util.h> @@ -281,4 +275,8 @@ linsysfs_uninit(PFS_INIT_ARGS) } PSEUDOFS(linsysfs, 1, 0); +#if defined(__amd64__) +MODULE_DEPEND(linsysfs, linux_common, 1, 1, 1); +#else MODULE_DEPEND(linsysfs, linux, 1, 1, 1); +#endif diff --git a/sys/compat/linux/check_error.d b/sys/compat/linux/check_error.d index 9e3c00a..389e768 100644 --- a/sys/compat/linux/check_error.d +++ b/sys/compat/linux/check_error.d @@ -36,8 +36,8 @@ */ linuxulator*:dummy::not_implemented, -linuxulator*:emul:proc_exit:child_clear_tid_error, -linuxulator*:emul:proc_exit:futex_failed, +linuxulator*:emul:linux_thread_detach:child_clear_tid_error, +linuxulator*:emul:linux_thread_detach:futex_failed, linuxulator*:emul:linux_schedtail:copyout_error, linuxulator*:futex:futex_get:error, linuxulator*:futex:futex_sleep:requeue_error, diff --git a/sys/compat/linux/check_internal_locks.d b/sys/compat/linux/check_internal_locks.d index 2bdef68..b9d7c61 100644 --- a/sys/compat/linux/check_internal_locks.d +++ b/sys/compat/linux/check_internal_locks.d @@ -41,14 +41,9 @@ BEGIN { - check["emul_lock"] = 0; - check["emul_shared_rlock"] = 0; - check["emul_shared_wlock"] = 0; check["futex_mtx"] = 0; } -linuxulator*:locks:emul_lock:locked, -linuxulator*:locks:emul_shared_wlock:locked, linuxulator*:locks:futex_mtx:locked /check[probefunc] > 0/ { @@ -57,9 +52,6 @@ linuxulator*:locks:futex_mtx:locked stack(); } -linuxulator*:locks:emul_lock:locked, -linuxulator*:locks:emul_shared_rlock:locked, -linuxulator*:locks:emul_shared_wlock:locked, linuxulator*:locks:futex_mtx:locked { ++check[probefunc]; @@ -69,9 +61,6 @@ linuxulator*:locks:futex_mtx:locked spec[probefunc] = speculation(); } -linuxulator*:locks:emul_lock:unlock, -linuxulator*:locks:emul_shared_rlock:unlock, -linuxulator*:locks:emul_shared_wlock:unlock, linuxulator*:locks:futex_mtx:unlock /check[probefunc] == 0/ { @@ -82,9 +71,6 @@ linuxulator*:locks:futex_mtx:unlock stack(); } -linuxulator*:locks:emul_lock:unlock, -linuxulator*:locks:emul_shared_rlock:unlock, -linuxulator*:locks:emul_shared_wlock:unlock, linuxulator*:locks:futex_mtx:unlock { discard(spec[probefunc]); @@ -95,27 +81,6 @@ linuxulator*:locks:futex_mtx:unlock /* Timeout handling */ tick-10s -/spec["emul_lock"] != 0 && timestamp - ts["emul_lock"] >= 9999999000/ -{ - commit(spec["emul_lock"]); - spec["emul_lock"] = 0; -} - -tick-10s -/spec["emul_shared_wlock"] != 0 && timestamp - ts["emul_shared_wlock"] >= 9999999000/ -{ - commit(spec["emul_shared_wlock"]); - spec["emul_shared_wlock"] = 0; -} - -tick-10s -/spec["emul_shared_rlock"] != 0 && timestamp - ts["emul_shared_rlock"] >= 9999999000/ -{ - commit(spec["emul_shared_rlock"]); - spec["emul_shared_rlock"] = 0; -} - -tick-10s /spec["futex_mtx"] != 0 && timestamp - ts["futex_mtx"] >= 9999999000/ { commit(spec["futex_mtx"]); diff --git a/sys/compat/linux/linux.c b/sys/compat/linux/linux.c new file mode 100644 index 0000000..d1d7877 --- /dev/null +++ b/sys/compat/linux/linux.c @@ -0,0 +1,205 @@ +/*- + * Copyright (c) 2015 Dmitry Chagin + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/signalvar.h> + +#include <compat/linux/linux.h> + + +static int bsd_to_linux_sigtbl[LINUX_SIGTBLSZ] = { + LINUX_SIGHUP, /* SIGHUP */ + LINUX_SIGINT, /* SIGINT */ + LINUX_SIGQUIT, /* SIGQUIT */ + LINUX_SIGILL, /* SIGILL */ + LINUX_SIGTRAP, /* SIGTRAP */ + LINUX_SIGABRT, /* SIGABRT */ + 0, /* SIGEMT */ + LINUX_SIGFPE, /* SIGFPE */ + LINUX_SIGKILL, /* SIGKILL */ + LINUX_SIGBUS, /* SIGBUS */ + LINUX_SIGSEGV, /* SIGSEGV */ + LINUX_SIGSYS, /* SIGSYS */ + LINUX_SIGPIPE, /* SIGPIPE */ + LINUX_SIGALRM, /* SIGALRM */ + LINUX_SIGTERM, /* SIGTERM */ + LINUX_SIGURG, /* SIGURG */ + LINUX_SIGSTOP, /* SIGSTOP */ + LINUX_SIGTSTP, /* SIGTSTP */ + LINUX_SIGCONT, /* SIGCONT */ + LINUX_SIGCHLD, /* SIGCHLD */ + LINUX_SIGTTIN, /* SIGTTIN */ + LINUX_SIGTTOU, /* SIGTTOU */ + LINUX_SIGIO, /* SIGIO */ + LINUX_SIGXCPU, /* SIGXCPU */ + LINUX_SIGXFSZ, /* SIGXFSZ */ + LINUX_SIGVTALRM,/* SIGVTALRM */ + LINUX_SIGPROF, /* SIGPROF */ + LINUX_SIGWINCH, /* SIGWINCH */ + 0, /* SIGINFO */ + LINUX_SIGUSR1, /* SIGUSR1 */ + LINUX_SIGUSR2 /* SIGUSR2 */ +}; + +static int linux_to_bsd_sigtbl[LINUX_SIGTBLSZ] = { + SIGHUP, /* LINUX_SIGHUP */ + SIGINT, /* LINUX_SIGINT */ + SIGQUIT, /* LINUX_SIGQUIT */ + SIGILL, /* LINUX_SIGILL */ + SIGTRAP, /* LINUX_SIGTRAP */ + SIGABRT, /* LINUX_SIGABRT */ + SIGBUS, /* LINUX_SIGBUS */ + SIGFPE, /* LINUX_SIGFPE */ + SIGKILL, /* LINUX_SIGKILL */ + SIGUSR1, /* LINUX_SIGUSR1 */ + SIGSEGV, /* LINUX_SIGSEGV */ + SIGUSR2, /* LINUX_SIGUSR2 */ + SIGPIPE, /* LINUX_SIGPIPE */ + SIGALRM, /* LINUX_SIGALRM */ + SIGTERM, /* LINUX_SIGTERM */ + SIGBUS, /* LINUX_SIGSTKFLT */ + SIGCHLD, /* LINUX_SIGCHLD */ + SIGCONT, /* LINUX_SIGCONT */ + SIGSTOP, /* LINUX_SIGSTOP */ + SIGTSTP, /* LINUX_SIGTSTP */ + SIGTTIN, /* LINUX_SIGTTIN */ + SIGTTOU, /* LINUX_SIGTTOU */ + SIGURG, /* LINUX_SIGURG */ + SIGXCPU, /* LINUX_SIGXCPU */ + SIGXFSZ, /* LINUX_SIGXFSZ */ + SIGVTALRM, /* LINUX_SIGVTALARM */ + SIGPROF, /* LINUX_SIGPROF */ + SIGWINCH, /* LINUX_SIGWINCH */ + SIGIO, /* LINUX_SIGIO */ + /* + * FreeBSD does not have SIGPWR signal, map Linux SIGPWR signal + * to the first unused FreeBSD signal number. Since Linux supports + * signals from 1 to 64 we are ok here as our SIGRTMIN = 65. + */ + SIGRTMIN, /* LINUX_SIGPWR */ + SIGSYS /* LINUX_SIGSYS */ +}; + +/* + * Map Linux RT signals to the FreeBSD RT signals. + */ +static inline int +linux_to_bsd_rt_signal(int sig) +{ + + return (SIGRTMIN + 1 + sig - LINUX_SIGRTMIN); +} + +static inline int +bsd_to_linux_rt_signal(int sig) +{ + + return (sig - SIGRTMIN - 1 + LINUX_SIGRTMIN); +} + +int +linux_to_bsd_signal(int sig) +{ + + KASSERT(sig > 0 && sig <= LINUX_SIGRTMAX, ("Invalid Linux signal\n")); + + if (sig < LINUX_SIGRTMIN) + return (linux_to_bsd_sigtbl[_SIG_IDX(sig)]); + + return (linux_to_bsd_rt_signal(sig)); +} + +int +bsd_to_linux_signal(int sig) +{ + + if (sig <= LINUX_SIGTBLSZ) + return (bsd_to_linux_sigtbl[_SIG_IDX(sig)]); + if (sig == SIGRTMIN) + return (LINUX_SIGPWR); + + return (bsd_to_linux_rt_signal(sig)); +} + +int +linux_to_bsd_sigaltstack(int lsa) +{ + int bsa = 0; + + if (lsa & LINUX_SS_DISABLE) + bsa |= SS_DISABLE; + /* + * Linux ignores SS_ONSTACK flag for ss + * parameter while FreeBSD prohibits it. + */ + return (bsa); +} + +int +bsd_to_linux_sigaltstack(int bsa) +{ + int lsa = 0; + + if (bsa & SS_DISABLE) + lsa |= LINUX_SS_DISABLE; + if (bsa & SS_ONSTACK) + lsa |= LINUX_SS_ONSTACK; + return (lsa); +} + +void +linux_to_bsd_sigset(l_sigset_t *lss, sigset_t *bss) +{ + int b, l; + + SIGEMPTYSET(*bss); + for (l = 1; l <= LINUX_SIGRTMAX; l++) { + if (LINUX_SIGISMEMBER(*lss, l)) { + b = linux_to_bsd_signal(l); + if (b) + SIGADDSET(*bss, b); + } + } +} + +void +bsd_to_linux_sigset(sigset_t *bss, l_sigset_t *lss) +{ + int b, l; + + LINUX_SIGEMPTYSET(*lss); + for (b = 1; b <= SIGRTMAX; b++) { + if (SIGISMEMBER(*bss, b)) { + l = bsd_to_linux_signal(b); + if (l) + LINUX_SIGADDSET(*lss, l); + } + } +} diff --git a/sys/compat/linux/linux.h b/sys/compat/linux/linux.h new file mode 100644 index 0000000..974440f --- /dev/null +++ b/sys/compat/linux/linux.h @@ -0,0 +1,95 @@ +/*- + * Copyright (c) 2015 Dmitry Chagin + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef _LINUX_MI_H_ +#define _LINUX_MI_H_ + +/* sigaltstack */ +#define LINUX_SS_ONSTACK 1 +#define LINUX_SS_DISABLE 2 + +int linux_to_bsd_sigaltstack(int lsa); +int bsd_to_linux_sigaltstack(int bsa); + +/* sigset */ +typedef struct { + uint64_t __mask; +} l_sigset_t; + +/* primitives to manipulate sigset_t */ +#define LINUX_SIGEMPTYSET(set) (set).__mask = 0 +#define LINUX_SIGISMEMBER(set, sig) (1UL & ((set).__mask >> _SIG_IDX(sig))) +#define LINUX_SIGADDSET(set, sig) (set).__mask |= 1UL << _SIG_IDX(sig) + +void linux_to_bsd_sigset(l_sigset_t *, sigset_t *); +void bsd_to_linux_sigset(sigset_t *, l_sigset_t *); + +/* signaling */ +#define LINUX_SIGHUP 1 +#define LINUX_SIGINT 2 +#define LINUX_SIGQUIT 3 +#define LINUX_SIGILL 4 +#define LINUX_SIGTRAP 5 +#define LINUX_SIGABRT 6 +#define LINUX_SIGIOT LINUX_SIGABRT +#define LINUX_SIGBUS 7 +#define LINUX_SIGFPE 8 +#define LINUX_SIGKILL 9 +#define LINUX_SIGUSR1 10 +#define LINUX_SIGSEGV 11 +#define LINUX_SIGUSR2 12 +#define LINUX_SIGPIPE 13 +#define LINUX_SIGALRM 14 +#define LINUX_SIGTERM 15 +#define LINUX_SIGSTKFLT 16 +#define LINUX_SIGCHLD 17 +#define LINUX_SIGCONT 18 +#define LINUX_SIGSTOP 19 +#define LINUX_SIGTSTP 20 +#define LINUX_SIGTTIN 21 +#define LINUX_SIGTTOU 22 +#define LINUX_SIGURG 23 +#define LINUX_SIGXCPU 24 +#define LINUX_SIGXFSZ 25 +#define LINUX_SIGVTALRM 26 +#define LINUX_SIGPROF 27 +#define LINUX_SIGWINCH 28 +#define LINUX_SIGIO 29 +#define LINUX_SIGPOLL LINUX_SIGIO +#define LINUX_SIGPWR 30 +#define LINUX_SIGSYS 31 +#define LINUX_SIGTBLSZ 31 +#define LINUX_SIGRTMIN 32 +#define LINUX_SIGRTMAX 64 + +#define LINUX_SIG_VALID(sig) ((sig) <= LINUX_SIGRTMAX && (sig) > 0) + +int linux_to_bsd_signal(int sig); +int bsd_to_linux_signal(int sig); + +#endif /* _LINUX_MI_H_ */ diff --git a/sys/compat/linux/linux_common.c b/sys/compat/linux/linux_common.c new file mode 100644 index 0000000..b9e3531 --- /dev/null +++ b/sys/compat/linux/linux_common.c @@ -0,0 +1,93 @@ +/*- + * Copyright (c) 2014 Vassilis Laganakos + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/exec.h> +#include <sys/imgact.h> +#include <sys/imgact_elf.h> +#include <sys/kernel.h> +#include <sys/malloc.h> +#include <sys/eventhandler.h> +#include <sys/sysctl.h> + +#include <compat/linux/linux_emul.h> +#include <compat/linux/linux_mib.h> +#include <compat/linux/linux_util.h> + +FEATURE(linuxulator_v4l, "V4L ioctl wrapper support in the linuxulator"); +FEATURE(linuxulator_v4l2, "V4L2 ioctl wrapper support in the linuxulator"); + +MODULE_VERSION(linux_common, 1); + +SET_DECLARE(linux_device_handler_set, struct linux_device_handler); + +static eventhandler_tag linux_exec_tag; +static eventhandler_tag linux_thread_dtor_tag; +static eventhandler_tag linux_exit_tag; + + +static int +linux_common_modevent(module_t mod, int type, void *data) +{ + struct linux_device_handler **ldhp; + + switch(type) { + case MOD_LOAD: + linux_osd_jail_register(); + linux_exit_tag = EVENTHANDLER_REGISTER(process_exit, + linux_proc_exit, NULL, 1000); + linux_exec_tag = EVENTHANDLER_REGISTER(process_exec, + linux_proc_exec, NULL, 1000); + linux_thread_dtor_tag = EVENTHANDLER_REGISTER(thread_dtor, + linux_thread_dtor, NULL, EVENTHANDLER_PRI_ANY); + SET_FOREACH(ldhp, linux_device_handler_set) + linux_device_register_handler(*ldhp); + break; + case MOD_UNLOAD: + linux_osd_jail_deregister(); + SET_FOREACH(ldhp, linux_device_handler_set) + linux_device_unregister_handler(*ldhp); + EVENTHANDLER_DEREGISTER(process_exit, linux_exit_tag); + EVENTHANDLER_DEREGISTER(process_exec, linux_exec_tag); + EVENTHANDLER_DEREGISTER(thread_dtor, linux_thread_dtor_tag); + break; + default: + return (EOPNOTSUPP); + } + return (0); +} + +static moduledata_t linux_common_mod = { + "linuxcommon", + linux_common_modevent, + 0 +}; + +DECLARE_MODULE(linuxcommon, linux_common_mod, SI_SUB_EXEC, SI_ORDER_ANY); diff --git a/sys/compat/linux/linux_emul.c b/sys/compat/linux/linux_emul.c index 61156ba..c2bf3ae 100644 --- a/sys/compat/linux/linux_emul.c +++ b/sys/compat/linux/linux_emul.c @@ -1,5 +1,6 @@ /*- * Copyright (c) 2006 Roman Divacky + * Copyright (c) 2013 Dmitry Chagin * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -29,364 +30,235 @@ #include <sys/cdefs.h> __FBSDID("$FreeBSD$"); -#include "opt_compat.h" -#include "opt_kdtrace.h" - #include <sys/param.h> #include <sys/systm.h> #include <sys/imgact.h> #include <sys/kernel.h> +#include <sys/ktr.h> #include <sys/lock.h> #include <sys/malloc.h> #include <sys/mutex.h> -#include <sys/sdt.h> #include <sys/sx.h> #include <sys/proc.h> #include <sys/syscallsubr.h> #include <sys/sysent.h> -#include <sys/sysproto.h> -#include <sys/unistd.h> - -#ifdef COMPAT_LINUX32 -#include <machine/../linux32/linux.h> -#include <machine/../linux32/linux32_proto.h> -#else -#include <machine/../linux/linux.h> -#include <machine/../linux/linux_proto.h> -#endif - -#include <compat/linux/linux_dtrace.h> + #include <compat/linux/linux_emul.h> -#include <compat/linux/linux_futex.h> #include <compat/linux/linux_misc.h> +#include <compat/linux/linux_util.h> -/** - * Special DTrace provider for the linuxulator. - * - * In this file we define the provider for the entire linuxulator. All - * modules (= files of the linuxulator) use it. - * - * We define a different name depending on the emulated bitsize, see - * ../../<ARCH>/linux{,32}/linux.h, e.g.: - * native bitsize = linuxulator - * amd64, 32bit emulation = linuxulator32 - */ -LIN_SDT_PROVIDER_DEFINE(LINUX_DTRACE); -/** - * Special DTrace module "locks", it covers some linuxulator internal - * locks. - */ -LIN_SDT_PROBE_DEFINE1(locks, emul_lock, locked, "struct mtx *"); -LIN_SDT_PROBE_DEFINE1(locks, emul_lock, unlock, "struct mtx *"); -LIN_SDT_PROBE_DEFINE1(locks, emul_shared_rlock, locked, "struct sx *"); -LIN_SDT_PROBE_DEFINE1(locks, emul_shared_rlock, unlock, "struct sx *"); -LIN_SDT_PROBE_DEFINE1(locks, emul_shared_wlock, locked, "struct sx *"); -LIN_SDT_PROBE_DEFINE1(locks, emul_shared_wlock, unlock, "struct sx *"); - -/** - * DTrace probes in this module. +/* + * This returns reference to the thread emuldata entry (if found) + * + * Hold PROC_LOCK when referencing emuldata from other threads. */ -LIN_SDT_PROBE_DEFINE2(emul, em_find, entry, "struct proc *", "int"); -LIN_SDT_PROBE_DEFINE0(emul, em_find, return); -LIN_SDT_PROBE_DEFINE3(emul, proc_init, entry, "struct thread *", "pid_t", - "int"); -LIN_SDT_PROBE_DEFINE0(emul, proc_init, create_thread); -LIN_SDT_PROBE_DEFINE0(emul, proc_init, fork); -LIN_SDT_PROBE_DEFINE0(emul, proc_init, exec); -LIN_SDT_PROBE_DEFINE0(emul, proc_init, return); -LIN_SDT_PROBE_DEFINE1(emul, proc_exit, entry, "struct proc *"); -LIN_SDT_PROBE_DEFINE0(emul, proc_exit, futex_failed); -LIN_SDT_PROBE_DEFINE3(emul, proc_exit, reparent, "pid_t", "pid_t", - "struct proc *"); -LIN_SDT_PROBE_DEFINE1(emul, proc_exit, child_clear_tid_error, "int"); -LIN_SDT_PROBE_DEFINE0(emul, proc_exit, return); -LIN_SDT_PROBE_DEFINE2(emul, proc_exec, entry, "struct proc *", - "struct image_params *"); -LIN_SDT_PROBE_DEFINE0(emul, proc_exec, return); -LIN_SDT_PROBE_DEFINE0(emul, linux_schedtail, entry); -LIN_SDT_PROBE_DEFINE1(emul, linux_schedtail, copyout_error, "int"); -LIN_SDT_PROBE_DEFINE0(emul, linux_schedtail, return); -LIN_SDT_PROBE_DEFINE1(emul, linux_set_tid_address, entry, "int *"); -LIN_SDT_PROBE_DEFINE0(emul, linux_set_tid_address, return); -LIN_SDT_PROBE_DEFINE2(emul, linux_kill_threads, entry, "struct thread *", - "int"); -LIN_SDT_PROBE_DEFINE1(emul, linux_kill_threads, kill, "pid_t"); -LIN_SDT_PROBE_DEFINE0(emul, linux_kill_threads, return); - -struct sx emul_shared_lock; -struct mtx emul_lock; - -/* this returns locked reference to the emuldata entry (if found) */ struct linux_emuldata * -em_find(struct proc *p, int locked) +em_find(struct thread *td) { struct linux_emuldata *em; - LIN_SDT_PROBE2(emul, em_find, entry, p, locked); + em = td->td_emuldata; - if (locked == EMUL_DOLOCK) - EMUL_LOCK(&emul_lock); + return (em); +} - em = p->p_emuldata; +/* + * This returns reference to the proc pemuldata entry (if found) + * + * Hold PROC_LOCK when referencing proc pemuldata from other threads. + * Hold LINUX_PEM_LOCK wher referencing pemuldata members. + */ +struct linux_pemuldata * +pem_find(struct proc *p) +{ + struct linux_pemuldata *pem; - if (em == NULL && locked == EMUL_DOLOCK) - EMUL_UNLOCK(&emul_lock); + pem = p->p_emuldata; - LIN_SDT_PROBE1(emul, em_find, return, em); - return (em); + return (pem); } -int -linux_proc_init(struct thread *td, pid_t child, int flags) +void +linux_proc_init(struct thread *td, struct thread *newtd, int flags) { - struct linux_emuldata *em, *p_em; + struct linux_emuldata *em; + struct linux_pemuldata *pem; + struct epoll_emuldata *emd; struct proc *p; - LIN_SDT_PROBE3(emul, proc_init, entry, td, child, flags); + if (newtd != NULL) { + p = newtd->td_proc; - if (child != 0) { - /* fork or create a thread */ - em = malloc(sizeof *em, M_LINUX, M_WAITOK | M_ZERO); - em->pid = child; - em->pdeath_signal = 0; - em->flags = 0; - em->robust_futexes = NULL; + /* non-exec call */ + em = malloc(sizeof(*em), M_TEMP, M_WAITOK | M_ZERO); if (flags & LINUX_CLONE_THREAD) { - /* handled later in the code */ - LIN_SDT_PROBE0(emul, proc_init, create_thread); - } else { - struct linux_emuldata_shared *s; + LINUX_CTR1(proc_init, "thread newtd(%d)", + newtd->td_tid); - LIN_SDT_PROBE0(emul, proc_init, fork); + em->em_tid = newtd->td_tid; + } else { + LINUX_CTR1(proc_init, "fork newtd(%d)", p->p_pid); - s = malloc(sizeof *s, M_LINUX, M_WAITOK | M_ZERO); - s->refs = 1; - s->group_pid = child; + em->em_tid = p->p_pid; - LIST_INIT(&s->threads); - em->shared = s; + pem = malloc(sizeof(*pem), M_LINUX, M_WAITOK | M_ZERO); + sx_init(&pem->pem_sx, "lpemlk"); + p->p_emuldata = pem; } + newtd->td_emuldata = em; } else { + p = td->td_proc; + /* exec */ - LIN_SDT_PROBE0(emul, proc_init, exec); + LINUX_CTR1(proc_init, "exec newtd(%d)", p->p_pid); /* lookup the old one */ - em = em_find(td->td_proc, EMUL_DOLOCK); + em = em_find(td); KASSERT(em != NULL, ("proc_init: emuldata not found in exec case.\n")); - } - - em->child_clear_tid = NULL; - em->child_set_tid = NULL; - /* - * allocate the shared struct only in clone()/fork cases in the case - * of clone() td = calling proc and child = pid of the newly created - * proc - */ - if (child != 0) { - if (flags & LINUX_CLONE_THREAD) { - /* lookup the parent */ - /* - * we dont have to lock the p_em because - * its waiting for us in linux_clone so - * there is no chance of it changing the - * p_em->shared address - */ - p_em = em_find(td->td_proc, EMUL_DONTLOCK); - KASSERT(p_em != NULL, ("proc_init: parent emuldata not found for CLONE_THREAD\n")); - em->shared = p_em->shared; - EMUL_SHARED_WLOCK(&emul_shared_lock); - em->shared->refs++; - EMUL_SHARED_WUNLOCK(&emul_shared_lock); - } else { - /* - * handled earlier to avoid malloc(M_WAITOK) with - * rwlock held - */ - } + em->em_tid = p->p_pid; + em->flags = 0; + em->pdeath_signal = 0; + em->robust_futexes = NULL; + em->child_clear_tid = NULL; + em->child_set_tid = NULL; - EMUL_SHARED_WLOCK(&emul_shared_lock); - LIST_INSERT_HEAD(&em->shared->threads, em, threads); - EMUL_SHARED_WUNLOCK(&emul_shared_lock); + /* epoll should be destroyed in a case of exec. */ + pem = pem_find(p); + KASSERT(pem != NULL, ("proc_exit: proc emuldata not found.\n")); - p = pfind(child); - KASSERT(p != NULL, ("process not found in proc_init\n")); - p->p_emuldata = em; - PROC_UNLOCK(p); - } else - EMUL_UNLOCK(&emul_lock); + if (pem->epoll != NULL) { + emd = pem->epoll; + pem->epoll = NULL; + free(emd, M_EPOLL); + } + } - LIN_SDT_PROBE0(emul, proc_init, return); - return (0); } -void +void linux_proc_exit(void *arg __unused, struct proc *p) { - struct linux_emuldata *em; - int error, shared_flags, shared_xstat; - struct thread *td = FIRST_THREAD_IN_PROC(p); - int *child_clear_tid; - struct proc *q, *nq; + struct linux_pemuldata *pem; + struct epoll_emuldata *emd; + struct thread *td = curthread; - if (__predict_true(p->p_sysent != &elf_linux_sysvec)) + if (__predict_false(SV_CURPROC_ABI() != SV_ABI_LINUX)) return; - LIN_SDT_PROBE1(emul, proc_exit, entry, p); - - release_futexes(p); + LINUX_CTR3(proc_exit, "thread(%d) proc(%d) p %p", + td->td_tid, p->p_pid, p); - /* find the emuldata */ - em = em_find(p, EMUL_DOLOCK); + pem = pem_find(p); + if (pem == NULL) + return; + (p->p_sysent->sv_thread_detach)(td); - KASSERT(em != NULL, ("proc_exit: emuldata not found.\n")); + p->p_emuldata = NULL; - /* reparent all procs that are not a thread leader to initproc */ - if (em->shared->group_pid != p->p_pid) { - LIN_SDT_PROBE3(emul, proc_exit, reparent, - em->shared->group_pid, p->p_pid, p); - - child_clear_tid = em->child_clear_tid; - EMUL_UNLOCK(&emul_lock); - sx_xlock(&proctree_lock); - wakeup(initproc); - PROC_LOCK(p); - proc_reparent(p, initproc); - p->p_sigparent = SIGCHLD; - PROC_UNLOCK(p); - sx_xunlock(&proctree_lock); - } else { - child_clear_tid = em->child_clear_tid; - EMUL_UNLOCK(&emul_lock); + if (pem->epoll != NULL) { + emd = pem->epoll; + pem->epoll = NULL; + free(emd, M_EPOLL); } - EMUL_SHARED_WLOCK(&emul_shared_lock); - shared_flags = em->shared->flags; - shared_xstat = em->shared->xstat; - LIST_REMOVE(em, threads); + sx_destroy(&pem->pem_sx); + free(pem, M_LINUX); +} - em->shared->refs--; - if (em->shared->refs == 0) { - EMUL_SHARED_WUNLOCK(&emul_shared_lock); - free(em->shared, M_LINUX); - } else - EMUL_SHARED_WUNLOCK(&emul_shared_lock); +int +linux_common_execve(struct thread *td, struct image_args *eargs) +{ + struct linux_pemuldata *pem; + struct epoll_emuldata *emd; + struct vmspace *oldvmspace; + struct linux_emuldata *em; + struct proc *p; + int error; - if ((shared_flags & EMUL_SHARED_HASXSTAT) != 0) - p->p_xstat = shared_xstat; + p = td->td_proc; - if (child_clear_tid != NULL) { - struct linux_sys_futex_args cup; - int null = 0; + error = pre_execve(td, &oldvmspace); + if (error != 0) + return (error); - error = copyout(&null, child_clear_tid, sizeof(null)); - if (error) { - LIN_SDT_PROBE1(emul, proc_exit, - child_clear_tid_error, error); + error = kern_execve(td, eargs, NULL); + post_execve(td, error, oldvmspace); + if (error != 0) + return (error); - free(em, M_LINUX); + /* + * In a case of transition from Linux binary execing to + * FreeBSD binary we destroy linux emuldata thread & proc entries. + */ + if (SV_CURPROC_ABI() != SV_ABI_LINUX) { + PROC_LOCK(p); + em = em_find(td); + KASSERT(em != NULL, ("proc_exec: thread emuldata not found.\n")); + td->td_emuldata = NULL; - LIN_SDT_PROBE0(emul, proc_exit, return); - return; - } + pem = pem_find(p); + KASSERT(pem != NULL, ("proc_exec: proc pemuldata not found.\n")); + p->p_emuldata = NULL; + PROC_UNLOCK(p); - /* futexes stuff */ - cup.uaddr = child_clear_tid; - cup.op = LINUX_FUTEX_WAKE; - cup.val = 0x7fffffff; /* Awake everyone */ - cup.timeout = NULL; - cup.uaddr2 = NULL; - cup.val3 = 0; - error = linux_sys_futex(FIRST_THREAD_IN_PROC(p), &cup); - /* - * this cannot happen at the moment and if this happens it - * probably means there is a user space bug - */ - if (error) { - LIN_SDT_PROBE0(emul, proc_exit, futex_failed); - printf(LMSG("futex stuff in proc_exit failed.\n")); + if (pem->epoll != NULL) { + emd = pem->epoll; + pem->epoll = NULL; + free(emd, M_EPOLL); } - } - /* clean the stuff up */ - free(em, M_LINUX); - - /* this is a little weird but rewritten from exit1() */ - sx_xlock(&proctree_lock); - q = LIST_FIRST(&p->p_children); - for (; q != NULL; q = nq) { - nq = LIST_NEXT(q, p_sibling); - if (q->p_flag & P_WEXIT) - continue; - if (__predict_false(q->p_sysent != &elf_linux_sysvec)) - continue; - em = em_find(q, EMUL_DOLOCK); - KASSERT(em != NULL, ("linux_reparent: emuldata not found: %i\n", q->p_pid)); - PROC_LOCK(q); - if ((q->p_flag & P_WEXIT) == 0 && em->pdeath_signal != 0) { - kern_psignal(q, em->pdeath_signal); - } - PROC_UNLOCK(q); - EMUL_UNLOCK(&emul_lock); + free(em, M_TEMP); + free(pem, M_LINUX); } - sx_xunlock(&proctree_lock); - - LIN_SDT_PROBE0(emul, proc_exit, return); + return (0); } -/* - * This is used in a case of transition from FreeBSD binary execing to linux binary - * in this case we create linux emuldata proc entry with the pid of the currently running - * process. - */ void linux_proc_exec(void *arg __unused, struct proc *p, struct image_params *imgp) { - if (__predict_false(imgp->sysent == &elf_linux_sysvec)) { - LIN_SDT_PROBE2(emul, proc_exec, entry, p, imgp); - } - if (__predict_false(imgp->sysent == &elf_linux_sysvec - && p->p_sysent != &elf_linux_sysvec)) - linux_proc_init(FIRST_THREAD_IN_PROC(p), p->p_pid, 0); - if (__predict_false((p->p_sysent->sv_flags & SV_ABI_MASK) == - SV_ABI_LINUX)) - /* Kill threads regardless of imgp->sysent value */ - linux_kill_threads(FIRST_THREAD_IN_PROC(p), SIGKILL); - if (__predict_false(imgp->sysent != &elf_linux_sysvec - && p->p_sysent == &elf_linux_sysvec)) { - struct linux_emuldata *em; - - /* - * XXX:There's a race because here we assign p->p_emuldata NULL - * but the process is still counted as linux one for a short - * time so some other process might reference it and try to - * access its p->p_emuldata and panicing on a NULL reference. - */ - em = em_find(p, EMUL_DONTLOCK); - - KASSERT(em != NULL, ("proc_exec: emuldata not found.\n")); - - EMUL_SHARED_WLOCK(&emul_shared_lock); - LIST_REMOVE(em, threads); + struct thread *td = curthread; + struct thread *othertd; - PROC_LOCK(p); - p->p_emuldata = NULL; - PROC_UNLOCK(p); + /* + * In a case of execing from linux binary properly detach + * other threads from the user space. + */ + if (__predict_false(SV_PROC_ABI(p) == SV_ABI_LINUX)) { + FOREACH_THREAD_IN_PROC(p, othertd) { + if (td != othertd) + (p->p_sysent->sv_thread_detach)(othertd); + } + } - em->shared->refs--; - if (em->shared->refs == 0) { - EMUL_SHARED_WUNLOCK(&emul_shared_lock); - free(em->shared, M_LINUX); - } else - EMUL_SHARED_WUNLOCK(&emul_shared_lock); + /* + * In a case of execing to linux binary we create linux + * emuldata thread entry. + */ + if (__predict_false((imgp->sysent->sv_flags & SV_ABI_MASK) == + SV_ABI_LINUX)) { - free(em, M_LINUX); + if (SV_PROC_ABI(p) == SV_ABI_LINUX) + linux_proc_init(td, NULL, 0); + else + linux_proc_init(td, td, 0); } +} - if (__predict_false(imgp->sysent == &elf_linux_sysvec)) { - LIN_SDT_PROBE0(emul, proc_exec, return); - } +void +linux_thread_dtor(void *arg __unused, struct thread *td) +{ + struct linux_emuldata *em; + + em = em_find(td); + if (em == NULL) + return; + td->td_emuldata = NULL; + + LINUX_CTR1(thread_dtor, "thread(%d)", em->em_tid); + + free(em, M_TEMP); } void @@ -399,76 +271,15 @@ linux_schedtail(struct thread *td) p = td->td_proc; - LIN_SDT_PROBE1(emul, linux_schedtail, entry, p); - - /* find the emuldata */ - em = em_find(p, EMUL_DOLOCK); - - KASSERT(em != NULL, ("linux_schedtail: emuldata not found.\n")); + em = em_find(td); + KASSERT(em != NULL, ("linux_schedtail: thread emuldata not found.\n")); child_set_tid = em->child_set_tid; - EMUL_UNLOCK(&emul_lock); if (child_set_tid != NULL) { - error = copyout(&p->p_pid, (int *)child_set_tid, - sizeof(p->p_pid)); - - if (error != 0) { - LIN_SDT_PROBE1(emul, linux_schedtail, copyout_error, - error); - } - } - - LIN_SDT_PROBE0(emul, linux_schedtail, return); - - return; -} - -int -linux_set_tid_address(struct thread *td, struct linux_set_tid_address_args *args) -{ - struct linux_emuldata *em; - - LIN_SDT_PROBE1(emul, linux_set_tid_address, entry, args->tidptr); - - /* find the emuldata */ - em = em_find(td->td_proc, EMUL_DOLOCK); - - KASSERT(em != NULL, ("set_tid_address: emuldata not found.\n")); - - em->child_clear_tid = args->tidptr; - td->td_retval[0] = td->td_proc->p_pid; - - EMUL_UNLOCK(&emul_lock); - - LIN_SDT_PROBE0(emul, linux_set_tid_address, return); - return 0; -} - -void -linux_kill_threads(struct thread *td, int sig) -{ - struct linux_emuldata *em, *td_em, *tmp_em; - struct proc *sp; - - LIN_SDT_PROBE2(emul, linux_kill_threads, entry, td, sig); - - td_em = em_find(td->td_proc, EMUL_DONTLOCK); - - KASSERT(td_em != NULL, ("linux_kill_threads: emuldata not found.\n")); - - EMUL_SHARED_RLOCK(&emul_shared_lock); - LIST_FOREACH_SAFE(em, &td_em->shared->threads, threads, tmp_em) { - if (em->pid == td_em->pid) - continue; - - sp = pfind(em->pid); - if ((sp->p_flag & P_WEXIT) == 0) - kern_psignal(sp, sig); - PROC_UNLOCK(sp); - - LIN_SDT_PROBE1(emul, linux_kill_threads, kill, em->pid); - } - EMUL_SHARED_RUNLOCK(&emul_shared_lock); - - LIN_SDT_PROBE0(emul, linux_kill_threads, return); + error = copyout(&em->em_tid, child_set_tid, + sizeof(em->em_tid)); + LINUX_CTR4(schedtail, "thread(%d) %p stored %d error %d", + td->td_tid, child_set_tid, em->em_tid, error); + } else + LINUX_CTR1(schedtail, "thread(%d)", em->em_tid); } diff --git a/sys/compat/linux/linux_emul.h b/sys/compat/linux/linux_emul.h index f409a34..7262093 100644 --- a/sys/compat/linux/linux_emul.h +++ b/sys/compat/linux/linux_emul.h @@ -1,5 +1,6 @@ /*- * Copyright (c) 2006 Roman Divacky + * Copyright (c) 2013 Dmitry Chagin * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -31,91 +32,48 @@ #ifndef _LINUX_EMUL_H_ #define _LINUX_EMUL_H_ -#define EMUL_SHARED_HASXSTAT 0x01 - -struct linux_emuldata_shared { - int refs; - int flags; - int xstat; - pid_t group_pid; - - LIST_HEAD(, linux_emuldata) threads; /* head of list of linux threads */ -}; - /* * modeled after similar structure in NetBSD * this will be extended as we need more functionality */ struct linux_emuldata { - pid_t pid; - int *child_set_tid; /* in clone(): Child's TID to set on clone */ int *child_clear_tid;/* in clone(): Child's TID to clear on exit */ - struct linux_emuldata_shared *shared; - int pdeath_signal; /* parent death signal */ - int flags; /* different emuldata flags */ + int flags; /* thread emuldata flags */ + int em_tid; /* thread id */ struct linux_robust_list_head *robust_futexes; - - LIST_ENTRY(linux_emuldata) threads; /* list of linux threads */ }; -struct linux_emuldata *em_find(struct proc *, int locked); - -/* - * DTrace probes for locks should be fired after locking and before releasing - * to prevent races (to provide data/function stability in dtrace, see the - * output of "dtrace -v ..." and the corresponding dtrace docs). - */ -#define EMUL_LOCK(l) do { \ - mtx_lock(l); \ - LIN_SDT_PROBE1(locks, emul_lock, \ - locked, l); \ - } while (0) -#define EMUL_UNLOCK(l) do { \ - LIN_SDT_PROBE1(locks, emul_lock, \ - unlock, l); \ - mtx_unlock(l); \ - } while (0) +struct linux_emuldata *em_find(struct thread *); -#define EMUL_SHARED_RLOCK(l) do { \ - sx_slock(l); \ - LIN_SDT_PROBE1(locks, emul_shared_rlock, \ - locked, l); \ - } while (0) -#define EMUL_SHARED_RUNLOCK(l) do { \ - LIN_SDT_PROBE1(locks, emul_shared_rlock, \ - unlock, l); \ - sx_sunlock(l); \ - } while (0) -#define EMUL_SHARED_WLOCK(l) do { \ - sx_xlock(l); \ - LIN_SDT_PROBE1(locks, emul_shared_wlock, \ - locked, l); \ - } while (0) -#define EMUL_SHARED_WUNLOCK(l) do { \ - LIN_SDT_PROBE1(locks, emul_shared_wlock, \ - unlock, l); \ - sx_xunlock(l); \ - } while (0) - -/* for em_find use */ -#define EMUL_DOLOCK 1 -#define EMUL_DONTLOCK 0 +void linux_proc_init(struct thread *, struct thread *, int); +void linux_proc_exit(void *, struct proc *); +void linux_schedtail(struct thread *); +void linux_proc_exec(void *, struct proc *, struct image_params *); +void linux_thread_dtor(void *arg __unused, struct thread *); +void linux_thread_detach(struct thread *); +int linux_common_execve(struct thread *, struct image_args *); -/* emuldata flags */ +/* process emuldata flags */ #define LINUX_XDEPR_REQUEUEOP 0x00000001 /* uses deprecated futex REQUEUE op*/ +#define LINUX_XUNSUP_EPOLL 0x00000002 /* unsupported epoll events */ +#define LINUX_XUNSUP_FUTEXPIOP 0x00000004 /* uses unsupported pi futex */ -int linux_proc_init(struct thread *, pid_t, int); -void linux_proc_exit(void *, struct proc *); -void linux_schedtail(struct thread *); -void linux_proc_exec(void *, struct proc *, struct image_params *); -void linux_kill_threads(struct thread *, int); +struct linux_pemuldata { + uint32_t flags; /* process emuldata flags */ + struct sx pem_sx; /* lock for this struct */ + void *epoll; /* epoll data */ +}; + +#define LINUX_PEM_XLOCK(p) sx_xlock(&(p)->pem_sx) +#define LINUX_PEM_XUNLOCK(p) sx_xunlock(&(p)->pem_sx) +#define LINUX_PEM_SLOCK(p) sx_slock(&(p)->pem_sx) +#define LINUX_PEM_SUNLOCK(p) sx_sunlock(&(p)->pem_sx) -extern struct sx emul_shared_lock; -extern struct mtx emul_lock; +struct linux_pemuldata *pem_find(struct proc *); #endif /* !_LINUX_EMUL_H_ */ diff --git a/sys/compat/linux/linux_event.c b/sys/compat/linux/linux_event.c new file mode 100644 index 0000000..1fe3445 --- /dev/null +++ b/sys/compat/linux/linux_event.c @@ -0,0 +1,882 @@ +/*- + * Copyright (c) 2007 Roman Divacky + * Copyright (c) 2014 Dmitry Chagin + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include "opt_compat.h" + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/imgact.h> +#include <sys/kernel.h> +#include <sys/limits.h> +#include <sys/lock.h> +#include <sys/mutex.h> +#include <sys/capability.h> +#include <sys/types.h> +#include <sys/file.h> +#include <sys/filedesc.h> +#include <sys/errno.h> +#include <sys/event.h> +#include <sys/poll.h> +#include <sys/proc.h> +#include <sys/selinfo.h> +#include <sys/sx.h> +#include <sys/syscallsubr.h> +#include <sys/timespec.h> + +#ifdef COMPAT_LINUX32 +#include <machine/../linux32/linux.h> +#include <machine/../linux32/linux32_proto.h> +#else +#include <machine/../linux/linux.h> +#include <machine/../linux/linux_proto.h> +#endif + +#include <compat/linux/linux_emul.h> +#include <compat/linux/linux_event.h> +#include <compat/linux/linux_file.h> +#include <compat/linux/linux_util.h> + +/* + * epoll defines 'struct epoll_event' with the field 'data' as 64 bits + * on all architectures. But on 32 bit architectures BSD 'struct kevent' only + * has 32 bit opaque pointer as 'udata' field. So we can't pass epoll supplied + * data verbatuim. Therefore we allocate 64-bit memory block to pass + * user supplied data for every file descriptor. + */ + +typedef uint64_t epoll_udata_t; + +struct epoll_emuldata { + uint32_t fdc; /* epoll udata max index */ + epoll_udata_t udata[1]; /* epoll user data vector */ +}; + +#define EPOLL_DEF_SZ 16 +#define EPOLL_SIZE(fdn) \ + (sizeof(struct epoll_emuldata)+(fdn) * sizeof(epoll_udata_t)) + +struct epoll_event { + uint32_t events; + epoll_udata_t data; +} +#if defined(__amd64__) +__attribute__((packed)) +#endif +; + +#define LINUX_MAX_EVENTS (INT_MAX / sizeof(struct epoll_event)) + +static void epoll_fd_install(struct thread *td, int fd, epoll_udata_t udata); +static int epoll_to_kevent(struct thread *td, struct file *epfp, + int fd, struct epoll_event *l_event, int *kev_flags, + struct kevent *kevent, int *nkevents); +static void kevent_to_epoll(struct kevent *kevent, struct epoll_event *l_event); +static int epoll_kev_copyout(void *arg, struct kevent *kevp, int count); +static int epoll_kev_copyin(void *arg, struct kevent *kevp, int count); +static int epoll_delete_event(struct thread *td, struct file *epfp, + int fd, int filter); +static int epoll_delete_all_events(struct thread *td, struct file *epfp, + int fd); + +struct epoll_copyin_args { + struct kevent *changelist; +}; + +struct epoll_copyout_args { + struct epoll_event *leventlist; + struct proc *p; + uint32_t count; + int error; +}; + +/* eventfd */ +typedef uint64_t eventfd_t; + +static fo_rdwr_t eventfd_read; +static fo_rdwr_t eventfd_write; +static fo_truncate_t eventfd_truncate; +static fo_ioctl_t eventfd_ioctl; +static fo_poll_t eventfd_poll; +static fo_kqfilter_t eventfd_kqfilter; +static fo_stat_t eventfd_stat; +static fo_close_t eventfd_close; + +static struct fileops eventfdops = { + .fo_read = eventfd_read, + .fo_write = eventfd_write, + .fo_truncate = eventfd_truncate, + .fo_ioctl = eventfd_ioctl, + .fo_poll = eventfd_poll, + .fo_kqfilter = eventfd_kqfilter, + .fo_stat = eventfd_stat, + .fo_close = eventfd_close, + .fo_chmod = invfo_chmod, + .fo_chown = invfo_chown, + .fo_sendfile = invfo_sendfile, + .fo_flags = DFLAG_PASSABLE +}; + +static void filt_eventfddetach(struct knote *kn); +static int filt_eventfdread(struct knote *kn, long hint); +static int filt_eventfdwrite(struct knote *kn, long hint); + +static struct filterops eventfd_rfiltops = { + .f_isfd = 1, + .f_detach = filt_eventfddetach, + .f_event = filt_eventfdread +}; +static struct filterops eventfd_wfiltops = { + .f_isfd = 1, + .f_detach = filt_eventfddetach, + .f_event = filt_eventfdwrite +}; + +struct eventfd { + eventfd_t efd_count; + uint32_t efd_flags; + struct selinfo efd_sel; + struct mtx efd_lock; +}; + +static int eventfd_create(struct thread *td, uint32_t initval, int flags); + + +static void +epoll_fd_install(struct thread *td, int fd, epoll_udata_t udata) +{ + struct linux_pemuldata *pem; + struct epoll_emuldata *emd; + struct proc *p; + + p = td->td_proc; + + pem = pem_find(p); + KASSERT(pem != NULL, ("epoll proc emuldata not found.\n")); + + LINUX_PEM_XLOCK(pem); + if (pem->epoll == NULL) { + emd = malloc(EPOLL_SIZE(fd), M_EPOLL, M_WAITOK); + emd->fdc = fd; + pem->epoll = emd; + } else { + emd = pem->epoll; + if (fd > emd->fdc) { + emd = realloc(emd, EPOLL_SIZE(fd), M_EPOLL, M_WAITOK); + emd->fdc = fd; + pem->epoll = emd; + } + } + emd->udata[fd] = udata; + LINUX_PEM_XUNLOCK(pem); +} + +static int +epoll_create_common(struct thread *td, int flags) +{ + int error; + + error = kern_kqueue(td, flags); + if (error) + return (error); + + epoll_fd_install(td, EPOLL_DEF_SZ, 0); + + return (0); +} + +int +linux_epoll_create(struct thread *td, struct linux_epoll_create_args *args) +{ + + /* + * args->size is unused. Linux just tests it + * and then forgets it as well. + */ + if (args->size <= 0) + return (EINVAL); + + return (epoll_create_common(td, 0)); +} + +int +linux_epoll_create1(struct thread *td, struct linux_epoll_create1_args *args) +{ + int flags; + + if ((args->flags & ~(LINUX_O_CLOEXEC)) != 0) + return (EINVAL); + + flags = 0; + if ((args->flags & LINUX_O_CLOEXEC) != 0) + flags |= O_CLOEXEC; + + return (epoll_create_common(td, flags)); +} + +/* Structure converting function from epoll to kevent. */ +static int +epoll_to_kevent(struct thread *td, struct file *epfp, + int fd, struct epoll_event *l_event, int *kev_flags, + struct kevent *kevent, int *nkevents) +{ + uint32_t levents = l_event->events; + struct linux_pemuldata *pem; + struct proc *p; + + /* flags related to how event is registered */ + if ((levents & LINUX_EPOLLONESHOT) != 0) + *kev_flags |= EV_ONESHOT; + if ((levents & LINUX_EPOLLET) != 0) + *kev_flags |= EV_CLEAR; + if ((levents & LINUX_EPOLLERR) != 0) + *kev_flags |= EV_ERROR; + if ((levents & LINUX_EPOLLRDHUP) != 0) + *kev_flags |= EV_EOF; + + /* flags related to what event is registered */ + if ((levents & LINUX_EPOLL_EVRD) != 0) { + EV_SET(kevent++, fd, EVFILT_READ, *kev_flags, 0, 0, 0); + ++(*nkevents); + } + if ((levents & LINUX_EPOLL_EVWR) != 0) { + EV_SET(kevent++, fd, EVFILT_WRITE, *kev_flags, 0, 0, 0); + ++(*nkevents); + } + + if ((levents & ~(LINUX_EPOLL_EVSUP)) != 0) { + p = td->td_proc; + + pem = pem_find(p); + KASSERT(pem != NULL, ("epoll proc emuldata not found.\n")); + KASSERT(pem->epoll != NULL, ("epoll proc epolldata not found.\n")); + + LINUX_PEM_XLOCK(pem); + if ((pem->flags & LINUX_XUNSUP_EPOLL) == 0) { + pem->flags |= LINUX_XUNSUP_EPOLL; + LINUX_PEM_XUNLOCK(pem); + linux_msg(td, "epoll_ctl unsupported flags: 0x%x\n", + levents); + } else + LINUX_PEM_XUNLOCK(pem); + return (EINVAL); + } + + return (0); +} + +/* + * Structure converting function from kevent to epoll. In a case + * this is called on error in registration we store the error in + * event->data and pick it up later in linux_epoll_ctl(). + */ +static void +kevent_to_epoll(struct kevent *kevent, struct epoll_event *l_event) +{ + + if ((kevent->flags & EV_ERROR) != 0) { + l_event->events = LINUX_EPOLLERR; + return; + } + + switch (kevent->filter) { + case EVFILT_READ: + l_event->events = LINUX_EPOLLIN|LINUX_EPOLLRDNORM|LINUX_EPOLLPRI; + if ((kevent->flags & EV_EOF) != 0) + l_event->events |= LINUX_EPOLLRDHUP; + break; + case EVFILT_WRITE: + l_event->events = LINUX_EPOLLOUT|LINUX_EPOLLWRNORM; + break; + } +} + +/* + * Copyout callback used by kevent. This converts kevent + * events to epoll events and copies them back to the + * userspace. This is also called on error on registering + * of the filter. + */ +static int +epoll_kev_copyout(void *arg, struct kevent *kevp, int count) +{ + struct epoll_copyout_args *args; + struct linux_pemuldata *pem; + struct epoll_emuldata *emd; + struct epoll_event *eep; + int error, fd, i; + + args = (struct epoll_copyout_args*) arg; + eep = malloc(sizeof(*eep) * count, M_EPOLL, M_WAITOK | M_ZERO); + + pem = pem_find(args->p); + KASSERT(pem != NULL, ("epoll proc emuldata not found.\n")); + LINUX_PEM_SLOCK(pem); + emd = pem->epoll; + KASSERT(emd != NULL, ("epoll proc epolldata not found.\n")); + + for (i = 0; i < count; i++) { + kevent_to_epoll(&kevp[i], &eep[i]); + + fd = kevp[i].ident; + KASSERT(fd <= emd->fdc, ("epoll user data vector" + " is too small.\n")); + eep[i].data = emd->udata[fd]; + } + LINUX_PEM_SUNLOCK(pem); + + error = copyout(eep, args->leventlist, count * sizeof(*eep)); + if (error == 0) { + args->leventlist += count; + args->count += count; + } else if (args->error == 0) + args->error = error; + + free(eep, M_EPOLL); + return (error); +} + +/* + * Copyin callback used by kevent. This copies already + * converted filters from kernel memory to the kevent + * internal kernel memory. Hence the memcpy instead of + * copyin. + */ +static int +epoll_kev_copyin(void *arg, struct kevent *kevp, int count) +{ + struct epoll_copyin_args *args; + + args = (struct epoll_copyin_args*) arg; + + memcpy(kevp, args->changelist, count * sizeof(*kevp)); + args->changelist += count; + + return (0); +} + +/* + * Load epoll filter, convert it to kevent filter + * and load it into kevent subsystem. + */ +int +linux_epoll_ctl(struct thread *td, struct linux_epoll_ctl_args *args) +{ + struct file *epfp, *fp; + struct epoll_copyin_args ciargs; + struct kevent kev[2]; + struct kevent_copyops k_ops = { &ciargs, + NULL, + epoll_kev_copyin}; + struct epoll_event le; + cap_rights_t rights; + int kev_flags; + int nchanges = 0; + int error; + + if (args->op != LINUX_EPOLL_CTL_DEL) { + error = copyin(args->event, &le, sizeof(le)); + if (error != 0) + return (error); + } + + error = fget(td, args->epfd, + cap_rights_init(&rights, CAP_KQUEUE_CHANGE), &epfp); + if (error != 0) + return (error); + if (epfp->f_type != DTYPE_KQUEUE) + goto leave1; + + /* Protect user data vector from incorrectly supplied fd. */ + error = fget(td, args->fd, cap_rights_init(&rights, CAP_POLL_EVENT), &fp); + if (error != 0) + goto leave1; + + /* Linux disallows spying on himself */ + if (epfp == fp) { + error = EINVAL; + goto leave0; + } + + ciargs.changelist = kev; + + switch (args->op) { + case LINUX_EPOLL_CTL_MOD: + /* + * We don't memorize which events were set for this FD + * on this level, so just delete all we could have set: + * EVFILT_READ and EVFILT_WRITE, ignoring any errors + */ + error = epoll_delete_all_events(td, epfp, args->fd); + if (error) + goto leave0; + /* FALLTHROUGH */ + + case LINUX_EPOLL_CTL_ADD: + kev_flags = EV_ADD | EV_ENABLE; + break; + + case LINUX_EPOLL_CTL_DEL: + /* CTL_DEL means unregister this fd with this epoll */ + error = epoll_delete_all_events(td, epfp, args->fd); + goto leave0; + + default: + error = EINVAL; + goto leave0; + } + + error = epoll_to_kevent(td, epfp, args->fd, &le, &kev_flags, + kev, &nchanges); + if (error) + goto leave0; + + epoll_fd_install(td, args->fd, le.data); + + error = kern_kevent_fp(td, epfp, nchanges, 0, &k_ops, NULL); + +leave0: + fdrop(fp, td); + +leave1: + fdrop(epfp, td); + return (error); +} + +/* + * Wait for a filter to be triggered on the epoll file descriptor. + */ +static int +linux_epoll_wait_common(struct thread *td, int epfd, struct epoll_event *events, + int maxevents, int timeout, sigset_t *uset) +{ + struct file *epfp; + struct timespec ts, *tsp; + cap_rights_t rights; + struct epoll_copyout_args coargs; + struct kevent_copyops k_ops = { &coargs, + epoll_kev_copyout, + NULL}; + int error; + + if (maxevents <= 0 || maxevents > LINUX_MAX_EVENTS) + return (EINVAL); + + if (uset != NULL) { + error = kern_sigprocmask(td, SIG_SETMASK, uset, + &td->td_oldsigmask, 0); + if (error != 0) + return (error); + td->td_pflags |= TDP_OLDMASK; + /* + * Make sure that ast() is called on return to + * usermode and TDP_OLDMASK is cleared, restoring old + * sigmask. + */ + thread_lock(td); + td->td_flags |= TDF_ASTPENDING; + thread_unlock(td); + } + + error = fget(td, epfd, + cap_rights_init(&rights, CAP_KQUEUE_EVENT), &epfp); + if (error != 0) + return (error); + + coargs.leventlist = events; + coargs.p = td->td_proc; + coargs.count = 0; + coargs.error = 0; + + if (timeout != -1) { + if (timeout < 0) { + error = EINVAL; + goto leave; + } + /* Convert from milliseconds to timespec. */ + ts.tv_sec = timeout / 1000; + ts.tv_nsec = (timeout % 1000) * 1000000; + tsp = &ts; + } else { + tsp = NULL; + } + + error = kern_kevent_fp(td, epfp, 0, maxevents, &k_ops, tsp); + if (error == 0 && coargs.error != 0) + error = coargs.error; + + /* + * kern_kevent might return ENOMEM which is not expected from epoll_wait. + * Maybe we should translate that but I don't think it matters at all. + */ + if (error == 0) + td->td_retval[0] = coargs.count; +leave: + fdrop(epfp, td); + return (error); +} + +int +linux_epoll_wait(struct thread *td, struct linux_epoll_wait_args *args) +{ + + return (linux_epoll_wait_common(td, args->epfd, args->events, + args->maxevents, args->timeout, NULL)); +} + +int +linux_epoll_pwait(struct thread *td, struct linux_epoll_pwait_args *args) +{ + sigset_t mask, *pmask; + l_sigset_t lmask; + int error; + + if (args->mask != NULL) { + error = copyin(args->mask, &lmask, sizeof(l_sigset_t)); + if (error != 0) + return (error); + linux_to_bsd_sigset(&lmask, &mask); + pmask = &mask; + } else + pmask = NULL; + return (linux_epoll_wait_common(td, args->epfd, args->events, + args->maxevents, args->timeout, pmask)); +} + +static int +epoll_delete_event(struct thread *td, struct file *epfp, int fd, int filter) +{ + struct epoll_copyin_args ciargs; + struct kevent kev; + struct kevent_copyops k_ops = { &ciargs, + NULL, + epoll_kev_copyin}; + int error; + + ciargs.changelist = &kev; + EV_SET(&kev, fd, filter, EV_DELETE | EV_DISABLE, 0, 0, 0); + + error = kern_kevent_fp(td, epfp, 1, 0, &k_ops, NULL); + + /* + * here we ignore ENONT, because we don't keep track of events here + */ + if (error == ENOENT) + error = 0; + return (error); +} + +static int +epoll_delete_all_events(struct thread *td, struct file *epfp, int fd) +{ + int error1, error2; + + error1 = epoll_delete_event(td, epfp, fd, EVFILT_READ); + error2 = epoll_delete_event(td, epfp, fd, EVFILT_WRITE); + + /* report any errors we got */ + return (error1 == 0 ? error2 : error1); +} + +static int +eventfd_create(struct thread *td, uint32_t initval, int flags) +{ + struct filedesc *fdp; + struct eventfd *efd; + struct file *fp; + int fflags, fd, error; + + fflags = 0; + if ((flags & LINUX_O_CLOEXEC) != 0) + fflags |= O_CLOEXEC; + + fdp = td->td_proc->p_fd; + error = falloc(td, &fp, &fd, fflags); + if (error) + return (error); + + efd = malloc(sizeof(*efd), M_EPOLL, M_WAITOK | M_ZERO); + efd->efd_flags = flags; + efd->efd_count = initval; + mtx_init(&efd->efd_lock, "eventfd", NULL, MTX_DEF); + + knlist_init_mtx(&efd->efd_sel.si_note, &efd->efd_lock); + + fflags = FREAD | FWRITE; + if ((flags & LINUX_O_NONBLOCK) != 0) + fflags |= FNONBLOCK; + + finit(fp, fflags, DTYPE_LINUXEFD, efd, &eventfdops); + fdrop(fp, td); + + td->td_retval[0] = fd; + return (error); +} + +int +linux_eventfd(struct thread *td, struct linux_eventfd_args *args) +{ + + return (eventfd_create(td, args->initval, 0)); +} + +int +linux_eventfd2(struct thread *td, struct linux_eventfd2_args *args) +{ + + if ((args->flags & ~(LINUX_O_CLOEXEC|LINUX_O_NONBLOCK|LINUX_EFD_SEMAPHORE)) != 0) + return (EINVAL); + + return (eventfd_create(td, args->initval, args->flags)); +} + +static int +eventfd_close(struct file *fp, struct thread *td) +{ + struct eventfd *efd; + + efd = fp->f_data; + if (fp->f_type != DTYPE_LINUXEFD || efd == NULL) + return (EBADF); + + seldrain(&efd->efd_sel); + knlist_destroy(&efd->efd_sel.si_note); + + fp->f_ops = &badfileops; + mtx_destroy(&efd->efd_lock); + free(efd, M_EPOLL); + + return (0); +} + +static int +eventfd_read(struct file *fp, struct uio *uio, struct ucred *active_cred, + int flags, struct thread *td) +{ + struct eventfd *efd; + eventfd_t count; + int error; + + efd = fp->f_data; + if (fp->f_type != DTYPE_LINUXEFD || efd == NULL) + return (EBADF); + + if (uio->uio_resid < sizeof(eventfd_t)) + return (EINVAL); + + error = 0; + mtx_lock(&efd->efd_lock); +retry: + if (efd->efd_count == 0) { + if ((efd->efd_flags & LINUX_O_NONBLOCK) != 0) { + mtx_unlock(&efd->efd_lock); + return (EAGAIN); + } + error = mtx_sleep(&efd->efd_count, &efd->efd_lock, PCATCH, "lefdrd", 0); + if (error == 0) + goto retry; + } + if (error == 0) { + if ((efd->efd_flags & LINUX_EFD_SEMAPHORE) != 0) { + count = 1; + --efd->efd_count; + } else { + count = efd->efd_count; + efd->efd_count = 0; + } + KNOTE_LOCKED(&efd->efd_sel.si_note, 0); + selwakeup(&efd->efd_sel); + wakeup(&efd->efd_count); + mtx_unlock(&efd->efd_lock); + error = uiomove(&count, sizeof(eventfd_t), uio); + } else + mtx_unlock(&efd->efd_lock); + + return (error); +} + +static int +eventfd_write(struct file *fp, struct uio *uio, struct ucred *active_cred, + int flags, struct thread *td) +{ + struct eventfd *efd; + eventfd_t count; + int error; + + efd = fp->f_data; + if (fp->f_type != DTYPE_LINUXEFD || efd == NULL) + return (EBADF); + + if (uio->uio_resid < sizeof(eventfd_t)) + return (EINVAL); + + error = uiomove(&count, sizeof(eventfd_t), uio); + if (error) + return (error); + if (count == UINT64_MAX) + return (EINVAL); + + mtx_lock(&efd->efd_lock); +retry: + if (UINT64_MAX - efd->efd_count <= count) { + if ((efd->efd_flags & LINUX_O_NONBLOCK) != 0) { + mtx_unlock(&efd->efd_lock); + return (EAGAIN); + } + error = mtx_sleep(&efd->efd_count, &efd->efd_lock, + PCATCH, "lefdwr", 0); + if (error == 0) + goto retry; + } + if (error == 0) { + efd->efd_count += count; + KNOTE_LOCKED(&efd->efd_sel.si_note, 0); + selwakeup(&efd->efd_sel); + wakeup(&efd->efd_count); + } + mtx_unlock(&efd->efd_lock); + + return (error); +} + +static int +eventfd_poll(struct file *fp, int events, struct ucred *active_cred, + struct thread *td) +{ + struct eventfd *efd; + int revents = 0; + + efd = fp->f_data; + if (fp->f_type != DTYPE_LINUXEFD || efd == NULL) + return (POLLERR); + + mtx_lock(&efd->efd_lock); + if ((events & (POLLIN|POLLRDNORM)) && efd->efd_count > 0) + revents |= events & (POLLIN|POLLRDNORM); + if ((events & (POLLOUT|POLLWRNORM)) && UINT64_MAX - 1 > efd->efd_count) + revents |= events & (POLLOUT|POLLWRNORM); + if (revents == 0) + selrecord(td, &efd->efd_sel); + mtx_unlock(&efd->efd_lock); + + return (revents); +} + +/*ARGSUSED*/ +static int +eventfd_kqfilter(struct file *fp, struct knote *kn) +{ + struct eventfd *efd; + + efd = fp->f_data; + if (fp->f_type != DTYPE_LINUXEFD || efd == NULL) + return (EINVAL); + + mtx_lock(&efd->efd_lock); + switch (kn->kn_filter) { + case EVFILT_READ: + kn->kn_fop = &eventfd_rfiltops; + break; + case EVFILT_WRITE: + kn->kn_fop = &eventfd_wfiltops; + break; + default: + mtx_unlock(&efd->efd_lock); + return (EINVAL); + } + + kn->kn_hook = efd; + knlist_add(&efd->efd_sel.si_note, kn, 1); + mtx_unlock(&efd->efd_lock); + + return (0); +} + +static void +filt_eventfddetach(struct knote *kn) +{ + struct eventfd *efd = kn->kn_hook; + + mtx_lock(&efd->efd_lock); + knlist_remove(&efd->efd_sel.si_note, kn, 1); + mtx_unlock(&efd->efd_lock); +} + +/*ARGSUSED*/ +static int +filt_eventfdread(struct knote *kn, long hint) +{ + struct eventfd *efd = kn->kn_hook; + int ret; + + mtx_assert(&efd->efd_lock, MA_OWNED); + ret = (efd->efd_count > 0); + + return (ret); +} + +/*ARGSUSED*/ +static int +filt_eventfdwrite(struct knote *kn, long hint) +{ + struct eventfd *efd = kn->kn_hook; + int ret; + + mtx_assert(&efd->efd_lock, MA_OWNED); + ret = (UINT64_MAX - 1 > efd->efd_count); + + return (ret); +} + +/*ARGSUSED*/ +static int +eventfd_truncate(struct file *fp, off_t length, struct ucred *active_cred, + struct thread *td) +{ + + return (ENXIO); +} + +/*ARGSUSED*/ +static int +eventfd_ioctl(struct file *fp, u_long cmd, void *data, + struct ucred *active_cred, struct thread *td) +{ + + return (ENXIO); +} + +/*ARGSUSED*/ +static int +eventfd_stat(struct file *fp, struct stat *st, struct ucred *active_cred, + struct thread *td) +{ + + return (ENXIO); +} diff --git a/sys/compat/linux/linux_event.h b/sys/compat/linux/linux_event.h new file mode 100644 index 0000000..9b7d37b --- /dev/null +++ b/sys/compat/linux/linux_event.h @@ -0,0 +1,60 @@ +/*- + * Copyright (c) 2007 Roman Divacky + * Copyright (c) 2014 Dmitry Chagin + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef _LINUX_EVENT_H_ +#define _LINUX_EVENT_H_ + +#define LINUX_EPOLLIN 0x001 +#define LINUX_EPOLLPRI 0x002 +#define LINUX_EPOLLOUT 0x004 +#define LINUX_EPOLLRDNORM 0x040 +#define LINUX_EPOLLRDBAND 0x080 +#define LINUX_EPOLLWRNORM 0x100 +#define LINUX_EPOLLWRBAND 0x200 +#define LINUX_EPOLLMSG 0x400 +#define LINUX_EPOLLERR 0x008 +#define LINUX_EPOLLHUP 0x010 +#define LINUX_EPOLLRDHUP 0x2000 +#define LINUX_EPOLLWAKEUP 1u<<29 +#define LINUX_EPOLLONESHOT 1u<<30 +#define LINUX_EPOLLET 1u<<31 + +#define LINUX_EPOLL_EVRD (LINUX_EPOLLIN|LINUX_EPOLLRDNORM \ + |LINUX_EPOLLHUP|LINUX_EPOLLERR|LINUX_EPOLLPRI) +#define LINUX_EPOLL_EVWR (LINUX_EPOLLOUT|LINUX_EPOLLWRNORM) +#define LINUX_EPOLL_EVSUP (LINUX_EPOLLET|LINUX_EPOLLONESHOT \ + |LINUX_EPOLL_EVRD|LINUX_EPOLL_EVWR|LINUX_EPOLLRDHUP) + +#define LINUX_EPOLL_CTL_ADD 1 +#define LINUX_EPOLL_CTL_DEL 2 +#define LINUX_EPOLL_CTL_MOD 3 + +#define LINUX_EFD_SEMAPHORE (1 << 0) + +#endif /* !_LINUX_EVENT_H_ */ diff --git a/sys/compat/linux/linux_file.c b/sys/compat/linux/linux_file.c index 19104a4..ee1d1ba 100644 --- a/sys/compat/linux/linux_file.c +++ b/sys/compat/linux/linux_file.c @@ -235,6 +235,7 @@ linux_lseek(struct thread *td, struct linux_lseek_args *args) return error; } +#if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) int linux_llseek(struct thread *td, struct linux_llseek_args *args) { @@ -273,6 +274,7 @@ linux_readdir(struct thread *td, struct linux_readdir_args *args) lda.count = 1; return linux_getdents(td, &lda); } +#endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ /* * Note that linux_getdents(2) and linux_getdents64(2) have the same @@ -367,8 +369,8 @@ getdents_common(struct thread *td, struct linux_getdents64_args *args, buflen = max(LINUX_DIRBLKSIZ, nbytes); buflen = min(buflen, MAXBSIZE); - buf = malloc(buflen, M_TEMP, M_WAITOK); - lbuf = malloc(LINUX_MAXRECLEN, M_TEMP, M_WAITOK | M_ZERO); + buf = malloc(buflen, M_LINUX, M_WAITOK); + lbuf = malloc(LINUX_MAXRECLEN, M_LINUX, M_WAITOK | M_ZERO); vn_lock(vp, LK_SHARED | LK_RETRY); aiov.iov_base = buf; @@ -519,8 +521,8 @@ out: VOP_UNLOCK(vp, 0); foffset_unlock(fp, off, 0); fdrop(fp, td); - free(buf, M_TEMP); - free(lbuf, M_TEMP); + free(buf, M_LINUX); + free(lbuf, M_LINUX); return (error); } @@ -578,10 +580,8 @@ int linux_faccessat(struct thread *td, struct linux_faccessat_args *args) { char *path; - int error, dfd, flag; + int error, dfd; - if (args->flag & ~LINUX_AT_EACCESS) - return (EINVAL); /* linux convention */ if (args->amode & ~(F_OK | X_OK | W_OK | R_OK)) return (EINVAL); @@ -594,8 +594,7 @@ linux_faccessat(struct thread *td, struct linux_faccessat_args *args) printf(ARGS(access, "%s, %d"), path, args->amode); #endif - flag = (args->flag & LINUX_AT_EACCESS) == 0 ? 0 : AT_EACCESS; - error = kern_accessat(td, dfd, path, UIO_SYSSPACE, flag, args->amode); + error = kern_accessat(td, dfd, path, UIO_SYSSPACE, 0, args->amode); LFREEPATH(path); return (error); @@ -919,6 +918,7 @@ linux_truncate(struct thread *td, struct linux_truncate_args *args) return (error); } +#if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) int linux_truncate64(struct thread *td, struct linux_truncate64_args *args) { @@ -936,6 +936,8 @@ linux_truncate64(struct thread *td, struct linux_truncate64_args *args) LFREEPATH(path); return (error); } +#endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ + int linux_ftruncate(struct thread *td, struct linux_ftruncate_args *args) { @@ -1123,6 +1125,7 @@ linux_mount(struct thread *td, struct linux_mount_args *args) return (error); } +#if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) int linux_oldumount(struct thread *td, struct linux_oldumount_args *args) { @@ -1132,6 +1135,7 @@ linux_oldumount(struct thread *td, struct linux_oldumount_args *args) args2.flags = 0; return (linux_umount(td, &args2)); } +#endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ int linux_umount(struct thread *td, struct linux_umount_args *args) @@ -1262,7 +1266,7 @@ bsd_to_linux_flock64(struct flock *bsd_flock, struct l_flock64 *linux_flock) #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ static int -fcntl_common(struct thread *td, struct linux_fcntl64_args *args) +fcntl_common(struct thread *td, struct linux_fcntl_args *args) { struct l_flock linux_flock; struct flock bsd_flock; @@ -1380,6 +1384,9 @@ fcntl_common(struct thread *td, struct linux_fcntl64_args *args) fdrop(fp, td); return (kern_fcntl(td, args->fd, F_SETOWN, args->arg)); + + case LINUX_F_DUPFD_CLOEXEC: + return (kern_fcntl(td, args->fd, F_DUPFD_CLOEXEC, args->arg)); } return (EINVAL); @@ -1388,17 +1395,13 @@ fcntl_common(struct thread *td, struct linux_fcntl64_args *args) int linux_fcntl(struct thread *td, struct linux_fcntl_args *args) { - struct linux_fcntl64_args args64; #ifdef DEBUG if (ldebug(fcntl)) printf(ARGS(fcntl, "%d, %08x, *"), args->fd, args->cmd); #endif - args64.fd = args->fd; - args64.cmd = args->cmd; - args64.arg = args->arg; - return (fcntl_common(td, &args64)); + return (fcntl_common(td, args)); } #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) @@ -1407,6 +1410,7 @@ linux_fcntl64(struct thread *td, struct linux_fcntl64_args *args) { struct l_flock64 linux_flock; struct flock bsd_flock; + struct linux_fcntl_args fcntl_args; int error; #ifdef DEBUG @@ -1447,7 +1451,10 @@ linux_fcntl64(struct thread *td, struct linux_fcntl64_args *args) (intptr_t)&bsd_flock)); } - return (fcntl_common(td, args)); + fcntl_args.fd = args->fd; + fcntl_args.cmd = args->cmd; + fcntl_args.arg = args->arg; + return (fcntl_common(td, &fcntl_args)); } #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ @@ -1543,6 +1550,7 @@ linux_fadvise64(struct thread *td, struct linux_fadvise64_args *args) advice)); } +#if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) int linux_fadvise64_64(struct thread *td, struct linux_fadvise64_64_args *args) { @@ -1554,6 +1562,7 @@ linux_fadvise64_64(struct thread *td, struct linux_fadvise64_64_args *args) return (kern_posix_fadvise(td, args->fd, args->offset, args->len, advice)); } +#endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ int linux_pipe(struct thread *td, struct linux_pipe_args *args) @@ -1600,3 +1609,37 @@ linux_pipe2(struct thread *td, struct linux_pipe2_args *args) /* XXX: Close descriptors on error. */ return (copyout(fildes, args->pipefds, sizeof(fildes))); } + +int +linux_dup3(struct thread *td, struct linux_dup3_args *args) +{ + int cmd; + intptr_t newfd; + + if (args->oldfd == args->newfd) + return (EINVAL); + if ((args->flags & ~LINUX_O_CLOEXEC) != 0) + return (EINVAL); + if (args->flags & LINUX_O_CLOEXEC) + cmd = F_DUP2FD_CLOEXEC; + else + cmd = F_DUP2FD; + + newfd = args->newfd; + return (kern_fcntl(td, args->oldfd, cmd, newfd)); +} + +int +linux_fallocate(struct thread *td, struct linux_fallocate_args *args) +{ + + /* + * We emulate only posix_fallocate system call for which + * mode should be 0. + */ + if (args->mode != 0) + return (ENOSYS); + + return (kern_posix_fallocate(td, args->fd, args->offset, + args->len)); +} diff --git a/sys/compat/linux/linux_file.h b/sys/compat/linux/linux_file.h index 2d3106f..f27d5b4 100644 --- a/sys/compat/linux/linux_file.h +++ b/sys/compat/linux/linux_file.h @@ -54,4 +54,75 @@ #define LINUX_MS_NOEXEC 0x0008 #define LINUX_MS_REMOUNT 0x0020 +/* + * common open/fcntl flags + */ +#define LINUX_O_RDONLY 00000000 +#define LINUX_O_WRONLY 00000001 +#define LINUX_O_RDWR 00000002 +#define LINUX_O_ACCMODE 00000003 +#define LINUX_O_CREAT 00000100 +#define LINUX_O_EXCL 00000200 +#define LINUX_O_NOCTTY 00000400 +#define LINUX_O_TRUNC 00001000 +#define LINUX_O_APPEND 00002000 +#define LINUX_O_NONBLOCK 00004000 +#define LINUX_O_NDELAY LINUX_O_NONBLOCK +#define LINUX_O_SYNC 00010000 +#define LINUX_FASYNC 00020000 +#define LINUX_O_DIRECT 00040000 /* Direct disk access hint */ +#define LINUX_O_LARGEFILE 00100000 +#define LINUX_O_DIRECTORY 00200000 /* Must be a directory */ +#define LINUX_O_NOFOLLOW 00400000 /* Do not follow links */ +#define LINUX_O_NOATIME 01000000 +#define LINUX_O_CLOEXEC 02000000 + +#define LINUX_F_DUPFD 0 +#define LINUX_F_GETFD 1 +#define LINUX_F_SETFD 2 +#define LINUX_F_GETFL 3 +#define LINUX_F_SETFL 4 +#ifndef LINUX_F_GETLK +#define LINUX_F_GETLK 5 +#define LINUX_F_SETLK 6 +#define LINUX_F_SETLKW 7 +#endif +#ifndef LINUX_F_SETOWN +#define LINUX_F_SETOWN 8 +#define LINUX_F_GETOWN 9 +#endif +#ifndef LINUX_F_SETSIG +#define LINUX_F_SETSIG 10 +#define LINUX_F_GETSIG 11 +#endif +#ifndef LINUX_F_SETOWN_EX +#define LINUX_F_SETOWN_EX 15 +#define LINUX_F_GETOWN_EX 16 +#define LINUX_F_GETOWNER_UIDS 17 +#endif + +#define LINUX_F_SPECIFIC_BASE 1024 + +#define LINUX_F_SETLEASE (LINUX_F_SPECIFIC_BASE + 0) +#define LINUX_F_GETLEASE (LINUX_F_SPECIFIC_BASE + 1) +#define LINUX_F_CANCELLK (LINUX_F_SPECIFIC_BASE + 5) +#define LINUX_F_DUPFD_CLOEXEC (LINUX_F_SPECIFIC_BASE + 6) +#define LINUX_F_NOTIFY (LINUX_F_SPECIFIC_BASE + 2) +#define LINUX_F_SETPIPE_SZ (LINUX_F_SPECIFIC_BASE + 7) +#define LINUX_F_GETPIPE_SZ (LINUX_F_SPECIFIC_BASE + 8) + +#define LINUX_F_GETLKP 36 +#define LINUX_F_SETLKP 37 +#define LINUX_F_SETLKPW 38 + +#define LINUX_F_OWNER_TID 0 +#define LINUX_F_OWNER_PID 1 +#define LINUX_F_OWNER_PGRP 2 + +#ifndef LINUX_F_RDLCK +#define LINUX_F_RDLCK 0 +#define LINUX_F_WRLCK 1 +#define LINUX_F_UNLCK 2 +#endif + #endif /* !_LINUX_FILE_H_ */ diff --git a/sys/compat/linux/linux_fork.c b/sys/compat/linux/linux_fork.c index 0ab7d3a..9e7c71f 100644 --- a/sys/compat/linux/linux_fork.c +++ b/sys/compat/linux/linux_fork.c @@ -35,13 +35,20 @@ __FBSDID("$FreeBSD$"); #include <sys/param.h> #include <sys/systm.h> #include <sys/imgact.h> +#include <sys/ktr.h> #include <sys/lock.h> #include <sys/mutex.h> #include <sys/proc.h> +#include <sys/racct.h> #include <sys/sched.h> -#include <sys/sdt.h> +#include <sys/syscallsubr.h> #include <sys/sx.h> #include <sys/unistd.h> +#include <sys/wait.h> + +#include <vm/vm.h> +#include <vm/pmap.h> +#include <vm/vm_map.h> #ifdef COMPAT_LINUX32 #include <machine/../linux32/linux.h> @@ -50,18 +57,10 @@ __FBSDID("$FreeBSD$"); #include <machine/../linux/linux.h> #include <machine/../linux/linux_proto.h> #endif -#include <compat/linux/linux_dtrace.h> -#include <compat/linux/linux_signal.h> #include <compat/linux/linux_emul.h> +#include <compat/linux/linux_futex.h> #include <compat/linux/linux_misc.h> - -/* DTrace init */ -LIN_SDT_PROVIDER_DECLARE(LINUX_DTRACE); - -/* Linuxulator-global DTrace probes */ -LIN_SDT_PROBE_DECLARE(locks, emul_lock, locked); -LIN_SDT_PROBE_DECLARE(locks, emul_lock, unlock); - +#include <compat/linux/linux_util.h> int linux_fork(struct thread *td, struct linux_fork_args *args) @@ -79,14 +78,11 @@ linux_fork(struct thread *td, struct linux_fork_args *args) != 0) return (error); - td->td_retval[0] = p2->p_pid; - td->td_retval[1] = 0; + td2 = FIRST_THREAD_IN_PROC(p2); - error = linux_proc_init(td, td->td_retval[0], 0); - if (error) - return (error); + linux_proc_init(td, td2, 0); - td2 = FIRST_THREAD_IN_PROC(p2); + td->td_retval[0] = p2->p_pid; /* * Make this runnable after we are finished with it. @@ -115,13 +111,11 @@ linux_vfork(struct thread *td, struct linux_vfork_args *args) 0, &p2, NULL, 0)) != 0) return (error); - td->td_retval[0] = p2->p_pid; + td2 = FIRST_THREAD_IN_PROC(p2); - error = linux_proc_init(td, td->td_retval[0], 0); - if (error) - return (error); + linux_proc_init(td, td2, 0); - td2 = FIRST_THREAD_IN_PROC(p2); + td->td_retval[0] = p2->p_pid; /* * Make this runnable after we are finished with it. @@ -134,8 +128,8 @@ linux_vfork(struct thread *td, struct linux_vfork_args *args) return (0); } -int -linux_clone(struct thread *td, struct linux_clone_args *args) +static int +linux_clone_proc(struct thread *td, struct linux_clone_args *args) { int error, ff = RFPROC | RFSTOPPED; struct proc *p2; @@ -153,9 +147,7 @@ linux_clone(struct thread *td, struct linux_clone_args *args) exit_signal = args->flags & 0x000000ff; if (LINUX_SIG_VALID(exit_signal)) { - if (exit_signal <= LINUX_SIGTBLSZ) - exit_signal = - linux_to_bsd_signal[_SIG_IDX(exit_signal)]; + exit_signal = linux_to_bsd_signal(exit_signal); } else if (exit_signal != 0) return (EINVAL); @@ -172,22 +164,6 @@ linux_clone(struct thread *td, struct linux_clone_args *args) if (!(args->flags & (LINUX_CLONE_FILES | LINUX_CLONE_FS))) ff |= RFFDG; - /* - * Attempt to detect when linux_clone(2) is used for creating - * kernel threads. Unfortunately despite the existence of the - * CLONE_THREAD flag, version of linuxthreads package used in - * most popular distros as of beginning of 2005 doesn't make - * any use of it. Therefore, this detection relies on - * empirical observation that linuxthreads sets certain - * combination of flags, so that we can make more or less - * precise detection and notify the FreeBSD kernel that several - * processes are in fact part of the same threading group, so - * that special treatment is necessary for signal delivery - * between those processes and fd locking. - */ - if ((args->flags & 0xffffff00) == LINUX_THREADING_FLAGS) - ff |= RFTHREAD; - if (args->flags & LINUX_CLONE_PARENT_SETTID) if (args->parent_tidptr == NULL) return (EINVAL); @@ -199,29 +175,13 @@ linux_clone(struct thread *td, struct linux_clone_args *args) if (error) return (error); - if (args->flags & (LINUX_CLONE_PARENT | LINUX_CLONE_THREAD)) { - sx_xlock(&proctree_lock); - PROC_LOCK(p2); - proc_reparent(p2, td->td_proc->p_pptr); - PROC_UNLOCK(p2); - sx_xunlock(&proctree_lock); - } + td2 = FIRST_THREAD_IN_PROC(p2); /* create the emuldata */ - error = linux_proc_init(td, p2->p_pid, args->flags); - /* reference it - no need to check this */ - em = em_find(p2, EMUL_DOLOCK); - KASSERT(em != NULL, ("clone: emuldata not found.")); - /* and adjust it */ - - if (args->flags & LINUX_CLONE_THREAD) { -#ifdef notyet - PROC_LOCK(p2); - p2->p_pgrp = td->td_proc->p_pgrp; - PROC_UNLOCK(p2); -#endif - exit_signal = 0; - } + linux_proc_init(td, td2, args->flags); + + em = em_find(td2); + KASSERT(em != NULL, ("clone_proc: emuldata not found.\n")); if (args->flags & LINUX_CLONE_CHILD_SETTID) em->child_set_tid = args->child_tidptr; @@ -233,8 +193,6 @@ linux_clone(struct thread *td, struct linux_clone_args *args) else em->child_clear_tid = NULL; - EMUL_UNLOCK(&emul_lock); - if (args->flags & LINUX_CLONE_PARENT_SETTID) { error = copyout(&p2->p_pid, args->parent_tidptr, sizeof(p2->p_pid)); @@ -245,14 +203,12 @@ linux_clone(struct thread *td, struct linux_clone_args *args) PROC_LOCK(p2); p2->p_sigparent = exit_signal; PROC_UNLOCK(p2); - td2 = FIRST_THREAD_IN_PROC(p2); /* * In a case of stack = NULL, we are supposed to COW calling process * stack. This is what normal fork() does, so we just keep tf_rsp arg * intact. */ - if (args->stack) - linux_set_upcall_kse(td2, PTROUT(args->stack)); + linux_set_upcall_kse(td2, PTROUT(args->stack)); if (args->flags & LINUX_CLONE_SETTLS) linux_set_cloned_tls(td2, args->tls); @@ -263,6 +219,7 @@ linux_clone(struct thread *td, struct linux_clone_args *args) "stack %p sig = %d"), (int)p2->p_pid, args->stack, exit_signal); #endif + /* * Make this runnable after we are finished with it. */ @@ -272,7 +229,233 @@ linux_clone(struct thread *td, struct linux_clone_args *args) thread_unlock(td2); td->td_retval[0] = p2->p_pid; - td->td_retval[1] = 0; return (0); } + +static int +linux_clone_thread(struct thread *td, struct linux_clone_args *args) +{ + struct linux_emuldata *em; + struct thread *newtd; + struct proc *p; + int error; + +#ifdef DEBUG + if (ldebug(clone)) { + printf(ARGS(clone, "thread: flags %x, stack %p, parent tid: %p, " + "child tid: %p"), (unsigned)args->flags, + args->stack, args->parent_tidptr, args->child_tidptr); + } +#endif + + LINUX_CTR4(clone_thread, "thread(%d) flags %x ptid %p ctid %p", + td->td_tid, (unsigned)args->flags, + args->parent_tidptr, args->child_tidptr); + + if (args->flags & LINUX_CLONE_PARENT_SETTID) + if (args->parent_tidptr == NULL) + return (EINVAL); + + /* Threads should be created with own stack */ + if (args->stack == NULL) + return (EINVAL); + + p = td->td_proc; + +#ifdef RACCT + if (racct_enable) { + PROC_LOCK(p); + error = racct_add(p, RACCT_NTHR, 1); + PROC_UNLOCK(p); + if (error != 0) + return (EPROCLIM); + } +#endif + + /* Initialize our td */ + error = kern_thr_alloc(p, 0, &newtd); + if (error) + goto fail; + + cpu_set_upcall(newtd, td); + + bzero(&newtd->td_startzero, + __rangeof(struct thread, td_startzero, td_endzero)); + bcopy(&td->td_startcopy, &newtd->td_startcopy, + __rangeof(struct thread, td_startcopy, td_endcopy)); + + newtd->td_proc = p; + newtd->td_ucred = crhold(td->td_ucred); + + /* create the emuldata */ + linux_proc_init(td, newtd, args->flags); + + em = em_find(newtd); + KASSERT(em != NULL, ("clone_thread: emuldata not found.\n")); + + if (args->flags & LINUX_CLONE_SETTLS) + linux_set_cloned_tls(newtd, args->tls); + + if (args->flags & LINUX_CLONE_CHILD_SETTID) + em->child_set_tid = args->child_tidptr; + else + em->child_set_tid = NULL; + + if (args->flags & LINUX_CLONE_CHILD_CLEARTID) + em->child_clear_tid = args->child_tidptr; + else + em->child_clear_tid = NULL; + + cpu_thread_clean(newtd); + + linux_set_upcall_kse(newtd, PTROUT(args->stack)); + + PROC_LOCK(p); + p->p_flag |= P_HADTHREADS; + bcopy(p->p_comm, newtd->td_name, sizeof(newtd->td_name)); + + if (args->flags & LINUX_CLONE_PARENT) + thread_link(newtd, p->p_pptr); + else + thread_link(newtd, p); + + thread_lock(td); + /* let the scheduler know about these things. */ + sched_fork_thread(td, newtd); + thread_unlock(td); + if (P_SHOULDSTOP(p)) + newtd->td_flags |= TDF_ASTPENDING | TDF_NEEDSUSPCHK; + PROC_UNLOCK(p); + + tidhash_add(newtd); + +#ifdef DEBUG + if (ldebug(clone)) + printf(ARGS(clone, "successful clone to %d, stack %p"), + (int)newtd->td_tid, args->stack); +#endif + + LINUX_CTR2(clone_thread, "thread(%d) successful clone to %d", + td->td_tid, newtd->td_tid); + + if (args->flags & LINUX_CLONE_PARENT_SETTID) { + error = copyout(&newtd->td_tid, args->parent_tidptr, + sizeof(newtd->td_tid)); + if (error) + printf(LMSG("clone_thread: copyout failed!")); + } + + /* + * Make this runnable after we are finished with it. + */ + thread_lock(newtd); + TD_SET_CAN_RUN(newtd); + sched_add(newtd, SRQ_BORING); + thread_unlock(newtd); + + td->td_retval[0] = newtd->td_tid; + + return (0); + +fail: +#ifdef RACCT + if (racct_enable) { + PROC_LOCK(p); + racct_sub(p, RACCT_NTHR, 1); + PROC_UNLOCK(p); + } +#endif + return (error); +} + +int +linux_clone(struct thread *td, struct linux_clone_args *args) +{ + + if (args->flags & LINUX_CLONE_THREAD) + return (linux_clone_thread(td, args)); + else + return (linux_clone_proc(td, args)); +} + +int +linux_exit(struct thread *td, struct linux_exit_args *args) +{ + struct linux_emuldata *em; + + em = em_find(td); + KASSERT(em != NULL, ("exit: emuldata not found.\n")); + + LINUX_CTR2(exit, "thread(%d) (%d)", em->em_tid, args->rval); + + linux_thread_detach(td); + + /* + * XXX. When the last two threads of a process + * exit via pthread_exit() try thr_exit() first. + */ + kern_thr_exit(td); + exit1(td, W_EXITCODE(args->rval, 0)); + /* NOTREACHED */ +} + +int +linux_set_tid_address(struct thread *td, struct linux_set_tid_address_args *args) +{ + struct linux_emuldata *em; + + em = em_find(td); + KASSERT(em != NULL, ("set_tid_address: emuldata not found.\n")); + + em->child_clear_tid = args->tidptr; + + td->td_retval[0] = em->em_tid; + + LINUX_CTR3(set_tid_address, "tidptr(%d) %p, returns %d", + em->em_tid, args->tidptr, td->td_retval[0]); + + return (0); +} + +void +linux_thread_detach(struct thread *td) +{ + struct linux_sys_futex_args cup; + struct linux_emuldata *em; + int *child_clear_tid; + int error; + + em = em_find(td); + KASSERT(em != NULL, ("thread_detach: emuldata not found.\n")); + + LINUX_CTR1(thread_detach, "thread(%d)", em->em_tid); + + release_futexes(td, em); + + child_clear_tid = em->child_clear_tid; + + if (child_clear_tid != NULL) { + + LINUX_CTR2(thread_detach, "thread(%d) %p", + em->em_tid, child_clear_tid); + + error = suword32(child_clear_tid, 0); + if (error != 0) + return; + + cup.uaddr = child_clear_tid; + cup.op = LINUX_FUTEX_WAKE; + cup.val = 1; /* wake one */ + cup.timeout = NULL; + cup.uaddr2 = NULL; + cup.val3 = 0; + error = linux_sys_futex(td, &cup); + /* + * this cannot happen at the moment and if this happens it + * probably means there is a user space bug + */ + if (error != 0) + linux_msg(td, "futex stuff in thread_detach failed."); + } +} diff --git a/sys/compat/linux/linux_futex.c b/sys/compat/linux/linux_futex.c index eb79ad9..4573f73 100644 --- a/sys/compat/linux/linux_futex.c +++ b/sys/compat/linux/linux_futex.c @@ -66,15 +66,12 @@ __KERNEL_RCSID(1, "$NetBSD: linux_futex.c,v 1.7 2006/07/24 19:01:49 manu Exp $") #include <compat/linux/linux_dtrace.h> #include <compat/linux/linux_emul.h> #include <compat/linux/linux_futex.h> +#include <compat/linux/linux_timer.h> #include <compat/linux/linux_util.h> /* DTrace init */ LIN_SDT_PROVIDER_DECLARE(LINUX_DTRACE); -/* Linuxulator-global DTrace probes */ -LIN_SDT_PROBE_DECLARE(locks, emul_lock, locked); -LIN_SDT_PROBE_DECLARE(locks, emul_lock, unlock); - /** * Futex part for the special DTrace module "locks". */ @@ -175,8 +172,8 @@ LIN_SDT_PROBE_DEFINE2(futex, linux_get_robust_list, entry, "struct thread *", "struct linux_get_robust_list_args *"); LIN_SDT_PROBE_DEFINE1(futex, linux_get_robust_list, copyout_error, "int"); LIN_SDT_PROBE_DEFINE1(futex, linux_get_robust_list, return, "int"); -LIN_SDT_PROBE_DEFINE3(futex, handle_futex_death, entry, "struct proc *", - "uint32_t *", "unsigned int"); +LIN_SDT_PROBE_DEFINE3(futex, handle_futex_death, entry, + "struct linux_emuldata *", "uint32_t *", "unsigned int"); LIN_SDT_PROBE_DEFINE1(futex, handle_futex_death, copyin_error, "int"); LIN_SDT_PROBE_DEFINE1(futex, handle_futex_death, return, "int"); LIN_SDT_PROBE_DEFINE3(futex, fetch_robust_entry, entry, @@ -184,13 +181,11 @@ LIN_SDT_PROBE_DEFINE3(futex, fetch_robust_entry, entry, "unsigned int *"); LIN_SDT_PROBE_DEFINE1(futex, fetch_robust_entry, copyin_error, "int"); LIN_SDT_PROBE_DEFINE1(futex, fetch_robust_entry, return, "int"); -LIN_SDT_PROBE_DEFINE1(futex, release_futexes, entry, "struct proc *"); +LIN_SDT_PROBE_DEFINE2(futex, release_futexes, entry, "struct thread *", + "struct linux_emuldata *"); LIN_SDT_PROBE_DEFINE1(futex, release_futexes, copyin_error, "int"); LIN_SDT_PROBE_DEFINE0(futex, release_futexes, return); -static MALLOC_DEFINE(M_FUTEX, "futex", "Linux futexes"); -static MALLOC_DEFINE(M_FUTEX_WP, "futex wp", "Linux futexes wp"); - struct futex; struct waiting_proc { @@ -253,6 +248,21 @@ struct mtx futex_mtx; /* protects the futex list */ * wp_list to prevent double wakeup. */ +static void futex_put(struct futex *, struct waiting_proc *); +static int futex_get0(uint32_t *, struct futex **f, uint32_t); +static int futex_get(uint32_t *, struct waiting_proc **, struct futex **, + uint32_t); +static int futex_sleep(struct futex *, struct waiting_proc *, int); +static int futex_wake(struct futex *, int, uint32_t); +static int futex_requeue(struct futex *, int, struct futex *, int); +static int futex_wait(struct futex *, struct waiting_proc *, int, + uint32_t); +static int futex_atomic_op(struct thread *, int, uint32_t *); +static int handle_futex_death(struct linux_emuldata *, uint32_t *, + unsigned int); +static int fetch_robust_entry(struct linux_robust_list **, + struct linux_robust_list **, unsigned int *); + /* support.s */ int futex_xchgl(int oparg, uint32_t *uaddr, int *oldval); int futex_addl(int oparg, uint32_t *uaddr, int *oldval); @@ -260,6 +270,7 @@ int futex_orl(int oparg, uint32_t *uaddr, int *oldval); int futex_andl(int oparg, uint32_t *uaddr, int *oldval); int futex_xorl(int oparg, uint32_t *uaddr, int *oldval); + static void futex_put(struct futex *f, struct waiting_proc *wp) { @@ -657,10 +668,11 @@ int linux_sys_futex(struct thread *td, struct linux_sys_futex_args *args) { int clockrt, nrwake, op_ret, ret; - struct linux_emuldata *em; + struct linux_pemuldata *pem; struct waiting_proc *wp; struct futex *f, *f2; - struct l_timespec timeout; + struct l_timespec ltimeout; + struct timespec timeout; struct timeval utv, ctv; int timeout_hz; int error; @@ -704,6 +716,38 @@ linux_sys_futex(struct thread *td, struct linux_sys_futex_args *args) LINUX_CTR3(sys_futex, "WAIT uaddr %p val 0x%x bitset 0x%x", args->uaddr, args->val, args->val3); + if (args->timeout != NULL) { + error = copyin(args->timeout, <imeout, sizeof(ltimeout)); + if (error) { + LIN_SDT_PROBE1(futex, linux_sys_futex, copyin_error, + error); + LIN_SDT_PROBE1(futex, linux_sys_futex, return, error); + return (error); + } + error = linux_to_native_timespec(&timeout, <imeout); + if (error) + return (error); + TIMESPEC_TO_TIMEVAL(&utv, &timeout); + error = itimerfix(&utv); + if (error) { + LIN_SDT_PROBE1(futex, linux_sys_futex, itimerfix_error, + error); + LIN_SDT_PROBE1(futex, linux_sys_futex, return, error); + return (error); + } + if (clockrt) { + microtime(&ctv); + timevalsub(&utv, &ctv); + } else if (args->op == LINUX_FUTEX_WAIT_BITSET) { + microuptime(&ctv); + timevalsub(&utv, &ctv); + } + if (utv.tv_sec < 0) + timevalclear(&utv); + timeout_hz = tvtohz(&utv); + } else + timeout_hz = 0; + error = futex_get(args->uaddr, &wp, &f, flags | FUTEX_CREATE_WP); if (error) { @@ -736,37 +780,6 @@ linux_sys_futex(struct thread *td, struct linux_sys_futex_args *args) return (EWOULDBLOCK); } - if (args->timeout != NULL) { - error = copyin(args->timeout, &timeout, sizeof(timeout)); - if (error) { - LIN_SDT_PROBE1(futex, linux_sys_futex, copyin_error, - error); - LIN_SDT_PROBE1(futex, linux_sys_futex, return, error); - futex_put(f, wp); - return (error); - } - TIMESPEC_TO_TIMEVAL(&utv, &timeout); - error = itimerfix(&utv); - if (error) { - LIN_SDT_PROBE1(futex, linux_sys_futex, itimerfix_error, - error); - LIN_SDT_PROBE1(futex, linux_sys_futex, return, error); - futex_put(f, wp); - return (error); - } - if (clockrt) { - microtime(&ctv); - timevalsub(&utv, &ctv); - } else if (args->op == LINUX_FUTEX_WAIT_BITSET) { - microuptime(&ctv); - timevalsub(&utv, &ctv); - } - if (utv.tv_sec < 0) - timevalclear(&utv); - timeout_hz = tvtohz(&utv); - } else - timeout_hz = 0; - error = futex_wait(f, wp, timeout_hz, args->val3); break; @@ -943,29 +956,43 @@ linux_sys_futex(struct thread *td, struct linux_sys_futex_args *args) case LINUX_FUTEX_LOCK_PI: /* not yet implemented */ - linux_msg(td, - "linux_sys_futex: " - "op LINUX_FUTEX_LOCK_PI not implemented\n"); - LIN_SDT_PROBE0(futex, linux_sys_futex, unimplemented_lock_pi); + pem = pem_find(td->td_proc); + if ((pem->flags & LINUX_XUNSUP_FUTEXPIOP) == 0) { + linux_msg(td, + "linux_sys_futex: " + "unsupported futex_pi op\n"); + pem->flags |= LINUX_XUNSUP_FUTEXPIOP; + LIN_SDT_PROBE0(futex, linux_sys_futex, + unimplemented_lock_pi); + } LIN_SDT_PROBE1(futex, linux_sys_futex, return, ENOSYS); return (ENOSYS); case LINUX_FUTEX_UNLOCK_PI: /* not yet implemented */ - linux_msg(td, - "linux_sys_futex: " - "op LINUX_FUTEX_UNLOCK_PI not implemented\n"); - LIN_SDT_PROBE0(futex, linux_sys_futex, unimplemented_unlock_pi); + pem = pem_find(td->td_proc); + if ((pem->flags & LINUX_XUNSUP_FUTEXPIOP) == 0) { + linux_msg(td, + "linux_sys_futex: " + "unsupported futex_pi op\n"); + pem->flags |= LINUX_XUNSUP_FUTEXPIOP; + LIN_SDT_PROBE0(futex, linux_sys_futex, + unimplemented_unlock_pi); + } LIN_SDT_PROBE1(futex, linux_sys_futex, return, ENOSYS); return (ENOSYS); case LINUX_FUTEX_TRYLOCK_PI: /* not yet implemented */ - linux_msg(td, - "linux_sys_futex: " - "op LINUX_FUTEX_TRYLOCK_PI not implemented\n"); - LIN_SDT_PROBE0(futex, linux_sys_futex, - unimplemented_trylock_pi); + pem = pem_find(td->td_proc); + if ((pem->flags & LINUX_XUNSUP_FUTEXPIOP) == 0) { + linux_msg(td, + "linux_sys_futex: " + "unsupported futex_pi op\n"); + pem->flags |= LINUX_XUNSUP_FUTEXPIOP; + LIN_SDT_PROBE0(futex, linux_sys_futex, + unimplemented_trylock_pi); + } LIN_SDT_PROBE1(futex, linux_sys_futex, return, ENOSYS); return (ENOSYS); @@ -977,12 +1004,12 @@ linux_sys_futex(struct thread *td, struct linux_sys_futex_args *args) * Glibc versions prior to 2.3.3 fall back to FUTEX_WAKE when * FUTEX_REQUEUE returned EINVAL. */ - em = em_find(td->td_proc, EMUL_DONTLOCK); - if ((em->flags & LINUX_XDEPR_REQUEUEOP) == 0) { + pem = pem_find(td->td_proc); + if ((pem->flags & LINUX_XDEPR_REQUEUEOP) == 0) { linux_msg(td, "linux_sys_futex: " "unsupported futex_requeue op\n"); - em->flags |= LINUX_XDEPR_REQUEUEOP; + pem->flags |= LINUX_XDEPR_REQUEUEOP; LIN_SDT_PROBE0(futex, linux_sys_futex, deprecated_requeue); } @@ -992,21 +1019,29 @@ linux_sys_futex(struct thread *td, struct linux_sys_futex_args *args) case LINUX_FUTEX_WAIT_REQUEUE_PI: /* not yet implemented */ - linux_msg(td, - "linux_sys_futex: " - "op FUTEX_WAIT_REQUEUE_PI not implemented\n"); - LIN_SDT_PROBE0(futex, linux_sys_futex, - unimplemented_wait_requeue_pi); + pem = pem_find(td->td_proc); + if ((pem->flags & LINUX_XUNSUP_FUTEXPIOP) == 0) { + linux_msg(td, + "linux_sys_futex: " + "unsupported futex_pi op\n"); + pem->flags |= LINUX_XUNSUP_FUTEXPIOP; + LIN_SDT_PROBE0(futex, linux_sys_futex, + unimplemented_wait_requeue_pi); + } LIN_SDT_PROBE1(futex, linux_sys_futex, return, ENOSYS); return (ENOSYS); case LINUX_FUTEX_CMP_REQUEUE_PI: /* not yet implemented */ - linux_msg(td, - "linux_sys_futex: " - "op LINUX_FUTEX_CMP_REQUEUE_PI not implemented\n"); - LIN_SDT_PROBE0(futex, linux_sys_futex, - unimplemented_cmp_requeue_pi); + pem = pem_find(td->td_proc); + if ((pem->flags & LINUX_XUNSUP_FUTEXPIOP) == 0) { + linux_msg(td, + "linux_sys_futex: " + "unsupported futex_pi op\n"); + pem->flags |= LINUX_XUNSUP_FUTEXPIOP; + LIN_SDT_PROBE0(futex, linux_sys_futex, + unimplemented_cmp_requeue_pi); + } LIN_SDT_PROBE1(futex, linux_sys_futex, return, ENOSYS); return (ENOSYS); @@ -1036,9 +1071,8 @@ linux_set_robust_list(struct thread *td, struct linux_set_robust_list_args *args return (EINVAL); } - em = em_find(td->td_proc, EMUL_DOLOCK); + em = em_find(td); em->robust_futexes = args->head; - EMUL_UNLOCK(&emul_lock); LIN_SDT_PROBE1(futex, linux_set_robust_list, return, 0); return (0); @@ -1050,29 +1084,36 @@ linux_get_robust_list(struct thread *td, struct linux_get_robust_list_args *args struct linux_emuldata *em; struct linux_robust_list_head *head; l_size_t len = sizeof(struct linux_robust_list_head); + struct thread *td2; int error = 0; LIN_SDT_PROBE2(futex, linux_get_robust_list, entry, td, args); if (!args->pid) { - em = em_find(td->td_proc, EMUL_DONTLOCK); + em = em_find(td); + KASSERT(em != NULL, ("get_robust_list: emuldata notfound.\n")); head = em->robust_futexes; } else { - struct proc *p; - - p = pfind(args->pid); - if (p == NULL) { + td2 = tdfind(args->pid, -1); + if (td2 == NULL) { LIN_SDT_PROBE1(futex, linux_get_robust_list, return, ESRCH); return (ESRCH); } + if (SV_PROC_ABI(td2->td_proc) != SV_ABI_LINUX) { + LIN_SDT_PROBE1(futex, linux_get_robust_list, return, + EPERM); + PROC_UNLOCK(td2->td_proc); + return (EPERM); + } - em = em_find(p, EMUL_DONTLOCK); + em = em_find(td2); + KASSERT(em != NULL, ("get_robust_list: emuldata notfound.\n")); /* XXX: ptrace? */ if (priv_check(td, PRIV_CRED_SETUID) || priv_check(td, PRIV_CRED_SETEUID) || - p_candebug(td, p)) { - PROC_UNLOCK(p); + p_candebug(td, td2->td_proc)) { + PROC_UNLOCK(td2->td_proc); LIN_SDT_PROBE1(futex, linux_get_robust_list, return, EPERM); @@ -1080,7 +1121,7 @@ linux_get_robust_list(struct thread *td, struct linux_get_robust_list_args *args } head = em->robust_futexes; - PROC_UNLOCK(p); + PROC_UNLOCK(td2->td_proc); } error = copyout(&len, args->len, sizeof(l_size_t)); @@ -1102,13 +1143,14 @@ linux_get_robust_list(struct thread *td, struct linux_get_robust_list_args *args } static int -handle_futex_death(struct proc *p, uint32_t *uaddr, unsigned int pi) +handle_futex_death(struct linux_emuldata *em, uint32_t *uaddr, + unsigned int pi) { uint32_t uval, nval, mval; struct futex *f; int error; - LIN_SDT_PROBE3(futex, handle_futex_death, entry, p, uaddr, pi); + LIN_SDT_PROBE3(futex, handle_futex_death, entry, em, uaddr, pi); retry: error = copyin(uaddr, &uval, 4); @@ -1117,7 +1159,7 @@ retry: LIN_SDT_PROBE1(futex, handle_futex_death, return, EFAULT); return (EFAULT); } - if ((uval & FUTEX_TID_MASK) == p->p_pid) { + if ((uval & FUTEX_TID_MASK) == em->em_tid) { mval = (uval & FUTEX_WAITERS) | FUTEX_OWNER_DIED; nval = casuword32(uaddr, uval, mval); @@ -1174,18 +1216,16 @@ fetch_robust_entry(struct linux_robust_list **entry, /* This walks the list of robust futexes releasing them. */ void -release_futexes(struct proc *p) +release_futexes(struct thread *td, struct linux_emuldata *em) { struct linux_robust_list_head *head = NULL; struct linux_robust_list *entry, *next_entry, *pending; unsigned int limit = 2048, pi, next_pi, pip; - struct linux_emuldata *em; l_long futex_offset; int rc, error; - LIN_SDT_PROBE1(futex, release_futexes, entry, p); + LIN_SDT_PROBE2(futex, release_futexes, entry, td, em); - em = em_find(p, EMUL_DONTLOCK); head = em->robust_futexes; if (head == NULL) { @@ -1215,7 +1255,7 @@ release_futexes(struct proc *p) rc = fetch_robust_entry(&next_entry, PTRIN(&entry->next), &next_pi); if (entry != pending) - if (handle_futex_death(p, + if (handle_futex_death(em, (uint32_t *)((caddr_t)entry + futex_offset), pi)) { LIN_SDT_PROBE0(futex, release_futexes, return); return; @@ -1235,7 +1275,7 @@ release_futexes(struct proc *p) } if (pending) - handle_futex_death(p, (uint32_t *)((caddr_t)pending + futex_offset), pip); + handle_futex_death(em, (uint32_t *)((caddr_t)pending + futex_offset), pip); LIN_SDT_PROBE0(futex, release_futexes, return); } diff --git a/sys/compat/linux/linux_futex.h b/sys/compat/linux/linux_futex.h index 0990daa..7922743 100644 --- a/sys/compat/linux/linux_futex.h +++ b/sys/compat/linux/linux_futex.h @@ -76,6 +76,7 @@ extern struct mtx futex_mtx; #define FUTEX_TID_MASK 0x3fffffff #define FUTEX_BITSET_MATCH_ANY 0xffffffff -void release_futexes(struct proc *); +void release_futexes(struct thread *, + struct linux_emuldata *); #endif /* !_LINUX_FUTEX_H */ diff --git a/sys/compat/linux/linux_getcwd.c b/sys/compat/linux/linux_getcwd.c index 1c7080d..da1c726 100644 --- a/sys/compat/linux/linux_getcwd.c +++ b/sys/compat/linux/linux_getcwd.c @@ -186,7 +186,7 @@ linux_getcwd_scandir(lvpp, uvpp, bpp, bufp, td) dirbuflen = DIRBLKSIZ; if (dirbuflen < va.va_blocksize) dirbuflen = va.va_blocksize; - dirbuf = malloc(dirbuflen, M_TEMP, M_WAITOK); + dirbuf = malloc(dirbuflen, M_LINUX, M_WAITOK); #if 0 unionread: @@ -274,7 +274,7 @@ unionread: out: vput(lvp); *lvpp = NULL; - free(dirbuf, M_TEMP); + free(dirbuf, M_LINUX); return error; } diff --git a/sys/compat/linux/linux_ioctl.c b/sys/compat/linux/linux_ioctl.c index ab95e64..8858e2f 100644 --- a/sys/compat/linux/linux_ioctl.c +++ b/sys/compat/linux/linux_ioctl.c @@ -68,7 +68,6 @@ __FBSDID("$FreeBSD$"); #include <net/if.h> #include <net/if_dl.h> #include <net/if_types.h> -#include <net/vnet.h> #include <dev/usb/usb_ioctl.h> @@ -95,9 +94,6 @@ __FBSDID("$FreeBSD$"); CTASSERT(LINUX_IFNAMSIZ == IFNAMSIZ); -FEATURE(linuxulator_v4l, "V4L ioctl wrapper support in the linuxulator"); -FEATURE(linuxulator_v4l2, "V4L2 ioctl wrapper support in the linuxulator"); - static linux_ioctl_function_t linux_ioctl_cdrom; static linux_ioctl_function_t linux_ioctl_vfat; static linux_ioctl_function_t linux_ioctl_console; @@ -1980,8 +1976,6 @@ linux_ioctl_sound(struct thread *td, struct linux_ioctl_args *args) * Console related ioctls */ -#define ISSIGVALID(sig) ((sig) > 0 && (sig) < NSIG) - static int linux_ioctl_console(struct thread *td, struct linux_ioctl_args *args) { @@ -2064,8 +2058,16 @@ linux_ioctl_console(struct thread *td, struct linux_ioctl_args *args) struct vt_mode mode; if ((error = copyin((void *)args->arg, &mode, sizeof(mode)))) break; - if (!ISSIGVALID(mode.frsig) && ISSIGVALID(mode.acqsig)) - mode.frsig = mode.acqsig; + if (LINUX_SIG_VALID(mode.relsig)) + mode.relsig = linux_to_bsd_signal(mode.relsig); + else + mode.relsig = 0; + if (LINUX_SIG_VALID(mode.acqsig)) + mode.acqsig = linux_to_bsd_signal(mode.acqsig); + else + mode.acqsig = 0; + /* XXX. Linux ignores frsig and set it to 0. */ + mode.frsig = 0; if ((error = copyout(&mode, (void *)args->arg, sizeof(mode)))) break; args->cmd = VT_SETMODE; @@ -2108,34 +2110,6 @@ linux_ioctl_console(struct thread *td, struct linux_ioctl_args *args) #define IFP_IS_ETH(ifp) (ifp->if_type == IFT_ETHER) /* - * Interface function used by linprocfs (at the time of writing). It's not - * used by the Linuxulator itself. - */ -int -linux_ifname(struct ifnet *ifp, char *buffer, size_t buflen) -{ - struct ifnet *ifscan; - int ethno; - - IFNET_RLOCK_ASSERT(); - - /* Short-circuit non ethernet interfaces */ - if (!IFP_IS_ETH(ifp)) - return (strlcpy(buffer, ifp->if_xname, buflen)); - - /* Determine the (relative) unit number for ethernet interfaces */ - ethno = 0; - TAILQ_FOREACH(ifscan, &V_ifnet, if_link) { - if (ifscan == ifp) - return (snprintf(buffer, buflen, "eth%d", ethno)); - if (IFP_IS_ETH(ifscan)) - ethno++; - } - - return (0); -} - -/* * Translate a Linux interface name to a FreeBSD interface name, * and return the associated ifnet structure * bsdname and lxname need to be least IFNAMSIZ bytes long, but @@ -3621,9 +3595,16 @@ linux_ioctl(struct thread *td, struct linux_ioctl_args *args) sx_sunlock(&linux_ioctl_sx); fdrop(fp, td); - linux_msg(td, "ioctl fd=%d, cmd=0x%x ('%c',%d) is not implemented", - args->fd, (int)(args->cmd & 0xffff), - (int)(args->cmd & 0xff00) >> 8, (int)(args->cmd & 0xff)); + switch (args->cmd & 0xffff) { + case LINUX_BTRFS_IOC_CLONE: + return (ENOTSUP); + + default: + linux_msg(td, "ioctl fd=%d, cmd=0x%x ('%c',%d) is not implemented", + args->fd, (int)(args->cmd & 0xffff), + (int)(args->cmd & 0xff00) >> 8, (int)(args->cmd & 0xff)); + break; + } return (EINVAL); } diff --git a/sys/compat/linux/linux_ioctl.h b/sys/compat/linux/linux_ioctl.h index 3f63b21..873937d 100644 --- a/sys/compat/linux/linux_ioctl.h +++ b/sys/compat/linux/linux_ioctl.h @@ -581,13 +581,6 @@ #define LINUX_IOCTL_DRM_MAX 0x64ff /* - * This doesn't really belong here, but I can't think of a better - * place to put it. - */ -struct ifnet; -int linux_ifname(struct ifnet *, char *, size_t); - -/* * video */ #define LINUX_VIDIOCGCAP 0x7601 @@ -752,6 +745,12 @@ int linux_ifname(struct ifnet *, char *, size_t); #define FBSD_LUSB_MIN 0xffdd /* + * Linux btrfs clone operation + */ +#define LINUX_BTRFS_IOC_CLONE 0x9409 /* 0x40049409 */ + + +/* * Pluggable ioctl handlers */ struct linux_ioctl_args; diff --git a/sys/compat/linux/linux_ipc.c b/sys/compat/linux/linux_ipc.c index 1237edc..7a92c6a 100644 --- a/sys/compat/linux/linux_ipc.c +++ b/sys/compat/linux/linux_ipc.c @@ -117,16 +117,6 @@ bsd_to_linux_shm_info( struct shm_info *bpp, struct l_shm_info *lpp) lpp->swap_successes = bpp->swap_successes ; } -struct l_ipc_perm { - l_key_t key; - l_uid16_t uid; - l_gid16_t gid; - l_uid16_t cuid; - l_gid16_t cgid; - l_ushort mode; - l_ushort seq; -}; - static void linux_to_bsd_ipc_perm(struct l_ipc_perm *lpp, struct ipc_perm *bpp) { diff --git a/sys/compat/linux/linux_ipc.h b/sys/compat/linux/linux_ipc.h index f1531ba..8e9c050 100644 --- a/sys/compat/linux/linux_ipc.h +++ b/sys/compat/linux/linux_ipc.h @@ -82,7 +82,7 @@ #define LINUX_IPC_64 0x0100 /* New version (support 32-bit UIDs, bigger message sizes, etc. */ -#if defined(__i386__) || defined(__amd64__) +#if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) struct linux_msgctl_args { @@ -177,6 +177,6 @@ int linux_shmctl(struct thread *, struct linux_shmctl_args *); int linux_shmdt(struct thread *, struct linux_shmdt_args *); int linux_shmget(struct thread *, struct linux_shmget_args *); -#endif /* __i386__ || __amd64__ */ +#endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ #endif /* _LINUX_IPC_H_ */ diff --git a/sys/compat/linux/linux_mib.c b/sys/compat/linux/linux_mib.c index cf64599..396344b 100644 --- a/sys/compat/linux/linux_mib.c +++ b/sys/compat/linux/linux_mib.c @@ -29,9 +29,6 @@ #include <sys/cdefs.h> __FBSDID("$FreeBSD$"); -#include "opt_compat.h" -#include "opt_kdtrace.h" - #include <sys/param.h> #include <sys/kernel.h> #include <sys/sdt.h> @@ -42,85 +39,11 @@ __FBSDID("$FreeBSD$"); #include <sys/mount.h> #include <sys/jail.h> #include <sys/lock.h> -#include <sys/mutex.h> #include <sys/sx.h> -#ifdef COMPAT_LINUX32 -#include <machine/../linux32/linux.h> -#else -#include <machine/../linux/linux.h> -#endif -#include <compat/linux/linux_dtrace.h> #include <compat/linux/linux_mib.h> #include <compat/linux/linux_misc.h> -/* DTrace init */ -LIN_SDT_PROVIDER_DECLARE(LINUX_DTRACE); - -/** - * DTrace probes in this module. - */ -LIN_SDT_PROBE_DEFINE0(mib, linux_sysctl_osname, entry); -LIN_SDT_PROBE_DEFINE1(mib, linux_sysctl_osname, sysctl_string_error, "int"); -LIN_SDT_PROBE_DEFINE1(mib, linux_sysctl_osname, return, "int"); - -LIN_SDT_PROBE_DEFINE0(mib, linux_sysctl_osrelease, entry); -LIN_SDT_PROBE_DEFINE1(mib, linux_sysctl_osrelease, sysctl_string_error, "int"); -LIN_SDT_PROBE_DEFINE1(mib, linux_sysctl_osrelease, return, "int"); -LIN_SDT_PROBE_DEFINE0(mib, linux_sysctl_oss_version, entry); -LIN_SDT_PROBE_DEFINE1(mib, linux_sysctl_oss_version, sysctl_string_error, - "int"); -LIN_SDT_PROBE_DEFINE1(mib, linux_sysctl_oss_version, return, "int"); -LIN_SDT_PROBE_DEFINE2(mib, linux_map_osrel, entry, "char *", "int *"); -LIN_SDT_PROBE_DEFINE1(mib, linux_map_osrel, return, "int"); -LIN_SDT_PROBE_DEFINE2(mib, linux_get_prison, entry, "struct prison *", - "struct prison **"); -LIN_SDT_PROBE_DEFINE1(mib, linux_get_prison, return, "struct linux_prison *"); -LIN_SDT_PROBE_DEFINE2(mib, linux_alloc_prison, entry, "struct prison *", - "struct linux_prison **"); -LIN_SDT_PROBE_DEFINE1(mib, linux_alloc_prison, return, "int"); -LIN_SDT_PROBE_DEFINE2(mib, linux_prison_create, entry, "void *", "void *"); -LIN_SDT_PROBE_DEFINE1(mib, linux_prison_create, vfs_copyopt_error, "int"); -LIN_SDT_PROBE_DEFINE1(mib, linux_prison_create, return, "int"); -LIN_SDT_PROBE_DEFINE2(mib, linux_prison_check, entry, "void *", "void *"); -LIN_SDT_PROBE_DEFINE1(mib, linux_prison_check, vfs_copyopt_error, "int"); -LIN_SDT_PROBE_DEFINE1(mib, linux_prison_check, vfs_getopt_error, "int"); -LIN_SDT_PROBE_DEFINE1(mib, linux_prison_check, return, "int"); -LIN_SDT_PROBE_DEFINE2(mib, linux_prison_set, entry, "void *", "void *"); -LIN_SDT_PROBE_DEFINE1(mib, linux_prison_set, vfs_copyopt_error, "int"); -LIN_SDT_PROBE_DEFINE1(mib, linux_prison_set, vfs_getopt_error, "int"); -LIN_SDT_PROBE_DEFINE1(mib, linux_prison_set, return, "int"); -LIN_SDT_PROBE_DEFINE2(mib, linux_prison_get, entry, "void *", "void *"); -LIN_SDT_PROBE_DEFINE1(mib, linux_prison_get, vfs_setopt_error, "int"); -LIN_SDT_PROBE_DEFINE1(mib, linux_prison_get, vfs_setopts_error, "int"); -LIN_SDT_PROBE_DEFINE1(mib, linux_prison_get, return, "int"); -LIN_SDT_PROBE_DEFINE1(mib, linux_prison_destructor, entry, "void *"); -LIN_SDT_PROBE_DEFINE0(mib, linux_prison_destructor, return); -LIN_SDT_PROBE_DEFINE0(mib, linux_osd_jail_register, entry); -LIN_SDT_PROBE_DEFINE0(mib, linux_osd_jail_register, return); -LIN_SDT_PROBE_DEFINE0(mib, linux_osd_jail_deregister, entry); -LIN_SDT_PROBE_DEFINE0(mib, linux_osd_jail_deregister, return); -LIN_SDT_PROBE_DEFINE2(mib, linux_get_osname, entry, "struct thread *", - "char *"); -LIN_SDT_PROBE_DEFINE0(mib, linux_get_osname, return); -LIN_SDT_PROBE_DEFINE2(mib, linux_set_osname, entry, "struct thread *", - "char *"); -LIN_SDT_PROBE_DEFINE1(mib, linux_set_osname, return, "int"); -LIN_SDT_PROBE_DEFINE2(mib, linux_get_osrelease, entry, "struct thread *", - "char *"); -LIN_SDT_PROBE_DEFINE0(mib, linux_get_osrelease, return); -LIN_SDT_PROBE_DEFINE1(mib, linux_kernver, entry, "struct thread *"); -LIN_SDT_PROBE_DEFINE1(mib, linux_kernver, return, "int"); -LIN_SDT_PROBE_DEFINE2(mib, linux_set_osrelease, entry, "struct thread *", - "char *"); -LIN_SDT_PROBE_DEFINE1(mib, linux_set_osrelease, return, "int"); -LIN_SDT_PROBE_DEFINE1(mib, linux_get_oss_version, entry, "struct thread *"); -LIN_SDT_PROBE_DEFINE1(mib, linux_get_oss_version, return, "int"); - -LIN_SDT_PROBE_DEFINE2(mib, linux_set_oss_version, entry, "struct thread *", - "int"); -LIN_SDT_PROBE_DEFINE1(mib, linux_set_oss_version, return, "int"); - struct linux_prison { char pr_osname[LINUX_MAX_UTSNAME]; char pr_osrelease[LINUX_MAX_UTSNAME]; @@ -130,15 +53,14 @@ struct linux_prison { static struct linux_prison lprison0 = { .pr_osname = "Linux", - .pr_osrelease = "2.6.18", + .pr_osrelease = LINUX_VERSION_STR, .pr_oss_version = 0x030600, - .pr_osrel = 2006018 + .pr_osrel = LINUX_VERSION_CODE }; static unsigned linux_osd_jail_slot; -static SYSCTL_NODE(_compat, OID_AUTO, linux, CTLFLAG_RW, 0, - "Linux mode"); +SYSCTL_NODE(_compat, OID_AUTO, linux, CTLFLAG_RW, 0, "Linux mode"); static int linux_set_osname(struct thread *td, char *osname); static int linux_set_osrelease(struct thread *td, char *osrelease); @@ -150,19 +72,12 @@ linux_sysctl_osname(SYSCTL_HANDLER_ARGS) char osname[LINUX_MAX_UTSNAME]; int error; - LIN_SDT_PROBE0(mib, linux_sysctl_osname, entry); - linux_get_osname(req->td, osname); error = sysctl_handle_string(oidp, osname, LINUX_MAX_UTSNAME, req); - if (error != 0 || req->newptr == NULL) { - LIN_SDT_PROBE1(mib, linux_sysctl_osname, sysctl_string_error, - error); - LIN_SDT_PROBE1(mib, linux_sysctl_osname, return, error); + if (error != 0 || req->newptr == NULL) return (error); - } error = linux_set_osname(req->td, osname); - LIN_SDT_PROBE1(mib, linux_sysctl_osname, return, error); return (error); } @@ -177,19 +92,12 @@ linux_sysctl_osrelease(SYSCTL_HANDLER_ARGS) char osrelease[LINUX_MAX_UTSNAME]; int error; - LIN_SDT_PROBE0(mib, linux_sysctl_osrelease, entry); - linux_get_osrelease(req->td, osrelease); error = sysctl_handle_string(oidp, osrelease, LINUX_MAX_UTSNAME, req); - if (error != 0 || req->newptr == NULL) { - LIN_SDT_PROBE1(mib, linux_sysctl_osrelease, sysctl_string_error, - error); - LIN_SDT_PROBE1(mib, linux_sysctl_osrelease, return, error); + if (error != 0 || req->newptr == NULL) return (error); - } error = linux_set_osrelease(req->td, osrelease); - LIN_SDT_PROBE1(mib, linux_sysctl_osrelease, return, error); return (error); } @@ -204,19 +112,12 @@ linux_sysctl_oss_version(SYSCTL_HANDLER_ARGS) int oss_version; int error; - LIN_SDT_PROBE0(mib, linux_sysctl_oss_version, entry); - oss_version = linux_get_oss_version(req->td); error = sysctl_handle_int(oidp, &oss_version, 0, req); - if (error != 0 || req->newptr == NULL) { - LIN_SDT_PROBE1(mib, linux_sysctl_oss_version, - sysctl_string_error, error); - LIN_SDT_PROBE1(mib, linux_sysctl_oss_version, return, error); + if (error != 0 || req->newptr == NULL) return (error); - } error = linux_set_oss_version(req->td, oss_version); - LIN_SDT_PROBE1(mib, linux_sysctl_oss_version, return, error); return (error); } @@ -234,37 +135,26 @@ linux_map_osrel(char *osrelease, int *osrel) char *sep, *eosrelease; int len, v0, v1, v2, v; - LIN_SDT_PROBE2(mib, linux_map_osrel, entry, osrelease, osrel); - len = strlen(osrelease); eosrelease = osrelease + len; v0 = strtol(osrelease, &sep, 10); - if (osrelease == sep || sep + 1 >= eosrelease || *sep != '.') { - LIN_SDT_PROBE1(mib, linux_map_osrel, return, EINVAL); + if (osrelease == sep || sep + 1 >= eosrelease || *sep != '.') return (EINVAL); - } osrelease = sep + 1; v1 = strtol(osrelease, &sep, 10); - if (osrelease == sep || sep + 1 >= eosrelease || *sep != '.') { - LIN_SDT_PROBE1(mib, linux_map_osrel, return, EINVAL); + if (osrelease == sep || sep + 1 >= eosrelease || *sep != '.') return (EINVAL); - } osrelease = sep + 1; v2 = strtol(osrelease, &sep, 10); - if (osrelease == sep || sep != eosrelease) { - LIN_SDT_PROBE1(mib, linux_map_osrel, return, EINVAL); + if (osrelease == sep || sep != eosrelease) return (EINVAL); - } v = v0 * 1000000 + v1 * 1000 + v2; - if (v < 1000000) { - LIN_SDT_PROBE1(mib, linux_map_osrel, return, EINVAL); + if (v < 1000000) return (EINVAL); - } *osrel = v; - LIN_SDT_PROBE1(mib, linux_map_osrel, return, 0); return (0); } @@ -278,8 +168,6 @@ linux_find_prison(struct prison *spr, struct prison **prp) struct prison *pr; struct linux_prison *lpr; - LIN_SDT_PROBE2(mib, linux_get_prison, entry, spr, prp); - if (!linux_osd_jail_slot) /* In case osd_register failed. */ spr = &prison0; @@ -294,7 +182,6 @@ linux_find_prison(struct prison *spr, struct prison **prp) } *prp = pr; - LIN_SDT_PROBE1(mib, linux_get_prison, return, lpr); return (lpr); } @@ -309,8 +196,6 @@ linux_alloc_prison(struct prison *pr, struct linux_prison **lprp) struct linux_prison *lpr, *nlpr; int error; - LIN_SDT_PROBE2(mib, linux_alloc_prison, entry, pr, lprp); - /* If this prison already has Linux info, return that. */ error = 0; lpr = linux_find_prison(pr, &ppr); @@ -344,7 +229,6 @@ linux_alloc_prison(struct prison *pr, struct linux_prison **lprp) else mtx_unlock(&pr->pr_mtx); - LIN_SDT_PROBE1(mib, linux_alloc_prison, return, error); return (error); } @@ -356,26 +240,16 @@ linux_prison_create(void *obj, void *data) { struct prison *pr = obj; struct vfsoptlist *opts = data; - int jsys, error; - - LIN_SDT_PROBE2(mib, linux_prison_create, entry, obj, data); + int jsys; - error = vfs_copyopt(opts, "linux", &jsys, sizeof(jsys)); - if (error != 0) { - LIN_SDT_PROBE1(mib, linux_prison_create, vfs_copyopt_error, - error); - } else if (jsys == JAIL_SYS_INHERIT) { - LIN_SDT_PROBE1(mib, linux_prison_create, return, 0); + if (vfs_copyopt(opts, "linux", &jsys, sizeof(jsys)) == 0 && + jsys == JAIL_SYS_INHERIT) return (0); - } /* * Inherit a prison's initial values from its parent * (different from JAIL_SYS_INHERIT which also inherits changes). */ - error = linux_alloc_prison(pr, NULL); - - LIN_SDT_PROBE1(mib, linux_prison_create, return, error); - return (error); + return (linux_alloc_prison(pr, NULL)); } static int @@ -385,80 +259,46 @@ linux_prison_check(void *obj __unused, void *data) char *osname, *osrelease; int error, jsys, len, osrel, oss_version; - LIN_SDT_PROBE2(mib, linux_prison_check, entry, obj, data); - /* Check that the parameters are correct. */ error = vfs_copyopt(opts, "linux", &jsys, sizeof(jsys)); - if (error != 0) { - LIN_SDT_PROBE1(mib, linux_prison_check, vfs_copyopt_error, - error); - } if (error != ENOENT) { - if (error != 0) { - LIN_SDT_PROBE1(mib, linux_prison_check, return, error); + if (error != 0) return (error); - } - if (jsys != JAIL_SYS_NEW && jsys != JAIL_SYS_INHERIT) { - LIN_SDT_PROBE1(mib, linux_prison_check, return, EINVAL); + if (jsys != JAIL_SYS_NEW && jsys != JAIL_SYS_INHERIT) return (EINVAL); - } } error = vfs_getopt(opts, "linux.osname", (void **)&osname, &len); - if (error != 0) { - LIN_SDT_PROBE1(mib, linux_prison_check, vfs_getopt_error, - error); - } if (error != ENOENT) { - if (error != 0) { - LIN_SDT_PROBE1(mib, linux_prison_check, return, error); + if (error != 0) return (error); - } - if (len == 0 || osname[len - 1] != '\0') { - LIN_SDT_PROBE1(mib, linux_prison_check, return, EINVAL); + if (len == 0 || osname[len - 1] != '\0') return (EINVAL); - } if (len > LINUX_MAX_UTSNAME) { vfs_opterror(opts, "linux.osname too long"); - LIN_SDT_PROBE1(mib, linux_prison_check, return, - ENAMETOOLONG); return (ENAMETOOLONG); } } error = vfs_getopt(opts, "linux.osrelease", (void **)&osrelease, &len); - if (error != 0) { - LIN_SDT_PROBE1(mib, linux_prison_check, vfs_getopt_error, - error); - } if (error != ENOENT) { - if (error != 0) { - LIN_SDT_PROBE1(mib, linux_prison_check, return, error); + if (error != 0) return (error); - } - if (len == 0 || osrelease[len - 1] != '\0') { - LIN_SDT_PROBE1(mib, linux_prison_check, return, EINVAL); + if (len == 0 || osrelease[len - 1] != '\0') return (EINVAL); - } if (len > LINUX_MAX_UTSNAME) { vfs_opterror(opts, "linux.osrelease too long"); - LIN_SDT_PROBE1(mib, linux_prison_check, return, - ENAMETOOLONG); return (ENAMETOOLONG); } error = linux_map_osrel(osrelease, &osrel); if (error != 0) { vfs_opterror(opts, "linux.osrelease format error"); - LIN_SDT_PROBE1(mib, linux_prison_check, return, error); return (error); } } error = vfs_copyopt(opts, "linux.oss_version", &oss_version, sizeof(oss_version)); - if (error != 0) - LIN_SDT_PROBE1(mib, linux_prison_check, vfs_copyopt_error, error); if (error == ENOENT) error = 0; - LIN_SDT_PROBE1(mib, linux_prison_check, return, error); return (error); } @@ -471,32 +311,22 @@ linux_prison_set(void *obj, void *data) char *osname, *osrelease; int error, gotversion, jsys, len, oss_version; - LIN_SDT_PROBE2(mib, linux_prison_set, entry, obj, data); - /* Set the parameters, which should be correct. */ error = vfs_copyopt(opts, "linux", &jsys, sizeof(jsys)); - if (error != 0) - LIN_SDT_PROBE1(mib, linux_prison_set, vfs_copyopt_error, error); if (error == ENOENT) jsys = -1; error = vfs_getopt(opts, "linux.osname", (void **)&osname, &len); - if (error != 0) - LIN_SDT_PROBE1(mib, linux_prison_set, vfs_getopt_error, error); if (error == ENOENT) osname = NULL; else jsys = JAIL_SYS_NEW; error = vfs_getopt(opts, "linux.osrelease", (void **)&osrelease, &len); - if (error != 0) - LIN_SDT_PROBE1(mib, linux_prison_set, vfs_getopt_error, error); if (error == ENOENT) osrelease = NULL; else jsys = JAIL_SYS_NEW; error = vfs_copyopt(opts, "linux.oss_version", &oss_version, sizeof(oss_version)); - if (error != 0) - LIN_SDT_PROBE1(mib, linux_prison_set, vfs_copyopt_error, error); if (error == ENOENT) gotversion = 0; else { @@ -518,15 +348,12 @@ linux_prison_set(void *obj, void *data) error = linux_alloc_prison(pr, &lpr); if (error) { mtx_unlock(&pr->pr_mtx); - LIN_SDT_PROBE1(mib, linux_prison_set, return, error); return (error); } if (osrelease) { error = linux_map_osrel(osrelease, &lpr->pr_osrel); if (error) { mtx_unlock(&pr->pr_mtx); - LIN_SDT_PROBE1(mib, linux_prison_set, return, - error); return (error); } strlcpy(lpr->pr_osrelease, osrelease, @@ -539,7 +366,6 @@ linux_prison_set(void *obj, void *data) mtx_unlock(&pr->pr_mtx); } - LIN_SDT_PROBE1(mib, linux_prison_set, return, 0); return (0); } @@ -562,74 +388,44 @@ linux_prison_get(void *obj, void *data) static int version0; - LIN_SDT_PROBE2(mib, linux_prison_get, entry, obj, data); - /* See if this prison is the one with the Linux info. */ lpr = linux_find_prison(pr, &ppr); i = (ppr == pr) ? JAIL_SYS_NEW : JAIL_SYS_INHERIT; error = vfs_setopt(opts, "linux", &i, sizeof(i)); - if (error != 0) { - LIN_SDT_PROBE1(mib, linux_prison_get, vfs_setopt_error, error); - if (error != ENOENT) - goto done; - } + if (error != 0 && error != ENOENT) + goto done; if (i) { error = vfs_setopts(opts, "linux.osname", lpr->pr_osname); - if (error != 0) { - LIN_SDT_PROBE1(mib, linux_prison_get, vfs_setopts_error, - error); - if (error != ENOENT) - goto done; - } + if (error != 0 && error != ENOENT) + goto done; error = vfs_setopts(opts, "linux.osrelease", lpr->pr_osrelease); - if (error != 0) { - LIN_SDT_PROBE1(mib, linux_prison_get, vfs_setopts_error, - error); - if (error != ENOENT) - goto done; - } + if (error != 0 && error != ENOENT) + goto done; error = vfs_setopt(opts, "linux.oss_version", &lpr->pr_oss_version, sizeof(lpr->pr_oss_version)); - if (error != 0) { - LIN_SDT_PROBE1(mib, linux_prison_get, vfs_setopt_error, - error); - if(error != ENOENT) - goto done; - } + if (error != 0 && error != ENOENT) + goto done; } else { /* * If this prison is inheriting its Linux info, report * empty/zero parameters. */ error = vfs_setopts(opts, "linux.osname", ""); - if (error != 0) { - LIN_SDT_PROBE1(mib, linux_prison_get, vfs_setopts_error, - error); - if(error != ENOENT) - goto done; - } + if (error != 0 && error != ENOENT) + goto done; error = vfs_setopts(opts, "linux.osrelease", ""); - if (error != 0) { - LIN_SDT_PROBE1(mib, linux_prison_get, vfs_setopts_error, - error); - if(error != ENOENT) - goto done; - } + if (error != 0 && error != ENOENT) + goto done; error = vfs_setopt(opts, "linux.oss_version", &version0, sizeof(lpr->pr_oss_version)); - if (error != 0) { - LIN_SDT_PROBE1(mib, linux_prison_get, vfs_setopt_error, - error); - if(error != ENOENT) - goto done; - } + if (error != 0 && error != ENOENT) + goto done; } error = 0; done: mtx_unlock(&ppr->pr_mtx); - LIN_SDT_PROBE1(mib, linux_prison_get, return, error); return (error); } @@ -637,9 +433,7 @@ static void linux_prison_destructor(void *data) { - LIN_SDT_PROBE1(mib, linux_prison_destructor, entry, data); free(data, M_PRISON); - LIN_SDT_PROBE0(mib, linux_prison_destructor, return); } void @@ -653,8 +447,6 @@ linux_osd_jail_register(void) [PR_METHOD_CHECK] = linux_prison_check }; - LIN_SDT_PROBE0(mib, linux_osd_jail_register, entry); - linux_osd_jail_slot = osd_jail_register(linux_prison_destructor, methods); if (linux_osd_jail_slot > 0) { @@ -664,20 +456,14 @@ linux_osd_jail_register(void) (void)linux_alloc_prison(pr, NULL); sx_xunlock(&allprison_lock); } - - LIN_SDT_PROBE0(mib, linux_osd_jail_register, return); } void linux_osd_jail_deregister(void) { - LIN_SDT_PROBE0(mib, linux_osd_jail_register, entry); - if (linux_osd_jail_slot) osd_jail_deregister(linux_osd_jail_slot); - - LIN_SDT_PROBE0(mib, linux_osd_jail_register, return); } void @@ -686,13 +472,9 @@ linux_get_osname(struct thread *td, char *dst) struct prison *pr; struct linux_prison *lpr; - LIN_SDT_PROBE2(mib, linux_get_osname, entry, td, dst); - lpr = linux_find_prison(td->td_ucred->cr_prison, &pr); bcopy(lpr->pr_osname, dst, LINUX_MAX_UTSNAME); mtx_unlock(&pr->pr_mtx); - - LIN_SDT_PROBE0(mib, linux_get_osname, return); } static int @@ -701,13 +483,10 @@ linux_set_osname(struct thread *td, char *osname) struct prison *pr; struct linux_prison *lpr; - LIN_SDT_PROBE2(mib, linux_set_osname, entry, td, osname); - lpr = linux_find_prison(td->td_ucred->cr_prison, &pr); strlcpy(lpr->pr_osname, osname, LINUX_MAX_UTSNAME); mtx_unlock(&pr->pr_mtx); - LIN_SDT_PROBE1(mib, linux_set_osname, return, 0); return (0); } @@ -717,13 +496,9 @@ linux_get_osrelease(struct thread *td, char *dst) struct prison *pr; struct linux_prison *lpr; - LIN_SDT_PROBE2(mib, linux_get_osrelease, entry, td, dst); - lpr = linux_find_prison(td->td_ucred->cr_prison, &pr); bcopy(lpr->pr_osrelease, dst, LINUX_MAX_UTSNAME); mtx_unlock(&pr->pr_mtx); - - LIN_SDT_PROBE0(mib, linux_get_osrelease, return); } int @@ -733,13 +508,10 @@ linux_kernver(struct thread *td) struct linux_prison *lpr; int osrel; - LIN_SDT_PROBE1(mib, linux_kernver, entry, td); - lpr = linux_find_prison(td->td_ucred->cr_prison, &pr); osrel = lpr->pr_osrel; mtx_unlock(&pr->pr_mtx); - LIN_SDT_PROBE1(mib, linux_kernver, return, osrel); return (osrel); } @@ -750,15 +522,12 @@ linux_set_osrelease(struct thread *td, char *osrelease) struct linux_prison *lpr; int error; - LIN_SDT_PROBE2(mib, linux_set_osrelease, entry, td, osrelease); - lpr = linux_find_prison(td->td_ucred->cr_prison, &pr); error = linux_map_osrel(osrelease, &lpr->pr_osrel); if (error == 0) strlcpy(lpr->pr_osrelease, osrelease, LINUX_MAX_UTSNAME); mtx_unlock(&pr->pr_mtx); - LIN_SDT_PROBE1(mib, linux_set_osrelease, return, error); return (error); } @@ -769,13 +538,10 @@ linux_get_oss_version(struct thread *td) struct linux_prison *lpr; int version; - LIN_SDT_PROBE1(mib, linux_get_oss_version, entry, td); - lpr = linux_find_prison(td->td_ucred->cr_prison, &pr); version = lpr->pr_oss_version; mtx_unlock(&pr->pr_mtx); - LIN_SDT_PROBE1(mib, linux_get_oss_version, return, version); return (version); } @@ -785,74 +551,9 @@ linux_set_oss_version(struct thread *td, int oss_version) struct prison *pr; struct linux_prison *lpr; - LIN_SDT_PROBE2(mib, linux_set_oss_version, entry, td, oss_version); - lpr = linux_find_prison(td->td_ucred->cr_prison, &pr); lpr->pr_oss_version = oss_version; mtx_unlock(&pr->pr_mtx); - LIN_SDT_PROBE1(mib, linux_set_oss_version, return, 0); return (0); } - -#if defined(DEBUG) || defined(KTR) -/* XXX: can be removed when every ldebug(...) and KTR stuff are removed. */ - -u_char linux_debug_map[howmany(LINUX_SYS_MAXSYSCALL, sizeof(u_char))]; - -static int -linux_debug(int syscall, int toggle, int global) -{ - - if (global) { - char c = toggle ? 0 : 0xff; - - memset(linux_debug_map, c, sizeof(linux_debug_map)); - return (0); - } - if (syscall < 0 || syscall >= LINUX_SYS_MAXSYSCALL) - return (EINVAL); - if (toggle) - clrbit(linux_debug_map, syscall); - else - setbit(linux_debug_map, syscall); - return (0); -} - -/* - * Usage: sysctl linux.debug=<syscall_nr>.<0/1> - * - * E.g.: sysctl linux.debug=21.0 - * - * As a special case, syscall "all" will apply to all syscalls globally. - */ -#define LINUX_MAX_DEBUGSTR 16 -static int -linux_sysctl_debug(SYSCTL_HANDLER_ARGS) -{ - char value[LINUX_MAX_DEBUGSTR], *p; - int error, sysc, toggle; - int global = 0; - - value[0] = '\0'; - error = sysctl_handle_string(oidp, value, LINUX_MAX_DEBUGSTR, req); - if (error || req->newptr == NULL) - return (error); - for (p = value; *p != '\0' && *p != '.'; p++); - if (*p == '\0') - return (EINVAL); - *p++ = '\0'; - sysc = strtol(value, NULL, 0); - toggle = strtol(p, NULL, 0); - if (strcmp(value, "all") == 0) - global = 1; - error = linux_debug(sysc, toggle, global); - return (error); -} - -SYSCTL_PROC(_compat_linux, OID_AUTO, debug, - CTLTYPE_STRING | CTLFLAG_RW, - 0, 0, linux_sysctl_debug, "A", - "Linux debugging control"); - -#endif /* DEBUG || KTR */ diff --git a/sys/compat/linux/linux_mib.h b/sys/compat/linux/linux_mib.h index e8eedf9..80b6c97 100644 --- a/sys/compat/linux/linux_mib.h +++ b/sys/compat/linux/linux_mib.h @@ -31,6 +31,10 @@ #ifndef _LINUX_MIB_H_ #define _LINUX_MIB_H_ +#ifdef SYSCTL_DECL +SYSCTL_DECL(_compat_linux); +#endif + void linux_osd_jail_register(void); void linux_osd_jail_deregister(void); @@ -42,8 +46,19 @@ int linux_get_oss_version(struct thread *td); int linux_kernver(struct thread *td); -#define LINUX_KERNVER_2004000 2004000 -#define LINUX_KERNVER_2006000 2006000 +#define LINUX_KVERSION 2 +#define LINUX_KPATCHLEVEL 6 +#define LINUX_KSUBLEVEL 32 + +#define LINUX_KERNVER(a,b,c) (((a) << 16) + ((b) << 8) + (c)) +#define LINUX_VERSION_CODE LINUX_KERNVER(LINUX_KVERSION, \ + LINUX_KPATCHLEVEL, LINUX_KSUBLEVEL) +#define LINUX_KERNVERSTR(x) #x +#define LINUX_XKERNVERSTR(x) LINUX_KERNVERSTR(x) +#define LINUX_VERSION_STR LINUX_XKERNVERSTR(LINUX_KVERSION.LINUX_KPATCHLEVEL.LINUX_KSUBLEVEL) + +#define LINUX_KERNVER_2004000 LINUX_KERNVER(2,4,0) +#define LINUX_KERNVER_2006000 LINUX_KERNVER(2,6,0) #define linux_use26(t) (linux_kernver(t) >= LINUX_KERNVER_2006000) diff --git a/sys/compat/linux/linux_misc.c b/sys/compat/linux/linux_misc.c index ac2384c..d87d786 100644 --- a/sys/compat/linux/linux_misc.c +++ b/sys/compat/linux/linux_misc.c @@ -89,21 +89,24 @@ __FBSDID("$FreeBSD$"); #include <compat/linux/linux_file.h> #include <compat/linux/linux_mib.h> #include <compat/linux/linux_signal.h> +#include <compat/linux/linux_timer.h> #include <compat/linux/linux_util.h> #include <compat/linux/linux_sysproto.h> #include <compat/linux/linux_emul.h> #include <compat/linux/linux_misc.h> -/* DTrace init */ -LIN_SDT_PROVIDER_DECLARE(LINUX_DTRACE); - -/* Linuxulator-global DTrace probes */ -LIN_SDT_PROBE_DECLARE(locks, emul_lock, locked); -LIN_SDT_PROBE_DECLARE(locks, emul_lock, unlock); -LIN_SDT_PROBE_DECLARE(locks, emul_shared_rlock, locked); -LIN_SDT_PROBE_DECLARE(locks, emul_shared_rlock, unlock); -LIN_SDT_PROBE_DECLARE(locks, emul_shared_wlock, locked); -LIN_SDT_PROBE_DECLARE(locks, emul_shared_wlock, unlock); +/** + * Special DTrace provider for the linuxulator. + * + * In this file we define the provider for the entire linuxulator. All + * modules (= files of the linuxulator) use it. + * + * We define a different name depending on the emulated bitsize, see + * ../../<ARCH>/linux{,32}/linux.h, e.g.: + * native bitsize = linuxulator + * amd64, 32bit emulation = linuxulator32 + */ +LIN_SDT_PROVIDER_DEFINE(LINUX_DTRACE); int stclohz; /* Statistics clock frequency */ @@ -130,6 +133,15 @@ struct l_sysinfo { l_uint mem_unit; char _f[20-2*sizeof(l_long)-sizeof(l_int)]; /* padding */ }; + +struct l_pselect6arg { + l_uintptr_t ss; + l_size_t ss_len; +}; + +static int linux_utimensat_nsec_valid(l_long); + + int linux_sysinfo(struct thread *td, struct linux_sysinfo_args *args) { @@ -524,7 +536,7 @@ linux_select(struct thread *td, struct linux_select_args *args) tvp = NULL; error = kern_select(td, args->nfds, args->readfds, args->writefds, - args->exceptfds, tvp, sizeof(l_int) * 8); + args->exceptfds, tvp, LINUX_NFDBITS); #ifdef DEBUG if (ldebug(select)) @@ -691,9 +703,9 @@ linux_times(struct thread *td, struct linux_times_args *args) if (args->buf != NULL) { p = td->td_proc; PROC_LOCK(p); - PROC_SLOCK(p); + PROC_STATLOCK(p); calcru(p, &utime, &stime); - PROC_SUNLOCK(p); + PROC_STATUNLOCK(p); calccru(p, &cutime, &cstime); PROC_UNLOCK(p); @@ -739,12 +751,11 @@ linux_newuname(struct thread *td, struct linux_newuname_args *args) *p = '\0'; break; } - strlcpy(utsname.machine, linux_platform, LINUX_MAX_UTSNAME); + strlcpy(utsname.machine, linux_kplatform, LINUX_MAX_UTSNAME); return (copyout(&utsname, args->buf, sizeof(utsname))); } -#if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) struct l_utimbuf { l_time_t l_actime; l_time_t l_modtime; @@ -815,6 +826,98 @@ linux_utimes(struct thread *td, struct linux_utimes_args *args) return (error); } +static int +linux_utimensat_nsec_valid(l_long nsec) +{ + + if (nsec == LINUX_UTIME_OMIT || nsec == LINUX_UTIME_NOW) + return (0); + if (nsec >= 0 && nsec <= 999999999) + return (0); + return (1); +} + +int +linux_utimensat(struct thread *td, struct linux_utimensat_args *args) +{ + struct l_timespec l_times[2]; + struct timespec times[2], *timesp = NULL; + char *path = NULL; + int error, dfd, flags = 0; + + dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd; + +#ifdef DEBUG + if (ldebug(utimensat)) + printf(ARGS(utimensat, "%d, *"), dfd); +#endif + + if (args->flags & ~LINUX_AT_SYMLINK_NOFOLLOW) + return (EINVAL); + + if (args->times != NULL) { + error = copyin(args->times, l_times, sizeof(l_times)); + if (error != 0) + return (error); + + if (linux_utimensat_nsec_valid(l_times[0].tv_nsec) != 0 || + linux_utimensat_nsec_valid(l_times[1].tv_nsec) != 0) + return (EINVAL); + + times[0].tv_sec = l_times[0].tv_sec; + switch (l_times[0].tv_nsec) + { + case LINUX_UTIME_OMIT: + times[0].tv_nsec = UTIME_OMIT; + break; + case LINUX_UTIME_NOW: + times[0].tv_nsec = UTIME_NOW; + break; + default: + times[0].tv_nsec = l_times[0].tv_nsec; + } + + times[1].tv_sec = l_times[1].tv_sec; + switch (l_times[1].tv_nsec) + { + case LINUX_UTIME_OMIT: + times[1].tv_nsec = UTIME_OMIT; + break; + case LINUX_UTIME_NOW: + times[1].tv_nsec = UTIME_NOW; + break; + default: + times[1].tv_nsec = l_times[1].tv_nsec; + break; + } + timesp = times; + } + + if (times[0].tv_nsec == UTIME_OMIT && times[1].tv_nsec == UTIME_OMIT) + /* This breaks POSIX, but is what the Linux kernel does + * _on purpose_ (documented in the man page for utimensat(2)), + * so we must follow that behaviour. */ + return (0); + + if (args->pathname != NULL) + LCONVPATHEXIST_AT(td, args->pathname, &path, dfd); + else if (args->flags != 0) + return (EINVAL); + + if (args->flags & LINUX_AT_SYMLINK_NOFOLLOW) + flags |= AT_SYMLINK_NOFOLLOW; + + if (path == NULL) + error = kern_futimens(td, dfd, timesp, UIO_SYSSPACE); + else { + error = kern_utimensat(td, dfd, path, UIO_SYSSPACE, timesp, + UIO_SYSSPACE, flags); + LFREEPATH(path); + } + + return (error); +} + int linux_futimesat(struct thread *td, struct linux_futimesat_args *args) { @@ -847,7 +950,6 @@ linux_futimesat(struct thread *td, struct linux_futimesat_args *args) LFREEPATH(fname); return (error); } -#endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ int linux_common_wait(struct thread *td, int pid, int *status, @@ -863,41 +965,131 @@ linux_common_wait(struct thread *td, int pid, int *status, tmpstat &= 0xffff; if (WIFSIGNALED(tmpstat)) tmpstat = (tmpstat & 0xffffff80) | - BSD_TO_LINUX_SIGNAL(WTERMSIG(tmpstat)); + bsd_to_linux_signal(WTERMSIG(tmpstat)); else if (WIFSTOPPED(tmpstat)) tmpstat = (tmpstat & 0xffff00ff) | - (BSD_TO_LINUX_SIGNAL(WSTOPSIG(tmpstat)) << 8); + (bsd_to_linux_signal(WSTOPSIG(tmpstat)) << 8); + else if (WIFCONTINUED(tmpstat)) + tmpstat = 0xffff; error = copyout(&tmpstat, status, sizeof(int)); } return (error); } +#if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) int linux_waitpid(struct thread *td, struct linux_waitpid_args *args) { - int options; - + struct linux_wait4_args wait4_args; + #ifdef DEBUG if (ldebug(waitpid)) printf(ARGS(waitpid, "%d, %p, %d"), args->pid, (void *)args->status, args->options); #endif - /* - * this is necessary because the test in kern_wait doesn't work - * because we mess with the options here - */ - if (args->options & ~(WUNTRACED | WNOHANG | WCONTINUED | __WCLONE)) + + wait4_args.pid = args->pid; + wait4_args.status = args->status; + wait4_args.options = args->options; + wait4_args.rusage = NULL; + + return (linux_wait4(td, &wait4_args)); +} +#endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ + +int +linux_wait4(struct thread *td, struct linux_wait4_args *args) +{ + int error, options; + struct rusage ru, *rup; + +#ifdef DEBUG + if (ldebug(wait4)) + printf(ARGS(wait4, "%d, %p, %d, %p"), + args->pid, (void *)args->status, args->options, + (void *)args->rusage); +#endif + if (args->options & ~(LINUX_WUNTRACED | LINUX_WNOHANG | + LINUX_WCONTINUED | __WCLONE | __WNOTHREAD | __WALL)) return (EINVAL); - - options = (args->options & (WNOHANG | WUNTRACED)); - /* WLINUXCLONE should be equal to __WCLONE, but we make sure */ - if (args->options & __WCLONE) - options |= WLINUXCLONE; - return (linux_common_wait(td, args->pid, args->status, options, NULL)); + options = WEXITED; + linux_to_bsd_waitopts(args->options, &options); + + if (args->rusage != NULL) + rup = &ru; + else + rup = NULL; + error = linux_common_wait(td, args->pid, args->status, options, rup); + if (error != 0) + return (error); + if (args->rusage != NULL) + error = linux_copyout_rusage(&ru, args->rusage); + return (error); } +int +linux_waitid(struct thread *td, struct linux_waitid_args *args) +{ + int status, options, sig; + struct __wrusage wru; + siginfo_t siginfo; + l_siginfo_t lsi; + idtype_t idtype; + struct proc *p; + int error; + + options = 0; + linux_to_bsd_waitopts(args->options, &options); + + if (options & ~(WNOHANG | WNOWAIT | WEXITED | WUNTRACED | WCONTINUED)) + return (EINVAL); + if (!(options & (WEXITED | WUNTRACED | WCONTINUED))) + return (EINVAL); + + switch (args->idtype) { + case LINUX_P_ALL: + idtype = P_ALL; + break; + case LINUX_P_PID: + if (args->id <= 0) + return (EINVAL); + idtype = P_PID; + break; + case LINUX_P_PGID: + if (args->id <= 0) + return (EINVAL); + idtype = P_PGID; + break; + default: + return (EINVAL); + } + + error = kern_wait6(td, idtype, args->id, &status, options, + &wru, &siginfo); + if (error != 0) + return (error); + if (args->rusage != NULL) { + error = linux_copyout_rusage(&wru.wru_children, + args->rusage); + if (error != 0) + return (error); + } + if (args->info != NULL) { + p = td->td_proc; + if (td->td_retval[0] == 0) + bzero(&lsi, sizeof(lsi)); + else { + sig = bsd_to_linux_signal(siginfo.si_signo); + siginfo_to_lsiginfo(&siginfo, &lsi, sig); + } + error = copyout(&lsi, args->info, sizeof(lsi)); + } + td->td_retval[0] = 0; + + return (error); +} int linux_mknod(struct thread *td, struct linux_mknod_args *args) @@ -909,7 +1101,8 @@ linux_mknod(struct thread *td, struct linux_mknod_args *args) #ifdef DEBUG if (ldebug(mknod)) - printf(ARGS(mknod, "%s, %d, %d"), path, args->mode, args->dev); + printf(ARGS(mknod, "%s, %d, %ju"), path, args->mode, + (uintmax_t)args->dev); #endif switch (args->mode & S_IFMT) { @@ -1079,6 +1272,7 @@ linux_getitimer(struct thread *td, struct linux_getitimer_args *uap) return (copyout(&ls, uap->itv, sizeof(ls))); } +#if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) int linux_nice(struct thread *td, struct linux_nice_args *args) { @@ -1089,6 +1283,7 @@ linux_nice(struct thread *td, struct linux_nice_args *args) bsd_args.prio = args->inc; return (sys_setpriority(td, &bsd_args)); } +#endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ int linux_setgroups(struct thread *td, struct linux_setgroups_args *args) @@ -1102,7 +1297,7 @@ linux_setgroups(struct thread *td, struct linux_setgroups_args *args) ngrp = args->gidsetsize; if (ngrp < 0 || ngrp >= ngroups_max + 1) return (EINVAL); - linux_gidset = malloc(ngrp * sizeof(*linux_gidset), M_TEMP, M_WAITOK); + linux_gidset = malloc(ngrp * sizeof(*linux_gidset), M_LINUX, M_WAITOK); error = copyin(args->grouplist, linux_gidset, ngrp * sizeof(l_gid_t)); if (error) goto out; @@ -1141,7 +1336,7 @@ linux_setgroups(struct thread *td, struct linux_setgroups_args *args) crfree(oldcred); error = 0; out: - free(linux_gidset, M_TEMP); + free(linux_gidset, M_LINUX); return (error); } @@ -1173,14 +1368,14 @@ linux_getgroups(struct thread *td, struct linux_getgroups_args *args) ngrp = 0; linux_gidset = malloc(bsd_gidsetsz * sizeof(*linux_gidset), - M_TEMP, M_WAITOK); + M_LINUX, M_WAITOK); while (ngrp < bsd_gidsetsz) { linux_gidset[ngrp] = bsd_gidset[ngrp + 1]; ngrp++; } error = copyout(linux_gidset, args->grouplist, ngrp * sizeof(l_gid_t)); - free(linux_gidset, M_TEMP); + free(linux_gidset, M_LINUX); if (error) return (error); @@ -1218,6 +1413,7 @@ linux_setrlimit(struct thread *td, struct linux_setrlimit_args *args) return (kern_setrlimit(td, which, &bsd_rlim)); } +#if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) int linux_old_getrlimit(struct thread *td, struct linux_old_getrlimit_args *args) { @@ -1260,6 +1456,7 @@ linux_old_getrlimit(struct thread *td, struct linux_old_getrlimit_args *args) #endif return (copyout(&rlim, args->rlim, sizeof(rlim))); } +#endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ int linux_getrlimit(struct thread *td, struct linux_getrlimit_args *args) @@ -1295,7 +1492,9 @@ int linux_sched_setscheduler(struct thread *td, struct linux_sched_setscheduler_args *args) { - struct sched_setscheduler_args bsd; + struct sched_param sched_param; + struct thread *tdt; + int error, policy; #ifdef DEBUG if (ldebug(sched_setscheduler)) @@ -1305,39 +1504,51 @@ linux_sched_setscheduler(struct thread *td, switch (args->policy) { case LINUX_SCHED_OTHER: - bsd.policy = SCHED_OTHER; + policy = SCHED_OTHER; break; case LINUX_SCHED_FIFO: - bsd.policy = SCHED_FIFO; + policy = SCHED_FIFO; break; case LINUX_SCHED_RR: - bsd.policy = SCHED_RR; + policy = SCHED_RR; break; default: return (EINVAL); } - bsd.pid = args->pid; - bsd.param = (struct sched_param *)args->param; - return (sys_sched_setscheduler(td, &bsd)); + error = copyin(args->param, &sched_param, sizeof(sched_param)); + if (error) + return (error); + + tdt = linux_tdfind(td, args->pid, -1); + if (tdt == NULL) + return (ESRCH); + + error = kern_sched_setscheduler(td, tdt, policy, &sched_param); + PROC_UNLOCK(tdt->td_proc); + return (error); } int linux_sched_getscheduler(struct thread *td, struct linux_sched_getscheduler_args *args) { - struct sched_getscheduler_args bsd; - int error; + struct thread *tdt; + int error, policy; #ifdef DEBUG if (ldebug(sched_getscheduler)) printf(ARGS(sched_getscheduler, "%d"), args->pid); #endif - bsd.pid = args->pid; - error = sys_sched_getscheduler(td, &bsd); + tdt = linux_tdfind(td, args->pid, -1); + if (tdt == NULL) + return (ESRCH); + + error = kern_sched_getscheduler(td, tdt, &policy); + PROC_UNLOCK(tdt->td_proc); - switch (td->td_retval[0]) { + switch (policy) { case SCHED_OTHER: td->td_retval[0] = LINUX_SCHED_OTHER; break; @@ -1348,7 +1559,6 @@ linux_sched_getscheduler(struct thread *td, td->td_retval[0] = LINUX_SCHED_RR; break; } - return (error); } @@ -1474,20 +1684,12 @@ linux_reboot(struct thread *td, struct linux_reboot_args *args) int linux_getpid(struct thread *td, struct linux_getpid_args *args) { - struct linux_emuldata *em; #ifdef DEBUG if (ldebug(getpid)) printf(ARGS(getpid, "")); #endif - - if (linux_use26(td)) { - em = em_find(td->td_proc, EMUL_DONTLOCK); - KASSERT(em != NULL, ("getpid: emuldata not found.\n")); - td->td_retval[0] = em->shared->group_pid; - } else { - td->td_retval[0] = td->td_proc->p_pid; - } + td->td_retval[0] = td->td_proc->p_pid; return (0); } @@ -1495,13 +1697,18 @@ linux_getpid(struct thread *td, struct linux_getpid_args *args) int linux_gettid(struct thread *td, struct linux_gettid_args *args) { + struct linux_emuldata *em; #ifdef DEBUG if (ldebug(gettid)) printf(ARGS(gettid, "")); #endif - td->td_retval[0] = td->td_proc->p_pid; + em = em_find(td); + KASSERT(em != NULL, ("gettid: emuldata not found.\n")); + + td->td_retval[0] = em->em_tid; + return (0); } @@ -1509,50 +1716,15 @@ linux_gettid(struct thread *td, struct linux_gettid_args *args) int linux_getppid(struct thread *td, struct linux_getppid_args *args) { - struct linux_emuldata *em; - struct proc *p, *pp; #ifdef DEBUG if (ldebug(getppid)) printf(ARGS(getppid, "")); #endif - if (!linux_use26(td)) { - PROC_LOCK(td->td_proc); - td->td_retval[0] = td->td_proc->p_pptr->p_pid; - PROC_UNLOCK(td->td_proc); - return (0); - } - - em = em_find(td->td_proc, EMUL_DONTLOCK); - - KASSERT(em != NULL, ("getppid: process emuldata not found.\n")); - - /* find the group leader */ - p = pfind(em->shared->group_pid); - - if (p == NULL) { -#ifdef DEBUG - printf(LMSG("parent process not found.\n")); -#endif - return (0); - } - - pp = p->p_pptr; /* switch to parent */ - PROC_LOCK(pp); - PROC_UNLOCK(p); - - /* if its also linux process */ - if (pp->p_sysent == &elf_linux_sysvec) { - em = em_find(pp, EMUL_DONTLOCK); - KASSERT(em != NULL, ("getppid: parent emuldata not found.\n")); - - td->td_retval[0] = em->shared->group_pid; - } else - td->td_retval[0] = pp->p_pid; - - PROC_UNLOCK(pp); - + PROC_LOCK(td->td_proc); + td->td_retval[0] = td->td_proc->p_pptr->p_pid; + PROC_UNLOCK(td->td_proc); return (0); } @@ -1657,22 +1829,14 @@ linux_setdomainname(struct thread *td, struct linux_setdomainname_args *args) int linux_exit_group(struct thread *td, struct linux_exit_group_args *args) { - struct linux_emuldata *em; #ifdef DEBUG if (ldebug(exit_group)) printf(ARGS(exit_group, "%i"), args->error_code); #endif - em = em_find(td->td_proc, EMUL_DONTLOCK); - if (em->shared->refs > 1) { - EMUL_SHARED_WLOCK(&emul_shared_lock); - em->shared->flags |= EMUL_SHARED_HASXSTAT; - em->shared->xstat = W_EXITCODE(args->error_code, 0); - EMUL_SHARED_WUNLOCK(&emul_shared_lock); - if (linux_use26(td)) - linux_kill_threads(td, SIGKILL); - } + LINUX_CTR2(exit_group, "thread(%d) (%d)", td->td_tid, + args->error_code); /* * XXX: we should send a signal to the parent if @@ -1680,8 +1844,7 @@ linux_exit_group(struct thread *td, struct linux_exit_group_args *args) * as it doesnt occur often. */ exit1(td, W_EXITCODE(args->error_code, 0)); - - return (0); + /* NOTREACHED */ } #define _LINUX_CAPABILITY_VERSION 0x19980330 @@ -1789,24 +1952,23 @@ linux_prctl(struct thread *td, struct linux_prctl_args *args) #ifdef DEBUG if (ldebug(prctl)) - printf(ARGS(prctl, "%d, %d, %d, %d, %d"), args->option, - args->arg2, args->arg3, args->arg4, args->arg5); + printf(ARGS(prctl, "%d, %ju, %ju, %ju, %ju"), args->option, + (uintmax_t)args->arg2, (uintmax_t)args->arg3, + (uintmax_t)args->arg4, (uintmax_t)args->arg5); #endif switch (args->option) { case LINUX_PR_SET_PDEATHSIG: if (!LINUX_SIG_VALID(args->arg2)) return (EINVAL); - em = em_find(p, EMUL_DOLOCK); + em = em_find(td); KASSERT(em != NULL, ("prctl: emuldata not found.\n")); em->pdeath_signal = args->arg2; - EMUL_UNLOCK(&emul_lock); break; case LINUX_PR_GET_PDEATHSIG: - em = em_find(p, EMUL_DOLOCK); + em = em_find(td); KASSERT(em != NULL, ("prctl: emuldata not found.\n")); pdeath_signal = em->pdeath_signal; - EMUL_UNLOCK(&emul_lock); error = copyout(&pdeath_signal, (void *)(register_t)args->arg2, sizeof(pdeath_signal)); @@ -1871,6 +2033,57 @@ linux_prctl(struct thread *td, struct linux_prctl_args *args) return (error); } +int +linux_sched_setparam(struct thread *td, + struct linux_sched_setparam_args *uap) +{ + struct sched_param sched_param; + struct thread *tdt; + int error; + +#ifdef DEBUG + if (ldebug(sched_setparam)) + printf(ARGS(sched_setparam, "%d, *"), uap->pid); +#endif + + error = copyin(uap->param, &sched_param, sizeof(sched_param)); + if (error) + return (error); + + tdt = linux_tdfind(td, uap->pid, -1); + if (tdt == NULL) + return (ESRCH); + + error = kern_sched_setparam(td, tdt, &sched_param); + PROC_UNLOCK(tdt->td_proc); + return (error); +} + +int +linux_sched_getparam(struct thread *td, + struct linux_sched_getparam_args *uap) +{ + struct sched_param sched_param; + struct thread *tdt; + int error; + +#ifdef DEBUG + if (ldebug(sched_getparam)) + printf(ARGS(sched_getparam, "%d, *"), uap->pid); +#endif + + tdt = linux_tdfind(td, uap->pid, -1); + if (tdt == NULL) + return (ESRCH); + + error = kern_sched_getparam(td, tdt, &sched_param); + PROC_UNLOCK(tdt->td_proc); + if (error == 0) + error = copyout(&sched_param, uap->param, + sizeof(sched_param)); + return (error); +} + /* * Get affinity of a process. */ @@ -1879,6 +2092,7 @@ linux_sched_getaffinity(struct thread *td, struct linux_sched_getaffinity_args *args) { int error; + struct thread *tdt; struct cpuset_getaffinity_args cga; #ifdef DEBUG @@ -1889,9 +2103,14 @@ linux_sched_getaffinity(struct thread *td, if (args->len < sizeof(cpuset_t)) return (EINVAL); + tdt = linux_tdfind(td, args->pid, -1); + if (tdt == NULL) + return (ESRCH); + + PROC_UNLOCK(tdt->td_proc); cga.level = CPU_LEVEL_WHICH; - cga.which = CPU_WHICH_PID; - cga.id = args->pid; + cga.which = CPU_WHICH_TID; + cga.id = tdt->td_tid; cga.cpusetsize = sizeof(cpuset_t); cga.mask = (cpuset_t *) args->user_mask_ptr; @@ -1909,6 +2128,7 @@ linux_sched_setaffinity(struct thread *td, struct linux_sched_setaffinity_args *args) { struct cpuset_setaffinity_args csa; + struct thread *tdt; #ifdef DEBUG if (ldebug(sched_setaffinity)) @@ -1918,11 +2138,369 @@ linux_sched_setaffinity(struct thread *td, if (args->len < sizeof(cpuset_t)) return (EINVAL); + tdt = linux_tdfind(td, args->pid, -1); + if (tdt == NULL) + return (ESRCH); + + PROC_UNLOCK(tdt->td_proc); csa.level = CPU_LEVEL_WHICH; - csa.which = CPU_WHICH_PID; - csa.id = args->pid; + csa.which = CPU_WHICH_TID; + csa.id = tdt->td_tid; csa.cpusetsize = sizeof(cpuset_t); csa.mask = (cpuset_t *) args->user_mask_ptr; return (sys_cpuset_setaffinity(td, &csa)); } + +struct linux_rlimit64 { + uint64_t rlim_cur; + uint64_t rlim_max; +}; + +int +linux_prlimit64(struct thread *td, struct linux_prlimit64_args *args) +{ + struct rlimit rlim, nrlim; + struct linux_rlimit64 lrlim; + struct proc *p; + u_int which; + int flags; + int error; + +#ifdef DEBUG + if (ldebug(prlimit64)) + printf(ARGS(prlimit64, "%d, %d, %p, %p"), args->pid, + args->resource, (void *)args->new, (void *)args->old); +#endif + + if (args->resource >= LINUX_RLIM_NLIMITS) + return (EINVAL); + + which = linux_to_bsd_resource[args->resource]; + if (which == -1) + return (EINVAL); + + if (args->new != NULL) { + /* + * Note. Unlike FreeBSD where rlim is signed 64-bit Linux + * rlim is unsigned 64-bit. FreeBSD treats negative limits + * as INFINITY so we do not need a conversion even. + */ + error = copyin(args->new, &nrlim, sizeof(nrlim)); + if (error != 0) + return (error); + } + + flags = PGET_HOLD | PGET_NOTWEXIT; + if (args->new != NULL) + flags |= PGET_CANDEBUG; + else + flags |= PGET_CANSEE; + error = pget(args->pid, flags, &p); + if (error != 0) + return (error); + + if (args->old != NULL) { + PROC_LOCK(p); + lim_rlimit(p, which, &rlim); + PROC_UNLOCK(p); + if (rlim.rlim_cur == RLIM_INFINITY) + lrlim.rlim_cur = LINUX_RLIM_INFINITY; + else + lrlim.rlim_cur = rlim.rlim_cur; + if (rlim.rlim_max == RLIM_INFINITY) + lrlim.rlim_max = LINUX_RLIM_INFINITY; + else + lrlim.rlim_max = rlim.rlim_max; + error = copyout(&lrlim, args->old, sizeof(lrlim)); + if (error != 0) + goto out; + } + + if (args->new != NULL) + error = kern_proc_setrlimit(td, p, which, &nrlim); + + out: + PRELE(p); + return (error); +} + +int +linux_pselect6(struct thread *td, struct linux_pselect6_args *args) +{ + struct timeval utv, tv0, tv1, *tvp; + struct l_pselect6arg lpse6; + struct l_timespec lts; + struct timespec uts; + l_sigset_t l_ss; + sigset_t *ssp; + sigset_t ss; + int error; + + ssp = NULL; + if (args->sig != NULL) { + error = copyin(args->sig, &lpse6, sizeof(lpse6)); + if (error != 0) + return (error); + if (lpse6.ss_len != sizeof(l_ss)) + return (EINVAL); + if (lpse6.ss != 0) { + error = copyin(PTRIN(lpse6.ss), &l_ss, + sizeof(l_ss)); + if (error != 0) + return (error); + linux_to_bsd_sigset(&l_ss, &ss); + ssp = &ss; + } + } + + /* + * Currently glibc changes nanosecond number to microsecond. + * This mean losing precision but for now it is hardly seen. + */ + if (args->tsp != NULL) { + error = copyin(args->tsp, <s, sizeof(lts)); + if (error != 0) + return (error); + error = linux_to_native_timespec(&uts, <s); + if (error != 0) + return (error); + + TIMESPEC_TO_TIMEVAL(&utv, &uts); + if (itimerfix(&utv)) + return (EINVAL); + + microtime(&tv0); + tvp = &utv; + } else + tvp = NULL; + + error = kern_pselect(td, args->nfds, args->readfds, args->writefds, + args->exceptfds, tvp, ssp, LINUX_NFDBITS); + + if (error == 0 && args->tsp != NULL) { + if (td->td_retval[0] != 0) { + /* + * Compute how much time was left of the timeout, + * by subtracting the current time and the time + * before we started the call, and subtracting + * that result from the user-supplied value. + */ + + microtime(&tv1); + timevalsub(&tv1, &tv0); + timevalsub(&utv, &tv1); + if (utv.tv_sec < 0) + timevalclear(&utv); + } else + timevalclear(&utv); + + TIMEVAL_TO_TIMESPEC(&utv, &uts); + + native_to_linux_timespec(<s, &uts); + error = copyout(<s, args->tsp, sizeof(lts)); + } + + return (error); +} + +int +linux_ppoll(struct thread *td, struct linux_ppoll_args *args) +{ + struct timespec ts0, ts1; + struct l_timespec lts; + struct timespec uts, *tsp; + l_sigset_t l_ss; + sigset_t *ssp; + sigset_t ss; + int error; + + if (args->sset != NULL) { + if (args->ssize != sizeof(l_ss)) + return (EINVAL); + error = copyin(args->sset, &l_ss, sizeof(l_ss)); + if (error) + return (error); + linux_to_bsd_sigset(&l_ss, &ss); + ssp = &ss; + } else + ssp = NULL; + if (args->tsp != NULL) { + error = copyin(args->tsp, <s, sizeof(lts)); + if (error) + return (error); + error = linux_to_native_timespec(&uts, <s); + if (error != 0) + return (error); + + nanotime(&ts0); + tsp = &uts; + } else + tsp = NULL; + + error = kern_poll(td, args->fds, args->nfds, tsp, ssp); + + if (error == 0 && args->tsp != NULL) { + if (td->td_retval[0]) { + nanotime(&ts1); + timespecsub(&ts1, &ts0); + timespecsub(&uts, &ts1); + if (uts.tv_sec < 0) + timespecclear(&uts); + } else + timespecclear(&uts); + + native_to_linux_timespec(<s, &uts); + error = copyout(<s, args->tsp, sizeof(lts)); + } + + return (error); +} + +#if defined(DEBUG) || defined(KTR) +/* XXX: can be removed when every ldebug(...) and KTR stuff are removed. */ + +u_char linux_debug_map[howmany(LINUX_SYS_MAXSYSCALL, sizeof(u_char))]; + +static int +linux_debug(int syscall, int toggle, int global) +{ + + if (global) { + char c = toggle ? 0 : 0xff; + + memset(linux_debug_map, c, sizeof(linux_debug_map)); + return (0); + } + if (syscall < 0 || syscall >= LINUX_SYS_MAXSYSCALL) + return (EINVAL); + if (toggle) + clrbit(linux_debug_map, syscall); + else + setbit(linux_debug_map, syscall); + return (0); +} + +/* + * Usage: sysctl linux.debug=<syscall_nr>.<0/1> + * + * E.g.: sysctl linux.debug=21.0 + * + * As a special case, syscall "all" will apply to all syscalls globally. + */ +#define LINUX_MAX_DEBUGSTR 16 +int +linux_sysctl_debug(SYSCTL_HANDLER_ARGS) +{ + char value[LINUX_MAX_DEBUGSTR], *p; + int error, sysc, toggle; + int global = 0; + + value[0] = '\0'; + error = sysctl_handle_string(oidp, value, LINUX_MAX_DEBUGSTR, req); + if (error || req->newptr == NULL) + return (error); + for (p = value; *p != '\0' && *p != '.'; p++); + if (*p == '\0') + return (EINVAL); + *p++ = '\0'; + sysc = strtol(value, NULL, 0); + toggle = strtol(p, NULL, 0); + if (strcmp(value, "all") == 0) + global = 1; + error = linux_debug(sysc, toggle, global); + return (error); +} + +#endif /* DEBUG || KTR */ + +int +linux_sched_rr_get_interval(struct thread *td, + struct linux_sched_rr_get_interval_args *uap) +{ + struct timespec ts; + struct l_timespec lts; + struct thread *tdt; + int error; + + /* + * According to man in case the invalid pid specified + * EINVAL should be returned. + */ + if (uap->pid < 0) + return (EINVAL); + + tdt = linux_tdfind(td, uap->pid, -1); + if (tdt == NULL) + return (ESRCH); + + error = kern_sched_rr_get_interval_td(td, tdt, &ts); + PROC_UNLOCK(tdt->td_proc); + if (error != 0) + return (error); + native_to_linux_timespec(<s, &ts); + return (copyout(<s, uap->interval, sizeof(lts))); +} + +/* + * In case when the Linux thread is the initial thread in + * the thread group thread id is equal to the process id. + * Glibc depends on this magic (assert in pthread_getattr_np.c). + */ +struct thread * +linux_tdfind(struct thread *td, lwpid_t tid, pid_t pid) +{ + struct linux_emuldata *em; + struct thread *tdt; + struct proc *p; + + tdt = NULL; + if (tid == 0 || tid == td->td_tid) { + tdt = td; + PROC_LOCK(tdt->td_proc); + } else if (tid > PID_MAX) + tdt = tdfind(tid, pid); + else { + /* + * Initial thread where the tid equal to the pid. + */ + p = pfind(tid); + if (p != NULL) { + if (SV_PROC_ABI(p) != SV_ABI_LINUX) { + /* + * p is not a Linuxulator process. + */ + PROC_UNLOCK(p); + return (NULL); + } + FOREACH_THREAD_IN_PROC(p, tdt) { + em = em_find(tdt); + if (tid == em->em_tid) + return (tdt); + } + PROC_UNLOCK(p); + } + return (NULL); + } + + return (tdt); +} + +void +linux_to_bsd_waitopts(int options, int *bsdopts) +{ + + if (options & LINUX_WNOHANG) + *bsdopts |= WNOHANG; + if (options & LINUX_WUNTRACED) + *bsdopts |= WUNTRACED; + if (options & LINUX_WEXITED) + *bsdopts |= WEXITED; + if (options & LINUX_WCONTINUED) + *bsdopts |= WCONTINUED; + if (options & LINUX_WNOWAIT) + *bsdopts |= WNOWAIT; + + if (options & __WCLONE) + *bsdopts |= WLINUXCLONE; +} diff --git a/sys/compat/linux/linux_misc.h b/sys/compat/linux/linux_misc.h index 154d78f..f969c4d 100644 --- a/sys/compat/linux/linux_misc.h +++ b/sys/compat/linux/linux_misc.h @@ -31,6 +31,11 @@ #ifndef _LINUX_MISC_H_ #define _LINUX_MISC_H_ +#include <sys/sysctl.h> + + /* bits per mask */ +#define LINUX_NFDBITS sizeof(l_fd_mask) * 8 + /* * Miscellaneous */ @@ -55,7 +60,7 @@ #define LINUX_MREMAP_MAYMOVE 1 #define LINUX_MREMAP_FIXED 2 -extern const char *linux_platform; +extern const char *linux_kplatform; /* * Non-standard aux entry types used in Linux ELF binaries. @@ -68,7 +73,12 @@ extern const char *linux_platform; #define LINUX_AT_BASE_PLATFORM 24 /* string identifying real platform, may * differ from AT_PLATFORM. */ +#define LINUX_AT_RANDOM 25 /* address of random bytes */ #define LINUX_AT_EXECFN 31 /* filename of program */ +#define LINUX_AT_SYSINFO 32 /* vsyscall */ +#define LINUX_AT_SYSINFO_EHDR 33 /* vdso header */ + +#define LINUX_AT_RANDOM_LEN 16 /* size of random bytes */ /* Linux sets the i387 to extended precision. */ #if defined(__i386__) || defined(__amd64__) @@ -88,10 +98,6 @@ extern const char *linux_platform; #define LINUX_CLONE_CHILD_CLEARTID 0x00200000 #define LINUX_CLONE_CHILD_SETTID 0x01000000 -#define LINUX_THREADING_FLAGS \ - (LINUX_CLONE_VM | LINUX_CLONE_FS | LINUX_CLONE_FILES | \ - LINUX_CLONE_SIGHAND | LINUX_CLONE_THREAD) - /* Scheduling policies */ #define LINUX_SCHED_OTHER 0 #define LINUX_SCHED_FIFO 1 @@ -113,13 +119,37 @@ struct l_new_utsname { #define LINUX_CLOCK_REALTIME_HR 4 #define LINUX_CLOCK_MONOTONIC_HR 5 +#define LINUX_UTIME_NOW 0x3FFFFFFF +#define LINUX_UTIME_OMIT 0x3FFFFFFE + extern int stclohz; -#define __WCLONE 0x80000000 +#define LINUX_WNOHANG 0x00000001 +#define LINUX_WUNTRACED 0x00000002 +#define LINUX_WSTOPPED LINUX_WUNTRACED +#define LINUX_WEXITED 0x00000004 +#define LINUX_WCONTINUED 0x00000008 +#define LINUX_WNOWAIT 0x01000000 + + +#define __WNOTHREAD 0x20000000 +#define __WALL 0x40000000 +#define __WCLONE 0x80000000 + +/* Linux waitid idtype */ +#define LINUX_P_ALL 0 +#define LINUX_P_PID 1 +#define LINUX_P_PGID 2 + +#define LINUX_RLIM_INFINITY (~0UL) int linux_common_wait(struct thread *td, int pid, int *status, int options, struct rusage *ru); +void linux_to_bsd_waitopts(int options, int *bsdopts); int linux_set_upcall_kse(struct thread *td, register_t stack); int linux_set_cloned_tls(struct thread *td, void *desc); +struct thread *linux_tdfind(struct thread *, lwpid_t, pid_t); + +int linux_sysctl_debug(SYSCTL_HANDLER_ARGS); #endif /* _LINUX_MISC_H_ */ diff --git a/sys/compat/linux/linux_signal.c b/sys/compat/linux/linux_signal.c index 1c778f9..0ecf537 100644 --- a/sys/compat/linux/linux_signal.c +++ b/sys/compat/linux/linux_signal.c @@ -53,40 +53,12 @@ __FBSDID("$FreeBSD$"); #include <compat/linux/linux_signal.h> #include <compat/linux/linux_util.h> #include <compat/linux/linux_emul.h> +#include <compat/linux/linux_misc.h> -void -linux_to_bsd_sigset(l_sigset_t *lss, sigset_t *bss) -{ - int b, l; - - SIGEMPTYSET(*bss); - bss->__bits[0] = lss->__bits[0] & ~((1U << LINUX_SIGTBLSZ) - 1); - bss->__bits[1] = lss->__bits[1]; - for (l = 1; l <= LINUX_SIGTBLSZ; l++) { - if (LINUX_SIGISMEMBER(*lss, l)) { - b = linux_to_bsd_signal[_SIG_IDX(l)]; - if (b) - SIGADDSET(*bss, b); - } - } -} +static int linux_do_tkill(struct thread *td, struct thread *tdt, + ksiginfo_t *ksi); +static void sicode_to_lsicode(int si_code, int *lsi_code); -void -bsd_to_linux_sigset(sigset_t *bss, l_sigset_t *lss) -{ - int b, l; - - LINUX_SIGEMPTYSET(*lss); - lss->__bits[0] = bss->__bits[0] & ~((1U << LINUX_SIGTBLSZ) - 1); - lss->__bits[1] = bss->__bits[1]; - for (b = 1; b <= LINUX_SIGTBLSZ; b++) { - if (SIGISMEMBER(*bss, b)) { - l = bsd_to_linux_signal[_SIG_IDX(b)]; - if (l) - LINUX_SIGADDSET(*lss, l); - } - } -} static void linux_to_bsd_sigaction(l_sigaction_t *lsa, struct sigaction *bsa) @@ -155,11 +127,7 @@ linux_do_sigaction(struct thread *td, int linux_sig, l_sigaction_t *linux_nsa, linux_to_bsd_sigaction(linux_nsa, nsa); } else nsa = NULL; - - if (linux_sig <= LINUX_SIGTBLSZ) - sig = linux_to_bsd_signal[_SIG_IDX(linux_sig)]; - else - sig = linux_sig; + sig = linux_to_bsd_signal(linux_sig); error = kern_sigaction(td, sig, nsa, osa, 0); if (error) @@ -171,7 +139,7 @@ linux_do_sigaction(struct thread *td, int linux_sig, l_sigaction_t *linux_nsa, return (0); } - +#if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) int linux_signal(struct thread *td, struct linux_signal_args *args) { @@ -193,6 +161,7 @@ linux_signal(struct thread *td, struct linux_signal_args *args) return (error); } +#endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ int linux_rt_sigaction(struct thread *td, struct linux_rt_sigaction_args *args) @@ -262,6 +231,7 @@ linux_do_sigprocmask(struct thread *td, int how, l_sigset_t *new, return (error); } +#if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) int linux_sigprocmask(struct thread *td, struct linux_sigprocmask_args *args) { @@ -279,7 +249,7 @@ linux_sigprocmask(struct thread *td, struct linux_sigprocmask_args *args) if (error) return (error); LINUX_SIGEMPTYSET(set); - set.__bits[0] = mask; + set.__mask = mask; } error = linux_do_sigprocmask(td, args->how, @@ -287,12 +257,13 @@ linux_sigprocmask(struct thread *td, struct linux_sigprocmask_args *args) args->omask ? &oset : NULL); if (args->omask != NULL && !error) { - mask = oset.__bits[0]; + mask = oset.__mask; error = copyout(&mask, args->omask, sizeof(l_osigset_t)); } return (error); } +#endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ int linux_rt_sigprocmask(struct thread *td, struct linux_rt_sigprocmask_args *args) @@ -327,6 +298,7 @@ linux_rt_sigprocmask(struct thread *td, struct linux_rt_sigprocmask_args *args) return (error); } +#if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) int linux_sgetmask(struct thread *td, struct linux_sgetmask_args *args) { @@ -341,7 +313,7 @@ linux_sgetmask(struct thread *td, struct linux_sgetmask_args *args) PROC_LOCK(p); bsd_to_linux_sigset(&td->td_sigmask, &mask); PROC_UNLOCK(p); - td->td_retval[0] = mask.__bits[0]; + td->td_retval[0] = mask.__mask; return (0); } @@ -359,9 +331,9 @@ linux_ssetmask(struct thread *td, struct linux_ssetmask_args *args) PROC_LOCK(p); bsd_to_linux_sigset(&td->td_sigmask, &lset); - td->td_retval[0] = lset.__bits[0]; + td->td_retval[0] = lset.__mask; LINUX_SIGEMPTYSET(lset); - lset.__bits[0] = args->mask; + lset.__mask = args->mask; linux_to_bsd_sigset(&lset, &bset); td->td_sigmask = bset; SIG_CANTMASK(td->td_sigmask); @@ -370,9 +342,6 @@ linux_ssetmask(struct thread *td, struct linux_ssetmask_args *args) return (0); } -/* - * MPSAFE - */ int linux_sigpending(struct thread *td, struct linux_sigpending_args *args) { @@ -392,9 +361,10 @@ linux_sigpending(struct thread *td, struct linux_sigpending_args *args) SIGSETAND(bset, td->td_sigmask); PROC_UNLOCK(p); bsd_to_linux_sigset(&bset, &lset); - mask = lset.__bits[0]; + mask = lset.__mask; return (copyout(&mask, args->mask, sizeof(mask))); } +#endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ /* * MPSAFE @@ -458,8 +428,8 @@ linux_rt_sigtimedwait(struct thread *td, #ifdef DEBUG if (ldebug(rt_sigtimedwait)) printf(LMSG("linux_rt_sigtimedwait: " - "incoming timeout (%d/%d)\n"), - ltv.tv_sec, ltv.tv_usec); + "incoming timeout (%jd/%jd)\n"), + (intmax_t)ltv.tv_sec, (intmax_t)ltv.tv_usec); #endif tv.tv_sec = (long)ltv.tv_sec; tv.tv_usec = (suseconds_t)ltv.tv_usec; @@ -495,7 +465,7 @@ linux_rt_sigtimedwait(struct thread *td, if (error) return (error); - sig = BSD_TO_LINUX_SIGNAL(info.ksi_signo); + sig = bsd_to_linux_signal(info.ksi_signo); if (args->ptr) { memset(&linfo, 0, sizeof(linfo)); @@ -527,66 +497,31 @@ linux_kill(struct thread *td, struct linux_kill_args *args) if (!LINUX_SIG_VALID(args->signum) && args->signum != 0) return (EINVAL); - if (args->signum > 0 && args->signum <= LINUX_SIGTBLSZ) - tmp.signum = linux_to_bsd_signal[_SIG_IDX(args->signum)]; + if (args->signum > 0) + tmp.signum = linux_to_bsd_signal(args->signum); else - tmp.signum = args->signum; + tmp.signum = 0; tmp.pid = args->pid; return (sys_kill(td, &tmp)); } static int -linux_do_tkill(struct thread *td, l_int tgid, l_int pid, l_int signum) +linux_do_tkill(struct thread *td, struct thread *tdt, ksiginfo_t *ksi) { - struct proc *proc = td->td_proc; - struct linux_emuldata *em; struct proc *p; - ksiginfo_t ksi; int error; - AUDIT_ARG_SIGNUM(signum); - AUDIT_ARG_PID(pid); - - /* - * Allow signal 0 as a means to check for privileges - */ - if (!LINUX_SIG_VALID(signum) && signum != 0) - return (EINVAL); - - if (signum > 0 && signum <= LINUX_SIGTBLSZ) - signum = linux_to_bsd_signal[_SIG_IDX(signum)]; - - if ((p = pfind(pid)) == NULL) { - if ((p = zpfind(pid)) == NULL) - return (ESRCH); - } - + p = tdt->td_proc; + AUDIT_ARG_SIGNUM(ksi->ksi_signo); + AUDIT_ARG_PID(p->p_pid); AUDIT_ARG_PROCESS(p); - error = p_cansignal(td, p, signum); - if (error != 0 || signum == 0) - goto out; - - error = ESRCH; - em = em_find(p, EMUL_DONTLOCK); - if (em == NULL) { -#ifdef DEBUG - printf("emuldata not found in do_tkill.\n"); -#endif + error = p_cansignal(td, p, ksi->ksi_signo); + if (error != 0 || ksi->ksi_signo == 0) goto out; - } - if (tgid > 0 && em->shared->group_pid != tgid) - goto out; - - ksiginfo_init(&ksi); - ksi.ksi_signo = signum; - ksi.ksi_code = LINUX_SI_TKILL; - ksi.ksi_errno = 0; - ksi.ksi_pid = proc->p_pid; - ksi.ksi_uid = proc->p_ucred->cr_ruid; - error = pksignal(p, ksi.ksi_signo, &ksi); + tdksignal(tdt, ksi->ksi_signo, ksi); out: PROC_UNLOCK(p); @@ -596,20 +531,53 @@ out: int linux_tgkill(struct thread *td, struct linux_tgkill_args *args) { + struct thread *tdt; + ksiginfo_t ksi; + int sig; #ifdef DEBUG if (ldebug(tgkill)) - printf(ARGS(tgkill, "%d, %d, %d"), args->tgid, args->pid, args->sig); + printf(ARGS(tgkill, "%d, %d, %d"), + args->tgid, args->pid, args->sig); #endif + if (args->pid <= 0 || args->tgid <=0) return (EINVAL); - return (linux_do_tkill(td, args->tgid, args->pid, args->sig)); + /* + * Allow signal 0 as a means to check for privileges + */ + if (!LINUX_SIG_VALID(args->sig) && args->sig != 0) + return (EINVAL); + + if (args->sig > 0) + sig = linux_to_bsd_signal(args->sig); + else + sig = 0; + + tdt = linux_tdfind(td, args->pid, args->tgid); + if (tdt == NULL) + return (ESRCH); + + ksiginfo_init(&ksi); + ksi.ksi_signo = sig; + ksi.ksi_code = SI_LWP; + ksi.ksi_errno = 0; + ksi.ksi_pid = td->td_proc->p_pid; + ksi.ksi_uid = td->td_proc->p_ucred->cr_ruid; + return (linux_do_tkill(td, tdt, &ksi)); } +/* + * Deprecated since 2.5.75. Replaced by tgkill(). + */ int linux_tkill(struct thread *td, struct linux_tkill_args *args) { + struct thread *tdt; + ksiginfo_t ksi; + int sig; + #ifdef DEBUG if (ldebug(tkill)) printf(ARGS(tkill, "%i, %i"), args->tid, args->sig); @@ -617,40 +585,182 @@ linux_tkill(struct thread *td, struct linux_tkill_args *args) if (args->tid <= 0) return (EINVAL); - return (linux_do_tkill(td, 0, args->tid, args->sig)); + if (!LINUX_SIG_VALID(args->sig)) + return (EINVAL); + + sig = linux_to_bsd_signal(args->sig); + + tdt = linux_tdfind(td, args->tid, -1); + if (tdt == NULL) + return (ESRCH); + + ksiginfo_init(&ksi); + ksi.ksi_signo = sig; + ksi.ksi_code = SI_LWP; + ksi.ksi_errno = 0; + ksi.ksi_pid = td->td_proc->p_pid; + ksi.ksi_uid = td->td_proc->p_ucred->cr_ruid; + return (linux_do_tkill(td, tdt, &ksi)); +} + +void +ksiginfo_to_lsiginfo(const ksiginfo_t *ksi, l_siginfo_t *lsi, l_int sig) +{ + + siginfo_to_lsiginfo(&ksi->ksi_info, lsi, sig); +} + +static void +sicode_to_lsicode(int si_code, int *lsi_code) +{ + + switch (si_code) { + case SI_USER: + *lsi_code = LINUX_SI_USER; + break; + case SI_KERNEL: + *lsi_code = LINUX_SI_KERNEL; + break; + case SI_QUEUE: + *lsi_code = LINUX_SI_QUEUE; + break; + case SI_TIMER: + *lsi_code = LINUX_SI_TIMER; + break; + case SI_MESGQ: + *lsi_code = LINUX_SI_MESGQ; + break; + case SI_ASYNCIO: + *lsi_code = LINUX_SI_ASYNCIO; + break; + case SI_LWP: + *lsi_code = LINUX_SI_TKILL; + break; + default: + *lsi_code = si_code; + break; + } } void -ksiginfo_to_lsiginfo(ksiginfo_t *ksi, l_siginfo_t *lsi, l_int sig) +siginfo_to_lsiginfo(const siginfo_t *si, l_siginfo_t *lsi, l_int sig) { + /* sig alredy converted */ lsi->lsi_signo = sig; - lsi->lsi_code = ksi->ksi_code; + sicode_to_lsicode(si->si_code, &lsi->lsi_code); - switch (sig) { - case LINUX_SIGPOLL: - /* XXX si_fd? */ - lsi->lsi_band = ksi->ksi_band; + switch (si->si_code) { + case SI_LWP: + lsi->lsi_pid = si->si_pid; + lsi->lsi_uid = si->si_uid; break; - case LINUX_SIGCHLD: - lsi->lsi_pid = ksi->ksi_pid; - lsi->lsi_uid = ksi->ksi_uid; - lsi->lsi_status = ksi->ksi_status; + + case SI_TIMER: + lsi->lsi_int = si->si_value.sival_int; + lsi->lsi_ptr = PTROUT(si->si_value.sival_ptr); + lsi->lsi_tid = si->si_timerid; break; - case LINUX_SIGBUS: - case LINUX_SIGILL: - case LINUX_SIGFPE: - case LINUX_SIGSEGV: - lsi->lsi_addr = PTROUT(ksi->ksi_addr); + + case SI_QUEUE: + lsi->lsi_pid = si->si_pid; + lsi->lsi_uid = si->si_uid; + lsi->lsi_ptr = PTROUT(si->si_value.sival_ptr); break; + + case SI_ASYNCIO: + lsi->lsi_int = si->si_value.sival_int; + lsi->lsi_ptr = PTROUT(si->si_value.sival_ptr); + break; + default: - /* XXX SI_TIMER etc... */ - lsi->lsi_pid = ksi->ksi_pid; - lsi->lsi_uid = ksi->ksi_uid; + switch (sig) { + case LINUX_SIGPOLL: + /* XXX si_fd? */ + lsi->lsi_band = si->si_band; + break; + + case LINUX_SIGCHLD: + lsi->lsi_errno = 0; + lsi->lsi_pid = si->si_pid; + lsi->lsi_uid = si->si_uid; + + if (si->si_code == CLD_STOPPED) + lsi->lsi_status = bsd_to_linux_signal(si->si_status); + else if (si->si_code == CLD_CONTINUED) + lsi->lsi_status = bsd_to_linux_signal(SIGCONT); + else + lsi->lsi_status = si->si_status; + break; + + case LINUX_SIGBUS: + case LINUX_SIGILL: + case LINUX_SIGFPE: + case LINUX_SIGSEGV: + lsi->lsi_addr = PTROUT(si->si_addr); + break; + + default: + lsi->lsi_pid = si->si_pid; + lsi->lsi_uid = si->si_uid; + if (sig >= LINUX_SIGRTMIN) { + lsi->lsi_int = si->si_value.sival_int; + lsi->lsi_ptr = PTROUT(si->si_value.sival_ptr); + } + break; + } break; } - if (sig >= LINUX_SIGRTMIN) { - lsi->lsi_int = ksi->ksi_info.si_value.sival_int; - lsi->lsi_ptr = PTROUT(ksi->ksi_info.si_value.sival_ptr); +} + +void +lsiginfo_to_ksiginfo(const l_siginfo_t *lsi, ksiginfo_t *ksi, int sig) +{ + + ksi->ksi_signo = sig; + ksi->ksi_code = lsi->lsi_code; /* XXX. Convert. */ + ksi->ksi_pid = lsi->lsi_pid; + ksi->ksi_uid = lsi->lsi_uid; + ksi->ksi_status = lsi->lsi_status; + ksi->ksi_addr = PTRIN(lsi->lsi_addr); + ksi->ksi_info.si_value.sival_int = lsi->lsi_int; +} + +int +linux_rt_sigqueueinfo(struct thread *td, struct linux_rt_sigqueueinfo_args *args) +{ + l_siginfo_t linfo; + struct proc *p; + ksiginfo_t ksi; + int error; + int sig; + + if (!LINUX_SIG_VALID(args->sig)) + return (EINVAL); + + error = copyin(args->info, &linfo, sizeof(linfo)); + if (error != 0) + return (error); + + if (linfo.lsi_code >= 0) + return (EPERM); + + sig = linux_to_bsd_signal(args->sig); + + error = ESRCH; + if ((p = pfind(args->pid)) != NULL || + (p = zpfind(args->pid)) != NULL) { + error = p_cansignal(td, p, sig); + if (error != 0) { + PROC_UNLOCK(p); + return (error); + } + + ksiginfo_init(&ksi); + lsiginfo_to_ksiginfo(&linfo, &ksi, sig); + error = tdsendsignal(p, NULL, sig, &ksi); + PROC_UNLOCK(p); } + + return (error); } diff --git a/sys/compat/linux/linux_signal.h b/sys/compat/linux/linux_signal.h index 426cf43..510bfb3 100644 --- a/sys/compat/linux/linux_signal.h +++ b/sys/compat/linux/linux_signal.h @@ -31,19 +31,21 @@ #ifndef _LINUX_SIGNAL_H_ #define _LINUX_SIGNAL_H_ -#define LINUX_SI_TKILL -6; - -extern int bsd_to_linux_signal[]; -extern int linux_to_bsd_signal[]; +/* + * si_code values + */ +#define LINUX_SI_USER 0 /* sent by kill, sigsend, raise */ +#define LINUX_SI_KERNEL 0x80 /* sent by the kernel from somewhere */ +#define LINUX_SI_QUEUE -1 /* sent by sigqueue */ +#define LINUX_SI_TIMER -2 /* sent by timer expiration */ +#define LINUX_SI_MESGQ -3 /* sent by real time mesq state change */ +#define LINUX_SI_ASYNCIO -4 /* sent by AIO completion */ +#define LINUX_SI_SIGIO -5 /* sent by queued SIGIO */ +#define LINUX_SI_TKILL -6 /* sent by tkill system call */ -void linux_to_bsd_sigset(l_sigset_t *, sigset_t *); -void bsd_to_linux_sigset(sigset_t *, l_sigset_t *); int linux_do_sigaction(struct thread *, int, l_sigaction_t *, l_sigaction_t *); -void ksiginfo_to_lsiginfo(ksiginfo_t *ksi, l_siginfo_t *lsi, l_int sig); - -#define LINUX_SIG_VALID(sig) ((sig) <= LINUX_NSIG && (sig) > 0) - -#define BSD_TO_LINUX_SIGNAL(sig) \ - (((sig) <= LINUX_SIGTBLSZ) ? bsd_to_linux_signal[_SIG_IDX(sig)] : sig) +void ksiginfo_to_lsiginfo(const ksiginfo_t *ksi, l_siginfo_t *lsi, l_int sig); +void siginfo_to_lsiginfo(const siginfo_t *si, l_siginfo_t *lsi, l_int sig); +void lsiginfo_to_ksiginfo(const l_siginfo_t *lsi, ksiginfo_t *ksi, int sig); #endif /* _LINUX_SIGNAL_H_ */ diff --git a/sys/compat/linux/linux_socket.c b/sys/compat/linux/linux_socket.c index 4b07b5c..34af9da 100644 --- a/sys/compat/linux/linux_socket.c +++ b/sys/compat/linux/linux_socket.c @@ -70,10 +70,17 @@ __FBSDID("$FreeBSD$"); #include <machine/../linux/linux.h> #include <machine/../linux/linux_proto.h> #endif +#include <compat/linux/linux_file.h> #include <compat/linux/linux_socket.h> +#include <compat/linux/linux_timer.h> #include <compat/linux/linux_util.h> static int linux_to_bsd_domain(int); +static int linux_sendmsg_common(struct thread *, l_int, struct l_msghdr *, + l_uint); +static int linux_recvmsg_common(struct thread *, l_int, struct l_msghdr *, + l_uint, struct msghdr *); +static int linux_set_socket_flags(int, int *); /* * Reads a linux sockaddr and does any necessary translation. @@ -428,7 +435,6 @@ linux_to_bsd_sockaddr(struct sockaddr *arg, int len) return (error); } - static int linux_sa_put(struct osockaddr *osa) { @@ -477,6 +483,8 @@ bsd_to_linux_cmsg_type(int cmsg_type) return (LINUX_SCM_RIGHTS); case SCM_CREDS: return (LINUX_SCM_CREDENTIALS); + case SCM_TIMESTAMP: + return (LINUX_SCM_TIMESTAMP); } return (-1); } @@ -529,20 +537,15 @@ bsd_to_linux_msghdr(const struct msghdr *bhdr, struct l_msghdr *lhdr) } static int -linux_set_socket_flags(struct thread *td, int s, int flags) +linux_set_socket_flags(int lflags, int *flags) { - int error; - if (flags & LINUX_SOCK_NONBLOCK) { - error = kern_fcntl(td, s, F_SETFL, O_NONBLOCK); - if (error) - return (error); - } - if (flags & LINUX_SOCK_CLOEXEC) { - error = kern_fcntl(td, s, F_SETFD, FD_CLOEXEC); - if (error) - return (error); - } + if (lflags & ~(LINUX_SOCK_CLOEXEC | LINUX_SOCK_NONBLOCK)) + return (EINVAL); + if (lflags & LINUX_SOCK_NONBLOCK) + *flags |= SOCK_NONBLOCK; + if (lflags & LINUX_SOCK_CLOEXEC) + *flags |= SOCK_CLOEXEC; return (0); } @@ -585,15 +588,6 @@ linux_check_hdrincl(struct thread *td, int s) return (optval == 0); } -struct linux_sendto_args { - int s; - l_uintptr_t msg; - int len; - int flags; - l_uintptr_t to; - int tolen; -}; - /* * Updated sendto() when IP_HDRINCL is set: * tweak endian-dependent fields in the IP packet. @@ -618,7 +612,7 @@ linux_sendto_hdrincl(struct thread *td, struct linux_sendto_args *linux_args) linux_args->len > IP_MAXPACKET) return (EINVAL); - packet = (struct ip *)malloc(linux_args->len, M_TEMP, M_WAITOK); + packet = (struct ip *)malloc(linux_args->len, M_LINUX, M_WAITOK); /* Make kernel copy of the packet to be sent */ if ((error = copyin(PTRIN(linux_args->msg), packet, @@ -641,17 +635,11 @@ linux_sendto_hdrincl(struct thread *td, struct linux_sendto_args *linux_args) error = linux_sendit(td, linux_args->s, &msg, linux_args->flags, NULL, UIO_SYSSPACE); goout: - free(packet, M_TEMP); + free(packet, M_LINUX); return (error); } -struct linux_socket_args { - int domain; - int type; - int protocol; -}; - -static int +int linux_socket(struct thread *td, struct linux_socket_args *args) { struct socket_args /* { @@ -659,15 +647,16 @@ linux_socket(struct thread *td, struct linux_socket_args *args) int type; int protocol; } */ bsd_args; - int retval_socket, socket_flags; + int retval_socket; bsd_args.protocol = args->protocol; - socket_flags = args->type & ~LINUX_SOCK_TYPE_MASK; - if (socket_flags & ~(LINUX_SOCK_CLOEXEC | LINUX_SOCK_NONBLOCK)) - return (EINVAL); bsd_args.type = args->type & LINUX_SOCK_TYPE_MASK; if (bsd_args.type < 0 || bsd_args.type > LINUX_SOCK_MAX) return (EINVAL); + retval_socket = linux_set_socket_flags(args->type & ~LINUX_SOCK_TYPE_MASK, + &bsd_args.type); + if (retval_socket != 0) + return (retval_socket); bsd_args.domain = linux_to_bsd_domain(args->domain); if (bsd_args.domain == -1) return (EAFNOSUPPORT); @@ -676,13 +665,6 @@ linux_socket(struct thread *td, struct linux_socket_args *args) if (retval_socket) return (retval_socket); - retval_socket = linux_set_socket_flags(td, td->td_retval[0], - socket_flags); - if (retval_socket) { - (void)kern_close(td, td->td_retval[0]); - goto out; - } - if (bsd_args.type == SOCK_RAW && (bsd_args.protocol == IPPROTO_RAW || bsd_args.protocol == 0) && bsd_args.domain == PF_INET) { @@ -711,17 +693,10 @@ linux_socket(struct thread *td, struct linux_socket_args *args) } #endif -out: return (retval_socket); } -struct linux_bind_args { - int s; - l_uintptr_t name; - int namelen; -}; - -static int +int linux_bind(struct thread *td, struct linux_bind_args *args) { struct sockaddr *sa; @@ -739,13 +714,6 @@ linux_bind(struct thread *td, struct linux_bind_args *args) return (error); } -struct linux_connect_args { - int s; - l_uintptr_t name; - int namelen; -}; -int linux_connect(struct thread *, struct linux_connect_args *); - int linux_connect(struct thread *td, struct linux_connect_args *args) { @@ -790,12 +758,7 @@ linux_connect(struct thread *td, struct linux_connect_args *args) return (error); } -struct linux_listen_args { - int s; - int backlog; -}; - -static int +int linux_listen(struct thread *td, struct linux_listen_args *args) { struct listen_args /* { @@ -812,43 +775,31 @@ static int linux_accept_common(struct thread *td, int s, l_uintptr_t addr, l_uintptr_t namelen, int flags) { - struct accept_args /* { + struct accept4_args /* { int s; struct sockaddr * __restrict name; socklen_t * __restrict anamelen; + int flags; } */ bsd_args; int error; - if (flags & ~(LINUX_SOCK_CLOEXEC | LINUX_SOCK_NONBLOCK)) - return (EINVAL); - bsd_args.s = s; /* XXX: */ bsd_args.name = (struct sockaddr * __restrict)PTRIN(addr); bsd_args.anamelen = PTRIN(namelen);/* XXX */ - error = sys_accept(td, &bsd_args); + bsd_args.flags = 0; + error = linux_set_socket_flags(flags, &bsd_args.flags); + if (error != 0) + return (error); + error = sys_accept4(td, &bsd_args); bsd_to_linux_sockaddr((struct sockaddr *)bsd_args.name); if (error) { if (error == EFAULT && namelen != sizeof(struct sockaddr_in)) return (EINVAL); return (error); } - - /* - * linux appears not to copy flags from the parent socket to the - * accepted one, so we must clear the flags in the new descriptor - * and apply the requested flags. - */ - error = kern_fcntl(td, td->td_retval[0], F_SETFL, 0); - if (error) - goto out; - error = linux_set_socket_flags(td, td->td_retval[0], flags); - if (error) - goto out; if (addr) error = linux_sa_put(PTRIN(addr)); - -out: if (error) { (void)kern_close(td, td->td_retval[0]); td->td_retval[0] = 0; @@ -856,13 +807,7 @@ out: return (error); } -struct linux_accept_args { - int s; - l_uintptr_t addr; - l_uintptr_t namelen; -}; - -static int +int linux_accept(struct thread *td, struct linux_accept_args *args) { @@ -870,14 +815,7 @@ linux_accept(struct thread *td, struct linux_accept_args *args) args->namelen, 0)); } -struct linux_accept4_args { - int s; - l_uintptr_t addr; - l_uintptr_t namelen; - int flags; -}; - -static int +int linux_accept4(struct thread *td, struct linux_accept4_args *args) { @@ -885,13 +823,7 @@ linux_accept4(struct thread *td, struct linux_accept4_args *args) args->namelen, args->flags)); } -struct linux_getsockname_args { - int s; - l_uintptr_t addr; - l_uintptr_t namelen; -}; - -static int +int linux_getsockname(struct thread *td, struct linux_getsockname_args *args) { struct getsockname_args /* { @@ -915,13 +847,7 @@ linux_getsockname(struct thread *td, struct linux_getsockname_args *args) return (0); } -struct linux_getpeername_args { - int s; - l_uintptr_t addr; - l_uintptr_t namelen; -}; - -static int +int linux_getpeername(struct thread *td, struct linux_getpeername_args *args) { struct getpeername_args /* { @@ -944,14 +870,7 @@ linux_getpeername(struct thread *td, struct linux_getpeername_args *args) return (0); } -struct linux_socketpair_args { - int domain; - int type; - int protocol; - l_uintptr_t rsv; -}; - -static int +int linux_socketpair(struct thread *td, struct linux_socketpair_args *args) { struct socketpair_args /* { @@ -960,20 +879,18 @@ linux_socketpair(struct thread *td, struct linux_socketpair_args *args) int protocol; int *rsv; } */ bsd_args; - int error, socket_flags; - int sv[2]; + int error; bsd_args.domain = linux_to_bsd_domain(args->domain); if (bsd_args.domain != PF_LOCAL) return (EAFNOSUPPORT); - - socket_flags = args->type & ~LINUX_SOCK_TYPE_MASK; - if (socket_flags & ~(LINUX_SOCK_CLOEXEC | LINUX_SOCK_NONBLOCK)) - return (EINVAL); bsd_args.type = args->type & LINUX_SOCK_TYPE_MASK; if (bsd_args.type < 0 || bsd_args.type > LINUX_SOCK_MAX) return (EINVAL); - + error = linux_set_socket_flags(args->type & ~LINUX_SOCK_TYPE_MASK, + &bsd_args.type); + if (error != 0) + return (error); if (args->protocol != 0 && args->protocol != PF_UNIX) /* @@ -986,27 +903,10 @@ linux_socketpair(struct thread *td, struct linux_socketpair_args *args) else bsd_args.protocol = 0; bsd_args.rsv = (int *)PTRIN(args->rsv); - error = kern_socketpair(td, bsd_args.domain, bsd_args.type, - bsd_args.protocol, sv); - if (error) - return (error); - error = linux_set_socket_flags(td, sv[0], socket_flags); - if (error) - goto out; - error = linux_set_socket_flags(td, sv[1], socket_flags); - if (error) - goto out; - - error = copyout(sv, bsd_args.rsv, 2 * sizeof(int)); - -out: - if (error) { - (void)kern_close(td, sv[0]); - (void)kern_close(td, sv[1]); - } - return (error); + return (sys_socketpair(td, &bsd_args)); } +#if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) struct linux_send_args { int s; l_uintptr_t msg; @@ -1062,8 +962,9 @@ linux_recv(struct thread *td, struct linux_recv_args *args) bsd_args.fromlenaddr = 0; return (sys_recvfrom(td, &bsd_args)); } +#endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ -static int +int linux_sendto(struct thread *td, struct linux_sendto_args *args) { struct msghdr msg; @@ -1087,63 +988,58 @@ linux_sendto(struct thread *td, struct linux_sendto_args *args) return (error); } -struct linux_recvfrom_args { - int s; - l_uintptr_t buf; - int len; - int flags; - l_uintptr_t from; - l_uintptr_t fromlen; -}; - -static int +int linux_recvfrom(struct thread *td, struct linux_recvfrom_args *args) { - struct recvfrom_args /* { - int s; - caddr_t buf; - size_t len; - int flags; - struct sockaddr * __restrict from; - socklen_t * __restrict fromlenaddr; - } */ bsd_args; - size_t len; + struct msghdr msg; + struct iovec aiov; int error; - if ((error = copyin(PTRIN(args->fromlen), &len, sizeof(size_t)))) - return (error); + if (PTRIN(args->fromlen) != NULL) { + error = copyin(PTRIN(args->fromlen), &msg.msg_namelen, + sizeof(msg.msg_namelen)); + if (error != 0) + return (error); - bsd_args.s = args->s; - bsd_args.buf = PTRIN(args->buf); - bsd_args.len = args->len; - bsd_args.flags = linux_to_bsd_msg_flags(args->flags); - /* XXX: */ - bsd_args.from = (struct sockaddr * __restrict)PTRIN(args->from); - bsd_args.fromlenaddr = PTRIN(args->fromlen);/* XXX */ - - linux_to_bsd_sockaddr((struct sockaddr *)bsd_args.from, len); - error = sys_recvfrom(td, &bsd_args); - bsd_to_linux_sockaddr((struct sockaddr *)bsd_args.from); - - if (error) + error = linux_to_bsd_sockaddr((struct sockaddr *)PTRIN(args->from), + msg.msg_namelen); + if (error != 0) + return (error); + } else + msg.msg_namelen = 0; + + msg.msg_name = (struct sockaddr * __restrict)PTRIN(args->from); + msg.msg_iov = &aiov; + msg.msg_iovlen = 1; + aiov.iov_base = PTRIN(args->buf); + aiov.iov_len = args->len; + msg.msg_control = 0; + msg.msg_flags = linux_to_bsd_msg_flags(args->flags); + + error = kern_recvit(td, args->s, &msg, UIO_USERSPACE, NULL); + if (error != 0) return (error); - if (args->from) { - error = linux_sa_put((struct osockaddr *) + + if (PTRIN(args->from) != NULL) { + error = bsd_to_linux_sockaddr((struct sockaddr *) PTRIN(args->from)); - if (error) + if (error != 0) return (error); + + error = linux_sa_put((struct osockaddr *) + PTRIN(args->from)); } - return (0); -} -struct linux_sendmsg_args { - int s; - l_uintptr_t msg; - int flags; -}; + if (PTRIN(args->fromlen) != NULL) + error = copyout(&msg.msg_namelen, PTRIN(args->fromlen), + sizeof(msg.msg_namelen)); + + return (error); +} static int -linux_sendmsg(struct thread *td, struct linux_sendmsg_args *args) +linux_sendmsg_common(struct thread *td, l_int s, struct l_msghdr *msghdr, + l_uint flags) { struct cmsghdr *cmsg; struct cmsgcred cmcred; @@ -1159,8 +1055,8 @@ linux_sendmsg(struct thread *td, struct linux_sendmsg_args *args) void *data; int error; - error = copyin(PTRIN(args->msg), &linux_msg, sizeof(linux_msg)); - if (error) + error = copyin(msghdr, &linux_msg, sizeof(linux_msg)); + if (error != 0) return (error); /* @@ -1174,7 +1070,7 @@ linux_sendmsg(struct thread *td, struct linux_sendmsg_args *args) linux_msg.msg_control = PTROUT(NULL); error = linux_to_bsd_msghdr(&msg, &linux_msg); - if (error) + if (error != 0) return (error); #ifdef COMPAT_LINUX32 @@ -1183,29 +1079,27 @@ linux_sendmsg(struct thread *td, struct linux_sendmsg_args *args) #else error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE); #endif - if (error) + if (error != 0) return (error); control = NULL; cmsg = NULL; if ((ptr_cmsg = LINUX_CMSG_FIRSTHDR(&linux_msg)) != NULL) { - error = kern_getsockname(td, args->s, &sa, &datalen); - if (error) + error = kern_getsockname(td, s, &sa, &datalen); + if (error != 0) goto bad; sa_family = sa->sa_family; free(sa, M_SONAME); error = ENOBUFS; - cmsg = malloc(CMSG_HDRSZ, M_TEMP, M_WAITOK | M_ZERO); + cmsg = malloc(CMSG_HDRSZ, M_LINUX, M_WAITOK|M_ZERO); control = m_get(M_WAITOK, MT_CONTROL); - if (control == NULL) - goto bad; do { error = copyin(ptr_cmsg, &linux_cmsg, sizeof(struct l_cmsghdr)); - if (error) + if (error != 0) goto bad; error = EINVAL; @@ -1269,28 +1163,60 @@ linux_sendmsg(struct thread *td, struct linux_sendmsg_args *args) msg.msg_iov = iov; msg.msg_flags = 0; - error = linux_sendit(td, args->s, &msg, args->flags, control, - UIO_USERSPACE); + error = linux_sendit(td, s, &msg, flags, control, UIO_USERSPACE); bad: + m_freem(control); free(iov, M_IOV); if (cmsg) - free(cmsg, M_TEMP); + free(cmsg, M_LINUX); return (error); } -struct linux_recvmsg_args { - int s; - l_uintptr_t msg; - int flags; -}; +int +linux_sendmsg(struct thread *td, struct linux_sendmsg_args *args) +{ + + return (linux_sendmsg_common(td, args->s, PTRIN(args->msg), + args->flags)); +} + +int +linux_sendmmsg(struct thread *td, struct linux_sendmmsg_args *args) +{ + struct l_mmsghdr *msg; + l_uint retval; + int error, datagrams; + + if (args->vlen > UIO_MAXIOV) + args->vlen = UIO_MAXIOV; + + msg = PTRIN(args->msg); + datagrams = 0; + while (datagrams < args->vlen) { + error = linux_sendmsg_common(td, args->s, &msg->msg_hdr, + args->flags); + if (error != 0) + break; + + retval = td->td_retval[0]; + error = copyout(&retval, &msg->msg_len, sizeof(msg->msg_len)); + if (error != 0) + break; + ++msg; + ++datagrams; + } + if (error == 0) + td->td_retval[0] = datagrams; + return (error); +} static int -linux_recvmsg(struct thread *td, struct linux_recvmsg_args *args) +linux_recvmsg_common(struct thread *td, l_int s, struct l_msghdr *msghdr, + l_uint flags, struct msghdr *msg) { struct cmsghdr *cm; struct cmsgcred *cmcred; - struct msghdr msg; struct l_cmsghdr *linux_cmsg = NULL; struct l_ucred linux_ucred; socklen_t datalen, outlen; @@ -1298,55 +1224,57 @@ linux_recvmsg(struct thread *td, struct linux_recvmsg_args *args) struct iovec *iov, *uiov; struct mbuf *control = NULL; struct mbuf **controlp; + struct timeval *ftmvl; + l_timeval ltmvl; caddr_t outbuf; void *data; int error, i, fd, fds, *fdp; - error = copyin(PTRIN(args->msg), &linux_msg, sizeof(linux_msg)); - if (error) + error = copyin(msghdr, &linux_msg, sizeof(linux_msg)); + if (error != 0) return (error); - error = linux_to_bsd_msghdr(&msg, &linux_msg); - if (error) + error = linux_to_bsd_msghdr(msg, &linux_msg); + if (error != 0) return (error); #ifdef COMPAT_LINUX32 - error = linux32_copyiniov(PTRIN(msg.msg_iov), msg.msg_iovlen, + error = linux32_copyiniov(PTRIN(msg->msg_iov), msg->msg_iovlen, &iov, EMSGSIZE); #else - error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE); + error = copyiniov(msg->msg_iov, msg->msg_iovlen, &iov, EMSGSIZE); #endif - if (error) + if (error != 0) return (error); - if (msg.msg_name) { - error = linux_to_bsd_sockaddr((struct sockaddr *)msg.msg_name, - msg.msg_namelen); - if (error) + if (msg->msg_name) { + error = linux_to_bsd_sockaddr((struct sockaddr *)msg->msg_name, + msg->msg_namelen); + if (error != 0) goto bad; } - uiov = msg.msg_iov; - msg.msg_iov = iov; - controlp = (msg.msg_control != NULL) ? &control : NULL; - error = kern_recvit(td, args->s, &msg, UIO_USERSPACE, controlp); - msg.msg_iov = uiov; - if (error) + uiov = msg->msg_iov; + msg->msg_iov = iov; + controlp = (msg->msg_control != NULL) ? &control : NULL; + error = kern_recvit(td, s, msg, UIO_USERSPACE, controlp); + msg->msg_iov = uiov; + if (error != 0) goto bad; - error = bsd_to_linux_msghdr(&msg, &linux_msg); - if (error) + error = bsd_to_linux_msghdr(msg, &linux_msg); + if (error != 0) goto bad; if (linux_msg.msg_name) { error = bsd_to_linux_sockaddr((struct sockaddr *) PTRIN(linux_msg.msg_name)); - if (error) + if (error != 0) goto bad; } if (linux_msg.msg_name && linux_msg.msg_namelen > 2) { error = linux_sa_put(PTRIN(linux_msg.msg_name)); - if (error) + if (error != 0) goto bad; } @@ -1354,12 +1282,12 @@ linux_recvmsg(struct thread *td, struct linux_recvmsg_args *args) outlen = 0; if (control) { - linux_cmsg = malloc(L_CMSG_HDRSZ, M_TEMP, M_WAITOK | M_ZERO); + linux_cmsg = malloc(L_CMSG_HDRSZ, M_LINUX, M_WAITOK | M_ZERO); - msg.msg_control = mtod(control, struct cmsghdr *); - msg.msg_controllen = control->m_len; + msg->msg_control = mtod(control, struct cmsghdr *); + msg->msg_controllen = control->m_len; - cm = CMSG_FIRSTHDR(&msg); + cm = CMSG_FIRSTHDR(msg); while (cm != NULL) { linux_cmsg->cmsg_type = @@ -1379,7 +1307,7 @@ linux_recvmsg(struct thread *td, struct linux_recvmsg_args *args) switch (cm->cmsg_type) { case SCM_RIGHTS: - if (args->flags & LINUX_MSG_CMSG_CLOEXEC) { + if (flags & LINUX_MSG_CMSG_CLOEXEC) { fds = datalen / sizeof(int); fdp = data; for (i = 0; i < fds; i++) { @@ -1408,6 +1336,18 @@ linux_recvmsg(struct thread *td, struct linux_recvmsg_args *args) data = &linux_ucred; datalen = sizeof(linux_ucred); break; + + case SCM_TIMESTAMP: + if (datalen != sizeof(struct timeval)) { + error = EMSGSIZE; + goto bad; + } + ftmvl = (struct timeval *)data; + ltmvl.tv_sec = ftmvl->tv_sec; + ltmvl.tv_usec = ftmvl->tv_usec; + data = <mvl; + datalen = sizeof(ltmvl); + break; } if (outlen + LINUX_CMSG_LEN(datalen) > @@ -1436,28 +1376,92 @@ linux_recvmsg(struct thread *td, struct linux_recvmsg_args *args) outbuf += LINUX_CMSG_ALIGN(datalen); outlen += LINUX_CMSG_LEN(datalen); - cm = CMSG_NXTHDR(&msg, cm); + cm = CMSG_NXTHDR(msg, cm); } } out: linux_msg.msg_controllen = outlen; - error = copyout(&linux_msg, PTRIN(args->msg), sizeof(linux_msg)); + error = copyout(&linux_msg, msghdr, sizeof(linux_msg)); bad: free(iov, M_IOV); m_freem(control); - free(linux_cmsg, M_TEMP); + free(linux_cmsg, M_LINUX); return (error); } -struct linux_shutdown_args { - int s; - int how; -}; +int +linux_recvmsg(struct thread *td, struct linux_recvmsg_args *args) +{ + struct msghdr bsd_msg; -static int + return (linux_recvmsg_common(td, args->s, PTRIN(args->msg), + args->flags, &bsd_msg)); +} + +int +linux_recvmmsg(struct thread *td, struct linux_recvmmsg_args *args) +{ + struct l_mmsghdr *msg; + struct msghdr bsd_msg; + struct l_timespec lts; + struct timespec ts, tts; + l_uint retval; + int error, datagrams; + + if (args->timeout) { + error = copyin(args->timeout, <s, sizeof(struct l_timespec)); + if (error != 0) + return (error); + error = linux_to_native_timespec(&ts, <s); + if (error != 0) + return (error); + getnanotime(&tts); + timespecadd(&tts, &ts); + } + + msg = PTRIN(args->msg); + datagrams = 0; + while (datagrams < args->vlen) { + error = linux_recvmsg_common(td, args->s, &msg->msg_hdr, + args->flags & ~LINUX_MSG_WAITFORONE, &bsd_msg); + if (error != 0) + break; + + retval = td->td_retval[0]; + error = copyout(&retval, &msg->msg_len, sizeof(msg->msg_len)); + if (error != 0) + break; + ++msg; + ++datagrams; + + /* + * MSG_WAITFORONE turns on MSG_DONTWAIT after one packet. + */ + if (args->flags & LINUX_MSG_WAITFORONE) + args->flags |= LINUX_MSG_DONTWAIT; + + /* + * See BUGS section of recvmmsg(2). + */ + if (args->timeout) { + getnanotime(&ts); + timespecsub(&ts, &tts); + if (!timespecisset(&ts) || ts.tv_sec > 0) + break; + } + /* Out of band data, return right away. */ + if (bsd_msg.msg_flags & MSG_OOB) + break; + } + if (error == 0) + td->td_retval[0] = datagrams; + return (error); +} + +int linux_shutdown(struct thread *td, struct linux_shutdown_args *args) { struct shutdown_args /* { @@ -1470,15 +1474,7 @@ linux_shutdown(struct thread *td, struct linux_shutdown_args *args) return (sys_shutdown(td, &bsd_args)); } -struct linux_setsockopt_args { - int s; - int level; - int optname; - l_uintptr_t optval; - int optlen; -}; - -static int +int linux_setsockopt(struct thread *td, struct linux_setsockopt_args *args) { struct setsockopt_args /* { @@ -1543,15 +1539,7 @@ linux_setsockopt(struct thread *td, struct linux_setsockopt_args *args) return (error); } -struct linux_getsockopt_args { - int s; - int level; - int optname; - l_uintptr_t optval; - l_uintptr_t optlen; -}; - -static int +int linux_getsockopt(struct thread *td, struct linux_getsockopt_args *args) { struct getsockopt_args /* { @@ -1635,6 +1623,8 @@ linux_getsockopt(struct thread *td, struct linux_getsockopt_args *args) return (error); } +#if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) + /* Argument list sizes for linux_socketcall */ #define LINUX_AL(x) ((x) * sizeof(l_ulong)) @@ -1649,7 +1639,8 @@ static const unsigned char lxs_args[] = { LINUX_AL(6) /* recvfrom */, LINUX_AL(2) /* shutdown */, LINUX_AL(5) /* setsockopt */, LINUX_AL(5) /* getsockopt */, LINUX_AL(3) /* sendmsg */, LINUX_AL(3) /* recvmsg */, - LINUX_AL(4) /* accept4 */ + LINUX_AL(4) /* accept4 */, LINUX_AL(5) /* recvmmsg */, + LINUX_AL(4) /* sendmmsg */ }; #define LINUX_AL_SIZE sizeof(lxs_args) / sizeof(lxs_args[0]) - 1 @@ -1705,8 +1696,13 @@ linux_socketcall(struct thread *td, struct linux_socketcall_args *args) return (linux_recvmsg(td, arg)); case LINUX_ACCEPT4: return (linux_accept4(td, arg)); + case LINUX_RECVMMSG: + return (linux_recvmmsg(td, arg)); + case LINUX_SENDMMSG: + return (linux_sendmmsg(td, arg)); } uprintf("LINUX: 'socket' typ=%d not implemented\n", args->what); return (ENOSYS); } +#endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ diff --git a/sys/compat/linux/linux_socket.h b/sys/compat/linux/linux_socket.h index e6efadb..b32a969 100644 --- a/sys/compat/linux/linux_socket.h +++ b/sys/compat/linux/linux_socket.h @@ -48,12 +48,36 @@ #define LINUX_MSG_RST 0x1000 #define LINUX_MSG_ERRQUEUE 0x2000 #define LINUX_MSG_NOSIGNAL 0x4000 +#define LINUX_MSG_WAITFORONE 0x10000 #define LINUX_MSG_CMSG_CLOEXEC 0x40000000 /* Socket-level control message types */ #define LINUX_SCM_RIGHTS 0x01 -#define LINUX_SCM_CREDENTIALS 0x02 +#define LINUX_SCM_CREDENTIALS 0x02 +#define LINUX_SCM_TIMESTAMP 0x1D + +struct l_msghdr { + l_uintptr_t msg_name; + l_int msg_namelen; + l_uintptr_t msg_iov; + l_size_t msg_iovlen; + l_uintptr_t msg_control; + l_size_t msg_controllen; + l_uint msg_flags; +}; + +struct l_mmsghdr { + struct l_msghdr msg_hdr; + l_uint msg_len; + +}; + +struct l_cmsghdr { + l_size_t cmsg_len; + l_int cmsg_level; + l_int cmsg_type; +}; /* Ancilliary data object information macros */ @@ -116,6 +140,133 @@ struct l_ucred { uint32_t gid; }; +#if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) + +struct linux_sendto_args { + int s; + l_uintptr_t msg; + int len; + int flags; + l_uintptr_t to; + int tolen; +}; + +struct linux_socket_args { + int domain; + int type; + int protocol; +}; + +struct linux_bind_args { + int s; + l_uintptr_t name; + int namelen; +}; + +struct linux_connect_args { + int s; + l_uintptr_t name; + int namelen; +}; + +struct linux_listen_args { + int s; + int backlog; +}; + +struct linux_accept_args { + int s; + l_uintptr_t addr; + l_uintptr_t namelen; +}; + +struct linux_accept4_args { + int s; + l_uintptr_t addr; + l_uintptr_t namelen; + int flags; +}; + +struct linux_getsockname_args { + int s; + l_uintptr_t addr; + l_uintptr_t namelen; +}; + +struct linux_getpeername_args { + int s; + l_uintptr_t addr; + l_uintptr_t namelen; +}; + +struct linux_socketpair_args { + int domain; + int type; + int protocol; + l_uintptr_t rsv; +}; + +struct linux_recvfrom_args { + int s; + l_uintptr_t buf; + int len; + int flags; + l_uintptr_t from; + l_uintptr_t fromlen; +}; + +struct linux_sendmsg_args { + int s; + l_uintptr_t msg; + int flags; +}; + +struct linux_recvmsg_args { + int s; + l_uintptr_t msg; + int flags; +}; + +struct linux_shutdown_args { + int s; + int how; +}; + +struct linux_setsockopt_args { + int s; + int level; + int optname; + l_uintptr_t optval; + int optlen; +}; + +struct linux_getsockopt_args { + int s; + int level; + int optname; + l_uintptr_t optval; + l_uintptr_t optlen; +}; + +int linux_socket(struct thread *td, struct linux_socket_args *args); +int linux_bind(struct thread *td, struct linux_bind_args *args); +int linux_connect(struct thread *, struct linux_connect_args *); +int linux_listen(struct thread *td, struct linux_listen_args *args); +int linux_accept(struct thread *td, struct linux_accept_args *args); +int linux_accept4(struct thread *td, struct linux_accept4_args *args); +int linux_getsockname(struct thread *td, struct linux_getsockname_args *args); +int linux_getpeername(struct thread *td, struct linux_getpeername_args *args); +int linux_socketpair(struct thread *td, struct linux_socketpair_args *args); +int linux_sendto(struct thread *td, struct linux_sendto_args *args); +int linux_recvfrom(struct thread *td, struct linux_recvfrom_args *args); +int linux_sendmsg(struct thread *td, struct linux_sendmsg_args *args); +int linux_recvmsg(struct thread *td, struct linux_recvmsg_args *args); +int linux_shutdown(struct thread *td, struct linux_shutdown_args *args); +int linux_setsockopt(struct thread *td, struct linux_setsockopt_args *args); +int linux_getsockopt(struct thread *td, struct linux_getsockopt_args *args); + +#endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ + /* Operations for socketcall */ #define LINUX_SOCKET 1 @@ -136,6 +287,8 @@ struct l_ucred { #define LINUX_SENDMSG 16 #define LINUX_RECVMSG 17 #define LINUX_ACCEPT4 18 +#define LINUX_RECVMMSG 19 +#define LINUX_SENDMMSG 20 /* Socket options */ #define LINUX_IP_TOS 1 diff --git a/sys/compat/linux/linux_stats.c b/sys/compat/linux/linux_stats.c index 2e05c85..f96acc0 100644 --- a/sys/compat/linux/linux_stats.c +++ b/sys/compat/linux/linux_stats.c @@ -32,6 +32,7 @@ __FBSDID("$FreeBSD$"); #include "opt_compat.h" #include <sys/param.h> +#include <sys/capsicum.h> #include <sys/dirent.h> #include <sys/file.h> #include <sys/filedesc.h> @@ -58,7 +59,6 @@ __FBSDID("$FreeBSD$"); #include <compat/linux/linux_util.h> #include <compat/linux/linux_file.h> -#define LINUX_SHMFS_MAGIC 0x01021994 static void translate_vnhook_major_minor(struct vnode *vp, struct stat *sb) @@ -251,6 +251,7 @@ linux_newfstat(struct thread *td, struct linux_newfstat_args *args) return (error); } +#if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) static int stat_copyout(struct stat *buf, void *ubuf) { @@ -325,19 +326,19 @@ linux_lstat(struct thread *td, struct linux_lstat_args *args) LFREEPATH(path); return(stat_copyout(&buf, args->up)); } +#endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ -/* XXX - All fields of type l_int are defined as l_long on i386 */ struct l_statfs { - l_int f_type; - l_int f_bsize; - l_int f_blocks; - l_int f_bfree; - l_int f_bavail; - l_int f_files; - l_int f_ffree; + l_long f_type; + l_long f_bsize; + l_long f_blocks; + l_long f_bfree; + l_long f_bavail; + l_long f_files; + l_long f_ffree; l_fsid_t f_fsid; - l_int f_namelen; - l_int f_spare[6]; + l_long f_namelen; + l_long f_spare[6]; }; #define LINUX_CODA_SUPER_MAGIC 0x73757245L @@ -351,6 +352,7 @@ struct l_statfs { #define LINUX_PROC_SUPER_MAGIC 0x9fa0L #define LINUX_UFS_SUPER_MAGIC 0x00011954L /* XXX - UFS_MAGIC in Linux */ #define LINUX_DEVFS_SUPER_MAGIC 0x1373L +#define LINUX_SHMFS_MAGIC 0x01021994 static long bsd_to_linux_ftype(const char *fstypename) @@ -368,6 +370,7 @@ bsd_to_linux_ftype(const char *fstypename) {"hpfs", LINUX_HPFS_SUPER_MAGIC}, {"coda", LINUX_CODA_SUPER_MAGIC}, {"devfs", LINUX_DEVFS_SUPER_MAGIC}, + {"tmpfs", LINUX_SHMFS_MAGIC}, {NULL, 0L}}; for (i = 0; b2l_tbl[i].bsd_name != NULL; i++) @@ -399,7 +402,7 @@ linux_statfs(struct thread *td, struct linux_statfs_args *args) struct l_statfs linux_statfs; struct statfs bsd_statfs; char *path; - int error, dev_shm; + int error; LCONVPATHEXIST(td, args->path, &path); @@ -407,20 +410,15 @@ linux_statfs(struct thread *td, struct linux_statfs_args *args) if (ldebug(statfs)) printf(ARGS(statfs, "%s, *"), path); #endif - dev_shm = 0; error = kern_statfs(td, path, UIO_SYSSPACE, &bsd_statfs); - if (strncmp(path, "/dev/shm", sizeof("/dev/shm") - 1) == 0) - dev_shm = (path[8] == '\0' - || (path[8] == '/' && path[9] == '\0')); LFREEPATH(path); if (error) return (error); bsd_to_linux_statfs(&bsd_statfs, &linux_statfs); - if (dev_shm) - linux_statfs.f_type = LINUX_SHMFS_MAGIC; return copyout(&linux_statfs, args->buf, sizeof(linux_statfs)); } +#if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) static void bsd_to_linux_statfs64(struct statfs *bsd_statfs, struct l_statfs64 *linux_statfs) { @@ -461,6 +459,7 @@ linux_statfs64(struct thread *td, struct linux_statfs64_args *args) bsd_to_linux_statfs64(&bsd_statfs, &linux_statfs); return copyout(&linux_statfs, args->buf, sizeof(linux_statfs)); } +#endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ int linux_fstatfs(struct thread *td, struct linux_fstatfs_args *args) @@ -493,7 +492,7 @@ linux_ustat(struct thread *td, struct linux_ustat_args *args) { #ifdef DEBUG if (ldebug(ustat)) - printf(ARGS(ustat, "%d, *"), args->dev); + printf(ARGS(ustat, "%ju, *"), (uintmax_t)args->dev); #endif return (EOPNOTSUPP); @@ -624,4 +623,74 @@ linux_fstatat64(struct thread *td, struct linux_fstatat64_args *args) return (error); } +#else /* __amd64__ && !COMPAT_LINUX32 */ + +int +linux_newfstatat(struct thread *td, struct linux_newfstatat_args *args) +{ + char *path; + int error, dfd, flag; + struct stat buf; + + if (args->flag & ~LINUX_AT_SYMLINK_NOFOLLOW) + return (EINVAL); + flag = (args->flag & LINUX_AT_SYMLINK_NOFOLLOW) ? + AT_SYMLINK_NOFOLLOW : 0; + + dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd; + LCONVPATHEXIST_AT(td, args->pathname, &path, dfd); + +#ifdef DEBUG + if (ldebug(newfstatat)) + printf(ARGS(newfstatat, "%i, %s, %i"), args->dfd, path, args->flag); +#endif + + error = linux_kern_statat(td, flag, dfd, path, UIO_SYSSPACE, &buf); + if (error == 0) + error = newstat_copyout(&buf, args->statbuf); + LFREEPATH(path); + + return (error); +} + #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ + +int +linux_syncfs(struct thread *td, struct linux_syncfs_args *args) +{ + cap_rights_t rights; + struct mount *mp; + struct vnode *vp; + int error, save; + + error = fgetvp(td, args->fd, cap_rights_init(&rights, CAP_FSYNC), &vp); + if (error != 0) + /* + * Linux syncfs() returns only EBADF, however fgetvp() + * can return EINVAL in case of file descriptor does + * not represent a vnode. XXX. + */ + return (error); + + mp = vp->v_mount; + mtx_lock(&mountlist_mtx); + error = vfs_busy(mp, MBF_MNTLSTLOCK); + if (error != 0) { + /* See comment above. */ + mtx_unlock(&mountlist_mtx); + goto out; + } + if ((mp->mnt_flag & MNT_RDONLY) == 0 && + vn_start_write(NULL, &mp, V_NOWAIT) == 0) { + save = curthread_pflags_set(TDP_SYNCIO); + vfs_msync(mp, MNT_NOWAIT); + VFS_SYNC(mp, MNT_NOWAIT); + curthread_pflags_restore(save); + vn_finished_write(mp); + } + vfs_unbusy(mp); + + out: + vrele(vp); + return (error); +} diff --git a/sys/compat/linux/linux_sysctl.c b/sys/compat/linux/linux_sysctl.c index decd8f8..27b7a3d 100644 --- a/sys/compat/linux/linux_sysctl.c +++ b/sys/compat/linux/linux_sysctl.c @@ -141,12 +141,12 @@ linux_sysctl(struct thread *td, struct linux_sysctl_args *args) return (ENOTDIR); } - mib = malloc(la.nlen * sizeof(l_int), M_TEMP, M_WAITOK); + mib = malloc(la.nlen * sizeof(l_int), M_LINUX, M_WAITOK); error = copyin(PTRIN(la.name), mib, la.nlen * sizeof(l_int)); if (error) { LIN_SDT_PROBE1(sysctl, linux_sysctl, copyin_error, error); LIN_SDT_PROBE1(sysctl, linux_sysctl, return, error); - free(mib, M_TEMP); + free(mib, M_LINUX); return (error); } @@ -158,7 +158,7 @@ linux_sysctl(struct thread *td, struct linux_sysctl_args *args) switch (mib[1]) { case LINUX_KERN_VERSION: error = handle_string(&la, version); - free(mib, M_TEMP); + free(mib, M_LINUX); LIN_SDT_PROBE1(sysctl, linux_sysctl, return, error); return (error); default: @@ -187,7 +187,7 @@ linux_sysctl(struct thread *td, struct linux_sysctl_args *args) sbuf_delete(sb); } - free(mib, M_TEMP); + free(mib, M_LINUX); LIN_SDT_PROBE1(sysctl, linux_sysctl, return, ENOTDIR); return (ENOTDIR); diff --git a/sys/compat/linux/linux_time.c b/sys/compat/linux/linux_time.c index e03af00..663ac92 100644 --- a/sys/compat/linux/linux_time.c +++ b/sys/compat/linux/linux_time.c @@ -40,8 +40,11 @@ __KERNEL_RCSID(0, "$NetBSD: linux_time.c,v 1.14 2006/05/14 03:40:54 christos Exp #include <sys/param.h> #include <sys/kernel.h> +#include <sys/lock.h> #include <sys/ucred.h> #include <sys/mount.h> +#include <sys/mutex.h> +#include <sys/resourcevar.h> #include <sys/sdt.h> #include <sys/signal.h> #include <sys/stdint.h> @@ -60,7 +63,7 @@ __KERNEL_RCSID(0, "$NetBSD: linux_time.c,v 1.14 2006/05/14 03:40:54 christos Exp #endif #include <compat/linux/linux_dtrace.h> -#include <compat/linux/linux_misc.h> +#include <compat/linux/linux_timer.h> /* DTrace init */ LIN_SDT_PROVIDER_DECLARE(LINUX_DTRACE); @@ -103,27 +106,20 @@ LIN_SDT_PROBE_DEFINE1(time, linux_clock_getres, return, "int"); LIN_SDT_PROBE_DEFINE2(time, linux_nanosleep, entry, "const struct l_timespec *", "struct l_timespec *"); LIN_SDT_PROBE_DEFINE1(time, linux_nanosleep, conversion_error, "int"); -LIN_SDT_PROBE_DEFINE1(time, linux_nanosleep, nanosleep_error, "int"); LIN_SDT_PROBE_DEFINE1(time, linux_nanosleep, copyout_error, "int"); LIN_SDT_PROBE_DEFINE1(time, linux_nanosleep, copyin_error, "int"); LIN_SDT_PROBE_DEFINE1(time, linux_nanosleep, return, "int"); LIN_SDT_PROBE_DEFINE4(time, linux_clock_nanosleep, entry, "clockid_t", "int", "struct l_timespec *", "struct l_timespec *"); LIN_SDT_PROBE_DEFINE1(time, linux_clock_nanosleep, conversion_error, "int"); -LIN_SDT_PROBE_DEFINE1(time, linux_clock_nanosleep, nanosleep_error, "int"); LIN_SDT_PROBE_DEFINE1(time, linux_clock_nanosleep, copyout_error, "int"); LIN_SDT_PROBE_DEFINE1(time, linux_clock_nanosleep, copyin_error, "int"); LIN_SDT_PROBE_DEFINE1(time, linux_clock_nanosleep, unsupported_flags, "int"); LIN_SDT_PROBE_DEFINE1(time, linux_clock_nanosleep, unsupported_clockid, "int"); LIN_SDT_PROBE_DEFINE1(time, linux_clock_nanosleep, return, "int"); -static void native_to_linux_timespec(struct l_timespec *, - struct timespec *); -static int linux_to_native_timespec(struct timespec *, - struct l_timespec *); -static int linux_to_native_clockid(clockid_t *, clockid_t); -static void +void native_to_linux_timespec(struct l_timespec *ltp, struct timespec *ntp) { @@ -135,7 +131,7 @@ native_to_linux_timespec(struct l_timespec *ltp, struct timespec *ntp) LIN_SDT_PROBE0(time, native_to_linux_timespec, return); } -static int +int linux_to_native_timespec(struct timespec *ntp, struct l_timespec *ltp) { @@ -152,12 +148,26 @@ linux_to_native_timespec(struct timespec *ntp, struct l_timespec *ltp) return (0); } -static int +int linux_to_native_clockid(clockid_t *n, clockid_t l) { LIN_SDT_PROBE2(time, linux_to_native_clockid, entry, n, l); + if (l < 0) { + /* cpu-clock */ + if ((l & LINUX_CLOCKFD_MASK) == LINUX_CLOCKFD) + return (EINVAL); + if (LINUX_CPUCLOCK_WHICH(l) >= LINUX_CPUCLOCK_MAX) + return (EINVAL); + + if (LINUX_CPUCLOCK_PERTHREAD(l)) + *n = CLOCK_THREAD_CPUTIME_ID; + else + *n = CLOCK_PROCESS_CPUTIME_ID; + return (0); + } + switch (l) { case LINUX_CLOCK_REALTIME: *n = CLOCK_REALTIME; @@ -165,21 +175,27 @@ linux_to_native_clockid(clockid_t *n, clockid_t l) case LINUX_CLOCK_MONOTONIC: *n = CLOCK_MONOTONIC; break; - case LINUX_CLOCK_PROCESS_CPUTIME_ID: - case LINUX_CLOCK_THREAD_CPUTIME_ID: - case LINUX_CLOCK_REALTIME_HR: - case LINUX_CLOCK_MONOTONIC_HR: + case LINUX_CLOCK_REALTIME_COARSE: + *n = CLOCK_REALTIME_FAST; + break; + case LINUX_CLOCK_MONOTONIC_COARSE: + *n = CLOCK_MONOTONIC_FAST; + break; + case LINUX_CLOCK_MONOTONIC_RAW: + case LINUX_CLOCK_BOOTTIME: + case LINUX_CLOCK_REALTIME_ALARM: + case LINUX_CLOCK_BOOTTIME_ALARM: + case LINUX_CLOCK_SGI_CYCLE: + case LINUX_CLOCK_TAI: LIN_SDT_PROBE1(time, linux_to_native_clockid, unsupported_clockid, l); LIN_SDT_PROBE1(time, linux_to_native_clockid, return, EINVAL); return (EINVAL); - break; default: LIN_SDT_PROBE1(time, linux_to_native_clockid, unknown_clockid, l); LIN_SDT_PROBE1(time, linux_to_native_clockid, return, EINVAL); return (EINVAL); - break; } LIN_SDT_PROBE1(time, linux_to_native_clockid, return, 0); @@ -190,9 +206,14 @@ int linux_clock_gettime(struct thread *td, struct linux_clock_gettime_args *args) { struct l_timespec lts; - int error; - clockid_t nwhich = 0; /* XXX: GCC */ struct timespec tp; + struct rusage ru; + struct thread *targettd; + struct proc *p; + int error, clockwhich; + clockid_t nwhich = 0; /* XXX: GCC */ + pid_t pid; + lwpid_t tid; LIN_SDT_PROBE2(time, linux_clock_gettime, entry, args->which, args->tp); @@ -203,7 +224,100 @@ linux_clock_gettime(struct thread *td, struct linux_clock_gettime_args *args) LIN_SDT_PROBE1(time, linux_clock_gettime, return, error); return (error); } - error = kern_clock_gettime(td, nwhich, &tp); + + switch (nwhich) { + case CLOCK_PROCESS_CPUTIME_ID: + clockwhich = LINUX_CPUCLOCK_WHICH(args->which); + pid = LINUX_CPUCLOCK_ID(args->which); + if (pid == 0) { + p = td->td_proc; + PROC_LOCK(p); + } else { + error = pget(pid, PGET_CANSEE, &p); + if (error != 0) + return (EINVAL); + } + switch (clockwhich) { + case LINUX_CPUCLOCK_PROF: + PROC_STATLOCK(p); + calcru(p, &ru.ru_utime, &ru.ru_stime); + PROC_STATUNLOCK(p); + PROC_UNLOCK(p); + timevaladd(&ru.ru_utime, &ru.ru_stime); + TIMEVAL_TO_TIMESPEC(&ru.ru_utime, &tp); + break; + case LINUX_CPUCLOCK_VIRT: + PROC_STATLOCK(p); + calcru(p, &ru.ru_utime, &ru.ru_stime); + PROC_STATUNLOCK(p); + PROC_UNLOCK(p); + TIMEVAL_TO_TIMESPEC(&ru.ru_utime, &tp); + break; + case LINUX_CPUCLOCK_SCHED: + PROC_UNLOCK(p); + error = kern_clock_getcpuclockid2(td, pid, + CPUCLOCK_WHICH_PID, &nwhich); + if (error != 0) + return (EINVAL); + error = kern_clock_gettime(td, nwhich, &tp); + break; + default: + PROC_UNLOCK(p); + return (EINVAL); + } + + break; + + case CLOCK_THREAD_CPUTIME_ID: + clockwhich = LINUX_CPUCLOCK_WHICH(args->which); + p = td->td_proc; + tid = LINUX_CPUCLOCK_ID(args->which); + if (tid == 0) { + targettd = td; + PROC_LOCK(p); + } else { + targettd = tdfind(tid, p->p_pid); + if (targettd == NULL) + return (EINVAL); + } + switch (clockwhich) { + case LINUX_CPUCLOCK_PROF: + PROC_STATLOCK(p); + thread_lock(targettd); + rufetchtd(targettd, &ru); + thread_unlock(targettd); + PROC_STATUNLOCK(p); + PROC_UNLOCK(p); + timevaladd(&ru.ru_utime, &ru.ru_stime); + TIMEVAL_TO_TIMESPEC(&ru.ru_utime, &tp); + break; + case LINUX_CPUCLOCK_VIRT: + PROC_STATLOCK(p); + thread_lock(targettd); + rufetchtd(targettd, &ru); + thread_unlock(targettd); + PROC_STATUNLOCK(p); + PROC_UNLOCK(p); + TIMEVAL_TO_TIMESPEC(&ru.ru_utime, &tp); + break; + case LINUX_CPUCLOCK_SCHED: + error = kern_clock_getcpuclockid2(td, tid, + CPUCLOCK_WHICH_TID, &nwhich); + PROC_UNLOCK(p); + if (error != 0) + return (EINVAL); + error = kern_clock_gettime(td, nwhich, &tp); + break; + default: + PROC_UNLOCK(p); + return (EINVAL); + } + break; + + default: + error = kern_clock_gettime(td, nwhich, &tp); + break; + } if (error != 0) { LIN_SDT_PROBE1(time, linux_clock_gettime, gettime_error, error); LIN_SDT_PROBE1(time, linux_clock_gettime, return, error); @@ -261,19 +375,16 @@ linux_clock_settime(struct thread *td, struct linux_clock_settime_args *args) int linux_clock_getres(struct thread *td, struct linux_clock_getres_args *args) { + struct proc *p; struct timespec ts; struct l_timespec lts; - int error; + int error, clockwhich; clockid_t nwhich = 0; /* XXX: GCC */ + pid_t pid; + lwpid_t tid; LIN_SDT_PROBE2(time, linux_clock_getres, entry, args->which, args->tp); - if (args->tp == NULL) { - LIN_SDT_PROBE0(time, linux_clock_getres, nullcall); - LIN_SDT_PROBE1(time, linux_clock_getres, return, 0); - return (0); - } - error = linux_to_native_clockid(&nwhich, args->which); if (error != 0) { LIN_SDT_PROBE1(time, linux_clock_getres, conversion_error, @@ -281,6 +392,59 @@ linux_clock_getres(struct thread *td, struct linux_clock_getres_args *args) LIN_SDT_PROBE1(time, linux_clock_getres, return, error); return (error); } + + /* + * Check user supplied clock id in case of per-process + * or thread-specific cpu-time clock. + */ + switch (nwhich) { + case CLOCK_THREAD_CPUTIME_ID: + tid = LINUX_CPUCLOCK_ID(args->which); + if (tid != 0) { + p = td->td_proc; + if (tdfind(tid, p->p_pid) == NULL) + return (ESRCH); + PROC_UNLOCK(p); + } + break; + case CLOCK_PROCESS_CPUTIME_ID: + pid = LINUX_CPUCLOCK_ID(args->which); + if (pid != 0) { + error = pget(pid, PGET_CANSEE, &p); + if (error != 0) + return (EINVAL); + PROC_UNLOCK(p); + } + break; + } + + if (args->tp == NULL) { + LIN_SDT_PROBE0(time, linux_clock_getres, nullcall); + LIN_SDT_PROBE1(time, linux_clock_getres, return, 0); + return (0); + } + + switch (nwhich) { + case CLOCK_THREAD_CPUTIME_ID: + case CLOCK_PROCESS_CPUTIME_ID: + clockwhich = LINUX_CPUCLOCK_WHICH(args->which); + switch (clockwhich) { + case LINUX_CPUCLOCK_PROF: + nwhich = CLOCK_PROF; + break; + case LINUX_CPUCLOCK_VIRT: + nwhich = CLOCK_VIRTUAL; + break; + case LINUX_CPUCLOCK_SCHED: + break; + default: + return (EINVAL); + } + break; + + default: + break; + } error = kern_clock_getres(td, nwhich, &ts); if (error != 0) { LIN_SDT_PROBE1(time, linux_clock_getres, getres_error, error); @@ -303,7 +467,7 @@ linux_nanosleep(struct thread *td, struct linux_nanosleep_args *args) struct timespec *rmtp; struct l_timespec lrqts, lrmts; struct timespec rqts, rmts; - int error; + int error, error2; LIN_SDT_PROBE2(time, linux_nanosleep, entry, args->rqtp, args->rmtp); @@ -315,9 +479,9 @@ linux_nanosleep(struct thread *td, struct linux_nanosleep_args *args) } if (args->rmtp != NULL) - rmtp = &rmts; + rmtp = &rmts; else - rmtp = NULL; + rmtp = NULL; error = linux_to_native_timespec(&rqts, &lrqts); if (error != 0) { @@ -326,25 +490,19 @@ linux_nanosleep(struct thread *td, struct linux_nanosleep_args *args) return (error); } error = kern_nanosleep(td, &rqts, rmtp); - if (error != 0) { - LIN_SDT_PROBE1(time, linux_nanosleep, nanosleep_error, error); - LIN_SDT_PROBE1(time, linux_nanosleep, return, error); - return (error); - } - if (args->rmtp != NULL) { - native_to_linux_timespec(&lrmts, rmtp); - error = copyout(&lrmts, args->rmtp, sizeof(lrmts)); - if (error != 0) { + native_to_linux_timespec(&lrmts, rmtp); + error2 = copyout(&lrmts, args->rmtp, sizeof(lrmts)); + if (error2 != 0) { LIN_SDT_PROBE1(time, linux_nanosleep, copyout_error, - error); - LIN_SDT_PROBE1(time, linux_nanosleep, return, error); - return (error); + error2); + LIN_SDT_PROBE1(time, linux_nanosleep, return, error2); + return (error2); } } - LIN_SDT_PROBE1(time, linux_nanosleep, return, 0); - return (0); + LIN_SDT_PROBE1(time, linux_nanosleep, return, error); + return (error); } int @@ -353,7 +511,7 @@ linux_clock_nanosleep(struct thread *td, struct linux_clock_nanosleep_args *args struct timespec *rmtp; struct l_timespec lrqts, lrmts; struct timespec rqts, rmts; - int error; + int error, error2; LIN_SDT_PROBE4(time, linux_clock_nanosleep, entry, args->which, args->flags, args->rqtp, args->rmtp); @@ -373,7 +531,7 @@ linux_clock_nanosleep(struct thread *td, struct linux_clock_nanosleep_args *args return (EINVAL); } - error = copyin(args->rqtp, &lrqts, sizeof lrqts); + error = copyin(args->rqtp, &lrqts, sizeof(lrqts)); if (error != 0) { LIN_SDT_PROBE1(time, linux_clock_nanosleep, copyin_error, error); @@ -382,9 +540,9 @@ linux_clock_nanosleep(struct thread *td, struct linux_clock_nanosleep_args *args } if (args->rmtp != NULL) - rmtp = &rmts; + rmtp = &rmts; else - rmtp = NULL; + rmtp = NULL; error = linux_to_native_timespec(&rqts, &lrqts); if (error != 0) { @@ -394,24 +552,19 @@ linux_clock_nanosleep(struct thread *td, struct linux_clock_nanosleep_args *args return (error); } error = kern_nanosleep(td, &rqts, rmtp); - if (error != 0) { - LIN_SDT_PROBE1(time, linux_clock_nanosleep, nanosleep_error, - error); - LIN_SDT_PROBE1(time, linux_clock_nanosleep, return, error); - return (error); - } - if (args->rmtp != NULL) { + /* XXX. Not for TIMER_ABSTIME */ native_to_linux_timespec(&lrmts, rmtp); - error = copyout(&lrmts, args->rmtp, sizeof lrmts ); - if (error != 0) { + error2 = copyout(&lrmts, args->rmtp, sizeof(lrmts)); + if (error2 != 0) { + LIN_SDT_PROBE1(time, linux_clock_nanosleep, + copyout_error, error2); LIN_SDT_PROBE1(time, linux_clock_nanosleep, - copyout_error, error); - LIN_SDT_PROBE1(time, linux_nanosleep, return, error); - return (error); + return, error2); + return (error2); } } - LIN_SDT_PROBE1(time, linux_clock_nanosleep, return, 0); - return (0); + LIN_SDT_PROBE1(time, linux_clock_nanosleep, return, error); + return (error); } diff --git a/sys/compat/linux/linux_timer.c b/sys/compat/linux/linux_timer.c index 92dae4c..7dbddbe 100644 --- a/sys/compat/linux/linux_timer.c +++ b/sys/compat/linux/linux_timer.c @@ -49,23 +49,6 @@ __FBSDID("$FreeBSD$"); #endif #include <compat/linux/linux_timer.h> -static int -linux_convert_l_clockid(clockid_t *clock_id) -{ - - switch (*clock_id) { - case LINUX_CLOCK_REALTIME: - *clock_id = CLOCK_REALTIME; - break; - case LINUX_CLOCK_MONOTONIC: - *clock_id = CLOCK_MONOTONIC; - break; - default: - return (EINVAL); - } - - return (0); -} static int linux_convert_l_sigevent(struct l_sigevent *l_sig, struct sigevent *sig) @@ -75,7 +58,7 @@ linux_convert_l_sigevent(struct l_sigevent *l_sig, struct sigevent *sig) switch (l_sig->sigev_notify) { case L_SIGEV_SIGNAL: sig->sigev_notify = SIGEV_SIGNAL; - CP(*l_sig, *sig, sigev_signo); + sig->sigev_signo = linux_to_bsd_signal(l_sig->sigev_signo); PTRIN_CP(*l_sig, *sig, sigev_value.sival_ptr); break; case L_SIGEV_NONE: @@ -92,7 +75,7 @@ linux_convert_l_sigevent(struct l_sigevent *l_sig, struct sigevent *sig) case L_SIGEV_THREAD_ID: sig->sigev_notify = SIGEV_THREAD_ID; CP2(*l_sig, *sig, _l_sigev_un._tid, sigev_notify_thread_id); - CP(*l_sig, *sig, sigev_signo); + sig->sigev_signo = linux_to_bsd_signal(l_sig->sigev_signo); PTRIN_CP(*l_sig, *sig, sigev_value.sival_ptr); break; default: @@ -106,6 +89,7 @@ linux_timer_create(struct thread *td, struct linux_timer_create_args *uap) { struct l_sigevent l_ev; struct sigevent ev, *evp; + clockid_t nwhich; int error, id; if (uap->evp == NULL) { @@ -119,10 +103,10 @@ linux_timer_create(struct thread *td, struct linux_timer_create_args *uap) return (error); evp = &ev; } - error = linux_convert_l_clockid(&uap->clock_id); + error = linux_to_native_clockid(&nwhich, uap->clock_id); if (error != 0) return (error); - error = kern_ktimer_create(td, uap->clock_id, evp, &id, -1); + error = kern_ktimer_create(td, nwhich, evp, &id, -1); if (error == 0) { error = copyout(&id, uap->timerid, sizeof(int)); if (error != 0) @@ -179,4 +163,3 @@ linux_timer_delete(struct thread *td, struct linux_timer_delete_args *uap) return (kern_ktimer_delete(td, uap->timerid)); } - diff --git a/sys/compat/linux/linux_timer.h b/sys/compat/linux/linux_timer.h index 4f64ee5..c79c08d 100644 --- a/sys/compat/linux/linux_timer.h +++ b/sys/compat/linux/linux_timer.h @@ -56,6 +56,23 @@ #define LINUX_CLOCK_SGI_CYCLE 10 #define LINUX_CLOCK_TAI 11 +#define LINUX_CPUCLOCK_PERTHREAD_MASK 4 +#define LINUX_CPUCLOCK_MASK 3 +#define LINUX_CPUCLOCK_WHICH(clock) \ + ((clock) & (clockid_t) LINUX_CPUCLOCK_MASK) +#define LINUX_CPUCLOCK_PROF 0 +#define LINUX_CPUCLOCK_VIRT 1 +#define LINUX_CPUCLOCK_SCHED 2 +#define LINUX_CPUCLOCK_MAX 3 +#define LINUX_CLOCKFD LINUX_CPUCLOCK_MAX +#define LINUX_CLOCKFD_MASK \ + (LINUX_CPUCLOCK_PERTHREAD_MASK|LINUX_CPUCLOCK_MASK) + +#define LINUX_CPUCLOCK_ID(clock) ((pid_t) ~((clock) >> 3)) +#define LINUX_CPUCLOCK_PERTHREAD(clock) \ + (((clock) & (clockid_t) LINUX_CPUCLOCK_PERTHREAD_MASK) != 0) + + #define L_SIGEV_SIGNAL 0 #define L_SIGEV_NONE 1 #define L_SIGEV_THREAD 2 @@ -94,4 +111,10 @@ struct l_itimerspec { struct l_timespec it_value; }; +void native_to_linux_timespec(struct l_timespec *, + struct timespec *); +int linux_to_native_timespec(struct timespec *, + struct l_timespec *); +int linux_to_native_clockid(clockid_t *, clockid_t); + #endif /* _LINUX_TIMER_H */ diff --git a/sys/compat/linux/linux_uid16.c b/sys/compat/linux/linux_uid16.c index a2c3214..9acc047 100644 --- a/sys/compat/linux/linux_uid16.c +++ b/sys/compat/linux/linux_uid16.c @@ -172,12 +172,12 @@ linux_setgroups16(struct thread *td, struct linux_setgroups16_args *args) LIN_SDT_PROBE1(uid16, linux_setgroups16, return, EINVAL); return (EINVAL); } - linux_gidset = malloc(ngrp * sizeof(*linux_gidset), M_TEMP, M_WAITOK); + linux_gidset = malloc(ngrp * sizeof(*linux_gidset), M_LINUX, M_WAITOK); error = copyin(args->gidset, linux_gidset, ngrp * sizeof(l_gid16_t)); if (error) { LIN_SDT_PROBE1(uid16, linux_setgroups16, copyin_error, error); LIN_SDT_PROBE1(uid16, linux_setgroups16, return, error); - free(linux_gidset, M_TEMP); + free(linux_gidset, M_LINUX); return (error); } newcred = crget(); @@ -219,7 +219,7 @@ linux_setgroups16(struct thread *td, struct linux_setgroups16_args *args) crfree(oldcred); error = 0; out: - free(linux_gidset, M_TEMP); + free(linux_gidset, M_LINUX); LIN_SDT_PROBE1(uid16, linux_setgroups16, return, error); return (error); @@ -260,14 +260,14 @@ linux_getgroups16(struct thread *td, struct linux_getgroups16_args *args) ngrp = 0; linux_gidset = malloc(bsd_gidsetsz * sizeof(*linux_gidset), - M_TEMP, M_WAITOK); + M_LINUX, M_WAITOK); while (ngrp < bsd_gidsetsz) { linux_gidset[ngrp] = bsd_gidset[ngrp + 1]; ngrp++; } error = copyout(linux_gidset, args->gidset, ngrp * sizeof(l_gid16_t)); - free(linux_gidset, M_TEMP); + free(linux_gidset, M_LINUX); if (error) { LIN_SDT_PROBE1(uid16, linux_getgroups16, copyout_error, error); LIN_SDT_PROBE1(uid16, linux_getgroups16, return, error); diff --git a/sys/compat/linux/linux_util.c b/sys/compat/linux/linux_util.c index 76c210c..fe49120 100644 --- a/sys/compat/linux/linux_util.c +++ b/sys/compat/linux/linux_util.c @@ -53,48 +53,14 @@ __FBSDID("$FreeBSD$"); #include <machine/stdarg.h> #include <compat/linux/linux_util.h> -#ifdef COMPAT_LINUX32 -#include <machine/../linux32/linux.h> -#else -#include <machine/../linux/linux.h> -#endif -#include <compat/linux/linux_dtrace.h> +MALLOC_DEFINE(M_LINUX, "linux", "Linux mode structures"); +MALLOC_DEFINE(M_EPOLL, "lepoll", "Linux events structures"); +MALLOC_DEFINE(M_FUTEX, "futex", "Linux futexes"); +MALLOC_DEFINE(M_FUTEX_WP, "futex wp", "Linux futex waiting proc"); const char linux_emul_path[] = "/compat/linux"; -/* DTrace init */ -LIN_SDT_PROVIDER_DECLARE(LINUX_DTRACE); - -/** - * DTrace probes in this module. - */ -LIN_SDT_PROBE_DEFINE5(util, linux_emul_convpath, entry, "const char *", - "enum uio_seg", "char **", "int", "int"); -LIN_SDT_PROBE_DEFINE1(util, linux_emul_convpath, return, "int"); -LIN_SDT_PROBE_DEFINE1(util, linux_msg, entry, "const char *"); -LIN_SDT_PROBE_DEFINE0(util, linux_msg, return); -LIN_SDT_PROBE_DEFINE2(util, linux_driver_get_name_dev, entry, "device_t", - "const char *"); -LIN_SDT_PROBE_DEFINE0(util, linux_driver_get_name_dev, nullcall); -LIN_SDT_PROBE_DEFINE1(util, linux_driver_get_name_dev, return, "char *"); -LIN_SDT_PROBE_DEFINE3(util, linux_driver_get_major_minor, entry, "char *", - "int *", "int *"); -LIN_SDT_PROBE_DEFINE0(util, linux_driver_get_major_minor, nullcall); -LIN_SDT_PROBE_DEFINE1(util, linux_driver_get_major_minor, notfound, "char *"); -LIN_SDT_PROBE_DEFINE3(util, linux_driver_get_major_minor, return, "int", - "int", "int"); -LIN_SDT_PROBE_DEFINE0(util, linux_get_char_devices, entry); -LIN_SDT_PROBE_DEFINE1(util, linux_get_char_devices, return, "char *"); -LIN_SDT_PROBE_DEFINE1(util, linux_free_get_char_devices, entry, "char *"); -LIN_SDT_PROBE_DEFINE0(util, linux_free_get_char_devices, return); -LIN_SDT_PROBE_DEFINE1(util, linux_device_register_handler, entry, - "struct linux_device_handler *"); -LIN_SDT_PROBE_DEFINE1(util, linux_device_register_handler, return, "int"); -LIN_SDT_PROBE_DEFINE1(util, linux_device_unregister_handler, entry, - "struct linux_device_handler *"); -LIN_SDT_PROBE_DEFINE1(util, linux_device_unregister_handler, return, "int"); - /* * Search an alternate path before passing pathname arguments on to * system calls. Useful for keeping a separate 'emulation tree'. @@ -108,13 +74,9 @@ linux_emul_convpath(struct thread *td, const char *path, enum uio_seg pathseg, { int retval; - LIN_SDT_PROBE5(util, linux_emul_convpath, entry, path, pathseg, pbuf, - cflag, dfd); - retval = kern_alternate_path(td, linux_emul_path, path, pathseg, pbuf, cflag, dfd); - LIN_SDT_PROBE1(util, linux_emul_convpath, return, retval); return (retval); } @@ -124,16 +86,12 @@ linux_msg(const struct thread *td, const char *fmt, ...) va_list ap; struct proc *p; - LIN_SDT_PROBE1(util, linux_msg, entry, fmt); - p = td->td_proc; printf("linux: pid %d (%s): ", (int)p->p_pid, p->p_comm); va_start(ap, fmt); vprintf(fmt, ap); va_end(ap); printf("\n"); - - LIN_SDT_PROBE0(util, linux_msg, return); } struct device_element @@ -156,24 +114,14 @@ linux_driver_get_name_dev(device_t dev) struct device_element *de; const char *device_name = device_get_name(dev); - LIN_SDT_PROBE2(util, linux_driver_get_name_dev, entry, dev, - device_name); - - if (device_name == NULL) { - LIN_SDT_PROBE0(util, linux_driver_get_name_dev, nullcall); - LIN_SDT_PROBE1(util, linux_driver_get_name_dev, return, NULL); + if (device_name == NULL) return NULL; - } TAILQ_FOREACH(de, &devices, list) { - if (strcmp(device_name, de->entry.bsd_driver_name) == 0) { - LIN_SDT_PROBE1(util, linux_driver_get_name_dev, return, - de->entry.linux_driver_name); + if (strcmp(device_name, de->entry.bsd_driver_name) == 0) return (de->entry.linux_driver_name); - } } - LIN_SDT_PROBE1(util, linux_driver_get_name_dev, return, NULL); - return NULL; + return (NULL); } int @@ -181,15 +129,8 @@ linux_driver_get_major_minor(const char *node, int *major, int *minor) { struct device_element *de; - LIN_SDT_PROBE3(util, linux_driver_get_major_minor, entry, node, major, - minor); - - if (node == NULL || major == NULL || minor == NULL) { - LIN_SDT_PROBE0(util, linux_driver_get_major_minor, nullcall); - LIN_SDT_PROBE3(util, linux_driver_get_major_minor, return, 1, - 0, 0); + if (node == NULL || major == NULL || minor == NULL) return 1; - } if (strlen(node) > strlen("pts/") && strncmp(node, "pts/", strlen("pts/")) == 0) { @@ -204,25 +145,18 @@ linux_driver_get_major_minor(const char *node, int *major, int *minor) *major = 136 + (devno / 256); *minor = devno % 256; - LIN_SDT_PROBE3(util, linux_driver_get_major_minor, return, 0, - *major, *minor); - return 0; + return (0); } TAILQ_FOREACH(de, &devices, list) { if (strcmp(node, de->entry.bsd_device_name) == 0) { *major = de->entry.linux_major; *minor = de->entry.linux_minor; - - LIN_SDT_PROBE3(util, linux_driver_get_major_minor, - return, 0, *major, *minor); - return 0; + return (0); } } - LIN_SDT_PROBE1(util, linux_driver_get_major_minor, notfound, node); - LIN_SDT_PROBE3(util, linux_driver_get_major_minor, return, 1, 0, 0); - return 1; + return (1); } char * @@ -233,8 +167,6 @@ linux_get_char_devices() char formated[256]; int current_size = 0, string_size = 1024; - LIN_SDT_PROBE0(util, linux_get_char_devices, entry); - string = malloc(string_size, M_LINUX, M_WAITOK); string[0] = '\000'; last = ""; @@ -261,19 +193,14 @@ linux_get_char_devices() } } - LIN_SDT_PROBE1(util, linux_get_char_devices, return, string); - return string; + return (string); } void linux_free_get_char_devices(char *string) { - LIN_SDT_PROBE1(util, linux_get_char_devices, entry, string); - free(string, M_LINUX); - - LIN_SDT_PROBE0(util, linux_get_char_devices, return); } static int linux_major_starting = 200; @@ -283,13 +210,8 @@ linux_device_register_handler(struct linux_device_handler *d) { struct device_element *de; - LIN_SDT_PROBE1(util, linux_device_register_handler, entry, d); - - if (d == NULL) { - LIN_SDT_PROBE1(util, linux_device_register_handler, return, - EINVAL); + if (d == NULL) return (EINVAL); - } de = malloc(sizeof(*de), M_LINUX, M_WAITOK); if (d->linux_major < 0) { @@ -300,7 +222,6 @@ linux_device_register_handler(struct linux_device_handler *d) /* Add the element to the list, sorted on span. */ TAILQ_INSERT_TAIL(&devices, de, list); - LIN_SDT_PROBE1(util, linux_device_register_handler, return, 0); return (0); } @@ -309,25 +230,17 @@ linux_device_unregister_handler(struct linux_device_handler *d) { struct device_element *de; - LIN_SDT_PROBE1(util, linux_device_unregister_handler, entry, d); - - if (d == NULL) { - LIN_SDT_PROBE1(util, linux_device_unregister_handler, return, - EINVAL); + if (d == NULL) return (EINVAL); - } TAILQ_FOREACH(de, &devices, list) { if (bcmp(d, &de->entry, sizeof(*d)) == 0) { TAILQ_REMOVE(&devices, de, list); free(de, M_LINUX); - LIN_SDT_PROBE1(util, linux_device_unregister_handler, - return, 0); return (0); } } - LIN_SDT_PROBE1(util, linux_device_unregister_handler, return, EINVAL); return (EINVAL); } diff --git a/sys/compat/linux/linux_util.h b/sys/compat/linux/linux_util.h index 6be0392..a52a7b9 100644 --- a/sys/compat/linux/linux_util.h +++ b/sys/compat/linux/linux_util.h @@ -44,6 +44,11 @@ #include <sys/cdefs.h> #include <sys/uio.h> +MALLOC_DECLARE(M_LINUX); +MALLOC_DECLARE(M_EPOLL); +MALLOC_DECLARE(M_FUTEX); +MALLOC_DECLARE(M_FUTEX_WP); + extern const char linux_emul_path[]; int linux_emul_convpath(struct thread *, const char *, enum uio_seg, char **, int, int); @@ -115,7 +120,6 @@ void linux_free_get_char_devices(char *string); #define LINUX_CTRFMT(nm, fmt) #nm"("fmt")" #define LINUX_CTR6(f, m, p1, p2, p3, p4, p5, p6) do { \ - if (ldebug(f)) \ CTR6(KTR_LINUX, LINUX_CTRFMT(f, m), \ p1, p2, p3, p4, p5, p6); \ } while (0) diff --git a/sys/compat/linux/linux_vdso.c b/sys/compat/linux/linux_vdso.c new file mode 100644 index 0000000..5ab0ee6 --- /dev/null +++ b/sys/compat/linux/linux_vdso.c @@ -0,0 +1,244 @@ +/*- + * Copyright (c) 2013 Dmitry Chagin + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer + * in this position and unchanged. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include "opt_compat.h" + +#if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) +#define __ELF_WORD_SIZE 32 +#else +#define __ELF_WORD_SIZE 64 +#endif + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/elf.h> +#include <sys/kernel.h> +#include <sys/lock.h> +#include <sys/rwlock.h> +#include <sys/queue.h> +#include <sys/sysent.h> + +#include <vm/vm.h> +#include <vm/vm_param.h> +#include <vm/pmap.h> +#include <vm/vm_extern.h> +#include <vm/vm_kern.h> +#include <vm/vm_map.h> +#include <vm/vm_object.h> +#include <vm/vm_page.h> +#include <vm/vm_pager.h> + +#include <compat/linux/linux_vdso.h> + +SLIST_HEAD(, linux_vdso_sym) __elfN(linux_vdso_syms) = + SLIST_HEAD_INITIALIZER(__elfN(linux_vdso_syms)); + +static int __elfN(symtabindex); +static int __elfN(symstrindex); + +static void +__elfN(linux_vdso_lookup)(Elf_Ehdr *, struct linux_vdso_sym *); + + +void +__elfN(linux_vdso_sym_init)(struct linux_vdso_sym *s) +{ + + SLIST_INSERT_HEAD(&__elfN(linux_vdso_syms), s, sym); +} + +vm_object_t +__elfN(linux_shared_page_init)(char **mapping) +{ + vm_page_t m; + vm_object_t obj; + vm_offset_t addr; + + obj = vm_pager_allocate(OBJT_PHYS, 0, PAGE_SIZE, + VM_PROT_DEFAULT, 0, NULL); + VM_OBJECT_WLOCK(obj); + m = vm_page_grab(obj, 0, VM_ALLOC_NOBUSY | VM_ALLOC_ZERO); + m->valid = VM_PAGE_BITS_ALL; + VM_OBJECT_WUNLOCK(obj); + addr = kva_alloc(PAGE_SIZE); + pmap_qenter(addr, &m, 1); + *mapping = (char *)addr; + return (obj); +} + +void +__elfN(linux_shared_page_fini)(vm_object_t obj) +{ + + vm_object_deallocate(obj); +} + +void +__elfN(linux_vdso_fixup)(struct sysentvec *sv) +{ + Elf_Ehdr *ehdr; + Elf_Shdr *shdr; + int i; + + ehdr = (Elf_Ehdr *) sv->sv_sigcode; + + if (!IS_ELF(*ehdr) || + ehdr->e_ident[EI_CLASS] != ELF_TARG_CLASS || + ehdr->e_ident[EI_DATA] != ELF_TARG_DATA || + ehdr->e_ident[EI_VERSION] != EV_CURRENT || + ehdr->e_shoff == 0 || + ehdr->e_shentsize != sizeof(Elf_Shdr)) + panic("Linux invalid vdso header.\n"); + + if (ehdr->e_type != ET_DYN) + panic("Linux invalid vdso header.\n"); + + shdr = (Elf_Shdr *) ((caddr_t)ehdr + ehdr->e_shoff); + + __elfN(symtabindex) = -1; + __elfN(symstrindex) = -1; + for (i = 0; i < ehdr->e_shnum; i++) { + if (shdr[i].sh_size == 0) + continue; + if (shdr[i].sh_type == SHT_DYNSYM) { + __elfN(symtabindex) = i; + __elfN(symstrindex) = shdr[i].sh_link; + } + } + + if (__elfN(symtabindex) == -1 || __elfN(symstrindex) == -1) + panic("Linux invalid vdso header.\n"); + + ehdr->e_ident[EI_OSABI] = ELFOSABI_LINUX; +} + +void +__elfN(linux_vdso_reloc)(struct sysentvec *sv, long vdso_adjust) +{ + struct linux_vdso_sym *lsym; + Elf_Ehdr *ehdr; + Elf_Phdr *phdr; + Elf_Shdr *shdr; + Elf_Dyn *dyn; + Elf_Sym *sym; + int i, symcnt; + + ehdr = (Elf_Ehdr *) sv->sv_sigcode; + + /* Adjust our so relative to the sigcode_base */ + if (vdso_adjust != 0) { + ehdr->e_entry += vdso_adjust; + phdr = (Elf_Phdr *)((caddr_t)ehdr + ehdr->e_phoff); + + /* phdrs */ + for (i = 0; i < ehdr->e_phnum; i++) { + phdr[i].p_vaddr += vdso_adjust; + if (phdr[i].p_type != PT_DYNAMIC) + continue; + dyn = (Elf_Dyn *)((caddr_t)ehdr + phdr[i].p_offset); + for(; dyn->d_tag != DT_NULL; dyn++) { + switch (dyn->d_tag) { + case DT_PLTGOT: + case DT_HASH: + case DT_STRTAB: + case DT_SYMTAB: + case DT_RELA: + case DT_INIT: + case DT_FINI: + case DT_REL: + case DT_DEBUG: + case DT_JMPREL: + case DT_VERSYM: + case DT_VERDEF: + case DT_VERNEED: + case DT_ADDRRNGLO ... DT_ADDRRNGHI: + dyn->d_un.d_ptr += vdso_adjust; + break; + case DT_ENCODING ... DT_LOOS-1: + case DT_LOOS ... DT_HIOS: + if (dyn->d_tag >= DT_ENCODING && + (dyn->d_tag & 1) == 0) + dyn->d_un.d_ptr += vdso_adjust; + break; + default: + break; + } + } + } + + /* sections */ + shdr = (Elf_Shdr *)((caddr_t)ehdr + ehdr->e_shoff); + for(i = 0; i < ehdr->e_shnum; i++) { + if (!(shdr[i].sh_flags & SHF_ALLOC)) + continue; + shdr[i].sh_addr += vdso_adjust; + if (shdr[i].sh_type != SHT_SYMTAB && + shdr[i].sh_type != SHT_DYNSYM) + continue; + + sym = (Elf_Sym *)((caddr_t)ehdr + shdr[i].sh_offset); + symcnt = shdr[i].sh_size / sizeof(*sym); + + for(i = 0; i < symcnt; i++, sym++) { + if (sym->st_shndx == SHN_UNDEF || + sym->st_shndx == SHN_ABS) + continue; + sym->st_value += vdso_adjust; + } + } + } + + SLIST_FOREACH(lsym, &__elfN(linux_vdso_syms), sym) + __elfN(linux_vdso_lookup)(ehdr, lsym); +} + +static void +__elfN(linux_vdso_lookup)(Elf_Ehdr *ehdr, struct linux_vdso_sym *vsym) +{ + vm_offset_t strtab, symname; + uint32_t symcnt; + Elf_Shdr *shdr; + int i; + + shdr = (Elf_Shdr *) ((caddr_t)ehdr + ehdr->e_shoff); + + strtab = (vm_offset_t)((caddr_t)ehdr + + shdr[__elfN(symstrindex)].sh_offset); + Elf_Sym *sym = (Elf_Sym *)((caddr_t)ehdr + + shdr[__elfN(symtabindex)].sh_offset); + symcnt = shdr[__elfN(symtabindex)].sh_size / sizeof(*sym); + + for (i = 0; i < symcnt; ++i, ++sym) { + symname = strtab + sym->st_name; + if (strncmp(vsym->symname, (char *)symname, vsym->size) == 0) { + *vsym->ptr = (uintptr_t)sym->st_value; + break; + } + } +} diff --git a/sys/compat/linux/linux_vdso.h b/sys/compat/linux/linux_vdso.h new file mode 100644 index 0000000..e11ee8a --- /dev/null +++ b/sys/compat/linux/linux_vdso.h @@ -0,0 +1,65 @@ +/*- + * Copyright (c) 2013 Dmitry Chagin + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer + * in this position and unchanged. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef _LINUX_VDSO_H_ +#define _LINUX_VDSO_H_ + +#include <sys/types.h> + +struct linux_vdso_sym { + SLIST_ENTRY(linux_vdso_sym) sym; + uint32_t size; + uintptr_t * ptr; + char symname[]; +}; + +vm_object_t __elfN(linux_shared_page_init)(char **); +void __elfN(linux_shared_page_fini)(vm_object_t); +void __elfN(linux_vdso_fixup)(struct sysentvec *); +void __elfN(linux_vdso_reloc)(struct sysentvec *, long); +void __elfN(linux_vdso_sym_init)(struct linux_vdso_sym *); + +#define LINUX_VDSO_SYM_INTPTR(name) \ +uintptr_t name; \ +LINUX_VDSO_SYM_DEFINE(name) + +#define LINUX_VDSO_SYM_CHAR(name) \ +const char * name; \ +LINUX_VDSO_SYM_DEFINE(name) + +#define LINUX_VDSO_SYM_DEFINE(name) \ +static struct linux_vdso_sym name ## sym = { \ + .symname = #name, \ + .size = sizeof(#name), \ + .ptr = (uintptr_t *)&name \ +}; \ +SYSINIT(__elfN(name ## _sym_init), SI_SUB_EXEC, \ + SI_ORDER_FIRST, __elfN(linux_vdso_sym_init), &name ## sym); \ +struct __hack + +#endif /* _LINUX_VDSO_H_ */ diff --git a/sys/compat/linux/stats_timing.d b/sys/compat/linux/stats_timing.d index d0b6f73..1b60dc9 100644 --- a/sys/compat/linux/stats_timing.d +++ b/sys/compat/linux/stats_timing.d @@ -39,7 +39,6 @@ * possible for a given application * - graph of longest running (CPU-time!) function in total * - may help finding problem cases in the kernel code - * - timing statistics for the emul_lock * - graph of longest held (CPU-time!) locks */ diff --git a/sys/compat/svr4/svr4_misc.c b/sys/compat/svr4/svr4_misc.c index e244700..5d1a409 100644 --- a/sys/compat/svr4/svr4_misc.c +++ b/sys/compat/svr4/svr4_misc.c @@ -875,9 +875,9 @@ svr4_sys_times(td, uap) p = td->td_proc; PROC_LOCK(p); - PROC_SLOCK(p); + PROC_STATLOCK(p); calcru(p, &utime, &stime); - PROC_SUNLOCK(p); + PROC_STATUNLOCK(p); calccru(p, &cutime, &cstime); PROC_UNLOCK(p); @@ -1288,9 +1288,9 @@ loop: pid = p->p_pid; status = p->p_xstat; ru = p->p_ru; - PROC_SLOCK(p); + PROC_STATLOCK(p); calcru(p, &ru.ru_utime, &ru.ru_stime); - PROC_SUNLOCK(p); + PROC_STATUNLOCK(p); PROC_UNLOCK(p); sx_sunlock(&proctree_lock); @@ -1315,9 +1315,9 @@ loop: pid = p->p_pid; status = W_STOPCODE(p->p_xstat); ru = p->p_ru; - PROC_SLOCK(p); + PROC_STATLOCK(p); calcru(p, &ru.ru_utime, &ru.ru_stime); - PROC_SUNLOCK(p); + PROC_STATUNLOCK(p); PROC_UNLOCK(p); if (((uap->options & SVR4_WNOWAIT)) == 0) { @@ -1339,9 +1339,9 @@ loop: pid = p->p_pid; ru = p->p_ru; status = SIGCONT; - PROC_SLOCK(p); + PROC_STATLOCK(p); calcru(p, &ru.ru_utime, &ru.ru_stime); - PROC_SUNLOCK(p); + PROC_STATUNLOCK(p); PROC_UNLOCK(p); if (((uap->options & SVR4_WNOWAIT)) == 0) { diff --git a/sys/compat/svr4/svr4_sysvec.c b/sys/compat/svr4/svr4_sysvec.c index 561a838..125a7d8 100644 --- a/sys/compat/svr4/svr4_sysvec.c +++ b/sys/compat/svr4/svr4_sysvec.c @@ -196,6 +196,7 @@ struct sysentvec svr4_sysvec = { .sv_fetch_syscall_args = cpu_fetch_syscall_args, .sv_syscallnames = NULL, .sv_schedtail = NULL, + .sv_thread_detach = NULL, }; const char svr4_emul_path[] = "/compat/svr4"; |