summaryrefslogtreecommitdiffstats
path: root/sys/i386/linux/linux_machdep.c
diff options
context:
space:
mode:
authornetchild <netchild@FreeBSD.org>2006-08-15 12:54:30 +0000
committernetchild <netchild@FreeBSD.org>2006-08-15 12:54:30 +0000
commitec2ba5d85d7c127eef674f77e45cc0bea81d8850 (patch)
treeeefd61a061ceacbae59d210a958f6ea28f9d2b34 /sys/i386/linux/linux_machdep.c
parente8cb5b55782d998cb82d772021c355c69edfaf5e (diff)
downloadFreeBSD-src-ec2ba5d85d7c127eef674f77e45cc0bea81d8850.zip
FreeBSD-src-ec2ba5d85d7c127eef674f77e45cc0bea81d8850.tar.gz
Add the linux 2.6.x stuff (not used by default!):
- TLS - complete
- pid/tid mangling - complete
- thread area - complete
- futexes - complete with issues
- clone() extension - complete with some possible minor issues
- mq*/timer*/clock* stuff - complete but untested, and the mq* stuff is disabled when not built as part of the kernel with native FreeBSD mq* support (module support for this will come later)

Tested with:
- linux-firefox - works, tested
- linux-opera - works, tested
- linux-realplay - doesn't work, issue with futexes
- linux-skype - doesn't work, issue with futexes
- linux-rt2-demo - works, tested
- linux-acroread - doesn't work, unknown reason (coredump) and sometimes issue with futexes
- various unix utilities in linux-base-gentoo3 and linux-base-fc4: everything tried worked

On amd64 not everything is supported like on i386; the catch-up is planned for later, when the remaining bugs in the new functions are fixed.

To test this new stuff, you have to run: sysctl compat.linux.osrelease=2.6.16
To switch back, use: sysctl compat.linux.osrelease=2.4.2
Don't switch while running a linux program; strange things may or may not happen.

Sponsored by: Google SoC 2006
Submitted by: rdivacky
Some suggestions/help by: jhb, kib, manu@NetBSD.org, netchild
Diffstat (limited to 'sys/i386/linux/linux_machdep.c')
-rw-r--r--sys/i386/linux/linux_machdep.c381
1 files changed, 356 insertions, 25 deletions
diff --git a/sys/i386/linux/linux_machdep.c b/sys/i386/linux/linux_machdep.c
index 68e7039..1b73e7e 100644
--- a/sys/i386/linux/linux_machdep.c
+++ b/sys/i386/linux/linux_machdep.c
@@ -36,13 +36,16 @@ __FBSDID("$FreeBSD$");
#include <sys/malloc.h>
#include <sys/mman.h>
#include <sys/mutex.h>
+#include <sys/sx.h>
#include <sys/proc.h>
+#include <sys/queue.h>
#include <sys/resource.h>
#include <sys/resourcevar.h>
#include <sys/signalvar.h>
#include <sys/syscallsubr.h>
#include <sys/sysproto.h>
#include <sys/unistd.h>
+#include <sys/wait.h>
#include <machine/frame.h>
#include <machine/psl.h>
@@ -58,6 +61,16 @@ __FBSDID("$FreeBSD$");
#include <compat/linux/linux_ipc.h>
#include <compat/linux/linux_signal.h>
#include <compat/linux/linux_util.h>
+#include <compat/linux/linux_emul.h>
+
+#include <i386/include/pcb.h> /* needed for pcb definition in linux_set_thread_area */
+
+#include "opt_posix.h"
+
+extern struct sx emul_shared_lock;
+extern struct sx emul_lock;
+
+extern struct sysentvec elf32_freebsd_sysvec; /* defined in i386/i386/elf_machdep.c */
struct l_descriptor {
l_uint entry_number;
@@ -122,6 +135,14 @@ linux_execve(struct thread *td, struct linux_execve_args *args)
free(newpath, M_TEMP);
if (error == 0)
error = kern_execve(td, &eargs, NULL);
+ if (error == 0)
+ /* linux process can exec fbsd one, dont attempt
+ * to create emuldata for such process using
+ * linux_proc_init, this leads to a panic on KASSERT
+ * because such process has p->p_emuldata == NULL
+ */
+ if (td->td_proc->p_sysent == &elf_linux_sysvec)
+ error = linux_proc_init(td, 0, 0);
return (error);
}
@@ -287,6 +308,10 @@ linux_fork(struct thread *td, struct linux_fork_args *args)
if (td->td_retval[1] == 1)
td->td_retval[0] = 0;
+ error = linux_proc_init(td, td->td_retval[0], 0);
+ if (error)
+ return (error);
+
return (0);
}
@@ -305,18 +330,12 @@ linux_vfork(struct thread *td, struct linux_vfork_args *args)
/* Are we the child? */
if (td->td_retval[1] == 1)
td->td_retval[0] = 0;
+ error = linux_proc_init(td, td->td_retval[0], 0);
+ if (error)
+ return (error);
return (0);
}
-#define CLONE_VM 0x100
-#define CLONE_FS 0x200
-#define CLONE_FILES 0x400
-#define CLONE_SIGHAND 0x800
-#define CLONE_PID 0x1000
-#define CLONE_THREAD 0x10000
-
-#define THREADING_FLAGS (CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND)
-
int
linux_clone(struct thread *td, struct linux_clone_args *args)
{
@@ -324,19 +343,16 @@ linux_clone(struct thread *td, struct linux_clone_args *args)
struct proc *p2;
struct thread *td2;
int exit_signal;
+ struct linux_emuldata *em;
#ifdef DEBUG
if (ldebug(clone)) {
- printf(ARGS(clone, "flags %x, stack %x"),
- (unsigned int)args->flags, (unsigned int)args->stack);
- if (args->flags & CLONE_PID)
- printf(LMSG("CLONE_PID not yet supported"));
+ printf(ARGS(clone, "flags %x, stack %x, parent tid: %x, child tid: %x"),
+ (unsigned int)args->flags, (unsigned int)args->stack,
+ (unsigned int)args->parent_tidptr, (unsigned int)args->child_tidptr);
}
#endif
- if (!args->stack)
- return (EINVAL);
-
exit_signal = args->flags & 0x000000ff;
if (exit_signal >= LINUX_NSIG)
return (EINVAL);
@@ -371,12 +387,118 @@ linux_clone(struct thread *td, struct linux_clone_args *args)
if (error)
return (error);
+ /* create the emuldata */
+ error = linux_proc_init(td, p2->p_pid, args->flags);
+ /* reference it - no need to check this */
+ em = em_find(p2, EMUL_UNLOCKED);
+ KASSERT(em != NULL, ("clone: emuldata not found.\n"));
+ /* and adjust it */
+ if (args->flags & CLONE_PARENT_SETTID) {
+ if (args->parent_tidptr == NULL) {
+ EMUL_UNLOCK(&emul_lock);
+ return (EINVAL);
+ }
+ error = copyout(&p2->p_pid, args->parent_tidptr, sizeof(p2->p_pid));
+ if (error) {
+ EMUL_UNLOCK(&emul_lock);
+ return (error);
+ }
+ }
+
+ if (args->flags & CLONE_PARENT) {
+#ifdef DEBUG
+ printf("linux_clone: CLONE_PARENT\n");
+#endif
+ }
+
+ if (args->flags & CLONE_THREAD) {
+ /* XXX: linux mangles pgrp and pptr somehow
+ * I think it might be this but I am not sure.
+ */
+#ifdef notyet
+ p2->p_pgrp = td->td_proc->p_pgrp;
+ p2->p_pptr = td->td_proc->p_pptr;
+#endif
+ exit_signal = 0;
+#ifdef DEBUG
+ printf("linux_clone: CLONE_THREADS\n");
+#endif
+ }
+
+ if (args->flags & CLONE_CHILD_SETTID)
+ em->child_set_tid = args->child_tidptr;
+ else
+ em->child_set_tid = NULL;
+
+ if (args->flags & CLONE_CHILD_CLEARTID)
+ em->child_clear_tid = args->child_tidptr;
+ else
+ em->child_clear_tid = NULL;
+ EMUL_UNLOCK(&emul_lock);
PROC_LOCK(p2);
p2->p_sigparent = exit_signal;
PROC_UNLOCK(p2);
td2 = FIRST_THREAD_IN_PROC(p2);
- td2->td_frame->tf_esp = (unsigned int)args->stack;
+ /* in a case of stack = NULL we are supposed to COW calling process stack
+ * this is what normal fork() does so we just keep the tf_esp arg intact
+ */
+ if (args->stack)
+ td2->td_frame->tf_esp = (unsigned int)args->stack;
+
+ if (args->flags & CLONE_SETTLS) {
+ struct l_user_desc info;
+ int idx;
+ int a[2];
+ struct segment_descriptor sd;
+
+ error = copyin((void *)td->td_frame->tf_esi, &info, sizeof(struct l_user_desc));
+ if (error)
+ return (error);
+
+ idx = info.entry_number;
+
+ /* looks like we're getting the idx we returned
+ * in the set_thread_area() syscall
+ */
+ if (idx != 6 && idx != 3)
+ return (EINVAL);
+
+ /* this doesnt happen in practice */
+ if (idx == 6) {
+ /* we might copy out the entry_number as 3 */
+ info.entry_number = 3;
+ error = copyout(&info, (void *) td->td_frame->tf_esi, sizeof(struct l_user_desc));
+ if (error)
+ return (error);
+ }
+
+ a[0] = LDT_entry_a(&info);
+ a[1] = LDT_entry_b(&info);
+
+ memcpy(&sd, &a, sizeof(a));
+#ifdef DEBUG
+ if (ldebug(clone))
+ printf("Segment created in clone with CLONE_SETTLS: lobase: %x, hibase: %x, lolimit: %x, hilimit: %x, type: %i, dpl: %i, p: %i, xx: %i, def32: %i, gran: %i\n", sd.sd_lobase,
+ sd.sd_hibase,
+ sd.sd_lolimit,
+ sd.sd_hilimit,
+ sd.sd_type,
+ sd.sd_dpl,
+ sd.sd_p,
+ sd.sd_xx,
+ sd.sd_def32,
+ sd.sd_gran);
+#endif
+
+ /* this is taken from i386 version of cpu_set_user_tls() */
+ critical_enter();
+ /* set %gs */
+ td2->td_pcb->pcb_gsd = sd;
+ PCPU_GET(fsgs_gdt)[1] = sd;
+ load_gs(GSEL(GUGS_SEL, SEL_UPL));
+ critical_exit();
+ }
#ifdef DEBUG
if (ldebug(clone))
@@ -847,25 +969,234 @@ linux_ftruncate64(struct thread *td, struct linux_ftruncate64_args *args)
int
linux_set_thread_area(struct thread *td, struct linux_set_thread_area_args *args)
{
- /*
- * Return an error code instead of raising a SIGSYS so that
- * the caller will fall back to simpler LDT methods.
+ struct l_user_desc info;
+ int error;
+ int idx;
+ int a[2];
+ struct segment_descriptor sd;
+
+ error = copyin(args->desc, &info, sizeof(struct l_user_desc));
+ if (error)
+ return (error);
+
+#ifdef DEBUG
+ if (ldebug(set_thread_area))
+ printf(ARGS(set_thread_area, "%i, %x, %x, %i, %i, %i, %i, %i, %i\n"),
+ info.entry_number,
+ info.base_addr,
+ info.limit,
+ info.seg_32bit,
+ info.contents,
+ info.read_exec_only,
+ info.limit_in_pages,
+ info.seg_not_present,
+ info.useable);
+#endif
+
+ idx = info.entry_number;
+ /* Semantics of linux version: every thread in the system has array
+ * of 3 tls descriptors. 1st is GLIBC TLS, 2nd is WINE, 3rd unknown. This
+ * syscall loads one of the selected tls decriptors with a value
+ * and also loads GDT descriptors 6, 7 and 8 with the content of the per-thread
+ * descriptors.
+ *
+ * Semantics of fbsd version: I think we can ignore that linux has 3 per-thread
+ * descriptors and use just the 1st one. The tls_array[] is used only in
+ * set/get-thread_area() syscalls and for loading the GDT descriptors. In fbsd
+ * we use just one GDT descriptor for TLS so we will load just one.
+ * XXX: this doesnt work when user-space process tries to use more then 1 TLS segment
+ * comment in the linux sources says wine might do that.
*/
- return (ENOSYS);
+
+ /* we support just GLIBC TLS now
+ * we should let 3 proceed as well because we use this segment so
+ * if code does two subsequent calls it should succeed
+ */
+ if (idx != 6 && idx != -1 && idx != 3)
+ return (EINVAL);
+
+ /* we have to copy out the GDT entry we use
+ * FreeBSD uses GDT entry #3 for storing %gs so load that
+ * XXX: what if userspace program doesnt check this value and tries
+ * to use 6, 7 or 8?
+ */
+ idx = info.entry_number = 3;
+ error = copyout(&info, args->desc, sizeof(struct l_user_desc));
+ if (error)
+ return (error);
+
+ if (LDT_empty(&info)) {
+ a[0] = 0;
+ a[1] = 0;
+ } else {
+ a[0] = LDT_entry_a(&info);
+ a[1] = LDT_entry_b(&info);
+ }
+
+ memcpy(&sd, &a, sizeof(a));
+#ifdef DEBUG
+ if (ldebug(set_thread_area))
+ printf("Segment created in set_thread_area: lobase: %x, hibase: %x, lolimit: %x, hilimit: %x, type: %i, dpl: %i, p: %i, xx: %i, def32: %i, gran: %i\n", sd.sd_lobase,
+ sd.sd_hibase,
+ sd.sd_lolimit,
+ sd.sd_hilimit,
+ sd.sd_type,
+ sd.sd_dpl,
+ sd.sd_p,
+ sd.sd_xx,
+ sd.sd_def32,
+ sd.sd_gran);
+#endif
+
+ /* this is taken from i386 version of cpu_set_user_tls() */
+ critical_enter();
+ /* set %gs */
+ td->td_pcb->pcb_gsd = sd;
+ PCPU_GET(fsgs_gdt)[1] = sd;
+ load_gs(GSEL(GUGS_SEL, SEL_UPL));
+ critical_exit();
+
+ return (0);
}
int
-linux_gettid(struct thread *td, struct linux_gettid_args *args)
+linux_get_thread_area(struct thread *td, struct linux_get_thread_area_args *args)
{
+
+ struct l_user_desc info;
+ int error;
+ int idx;
+ struct l_desc_struct desc;
+ struct segment_descriptor sd;
+
+#ifdef DEBUG
+ if (ldebug(get_thread_area))
+ printf(ARGS(get_thread_area, "%p"), args->desc);
+#endif
+
+ error = copyin(args->desc, &info, sizeof(struct l_user_desc));
+ if (error)
+ return (error);
+
+ idx = info.entry_number;
+ /* XXX: I am not sure if we want 3 to be allowed too. */
+ if (idx != 6 && idx != 3)
+ return (EINVAL);
+
+ idx = 3;
+
+ memset(&info, 0, sizeof(info));
+
+ sd = PCPU_GET(fsgs_gdt)[1];
+
+ memcpy(&desc, &sd, sizeof(desc));
+
+ info.entry_number = idx;
+ info.base_addr = GET_BASE(&desc);
+ info.limit = GET_LIMIT(&desc);
+ info.seg_32bit = GET_32BIT(&desc);
+ info.contents = GET_CONTENTS(&desc);
+ info.read_exec_only = !GET_WRITABLE(&desc);
+ info.limit_in_pages = GET_LIMIT_PAGES(&desc);
+ info.seg_not_present = !GET_PRESENT(&desc);
+ info.useable = GET_USEABLE(&desc);
+
+ error = copyout(&info, args->desc, sizeof(struct l_user_desc));
+ if (error)
+ return (EFAULT);
- td->td_retval[0] = td->td_proc->p_pid;
return (0);
}
+/* copied from kern/kern_time.c */
+int
+linux_timer_create(struct thread *td, struct linux_timer_create_args *args)
+{
+ return ktimer_create(td, (struct ktimer_create_args *) args);
+}
+
+int
+linux_timer_settime(struct thread *td, struct linux_timer_settime_args *args)
+{
+ return ktimer_settime(td, (struct ktimer_settime_args *) args);
+}
+
+int
+linux_timer_gettime(struct thread *td, struct linux_timer_gettime_args *args)
+{
+ return ktimer_gettime(td, (struct ktimer_gettime_args *) args);
+}
+
+int
+linux_timer_getoverrun(struct thread *td, struct linux_timer_getoverrun_args *args)
+{
+ return ktimer_getoverrun(td, (struct ktimer_getoverrun_args *) args);
+}
+
int
-linux_tkill(struct thread *td, struct linux_tkill_args *args)
+linux_timer_delete(struct thread *td, struct linux_timer_delete_args *args)
{
+ return ktimer_delete(td, (struct ktimer_delete_args *) args);
+}
+
+/* XXX: this wont work with module - convert it */
+int
+linux_mq_open(struct thread *td, struct linux_mq_open_args *args)
+{
+#ifdef P1003_1B_MQUEUE
+ return kmq_open(td, (struct kmq_open_args *) args);
+#else
+ return (ENOSYS);
+#endif
+}
+
+int
+linux_mq_unlink(struct thread *td, struct linux_mq_unlink_args *args)
+{
+#ifdef P1003_1B_MQUEUE
+ return kmq_unlink(td, (struct kmq_unlink_args *) args);
+#else
+ return (ENOSYS);
+#endif
+}
- return (linux_kill(td, (struct linux_kill_args *) args));
+int
+linux_mq_timedsend(struct thread *td, struct linux_mq_timedsend_args *args)
+{
+#ifdef P1003_1B_MQUEUE
+ return kmq_timedsend(td, (struct kmq_timedsend_args *) args);
+#else
+ return (ENOSYS);
+#endif
+}
+
+int
+linux_mq_timedreceive(struct thread *td, struct linux_mq_timedreceive_args *args)
+{
+#ifdef P1003_1B_MQUEUE
+ return kmq_timedreceive(td, (struct kmq_timedreceive_args *) args);
+#else
+ return (ENOSYS);
+#endif
+}
+
+int
+linux_mq_notify(struct thread *td, struct linux_mq_notify_args *args)
+{
+#ifdef P1003_1B_MQUEUE
+ return kmq_notify(td, (struct kmq_notify_args *) args);
+#else
+ return (ENOSYS);
+#endif
+}
+
+int
+linux_mq_getsetattr(struct thread *td, struct linux_mq_getsetattr_args *args)
+{
+#ifdef P1003_1B_MQUEUE
+ return kmq_setattr(td, (struct kmq_setattr_args *) args);
+#else
+ return (ENOSYS);
+#endif
}
OpenPOWER on IntegriCloud