summaryrefslogtreecommitdiffstats
path: root/sys
diff options
context:
space:
mode:
author jkim <jkim@FreeBSD.org> 2007-03-30 00:06:21 +0000
committer jkim <jkim@FreeBSD.org> 2007-03-30 00:06:21 +0000
commit 66aaf37941a1e49cbdbc657033b5c88cf4bbc89d (patch)
tree 9d89cc517054300aafea61fc017fcfc6460f2207 /sys
parent 68bc572d8f14058f355a3d461f630b411afc6ff0 (diff)
download FreeBSD-src-66aaf37941a1e49cbdbc657033b5c88cf4bbc89d.zip
FreeBSD-src-66aaf37941a1e49cbdbc657033b5c88cf4bbc89d.tar.gz
MFP4: Linux set_thread_area syscall (aka TLS) support for amd64.
Initial version was submitted by Divacky Roman and mostly rewritten by me. Tested by: emulation
Diffstat (limited to 'sys')
-rw-r--r-- sys/amd64/amd64/cpu_switch.S 39
-rw-r--r-- sys/amd64/amd64/genassym.c 4
-rw-r--r-- sys/amd64/amd64/machdep.c 9
-rw-r--r-- sys/amd64/include/pcb.h 5
-rw-r--r-- sys/amd64/include/segments.h 5
-rw-r--r-- sys/amd64/linux32/linux32_locore.s 4
-rw-r--r-- sys/amd64/linux32/linux32_machdep.c 160
-rw-r--r-- sys/amd64/linux32/linux32_sysvec.c 8
-rw-r--r-- sys/amd64/linux32/syscalls.master 2
9 files changed, 196 insertions, 40 deletions
diff --git a/sys/amd64/amd64/cpu_switch.S b/sys/amd64/amd64/cpu_switch.S
index 9daf6b4..9f30095 100644
--- a/sys/amd64/amd64/cpu_switch.S
+++ b/sys/amd64/amd64/cpu_switch.S
@@ -104,11 +104,12 @@ ENTRY(cpu_switch)
testl $PCB_32BIT,PCB_FLAGS(%r8)
jz 1f /* no, skip over */
- /* Save segment selector numbers */
- movl %ds,PCB_DS(%r8)
- movl %es,PCB_ES(%r8)
- movl %fs,PCB_FS(%r8)
+ /* Save userland %gs */
movl %gs,PCB_GS(%r8)
+ movq PCB_GS32P(%r8),%rax
+ movq (%rax),%rax
+ movq %rax,PCB_GS32SD(%r8)
+
1:
/* Test if debug registers should be saved. */
testl $PCB_DBREGS,PCB_FLAGS(%r8)
@@ -170,22 +171,6 @@ sw1:
*/
movq TD_PCB(%rsi),%r8
- testl $PCB_32BIT,PCB_FLAGS(%r8)
- jz 1f /* no, skip over */
-
- /* Restore segment selector numbers */
- movl PCB_DS(%r8),%ds
- movl PCB_ES(%r8),%es
- movl PCB_FS(%r8),%fs
-
- /* Restore userland %gs while preserving kernel gsbase */
- movl $MSR_GSBASE,%ecx
- rdmsr
- movl PCB_GS(%r8),%gs
- wrmsr
- jmp 2f
-1:
-
/* Restore userland %fs */
movl $MSR_FSBASE,%ecx
movl PCB_FSBASE(%r8),%eax
@@ -197,7 +182,6 @@ sw1:
movl PCB_GSBASE(%r8),%eax
movl PCB_GSBASE+4(%r8),%edx
wrmsr
-2:
/* Update the TSS_RSP0 pointer for the next interrupt */
movq PCPU(TSSP), %rax
@@ -211,6 +195,19 @@ sw1:
movl %eax, PCPU(CURTID)
movq %rsi, PCPU(CURTHREAD) /* into next thread */
+ testl $PCB_32BIT,PCB_FLAGS(%r8)
+ jz 1f /* no, skip over */
+
+ /* Restore userland %gs while preserving kernel gsbase */
+ movq PCB_GS32P(%r8),%rax
+ movq PCB_GS32SD(%r8),%rbx
+ movq %rbx,(%rax)
+ movl $MSR_GSBASE,%ecx
+ rdmsr
+ movl PCB_GS(%r8),%gs
+ wrmsr
+
+1:
/* Restore context. */
movq PCB_RBX(%r8),%rbx
movq PCB_RSP(%r8),%rsp
diff --git a/sys/amd64/amd64/genassym.c b/sys/amd64/amd64/genassym.c
index 6e6f67c..8bfd2b6 100644
--- a/sys/amd64/amd64/genassym.c
+++ b/sys/amd64/amd64/genassym.c
@@ -136,12 +136,14 @@ ASSYM(PCB_DR6, offsetof(struct pcb, pcb_dr6));
ASSYM(PCB_DR7, offsetof(struct pcb, pcb_dr7));
ASSYM(PCB_DBREGS, PCB_DBREGS);
ASSYM(PCB_32BIT, PCB_32BIT);
+ASSYM(PCB_FULLCTX, PCB_FULLCTX);
ASSYM(PCB_FLAGS, offsetof(struct pcb, pcb_flags));
-ASSYM(PCB_FULLCTX, PCB_FULLCTX);
ASSYM(PCB_SAVEFPU, offsetof(struct pcb, pcb_save));
ASSYM(PCB_SAVEFPU_SIZE, sizeof(struct savefpu));
ASSYM(PCB_ONFAULT, offsetof(struct pcb, pcb_onfault));
+ASSYM(PCB_GS32P, offsetof(struct pcb, pcb_gs32p));
+ASSYM(PCB_GS32SD, offsetof(struct pcb, pcb_gs32sd));
ASSYM(PCB_SIZE, sizeof(struct pcb));
diff --git a/sys/amd64/amd64/machdep.c b/sys/amd64/amd64/machdep.c
index 0ecc9e3..bb32980 100644
--- a/sys/amd64/amd64/machdep.c
+++ b/sys/amd64/amd64/machdep.c
@@ -725,6 +725,15 @@ struct soft_segment_descriptor gdt_segs[] = {
0, /* long */
0, /* default 32 vs 16 bit size */
0 /* limit granularity (byte/page units)*/ },
+/* GUGS32_SEL 8 32 bit GS Descriptor for user */
+{ 0x0, /* segment base address */
+ 0xfffff, /* length - all address space */
+ SDT_MEMRWA, /* segment type */
+ SEL_UPL, /* segment descriptor priority level */
+ 1, /* segment descriptor present */
+ 0, /* long */
+ 1, /* default 32 vs 16 bit size */
+ 1 /* limit granularity (byte/page units)*/ },
};
void
diff --git a/sys/amd64/include/pcb.h b/sys/amd64/include/pcb.h
index 3d71aa3..30516e9 100644
--- a/sys/amd64/include/pcb.h
+++ b/sys/amd64/include/pcb.h
@@ -41,6 +41,7 @@
* AMD64 process control block
*/
#include <machine/fpu.h>
+#include <machine/segments.h>
struct pcb {
register_t pcb_cr3;
@@ -73,6 +74,10 @@ struct pcb {
#define PCB_FULLCTX 0x80 /* full context restore on sysret */
caddr_t pcb_onfault; /* copyin/out fault recovery */
+
+ /* 32-bit segment descriptor */
+ struct user_segment_descriptor *pcb_gs32p;
+ struct user_segment_descriptor pcb_gs32sd;
};
#ifdef _KERNEL
diff --git a/sys/amd64/include/segments.h b/sys/amd64/include/segments.h
index 4de820b..891404c 100644
--- a/sys/amd64/include/segments.h
+++ b/sys/amd64/include/segments.h
@@ -200,9 +200,10 @@ struct region_descriptor {
#define GUCODE32_SEL 3 /* User 32 bit code Descriptor */
#define GUDATA_SEL 4 /* User 32/64 bit Data Descriptor */
#define GUCODE_SEL 5 /* User 64 bit Code Descriptor */
-#define GPROC0_SEL 6 /* TSS for entering kernel etc */
+#define GPROC0_SEL 6 /* TSS for entering kernel etc */
/* slot 7 is second half of GPROC0_SEL */
-#define NGDT 8
+#define GUGS32_SEL 8 /* User 32 bit GS Descriptor */
+#define NGDT 9
#ifdef _KERNEL
extern struct user_segment_descriptor gdt[];
diff --git a/sys/amd64/linux32/linux32_locore.s b/sys/amd64/linux32/linux32_locore.s
index 6c3d208..8055e56 100644
--- a/sys/amd64/linux32/linux32_locore.s
+++ b/sys/amd64/linux32/linux32_locore.s
@@ -11,8 +11,6 @@
NON_GPROF_ENTRY(linux_sigcode)
call *LINUX_SIGF_HANDLER(%esp)
leal LINUX_SIGF_SC(%esp),%ebx /* linux scp */
- movl LINUX_SC_GS(%ebx),%gs
- movl LINUX_SC_FS(%ebx),%fs
movl LINUX_SC_ES(%ebx),%es
movl LINUX_SC_DS(%ebx),%ds
movl %esp, %ebx /* pass sigframe */
@@ -25,8 +23,6 @@ NON_GPROF_ENTRY(linux_sigcode)
linux_rt_sigcode:
call *LINUX_RT_SIGF_HANDLER(%esp)
leal LINUX_RT_SIGF_UC(%esp),%ebx /* linux ucp */
- movl LINUX_SC_GS(%ebx),%gs
- movl LINUX_SC_FS(%ebx),%fs
movl LINUX_SC_ES(%ebx),%es
movl LINUX_SC_DS(%ebx),%ds
push %eax /* fake ret addr */
diff --git a/sys/amd64/linux32/linux32_machdep.c b/sys/amd64/linux32/linux32_machdep.c
index a83ec4f..888c4fc 100644
--- a/sys/amd64/linux32/linux32_machdep.c
+++ b/sys/amd64/linux32/linux32_machdep.c
@@ -53,7 +53,10 @@ __FBSDID("$FreeBSD$");
#include <sys/unistd.h>
#include <machine/frame.h>
+#include <machine/pcb.h>
#include <machine/psl.h>
+#include <machine/segments.h>
+#include <machine/specialreg.h>
#include <vm/vm.h>
#include <vm/pmap.h>
@@ -656,7 +659,43 @@ linux_clone(struct thread *td, struct linux_clone_args *args)
td2->td_frame->tf_rsp = PTROUT(args->stack);
if (args->flags & LINUX_CLONE_SETTLS) {
- /* XXX: todo */
+ struct user_segment_descriptor sd;
+ struct l_user_desc info;
+ int a[2];
+
+ error = copyin((void *)td->td_frame->tf_rsi, &info,
+ sizeof(struct l_user_desc));
+ if (error) {
+ printf(LMSG("copyin failed!"));
+ } else {
+ /* We might copy out the entry_number as GUGS32_SEL. */
+ info.entry_number = GUGS32_SEL;
+ error = copyout(&info, (void *)td->td_frame->tf_rsi,
+ sizeof(struct l_user_desc));
+ if (error)
+ printf(LMSG("copyout failed!"));
+
+ a[0] = LINUX_LDT_entry_a(&info);
+ a[1] = LINUX_LDT_entry_b(&info);
+
+ memcpy(&sd, &a, sizeof(a));
+#ifdef DEBUG
+ if (ldebug(clone))
+ printf("Segment created in clone with "
+ "CLONE_SETTLS: lobase: %x, hibase: %x, "
+ "lolimit: %x, hilimit: %x, type: %i, "
+ "dpl: %i, p: %i, xx: %i, long: %i, "
+ "def32: %i, gran: %i\n", sd.sd_lobase,
+ sd.sd_hibase, sd.sd_lolimit, sd.sd_hilimit,
+ sd.sd_type, sd.sd_dpl, sd.sd_p, sd.sd_xx,
+ sd.sd_long, sd.sd_def32, sd.sd_gran);
+#endif
+ td2->td_pcb->pcb_gsbase = (register_t)info.base_addr;
+ td2->td_pcb->pcb_gs32sd = sd;
+ td2->td_pcb->pcb_gs32p = &gdt[GUGS32_SEL];
+ td2->td_pcb->pcb_gs = GSEL(GUGS32_SEL, SEL_UPL);
+ td2->td_pcb->pcb_flags |= PCB_32BIT;
+ }
}
#ifdef DEBUG
@@ -905,6 +944,19 @@ linux_mmap_common(struct thread *td, struct l_mmap_argv *linux_args)
}
int
+linux_mprotect(struct thread *td, struct linux_mprotect_args *uap)
+{
+ struct mprotect_args bsd_args;
+
+ bsd_args.addr = uap->addr;
+ bsd_args.len = uap->len;
+ bsd_args.prot = uap->prot;
+ if (bsd_args.prot & (PROT_READ | PROT_WRITE | PROT_EXEC))
+ bsd_args.prot |= PROT_READ | PROT_EXEC;
+ return (mprotect(td, &bsd_args));
+}
+
+int
linux_iopl(struct thread *td, struct linux_iopl_args *args)
{
int error;
@@ -1177,14 +1229,104 @@ linux_sched_rr_get_interval(struct thread *td,
}
int
-linux_mprotect(struct thread *td, struct linux_mprotect_args *uap)
+linux_set_thread_area(struct thread *td,
+ struct linux_set_thread_area_args *args)
{
- struct mprotect_args bsd_args;
+ struct l_user_desc info;
+ struct user_segment_descriptor sd;
+ int a[2];
+ int error;
- bsd_args.addr = uap->addr;
- bsd_args.len = uap->len;
- bsd_args.prot = uap->prot;
- if (bsd_args.prot & (PROT_READ | PROT_WRITE | PROT_EXEC))
- bsd_args.prot |= PROT_READ | PROT_EXEC;
- return (mprotect(td, &bsd_args));
+ error = copyin(args->desc, &info, sizeof(struct l_user_desc));
+ if (error)
+ return (error);
+
+#ifdef DEBUG
+ if (ldebug(set_thread_area))
+ printf(ARGS(set_thread_area, "%i, %x, %x, %i, %i, %i, "
+ "%i, %i, %i"), info.entry_number, info.base_addr,
+ info.limit, info.seg_32bit, info.contents,
+ info.read_exec_only, info.limit_in_pages,
+ info.seg_not_present, info.useable);
+#endif
+
+ /*
+ * Semantics of Linux version: every thread in the system has an array
+ * of three TLS descriptors. 1st is GLIBC TLS, 2nd is WINE, 3rd unknown.
+ * This syscall loads one of the selected TLS descriptors with a value
+ * and also loads GDT descriptors 6, 7 and 8 with the content of
+ * the per-thread descriptors.
+ *
+ * Semantics of FreeBSD version: I think we can ignore that Linux has
+ * three per-thread descriptors and use just the first one.
+ * The tls_array[] is used only in [gs]et_thread_area() syscalls and
+ * for loading the GDT descriptors. We use just one GDT descriptor
+ * for TLS, so we will load just one.
+ * XXX: This doesn't work when a user-space process tries to use more
+ * than one TLS segment. A comment in the Linux source says wine might
+ * do that.
+ */
+
+ /*
+ * GLIBC reads the current %gs and calls set_thread_area() with it.
+ * We should let GUDATA_SEL and GUGS32_SEL proceed as well because
+ * we use these segments.
+ */
+ switch (info.entry_number) {
+ case GUGS32_SEL:
+ case GUDATA_SEL:
+ case 6:
+ case -1:
+ info.entry_number = GUGS32_SEL;
+ break;
+ default:
+ return (EINVAL);
+ }
+
+ /*
+ * We have to copy out the GDT entry we use.
+ * XXX: What if a userspace program does not check the return value and
+ * tries to use 6, 7 or 8?
+ */
+ error = copyout(&info, args->desc, sizeof(struct l_user_desc));
+ if (error)
+ return (error);
+
+ if (LINUX_LDT_empty(&info)) {
+ a[0] = 0;
+ a[1] = 0;
+ } else {
+ a[0] = LINUX_LDT_entry_a(&info);
+ a[1] = LINUX_LDT_entry_b(&info);
+ }
+
+ memcpy(&sd, &a, sizeof(a));
+#ifdef DEBUG
+ if (ldebug(set_thread_area))
+ printf("Segment created in set_thread_area: "
+ "lobase: %x, hibase: %x, lolimit: %x, hilimit: %x, "
+ "type: %i, dpl: %i, p: %i, xx: %i, long: %i, "
+ "def32: %i, gran: %i\n",
+ sd.sd_lobase,
+ sd.sd_hibase,
+ sd.sd_lolimit,
+ sd.sd_hilimit,
+ sd.sd_type,
+ sd.sd_dpl,
+ sd.sd_p,
+ sd.sd_xx,
+ sd.sd_long,
+ sd.sd_def32,
+ sd.sd_gran);
+#endif
+
+ critical_enter();
+ td->td_pcb->pcb_gsbase = (register_t)info.base_addr;
+ td->td_pcb->pcb_gs32sd = gdt[GUGS32_SEL] = sd;
+ td->td_pcb->pcb_gs32p = &gdt[GUGS32_SEL];
+ td->td_pcb->pcb_flags |= PCB_32BIT;
+ wrmsr(MSR_KGSBASE, td->td_pcb->pcb_gsbase);
+ critical_exit();
+
+ return (0);
}
diff --git a/sys/amd64/linux32/linux32_sysvec.c b/sys/amd64/linux32/linux32_sysvec.c
index 8490e65..f77d0b9 100644
--- a/sys/amd64/linux32/linux32_sysvec.c
+++ b/sys/amd64/linux32/linux32_sysvec.c
@@ -408,6 +408,7 @@ linux_rt_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
td->td_pcb->pcb_ds = _udatasel;
load_es(_udatasel);
td->td_pcb->pcb_es = _udatasel;
+ /* leave user %fs and %gs untouched */
PROC_LOCK(p);
mtx_lock(&psp->ps_mtx);
}
@@ -528,6 +529,7 @@ linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
td->td_pcb->pcb_ds = _udatasel;
load_es(_udatasel);
td->td_pcb->pcb_es = _udatasel;
+ /* leave user %fs and %gs untouched */
PROC_LOCK(p);
mtx_lock(&psp->ps_mtx);
}
@@ -813,18 +815,20 @@ exec_linux_setregs(td, entry, stack, ps_strings)
struct trapframe *regs = td->td_frame;
struct pcb *pcb = td->td_pcb;
+ critical_enter();
wrmsr(MSR_FSBASE, 0);
wrmsr(MSR_KGSBASE, 0); /* User value while we're in the kernel */
pcb->pcb_fsbase = 0;
pcb->pcb_gsbase = 0;
+ critical_exit();
load_ds(_udatasel);
load_es(_udatasel);
load_fs(_udatasel);
- load_gs(0);
+ load_gs(_udatasel);
pcb->pcb_ds = _udatasel;
pcb->pcb_es = _udatasel;
pcb->pcb_fs = _udatasel;
- pcb->pcb_gs = 0;
+ pcb->pcb_gs = _udatasel;
bzero((char *)regs, sizeof(struct trapframe));
regs->tf_rip = entry;
diff --git a/sys/amd64/linux32/syscalls.master b/sys/amd64/linux32/syscalls.master
index 9fc4cb0..958c5ce 100644
--- a/sys/amd64/linux32/syscalls.master
+++ b/sys/amd64/linux32/syscalls.master
@@ -409,7 +409,7 @@
struct l_timespec *timeout, void *uaddr2, int val3); }
241 AUE_NULL UNIMPL linux_sched_setaffinity
242 AUE_NULL UNIMPL linux_sched_getaffinity
-243 AUE_NULL UNIMPL linux_set_thread_area
+243 AUE_NULL STD { int linux_set_thread_area(struct l_user_desc *desc); }
244 AUE_NULL UNIMPL linux_get_thread_area
245 AUE_NULL UNIMPL linux_io_setup
246 AUE_NULL UNIMPL linux_io_destroy
OpenPOWER on IntegriCloud