-rw-r--r--  sys/amd64/acpica/acpi_switch.S      |  17
-rw-r--r--  sys/amd64/amd64/apic_vector.S       |   7
-rw-r--r--  sys/amd64/amd64/cpu_switch.S        | 153
-rw-r--r--  sys/amd64/amd64/db_interface.c      |  10
-rw-r--r--  sys/amd64/amd64/db_trace.c          |   6
-rw-r--r--  sys/amd64/amd64/exception.S         | 197
-rw-r--r--  sys/amd64/amd64/genassym.c          |  23
-rw-r--r--  sys/amd64/amd64/machdep.c           | 113
-rw-r--r--  sys/amd64/amd64/mp_machdep.c        |  20
-rw-r--r--  sys/amd64/amd64/sys_machdep.c       | 581
-rw-r--r--  sys/amd64/amd64/trap.c              |  95
-rw-r--r--  sys/amd64/amd64/vm_machdep.c        |  96
-rw-r--r--  sys/amd64/ia32/ia32_exception.S     |   5
-rw-r--r--  sys/amd64/ia32/ia32_misc.c          |  71
-rw-r--r--  sys/amd64/ia32/ia32_reg.c           |  35
-rw-r--r--  sys/amd64/ia32/ia32_signal.c        |  87
-rw-r--r--  sys/amd64/ia32/ia32_sigtramp.S      |   4
-rw-r--r--  sys/amd64/include/asmacros.h        |   7
-rw-r--r--  sys/amd64/include/frame.h           |  11
-rw-r--r--  sys/amd64/include/md_var.h          |  13
-rw-r--r--  sys/amd64/include/pcb.h             |   6
-rw-r--r--  sys/amd64/include/pcpu.h            |  12
-rw-r--r--  sys/amd64/include/proc.h            |  21
-rw-r--r--  sys/amd64/include/segments.h        |   3
-rw-r--r--  sys/amd64/include/sysarch.h         |   9
-rw-r--r--  sys/amd64/linux32/linux32_locore.s  |   4
-rw-r--r--  sys/amd64/linux32/linux32_machdep.c |   9
-rw-r--r--  sys/amd64/linux32/linux32_sysvec.c  |  70
-rw-r--r--  sys/conf/files.amd64                |   1
29 files changed, 1335 insertions(+), 351 deletions(-)
diff --git a/sys/amd64/acpica/acpi_switch.S b/sys/amd64/acpica/acpi_switch.S
index d4f732a..0b26292 100644
--- a/sys/amd64/acpica/acpi_switch.S
+++ b/sys/amd64/acpica/acpi_switch.S
@@ -64,12 +64,17 @@ ENTRY(acpi_restorecpu)
/* Fetch PCB. */
movq WAKEUP_CTX(xpcb), %r11
- /* Restore segment registers. */
- mov WAKEUP_PCB(DS), %ds
- mov WAKEUP_PCB(ES), %es
- mov WAKEUP_XPCB(SS), %ss
- mov WAKEUP_PCB(FS), %fs
- mov WAKEUP_PCB(GS), %gs
+ /* Force kernel segment registers. */
+ movl $KDSEL, %eax
+ movw %ax, %ds
+ movl $KDSEL, %eax
+ movw %ax, %es
+ movl $KDSEL, %eax
+ movw %ax, %ss
+ movl $KUF32SEL, %eax
+ movw %ax, %fs
+ movl $KUG32SEL, %eax
+ movw %ax, %gs
movl $MSR_FSBASE, %ecx
movl WAKEUP_PCB(FSBASE), %eax
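
Note on the hunk above: the resume path no longer trusts the selector values saved at suspend time. It forces known-good kernel and 32-bit user selectors and relies on the MSR reloads that follow (the MSR_FSBASE load is visible above; GSBASE/KGSBASE are presumably reloaded the same way in the lines after the hunk). A rough C equivalent of that policy, using the cpufunc.h wrappers and the GSEL()/GUFS32_SEL names that appear elsewhere in this change; a sketch only, not the committed code:

    #include <machine/cpufunc.h>
    #include <machine/pcb.h>
    #include <machine/segments.h>
    #include <machine/specialreg.h>

    static void
    resume_force_segs(struct pcb *pcb)
    {
            /* Fixed selectors: kernel data for %ds/%es/%ss, the 32-bit
             * user fs/gs descriptors for %fs/%gs; nothing is taken from
             * the suspended image. */
            load_ds(GSEL(GDATA_SEL, SEL_KPL));      /* KDSEL */
            load_es(GSEL(GDATA_SEL, SEL_KPL));
            load_fs(GSEL(GUFS32_SEL, SEL_UPL));     /* KUF32SEL */
            load_gs(GSEL(GUGS32_SEL, SEL_UPL));     /* KUG32SEL */

            /* The 64-bit base still comes from the saved context via the
             * MSR, exactly as the MSR_FSBASE reload above does. */
            wrmsr(MSR_FSBASE, pcb->pcb_fsbase);
    }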
diff --git a/sys/amd64/amd64/apic_vector.S b/sys/amd64/amd64/apic_vector.S
index 0306bb3..cebafc8 100644
--- a/sys/amd64/amd64/apic_vector.S
+++ b/sys/amd64/amd64/apic_vector.S
@@ -219,9 +219,7 @@ IDTVEC(cpustop)
movl $0, LA_EOI(%rax) /* End Of Interrupt to APIC */
call cpustop_handler
-
- POP_FRAME
- iretq
+ jmp doreti
/*
* Executed by a CPU when it receives an IPI_SUSPEND from another CPU.
@@ -251,6 +249,5 @@ IDTVEC(rendezvous)
call smp_rendezvous_action
movq lapic, %rax
movl $0, LA_EOI(%rax) /* End Of Interrupt to APIC */
- POP_FRAME /* Why not doreti? */
- iretq
+ jmp doreti
#endif /* SMP */
diff --git a/sys/amd64/amd64/cpu_switch.S b/sys/amd64/amd64/cpu_switch.S
index 0c59703..6fc8290 100644
--- a/sys/amd64/amd64/cpu_switch.S
+++ b/sys/amd64/amd64/cpu_switch.S
@@ -75,8 +75,6 @@ ENTRY(cpu_throw)
1:
movq TD_PCB(%rdi),%r8 /* Old pcb */
movl PCPU(CPUID), %eax
- movq PCB_FSBASE(%r8),%r9
- movq PCB_GSBASE(%r8),%r10
/* release bit from old pm_active */
movq TD_PROC(%rdi), %rdx /* oldtd->td_proc */
movq P_VMSPACE(%rdx), %rdx /* proc->p_vmspace */
@@ -110,28 +108,6 @@ ENTRY(cpu_switch)
movq %rbx,PCB_RBX(%r8)
movq %rax,PCB_RIP(%r8)
- /*
- * Reread fs and gs bases. Explicit fs segment register load
- * by the usermode code may change actual fs base without
- * updating pcb_{fs,gs}base.
- *
- * %rdx still contains the mtx, save %rdx around rdmsr.
- */
- movq %rdx,%r11
- movl $MSR_FSBASE,%ecx
- rdmsr
- shlq $32,%rdx
- leaq (%rax,%rdx),%r9
- movl $MSR_KGSBASE,%ecx
- rdmsr
- shlq $32,%rdx
- leaq (%rax,%rdx),%r10
- movq %r11,%rdx
-
- testl $PCB_32BIT,PCB_FLAGS(%r8)
- jnz store_seg
-done_store_seg:
-
testl $PCB_DBREGS,PCB_FLAGS(%r8)
jnz store_dr /* static predict not taken */
done_store_dr:
@@ -192,36 +168,47 @@ sw1:
testl $TDP_KTHREAD,TD_PFLAGS(%rsi)
jnz do_kthread
- testl $PCB_32BIT,PCB_FLAGS(%r8)
- jnz load_seg
-done_load_seg:
+ /*
+ * Load ldt register
+ */
+ movq TD_PROC(%rsi),%rcx
+ cmpq $0, P_MD+MD_LDT(%rcx)
+ jne do_ldt
+ xorl %eax,%eax
+ld_ldt: lldt %ax
- cmpq PCB_FSBASE(%r8),%r9
- jz 1f
- /* Restore userland %fs */
-restore_fsbase:
- movl $MSR_FSBASE,%ecx
+ /* Restore fs base in GDT */
movl PCB_FSBASE(%r8),%eax
- movl PCB_FSBASE+4(%r8),%edx
- wrmsr
-1:
- cmpq PCB_GSBASE(%r8),%r10
- jz 2f
- /* Restore userland %gs */
- movl $MSR_KGSBASE,%ecx
+ movq PCPU(FS32P),%rdx
+ movw %ax,2(%rdx)
+ shrl $16,%eax
+ movb %al,4(%rdx)
+ shrl $8,%eax
+ movb %al,7(%rdx)
+
+ /* Restore gs base in GDT */
movl PCB_GSBASE(%r8),%eax
- movl PCB_GSBASE+4(%r8),%edx
- wrmsr
-2:
+ movq PCPU(GS32P),%rdx
+ movw %ax,2(%rdx)
+ shrl $16,%eax
+ movb %al,4(%rdx)
+ shrl $8,%eax
+ movb %al,7(%rdx)
-do_tss:
+do_kthread:
+ /* Do we need to reload tss ? */
+ movq PCPU(TSSP),%rax
+ movq PCB_TSSP(%r8),%rdx
+ testq %rdx,%rdx
+ cmovzq PCPU(COMMONTSSP),%rdx
+ cmpq %rax,%rdx
+ jne do_tss
+done_tss:
+ movq %r8,PCPU(RSP0)
+ movq %r8,PCPU(CURPCB)
/* Update the TSS_RSP0 pointer for the next interrupt */
- movq PCPU(TSSP), %rax
- movq %r8, PCPU(RSP0)
- movq %r8, PCPU(CURPCB)
- addq $COMMON_TSS_RSP0, %rax
- movq %rsi, PCPU(CURTHREAD) /* into next thread */
- movq %r8, (%rax)
+ movq %r8,COMMON_TSS_RSP0(%rdx)
+ movq %rsi,PCPU(CURTHREAD) /* into next thread */
/* Test if debug registers should be restored. */
testl $PCB_DBREGS,PCB_FLAGS(%r8)
@@ -250,45 +237,6 @@ done_load_dr:
* We use jumps rather than call in order to avoid the stack.
*/
-do_kthread:
- /*
- * Copy old fs/gsbase to new kthread pcb for future switches
- * This maintains curpcb->pcb_[fg]sbase as caches of the MSR
- */
- movq %r9,PCB_FSBASE(%r8)
- movq %r10,PCB_GSBASE(%r8)
- jmp do_tss
-
-store_seg:
- mov %gs,PCB_GS(%r8)
- testl $PCB_GS32BIT,PCB_FLAGS(%r8)
- jnz 2f
-1: mov %ds,PCB_DS(%r8)
- mov %es,PCB_ES(%r8)
- mov %fs,PCB_FS(%r8)
- jmp done_store_seg
-2: movq PCPU(GS32P),%rax
- movq (%rax),%rax
- movq %rax,PCB_GS32SD(%r8)
- jmp 1b
-
-load_seg:
- testl $PCB_GS32BIT,PCB_FLAGS(%r8)
- jnz 2f
-1: movl $MSR_GSBASE,%ecx
- rdmsr
- mov PCB_GS(%r8),%gs
- wrmsr
- mov PCB_DS(%r8),%ds
- mov PCB_ES(%r8),%es
- mov PCB_FS(%r8),%fs
- jmp restore_fsbase
- /* Restore userland %gs while preserving kernel gsbase */
-2: movq PCPU(GS32P),%rax
- movq PCB_GS32SD(%r8),%rcx
- movq %rcx,(%rax)
- jmp 1b
-
store_dr:
movq %dr7,%rax /* yes, do the save */
movq %dr0,%r15
@@ -325,6 +273,29 @@ load_dr:
movq %r11,%dr6
movq %rax,%dr7
jmp done_load_dr
+
+do_tss: movq %rdx,PCPU(TSSP)
+ movq %rdx,%rcx
+ movq PCPU(TSS),%rax
+ movw %rcx,2(%rax)
+ shrq $16,%rcx
+ movb %cl,4(%rax)
+ shrq $8,%rcx
+ movb %cl,7(%rax)
+ shrq $8,%rcx
+ movl %ecx,8(%rax)
+ movb $0x89,5(%rax) /* unset busy */
+ movl $TSSSEL,%eax
+ ltr %ax
+ jmp done_tss
+
+do_ldt: movq PCPU(LDT),%rax
+ movq P_MD+MD_LDT_SD(%rcx),%rdx
+ movq %rdx,(%rax)
+ movq P_MD+MD_LDT_SD+8(%rcx),%rdx
+ movq %rdx,8(%rax)
+ movl $LDTSEL,%eax
+ jmp ld_ldt
END(cpu_switch)
/*
@@ -398,12 +369,6 @@ ENTRY(savectx2)
movq (%rsp),%rax
movq %rax,PCB_RIP(%r8)
- mov %ds,PCB_DS(%r8)
- mov %es,PCB_ES(%r8)
- mov %ss,XPCB_SS(%r8)
- mov %fs,PCB_FS(%r8)
- mov %gs,PCB_GS(%r8)
-
movq %rbx,PCB_RBX(%r8)
movq %rsp,PCB_RSP(%r8)
movq %rbp,PCB_RBP(%r8)
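
The switch path above no longer writes MSR_FSBASE/MSR_KGSBASE for 32-bit processes; instead it patches the base bytes of the per-CPU GUFS32/GUGS32 GDT descriptors (the movw/movb shuffle into offsets 2, 4 and 7). In C terms this is just splitting a 32-bit base across the descriptor's lobase/hibase fields, the same operation update_gdt_fsbase()/update_gdt_gsbase() perform in the sys_machdep.c part of this diff. A minimal sketch:

    #include <sys/types.h>
    #include <machine/segments.h>

    /* Split a 32-bit base across a user_segment_descriptor: bits 0..23
     * land in sd_lobase (descriptor bytes 2..4), bits 24..31 in
     * sd_hibase (descriptor byte 7). */
    static void
    set_desc_base32(struct user_segment_descriptor *sd, uint32_t base)
    {
            sd->sd_lobase = base & 0xffffff;
            sd->sd_hibase = (base >> 24) & 0xff;
    }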
diff --git a/sys/amd64/amd64/db_interface.c b/sys/amd64/amd64/db_interface.c
index b297616..287c236 100644
--- a/sys/amd64/amd64/db_interface.c
+++ b/sys/amd64/amd64/db_interface.c
@@ -139,7 +139,11 @@ void
db_show_mdpcpu(struct pcpu *pc)
{
-#if 0
- db_printf("currentldt = 0x%x\n", pc->pc_currentldt);
-#endif
+ db_printf("curpmap = %p\n", pc->pc_curpmap);
+ db_printf("tssp = %p\n", pc->pc_tssp);
+ db_printf("commontssp = %p\n", pc->pc_commontssp);
+ db_printf("rsp0 = 0x%lx\n", pc->pc_rsp0);
+ db_printf("gs32p = %p\n", pc->pc_gs32p);
+ db_printf("ldt = %p\n", pc->pc_ldt);
+ db_printf("tss = %p\n", pc->pc_tss);
}
diff --git a/sys/amd64/amd64/db_trace.c b/sys/amd64/amd64/db_trace.c
index 50a5f4d..73ffac5 100644
--- a/sys/amd64/amd64/db_trace.c
+++ b/sys/amd64/amd64/db_trace.c
@@ -69,12 +69,10 @@ static db_varfcn_t db_ss;
#define DB_OFFSET(x) (db_expr_t *)offsetof(struct trapframe, x)
struct db_variable db_regs[] = {
{ "cs", DB_OFFSET(tf_cs), db_frame },
-#if 0
{ "ds", DB_OFFSET(tf_ds), db_frame },
{ "es", DB_OFFSET(tf_es), db_frame },
{ "fs", DB_OFFSET(tf_fs), db_frame },
{ "gs", DB_OFFSET(tf_gs), db_frame },
-#endif
{ "ss", NULL, db_ss },
{ "rax", DB_OFFSET(tf_rax), db_frame },
{ "rcx", DB_OFFSET(tf_rcx), db_frame },
@@ -94,7 +92,7 @@ struct db_variable db_regs[] = {
{ "r15", DB_OFFSET(tf_r15), db_frame },
{ "rip", DB_OFFSET(tf_rip), db_frame },
{ "rflags", DB_OFFSET(tf_rflags), db_frame },
-#define DB_N_SHOW_REGS 20 /* Don't show registers after here. */
+#define DB_N_SHOW_REGS 24 /* Don't show registers after here. */
{ "dr0", NULL, db_dr0 },
{ "dr1", NULL, db_dr1 },
{ "dr2", NULL, db_dr2 },
@@ -357,7 +355,7 @@ db_nextframe(struct amd64_frame **fp, db_addr_t *ip, struct thread *td)
rbp = tf->tf_rbp;
switch (frame_type) {
case TRAP:
- db_printf("--- trap %#lr", tf->tf_trapno);
+ db_printf("--- trap %#r", tf->tf_trapno);
break;
case SYSCALL:
db_printf("--- syscall");
diff --git a/sys/amd64/amd64/exception.S b/sys/amd64/amd64/exception.S
index 897bfec..daa5c25 100644
--- a/sys/amd64/amd64/exception.S
+++ b/sys/amd64/amd64/exception.S
@@ -42,6 +42,7 @@
#include <machine/asmacros.h>
#include <machine/psl.h>
#include <machine/trap.h>
+#include <machine/specialreg.h>
#include "assym.s"
@@ -99,7 +100,7 @@ MCOUNT_LABEL(btrap)
/* Traps that we leave interrupts disabled for.. */
#define TRAP_NOEN(a) \
subq $TF_RIP,%rsp; \
- movq $(a),TF_TRAPNO(%rsp) ; \
+ movl $(a),TF_TRAPNO(%rsp) ; \
movq $0,TF_ADDR(%rsp) ; \
movq $0,TF_ERR(%rsp) ; \
jmp alltraps_noen
@@ -111,7 +112,7 @@ IDTVEC(bpt)
/* Regular traps; The cpu does not supply tf_err for these. */
#define TRAP(a) \
subq $TF_RIP,%rsp; \
- movq $(a),TF_TRAPNO(%rsp) ; \
+ movl $(a),TF_TRAPNO(%rsp) ; \
movq $0,TF_ADDR(%rsp) ; \
movq $0,TF_ERR(%rsp) ; \
jmp alltraps
@@ -139,7 +140,7 @@ IDTVEC(xmm)
/* This group of traps have tf_err already pushed by the cpu */
#define TRAP_ERR(a) \
subq $TF_ERR,%rsp; \
- movq $(a),TF_TRAPNO(%rsp) ; \
+ movl $(a),TF_TRAPNO(%rsp) ; \
movq $0,TF_ADDR(%rsp) ; \
jmp alltraps
IDTVEC(tss)
@@ -164,6 +165,10 @@ alltraps:
testb $SEL_RPL_MASK,TF_CS(%rsp) /* Did we come from kernel? */
jz alltraps_testi /* already running with kernel GS.base */
swapgs
+ movw %fs,TF_FS(%rsp)
+ movw %gs,TF_GS(%rsp)
+ movw %es,TF_ES(%rsp)
+ movw %ds,TF_DS(%rsp)
alltraps_testi:
testl $PSL_I,TF_RFLAGS(%rsp)
jz alltraps_pushregs
@@ -185,6 +190,7 @@ alltraps_pushregs_no_rdi:
movq %r13,TF_R13(%rsp)
movq %r14,TF_R14(%rsp)
movq %r15,TF_R15(%rsp)
+ movl $TF_HASSEGS,TF_FLAGS(%rsp)
FAKE_MCOUNT(TF_RIP(%rsp))
#ifdef KDTRACE_HOOKS
/*
@@ -193,7 +199,7 @@ alltraps_pushregs_no_rdi:
* interrupt. For all other trap types, just handle them in
* the usual way.
*/
- cmpq $T_BPTFLT,TF_TRAPNO(%rsp)
+ cmpl $T_BPTFLT,TF_TRAPNO(%rsp)
jne calltrap
/* Check if there is no DTrace hook registered. */
@@ -228,13 +234,17 @@ calltrap:
.type alltraps_noen,@function
alltraps_noen:
testb $SEL_RPL_MASK,TF_CS(%rsp) /* Did we come from kernel? */
- jz alltraps_pushregs /* already running with kernel GS.base */
+ jz 1f /* already running with kernel GS.base */
swapgs
+1: movw %fs,TF_FS(%rsp)
+ movw %gs,TF_GS(%rsp)
+ movw %es,TF_ES(%rsp)
+ movw %ds,TF_DS(%rsp)
jmp alltraps_pushregs
IDTVEC(dblfault)
subq $TF_ERR,%rsp
- movq $T_DOUBLEFLT,TF_TRAPNO(%rsp)
+ movl $T_DOUBLEFLT,TF_TRAPNO(%rsp)
movq $0,TF_ADDR(%rsp)
movq $0,TF_ERR(%rsp)
movq %rdi,TF_RDI(%rsp)
@@ -252,6 +262,11 @@ IDTVEC(dblfault)
movq %r13,TF_R13(%rsp)
movq %r14,TF_R14(%rsp)
movq %r15,TF_R15(%rsp)
+ movw %fs,TF_FS(%rsp)
+ movw %gs,TF_GS(%rsp)
+ movw %es,TF_ES(%rsp)
+ movw %ds,TF_DS(%rsp)
+ movl $TF_HASSEGS,TF_FLAGS(%rsp)
testb $SEL_RPL_MASK,TF_CS(%rsp) /* Did we come from kernel? */
jz 1f /* already running with kernel GS.base */
swapgs
@@ -262,7 +277,7 @@ IDTVEC(dblfault)
IDTVEC(page)
subq $TF_ERR,%rsp
- movq $T_PAGEFLT,TF_TRAPNO(%rsp)
+ movl $T_PAGEFLT,TF_TRAPNO(%rsp)
testb $SEL_RPL_MASK,TF_CS(%rsp) /* Did we come from kernel? */
jz 1f /* already running with kernel GS.base */
swapgs
@@ -270,6 +285,10 @@ IDTVEC(page)
movq %rdi,TF_RDI(%rsp) /* free up a GP register */
movq %cr2,%rdi /* preserve %cr2 before .. */
movq %rdi,TF_ADDR(%rsp) /* enabling interrupts. */
+ movw %fs,TF_FS(%rsp)
+ movw %gs,TF_GS(%rsp)
+ movw %es,TF_ES(%rsp)
+ movw %ds,TF_DS(%rsp)
testl $PSL_I,TF_RFLAGS(%rsp)
jz alltraps_pushregs_no_rdi
sti
@@ -283,17 +302,19 @@ IDTVEC(page)
*/
IDTVEC(prot)
subq $TF_ERR,%rsp
- movq $T_PROTFLT,TF_TRAPNO(%rsp)
+ movl $T_PROTFLT,TF_TRAPNO(%rsp)
movq $0,TF_ADDR(%rsp)
movq %rdi,TF_RDI(%rsp) /* free up a GP register */
leaq doreti_iret(%rip),%rdi
cmpq %rdi,TF_RIP(%rsp)
- je 2f /* kernel but with user gsbase!! */
+ je 1f /* kernel but with user gsbase!! */
testb $SEL_RPL_MASK,TF_CS(%rsp) /* Did we come from kernel? */
- jz 1f /* already running with kernel GS.base */
-2:
- swapgs
-1:
+ jz 2f /* already running with kernel GS.base */
+1: swapgs
+2: movw %fs,TF_FS(%rsp)
+ movw %gs,TF_GS(%rsp)
+ movw %es,TF_ES(%rsp)
+ movw %ds,TF_DS(%rsp)
testl $PSL_I,TF_RFLAGS(%rsp)
jz alltraps_pushregs_no_rdi
sti
@@ -316,6 +337,10 @@ IDTVEC(fast_syscall)
movq %rcx,TF_RIP(%rsp) /* %rcx original value is in %r10 */
movq PCPU(SCRATCH_RSP),%r11 /* %r11 already saved */
movq %r11,TF_RSP(%rsp) /* user stack pointer */
+ movw %fs,TF_FS(%rsp)
+ movw %gs,TF_GS(%rsp)
+ movw %es,TF_ES(%rsp)
+ movw %ds,TF_DS(%rsp)
sti
movq $KUDSEL,TF_SS(%rsp)
movq $KUCSEL,TF_CS(%rsp)
@@ -333,40 +358,11 @@ IDTVEC(fast_syscall)
movq %r13,TF_R13(%rsp) /* C preserved */
movq %r14,TF_R14(%rsp) /* C preserved */
movq %r15,TF_R15(%rsp) /* C preserved */
+ movl $TF_HASSEGS,TF_FLAGS(%rsp)
FAKE_MCOUNT(TF_RIP(%rsp))
movq %rsp, %rdi
call syscall
movq PCPU(CURPCB),%rax
- testq $PCB_FULLCTX,PCB_FLAGS(%rax)
- jne 3f
-1: /* Check for and handle AST's on return to userland */
- cli
- movq PCPU(CURTHREAD),%rax
- testl $TDF_ASTPENDING | TDF_NEEDRESCHED,TD_FLAGS(%rax)
- je 2f
- sti
- movq %rsp, %rdi
- call ast
- jmp 1b
-2: /* restore preserved registers */
- MEXITCOUNT
- movq TF_RDI(%rsp),%rdi /* bonus; preserve arg 1 */
- movq TF_RSI(%rsp),%rsi /* bonus: preserve arg 2 */
- movq TF_RDX(%rsp),%rdx /* return value 2 */
- movq TF_RAX(%rsp),%rax /* return value 1 */
- movq TF_RBX(%rsp),%rbx /* C preserved */
- movq TF_RBP(%rsp),%rbp /* C preserved */
- movq TF_R12(%rsp),%r12 /* C preserved */
- movq TF_R13(%rsp),%r13 /* C preserved */
- movq TF_R14(%rsp),%r14 /* C preserved */
- movq TF_R15(%rsp),%r15 /* C preserved */
- movq TF_RFLAGS(%rsp),%r11 /* original %rflags */
- movq TF_RIP(%rsp),%rcx /* original %rip */
- movq TF_RSP(%rsp),%r9 /* user stack pointer */
- movq %r9,%rsp /* original %rsp */
- swapgs
- sysretq
-3: /* Requested full context restore, use doreti for that */
andq $~PCB_FULLCTX,PCB_FLAGS(%rax)
MEXITCOUNT
jmp doreti
@@ -405,7 +401,7 @@ IDTVEC(fast_syscall32)
IDTVEC(nmi)
subq $TF_RIP,%rsp
- movq $(T_NMI),TF_TRAPNO(%rsp)
+ movl $(T_NMI),TF_TRAPNO(%rsp)
movq $0,TF_ADDR(%rsp)
movq $0,TF_ERR(%rsp)
movq %rdi,TF_RDI(%rsp)
@@ -423,6 +419,11 @@ IDTVEC(nmi)
movq %r13,TF_R13(%rsp)
movq %r14,TF_R14(%rsp)
movq %r15,TF_R15(%rsp)
+ movw %fs,TF_FS(%rsp)
+ movw %gs,TF_GS(%rsp)
+ movw %es,TF_ES(%rsp)
+ movw %ds,TF_DS(%rsp)
+ movl $TF_HASSEGS,TF_FLAGS(%rsp)
xorl %ebx,%ebx
testb $SEL_RPL_MASK,TF_CS(%rsp)
jnz nmi_fromuserspace
@@ -515,9 +516,7 @@ outofnmi:
nocallchain:
#endif
testl %ebx,%ebx
- jz nmi_kernelexit
- swapgs
- jmp nmi_restoreregs
+ jnz doreti_exit
nmi_kernelexit:
/*
* Put back the preserved MSR_GSBASE value.
@@ -633,7 +632,55 @@ doreti_ast:
*/
doreti_exit:
MEXITCOUNT
- movq TF_RDI(%rsp),%rdi
+ movq PCPU(CURTHREAD),%r8
+ movq TD_PCB(%r8),%r8
+
+ /*
+ * Do not reload segment registers for kernel.
+ * Since we do not reload segment registers with sane
+ * values on kernel entry, descriptors referenced by
+ * segment registers may not be valid. This is fatal
+ * for usermode, but is harmless for the kernel.
+ */
+ testb $SEL_RPL_MASK,TF_CS(%rsp)
+ jz ld_regs
+
+ testl $TF_HASSEGS,TF_FLAGS(%rsp)
+ je set_segs
+
+do_segs:
+ /* Restore %fs and fsbase */
+ movw TF_FS(%rsp),%ax
+ .globl ld_fs
+ld_fs: movw %ax,%fs
+ cmpw $KUF32SEL,%ax
+ jne 1f
+ movl $MSR_FSBASE,%ecx
+ movl PCB_FSBASE(%r8),%eax
+ movl PCB_FSBASE+4(%r8),%edx
+ wrmsr
+1:
+ /* Restore %gs and gsbase */
+ movw TF_GS(%rsp),%si
+ pushfq
+ cli
+ movl $MSR_GSBASE,%ecx
+ rdmsr
+ .globl ld_gs
+ld_gs: movw %si,%gs
+ wrmsr
+ popfq
+ cmpw $KUG32SEL,%si
+ jne 1f
+ movl $MSR_KGSBASE,%ecx
+ movl PCB_GSBASE(%r8),%eax
+ movl PCB_GSBASE+4(%r8),%edx
+ wrmsr
+1: .globl ld_es
+ld_es: movw TF_ES(%rsp),%es
+ .globl ld_ds
+ld_ds: movw TF_DS(%rsp),%ds
+ld_regs:movq TF_RDI(%rsp),%rdi
movq TF_RSI(%rsp),%rsi
movq TF_RDX(%rsp),%rdx
movq TF_RCX(%rsp),%rcx
@@ -657,6 +704,14 @@ doreti_exit:
doreti_iret:
iretq
+set_segs:
+ movw $KUDSEL,%ax
+ movw %ax,TF_DS(%rsp)
+ movw %ax,TF_ES(%rsp)
+ movw $KUF32SEL,TF_FS(%rsp)
+ movw $KUG32SEL,TF_GS(%rsp)
+ jmp do_segs
+
/*
* doreti_iret_fault. Alternative return code for
* the case where we get a fault in the doreti_exit code
@@ -671,7 +726,12 @@ doreti_iret_fault:
testl $PSL_I,TF_RFLAGS(%rsp)
jz 1f
sti
-1: movq %rdi,TF_RDI(%rsp)
+1: movw %fs,TF_FS(%rsp)
+ movw %gs,TF_GS(%rsp)
+ movw %es,TF_ES(%rsp)
+ movw %ds,TF_DS(%rsp)
+ movl $TF_HASSEGS,TF_FLAGS(%rsp)
+ movq %rdi,TF_RDI(%rsp)
movq %rsi,TF_RSI(%rsp)
movq %rdx,TF_RDX(%rsp)
movq %rcx,TF_RCX(%rsp)
@@ -686,11 +746,48 @@ doreti_iret_fault:
movq %r13,TF_R13(%rsp)
movq %r14,TF_R14(%rsp)
movq %r15,TF_R15(%rsp)
- movq $T_PROTFLT,TF_TRAPNO(%rsp)
+ movl $T_PROTFLT,TF_TRAPNO(%rsp)
movq $0,TF_ERR(%rsp) /* XXX should be the error code */
movq $0,TF_ADDR(%rsp)
FAKE_MCOUNT(TF_RIP(%rsp))
jmp calltrap
+
+ ALIGN_TEXT
+ .globl ds_load_fault
+ds_load_fault:
+ movl $T_PROTFLT,TF_TRAPNO(%rsp)
+ movzwl TF_DS(%rsp),%edx
+ movl %edx,TF_ERR(%rsp)
+ movw $KUDSEL,TF_DS(%rsp)
+ jmp calltrap
+
+ ALIGN_TEXT
+ .globl es_load_fault
+es_load_fault:
+ movl $T_PROTFLT,TF_TRAPNO(%rsp)
+ movzwl TF_ES(%rsp),%edx
+ movl %edx,TF_ERR(%rsp)
+ movw $KUDSEL,TF_ES(%rsp)
+ jmp calltrap
+
+ ALIGN_TEXT
+ .globl fs_load_fault
+fs_load_fault:
+ movl $T_PROTFLT,TF_TRAPNO(%rsp)
+ movzwl TF_FS(%rsp),%edx
+ movl %edx,TF_ERR(%rsp)
+ movw $KUF32SEL,TF_FS(%rsp)
+ jmp calltrap
+
+ ALIGN_TEXT
+ .globl gs_load_fault
+gs_load_fault:
+ popfq
+ movl $T_PROTFLT,TF_TRAPNO(%rsp)
+ movzwl TF_GS(%rsp),%edx
+ movl %edx,TF_ERR(%rsp)
+ movw $KUG32SEL,TF_GS(%rsp)
+ jmp calltrap
#ifdef HWPMC_HOOKS
ENTRY(end_exceptions)
#endif
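
The subtle part of the new doreti_exit sequence is the %gs reload: writing the selector clobbers the hidden GS base, which at that point still holds the kernel's per-CPU pointer, so the load is bracketed by an rdmsr/wrmsr of MSR_GSBASE with interrupts disabled, and a 32-bit user selector additionally gets its base re-posted through MSR_KGSBASE. A C-flavoured sketch of that sequence, using the cpufunc.h wrappers; the spelled-out gs_sel_load() helper is illustrative (the stock load_gs() already wraps the same dance), and a fault on the selector load is fixed up via gs_load_fault in trap.c below:

    #include <machine/cpufunc.h>
    #include <machine/frame.h>
    #include <machine/pcb.h>
    #include <machine/segments.h>
    #include <machine/specialreg.h>

    static __inline void
    gs_sel_load(u_short sel)
    {
            /* Bare "movw sel,%gs", no base preservation. */
            __asm __volatile("movw %0,%%gs" : : "rm" (sel));
    }

    static void
    reload_user_gs(struct trapframe *tf, struct pcb *pcb)
    {
            register_t rf;
            uint64_t kgsbase;

            rf = intr_disable();            /* pushfq; cli */
            kgsbase = rdmsr(MSR_GSBASE);    /* kernel per-CPU base */
            gs_sel_load(tf->tf_gs);         /* may fault -> gs_load_fault */
            wrmsr(MSR_GSBASE, kgsbase);     /* put it straight back */
            intr_restore(rf);               /* popfq */

            /* A 32-bit user %gs also gets its base re-posted; it becomes
             * active at the final swapgs/iretq. */
            if (tf->tf_gs == GSEL(GUGS32_SEL, SEL_UPL))
                    wrmsr(MSR_KGSBASE, pcb->pcb_gsbase);
    }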
diff --git a/sys/amd64/amd64/genassym.c b/sys/amd64/amd64/genassym.c
index 5aa3134..ea3d834 100644
--- a/sys/amd64/amd64/genassym.c
+++ b/sys/amd64/amd64/genassym.c
@@ -79,6 +79,10 @@ ASSYM(P_VMSPACE, offsetof(struct proc, p_vmspace));
ASSYM(VM_PMAP, offsetof(struct vmspace, vm_pmap));
ASSYM(PM_ACTIVE, offsetof(struct pmap, pm_active));
+ASSYM(P_MD, offsetof(struct proc, p_md));
+ASSYM(MD_LDT, offsetof(struct mdproc, md_ldt));
+ASSYM(MD_LDT_SD, offsetof(struct mdproc, md_ldt_sd));
+
ASSYM(TD_LOCK, offsetof(struct thread, td_lock));
ASSYM(TD_FLAGS, offsetof(struct thread, td_flags));
ASSYM(TD_PCB, offsetof(struct thread, td_pcb));
@@ -132,16 +136,13 @@ ASSYM(PCB_RBX, offsetof(struct pcb, pcb_rbx));
ASSYM(PCB_RIP, offsetof(struct pcb, pcb_rip));
ASSYM(PCB_FSBASE, offsetof(struct pcb, pcb_fsbase));
ASSYM(PCB_GSBASE, offsetof(struct pcb, pcb_gsbase));
-ASSYM(PCB_DS, offsetof(struct pcb, pcb_ds));
-ASSYM(PCB_ES, offsetof(struct pcb, pcb_es));
-ASSYM(PCB_FS, offsetof(struct pcb, pcb_fs));
-ASSYM(PCB_GS, offsetof(struct pcb, pcb_gs));
ASSYM(PCB_DR0, offsetof(struct pcb, pcb_dr0));
ASSYM(PCB_DR1, offsetof(struct pcb, pcb_dr1));
ASSYM(PCB_DR2, offsetof(struct pcb, pcb_dr2));
ASSYM(PCB_DR3, offsetof(struct pcb, pcb_dr3));
ASSYM(PCB_DR6, offsetof(struct pcb, pcb_dr6));
ASSYM(PCB_DR7, offsetof(struct pcb, pcb_dr7));
+ASSYM(PCB_TSSP, offsetof(struct pcb, pcb_tssp));
ASSYM(PCB_DBREGS, PCB_DBREGS);
ASSYM(PCB_32BIT, PCB_32BIT);
ASSYM(PCB_GS32BIT, PCB_GS32BIT);
@@ -193,7 +194,13 @@ ASSYM(TF_CS, offsetof(struct trapframe, tf_cs));
ASSYM(TF_RFLAGS, offsetof(struct trapframe, tf_rflags));
ASSYM(TF_RSP, offsetof(struct trapframe, tf_rsp));
ASSYM(TF_SS, offsetof(struct trapframe, tf_ss));
+ASSYM(TF_DS, offsetof(struct trapframe, tf_ds));
+ASSYM(TF_ES, offsetof(struct trapframe, tf_es));
+ASSYM(TF_FS, offsetof(struct trapframe, tf_fs));
+ASSYM(TF_GS, offsetof(struct trapframe, tf_gs));
+ASSYM(TF_FLAGS, offsetof(struct trapframe, tf_flags));
ASSYM(TF_SIZE, sizeof(struct trapframe));
+ASSYM(TF_HASSEGS, TF_HASSEGS);
ASSYM(SIGF_HANDLER, offsetof(struct sigframe, sf_ahu.sf_handler));
ASSYM(SIGF_UC, offsetof(struct sigframe, sf_uc));
@@ -215,7 +222,11 @@ ASSYM(PC_SCRATCH_RSP, offsetof(struct pcpu, pc_scratch_rsp));
ASSYM(PC_CURPMAP, offsetof(struct pcpu, pc_curpmap));
ASSYM(PC_TSSP, offsetof(struct pcpu, pc_tssp));
ASSYM(PC_RSP0, offsetof(struct pcpu, pc_rsp0));
+ASSYM(PC_FS32P, offsetof(struct pcpu, pc_fs32p));
ASSYM(PC_GS32P, offsetof(struct pcpu, pc_gs32p));
+ASSYM(PC_LDT, offsetof(struct pcpu, pc_ldt));
+ASSYM(PC_COMMONTSSP, offsetof(struct pcpu, pc_commontssp));
+ASSYM(PC_TSS, offsetof(struct pcpu, pc_tss));
ASSYM(LA_VER, offsetof(struct LAPIC, version));
ASSYM(LA_TPR, offsetof(struct LAPIC, tpr));
@@ -230,6 +241,10 @@ ASSYM(KDSEL, GSEL(GDATA_SEL, SEL_KPL));
ASSYM(KUCSEL, GSEL(GUCODE_SEL, SEL_UPL));
ASSYM(KUDSEL, GSEL(GUDATA_SEL, SEL_UPL));
ASSYM(KUC32SEL, GSEL(GUCODE32_SEL, SEL_UPL));
+ASSYM(KUF32SEL, GSEL(GUFS32_SEL, SEL_UPL));
+ASSYM(KUG32SEL, GSEL(GUGS32_SEL, SEL_UPL));
+ASSYM(TSSSEL, GSEL(GPROC0_SEL, SEL_KPL));
+ASSYM(LDTSEL, GSEL(GUSERLDT_SEL, SEL_KPL));
ASSYM(SEL_RPL_MASK, SEL_RPL_MASK);
ASSYM(MSR_GSBASE, MSR_GSBASE);
diff --git a/sys/amd64/amd64/machdep.c b/sys/amd64/amd64/machdep.c
index cd86789..0ad6134 100644
--- a/sys/amd64/amd64/machdep.c
+++ b/sys/amd64/amd64/machdep.c
@@ -159,7 +159,7 @@ extern vm_offset_t ksym_start, ksym_end;
#define ICH_PMBASE 0x400
#define ICH_SMI_EN ICH_PMBASE + 0x30
-int _udatasel, _ucodesel, _ucode32sel;
+int _udatasel, _ucodesel, _ucode32sel, _ufssel, _ugssel;
int cold = 1;
@@ -192,6 +192,8 @@ struct mtx icu_lock;
struct mem_range_softc mem_range_softc;
+struct mtx dt_lock; /* lock for GDT and LDT */
+
static void
cpu_startup(dummy)
void *dummy;
@@ -278,7 +280,7 @@ cpu_startup(dummy)
* Send an interrupt to process.
*
* Stack is set up to allow sigcode stored
- * at top to call routine, followed by kcall
+ * at top to call routine, followed by call
* to sigreturn routine below. After sigreturn
* resets the signal mask, the stack, and the
* frame pointer, it returns to the user
@@ -316,6 +318,8 @@ sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
sf.sf_uc.uc_mcontext.mc_len = sizeof(sf.sf_uc.uc_mcontext); /* magic */
get_fpcontext(td, &sf.sf_uc.uc_mcontext);
fpstate_drop(td);
+ sf.sf_uc.uc_mcontext.mc_fsbase = td->td_pcb->pcb_fsbase;
+ sf.sf_uc.uc_mcontext.mc_gsbase = td->td_pcb->pcb_gsbase;
/* Allocate space for the signal handler context. */
if ((td->td_pflags & TDP_ALTSTACK) != 0 && !oonstack &&
@@ -370,6 +374,11 @@ sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
regs->tf_rip = PS_STRINGS - *(p->p_sysent->sv_szsigcode);
regs->tf_rflags &= ~(PSL_T | PSL_D);
regs->tf_cs = _ucodesel;
+ regs->tf_ds = _udatasel;
+ regs->tf_es = _udatasel;
+ regs->tf_fs = _ufssel;
+ regs->tf_gs = _ugssel;
+ regs->tf_flags = TF_HASSEGS;
PROC_LOCK(p);
mtx_lock(&psp->ps_mtx);
}
@@ -401,9 +410,16 @@ sigreturn(td, uap)
ksiginfo_t ksi;
error = copyin(uap->sigcntxp, &uc, sizeof(uc));
- if (error != 0)
+ if (error != 0) {
+ printf("sigreturn (pid %d): copyin failed\n", p->p_pid);
return (error);
+ }
ucp = &uc;
+ if ((ucp->uc_mcontext.mc_flags & ~_MC_FLAG_MASK) != 0) {
+ printf("sigreturn (pid %d): mc_flags %x\n", p->p_pid,
+ ucp->uc_mcontext.mc_flags);
+ return (EINVAL);
+ }
regs = td->td_frame;
rflags = ucp->uc_mcontext.mc_rflags;
/*
@@ -420,7 +436,8 @@ sigreturn(td, uap)
* one less debugger trap, so allowing it is fairly harmless.
*/
if (!EFL_SECURE(rflags & ~PSL_RF, regs->tf_rflags & ~PSL_RF)) {
- printf("sigreturn: rflags = 0x%lx\n", rflags);
+ printf("sigreturn (pid %d): rflags = 0x%lx\n", p->p_pid,
+ rflags);
return (EINVAL);
}
@@ -431,7 +448,7 @@ sigreturn(td, uap)
*/
cs = ucp->uc_mcontext.mc_cs;
if (!CS_SECURE(cs)) {
- printf("sigreturn: cs = 0x%x\n", cs);
+ printf("sigreturn (pid %d): cs = 0x%x\n", p->p_pid, cs);
ksiginfo_init_trap(&ksi);
ksi.ksi_signo = SIGBUS;
ksi.ksi_code = BUS_OBJERR;
@@ -442,9 +459,13 @@ sigreturn(td, uap)
}
ret = set_fpcontext(td, &ucp->uc_mcontext);
- if (ret != 0)
+ if (ret != 0) {
+ printf("sigreturn (pid %d): set_fpcontext\n", p->p_pid);
return (ret);
+ }
bcopy(&ucp->uc_mcontext.mc_rdi, regs, sizeof(*regs));
+ td->td_pcb->pcb_fsbase = ucp->uc_mcontext.mc_fsbase;
+ td->td_pcb->pcb_gsbase = ucp->uc_mcontext.mc_gsbase;
PROC_LOCK(p);
#if defined(COMPAT_43)
@@ -738,22 +759,16 @@ exec_setregs(td, entry, stack, ps_strings)
{
struct trapframe *regs = td->td_frame;
struct pcb *pcb = td->td_pcb;
+
+ mtx_lock(&dt_lock);
+ if (td->td_proc->p_md.md_ldt != NULL)
+ user_ldt_free(td);
+ else
+ mtx_unlock(&dt_lock);
- critical_enter();
- wrmsr(MSR_FSBASE, 0);
- wrmsr(MSR_KGSBASE, 0); /* User value while we're in the kernel */
pcb->pcb_fsbase = 0;
pcb->pcb_gsbase = 0;
- critical_exit();
pcb->pcb_flags &= ~(PCB_32BIT | PCB_GS32BIT);
- load_ds(_udatasel);
- load_es(_udatasel);
- load_fs(_udatasel);
- load_gs(_udatasel);
- pcb->pcb_ds = _udatasel;
- pcb->pcb_es = _udatasel;
- pcb->pcb_fs = _udatasel;
- pcb->pcb_gs = _udatasel;
pcb->pcb_initial_fpucw = __INITIAL_FPUCW__;
bzero((char *)regs, sizeof(struct trapframe));
@@ -763,6 +778,11 @@ exec_setregs(td, entry, stack, ps_strings)
regs->tf_rflags = PSL_USER | (regs->tf_rflags & PSL_T);
regs->tf_ss = _udatasel;
regs->tf_cs = _ucodesel;
+ regs->tf_ds = _udatasel;
+ regs->tf_es = _udatasel;
+ regs->tf_fs = _ufssel;
+ regs->tf_gs = _ugssel;
+ regs->tf_flags = TF_HASSEGS;
/*
* Reset the hardware debug registers if they were in use.
@@ -1380,12 +1400,12 @@ hammer_time(u_int64_t modulep, u_int64_t physfree)
/*
* make gdt memory segments
*/
- gdt_segs[GPROC0_SEL].ssd_base = (uintptr_t)&common_tss[0];
-
for (x = 0; x < NGDT; x++) {
- if (x != GPROC0_SEL && x != (GPROC0_SEL + 1))
+ if (x != GPROC0_SEL && x != (GPROC0_SEL + 1) &&
+ x != GUSERLDT_SEL && x != (GUSERLDT_SEL) + 1)
ssdtosd(&gdt_segs[x], &gdt[x]);
}
+ gdt_segs[GPROC0_SEL].ssd_base = (uintptr_t)&common_tss[0];
ssdtosyssd(&gdt_segs[GPROC0_SEL],
(struct system_segment_descriptor *)&gdt[GPROC0_SEL]);
@@ -1403,6 +1423,10 @@ hammer_time(u_int64_t modulep, u_int64_t physfree)
PCPU_SET(curthread, &thread0);
PCPU_SET(curpcb, thread0.td_pcb);
PCPU_SET(tssp, &common_tss[0]);
+ PCPU_SET(commontssp, &common_tss[0]);
+ PCPU_SET(tss, (struct system_segment_descriptor *)&gdt[GPROC0_SEL]);
+ PCPU_SET(ldt, (struct system_segment_descriptor *)&gdt[GUSERLDT_SEL]);
+ PCPU_SET(fs32p, &gdt[GUFS32_SEL]);
PCPU_SET(gs32p, &gdt[GUGS32_SEL]);
/*
@@ -1415,6 +1439,7 @@ hammer_time(u_int64_t modulep, u_int64_t physfree)
*/
mutex_init();
mtx_init(&icu_lock, "icu", NULL, MTX_SPIN | MTX_NOWITNESS);
+ mtx_init(&dt_lock, "descriptor tables", NULL, MTX_DEF);
/* exceptions */
for (x = 0; x < NIDT; x++)
@@ -1503,7 +1528,8 @@ hammer_time(u_int64_t modulep, u_int64_t physfree)
common_tss[0].tss_ist2 = (long) np;
/* Set the IO permission bitmap (empty due to tss seg limit) */
- common_tss[0].tss_iobase = sizeof(struct amd64tss);
+ common_tss[0].tss_iobase = sizeof(struct amd64tss) +
+ IOPAGES * PAGE_SIZE;
gsel_tss = GSEL(GPROC0_SEL, SEL_KPL);
ltr(gsel_tss);
@@ -1531,10 +1557,12 @@ hammer_time(u_int64_t modulep, u_int64_t physfree)
_ucodesel = GSEL(GUCODE_SEL, SEL_UPL);
_udatasel = GSEL(GUDATA_SEL, SEL_UPL);
_ucode32sel = GSEL(GUCODE32_SEL, SEL_UPL);
+ _ufssel = GSEL(GUFS32_SEL, SEL_UPL);
+ _ugssel = GSEL(GUGS32_SEL, SEL_UPL);
load_ds(_udatasel);
load_es(_udatasel);
- load_fs(_udatasel);
+ load_fs(_ufssel);
/* setup proc 0's pcb */
thread0.td_pcb->pcb_flags = 0;
@@ -1656,6 +1684,17 @@ fill_regs(struct thread *td, struct reg *regs)
regs->r_rflags = tp->tf_rflags;
regs->r_rsp = tp->tf_rsp;
regs->r_ss = tp->tf_ss;
+ if (tp->tf_flags & TF_HASSEGS) {
+ regs->r_ds = tp->tf_ds;
+ regs->r_es = tp->tf_es;
+ regs->r_fs = tp->tf_fs;
+ regs->r_gs = tp->tf_gs;
+ } else {
+ regs->r_ds = 0;
+ regs->r_es = 0;
+ regs->r_fs = 0;
+ regs->r_gs = 0;
+ }
return (0);
}
@@ -1689,6 +1728,13 @@ set_regs(struct thread *td, struct reg *regs)
tp->tf_rflags = rflags;
tp->tf_rsp = regs->r_rsp;
tp->tf_ss = regs->r_ss;
+ if (0) { /* XXXKIB */
+ tp->tf_ds = regs->r_ds;
+ tp->tf_es = regs->r_es;
+ tp->tf_fs = regs->r_fs;
+ tp->tf_gs = regs->r_gs;
+ tp->tf_flags = TF_HASSEGS;
+ }
td->td_pcb->pcb_flags |= PCB_FULLCTX;
return (0);
}
@@ -1808,8 +1854,15 @@ get_mcontext(struct thread *td, mcontext_t *mcp, int flags)
mcp->mc_cs = tp->tf_cs;
mcp->mc_rsp = tp->tf_rsp;
mcp->mc_ss = tp->tf_ss;
+ mcp->mc_ds = tp->tf_ds;
+ mcp->mc_es = tp->tf_es;
+ mcp->mc_fs = tp->tf_fs;
+ mcp->mc_gs = tp->tf_gs;
+ mcp->mc_flags = tp->tf_flags;
mcp->mc_len = sizeof(*mcp);
get_fpcontext(td, mcp);
+ mcp->mc_fsbase = td->td_pcb->pcb_fsbase;
+ mcp->mc_gsbase = td->td_pcb->pcb_gsbase;
return (0);
}
@@ -1827,7 +1880,8 @@ set_mcontext(struct thread *td, const mcontext_t *mcp)
int ret;
tp = td->td_frame;
- if (mcp->mc_len != sizeof(*mcp))
+ if (mcp->mc_len != sizeof(*mcp) ||
+ (mcp->mc_flags & ~_MC_FLAG_MASK) != 0)
return (EINVAL);
rflags = (mcp->mc_rflags & PSL_USERCHANGE) |
(tp->tf_rflags & ~PSL_USERCHANGE);
@@ -1853,6 +1907,17 @@ set_mcontext(struct thread *td, const mcontext_t *mcp)
tp->tf_rflags = rflags;
tp->tf_rsp = mcp->mc_rsp;
tp->tf_ss = mcp->mc_ss;
+ tp->tf_flags = mcp->mc_flags;
+ if (tp->tf_flags & TF_HASSEGS) {
+ tp->tf_ds = mcp->mc_ds;
+ tp->tf_es = mcp->mc_es;
+ tp->tf_fs = mcp->mc_fs;
+ tp->tf_gs = mcp->mc_gs;
+ }
+ if (mcp->mc_flags & _MC_HASBASES) {
+ td->td_pcb->pcb_fsbase = mcp->mc_fsbase;
+ td->td_pcb->pcb_gsbase = mcp->mc_gsbase;
+ }
td->td_pcb->pcb_flags |= PCB_FULLCTX;
return (0);
}
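
With the machdep.c changes above, the data segment selectors and the fs/gs bases are now part of the mcontext, so they round-trip through signal delivery and get/setcontext: sendsig() stores pcb_fsbase/pcb_gsbase, while sigreturn() and set_mcontext() write them back, guarded by the new _MC_FLAG_MASK check. A minimal userland sketch of what this exposes, assuming the mc_fsbase/mc_gsbase fields are visible through <ucontext.h> as the change intends:

    #include <signal.h>
    #include <stdio.h>
    #include <ucontext.h>

    static void
    handler(int sig, siginfo_t *si, void *arg)
    {
            ucontext_t *uc = arg;

            /* Filled in by sendsig(); written back to the PCB by
             * sigreturn() when the handler returns. */
            printf("fsbase %#lx gsbase %#lx\n",
                (unsigned long)uc->uc_mcontext.mc_fsbase,
                (unsigned long)uc->uc_mcontext.mc_gsbase);
    }

Install the handler with sigaction() and SA_SIGINFO; modifying the bases in the handler changes the thread's %fs/%gs base on return, which is exactly the path sigreturn() now handles.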
diff --git a/sys/amd64/amd64/mp_machdep.c b/sys/amd64/amd64/mp_machdep.c
index b7c03d9..59e3e9b 100644
--- a/sys/amd64/amd64/mp_machdep.c
+++ b/sys/amd64/amd64/mp_machdep.c
@@ -101,8 +101,6 @@ extern pt_entry_t *KPTphys;
/* SMP page table page */
extern pt_entry_t *SMPpt;
-extern int _udatasel;
-
struct pcb stoppcbs[MAXCPU];
struct xpcb *stopxpcbs = NULL;
@@ -463,7 +461,8 @@ init_secondary(void)
/* Init tss */
common_tss[cpu] = common_tss[0];
common_tss[cpu].tss_rsp0 = 0; /* not used until after switch */
- common_tss[cpu].tss_iobase = sizeof(struct amd64tss);
+ common_tss[cpu].tss_iobase = sizeof(struct amd64tss) +
+ IOPAGES * PAGE_SIZE;
common_tss[cpu].tss_ist1 = (long)&doublefault_stack[PAGE_SIZE];
/* The NMI stack runs on IST2. */
@@ -472,12 +471,13 @@ init_secondary(void)
/* Prepare private GDT */
gdt_segs[GPROC0_SEL].ssd_base = (long) &common_tss[cpu];
- ssdtosyssd(&gdt_segs[GPROC0_SEL],
- (struct system_segment_descriptor *)&gdt[NGDT * cpu + GPROC0_SEL]);
for (x = 0; x < NGDT; x++) {
- if (x != GPROC0_SEL && x != (GPROC0_SEL + 1))
+ if (x != GPROC0_SEL && x != (GPROC0_SEL + 1) &&
+ x != GUSERLDT_SEL && x != (GUSERLDT_SEL + 1))
ssdtosd(&gdt_segs[x], &gdt[NGDT * cpu + x]);
}
+ ssdtosyssd(&gdt_segs[GPROC0_SEL],
+ (struct system_segment_descriptor *)&gdt[NGDT * cpu + GPROC0_SEL]);
ap_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1;
ap_gdt.rd_base = (long) &gdt[NGDT * cpu];
lgdt(&ap_gdt); /* does magic intra-segment return */
@@ -491,8 +491,14 @@ init_secondary(void)
pc->pc_prvspace = pc;
pc->pc_curthread = 0;
pc->pc_tssp = &common_tss[cpu];
+ pc->pc_commontssp = &common_tss[cpu];
pc->pc_rsp0 = 0;
+ pc->pc_tss = (struct system_segment_descriptor *)&gdt[NGDT * cpu +
+ GPROC0_SEL];
+ pc->pc_fs32p = &gdt[NGDT * cpu + GUFS32_SEL];
pc->pc_gs32p = &gdt[NGDT * cpu + GUGS32_SEL];
+ pc->pc_ldt = (struct system_segment_descriptor *)&gdt[NGDT * cpu +
+ GUSERLDT_SEL];
/* Save the per-cpu pointer for use by the NMI handler. */
np->np_pcpu = (register_t) pc;
@@ -601,7 +607,7 @@ init_secondary(void)
load_cr4(rcr4() | CR4_PGE);
load_ds(_udatasel);
load_es(_udatasel);
- load_fs(_udatasel);
+ load_fs(_ufssel);
mtx_unlock_spin(&ap_boot_mtx);
/* wait until all the AP's are up */
diff --git a/sys/amd64/amd64/sys_machdep.c b/sys/amd64/amd64/sys_machdep.c
index 7f022d0..834dd2c 100644
--- a/sys/amd64/amd64/sys_machdep.c
+++ b/sys/amd64/amd64/sys_machdep.c
@@ -36,16 +36,39 @@ __FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/mutex.h>
+#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/sysproto.h>
-#include <machine/specialreg.h>
-#include <machine/sysarch.h>
-#include <machine/pcb.h>
+#include <sys/uio.h>
#include <vm/vm.h>
#include <vm/pmap.h>
+#include <vm/vm_kern.h> /* for kernel_map */
+#include <vm/vm_extern.h>
+
+#include <machine/frame.h>
+#include <machine/md_var.h>
+#include <machine/pcb.h>
+#include <machine/specialreg.h>
+#include <machine/sysarch.h>
+#include <machine/tss.h>
#include <machine/vmparam.h>
+#include <security/audit/audit.h>
+
+int max_ldt_segment = 1024;
+#define LD_PER_PAGE 512
+#define NULL_LDT_BASE ((caddr_t)NULL)
+
+#ifdef notyet
+#ifdef SMP
+static void set_user_ldt_rv(struct vmspace *vmsp);
+#endif
+#endif
+static void user_ldt_derefl(struct proc_ldt *pldt);
+
#ifndef _SYS_SYSPROTO_H_
struct sysarch_args {
int op;
@@ -54,6 +77,83 @@ struct sysarch_args {
#endif
int
+sysarch_ldt(struct thread *td, struct sysarch_args *uap, int uap_space)
+{
+ struct i386_ldt_args *largs, la;
+ struct user_segment_descriptor *lp;
+ int error = 0;
+
+ /*
+ * XXXKIB check that the BSM generation code knows to encode
+ * the op argument.
+ */
+ AUDIT_ARG(cmd, uap->op);
+ if (uap_space == UIO_USERSPACE) {
+ error = copyin(uap->parms, &la, sizeof(struct i386_ldt_args));
+ if (error != 0)
+ return (error);
+ largs = &la;
+ } else
+ largs = (struct i386_ldt_args *)uap->parms;
+ if (largs->num > max_ldt_segment || largs->num <= 0)
+ return (EINVAL);
+
+ switch (uap->op) {
+ case I386_GET_LDT:
+ error = amd64_get_ldt(td, largs);
+ break;
+ case I386_SET_LDT:
+ if (largs->descs != NULL) {
+ lp = (struct user_segment_descriptor *)
+ kmem_alloc(kernel_map, largs->num *
+ sizeof(struct user_segment_descriptor));
+ if (lp == NULL) {
+ error = ENOMEM;
+ break;
+ }
+ error = copyin(largs->descs, lp, largs->num *
+ sizeof(struct user_segment_descriptor));
+ if (error == 0)
+ error = amd64_set_ldt(td, largs, lp);
+ kmem_free(kernel_map, (vm_offset_t)lp, largs->num *
+ sizeof(struct user_segment_descriptor));
+ } else {
+ error = amd64_set_ldt(td, largs, NULL);
+ }
+ break;
+ }
+ return (error);
+}
+
+void
+update_gdt_gsbase(struct thread *td, uint32_t base)
+{
+ struct user_segment_descriptor *sd;
+
+ if (td != curthread)
+ return;
+ critical_enter();
+ sd = PCPU_GET(gs32p);
+ sd->sd_lobase = base & 0xffffff;
+ sd->sd_hibase = (base >> 24) & 0xff;
+ critical_exit();
+}
+
+void
+update_gdt_fsbase(struct thread *td, uint32_t base)
+{
+ struct user_segment_descriptor *sd;
+
+ if (td != curthread)
+ return;
+ critical_enter();
+ sd = PCPU_GET(fs32p);
+ sd->sd_lobase = base & 0xffffff;
+ sd->sd_hibase = (base >> 24) & 0xff;
+ critical_exit();
+}
+
+int
sysarch(td, uap)
struct thread *td;
register struct sysarch_args *uap;
@@ -62,8 +162,36 @@ sysarch(td, uap)
struct pcb *pcb = curthread->td_pcb;
uint32_t i386base;
uint64_t a64base;
+ struct i386_ioperm_args iargs;
+
+ if (uap->op == I386_GET_LDT || uap->op == I386_SET_LDT)
+ return (sysarch_ldt(td, uap, UIO_USERSPACE));
+ /*
+ * XXXKIB check that the BSM generation code knows to encode
+ * the op argument.
+ */
+ AUDIT_ARG(cmd, uap->op);
+ switch (uap->op) {
+ case I386_GET_IOPERM:
+ case I386_SET_IOPERM:
+ if ((error = copyin(uap->parms, &iargs,
+ sizeof(struct i386_ioperm_args))) != 0)
+ return (error);
+ break;
+ default:
+ break;
+ }
- switch(uap->op) {
+ switch (uap->op) {
+ case I386_GET_IOPERM:
+ error = amd64_get_ioperm(td, &iargs);
+ if (error == 0)
+ error = copyout(&iargs, uap->parms,
+ sizeof(struct i386_ioperm_args));
+ break;
+ case I386_SET_IOPERM:
+ error = amd64_set_ioperm(td, &iargs);
+ break;
case I386_GET_FSBASE:
i386base = pcb->pcb_fsbase;
error = copyout(&i386base, uap->parms, sizeof(i386base));
@@ -71,10 +199,9 @@ sysarch(td, uap)
case I386_SET_FSBASE:
error = copyin(uap->parms, &i386base, sizeof(i386base));
if (!error) {
- critical_enter();
- wrmsr(MSR_FSBASE, i386base);
pcb->pcb_fsbase = i386base;
- critical_exit();
+ td->td_frame->tf_fs = _ufssel;
+ update_gdt_fsbase(td, i386base);
}
break;
case I386_GET_GSBASE:
@@ -84,10 +211,9 @@ sysarch(td, uap)
case I386_SET_GSBASE:
error = copyin(uap->parms, &i386base, sizeof(i386base));
if (!error) {
- critical_enter();
- wrmsr(MSR_KGSBASE, i386base);
pcb->pcb_gsbase = i386base;
- critical_exit();
+ td->td_frame->tf_gs = _ugssel;
+ update_gdt_gsbase(td, i386base);
}
break;
case AMD64_GET_FSBASE:
@@ -98,13 +224,10 @@ sysarch(td, uap)
error = copyin(uap->parms, &a64base, sizeof(a64base));
if (!error) {
if (a64base < VM_MAXUSER_ADDRESS) {
- critical_enter();
- wrmsr(MSR_FSBASE, a64base);
pcb->pcb_fsbase = a64base;
- critical_exit();
- } else {
+ td->td_frame->tf_fs = _ufssel;
+ } else
error = EINVAL;
- }
}
break;
@@ -116,13 +239,10 @@ sysarch(td, uap)
error = copyin(uap->parms, &a64base, sizeof(a64base));
if (!error) {
if (a64base < VM_MAXUSER_ADDRESS) {
- critical_enter();
- wrmsr(MSR_KGSBASE, a64base);
pcb->pcb_gsbase = a64base;
- critical_exit();
- } else {
+ td->td_frame->tf_gs = _ugssel;
+ } else
error = EINVAL;
- }
}
break;
@@ -132,3 +252,424 @@ sysarch(td, uap)
}
return (error);
}
+
+int
+amd64_set_ioperm(td, uap)
+ struct thread *td;
+ struct i386_ioperm_args *uap;
+{
+ int i, error;
+ char *iomap;
+ struct amd64tss *tssp;
+ struct system_segment_descriptor *tss_sd;
+ u_long *addr;
+ struct pcb *pcb;
+
+ if ((error = priv_check(td, PRIV_IO)) != 0)
+ return (error);
+ if ((error = securelevel_gt(td->td_ucred, 0)) != 0)
+ return (error);
+ if (uap->start + uap->length > IOPAGES * PAGE_SIZE * NBBY)
+ return (EINVAL);
+
+ /*
+ * XXX
+ * While this is restricted to root, we should probably figure out
+ * whether any other driver is using this i/o address, so as not to
+ * cause confusion. This probably requires a global 'usage registry'.
+ */
+ pcb = td->td_pcb;
+ if (pcb->pcb_tssp == NULL) {
+ tssp = (struct amd64tss *)kmem_alloc(kernel_map,
+ ctob(IOPAGES+1));
+ if (tssp == NULL)
+ return (ENOMEM);
+ iomap = (char *)&tssp[1];
+ addr = (u_long *)iomap;
+ for (i = 0; i < (ctob(IOPAGES) + 1) / sizeof(u_long); i++)
+ *addr++ = ~0;
+ critical_enter();
+ /* Takes care of tss_rsp0. */
+ memcpy(tssp, &common_tss[PCPU_GET(cpuid)],
+ sizeof(struct amd64tss));
+ tssp->tss_iobase = sizeof(*tssp);
+ pcb->pcb_tssp = tssp;
+ tss_sd = PCPU_GET(tss);
+ tss_sd->sd_lobase = (u_long)tssp & 0xffffff;
+ tss_sd->sd_hibase = ((u_long)tssp >> 24) & 0xfffffffffful;
+ tss_sd->sd_type = SDT_SYSTSS;
+ ltr(GSEL(GPROC0_SEL, SEL_KPL));
+ PCPU_SET(tssp, tssp);
+ critical_exit();
+ } else
+ iomap = (char *)&pcb->pcb_tssp[1];
+ for (i = uap->start; i < uap->start + uap->length; i++) {
+ if (uap->enable)
+ iomap[i >> 3] &= ~(1 << (i & 7));
+ else
+ iomap[i >> 3] |= (1 << (i & 7));
+ }
+ return (error);
+}
+
+int
+amd64_get_ioperm(td, uap)
+ struct thread *td;
+ struct i386_ioperm_args *uap;
+{
+ int i, state;
+ char *iomap;
+
+ if (uap->start >= IOPAGES * PAGE_SIZE * NBBY)
+ return (EINVAL);
+ if (td->td_pcb->pcb_tssp == NULL) {
+ uap->length = 0;
+ goto done;
+ }
+
+ iomap = (char *)&td->td_pcb->pcb_tssp[1];
+
+ i = uap->start;
+ state = (iomap[i >> 3] >> (i & 7)) & 1;
+ uap->enable = !state;
+ uap->length = 1;
+
+ for (i = uap->start + 1; i < IOPAGES * PAGE_SIZE * NBBY; i++) {
+ if (state != ((iomap[i >> 3] >> (i & 7)) & 1))
+ break;
+ uap->length++;
+ }
+
+done:
+ return (0);
+}
+
+/*
+ * Update the GDT entry pointing to the LDT to point to the LDT of the
+ * current process.
+ */
+void
+set_user_ldt(struct mdproc *mdp)
+{
+
+ critical_enter();
+ *PCPU_GET(ldt) = mdp->md_ldt_sd;
+ lldt(GSEL(GUSERLDT_SEL, SEL_KPL));
+ critical_exit();
+}
+
+#ifdef notyet
+#ifdef SMP
+static void
+set_user_ldt_rv(struct vmspace *vmsp)
+{
+ struct thread *td;
+
+ td = curthread;
+ if (vmsp != td->td_proc->p_vmspace)
+ return;
+
+ set_user_ldt(&td->td_proc->p_md);
+}
+#endif
+#endif
+
+struct proc_ldt *
+user_ldt_alloc(struct proc *p, int force)
+{
+ struct proc_ldt *pldt, *new_ldt;
+ struct mdproc *mdp;
+ struct soft_segment_descriptor sldt;
+
+ mtx_assert(&dt_lock, MA_OWNED);
+ mdp = &p->p_md;
+ if (!force && mdp->md_ldt != NULL)
+ return (mdp->md_ldt);
+ mtx_unlock(&dt_lock);
+ new_ldt = malloc(sizeof(struct proc_ldt), M_SUBPROC, M_WAITOK);
+ new_ldt->ldt_base = (caddr_t)kmem_alloc(kernel_map,
+ max_ldt_segment * sizeof(struct user_segment_descriptor));
+ if (new_ldt->ldt_base == NULL) {
+ FREE(new_ldt, M_SUBPROC);
+ mtx_lock(&dt_lock);
+ return (NULL);
+ }
+ new_ldt->ldt_refcnt = 1;
+ sldt.ssd_base = (uint64_t)new_ldt->ldt_base;
+ sldt.ssd_limit = max_ldt_segment *
+ sizeof(struct user_segment_descriptor) - 1;
+ sldt.ssd_type = SDT_SYSLDT;
+ sldt.ssd_dpl = SEL_KPL;
+ sldt.ssd_p = 1;
+ sldt.ssd_long = 0;
+ sldt.ssd_def32 = 0;
+ sldt.ssd_gran = 0;
+ mtx_lock(&dt_lock);
+ pldt = mdp->md_ldt;
+ if (pldt != NULL && !force) {
+ kmem_free(kernel_map, (vm_offset_t)new_ldt->ldt_base,
+ max_ldt_segment * sizeof(struct user_segment_descriptor));
+ free(new_ldt, M_SUBPROC);
+ return (pldt);
+ }
+
+ mdp->md_ldt = new_ldt;
+ if (pldt != NULL) {
+ bcopy(pldt->ldt_base, new_ldt->ldt_base, max_ldt_segment *
+ sizeof(struct user_segment_descriptor));
+ user_ldt_derefl(pldt);
+ }
+ ssdtosyssd(&sldt, &p->p_md.md_ldt_sd);
+ if (p == curproc)
+ set_user_ldt(mdp);
+
+ return (mdp->md_ldt);
+}
+
+void
+user_ldt_free(struct thread *td)
+{
+ struct proc *p = td->td_proc;
+ struct mdproc *mdp = &p->p_md;
+ struct proc_ldt *pldt;
+
+ mtx_assert(&dt_lock, MA_OWNED);
+ if ((pldt = mdp->md_ldt) == NULL) {
+ mtx_unlock(&dt_lock);
+ return;
+ }
+
+ mdp->md_ldt = NULL;
+ bzero(&mdp->md_ldt_sd, sizeof(mdp->md_ldt_sd));
+ if (td == curthread)
+ lldt(GSEL(GNULL_SEL, SEL_KPL));
+ user_ldt_deref(pldt);
+}
+
+static void
+user_ldt_derefl(struct proc_ldt *pldt)
+{
+
+ if (--pldt->ldt_refcnt == 0) {
+ kmem_free(kernel_map, (vm_offset_t)pldt->ldt_base,
+ max_ldt_segment * sizeof(struct user_segment_descriptor));
+ free(pldt, M_SUBPROC);
+ }
+}
+
+void
+user_ldt_deref(struct proc_ldt *pldt)
+{
+
+ mtx_assert(&dt_lock, MA_OWNED);
+ user_ldt_derefl(pldt);
+ mtx_unlock(&dt_lock);
+}
+
+/*
+ * Note for the authors of compat layers (linux, etc): copyout() in
+ * the function below is not a problem since it presents data in
+ * arch-specific format (i.e. i386-specific in this case), not in
+ * the OS-specific one.
+ */
+int
+amd64_get_ldt(td, uap)
+ struct thread *td;
+ struct i386_ldt_args *uap;
+{
+ int error = 0;
+ struct proc_ldt *pldt;
+ int num;
+ struct user_segment_descriptor *lp;
+
+#ifdef DEBUG
+ printf("amd64_get_ldt: start=%d num=%d descs=%p\n",
+ uap->start, uap->num, (void *)uap->descs);
+#endif
+
+ if ((pldt = td->td_proc->p_md.md_ldt) != NULL) {
+ lp = &((struct user_segment_descriptor *)(pldt->ldt_base))
+ [uap->start];
+ num = min(uap->num, max_ldt_segment);
+ } else
+ return (EINVAL);
+
+ if ((uap->start > (unsigned int)max_ldt_segment) ||
+ ((unsigned int)num > (unsigned int)max_ldt_segment) ||
+ ((unsigned int)(uap->start + num) > (unsigned int)max_ldt_segment))
+ return(EINVAL);
+
+ error = copyout(lp, uap->descs, num *
+ sizeof(struct user_segment_descriptor));
+ if (!error)
+ td->td_retval[0] = num;
+
+ return(error);
+}
+
+int
+amd64_set_ldt(td, uap, descs)
+ struct thread *td;
+ struct i386_ldt_args *uap;
+ struct user_segment_descriptor *descs;
+{
+ int error = 0, i;
+ int largest_ld;
+ struct mdproc *mdp = &td->td_proc->p_md;
+ struct proc_ldt *pldt;
+ struct user_segment_descriptor *dp;
+ struct proc *p;
+
+#ifdef DEBUG
+ printf("amd64_set_ldt: start=%d num=%d descs=%p\n",
+ uap->start, uap->num, (void *)uap->descs);
+#endif
+
+ p = td->td_proc;
+ if (descs == NULL) {
+ /* Free descriptors */
+ if (uap->start == 0 && uap->num == 0)
+ uap->num = max_ldt_segment;
+ if (uap->num <= 0)
+ return (EINVAL);
+ if ((pldt = mdp->md_ldt) == NULL ||
+ uap->start >= max_ldt_segment)
+ return (0);
+ largest_ld = uap->start + uap->num;
+ if (largest_ld > max_ldt_segment)
+ largest_ld = max_ldt_segment;
+ i = largest_ld - uap->start;
+ mtx_lock(&dt_lock);
+ bzero(&((struct user_segment_descriptor *)(pldt->ldt_base))
+ [uap->start], sizeof(struct user_segment_descriptor) * i);
+ mtx_unlock(&dt_lock);
+ return (0);
+ }
+
+ if (!(uap->start == LDT_AUTO_ALLOC && uap->num == 1)) {
+ /* verify range of descriptors to modify */
+ largest_ld = uap->start + uap->num;
+ if (uap->start >= max_ldt_segment ||
+ uap->num < 0 || largest_ld > max_ldt_segment)
+ return (EINVAL);
+ }
+
+ /* Check descriptors for access violations */
+ for (i = 0; i < uap->num; i++) {
+ dp = &descs[i];
+
+ switch (dp->sd_type) {
+ case SDT_SYSNULL: /* system null */
+ dp->sd_p = 0;
+ break;
+ case SDT_SYS286TSS:
+ case SDT_SYSLDT:
+ case SDT_SYS286BSY:
+ case SDT_SYS286CGT:
+ case SDT_SYSTASKGT:
+ case SDT_SYS286IGT:
+ case SDT_SYS286TGT:
+ case SDT_SYSNULL2:
+ case SDT_SYSTSS:
+ case SDT_SYSNULL3:
+ case SDT_SYSBSY:
+ case SDT_SYSCGT:
+ case SDT_SYSNULL4:
+ case SDT_SYSIGT:
+ case SDT_SYSTGT:
+ /* I can't think of any reason to allow a user proc
+ * to create a segment of these types. They are
+ * for OS use only.
+ */
+ return (EACCES);
+ /*NOTREACHED*/
+
+ /* memory segment types */
+ case SDT_MEMEC: /* memory execute only conforming */
+ case SDT_MEMEAC: /* memory execute only accessed conforming */
+ case SDT_MEMERC: /* memory execute read conforming */
+ case SDT_MEMERAC: /* memory execute read accessed conforming */
+ /* Must be "present" if executable and conforming. */
+ if (dp->sd_p == 0)
+ return (EACCES);
+ break;
+ case SDT_MEMRO: /* memory read only */
+ case SDT_MEMROA: /* memory read only accessed */
+ case SDT_MEMRW: /* memory read write */
+ case SDT_MEMRWA: /* memory read write accessed */
+ case SDT_MEMROD: /* memory read only expand dwn limit */
+ case SDT_MEMRODA: /* memory read only expand dwn lim accessed */
+ case SDT_MEMRWD: /* memory read write expand dwn limit */
+ case SDT_MEMRWDA: /* memory read write expand dwn lim accessed */
+ case SDT_MEME: /* memory execute only */
+ case SDT_MEMEA: /* memory execute only accessed */
+ case SDT_MEMER: /* memory execute read */
+ case SDT_MEMERA: /* memory execute read accessed */
+ break;
+ default:
+ return(EINVAL);
+ /*NOTREACHED*/
+ }
+
+ /* Only user (ring-3) descriptors may be present. */
+ if ((dp->sd_p != 0) && (dp->sd_dpl != SEL_UPL))
+ return (EACCES);
+ }
+
+ if (uap->start == LDT_AUTO_ALLOC && uap->num == 1) {
+ /* Allocate a free slot */
+ mtx_lock(&dt_lock);
+ pldt = user_ldt_alloc(p, 0);
+ if (pldt == NULL) {
+ mtx_unlock(&dt_lock);
+ return (ENOMEM);
+ }
+
+ /*
+ * Start scanning a bit up to leave room for NVidia and
+ * Wine, which still use the "Blat" method of allocation.
+ */
+ i = 16;
+ dp = &((struct user_segment_descriptor *)(pldt->ldt_base))[i];
+ for (; i < max_ldt_segment; ++i, ++dp) {
+ if (dp->sd_type == SDT_SYSNULL)
+ break;
+ }
+ if (i >= max_ldt_segment) {
+ mtx_unlock(&dt_lock);
+ return (ENOSPC);
+ }
+ uap->start = i;
+ error = amd64_set_ldt_data(td, i, 1, descs);
+ mtx_unlock(&dt_lock);
+ } else {
+ largest_ld = uap->start + uap->num;
+ if (largest_ld > max_ldt_segment)
+ return (EINVAL);
+ mtx_lock(&dt_lock);
+ if (user_ldt_alloc(p, 0) != NULL) {
+ error = amd64_set_ldt_data(td, uap->start, uap->num,
+ descs);
+ }
+ mtx_unlock(&dt_lock);
+ }
+ if (error == 0)
+ td->td_retval[0] = uap->start;
+ return (error);
+}
+
+int
+amd64_set_ldt_data(struct thread *td, int start, int num,
+ struct user_segment_descriptor *descs)
+{
+ struct mdproc *mdp = &td->td_proc->p_md;
+ struct proc_ldt *pldt = mdp->md_ldt;
+
+ mtx_assert(&dt_lock, MA_OWNED);
+
+ /* Fill in range */
+ bcopy(descs,
+ &((struct user_segment_descriptor *)(pldt->ldt_base))[start],
+ num * sizeof(struct user_segment_descriptor));
+ return (0);
+}
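
sys_machdep.c carries the bulk of the new functionality: an amd64 copy of the i386 user LDT machinery (user_ldt_alloc()/user_ldt_free(), amd64_get_ldt()/amd64_set_ldt(), all serialized by the new dt_lock) plus I/O permission bitmaps backed by a private, enlarged per-process TSS (amd64_set_ioperm()/amd64_get_ioperm()). From userland both arrive through sysarch(2). A hedged sketch of the ioperm side, using the i386_ioperm_args field names seen above; whether a libc convenience wrapper exists is not shown in this diff, so raw sysarch() is used:

    #include <machine/sysarch.h>
    #include <err.h>

    /* Grant the calling (privileged) process direct access to a range of
     * I/O ports.  enable=1 clears the corresponding bits in the TSS
     * bitmap, i.e. allows the access. */
    static void
    allow_ports(unsigned int start, unsigned int length)
    {
            struct i386_ioperm_args io;

            io.start = start;
            io.length = length;
            io.enable = 1;
            if (sysarch(I386_SET_IOPERM, &io) != 0)
                    err(1, "I386_SET_IOPERM");
    }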
diff --git a/sys/amd64/amd64/trap.c b/sys/amd64/amd64/trap.c
index a519414..467feaf 100644
--- a/sys/amd64/amd64/trap.c
+++ b/sys/amd64/amd64/trap.c
@@ -171,6 +171,52 @@ SYSCTL_INT(_machdep, OID_AUTO, prot_fault_translation, CTLFLAG_RW,
extern char *syscallnames[];
+/* #define DEBUG 1 */
+#ifdef DEBUG
+static void
+report_seg_fault(const char *segn, struct trapframe *frame)
+{
+ struct proc_ldt *pldt;
+ struct trapframe *pf;
+
+ pldt = curproc->p_md.md_ldt;
+ printf("%d: %s load fault %lx %p %d\n",
+ curproc->p_pid, segn, frame->tf_err,
+ pldt != NULL ? pldt->ldt_base : NULL,
+ pldt != NULL ? pldt->ldt_refcnt : 0);
+ kdb_backtrace();
+ pf = (struct trapframe *)frame->tf_rsp;
+ printf("rdi %lx\n", pf->tf_rdi);
+ printf("rsi %lx\n", pf->tf_rsi);
+ printf("rdx %lx\n", pf->tf_rdx);
+ printf("rcx %lx\n", pf->tf_rcx);
+ printf("r8 %lx\n", pf->tf_r8);
+ printf("r9 %lx\n", pf->tf_r9);
+ printf("rax %lx\n", pf->tf_rax);
+ printf("rbx %lx\n", pf->tf_rbx);
+ printf("rbp %lx\n", pf->tf_rbp);
+ printf("r10 %lx\n", pf->tf_r10);
+ printf("r11 %lx\n", pf->tf_r11);
+ printf("r12 %lx\n", pf->tf_r12);
+ printf("r13 %lx\n", pf->tf_r13);
+ printf("r14 %lx\n", pf->tf_r14);
+ printf("r15 %lx\n", pf->tf_r15);
+ printf("fs %x\n", pf->tf_fs);
+ printf("gs %x\n", pf->tf_gs);
+ printf("es %x\n", pf->tf_es);
+ printf("ds %x\n", pf->tf_ds);
+ printf("tno %x\n", pf->tf_trapno);
+ printf("adr %lx\n", pf->tf_addr);
+ printf("flg %x\n", pf->tf_flags);
+ printf("err %lx\n", pf->tf_err);
+ printf("rip %lx\n", pf->tf_rip);
+ printf("cs %lx\n", pf->tf_cs);
+ printf("rfl %lx\n", pf->tf_rflags);
+ printf("rsp %lx\n", pf->tf_rsp);
+ printf("ss %lx\n", pf->tf_ss);
+}
+#endif
+
/*
* Exception, fault, and trap interface to the FreeBSD kernel.
* This common code is called from assembly language IDT gate entry
@@ -258,6 +304,9 @@ trap(struct trapframe *frame)
*/
printf("kernel trap %d with interrupts disabled\n",
type);
+#ifdef DEBUG
+ report_seg_fault("hlt", frame);
+#endif
/*
* We shouldn't enable interrupts while holding a
* spin lock or servicing an NMI.
@@ -470,6 +519,38 @@ trap(struct trapframe *frame)
frame->tf_rip = (long)doreti_iret_fault;
goto out;
}
+ if (frame->tf_rip == (long)ld_ds) {
+#ifdef DEBUG
+ report_seg_fault("ds", frame);
+#endif
+ frame->tf_rip = (long)ds_load_fault;
+ frame->tf_ds = _udatasel;
+ goto out;
+ }
+ if (frame->tf_rip == (long)ld_es) {
+#ifdef DEBUG
+ report_seg_fault("es", frame);
+#endif
+ frame->tf_rip = (long)es_load_fault;
+ frame->tf_es = _udatasel;
+ goto out;
+ }
+ if (frame->tf_rip == (long)ld_fs) {
+#ifdef DEBUG
+ report_seg_fault("fs", frame);
+#endif
+ frame->tf_rip = (long)fs_load_fault;
+ frame->tf_fs = _ufssel;
+ goto out;
+ }
+ if (frame->tf_rip == (long)ld_gs) {
+#ifdef DEBUG
+ report_seg_fault("gs", frame);
+#endif
+ frame->tf_rip = (long)gs_load_fault;
+ frame->tf_gs = _ugssel;
+ goto out;
+ }
if (PCPU_GET(curpcb)->pcb_onfault != NULL) {
frame->tf_rip =
(long)PCPU_GET(curpcb)->pcb_onfault;
@@ -564,6 +645,9 @@ trap(struct trapframe *frame)
trapsignal(td, &ksi);
#ifdef DEBUG
+{
+ register_t rg,rgk, rf;
+
if (type <= MAX_TRAP_MSG) {
uprintf("fatal process exception: %s",
trap_msg[type]);
@@ -571,6 +655,17 @@ trap(struct trapframe *frame)
uprintf(", fault VA = 0x%lx", frame->tf_addr);
uprintf("\n");
}
+ rf = rdmsr(0xc0000100);
+ rg = rdmsr(0xc0000101);
+ rgk = rdmsr(0xc0000102);
+ uprintf("pid %d TRAP %d rip %lx err %lx addr %lx cs %lx ss %lx ds %x "
+ "es %x fs %x fsbase %lx %lx gs %x gsbase %lx %lx %lx\n",
+ curproc->p_pid, type, frame->tf_rip, frame->tf_err,
+ frame->tf_addr,
+ frame->tf_cs, frame->tf_ss, frame->tf_ds, frame->tf_es,
+ frame->tf_fs, td->td_pcb->pcb_fsbase, rf,
+ frame->tf_gs, td->td_pcb->pcb_gsbase, rg, rgk);
+}
#endif
user:
diff --git a/sys/amd64/amd64/vm_machdep.c b/sys/amd64/amd64/vm_machdep.c
index f0003ee..928be34 100644
--- a/sys/amd64/amd64/vm_machdep.c
+++ b/sys/amd64/amd64/vm_machdep.c
@@ -71,6 +71,7 @@ __FBSDID("$FreeBSD$");
#include <machine/md_var.h>
#include <machine/pcb.h>
#include <machine/specialreg.h>
+#include <machine/tss.h>
#include <vm/vm.h>
#include <vm/vm_extern.h>
@@ -102,12 +103,24 @@ cpu_fork(td1, p2, td2, flags)
{
register struct proc *p1;
struct pcb *pcb2;
- struct mdproc *mdp2;
+ struct mdproc *mdp1, *mdp2;
+ struct proc_ldt *pldt;
pmap_t pmap2;
p1 = td1->td_proc;
- if ((flags & RFPROC) == 0)
+ if ((flags & RFPROC) == 0) {
+ if ((flags & RFMEM) == 0) {
+ /* unshare user LDT */
+ mdp1 = &p1->p_md;
+ mtx_lock(&dt_lock);
+ if ((pldt = mdp1->md_ldt) != NULL &&
+ pldt->ldt_refcnt > 1 &&
+ user_ldt_alloc(p1, 1) == NULL)
+ panic("could not copy LDT");
+ mtx_unlock(&dt_lock);
+ }
return;
+ }
/* Ensure that p1's pcb is up to date. */
fpuexit(td1);
@@ -170,6 +183,32 @@ cpu_fork(td1, p2, td2, flags)
td2->td_md.md_spinlock_count = 1;
td2->td_md.md_saved_flags = PSL_KERNEL | PSL_I;
+ /* As an i386, do not copy io permission bitmap. */
+ pcb2->pcb_tssp = NULL;
+
+ /* Copy the LDT, if necessary. */
+ mdp1 = &td1->td_proc->p_md;
+ mdp2 = &p2->p_md;
+ mtx_lock(&dt_lock);
+ if (mdp1->md_ldt != NULL) {
+ if (flags & RFMEM) {
+ mdp1->md_ldt->ldt_refcnt++;
+ mdp2->md_ldt = mdp1->md_ldt;
+ bcopy(&mdp1->md_ldt_sd, &mdp2->md_ldt_sd, sizeof(struct
+ system_segment_descriptor));
+ } else {
+ mdp2->md_ldt = NULL;
+ mdp2->md_ldt = user_ldt_alloc(p2, 0);
+ if (mdp2->md_ldt == NULL)
+ panic("could not copy LDT");
+ amd64_set_ldt_data(td2, 0, max_ldt_segment,
+ (struct user_segment_descriptor *)
+ mdp1->md_ldt->ldt_base);
+ }
+ } else
+ mdp2->md_ldt = NULL;
+ mtx_unlock(&dt_lock);
+
/*
* Now, cpu_switch() can schedule the new process.
* pcb_rsp is loaded pointing to the cpu_switch() stack frame
@@ -204,25 +243,49 @@ cpu_set_fork_handler(td, func, arg)
void
cpu_exit(struct thread *td)
{
+
+ /*
+ * If this process has a custom LDT, release it.
+ */
+ mtx_lock(&dt_lock);
+ if (td->td_proc->p_md.md_ldt != 0)
+ user_ldt_free(td);
+ else
+ mtx_unlock(&dt_lock);
}
void
cpu_thread_exit(struct thread *td)
{
+ struct pcb *pcb;
if (td == PCPU_GET(fpcurthread))
fpudrop();
+ pcb = td->td_pcb;
+
/* Disable any hardware breakpoints. */
- if (td->td_pcb->pcb_flags & PCB_DBREGS) {
+ if (pcb->pcb_flags & PCB_DBREGS) {
reset_dbregs();
- td->td_pcb->pcb_flags &= ~PCB_DBREGS;
+ pcb->pcb_flags &= ~PCB_DBREGS;
}
}
void
cpu_thread_clean(struct thread *td)
{
+ struct pcb *pcb;
+
+ pcb = td->td_pcb;
+
+ /*
+ * Clean TSS/iomap
+ */
+ if (pcb->pcb_tssp != NULL) {
+ kmem_free(kernel_map, (vm_offset_t)pcb->pcb_tssp,
+ ctob(IOPAGES + 1));
+ pcb->pcb_tssp = NULL;
+ }
}
void
@@ -247,6 +310,8 @@ cpu_thread_alloc(struct thread *td)
void
cpu_thread_free(struct thread *td)
{
+
+ cpu_thread_clean(td);
}
/*
@@ -358,6 +423,11 @@ cpu_set_upcall_kse(struct thread *td, void (*entry)(void *), void *arg,
((register_t)stack->ss_sp + stack->ss_size) & ~0x0f;
td->td_frame->tf_rsp -= 8;
td->td_frame->tf_rip = (register_t)entry;
+ td->td_frame->tf_ds = _udatasel;
+ td->td_frame->tf_es = _udatasel;
+ td->td_frame->tf_fs = _ufssel;
+ td->td_frame->tf_gs = _ugssel;
+ td->td_frame->tf_flags = TF_HASSEGS;
/*
* Pass the address of the mailbox for this kse to the uts
@@ -375,25 +445,11 @@ cpu_set_user_tls(struct thread *td, void *tls_base)
#ifdef COMPAT_IA32
if (td->td_proc->p_sysent->sv_flags & SV_ILP32) {
- if (td == curthread) {
- critical_enter();
- td->td_pcb->pcb_gsbase = (register_t)tls_base;
- wrmsr(MSR_KGSBASE, td->td_pcb->pcb_gsbase);
- critical_exit();
- } else {
- td->td_pcb->pcb_gsbase = (register_t)tls_base;
- }
+ td->td_pcb->pcb_gsbase = (register_t)tls_base;
return (0);
}
#endif
- if (td == curthread) {
- critical_enter();
- td->td_pcb->pcb_fsbase = (register_t)tls_base;
- wrmsr(MSR_FSBASE, td->td_pcb->pcb_fsbase);
- critical_exit();
- } else {
- td->td_pcb->pcb_fsbase = (register_t)tls_base;
- }
+ td->td_pcb->pcb_fsbase = (register_t)tls_base;
return (0);
}
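
cpu_set_user_tls() above illustrates the overall theme of the commit: the kernel now only records the TLS base in pcb_fsbase/pcb_gsbase, and the hardware base is loaded lazily on the way back to user mode instead of with an immediate wrmsr. From a 64-bit process nothing changes at the API level. A sketch, assuming the usual amd64_set_fsbase() wrapper around the AMD64_SET_FSBASE op handled in sys_machdep.c above:

    #include <machine/sysarch.h>
    #include <err.h>

    static char tls_block[256];     /* hypothetical per-thread area */

    static void
    set_tls_base(void)
    {
            /* The kernel stores the base in the PCB; %fs.base is actually
             * (re)loaded on the next return to user mode. */
            if (amd64_set_fsbase(tls_block) != 0)
                    err(1, "amd64_set_fsbase");
    }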
diff --git a/sys/amd64/ia32/ia32_exception.S b/sys/amd64/ia32/ia32_exception.S
index 4820f53..76c5d5a 100644
--- a/sys/amd64/ia32/ia32_exception.S
+++ b/sys/amd64/ia32/ia32_exception.S
@@ -60,6 +60,11 @@ IDTVEC(int0x80_syscall)
movq %r13,TF_R13(%rsp)
movq %r14,TF_R14(%rsp)
movq %r15,TF_R15(%rsp)
+ movw %fs,TF_FS(%rsp)
+ movw %gs,TF_GS(%rsp)
+ movw %es,TF_ES(%rsp)
+ movw %ds,TF_DS(%rsp)
+ movl $TF_HASSEGS,TF_FLAGS(%rsp)
FAKE_MCOUNT(TF_RIP(%rsp))
movq %rsp, %rdi
call ia32_syscall
diff --git a/sys/amd64/ia32/ia32_misc.c b/sys/amd64/ia32/ia32_misc.c
new file mode 100644
index 0000000..2fa1972
--- /dev/null
+++ b/sys/amd64/ia32/ia32_misc.c
@@ -0,0 +1,71 @@
+/*-
+ * Copyright (c) 2009 Konstantin Belousov
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include "opt_compat.h"
+
+#include <sys/param.h>
+#include <sys/mount.h>
+#include <sys/proc.h>
+#include <sys/socket.h>
+#include <sys/sysent.h>
+#include <sys/sysproto.h>
+#include <sys/systm.h>
+#include <sys/uio.h>
+
+#include <machine/cpu.h>
+#include <machine/sysarch.h>
+
+#include <compat/freebsd32/freebsd32_util.h>
+#include <compat/freebsd32/freebsd32.h>
+#include <compat/freebsd32/freebsd32_proto.h>
+
+int
+freebsd32_sysarch(struct thread *td, struct freebsd32_sysarch_args *uap)
+{
+ struct sysarch_args uap1;
+ struct i386_ldt_args uapl;
+ struct i386_ldt_args32 uapl32;
+ int error;
+
+ if (uap->op == I386_SET_LDT || uap->op == I386_GET_LDT) {
+ if ((error = copyin(uap->parms, &uapl32, sizeof(uapl32))) != 0)
+ return (error);
+ uap1.op = uap->op;
+ uap1.parms = (char *)&uapl;
+ uapl.start = uapl32.start;
+ uapl.descs = (struct user_segment_descriptor *)(uintptr_t)
+ uapl32.descs;
+ uapl.num = uapl32.num;
+ return (sysarch_ldt(td, &uap1, UIO_SYSSPACE));
+ } else {
+ uap1.op = uap->op;
+ uap1.parms = uap->parms;
+ return (sysarch(td, &uap1));
+ }
+}
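
freebsd32_sysarch() copies in the 32-bit I386_GET_LDT/I386_SET_LDT argument block, converts it to the native struct i386_ldt_args, and calls sysarch_ldt() with UIO_SYSSPACE so the argument structure is not copied in a second time; every other op is passed straight to sysarch(). Seen from a 32-bit process the call looks the same as on i386. A hedged sketch, assuming the usual i386_get_ldt(3) wrapper in the 32-bit libc (a process without a private LDT may get an error or zero descriptors back):

	/* Build as a 32-bit binary, e.g. cc -m32, against the 32-bit headers. */
	#include <sys/types.h>
	#include <machine/segments.h>
	#include <machine/sysarch.h>
	#include <err.h>
	#include <stdio.h>

	int
	main(void)
	{
		union descriptor descs[4];
		int n;

		/* On an amd64 kernel this goes through freebsd32_sysarch(). */
		n = i386_get_ldt(0, descs, 4);
		if (n == -1)
			err(1, "i386_get_ldt");
		printf("fetched %d LDT descriptor(s)\n", n);
		return (0);
	}
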
diff --git a/sys/amd64/ia32/ia32_reg.c b/sys/amd64/ia32/ia32_reg.c
index 8abc6fc..49dd4e2 100644
--- a/sys/amd64/ia32/ia32_reg.c
+++ b/sys/amd64/ia32/ia32_reg.c
@@ -85,9 +85,17 @@ fill_regs32(struct thread *td, struct reg32 *regs)
tp = td->td_frame;
pcb = td->td_pcb;
- regs->r_fs = pcb->pcb_fs;
- regs->r_es = pcb->pcb_es;
- regs->r_ds = pcb->pcb_ds;
+ if (tp->tf_flags & TF_HASSEGS) {
+ regs->r_gs = tp->tf_gs;
+ regs->r_fs = tp->tf_fs;
+ regs->r_es = tp->tf_es;
+ regs->r_ds = tp->tf_ds;
+ } else {
+ regs->r_gs = _ugssel;
+ regs->r_fs = _ufssel;
+ regs->r_es = _udatasel;
+ regs->r_ds = _udatasel;
+ }
regs->r_edi = tp->tf_rdi;
regs->r_esi = tp->tf_rsi;
regs->r_ebp = tp->tf_rbp;
@@ -100,7 +108,6 @@ fill_regs32(struct thread *td, struct reg32 *regs)
regs->r_eflags = tp->tf_rflags;
regs->r_esp = tp->tf_rsp;
regs->r_ss = tp->tf_ss;
- regs->r_gs = pcb->pcb_gs;
return (0);
}
@@ -114,14 +121,11 @@ set_regs32(struct thread *td, struct reg32 *regs)
if (!EFL_SECURE(regs->r_eflags, tp->tf_rflags) || !CS_SECURE(regs->r_cs))
return (EINVAL);
pcb = td->td_pcb;
-#if 0
- load_fs(regs->r_fs);
- pcb->pcb_fs = regs->r_fs;
- load_es(regs->r_es);
- pcb->pcb_es = regs->r_es;
- load_ds(regs->r_ds);
- pcb->pcb_ds = regs->r_ds;
-#endif
+ tp->tf_gs = regs->r_gs;
+ tp->tf_fs = regs->r_fs;
+ tp->tf_es = regs->r_es;
+ tp->tf_ds = regs->r_ds;
+ tp->tf_flags = TF_HASSEGS;
tp->tf_rdi = regs->r_edi;
tp->tf_rsi = regs->r_esi;
tp->tf_rbp = regs->r_ebp;
@@ -134,10 +138,6 @@ set_regs32(struct thread *td, struct reg32 *regs)
tp->tf_rflags = regs->r_eflags;
tp->tf_rsp = regs->r_esp;
tp->tf_ss = regs->r_ss;
-#if 0
- load_gs(regs->r_gs);
- pcb->pcb_gs = regs->r_gs;
-#endif
return (0);
}
@@ -166,7 +166,8 @@ fill_fpregs32(struct thread *td, struct fpreg32 *regs)
penv_87->en_fcs = td->td_frame->tf_cs;
penv_87->en_opcode = penv_xmm->en_opcode;
penv_87->en_foo = penv_xmm->en_rdp;
- penv_87->en_fos = td->td_pcb->pcb_ds;
+ /* Entry into the kernel always sets TF_HASSEGS */
+ penv_87->en_fos = td->td_frame->tf_ds;
/* FPU registers */
for (i = 0; i < 8; ++i)
diff --git a/sys/amd64/ia32/ia32_signal.c b/sys/amd64/ia32/ia32_signal.c
index 019faba..37e8013 100644
--- a/sys/amd64/ia32/ia32_signal.c
+++ b/sys/amd64/ia32/ia32_signal.c
@@ -85,8 +85,6 @@ static void freebsd4_ia32_sendsig(sig_t, ksiginfo_t *, sigset_t *);
static void ia32_get_fpcontext(struct thread *td, struct ia32_mcontext *mcp);
static int ia32_set_fpcontext(struct thread *td, const struct ia32_mcontext *mcp);
-extern int _ucode32sel, _udatasel;
-
#define CS_SECURE(cs) (ISPL(cs) == SEL_UPL)
#define EFL_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
@@ -134,10 +132,11 @@ ia32_get_mcontext(struct thread *td, struct ia32_mcontext *mcp, int flags)
PROC_LOCK(curthread->td_proc);
mcp->mc_onstack = sigonstack(tp->tf_rsp);
PROC_UNLOCK(curthread->td_proc);
- mcp->mc_gs = td->td_pcb->pcb_gs;
- mcp->mc_fs = td->td_pcb->pcb_fs;
- mcp->mc_es = td->td_pcb->pcb_es;
- mcp->mc_ds = td->td_pcb->pcb_ds;
+ /* Entry into kernel always sets TF_HASSEGS */
+ mcp->mc_gs = tp->tf_gs;
+ mcp->mc_fs = tp->tf_fs;
+ mcp->mc_es = tp->tf_es;
+ mcp->mc_ds = tp->tf_ds;
mcp->mc_edi = tp->tf_rdi;
mcp->mc_esi = tp->tf_rsi;
mcp->mc_ebp = tp->tf_rbp;
@@ -158,6 +157,8 @@ ia32_get_mcontext(struct thread *td, struct ia32_mcontext *mcp, int flags)
mcp->mc_ss = tp->tf_ss;
mcp->mc_len = sizeof(*mcp);
ia32_get_fpcontext(td, mcp);
+ mcp->mc_fsbase = td->td_pcb->pcb_fsbase;
+ mcp->mc_gsbase = td->td_pcb->pcb_gsbase;
return (0);
}
@@ -182,11 +183,11 @@ ia32_set_mcontext(struct thread *td, const struct ia32_mcontext *mcp)
ret = ia32_set_fpcontext(td, mcp);
if (ret != 0)
return (ret);
-#if 0 /* XXX deal with load_fs() and friends */
+ tp->tf_gs = mcp->mc_gs;
tp->tf_fs = mcp->mc_fs;
tp->tf_es = mcp->mc_es;
tp->tf_ds = mcp->mc_ds;
-#endif
+ tp->tf_flags = TF_HASSEGS;
tp->tf_rdi = mcp->mc_edi;
tp->tf_rsi = mcp->mc_esi;
tp->tf_rbp = mcp->mc_ebp;
@@ -199,9 +200,6 @@ ia32_set_mcontext(struct thread *td, const struct ia32_mcontext *mcp)
tp->tf_rflags = rflags;
tp->tf_rsp = mcp->mc_esp;
tp->tf_ss = mcp->mc_ss;
-#if 0 /* XXX deal with load_gs() and friends */
- td->td_pcb->pcb_gs = mcp->mc_gs;
-#endif
td->td_pcb->pcb_flags |= PCB_FULLCTX;
return (0);
}
@@ -326,10 +324,6 @@ freebsd4_ia32_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
sf.sf_uc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK)
? ((oonstack) ? SS_ONSTACK : 0) : SS_DISABLE;
sf.sf_uc.uc_mcontext.mc_onstack = (oonstack) ? 1 : 0;
- sf.sf_uc.uc_mcontext.mc_gs = rgs();
- sf.sf_uc.uc_mcontext.mc_fs = rfs();
- __asm __volatile("mov %%es,%0" : "=rm" (sf.sf_uc.uc_mcontext.mc_es));
- __asm __volatile("mov %%ds,%0" : "=rm" (sf.sf_uc.uc_mcontext.mc_ds));
sf.sf_uc.uc_mcontext.mc_edi = regs->tf_rdi;
sf.sf_uc.uc_mcontext.mc_esi = regs->tf_rsi;
sf.sf_uc.uc_mcontext.mc_ebp = regs->tf_rbp;
@@ -345,6 +339,10 @@ freebsd4_ia32_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
sf.sf_uc.uc_mcontext.mc_eflags = regs->tf_rflags;
sf.sf_uc.uc_mcontext.mc_esp = regs->tf_rsp;
sf.sf_uc.uc_mcontext.mc_ss = regs->tf_ss;
+ sf.sf_uc.uc_mcontext.mc_ds = regs->tf_ds;
+ sf.sf_uc.uc_mcontext.mc_es = regs->tf_es;
+ sf.sf_uc.uc_mcontext.mc_fs = regs->tf_fs;
+ sf.sf_uc.uc_mcontext.mc_gs = regs->tf_gs;
/* Allocate space for the signal handler context. */
if ((td->td_pflags & TDP_ALTSTACK) != 0 && !oonstack &&
@@ -394,10 +392,8 @@ freebsd4_ia32_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
regs->tf_rflags &= ~(PSL_T | PSL_D);
regs->tf_cs = _ucode32sel;
regs->tf_ss = _udatasel;
- load_ds(_udatasel);
- td->td_pcb->pcb_ds = _udatasel;
- load_es(_udatasel);
- td->td_pcb->pcb_es = _udatasel;
+ regs->tf_ds = _udatasel;
+ regs->tf_es = _udatasel;
/* leave user %fs and %gs untouched */
PROC_LOCK(p);
mtx_lock(&psp->ps_mtx);
@@ -441,10 +437,6 @@ ia32_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
sf.sf_uc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK)
? ((oonstack) ? SS_ONSTACK : 0) : SS_DISABLE;
sf.sf_uc.uc_mcontext.mc_onstack = (oonstack) ? 1 : 0;
- sf.sf_uc.uc_mcontext.mc_gs = rgs();
- sf.sf_uc.uc_mcontext.mc_fs = rfs();
- __asm __volatile("mov %%es,%0" : "=rm" (sf.sf_uc.uc_mcontext.mc_es));
- __asm __volatile("mov %%ds,%0" : "=rm" (sf.sf_uc.uc_mcontext.mc_ds));
sf.sf_uc.uc_mcontext.mc_edi = regs->tf_rdi;
sf.sf_uc.uc_mcontext.mc_esi = regs->tf_rsi;
sf.sf_uc.uc_mcontext.mc_ebp = regs->tf_rbp;
@@ -460,9 +452,15 @@ ia32_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
sf.sf_uc.uc_mcontext.mc_eflags = regs->tf_rflags;
sf.sf_uc.uc_mcontext.mc_esp = regs->tf_rsp;
sf.sf_uc.uc_mcontext.mc_ss = regs->tf_ss;
+ sf.sf_uc.uc_mcontext.mc_ds = regs->tf_ds;
+ sf.sf_uc.uc_mcontext.mc_es = regs->tf_es;
+ sf.sf_uc.uc_mcontext.mc_fs = regs->tf_fs;
+ sf.sf_uc.uc_mcontext.mc_gs = regs->tf_gs;
sf.sf_uc.uc_mcontext.mc_len = sizeof(sf.sf_uc.uc_mcontext); /* magic */
ia32_get_fpcontext(td, &sf.sf_uc.uc_mcontext);
fpstate_drop(td);
+ sf.sf_uc.uc_mcontext.mc_fsbase = td->td_pcb->pcb_fsbase;
+ sf.sf_uc.uc_mcontext.mc_gsbase = td->td_pcb->pcb_gsbase;
/* Allocate space for the signal handler context. */
if ((td->td_pflags & TDP_ALTSTACK) != 0 && !oonstack &&
@@ -514,11 +512,9 @@ ia32_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
regs->tf_rflags &= ~(PSL_T | PSL_D);
regs->tf_cs = _ucode32sel;
regs->tf_ss = _udatasel;
- load_ds(_udatasel);
- td->td_pcb->pcb_ds = _udatasel;
- load_es(_udatasel);
- td->td_pcb->pcb_es = _udatasel;
- /* leave user %fs and %gs untouched */
+ regs->tf_ds = _udatasel;
+ regs->tf_es = _udatasel;
+ /* XXXKIB leave user %fs and %gs untouched */
PROC_LOCK(p);
mtx_lock(&psp->ps_mtx);
}
@@ -591,7 +587,6 @@ freebsd4_freebsd32_sigreturn(td, uap)
return (EINVAL);
}
- /* Segment selectors restored by sigtramp.S */
regs->tf_rdi = ucp->uc_mcontext.mc_edi;
regs->tf_rsi = ucp->uc_mcontext.mc_esi;
regs->tf_rbp = ucp->uc_mcontext.mc_ebp;
@@ -606,6 +601,10 @@ freebsd4_freebsd32_sigreturn(td, uap)
regs->tf_rflags = ucp->uc_mcontext.mc_eflags;
regs->tf_rsp = ucp->uc_mcontext.mc_esp;
regs->tf_ss = ucp->uc_mcontext.mc_ss;
+ regs->tf_ds = ucp->uc_mcontext.mc_ds;
+ regs->tf_es = ucp->uc_mcontext.mc_es;
+ regs->tf_fs = ucp->uc_mcontext.mc_fs;
+ regs->tf_gs = ucp->uc_mcontext.mc_gs;
PROC_LOCK(p);
td->td_sigmask = ucp->uc_sigmask;
@@ -678,7 +677,6 @@ freebsd32_sigreturn(td, uap)
if (ret != 0)
return (ret);
- /* Segment selectors restored by sigtramp.S */
regs->tf_rdi = ucp->uc_mcontext.mc_edi;
regs->tf_rsi = ucp->uc_mcontext.mc_esi;
regs->tf_rbp = ucp->uc_mcontext.mc_ebp;
@@ -693,6 +691,11 @@ freebsd32_sigreturn(td, uap)
regs->tf_rflags = ucp->uc_mcontext.mc_eflags;
regs->tf_rsp = ucp->uc_mcontext.mc_esp;
regs->tf_ss = ucp->uc_mcontext.mc_ss;
+ regs->tf_ds = ucp->uc_mcontext.mc_ds;
+ regs->tf_es = ucp->uc_mcontext.mc_es;
+ regs->tf_fs = ucp->uc_mcontext.mc_fs;
+ regs->tf_gs = ucp->uc_mcontext.mc_gs;
+ regs->tf_flags = TF_HASSEGS;
PROC_LOCK(p);
td->td_sigmask = ucp->uc_sigmask;
@@ -715,20 +718,14 @@ ia32_setregs(td, entry, stack, ps_strings)
struct trapframe *regs = td->td_frame;
struct pcb *pcb = td->td_pcb;
- critical_enter();
- wrmsr(MSR_FSBASE, 0);
- wrmsr(MSR_KGSBASE, 0); /* User value while we're in the kernel */
+ mtx_lock(&dt_lock);
+ if (td->td_proc->p_md.md_ldt != NULL)
+ user_ldt_free(td);
+ else
+ mtx_unlock(&dt_lock);
+
pcb->pcb_fsbase = 0;
pcb->pcb_gsbase = 0;
- critical_exit();
- load_ds(_udatasel);
- load_es(_udatasel);
- load_fs(_udatasel);
- load_gs(_udatasel);
- pcb->pcb_ds = _udatasel;
- pcb->pcb_es = _udatasel;
- pcb->pcb_fs = _udatasel;
- pcb->pcb_gs = _udatasel;
pcb->pcb_initial_fpucw = __INITIAL_FPUCW_I386__;
bzero((char *)regs, sizeof(struct trapframe));
@@ -738,6 +735,12 @@ ia32_setregs(td, entry, stack, ps_strings)
regs->tf_ss = _udatasel;
regs->tf_cs = _ucode32sel;
regs->tf_rbx = ps_strings;
+ regs->tf_ds = _udatasel;
+ regs->tf_es = _udatasel;
+ regs->tf_fs = _ufssel;
+ regs->tf_gs = _ugssel;
+ regs->tf_flags = TF_HASSEGS;
+
load_cr0(rcr0() | CR0_MP | CR0_TS);
fpstate_drop(td);
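
With the selectors carried in the trapframe and in mc_ds/mc_es/mc_fs/mc_gs, signal delivery and sigreturn preserve a 32-bit process's segment registers without the trampoline having to reload them. A rough userland check of that invariant, built as a 32-bit binary (a sketch, not an exhaustive test):

	#include <sys/types.h>
	#include <signal.h>
	#include <stdio.h>

	static u_int
	read_ds(void)
	{
		u_int sel;

		__asm __volatile("mov %%ds,%0" : "=r" (sel));
		return (sel);
	}

	static void
	handler(int sig)
	{

		(void)sig;
	}

	int
	main(void)
	{
		u_int before, after;

		signal(SIGUSR1, handler);
		before = read_ds();
		raise(SIGUSR1);
		after = read_ds();
		printf("%%ds before 0x%x, after 0x%x\n", before, after);
		return (before == after ? 0 : 1);
	}
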
diff --git a/sys/amd64/ia32/ia32_sigtramp.S b/sys/amd64/ia32/ia32_sigtramp.S
index 1cd220a..9455169 100644
--- a/sys/amd64/ia32/ia32_sigtramp.S
+++ b/sys/amd64/ia32/ia32_sigtramp.S
@@ -45,8 +45,6 @@ ia32_sigcode:
calll *IA32_SIGF_HANDLER(%esp)
leal IA32_SIGF_UC(%esp),%eax /* get ucontext */
pushl %eax
- mov IA32_UC_ES(%eax),%es /* restore %es */
- mov IA32_UC_DS(%eax),%ds /* restore %ds */
movl $SYS_sigreturn,%eax
pushl %eax /* junk to fake return addr. */
int $0x80 /* enter kernel with args */
@@ -60,8 +58,6 @@ freebsd4_ia32_sigcode:
calll *IA32_SIGF_HANDLER(%esp)
leal IA32_SIGF_UC4(%esp),%eax/* get ucontext */
pushl %eax
- mov IA32_UC4_ES(%eax),%es /* restore %es */
- mov IA32_UC4_DS(%eax),%ds /* restore %ds */
movl $344,%eax /* 4.x SYS_sigreturn */
pushl %eax /* junk to fake return addr. */
int $0x80 /* enter kernel with args */
diff --git a/sys/amd64/include/asmacros.h b/sys/amd64/include/asmacros.h
index 788f39f..0bf0029 100644
--- a/sys/amd64/include/asmacros.h
+++ b/sys/amd64/include/asmacros.h
@@ -161,7 +161,12 @@
movq %r12,TF_R12(%rsp) ; \
movq %r13,TF_R13(%rsp) ; \
movq %r14,TF_R14(%rsp) ; \
- movq %r15,TF_R15(%rsp)
+ movq %r15,TF_R15(%rsp) ; \
+ movw %fs,TF_FS(%rsp) ; \
+ movw %gs,TF_GS(%rsp) ; \
+ movw %es,TF_ES(%rsp) ; \
+ movw %ds,TF_DS(%rsp) ; \
+ movl $TF_HASSEGS,TF_FLAGS(%rsp)
#define POP_FRAME \
movq TF_RDI(%rsp),%rdi ; \
diff --git a/sys/amd64/include/frame.h b/sys/amd64/include/frame.h
index 26c9dd0..12722a4 100644
--- a/sys/amd64/include/frame.h
+++ b/sys/amd64/include/frame.h
@@ -64,9 +64,13 @@ struct trapframe {
register_t tf_r13;
register_t tf_r14;
register_t tf_r15;
- register_t tf_trapno;
+ uint32_t tf_trapno;
+ uint16_t tf_fs;
+ uint16_t tf_gs;
register_t tf_addr;
- register_t tf_flags;
+ uint32_t tf_flags;
+ uint16_t tf_es;
+ uint16_t tf_ds;
/* below portion defined in hardware */
register_t tf_err;
register_t tf_rip;
@@ -76,4 +80,7 @@ struct trapframe {
register_t tf_ss;
};
+#define TF_HASSEGS 0x1
+/* #define _MC_HASBASES 0x2 */
+
#endif /* _MACHINE_FRAME_H_ */
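
The 16-bit selectors are packed into the previously wasted upper halves of the 64-bit tf_trapno and tf_flags slots, so the trapframe does not grow and the hardware-defined tail (tf_err through tf_ss) keeps its offsets. A quick C11 sketch of that arithmetic, using a local mirror of just the repacked region rather than the kernel header itself:

	#include <stddef.h>
	#include <stdint.h>

	/* Local mirror of the repacked region only, for the offset math. */
	struct repacked_region {
		uint32_t tf_trapno;
		uint16_t tf_fs;
		uint16_t tf_gs;
		uint64_t tf_addr;
		uint32_t tf_flags;
		uint16_t tf_es;
		uint16_t tf_ds;
		uint64_t tf_err;	/* first hardware-defined slot */
	};

	_Static_assert(offsetof(struct repacked_region, tf_addr) == 8,
	    "trapno/fs/gs still fit in one 64-bit slot");
	_Static_assert(offsetof(struct repacked_region, tf_err) == 24,
	    "hardware part keeps its old offset");
	_Static_assert(sizeof(struct repacked_region) == 32,
	    "no padding introduced");

	int
	main(void)
	{
		return (0);
	}

The asserts are evaluated at compile time; the program has nothing to do when run.
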
diff --git a/sys/amd64/include/md_var.h b/sys/amd64/include/md_var.h
index 2125b9f..892e19d 100644
--- a/sys/amd64/include/md_var.h
+++ b/sys/amd64/include/md_var.h
@@ -60,6 +60,11 @@ extern char sigcode[];
extern int szsigcode;
extern uint64_t *vm_page_dump;
extern int vm_page_dump_size;
+extern int _udatasel;
+extern int _ucodesel;
+extern int _ucode32sel;
+extern int _ufssel;
+extern int _ugssel;
typedef void alias_for_inthand_t(u_int cs, u_int ef, u_int esp, u_int ss);
struct thread;
@@ -72,6 +77,14 @@ void busdma_swi(void);
void cpu_setregs(void);
void doreti_iret(void) __asm(__STRING(doreti_iret));
void doreti_iret_fault(void) __asm(__STRING(doreti_iret_fault));
+void ld_ds(void) __asm(__STRING(ld_ds));
+void ld_es(void) __asm(__STRING(ld_es));
+void ld_fs(void) __asm(__STRING(ld_fs));
+void ld_gs(void) __asm(__STRING(ld_gs));
+void ds_load_fault(void) __asm(__STRING(ds_load_fault));
+void es_load_fault(void) __asm(__STRING(es_load_fault));
+void fs_load_fault(void) __asm(__STRING(fs_load_fault));
+void gs_load_fault(void) __asm(__STRING(gs_load_fault));
void dump_add_page(vm_paddr_t);
void dump_drop_page(vm_paddr_t);
void initializecpu(void);
diff --git a/sys/amd64/include/pcb.h b/sys/amd64/include/pcb.h
index 2e2ca87..39ca832 100644
--- a/sys/amd64/include/pcb.h
+++ b/sys/amd64/include/pcb.h
@@ -62,10 +62,6 @@ struct pcb {
#define PCB_32BIT 0x40 /* process has 32 bit context (segs etc) */
#define PCB_FULLCTX 0x80 /* full context restore on sysret */
- u_int32_t pcb_ds;
- u_int32_t pcb_es;
- u_int32_t pcb_fs;
- u_int32_t pcb_gs;
u_int64_t pcb_dr0;
u_int64_t pcb_dr1;
u_int64_t pcb_dr2;
@@ -80,6 +76,8 @@ struct pcb {
/* 32-bit segment descriptor */
struct user_segment_descriptor pcb_gs32sd;
+ /* local tss, with i/o bitmap; NULL for common */
+ struct amd64tss *pcb_tssp;
};
struct xpcb {
diff --git a/sys/amd64/include/pcpu.h b/sys/amd64/include/pcpu.h
index 23818ca..139281a 100644
--- a/sys/amd64/include/pcpu.h
+++ b/sys/amd64/include/pcpu.h
@@ -62,12 +62,20 @@
char pc_monitorbuf[128] __aligned(128); /* cache line */ \
struct pcpu *pc_prvspace; /* Self-reference */ \
struct pmap *pc_curpmap; \
- struct amd64tss *pc_tssp; \
+ struct amd64tss *pc_tssp; /* TSS segment active on CPU */ \
+ struct amd64tss *pc_commontssp;/* Common TSS for the CPU */ \
register_t pc_rsp0; \
register_t pc_scratch_rsp; /* User %rsp in syscall */ \
u_int pc_apic_id; \
u_int pc_acpi_id; /* ACPI CPU id */ \
- struct user_segment_descriptor *pc_gs32p \
+ /* Pointer to the CPU %fs descriptor */ \
+ struct user_segment_descriptor *pc_fs32p; \
+ /* Pointer to the CPU %gs descriptor */ \
+ struct user_segment_descriptor *pc_gs32p; \
+ /* Pointer to the CPU LDT descriptor */ \
+ struct system_segment_descriptor *pc_ldt; \
+ /* Pointer to the CPU TSS descriptor */ \
+ struct system_segment_descriptor *pc_tss
PCPU_XEN_FIELDS
#ifdef _KERNEL
diff --git a/sys/amd64/include/proc.h b/sys/amd64/include/proc.h
index a3ebd79..33d5181 100644
--- a/sys/amd64/include/proc.h
+++ b/sys/amd64/include/proc.h
@@ -33,6 +33,13 @@
#ifndef _MACHINE_PROC_H_
#define _MACHINE_PROC_H_
+#include <machine/segments.h>
+
+struct proc_ldt {
+ caddr_t ldt_base;
+ int ldt_refcnt;
+};
+
/*
* Machine-dependent part of the proc structure for AMD64.
*/
@@ -42,6 +49,8 @@ struct mdthread {
};
struct mdproc {
+ struct proc_ldt *md_ldt; /* (t) per-process ldt */
+ struct system_segment_descriptor md_ldt_sd;
};
#ifdef _KERNEL
@@ -55,6 +64,18 @@ struct mdproc {
(char *)&td; \
} while (0)
+void set_user_ldt(struct mdproc *);
+struct proc_ldt *user_ldt_alloc(struct proc *, int);
+void user_ldt_free(struct thread *);
+void user_ldt_deref(struct proc_ldt *);
+struct sysarch_args;
+int sysarch_ldt(struct thread *td, struct sysarch_args *uap, int uap_space);
+int amd64_set_ldt_data(struct thread *td, int start, int num,
+ struct user_segment_descriptor *descs);
+
+extern struct mtx dt_lock;
+extern int max_ldt_segment;
+
#endif /* _KERNEL */
#endif /* !_MACHINE_PROC_H_ */
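
The md_ldt pointer is shared by every thread of the process and serialized by dt_lock; ldt_refcnt ensures the descriptor table is released only when its last user lets go (user_ldt_free()/user_ldt_deref() above). A userspace analog of that last-reference pattern, purely illustrative: the *_analog names are invented here and this is not the kernel's implementation.

	#include <pthread.h>
	#include <stdio.h>
	#include <stdlib.h>

	struct proc_ldt_analog {
		void	*ldt_base;	/* would be the descriptor table */
		int	 ldt_refcnt;
	};

	static pthread_mutex_t dt_lock_analog = PTHREAD_MUTEX_INITIALIZER;

	/* Drop one reference; free the table when the last holder lets go. */
	static void
	user_ldt_deref_analog(struct proc_ldt_analog *pldt)
	{
		pthread_mutex_lock(&dt_lock_analog);
		if (--pldt->ldt_refcnt > 0) {
			pthread_mutex_unlock(&dt_lock_analog);
			return;
		}
		pthread_mutex_unlock(&dt_lock_analog);
		free(pldt->ldt_base);
		free(pldt);
	}

	int
	main(void)
	{
		struct proc_ldt_analog *pldt;

		pldt = malloc(sizeof(*pldt));
		pldt->ldt_base = malloc(128);
		pldt->ldt_refcnt = 2;		/* e.g. two threads share it */
		user_ldt_deref_analog(pldt);	/* still held */
		user_ldt_deref_analog(pldt);	/* last reference: freed */
		printf("analog proc_ldt released\n");
		return (0);
	}
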
diff --git a/sys/amd64/include/segments.h b/sys/amd64/include/segments.h
index cab7554..3dca80a 100644
--- a/sys/amd64/include/segments.h
+++ b/sys/amd64/include/segments.h
@@ -239,6 +239,9 @@ void ssdtosd(struct soft_segment_descriptor *ssdp,
struct user_segment_descriptor *sdp);
void ssdtosyssd(struct soft_segment_descriptor *ssdp,
struct system_segment_descriptor *sdp);
+void update_gdt_gsbase(struct thread *td, uint32_t base);
+void update_gdt_fsbase(struct thread *td, uint32_t base);
+
#endif /* _KERNEL */
#endif /* !_MACHINE_SEGMENTS_H_ */
diff --git a/sys/amd64/include/sysarch.h b/sys/amd64/include/sysarch.h
index 2b0d0a6..6c3e6c9 100644
--- a/sys/amd64/include/sysarch.h
+++ b/sys/amd64/include/sysarch.h
@@ -77,6 +77,15 @@ int amd64_set_fsbase(void *);
int amd64_set_gsbase(void *);
int sysarch(int, void *);
__END_DECLS
+#else
+struct thread;
+union descriptor;
+
+int amd64_get_ldt(struct thread *, struct i386_ldt_args *);
+int amd64_set_ldt(struct thread *, struct i386_ldt_args *,
+ struct user_segment_descriptor *);
+int amd64_get_ioperm(struct thread *, struct i386_ioperm_args *);
+int amd64_set_ioperm(struct thread *, struct i386_ioperm_args *);
#endif
#endif /* !_MACHINE_SYSARCH_H_ */
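
The new kernel-side prototypes cover both the LDT and the I/O-permission sysarch operations; together with pcb_tssp (the per-thread TSS with an I/O bitmap, freed in cpu_thread_clean() above) they let a process request direct access to selected ports. A hedged userland sketch, assuming the I386_SET_IOPERM op and struct i386_ioperm_args are exposed to userland by <machine/sysarch.h> (not visible in this hunk); it needs appropriate privilege (root):

	#include <sys/types.h>
	#include <machine/sysarch.h>
	#include <err.h>
	#include <stdio.h>

	int
	main(void)
	{
		struct i386_ioperm_args io;

		io.start = 0x80;	/* the classic POST diagnostic port */
		io.length = 1;
		io.enable = 1;
		if (sysarch(I386_SET_IOPERM, &io) == -1)
			err(1, "sysarch(I386_SET_IOPERM)");
		printf("granted access to I/O port 0x%x\n", io.start);
		return (0);
	}
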
diff --git a/sys/amd64/linux32/linux32_locore.s b/sys/amd64/linux32/linux32_locore.s
index 6045925..36e1abf 100644
--- a/sys/amd64/linux32/linux32_locore.s
+++ b/sys/amd64/linux32/linux32_locore.s
@@ -11,8 +11,6 @@
NON_GPROF_ENTRY(linux_sigcode)
call *LINUX_SIGF_HANDLER(%esp)
leal LINUX_SIGF_SC(%esp),%ebx /* linux scp */
- mov LINUX_SC_ES(%ebx),%es
- mov LINUX_SC_DS(%ebx),%ds
movl %esp, %ebx /* pass sigframe */
push %eax /* fake ret addr */
movl $LINUX_SYS_linux_sigreturn,%eax /* linux_sigreturn() */
@@ -24,8 +22,6 @@ linux_rt_sigcode:
call *LINUX_RT_SIGF_HANDLER(%esp)
leal LINUX_RT_SIGF_UC(%esp),%ebx /* linux ucp */
leal LINUX_RT_SIGF_SC(%ebx),%ecx /* linux sigcontext */
- mov LINUX_SC_ES(%ecx),%es
- mov LINUX_SC_DS(%ecx),%ds
push %eax /* fake ret addr */
movl $LINUX_SYS_linux_rt_sigreturn,%eax /* linux_rt_sigreturn() */
int $0x80 /* enter kernel with args */
diff --git a/sys/amd64/linux32/linux32_machdep.c b/sys/amd64/linux32/linux32_machdep.c
index eb91623..42ea070 100644
--- a/sys/amd64/linux32/linux32_machdep.c
+++ b/sys/amd64/linux32/linux32_machdep.c
@@ -716,8 +716,8 @@ linux_clone(struct thread *td, struct linux_clone_args *args)
sd.sd_long, sd.sd_def32, sd.sd_gran);
#endif
td2->td_pcb->pcb_gsbase = (register_t)info.base_addr;
- td2->td_pcb->pcb_gs32sd = sd;
- td2->td_pcb->pcb_gs = GSEL(GUGS32_SEL, SEL_UPL);
+/* XXXKIB td2->td_pcb->pcb_gs32sd = sd; */
+ td2->td_frame->tf_gs = GSEL(GUGS32_SEL, SEL_UPL);
td2->td_pcb->pcb_flags |= PCB_GS32BIT | PCB_32BIT;
}
}
@@ -1359,12 +1359,9 @@ linux_set_thread_area(struct thread *td,
sd.sd_gran);
#endif
- critical_enter();
td->td_pcb->pcb_gsbase = (register_t)info.base_addr;
- td->td_pcb->pcb_gs32sd = *PCPU_GET(gs32p) = sd;
td->td_pcb->pcb_flags |= PCB_32BIT | PCB_GS32BIT;
- wrmsr(MSR_KGSBASE, td->td_pcb->pcb_gsbase);
- critical_exit();
+ update_gdt_gsbase(td, info.base_addr);
return (0);
}
diff --git a/sys/amd64/linux32/linux32_sysvec.c b/sys/amd64/linux32/linux32_sysvec.c
index 3ed65eb..925d2e1 100644
--- a/sys/amd64/linux32/linux32_sysvec.c
+++ b/sys/amd64/linux32/linux32_sysvec.c
@@ -290,7 +290,6 @@ elf_linux_fixup(register_t **stack_base, struct image_params *imgp)
return 0;
}
-extern int _ucodesel, _ucode32sel, _udatasel;
extern unsigned long linux_sznonrtsigcode;
static void
@@ -360,13 +359,7 @@ linux_rt_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask);
- frame.sf_sc.uc_mcontext.sc_mask = frame.sf_sc.uc_sigmask.__bits[0];
- frame.sf_sc.uc_mcontext.sc_gs = rgs();
- frame.sf_sc.uc_mcontext.sc_fs = rfs();
- __asm __volatile("mov %%es,%0" :
- "=rm" (frame.sf_sc.uc_mcontext.sc_es));
- __asm __volatile("mov %%ds,%0" :
- "=rm" (frame.sf_sc.uc_mcontext.sc_ds));
+ frame.sf_sc.uc_mcontext.sc_mask = frame.sf_sc.uc_sigmask.__bits[0];
frame.sf_sc.uc_mcontext.sc_edi = regs->tf_rdi;
frame.sf_sc.uc_mcontext.sc_esi = regs->tf_rsi;
frame.sf_sc.uc_mcontext.sc_ebp = regs->tf_rbp;
@@ -376,6 +369,10 @@ linux_rt_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
frame.sf_sc.uc_mcontext.sc_eax = regs->tf_rax;
frame.sf_sc.uc_mcontext.sc_eip = regs->tf_rip;
frame.sf_sc.uc_mcontext.sc_cs = regs->tf_cs;
+ frame.sf_sc.uc_mcontext.sc_gs = regs->tf_gs;
+ frame.sf_sc.uc_mcontext.sc_fs = regs->tf_fs;
+ frame.sf_sc.uc_mcontext.sc_es = regs->tf_es;
+ frame.sf_sc.uc_mcontext.sc_ds = regs->tf_ds;
frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_rflags;
frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_rsp;
frame.sf_sc.uc_mcontext.sc_ss = regs->tf_ss;
@@ -413,11 +410,11 @@ linux_rt_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
regs->tf_rflags &= ~(PSL_T | PSL_D);
regs->tf_cs = _ucode32sel;
regs->tf_ss = _udatasel;
- load_ds(_udatasel);
- td->td_pcb->pcb_ds = _udatasel;
- load_es(_udatasel);
- td->td_pcb->pcb_es = _udatasel;
- /* leave user %fs and %gs untouched */
+ regs->tf_ds = _udatasel;
+ regs->tf_es = _udatasel;
+ regs->tf_fs = _ufssel;
+ regs->tf_gs = _ugssel;
+ regs->tf_flags = TF_HASSEGS;
PROC_LOCK(p);
mtx_lock(&psp->ps_mtx);
}
@@ -495,10 +492,10 @@ linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
* Build the signal context to be used by sigreturn.
*/
frame.sf_sc.sc_mask = lmask.__bits[0];
- frame.sf_sc.sc_gs = rgs();
- frame.sf_sc.sc_fs = rfs();
- __asm __volatile("mov %%es,%0" : "=rm" (frame.sf_sc.sc_es));
- __asm __volatile("mov %%ds,%0" : "=rm" (frame.sf_sc.sc_ds));
+ frame.sf_sc.sc_gs = regs->tf_gs;
+ frame.sf_sc.sc_fs = regs->tf_fs;
+ frame.sf_sc.sc_es = regs->tf_es;
+ frame.sf_sc.sc_ds = regs->tf_ds;
frame.sf_sc.sc_edi = regs->tf_rdi;
frame.sf_sc.sc_esi = regs->tf_rsi;
frame.sf_sc.sc_ebp = regs->tf_rbp;
@@ -535,11 +532,11 @@ linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
regs->tf_rflags &= ~(PSL_T | PSL_D);
regs->tf_cs = _ucode32sel;
regs->tf_ss = _udatasel;
- load_ds(_udatasel);
- td->td_pcb->pcb_ds = _udatasel;
- load_es(_udatasel);
- td->td_pcb->pcb_es = _udatasel;
- /* leave user %fs and %gs untouched */
+ regs->tf_ds = _udatasel;
+ regs->tf_es = _udatasel;
+ regs->tf_fs = _ufssel;
+ regs->tf_gs = _ugssel;
+ regs->tf_flags = TF_HASSEGS;
PROC_LOCK(p);
mtx_lock(&psp->ps_mtx);
}
@@ -624,7 +621,6 @@ linux_sigreturn(struct thread *td, struct linux_sigreturn_args *args)
/*
* Restore signal context.
*/
- /* Selectors were restored by the trampoline. */
regs->tf_rdi = frame.sf_sc.sc_edi;
regs->tf_rsi = frame.sf_sc.sc_esi;
regs->tf_rbp = frame.sf_sc.sc_ebp;
@@ -634,6 +630,10 @@ linux_sigreturn(struct thread *td, struct linux_sigreturn_args *args)
regs->tf_rax = frame.sf_sc.sc_eax;
regs->tf_rip = frame.sf_sc.sc_eip;
regs->tf_cs = frame.sf_sc.sc_cs;
+ regs->tf_ds = frame.sf_sc.sc_ds;
+ regs->tf_es = frame.sf_sc.sc_es;
+ regs->tf_fs = frame.sf_sc.sc_fs;
+ regs->tf_gs = frame.sf_sc.sc_gs;
regs->tf_rflags = eflags;
regs->tf_rsp = frame.sf_sc.sc_esp_at_signal;
regs->tf_ss = frame.sf_sc.sc_ss;
@@ -722,7 +722,10 @@ linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args)
/*
* Restore signal context
*/
- /* Selectors were restored by the trampoline. */
+ regs->tf_gs = context->sc_gs;
+ regs->tf_fs = context->sc_fs;
+ regs->tf_es = context->sc_es;
+ regs->tf_ds = context->sc_ds;
regs->tf_rdi = context->sc_edi;
regs->tf_rsi = context->sc_esi;
regs->tf_rbp = context->sc_ebp;
@@ -827,27 +830,30 @@ exec_linux_setregs(td, entry, stack, ps_strings)
struct trapframe *regs = td->td_frame;
struct pcb *pcb = td->td_pcb;
+ mtx_lock(&dt_lock);
+ if (td->td_proc->p_md.md_ldt != NULL)
+ user_ldt_free(td);
+ else
+ mtx_unlock(&dt_lock);
+
critical_enter();
wrmsr(MSR_FSBASE, 0);
wrmsr(MSR_KGSBASE, 0); /* User value while we're in the kernel */
pcb->pcb_fsbase = 0;
pcb->pcb_gsbase = 0;
critical_exit();
- load_ds(_udatasel);
- load_es(_udatasel);
- load_fs(_udatasel);
- load_gs(_udatasel);
- pcb->pcb_ds = _udatasel;
- pcb->pcb_es = _udatasel;
- pcb->pcb_fs = _udatasel;
- pcb->pcb_gs = _udatasel;
pcb->pcb_initial_fpucw = __LINUX_NPXCW__;
bzero((char *)regs, sizeof(struct trapframe));
regs->tf_rip = entry;
regs->tf_rsp = stack;
regs->tf_rflags = PSL_USER | (regs->tf_rflags & PSL_T);
+ regs->tf_gs = _ugssel;
+ regs->tf_fs = _ufssel;
+ regs->tf_es = _udatasel;
+ regs->tf_ds = _udatasel;
regs->tf_ss = _udatasel;
+ regs->tf_flags = TF_HASSEGS;
regs->tf_cs = _ucode32sel;
regs->tf_rbx = ps_strings;
load_cr0(rcr0() | CR0_MP | CR0_TS);
diff --git a/sys/conf/files.amd64 b/sys/conf/files.amd64
index fa95752..56e17d1 100644
--- a/sys/conf/files.amd64
+++ b/sys/conf/files.amd64
@@ -232,6 +232,7 @@ amd64/ia32/ia32_reg.c optional compat_ia32
amd64/ia32/ia32_signal.c optional compat_ia32
amd64/ia32/ia32_sigtramp.S optional compat_ia32
amd64/ia32/ia32_syscall.c optional compat_ia32
+amd64/ia32/ia32_misc.c optional compat_ia32
compat/freebsd32/freebsd32_ioctl.c optional compat_ia32
compat/freebsd32/freebsd32_misc.c optional compat_ia32
compat/freebsd32/freebsd32_syscalls.c optional compat_ia32