author     marcel <marcel@FreeBSD.org>  2003-10-28 19:38:26 +0000
committer  marcel <marcel@FreeBSD.org>  2003-10-28 19:38:26 +0000
commit     ba29587a94ca9095fb6130b6ebbd3c09132e797a (patch)
tree       e7f6843001d623637616bc057a791d8a87b39b7e /sys
parent     b0cc5e450bc6c18f3bfbaff4380a063a7868c0f9 (diff)
When switching the RSE to use the kernel stack as backing store, keep
the RNAT bit index constant. The net effect of this is that there's no
discontinuity WRT NaT collections, which greatly simplifies certain
operations. The cost of this is that there can be up to 504 bytes of
unused stack between the true base of the kernel stack and the start
of the RSE backing store. The cost of adjusting the backing store
pointer to keep the RNAT bit index constant, for each kernel entry,
is negligible.

The primary reasons for this change are:

1. Asynchronous contexts in KSE processes have the disadvantage of
   having to copy the dirty registers from the kernel stack onto the
   user stack. The implementation we had so far copied the registers
   one at a time without calculating NaT collection values. A process
   that used speculation would not work. Now that the RNAT bit index
   is constant, we can block-copy the registers from the kernel stack
   to the user stack without having to worry about NaT collections.
   They will be in the right place on the user stack.

2. The ndirty field in the trapframe is now also usable in userland.
   This was previously not the case because ndirty also includes the
   space occupied by NaT collections. The value could be off by 8,
   depending on the discontinuity. Now that the RNAT bit index is
   constant, we have exactly the same number of NaT collection points
   on the kernel stack as we would have had on the user stack if we
   hadn't switched backing stores.

3. Debuggers and other applications that use ptrace(2) can now copy
   the dirty registers from the kernel stack (using ptrace(2)) and
   copy them wherever they want them (onto the user stack of the
   inferior, as might be the case for gdb) without having to worry
   about NaT collections, in the same way the kernel doesn't have to
   worry about them.

There's a second-order effect caused by the randomization of the base
of the backing store, for it depends on the number of dirty registers
the processor happened to have at the time of entry into the kernel.
The second-order effect is that the RSE will have better cache
utilization as compared to having the backing store always aligned at
page boundaries. This has not been measured and may in practice be
only minimally beneficial, if at all measurable.
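
The arithmetic behind the invariant can be sketched in a few lines of
C (illustrative only, not part of the commit; the 0x1f8/0x1ff
constants match the ones used throughout the diff below):

    #include <stdint.h>

    /*
     * The ia64 RSE stores a NaT collection word at every 8-byte slot
     * whose address has bits 8:3 all set, i.e. (addr & 0x1f8) ==
     * 0x1f8. This counts the collection words emitted while bspstore
     * advances from lo to hi (both 8-byte aligned).
     */
    static uint64_t
    nat_slots(uint64_t lo, uint64_t hi)
    {
            return (((hi + 7) >> 9) - ((lo + 7) >> 9));
    }

The count depends only on hi - lo and on the low 9 bits of lo (the
RNAT bit index). Keeping those bits identical on the user and kernel
backing stores therefore gives the same ndirty for the same stacked
registers, and the dirty region can be moved with a single block copy
because the collections already sit at matching offsets.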
Diffstat (limited to 'sys')
-rw-r--r--  sys/ia64/ia64/elf_machdep.c    | 32
-rw-r--r--  sys/ia64/ia64/exception.S      |  5
-rw-r--r--  sys/ia64/ia64/machdep.c        | 36
-rw-r--r--  sys/ia64/ia64/ptrace_machdep.c |  6
-rw-r--r--  sys/ia64/ia64/syscall.S        | 21
-rw-r--r--  sys/ia64/ia64/trap.c           |  3
-rw-r--r--  sys/ia64/ia64/vm_machdep.c     | 15
7 files changed, 59 insertions(+), 59 deletions(-)
diff --git a/sys/ia64/ia64/elf_machdep.c b/sys/ia64/ia64/elf_machdep.c
index a637113..dbb4bf3 100644
--- a/sys/ia64/ia64/elf_machdep.c
+++ b/sys/ia64/ia64/elf_machdep.c
@@ -101,32 +101,28 @@ static int
ia64_coredump(struct thread *td, struct vnode *vp, off_t limit)
{
struct trapframe *tf;
- uint64_t *kstk, *ustk;
- uint64_t bspst, ndirty;
+ uint64_t bspst, kstk, ndirty, rnat;
tf = td->td_frame;
ndirty = tf->tf_special.ndirty;
if (ndirty != 0) {
- __asm __volatile("mov ar.rsc=0;;");
- __asm __volatile("mov %0=ar.bspstore" : "=r"(bspst));
+ kstk = td->td_kstack + (tf->tf_special.bspstore & 0x1ffUL);
+ __asm __volatile("mov ar.rsc=0;;");
+ __asm __volatile("mov %0=ar.bspstore" : "=r"(bspst));
/* Make sure we have all the user registers written out. */
- if (bspst - td->td_kstack < ndirty)
+ if (bspst - kstk < ndirty) {
__asm __volatile("flushrs;;");
- __asm __volatile("mov ar.rsc=3");
- ustk = (uint64_t*)tf->tf_special.bspstore;
- kstk = (uint64_t*)td->td_kstack;
- while (ndirty > 0) {
- *ustk++ = *kstk++;
- if (((uintptr_t)ustk & 0x1ff) == 0x1f8)
- *ustk++ = 0;
- if (((uintptr_t)kstk & 0x1ff) == 0x1f8) {
- kstk++;
- ndirty -= 8;
- }
- ndirty -= 8;
+ __asm __volatile("mov %0=ar.bspstore" : "=r"(bspst));
}
- tf->tf_special.bspstore = (uintptr_t)ustk;
+ __asm __volatile("mov %0=ar.rnat;;" : "=r"(rnat));
+ __asm __volatile("mov ar.rsc=3");
+ copyout((void*)kstk, (void*)tf->tf_special.bspstore, ndirty);
+ kstk += ndirty;
+ tf->tf_special.bspstore += ndirty;
tf->tf_special.ndirty = 0;
+ tf->tf_special.rnat =
+ (bspst > kstk && (bspst & 0x1ffUL) < (kstk & 0x1ffUL))
+ ? *(uint64_t*)(kstk | 0x1f8UL) : rnat;
}
return (elf64_coredump(td, vp, limit));
}
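
The rnat selection at the end of the new block is the subtle part. A
hedged C restatement (illustrative, not part of the diff):

    /*
     * After the flush, bspst is the kernel RSE frontier and kstk the
     * end of the frame's dirty bytes. If bspst lies beyond kstk but
     * has a smaller offset within its 512-byte region, the frontier
     * crossed into a later region, so the collection covering this
     * frame was already spilled to the slot at (kstk | 0x1f8).
     * Otherwise it is still accumulating in ar.rnat.
     */
    static uint64_t
    pick_rnat(uint64_t bspst, uint64_t kstk, uint64_t ar_rnat)
    {
            if (bspst > kstk && (bspst & 0x1ffUL) < (kstk & 0x1ffUL))
                    return (*(uint64_t *)(kstk | 0x1f8UL));
            return (ar_rnat);
    }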
diff --git a/sys/ia64/ia64/exception.S b/sys/ia64/ia64/exception.S
index 2105203..bc51603 100644
--- a/sys/ia64/ia64/exception.S
+++ b/sys/ia64/ia64/exception.S
@@ -158,9 +158,10 @@ exception_save_restart:
;;
}
{ .mmi
+(p13) mov r21=ar.k6 // kernel register stack
+ ;;
st8 [r30]=r18,16 // fpsr
-(p13) mov r20=ar.k6 // kernel register stack
- nop 0
+(p13) dep r20=r20,r21,0,9 // align dirty registers
;;
}
// r20=bspstore, r22=iip, r23=ipsr
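
The "dep r20=r20,r21,0,9" is where the interrupt path establishes the
invariant: it deposits the low 9 bits of the interrupted bspstore
(r20) into the kernel stack base read from ar.k6 (r21). The
epc_syscall path further below does the same with "dep r15=r18,r15,0,9".
A C model of the operation (illustrative; since the kernel stack base
is page aligned, the OR is equivalent to the
td->td_kstack + (bspstore & 0x1ffUL) addition used in the C files):

    static inline uint64_t
    align_kbsp(uint64_t kstack, uint64_t user_bspstore)
    {
            /* dep kbsp=user_bspstore,kstack,0,9 */
            return ((kstack & ~0x1ffUL) | (user_bspstore & 0x1ffUL));
    }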
diff --git a/sys/ia64/ia64/machdep.c b/sys/ia64/ia64/machdep.c
index 9e71d6e..fb4d5bf 100644
--- a/sys/ia64/ia64/machdep.c
+++ b/sys/ia64/ia64/machdep.c
@@ -761,8 +761,6 @@ ia64_init(void)
* Set the kernel sp, reserving space for an (empty) trapframe,
* and make proc0's trapframe pointer point to it for sanity.
* Initialise proc0's backing store to start after u area.
- *
- * XXX what is all this +/- 16 stuff?
*/
thread0.td_frame = (struct trapframe *)thread0.td_pcb - 1;
thread0.td_frame->tf_length = sizeof(struct trapframe);
@@ -1079,35 +1077,28 @@ get_mcontext(struct thread *td, mcontext_t *mc, int clear_ret)
{
struct _special s;
struct trapframe *tf;
- uint64_t bspst, *kstk, *ustk;
+ uint64_t bspst, kstk, rnat;
tf = td->td_frame;
bzero(mc, sizeof(*mc));
s = tf->tf_special;
if (s.ndirty != 0) {
+ kstk = td->td_kstack + (s.bspstore & 0x1ffUL);
__asm __volatile("mov ar.rsc=0;;");
__asm __volatile("mov %0=ar.bspstore" : "=r"(bspst));
/* Make sure we have all the user registers written out. */
- if (bspst - td->td_kstack < s.ndirty)
+ if (bspst - kstk < s.ndirty) {
__asm __volatile("flushrs;;");
- __asm __volatile("mov ar.rsc=3");
- kstk = (uint64_t*)td->td_kstack;
- ustk = (uint64_t*)s.bspstore;
- if ((s.bspstore & 0x1ff) == 0x1f8) {
- suword64(ustk++, s.rnat);
- s.rnat = 0;
+ __asm __volatile("mov %0=ar.bspstore" : "=r"(bspst));
}
- while (s.ndirty > 0) {
- suword64(ustk++, *kstk++);
- if (((uintptr_t)ustk & 0x1ff) == 0x1f8)
- suword64(ustk++, 0);
- if (((uintptr_t)kstk & 0x1ff) == 0x1f8) {
- kstk++;
- s.ndirty -= 8;
- }
- s.ndirty -= 8;
- }
- s.bspstore = (uintptr_t)ustk;
+ __asm __volatile("mov %0=ar.rnat;;" : "=r"(rnat));
+ __asm __volatile("mov ar.rsc=3");
+ copyout((void*)kstk, (void*)s.bspstore, s.ndirty);
+ kstk += s.ndirty;
+ s.bspstore += s.ndirty;
+ s.ndirty = 0;
+ s.rnat = (bspst > kstk && (bspst & 0x1ffUL) < (kstk & 0x1ffUL))
+ ? *(uint64_t*)(kstk | 0x1f8UL) : rnat;
}
if (tf->tf_flags & FRAME_SYSCALL) {
/*
@@ -1196,7 +1187,8 @@ exec_setregs(struct thread *td, u_long entry, u_long stack, u_long ps_strings)
uint64_t *ksttop, *kst;
tf = td->td_frame;
- ksttop = (uint64_t*)(td->td_kstack + tf->tf_special.ndirty);
+ ksttop = (uint64_t*)(td->td_kstack + tf->tf_special.ndirty +
+ (tf->tf_special.bspstore & 0x1ffUL));
/*
* We can ignore up to 8KB of dirty registers by masking off the
diff --git a/sys/ia64/ia64/ptrace_machdep.c b/sys/ia64/ia64/ptrace_machdep.c
index 0f269e2..5e64cc5 100644
--- a/sys/ia64/ia64/ptrace_machdep.c
+++ b/sys/ia64/ia64/ptrace_machdep.c
@@ -46,13 +46,15 @@ cpu_ptrace(struct thread *td, int req, void *addr, int data)
switch (req) {
case PT_GETKSTACK:
if (data >= 0 && data < (tf->tf_special.ndirty >> 3)) {
- kstack = (uint64_t*)td->td_kstack;
+ kstack = (uint64_t*)(td->td_kstack +
+ (tf->tf_special.bspstore & 0x1ffUL));
error = copyout(kstack + data, addr, 8);
}
break;
case PT_SETKSTACK:
if (data >= 0 && data < (tf->tf_special.ndirty >> 3)) {
- kstack = (uint64_t*)td->td_kstack;
+ kstack = (uint64_t*)(td->td_kstack +
+ (tf->tf_special.bspstore & 0x1ffUL));
error = copyin(addr, kstack + data, 8);
}
break;
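
With the same offset applied on both the get and set sides,
PT_GETKSTACK and PT_SETKSTACK keep addressing dirty registers by slot
index starting at 0. A hedged userland sketch (hypothetical helper;
assumes a traced, stopped inferior and elides error handling):

    #include <sys/types.h>
    #include <sys/ptrace.h>
    #include <stdint.h>

    /*
     * Read dirty register slot n of the inferior: the kernel copies
     * 8 bytes out to addr, with data selecting the slot (bounded by
     * ndirty >> 3, as in the code above).
     */
    static uint64_t
    read_dirty_reg(pid_t pid, int n)
    {
            uint64_t val = 0;

            (void)ptrace(PT_GETKSTACK, pid, (caddr_t)&val, n);
            return (val);
    }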
diff --git a/sys/ia64/ia64/syscall.S b/sys/ia64/ia64/syscall.S
index e66d12b..1ea87ca 100644
--- a/sys/ia64/ia64/syscall.S
+++ b/sys/ia64/ia64/syscall.S
@@ -259,27 +259,34 @@ ENTRY(epc_syscall, 8)
}
{ .mmi
mov r18=ar.bspstore
+ ;;
mov r19=ar.rnat
- add r30=-SIZEOF_TRAPFRAME,r14
+ dep r15=r18,r15,0,9
;;
}
{ .mmi
mov ar.bspstore=r15
- mov r13=ar.k4
- dep r30=0,r30,0,10
+ add r30=-SIZEOF_TRAPFRAME,r14
+ mov r20=sp
;;
}
{ .mii
- mov r20=sp
- add r31=8,r30
+ mov r13=ar.k4
+ dep r30=0,r30,0,10
+ ;;
add sp=-16,r30
;;
}
-{ .mmi
+{ .mib
mov r21=ar.unat
+ add r31=8,r30
+ nop 0
+ ;;
+}
+{ .mib
mov r22=ar.fpsr
sub r29=r14,r30
- ;;
+ nop 0
}
{ .mmi
mov r23=ar.bsp
diff --git a/sys/ia64/ia64/trap.c b/sys/ia64/ia64/trap.c
index 4394ede..9124a89 100644
--- a/sys/ia64/ia64/trap.c
+++ b/sys/ia64/ia64/trap.c
@@ -879,7 +879,8 @@ break_syscall(struct trapframe *tf)
*/
tfp = &tf->tf_scratch.gr16;
nargs = tf->tf_special.cfm & 0x7f;
- bsp = (uint64_t*)(curthread->td_kstack + tf->tf_special.ndirty);
+ bsp = (uint64_t*)(curthread->td_kstack + tf->tf_special.ndirty +
+ (tf->tf_special.bspstore & 0x1ffUL));
bsp -= (((uintptr_t)bsp & 0x1ff) < (nargs << 3)) ? (nargs + 1): nargs;
while (nargs--) {
*tfp++ = *bsp++;
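
The bsp adjustment backs up over the nargs outgoing argument slots,
plus one extra slot when the span crosses a 512-byte boundary and thus
contains an RNAT collection. Restated in C (illustrative):

    /*
     * top points one past the last dirty register. If top's byte
     * offset within its 512-byte region is smaller than the span of
     * the arguments, a NaT collection slot lies among them and must
     * be stepped over as well.
     */
    static uint64_t *
    first_arg(uint64_t *top, int nargs)
    {
            uintptr_t off = (uintptr_t)top & 0x1ff;

            return (top - ((off < (uintptr_t)(nargs << 3)) ?
                nargs + 1 : nargs));
    }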
diff --git a/sys/ia64/ia64/vm_machdep.c b/sys/ia64/ia64/vm_machdep.c
index 4e02b8a..15736f1 100644
--- a/sys/ia64/ia64/vm_machdep.c
+++ b/sys/ia64/ia64/vm_machdep.c
@@ -159,6 +159,7 @@ cpu_set_upcall(struct thread *td, struct thread *td0)
tf->tf_length = sizeof(struct trapframe);
tf->tf_flags = FRAME_SYSCALL;
tf->tf_special.ndirty = 0;
+ tf->tf_special.bspstore &= ~0x1ffUL;
tf->tf_scratch.gr8 = 0;
tf->tf_scratch.gr9 = 1;
tf->tf_scratch.gr10 = 0;
@@ -182,12 +183,12 @@ cpu_set_upcall_kse(struct thread *td, struct kse_upcall *ku)
uint64_t ndirty, stack;
tf = td->td_frame;
+ ndirty = tf->tf_special.ndirty + (tf->tf_special.bspstore & 0x1ffUL);
- KASSERT((tf->tf_special.ndirty & ~PAGE_MASK) == 0,
+ KASSERT((ndirty & ~PAGE_MASK) == 0,
("Whoa there! We have more than 8KB of dirty registers!"));
fd = ku->ku_func;
- ndirty = tf->tf_special.ndirty;
stack = (uint64_t)ku->ku_stack.ss_sp;
bzero(&tf->tf_special, sizeof(tf->tf_special));
@@ -228,6 +229,7 @@ cpu_fork(struct thread *td1, struct proc *p2 __unused, struct thread *td2,
int flags)
{
char *stackp;
+ uint64_t ndirty;
KASSERT(td1 == curthread || td1 == &thread0,
("cpu_fork: td1 not curthread and not thread0"));
@@ -263,9 +265,9 @@ cpu_fork(struct thread *td1, struct proc *p2 __unused, struct thread *td2,
td2->td_frame = (struct trapframe *)stackp;
bcopy(td1->td_frame, td2->td_frame, sizeof(struct trapframe));
td2->td_frame->tf_length = sizeof(struct trapframe);
-
- bcopy((void*)td1->td_kstack, (void*)td2->td_kstack,
- td2->td_frame->tf_special.ndirty);
+ ndirty = td2->td_frame->tf_special.ndirty +
+ (td2->td_frame->tf_special.bspstore & 0x1ffUL);
+ bcopy((void*)td1->td_kstack, (void*)td2->td_kstack, ndirty);
/* Set-up the return values as expected by the fork() libc stub. */
if (td2->td_frame->tf_special.psr & IA64_PSR_IS) {
@@ -277,8 +279,7 @@ cpu_fork(struct thread *td1, struct proc *p2 __unused, struct thread *td2,
td2->td_frame->tf_scratch.gr10 = 0;
}
- td2->td_pcb->pcb_special.bspstore = td2->td_kstack +
- td2->td_frame->tf_special.ndirty;
+ td2->td_pcb->pcb_special.bspstore = td2->td_kstack + ndirty;
td2->td_pcb->pcb_special.pfs = 0;
td2->td_pcb->pcb_current_pmap = vmspace_pmap(td2->td_proc->p_vmspace);
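
cpu_fork() and cpu_set_upcall_kse() now size the copy identically; the
recurring expression can be captured in a helper (illustrative, not in
the tree):

    /*
     * Bytes occupied on the kernel stack by the dirty registers:
     * ndirty plus the alignment slack kept below the backing store
     * base to hold the RNAT bit index constant.
     */
    static inline uint64_t
    dirty_span(uint64_t ndirty, uint64_t bspstore)
    {
            return (ndirty + (bspstore & 0x1ffUL));
    }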