diff options
author | marcel <marcel@FreeBSD.org> | 2003-10-28 19:38:26 +0000 |
---|---|---|
committer | marcel <marcel@FreeBSD.org> | 2003-10-28 19:38:26 +0000 |
commit | ba29587a94ca9095fb6130b6ebbd3c09132e797a (patch) | |
tree | e7f6843001d623637616bc057a791d8a87b39b7e /sys/ia64 | |
parent | b0cc5e450bc6c18f3bfbaff4380a063a7868c0f9 (diff) | |
download | FreeBSD-src-ba29587a94ca9095fb6130b6ebbd3c09132e797a.zip FreeBSD-src-ba29587a94ca9095fb6130b6ebbd3c09132e797a.tar.gz |
When switching the RSE to use the kernel stack as backing store, keep
the RNAT bit index constant. The net effect of this is that there's
no discontinuity WRT NaT collections which greatly simplifies certain
operations. The cost of this is that there can be up to 504 bytes of
unused stack between the true base of the kernel stack and the start
of the RSE backing store. The cost of adjusting the backing store
pointer to keep the RNAT bit index constant, for each kernel entry,
is negligible.
The primary reasons for this change are:
1. Asynchronous contexts in KSE processes have the disadvantage of
having to copy the dirty registers from the kernel stack onto the
user stack. The implementation we had so far copied the registers
one at a time without calculating NaT collection values. A process
that used speculation would not work. Now that the RNAT bit index
is constant, we can block-copy the registers from the kernel stack
to the user stack without having to worry about NaT collections.
They will be in the right place on the user stack.
2. The ndirty field in the trapframe is now also usable in userland.
This was previously not the case because ndirty also includes the
space occupied by NaT collections. The value could be off by 8,
depending on the discontinuity. Now that the RNAT bit index is
constant, we have exactly the same number of NaT collection points
on the kernel stack as we would have had on the user stack if we
didn't switch backing stores.
3. Debuggers and other applications that use ptrace(2) can now copy
the dirty registers from the kernel stack (using ptrace(2)) and
copy them wherever they want them (onto the user stack of the
inferior as might be the case for gdb) without having to worry
about NaT collections in the same way the kernel doesn't have to
worry about them.
There's a second order effect caused by the randomization of the
base of the backing store, for it depends on the number of dirty
registers the processor happened to have at the time of entry into
the kernel. The second order effect is that the RSE will have a
better cache utilization as compared to having the backing store
always aligned at page boundaries. This has not been measured and
may be in practice only minimally beneficial, if at all measurable.
Diffstat (limited to 'sys/ia64')
-rw-r--r-- | sys/ia64/ia64/elf_machdep.c | 32 | ||||
-rw-r--r-- | sys/ia64/ia64/exception.S | 5 | ||||
-rw-r--r-- | sys/ia64/ia64/machdep.c | 36 | ||||
-rw-r--r-- | sys/ia64/ia64/ptrace_machdep.c | 6 | ||||
-rw-r--r-- | sys/ia64/ia64/syscall.S | 21 | ||||
-rw-r--r-- | sys/ia64/ia64/trap.c | 3 | ||||
-rw-r--r-- | sys/ia64/ia64/vm_machdep.c | 15 |
7 files changed, 59 insertions, 59 deletions
diff --git a/sys/ia64/ia64/elf_machdep.c b/sys/ia64/ia64/elf_machdep.c index a637113..dbb4bf3 100644 --- a/sys/ia64/ia64/elf_machdep.c +++ b/sys/ia64/ia64/elf_machdep.c @@ -101,32 +101,28 @@ static int ia64_coredump(struct thread *td, struct vnode *vp, off_t limit) { struct trapframe *tf; - uint64_t *kstk, *ustk; - uint64_t bspst, ndirty; + uint64_t bspst, kstk, ndirty, rnat; tf = td->td_frame; ndirty = tf->tf_special.ndirty; if (ndirty != 0) { - __asm __volatile("mov ar.rsc=0;;"); - __asm __volatile("mov %0=ar.bspstore" : "=r"(bspst)); + kstk = td->td_kstack + (tf->tf_special.bspstore & 0x1ffUL); + __asm __volatile("mov ar.rsc=0;;"); + __asm __volatile("mov %0=ar.bspstore" : "=r"(bspst)); /* Make sure we have all the user registers written out. */ - if (bspst - td->td_kstack < ndirty) + if (bspst - kstk < ndirty) { __asm __volatile("flushrs;;"); - __asm __volatile("mov ar.rsc=3"); - ustk = (uint64_t*)tf->tf_special.bspstore; - kstk = (uint64_t*)td->td_kstack; - while (ndirty > 0) { - *ustk++ = *kstk++; - if (((uintptr_t)ustk & 0x1ff) == 0x1f8) - *ustk++ = 0; - if (((uintptr_t)kstk & 0x1ff) == 0x1f8) { - kstk++; - ndirty -= 8; - } - ndirty -= 8; + __asm __volatile("mov %0=ar.bspstore" : "=r"(bspst)); } - tf->tf_special.bspstore = (uintptr_t)ustk; + __asm __volatile("mov %0=ar.rnat;;" : "=r"(rnat)); + __asm __volatile("mov ar.rsc=3"); + copyout((void*)kstk, (void*)tf->tf_special.bspstore, ndirty); + kstk += ndirty; + tf->tf_special.bspstore += ndirty; tf->tf_special.ndirty = 0; + tf->tf_special.rnat = + (bspst > kstk && (bspst & 0x1ffUL) < (kstk & 0x1ffUL)) + ? 
*(uint64_t*)(kstk | 0x1f8UL) : rnat; } return (elf64_coredump(td, vp, limit)); } diff --git a/sys/ia64/ia64/exception.S b/sys/ia64/ia64/exception.S index 2105203..bc51603 100644 --- a/sys/ia64/ia64/exception.S +++ b/sys/ia64/ia64/exception.S @@ -158,9 +158,10 @@ exception_save_restart: ;; } { .mmi +(p13) mov r21=ar.k6 // kernel register stack + ;; st8 [r30]=r18,16 // fpsr -(p13) mov r20=ar.k6 // kernel register stack - nop 0 +(p13) dep r20=r20,r21,0,9 // align dirty registers ;; } // r20=bspstore, r22=iip, r23=ipsr diff --git a/sys/ia64/ia64/machdep.c b/sys/ia64/ia64/machdep.c index 9e71d6e..fb4d5bf 100644 --- a/sys/ia64/ia64/machdep.c +++ b/sys/ia64/ia64/machdep.c @@ -761,8 +761,6 @@ ia64_init(void) * Set the kernel sp, reserving space for an (empty) trapframe, * and make proc0's trapframe pointer point to it for sanity. * Initialise proc0's backing store to start after u area. - * - * XXX what is all this +/- 16 stuff? */ thread0.td_frame = (struct trapframe *)thread0.td_pcb - 1; thread0.td_frame->tf_length = sizeof(struct trapframe); @@ -1079,35 +1077,28 @@ get_mcontext(struct thread *td, mcontext_t *mc, int clear_ret) { struct _special s; struct trapframe *tf; - uint64_t bspst, *kstk, *ustk; + uint64_t bspst, kstk, rnat; tf = td->td_frame; bzero(mc, sizeof(*mc)); s = tf->tf_special; if (s.ndirty != 0) { + kstk = td->td_kstack + (s.bspstore & 0x1ffUL); __asm __volatile("mov ar.rsc=0;;"); __asm __volatile("mov %0=ar.bspstore" : "=r"(bspst)); /* Make sure we have all the user registers written out. 
*/ - if (bspst - td->td_kstack < s.ndirty) + if (bspst - kstk < s.ndirty) { __asm __volatile("flushrs;;"); - __asm __volatile("mov ar.rsc=3"); - kstk = (uint64_t*)td->td_kstack; - ustk = (uint64_t*)s.bspstore; - if ((s.bspstore & 0x1ff) == 0x1f8) { - suword64(ustk++, s.rnat); - s.rnat = 0; + __asm __volatile("mov %0=ar.bspstore" : "=r"(bspst)); } - while (s.ndirty > 0) { - suword64(ustk++, *kstk++); - if (((uintptr_t)ustk & 0x1ff) == 0x1f8) - suword64(ustk++, 0); - if (((uintptr_t)kstk & 0x1ff) == 0x1f8) { - kstk++; - s.ndirty -= 8; - } - s.ndirty -= 8; - } - s.bspstore = (uintptr_t)ustk; + __asm __volatile("mov %0=ar.rnat;;" : "=r"(rnat)); + __asm __volatile("mov ar.rsc=3"); + copyout((void*)kstk, (void*)s.bspstore, s.ndirty); + kstk += s.ndirty; + s.bspstore += s.ndirty; + s.ndirty = 0; + s.rnat = (bspst > kstk && (bspst & 0x1ffUL) < (kstk & 0x1ffUL)) + ? *(uint64_t*)(kstk | 0x1f8UL) : rnat; } if (tf->tf_flags & FRAME_SYSCALL) { /* @@ -1196,7 +1187,8 @@ exec_setregs(struct thread *td, u_long entry, u_long stack, u_long ps_strings) uint64_t *ksttop, *kst; tf = td->td_frame; - ksttop = (uint64_t*)(td->td_kstack + tf->tf_special.ndirty); + ksttop = (uint64_t*)(td->td_kstack + tf->tf_special.ndirty + + (tf->tf_special.bspstore & 0x1ffUL)); /* * We can ignore up to 8KB of dirty registers by masking off the diff --git a/sys/ia64/ia64/ptrace_machdep.c b/sys/ia64/ia64/ptrace_machdep.c index 0f269e2..5e64cc5 100644 --- a/sys/ia64/ia64/ptrace_machdep.c +++ b/sys/ia64/ia64/ptrace_machdep.c @@ -46,13 +46,15 @@ cpu_ptrace(struct thread *td, int req, void *addr, int data) switch (req) { case PT_GETKSTACK: if (data >= 0 && data < (tf->tf_special.ndirty >> 3)) { - kstack = (uint64_t*)td->td_kstack; + kstack = (uint64_t*)(td->td_kstack + + (tf->tf_special.bspstore & 0x1ffUL)); error = copyout(kstack + data, addr, 8); } break; case PT_SETKSTACK: if (data >= 0 && data < (tf->tf_special.ndirty >> 3)) { - kstack = (uint64_t*)td->td_kstack; + kstack = (uint64_t*)(td->td_kstack + + 
(tf->tf_special.bspstore & 0x1ffUL)); error = copyin(addr, kstack + data, 8); } break; diff --git a/sys/ia64/ia64/syscall.S b/sys/ia64/ia64/syscall.S index e66d12b..1ea87ca 100644 --- a/sys/ia64/ia64/syscall.S +++ b/sys/ia64/ia64/syscall.S @@ -259,27 +259,34 @@ ENTRY(epc_syscall, 8) } { .mmi mov r18=ar.bspstore + ;; mov r19=ar.rnat - add r30=-SIZEOF_TRAPFRAME,r14 + dep r15=r18,r15,0,9 ;; } { .mmi mov ar.bspstore=r15 - mov r13=ar.k4 - dep r30=0,r30,0,10 + add r30=-SIZEOF_TRAPFRAME,r14 + mov r20=sp ;; } { .mii - mov r20=sp - add r31=8,r30 + mov r13=ar.k4 + dep r30=0,r30,0,10 + ;; add sp=-16,r30 ;; } -{ .mmi +{ .mib mov r21=ar.unat + add r31=8,r30 + nop 0 + ;; +} +{ .mib mov r22=ar.fpsr sub r29=r14,r30 - ;; + nop 0 } { .mmi mov r23=ar.bsp diff --git a/sys/ia64/ia64/trap.c b/sys/ia64/ia64/trap.c index 4394ede..9124a89 100644 --- a/sys/ia64/ia64/trap.c +++ b/sys/ia64/ia64/trap.c @@ -879,7 +879,8 @@ break_syscall(struct trapframe *tf) */ tfp = &tf->tf_scratch.gr16; nargs = tf->tf_special.cfm & 0x7f; - bsp = (uint64_t*)(curthread->td_kstack + tf->tf_special.ndirty); + bsp = (uint64_t*)(curthread->td_kstack + tf->tf_special.ndirty + + (tf->tf_special.bspstore & 0x1ffUL)); bsp -= (((uintptr_t)bsp & 0x1ff) < (nargs << 3)) ? 
(nargs + 1): nargs; while (nargs--) { *tfp++ = *bsp++; diff --git a/sys/ia64/ia64/vm_machdep.c b/sys/ia64/ia64/vm_machdep.c index 4e02b8a..15736f1 100644 --- a/sys/ia64/ia64/vm_machdep.c +++ b/sys/ia64/ia64/vm_machdep.c @@ -159,6 +159,7 @@ cpu_set_upcall(struct thread *td, struct thread *td0) tf->tf_length = sizeof(struct trapframe); tf->tf_flags = FRAME_SYSCALL; tf->tf_special.ndirty = 0; + tf->tf_special.bspstore &= ~0x1ffUL; tf->tf_scratch.gr8 = 0; tf->tf_scratch.gr9 = 1; tf->tf_scratch.gr10 = 0; @@ -182,12 +183,12 @@ cpu_set_upcall_kse(struct thread *td, struct kse_upcall *ku) uint64_t ndirty, stack; tf = td->td_frame; + ndirty = tf->tf_special.ndirty + (tf->tf_special.bspstore & 0x1ffUL); - KASSERT((tf->tf_special.ndirty & ~PAGE_MASK) == 0, + KASSERT((ndirty & ~PAGE_MASK) == 0, ("Whoa there! We have more than 8KB of dirty registers!")); fd = ku->ku_func; - ndirty = tf->tf_special.ndirty; stack = (uint64_t)ku->ku_stack.ss_sp; bzero(&tf->tf_special, sizeof(tf->tf_special)); @@ -228,6 +229,7 @@ cpu_fork(struct thread *td1, struct proc *p2 __unused, struct thread *td2, int flags) { char *stackp; + uint64_t ndirty; KASSERT(td1 == curthread || td1 == &thread0, ("cpu_fork: td1 not curthread and not thread0")); @@ -263,9 +265,9 @@ cpu_fork(struct thread *td1, struct proc *p2 __unused, struct thread *td2, td2->td_frame = (struct trapframe *)stackp; bcopy(td1->td_frame, td2->td_frame, sizeof(struct trapframe)); td2->td_frame->tf_length = sizeof(struct trapframe); - - bcopy((void*)td1->td_kstack, (void*)td2->td_kstack, - td2->td_frame->tf_special.ndirty); + ndirty = td2->td_frame->tf_special.ndirty + + (td2->td_frame->tf_special.bspstore & 0x1ffUL); + bcopy((void*)td1->td_kstack, (void*)td2->td_kstack, ndirty); /* Set-up the return values as expected by the fork() libc stub. 
*/ if (td2->td_frame->tf_special.psr & IA64_PSR_IS) { @@ -277,8 +279,7 @@ cpu_fork(struct thread *td1, struct proc *p2 __unused, struct thread *td2, td2->td_frame->tf_scratch.gr10 = 0; } - td2->td_pcb->pcb_special.bspstore = td2->td_kstack + - td2->td_frame->tf_special.ndirty; + td2->td_pcb->pcb_special.bspstore = td2->td_kstack + ndirty; td2->td_pcb->pcb_special.pfs = 0; td2->td_pcb->pcb_current_pmap = vmspace_pmap(td2->td_proc->p_vmspace); |