diff options
author | peter <peter@FreeBSD.org> | 1997-04-07 07:16:06 +0000 |
---|---|---|
committer | peter <peter@FreeBSD.org> | 1997-04-07 07:16:06 +0000 |
commit | ecf50a7463380158994f03aa71d9b084c0c5114a (patch) | |
tree | ec1548851ef256720ebb6254e235cc3d5bc04523 /sys/i386 | |
parent | 237ff29ca4d094b8fbf6c41083b91ddda096ae46 (diff) | |
download | FreeBSD-src-ecf50a7463380158994f03aa71d9b084c0c5114a.zip FreeBSD-src-ecf50a7463380158994f03aa71d9b084c0c5114a.tar.gz |
The biggie: Get rid of the UPAGES from the top of the per-process address
space. (!)
Have each process use the kernel stack and pcb in the kvm space. Since
the stacks are at a different address, we cannot copy the stack at fork()
and allow the child to return up through the function call tree to return
to user mode - create a new execution context and have the new process
begin executing from cpu_switch() and go to user mode directly.
In theory this should speed up fork a bit.
Context switch the tss_esp0 pointer in the common tss. This is a lot
simpler than switching the gdt[GPROC0_SEL].sd.sd_base pointer
to each process's tss since the esp0 pointer is a 32 bit pointer, and the
sd_base setting is split into three different bit sections at non-aligned
boundaries and requires a lot of twiddling to reset.
The 8K of memory at the top of the process space is now empty, and unmapped
(and unmappable, it's higher than VM_MAXUSER_ADDRESS).
Simplify the pmap code to manage process contexts, we no longer have to
double map the UPAGES, this simplifies and should measurably speed up fork().
The following parts came from John Dyson:
Set PG_G on the UPAGES that are now in kernel context, and invalidate
them when swapping them out.
Move the upages object (upobj) from the vmspace to the proc structure.
Now that the UPAGES (pcb and kernel stack) are out of user space, make
rfork(..RFMEM..) do what was intended by sharing the vmspace
entirely via reference counting rather than simply inheriting the mappings.
Diffstat (limited to 'sys/i386')
-rw-r--r-- | sys/i386/i386/exception.s | 16 | ||||
-rw-r--r-- | sys/i386/i386/genassym.c | 5 | ||||
-rw-r--r-- | sys/i386/i386/locore.s | 25 | ||||
-rw-r--r-- | sys/i386/i386/machdep.c | 29 | ||||
-rw-r--r-- | sys/i386/i386/pmap.c | 69 | ||||
-rw-r--r-- | sys/i386/i386/swtch.s | 43 | ||||
-rw-r--r-- | sys/i386/i386/symbols.raw | 3 | ||||
-rw-r--r-- | sys/i386/i386/trap.c | 21 | ||||
-rw-r--r-- | sys/i386/i386/vm_machdep.c | 98 | ||||
-rw-r--r-- | sys/i386/include/cpu.h | 6 |
10 files changed, 198 insertions, 117 deletions
diff --git a/sys/i386/i386/exception.s b/sys/i386/i386/exception.s index 4f53156..5ecc160 100644 --- a/sys/i386/i386/exception.s +++ b/sys/i386/i386/exception.s @@ -30,7 +30,7 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $Id$ + * $Id: exception.s,v 1.21 1997/02/22 09:32:16 peter Exp $ */ #include "npx.h" /* NNPX */ @@ -250,6 +250,20 @@ IDTVEC(int0x80_syscall) MEXITCOUNT jmp _doreti +ENTRY(fork_trampoline) + pushl %ebx /* arg1 */ + call %esi /* function */ + addl $4,%esp + /* cut from syscall */ + /* + * Return via _doreti to handle ASTs. + */ + pushl $0 /* cpl to restore */ + subl $4,%esp + movb $1,_intr_nesting_level + MEXITCOUNT + jmp _doreti + /* * Include what was once config+isa-dependent code. * XXX it should be in a stand-alone file. It's still icu-dependent and diff --git a/sys/i386/i386/genassym.c b/sys/i386/i386/genassym.c index 903fad8..c1d3b4c 100644 --- a/sys/i386/i386/genassym.c +++ b/sys/i386/i386/genassym.c @@ -34,7 +34,7 @@ * SUCH DAMAGE. 
* * from: @(#)genassym.c 5.11 (Berkeley) 5/10/91 - * $Id: genassym.c,v 1.42 1997/02/22 09:32:18 peter Exp $ + * $Id: genassym.c,v 1.43 1997/04/07 06:45:11 peter Exp $ */ #include <stdio.h> @@ -57,6 +57,7 @@ #include <machine/psl.h> #include <machine/reg.h> #include <machine/bootinfo.h> +#include <machine/tss.h> #include <sys/syscall.h> #include <sys/vmmeter.h> #include <vm/vm.h> @@ -84,6 +85,7 @@ main() struct uprof *uprof = (struct uprof *)0; struct vmspace *vms = (struct vmspace *)0; struct pcb *pcb = (struct pcb *)0; + struct i386tss *tss = (struct i386tss *)0; struct trapframe *tf = (struct trapframe *)0; struct sigframe *sigf = (struct sigframe *)0; struct bootinfo *bootinfo = (struct bootinfo *)0; @@ -127,6 +129,7 @@ main() printf("#define\tPCB_ESP %p\n", &pcb->pcb_esp); printf("#define\tPCB_EBX %p\n", &pcb->pcb_ebx); printf("#define\tPCB_EIP %p\n", &pcb->pcb_eip); + printf("#define\tTSS_ESP0 %p\n", &tss->tss_esp0); printf("#define\tPCB_USERLDT %p\n", &pcb->pcb_ldt); printf("#define\tU_PROF %p\n", &up->u_stats.p_prof); printf("#define\tU_PROFSCALE %p\n", &up->u_stats.p_prof.pr_scale); diff --git a/sys/i386/i386/locore.s b/sys/i386/i386/locore.s index 5efddc5..b22f2e7 100644 --- a/sys/i386/i386/locore.s +++ b/sys/i386/i386/locore.s @@ -34,7 +34,7 @@ * SUCH DAMAGE. * * from: @(#)locore.s 7.3 (Berkeley) 5/13/91 - * $Id: locore.s,v 1.81 1997/02/22 09:32:22 peter Exp $ + * $Id: locore.s,v 1.82 1997/03/22 18:52:03 kato Exp $ * * originally from: locore.s, by William F. Jolitz * @@ -87,14 +87,6 @@ .set _APTDpde,_PTD + (APTDPTDI * PDESIZE) /* - * Access to each processes kernel stack is via a region of - * per-process address space (at the beginning), immediately above - * the user process stack. 
- */ - .set _kstack,USRSTACK - .globl _kstack - -/* * Globals */ .data @@ -336,7 +328,8 @@ _pc98_system_parameter: /* now running relocated at KERNBASE where the system is linked to run */ begin: /* set up bootstrap stack */ - movl $_kstack+UPAGES*PAGE_SIZE,%esp /* bootstrap stack end location */ + movl _proc0paddr,%esp /* location of in-kernel pages */ + addl $UPAGES*PAGE_SIZE,%esp /* bootstrap stack end location */ xorl %eax,%eax /* mark end of frames */ movl %eax,%ebp movl _proc0paddr,%eax @@ -361,8 +354,13 @@ begin: pushl %esp /* call main with frame pointer */ call _main /* autoconfiguration, mountroot etc */ - addl $(13*4),%esp /* back to a frame we can return with */ + hlt /* never returns to here */ +/* + * When starting init, call this to configure the process for user + * mode. This will be inherited by other processes. + */ +NON_GPROF_ENTRY(prepare_usermode) /* * Now we've run main() and determined what cpu-type we are, we can * enable write protection and alignment checking on i486 cpus and @@ -383,11 +381,14 @@ begin: movl __ucodesel,%eax movl __udatasel,%ecx +#if 0 movl %cx,%ds +#endif movl %cx,%es movl %ax,%fs /* double map cs to fs */ movl %cx,%gs /* and ds to gs */ - iret /* goto user! */ + ret /* goto user! */ + #define LCALL(x,y) .byte 0x9a ; .long y ; .word x diff --git a/sys/i386/i386/machdep.c b/sys/i386/i386/machdep.c index d643e10..5dc7522 100644 --- a/sys/i386/i386/machdep.c +++ b/sys/i386/i386/machdep.c @@ -35,7 +35,7 @@ * SUCH DAMAGE. 
* * from: @(#)machdep.c 7.4 (Berkeley) 6/3/91 - * $Id: machdep.c,v 1.234 1997/03/31 11:10:37 davidg Exp $ + * $Id: machdep.c,v 1.235 1997/04/07 06:45:13 peter Exp $ */ #include "npx.h" @@ -755,6 +755,11 @@ static char dblfault_stack[PAGE_SIZE]; extern struct user *proc0paddr; +#ifdef TSS_IS_CACHED /* cpu_switch helper */ +struct segment_descriptor *tssptr; +int gsel_tss; +#endif + /* software prototypes -- in more palatable form */ struct soft_segment_descriptor gdt_segs[] = { /* GNULL_SEL 0 Null Descriptor */ @@ -812,7 +817,7 @@ struct soft_segment_descriptor gdt_segs[] = { 0, /* unused - default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* GPROC0_SEL 6 Proc 0 Tss Descriptor */ -{ (int) &common_tss, /* segment base address */ +{ (int) &common_tss, /* segment base address */ sizeof(struct i386tss)-1,/* length - all address space */ SDT_SYS386TSS, /* segment type */ 0, /* segment descriptor priority level */ @@ -956,7 +961,9 @@ init386(first) int x; unsigned biosbasemem, biosextmem; struct gate_descriptor *gdp; +#ifndef TSS_IS_CACHED int gsel_tss; +#endif struct isa_device *idp; /* table descriptors - used to load tables by microp */ struct region_descriptor r_gdt, r_idt; @@ -1300,8 +1307,8 @@ init386(first) avail_end + off, VM_PROT_ALL, TRUE); msgbufmapped = 1; - /* make a initial tss so microp can get interrupt stack on syscall! */ - common_tss.tss_esp0 = (int) kstack + UPAGES*PAGE_SIZE; + /* make an initial tss so cpu can get interrupt stack on syscall! 
*/ + common_tss.tss_esp0 = (int) proc0.p_addr + UPAGES*PAGE_SIZE; common_tss.tss_ss0 = GSEL(GDATA_SEL, SEL_KPL) ; common_tss.tss_ioopt = (sizeof common_tss) << 16; gsel_tss = GSEL(GPROC0_SEL, SEL_KPL); @@ -1314,11 +1321,15 @@ init386(first) dblfault_tss.tss_cr3 = IdlePTD; dblfault_tss.tss_eip = (int) dblfault_handler; dblfault_tss.tss_eflags = PSL_KERNEL; - dblfault_tss.tss_ds = dblfault_tss.tss_es = dblfault_tss.tss_fs = dblfault_tss.tss_gs = - GSEL(GDATA_SEL, SEL_KPL); + dblfault_tss.tss_ds = dblfault_tss.tss_es = dblfault_tss.tss_fs = + dblfault_tss.tss_gs = GSEL(GDATA_SEL, SEL_KPL); dblfault_tss.tss_cs = GSEL(GCODE_SEL, SEL_KPL); dblfault_tss.tss_ldt = GSEL(GLDT_SEL, SEL_KPL); +#ifdef TSS_IS_CACHED /* cpu_switch helper */ + tssptr = &gdt[GPROC0_SEL].sd; +#endif + /* make a call gate to reenter kernel with */ gdp = &ldt[LSYS5CALLS_SEL].gd; @@ -1353,9 +1364,7 @@ init386(first) * index into the user block. Don't you just *love* virtual memory? * (I'm starting to think seymour is right...) */ -#define TF_REGP(p) ((struct trapframe *) \ - ((char *)(p)->p_addr \ - + ((char *)(p)->p_md.md_regs - kstack))) +#define TF_REGP(p) ((struct trapframe *)(p)->p_md.md_regs) int ptrace_set_pc(p, addr) @@ -1387,7 +1396,7 @@ int ptrace_write_u(p, off, data) * Privileged kernel state is scattered all over the user area. * Only allow write access to parts of regs and to fpregs. */ - min = (char *)p->p_md.md_regs - kstack; + min = (char *)p->p_md.md_regs - (char *)p->p_addr; if (off >= min && off <= min + sizeof(struct trapframe) - sizeof(int)) { tp = TF_REGP(p); frame_copy = *tp; diff --git a/sys/i386/i386/pmap.c b/sys/i386/i386/pmap.c index 540f85e..d7f531b 100644 --- a/sys/i386/i386/pmap.c +++ b/sys/i386/i386/pmap.c @@ -39,7 +39,7 @@ * SUCH DAMAGE. 
* * from: @(#)pmap.c 7.7 (Berkeley) 5/12/91 - * $Id$ + * $Id: pmap.c,v 1.138 1997/02/22 09:32:40 peter Exp $ */ /* @@ -685,32 +685,22 @@ pmap_new_proc(p) { int i; vm_object_t upobj; - pmap_t pmap; vm_page_t m; struct user *up; - unsigned *ptep, *ptek; - - pmap = &p->p_vmspace->vm_pmap; + unsigned *ptek; /* * allocate object for the upages */ upobj = vm_object_allocate( OBJT_DEFAULT, UPAGES); - p->p_vmspace->vm_upages_obj = upobj; + p->p_upages_obj = upobj; /* get a kernel virtual address for the UPAGES for this proc */ up = (struct user *) kmem_alloc_pageable(u_map, UPAGES * PAGE_SIZE); if (up == NULL) - panic("vm_fork: u_map allocation failed"); - - /* - * Allocate the ptp and incr the hold count appropriately - */ - m = pmap_allocpte(pmap, (vm_offset_t) kstack); - m->hold_count += (UPAGES - 1); + panic("pmap_new_proc: u_map allocation failed"); - ptep = (unsigned *) pmap_pte(pmap, (vm_offset_t) kstack); ptek = (unsigned *) vtopte((vm_offset_t) up); for(i=0;i<UPAGES;i++) { @@ -729,18 +719,15 @@ pmap_new_proc(p) ++cnt.v_wire_count; /* - * Enter the page into both the kernel and the process - * address space. + * Enter the page into the kernel address space. 
*/ - *(ptep + i) = VM_PAGE_TO_PHYS(m) | PG_RW | PG_V; - *(ptek + i) = VM_PAGE_TO_PHYS(m) | PG_RW | PG_V; + *(ptek + i) = VM_PAGE_TO_PHYS(m) | PG_RW | PG_V | pgeflag; m->flags &= ~(PG_ZERO|PG_BUSY); m->flags |= PG_MAPPED|PG_WRITEABLE; m->valid = VM_PAGE_BITS_ALL; } - pmap->pm_stats.resident_count += UPAGES; p->p_addr = up; } @@ -754,26 +741,26 @@ pmap_dispose_proc(p) { int i; vm_object_t upobj; - pmap_t pmap; vm_page_t m; - unsigned *ptep, *ptek; + unsigned *ptek; - pmap = &p->p_vmspace->vm_pmap; - ptep = (unsigned *) pmap_pte(pmap, (vm_offset_t) kstack); ptek = (unsigned *) vtopte((vm_offset_t) p->p_addr); - upobj = p->p_vmspace->vm_upages_obj; + upobj = p->p_upages_obj; for(i=0;i<UPAGES;i++) { + unsigned oldpte; if ((m = vm_page_lookup(upobj, i)) == NULL) panic("pmap_dispose_proc: upage already missing???"); - *(ptep + i) = 0; + oldpte = *(ptek + i); *(ptek + i) = 0; - pmap_unuse_pt(pmap, (vm_offset_t) kstack + i * PAGE_SIZE, NULL); + if (oldpte & PG_G) + invlpg((vm_offset_t) p->p_addr + i * PAGE_SIZE); vm_page_unwire(m); vm_page_free(m); } - pmap->pm_stats.resident_count -= UPAGES; + + vm_object_deallocate(upobj); kmem_free(u_map, (vm_offset_t)p->p_addr, ctob(UPAGES)); } @@ -787,29 +774,20 @@ pmap_swapout_proc(p) { int i; vm_object_t upobj; - pmap_t pmap; vm_page_t m; - unsigned *pte; - - pmap = &p->p_vmspace->vm_pmap; - pte = (unsigned *) pmap_pte(pmap, (vm_offset_t) kstack); - upobj = p->p_vmspace->vm_upages_obj; + upobj = p->p_upages_obj; /* * let the upages be paged */ for(i=0;i<UPAGES;i++) { if ((m = vm_page_lookup(upobj, i)) == NULL) - panic("pmap_pageout_proc: upage already missing???"); + panic("pmap_swapout_proc: upage already missing???"); m->dirty = VM_PAGE_BITS_ALL; - *(pte + i) = 0; - pmap_unuse_pt(pmap, (vm_offset_t) kstack + i * PAGE_SIZE, NULL); - vm_page_unwire(m); vm_page_deactivate(m); pmap_kremove( (vm_offset_t) p->p_addr + PAGE_SIZE * i); } - pmap->pm_stats.resident_count -= UPAGES; } /* @@ -821,19 +799,10 @@ pmap_swapin_proc(p) { int i; 
vm_object_t upobj; - pmap_t pmap; vm_page_t m; unsigned *pte; - pmap = &p->p_vmspace->vm_pmap; - /* - * Allocate the ptp and incr the hold count appropriately - */ - m = pmap_allocpte(pmap, (vm_offset_t) kstack); - m->hold_count += (UPAGES - 1); - pte = (unsigned *) pmap_pte(pmap, (vm_offset_t) kstack); - - upobj = p->p_vmspace->vm_upages_obj; + upobj = p->p_upages_obj; for(i=0;i<UPAGES;i++) { int s; s = splvm(); @@ -854,7 +823,6 @@ retry: vm_page_wire(m); splx(s); - *(pte+i) = VM_PAGE_TO_PHYS(m) | PG_RW | PG_V; pmap_kenter(((vm_offset_t) p->p_addr) + i * PAGE_SIZE, VM_PAGE_TO_PHYS(m)); @@ -862,13 +830,12 @@ retry: int rv; rv = vm_pager_get_pages(upobj, &m, 1, 0); if (rv != VM_PAGER_OK) - panic("faultin: cannot get upages for proc: %d\n", p->p_pid); + panic("pmap_swapin_proc: cannot get upages for proc: %d\n", p->p_pid); m->valid = VM_PAGE_BITS_ALL; } PAGE_WAKEUP(m); m->flags |= PG_MAPPED|PG_WRITEABLE; } - pmap->pm_stats.resident_count += UPAGES; } /*************************************************** diff --git a/sys/i386/i386/swtch.s b/sys/i386/i386/swtch.s index 092e1b7..6c040fe 100644 --- a/sys/i386/i386/swtch.s +++ b/sys/i386/i386/swtch.s @@ -33,7 +33,7 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* - * $Id$ + * $Id: swtch.s,v 1.43 1997/02/22 09:32:51 peter Exp $ */ #include "apm.h" @@ -234,6 +234,19 @@ _idle: movl $tmpstk,%esp movl _IdlePTD,%ecx movl %ecx,%cr3 + + /* update common_tss.tss_esp0 pointer */ + movl $_common_tss, %eax + movl %esp, TSS_ESP0(%eax) + +#ifdef TSS_IS_CACHED /* example only */ + /* Reload task register to force reload of selector */ + movl _tssptr, %ebx + andb $~0x02, 5(%ebx) /* Flip 386BSY -> 386TSS */ + movl _gsel_tss, %ebx + ltr %bx +#endif + sti /* @@ -406,6 +419,34 @@ swtch_com: /* switch address space */ movl %ebx,%cr3 +#ifdef HOW_TO_SWITCH_TSS /* example only */ + /* Fix up tss pointer to floating pcb/stack structure */ + /* XXX probably lots faster to store the 64 bits of tss entry + * in the pcb somewhere and copy them on activation. + */ + movl _tssptr, %ebx + movl %edx, %eax /* edx = pcb/tss */ + movw %ax, 2(%ebx) /* store bits 0->15 */ + roll $16, %eax /* swap upper and lower */ + movb %al, 4(%ebx) /* store bits 16->23 */ + movb %ah, 7(%ebx) /* store bits 24->31 */ + andb $~0x02, 5(%ebx) /* Flip 386BSY -> 386TSS */ +#endif + + /* update common_tss.tss_esp0 pointer */ + movl $_common_tss, %eax + movl %edx, %ebx /* pcb */ + addl $(UPAGES * PAGE_SIZE), %ebx + movl %ebx, TSS_ESP0(%eax) + +#ifdef TSS_IS_CACHED /* example only */ + /* Reload task register to force reload of selector */ + movl _tssptr, %ebx + andb $~0x02, 5(%ebx) /* Flip 386BSY -> 386TSS */ + movl _gsel_tss, %ebx + ltr %bx +#endif + /* restore context */ movl PCB_EBX(%edx),%ebx movl PCB_ESP(%edx),%esp diff --git a/sys/i386/i386/symbols.raw b/sys/i386/i386/symbols.raw index d757e39..467d4d4 100644 --- a/sys/i386/i386/symbols.raw +++ b/sys/i386/i386/symbols.raw @@ -1,6 +1,6 @@ # @(#)symbols.raw 7.6 (Berkeley) 5/8/91 # -# $Id$ +# $Id: symbols.raw,v 1.8 1997/02/22 09:32:52 peter Exp $ # @@ -8,7 +8,6 @@ _IdlePTD _PTD _curpcb - _kstack _panicstr _atdevbase # _version diff --git a/sys/i386/i386/trap.c b/sys/i386/i386/trap.c index 5aff98f..2024c6b 100644 --- 
a/sys/i386/i386/trap.c +++ b/sys/i386/i386/trap.c @@ -35,7 +35,7 @@ * SUCH DAMAGE. * * from: @(#)trap.c 7.4 (Berkeley) 5/13/91 - * $Id: trap.c,v 1.89 1997/04/06 02:29:19 dyson Exp $ + * $Id: trap.c,v 1.90 1997/04/07 06:45:15 peter Exp $ */ /* @@ -938,3 +938,22 @@ bad: ktrsysret(p->p_tracep, code, error, rval[0]); #endif } + +/* + * Simplified back end of syscall(), used when returning from fork() + * directly into user mode. + */ +void +fork_return(p, frame) + struct proc *p; + struct trapframe frame; +{ + frame.tf_eax = 0; /* Child returns zero */ + frame.tf_eflags &= ~PSL_C; /* success */ + + userret(p, &frame, 0); +#ifdef KTRACE + if (KTRPOINT(p, KTR_SYSRET)) + ktrsysret(p->p_tracep, SYS_fork, 0, 0); +#endif +} diff --git a/sys/i386/i386/vm_machdep.c b/sys/i386/i386/vm_machdep.c index 0cf7d31..af7fe19 100644 --- a/sys/i386/i386/vm_machdep.c +++ b/sys/i386/i386/vm_machdep.c @@ -38,7 +38,7 @@ * * from: @(#)vm_machdep.c 7.3 (Berkeley) 5/13/91 * Utah $Hdr: vm_machdep.c 1.16.1.1 89/06/23$ - * $Id: vm_machdep.c,v 1.76 1997/03/22 04:28:16 dyson Exp $ + * $Id: vm_machdep.c,v 1.77 1997/03/29 04:35:26 bde Exp $ */ #include "npx.h" @@ -54,6 +54,8 @@ #include <machine/clock.h> #include <machine/md_var.h> +#include <machine/cpu.h> +#include <machine/reg.h> #include <vm/vm.h> #include <vm/vm_param.h> @@ -558,61 +560,83 @@ vm_fault_quick(v, prot) /* * Finish a fork operation, with process p2 nearly set up. - * Copy and update the kernel stack and pcb, making the child - * ready to run, and marking it so that it can return differently - * than the parent. Returns 1 in the child process, 0 in the parent. - * We currently double-map the user area so that the stack is at the same - * address in each process; in the future we will probably relocate - * the frame pointers on the stack after copying. + * Copy and update the pcb, set up the stack so that the child + * ready to run and return to user mode. 
*/ -int +void cpu_fork(p1, p2) register struct proc *p1, *p2; { struct pcb *pcb2 = &p2->p_addr->u_pcb; - int sp, offset; - volatile int retval; -#ifdef USER_LDT - struct pcb *pcb = &p2->p_addr->u_pcb; -#endif /* - * Copy pcb and stack from proc p1 to p2. - * We do this as cheaply as possible, copying only the active - * part of the stack. The stack and pcb need to agree; - * this is tricky, as the final pcb is constructed by savectx, - * but its frame isn't yet on the stack when the stack is copied. - * This should be done differently, with a single call - * that copies and updates the pcb+stack, - * replacing the bcopy and savectx. + * copy current pcb, and save current context into it while it's + * possibly in some writeback cache line. */ + bcopy(&p1->p_addr->u_pcb, pcb2, sizeof(struct pcb)); + pcb2->pcb_cr3 = vtophys(p2->p_vmspace->vm_pmap.pm_pdir); + savectx(pcb2); /* irrelevant? fp registers? */ - __asm __volatile("movl %%esp,%0" : "=r" (sp)); - offset = sp - (int)kstack; - - retval = 1; /* return 1 in child */ - bcopy((caddr_t)kstack + offset, (caddr_t)p2->p_addr + offset, - (unsigned) ctob(UPAGES) - offset); - p2->p_md.md_regs = p1->p_md.md_regs; + /* + * Create a new fresh stack for the new process. + * Copy the trap frame for the return to user mode as if from a syscall. + * This copies the user mode register values. + */ + p2->p_md.md_regs = (int *)(((struct trapframe *) + ((int)p2->p_addr + (UPAGES * PAGE_SIZE))) - 1); + bcopy(p1->p_md.md_regs, p2->p_md.md_regs, sizeof(struct trapframe)); - *pcb2 = p1->p_addr->u_pcb; - pcb2->pcb_cr3 = vtophys(p2->p_vmspace->vm_pmap.pm_pdir); + /* + * Set registers for trampoline to user mode. Leave space for the + * return address on stack. These are the kernel mode register values. + */ + /* XXX these overwrite most of the regs from savectx() above! 
*/ + pcb2->pcb_eip = (int)fork_trampoline; + pcb2->pcb_esi = (int)fork_return; + pcb2->pcb_ebx = (int)p2; + pcb2->pcb_esp = (int)p2->p_md.md_regs - sizeof(void *); #ifdef USER_LDT /* Copy the LDT, if necessary. */ - if (pcb->pcb_ldt != 0) { + if (pcb2->pcb_ldt != 0) { union descriptor *new_ldt; - size_t len = pcb->pcb_ldt_len * sizeof(union descriptor); + size_t len = pcb2->pcb_ldt_len * sizeof(union descriptor); new_ldt = (union descriptor *)kmem_alloc(kernel_map, len); - bcopy(pcb->pcb_ldt, new_ldt, len); - pcb->pcb_ldt = (caddr_t)new_ldt; + bcopy(pcb2->pcb_ldt, new_ldt, len); + pcb2->pcb_ldt = (caddr_t)new_ldt; } #endif - retval = 0; /* return 0 in parent */ - savectx(pcb2); - return (retval); + /* + * Now, cpu_switch() can schedule the new process. + * pcb_esp is loaded pointing to the cpu_switch() stack frame + * containing the return address when exiting cpu_switch. + * This will normally be to proc_trampoline(), which will have + * %ebx loaded with the new proc's pointer. proc_trampoline() + * will set up a stack to call fork_return(p, frame); to complete + * the return to user-mode. + */ +} + +/* + * Intercept the return address from a freshly forked process that has NOT + * been scheduled yet. + * + * This is needed to make kernel threads stay in kernel mode. + */ +void +cpu_set_fork_handler(p, func, arg) + struct proc *p; + void (*func) __P((void *)); + void *arg; +{ + /* + * Note that the trap frame follows the args, so the function + * is really called like this: func(arg, frame); + */ + p->p_addr->u_pcb.pcb_esi = (int) func; /* function */ + p->p_addr->u_pcb.pcb_ebx = (int) arg; /* first arg */ } void diff --git a/sys/i386/include/cpu.h b/sys/i386/include/cpu.h index ff0c70c..d28079a 100644 --- a/sys/i386/include/cpu.h +++ b/sys/i386/include/cpu.h @@ -34,7 +34,7 @@ * SUCH DAMAGE. 
* * from: @(#)cpu.h 5.4 (Berkeley) 5/9/91 - * $Id$ + * $Id: cpu.h,v 1.28 1997/02/22 09:34:04 peter Exp $ */ #ifndef _MACHINE_CPU_H_ @@ -134,6 +134,10 @@ extern int cpu; extern int cpu_class; extern u_char intr_nesting_level; extern int want_resched; /* resched was called */ + +void fork_trampoline __P((void)); +void fork_return __P((struct proc *, struct trapframe)); +void cpu_set_fork_handler __P((struct proc *, void (*pc)(void *), void *)); #endif #endif /* !_MACHINE_CPU_H_ */ |