diff options
-rw-r--r-- | sys/amd64/amd64/pmap.c | 941 | ||||
-rw-r--r-- | sys/amd64/include/pmap.h | 25 | ||||
-rw-r--r-- | sys/i386/i386/pmap.c | 941 | ||||
-rw-r--r-- | sys/i386/include/pmap.h | 25 | ||||
-rw-r--r-- | sys/kern/kern_exec.c | 4 | ||||
-rw-r--r-- | sys/kern/kern_exit.c | 5 | ||||
-rw-r--r-- | sys/vm/swap_pager.c | 29 | ||||
-rw-r--r-- | sys/vm/vm_fault.c | 61 | ||||
-rw-r--r-- | sys/vm/vm_map.c | 145 | ||||
-rw-r--r-- | sys/vm/vm_map.h | 3 | ||||
-rw-r--r-- | sys/vm/vm_mmap.c | 204 | ||||
-rw-r--r-- | sys/vm/vm_object.c | 7 | ||||
-rw-r--r-- | sys/vm/vm_page.c | 49 | ||||
-rw-r--r-- | sys/vm/vm_page.h | 10 | ||||
-rw-r--r-- | sys/vm/vm_pageout.c | 58 | ||||
-rw-r--r-- | sys/vm/vnode_pager.c | 8 |
16 files changed, 1259 insertions, 1256 deletions
diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c index 6dbffd1..5e2838b 100644 --- a/sys/amd64/amd64/pmap.c +++ b/sys/amd64/amd64/pmap.c @@ -99,6 +99,12 @@ #define PMAP_DIAGNOSTIC #endif +#if !defined(SMALL_KERNEL) +#define PMAP_INLINE __inline +#else +#define PMAP_INLINE +#endif + static void init_pv_entries __P((int)); /* @@ -146,7 +152,7 @@ extern int cpu_class; * Data for the pv entry allocation mechanism */ static int pv_freelistcnt; -static pv_entry_t pv_freelist; +TAILQ_HEAD (,pv_entry) pv_freelist; static vm_offset_t pvva; static int npvvapg; @@ -155,7 +161,6 @@ static int npvvapg; */ pt_entry_t *CMAP1; static pt_entry_t *CMAP2, *ptmmap; -static pv_entry_t *pv_table; caddr_t CADDR1, ptvmmap; static caddr_t CADDR2; static pt_entry_t *msgbufmap; @@ -165,32 +170,32 @@ pt_entry_t *PMAP1; unsigned *PADDR1; static void free_pv_entry __P((pv_entry_t pv)); -static __inline unsigned * get_ptbase __P((pmap_t pmap)); +static unsigned * get_ptbase __P((pmap_t pmap)); static pv_entry_t get_pv_entry __P((void)); static void i386_protection_init __P((void)); static void pmap_alloc_pv_entry __P((void)); -static void pmap_changebit __P((vm_offset_t pa, int bit, boolean_t setem)); static int pmap_is_managed __P((vm_offset_t pa)); -static void pmap_remove_all __P((vm_offset_t pa)); -static void pmap_enter_quick __P((pmap_t pmap, vm_offset_t va, - vm_offset_t pa)); +static int pmap_remove_all __P((vm_offset_t pa)); +static vm_page_t pmap_enter_quick __P((pmap_t pmap, vm_offset_t va, + vm_offset_t pa, vm_page_t mpte)); static int pmap_remove_pte __P((struct pmap *pmap, unsigned *ptq, vm_offset_t sva)); static void pmap_remove_page __P((struct pmap *pmap, vm_offset_t va)); -static __inline int pmap_remove_entry __P((struct pmap *pmap, pv_entry_t *pv, +static int pmap_remove_entry __P((struct pmap *pmap, pv_table_t *pv, vm_offset_t va)); -static boolean_t pmap_testbit __P((vm_offset_t pa, int bit)); -static __inline void pmap_insert_entry __P((pmap_t pmap, vm_offset_t va, +static void pmap_insert_entry __P((pmap_t pmap, vm_offset_t va, vm_page_t mpte, vm_offset_t pa)); -static __inline vm_page_t pmap_allocpte __P((pmap_t pmap, vm_offset_t va)); +static vm_page_t pmap_allocpte __P((pmap_t pmap, vm_offset_t va)); -static __inline int pmap_release_free_page __P((pmap_t pmap, vm_page_t p)); +static int pmap_release_free_page __P((pmap_t pmap, vm_page_t p)); static vm_page_t _pmap_allocpte __P((pmap_t pmap, int ptepindex)); +unsigned * __pure pmap_pte_quick __P((pmap_t pmap, vm_offset_t va)); +int pmap_tcbit __P((vm_offset_t pa, int bit)); +static vm_page_t pmap_page_alloc __P((vm_object_t object, vm_pindex_t pindex)); -#define VATRACK 4 -#define PDSTACKMAX 16 +#define PDSTACKMAX 6 static vm_offset_t pdstack[PDSTACKMAX]; static int pdstackptr; @@ -240,6 +245,7 @@ pmap_bootstrap(firstaddr, loadaddr) kernel_pmap->pm_pdir = (pd_entry_t *) (KERNBASE + IdlePTD); kernel_pmap->pm_count = 1; + TAILQ_INIT(&kernel_pmap->pm_pvlist.pv_list); nkpt = NKPT; /* @@ -306,10 +312,15 @@ pmap_init(phys_start, phys_end) * Allocate memory for random pmap data structures. Includes the * pv_head_table. */ - s = (vm_size_t) (sizeof(struct pv_entry *) * npg); + s = (vm_size_t) (sizeof(pv_table_t) * npg); s = round_page(s); addr = (vm_offset_t) kmem_alloc(kernel_map, s); - pv_table = (pv_entry_t *) addr; + pv_table = (pv_table_t *) addr; + for(i=0;i<npg;i++) { + pv_table[i].pv_list_count = 0; + TAILQ_INIT(&pv_table[i].pv_list); + } + TAILQ_INIT(&pv_freelist); /* * init the pv free list @@ -372,7 +383,7 @@ pmap_nw_modified(pt_entry_t ptea) { * this routine defines the region(s) of memory that should * not be tested for the modified bit. */ -static __inline int +static PMAP_INLINE int pmap_track_modified( vm_offset_t va) { if ((va < clean_sva) || (va >= clean_eva)) return 1; @@ -384,7 +395,7 @@ pmap_track_modified( vm_offset_t va) { * The below are finer grained pmap_update routines. These eliminate * the gratuitious tlb flushes on non-i386 architectures. */ -static __inline void +static PMAP_INLINE void pmap_update_1pg( vm_offset_t va) { #if defined(I386_CPU) if (cpu_class == CPUCLASS_386) @@ -394,7 +405,7 @@ pmap_update_1pg( vm_offset_t va) { __asm __volatile(".byte 0xf,0x1,0x38": :"a" (va)); } -static __inline void +static PMAP_INLINE void pmap_update_2pg( vm_offset_t va1, vm_offset_t va2) { #if defined(I386_CPU) if (cpu_class == CPUCLASS_386) { @@ -432,7 +443,7 @@ get_ptbase(pmap) * with the given map/virtual_address pair. */ -__inline unsigned * __pure +unsigned * __pure pmap_pte(pmap, va) register pmap_t pmap; vm_offset_t va; @@ -448,25 +459,27 @@ pmap_pte(pmap, va) * the pv lists. This eliminates many coarse-grained * pmap_update calls. */ -__inline unsigned * __pure +unsigned * __pure pmap_pte_quick(pmap, va) register pmap_t pmap; vm_offset_t va; { - unsigned pde; + unsigned pde, newpf; if (pde = (unsigned) pmap->pm_pdir[va >> PDRSHIFT]) { unsigned frame = (unsigned) pmap->pm_pdir[PTDPTDI] & PG_FRAME; /* are we current address space or kernel? */ if (pmap == kernel_pmap || frame == (((unsigned) PTDpde) & PG_FRAME)) { return (unsigned *) PTmap + i386_btop(va); } - * (int *) PMAP1 = (pde & PG_FRAME) | PG_V | PG_RW; - pmap_update_1pg((vm_offset_t) PADDR1); + newpf = pde & PG_FRAME; + if ( ((* (unsigned *) PMAP1) & PG_FRAME) != newpf) { + * (unsigned *) PMAP1 = newpf | PG_RW | PG_V; + pmap_update_1pg((vm_offset_t) PADDR1); + } return PADDR1 + ((unsigned) i386_btop(va) & (NPTEPG - 1)); } return (0); } - /* * Routine: pmap_extract @@ -491,7 +504,7 @@ pmap_extract(pmap, va) /* * determine if a page is managed (memory vs. device) */ -static __inline __pure int +static PMAP_INLINE __pure int pmap_is_managed(pa) vm_offset_t pa; { @@ -535,7 +548,7 @@ pmap_qenter(va, m, count) pte = (unsigned *)vtopte(tva); opte = *pte; *pte = npte; - if (opte) + if (opte & PG_V) pmap_update_1pg(tva); } } @@ -564,7 +577,7 @@ pmap_qremove(va, count) * note that in order for the mapping to take effect -- you * should do a pmap_update after doing the pmap_kenter... */ -__inline void +PMAP_INLINE void pmap_kenter(va, pa) vm_offset_t va; register vm_offset_t pa; @@ -576,14 +589,14 @@ pmap_kenter(va, pa) pte = (unsigned *)vtopte(va); opte = *pte; *pte = npte; - if (opte) + if (opte & PG_V) pmap_update_1pg(va); } /* * remove a page from the kernel pagetables */ -__inline void +PMAP_INLINE void pmap_kremove(va) vm_offset_t va; { @@ -594,80 +607,17 @@ pmap_kremove(va) pmap_update_1pg(va); } - -/*************************************************** - * Page table page management routines..... - ***************************************************/ - -/* - * This routine unholds page table pages, and if the hold count - * drops to zero, then it decrements the wire count. - */ -static __inline int -pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m) { - vm_page_unhold(m); - if (m->hold_count == 0) { - vm_offset_t pteva; - /* - * unmap the page table page - */ - pmap->pm_pdir[m->pindex] = 0; - --pmap->pm_stats.resident_count; - /* - * Do a pmap_update to make the invalidated mapping - * take effect immediately. - */ - pteva = UPT_MIN_ADDRESS + i386_ptob(m->pindex); - pmap_update_1pg(pteva); - /* - * If the page is finally unwired, simply free it. - */ - --m->wire_count; - if (m->wire_count == 0) { - vm_page_free_zero(m); - --cnt.v_wire_count; - } - return 1; - } - return 0; -} - -/* - * After removing a page table entry, this routine is used to - * conditionally free the page, and manage the hold/wire counts. - */ -int -pmap_unuse_pt(pmap, va, mpte) - pmap_t pmap; - vm_offset_t va; - vm_page_t mpte; +static vm_page_t +pmap_page_alloc(object, pindex) + vm_object_t object; + vm_pindex_t pindex; { - if (va >= UPT_MIN_ADDRESS) - return 0; - - if (mpte == NULL) { - vm_offset_t ptepa; - ptepa = ((vm_offset_t) *pmap_pde(pmap, va)); -#if defined(PMAP_DIAGNOSTIC) - if (!ptepa) - panic("pmap_unuse_pt: pagetable page missing, va: 0x%x", va); -#endif - if (!ptepa) - return 0; - mpte = PHYS_TO_VM_PAGE(ptepa); - } - -#if defined(PMAP_DIAGNOSTIC) - if (mpte->pindex != (va >> PDRSHIFT)) - panic("pmap_unuse_pt: pindex(0x%x) != va(0x%x)", - mpte->pindex, (va >> PDRSHIFT)); - - if (mpte->hold_count == 0) { - panic("pmap_unuse_pt: hold count < 0, va: 0x%x", va); + vm_page_t m; + m = vm_page_alloc(object, pindex, VM_ALLOC_ZERO); + if (m == NULL) { + VM_WAIT; } -#endif - - return pmap_unwire_pte_hold(pmap, mpte); + return m; } /* @@ -701,12 +651,12 @@ pmap_pinit(pmap) * allocate the page directory page */ retry: - ptdpg = vm_page_alloc( pmap->pm_pteobj, PTDPTDI, VM_ALLOC_ZERO); - if (ptdpg == NULL) { - VM_WAIT; + ptdpg = pmap_page_alloc( pmap->pm_pteobj, PTDPTDI); + if (ptdpg == NULL) goto retry; - } - vm_page_wire(ptdpg); + + ptdpg->wire_count = 1; + ++cnt.v_wire_count; ptdpg->flags &= ~(PG_MAPPED|PG_BUSY); /* not mapped normally */ ptdpg->valid = VM_PAGE_BITS_ALL; @@ -722,6 +672,8 @@ retry: VM_PAGE_TO_PHYS(ptdpg) | PG_V | PG_RW; pmap->pm_count = 1; + pmap->pm_ptphint = NULL; + TAILQ_INIT(&pmap->pm_pvlist.pv_list); } static int @@ -751,31 +703,7 @@ pmap_release_free_page(pmap, p) --pmap->pm_stats.resident_count; if (p->hold_count) { - int *kvap; - int i; -#if defined(PMAP_DIAGNOSTIC) panic("pmap_release: freeing held page table page"); -#else - printf("pmap_release: freeing held page table page:\n"); -#endif - kvap = (int *)vm_pager_map_page(p); - for(i=0;i<NPTEPG;i++) { - if (kvap[i]) { - printf("pte: 0x%x, index: %d\n", kvap[i],i); - } - } - vm_pager_unmap_page((vm_offset_t)kvap); - - /* - * HACK ALERT!!! - * If this failure happens, we must clear the page, because - * there is likely a mapping still valid. This condition - * is an error, but at least this zero operation will mitigate - * some Sig-11's or crashes, because this page is thought - * to be zero. This is a robustness fix, and not meant to - * be a long term work-around. - */ - pmap_zero_page(VM_PAGE_TO_PHYS(p)); } /* * Page directory pages need to have the kernel @@ -787,6 +715,9 @@ pmap_release_free_page(pmap, p) pmap_kremove((vm_offset_t) pmap->pm_pdir); } + if (pmap->pm_ptphint == p) + pmap->pm_ptphint = NULL; + vm_page_free_zero(p); splx(s); return 1; @@ -801,7 +732,7 @@ _pmap_allocpte(pmap, ptepindex) pmap_t pmap; int ptepindex; { - vm_offset_t pteva, ptepa; + vm_offset_t ptepa; vm_page_t m; /* @@ -810,11 +741,9 @@ _pmap_allocpte(pmap, ptepindex) retry: m = vm_page_lookup(pmap->pm_pteobj, ptepindex); if (m == NULL) { - m = vm_page_alloc(pmap->pm_pteobj, ptepindex, VM_ALLOC_ZERO); - if (m == NULL) { - VM_WAIT; + m = pmap_page_alloc(pmap->pm_pteobj, ptepindex); + if (m == NULL) goto retry; - } if ((m->flags & PG_ZERO) == 0) pmap_zero_page(VM_PAGE_TO_PHYS(m)); m->flags &= ~(PG_ZERO|PG_BUSY); @@ -827,22 +756,16 @@ retry: } } - /* - * mark the object writeable - */ - pmap->pm_pteobj->flags |= OBJ_WRITEABLE; - if (m->queue != PQ_NONE) { int s = splvm(); - vm_page_unqueue(m); + vm_page_unqueue(m,1); splx(s); } - if (m->hold_count == 0) { - if (m->wire_count == 0) - ++cnt.v_wire_count; - ++m->wire_count; - } + if (m->wire_count == 0) + ++cnt.v_wire_count; + ++m->wire_count; + /* * Increment the hold count for the page table page * (denoting a new mapping.) @@ -859,14 +782,12 @@ retry: ptepa = VM_PAGE_TO_PHYS(m); pmap->pm_pdir[ptepindex] = (pd_entry_t) (ptepa | PG_U | PG_RW | PG_V); - pteva = UPT_MIN_ADDRESS + i386_ptob(ptepindex); - pmap_update_1pg(pteva); m->flags |= PG_MAPPED; return m; } -static __inline vm_page_t +PMAP_INLINE static vm_page_t pmap_allocpte(pmap, va) pmap_t pmap; vm_offset_t va; @@ -890,7 +811,13 @@ pmap_allocpte(pmap, va) * hold count, and activate it. */ if (ptepa) { - m = PHYS_TO_VM_PAGE(ptepa); + if (pmap->pm_ptphint && + (pmap->pm_ptphint->pindex == ptepindex)) { + m = pmap->pm_ptphint; + } else { + m = vm_page_lookup( pmap->pm_pteobj, ptepindex); + pmap->pm_ptphint = m; + } ++m->hold_count; return m; } @@ -1035,13 +962,12 @@ pmap_reference(pmap) /* * free the pv_entry back to the free list */ -static __inline void +static PMAP_INLINE void free_pv_entry(pv) pv_entry_t pv; { ++pv_freelistcnt; - pv->pv_next = pv_freelist; - pv_freelist = pv; + TAILQ_INSERT_HEAD(&pv_freelist, pv, pv_list); } /* @@ -1050,7 +976,7 @@ free_pv_entry(pv) * the memory allocation is performed bypassing the malloc code * because of the possibility of allocations at interrupt time. */ -static __inline pv_entry_t +static PMAP_INLINE pv_entry_t get_pv_entry() { pv_entry_t tmp; @@ -1058,15 +984,16 @@ get_pv_entry() /* * get more pv_entry pages if needed */ - if (pv_freelistcnt < PV_FREELIST_MIN || pv_freelist == 0) { + if (pv_freelistcnt < PV_FREELIST_MIN) { pmap_alloc_pv_entry(); } + /* * get a pv_entry off of the free list */ --pv_freelistcnt; - tmp = pv_freelist; - pv_freelist = tmp->pv_next; + tmp = TAILQ_FIRST(&pv_freelist); + TAILQ_REMOVE(&pv_freelist, tmp, pv_list); return tmp; } @@ -1123,7 +1050,7 @@ pmap_alloc_pv_entry() } } } - if (!pv_freelist) + if (TAILQ_FIRST(&pv_freelist) == NULL) panic("get_pv_entry: cannot get a pv_entry_t"); } @@ -1150,62 +1077,115 @@ init_pv_entries(npg) } /* - * If it is the first entry on the list, it is actually - * in the header and we must copy the following entry up - * to the header. Otherwise we must search the list for - * the entry. In either case we free the now unused entry. + * This routine unholds page table pages, and if the hold count + * drops to zero, then it decrements the wire count. */ -static __inline int -pmap_remove_entry(pmap, ppv, va) - struct pmap *pmap; - pv_entry_t *ppv; - vm_offset_t va; -{ - pv_entry_t npv; - int s; - - s = splvm(); - for (npv = *ppv; npv; (ppv = &npv->pv_next, npv = *ppv)) { - if (pmap == npv->pv_pmap && va == npv->pv_va) { - int rtval = pmap_unuse_pt(pmap, va, npv->pv_ptem); - *ppv = npv->pv_next; - free_pv_entry(npv); - splx(s); - return rtval; +static int +pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m) { + vm_page_unhold(m); + if (m->hold_count == 0) { + vm_offset_t pteva; + /* + * unmap the page table page + */ + pmap->pm_pdir[m->pindex] = 0; + --pmap->pm_stats.resident_count; + if ((((unsigned)pmap->pm_pdir[PTDPTDI]) & PG_FRAME) == + (((unsigned) PTDpde) & PG_FRAME)) { + /* + * Do a pmap_update to make the invalidated mapping + * take effect immediately. + */ + pteva = UPT_MIN_ADDRESS + i386_ptob(m->pindex); + pmap_update_1pg(pteva); + } + /* + * If the page is finally unwired, simply free it. + */ + --m->wire_count; + if (m->wire_count == 0) { + if (pmap->pm_ptphint == m) + pmap->pm_ptphint = NULL; + vm_page_free_zero(m); + --cnt.v_wire_count; } + return 1; } - splx(s); return 0; } /* - * Create a pv entry for page at pa for - * (pmap, va). + * After removing a page table entry, this routine is used to + * conditionally free the page, and manage the hold/wire counts. */ -static __inline void -pmap_insert_entry(pmap, va, mpte, pa) +PMAP_INLINE int +pmap_unuse_pt(pmap, va, mpte) pmap_t pmap; vm_offset_t va; vm_page_t mpte; - vm_offset_t pa; { + int ptepindex; + if (va >= UPT_MIN_ADDRESS) + return 0; + + if (mpte == NULL) { + ptepindex = (va >> PDRSHIFT); + if (pmap->pm_ptphint && + pmap->pm_ptphint->pindex == ptepindex) { + mpte = pmap->pm_ptphint; + } else { + mpte = vm_page_lookup( pmap->pm_pteobj, ptepindex); + pmap->pm_ptphint = mpte; + } + } + + return pmap_unwire_pte_hold(pmap, mpte); +} +/* + * If it is the first entry on the list, it is actually + * in the header and we must copy the following entry up + * to the header. Otherwise we must search the list for + * the entry. In either case we free the now unused entry. + */ +static int +pmap_remove_entry(pmap, ppv, va) + struct pmap *pmap; + pv_table_t *ppv; + vm_offset_t va; +{ + pv_entry_t pv; + int rtval; int s; - pv_entry_t *ppv, pv; s = splvm(); - pv = get_pv_entry(); - pv->pv_va = va; - pv->pv_pmap = pmap; - pv->pv_ptem = mpte; + if (ppv->pv_list_count < pmap->pm_stats.resident_count) { + for (pv = TAILQ_FIRST(&ppv->pv_list); + pv; + pv = TAILQ_NEXT(pv, pv_list)) { + if (pmap == pv->pv_pmap && va == pv->pv_va) + break; + } + } else { + for (pv = TAILQ_FIRST(&pmap->pm_pvlist.pv_list); + pv; + pv = TAILQ_NEXT(pv, pv_plist)) { + if (va == pv->pv_va) + break; + } + } - ppv = pa_to_pvh(pa); - if (*ppv) - pv->pv_next = *ppv; - else - pv->pv_next = NULL; - *ppv = pv; + rtval = 0; + if (pv) { + rtval = pmap_unuse_pt(pmap, va, pv->pv_ptem); + TAILQ_REMOVE(&ppv->pv_list, pv, pv_list); + --ppv->pv_list_count; + TAILQ_REMOVE(&pmap->pm_pvlist.pv_list, pv, pv_plist); + free_pv_entry(pv); + } + splx(s); + return rtval; } /* @@ -1218,7 +1198,6 @@ pmap_remove_pte(pmap, ptq, va) vm_offset_t va; { unsigned oldpte; - pv_entry_t *ppv; oldpte = *ptq; *ptq = 0; @@ -1235,8 +1214,7 @@ pmap_remove_pte(pmap, ptq, va) if (pmap_track_modified(va)) PHYS_TO_VM_PAGE(oldpte)->dirty = VM_PAGE_BITS_ALL; } - ppv = pa_to_pvh(oldpte); - return pmap_remove_entry(pmap, ppv, va); + return pmap_remove_entry(pmap, pa_to_pvh(oldpte), va); } else { return pmap_unuse_pt(pmap, va, NULL); } @@ -1265,9 +1243,11 @@ pmap_remove_page(pmap, va) * get a local va for mappings for this pmap. */ ptq = get_ptbase(pmap) + i386_btop(va); - if (*ptq) { + if (*ptq & PG_V) { (void) pmap_remove_pte(pmap, ptq, va); - pmap_update_1pg(va); + if ((((unsigned)pmap->pm_pdir[PTDPTDI]) & PG_FRAME) == (((unsigned) PTDpde) & PG_FRAME)) { + pmap_update_1pg(va); + } } return; } @@ -1290,7 +1270,6 @@ pmap_remove(pmap, sva, eva) vm_offset_t sindex, eindex; vm_page_t mpte; int anyvalid; - vm_offset_t vachanged[VATRACK]; if (pmap == NULL) return; @@ -1315,6 +1294,7 @@ pmap_remove(pmap, sva, eva) sindex = i386_btop(sva); eindex = i386_btop(eva); + mpte = NULL; for (; sindex < eindex; sindex = pdnxt) { @@ -1331,19 +1311,6 @@ pmap_remove(pmap, sva, eva) if (ptpaddr == 0) continue; - if (sindex < i386_btop(UPT_MIN_ADDRESS)) { - /* - * get the vm_page_t for the page table page - */ - mpte = PHYS_TO_VM_PAGE(ptpaddr); - - /* - * if the pte isn't wired, just skip it. - */ - if (mpte->wire_count == 0) - continue; - } - /* * Limit our scan to either the end of the va represented * by the current page table page, or to the end of the @@ -1355,13 +1322,11 @@ pmap_remove(pmap, sva, eva) for ( ;sindex != pdnxt; sindex++) { vm_offset_t va; - if (ptbase[sindex] == 0) { + if ((ptbase[sindex] & PG_V) == 0) { continue; } va = i386_ptob(sindex); - if (anyvalid < VATRACK) - vachanged[anyvalid] = va; anyvalid++; if (pmap_remove_pte(pmap, ptbase + sindex, va)) @@ -1370,15 +1335,8 @@ pmap_remove(pmap, sva, eva) } if (anyvalid) { - if (anyvalid <= VATRACK) { - int i; - for(i=0;i<anyvalid;i++) - pmap_update_1pg(vachanged[i]); - } else { - pmap_update(); - } + pmap_update(); } - } /* @@ -1393,15 +1351,18 @@ pmap_remove(pmap, sva, eva) * inefficient because they iteratively called * pmap_remove (slow...) */ -static void +static int pmap_remove_all(pa) vm_offset_t pa; { - register pv_entry_t pv, *ppv, npv; + register pv_entry_t pv, npv; + pv_table_t *ppv; register unsigned *pte, tpte; vm_page_t m; + int nmodify; int s; + nmodify = 0; #if defined(PMAP_DIAGNOSTIC) /* * XXX this makes pmap_page_protect(NONE) illegal for non-managed @@ -1415,9 +1376,11 @@ pmap_remove_all(pa) s = splvm(); m = NULL; ppv = pa_to_pvh(pa); - for (pv = *ppv; pv; pv=pv->pv_next) { + for (pv = TAILQ_FIRST(&ppv->pv_list); + pv; + pv = npv) { pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va); - if (tpte = *pte) { + if ((tpte = *pte) & PG_V) { pv->pv_pmap->pm_stats.resident_count--; *pte = 0; if (tpte & PG_W) @@ -1425,28 +1388,27 @@ pmap_remove_all(pa) /* * Update the vm_page_t clean and reference bits. */ - if (tpte & PG_M) { + if ((tpte & (PG_M|PG_MANAGED)) == (PG_M|PG_MANAGED)) { #if defined(PMAP_DIAGNOSTIC) if (pmap_nw_modified((pt_entry_t) tpte)) { printf("pmap_remove_all: modified page not writable: va: 0x%lx, pte: 0x%lx\n", pv->pv_va, tpte); } #endif - if (pmap_track_modified(pv->pv_va)) { - if (m == NULL) - m = PHYS_TO_VM_PAGE(pa); - m->dirty = VM_PAGE_BITS_ALL; - } + if (pmap_track_modified(pv->pv_va)) + nmodify += 1; } } - } + TAILQ_REMOVE(&pv->pv_pmap->pm_pvlist.pv_list, pv, pv_plist); - for (pv = *ppv; pv; pv = npv) { - npv = pv->pv_next; + npv = TAILQ_NEXT(pv, pv_list); + TAILQ_REMOVE(&ppv->pv_list, pv, pv_list); + --ppv->pv_list_count; pmap_unuse_pt(pv->pv_pmap, pv->pv_va, pv->pv_ptem); free_pv_entry(pv); } - *ppv = NULL; + splx(s); + return nmodify; } /* @@ -1484,6 +1446,7 @@ pmap_protect(pmap, sva, eva, prot) sindex = i386_btop(sva); eindex = i386_btop(eva); + mpte = NULL; for (; sindex < eindex; sindex = pdnxt) { pdnxt = ((sindex + NPTEPG) & ~(NPTEPG - 1)); @@ -1496,18 +1459,6 @@ pmap_protect(pmap, sva, eva, prot) if (ptpaddr == 0) continue; - /* - * Skip page ranges, where the page table page isn't wired. - * If the page table page is not wired, there are no page mappings - * there. - */ - if (sindex < i386_btop(UPT_MIN_ADDRESS)) { - mpte = PHYS_TO_VM_PAGE(ptpaddr); - - if (mpte->wire_count == 0) - continue; - } - if (pdnxt > eindex) { pdnxt = eindex; } @@ -1516,8 +1467,8 @@ pmap_protect(pmap, sva, eva, prot) unsigned pbits = ptbase[sindex]; - if (pbits & PG_RW) { - if (pbits & PG_M) { + if ((pbits & (PG_RW|PG_V)) == (PG_RW|PG_V)) { + if ((pbits & (PG_M|PG_MANAGED)) == (PG_M|PG_MANAGED)) { vm_offset_t sva = i386_ptob(sindex); if (pmap_track_modified(sva)) { vm_page_t m = PHYS_TO_VM_PAGE(pbits); @@ -1534,6 +1485,37 @@ pmap_protect(pmap, sva, eva, prot) } /* + * Create a pv entry for page at pa for + * (pmap, va). + */ +static void +pmap_insert_entry(pmap, va, mpte, pa) + pmap_t pmap; + vm_offset_t va; + vm_page_t mpte; + vm_offset_t pa; +{ + + int s; + pv_entry_t pv; + pv_table_t *ppv; + + s = splvm(); + pv = get_pv_entry(); + pv->pv_va = va; + pv->pv_pmap = pmap; + pv->pv_ptem = mpte; + + TAILQ_INSERT_TAIL(&pmap->pm_pvlist.pv_list, pv, pv_plist); + + ppv = pa_to_pvh(pa); + TAILQ_INSERT_TAIL(&ppv->pv_list, pv, pv_list); + ++ppv->pv_list_count; + + splx(s); +} + +/* * Insert the given physical page (p) at * the specified virtual address (v) in the * target physical map with the protection requested. @@ -1577,7 +1559,7 @@ pmap_enter(pmap, va, pa, prot, wired) if (va < UPT_MIN_ADDRESS) mpte = pmap_allocpte(pmap, va); - pte = pmap_pte_quick(pmap, va); + pte = pmap_pte(pmap, va); /* * Page Directory table entry not valid, we need a new PT page */ @@ -1593,7 +1575,7 @@ pmap_enter(pmap, va, pa, prot, wired) /* * Mapping has not changed, must be protection or wiring change. */ - if (opa == pa) { + if ((origpte & PG_V) && (opa == pa)) { /* * Wiring change, just update stats. We don't worry about * wiring PT pages as they remain resident as long as there @@ -1616,12 +1598,10 @@ pmap_enter(pmap, va, pa, prot, wired) * so we go ahead and sense modify status. */ if (origpte & PG_MANAGED) { - vm_page_t m; - if (origpte & PG_M) { - if (pmap_track_modified(va)) { - m = PHYS_TO_VM_PAGE(pa); - m->dirty = VM_PAGE_BITS_ALL; - } + if ((origpte & PG_M) && pmap_track_modified(va)) { + vm_page_t m; + m = PHYS_TO_VM_PAGE(pa); + m->dirty = VM_PAGE_BITS_ALL; } pa |= PG_MANAGED; } @@ -1635,7 +1615,7 @@ pmap_enter(pmap, va, pa, prot, wired) * Mapping has changed, invalidate old range and fall through to * handle validating new mapping. */ - if (opa) { + if (origpte & PG_V) { int err; err = pmap_remove_pte(pmap, pte, va); if (err) @@ -1692,22 +1672,55 @@ validate: * but is *MUCH* faster than pmap_enter... */ -static void -pmap_enter_quick(pmap, va, pa) +static vm_page_t +pmap_enter_quick(pmap, va, pa, mpte) register pmap_t pmap; vm_offset_t va; register vm_offset_t pa; + vm_page_t mpte; { register unsigned *pte; - vm_page_t mpte; - mpte = NULL; /* * In the case that a page table page is not * resident, we are creating it here. */ - if (va < UPT_MIN_ADDRESS) - mpte = pmap_allocpte(pmap, va); + if (va < UPT_MIN_ADDRESS) { + int ptepindex; + vm_offset_t ptepa; + + /* + * Calculate pagetable page index + */ + ptepindex = va >> PDRSHIFT; + if (mpte && (mpte->pindex == ptepindex)) { + ++mpte->hold_count; + } else { + /* + * Get the page directory entry + */ + ptepa = (vm_offset_t) pmap->pm_pdir[ptepindex]; + + /* + * If the page table page is mapped, we just increment + * the hold count, and activate it. + */ + if (ptepa) { + if (pmap->pm_ptphint && + pmap->pm_ptphint->pindex == ptepindex) { + mpte = pmap->pm_ptphint; + } else { + mpte = vm_page_lookup( pmap->pm_pteobj, ptepindex); + pmap->pm_ptphint = mpte; + } + ++mpte->hold_count; + } else { + mpte = _pmap_allocpte(pmap, ptepindex); + } + } + } else { + mpte = NULL; + } /* * This call to vtopte makes the assumption that we are @@ -1716,10 +1729,10 @@ pmap_enter_quick(pmap, va, pa) * But that isn't as quick as vtopte. */ pte = (unsigned *)vtopte(va); - if (*pte) { + if (*pte & PG_V) { if (mpte) pmap_unwire_pte_hold(pmap, mpte); - return; + return NULL; } /* @@ -1739,7 +1752,7 @@ pmap_enter_quick(pmap, va, pa) */ *pte = pa | PG_V | PG_U | PG_MANAGED; - return; + return mpte; } #define MAX_INIT_PT (96) @@ -1759,7 +1772,7 @@ pmap_object_init_pt(pmap, addr, object, pindex, size, limit) { vm_offset_t tmpidx; int psize; - vm_page_t p; + vm_page_t p, mpte; int objpgs; psize = i386_btop(size); @@ -1773,6 +1786,7 @@ pmap_object_init_pt(pmap, addr, object, pindex, size, limit) if (psize + pindex > object->size) psize = object->size - pindex; + mpte = NULL; /* * if we are processing a major portion of the object, then scan the * entire thing. @@ -1798,9 +1812,9 @@ pmap_object_init_pt(pmap, addr, object, pindex, size, limit) if (p->queue == PQ_CACHE) vm_page_deactivate(p); p->flags |= PG_BUSY; - pmap_enter_quick(pmap, + mpte = pmap_enter_quick(pmap, addr + i386_ptob(tmpidx), - VM_PAGE_TO_PHYS(p)); + VM_PAGE_TO_PHYS(p), mpte); p->flags |= PG_MAPPED; PAGE_WAKEUP(p); } @@ -1819,9 +1833,9 @@ pmap_object_init_pt(pmap, addr, object, pindex, size, limit) if (p->queue == PQ_CACHE) vm_page_deactivate(p); p->flags |= PG_BUSY; - pmap_enter_quick(pmap, + mpte = pmap_enter_quick(pmap, addr + i386_ptob(tmpidx), - VM_PAGE_TO_PHYS(p)); + VM_PAGE_TO_PHYS(p), mpte); p->flags |= PG_MAPPED; PAGE_WAKEUP(p); } @@ -1855,7 +1869,7 @@ pmap_prefault(pmap, addra, entry, object) vm_offset_t starta; vm_offset_t addr; vm_pindex_t pindex; - vm_page_t m; + vm_page_t m, mpte; if (entry->object.vm_object != object) return; @@ -1870,6 +1884,7 @@ pmap_prefault(pmap, addra, entry, object) starta = 0; } + mpte = NULL; for (i = 0; i < PAGEORDER_SIZE; i++) { vm_object_t lobject; unsigned *pte; @@ -1910,7 +1925,8 @@ pmap_prefault(pmap, addra, entry, object) vm_page_deactivate(m); } m->flags |= PG_BUSY; - pmap_enter_quick(pmap, addr, VM_PAGE_TO_PHYS(m)); + mpte = pmap_enter_quick(pmap, addr, + VM_PAGE_TO_PHYS(m), mpte); m->flags |= PG_MAPPED; PAGE_WAKEUP(m); } @@ -1949,8 +1965,6 @@ pmap_change_wiring(pmap, va, wired) pmap_pte_set_w(pte, wired); } - - /* * Copy the range specified by src_addr/len * from the source map to the range dst_addr/len @@ -1987,16 +2001,19 @@ pmap_copy(dst_pmap, src_pmap, dst_addr, len, src_addr) unsigned *src_pte, *dst_pte; vm_page_t dstmpte, srcmpte; vm_offset_t srcptepaddr; + int ptepindex; if (addr >= UPT_MIN_ADDRESS) panic("pmap_copy: invalid to pmap_copy page tables\n"); + pdnxt = ((addr + PAGE_SIZE*NPTEPG) & ~(PAGE_SIZE*NPTEPG - 1)); - srcptepaddr = (vm_offset_t) src_pmap->pm_pdir[addr >> PDRSHIFT]; - if (srcptepaddr == 0) { + ptepindex = addr >> PDRSHIFT; + + srcptepaddr = (vm_offset_t) src_pmap->pm_pdir[ptepindex]; + if (srcptepaddr == 0) continue; - } - srcmpte = PHYS_TO_VM_PAGE(srcptepaddr); + srcmpte = vm_page_lookup(src_pmap->pm_pteobj, ptepindex); if (srcmpte->hold_count == 0) continue; @@ -2020,12 +2037,14 @@ pmap_copy(dst_pmap, src_pmap, dst_addr, len, src_addr) dstmpte = pmap_allocpte(dst_pmap, addr); if ((*dst_pte == 0) && (ptetemp = *src_pte)) { /* - * Simply clear the modified and accessed (referenced) - * bits. + * Clear the modified and + * accessed (referenced) bits + * during the copy. */ *dst_pte = ptetemp & ~(PG_M|PG_A); dst_pmap->pm_stats.resident_count++; - pmap_insert_entry(dst_pmap, addr, dstmpte, + pmap_insert_entry(dst_pmap, addr, + dstmpte, (ptetemp & PG_FRAME)); } else { pmap_unwire_pte_hold(dst_pmap, dstmpte); @@ -2126,7 +2145,8 @@ pmap_page_exists(pmap, pa) pmap_t pmap; vm_offset_t pa; { - register pv_entry_t *ppv, pv; + register pv_entry_t pv; + pv_table_t *ppv; int s; if (!pmap_is_managed(pa)) @@ -2138,7 +2158,9 @@ pmap_page_exists(pmap, pa) /* * Not found, check current mappings returning immediately if found. */ - for (pv = *ppv; pv; pv = pv->pv_next) { + for (pv = TAILQ_FIRST(&ppv->pv_list); + pv; + pv = TAILQ_NEXT(pv, pv_list)) { if (pv->pv_pmap == pmap) { splx(s); return TRUE; @@ -2148,149 +2170,73 @@ pmap_page_exists(pmap, pa) return (FALSE); } +#define PMAP_REMOVE_PAGES_CURPROC_ONLY /* - * pmap_testbit tests bits in pte's - * note that the testbit/changebit routines are inline, - * and a lot of things compile-time evaluate. + * Remove all pages from specified address space + * this aids process exit speeds. Also, this code + * is special cased for current process only. */ -static __inline boolean_t -pmap_testbit(pa, bit) - register vm_offset_t pa; - int bit; +void +pmap_remove_pages(pmap, sva, eva) + pmap_t pmap; + vm_offset_t sva, eva; { - register pv_entry_t *ppv, pv; - unsigned *pte; + unsigned *pte, tpte; + pv_table_t *ppv; + pv_entry_t pv, npv; int s; - if (!pmap_is_managed(pa)) - return FALSE; - - ppv = pa_to_pvh(pa); - if (*ppv == NULL) - return FALSE; +#ifdef PMAP_REMOVE_PAGES_CURPROC_ONLY + if (pmap != &curproc->p_vmspace->vm_pmap) { + printf("warning: pmap_remove_pages called with non-current pmap\n"); + return; + } +#endif s = splvm(); - /* - * Not found, check current mappings returning immediately if found. - */ - for (pv = *ppv ;pv; pv = pv->pv_next) { - /* - * if the bit being tested is the modified bit, then - * mark clean_map and ptes as never - * modified. - */ - if (bit & (PG_A|PG_M)) { - if (!pmap_track_modified(pv->pv_va)) - continue; - } + for(pv = TAILQ_FIRST(&pmap->pm_pvlist.pv_list); + pv; + pv = npv) { - if (!pv->pv_pmap) { -#if defined(PMAP_DIAGNOSTIC) - printf("Null pmap (tb) at va: 0x%lx\n", pv->pv_va); -#endif + if (pv->pv_va >= eva || pv->pv_va < sva) { + npv = TAILQ_NEXT(pv, pv_plist); continue; } + +#ifdef PMAP_REMOVE_PAGES_CURPROC_ONLY + pte = (unsigned *)vtopte(pv->pv_va); +#else pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va); - if (pte == NULL) - continue; - if (*pte & bit) { - splx(s); - return TRUE; - } - } - splx(s); - return (FALSE); -} +#endif + tpte = *pte; + *pte = 0; -/* - * this routine is used to modify bits in ptes - */ -static __inline void -pmap_changebit(pa, bit, setem) - vm_offset_t pa; - int bit; - boolean_t setem; -{ - register pv_entry_t pv, *ppv; - register unsigned *pte; - vm_offset_t va; - int changed; - int s; + TAILQ_REMOVE(&pv->pv_pmap->pm_pvlist.pv_list, pv, pv_plist); - if (!pmap_is_managed(pa)) - return; + if (tpte & PG_V) { + pv->pv_pmap->pm_stats.resident_count--; + if (tpte & PG_W) + pv->pv_pmap->pm_stats.wired_count--; + /* + * Update the vm_page_t clean and reference bits. + */ + if (tpte & PG_M) { + PHYS_TO_VM_PAGE(tpte)->dirty = VM_PAGE_BITS_ALL; + } + } - s = splvm(); - changed = 0; - ppv = pa_to_pvh(pa); - /* - * Loop over all current mappings setting/clearing as appropos If - * setting RO do we need to clear the VAC? - */ - for ( pv = *ppv; pv; pv = pv->pv_next) { - va = pv->pv_va; + npv = TAILQ_NEXT(pv, pv_plist); - /* - * don't write protect pager mappings - */ - if (!setem && (bit == PG_RW)) { - if (va >= clean_sva && va < clean_eva) - continue; - } - if (!pv->pv_pmap) { -#if defined(PMAP_DIAGNOSTIC) - printf("Null pmap (cb) at va: 0x%lx\n", va); -#endif - continue; - } + ppv = pa_to_pvh(tpte); + TAILQ_REMOVE(&ppv->pv_list, pv, pv_list); + --ppv->pv_list_count; - pte = pmap_pte_quick(pv->pv_pmap, va); - if (pte == NULL) - continue; - if (setem) { - *(int *)pte |= bit; - changed = 1; - } else { - vm_offset_t pbits = *(vm_offset_t *)pte; - if (pbits & bit) - changed = 1; - if (bit == PG_RW) { - if (pbits & PG_M) { - vm_page_t m; - vm_offset_t pa = pbits & PG_FRAME; - m = PHYS_TO_VM_PAGE(pa); - m->dirty = VM_PAGE_BITS_ALL; - } - *(int *)pte = pbits & ~(PG_M|PG_RW); - } else { - *(int *)pte = pbits & ~bit; - } - } + pmap_unuse_pt(pv->pv_pmap, pv->pv_va, pv->pv_ptem); + free_pv_entry(pv); } + pmap_update(); splx(s); - if (changed) - pmap_update(); -} - -/* - * pmap_page_protect: - * - * Lower the permission for all mappings to a given page. - */ -void -pmap_page_protect(phys, prot) - vm_offset_t phys; - vm_prot_t prot; -{ - if ((prot & VM_PROT_WRITE) == 0) { - if (prot & (VM_PROT_READ | VM_PROT_EXECUTE)) { - pmap_changebit(phys, PG_RW, FALSE); - } else { - pmap_remove_all(phys); - pmap_update(); - } - } } vm_offset_t @@ -2301,150 +2247,111 @@ pmap_phys_address(ppn) } /* - * pmap_is_referenced: + * pmap_tcbit: * - * Return whether or not the specified physical page was referenced - * by any physical maps. - */ -boolean_t -pmap_is_referenced(vm_offset_t pa) -{ - register pv_entry_t *ppv, pv, lpv; - unsigned *pte; - int s; - - if (!pmap_is_managed(pa)) - return FALSE; - - ppv = pa_to_pvh(pa); - - s = splvm(); - /* - * Not found, check current mappings returning immediately if found. - */ - for (lpv = NULL, pv = *ppv ;pv; lpv = pv, pv = pv->pv_next) { - /* - * if the bit being tested is the modified bit, then - * mark clean_map and ptes as never - * modified. - */ - if (!pmap_track_modified(pv->pv_va)) - continue; - if (!pv->pv_pmap) { - continue; - } - pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va); - if (pte == NULL) - continue; - if ((int) *pte & PG_A) { - if (lpv) { - lpv->pv_next = pv->pv_next; - pv->pv_next = *ppv; - *ppv = pv; - } - splx(s); - return TRUE; - } - } - splx(s); - return (FALSE); -} - -/* - * pmap_ts_referenced: - * - * Return the count of reference bits for a page, clearing all of them. + * Return the count of bits for a page, clearing all of them. * */ int -pmap_ts_referenced(vm_offset_t pa) +pmap_tcbit(vm_offset_t pa, int bit) { - register pv_entry_t *ppv, pv; + register pv_entry_t pv, npv; + pv_table_t *ppv; unsigned *pte; int s; int rtval = 0; - vm_offset_t vachanged[VATRACK]; - - if (!pmap_is_managed(pa)) - return FALSE; s = splvm(); ppv = pa_to_pvh(pa); - - if (*ppv == NULL) { - splx(s); - return 0; - } - /* * Not found, check current mappings returning immediately if found. */ - for (pv = *ppv ;pv; pv = pv->pv_next) { + for (pv = TAILQ_FIRST(&ppv->pv_list); + pv; + pv = npv) { + npv = TAILQ_NEXT(pv, pv_list); /* * if the bit being tested is the modified bit, then * mark clean_map and ptes as never * modified. */ - if (!pmap_track_modified(pv->pv_va)) + if (((bit & PG_M) != 0) + && !pmap_track_modified(pv->pv_va)) continue; - if (!pv->pv_pmap) { - continue; - } pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va); if (pte == NULL) continue; - if (*pte & PG_A) { - if (rtval < VATRACK) - vachanged[rtval] = pv->pv_va; - rtval++; - *pte &= ~PG_A; + + if ((rtval == 0) && (*pte & bit)) { + rtval = 1; + *pte &= ~bit; + } else { + *pte &= ~bit; } } splx(s); if (rtval) { - if (rtval <= VATRACK) { - int i; - for(i=0;i<rtval;i++) - pmap_update_1pg(vachanged[i]); - } else { + if (curproc != pageproc) pmap_update(); - } } return (rtval); } /* - * pmap_is_modified: + * pmap_tc_modified: * - * Return whether or not the specified physical page was modified - * in any physical maps. + * Return the count of modified bits for a page, clearing all of them. + * */ -boolean_t -pmap_is_modified(vm_offset_t pa) +int +pmap_tc_modified(vm_page_t m) { - return pmap_testbit((pa), PG_M); + int rtval; + + rtval = pmap_tcbit(VM_PAGE_TO_PHYS(m), PG_M); + if (rtval) + m->dirty = VM_PAGE_BITS_ALL; + + return rtval; } /* - * Clear the modify bits on the specified physical page. + * pmap_tc_referenced: + * + * Return the count of referenced bits for a page, clearing all of them. + * */ -void -pmap_clear_modify(vm_offset_t pa) +int +pmap_tc_referenced(vm_offset_t pa) { - pmap_changebit((pa), PG_M, FALSE); + if (!pmap_is_managed(pa)) + return 0; + return pmap_tcbit(pa, PG_A); } /* - * pmap_clear_reference: + * pmap_page_protect: * - * Clear the reference bit on the specified physical page. + * Lower the permission for all mappings to a given page. */ void -pmap_clear_reference(vm_offset_t pa) +pmap_page_protect(m, prot) + vm_page_t m; + vm_prot_t prot; { - pmap_changebit((pa), PG_A, FALSE); + if ((prot & VM_PROT_WRITE) == 0) { + if (prot & (VM_PROT_READ | VM_PROT_EXECUTE)) { + if ((m->flags & PG_FICTITIOUS) == 0) + pmap_tcbit(VM_PAGE_TO_PHYS(m), PG_RW); + } else { + if (pmap_remove_all(VM_PAGE_TO_PHYS(m))) { + m->dirty = VM_PAGE_BITS_ALL; + } + pmap_update(); + } + } } /* @@ -2525,7 +2432,7 @@ pmap_mincore(pmap, addr) unsigned *ptep, pte; int val = 0; - ptep = pmap_pte(pmap, addr); + ptep = pmap_pte_quick(pmap, addr); if (ptep == 0) { return 0; } @@ -2544,8 +2451,10 @@ pmap_mincore(pmap, addr) * Modified by someone */ else if (PHYS_TO_VM_PAGE(pa)->dirty || - pmap_is_modified(pa)) + pmap_tcbit(pa, PG_M)) { val |= MINCORE_MODIFIED_OTHER; + PHYS_TO_VM_PAGE(pa)->dirty = VM_PAGE_BITS_ALL; + } /* * Referenced by us */ @@ -2556,7 +2465,7 @@ pmap_mincore(pmap, addr) * Referenced by someone */ else if ((PHYS_TO_VM_PAGE(pa)->flags & PG_REFERENCED) || - pmap_is_referenced(pa)) + pmap_tcbit(pa, PG_A)) val |= MINCORE_REFERENCED_OTHER; } return val; @@ -2592,7 +2501,7 @@ pmap_pid_dump(int pid) { } return npte; } - pte = pmap_pte( pmap, va); + pte = pmap_pte_quick( pmap, va); if (pte && pmap_pte_v(pte)) { vm_offset_t pa; vm_page_t m; @@ -2641,7 +2550,7 @@ pads(pm) continue; if (pm != kernel_pmap && va > UPT_MAX_ADDRESS) continue; - ptep = pmap_pte(pm, va); + ptep = pmap_pte_quick(pm, va); if (pmap_pte_v(ptep)) printf("%x:%x ", va, *(int *) ptep); }; @@ -2655,7 +2564,9 @@ pmap_pvdump(pa) register pv_entry_t pv; printf("pa %x", pa); - for (pv = pa_to_pvh(pa); pv; pv = pv->pv_next) { + for (pv = TAILQ_FIRST(pa_to_pvh(pa)); + pv; + pv = TAILQ_NEXT(pv->pv_list)) { #ifdef used_to_be printf(" -> pmap %x, va %x, flags %x", pv->pv_pmap, pv->pv_va, pv->pv_flags); diff --git a/sys/amd64/include/pmap.h b/sys/amd64/include/pmap.h index dbadf6f..b53db62 100644 --- a/sys/amd64/include/pmap.h +++ b/sys/amd64/include/pmap.h @@ -42,12 +42,13 @@ * * from: hp300: @(#)pmap.h 7.2 (Berkeley) 12/16/90 * from: @(#)pmap.h 7.4 (Berkeley) 5/12/91 - * $Id: pmap.h,v 1.39 1996/05/18 03:36:38 dyson Exp $ + * $Id: pmap.h,v 1.40 1996/06/08 11:21:19 bde Exp $ */ #ifndef _MACHINE_PMAP_H_ #define _MACHINE_PMAP_H_ + /* * Page-directory and page-table entires follow this format, with a few * of the fields not present here and there, depending on a lot of things. @@ -113,6 +114,9 @@ #define ISA_HOLE_LENGTH (0x100000-ISA_HOLE_START) #ifndef LOCORE + +#include <sys/queue.h> + typedef unsigned int *pd_entry_t; typedef unsigned int *pt_entry_t; @@ -158,17 +162,24 @@ pmap_kextract(vm_offset_t va) } #endif +struct vm_page; + /* * Pmap stuff */ +struct pv_entry; +typedef struct { + int pv_list_count; + TAILQ_HEAD(,pv_entry) pv_list; +} pv_table_t; struct pmap { pd_entry_t *pm_pdir; /* KVA of page directory */ vm_object_t pm_pteobj; /* Container for pte's */ - short pm_dref; /* page directory ref count */ - short pm_count; /* pmap reference count */ + pv_table_t pm_pvlist; /* list of mappings in pmap */ + int pm_count; /* reference count */ struct pmap_statistics pm_stats; /* pmap statistics */ - struct vm_map *pm_map; /* map that owns this pmap */ + struct vm_page *pm_ptphint; /* pmap ptp hint */ }; typedef struct pmap *pmap_t; @@ -177,14 +188,16 @@ typedef struct pmap *pmap_t; extern pmap_t kernel_pmap; #endif + /* * For each vm_page_t, there is a list of all currently valid virtual * mappings of that page. An entry is a pv_entry_t, the list is pv_table. */ typedef struct pv_entry { - struct pv_entry *pv_next; /* next pv_entry */ pmap_t pv_pmap; /* pmap where mapping lies */ vm_offset_t pv_va; /* virtual address for mapping */ + TAILQ_ENTRY(pv_entry) pv_list; + TAILQ_ENTRY(pv_entry) pv_plist; vm_page_t pv_ptem; /* VM page for pte */ } *pv_entry_t; @@ -200,7 +213,7 @@ extern pt_entry_t *CMAP1; extern vm_offset_t avail_end; extern vm_offset_t avail_start; extern vm_offset_t phys_avail[]; -extern pv_entry_t *pv_table; /* array of entries, one per page */ +pv_table_t *pv_table; extern vm_offset_t virtual_avail; extern vm_offset_t virtual_end; diff --git a/sys/i386/i386/pmap.c b/sys/i386/i386/pmap.c index 6dbffd1..5e2838b 100644 --- a/sys/i386/i386/pmap.c +++ b/sys/i386/i386/pmap.c @@ -99,6 +99,12 @@ #define PMAP_DIAGNOSTIC #endif +#if !defined(SMALL_KERNEL) +#define PMAP_INLINE __inline +#else +#define PMAP_INLINE +#endif + static void init_pv_entries __P((int)); /* @@ -146,7 +152,7 @@ extern int cpu_class; * Data for the pv entry allocation mechanism */ static int pv_freelistcnt; -static pv_entry_t pv_freelist; +TAILQ_HEAD (,pv_entry) pv_freelist; static vm_offset_t pvva; static int npvvapg; @@ -155,7 +161,6 @@ static int npvvapg; */ pt_entry_t *CMAP1; static pt_entry_t *CMAP2, *ptmmap; -static pv_entry_t *pv_table; caddr_t CADDR1, ptvmmap; static caddr_t CADDR2; static pt_entry_t *msgbufmap; @@ -165,32 +170,32 @@ pt_entry_t *PMAP1; unsigned *PADDR1; static void free_pv_entry __P((pv_entry_t pv)); -static __inline unsigned * get_ptbase __P((pmap_t pmap)); +static unsigned * get_ptbase __P((pmap_t pmap)); static pv_entry_t get_pv_entry __P((void)); static void i386_protection_init __P((void)); static void pmap_alloc_pv_entry __P((void)); -static void pmap_changebit __P((vm_offset_t pa, int bit, boolean_t setem)); static int pmap_is_managed __P((vm_offset_t pa)); -static void pmap_remove_all __P((vm_offset_t pa)); -static void pmap_enter_quick __P((pmap_t pmap, vm_offset_t va, - vm_offset_t pa)); +static int pmap_remove_all __P((vm_offset_t pa)); +static vm_page_t pmap_enter_quick __P((pmap_t pmap, vm_offset_t va, + vm_offset_t pa, vm_page_t mpte)); static int pmap_remove_pte __P((struct pmap *pmap, unsigned *ptq, vm_offset_t sva)); static void pmap_remove_page __P((struct pmap *pmap, vm_offset_t va)); -static __inline int pmap_remove_entry __P((struct pmap *pmap, pv_entry_t *pv, +static int pmap_remove_entry __P((struct pmap *pmap, pv_table_t *pv, vm_offset_t va)); -static boolean_t pmap_testbit __P((vm_offset_t pa, int bit)); -static __inline void pmap_insert_entry __P((pmap_t pmap, vm_offset_t va, +static void pmap_insert_entry __P((pmap_t pmap, vm_offset_t va, vm_page_t mpte, vm_offset_t pa)); -static __inline vm_page_t pmap_allocpte __P((pmap_t pmap, vm_offset_t va)); +static vm_page_t pmap_allocpte __P((pmap_t pmap, vm_offset_t va)); -static __inline int pmap_release_free_page __P((pmap_t pmap, vm_page_t p)); +static int pmap_release_free_page __P((pmap_t pmap, vm_page_t p)); static vm_page_t _pmap_allocpte __P((pmap_t pmap, int ptepindex)); +unsigned * __pure pmap_pte_quick __P((pmap_t pmap, vm_offset_t va)); +int pmap_tcbit __P((vm_offset_t pa, int bit)); +static vm_page_t pmap_page_alloc __P((vm_object_t object, vm_pindex_t pindex)); -#define VATRACK 4 -#define PDSTACKMAX 16 +#define PDSTACKMAX 6 static vm_offset_t pdstack[PDSTACKMAX]; static int pdstackptr; @@ -240,6 +245,7 @@ pmap_bootstrap(firstaddr, loadaddr) kernel_pmap->pm_pdir = (pd_entry_t *) (KERNBASE + IdlePTD); kernel_pmap->pm_count = 1; + TAILQ_INIT(&kernel_pmap->pm_pvlist.pv_list); nkpt = NKPT; /* @@ -306,10 +312,15 @@ pmap_init(phys_start, phys_end) * Allocate memory for random pmap data structures. Includes the * pv_head_table. */ - s = (vm_size_t) (sizeof(struct pv_entry *) * npg); + s = (vm_size_t) (sizeof(pv_table_t) * npg); s = round_page(s); addr = (vm_offset_t) kmem_alloc(kernel_map, s); - pv_table = (pv_entry_t *) addr; + pv_table = (pv_table_t *) addr; + for(i=0;i<npg;i++) { + pv_table[i].pv_list_count = 0; + TAILQ_INIT(&pv_table[i].pv_list); + } + TAILQ_INIT(&pv_freelist); /* * init the pv free list @@ -372,7 +383,7 @@ pmap_nw_modified(pt_entry_t ptea) { * this routine defines the region(s) of memory that should * not be tested for the modified bit. */ -static __inline int +static PMAP_INLINE int pmap_track_modified( vm_offset_t va) { if ((va < clean_sva) || (va >= clean_eva)) return 1; @@ -384,7 +395,7 @@ pmap_track_modified( vm_offset_t va) { * The below are finer grained pmap_update routines. These eliminate * the gratuitious tlb flushes on non-i386 architectures. */ -static __inline void +static PMAP_INLINE void pmap_update_1pg( vm_offset_t va) { #if defined(I386_CPU) if (cpu_class == CPUCLASS_386) @@ -394,7 +405,7 @@ pmap_update_1pg( vm_offset_t va) { __asm __volatile(".byte 0xf,0x1,0x38": :"a" (va)); } -static __inline void +static PMAP_INLINE void pmap_update_2pg( vm_offset_t va1, vm_offset_t va2) { #if defined(I386_CPU) if (cpu_class == CPUCLASS_386) { @@ -432,7 +443,7 @@ get_ptbase(pmap) * with the given map/virtual_address pair. */ -__inline unsigned * __pure +unsigned * __pure pmap_pte(pmap, va) register pmap_t pmap; vm_offset_t va; @@ -448,25 +459,27 @@ pmap_pte(pmap, va) * the pv lists. This eliminates many coarse-grained * pmap_update calls. */ -__inline unsigned * __pure +unsigned * __pure pmap_pte_quick(pmap, va) register pmap_t pmap; vm_offset_t va; { - unsigned pde; + unsigned pde, newpf; if (pde = (unsigned) pmap->pm_pdir[va >> PDRSHIFT]) { unsigned frame = (unsigned) pmap->pm_pdir[PTDPTDI] & PG_FRAME; /* are we current address space or kernel? */ if (pmap == kernel_pmap || frame == (((unsigned) PTDpde) & PG_FRAME)) { return (unsigned *) PTmap + i386_btop(va); } - * (int *) PMAP1 = (pde & PG_FRAME) | PG_V | PG_RW; - pmap_update_1pg((vm_offset_t) PADDR1); + newpf = pde & PG_FRAME; + if ( ((* (unsigned *) PMAP1) & PG_FRAME) != newpf) { + * (unsigned *) PMAP1 = newpf | PG_RW | PG_V; + pmap_update_1pg((vm_offset_t) PADDR1); + } return PADDR1 + ((unsigned) i386_btop(va) & (NPTEPG - 1)); } return (0); } - /* * Routine: pmap_extract @@ -491,7 +504,7 @@ pmap_extract(pmap, va) /* * determine if a page is managed (memory vs. device) */ -static __inline __pure int +static PMAP_INLINE __pure int pmap_is_managed(pa) vm_offset_t pa; { @@ -535,7 +548,7 @@ pmap_qenter(va, m, count) pte = (unsigned *)vtopte(tva); opte = *pte; *pte = npte; - if (opte) + if (opte & PG_V) pmap_update_1pg(tva); } } @@ -564,7 +577,7 @@ pmap_qremove(va, count) * note that in order for the mapping to take effect -- you * should do a pmap_update after doing the pmap_kenter... */ -__inline void +PMAP_INLINE void pmap_kenter(va, pa) vm_offset_t va; register vm_offset_t pa; @@ -576,14 +589,14 @@ pmap_kenter(va, pa) pte = (unsigned *)vtopte(va); opte = *pte; *pte = npte; - if (opte) + if (opte & PG_V) pmap_update_1pg(va); } /* * remove a page from the kernel pagetables */ -__inline void +PMAP_INLINE void pmap_kremove(va) vm_offset_t va; { @@ -594,80 +607,17 @@ pmap_kremove(va) pmap_update_1pg(va); } - -/*************************************************** - * Page table page management routines..... - ***************************************************/ - -/* - * This routine unholds page table pages, and if the hold count - * drops to zero, then it decrements the wire count. - */ -static __inline int -pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m) { - vm_page_unhold(m); - if (m->hold_count == 0) { - vm_offset_t pteva; - /* - * unmap the page table page - */ - pmap->pm_pdir[m->pindex] = 0; - --pmap->pm_stats.resident_count; - /* - * Do a pmap_update to make the invalidated mapping - * take effect immediately. - */ - pteva = UPT_MIN_ADDRESS + i386_ptob(m->pindex); - pmap_update_1pg(pteva); - /* - * If the page is finally unwired, simply free it. - */ - --m->wire_count; - if (m->wire_count == 0) { - vm_page_free_zero(m); - --cnt.v_wire_count; - } - return 1; - } - return 0; -} - -/* - * After removing a page table entry, this routine is used to - * conditionally free the page, and manage the hold/wire counts. - */ -int -pmap_unuse_pt(pmap, va, mpte) - pmap_t pmap; - vm_offset_t va; - vm_page_t mpte; +static vm_page_t +pmap_page_alloc(object, pindex) + vm_object_t object; + vm_pindex_t pindex; { - if (va >= UPT_MIN_ADDRESS) - return 0; - - if (mpte == NULL) { - vm_offset_t ptepa; - ptepa = ((vm_offset_t) *pmap_pde(pmap, va)); -#if defined(PMAP_DIAGNOSTIC) - if (!ptepa) - panic("pmap_unuse_pt: pagetable page missing, va: 0x%x", va); -#endif - if (!ptepa) - return 0; - mpte = PHYS_TO_VM_PAGE(ptepa); - } - -#if defined(PMAP_DIAGNOSTIC) - if (mpte->pindex != (va >> PDRSHIFT)) - panic("pmap_unuse_pt: pindex(0x%x) != va(0x%x)", - mpte->pindex, (va >> PDRSHIFT)); - - if (mpte->hold_count == 0) { - panic("pmap_unuse_pt: hold count < 0, va: 0x%x", va); + vm_page_t m; + m = vm_page_alloc(object, pindex, VM_ALLOC_ZERO); + if (m == NULL) { + VM_WAIT; } -#endif - - return pmap_unwire_pte_hold(pmap, mpte); + return m; } /* @@ -701,12 +651,12 @@ pmap_pinit(pmap) * allocate the page directory page */ retry: - ptdpg = vm_page_alloc( pmap->pm_pteobj, PTDPTDI, VM_ALLOC_ZERO); - if (ptdpg == NULL) { - VM_WAIT; + ptdpg = pmap_page_alloc( pmap->pm_pteobj, PTDPTDI); + if (ptdpg == NULL) goto retry; - } - vm_page_wire(ptdpg); + + ptdpg->wire_count = 1; + ++cnt.v_wire_count; ptdpg->flags &= ~(PG_MAPPED|PG_BUSY); /* not mapped normally */ ptdpg->valid = VM_PAGE_BITS_ALL; @@ -722,6 +672,8 @@ retry: VM_PAGE_TO_PHYS(ptdpg) | PG_V | PG_RW; pmap->pm_count = 1; + pmap->pm_ptphint = NULL; + TAILQ_INIT(&pmap->pm_pvlist.pv_list); } static int @@ -751,31 +703,7 @@ pmap_release_free_page(pmap, p) --pmap->pm_stats.resident_count; if (p->hold_count) { - int *kvap; - int i; -#if defined(PMAP_DIAGNOSTIC) panic("pmap_release: freeing held page table page"); -#else - printf("pmap_release: freeing held page table page:\n"); -#endif - kvap = (int *)vm_pager_map_page(p); - for(i=0;i<NPTEPG;i++) { - if (kvap[i]) { - printf("pte: 0x%x, index: %d\n", kvap[i],i); - } - } - vm_pager_unmap_page((vm_offset_t)kvap); - - /* - * HACK ALERT!!! - * If this failure happens, we must clear the page, because - * there is likely a mapping still valid. This condition - * is an error, but at least this zero operation will mitigate - * some Sig-11's or crashes, because this page is thought - * to be zero. This is a robustness fix, and not meant to - * be a long term work-around. - */ - pmap_zero_page(VM_PAGE_TO_PHYS(p)); } /* * Page directory pages need to have the kernel @@ -787,6 +715,9 @@ pmap_release_free_page(pmap, p) pmap_kremove((vm_offset_t) pmap->pm_pdir); } + if (pmap->pm_ptphint == p) + pmap->pm_ptphint = NULL; + vm_page_free_zero(p); splx(s); return 1; @@ -801,7 +732,7 @@ _pmap_allocpte(pmap, ptepindex) pmap_t pmap; int ptepindex; { - vm_offset_t pteva, ptepa; + vm_offset_t ptepa; vm_page_t m; /* @@ -810,11 +741,9 @@ _pmap_allocpte(pmap, ptepindex) retry: m = vm_page_lookup(pmap->pm_pteobj, ptepindex); if (m == NULL) { - m = vm_page_alloc(pmap->pm_pteobj, ptepindex, VM_ALLOC_ZERO); - if (m == NULL) { - VM_WAIT; + m = pmap_page_alloc(pmap->pm_pteobj, ptepindex); + if (m == NULL) goto retry; - } if ((m->flags & PG_ZERO) == 0) pmap_zero_page(VM_PAGE_TO_PHYS(m)); m->flags &= ~(PG_ZERO|PG_BUSY); @@ -827,22 +756,16 @@ retry: } } - /* - * mark the object writeable - */ - pmap->pm_pteobj->flags |= OBJ_WRITEABLE; - if (m->queue != PQ_NONE) { int s = splvm(); - vm_page_unqueue(m); + vm_page_unqueue(m,1); splx(s); } - if (m->hold_count == 0) { - if (m->wire_count == 0) - ++cnt.v_wire_count; - ++m->wire_count; - } + if (m->wire_count == 0) + ++cnt.v_wire_count; + ++m->wire_count; + /* * Increment the hold count for the page table page * (denoting a new mapping.) @@ -859,14 +782,12 @@ retry: ptepa = VM_PAGE_TO_PHYS(m); pmap->pm_pdir[ptepindex] = (pd_entry_t) (ptepa | PG_U | PG_RW | PG_V); - pteva = UPT_MIN_ADDRESS + i386_ptob(ptepindex); - pmap_update_1pg(pteva); m->flags |= PG_MAPPED; return m; } -static __inline vm_page_t +PMAP_INLINE static vm_page_t pmap_allocpte(pmap, va) pmap_t pmap; vm_offset_t va; @@ -890,7 +811,13 @@ pmap_allocpte(pmap, va) * hold count, and activate it. */ if (ptepa) { - m = PHYS_TO_VM_PAGE(ptepa); + if (pmap->pm_ptphint && + (pmap->pm_ptphint->pindex == ptepindex)) { + m = pmap->pm_ptphint; + } else { + m = vm_page_lookup( pmap->pm_pteobj, ptepindex); + pmap->pm_ptphint = m; + } ++m->hold_count; return m; } @@ -1035,13 +962,12 @@ pmap_reference(pmap) /* * free the pv_entry back to the free list */ -static __inline void +static PMAP_INLINE void free_pv_entry(pv) pv_entry_t pv; { ++pv_freelistcnt; - pv->pv_next = pv_freelist; - pv_freelist = pv; + TAILQ_INSERT_HEAD(&pv_freelist, pv, pv_list); } /* @@ -1050,7 +976,7 @@ free_pv_entry(pv) * the memory allocation is performed bypassing the malloc code * because of the possibility of allocations at interrupt time. */ -static __inline pv_entry_t +static PMAP_INLINE pv_entry_t get_pv_entry() { pv_entry_t tmp; @@ -1058,15 +984,16 @@ get_pv_entry() /* * get more pv_entry pages if needed */ - if (pv_freelistcnt < PV_FREELIST_MIN || pv_freelist == 0) { + if (pv_freelistcnt < PV_FREELIST_MIN) { pmap_alloc_pv_entry(); } + /* * get a pv_entry off of the free list */ --pv_freelistcnt; - tmp = pv_freelist; - pv_freelist = tmp->pv_next; + tmp = TAILQ_FIRST(&pv_freelist); + TAILQ_REMOVE(&pv_freelist, tmp, pv_list); return tmp; } @@ -1123,7 +1050,7 @@ pmap_alloc_pv_entry() } } } - if (!pv_freelist) + if (TAILQ_FIRST(&pv_freelist) == NULL) panic("get_pv_entry: cannot get a pv_entry_t"); } @@ -1150,62 +1077,115 @@ init_pv_entries(npg) } /* - * If it is the first entry on the list, it is actually - * in the header and we must copy the following entry up - * to the header. Otherwise we must search the list for - * the entry. In either case we free the now unused entry. + * This routine unholds page table pages, and if the hold count + * drops to zero, then it decrements the wire count. */ -static __inline int -pmap_remove_entry(pmap, ppv, va) - struct pmap *pmap; - pv_entry_t *ppv; - vm_offset_t va; -{ - pv_entry_t npv; - int s; - - s = splvm(); - for (npv = *ppv; npv; (ppv = &npv->pv_next, npv = *ppv)) { - if (pmap == npv->pv_pmap && va == npv->pv_va) { - int rtval = pmap_unuse_pt(pmap, va, npv->pv_ptem); - *ppv = npv->pv_next; - free_pv_entry(npv); - splx(s); - return rtval; +static int +pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m) { + vm_page_unhold(m); + if (m->hold_count == 0) { + vm_offset_t pteva; + /* + * unmap the page table page + */ + pmap->pm_pdir[m->pindex] = 0; + --pmap->pm_stats.resident_count; + if ((((unsigned)pmap->pm_pdir[PTDPTDI]) & PG_FRAME) == + (((unsigned) PTDpde) & PG_FRAME)) { + /* + * Do a pmap_update to make the invalidated mapping + * take effect immediately. + */ + pteva = UPT_MIN_ADDRESS + i386_ptob(m->pindex); + pmap_update_1pg(pteva); + } + /* + * If the page is finally unwired, simply free it. + */ + --m->wire_count; + if (m->wire_count == 0) { + if (pmap->pm_ptphint == m) + pmap->pm_ptphint = NULL; + vm_page_free_zero(m); + --cnt.v_wire_count; } + return 1; } - splx(s); return 0; } /* - * Create a pv entry for page at pa for - * (pmap, va). + * After removing a page table entry, this routine is used to + * conditionally free the page, and manage the hold/wire counts. */ -static __inline void -pmap_insert_entry(pmap, va, mpte, pa) +PMAP_INLINE int +pmap_unuse_pt(pmap, va, mpte) pmap_t pmap; vm_offset_t va; vm_page_t mpte; - vm_offset_t pa; { + int ptepindex; + if (va >= UPT_MIN_ADDRESS) + return 0; + + if (mpte == NULL) { + ptepindex = (va >> PDRSHIFT); + if (pmap->pm_ptphint && + pmap->pm_ptphint->pindex == ptepindex) { + mpte = pmap->pm_ptphint; + } else { + mpte = vm_page_lookup( pmap->pm_pteobj, ptepindex); + pmap->pm_ptphint = mpte; + } + } + + return pmap_unwire_pte_hold(pmap, mpte); +} +/* + * If it is the first entry on the list, it is actually + * in the header and we must copy the following entry up + * to the header. Otherwise we must search the list for + * the entry. In either case we free the now unused entry. + */ +static int +pmap_remove_entry(pmap, ppv, va) + struct pmap *pmap; + pv_table_t *ppv; + vm_offset_t va; +{ + pv_entry_t pv; + int rtval; int s; - pv_entry_t *ppv, pv; s = splvm(); - pv = get_pv_entry(); - pv->pv_va = va; - pv->pv_pmap = pmap; - pv->pv_ptem = mpte; + if (ppv->pv_list_count < pmap->pm_stats.resident_count) { + for (pv = TAILQ_FIRST(&ppv->pv_list); + pv; + pv = TAILQ_NEXT(pv, pv_list)) { + if (pmap == pv->pv_pmap && va == pv->pv_va) + break; + } + } else { + for (pv = TAILQ_FIRST(&pmap->pm_pvlist.pv_list); + pv; + pv = TAILQ_NEXT(pv, pv_plist)) { + if (va == pv->pv_va) + break; + } + } - ppv = pa_to_pvh(pa); - if (*ppv) - pv->pv_next = *ppv; - else - pv->pv_next = NULL; - *ppv = pv; + rtval = 0; + if (pv) { + rtval = pmap_unuse_pt(pmap, va, pv->pv_ptem); + TAILQ_REMOVE(&ppv->pv_list, pv, pv_list); + --ppv->pv_list_count; + TAILQ_REMOVE(&pmap->pm_pvlist.pv_list, pv, pv_plist); + free_pv_entry(pv); + } + splx(s); + return rtval; } /* @@ -1218,7 +1198,6 @@ pmap_remove_pte(pmap, ptq, va) vm_offset_t va; { unsigned oldpte; - pv_entry_t *ppv; oldpte = *ptq; *ptq = 0; @@ -1235,8 +1214,7 @@ pmap_remove_pte(pmap, ptq, va) if (pmap_track_modified(va)) PHYS_TO_VM_PAGE(oldpte)->dirty = VM_PAGE_BITS_ALL; } - ppv = pa_to_pvh(oldpte); - return pmap_remove_entry(pmap, ppv, va); + return pmap_remove_entry(pmap, pa_to_pvh(oldpte), va); } else { return pmap_unuse_pt(pmap, va, NULL); } @@ -1265,9 +1243,11 @@ pmap_remove_page(pmap, va) * get a local va for mappings for this pmap. */ ptq = get_ptbase(pmap) + i386_btop(va); - if (*ptq) { + if (*ptq & PG_V) { (void) pmap_remove_pte(pmap, ptq, va); - pmap_update_1pg(va); + if ((((unsigned)pmap->pm_pdir[PTDPTDI]) & PG_FRAME) == (((unsigned) PTDpde) & PG_FRAME)) { + pmap_update_1pg(va); + } } return; } @@ -1290,7 +1270,6 @@ pmap_remove(pmap, sva, eva) vm_offset_t sindex, eindex; vm_page_t mpte; int anyvalid; - vm_offset_t vachanged[VATRACK]; if (pmap == NULL) return; @@ -1315,6 +1294,7 @@ pmap_remove(pmap, sva, eva) sindex = i386_btop(sva); eindex = i386_btop(eva); + mpte = NULL; for (; sindex < eindex; sindex = pdnxt) { @@ -1331,19 +1311,6 @@ pmap_remove(pmap, sva, eva) if (ptpaddr == 0) continue; - if (sindex < i386_btop(UPT_MIN_ADDRESS)) { - /* - * get the vm_page_t for the page table page - */ - mpte = PHYS_TO_VM_PAGE(ptpaddr); - - /* - * if the pte isn't wired, just skip it. - */ - if (mpte->wire_count == 0) - continue; - } - /* * Limit our scan to either the end of the va represented * by the current page table page, or to the end of the @@ -1355,13 +1322,11 @@ pmap_remove(pmap, sva, eva) for ( ;sindex != pdnxt; sindex++) { vm_offset_t va; - if (ptbase[sindex] == 0) { + if ((ptbase[sindex] & PG_V) == 0) { continue; } va = i386_ptob(sindex); - if (anyvalid < VATRACK) - vachanged[anyvalid] = va; anyvalid++; if (pmap_remove_pte(pmap, ptbase + sindex, va)) @@ -1370,15 +1335,8 @@ pmap_remove(pmap, sva, eva) } if (anyvalid) { - if (anyvalid <= VATRACK) { - int i; - for(i=0;i<anyvalid;i++) - pmap_update_1pg(vachanged[i]); - } else { - pmap_update(); - } + pmap_update(); } - } /* @@ -1393,15 +1351,18 @@ pmap_remove(pmap, sva, eva) * inefficient because they iteratively called * pmap_remove (slow...) */ -static void +static int pmap_remove_all(pa) vm_offset_t pa; { - register pv_entry_t pv, *ppv, npv; + register pv_entry_t pv, npv; + pv_table_t *ppv; register unsigned *pte, tpte; vm_page_t m; + int nmodify; int s; + nmodify = 0; #if defined(PMAP_DIAGNOSTIC) /* * XXX this makes pmap_page_protect(NONE) illegal for non-managed @@ -1415,9 +1376,11 @@ pmap_remove_all(pa) s = splvm(); m = NULL; ppv = pa_to_pvh(pa); - for (pv = *ppv; pv; pv=pv->pv_next) { + for (pv = TAILQ_FIRST(&ppv->pv_list); + pv; + pv = npv) { pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va); - if (tpte = *pte) { + if ((tpte = *pte) & PG_V) { pv->pv_pmap->pm_stats.resident_count--; *pte = 0; if (tpte & PG_W) @@ -1425,28 +1388,27 @@ pmap_remove_all(pa) /* * Update the vm_page_t clean and reference bits. */ - if (tpte & PG_M) { + if ((tpte & (PG_M|PG_MANAGED)) == (PG_M|PG_MANAGED)) { #if defined(PMAP_DIAGNOSTIC) if (pmap_nw_modified((pt_entry_t) tpte)) { printf("pmap_remove_all: modified page not writable: va: 0x%lx, pte: 0x%lx\n", pv->pv_va, tpte); } #endif - if (pmap_track_modified(pv->pv_va)) { - if (m == NULL) - m = PHYS_TO_VM_PAGE(pa); - m->dirty = VM_PAGE_BITS_ALL; - } + if (pmap_track_modified(pv->pv_va)) + nmodify += 1; } } - } + TAILQ_REMOVE(&pv->pv_pmap->pm_pvlist.pv_list, pv, pv_plist); - for (pv = *ppv; pv; pv = npv) { - npv = pv->pv_next; + npv = TAILQ_NEXT(pv, pv_list); + TAILQ_REMOVE(&ppv->pv_list, pv, pv_list); + --ppv->pv_list_count; pmap_unuse_pt(pv->pv_pmap, pv->pv_va, pv->pv_ptem); free_pv_entry(pv); } - *ppv = NULL; + splx(s); + return nmodify; } /* @@ -1484,6 +1446,7 @@ pmap_protect(pmap, sva, eva, prot) sindex = i386_btop(sva); eindex = i386_btop(eva); + mpte = NULL; for (; sindex < eindex; sindex = pdnxt) { pdnxt = ((sindex + NPTEPG) & ~(NPTEPG - 1)); @@ -1496,18 +1459,6 @@ pmap_protect(pmap, sva, eva, prot) if (ptpaddr == 0) continue; - /* - * Skip page ranges, where the page table page isn't wired. - * If the page table page is not wired, there are no page mappings - * there. - */ - if (sindex < i386_btop(UPT_MIN_ADDRESS)) { - mpte = PHYS_TO_VM_PAGE(ptpaddr); - - if (mpte->wire_count == 0) - continue; - } - if (pdnxt > eindex) { pdnxt = eindex; } @@ -1516,8 +1467,8 @@ pmap_protect(pmap, sva, eva, prot) unsigned pbits = ptbase[sindex]; - if (pbits & PG_RW) { - if (pbits & PG_M) { + if ((pbits & (PG_RW|PG_V)) == (PG_RW|PG_V)) { + if ((pbits & (PG_M|PG_MANAGED)) == (PG_M|PG_MANAGED)) { vm_offset_t sva = i386_ptob(sindex); if (pmap_track_modified(sva)) { vm_page_t m = PHYS_TO_VM_PAGE(pbits); @@ -1534,6 +1485,37 @@ pmap_protect(pmap, sva, eva, prot) } /* + * Create a pv entry for page at pa for + * (pmap, va). + */ +static void +pmap_insert_entry(pmap, va, mpte, pa) + pmap_t pmap; + vm_offset_t va; + vm_page_t mpte; + vm_offset_t pa; +{ + + int s; + pv_entry_t pv; + pv_table_t *ppv; + + s = splvm(); + pv = get_pv_entry(); + pv->pv_va = va; + pv->pv_pmap = pmap; + pv->pv_ptem = mpte; + + TAILQ_INSERT_TAIL(&pmap->pm_pvlist.pv_list, pv, pv_plist); + + ppv = pa_to_pvh(pa); + TAILQ_INSERT_TAIL(&ppv->pv_list, pv, pv_list); + ++ppv->pv_list_count; + + splx(s); +} + +/* * Insert the given physical page (p) at * the specified virtual address (v) in the * target physical map with the protection requested. @@ -1577,7 +1559,7 @@ pmap_enter(pmap, va, pa, prot, wired) if (va < UPT_MIN_ADDRESS) mpte = pmap_allocpte(pmap, va); - pte = pmap_pte_quick(pmap, va); + pte = pmap_pte(pmap, va); /* * Page Directory table entry not valid, we need a new PT page */ @@ -1593,7 +1575,7 @@ pmap_enter(pmap, va, pa, prot, wired) /* * Mapping has not changed, must be protection or wiring change. */ - if (opa == pa) { + if ((origpte & PG_V) && (opa == pa)) { /* * Wiring change, just update stats. We don't worry about * wiring PT pages as they remain resident as long as there @@ -1616,12 +1598,10 @@ pmap_enter(pmap, va, pa, prot, wired) * so we go ahead and sense modify status. */ if (origpte & PG_MANAGED) { - vm_page_t m; - if (origpte & PG_M) { - if (pmap_track_modified(va)) { - m = PHYS_TO_VM_PAGE(pa); - m->dirty = VM_PAGE_BITS_ALL; - } + if ((origpte & PG_M) && pmap_track_modified(va)) { + vm_page_t m; + m = PHYS_TO_VM_PAGE(pa); + m->dirty = VM_PAGE_BITS_ALL; } pa |= PG_MANAGED; } @@ -1635,7 +1615,7 @@ pmap_enter(pmap, va, pa, prot, wired) * Mapping has changed, invalidate old range and fall through to * handle validating new mapping. */ - if (opa) { + if (origpte & PG_V) { int err; err = pmap_remove_pte(pmap, pte, va); if (err) @@ -1692,22 +1672,55 @@ validate: * but is *MUCH* faster than pmap_enter... */ -static void -pmap_enter_quick(pmap, va, pa) +static vm_page_t +pmap_enter_quick(pmap, va, pa, mpte) register pmap_t pmap; vm_offset_t va; register vm_offset_t pa; + vm_page_t mpte; { register unsigned *pte; - vm_page_t mpte; - mpte = NULL; /* * In the case that a page table page is not * resident, we are creating it here. */ - if (va < UPT_MIN_ADDRESS) - mpte = pmap_allocpte(pmap, va); + if (va < UPT_MIN_ADDRESS) { + int ptepindex; + vm_offset_t ptepa; + + /* + * Calculate pagetable page index + */ + ptepindex = va >> PDRSHIFT; + if (mpte && (mpte->pindex == ptepindex)) { + ++mpte->hold_count; + } else { + /* + * Get the page directory entry + */ + ptepa = (vm_offset_t) pmap->pm_pdir[ptepindex]; + + /* + * If the page table page is mapped, we just increment + * the hold count, and activate it. + */ + if (ptepa) { + if (pmap->pm_ptphint && + pmap->pm_ptphint->pindex == ptepindex) { + mpte = pmap->pm_ptphint; + } else { + mpte = vm_page_lookup( pmap->pm_pteobj, ptepindex); + pmap->pm_ptphint = mpte; + } + ++mpte->hold_count; + } else { + mpte = _pmap_allocpte(pmap, ptepindex); + } + } + } else { + mpte = NULL; + } /* * This call to vtopte makes the assumption that we are @@ -1716,10 +1729,10 @@ pmap_enter_quick(pmap, va, pa) * But that isn't as quick as vtopte. */ pte = (unsigned *)vtopte(va); - if (*pte) { + if (*pte & PG_V) { if (mpte) pmap_unwire_pte_hold(pmap, mpte); - return; + return NULL; } /* @@ -1739,7 +1752,7 @@ pmap_enter_quick(pmap, va, pa) */ *pte = pa | PG_V | PG_U | PG_MANAGED; - return; + return mpte; } #define MAX_INIT_PT (96) @@ -1759,7 +1772,7 @@ pmap_object_init_pt(pmap, addr, object, pindex, size, limit) { vm_offset_t tmpidx; int psize; - vm_page_t p; + vm_page_t p, mpte; int objpgs; psize = i386_btop(size); @@ -1773,6 +1786,7 @@ pmap_object_init_pt(pmap, addr, object, pindex, size, limit) if (psize + pindex > object->size) psize = object->size - pindex; + mpte = NULL; /* * if we are processing a major portion of the object, then scan the * entire thing. @@ -1798,9 +1812,9 @@ pmap_object_init_pt(pmap, addr, object, pindex, size, limit) if (p->queue == PQ_CACHE) vm_page_deactivate(p); p->flags |= PG_BUSY; - pmap_enter_quick(pmap, + mpte = pmap_enter_quick(pmap, addr + i386_ptob(tmpidx), - VM_PAGE_TO_PHYS(p)); + VM_PAGE_TO_PHYS(p), mpte); p->flags |= PG_MAPPED; PAGE_WAKEUP(p); } @@ -1819,9 +1833,9 @@ pmap_object_init_pt(pmap, addr, object, pindex, size, limit) if (p->queue == PQ_CACHE) vm_page_deactivate(p); p->flags |= PG_BUSY; - pmap_enter_quick(pmap, + mpte = pmap_enter_quick(pmap, addr + i386_ptob(tmpidx), - VM_PAGE_TO_PHYS(p)); + VM_PAGE_TO_PHYS(p), mpte); p->flags |= PG_MAPPED; PAGE_WAKEUP(p); } @@ -1855,7 +1869,7 @@ pmap_prefault(pmap, addra, entry, object) vm_offset_t starta; vm_offset_t addr; vm_pindex_t pindex; - vm_page_t m; + vm_page_t m, mpte; if (entry->object.vm_object != object) return; @@ -1870,6 +1884,7 @@ pmap_prefault(pmap, addra, entry, object) starta = 0; } + mpte = NULL; for (i = 0; i < PAGEORDER_SIZE; i++) { vm_object_t lobject; unsigned *pte; @@ -1910,7 +1925,8 @@ pmap_prefault(pmap, addra, entry, object) vm_page_deactivate(m); } m->flags |= PG_BUSY; - pmap_enter_quick(pmap, addr, VM_PAGE_TO_PHYS(m)); + mpte = pmap_enter_quick(pmap, addr, + VM_PAGE_TO_PHYS(m), mpte); m->flags |= PG_MAPPED; PAGE_WAKEUP(m); } @@ -1949,8 +1965,6 @@ pmap_change_wiring(pmap, va, wired) pmap_pte_set_w(pte, wired); } - - /* * Copy the range specified by src_addr/len * from the source map to the range dst_addr/len @@ -1987,16 +2001,19 @@ pmap_copy(dst_pmap, src_pmap, dst_addr, len, src_addr) unsigned *src_pte, *dst_pte; vm_page_t dstmpte, srcmpte; vm_offset_t srcptepaddr; + int ptepindex; if (addr >= UPT_MIN_ADDRESS) panic("pmap_copy: invalid to pmap_copy page tables\n"); + pdnxt = ((addr + PAGE_SIZE*NPTEPG) & ~(PAGE_SIZE*NPTEPG - 1)); - srcptepaddr = (vm_offset_t) src_pmap->pm_pdir[addr >> PDRSHIFT]; - if (srcptepaddr == 0) { + ptepindex = addr >> PDRSHIFT; + + srcptepaddr = (vm_offset_t) src_pmap->pm_pdir[ptepindex]; + if (srcptepaddr == 0) continue; - } - srcmpte = PHYS_TO_VM_PAGE(srcptepaddr); + srcmpte = vm_page_lookup(src_pmap->pm_pteobj, ptepindex); if (srcmpte->hold_count == 0) continue; @@ -2020,12 +2037,14 @@ pmap_copy(dst_pmap, src_pmap, dst_addr, len, src_addr) dstmpte = pmap_allocpte(dst_pmap, addr); if ((*dst_pte == 0) && (ptetemp = *src_pte)) { /* - * Simply clear the modified and accessed (referenced) - * bits. + * Clear the modified and + * accessed (referenced) bits + * during the copy. */ *dst_pte = ptetemp & ~(PG_M|PG_A); dst_pmap->pm_stats.resident_count++; - pmap_insert_entry(dst_pmap, addr, dstmpte, + pmap_insert_entry(dst_pmap, addr, + dstmpte, (ptetemp & PG_FRAME)); } else { pmap_unwire_pte_hold(dst_pmap, dstmpte); @@ -2126,7 +2145,8 @@ pmap_page_exists(pmap, pa) pmap_t pmap; vm_offset_t pa; { - register pv_entry_t *ppv, pv; + register pv_entry_t pv; + pv_table_t *ppv; int s; if (!pmap_is_managed(pa)) @@ -2138,7 +2158,9 @@ pmap_page_exists(pmap, pa) /* * Not found, check current mappings returning immediately if found. */ - for (pv = *ppv; pv; pv = pv->pv_next) { + for (pv = TAILQ_FIRST(&ppv->pv_list); + pv; + pv = TAILQ_NEXT(pv, pv_list)) { if (pv->pv_pmap == pmap) { splx(s); return TRUE; @@ -2148,149 +2170,73 @@ pmap_page_exists(pmap, pa) return (FALSE); } +#define PMAP_REMOVE_PAGES_CURPROC_ONLY /* - * pmap_testbit tests bits in pte's - * note that the testbit/changebit routines are inline, - * and a lot of things compile-time evaluate. + * Remove all pages from specified address space + * this aids process exit speeds. Also, this code + * is special cased for current process only. */ -static __inline boolean_t -pmap_testbit(pa, bit) - register vm_offset_t pa; - int bit; +void +pmap_remove_pages(pmap, sva, eva) + pmap_t pmap; + vm_offset_t sva, eva; { - register pv_entry_t *ppv, pv; - unsigned *pte; + unsigned *pte, tpte; + pv_table_t *ppv; + pv_entry_t pv, npv; int s; - if (!pmap_is_managed(pa)) - return FALSE; - - ppv = pa_to_pvh(pa); - if (*ppv == NULL) - return FALSE; +#ifdef PMAP_REMOVE_PAGES_CURPROC_ONLY + if (pmap != &curproc->p_vmspace->vm_pmap) { + printf("warning: pmap_remove_pages called with non-current pmap\n"); + return; + } +#endif s = splvm(); - /* - * Not found, check current mappings returning immediately if found. - */ - for (pv = *ppv ;pv; pv = pv->pv_next) { - /* - * if the bit being tested is the modified bit, then - * mark clean_map and ptes as never - * modified. - */ - if (bit & (PG_A|PG_M)) { - if (!pmap_track_modified(pv->pv_va)) - continue; - } + for(pv = TAILQ_FIRST(&pmap->pm_pvlist.pv_list); + pv; + pv = npv) { - if (!pv->pv_pmap) { -#if defined(PMAP_DIAGNOSTIC) - printf("Null pmap (tb) at va: 0x%lx\n", pv->pv_va); -#endif + if (pv->pv_va >= eva || pv->pv_va < sva) { + npv = TAILQ_NEXT(pv, pv_plist); continue; } + +#ifdef PMAP_REMOVE_PAGES_CURPROC_ONLY + pte = (unsigned *)vtopte(pv->pv_va); +#else pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va); - if (pte == NULL) - continue; - if (*pte & bit) { - splx(s); - return TRUE; - } - } - splx(s); - return (FALSE); -} +#endif + tpte = *pte; + *pte = 0; -/* - * this routine is used to modify bits in ptes - */ -static __inline void -pmap_changebit(pa, bit, setem) - vm_offset_t pa; - int bit; - boolean_t setem; -{ - register pv_entry_t pv, *ppv; - register unsigned *pte; - vm_offset_t va; - int changed; - int s; + TAILQ_REMOVE(&pv->pv_pmap->pm_pvlist.pv_list, pv, pv_plist); - if (!pmap_is_managed(pa)) - return; + if (tpte & PG_V) { + pv->pv_pmap->pm_stats.resident_count--; + if (tpte & PG_W) + pv->pv_pmap->pm_stats.wired_count--; + /* + * Update the vm_page_t clean and reference bits. + */ + if (tpte & PG_M) { + PHYS_TO_VM_PAGE(tpte)->dirty = VM_PAGE_BITS_ALL; + } + } - s = splvm(); - changed = 0; - ppv = pa_to_pvh(pa); - /* - * Loop over all current mappings setting/clearing as appropos If - * setting RO do we need to clear the VAC? - */ - for ( pv = *ppv; pv; pv = pv->pv_next) { - va = pv->pv_va; + npv = TAILQ_NEXT(pv, pv_plist); - /* - * don't write protect pager mappings - */ - if (!setem && (bit == PG_RW)) { - if (va >= clean_sva && va < clean_eva) - continue; - } - if (!pv->pv_pmap) { -#if defined(PMAP_DIAGNOSTIC) - printf("Null pmap (cb) at va: 0x%lx\n", va); -#endif - continue; - } + ppv = pa_to_pvh(tpte); + TAILQ_REMOVE(&ppv->pv_list, pv, pv_list); + --ppv->pv_list_count; - pte = pmap_pte_quick(pv->pv_pmap, va); - if (pte == NULL) - continue; - if (setem) { - *(int *)pte |= bit; - changed = 1; - } else { - vm_offset_t pbits = *(vm_offset_t *)pte; - if (pbits & bit) - changed = 1; - if (bit == PG_RW) { - if (pbits & PG_M) { - vm_page_t m; - vm_offset_t pa = pbits & PG_FRAME; - m = PHYS_TO_VM_PAGE(pa); - m->dirty = VM_PAGE_BITS_ALL; - } - *(int *)pte = pbits & ~(PG_M|PG_RW); - } else { - *(int *)pte = pbits & ~bit; - } - } + pmap_unuse_pt(pv->pv_pmap, pv->pv_va, pv->pv_ptem); + free_pv_entry(pv); } + pmap_update(); splx(s); - if (changed) - pmap_update(); -} - -/* - * pmap_page_protect: - * - * Lower the permission for all mappings to a given page. - */ -void -pmap_page_protect(phys, prot) - vm_offset_t phys; - vm_prot_t prot; -{ - if ((prot & VM_PROT_WRITE) == 0) { - if (prot & (VM_PROT_READ | VM_PROT_EXECUTE)) { - pmap_changebit(phys, PG_RW, FALSE); - } else { - pmap_remove_all(phys); - pmap_update(); - } - } } vm_offset_t @@ -2301,150 +2247,111 @@ pmap_phys_address(ppn) } /* - * pmap_is_referenced: + * pmap_tcbit: * - * Return whether or not the specified physical page was referenced - * by any physical maps. - */ -boolean_t -pmap_is_referenced(vm_offset_t pa) -{ - register pv_entry_t *ppv, pv, lpv; - unsigned *pte; - int s; - - if (!pmap_is_managed(pa)) - return FALSE; - - ppv = pa_to_pvh(pa); - - s = splvm(); - /* - * Not found, check current mappings returning immediately if found. - */ - for (lpv = NULL, pv = *ppv ;pv; lpv = pv, pv = pv->pv_next) { - /* - * if the bit being tested is the modified bit, then - * mark clean_map and ptes as never - * modified. - */ - if (!pmap_track_modified(pv->pv_va)) - continue; - if (!pv->pv_pmap) { - continue; - } - pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va); - if (pte == NULL) - continue; - if ((int) *pte & PG_A) { - if (lpv) { - lpv->pv_next = pv->pv_next; - pv->pv_next = *ppv; - *ppv = pv; - } - splx(s); - return TRUE; - } - } - splx(s); - return (FALSE); -} - -/* - * pmap_ts_referenced: - * - * Return the count of reference bits for a page, clearing all of them. + * Return the count of bits for a page, clearing all of them. * */ int -pmap_ts_referenced(vm_offset_t pa) +pmap_tcbit(vm_offset_t pa, int bit) { - register pv_entry_t *ppv, pv; + register pv_entry_t pv, npv; + pv_table_t *ppv; unsigned *pte; int s; int rtval = 0; - vm_offset_t vachanged[VATRACK]; - - if (!pmap_is_managed(pa)) - return FALSE; s = splvm(); ppv = pa_to_pvh(pa); - - if (*ppv == NULL) { - splx(s); - return 0; - } - /* * Not found, check current mappings returning immediately if found. */ - for (pv = *ppv ;pv; pv = pv->pv_next) { + for (pv = TAILQ_FIRST(&ppv->pv_list); + pv; + pv = npv) { + npv = TAILQ_NEXT(pv, pv_list); /* * if the bit being tested is the modified bit, then * mark clean_map and ptes as never * modified. */ - if (!pmap_track_modified(pv->pv_va)) + if (((bit & PG_M) != 0) + && !pmap_track_modified(pv->pv_va)) continue; - if (!pv->pv_pmap) { - continue; - } pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va); if (pte == NULL) continue; - if (*pte & PG_A) { - if (rtval < VATRACK) - vachanged[rtval] = pv->pv_va; - rtval++; - *pte &= ~PG_A; + + if ((rtval == 0) && (*pte & bit)) { + rtval = 1; + *pte &= ~bit; + } else { + *pte &= ~bit; } } splx(s); if (rtval) { - if (rtval <= VATRACK) { - int i; - for(i=0;i<rtval;i++) - pmap_update_1pg(vachanged[i]); - } else { + if (curproc != pageproc) pmap_update(); - } } return (rtval); } /* - * pmap_is_modified: + * pmap_tc_modified: * - * Return whether or not the specified physical page was modified - * in any physical maps. + * Return the count of modified bits for a page, clearing all of them. + * */ -boolean_t -pmap_is_modified(vm_offset_t pa) +int +pmap_tc_modified(vm_page_t m) { - return pmap_testbit((pa), PG_M); + int rtval; + + rtval = pmap_tcbit(VM_PAGE_TO_PHYS(m), PG_M); + if (rtval) + m->dirty = VM_PAGE_BITS_ALL; + + return rtval; } /* - * Clear the modify bits on the specified physical page. + * pmap_tc_referenced: + * + * Return the count of referenced bits for a page, clearing all of them. + * */ -void -pmap_clear_modify(vm_offset_t pa) +int +pmap_tc_referenced(vm_offset_t pa) { - pmap_changebit((pa), PG_M, FALSE); + if (!pmap_is_managed(pa)) + return 0; + return pmap_tcbit(pa, PG_A); } /* - * pmap_clear_reference: + * pmap_page_protect: * - * Clear the reference bit on the specified physical page. + * Lower the permission for all mappings to a given page. */ void -pmap_clear_reference(vm_offset_t pa) +pmap_page_protect(m, prot) + vm_page_t m; + vm_prot_t prot; { - pmap_changebit((pa), PG_A, FALSE); + if ((prot & VM_PROT_WRITE) == 0) { + if (prot & (VM_PROT_READ | VM_PROT_EXECUTE)) { + if ((m->flags & PG_FICTITIOUS) == 0) + pmap_tcbit(VM_PAGE_TO_PHYS(m), PG_RW); + } else { + if (pmap_remove_all(VM_PAGE_TO_PHYS(m))) { + m->dirty = VM_PAGE_BITS_ALL; + } + pmap_update(); + } + } } /* @@ -2525,7 +2432,7 @@ pmap_mincore(pmap, addr) unsigned *ptep, pte; int val = 0; - ptep = pmap_pte(pmap, addr); + ptep = pmap_pte_quick(pmap, addr); if (ptep == 0) { return 0; } @@ -2544,8 +2451,10 @@ pmap_mincore(pmap, addr) * Modified by someone */ else if (PHYS_TO_VM_PAGE(pa)->dirty || - pmap_is_modified(pa)) + pmap_tcbit(pa, PG_M)) { val |= MINCORE_MODIFIED_OTHER; + PHYS_TO_VM_PAGE(pa)->dirty = VM_PAGE_BITS_ALL; + } /* * Referenced by us */ @@ -2556,7 +2465,7 @@ pmap_mincore(pmap, addr) * Referenced by someone */ else if ((PHYS_TO_VM_PAGE(pa)->flags & PG_REFERENCED) || - pmap_is_referenced(pa)) + pmap_tcbit(pa, PG_A)) val |= MINCORE_REFERENCED_OTHER; } return val; @@ -2592,7 +2501,7 @@ pmap_pid_dump(int pid) { } return npte; } - pte = pmap_pte( pmap, va); + pte = pmap_pte_quick( pmap, va); if (pte && pmap_pte_v(pte)) { vm_offset_t pa; vm_page_t m; @@ -2641,7 +2550,7 @@ pads(pm) continue; if (pm != kernel_pmap && va > UPT_MAX_ADDRESS) continue; - ptep = pmap_pte(pm, va); + ptep = pmap_pte_quick(pm, va); if (pmap_pte_v(ptep)) printf("%x:%x ", va, *(int *) ptep); }; @@ -2655,7 +2564,9 @@ pmap_pvdump(pa) register pv_entry_t pv; printf("pa %x", pa); - for (pv = pa_to_pvh(pa); pv; pv = pv->pv_next) { + for (pv = TAILQ_FIRST(pa_to_pvh(pa)); + pv; + pv = TAILQ_NEXT(pv->pv_list)) { #ifdef used_to_be printf(" -> pmap %x, va %x, flags %x", pv->pv_pmap, pv->pv_va, pv->pv_flags); diff --git a/sys/i386/include/pmap.h b/sys/i386/include/pmap.h index dbadf6f..b53db62 100644 --- a/sys/i386/include/pmap.h +++ b/sys/i386/include/pmap.h @@ -42,12 +42,13 @@ * * from: hp300: @(#)pmap.h 7.2 (Berkeley) 12/16/90 * from: @(#)pmap.h 7.4 (Berkeley) 5/12/91 - * $Id: pmap.h,v 1.39 1996/05/18 03:36:38 dyson Exp $ + * $Id: pmap.h,v 1.40 1996/06/08 11:21:19 bde Exp $ */ #ifndef _MACHINE_PMAP_H_ #define _MACHINE_PMAP_H_ + /* * Page-directory and page-table entires follow this format, with a few * of the fields not present here and there, depending on a lot of things. @@ -113,6 +114,9 @@ #define ISA_HOLE_LENGTH (0x100000-ISA_HOLE_START) #ifndef LOCORE + +#include <sys/queue.h> + typedef unsigned int *pd_entry_t; typedef unsigned int *pt_entry_t; @@ -158,17 +162,24 @@ pmap_kextract(vm_offset_t va) } #endif +struct vm_page; + /* * Pmap stuff */ +struct pv_entry; +typedef struct { + int pv_list_count; + TAILQ_HEAD(,pv_entry) pv_list; +} pv_table_t; struct pmap { pd_entry_t *pm_pdir; /* KVA of page directory */ vm_object_t pm_pteobj; /* Container for pte's */ - short pm_dref; /* page directory ref count */ - short pm_count; /* pmap reference count */ + pv_table_t pm_pvlist; /* list of mappings in pmap */ + int pm_count; /* reference count */ struct pmap_statistics pm_stats; /* pmap statistics */ - struct vm_map *pm_map; /* map that owns this pmap */ + struct vm_page *pm_ptphint; /* pmap ptp hint */ }; typedef struct pmap *pmap_t; @@ -177,14 +188,16 @@ typedef struct pmap *pmap_t; extern pmap_t kernel_pmap; #endif + /* * For each vm_page_t, there is a list of all currently valid virtual * mappings of that page. An entry is a pv_entry_t, the list is pv_table. */ typedef struct pv_entry { - struct pv_entry *pv_next; /* next pv_entry */ pmap_t pv_pmap; /* pmap where mapping lies */ vm_offset_t pv_va; /* virtual address for mapping */ + TAILQ_ENTRY(pv_entry) pv_list; + TAILQ_ENTRY(pv_entry) pv_plist; vm_page_t pv_ptem; /* VM page for pte */ } *pv_entry_t; @@ -200,7 +213,7 @@ extern pt_entry_t *CMAP1; extern vm_offset_t avail_end; extern vm_offset_t avail_start; extern vm_offset_t phys_avail[]; -extern pv_entry_t *pv_table; /* array of entries, one per page */ +pv_table_t *pv_table; extern vm_offset_t virtual_avail; extern vm_offset_t virtual_end; diff --git a/sys/kern/kern_exec.c b/sys/kern/kern_exec.c index cfe55e0..7d3a0eb 100644 --- a/sys/kern/kern_exec.c +++ b/sys/kern/kern_exec.c @@ -23,7 +23,7 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $Id: kern_exec.c,v 1.43 1996/06/03 04:12:18 davidg Exp $ + * $Id: kern_exec.c,v 1.44 1996/07/12 04:11:37 bde Exp $ */ #include <sys/param.h> @@ -367,7 +367,7 @@ exec_new_vmspace(imgp) /* Blow away entire process VM */ if (vmspace->vm_shm) shmexit(imgp->proc); - vm_map_remove(&vmspace->vm_map, 0, USRSTACK); + vm_map_remove_userspace(&vmspace->vm_map); /* Allocate a new stack */ error = vm_map_find(&vmspace->vm_map, NULL, 0, (vm_offset_t *)&stack_addr, diff --git a/sys/kern/kern_exit.c b/sys/kern/kern_exit.c index 65ddfa5..86398d7 100644 --- a/sys/kern/kern_exit.c +++ b/sys/kern/kern_exit.c @@ -36,7 +36,7 @@ * SUCH DAMAGE. * * @(#)kern_exit.c 8.7 (Berkeley) 2/12/94 - * $Id: kern_exit.c,v 1.32 1996/04/11 20:56:29 bde Exp $ + * $Id: kern_exit.c,v 1.33 1996/06/12 05:07:28 gpalmer Exp $ */ #include "opt_ktrace.h" @@ -156,8 +156,7 @@ exit1(p, rv) * may be mapped within that space also. */ if (vm->vm_refcnt == 1) - (void) vm_map_remove(&vm->vm_map, VM_MIN_ADDRESS, - VM_MAXUSER_ADDRESS); + vm_map_remove_userspace(&vm->vm_map); if (SESS_LEADER(p)) { register struct session *sp = p->p_session; diff --git a/sys/vm/swap_pager.c b/sys/vm/swap_pager.c index ec37003..b68195c 100644 --- a/sys/vm/swap_pager.c +++ b/sys/vm/swap_pager.c @@ -39,7 +39,7 @@ * from: Utah $Hdr: swap_pager.c 1.4 91/04/30$ * * @(#)swap_pager.c 8.9 (Berkeley) 3/21/94 - * $Id: swap_pager.c,v 1.67 1996/05/23 00:45:50 dyson Exp $ + * $Id: swap_pager.c,v 1.68 1996/06/10 04:58:48 dyson Exp $ */ /* @@ -1078,7 +1078,7 @@ swap_pager_getpages(object, m, count, reqpage) pagedaemon_wakeup(); swap_pager_needflags &= ~(SWAP_FREE_NEEDED|SWAP_FREE_NEEDED_BY_PAGEOUT); if (rv == VM_PAGER_OK) { - pmap_clear_modify(VM_PAGE_TO_PHYS(m[reqpage])); + pmap_tc_modified(m[reqpage]); m[reqpage]->valid = VM_PAGE_BITS_ALL; m[reqpage]->dirty = 0; } @@ -1092,7 +1092,7 @@ swap_pager_getpages(object, m, count, reqpage) */ if (rv == VM_PAGER_OK) { for (i = 0; i < count; i++) { - pmap_clear_modify(VM_PAGE_TO_PHYS(m[i])); + pmap_tc_modified(m[i]); m[i]->dirty = 0; m[i]->flags &= ~PG_ZERO; if (i != reqpage) { @@ -1469,7 +1469,7 @@ retryfree: if (rv == VM_PAGER_OK) { for (i = 0; i < count; i++) { if (rtvals[i] == VM_PAGER_OK) { - pmap_clear_modify(VM_PAGE_TO_PHYS(m[i])); + pmap_tc_modified(m[i]); m[i]->dirty = 0; /* * optimization, if a page has been read @@ -1477,7 +1477,7 @@ retryfree: */ if ((m[i]->queue != PQ_ACTIVE) && ((m[i]->flags & (PG_WANTED|PG_REFERENCED)) || - pmap_is_referenced(VM_PAGE_TO_PHYS(m[i])))) { + pmap_tc_referenced(VM_PAGE_TO_PHYS(m[i])))) { vm_page_activate(m[i]); } } @@ -1580,12 +1580,21 @@ swap_pager_finish(spc) (u_long) VM_PAGE_TO_PHYS(spc->spc_m[i])); } } else { + int pagewanted = 0; for (i = 0; i < spc->spc_count; i++) { - pmap_clear_modify(VM_PAGE_TO_PHYS(spc->spc_m[i])); + if (spc->spc_m[i]->flags & (PG_WANTED | PG_REFERENCED)) { + pagewanted = 1; + break; + } + } + for (i = 0; i < spc->spc_count; i++) { + pmap_tc_modified(spc->spc_m[i]); spc->spc_m[i]->dirty = 0; - if ((spc->spc_m[i]->queue != PQ_ACTIVE) && - ((spc->spc_m[i]->flags & PG_WANTED) || pmap_is_referenced(VM_PAGE_TO_PHYS(spc->spc_m[i])))) - vm_page_activate(spc->spc_m[i]); + if (pagewanted) { + if (spc->spc_m[i]->queue != PQ_ACTIVE) + vm_page_activate(spc->spc_m[i]); + spc->spc_m[i]->flags |= PG_REFERENCED; + } } } @@ -1625,9 +1634,7 @@ swap_pager_iodone(bp) if (bp->b_vp) pbrelvp(bp); -/* if (bp->b_flags & B_WANTED) -*/ wakeup(bp); if (bp->b_rcred != NOCRED) diff --git a/sys/vm/vm_fault.c b/sys/vm/vm_fault.c index c3424b9..df2f0aa 100644 --- a/sys/vm/vm_fault.c +++ b/sys/vm/vm_fault.c @@ -66,7 +66,7 @@ * any improvements or extensions that they make and grant Carnegie the * rights to redistribute these changes. * - * $Id: vm_fault.c,v 1.52 1996/06/16 20:37:26 dyson Exp $ + * $Id: vm_fault.c,v 1.53 1996/07/02 02:07:59 dyson Exp $ */ /* @@ -103,10 +103,6 @@ int vm_fault_additional_pages __P((vm_page_t, int, int, vm_page_t *, int *)); #define VM_FAULT_READ_BEHIND 3 #define VM_FAULT_READ (VM_FAULT_READ_AHEAD+VM_FAULT_READ_BEHIND+1) -int vm_fault_free_1; -int vm_fault_copy_save_1; -int vm_fault_copy_save_2; - /* * vm_fault: * @@ -282,7 +278,7 @@ RetryFault:; } queue = m->queue; - vm_page_unqueue_nowakeup(m); + vm_page_unqueue(m,0); /* * Mark page busy for other processes, and the pagedaemon. @@ -297,8 +293,7 @@ RetryFault:; m->flags |= PG_BUSY; - if (m->valid && - ((m->valid & VM_PAGE_BITS_ALL) != VM_PAGE_BITS_ALL) && + if (((m->valid & VM_PAGE_BITS_ALL) != VM_PAGE_BITS_ALL) && m->object != kernel_object && m->object != kmem_object) { goto readrest; } @@ -401,19 +396,17 @@ readrest: if (rv == VM_PAGER_OK) { /* - * Found the page. Leave it busy while we play - * with it. - */ - - /* * Relookup in case pager changed page. Pager * is responsible for disposition of old page * if moved. */ - m = vm_page_lookup(object, pindex); - if( !m) { - UNLOCK_AND_DEALLOCATE; - goto RetryFault; + if ((m->object != object) || (m->pindex != pindex) || + (m->flags & PG_TABLED) == 0) { + m = vm_page_lookup(object, pindex); + if( !m) { + UNLOCK_AND_DEALLOCATE; + goto RetryFault; + } } hardfault++; @@ -485,9 +478,26 @@ readrest: } first_m = NULL; - if ((m->flags & PG_ZERO) == 0) - vm_page_zero_fill(m); - cnt.v_zfod++; + if ((m->flags & PG_ZERO) == 0) { + if (vm_page_zero_count) { + vm_page_protect(m, VM_PROT_NONE); + PAGE_WAKEUP(m); + vm_page_free(m); + m = vm_page_alloc(object, pindex, VM_ALLOC_ZERO); + if (!m) + panic("vm_fault: missing zero page"); + /* + * This should not be true, but just in case... + */ + if ((m->flags & PG_ZERO) == 0) { + vm_page_zero_fill(m); + cnt.v_zfod++; + } + } else { + vm_page_zero_fill(m); + cnt.v_zfod++; + } + } break; } else { if (object != first_object) { @@ -565,7 +575,6 @@ readrest: first_m = m; m->dirty = VM_PAGE_BITS_ALL; m = NULL; - ++vm_fault_copy_save_1; } else { /* * Oh, well, lets copy it. @@ -639,7 +648,6 @@ readrest: PAGE_WAKEUP(m); vm_page_free(m); m = NULL; - ++vm_fault_free_1; tm->dirty = VM_PAGE_BITS_ALL; first_m->dirty = VM_PAGE_BITS_ALL; } @@ -651,7 +659,6 @@ readrest: vm_page_rename(m, other_object, other_pindex); m->dirty = VM_PAGE_BITS_ALL; m->valid = VM_PAGE_BITS_ALL; - ++vm_fault_copy_save_2; } } } @@ -660,9 +667,9 @@ readrest: if (m) { if (m->queue != PQ_ACTIVE) vm_page_activate(m); - /* - * We no longer need the old page or object. - */ + /* + * We no longer need the old page or object. + */ PAGE_WAKEUP(m); } @@ -1091,7 +1098,7 @@ vm_fault_additional_pages(m, rbehind, rahead, marray, reqpage) endpindex = pindex + (rahead + 1); if (endpindex > object->size) endpindex = object->size; - while (tpindex < endpindex) { + while (tpindex < endpindex) { if ( vm_page_lookup(object, tpindex)) { break; } diff --git a/sys/vm/vm_map.c b/sys/vm/vm_map.c index 48de311..31455b4 100644 --- a/sys/vm/vm_map.c +++ b/sys/vm/vm_map.c @@ -61,7 +61,7 @@ * any improvements or extensions that they make and grant Carnegie the * rights to redistribute these changes. * - * $Id: vm_map.c,v 1.51 1996/06/16 20:37:29 dyson Exp $ + * $Id: vm_map.c,v 1.52 1996/07/07 03:27:41 davidg Exp $ */ /* @@ -172,6 +172,8 @@ static void vm_map_entry_unwire __P((vm_map_t, vm_map_entry_t)); static void vm_map_copy_entry __P((vm_map_t, vm_map_t, vm_map_entry_t, vm_map_entry_t)); static void vm_map_simplify_entry __P((vm_map_t, vm_map_entry_t)); +static __pure int vm_map_simplify_okay __P((vm_map_entry_t entry1, + vm_map_entry_t entry2)); void vm_map_startup() @@ -230,7 +232,6 @@ vmspace_alloc(min, max, pageable) vm_map_init(&vm->vm_map, min, max, pageable); pmap_pinit(&vm->vm_pmap); vm->vm_map.pmap = &vm->vm_pmap; /* XXX */ - vm->vm_pmap.pm_map = &vm->vm_map; vm->vm_refcnt = 1; return (vm); } @@ -634,8 +635,8 @@ vm_map_insert(map, object, offset, start, end, prot, max, cow) return (KERN_NO_SPACE); if ((prev_entry != &map->header) && + (object == NULL) && (prev_entry->end == start) && - ((object == NULL) || (prev_entry->object.vm_object == object)) && (prev_entry->is_a_map == FALSE) && (prev_entry->is_sub_map == FALSE) && (prev_entry->inheritance == VM_INHERIT_DEFAULT) && @@ -648,24 +649,22 @@ vm_map_insert(map, object, offset, start, end, prot, max, cow) * See if we can avoid creating a new entry by extending one of our * neighbors. */ - if (object == NULL) { - if (vm_object_coalesce(prev_entry->object.vm_object, - OFF_TO_IDX(prev_entry->offset), - (vm_size_t) (prev_entry->end - - prev_entry->start), - (vm_size_t) (end - prev_entry->end))) { + if (vm_object_coalesce(prev_entry->object.vm_object, + OFF_TO_IDX(prev_entry->offset), + (vm_size_t) (prev_entry->end + - prev_entry->start), + (vm_size_t) (end - prev_entry->end))) { - /* - * Coalesced the two objects - can extend the - * previous map entry to include the new - * range. - */ - map->size += (end - prev_entry->end); - prev_entry->end = end; - prev_object = prev_entry->object.vm_object; - default_pager_convert_to_swapq(prev_object); - return (KERN_SUCCESS); - } + /* + * Coalesced the two objects - can extend the + * previous map entry to include the new + * range. + */ + map->size += (end - prev_entry->end); + prev_entry->end = end; + prev_object = prev_entry->object.vm_object; + default_pager_convert_to_swapq(prev_object); + return (KERN_SUCCESS); } } /* @@ -707,9 +706,10 @@ vm_map_insert(map, object, offset, start, end, prot, max, cow) /* * Update the free space hint */ - if ((map->first_free == prev_entry) && - (prev_entry->end >= new_entry->start)) - map->first_free = new_entry; + if (map->first_free == prev_entry) { + if (prev_entry->end == new_entry->start) + map->first_free = new_entry; + } default_pager_convert_to_swapq(object); return (KERN_SUCCESS); @@ -739,8 +739,9 @@ vm_map_findspace(map, start, length, addr) * at this address, we have to start after it. */ if (start == map->min_offset) { - if ((entry = map->first_free) != &map->header) + if ((entry = map->first_free) != &map->header) { start = entry->end; + } } else { vm_map_entry_t tmp; @@ -821,12 +822,39 @@ vm_map_find(map, object, offset, addr, length, find_space, prot, max, cow) return (result); } +static __pure int +vm_map_simplify_okay(entry1, entry2) + vm_map_entry_t entry1, entry2; +{ + if ((entry1->end != entry2->start) || + (entry1->object.vm_object != entry2->object.vm_object)) + return 0; + if (entry1->object.vm_object) { + if (entry1->object.vm_object->behavior != + entry2->object.vm_object->behavior) + return 0; + if (entry1->offset + (entry1->end - entry1->start) != + entry2->offset) + return 0; + } + if ((entry1->needs_copy != entry2->needs_copy) || + (entry1->copy_on_write != entry2->copy_on_write) || + (entry1->protection != entry2->protection) || + (entry1->max_protection != entry2->max_protection) || + (entry1->inheritance != entry2->inheritance) || + (entry1->is_sub_map != FALSE) || + (entry1->is_a_map != FALSE) || + (entry1->wired_count != 0) || + (entry2->is_sub_map != FALSE) || + (entry2->is_a_map != FALSE) || + (entry2->wired_count != 0)) + return 0; + + return 1; +} + /* * vm_map_simplify_entry: [ internal use only ] - * - * Simplify the given map entry by: - * removing extra sharing maps - * [XXX maybe later] merging with a neighbor */ static void vm_map_simplify_entry(map, entry) @@ -834,34 +862,13 @@ vm_map_simplify_entry(map, entry) vm_map_entry_t entry; { vm_map_entry_t next, prev; - vm_size_t nextsize, prevsize, esize; - /* - * If this entry corresponds to a sharing map, then see if we can - * remove the level of indirection. If it's not a sharing map, then it - * points to a VM object, so see if we can merge with either of our - * neighbors. - */ - - if (entry->is_sub_map || entry->is_a_map || entry->wired_count) + if (entry->is_a_map || entry->is_sub_map || entry->wired_count) return; prev = entry->prev; if (prev != &map->header) { - prevsize = prev->end - prev->start; - if ( (prev->end == entry->start) && - (prev->object.vm_object == entry->object.vm_object) && - (!prev->object.vm_object || (prev->object.vm_object->behavior == entry->object.vm_object->behavior)) && - (!prev->object.vm_object || - (prev->offset + prevsize == entry->offset)) && - (prev->needs_copy == entry->needs_copy) && - (prev->copy_on_write == entry->copy_on_write) && - (prev->protection == entry->protection) && - (prev->max_protection == entry->max_protection) && - (prev->inheritance == entry->inheritance) && - (prev->is_a_map == FALSE) && - (prev->is_sub_map == FALSE) && - (prev->wired_count == 0)) { + if ( vm_map_simplify_okay(prev, entry)) { if (map->first_free == prev) map->first_free = entry; if (map->hint == prev) @@ -877,21 +884,7 @@ vm_map_simplify_entry(map, entry) next = entry->next; if (next != &map->header) { - nextsize = next->end - next->start; - esize = entry->end - entry->start; - if ((entry->end == next->start) && - (next->object.vm_object == entry->object.vm_object) && - (!next->object.vm_object || (next->object.vm_object->behavior == entry->object.vm_object->behavior)) && - (!entry->object.vm_object || - (entry->offset + esize == next->offset)) && - (next->needs_copy == entry->needs_copy) && - (next->copy_on_write == entry->copy_on_write) && - (next->protection == entry->protection) && - (next->max_protection == entry->max_protection) && - (next->inheritance == entry->inheritance) && - (next->is_a_map == FALSE) && - (next->is_sub_map == FALSE) && - (next->wired_count == 0)) { + if ( vm_map_simplify_okay(entry, next)) { if (map->first_free == next) map->first_free = entry; if (map->hint == next) @@ -904,6 +897,7 @@ vm_map_simplify_entry(map, entry) } } } + /* * vm_map_clip_start: [ internal use only ] * @@ -1841,6 +1835,21 @@ vm_map_remove(map, start, end) } /* + * vm_map_remove_userspace: + * Removes the user portion of the address space. + */ +void +vm_map_remove_userspace(map) + register vm_map_t map; +{ + vm_map_lock(map); + pmap_remove_pages(map->pmap, VM_MIN_ADDRESS, VM_MAXUSER_ADDRESS); + vm_map_delete(map, VM_MIN_ADDRESS, VM_MAXUSER_ADDRESS); + vm_map_unlock(map); + return; +} + +/* * vm_map_check_protection: * * Assert that the target map allows the specified @@ -2257,8 +2266,8 @@ RetryLookup:; lock_write_to_read(&share_map->lock); } - if (entry->object.vm_object != NULL) - default_pager_convert_to_swapq(entry->object.vm_object); + default_pager_convert_to_swapq(entry->object.vm_object); + /* * Return the object/offset from this entry. If the entry was * copy-on-write or empty, it has been fixed up. diff --git a/sys/vm/vm_map.h b/sys/vm/vm_map.h index 3ba8375..a50fa62 100644 --- a/sys/vm/vm_map.h +++ b/sys/vm/vm_map.h @@ -61,7 +61,7 @@ * any improvements or extensions that they make and grant Carnegie the * rights to redistribute these changes. * - * $Id: vm_map.h,v 1.12 1996/01/30 23:02:35 mpp Exp $ + * $Id: vm_map.h,v 1.13 1996/05/19 07:36:48 dyson Exp $ */ /* @@ -233,6 +233,7 @@ int vm_map_clean __P((vm_map_t, vm_offset_t, vm_offset_t, boolean_t, boolean_t)) int vm_map_protect __P((vm_map_t, vm_offset_t, vm_offset_t, vm_prot_t, boolean_t)); void vm_map_reference __P((vm_map_t)); int vm_map_remove __P((vm_map_t, vm_offset_t, vm_offset_t)); +void vm_map_remove_userspace __P((vm_map_t)); void vm_map_simplify __P((vm_map_t, vm_offset_t)); void vm_map_startup __P((void)); int vm_map_submap __P((vm_map_t, vm_offset_t, vm_offset_t, vm_map_t)); diff --git a/sys/vm/vm_mmap.c b/sys/vm/vm_mmap.c index 697baf6..ff0e79c 100644 --- a/sys/vm/vm_mmap.c +++ b/sys/vm/vm_mmap.c @@ -38,7 +38,7 @@ * from: Utah $Hdr: vm_mmap.c 1.6 91/10/21$ * * @(#)vm_mmap.c 8.4 (Berkeley) 1/12/94 - * $Id: vm_mmap.c,v 1.43 1996/05/19 07:36:49 dyson Exp $ + * $Id: vm_mmap.c,v 1.44 1996/05/31 00:38:00 dyson Exp $ */ /* @@ -72,6 +72,7 @@ #include <vm/vm_extern.h> #include <vm/vm_kern.h> #include <vm/vm_page.h> +#include <vm/loadaout.h> #ifndef _SYS_SYSPROTO_H_ struct sbrk_args { @@ -689,10 +690,10 @@ mincore(p, uap, retval) if (m) { mincoreinfo = MINCORE_INCORE; if (m->dirty || - pmap_is_modified(VM_PAGE_TO_PHYS(m))) + pmap_tc_modified(m)) mincoreinfo |= MINCORE_MODIFIED_OTHER; if ((m->flags & PG_REFERENCED) || - pmap_is_referenced(VM_PAGE_TO_PHYS(m))) + pmap_tc_referenced(VM_PAGE_TO_PHYS(m))) mincoreinfo |= MINCORE_REFERENCED_OTHER; } } @@ -844,7 +845,7 @@ vm_mmap(map, addr, size, prot, maxprot, flags, handle, foff) vm_ooffset_t foff; { boolean_t fitit; - vm_object_t object, object2; + vm_object_t object; struct vnode *vp = NULL; objtype_t type; int rv = KERN_SUCCESS; @@ -916,29 +917,13 @@ vm_mmap(map, addr, size, prot, maxprot, flags, handle, foff) flags |= MAP_SHARED; } - object2 = NULL; docow = 0; if ((flags & (MAP_ANON|MAP_SHARED)) == 0) { - docow = MAP_COPY_ON_WRITE; - if (objsize < size) { - object2 = vm_object_allocate( OBJT_DEFAULT, - OFF_TO_IDX(size - (foff & ~PAGE_MASK))); - object2->backing_object = object; - object2->backing_object_offset = foff; - TAILQ_INSERT_TAIL(&object->shadow_head, - object2, shadow_list); - ++object->shadow_count; - } else { - docow |= MAP_COPY_NEEDED; - } + docow = MAP_COPY_ON_WRITE|MAP_COPY_NEEDED; } - if (object2) - rv = vm_map_find(map, object2, 0, addr, size, fitit, - prot, maxprot, docow); - else - rv = vm_map_find(map, object, foff, addr, size, fitit, - prot, maxprot, docow); + rv = vm_map_find(map, object, foff, addr, size, fitit, + prot, maxprot, docow); if (rv != KERN_SUCCESS) { @@ -947,10 +932,7 @@ vm_mmap(map, addr, size, prot, maxprot, flags, handle, foff) * object if it's an unnamed anonymous mapping * or named anonymous without other references. */ - if (object2) - vm_object_deallocate(object2); - else - vm_object_deallocate(object); + vm_object_deallocate(object); goto out; } @@ -985,3 +967,171 @@ out: return (EINVAL); } } + +#ifdef notyet +/* + * Efficient mapping of a .text+.data+.bss object + */ +int +vm_mapaout(map, baseaddr, vp, foff, textsize, datasize, bsssize, addr) + vm_map_t map; + vm_offset_t baseaddr; + struct vnode *vp; + vm_ooffset_t foff; + register vm_size_t textsize, datasize, bsssize; + vm_offset_t *addr; +{ + vm_object_t object; + int rv; + vm_pindex_t objpsize; + struct proc *p = curproc; + + vm_size_t totalsize; + vm_size_t textend; + struct vattr vat; + int error; + + textsize = round_page(textsize); + datasize = round_page(datasize); + bsssize = round_page(bsssize); + totalsize = textsize + datasize + bsssize; + + vm_map_lock(map); + /* + * If baseaddr == -1, then we need to search for space. Otherwise, + * we need to be loaded into a certain spot. + */ + if (baseaddr != (vm_offset_t) -1) { + if (vm_map_findspace(map, baseaddr, totalsize, addr)) { + goto outnomem; + } + + if(*addr != baseaddr) { + goto outnomem; + } + } else { + baseaddr = round_page(p->p_vmspace->vm_daddr + MAXDSIZ); + if (vm_map_findspace(map, baseaddr, totalsize, addr)) { + goto outnomem; + } + } + + if (foff & PAGE_MASK) { + vm_map_unlock(map); + return EINVAL; + } + + /* + * get the object size to allocate + */ + error = VOP_GETATTR(vp, &vat, p->p_ucred, p); + if (error) { + vm_map_unlock(map); + return error; + } + objpsize = OFF_TO_IDX(round_page(vat.va_size)); + + /* + * Alloc/reference the object + */ + object = vm_pager_allocate(OBJT_VNODE, vp, + objpsize, VM_PROT_ALL, foff); + if (object == NULL) { + goto outnomem; + } + + /* + * Insert .text into the map + */ + textend = *addr + textsize; + rv = vm_map_insert(map, object, foff, + *addr, textend, + VM_PROT_READ|VM_PROT_EXECUTE, VM_PROT_ALL, + MAP_COPY_ON_WRITE|MAP_COPY_NEEDED); + if (rv != KERN_SUCCESS) { + vm_object_deallocate(object); + goto out; + } + + /* + * Insert .data into the map, if there is any to map. + */ + if (datasize != 0) { + object->ref_count++; + rv = vm_map_insert(map, object, foff + textsize, + textend, textend + datasize, + VM_PROT_ALL, VM_PROT_ALL, + MAP_COPY_ON_WRITE|MAP_COPY_NEEDED); + if (rv != KERN_SUCCESS) { + --object->ref_count; + vm_map_delete(map, *addr, textend); + goto out; + } + } + + /* + * Preload the page tables + */ + pmap_object_init_pt(map->pmap, *addr, + object, (vm_pindex_t) OFF_TO_IDX(foff), + textsize + datasize, 1); + + /* + * Get the space for bss. + */ + if (bsssize != 0) { + rv = vm_map_insert(map, NULL, 0, + textend + datasize, + *addr + totalsize, + VM_PROT_ALL, VM_PROT_ALL, 0); + } + if (rv != KERN_SUCCESS) { + vm_map_delete(map, *addr, textend + datasize + bsssize); + } + +out: + vm_map_unlock(map); + switch (rv) { + case KERN_SUCCESS: + return 0; + case KERN_INVALID_ADDRESS: + case KERN_NO_SPACE: + return ENOMEM; + case KERN_PROTECTION_FAILURE: + return EACCES; + default: + return EINVAL; + } +outnomem: + vm_map_unlock(map); + return ENOMEM; +} + + +int +mapaout(struct proc *p, struct mapaout_args *uap, int *retval) +{ + + register struct filedesc *fdp = p->p_fd; + struct file *fp; + struct vnode *vp; + int rtval; + + if (((unsigned) uap->fd) >= fdp->fd_nfiles || + (fp = fdp->fd_ofiles[uap->fd]) == NULL) + return (EBADF); + if (fp->f_type != DTYPE_VNODE) + return (EINVAL); + + vp = (struct vnode *) fp->f_data; + if ((vp->v_type != VREG) && (vp->v_type != VCHR)) + return (EINVAL); + + rtval = vm_mapaout( &p->p_vmspace->vm_map, + uap->addr, vp, uap->offset, + uap->textsize, uap->datasize, uap->bsssize, + (vm_offset_t *)retval); + + return rtval; +} +#endif diff --git a/sys/vm/vm_object.c b/sys/vm/vm_object.c index 473e2397..39c7ee0 100644 --- a/sys/vm/vm_object.c +++ b/sys/vm/vm_object.c @@ -61,7 +61,7 @@ * any improvements or extensions that they make and grant Carnegie the * rights to redistribute these changes. * - * $Id: vm_object.c,v 1.75 1996/05/31 00:38:02 dyson Exp $ + * $Id: vm_object.c,v 1.76 1996/06/16 20:37:30 dyson Exp $ */ /* @@ -219,7 +219,6 @@ vm_object_allocate(type, size) result = (vm_object_t) malloc((u_long) sizeof *result, M_VMOBJ, M_WAITOK); - _vm_object_allocate(type, size, result); return (result); @@ -231,7 +230,7 @@ vm_object_allocate(type, size) * * Gets another reference to the given object. */ -inline void +void vm_object_reference(object) register vm_object_t object; { @@ -403,8 +402,10 @@ vm_object_terminate(object) * from paging queues. */ while ((p = TAILQ_FIRST(&object->memq)) != NULL) { +#if defined(DIAGNOSTIC) if (p->flags & PG_BUSY) printf("vm_object_terminate: freeing busy page\n"); +#endif PAGE_WAKEUP(p); vm_page_free(p); cnt.v_pfree++; diff --git a/sys/vm/vm_page.c b/sys/vm/vm_page.c index 4a95e6e..79dd930 100644 --- a/sys/vm/vm_page.c +++ b/sys/vm/vm_page.c @@ -34,7 +34,7 @@ * SUCH DAMAGE. * * from: @(#)vm_page.c 7.4 (Berkeley) 5/7/91 - * $Id: vm_page.c,v 1.59 1996/06/21 05:39:22 dyson Exp $ + * $Id: vm_page.c,v 1.60 1996/06/26 05:39:25 dyson Exp $ */ /* @@ -385,7 +385,7 @@ vm_page_hash(object, pindex) * The object and page must be locked, and must be splhigh. */ -__inline void +void vm_page_insert(m, object, pindex) register vm_page_t m; register vm_object_t object; @@ -434,7 +434,7 @@ vm_page_insert(m, object, pindex) * The object and page must be locked, and at splhigh. */ -__inline void +void vm_page_remove(m) register vm_page_t m; { @@ -523,34 +523,19 @@ vm_page_rename(m, new_object, new_pindex) } /* - * vm_page_unqueue without any wakeup - */ -__inline void -vm_page_unqueue_nowakeup(m) - vm_page_t m; -{ - int queue = m->queue; - if (queue != PQ_NONE) { - m->queue = PQ_NONE; - TAILQ_REMOVE(vm_page_queues[queue].pl, m, pageq); - --(*vm_page_queues[queue].cnt); - } -} - - -/* * vm_page_unqueue must be called at splhigh(); */ __inline void -vm_page_unqueue(m) +vm_page_unqueue(m, wakeup) vm_page_t m; + int wakeup; { int queue = m->queue; if (queue != PQ_NONE) { m->queue = PQ_NONE; TAILQ_REMOVE(vm_page_queues[queue].pl, m, pageq); --(*vm_page_queues[queue].cnt); - if (queue == PQ_CACHE) { + if ((queue == PQ_CACHE) && wakeup) { if ((cnt.v_cache_count + cnt.v_free_count) < (cnt.v_free_reserved + cnt.v_cache_min)) pagedaemon_wakeup(); @@ -736,7 +721,7 @@ vm_page_activate(m) if (m->queue == PQ_CACHE) cnt.v_reactivated++; - vm_page_unqueue(m); + vm_page_unqueue(m, 1); if (m->wire_count == 0) { TAILQ_INSERT_TAIL(&vm_page_queue_active, m, pageq); @@ -751,7 +736,7 @@ vm_page_activate(m) /* * helper routine for vm_page_free and vm_page_free_zero */ -static int +__inline static int vm_page_freechk_and_unqueue(m) vm_page_t m; { @@ -769,7 +754,7 @@ vm_page_freechk_and_unqueue(m) } vm_page_remove(m); - vm_page_unqueue_nowakeup(m); + vm_page_unqueue(m,0); if ((m->flags & PG_FICTITIOUS) != 0) { return 0; } @@ -788,7 +773,7 @@ vm_page_freechk_and_unqueue(m) /* * helper routine for vm_page_free and vm_page_free_zero */ -static __inline void +__inline static void vm_page_free_wakeup() { @@ -895,7 +880,7 @@ vm_page_wire(m) if (m->wire_count == 0) { s = splvm(); - vm_page_unqueue(m); + vm_page_unqueue(m,1); splx(s); cnt.v_wire_count++; } @@ -961,7 +946,7 @@ vm_page_deactivate(m) if (m->wire_count == 0 && m->hold_count == 0) { if (m->queue == PQ_CACHE) cnt.v_reactivated++; - vm_page_unqueue(m); + vm_page_unqueue(m,1); TAILQ_INSERT_TAIL(&vm_page_queue_inactive, m, pageq); m->queue = PQ_INACTIVE; cnt.v_inactive_count++; @@ -992,7 +977,7 @@ vm_page_cache(m) panic("vm_page_cache: caching a dirty page, pindex: %d", m->pindex); } s = splvm(); - vm_page_unqueue_nowakeup(m); + vm_page_unqueue(m,0); TAILQ_INSERT_TAIL(&vm_page_queue_cache, m, pageq); m->queue = PQ_CACHE; cnt.v_cache_count++; @@ -1031,7 +1016,7 @@ vm_page_set_validclean(m, base, size) m->valid |= pagebits; m->dirty &= ~pagebits; if( base == 0 && size == PAGE_SIZE) - pmap_clear_modify(VM_PAGE_TO_PHYS(m)); + pmap_tc_modified(m); } /* @@ -1071,10 +1056,8 @@ void vm_page_test_dirty(m) vm_page_t m; { - if ((m->dirty != VM_PAGE_BITS_ALL) && - pmap_is_modified(VM_PAGE_TO_PHYS(m))) { - m->dirty = VM_PAGE_BITS_ALL; - } + if (m->dirty != VM_PAGE_BITS_ALL) + pmap_tc_modified(m); } /* diff --git a/sys/vm/vm_page.h b/sys/vm/vm_page.h index c246deb..1680d4d 100644 --- a/sys/vm/vm_page.h +++ b/sys/vm/vm_page.h @@ -61,7 +61,7 @@ * any improvements or extensions that they make and grant Carnegie the * rights to redistribute these changes. * - * $Id: vm_page.h,v 1.28 1996/06/08 06:48:35 dyson Exp $ + * $Id: vm_page.h,v 1.29 1996/06/26 05:39:25 dyson Exp $ */ /* @@ -220,6 +220,7 @@ extern vm_offset_t last_phys_addr; /* physical address for last_page */ (m)->flags &= ~PG_BUSY; \ if ((m)->flags & PG_WANTED) { \ (m)->flags &= ~PG_WANTED; \ + (m)->flags |= PG_REFERENCED; \ wakeup((caddr_t) (m)); \ } \ } @@ -251,8 +252,7 @@ void vm_page_rename __P((vm_page_t, vm_object_t, vm_pindex_t)); vm_offset_t vm_page_startup __P((vm_offset_t, vm_offset_t, vm_offset_t)); void vm_page_unwire __P((vm_page_t)); void vm_page_wire __P((vm_page_t)); -void vm_page_unqueue __P((vm_page_t)); -void vm_page_unqueue_nowakeup __P((vm_page_t)); +void vm_page_unqueue __P((vm_page_t, int)); void vm_page_set_validclean __P((vm_page_t, int, int)); void vm_page_set_invalid __P((vm_page_t, int, int)); static __inline boolean_t vm_page_zero_fill __P((vm_page_t)); @@ -292,11 +292,11 @@ vm_page_protect(vm_page_t mem, int prot) { if (prot == VM_PROT_NONE) { if (mem->flags & (PG_WRITEABLE|PG_MAPPED)) { - pmap_page_protect(VM_PAGE_TO_PHYS(mem), prot); + pmap_page_protect(mem, prot); mem->flags &= ~(PG_WRITEABLE|PG_MAPPED); } } else if ((prot == VM_PROT_READ) && (mem->flags & PG_WRITEABLE)) { - pmap_page_protect(VM_PAGE_TO_PHYS(mem), prot); + pmap_page_protect(mem, prot); mem->flags &= ~PG_WRITEABLE; } } diff --git a/sys/vm/vm_pageout.c b/sys/vm/vm_pageout.c index 98ad6ef..26df38c 100644 --- a/sys/vm/vm_pageout.c +++ b/sys/vm/vm_pageout.c @@ -314,12 +314,9 @@ do_backward: } } - /* - * we allow reads during pageouts... - */ for (i = page_base; i < (page_base + pageout_count); i++) { mc[i]->flags |= PG_BUSY; - vm_page_protect(mc[i], VM_PROT_READ); + vm_page_protect(mc[i], VM_PROT_NONE); } return vm_pageout_flush(&mc[page_base], pageout_count, sync); @@ -359,7 +356,7 @@ vm_pageout_flush(mc, count, sync) * essentially lose the changes by pretending it * worked. */ - pmap_clear_modify(VM_PAGE_TO_PHYS(mt)); + pmap_tc_modified(mt); mt->dirty = 0; break; case VM_PAGER_ERROR: @@ -446,7 +443,7 @@ vm_pageout_object_deactivate_pages(map, object, desired, map_remove_only) continue; } - refcount = pmap_ts_referenced(VM_PAGE_TO_PHYS(p)); + refcount = pmap_tc_referenced(VM_PAGE_TO_PHYS(p)); if (refcount) { p->flags |= PG_REFERENCED; } else if (p->flags & PG_REFERENCED) { @@ -586,7 +583,7 @@ vm_pageout_scan() maxlaunder = (cnt.v_inactive_target > MAXLAUNDER) ? MAXLAUNDER : cnt.v_inactive_target; -rescan0: + maxscan = cnt.v_inactive_count; for( m = TAILQ_FIRST(&vm_page_queue_inactive); @@ -599,7 +596,7 @@ rescan0: cnt.v_pdpages++; if (m->queue != PQ_INACTIVE) { - goto rescan0; + break; } next = TAILQ_NEXT(m, pageq); @@ -621,32 +618,33 @@ rescan0: continue; } - if (m->object->ref_count == 0) { - m->flags &= ~PG_REFERENCED; - pmap_clear_reference(VM_PAGE_TO_PHYS(m)); - } else if (((m->flags & PG_REFERENCED) == 0) && - pmap_ts_referenced(VM_PAGE_TO_PHYS(m))) { - vm_page_activate(m); - continue; - } - - if ((m->flags & PG_REFERENCED) != 0) { - m->flags &= ~PG_REFERENCED; - pmap_clear_reference(VM_PAGE_TO_PHYS(m)); - vm_page_activate(m); - continue; - } + if (m->valid != 0) { + if (m->object->ref_count == 0) { + m->flags &= ~PG_REFERENCED; + pmap_tc_referenced(VM_PAGE_TO_PHYS(m)); + } else if (((m->flags & PG_REFERENCED) == 0) && + pmap_tc_referenced(VM_PAGE_TO_PHYS(m))) { + vm_page_activate(m); + continue; + } - if (m->dirty == 0) { - vm_page_test_dirty(m); - } else if (m->dirty != 0) { - m->dirty = VM_PAGE_BITS_ALL; - } + if ((m->flags & PG_REFERENCED) != 0) { + m->flags &= ~PG_REFERENCED; + pmap_tc_referenced(VM_PAGE_TO_PHYS(m)); + vm_page_activate(m); + continue; + } + if (m->dirty == 0) { + vm_page_test_dirty(m); + } else if (m->dirty != 0) { + m->dirty = VM_PAGE_BITS_ALL; + } + } if (m->valid == 0) { vm_page_protect(m, VM_PROT_NONE); vm_page_free(m); - cnt.v_dfree++; + ++cnt.v_dfree; ++pages_freed; } else if (m->dirty == 0) { vm_page_cache(m); @@ -788,7 +786,7 @@ rescan0: if (m->flags & PG_REFERENCED) { refcount += 1; } - refcount += pmap_ts_referenced(VM_PAGE_TO_PHYS(m)); + refcount += pmap_tc_referenced(VM_PAGE_TO_PHYS(m)); if (refcount) { m->act_count += ACT_ADVANCE + refcount; if (m->act_count > ACT_MAX) diff --git a/sys/vm/vnode_pager.c b/sys/vm/vnode_pager.c index 4cef671..3fe0ae3 100644 --- a/sys/vm/vnode_pager.c +++ b/sys/vm/vnode_pager.c @@ -38,7 +38,7 @@ * SUCH DAMAGE. * * from: @(#)vnode_pager.c 7.5 (Berkeley) 4/20/91 - * $Id: vnode_pager.c,v 1.59 1996/03/19 05:13:22 dyson Exp $ + * $Id: vnode_pager.c,v 1.60 1996/05/03 21:01:54 phk Exp $ */ /* @@ -525,7 +525,7 @@ vnode_pager_input_smlfs(object, m) } } vm_pager_unmap_page(kva); - pmap_clear_modify(VM_PAGE_TO_PHYS(m)); + pmap_tc_modified(m); m->flags &= ~PG_ZERO; if (error) { return VM_PAGER_ERROR; @@ -588,7 +588,7 @@ vnode_pager_input_old(object, m) } vm_pager_unmap_page(kva); } - pmap_clear_modify(VM_PAGE_TO_PHYS(m)); + pmap_tc_modified(m); m->dirty = 0; m->flags &= ~PG_ZERO; return error ? VM_PAGER_ERROR : VM_PAGER_OK; @@ -808,7 +808,7 @@ vnode_pager_leaf_getpages(object, m, count, reqpage) relpbuf(bp); for (i = 0; i < count; i++) { - pmap_clear_modify(VM_PAGE_TO_PHYS(m[i])); + pmap_tc_modified(m[i]); m[i]->dirty = 0; m[i]->valid = VM_PAGE_BITS_ALL; m[i]->flags &= ~PG_ZERO; |