diff options
author | dyson <dyson@FreeBSD.org> | 1996-01-19 04:00:31 +0000 |
---|---|---|
committer | dyson <dyson@FreeBSD.org> | 1996-01-19 04:00:31 +0000 |
commit | 8fc8a772af22f6e03233d248fa2dbd9b5c2bdd7d (patch) | |
tree | 3c31fd95ea745005a9cd6733db5a16f31bd828a6 | |
parent | 6755beedbf0ddaa9e66e91c8e74f620ede6bfad5 (diff) | |
download | FreeBSD-src-8fc8a772af22f6e03233d248fa2dbd9b5c2bdd7d.zip FreeBSD-src-8fc8a772af22f6e03233d248fa2dbd9b5c2bdd7d.tar.gz |
Eliminated many redundant vm_map_lookup operations for vm_mmap.
Speed up for vfs_bio -- addition of a routine bqrelse to greatly diminish
overhead for merged cache.
Efficiency improvement for vfs_cluster. It used to do a lot of redundant
calls to cluster_rbuild.
Correct the ordering for vrele of .text and release of credentials.
Use the selective tlb update for 486/586/P6.
Numerous fixes to the size of objects allocated for files. Additionally,
fixes in the various pagers.
Fixes for proper positioning of vnode_pager_setsize in msdosfs and ext2fs.
Fixes in the swap pager for exhausted resources. The pageout code
will not as readily thrash.
Change the page queue flags (PG_ACTIVE, PG_INACTIVE, PG_FREE, PG_CACHE) into
page queue indices (PQ_ACTIVE, PQ_INACTIVE, PQ_FREE, PQ_CACHE),
thereby improving efficiency of several routines.
Eliminate even more unnecessary vm_page_protect operations.
Significantly speed up process forks.
Make vm_object_page_clean more efficient, thereby eliminating the pause
that happens every 30 seconds.
Make sequential clustered writes B_ASYNC instead of B_DELWRI even in the
case of filesystems mounted async.
Fix a panic with busy pages when write clustering is done for non-VMIO
buffers.
59 files changed, 1604 insertions, 1016 deletions
diff --git a/sys/amd64/amd64/machdep.c b/sys/amd64/amd64/machdep.c index a9c4961..942d081 100644 --- a/sys/amd64/amd64/machdep.c +++ b/sys/amd64/amd64/machdep.c @@ -35,7 +35,7 @@ * SUCH DAMAGE. * * from: @(#)machdep.c 7.4 (Berkeley) 6/3/91 - * $Id: machdep.c,v 1.168 1996/01/04 21:10:53 wollman Exp $ + * $Id: machdep.c,v 1.169 1996/01/05 20:12:19 wollman Exp $ */ #include "npx.h" @@ -1315,7 +1315,7 @@ init386(first) */ /* * XXX text protection is temporarily (?) disabled. The limit was - * i386_btop(i386_round_page(etext)) - 1. + * i386_btop(round_page(etext)) - 1. */ gdt_segs[GCODE_SEL].ssd_limit = i386_btop(0) - 1; gdt_segs[GDATA_SEL].ssd_limit = i386_btop(0) - 1; diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c index e0410ef..ba0a8b5 100644 --- a/sys/amd64/amd64/pmap.c +++ b/sys/amd64/amd64/pmap.c @@ -39,7 +39,7 @@ * SUCH DAMAGE. * * from: @(#)pmap.c 7.7 (Berkeley) 5/12/91 - * $Id: pmap.c,v 1.71 1995/12/17 07:19:15 bde Exp $ + * $Id: pmap.c,v 1.72 1995/12/22 18:21:26 bde Exp $ */ /* @@ -91,6 +91,7 @@ #include <sys/malloc.h> #include <sys/msgbuf.h> #include <sys/queue.h> +#include <sys/vmmeter.h> #include <vm/vm.h> #include <vm/vm_param.h> @@ -108,6 +109,10 @@ #include <i386/isa/isa.h> +#define PMAP_KEEP_PDIRS + +static void init_pv_entries __P((int)); + /* * Get PDEs and PTEs for user/kernel address space */ @@ -164,7 +169,6 @@ static pt_entry_t * static pv_entry_t get_pv_entry __P((void)); static void i386_protection_init __P((void)); -static void init_pv_entries __P((int npg)); static void pmap_alloc_pv_entry __P((void)); static void pmap_changebit __P((vm_offset_t pa, int bit, boolean_t setem)); static void pmap_enter_quick __P((pmap_t pmap, vm_offset_t va, @@ -179,6 +183,33 @@ static boolean_t pmap_testbit __P((vm_offset_t pa, int bit)); /* + * The below are finer grained pmap_update routines. These eliminate + * the gratuitious tlb flushes on non-i386 architectures. 
+ */ +static __inline void +pmap_update_1pg( vm_offset_t va) { +#if defined(I386_CPU) + if (cpuclass == CPUCLASS_I386) + pmap_update(); + else +#endif + __asm __volatile(".byte 0xf,0x1,0x38": :"a" (va)); +} + +static __inline void +pmap_update_2pg( vm_offset_t va1, vm_offset_t va2) { +#if defined(I386_CPU) + if (cpuclass == CPUCLASS_I386) { + pmap_update(); + } else +#endif + { + __asm __volatile(".byte 0xf,0x1,0x38": :"a" (va1)); + __asm __volatile(".byte 0xf,0x1,0x38": :"a" (va2)); + } +} + +/* * Routine: pmap_pte * Function: * Extract the page table entry associated @@ -186,7 +217,7 @@ static boolean_t * [ what about induced faults -wfj] */ -inline pt_entry_t * __pure +__inline pt_entry_t * __pure pmap_pte(pmap, va) register pmap_t pmap; vm_offset_t va; @@ -248,7 +279,7 @@ pmap_extract(pmap, va) /* * determine if a page is managed (memory vs. device) */ -static inline int +static __inline int pmap_is_managed(pa) vm_offset_t pa; { @@ -274,8 +305,8 @@ pmap_pte_vm_page(pmap, pt) { vm_page_t m; - pt = i386_trunc_page(pt); - pt = (pt - UPT_MIN_ADDRESS) / NBPG; + pt = trunc_page(pt); + pt = (pt - UPT_MIN_ADDRESS) / PAGE_SIZE; pt = ((vm_offset_t) pmap->pm_pdir[pt]) & PG_FRAME; m = PHYS_TO_VM_PAGE(pt); return m; @@ -301,7 +332,7 @@ pmap_use_pt(pmap, va) /* * Unwire a page table page */ -inline void +__inline void pmap_unuse_pt(pmap, va) pmap_t pmap; vm_offset_t va; @@ -319,8 +350,19 @@ pmap_unuse_pt(pmap, va) (m->hold_count == 0) && (m->wire_count == 0) && (va < KPT_MIN_ADDRESS)) { +/* + * We don't free page-table-pages anymore because it can have a negative + * impact on perf at times. Now we just deactivate, and it'll get cleaned + * up if needed... Also, if the page ends up getting used, it will fault + * back into the process address space and be reactivated. 
+ */ +#ifdef PMAP_FREE_OLD_PTES pmap_page_protect(VM_PAGE_TO_PHYS(m), VM_PROT_NONE); vm_page_free(m); +#else + m->dirty = 0; + vm_page_deactivate(m); +#endif } } @@ -354,7 +396,7 @@ pmap_bootstrap(firstaddr, loadaddr) avail_start = firstaddr; /* - * XXX The calculation of virtual_avail is wrong. It's NKPT*NBPG too + * XXX The calculation of virtual_avail is wrong. It's NKPT*PAGE_SIZE too * large. It should instead be correctly calculated in locore.s and * not based on 'first' (which is a physical address, not a virtual * address, for the start of unused physical memory). The kernel @@ -386,7 +428,7 @@ pmap_bootstrap(firstaddr, loadaddr) * mapping of pages. */ #define SYSMAP(c, p, v, n) \ - v = (c)va; va += ((n)*NBPG); p = pte; pte += (n); + v = (c)va; va += ((n)*PAGE_SIZE); p = pte; pte += (n); va = virtual_avail; pte = pmap_pte(kernel_pmap, va); @@ -433,14 +475,14 @@ pmap_init(phys_start, phys_end) */ vm_first_phys = phys_avail[0]; for (i = 0; phys_avail[i + 1]; i += 2); - npg = (phys_avail[(i - 2) + 1] - vm_first_phys) / NBPG; + npg = (phys_avail[(i - 2) + 1] - vm_first_phys) / PAGE_SIZE; /* * Allocate memory for random pmap data structures. Includes the * pv_head_table. 
*/ s = (vm_size_t) (sizeof(struct pv_entry) * npg); - s = i386_round_page(s); + s = round_page(s); addr = (vm_offset_t) kmem_alloc(kernel_map, s); pv_table = (pv_entry_t) addr; @@ -476,6 +518,38 @@ pmap_map(virt, start, end, prot) return (virt); } +#ifdef PMAP_KEEP_PDIRS +int nfreepdir; +caddr_t *pdirlist; +#define NFREEPDIR 3 + +static void * +pmap_getpdir() { + caddr_t *pdir; + if (pdirlist) { + --nfreepdir; + pdir = pdirlist; + pdirlist = (caddr_t *) *pdir; + bzero( (caddr_t) pdir, PAGE_SIZE); + } else { + pdir = (caddr_t *) kmem_alloc(kernel_map, PAGE_SIZE); + } + + return (void *) pdir; +} + +static void +pmap_freepdir(void *pdir) { + if (nfreepdir > NFREEPDIR) { + kmem_free(kernel_map, (vm_offset_t) pdir, PAGE_SIZE); + } else { + * (caddr_t *) pdir = (caddr_t) pdirlist; + pdirlist = (caddr_t *) pdir; + ++nfreepdir; + } +} +#endif + /* * Initialize a preallocated and zeroed pmap structure, * such as one in a vmspace structure. @@ -488,7 +562,12 @@ pmap_pinit(pmap) * No need to allocate page table space yet but we do need a valid * page directory table. 
*/ + +#ifdef PMAP_KEEP_PDIRS + pmap->pm_pdir = pmap_getpdir(); +#else pmap->pm_pdir = (pd_entry_t *) kmem_alloc(kernel_map, PAGE_SIZE); +#endif /* wire in kernel global address entries */ bcopy(PTD + KPTDI, pmap->pm_pdir + KPTDI, nkpt * PTESIZE); @@ -519,14 +598,14 @@ pmap_growkernel(vm_offset_t addr) kernel_vm_end = KERNBASE; nkpt = 0; while (pdir_pde(PTD, kernel_vm_end)) { - kernel_vm_end = (kernel_vm_end + NBPG * NPTEPG) & ~(NBPG * NPTEPG - 1); + kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1); ++nkpt; } } - addr = (addr + NBPG * NPTEPG) & ~(NBPG * NPTEPG - 1); + addr = (addr + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1); while (kernel_vm_end < addr) { if (pdir_pde(PTD, kernel_vm_end)) { - kernel_vm_end = (kernel_vm_end + NBPG * NPTEPG) & ~(NBPG * NPTEPG - 1); + kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1); continue; } ++nkpt; @@ -548,7 +627,7 @@ pmap_growkernel(vm_offset_t addr) } } *pmap_pde(kernel_pmap, kernel_vm_end) = pdir_pde(PTD, kernel_vm_end); - kernel_vm_end = (kernel_vm_end + NBPG * NPTEPG) & ~(NBPG * NPTEPG - 1); + kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1); } splx(s); } @@ -583,7 +662,11 @@ void pmap_release(pmap) register struct pmap *pmap; { +#ifdef PMAP_KEEP_PDIRS + pmap_freepdir( (void *)pmap->pm_pdir); +#else kmem_free(kernel_map, (vm_offset_t) pmap->pm_pdir, PAGE_SIZE); +#endif } /* @@ -598,7 +681,7 @@ pmap_reference(pmap) } } -#define PV_FREELIST_MIN ((NBPG / sizeof (struct pv_entry)) / 2) +#define PV_FREELIST_MIN ((PAGE_SIZE / sizeof (struct pv_entry)) / 2) /* * Data for the pv entry allocation mechanism @@ -611,7 +694,7 @@ static int npvvapg; /* * free the pv_entry back to the free list */ -inline static void +static __inline void free_pv_entry(pv) pv_entry_t pv; { @@ -628,7 +711,7 @@ free_pv_entry(pv) * the memory allocation is performed bypassing the malloc code * because of the possibility of allocations at interrupt time. 
*/ -static inline pv_entry_t +static __inline pv_entry_t get_pv_entry() { pv_entry_t tmp; @@ -677,7 +760,7 @@ pmap_alloc_pv_entry() int i; pv_entry_t entry; - newentries = (NBPG / sizeof(struct pv_entry)); + newentries = (PAGE_SIZE / sizeof(struct pv_entry)); /* * wire the page */ @@ -692,7 +775,7 @@ pmap_alloc_pv_entry() /* * update the allocation pointers */ - pvva += NBPG; + pvva += PAGE_SIZE; --npvvapg; /* @@ -724,8 +807,9 @@ init_pv_entries(npg) * kvm space is fairly cheap, be generous!!! (the system can panic if * this is too small.) */ - npvvapg = ((npg * PVSPERPAGE) * sizeof(struct pv_entry) + NBPG - 1) / NBPG; - pvva = kmem_alloc_pageable(kernel_map, npvvapg * NBPG); + npvvapg = ((npg * PVSPERPAGE) * sizeof(struct pv_entry) + + PAGE_SIZE - 1) / PAGE_SIZE; + pvva = kmem_alloc_pageable(kernel_map, npvvapg * PAGE_SIZE); /* * get the first batch of entries */ @@ -775,11 +859,10 @@ pmap_remove_entry(pmap, pv, va) pv->pv_pmap = NULL; } } else { - for (npv = pv->pv_next; npv; npv = npv->pv_next) { + for (npv = pv->pv_next; npv; (pv = npv, npv = pv->pv_next)) { if (pmap == npv->pv_pmap && va == npv->pv_va) { break; } - pv = npv; } if (npv) { pv->pv_next = npv->pv_next; @@ -817,7 +900,7 @@ pmap_remove(pmap, sva, eva) * common operation and easy to short circuit some * code. 
*/ - if ((sva + NBPG) == eva) { + if ((sva + PAGE_SIZE) == eva) { if (*pmap_pde(pmap, sva) == 0) return; @@ -839,7 +922,7 @@ pmap_remove(pmap, sva, eva) if (pmap_is_managed(pa)) { if ((int) oldpte & PG_M) { - if (sva < USRSTACK + (UPAGES * NBPG) || + if (sva < USRSTACK + (UPAGES * PAGE_SIZE) || (sva >= KERNBASE && (sva < clean_sva || sva >= clean_eva))) { PHYS_TO_VM_PAGE(pa)->dirty |= VM_PAGE_BITS_ALL; } @@ -848,7 +931,7 @@ pmap_remove(pmap, sva, eva) pmap_remove_entry(pmap, pv, sva); } pmap_unuse_pt(pmap, sva); - pmap_update(); + pmap_update_1pg(sva); return; } sva = i386_btop(sva); @@ -913,12 +996,12 @@ pmap_remove(pmap, sva, eva) */ pa = ((int) oldpte) & PG_FRAME; if (!pmap_is_managed(pa)) { - pmap_unuse_pt(pmap, va); + pmap_unuse_pt(pmap, (vm_offset_t) va); ++sva; continue; } if ((int) oldpte & PG_M) { - if (sva < USRSTACK + (UPAGES * NBPG) || + if (sva < USRSTACK + (UPAGES * PAGE_SIZE) || (sva >= KERNBASE && (sva < clean_sva || sva >= clean_eva))) { PHYS_TO_VM_PAGE(pa)->dirty |= VM_PAGE_BITS_ALL; } @@ -947,7 +1030,7 @@ static void pmap_remove_all(pa) vm_offset_t pa; { - register pv_entry_t pv, npv; + register pv_entry_t pv, opv, npv; register pt_entry_t *pte, *ptp; vm_offset_t va; struct pmap *pmap; @@ -965,13 +1048,15 @@ pmap_remove_all(pa) if (!pmap_is_managed(pa)) return; - pa = i386_trunc_page(pa); - pv = pa_to_pvh(pa); - m = PHYS_TO_VM_PAGE(pa); + pa = trunc_page(pa); + opv = pa_to_pvh(pa); + if (opv->pv_pmap == NULL) + return; + m = PHYS_TO_VM_PAGE(pa); s = splhigh(); - while (pv->pv_pmap != NULL) { - pmap = pv->pv_pmap; + pv = opv; + while (pv && ((pmap = pv->pv_pmap) != NULL)) { ptp = get_pt_entry(pmap); va = pv->pv_va; pte = ptp + i386_btop(va); @@ -979,13 +1064,14 @@ pmap_remove_all(pa) pmap->pm_stats.wired_count--; if (*pte) { pmap->pm_stats.resident_count--; - anyvalid++; + if (curproc != pageproc) + anyvalid++; /* * Update the vm_page_t clean and reference bits. 
*/ if ((int) *pte & PG_M) { - if (va < USRSTACK + (UPAGES * NBPG) || + if (va < USRSTACK + (UPAGES * PAGE_SIZE) || (va >= KERNBASE && (va < clean_sva || va >= clean_eva))) { PHYS_TO_VM_PAGE(pa)->dirty |= VM_PAGE_BITS_ALL; } @@ -993,14 +1079,17 @@ pmap_remove_all(pa) *pte = 0; pmap_unuse_pt(pmap, va); } + pv = pv->pv_next; + } + + for (pv = opv->pv_next; pv; pv = npv) { npv = pv->pv_next; - if (npv) { - *pv = *npv; - free_pv_entry(npv); - } else { - pv->pv_pmap = NULL; - } + free_pv_entry(pv); } + + opv->pv_pmap = NULL; + opv->pv_next = NULL; + splx(s); if (anyvalid) pmap_update(); @@ -1123,8 +1212,8 @@ pmap_enter(pmap, va, pa, prot, wired) if (pmap == NULL) return; - va = i386_trunc_page(va); - pa = i386_trunc_page(pa); + va = trunc_page(va); + pa = trunc_page(pa); if (va > VM_MAX_KERNEL_ADDRESS) panic("pmap_enter: toobig"); @@ -1236,7 +1325,7 @@ validate: *pte = npte; } if (ptevalid) { - pmap_update(); + pmap_update_1pg(va); } else { pmap_use_pt(pmap, va); } @@ -1261,13 +1350,13 @@ pmap_qenter(va, m, count) register pt_entry_t *pte; for (i = 0; i < count; i++) { - pte = vtopte(va + i * NBPG); - if (*pte) - anyvalid++; - *pte = (pt_entry_t) ((int) (VM_PAGE_TO_PHYS(m[i]) | PG_RW | PG_V)); + vm_offset_t tva = va + i * PAGE_SIZE; + pt_entry_t npte = (pt_entry_t) ((int) (VM_PAGE_TO_PHYS(m[i]) | PG_RW | PG_V)); + pte = vtopte(tva); + if (*pte && (*pte != npte)) + pmap_update_1pg(tva); + *pte = npte; } - if (anyvalid) - pmap_update(); } /* * this routine jerks page mappings from the @@ -1282,10 +1371,11 @@ pmap_qremove(va, count) register pt_entry_t *pte; for (i = 0; i < count; i++) { - pte = vtopte(va + i * NBPG); + vm_offset_t tva = va + i * PAGE_SIZE; + pte = vtopte(tva); *pte = 0; + pmap_update_1pg(tva); } - pmap_update(); } /* @@ -1309,7 +1399,7 @@ pmap_kenter(va, pa) *pte = (pt_entry_t) ((int) (pa | PG_RW | PG_V)); if (wasvalid) - pmap_update(); + pmap_update_1pg(va); } /* @@ -1324,7 +1414,7 @@ pmap_kremove(va) pte = vtopte(va); *pte = (pt_entry_t) 0; - 
pmap_update(); + pmap_update_1pg(va); } /* @@ -1338,7 +1428,7 @@ pmap_kremove(va) * but is *MUCH* faster than pmap_enter... */ -static inline void +static __inline void pmap_enter_quick(pmap, va, pa) register pmap_t pmap; vm_offset_t va; @@ -1398,7 +1488,7 @@ pmap_enter_quick(pmap, va, pa) return; } -#define MAX_INIT_PT (512 * 4096) +#define MAX_INIT_PT (512) /* * pmap_object_init_pt preloads the ptes for a given object * into the specified pmap. This eliminates the blast of soft @@ -1417,12 +1507,13 @@ pmap_object_init_pt(pmap, addr, object, pindex, size) vm_page_t p; int objpgs; - if (!pmap || ((size > MAX_INIT_PT) && - (object->resident_page_count > MAX_INIT_PT / PAGE_SIZE))) { + psize = (size >> PAGE_SHIFT); + + if (!pmap || ((psize > MAX_INIT_PT) && + (object->resident_page_count > MAX_INIT_PT))) { return; } - psize = (size >> PAGE_SHIFT); /* * if we are processing a major portion of the object, then scan the * entire thing. @@ -1442,12 +1533,10 @@ pmap_object_init_pt(pmap, addr, object, pindex, size) if (tmpidx >= psize) { continue; } - if (((p->flags & (PG_ACTIVE | PG_INACTIVE | PG_CACHE)) != 0) && - ((p->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) && - (p->bmapped == 0) && + if (((p->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) && (p->busy == 0) && (p->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) { - if (p->flags & PG_CACHE) + if (p->queue == PQ_CACHE) vm_page_deactivate(p); vm_page_hold(p); p->flags |= PG_MAPPED; @@ -1464,13 +1553,10 @@ pmap_object_init_pt(pmap, addr, object, pindex, size) */ for (tmpidx = 0; tmpidx < psize; tmpidx += 1) { p = vm_page_lookup(object, tmpidx + pindex); - if (p && - ((p->flags & (PG_ACTIVE | PG_INACTIVE | PG_CACHE)) != 0) && - (p->bmapped == 0) && - (p->busy == 0) && + if (p && (p->busy == 0) && ((p->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) && (p->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) { - if (p->flags & PG_CACHE) + if (p->queue == PQ_CACHE) vm_page_deactivate(p); vm_page_hold(p); p->flags |= PG_MAPPED; @@ -1484,6 
+1570,94 @@ pmap_object_init_pt(pmap, addr, object, pindex, size) } /* + * pmap_prefault provides a quick way of clustering + * pagefaults into a processes address space. It is a "cousin" + * of pmap_object_init_pt, except it runs at page fault time instead + * of mmap time. + */ +#define PFBAK 2 +#define PFFOR 2 +#define PAGEORDER_SIZE (PFBAK+PFFOR) + +static int pmap_prefault_pageorder[] = { + -NBPG, NBPG, -2 * NBPG, 2 * NBPG +}; + +void +pmap_prefault(pmap, addra, entry, object) + pmap_t pmap; + vm_offset_t addra; + vm_map_entry_t entry; + vm_object_t object; +{ + int i; + vm_offset_t starta; + vm_offset_t addr; + vm_pindex_t pindex; + vm_page_t m; + int pageorder_index; + + if (entry->object.vm_object != object) + return; + + if (!curproc || (pmap != &curproc->p_vmspace->vm_pmap)) + return; + + starta = addra - PFBAK * PAGE_SIZE; + if (starta < entry->start) { + starta = entry->start; + } else if (starta > addra) { + starta = 0; + } + + for (i = 0; i < PAGEORDER_SIZE; i++) { + vm_object_t lobject; + pt_entry_t *pte; + + addr = addra + pmap_prefault_pageorder[i]; + if (addr < starta || addr >= entry->end) + continue; + + pte = vtopte(addr); + if (*pte) + continue; + + pindex = ((addr - entry->start) + entry->offset) >> PAGE_SHIFT; + lobject = object; + for (m = vm_page_lookup(lobject, pindex); + (!m && (lobject->type == OBJT_DEFAULT) && (lobject->backing_object)); + lobject = lobject->backing_object) { + if (lobject->backing_object_offset & (PAGE_MASK-1)) + break; + pindex += (lobject->backing_object_offset >> PAGE_SHIFT); + m = vm_page_lookup(lobject->backing_object, pindex); + } + + /* + * give-up when a page is not in memory + */ + if (m == NULL) + break; + + if (((m->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) && + (m->busy == 0) && + (m->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) { + + if (m->queue == PQ_CACHE) { + if (cnt.v_free_count + cnt.v_cache_count < + cnt.v_free_min) + break; + vm_page_deactivate(m); + } + vm_page_hold(m); + m->flags |= PG_MAPPED; 
+ pmap_enter_quick(pmap, addr, VM_PAGE_TO_PHYS(m)); + vm_page_unhold(m); + } + } +} + +/* * Routine: pmap_change_wiring * Function: Change the wiring attribute for a map/virtual-address * pair. @@ -1513,12 +1687,6 @@ pmap_change_wiring(pmap, va, wired) * invalidate TLB. */ pmap_pte_set_w(pte, wired); - /* - * When unwiring, set the modified bit in the pte -- could have been - * changed by the kernel - */ - if (!wired) - (int) *pte |= PG_M; } @@ -1563,11 +1731,11 @@ pmap_zero_page(phys) if (*(int *) CMAP2) panic("pmap_zero_page: CMAP busy"); - *(int *) CMAP2 = PG_V | PG_KW | i386_trunc_page(phys); - bzero(CADDR2, NBPG); + *(int *) CMAP2 = PG_V | PG_KW | trunc_page(phys); + bzero(CADDR2, PAGE_SIZE); *(int *) CMAP2 = 0; - pmap_update(); + pmap_update_1pg((vm_offset_t) CADDR2); } /* @@ -1584,17 +1752,17 @@ pmap_copy_page(src, dst) if (*(int *) CMAP1 || *(int *) CMAP2) panic("pmap_copy_page: CMAP busy"); - *(int *) CMAP1 = PG_V | PG_KW | i386_trunc_page(src); - *(int *) CMAP2 = PG_V | PG_KW | i386_trunc_page(dst); + *(int *) CMAP1 = PG_V | PG_KW | trunc_page(src); + *(int *) CMAP2 = PG_V | PG_KW | trunc_page(dst); #if __GNUC__ > 1 - memcpy(CADDR2, CADDR1, NBPG); + memcpy(CADDR2, CADDR1, PAGE_SIZE); #else - bcopy(CADDR1, CADDR2, NBPG); + bcopy(CADDR1, CADDR2, PAGE_SIZE); #endif *(int *) CMAP1 = 0; *(int *) CMAP2 = 0; - pmap_update(); + pmap_update_2pg( (vm_offset_t) CADDR1, (vm_offset_t) CADDR2); } @@ -1683,25 +1851,11 @@ pmap_testbit(pa, bit) * mark UPAGES as always modified, and ptes as never * modified. 
*/ - if (bit & PG_U) { + if (bit & (PG_U|PG_M)) { if ((pv->pv_va >= clean_sva) && (pv->pv_va < clean_eva)) { continue; } } - if (bit & PG_M) { - if (pv->pv_va >= USRSTACK) { - if (pv->pv_va >= clean_sva && pv->pv_va < clean_eva) { - continue; - } - if (pv->pv_va < USRSTACK + (UPAGES * NBPG)) { - splx(s); - return TRUE; - } else if (pv->pv_va < KERNBASE) { - splx(s); - return FALSE; - } - } - } if (!pv->pv_pmap) { printf("Null pmap (tb) at va: 0x%lx\n", pv->pv_va); continue; @@ -1729,6 +1883,7 @@ pmap_changebit(pa, bit, setem) register pv_entry_t pv; register pt_entry_t *pte, npte; vm_offset_t va; + int changed; int s; if (!pmap_is_managed(pa)) @@ -1757,15 +1912,17 @@ pmap_changebit(pa, bit, setem) continue; } pte = pmap_pte(pv->pv_pmap, va); - if (setem) + if (setem) { (int) npte = (int) *pte | bit; - else + } else { (int) npte = (int) *pte & ~bit; + } *pte = npte; } } splx(s); - pmap_update(); + if (curproc != pageproc) + pmap_update(); } /* @@ -1903,6 +2060,62 @@ pmap_mapdev(pa, size) return ((void *) va); } +#ifdef PMAP_DEBUG +pmap_pid_dump(int pid) { + pmap_t pmap; + struct proc *p; + int npte = 0; + int index; + for (p = (struct proc *) allproc; p != NULL; p = p->p_next) { + if (p->p_pid != pid) + continue; + + if (p->p_vmspace) { + int i,j; + index = 0; + pmap = &p->p_vmspace->vm_pmap; + for(i=0;i<1024;i++) { + pd_entry_t *pde; + pt_entry_t *pte; + unsigned base = i << PD_SHIFT; + + pde = &pmap->pm_pdir[i]; + if (pde && pmap_pde_v(pde)) { + for(j=0;j<1024;j++) { + unsigned va = base + (j << PG_SHIFT); + if (va >= (vm_offset_t) VM_MIN_KERNEL_ADDRESS) { + if (index) { + index = 0; + printf("\n"); + } + return npte; + } + pte = pmap_pte( pmap, va); + if (pte && pmap_pte_v(pte)) { + vm_offset_t pa; + vm_page_t m; + pa = *(int *)pte; + m = PHYS_TO_VM_PAGE((pa & PG_FRAME)); + printf("va: 0x%x, pt: 0x%x, h: %d, w: %d, f: 0x%x", + va, pa, m->hold_count, m->wire_count, m->flags); + npte++; + index++; + if (index >= 2) { + index = 0; + printf("\n"); + } else { + 
printf(" "); + } + } + } + } + } + } + } + return npte; +} +#endif + #ifdef DEBUG static void pads __P((pmap_t pm)); diff --git a/sys/amd64/amd64/trap.c b/sys/amd64/amd64/trap.c index 39afcdd..8b44962 100644 --- a/sys/amd64/amd64/trap.c +++ b/sys/amd64/amd64/trap.c @@ -35,7 +35,7 @@ * SUCH DAMAGE. * * from: @(#)trap.c 7.4 (Berkeley) 5/13/91 - * $Id: trap.c,v 1.69 1996/01/03 21:41:36 wollman Exp $ + * $Id: trap.c,v 1.70 1996/01/04 21:11:03 wollman Exp $ */ /* @@ -649,7 +649,8 @@ trap_pfault(frame, usermode) /* Fault the pte only if needed: */ if (*((int *)vtopte(v)) == 0) - (void) vm_fault(map, trunc_page(v), VM_PROT_WRITE, FALSE); + (void) vm_fault(map, + trunc_page(v), VM_PROT_WRITE, FALSE); pmap_use_pt( vm_map_pmap(map), va); diff --git a/sys/amd64/amd64/vm_machdep.c b/sys/amd64/amd64/vm_machdep.c index 4459bc8..cc79caa 100644 --- a/sys/amd64/amd64/vm_machdep.c +++ b/sys/amd64/amd64/vm_machdep.c @@ -38,7 +38,7 @@ * * from: @(#)vm_machdep.c 7.3 (Berkeley) 5/13/91 * Utah $Hdr: vm_machdep.c 1.16.1.1 89/06/23$ - * $Id: vm_machdep.c,v 1.49 1995/12/14 08:31:01 phk Exp $ + * $Id: vm_machdep.c,v 1.50 1996/01/05 20:12:23 wollman Exp $ */ #include "npx.h" @@ -330,8 +330,8 @@ vm_bounce_alloc(bp) vastart = (vm_offset_t) bp->b_data; vaend = (vm_offset_t) bp->b_data + bp->b_bufsize; - vapstart = i386_trunc_page(vastart); - vapend = i386_round_page(vaend); + vapstart = trunc_page(vastart); + vapend = round_page(vaend); countvmpg = (vapend - vapstart) / NBPG; /* @@ -444,13 +444,13 @@ vm_bounce_free(bp) vm_offset_t mybouncepa; vm_offset_t copycount; - copycount = i386_round_page(bouncekva + 1) - bouncekva; - mybouncepa = pmap_kextract(i386_trunc_page(bouncekva)); + copycount = round_page(bouncekva + 1) - bouncekva; + mybouncepa = pmap_kextract(trunc_page(bouncekva)); /* * if this is a bounced pa, then process as one */ - if ( mybouncepa != pmap_kextract( i386_trunc_page( origkva))) { + if ( mybouncepa != pmap_kextract( trunc_page( origkva))) { vm_offset_t tocopy = copycount; if 
(i + tocopy > bp->b_bufsize) tocopy = bp->b_bufsize - i; @@ -481,8 +481,8 @@ vm_bounce_free(bp) * add the old kva into the "to free" list */ - bouncekva= i386_trunc_page((vm_offset_t) bp->b_data); - bouncekvaend= i386_round_page((vm_offset_t)bp->b_data + bp->b_bufsize); + bouncekva= trunc_page((vm_offset_t) bp->b_data); + bouncekvaend= round_page((vm_offset_t)bp->b_data + bp->b_bufsize); /* printf("freeva: %d\n", (bouncekvaend - bouncekva) / NBPG); @@ -614,12 +614,11 @@ cpu_exit(p) } void -cpu_wait(p) struct proc *p; { -/* extern vm_map_t upages_map; */ - +cpu_wait(p) + struct proc *p; +{ /* drop per-process resources */ - pmap_remove(vm_map_pmap(u_map), (vm_offset_t) p->p_addr, - ((vm_offset_t) p->p_addr) + ctob(UPAGES)); + pmap_qremove((vm_offset_t) p->p_addr, UPAGES); kmem_free(u_map, (vm_offset_t)p->p_addr, ctob(UPAGES)); vmspace_free(p->p_vmspace); } @@ -841,7 +840,7 @@ grow(p, sp) grow_amount = MAXSSIZ - (vm->vm_ssize << PAGE_SHIFT); } if ((grow_amount == 0) || (vm_map_find(&vm->vm_map, NULL, 0, (vm_offset_t *)&v, - grow_amount, FALSE) != KERN_SUCCESS)) { + grow_amount, FALSE, VM_PROT_ALL, VM_PROT_ALL, 0) != KERN_SUCCESS)) { return (0); } vm->vm_ssize += grow_amount >> PAGE_SHIFT; @@ -864,6 +863,7 @@ vm_page_zero_idle() { pmap_zero_page(VM_PAGE_TO_PHYS(m)); disable_intr(); TAILQ_INSERT_HEAD(&vm_page_queue_zero, m, pageq); + m->queue = PQ_ZERO; ++vm_page_zero_count; return 1; } diff --git a/sys/fs/msdosfs/msdosfs_denode.c b/sys/fs/msdosfs/msdosfs_denode.c index 9f71630..2f86783 100644 --- a/sys/fs/msdosfs/msdosfs_denode.c +++ b/sys/fs/msdosfs/msdosfs_denode.c @@ -1,4 +1,4 @@ -/* $Id: msdosfs_denode.c,v 1.14 1995/12/03 16:41:53 bde Exp $ */ +/* $Id: msdosfs_denode.c,v 1.15 1995/12/07 12:47:19 davidg Exp $ */ /* $NetBSD: msdosfs_denode.c,v 1.9 1994/08/21 18:44:00 ws Exp $ */ /*- @@ -442,10 +442,11 @@ detrunc(dep, length, flags, cred, p) return EINVAL; } - vnode_pager_setsize(DETOV(dep), length); - if (dep->de_FileSize < length) + if (dep->de_FileSize < length) { 
+ vnode_pager_setsize(DETOV(dep), length); return deextend(dep, length, cred); + } /* * If the desired length is 0 then remember the starting cluster of @@ -515,6 +516,7 @@ detrunc(dep, length, flags, cred, p) dep->de_flag |= DE_UPDATE; vflags = (length > 0 ? V_SAVE : 0) | V_SAVEMETA; vinvalbuf(DETOV(dep), vflags, cred, p, 0, 0); + vnode_pager_setsize(DETOV(dep), length); TIMEVAL_TO_TIMESPEC(&time, &ts); allerror = deupdat(dep, &ts, 1); #ifdef MSDOSFS_DEBUG diff --git a/sys/fs/procfs/procfs_mem.c b/sys/fs/procfs/procfs_mem.c index 7dbbb7d..b536055 100644 --- a/sys/fs/procfs/procfs_mem.c +++ b/sys/fs/procfs/procfs_mem.c @@ -37,7 +37,7 @@ * * @(#)procfs_mem.c 8.4 (Berkeley) 1/21/94 * - * $Id: procfs_mem.c,v 1.13 1995/12/11 04:56:31 dyson Exp $ + * $Id: procfs_mem.c,v 1.14 1995/12/17 07:19:24 bde Exp $ */ /* @@ -171,7 +171,8 @@ procfs_rwmem(p, uio) /* Find space in kernel_map for the page we're interested in */ if (!error) error = vm_map_find(kernel_map, object, - IDX_TO_OFF(pindex), &kva, PAGE_SIZE, 1); + IDX_TO_OFF(pindex), &kva, PAGE_SIZE, 1, + VM_PROT_ALL, VM_PROT_ALL, 0); if (!error) { /* diff --git a/sys/gnu/ext2fs/ext2_bmap.c b/sys/gnu/ext2fs/ext2_bmap.c index c8b3cd4..2b2531d 100644 --- a/sys/gnu/ext2fs/ext2_bmap.c +++ b/sys/gnu/ext2fs/ext2_bmap.c @@ -36,7 +36,7 @@ * SUCH DAMAGE. * * @(#)ufs_bmap.c 8.6 (Berkeley) 1/21/94 - * $Id: ufs_bmap.c,v 1.9 1995/09/04 00:21:09 dyson Exp $ + * $Id: ufs_bmap.c,v 1.10 1995/11/05 23:07:37 dyson Exp $ */ #include <sys/param.h> @@ -188,7 +188,7 @@ ufs_bmaparray(vp, bn, bnp, ap, nump, runp, runb) * or we have a disk address for it, go fetch it. */ if (bp) - brelse(bp); + bqrelse(bp); xap->in_exists = 1; bp = getblk(vp, metalbn, mp->mnt_stat.f_iosize, 0, 0); @@ -226,7 +226,7 @@ ufs_bmaparray(vp, bn, bnp, ap, nump, runp, runb) } } if (bp) - brelse(bp); + bqrelse(bp); daddr = blkptrtodb(ump, daddr); *bnp = daddr == 0 ? 
-1 : daddr; diff --git a/sys/gnu/ext2fs/ext2_inode.c b/sys/gnu/ext2fs/ext2_inode.c index 2b40d37..74aebaa 100644 --- a/sys/gnu/ext2fs/ext2_inode.c +++ b/sys/gnu/ext2fs/ext2_inode.c @@ -216,7 +216,6 @@ printf("ext2_truncate called %d to %d\n", VTOI(ovp)->i_number, ap->a_length); if (error = getinoquota(oip)) return (error); #endif - vnode_pager_setsize(ovp, (u_long)length); fs = oip->i_e2fs; osize = oip->i_size; ext2_discard_prealloc(oip); @@ -231,6 +230,7 @@ printf("ext2_truncate called %d to %d\n", VTOI(ovp)->i_number, ap->a_length); aflags = B_CLRBUF; if (ap->a_flags & IO_SYNC) aflags |= B_SYNC; + vnode_pager_setsize(ovp, length); if (error = ext2_balloc(oip, lbn, offset + 1, ap->a_cred, &bp, aflags)) return (error); @@ -405,6 +405,7 @@ done: if (oip->i_blocks < 0) /* sanity */ oip->i_blocks = 0; oip->i_flag |= IN_CHANGE; + vnode_pager_setsize(ovp, length); #if QUOTA (void) chkdq(oip, -blocksreleased, NOCRED, 0); #endif diff --git a/sys/gnu/ext2fs/ext2_readwrite.c b/sys/gnu/ext2fs/ext2_readwrite.c index 093f7fb..478f0d2 100644 --- a/sys/gnu/ext2fs/ext2_readwrite.c +++ b/sys/gnu/ext2fs/ext2_readwrite.c @@ -120,8 +120,11 @@ READ(ap) size, &nextlbn, &nextsize, 1, NOCRED, &bp); } else error = bread(vp, lbn, size, NOCRED, &bp); - if (error) + if (error) { + brelse(bp); + bp = NULL; break; + } vp->v_lastr = lbn; /* @@ -148,10 +151,10 @@ READ(ap) uio->uio_offset == ip->i_size)) bp->b_flags |= B_AGE; #endif - brelse(bp); + bqrelse(bp); } if (bp != NULL) - brelse(bp); + bqrelse(bp); ip->i_flag |= IN_ACCESS; return (error); } diff --git a/sys/gnu/fs/ext2fs/ext2_bmap.c b/sys/gnu/fs/ext2fs/ext2_bmap.c index c8b3cd4..2b2531d 100644 --- a/sys/gnu/fs/ext2fs/ext2_bmap.c +++ b/sys/gnu/fs/ext2fs/ext2_bmap.c @@ -36,7 +36,7 @@ * SUCH DAMAGE. 
* * @(#)ufs_bmap.c 8.6 (Berkeley) 1/21/94 - * $Id: ufs_bmap.c,v 1.9 1995/09/04 00:21:09 dyson Exp $ + * $Id: ufs_bmap.c,v 1.10 1995/11/05 23:07:37 dyson Exp $ */ #include <sys/param.h> @@ -188,7 +188,7 @@ ufs_bmaparray(vp, bn, bnp, ap, nump, runp, runb) * or we have a disk address for it, go fetch it. */ if (bp) - brelse(bp); + bqrelse(bp); xap->in_exists = 1; bp = getblk(vp, metalbn, mp->mnt_stat.f_iosize, 0, 0); @@ -226,7 +226,7 @@ ufs_bmaparray(vp, bn, bnp, ap, nump, runp, runb) } } if (bp) - brelse(bp); + bqrelse(bp); daddr = blkptrtodb(ump, daddr); *bnp = daddr == 0 ? -1 : daddr; diff --git a/sys/gnu/fs/ext2fs/ext2_inode.c b/sys/gnu/fs/ext2fs/ext2_inode.c index 2b40d37..74aebaa 100644 --- a/sys/gnu/fs/ext2fs/ext2_inode.c +++ b/sys/gnu/fs/ext2fs/ext2_inode.c @@ -216,7 +216,6 @@ printf("ext2_truncate called %d to %d\n", VTOI(ovp)->i_number, ap->a_length); if (error = getinoquota(oip)) return (error); #endif - vnode_pager_setsize(ovp, (u_long)length); fs = oip->i_e2fs; osize = oip->i_size; ext2_discard_prealloc(oip); @@ -231,6 +230,7 @@ printf("ext2_truncate called %d to %d\n", VTOI(ovp)->i_number, ap->a_length); aflags = B_CLRBUF; if (ap->a_flags & IO_SYNC) aflags |= B_SYNC; + vnode_pager_setsize(ovp, length); if (error = ext2_balloc(oip, lbn, offset + 1, ap->a_cred, &bp, aflags)) return (error); @@ -405,6 +405,7 @@ done: if (oip->i_blocks < 0) /* sanity */ oip->i_blocks = 0; oip->i_flag |= IN_CHANGE; + vnode_pager_setsize(ovp, length); #if QUOTA (void) chkdq(oip, -blocksreleased, NOCRED, 0); #endif diff --git a/sys/gnu/fs/ext2fs/ext2_readwrite.c b/sys/gnu/fs/ext2fs/ext2_readwrite.c index 093f7fb..478f0d2 100644 --- a/sys/gnu/fs/ext2fs/ext2_readwrite.c +++ b/sys/gnu/fs/ext2fs/ext2_readwrite.c @@ -120,8 +120,11 @@ READ(ap) size, &nextlbn, &nextsize, 1, NOCRED, &bp); } else error = bread(vp, lbn, size, NOCRED, &bp); - if (error) + if (error) { + brelse(bp); + bp = NULL; break; + } vp->v_lastr = lbn; /* @@ -148,10 +151,10 @@ READ(ap) uio->uio_offset == ip->i_size)) 
bp->b_flags |= B_AGE; #endif - brelse(bp); + bqrelse(bp); } if (bp != NULL) - brelse(bp); + bqrelse(bp); ip->i_flag |= IN_ACCESS; return (error); } diff --git a/sys/i386/i386/machdep.c b/sys/i386/i386/machdep.c index a9c4961..942d081 100644 --- a/sys/i386/i386/machdep.c +++ b/sys/i386/i386/machdep.c @@ -35,7 +35,7 @@ * SUCH DAMAGE. * * from: @(#)machdep.c 7.4 (Berkeley) 6/3/91 - * $Id: machdep.c,v 1.168 1996/01/04 21:10:53 wollman Exp $ + * $Id: machdep.c,v 1.169 1996/01/05 20:12:19 wollman Exp $ */ #include "npx.h" @@ -1315,7 +1315,7 @@ init386(first) */ /* * XXX text protection is temporarily (?) disabled. The limit was - * i386_btop(i386_round_page(etext)) - 1. + * i386_btop(round_page(etext)) - 1. */ gdt_segs[GCODE_SEL].ssd_limit = i386_btop(0) - 1; gdt_segs[GDATA_SEL].ssd_limit = i386_btop(0) - 1; diff --git a/sys/i386/i386/pmap.c b/sys/i386/i386/pmap.c index e0410ef..ba0a8b5 100644 --- a/sys/i386/i386/pmap.c +++ b/sys/i386/i386/pmap.c @@ -39,7 +39,7 @@ * SUCH DAMAGE. * * from: @(#)pmap.c 7.7 (Berkeley) 5/12/91 - * $Id: pmap.c,v 1.71 1995/12/17 07:19:15 bde Exp $ + * $Id: pmap.c,v 1.72 1995/12/22 18:21:26 bde Exp $ */ /* @@ -91,6 +91,7 @@ #include <sys/malloc.h> #include <sys/msgbuf.h> #include <sys/queue.h> +#include <sys/vmmeter.h> #include <vm/vm.h> #include <vm/vm_param.h> @@ -108,6 +109,10 @@ #include <i386/isa/isa.h> +#define PMAP_KEEP_PDIRS + +static void init_pv_entries __P((int)); + /* * Get PDEs and PTEs for user/kernel address space */ @@ -164,7 +169,6 @@ static pt_entry_t * static pv_entry_t get_pv_entry __P((void)); static void i386_protection_init __P((void)); -static void init_pv_entries __P((int npg)); static void pmap_alloc_pv_entry __P((void)); static void pmap_changebit __P((vm_offset_t pa, int bit, boolean_t setem)); static void pmap_enter_quick __P((pmap_t pmap, vm_offset_t va, @@ -179,6 +183,33 @@ static boolean_t pmap_testbit __P((vm_offset_t pa, int bit)); /* + * The below are finer grained pmap_update routines. 
These eliminate + * the gratuitious tlb flushes on non-i386 architectures. + */ +static __inline void +pmap_update_1pg( vm_offset_t va) { +#if defined(I386_CPU) + if (cpuclass == CPUCLASS_I386) + pmap_update(); + else +#endif + __asm __volatile(".byte 0xf,0x1,0x38": :"a" (va)); +} + +static __inline void +pmap_update_2pg( vm_offset_t va1, vm_offset_t va2) { +#if defined(I386_CPU) + if (cpuclass == CPUCLASS_I386) { + pmap_update(); + } else +#endif + { + __asm __volatile(".byte 0xf,0x1,0x38": :"a" (va1)); + __asm __volatile(".byte 0xf,0x1,0x38": :"a" (va2)); + } +} + +/* * Routine: pmap_pte * Function: * Extract the page table entry associated @@ -186,7 +217,7 @@ static boolean_t * [ what about induced faults -wfj] */ -inline pt_entry_t * __pure +__inline pt_entry_t * __pure pmap_pte(pmap, va) register pmap_t pmap; vm_offset_t va; @@ -248,7 +279,7 @@ pmap_extract(pmap, va) /* * determine if a page is managed (memory vs. device) */ -static inline int +static __inline int pmap_is_managed(pa) vm_offset_t pa; { @@ -274,8 +305,8 @@ pmap_pte_vm_page(pmap, pt) { vm_page_t m; - pt = i386_trunc_page(pt); - pt = (pt - UPT_MIN_ADDRESS) / NBPG; + pt = trunc_page(pt); + pt = (pt - UPT_MIN_ADDRESS) / PAGE_SIZE; pt = ((vm_offset_t) pmap->pm_pdir[pt]) & PG_FRAME; m = PHYS_TO_VM_PAGE(pt); return m; @@ -301,7 +332,7 @@ pmap_use_pt(pmap, va) /* * Unwire a page table page */ -inline void +__inline void pmap_unuse_pt(pmap, va) pmap_t pmap; vm_offset_t va; @@ -319,8 +350,19 @@ pmap_unuse_pt(pmap, va) (m->hold_count == 0) && (m->wire_count == 0) && (va < KPT_MIN_ADDRESS)) { +/* + * We don't free page-table-pages anymore because it can have a negative + * impact on perf at times. Now we just deactivate, and it'll get cleaned + * up if needed... Also, if the page ends up getting used, it will fault + * back into the process address space and be reactivated. 
+ */ +#ifdef PMAP_FREE_OLD_PTES pmap_page_protect(VM_PAGE_TO_PHYS(m), VM_PROT_NONE); vm_page_free(m); +#else + m->dirty = 0; + vm_page_deactivate(m); +#endif } } @@ -354,7 +396,7 @@ pmap_bootstrap(firstaddr, loadaddr) avail_start = firstaddr; /* - * XXX The calculation of virtual_avail is wrong. It's NKPT*NBPG too + * XXX The calculation of virtual_avail is wrong. It's NKPT*PAGE_SIZE too * large. It should instead be correctly calculated in locore.s and * not based on 'first' (which is a physical address, not a virtual * address, for the start of unused physical memory). The kernel @@ -386,7 +428,7 @@ pmap_bootstrap(firstaddr, loadaddr) * mapping of pages. */ #define SYSMAP(c, p, v, n) \ - v = (c)va; va += ((n)*NBPG); p = pte; pte += (n); + v = (c)va; va += ((n)*PAGE_SIZE); p = pte; pte += (n); va = virtual_avail; pte = pmap_pte(kernel_pmap, va); @@ -433,14 +475,14 @@ pmap_init(phys_start, phys_end) */ vm_first_phys = phys_avail[0]; for (i = 0; phys_avail[i + 1]; i += 2); - npg = (phys_avail[(i - 2) + 1] - vm_first_phys) / NBPG; + npg = (phys_avail[(i - 2) + 1] - vm_first_phys) / PAGE_SIZE; /* * Allocate memory for random pmap data structures. Includes the * pv_head_table. 
*/ s = (vm_size_t) (sizeof(struct pv_entry) * npg); - s = i386_round_page(s); + s = round_page(s); addr = (vm_offset_t) kmem_alloc(kernel_map, s); pv_table = (pv_entry_t) addr; @@ -476,6 +518,38 @@ pmap_map(virt, start, end, prot) return (virt); } +#ifdef PMAP_KEEP_PDIRS +int nfreepdir; +caddr_t *pdirlist; +#define NFREEPDIR 3 + +static void * +pmap_getpdir() { + caddr_t *pdir; + if (pdirlist) { + --nfreepdir; + pdir = pdirlist; + pdirlist = (caddr_t *) *pdir; + bzero( (caddr_t) pdir, PAGE_SIZE); + } else { + pdir = (caddr_t *) kmem_alloc(kernel_map, PAGE_SIZE); + } + + return (void *) pdir; +} + +static void +pmap_freepdir(void *pdir) { + if (nfreepdir > NFREEPDIR) { + kmem_free(kernel_map, (vm_offset_t) pdir, PAGE_SIZE); + } else { + * (caddr_t *) pdir = (caddr_t) pdirlist; + pdirlist = (caddr_t *) pdir; + ++nfreepdir; + } +} +#endif + /* * Initialize a preallocated and zeroed pmap structure, * such as one in a vmspace structure. @@ -488,7 +562,12 @@ pmap_pinit(pmap) * No need to allocate page table space yet but we do need a valid * page directory table. 
*/ + +#ifdef PMAP_KEEP_PDIRS + pmap->pm_pdir = pmap_getpdir(); +#else pmap->pm_pdir = (pd_entry_t *) kmem_alloc(kernel_map, PAGE_SIZE); +#endif /* wire in kernel global address entries */ bcopy(PTD + KPTDI, pmap->pm_pdir + KPTDI, nkpt * PTESIZE); @@ -519,14 +598,14 @@ pmap_growkernel(vm_offset_t addr) kernel_vm_end = KERNBASE; nkpt = 0; while (pdir_pde(PTD, kernel_vm_end)) { - kernel_vm_end = (kernel_vm_end + NBPG * NPTEPG) & ~(NBPG * NPTEPG - 1); + kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1); ++nkpt; } } - addr = (addr + NBPG * NPTEPG) & ~(NBPG * NPTEPG - 1); + addr = (addr + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1); while (kernel_vm_end < addr) { if (pdir_pde(PTD, kernel_vm_end)) { - kernel_vm_end = (kernel_vm_end + NBPG * NPTEPG) & ~(NBPG * NPTEPG - 1); + kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1); continue; } ++nkpt; @@ -548,7 +627,7 @@ pmap_growkernel(vm_offset_t addr) } } *pmap_pde(kernel_pmap, kernel_vm_end) = pdir_pde(PTD, kernel_vm_end); - kernel_vm_end = (kernel_vm_end + NBPG * NPTEPG) & ~(NBPG * NPTEPG - 1); + kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1); } splx(s); } @@ -583,7 +662,11 @@ void pmap_release(pmap) register struct pmap *pmap; { +#ifdef PMAP_KEEP_PDIRS + pmap_freepdir( (void *)pmap->pm_pdir); +#else kmem_free(kernel_map, (vm_offset_t) pmap->pm_pdir, PAGE_SIZE); +#endif } /* @@ -598,7 +681,7 @@ pmap_reference(pmap) } } -#define PV_FREELIST_MIN ((NBPG / sizeof (struct pv_entry)) / 2) +#define PV_FREELIST_MIN ((PAGE_SIZE / sizeof (struct pv_entry)) / 2) /* * Data for the pv entry allocation mechanism @@ -611,7 +694,7 @@ static int npvvapg; /* * free the pv_entry back to the free list */ -inline static void +static __inline void free_pv_entry(pv) pv_entry_t pv; { @@ -628,7 +711,7 @@ free_pv_entry(pv) * the memory allocation is performed bypassing the malloc code * because of the possibility of allocations at interrupt time. 
*/ -static inline pv_entry_t +static __inline pv_entry_t get_pv_entry() { pv_entry_t tmp; @@ -677,7 +760,7 @@ pmap_alloc_pv_entry() int i; pv_entry_t entry; - newentries = (NBPG / sizeof(struct pv_entry)); + newentries = (PAGE_SIZE / sizeof(struct pv_entry)); /* * wire the page */ @@ -692,7 +775,7 @@ pmap_alloc_pv_entry() /* * update the allocation pointers */ - pvva += NBPG; + pvva += PAGE_SIZE; --npvvapg; /* @@ -724,8 +807,9 @@ init_pv_entries(npg) * kvm space is fairly cheap, be generous!!! (the system can panic if * this is too small.) */ - npvvapg = ((npg * PVSPERPAGE) * sizeof(struct pv_entry) + NBPG - 1) / NBPG; - pvva = kmem_alloc_pageable(kernel_map, npvvapg * NBPG); + npvvapg = ((npg * PVSPERPAGE) * sizeof(struct pv_entry) + + PAGE_SIZE - 1) / PAGE_SIZE; + pvva = kmem_alloc_pageable(kernel_map, npvvapg * PAGE_SIZE); /* * get the first batch of entries */ @@ -775,11 +859,10 @@ pmap_remove_entry(pmap, pv, va) pv->pv_pmap = NULL; } } else { - for (npv = pv->pv_next; npv; npv = npv->pv_next) { + for (npv = pv->pv_next; npv; (pv = npv, npv = pv->pv_next)) { if (pmap == npv->pv_pmap && va == npv->pv_va) { break; } - pv = npv; } if (npv) { pv->pv_next = npv->pv_next; @@ -817,7 +900,7 @@ pmap_remove(pmap, sva, eva) * common operation and easy to short circuit some * code. 
*/ - if ((sva + NBPG) == eva) { + if ((sva + PAGE_SIZE) == eva) { if (*pmap_pde(pmap, sva) == 0) return; @@ -839,7 +922,7 @@ pmap_remove(pmap, sva, eva) if (pmap_is_managed(pa)) { if ((int) oldpte & PG_M) { - if (sva < USRSTACK + (UPAGES * NBPG) || + if (sva < USRSTACK + (UPAGES * PAGE_SIZE) || (sva >= KERNBASE && (sva < clean_sva || sva >= clean_eva))) { PHYS_TO_VM_PAGE(pa)->dirty |= VM_PAGE_BITS_ALL; } @@ -848,7 +931,7 @@ pmap_remove(pmap, sva, eva) pmap_remove_entry(pmap, pv, sva); } pmap_unuse_pt(pmap, sva); - pmap_update(); + pmap_update_1pg(sva); return; } sva = i386_btop(sva); @@ -913,12 +996,12 @@ pmap_remove(pmap, sva, eva) */ pa = ((int) oldpte) & PG_FRAME; if (!pmap_is_managed(pa)) { - pmap_unuse_pt(pmap, va); + pmap_unuse_pt(pmap, (vm_offset_t) va); ++sva; continue; } if ((int) oldpte & PG_M) { - if (sva < USRSTACK + (UPAGES * NBPG) || + if (sva < USRSTACK + (UPAGES * PAGE_SIZE) || (sva >= KERNBASE && (sva < clean_sva || sva >= clean_eva))) { PHYS_TO_VM_PAGE(pa)->dirty |= VM_PAGE_BITS_ALL; } @@ -947,7 +1030,7 @@ static void pmap_remove_all(pa) vm_offset_t pa; { - register pv_entry_t pv, npv; + register pv_entry_t pv, opv, npv; register pt_entry_t *pte, *ptp; vm_offset_t va; struct pmap *pmap; @@ -965,13 +1048,15 @@ pmap_remove_all(pa) if (!pmap_is_managed(pa)) return; - pa = i386_trunc_page(pa); - pv = pa_to_pvh(pa); - m = PHYS_TO_VM_PAGE(pa); + pa = trunc_page(pa); + opv = pa_to_pvh(pa); + if (opv->pv_pmap == NULL) + return; + m = PHYS_TO_VM_PAGE(pa); s = splhigh(); - while (pv->pv_pmap != NULL) { - pmap = pv->pv_pmap; + pv = opv; + while (pv && ((pmap = pv->pv_pmap) != NULL)) { ptp = get_pt_entry(pmap); va = pv->pv_va; pte = ptp + i386_btop(va); @@ -979,13 +1064,14 @@ pmap_remove_all(pa) pmap->pm_stats.wired_count--; if (*pte) { pmap->pm_stats.resident_count--; - anyvalid++; + if (curproc != pageproc) + anyvalid++; /* * Update the vm_page_t clean and reference bits. 
*/ if ((int) *pte & PG_M) { - if (va < USRSTACK + (UPAGES * NBPG) || + if (va < USRSTACK + (UPAGES * PAGE_SIZE) || (va >= KERNBASE && (va < clean_sva || va >= clean_eva))) { PHYS_TO_VM_PAGE(pa)->dirty |= VM_PAGE_BITS_ALL; } @@ -993,14 +1079,17 @@ pmap_remove_all(pa) *pte = 0; pmap_unuse_pt(pmap, va); } + pv = pv->pv_next; + } + + for (pv = opv->pv_next; pv; pv = npv) { npv = pv->pv_next; - if (npv) { - *pv = *npv; - free_pv_entry(npv); - } else { - pv->pv_pmap = NULL; - } + free_pv_entry(pv); } + + opv->pv_pmap = NULL; + opv->pv_next = NULL; + splx(s); if (anyvalid) pmap_update(); @@ -1123,8 +1212,8 @@ pmap_enter(pmap, va, pa, prot, wired) if (pmap == NULL) return; - va = i386_trunc_page(va); - pa = i386_trunc_page(pa); + va = trunc_page(va); + pa = trunc_page(pa); if (va > VM_MAX_KERNEL_ADDRESS) panic("pmap_enter: toobig"); @@ -1236,7 +1325,7 @@ validate: *pte = npte; } if (ptevalid) { - pmap_update(); + pmap_update_1pg(va); } else { pmap_use_pt(pmap, va); } @@ -1261,13 +1350,13 @@ pmap_qenter(va, m, count) register pt_entry_t *pte; for (i = 0; i < count; i++) { - pte = vtopte(va + i * NBPG); - if (*pte) - anyvalid++; - *pte = (pt_entry_t) ((int) (VM_PAGE_TO_PHYS(m[i]) | PG_RW | PG_V)); + vm_offset_t tva = va + i * PAGE_SIZE; + pt_entry_t npte = (pt_entry_t) ((int) (VM_PAGE_TO_PHYS(m[i]) | PG_RW | PG_V)); + pte = vtopte(tva); + if (*pte && (*pte != npte)) + pmap_update_1pg(tva); + *pte = npte; } - if (anyvalid) - pmap_update(); } /* * this routine jerks page mappings from the @@ -1282,10 +1371,11 @@ pmap_qremove(va, count) register pt_entry_t *pte; for (i = 0; i < count; i++) { - pte = vtopte(va + i * NBPG); + vm_offset_t tva = va + i * PAGE_SIZE; + pte = vtopte(tva); *pte = 0; + pmap_update_1pg(tva); } - pmap_update(); } /* @@ -1309,7 +1399,7 @@ pmap_kenter(va, pa) *pte = (pt_entry_t) ((int) (pa | PG_RW | PG_V)); if (wasvalid) - pmap_update(); + pmap_update_1pg(va); } /* @@ -1324,7 +1414,7 @@ pmap_kremove(va) pte = vtopte(va); *pte = (pt_entry_t) 0; - 
pmap_update(); + pmap_update_1pg(va); } /* @@ -1338,7 +1428,7 @@ pmap_kremove(va) * but is *MUCH* faster than pmap_enter... */ -static inline void +static __inline void pmap_enter_quick(pmap, va, pa) register pmap_t pmap; vm_offset_t va; @@ -1398,7 +1488,7 @@ pmap_enter_quick(pmap, va, pa) return; } -#define MAX_INIT_PT (512 * 4096) +#define MAX_INIT_PT (512) /* * pmap_object_init_pt preloads the ptes for a given object * into the specified pmap. This eliminates the blast of soft @@ -1417,12 +1507,13 @@ pmap_object_init_pt(pmap, addr, object, pindex, size) vm_page_t p; int objpgs; - if (!pmap || ((size > MAX_INIT_PT) && - (object->resident_page_count > MAX_INIT_PT / PAGE_SIZE))) { + psize = (size >> PAGE_SHIFT); + + if (!pmap || ((psize > MAX_INIT_PT) && + (object->resident_page_count > MAX_INIT_PT))) { return; } - psize = (size >> PAGE_SHIFT); /* * if we are processing a major portion of the object, then scan the * entire thing. @@ -1442,12 +1533,10 @@ pmap_object_init_pt(pmap, addr, object, pindex, size) if (tmpidx >= psize) { continue; } - if (((p->flags & (PG_ACTIVE | PG_INACTIVE | PG_CACHE)) != 0) && - ((p->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) && - (p->bmapped == 0) && + if (((p->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) && (p->busy == 0) && (p->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) { - if (p->flags & PG_CACHE) + if (p->queue == PQ_CACHE) vm_page_deactivate(p); vm_page_hold(p); p->flags |= PG_MAPPED; @@ -1464,13 +1553,10 @@ pmap_object_init_pt(pmap, addr, object, pindex, size) */ for (tmpidx = 0; tmpidx < psize; tmpidx += 1) { p = vm_page_lookup(object, tmpidx + pindex); - if (p && - ((p->flags & (PG_ACTIVE | PG_INACTIVE | PG_CACHE)) != 0) && - (p->bmapped == 0) && - (p->busy == 0) && + if (p && (p->busy == 0) && ((p->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) && (p->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) { - if (p->flags & PG_CACHE) + if (p->queue == PQ_CACHE) vm_page_deactivate(p); vm_page_hold(p); p->flags |= PG_MAPPED; @@ -1484,6 
+1570,94 @@ pmap_object_init_pt(pmap, addr, object, pindex, size) } /* + * pmap_prefault provides a quick way of clustering + * pagefaults into a processes address space. It is a "cousin" + * of pmap_object_init_pt, except it runs at page fault time instead + * of mmap time. + */ +#define PFBAK 2 +#define PFFOR 2 +#define PAGEORDER_SIZE (PFBAK+PFFOR) + +static int pmap_prefault_pageorder[] = { + -NBPG, NBPG, -2 * NBPG, 2 * NBPG +}; + +void +pmap_prefault(pmap, addra, entry, object) + pmap_t pmap; + vm_offset_t addra; + vm_map_entry_t entry; + vm_object_t object; +{ + int i; + vm_offset_t starta; + vm_offset_t addr; + vm_pindex_t pindex; + vm_page_t m; + int pageorder_index; + + if (entry->object.vm_object != object) + return; + + if (!curproc || (pmap != &curproc->p_vmspace->vm_pmap)) + return; + + starta = addra - PFBAK * PAGE_SIZE; + if (starta < entry->start) { + starta = entry->start; + } else if (starta > addra) { + starta = 0; + } + + for (i = 0; i < PAGEORDER_SIZE; i++) { + vm_object_t lobject; + pt_entry_t *pte; + + addr = addra + pmap_prefault_pageorder[i]; + if (addr < starta || addr >= entry->end) + continue; + + pte = vtopte(addr); + if (*pte) + continue; + + pindex = ((addr - entry->start) + entry->offset) >> PAGE_SHIFT; + lobject = object; + for (m = vm_page_lookup(lobject, pindex); + (!m && (lobject->type == OBJT_DEFAULT) && (lobject->backing_object)); + lobject = lobject->backing_object) { + if (lobject->backing_object_offset & (PAGE_MASK-1)) + break; + pindex += (lobject->backing_object_offset >> PAGE_SHIFT); + m = vm_page_lookup(lobject->backing_object, pindex); + } + + /* + * give-up when a page is not in memory + */ + if (m == NULL) + break; + + if (((m->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) && + (m->busy == 0) && + (m->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) { + + if (m->queue == PQ_CACHE) { + if (cnt.v_free_count + cnt.v_cache_count < + cnt.v_free_min) + break; + vm_page_deactivate(m); + } + vm_page_hold(m); + m->flags |= PG_MAPPED; 
+ pmap_enter_quick(pmap, addr, VM_PAGE_TO_PHYS(m)); + vm_page_unhold(m); + } + } +} + +/* * Routine: pmap_change_wiring * Function: Change the wiring attribute for a map/virtual-address * pair. @@ -1513,12 +1687,6 @@ pmap_change_wiring(pmap, va, wired) * invalidate TLB. */ pmap_pte_set_w(pte, wired); - /* - * When unwiring, set the modified bit in the pte -- could have been - * changed by the kernel - */ - if (!wired) - (int) *pte |= PG_M; } @@ -1563,11 +1731,11 @@ pmap_zero_page(phys) if (*(int *) CMAP2) panic("pmap_zero_page: CMAP busy"); - *(int *) CMAP2 = PG_V | PG_KW | i386_trunc_page(phys); - bzero(CADDR2, NBPG); + *(int *) CMAP2 = PG_V | PG_KW | trunc_page(phys); + bzero(CADDR2, PAGE_SIZE); *(int *) CMAP2 = 0; - pmap_update(); + pmap_update_1pg((vm_offset_t) CADDR2); } /* @@ -1584,17 +1752,17 @@ pmap_copy_page(src, dst) if (*(int *) CMAP1 || *(int *) CMAP2) panic("pmap_copy_page: CMAP busy"); - *(int *) CMAP1 = PG_V | PG_KW | i386_trunc_page(src); - *(int *) CMAP2 = PG_V | PG_KW | i386_trunc_page(dst); + *(int *) CMAP1 = PG_V | PG_KW | trunc_page(src); + *(int *) CMAP2 = PG_V | PG_KW | trunc_page(dst); #if __GNUC__ > 1 - memcpy(CADDR2, CADDR1, NBPG); + memcpy(CADDR2, CADDR1, PAGE_SIZE); #else - bcopy(CADDR1, CADDR2, NBPG); + bcopy(CADDR1, CADDR2, PAGE_SIZE); #endif *(int *) CMAP1 = 0; *(int *) CMAP2 = 0; - pmap_update(); + pmap_update_2pg( (vm_offset_t) CADDR1, (vm_offset_t) CADDR2); } @@ -1683,25 +1851,11 @@ pmap_testbit(pa, bit) * mark UPAGES as always modified, and ptes as never * modified. 
*/ - if (bit & PG_U) { + if (bit & (PG_U|PG_M)) { if ((pv->pv_va >= clean_sva) && (pv->pv_va < clean_eva)) { continue; } } - if (bit & PG_M) { - if (pv->pv_va >= USRSTACK) { - if (pv->pv_va >= clean_sva && pv->pv_va < clean_eva) { - continue; - } - if (pv->pv_va < USRSTACK + (UPAGES * NBPG)) { - splx(s); - return TRUE; - } else if (pv->pv_va < KERNBASE) { - splx(s); - return FALSE; - } - } - } if (!pv->pv_pmap) { printf("Null pmap (tb) at va: 0x%lx\n", pv->pv_va); continue; @@ -1729,6 +1883,7 @@ pmap_changebit(pa, bit, setem) register pv_entry_t pv; register pt_entry_t *pte, npte; vm_offset_t va; + int changed; int s; if (!pmap_is_managed(pa)) @@ -1757,15 +1912,17 @@ pmap_changebit(pa, bit, setem) continue; } pte = pmap_pte(pv->pv_pmap, va); - if (setem) + if (setem) { (int) npte = (int) *pte | bit; - else + } else { (int) npte = (int) *pte & ~bit; + } *pte = npte; } } splx(s); - pmap_update(); + if (curproc != pageproc) + pmap_update(); } /* @@ -1903,6 +2060,62 @@ pmap_mapdev(pa, size) return ((void *) va); } +#ifdef PMAP_DEBUG +pmap_pid_dump(int pid) { + pmap_t pmap; + struct proc *p; + int npte = 0; + int index; + for (p = (struct proc *) allproc; p != NULL; p = p->p_next) { + if (p->p_pid != pid) + continue; + + if (p->p_vmspace) { + int i,j; + index = 0; + pmap = &p->p_vmspace->vm_pmap; + for(i=0;i<1024;i++) { + pd_entry_t *pde; + pt_entry_t *pte; + unsigned base = i << PD_SHIFT; + + pde = &pmap->pm_pdir[i]; + if (pde && pmap_pde_v(pde)) { + for(j=0;j<1024;j++) { + unsigned va = base + (j << PG_SHIFT); + if (va >= (vm_offset_t) VM_MIN_KERNEL_ADDRESS) { + if (index) { + index = 0; + printf("\n"); + } + return npte; + } + pte = pmap_pte( pmap, va); + if (pte && pmap_pte_v(pte)) { + vm_offset_t pa; + vm_page_t m; + pa = *(int *)pte; + m = PHYS_TO_VM_PAGE((pa & PG_FRAME)); + printf("va: 0x%x, pt: 0x%x, h: %d, w: %d, f: 0x%x", + va, pa, m->hold_count, m->wire_count, m->flags); + npte++; + index++; + if (index >= 2) { + index = 0; + printf("\n"); + } else { + 
printf(" "); + } + } + } + } + } + } + } + return npte; +} +#endif + #ifdef DEBUG static void pads __P((pmap_t pm)); diff --git a/sys/i386/i386/trap.c b/sys/i386/i386/trap.c index 39afcdd..8b44962 100644 --- a/sys/i386/i386/trap.c +++ b/sys/i386/i386/trap.c @@ -35,7 +35,7 @@ * SUCH DAMAGE. * * from: @(#)trap.c 7.4 (Berkeley) 5/13/91 - * $Id: trap.c,v 1.69 1996/01/03 21:41:36 wollman Exp $ + * $Id: trap.c,v 1.70 1996/01/04 21:11:03 wollman Exp $ */ /* @@ -649,7 +649,8 @@ trap_pfault(frame, usermode) /* Fault the pte only if needed: */ if (*((int *)vtopte(v)) == 0) - (void) vm_fault(map, trunc_page(v), VM_PROT_WRITE, FALSE); + (void) vm_fault(map, + trunc_page(v), VM_PROT_WRITE, FALSE); pmap_use_pt( vm_map_pmap(map), va); diff --git a/sys/i386/i386/vm_machdep.c b/sys/i386/i386/vm_machdep.c index 4459bc8..cc79caa 100644 --- a/sys/i386/i386/vm_machdep.c +++ b/sys/i386/i386/vm_machdep.c @@ -38,7 +38,7 @@ * * from: @(#)vm_machdep.c 7.3 (Berkeley) 5/13/91 * Utah $Hdr: vm_machdep.c 1.16.1.1 89/06/23$ - * $Id: vm_machdep.c,v 1.49 1995/12/14 08:31:01 phk Exp $ + * $Id: vm_machdep.c,v 1.50 1996/01/05 20:12:23 wollman Exp $ */ #include "npx.h" @@ -330,8 +330,8 @@ vm_bounce_alloc(bp) vastart = (vm_offset_t) bp->b_data; vaend = (vm_offset_t) bp->b_data + bp->b_bufsize; - vapstart = i386_trunc_page(vastart); - vapend = i386_round_page(vaend); + vapstart = trunc_page(vastart); + vapend = round_page(vaend); countvmpg = (vapend - vapstart) / NBPG; /* @@ -444,13 +444,13 @@ vm_bounce_free(bp) vm_offset_t mybouncepa; vm_offset_t copycount; - copycount = i386_round_page(bouncekva + 1) - bouncekva; - mybouncepa = pmap_kextract(i386_trunc_page(bouncekva)); + copycount = round_page(bouncekva + 1) - bouncekva; + mybouncepa = pmap_kextract(trunc_page(bouncekva)); /* * if this is a bounced pa, then process as one */ - if ( mybouncepa != pmap_kextract( i386_trunc_page( origkva))) { + if ( mybouncepa != pmap_kextract( trunc_page( origkva))) { vm_offset_t tocopy = copycount; if (i + tocopy > 
bp->b_bufsize) tocopy = bp->b_bufsize - i; @@ -481,8 +481,8 @@ vm_bounce_free(bp) * add the old kva into the "to free" list */ - bouncekva= i386_trunc_page((vm_offset_t) bp->b_data); - bouncekvaend= i386_round_page((vm_offset_t)bp->b_data + bp->b_bufsize); + bouncekva= trunc_page((vm_offset_t) bp->b_data); + bouncekvaend= round_page((vm_offset_t)bp->b_data + bp->b_bufsize); /* printf("freeva: %d\n", (bouncekvaend - bouncekva) / NBPG); @@ -614,12 +614,11 @@ cpu_exit(p) } void -cpu_wait(p) struct proc *p; { -/* extern vm_map_t upages_map; */ - +cpu_wait(p) + struct proc *p; +{ /* drop per-process resources */ - pmap_remove(vm_map_pmap(u_map), (vm_offset_t) p->p_addr, - ((vm_offset_t) p->p_addr) + ctob(UPAGES)); + pmap_qremove((vm_offset_t) p->p_addr, UPAGES); kmem_free(u_map, (vm_offset_t)p->p_addr, ctob(UPAGES)); vmspace_free(p->p_vmspace); } @@ -841,7 +840,7 @@ grow(p, sp) grow_amount = MAXSSIZ - (vm->vm_ssize << PAGE_SHIFT); } if ((grow_amount == 0) || (vm_map_find(&vm->vm_map, NULL, 0, (vm_offset_t *)&v, - grow_amount, FALSE) != KERN_SUCCESS)) { + grow_amount, FALSE, VM_PROT_ALL, VM_PROT_ALL, 0) != KERN_SUCCESS)) { return (0); } vm->vm_ssize += grow_amount >> PAGE_SHIFT; @@ -864,6 +863,7 @@ vm_page_zero_idle() { pmap_zero_page(VM_PAGE_TO_PHYS(m)); disable_intr(); TAILQ_INSERT_HEAD(&vm_page_queue_zero, m, pageq); + m->queue = PQ_ZERO; ++vm_page_zero_count; return 1; } diff --git a/sys/kern/imgact_aout.c b/sys/kern/imgact_aout.c index 3c7a007..2bfc188 100644 --- a/sys/kern/imgact_aout.c +++ b/sys/kern/imgact_aout.c @@ -28,7 +28,7 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* - * $Id: imgact_aout.c,v 1.20 1995/12/11 04:56:00 dyson Exp $ + * $Id: imgact_aout.c,v 1.21 1995/12/15 02:57:40 peter Exp $ */ #include <sys/param.h> @@ -148,15 +148,15 @@ exec_aout_imgact(imgp) exec_new_vmspace(imgp); /* - * Map text read/execute + * Map text/data read/execute */ vmaddr = virtual_offset; error = vm_mmap(&vmspace->vm_map, /* map */ &vmaddr, /* address */ - a_out->a_text, /* size */ + a_out->a_text + a_out->a_data, /* size */ VM_PROT_READ | VM_PROT_EXECUTE, /* protection */ - VM_PROT_READ | VM_PROT_EXECUTE | VM_PROT_WRITE, /* max protection */ + VM_PROT_ALL, /* max protection */ MAP_PRIVATE | MAP_FIXED, /* flags */ (caddr_t)imgp->vp, /* vnode */ file_offset); /* offset */ @@ -164,20 +164,13 @@ exec_aout_imgact(imgp) return (error); /* - * Map data read/write (if text is 0, assume text is in data area - * [Bill's screwball mode]) + * allow writing of data */ - vmaddr = virtual_offset + a_out->a_text; - error = - vm_mmap(&vmspace->vm_map, - &vmaddr, - a_out->a_data, - VM_PROT_READ | VM_PROT_WRITE | (a_out->a_text ? 0 : VM_PROT_EXECUTE), - VM_PROT_ALL, MAP_PRIVATE | MAP_FIXED, - (caddr_t) imgp->vp, - file_offset + a_out->a_text); - if (error) - return (error); + vm_map_protect(&vmspace->vm_map, + vmaddr + a_out->a_text, + vmaddr + a_out->a_text + a_out->a_data, + VM_PROT_ALL, + FALSE); if (bss_size != 0) { /* @@ -186,7 +179,7 @@ exec_aout_imgact(imgp) * instruction of the same name. */ vmaddr = virtual_offset + a_out->a_text + a_out->a_data; - error = vm_map_find(&vmspace->vm_map, NULL, 0, &vmaddr, bss_size, FALSE); + error = vm_map_find(&vmspace->vm_map, NULL, 0, &vmaddr, bss_size, FALSE, VM_PROT_ALL, VM_PROT_ALL, 0); if (error) return (error); } diff --git a/sys/kern/imgact_gzip.c b/sys/kern/imgact_gzip.c index 4fa24ef..1322472 100644 --- a/sys/kern/imgact_gzip.c +++ b/sys/kern/imgact_gzip.c @@ -6,7 +6,7 @@ * this stuff is worth it, you can buy me a beer in return. 
Poul-Henning Kamp * ---------------------------------------------------------------------------- * - * $Id: imgact_gzip.c,v 1.16 1995/12/02 16:32:01 bde Exp $ + * $Id: imgact_gzip.c,v 1.17 1995/12/07 12:46:35 davidg Exp $ * * This module handles execution of a.out files which have been run through * "gzip". This saves diskspace, but wastes cpu-cycles and VM. @@ -255,7 +255,7 @@ do_aout_hdr(struct imgact_gzip * gz) * same name. */ vmaddr = gz->virtual_offset + gz->a_out.a_text + gz->a_out.a_data; - error = vm_map_find(&vmspace->vm_map, NULL, 0, &vmaddr, gz->bss_size, FALSE); + error = vm_map_find(&vmspace->vm_map, NULL, 0, &vmaddr, gz->bss_size, FALSE, VM_PROT_ALL, VM_PROT_ALL, 0); if (error) { gz->where = __LINE__; return (error); diff --git a/sys/kern/init_main.c b/sys/kern/init_main.c index 24a68c4..7e1bb1d 100644 --- a/sys/kern/init_main.c +++ b/sys/kern/init_main.c @@ -39,7 +39,7 @@ * SUCH DAMAGE. * * @(#)init_main.c 8.9 (Berkeley) 1/21/94 - * $Id: init_main.c,v 1.35 1995/12/07 12:46:36 davidg Exp $ + * $Id: init_main.c,v 1.36 1995/12/10 13:45:11 phk Exp $ */ #include <sys/param.h> @@ -558,7 +558,7 @@ start_init(p, framep) * Need just enough stack to hold the faked-up "execve()" arguments. */ addr = trunc_page(VM_MAXUSER_ADDRESS - PAGE_SIZE); - if (vm_map_find(&p->p_vmspace->vm_map, NULL, 0, &addr, PAGE_SIZE, FALSE) != 0) + if (vm_map_find(&p->p_vmspace->vm_map, NULL, 0, &addr, PAGE_SIZE, FALSE, VM_PROT_ALL, VM_PROT_ALL, 0) != 0) panic("init: couldn't allocate argument space"); p->p_vmspace->vm_maxsaddr = (caddr_t)addr; p->p_vmspace->vm_ssize = 1; diff --git a/sys/kern/kern_exec.c b/sys/kern/kern_exec.c index 8c42542..579e9e5 100644 --- a/sys/kern/kern_exec.c +++ b/sys/kern/kern_exec.c @@ -28,7 +28,7 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* - * $Id: kern_exec.c,v 1.31 1996/01/04 20:28:45 wollman Exp $ + * $Id: kern_exec.c,v 1.32 1996/01/08 04:30:41 peter Exp $ */ #include "opt_sysvipc.h" @@ -362,7 +362,7 @@ exec_new_vmspace(imgp) /* Allocate a new stack */ error = vm_map_find(&vmspace->vm_map, NULL, 0, (vm_offset_t *)&stack_addr, - SGROWSIZ, FALSE); + SGROWSIZ, FALSE, VM_PROT_ALL, VM_PROT_ALL, 0); if (error) return(error); diff --git a/sys/kern/kern_exit.c b/sys/kern/kern_exit.c index 596cf8f..110c318 100644 --- a/sys/kern/kern_exit.c +++ b/sys/kern/kern_exit.c @@ -36,7 +36,7 @@ * SUCH DAMAGE. * * @(#)kern_exit.c 8.7 (Berkeley) 2/12/94 - * $Id: kern_exit.c,v 1.25 1996/01/04 20:28:46 wollman Exp $ + * $Id: kern_exit.c,v 1.26 1996/01/08 04:30:44 peter Exp $ */ #include "opt_ktrace.h" @@ -413,6 +413,12 @@ loop: (void)chgproccnt(p->p_cred->p_ruid, -1); /* + * Release reference to text vnode + */ + if (p->p_textvp) + vrele(p->p_textvp); + + /* * Free up credentials. */ if (--p->p_cred->p_refcnt == 0) { @@ -422,12 +428,6 @@ loop: } /* - * Release reference to text vnode - */ - if (p->p_textvp) - vrele(p->p_textvp); - - /* * Finally finished with old proc entry. * Unlink it from its process group and free it. */ diff --git a/sys/kern/subr_trap.c b/sys/kern/subr_trap.c index 39afcdd..8b44962 100644 --- a/sys/kern/subr_trap.c +++ b/sys/kern/subr_trap.c @@ -35,7 +35,7 @@ * SUCH DAMAGE. 
* * from: @(#)trap.c 7.4 (Berkeley) 5/13/91 - * $Id: trap.c,v 1.69 1996/01/03 21:41:36 wollman Exp $ + * $Id: trap.c,v 1.70 1996/01/04 21:11:03 wollman Exp $ */ /* @@ -649,7 +649,8 @@ trap_pfault(frame, usermode) /* Fault the pte only if needed: */ if (*((int *)vtopte(v)) == 0) - (void) vm_fault(map, trunc_page(v), VM_PROT_WRITE, FALSE); + (void) vm_fault(map, + trunc_page(v), VM_PROT_WRITE, FALSE); pmap_use_pt( vm_map_pmap(map), va); diff --git a/sys/kern/sys_process.c b/sys/kern/sys_process.c index e9a8c50..cf004dc 100644 --- a/sys/kern/sys_process.c +++ b/sys/kern/sys_process.c @@ -28,7 +28,7 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $Id: sys_process.c,v 1.18 1995/12/16 21:43:47 bde Exp $ + * $Id: sys_process.c,v 1.19 1995/12/17 06:59:36 bde Exp $ */ #include <sys/param.h> @@ -85,8 +85,8 @@ pread (struct proc *procp, unsigned int addr, unsigned int *retval) { vm_map_lookup_done (tmap, out_entry); /* Find space in kernel_map for the page we're interested in */ - rv = vm_map_find (kernel_map, object, IDX_TO_OFF(pindex), &kva, - PAGE_SIZE, 1); + rv = vm_map_find (kernel_map, object, IDX_TO_OFF(pindex), + &kva, PAGE_SIZE, 0, VM_PROT_ALL, VM_PROT_ALL, 0); if (!rv) { vm_object_reference (object); @@ -175,9 +175,9 @@ pwrite (struct proc *procp, unsigned int addr, unsigned int datum) { return EFAULT; /* Find space in kernel_map for the page we're interested in */ - rv = vm_map_find (kernel_map, object, IDX_TO_OFF(pindex), &kva, - PAGE_SIZE, 1); - + rv = vm_map_find (kernel_map, object, IDX_TO_OFF(pindex), + &kva, PAGE_SIZE, 0, + VM_PROT_ALL, VM_PROT_ALL, 0); if (!rv) { vm_object_reference (object); diff --git a/sys/kern/vfs_bio.c b/sys/kern/vfs_bio.c index 829a796..c6adf9a 100644 --- a/sys/kern/vfs_bio.c +++ b/sys/kern/vfs_bio.c @@ -18,7 +18,7 @@ * 5. Modifications may be freely made to this file if the above conditions * are met. 
* - * $Id: vfs_bio.c,v 1.82 1996/01/06 23:23:02 davidg Exp $ + * $Id: vfs_bio.c,v 1.83 1996/01/06 23:58:03 davidg Exp $ */ /* @@ -78,6 +78,7 @@ static void vm_hold_load_pages(struct buf * bp, vm_offset_t from, vm_offset_t to); static void vfs_clean_pages(struct buf * bp); static void vfs_setdirty(struct buf *bp); +static void vfs_vmio_release(struct buf *bp); int needsbuffer; @@ -108,6 +109,8 @@ static int bufspace, maxbufspace; static struct bufhashhdr bufhashtbl[BUFHSZ], invalhash; static struct bqueues bufqueues[BUFFER_QUEUES]; +extern int vm_swap_size; + #define BUF_MAXUSE 8 /* @@ -363,7 +366,7 @@ bdwrite(struct buf * bp) * out on the next sync, or perhaps the cluster will be completed. */ vfs_clean_pages(bp); - brelse(bp); + bqrelse(bp); return; } @@ -412,8 +415,11 @@ brelse(struct buf * bp) (bp->b_bufsize <= 0)) { bp->b_flags |= B_INVAL; bp->b_flags &= ~(B_DELWRI | B_CACHE); - if (((bp->b_flags & B_VMIO) == 0) && bp->b_vp) + if (((bp->b_flags & B_VMIO) == 0) && bp->b_vp) { + if (bp->b_bufsize) + allocbuf(bp, 0); brelvp(bp); + } } /* @@ -470,57 +476,23 @@ brelse(struct buf * bp) vm_page_protect(m, VM_PROT_NONE); } } - } - foff += resid; - iototal -= resid; - } - } - - if (bp->b_flags & (B_INVAL | B_RELBUF)) { - for(i = 0; i < bp->b_npages; i++) { - m = bp->b_pages[i]; - --m->bmapped; - if (m->bmapped == 0) { - if (m->flags & PG_WANTED) { - m->flags &= ~PG_WANTED; - wakeup(m); - } - if ((m->busy == 0) && ((m->flags & PG_BUSY) == 0)) { - if (m->object->flags & OBJ_MIGHTBEDIRTY) { - vm_page_test_dirty(m); + if (resid >= PAGE_SIZE) { + if ((m->valid & VM_PAGE_BITS_ALL) != VM_PAGE_BITS_ALL) { + bp->b_flags |= B_INVAL; } - /* - * if page isn't valid, no sense in keeping it around - */ - if (m->valid == 0) { - vm_page_protect(m, VM_PROT_NONE); - vm_page_free(m); - /* - * if page isn't dirty and hasn't been referenced by - * a process, then cache it - */ - } else if ((m->dirty & m->valid) == 0 && - (m->flags & PG_REFERENCED) == 0 && - 
!pmap_is_referenced(VM_PAGE_TO_PHYS(m))) { - vm_page_cache(m); - /* - * otherwise activate it - */ - } else if ((m->flags & PG_ACTIVE) == 0) { - vm_page_activate(m); - m->act_count = 0; + } else { + if (!vm_page_is_valid(m, + (((vm_offset_t) bp->b_data) & PAGE_MASK), resid)) { + bp->b_flags |= B_INVAL; } } } + foff += resid; + iototal -= resid; } - bufspace -= bp->b_bufsize; - pmap_qremove(trunc_page((vm_offset_t) bp->b_data), bp->b_npages); - bp->b_npages = 0; - bp->b_bufsize = 0; - bp->b_flags &= ~B_VMIO; - if (bp->b_vp) - brelvp(bp); } + if (bp->b_flags & (B_INVAL | B_RELBUF)) + vfs_vmio_release(bp); } if (bp->b_qindex != QUEUE_NONE) panic("brelse: free buffer onto another queue???"); @@ -560,6 +532,85 @@ brelse(struct buf * bp) } /* + * Release a buffer. + */ +void +bqrelse(struct buf * bp) +{ + int s; + + s = splbio(); + + if (needsbuffer) { + needsbuffer = 0; + wakeup(&needsbuffer); + } + + /* anyone need this block? */ + if (bp->b_flags & B_WANTED) { + bp->b_flags &= ~(B_WANTED | B_AGE); + wakeup(bp); + } + + if (bp->b_qindex != QUEUE_NONE) + panic("bqrelse: free buffer onto another queue???"); + + if (bp->b_flags & B_LOCKED) { + bp->b_flags &= ~B_ERROR; + bp->b_qindex = QUEUE_LOCKED; + TAILQ_INSERT_TAIL(&bufqueues[QUEUE_LOCKED], bp, b_freelist); + /* buffers with stale but valid contents */ + } else { + bp->b_qindex = QUEUE_LRU; + TAILQ_INSERT_TAIL(&bufqueues[QUEUE_LRU], bp, b_freelist); + } + + /* unlock */ + bp->b_flags &= ~(B_WANTED | B_BUSY | B_ASYNC | B_NOCACHE | B_AGE | B_RELBUF); + splx(s); +} + +static void +vfs_vmio_release(bp) + struct buf *bp; +{ + int i; + vm_page_t m; + + for (i = 0; i < bp->b_npages; i++) { + m = bp->b_pages[i]; + bp->b_pages[i] = NULL; + if (m->flags & PG_WANTED) { + m->flags &= ~PG_WANTED; + wakeup(m); + } + vm_page_unwire(m); + if (m->wire_count == 0) { + if (m->valid) { + /* + * this keeps pressure off of the process memory + */ + if ((vm_swap_size == 0) || + (cnt.v_free_count < cnt.v_free_min)) + vm_page_cache(m); + } 
else if ((m->hold_count == 0) && + ((m->flags & PG_BUSY) == 0) && + (m->busy == 0)) { + vm_page_protect(m, VM_PROT_NONE); + vm_page_free(m); + } + } + } + bufspace -= bp->b_bufsize; + pmap_qremove(trunc_page((vm_offset_t) bp->b_data), bp->b_npages); + bp->b_npages = 0; + bp->b_bufsize = 0; + bp->b_flags &= ~B_VMIO; + if (bp->b_vp) + brelvp(bp); +} + +/* * Check to see if a block is currently memory resident. */ __inline struct buf * @@ -666,6 +717,7 @@ start: if (bp->b_qindex != QUEUE_EMPTY) panic("getnewbuf: inconsistent EMPTY queue, qindex=%d", bp->b_qindex); + bp->b_flags |= B_BUSY; bremfree(bp); goto fillbuf; } @@ -717,12 +769,10 @@ trytofreespace: wakeup(bp); } bremfree(bp); + bp->b_flags |= B_BUSY; - if (bp->b_flags & B_VMIO) { - bp->b_flags |= B_RELBUF | B_BUSY | B_DONE; - brelse(bp); - bremfree(bp); - } + if (bp->b_flags & B_VMIO) + vfs_vmio_release(bp); if (bp->b_vp) brelvp(bp); @@ -737,7 +787,7 @@ fillbuf: crfree(bp->b_wcred); bp->b_wcred = NOCRED; } - bp->b_flags |= B_BUSY; + LIST_REMOVE(bp, b_hash); LIST_INSERT_HEAD(&invalhash, bp, b_hash); splx(s); @@ -776,19 +826,7 @@ incore(struct vnode * vp, daddr_t blkno) struct bufhashhdr *bh; int s = splbio(); - - bh = BUFHASH(vp, blkno); - bp = bh->lh_first; - - /* Search hash chain */ - while (bp != NULL) { - /* hit */ - if (bp->b_vp == vp && bp->b_lblkno == blkno && - (bp->b_flags & B_INVAL) == 0) { - break; - } - bp = bp->b_hash.le_next; - } + bp = gbincore(vp, blkno); splx(s); return (bp); } @@ -933,35 +971,6 @@ loop: } } - /* - * make sure that all pages in the buffer are valid, if they - * aren't, clear the cache flag. - * ASSUMPTION: - * if the buffer is greater than 1 page in size, it is assumed - * that the buffer address starts on a page boundary... 
- */ - if (bp->b_flags & B_VMIO) { - int szleft, i; - szleft = size; - for (i=0;i<bp->b_npages;i++) { - if (szleft > PAGE_SIZE) { - if ((bp->b_pages[i]->valid & VM_PAGE_BITS_ALL) != - VM_PAGE_BITS_ALL) { - bp->b_flags &= ~(B_CACHE|B_DONE); - break; - } - szleft -= PAGE_SIZE; - } else { - if (!vm_page_is_valid(bp->b_pages[i], - (((vm_offset_t) bp->b_data) & PAGE_MASK), - szleft)) { - bp->b_flags &= ~(B_CACHE|B_DONE); - break; - } - szleft = 0; - } - } - } if (bp->b_usecount < BUF_MAXUSE) ++bp->b_usecount; splx(s); @@ -1035,6 +1044,7 @@ geteblk(int size) return (bp); } + /* * This code constitutes the buffer memory from either anonymous system * memory (in the case of non-VMIO operations) or from an associated @@ -1084,9 +1094,11 @@ allocbuf(struct buf * bp, int size) if (newbsize < bp->b_bufsize) { if (desiredpages < bp->b_npages) { - pmap_qremove((vm_offset_t) trunc_page(bp->b_data) + - (desiredpages << PAGE_SHIFT), (bp->b_npages - desiredpages)); for (i = desiredpages; i < bp->b_npages; i++) { + /* + * the page is not freed here -- it + * is the responsibility of vnode_pager_setsize + */ m = bp->b_pages[i]; s = splhigh(); while ((m->flags & PG_BUSY) || (m->busy != 0)) { @@ -1095,17 +1107,11 @@ allocbuf(struct buf * bp, int size) } splx(s); - if (m->bmapped == 0) { - printf("allocbuf: bmapped is zero for page %d\n", i); - panic("allocbuf: error"); - } - --m->bmapped; - if (m->bmapped == 0) { - vm_page_protect(m, VM_PROT_NONE); - vm_page_free(m); - } bp->b_pages[i] = NULL; + vm_page_unwire(m); } + pmap_qremove((vm_offset_t) trunc_page(bp->b_data) + + (desiredpages << PAGE_SHIFT), (bp->b_npages - desiredpages)); bp->b_npages = desiredpages; } } else if (newbsize > bp->b_bufsize) { @@ -1141,46 +1147,39 @@ allocbuf(struct buf * bp, int size) if (pageindex < curbpnpages) { m = bp->b_pages[pageindex]; +#ifdef VFS_BIO_DIAG if (m->pindex != objoff) panic("allocbuf: page changed offset??!!!?"); +#endif bytesinpage = tinc; if (tinc > (newbsize - toff)) bytesinpage = newbsize 
- toff; - if (!vm_page_is_valid(m, + if ((bp->b_flags & B_CACHE) && + !vm_page_is_valid(m, (vm_offset_t) ((toff + off) & (PAGE_SIZE - 1)), bytesinpage)) { bp->b_flags &= ~B_CACHE; } - if ((m->flags & PG_ACTIVE) == 0) { - vm_page_activate(m); - m->act_count = 0; - } continue; } m = vm_page_lookup(obj, objoff); if (!m) { m = vm_page_alloc(obj, objoff, VM_ALLOC_NORMAL); if (!m) { - int j; - - for (j = bp->b_npages; j < pageindex; j++) { - PAGE_WAKEUP(bp->b_pages[j]); - } VM_WAIT; goto doretry; } - vm_page_activate(m); - m->act_count = 0; - m->valid = 0; + /* + * Normally it is unwise to clear PG_BUSY without + * PAGE_WAKEUP -- but it is okay here, as there is + * no chance for blocking between here and vm_page_alloc + */ + m->flags &= ~PG_BUSY; + vm_page_wire(m); bp->b_flags &= ~B_CACHE; } else if (m->flags & PG_BUSY) { - int j; - - for (j = bp->b_npages; j < pageindex; j++) { - PAGE_WAKEUP(bp->b_pages[j]); - } - s = splbio(); + s = splhigh(); m->flags |= PG_WANTED; tsleep(m, PVM, "pgtblk", 0); splx(s); @@ -1188,36 +1187,33 @@ allocbuf(struct buf * bp, int size) goto doretry; } else { if ((curproc != pageproc) && - (m->flags & PG_CACHE) && - (cnt.v_free_count + cnt.v_cache_count) < cnt.v_free_min) { + (m->queue == PQ_CACHE) && + ((cnt.v_free_count + cnt.v_cache_count) < + (cnt.v_free_min + cnt.v_cache_min))) { pagedaemon_wakeup(); } bytesinpage = tinc; if (tinc > (newbsize - toff)) bytesinpage = newbsize - toff; - if (!vm_page_is_valid(m, + if ((bp->b_flags & B_CACHE) && + !vm_page_is_valid(m, (vm_offset_t) ((toff + off) & (PAGE_SIZE - 1)), bytesinpage)) { bp->b_flags &= ~B_CACHE; } - if ((m->flags & PG_ACTIVE) == 0) { - vm_page_activate(m); - m->act_count = 0; - } - m->flags |= PG_BUSY; + vm_page_wire(m); } bp->b_pages[pageindex] = m; curbpnpages = pageindex + 1; } - for (i = bp->b_npages; i < curbpnpages; i++) { - m = bp->b_pages[i]; - m->bmapped++; - PAGE_WAKEUP(m); - } - bp->b_npages = curbpnpages; +/* bp->b_data = buffers_kva + (bp - buf) * MAXBSIZE; - 
pmap_qenter((vm_offset_t) bp->b_data, bp->b_pages, bp->b_npages); - bp->b_data += off & (PAGE_SIZE - 1); +*/ + bp->b_data = (caddr_t) trunc_page(bp->b_data); + bp->b_npages = curbpnpages; + pmap_qenter((vm_offset_t) bp->b_data, + bp->b_pages, bp->b_npages); + ((vm_offset_t) bp->b_data) |= off & (PAGE_SIZE - 1); } } } @@ -1363,8 +1359,8 @@ biodone(register struct buf * bp) printf(" VDEV, lblkno: %d, flags: 0x%lx, npages: %d\n", (int) bp->b_lblkno, bp->b_flags, bp->b_npages); - printf(" valid: 0x%x, dirty: 0x%x, mapped: %d\n", - m->valid, m->dirty, m->bmapped); + printf(" valid: 0x%x, dirty: 0x%x, wired: %d\n", + m->valid, m->dirty, m->wire_count); panic("biodone: page busy < 0\n"); } --m->busy; @@ -1389,7 +1385,10 @@ biodone(register struct buf * bp) */ if (bp->b_flags & B_ASYNC) { - brelse(bp); + if ((bp->b_flags & (B_NOCACHE | B_INVAL | B_ERROR | B_RELBUF)) != 0) + brelse(bp); + else + bqrelse(bp); } else { wakeup(bp); } @@ -1568,6 +1567,7 @@ vfs_clean_pages(struct buf * bp) void vfs_bio_clrbuf(struct buf *bp) { int i; + int remapbuffer = 0; if( bp->b_flags & B_VMIO) { if( (bp->b_npages == 1) && (bp->b_bufsize < PAGE_SIZE)) { int mask; @@ -1585,8 +1585,9 @@ vfs_bio_clrbuf(struct buf *bp) { if( bp->b_pages[i]->valid == VM_PAGE_BITS_ALL) continue; if( bp->b_pages[i]->valid == 0) { - if ((bp->b_pages[i]->flags & PG_ZERO) == 0) + if ((bp->b_pages[i]->flags & PG_ZERO) == 0) { bzero(bp->b_data + (i << PAGE_SHIFT), PAGE_SIZE); + } } else { int j; for(j=0;j<PAGE_SIZE/DEV_BSIZE;j++) { @@ -1600,6 +1601,8 @@ vfs_bio_clrbuf(struct buf *bp) { } else { clrbuf(bp); } + if (remapbuffer) + pmap_qenter(trunc_page(bp->b_data), bp->b_pages, bp->b_npages); } /* @@ -1612,10 +1615,13 @@ vm_hold_load_pages(struct buf * bp, vm_offset_t from, vm_offset_t to) { vm_offset_t pg; vm_page_t p; + int index; to = round_page(to); + from = round_page(from); + index = (from - trunc_page(bp->b_data)) >> PAGE_SHIFT; - for (pg = round_page(from); pg < to; pg += PAGE_SIZE) { + for (pg = from; pg < to; 
pg += PAGE_SIZE, index++) { tryagain: @@ -1627,10 +1633,10 @@ tryagain: } vm_page_wire(p); pmap_kenter(pg, VM_PAGE_TO_PHYS(p)); - bp->b_pages[(pg - trunc_page(bp->b_data)) >> PAGE_SHIFT] = p; + bp->b_pages[index] = p; PAGE_WAKEUP(p); - bp->b_npages++; } + bp->b_npages = to >> PAGE_SHIFT; } void @@ -1646,9 +1652,16 @@ vm_hold_free_pages(struct buf * bp, vm_offset_t from, vm_offset_t to) for (pg = from; pg < to; pg += PAGE_SIZE, index++) { p = bp->b_pages[index]; - bp->b_pages[index] = 0; - pmap_kremove(pg); - vm_page_free(p); - --bp->b_npages; + if (p && (index < bp->b_npages)) { + if (p->busy) { + printf("vm_hold_free_pages: blkno: %d, lblkno: %d\n", + bp->b_blkno, bp->b_lblkno); + } + bp->b_pages[index] = NULL; + pmap_kremove(pg); + vm_page_unwire(p); + vm_page_free(p); + } } + bp->b_npages = from >> PAGE_SHIFT; } diff --git a/sys/kern/vfs_cache.c b/sys/kern/vfs_cache.c index 6c231f4..924fa18 100644 --- a/sys/kern/vfs_cache.c +++ b/sys/kern/vfs_cache.c @@ -33,7 +33,7 @@ * SUCH DAMAGE. * * @(#)vfs_cache.c 8.3 (Berkeley) 8/22/94 - * $Id: vfs_cache.c,v 1.18 1995/12/14 09:52:47 phk Exp $ + * $Id: vfs_cache.c,v 1.19 1995/12/22 15:56:35 phk Exp $ */ #include <sys/param.h> @@ -47,6 +47,8 @@ #include <sys/errno.h> #include <sys/malloc.h> +#define MAXVNODEUSE 32 + /* * Name caching works as follows: * @@ -173,6 +175,8 @@ cache_lookup(dvp, vpp, cnp) nchstats.ncs_goodhits++; TOUCH(ncp); *vpp = ncp->nc_vp; + if ((*vpp)->v_usage < MAXVNODEUSE) + (*vpp)->v_usage++; return (-1); } @@ -234,6 +238,8 @@ cache_enter(dvp, vp, cnp) /* fill in cache info */ ncp->nc_vp = vp; + if (vp->v_usage < MAXVNODEUSE) + ++vp->v_usage; ncp->nc_vpid = vp->v_id; ncp->nc_dvp = dvp; ncp->nc_dvpid = dvp->v_id; diff --git a/sys/kern/vfs_cluster.c b/sys/kern/vfs_cluster.c index 2895b09..b232071 100644 --- a/sys/kern/vfs_cluster.c +++ b/sys/kern/vfs_cluster.c @@ -33,7 +33,7 @@ * SUCH DAMAGE. 
* * @(#)vfs_cluster.c 8.7 (Berkeley) 2/13/94 - * $Id: vfs_cluster.c,v 1.30 1995/12/11 04:56:07 dyson Exp $ + * $Id: vfs_cluster.c,v 1.31 1995/12/22 16:06:46 bde Exp $ */ #include <sys/param.h> @@ -149,8 +149,7 @@ cluster_read(vp, filesize, lblkno, size, cred, bpp) vp->v_ralen >>= RA_SHIFTDOWN; return 0; } else if( vp->v_maxra > lblkno) { - if ( (vp->v_maxra + (vp->v_ralen / RA_MULTIPLE_SLOW)) >= - (lblkno + vp->v_ralen)) { + if ( vp->v_maxra > lblkno + (vp->v_ralen / RA_MULTIPLE_SLOW) ) { if ((vp->v_ralen + 1) < RA_MULTIPLE_FAST*(MAXPHYS / size)) ++vp->v_ralen; return 0; @@ -192,18 +191,13 @@ cluster_read(vp, filesize, lblkno, size, cred, bpp) * ahead too much, and we need to back-off, otherwise we might * try to read more. */ - for (i = 0; i < vp->v_ralen; i++) { + for (i = 0; i < vp->v_maxra - lblkno; i++) { rablkno = lblkno + i; - alreadyincore = (int) gbincore(vp, rablkno); + alreadyincore = (int) incore(vp, rablkno); if (!alreadyincore) { - if (rablkno < vp->v_maxra) { - vp->v_maxra = rablkno; - vp->v_ralen >>= RA_SHIFTDOWN; - alreadyincore = 1; - } - break; - } else if (vp->v_maxra < rablkno) { - vp->v_maxra = rablkno + 1; + vp->v_maxra = rablkno; + vp->v_ralen >>= RA_SHIFTDOWN; + alreadyincore = 1; } } } @@ -248,9 +242,12 @@ cluster_read(vp, filesize, lblkno, size, cred, bpp) */ if (rbp) { vp->v_maxra = rbp->b_lblkno + rbp->b_bcount / size; - if (error || (rbp->b_flags & B_CACHE)) { + if (error) { rbp->b_flags &= ~(B_ASYNC | B_READ); brelse(rbp); + } else if (rbp->b_flags & B_CACHE) { + rbp->b_flags &= ~(B_ASYNC | B_READ); + bqrelse(rbp); } else { if ((rbp->b_flags & B_CLUSTER) == 0) vfs_busy_pages(rbp, 0); @@ -328,14 +325,14 @@ cluster_rbuild(vp, filesize, lbn, blkno, size, run) round_page(size) > MAXPHYS) break; - if (gbincore(vp, lbn + i)) + if (incore(vp, lbn + i)) break; tbp = getblk(vp, lbn + i, size, 0, 0); if ((tbp->b_flags & B_CACHE) || (tbp->b_flags & B_VMIO) == 0) { - brelse(tbp); + bqrelse(tbp); break; } @@ -532,10 +529,7 @@ cluster_write(bp, 
filesize) vp->v_clen = maxclen; if (!async && maxclen == 0) { /* I/O not contiguous */ vp->v_cstart = lbn + 1; - if (!async) - bawrite(bp); - else - bdwrite(bp); + bawrite(bp); } else { /* Wait for rest of cluster */ vp->v_cstart = lbn; bdwrite(bp); @@ -545,8 +539,7 @@ cluster_write(bp, filesize) * At end of cluster, write it out. */ bdwrite(bp); - cluster_wbuild(vp, lblocksize, vp->v_cstart, - vp->v_clen + 1); + cluster_wbuild(vp, lblocksize, vp->v_cstart, vp->v_clen + 1); vp->v_clen = 0; vp->v_cstart = lbn + 1; } else @@ -653,15 +646,17 @@ cluster_wbuild(vp, size, start_lbn, len) tbp->b_flags &= ~B_DONE; splx(s); } - for (j = 0; j < tbp->b_npages; j += 1) { - vm_page_t m; - m = tbp->b_pages[j]; - ++m->busy; - ++m->object->paging_in_progress; - if ((bp->b_npages == 0) || - (bp->b_pages[bp->b_npages - 1] != m)) { - bp->b_pages[bp->b_npages] = m; - bp->b_npages++; + if (tbp->b_flags & B_VMIO) { + for (j = 0; j < tbp->b_npages; j += 1) { + vm_page_t m; + m = tbp->b_pages[j]; + ++m->busy; + ++m->object->paging_in_progress; + if ((bp->b_npages == 0) || + (bp->b_pages[bp->b_npages - 1] != m)) { + bp->b_pages[bp->b_npages] = m; + bp->b_npages++; + } } } bp->b_bcount += size; diff --git a/sys/kern/vfs_export.c b/sys/kern/vfs_export.c index cd0c7247..7fec92b 100644 --- a/sys/kern/vfs_export.c +++ b/sys/kern/vfs_export.c @@ -36,7 +36,7 @@ * SUCH DAMAGE. 
* * @(#)vfs_subr.c 8.13 (Berkeley) 4/18/94 - * $Id: vfs_subr.c,v 1.50 1996/01/02 18:13:20 davidg Exp $ + * $Id: vfs_subr.c,v 1.51 1996/01/04 21:12:26 wollman Exp $ */ /* @@ -340,6 +340,7 @@ getnewvnode(tag, mp, vops, vpp) { register struct vnode *vp; +retry: vp = vnode_free_list.tqh_first; /* * we allocate a new vnode if @@ -360,16 +361,21 @@ getnewvnode(tag, mp, vops, vpp) numvnodes++; } else { TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); + if (vp->v_usage > 0) { + --vp->v_usage; + TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist); + goto retry; + } freevnodes--; - if (vp->v_usecount) - panic("free vnode isn't"); - /* see comment on why 0xdeadb is set at end of vgone (below) */ vp->v_freelist.tqe_prev = (struct vnode **) 0xdeadb; vp->v_lease = NULL; if (vp->v_type != VBAD) vgone(vp); + if (vp->v_usecount) + panic("free vnode isn't"); + #ifdef DIAGNOSTIC { int s; @@ -392,6 +398,7 @@ getnewvnode(tag, mp, vops, vpp) vp->v_clen = 0; vp->v_socket = 0; vp->v_writecount = 0; /* XXX */ + vp->v_usage = 0; } vp->v_type = VNON; cache_purge(vp); @@ -653,7 +660,8 @@ reassignbuf(bp, newvp) if (!tbp || (tbp->b_lblkno > bp->b_lblkno)) { bufinsvn(bp, &newvp->v_dirtyblkhd); } else { - while (tbp->b_vnbufs.le_next && (tbp->b_vnbufs.le_next->b_lblkno < bp->b_lblkno)) { + while (tbp->b_vnbufs.le_next && + (tbp->b_vnbufs.le_next->b_lblkno < bp->b_lblkno)) { tbp = tbp->b_vnbufs.le_next; } LIST_INSERT_AFTER(tbp, bp, b_vnbufs); @@ -845,6 +853,7 @@ vrele(vp) if (vp->v_flag & VAGE) { TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist); vp->v_flag &= ~VAGE; + vp->v_usage = 0; } else { TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist); } diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c index cd0c7247..7fec92b 100644 --- a/sys/kern/vfs_subr.c +++ b/sys/kern/vfs_subr.c @@ -36,7 +36,7 @@ * SUCH DAMAGE. 
* * @(#)vfs_subr.c 8.13 (Berkeley) 4/18/94 - * $Id: vfs_subr.c,v 1.50 1996/01/02 18:13:20 davidg Exp $ + * $Id: vfs_subr.c,v 1.51 1996/01/04 21:12:26 wollman Exp $ */ /* @@ -340,6 +340,7 @@ getnewvnode(tag, mp, vops, vpp) { register struct vnode *vp; +retry: vp = vnode_free_list.tqh_first; /* * we allocate a new vnode if @@ -360,16 +361,21 @@ getnewvnode(tag, mp, vops, vpp) numvnodes++; } else { TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); + if (vp->v_usage > 0) { + --vp->v_usage; + TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist); + goto retry; + } freevnodes--; - if (vp->v_usecount) - panic("free vnode isn't"); - /* see comment on why 0xdeadb is set at end of vgone (below) */ vp->v_freelist.tqe_prev = (struct vnode **) 0xdeadb; vp->v_lease = NULL; if (vp->v_type != VBAD) vgone(vp); + if (vp->v_usecount) + panic("free vnode isn't"); + #ifdef DIAGNOSTIC { int s; @@ -392,6 +398,7 @@ getnewvnode(tag, mp, vops, vpp) vp->v_clen = 0; vp->v_socket = 0; vp->v_writecount = 0; /* XXX */ + vp->v_usage = 0; } vp->v_type = VNON; cache_purge(vp); @@ -653,7 +660,8 @@ reassignbuf(bp, newvp) if (!tbp || (tbp->b_lblkno > bp->b_lblkno)) { bufinsvn(bp, &newvp->v_dirtyblkhd); } else { - while (tbp->b_vnbufs.le_next && (tbp->b_vnbufs.le_next->b_lblkno < bp->b_lblkno)) { + while (tbp->b_vnbufs.le_next && + (tbp->b_vnbufs.le_next->b_lblkno < bp->b_lblkno)) { tbp = tbp->b_vnbufs.le_next; } LIST_INSERT_AFTER(tbp, bp, b_vnbufs); @@ -845,6 +853,7 @@ vrele(vp) if (vp->v_flag & VAGE) { TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist); vp->v_flag &= ~VAGE; + vp->v_usage = 0; } else { TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist); } diff --git a/sys/kern/vfs_vnops.c b/sys/kern/vfs_vnops.c index 78a43a7..db739aa 100644 --- a/sys/kern/vfs_vnops.c +++ b/sys/kern/vfs_vnops.c @@ -36,7 +36,7 @@ * SUCH DAMAGE. 
* * @(#)vfs_vnops.c 8.2 (Berkeley) 1/21/94 - * $Id: vfs_vnops.c,v 1.21 1995/12/11 04:56:13 dyson Exp $ + * $Id: vfs_vnops.c,v 1.22 1995/12/17 21:23:24 phk Exp $ */ #include <sys/param.h> @@ -476,12 +476,12 @@ vn_vmio_open(vp, p, cred) /* * this is here for VMIO support */ - if (vp->v_type == VREG || vp->v_type == VBLK) { + if (vp->v_type == VREG /* || vp->v_type == VBLK */) { retry: if ((vp->v_flag & VVMIO) == 0) { if ((error = VOP_GETATTR(vp, &vat, cred, p)) != 0) return error; - (void) vnode_pager_alloc(vp, vat.va_size, 0, 0); + (void) vnode_pager_alloc(vp, OFF_TO_IDX(round_page(vat.va_size)), 0, 0); vp->v_flag |= VVMIO; } else { vm_object_t object; diff --git a/sys/miscfs/procfs/procfs_mem.c b/sys/miscfs/procfs/procfs_mem.c index 7dbbb7d..b536055 100644 --- a/sys/miscfs/procfs/procfs_mem.c +++ b/sys/miscfs/procfs/procfs_mem.c @@ -37,7 +37,7 @@ * * @(#)procfs_mem.c 8.4 (Berkeley) 1/21/94 * - * $Id: procfs_mem.c,v 1.13 1995/12/11 04:56:31 dyson Exp $ + * $Id: procfs_mem.c,v 1.14 1995/12/17 07:19:24 bde Exp $ */ /* @@ -171,7 +171,8 @@ procfs_rwmem(p, uio) /* Find space in kernel_map for the page we're interested in */ if (!error) error = vm_map_find(kernel_map, object, - IDX_TO_OFF(pindex), &kva, PAGE_SIZE, 1); + IDX_TO_OFF(pindex), &kva, PAGE_SIZE, 1, + VM_PROT_ALL, VM_PROT_ALL, 0); if (!error) { /* diff --git a/sys/msdosfs/msdosfs_denode.c b/sys/msdosfs/msdosfs_denode.c index 9f71630..2f86783 100644 --- a/sys/msdosfs/msdosfs_denode.c +++ b/sys/msdosfs/msdosfs_denode.c @@ -1,4 +1,4 @@ -/* $Id: msdosfs_denode.c,v 1.14 1995/12/03 16:41:53 bde Exp $ */ +/* $Id: msdosfs_denode.c,v 1.15 1995/12/07 12:47:19 davidg Exp $ */ /* $NetBSD: msdosfs_denode.c,v 1.9 1994/08/21 18:44:00 ws Exp $ */ /*- @@ -442,10 +442,11 @@ detrunc(dep, length, flags, cred, p) return EINVAL; } - vnode_pager_setsize(DETOV(dep), length); - if (dep->de_FileSize < length) + if (dep->de_FileSize < length) { + vnode_pager_setsize(DETOV(dep), length); return deextend(dep, length, cred); + } /* * If the 
desired length is 0 then remember the starting cluster of @@ -515,6 +516,7 @@ detrunc(dep, length, flags, cred, p) dep->de_flag |= DE_UPDATE; vflags = (length > 0 ? V_SAVE : 0) | V_SAVEMETA; vinvalbuf(DETOV(dep), vflags, cred, p, 0, 0); + vnode_pager_setsize(DETOV(dep), length); TIMEVAL_TO_TIMESPEC(&time, &ts); allerror = deupdat(dep, &ts, 1); #ifdef MSDOSFS_DEBUG diff --git a/sys/nfs/nfs_common.c b/sys/nfs/nfs_common.c index 6d064c7..f0c670c 100644 --- a/sys/nfs/nfs_common.c +++ b/sys/nfs/nfs_common.c @@ -34,7 +34,7 @@ * SUCH DAMAGE. * * @(#)nfs_subs.c 8.3 (Berkeley) 1/4/94 - * $Id: nfs_subs.c,v 1.26 1995/12/17 21:12:30 phk Exp $ + * $Id: nfs_subs.c,v 1.27 1996/01/13 23:27:56 phk Exp $ */ /* @@ -1941,7 +1941,7 @@ retry: if (VOP_GETATTR(vp, &vat, p->p_ucred, p) != 0) panic("nfsrv_vmio: VOP_GETATTR failed"); - (void) vnode_pager_alloc(vp, vat.va_size, 0, 0); + (void) vnode_pager_alloc(vp, OFF_TO_IDX(round_page(vat.va_size)), 0, 0); vp->v_flag |= VVMIO; } else { diff --git a/sys/nfs/nfs_subs.c b/sys/nfs/nfs_subs.c index 6d064c7..f0c670c 100644 --- a/sys/nfs/nfs_subs.c +++ b/sys/nfs/nfs_subs.c @@ -34,7 +34,7 @@ * SUCH DAMAGE. * * @(#)nfs_subs.c 8.3 (Berkeley) 1/4/94 - * $Id: nfs_subs.c,v 1.26 1995/12/17 21:12:30 phk Exp $ + * $Id: nfs_subs.c,v 1.27 1996/01/13 23:27:56 phk Exp $ */ /* @@ -1941,7 +1941,7 @@ retry: if (VOP_GETATTR(vp, &vat, p->p_ucred, p) != 0) panic("nfsrv_vmio: VOP_GETATTR failed"); - (void) vnode_pager_alloc(vp, vat.va_size, 0, 0); + (void) vnode_pager_alloc(vp, OFF_TO_IDX(round_page(vat.va_size)), 0, 0); vp->v_flag |= VVMIO; } else { diff --git a/sys/nfsclient/nfs_subs.c b/sys/nfsclient/nfs_subs.c index 6d064c7..f0c670c 100644 --- a/sys/nfsclient/nfs_subs.c +++ b/sys/nfsclient/nfs_subs.c @@ -34,7 +34,7 @@ * SUCH DAMAGE. 
* * @(#)nfs_subs.c 8.3 (Berkeley) 1/4/94 - * $Id: nfs_subs.c,v 1.26 1995/12/17 21:12:30 phk Exp $ + * $Id: nfs_subs.c,v 1.27 1996/01/13 23:27:56 phk Exp $ */ /* @@ -1941,7 +1941,7 @@ retry: if (VOP_GETATTR(vp, &vat, p->p_ucred, p) != 0) panic("nfsrv_vmio: VOP_GETATTR failed"); - (void) vnode_pager_alloc(vp, vat.va_size, 0, 0); + (void) vnode_pager_alloc(vp, OFF_TO_IDX(round_page(vat.va_size)), 0, 0); vp->v_flag |= VVMIO; } else { diff --git a/sys/nfsserver/nfs_srvsubs.c b/sys/nfsserver/nfs_srvsubs.c index 6d064c7..f0c670c 100644 --- a/sys/nfsserver/nfs_srvsubs.c +++ b/sys/nfsserver/nfs_srvsubs.c @@ -34,7 +34,7 @@ * SUCH DAMAGE. * * @(#)nfs_subs.c 8.3 (Berkeley) 1/4/94 - * $Id: nfs_subs.c,v 1.26 1995/12/17 21:12:30 phk Exp $ + * $Id: nfs_subs.c,v 1.27 1996/01/13 23:27:56 phk Exp $ */ /* @@ -1941,7 +1941,7 @@ retry: if (VOP_GETATTR(vp, &vat, p->p_ucred, p) != 0) panic("nfsrv_vmio: VOP_GETATTR failed"); - (void) vnode_pager_alloc(vp, vat.va_size, 0, 0); + (void) vnode_pager_alloc(vp, OFF_TO_IDX(round_page(vat.va_size)), 0, 0); vp->v_flag |= VVMIO; } else { diff --git a/sys/sys/bio.h b/sys/sys/bio.h index e0c260a..cd843c0 100644 --- a/sys/sys/bio.h +++ b/sys/sys/bio.h @@ -36,7 +36,7 @@ * SUCH DAMAGE. * * @(#)buf.h 8.7 (Berkeley) 1/21/94 - * $Id: buf.h,v 1.25 1995/12/11 04:57:20 dyson Exp $ + * $Id: buf.h,v 1.26 1995/12/28 23:34:28 davidg Exp $ */ #ifndef _SYS_BUF_H_ @@ -207,6 +207,7 @@ int bwrite __P((struct buf *)); void bdwrite __P((struct buf *)); void bawrite __P((struct buf *)); void brelse __P((struct buf *)); +void bqrelse __P((struct buf *)); int vfs_bio_awrite __P((struct buf *)); struct buf * getpbuf __P((void)); struct buf *incore __P((struct vnode *, daddr_t)); diff --git a/sys/sys/buf.h b/sys/sys/buf.h index e0c260a..cd843c0 100644 --- a/sys/sys/buf.h +++ b/sys/sys/buf.h @@ -36,7 +36,7 @@ * SUCH DAMAGE. 
* * @(#)buf.h 8.7 (Berkeley) 1/21/94 - * $Id: buf.h,v 1.25 1995/12/11 04:57:20 dyson Exp $ + * $Id: buf.h,v 1.26 1995/12/28 23:34:28 davidg Exp $ */ #ifndef _SYS_BUF_H_ @@ -207,6 +207,7 @@ int bwrite __P((struct buf *)); void bdwrite __P((struct buf *)); void bawrite __P((struct buf *)); void brelse __P((struct buf *)); +void bqrelse __P((struct buf *)); int vfs_bio_awrite __P((struct buf *)); struct buf * getpbuf __P((void)); struct buf *incore __P((struct vnode *, daddr_t)); diff --git a/sys/sys/vnode.h b/sys/sys/vnode.h index 9a4c602..26297d8 100644 --- a/sys/sys/vnode.h +++ b/sys/sys/vnode.h @@ -31,7 +31,7 @@ * SUCH DAMAGE. * * @(#)vnode.h 8.7 (Berkeley) 2/4/94 - * $Id: vnode.h,v 1.27 1995/12/17 21:23:44 phk Exp $ + * $Id: vnode.h,v 1.28 1995/12/25 07:24:13 bde Exp $ */ #ifndef _SYS_VNODE_H_ @@ -96,6 +96,7 @@ struct vnode { daddr_t v_lasta; /* last allocation */ int v_clen; /* length of current cluster */ int v_ralen; /* Read-ahead length */ + int v_usage; /* Vnode usage counter */ daddr_t v_maxra; /* last readahead block */ void *v_object; /* Place to store VM object */ enum vtagtype v_tag; /* type of underlying data */ diff --git a/sys/ufs/ffs/ffs_balloc.c b/sys/ufs/ffs/ffs_balloc.c index cbe9c60..005fea6 100644 --- a/sys/ufs/ffs/ffs_balloc.c +++ b/sys/ufs/ffs/ffs_balloc.c @@ -31,7 +31,7 @@ * SUCH DAMAGE. * * @(#)ffs_balloc.c 8.4 (Berkeley) 9/23/93 - * $Id: ffs_balloc.c,v 1.8 1995/05/28 04:32:23 davidg Exp $ + * $Id: ffs_balloc.c,v 1.9 1995/05/30 08:14:59 rgrimes Exp $ */ #include <sys/param.h> @@ -210,7 +210,7 @@ ffs_balloc(ip, bn, size, cred, bpp, flags) break; i += 1; if (nb != 0) { - brelse(bp); + bqrelse(bp); continue; } if (pref == 0) diff --git a/sys/ufs/ffs/ffs_inode.c b/sys/ufs/ffs/ffs_inode.c index 8c99d2b..63186c6 100644 --- a/sys/ufs/ffs/ffs_inode.c +++ b/sys/ufs/ffs/ffs_inode.c @@ -31,7 +31,7 @@ * SUCH DAMAGE. 
* * @(#)ffs_inode.c 8.5 (Berkeley) 12/30/93 - * $Id: ffs_inode.c,v 1.18 1995/12/11 04:57:37 dyson Exp $ + * $Id: ffs_inode.c,v 1.19 1996/01/05 18:31:48 wollman Exp $ */ #include "opt_quota.h" @@ -220,6 +220,7 @@ ffs_truncate(ap) aflags = B_CLRBUF; if (ap->a_flags & IO_SYNC) aflags |= B_SYNC; + vnode_pager_setsize(ovp, length); error = ffs_balloc(oip, lbn, offset + 1, ap->a_cred, &bp, aflags); if (error) @@ -231,7 +232,6 @@ ffs_truncate(ap) bdwrite(bp); else bawrite(bp); - vnode_pager_setsize(ovp, length); oip->i_flag |= IN_CHANGE | IN_UPDATE; return (VOP_UPDATE(ovp, &tv, &tv, 1)); } @@ -290,7 +290,7 @@ ffs_truncate(ap) for (i = NDADDR - 1; i > lastblock; i--) oip->i_db[i] = 0; oip->i_flag |= IN_CHANGE | IN_UPDATE; - error = VOP_UPDATE(ovp, &tv, &tv, 0); + error = VOP_UPDATE(ovp, &tv, &tv, ((length > 0) ? 0 : 1)); if (error) allerror = error; /* diff --git a/sys/ufs/ffs/ffs_vfsops.c b/sys/ufs/ffs/ffs_vfsops.c index e310708..7fe9299 100644 --- a/sys/ufs/ffs/ffs_vfsops.c +++ b/sys/ufs/ffs/ffs_vfsops.c @@ -31,7 +31,7 @@ * SUCH DAMAGE. * * @(#)ffs_vfsops.c 8.8 (Berkeley) 4/18/94 - * $Id: ffs_vfsops.c,v 1.33 1996/01/05 18:31:49 wollman Exp $ + * $Id: ffs_vfsops.c,v 1.34 1996/01/14 18:54:59 bde Exp $ */ #include "opt_quota.h" @@ -908,13 +908,13 @@ restart: * still zero, it will be unlinked and returned to the free * list by vput(). */ - vput(vp); brelse(bp); + vput(vp); *vpp = NULL; return (error); } ip->i_din = *((struct dinode *)bp->b_data + ino_to_fsbo(fs, ino)); - brelse(bp); + bqrelse(bp); /* * Initialize the vnode from the inode, check for aliases. diff --git a/sys/ufs/ufs/ufs_bmap.c b/sys/ufs/ufs/ufs_bmap.c index c8b3cd4..2b2531d 100644 --- a/sys/ufs/ufs/ufs_bmap.c +++ b/sys/ufs/ufs/ufs_bmap.c @@ -36,7 +36,7 @@ * SUCH DAMAGE. 
* * @(#)ufs_bmap.c 8.6 (Berkeley) 1/21/94 - * $Id: ufs_bmap.c,v 1.9 1995/09/04 00:21:09 dyson Exp $ + * $Id: ufs_bmap.c,v 1.10 1995/11/05 23:07:37 dyson Exp $ */ #include <sys/param.h> @@ -188,7 +188,7 @@ ufs_bmaparray(vp, bn, bnp, ap, nump, runp, runb) * or we have a disk address for it, go fetch it. */ if (bp) - brelse(bp); + bqrelse(bp); xap->in_exists = 1; bp = getblk(vp, metalbn, mp->mnt_stat.f_iosize, 0, 0); @@ -226,7 +226,7 @@ ufs_bmaparray(vp, bn, bnp, ap, nump, runp, runb) } } if (bp) - brelse(bp); + bqrelse(bp); daddr = blkptrtodb(ump, daddr); *bnp = daddr == 0 ? -1 : daddr; diff --git a/sys/ufs/ufs/ufs_readwrite.c b/sys/ufs/ufs/ufs_readwrite.c index 582595c..8edb6a4 100644 --- a/sys/ufs/ufs/ufs_readwrite.c +++ b/sys/ufs/ufs/ufs_readwrite.c @@ -31,7 +31,7 @@ * SUCH DAMAGE. * * @(#)ufs_readwrite.c 8.7 (Berkeley) 1/21/94 - * $Id: ufs_readwrite.c,v 1.18 1996/01/06 12:49:53 phk Exp $ + * $Id: ufs_readwrite.c,v 1.19 1996/01/07 09:42:36 phk Exp $ */ #ifdef LFS_READWRITE @@ -107,6 +107,7 @@ READ(ap) nextlbn = lbn + 1; size = BLKSIZE(fs, ip, lbn); blkoffset = blkoff(fs, uio->uio_offset); + xfersize = fs->fs_bsize - blkoffset; if (uio->uio_resid < xfersize) xfersize = uio->uio_resid; @@ -129,8 +130,11 @@ READ(ap) } else error = bread(vp, lbn, size, NOCRED, &bp); #endif - if (error) + if (error) { + brelse(bp); + bp = NULL; break; + } vp->v_lastr = lbn; /* @@ -155,10 +159,10 @@ READ(ap) if (error) break; - brelse(bp); + bqrelse(bp); } if (bp != NULL) - brelse(bp); + bqrelse(bp); ip->i_flag |= IN_ACCESS; return (error); } @@ -280,8 +284,7 @@ WRITE(ap) if (ioflag & IO_SYNC) { (void)bwrite(bp); - } else if (xfersize + blkoffset == fs->fs_bsize && - (vp->v_mount->mnt_flag & MNT_ASYNC) == 0) { + } else if (xfersize + blkoffset == fs->fs_bsize) { if (doclusterwrite) { bp->b_flags |= B_CLUSTEROK; cluster_write(bp, ip->i_size); diff --git a/sys/ufs/ufs/ufs_vnops.c b/sys/ufs/ufs/ufs_vnops.c index 4ac9bbb..4b60aaa 100644 --- a/sys/ufs/ufs/ufs_vnops.c +++ 
b/sys/ufs/ufs/ufs_vnops.c @@ -36,7 +36,7 @@ * SUCH DAMAGE. * * @(#)ufs_vnops.c 8.10 (Berkeley) 4/1/94 - * $Id: ufs_vnops.c,v 1.35 1995/12/11 04:57:49 dyson Exp $ + * $Id: ufs_vnops.c,v 1.36 1996/01/05 18:31:58 wollman Exp $ */ #include "opt_quota.h" @@ -2131,6 +2131,7 @@ ufs_makeinode(mode, dvp, vpp, cnp) #endif /* EXT2FS */ if (error) goto bad; + if ((cnp->cn_flags & SAVESTART) == 0) FREE(cnp->cn_pnbuf, M_NAMEI); vput(dvp); diff --git a/sys/vm/default_pager.c b/sys/vm/default_pager.c index f61439b..dae1837 100644 --- a/sys/vm/default_pager.c +++ b/sys/vm/default_pager.c @@ -28,7 +28,7 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $Id: default_pager.c,v 1.4 1995/12/11 04:57:56 dyson Exp $ + * $Id: default_pager.c,v 1.5 1995/12/14 09:54:46 phk Exp $ */ #include <sys/param.h> @@ -80,7 +80,7 @@ default_pager_alloc(handle, size, prot, offset) if (handle != NULL) panic("default_pager_alloc: handle specified"); - return vm_object_allocate(OBJT_DEFAULT, offset + size); + return vm_object_allocate(OBJT_DEFAULT, OFF_TO_IDX(offset) + size); } static void diff --git a/sys/vm/device_pager.c b/sys/vm/device_pager.c index 63201de..942af2d 100644 --- a/sys/vm/device_pager.c +++ b/sys/vm/device_pager.c @@ -36,7 +36,7 @@ * SUCH DAMAGE. * * @(#)device_pager.c 8.1 (Berkeley) 6/11/93 - * $Id: device_pager.c,v 1.18 1995/12/13 15:13:54 julian Exp $ + * $Id: device_pager.c,v 1.19 1995/12/14 09:54:49 phk Exp $ */ #include <sys/param.h> @@ -128,7 +128,7 @@ dev_pager_alloc(handle, size, prot, foff) * * XXX assumes VM_PROT_* == PROT_* */ - npages = atop(round_page(size)); + npages = size; for (off = foff; npages--; off += PAGE_SIZE) if ((*mapfunc) (dev, off, (int) prot) == -1) return (NULL); @@ -152,7 +152,7 @@ dev_pager_alloc(handle, size, prot, foff) * Allocate object and associate it with the pager. 
*/ object = vm_object_allocate(OBJT_DEVICE, - OFF_TO_IDX(foff + size)); + OFF_TO_IDX(foff) + size); object->handle = handle; TAILQ_INIT(&object->un_pager.devp.devp_pglist); TAILQ_INSERT_TAIL(&dev_pager_object_list, object, pager_object_list); @@ -161,8 +161,8 @@ dev_pager_alloc(handle, size, prot, foff) * Gain a reference to the object. */ vm_object_reference(object); - if (OFF_TO_IDX(foff + size) > object->size) - object->size = OFF_TO_IDX(foff + size); + if (OFF_TO_IDX(foff) + size > object->size) + object->size = OFF_TO_IDX(foff) + size; } dev_pager_alloc_lock = 0; @@ -279,7 +279,7 @@ dev_pager_getfake(paddr) m->valid = VM_PAGE_BITS_ALL; m->dirty = 0; m->busy = 0; - m->bmapped = 0; + m->queue = PQ_NONE; m->wire_count = 1; m->phys_addr = paddr; diff --git a/sys/vm/swap_pager.c b/sys/vm/swap_pager.c index 1fc7f85..25b608d 100644 --- a/sys/vm/swap_pager.c +++ b/sys/vm/swap_pager.c @@ -39,7 +39,7 @@ * from: Utah $Hdr: swap_pager.c 1.4 91/04/30$ * * @(#)swap_pager.c 8.9 (Berkeley) 3/21/94 - * $Id: swap_pager.c,v 1.57 1995/12/14 09:54:52 phk Exp $ + * $Id: swap_pager.c,v 1.58 1995/12/17 07:19:55 bde Exp $ */ /* @@ -302,13 +302,13 @@ swap_pager_alloc(handle, size, prot, offset) * rip support of "named anonymous regions" out altogether. 
*/ object = vm_object_allocate(OBJT_SWAP, - OFF_TO_IDX(offset+ PAGE_SIZE - 1 + size)); + OFF_TO_IDX(offset + PAGE_SIZE - 1) + size); object->handle = handle; (void) swap_pager_swp_alloc(object, M_WAITOK); } } else { object = vm_object_allocate(OBJT_SWAP, - OFF_TO_IDX(offset + PAGE_SIZE - 1 + size)); + OFF_TO_IDX(offset + PAGE_SIZE - 1) + size); (void) swap_pager_swp_alloc(object, M_WAITOK); } @@ -1266,6 +1266,7 @@ swap_pager_putpages(object, m, count, sync, rtvals) swap_pager_free.tqh_first->spc_list.tqe_next->spc_list.tqe_next == NULL) { s = splbio(); if (curproc == pageproc) { +retryfree: /* * pageout daemon needs a swap control block */ @@ -1273,33 +1274,42 @@ swap_pager_putpages(object, m, count, sync, rtvals) /* * if it does not get one within a short time, then * there is a potential deadlock, so we go-on trying - * to free pages. + * to free pages. It is important to block here as opposed + * to returning, thereby allowing the pageout daemon to continue. + * It is likely that pageout daemon will start suboptimally + * reclaiming vnode backed pages if we don't block. Since the + * I/O subsystem is probably already fully utilized, might as + * well wait. */ - tsleep(&swap_pager_free, PVM, "swpfre", hz/10); - swap_pager_sync(); - if (swap_pager_free.tqh_first == NULL || - swap_pager_free.tqh_first->spc_list.tqe_next == NULL || - swap_pager_free.tqh_first->spc_list.tqe_next->spc_list.tqe_next == NULL) { - splx(s); - return VM_PAGER_AGAIN; + if (tsleep(&swap_pager_free, PVM, "swpfre", hz/5)) { + swap_pager_sync(); + if (swap_pager_free.tqh_first == NULL || + swap_pager_free.tqh_first->spc_list.tqe_next == NULL || + swap_pager_free.tqh_first->spc_list.tqe_next->spc_list.tqe_next == NULL) { + splx(s); + return VM_PAGER_AGAIN; + } + } else { + /* + * we make sure that pageouts aren't taking up all of + * the free swap control blocks. 
+ */ + swap_pager_sync(); + if (swap_pager_free.tqh_first == NULL || + swap_pager_free.tqh_first->spc_list.tqe_next == NULL || + swap_pager_free.tqh_first->spc_list.tqe_next->spc_list.tqe_next == NULL) { + goto retryfree; + } } - } else + } else { pagedaemon_wakeup(); - while (swap_pager_free.tqh_first == NULL || - swap_pager_free.tqh_first->spc_list.tqe_next == NULL || - swap_pager_free.tqh_first->spc_list.tqe_next->spc_list.tqe_next == NULL) { - if (curproc == pageproc) { - swap_pager_needflags |= SWAP_FREE_NEEDED_BY_PAGEOUT; - if((cnt.v_free_count + cnt.v_cache_count) > cnt.v_free_reserved) - wakeup(&cnt.v_free_count); - } - - swap_pager_needflags |= SWAP_FREE_NEEDED; - tsleep(&swap_pager_free, PVM, "swpfre", 0); - if (curproc == pageproc) - swap_pager_sync(); - else + while (swap_pager_free.tqh_first == NULL || + swap_pager_free.tqh_first->spc_list.tqe_next == NULL || + swap_pager_free.tqh_first->spc_list.tqe_next->spc_list.tqe_next == NULL) { + swap_pager_needflags |= SWAP_FREE_NEEDED; + tsleep(&swap_pager_free, PVM, "swpfre", 0); pagedaemon_wakeup(); + } } splx(s); } @@ -1436,7 +1446,7 @@ swap_pager_putpages(object, m, count, sync, rtvals) * optimization, if a page has been read * during the pageout process, we activate it. 
*/ - if ((m[i]->flags & PG_ACTIVE) == 0 && + if ((m[i]->queue != PQ_ACTIVE) && ((m[i]->flags & (PG_WANTED|PG_REFERENCED)) || pmap_is_referenced(VM_PAGE_TO_PHYS(m[i])))) { vm_page_activate(m[i]); @@ -1542,7 +1552,7 @@ swap_pager_finish(spc) for (i = 0; i < spc->spc_count; i++) { pmap_clear_modify(VM_PAGE_TO_PHYS(spc->spc_m[i])); spc->spc_m[i]->dirty = 0; - if ((spc->spc_m[i]->flags & PG_ACTIVE) == 0 && + if ((spc->spc_m[i]->queue != PQ_ACTIVE) && ((spc->spc_m[i]->flags & PG_WANTED) || pmap_is_referenced(VM_PAGE_TO_PHYS(spc->spc_m[i])))) vm_page_activate(spc->spc_m[i]); } diff --git a/sys/vm/vm_fault.c b/sys/vm/vm_fault.c index 87090a4..771da03 100644 --- a/sys/vm/vm_fault.c +++ b/sys/vm/vm_fault.c @@ -66,7 +66,7 @@ * any improvements or extensions that they make and grant Carnegie the * rights to redistribute these changes. * - * $Id: vm_fault.c,v 1.38 1995/12/07 12:48:10 davidg Exp $ + * $Id: vm_fault.c,v 1.39 1995/12/11 04:58:06 dyson Exp $ */ /* @@ -157,7 +157,7 @@ vm_fault(map, vaddr, fault_type, change_wiring) #define RELEASE_PAGE(m) { \ PAGE_WAKEUP(m); \ - if ((m->flags & PG_ACTIVE) == 0) vm_page_activate(m); \ + if (m->queue != PQ_ACTIVE) vm_page_activate(m); \ } #define UNLOCK_MAP { \ @@ -280,7 +280,7 @@ RetryFault:; * Mark page busy for other processes, and the pagedaemon. */ m->flags |= PG_BUSY; - if ((m->flags & PG_CACHE) && + if ((m->queue == PQ_CACHE) && (cnt.v_free_count + cnt.v_cache_count) < cnt.v_free_reserved) { UNLOCK_AND_DEALLOCATE; VM_WAIT; @@ -288,8 +288,9 @@ RetryFault:; goto RetryFault; } - if (m->valid && ((m->valid & VM_PAGE_BITS_ALL) != VM_PAGE_BITS_ALL) && - m->object != kernel_object && m->object != kmem_object) { + if (m->valid && + ((m->valid & VM_PAGE_BITS_ALL) != VM_PAGE_BITS_ALL) && + m->object != kernel_object && m->object != kmem_object) { goto readrest; } break; @@ -306,7 +307,7 @@ RetryFault:; * Allocate a new page for this object/offset pair. 
*/ m = vm_page_alloc(object, pindex, - vp?VM_ALLOC_NORMAL:(VM_ALLOC_NORMAL|VM_ALLOC_ZERO)); + vp?VM_ALLOC_NORMAL:VM_ALLOC_ZERO); if (m == NULL) { UNLOCK_AND_DEALLOCATE; @@ -504,9 +505,8 @@ readrest: * call. */ - if ((m->flags & PG_ACTIVE) == 0) + if (m->queue != PQ_ACTIVE) vm_page_activate(m); - vm_page_protect(m, VM_PROT_NONE); /* * We no longer need the old page or object. @@ -642,7 +642,7 @@ readrest: else vm_page_unwire(m); } else { - if ((m->flags & PG_ACTIVE) == 0) + if (m->queue != PQ_ACTIVE) vm_page_activate(m); } @@ -654,8 +654,6 @@ readrest: } } - if ((m->flags & PG_BUSY) == 0) - printf("page not busy: %d\n", m->pindex); /* * Unlock everything, and return */ diff --git a/sys/vm/vm_glue.c b/sys/vm/vm_glue.c index c6ff0e0..77d82a6 100644 --- a/sys/vm/vm_glue.c +++ b/sys/vm/vm_glue.c @@ -59,7 +59,7 @@ * any improvements or extensions that they make and grant Carnegie the * rights to redistribute these changes. * - * $Id: vm_glue.c,v 1.33 1995/12/14 09:54:57 phk Exp $ + * $Id: vm_glue.c,v 1.35 1996/01/04 21:13:14 wollman Exp $ */ #include "opt_sysvipc.h" @@ -87,6 +87,8 @@ #include <vm/vm_pageout.h> #include <vm/vm_kern.h> #include <vm/vm_extern.h> +#include <vm/vm_object.h> +#include <vm/vm_pager.h> #include <sys/user.h> @@ -213,9 +215,11 @@ vm_fork(p1, p2, isvfork) int isvfork; { register struct user *up; - vm_offset_t addr, ptaddr; + vm_offset_t addr, ptaddr, ptpa; int error, i; - struct vm_map *vp; + vm_map_t vp; + pmap_t pvp; + vm_page_t stkm; while ((cnt.v_free_count + cnt.v_cache_count) < cnt.v_free_min) { VM_WAIT; @@ -243,30 +247,48 @@ vm_fork(p1, p2, isvfork) addr = (vm_offset_t) kstack; vp = &p2->p_vmspace->vm_map; + pvp = &p2->p_vmspace->vm_pmap; /* get new pagetables and kernel stack */ - (void) vm_map_find(vp, NULL, 0, &addr, UPT_MAX_ADDRESS - addr, FALSE); - - /* force in the page table encompassing the UPAGES */ - ptaddr = trunc_page((u_int) vtopte(addr)); - error = vm_map_pageable(vp, ptaddr, ptaddr + PAGE_SIZE, FALSE); - if (error) - 
panic("vm_fork: wire of PT failed. error=%d", error); - - /* and force in (demand-zero) the UPAGES */ - error = vm_map_pageable(vp, addr, addr + UPAGES * PAGE_SIZE, FALSE); - if (error) - panic("vm_fork: wire of UPAGES failed. error=%d", error); + (void) vm_map_find(vp, NULL, 0, &addr, UPT_MAX_ADDRESS - addr, FALSE, + VM_PROT_ALL, VM_PROT_ALL, 0); /* get a kernel virtual address for the UPAGES for this proc */ up = (struct user *) kmem_alloc_pageable(u_map, UPAGES * PAGE_SIZE); if (up == NULL) panic("vm_fork: u_map allocation failed"); - /* and force-map the upages into the kernel pmap */ - for (i = 0; i < UPAGES; i++) - pmap_kenter(((vm_offset_t) up) + PAGE_SIZE * i, - pmap_extract(vp->pmap, addr + PAGE_SIZE * i)); + p2->p_vmspace->vm_upages_obj = vm_object_allocate( OBJT_DEFAULT, + UPAGES); + + ptaddr = trunc_page((u_int) vtopte(kstack)); + (void) vm_fault(vp, ptaddr, VM_PROT_READ|VM_PROT_WRITE, FALSE); + ptpa = pmap_extract(pvp, ptaddr); + if (ptpa == 0) { + panic("vm_fork: no pte for UPAGES"); + } + stkm = PHYS_TO_VM_PAGE(ptpa); + vm_page_hold(stkm); + + for(i=0;i<UPAGES;i++) { + vm_page_t m; + + while ((m = vm_page_alloc(p2->p_vmspace->vm_upages_obj, i, VM_ALLOC_ZERO)) == NULL) { + VM_WAIT; + } + + vm_page_wire(m); + m->flags &= ~PG_BUSY; + pmap_enter( pvp, (vm_offset_t) kstack + i * PAGE_SIZE, + VM_PAGE_TO_PHYS(m), VM_PROT_READ|VM_PROT_WRITE, 1); + pmap_kenter(((vm_offset_t) up) + i * PAGE_SIZE, + VM_PAGE_TO_PHYS(m)); + if ((m->flags & PG_ZERO) == 0) + bzero(((caddr_t) up) + i * PAGE_SIZE, PAGE_SIZE); + m->flags &= ~PG_ZERO; + m->valid = VM_PAGE_BITS_ALL; + } + vm_page_unhold(stkm); p2->p_addr = up; @@ -334,37 +356,62 @@ faultin(p) int s; if ((p->p_flag & P_INMEM) == 0) { - vm_map_t map; + vm_map_t map = &p->p_vmspace->vm_map; + pmap_t pmap = &p->p_vmspace->vm_pmap; + vm_page_t stkm, m; + vm_offset_t ptpa; int error; ++p->p_lock; - map = &p->p_vmspace->vm_map; - /* force the page table encompassing the kernel stack (upages) */ ptaddr = trunc_page((u_int) 
vtopte(kstack)); - error = vm_map_pageable(map, ptaddr, ptaddr + PAGE_SIZE, FALSE); - if (error) - panic("faultin: wire of PT failed. error=%d", error); - - /* wire in the UPAGES */ - error = vm_map_pageable(map, (vm_offset_t) kstack, - (vm_offset_t) kstack + UPAGES * PAGE_SIZE, FALSE); - if (error) - panic("faultin: wire of UPAGES failed. error=%d", error); - - /* and map them nicely into the kernel pmap */ - for (i = 0; i < UPAGES; i++) { - vm_offset_t off = i * PAGE_SIZE; - vm_offset_t pa = (vm_offset_t) - pmap_extract(&p->p_vmspace->vm_pmap, - (vm_offset_t) kstack + off); - - if (pa == 0) - panic("faultin: missing page for UPAGES\n"); - - pmap_kenter(((vm_offset_t) p->p_addr) + off, pa); + (void) vm_fault(map, ptaddr, VM_PROT_READ|VM_PROT_WRITE, FALSE); + ptpa = pmap_extract(&p->p_vmspace->vm_pmap, ptaddr); + if (ptpa == 0) { + panic("vm_fork: no pte for UPAGES"); } + stkm = PHYS_TO_VM_PAGE(ptpa); + vm_page_hold(stkm); + for(i=0;i<UPAGES;i++) { + int s; + s = splhigh(); + +retry: + if ((m = vm_page_lookup(p->p_vmspace->vm_upages_obj, i)) == NULL) { + if ((m = vm_page_alloc(p->p_vmspace->vm_upages_obj, i, VM_ALLOC_NORMAL)) == NULL) { + VM_WAIT; + goto retry; + } + } else { + if ((m->flags & PG_BUSY) || m->busy) { + m->flags |= PG_WANTED; + tsleep(m, PVM, "swinuw",0); + goto retry; + } + } + vm_page_wire(m); + if (m->valid == VM_PAGE_BITS_ALL) + m->flags &= ~PG_BUSY; + splx(s); + + pmap_enter( pmap, (vm_offset_t) kstack + i * PAGE_SIZE, + VM_PAGE_TO_PHYS(m), VM_PROT_READ|VM_PROT_WRITE, TRUE); + pmap_kenter(((vm_offset_t) p->p_addr) + i * PAGE_SIZE, + VM_PAGE_TO_PHYS(m)); + if (m->valid != VM_PAGE_BITS_ALL) { + int rv; + rv = vm_pager_get_pages(p->p_vmspace->vm_upages_obj, + &m, 1, 0); + if (rv != VM_PAGER_OK) + panic("faultin: cannot get upages for proc: %d\n", p->p_pid); + m->valid = VM_PAGE_BITS_ALL; + m->flags &= ~PG_BUSY; + } + } + vm_page_unhold(stkm); + + s = splhigh(); if (p->p_stat == SRUN) @@ -402,7 +449,8 @@ loop: pp = NULL; ppri = INT_MIN; for (p = 
(struct proc *) allproc; p != NULL; p = p->p_next) { - if (p->p_stat == SRUN && (p->p_flag & (P_INMEM | P_SWAPPING)) == 0) { + if (p->p_stat == SRUN && + (p->p_flag & (P_INMEM | P_SWAPPING)) == 0) { int mempri; pri = p->p_swtime + p->p_slptime - p->p_nice * 8; @@ -515,6 +563,7 @@ swapout(p) register struct proc *p; { vm_map_t map = &p->p_vmspace->vm_map; + pmap_t pmap = &p->p_vmspace->vm_pmap; vm_offset_t ptaddr; int i; @@ -535,14 +584,16 @@ swapout(p) /* * let the upages be paged */ - for(i=0;i<UPAGES;i++) + for(i=0;i<UPAGES;i++) { + vm_page_t m; + if ((m = vm_page_lookup(p->p_vmspace->vm_upages_obj, i)) == NULL) + panic("swapout: upage already missing???"); + m->dirty = VM_PAGE_BITS_ALL; + vm_page_unwire(m); pmap_kremove( (vm_offset_t) p->p_addr + PAGE_SIZE * i); - - vm_map_pageable(map, (vm_offset_t) kstack, - (vm_offset_t) kstack + UPAGES * PAGE_SIZE, TRUE); - - ptaddr = trunc_page((u_int) vtopte(kstack)); - vm_map_pageable(map, ptaddr, ptaddr + PAGE_SIZE, TRUE); + } + pmap_remove(pmap, (vm_offset_t) kstack, + (vm_offset_t) kstack + PAGE_SIZE * UPAGES); p->p_flag &= ~P_SWAPPING; p->p_swtime = 0; diff --git a/sys/vm/vm_kern.c b/sys/vm/vm_kern.c index 6b3b006..9569a39 100644 --- a/sys/vm/vm_kern.c +++ b/sys/vm/vm_kern.c @@ -61,7 +61,7 @@ * any improvements or extensions that they make and grant Carnegie the * rights to redistribute these changes. 
* - * $Id: vm_kern.c,v 1.19 1995/12/10 14:52:09 bde Exp $ + * $Id: vm_kern.c,v 1.20 1995/12/11 04:58:09 dyson Exp $ */ /* @@ -118,7 +118,7 @@ kmem_alloc_pageable(map, size) size = round_page(size); addr = vm_map_min(map); result = vm_map_find(map, NULL, (vm_offset_t) 0, - &addr, size, TRUE); + &addr, size, TRUE, VM_PROT_ALL, VM_PROT_ALL, 0); if (result != KERN_SUCCESS) { return (0); } @@ -157,7 +157,8 @@ kmem_alloc(map, size) } offset = addr - VM_MIN_KERNEL_ADDRESS; vm_object_reference(kernel_object); - vm_map_insert(map, kernel_object, offset, addr, addr + size); + vm_map_insert(map, kernel_object, offset, addr, addr + size, + VM_PROT_ALL, VM_PROT_ALL, 0); vm_map_unlock(map); /* @@ -182,8 +183,7 @@ kmem_alloc(map, size) vm_page_t mem; while ((mem = vm_page_alloc(kernel_object, - OFF_TO_IDX(offset + i), - (VM_ALLOC_NORMAL|VM_ALLOC_ZERO))) == NULL) { + OFF_TO_IDX(offset + i), VM_ALLOC_ZERO)) == NULL) { VM_WAIT; } if ((mem->flags & PG_ZERO) == 0) @@ -249,7 +249,7 @@ kmem_suballoc(parent, min, max, size, pageable) *min = (vm_offset_t) vm_map_min(parent); ret = vm_map_find(parent, NULL, (vm_offset_t) 0, - min, size, TRUE); + min, size, TRUE, VM_PROT_ALL, VM_PROT_ALL, 0); if (ret != KERN_SUCCESS) { printf("kmem_suballoc: bad status return of %d.\n", ret); panic("kmem_suballoc"); @@ -316,7 +316,8 @@ kmem_malloc(map, size, waitflag) } offset = addr - vm_map_min(kmem_map); vm_object_reference(kmem_object); - vm_map_insert(map, kmem_object, offset, addr, addr + size); + vm_map_insert(map, kmem_object, offset, addr, addr + size, + VM_PROT_ALL, VM_PROT_ALL, 0); /* * If we can wait, just mark the range as wired (will fault pages as @@ -376,6 +377,7 @@ kmem_malloc(map, size, waitflag) */ for (i = 0; i < size; i += PAGE_SIZE) { m = vm_page_lookup(kmem_object, OFF_TO_IDX(offset + i)); + vm_page_wire(m); pmap_kenter(addr + i, VM_PAGE_TO_PHYS(m)); } vm_map_unlock(map); @@ -416,7 +418,7 @@ kmem_alloc_wait(map, size) vm_map_unlock(map); tsleep(map, PVM, "kmaw", 0); } - 
vm_map_insert(map, NULL, (vm_offset_t) 0, addr, addr + size); + vm_map_insert(map, NULL, (vm_offset_t) 0, addr, addr + size, VM_PROT_ALL, VM_PROT_ALL, 0); vm_map_unlock(map); return (addr); } @@ -456,7 +458,7 @@ kmem_init(start, end) /* N.B.: cannot use kgdb to debug, starting with this assignment ... */ kernel_map = m; (void) vm_map_insert(m, NULL, (vm_offset_t) 0, - VM_MIN_KERNEL_ADDRESS, start); + VM_MIN_KERNEL_ADDRESS, start, VM_PROT_ALL, VM_PROT_ALL, 0); /* ... and ending with the completion of the above `insert' */ vm_map_unlock(m); } diff --git a/sys/vm/vm_map.c b/sys/vm/vm_map.c index 3bfb8ad..e1d9330 100644 --- a/sys/vm/vm_map.c +++ b/sys/vm/vm_map.c @@ -61,7 +61,7 @@ * any improvements or extensions that they make and grant Carnegie the * rights to redistribute these changes. * - * $Id: vm_map.c,v 1.30 1995/12/14 09:54:59 phk Exp $ + * $Id: vm_map.c,v 1.31 1996/01/04 21:13:17 wollman Exp $ */ /* @@ -151,6 +151,7 @@ vm_offset_t kentry_data; vm_size_t kentry_data_size; static vm_map_entry_t kentry_free; static vm_map_t kmap_free; +extern char kstack[]; static int kentry_count; static vm_offset_t mapvm_start, mapvm, mapvmmax; @@ -241,12 +242,17 @@ vmspace_free(vm) panic("vmspace_free: attempt to free already freed vmspace"); if (--vm->vm_refcnt == 0) { + int s, i; + + pmap_remove(&vm->vm_pmap, (vm_offset_t) kstack, (vm_offset_t) kstack+UPAGES*PAGE_SIZE); + /* * Lock the map, to wait out all other references to it. * Delete all of the mappings and pages they hold, then call * the pmap module to reclaim anything left. */ vm_map_lock(&vm->vm_map); + vm_object_deallocate(vm->vm_upages_obj); (void) vm_map_delete(&vm->vm_map, vm->vm_map.min_offset, vm->vm_map.max_offset); vm_map_unlock(&vm->vm_map); @@ -509,12 +515,14 @@ vm_map_deallocate(map) * Requires that the map be locked, and leaves it so. 
*/ int -vm_map_insert(map, object, offset, start, end) +vm_map_insert(map, object, offset, start, end, prot, max, cow) vm_map_t map; vm_object_t object; vm_ooffset_t offset; vm_offset_t start; vm_offset_t end; + vm_prot_t prot, max; + int cow; { register vm_map_entry_t new_entry; register vm_map_entry_t prev_entry; @@ -558,8 +566,8 @@ vm_map_insert(map, object, offset, start, end) (prev_entry->is_a_map == FALSE) && (prev_entry->is_sub_map == FALSE) && (prev_entry->inheritance == VM_INHERIT_DEFAULT) && - (prev_entry->protection == VM_PROT_DEFAULT) && - (prev_entry->max_protection == VM_PROT_DEFAULT) && + (prev_entry->protection == prot) && + (prev_entry->max_protection == max) && (prev_entry->wired_count == 0)) { if (vm_object_coalesce(prev_entry->object.vm_object, @@ -591,13 +599,20 @@ vm_map_insert(map, object, offset, start, end) new_entry->object.vm_object = object; new_entry->offset = offset; - new_entry->copy_on_write = FALSE; - new_entry->needs_copy = FALSE; + if (cow & MAP_COPY_NEEDED) + new_entry->needs_copy = TRUE; + else + new_entry->needs_copy = FALSE; + + if (cow & MAP_COPY_ON_WRITE) + new_entry->copy_on_write = TRUE; + else + new_entry->copy_on_write = FALSE; if (map->is_main_map) { new_entry->inheritance = VM_INHERIT_DEFAULT; - new_entry->protection = VM_PROT_DEFAULT; - new_entry->max_protection = VM_PROT_DEFAULT; + new_entry->protection = prot; + new_entry->max_protection = max; new_entry->wired_count = 0; } /* @@ -611,7 +626,8 @@ vm_map_insert(map, object, offset, start, end) * Update the free space hint */ - if ((map->first_free == prev_entry) && (prev_entry->end >= new_entry->start)) + if ((map->first_free == prev_entry) && + (prev_entry->end >= new_entry->start)) map->first_free = new_entry; return (KERN_SUCCESS); @@ -770,13 +786,15 @@ vm_map_findspace(map, start, length, addr) * */ int -vm_map_find(map, object, offset, addr, length, find_space) +vm_map_find(map, object, offset, addr, length, find_space, prot, max, cow) vm_map_t map; vm_object_t 
object; vm_ooffset_t offset; vm_offset_t *addr; /* IN/OUT */ vm_size_t length; boolean_t find_space; + vm_prot_t prot, max; + int cow; { register vm_offset_t start; int result, s = 0; @@ -796,7 +814,8 @@ vm_map_find(map, object, offset, addr, length, find_space) } start = *addr; } - result = vm_map_insert(map, object, offset, start, start + length); + result = vm_map_insert(map, object, offset, + start, start + length, prot, max, cow); vm_map_unlock(map); if (map == kmem_map) @@ -1767,20 +1786,6 @@ vm_map_copy_entry(src_map, dst_map, src_entry, dst_entry) if (dst_entry->wired_count != 0) vm_map_entry_unwire(dst_map, dst_entry); - /* - * If we're dealing with a sharing map, we must remove the destination - * pages from all maps (since we cannot know which maps this sharing - * map belongs in). - */ - - if (dst_map->is_main_map) - pmap_remove(dst_map->pmap, dst_entry->start, dst_entry->end); - else - vm_object_pmap_remove(dst_entry->object.vm_object, - OFF_TO_IDX(dst_entry->offset), - OFF_TO_IDX(dst_entry->offset + - (dst_entry->end - dst_entry->start))); - if (src_entry->wired_count == 0) { boolean_t src_needs_copy; @@ -1800,17 +1805,21 @@ vm_map_copy_entry(src_map, dst_map, src_entry, dst_entry) if (!(su = src_map->is_main_map)) { su = (src_map->ref_count == 1); } +#ifdef VM_MAP_OLD if (su) { pmap_protect(src_map->pmap, src_entry->start, src_entry->end, src_entry->protection & ~VM_PROT_WRITE); } else { +#endif vm_object_pmap_copy(src_entry->object.vm_object, OFF_TO_IDX(src_entry->offset), OFF_TO_IDX(src_entry->offset + (src_entry->end - src_entry->start))); +#ifdef VM_MAP_OLD } +#endif } /* * Make a copy of the object. 
@@ -1932,7 +1941,8 @@ vmspace_fork(vm1) new_entry->is_a_map = FALSE; vm_map_entry_link(new_map, new_map->header.prev, new_entry); - vm_map_copy_entry(old_map, new_map, old_entry, new_entry); + vm_map_copy_entry(old_map, new_map, old_entry, + new_entry); break; } old_entry = old_entry->next; diff --git a/sys/vm/vm_map.h b/sys/vm/vm_map.h index 93cb5ec..e24eace 100644 --- a/sys/vm/vm_map.h +++ b/sys/vm/vm_map.h @@ -61,7 +61,7 @@ * any improvements or extensions that they make and grant Carnegie the * rights to redistribute these changes. * - * $Id: vm_map.h,v 1.9 1995/12/11 04:58:14 dyson Exp $ + * $Id: vm_map.h,v 1.10 1995/12/14 09:55:00 phk Exp $ */ /* @@ -148,6 +148,7 @@ struct vmspace { struct pmap vm_pmap; /* private physical map */ int vm_refcnt; /* number of references */ caddr_t vm_shm; /* SYS5 shared memory private data XXX */ + vm_object_t vm_upages_obj; /* UPAGES object */ /* we copy from vm_startcopy to the end of the structure on fork */ #define vm_startcopy vm_rssize segsz_t vm_rssize; /* current resident set size in pages */ @@ -202,6 +203,12 @@ typedef struct { #define MAX_KMAP 10 #define MAX_KMAPENT 128 +/* + * Copy-on-write flags for vm_map operations + */ +#define MAP_COPY_NEEDED 0x1 +#define MAP_COPY_ON_WRITE 0x2 + #ifdef KERNEL extern vm_offset_t kentry_data; extern vm_size_t kentry_data_size; @@ -212,11 +219,11 @@ struct pmap; vm_map_t vm_map_create __P((struct pmap *, vm_offset_t, vm_offset_t, boolean_t)); void vm_map_deallocate __P((vm_map_t)); int vm_map_delete __P((vm_map_t, vm_offset_t, vm_offset_t)); -int vm_map_find __P((vm_map_t, vm_object_t, vm_ooffset_t, vm_offset_t *, vm_size_t, boolean_t)); +int vm_map_find __P((vm_map_t, vm_object_t, vm_ooffset_t, vm_offset_t *, vm_size_t, boolean_t, vm_prot_t, vm_prot_t, int)); int vm_map_findspace __P((vm_map_t, vm_offset_t, vm_size_t, vm_offset_t *)); int vm_map_inherit __P((vm_map_t, vm_offset_t, vm_offset_t, vm_inherit_t)); void vm_map_init __P((struct vm_map *, vm_offset_t, vm_offset_t, 
boolean_t)); -int vm_map_insert __P((vm_map_t, vm_object_t, vm_ooffset_t, vm_offset_t, vm_offset_t)); +int vm_map_insert __P((vm_map_t, vm_object_t, vm_ooffset_t, vm_offset_t, vm_offset_t, vm_prot_t, vm_prot_t, int)); int vm_map_lookup __P((vm_map_t *, vm_offset_t, vm_prot_t, vm_map_entry_t *, vm_object_t *, vm_pindex_t *, vm_prot_t *, boolean_t *, boolean_t *)); void vm_map_lookup_done __P((vm_map_t, vm_map_entry_t)); diff --git a/sys/vm/vm_mmap.c b/sys/vm/vm_mmap.c index 6579961..c68f5f4 100644 --- a/sys/vm/vm_mmap.c +++ b/sys/vm/vm_mmap.c @@ -38,7 +38,7 @@ * from: Utah $Hdr: vm_mmap.c 1.6 91/10/21$ * * @(#)vm_mmap.c 8.4 (Berkeley) 1/12/94 - * $Id: vm_mmap.c,v 1.33 1995/12/13 12:28:39 dyson Exp $ + * $Id: vm_mmap.c,v 1.34 1995/12/17 07:19:57 bde Exp $ */ /* @@ -70,6 +70,7 @@ #include <vm/vm_pager.h> #include <vm/vm_pageout.h> #include <vm/vm_extern.h> +#include <vm/vm_kern.h> #ifndef _SYS_SYSPROTO_H_ struct sbrk_args { @@ -604,11 +605,12 @@ vm_mmap(map, addr, size, prot, maxprot, flags, handle, foff) vm_ooffset_t foff; { boolean_t fitit; - vm_object_t object; + vm_object_t object, object2; struct vnode *vp = NULL; objtype_t type; int rv = KERN_SUCCESS; - vm_size_t objsize; + vm_ooffset_t objsize; + int docow; struct proc *p = curproc; if (size == 0) @@ -659,69 +661,60 @@ vm_mmap(map, addr, size, prot, maxprot, flags, handle, foff) error = VOP_GETATTR(vp, &vat, p->p_ucred, p); if (error) return (error); - objsize = vat.va_size; + objsize = round_page(vat.va_size); type = OBJT_VNODE; } } - object = vm_pager_allocate(type, handle, objsize, prot, foff); + object = vm_pager_allocate(type, handle, OFF_TO_IDX(objsize), prot, foff); if (object == NULL) return (type == OBJT_DEVICE ? 
EINVAL : ENOMEM); - rv = vm_map_find(map, object, foff, addr, size, fitit); + object2 = NULL; + docow = 0; + if ((flags & (MAP_ANON|MAP_SHARED)) == 0 && (type != OBJT_DEVICE)) { + docow = MAP_COPY_ON_WRITE; + if (objsize < size) { + object2 = vm_object_allocate( OBJT_DEFAULT, + OFF_TO_IDX(size - (foff & ~(PAGE_SIZE - 1)))); + object2->backing_object = object; + object2->backing_object_offset = foff; + TAILQ_INSERT_TAIL(&object->shadow_head, + object2, shadow_list); + } else { + docow |= MAP_COPY_NEEDED; + } + } + if (object2) + rv = vm_map_find(map, object2, 0, addr, size, fitit, + prot, maxprot, docow); + else + rv = vm_map_find(map, object, foff, addr, size, fitit, + prot, maxprot, docow); + + if (rv != KERN_SUCCESS) { /* * Lose the object reference. Will destroy the * object if it's an unnamed anonymous mapping * or named anonymous without other references. */ - vm_object_deallocate(object); + if (object2) + vm_object_deallocate(object2); + else + vm_object_deallocate(object); goto out; } /* - * mmap a COW regular file - */ - if ((flags & (MAP_ANON|MAP_SHARED)) == 0 && (type != OBJT_DEVICE)) { - vm_map_entry_t entry; - if (!vm_map_lookup_entry(map, *addr, &entry)) { - panic("vm_mmap: missing map entry!!!"); - } - entry->copy_on_write = TRUE; - /* - * This will create the processes private object on - * an as needed basis. - */ - entry->needs_copy = TRUE; - - /* - * set pages COW and protect for read access only - */ - vm_object_pmap_copy(object, foff, foff + size); - - } - - /* * "Pre-fault" resident pages. */ - if ((type == OBJT_VNODE) && (map->pmap != NULL)) { + if ((map != kernel_map) && + (type == OBJT_VNODE) && (map->pmap != NULL)) { pmap_object_init_pt(map->pmap, *addr, object, (vm_pindex_t) OFF_TO_IDX(foff), size); } /* - * Correct protection (default is VM_PROT_ALL). If maxprot is - * different than prot, we must set both explicitly. 
- */ - rv = KERN_SUCCESS; - if (maxprot != VM_PROT_ALL) - rv = vm_map_protect(map, *addr, *addr + size, maxprot, TRUE); - if (rv == KERN_SUCCESS && prot != maxprot) - rv = vm_map_protect(map, *addr, *addr + size, prot, FALSE); - if (rv != KERN_SUCCESS) { - (void) vm_map_remove(map, *addr, *addr + size); - goto out; - } - /* * Shared memory is also shared with children. */ if (flags & MAP_SHARED) { diff --git a/sys/vm/vm_object.c b/sys/vm/vm_object.c index 7ba53e8..088d8b6 100644 --- a/sys/vm/vm_object.c +++ b/sys/vm/vm_object.c @@ -61,7 +61,7 @@ * any improvements or extensions that they make and grant Carnegie the * rights to redistribute these changes. * - * $Id: vm_object.c,v 1.61 1996/01/04 18:32:31 davidg Exp $ + * $Id: vm_object.c,v 1.62 1996/01/04 21:13:20 wollman Exp $ */ /* @@ -442,11 +442,18 @@ vm_object_page_clean(object, start, end, syncio, lockflag) boolean_t syncio; boolean_t lockflag; { - register vm_page_t p; + register vm_page_t p, np, tp; register vm_offset_t tstart, tend; + vm_pindex_t pi; int s; struct vnode *vp; int runlen; + int maxf; + int chkb; + int maxb; + int i; + vm_page_t maf[vm_pageout_page_count]; + vm_page_t mab[vm_pageout_page_count]; vm_page_t ma[vm_pageout_page_count]; if (object->type != OBJT_VNODE || @@ -468,62 +475,99 @@ vm_object_page_clean(object, start, end, syncio, lockflag) if ((tstart == 0) && (tend == object->size)) { object->flags &= ~(OBJ_WRITEABLE|OBJ_MIGHTBEDIRTY); } - - runlen = 0; - for(;tstart < tend; tstart += 1) { -relookup: - p = vm_page_lookup(object, tstart); - if (!p) { - if (runlen > 0) { - vm_pageout_flush(ma, runlen, syncio); - runlen = 0; - } + for(p = object->memq.tqh_first; p; p = p->listq.tqe_next) + p->flags |= PG_CLEANCHK; + +rescan: + for(p = object->memq.tqh_first; p; p = np) { + np = p->listq.tqe_next; + + pi = p->pindex; + if (((p->flags & PG_CLEANCHK) == 0) || + (pi < tstart) || (pi >= tend) || + (p->valid == 0) || (p->queue == PQ_CACHE)) { + p->flags &= ~PG_CLEANCHK; continue; } - if 
((p->valid == 0) || (p->flags & PG_CACHE)) { - if (runlen > 0) { - vm_pageout_flush(ma, runlen, syncio); - runlen = 0; - } + + vm_page_test_dirty(p); + if ((p->dirty & p->valid) == 0) { + p->flags &= ~PG_CLEANCHK; continue; } - vm_page_protect(p, VM_PROT_READ); - s = splhigh(); - while ((p->flags & PG_BUSY) || p->busy) { - if (runlen > 0) { - splx(s); - vm_pageout_flush(ma, runlen, syncio); - runlen = 0; - goto relookup; - } + if ((p->flags & PG_BUSY) || p->busy) { p->flags |= PG_WANTED|PG_REFERENCED; tsleep(p, PVM, "vpcwai", 0); splx(s); - goto relookup; + goto rescan; } splx(s); + + maxf = 0; + for(i=1;i<vm_pageout_page_count;i++) { + if (tp = vm_page_lookup(object, pi + i)) { + if ((tp->flags & PG_BUSY) || + (tp->flags & PG_CLEANCHK) == 0) + break; + vm_page_test_dirty(tp); + if ((tp->dirty & tp->valid) == 0) { + tp->flags &= ~PG_CLEANCHK; + break; + } + maf[ i - 1 ] = tp; + maxf++; + continue; + } + break; + } - if (p->dirty == 0) - vm_page_test_dirty(p); - - if ((p->valid & p->dirty) != 0) { - ma[runlen] = p; - p->flags |= PG_BUSY; - runlen++; - if (runlen >= vm_pageout_page_count) { - vm_pageout_flush(ma, runlen, syncio); - runlen = 0; + maxb = 0; + chkb = vm_pageout_page_count - maxf; + if (chkb) { + for(i = 1; i < chkb;i++) { + if (tp = vm_page_lookup(object, pi - i)) { + if ((tp->flags & PG_BUSY) || + (tp->flags & PG_CLEANCHK) == 0) + break; + vm_page_test_dirty(tp); + if ((tp->dirty & tp->valid) == 0) { + tp->flags &= ~PG_CLEANCHK; + break; + } + mab[ i - 1 ] = tp; + maxb++; + continue; + } + break; } - } else if (runlen > 0) { - vm_pageout_flush(ma, runlen, syncio); - runlen = 0; } - - } - if (runlen > 0) { - vm_pageout_flush(ma, runlen, syncio); + + for(i=0;i<maxb;i++) { + int index = (maxb - i) - 1; + ma[index] = mab[i]; + ma[index]->flags |= PG_BUSY; + ma[index]->flags &= ~PG_CLEANCHK; + vm_page_protect(ma[index], VM_PROT_READ); + } + vm_page_protect(p, VM_PROT_READ); + p->flags |= PG_BUSY; + p->flags &= ~PG_CLEANCHK; + ma[maxb] = p; + 
for(i=0;i<maxf;i++) { + int index = (maxb + i) + 1; + ma[index] = maf[i]; + ma[index]->flags |= PG_BUSY; + ma[index]->flags &= ~PG_CLEANCHK; + vm_page_protect(ma[index], VM_PROT_READ); + } + runlen = maxb + maxf + 1; +/* + printf("maxb: %d, maxf: %d, runlen: %d, offset: %d\n", maxb, maxf, runlen, ma[0]->pindex); +*/ + vm_pageout_flush(ma, runlen, 0); + goto rescan; } VOP_FSYNC(vp, NULL, syncio, curproc); @@ -619,7 +663,8 @@ vm_object_pmap_remove(object, start, end) if (object == NULL) return; for (p = object->memq.tqh_first; p != NULL; p = p->listq.tqe_next) { - vm_page_protect(p, VM_PROT_NONE); + if (p->pindex >= start && p->pindex < end) + vm_page_protect(p, VM_PROT_NONE); } } @@ -763,8 +808,8 @@ vm_object_qcollapse(object) vm_page_t next; next = p->listq.tqe_next; - if ((p->flags & (PG_BUSY | PG_FICTITIOUS | PG_CACHE)) || - !p->valid || p->hold_count || p->wire_count || p->busy) { + if ((p->flags & (PG_BUSY | PG_FICTITIOUS)) || + (p->queue == PQ_CACHE) || !p->valid || p->hold_count || p->wire_count || p->busy) { p = next; continue; } @@ -1104,12 +1149,13 @@ again: if (size > 4 || size >= object->size / 4) { for (p = object->memq.tqh_first; p != NULL; p = next) { next = p->listq.tqe_next; + if (p->wire_count != 0) { + vm_page_protect(p, VM_PROT_NONE); + p->valid = 0; + continue; + } if ((start <= p->pindex) && (p->pindex < end)) { s = splhigh(); - if (p->bmapped) { - splx(s); - continue; - } if ((p->flags & PG_BUSY) || p->busy) { p->flags |= PG_WANTED; tsleep(p, PVM, "vmopar", 0); @@ -1129,12 +1175,15 @@ again: } } else { while (size > 0) { - while ((p = vm_page_lookup(object, start)) != 0) { - s = splhigh(); - if (p->bmapped) { - splx(s); - break; + if ((p = vm_page_lookup(object, start)) != 0) { + if (p->wire_count != 0) { + p->valid = 0; + vm_page_protect(p, VM_PROT_NONE); + start += 1; + size -= 1; + continue; } + s = splhigh(); if ((p->flags & PG_BUSY) || p->busy) { p->flags |= PG_WANTED; tsleep(p, PVM, "vmopar", 0); @@ -1144,8 +1193,11 @@ again: splx(s); if 
(clean_only) { vm_page_test_dirty(p); - if (p->valid & p->dirty) + if (p->valid & p->dirty) { + start += 1; + size -= 1; continue; + } } vm_page_protect(p, VM_PROT_NONE); PAGE_WAKEUP(p); diff --git a/sys/vm/vm_page.c b/sys/vm/vm_page.c index 325b5d5..288f140 100644 --- a/sys/vm/vm_page.c +++ b/sys/vm/vm_page.c @@ -34,7 +34,7 @@ * SUCH DAMAGE. * * from: @(#)vm_page.c 7.4 (Berkeley) 5/7/91 - * $Id: vm_page.c,v 1.44 1995/12/17 07:19:58 bde Exp $ + * $Id: vm_page.c,v 1.45 1996/01/04 21:13:23 wollman Exp $ */ /* @@ -105,6 +105,20 @@ struct pglist vm_page_queue_active; struct pglist vm_page_queue_inactive; struct pglist vm_page_queue_cache; +int no_queue; + +struct { + struct pglist *pl; + int *cnt; +} vm_page_queues[PQ_CACHE+1] = { + {NULL, &no_queue}, + { &vm_page_queue_free, &cnt.v_free_count}, + { &vm_page_queue_zero, &cnt.v_free_count}, + { &vm_page_queue_inactive, &cnt.v_inactive_count}, + { &vm_page_queue_active, &cnt.v_active_count}, + { &vm_page_queue_cache, &cnt.v_cache_count} +}; + vm_page_t vm_page_array; static int vm_page_array_size; long first_page; @@ -229,7 +243,7 @@ vm_page_startup(starta, enda, vaddr) vm_page_buckets = (struct pglist *) vaddr; bucket = vm_page_buckets; if (vm_page_bucket_count == 0) { - vm_page_bucket_count = 1; + vm_page_bucket_count = 2; while (vm_page_bucket_count < atop(total)) vm_page_bucket_count <<= 1; } @@ -333,7 +347,8 @@ vm_page_startup(starta, enda, vaddr) ++cnt.v_page_count; ++cnt.v_free_count; m = PHYS_TO_VM_PAGE(pa); - m->flags = PG_FREE; + m->queue = PQ_FREE; + m->flags = 0; m->phys_addr = pa; TAILQ_INSERT_TAIL(&vm_page_queue_free, m, pageq); pa += PAGE_SIZE; @@ -368,36 +383,36 @@ vm_page_hash(object, pindex) */ inline void -vm_page_insert(mem, object, pindex) - register vm_page_t mem; +vm_page_insert(m, object, pindex) + register vm_page_t m; register vm_object_t object; register vm_pindex_t pindex; { register struct pglist *bucket; - if (mem->flags & PG_TABLED) + if (m->flags & PG_TABLED) panic("vm_page_insert: already 
inserted"); /* * Record the object/offset pair in this page */ - mem->object = object; - mem->pindex = pindex; + m->object = object; + m->pindex = pindex; /* * Insert it into the object_object/offset hash table */ bucket = &vm_page_buckets[vm_page_hash(object, pindex)]; - TAILQ_INSERT_TAIL(bucket, mem, hashq); + TAILQ_INSERT_TAIL(bucket, m, hashq); /* * Now link into the object's list of backed pages. */ - TAILQ_INSERT_TAIL(&object->memq, mem, listq); - mem->flags |= PG_TABLED; + TAILQ_INSERT_TAIL(&object->memq, m, listq); + m->flags |= PG_TABLED; /* * And show that the object has one more resident page. @@ -417,34 +432,34 @@ vm_page_insert(mem, object, pindex) */ inline void -vm_page_remove(mem) - register vm_page_t mem; +vm_page_remove(m) + register vm_page_t m; { register struct pglist *bucket; - if (!(mem->flags & PG_TABLED)) + if (!(m->flags & PG_TABLED)) return; /* * Remove from the object_object/offset hash table */ - bucket = &vm_page_buckets[vm_page_hash(mem->object, mem->pindex)]; - TAILQ_REMOVE(bucket, mem, hashq); + bucket = &vm_page_buckets[vm_page_hash(m->object, m->pindex)]; + TAILQ_REMOVE(bucket, m, hashq); /* * Now remove from the object's list of backed pages. */ - TAILQ_REMOVE(&mem->object->memq, mem, listq); + TAILQ_REMOVE(&m->object->memq, m, listq); /* * And show that the object has one fewer resident page. 
*/ - mem->object->resident_page_count--; + m->object->resident_page_count--; - mem->flags &= ~PG_TABLED; + m->flags &= ~PG_TABLED; } /* @@ -461,7 +476,7 @@ vm_page_lookup(object, pindex) register vm_object_t object; register vm_pindex_t pindex; { - register vm_page_t mem; + register vm_page_t m; register struct pglist *bucket; int s; @@ -472,10 +487,10 @@ vm_page_lookup(object, pindex) bucket = &vm_page_buckets[vm_page_hash(object, pindex)]; s = splhigh(); - for (mem = bucket->tqh_first; mem != NULL; mem = mem->hashq.tqe_next) { - if ((mem->object == object) && (mem->pindex == pindex)) { + for (m = bucket->tqh_first; m != NULL; m = m->hashq.tqe_next) { + if ((m->object == object) && (m->pindex == pindex)) { splx(s); - return (mem); + return (m); } } @@ -492,16 +507,16 @@ vm_page_lookup(object, pindex) * The object must be locked. */ void -vm_page_rename(mem, new_object, new_pindex) - register vm_page_t mem; +vm_page_rename(m, new_object, new_pindex) + register vm_page_t m; register vm_object_t new_object; vm_pindex_t new_pindex; { int s; s = splhigh(); - vm_page_remove(mem); - vm_page_insert(mem, new_object, new_pindex); + vm_page_remove(m); + vm_page_insert(m, new_object, new_pindex); splx(s); } @@ -509,28 +524,17 @@ vm_page_rename(mem, new_object, new_pindex) * vm_page_unqueue must be called at splhigh(); */ static inline void -vm_page_unqueue(vm_page_t mem) +vm_page_unqueue(vm_page_t m) { - int origflags; - - origflags = mem->flags; - - if ((origflags & (PG_ACTIVE|PG_INACTIVE|PG_CACHE)) == 0) + int queue = m->queue; + if (queue == PQ_NONE) return; - - if (origflags & PG_ACTIVE) { - TAILQ_REMOVE(&vm_page_queue_active, mem, pageq); - cnt.v_active_count--; - mem->flags &= ~PG_ACTIVE; - } else if (origflags & PG_INACTIVE) { - TAILQ_REMOVE(&vm_page_queue_inactive, mem, pageq); - cnt.v_inactive_count--; - mem->flags &= ~PG_INACTIVE; - } else if (origflags & PG_CACHE) { - TAILQ_REMOVE(&vm_page_queue_cache, mem, pageq); - cnt.v_cache_count--; - mem->flags &= ~PG_CACHE; 
- if (cnt.v_cache_count + cnt.v_free_count < cnt.v_free_reserved) + m->queue = PQ_NONE; + TAILQ_REMOVE(vm_page_queues[queue].pl, m, pageq); + --(*vm_page_queues[queue].cnt); + if (queue == PQ_CACHE) { + if ((cnt.v_cache_count + cnt.v_free_count) < + (cnt.v_free_min + cnt.v_cache_min)) pagedaemon_wakeup(); } return; @@ -546,7 +550,6 @@ vm_page_unqueue(vm_page_t mem) * VM_ALLOC_NORMAL normal process request * VM_ALLOC_SYSTEM system *really* needs a page * VM_ALLOC_INTERRUPT interrupt time request - * or in: * VM_ALLOC_ZERO zero page * * Object must be locked. @@ -557,12 +560,13 @@ vm_page_alloc(object, pindex, page_req) vm_pindex_t pindex; int page_req; { - register vm_page_t mem; + register vm_page_t m; + int queue; int s; #ifdef DIAGNOSTIC - mem = vm_page_lookup(object, pindex); - if (mem) + m = vm_page_lookup(object, pindex); + if (m) panic("vm_page_alloc: page already allocated"); #endif @@ -572,41 +576,36 @@ vm_page_alloc(object, pindex, page_req) s = splhigh(); - switch ((page_req & ~(VM_ALLOC_ZERO))) { + switch (page_req) { + case VM_ALLOC_NORMAL: if (cnt.v_free_count >= cnt.v_free_reserved) { - if (page_req & VM_ALLOC_ZERO) { - mem = vm_page_queue_zero.tqh_first; - if (mem) { - --vm_page_zero_count; - TAILQ_REMOVE(&vm_page_queue_zero, mem, pageq); - mem->flags = PG_BUSY|PG_ZERO; - } else { - mem = vm_page_queue_free.tqh_first; - TAILQ_REMOVE(&vm_page_queue_free, mem, pageq); - mem->flags = PG_BUSY; - } - } else { - mem = vm_page_queue_free.tqh_first; - if (mem) { - TAILQ_REMOVE(&vm_page_queue_free, mem, pageq); - mem->flags = PG_BUSY; - } else { - --vm_page_zero_count; - mem = vm_page_queue_zero.tqh_first; - TAILQ_REMOVE(&vm_page_queue_zero, mem, pageq); - mem->flags = PG_BUSY|PG_ZERO; - } + m = vm_page_queue_free.tqh_first; + if (m == NULL) { + --vm_page_zero_count; + m = vm_page_queue_zero.tqh_first; } - cnt.v_free_count--; } else { - mem = vm_page_queue_cache.tqh_first; - if (mem != NULL) { - TAILQ_REMOVE(&vm_page_queue_cache, mem, pageq); - 
vm_page_remove(mem); - mem->flags = PG_BUSY; - cnt.v_cache_count--; + m = vm_page_queue_cache.tqh_first; + if (m == NULL) { + splx(s); + pagedaemon_wakeup(); + return (NULL); + } + } + break; + + case VM_ALLOC_ZERO: + if (cnt.v_free_count >= cnt.v_free_reserved) { + m = vm_page_queue_zero.tqh_first; + if (m) { + --vm_page_zero_count; } else { + m = vm_page_queue_free.tqh_first; + } + } else { + m = vm_page_queue_cache.tqh_first; + if (m == NULL) { splx(s); pagedaemon_wakeup(); return (NULL); @@ -618,38 +617,14 @@ vm_page_alloc(object, pindex, page_req) if ((cnt.v_free_count >= cnt.v_free_reserved) || ((cnt.v_cache_count == 0) && (cnt.v_free_count >= cnt.v_interrupt_free_min))) { - if (page_req & VM_ALLOC_ZERO) { - mem = vm_page_queue_zero.tqh_first; - if (mem) { + m = vm_page_queue_free.tqh_first; + if (m == NULL) { --vm_page_zero_count; - TAILQ_REMOVE(&vm_page_queue_zero, mem, pageq); - mem->flags = PG_BUSY|PG_ZERO; - } else { - mem = vm_page_queue_free.tqh_first; - TAILQ_REMOVE(&vm_page_queue_free, mem, pageq); - mem->flags = PG_BUSY; + m = vm_page_queue_zero.tqh_first; } - } else { - mem = vm_page_queue_free.tqh_first; - if (mem) { - TAILQ_REMOVE(&vm_page_queue_free, mem, pageq); - mem->flags = PG_BUSY; - } else { - --vm_page_zero_count; - mem = vm_page_queue_zero.tqh_first; - TAILQ_REMOVE(&vm_page_queue_zero, mem, pageq); - mem->flags = PG_BUSY|PG_ZERO; - } - } - cnt.v_free_count--; } else { - mem = vm_page_queue_cache.tqh_first; - if (mem != NULL) { - TAILQ_REMOVE(&vm_page_queue_cache, mem, pageq); - vm_page_remove(mem); - mem->flags = PG_BUSY; - cnt.v_cache_count--; - } else { + m = vm_page_queue_cache.tqh_first; + if (m == NULL) { splx(s); pagedaemon_wakeup(); return (NULL); @@ -659,21 +634,15 @@ vm_page_alloc(object, pindex, page_req) case VM_ALLOC_INTERRUPT: if (cnt.v_free_count > 0) { - mem = vm_page_queue_free.tqh_first; - if (mem) { - TAILQ_REMOVE(&vm_page_queue_free, mem, pageq); - mem->flags = PG_BUSY; - } else { + m = vm_page_queue_free.tqh_first; + 
if (m == NULL) { --vm_page_zero_count; - mem = vm_page_queue_zero.tqh_first; - TAILQ_REMOVE(&vm_page_queue_zero, mem, pageq); - mem->flags = PG_BUSY|PG_ZERO; + m = vm_page_queue_zero.tqh_first; } - cnt.v_free_count--; } else { splx(s); pagedaemon_wakeup(); - return NULL; + return (NULL); } break; @@ -681,16 +650,27 @@ vm_page_alloc(object, pindex, page_req) panic("vm_page_alloc: invalid allocation class"); } - mem->wire_count = 0; - mem->hold_count = 0; - mem->act_count = 0; - mem->busy = 0; - mem->valid = 0; - mem->dirty = 0; - mem->bmapped = 0; + queue = m->queue; + TAILQ_REMOVE(vm_page_queues[queue].pl, m, pageq); + --(*vm_page_queues[queue].cnt); + if (queue == PQ_ZERO) { + m->flags = PG_ZERO|PG_BUSY; + } else if (queue == PQ_CACHE) { + vm_page_remove(m); + m->flags = PG_BUSY; + } else { + m->flags = PG_BUSY; + } + m->wire_count = 0; + m->hold_count = 0; + m->act_count = 0; + m->busy = 0; + m->valid = 0; + m->dirty = 0; + m->queue = PQ_NONE; /* XXX before splx until vm_page_insert is safe */ - vm_page_insert(mem, object, pindex); + vm_page_insert(m, object, pindex); splx(s); @@ -698,11 +678,12 @@ vm_page_alloc(object, pindex, page_req) * Don't wakeup too often - wakeup the pageout daemon when * we would be nearly out of memory. */ - if (((cnt.v_free_count + cnt.v_cache_count) < cnt.v_free_min) || - (cnt.v_free_count < cnt.v_pageout_free_min)) + if (((cnt.v_free_count + cnt.v_cache_count) < + (cnt.v_free_min + cnt.v_cache_min)) || + (cnt.v_free_count < cnt.v_pageout_free_min)) pagedaemon_wakeup(); - return (mem); + return (m); } vm_offset_t @@ -727,7 +708,7 @@ again: */ for (i = start; i < cnt.v_page_count; i++) { phys = VM_PAGE_TO_PHYS(&pga[i]); - if (((pga[i].flags & PG_FREE) == PG_FREE) && + if ((pga[i].queue == PQ_FREE) && (phys >= low) && (phys < high) && ((phys & (alignment - 1)) == 0)) break; @@ -736,7 +717,8 @@ again: /* * If the above failed or we will exceed the upper bound, fail. 
*/ - if ((i == cnt.v_page_count) || ((VM_PAGE_TO_PHYS(&pga[i]) + size) > high)) { + if ((i == cnt.v_page_count) || + ((VM_PAGE_TO_PHYS(&pga[i]) + size) > high)) { splx(s); return (NULL); } @@ -747,8 +729,8 @@ again: */ for (i = start + 1; i < (start + size / PAGE_SIZE); i++) { if ((VM_PAGE_TO_PHYS(&pga[i]) != - (VM_PAGE_TO_PHYS(&pga[i - 1]) + PAGE_SIZE)) || - ((pga[i].flags & PG_FREE) != PG_FREE)) { + (VM_PAGE_TO_PHYS(&pga[i - 1]) + PAGE_SIZE)) || + (pga[i].queue != PQ_FREE)) { start++; goto again; } @@ -771,8 +753,8 @@ again: m->dirty = 0; m->wire_count = 0; m->act_count = 0; - m->bmapped = 0; m->busy = 0; + m->queue = PQ_NONE; vm_page_insert(m, kernel_object, OFF_TO_IDX(tmp_addr - VM_MIN_KERNEL_ADDRESS)); vm_page_wire(m); @@ -793,38 +775,40 @@ again: * Object and page must be locked prior to entry. */ void -vm_page_free(mem) - register vm_page_t mem; +vm_page_free(m) + register vm_page_t m; { int s; - int flags; + int flags = m->flags; s = splhigh(); - vm_page_remove(mem); - vm_page_unqueue(mem); - - flags = mem->flags; - if (mem->bmapped || mem->busy || flags & (PG_BUSY|PG_FREE)) { - if (flags & PG_FREE) + if (m->busy || (flags & PG_BUSY) || (m->queue == PQ_FREE)) { + printf("vm_page_free: pindex(%ld), busy(%d), PG_BUSY(%d)\n", + m->pindex, m->busy, (flags & PG_BUSY) ? 1 : 0); + if (m->queue == PQ_FREE) panic("vm_page_free: freeing free page"); - printf("vm_page_free: pindex(%ld), bmapped(%d), busy(%d), PG_BUSY(%d)\n", - mem->pindex, mem->bmapped, mem->busy, (flags & PG_BUSY) ? 
1 : 0); - panic("vm_page_free: freeing busy page"); + else + panic("vm_page_free: freeing busy page"); } + vm_page_remove(m); + vm_page_unqueue(m); + +/* if ((flags & PG_WANTED) != 0) - wakeup(mem); + wakeup(m); +*/ if ((flags & PG_FICTITIOUS) == 0) { - if (mem->wire_count) { - if (mem->wire_count > 1) { - printf("vm_page_free: wire count > 1 (%d)", mem->wire_count); + if (m->wire_count) { + if (m->wire_count > 1) { + printf("vm_page_free: wire count > 1 (%d)", m->wire_count); panic("vm_page_free: invalid wire count"); } cnt.v_wire_count--; - mem->wire_count = 0; + m->wire_count = 0; } - mem->flags |= PG_FREE; - TAILQ_INSERT_TAIL(&vm_page_queue_free, mem, pageq); + m->queue = PQ_FREE; + TAILQ_INSERT_TAIL(&vm_page_queue_free, m, pageq); splx(s); /* * if pageout daemon needs pages, then tell it that there are @@ -862,19 +846,19 @@ vm_page_free(mem) * The page queues must be locked. */ void -vm_page_wire(mem) - register vm_page_t mem; +vm_page_wire(m) + register vm_page_t m; { int s; - if (mem->wire_count == 0) { + if (m->wire_count == 0) { s = splhigh(); - vm_page_unqueue(mem); + vm_page_unqueue(m); splx(s); cnt.v_wire_count++; } - mem->flags |= PG_WRITEABLE|PG_MAPPED; - mem->wire_count++; + m->wire_count++; + m->flags |= PG_MAPPED; } /* @@ -886,20 +870,23 @@ vm_page_wire(mem) * The page queues must be locked. 
*/ void -vm_page_unwire(mem) - register vm_page_t mem; +vm_page_unwire(m) + register vm_page_t m; { int s; s = splhigh(); - if (mem->wire_count) - mem->wire_count--; - if (mem->wire_count == 0) { - TAILQ_INSERT_TAIL(&vm_page_queue_active, mem, pageq); - cnt.v_active_count++; - mem->flags |= PG_ACTIVE; + if (m->wire_count > 0) + m->wire_count--; + + if (m->wire_count == 0) { cnt.v_wire_count--; + TAILQ_INSERT_TAIL(&vm_page_queue_active, m, pageq); + m->queue = PQ_ACTIVE; + if( m->act_count < ACT_MAX) + m->act_count += 1; + cnt.v_active_count++; } splx(s); } @@ -918,17 +905,17 @@ vm_page_activate(m) int s; s = splhigh(); - if (m->flags & PG_ACTIVE) + if (m->queue == PQ_ACTIVE) panic("vm_page_activate: already active"); - if (m->flags & PG_CACHE) + if (m->queue == PQ_CACHE) cnt.v_reactivated++; vm_page_unqueue(m); if (m->wire_count == 0) { TAILQ_INSERT_TAIL(&vm_page_queue_active, m, pageq); - m->flags |= PG_ACTIVE; + m->queue = PQ_ACTIVE; if (m->act_count < 5) m->act_count = 5; else if( m->act_count < ACT_MAX) @@ -960,15 +947,16 @@ vm_page_deactivate(m) * we need to put them on the inactive queue also, otherwise we lose * track of them. Paul Mackerras (paulus@cs.anu.edu.au) 9-Jan-93. 
*/ + if (m->queue == PQ_INACTIVE) + return; spl = splhigh(); - if (!(m->flags & PG_INACTIVE) && m->wire_count == 0 && - m->hold_count == 0) { - if (m->flags & PG_CACHE) + if (m->wire_count == 0 && m->hold_count == 0) { + if (m->queue == PQ_CACHE) cnt.v_reactivated++; vm_page_unqueue(m); TAILQ_INSERT_TAIL(&vm_page_queue_inactive, m, pageq); - m->flags |= PG_INACTIVE; + m->queue = PQ_INACTIVE; cnt.v_inactive_count++; m->act_count = 0; } @@ -986,16 +974,16 @@ vm_page_cache(m) { int s; - if ((m->flags & (PG_CACHE | PG_BUSY)) || m->busy || m->wire_count || - m->bmapped) + if ((m->flags & PG_BUSY) || m->busy || m->wire_count) + return; + if (m->queue == PQ_CACHE) return; + vm_page_protect(m, VM_PROT_NONE); s = splhigh(); vm_page_unqueue(m); - vm_page_protect(m, VM_PROT_NONE); - TAILQ_INSERT_TAIL(&vm_page_queue_cache, m, pageq); - m->flags |= PG_CACHE; + m->queue = PQ_CACHE; cnt.v_cache_count++; if ((cnt.v_free_count + cnt.v_cache_count) == cnt.v_free_min) { wakeup(&cnt.v_free_count); @@ -1005,7 +993,6 @@ vm_page_cache(m) wakeup(&vm_pageout_pages_needed); vm_pageout_pages_needed = 0; } - splx(s); } diff --git a/sys/vm/vm_page.h b/sys/vm/vm_page.h index 031bf3b..b67c9a5 100644 --- a/sys/vm/vm_page.h +++ b/sys/vm/vm_page.h @@ -61,7 +61,7 @@ * any improvements or extensions that they make and grant Carnegie the * rights to redistribute these changes. 
* - * $Id: vm_page.h,v 1.22 1995/11/20 12:19:32 phk Exp $ + * $Id: vm_page.h,v 1.23 1995/12/11 04:58:26 dyson Exp $ */ /* @@ -107,34 +107,39 @@ struct vm_page { vm_object_t object; /* which object am I in (O,P) */ vm_pindex_t pindex; /* offset into object (O,P) */ vm_offset_t phys_addr; /* physical address of page */ - + u_short queue:4, /* page queue index */ + flags:12; /* see below */ u_short wire_count; /* wired down maps refs (P) */ - u_short flags; /* see below */ short hold_count; /* page hold count */ - u_short act_count; /* page usage count */ - u_short bmapped; /* number of buffers mapped */ - u_short busy; /* page busy count */ - u_short valid; /* map of valid DEV_BSIZE chunks */ - u_short dirty; /* map of dirty DEV_BSIZE chunks */ + u_char act_count; /* page usage count */ + u_char busy; /* page busy count */ + /* NOTE that these must support one bit per DEV_BSIZE in a page!!! */ + /* so, on normal X86 kernels, they must be at least 8 bits wide */ + u_char valid; /* map of valid DEV_BSIZE chunks */ + u_char dirty; /* map of dirty DEV_BSIZE chunks */ }; +#define PQ_NONE 0 +#define PQ_FREE 1 +#define PQ_ZERO 2 +#define PQ_INACTIVE 3 +#define PQ_ACTIVE 4 +#define PQ_CACHE 5 + /* * These are the flags defined for vm_page. * * Note: PG_FILLED and PG_DIRTY are added for the filesystems. 
*/ -#define PG_INACTIVE 0x0001 /* page is in inactive list (P) */ -#define PG_ACTIVE 0x0002 /* page is in active list (P) */ -#define PG_BUSY 0x0010 /* page is in transit (O) */ -#define PG_WANTED 0x0020 /* someone is waiting for page (O) */ -#define PG_TABLED 0x0040 /* page is in VP table (O) */ -#define PG_FICTITIOUS 0x0100 /* physical page doesn't exist (O) */ -#define PG_WRITEABLE 0x0200 /* page is mapped writeable */ -#define PG_MAPPED 0x0400 /* page is mapped */ -#define PG_ZERO 0x0800 /* page is zeroed */ -#define PG_REFERENCED 0x1000 /* page has been referenced */ -#define PG_CACHE 0x4000 /* On VMIO cache */ -#define PG_FREE 0x8000 /* page is in free list */ +#define PG_BUSY 0x01 /* page is in transit (O) */ +#define PG_WANTED 0x02 /* someone is waiting for page (O) */ +#define PG_TABLED 0x04 /* page is in VP table (O) */ +#define PG_FICTITIOUS 0x08 /* physical page doesn't exist (O) */ +#define PG_WRITEABLE 0x10 /* page is mapped writeable */ +#define PG_MAPPED 0x20 /* page is mapped */ +#define PG_ZERO 0x40 /* page is zeroed */ +#define PG_REFERENCED 0x80 /* page has been referenced */ +#define PG_CLEANCHK 0x100 /* page has been checked for cleaning */ /* * Misc constants. @@ -229,7 +234,7 @@ extern vm_offset_t last_phys_addr; /* physical address for last_page */ #define VM_ALLOC_NORMAL 0 #define VM_ALLOC_INTERRUPT 1 #define VM_ALLOC_SYSTEM 2 -#define VM_ALLOC_ZERO 0x80 +#define VM_ALLOC_ZERO 3 void vm_page_activate __P((vm_page_t)); vm_page_t vm_page_alloc __P((vm_object_t, vm_pindex_t, int)); diff --git a/sys/vm/vm_pageout.c b/sys/vm/vm_pageout.c index 7946335..ef7dbe9 100644 --- a/sys/vm/vm_pageout.c +++ b/sys/vm/vm_pageout.c @@ -65,7 +65,7 @@ * any improvements or extensions that they make and grant Carnegie the * rights to redistribute these changes. 
* - * $Id: vm_pageout.c,v 1.62 1995/12/11 04:58:28 dyson Exp $ + * $Id: vm_pageout.c,v 1.63 1995/12/14 09:55:09 phk Exp $ */ /* @@ -234,13 +234,13 @@ vm_pageout_clean(m, sync) } p = vm_page_lookup(object, pindex + i); if (p) { - if ((p->flags & (PG_BUSY|PG_CACHE)) || p->busy) { + if ((p->queue == PQ_CACHE) || (p->flags & PG_BUSY) || p->busy) { forward_okay = FALSE; goto do_backward; } vm_page_test_dirty(p); if ((p->dirty & p->valid) != 0 && - ((p->flags & PG_INACTIVE) || + ((p->queue == PQ_INACTIVE) || (sync == VM_PAGEOUT_FORCE)) && (p->wire_count == 0) && (p->hold_count == 0)) { @@ -268,13 +268,13 @@ do_backward: } p = vm_page_lookup(object, pindex - i); if (p) { - if ((p->flags & (PG_BUSY|PG_CACHE)) || p->busy) { + if ((p->queue == PQ_CACHE) || (p->flags & PG_BUSY) || p->busy) { backward_okay = FALSE; continue; } vm_page_test_dirty(p); if ((p->dirty & p->valid) != 0 && - ((p->flags & PG_INACTIVE) || + ((p->queue == PQ_INACTIVE) || (sync == VM_PAGEOUT_FORCE)) && (p->wire_count == 0) && (p->hold_count == 0)) { @@ -348,7 +348,7 @@ vm_pageout_flush(mc, count, sync) * page so it doesn't clog the inactive list. (We * will try paging out it again later). 
*/ - if (mt->flags & PG_INACTIVE) + if (mt->queue == PQ_INACTIVE) vm_page_activate(mt); break; case VM_PAGER_AGAIN: @@ -364,13 +364,6 @@ vm_pageout_flush(mc, count, sync) */ if (pageout_status[i] != VM_PAGER_PEND) { vm_object_pip_wakeup(object); - if ((mt->flags & (PG_REFERENCED|PG_WANTED)) || - pmap_is_referenced(VM_PAGE_TO_PHYS(mt))) { - pmap_clear_reference(VM_PAGE_TO_PHYS(mt)); - mt->flags &= ~PG_REFERENCED; - if (mt->flags & PG_INACTIVE) - vm_page_activate(mt); - } PAGE_WAKEUP(mt); } } @@ -427,6 +420,7 @@ vm_pageout_object_deactivate_pages(map, object, count, map_remove_only) if (p->wire_count != 0 || p->hold_count != 0 || p->busy != 0 || + (p->flags & PG_BUSY) || !pmap_page_exists(vm_map_pmap(map), VM_PAGE_TO_PHYS(p))) { p = next; continue; @@ -435,9 +429,9 @@ vm_pageout_object_deactivate_pages(map, object, count, map_remove_only) * if a page is active, not wired and is in the processes * pmap, then deactivate the page. */ - if ((p->flags & (PG_ACTIVE | PG_BUSY)) == PG_ACTIVE) { + if (p->queue == PQ_ACTIVE) { if (!pmap_is_referenced(VM_PAGE_TO_PHYS(p)) && - (p->flags & (PG_REFERENCED|PG_WANTED)) == 0) { + (p->flags & PG_REFERENCED) == 0) { p->act_count -= min(p->act_count, ACT_DECLINE); /* * if the page act_count is zero -- then we @@ -461,7 +455,7 @@ vm_pageout_object_deactivate_pages(map, object, count, map_remove_only) /* * see if we are done yet */ - if (p->flags & PG_INACTIVE) { + if (p->queue == PQ_INACTIVE) { --count; ++dcount; if (count <= 0 && @@ -481,7 +475,7 @@ vm_pageout_object_deactivate_pages(map, object, count, map_remove_only) TAILQ_REMOVE(&vm_page_queue_active, p, pageq); TAILQ_INSERT_TAIL(&vm_page_queue_active, p, pageq); } - } else if ((p->flags & (PG_INACTIVE | PG_BUSY)) == PG_INACTIVE) { + } else if (p->queue == PQ_INACTIVE) { vm_page_protect(p, VM_PROT_NONE); } p = next; @@ -489,7 +483,6 @@ vm_pageout_object_deactivate_pages(map, object, count, map_remove_only) return dcount; } - /* * deactivate some number of pages in a map, try to do 
it fairly, but * that is really hard to do. @@ -584,7 +577,7 @@ rescan1: next = m->pageq.tqe_next; #if defined(VM_DIAGNOSE) - if ((m->flags & PG_INACTIVE) == 0) { + if (m->queue != PQ_INACTIVE) { printf("vm_pageout_scan: page not inactive?\n"); break; } @@ -593,12 +586,17 @@ rescan1: /* * dont mess with busy pages */ - if (m->hold_count || m->busy || (m->flags & PG_BUSY)) { + if (m->busy || (m->flags & PG_BUSY)) { + m = next; + continue; + } + if (m->hold_count) { TAILQ_REMOVE(&vm_page_queue_inactive, m, pageq); TAILQ_INSERT_TAIL(&vm_page_queue_inactive, m, pageq); m = next; continue; } + if (((m->flags & PG_REFERENCED) == 0) && pmap_is_referenced(VM_PAGE_TO_PHYS(m))) { m->flags |= PG_REFERENCED; @@ -607,7 +605,7 @@ rescan1: m->flags &= ~PG_REFERENCED; pmap_clear_reference(VM_PAGE_TO_PHYS(m)); } - if ((m->flags & (PG_REFERENCED|PG_WANTED)) != 0) { + if ((m->flags & PG_REFERENCED) != 0) { m->flags &= ~PG_REFERENCED; pmap_clear_reference(VM_PAGE_TO_PHYS(m)); vm_page_activate(m); @@ -617,21 +615,18 @@ rescan1: continue; } - vm_page_test_dirty(m); if (m->dirty == 0) { - if (m->bmapped == 0) { - if (m->valid == 0) { - pmap_page_protect(VM_PAGE_TO_PHYS(m), VM_PROT_NONE); - vm_page_free(m); - cnt.v_dfree++; - } else { - vm_page_cache(m); - } - ++pages_freed; - } else { - m = next; - continue; - } + vm_page_test_dirty(m); + } else if (m->dirty != 0) + m->dirty = VM_PAGE_BITS_ALL; + if (m->valid == 0) { + vm_page_protect(m, VM_PROT_NONE); + vm_page_free(m); + cnt.v_dfree++; + ++pages_freed; + } else if (m->dirty == 0) { + vm_page_cache(m); + ++pages_freed; } else if (maxlaunder > 0) { int written; struct vnode *vp = NULL; @@ -671,7 +666,7 @@ rescan1: * if the next page has been re-activated, start * scanning again */ - if ((next->flags & PG_INACTIVE) == 0) { + if (next->queue != PQ_INACTIVE) { vm_pager_sync(); goto rescan1; } @@ -697,7 +692,8 @@ rescan1: maxscan = MAXSCAN; pcount = cnt.v_active_count; m = vm_page_queue_active.tqh_first; - while ((m != NULL) && (maxscan > 0) 
&& (pcount-- > 0) && (page_shortage > 0)) { + while ((m != NULL) && (maxscan > 0) && + (pcount-- > 0) && (page_shortage > 0)) { cnt.v_pdpages++; next = m->pageq.tqe_next; @@ -711,13 +707,11 @@ rescan1: TAILQ_REMOVE(&vm_page_queue_active, m, pageq); TAILQ_INSERT_TAIL(&vm_page_queue_active, m, pageq); m = next; - /* printf("busy: s: %d, f: 0x%x, h: %d\n", - m->busy, m->flags, m->hold_count); */ continue; } if (m->object->ref_count && - ((m->flags & (PG_REFERENCED|PG_WANTED)) || - pmap_is_referenced(VM_PAGE_TO_PHYS(m)))) { + ((m->flags & PG_REFERENCED) || + pmap_is_referenced(VM_PAGE_TO_PHYS(m))) ) { pmap_clear_reference(VM_PAGE_TO_PHYS(m)); m->flags &= ~PG_REFERENCED; if (m->act_count < ACT_MAX) { @@ -737,7 +731,7 @@ rescan1: if (m->object->ref_count == 0) { --page_shortage; vm_page_test_dirty(m); - if ((m->bmapped == 0) && (m->dirty == 0) ) { + if (m->dirty == 0) { m->act_count = 0; vm_page_cache(m); } else { @@ -773,7 +767,8 @@ rescan1: * in a writeable object, wakeup the sync daemon. And kick swapout * if we did not get enough free pages. 
*/ - if ((cnt.v_cache_count + cnt.v_free_count) < cnt.v_free_target) { + if ((cnt.v_cache_count + cnt.v_free_count) < + (cnt.v_free_target + cnt.v_cache_min) ) { if (vnodes_skipped && (cnt.v_cache_count + cnt.v_free_count) < cnt.v_free_min) { if (!vfs_update_wakeup) { diff --git a/sys/vm/vm_unix.c b/sys/vm/vm_unix.c index 883e36d..46531e8 100644 --- a/sys/vm/vm_unix.c +++ b/sys/vm/vm_unix.c @@ -38,7 +38,7 @@ * from: Utah $Hdr: vm_unix.c 1.1 89/11/07$ * * @(#)vm_unix.c 8.1 (Berkeley) 6/11/93 - * $Id: vm_unix.c,v 1.8 1995/11/12 06:43:28 bde Exp $ + * $Id: vm_unix.c,v 1.9 1995/12/07 12:48:29 davidg Exp $ */ /* @@ -56,6 +56,7 @@ #include <vm/pmap.h> #include <vm/vm_map.h> #include <vm/swap_pager.h> +#include <vm/vm_prot.h> #ifndef _SYS_SYSPROTO_H_ struct obreak_args { @@ -85,7 +86,8 @@ obreak(p, uap, retval) if (swap_pager_full) { return (ENOMEM); } - rv = vm_map_find(&vm->vm_map, NULL, 0, &old, diff, FALSE); + rv = vm_map_find(&vm->vm_map, NULL, 0, &old, diff, FALSE, + VM_PROT_ALL, VM_PROT_ALL, 0); if (rv != KERN_SUCCESS) { return (ENOMEM); } diff --git a/sys/vm/vnode_pager.c b/sys/vm/vnode_pager.c index 873b20ce..4edbd35 100644 --- a/sys/vm/vnode_pager.c +++ b/sys/vm/vnode_pager.c @@ -38,7 +38,7 @@ * SUCH DAMAGE. * * from: @(#)vnode_pager.c 7.5 (Berkeley) 4/20/91 - * $Id: vnode_pager.c,v 1.56 1995/12/14 09:55:14 phk Exp $ + * $Id: vnode_pager.c,v 1.57 1995/12/17 23:29:56 dyson Exp $ */ /* @@ -133,7 +133,8 @@ vnode_pager_alloc(handle, size, prot, offset) * If the object is being terminated, wait for it to * go away. */ - while (((object = vp->v_object) != NULL) && (object->flags & OBJ_DEAD)) { + while (((object = vp->v_object) != NULL) && + (object->flags & OBJ_DEAD)) { tsleep(object, PVM, "vadead", 0); } |