-rw-r--r--  sys/alpha/alpha/pmap.c | 21
-rw-r--r--  sys/alpha/alpha/symbols.raw | 5
-rw-r--r--  sys/amd64/amd64/pmap.c | 65
-rw-r--r--  sys/cam/cam_periph.c | 6
-rw-r--r--  sys/conf/files | 1
-rw-r--r--  sys/conf/options | 3
-rw-r--r--  sys/fs/procfs/procfs_map.c | 10
-rw-r--r--  sys/fs/specfs/spec_vnops.c | 8
-rw-r--r--  sys/i386/i386/pmap.c | 65
-rw-r--r--  sys/i386/i386/symbols.raw | 5
-rw-r--r--  sys/kern/kern_malloc.c | 36
-rw-r--r--  sys/kern/kern_physio.c | 8
-rw-r--r--  sys/kern/subr_rlist.c | 11
-rw-r--r--  sys/kern/sysv_shm.c | 3
-rw-r--r--  sys/kern/uipc_syscalls.c | 29
-rw-r--r--  sys/kern/uipc_usrreq.c | 9
-rw-r--r--  sys/kern/vfs_aio.c | 10
-rw-r--r--  sys/kern/vfs_bio.c | 119
-rw-r--r--  sys/kern/vfs_cluster.c | 10
-rw-r--r--  sys/kern/vfs_export.c | 46
-rw-r--r--  sys/kern/vfs_subr.c | 46
-rw-r--r--  sys/miscfs/devfs/devfs_vnops.c | 8
-rw-r--r--  sys/miscfs/procfs/procfs_map.c | 10
-rw-r--r--  sys/miscfs/specfs/spec_vnops.c | 8
-rw-r--r--  sys/net/if_sl.c | 12
-rw-r--r--  sys/nfs/nfs_bio.c | 38
-rw-r--r--  sys/nfs/nfs_common.c | 5
-rw-r--r--  sys/nfs/nfs_subs.c | 5
-rw-r--r--  sys/nfs/nfs_vnops.c | 5
-rw-r--r--  sys/nfsclient/nfs_bio.c | 38
-rw-r--r--  sys/nfsclient/nfs_subs.c | 5
-rw-r--r--  sys/nfsclient/nfs_vnops.c | 5
-rw-r--r--  sys/nfsserver/nfs_srvsubs.c | 5
-rw-r--r--  sys/sys/bio.h | 36
-rw-r--r--  sys/sys/buf.h | 36
-rw-r--r--  sys/sys/malloc.h | 10
-rw-r--r--  sys/sys/param.h | 8
-rw-r--r--  sys/sys/types.h | 3
-rw-r--r--  sys/ufs/mfs/mfs_extern.h | 5
-rw-r--r--  sys/ufs/mfs/mfs_vfsops.c | 22
-rw-r--r--  sys/ufs/mfs/mfs_vnops.c | 120
-rw-r--r--  sys/ufs/ufs/ufs_readwrite.c | 12
-rw-r--r--  sys/ufs/ufs/ufs_vnops.c | 5
-rw-r--r--  sys/vm/default_pager.c | 82
-rw-r--r--  sys/vm/device_pager.c | 4
-rw-r--r--  sys/vm/swap_pager.c | 2553
-rw-r--r--  sys/vm/swap_pager.h | 44
-rw-r--r--  sys/vm/vm_fault.c | 121
-rw-r--r--  sys/vm/vm_glue.c | 11
-rw-r--r--  sys/vm/vm_kern.c | 78
-rw-r--r--  sys/vm/vm_map.c | 51
-rw-r--r--  sys/vm/vm_meter.c | 7
-rw-r--r--  sys/vm/vm_mmap.c | 3
-rw-r--r--  sys/vm/vm_object.c | 246
-rw-r--r--  sys/vm/vm_object.h | 59
-rw-r--r--  sys/vm/vm_page.c | 436
-rw-r--r--  sys/vm/vm_page.h | 132
-rw-r--r--  sys/vm/vm_pageout.c | 269
-rw-r--r--  sys/vm/vm_pageout.h | 4
-rw-r--r--  sys/vm/vm_pager.c | 109
-rw-r--r--  sys/vm/vm_pager.h | 59
-rw-r--r--  sys/vm/vm_swap.c | 56
-rw-r--r--  sys/vm/vnode_pager.c | 19
63 files changed, 3198 insertions(+), 2062 deletions(-)
diff --git a/sys/alpha/alpha/pmap.c b/sys/alpha/alpha/pmap.c
index 0e7aa73..fe8741d 100644
--- a/sys/alpha/alpha/pmap.c
+++ b/sys/alpha/alpha/pmap.c
@@ -43,7 +43,7 @@
* from: @(#)pmap.c 7.7 (Berkeley) 5/12/91
* from: i386 Id: pmap.c,v 1.193 1998/04/19 15:22:48 bde Exp
* with some ideas from NetBSD's alpha pmap
- * $Id: pmap.c,v 1.11 1998/10/21 11:38:06 dg Exp $
+ * $Id: pmap.c,v 1.12 1998/10/28 13:36:49 dg Exp $
*/
/*
@@ -950,7 +950,7 @@ pmap_page_lookup(vm_object_t object, vm_pindex_t pindex)
vm_page_t m;
retry:
m = vm_page_lookup(object, pindex);
- if (m && vm_page_sleep(m, "pplookp", NULL))
+ if (m && vm_page_sleep_busy(m, FALSE, "pplookp"))
goto retry;
return m;
}
@@ -1039,7 +1039,7 @@ pmap_dispose_proc(p)
if ((m = vm_page_lookup(upobj, i)) == NULL)
panic("pmap_dispose_proc: upage already missing???");
- vm_page_flag_set(m, PG_BUSY);
+ vm_page_busy(m);
oldpte = *(ptek + i);
*(ptek + i) = 0;
@@ -1128,7 +1128,8 @@ _pmap_unwire_pte_hold(pmap_t pmap, vm_offset_t va, vm_page_t m)
{
int s;
- while (vm_page_sleep(m, "pmuwpt", NULL));
+ while (vm_page_sleep_busy(m, FALSE, "pmuwpt"))
+ ;
if (m->hold_count == 0) {
vm_offset_t pteva;
@@ -1181,7 +1182,7 @@ _pmap_unwire_pte_hold(pmap_t pmap, vm_offset_t va, vm_page_t m)
wakeup(m);
}
- vm_page_flag_set(m, PG_BUSY);
+ vm_page_busy(m);
vm_page_free_zero(m);
--cnt.v_wire_count;
}
@@ -1316,10 +1317,10 @@ pmap_release_free_page(pmap_t pmap, vm_page_t p)
* page-table pages. Those pages are zero now, and
* might as well be placed directly into the zero queue.
*/
- if (vm_page_sleep(p, "pmaprl", NULL))
+ if (vm_page_sleep_busy(p, FALSE, "pmaprl"))
return 0;
- vm_page_flag_set(p, PG_BUSY);
+ vm_page_busy(p);
/*
* Remove the page table page from the processes address space.
@@ -2336,7 +2337,7 @@ pmap_object_init_pt(pmap_t pmap, vm_offset_t addr,
(p->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) {
if ((p->queue - p->pc) == PQ_CACHE)
vm_page_deactivate(p);
- vm_page_flag_set(p, PG_BUSY);
+ vm_page_busy(p);
mpte = pmap_enter_quick(pmap,
addr + alpha_ptob(tmpidx),
VM_PAGE_TO_PHYS(p), mpte);
@@ -2356,7 +2357,7 @@ pmap_object_init_pt(pmap_t pmap, vm_offset_t addr,
(p->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) {
if ((p->queue - p->pc) == PQ_CACHE)
vm_page_deactivate(p);
- vm_page_flag_set(p, PG_BUSY);
+ vm_page_busy(p);
mpte = pmap_enter_quick(pmap,
addr + alpha_ptob(tmpidx),
VM_PAGE_TO_PHYS(p), mpte);
@@ -2453,7 +2454,7 @@ pmap_prefault(pmap, addra, entry)
if ((m->queue - m->pc) == PQ_CACHE) {
vm_page_deactivate(m);
}
- vm_page_flag_set(m, PG_BUSY);
+ vm_page_busy(m);
mpte = pmap_enter_quick(pmap, addr,
VM_PAGE_TO_PHYS(m), mpte);
vm_page_flag_set(m, PG_MAPPED);
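For context: a minimal sketch (not part of this diff) of the retry idiom behind the new vm_page_sleep_busy() interface, using the signature implied by the hunks above. The FALSE argument means "wait only on PG_BUSY, not on m->busy", and a nonzero return means the caller slept and must redo the lookup; the helper name is hypothetical:

        static vm_page_t
        lookup_page_unbusied(vm_object_t object, vm_pindex_t pindex)
        {
                vm_page_t m;
        retry:
                m = vm_page_lookup(object, pindex);
                /* nonzero return: we slept, page identity may have changed */
                if (m && vm_page_sleep_busy(m, FALSE, "lkpage"))
                        goto retry;
                return m;
        }

The companion vm_page_busy()/vm_page_wakeup() pair likewise replaces direct PG_BUSY flag manipulation, so PG_WANTED wakeups stay correct.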
diff --git a/sys/alpha/alpha/symbols.raw b/sys/alpha/alpha/symbols.raw
index bf8881a..2b03da9 100644
--- a/sys/alpha/alpha/symbols.raw
+++ b/sys/alpha/alpha/symbols.raw
@@ -1,6 +1,6 @@
# @(#)symbols.raw 7.6 (Berkeley) 5/8/91
#
-# $Id: symbols.raw,v 1.12 1998/03/30 09:48:20 phk Exp $
+# $Id: symbols.raw,v 1.1 1998/06/10 10:53:25 dfr Exp $
#
@@ -34,7 +34,8 @@
#pstat
# _cons
_nswap
- _swaplist
+ _swapblist
+# _swaplist
#vmstat
_cp_time
# _rate
diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c
index 66c9b63..2a378d3 100644
--- a/sys/amd64/amd64/pmap.c
+++ b/sys/amd64/amd64/pmap.c
@@ -39,7 +39,7 @@
* SUCH DAMAGE.
*
* from: @(#)pmap.c 7.7 (Berkeley) 5/12/91
- * $Id: pmap.c,v 1.218 1999/01/09 21:41:22 dt Exp $
+ * $Id: pmap.c,v 1.219 1999/01/12 00:17:53 eivind Exp $
*/
/*
@@ -942,7 +942,7 @@ pmap_page_lookup(object, pindex)
vm_page_t m;
retry:
m = vm_page_lookup(object, pindex);
- if (m && vm_page_sleep(m, "pplookp", NULL))
+ if (m && vm_page_sleep_busy(m, FALSE, "pplookp"))
goto retry;
return m;
}
@@ -1009,8 +1009,8 @@ pmap_new_proc(p)
}
vm_page_wakeup(m);
- m->flags &= ~PG_ZERO;
- m->flags |= PG_MAPPED | PG_WRITEABLE;
+ vm_page_flag_clear(m, PG_ZERO);
+ vm_page_flag_set(m, PG_MAPPED | PG_WRITEABLE);
m->valid = VM_PAGE_BITS_ALL;
}
if (updateneeded)
@@ -1038,7 +1038,7 @@ pmap_dispose_proc(p)
if ((m = vm_page_lookup(upobj, i)) == NULL)
panic("pmap_dispose_proc: upage already missing???");
- m->flags |= PG_BUSY;
+ vm_page_busy(m);
oldpte = *(ptek + i);
*(ptek + i) = 0;
@@ -1107,7 +1107,7 @@ pmap_swapin_proc(p)
vm_page_wire(m);
vm_page_wakeup(m);
- m->flags |= PG_MAPPED | PG_WRITEABLE;
+ vm_page_flag_set(m, PG_MAPPED | PG_WRITEABLE);
}
}
@@ -1122,7 +1122,8 @@ pmap_swapin_proc(p)
static int
_pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m) {
- while (vm_page_sleep(m, "pmuwpt", NULL));
+ while (vm_page_sleep_busy(m, FALSE, "pmuwpt"))
+ ;
if (m->hold_count == 0) {
vm_offset_t pteva;
@@ -1150,12 +1151,8 @@ _pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m) {
--m->wire_count;
if (m->wire_count == 0) {
- if (m->flags & PG_WANTED) {
- m->flags &= ~PG_WANTED;
- wakeup(m);
- }
-
- m->flags |= PG_BUSY;
+ vm_page_flash(m);
+ vm_page_busy(m);
vm_page_free_zero(m);
--cnt.v_wire_count;
}
@@ -1257,7 +1254,8 @@ pmap_pinit(pmap)
ptdpg->wire_count = 1;
++cnt.v_wire_count;
- ptdpg->flags &= ~(PG_MAPPED | PG_BUSY); /* not mapped normally */
+
+ vm_page_flag_clear(ptdpg, PG_MAPPED | PG_BUSY); /* not usually mapped*/
ptdpg->valid = VM_PAGE_BITS_ALL;
pmap_kenter((vm_offset_t) pmap->pm_pdir, VM_PAGE_TO_PHYS(ptdpg));
@@ -1290,10 +1288,10 @@ pmap_release_free_page(pmap, p)
* page-table pages. Those pages are zero now, and
* might as well be placed directly into the zero queue.
*/
- if (vm_page_sleep(p, "pmaprl", NULL))
+ if (vm_page_sleep_busy(p, FALSE, "pmaprl"))
return 0;
- p->flags |= PG_BUSY;
+ vm_page_busy(p);
/*
* Remove the page table page from the processes address space.
@@ -1393,8 +1391,9 @@ _pmap_allocpte(pmap, ptepindex)
}
m->valid = VM_PAGE_BITS_ALL;
- m->flags &= ~(PG_ZERO | PG_BUSY);
- m->flags |= PG_MAPPED;
+ vm_page_flag_clear(m, PG_ZERO);
+ vm_page_flag_set(m, PG_MAPPED);
+ vm_page_wakeup(m);
return m;
}
@@ -1713,7 +1712,7 @@ pmap_remove_entry(pmap, ppv, va)
TAILQ_REMOVE(&ppv->pv_list, pv, pv_list);
ppv->pv_list_count--;
if (TAILQ_FIRST(&ppv->pv_list) == NULL)
- ppv->pv_vm_page->flags &= ~(PG_MAPPED | PG_WRITEABLE);
+ vm_page_flag_clear(ppv->pv_vm_page, PG_MAPPED | PG_WRITEABLE);
TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist);
free_pv_entry(pv);
@@ -1791,7 +1790,7 @@ pmap_remove_pte(pmap, ptq, va)
ppv->pv_vm_page->dirty = VM_PAGE_BITS_ALL;
}
if (oldpte & PG_A)
- ppv->pv_vm_page->flags |= PG_REFERENCED;
+ vm_page_flag_set(ppv->pv_vm_page, PG_REFERENCED);
return pmap_remove_entry(pmap, ppv, va);
} else {
return pmap_unuse_pt(pmap, va, NULL);
@@ -1976,7 +1975,7 @@ pmap_remove_all(pa)
pv->pv_pmap->pm_stats.wired_count--;
if (tpte & PG_A)
- ppv->pv_vm_page->flags |= PG_REFERENCED;
+ vm_page_flag_set(ppv->pv_vm_page, PG_REFERENCED);
/*
* Update the vm_page_t clean and reference bits.
@@ -2005,7 +2004,7 @@ pmap_remove_all(pa)
free_pv_entry(pv);
}
- ppv->pv_vm_page->flags &= ~(PG_MAPPED | PG_WRITEABLE);
+ vm_page_flag_clear(ppv->pv_vm_page, PG_MAPPED | PG_WRITEABLE);
if (update_needed)
invltlb();
@@ -2081,7 +2080,7 @@ pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
ppv = NULL;
if (pbits & PG_A) {
ppv = pa_to_pvh(pbits);
- ppv->pv_vm_page->flags |= PG_REFERENCED;
+ vm_page_flag_set(ppv->pv_vm_page, PG_REFERENCED);
pbits &= ~PG_A;
}
if (pbits & PG_M) {
@@ -2436,7 +2435,7 @@ pmap_object_init_pt(pmap, addr, object, pindex, size, limit)
retry:
p = vm_page_lookup(object, pindex);
- if (p && vm_page_sleep(p, "init4p", NULL))
+ if (p && vm_page_sleep_busy(p, FALSE, "init4p"))
goto retry;
if (p == NULL) {
@@ -2469,7 +2468,7 @@ retry:
ptepa += NBPDR;
ptepindex += 1;
}
- p->flags |= PG_MAPPED;
+ vm_page_flag_set(p, PG_MAPPED);
invltlb();
return;
}
@@ -2510,11 +2509,11 @@ retry:
(p->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) {
if ((p->queue - p->pc) == PQ_CACHE)
vm_page_deactivate(p);
- p->flags |= PG_BUSY;
+ vm_page_busy(p);
mpte = pmap_enter_quick(pmap,
addr + i386_ptob(tmpidx),
VM_PAGE_TO_PHYS(p), mpte);
- p->flags |= PG_MAPPED;
+ vm_page_flag_set(p, PG_MAPPED);
vm_page_wakeup(p);
}
objpgs -= 1;
@@ -2531,11 +2530,11 @@ retry:
(p->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) {
if ((p->queue - p->pc) == PQ_CACHE)
vm_page_deactivate(p);
- p->flags |= PG_BUSY;
+ vm_page_busy(p);
mpte = pmap_enter_quick(pmap,
addr + i386_ptob(tmpidx),
VM_PAGE_TO_PHYS(p), mpte);
- p->flags |= PG_MAPPED;
+ vm_page_flag_set(p, PG_MAPPED);
vm_page_wakeup(p);
}
}
@@ -2628,10 +2627,10 @@ pmap_prefault(pmap, addra, entry)
if ((m->queue - m->pc) == PQ_CACHE) {
vm_page_deactivate(m);
}
- m->flags |= PG_BUSY;
+ vm_page_busy(m);
mpte = pmap_enter_quick(pmap, addr,
VM_PAGE_TO_PHYS(m), mpte);
- m->flags |= PG_MAPPED;
+ vm_page_flag_set(m, PG_MAPPED);
vm_page_wakeup(m);
}
}
@@ -3026,7 +3025,7 @@ pmap_remove_pages(pmap, sva, eva)
ppv->pv_list_count--;
TAILQ_REMOVE(&ppv->pv_list, pv, pv_list);
if (TAILQ_FIRST(&ppv->pv_list) == NULL) {
- ppv->pv_vm_page->flags &= ~(PG_MAPPED | PG_WRITEABLE);
+ vm_page_flag_clear(ppv->pv_vm_page, PG_MAPPED | PG_WRITEABLE);
}
pmap_unuse_pt(pv->pv_pmap, pv->pv_va, pv->pv_ptem);
@@ -3406,7 +3405,7 @@ pmap_mincore(pmap, addr)
*/
else if ((m->flags & PG_REFERENCED) || pmap_ts_referenced(pa)) {
val |= MINCORE_REFERENCED_OTHER;
- m->flags |= PG_REFERENCED;
+ vm_page_flag_set(m, PG_REFERENCED);
}
}
return val;
diff --git a/sys/cam/cam_periph.c b/sys/cam/cam_periph.c
index 57ac533..e4be47f 100644
--- a/sys/cam/cam_periph.c
+++ b/sys/cam/cam_periph.c
@@ -26,7 +26,7 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $Id: cam_periph.c,v 1.8 1998/12/16 21:00:06 ken Exp $
+ * $Id: cam_periph.c,v 1.9 1999/01/14 06:21:54 jdp Exp $
*/
#include <sys/param.h>
@@ -599,7 +599,7 @@ cam_periph_mapmem(union ccb *ccb, struct cam_periph_map_info *mapinfo)
/*
* Get the buffer.
*/
- mapinfo->bp[i] = getpbuf();
+ mapinfo->bp[i] = getpbuf(NULL);
/* save the buffer's data address */
mapinfo->bp[i]->b_saveaddr = mapinfo->bp[i]->b_data;
@@ -674,7 +674,7 @@ cam_periph_unmapmem(union ccb *ccb, struct cam_periph_map_info *mapinfo)
mapinfo->bp[i]->b_flags &= ~(B_PHYS|B_BUSY);
/* release the buffer */
- relpbuf(mapinfo->bp[i]);
+ relpbuf(mapinfo->bp[i], NULL);
}
/* allow ourselves to be swapped once again */
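The getpbuf()/relpbuf() interface now takes a pointer to a per-subsystem free count; passing NULL, as here, draws from the global pbuf pool. A hedged sketch of the counted pattern, modeled on the nfs_pbuf_freecnt usage later in this diff (the my_* names are hypothetical):

        static int my_pbuf_freecnt = -1;        /* -1 is treated as "no limit" */

        static void
        my_subsys_init(void)
        {
                /* reserve roughly half the swap buffers, as nfs_init() does */
                my_pbuf_freecnt = nswbuf / 2 + 1;
        }

        static void
        my_subsys_io(void)
        {
                struct buf *bp;

                bp = getpbuf(&my_pbuf_freecnt); /* may sleep on the count */
                /* ... map pages, issue I/O, wait for completion ... */
                relpbuf(bp, &my_pbuf_freecnt);  /* releases and wakes waiters */
        }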
diff --git a/sys/conf/files b/sys/conf/files
index 795f6f8..02a281b 100644
--- a/sys/conf/files
+++ b/sys/conf/files
@@ -306,6 +306,7 @@ kern/subr_module.c standard
kern/subr_prf.c standard
kern/subr_prof.c standard
kern/subr_rlist.c standard
+kern/subr_blist.c standard
kern/subr_scanf.c standard
kern/subr_xxx.c standard
kern/sys_generic.c standard
diff --git a/sys/conf/options b/sys/conf/options
index 35ceb1a..6dfc0cc 100644
--- a/sys/conf/options
+++ b/sys/conf/options
@@ -1,4 +1,4 @@
-# $Id: options,v 1.120 1999/01/17 19:02:39 peter Exp $
+# $Id: options,v 1.121 1999/01/20 14:49:07 eivind Exp $
#
# On the handling of kernel options
#
@@ -209,6 +209,7 @@ TCPDEBUG
IPFILTER opt_ipfilter.h
IPFILTER_LOG opt_ipfilter.h
IPFILTER_LKM opt_ipfilter.h
+SLIP_IFF_OPTS opt_slip.h
# ATM (HARP version)
ATM_CORE opt_atm.h
diff --git a/sys/fs/procfs/procfs_map.c b/sys/fs/procfs/procfs_map.c
index 4dae10a..c6b8966 100644
--- a/sys/fs/procfs/procfs_map.c
+++ b/sys/fs/procfs/procfs_map.c
@@ -36,7 +36,7 @@
*
* @(#)procfs_status.c 8.3 (Berkeley) 2/17/94
*
- * $Id: procfs_map.c,v 1.17 1998/04/29 04:28:22 dyson Exp $
+ * $Id: procfs_map.c,v 1.18 1998/12/04 22:54:51 archie Exp $
*/
#include <sys/param.h>
@@ -93,7 +93,7 @@ procfs_domap(curp, p, pfs, uio)
((uio->uio_resid > 0) && (entry != &map->header));
entry = entry->next) {
vm_object_t obj, tobj, lobj;
- int ref_count, shadow_count, id, flags;
+ int ref_count, shadow_count, flags;
vm_offset_t addr;
int resident, privateresident;
char *type;
@@ -139,13 +139,11 @@ case OBJT_DEVICE:
flags = obj->flags;
ref_count = obj->ref_count;
shadow_count = obj->shadow_count;
- id = obj->id;
} else {
type = "none";
flags = 0;
ref_count = 0;
shadow_count = 0;
- id = 0;
}
@@ -154,9 +152,9 @@ case OBJT_DEVICE:
* start, end, resident, private resident, cow, access, type.
*/
snprintf(mebuffer, sizeof(mebuffer),
- "0x%x 0x%x %d %d %d %s%s%s %d %d 0x%x %s %s %s\n",
+ "0x%x 0x%x %d %d %p %s%s%s %d %d 0x%x %s %s %s\n",
entry->start, entry->end,
- resident, privateresident, id,
+ resident, privateresident, obj,
(entry->protection & VM_PROT_READ)?"r":"-",
(entry->protection & VM_PROT_WRITE)?"w":"-",
(entry->protection & VM_PROT_EXECUTE)?"x":"-",
diff --git a/sys/fs/specfs/spec_vnops.c b/sys/fs/specfs/spec_vnops.c
index ff0f347..6096a1b 100644
--- a/sys/fs/specfs/spec_vnops.c
+++ b/sys/fs/specfs/spec_vnops.c
@@ -31,7 +31,7 @@
* SUCH DAMAGE.
*
* @(#)spec_vnops.c 8.14 (Berkeley) 5/21/95
- * $Id: spec_vnops.c,v 1.77 1998/12/07 21:58:33 archie Exp $
+ * $Id: spec_vnops.c,v 1.78 1998/12/16 00:10:51 eivind Exp $
*/
#include <sys/param.h>
@@ -781,7 +781,7 @@ spec_getpages(ap)
blksiz = DEV_BSIZE;
size = (ap->a_count + blksiz - 1) & ~(blksiz - 1);
- bp = getpbuf();
+ bp = getpbuf(NULL);
kva = (vm_offset_t)bp->b_data;
/*
@@ -894,13 +894,13 @@ spec_getpages(ap)
/*
* Free the buffer header back to the swap buffer pool.
*/
- relpbuf(bp);
+ relpbuf(bp, NULL);
return VM_PAGER_ERROR;
}
/*
* Free the buffer header back to the swap buffer pool.
*/
- relpbuf(bp);
+ relpbuf(bp, NULL);
return VM_PAGER_OK;
}
diff --git a/sys/i386/i386/pmap.c b/sys/i386/i386/pmap.c
index 66c9b63..2a378d3 100644
--- a/sys/i386/i386/pmap.c
+++ b/sys/i386/i386/pmap.c
@@ -39,7 +39,7 @@
* SUCH DAMAGE.
*
* from: @(#)pmap.c 7.7 (Berkeley) 5/12/91
- * $Id: pmap.c,v 1.218 1999/01/09 21:41:22 dt Exp $
+ * $Id: pmap.c,v 1.219 1999/01/12 00:17:53 eivind Exp $
*/
/*
@@ -942,7 +942,7 @@ pmap_page_lookup(object, pindex)
vm_page_t m;
retry:
m = vm_page_lookup(object, pindex);
- if (m && vm_page_sleep(m, "pplookp", NULL))
+ if (m && vm_page_sleep_busy(m, FALSE, "pplookp"))
goto retry;
return m;
}
@@ -1009,8 +1009,8 @@ pmap_new_proc(p)
}
vm_page_wakeup(m);
- m->flags &= ~PG_ZERO;
- m->flags |= PG_MAPPED | PG_WRITEABLE;
+ vm_page_flag_clear(m, PG_ZERO);
+ vm_page_flag_set(m, PG_MAPPED | PG_WRITEABLE);
m->valid = VM_PAGE_BITS_ALL;
}
if (updateneeded)
@@ -1038,7 +1038,7 @@ pmap_dispose_proc(p)
if ((m = vm_page_lookup(upobj, i)) == NULL)
panic("pmap_dispose_proc: upage already missing???");
- m->flags |= PG_BUSY;
+ vm_page_busy(m);
oldpte = *(ptek + i);
*(ptek + i) = 0;
@@ -1107,7 +1107,7 @@ pmap_swapin_proc(p)
vm_page_wire(m);
vm_page_wakeup(m);
- m->flags |= PG_MAPPED | PG_WRITEABLE;
+ vm_page_flag_set(m, PG_MAPPED | PG_WRITEABLE);
}
}
@@ -1122,7 +1122,8 @@ pmap_swapin_proc(p)
static int
_pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m) {
- while (vm_page_sleep(m, "pmuwpt", NULL));
+ while (vm_page_sleep_busy(m, FALSE, "pmuwpt"))
+ ;
if (m->hold_count == 0) {
vm_offset_t pteva;
@@ -1150,12 +1151,8 @@ _pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m) {
--m->wire_count;
if (m->wire_count == 0) {
- if (m->flags & PG_WANTED) {
- m->flags &= ~PG_WANTED;
- wakeup(m);
- }
-
- m->flags |= PG_BUSY;
+ vm_page_flash(m);
+ vm_page_busy(m);
vm_page_free_zero(m);
--cnt.v_wire_count;
}
@@ -1257,7 +1254,8 @@ pmap_pinit(pmap)
ptdpg->wire_count = 1;
++cnt.v_wire_count;
- ptdpg->flags &= ~(PG_MAPPED | PG_BUSY); /* not mapped normally */
+
+ vm_page_flag_clear(ptdpg, PG_MAPPED | PG_BUSY); /* not usually mapped*/
ptdpg->valid = VM_PAGE_BITS_ALL;
pmap_kenter((vm_offset_t) pmap->pm_pdir, VM_PAGE_TO_PHYS(ptdpg));
@@ -1290,10 +1288,10 @@ pmap_release_free_page(pmap, p)
* page-table pages. Those pages are zero now, and
* might as well be placed directly into the zero queue.
*/
- if (vm_page_sleep(p, "pmaprl", NULL))
+ if (vm_page_sleep_busy(p, FALSE, "pmaprl"))
return 0;
- p->flags |= PG_BUSY;
+ vm_page_busy(p);
/*
* Remove the page table page from the processes address space.
@@ -1393,8 +1391,9 @@ _pmap_allocpte(pmap, ptepindex)
}
m->valid = VM_PAGE_BITS_ALL;
- m->flags &= ~(PG_ZERO | PG_BUSY);
- m->flags |= PG_MAPPED;
+ vm_page_flag_clear(m, PG_ZERO);
+ vm_page_flag_set(m, PG_MAPPED);
+ vm_page_wakeup(m);
return m;
}
@@ -1713,7 +1712,7 @@ pmap_remove_entry(pmap, ppv, va)
TAILQ_REMOVE(&ppv->pv_list, pv, pv_list);
ppv->pv_list_count--;
if (TAILQ_FIRST(&ppv->pv_list) == NULL)
- ppv->pv_vm_page->flags &= ~(PG_MAPPED | PG_WRITEABLE);
+ vm_page_flag_clear(ppv->pv_vm_page, PG_MAPPED | PG_WRITEABLE);
TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist);
free_pv_entry(pv);
@@ -1791,7 +1790,7 @@ pmap_remove_pte(pmap, ptq, va)
ppv->pv_vm_page->dirty = VM_PAGE_BITS_ALL;
}
if (oldpte & PG_A)
- ppv->pv_vm_page->flags |= PG_REFERENCED;
+ vm_page_flag_set(ppv->pv_vm_page, PG_REFERENCED);
return pmap_remove_entry(pmap, ppv, va);
} else {
return pmap_unuse_pt(pmap, va, NULL);
@@ -1976,7 +1975,7 @@ pmap_remove_all(pa)
pv->pv_pmap->pm_stats.wired_count--;
if (tpte & PG_A)
- ppv->pv_vm_page->flags |= PG_REFERENCED;
+ vm_page_flag_set(ppv->pv_vm_page, PG_REFERENCED);
/*
* Update the vm_page_t clean and reference bits.
@@ -2005,7 +2004,7 @@ pmap_remove_all(pa)
free_pv_entry(pv);
}
- ppv->pv_vm_page->flags &= ~(PG_MAPPED | PG_WRITEABLE);
+ vm_page_flag_clear(ppv->pv_vm_page, PG_MAPPED | PG_WRITEABLE);
if (update_needed)
invltlb();
@@ -2081,7 +2080,7 @@ pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
ppv = NULL;
if (pbits & PG_A) {
ppv = pa_to_pvh(pbits);
- ppv->pv_vm_page->flags |= PG_REFERENCED;
+ vm_page_flag_set(ppv->pv_vm_page, PG_REFERENCED);
pbits &= ~PG_A;
}
if (pbits & PG_M) {
@@ -2436,7 +2435,7 @@ pmap_object_init_pt(pmap, addr, object, pindex, size, limit)
retry:
p = vm_page_lookup(object, pindex);
- if (p && vm_page_sleep(p, "init4p", NULL))
+ if (p && vm_page_sleep_busy(p, FALSE, "init4p"))
goto retry;
if (p == NULL) {
@@ -2469,7 +2468,7 @@ retry:
ptepa += NBPDR;
ptepindex += 1;
}
- p->flags |= PG_MAPPED;
+ vm_page_flag_set(p, PG_MAPPED);
invltlb();
return;
}
@@ -2510,11 +2509,11 @@ retry:
(p->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) {
if ((p->queue - p->pc) == PQ_CACHE)
vm_page_deactivate(p);
- p->flags |= PG_BUSY;
+ vm_page_busy(p);
mpte = pmap_enter_quick(pmap,
addr + i386_ptob(tmpidx),
VM_PAGE_TO_PHYS(p), mpte);
- p->flags |= PG_MAPPED;
+ vm_page_flag_set(p, PG_MAPPED);
vm_page_wakeup(p);
}
objpgs -= 1;
@@ -2531,11 +2530,11 @@ retry:
(p->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) {
if ((p->queue - p->pc) == PQ_CACHE)
vm_page_deactivate(p);
- p->flags |= PG_BUSY;
+ vm_page_busy(p);
mpte = pmap_enter_quick(pmap,
addr + i386_ptob(tmpidx),
VM_PAGE_TO_PHYS(p), mpte);
- p->flags |= PG_MAPPED;
+ vm_page_flag_set(p, PG_MAPPED);
vm_page_wakeup(p);
}
}
@@ -2628,10 +2627,10 @@ pmap_prefault(pmap, addra, entry)
if ((m->queue - m->pc) == PQ_CACHE) {
vm_page_deactivate(m);
}
- m->flags |= PG_BUSY;
+ vm_page_busy(m);
mpte = pmap_enter_quick(pmap, addr,
VM_PAGE_TO_PHYS(m), mpte);
- m->flags |= PG_MAPPED;
+ vm_page_flag_set(m, PG_MAPPED);
vm_page_wakeup(m);
}
}
@@ -3026,7 +3025,7 @@ pmap_remove_pages(pmap, sva, eva)
ppv->pv_list_count--;
TAILQ_REMOVE(&ppv->pv_list, pv, pv_list);
if (TAILQ_FIRST(&ppv->pv_list) == NULL) {
- ppv->pv_vm_page->flags &= ~(PG_MAPPED | PG_WRITEABLE);
+ vm_page_flag_clear(ppv->pv_vm_page, PG_MAPPED | PG_WRITEABLE);
}
pmap_unuse_pt(pv->pv_pmap, pv->pv_va, pv->pv_ptem);
@@ -3406,7 +3405,7 @@ pmap_mincore(pmap, addr)
*/
else if ((m->flags & PG_REFERENCED) || pmap_ts_referenced(pa)) {
val |= MINCORE_REFERENCED_OTHER;
- m->flags |= PG_REFERENCED;
+ vm_page_flag_set(m, PG_REFERENCED);
}
}
return val;
diff --git a/sys/i386/i386/symbols.raw b/sys/i386/i386/symbols.raw
index 4703c30..943d8ae 100644
--- a/sys/i386/i386/symbols.raw
+++ b/sys/i386/i386/symbols.raw
@@ -1,6 +1,6 @@
# @(#)symbols.raw 7.6 (Berkeley) 5/8/91
#
-# $Id: symbols.raw,v 1.12 1998/03/30 09:48:20 phk Exp $
+# $Id: symbols.raw,v 1.13 1998/09/15 10:03:43 gibbs Exp $
#
@@ -28,7 +28,8 @@
#pstat
# _cons
_nswap
- _swaplist
+ _swapblist
+# _swaplist
#vmstat
_cp_time
# _rate
diff --git a/sys/kern/kern_malloc.c b/sys/kern/kern_malloc.c
index a9776a5..be9f9d3 100644
--- a/sys/kern/kern_malloc.c
+++ b/sys/kern/kern_malloc.c
@@ -31,7 +31,7 @@
* SUCH DAMAGE.
*
* @(#)kern_malloc.c 8.3 (Berkeley) 1/4/94
- * $Id: kern_malloc.c,v 1.50 1999/01/08 17:31:09 eivind Exp $
+ * $Id: kern_malloc.c,v 1.51 1999/01/10 01:58:24 eivind Exp $
*/
#include "opt_vm.h"
@@ -101,7 +101,16 @@ struct freelist {
#endif /* INVARIANTS */
/*
- * Allocate a block of memory
+ * malloc:
+ *
+ * Allocate a block of memory.
+ *
+ * If M_NOWAIT is set, this routine will not block and return NULL if
+ * the allocation fails.
+ *
+ * If M_ASLEEP is set (M_NOWAIT must also be set), this routine
+ * will have the side effect of calling asleep() if it returns NULL,
+ * allowing the parent to await() at some future time.
*/
void *
malloc(size, type, flags)
@@ -122,13 +131,26 @@ malloc(size, type, flags)
#endif
register struct malloc_type *ksp = type;
- if (!type->ks_next)
+ /*
+ * Must be at splmem() prior to initializing segment to handle
+ * potential initialization race.
+ */
+
+ s = splmem();
+
+ if (!type->ks_next) {
malloc_init(type);
+ }
indx = BUCKETINDX(size);
kbp = &bucket[indx];
- s = splmem();
+
while (ksp->ks_memuse >= ksp->ks_limit) {
+ if (flags & M_ASLEEP) {
+ if (ksp->ks_limblocks < 65535)
+ ksp->ks_limblocks++;
+ asleep((caddr_t)ksp, PSWP+2, type->ks_shortdesc, 0);
+ }
if (flags & M_NOWAIT) {
splx(s);
return ((void *) NULL);
@@ -239,7 +261,11 @@ out:
}
/*
- * Free a block of memory allocated by malloc.
+ * free:
+ *
+ * Free a block of memory allocated by malloc.
+ *
+ * This routine may not block.
*/
void
free(addr, type)
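A hedged sketch of the M_ASLEEP pattern the new malloc() comment describes, assuming the asleep()/await() pairing of this era (the helper below is hypothetical): malloc() fails without blocking but registers the caller on the type's wait channel, and a later await() performs the actual sleep:

        static void *
        alloc_when_possible(unsigned long size)
        {
                void *p;

                for (;;) {
                        /* returns NULL on failure, but queues us via asleep() */
                        p = malloc(size, M_DEVBUF, M_NOWAIT | M_ASLEEP);
                        if (p != NULL)
                                return (p);
                        await(PSWP + 2, 0);     /* sleep until memory is freed */
                }
        }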
diff --git a/sys/kern/kern_physio.c b/sys/kern/kern_physio.c
index 441d95f..ad63a98 100644
--- a/sys/kern/kern_physio.c
+++ b/sys/kern/kern_physio.c
@@ -16,7 +16,7 @@
* 4. Modifications may be freely made to this file if the above conditions
* are met.
*
- * $Id: kern_physio.c,v 1.28 1998/08/19 10:50:32 sos Exp $
+ * $Id: kern_physio.c,v 1.29 1998/10/25 17:44:51 phk Exp $
*/
#include <sys/param.h>
@@ -147,7 +147,7 @@ physio(strategy, bp, dev, rw, minp, uio)
doerror:
- relpbuf(bpa);
+ relpbuf(bpa, NULL);
if (!bp_alloc) {
bp->b_flags &= ~(B_BUSY|B_PHYS);
if( bp->b_flags & B_WANTED) {
@@ -197,13 +197,13 @@ phygetvpbuf(dev_t dev, int resid)
bdsw = cdevsw[major(dev)];
if ((bdsw == NULL) || (bdsw->d_bmaj == -1))
- return getpbuf();
+ return getpbuf(NULL);
maxio = bdsw->d_maxio;
if (resid > maxio)
resid = maxio;
- return getpbuf();
+ return getpbuf(NULL);
}
static void
diff --git a/sys/kern/subr_rlist.c b/sys/kern/subr_rlist.c
index d637ab4..810b87e 100644
--- a/sys/kern/subr_rlist.c
+++ b/sys/kern/subr_rlist.c
@@ -13,7 +13,7 @@
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This software is a component of "386BSD" developed by
- William F. Jolitz, TeleMuse.
+ * William F. Jolitz, TeleMuse.
* 4. Neither the name of the developer nor the name "386BSD"
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
@@ -54,9 +54,13 @@
* functioning of this software, nor does the author assume any responsibility
* for damages incurred with its use.
*
- * $Id: subr_rlist.c,v 1.28 1999/01/08 17:31:12 eivind Exp $
 * --------- DEPRECATED ---------
+ *
+ * $Id: subr_rlist.c,v 1.29 1999/01/10 01:58:25 eivind Exp $
*/
+#if 0
+
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/rlist.h>
@@ -307,3 +311,6 @@ rlist_destroy (rlh)
rlist_mfree(lp);
}
}
+
+#endif
+
diff --git a/sys/kern/sysv_shm.c b/sys/kern/sysv_shm.c
index edc74a7..a6c2dfe 100644
--- a/sys/kern/sysv_shm.c
+++ b/sys/kern/sysv_shm.c
@@ -1,4 +1,4 @@
-/* $Id: sysv_shm.c,v 1.38 1998/08/24 08:39:38 dfr Exp $ */
+/* $Id: sysv_shm.c,v 1.39 1998/10/13 08:24:40 dg Exp $ */
/* $NetBSD: sysv_shm.c,v 1.23 1994/07/04 23:25:12 glass Exp $ */
/*
@@ -52,6 +52,7 @@
#include <vm/pmap.h>
#include <vm/vm_object.h>
#include <vm/vm_map.h>
+#include <vm/vm_page.h>
#include <vm/vm_pager.h>
#include <vm/vm_inherit.h>
diff --git a/sys/kern/uipc_syscalls.c b/sys/kern/uipc_syscalls.c
index 6cc487a..1634681 100644
--- a/sys/kern/uipc_syscalls.c
+++ b/sys/kern/uipc_syscalls.c
@@ -34,7 +34,7 @@
* SUCH DAMAGE.
*
* @(#)uipc_syscalls.c 8.4 (Berkeley) 2/21/94
- * $Id: uipc_syscalls.c,v 1.48 1998/12/03 12:35:47 dg Exp $
+ * $Id: uipc_syscalls.c,v 1.49 1998/12/07 21:58:29 archie Exp $
*/
#include "opt_compat.h"
@@ -1543,7 +1543,13 @@ retry_lookup:
VM_WAIT;
goto retry_lookup;
}
- vm_page_flag_clear(pg, PG_BUSY);
+ /*
+ * don't just clear PG_BUSY manually -
+ * vm_page_alloc() should be considered opaque,
+ * use the VM routine provided to clear
+ * PG_BUSY.
+ */
+ vm_page_wakeup(pg);
}
/*
* Ensure that our page is still around when the I/O completes.
@@ -1583,21 +1589,12 @@ retry_lookup:
goto done;
}
} else {
- if ((pg->flags & PG_BUSY) || pg->busy) {
- s = splvm();
- if ((pg->flags & PG_BUSY) || pg->busy) {
- /*
- * Page is busy. Wait and retry.
- */
- vm_page_flag_set(pg, PG_WANTED);
- tsleep(pg, PVM, "sfpbsy", 0);
- splx(s);
- goto retry_lookup;
- }
- splx(s);
- }
+ if (vm_page_sleep_busy(pg, TRUE, "sfpbsy"))
+ goto retry_lookup;
+
/*
- * Protect from having the page ripped out from beneath us.
+ * Protect from having the page ripped out from
+ * beneath us.
*/
vm_page_wire(pg);
}
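The sendfile change above illustrates a rule applied throughout this commit: PG_BUSY is never cleared by hand, because a sleeper may be parked on PG_WANTED. A sketch of what vm_page_wakeup() does conceptually, per the open-coded sequence removed from the pmap files earlier (details assumed from those hunks):

        static __inline void
        page_wakeup_sketch(vm_page_t m)
        {
                vm_page_flag_clear(m, PG_BUSY);
                /* the "flash": wake anyone who marked the page wanted */
                if (m->flags & PG_WANTED) {
                        vm_page_flag_clear(m, PG_WANTED);
                        wakeup(m);
                }
        }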
diff --git a/sys/kern/uipc_usrreq.c b/sys/kern/uipc_usrreq.c
index 93f6164..d528f5e 100644
--- a/sys/kern/uipc_usrreq.c
+++ b/sys/kern/uipc_usrreq.c
@@ -31,7 +31,7 @@
* SUCH DAMAGE.
*
* From: @(#)uipc_usrreq.c 8.3 (Berkeley) 1/4/94
- * $Id: uipc_usrreq.c,v 1.36 1998/07/15 02:32:12 bde Exp $
+ * $Id: uipc_usrreq.c,v 1.37 1998/10/25 17:44:51 phk Exp $
*/
#include <sys/param.h>
@@ -1114,8 +1114,11 @@ unp_gc()
/*
* for each FD on our hit list, do the following two things
*/
- for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp)
- sorflush((struct socket *)(*fpp)->f_data);
+ for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp) {
+ struct file *tfp = *fpp;
+ if (tfp->f_type == DTYPE_SOCKET && tfp->f_data != NULL)
+ sorflush((struct socket *)(tfp->f_data));
+ }
for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp)
closef(*fpp, (struct proc *) NULL);
free((caddr_t)extra_ref, M_FILE);
diff --git a/sys/kern/vfs_aio.c b/sys/kern/vfs_aio.c
index c7c8aa9..c1af873 100644
--- a/sys/kern/vfs_aio.c
+++ b/sys/kern/vfs_aio.c
@@ -13,7 +13,7 @@
* bad that happens because of using this software isn't the responsibility
* of the author. This software is distributed AS-IS.
*
- * $Id: vfs_aio.c,v 1.35 1998/11/27 01:14:21 tegge Exp $
+ * $Id: vfs_aio.c,v 1.36 1998/12/15 17:38:33 des Exp $
*/
/*
@@ -386,7 +386,7 @@ aio_free_entry(struct aiocblist *aiocbe)
splx(s);
if (aiocbe->bp) {
vunmapbuf(aiocbe->bp);
- relpbuf(aiocbe->bp);
+ relpbuf(aiocbe->bp, NULL);
aiocbe->bp = NULL;
}
}
@@ -1035,7 +1035,7 @@ aio_qphysio(p, aiocbe)
}
/* create and build a buffer header for a transfer */
- bp = (struct buf *)getpbuf();
+ bp = (struct buf *)getpbuf(NULL);
/*
* get a copy of the kva from the physical buffer
@@ -1122,7 +1122,7 @@ doerror:
lj->lioj_buffer_count--;
}
aiocbe->bp = NULL;
- relpbuf(bp);
+ relpbuf(bp, NULL);
return error;
}
@@ -1172,7 +1172,7 @@ aio_fphysio(p, iocb, flgwait)
error = bp->b_error;
}
- relpbuf(bp);
+ relpbuf(bp, NULL);
return (error);
}
diff --git a/sys/kern/vfs_bio.c b/sys/kern/vfs_bio.c
index 30018b5..3bb204e 100644
--- a/sys/kern/vfs_bio.c
+++ b/sys/kern/vfs_bio.c
@@ -11,7 +11,7 @@
* 2. Absolutely no warranty of function or purpose is made by the author
* John S. Dyson.
*
- * $Id: vfs_bio.c,v 1.192 1999/01/12 11:59:34 eivind Exp $
+ * $Id: vfs_bio.c,v 1.193 1999/01/19 08:00:51 dillon Exp $
*/
/*
@@ -562,7 +562,7 @@ brelse(struct buf * bp)
int s;
if (bp->b_flags & B_CLUSTER) {
- relpbuf(bp);
+ relpbuf(bp, NULL);
return;
}
@@ -1364,6 +1364,7 @@ vfs_setdirty(struct buf *bp) {
break;
}
}
+
boffset = (i << PAGE_SHIFT) - (bp->b_offset & PAGE_MASK);
if (boffset < bp->b_dirtyoff) {
bp->b_dirtyoff = max(boffset, 0);
@@ -1412,7 +1413,6 @@ loop:
if ((bp = gbincore(vp, blkno))) {
if (bp->b_flags & B_BUSY) {
-
bp->b_flags |= B_WANTED;
if (bp->b_usecount < BUF_MAXUSE)
++bp->b_usecount;
@@ -1429,16 +1429,13 @@ loop:
bremfree(bp);
/*
- * check for size inconsistancies (note that they shouldn't
- * happen but do when filesystems don't handle the size changes
- * correctly.) We are conservative on metadata and don't just
- * extend the buffer but write (if needed) and re-constitute it.
+ * check for size inconsistencies for the non-VMIO case.
*/
if (bp->b_bcount != size) {
- if ((bp->b_flags & B_VMIO) && (size <= bp->b_kvasize)) {
- allocbuf(bp, size);
- } else {
+ if ((bp->b_flags & B_VMIO) == 0 ||
+ (size > bp->b_kvasize)
+ ) {
if (bp->b_flags & B_DELWRI) {
bp->b_flags |= B_NOCACHE;
VOP_BWRITE(bp);
@@ -1455,15 +1452,26 @@ loop:
goto loop;
}
}
+
+ /*
+ * If the size is inconsistant in the VMIO case, we can resize
+ * the buffer. This might lead to B_CACHE getting cleared.
+ */
+
+ if (bp->b_bcount != size)
+ allocbuf(bp, size);
+
KASSERT(bp->b_offset != NOOFFSET,
("getblk: no buffer offset"));
+
/*
* Check that the constituted buffer really deserves for the
* B_CACHE bit to be set. B_VMIO type buffers might not
* contain fully valid pages. Normal (old-style) buffers
- * should be fully valid.
+ * should be fully valid. This might also lead to B_CACHE
+ * getting clear.
*/
- if (bp->b_flags & B_VMIO) {
+ if ((bp->b_flags & (B_VMIO|B_CACHE)) == (B_VMIO|B_CACHE)) {
int checksize = bp->b_bufsize;
int poffset = bp->b_offset & PAGE_MASK;
int resid;
@@ -1479,6 +1487,19 @@ loop:
}
}
+ /*
+ * If B_DELWRI is set and B_CACHE got cleared ( or was
+ * already clear ), we have to commit the write and
+ * retry. The NFS code absolutely depends on this,
+ * and so might the FFS code. In any case, it formalizes
+ * the B_CACHE rules. See sys/buf.h.
+ */
+
+ if ((bp->b_flags & (B_CACHE|B_DELWRI)) == B_DELWRI) {
+ VOP_BWRITE(bp);
+ goto loop;
+ }
+
if (bp->b_usecount < BUF_MAXUSE)
++bp->b_usecount;
splx(s);
@@ -1572,19 +1593,18 @@ geteblk(int size)
/*
* This code constitutes the buffer memory from either anonymous system
* memory (in the case of non-VMIO operations) or from an associated
- * VM object (in the case of VMIO operations).
+ * VM object (in the case of VMIO operations). This code is able to
+ * resize a buffer up or down.
*
* Note that this code is tricky, and has many complications to resolve
- * deadlock or inconsistant data situations. Tread lightly!!!
- *
- * Modify the length of a buffer's underlying buffer storage without
- * destroying information (unless, of course the buffer is shrinking).
+ * deadlock or inconsistent data situations. Tread lightly!!!
+ * There are B_CACHE and B_DELWRI interactions that must be dealt with by
+ * the caller. Calling this code willy nilly can result in the loss of data.
*/
+
int
-allocbuf(struct buf * bp, int size)
+allocbuf(struct buf *bp, int size)
{
-
- int s;
int newbsize, mbsize;
int i;
@@ -1705,7 +1725,8 @@ allocbuf(struct buf * bp, int size)
m = bp->b_pages[i];
KASSERT(m != bogus_page,
("allocbuf: bogus page found"));
- vm_page_sleep(m, "biodep", &m->busy);
+ while (vm_page_sleep_busy(m, TRUE, "biodep"))
+ ;
bp->b_pages[i] = NULL;
vm_page_unwire(m, 0);
@@ -1771,16 +1792,25 @@ allocbuf(struct buf * bp, int size)
}
vm_page_wire(m);
- vm_page_flag_clear(m, PG_BUSY);
+ vm_page_wakeup(m);
bp->b_flags &= ~B_CACHE;
- } else if (m->flags & PG_BUSY) {
- s = splvm();
- if (m->flags & PG_BUSY) {
- vm_page_flag_set(m, PG_WANTED);
- tsleep(m, PVM, "pgtblk", 0);
- }
- splx(s);
+ } else if (vm_page_sleep_busy(m, FALSE, "pgtblk")) {
+ /*
+ * If we had to sleep, retry.
+ *
+ * Also note that we only test
+ * PG_BUSY here, not m->busy.
+ *
+ * We cannot sleep on m->busy
+ * here because a vm_fault ->
+ * getpages -> cluster-read ->
+ * ...-> allocbuf sequence
+ * will convert PG_BUSY to
+ * m->busy so we have to let
+ * m->busy through if we do
+ * not want to deadlock.
+ */
goto doretry;
} else {
if ((curproc != pageproc) &&
@@ -2010,12 +2040,8 @@ biodone(register struct buf * bp)
foff += resid;
iosize -= resid;
}
- if (obj &&
- (obj->paging_in_progress == 0) &&
- (obj->flags & OBJ_PIPWNT)) {
- vm_object_clear_flag(obj, OBJ_PIPWNT);
- wakeup(obj);
- }
+ if (obj)
+ vm_object_pip_wakeupn(obj, 0);
}
/*
* For asynchronous completions, release the buffer now. The brelse
@@ -2096,11 +2122,7 @@ vfs_unbusy_pages(struct buf * bp)
vm_page_flag_clear(m, PG_ZERO);
vm_page_io_finish(m);
}
- if (obj->paging_in_progress == 0 &&
- (obj->flags & OBJ_PIPWNT)) {
- vm_object_clear_flag(obj, OBJ_PIPWNT);
- wakeup(obj);
- }
+ vm_object_pip_wakeupn(obj, 0);
}
}
@@ -2109,6 +2131,8 @@ vfs_unbusy_pages(struct buf * bp)
* of a page. If the consumer is not NFS, and the page is not
* valid for the entire range, clear the B_CACHE flag to force
* the consumer to re-read the page.
+ *
+ * B_CACHE interaction is especially tricky.
*/
static void
vfs_buf_set_valid(struct buf *bp,
@@ -2135,13 +2159,16 @@ vfs_buf_set_valid(struct buf *bp,
}
evalid = min(evalid, off + size);
/*
- * Make sure this range is contiguous with the range
- * built up from previous pages. If not, then we will
- * just use the range from the previous pages.
+ * We can only set b_validoff/end if this range is contiguous
+ * with the range built up already. If we cannot set
+ * b_validoff/end, we must clear B_CACHE to force an update
+ * to clean the bp up.
*/
if (svalid == bp->b_validend) {
bp->b_validoff = min(bp->b_validoff, svalid);
bp->b_validend = max(bp->b_validend, evalid);
+ } else {
+ bp->b_flags &= ~B_CACHE;
}
} else if (!vm_page_is_valid(m,
(vm_offset_t) ((foff + off) & PAGE_MASK),
@@ -2154,6 +2181,10 @@ vfs_buf_set_valid(struct buf *bp,
* Set the valid bits in a page, taking care of the b_validoff,
* b_validend fields which NFS uses to optimise small reads. Off is
* the offset within the file and pageno is the page index within the buf.
+ *
+ * XXX we have to set the valid & clean bits for all page fragments
+ * touched by b_validoff/validend, even if the page fragment goes somewhat
+ * beyond b_validoff/validend due to alignment.
*/
static void
vfs_page_set_valid(struct buf *bp, vm_ooffset_t off, int pageno, vm_page_t m)
@@ -2208,7 +2239,7 @@ vfs_busy_pages(struct buf * bp, int clear_modify)
retry:
for (i = 0; i < bp->b_npages; i++) {
vm_page_t m = bp->b_pages[i];
- if (vm_page_sleep(m, "vbpage", NULL))
+ if (vm_page_sleep_busy(m, FALSE, "vbpage"))
goto retry;
}
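The two vfs_bio.c hunks above replace open-coded OBJ_PIPWNT handling with vm_object_pip_wakeupn(obj, 0). A sketch of the semantics implied by the code it replaces (i == 0 meaning "decrement nothing, just wake if already drained"):

        static __inline void
        object_pip_wakeupn_sketch(vm_object_t object, int i)
        {
                if (i)
                        object->paging_in_progress -= i;
                if (object->paging_in_progress == 0 &&
                    (object->flags & OBJ_PIPWNT)) {
                        vm_object_clear_flag(object, OBJ_PIPWNT);
                        wakeup(object);
                }
        }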
diff --git a/sys/kern/vfs_cluster.c b/sys/kern/vfs_cluster.c
index ce842ad..781508e 100644
--- a/sys/kern/vfs_cluster.c
+++ b/sys/kern/vfs_cluster.c
@@ -33,7 +33,7 @@
* SUCH DAMAGE.
*
* @(#)vfs_cluster.c 8.7 (Berkeley) 2/13/94
- * $Id: vfs_cluster.c,v 1.76 1999/01/08 17:31:15 eivind Exp $
+ * $Id: vfs_cluster.c,v 1.77 1999/01/10 01:58:25 eivind Exp $
*/
#include "opt_debug_cluster.h"
@@ -68,6 +68,8 @@ static struct buf *
extern vm_page_t bogus_page;
+extern int cluster_pbuf_freecnt;
+
/*
* Maximum number of blocks for read-ahead.
*/
@@ -336,7 +338,7 @@ cluster_rbuild(vp, filesize, lbn, blkno, size, run, fbp)
((tbp->b_flags & B_VMIO) == 0) || (run <= 1) )
return tbp;
- bp = trypbuf();
+ bp = trypbuf(&cluster_pbuf_freecnt);
if (bp == 0)
return tbp;
@@ -475,7 +477,7 @@ cluster_callback(bp)
tbp->b_dirtyoff = tbp->b_dirtyend = 0;
biodone(tbp);
}
- relpbuf(bp);
+ relpbuf(bp, &cluster_pbuf_freecnt);
}
/*
@@ -654,7 +656,7 @@ cluster_wbuild(vp, size, start_lbn, len)
(tbp->b_bcount != tbp->b_bufsize) ||
(tbp->b_bcount != size) ||
(len == 1) ||
- ((bp = trypbuf()) == NULL)) {
+ ((bp = trypbuf(&cluster_pbuf_freecnt)) == NULL)) {
totalwritten += tbp->b_bufsize;
bawrite(tbp);
++start_lbn;
diff --git a/sys/kern/vfs_export.c b/sys/kern/vfs_export.c
index 179ef78..44b1698 100644
--- a/sys/kern/vfs_export.c
+++ b/sys/kern/vfs_export.c
@@ -36,7 +36,7 @@
* SUCH DAMAGE.
*
* @(#)vfs_subr.c 8.31 (Berkeley) 5/26/95
- * $Id: vfs_subr.c,v 1.181 1999/01/08 17:31:17 eivind Exp $
+ * $Id: vfs_subr.c,v 1.182 1999/01/10 01:58:26 eivind Exp $
*/
/*
@@ -63,10 +63,13 @@
#include <machine/limits.h>
#include <vm/vm.h>
+#include <vm/vm_param.h>
+#include <vm/vm_prot.h>
#include <vm/vm_object.h>
#include <vm/vm_extern.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
+#include <vm/vm_page.h>
#include <vm/vm_pager.h>
#include <vm/vnode_pager.h>
#include <vm/vm_zone.h>
@@ -985,6 +988,10 @@ sched_sync(void)
/*
* Associate a p-buffer with a vnode.
+ *
+ * Also sets B_PAGING flag to indicate that vnode is not fully associated
+ * with the buffer. i.e. the bp has not been linked into the vnode or
+ * ref-counted.
*/
void
pbgetvp(vp, bp)
@@ -995,6 +1002,7 @@ pbgetvp(vp, bp)
KASSERT(bp->b_vp == NULL, ("pbgetvp: not free"));
bp->b_vp = vp;
+ bp->b_flags |= B_PAGING;
if (vp->v_type == VBLK || vp->v_type == VCHR)
bp->b_dev = vp->v_rdev;
else
@@ -1011,7 +1019,34 @@ pbrelvp(bp)
KASSERT(bp->b_vp != NULL, ("pbrelvp: NULL"));
+#if !defined(MAX_PERF)
+ /* XXX REMOVE ME */
+ if (bp->b_vnbufs.tqe_next != NULL) {
+ panic(
+ "relpbuf(): b_vp was probably reassignbuf()d %p %x",
+ bp,
+ (int)bp->b_flags
+ );
+ }
+#endif
bp->b_vp = (struct vnode *) 0;
+ bp->b_flags &= ~B_PAGING;
+}
+
+void
+pbreassignbuf(bp, newvp)
+ struct buf *bp;
+ struct vnode *newvp;
+{
+#if !defined(MAX_PERF)
+ if ((bp->b_flags & B_PAGING) == 0) {
+ panic(
+ "pbreassignbuf() on non phys bp %p",
+ bp
+ );
+ }
+#endif
+ bp->b_vp = newvp;
}
/*
@@ -1034,6 +1069,15 @@ reassignbuf(bp, newvp)
return;
}
+#if !defined(MAX_PERF)
+ /*
+ * B_PAGING flagged buffers cannot be reassigned because their vp
+ * is not fully linked in.
+ */
+ if (bp->b_flags & B_PAGING)
+ panic("cannot reassign paging buffer");
+#endif
+
s = splbio();
/*
* Delete from old vnode list, if on one.
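The new B_PAGING discipline in one place: a hedged sketch of a pbuf's vnode-association lifecycle using the routines added above (pbreassignbuf() is the new entry point; reassignbuf() now panics on paging buffers):

        static void
        pbuf_vnode_cycle(struct buf *bp, struct vnode *vp, struct vnode *newvp)
        {
                pbgetvp(vp, bp);          /* sets B_PAGING; bp is NOT linked
                                             into vp's clean/dirty lists */
                pbreassignbuf(bp, newvp); /* legal: just swaps b_vp */
                /* reassignbuf(bp, ...) here would panic: B_PAGING is set */
                pbrelvp(bp);              /* clears b_vp and B_PAGING */
        }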
diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c
index 179ef78..44b1698 100644
--- a/sys/kern/vfs_subr.c
+++ b/sys/kern/vfs_subr.c
@@ -36,7 +36,7 @@
* SUCH DAMAGE.
*
* @(#)vfs_subr.c 8.31 (Berkeley) 5/26/95
- * $Id: vfs_subr.c,v 1.181 1999/01/08 17:31:17 eivind Exp $
+ * $Id: vfs_subr.c,v 1.182 1999/01/10 01:58:26 eivind Exp $
*/
/*
@@ -63,10 +63,13 @@
#include <machine/limits.h>
#include <vm/vm.h>
+#include <vm/vm_param.h>
+#include <vm/vm_prot.h>
#include <vm/vm_object.h>
#include <vm/vm_extern.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
+#include <vm/vm_page.h>
#include <vm/vm_pager.h>
#include <vm/vnode_pager.h>
#include <vm/vm_zone.h>
@@ -985,6 +988,10 @@ sched_sync(void)
/*
* Associate a p-buffer with a vnode.
+ *
+ * Also sets B_PAGING flag to indicate that vnode is not fully associated
+ * with the buffer. i.e. the bp has not been linked into the vnode or
+ * ref-counted.
*/
void
pbgetvp(vp, bp)
@@ -995,6 +1002,7 @@ pbgetvp(vp, bp)
KASSERT(bp->b_vp == NULL, ("pbgetvp: not free"));
bp->b_vp = vp;
+ bp->b_flags |= B_PAGING;
if (vp->v_type == VBLK || vp->v_type == VCHR)
bp->b_dev = vp->v_rdev;
else
@@ -1011,7 +1019,34 @@ pbrelvp(bp)
KASSERT(bp->b_vp != NULL, ("pbrelvp: NULL"));
+#if !defined(MAX_PERF)
+ /* XXX REMOVE ME */
+ if (bp->b_vnbufs.tqe_next != NULL) {
+ panic(
+ "relpbuf(): b_vp was probably reassignbuf()d %p %x",
+ bp,
+ (int)bp->b_flags
+ );
+ }
+#endif
bp->b_vp = (struct vnode *) 0;
+ bp->b_flags &= ~B_PAGING;
+}
+
+void
+pbreassignbuf(bp, newvp)
+ struct buf *bp;
+ struct vnode *newvp;
+{
+#if !defined(MAX_PERF)
+ if ((bp->b_flags & B_PAGING) == 0) {
+ panic(
+ "pbreassignbuf() on non phys bp %p",
+ bp
+ );
+ }
+#endif
+ bp->b_vp = newvp;
}
/*
@@ -1034,6 +1069,15 @@ reassignbuf(bp, newvp)
return;
}
+#if !defined(MAX_PERF)
+ /*
+ * B_PAGING flagged buffers cannot be reassigned because their vp
+ * is not fully linked in.
+ */
+ if (bp->b_flags & B_PAGING)
+ panic("cannot reassign paging buffer");
+#endif
+
s = splbio();
/*
* Delete from old vnode list, if on one.
diff --git a/sys/miscfs/devfs/devfs_vnops.c b/sys/miscfs/devfs/devfs_vnops.c
index e9bdc2a..56fa842 100644
--- a/sys/miscfs/devfs/devfs_vnops.c
+++ b/sys/miscfs/devfs/devfs_vnops.c
@@ -23,7 +23,7 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $Id: devfs_vnops.c,v 1.64 1998/12/15 23:46:59 eivind Exp $
+ * $Id: devfs_vnops.c,v 1.65 1999/01/12 11:49:29 eivind Exp $
*/
@@ -1933,7 +1933,7 @@ devfs_getpages(struct vop_getpages_args *ap)
blksiz = DEV_BSIZE;
size = (ap->a_count + blksiz - 1) & ~(blksiz - 1);
- bp = getpbuf();
+ bp = getpbuf(NULL);
kva = (vm_offset_t)bp->b_data;
/*
@@ -2042,13 +2042,13 @@ devfs_getpages(struct vop_getpages_args *ap)
/*
* Free the buffer header back to the swap buffer pool.
*/
- relpbuf(bp);
+ relpbuf(bp, NULL);
return VM_PAGER_ERROR;
}
/*
* Free the buffer header back to the swap buffer pool.
*/
- relpbuf(bp);
+ relpbuf(bp, NULL);
return VM_PAGER_OK;
}
diff --git a/sys/miscfs/procfs/procfs_map.c b/sys/miscfs/procfs/procfs_map.c
index 4dae10a..c6b8966 100644
--- a/sys/miscfs/procfs/procfs_map.c
+++ b/sys/miscfs/procfs/procfs_map.c
@@ -36,7 +36,7 @@
*
* @(#)procfs_status.c 8.3 (Berkeley) 2/17/94
*
- * $Id: procfs_map.c,v 1.17 1998/04/29 04:28:22 dyson Exp $
+ * $Id: procfs_map.c,v 1.18 1998/12/04 22:54:51 archie Exp $
*/
#include <sys/param.h>
@@ -93,7 +93,7 @@ procfs_domap(curp, p, pfs, uio)
((uio->uio_resid > 0) && (entry != &map->header));
entry = entry->next) {
vm_object_t obj, tobj, lobj;
- int ref_count, shadow_count, id, flags;
+ int ref_count, shadow_count, flags;
vm_offset_t addr;
int resident, privateresident;
char *type;
@@ -139,13 +139,11 @@ case OBJT_DEVICE:
flags = obj->flags;
ref_count = obj->ref_count;
shadow_count = obj->shadow_count;
- id = obj->id;
} else {
type = "none";
flags = 0;
ref_count = 0;
shadow_count = 0;
- id = 0;
}
@@ -154,9 +152,9 @@ case OBJT_DEVICE:
* start, end, resident, private resident, cow, access, type.
*/
snprintf(mebuffer, sizeof(mebuffer),
- "0x%x 0x%x %d %d %d %s%s%s %d %d 0x%x %s %s %s\n",
+ "0x%x 0x%x %d %d %p %s%s%s %d %d 0x%x %s %s %s\n",
entry->start, entry->end,
- resident, privateresident, id,
+ resident, privateresident, obj,
(entry->protection & VM_PROT_READ)?"r":"-",
(entry->protection & VM_PROT_WRITE)?"w":"-",
(entry->protection & VM_PROT_EXECUTE)?"x":"-",
diff --git a/sys/miscfs/specfs/spec_vnops.c b/sys/miscfs/specfs/spec_vnops.c
index ff0f347..6096a1b 100644
--- a/sys/miscfs/specfs/spec_vnops.c
+++ b/sys/miscfs/specfs/spec_vnops.c
@@ -31,7 +31,7 @@
* SUCH DAMAGE.
*
* @(#)spec_vnops.c 8.14 (Berkeley) 5/21/95
- * $Id: spec_vnops.c,v 1.77 1998/12/07 21:58:33 archie Exp $
+ * $Id: spec_vnops.c,v 1.78 1998/12/16 00:10:51 eivind Exp $
*/
#include <sys/param.h>
@@ -781,7 +781,7 @@ spec_getpages(ap)
blksiz = DEV_BSIZE;
size = (ap->a_count + blksiz - 1) & ~(blksiz - 1);
- bp = getpbuf();
+ bp = getpbuf(NULL);
kva = (vm_offset_t)bp->b_data;
/*
@@ -894,13 +894,13 @@ spec_getpages(ap)
/*
* Free the buffer header back to the swap buffer pool.
*/
- relpbuf(bp);
+ relpbuf(bp, NULL);
return VM_PAGER_ERROR;
}
/*
* Free the buffer header back to the swap buffer pool.
*/
- relpbuf(bp);
+ relpbuf(bp, NULL);
return VM_PAGER_OK;
}
diff --git a/sys/net/if_sl.c b/sys/net/if_sl.c
index 99a6978..151df6e 100644
--- a/sys/net/if_sl.c
+++ b/sys/net/if_sl.c
@@ -31,7 +31,7 @@
* SUCH DAMAGE.
*
* @(#)if_sl.c 8.6 (Berkeley) 2/1/94
- * $Id: if_sl.c,v 1.69 1998/06/07 17:12:05 dfr Exp $
+ * $Id: if_sl.c,v 1.70 1998/07/15 02:32:23 bde Exp $
*/
/*
@@ -70,7 +70,9 @@
#include "bpfilter.h"
#include "opt_inet.h"
-
+#if !defined(ACTUALLY_LKM_NOT_KERNEL) && !defined(KLD_MODULE)
+#include "opt_slip.h"
+#endif
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
@@ -217,7 +219,11 @@ slattach(dummy)
sc->sc_if.if_unit = i++;
sc->sc_if.if_mtu = SLMTU;
sc->sc_if.if_flags =
- IFF_POINTOPOINT | SC_AUTOCOMP | IFF_MULTICAST;
+#ifdef SLIP_IFF_OPTS
+ SLIP_IFF_OPTS;
+#else
+ IFF_BROADCAST | IFF_POINTOPOINT | SC_AUTOCOMP | IFF_MULTICAST;
+#endif
sc->sc_if.if_type = IFT_SLIP;
sc->sc_if.if_ioctl = slioctl;
sc->sc_if.if_output = sloutput;
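A hedged example of the new SLIP_IFF_OPTS hook (the value shown is illustrative, not from this commit): a kernel config line such as

        options SLIP_IFF_OPTS="(IFF_POINTOPOINT|SC_AUTOCOMP|IFF_MULTICAST)"

lands in opt_slip.h as

        #define SLIP_IFF_OPTS (IFF_POINTOPOINT|SC_AUTOCOMP|IFF_MULTICAST)

which slattach() then uses verbatim, overriding the new IFF_BROADCAST-bearing default.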
diff --git a/sys/nfs/nfs_bio.c b/sys/nfs/nfs_bio.c
index c973700..fb437a5 100644
--- a/sys/nfs/nfs_bio.c
+++ b/sys/nfs/nfs_bio.c
@@ -34,7 +34,7 @@
* SUCH DAMAGE.
*
* @(#)nfs_bio.c 8.9 (Berkeley) 3/30/95
- * $Id: nfs_bio.c,v 1.64 1998/12/07 21:58:43 archie Exp $
+ * $Id: nfs_bio.c,v 1.65 1998/12/14 17:51:30 dt Exp $
*/
@@ -68,6 +68,7 @@ static struct buf *nfs_getcacheblk __P((struct vnode *vp, daddr_t bn, int size,
static void nfs_prot_buf __P((struct buf *bp, int off, int n));
extern int nfs_numasync;
+extern int nfs_pbuf_freecnt;
extern struct nfsstats nfsstats;
/*
@@ -113,7 +114,7 @@ nfs_getpages(ap)
* We use only the kva address for the buffer, but this is extremely
* convienient and fast.
*/
- bp = getpbuf();
+ bp = getpbuf(&nfs_pbuf_freecnt);
npages = btoc(count);
kva = (vm_offset_t) bp->b_data;
@@ -132,10 +133,16 @@ nfs_getpages(ap)
error = nfs_readrpc(vp, &uio, cred);
pmap_qremove(kva, npages);
- relpbuf(bp);
+ relpbuf(bp, &nfs_pbuf_freecnt);
- if (error && (uio.uio_resid == count))
+ if (error && (uio.uio_resid == count)) {
+ printf("nfs_getpages: error %d\n", error);
+ for (i = 0; i < npages; ++i) {
+ if (i != ap->a_reqpage)
+ vnode_pager_freepage(pages[i]);
+ }
return VM_PAGER_ERROR;
+ }
size = count - uio.uio_resid;
@@ -228,7 +235,7 @@ nfs_putpages(ap)
* We use only the kva address for the buffer, but this is extremely
* convienient and fast.
*/
- bp = getpbuf();
+ bp = getpbuf(&nfs_pbuf_freecnt);
kva = (vm_offset_t) bp->b_data;
pmap_qenter(kva, pages, npages);
@@ -251,7 +258,7 @@ nfs_putpages(ap)
error = nfs_writerpc(vp, &uio, cred, &iomode, &must_commit);
pmap_qremove(kva, npages);
- relpbuf(bp);
+ relpbuf(bp, &nfs_pbuf_freecnt);
if (!error) {
int nwritten = round_page(count - uio.uio_resid) / PAGE_SIZE;
@@ -439,6 +446,7 @@ again:
bp = nfs_getcacheblk(vp, lbn, bufsize, p);
if (!bp)
return (EINTR);
+
/*
* If we are being called from nfs_getpages, we must
* make sure the buffer is a vmio buffer. The vp will
@@ -779,6 +787,7 @@ again:
* area, just update the b_dirtyoff and b_dirtyend,
* otherwise force a write rpc of the old dirty area.
*/
+
if (bp->b_dirtyend > 0 &&
(on > bp->b_dirtyend || (on + n) < bp->b_dirtyoff)) {
bp->b_proc = p;
@@ -1254,17 +1263,24 @@ nfs_doio(bp, cr, p)
* write rpc with iomode == NFSV3WRITE_FILESYNC before
* the block is reused. This is indicated by setting
* the B_DELWRI and B_NEEDCOMMIT flags.
+ *
+ * If the buffer is marked B_PAGING, it does not reside on
+ * the vp's paging queues so we do not ( and cannot ) reassign
+ * it. XXX numdirtybuffers should be integrated into
+ * reassignbuf() call.
*/
if (error == EINTR
|| (!error && (bp->b_flags & B_NEEDCOMMIT))) {
int s;
bp->b_flags &= ~(B_INVAL|B_NOCACHE);
- ++numdirtybuffers;
- bp->b_flags |= B_DELWRI;
- s = splbio();
- reassignbuf(bp, vp);
- splx(s);
+ if ((bp->b_flags & B_PAGING) == 0) {
+ ++numdirtybuffers;
+ bp->b_flags |= B_DELWRI;
+ s = splbio();
+ reassignbuf(bp, vp);
+ splx(s);
+ }
if ((bp->b_flags & B_ASYNC) == 0)
bp->b_flags |= B_EINTR;
} else {
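The new nfs_getpages() error path follows the standard pager convention, sketched here from the hunk above: every page except the one at a_reqpage is freed, and the caller of the pager disposes of the requested page itself when it sees VM_PAGER_ERROR:

        /* sketch of the getpages error-path convention */
        if (error && (uio.uio_resid == count)) {
                for (i = 0; i < npages; ++i) {
                        if (i != ap->a_reqpage)
                                vnode_pager_freepage(pages[i]);
                }
                return VM_PAGER_ERROR;  /* pages[ap->a_reqpage] left for caller */
        }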
diff --git a/sys/nfs/nfs_common.c b/sys/nfs/nfs_common.c
index b3eec24..6c9cfb7 100644
--- a/sys/nfs/nfs_common.c
+++ b/sys/nfs/nfs_common.c
@@ -34,7 +34,7 @@
* SUCH DAMAGE.
*
* @(#)nfs_subs.c 8.8 (Berkeley) 5/22/95
- * $Id: nfs_subs.c,v 1.69 1998/12/14 18:54:03 dt Exp $
+ * $Id: nfs_subs.c,v 1.70 1999/01/05 18:49:58 eivind Exp $
*/
/*
@@ -99,6 +99,7 @@ enum vtype nv3tov_type[8]= {
};
int nfs_ticks;
+int nfs_pbuf_freecnt = -1; /* start out unlimited */
struct nfs_reqq nfs_reqq;
struct nfssvc_sockhead nfssvc_sockhead;
@@ -1191,6 +1192,8 @@ nfs_init(vfsp)
sysent[SYS_getfh].sy_call = (sy_call_t *)getfh;
#endif
+ nfs_pbuf_freecnt = nswbuf / 2 + 1;
+
return (0);
}
diff --git a/sys/nfs/nfs_subs.c b/sys/nfs/nfs_subs.c
index b3eec24..6c9cfb7 100644
--- a/sys/nfs/nfs_subs.c
+++ b/sys/nfs/nfs_subs.c
@@ -34,7 +34,7 @@
* SUCH DAMAGE.
*
* @(#)nfs_subs.c 8.8 (Berkeley) 5/22/95
- * $Id: nfs_subs.c,v 1.69 1998/12/14 18:54:03 dt Exp $
+ * $Id: nfs_subs.c,v 1.70 1999/01/05 18:49:58 eivind Exp $
*/
/*
@@ -99,6 +99,7 @@ enum vtype nv3tov_type[8]= {
};
int nfs_ticks;
+int nfs_pbuf_freecnt = -1; /* start out unlimited */
struct nfs_reqq nfs_reqq;
struct nfssvc_sockhead nfssvc_sockhead;
@@ -1191,6 +1192,8 @@ nfs_init(vfsp)
sysent[SYS_getfh].sy_call = (sy_call_t *)getfh;
#endif
+ nfs_pbuf_freecnt = nswbuf / 2 + 1;
+
return (0);
}
diff --git a/sys/nfs/nfs_vnops.c b/sys/nfs/nfs_vnops.c
index c97267a..4131b60 100644
--- a/sys/nfs/nfs_vnops.c
+++ b/sys/nfs/nfs_vnops.c
@@ -34,7 +34,7 @@
* SUCH DAMAGE.
*
* @(#)nfs_vnops.c 8.16 (Berkeley) 5/27/95
- * $Id: nfs_vnops.c,v 1.115 1998/12/25 10:34:27 dfr Exp $
+ * $Id: nfs_vnops.c,v 1.116 1999/01/12 12:39:14 eivind Exp $
*/
@@ -2627,14 +2627,17 @@ nfs_strategy(ap)
if (bp->b_flags & B_PHYS)
panic("nfs physio");
+
if (bp->b_flags & B_ASYNC)
p = (struct proc *)0;
else
p = curproc; /* XXX */
+
if (bp->b_flags & B_READ)
cr = bp->b_rcred;
else
cr = bp->b_wcred;
+
/*
* If the op is asynchronous and an i/o daemon is waiting
* queue the request, wake it up and wait for completion
diff --git a/sys/nfsclient/nfs_bio.c b/sys/nfsclient/nfs_bio.c
index c973700..fb437a5 100644
--- a/sys/nfsclient/nfs_bio.c
+++ b/sys/nfsclient/nfs_bio.c
@@ -34,7 +34,7 @@
* SUCH DAMAGE.
*
* @(#)nfs_bio.c 8.9 (Berkeley) 3/30/95
- * $Id: nfs_bio.c,v 1.64 1998/12/07 21:58:43 archie Exp $
+ * $Id: nfs_bio.c,v 1.65 1998/12/14 17:51:30 dt Exp $
*/
@@ -68,6 +68,7 @@ static struct buf *nfs_getcacheblk __P((struct vnode *vp, daddr_t bn, int size,
static void nfs_prot_buf __P((struct buf *bp, int off, int n));
extern int nfs_numasync;
+extern int nfs_pbuf_freecnt;
extern struct nfsstats nfsstats;
/*
@@ -113,7 +114,7 @@ nfs_getpages(ap)
* We use only the kva address for the buffer, but this is extremely
* convienient and fast.
*/
- bp = getpbuf();
+ bp = getpbuf(&nfs_pbuf_freecnt);
npages = btoc(count);
kva = (vm_offset_t) bp->b_data;
@@ -132,10 +133,16 @@ nfs_getpages(ap)
error = nfs_readrpc(vp, &uio, cred);
pmap_qremove(kva, npages);
- relpbuf(bp);
+ relpbuf(bp, &nfs_pbuf_freecnt);
- if (error && (uio.uio_resid == count))
+ if (error && (uio.uio_resid == count)) {
+ printf("nfs_getpages: error %d\n", error);
+ for (i = 0; i < npages; ++i) {
+ if (i != ap->a_reqpage)
+ vnode_pager_freepage(pages[i]);
+ }
return VM_PAGER_ERROR;
+ }
size = count - uio.uio_resid;
@@ -228,7 +235,7 @@ nfs_putpages(ap)
* We use only the kva address for the buffer, but this is extremely
* convienient and fast.
*/
- bp = getpbuf();
+ bp = getpbuf(&nfs_pbuf_freecnt);
kva = (vm_offset_t) bp->b_data;
pmap_qenter(kva, pages, npages);
@@ -251,7 +258,7 @@ nfs_putpages(ap)
error = nfs_writerpc(vp, &uio, cred, &iomode, &must_commit);
pmap_qremove(kva, npages);
- relpbuf(bp);
+ relpbuf(bp, &nfs_pbuf_freecnt);
if (!error) {
int nwritten = round_page(count - uio.uio_resid) / PAGE_SIZE;
@@ -439,6 +446,7 @@ again:
bp = nfs_getcacheblk(vp, lbn, bufsize, p);
if (!bp)
return (EINTR);
+
/*
* If we are being called from nfs_getpages, we must
* make sure the buffer is a vmio buffer. The vp will
@@ -779,6 +787,7 @@ again:
* area, just update the b_dirtyoff and b_dirtyend,
* otherwise force a write rpc of the old dirty area.
*/
+
if (bp->b_dirtyend > 0 &&
(on > bp->b_dirtyend || (on + n) < bp->b_dirtyoff)) {
bp->b_proc = p;
@@ -1254,17 +1263,24 @@ nfs_doio(bp, cr, p)
* write rpc with iomode == NFSV3WRITE_FILESYNC before
* the block is reused. This is indicated by setting
* the B_DELWRI and B_NEEDCOMMIT flags.
+ *
+ * If the buffer is marked B_PAGING, it does not reside on
+ * the vp's paging queues so we do not ( and cannot ) reassign
+ * it. XXX numdirtybuffers should be integrated into
+ * reassignbuf() call.
*/
if (error == EINTR
|| (!error && (bp->b_flags & B_NEEDCOMMIT))) {
int s;
bp->b_flags &= ~(B_INVAL|B_NOCACHE);
- ++numdirtybuffers;
- bp->b_flags |= B_DELWRI;
- s = splbio();
- reassignbuf(bp, vp);
- splx(s);
+ if ((bp->b_flags & B_PAGING) == 0) {
+ ++numdirtybuffers;
+ bp->b_flags |= B_DELWRI;
+ s = splbio();
+ reassignbuf(bp, vp);
+ splx(s);
+ }
if ((bp->b_flags & B_ASYNC) == 0)
bp->b_flags |= B_EINTR;
} else {
diff --git a/sys/nfsclient/nfs_subs.c b/sys/nfsclient/nfs_subs.c
index b3eec24..6c9cfb7 100644
--- a/sys/nfsclient/nfs_subs.c
+++ b/sys/nfsclient/nfs_subs.c
@@ -34,7 +34,7 @@
* SUCH DAMAGE.
*
* @(#)nfs_subs.c 8.8 (Berkeley) 5/22/95
- * $Id: nfs_subs.c,v 1.69 1998/12/14 18:54:03 dt Exp $
+ * $Id: nfs_subs.c,v 1.70 1999/01/05 18:49:58 eivind Exp $
*/
/*
@@ -99,6 +99,7 @@ enum vtype nv3tov_type[8]= {
};
int nfs_ticks;
+int nfs_pbuf_freecnt = -1; /* start out unlimited */
struct nfs_reqq nfs_reqq;
struct nfssvc_sockhead nfssvc_sockhead;
@@ -1191,6 +1192,8 @@ nfs_init(vfsp)
sysent[SYS_getfh].sy_call = (sy_call_t *)getfh;
#endif
+ nfs_pbuf_freecnt = nswbuf / 2 + 1;
+
return (0);
}
diff --git a/sys/nfsclient/nfs_vnops.c b/sys/nfsclient/nfs_vnops.c
index c97267a..4131b60 100644
--- a/sys/nfsclient/nfs_vnops.c
+++ b/sys/nfsclient/nfs_vnops.c
@@ -34,7 +34,7 @@
* SUCH DAMAGE.
*
* @(#)nfs_vnops.c 8.16 (Berkeley) 5/27/95
- * $Id: nfs_vnops.c,v 1.115 1998/12/25 10:34:27 dfr Exp $
+ * $Id: nfs_vnops.c,v 1.116 1999/01/12 12:39:14 eivind Exp $
*/
@@ -2627,14 +2627,17 @@ nfs_strategy(ap)
if (bp->b_flags & B_PHYS)
panic("nfs physio");
+
if (bp->b_flags & B_ASYNC)
p = (struct proc *)0;
else
p = curproc; /* XXX */
+
if (bp->b_flags & B_READ)
cr = bp->b_rcred;
else
cr = bp->b_wcred;
+
/*
* If the op is asynchronous and an i/o daemon is waiting
* queue the request, wake it up and wait for completion
diff --git a/sys/nfsserver/nfs_srvsubs.c b/sys/nfsserver/nfs_srvsubs.c
index b3eec24..6c9cfb7 100644
--- a/sys/nfsserver/nfs_srvsubs.c
+++ b/sys/nfsserver/nfs_srvsubs.c
@@ -34,7 +34,7 @@
* SUCH DAMAGE.
*
* @(#)nfs_subs.c 8.8 (Berkeley) 5/22/95
- * $Id: nfs_subs.c,v 1.69 1998/12/14 18:54:03 dt Exp $
+ * $Id: nfs_subs.c,v 1.70 1999/01/05 18:49:58 eivind Exp $
*/
/*
@@ -99,6 +99,7 @@ enum vtype nv3tov_type[8]= {
};
int nfs_ticks;
+int nfs_pbuf_freecnt = -1; /* start out unlimited */
struct nfs_reqq nfs_reqq;
struct nfssvc_sockhead nfssvc_sockhead;
@@ -1191,6 +1192,8 @@ nfs_init(vfsp)
sysent[SYS_getfh].sy_call = (sy_call_t *)getfh;
#endif
+ nfs_pbuf_freecnt = nswbuf / 2 + 1;
+
return (0);
}
diff --git a/sys/sys/bio.h b/sys/sys/bio.h
index 191fdbc..f2b0f4b 100644
--- a/sys/sys/bio.h
+++ b/sys/sys/bio.h
@@ -36,7 +36,7 @@
* SUCH DAMAGE.
*
* @(#)buf.h 8.9 (Berkeley) 3/30/95
- * $Id: buf.h,v 1.60 1998/10/31 14:05:11 peter Exp $
+ * $Id: buf.h,v 1.61 1998/11/13 01:01:44 dg Exp $
*/
#ifndef _SYS_BUF_H_
@@ -116,7 +116,10 @@ struct buf {
caddr_t b_savekva; /* saved kva for transfer while bouncing */
void *b_driver1; /* for private use by the driver */
void *b_driver2; /* for private use by the driver */
- void *b_spc;
+ union pager_info {
+ void *pg_spc;
+ int pg_reqpage;
+ } b_pager;
union cluster_info {
TAILQ_HEAD(cluster_list_head, buf) cluster_head;
TAILQ_ENTRY(buf) cluster_entry;
@@ -126,9 +129,29 @@ struct buf {
struct workhead b_dep; /* List of filesystem dependencies. */
};
+#define b_spc b_pager.pg_spc
+
/*
* These flags are kept in b_flags.
+ *
+ * Notes:
+ *
+ * B_ASYNC VOP calls on bp's are usually async whether or not
+ * B_ASYNC is set, but some subsystems, such as NFS, like
+ * to know what is best for the caller so they can
+ * optimize the I/O.
+ *
+ * B_PAGING Indicates that bp is being used by the paging system or
+ * some paging system and that the bp is not linked into
+ * the b_vp's clean/dirty linked lists or ref counts.
+ * Buffer vp reassignments are illegal in this case.
+ *
+ * B_CACHE This may only be set if the buffer is entirely valid.
+ * The situation where B_DELWRI is set and B_CACHE gets
+ * cleared MUST be committed to disk so B_DELWRI can
+ * also be cleared.
*/
+
#define B_AGE 0x00000001 /* Move to age queue when I/O done. */
#define B_NEEDCOMMIT 0x00000002 /* Append-write in progress. */
#define B_ASYNC 0x00000004 /* Start I/O, do not wait. */
@@ -312,13 +335,12 @@ int bowrite __P((struct buf *));
void brelse __P((struct buf *));
void bqrelse __P((struct buf *));
int vfs_bio_awrite __P((struct buf *));
-struct buf * getpbuf __P((void));
+struct buf * getpbuf __P((int *));
struct buf *incore __P((struct vnode *, daddr_t));
struct buf *gbincore __P((struct vnode *, daddr_t));
int inmem __P((struct vnode *, daddr_t));
struct buf *getblk __P((struct vnode *, daddr_t, int, int, int));
struct buf *geteblk __P((int));
-int allocbuf __P((struct buf *, int));
int biowait __P((struct buf *));
void biodone __P((struct buf *));
@@ -336,13 +358,15 @@ void vfs_unbusy_pages __P((struct buf *));
void vwakeup __P((struct buf *));
void vmapbuf __P((struct buf *));
void vunmapbuf __P((struct buf *));
-void relpbuf __P((struct buf *));
+void relpbuf __P((struct buf *, int *));
void brelvp __P((struct buf *));
void bgetvp __P((struct vnode *, struct buf *));
void pbgetvp __P((struct vnode *, struct buf *));
void pbrelvp __P((struct buf *));
+int allocbuf __P((struct buf *bp, int size));
void reassignbuf __P((struct buf *, struct vnode *));
-struct buf *trypbuf __P((void));
+void bpreassignbuf __P((struct buf *, struct vnode *));
+struct buf *trypbuf __P((int *));
void vfs_bio_need_satisfy __P((void));
#endif /* KERNEL */
diff --git a/sys/sys/buf.h b/sys/sys/buf.h
index 191fdbc..f2b0f4b 100644
--- a/sys/sys/buf.h
+++ b/sys/sys/buf.h
@@ -36,7 +36,7 @@
* SUCH DAMAGE.
*
* @(#)buf.h 8.9 (Berkeley) 3/30/95
- * $Id: buf.h,v 1.60 1998/10/31 14:05:11 peter Exp $
+ * $Id: buf.h,v 1.61 1998/11/13 01:01:44 dg Exp $
*/
#ifndef _SYS_BUF_H_
@@ -116,7 +116,10 @@ struct buf {
caddr_t b_savekva; /* saved kva for transfer while bouncing */
void *b_driver1; /* for private use by the driver */
void *b_driver2; /* for private use by the driver */
- void *b_spc;
+ union pager_info {
+ void *pg_spc;
+ int pg_reqpage;
+ } b_pager;
union cluster_info {
TAILQ_HEAD(cluster_list_head, buf) cluster_head;
TAILQ_ENTRY(buf) cluster_entry;
@@ -126,9 +129,29 @@ struct buf {
struct workhead b_dep; /* List of filesystem dependencies. */
};
+#define b_spc b_pager.pg_spc
+
/*
* These flags are kept in b_flags.
+ *
+ * Notes:
+ *
+ * B_ASYNC VOP calls on bp's are usually async whether or not
+ * B_ASYNC is set, but some subsystems, such as NFS, like
+ * to know what is best for the caller so they can
+ * optimize the I/O.
+ *
+ * B_PAGING Indicates that bp is being used by the paging system or
+ * some other paging subsystem and that the bp is not linked into
+ * the b_vp's clean/dirty linked lists or ref counts.
+ * Buffer vp reassignments are illegal in this case.
+ *
+ * B_CACHE This may only be set if the buffer is entirely valid.
+ * The situation where B_DELWRI is set and B_CACHE gets
+ * cleared MUST be committed to disk so B_DELWRI can
+ * also be cleared.
*/
+
#define B_AGE 0x00000001 /* Move to age queue when I/O done. */
#define B_NEEDCOMMIT 0x00000002 /* Append-write in progress. */
#define B_ASYNC 0x00000004 /* Start I/O, do not wait. */
@@ -312,13 +335,12 @@ int bowrite __P((struct buf *));
void brelse __P((struct buf *));
void bqrelse __P((struct buf *));
int vfs_bio_awrite __P((struct buf *));
-struct buf * getpbuf __P((void));
+struct buf * getpbuf __P((int *));
struct buf *incore __P((struct vnode *, daddr_t));
struct buf *gbincore __P((struct vnode *, daddr_t));
int inmem __P((struct vnode *, daddr_t));
struct buf *getblk __P((struct vnode *, daddr_t, int, int, int));
struct buf *geteblk __P((int));
-int allocbuf __P((struct buf *, int));
int biowait __P((struct buf *));
void biodone __P((struct buf *));
@@ -336,13 +358,15 @@ void vfs_unbusy_pages __P((struct buf *));
void vwakeup __P((struct buf *));
void vmapbuf __P((struct buf *));
void vunmapbuf __P((struct buf *));
-void relpbuf __P((struct buf *));
+void relpbuf __P((struct buf *, int *));
void brelvp __P((struct buf *));
void bgetvp __P((struct vnode *, struct buf *));
void pbgetvp __P((struct vnode *, struct buf *));
void pbrelvp __P((struct buf *));
+int allocbuf __P((struct buf *bp, int size));
void reassignbuf __P((struct buf *, struct vnode *));
-struct buf *trypbuf __P((void));
+void bpreassignbuf __P((struct buf *, struct vnode *));
+struct buf *trypbuf __P((int *));
void vfs_bio_need_satisfy __P((void));
#endif /* KERNEL */
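The getpbuf()/trypbuf()/relpbuf() prototypes now take a pointer to a per-subsystem
free count, so each consumer can cap its share of the nswbuf pool (NFS seeds
nfs_pbuf_freecnt with nswbuf / 2 + 1 in nfs_init() above). A minimal sketch of the
intended usage; the names (my_pbuf_freecnt, my_subsys_*) are hypothetical and the
sleep-at-the-cap behavior is inferred from the counter argument:

	static int my_pbuf_freecnt = -1;	/* -1 = unlimited until set */

	void
	my_subsys_init()
	{
		my_pbuf_freecnt = nswbuf / 2 + 1;	/* cap our share */
	}

	void
	my_subsys_io()
	{
		struct buf *bp;

		bp = getpbuf(&my_pbuf_freecnt);	/* may sleep at the cap */
		/* ... fill in bp and issue the I/O ... */
		relpbuf(bp, &my_pbuf_freecnt);	/* credits the count back */
	}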
diff --git a/sys/sys/malloc.h b/sys/sys/malloc.h
index d8e0cd8..87949b8 100644
--- a/sys/sys/malloc.h
+++ b/sys/sys/malloc.h
@@ -31,7 +31,7 @@
* SUCH DAMAGE.
*
* @(#)malloc.h 8.5 (Berkeley) 5/3/95
- * $Id: malloc.h,v 1.37 1998/03/08 09:58:26 julian Exp $
+ * $Id: malloc.h,v 1.38 1998/11/10 08:46:24 peter Exp $
*/
#ifndef _SYS_MALLOC_H_
@@ -42,11 +42,13 @@
#define KMEMSTATS
/*
- * flags to malloc
+ * flags to malloc.
*/
+
#define M_WAITOK 0x0000
-#define M_NOWAIT 0x0001
-#define M_KERNEL 0x0002
+#define M_NOWAIT 0x0001 /* do not block */
+#define M_USE_RESERVE 0x0002 /* can alloc out of reserve memory */
+#define M_ASLEEP 0x0004 /* async sleep on failure */
#define M_MAGIC 877983977 /* time when first defined :-) */
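M_KERNEL is gone; a caller that must not sleep now says so explicitly with
M_NOWAIT and, in a paging path, may add M_USE_RESERVE to dip into reserve
memory. A hedged sketch of the two idioms (the surrounding function is
hypothetical; M_VMPGDATA and struct swblock are the type and structure the
swap pager itself uses later in this patch):

	void *p;

	/* paging/interrupt path: fail rather than block */
	p = malloc(sizeof(struct swblock), M_VMPGDATA,
	    M_NOWAIT | M_USE_RESERVE);
	if (p == NULL)
		return;		/* caller must retry later */

	/* ordinary top-level path: sleeping is fine */
	p = malloc(sizeof(struct swblock), M_VMPGDATA, M_WAITOK);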
diff --git a/sys/sys/param.h b/sys/sys/param.h
index badddca..fb15db3 100644
--- a/sys/sys/param.h
+++ b/sys/sys/param.h
@@ -36,7 +36,7 @@
* SUCH DAMAGE.
*
* @(#)param.h 8.3 (Berkeley) 4/4/95
- * $Id: param.h,v 1.37 1998/10/16 04:28:04 jkh Exp $
+ * $Id: param.h,v 1.38 1998/10/16 06:55:07 jkh Exp $
*/
#ifndef _SYS_PARAM_H_
@@ -227,4 +227,10 @@
#define FSHIFT 11 /* bits to right of fixed binary point */
#define FSCALE (1<<FSHIFT)
+#define dbtoc(db) /* calculates devblks to pages */ \
+ (((db) + (ctodb(1) - 1)) >> (PAGE_SHIFT - DEV_BSHIFT))
+
+#define ctodb(db) /* calculates pages to devblks */ \
+ ((db) << (PAGE_SHIFT - DEV_BSHIFT))
+
#endif /* _SYS_PARAM_H_ */
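With, say, PAGE_SHIFT = 12 and DEV_BSHIFT = 9 (4K pages, 512-byte device
blocks) the shift is 3, so ctodb(1) == 8 and dbtoc() rounds partial pages up.
A few worked values under those assumed constants:

	ctodb(3);	/*  3 pages   -> 24 device blocks */
	dbtoc(24);	/* 24 devblks ->  3 pages, exact */
	dbtoc(17);	/* 17 devblks -> (17 + 7) >> 3 == 3 pages, rounded up */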
diff --git a/sys/sys/types.h b/sys/sys/types.h
index 93f8698..c65fe67 100644
--- a/sys/sys/types.h
+++ b/sys/sys/types.h
@@ -36,7 +36,7 @@
* SUCH DAMAGE.
*
* @(#)types.h 8.6 (Berkeley) 2/19/95
- * $Id: types.h,v 1.25 1998/06/07 17:13:05 dfr Exp $
+ * $Id: types.h,v 1.26 1998/12/19 00:02:34 dt Exp $
*/
#ifndef _SYS_TYPES_H_
@@ -68,6 +68,7 @@ typedef quad_t * qaddr_t;
typedef char * caddr_t; /* core address */
typedef int32_t daddr_t; /* disk address */
+typedef u_int32_t u_daddr_t; /* unsigned disk address */
typedef u_int32_t dev_t; /* device number */
typedef u_int32_t fixpt_t; /* fixed point number */
typedef u_int32_t gid_t; /* group id */
diff --git a/sys/ufs/mfs/mfs_extern.h b/sys/ufs/mfs/mfs_extern.h
index ca19cc4..ae5b7af 100644
--- a/sys/ufs/mfs/mfs_extern.h
+++ b/sys/ufs/mfs/mfs_extern.h
@@ -31,7 +31,7 @@
* SUCH DAMAGE.
*
* @(#)mfs_extern.h 8.4 (Berkeley) 3/30/95
- * $Id: mfs_extern.h,v 1.10 1997/10/16 10:50:00 phk Exp $
+ * $Id: mfs_extern.h,v 1.11 1998/02/03 21:52:02 bde Exp $
*/
#ifndef _UFS_MFS_MFS_EXTERN_H_
@@ -41,8 +41,9 @@ struct buf;
struct mount;
struct proc;
struct vnode;
+struct mfsnode;
-void mfs_doio __P((struct buf *bp, caddr_t base));
+void mfs_doio __P((struct buf *bp, struct mfsnode *mfsnode));
int mfs_mountfs __P((struct vnode *, struct mount *, struct proc *));
int mfs_mountroot __P((void));
diff --git a/sys/ufs/mfs/mfs_vfsops.c b/sys/ufs/mfs/mfs_vfsops.c
index 1ea0804..73ab75a 100644
--- a/sys/ufs/mfs/mfs_vfsops.c
+++ b/sys/ufs/mfs/mfs_vfsops.c
@@ -31,7 +31,7 @@
* SUCH DAMAGE.
*
* @(#)mfs_vfsops.c 8.11 (Berkeley) 6/19/95
- * $Id: mfs_vfsops.c,v 1.52 1998/12/07 21:58:49 archie Exp $
+ * $Id: mfs_vfsops.c,v 1.53 1999/01/01 04:14:11 dillon Exp $
*/
@@ -64,8 +64,10 @@ MALLOC_DEFINE(M_MFSNODE, "MFS node", "MFS vnode private part");
u_char * mfs_getimage __P((void));
+#ifdef MFS_ROOT
static caddr_t mfs_rootbase; /* address of mini-root in kernel virtual memory */
static u_long mfs_rootsize; /* size of mini-root in bytes */
+#endif
static int mfs_minor; /* used for building internal dev_t */
@@ -178,7 +180,9 @@ mfs_mount(mp, path, data, ndp, p)
struct mfs_args args;
struct ufsmount *ump;
struct fs *fs;
+#ifdef MFS_ROOT
u_char *base;
+#endif
struct mfsnode *mfsp;
u_int size;
int flags, err;
@@ -344,7 +348,9 @@ mfs_mount(mp, path, data, ndp, p)
goto error_2;
}
+#ifdef MFS_ROOT
dostatfs:
+#endif
/*
* Initialize FS stat information in mount struct; uses both
* mp->mnt_stat.f_mntonname and mp->mnt_stat.f_mntfromname
@@ -387,11 +393,8 @@ mfs_start(mp, flags, p)
register struct vnode *vp = VFSTOUFS(mp)->um_devvp;
register struct mfsnode *mfsp = VTOMFS(vp);
register struct buf *bp;
- register caddr_t base;
register int gotsig = 0;
- base = mfsp->mfs_baseoff;
-
/*
* Must set P_SYSTEM to prevent system from trying to kill
* this process. What happens is that the process is unkillable,
@@ -402,11 +405,20 @@ mfs_start(mp, flags, p)
curproc->p_flag |= P_SYSTEM;
while (mfsp->mfs_active) {
+ int s;
+
+ s = splbio();
+
while (bp = bufq_first(&mfsp->buf_queue)) {
bufq_remove(&mfsp->buf_queue, bp);
- mfs_doio(bp, base);
+ splx(s);
+ mfs_doio(bp, mfsp);
wakeup((caddr_t)bp);
+ s = splbio();
}
+
+ splx(s);
+
/*
* If a non-ignored signal is received, try to unmount.
* If that fails, clear the signal (it has been "processed"),
diff --git a/sys/ufs/mfs/mfs_vnops.c b/sys/ufs/mfs/mfs_vnops.c
index 88cfec6..083843c 100644
--- a/sys/ufs/mfs/mfs_vnops.c
+++ b/sys/ufs/mfs/mfs_vnops.c
@@ -31,7 +31,7 @@
* SUCH DAMAGE.
*
* @(#)mfs_vnops.c 8.11 (Berkeley) 5/22/95
- * $Id: mfs_vnops.c,v 1.37 1998/07/11 07:46:05 bde Exp $
+ * $Id: mfs_vnops.c,v 1.38 1998/09/07 06:52:01 phk Exp $
*/
#include <sys/param.h>
@@ -41,6 +41,8 @@
#include <sys/buf.h>
#include <sys/vnode.h>
#include <sys/malloc.h>
+#include <sys/sysproto.h>
+#include <sys/mman.h>
#include <miscfs/specfs/specdev.h>
@@ -51,6 +53,7 @@ static int mfs_badop __P((struct vop_generic_args *));
static int mfs_bmap __P((struct vop_bmap_args *));
static int mfs_close __P((struct vop_close_args *));
static int mfs_fsync __P((struct vop_fsync_args *));
+static int mfs_freeblks __P((struct vop_freeblks_args *));
static int mfs_inactive __P((struct vop_inactive_args *)); /* XXX */
static int mfs_open __P((struct vop_open_args *));
static int mfs_reclaim __P((struct vop_reclaim_args *)); /* XXX */
@@ -66,7 +69,7 @@ static struct vnodeopv_entry_desc mfs_vnodeop_entries[] = {
{ &vop_bmap_desc, (vop_t *) mfs_bmap },
{ &vop_bwrite_desc, (vop_t *) vop_defaultop },
{ &vop_close_desc, (vop_t *) mfs_close },
- { &vop_freeblks_desc, (vop_t *) vop_defaultop },
+ { &vop_freeblks_desc, (vop_t *) mfs_freeblks },
{ &vop_fsync_desc, (vop_t *) mfs_fsync },
{ &vop_getpages_desc, (vop_t *) mfs_getpages },
{ &vop_inactive_desc, (vop_t *) mfs_inactive },
@@ -119,6 +122,38 @@ mfs_fsync(ap)
}
/*
+ * mfs_freeblks() - hook to allow us to free physical memory.
+ *
+ * We implement the B_FREEBUF strategy. We can't just madvise()
+ * here because we have to do it in the correct order vs other bio
+ * requests, so we queue it.
+ */
+
+static int
+mfs_freeblks(ap)
+ struct vop_freeblks_args /* {
+ struct vnode *a_vp;
+ daddr_t a_addr;
+ daddr_t a_length;
+ } */ *ap;
+{
+ struct buf *bp;
+ struct vnode *vp;
+
+ if (!vfinddev(ap->a_vp->v_rdev, VBLK, &vp) || vp->v_usecount == 0)
+ panic("mfs_strategy: bad dev");
+
+ bp = geteblk(ap->a_length);
+ bp->b_flags |= B_FREEBUF | B_BUSY;
+ bp->b_dev = ap->a_vp->v_rdev;
+ bp->b_blkno = ap->a_addr;
+ bp->b_offset = dbtob(ap->a_addr);
+ bp->b_bcount = ap->a_length;
+ VOP_STRATEGY(vp, bp);
+ return(0);
+}
+
+/*
* Pass I/O requests to the memory filesystem process.
*/
static int
@@ -132,26 +167,50 @@ mfs_strategy(ap)
register struct mfsnode *mfsp;
struct vnode *vp;
struct proc *p = curproc; /* XXX */
+ int s;
if (!vfinddev(bp->b_dev, VBLK, &vp) || vp->v_usecount == 0)
panic("mfs_strategy: bad dev");
mfsp = VTOMFS(vp);
- /* check for mini-root access */
+
+ /*
+ * splbio required for queueing/dequeueing, in case of forwarded
+ * BPs from bio interrupts (??). It may not be necessary.
+ */
+
+ s = splbio();
+
if (mfsp->mfs_pid == 0) {
+ /*
+ * mini-root. Note: B_FREEBUF not supported at the moment,
+ * I'm not sure what kind of dataspace b_data is in.
+ */
caddr_t base;
base = mfsp->mfs_baseoff + (bp->b_blkno << DEV_BSHIFT);
+ if (bp->b_flags & B_FREEBUF)
+ ; /* B_FREEBUF: nothing to free on the mini-root */
- if (bp->b_flags & B_READ)
+ else if (bp->b_flags & B_READ)
bcopy(base, bp->b_data, bp->b_bcount);
else
bcopy(bp->b_data, base, bp->b_bcount);
biodone(bp);
} else if (mfsp->mfs_pid == p->p_pid) {
- mfs_doio(bp, mfsp->mfs_baseoff);
+ /*
+ * VOP to self
+ */
+ splx(s);
+ mfs_doio(bp, mfsp);
+ s = splbio();
} else {
+ /*
+ * VOP from some other process, queue to MFS process and
+ * wake it up.
+ */
bufq_insert_tail(&mfsp->buf_queue, bp);
wakeup((caddr_t)vp);
}
+ splx(s);
return (0);
}
@@ -159,18 +218,59 @@ mfs_strategy(ap)
* Memory file system I/O.
*
 * Trivial on the HP since the buffer has already been mapped into KVA space.
+ *
+ * Read and Write are handled with a simple copyin and copyout.
+ *
+ * We also partially support VOP_FREEBLKS() via B_FREEBUF. We can't implement
+ * it completely -- for example, on fragments or inode metadata -- but we can
+ * implement it for page-aligned requests.
*/
void
-mfs_doio(bp, base)
+mfs_doio(bp, mfsp)
register struct buf *bp;
- caddr_t base;
+ struct mfsnode *mfsp;
{
+ caddr_t base = mfsp->mfs_baseoff + (bp->b_blkno << DEV_BSHIFT);
+
+ if (bp->b_flags & B_FREEBUF) {
+ /*
+ * Implement B_FREEBUF, which allows the filesystem to tell
+ * a block device when blocks are no longer needed (like when
+ * a file is deleted). We use the hook to MADV_FREE the VM.
+ * This makes an MFS filesystem work as well as or better than
+ * a Sun-style swap-mounted filesystem.
+ */
+ int bytes = bp->b_bcount;
+
+ if ((vm_offset_t)base & PAGE_MASK) {
+ int n = PAGE_SIZE - ((vm_offset_t)base & PAGE_MASK);
+ bytes -= n;
+ base += n;
+ }
+ if (bytes > 0) {
+ struct madvise_args uap;
- base += (bp->b_blkno << DEV_BSHIFT);
- if (bp->b_flags & B_READ)
+ bytes &= ~PAGE_MASK;
+ if (bytes != 0) {
+ bzero(&uap, sizeof(uap));
+ uap.addr = base;
+ uap.len = bytes;
+ uap.behav = MADV_FREE;
+ madvise(curproc, &uap);
+ }
+ }
+ bp->b_error = 0;
+ } else if (bp->b_flags & B_READ) {
+ /*
+ * Read data from our 'memory' disk
+ */
bp->b_error = copyin(base, bp->b_data, bp->b_bcount);
- else
+ } else {
+ /*
+ * Write data to our 'memory' disk
+ */
bp->b_error = copyout(bp->b_data, base, bp->b_bcount);
+ }
if (bp->b_error)
bp->b_flags |= B_ERROR;
biodone(bp);
@@ -222,7 +322,7 @@ mfs_close(ap)
*/
while (bp = bufq_first(&mfsp->buf_queue)) {
bufq_remove(&mfsp->buf_queue, bp);
- mfs_doio(bp, mfsp->mfs_baseoff);
+ mfs_doio(bp, mfsp);
wakeup((caddr_t)bp);
}
/*
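Since madvise() works on whole pages, the B_FREEBUF path in mfs_doio() above
trims the request to page boundaries before issuing MADV_FREE. A worked
example with illustrative values, assuming PAGE_SIZE = 4096:

	/* base = 0x1e00, bp->b_bcount = 8192 */
	n = PAGE_SIZE - ((vm_offset_t)base & PAGE_MASK);  /* 4096 - 0xe00 = 512 */
	bytes -= n;                                       /* 7680 */
	base += n;                                        /* 0x2000, page aligned */
	bytes &= ~PAGE_MASK;                              /* 4096; tail fragment dropped */
	/* exactly one full page at 0x2000 is MADV_FREE'd */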
diff --git a/sys/ufs/ufs/ufs_readwrite.c b/sys/ufs/ufs/ufs_readwrite.c
index 026d3486..fd3555a 100644
--- a/sys/ufs/ufs/ufs_readwrite.c
+++ b/sys/ufs/ufs/ufs_readwrite.c
@@ -31,7 +31,7 @@
* SUCH DAMAGE.
*
* @(#)ufs_readwrite.c 8.11 (Berkeley) 5/8/95
- * $Id: ufs_readwrite.c,v 1.54 1998/12/15 03:29:52 julian Exp $
+ * $Id: ufs_readwrite.c,v 1.55 1999/01/07 16:14:19 bde Exp $
*/
#define BLKSIZE(a, b, c) blksize(a, b, c)
@@ -392,7 +392,10 @@ WRITE(ap)
panic("%s: nonsync dir write", WRITE_S);
break;
default:
- panic("%s: type", WRITE_S);
+ panic("%s: type %p %d (%d,%d)", WRITE_S, vp, (int)vp->v_type,
+ (int)uio->uio_offset,
+ (int)uio->uio_resid
+ );
}
fs = ip->I_FS;
@@ -598,9 +601,8 @@ ffs_getpages(ap)
vm_page_busy(m);
vm_page_free(m);
} else if (m == mreq) {
- while (m->flags & PG_BUSY) {
- vm_page_sleep(m, "ffspwt", NULL);
- }
+ while (vm_page_sleep_busy(m, FALSE, "ffspwt"))
+ ;
vm_page_busy(m);
vp->v_lastr = m->pindex + 1;
} else {
diff --git a/sys/ufs/ufs/ufs_vnops.c b/sys/ufs/ufs/ufs_vnops.c
index 1010085..49e1a29 100644
--- a/sys/ufs/ufs/ufs_vnops.c
+++ b/sys/ufs/ufs/ufs_vnops.c
@@ -36,7 +36,7 @@
* SUCH DAMAGE.
*
* @(#)ufs_vnops.c 8.27 (Berkeley) 5/27/95
- * $Id: ufs_vnops.c,v 1.103 1998/12/24 09:45:10 bde Exp $
+ * $Id: ufs_vnops.c,v 1.104 1999/01/07 16:14:19 bde Exp $
*/
#include "opt_quota.h"
@@ -1731,6 +1731,9 @@ ufs_abortop(ap)
/*
* Calculate the logical to physical mapping if not done already,
* then call the device strategy routine.
+ *
+ * In order to be able to swap to a file, the VOP_BMAP operation may not
+ * deadlock on memory. See ufs_bmap() for details.
*/
int
ufs_strategy(ap)
diff --git a/sys/vm/default_pager.c b/sys/vm/default_pager.c
index ba92894..16b7512 100644
--- a/sys/vm/default_pager.c
+++ b/sys/vm/default_pager.c
@@ -28,7 +28,15 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $Id: default_pager.c,v 1.15 1998/02/06 12:14:20 eivind Exp $
+ * The default pager is responsible for supplying backing store to unbacked
+ * storage. The backing store is usually swap so we just fall through to
+ * the swap routines. However, since swap metadata has not been assigned,
+ * the swap routines assign and manage the swap backing store through the
+ * vm_page->swapblk field. The object is only converted when the page is
+ * physically freed after having been cleaned and even then vm_page->swapblk
+ * is maintained whenever a resident page also has swap backing store.
+ *
+ * $Id: default_pager.c,v 1.16 1998/10/13 08:24:42 dg Exp $
*/
#include <sys/param.h>
@@ -78,6 +86,14 @@ default_pager_alloc(void *handle, vm_ooffset_t size, vm_prot_t prot,
return vm_object_allocate(OBJT_DEFAULT, OFF_TO_IDX(round_page(offset + size)));
}
+/*
+ * deallocate resources associated with default objects. The default objects
+ * have no special resources allocated to them, but the vm_page's being used
+ * in this object might. Still, we do not have to do anything - we will free
+ * the swapblk in the underlying vm_page's when we free the vm_page or
+ * garbage collect the vm_page cache list.
+ */
+
static void
default_pager_dealloc(object)
vm_object_t object;
@@ -88,9 +104,11 @@ default_pager_dealloc(object)
}
/*
- * The default pager has no backing store, so we always return
- * failure.
+ * Load pages from backing store. Since OBJT_DEFAULT is converted to
+ * OBJT_SWAP at the time a swap-backed vm_page_t is freed, we will never
+ * see a vm_page with assigned swap here.
*/
+
static int
default_pager_getpages(object, m, count, reqpage)
vm_object_t object;
@@ -101,6 +119,13 @@ default_pager_getpages(object, m, count, reqpage)
return VM_PAGER_FAIL;
}
+/*
+ * Store pages to backing store. We should assign swap and initiate
+ * I/O. We do not actually convert the object to OBJT_SWAP here. The
+ * object will be converted when the written-out vm_page_t is moved from the
+ * cache to the free list.
+ */
+
static int
default_pager_putpages(object, m, c, sync, rtvals)
vm_object_t object;
@@ -109,26 +134,22 @@ default_pager_putpages(object, m, c, sync, rtvals)
boolean_t sync;
int *rtvals;
{
- int i;
-
- /*
- * Try to convert the object type into a OBJT_SWAP.
- * If the swp structure allocation fails, convert it
- * back to OBJT_DEFAULT and return failure. Otherwise
- * pass this putpages to the swap pager.
- */
- object->type = OBJT_SWAP;
-
- if (swap_pager_swp_alloc(object, M_KERNEL) != 0) {
- object->type = OBJT_DEFAULT;
- for (i = 0; i < c; i++)
- rtvals[i] = VM_PAGER_FAIL;
- return VM_PAGER_FAIL;
- }
-
return swap_pager_putpages(object, m, c, sync, rtvals);
}
+/*
+ * Tell us whether the backing store for the requested (object,index) is
+ * synchronized, i.e. whether we can throw the page away and
+ * reload it later. So, for example, if we are in the process of writing
+ * the page to its backing store, or if no backing store has been assigned,
+ * it is not yet synchronized.
+ *
+ * It is possible to have fully-synchronized swap assigned without the
+ * object having been converted. We just call swap_pager_haspage() to
+ * deal with it since it must already deal with it plus deal with swap
+ * meta-data structures.
+ */
+
static boolean_t
default_pager_haspage(object, pindex, before, after)
vm_object_t object;
@@ -139,24 +160,3 @@ default_pager_haspage(object, pindex, before, after)
return FALSE;
}
-void
-default_pager_convert_to_swap(object)
- vm_object_t object;
-{
- object->type = OBJT_SWAP;
- if (swap_pager_swp_alloc(object, M_KERNEL) != 0) {
- object->type = OBJT_DEFAULT;
- }
-}
-
-void
-default_pager_convert_to_swapq(object)
- vm_object_t object;
-{
- if (object &&
- (object->type == OBJT_DEFAULT) &&
- (object != kernel_object && object != kmem_object) &&
- (object->size > ((cnt.v_page_count - cnt.v_wire_count) / 4)))
- default_pager_convert_to_swap(object);
-}
-
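With the explicit conversion routines removed, an OBJT_DEFAULT object becomes
OBJT_SWAP implicitly, the first time swap metadata is attached. A sketch of
what that conversion point must do, inferred from the swp_pager_meta_build()
calls and the object-list handling visible in swap_pager.c below; the body
here is illustrative, not the actual implementation:

	if (object->type != OBJT_SWAP) {
		object->type = OBJT_SWAP;
		if (object->handle != NULL) {
			TAILQ_INSERT_TAIL(NOBJLIST(object->handle),
			    object, pager_object_list);
		} else {
			TAILQ_INSERT_TAIL(&swap_pager_un_object_list,
			    object, pager_object_list);
		}
	}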
diff --git a/sys/vm/device_pager.c b/sys/vm/device_pager.c
index a200b9c..cc742b0 100644
--- a/sys/vm/device_pager.c
+++ b/sys/vm/device_pager.c
@@ -36,7 +36,7 @@
* SUCH DAMAGE.
*
* @(#)device_pager.c 8.1 (Berkeley) 6/11/93
- * $Id: device_pager.c,v 1.36 1998/12/07 21:58:50 archie Exp $
+ * $Id: device_pager.c,v 1.37 1999/01/08 17:31:23 eivind Exp $
*/
#include <sys/param.h>
@@ -200,7 +200,7 @@ dev_pager_getpages(object, m, count, reqpage)
int prot;
dev = (dev_t) (uintptr_t) object->handle;
- offset = m[reqpage]->pindex + OFF_TO_IDX(object->paging_offset);
+ offset = m[reqpage]->pindex;
prot = PROT_READ; /* XXX should pass in? */
mapfunc = cdevsw[major(dev)]->d_mmap;
diff --git a/sys/vm/swap_pager.c b/sys/vm/swap_pager.c
index 1691168..b063520 100644
--- a/sys/vm/swap_pager.c
+++ b/sys/vm/swap_pager.c
@@ -1,4 +1,5 @@
/*
+ * Copyright (c) 1998 Matthew Dillon,
* Copyright (c) 1994 John S. Dyson
* Copyright (c) 1990 University of Utah.
* Copyright (c) 1991, 1993
@@ -36,17 +37,34 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
+ * New Swap System
+ * Matthew Dillon
+ *
+ * Radix Bitmap 'blists'.
+ *
+ * - The new swapper uses the new radix bitmap code. This should scale
+ * to arbitrarily small or arbitrarily large swap spaces and an almost
+ * arbitrary degree of fragmentation.
+ *
+ * Features:
+ *
+ * - on the fly reallocation of swap during putpages. The new system
+ * does not try to keep previously allocated swap blocks for dirty
+ * pages.
+ *
+ * - on the fly deallocation of swap
+ *
+ * - No more garbage collection required. Unnecessarily allocated swap
+ * blocks only exist for dirty vm_page_t's now and these are already
+ * cycled (in a high-load system) by the pager. We also do on-the-fly
+ * removal of invalidated swap blocks when a page is destroyed
+ * or renamed.
+ *
* from: Utah $Hdr: swap_pager.c 1.4 91/04/30$
*
* @(#)swap_pager.c 8.9 (Berkeley) 3/21/94
- * $Id: swap_pager.c,v 1.106 1999/01/08 17:31:23 eivind Exp $
- */
-
-/*
- * Quick hack to page to dedicated partition(s).
- * TODO:
- * Add multiprocessor locks
- * Deal with async writes in a better fashion
+ *
+ * $Id: swap_pager.c,v 1.107 1999/01/10 01:58:28 eivind Exp $
*/
#include <sys/param.h>
@@ -57,18 +75,16 @@
#include <sys/vnode.h>
#include <sys/malloc.h>
#include <sys/vmmeter.h>
-#include <sys/rlist.h>
+#include <sys/blist.h>
+#include <sys/lock.h>
#ifndef MAX_PAGEOUT_CLUSTER
#define MAX_PAGEOUT_CLUSTER 16
#endif
-#ifndef NPENDINGIO
-#define NPENDINGIO 16
-#endif
-
-#define SWB_NPAGES MAX_PAGEOUT_CLUSTER
+#define SWB_NPAGES MAX_PAGEOUT_CLUSTER
+#include "opt_swap.h"
#include <vm/vm.h>
#include <vm/vm_prot.h>
#include <vm/vm_object.h>
@@ -77,848 +93,651 @@
#include <vm/vm_pageout.h>
#include <vm/swap_pager.h>
#include <vm/vm_extern.h>
+#include <vm/vm_zone.h>
-static int nswiodone;
-int swap_pager_full;
-extern int vm_swap_size;
-static int no_swap_space = 1;
-static int max_pageout_cluster;
-struct rlisthdr swaplist;
-
-TAILQ_HEAD(swpclean, swpagerclean);
-
-typedef struct swpagerclean *swp_clean_t;
+#define SWM_FREE 0x02 /* free, period */
+#define SWM_POP 0x04 /* pop out */
-static struct swpagerclean {
- TAILQ_ENTRY(swpagerclean) spc_list;
- int spc_flags;
- struct buf *spc_bp;
- vm_object_t spc_object;
- vm_offset_t spc_kva;
- int spc_first;
- int spc_count;
- vm_page_t spc_m[MAX_PAGEOUT_CLUSTER];
-} swcleanlist[NPENDINGIO];
-
-
-/* spc_flags values */
-#define SPC_ERROR 0x01
+/*
+ * vm_swap_size is in page-sized chunks now. It was DEV_BSIZE'd chunks
+ * in the old system.
+ */
-#define SWB_EMPTY (-1)
+extern int vm_swap_size; /* number of free swap blocks, in pages */
-/* list of completed page cleans */
-static struct swpclean swap_pager_done;
+int swap_pager_full; /* swap space exhaustion (w/ hysteresis)*/
+static int nsw_rcount; /* free read buffers */
+static int nsw_wcount; /* free write buffers */
+static int nsw_hysteresis; /* hysteresis */
+static int max_pageout_cluster; /* maximum VOP I/O allowed */
+static int sw_alloc_interlock; /* swap pager allocation interlock */
-/* list of pending page cleans */
-static struct swpclean swap_pager_inuse;
+struct blist *swapblist;
+static struct swblock **swhash;
+static int swhash_mask;
-/* list of free pager clean structs */
-static struct swpclean swap_pager_free;
-static int swap_pager_free_count;
-static int swap_pager_free_pending;
-/* list of "named" anon region objects */
-static struct pagerlst swap_pager_object_list;
+/*
+ * "named" and "unnamed" anon region objects. Try to reduce the overhead
+ * of searching a named list by hashing it just a little.
+ */
-/* list of "unnamed" anon region objects */
-struct pagerlst swap_pager_un_object_list;
+#define NOBJLISTS 8
-#define SWAP_FREE_NEEDED 0x1 /* need a swap block */
-#define SWAP_FREE_NEEDED_BY_PAGEOUT 0x2
-static int swap_pager_needflags;
+#define NOBJLIST(handle) \
+ (&swap_pager_object_list[((int)(long)handle >> 4) & (NOBJLISTS-1)])
-static struct pagerlst *swp_qs[] = {
- &swap_pager_object_list, &swap_pager_un_object_list, (struct pagerlst *) 0
-};
+static struct pagerlst swap_pager_object_list[NOBJLISTS];
+struct pagerlst swap_pager_un_object_list;
+vm_zone_t swap_zone;
/*
- * pagerops for OBJT_SWAP - "swap pager".
+ * pagerops for OBJT_SWAP - "swap pager". Some ops are also global procedure
+ * calls hooked from other parts of the VM system and do not appear here.
+ * (see vm/swap_pager.h).
*/
+
static vm_object_t
swap_pager_alloc __P((void *handle, vm_ooffset_t size,
vm_prot_t prot, vm_ooffset_t offset));
static void swap_pager_dealloc __P((vm_object_t object));
-static boolean_t
- swap_pager_haspage __P((vm_object_t object, vm_pindex_t pindex,
- int *before, int *after));
static int swap_pager_getpages __P((vm_object_t, vm_page_t *, int, int));
static void swap_pager_init __P((void));
-static void spc_free __P((swp_clean_t));
+static void swap_pager_unswapped __P((vm_page_t));
struct pagerops swappagerops = {
- swap_pager_init,
- swap_pager_alloc,
- swap_pager_dealloc,
- swap_pager_getpages,
- swap_pager_putpages,
- swap_pager_haspage,
- swap_pager_sync
+ swap_pager_init, /* early system initialization of pager */
+ swap_pager_alloc, /* allocate an OBJT_SWAP object */
+ swap_pager_dealloc, /* deallocate an OBJT_SWAP object */
+ swap_pager_getpages, /* pagein */
+ swap_pager_putpages, /* pageout */
+ swap_pager_haspage, /* get backing store status for page */
+ swap_pager_unswapped /* remove swap related to page */
};
-static int npendingio;
-static int dmmin;
+/*
+ * dmmax is in page-sized chunks with the new swap system. It was
+ * dev-bsized chunks in the old.
+ *
+ * swap_*() routines are externally accessible. swp_*() routines are
+ * internal.
+ */
+
int dmmax;
+static int dmmax_mask;
+int nswap_lowat = 128; /* in pages, swap_pager_full warning */
+int nswap_hiwat = 256; /* in pages, swap_pager_full warning */
+
+static __inline void swp_sizecheck __P((void));
+static void swp_pager_sync_iodone __P((struct buf *bp));
+static void swp_pager_async_iodone __P((struct buf *bp));
+
+/*
+ * Swap bitmap functions
+ */
+
+static __inline void swp_pager_freeswapspace __P((daddr_t blk, int npages));
+static __inline daddr_t swp_pager_getswapspace __P((int npages));
+
+/*
+ * Metadata functions
+ */
+
+static void swp_pager_meta_build __P((vm_object_t, daddr_t, daddr_t, int));
+static void swp_pager_meta_free __P((vm_object_t, daddr_t, daddr_t));
+static void swp_pager_meta_free_all __P((vm_object_t));
+static daddr_t swp_pager_meta_ctl __P((vm_object_t, vm_pindex_t, int));
-static int swap_pager_block_index __P((vm_pindex_t pindex));
-static int swap_pager_block_offset __P((vm_pindex_t pindex));
-static daddr_t *swap_pager_diskaddr __P((vm_object_t object,
- vm_pindex_t pindex, int *valid));
-static void swap_pager_finish __P((swp_clean_t spc));
-static void swap_pager_free_swap __P((vm_object_t object));
-static void swap_pager_freeswapspace __P((vm_object_t object,
- unsigned int from,
- unsigned int to));
-static int swap_pager_getswapspace __P((vm_object_t object,
- unsigned int amount,
- daddr_t *rtval));
-static void swap_pager_iodone __P((struct buf *));
-static void swap_pager_iodone1 __P((struct buf *bp));
-static void swap_pager_reclaim __P((void));
-static void swap_pager_ridpages __P((vm_page_t *m, int count,
- int reqpage));
-static void swap_pager_setvalid __P((vm_object_t object,
- vm_offset_t offset, int valid));
-static __inline void swapsizecheck __P((void));
-
-#define SWAPLOW (vm_swap_size < (512 * btodb(PAGE_SIZE)))
+/*
+ * SWP_SIZECHECK() - update swap_pager_full indication
+ *
+ * update the swap_pager_full indication and warn when we are
+ * about to run out of swap space.
+ *
+ * No restrictions on call
+ * This routine may not block.
+ * This routine must be called at splvm()
+ */
static __inline void
-swapsizecheck()
+swp_sizecheck()
{
- if (vm_swap_size < 128 * btodb(PAGE_SIZE)) {
+ if (vm_swap_size < nswap_lowat) {
if (swap_pager_full == 0)
printf("swap_pager: out of swap space\n");
swap_pager_full = 1;
- } else if (vm_swap_size > 192 * btodb(PAGE_SIZE))
+ } else if (vm_swap_size > nswap_hiwat) {
swap_pager_full = 0;
+ }
}
+/*
+ * SWAP_PAGER_INIT() - initialize the swap pager!
+ *
+ * Expected to be started from system init. NOTE: This code is run
+ * before much else so be careful what you depend on. Most of the VM
+ * system has yet to be initialized at this point.
+ */
+
static void
swap_pager_init()
{
- int maxsafepending;
- TAILQ_INIT(&swap_pager_object_list);
- TAILQ_INIT(&swap_pager_un_object_list);
-
/*
- * Initialize clean lists
+ * Initialize object lists
*/
- TAILQ_INIT(&swap_pager_inuse);
- TAILQ_INIT(&swap_pager_done);
- TAILQ_INIT(&swap_pager_free);
- swap_pager_free_count = 0;
+ int i;
+
+ for (i = 0; i < NOBJLISTS; ++i)
+ TAILQ_INIT(&swap_pager_object_list[i]);
+ TAILQ_INIT(&swap_pager_un_object_list);
/*
- * Calculate the swap allocation constants.
+ * Device Stripe, in PAGE_SIZE'd blocks
*/
- dmmin = PAGE_SIZE / DEV_BSIZE;
- dmmax = btodb(SWB_NPAGES * PAGE_SIZE) * 2;
-
- maxsafepending = cnt.v_free_min - cnt.v_free_reserved;
- npendingio = NPENDINGIO;
- max_pageout_cluster = MAX_PAGEOUT_CLUSTER;
-
- if ((2 * NPENDINGIO * MAX_PAGEOUT_CLUSTER) > maxsafepending) {
- max_pageout_cluster = MAX_PAGEOUT_CLUSTER / 2;
- npendingio = maxsafepending / (2 * max_pageout_cluster);
- if (npendingio < 2)
- npendingio = 2;
- }
+
+ dmmax = SWB_NPAGES * 2;
+ dmmax_mask = ~(dmmax - 1);
}
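dmmax_mask turns the "same interleave stripe" test into one XOR and one AND;
the getpages clustering code later in this file uses exactly that test.
Assuming the default SWB_NPAGES of 16, dmmax = 32 and dmmax_mask = ~31:

	daddr_t a = 37, b = 58, c = 70;

	(a ^ b) & dmmax_mask;	/* == 0: both fall in stripe [32,63] */
	(a ^ c) & dmmax_mask;	/* != 0: 70 falls in stripe [64,95] */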
+/*
+ * SWAP_PAGER_SWAP_INIT() - swap pager initialization from pageout process
+ *
+ * Expected to be started from pageout process once, prior to entering
+ * its main loop.
+ */
+
void
swap_pager_swap_init()
{
- swp_clean_t spc;
- struct buf *bp;
- int i;
+ int n;
/*
- * kva's are allocated here so that we dont need to keep doing
- * kmem_alloc pageables at runtime
+ * Number of in-transit swap bp operations. Don't
+ * exhaust the pbufs completely. Make sure we
+ * initialize workable values (0 will work for hysteresis
+ * but it isn't very efficient).
+ *
+ * The max_pageout_cluster is constrained by the bp->b_pages[]
+ * array (MAXPHYS/PAGE_SIZE) and our locally defined
+ * MAX_PAGEOUT_CLUSTER. Also be aware that swap ops are
+ * constrained by the swap device interleave stripe size.
*/
- for (i = 0, spc = swcleanlist; i < npendingio; i++, spc++) {
- spc->spc_kva = kmem_alloc_pageable(pager_map, PAGE_SIZE * max_pageout_cluster);
- if (!spc->spc_kva) {
- break;
- }
- spc->spc_bp = malloc(sizeof(*bp), M_TEMP, M_KERNEL);
- if (!spc->spc_bp) {
- kmem_free_wakeup(pager_map, spc->spc_kva, PAGE_SIZE);
- break;
- }
- spc->spc_flags = 0;
- TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list);
- swap_pager_free_count++;
- }
-}
-int
-swap_pager_swp_alloc(object, wait)
- vm_object_t object;
- int wait;
-{
- sw_blk_t swb;
- int nblocks;
- int i, j;
-
- nblocks = (object->size + SWB_NPAGES - 1) / SWB_NPAGES;
- swb = malloc(nblocks * sizeof(*swb), M_VMPGDATA, wait);
- if (swb == NULL)
- return 1;
-
- for (i = 0; i < nblocks; i++) {
- swb[i].swb_valid = 0;
- swb[i].swb_locked = 0;
- for (j = 0; j < SWB_NPAGES; j++)
- swb[i].swb_block[j] = SWB_EMPTY;
- }
+ nsw_rcount = (nswbuf + 1) / 2;
+ nsw_wcount = (nswbuf + 3) / 4;
+ nsw_hysteresis = nsw_wcount / 2;
+ max_pageout_cluster = min((MAXPHYS/PAGE_SIZE), MAX_PAGEOUT_CLUSTER);
- object->un_pager.swp.swp_nblocks = nblocks;
- object->un_pager.swp.swp_allocsize = 0;
- object->un_pager.swp.swp_blocks = swb;
- object->un_pager.swp.swp_poip = 0;
+ /*
+ * Initialize our zone. Right now I'm just guessing on the number
+ * we need based on the number of pages in the system. Each swblock
+ * can hold 16 pages, so this is probably overkill.
+ */
- if (object->handle != NULL) {
- TAILQ_INSERT_TAIL(&swap_pager_object_list, object, pager_object_list);
- } else {
- TAILQ_INSERT_TAIL(&swap_pager_un_object_list, object, pager_object_list);
- }
+ n = cnt.v_page_count * 2;
- return 0;
+ swap_zone = zinit(
+ "SWAPMETA",
+ sizeof(struct swblock),
+ n,
+ ZONE_INTERRUPT,
+ 1
+ );
+
+ /*
+ * Initialize our meta-data hash table. The swapper does not need to
+ * be quite as efficient as the VM system, so we do not use an
+ * oversized hash table.
+ *
+ * n: size of hash table, must be power of 2
+ * swhash_mask: hash table index mask
+ */
+
+ for (n = 1; n < cnt.v_page_count / 4; n <<= 1)
+ ;
+
+ swhash = malloc(sizeof(struct swblock *) * n, M_VMPGDATA, M_WAITOK);
+ bzero(swhash, sizeof(struct swblock *) * n);
+
+ swhash_mask = n - 1;
}
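The loop sizes the hash table to the smallest power of two that is at least
cnt.v_page_count / 4, so a bucket lookup is a mask operation rather than a
modulus. The hash mix itself is not part of this excerpt, so the index
expression below is only illustrative:

	/* e.g. cnt.v_page_count = 32768: n ends up 8192, swhash_mask = 8191 */
	struct swblock **pswap;

	pswap = &swhash[(pindex ^ (int)(long)object) & swhash_mask];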
/*
- * Allocate an object and associated resources.
- * Note that if we are called from the pageout daemon (handle == NULL)
- * we should not wait for memory as it could resulting in deadlock.
+ * SWAP_PAGER_ALLOC() - allocate a new OBJT_SWAP VM object and instantiate
+ * its metadata structures.
+ *
+ * This routine is called from the mmap and fork code to create a new
+ * OBJT_SWAP object. We do this by creating an OBJT_DEFAULT object
+ * and then converting it with swp_pager_meta_build().
+ *
+ * This routine may block in vm_object_allocate() and create a named
+ * object lookup race, so we must interlock. We must also run at
+ * splvm() for the object lookup to handle races with interrupts, but
+ * we do not have to maintain splvm() in between the lookup and the
+ * add because (I believe) it is not possible to attempt to create
+ * a new swap object w/handle when a default object with that handle
+ * already exists.
*/
+
static vm_object_t
swap_pager_alloc(void *handle, vm_ooffset_t size, vm_prot_t prot,
vm_ooffset_t offset)
{
vm_object_t object;
- /*
- * If this is a "named" anonymous region, look it up and use the
- * object if it exists, otherwise allocate a new one.
- */
if (handle) {
- object = vm_pager_object_lookup(&swap_pager_object_list, handle);
+ /*
+ * Reference existing named region or allocate new one. There
+ * should not be a race here against swp_pager_meta_build()
+ * as called from vm_page_remove() in regards to the lookup
+ * of the handle.
+ */
+
+ while (sw_alloc_interlock) {
+ sw_alloc_interlock = -1;
+ tsleep(&sw_alloc_interlock, PVM, "swpalc", 0);
+ }
+ sw_alloc_interlock = 1;
+
+ object = vm_pager_object_lookup(NOBJLIST(handle), handle);
+
if (object != NULL) {
vm_object_reference(object);
} else {
- /*
- * XXX - there is a race condition here. Two processes
- * can request the same named object simultaneuously,
- * and if one blocks for memory, the result is a disaster.
- * Probably quite rare, but is yet another reason to just
- * rip support of "named anonymous regions" out altogether.
- */
- object = vm_object_allocate(OBJT_SWAP,
+ object = vm_object_allocate(OBJT_DEFAULT,
OFF_TO_IDX(offset + PAGE_MASK + size));
object->handle = handle;
- (void) swap_pager_swp_alloc(object, M_WAITOK);
+
+ swp_pager_meta_build(
+ object,
+ 0,
+ SWAPBLK_NONE,
+ 0
+ );
}
+
+ if (sw_alloc_interlock < 0)
+ wakeup(&sw_alloc_interlock);
+
+ sw_alloc_interlock = 0;
} else {
- object = vm_object_allocate(OBJT_SWAP,
+ object = vm_object_allocate(OBJT_DEFAULT,
OFF_TO_IDX(offset + PAGE_MASK + size));
- (void) swap_pager_swp_alloc(object, M_WAITOK);
+
+ swp_pager_meta_build(
+ object,
+ 0,
+ SWAPBLK_NONE,
+ 0
+ );
}
return (object);
}
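The interlock above is a small tri-state idiom: 1 means held, -1 means held
with waiters, so the holder only pays for a wakeup() when somebody actually
slept. The same pattern, extracted with hypothetical names:

	static int my_interlock;	/* 0 free, 1 held, -1 held + waiters */

	/* acquire */
	while (my_interlock) {
		my_interlock = -1;		/* record that we are waiting */
		tsleep(&my_interlock, PVM, "mylck", 0);
	}
	my_interlock = 1;

	/* ... critical section ... */

	/* release */
	if (my_interlock < 0)
		wakeup(&my_interlock);
	my_interlock = 0;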
/*
- * returns disk block associated with pager and offset
- * additionally, as a side effect returns a flag indicating
- * if the block has been written
+ * SWAP_PAGER_DEALLOC() - remove swap metadata from object
+ *
+ * The swap backing for the object is destroyed. The code is
+ * designed such that we can reinstantiate it later, but this
+ * routine is typically called only when the entire object is
+ * about to be destroyed.
+ *
+ * This routine may block, but no longer does.
+ *
+ * The object must be locked or unreferenceable.
*/
-static __inline daddr_t *
-swap_pager_diskaddr(object, pindex, valid)
+static void
+swap_pager_dealloc(object)
vm_object_t object;
- vm_pindex_t pindex;
- int *valid;
{
- register sw_blk_t swb;
- int ix;
-
- if (valid)
- *valid = 0;
- ix = pindex / SWB_NPAGES;
- if ((ix >= object->un_pager.swp.swp_nblocks) ||
- (pindex >= object->size)) {
- return (FALSE);
+ /*
+ * Remove from list right away so lookups will fail if we block for
+ * pageout completion.
+ */
+
+ if (object->handle == NULL) {
+ TAILQ_REMOVE(&swap_pager_un_object_list, object, pager_object_list);
+ } else {
+ TAILQ_REMOVE(NOBJLIST(object->handle), object, pager_object_list);
}
- swb = &object->un_pager.swp.swp_blocks[ix];
- ix = pindex % SWB_NPAGES;
- if (valid)
- *valid = swb->swb_valid & (1 << ix);
- return &swb->swb_block[ix];
-}
-/*
- * Utility routine to set the valid (written) bit for
- * a block associated with a pager and offset
- */
-static void
-swap_pager_setvalid(object, offset, valid)
- vm_object_t object;
- vm_offset_t offset;
- int valid;
-{
- register sw_blk_t swb;
- int ix;
+ vm_object_pip_wait(object, "swpdea");
- ix = offset / SWB_NPAGES;
- if (ix >= object->un_pager.swp.swp_nblocks)
- return;
+ /*
+ * Free all remaining metadata. We only bother to free it from
+ * the swap meta data. We do not attempt to free swapblk's still
+ * associated with vm_page_t's for this object. We do not care
+ * if paging is still in progress on some objects.
+ */
- swb = &object->un_pager.swp.swp_blocks[ix];
- ix = offset % SWB_NPAGES;
- if (valid)
- swb->swb_valid |= (1 << ix);
- else
- swb->swb_valid &= ~(1 << ix);
- return;
+ swp_pager_meta_free_all(object);
}
+/************************************************************************
+ * SWAP PAGER BITMAP ROUTINES *
+ ************************************************************************/
+
/*
- * this routine allocates swap space with a fragmentation
- * minimization policy.
+ * SWP_PAGER_GETSWAPSPACE() - allocate raw swap space
+ *
+ * Allocate swap for the requested number of pages. The starting
+ * swap block number (a page index) is returned or SWAPBLK_NONE
+ * if the allocation failed.
+ *
+ * Also has the side effect of advising that somebody made a mistake
+ * when they configured swap and didn't configure enough.
+ *
+ * Must be called at splvm() to avoid races with bitmap frees from
+ * vm_page_remove() aka swap_pager_page_removed().
+ *
+ * This routine may not block
+ * This routine must be called at splvm().
*/
-static int
-swap_pager_getswapspace(object, amount, rtval)
- vm_object_t object;
- unsigned int amount;
- daddr_t *rtval;
+
+static __inline daddr_t
+swp_pager_getswapspace(npages)
+ int npages;
{
- unsigned location;
+ daddr_t blk;
- vm_swap_size -= amount;
-
- if (!rlist_alloc(&swaplist, amount, &location)) {
- vm_swap_size += amount;
- return 0;
+ if ((blk = blist_alloc(swapblist, npages)) == SWAPBLK_NONE) {
+ printf("swap_pager_getswapspace: failed\n");
} else {
- swapsizecheck();
- object->un_pager.swp.swp_allocsize += amount;
- *rtval = location;
- return 1;
+ vm_swap_size -= npages;
+ swp_sizecheck();
}
+ return(blk);
}
/*
- * this routine frees swap space with a fragmentation
- * minimization policy.
+ * SWP_PAGER_FREESWAPSPACE() - free raw swap space
+ *
+ * This routine returns the specified swap blocks back to the bitmap.
+ *
+ * Note: This routine may not block (it could in the old swap code),
+ * and through the use of the new blist routines it does not block.
+ *
+ * We must be called at splvm() to avoid races with bitmap frees from
+ * vm_page_remove() aka swap_pager_page_removed().
+ *
+ * This routine may not block
+ * This routine must be called at splvm().
*/
-static void
-swap_pager_freeswapspace(object, from, to)
- vm_object_t object;
- unsigned int from;
- unsigned int to;
+
+static __inline void
+swp_pager_freeswapspace(blk, npages)
+ daddr_t blk;
+ int npages;
{
- rlist_free(&swaplist, from, to);
- vm_swap_size += (to - from) + 1;
- object->un_pager.swp.swp_allocsize -= (to - from) + 1;
- swapsizecheck();
+ blist_free(swapblist, blk, npages);
+ vm_swap_size += npages;
+ swp_sizecheck();
}
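Both routines are thin wrappers around the radix bitmap plus the
vm_swap_size / swp_sizecheck() bookkeeping. A minimal usage sketch, at
splvm() as the comments above require:

	int s = splvm();
	daddr_t blk;

	if ((blk = swp_pager_getswapspace(4)) != SWAPBLK_NONE) {
		/* ... record blk .. blk+3 in the object's metadata ... */
		swp_pager_freeswapspace(blk, 4);	/* or give it back */
	}
	splx(s);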
+
/*
- * this routine frees swap blocks from a specified pager
+ * SWAP_PAGER_FREESPACE() - frees swap blocks associated with a page
+ * range within an object.
+ *
+ * This is a globally accessible routine.
+ *
+ * This routine removes swapblk assignments from swap metadata.
+ *
+ * The external callers of this routine typically have already destroyed
+ * or renamed vm_page_t's associated with this range in the object so
+ * we should be ok.
*/
+
void
swap_pager_freespace(object, start, size)
vm_object_t object;
vm_pindex_t start;
vm_size_t size;
{
- vm_pindex_t i;
- int s;
-
- s = splvm();
- for (i = start; i < start + size; i += 1) {
- int valid;
- daddr_t *addr = swap_pager_diskaddr(object, i, &valid);
-
- if (addr && *addr != SWB_EMPTY) {
- swap_pager_freeswapspace(object, *addr, *addr + btodb(PAGE_SIZE) - 1);
- if (valid) {
- swap_pager_setvalid(object, i, 0);
- }
- *addr = SWB_EMPTY;
- }
- }
- splx(s);
+ swp_pager_meta_free(object, start, size);
}
/*
- * same as freespace, but don't free, just force a DMZ next time
- */
-void
-swap_pager_dmzspace(object, start, size)
- vm_object_t object;
- vm_pindex_t start;
- vm_size_t size;
-{
- vm_pindex_t i;
- int s;
-
- s = splvm();
- for (i = start; i < start + size; i += 1) {
- int valid;
- daddr_t *addr = swap_pager_diskaddr(object, i, &valid);
-
- if (addr && *addr != SWB_EMPTY) {
- if (valid) {
- swap_pager_setvalid(object, i, 0);
- }
- }
- }
- splx(s);
-}
-
-static void
-swap_pager_free_swap(object)
- vm_object_t object;
-{
- register int i, j;
- register sw_blk_t swb;
- int first_block=0, block_count=0;
- int s;
- /*
- * Free left over swap blocks
- */
- swb = object->un_pager.swp.swp_blocks;
- if (swb == NULL) {
- return;
- }
-
- s = splvm();
- for (i = 0; i < object->un_pager.swp.swp_nblocks; i++, swb++) {
- for (j = 0; j < SWB_NPAGES; j++) {
- if (swb->swb_block[j] != SWB_EMPTY) {
- /*
- * initially the length of the run is zero
- */
- if (block_count == 0) {
- first_block = swb->swb_block[j];
- block_count = btodb(PAGE_SIZE);
- swb->swb_block[j] = SWB_EMPTY;
- /*
- * if the new block can be included into the current run
- */
- } else if (swb->swb_block[j] == first_block + block_count) {
- block_count += btodb(PAGE_SIZE);
- swb->swb_block[j] = SWB_EMPTY;
- /*
- * terminate the previous run, and start a new one
- */
- } else {
- swap_pager_freeswapspace(object, first_block,
- (unsigned) first_block + block_count - 1);
- first_block = swb->swb_block[j];
- block_count = btodb(PAGE_SIZE);
- swb->swb_block[j] = SWB_EMPTY;
- }
- }
- }
- }
-
- if (block_count) {
- swap_pager_freeswapspace(object, first_block,
- (unsigned) first_block + block_count - 1);
- }
- splx(s);
-}
-
-
-/*
- * swap_pager_reclaim frees up over-allocated space from all pagers
- * this eliminates internal fragmentation due to allocation of space
- * for segments that are never swapped to. It has been written so that
- * it does not block until the rlist_free operation occurs; it keeps
- * the queues consistant.
- */
-
-/*
- * Maximum number of blocks (pages) to reclaim per pass
- */
-#define MAXRECLAIM 128
-
-static void
-swap_pager_reclaim()
-{
- vm_object_t object;
- int i, j, k;
- int s;
- int reclaimcount;
- static struct {
- int address;
- vm_object_t object;
- } reclaims[MAXRECLAIM];
- static int in_reclaim;
-
- /*
- * allow only one process to be in the swap_pager_reclaim subroutine
- */
- s = splvm();
- if (in_reclaim) {
- tsleep(&in_reclaim, PSWP, "swrclm", 0);
- splx(s);
- return;
- }
- in_reclaim = 1;
- reclaimcount = 0;
-
- /* for each pager queue */
- for (k = 0; swp_qs[k]; k++) {
-
- object = TAILQ_FIRST(swp_qs[k]);
- while (object && (reclaimcount < MAXRECLAIM)) {
-
- /*
- * see if any blocks associated with a pager has been
- * allocated but not used (written)
- */
- if ((object->flags & OBJ_DEAD) == 0 &&
- (object->paging_in_progress == 0)) {
- for (i = 0; i < object->un_pager.swp.swp_nblocks; i++) {
- sw_blk_t swb = &object->un_pager.swp.swp_blocks[i];
-
- if (swb->swb_locked)
- continue;
- for (j = 0; j < SWB_NPAGES; j++) {
- if (swb->swb_block[j] != SWB_EMPTY &&
- (swb->swb_valid & (1 << j)) == 0) {
- reclaims[reclaimcount].address = swb->swb_block[j];
- reclaims[reclaimcount++].object = object;
- swb->swb_block[j] = SWB_EMPTY;
- if (reclaimcount >= MAXRECLAIM)
- goto rfinished;
- }
- }
- }
- }
- object = TAILQ_NEXT(object, pager_object_list);
- }
- }
-
-rfinished:
-
- /*
- * free the blocks that have been added to the reclaim list
- */
- for (i = 0; i < reclaimcount; i++) {
- swap_pager_freeswapspace(reclaims[i].object,
- reclaims[i].address, reclaims[i].address + btodb(PAGE_SIZE) - 1);
- }
- splx(s);
- in_reclaim = 0;
- wakeup(&in_reclaim);
-}
-
-
-/*
- * swap_pager_copy copies blocks from one pager to another and
- * destroys the source pager
+ * SWAP_PAGER_COPY() - copy blocks from source pager to destination pager
+ * and destroy the source.
+ *
+ * Copy any valid swapblks from the source to the destination. In
+ * cases where both the source and destination have a valid swapblk,
+ * we keep the destination's.
+ *
+ * This routine is allowed to block. It may block allocating metadata
+ * indirectly through swp_pager_meta_build() or if paging is still in
+ * progress on the source.
+ *
+ * XXX vm_page_collapse() kinda expects us not to block because we
+ * supposedly do not need to allocate memory; for the moment we
+ * *may* have to get a little memory from the zone allocator, but
+ * it is taken from the interrupt memory. We should be ok.
+ *
+ * The source object contains no vm_page_t's (which is just as well)
+ *
+ * The source object is of type OBJT_SWAP.
+ *
+ * The source and destination objects must be
+ * locked or inaccessible (XXX are they ???)
*/
void
-swap_pager_copy(srcobject, srcoffset, dstobject, dstoffset,
- offset, destroysource)
+swap_pager_copy(srcobject, dstobject, offset, destroysource)
vm_object_t srcobject;
- vm_pindex_t srcoffset;
vm_object_t dstobject;
- vm_pindex_t dstoffset;
vm_pindex_t offset;
int destroysource;
{
vm_pindex_t i;
- int origsize;
- int s;
-
- if (vm_swap_size)
- no_swap_space = 0;
-
- origsize = srcobject->un_pager.swp.swp_allocsize;
/*
- * remove the source object from the swap_pager internal queue
+ * If destroysource is set, we remove the source object from the
+ * swap_pager internal queue now.
*/
+
if (destroysource) {
if (srcobject->handle == NULL) {
- TAILQ_REMOVE(&swap_pager_un_object_list, srcobject, pager_object_list);
+ TAILQ_REMOVE(
+ &swap_pager_un_object_list,
+ srcobject,
+ pager_object_list
+ );
} else {
- TAILQ_REMOVE(&swap_pager_object_list, srcobject, pager_object_list);
+ TAILQ_REMOVE(
+ NOBJLIST(srcobject->handle),
+ srcobject,
+ pager_object_list
+ );
}
}
- s = splvm();
- while (srcobject->un_pager.swp.swp_poip) {
- tsleep(srcobject, PVM, "spgout", 0);
- }
-
/*
- * clean all of the pages that are currently active and finished
+ * transfer source to destination.
*/
- if (swap_pager_free_pending)
- swap_pager_sync();
- /*
- * transfer source to destination
- */
- for (i = 0; i < dstobject->size; i += 1) {
- int srcvalid, dstvalid;
- daddr_t *srcaddrp = swap_pager_diskaddr(srcobject,
- i + offset + srcoffset, &srcvalid);
- daddr_t *dstaddrp;
+ for (i = 0; i < dstobject->size; ++i) {
+ daddr_t dstaddr;
/*
- * see if the source has space allocated
+ * Locate (without changing) the swapblk on the destination,
+ * unless it is invalid in which case free it silently, or
+ * if the destination is a resident page, in which case the
+ * source is thrown away.
*/
- if (srcaddrp && *srcaddrp != SWB_EMPTY) {
+
+ dstaddr = swp_pager_meta_ctl(dstobject, i, 0);
+
+ if (dstaddr == SWAPBLK_NONE) {
/*
- * if the source is valid and the dest has no space,
- * then copy the allocation from the srouce to the
- * dest.
+ * Destination has no swapblk and is not resident,
+ * copy source.
*/
- if (srcvalid) {
- dstaddrp = swap_pager_diskaddr(dstobject, i + dstoffset,
- &dstvalid);
- /*
- * if the dest already has a valid block,
- * deallocate the source block without
- * copying.
- */
- if (!dstvalid && dstaddrp && *dstaddrp != SWB_EMPTY) {
- swap_pager_freeswapspace(dstobject, *dstaddrp,
- *dstaddrp + btodb(PAGE_SIZE) - 1);
- *dstaddrp = SWB_EMPTY;
- }
- if (dstaddrp && *dstaddrp == SWB_EMPTY) {
- *dstaddrp = *srcaddrp;
- *srcaddrp = SWB_EMPTY;
- dstobject->un_pager.swp.swp_allocsize += btodb(PAGE_SIZE);
- srcobject->un_pager.swp.swp_allocsize -= btodb(PAGE_SIZE);
- swap_pager_setvalid(dstobject, i + dstoffset, 1);
- }
- }
+ daddr_t srcaddr;
+
+ srcaddr = swp_pager_meta_ctl(
+ srcobject,
+ i + offset,
+ SWM_POP
+ );
+
+ if (srcaddr != SWAPBLK_NONE)
+ swp_pager_meta_build(dstobject, i, srcaddr, 1);
+ } else {
/*
- * if the source is not empty at this point, then
- * deallocate the space.
+ * Destination has valid swapblk or it is represented
+ * by a resident page. We destroy the sourceblock.
*/
- if (*srcaddrp != SWB_EMPTY) {
- swap_pager_freeswapspace(srcobject, *srcaddrp,
- *srcaddrp + btodb(PAGE_SIZE) - 1);
- *srcaddrp = SWB_EMPTY;
- }
+
+ swp_pager_meta_ctl(srcobject, i + offset, SWM_FREE);
}
}
- splx(s);
/*
- * Free left over swap blocks
+ * Free left over swap blocks in source.
+ *
+ * We have to revert the type to OBJT_DEFAULT so we do not accidently
+ * double-remove the object from the swap queues.
*/
- if (destroysource) {
- swap_pager_free_swap(srcobject);
- if (srcobject->un_pager.swp.swp_allocsize) {
- printf("swap_pager_copy: *warning* pager with %d blocks (orig: %d)\n",
- srcobject->un_pager.swp.swp_allocsize, origsize);
- }
-
- free(srcobject->un_pager.swp.swp_blocks, M_VMPGDATA);
- srcobject->un_pager.swp.swp_blocks = NULL;
+ if (destroysource) {
+ swp_pager_meta_free_all(srcobject);
+ /*
+ * Reverting the type is not necessary, the caller is going
+ * to destroy srcobject directly, but I'm doing it here
+ * for consistency since we've removed the object from its
+ * queues.
+ */
+ srcobject->type = OBJT_DEFAULT;
}
return;
}
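The loop's conflict rule is "destination wins". A worked example per
destination index i, with illustrative swapblk values:

	/*
	 * src[i+offset] == 500, dst[i] == SWAPBLK_NONE:
	 *	500 is popped from src (SWM_POP) and installed at dst[i].
	 * src[i+offset] == 501, dst[i] == 700 (or dst page resident):
	 *	dst keeps 700; 501 is freed via SWM_FREE.
	 */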
-static void
-swap_pager_dealloc(object)
+/*
+ * SWAP_PAGER_HASPAGE() - determine if we have good backing store for
+ * the requested page.
+ *
+ * We determine whether good backing store exists for the requested
+ * page and return TRUE if it does, FALSE if it doesn't.
+ *
+ * If TRUE, we also try to determine how much valid, contiguous backing
+ * store exists before and after the requested page within a reasonable
+ * distance. We do not try to restrict it to the swap device stripe
+ * (that is handled in getpages/putpages). It probably isn't worth
+ * doing here.
+ */
+
+boolean_t
+swap_pager_haspage(object, pindex, before, after)
vm_object_t object;
+ vm_pindex_t pindex;
+ int *before;
+ int *after;
{
- int s;
- sw_blk_t swb;
+ daddr_t blk0;
/*
- * Remove from list right away so lookups will fail if we block for
- * pageout completion.
+ * do we have good backing store at the requested index ?
*/
- if (object->handle == NULL) {
- TAILQ_REMOVE(&swap_pager_un_object_list, object, pager_object_list);
- } else {
- TAILQ_REMOVE(&swap_pager_object_list, object, pager_object_list);
- }
- /*
- * Wait for all pageouts to finish and remove all entries from
- * cleaning list.
- */
+ blk0 = swp_pager_meta_ctl(object, pindex, 0);
- s = splvm();
- while (object->un_pager.swp.swp_poip) {
- tsleep(object, PVM, "swpout", 0);
+ if (blk0 & SWAPBLK_NONE) {
+ if (before)
+ *before = 0;
+ if (after)
+ *after = 0;
+ return (FALSE);
}
- splx(s);
-
- if (swap_pager_free_pending)
- swap_pager_sync();
/*
- * Free left over swap blocks
+ * find backwards-looking contiguous good backing store
*/
- swap_pager_free_swap(object);
- if (object->un_pager.swp.swp_allocsize) {
- printf("swap_pager_dealloc: *warning* freeing pager with %d blocks\n",
- object->un_pager.swp.swp_allocsize);
- }
- swb = object->un_pager.swp.swp_blocks;
- if (swb) {
- /*
- * Free swap management resources
- */
- free(swb, M_VMPGDATA);
- object->un_pager.swp.swp_blocks = NULL;
- }
-}
+ if (before != NULL) {
+ int i;
-static __inline int
-swap_pager_block_index(pindex)
- vm_pindex_t pindex;
-{
- return (pindex / SWB_NPAGES);
-}
-
-static __inline int
-swap_pager_block_offset(pindex)
- vm_pindex_t pindex;
-{
- return (pindex % SWB_NPAGES);
-}
+ for (i = 1; i < (SWB_NPAGES/2); ++i) {
+ daddr_t blk;
-/*
- * swap_pager_haspage returns TRUE if the pager has data that has
- * been written out.
- */
-static boolean_t
-swap_pager_haspage(object, pindex, before, after)
- vm_object_t object;
- vm_pindex_t pindex;
- int *before;
- int *after;
-{
- register sw_blk_t swb;
- int ix;
-
- if (before != NULL)
- *before = 0;
- if (after != NULL)
- *after = 0;
- ix = pindex / SWB_NPAGES;
- if (ix >= object->un_pager.swp.swp_nblocks) {
- return (FALSE);
+ if (i > pindex)
+ break;
+ blk = swp_pager_meta_ctl(object, pindex - i, 0);
+ if (blk & SWAPBLK_NONE)
+ break;
+ if (blk != blk0 - i)
+ break;
+ }
+ *before = (i - 1);
}
- swb = &object->un_pager.swp.swp_blocks[ix];
- ix = pindex % SWB_NPAGES;
-
- if (swb->swb_block[ix] != SWB_EMPTY) {
-
- if (swb->swb_valid & (1 << ix)) {
- int tix;
- if (before) {
- for(tix = ix - 1; tix >= 0; --tix) {
- if ((swb->swb_valid & (1 << tix)) == 0)
- break;
- if ((swb->swb_block[tix] +
- (ix - tix) * (PAGE_SIZE/DEV_BSIZE)) !=
- swb->swb_block[ix])
- break;
- (*before)++;
- }
- }
- if (after) {
- for(tix = ix + 1; tix < SWB_NPAGES; tix++) {
- if ((swb->swb_valid & (1 << tix)) == 0)
- break;
- if ((swb->swb_block[tix] -
- (tix - ix) * (PAGE_SIZE/DEV_BSIZE)) !=
- swb->swb_block[ix])
- break;
- (*after)++;
- }
- }
+ /*
+ * find forward-looking contiguous good backing store
+ */
- return TRUE;
+ if (after != NULL) {
+ int i;
+
+ for (i = 1; i < (SWB_NPAGES/2); ++i) {
+ daddr_t blk;
+
+ blk = swp_pager_meta_ctl(object, pindex + i, 0);
+ if (blk & SWAPBLK_NONE)
+ break;
+ if (blk != blk0 + i)
+ break;
}
+ *after = (i - 1);
}
- return (FALSE);
-}
-/*
- * Wakeup based upon spc state
- */
-static void
-spc_wakeup(void)
-{
- if( swap_pager_needflags & SWAP_FREE_NEEDED_BY_PAGEOUT) {
- swap_pager_needflags &= ~SWAP_FREE_NEEDED_BY_PAGEOUT;
- wakeup(&swap_pager_needflags);
- } else if ((swap_pager_needflags & SWAP_FREE_NEEDED) &&
- swap_pager_free_count >= ((2 * npendingio) / 3)) {
- swap_pager_needflags &= ~SWAP_FREE_NEEDED;
- wakeup(&swap_pager_free);
- }
+ return (TRUE);
}
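A worked example of the contiguity scan, assuming pindex maps to swapblk 1000
and its neighbors map as shown:

	int before, after;

	/* pindex-2..pindex+1 -> 998, 999, 1000, 1001; pindex+2 is a hole */
	swap_pager_haspage(object, pindex, &before, &after);
	/* returns TRUE with before == 2, after == 1 */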
/*
- * Free an spc structure
+ * SWAP_PAGER_PAGE_UNSWAPPED() - remove swap backing store related to page
+ *
+ * This removes any associated swap backing store, whether valid or
+ * not, from the page.
+ *
+ * This routine is typically called when a page is made dirty, at
+ * which point any associated swap can be freed. MADV_FREE also
+ * calls us in a special-case situation
+ *
+ * NOTE!!! If the page is clean and the swap was valid, the caller
+ * should make the page dirty before calling this routine. This routine
+ * does NOT change the m->dirty status of the page. Also: MADV_FREE
+ * depends on it.
+ *
+ * This routine may not block
*/
-static void
-spc_free(spc)
- swp_clean_t spc;
-{
- spc->spc_flags = 0;
- TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list);
- swap_pager_free_count++;
- if (swap_pager_needflags) {
- spc_wakeup();
- }
-}
-/*
- * swap_pager_ridpages is a convienience routine that deallocates all
- * but the required page. this is usually used in error returns that
- * need to invalidate the "extra" readahead pages.
- */
static void
-swap_pager_ridpages(m, count, reqpage)
- vm_page_t *m;
- int count;
- int reqpage;
+swap_pager_unswapped(m)
+ vm_page_t m;
{
- int i;
-
- for (i = 0; i < count; i++) {
- if (i != reqpage) {
- vm_page_free(m[i]);
- }
- }
+ swp_pager_meta_ctl(m->object, m->pindex, SWM_FREE);
}
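
The NOTE above encodes an ordering contract. A minimal toy model (assumption: plain flags in place of m->dirty and the swap metadata) shows why the page must be dirtied before swap_pager_unswapped() drops the backing store:

#include <assert.h>

struct page { int dirty; int has_swap; };

static void
make_dirty_and_unswap(struct page *m)
{
    m->dirty = 1;                   /* dirty first ... */
    m->has_swap = 0;                /* ... then drop the stale swap copy */
    /* at no point is the page both clean and unbacked */
    assert(m->dirty || m->has_swap);
}

int
main(void)
{
    struct page m = { 0, 1 };       /* clean page with valid swap */

    make_dirty_and_unswap(&m);
    return 0;
}

Reversing the two assignments would open a window in which the page looks clean yet has no backing store, the reversion-to-zeroes hazard the low-swap note in swap_pager_getpages() also warns about.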
/*
- * swap_pager_iodone1 is the completion routine for both reads and async writes
+ * SWAP_PAGER_GETPAGES() - bring pages in from swap
+ *
+ * Attempt to retrieve (m, count) pages from backing store, but make
+ * sure we retrieve at least m[reqpage]. We try to load in as large
+ * a chunk surrounding m[reqpage] as is contiguous in swap and which
+ * belongs to the same object.
+ *
+ * The code is designed for asynchronous operation and
+ * immediate-notification of 'reqpage' but tends not to be
+ * used that way. Please do not optimize-out this algorithmic
+ * feature; I intend to improve on it in the future.
+ *
+ * The parent has a single vm_object_pip_add() reference prior to
+ * calling us and we should return with the same.
+ *
+ * The parent has BUSY'd the pages. We should return with 'm'
+ * left busy, but the others adjusted.
*/
-static void
-swap_pager_iodone1(bp)
- struct buf *bp;
-{
- bp->b_flags |= B_DONE;
- bp->b_flags &= ~B_ASYNC;
- wakeup(bp);
-}
static int
swap_pager_getpages(object, m, count, reqpage)
@@ -926,208 +745,235 @@ swap_pager_getpages(object, m, count, reqpage)
vm_page_t *m;
int count, reqpage;
{
- register struct buf *bp;
- sw_blk_t swb[count];
- register int s;
+ struct buf *bp;
+ vm_page_t mreq;
+ int s;
int i;
- boolean_t rv;
- vm_offset_t kva, off[count];
- vm_pindex_t paging_offset;
- int reqaddr[count];
- int sequential;
-
- int first, last;
- int failed;
- int reqdskregion;
-
- object = m[reqpage]->object;
- paging_offset = OFF_TO_IDX(object->paging_offset);
- sequential = (m[reqpage]->pindex == (object->last_read + 1));
-
- for (i = 0; i < count; i++) {
- vm_pindex_t fidx = m[i]->pindex + paging_offset;
- int ix = swap_pager_block_index(fidx);
-
- if (ix >= object->un_pager.swp.swp_nblocks) {
- int j;
-
- if (i <= reqpage) {
- swap_pager_ridpages(m, count, reqpage);
- return (VM_PAGER_FAIL);
- }
- for (j = i; j < count; j++) {
- vm_page_free(m[j]);
- }
- count = i;
+ int j;
+ daddr_t blk;
+ vm_offset_t kva;
+ vm_pindex_t lastpindex;
+
+ mreq = m[reqpage];
+
+#if !defined(MAX_PERF)
+ if (mreq->object != object) {
+ panic("swap_pager_getpages: object mismatch %p/%p",
+ object,
+ mreq->object
+ );
+ }
+#endif
+ /*
+ * Calculate range to retrieve. The pages have already been assigned
+ * their swapblks. We require a *contiguous* range that falls entirely
+ * within a single device stripe. If we do not supply it, bad things
+ * happen.
+ */
+
+
+ blk = swp_pager_meta_ctl(mreq->object, mreq->pindex, 0);
+
+ for (i = reqpage - 1; i >= 0; --i) {
+ daddr_t iblk;
+
+ iblk = swp_pager_meta_ctl(m[i]->object, m[i]->pindex, 0);
+ if (iblk & SWAPBLK_NONE)
+ break;
+
+ if ((blk ^ iblk) & dmmax_mask)
+ break;
+
+ if (blk != iblk + (reqpage - i))
break;
- }
- swb[i] = &object->un_pager.swp.swp_blocks[ix];
- off[i] = swap_pager_block_offset(fidx);
- reqaddr[i] = swb[i]->swb_block[off[i]];
}
+ ++i;
- /* make sure that our required input request is existant */
+ for (j = reqpage + 1; j < count; ++j) {
+ daddr_t jblk;
- if (reqaddr[reqpage] == SWB_EMPTY ||
- (swb[reqpage]->swb_valid & (1 << off[reqpage])) == 0) {
- swap_pager_ridpages(m, count, reqpage);
- return (VM_PAGER_FAIL);
+ jblk = swp_pager_meta_ctl(m[j]->object, m[j]->pindex, 0);
+ if (jblk & SWAPBLK_NONE)
+ break;
+
+ if ((blk ^ jblk) & dmmax_mask)
+ break;
+
+ if (blk != jblk - (j - reqpage))
+ break;
}
- reqdskregion = reqaddr[reqpage] / dmmax;
/*
- * search backwards for the first contiguous page to transfer
+ * If blk itself is bad, well, we can't do any I/O. This should
+ * already be covered as a side effect, but I'm making sure.
*/
- failed = 0;
- first = 0;
- for (i = reqpage - 1; i >= 0; --i) {
- if (sequential || failed || (reqaddr[i] == SWB_EMPTY) ||
- (swb[i]->swb_valid & (1 << off[i])) == 0 ||
- (reqaddr[i] != (reqaddr[reqpage] + (i - reqpage) * btodb(PAGE_SIZE))) ||
- ((reqaddr[i] / dmmax) != reqdskregion)) {
- failed = 1;
- vm_page_free(m[i]);
- if (first == 0)
- first = i + 1;
- }
+
+ if (blk & SWAPBLK_NONE) {
+ i = reqpage;
+ j = reqpage + 1;
}
+
/*
- * search forwards for the last contiguous page to transfer
+ * free pages outside our collection range. Note: we never free
+ * mreq; it must remain busy throughout.
*/
- failed = 0;
- last = count;
- for (i = reqpage + 1; i < count; i++) {
- if (failed || (reqaddr[i] == SWB_EMPTY) ||
- (swb[i]->swb_valid & (1 << off[i])) == 0 ||
- (reqaddr[i] != (reqaddr[reqpage] + (i - reqpage) * btodb(PAGE_SIZE))) ||
- ((reqaddr[i] / dmmax) != reqdskregion)) {
- failed = 1;
- vm_page_free(m[i]);
- if (last == count)
- last = i;
- }
- }
- count = last;
- if (first != 0) {
- for (i = first; i < count; i++) {
- m[i - first] = m[i];
- reqaddr[i - first] = reqaddr[i];
- off[i - first] = off[i];
+ {
+ int k;
+
+ for (k = 0; k < i; ++k) {
+ vm_page_free(m[k]);
+ }
+ for (k = j; k < count; ++k) {
+ vm_page_free(m[k]);
}
- count -= first;
- reqpage -= first;
}
- ++swb[reqpage]->swb_locked;
/*
- * at this point: "m" is a pointer to the array of vm_page_t for
- * paging I/O "count" is the number of vm_page_t entries represented
- * by "m" "object" is the vm_object_t for I/O "reqpage" is the index
- * into "m" for the page actually faulted
+ * Return VM_PAGER_FAIL if we have nothing
+ * to do. Return mreq still busy, but the
+ * others unbusied.
*/
+ if (blk & SWAPBLK_NONE)
+ return(VM_PAGER_FAIL);
+
+
/*
* Get a swap buffer header to perform the IO
*/
- bp = getpbuf();
+
+ bp = getpbuf(&nsw_rcount);
kva = (vm_offset_t) bp->b_data;
/*
* map our page(s) into kva for input
+ *
+ * NOTE: B_PAGING is set by pbgetvp()
*/
- pmap_qenter(kva, m, count);
- bp->b_flags = B_BUSY | B_READ | B_CALL | B_PAGING;
- bp->b_iodone = swap_pager_iodone1;
+ pmap_qenter(kva, m + i, j - i);
+
+ bp->b_flags = B_BUSY | B_READ | B_CALL;
+ bp->b_iodone = swp_pager_async_iodone;
bp->b_proc = &proc0; /* XXX (but without B_PHYS set this is ok) */
bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred;
crhold(bp->b_rcred);
crhold(bp->b_wcred);
bp->b_data = (caddr_t) kva;
- bp->b_blkno = reqaddr[0];
- bp->b_bcount = PAGE_SIZE * count;
- bp->b_bufsize = PAGE_SIZE * count;
+ /*
+ * b_blkno is in page-sized chunks. swapblk is valid, too, so
+ * we don't have to mask it against SWAPBLK_MASK.
+ */
+ bp->b_blkno = blk - (reqpage - i);
+ bp->b_bcount = PAGE_SIZE * (j - i);
+ bp->b_bufsize = PAGE_SIZE * (j - i);
+ bp->b_pager.pg_reqpage = reqpage - i;
+
+ {
+ int k;
+
+ for (k = i; k < j; ++k) {
+ bp->b_pages[k - i] = m[k];
+ vm_page_flag_set(m[k], PG_SWAPINPROG);
+ }
+ }
+ bp->b_npages = j - i;
pbgetvp(swapdev_vp, bp);
cnt.v_swapin++;
- cnt.v_swappgsin += count;
+ cnt.v_swappgsin += bp->b_npages;
+
+ /*
+ * We still hold the lock on mreq, and our automatic completion routine
+ * does not remove it.
+ */
+
+ vm_object_pip_add(mreq->object, bp->b_npages);
+ lastpindex = m[j-1]->pindex;
+
/*
- * perform the I/O
+ * perform the I/O. NOTE!!! bp cannot be considered valid after
+ * this point because we automatically release it on completion.
+ * Instead, we look at the one page we are interested in which we
+ * still hold a lock on even through the I/O completion.
+ *
+ * The other pages in our m[] array are also released on completion,
+ * so we cannot assume they are valid anymore either.
+ *
+ * NOTE: b_blkno is destroyed by the call to VOP_STRATEGY
*/
+
VOP_STRATEGY(bp->b_vp, bp);
/*
- * wait for the sync I/O to complete
+ * wait for the page we want to complete. PG_SWAPINPROG is always
+ * cleared on completion. If an I/O error occurs, SWAPBLK_NONE
+ * is set in the meta-data.
*/
+
s = splvm();
- while ((bp->b_flags & B_DONE) == 0) {
- if (tsleep(bp, PVM, "swread", hz*20)) {
+
+ while ((mreq->flags & PG_SWAPINPROG) != 0) {
+ vm_page_flag_set(mreq, PG_WANTED | PG_REFERENCED);
+ cnt.v_intrans++;
+ if (tsleep(mreq, PSWP, "swread", hz*20)) {
printf(
-"swap_pager: indefinite wait buffer: device: %#lx, blkno: %ld, size: %ld\n",
+ "swap_pager: indefinite wait buffer: device:"
+ " %#lx, blkno: %ld, size: %ld\n",
(u_long)bp->b_dev, (long)bp->b_blkno,
- (long)bp->b_bcount);
+ (long)bp->b_bcount
+ );
}
}
- if (bp->b_flags & B_ERROR) {
- printf(
-"swap_pager: I/O error - pagein failed; blkno %ld, size %ld, error %d\n",
- (long)bp->b_blkno, (long)bp->b_bcount, bp->b_error);
- rv = VM_PAGER_ERROR;
- } else {
- rv = VM_PAGER_OK;
- }
-
splx(s);
- swb[reqpage]->swb_locked--;
-
- /*
- * remove the mapping for kernel virtual
- */
- pmap_qremove(kva, count);
/*
- * release the physical I/O buffer
- */
- relpbuf(bp);
- /*
- * finish up input if everything is ok
+ * mreq is left busied after completion, but all the other pages
+ * are freed. If we had an unrecoverable read error the page will
+ * not be valid.
*/
- if (rv == VM_PAGER_OK) {
- for (i = 0; i < count; i++) {
- m[i]->dirty = 0;
- vm_page_flag_clear(m[i], PG_ZERO);
- if (i != reqpage) {
- /*
- * whether or not to leave the page
- * activated is up in the air, but we
- * should put the page on a page queue
- * somewhere. (it already is in the
- * object). After some emperical
- * results, it is best to deactivate
- * the readahead pages.
- */
- vm_page_deactivate(m[i]);
- /*
- * just in case someone was asking for
- * this page we now tell them that it
- * is ok to use
- */
- m[i]->valid = VM_PAGE_BITS_ALL;
- vm_page_wakeup(m[i]);
- }
- }
-
- m[reqpage]->object->last_read = m[count-1]->pindex;
+ if (mreq->valid != VM_PAGE_BITS_ALL) {
+ return(VM_PAGER_ERROR);
} else {
- swap_pager_ridpages(m, count, reqpage);
+ mreq->object->last_read = lastpindex;
+ return(VM_PAGER_OK);
}
- return (rv);
+
+ /*
+ * A final note: in a low swap situation, we cannot deallocate swap
+ * and mark a page dirty here because the caller is likely to mark
+ * the page clean when we return, causing the page to possibly revert
+ * to all-zero's later.
+ */
}
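
Editorial sketch of the [i, j) window clipping performed above (assumptions: dmmax is a power-of-two stripe size in page-sized blocks and dmmax_mask == ~(dmmax - 1), matching the xor test in the code):

#include <stdio.h>

#define BLK_NONE (-1L)

int
main(void)
{
    long dmmax = 8;
    long dmmax_mask = ~(dmmax - 1);
    long blks[6] = { 5, 6, 7, 8, 9, 10 };   /* per-page swap blocks */
    int reqpage = 2, count = 6;
    long blk0 = blks[reqpage];
    int i, j;

    for (i = reqpage - 1; i >= 0; --i) {
        if (blks[i] == BLK_NONE ||
            ((blk0 ^ blks[i]) & dmmax_mask) ||   /* stripe crossing */
            blks[i] != blk0 - (reqpage - i))     /* not contiguous */
            break;
    }
    ++i;
    for (j = reqpage + 1; j < count; ++j) {
        if (blks[j] == BLK_NONE ||
            ((blk0 ^ blks[j]) & dmmax_mask) ||
            blks[j] != blk0 + (j - reqpage))
            break;
    }
    /* blocks 5-7 live in stripe 0, blocks 8-15 in stripe 1 */
    printf("window [%d,%d)\n", i, j);       /* prints "window [0,3)" */
    return 0;
}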
+/*
+ * swap_pager_putpages:
+ *
+ * Assign swap (if necessary) and initiate I/O on the specified pages.
+ *
+ * We support both OBJT_DEFAULT and OBJT_SWAP objects. DEFAULT objects
+ * are automatically converted to SWAP objects.
+ *
+ * In a low memory situation we may block in VOP_STRATEGY(), but the new
+ * vm_page reservation system coupled with properly written VFS devices
+ * should ensure that no low-memory deadlock occurs. This is an area
+ * which needs work.
+ *
+ * The parent has N vm_object_pip_add() references prior to
+ * calling us and will remove references for rtvals[] that are
+ * not set to VM_PAGER_PEND. We need to remove the rest on I/O
+ * completion.
+ *
+ * The parent has soft-busy'd the pages it passes us and will unbusy
+ * those whose rtvals[] entry is not set to VM_PAGER_PEND on return.
+ * We need to unbusy the rest on I/O completion.
+ */
+
int
swap_pager_putpages(object, m, count, sync, rtvals)
vm_object_t object;
@@ -1136,534 +982,749 @@ swap_pager_putpages(object, m, count, sync, rtvals)
boolean_t sync;
int *rtvals;
{
- register struct buf *bp;
- sw_blk_t swb[count];
- register int s;
- int i, j, ix, firstidx, lastidx;
- boolean_t rv;
- vm_offset_t kva, off, fidx;
- swp_clean_t spc;
- vm_pindex_t paging_pindex;
- int reqaddr[count];
- int failed;
-
- if (vm_swap_size)
- no_swap_space = 0;
-
- if (no_swap_space) {
- for (i = 0; i < count; i++)
- rtvals[i] = VM_PAGER_FAIL;
- return VM_PAGER_FAIL;
+ int i;
+ int n = 0;
+ int grv = VM_PAGER_OK;
+
+#if !defined(MAX_PERF)
+ if (count && m[0]->object != object) {
+ panic("swap_pager_getpages: object mismatch %p/%p",
+ object,
+ m[0]->object
+ );
+ }
+#endif
+ /*
+ * Step 1
+ *
+ * Turn object into OBJT_SWAP
+ * check for bogus sysops
+ * force sync if not pageout process
+ */
+
+ if (object->type != OBJT_SWAP) {
+ swp_pager_meta_build(object, 0, SWAPBLK_NONE, 0);
}
if (curproc != pageproc)
sync = TRUE;
- object = m[0]->object;
- paging_pindex = OFF_TO_IDX(object->paging_offset);
-
- failed = 0;
- for (j = 0; j < count; j++) {
- fidx = m[j]->pindex + paging_pindex;
- ix = swap_pager_block_index(fidx);
- swb[j] = 0;
- if (ix >= object->un_pager.swp.swp_nblocks) {
- rtvals[j] = VM_PAGER_FAIL;
- failed = 1;
- continue;
- } else {
- rtvals[j] = VM_PAGER_OK;
- }
- swb[j] = &object->un_pager.swp.swp_blocks[ix];
- swb[j]->swb_locked++;
- if (failed) {
- rtvals[j] = VM_PAGER_FAIL;
- continue;
- }
- off = swap_pager_block_offset(fidx);
- reqaddr[j] = swb[j]->swb_block[off];
- if (reqaddr[j] == SWB_EMPTY) {
- daddr_t blk;
- int tries;
- int ntoget;
+ /*
+ * Step 2
+ *
+ * Assign swap blocks and issue I/O. We reallocate swap on the fly.
+ * The page is left dirty until the pageout operation completes
+ * successfully.
+ */
- tries = 0;
- s = splvm();
+ for (i = 0; i < count; i += n) {
+ int s;
+ int j;
+ struct buf *bp;
+ daddr_t blk;
- /*
- * if any other pages have been allocated in this
- * block, we only try to get one page.
- */
- for (i = 0; i < SWB_NPAGES; i++) {
- if (swb[j]->swb_block[i] != SWB_EMPTY)
- break;
- }
+ /*
+ * Maximum I/O size is limited by a number of factors.
+ */
- ntoget = (i == SWB_NPAGES) ? SWB_NPAGES : 1;
- /*
- * this code is alittle conservative, but works (the
- * intent of this code is to allocate small chunks for
- * small objects)
- */
- if ((off == 0) && ((fidx + ntoget) > object->size)) {
- ntoget = object->size - fidx;
- }
- retrygetspace:
- if (!swap_pager_full && ntoget > 1 &&
- swap_pager_getswapspace(object, ntoget * btodb(PAGE_SIZE),
- &blk)) {
-
- for (i = 0; i < ntoget; i++) {
- swb[j]->swb_block[i] = blk + btodb(PAGE_SIZE) * i;
- swb[j]->swb_valid = 0;
- }
+ n = min(BLIST_MAX_ALLOC, count - i);
+ n = min(n, max_pageout_cluster);
- reqaddr[j] = swb[j]->swb_block[off];
- } else if (!swap_pager_getswapspace(object, btodb(PAGE_SIZE),
- &swb[j]->swb_block[off])) {
- /*
- * if the allocation has failed, we try to
- * reclaim space and retry.
- */
- if (++tries == 1) {
- swap_pager_reclaim();
- goto retrygetspace;
- }
- rtvals[j] = VM_PAGER_AGAIN;
- failed = 1;
- swap_pager_full = 1;
- } else {
- reqaddr[j] = swb[j]->swb_block[off];
- swb[j]->swb_valid &= ~(1 << off);
+ /*
+ * Get biggest block of swap we can. If we fail, fall
+ * back and try to allocate a smaller block. Don't go
+ * overboard trying to allocate space if it would overly
+ * fragment swap.
+ */
+ while (
+ (blk = swp_pager_getswapspace(n)) == SWAPBLK_NONE &&
+ n > 4
+ ) {
+ n >>= 1;
+ }
+ if (blk == SWAPBLK_NONE) {
+ for (j = 0; j < n; ++j) {
+ rtvals[i+j] = VM_PAGER_FAIL;
}
- splx(s);
+ grv = VM_PAGER_FAIL;
+ continue;
}
- }
- /*
- * search forwards for the last contiguous page to transfer
- */
- failed = 0;
- for (i = 0; i < count; i++) {
- if (failed ||
- (reqaddr[i] != reqaddr[0] + i * btodb(PAGE_SIZE)) ||
- ((reqaddr[i] / dmmax) != (reqaddr[0] / dmmax)) ||
- (rtvals[i] != VM_PAGER_OK)) {
- failed = 1;
- if (rtvals[i] == VM_PAGER_OK)
- rtvals[i] = VM_PAGER_AGAIN;
+ /*
+ * Oops, too big if it crosses a stripe
+ *
+ * 1111000000
+ * 111111
+ * 1000001
+ */
+ if ((blk ^ (blk + n)) & dmmax_mask) {
+ j = ((blk + dmmax) & dmmax_mask) - blk;
+ swp_pager_freeswapspace(blk + j, n - j);
+ n = j;
}
- }
- ix = 0;
- firstidx = -1;
- for (i = 0; i < count; i++) {
- if (rtvals[i] == VM_PAGER_OK) {
- ix++;
- if (firstidx == -1) {
- firstidx = i;
- }
- } else if (firstidx >= 0) {
- break;
- }
- }
+ /*
+ * All I/O parameters have been satisfied, build the I/O
+ * request and assign the swap space.
+ *
+ * NOTE: B_PAGING is set by pbgetvp()
+ */
- if (firstidx == -1) {
- for (i = 0; i < count; i++) {
- if (rtvals[i] == VM_PAGER_OK)
- rtvals[i] = VM_PAGER_AGAIN;
- }
- return VM_PAGER_AGAIN;
- }
+ bp = getpbuf(&nsw_wcount);
+ bp->b_spc = NULL; /* not used, but NULL-out anyway */
- lastidx = firstidx + ix;
+ pmap_qenter((vm_offset_t)bp->b_data, &m[i], n);
- if (ix > max_pageout_cluster) {
- for (i = firstidx + max_pageout_cluster; i < lastidx; i++) {
- if (rtvals[i] == VM_PAGER_OK)
- rtvals[i] = VM_PAGER_AGAIN;
- }
- ix = max_pageout_cluster;
- lastidx = firstidx + ix;
- }
+ bp->b_flags = B_BUSY | B_ASYNC;
+ bp->b_proc = &proc0; /* XXX (but without B_PHYS this is ok) */
+ bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred;
- for (i = 0; i < firstidx; i++) {
- if (swb[i])
- swb[i]->swb_locked--;
- }
+ if (bp->b_rcred != NOCRED)
+ crhold(bp->b_rcred);
+ if (bp->b_wcred != NOCRED)
+ crhold(bp->b_wcred);
+ pbgetvp(swapdev_vp, bp);
- for (i = lastidx; i < count; i++) {
- if (swb[i])
- swb[i]->swb_locked--;
- }
+ bp->b_bcount = PAGE_SIZE * n;
+ bp->b_bufsize = PAGE_SIZE * n;
+ bp->b_blkno = blk;
-#ifdef INVARIANTS
- for (i = firstidx; i < lastidx; i++) {
- if (reqaddr[i] == SWB_EMPTY) {
- printf("I/O to empty block???? -- pindex: %d, i: %d\n",
- m[i]->pindex, i);
- }
- }
-#endif
+ s = splvm();
- /*
- * Clean up all completed async pageouts.
- */
- if (swap_pager_free_pending)
- swap_pager_sync();
+ for (j = 0; j < n; ++j) {
+ vm_page_t mreq = m[i+j];
- /*
- * get a swap pager clean data structure, block until we get it
- */
- if (curproc == pageproc) {
- if (swap_pager_free_count == 0) {
- s = splvm();
- while (swap_pager_free_count == 0) {
- swap_pager_needflags |= SWAP_FREE_NEEDED_BY_PAGEOUT;
- /*
- * if it does not get one within a short time, then
- * there is a potential deadlock, so we go-on trying
- * to free pages. It is important to block here as opposed
- * to returning, thereby allowing the pageout daemon to continue.
- * It is likely that pageout daemon will start suboptimally
- * reclaiming vnode backed pages if we don't block. Since the
- * I/O subsystem is probably already fully utilized, might as
- * well wait.
- */
- if (tsleep(&swap_pager_needflags, PVM-1, "swpfre", hz/2)) {
- if (swap_pager_free_pending)
- swap_pager_sync();
- if (swap_pager_free_count == 0) {
- for (i = firstidx; i < lastidx; i++) {
- rtvals[i] = VM_PAGER_AGAIN;
- }
- splx(s);
- return VM_PAGER_AGAIN;
- }
- } else {
- swap_pager_sync();
- }
- }
- splx(s);
+ swp_pager_meta_build(
+ mreq->object,
+ mreq->pindex,
+ blk + j,
+ 0
+ );
+ mreq->dirty = VM_PAGE_BITS_ALL;
+ rtvals[i+j] = VM_PAGER_OK;
+
+ vm_page_flag_set(mreq, PG_SWAPINPROG);
+ bp->b_pages[j] = mreq;
}
+ bp->b_flags |= B_CALL;
+ bp->b_npages = n;
- spc = TAILQ_FIRST(&swap_pager_free);
- KASSERT(spc != NULL,
- ("swap_pager_putpages: free queue is empty, %d expected\n",
- swap_pager_free_count));
- TAILQ_REMOVE(&swap_pager_free, spc, spc_list);
- swap_pager_free_count--;
-
- kva = spc->spc_kva;
- bp = spc->spc_bp;
- bzero(bp, sizeof *bp);
- bp->b_spc = spc;
- bp->b_xflags = 0;
- bp->b_data = (caddr_t) kva;
- } else {
- spc = NULL;
- bp = getpbuf();
- kva = (vm_offset_t) bp->b_data;
- bp->b_spc = NULL;
- }
+ cnt.v_swapout++;
+ cnt.v_swappgsout += bp->b_npages;
+ swapdev_vp->v_numoutput++;
- /*
- * map our page(s) into kva for I/O
- */
- pmap_qenter(kva, &m[firstidx], ix);
+ /*
+ * asynchronous
+ *
+ * NOTE: b_blkno is destroyed by the call to VOP_STRATEGY
+ */
+
+ if (sync == FALSE) {
+ bp->b_iodone = swp_pager_async_iodone;
+ bp->b_dirtyoff = 0;
+ bp->b_dirtyend = bp->b_bcount;
+ VOP_STRATEGY(bp->b_vp, bp);
+
+ for (j = 0; j < n; ++j)
+ rtvals[i+j] = VM_PAGER_PEND;
+
+ splx(s);
+ grv = VM_PAGER_PEND;
+ continue;
+ }
- /*
- * get the base I/O offset into the swap file
- */
- for (i = firstidx; i < lastidx ; i++) {
- fidx = m[i]->pindex + paging_pindex;
- off = swap_pager_block_offset(fidx);
/*
- * set the valid bit
+ * synchronous
+ *
+ * NOTE: b_blkno is destroyed by the call to VOP_STRATEGY
*/
- swb[i]->swb_valid |= (1 << off);
+
+ bp->b_iodone = swp_pager_sync_iodone;
+ VOP_STRATEGY(bp->b_vp, bp);
+
/*
- * and unlock the data structure
+ * Wait for the sync I/O to complete, then update rtvals.
+ * We just set the rtvals[] to VM_PAGER_PEND so we can call
+ * our async completion routine at the end, thus avoiding a
+ * double-free.
*/
- swb[i]->swb_locked--;
- }
+ while ((bp->b_flags & B_DONE) == 0) {
+ tsleep(bp, PVM, "swwrt", 0);
+ }
- bp->b_flags = B_BUSY | B_PAGING;
- bp->b_proc = &proc0; /* XXX (but without B_PHYS set this is ok) */
- bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred;
- if (bp->b_rcred != NOCRED)
- crhold(bp->b_rcred);
- if (bp->b_wcred != NOCRED)
- crhold(bp->b_wcred);
- bp->b_blkno = reqaddr[firstidx];
- pbgetvp(swapdev_vp, bp);
+ if (bp->b_flags & B_ERROR) {
+ grv = VM_PAGER_ERROR;
+ }
- bp->b_bcount = PAGE_SIZE * ix;
- bp->b_bufsize = PAGE_SIZE * ix;
+ for (j = 0; j < n; ++j)
+ rtvals[i+j] = VM_PAGER_PEND;
- s = splvm();
- swapdev_vp->v_numoutput++;
- /*
- * If this is an async write we set up additional buffer fields and
- * place a "cleaning" entry on the inuse queue.
- */
- object->un_pager.swp.swp_poip++;
-
- if (spc) {
- spc->spc_flags = 0;
- spc->spc_object = object;
- bp->b_npages = ix;
- for (i = firstidx; i < lastidx; i++) {
- spc->spc_m[i] = m[i];
- bp->b_pages[i - firstidx] = m[i];
- vm_page_protect(m[i], VM_PROT_READ);
- pmap_clear_modify(VM_PAGE_TO_PHYS(m[i]));
- m[i]->dirty = 0;
- }
- spc->spc_first = firstidx;
- spc->spc_count = ix;
/*
- * the completion routine for async writes
+ * Now that we are through with the bp, we can call the
+ * normal async completion, which frees everything up.
*/
- bp->b_flags |= B_CALL;
- bp->b_iodone = swap_pager_iodone;
- bp->b_dirtyoff = 0;
- bp->b_dirtyend = bp->b_bcount;
- TAILQ_INSERT_TAIL(&swap_pager_inuse, spc, spc_list);
- } else {
- bp->b_flags |= B_CALL;
- bp->b_iodone = swap_pager_iodone1;
- bp->b_npages = ix;
- for (i = firstidx; i < lastidx; i++)
- bp->b_pages[i - firstidx] = m[i];
- }
- cnt.v_swapout++;
- cnt.v_swappgsout += ix;
+ swp_pager_async_iodone(bp);
- /*
- * perform the I/O
- */
- VOP_STRATEGY(bp->b_vp, bp);
- if (sync == FALSE) {
- if (swap_pager_free_pending) {
- swap_pager_sync();
- }
- for (i = firstidx; i < lastidx; i++) {
- rtvals[i] = VM_PAGER_PEND;
- }
splx(s);
- return VM_PAGER_PEND;
}
+ return(grv);
+}
+
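
Editorial sketch of the allocation strategy in swap_pager_putpages() above; alloc_space() is an illustrative stand-in for swp_pager_getswapspace(), and the n > 4 floor mirrors the fallback loop in the code:

#include <stdio.h>

#define BLK_NONE (-1L)

static long
alloc_space(int n)
{
    /* pretend only runs of 4 or fewer are available, starting at 13 */
    return (n <= 4) ? 13 : BLK_NONE;
}

int
main(void)
{
    long dmmax = 8, dmmax_mask = ~(dmmax - 1);
    int n = 16;
    long blk;

    while ((blk = alloc_space(n)) == BLK_NONE && n > 4)
        n >>= 1;                        /* fall back to smaller runs */

    if (blk != BLK_NONE && ((blk ^ (blk + n)) & dmmax_mask)) {
        int j = (int)(((blk + dmmax) & dmmax_mask) - blk);
        /* blocks [blk + j, blk + n) would go back to the pool */
        n = j;
    }
    printf("blk=%ld n=%d\n", blk, n);   /* blk=13 n=3: trimmed at 16 */
    return 0;
}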
+/*
+ * swap_pager_sync_iodone:
+ *
+ * Completion routine for synchronous reads and writes from/to swap.
+ * We just mark the bp as complete and wake up anyone waiting on it.
+ *
+ * This routine may not block.
+ */
+
+static void
+swp_pager_sync_iodone(bp)
+ struct buf *bp;
+{
+ bp->b_flags |= B_DONE;
+ bp->b_flags &= ~B_ASYNC;
+ wakeup(bp);
+}
+
+/*
+ * swp_pager_async_iodone:
+ *
+ * Completion routine for asynchronous reads and writes from/to swap.
+ * Also called manually by synchronous code to finish up a bp.
+ *
+ * WARNING! This routine may be called from an interrupt. We cannot
+ * mess with swap metadata unless we want to run all our other routines
+ * at splbio() too, which I'd rather not do. We up ourselves
+ * to splvm() because we may call vm_page_free(), which can unlink a
+ * page from an object.
+ *
+ * XXX currently I do not believe any object routines protect
+ * object->memq at splvm(). The code must be gone over to determine
+ * the actual state of the problem.
+ *
+ * For READ operations, the pages are PG_BUSY'd. For WRITE operations,
+ * the pages are vm_page_t->busy'd. For READ operations, we PG_BUSY-
+ * unbusy all pages except the 'main' request page. For WRITE
+ * operations, we vm_page_t->busy-unbusy all pages ( we can do this
+ * because we marked them all VM_PAGER_PEND on return from putpages ).
+ *
+ * This routine may not block.
+ * This routine is called at splbio()
+ */
+
+static void
+swp_pager_async_iodone(bp)
+ register struct buf *bp;
+{
+ int s;
+ int i;
+ vm_object_t object = NULL;
+
+ s = splvm();
+
+ bp->b_flags |= B_DONE;
+
/*
- * wait for the sync I/O to complete
+ * report error
*/
- while ((bp->b_flags & B_DONE) == 0) {
- tsleep(bp, PVM, "swwrt", 0);
- }
if (bp->b_flags & B_ERROR) {
printf(
-"swap_pager: I/O error - pageout failed; blkno %ld, size %ld, error %d\n",
- (long)bp->b_blkno, (long)bp->b_bcount, bp->b_error);
- rv = VM_PAGER_ERROR;
- } else {
- rv = VM_PAGER_OK;
+ "swap_pager: I/O error - %s failed; blkno %ld,"
+ "size %ld, error %d\n",
+ ((bp->b_flags & B_READ) ? "pagein" : "pageout"),
+ (long)bp->b_blkno,
+ (long)bp->b_bcount,
+ bp->b_error
+ );
}
- object->un_pager.swp.swp_poip--;
- if (object->un_pager.swp.swp_poip == 0)
- wakeup(object);
-
- if (bp->b_vp)
- pbrelvp(bp);
+ /*
+ * set object.
+ */
- splx(s);
+ if (bp->b_npages)
+ object = bp->b_pages[0]->object;
/*
* remove the mapping for kernel virtual
*/
- pmap_qremove(kva, ix);
+
+ pmap_qremove((vm_offset_t)bp->b_data, bp->b_npages);
/*
- * if we have written the page, then indicate that the page is clean.
+ * cleanup pages. If an error occurs writing to swap, we are in
+ * very serious trouble. If it happens to be a disk error, though,
+ * we may be able to recover by reassigning the swap later on. So
+ * in this case we remove the m->swapblk assignment for the page
+ * but do not free it in the rlist. The erroneous block(s) are thus
+ * never reallocated as swap. Redirty the page and continue.
*/
- if (rv == VM_PAGER_OK) {
- for (i = firstidx; i < lastidx; i++) {
- if (rtvals[i] == VM_PAGER_OK) {
- pmap_clear_modify(VM_PAGE_TO_PHYS(m[i]));
- m[i]->dirty = 0;
+
+ for (i = 0; i < bp->b_npages; ++i) {
+ vm_page_t m = bp->b_pages[i];
+
+ vm_page_flag_clear(m, PG_SWAPINPROG);
+
+ if (bp->b_flags & B_ERROR) {
+ /*
+ * If an error occurs I'd love to throw the swapblk
+ * away without freeing it back to swapspace, so it
+ * can never be used again. But I can't from an
+ * interrupt.
+ */
+
+ if (bp->b_flags & B_READ) {
/*
- * optimization, if a page has been read
- * during the pageout process, we activate it.
+ * When reading, reqpage needs to stay
+ * locked for the parent, but all other
+ * pages can be freed. We still want to
+ * wakeup the parent waiting on the page,
+ * though. ( also: pg_reqpage can be -1 and
+ * not match anything ).
+ *
+ * We have to wake specifically requested pages
+ * up too because we cleared PG_SWAPINPROG and
+ * someone may be waiting for that.
+ *
+ * NOTE: for reads, m->dirty will probably
+ * be overridden by the original caller of
+ * getpages so don't play cute tricks here.
+ *
+ * XXX it may not be legal to free the page
+ * here as this messes with the object->memq's.
*/
- if (((m[i]->flags & (PG_WANTED|PG_REFERENCED)) ||
- pmap_ts_referenced(VM_PAGE_TO_PHYS(m[i])))) {
- vm_page_activate(m[i]);
- }
+
+ m->valid = 0;
+ vm_page_flag_clear(m, PG_ZERO);
+
+ if (i != bp->b_pager.pg_reqpage)
+ vm_page_free(m);
+ else
+ vm_page_flash(m);
+ /*
+ * If i == bp->b_pager.pg_reqpage, do not wake
+ * the page up. The caller needs to.
+ */
+ } else {
+ /*
+ * If a write error occurs, reactivate page
+ * so it doesn't clog the inactive list,
+ * then finish the I/O.
+ */
+ m->dirty = VM_PAGE_BITS_ALL;
+ vm_page_activate(m);
+ vm_page_io_finish(m);
}
- }
- } else {
- for (i = firstidx; i < lastidx; i++) {
- rtvals[i] = rv;
+ } else if (bp->b_flags & B_READ) {
+ /*
+ * For read success, clear dirty bits. Nobody should
+ * have this page mapped but don't take any chances,
+ * make sure the pmap modify bits are also cleared.
+ *
+ * NOTE: for reads, m->dirty will probably be
+ * overridden by the original caller of getpages so
+ * we cannot set them in order to free the underlying
+ * swap in a low-swap situation. I don't think we'd
+ * want to do that anyway, but it was an optimization
+ * that existed in the old swapper for a time before
+ * it got ripped out due to precisely this problem.
+ *
+ * clear PG_ZERO in page.
+ *
+ * If not the requested page then deactivate it.
+ *
+ * Note that the requested page, reqpage, is left
+ * busied, but we still have to wake it up. The
+ * other pages are released (unbusied) by
+ * vm_page_wakeup(). We do not set reqpage's
+ * valid bits here, it is up to the caller.
+ */
+
+ pmap_clear_modify(VM_PAGE_TO_PHYS(m));
+ m->valid = VM_PAGE_BITS_ALL;
+ m->dirty = 0;
+ vm_page_flag_clear(m, PG_ZERO);
+
+ /*
+ * We have to wake specifically requested pages
+ * up too because we cleared PG_SWAPINPROG and
+ * could be waiting for it in getpages. However,
+ * be sure to not unbusy getpages specifically
+ * requested page - getpages expects it to be
+ * left busy.
+ */
+ if (i != bp->b_pager.pg_reqpage) {
+ vm_page_deactivate(m);
+ vm_page_wakeup(m);
+ } else {
+ vm_page_flash(m);
+ }
+ } else {
+ /*
+ * For write success, clear the modify and dirty
+ * status, then finish the I/O ( which decrements the
+ * busy count and possibly wakes waiter's up ).
+ */
+ vm_page_protect(m, VM_PROT_READ);
+ pmap_clear_modify(VM_PAGE_TO_PHYS(m));
+ m->dirty = 0;
+ vm_page_io_finish(m);
}
}
- if (spc != NULL) {
- if (bp->b_rcred != NOCRED)
- crfree(bp->b_rcred);
- if (bp->b_wcred != NOCRED)
- crfree(bp->b_wcred);
- spc_free(spc);
- } else
- relpbuf(bp);
- if (swap_pager_free_pending)
- swap_pager_sync();
-
- return (rv);
+ /*
+ * adjust pip. NOTE: the original parent may still have its own
+ * pip refs on the object.
+ */
+
+ if (object)
+ vm_object_pip_wakeupn(object, bp->b_npages);
+
+ /*
+ * release the physical I/O buffer
+ */
+
+ relpbuf(bp, ((bp->b_flags & B_READ) ? &nsw_rcount : &nsw_wcount));
+
+ splx(s);
}
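
The branchy cleanup loop above reduces to a small decision table; a standalone editorial restatement (the enum names are editorial, not kernel symbols):

#include <stdio.h>

enum action {
    FREE_PAGE,          /* read error, not the requested page */
    FLASH_ONLY,         /* requested page: wake waiters, stay busy */
    REDIRTY_ACTIVATE,   /* write error: keep dirty, reactivate */
    DEACTIVATE_WAKEUP,  /* read ok, readahead page */
    FINISH_WRITE        /* write ok: clean page, finish I/O */
};

static enum action
iodone_action(int error, int is_read, int is_reqpage)
{
    if (error && is_read)
        return is_reqpage ? FLASH_ONLY : FREE_PAGE;
    if (error)
        return REDIRTY_ACTIVATE;
    if (is_read)
        return is_reqpage ? FLASH_ONLY : DEACTIVATE_WAKEUP;
    return FINISH_WRITE;
}

int
main(void)
{
    /* read success on a readahead page: prints 3 (DEACTIVATE_WAKEUP) */
    printf("%d\n", iodone_action(0, 1, 0));
    return 0;
}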
-void
-swap_pager_sync()
+/************************************************************************
+ * SWAP META DATA *
+ ************************************************************************
+ *
+ * These routines manipulate the swap metadata stored in the
+ * OBJT_SWAP object.
+ *
+ * In fact, we just have a few counters in the vm_object_t. The
+ * metadata is actually stored in a hash table.
+ */
+
+/*
+ * SWP_PAGER_HASH() - hash swap meta data
+ *
+ * This is an inline helper function which hashes the swapblk given
+ * the object and page index. It returns a pointer to a pointer
+ * to the swblock structure, or a pointer to a NULL pointer if it
+ * could not find a swapblk.
+ */
+
+static __inline struct swblock **
+swp_pager_hash(vm_object_t object, daddr_t index)
{
- swp_clean_t spc;
+ struct swblock **pswap;
+ struct swblock *swap;
+
+ index &= ~SWAP_META_MASK;
+ pswap = &swhash[(index ^ (int)(long)object) & swhash_mask];
- while (spc = TAILQ_FIRST(&swap_pager_done)) {
- swap_pager_finish(spc);
+ while ((swap = *pswap) != NULL) {
+ if (swap->swb_object == object &&
+ swap->swb_index == index
+ ) {
+ break;
+ }
+ pswap = &swap->swb_hnext;
}
- return;
+ return(pswap);
}
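
A userland model of the hashing above (assumptions: swhash_mask implies a power-of-two table, and folding the object pointer into the chunk-aligned index is the whole hash):

#include <stdio.h>

#define META_PAGES 16                /* SWAP_META_PAGES */
#define META_MASK  (META_PAGES - 1)
#define HASH_SIZE  64                /* must be a power of two */
#define HASH_MASK  (HASH_SIZE - 1)

static unsigned
bucket_for(const void *object, long index)
{
    index &= ~(long)META_MASK;       /* chunk-align the page index */
    return (unsigned)((index ^ (long)object) & HASH_MASK);
}

int
main(void)
{
    int obj;                         /* any address serves as identity */

    /* pages 0-15 of an object land in the same bucket */
    printf("%u %u\n", bucket_for(&obj, 3), bucket_for(&obj, 12));
    return 0;
}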
+/*
+ * SWP_PAGER_META_BUILD() - add swap block to swap meta data for object
+ *
+ * We first convert the object to a swap object if it is a default
+ * object.
+ *
+ * The specified swapblk is added to the object's swap metadata. If
+ * the swapblk is not valid, it is freed instead. Any previously
+ * assigned swapblk is freed.
+ */
+
static void
-swap_pager_finish(spc)
- register swp_clean_t spc;
-{
- int i, s, lastidx;
- vm_object_t object;
- vm_page_t *ma;
+swp_pager_meta_build(
+ vm_object_t object,
+ daddr_t index,
+ daddr_t swapblk,
+ int waitok
+) {
+ struct swblock *swap;
+ struct swblock **pswap;
- ma = spc->spc_m;
- object = spc->spc_object;
- lastidx = spc->spc_first + spc->spc_count;
+ /*
+ * Convert default object to swap object if necessary
+ */
- s = splvm();
- TAILQ_REMOVE(&swap_pager_done, spc, spc_list);
- splx(s);
+ if (object->type != OBJT_SWAP) {
+ object->type = OBJT_SWAP;
+ object->un_pager.swp.swp_bcount = 0;
+
+ if (object->handle != NULL) {
+ TAILQ_INSERT_TAIL(
+ NOBJLIST(object->handle),
+ object,
+ pager_object_list
+ );
+ } else {
+ TAILQ_INSERT_TAIL(
+ &swap_pager_un_object_list,
+ object,
+ pager_object_list
+ );
+ }
+ }
+
+ /*
+ * Wait for free memory when waitok is TRUE prior to calling the
+ * zone allocator.
+ */
- pmap_qremove(spc->spc_kva, spc->spc_count);
+ while (waitok && cnt.v_free_count == 0) {
+ VM_WAIT;
+ }
/*
- * If no error, mark as clean and inform the pmap system. If error,
- * mark as dirty so we will try again. (XXX could get stuck doing
- * this, should give up after awhile)
+ * If swapblk being added is invalid, just free it.
*/
- if (spc->spc_flags & SPC_ERROR) {
- for (i = spc->spc_first; i < lastidx; i++) {
- printf("swap_pager_finish: I/O error, clean of page %lx failed\n",
- (u_long) VM_PAGE_TO_PHYS(ma[i]));
- ma[i]->dirty = VM_PAGE_BITS_ALL;
- vm_page_io_finish(ma[i]);
+ if (swapblk & SWAPBLK_NONE) {
+ if (swapblk != SWAPBLK_NONE) {
+ swp_pager_freeswapspace(
+ swapblk & SWAPBLK_MASK,
+ 1
+ );
+ swapblk = SWAPBLK_NONE;
}
+ }
- vm_object_pip_subtract(object, spc->spc_count);
- if ((object->paging_in_progress == 0) &&
- (object->flags & OBJ_PIPWNT)) {
- vm_object_clear_flag(object, OBJ_PIPWNT);
- wakeup(object);
- }
+ /*
+ * Locate hash entry. If not found create, but if we aren't adding
+ * anything just return.
+ */
- } else {
- for (i = spc->spc_first; i < lastidx; i++) {
- if ((ma[i]->queue != PQ_ACTIVE) &&
- ((ma[i]->flags & PG_WANTED) ||
- pmap_ts_referenced(VM_PAGE_TO_PHYS(ma[i])))) {
- vm_page_activate(ma[i]);
- }
- }
+ pswap = swp_pager_hash(object, index);
+
+ if ((swap = *pswap) == NULL) {
+ int i;
+
+ if (swapblk == SWAPBLK_NONE)
+ return;
+
+ swap = *pswap = zalloc(swap_zone);
+
+ swap->swb_hnext = NULL;
+ swap->swb_object = object;
+ swap->swb_index = index & ~SWAP_META_MASK;
+ swap->swb_count = 0;
+
+ ++object->un_pager.swp.swp_bcount;
+
+ for (i = 0; i < SWAP_META_PAGES; ++i)
+ swap->swb_pages[i] = SWAPBLK_NONE;
}
- nswiodone -= spc->spc_count;
- swap_pager_free_pending--;
- spc_free(spc);
+ /*
+ * Delete prior contents of metadata
+ */
- return;
+ index &= SWAP_META_MASK;
+
+ if (swap->swb_pages[index] != SWAPBLK_NONE) {
+ swp_pager_freeswapspace(
+ swap->swb_pages[index] & SWAPBLK_MASK,
+ 1
+ );
+ --swap->swb_count;
+ }
+
+ /*
+ * Enter block into metadata
+ */
+
+ swap->swb_pages[index] = swapblk;
+ ++swap->swb_count;
}
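
A single-chunk model of the replace semantics above; free_blk() stands in for swp_pager_freeswapspace(), and the kernel version additionally allocates the chunk from swap_zone and converts the object on first use:

#include <stdio.h>

#define META_PAGES 16
#define BLK_NONE   (-1L)

struct chunk { long pages[META_PAGES]; int count; };

static void
free_blk(long blk)
{
    printf("freed block %ld\n", blk);
}

static void
meta_build(struct chunk *c, int slot, long blk)
{
    if (c->pages[slot] != BLK_NONE) {   /* delete prior contents */
        free_blk(c->pages[slot]);
        --c->count;
    }
    c->pages[slot] = blk;               /* enter block into metadata */
    ++c->count;
}

int
main(void)
{
    struct chunk c;
    int i;

    for (i = 0; i < META_PAGES; ++i)
        c.pages[i] = BLK_NONE;
    c.count = 0;

    meta_build(&c, 5, 100);             /* first assignment */
    meta_build(&c, 5, 200);             /* frees block 100 first */
    printf("count=%d\n", c.count);      /* count=1 */
    return 0;
}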
/*
- * swap_pager_iodone
+ * SWP_PAGER_META_FREE() - free a range of blocks in the object's swap metadata
+ *
+ * The requested range of blocks is freed, with any associated swap
+ * returned to the swap bitmap.
+ *
+ * This routine will free swap metadata structures as they are cleaned
+ * out. This routine does *NOT* operate on swap metadata associated
+ * with resident pages.
+ *
+ * This routine must be called at splvm()
*/
+
static void
-swap_pager_iodone(bp)
- register struct buf *bp;
+swp_pager_meta_free(vm_object_t object, daddr_t index, daddr_t count)
{
- int i, s, lastidx;
- register swp_clean_t spc;
- vm_object_t object;
- vm_page_t *ma;
+ if (object->type != OBJT_SWAP)
+ return;
+ while (count > 0) {
+ struct swblock **pswap;
+ struct swblock *swap;
- s = splvm();
- spc = (swp_clean_t) bp->b_spc;
- TAILQ_REMOVE(&swap_pager_inuse, spc, spc_list);
- TAILQ_INSERT_TAIL(&swap_pager_done, spc, spc_list);
+ pswap = swp_pager_hash(object, index);
- object = spc->spc_object;
+ if ((swap = *pswap) != NULL) {
+ daddr_t v = swap->swb_pages[index & SWAP_META_MASK];
-#if defined(DIAGNOSTIC)
- if (object->paging_in_progress < spc->spc_count)
- printf("swap_pager_iodone: paging_in_progress(%d) < spc_count(%d)\n",
- object->paging_in_progress, spc->spc_count);
-#endif
-
- if (bp->b_flags & B_ERROR) {
- spc->spc_flags |= SPC_ERROR;
- printf("swap_pager: I/O error - async %s failed; blkno %lu, size %ld, error %d\n",
- (bp->b_flags & B_READ) ? "pagein" : "pageout",
- (u_long) bp->b_blkno, bp->b_bcount, bp->b_error);
- } else {
- vm_object_pip_subtract(object, spc->spc_count);
- if ((object->paging_in_progress == 0) &&
- (object->flags & OBJ_PIPWNT)) {
- vm_object_clear_flag(object, OBJ_PIPWNT);
- wakeup(object);
- }
- ma = spc->spc_m;
- lastidx = spc->spc_first + spc->spc_count;
- for (i = spc->spc_first; i < lastidx; i++) {
- /*
- * we wakeup any processes that are waiting on these pages.
- */
- vm_page_io_finish(ma[i]);
+ if (v != SWAPBLK_NONE) {
+ swp_pager_freeswapspace(v, 1);
+ swap->swb_pages[index & SWAP_META_MASK] =
+ SWAPBLK_NONE;
+ if (--swap->swb_count == 0) {
+ *pswap = swap->swb_hnext;
+ zfree(swap_zone, swap);
+ --object->un_pager.swp.swp_bcount;
+ }
+ }
+ --count;
+ ++index;
+ } else {
+ daddr_t n = SWAP_META_PAGES - (index & SWAP_META_MASK);
+ count -= n;
+ index += n;
}
}
+}
+
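The missing-chunk fast path above advances index and count by whole chunks instead of one page at a time; an editorial sketch of that arithmetic:

#include <stdio.h>

#define META_PAGES 16L
#define META_MASK  (META_PAGES - 1)

int
main(void)
{
    long index = 21, count = 40;

    while (count > 0) {
        /* pretend no chunk is hashed at any of these indices */
        long n = META_PAGES - (index & META_MASK);

        printf("skip %ld pages at %ld\n", n, index);
        count -= n;                 /* may overshoot, as in the kernel */
        index += n;
    }
    /* prints: skip 11 at 21, skip 16 at 32, skip 16 at 48 */
    return 0;
}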
+/*
+ * SWP_PAGER_META_FREE_ALL() - destroy all swap metadata associated with object
+ *
+ * This routine locates and destroys all swap metadata associated with
+ * an object.
+ */
+
+static void
+swp_pager_meta_free_all(vm_object_t object)
+{
+ daddr_t index = 0;
- if (bp->b_vp)
- pbrelvp(bp);
+ if (object->type != OBJT_SWAP)
+ return;
- if (bp->b_rcred != NOCRED)
- crfree(bp->b_rcred);
- if (bp->b_wcred != NOCRED)
- crfree(bp->b_wcred);
+ while (object->un_pager.swp.swp_bcount) {
+ struct swblock **pswap;
+ struct swblock *swap;
- nswiodone += spc->spc_count;
- swap_pager_free_pending++;
- if (--spc->spc_object->un_pager.swp.swp_poip == 0) {
- wakeup(spc->spc_object);
- }
+ pswap = swp_pager_hash(object, index);
+ if ((swap = *pswap) != NULL) {
+ int i;
- if (swap_pager_needflags &&
- ((swap_pager_free_count + swap_pager_free_pending) > (npendingio / 2))) {
- spc_wakeup();
+ for (i = 0; i < SWAP_META_PAGES; ++i) {
+ daddr_t v = swap->swb_pages[i];
+ if (v != SWAPBLK_NONE) {
+#if !defined(MAX_PERF)
+ --swap->swb_count;
+#endif
+ swp_pager_freeswapspace(
+ v,
+ 1
+ );
+ }
+ }
+#if !defined(MAX_PERF)
+ if (swap->swb_count != 0)
+ panic("swap_pager_meta_free_all: swb_count != 0");
+#endif
+ *pswap = swap->swb_hnext;
+ zfree(swap_zone, swap);
+ --object->un_pager.swp.swp_bcount;
+ }
+ index += SWAP_META_PAGES;
+#if !defined(MAX_PERF)
+ if (index > 0x20000000)
+ panic("swp_pager_meta_free_all: failed to locate all swap meta blocks");
+#endif
}
+}
- if ((TAILQ_FIRST(&swap_pager_inuse) == NULL) &&
- vm_pageout_pages_needed) {
- wakeup(&vm_pageout_pages_needed);
- vm_pageout_pages_needed = 0;
+/*
+ * SWP_PAGER_META_CTL() - misc control of swap and vm_page_t meta data.
+ *
+ * This routine is capable of looking up, popping, or freeing
+ * swapblk assignments in the swap meta data or in the vm_page_t.
+ * The routine typically returns the swapblk being looked-up, or popped,
+ * or SWAPBLK_NONE if the block was freed, or SWAPBLK_NONE if the block
+ * was invalid. This routine will automatically free any invalid
+ * meta-data swapblks.
+ *
+ * It is not possible to store invalid swapblks in the swap meta data
+ * (other than a literal 'SWAPBLK_NONE'), so we don't bother checking.
+ *
+ * When acting on a busy resident page and paging is in progress, we
+ * have to wait until paging is complete but otherwise can act on the
+ * busy page.
+ *
+ * SWM_FREE remove and free swap block from metadata
+ *
+ * SWM_POP remove from meta data but do not free.. pop it out
+ */
+
+static daddr_t
+swp_pager_meta_ctl(
+ vm_object_t object,
+ vm_pindex_t index,
+ int flags
+) {
+ /*
+ * The meta data only exists if the object is OBJT_SWAP
+ * and even then might not be allocated yet.
+ */
+
+ if (
+ object->type != OBJT_SWAP ||
+ object->un_pager.swp.swp_bcount == 0
+ ) {
+ return(SWAPBLK_NONE);
}
- splx(s);
+ {
+ struct swblock **pswap;
+ struct swblock *swap;
+ daddr_t r1 = SWAPBLK_NONE;
+
+ pswap = swp_pager_hash(object, index);
+
+ index &= SWAP_META_MASK;
+
+ if ((swap = *pswap) != NULL) {
+ r1 = swap->swb_pages[index];
+
+ if (r1 != SWAPBLK_NONE) {
+ if (flags & SWM_FREE) {
+ swp_pager_freeswapspace(
+ r1,
+ 1
+ );
+ r1 = SWAPBLK_NONE;
+ }
+ if (flags & (SWM_FREE|SWM_POP)) {
+ swap->swb_pages[index] = SWAPBLK_NONE;
+ if (--swap->swb_count == 0) {
+ *pswap = swap->swb_hnext;
+ zfree(swap_zone, swap);
+ --object->un_pager.swp.swp_bcount;
+ }
+ }
+ }
+ }
+
+ return(r1);
+ }
+ /* not reached */
}
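
The three calling modes of swp_pager_meta_ctl() reduced to a single-slot model (the kernel version also frees the whole swblock when swb_count drops to zero):

#include <stdio.h>

#define BLK_NONE (-1L)
#define SWM_FREE 0x01
#define SWM_POP  0x02

static long
meta_ctl(long *slot, int flags, void (*free_blk)(long))
{
    long r = *slot;

    if (r != BLK_NONE) {
        if (flags & SWM_FREE) {
            free_blk(r);            /* return block to swap bitmap */
            r = BLK_NONE;
        }
        if (flags & (SWM_FREE | SWM_POP))
            *slot = BLK_NONE;       /* clear the metadata slot */
    }
    return r;
}

static void
free_blk(long blk)
{
    printf("freed %ld\n", blk);
}

int
main(void)
{
    long slot = 42;

    printf("lookup: %ld\n", meta_ctl(&slot, 0, free_blk));        /* 42 */
    printf("pop:    %ld\n", meta_ctl(&slot, SWM_POP, free_blk));  /* 42 */
    printf("again:  %ld\n", meta_ctl(&slot, 0, free_blk));        /* -1 */
    return 0;
}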
+
diff --git a/sys/vm/swap_pager.h b/sys/vm/swap_pager.h
index ceb88b6..374223c 100644
--- a/sys/vm/swap_pager.h
+++ b/sys/vm/swap_pager.h
@@ -36,7 +36,7 @@
* SUCH DAMAGE.
*
* from: @(#)swap_pager.h 7.1 (Berkeley) 12/5/90
- * $Id: swap_pager.h,v 1.21 1998/04/29 04:28:02 dyson Exp $
+ * $Id: swap_pager.h,v 1.22 1998/07/10 21:50:17 alex Exp $
*/
/*
@@ -59,26 +59,50 @@
#define SWB_NPAGES 8
#endif
+/*
+ * Piecemeal swap metadata structure. Swap is stored in a radix tree.
+ *
+ * If SWB_NPAGES is 8 and sizeof(char *) == sizeof(daddr_t), our radix
+ * is basically 8. Assuming PAGE_SIZE == 4096, one tree level represents
+ * 32K worth of data, two levels represent 256K, three levels represent
+ * 2 MBytes. This is acceptable.
+ *
+ * Overall memory utilization is about the same as the old swap structure.
+ */
+
+#define SWCORRECT(n) (sizeof(void *) * (n) / sizeof(daddr_t))
+
+#define SWAP_META_PAGES (SWB_NPAGES * 2)
+#define SWAP_META_MASK (SWAP_META_PAGES - 1)
+
struct swblock {
- unsigned short swb_valid; /* bitmask for valid pages */
- unsigned short swb_locked; /* block locked */
- daddr_t swb_block[SWB_NPAGES];
+ struct swblock *swb_hnext;
+ vm_object_t swb_object;
+ int swb_index;
+ int swb_count;
+ daddr_t swb_pages[SWAP_META_PAGES];
};
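Editorial sketch of the coverage arithmetic implied by these definitions, assuming 4K pages: each swblock tracks SWAP_META_PAGES == 16 slots, i.e. 64K of object data per hash entry.

#include <stdio.h>

int
main(void)
{
    int swb_npages = 8;
    int meta_pages = swb_npages * 2;    /* SWAP_META_PAGES */
    long page_size = 4096;

    printf("data covered per swblock: %ld bytes\n",
        meta_pages * page_size);        /* 65536 */
    return 0;
}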
-typedef struct swblock *sw_blk_t;
#ifdef KERNEL
extern struct pagerlst swap_pager_un_object_list;
extern int swap_pager_full;
-extern struct rlisthdr swaplist;
+extern struct blist *swapblist;
+
+int swap_pager_putpages __P((vm_object_t, vm_page_t *, int, boolean_t, int *));
+boolean_t swap_pager_haspage __P((vm_object_t object, vm_pindex_t pindex, int *before, int *after));
-int swap_pager_putpages __P((vm_object_t, vm_page_t *, int, boolean_t, int *));
int swap_pager_swp_alloc __P((vm_object_t, int));
-void swap_pager_copy __P((vm_object_t, vm_pindex_t, vm_object_t,
- vm_pindex_t, vm_pindex_t, int));
+void swap_pager_copy __P((vm_object_t, vm_object_t, vm_pindex_t, int));
void swap_pager_freespace __P((vm_object_t, vm_pindex_t, vm_size_t));
void swap_pager_dmzspace __P((vm_object_t, vm_pindex_t, vm_size_t));
void swap_pager_swap_init __P((void));
-void swap_pager_sync __P((void));
+
+/*
+ * newswap functions
+ */
+
+void swap_pager_page_removed __P((vm_page_t, vm_object_t));
+
#endif
#endif /* _SWAP_PAGER_ */
diff --git a/sys/vm/vm_fault.c b/sys/vm/vm_fault.c
index e3d64f9..d0f4754 100644
--- a/sys/vm/vm_fault.c
+++ b/sys/vm/vm_fault.c
@@ -66,7 +66,7 @@
* any improvements or extensions that they make and grant Carnegie the
* rights to redistribute these changes.
*
- * $Id: vm_fault.c,v 1.92 1999/01/08 17:31:24 eivind Exp $
+ * $Id: vm_fault.c,v 1.93 1999/01/10 01:58:28 eivind Exp $
*/
/*
@@ -114,7 +114,7 @@ struct faultstate {
struct vnode *vp;
};
-static void
+static __inline void
release_page(struct faultstate *fs)
{
vm_page_wakeup(fs->m);
@@ -122,7 +122,7 @@ release_page(struct faultstate *fs)
fs->m = NULL;
}
-static void
+static __inline void
unlock_map(struct faultstate *fs)
{
if (fs->lookup_still_valid) {
@@ -263,36 +263,43 @@ RetryFault:;
fs.object = fs.first_object;
fs.pindex = fs.first_pindex;
- /*
- * See whether this page is resident
- */
while (TRUE) {
+ /*
+ * If the object is dead, we stop here
+ */
if (fs.object->flags & OBJ_DEAD) {
unlock_and_deallocate(&fs);
return (KERN_PROTECTION_FAILURE);
}
+
+ /*
+ * See if page is resident
+ */
fs.m = vm_page_lookup(fs.object, fs.pindex);
if (fs.m != NULL) {
int queue, s;
/*
- * If the page is being brought in, wait for it and
- * then retry.
+ * Wait/Retry if the page is busy. We have to do this
+ * if the page is busy via either PG_BUSY or
+ * vm_page_t->busy because the vm_pager may be using
+ * vm_page_t->busy for pageouts ( and even pageins if
+ * it is the vnode pager ), and we could end up trying
+ * to pagein and pageout the same page simultaneously.
+ *
+ * We can theoretically allow the busy case on a read
+ * fault if the page is marked valid, but since such
+ * pages are typically already pmap'd, putting that
+ * special case in might be more effort than it is
+ * worth. We cannot under any circumstances mess
+ * around with a vm_page_t->busy page except, perhaps,
+ * to pmap it.
*/
- if ((fs.m->flags & PG_BUSY) ||
- (fs.m->busy &&
- (fs.m->valid & VM_PAGE_BITS_ALL) != VM_PAGE_BITS_ALL)) {
+ if ((fs.m->flags & PG_BUSY) || fs.m->busy) {
unlock_things(&fs);
- s = splvm();
- if ((fs.m->flags & PG_BUSY) ||
- (fs.m->busy &&
- (fs.m->valid & VM_PAGE_BITS_ALL) != VM_PAGE_BITS_ALL)) {
- vm_page_flag_set(fs.m, PG_WANTED | PG_REFERENCED);
- cnt.v_intrans++;
- tsleep(fs.m, PSWP, "vmpfw", 0);
- }
- splx(s);
+ (void)vm_page_sleep_busy(fs.m, TRUE, "vmpfw");
+ cnt.v_intrans++;
vm_object_deallocate(fs.first_object);
goto RetryFault;
}
@@ -302,8 +309,12 @@ RetryFault:;
vm_page_unqueue_nowakeup(fs.m);
splx(s);
+#if 0
/*
- * Mark page busy for other processes, and the pagedaemon.
+ * Code removed. In a low-memory situation (say, a
+ * memory-bound program is running), the last thing you
+ * do is starve reactivations for other processes.
+ * XXX we need to find a better way.
*/
if (((queue - fs.m->pc) == PQ_CACHE) &&
(cnt.v_free_count + cnt.v_cache_count) < cnt.v_free_min) {
@@ -312,6 +323,13 @@ RetryFault:;
VM_WAIT;
goto RetryFault;
}
+#endif
+ /*
+ * Mark page busy for other processes, and the
+ * pagedaemon. If it still isn't completely valid
+ * (readable), jump to readrest, else break-out ( we
+ * found the page ).
+ */
vm_page_busy(fs.m);
if (((fs.m->valid & VM_PAGE_BITS_ALL) != VM_PAGE_BITS_ALL) &&
@@ -321,6 +339,12 @@ RetryFault:;
break;
}
+
+ /*
+ * Page is not resident. If this is the search termination,
+ * allocate a new page.
+ */
+
if (((fs.object->type != OBJT_DEFAULT) &&
(((fault_flags & VM_FAULT_WIRE_MASK) == 0) || wired))
|| (fs.object == fs.first_object)) {
@@ -344,6 +368,13 @@ RetryFault:;
}
readrest:
+ /*
+ * Have page, but it may not be entirely valid ( or valid at
+ * all ). If this object is not the default, try to fault-in
+ * the page as well as activate additional pages when
+ * appropriate, and page-in additional pages when appropriate.
+ */
+
if (fs.object->type != OBJT_DEFAULT &&
(((fault_flags & VM_FAULT_WIRE_MASK) == 0) || wired)) {
int rv;
@@ -410,13 +441,16 @@ readrest:
* vm_page_t passed to the routine. The reqpage
* return value is the index into the marray for the
* vm_page_t passed to the routine.
+ *
+ * fs.m plus the additional pages are PG_BUSY'd.
*/
faultcount = vm_fault_additional_pages(
fs.m, behind, ahead, marray, &reqpage);
/*
* Call the pager to retrieve the data, if any, after
- * releasing the lock on the map.
+ * releasing the lock on the map. We hold a ref on
+ * fs.object and the pages are PG_BUSY'd.
*/
unlock_map(&fs);
@@ -442,7 +476,7 @@ readrest:
}
hardfault++;
- break;
+ break; /* break to PAGE HAS BEEN FOUND */
}
/*
* Remove the bogus page (which does not exist at this
@@ -486,8 +520,8 @@ readrest:
}
}
/*
- * We get here if the object has default pager (or unwiring) or the
- * pager doesn't have the page.
+ * We get here if the object has default pager (or unwiring)
+ * or the pager doesn't have the page.
*/
if (fs.object == fs.first_object)
fs.first_m = fs.m;
@@ -518,15 +552,17 @@ readrest:
cnt.v_ozfod++;
}
cnt.v_zfod++;
- break;
+ break; /* break to PAGE HAS BEEN FOUND */
} else {
if (fs.object != fs.first_object) {
vm_object_pip_wakeup(fs.object);
}
+ KASSERT(fs.object != next_object, ("object loop %p", next_object));
fs.object = next_object;
vm_object_pip_add(fs.object, 1);
}
}
+
KASSERT((fs.m->flags & PG_BUSY) != 0,
("vm_fault: not busy after main loop"));
@@ -549,14 +585,15 @@ readrest:
*/
if (fault_type & VM_PROT_WRITE) {
-
/*
- * This allows pages to be virtually copied from a backing_object
- * into the first_object, where the backing object has no other
- * refs to it, and cannot gain any more refs. Instead of a
- * bcopy, we just move the page from the backing object to the
- * first object. Note that we must mark the page dirty in the
- * first object so that it will go out to swap when needed.
+ * This allows pages to be virtually copied from a
+ * backing_object into the first_object, where the
+ * backing object has no other refs to it, and cannot
+ * gain any more refs. Instead of a bcopy, we just
+ * move the page from the backing object to the
+ * first object. Note that we must mark the page
+ * dirty in the first object so that it will go out
+ * to swap when needed.
*/
if (map_generation == fs.map->timestamp &&
/*
@@ -598,11 +635,12 @@ readrest:
fs.first_m = NULL;
/*
- * grab the page and put it into the process'es object
+ * grab the page and put it into the
+ * process'es object. The page is
+ * automatically made dirty.
*/
vm_page_rename(fs.m, fs.first_object, fs.first_pindex);
fs.first_m = fs.m;
- fs.first_m->dirty = VM_PAGE_BITS_ALL;
vm_page_busy(fs.first_m);
fs.m = NULL;
cnt.v_cow_optim++;
@@ -620,7 +658,13 @@ readrest:
release_page(&fs);
}
+ /*
+ * fs.object != fs.first_object due to above
+ * conditional
+ */
+
vm_object_pip_wakeup(fs.object);
+
/*
* Only use the new page below...
*/
@@ -708,9 +752,13 @@ readrest:
* If the fault is a write, we know that this page is being
* written NOW. This will save on the pmap_is_modified() calls
* later.
+ *
+ * Also tell the backing pager, if any, that it should remove
+ * any swap backing since the page is now dirty.
*/
if (fault_flags & VM_FAULT_DIRTY) {
fs.m->dirty = VM_PAGE_BITS_ALL;
+ vm_pager_page_unswapped(fs.m);
}
}
@@ -1021,8 +1069,7 @@ vm_fault_additional_pages(m, rbehind, rahead, marray, reqpage)
* if the requested page is not available, then give up now
*/
- if (!vm_pager_has_page(object,
- OFF_TO_IDX(object->paging_offset) + pindex, &cbehind, &cahead)) {
+ if (!vm_pager_has_page(object, pindex, &cbehind, &cahead)) {
return 0;
}
diff --git a/sys/vm/vm_glue.c b/sys/vm/vm_glue.c
index ec844db..0a3309d 100644
--- a/sys/vm/vm_glue.c
+++ b/sys/vm/vm_glue.c
@@ -59,7 +59,7 @@
* any improvements or extensions that they make and grant Carnegie the
* rights to redistribute these changes.
*
- * $Id: vm_glue.c,v 1.79 1998/12/19 08:23:31 julian Exp $
+ * $Id: vm_glue.c,v 1.80 1999/01/07 21:23:50 julian Exp $
*/
#include "opt_rlimit.h"
@@ -213,10 +213,19 @@ vm_fork(p1, p2, flags)
p1->p_vmspace->vm_refcnt++;
}
+ /*
+ * Great, so we have a memory-heavy process and the
+ * entire machine comes to a screeching halt because
+ * nobody can fork/exec anything. What we really need
+ * to do is fix the process swapper so it swaps out the right
+ * processes.
+ */
+#if 0
while ((cnt.v_free_count + cnt.v_cache_count) < cnt.v_free_min) {
vm_pageout_deficit += (UPAGES + VM_INITIAL_PAGEIN);
VM_WAIT;
}
+#endif
if ((flags & RFMEM) == 0) {
p2->p_vmspace = vmspace_fork(p1->p_vmspace);
diff --git a/sys/vm/vm_kern.c b/sys/vm/vm_kern.c
index ea7f45b..b2e1102 100644
--- a/sys/vm/vm_kern.c
+++ b/sys/vm/vm_kern.c
@@ -61,7 +61,7 @@
* any improvements or extensions that they make and grant Carnegie the
* rights to redistribute these changes.
*
- * $Id: vm_kern.c,v 1.49 1998/08/24 08:39:37 dfr Exp $
+ * $Id: vm_kern.c,v 1.50 1998/09/04 08:06:57 dfr Exp $
*/
/*
@@ -181,8 +181,9 @@ kmem_alloc(map, size)
VM_ALLOC_ZERO | VM_ALLOC_RETRY);
if ((mem->flags & PG_ZERO) == 0)
vm_page_zero_fill(mem);
- vm_page_flag_clear(mem, (PG_BUSY | PG_ZERO));
mem->valid = VM_PAGE_BITS_ALL;
+ vm_page_flag_clear(mem, PG_ZERO);
+ vm_page_wakeup(mem);
}
/*
@@ -200,6 +201,8 @@ kmem_alloc(map, size)
* Release a region of kernel virtual memory allocated
* with kmem_alloc, and return the physical pages
* associated with that region.
+ *
+ * This routine may not block on kernel maps.
*/
void
kmem_free(map, addr, size)
@@ -252,26 +255,31 @@ kmem_suballoc(parent, min, max, size)
}
/*
- * Allocate wired-down memory in the kernel's address map for the higher
- * level kernel memory allocator (kern/kern_malloc.c). We cannot use
- * kmem_alloc() because we may need to allocate memory at interrupt
- * level where we cannot block (canwait == FALSE).
+ * kmem_malloc:
+ *
+ * Allocate wired-down memory in the kernel's address map for the higher
+ * level kernel memory allocator (kern/kern_malloc.c). We cannot use
+ * kmem_alloc() because we may need to allocate memory at interrupt
+ * level where we cannot block (canwait == FALSE).
+ *
+ * This routine has its own private kernel submap (kmem_map) and object
+ * (kmem_object). This, combined with the fact that only malloc uses
+ * this routine, ensures that we will never block in map or object waits.
*
- * This routine has its own private kernel submap (kmem_map) and object
- * (kmem_object). This, combined with the fact that only malloc uses
- * this routine, ensures that we will never block in map or object waits.
+ * Note that this still only works in a uni-processor environment and
+ * when called at splhigh().
*
- * Note that this still only works in a uni-processor environment and
- * when called at splhigh().
+ * We don't worry about expanding the map (adding entries) since entries
+ * for wired maps are statically allocated.
*
- * We don't worry about expanding the map (adding entries) since entries
- * for wired maps are statically allocated.
+ * NOTE: This routine is not supposed to block if M_NOWAIT is set, but
+ * I have not verified that it actually does not block.
*/
vm_offset_t
-kmem_malloc(map, size, waitflag)
+kmem_malloc(map, size, flags)
register vm_map_t map;
register vm_size_t size;
- boolean_t waitflag;
+ int flags;
{
register vm_offset_t offset, i;
vm_map_entry_t entry;
@@ -297,7 +305,7 @@ kmem_malloc(map, size, waitflag)
printf("Out of mbuf clusters - adjust NMBCLUSTERS or increase maxusers!\n");
return (0);
}
- if (waitflag == M_WAITOK)
+ if ((flags & M_NOWAIT) == 0)
panic("kmem_malloc(%d): kmem_map too small: %d total allocated",
size, map->size);
return (0);
@@ -308,9 +316,19 @@ kmem_malloc(map, size, waitflag)
VM_PROT_ALL, VM_PROT_ALL, 0);
for (i = 0; i < size; i += PAGE_SIZE) {
+ /*
+ * Note: if M_NOWAIT specified alone, allocate from
+ * interrupt-safe queues only (just the free list). If
+ * M_ASLEEP or M_USE_RESERVE is also specified, we can also
+ * allocate from the cache. Neither of the latter two
+ * flags may be specified from an interrupt since interrupts
+ * are not allowed to mess with the cache queue.
+ */
retry:
m = vm_page_alloc(kmem_object, OFF_TO_IDX(offset + i),
- (waitflag == M_NOWAIT) ? VM_ALLOC_INTERRUPT : VM_ALLOC_SYSTEM);
+ ((flags & (M_NOWAIT|M_ASLEEP|M_USE_RESERVE)) == M_NOWAIT) ?
+ VM_ALLOC_INTERRUPT :
+ VM_ALLOC_SYSTEM);
/*
* Ran out of space, free everything up and return. Don't need
@@ -318,7 +336,7 @@ retry:
* aren't on any queues.
*/
if (m == NULL) {
- if (waitflag == M_WAITOK) {
+ if ((flags & M_NOWAIT) == 0) {
VM_WAIT;
goto retry;
}
@@ -330,6 +348,9 @@ retry:
}
vm_map_delete(map, addr, addr + size);
vm_map_unlock(map);
+ if (flags & M_ASLEEP) {
+ VM_AWAIT;
+ }
return (0);
}
vm_page_flag_clear(m, PG_ZERO);
@@ -359,6 +380,9 @@ retry:
m = vm_page_lookup(kmem_object, OFF_TO_IDX(offset + i));
vm_page_wire(m);
vm_page_wakeup(m);
+ /*
+ * Because this is kernel_pmap, this call will not block.
+ */
pmap_enter(kernel_pmap, addr + i, VM_PAGE_TO_PHYS(m),
VM_PROT_ALL, 1);
vm_page_flag_set(m, PG_MAPPED | PG_WRITEABLE | PG_REFERENCED);
@@ -369,12 +393,14 @@ retry:
}
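
The flag test above picks the page-allocation class; a standalone editorial restatement (flag values are illustrative, not the kernel's):

#include <stdio.h>

#define M_NOWAIT      0x0001
#define M_ASLEEP      0x0004
#define M_USE_RESERVE 0x0008

enum alloc_class { ALLOC_SYSTEM, ALLOC_INTERRUPT };

static enum alloc_class
class_for(int flags)
{
    /* only a bare M_NOWAIT restricts us to interrupt-safe queues */
    return ((flags & (M_NOWAIT | M_ASLEEP | M_USE_RESERVE)) == M_NOWAIT)
        ? ALLOC_INTERRUPT : ALLOC_SYSTEM;
}

int
main(void)
{
    printf("%d %d %d\n",
        class_for(M_NOWAIT),                  /* 1: free list only  */
        class_for(M_NOWAIT | M_USE_RESERVE),  /* 0: may use cache   */
        class_for(0));                        /* 0: waiting allowed */
    return 0;
}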
/*
- * kmem_alloc_wait
+ * kmem_alloc_wait:
*
* Allocates pageable memory from a sub-map of the kernel. If the submap
* has no room, the caller sleeps waiting for more memory in the submap.
*
+ * This routine may block.
*/
+
vm_offset_t
kmem_alloc_wait(map, size)
vm_map_t map;
@@ -406,7 +432,7 @@ kmem_alloc_wait(map, size)
}
/*
- * kmem_free_wakeup
+ * kmem_free_wakeup:
*
* Returns memory to a submap of the kernel, and wakes up any processes
* waiting for memory in that map.
@@ -424,11 +450,14 @@ kmem_free_wakeup(map, addr, size)
}
/*
- * Create the kernel map; insert a mapping covering kernel text, data, bss,
- * and all space allocated thus far (`boostrap' data). The new map will thus
- * map the range between VM_MIN_KERNEL_ADDRESS and `start' as allocated, and
- * the range between `start' and `end' as free.
+ * kmem_init:
+ *
+ * Create the kernel map; insert a mapping covering kernel text,
+ * data, bss, and all space allocated thus far (`bootstrap' data). The
+ * new map will thus map the range between VM_MIN_KERNEL_ADDRESS and
+ * `start' as allocated, and the range between `start' and `end' as free.
*/
+
void
kmem_init(start, end)
vm_offset_t start, end;
@@ -445,3 +474,4 @@ kmem_init(start, end)
/* ... and ending with the completion of the above `insert' */
vm_map_unlock(m);
}
+
diff --git a/sys/vm/vm_map.c b/sys/vm/vm_map.c
index 829548a..f495788 100644
--- a/sys/vm/vm_map.c
+++ b/sys/vm/vm_map.c
@@ -61,7 +61,7 @@
* any improvements or extensions that they make and grant Carnegie the
* rights to redistribute these changes.
*
- * $Id: vm_map.c,v 1.138 1998/10/25 17:44:58 phk Exp $
+ * $Id: vm_map.c,v 1.139 1999/01/06 23:05:41 julian Exp $
*/
/*
@@ -440,7 +440,9 @@ vm_map_insert(vm_map_t map, vm_object_t object, vm_ooffset_t offset,
vm_map_entry_t new_entry;
vm_map_entry_t prev_entry;
vm_map_entry_t temp_entry;
+#if 0
vm_object_t prev_object;
+#endif
u_char protoeflags;
if ((object != NULL) && (cow & MAP_NOFAULT)) {
@@ -514,10 +516,15 @@ vm_map_insert(vm_map_t map, vm_object_t object, vm_ooffset_t offset,
map->size += (end - prev_entry->end);
prev_entry->end = end;
+#if 0
+ /*
+ * (no longer applies)
+ */
if ((cow & MAP_NOFAULT) == 0) {
prev_object = prev_entry->object.vm_object;
default_pager_convert_to_swapq(prev_object);
}
+#endif
return (KERN_SUCCESS);
}
else {
@@ -573,7 +580,12 @@ vm_map_insert(vm_map_t map, vm_object_t object, vm_ooffset_t offset,
(prev_entry->end >= new_entry->start))
map->first_free = new_entry;
+#if 0
+ /*
+ * (no longer applies)
+ */
default_pager_convert_to_swapq(object);
+#endif
return (KERN_SUCCESS);
}
@@ -1504,7 +1516,12 @@ vm_map_user_pageable(map, start, end, new_pageable)
entry->offset = (vm_offset_t) 0;
}
+#if 0
+ /*
+ * (no longer applies)
+ */
default_pager_convert_to_swapq(entry->object.vm_object);
+#endif
}
vm_map_clip_start(map, entry, start);
@@ -1695,7 +1712,12 @@ vm_map_pageable(map, start, end, new_pageable)
atop(entry->end - entry->start));
entry->offset = (vm_offset_t) 0;
}
+#if 0
+ /*
+ * (no longer applies)
+ */
default_pager_convert_to_swapq(entry->object.vm_object);
+#endif
}
}
vm_map_clip_start(map, entry, start);
@@ -2192,16 +2214,18 @@ vm_map_split(entry)
m = vm_page_lookup(orig_object, offidxstart + idx);
if (m == NULL)
continue;
- if (m->flags & PG_BUSY) {
- vm_page_flag_set(m, PG_WANTED);
- tsleep(m, PVM, "spltwt", 0);
+
+ /*
+ * We must wait for pending I/O to complete before we can
+ * rename the page.
+ */
+ if (vm_page_sleep_busy(m, TRUE, "spltwt"))
goto retry;
- }
vm_page_busy(m);
vm_page_protect(m, VM_PROT_NONE);
vm_page_rename(m, new_object, idx);
- m->dirty = VM_PAGE_BITS_ALL;
+ /* page automatically made dirty by rename */
vm_page_busy(m);
}
@@ -2212,9 +2236,7 @@ vm_map_split(entry)
* and destroy unneeded pages in
* shadow object.
*/
- swap_pager_copy(orig_object, OFF_TO_IDX(orig_object->paging_offset),
- new_object, OFF_TO_IDX(new_object->paging_offset),
- offidxstart, 0);
+ swap_pager_copy(orig_object, new_object, offidxstart, 0);
vm_object_pip_wakeup(orig_object);
}
@@ -2670,8 +2692,13 @@ RetryLookup:;
vm_map_lock_downgrade(share_map);
}
+#if 0
+ /*
+ * (no longer applies)
+ */
if (entry->object.vm_object->type == OBJT_DEFAULT)
default_pager_convert_to_swapq(entry->object.vm_object);
+#endif
/*
* Return the object/offset from this entry. If the entry was
* copy-on-write or empty, it has been fixed up.
@@ -2781,6 +2808,10 @@ vm_uiomove(mapa, srcobject, cp, cnta, uaddra, npages)
vm_map_lookup_done(map, entry);
return 0;
}
+ /*
+ * disallow busy or invalid pages, but allow
+ * m->busy pages if they are entirely valid.
+ */
if ((m->flags & PG_BUSY) ||
((m->valid & VM_PAGE_BITS_ALL) != VM_PAGE_BITS_ALL)) {
vm_map_lookup_done(map, entry);
@@ -2856,7 +2887,7 @@ vm_uiomove(mapa, srcobject, cp, cnta, uaddra, npages)
*/
if (first_object->type == OBJT_SWAP) {
swap_pager_freespace(first_object,
- OFF_TO_IDX(first_object->paging_offset),
+ 0,
first_object->size);
}
diff --git a/sys/vm/vm_meter.c b/sys/vm/vm_meter.c
index 5bc74bd..bb52f66 100644
--- a/sys/vm/vm_meter.c
+++ b/sys/vm/vm_meter.c
@@ -31,7 +31,7 @@
* SUCH DAMAGE.
*
* @(#)vm_meter.c 8.4 (Berkeley) 1/4/94
- * $Id: vm_meter.c,v 1.26 1998/08/24 08:39:37 dfr Exp $
+ * $Id: vm_meter.c,v 1.27 1998/10/31 17:21:31 peter Exp $
*/
#include <sys/param.h>
@@ -195,6 +195,11 @@ vmtotal SYSCTL_HANDLER_ARGS
for (object = TAILQ_FIRST(&vm_object_list);
object != NULL;
object = TAILQ_NEXT(object, object_list)) {
+ /*
+ * devices, like /dev/mem, will badly skew our totals
+ */
+ if (object->type == OBJT_DEVICE)
+ continue;
totalp->t_vm += object->size;
totalp->t_rm += object->resident_page_count;
if (object->flags & OBJ_ACTIVE) {
diff --git a/sys/vm/vm_mmap.c b/sys/vm/vm_mmap.c
index ba36e41..1374dfb 100644
--- a/sys/vm/vm_mmap.c
+++ b/sys/vm/vm_mmap.c
@@ -38,7 +38,7 @@
* from: Utah $Hdr: vm_mmap.c 1.6 91/10/21$
*
* @(#)vm_mmap.c 8.4 (Berkeley) 1/12/94
- * $Id: vm_mmap.c,v 1.85 1998/12/09 20:22:21 dt Exp $
+ * $Id: vm_mmap.c,v 1.86 1999/01/06 23:05:42 julian Exp $
*/
/*
@@ -71,6 +71,7 @@
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
+#include <vm/vm_page.h>
#include <vm/vm_pager.h>
#include <vm/vm_pageout.h>
#include <vm/vm_extern.h>
diff --git a/sys/vm/vm_object.c b/sys/vm/vm_object.c
index a1477f2..86c71c8 100644
--- a/sys/vm/vm_object.c
+++ b/sys/vm/vm_object.c
@@ -61,7 +61,7 @@
* any improvements or extensions that they make and grant Carnegie the
* rights to redistribute these changes.
*
- * $Id: vm_object.c,v 1.137 1999/01/08 17:31:26 eivind Exp $
+ * $Id: vm_object.c,v 1.138 1999/01/10 01:58:28 eivind Exp $
*/
/*
@@ -134,9 +134,12 @@ static long object_bypasses;
static int next_index;
static vm_zone_t obj_zone;
static struct vm_zone obj_zone_store;
+static int object_hash_rand;
#define VM_OBJECTS_INIT 256
static struct vm_object vm_objects_init[VM_OBJECTS_INIT];
+#if 0
static int objidnumber;
+#endif
void
_vm_object_allocate(type, size, object)
@@ -152,7 +155,9 @@ _vm_object_allocate(type, size, object)
object->size = size;
object->ref_count = 1;
object->flags = 0;
+#if 0
object->id = ++objidnumber;
+#endif
if ((object->type == OBJT_DEFAULT) || (object->type == OBJT_SWAP))
vm_object_set_flag(object, OBJ_ONEMAPPING);
object->behavior = OBJ_NORMAL;
@@ -168,16 +173,25 @@ _vm_object_allocate(type, size, object)
incr = size;
next_index = (next_index + incr) & PQ_L2_MASK;
object->handle = NULL;
- object->paging_offset = (vm_ooffset_t) 0;
object->backing_object = NULL;
object->backing_object_offset = (vm_ooffset_t) 0;
+#if 0
object->page_hint = NULL;
+#endif
+ /*
+ * Try to generate a number that will spread objects out in the
+ * hash table. We 'wipe' new objects across the hash in 128-page
+ * increments plus 1 more, so the offset drifts a little further
+ * each time the value wraps around.
+ */
+ object->hash_rand = object_hash_rand - 129;
object->last_read = 0;
object->generation++;
TAILQ_INSERT_TAIL(&vm_object_list, object, object_list);
vm_object_count++;
+ object_hash_rand = object->hash_rand;
}
/*
@@ -336,25 +350,15 @@ vm_object_deallocate(object)
robject->ref_count++;
- retry:
- if (robject->paging_in_progress ||
- object->paging_in_progress) {
+ while (
+ robject->paging_in_progress ||
+ object->paging_in_progress
+ ) {
vm_object_pip_sleep(robject, "objde1");
- if (robject->paging_in_progress &&
- robject->type == OBJT_SWAP) {
- swap_pager_sync();
- goto retry;
- }
-
vm_object_pip_sleep(object, "objde2");
- if (object->paging_in_progress &&
- object->type == OBJT_SWAP) {
- swap_pager_sync();
- }
- goto retry;
}
- if( robject->ref_count == 1) {
+ if (robject->ref_count == 1) {
robject->ref_count--;
object = robject;
goto doterm;
@@ -396,6 +400,7 @@ doterm:
* up all previously used resources.
*
* The object must be locked.
+ * This routine may block.
*/
void
vm_object_terminate(object)
@@ -444,13 +449,13 @@ vm_object_terminate(object)
/*
* Now free any remaining pages. For internal objects, this also
* removes them from paging queues. Don't free wired pages, just
- * remove them from the object.
+ * remove them from the object.
*/
s = splvm();
while ((p = TAILQ_FIRST(&object->memq)) != NULL) {
#if !defined(MAX_PERF)
if (p->busy || (p->flags & PG_BUSY))
- printf("vm_object_terminate: freeing busy page\n");
+ panic("vm_object_terminate: freeing busy page %p\n", p);
#endif
if (p->wire_count == 0) {
vm_page_busy(p);
@@ -566,9 +571,7 @@ rescan:
}
s = splvm();
- while ((p->flags & PG_BUSY) || p->busy) {
- vm_page_flag_set(p, PG_WANTED | PG_REFERENCED);
- tsleep(p, PVM, "vpcwai", 0);
+ while (vm_page_sleep_busy(p, TRUE, "vpcwai")) {
if (object->generation != curgeneration) {
splx(s);
goto rescan;
@@ -763,6 +766,12 @@ vm_object_pmap_remove(object, start, end)
* vm_object_madvise:
*
* Implements the madvise function at the object/page level.
+ *
+ * Currently, madvise() is limited to the default and swap object
+ * types, and to the unshared portions of a process's address
+ * space. MADV_FREE, certainly, could never be
+ * run on anything else. The others are more flexible and the code could
+ * be adjusted in the future to handle expanded cases for them.
*/
void
vm_object_madvise(object, pindex, count, advise)
@@ -780,22 +789,59 @@ vm_object_madvise(object, pindex, count, advise)
end = pindex + count;
- for (; pindex < end; pindex += 1) {
+ /*
+ * MADV_FREE special case - free any swap backing store (as well
+ * as resident pages later on).
+ */
+
+ if (advise == MADV_FREE) {
+ tobject = object;
+ tpindex = pindex;
+ while (
+ (tobject->type == OBJT_DEFAULT ||
+ tobject->type == OBJT_SWAP) &&
+ (tobject->flags & OBJ_ONEMAPPING)
+ ) {
+ if (tobject->type == OBJT_SWAP) {
+ swap_pager_freespace(tobject, tpindex, count);
+ }
+ if ((tobject = tobject->backing_object) == NULL)
+ break;
+ tpindex += OFF_TO_IDX(tobject->backing_object_offset);
+ }
+ }
+
+ /*
+ * Locate and adjust resident pages
+ */
+
+ for (; pindex < end; pindex += 1) {
relookup:
tobject = object;
tpindex = pindex;
shadowlookup:
+
+ if (tobject->type != OBJT_DEFAULT &&
+ tobject->type != OBJT_SWAP
+ ) {
+ continue;
+ }
+
+ if ((tobject->flags & OBJ_ONEMAPPING) == 0)
+ continue;
+
m = vm_page_lookup(tobject, tpindex);
+
if (m == NULL) {
- if (tobject->type != OBJT_DEFAULT) {
- continue;
- }
-
tobject = tobject->backing_object;
+ if (tobject == NULL)
+ continue;
+#if 0
if ((tobject == NULL) || (tobject->ref_count != 1)) {
continue;
}
+#endif
tpindex += OFF_TO_IDX(tobject->backing_object_offset);
goto shadowlookup;
}
@@ -805,12 +851,15 @@ shadowlookup:
* we skip it. Things can break if we mess with pages
* in any of the below states.
*/
- if (m->hold_count || m->wire_count ||
- m->valid != VM_PAGE_BITS_ALL) {
+ if (
+ m->hold_count ||
+ m->wire_count ||
+ m->valid != VM_PAGE_BITS_ALL
+ ) {
continue;
}
- if (vm_page_sleep(m, "madvpo", &m->busy))
+ if (vm_page_sleep_busy(m, TRUE, "madvpo"))
goto relookup;
if (advise == MADV_WILLNEED) {
@@ -818,15 +867,25 @@ shadowlookup:
} else if (advise == MADV_DONTNEED) {
vm_page_deactivate(m);
} else if (advise == MADV_FREE) {
- pmap_clear_modify(VM_PAGE_TO_PHYS(m));
- m->dirty = 0;
/*
- * Force a demand zero if attempt to read from swap.
- * We currently don't handle vnode files correctly,
- * and will reread stale contents unnecessarily.
+ * If MADV_FREE_FORCE_FREE is defined, we attempt to
+ * immediately free the page. Otherwise we just
+ * destroy any swap backing store, mark it clean,
+ * and stuff it into the cache.
*/
- if (object->type == OBJT_SWAP)
- swap_pager_dmzspace(tobject, m->pindex, 1);
+ pmap_clear_modify(VM_PAGE_TO_PHYS(m));
+ m->dirty = 0;
+
+#ifdef MADV_FREE_FORCE_FREE
+ if (tobject->resident_page_count > 1) {
+ vm_page_busy(m);
+ vm_page_protect(m, VM_PROT_NONE);
+ vm_page_free(m);
+ } else
+#endif
+ {
+ vm_page_cache(m);
+ }
}
}
}
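From userland, the path above is reached through madvise(2); a minimal example of the intended use of MADV_FREE on anonymous memory:

#include <sys/mman.h>
#include <stddef.h>

/*
 * Release scratch memory back to the VM system.  After MADV_FREE the
 * contents of buf are undefined and any swap backing it is released,
 * per the vm_object_madvise() logic above.
 */
int
release_scratch(void *buf, size_t len)
{
	return (madvise(buf, len, MADV_FREE));
}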
@@ -900,8 +959,7 @@ vm_object_qcollapse(object)
register vm_object_t object;
{
register vm_object_t backing_object;
- register vm_pindex_t backing_offset_index, paging_offset_index;
- vm_pindex_t backing_object_paging_offset_index;
+ register vm_pindex_t backing_offset_index;
vm_pindex_t new_pindex;
register vm_page_t p, pp;
register vm_size_t size;
@@ -913,27 +971,39 @@ vm_object_qcollapse(object)
backing_object->ref_count += 2;
backing_offset_index = OFF_TO_IDX(object->backing_object_offset);
- backing_object_paging_offset_index = OFF_TO_IDX(backing_object->paging_offset);
- paging_offset_index = OFF_TO_IDX(object->paging_offset);
size = object->size;
+
p = TAILQ_FIRST(&backing_object->memq);
while (p) {
vm_page_t next;
+ /*
+ * Set up for the next iteration and skip this page if it
+ * isn't trivial to move.
+ */
+
next = TAILQ_NEXT(p, listq);
if ((p->flags & (PG_BUSY | PG_FICTITIOUS)) ||
!p->valid || p->hold_count || p->wire_count || p->busy) {
p = next;
continue;
}
+
+ /*
+ * busy the page and move it from the backing store to the
+ * parent object.
+ */
+
vm_page_busy(p);
+ KASSERT(p->object == object, ("vm_object_qcollapse(): object mismatch"));
+
new_pindex = p->pindex - backing_offset_index;
if (p->pindex < backing_offset_index ||
new_pindex >= size) {
if (backing_object->type == OBJT_SWAP)
swap_pager_freespace(backing_object,
- backing_object_paging_offset_index+p->pindex,
+ p->pindex,
1);
vm_page_protect(p, VM_PROT_NONE);
vm_page_free(p);
@@ -941,16 +1011,16 @@ vm_object_qcollapse(object)
pp = vm_page_lookup(object, new_pindex);
if (pp != NULL ||
(object->type == OBJT_SWAP && vm_pager_has_page(object,
- paging_offset_index + new_pindex, NULL, NULL))) {
+ new_pindex, NULL, NULL))) {
if (backing_object->type == OBJT_SWAP)
swap_pager_freespace(backing_object,
- backing_object_paging_offset_index + p->pindex, 1);
+ p->pindex, 1);
vm_page_protect(p, VM_PROT_NONE);
vm_page_free(p);
} else {
if (backing_object->type == OBJT_SWAP)
swap_pager_freespace(backing_object,
- backing_object_paging_offset_index + p->pindex, 1);
+ p->pindex, 1);
if ((p->queue - p->pc) == PQ_CACHE)
vm_page_deactivate(p);
@@ -958,7 +1028,7 @@ vm_object_qcollapse(object)
vm_page_protect(p, VM_PROT_NONE);
vm_page_rename(p, object, new_pindex);
- p->dirty = VM_PAGE_BITS_ALL;
+ /* page automatically made dirty by rename */
}
}
p = next;
@@ -1049,9 +1119,10 @@ vm_object_collapse(object)
*/
while ((p = TAILQ_FIRST(&backing_object->memq)) != 0) {
-
- new_pindex = p->pindex - backing_offset_index;
+ if (vm_page_sleep_busy(p, TRUE, "vmocol"))
+ continue;
vm_page_busy(p);
+ new_pindex = p->pindex - backing_offset_index;
/*
* If the parent has a page here, or if this
@@ -1068,7 +1139,7 @@ vm_object_collapse(object)
} else {
pp = vm_page_lookup(object, new_pindex);
if (pp != NULL || (object->type == OBJT_SWAP && vm_pager_has_page(object,
- OFF_TO_IDX(object->paging_offset) + new_pindex, NULL, NULL))) {
+ new_pindex, NULL, NULL))) {
vm_page_protect(p, VM_PROT_NONE);
vm_page_free(p);
} else {
@@ -1077,7 +1148,7 @@ vm_object_collapse(object)
else
vm_page_protect(p, VM_PROT_NONE);
vm_page_rename(p, object, new_pindex);
- p->dirty = VM_PAGE_BITS_ALL;
+ /* page automatically made dirty by rename */
}
}
}
@@ -1088,52 +1159,22 @@ vm_object_collapse(object)
if (backing_object->type == OBJT_SWAP) {
vm_object_pip_add(backing_object, 1);
- if (object->type == OBJT_SWAP) {
- vm_object_pip_add(object, 1);
- /*
- * copy shadow object pages into ours
- * and destroy unneeded pages in
- * shadow object.
- */
- swap_pager_copy(
- backing_object,
- OFF_TO_IDX(backing_object->paging_offset),
- object,
- OFF_TO_IDX(object->paging_offset),
- OFF_TO_IDX(object->backing_object_offset), TRUE);
- vm_object_pip_wakeup(object);
- } else {
- vm_object_pip_add(object, 1);
- /*
- * move the shadow backing_object's pager data to
- * "object" and convert "object" type to OBJT_SWAP.
- */
- object->type = OBJT_SWAP;
- object->un_pager.swp.swp_nblocks =
- backing_object->un_pager.swp.swp_nblocks;
- object->un_pager.swp.swp_allocsize =
- backing_object->un_pager.swp.swp_allocsize;
- object->un_pager.swp.swp_blocks =
- backing_object->un_pager.swp.swp_blocks;
- object->un_pager.swp.swp_poip = /* XXX */
- backing_object->un_pager.swp.swp_poip;
- object->paging_offset = backing_object->paging_offset + backing_offset;
- TAILQ_INSERT_TAIL(&swap_pager_un_object_list, object, pager_object_list);
-
- /*
- * Convert backing object from OBJT_SWAP to
- * OBJT_DEFAULT. XXX - only the TAILQ_REMOVE is
- * actually necessary.
- */
- backing_object->type = OBJT_DEFAULT;
- TAILQ_REMOVE(&swap_pager_un_object_list, backing_object, pager_object_list);
- /*
- * free unnecessary blocks
- */
- swap_pager_freespace(object, 0,
- OFF_TO_IDX(object->paging_offset));
- vm_object_pip_wakeup(object);
- }
+
+ /*
+ * scrap the paging_offset junk and do a
+ * discrete copy. This also removes major
+ * assumptions about how the swap-pager
+ * works from where it doesn't belong. The
+ * new swapper is able to optimize the
+ * destroy-source case.
+ */
+
+ vm_object_pip_add(object, 1);
+ swap_pager_copy(
+ backing_object,
+ object,
+ OFF_TO_IDX(object->backing_object_offset), TRUE);
+ vm_object_pip_wakeup(object);
vm_object_pip_wakeup(backing_object);
}
@@ -1223,7 +1264,7 @@ vm_object_collapse(object)
vm_page_busy(pp);
if ((pp->valid == 0) &&
- !vm_pager_has_page(object, OFF_TO_IDX(object->paging_offset) + new_pindex, NULL, NULL)) {
+ !vm_pager_has_page(object, new_pindex, NULL, NULL)) {
/*
* Page still needed. Can't go any
* further.
@@ -1318,7 +1359,7 @@ again:
* interrupt -- minimize the spl transitions
*/
- if (vm_page_sleep(p, "vmopar", &p->busy))
+ if (vm_page_sleep_busy(p, TRUE, "vmopar"))
goto again;
if (clean_only && p->valid) {
@@ -1349,7 +1390,7 @@ again:
* The busy flags are only cleared at
* interrupt -- minimize the spl transitions
*/
- if (vm_page_sleep(p, "vmopar", &p->busy))
+ if (vm_page_sleep_busy(p, TRUE, "vmopar"))
goto again;
if (clean_only && p->valid) {
@@ -1589,11 +1630,10 @@ DB_SHOW_COMMAND(object, vm_object_print_static)
object, (int)object->type, (u_long)object->size,
object->resident_page_count, object->ref_count, object->flags);
/*
- * XXX no %qd in kernel. Truncate object->paging_offset and
- * object->backing_object_offset.
+ * XXX no %qd in kernel. Truncate object->backing_object_offset.
*/
- db_iprintf(" sref=%d, offset=0x%lx, backing_object(%d)=(%p)+0x%lx\n",
- object->shadow_count, (long)object->paging_offset,
+ db_iprintf(" sref=%d, backing_object(%d)=(%p)+0x%lx\n",
+ object->shadow_count,
object->backing_object ? object->backing_object->ref_count : 0,
object->backing_object, (long)object->backing_object_offset);
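The swap_pager_copy() calls above use the new object-relative convention; a sketch of the prototype as inferred from the two call sites in this diff (not a copy of the real declaration):

/*
 * Inferred calling convention: copy srcobject's swap assignments
 * into dstobject starting at the given page index in the source,
 * optionally destroying the source's backing store.
 */
void	swap_pager_copy(vm_object_t srcobject, vm_object_t dstobject,
	    vm_pindex_t base_index, int destroysource);

/* collapse case: fold backing_object's swap into object, destroy it */
swap_pager_copy(backing_object, object,
    OFF_TO_IDX(object->backing_object_offset), TRUE);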
diff --git a/sys/vm/vm_object.h b/sys/vm/vm_object.h
index 9897393..7f54ab6 100644
--- a/sys/vm/vm_object.h
+++ b/sys/vm/vm_object.h
@@ -61,7 +61,7 @@
* any improvements or extensions that they make and grant Carnegie the
* rights to redistribute these changes.
*
- * $Id: vm_object.h,v 1.50 1998/08/06 08:33:19 dfr Exp $
+ * $Id: vm_object.h,v 1.51 1998/08/24 08:39:37 dfr Exp $
*/
/*
@@ -81,6 +81,7 @@ typedef enum obj_type objtype_t;
* Types defined:
*
* vm_object_t Virtual memory object.
+ *
*/
struct vm_object {
@@ -94,32 +95,49 @@ struct vm_object {
int ref_count; /* How many refs?? */
int shadow_count; /* how many objects that this is a shadow for */
int pg_color; /* color of first page in obj */
- int id; /* ID for no purpose, other than info */
+#if 0
+ int id; /* ID for no purpose, other than info */
+#endif
+ int hash_rand; /* vm hash table randomizer */
u_short flags; /* see below */
u_short paging_in_progress; /* Paging (in or out) so don't collapse or destroy */
u_short behavior; /* see below */
int resident_page_count; /* number of resident pages */
- int cache_count; /* number of cached pages */
- int wire_count; /* number of wired pages */
- vm_ooffset_t paging_offset; /* Offset into paging space */
+ int cache_count; /* number of cached pages */
+ int wire_count; /* number of wired pages */
struct vm_object *backing_object; /* object that I'm a shadow of */
vm_ooffset_t backing_object_offset;/* Offset in backing object */
vm_offset_t last_read; /* last read in object -- detect seq behavior */
- vm_page_t page_hint; /* hint for last looked-up or allocated page */
TAILQ_ENTRY(vm_object) pager_object_list; /* list of all objects of this pager type */
void *handle;
union {
+ /*
+ * VNode pager
+ *
+ * vnp_size - current size of file
+ */
struct {
- off_t vnp_size; /* Current size of file */
+ off_t vnp_size;
} vnp;
+
+ /*
+ * Device pager
+ *
+ * devp_pglist - list of allocated pages
+ */
struct {
- TAILQ_HEAD(, vm_page) devp_pglist; /* list of pages allocated */
+ TAILQ_HEAD(, vm_page) devp_pglist;
} devp;
+
+ /*
+ * Swap pager
+ *
+ * swp_bcount - number of swap 'swblock' metablocks, each
+ * contains up to 16 swapblk assignments.
+ * see vm/swap_pager.h
+ */
struct {
- int swp_nblocks;
- int swp_allocsize;
- struct swblock *swp_blocks;
- short swp_poip;
+ int swp_bcount;
} swp;
} un_pager;
};
@@ -132,7 +150,7 @@ struct vm_object {
#define OBJ_NOSPLIT 0x0010 /* dont split this object */
#define OBJ_PIPWNT 0x0040 /* paging in progress wanted */
#define OBJ_WRITEABLE 0x0080 /* object has been made writable */
-#define OBJ_MIGHTBEDIRTY 0x0100 /* object might be dirty */
+#define OBJ_MIGHTBEDIRTY 0x0100 /* object might be dirty */
#define OBJ_CLEANING 0x0200
#define OBJ_OPT 0x1000 /* I/O optimization */
#define OBJ_ONEMAPPING 0x2000 /* One USE (a single, non-forked) mapping flag */
@@ -197,12 +215,21 @@ vm_object_pip_wakeup(vm_object_t object)
}
static __inline void
-vm_object_pip_sleep(vm_object_t object, char *waitid)
+vm_object_pip_wakeupn(vm_object_t object, int i)
{
- int s;
+ if (i)
+ atomic_subtract_short(&object->paging_in_progress, i);
+ if ((object->flags & OBJ_PIPWNT) && object->paging_in_progress == 0) {
+ vm_object_clear_flag(object, OBJ_PIPWNT);
+ wakeup(object);
+ }
+}
+static __inline void
+vm_object_pip_sleep(vm_object_t object, char *waitid)
+{
if (object->paging_in_progress) {
- s = splvm();
+ int s = splvm();
if (object->paging_in_progress) {
vm_object_set_flag(object, OBJ_PIPWNT);
tsleep(object, PVM, waitid, 0);
diff --git a/sys/vm/vm_page.c b/sys/vm/vm_page.c
index c953559..2f0f4bd 100644
--- a/sys/vm/vm_page.c
+++ b/sys/vm/vm_page.c
@@ -34,7 +34,7 @@
* SUCH DAMAGE.
*
* from: @(#)vm_page.c 7.4 (Berkeley) 5/7/91
- * $Id: vm_page.c,v 1.115 1999/01/08 17:31:27 eivind Exp $
+ * $Id: vm_page.c,v 1.116 1999/01/10 01:58:29 eivind Exp $
*/
/*
@@ -83,6 +83,7 @@
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
+#include <vm/vm_pager.h>
#include <vm/vm_extern.h>
static void vm_page_queue_init __P((void));
@@ -95,7 +96,7 @@ static vm_page_t vm_page_select_cache __P((vm_object_t, vm_pindex_t));
* page structure.
*/
-static struct pglist *vm_page_buckets; /* Array of buckets */
+static struct vm_page **vm_page_buckets; /* Array of buckets */
static int vm_page_bucket_count; /* How big is array? */
static int vm_page_hash_mask; /* Mask for hash function */
static volatile int vm_page_bucket_generation;
@@ -162,7 +163,6 @@ static u_short vm_page_dev_bsize_chunks[] = {
};
static __inline int vm_page_hash __P((vm_object_t object, vm_pindex_t pindex));
-static int vm_page_freechk_and_unqueue __P((vm_page_t m));
static void vm_page_free_wakeup __P((void));
/*
@@ -206,7 +206,7 @@ vm_page_startup(starta, enda, vaddr)
{
register vm_offset_t mapped;
register vm_page_t m;
- register struct pglist *bucket;
+ register struct vm_page **bucket;
vm_size_t npages, page_range;
register vm_offset_t new_start;
int i;
@@ -256,24 +256,30 @@ vm_page_startup(starta, enda, vaddr)
*
* The number of buckets MUST BE a power of 2, and the actual value is
* the next power of 2 greater than the number of physical pages in
- * the system.
+ * the system.
+ *
+ * We make the hash table approximately 2x the number of pages to
+ * reduce the chain length. With singly-linked buckets the table is
+ * about the same size as the old 1x TAILQ-based table, but the
+ * chains will be roughly half as long.
*
* Note: This computation can be tweaked if desired.
*/
- vm_page_buckets = (struct pglist *) vaddr;
+ vm_page_buckets = (struct vm_page **)vaddr;
bucket = vm_page_buckets;
if (vm_page_bucket_count == 0) {
vm_page_bucket_count = 1;
while (vm_page_bucket_count < atop(total))
vm_page_bucket_count <<= 1;
}
+ vm_page_bucket_count <<= 1;
vm_page_hash_mask = vm_page_bucket_count - 1;
/*
* Validate these addresses.
*/
- new_start = start + vm_page_bucket_count * sizeof(struct pglist);
+ new_start = start + vm_page_bucket_count * sizeof(struct vm_page *);
new_start = round_page(new_start);
mapped = round_page(vaddr);
vaddr = pmap_map(mapped, start, new_start,
@@ -283,7 +289,7 @@ vm_page_startup(starta, enda, vaddr)
bzero((caddr_t) mapped, vaddr - mapped);
for (i = 0; i < vm_page_bucket_count; i++) {
- TAILQ_INIT(bucket);
+ *bucket = NULL;
bucket++;
}
@@ -353,13 +359,18 @@ vm_page_startup(starta, enda, vaddr)
*
* NOTE: This macro depends on vm_page_bucket_count being a power of 2.
* This routine may not block.
+ *
+ * We try to randomize the hash based on the object to spread the pages
+ * out in the hash table without it costing us too much.
*/
static __inline int
vm_page_hash(object, pindex)
vm_object_t object;
vm_pindex_t pindex;
{
- return ((((uintptr_t) object) >> 5) + (pindex >> 1)) & vm_page_hash_mask;
+ int i = ((uintptr_t)object + pindex) ^ object->hash_rand;
+
+ return(i & vm_page_hash_mask);
}
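A standalone illustration of how hash_rand spreads objects across the buckets; the table size and object addresses are made up for the demo:

#include <stdio.h>
#include <stdint.h>

#define HASH_MASK 1023			/* demo table: 1024 buckets */

static int
page_hash(uintptr_t object, uintptr_t pindex, int hash_rand)
{
	/* same computation as vm_page_hash() above */
	return (int)(((object + pindex) ^ (uintptr_t)hash_rand) & HASH_MASK);
}

int
main(void)
{
	int hash_rand = 0;
	uintptr_t obj;

	/*
	 * Each new object backs hash_rand off by 129, as in
	 * _vm_object_allocate(), so same-pindex pages in different
	 * objects land in well-separated buckets.
	 */
	for (obj = 0x1000; obj <= 0x3000; obj += 0x1000) {
		hash_rand -= 129;
		printf("object %#lx, pindex 0 -> bucket %d\n",
		    (unsigned long)obj, page_hash(obj, 0, hash_rand));
	}
	return (0);
}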
/*
@@ -382,7 +393,7 @@ vm_page_insert(m, object, pindex)
register vm_object_t object;
register vm_pindex_t pindex;
{
- register struct pglist *bucket;
+ register struct vm_page **bucket;
if (m->object != NULL)
panic("vm_page_insert: already inserted");
@@ -399,7 +410,8 @@ vm_page_insert(m, object, pindex)
*/
bucket = &vm_page_buckets[vm_page_hash(object, pindex)];
- TAILQ_INSERT_TAIL(bucket, m, hashq);
+ m->hnext = *bucket;
+ *bucket = m;
vm_page_bucket_generation++;
/*
@@ -407,7 +419,9 @@ vm_page_insert(m, object, pindex)
*/
TAILQ_INSERT_TAIL(&object->memq, m, listq);
+#if 0
m->object->page_hint = m;
+#endif
m->object->generation++;
if (m->wire_count)
@@ -417,50 +431,48 @@ vm_page_insert(m, object, pindex)
object->cache_count++;
/*
- * And show that the object has one more resident page.
+ * show that the object has one more resident page.
*/
object->resident_page_count++;
}
/*
- * vm_page_remove: [ internal use only ]
+ * vm_page_remove:
* NOTE: used by device pager as well -wfj
*
* Removes the given mem entry from the object/offset-page
- * table and the object page list.
+ * table and the object page list, but does not invalidate/terminate
+ * the backing store.
*
* The object and page must be locked, and at splhigh.
+ * The underlying pmap entry (if any) is NOT removed here.
* This routine may not block.
- *
- * I do not think the underlying pmap entry (if any) is removed here.
*/
-void
+vm_object_t
vm_page_remove(m)
- register vm_page_t m;
+ vm_page_t m;
{
- register struct pglist *bucket;
+ register struct vm_page **bucket;
vm_object_t object;
if (m->object == NULL)
- return;
+ return(NULL);
#if !defined(MAX_PERF)
if ((m->flags & PG_BUSY) == 0) {
panic("vm_page_remove: page not busy");
}
#endif
-
- vm_page_flag_clear(m, PG_BUSY);
- if (m->flags & PG_WANTED) {
- vm_page_flag_clear(m, PG_WANTED);
- wakeup(m);
- }
+
+ /*
+ * Basically destroy the page.
+ */
+
+ vm_page_wakeup(m);
object = m->object;
- if (object->page_hint == m)
- object->page_hint = NULL;
if (m->wire_count)
object->wire_count--;
@@ -469,11 +481,23 @@ vm_page_remove(m)
object->cache_count--;
/*
- * Remove from the object_object/offset hash table
+ * Remove from the object/offset hash table. The object must
+ * be on the hash queue; we will panic if it isn't.
+ *
+ * Note: we must NULL-out m->hnext to prevent loops in detached
+ * buffers with vm_page_lookup().
*/
bucket = &vm_page_buckets[vm_page_hash(m->object, m->pindex)];
- TAILQ_REMOVE(bucket, m, hashq);
+ while (*bucket != m) {
+#if !defined(MAX_PERF)
+ if (*bucket == NULL)
+ panic("vm_page_remove(): page not found in hash");
+#endif
+ bucket = &(*bucket)->hnext;
+ }
+ *bucket = m->hnext;
+ m->hnext = NULL;
vm_page_bucket_generation++;
/*
@@ -490,6 +514,8 @@ vm_page_remove(m)
object->generation++;
m->object = NULL;
+
+ return(object);
}
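The bucket unlink above is the classic pointer-to-pointer walk over a singly-linked list, which needs no special case for the head; the generic form of the idiom:

#include <stddef.h>

struct node {
	struct node *next;
};

/*
 * Unlink n from the list rooted at *head.  Mirrors the hash bucket
 * removal above, including NULLing the link to guard against stale
 * traversals of a detached element.
 */
static void
list_unlink(struct node **head, struct node *n)
{
	struct node **pp;

	for (pp = head; *pp != n; pp = &(*pp)->next)
		;	/* would fault/panic if n were absent */
	*pp = n->next;
	n->next = NULL;
}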
/*
@@ -498,8 +524,14 @@ vm_page_remove(m)
* Returns the page associated with the object/offset
* pair specified; if none is found, NULL is returned.
*
+ * NOTE: the code below does not lock. It will operate properly if
+ * an interrupt makes a change, but the generation algorithm will not
+ * operate properly in an SMP environment where both CPUs are able to run
+ * kernel code simultaneously.
+ *
* The object must be locked. No side effects.
* This routine may not block.
+ * This is a critical path routine
*/
vm_page_t
@@ -508,25 +540,29 @@ vm_page_lookup(object, pindex)
register vm_pindex_t pindex;
{
register vm_page_t m;
- register struct pglist *bucket;
+ register struct vm_page **bucket;
int generation;
/*
* Search the hash table for this object/offset pair
*/
+#if 0
if (object->page_hint && (object->page_hint->pindex == pindex) &&
(object->page_hint->object == object))
return object->page_hint;
+#endif
retry:
generation = vm_page_bucket_generation;
bucket = &vm_page_buckets[vm_page_hash(object, pindex)];
- for (m = TAILQ_FIRST(bucket); m != NULL; m = TAILQ_NEXT(m,hashq)) {
+ for (m = *bucket; m != NULL; m = m->hnext) {
if ((m->object == object) && (m->pindex == pindex)) {
if (vm_page_bucket_generation != generation)
goto retry;
+#if 0
m->object->page_hint = m;
+#endif
return (m);
}
}
@@ -545,6 +581,16 @@ retry:
* This routine may not block.
*
* Note: this routine will raise itself to splvm(), the caller need not.
+ *
+ * Note: swap associated with the page must be invalidated by the move. We
+ * have to do this for several reasons: (1) we aren't freeing the
+ * page, (2) we are dirtying the page, (3) the VM system is probably
+ * moving the page from object A to B, and will then later move
+ * the backing store from A to B and we can't have a conflict.
+ *
+ * Note: we *always* dirty the page. It is necessary both for the
+ * fact that we moved it, and because we may be invalidating
+ * swap.
*/
void
@@ -558,6 +604,7 @@ vm_page_rename(m, new_object, new_pindex)
s = splvm();
vm_page_remove(m);
vm_page_insert(m, new_object, new_pindex);
+ m->dirty = VM_PAGE_BITS_ALL;
splx(s);
}
@@ -625,6 +672,12 @@ vm_page_unqueue(m)
*
* Find a page on the specified queue with color optimization.
*
+ * The page coloring optimization attempts to locate a page
+ * that does not overload other nearby pages in the object in
+ * the CPU's L1 or L2 caches. We need this optimization because
+ * cpu caches tend to be physical caches, while object spaces tend
+ * to be virtual.
+ *
* This routine must be called at splvm().
* This routine may not block.
*/
@@ -759,7 +812,10 @@ vm_page_select_free(object, pindex, prefqueue)
int i,j;
int index, hindex;
#endif
- vm_page_t m, mh;
+ vm_page_t m;
+#if 0
+ vm_page_t mh;
+#endif
int oqueuediff;
struct vpgqueues *pq;
@@ -768,6 +824,7 @@ vm_page_select_free(object, pindex, prefqueue)
else
oqueuediff = PQ_ZERO - PQ_FREE;
+#if 0
if (mh = object->page_hint) {
if (mh->pindex == (pindex - 1)) {
if ((mh->flags & PG_FICTITIOUS) == 0) {
@@ -785,6 +842,7 @@ vm_page_select_free(object, pindex, prefqueue)
}
}
}
+#endif
pq = &vm_page_queues[prefqueue];
@@ -857,6 +915,8 @@ vm_page_select_free(object, pindex, prefqueue)
* Additional special handling is required when called from an
* interrupt (VM_ALLOC_INTERRUPT). We are not allowed to mess with
* the page cache in this case.
+ *
+ * vm_page_alloc()
*/
vm_page_t
vm_page_alloc(object, pindex, page_req)
@@ -864,7 +924,7 @@ vm_page_alloc(object, pindex, page_req)
vm_pindex_t pindex;
int page_req;
{
- register vm_page_t m;
+ register vm_page_t m = NULL;
struct vpgqueues *pq;
vm_object_t oldobject;
int queue, qtype;
@@ -873,12 +933,17 @@ vm_page_alloc(object, pindex, page_req)
KASSERT(!vm_page_lookup(object, pindex),
("vm_page_alloc: page already allocated"));
+ /*
+ * The pager is allowed to eat deeper into the free page list.
+ */
+
if ((curproc == pageproc) && (page_req != VM_ALLOC_INTERRUPT)) {
page_req = VM_ALLOC_SYSTEM;
};
s = splvm();
+loop:
switch (page_req) {
case VM_ALLOC_NORMAL:
@@ -961,20 +1026,36 @@ vm_page_alloc(object, pindex, page_req)
queue = m->queue;
qtype = queue - m->pc;
- if (qtype == PQ_ZERO)
- vm_page_zero_count--;
+
+ /*
+ * Cache pages must be formally freed (and doubly so with the
+ * new pagerops functions). We free the page and try again.
+ *
+ * This also has the side effect of ensuring that the minfreepage
+ * wall is held more tightly versus the old code.
+ */
+
+ if (qtype == PQ_CACHE) {
+#if !defined(MAX_PERF)
+ if (m->dirty)
+ panic("found dirty cache page %p", m);
+
+#endif
+ vm_page_busy(m);
+ vm_page_protect(m, VM_PROT_NONE);
+ vm_page_free(m);
+ goto loop;
+ }
+
pq = &vm_page_queues[queue];
TAILQ_REMOVE(pq->pl, m, pageq);
(*pq->cnt)--;
(*pq->lcnt)--;
oldobject = NULL;
+
if (qtype == PQ_ZERO) {
+ vm_page_zero_count--;
m->flags = PG_ZERO | PG_BUSY;
- } else if (qtype == PQ_CACHE) {
- oldobject = m->object;
- vm_page_busy(m);
- vm_page_remove(m);
- m->flags = PG_BUSY;
} else {
m->flags = PG_BUSY;
}
@@ -1004,6 +1085,12 @@ vm_page_alloc(object, pindex, page_req)
(cnt.v_free_count < cnt.v_pageout_free_min))
pagedaemon_wakeup();
+#if 0
+ /*
+ * (code removed - was previously a manual breakout of the act of
+ * freeing a page from cache. We now just call vm_page_free() on
+ * a cache page and loop so this code no longer needs to be here)
+ */
if ((qtype == PQ_CACHE) &&
((page_req == VM_ALLOC_NORMAL) || (page_req == VM_ALLOC_ZERO)) &&
oldobject && (oldobject->type == OBJT_VNODE) &&
@@ -1017,6 +1104,7 @@ vm_page_alloc(object, pindex, page_req)
}
}
}
+#endif
splx(s);
return (m);
@@ -1048,6 +1136,33 @@ vm_wait()
}
/*
+ * vm_await: (also see VM_AWAIT macro)
+ *
+ * asleep on an event that will signal when free pages are available
+ * for allocation.
+ */
+
+void
+vm_await()
+{
+ int s;
+
+ s = splvm();
+ if (curproc == pageproc) {
+ vm_pageout_pages_needed = 1;
+ asleep(&vm_pageout_pages_needed, PSWP, "vmwait", 0);
+ } else {
+ if (!vm_pages_needed) {
+ vm_pages_needed++;
+ wakeup(&vm_pages_needed);
+ }
+ asleep(&cnt.v_free_count, PVM, "vmwait", 0);
+ }
+ splx(s);
+}
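vm_await() relies on the asleep()/await() pair: as I understand that interface, asleep() only registers the process on the sleep queue and returns immediately, and the caller blocks later with await(). A sketch of the intended pattern, assuming that two-step API and a hypothetical predicate name:

/*
 * Sketch only -- assumes asleep() registers without blocking and a
 * later await(pri, timo) performs the actual sleep.  The predicate
 * is hypothetical.
 */
if (free_pages_are_short()) {	/* hypothetical check */
	VM_AWAIT;		/* vm_await(): asleep() on the event */
	/* ... drop locks, finish other work ... */
	await(PVM, 0);		/* now actually block until wakeup */
}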
+
+#if 0
+/*
* vm_page_sleep:
*
* Block until page is no longer busy.
@@ -1069,6 +1184,38 @@ vm_page_sleep(vm_page_t m, char *msg, char *busy) {
return slept;
}
+#endif
+
+#if 0
+
+/*
+ * vm_page_asleep:
+ *
+ * Similar to vm_page_sleep(), but does not block. Returns 0 if
+ * the page is not busy, or 1 if the page is busy.
+ *
+ * This routine has the side effect of calling asleep() if the page
+ * was busy (1 returned).
+ */
+
+int
+vm_page_asleep(vm_page_t m, char *msg, char *busy) {
+ int slept = 0;
+ if ((busy && *busy) || (m->flags & PG_BUSY)) {
+ int s;
+ s = splvm();
+ if ((busy && *busy) || (m->flags & PG_BUSY)) {
+ vm_page_flag_set(m, PG_WANTED);
+ asleep(m, PVM, msg, 0);
+ slept = 1;
+ }
+ splx(s);
+ }
+ return slept;
+}
+
+#endif
+
/*
* vm_page_activate:
*
@@ -1111,13 +1258,49 @@ vm_page_activate(m)
*
* This routine may not block.
*/
-static int
-vm_page_freechk_and_unqueue(m)
- vm_page_t m;
+static __inline void
+vm_page_free_wakeup()
{
- vm_object_t oldobject;
+ /*
+ * if pageout daemon needs pages, then tell it that there are
+ * some free.
+ */
+ if (vm_pageout_pages_needed) {
+ wakeup(&vm_pageout_pages_needed);
+ vm_pageout_pages_needed = 0;
+ }
+ /*
+ * wakeup processes that are waiting on memory if we hit a
+ * high water mark. And wakeup scheduler process if we have
+ * lots of memory. this process will swapin processes.
+ */
+ if (vm_pages_needed &&
+ ((cnt.v_free_count + cnt.v_cache_count) >= cnt.v_free_min)) {
+ wakeup(&cnt.v_free_count);
+ vm_pages_needed = 0;
+ }
+}
- oldobject = m->object;
+/*
+ * vm_page_free_toq:
+ *
+ * Returns the given page to the PQ_FREE or PQ_ZERO list,
+ * disassociating it with any VM object.
+ *
+ * Object and page must be locked prior to entry.
+ * This routine may not block.
+ */
+
+void
+vm_page_free_toq(vm_page_t m, int queue)
+{
+ int s;
+ struct vpgqueues *pq;
+ vm_object_t object = m->object;
+
+ s = splvm();
+
+ cnt.v_tfree++;
#if !defined(MAX_PERF)
if (m->busy || ((m->queue - m->pc) == PQ_FREE) ||
@@ -1133,11 +1316,24 @@ vm_page_freechk_and_unqueue(m)
}
#endif
+ /*
+ * unqueue, then remove page. Note that we cannot destroy
+ * the page here because we do not want to call the pager's
+ * callback routine until after we've put the page on the
+ * appropriate free queue.
+ */
+
vm_page_unqueue_nowakeup(m);
vm_page_remove(m);
+ /*
+ * If fictitious remove object association and
+ * return, otherwise delay object association removal.
+ */
+
if ((m->flags & PG_FICTITIOUS) != 0) {
- return 0;
+ splx(s);
+ return;
}
m->valid = 0;
@@ -1156,10 +1352,17 @@ vm_page_freechk_and_unqueue(m)
cnt.v_wire_count--;
}
- if (oldobject && (oldobject->type == OBJT_VNODE) &&
- ((oldobject->flags & OBJ_DEAD) == 0)) {
- struct vnode *vp;
- vp = (struct vnode *) oldobject->handle;
+ /*
+ * If we've exhausted the object's resident pages we want to free
+ * it up.
+ */
+
+ if (object &&
+ (object->type == OBJT_VNODE) &&
+ ((object->flags & OBJ_DEAD) == 0)
+ ) {
+ struct vnode *vp = (struct vnode *)object->handle;
+
if (vp && VSHOULDFREE(vp)) {
if ((vp->v_flag & (VTBFREE|VDOOMED|VFREE)) == 0) {
TAILQ_INSERT_TAIL(&vnode_tobefree_list, vp, v_freelist);
@@ -1172,107 +1375,31 @@ vm_page_freechk_and_unqueue(m)
pmap_page_is_free(m);
#endif
- return 1;
-}
-
-/*
- * helper routine for vm_page_free and vm_page_free_zero.
- *
- * This routine may not block.
- */
-static __inline void
-vm_page_free_wakeup()
-{
-
-/*
- * if pageout daemon needs pages, then tell it that there are
- * some free.
- */
- if (vm_pageout_pages_needed) {
- wakeup(&vm_pageout_pages_needed);
- vm_pageout_pages_needed = 0;
- }
- /*
- * wakeup processes that are waiting on memory if we hit a
- * high water mark. And wakeup scheduler process if we have
- * lots of memory. this process will swapin processes.
- */
- if (vm_pages_needed &&
- ((cnt.v_free_count + cnt.v_cache_count) >= cnt.v_free_min)) {
- wakeup(&cnt.v_free_count);
- vm_pages_needed = 0;
- }
-}
-
-/*
- * vm_page_free:
- *
- * Returns the given page to the free list,
- * disassociating it with any VM object.
- *
- * Object and page must be locked prior to entry.
- * This routine may not block.
- */
-void
-vm_page_free(m)
- register vm_page_t m;
-{
- int s;
- struct vpgqueues *pq;
-
- s = splvm();
-
- cnt.v_tfree++;
-
- if (!vm_page_freechk_and_unqueue(m)) {
- splx(s);
- return;
- }
-
- m->queue = PQ_FREE + m->pc;
+ m->queue = queue + m->pc;
pq = &vm_page_queues[m->queue];
++(*pq->lcnt);
++(*pq->cnt);
- /*
- * If the pageout process is grabbing the page, it is likely
- * that the page is NOT in the cache. It is more likely that
- * the page will be partially in the cache if it is being
- * explicitly freed.
- */
- if (curproc == pageproc) {
- TAILQ_INSERT_TAIL(pq->pl, m, pageq);
- } else {
- TAILQ_INSERT_HEAD(pq->pl, m, pageq);
- }
- vm_page_free_wakeup();
- splx(s);
-}
-
-void
-vm_page_free_zero(m)
- register vm_page_t m;
-{
- int s;
- struct vpgqueues *pq;
-
- s = splvm();
-
- cnt.v_tfree++;
+ if (queue == PQ_ZERO) {
+ TAILQ_INSERT_HEAD(pq->pl, m, pageq);
+ ++vm_page_zero_count;
+ } else {
+ /*
+ * If the pageout process is grabbing the page, it is likely
+ * that the page is NOT in the cache. It is more likely that
+ * the page will be partially in the cache if it is being
+ * explicitly freed.
+ */
- if (!vm_page_freechk_and_unqueue(m)) {
- splx(s);
- return;
+ if (curproc == pageproc) {
+ TAILQ_INSERT_TAIL(pq->pl, m, pageq);
+ } else {
+ TAILQ_INSERT_HEAD(pq->pl, m, pageq);
+ }
}
- m->queue = PQ_ZERO + m->pc;
- pq = &vm_page_queues[m->queue];
- ++(*pq->lcnt);
- ++(*pq->cnt);
-
- TAILQ_INSERT_HEAD(pq->pl, m, pageq);
- ++vm_page_zero_count;
vm_page_free_wakeup();
+
splx(s);
}
@@ -1311,6 +1438,17 @@ vm_page_wire(m)
* Release one wiring of this page, potentially
* enabling it to be paged again.
*
+ * Many pages placed on the inactive queue should actually go
+ * into the cache, but it is difficult to figure out which. What
+ * we do instead, if the inactive target is well met, is to put
+ * clean pages at the head of the inactive queue instead of the tail.
+ * This will cause them to be moved to the cache more quickly and
+ * if not actively re-referenced, freed more quickly. If we just
+ * stick these pages at the end of the inactive queue, heavy filesystem
+ * meta-data accesses can cause an unnecessary paging load on memory bound
+ * processes. This optimization causes one-time-use metadata to be
+ * reused more quickly.
+ *
* The page queues must be locked.
* This routine may not block.
*/
@@ -1351,7 +1489,8 @@ vm_page_unwire(m, activate)
/*
- * Move the specified page to the inactive queue.
+ * Move the specified page to the inactive queue. If the page has
+ * any associated swap, the swap is deallocated.
*
* This routine may not block.
*/
@@ -1383,7 +1522,8 @@ vm_page_deactivate(m)
/*
* vm_page_cache
*
- * Put the specified page onto the page cache queue (if appropriate).
+ * Put the specified page onto the page cache queue (if appropriate).
+ *
* This routine may not block.
*/
void
@@ -1624,7 +1764,7 @@ again1:
}
next = TAILQ_NEXT(m, pageq);
- if (vm_page_sleep(m, "vpctw0", &m->busy))
+ if (vm_page_sleep_busy(m, TRUE, "vpctw0"))
goto again1;
vm_page_test_dirty(m);
if (m->dirty) {
@@ -1652,7 +1792,7 @@ again1:
}
next = TAILQ_NEXT(m, pageq);
- if (vm_page_sleep(m, "vpctw1", &m->busy))
+ if (vm_page_sleep_busy(m, TRUE, "vpctw1"))
goto again1;
vm_page_test_dirty(m);
if (m->dirty) {
diff --git a/sys/vm/vm_page.h b/sys/vm/vm_page.h
index 3149391..f9e4926 100644
--- a/sys/vm/vm_page.h
+++ b/sys/vm/vm_page.h
@@ -61,7 +61,7 @@
* any improvements or extensions that they make and grant Carnegie the
* rights to redistribute these changes.
*
- * $Id: vm_page.h,v 1.48 1998/10/28 13:37:02 dg Exp $
+ * $Id: vm_page.h,v 1.49 1999/01/08 17:31:28 eivind Exp $
*/
/*
@@ -105,10 +105,10 @@ TAILQ_HEAD(pglist, vm_page);
struct vm_page {
TAILQ_ENTRY(vm_page) pageq; /* queue info for FIFO queue or free list (P) */
- TAILQ_ENTRY(vm_page) hashq; /* hash table links (O) */
- TAILQ_ENTRY(vm_page) listq; /* pages in same object (O) */
+ struct vm_page *hnext; /* hash table link (O,P) */
+ TAILQ_ENTRY(vm_page) listq; /* pages in same object (O) */
- vm_object_t object; /* which object am I in (O,P) */
+ vm_object_t object; /* which object am I in (O,P)*/
vm_pindex_t pindex; /* offset into object (O,P) */
vm_offset_t phys_addr; /* physical address of page */
u_short queue; /* page queue index */
@@ -130,6 +130,13 @@ struct vm_page {
};
/*
+ * note SWAPBLK_NONE is a flag, basically the high bit.
+ */
+
+#define SWAPBLK_MASK ((daddr_t)((u_daddr_t)-1 >> 1)) /* mask */
+#define SWAPBLK_NONE ((daddr_t)((u_daddr_t)SWAPBLK_MASK + 1))/* flag */
+
+/*
* Page coloring parameters
*/
/* Each of PQ_FREE, PQ_ZERO and PQ_CACHE have PQ_HASH_SIZE entries */
@@ -201,14 +208,15 @@ extern struct vpgqueues {
*
* Note: PG_FILLED and PG_DIRTY are added for the filesystems.
*/
-#define PG_BUSY 0x01 /* page is in transit (O) */
-#define PG_WANTED 0x02 /* someone is waiting for page (O) */
-#define PG_FICTITIOUS 0x08 /* physical page doesn't exist (O) */
-#define PG_WRITEABLE 0x10 /* page is mapped writeable */
-#define PG_MAPPED 0x20 /* page is mapped */
-#define PG_ZERO 0x40 /* page is zeroed */
-#define PG_REFERENCED 0x80 /* page has been referenced */
-#define PG_CLEANCHK 0x100 /* page will be checked for cleaning */
+#define PG_BUSY 0x0001 /* page is in transit (O) */
+#define PG_WANTED 0x0002 /* someone is waiting for page (O) */
+#define PG_FICTITIOUS 0x0008 /* physical page doesn't exist (O) */
+#define PG_WRITEABLE 0x0010 /* page is mapped writeable */
+#define PG_MAPPED 0x0020 /* page is mapped */
+#define PG_ZERO 0x0040 /* page is zeroed */
+#define PG_REFERENCED 0x0080 /* page has been referenced */
+#define PG_CLEANCHK 0x0100 /* page will be checked for cleaning */
+#define PG_SWAPINPROG 0x0200 /* swap I/O in progress on page */
/*
* Misc constants.
@@ -307,16 +315,36 @@ vm_page_busy(vm_page_t m)
vm_page_flag_set(m, PG_BUSY);
}
+/*
+ * vm_page_flash:
+ *
+ * wakeup anyone waiting for the page.
+ */
+
static __inline void
-vm_page_wakeup(vm_page_t m)
+vm_page_flash(vm_page_t m)
{
- vm_page_flag_clear(m, PG_BUSY);
if (m->flags & PG_WANTED) {
vm_page_flag_clear(m, PG_WANTED);
wakeup(m);
}
}
+/*
+ * vm_page_wakeup:
+ *
+ * clear the PG_BUSY flag and wakeup anyone waiting for the
+ * page.
+ *
+ */
+
+static __inline void
+vm_page_wakeup(vm_page_t m)
+{
+ vm_page_flag_clear(m, PG_BUSY);
+ vm_page_flash(m);
+}
+
static __inline void
vm_page_io_start(vm_page_t m)
{
@@ -327,10 +355,8 @@ static __inline void
vm_page_io_finish(vm_page_t m)
{
atomic_subtract_char(&m->busy, 1);
- if ((m->flags & PG_WANTED) && m->busy == 0) {
- vm_page_flag_clear(m, PG_WANTED);
- wakeup(m);
- }
+ if (m->busy == 0)
+ vm_page_flash(m);
}
@@ -353,12 +379,13 @@ vm_page_t vm_page_alloc __P((vm_object_t, vm_pindex_t, int));
vm_page_t vm_page_grab __P((vm_object_t, vm_pindex_t, int));
void vm_page_cache __P((register vm_page_t));
static __inline void vm_page_copy __P((vm_page_t, vm_page_t));
+static __inline void vm_page_free __P((vm_page_t));
+static __inline void vm_page_free_zero __P((vm_page_t));
+void vm_page_destroy __P((vm_page_t));
void vm_page_deactivate __P((vm_page_t));
-void vm_page_free __P((vm_page_t));
-void vm_page_free_zero __P((vm_page_t));
void vm_page_insert __P((vm_page_t, vm_object_t, vm_pindex_t));
vm_page_t vm_page_lookup __P((vm_object_t, vm_pindex_t));
-void vm_page_remove __P((vm_page_t));
+vm_object_t vm_page_remove __P((vm_page_t));
void vm_page_rename __P((vm_page_t, vm_object_t, vm_pindex_t));
vm_offset_t vm_page_startup __P((vm_offset_t, vm_offset_t, vm_offset_t));
void vm_page_unwire __P((vm_page_t, int));
@@ -374,7 +401,11 @@ int vm_page_bits __P((int, int));
vm_page_t vm_page_list_find __P((int, int));
int vm_page_queue_index __P((vm_offset_t, int));
vm_page_t vm_page_select __P((vm_object_t, vm_pindex_t, int));
+#if 0
int vm_page_sleep(vm_page_t m, char *msg, char *busy);
+int vm_page_asleep(vm_page_t m, char *msg, char *busy);
+#endif
+void vm_page_free_toq(vm_page_t m, int queue);
/*
* Keep page from being freed by the page daemon
@@ -438,5 +469,64 @@ vm_page_copy(src_m, dest_m)
dest_m->valid = VM_PAGE_BITS_ALL;
}
+/*
+ * vm_page_free:
+ *
+ * Free a page
+ */
+static __inline void
+vm_page_free(m)
+ vm_page_t m;
+{
+ vm_page_free_toq(m, PQ_FREE);
+}
+
+/*
+ * vm_page_free_zero:
+ *
+ * Free a page to the zeroed-pages queue
+ */
+static __inline void
+vm_page_free_zero(m)
+ vm_page_t m;
+{
+ vm_page_free_toq(m, PQ_ZERO);
+}
+
+/*
+ * vm_page_sleep_busy:
+ *
+ * Wait until page is no longer PG_BUSY or (if also_m_busy is TRUE)
+ * m->busy is zero. Returns TRUE if it had to sleep ( including if
+ * it almost had to sleep and made temporary spl*() mods), FALSE
+ * otherwise.
+ *
+ * This routine assumes that interrupts can only remove the busy
+ * status from a page, not set the busy status or change it from
+ * PG_BUSY to m->busy or vice versa (which would create a timing
+ * window).
+ *
+ * Note that being an inline, this code will be well optimized.
+ */
+
+static __inline int
+vm_page_sleep_busy(vm_page_t m, int also_m_busy, const char *msg)
+{
+ if ((m->flags & PG_BUSY) || (also_m_busy && m->busy)) {
+ int s = splvm();
+ if ((m->flags & PG_BUSY) || (also_m_busy && m->busy)) {
+ /*
+ * Page is busy. Wait and retry.
+ */
+ vm_page_flag_set(m, PG_WANTED | PG_REFERENCED);
+ tsleep(m, PVM, msg, 0);
+ }
+ splx(s);
+ return(TRUE);
+ /* not reached */
+ }
+ return(FALSE);
+}
+
#endif /* KERNEL */
#endif /* !_VM_PAGE_ */
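The SWAPBLK_NONE encoding above reserves the sign bit of a daddr_t as the "no block" flag; a standalone demo of the same construction, with the type pinned to 32 bits for the example:

#include <stdio.h>
#include <stdint.h>

/* same construction as above, with daddr_t fixed at 32 bits */
#define DEMO_SWAPBLK_MASK ((int32_t)((uint32_t)-1 >> 1))	/* 0x7fffffff */
#define DEMO_SWAPBLK_NONE ((int32_t)((uint32_t)DEMO_SWAPBLK_MASK + 1))

int
main(void)
{
	int32_t swapblk = DEMO_SWAPBLK_NONE;

	if (swapblk == DEMO_SWAPBLK_NONE)
		printf("page has no swap block assigned\n");

	swapblk = 12345;	/* valid block numbers fit under the mask */
	printf("swapblk %ld, flag bit %s\n", (long)swapblk,
	    (swapblk & DEMO_SWAPBLK_NONE) ? "set" : "clear");
	return (0);
}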
diff --git a/sys/vm/vm_pageout.c b/sys/vm/vm_pageout.c
index 606981f..06f24d6 100644
--- a/sys/vm/vm_pageout.c
+++ b/sys/vm/vm_pageout.c
@@ -65,7 +65,7 @@
* any improvements or extensions that they make and grant Carnegie the
* rights to redistribute these changes.
*
- * $Id: vm_pageout.c,v 1.128 1998/10/25 17:44:59 phk Exp $
+ * $Id: vm_pageout.c,v 1.129 1998/10/31 17:21:31 peter Exp $
*/
/*
@@ -211,13 +211,10 @@ void pmap_collect(void);
* Clean the page and remove it from the laundry.
*
* We set the busy bit to cause potential page faults on this page to
- * block.
- *
- * And we set pageout-in-progress to keep the object from disappearing
- * during pageout. This guarantees that the page won't move from the
- * inactive queue. (However, any other page on the inactive queue may
- * move!)
+ * block. Note the careful timing, however, the busy bit isn't set till
+ * late and we cannot do anything that will mess with the page.
*/
+
static int
vm_pageout_clean(m)
vm_page_t m;
@@ -231,12 +228,23 @@ vm_pageout_clean(m)
object = m->object;
/*
+ * It doesn't cost us anything to pageout OBJT_DEFAULT or OBJT_SWAP
+ * with the new swapper, but we could have serious problems paging
+ * out other object types if there is insufficient memory.
+ *
+ * Unfortunately, checking free memory here is far too late, so the
+ * check has been moved up a procedural level.
+ */
+
+#if 0
+ /*
* If not OBJT_SWAP, additional memory may be needed to do the pageout.
* Try to avoid the deadlock.
*/
if ((object->type == OBJT_DEFAULT) &&
((cnt.v_free_count + cnt.v_cache_count) < cnt.v_pageout_free_min))
return 0;
+#endif
/*
* Don't mess with the page if it's busy.
@@ -245,12 +253,21 @@ vm_pageout_clean(m)
((m->busy != 0) || (m->flags & PG_BUSY)))
return 0;
+#if 0
+ /*
+ * XXX REMOVED XXX. vm_object_collapse() can block, which can
+ * change the page state. Calling vm_object_collapse() might also
+ * destroy or rename the page because we have not busied it yet!!!
+ * So this code segment is removed.
+ */
/*
- * Try collapsing before it's too late.
+ * Try collapsing before it's too late. XXX huh? Why are we doing
+ * this here?
*/
if (object->backing_object) {
vm_object_collapse(object);
}
+#endif
mc[vm_pageout_page_count] = m;
pageout_count = 1;
@@ -351,6 +368,16 @@ do_backward:
return vm_pageout_flush(&mc[page_base], pageout_count, 0);
}
+/*
+ * vm_pageout_flush() - launder the given pages
+ *
+ * The given pages are laundered. Note that we setup for the start of
+ * I/O ( i.e. busy the page ), mark it read-only, and bump the object
+ * reference count all in here rather than in the parent. If we want
+ * the parent to do more sophisticated things we may have to change
+ * the ordering.
+ */
+
int
vm_pageout_flush(mc, count, flags)
vm_page_t *mc;
@@ -362,6 +389,14 @@ vm_pageout_flush(mc, count, flags)
int numpagedout = 0;
int i;
+ /*
+ * Initiate I/O. Bump the vm_page_t->busy counter and
+ * mark the pages read-only.
+ *
+ * We do not have to fixup the clean/dirty bits here... we can
+ * allow the pager to do it after the I/O completes.
+ */
+
for (i = 0; i < count; i++) {
vm_page_io_start(mc[i]);
vm_page_protect(mc[i], VM_PROT_READ);
@@ -585,25 +620,24 @@ vm_pageout_map_deactivate_pages(map, desired)
}
#endif
+/*
+ * Don't try to be fancy - being fancy can lead to VOP_LOCK's and therefore
+ * to vnode deadlocks. We only do it for OBJT_DEFAULT and OBJT_SWAP objects
+ * which we know can be trivially freed.
+ */
+
void
vm_pageout_page_free(vm_page_t m) {
- struct vnode *vp;
- vm_object_t object;
-
- object = m->object;
- object->ref_count++;
-
- if (object->type == OBJT_VNODE) {
- vp = object->handle;
- vp->v_usecount++;
- if (VSHOULDBUSY(vp))
- vbusy(vp);
- }
+ vm_object_t object = m->object;
+ int type = object->type;
+ if (type == OBJT_SWAP || type == OBJT_DEFAULT)
+ vm_object_reference(object);
vm_page_busy(m);
vm_page_protect(m, VM_PROT_NONE);
vm_page_free(m);
- vm_object_deallocate(object);
+ if (type == OBJT_SWAP || type == OBJT_DEFAULT)
+ vm_object_deallocate(object);
}
/*
@@ -613,9 +647,10 @@ static int
vm_pageout_scan()
{
vm_page_t m, next;
- int page_shortage, addl_page_shortage, maxscan, pcount;
+ int page_shortage, maxscan, pcount;
+ int addl_page_shortage, addl_page_shortage_init;
int maxlaunder;
- int pages_freed;
+ int launder_loop = 0;
struct proc *p, *bigproc;
vm_offset_t size, bigsize;
vm_object_t object;
@@ -629,31 +664,53 @@ vm_pageout_scan()
*/
pmap_collect();
- /*
- * Start scanning the inactive queue for pages we can free. We keep
- * scanning until we have enough free pages or we have scanned through
- * the entire queue. If we encounter dirty pages, we start cleaning
- * them.
- */
-
- pages_freed = 0;
- addl_page_shortage = vm_pageout_deficit;
+ addl_page_shortage_init = vm_pageout_deficit;
vm_pageout_deficit = 0;
if (max_page_launder == 0)
max_page_launder = 1;
- maxlaunder = (cnt.v_inactive_target > max_page_launder) ?
- max_page_launder : cnt.v_inactive_target;
-rescan0:
- maxscan = cnt.v_inactive_count;
- for( m = TAILQ_FIRST(&vm_page_queue_inactive);
+ /*
+ * Calculate the number of pages we want to either free or move
+ * to the cache.
+ */
+
+ page_shortage = (cnt.v_free_target + cnt.v_cache_min) -
+ (cnt.v_free_count + cnt.v_cache_count);
+ page_shortage += addl_page_shortage_init;
+
+ /*
+ * Figure out what to do with dirty pages when they are encountered.
+ * Assume that 1/3 of the pages on the inactive list are clean. If
+ * we think we can reach our target, disable laundering (do not
+ * clean any dirty pages). If we miss the target we will loop back
+ * up and do a laundering run.
+ */
- (m != NULL) && (maxscan-- > 0) &&
- ((cnt.v_cache_count + cnt.v_free_count) <
- (cnt.v_cache_min + cnt.v_free_target));
+ if (cnt.v_inactive_count / 3 > page_shortage) {
+ maxlaunder = 0;
+ launder_loop = 0;
+ } else {
+ maxlaunder =
+ (cnt.v_inactive_target > max_page_launder) ?
+ max_page_launder : cnt.v_inactive_target;
+ launder_loop = 1;
+ }
- m = next) {
+ /*
+ * Start scanning the inactive queue for pages we can move to the
+ * cache or free. The scan will stop when the target is reached or
+ * we have scanned the entire inactive queue.
+ */
+
+rescan0:
+ addl_page_shortage = addl_page_shortage_init;
+ maxscan = cnt.v_inactive_count;
+ for (
+ m = TAILQ_FIRST(&vm_page_queue_inactive);
+ m != NULL && maxscan-- > 0 && page_shortage > 0;
+ m = next
+ ) {
cnt.v_pdpages++;
@@ -681,19 +738,21 @@ rescan0:
}
/*
- * If the object is not being used, we ignore previous references.
+ * If the object is not being used, we ignore previous
+ * references.
*/
if (m->object->ref_count == 0) {
vm_page_flag_clear(m, PG_REFERENCED);
pmap_clear_reference(VM_PAGE_TO_PHYS(m));
/*
- * Otherwise, if the page has been referenced while in the inactive
- * queue, we bump the "activation count" upwards, making it less
- * likely that the page will be added back to the inactive queue
- * prematurely again. Here we check the page tables (or emulated
- * bits, if any), given the upper level VM system not knowing anything
- * about existing references.
+ * Otherwise, if the page has been referenced while in the
+ * inactive queue, we bump the "activation count" upwards,
+ * making it less likely that the page will be added back to
+ * the inactive queue prematurely again. Here we check the
+ * page tables (or emulated bits, if any), given the upper
+ * level VM system not knowing anything about existing
+ * references.
*/
} else if (((m->flags & PG_REFERENCED) == 0) &&
(actcount = pmap_ts_referenced(VM_PAGE_TO_PHYS(m)))) {
@@ -703,10 +762,10 @@ rescan0:
}
/*
- * If the upper level VM system knows about any page references,
- * we activate the page. We also set the "activation count" higher
- * than normal so that we will less likely place pages back onto the
- * inactive queue again.
+ * If the upper level VM system knows about any page
+ * references, we activate the page. We also set the
+ * "activation count" higher than normal so that we will less
+ * likely place pages back onto the inactive queue again.
*/
if ((m->flags & PG_REFERENCED) != 0) {
vm_page_flag_clear(m, PG_REFERENCED);
@@ -717,9 +776,10 @@ rescan0:
}
/*
- * If the upper level VM system doesn't know anything about the
- * page being dirty, we have to check for it again. As far as the
- * VM code knows, any partially dirty pages are fully dirty.
+ * If the upper level VM system doesn't know anything about
+ * the page being dirty, we have to check for it again. As
+ * far as the VM code knows, any partially dirty pages are
+ * fully dirty.
*/
if (m->dirty == 0) {
vm_page_test_dirty(m);
@@ -733,14 +793,14 @@ rescan0:
if (m->valid == 0) {
vm_pageout_page_free(m);
cnt.v_dfree++;
- pages_freed++;
+ --page_shortage;
/*
* Clean pages can be placed onto the cache queue.
*/
} else if (m->dirty == 0) {
vm_page_cache(m);
- pages_freed++;
+ --page_shortage;
/*
* Dirty pages need to be paged out. Note that we clean
@@ -763,8 +823,8 @@ rescan0:
}
/*
- * We don't bother paging objects that are "dead". Those
- * objects are in a "rundown" state.
+ * We don't bother paging objects that are "dead".
+ * Those objects are in a "rundown" state.
*/
if (!swap_pageouts_ok || (object->flags & OBJ_DEAD)) {
s = splvm();
@@ -774,10 +834,61 @@ rescan0:
continue;
}
- if ((object->type == OBJT_VNODE) &&
- (object->flags & OBJ_DEAD) == 0) {
+ /*
+ * For now we protect against potential memory
+ * deadlocks by requiring significant memory to be
+ * free if the object is not OBJT_DEFAULT or OBJT_SWAP.
+ * We do not 'trust' any other object type to operate
+ * with low memory, not even OBJT_DEVICE. The VM
+ * allocator will special case allocations done by
+ * the pageout daemon so the check below actually
+ * does have some hysteresis in it. It isn't the best
+ * solution, though.
+ */
+
+ if (
+ object->type != OBJT_DEFAULT &&
+ object->type != OBJT_SWAP &&
+ cnt.v_free_count < cnt.v_free_reserved
+ ) {
+ s = splvm();
+ TAILQ_REMOVE(&vm_page_queue_inactive, m, pageq);
+ TAILQ_INSERT_TAIL(&vm_page_queue_inactive, m, pageq);
+ splx(s);
+ continue;
+ }
+
+ /*
+ * Presumably we have sufficient free memory to do
+ * the more sophisticated checks and locking required
+ * for vnodes.
+ *
+ * The object is already known NOT to be dead. The
+ * vget() may still block, though, because
+ * VOP_ISLOCKED() doesn't check to see if an inode
+ * (v_data) is associated with the vnode. If it isn't,
+ * vget() will load it in from disk. Worse, vget()
+ * may actually get stuck waiting on "inode" if another
+ * process is in the process of bringing the inode in.
+ * This is bad news for us either way.
+ *
+ * So for the moment we check v_data == NULL as a
+ * workaround. This means that vnodes which do not
+ * use v_data in the way we expect probably will not
+ * wind up being paged out by the pager and it will be
+ * up to the syncer to get them. That's better than
+ * us blocking here.
+ *
+ * This whole code section is bogus - we need to fix
+ * the vnode pager to handle vm_page_t's without us
+ * having to do any sophisticated VOP tests.
+ */
+
+ if (object->type == OBJT_VNODE) {
vp = object->handle;
+
if (VOP_ISLOCKED(vp) ||
+ vp->v_data == NULL ||
vget(vp, LK_EXCLUSIVE|LK_NOOBJ, curproc)) {
if ((m->queue == PQ_INACTIVE) &&
(m->hold_count == 0) &&
@@ -844,19 +955,34 @@ rescan0:
}
/*
- * Compute the page shortage. If we are still very low on memory be
- * sure that we will move a minimal amount of pages from active to
- * inactive.
+ * If we still have a page shortage and we didn't launder anything,
+ * run the inactive scan again and launder something this time.
+ */
+
+ if (launder_loop == 0 && page_shortage > 0) {
+ launder_loop = 1;
+ maxlaunder =
+ (cnt.v_inactive_target > max_page_launder) ?
+ max_page_launder : cnt.v_inactive_target;
+ goto rescan0;
+ }
+
+ /*
+ * Compute the page shortage from the point of view of having to
+ * move pages from the active queue to the inactive queue.
*/
+
page_shortage = (cnt.v_inactive_target + cnt.v_cache_min) -
(cnt.v_free_count + cnt.v_inactive_count + cnt.v_cache_count);
page_shortage += addl_page_shortage;
- if (page_shortage <= 0) {
- page_shortage = 0;
- }
+
+ /*
+ * Scan the active queue for things we can deactivate
+ */
pcount = cnt.v_active_count;
m = TAILQ_FIRST(&vm_page_queue_active);
+
while ((m != NULL) && (pcount-- > 0) && (page_shortage > 0)) {
/*
@@ -943,10 +1069,14 @@ rescan0:
}
s = splvm();
+
/*
* We try to maintain some *really* free pages, this allows interrupt
- * code to be guaranteed space.
+ * code to be guaranteed space. Since both cache and free queues
+ * are considered basically 'free', moving pages from cache to free
+ * does not affect other calculations.
*/
+
while (cnt.v_free_count < cnt.v_free_reserved) {
static int cache_rover = 0;
m = vm_page_list_find(PQ_CACHE, cache_rover);
@@ -995,7 +1125,6 @@ rescan0:
#endif
}
-
/*
* make sure that we have swap space -- if we are low on memory and
* swap -- then kill the biggest process.
@@ -1242,10 +1371,8 @@ vm_pageout()
cnt.v_pdwakeups++;
vm_pages_needed = 0;
splx(s);
- vm_pager_sync();
vm_pageout_scan();
vm_pageout_deficit = 0;
- vm_pager_sync();
wakeup(&cnt.v_free_count);
}
}
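
The vm_pageout.c changes above restructure the scan into two passes: a first pass (launder_loop == 0, maxlaunder == 0) that only frees or caches clean pages, and a second pass that may launder dirty pages if a shortage remains. A minimal standalone sketch of that control flow, assuming a hypothetical helper scan_inactive_queue() in place of the real inactive-queue walk:

    /*
     * Sketch only -- not kernel code.  The stub pretends that
     * laundering recovers up to 'maxlaunder' pages of shortage.
     */
    static int
    scan_inactive_queue(int shortage, int maxlaunder)
    {
        return (shortage - maxlaunder);
    }

    static void
    pageout_scan_sketch(int inactive_target, int max_page_launder)
    {
        int page_shortage = inactive_target;    /* assume the worst case */
        int launder_loop = 0;
        int maxlaunder = 0;     /* pass 0: do not launder dirty pages */

    rescan0:
        page_shortage = scan_inactive_queue(page_shortage, maxlaunder);

        /*
         * Same test as the patch: nothing was laundered yet and a
         * shortage remains, so rescan with a bounded laundering budget.
         */
        if (launder_loop == 0 && page_shortage > 0) {
            launder_loop = 1;
            maxlaunder = (inactive_target > max_page_launder) ?
                max_page_launder : inactive_target;
            goto rescan0;
        }
    }
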
diff --git a/sys/vm/vm_pageout.h b/sys/vm/vm_pageout.h
index a864896..68c0561 100644
--- a/sys/vm/vm_pageout.h
+++ b/sys/vm/vm_pageout.h
@@ -61,7 +61,7 @@
* any improvements or extensions that they make and grant Carnegie the
* rights to redistribute these changes.
*
- * $Id: vm_pageout.h,v 1.22 1998/01/12 01:44:46 dyson Exp $
+ * $Id: vm_pageout.h,v 1.23 1998/01/22 17:30:43 dyson Exp $
*/
#ifndef _VM_VM_PAGEOUT_H_
@@ -100,7 +100,9 @@ extern int vm_pageout_deficit;
extern void pagedaemon_wakeup __P((void));
#define VM_WAIT vm_wait()
+#define VM_AWAIT vm_await()
extern void vm_wait __P((void));
+extern void vm_await __P((void));
#ifdef KERNEL
void vm_pageout_page __P((vm_page_t, vm_object_t));
diff --git a/sys/vm/vm_pager.c b/sys/vm/vm_pager.c
index 18df05d..62fe6e8 100644
--- a/sys/vm/vm_pager.c
+++ b/sys/vm/vm_pager.c
@@ -61,7 +61,7 @@
* any improvements or extensions that they make and grant Carnegie the
* rights to redistribute these changes.
*
- * $Id: vm_pager.c,v 1.39 1998/10/31 15:31:29 peter Exp $
+ * $Id: vm_pager.c,v 1.40 1998/11/10 09:16:27 peter Exp $
*/
/*
@@ -91,6 +91,8 @@ extern struct pagerops swappagerops;
extern struct pagerops vnodepagerops;
extern struct pagerops devicepagerops;
+int cluster_pbuf_freecnt = -1; /* unlimited to begin with */
+
static int dead_pager_getpages __P((vm_object_t, vm_page_t *, int, int));
static vm_object_t dead_pager_alloc __P((void *, vm_ooffset_t, vm_prot_t,
vm_ooffset_t));
@@ -164,14 +166,15 @@ struct pagerops deadpagerops = {
NULL
};
-static struct pagerops *pagertab[] = {
+struct pagerops *pagertab[] = {
&defaultpagerops, /* OBJT_DEFAULT */
&swappagerops, /* OBJT_SWAP */
&vnodepagerops, /* OBJT_VNODE */
&devicepagerops, /* OBJT_DEVICE */
&deadpagerops /* OBJT_DEAD */
};
-static int npagers = sizeof(pagertab) / sizeof(pagertab[0]);
+
+int npagers = sizeof(pagertab) / sizeof(pagertab[0]);
/*
* Kernel address space for mapping pages.
@@ -217,6 +220,8 @@ vm_pager_bufferinit()
bp->b_xflags = 0;
}
+ cluster_pbuf_freecnt = nswbuf / 2;
+
swapbkva = kmem_alloc_pageable(pager_map, nswbuf * MAXPHYS);
if (!swapbkva)
panic("Not enough pager_map VM space for physical buffers");
@@ -246,41 +251,21 @@ vm_pager_deallocate(object)
(*pagertab[object->type]->pgo_dealloc) (object);
}
+/*
+ * vm_pager_get_pages() - inline, see vm/vm_pager.h
+ * vm_pager_put_pages() - inline, see vm/vm_pager.h
+ * vm_pager_has_page() - inline, see vm/vm_pager.h
+ * vm_pager_page_inserted() - inline, see vm/vm_pager.h
+ * vm_pager_page_removed() - inline, see vm/vm_pager.h
+ */
-int
-vm_pager_get_pages(object, m, count, reqpage)
- vm_object_t object;
- vm_page_t *m;
- int count;
- int reqpage;
-{
- return ((*pagertab[object->type]->pgo_getpages)(object, m, count, reqpage));
-}
-
-int
-vm_pager_put_pages(object, m, count, flags, rtvals)
- vm_object_t object;
- vm_page_t *m;
- int count;
- int flags;
- int *rtvals;
-{
- return ((*pagertab[object->type]->pgo_putpages)(object, m, count, flags, rtvals));
-}
-
-boolean_t
-vm_pager_has_page(object, offset, before, after)
- vm_object_t object;
- vm_pindex_t offset;
- int *before;
- int *after;
-{
- return ((*pagertab[object->type]->pgo_haspage) (object, offset, before, after));
-}
-
+#if 0
/*
- * Called by pageout daemon before going back to sleep.
- * Gives pagers a chance to clean up any completed async pageing operations.
+ * vm_pager_sync:
+ *
+ * Called by pageout daemon before going back to sleep.
+ * Gives pagers a chance to clean up any completed async paging
+ * operations.
*/
void
vm_pager_sync()
@@ -292,6 +277,8 @@ vm_pager_sync()
(*(*pgops)->pgo_sync) ();
}
+#endif
+
vm_offset_t
vm_pager_map_page(m)
vm_page_t m;
@@ -342,20 +329,42 @@ initpbuf(struct buf *bp) {
/*
* allocate a physical buffer
+ *
+ * There are a limited number (nswbuf) of physical buffers. We need
+ * to make sure that no single subsystem is able to hog all of them,
+ * so each subsystem implements a counter which is typically initialized
+ * to 1/2 nswbuf. getpbuf() decrements this counter in allocation and
+ * increments it on release, and blocks if the counter hits zero. A
+ * subsystem may initialize the counter to -1 to disable the feature,
+ * but it must still be sure to match up all uses of getpbuf() with
+ * relpbuf() using the same variable.
+ *
+ * NOTE: pfreecnt can be NULL, but this 'feature' will be removed
+ * relatively soon when the rest of the subsystems get smart about it. XXX
*/
struct buf *
-getpbuf()
+getpbuf(pfreecnt)
+ int *pfreecnt;
{
int s;
struct buf *bp;
s = splvm();
+
+ if (pfreecnt) {
+ while (*pfreecnt == 0) {
+ tsleep(pfreecnt, PVM, "wswbuf0", 0);
+ }
+ }
+
/* get a bp from the swap buffer header pool */
while ((bp = TAILQ_FIRST(&bswlist)) == NULL) {
bswneeded = 1;
- tsleep(&bswneeded, PVM, "wswbuf", 0);
+ tsleep(&bswneeded, PVM, "wswbuf1", 0);
}
TAILQ_REMOVE(&bswlist, bp, b_freelist);
+ if (pfreecnt)
+ --*pfreecnt;
splx(s);
initpbuf(bp);
@@ -363,20 +372,27 @@ getpbuf()
}
/*
- * allocate a physical buffer, if one is available
+ * allocate a physical buffer, if one is available.
+ *
+ * Note that there is no NULL hack here - all subsystems using this
+ * call understand how to use pfreecnt.
*/
struct buf *
-trypbuf()
+trypbuf(pfreecnt)
+ int *pfreecnt;
{
int s;
struct buf *bp;
s = splvm();
- if ((bp = TAILQ_FIRST(&bswlist)) == NULL) {
+ if (*pfreecnt == 0 || (bp = TAILQ_FIRST(&bswlist)) == NULL) {
splx(s);
return NULL;
}
TAILQ_REMOVE(&bswlist, bp, b_freelist);
+
+ --*pfreecnt;
+
splx(s);
initpbuf(bp);
@@ -386,10 +402,14 @@ trypbuf()
/*
* release a physical buffer
+ *
+ * NOTE: pfreecnt can be NULL, but this 'feature' will be removed
+ * relatively soon when the rest of the subsystems get smart about it. XXX
*/
void
-relpbuf(bp)
+relpbuf(bp, pfreecnt)
struct buf *bp;
+ int *pfreecnt;
{
int s;
@@ -403,6 +423,7 @@ relpbuf(bp)
crfree(bp->b_wcred);
bp->b_wcred = NOCRED;
}
+
if (bp->b_vp)
pbrelvp(bp);
@@ -415,5 +436,9 @@ relpbuf(bp)
bswneeded = 0;
wakeup(&bswneeded);
}
+ if (pfreecnt) {
+ if (++*pfreecnt == 1)
+ wakeup(pfreecnt);
+ }
splx(s);
}
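
From a consumer's point of view, the pfreecnt convention introduced above is: keep one static counter per subsystem, initialize it to roughly nswbuf / 2 (or -1 to opt out), and pass the same counter to every matching getpbuf()/relpbuf() pair. A hedged sketch of the idiom; my_pbuf_freecnt and my_subsystem_io() are hypothetical names, and the declarations normally come from the pager headers rather than being written out like this:

    struct buf;                         /* opaque for this sketch */
    extern int nswbuf;                  /* total number of physical buffers */
    struct buf *getpbuf(int *);         /* new signatures from this change */
    void relpbuf(struct buf *, int *);

    static int my_pbuf_freecnt = -1;    /* -1: not yet initialized */

    static void
    my_subsystem_io(void)
    {
        struct buf *bp;

        if (my_pbuf_freecnt < 0)
            my_pbuf_freecnt = nswbuf / 2;   /* claim at most half the pbufs */

        bp = getpbuf(&my_pbuf_freecnt);     /* may sleep in "wswbuf0" */
        /* ... build the buffer header and issue the I/O ... */
        relpbuf(bp, &my_pbuf_freecnt);      /* a 0 -> 1 transition wakes waiters */
    }
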
diff --git a/sys/vm/vm_pager.h b/sys/vm/vm_pager.h
index 6b8eb42..0e8d894 100644
--- a/sys/vm/vm_pager.h
+++ b/sys/vm/vm_pager.h
@@ -36,7 +36,7 @@
* SUCH DAMAGE.
*
* @(#)vm_pager.h 8.4 (Berkeley) 1/12/94
- * $Id: vm_pager.h,v 1.16 1998/03/07 21:37:27 dyson Exp $
+ * $Id: vm_pager.h,v 1.17 1998/10/13 08:24:44 dg Exp $
*/
/*
@@ -57,7 +57,7 @@ struct pagerops {
int (*pgo_getpages) __P((vm_object_t, vm_page_t *, int, int)); /* Get (read) page. */
int (*pgo_putpages) __P((vm_object_t, vm_page_t *, int, int, int *)); /* Put (write) page. */
boolean_t (*pgo_haspage) __P((vm_object_t, vm_pindex_t, int *, int *)); /* Does pager have page? */
- void (*pgo_sync) __P((void));
+ void (*pgo_pageunswapped) __P((vm_page_t));
};
/*
@@ -87,20 +87,69 @@ MALLOC_DECLARE(M_VMPGDATA);
extern vm_map_t pager_map;
extern int pager_map_size;
+extern struct pagerops *pagertab[];
vm_object_t vm_pager_allocate __P((objtype_t, void *, vm_ooffset_t, vm_prot_t, vm_ooffset_t));
void vm_pager_bufferinit __P((void));
void vm_pager_deallocate __P((vm_object_t));
-int vm_pager_get_pages __P((vm_object_t, vm_page_t *, int, int));
-boolean_t vm_pager_has_page __P((vm_object_t, vm_pindex_t, int *, int *));
+static __inline int vm_pager_get_pages __P((vm_object_t, vm_page_t *, int, int));
+static __inline boolean_t vm_pager_has_page __P((vm_object_t, vm_pindex_t, int *, int *));
void vm_pager_init __P((void));
vm_object_t vm_pager_object_lookup __P((struct pagerlst *, void *));
vm_offset_t vm_pager_map_pages __P((vm_page_t *, int, boolean_t));
vm_offset_t vm_pager_map_page __P((vm_page_t));
-int vm_pager_put_pages __P((vm_object_t, vm_page_t *, int, boolean_t, int *));
+static __inline int vm_pager_put_pages __P((vm_object_t, vm_page_t *, int, boolean_t, int *));
void vm_pager_sync __P((void));
void vm_pager_unmap_pages __P((vm_offset_t, int));
void vm_pager_unmap_page __P((vm_offset_t));
+
+static __inline int
+vm_pager_get_pages(
+ vm_object_t object,
+ vm_page_t *m,
+ int count,
+ int reqpage
+) {
+ return ((*pagertab[object->type]->pgo_getpages)(object, m, count, reqpage));
+}
+
+static __inline int
+vm_pager_put_pages(
+ vm_object_t object,
+ vm_page_t *m,
+ int count,
+ int flags,
+ int *rtvals
+) {
+ return ((*pagertab[object->type]->pgo_putpages)(object, m, count, flags, rtvals));
+}
+
+static __inline boolean_t
+vm_pager_has_page(
+ vm_object_t object,
+ vm_pindex_t offset,
+ int *before,
+ int *after
+) {
+ return ((*pagertab[object->type]->pgo_haspage) (object, offset, before, after));
+}
+
+/*
+ * vm_pager_page_unswapped
+ *
+ * called at splvm() to destroy swap associated with the page.
+ *
+ * This function may not block.
+ */
+
+static __inline void
+vm_pager_page_unswapped(vm_page_t m)
+{
+ if (pagertab[m->object->type]->pgo_pageunswapped)
+ (*pagertab[m->object->type]->pgo_pageunswapped)(m);
+}
+
+
#endif
#endif /* _VM_PAGER_ */
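
The new inline wrappers above are plain table dispatch: object->type indexes pagertab[], and the op is called through the matching pagerops slot, with a NULL check where the slot is optional (pgo_pageunswapped). A toy, self-contained model of the same pattern; the types and names here are simplified illustrations, not the kernel's declarations:

    #include <stdio.h>
    #include <stddef.h>

    struct toy_ops {
        int (*get)(int);            /* mandatory, like pgo_getpages */
        void (*unswapped)(int);     /* optional, like pgo_pageunswapped */
    };

    static int default_get(int n) { return (n * 2); }

    static struct toy_ops default_ops = { default_get, NULL };
    static struct toy_ops *toytab[] = { &default_ops };  /* indexed by "type" */

    static int
    toy_get(int type, int n)        /* same shape as vm_pager_get_pages() */
    {
        return ((*toytab[type]->get)(n));
    }

    static void
    toy_unswapped(int type, int n)  /* NULL check as in vm_pager_page_unswapped() */
    {
        if (toytab[type]->unswapped)
            (*toytab[type]->unswapped)(n);
    }

    int
    main(void)
    {
        printf("%d\n", toy_get(0, 21));     /* prints 42 */
        toy_unswapped(0, 21);               /* safely does nothing */
        return (0);
    }
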
diff --git a/sys/vm/vm_swap.c b/sys/vm/vm_swap.c
index bfcebdc..f973631 100644
--- a/sys/vm/vm_swap.c
+++ b/sys/vm/vm_swap.c
@@ -31,7 +31,7 @@
* SUCH DAMAGE.
*
* @(#)vm_swap.c 8.5 (Berkeley) 2/17/94
- * $Id: vm_swap.c,v 1.56 1998/07/04 22:30:26 julian Exp $
+ * $Id: vm_swap.c,v 1.57 1998/10/25 19:24:04 bde Exp $
*/
#include "opt_devfs.h"
@@ -50,7 +50,7 @@
#include <sys/dmap.h> /* XXX */
#include <sys/vnode.h>
#include <sys/fcntl.h>
-#include <sys/rlist.h>
+#include <sys/blist.h>
#include <sys/kernel.h>
#include <vm/vm.h>
#include <vm/vm_extern.h>
@@ -94,8 +94,7 @@ static dev_t swapdev = makedev(BDEV_MAJOR, 0);
static struct swdevt should_be_malloced[NSWAPDEV];
static struct swdevt *swdevt = should_be_malloced;
struct vnode *swapdev_vp;
-/* XXX swapinfo(8) needs this one I belive */
-int nswap; /* first block after the interleaved devs */
+static int nswap; /* first block after the interleaved devs */
static int nswdev = NSWAPDEV;
int vm_swap_size;
@@ -119,7 +118,13 @@ swstrategy(bp)
register struct swdevt *sp;
struct vnode *vp;
- sz = howmany(bp->b_bcount, DEV_BSIZE);
+ sz = howmany(bp->b_bcount, PAGE_SIZE);
+ /*
+ * Convert interleaved swap into per-device swap. Note that
+ * the block size is left in PAGE_SIZE'd chunks (for the newswap)
+ * here.
+ */
+
if (nswdev > 1) {
off = bp->b_blkno % dmmax;
if (off + sz > dmmax) {
@@ -132,8 +137,9 @@ swstrategy(bp)
index = seg % nswdev;
seg /= nswdev;
bp->b_blkno = seg * dmmax + off;
- } else
+ } else {
index = 0;
+ }
sp = &swdevt[index];
if (bp->b_blkno + sz > sp->sw_nblks) {
bp->b_error = EINVAL;
@@ -148,6 +154,12 @@ swstrategy(bp)
biodone(bp);
return;
}
+
+ /*
+ * Convert from PAGE_SIZE'd to DEV_BSIZE'd chunks for the actual I/O
+ */
+ bp->b_blkno = ctodb(bp->b_blkno);
+
vhold(sp->sw_vp);
s = splvm();
if ((bp->b_flags & B_READ) == 0) {
@@ -161,10 +173,8 @@ swstrategy(bp)
}
sp->sw_vp->v_numoutput++;
}
- if (bp->b_vp != NULL)
- pbrelvp(bp);
+ pbreassignbuf(bp, sp->sw_vp);
splx(s);
- bp->b_vp = sp->sw_vp;
VOP_STRATEGY(bp->b_vp, bp);
}
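
The interleave math in swstrategy() splits a linear swap block number into a stripe offset and a stripe number, deals stripes round-robin across the devices, and rebuilds a per-device block number. A standalone sketch of the same arithmetic with arbitrary toy values for dmmax and nswdev (the kernel additionally rejects transfers that would cross a stripe boundary):

    #include <stdio.h>

    #define DMMAX   16      /* toy interleave stripe size, in pages */
    #define NSWDEV  2       /* toy number of swap devices */

    /*
     * Same math as swstrategy(): off is the position within a stripe,
     * seg is the stripe number; stripes rotate across the devices.
     */
    static void
    interleave(long blkno, int *index, long *devblk)
    {
        long off = blkno % DMMAX;
        long seg = blkno / DMMAX;

        *index = (int)(seg % NSWDEV);
        seg /= NSWDEV;
        *devblk = seg * DMMAX + off;
    }

    int
    main(void)
    {
        long blkno, devblk;
        int index;

        for (blkno = 0; blkno < 4 * DMMAX; blkno += DMMAX) {
            interleave(blkno, &index, &devblk);
            printf("blkno %3ld -> dev %d, block %3ld\n", blkno, index, devblk);
        }
        return (0);
    }
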
@@ -240,6 +250,11 @@ swapon(p, uap)
* Each of the nswdev devices provides 1/nswdev'th of the swap
* space, which is laid out with blocks of dmmax pages circularly
* among the devices.
+ *
+ * The new swap code uses page-sized blocks. The old swap code used
+ * DEV_BSIZE'd chunks.
+ *
+ * XXX locking when multiple swapon's run in parallel
*/
int
swaponvp(p, vp, dev, nblks)
@@ -277,18 +292,37 @@ swaponvp(p, vp, dev, nblks)
(void) VOP_CLOSE(vp, FREAD | FWRITE, p->p_ucred, p);
return (ENXIO);
}
+ /*
+ * nblks is in DEV_BSIZE'd chunks, convert to PAGE_SIZE'd chunks.
+ * First round nblks down to page-align it, then convert.
+ *
+ * sp->sw_nblks is in page-sized chunks now too.
+ */
+ nblks &= ~(ctodb(1) - 1);
+ nblks = dbtoc(nblks);
+
sp->sw_vp = vp;
sp->sw_dev = dev;
sp->sw_flags |= SW_FREED;
sp->sw_nblks = nblks;
+ /*
+ * nblks, nswap, and dmmax are PAGE_SIZE'd parameters now, not
+ * DEV_BSIZE'd.
+ */
+
if (nblks * nswdev > nswap)
nswap = (nblks+1) * nswdev;
+ if (swapblist == NULL)
+ swapblist = blist_create(nswap);
+ else
+ blist_resize(&swapblist, nswap, 0);
+
for (dvbase = dmmax; dvbase < nblks; dvbase += dmmax) {
- blk = min(nblks - dvbase,dmmax);
+ blk = min(nblks - dvbase, dmmax);
vsbase = index * dmmax + dvbase * nswdev;
- rlist_free(&swaplist, vsbase, vsbase + blk - 1);
+ blist_free(swapblist, vsbase, blk);
vm_swap_size += blk;
}
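
The unit handling in swaponvp() and swstrategy() is just a conversion between 512-byte device blocks and pages. A quick worked sketch under the usual assumptions (DEV_BSIZE = 512, PAGE_SIZE = 4096, so ctodb(1) = 8); the macros below are simplified forms of the kernel's shift-based conversions:

    #include <stdio.h>

    #define DEV_BSIZE   512
    #define PAGE_SIZE   4096

    #define ctodb(x)    ((x) * (PAGE_SIZE / DEV_BSIZE))   /* pages -> dev blocks */
    #define dbtoc(x)    ((x) / (PAGE_SIZE / DEV_BSIZE))   /* dev blocks -> pages */

    int
    main(void)
    {
        long nblks = 20485;   /* device size in DEV_BSIZE blocks, not page-aligned */

        nblks &= ~(ctodb(1) - 1);   /* round down to a page boundary: 20480 */
        nblks = dbtoc(nblks);       /* convert to pages: 2560 */
        printf("%ld pages of swap\n", nblks);

        /* and back again for the device I/O, as the new swstrategy() does */
        printf("%ld device blocks\n", (long)ctodb(nblks));
        return (0);
    }
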
diff --git a/sys/vm/vnode_pager.c b/sys/vm/vnode_pager.c
index fba7e2f..fe04da4 100644
--- a/sys/vm/vnode_pager.c
+++ b/sys/vm/vnode_pager.c
@@ -38,7 +38,7 @@
* SUCH DAMAGE.
*
* from: @(#)vnode_pager.c 7.5 (Berkeley) 4/20/91
- * $Id: vnode_pager.c,v 1.100 1998/10/13 08:24:44 dg Exp $
+ * $Id: vnode_pager.c,v 1.101 1998/12/04 18:39:44 rvb Exp $
*/
/*
@@ -88,6 +88,8 @@ struct pagerops vnodepagerops = {
NULL
};
+int vnode_pbuf_freecnt = -1; /* start out unlimited */
+
/*
* Allocate (or lookup) pager for a vnode.
@@ -106,6 +108,13 @@ vnode_pager_alloc(void *handle, vm_ooffset_t size, vm_prot_t prot,
if (handle == NULL)
return (NULL);
+ /*
+ * XXX hack - This initialization should be put somewhere else.
+ */
+ if (vnode_pbuf_freecnt < 0) {
+ vnode_pbuf_freecnt = nswbuf / 2 + 1;
+ }
+
vp = (struct vnode *) handle;
/*
@@ -395,7 +404,7 @@ vnode_pager_input_smlfs(object, m)
fileaddr = vnode_pager_addr(vp,
IDX_TO_OFF(m->pindex) + i * bsize, (int *)0);
if (fileaddr != -1) {
- bp = getpbuf();
+ bp = getpbuf(&vnode_pbuf_freecnt);
/* build a minimal buffer header */
bp->b_flags = B_BUSY | B_READ | B_CALL;
@@ -428,7 +437,7 @@ vnode_pager_input_smlfs(object, m)
/*
* free the buffer header back to the swap buffer pool
*/
- relpbuf(bp);
+ relpbuf(bp, &vnode_pbuf_freecnt);
if (error)
break;
@@ -707,7 +716,7 @@ vnode_pager_generic_getpages(vp, m, bytecount, reqpage)
if (dp->v_type == VBLK || dp->v_type == VCHR)
size = (size + DEV_BSIZE - 1) & ~(DEV_BSIZE - 1);
- bp = getpbuf();
+ bp = getpbuf(&vnode_pbuf_freecnt);
kva = (vm_offset_t) bp->b_data;
/*
@@ -755,7 +764,7 @@ vnode_pager_generic_getpages(vp, m, bytecount, reqpage)
/*
* free the buffer header back to the swap buffer pool
*/
- relpbuf(bp);
+ relpbuf(bp, &vnode_pbuf_freecnt);
for (i = 0, tfoff = foff; i < count; i++, tfoff = nextoff) {
vm_page_t mt;
OpenPOWER on IntegriCloud