path: root/sys/vm
author     rgrimes <rgrimes@FreeBSD.org>    1994-05-25 09:21:21 +0000
committer  rgrimes <rgrimes@FreeBSD.org>    1994-05-25 09:21:21 +0000
commit     2469c867a164210ce96143517059f21db7f1fd17 (patch)
tree       9179427ac860211c445df663fd2b86267366bfba /sys/vm
parent     cb0aba89af15a48e2655e898a503946ac4cb42ae (diff)
The big 4.4BSD Lite to FreeBSD 2.0.0 (Development) patch.
Reviewed by:	Rodney W. Grimes
Submitted by:	John Dyson and David Greenman
Diffstat (limited to 'sys/vm')
-rw-r--r--  sys/vm/device_pager.c     72
-rw-r--r--  sys/vm/swap_pager.c     2107
-rw-r--r--  sys/vm/swap_pager.h       65
-rw-r--r--  sys/vm/vm.h                4
-rw-r--r--  sys/vm/vm_extern.h        12
-rw-r--r--  sys/vm/vm_fault.c        440
-rw-r--r--  sys/vm/vm_glue.c         500
-rw-r--r--  sys/vm/vm_init.c           8
-rw-r--r--  sys/vm/vm_kern.c           6
-rw-r--r--  sys/vm/vm_kern.h           4
-rw-r--r--  sys/vm/vm_map.c          127
-rw-r--r--  sys/vm/vm_map.h           10
-rw-r--r--  sys/vm/vm_meter.c          1
-rw-r--r--  sys/vm/vm_mmap.c           4
-rw-r--r--  sys/vm/vm_object.c       495
-rw-r--r--  sys/vm/vm_page.c         391
-rw-r--r--  sys/vm/vm_page.h          30
-rw-r--r--  sys/vm/vm_pageout.c     1063
-rw-r--r--  sys/vm/vm_pageout.h       30
-rw-r--r--  sys/vm/vm_pager.c        173
-rw-r--r--  sys/vm/vm_pager.h         37
-rw-r--r--  sys/vm/vm_param.h         30
-rw-r--r--  sys/vm/vm_prot.h           2
-rw-r--r--  sys/vm/vm_swap.c          16
-rw-r--r--  sys/vm/vm_unix.c          41
-rw-r--r--  sys/vm/vm_user.c           4
-rw-r--r--  sys/vm/vnode_pager.c    1334
-rw-r--r--  sys/vm/vnode_pager.h       3
28 files changed, 4909 insertions, 2100 deletions
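
Note: the bulk of this patch replaces the old swtab/rmalloc-based swap allocation
with fixed-size swap blocks covering SWB_NPAGES pages each, carrying a per-page
validity bitmask. The sketch below is reconstructed from the hunks that follow and
is illustrative only; the real sw_blk_t lives in sys/vm/swap_pager.h (not shown
here), and the PAGE_SIZE and SWB_NPAGES values are placeholders, not the shipped
definitions.

    /*
     * Illustrative sketch, not the shipped header: field and macro names are
     * taken from the diff below, but the exact layout and the real value of
     * SWB_NPAGES come from sys/vm/swap_pager.h.
     */
    #define PAGE_SIZE   4096            /* placeholder; real value is machine-dependent */
    #define SWB_NPAGES  8               /* placeholder; defined in swap_pager.h */
    #define SWB_EMPTY   (-1)            /* no swap space allocated for this page */

    struct swblock_sketch {
            unsigned short  swb_valid;              /* bitmask: page written to swap */
            unsigned short  swb_locked;             /* block busy with paging I/O */
            int             swb_block[SWB_NPAGES];  /* disk address per page, or SWB_EMPTY */
    };

    /* which block covers a given pager offset */
    static int
    swblk_index(unsigned long offset)
    {
            return (offset / (SWB_NPAGES * PAGE_SIZE));
    }

    /* which page slot within that block */
    static int
    swblk_offset(unsigned long offset)
    {
            return ((offset % (SWB_NPAGES * PAGE_SIZE)) / PAGE_SIZE);
    }

This is the same arithmetic used by swap_pager_diskaddr(), swap_pager_block_index(),
and swap_pager_block_offset() in the swap_pager.c hunks below.
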
diff --git a/sys/vm/device_pager.c b/sys/vm/device_pager.c
index 235c917a..b8083df 100644
--- a/sys/vm/device_pager.c
+++ b/sys/vm/device_pager.c
@@ -35,7 +35,7 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * @(#)device_pager.c 8.5 (Berkeley) 1/12/94
+ * @(#)device_pager.c 8.1 (Berkeley) 6/11/93
*/
/*
@@ -53,8 +53,8 @@
#include <vm/vm_page.h>
#include <vm/device_pager.h>
-struct pagerlst dev_pager_list; /* list of managed devices */
-struct pglist dev_pager_fakelist; /* list of available vm_page_t's */
+struct pagerlst dev_pager_list; /* list of managed devices */
+struct pglist dev_pager_fakelist; /* list of available vm_page_t's */
#ifdef DEBUG
int dpagerdebug = 0;
@@ -68,11 +68,11 @@ static vm_pager_t dev_pager_alloc
__P((caddr_t, vm_size_t, vm_prot_t, vm_offset_t));
static void dev_pager_dealloc __P((vm_pager_t));
static int dev_pager_getpage
- __P((vm_pager_t, vm_page_t *, int, boolean_t));
+ __P((vm_pager_t, vm_page_t, boolean_t));
static boolean_t dev_pager_haspage __P((vm_pager_t, vm_offset_t));
static void dev_pager_init __P((void));
static int dev_pager_putpage
- __P((vm_pager_t, vm_page_t *, int, boolean_t));
+ __P((vm_pager_t, vm_page_t, boolean_t));
static vm_page_t dev_pager_getfake __P((vm_offset_t));
static void dev_pager_putfake __P((vm_page_t));
@@ -81,9 +81,10 @@ struct pagerops devicepagerops = {
dev_pager_alloc,
dev_pager_dealloc,
dev_pager_getpage,
+ 0,
dev_pager_putpage,
- dev_pager_haspage,
- vm_pager_clusternull
+ 0,
+ dev_pager_haspage
};
static void
@@ -109,7 +110,7 @@ dev_pager_alloc(handle, size, prot, foff)
int (*mapfunc)();
vm_object_t object;
dev_pager_t devp;
- int npages, off;
+ unsigned int npages, off;
#ifdef DEBUG
if (dpagerdebug & DDB_FOLLOW)
@@ -127,7 +128,7 @@ dev_pager_alloc(handle, size, prot, foff)
/*
* Make sure this device can be mapped.
*/
- dev = (dev_t)handle;
+ dev = (dev_t)(u_long)handle;
mapfunc = cdevsw[major(dev)].d_mmap;
if (mapfunc == NULL || mapfunc == enodev || mapfunc == nullop)
return(NULL);
@@ -135,7 +136,7 @@ dev_pager_alloc(handle, size, prot, foff)
/*
* Offset should be page aligned.
*/
- if (foff & PAGE_MASK)
+ if (foff & (PAGE_SIZE-1))
return(NULL);
/*
@@ -169,15 +170,15 @@ top:
pager->pg_handle = handle;
pager->pg_ops = &devicepagerops;
pager->pg_type = PG_DEVICE;
+ pager->pg_data = (caddr_t)devp;
pager->pg_flags = 0;
- pager->pg_data = devp;
TAILQ_INIT(&devp->devp_pglist);
/*
* Allocate object and associate it with the pager.
*/
object = devp->devp_object = vm_object_allocate(0);
vm_object_enter(object, pager);
- vm_object_setpager(object, pager, (vm_offset_t)0, FALSE);
+ vm_object_setpager(object, pager, (vm_offset_t)foff, FALSE);
/*
* Finally, put it on the managed list so other can find it.
* First we re-lookup in case someone else beat us to this
@@ -239,7 +240,7 @@ dev_pager_dealloc(pager)
/*
* Free up our fake pages.
*/
- while ((m = devp->devp_pglist.tqh_first) != NULL) {
+ while (m=devp->devp_pglist.tqh_first) {
TAILQ_REMOVE(&devp->devp_pglist, m, pageq);
dev_pager_putfake(m);
}
@@ -248,39 +249,33 @@ dev_pager_dealloc(pager)
}
static int
-dev_pager_getpage(pager, mlist, npages, sync)
+dev_pager_getpage(pager, m, sync)
vm_pager_t pager;
- vm_page_t *mlist;
- int npages;
+ vm_page_t m;
boolean_t sync;
{
register vm_object_t object;
vm_offset_t offset, paddr;
vm_page_t page;
dev_t dev;
+ int s;
int (*mapfunc)(), prot;
- vm_page_t m;
#ifdef DEBUG
if (dpagerdebug & DDB_FOLLOW)
- printf("dev_pager_getpage(%x, %x, %x, %x)\n",
- pager, mlist, npages, sync);
+ printf("dev_pager_getpage(%x, %x)\n", pager, m);
#endif
- if (npages != 1)
- panic("dev_pager_getpage: cannot handle multiple pages");
- m = *mlist;
-
object = m->object;
- dev = (dev_t)pager->pg_handle;
+ dev = (dev_t)(u_long)pager->pg_handle;
offset = m->offset + object->paging_offset;
prot = PROT_READ; /* XXX should pass in? */
mapfunc = cdevsw[major(dev)].d_mmap;
-#ifdef DIAGNOSTIC
+
if (mapfunc == NULL || mapfunc == enodev || mapfunc == nullop)
panic("dev_pager_getpage: no map function");
-#endif
- paddr = pmap_phys_address((*mapfunc)(dev, (int)offset, prot));
+
+ paddr = pmap_phys_address((*mapfunc)((dev_t)dev, (int)offset, prot));
#ifdef DIAGNOSTIC
if (paddr == -1)
panic("dev_pager_getpage: map function returns error");
@@ -290,13 +285,15 @@ dev_pager_getpage(pager, mlist, npages, sync)
* up the original.
*/
page = dev_pager_getfake(paddr);
- TAILQ_INSERT_TAIL(&((dev_pager_t)pager->pg_data)->devp_pglist, page,
- pageq);
+ TAILQ_INSERT_TAIL(&((dev_pager_t)pager->pg_data)->devp_pglist,
+ page, pageq);
vm_object_lock(object);
vm_page_lock_queues();
vm_page_free(m);
- vm_page_insert(page, object, offset);
vm_page_unlock_queues();
+ s = splhigh();
+ vm_page_insert(page, object, offset);
+ splx(s);
PAGE_WAKEUP(m);
if (offset + PAGE_SIZE > object->size)
object->size = offset + PAGE_SIZE; /* XXX anal */
@@ -306,19 +303,17 @@ dev_pager_getpage(pager, mlist, npages, sync)
}
static int
-dev_pager_putpage(pager, mlist, npages, sync)
+dev_pager_putpage(pager, m, sync)
vm_pager_t pager;
- vm_page_t *mlist;
- int npages;
+ vm_page_t m;
boolean_t sync;
{
#ifdef DEBUG
if (dpagerdebug & DDB_FOLLOW)
- printf("dev_pager_putpage(%x, %x, %x, %x)\n",
- pager, mlist, npages, sync);
+ printf("dev_pager_putpage(%x, %x)\n", pager, m);
#endif
if (pager == NULL)
- return;
+ return 0;
panic("dev_pager_putpage called");
}
@@ -350,9 +345,12 @@ dev_pager_getfake(paddr)
}
m = dev_pager_fakelist.tqh_first;
TAILQ_REMOVE(&dev_pager_fakelist, m, pageq);
+
m->flags = PG_BUSY | PG_CLEAN | PG_FAKE | PG_FICTITIOUS;
- m->phys_addr = paddr;
+
m->wire_count = 1;
+ m->phys_addr = paddr;
+
return(m);
}
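
Note: both pagers touched by this patch move to single-page getpage/putpage entry
points with the old (pager, mlist, npages, sync) signatures dropped, and the
operations vector gains two optional multi-page slots, with haspage moved to the
end and the cluster hook removed. The layout below is inferred from the
devicepagerops hunk above and the swappagerops hunk below; the struct and member
names are illustrative stand-ins, the authoritative declaration being in
sys/vm/vm_pager.h, which this page does not show.

    typedef int boolean_t;                  /* stand-ins so the sketch is self-contained */
    typedef struct pager *vm_pager_t;

    /*
     * Inferred ordering of the pager operations vector; pagers that cannot do
     * multi-page I/O (e.g. the device pager above) leave the *multi slots 0.
     */
    struct pagerops_sketch {
            void            (*pgo_init)();          /* one-time pager-type init */
            vm_pager_t      (*pgo_alloc)();         /* allocate a pager instance */
            void            (*pgo_dealloc)();       /* tear a pager instance down */
            int             (*pgo_getpage)();       /* read a single page */
            int             (*pgo_getmulti)();      /* read a cluster, or 0 */
            int             (*pgo_putpage)();       /* write a single page */
            int             (*pgo_putmulti)();      /* write a cluster, or 0 */
            boolean_t       (*pgo_haspage)();       /* is the page backed by swap/device? */
    };
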
diff --git a/sys/vm/swap_pager.c b/sys/vm/swap_pager.c
index 899a6cf..5a1efae 100644
--- a/sys/vm/swap_pager.c
+++ b/sys/vm/swap_pager.c
@@ -1,4 +1,5 @@
/*
+ * Copyright (c) 1994 John S. Dyson
* Copyright (c) 1990 University of Utah.
* Copyright (c) 1991, 1993
* The Regents of the University of California. All rights reserved.
@@ -51,179 +52,145 @@
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/buf.h>
-#include <sys/map.h>
#include <sys/vnode.h>
#include <sys/malloc.h>
#include <miscfs/specfs/specdev.h>
+#include <sys/rlist.h>
#include <vm/vm.h>
+#include <vm/vm_pager.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <vm/swap_pager.h>
-#define NSWSIZES 16 /* size of swtab */
-#define MAXDADDRS 64 /* max # of disk addrs for fixed allocations */
#ifndef NPENDINGIO
-#define NPENDINGIO 64 /* max # of pending cleans */
+#define NPENDINGIO 16
#endif
-#ifdef DEBUG
-int swpagerdebug = 0x100;
-#define SDB_FOLLOW 0x001
-#define SDB_INIT 0x002
-#define SDB_ALLOC 0x004
-#define SDB_IO 0x008
-#define SDB_WRITE 0x010
-#define SDB_FAIL 0x020
-#define SDB_ALLOCBLK 0x040
-#define SDB_FULL 0x080
-#define SDB_ANOM 0x100
-#define SDB_ANOMPANIC 0x200
-#define SDB_CLUSTER 0x400
-#define SDB_PARANOIA 0x800
-#endif
+extern int nswbuf;
+int nswiodone;
+extern int vm_pageout_rate_limit;
+static int cleandone;
+extern int hz;
+int swap_pager_full;
+extern vm_map_t pager_map;
+extern int vm_pageout_pages_needed;
+extern int vm_swap_size;
+extern struct vnode *swapdev_vp;
+
+#define MAX_PAGEOUT_CLUSTER 8
TAILQ_HEAD(swpclean, swpagerclean);
+typedef struct swpagerclean *swp_clean_t;
+
struct swpagerclean {
TAILQ_ENTRY(swpagerclean) spc_list;
int spc_flags;
struct buf *spc_bp;
sw_pager_t spc_swp;
vm_offset_t spc_kva;
- vm_page_t spc_m;
- int spc_npages;
-} swcleanlist[NPENDINGIO];
-typedef struct swpagerclean *swp_clean_t;
-
-/* spc_flags values */
-#define SPC_FREE 0x00
-#define SPC_BUSY 0x01
-#define SPC_DONE 0x02
-#define SPC_ERROR 0x04
-
-struct swtab {
- vm_size_t st_osize; /* size of object (bytes) */
- int st_bsize; /* vs. size of swap block (DEV_BSIZE units) */
-#ifdef DEBUG
- u_long st_inuse; /* number in this range in use */
- u_long st_usecnt; /* total used of this size */
-#endif
-} swtab[NSWSIZES+1];
+ vm_offset_t spc_altkva;
+ int spc_count;
+ vm_page_t spc_m[MAX_PAGEOUT_CLUSTER];
+} swcleanlist [NPENDINGIO] ;
-#ifdef DEBUG
-int swap_pager_poip; /* pageouts in progress */
-int swap_pager_piip; /* pageins in progress */
-#endif
-int swap_pager_maxcluster; /* maximum cluster size */
-int swap_pager_npendingio; /* number of pager clean structs */
+extern vm_map_t kernel_map;
-struct swpclean swap_pager_inuse; /* list of pending page cleans */
-struct swpclean swap_pager_free; /* list of free pager clean structs */
-struct pagerlst swap_pager_list; /* list of "named" anon regions */
+/* spc_flags values */
+#define SPC_ERROR 0x01
+
+#define SWB_EMPTY (-1)
+
+void swap_pager_init(void);
+vm_pager_t swap_pager_alloc(caddr_t, vm_size_t, vm_prot_t, vm_offset_t);
+void swap_pager_dealloc(vm_pager_t);
+boolean_t swap_pager_getpage(vm_pager_t, vm_page_t, boolean_t);
+boolean_t swap_pager_putpage(vm_pager_t, vm_page_t, boolean_t);
+boolean_t swap_pager_getmulti(vm_pager_t, vm_page_t *, int, int, boolean_t);
+boolean_t swap_pager_haspage(vm_pager_t, vm_offset_t);
+int swap_pager_io(sw_pager_t, vm_page_t *, int, int, int);
+void swap_pager_iodone(struct buf *);
+boolean_t swap_pager_clean();
+
+extern struct pagerops swappagerops;
+
+struct swpclean swap_pager_done; /* list of compileted page cleans */
+struct swpclean swap_pager_inuse; /* list of pending page cleans */
+struct swpclean swap_pager_free; /* list of free pager clean structs */
+struct pagerlst swap_pager_list; /* list of "named" anon regions */
+struct pagerlst swap_pager_un_list; /* list of "unnamed" anon pagers */
+
+#define SWAP_FREE_NEEDED 0x1 /* need a swap block */
+int swap_pager_needflags;
+struct rlist *swapfrag;
+
+struct pagerlst *swp_qs[]={
+ &swap_pager_list, &swap_pager_un_list, (struct pagerlst *) 0
+};
-static void swap_pager_init __P((void));
-static vm_pager_t swap_pager_alloc
- __P((caddr_t, vm_size_t, vm_prot_t, vm_offset_t));
-static void swap_pager_clean __P((int));
-#ifdef DEBUG
-static void swap_pager_clean_check __P((vm_page_t *, int, int));
-#endif
-static void swap_pager_cluster
- __P((vm_pager_t, vm_offset_t,
- vm_offset_t *, vm_offset_t *));
-static void swap_pager_dealloc __P((vm_pager_t));
-static int swap_pager_getpage
- __P((vm_pager_t, vm_page_t *, int, boolean_t));
-static boolean_t swap_pager_haspage __P((vm_pager_t, vm_offset_t));
-static int swap_pager_io __P((sw_pager_t, vm_page_t *, int, int));
-static void swap_pager_iodone __P((struct buf *));
-static int swap_pager_putpage
- __P((vm_pager_t, vm_page_t *, int, boolean_t));
+int swap_pager_putmulti();
struct pagerops swappagerops = {
swap_pager_init,
swap_pager_alloc,
swap_pager_dealloc,
swap_pager_getpage,
+ swap_pager_getmulti,
swap_pager_putpage,
- swap_pager_haspage,
- swap_pager_cluster
+ swap_pager_putmulti,
+ swap_pager_haspage
};
-static void
+extern int nswbuf;
+
+int npendingio = NPENDINGIO;
+int pendingiowait;
+int require_swap_init;
+void swap_pager_finish();
+int dmmin, dmmax;
+extern int vm_page_count;
+
+struct buf * getpbuf() ;
+void relpbuf(struct buf *bp) ;
+
+static inline void swapsizecheck() {
+ if( vm_swap_size < 128*btodb(PAGE_SIZE)) {
+ if( swap_pager_full)
+ printf("swap_pager: out of space\n");
+ swap_pager_full = 1;
+ } else if( vm_swap_size > 192*btodb(PAGE_SIZE))
+ swap_pager_full = 0;
+}
+
+void
swap_pager_init()
{
- register swp_clean_t spc;
- register int i, bsize;
extern int dmmin, dmmax;
- int maxbsize;
-#ifdef DEBUG
- if (swpagerdebug & (SDB_FOLLOW|SDB_INIT))
- printf("swpg_init()\n");
-#endif
dfltpagerops = &swappagerops;
- TAILQ_INIT(&swap_pager_list);
- /*
- * Allocate async IO structures.
- *
- * XXX it would be nice if we could do this dynamically based on
- * the value of nswbuf (since we are ultimately limited by that)
- * but neither nswbuf or malloc has been initialized yet. So the
- * structs are statically allocated above.
- */
- swap_pager_npendingio = NPENDINGIO;
+ TAILQ_INIT(&swap_pager_list);
+ TAILQ_INIT(&swap_pager_un_list);
/*
* Initialize clean lists
*/
TAILQ_INIT(&swap_pager_inuse);
+ TAILQ_INIT(&swap_pager_done);
TAILQ_INIT(&swap_pager_free);
- for (i = 0, spc = swcleanlist; i < swap_pager_npendingio; i++, spc++) {
- TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list);
- spc->spc_flags = SPC_FREE;
- }
+
+ require_swap_init = 1;
/*
* Calculate the swap allocation constants.
*/
- if (dmmin == 0) {
- dmmin = DMMIN;
- if (dmmin < CLBYTES/DEV_BSIZE)
- dmmin = CLBYTES/DEV_BSIZE;
- }
- if (dmmax == 0)
- dmmax = DMMAX;
-
- /*
- * Fill in our table of object size vs. allocation size
- */
- bsize = btodb(PAGE_SIZE);
- if (bsize < dmmin)
- bsize = dmmin;
- maxbsize = btodb(sizeof(sw_bm_t) * NBBY * PAGE_SIZE);
- if (maxbsize > dmmax)
- maxbsize = dmmax;
- for (i = 0; i < NSWSIZES; i++) {
- swtab[i].st_osize = (vm_size_t) (MAXDADDRS * dbtob(bsize));
- swtab[i].st_bsize = bsize;
- if (bsize <= btodb(MAXPHYS))
- swap_pager_maxcluster = dbtob(bsize);
-#ifdef DEBUG
- if (swpagerdebug & SDB_INIT)
- printf("swpg_init: ix %d, size %x, bsize %x\n",
- i, swtab[i].st_osize, swtab[i].st_bsize);
-#endif
- if (bsize >= maxbsize)
- break;
- bsize *= 2;
- }
- swtab[i].st_osize = 0;
- swtab[i].st_bsize = bsize;
+
+ dmmin = CLBYTES/DEV_BSIZE;
+ dmmax = btodb(SWB_NPAGES*PAGE_SIZE)*2;
+
}
/*
@@ -231,22 +198,43 @@ swap_pager_init()
* Note that if we are called from the pageout daemon (handle == NULL)
* we should not wait for memory as it could resulting in deadlock.
*/
-static vm_pager_t
-swap_pager_alloc(handle, size, prot, foff)
+vm_pager_t
+swap_pager_alloc(handle, size, prot, offset)
caddr_t handle;
register vm_size_t size;
vm_prot_t prot;
- vm_offset_t foff;
+ vm_offset_t offset;
{
register vm_pager_t pager;
register sw_pager_t swp;
- struct swtab *swt;
int waitok;
-
-#ifdef DEBUG
- if (swpagerdebug & (SDB_FOLLOW|SDB_ALLOC))
- printf("swpg_alloc(%x, %x, %x)\n", handle, size, prot);
-#endif
+ int i,j;
+
+ if (require_swap_init) {
+ swp_clean_t spc;
+ struct buf *bp;
+ /*
+ * kva's are allocated here so that we dont need to keep
+ * doing kmem_alloc pageables at runtime
+ */
+ for (i = 0, spc = swcleanlist; i < npendingio ; i++, spc++) {
+ spc->spc_kva = kmem_alloc_pageable(pager_map, PAGE_SIZE);
+ if (!spc->spc_kva) {
+ break;
+ }
+ spc->spc_bp = malloc( sizeof( *bp), M_TEMP, M_NOWAIT);
+ if (!spc->spc_bp) {
+ kmem_free_wakeup(pager_map, spc->spc_kva, PAGE_SIZE);
+ break;
+ }
+ spc->spc_flags = 0;
+ TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list);
+ }
+ require_swap_init = 0;
+ if( size == 0)
+ return(NULL);
+ }
+
/*
* If this is a "named" anonymous region, look it up and
* return the appropriate pager if it exists.
@@ -264,50 +252,43 @@ swap_pager_alloc(handle, size, prot, foff)
return(pager);
}
}
+
+ if (swap_pager_full) {
+ return(NULL);
+ }
+
/*
* Pager doesn't exist, allocate swap management resources
* and initialize.
*/
- waitok = handle ? M_WAITOK : M_NOWAIT;
+ waitok = handle ? M_WAITOK : M_NOWAIT;
pager = (vm_pager_t)malloc(sizeof *pager, M_VMPAGER, waitok);
if (pager == NULL)
return(NULL);
swp = (sw_pager_t)malloc(sizeof *swp, M_VMPGDATA, waitok);
if (swp == NULL) {
-#ifdef DEBUG
- if (swpagerdebug & SDB_FAIL)
- printf("swpg_alloc: swpager malloc failed\n");
-#endif
free((caddr_t)pager, M_VMPAGER);
return(NULL);
}
size = round_page(size);
- for (swt = swtab; swt->st_osize; swt++)
- if (size <= swt->st_osize)
- break;
-#ifdef DEBUG
- swt->st_inuse++;
- swt->st_usecnt++;
-#endif
swp->sw_osize = size;
- swp->sw_bsize = swt->st_bsize;
- swp->sw_nblocks = (btodb(size) + swp->sw_bsize - 1) / swp->sw_bsize;
+ swp->sw_nblocks = (btodb(size) + btodb(SWB_NPAGES * PAGE_SIZE) - 1) / btodb(SWB_NPAGES*PAGE_SIZE);
swp->sw_blocks = (sw_blk_t)
malloc(swp->sw_nblocks*sizeof(*swp->sw_blocks),
- M_VMPGDATA, M_NOWAIT);
+ M_VMPGDATA, waitok);
if (swp->sw_blocks == NULL) {
free((caddr_t)swp, M_VMPGDATA);
free((caddr_t)pager, M_VMPAGER);
-#ifdef DEBUG
- if (swpagerdebug & SDB_FAIL)
- printf("swpg_alloc: sw_blocks malloc failed\n");
- swt->st_inuse--;
- swt->st_usecnt--;
-#endif
- return(FALSE);
+ return(NULL);
+ }
+
+ for (i = 0; i < swp->sw_nblocks; i++) {
+ swp->sw_blocks[i].swb_valid = 0;
+ swp->sw_blocks[i].swb_locked = 0;
+ for (j = 0; j < SWB_NPAGES; j++)
+ swp->sw_blocks[i].swb_block[j] = SWB_EMPTY;
}
- bzero((caddr_t)swp->sw_blocks,
- swp->sw_nblocks * sizeof(*swp->sw_blocks));
+
swp->sw_poip = 0;
if (handle) {
vm_object_t object;
@@ -324,686 +305,1530 @@ swap_pager_alloc(handle, size, prot, foff)
vm_object_setpager(object, pager, 0, FALSE);
} else {
swp->sw_flags = 0;
- pager->pg_list.tqe_next = NULL;
- pager->pg_list.tqe_prev = NULL;
+ TAILQ_INSERT_TAIL(&swap_pager_un_list, pager, pg_list);
}
pager->pg_handle = handle;
pager->pg_ops = &swappagerops;
pager->pg_type = PG_SWAP;
- pager->pg_flags = PG_CLUSTERPUT;
- pager->pg_data = swp;
+ pager->pg_data = (caddr_t)swp;
-#ifdef DEBUG
- if (swpagerdebug & SDB_ALLOC)
- printf("swpg_alloc: pg_data %x, %x of %x at %x\n",
- swp, swp->sw_nblocks, swp->sw_bsize, swp->sw_blocks);
-#endif
return(pager);
}
+/*
+ * returns disk block associated with pager and offset
+ * additionally, as a side effect returns a flag indicating
+ * if the block has been written
+ */
+
+static int *
+swap_pager_diskaddr(swp, offset, valid)
+ sw_pager_t swp;
+ vm_offset_t offset;
+ int *valid;
+{
+ register sw_blk_t swb;
+ int ix;
+
+ if (valid)
+ *valid = 0;
+ ix = offset / (SWB_NPAGES*PAGE_SIZE);
+ if (swp->sw_blocks == NULL || ix >= swp->sw_nblocks) {
+ return(FALSE);
+ }
+ swb = &swp->sw_blocks[ix];
+ ix = (offset % (SWB_NPAGES*PAGE_SIZE)) / PAGE_SIZE;
+ if (valid)
+ *valid = swb->swb_valid & (1<<ix);
+ return &swb->swb_block[ix];
+}
+
+/*
+ * Utility routine to set the valid (written) bit for
+ * a block associated with a pager and offset
+ */
static void
+swap_pager_setvalid(swp, offset, valid)
+ sw_pager_t swp;
+ vm_offset_t offset;
+ int valid;
+{
+ register sw_blk_t swb;
+ int ix;
+
+ ix = offset / (SWB_NPAGES*PAGE_SIZE);
+ if (swp->sw_blocks == NULL || ix >= swp->sw_nblocks)
+ return;
+
+ swb = &swp->sw_blocks[ix];
+ ix = (offset % (SWB_NPAGES*PAGE_SIZE)) / PAGE_SIZE;
+ if (valid)
+ swb->swb_valid |= (1 << ix);
+ else
+ swb->swb_valid &= ~(1 << ix);
+ return;
+}
+
+/*
+ * this routine allocates swap space with a fragmentation
+ * minimization policy.
+ */
+int
+swap_pager_getswapspace( unsigned amount, unsigned *rtval) {
+ unsigned tmpalloc;
+ unsigned nblocksfrag = btodb(SWB_NPAGES*PAGE_SIZE);
+ if( amount < nblocksfrag) {
+ if( rlist_alloc(&swapfrag, amount, rtval))
+ return 1;
+ if( !rlist_alloc(&swapmap, nblocksfrag, &tmpalloc))
+ return 0;
+ rlist_free( &swapfrag, tmpalloc+amount, tmpalloc + nblocksfrag - 1);
+ *rtval = tmpalloc;
+ return 1;
+ }
+ if( !rlist_alloc(&swapmap, amount, rtval))
+ return 0;
+ else
+ return 1;
+}
+
+/*
+ * this routine frees swap space with a fragmentation
+ * minimization policy.
+ */
+void
+swap_pager_freeswapspace( unsigned from, unsigned to) {
+ unsigned nblocksfrag = btodb(SWB_NPAGES*PAGE_SIZE);
+ unsigned tmpalloc;
+ if( ((to + 1) - from) >= nblocksfrag) {
+ while( (from + nblocksfrag) <= to + 1) {
+ rlist_free(&swapmap, from, from + nblocksfrag - 1);
+ from += nblocksfrag;
+ }
+ }
+ if( from >= to)
+ return;
+ rlist_free(&swapfrag, from, to);
+ while( rlist_alloc(&swapfrag, nblocksfrag, &tmpalloc)) {
+ rlist_free(&swapmap, tmpalloc, tmpalloc + nblocksfrag-1);
+ }
+}
+/*
+ * this routine frees swap blocks from a specified pager
+ */
+void
+_swap_pager_freespace(swp, start, size)
+ sw_pager_t swp;
+ vm_offset_t start;
+ vm_offset_t size;
+{
+ vm_offset_t i;
+ int s;
+
+ s = splbio();
+ for (i = start; i < round_page(start + size - 1); i += PAGE_SIZE) {
+ int valid;
+ int *addr = swap_pager_diskaddr(swp, i, &valid);
+ if (addr && *addr != SWB_EMPTY) {
+ swap_pager_freeswapspace(*addr, *addr+btodb(PAGE_SIZE) - 1);
+ if( valid) {
+ vm_swap_size += btodb(PAGE_SIZE);
+ swap_pager_setvalid(swp, i, 0);
+ }
+ *addr = SWB_EMPTY;
+ }
+ }
+ swapsizecheck();
+ splx(s);
+}
+
+void
+swap_pager_freespace(pager, start, size)
+ vm_pager_t pager;
+ vm_offset_t start;
+ vm_offset_t size;
+{
+ _swap_pager_freespace((sw_pager_t) pager->pg_data, start, size);
+}
+
+/*
+ * swap_pager_reclaim frees up over-allocated space from all pagers
+ * this eliminates internal fragmentation due to allocation of space
+ * for segments that are never swapped to. It has been written so that
+ * it does not block until the rlist_free operation occurs; it keeps
+ * the queues consistant.
+ */
+
+/*
+ * Maximum number of blocks (pages) to reclaim per pass
+ */
+#define MAXRECLAIM 256
+
+void
+swap_pager_reclaim()
+{
+ vm_pager_t p;
+ sw_pager_t swp;
+ int i, j, k;
+ int s;
+ int reclaimcount;
+ static int reclaims[MAXRECLAIM];
+ static int in_reclaim;
+
+/*
+ * allow only one process to be in the swap_pager_reclaim subroutine
+ */
+ s = splbio();
+ if (in_reclaim) {
+ tsleep((caddr_t) &in_reclaim, PSWP, "swrclm", 0);
+ splx(s);
+ return;
+ }
+ in_reclaim = 1;
+ reclaimcount = 0;
+
+ /* for each pager queue */
+ for (k = 0; swp_qs[k]; k++) {
+
+ p = swp_qs[k]->tqh_first;
+ while (p && (reclaimcount < MAXRECLAIM)) {
+
+ /*
+ * see if any blocks associated with a pager has been
+ * allocated but not used (written)
+ */
+ swp = (sw_pager_t) p->pg_data;
+ for (i = 0; i < swp->sw_nblocks; i++) {
+ sw_blk_t swb = &swp->sw_blocks[i];
+ if( swb->swb_locked)
+ continue;
+ for (j = 0; j < SWB_NPAGES; j++) {
+ if (swb->swb_block[j] != SWB_EMPTY &&
+ (swb->swb_valid & (1 << j)) == 0) {
+ reclaims[reclaimcount++] = swb->swb_block[j];
+ swb->swb_block[j] = SWB_EMPTY;
+ if (reclaimcount >= MAXRECLAIM)
+ goto rfinished;
+ }
+ }
+ }
+ p = p->pg_list.tqe_next;
+ }
+ }
+
+rfinished:
+
+/*
+ * free the blocks that have been added to the reclaim list
+ */
+ for (i = 0; i < reclaimcount; i++) {
+ swap_pager_freeswapspace(reclaims[i], reclaims[i]+btodb(PAGE_SIZE) - 1);
+ swapsizecheck();
+ wakeup((caddr_t) &in_reclaim);
+ }
+
+ splx(s);
+ in_reclaim = 0;
+ wakeup((caddr_t) &in_reclaim);
+}
+
+
+/*
+ * swap_pager_copy copies blocks from one pager to another and
+ * destroys the source pager
+ */
+
+void
+swap_pager_copy(srcpager, srcoffset, dstpager, dstoffset, offset)
+ vm_pager_t srcpager;
+ vm_offset_t srcoffset;
+ vm_pager_t dstpager;
+ vm_offset_t dstoffset;
+ vm_offset_t offset;
+{
+ sw_pager_t srcswp, dstswp;
+ vm_offset_t i;
+ int s;
+
+ srcswp = (sw_pager_t) srcpager->pg_data;
+ dstswp = (sw_pager_t) dstpager->pg_data;
+
+/*
+ * remove the source pager from the swap_pager internal queue
+ */
+ s = splbio();
+ if (srcswp->sw_flags & SW_NAMED) {
+ TAILQ_REMOVE(&swap_pager_list, srcpager, pg_list);
+ srcswp->sw_flags &= ~SW_NAMED;
+ } else {
+ TAILQ_REMOVE(&swap_pager_un_list, srcpager, pg_list);
+ }
+
+ while (srcswp->sw_poip) {
+ tsleep((caddr_t)srcswp, PVM, "spgout", 0);
+ }
+ splx(s);
+
+/*
+ * clean all of the pages that are currently active and finished
+ */
+ (void) swap_pager_clean();
+
+ s = splbio();
+/*
+ * clear source block before destination object
+ * (release allocated space)
+ */
+ for (i = 0; i < offset + srcoffset; i += PAGE_SIZE) {
+ int valid;
+ int *addr = swap_pager_diskaddr(srcswp, i, &valid);
+ if (addr && *addr != SWB_EMPTY) {
+ swap_pager_freeswapspace(*addr, *addr+btodb(PAGE_SIZE) - 1);
+ if( valid)
+ vm_swap_size += btodb(PAGE_SIZE);
+ swapsizecheck();
+ *addr = SWB_EMPTY;
+ }
+ }
+/*
+ * transfer source to destination
+ */
+ for (i = 0; i < dstswp->sw_osize; i += PAGE_SIZE) {
+ int srcvalid, dstvalid;
+ int *srcaddrp = swap_pager_diskaddr(srcswp, i + offset + srcoffset,
+ &srcvalid);
+ int *dstaddrp;
+ /*
+ * see if the source has space allocated
+ */
+ if (srcaddrp && *srcaddrp != SWB_EMPTY) {
+ /*
+ * if the source is valid and the dest has no space, then
+ * copy the allocation from the srouce to the dest.
+ */
+ if (srcvalid) {
+ dstaddrp = swap_pager_diskaddr(dstswp, i + dstoffset, &dstvalid);
+ /*
+ * if the dest already has a valid block, deallocate the
+ * source block without copying.
+ */
+ if (!dstvalid && dstaddrp && *dstaddrp != SWB_EMPTY) {
+ swap_pager_freeswapspace(*dstaddrp, *dstaddrp+btodb(PAGE_SIZE) - 1);
+ *dstaddrp = SWB_EMPTY;
+ }
+ if (dstaddrp && *dstaddrp == SWB_EMPTY) {
+ *dstaddrp = *srcaddrp;
+ *srcaddrp = SWB_EMPTY;
+ swap_pager_setvalid(dstswp, i + dstoffset, 1);
+ vm_swap_size -= btodb(PAGE_SIZE);
+ }
+ }
+ /*
+ * if the source is not empty at this point, then deallocate the space.
+ */
+ if (*srcaddrp != SWB_EMPTY) {
+ swap_pager_freeswapspace(*srcaddrp, *srcaddrp+btodb(PAGE_SIZE) - 1);
+ if( srcvalid)
+ vm_swap_size += btodb(PAGE_SIZE);
+ *srcaddrp = SWB_EMPTY;
+ }
+ }
+ }
+
+/*
+ * deallocate the rest of the source object
+ */
+ for (i = dstswp->sw_osize + offset + srcoffset; i < srcswp->sw_osize; i += PAGE_SIZE) {
+ int valid;
+ int *srcaddrp = swap_pager_diskaddr(srcswp, i, &valid);
+ if (srcaddrp && *srcaddrp != SWB_EMPTY) {
+ swap_pager_freeswapspace(*srcaddrp, *srcaddrp+btodb(PAGE_SIZE) - 1);
+ if( valid)
+ vm_swap_size += btodb(PAGE_SIZE);
+ *srcaddrp = SWB_EMPTY;
+ }
+ }
+
+ swapsizecheck();
+ splx(s);
+
+ free((caddr_t)srcswp->sw_blocks, M_VMPGDATA);
+ srcswp->sw_blocks = 0;
+ free((caddr_t)srcswp, M_VMPGDATA);
+ srcpager->pg_data = 0;
+ free((caddr_t)srcpager, M_VMPAGER);
+
+ return;
+}
+
+
+void
swap_pager_dealloc(pager)
vm_pager_t pager;
{
- register int i;
+ register int i,j;
register sw_blk_t bp;
register sw_pager_t swp;
- struct swtab *swt;
int s;
-#ifdef DEBUG
- /* save panic time state */
- if ((swpagerdebug & SDB_ANOMPANIC) && panicstr)
- return;
- if (swpagerdebug & (SDB_FOLLOW|SDB_ALLOC))
- printf("swpg_dealloc(%x)\n", pager);
-#endif
/*
* Remove from list right away so lookups will fail if we
* block for pageout completion.
*/
+ s = splbio();
swp = (sw_pager_t) pager->pg_data;
if (swp->sw_flags & SW_NAMED) {
TAILQ_REMOVE(&swap_pager_list, pager, pg_list);
swp->sw_flags &= ~SW_NAMED;
+ } else {
+ TAILQ_REMOVE(&swap_pager_un_list, pager, pg_list);
}
-#ifdef DEBUG
- for (swt = swtab; swt->st_osize; swt++)
- if (swp->sw_osize <= swt->st_osize)
- break;
- swt->st_inuse--;
-#endif
-
/*
* Wait for all pageouts to finish and remove
* all entries from cleaning list.
*/
- s = splbio();
+
while (swp->sw_poip) {
- swp->sw_flags |= SW_WANTED;
- (void) tsleep(swp, PVM, "swpgdealloc", 0);
+ tsleep((caddr_t)swp, PVM, "swpout", 0);
}
splx(s);
- swap_pager_clean(B_WRITE);
+
+
+ (void) swap_pager_clean();
/*
* Free left over swap blocks
*/
- for (i = 0, bp = swp->sw_blocks; i < swp->sw_nblocks; i++, bp++)
- if (bp->swb_block) {
-#ifdef DEBUG
- if (swpagerdebug & (SDB_ALLOCBLK|SDB_FULL))
- printf("swpg_dealloc: blk %x\n",
- bp->swb_block);
-#endif
- rmfree(swapmap, swp->sw_bsize, bp->swb_block);
+ s = splbio();
+ for (i = 0, bp = swp->sw_blocks; i < swp->sw_nblocks; i++, bp++) {
+ for (j = 0; j < SWB_NPAGES; j++)
+ if (bp->swb_block[j] != SWB_EMPTY) {
+ swap_pager_freeswapspace((unsigned)bp->swb_block[j],
+ (unsigned)bp->swb_block[j] + btodb(PAGE_SIZE) - 1);
+ if( bp->swb_valid & (1<<j))
+ vm_swap_size += btodb(PAGE_SIZE);
+ bp->swb_block[j] = SWB_EMPTY;
}
+ }
+ splx(s);
+ swapsizecheck();
+
/*
* Free swap management resources
*/
free((caddr_t)swp->sw_blocks, M_VMPGDATA);
+ swp->sw_blocks = 0;
free((caddr_t)swp, M_VMPGDATA);
+ pager->pg_data = 0;
free((caddr_t)pager, M_VMPAGER);
}
-static int
-swap_pager_getpage(pager, mlist, npages, sync)
+/*
+ * swap_pager_getmulti can get multiple pages.
+ */
+int
+swap_pager_getmulti(pager, m, count, reqpage, sync)
+ vm_pager_t pager;
+ vm_page_t *m;
+ int count;
+ int reqpage;
+ boolean_t sync;
+{
+ if( reqpage >= count)
+ panic("swap_pager_getmulti: reqpage >= count\n");
+ return swap_pager_input((sw_pager_t) pager->pg_data, m, count, reqpage);
+}
+
+/*
+ * swap_pager_getpage gets individual pages
+ */
+int
+swap_pager_getpage(pager, m, sync)
vm_pager_t pager;
- vm_page_t *mlist;
- int npages;
+ vm_page_t m;
boolean_t sync;
{
-#ifdef DEBUG
- if (swpagerdebug & SDB_FOLLOW)
- printf("swpg_getpage(%x, %x, %x, %x)\n",
- pager, mlist, npages, sync);
-#endif
- return(swap_pager_io((sw_pager_t)pager->pg_data,
- mlist, npages, B_READ));
+ vm_page_t marray[1];
+
+ marray[0] = m;
+ return swap_pager_input((sw_pager_t)pager->pg_data, marray, 1, 0);
}
-static int
-swap_pager_putpage(pager, mlist, npages, sync)
+int
+swap_pager_putmulti(pager, m, c, sync, rtvals)
vm_pager_t pager;
- vm_page_t *mlist;
- int npages;
+ vm_page_t *m;
+ int c;
boolean_t sync;
+ int *rtvals;
{
int flags;
-#ifdef DEBUG
- if (swpagerdebug & SDB_FOLLOW)
- printf("swpg_putpage(%x, %x, %x, %x)\n",
- pager, mlist, npages, sync);
-#endif
if (pager == NULL) {
- swap_pager_clean(B_WRITE);
- return (VM_PAGER_OK); /* ??? */
+ (void) swap_pager_clean();
+ return VM_PAGER_OK;
}
+
flags = B_WRITE;
if (!sync)
flags |= B_ASYNC;
- return(swap_pager_io((sw_pager_t)pager->pg_data,
- mlist, npages, flags));
+
+ return swap_pager_output((sw_pager_t)pager->pg_data, m, c, flags, rtvals);
}
-static boolean_t
-swap_pager_haspage(pager, offset)
+/*
+ * swap_pager_putpage writes individual pages
+ */
+int
+swap_pager_putpage(pager, m, sync)
vm_pager_t pager;
+ vm_page_t m;
+ boolean_t sync;
+{
+ int flags;
+ vm_page_t marray[1];
+ int rtvals[1];
+
+
+ if (pager == NULL) {
+ (void) swap_pager_clean();
+ return VM_PAGER_OK;
+ }
+
+ marray[0] = m;
+ flags = B_WRITE;
+ if (!sync)
+ flags |= B_ASYNC;
+
+ swap_pager_output((sw_pager_t)pager->pg_data, marray, 1, flags, rtvals);
+
+ return rtvals[0];
+}
+
+static inline int
+const swap_pager_block_index(swp, offset)
+ sw_pager_t swp;
+ vm_offset_t offset;
+{
+ return (offset / (SWB_NPAGES*PAGE_SIZE));
+}
+
+static inline int
+const swap_pager_block_offset(swp, offset)
+ sw_pager_t swp;
+ vm_offset_t offset;
+{
+ return ((offset % (PAGE_SIZE*SWB_NPAGES)) / PAGE_SIZE);
+}
+
+/*
+ * _swap_pager_haspage returns TRUE if the pager has data that has
+ * been written out.
+ */
+static boolean_t
+_swap_pager_haspage(swp, offset)
+ sw_pager_t swp;
vm_offset_t offset;
{
- register sw_pager_t swp;
register sw_blk_t swb;
int ix;
-#ifdef DEBUG
- if (swpagerdebug & (SDB_FOLLOW|SDB_ALLOCBLK))
- printf("swpg_haspage(%x, %x) ", pager, offset);
-#endif
- swp = (sw_pager_t) pager->pg_data;
- ix = offset / dbtob(swp->sw_bsize);
+ ix = offset / (SWB_NPAGES*PAGE_SIZE);
if (swp->sw_blocks == NULL || ix >= swp->sw_nblocks) {
-#ifdef DEBUG
- if (swpagerdebug & (SDB_FAIL|SDB_FOLLOW|SDB_ALLOCBLK))
- printf("swpg_haspage: %x bad offset %x, ix %x\n",
- swp->sw_blocks, offset, ix);
-#endif
return(FALSE);
}
swb = &swp->sw_blocks[ix];
- if (swb->swb_block)
- ix = atop(offset % dbtob(swp->sw_bsize));
-#ifdef DEBUG
- if (swpagerdebug & SDB_ALLOCBLK)
- printf("%x blk %x+%x ", swp->sw_blocks, swb->swb_block, ix);
- if (swpagerdebug & (SDB_FOLLOW|SDB_ALLOCBLK))
- printf("-> %c\n",
- "FT"[swb->swb_block && (swb->swb_mask & (1 << ix))]);
-#endif
- if (swb->swb_block && (swb->swb_mask & (1 << ix)))
- return(TRUE);
+ ix = (offset % (SWB_NPAGES*PAGE_SIZE)) / PAGE_SIZE;
+ if (swb->swb_block[ix] != SWB_EMPTY) {
+ if (swb->swb_valid & (1 << ix))
+ return TRUE;
+ }
+
return(FALSE);
}
+/*
+ * swap_pager_haspage is the externally accessible version of
+ * _swap_pager_haspage above. this routine takes a vm_pager_t
+ * for an argument instead of sw_pager_t.
+ */
+boolean_t
+swap_pager_haspage(pager, offset)
+ vm_pager_t pager;
+ vm_offset_t offset;
+{
+ return _swap_pager_haspage((sw_pager_t) pager->pg_data, offset);
+}
+
+/*
+ * swap_pager_freepage is a convienience routine that clears the busy
+ * bit and deallocates a page.
+ */
static void
-swap_pager_cluster(pager, offset, loffset, hoffset)
- vm_pager_t pager;
- vm_offset_t offset;
- vm_offset_t *loffset;
- vm_offset_t *hoffset;
+swap_pager_freepage(m)
+ vm_page_t m;
{
- sw_pager_t swp;
- register int bsize;
- vm_offset_t loff, hoff;
+ PAGE_WAKEUP(m);
+ vm_page_free(m);
+}
-#ifdef DEBUG
- if (swpagerdebug & (SDB_FOLLOW|SDB_CLUSTER))
- printf("swpg_cluster(%x, %x) ", pager, offset);
-#endif
- swp = (sw_pager_t) pager->pg_data;
- bsize = dbtob(swp->sw_bsize);
- if (bsize > swap_pager_maxcluster)
- bsize = swap_pager_maxcluster;
-
- loff = offset - (offset % bsize);
- if (loff >= swp->sw_osize)
- panic("swap_pager_cluster: bad offset");
-
- hoff = loff + bsize;
- if (hoff > swp->sw_osize)
- hoff = swp->sw_osize;
-
- *loffset = loff;
- *hoffset = hoff;
-#ifdef DEBUG
- if (swpagerdebug & (SDB_FOLLOW|SDB_CLUSTER))
- printf("returns [%x-%x]\n", loff, hoff);
-#endif
+/*
+ * swap_pager_ridpages is a convienience routine that deallocates all
+ * but the required page. this is usually used in error returns that
+ * need to invalidate the "extra" readahead pages.
+ */
+static void
+swap_pager_ridpages(m, count, reqpage)
+ vm_page_t *m;
+ int count;
+ int reqpage;
+{
+ int i;
+ for (i = 0; i < count; i++)
+ if (i != reqpage)
+ swap_pager_freepage(m[i]);
}
+int swapwritecount=0;
+
/*
- * Scaled down version of swap().
- * Assumes that PAGE_SIZE < MAXPHYS; i.e. only one operation needed.
- * BOGUS: lower level IO routines expect a KVA so we have to map our
- * provided physical page into the KVA to keep them happy.
+ * swap_pager_iodone1 is the completion routine for both reads and async writes
*/
-static int
-swap_pager_io(swp, mlist, npages, flags)
+void
+swap_pager_iodone1(bp)
+ struct buf *bp;
+{
+ bp->b_flags |= B_DONE;
+ bp->b_flags &= ~B_ASYNC;
+ wakeup((caddr_t)bp);
+/*
+ if ((bp->b_flags & B_READ) == 0)
+ vwakeup(bp);
+*/
+}
+
+
+int
+swap_pager_input(swp, m, count, reqpage)
register sw_pager_t swp;
- vm_page_t *mlist;
- int npages;
- int flags;
+ vm_page_t *m;
+ int count, reqpage;
{
register struct buf *bp;
- register sw_blk_t swb;
+ sw_blk_t swb[count];
register int s;
- int ix, mask;
+ int i;
boolean_t rv;
- vm_offset_t kva, off;
+ vm_offset_t kva, off[count];
swp_clean_t spc;
- vm_page_t m;
+ vm_offset_t paging_offset;
+ vm_object_t object;
+ int reqaddr[count];
-#ifdef DEBUG
- /* save panic time state */
- if ((swpagerdebug & SDB_ANOMPANIC) && panicstr)
- return (VM_PAGER_FAIL); /* XXX: correct return? */
- if (swpagerdebug & (SDB_FOLLOW|SDB_IO))
- printf("swpg_io(%x, %x, %x, %x)\n", swp, mlist, npages, flags);
- if (flags & B_READ) {
- if (flags & B_ASYNC)
- panic("swap_pager_io: cannot do ASYNC reads");
- if (npages != 1)
- panic("swap_pager_io: cannot do clustered reads");
- }
-#endif
+ int first, last;
+ int failed;
+ int reqdskregion;
+ object = m[reqpage]->object;
+ paging_offset = object->paging_offset;
/*
* First determine if the page exists in the pager if this is
* a sync read. This quickly handles cases where we are
* following shadow chains looking for the top level object
* with the page.
*/
- m = *mlist;
- off = m->offset + m->object->paging_offset;
- ix = off / dbtob(swp->sw_bsize);
- if (swp->sw_blocks == NULL || ix >= swp->sw_nblocks) {
-#ifdef DEBUG
- if ((flags & B_READ) == 0 && (swpagerdebug & SDB_ANOM)) {
- printf("swap_pager_io: no swap block on write\n");
- return(VM_PAGER_BAD);
- }
-#endif
+ if (swp->sw_blocks == NULL) {
+ swap_pager_ridpages(m, count, reqpage);
return(VM_PAGER_FAIL);
}
- swb = &swp->sw_blocks[ix];
- off = off % dbtob(swp->sw_bsize);
- if ((flags & B_READ) &&
- (swb->swb_block == 0 || (swb->swb_mask & (1 << atop(off))) == 0))
+
+ for(i = 0; i < count; i++) {
+ vm_offset_t foff = m[i]->offset + paging_offset;
+ int ix = swap_pager_block_index(swp, foff);
+ if (ix >= swp->sw_nblocks) {
+ int j;
+ if( i <= reqpage) {
+ swap_pager_ridpages(m, count, reqpage);
+ return(VM_PAGER_FAIL);
+ }
+ for(j = i; j < count; j++) {
+ swap_pager_freepage(m[j]);
+ }
+ count = i;
+ break;
+ }
+
+ swb[i] = &swp->sw_blocks[ix];
+ off[i] = swap_pager_block_offset(swp, foff);
+ reqaddr[i] = swb[i]->swb_block[off[i]];
+ }
+
+ /* make sure that our required input request is existant */
+
+ if (reqaddr[reqpage] == SWB_EMPTY ||
+ (swb[reqpage]->swb_valid & (1 << off[reqpage])) == 0) {
+ swap_pager_ridpages(m, count, reqpage);
return(VM_PAGER_FAIL);
+ }
+
+
+ reqdskregion = reqaddr[reqpage] / dmmax;
/*
- * For reads (pageins) and synchronous writes, we clean up
- * all completed async pageouts.
+ * search backwards for the first contiguous page to transfer
*/
- if ((flags & B_ASYNC) == 0) {
- s = splbio();
- swap_pager_clean(flags&B_READ);
-#ifdef DEBUG
- if (swpagerdebug & SDB_PARANOIA)
- swap_pager_clean_check(mlist, npages, flags&B_READ);
-#endif
- splx(s);
+ failed = 0;
+ first = 0;
+ for (i = reqpage - 1; i >= 0; --i) {
+ if ( failed || (reqaddr[i] == SWB_EMPTY) ||
+ (swb[i]->swb_valid & (1 << off[i])) == 0 ||
+ (reqaddr[i] != (reqaddr[reqpage] + (i - reqpage) * btodb(PAGE_SIZE))) ||
+ ((reqaddr[i] / dmmax) != reqdskregion)) {
+ failed = 1;
+ swap_pager_freepage(m[i]);
+ if (first == 0)
+ first = i + 1;
+ }
}
/*
- * For async writes (pageouts), we cleanup completed pageouts so
- * that all available resources are freed. Also tells us if this
- * page is already being cleaned. If it is, or no resources
- * are available, we try again later.
+ * search forwards for the last contiguous page to transfer
*/
- else {
- swap_pager_clean(B_WRITE);
-#ifdef DEBUG
- if (swpagerdebug & SDB_PARANOIA)
- swap_pager_clean_check(mlist, npages, B_WRITE);
-#endif
- if (swap_pager_free.tqh_first == NULL) {
-#ifdef DEBUG
- if (swpagerdebug & SDB_FAIL)
- printf("%s: no available io headers\n",
- "swap_pager_io");
-#endif
- return(VM_PAGER_AGAIN);
+ failed = 0;
+ last = count;
+ for (i = reqpage + 1; i < count; i++) {
+ if ( failed || (reqaddr[i] == SWB_EMPTY) ||
+ (swb[i]->swb_valid & (1 << off[i])) == 0 ||
+ (reqaddr[i] != (reqaddr[reqpage] + (i - reqpage) * btodb(PAGE_SIZE))) ||
+ ((reqaddr[i] / dmmax) != reqdskregion)) {
+ failed = 1;
+ swap_pager_freepage(m[i]);
+ if (last == count)
+ last = i;
+ }
+ }
+
+ count = last;
+ if (first != 0) {
+ for (i = first; i < count; i++) {
+ m[i-first] = m[i];
+ reqaddr[i-first] = reqaddr[i];
+ off[i-first] = off[i];
}
+ count -= first;
+ reqpage -= first;
}
+ ++swb[reqpage]->swb_locked;
+
/*
- * Allocate a swap block if necessary.
+ * at this point:
+ * "m" is a pointer to the array of vm_page_t for paging I/O
+ * "count" is the number of vm_page_t entries represented by "m"
+ * "object" is the vm_object_t for I/O
+ * "reqpage" is the index into "m" for the page actually faulted
*/
- if (swb->swb_block == 0) {
- swb->swb_block = rmalloc(swapmap, swp->sw_bsize);
- if (swb->swb_block == 0) {
-#ifdef DEBUG
- if (swpagerdebug & SDB_FAIL)
- printf("swpg_io: rmalloc of %x failed\n",
- swp->sw_bsize);
-#endif
- /*
- * XXX this is technically a resource shortage that
- * should return AGAIN, but the situation isn't likely
- * to be remedied just by delaying a little while and
- * trying again (the pageout daemon's current response
- * to AGAIN) so we just return FAIL.
- */
- return(VM_PAGER_FAIL);
+
+ spc = NULL; /* we might not use an spc data structure */
+ kva = 0;
+
+ /*
+ * we allocate a new kva for transfers > 1 page
+ * but for transfers == 1 page, the swap_pager_free list contains
+ * entries that have pre-allocated kva's (for efficiency).
+ */
+ if (count > 1) {
+ kva = kmem_alloc_pageable(pager_map, count*PAGE_SIZE);
+ }
+
+
+ if (!kva) {
+ /*
+ * if a kva has not been allocated, we can only do a one page transfer,
+ * so we free the other pages that might have been allocated by
+ * vm_fault.
+ */
+ swap_pager_ridpages(m, count, reqpage);
+ m[0] = m[reqpage];
+ reqaddr[0] = reqaddr[reqpage];
+
+ count = 1;
+ reqpage = 0;
+ /*
+ * get a swap pager clean data structure, block until we get it
+ */
+ if (swap_pager_free.tqh_first == NULL) {
+ s = splbio();
+ if( curproc == pageproc)
+ (void) swap_pager_clean();
+ else
+ wakeup((caddr_t) &vm_pages_needed);
+ while (swap_pager_free.tqh_first == NULL) {
+ swap_pager_needflags |= SWAP_FREE_NEEDED;
+ tsleep((caddr_t)&swap_pager_free,
+ PVM, "swpfre", 0);
+ if( curproc == pageproc)
+ (void) swap_pager_clean();
+ else
+ wakeup((caddr_t) &vm_pages_needed);
+ }
+ splx(s);
}
-#ifdef DEBUG
- if (swpagerdebug & (SDB_FULL|SDB_ALLOCBLK))
- printf("swpg_io: %x alloc blk %x at ix %x\n",
- swp->sw_blocks, swb->swb_block, ix);
-#endif
+ spc = swap_pager_free.tqh_first;
+ TAILQ_REMOVE(&swap_pager_free, spc, spc_list);
+ kva = spc->spc_kva;
}
+
/*
- * Allocate a kernel virtual address and initialize so that PTE
- * is available for lower level IO drivers.
+ * map our page(s) into kva for input
*/
- kva = vm_pager_map_pages(mlist, npages, !(flags & B_ASYNC));
- if (kva == NULL) {
-#ifdef DEBUG
- if (swpagerdebug & SDB_FAIL)
- printf("%s: no KVA space to map pages\n",
- "swap_pager_io");
-#endif
- return(VM_PAGER_AGAIN);
+ for (i = 0; i < count; i++) {
+ pmap_kenter( kva + PAGE_SIZE * i, VM_PAGE_TO_PHYS(m[i]));
}
+ pmap_update();
+
/*
- * Get a swap buffer header and initialize it.
+ * Get a swap buffer header and perform the IO
*/
- s = splbio();
- while (bswlist.b_actf == NULL) {
-#ifdef DEBUG
- if (swpagerdebug & SDB_ANOM)
- printf("swap_pager_io: wait on swbuf for %x (%d)\n",
- m, flags);
-#endif
- bswlist.b_flags |= B_WANTED;
- tsleep((caddr_t)&bswlist, PSWP+1, "swpgiobuf", 0);
+ if( spc) {
+ bp = spc->spc_bp;
+ bzero(bp, sizeof *bp);
+ bp->b_spc = spc;
+ } else {
+ bp = getpbuf();
}
- bp = bswlist.b_actf;
- bswlist.b_actf = bp->b_actf;
- splx(s);
- bp->b_flags = B_BUSY | (flags & B_READ);
+
+ s = splbio();
+ bp->b_flags = B_BUSY | B_READ | B_CALL;
+ bp->b_iodone = swap_pager_iodone1;
bp->b_proc = &proc0; /* XXX (but without B_PHYS set this is ok) */
- bp->b_data = (caddr_t)kva;
- bp->b_blkno = swb->swb_block + btodb(off);
+ bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred;
+ crhold(bp->b_rcred);
+ crhold(bp->b_wcred);
+ bp->b_un.b_addr = (caddr_t) kva;
+ bp->b_blkno = reqaddr[0];
+ bp->b_bcount = PAGE_SIZE*count;
+ bp->b_bufsize = PAGE_SIZE*count;
+
+/*
VHOLD(swapdev_vp);
bp->b_vp = swapdev_vp;
if (swapdev_vp->v_type == VBLK)
bp->b_dev = swapdev_vp->v_rdev;
- bp->b_bcount = npages * PAGE_SIZE;
+*/
+ bgetvp( swapdev_vp, bp);
+
+ swp->sw_piip++;
/*
- * For writes we set up additional buffer fields, record a pageout
- * in progress and mark that these swap blocks are now allocated.
+ * perform the I/O
*/
- if ((bp->b_flags & B_READ) == 0) {
- bp->b_dirtyoff = 0;
- bp->b_dirtyend = npages * PAGE_SIZE;
- swapdev_vp->v_numoutput++;
+ VOP_STRATEGY(bp);
+
+ /*
+ * wait for the sync I/O to complete
+ */
+ while ((bp->b_flags & B_DONE) == 0) {
+ tsleep((caddr_t)bp, PVM, "swread", 0);
+ }
+ rv = (bp->b_flags & B_ERROR) ? VM_PAGER_FAIL : VM_PAGER_OK;
+ bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_DIRTY|B_CALL|B_DONE);
+
+ --swp->sw_piip;
+ if (swp->sw_piip == 0)
+ wakeup((caddr_t) swp);
+
+ /*
+ * relpbuf does this, but we maintain our own buffer
+ * list also...
+ */
+ if (bp->b_vp)
+ brelvp(bp);
+
+ splx(s);
+ --swb[reqpage]->swb_locked;
+
+ /*
+ * remove the mapping for kernel virtual
+ */
+ pmap_remove(vm_map_pmap(pager_map), kva, kva + count * PAGE_SIZE);
+
+ if (spc) {
+ /*
+ * if we have used an spc, we need to free it.
+ */
+ if( bp->b_rcred != NOCRED)
+ crfree(bp->b_rcred);
+ if( bp->b_wcred != NOCRED)
+ crfree(bp->b_wcred);
+ TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list);
+ if (swap_pager_needflags & SWAP_FREE_NEEDED) {
+ swap_pager_needflags &= ~SWAP_FREE_NEEDED;
+ wakeup((caddr_t)&swap_pager_free);
+ }
+ } else {
+ /*
+ * free the kernel virtual addresses
+ */
+ kmem_free_wakeup(pager_map, kva, count * PAGE_SIZE);
+ /*
+ * release the physical I/O buffer
+ */
+ relpbuf(bp);
+ /*
+ * finish up input if everything is ok
+ */
+ if( rv == VM_PAGER_OK) {
+ for (i = 0; i < count; i++) {
+ pmap_clear_modify(VM_PAGE_TO_PHYS(m[i]));
+ m[i]->flags |= PG_CLEAN;
+ m[i]->flags &= ~PG_LAUNDRY;
+ if (i != reqpage) {
+ /*
+ * whether or not to leave the page activated
+ * is up in the air, but we should put the page
+ * on a page queue somewhere. (it already is in
+ * the object).
+ * After some emperical results, it is best
+ * to deactivate the readahead pages.
+ */
+ vm_page_deactivate(m[i]);
+ m[i]->act_count = 2;
+
+ /*
+ * just in case someone was asking for this
+ * page we now tell them that it is ok to use
+ */
+ m[i]->flags &= ~PG_FAKE;
+ PAGE_WAKEUP(m[i]);
+ }
+ }
+ if( swap_pager_full) {
+ _swap_pager_freespace( swp, m[0]->offset+paging_offset, count*PAGE_SIZE);
+ }
+ } else {
+ swap_pager_ridpages(m, count, reqpage);
+ }
+ }
+ return(rv);
+}
+
+int
+swap_pager_output(swp, m, count, flags, rtvals)
+ register sw_pager_t swp;
+ vm_page_t *m;
+ int count;
+ int flags;
+ int *rtvals;
+{
+ register struct buf *bp;
+ sw_blk_t swb[count];
+ register int s;
+ int i, j, ix;
+ boolean_t rv;
+ vm_offset_t kva, off, foff;
+ swp_clean_t spc;
+ vm_offset_t paging_offset;
+ vm_object_t object;
+ int reqaddr[count];
+ int failed;
+
+/*
+ if( count > 1)
+ printf("off: 0x%x, count: %d\n", m[0]->offset, count);
+*/
+ spc = NULL;
+
+ object = m[0]->object;
+ paging_offset = object->paging_offset;
+
+ failed = 0;
+ for(j=0;j<count;j++) {
+ foff = m[j]->offset + paging_offset;
+ ix = swap_pager_block_index(swp, foff);
+ swb[j] = 0;
+ if( swp->sw_blocks == NULL || ix >= swp->sw_nblocks) {
+ rtvals[j] = VM_PAGER_FAIL;
+ failed = 1;
+ continue;
+ } else {
+ rtvals[j] = VM_PAGER_OK;
+ }
+ swb[j] = &swp->sw_blocks[ix];
+ ++swb[j]->swb_locked;
+ if( failed) {
+ rtvals[j] = VM_PAGER_FAIL;
+ continue;
+ }
+ off = swap_pager_block_offset(swp, foff);
+ reqaddr[j] = swb[j]->swb_block[off];
+ if( reqaddr[j] == SWB_EMPTY) {
+ int blk;
+ int tries;
+ int ntoget;
+ tries = 0;
+ s = splbio();
+
+ /*
+ * if any other pages have been allocated in this block, we
+ * only try to get one page.
+ */
+ for (i = 0; i < SWB_NPAGES; i++) {
+ if (swb[j]->swb_block[i] != SWB_EMPTY)
+ break;
+ }
+
+
+ ntoget = (i == SWB_NPAGES) ? SWB_NPAGES : 1;
+ /*
+ * this code is alittle conservative, but works
+ * (the intent of this code is to allocate small chunks
+ * for small objects)
+ */
+ if( (m[j]->offset == 0) && (ntoget*PAGE_SIZE > object->size)) {
+ ntoget = (object->size + (PAGE_SIZE-1))/PAGE_SIZE;
+ }
+
+retrygetspace:
+ if (!swap_pager_full && ntoget > 1 &&
+ swap_pager_getswapspace(ntoget * btodb(PAGE_SIZE), &blk)) {
+
+ for (i = 0; i < ntoget; i++) {
+ swb[j]->swb_block[i] = blk + btodb(PAGE_SIZE) * i;
+ swb[j]->swb_valid = 0;
+ }
+
+ reqaddr[j] = swb[j]->swb_block[off];
+ } else if (!swap_pager_getswapspace(btodb(PAGE_SIZE),
+ &swb[j]->swb_block[off])) {
+ /*
+ * if the allocation has failed, we try to reclaim space and
+ * retry.
+ */
+ if (++tries == 1) {
+ swap_pager_reclaim();
+ goto retrygetspace;
+ }
+ rtvals[j] = VM_PAGER_AGAIN;
+ failed = 1;
+ } else {
+ reqaddr[j] = swb[j]->swb_block[off];
+ swb[j]->swb_valid &= ~(1<<off);
+ }
+ splx(s);
+ }
+ }
+
+ /*
+ * search forwards for the last contiguous page to transfer
+ */
+ failed = 0;
+ for (i = 0; i < count; i++) {
+ if( failed || (reqaddr[i] != reqaddr[0] + i*btodb(PAGE_SIZE)) ||
+ (reqaddr[i] / dmmax) != (reqaddr[0] / dmmax) ||
+ (rtvals[i] != VM_PAGER_OK)) {
+ failed = 1;
+ if( rtvals[i] == VM_PAGER_OK)
+ rtvals[i] = VM_PAGER_AGAIN;
+ }
+ }
+
+ for(i = 0; i < count; i++) {
+ if( rtvals[i] != VM_PAGER_OK) {
+ if( swb[i])
+ --swb[i]->swb_locked;
+ }
+ }
+
+ for(i = 0; i < count; i++)
+ if( rtvals[i] != VM_PAGER_OK)
+ break;
+
+ if( i == 0) {
+ return VM_PAGER_AGAIN;
+ }
+
+ count = i;
+ for(i=0;i<count;i++) {
+ if( reqaddr[i] == SWB_EMPTY)
+ printf("I/O to empty block????\n");
+ }
+
+ /*
+ */
+
+ /*
+ * For synchronous writes, we clean up
+ * all completed async pageouts.
+ */
+ if ((flags & B_ASYNC) == 0) {
+ swap_pager_clean();
+ }
+
+ kva = 0;
+
+ /*
+ * we allocate a new kva for transfers > 1 page
+ * but for transfers == 1 page, the swap_pager_free list contains
+ * entries that have pre-allocated kva's (for efficiency).
+ */
+ if ( count > 1) {
+ kva = kmem_alloc_pageable(pager_map, count*PAGE_SIZE);
+ if( !kva) {
+ for (i = 0; i < count; i++) {
+ if( swb[i])
+ --swb[i]->swb_locked;
+ rtvals[i] = VM_PAGER_AGAIN;
+ }
+ return VM_PAGER_AGAIN;
+ }
+ }
+
+ /*
+ * get a swap pager clean data structure, block until we get it
+ */
+ if (swap_pager_free.tqh_first == NULL) {
+/*
+ if (flags & B_ASYNC) {
+ for(i=0;i<count;i++) {
+ rtvals[i] = VM_PAGER_AGAIN;
+ if( swb[i])
+ --swb[i]->swb_locked;
+ }
+ return VM_PAGER_AGAIN;
+ }
+*/
+
s = splbio();
- swp->sw_poip++;
+ if( curproc == pageproc)
+ (void) swap_pager_clean();
+ else
+ wakeup((caddr_t) &vm_pages_needed);
+ while (swap_pager_free.tqh_first == NULL) {
+ swap_pager_needflags |= SWAP_FREE_NEEDED;
+ tsleep((caddr_t)&swap_pager_free,
+ PVM, "swpfre", 0);
+ if( curproc == pageproc)
+ (void) swap_pager_clean();
+ else
+ wakeup((caddr_t) &vm_pages_needed);
+ }
splx(s);
- mask = (~(~0 << npages)) << atop(off);
-#ifdef DEBUG
- swap_pager_poip++;
- if (swpagerdebug & SDB_WRITE)
- printf("swpg_io: write: bp=%x swp=%x poip=%d\n",
- bp, swp, swp->sw_poip);
- if ((swpagerdebug & SDB_ALLOCBLK) &&
- (swb->swb_mask & mask) != mask)
- printf("swpg_io: %x write %d pages at %x+%x\n",
- swp->sw_blocks, npages, swb->swb_block,
- atop(off));
- if (swpagerdebug & SDB_CLUSTER)
- printf("swpg_io: off=%x, npg=%x, mask=%x, bmask=%x\n",
- off, npages, mask, swb->swb_mask);
-#endif
- swb->swb_mask |= mask;
}
+
+ spc = swap_pager_free.tqh_first;
+ TAILQ_REMOVE(&swap_pager_free, spc, spc_list);
+ if( !kva) {
+ kva = spc->spc_kva;
+ spc->spc_altkva = 0;
+ } else {
+ spc->spc_altkva = kva;
+ }
+
+ /*
+ * map our page(s) into kva for I/O
+ */
+ for (i = 0; i < count; i++) {
+ pmap_kenter( kva + PAGE_SIZE * i, VM_PAGE_TO_PHYS(m[i]));
+ }
+ pmap_update();
+
/*
- * If this is an async write we set up still more buffer fields
+ * get the base I/O offset into the swap file
+ */
+ for(i=0;i<count;i++) {
+ foff = m[i]->offset + paging_offset;
+ off = swap_pager_block_offset(swp, foff);
+ /*
+ * if we are setting the valid bit anew,
+ * then diminish the swap free space
+ */
+ if( (swb[i]->swb_valid & (1 << off)) == 0)
+ vm_swap_size -= btodb(PAGE_SIZE);
+
+ /*
+ * set the valid bit
+ */
+ swb[i]->swb_valid |= (1 << off);
+ /*
+ * and unlock the data structure
+ */
+ --swb[i]->swb_locked;
+ }
+
+ s = splbio();
+ /*
+ * Get a swap buffer header and perform the IO
+ */
+ bp = spc->spc_bp;
+ bzero(bp, sizeof *bp);
+ bp->b_spc = spc;
+
+ bp->b_flags = B_BUSY;
+ bp->b_proc = &proc0; /* XXX (but without B_PHYS set this is ok) */
+ bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred;
+ crhold(bp->b_rcred);
+ crhold(bp->b_wcred);
+ bp->b_un.b_addr = (caddr_t) kva;
+ bp->b_blkno = reqaddr[0];
+ bgetvp( swapdev_vp, bp);
+/*
+ VHOLD(swapdev_vp);
+ bp->b_vp = swapdev_vp;
+ if (swapdev_vp->v_type == VBLK)
+ bp->b_dev = swapdev_vp->v_rdev;
+*/
+ bp->b_bcount = PAGE_SIZE*count;
+ bp->b_bufsize = PAGE_SIZE*count;
+ swapdev_vp->v_numoutput++;
+
+ /*
+ * If this is an async write we set up additional buffer fields
* and place a "cleaning" entry on the inuse queue.
*/
- if ((flags & (B_READ|B_ASYNC)) == B_ASYNC) {
-#ifdef DEBUG
- if (swap_pager_free.tqh_first == NULL)
- panic("swpg_io: lost spc");
-#endif
- spc = swap_pager_free.tqh_first;
- TAILQ_REMOVE(&swap_pager_free, spc, spc_list);
-#ifdef DEBUG
- if (spc->spc_flags != SPC_FREE)
- panic("swpg_io: bad free spc");
-#endif
- spc->spc_flags = SPC_BUSY;
- spc->spc_bp = bp;
+ if ( flags & B_ASYNC ) {
+ spc->spc_flags = 0;
spc->spc_swp = swp;
- spc->spc_kva = kva;
+ for(i=0;i<count;i++)
+ spc->spc_m[i] = m[i];
+ spc->spc_count = count;
/*
- * Record the first page. This allows swap_pager_clean
- * to efficiently handle the common case of a single page.
- * For clusters, it allows us to locate the object easily
- * and we then reconstruct the rest of the mlist from spc_kva.
+ * the completion routine for async writes
*/
- spc->spc_m = m;
- spc->spc_npages = npages;
bp->b_flags |= B_CALL;
bp->b_iodone = swap_pager_iodone;
- s = splbio();
+ bp->b_dirtyoff = 0;
+ bp->b_dirtyend = bp->b_bcount;
+ swp->sw_poip++;
TAILQ_INSERT_TAIL(&swap_pager_inuse, spc, spc_list);
+ } else {
+ swp->sw_poip++;
+ bp->b_flags |= B_CALL;
+ bp->b_iodone = swap_pager_iodone1;
+ }
+ /*
+ * perform the I/O
+ */
+ VOP_STRATEGY(bp);
+ if ((flags & (B_READ|B_ASYNC)) == B_ASYNC ) {
+ if ((bp->b_flags & B_DONE) == B_DONE) {
+ swap_pager_clean();
+ }
splx(s);
+ for(i=0;i<count;i++) {
+ rtvals[i] = VM_PAGER_PEND;
+ }
+ return VM_PAGER_PEND;
}
/*
- * Finally, start the IO operation.
- * If it is async we are all done, otherwise we must wait for
- * completion and cleanup afterwards.
+ * wait for the sync I/O to complete
*/
-#ifdef DEBUG
- if (swpagerdebug & SDB_IO)
- printf("swpg_io: IO start: bp %x, db %x, va %x, pa %x\n",
- bp, swb->swb_block+btodb(off), kva, VM_PAGE_TO_PHYS(m));
-#endif
- VOP_STRATEGY(bp);
- if ((flags & (B_READ|B_ASYNC)) == B_ASYNC) {
-#ifdef DEBUG
- if (swpagerdebug & SDB_IO)
- printf("swpg_io: IO started: bp %x\n", bp);
-#endif
- return(VM_PAGER_PEND);
+ while ((bp->b_flags & B_DONE) == 0) {
+ tsleep((caddr_t)bp, PVM, "swwrt", 0);
}
- s = splbio();
-#ifdef DEBUG
- if (flags & B_READ)
- swap_pager_piip++;
- else
- swap_pager_poip++;
-#endif
- while ((bp->b_flags & B_DONE) == 0)
- (void) tsleep(bp, PVM, "swpgio", 0);
- if ((flags & B_READ) == 0)
- --swp->sw_poip;
-#ifdef DEBUG
- if (flags & B_READ)
- --swap_pager_piip;
- else
- --swap_pager_poip;
-#endif
- rv = (bp->b_flags & B_ERROR) ? VM_PAGER_ERROR : VM_PAGER_OK;
- bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_PAGET|B_UAREA|B_DIRTY);
- bp->b_actf = bswlist.b_actf;
- bswlist.b_actf = bp;
+ rv = (bp->b_flags & B_ERROR) ? VM_PAGER_FAIL : VM_PAGER_OK;
+ bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_DIRTY|B_CALL|B_DONE);
+
+ --swp->sw_poip;
+ if (swp->sw_poip == 0)
+ wakeup((caddr_t) swp);
+
if (bp->b_vp)
brelvp(bp);
- if (bswlist.b_flags & B_WANTED) {
- bswlist.b_flags &= ~B_WANTED;
- wakeup(&bswlist);
+
+ splx(s);
+
+ /*
+ * remove the mapping for kernel virtual
+ */
+ pmap_remove(vm_map_pmap(pager_map), kva, kva + count * PAGE_SIZE);
+
+ /*
+ * if we have written the page, then indicate that the page
+ * is clean.
+ */
+ if (rv == VM_PAGER_OK) {
+ for(i=0;i<count;i++) {
+ if( rtvals[i] == VM_PAGER_OK) {
+ m[i]->flags |= PG_CLEAN;
+ m[i]->flags &= ~PG_LAUNDRY;
+ pmap_clear_modify(VM_PAGE_TO_PHYS(m[i]));
+ /*
+ * optimization, if a page has been read during the
+ * pageout process, we activate it.
+ */
+ if ( (m[i]->flags & PG_ACTIVE) == 0 &&
+ pmap_is_referenced(VM_PAGE_TO_PHYS(m[i])))
+ vm_page_activate(m[i]);
+ }
+ }
+ } else {
+ for(i=0;i<count;i++) {
+ rtvals[i] = rv;
+ m[i]->flags |= PG_LAUNDRY;
+ }
}
- if ((flags & B_READ) == 0 && rv == VM_PAGER_OK) {
- m->flags |= PG_CLEAN;
- pmap_clear_modify(VM_PAGE_TO_PHYS(m));
+
+ if( spc->spc_altkva)
+ kmem_free_wakeup(pager_map, kva, count * PAGE_SIZE);
+
+ if( bp->b_rcred != NOCRED)
+ crfree(bp->b_rcred);
+ if( bp->b_wcred != NOCRED)
+ crfree(bp->b_wcred);
+ TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list);
+ if (swap_pager_needflags & SWAP_FREE_NEEDED) {
+ swap_pager_needflags &= ~SWAP_FREE_NEEDED;
+ wakeup((caddr_t)&swap_pager_free);
}
- splx(s);
-#ifdef DEBUG
- if (swpagerdebug & SDB_IO)
- printf("swpg_io: IO done: bp %x, rv %d\n", bp, rv);
- if ((swpagerdebug & SDB_FAIL) && rv == VM_PAGER_ERROR)
- printf("swpg_io: IO error\n");
-#endif
- vm_pager_unmap_pages(kva, npages);
+
return(rv);
}
-static void
-swap_pager_clean(rw)
- int rw;
+boolean_t
+swap_pager_clean()
{
- register swp_clean_t spc;
- register int s, i;
- vm_object_t object;
- vm_page_t m;
-
-#ifdef DEBUG
- /* save panic time state */
- if ((swpagerdebug & SDB_ANOMPANIC) && panicstr)
- return;
- if (swpagerdebug & SDB_FOLLOW)
- printf("swpg_clean(%x)\n", rw);
-#endif
+ register swp_clean_t spc, tspc;
+ register int s;
+ tspc = NULL;
+ if (swap_pager_done.tqh_first == NULL)
+ return FALSE;
for (;;) {
+ s = splbio();
/*
- * Look up and removal from inuse list must be done
+ * Look up and removal from done list must be done
* at splbio() to avoid conflicts with swap_pager_iodone.
*/
- s = splbio();
- for (spc = swap_pager_inuse.tqh_first;
- spc != NULL;
- spc = spc->spc_list.tqe_next) {
- /*
- * If the operation is done, remove it from the
- * list and process it.
- *
- * XXX if we can't get the object lock we also
- * leave it on the list and try again later.
- * Is there something better we could do?
- */
- if ((spc->spc_flags & SPC_DONE) &&
- vm_object_lock_try(spc->spc_m->object)) {
- TAILQ_REMOVE(&swap_pager_inuse, spc, spc_list);
- break;
+ while (spc = swap_pager_done.tqh_first) {
+ if( spc->spc_altkva) {
+ pmap_remove(vm_map_pmap(pager_map), spc->spc_altkva, spc->spc_altkva + spc->spc_count * PAGE_SIZE);
+ kmem_free_wakeup(pager_map, spc->spc_altkva, spc->spc_count * PAGE_SIZE);
+ spc->spc_altkva = 0;
+ } else {
+ pmap_remove(vm_map_pmap(pager_map), spc->spc_kva, spc->spc_kva + PAGE_SIZE);
}
+ swap_pager_finish(spc);
+ TAILQ_REMOVE(&swap_pager_done, spc, spc_list);
+ goto doclean;
}
- splx(s);
/*
 		 * No operations done, that's all we can do for now.
*/
- if (spc == NULL)
- break;
- /*
- * Found a completed operation so finish it off.
- * Note: no longer at splbio since entry is off the list.
- */
- m = spc->spc_m;
- object = m->object;
+ splx(s);
+ break;
/*
- * Process each page in the cluster.
- * The first page is explicitly kept in the cleaning
- * entry, others must be reconstructed from the KVA.
+ * The desired page was found to be busy earlier in
+ * the scan but has since completed.
*/
- for (i = 0; i < spc->spc_npages; i++) {
- if (i)
- m = vm_pager_atop(spc->spc_kva + ptoa(i));
- /*
- * If no error mark as clean and inform the pmap
- * system. If there was an error, mark as dirty
- * so we will try again.
- *
- * XXX could get stuck doing this, should give up
- * after awhile.
- */
- if (spc->spc_flags & SPC_ERROR) {
- printf("%s: clean of page %x failed\n",
- "swap_pager_clean",
- VM_PAGE_TO_PHYS(m));
- m->flags |= PG_LAUNDRY;
- } else {
- m->flags |= PG_CLEAN;
- pmap_clear_modify(VM_PAGE_TO_PHYS(m));
- }
- m->flags &= ~PG_BUSY;
- PAGE_WAKEUP(m);
+doclean:
+ if (tspc && tspc == spc) {
+ tspc = NULL;
}
+ spc->spc_flags = 0;
+ TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list);
+ if (swap_pager_needflags & SWAP_FREE_NEEDED) {
+ swap_pager_needflags &= ~SWAP_FREE_NEEDED;
+ wakeup((caddr_t)&swap_pager_free);
+ }
+ ++cleandone;
+ splx(s);
+ }
- /*
- * Done with the object, decrement the paging count
- * and unlock it.
- */
- if (--object->paging_in_progress == 0)
- wakeup(object);
- vm_object_unlock(object);
+ return(tspc ? TRUE : FALSE);
+}
+void
+swap_pager_finish(spc)
+ register swp_clean_t spc;
+{
+ vm_object_t object = spc->spc_m[0]->object;
+ int i;
+
+ if ((object->paging_in_progress -= spc->spc_count) == 0)
+ thread_wakeup((int) object);
+
+ /*
+ * If no error mark as clean and inform the pmap system.
+ * If error, mark as dirty so we will try again.
+ * (XXX could get stuck doing this, should give up after awhile)
+ */
+ if (spc->spc_flags & SPC_ERROR) {
+ for(i=0;i<spc->spc_count;i++) {
+ printf("swap_pager_finish: clean of page %x failed\n",
+ VM_PAGE_TO_PHYS(spc->spc_m[i]));
+ spc->spc_m[i]->flags |= PG_LAUNDRY;
+ }
+ } else {
+ for(i=0;i<spc->spc_count;i++) {
+ pmap_clear_modify(VM_PAGE_TO_PHYS(spc->spc_m[i]));
+ spc->spc_m[i]->flags |= PG_CLEAN;
+ }
+ }
+
+
+ for(i=0;i<spc->spc_count;i++) {
/*
- * Free up KVM used and put the entry back on the list.
+ * we wakeup any processes that are waiting on
+ * these pages.
*/
- vm_pager_unmap_pages(spc->spc_kva, spc->spc_npages);
- spc->spc_flags = SPC_FREE;
- TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list);
-#ifdef DEBUG
- if (swpagerdebug & SDB_WRITE)
- printf("swpg_clean: free spc %x\n", spc);
-#endif
+ PAGE_WAKEUP(spc->spc_m[i]);
}
+ nswiodone -= spc->spc_count;
+
+ return;
}
-#ifdef DEBUG
-static void
-swap_pager_clean_check(mlist, npages, rw)
- vm_page_t *mlist;
- int npages;
- int rw;
+/*
+ * swap_pager_iodone
+ */
+void
+swap_pager_iodone(bp)
+ register struct buf *bp;
{
register swp_clean_t spc;
- boolean_t bad;
- int i, j, s;
- vm_page_t m;
+ int s;
- if (panicstr)
- return;
+ s = splbio();
+ spc = (swp_clean_t) bp->b_spc;
+ TAILQ_REMOVE(&swap_pager_inuse, spc, spc_list);
+ TAILQ_INSERT_TAIL(&swap_pager_done, spc, spc_list);
+ if (bp->b_flags & B_ERROR) {
+ spc->spc_flags |= SPC_ERROR;
+ printf("error %d blkno %d sz %d ",
+ bp->b_error, bp->b_blkno, bp->b_bcount);
+ }
+
+/*
+ if ((bp->b_flags & B_READ) == 0)
+ vwakeup(bp);
+*/
+
+ bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_DIRTY|B_ASYNC);
+ if (bp->b_vp) {
+ brelvp(bp);
+ }
+ if( bp->b_rcred != NOCRED)
+ crfree(bp->b_rcred);
+ if( bp->b_wcred != NOCRED)
+ crfree(bp->b_wcred);
+
+ nswiodone += spc->spc_count;
+ if (--spc->spc_swp->sw_poip == 0) {
+ wakeup((caddr_t)spc->spc_swp);
+ }
+
+ if ((swap_pager_needflags & SWAP_FREE_NEEDED) ||
+ swap_pager_inuse.tqh_first == 0) {
+ swap_pager_needflags &= ~SWAP_FREE_NEEDED;
+ wakeup((caddr_t)&swap_pager_free);
+ wakeup((caddr_t)&vm_pages_needed);
+ }
+
+ if (vm_pageout_pages_needed) {
+ wakeup((caddr_t)&vm_pageout_pages_needed);
+ }
+
+ if ((swap_pager_inuse.tqh_first == NULL) ||
+ (cnt.v_free_count < cnt.v_free_min &&
+ nswiodone + cnt.v_free_count >= cnt.v_free_min) ) {
+ wakeup((caddr_t)&vm_pages_needed);
+ }
+ splx(s);
+}
+
+int bswneeded;
+/* TAILQ_HEAD(swqueue, buf) bswlist; */
+/*
+ * allocate a physical buffer
+ */
+struct buf *
+getpbuf() {
+ int s;
+ struct buf *bp;
- bad = FALSE;
s = splbio();
- for (spc = swap_pager_inuse.tqh_first;
- spc != NULL;
- spc = spc->spc_list.tqe_next) {
- for (j = 0; j < spc->spc_npages; j++) {
- m = vm_pager_atop(spc->spc_kva + ptoa(j));
- for (i = 0; i < npages; i++)
- if (m == mlist[i]) {
- if (swpagerdebug & SDB_ANOM)
- printf(
- "swpg_clean_check: %s: page %x on list, flags %x\n",
- rw == B_WRITE ? "write" : "read", mlist[i], spc->spc_flags);
- bad = TRUE;
- }
- }
+ /* get a bp from the swap buffer header pool */
+ while ((bp = bswlist.tqh_first) == NULL) {
+ bswneeded = 1;
+ tsleep((caddr_t)&bswneeded, PVM, "wswbuf", 0);
}
+ TAILQ_REMOVE(&bswlist, bp, b_freelist);
+
splx(s);
- if (bad)
- panic("swpg_clean_check");
+
+ bzero(bp, sizeof *bp);
+ bp->b_rcred = NOCRED;
+ bp->b_wcred = NOCRED;
+ return bp;
}
-#endif
-static void
-swap_pager_iodone(bp)
- register struct buf *bp;
-{
- register swp_clean_t spc;
- daddr_t blk;
+/*
+ * allocate a physical buffer, if one is available
+ */
+struct buf *
+trypbuf() {
int s;
+ struct buf *bp;
-#ifdef DEBUG
- /* save panic time state */
- if ((swpagerdebug & SDB_ANOMPANIC) && panicstr)
- return;
- if (swpagerdebug & SDB_FOLLOW)
- printf("swpg_iodone(%x)\n", bp);
-#endif
s = splbio();
- for (spc = swap_pager_inuse.tqh_first;
- spc != NULL;
- spc = spc->spc_list.tqe_next)
- if (spc->spc_bp == bp)
- break;
-#ifdef DEBUG
- if (spc == NULL)
- panic("swap_pager_iodone: bp not found");
-#endif
+ if ((bp = bswlist.tqh_first) == NULL) {
+ splx(s);
+ return NULL;
+ }
+ TAILQ_REMOVE(&bswlist, bp, b_freelist);
+ splx(s);
- spc->spc_flags &= ~SPC_BUSY;
- spc->spc_flags |= SPC_DONE;
- if (bp->b_flags & B_ERROR)
- spc->spc_flags |= SPC_ERROR;
- spc->spc_bp = NULL;
- blk = bp->b_blkno;
-
-#ifdef DEBUG
- --swap_pager_poip;
- if (swpagerdebug & SDB_WRITE)
- printf("swpg_iodone: bp=%x swp=%x flags=%x spc=%x poip=%x\n",
- bp, spc->spc_swp, spc->spc_swp->sw_flags,
- spc, spc->spc_swp->sw_poip);
-#endif
+ bzero(bp, sizeof *bp);
+ bp->b_rcred = NOCRED;
+ bp->b_wcred = NOCRED;
+ return bp;
+}
+
+/*
+ * release a physical buffer
+ */
+void
+relpbuf(bp)
+ struct buf *bp;
+{
+ int s;
- spc->spc_swp->sw_poip--;
- if (spc->spc_swp->sw_flags & SW_WANTED) {
- spc->spc_swp->sw_flags &= ~SW_WANTED;
- wakeup(spc->spc_swp);
+ s = splbio();
+
+ if (bp->b_rcred != NOCRED) {
+ crfree(bp->b_rcred);
+ bp->b_rcred = NOCRED;
}
-
- bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_PAGET|B_UAREA|B_DIRTY);
- bp->b_actf = bswlist.b_actf;
- bswlist.b_actf = bp;
+ if (bp->b_wcred != NOCRED) {
+ crfree(bp->b_wcred);
+ bp->b_wcred = NOCRED;
+ }
+
if (bp->b_vp)
brelvp(bp);
- if (bswlist.b_flags & B_WANTED) {
- bswlist.b_flags &= ~B_WANTED;
- wakeup(&bswlist);
+
+ TAILQ_INSERT_HEAD(&bswlist, bp, b_freelist);
+
+ if (bswneeded) {
+ bswneeded = 0;
+ wakeup((caddr_t)&bswlist);
}
- wakeup(&vm_pages_needed);
splx(s);
}
+
+/*
+ * return true if any swap control structures can be allocated
+ */
+int
+swap_pager_ready() {
+ if( swap_pager_free.tqh_first)
+ return 1;
+ else
+ return 0;
+}
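
The getpbuf()/trypbuf()/relpbuf() routines above manage a small fixed pool of swap I/O buffer headers: a blocking allocator that sleeps until a header is released, a non-blocking variant, and a release routine that wakes any waiter. The stand-alone C sketch below models the same free-list pattern in user space; a pthread mutex and condition variable stand in for splbio()/tsleep()/wakeup(), and every name in it is illustrative rather than a kernel interface.

#include <pthread.h>
#include <stdio.h>
#include <string.h>

#define NPBUF 4

struct pbuf {
	struct pbuf *next;
	char data[64];
};

static struct pbuf pool[NPBUF];
static struct pbuf *freelist;
static int waiting;
static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t havebuf = PTHREAD_COND_INITIALIZER;

static void pbuf_init(void)
{
	for (int i = 0; i < NPBUF; i++) {	/* chain every buffer onto the free list */
		pool[i].next = freelist;
		freelist = &pool[i];
	}
}

/* blocking allocate: sleep until a buffer is released (cf. getpbuf) */
static struct pbuf *get_pbuf(void)
{
	struct pbuf *bp;

	pthread_mutex_lock(&lock);
	while (freelist == NULL) {
		waiting = 1;
		pthread_cond_wait(&havebuf, &lock);
	}
	bp = freelist;
	freelist = bp->next;
	pthread_mutex_unlock(&lock);
	memset(bp->data, 0, sizeof(bp->data));	/* hand out a clean buffer */
	return bp;
}

/* non-blocking allocate: return NULL if the pool is empty (cf. trypbuf) */
static struct pbuf *try_pbuf(void)
{
	struct pbuf *bp;

	pthread_mutex_lock(&lock);
	if ((bp = freelist) != NULL)
		freelist = bp->next;
	pthread_mutex_unlock(&lock);
	return bp;
}

/* release: return the buffer and wake anyone sleeping in get_pbuf (cf. relpbuf) */
static void rel_pbuf(struct pbuf *bp)
{
	pthread_mutex_lock(&lock);
	bp->next = freelist;
	freelist = bp;
	if (waiting) {
		waiting = 0;
		pthread_cond_broadcast(&havebuf);
	}
	pthread_mutex_unlock(&lock);
}

int main(void)
{
	pbuf_init();
	struct pbuf *a = get_pbuf();
	struct pbuf *b = try_pbuf();
	printf("blocking get -> %p, non-blocking try -> %p\n", (void *)a, (void *)b);
	rel_pbuf(b);
	rel_pbuf(a);
	return 0;
}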
diff --git a/sys/vm/swap_pager.h b/sys/vm/swap_pager.h
index 497d92a..853edd5 100644
--- a/sys/vm/swap_pager.h
+++ b/sys/vm/swap_pager.h
@@ -1,7 +1,7 @@
/*
* Copyright (c) 1990 University of Utah.
- * Copyright (c) 1991, 1993
- * The Regents of the University of California. All rights reserved.
+ * Copyright (c) 1991 The Regents of the University of California.
+ * All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* the Systems Programming Group of the University of Utah Computer
@@ -35,39 +35,31 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * @(#)swap_pager.h 8.1 (Berkeley) 6/11/93
+ * from: @(#)swap_pager.h 7.1 (Berkeley) 12/5/90
+ * $Id: swap_pager.h,v 1.9 1994/03/14 21:54:23 davidg Exp $
+ */
+
+/*
+ * Modifications to the block allocation data structure by John S. Dyson
+ * 18 Dec 93.
*/
#ifndef _SWAP_PAGER_
#define _SWAP_PAGER_ 1
/*
- * In the swap pager, the backing store for an object is organized as an
- * array of some number of "swap blocks". A swap block consists of a bitmask
- * and some number of contiguous DEV_BSIZE disk blocks. The minimum size
- * of a swap block is:
- *
- * max(PAGE_SIZE, dmmin*DEV_BSIZE) [ 32k currently ]
- *
- * bytes (since the pager interface is page oriented), the maximum size is:
- *
- * min(#bits(swb_mask)*PAGE_SIZE, dmmax*DEV_BSIZE) [ 128k currently ]
- *
- * where dmmin and dmmax are left over from the old VM interface. The bitmask
- * (swb_mask) is used by swap_pager_haspage() to determine if a particular
- * page has actually been written; i.e. the pager copy of the page is valid.
- * All swap blocks in the backing store of an object will be the same size.
- *
- * The reason for variable sized swap blocks is to reduce fragmentation of
- * swap resources. Whenever possible we allocate smaller swap blocks to
- * smaller objects. The swap block size is determined from a table of
- * object-size vs. swap-block-size computed at boot time.
+ * SWB_NPAGES can be set to any value from 1 to 16 pages per allocation,
+ * however, due to the allocation spilling into non-swap pager backed memory,
+ * suggest keeping SWB_NPAGES small (1-4).  If high performance is mandatory
+ * perhaps up to 8 pages might be in order????
+ * Above problem has been fixed, now we support 16 pages per block. Unused
+ * space is recovered by the swap pager now...
*/
-typedef int sw_bm_t; /* pager bitmask */
-
+#define SWB_NPAGES 8
struct swblock {
- sw_bm_t swb_mask; /* bitmask of valid pages in this block */
- daddr_t swb_block; /* starting disk block for this block */
+ unsigned short swb_valid; /* bitmask for valid pages */
+ unsigned short swb_locked; /* block locked */
+ int swb_block[SWB_NPAGES]; /* unfortunately int instead of daddr_t */
};
typedef struct swblock *sw_blk_t;
@@ -76,15 +68,32 @@ typedef struct swblock *sw_blk_t;
*/
struct swpager {
vm_size_t sw_osize; /* size of object we are backing (bytes) */
- int sw_bsize; /* size of swap blocks (DEV_BSIZE units) */
int sw_nblocks;/* number of blocks in list (sw_blk_t units) */
sw_blk_t sw_blocks; /* pointer to list of swap blocks */
short sw_flags; /* flags */
short sw_poip; /* pageouts in progress */
+ short sw_piip; /* pageins in progress */
};
typedef struct swpager *sw_pager_t;
#define SW_WANTED 0x01
#define SW_NAMED 0x02
+#ifdef KERNEL
+
+void swap_pager_init(void);
+vm_pager_t swap_pager_alloc(caddr_t, vm_size_t, vm_prot_t, vm_offset_t);
+void swap_pager_dealloc(vm_pager_t);
+boolean_t swap_pager_getpage(vm_pager_t, vm_page_t, boolean_t);
+boolean_t swap_pager_putpage(vm_pager_t, vm_page_t, boolean_t);
+boolean_t swap_pager_getmulti(vm_pager_t, vm_page_t *, int, int, boolean_t);
+boolean_t swap_pager_haspage(vm_pager_t, vm_offset_t);
+int swap_pager_io(sw_pager_t, vm_page_t *, int, int, int);
+void swap_pager_iodone(struct buf *);
+boolean_t swap_pager_clean();
+
+extern struct pagerops swappagerops;
+
+#endif
+
#endif /* _SWAP_PAGER_ */
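
To make the new swblock layout concrete, the sketch below models how a byte offset into an object selects one of the SWB_NPAGES slots of a swap block and how the swb_valid bitmask answers the swap_pager_haspage() question, i.e. whether a given page has ever been written to swap. It is a stand-alone user-space model; the real pager additionally applies the object's paging offset and manages per-slot disk block allocation, so treat the indexing and names here as illustrative.

#include <stdio.h>

#define PAGE_SIZE	4096
#define SWB_NPAGES	8	/* pages covered by one swap block, as above */

struct swblock {
	unsigned short swb_valid;	/* bit i set => page i has been written to swap */
	long swb_block[SWB_NPAGES];	/* disk block recorded for each page slot */
};

/* record that the page at 'offset' now lives at 'diskblk' on the swap device */
static void swb_set(struct swblock *blocks, unsigned long offset, long diskblk)
{
	struct swblock *swb = &blocks[offset / (SWB_NPAGES * PAGE_SIZE)];
	int ix = (int)((offset / PAGE_SIZE) % SWB_NPAGES);

	swb->swb_block[ix] = diskblk;
	swb->swb_valid |= 1 << ix;
}

/* the question swap_pager_haspage() answers: has this page ever been paged out? */
static int swb_haspage(struct swblock *blocks, unsigned long offset)
{
	struct swblock *swb = &blocks[offset / (SWB_NPAGES * PAGE_SIZE)];
	int ix = (int)((offset / PAGE_SIZE) % SWB_NPAGES);

	return (swb->swb_valid & (1 << ix)) != 0;
}

int main(void)
{
	struct swblock blocks[4] = {{0}};	/* enough blocks for a 128KB object */

	swb_set(blocks, 9 * PAGE_SIZE, 1234);
	printf("page 9 in swap: %d, page 10 in swap: %d\n",
	    swb_haspage(blocks, 9 * PAGE_SIZE), swb_haspage(blocks, 10 * PAGE_SIZE));
	return 0;
}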
diff --git a/sys/vm/vm.h b/sys/vm/vm.h
index 85f892f..bc18dd2 100644
--- a/sys/vm/vm.h
+++ b/sys/vm/vm.h
@@ -36,7 +36,7 @@
#ifndef VM_H
#define VM_H
-typedef int vm_inherit_t; /* XXX: inheritance codes */
+typedef char vm_inherit_t; /* XXX: inheritance codes */
union vm_map_object;
typedef union vm_map_object vm_map_object_t;
@@ -58,6 +58,7 @@ typedef struct pager_struct *vm_pager_t;
#include <sys/vmmeter.h>
#include <sys/queue.h>
+#include <machine/cpufunc.h>
#include <vm/vm_param.h>
#include <vm/lock.h>
#include <vm/vm_prot.h>
@@ -87,5 +88,6 @@ struct vmspace {
caddr_t vm_taddr; /* user virtual address of text XXX */
caddr_t vm_daddr; /* user virtual address of data XXX */
caddr_t vm_maxsaddr; /* user VA at max stack growth */
+	caddr_t	vm_minsaddr;	/* user VA at top of stack */
};
#endif /* VM_H */
diff --git a/sys/vm/vm_extern.h b/sys/vm/vm_extern.h
index bae5f00..bc62e42 100644
--- a/sys/vm/vm_extern.h
+++ b/sys/vm/vm_extern.h
@@ -45,6 +45,16 @@ struct vnode;
void chgkprot __P((caddr_t, int, int));
#endif
+/*
+ * Try to get semi-meaningful wait messages into thread_sleep...
+ */
+extern void thread_sleep_(int, simple_lock_t, char *);
+#if __GNUC__ >= 2
+#define thread_sleep(a,b,c) thread_sleep_((a), (b), __FUNCTION__)
+#else
+#define thread_sleep(a,b,c) thread_sleep_((a), (b), "vmslp")
+#endif
+
#ifdef KERNEL
#ifdef TYPEDEF_FOR_UAP
int getpagesize __P((struct proc *p, void *, int *));
@@ -88,7 +98,7 @@ void swapout __P((struct proc *));
void swapout_threads __P((void));
int swfree __P((struct proc *, int));
void swstrategy __P((struct buf *));
-void thread_block __P((void));
+void thread_block __P((char *));
void thread_sleep __P((int, simple_lock_t, boolean_t));
void thread_wakeup __P((int));
int useracc __P((caddr_t, int, int));
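
The thread_sleep_() wrapper above exists so that the caller's function name can be passed along as the wait message when the compiler supports __FUNCTION__. A small user-space sketch of the same macro trick, with made-up names:

#include <stdio.h>

/* the underlying primitive wants an explicit wait-message string */
static void sleep_on_channel(int chan, const char *wmesg)
{
	printf("sleeping on channel %d (\"%s\")\n", chan, wmesg);
}

/* the wrapper: callers get a meaningful message for free when the compiler allows it */
#if defined(__GNUC__) && __GNUC__ >= 2
#define thread_sleep_demo(chan)	sleep_on_channel((chan), __FUNCTION__)
#else
#define thread_sleep_demo(chan)	sleep_on_channel((chan), "vmslp")
#endif

static void pageout_daemon(void)
{
	thread_sleep_demo(42);		/* prints: sleeping on channel 42 ("pageout_daemon") */
}

int main(void)
{
	pageout_daemon();
	return 0;
}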
diff --git a/sys/vm/vm_fault.c b/sys/vm/vm_fault.c
index f60abf2..3ce2d6e 100644
--- a/sys/vm/vm_fault.c
+++ b/sys/vm/vm_fault.c
@@ -1,6 +1,11 @@
/*
* Copyright (c) 1991, 1993
* The Regents of the University of California. All rights reserved.
+ * Copyright (c) 1994 John S. Dyson
+ * All rights reserved.
+ * Copyright (c) 1994 David Greenman
+ * All rights reserved.
+ *
*
* This code is derived from software contributed to Berkeley by
* The Mach Operating System project at Carnegie-Mellon University.
@@ -68,11 +73,21 @@
#include <sys/param.h>
#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/resourcevar.h>
#include <vm/vm.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
+
+#define VM_FAULT_READ_AHEAD 4
+#define VM_FAULT_READ_AHEAD_MIN 1
+#define VM_FAULT_READ_BEHIND 3
+#define VM_FAULT_READ (VM_FAULT_READ_AHEAD+VM_FAULT_READ_BEHIND+1)
+extern int swap_pager_full;
+extern int vm_pageout_proc_limit;
+
/*
* vm_fault:
*
@@ -103,7 +118,7 @@ vm_fault(map, vaddr, fault_type, change_wiring)
vm_map_entry_t entry;
register vm_object_t object;
register vm_offset_t offset;
- register vm_page_t m;
+ vm_page_t m;
vm_page_t first_m;
vm_prot_t prot;
int result;
@@ -113,6 +128,10 @@ vm_fault(map, vaddr, fault_type, change_wiring)
boolean_t page_exists;
vm_page_t old_m;
vm_object_t next_object;
+ vm_page_t marray[VM_FAULT_READ];
+ int reqpage;
+ int spl;
+ int hardfault=0;
cnt.v_faults++; /* needs lock XXX */
/*
@@ -141,11 +160,15 @@ vm_fault(map, vaddr, fault_type, change_wiring)
#define UNLOCK_THINGS { \
object->paging_in_progress--; \
+ if (object->paging_in_progress == 0) \
+ wakeup((caddr_t)object); \
vm_object_unlock(object); \
if (object != first_object) { \
vm_object_lock(first_object); \
FREE_PAGE(first_m); \
first_object->paging_in_progress--; \
+ if (first_object->paging_in_progress == 0) \
+ wakeup((caddr_t)first_object); \
vm_object_unlock(first_object); \
} \
UNLOCK_MAP; \
@@ -156,6 +179,7 @@ vm_fault(map, vaddr, fault_type, change_wiring)
vm_object_deallocate(first_object); \
}
+
RetryFault: ;
/*
@@ -164,8 +188,8 @@ vm_fault(map, vaddr, fault_type, change_wiring)
*/
if ((result = vm_map_lookup(&map, vaddr, fault_type, &entry,
- &first_object, &first_offset,
- &prot, &wired, &su)) != KERN_SUCCESS) {
+ &first_object, &first_offset,
+ &prot, &wired, &su)) != KERN_SUCCESS) {
return(result);
}
lookup_still_valid = TRUE;
@@ -241,25 +265,13 @@ vm_fault(map, vaddr, fault_type, change_wiring)
* wait for it and then retry.
*/
if (m->flags & PG_BUSY) {
-#ifdef DOTHREADS
- int wait_result;
-
- PAGE_ASSERT_WAIT(m, !change_wiring);
- UNLOCK_THINGS;
- thread_block();
- wait_result = current_thread()->wait_result;
- vm_object_deallocate(first_object);
- if (wait_result != THREAD_AWAKENED)
- return(KERN_SUCCESS);
- goto RetryFault;
-#else
- PAGE_ASSERT_WAIT(m, !change_wiring);
UNLOCK_THINGS;
- cnt.v_intrans++;
- thread_block();
+ if (m->flags & PG_BUSY) {
+ m->flags |= PG_WANTED;
+ tsleep((caddr_t)m,PSWP,"vmpfw",0);
+ }
vm_object_deallocate(first_object);
goto RetryFault;
-#endif
}
/*
@@ -268,6 +280,7 @@ vm_fault(map, vaddr, fault_type, change_wiring)
*/
vm_page_lock_queues();
+ spl = splimp();
if (m->flags & PG_INACTIVE) {
TAILQ_REMOVE(&vm_page_queue_inactive, m, pageq);
m->flags &= ~PG_INACTIVE;
@@ -280,6 +293,7 @@ vm_fault(map, vaddr, fault_type, change_wiring)
m->flags &= ~PG_ACTIVE;
cnt.v_active_count--;
}
+ splx(spl);
vm_page_unlock_queues();
/*
@@ -290,9 +304,31 @@ vm_fault(map, vaddr, fault_type, change_wiring)
}
if (((object->pager != NULL) &&
- (!change_wiring || wired))
+ (!change_wiring || wired))
|| (object == first_object)) {
+#if 0
+ if (curproc && (vaddr < VM_MAXUSER_ADDRESS) &&
+ (curproc->p_rlimit[RLIMIT_RSS].rlim_max <
+ curproc->p_vmspace->vm_pmap.pm_stats.resident_count * NBPG)) {
+ UNLOCK_AND_DEALLOCATE;
+ vm_fault_free_pages(curproc);
+ goto RetryFault;
+ }
+#endif
+
+ if (swap_pager_full && !object->shadow && (!object->pager ||
+ (object->pager && object->pager->pg_type == PG_SWAP &&
+ !vm_pager_has_page(object->pager, offset+object->paging_offset)))) {
+ if (vaddr < VM_MAXUSER_ADDRESS && curproc && curproc->p_pid >= 48) /* XXX */ {
+ printf("Process %d killed by vm_fault -- out of swap\n", curproc->p_pid);
+ psignal(curproc, SIGKILL);
+ curproc->p_estcpu = 0;
+ curproc->p_nice = PRIO_MIN;
+ setpriority(curproc);
+ }
+ }
+
/*
* Allocate a new page for this object/offset
* pair.
@@ -309,33 +345,46 @@ vm_fault(map, vaddr, fault_type, change_wiring)
if (object->pager != NULL && (!change_wiring || wired)) {
int rv;
+ int faultcount;
+ int reqpage;
/*
* Now that we have a busy page, we can
* release the object lock.
*/
vm_object_unlock(object);
-
/*
- * Call the pager to retrieve the data, if any,
- * after releasing the lock on the map.
+			 * Now we find out if any other pages should be paged
+			 * in at this time.  This routine checks to see if the
+			 * pages surrounding this fault reside in the same object
+			 * as the page for this fault.  If they do, then they are
+			 * faulted into the object as well.  The "marray" array
+			 * returned contains the vm_page_t structs, one of which
+			 * is the vm_page_t passed to the routine.  The reqpage
+			 * return value is the index into marray of that page.
*/
- UNLOCK_MAP;
cnt.v_pageins++;
- rv = vm_pager_get(object->pager, m, TRUE);
+ faultcount = vm_fault_additional_pages(first_object, first_offset,
+ m, VM_FAULT_READ_BEHIND, VM_FAULT_READ_AHEAD, marray, &reqpage);
/*
- * Reaquire the object lock to preserve our
- * invariant.
+ * Call the pager to retrieve the data, if any,
+ * after releasing the lock on the map.
*/
- vm_object_lock(object);
+ UNLOCK_MAP;
- /*
- * Found the page.
- * Leave it busy while we play with it.
- */
+ rv = faultcount ?
+ vm_pager_get_pages(object->pager,
+ marray, faultcount, reqpage, TRUE): VM_PAGER_FAIL;
if (rv == VM_PAGER_OK) {
/*
+ * Found the page.
+ * Leave it busy while we play with it.
+ */
+ vm_object_lock(object);
+
+ /*
* Relookup in case pager changed page.
* Pager is responsible for disposition
* of old page if moved.
@@ -344,36 +393,42 @@ vm_fault(map, vaddr, fault_type, change_wiring)
cnt.v_pgpgin++;
m->flags &= ~PG_FAKE;
- m->flags |= PG_CLEAN;
pmap_clear_modify(VM_PAGE_TO_PHYS(m));
+ hardfault++;
break;
}
/*
- * IO error or page outside the range of the pager:
- * cleanup and return an error.
+ * Remove the bogus page (which does not
+ * exist at this object/offset); before
+ * doing so, we must get back our object
+ * lock to preserve our invariant.
+ *
+ * Also wake up any other thread that may want
+ * to bring in this page.
+ *
+ * If this is the top-level object, we must
+ * leave the busy page to prevent another
+ * thread from rushing past us, and inserting
+ * the page in that object at the same time
+ * that we are.
*/
- if (rv == VM_PAGER_ERROR || rv == VM_PAGER_BAD) {
+
+ vm_object_lock(object);
+ /*
+ * Data outside the range of the pager; an error
+ */
+ if ((rv == VM_PAGER_ERROR) || (rv == VM_PAGER_BAD)) {
FREE_PAGE(m);
UNLOCK_AND_DEALLOCATE;
return(KERN_PROTECTION_FAILURE); /* XXX */
}
- /*
- * rv == VM_PAGER_FAIL:
- *
- * Page does not exist at this object/offset.
- * Free the bogus page (waking up anyone waiting
- * for it) and continue on to the next object.
- *
- * If this is the top-level object, we must
- * leave the busy page to prevent another
- * thread from rushing past us, and inserting
- * the page in that object at the same time
- * that we are.
- */
if (object != first_object) {
FREE_PAGE(m);
- /* note that `m' is not used after this */
+ /*
+ * XXX - we cannot just fall out at this
+ * point, m has been freed and is invalid!
+ */
}
}
@@ -398,6 +453,8 @@ vm_fault(map, vaddr, fault_type, change_wiring)
*/
if (object != first_object) {
object->paging_in_progress--;
+ if (object->paging_in_progress == 0)
+ wakeup((caddr_t) object);
vm_object_unlock(object);
object = first_object;
@@ -414,16 +471,20 @@ vm_fault(map, vaddr, fault_type, change_wiring)
}
else {
vm_object_lock(next_object);
- if (object != first_object)
+ if (object != first_object) {
object->paging_in_progress--;
+ if (object->paging_in_progress == 0)
+ wakeup((caddr_t) object);
+ }
vm_object_unlock(object);
object = next_object;
object->paging_in_progress++;
}
}
- if ((m->flags & (PG_ACTIVE | PG_INACTIVE | PG_BUSY)) != PG_BUSY)
- panic("vm_fault: active, inactive or !busy after main loop");
+	if (((m->flags & (PG_ACTIVE|PG_INACTIVE)) != 0) ||
+		(m->flags & PG_BUSY) == 0)
+ panic("vm_fault: absent or active or inactive or not busy after main loop");
/*
* PAGE HAS BEEN FOUND.
@@ -486,9 +547,11 @@ vm_fault(map, vaddr, fault_type, change_wiring)
*/
vm_page_lock_queues();
+
vm_page_activate(m);
- vm_page_deactivate(m);
pmap_page_protect(VM_PAGE_TO_PHYS(m), VM_PROT_NONE);
+ if ((m->flags & PG_CLEAN) == 0)
+ m->flags |= PG_LAUNDRY;
vm_page_unlock_queues();
/*
@@ -496,6 +559,8 @@ vm_fault(map, vaddr, fault_type, change_wiring)
*/
PAGE_WAKEUP(m);
object->paging_in_progress--;
+ if (object->paging_in_progress == 0)
+ wakeup((caddr_t) object);
vm_object_unlock(object);
/*
@@ -517,6 +582,8 @@ vm_fault(map, vaddr, fault_type, change_wiring)
* paging_in_progress to do that...
*/
object->paging_in_progress--;
+ if (object->paging_in_progress == 0)
+ wakeup((caddr_t) object);
vm_object_collapse(object);
object->paging_in_progress++;
}
@@ -572,38 +639,18 @@ vm_fault(map, vaddr, fault_type, change_wiring)
copy_m = vm_page_lookup(copy_object, copy_offset);
if (page_exists = (copy_m != NULL)) {
if (copy_m->flags & PG_BUSY) {
-#ifdef DOTHREADS
- int wait_result;
-
- /*
- * If the page is being brought
- * in, wait for it and then retry.
- */
- PAGE_ASSERT_WAIT(copy_m, !change_wiring);
- RELEASE_PAGE(m);
- copy_object->ref_count--;
- vm_object_unlock(copy_object);
- UNLOCK_THINGS;
- thread_block();
- wait_result = current_thread()->wait_result;
- vm_object_deallocate(first_object);
- if (wait_result != THREAD_AWAKENED)
- return(KERN_SUCCESS);
- goto RetryFault;
-#else
/*
* If the page is being brought
* in, wait for it and then retry.
*/
- PAGE_ASSERT_WAIT(copy_m, !change_wiring);
+ PAGE_ASSERT_WAIT(copy_m, !change_wiring);
RELEASE_PAGE(m);
copy_object->ref_count--;
vm_object_unlock(copy_object);
UNLOCK_THINGS;
- thread_block();
+ thread_block("fltcpy");
vm_object_deallocate(first_object);
goto RetryFault;
-#endif
}
}
@@ -625,8 +672,7 @@ vm_fault(map, vaddr, fault_type, change_wiring)
* found that the copy_object's pager
* doesn't have the page...
*/
- copy_m = vm_page_alloc(copy_object,
- copy_offset);
+ copy_m = vm_page_alloc(copy_object, copy_offset);
if (copy_m == NULL) {
/*
* Wait for a page, then retry.
@@ -700,10 +746,16 @@ vm_fault(map, vaddr, fault_type, change_wiring)
* pmaps use it.)
*/
vm_page_lock_queues();
+
+ vm_page_activate(old_m);
+
+
pmap_page_protect(VM_PAGE_TO_PHYS(old_m),
VM_PROT_NONE);
+ if ((old_m->flags & PG_CLEAN) == 0)
+ old_m->flags |= PG_LAUNDRY;
copy_m->flags &= ~PG_CLEAN;
- vm_page_activate(copy_m); /* XXX */
+ vm_page_activate(copy_m);
vm_page_unlock_queues();
PAGE_WAKEUP(copy_m);
@@ -832,8 +884,18 @@ vm_fault(map, vaddr, fault_type, change_wiring)
else
vm_page_unwire(m);
}
- else
+ else {
vm_page_activate(m);
+ }
+
+ if( curproc && curproc->p_stats) {
+ if (hardfault) {
+ curproc->p_stats->p_ru.ru_majflt++;
+ } else {
+ curproc->p_stats->p_ru.ru_minflt++;
+ }
+ }
+
vm_page_unlock_queues();
/*
@@ -857,9 +919,10 @@ vm_fault_wire(map, start, end)
vm_map_t map;
vm_offset_t start, end;
{
+
register vm_offset_t va;
register pmap_t pmap;
- int rv;
+ int rv;
pmap = vm_map_pmap(map);
@@ -893,7 +956,8 @@ vm_fault_wire(map, start, end)
*
* Unwire a range of virtual addresses in a map.
*/
-void vm_fault_unwire(map, start, end)
+void
+vm_fault_unwire(map, start, end)
vm_map_t map;
vm_offset_t start, end;
{
@@ -942,13 +1006,13 @@ void vm_fault_unwire(map, start, end)
* entry corresponding to a main map entry that is wired down).
*/
-void vm_fault_copy_entry(dst_map, src_map, dst_entry, src_entry)
+void
+vm_fault_copy_entry(dst_map, src_map, dst_entry, src_entry)
vm_map_t dst_map;
vm_map_t src_map;
vm_map_entry_t dst_entry;
vm_map_entry_t src_entry;
{
-
vm_object_t dst_object;
vm_object_t src_object;
vm_offset_t dst_offset;
@@ -960,7 +1024,7 @@ void vm_fault_copy_entry(dst_map, src_map, dst_entry, src_entry)
#ifdef lint
src_map++;
-#endif
+#endif lint
src_object = src_entry->object.vm_object;
src_offset = src_entry->offset;
@@ -1031,5 +1095,211 @@ void vm_fault_copy_entry(dst_map, src_map, dst_entry, src_entry)
PAGE_WAKEUP(dst_m);
vm_object_unlock(dst_object);
}
+}
+
+
+/*
+ * looks page up in shadow chain
+ */
+
+int
+vm_fault_page_lookup(object, offset, rtobject, rtoffset, rtm)
+ vm_object_t object;
+ vm_offset_t offset;
+ vm_object_t *rtobject;
+ vm_offset_t *rtoffset;
+ vm_page_t *rtm;
+{
+ vm_page_t m;
+ vm_object_t first_object = object;
+
+ *rtm = 0;
+ *rtobject = 0;
+ *rtoffset = 0;
+
+
+ while (!(m=vm_page_lookup(object, offset))) {
+ if (object->pager) {
+ if (vm_pager_has_page(object->pager, object->paging_offset+offset)) {
+ *rtobject = object;
+ *rtoffset = offset;
+ return 1;
+ }
+ }
+
+ if (!object->shadow)
+ return 0;
+ else {
+ offset += object->shadow_offset;
+ object = object->shadow;
+ }
+ }
+ *rtobject = object;
+ *rtoffset = offset;
+ *rtm = m;
+ return 1;
+}
+
+/*
+ * This routine checks around the requested page for other pages that
+ * might be able to be faulted in.
+ *
+ * Inputs:
+ * first_object, first_offset, m, rbehind, rahead
+ *
+ * Outputs:
+ * marray (array of vm_page_t), reqpage (index of requested page)
+ *
+ * Return value:
+ * number of pages in marray
+ */
+int
+vm_fault_additional_pages(first_object, first_offset, m, rbehind, raheada, marray, reqpage)
+ vm_object_t first_object;
+ vm_offset_t first_offset;
+ vm_page_t m;
+ int rbehind;
+ int raheada;
+ vm_page_t *marray;
+ int *reqpage;
+{
+ int i;
+ vm_page_t tmpm;
+ vm_object_t object;
+ vm_offset_t offset, startoffset, endoffset, toffset, size;
+ vm_object_t rtobject;
+ vm_page_t rtm;
+ vm_offset_t rtoffset;
+ vm_offset_t offsetdiff;
+ int rahead;
+ int treqpage;
+
+ object = m->object;
+ offset = m->offset;
+
+ offsetdiff = offset - first_offset;
+
+ /*
+ * if the requested page is not available, then give up now
+ */
+
+ if (!vm_pager_has_page(object->pager, object->paging_offset+offset))
+ return 0;
+
+ /*
+ * if there is no getmulti routine for this pager, then just allow
+ * one page to be read.
+ */
+/*
+ if (!object->pager->pg_ops->pgo_getpages) {
+ *reqpage = 0;
+ marray[0] = m;
+ return 1;
+ }
+*/
+
+ /*
+ * try to do any readahead that we might have free pages for.
+ */
+ rahead = raheada;
+ if (rahead > (cnt.v_free_count - cnt.v_free_reserved)) {
+ rahead = cnt.v_free_count - cnt.v_free_reserved;
+ rbehind = 0;
+ }
+
+ if (cnt.v_free_count < cnt.v_free_min) {
+ if (rahead > VM_FAULT_READ_AHEAD_MIN)
+ rahead = VM_FAULT_READ_AHEAD_MIN;
+ rbehind = 0;
+ }
+
+ /*
+ * if we don't have any free pages, then just read one page.
+ */
+ if (rahead <= 0) {
+ *reqpage = 0;
+ marray[0] = m;
+ return 1;
+ }
+
+ /*
+ * scan backward for the read behind pages --
+ * in memory or on disk not in same object
+ */
+ toffset = offset - NBPG;
+ if( rbehind*NBPG > offset)
+ rbehind = offset / NBPG;
+ startoffset = offset - rbehind*NBPG;
+ while (toffset >= startoffset) {
+ if (!vm_fault_page_lookup(first_object, toffset - offsetdiff, &rtobject, &rtoffset, &rtm) ||
+ rtm != 0 || rtobject != object) {
+ startoffset = toffset + NBPG;
+ break;
+ }
+ if( toffset == 0)
+ break;
+ toffset -= NBPG;
+ }
+
+ /*
+ * scan forward for the read ahead pages --
+ * in memory or on disk not in same object
+ */
+ toffset = offset + NBPG;
+ endoffset = offset + (rahead+1)*NBPG;
+ while (toffset < object->size && toffset < endoffset) {
+ if (!vm_fault_page_lookup(first_object, toffset - offsetdiff, &rtobject, &rtoffset, &rtm) ||
+ rtm != 0 || rtobject != object) {
+ break;
+ }
+ toffset += NBPG;
+ }
+ endoffset = toffset;
+ /* calculate number of bytes of pages */
+ size = (endoffset - startoffset) / NBPG;
+
+ /* calculate the page offset of the required page */
+ treqpage = (offset - startoffset) / NBPG;
+
+ /* see if we have space (again) */
+ if (cnt.v_free_count >= cnt.v_free_reserved + size) {
+ bzero(marray, (rahead + rbehind + 1) * sizeof(vm_page_t));
+ /*
+ * get our pages and don't block for them
+ */
+ for (i = 0; i < size; i++) {
+ if (i != treqpage)
+ rtm = vm_page_alloc(object, startoffset + i * NBPG);
+ else
+ rtm = m;
+ marray[i] = rtm;
+ }
+
+ for (i = 0; i < size; i++) {
+ if (marray[i] == 0)
+ break;
+ }
+
+ /*
+ * if we could not get our block of pages, then
+ * free the readahead/readbehind pages.
+ */
+ if (i < size) {
+ for (i = 0; i < size; i++) {
+ if (i != treqpage && marray[i])
+ FREE_PAGE(marray[i]);
+ }
+ *reqpage = 0;
+ marray[0] = m;
+ return 1;
+ }
+
+ *reqpage = treqpage;
+ return size;
+ }
+ *reqpage = 0;
+ marray[0] = m;
+ return 1;
}
+
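
vm_fault_additional_pages() decides how large a cluster to fault in around the requested page: read-ahead is clamped by the number of spare free pages, read-behind is dropped entirely under memory pressure, and the window is trimmed so it neither precedes offset 0 nor extends past the object. The sketch below reproduces only that window arithmetic as a stand-alone C function; it omits the per-page checks against the backing pager and the page allocation, and all parameter names are illustrative.

#include <stdio.h>

#define NBPG 4096

/*
 * Compute the cluster [*start, *end) of page-aligned offsets to fault in
 * around 'offset', and the index of the originally requested page.
 * Mirrors the clamping rules: shrink read-ahead when free pages are scarce,
 * drop read-behind entirely when memory is tight.
 */
static int fault_window(unsigned long offset, unsigned long objsize,
    int rbehind, int rahead, long freepages, long freereserved, long freemin,
    unsigned long *start, unsigned long *end, int *reqpage)
{
	if (rahead > freepages - freereserved) {	/* not enough spare pages */
		rahead = (int)(freepages - freereserved);
		rbehind = 0;
	}
	if (freepages < freemin) {			/* really tight: minimal read-ahead */
		if (rahead > 1)
			rahead = 1;
		rbehind = 0;
	}
	if (rahead <= 0) {				/* no room: just the faulting page */
		*start = offset;
		*end = offset + NBPG;
		*reqpage = 0;
		return 1;
	}
	if ((unsigned long)rbehind * NBPG > offset)	/* don't run off the front of the object */
		rbehind = (int)(offset / NBPG);
	*start = offset - (unsigned long)rbehind * NBPG;
	*end = offset + ((unsigned long)rahead + 1) * NBPG;
	if (*end > objsize)				/* don't run off the end of the object */
		*end = objsize;
	*reqpage = (int)((offset - *start) / NBPG);
	return (int)((*end - *start) / NBPG);
}

int main(void)
{
	unsigned long s, e;
	int req;
	int n = fault_window(10 * NBPG, 64 * NBPG, 3, 4, 1000, 64, 128, &s, &e, &req);

	printf("fault in %d pages, offsets [%lu,%lu), requested page is index %d\n",
	    n, s, e, req);
	return 0;
}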
diff --git a/sys/vm/vm_glue.c b/sys/vm/vm_glue.c
index 5676ff3..f181ab0 100644
--- a/sys/vm/vm_glue.c
+++ b/sys/vm/vm_glue.c
@@ -67,16 +67,22 @@
#include <sys/buf.h>
#include <sys/user.h>
+#include <sys/kernel.h>
+#include <sys/dkstat.h>
+
#include <vm/vm.h>
#include <vm/vm_page.h>
+#include <vm/vm_pageout.h>
#include <vm/vm_kern.h>
-#include <machine/cpu.h>
+#include <machine/stdarg.h>
+extern char kstack[];
int avefree = 0; /* XXX */
-unsigned maxdmap = MAXDSIZ; /* XXX */
int readbuffers = 0; /* XXX allow kgdb to read kernel buffer pool */
+/* vm_map_t upages_map; */
+void swapout(struct proc *p);
int
kernacc(addr, len, rw)
caddr_t addr;
@@ -89,18 +95,6 @@ kernacc(addr, len, rw)
saddr = trunc_page(addr);
eaddr = round_page(addr+len);
rv = vm_map_check_protection(kernel_map, saddr, eaddr, prot);
- /*
- * XXX there are still some things (e.g. the buffer cache) that
- * are managed behind the VM system's back so even though an
- * address is accessible in the mind of the VM system, there may
- * not be physical pages where the VM thinks there is. This can
- * lead to bogus allocation of pages in the kernel address space
- * or worse, inconsistencies at the pmap level. We only worry
- * about the buffer cache for now.
- */
- if (!readbuffers && rv && (eaddr > (vm_offset_t)buffers &&
- saddr < (vm_offset_t)buffers + MAXBSIZE * nbuf))
- rv = FALSE;
return(rv == TRUE);
}
@@ -112,6 +106,23 @@ useracc(addr, len, rw)
boolean_t rv;
vm_prot_t prot = rw == B_READ ? VM_PROT_READ : VM_PROT_WRITE;
+ /*
+ * XXX - specially disallow access to user page tables - they are
+ * in the map.
+ *
+ * XXX - don't specially disallow access to the user area - treat
+ * it as incorrectly as elsewhere.
+ *
+ * XXX - VM_MAXUSER_ADDRESS is an end address, not a max. It was
+ * only used (as an end address) in trap.c. Use it as an end
+ * address here too.
+ */
+ if ((vm_offset_t) addr >= VM_MAXUSER_ADDRESS
+ || (vm_offset_t) addr + len > VM_MAXUSER_ADDRESS
+ || (vm_offset_t) addr + len <= (vm_offset_t) addr) {
+ return (FALSE);
+ }
+
rv = vm_map_check_protection(&curproc->p_vmspace->vm_map,
trunc_page(addr), round_page(addr+len), prot);
return(rv == TRUE);
@@ -121,40 +132,18 @@ useracc(addr, len, rw)
/*
* Change protections on kernel pages from addr to addr+len
* (presumably so debugger can plant a breakpoint).
- *
- * We force the protection change at the pmap level. If we were
- * to use vm_map_protect a change to allow writing would be lazily-
- * applied meaning we would still take a protection fault, something
- * we really don't want to do. It would also fragment the kernel
- * map unnecessarily. We cannot use pmap_protect since it also won't
- * enforce a write-enable request. Using pmap_enter is the only way
- * we can ensure the change takes place properly.
+ * All addresses are assumed to reside in the Sysmap,
*/
-void
chgkprot(addr, len, rw)
register caddr_t addr;
int len, rw;
{
- vm_prot_t prot;
- vm_offset_t pa, sva, eva;
-
- prot = rw == B_READ ? VM_PROT_READ : VM_PROT_READ|VM_PROT_WRITE;
- eva = round_page(addr + len);
- for (sva = trunc_page(addr); sva < eva; sva += PAGE_SIZE) {
- /*
- * Extract physical address for the page.
- * We use a cheezy hack to differentiate physical
- * page 0 from an invalid mapping, not that it
- * really matters...
- */
- pa = pmap_extract(kernel_pmap, sva|1);
- if (pa == 0)
- panic("chgkprot: invalid page");
- pmap_enter(kernel_pmap, sva, pa&~1, prot, TRUE);
- }
+ vm_prot_t prot = rw == B_READ ? VM_PROT_READ : VM_PROT_WRITE;
+
+ vm_map_protect(kernel_map, trunc_page(addr),
+ round_page(addr+len), prot, FALSE);
}
#endif
-
void
vslock(addr, len)
caddr_t addr;
@@ -172,8 +161,8 @@ vsunlock(addr, len, dirtied)
{
#ifdef lint
dirtied++;
-#endif
- vm_map_pageable(&curproc->p_vmspace->vm_map, trunc_page(addr),
+#endif lint
+ vm_map_pageable(&curproc->p_vmspace->vm_map, trunc_page(addr),
round_page(addr+len), TRUE);
}
@@ -194,16 +183,19 @@ vm_fork(p1, p2, isvfork)
int isvfork;
{
register struct user *up;
- vm_offset_t addr;
+ vm_offset_t addr, ptaddr;
+ int i;
+ struct vm_map *vp;
+
+ while( cnt.v_free_count < cnt.v_free_min)
+ VM_WAIT;
-#ifdef i386
/*
* avoid copying any of the parent's pagetables or other per-process
* objects that reside in the map by marking all of them non-inheritable
*/
(void)vm_map_inherit(&p1->p_vmspace->vm_map,
- UPT_MIN_ADDRESS-UPAGES*NBPG, VM_MAX_ADDRESS, VM_INHERIT_NONE);
-#endif
+ UPT_MIN_ADDRESS - UPAGES * NBPG, VM_MAX_ADDRESS, VM_INHERIT_NONE);
p2->p_vmspace = vmspace_fork(p1->p_vmspace);
#ifdef SYSVSHM
@@ -211,23 +203,40 @@ vm_fork(p1, p2, isvfork)
shmfork(p1, p2, isvfork);
#endif
-#ifndef i386
/*
* Allocate a wired-down (for now) pcb and kernel stack for the process
*/
- addr = kmem_alloc_pageable(kernel_map, ctob(UPAGES));
- if (addr == 0)
- panic("vm_fork: no more kernel virtual memory");
- vm_map_pageable(kernel_map, addr, addr + ctob(UPAGES), FALSE);
-#else
-/* XXX somehow, on 386, ocassionally pageout removes active, wired down kstack,
-and pagetables, WITHOUT going thru vm_page_unwire! Why this appears to work is
-not yet clear, yet it does... */
- addr = kmem_alloc(kernel_map, ctob(UPAGES));
- if (addr == 0)
- panic("vm_fork: no more kernel virtual memory");
-#endif
- up = (struct user *)addr;
+
+ addr = (vm_offset_t) kstack;
+
+ vp = &p2->p_vmspace->vm_map;
+
+ /* ream out old pagetables and kernel stack */
+ (void)vm_deallocate(vp, addr, UPT_MAX_ADDRESS - addr);
+
+ /* get new pagetables and kernel stack */
+ (void)vm_allocate(vp, &addr, UPT_MAX_ADDRESS - addr, FALSE);
+
+ /* force in the page table encompassing the UPAGES */
+ ptaddr = trunc_page((u_int)vtopte(addr));
+ vm_map_pageable(vp, ptaddr, ptaddr + NBPG, FALSE);
+
+ /* and force in (demand-zero) the UPAGES */
+ vm_map_pageable(vp, addr, addr + UPAGES * NBPG, FALSE);
+
+ /* get a kernel virtual address for the UPAGES for this proc */
+ up = (struct user *)kmem_alloc_pageable(kernel_map, UPAGES * NBPG);
+
+ /* and force-map the upages into the kernel pmap */
+ for (i = 0; i < UPAGES; i++)
+ pmap_enter(vm_map_pmap(kernel_map),
+ ((vm_offset_t) up) + NBPG * i,
+ pmap_extract(vp->pmap, addr + NBPG * i),
+ VM_PROT_READ|VM_PROT_WRITE, 1);
+
+ /* and allow the UPAGES page table entry to be paged (at the vm system level) */
+ vm_map_pageable(vp, ptaddr, ptaddr + NBPG, TRUE);
+
p2->p_addr = up;
/*
@@ -246,15 +255,7 @@ not yet clear, yet it does... */
((caddr_t)&up->u_stats.pstat_endcopy -
(caddr_t)&up->u_stats.pstat_startcopy));
-#ifdef i386
- { u_int addr = UPT_MIN_ADDRESS - UPAGES*NBPG; struct vm_map *vp;
-
- vp = &p2->p_vmspace->vm_map;
- (void)vm_deallocate(vp, addr, UPT_MAX_ADDRESS - addr);
- (void)vm_allocate(vp, &addr, UPT_MAX_ADDRESS - addr, FALSE);
- (void)vm_map_inherit(vp, addr, UPT_MAX_ADDRESS, VM_INHERIT_NONE);
- }
-#endif
+
/*
* cpu_fork will copy and update the kernel stack and pcb,
* and make the child ready to run. It marks the child
@@ -273,6 +274,7 @@ void
vm_init_limits(p)
register struct proc *p;
{
+ int tmp;
/*
* Set up the initial limits on process VM.
@@ -285,11 +287,13 @@ vm_init_limits(p)
p->p_rlimit[RLIMIT_STACK].rlim_max = MAXSSIZ;
p->p_rlimit[RLIMIT_DATA].rlim_cur = DFLDSIZ;
p->p_rlimit[RLIMIT_DATA].rlim_max = MAXDSIZ;
- p->p_rlimit[RLIMIT_RSS].rlim_cur = ptoa(cnt.v_free_count);
+ tmp = ((2 * cnt.v_free_count) / 3) - 32;
+ if (cnt.v_free_count < 512)
+ tmp = cnt.v_free_count;
+ p->p_rlimit[RLIMIT_RSS].rlim_cur = ptoa(tmp);
+ p->p_rlimit[RLIMIT_RSS].rlim_max = RLIM_INFINITY;
}
-#include <vm/vm_pageout.h>
-
#ifdef DEBUG
int enableswap = 1;
int swapdebug = 0;
@@ -298,12 +302,67 @@ int swapdebug = 0;
#define SDB_SWAPOUT 4
#endif
+void
+faultin(p)
+struct proc *p;
+{
+ vm_offset_t i;
+ vm_offset_t vaddr, ptaddr;
+ vm_offset_t v, v1;
+ struct user *up;
+ int s;
+ int opflag;
+
+ if ((p->p_flag & P_INMEM) == 0) {
+ int rv0, rv1;
+ vm_map_t map;
+
+ ++p->p_lock;
+
+ map = &p->p_vmspace->vm_map;
+ /* force the page table encompassing the kernel stack (upages) */
+ ptaddr = trunc_page((u_int)vtopte(kstack));
+ vm_map_pageable(map, ptaddr, ptaddr + NBPG, FALSE);
+
+ /* wire in the UPAGES */
+ vm_map_pageable(map, (vm_offset_t) kstack,
+ (vm_offset_t) kstack + UPAGES * NBPG, FALSE);
+
+ /* and map them nicely into the kernel pmap */
+ for (i = 0; i < UPAGES; i++) {
+ vm_offset_t off = i * NBPG;
+ vm_offset_t pa = (vm_offset_t)
+ pmap_extract(&p->p_vmspace->vm_pmap,
+ (vm_offset_t) kstack + off);
+ pmap_enter(vm_map_pmap(kernel_map),
+ ((vm_offset_t)p->p_addr) + off,
+ pa, VM_PROT_READ|VM_PROT_WRITE, 1);
+ }
+
+ /* and let the page table pages go (at least above pmap level) */
+ vm_map_pageable(map, ptaddr, ptaddr + NBPG, TRUE);
+
+ s = splhigh();
+
+ if (p->p_stat == SRUN)
+ setrunqueue(p);
+
+ p->p_flag |= P_INMEM;
+
+		/* undo the hold (p_lock) acquired above */
+ --p->p_lock;
+ splx(s);
+
+ }
+
+}
+
+int swapinreq;
+int percentactive;
/*
- * Brutally simple:
- * 1. Attempt to swapin every swaped-out, runnable process in
- * order of priority.
- * 2. If not enough memory, wake the pageout daemon and let it
- * clear some space.
+ * This swapin algorithm attempts to swap-in processes only if there
+ * is enough space for them. Of course, if a process waits for a long
+ * time, it will be swapped in anyway.
*/
void
scheduler()
@@ -313,88 +372,104 @@ scheduler()
struct proc *pp;
int ppri;
vm_offset_t addr;
- vm_size_t size;
+ int lastidle, lastrun;
+ int curidle, currun;
+ int forceload;
+ int percent;
+ int ntries;
+
+ lastidle = 0;
+ lastrun = 0;
loop:
-#ifdef DEBUG
- while (!enableswap)
- sleep((caddr_t)&proc0, PVM);
-#endif
+ ntries = 0;
+ vmmeter();
+
+ curidle = cp_time[CP_IDLE];
+ currun = cp_time[CP_USER] + cp_time[CP_SYS] + cp_time[CP_NICE];
+ percent = (100*(currun-lastrun)) / ( 1 + (currun-lastrun) + (curidle-lastidle));
+ lastrun = currun;
+ lastidle = curidle;
+ if( percent > 100)
+ percent = 100;
+ percentactive = percent;
+
+ if( percentactive < 25)
+ forceload = 1;
+ else
+ forceload = 0;
+
+loop1:
pp = NULL;
ppri = INT_MIN;
for (p = (struct proc *)allproc; p != NULL; p = p->p_next) {
if (p->p_stat == SRUN && (p->p_flag & P_INMEM) == 0) {
+ int mempri;
pri = p->p_swtime + p->p_slptime - p->p_nice * 8;
- if (pri > ppri) {
+ mempri = pri > 0 ? pri : 0;
+ /*
+ * if this process is higher priority and there is
+ * enough space, then select this process instead
+ * of the previous selection.
+ */
+ if (pri > ppri &&
+ (((cnt.v_free_count + (mempri * (4*PAGE_SIZE) / PAGE_SIZE) >= (p->p_vmspace->vm_swrss)) || (ntries > 0 && forceload)))) {
pp = p;
ppri = pri;
}
}
}
-#ifdef DEBUG
- if (swapdebug & SDB_FOLLOW)
- printf("sched: running, procp %x pri %d\n", pp, ppri);
-#endif
+
+ if ((pp == NULL) && (ntries == 0) && forceload) {
+ ++ntries;
+ goto loop1;
+ }
+
/*
* Nothing to do, back to sleep
*/
if ((p = pp) == NULL) {
- sleep((caddr_t)&proc0, PVM);
+ tsleep((caddr_t)&proc0, PVM, "sched", 0);
goto loop;
}
/*
- * We would like to bring someone in.
- * This part is really bogus cuz we could deadlock on memory
- * despite our feeble check.
+ * We would like to bring someone in. (only if there is space).
*/
- size = round_page(ctob(UPAGES));
- addr = (vm_offset_t) p->p_addr;
- if (cnt.v_free_count > atop(size)) {
-#ifdef DEBUG
- if (swapdebug & SDB_SWAPIN)
- printf("swapin: pid %d(%s)@%x, pri %d free %d\n",
- p->p_pid, p->p_comm, p->p_addr,
- ppri, cnt.v_free_count);
-#endif
- vm_map_pageable(kernel_map, addr, addr+size, FALSE);
- /*
- * Some architectures need to be notified when the
- * user area has moved to new physical page(s) (e.g.
- * see pmax/pmax/vm_machdep.c).
- */
- cpu_swapin(p);
- (void) splstatclock();
- if (p->p_stat == SRUN)
- setrunqueue(p);
- p->p_flag |= P_INMEM;
- (void) spl0();
+/*
+ printf("swapin: %d, free: %d, res: %d, min: %d\n",
+ p->p_pid, cnt.v_free_count, cnt.v_free_reserved, cnt.v_free_min);
+*/
+ (void) splhigh();
+ if ((forceload && (cnt.v_free_count > (cnt.v_free_reserved + UPAGES + 1))) ||
+ (cnt.v_free_count >= cnt.v_free_min)) {
+ spl0();
+ faultin(p);
p->p_swtime = 0;
goto loop;
- }
+ }
+ /*
+ * log the memory shortage
+ */
+ swapinreq += p->p_vmspace->vm_swrss;
/*
* Not enough memory, jab the pageout daemon and wait til the
* coast is clear.
*/
-#ifdef DEBUG
- if (swapdebug & SDB_FOLLOW)
- printf("sched: no room for pid %d(%s), free %d\n",
- p->p_pid, p->p_comm, cnt.v_free_count);
-#endif
- (void) splhigh();
- VM_WAIT;
+ if( cnt.v_free_count < cnt.v_free_min) {
+ VM_WAIT;
+ } else {
+ tsleep((caddr_t)&proc0, PVM, "sched", 0);
+ }
(void) spl0();
-#ifdef DEBUG
- if (swapdebug & SDB_FOLLOW)
- printf("sched: room again, free %d\n", cnt.v_free_count);
-#endif
goto loop;
}
-#define swappable(p) \
- (((p)->p_flag & \
- (P_SYSTEM | P_INMEM | P_NOSWAP | P_WEXIT | P_PHYSIO)) == P_INMEM)
+#define swappable(p) \
+ (((p)->p_lock == 0) && \
+ ((p)->p_flag & (P_TRACED|P_NOSWAP|P_SYSTEM|P_INMEM|P_WEXIT|P_PHYSIO)) == P_INMEM)
+extern int vm_pageout_free_min;
/*
* Swapout is driven by the pageout daemon. Very simple, we find eligible
* procs and unwire their u-areas. We try to always "swap" at least one
@@ -409,54 +484,86 @@ swapout_threads()
register struct proc *p;
struct proc *outp, *outp2;
int outpri, outpri2;
+ int tpri;
int didswap = 0;
+ int swapneeded = swapinreq;
extern int maxslp;
+ int runnablenow;
+ int s;
-#ifdef DEBUG
- if (!enableswap)
- return;
-#endif
+swapmore:
+ runnablenow = 0;
outp = outp2 = NULL;
- outpri = outpri2 = 0;
+ outpri = outpri2 = INT_MIN;
for (p = (struct proc *)allproc; p != NULL; p = p->p_next) {
if (!swappable(p))
continue;
switch (p->p_stat) {
case SRUN:
- if (p->p_swtime > outpri2) {
+ ++runnablenow;
+ /*
+ * count the process as being in a runnable state
+ */
+ if ((tpri = p->p_swtime + p->p_nice * 8) > outpri2) {
outp2 = p;
- outpri2 = p->p_swtime;
+ outpri2 = tpri;
}
continue;
case SSLEEP:
case SSTOP:
- if (p->p_slptime >= maxslp) {
+ /*
+			 * do not swap out a process that is waiting for VM
+			 * data structures; there is a possible deadlock.
+ */
+ if (!lock_try_write( &p->p_vmspace->vm_map.lock)) {
+ continue;
+ }
+ vm_map_unlock( &p->p_vmspace->vm_map);
+ if (p->p_slptime > maxslp) {
swapout(p);
didswap++;
- } else if (p->p_slptime > outpri) {
+ } else if ((tpri = p->p_slptime + p->p_nice * 8) > outpri) {
outp = p;
- outpri = p->p_slptime;
+ outpri = tpri ;
}
continue;
}
}
/*
- * If we didn't get rid of any real duds, toss out the next most
- * likely sleeping/stopped or running candidate. We only do this
- * if we are real low on memory since we don't gain much by doing
- * it (UPAGES pages).
+	 * We swap out only if there are more than two runnable processes or if
+	 * another process needs some space to swap in.
*/
- if (didswap == 0 &&
- cnt.v_free_count <= atop(round_page(ctob(UPAGES)))) {
- if ((p = outp) == 0)
- p = outp2;
-#ifdef DEBUG
- if (swapdebug & SDB_SWAPOUT)
- printf("swapout_threads: no duds, try procp %x\n", p);
-#endif
- if (p)
+ if ((swapinreq || ((percentactive > 90) && (runnablenow > 2))) &&
+ (((cnt.v_free_count + cnt.v_inactive_count) <= (cnt.v_free_target + cnt.v_inactive_target)) ||
+ (cnt.v_free_count < cnt.v_free_min))) {
+ if ((p = outp) == 0) {
+ p = outp2;
+ }
+
+ if (p) {
swapout(p);
+ didswap = 1;
+ }
+ }
+
+ /*
+	 * if we previously found a process to swap out, and we need to swap out
+	 * more, then try again.
+ */
+#if 0
+ if( p && swapinreq)
+ goto swapmore;
+#endif
+
+ /*
+ * If we swapped something out, and another process needed memory,
+ * then wakeup the sched process.
+ */
+ if (didswap) {
+ if (swapneeded)
+ wakeup((caddr_t)&proc0);
+ swapinreq = 0;
}
}
@@ -465,59 +572,37 @@ swapout(p)
register struct proc *p;
{
vm_offset_t addr;
- vm_size_t size;
+ struct pmap *pmap = &p->p_vmspace->vm_pmap;
+ vm_map_t map = &p->p_vmspace->vm_map;
+ vm_offset_t ptaddr;
+ int i;
-#ifdef DEBUG
- if (swapdebug & SDB_SWAPOUT)
- printf("swapout: pid %d(%s)@%x, stat %x pri %d free %d\n",
- p->p_pid, p->p_comm, p->p_addr, p->p_stat,
- p->p_slptime, cnt.v_free_count);
-#endif
- size = round_page(ctob(UPAGES));
- addr = (vm_offset_t) p->p_addr;
-#if defined(hp300) || defined(luna68k)
+ ++p->p_stats->p_ru.ru_nswap;
/*
- * Ugh! u-area is double mapped to a fixed address behind the
- * back of the VM system and accesses are usually through that
- * address rather than the per-process address. Hence reference
- * and modify information are recorded at the fixed address and
- * lost at context switch time. We assume the u-struct and
- * kernel stack are always accessed/modified and force it to be so.
+ * remember the process resident count
*/
- {
- register int i;
- volatile long tmp;
-
- for (i = 0; i < UPAGES; i++) {
- tmp = *(long *)addr; *(long *)addr = tmp;
- addr += NBPG;
- }
- addr = (vm_offset_t) p->p_addr;
- }
-#endif
-#ifdef mips
+ p->p_vmspace->vm_swrss =
+ p->p_vmspace->vm_pmap.pm_stats.resident_count;
/*
- * Be sure to save the floating point coprocessor state before
- * paging out the u-struct.
+ * and decrement the amount of needed space
*/
- {
- extern struct proc *machFPCurProcPtr;
+ swapinreq -= min(swapinreq, p->p_vmspace->vm_pmap.pm_stats.resident_count);
- if (p == machFPCurProcPtr) {
- MachSaveCurFPState(p);
- machFPCurProcPtr = (struct proc *)0;
- }
- }
-#endif
-#ifndef i386 /* temporary measure till we find spontaineous unwire of kstack */
- vm_map_pageable(kernel_map, addr, addr+size, TRUE);
- pmap_collect(vm_map_pmap(&p->p_vmspace->vm_map));
-#endif
(void) splhigh();
p->p_flag &= ~P_INMEM;
if (p->p_stat == SRUN)
remrq(p);
(void) spl0();
+
+ ++p->p_lock;
+/* let the upages be paged */
+ pmap_remove(vm_map_pmap(kernel_map),
+ (vm_offset_t) p->p_addr, ((vm_offset_t) p->p_addr) + UPAGES * NBPG);
+
+ vm_map_pageable(map, (vm_offset_t) kstack,
+ (vm_offset_t) kstack + UPAGES * NBPG, TRUE);
+
+ --p->p_lock;
p->p_swtime = 0;
}
@@ -525,6 +610,7 @@ swapout(p)
* The rest of these routines fake thread handling
*/
+#ifndef assert_wait
void
assert_wait(event, ruptible)
int event;
@@ -535,44 +621,38 @@ assert_wait(event, ruptible)
#endif
curproc->p_thread = event;
}
+#endif
void
-thread_block()
+thread_block(char *msg)
{
- int s = splhigh();
-
if (curproc->p_thread)
- sleep((caddr_t)curproc->p_thread, PVM);
- splx(s);
+ tsleep((caddr_t)curproc->p_thread, PVM, msg, 0);
}
+
void
-thread_sleep(event, lock, ruptible)
+thread_sleep_(event, lock, wmesg)
int event;
simple_lock_t lock;
- boolean_t ruptible;
+ char *wmesg;
{
-#ifdef lint
- ruptible++;
-#endif
- int s = splhigh();
curproc->p_thread = event;
simple_unlock(lock);
- if (curproc->p_thread)
- sleep((caddr_t)event, PVM);
- splx(s);
+ if (curproc->p_thread) {
+ tsleep((caddr_t)event, PVM, wmesg, 0);
+ }
}
+#ifndef thread_wakeup
void
thread_wakeup(event)
int event;
{
- int s = splhigh();
-
wakeup((caddr_t)event);
- splx(s);
}
+#endif
/*
* DEBUG stuff
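
The swap-in policy in scheduler() rates recent CPU activity by comparing the growth of busy ticks against total ticks since the last sample, clamps the result to 100, and forces swap-ins when the machine has been mostly idle (below about 25% active). A stand-alone sketch of that estimate, with the kernel's cp_time[] sampling replaced by an illustrative structure:

#include <stdio.h>

struct cpu_sample {
	long idle;	/* accumulated idle ticks */
	long run;	/* accumulated user + sys + nice ticks */
};

/* percent of recent time spent busy, based on the change since the last sample */
static int percent_active(struct cpu_sample *last, struct cpu_sample now)
{
	long drun = now.run - last->run;
	long didle = now.idle - last->idle;
	int percent = (int)((100 * drun) / (1 + drun + didle));

	*last = now;			/* remember this sample for next time */
	if (percent > 100)
		percent = 100;
	return percent;
}

int main(void)
{
	struct cpu_sample last = {0, 0};
	int p = percent_active(&last, (struct cpu_sample){ .idle = 300, .run = 100 });

	printf("%d%% active, forceload=%d\n", p, p < 25 ? 1 : 0);
	return 0;
}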
diff --git a/sys/vm/vm_init.c b/sys/vm/vm_init.c
index 4874f9e..a0eac70 100644
--- a/sys/vm/vm_init.c
+++ b/sys/vm/vm_init.c
@@ -1,3 +1,4 @@
+
/*
* Copyright (c) 1991, 1993
* The Regents of the University of California. All rights reserved.
@@ -79,7 +80,8 @@
* The start and end address of physical memory is passed in.
*/
-void vm_mem_init()
+void
+vm_mem_init()
{
extern vm_offset_t avail_start, avail_end;
extern vm_offset_t virtual_avail, virtual_end;
@@ -89,9 +91,9 @@ void vm_mem_init()
* From here on, all physical memory is accounted for,
* and we use only virtual addresses.
*/
- vm_set_page_size();
- vm_page_startup(&avail_start, &avail_end);
+ vm_set_page_size();
+ virtual_avail = vm_page_startup(avail_start, avail_end, virtual_avail);
/*
* Initialize other VM packages
*/
diff --git a/sys/vm/vm_kern.c b/sys/vm/vm_kern.c
index 7e4db63..55a0949 100644
--- a/sys/vm/vm_kern.c
+++ b/sys/vm/vm_kern.c
@@ -292,9 +292,13 @@ kmem_malloc(map, size, canwait)
vm_map_lock(map);
if (vm_map_findspace(map, 0, size, &addr)) {
vm_map_unlock(map);
+#if 0
if (canwait) /* XXX should wait */
panic("kmem_malloc: %s too small",
map == kmem_map ? "kmem_map" : "mb_map");
+#endif
+ if (canwait)
+ panic("kmem_malloc: map too small");
return (0);
}
offset = addr - vm_map_min(kmem_map);
@@ -404,7 +408,7 @@ vm_offset_t kmem_alloc_wait(map, size)
}
assert_wait((int)map, TRUE);
vm_map_unlock(map);
- thread_block();
+ thread_block("kmaw");
}
vm_map_insert(map, NULL, (vm_offset_t)0, addr, addr + size);
vm_map_unlock(map);
diff --git a/sys/vm/vm_kern.h b/sys/vm/vm_kern.h
index d0d2c35..c032560 100644
--- a/sys/vm/vm_kern.h
+++ b/sys/vm/vm_kern.h
@@ -65,8 +65,10 @@
/* Kernel memory management definitions. */
vm_map_t buffer_map;
-vm_map_t exec_map;
vm_map_t kernel_map;
vm_map_t kmem_map;
vm_map_t mb_map;
+vm_map_t io_map;
+vm_map_t clean_map;
+vm_map_t pager_map;
vm_map_t phys_map;
diff --git a/sys/vm/vm_map.c b/sys/vm/vm_map.c
index 425fe0d..ffffa96 100644
--- a/sys/vm/vm_map.c
+++ b/sys/vm/vm_map.c
@@ -73,6 +73,7 @@
#include <vm/vm.h>
#include <vm/vm_page.h>
#include <vm/vm_object.h>
+#include <vm/vm_kern.h>
/*
* Virtual memory maps provide for the mapping, protection,
@@ -137,6 +138,11 @@ vm_size_t kentry_data_size;
vm_map_entry_t kentry_free;
vm_map_t kmap_free;
+int kentry_count;
+vm_map_t kmap_free;
+static vm_offset_t mapvm=0;
+static int mapvmpgcnt=0;
+
static void _vm_map_clip_end __P((vm_map_t, vm_map_entry_t, vm_offset_t));
static void _vm_map_clip_start __P((vm_map_t, vm_map_entry_t, vm_offset_t));
@@ -273,27 +279,71 @@ vm_map_init(map, min, max, pageable)
* Allocates a VM map entry for insertion.
* No entry fields are filled in. This routine is
*/
-vm_map_entry_t vm_map_entry_create(map)
+static struct vm_map_entry *mappool;
+static int mappoolcnt;
+void vm_map_entry_dispose(vm_map_t map, vm_map_entry_t entry);
+
+vm_map_entry_t
+vm_map_entry_create(map)
vm_map_t map;
{
vm_map_entry_t entry;
-#ifdef DEBUG
- extern vm_map_t kernel_map, kmem_map, mb_map, pager_map;
- boolean_t isspecial;
-
- isspecial = (map == kernel_map || map == kmem_map ||
- map == mb_map || map == pager_map);
- if (isspecial && map->entries_pageable ||
- !isspecial && !map->entries_pageable)
- panic("vm_map_entry_create: bogus map");
-#endif
- if (map->entries_pageable) {
+ int s;
+ int i;
+#define KENTRY_LOW_WATER 64
+#define MAPENTRY_LOW_WATER 64
+
+ /*
+ * This is a *very* nasty (and sort of incomplete) hack!!!!
+ */
+ if (kentry_count < KENTRY_LOW_WATER) {
+ if (mapvmpgcnt && mapvm) {
+ vm_page_t m;
+ if (m = vm_page_alloc(kmem_object, mapvm-vm_map_min(kmem_map))) {
+ int newentries;
+ newentries = (NBPG/sizeof (struct vm_map_entry));
+ vm_page_wire(m);
+ m->flags &= ~PG_BUSY;
+ pmap_enter(vm_map_pmap(kmem_map), mapvm,
+ VM_PAGE_TO_PHYS(m), VM_PROT_DEFAULT, 1);
+
+ entry = (vm_map_entry_t) mapvm;
+ mapvm += NBPG;
+ --mapvmpgcnt;
+
+ for (i = 0; i < newentries; i++) {
+ vm_map_entry_dispose(kernel_map, entry);
+ entry++;
+ }
+ }
+ }
+ }
+
+ if (map == kernel_map || map == kmem_map || map == pager_map) {
+
+ if (entry = kentry_free) {
+ kentry_free = entry->next;
+ --kentry_count;
+ return entry;
+ }
+
+ if (entry = mappool) {
+ mappool = entry->next;
+ --mappoolcnt;
+ return entry;
+ }
+
+ } else {
+ if (entry = mappool) {
+ mappool = entry->next;
+ --mappoolcnt;
+ return entry;
+ }
+
MALLOC(entry, vm_map_entry_t, sizeof(struct vm_map_entry),
M_VMMAPENT, M_WAITOK);
- } else {
- if (entry = kentry_free)
- kentry_free = kentry_free->next;
}
+dopanic:
if (entry == NULL)
panic("vm_map_entry_create: out of map entries");
@@ -305,25 +355,28 @@ vm_map_entry_t vm_map_entry_create(map)
*
* Inverse of vm_map_entry_create.
*/
-void vm_map_entry_dispose(map, entry)
+void
+vm_map_entry_dispose(map, entry)
vm_map_t map;
vm_map_entry_t entry;
{
-#ifdef DEBUG
- extern vm_map_t kernel_map, kmem_map, mb_map, pager_map;
- boolean_t isspecial;
-
- isspecial = (map == kernel_map || map == kmem_map ||
- map == mb_map || map == pager_map);
- if (isspecial && map->entries_pageable ||
- !isspecial && !map->entries_pageable)
- panic("vm_map_entry_dispose: bogus map");
-#endif
- if (map->entries_pageable) {
- FREE(entry, M_VMMAPENT);
- } else {
+ extern vm_map_t kernel_map, kmem_map, pager_map;
+ int s;
+
+ if (map == kernel_map || map == kmem_map || map == pager_map ||
+ kentry_count < KENTRY_LOW_WATER) {
entry->next = kentry_free;
kentry_free = entry;
+ ++kentry_count;
+ } else {
+ if (mappoolcnt < MAPENTRY_LOW_WATER) {
+ entry->next = mappool;
+ mappool = entry;
+ ++mappoolcnt;
+ return;
+ }
+
+ FREE(entry, M_VMMAPENT);
}
}
@@ -799,7 +852,7 @@ static void _vm_map_clip_start(map, entry, start)
* See if we can simplify this entry first
*/
- vm_map_simplify_entry(map, entry);
+ /* vm_map_simplify_entry(map, entry); */
/*
* Split off the front portion --
@@ -1130,7 +1183,7 @@ vm_map_pageable(map, start, end, new_pageable)
{
register vm_map_entry_t entry;
vm_map_entry_t start_entry;
- register vm_offset_t failed;
+ register vm_offset_t failed = 0;
int rv;
vm_map_lock(map);
@@ -2546,11 +2599,13 @@ void vm_map_simplify(map, start)
if (map->first_free == this_entry)
map->first_free = prev_entry;
- SAVE_HINT(map, prev_entry);
- vm_map_entry_unlink(map, this_entry);
- prev_entry->end = this_entry->end;
- vm_object_deallocate(this_entry->object.vm_object);
- vm_map_entry_dispose(map, this_entry);
+ if (!this_entry->object.vm_object->paging_in_progress) {
+ SAVE_HINT(map, prev_entry);
+ vm_map_entry_unlink(map, this_entry);
+ prev_entry->end = this_entry->end;
+ vm_object_deallocate(this_entry->object.vm_object);
+ vm_map_entry_dispose(map, this_entry);
+ }
}
vm_map_unlock(map);
}
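
vm_map_entry_create()/vm_map_entry_dispose() now draw from two free lists: a statically backed pool reserved for the kernel maps (which must never recurse into the allocator) and a small cache of malloc'd entries for user maps, each guarded by a low-water mark. The sketch below models that two-tier scheme in user space; it leaves out the page-wiring refill hack and uses malloc()/free() where the kernel uses MALLOC/FREE, so treat the names and counts as illustrative.

#include <stdio.h>
#include <stdlib.h>

struct map_entry {
	struct map_entry *next;		/* real entries also carry start/end/object fields */
};

#define KENTRY_STATIC		128	/* entries reserved for kernel maps, never malloc'd */
#define MAPENTRY_LOW_WATER	64	/* how many user-map entries we keep cached */

static struct map_entry kentry_store[KENTRY_STATIC];
static struct map_entry *kentry_free;	/* kernel-map pool */
static struct map_entry *mappool;	/* cached user-map entries */
static int kentry_count, mappoolcnt;

static void entry_pool_init(void)
{
	for (int i = 0; i < KENTRY_STATIC; i++) {
		kentry_store[i].next = kentry_free;
		kentry_free = &kentry_store[i];
		kentry_count++;
	}
}

static struct map_entry *entry_create(int kernel_map)
{
	struct map_entry *e;

	if (kernel_map && (e = kentry_free) != NULL) {	/* kernel maps must not malloc */
		kentry_free = e->next;
		kentry_count--;
		return e;
	}
	if ((e = mappool) != NULL) {			/* reuse a cached entry */
		mappool = e->next;
		mappoolcnt--;
		return e;
	}
	e = malloc(sizeof(*e));				/* user maps may fall back to malloc */
	if (e == NULL) {
		fprintf(stderr, "out of map entries\n");
		exit(1);
	}
	return e;
}

static void entry_dispose(struct map_entry *e, int kernel_map)
{
	if (kernel_map) {				/* return to the static pool */
		e->next = kentry_free;
		kentry_free = e;
		kentry_count++;
	} else if (mappoolcnt < MAPENTRY_LOW_WATER) {	/* keep a small cache around */
		e->next = mappool;
		mappool = e;
		mappoolcnt++;
	} else {
		free(e);
	}
}

int main(void)
{
	entry_pool_init();
	struct map_entry *k = entry_create(1);
	struct map_entry *u = entry_create(0);

	entry_dispose(u, 0);
	entry_dispose(k, 1);
	printf("kernel pool: %d, user cache: %d\n", kentry_count, mappoolcnt);
	return 0;
}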
diff --git a/sys/vm/vm_map.h b/sys/vm/vm_map.h
index d25b7a2..ee253ef 100644
--- a/sys/vm/vm_map.h
+++ b/sys/vm/vm_map.h
@@ -102,11 +102,11 @@ struct vm_map_entry {
vm_offset_t end; /* end address */
union vm_map_object object; /* object I point to */
vm_offset_t offset; /* offset into object */
- boolean_t is_a_map; /* Is "object" a map? */
- boolean_t is_sub_map; /* Is "object" a submap? */
+ boolean_t is_a_map:1, /* Is "object" a map? */
+ is_sub_map:1, /* Is "object" a submap? */
/* Only in sharing maps: */
- boolean_t copy_on_write; /* is data copy-on-write */
- boolean_t needs_copy; /* does object need to be copied */
+ copy_on_write:1,/* is data copy-on-write */
+ needs_copy:1; /* does object need to be copied */
/* Only in task maps: */
vm_prot_t protection; /* protection code */
vm_prot_t max_protection; /* maximum protection */
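
The header change above packs the four boolean_t fields of struct vm_map_entry into one-bit bitfields, so the flags share a single word instead of occupying four. A standalone illustration of the saving follows; the exact sizes are compiler- and ABI-dependent, so treat the printed numbers as indicative only.

#include <stdio.h>

typedef int boolean_t;

/* four full-width booleans, as the old header had them */
struct entry_wide {
    boolean_t is_a_map;
    boolean_t is_sub_map;
    boolean_t copy_on_write;
    boolean_t needs_copy;
};

/* the same flags packed into single-bit fields, as in the new header */
struct entry_packed {
    boolean_t is_a_map:1,
              is_sub_map:1,
              copy_on_write:1,
              needs_copy:1;
};

int
main(void)
{
    printf("wide: %lu bytes, packed: %lu bytes\n",
        (unsigned long)sizeof(struct entry_wide),
        (unsigned long)sizeof(struct entry_packed));
    return (0);
}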
@@ -176,7 +176,7 @@ typedef struct {
/* XXX: number of kernel maps and entries to statically allocate */
#define MAX_KMAP 10
-#define MAX_KMAPENT 500
+#define MAX_KMAPENT 128
#ifdef KERNEL
boolean_t vm_map_check_protection __P((vm_map_t,
diff --git a/sys/vm/vm_meter.c b/sys/vm/vm_meter.c
index 9db6f50..2a8029b 100644
--- a/sys/vm/vm_meter.c
+++ b/sys/vm/vm_meter.c
@@ -95,6 +95,7 @@ loadav(avg)
/*
* Attributes associated with virtual memory.
*/
+int
vm_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p)
int *name;
u_int namelen;
diff --git a/sys/vm/vm_mmap.c b/sys/vm/vm_mmap.c
index 340cded..2e7204a 100644
--- a/sys/vm/vm_mmap.c
+++ b/sys/vm/vm_mmap.c
@@ -217,8 +217,10 @@ mmap(p, uap, retval)
if (flags & MAP_FIXED) {
if (VM_MAXUSER_ADDRESS > 0 && addr + size >= VM_MAXUSER_ADDRESS)
return (EINVAL);
+#ifndef i386
if (VM_MIN_ADDRESS > 0 && addr < VM_MIN_ADDRESS)
return (EINVAL);
+#endif
if (addr > addr + size)
return (EINVAL);
}
@@ -400,8 +402,10 @@ munmap(p, uap, retval)
*/
if (VM_MAXUSER_ADDRESS > 0 && addr + size >= VM_MAXUSER_ADDRESS)
return (EINVAL);
+#ifndef i386
if (VM_MIN_ADDRESS > 0 && addr < VM_MIN_ADDRESS)
return (EINVAL);
+#endif
if (addr > addr + size)
return (EINVAL);
map = &p->p_vmspace->vm_map;
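
Both range checks above end with the idiom addr > addr + size, which catches the case where an unsigned address range wraps past the top of the address space, and the new #ifndef i386 blocks simply skip the VM_MIN_ADDRESS test on that platform. A small self-contained illustration of the wrap-around test, with made-up addresses:

#include <stdio.h>

typedef unsigned long vaddr_t;      /* stand-in for vm_offset_t */

/* nonzero if [addr, addr + size) wraps past the top of the address space */
static int
range_wraps(vaddr_t addr, vaddr_t size)
{
    return (addr > addr + size);    /* unsigned overflow makes the sum smaller */
}

int
main(void)
{
    printf("%d\n", range_wraps(0x1000, 0x2000));            /* 0: no wrap */
    printf("%d\n", range_wraps((vaddr_t)-0x1000, 0x2000));  /* 1: wraps */
    return (0);
}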
diff --git a/sys/vm/vm_object.c b/sys/vm/vm_object.c
index d11fa8b..a6419dc 100644
--- a/sys/vm/vm_object.c
+++ b/sys/vm/vm_object.c
@@ -72,6 +72,12 @@
#include <vm/vm.h>
#include <vm/vm_page.h>
+#include <vm/vm_pageout.h>
+
+static void _vm_object_allocate(vm_size_t, vm_object_t);
+void vm_object_deactivate_pages(vm_object_t);
+void vm_object_cache_trim(void);
+void vm_object_remove(vm_pager_t);
/*
* Virtual memory objects maintain the actual data
@@ -99,26 +105,56 @@
*
*/
+
struct vm_object kernel_object_store;
struct vm_object kmem_object_store;
+extern int vm_cache_max;
#define VM_OBJECT_HASH_COUNT 157
-int vm_cache_max = 100; /* can patch if necessary */
-struct vm_object_hash_head vm_object_hashtable[VM_OBJECT_HASH_COUNT];
+struct vm_object_hash_head vm_object_hashtable[VM_OBJECT_HASH_COUNT];
long object_collapses = 0;
long object_bypasses = 0;
-static void _vm_object_allocate __P((vm_size_t, vm_object_t));
+static void
+_vm_object_allocate(size, object)
+ vm_size_t size;
+ register vm_object_t object;
+{
+ bzero(object, sizeof *object);
+ TAILQ_INIT(&object->memq);
+ vm_object_lock_init(object);
+ object->ref_count = 1;
+ object->resident_page_count = 0;
+ object->size = size;
+ object->flags = OBJ_INTERNAL; /* vm_allocate_with_pager will reset */
+ object->paging_in_progress = 0;
+ object->copy = NULL;
+
+ /*
+ * Object starts out read-write, with no pager.
+ */
+
+ object->pager = NULL;
+ object->paging_offset = 0;
+ object->shadow = NULL;
+ object->shadow_offset = (vm_offset_t) 0;
+
+ simple_lock(&vm_object_list_lock);
+ TAILQ_INSERT_TAIL(&vm_object_list, object, object_list);
+ vm_object_count++;
+ cnt.v_nzfod += atop(size);
+ simple_unlock(&vm_object_list_lock);
+}
/*
* vm_object_init:
*
* Initialize the VM objects module.
*/
-void vm_object_init(size)
- vm_size_t size;
+void
+vm_object_init(vm_offset_t nothing)
{
register int i;
@@ -132,10 +168,12 @@ void vm_object_init(size)
TAILQ_INIT(&vm_object_hashtable[i]);
kernel_object = &kernel_object_store;
- _vm_object_allocate(size, kernel_object);
+ _vm_object_allocate(VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS,
+ kernel_object);
kmem_object = &kmem_object_store;
- _vm_object_allocate(VM_KMEM_SIZE + VM_MBUF_SIZE, kmem_object);
+ _vm_object_allocate(VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS,
+ kmem_object);
}
/*
@@ -144,55 +182,30 @@ void vm_object_init(size)
* Returns a new object with the given size.
*/
-vm_object_t vm_object_allocate(size)
+vm_object_t
+vm_object_allocate(size)
vm_size_t size;
{
register vm_object_t result;
+ int s;
result = (vm_object_t)
malloc((u_long)sizeof *result, M_VMOBJ, M_WAITOK);
+
_vm_object_allocate(size, result);
return(result);
}
-static void
-_vm_object_allocate(size, object)
- vm_size_t size;
- register vm_object_t object;
-{
- TAILQ_INIT(&object->memq);
- vm_object_lock_init(object);
- object->ref_count = 1;
- object->resident_page_count = 0;
- object->size = size;
- object->flags = OBJ_INTERNAL; /* vm_allocate_with_pager will reset */
- object->paging_in_progress = 0;
- object->copy = NULL;
-
- /*
- * Object starts out read-write, with no pager.
- */
-
- object->pager = NULL;
- object->paging_offset = 0;
- object->shadow = NULL;
- object->shadow_offset = (vm_offset_t) 0;
-
- simple_lock(&vm_object_list_lock);
- TAILQ_INSERT_TAIL(&vm_object_list, object, object_list);
- vm_object_count++;
- cnt.v_nzfod += atop(size);
- simple_unlock(&vm_object_list_lock);
-}
/*
* vm_object_reference:
*
* Gets another reference to the given object.
*/
-void vm_object_reference(object)
+inline void
+vm_object_reference(object)
register vm_object_t object;
{
if (object == NULL)
@@ -214,8 +227,9 @@ void vm_object_reference(object)
*
* No object may be locked.
*/
-void vm_object_deallocate(object)
- register vm_object_t object;
+void
+vm_object_deallocate(object)
+ vm_object_t object;
{
vm_object_t temp;
@@ -235,11 +249,11 @@ void vm_object_deallocate(object)
vm_object_lock(object);
if (--(object->ref_count) != 0) {
+ vm_object_unlock(object);
/*
* If there are still references, then
* we are done.
*/
- vm_object_unlock(object);
vm_object_cache_unlock();
return;
}
@@ -257,7 +271,12 @@ void vm_object_deallocate(object)
vm_object_cached++;
vm_object_cache_unlock();
- vm_object_deactivate_pages(object);
+/*
+ * this code segment was removed because it kills performance with
+ * large, repetitively used binaries. The functionality now resides
+ * in the pageout daemon.
+ * vm_object_deactivate_pages(object);
+ */
vm_object_unlock(object);
vm_object_cache_trim();
@@ -269,7 +288,7 @@ void vm_object_deallocate(object)
*/
vm_object_remove(object->pager);
vm_object_cache_unlock();
-
+
temp = object->shadow;
vm_object_terminate(object);
/* unlocks and deallocates object */
@@ -277,18 +296,19 @@ void vm_object_deallocate(object)
}
}
-
/*
* vm_object_terminate actually destroys the specified object, freeing
* up all previously used resources.
*
* The object must be locked.
*/
-void vm_object_terminate(object)
+void
+vm_object_terminate(object)
register vm_object_t object;
{
register vm_page_t p;
vm_object_t shadow_object;
+ int s;
/*
* Detach the object from its shadow if we are the shadow's
@@ -298,28 +318,68 @@ void vm_object_terminate(object)
vm_object_lock(shadow_object);
if (shadow_object->copy == object)
shadow_object->copy = NULL;
-#if 0
+/*
else if (shadow_object->copy != NULL)
panic("vm_object_terminate: copy/shadow inconsistency");
-#endif
+*/
vm_object_unlock(shadow_object);
}
/*
- * Wait until the pageout daemon is through with the object.
+ * Wait until the pageout daemon is through
+ * with the object.
*/
+
while (object->paging_in_progress) {
vm_object_sleep((int)object, object, FALSE);
vm_object_lock(object);
}
/*
- * If not an internal object clean all the pages, removing them
- * from paging queues as we go.
+ * While the paging system is locked,
+ * pull the object's pages off the active
+ * and inactive queues. This keeps the
+ * pageout daemon from playing with them
+ * during vm_pager_deallocate.
*
- * XXX need to do something in the event of a cleaning error.
+ * We can't free the pages yet, because the
+ * object's pager may have to write them out
+ * before deallocating the paging space.
+ */
+
+ for( p = object->memq.tqh_first; p; p=p->listq.tqe_next) {
+ VM_PAGE_CHECK(p);
+
+ vm_page_lock_queues();
+ s = splimp();
+ if (p->flags & PG_ACTIVE) {
+ TAILQ_REMOVE(&vm_page_queue_active, p, pageq);
+ p->flags &= ~PG_ACTIVE;
+ cnt.v_active_count--;
+ }
+
+ if (p->flags & PG_INACTIVE) {
+ TAILQ_REMOVE(&vm_page_queue_inactive, p, pageq);
+ p->flags &= ~PG_INACTIVE;
+ cnt.v_inactive_count--;
+ }
+ splx(s);
+ vm_page_unlock_queues();
+ }
+
+ vm_object_unlock(object);
+
+ if (object->paging_in_progress != 0)
+ panic("vm_object_deallocate: pageout in progress");
+
+ /*
+ * Clean and free the pages, as appropriate.
+ * All references to the object are gone,
+ * so we don't need to lock it.
*/
+
if ((object->flags & OBJ_INTERNAL) == 0) {
+ vm_object_lock(object);
(void) vm_object_page_clean(object, 0, 0, TRUE, TRUE);
vm_object_unlock(object);
}
@@ -335,23 +395,24 @@ void vm_object_terminate(object)
cnt.v_pfree++;
vm_page_unlock_queues();
}
- if ((object->flags & OBJ_INTERNAL) == 0)
- vm_object_unlock(object);
/*
- * Let the pager know object is dead.
+ * Let the pager know object is dead.
*/
+
if (object->pager != NULL)
vm_pager_deallocate(object->pager);
+
simple_lock(&vm_object_list_lock);
TAILQ_REMOVE(&vm_object_list, object, object_list);
vm_object_count--;
simple_unlock(&vm_object_list_lock);
/*
- * Free the space for the object.
+ * Free the space for the object.
*/
+
free((caddr_t)object, M_VMOBJ);
}
@@ -359,6 +420,69 @@ void vm_object_terminate(object)
* vm_object_page_clean
*
* Clean all dirty pages in the specified range of object.
+ * Leaves page on whatever queue it is currently on.
+ *
+ * Odd semantics: if start == end, we clean everything.
+ *
+ * The object must be locked.
+ */
+#if 1
+boolean_t
+vm_object_page_clean(object, start, end, syncio, de_queue)
+ register vm_object_t object;
+ register vm_offset_t start;
+ register vm_offset_t end;
+ boolean_t syncio;
+ boolean_t de_queue;
+{
+ register vm_page_t p, nextp;
+ int s;
+ int size;
+
+ if (object->pager == NULL)
+ return 1;
+
+ if (start != end) {
+ start = trunc_page(start);
+ end = round_page(end);
+ }
+ size = end - start;
+
+again:
+ /*
+ * Wait until the pageout daemon is through with the object.
+ */
+ while (object->paging_in_progress) {
+ vm_object_sleep((int)object, object, FALSE);
+ }
+
+ nextp = object->memq.tqh_first;
+ while ( (p = nextp) && ((start == end) || (size != 0) ) ) {
+ nextp = p->listq.tqe_next;
+ if (start == end || (p->offset >= start && p->offset < end)) {
+ if (p->flags & PG_BUSY)
+ continue;
+
+ size -= PAGE_SIZE;
+
+ if ((p->flags & PG_CLEAN)
+ && pmap_is_modified(VM_PAGE_TO_PHYS(p)))
+ p->flags &= ~PG_CLEAN;
+
+ if ((p->flags & PG_CLEAN) == 0) {
+ vm_pageout_clean(p,VM_PAGEOUT_FORCE);
+ goto again;
+ }
+ }
+ }
+ wakeup((caddr_t)object);
+ return 1;
+}
+#endif
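
The replacement vm_object_page_clean above rescans the object's page list from the top after every page it launders, skipping busy pages, since the writeback can sleep and the list may change underneath it. A toy model of that restart-after-clean loop, using a flat array instead of the object's memq:

#include <stdio.h>

#define NPAGES 8

struct page {
    int dirty;
    int busy;
};

/* pretend to write the page back; in the kernel this can sleep, and the
 * object's page list can change while we do, hence the restart below */
static void
clean_one(struct page *p)
{
    p->dirty = 0;
}

static void
clean_all(struct page *pg, int n)
{
    int i;

again:
    for (i = 0; i < n; i++) {
        if (pg[i].busy)
            continue;               /* skip busy pages, as the kernel loop does */
        if (pg[i].dirty) {
            clean_one(&pg[i]);
            goto again;             /* rescan from the top after each writeback */
        }
    }
}

int
main(void)
{
    struct page pg[NPAGES] = { { 1, 0 }, { 0, 0 }, { 1, 0 } };

    clean_all(pg, NPAGES);
    printf("page 0 dirty: %d\n", pg[0].dirty);
    return (0);
}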
+/*
+ * vm_object_page_clean
+ *
+ * Clean all dirty pages in the specified range of object.
* If syncio is TRUE, page cleaning is done synchronously.
* If de_queue is TRUE, pages are removed from any paging queue
* they were on, otherwise they are left on whatever queue they
@@ -372,6 +496,7 @@ void vm_object_terminate(object)
* somewhere. We attempt to clean (and dequeue) all pages regardless
* of where an error occurs.
*/
+#if 0
boolean_t
vm_object_page_clean(object, start, end, syncio, de_queue)
register vm_object_t object;
@@ -421,6 +546,7 @@ again:
* Loop through the object page list cleaning as necessary.
*/
for (p = object->memq.tqh_first; p != NULL; p = p->listq.tqe_next) {
+ onqueue = 0;
if ((start == end || p->offset >= start && p->offset < end) &&
!(p->flags & PG_FICTITIOUS)) {
if ((p->flags & PG_CLEAN) &&
@@ -493,6 +619,7 @@ again:
}
return (noerror);
}
+#endif
/*
* vm_object_deactivate_pages
@@ -539,6 +666,7 @@ vm_object_cache_trim()
vm_object_cache_unlock();
}
+
/*
* vm_object_pmap_copy:
*
@@ -576,7 +704,8 @@ void vm_object_pmap_copy(object, start, end)
*
* The object must *not* be locked.
*/
-void vm_object_pmap_remove(object, start, end)
+void
+vm_object_pmap_remove(object, start, end)
register vm_object_t object;
register vm_offset_t start;
register vm_offset_t end;
@@ -587,9 +716,19 @@ void vm_object_pmap_remove(object, start, end)
return;
vm_object_lock(object);
- for (p = object->memq.tqh_first; p != NULL; p = p->listq.tqe_next)
- if ((start <= p->offset) && (p->offset < end))
+again:
+ for (p = object->memq.tqh_first; p != NULL; p = p->listq.tqe_next) {
+ if ((start <= p->offset) && (p->offset < end)) {
+ if (p->flags & PG_BUSY) {
+ p->flags |= PG_WANTED;
+ tsleep((caddr_t) p, PVM, "vmopmr", 0);
+ goto again;
+ }
pmap_page_protect(VM_PAGE_TO_PHYS(p), VM_PROT_NONE);
+ if ((p->flags & PG_CLEAN) == 0)
+ p->flags |= PG_LAUNDRY;
+ }
+ }
vm_object_unlock(object);
}
@@ -629,6 +768,7 @@ void vm_object_copy(src_object, src_offset, size,
return;
}
+
/*
* If the object's pager is null_pager or the
* default pager, we don't have to make a copy
@@ -637,7 +777,15 @@ void vm_object_copy(src_object, src_offset, size,
*/
vm_object_lock(src_object);
+
+ /*
+ * Try to collapse the object before copying it.
+ */
+
+ vm_object_collapse(src_object);
+
if (src_object->pager == NULL ||
+ src_object->pager->pg_type == PG_SWAP ||
(src_object->flags & OBJ_INTERNAL)) {
/*
@@ -664,10 +812,6 @@ void vm_object_copy(src_object, src_offset, size,
return;
}
- /*
- * Try to collapse the object before copying it.
- */
- vm_object_collapse(src_object);
/*
* If the object has a pager, the pager wants to
@@ -798,7 +942,8 @@ void vm_object_copy(src_object, src_offset, size,
* are returned in the source parameters.
*/
-void vm_object_shadow(object, offset, length)
+void
+vm_object_shadow(object, offset, length)
vm_object_t *object; /* IN/OUT */
vm_offset_t *offset; /* IN/OUT */
vm_size_t length;
@@ -843,7 +988,8 @@ void vm_object_shadow(object, offset, length)
* Set the specified object's pager to the specified pager.
*/
-void vm_object_setpager(object, pager, paging_offset,
+void
+vm_object_setpager(object, pager, paging_offset,
read_only)
vm_object_t object;
vm_pager_t pager;
@@ -852,9 +998,12 @@ void vm_object_setpager(object, pager, paging_offset,
{
#ifdef lint
read_only++; /* No longer used */
-#endif
+#endif /* lint */
vm_object_lock(object); /* XXX ? */
+ if (object->pager && object->pager != pager) {
+ panic("!!!pager already allocated!!!\n");
+ }
object->pager = pager;
object->paging_offset = paging_offset;
vm_object_unlock(object); /* XXX ? */
@@ -865,7 +1014,7 @@ void vm_object_setpager(object, pager, paging_offset,
*/
#define vm_object_hash(pager) \
- (((unsigned)pager)%VM_OBJECT_HASH_COUNT)
+ (((unsigned)pager >> 5)%VM_OBJECT_HASH_COUNT)
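
The hash macro now shifts the pager pointer right by five bits before taking the modulus, on the idea that pointers returned by the allocator are aligned and their low bits carry little information. A quick way to compare the two variants on made-up aligned addresses (the shift amount and table size follow the macro above; everything else is hypothetical):

#include <stdio.h>

#define VM_OBJECT_HASH_COUNT 157

static unsigned
hash_noshift(unsigned long pager)
{
    return (pager % VM_OBJECT_HASH_COUNT);
}

static unsigned
hash_shift(unsigned long pager)
{
    return ((pager >> 5) % VM_OBJECT_HASH_COUNT);
}

int
main(void)
{
    unsigned long p;

    /* 32-byte-aligned "pager" addresses, as an allocator might hand back */
    for (p = 0x100000; p < 0x100000 + 8 * 32; p += 32)
        printf("%#lx -> bucket %u (no shift), %u (shift)\n",
            p, hash_noshift(p), hash_shift(p));
    return (0);
}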
/*
* vm_object_lookup looks in the object cache for an object with the
@@ -965,38 +1114,6 @@ vm_object_remove(pager)
}
}
-/*
- * vm_object_cache_clear removes all objects from the cache.
- *
- */
-
-void vm_object_cache_clear()
-{
- register vm_object_t object;
-
- /*
- * Remove each object in the cache by scanning down the
- * list of cached objects.
- */
- vm_object_cache_lock();
- while ((object = vm_object_cached_list.tqh_first) != NULL) {
- vm_object_cache_unlock();
-
- /*
- * Note: it is important that we use vm_object_lookup
- * to gain a reference, and not vm_object_reference, because
- * the logic for removing an object from the cache lies in
- * lookup.
- */
- if (object != vm_object_lookup(object->pager))
- panic("vm_object_cache_clear: I'm sooo confused.");
- pager_cache(object, FALSE);
-
- vm_object_cache_lock();
- }
- vm_object_cache_unlock();
-}
-
boolean_t vm_object_collapse_allowed = TRUE;
/*
* vm_object_collapse:
@@ -1008,8 +1125,12 @@ boolean_t vm_object_collapse_allowed = TRUE;
* Requires that the object be locked and the page
* queues be unlocked.
*
+ * This routine has significant changes by John S. Dyson
+ * to fix some swap memory leaks. 18 Dec 93
+ *
*/
-void vm_object_collapse(object)
+void
+vm_object_collapse(object)
register vm_object_t object;
{
@@ -1027,11 +1148,10 @@ void vm_object_collapse(object)
* Verify that the conditions are right for collapse:
*
* The object exists and no pages in it are currently
- * being paged out (or have ever been paged out).
+ * being paged out.
*/
if (object == NULL ||
- object->paging_in_progress != 0 ||
- object->pager != NULL)
+ object->paging_in_progress != 0)
return;
/*
@@ -1067,12 +1187,24 @@ void vm_object_collapse(object)
* parent object.
*/
if (backing_object->shadow != NULL &&
- backing_object->shadow->copy != NULL) {
+ backing_object->shadow->copy == backing_object) {
vm_object_unlock(backing_object);
return;
}
/*
+ * we can deal only with the swap pager
+ */
+ if ((object->pager &&
+ object->pager->pg_type != PG_SWAP) ||
+ (backing_object->pager &&
+ backing_object->pager->pg_type != PG_SWAP)) {
+ vm_object_unlock(backing_object);
+ return;
+ }
+
+
+ /*
* We know that we can either collapse the backing
* object (if the parent is the only reference to
* it) or (perhaps) remove the parent's reference
@@ -1098,7 +1230,8 @@ void vm_object_collapse(object)
* pages that shadow them.
*/
- while ((p = backing_object->memq.tqh_first) != NULL) {
+ while (p = backing_object->memq.tqh_first) {
+
new_offset = (p->offset - backing_offset);
/*
@@ -1116,19 +1249,12 @@ void vm_object_collapse(object)
vm_page_unlock_queues();
} else {
pp = vm_page_lookup(object, new_offset);
- if (pp != NULL && !(pp->flags & PG_FAKE)) {
+ if (pp != NULL || (object->pager && vm_pager_has_page(object->pager,
+ object->paging_offset + new_offset))) {
vm_page_lock_queues();
vm_page_free(p);
vm_page_unlock_queues();
- }
- else {
- if (pp) {
- /* may be someone waiting for it */
- PAGE_WAKEUP(pp);
- vm_page_lock_queues();
- vm_page_free(pp);
- vm_page_unlock_queues();
- }
+ } else {
vm_page_rename(p, object, new_offset);
}
}
@@ -1136,19 +1262,50 @@ void vm_object_collapse(object)
/*
* Move the pager from backing_object to object.
- *
- * XXX We're only using part of the paging space
- * for keeps now... we ought to discard the
- * unused portion.
*/
if (backing_object->pager) {
- object->pager = backing_object->pager;
- object->paging_offset = backing_offset +
- backing_object->paging_offset;
- backing_object->pager = NULL;
+ backing_object->paging_in_progress++;
+ if (object->pager) {
+ vm_pager_t bopager;
+ object->paging_in_progress++;
+ /*
+ * copy shadow object pages into ours
+ * and destroy unneeded pages in shadow object.
+ */
+ bopager = backing_object->pager;
+ backing_object->pager = NULL;
+ vm_object_remove(bopager);
+ swap_pager_copy(
+ bopager, backing_object->paging_offset,
+ object->pager, object->paging_offset,
+ object->shadow_offset);
+ object->paging_in_progress--;
+ if (object->paging_in_progress == 0)
+ wakeup((caddr_t)object);
+ } else {
+ object->paging_in_progress++;
+ /*
+ * grab the shadow objects pager
+ */
+ object->pager = backing_object->pager;
+ object->paging_offset = backing_object->paging_offset + backing_offset;
+ vm_object_remove(backing_object->pager);
+ backing_object->pager = NULL;
+ /*
+ * free unnecessary blocks
+ */
+ swap_pager_freespace(object->pager, 0, object->paging_offset);
+ object->paging_in_progress--;
+ if (object->paging_in_progress == 0)
+ wakeup((caddr_t)object);
+ }
+ backing_object->paging_in_progress--;
+ if (backing_object->paging_in_progress == 0)
+ wakeup((caddr_t)backing_object);
}
+
/*
* Object now shadows whatever backing_object did.
* Note that the reference to backing_object->shadow
@@ -1173,7 +1330,7 @@ void vm_object_collapse(object)
simple_lock(&vm_object_list_lock);
TAILQ_REMOVE(&vm_object_list, backing_object,
- object_list);
+ object_list);
vm_object_count--;
simple_unlock(&vm_object_list_lock);
@@ -1204,9 +1361,7 @@ void vm_object_collapse(object)
* of pages here.
*/
- for (p = backing_object->memq.tqh_first;
- p != NULL;
- p = p->listq.tqe_next) {
+ for( p = backing_object->memq.tqh_first;p;p=p->listq.tqe_next) {
new_offset = (p->offset - backing_offset);
/*
@@ -1219,10 +1374,9 @@ void vm_object_collapse(object)
*/
if (p->offset >= backing_offset &&
- new_offset < size &&
- ((pp = vm_page_lookup(object, new_offset))
- == NULL ||
- (pp->flags & PG_FAKE))) {
+ new_offset <= size &&
+ ((pp = vm_page_lookup(object, new_offset)) == NULL || (pp->flags & PG_FAKE)) &&
+ (!object->pager || !vm_pager_has_page(object->pager, object->paging_offset+new_offset))) {
/*
* Page still needed.
* Can't go any further.
@@ -1239,23 +1393,24 @@ void vm_object_collapse(object)
* count is at least 2.
*/
- object->shadow = backing_object->shadow;
- vm_object_reference(object->shadow);
+ vm_object_reference(object->shadow = backing_object->shadow);
object->shadow_offset += backing_object->shadow_offset;
/*
- * Backing object might have had a copy pointer
- * to us. If it did, clear it.
+ * Backing object might have had a copy pointer
+ * to us. If it did, clear it.
*/
if (backing_object->copy == object) {
backing_object->copy = NULL;
}
-
+
/* Drop the reference count on backing_object.
* Since its ref_count was at least 2, it
* will not vanish; so we don't need to call
* vm_object_deallocate.
*/
+ if (backing_object->ref_count == 1)
+ printf("should have called obj deallocate\n");
backing_object->ref_count--;
vm_object_unlock(backing_object);
@@ -1277,23 +1432,55 @@ void vm_object_collapse(object)
*
* The object must be locked.
*/
-void vm_object_page_remove(object, start, end)
+void
+vm_object_page_remove(object, start, end)
register vm_object_t object;
register vm_offset_t start;
register vm_offset_t end;
{
register vm_page_t p, next;
+ vm_offset_t size;
+ int cnt;
+ int s;
if (object == NULL)
return;
- for (p = object->memq.tqh_first; p != NULL; p = next) {
- next = p->listq.tqe_next;
- if ((start <= p->offset) && (p->offset < end)) {
- pmap_page_protect(VM_PAGE_TO_PHYS(p), VM_PROT_NONE);
- vm_page_lock_queues();
- vm_page_free(p);
- vm_page_unlock_queues();
+ start = trunc_page(start);
+ end = round_page(end);
+again:
+ size = end-start;
+ if (size > 4*PAGE_SIZE || size >= object->size/4) {
+ for (p = object->memq.tqh_first; (p != NULL && size > 0); p = next) {
+ next = p->listq.tqe_next;
+ if ((start <= p->offset) && (p->offset < end)) {
+ if (p->flags & PG_BUSY) {
+ p->flags |= PG_WANTED;
+ tsleep((caddr_t) p, PVM, "vmopar", 0);
+ goto again;
+ }
+ pmap_page_protect(VM_PAGE_TO_PHYS(p), VM_PROT_NONE);
+ vm_page_lock_queues();
+ vm_page_free(p);
+ vm_page_unlock_queues();
+ size -= PAGE_SIZE;
+ }
+ }
+ } else {
+ while (size > 0) {
+ while (p = vm_page_lookup(object, start)) {
+ if (p->flags & PG_BUSY) {
+ p->flags |= PG_WANTED;
+ tsleep((caddr_t) p, PVM, "vmopar", 0);
+ goto again;
+ }
+ pmap_page_protect(VM_PAGE_TO_PHYS(p), VM_PROT_NONE);
+ vm_page_lock_queues();
+ vm_page_free(p);
+ vm_page_unlock_queues();
+ }
+ start += PAGE_SIZE;
+ size -= PAGE_SIZE;
}
}
}
@@ -1389,6 +1576,27 @@ boolean_t vm_object_coalesce(prev_object, next_object,
}
/*
+ * returns page after looking up in shadow chain
+ */
+
+vm_page_t
+vm_object_page_lookup(object, offset)
+ vm_object_t object;
+ vm_offset_t offset;
+{
+ vm_page_t m;
+ if (!(m=vm_page_lookup(object, offset))) {
+ if (!object->shadow)
+ return 0;
+ else
+ return vm_object_page_lookup(object->shadow, offset + object->shadow_offset);
+ }
+ return m;
+}
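
vm_object_page_lookup above walks the shadow chain, adding shadow_offset at each level, and returns the first resident page it finds. The same walk can be written iteratively; the sketch below does so over a toy object/page layout whose field names are illustrative rather than the kernel's:

#include <stdio.h>
#include <stddef.h>

struct page {
    unsigned long offset;
};

struct object {
    struct object *shadow;          /* backing object, if any */
    unsigned long shadow_offset;    /* our offset into the shadow */
    struct page *pages;             /* resident pages, terminated by offset == ~0UL */
};

static struct page *
page_lookup(struct object *obj, unsigned long offset)
{
    struct page *p;

    for (p = obj->pages; p != NULL && p->offset != ~0UL; p++)
        if (p->offset == offset)
            return (p);
    return (NULL);
}

/* look in the object, then fall back to its shadow chain */
static struct page *
object_page_lookup(struct object *obj, unsigned long offset)
{
    struct page *m;

    while (obj != NULL) {
        if ((m = page_lookup(obj, offset)) != NULL)
            return (m);
        offset += obj->shadow_offset;   /* translate into the shadow's space */
        obj = obj->shadow;
    }
    return (NULL);
}

int
main(void)
{
    struct page backing_pages[] = { { 0x2000 }, { ~0UL } };
    struct object backing = { NULL, 0, backing_pages };
    struct page front_pages[] = { { ~0UL } };
    struct object front = { &backing, 0x1000, front_pages };

    printf("found: %d\n", object_page_lookup(&front, 0x1000) != NULL);
    return (0);
}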
+
+#define DEBUG
+#if defined(DEBUG) || (NDDB > 0)
+/*
* vm_object_print: [ debug ]
*/
void vm_object_print(object, full)
@@ -1434,3 +1642,4 @@ void vm_object_print(object, full)
printf("\n");
indent -= 2;
}
+#endif /* defined(DEBUG) || (NDDB > 0) */
diff --git a/sys/vm/vm_page.c b/sys/vm/vm_page.c
index 0cd9d87..38d320f 100644
--- a/sys/vm/vm_page.c
+++ b/sys/vm/vm_page.c
@@ -1,6 +1,6 @@
/*
- * Copyright (c) 1991, 1993
- * The Regents of the University of California. All rights reserved.
+ * Copyright (c) 1991 Regents of the University of California.
+ * All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* The Mach Operating System project at Carnegie-Mellon University.
@@ -33,9 +33,11 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * @(#)vm_page.c 8.3 (Berkeley) 3/21/94
- *
- *
+ * from: @(#)vm_page.c 7.4 (Berkeley) 5/7/91
+ * $Id: vm_page.c,v 1.17 1994/04/20 07:07:14 davidg Exp $
+ */
+
+/*
* Copyright (c) 1987, 1990 Carnegie-Mellon University.
* All rights reserved.
*
@@ -68,6 +70,7 @@
#include <sys/param.h>
#include <sys/systm.h>
+#include <sys/proc.h>
#include <vm/vm.h>
#include <vm/vm_page.h>
@@ -123,7 +126,6 @@ void vm_set_page_size()
break;
}
-
/*
* vm_page_startup:
*
@@ -133,17 +135,55 @@ void vm_set_page_size()
* for the object/offset-to-page hash table headers.
* Each page cell is initialized and placed on the free list.
*/
-void vm_page_startup(start, end)
- vm_offset_t *start;
- vm_offset_t *end;
+
+vm_offset_t
+vm_page_startup(starta, enda, vaddr)
+ register vm_offset_t starta;
+ vm_offset_t enda;
+ register vm_offset_t vaddr;
{
+ register vm_offset_t mapped;
register vm_page_t m;
- register struct pglist *bucket;
- vm_size_t npages;
+ register struct pglist *bucket;
+ vm_size_t npages, page_range;
+ register vm_offset_t new_start;
int i;
vm_offset_t pa;
+ int nblocks;
+ vm_offset_t first_managed_page;
+ int size;
+
extern vm_offset_t kentry_data;
extern vm_size_t kentry_data_size;
+ extern vm_offset_t phys_avail[];
+/* the biggest memory array is the second group of pages */
+ vm_offset_t start;
+ vm_offset_t biggestone, biggestsize;
+
+ vm_offset_t total;
+
+ total = 0;
+ biggestsize = 0;
+ biggestone = 0;
+ nblocks = 0;
+ vaddr = round_page(vaddr);
+
+ for (i = 0; phys_avail[i + 1]; i += 2) {
+ phys_avail[i] = round_page(phys_avail[i]);
+ phys_avail[i+1] = trunc_page(phys_avail[i+1]);
+ }
+
+ for (i = 0; phys_avail[i + 1]; i += 2) {
+ int size = phys_avail[i+1] - phys_avail[i];
+ if (size > biggestsize) {
+ biggestone = i;
+ biggestsize = size;
+ }
+ ++nblocks;
+ total += size;
+ }
+
+ start = phys_avail[biggestone];
/*
@@ -163,7 +203,7 @@ void vm_page_startup(start, end)
TAILQ_INIT(&vm_page_queue_inactive);
/*
- * Calculate the number of hash table buckets.
+ * Allocate (and initialize) the hash table buckets.
*
* The number of buckets MUST BE a power of 2, and
* the actual value is the next power of 2 greater
@@ -172,23 +212,31 @@ void vm_page_startup(start, end)
* Note:
* This computation can be tweaked if desired.
*/
-
+ vm_page_buckets = (struct pglist *)vaddr;
+ bucket = vm_page_buckets;
if (vm_page_bucket_count == 0) {
vm_page_bucket_count = 1;
- while (vm_page_bucket_count < atop(*end - *start))
+ while (vm_page_bucket_count < atop(total))
vm_page_bucket_count <<= 1;
}
+
vm_page_hash_mask = vm_page_bucket_count - 1;
/*
- * Allocate (and initialize) the hash table buckets.
+ * Validate these addresses.
*/
- vm_page_buckets = (struct pglist *)
- pmap_bootstrap_alloc(vm_page_bucket_count * sizeof(struct pglist));
- bucket = vm_page_buckets;
- for (i = vm_page_bucket_count; i--;) {
+ new_start = start + vm_page_bucket_count * sizeof(struct pglist);
+ new_start = round_page(new_start);
+ mapped = vaddr;
+ vaddr = pmap_map(mapped, start, new_start,
+ VM_PROT_READ|VM_PROT_WRITE);
+ start = new_start;
+ bzero((caddr_t) mapped, vaddr - mapped);
+ mapped = vaddr;
+
+ for (i = 0; i< vm_page_bucket_count; i++) {
TAILQ_INIT(bucket);
bucket++;
}
@@ -196,11 +244,9 @@ void vm_page_startup(start, end)
simple_lock_init(&bucket_lock);
/*
- * Truncate the remainder of physical memory to our page size.
+ * round (or truncate) the addresses to our page size.
*/
- *end = trunc_page(*end);
-
/*
* Pre-allocate maps and map entries that cannot be dynamically
* allocated via malloc(). The maps include the kernel_map and
@@ -213,9 +259,20 @@ void vm_page_startup(start, end)
* map (they should use their own maps).
*/
- kentry_data_size = round_page(MAX_KMAP*sizeof(struct vm_map) +
- MAX_KMAPENT*sizeof(struct vm_map_entry));
- kentry_data = (vm_offset_t) pmap_bootstrap_alloc(kentry_data_size);
+ kentry_data_size = MAX_KMAP * sizeof(struct vm_map) +
+ MAX_KMAPENT * sizeof(struct vm_map_entry);
+ kentry_data_size = round_page(kentry_data_size);
+ kentry_data = (vm_offset_t) vaddr;
+ vaddr += kentry_data_size;
+
+ /*
+ * Validate these zone addresses.
+ */
+
+ new_start = start + (vaddr - mapped);
+ pmap_map(mapped, start, new_start, VM_PROT_READ|VM_PROT_WRITE);
+ bzero((caddr_t) mapped, (vaddr - mapped));
+ start = round_page(new_start);
/*
* Compute the number of pages of memory that will be
@@ -223,53 +280,53 @@ void vm_page_startup(start, end)
* of a page structure per page).
*/
- cnt.v_free_count = npages = (*end - *start + sizeof(struct vm_page))
- / (PAGE_SIZE + sizeof(struct vm_page));
+ npages = (total - (start - phys_avail[biggestone])) / (PAGE_SIZE + sizeof(struct vm_page));
+ first_page = phys_avail[0] / PAGE_SIZE;
+ page_range = (phys_avail[(nblocks-1)*2 + 1] - phys_avail[0]) / PAGE_SIZE;
/*
- * Record the extent of physical memory that the
- * virtual memory system manages.
+ * Initialize the mem entry structures now, and
+ * put them in the free queue.
*/
- first_page = *start;
- first_page += npages*sizeof(struct vm_page);
- first_page = atop(round_page(first_page));
- last_page = first_page + npages - 1;
-
- first_phys_addr = ptoa(first_page);
- last_phys_addr = ptoa(last_page) + PAGE_MASK;
+ vm_page_array = (vm_page_t) vaddr;
+ mapped = vaddr;
/*
- * Allocate and clear the mem entry structures.
+ * Validate these addresses.
*/
- m = vm_page_array = (vm_page_t)
- pmap_bootstrap_alloc(npages * sizeof(struct vm_page));
+ new_start = round_page(start + page_range * sizeof (struct vm_page));
+ mapped = pmap_map(mapped, start, new_start,
+ VM_PROT_READ|VM_PROT_WRITE);
+ start = new_start;
+
+ first_managed_page = start / PAGE_SIZE;
/*
- * Initialize the mem entry structures now, and
- * put them in the free queue.
+ * Clear all of the page structures
*/
+ bzero((caddr_t)vm_page_array, page_range * sizeof(struct vm_page));
- pa = first_phys_addr;
- while (npages--) {
- m->flags = 0;
- m->object = NULL;
- m->phys_addr = pa;
-#ifdef i386
- if (pmap_isvalidphys(m->phys_addr)) {
+ cnt.v_page_count = 0;
+ cnt.v_free_count= 0;
+ for (i = 0; phys_avail[i + 1] && npages > 0; i += 2) {
+ if (i == biggestone)
+ pa = ptoa(first_managed_page);
+ else
+ pa = phys_avail[i];
+ while (pa < phys_avail[i + 1] && npages-- > 0) {
+ ++cnt.v_page_count;
+ ++cnt.v_free_count;
+ m = PHYS_TO_VM_PAGE(pa);
+ m->flags = 0;
+ m->object = 0;
+ m->phys_addr = pa;
+ m->hold_count = 0;
TAILQ_INSERT_TAIL(&vm_page_queue_free, m, pageq);
- } else {
- /* perhaps iomem needs it's own type, or dev pager? */
- m->flags |= PG_FICTITIOUS | PG_BUSY;
- cnt.v_free_count--;
+ pa += PAGE_SIZE;
}
-#else /* i386 */
- TAILQ_INSERT_TAIL(&vm_page_queue_free, m, pageq);
-#endif /* i386 */
- m++;
- pa += PAGE_SIZE;
}
/*
@@ -278,8 +335,7 @@ void vm_page_startup(start, end)
*/
simple_lock_init(&vm_pages_needed_lock);
- /* from now on, pmap_bootstrap_alloc can't be used */
- vm_page_startup_initialized = TRUE;
+ return(mapped);
}
/*
@@ -289,8 +345,13 @@ void vm_page_startup(start, end)
*
* NOTE: This macro depends on vm_page_bucket_count being a power of 2.
*/
-#define vm_page_hash(object, offset) \
- (((unsigned)object+(unsigned)atop(offset))&vm_page_hash_mask)
+inline const int
+vm_page_hash(object, offset)
+ vm_object_t object;
+ vm_offset_t offset;
+{
+ return ((unsigned)object + offset/NBPG) & vm_page_hash_mask;
+}
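
The macro has become an inline function, but the hash is unchanged: fold the object pointer and the page index together and mask with vm_page_hash_mask, which is valid only because vm_page_startup forces vm_page_bucket_count to a power of two. A small illustration of that mask construction (the bucket count and inputs are invented):

#include <stdio.h>

#define NBPG 4096

static unsigned bucket_count;
static unsigned hash_mask;

static unsigned
page_hash(unsigned long object, unsigned long offset)
{
    return ((unsigned)(object + offset / NBPG) & hash_mask);
}

int
main(void)
{
    unsigned total_pages = 3000;    /* invented figure for total managed pages */

    /* round the bucket count up to a power of two, as vm_page_startup does */
    bucket_count = 1;
    while (bucket_count < total_pages)
        bucket_count <<= 1;
    hash_mask = bucket_count - 1;   /* only correct for a power of two */

    printf("buckets: %u, hash(0x1000, 2*NBPG) = %u\n",
        bucket_count, page_hash(0x1000, 2 * NBPG));
    return (0);
}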
/*
* vm_page_insert: [ internal use only ]
@@ -307,7 +368,7 @@ void vm_page_insert(mem, object, offset)
register vm_offset_t offset;
{
register struct pglist *bucket;
- int spl;
+ int s;
VM_PAGE_CHECK(mem);
@@ -326,11 +387,11 @@ void vm_page_insert(mem, object, offset)
*/
bucket = &vm_page_buckets[vm_page_hash(object, offset)];
- spl = splimp();
+ s = splimp();
simple_lock(&bucket_lock);
TAILQ_INSERT_TAIL(bucket, mem, hashq);
simple_unlock(&bucket_lock);
- (void) splx(spl);
+ (void) splx(s);
/*
* Now link into the object's list of backed pages.
@@ -361,7 +422,7 @@ void vm_page_remove(mem)
register vm_page_t mem;
{
register struct pglist *bucket;
- int spl;
+ int s;
VM_PAGE_CHECK(mem);
@@ -373,11 +434,11 @@ void vm_page_remove(mem)
*/
bucket = &vm_page_buckets[vm_page_hash(mem->object, mem->offset)];
- spl = splimp();
+ s = splimp();
simple_lock(&bucket_lock);
TAILQ_REMOVE(bucket, mem, hashq);
simple_unlock(&bucket_lock);
- (void) splx(spl);
+ (void) splx(s);
/*
* Now remove from the object's list of backed pages.
@@ -410,7 +471,7 @@ vm_page_t vm_page_lookup(object, offset)
{
register vm_page_t mem;
register struct pglist *bucket;
- int spl;
+ int s;
/*
* Search the hash table for this object/offset pair
@@ -418,19 +479,19 @@ vm_page_t vm_page_lookup(object, offset)
bucket = &vm_page_buckets[vm_page_hash(object, offset)];
- spl = splimp();
+ s = splimp();
simple_lock(&bucket_lock);
for (mem = bucket->tqh_first; mem != NULL; mem = mem->hashq.tqe_next) {
VM_PAGE_CHECK(mem);
if ((mem->object == object) && (mem->offset == offset)) {
simple_unlock(&bucket_lock);
- splx(spl);
+ splx(s);
return(mem);
}
}
simple_unlock(&bucket_lock);
- splx(spl);
+ splx(s);
return(NULL);
}
@@ -465,46 +526,62 @@ void vm_page_rename(mem, new_object, new_offset)
*
* Object must be locked.
*/
-vm_page_t vm_page_alloc(object, offset)
+vm_page_t
+vm_page_alloc(object, offset)
vm_object_t object;
vm_offset_t offset;
{
register vm_page_t mem;
- int spl;
+ int s;
- spl = splimp(); /* XXX */
+ s = splimp();
simple_lock(&vm_page_queue_free_lock);
- if (vm_page_queue_free.tqh_first == NULL) {
+ if ( object != kernel_object &&
+ object != kmem_object &&
+ curproc != pageproc && curproc != &proc0 &&
+ cnt.v_free_count < cnt.v_free_reserved) {
+
simple_unlock(&vm_page_queue_free_lock);
- splx(spl);
+ splx(s);
+ /*
+ * this wakeup seems unnecessary, but there is code that
+ * might just check to see if there are free pages, and
+ * punt if there aren't. VM_WAIT does this too, but
+ * redundant wakeups aren't that bad...
+ */
+ if (curproc != pageproc)
+ wakeup((caddr_t) &vm_pages_needed);
+ return(NULL);
+ }
+ if (( mem = vm_page_queue_free.tqh_first) == 0) {
+ simple_unlock(&vm_page_queue_free_lock);
+ printf("No pages???\n");
+ splx(s);
+ /*
+ * comment above re: wakeups applies here too...
+ */
+ if (curproc != pageproc)
+ wakeup((caddr_t) &vm_pages_needed);
return(NULL);
}
- mem = vm_page_queue_free.tqh_first;
TAILQ_REMOVE(&vm_page_queue_free, mem, pageq);
cnt.v_free_count--;
simple_unlock(&vm_page_queue_free_lock);
- splx(spl);
VM_PAGE_INIT(mem, object, offset);
+ splx(s);
- /*
- * Decide if we should poke the pageout daemon.
- * We do this if the free count is less than the low
- * water mark, or if the free count is less than the high
- * water mark (but above the low water mark) and the inactive
- * count is less than its target.
- *
- * We don't have the counts locked ... if they change a little,
- * it doesn't really matter.
- */
+/*
+ * don't wakeup too often, so we wakeup the pageout daemon when
+ * we would be nearly out of memory.
+ */
+ if (curproc != pageproc &&
+ (cnt.v_free_count < cnt.v_free_reserved))
+ wakeup((caddr_t) &vm_pages_needed);
- if (cnt.v_free_count < cnt.v_free_min ||
- (cnt.v_free_count < cnt.v_free_target &&
- cnt.v_inactive_count < cnt.v_inactive_target))
- thread_wakeup((int)&vm_pages_needed);
- return (mem);
+ return(mem);
}
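
The rewritten vm_page_alloc refuses ordinary allocations once the free count drops below cnt.v_free_reserved, so the pageout daemon, proc0, and the kernel objects can always get pages, and it wakes the daemon as memory runs low. A simplified userland model of that gate follows; the threshold and the privileged flag are assumptions standing in for the kernel's checks.

#include <stdio.h>

#define FREE_RESERVED 16            /* assumed reserve, analogous to cnt.v_free_reserved */

static int free_count = 18;
static int pageout_wakeups;

/* returns 1 on success, 0 if the caller must wait for the pageout daemon */
static int
page_alloc(int privileged)
{
    if (!privileged && free_count < FREE_RESERVED) {
        pageout_wakeups++;          /* poke the daemon and fail */
        return (0);
    }
    if (free_count == 0)
        return (0);
    free_count--;
    if (!privileged && free_count < FREE_RESERVED)
        pageout_wakeups++;          /* getting low: wake the daemon */
    return (1);
}

int
main(void)
{
    int i, got = 0;

    for (i = 0; i < 10; i++)
        got += page_alloc(0);       /* unprivileged allocations */
    printf("granted %d, free %d, wakeups %d\n",
        got, free_count, pageout_wakeups);
    return (0);
}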
/*
@@ -518,6 +595,8 @@ vm_page_t vm_page_alloc(object, offset)
void vm_page_free(mem)
register vm_page_t mem;
{
+ int s;
+ s = splimp();
vm_page_remove(mem);
if (mem->flags & PG_ACTIVE) {
TAILQ_REMOVE(&vm_page_queue_active, mem, pageq);
@@ -532,18 +611,46 @@ void vm_page_free(mem)
}
if (!(mem->flags & PG_FICTITIOUS)) {
- int spl;
- spl = splimp();
simple_lock(&vm_page_queue_free_lock);
+ if (mem->wire_count) {
+ cnt.v_wire_count--;
+ mem->wire_count = 0;
+ }
TAILQ_INSERT_TAIL(&vm_page_queue_free, mem, pageq);
cnt.v_free_count++;
simple_unlock(&vm_page_queue_free_lock);
- splx(spl);
+ splx(s);
+ /*
+ * if pageout daemon needs pages, then tell it that there
+ * are some free.
+ */
+ if (vm_pageout_pages_needed)
+ wakeup((caddr_t)&vm_pageout_pages_needed);
+
+ /*
+ * wakeup processes that are waiting on memory if we
+ * hit a high water mark.
+ */
+ if (cnt.v_free_count == cnt.v_free_min) {
+ wakeup((caddr_t)&cnt.v_free_count);
+ }
+
+ /*
+ * wakeup scheduler process if we have lots of memory.
+ * this process will swapin processes.
+ */
+ if (cnt.v_free_count == cnt.v_free_target) {
+ wakeup((caddr_t)&proc0);
+ }
+ } else {
+ splx(s);
}
+ wakeup((caddr_t) mem);
}
+
/*
* vm_page_wire:
*
@@ -556,9 +663,11 @@ void vm_page_free(mem)
void vm_page_wire(mem)
register vm_page_t mem;
{
+ int s;
VM_PAGE_CHECK(mem);
if (mem->wire_count == 0) {
+ s = splimp();
if (mem->flags & PG_ACTIVE) {
TAILQ_REMOVE(&vm_page_queue_active, mem, pageq);
cnt.v_active_count--;
@@ -569,6 +678,7 @@ void vm_page_wire(mem)
cnt.v_inactive_count--;
mem->flags &= ~PG_INACTIVE;
}
+ splx(s);
cnt.v_wire_count++;
}
mem->wire_count++;
@@ -585,17 +695,77 @@ void vm_page_wire(mem)
void vm_page_unwire(mem)
register vm_page_t mem;
{
+ int s;
VM_PAGE_CHECK(mem);
- mem->wire_count--;
+ s = splimp();
+
+ if( mem->wire_count)
+ mem->wire_count--;
if (mem->wire_count == 0) {
TAILQ_INSERT_TAIL(&vm_page_queue_active, mem, pageq);
cnt.v_active_count++;
mem->flags |= PG_ACTIVE;
cnt.v_wire_count--;
}
+ splx(s);
}
+#if 0
+/*
+ * vm_page_deactivate:
+ *
+ * Returns the given page to the inactive list,
+ * indicating that no physical maps have access
+ * to this page. [Used by the physical mapping system.]
+ *
+ * The page queues must be locked.
+ */
+void
+vm_page_deactivate(m)
+ register vm_page_t m;
+{
+ int spl;
+ VM_PAGE_CHECK(m);
+
+ /*
+ * Only move active pages -- ignore locked or already
+ * inactive ones.
+ *
+ * XXX: sometimes we get pages which aren't wired down
+ * or on any queue - we need to put them on the inactive
+ * queue also, otherwise we lose track of them.
+ * Paul Mackerras (paulus@cs.anu.edu.au) 9-Jan-93.
+ */
+
+ spl = splimp();
+ if (!(m->flags & PG_INACTIVE) && m->wire_count == 0 &&
+ m->hold_count == 0) {
+
+ pmap_clear_reference(VM_PAGE_TO_PHYS(m));
+ if (m->flags & PG_ACTIVE) {
+ TAILQ_REMOVE(&vm_page_queue_active, m, pageq);
+ m->flags &= ~PG_ACTIVE;
+ cnt.v_active_count--;
+ }
+ TAILQ_INSERT_TAIL(&vm_page_queue_inactive, m, pageq);
+ m->flags |= PG_INACTIVE;
+ cnt.v_inactive_count++;
+#define NOT_DEACTIVATE_PROTECTS
+#ifndef NOT_DEACTIVATE_PROTECTS
+ pmap_page_protect(VM_PAGE_TO_PHYS(m), VM_PROT_NONE);
+#else
+ if ((m->flags & PG_CLEAN) &&
+ pmap_is_modified(VM_PAGE_TO_PHYS(m)))
+ m->flags &= ~PG_CLEAN;
+#endif
+ if ((m->flags & PG_CLEAN) == 0)
+ m->flags |= PG_LAUNDRY;
+ }
+ splx(spl);
+}
+#endif
+#if 1
/*
* vm_page_deactivate:
*
@@ -608,14 +778,16 @@ void vm_page_unwire(mem)
void vm_page_deactivate(m)
register vm_page_t m;
{
+ int s;
VM_PAGE_CHECK(m);
+ s = splimp();
/*
* Only move active pages -- ignore locked or already
* inactive ones.
*/
- if (m->flags & PG_ACTIVE) {
+ if ((m->flags & PG_ACTIVE) && (m->hold_count == 0)) {
pmap_clear_reference(VM_PAGE_TO_PHYS(m));
TAILQ_REMOVE(&vm_page_queue_active, m, pageq);
TAILQ_INSERT_TAIL(&vm_page_queue_inactive, m, pageq);
@@ -623,15 +795,21 @@ void vm_page_deactivate(m)
m->flags |= PG_INACTIVE;
cnt.v_active_count--;
cnt.v_inactive_count++;
+#define NOT_DEACTIVATE_PROTECTS
+#ifndef NOT_DEACTIVATE_PROTECTS
+ pmap_page_protect(VM_PAGE_TO_PHYS(m), VM_PROT_NONE);
+#else
if (pmap_is_modified(VM_PAGE_TO_PHYS(m)))
m->flags &= ~PG_CLEAN;
+#endif
if (m->flags & PG_CLEAN)
m->flags &= ~PG_LAUNDRY;
else
m->flags |= PG_LAUNDRY;
}
+ splx(s);
}
-
+#endif
/*
* vm_page_activate:
*
@@ -643,8 +821,10 @@ void vm_page_deactivate(m)
void vm_page_activate(m)
register vm_page_t m;
{
+ int s;
VM_PAGE_CHECK(m);
+ s = splimp();
if (m->flags & PG_INACTIVE) {
TAILQ_REMOVE(&vm_page_queue_inactive, m, pageq);
cnt.v_inactive_count--;
@@ -656,8 +836,12 @@ void vm_page_activate(m)
TAILQ_INSERT_TAIL(&vm_page_queue_active, m, pageq);
m->flags |= PG_ACTIVE;
+ TAILQ_REMOVE(&m->object->memq, m, listq);
+ TAILQ_INSERT_TAIL(&m->object->memq, m, listq);
+ m->act_count = 10;
cnt.v_active_count++;
}
+ splx(s);
}
/*
@@ -668,12 +852,12 @@ void vm_page_activate(m)
* be used by the zero-fill object.
*/
-boolean_t vm_page_zero_fill(m)
+boolean_t
+vm_page_zero_fill(m)
vm_page_t m;
{
VM_PAGE_CHECK(m);
- m->flags &= ~PG_CLEAN;
pmap_zero_page(VM_PAGE_TO_PHYS(m));
return(TRUE);
}
@@ -683,14 +867,13 @@ boolean_t vm_page_zero_fill(m)
*
* Copy one page to another
*/
-
-void vm_page_copy(src_m, dest_m)
+void
+vm_page_copy(src_m, dest_m)
vm_page_t src_m;
vm_page_t dest_m;
{
VM_PAGE_CHECK(src_m);
VM_PAGE_CHECK(dest_m);
- dest_m->flags &= ~PG_CLEAN;
pmap_copy_page(VM_PAGE_TO_PHYS(src_m), VM_PAGE_TO_PHYS(dest_m));
}
diff --git a/sys/vm/vm_page.h b/sys/vm/vm_page.h
index 8bf5146..e8049c4 100644
--- a/sys/vm/vm_page.h
+++ b/sys/vm/vm_page.h
@@ -107,6 +107,8 @@ struct vm_page {
u_short wire_count; /* wired down maps refs (P) */
u_short flags; /* see below */
+ short hold_count; /* page hold count */
+ u_short act_count; /* page usage count */
vm_offset_t phys_addr; /* physical address of page */
};
@@ -209,7 +211,7 @@ simple_lock_data_t vm_page_queue_free_lock;
(m)->flags &= ~PG_BUSY; \
if ((m)->flags & PG_WANTED) { \
(m)->flags &= ~PG_WANTED; \
- thread_wakeup((int) (m)); \
+ wakeup((caddr_t) (m)); \
} \
}
@@ -222,6 +224,8 @@ simple_lock_data_t vm_page_queue_free_lock;
(mem)->flags = PG_BUSY | PG_CLEAN | PG_FAKE; \
vm_page_insert((mem), (object), (offset)); \
(mem)->wire_count = 0; \
+ (mem)->hold_count = 0; \
+ (mem)->act_count = 0; \
}
void vm_page_activate __P((vm_page_t));
@@ -233,10 +237,32 @@ void vm_page_insert __P((vm_page_t, vm_object_t, vm_offset_t));
vm_page_t vm_page_lookup __P((vm_object_t, vm_offset_t));
void vm_page_remove __P((vm_page_t));
void vm_page_rename __P((vm_page_t, vm_object_t, vm_offset_t));
-void vm_page_startup __P((vm_offset_t *, vm_offset_t *));
+vm_offset_t vm_page_startup __P((vm_offset_t, vm_offset_t, vm_offset_t));
void vm_page_unwire __P((vm_page_t));
void vm_page_wire __P((vm_page_t));
boolean_t vm_page_zero_fill __P((vm_page_t));
+
+/*
+ * Keep a page from being freed by the page daemon.
+ * This has much the same effect as wiring, but with much lower
+ * overhead, and should be used only for *very* temporary
+ * holding ("wiring").
+ */
+static inline void
+vm_page_hold(mem)
+ vm_page_t mem;
+{
+ mem->hold_count++;
+}
+
+static inline void
+vm_page_unhold(mem)
+ vm_page_t mem;
+{
+ if( --mem->hold_count < 0)
+ panic("vm_page_unhold: hold count < 0!!!");
+}
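
vm_page_hold/vm_page_unhold give a very cheap, very short-term pin: a held page keeps its queue position, and paths such as vm_page_deactivate and the pageout scan refuse to touch a page whose hold_count is nonzero. A minimal sketch of how such a pin interacts with a reclaim check (the structure and test below are illustrative, not the kernel's):

#include <stdio.h>

struct page {
    short hold_count;               /* short-term pin, cheaper than wiring */
    int   wired;
};

static void
page_hold(struct page *m)
{
    m->hold_count++;
}

static void
page_unhold(struct page *m)
{
    if (--m->hold_count < 0)
        m->hold_count = 0;          /* the kernel panics here instead */
}

/* a pageout-style path skips anything held or wired */
static int
page_reclaimable(struct page *m)
{
    return (m->hold_count == 0 && !m->wired);
}

int
main(void)
{
    struct page m = { 0, 0 };

    page_hold(&m);
    printf("reclaimable while held: %d\n", page_reclaimable(&m));
    page_unhold(&m);
    printf("reclaimable after unhold: %d\n", page_reclaimable(&m));
    return (0);
}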
+
#endif /* KERNEL */
#endif /* !_VM_PAGE_ */
diff --git a/sys/vm/vm_pageout.c b/sys/vm/vm_pageout.c
index 6795405..202bf03 100644
--- a/sys/vm/vm_pageout.c
+++ b/sys/vm/vm_pageout.c
@@ -1,6 +1,10 @@
/*
- * Copyright (c) 1991, 1993
- * The Regents of the University of California. All rights reserved.
+ * Copyright (c) 1991 Regents of the University of California.
+ * All rights reserved.
+ * Copyright (c) 1994 John S. Dyson
+ * All rights reserved.
+ * Copyright (c) 1994 David Greenman
+ * All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* The Mach Operating System project at Carnegie-Mellon University.
@@ -33,7 +37,7 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * @(#)vm_pageout.c 8.5 (Berkeley) 2/14/94
+ * @(#)vm_pageout.c 7.4 (Berkeley) 5/7/91
*
*
* Copyright (c) 1987, 1990 Carnegie-Mellon University.
@@ -60,6 +64,8 @@
*
* any improvements or extensions that they make and grant Carnegie the
* rights to redistribute these changes.
+ *
+ * $Id: vm_pageout.c,v 1.20 1994/04/20 07:07:15 davidg Exp $
*/
/*
@@ -67,501 +73,802 @@
*/
#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/resourcevar.h>
+#include <sys/malloc.h>
#include <vm/vm.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
-#ifndef VM_PAGE_FREE_MIN
-#define VM_PAGE_FREE_MIN (cnt.v_free_count / 20)
-#endif
+extern vm_map_t kmem_map;
+int vm_pages_needed; /* Event on which pageout daemon sleeps */
+int vm_pagescanner; /* Event on which pagescanner sleeps */
+int vm_pageout_free_min = 0; /* Stop pageout to wait for pagers at this free level */
+
+int vm_pageout_pages_needed = 0; /* flag saying that the pageout daemon needs pages */
+int vm_page_pagesfreed;
-#ifndef VM_PAGE_FREE_TARGET
-#define VM_PAGE_FREE_TARGET ((cnt.v_free_min * 4) / 3)
-#endif
+extern int npendingio;
+extern int hz;
+int vm_pageout_proc_limit;
+extern int nswiodone;
+extern int swap_pager_full;
+extern int swap_pager_ready();
-int vm_page_free_min_min = 16 * 1024;
-int vm_page_free_min_max = 256 * 1024;
+#define MAXREF 32767
-int vm_pages_needed; /* Event on which pageout daemon sleeps */
+#define MAXSCAN 512 /* maximum number of pages to scan in active queue */
+ /* set the "clock" hands to be (MAXSCAN * 4096) Bytes */
+#define ACT_DECLINE 1
+#define ACT_ADVANCE 6
+#define ACT_MAX 300
+
+#define LOWATER ((2048*1024)/NBPG)
+
+#define VM_PAGEOUT_PAGE_COUNT 8
+static vm_offset_t vm_space_needed;
+int vm_pageout_req_do_stats;
int vm_page_max_wired = 0; /* XXX max # of wired pages system-wide */
-#ifdef CLUSTERED_PAGEOUT
-#define MAXPOCLUSTER (MAXPHYS/NBPG) /* XXX */
-int doclustered_pageout = 1;
-#endif
/*
- * vm_pageout_scan does the dirty work for the pageout daemon.
+ * vm_pageout_clean:
+ * cleans a vm_page
*/
-void
-vm_pageout_scan()
+int
+vm_pageout_clean(m, sync)
+ register vm_page_t m;
+ int sync;
{
- register vm_page_t m, next;
- register int page_shortage;
- register int s;
- register int pages_freed;
- int free;
- vm_object_t object;
+ /*
+ * Clean the page and remove it from the
+ * laundry.
+ *
+ * We set the busy bit to cause
+ * potential page faults on this page to
+ * block.
+ *
+ * And we set pageout-in-progress to keep
+ * the object from disappearing during
+ * pageout. This guarantees that the
+ * page won't move from the inactive
+ * queue. (However, any other page on
+ * the inactive queue may move!)
+ */
+
+ register vm_object_t object;
+ register vm_pager_t pager;
+ int pageout_status[VM_PAGEOUT_PAGE_COUNT];
+ vm_page_t ms[VM_PAGEOUT_PAGE_COUNT];
+ int pageout_count;
+ int anyok=0;
+ int i;
+ vm_offset_t offset = m->offset;
+
+ object = m->object;
+ if (!object) {
+ printf("pager: object missing\n");
+ return 0;
+ }
/*
- * Only continue when we want more pages to be "free"
+ * Try to collapse the object before
+ * making a pager for it. We must
+ * unlock the page queues first.
+ * We try to defer the creation of a pager
+ * until no shadow is paging. This
+ * allows vm_object_collapse to work better and
+ * helps control swap space size.
+ * (J. Dyson 11 Nov 93)
*/
- cnt.v_rev++;
+ if (!object->pager &&
+ cnt.v_free_count < vm_pageout_free_min)
+ return 0;
- s = splimp();
- simple_lock(&vm_page_queue_free_lock);
- free = cnt.v_free_count;
- simple_unlock(&vm_page_queue_free_lock);
- splx(s);
+ if (!object->pager &&
+ object->shadow &&
+ object->shadow->paging_in_progress)
+ return 0;
- if (free < cnt.v_free_target) {
- swapout_threads();
+ if( !sync) {
+ if (object->shadow) {
+ vm_object_collapse(object);
+ if (!vm_page_lookup(object, offset))
+ return 0;
+ }
- /*
- * Be sure the pmap system is updated so
- * we can scan the inactive queue.
- */
+ if ((m->flags & PG_BUSY) || (m->hold_count != 0)) {
+ return 0;
+ }
+ }
+
+ pageout_count = 1;
+ ms[0] = m;
+
+ if( pager = object->pager) {
+ for(i=1;i<VM_PAGEOUT_PAGE_COUNT;i++) {
+ if( ms[i] = vm_page_lookup( object, offset+i*NBPG)) {
+ if((((ms[i]->flags & (PG_CLEAN|PG_INACTIVE|PG_BUSY)) == PG_INACTIVE)
+ || (( ms[i]->flags & PG_CLEAN) == 0 && sync == VM_PAGEOUT_FORCE))
+ && (ms[i]->wire_count == 0)
+ && (ms[i]->hold_count == 0))
+ pageout_count++;
+ else
+ break;
+ } else
+ break;
+ }
+ for(i=0;i<pageout_count;i++) {
+ ms[i]->flags |= PG_BUSY;
+ pmap_page_protect(VM_PAGE_TO_PHYS(ms[i]), VM_PROT_READ);
+ }
+ object->paging_in_progress += pageout_count;
+ cnt.v_pageouts += pageout_count;
+ } else {
+
+ m->flags |= PG_BUSY;
- pmap_update();
+ pmap_page_protect(VM_PAGE_TO_PHYS(m), VM_PROT_READ);
+
+ cnt.v_pageouts++;
+
+ object->paging_in_progress++;
+
+ pager = vm_pager_allocate(PG_DFLT, (caddr_t)0,
+ object->size, VM_PROT_ALL, 0);
+ if (pager != NULL) {
+ vm_object_setpager(object, pager, 0, FALSE);
+ }
}
/*
- * Acquire the resident page system lock,
- * as we may be changing what's resident quite a bit.
+ * If there is no pager for the page,
+ * use the default pager. If there's
+ * no place to put the page at the
+ * moment, leave it in the laundry and
+ * hope that there will be paging space
+ * later.
*/
- vm_page_lock_queues();
- /*
- * Start scanning the inactive queue for pages we can free.
- * We keep scanning until we have enough free pages or
- * we have scanned through the entire queue. If we
- * encounter dirty pages, we start cleaning them.
- */
+ if ((pager && pager->pg_type == PG_SWAP) ||
+ cnt.v_free_count >= vm_pageout_free_min) {
+ if( pageout_count == 1) {
+ pageout_status[0] = pager ?
+ vm_pager_put(pager, m,
+ ((sync || (object == kernel_object)) ? TRUE: FALSE)) :
+ VM_PAGER_FAIL;
+ } else {
+ if( !pager) {
+ for(i=0;i<pageout_count;i++)
+ pageout_status[i] = VM_PAGER_FAIL;
+ } else {
+ vm_pager_put_pages(pager, ms, pageout_count,
+ ((sync || (object == kernel_object)) ? TRUE : FALSE),
+ pageout_status);
+ }
+ }
+
+ } else {
+ for(i=0;i<pageout_count;i++)
+ pageout_status[i] = VM_PAGER_FAIL;
+ }
- pages_freed = 0;
- for (m = vm_page_queue_inactive.tqh_first; m != NULL; m = next) {
- s = splimp();
- simple_lock(&vm_page_queue_free_lock);
- free = cnt.v_free_count;
- simple_unlock(&vm_page_queue_free_lock);
- splx(s);
- if (free >= cnt.v_free_target)
+ for(i=0;i<pageout_count;i++) {
+ switch (pageout_status[i]) {
+ case VM_PAGER_OK:
+ ms[i]->flags &= ~PG_LAUNDRY;
+ ++anyok;
+ break;
+ case VM_PAGER_PEND:
+ ms[i]->flags &= ~PG_LAUNDRY;
+ ++anyok;
+ break;
+ case VM_PAGER_BAD:
+ /*
+ * Page outside of range of object.
+ * Right now we essentially lose the
+ * changes by pretending it worked.
+ */
+ ms[i]->flags &= ~PG_LAUNDRY;
+ ms[i]->flags |= PG_CLEAN;
+ pmap_clear_modify(VM_PAGE_TO_PHYS(ms[i]));
+ break;
+ case VM_PAGER_ERROR:
+ case VM_PAGER_FAIL:
+ /*
+ * If page couldn't be paged out, then
+ * reactivate the page so it doesn't
+ * clog the inactive list. (We will
+ * try paging it out again later).
+ */
+ if (ms[i]->flags & PG_INACTIVE)
+ vm_page_activate(ms[i]);
+ break;
+ case VM_PAGER_AGAIN:
break;
+ }
- cnt.v_scan++;
- next = m->pageq.tqe_next;
/*
- * If the page has been referenced, move it back to the
- * active queue.
+ * If the operation is still going, leave
+ * the page busy to block all other accesses.
+ * Also, leave the paging in progress
+ * indicator set so that we don't attempt an
+ * object collapse.
*/
- if (pmap_is_referenced(VM_PAGE_TO_PHYS(m))) {
- vm_page_activate(m);
- cnt.v_reactivated++;
- continue;
+ if (pageout_status[i] != VM_PAGER_PEND) {
+ PAGE_WAKEUP(ms[i]);
+ if (--object->paging_in_progress == 0)
+ wakeup((caddr_t) object);
+ if (pmap_is_referenced(VM_PAGE_TO_PHYS(ms[i]))) {
+ pmap_clear_reference(VM_PAGE_TO_PHYS(ms[i]));
+ if( ms[i]->flags & PG_INACTIVE)
+ vm_page_activate(ms[i]);
+ }
}
+ }
+ return anyok;
+}
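
vm_pageout_clean now tries to build a cluster of up to VM_PAGEOUT_PAGE_COUNT pages at consecutive offsets that are inactive (or forced), dirty, unwired, unheld, and not busy, and pushes the whole run to the pager in one call. The sketch below models only the run-gathering step, with a simplified eligibility test and a flat page array standing in for vm_page_lookup:

#include <stdio.h>

#define CLUSTER_MAX 8               /* analogous to VM_PAGEOUT_PAGE_COUNT */
#define NPAGES 16

struct page {
    int resident;
    int dirty;
    int busy;
    int wired;
    int hold_count;
};

static int
eligible(struct page *p)
{
    return (p->resident && p->dirty && !p->busy &&
        !p->wired && p->hold_count == 0);
}

/* starting at index "first", collect a run of eligible pages to push together */
static int
gather_cluster(struct page *pg, int first, struct page **out)
{
    int n = 0;

    while (n < CLUSTER_MAX && first + n < NPAGES &&
        eligible(&pg[first + n])) {
        out[n] = &pg[first + n];
        n++;
    }
    return (n);
}

int
main(void)
{
    static struct page pg[NPAGES];
    struct page *run[CLUSTER_MAX];
    int i, n;

    for (i = 3; i < 7; i++) {       /* four dirty resident pages in a row */
        pg[i].resident = 1;
        pg[i].dirty = 1;
    }
    n = gather_cluster(pg, 3, run);
    printf("cluster size: %d\n", n);    /* prints 4 */
    return (0);
}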
+/*
+ * vm_pageout_object_deactivate_pages
+ *
+ * deactivate enough pages to satisfy the inactive target
+ * requirements or if vm_page_proc_limit is set, then
+ * deactivate all of the pages in the object and its
+ * shadows.
+ *
+ * The object and map must be locked.
+ */
+int
+vm_pageout_object_deactivate_pages(map, object, count)
+ vm_map_t map;
+ vm_object_t object;
+ int count;
+{
+ register vm_page_t p, next;
+ int rcount;
+ int s;
+ int dcount;
+
+ dcount = 0;
+ if (count == 0)
+ count = 1;
+
+ if (object->shadow) {
+ int scount = count;
+ if( object->shadow->ref_count > 1)
+ scount /= object->shadow->ref_count;
+ if( scount)
+ dcount += vm_pageout_object_deactivate_pages(map, object->shadow, scount);
+ }
+
+ if (object->paging_in_progress)
+ return dcount;
+
+ /*
+ * scan the object's entire memory queue
+ */
+ rcount = object->resident_page_count;
+ p = object->memq.tqh_first;
+ while (p && (rcount-- > 0)) {
+ next = p->listq.tqe_next;
+ vm_page_lock_queues();
/*
- * If the page is clean, free it up.
+ * if a page is active, not wired, and is in the process's pmap,
+ * then deactivate the page.
*/
- if (m->flags & PG_CLEAN) {
- object = m->object;
- if (vm_object_lock_try(object)) {
- pmap_page_protect(VM_PAGE_TO_PHYS(m),
- VM_PROT_NONE);
- vm_page_free(m);
- pages_freed++;
- cnt.v_dfree++;
- vm_object_unlock(object);
+ if ((p->flags & (PG_ACTIVE|PG_BUSY)) == PG_ACTIVE &&
+ p->wire_count == 0 &&
+ p->hold_count == 0 &&
+ pmap_page_exists(vm_map_pmap(map), VM_PAGE_TO_PHYS(p))) {
+ if (!pmap_is_referenced(VM_PAGE_TO_PHYS(p))) {
+ p->act_count -= min(p->act_count, ACT_DECLINE);
+ /*
+ * if the page's act_count is zero, then we deactivate it
+ */
+ if (!p->act_count) {
+ vm_page_deactivate(p);
+ pmap_page_protect(VM_PAGE_TO_PHYS(p),
+ VM_PROT_NONE);
+ /*
+ * otherwise, if the page will be deactivated on the next
+ * go-around, place it at the end of the queue so that the
+ * other pages in memory age.
+ */
+ } else {
+ TAILQ_REMOVE(&vm_page_queue_active, p, pageq);
+ TAILQ_INSERT_TAIL(&vm_page_queue_active, p, pageq);
+ TAILQ_REMOVE(&object->memq, p, listq);
+ TAILQ_INSERT_TAIL(&object->memq, p, listq);
+ }
+ /*
+ * see if we are done yet
+ */
+ if (p->flags & PG_INACTIVE) {
+ --count;
+ ++dcount;
+ if (count <= 0 &&
+ cnt.v_inactive_count > cnt.v_inactive_target) {
+ vm_page_unlock_queues();
+ return dcount;
+ }
+ }
+
+ } else {
+ /*
+ * Move the page to the bottom of the queue.
+ */
+ pmap_clear_reference(VM_PAGE_TO_PHYS(p));
+ if (p->act_count < ACT_MAX)
+ p->act_count += ACT_ADVANCE;
+
+ TAILQ_REMOVE(&vm_page_queue_active, p, pageq);
+ TAILQ_INSERT_TAIL(&vm_page_queue_active, p, pageq);
+ TAILQ_REMOVE(&object->memq, p, listq);
+ TAILQ_INSERT_TAIL(&object->memq, p, listq);
}
- continue;
}
+ vm_page_unlock_queues();
+ p = next;
+ }
+ return dcount;
+}
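
The scan above ages pages with a simple counter: a referenced page gains ACT_ADVANCE (capped at ACT_MAX) and moves to the tail of the active queue, an unreferenced page loses ACT_DECLINE, and a page whose count reaches zero is deactivated. A compact model of just that update rule, with the pmap reference bit replaced by a plain flag:

#include <stdio.h>

#define ACT_DECLINE 1
#define ACT_ADVANCE 6
#define ACT_MAX     300

struct page {
    int act_count;
    int active;
};

/* one aging step for one page; "referenced" stands in for the pmap reference bit */
static void
age_page(struct page *p, int referenced)
{
    if (referenced) {
        if (p->act_count < ACT_MAX)
            p->act_count += ACT_ADVANCE;
    } else {
        p->act_count -= (p->act_count < ACT_DECLINE) ?
            p->act_count : ACT_DECLINE;
        if (p->act_count == 0)
            p->active = 0;          /* deactivate */
    }
}

int
main(void)
{
    struct page p = { 10, 1 };
    int step;

    for (step = 0; step < 12 && p.active; step++)
        age_page(&p, 0);            /* never referenced: decays away */
    printf("deactivated after %d steps\n", step);
    return (0);
}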
+
+
+/*
+ * deactivate some number of pages in a map, try to do it fairly, but
+ * that is really hard to do.
+ */
+
+void
+vm_pageout_map_deactivate_pages(map, entry, count, freeer)
+ vm_map_t map;
+ vm_map_entry_t entry;
+ int *count;
+ int (*freeer)(vm_map_t, vm_object_t, int);
+{
+ vm_map_t tmpm;
+ vm_map_entry_t tmpe;
+ vm_object_t obj;
+ if (*count <= 0)
+ return;
+ vm_map_reference(map);
+ if (!lock_try_read(&map->lock)) {
+ vm_map_deallocate(map);
+ return;
+ }
+ if (entry == 0) {
+ tmpe = map->header.next;
+ while (tmpe != &map->header && *count > 0) {
+ vm_pageout_map_deactivate_pages(map, tmpe, count, freeer);
+ tmpe = tmpe->next;
+ };
+ } else if (entry->is_sub_map || entry->is_a_map) {
+ tmpm = entry->object.share_map;
+ tmpe = tmpm->header.next;
+ while (tmpe != &tmpm->header && *count > 0) {
+ vm_pageout_map_deactivate_pages(tmpm, tmpe, count, freeer);
+ tmpe = tmpe->next;
+ };
+ } else if (obj = entry->object.vm_object) {
+ *count -= (*freeer)(map, obj, *count);
+ }
+ lock_read_done(&map->lock);
+ vm_map_deallocate(map);
+ return;
+}
+
+/*
+ * vm_pageout_scan does the dirty work for the pageout daemon.
+ */
+int
+vm_pageout_scan()
+{
+ vm_page_t m;
+ int page_shortage, maxscan, maxlaunder;
+ int pages_freed, free, nproc;
+ int desired_free;
+ vm_page_t next;
+ struct proc *p;
+ vm_object_t object;
+ int s;
+ int force_wakeup = 0;
+
+morefree:
+ /*
+ * scan the processes for exceeding their rlimits or if process
+ * is swapped out -- deactivate pages
+ */
+
+rescanproc1:
+ for (p = (struct proc *)allproc; p != NULL; p = p->p_next) {
+ vm_offset_t size;
+ int overage;
+ vm_offset_t limit;
+
/*
- * If the page is dirty but already being washed, skip it.
+ * if this is a system process or if we have already
+ * looked at this process, skip it.
*/
- if ((m->flags & PG_LAUNDRY) == 0)
+ if (p->p_flag & (P_SYSTEM|P_WEXIT)) {
continue;
+ }
/*
- * Otherwise the page is dirty and still in the laundry,
- * so we start the cleaning operation and remove it from
- * the laundry.
+ * if the process is in a non-running type state,
+ * don't touch it.
*/
- object = m->object;
- if (!vm_object_lock_try(object))
+ if (p->p_stat != SRUN && p->p_stat != SSLEEP) {
continue;
- cnt.v_pageouts++;
-#ifdef CLUSTERED_PAGEOUT
- if (object->pager &&
- vm_pager_cancluster(object->pager, PG_CLUSTERPUT))
- vm_pageout_cluster(m, object);
- else
-#endif
- vm_pageout_page(m, object);
- thread_wakeup((int) object);
- vm_object_unlock(object);
+ }
+
/*
- * Former next page may no longer even be on the inactive
- * queue (due to potential blocking in the pager with the
- * queues unlocked). If it isn't, we just start over.
+ * get a limit
*/
- if (next && (next->flags & PG_INACTIVE) == 0)
- next = vm_page_queue_inactive.tqh_first;
- }
-
- /*
- * Compute the page shortage. If we are still very low on memory
- * be sure that we will move a minimal amount of pages from active
- * to inactive.
- */
-
- page_shortage = cnt.v_inactive_target - cnt.v_inactive_count;
- if (page_shortage <= 0 && pages_freed == 0)
- page_shortage = 1;
-
- while (page_shortage > 0) {
+ limit = min(p->p_rlimit[RLIMIT_RSS].rlim_cur,
+ p->p_rlimit[RLIMIT_RSS].rlim_max);
+
/*
- * Move some more pages from active to inactive.
+ * let processes that are swapped out really be swapped out:
+ * set the limit to nothing (this will force a swap-out).
*/
+ if ((p->p_flag & P_INMEM) == 0)
+ limit = 0;
+
+ size = p->p_vmspace->vm_pmap.pm_stats.resident_count * NBPG;
+ if (size >= limit) {
+ overage = (size - limit) / NBPG;
+ vm_pageout_map_deactivate_pages(&p->p_vmspace->vm_map,
+ (vm_map_entry_t) 0, &overage, vm_pageout_object_deactivate_pages);
+ }
- if ((m = vm_page_queue_active.tqh_first) == NULL)
- break;
- vm_page_deactivate(m);
- page_shortage--;
}
- vm_page_unlock_queues();
-}
+ if (((cnt.v_free_count + cnt.v_inactive_count) >=
+ (cnt.v_inactive_target + cnt.v_free_target)) &&
+ (cnt.v_free_count >= cnt.v_free_target))
+ return force_wakeup;
-/*
- * Called with object and page queues locked.
- * If reactivate is TRUE, a pager error causes the page to be
- * put back on the active queue, ow it is left on the inactive queue.
- */
-void
-vm_pageout_page(m, object)
- vm_page_t m;
- vm_object_t object;
-{
- vm_pager_t pager;
- int pageout_status;
+ pages_freed = 0;
+ desired_free = cnt.v_free_target;
/*
- * We set the busy bit to cause potential page faults on
- * this page to block.
- *
- * We also set pageout-in-progress to keep the object from
- * disappearing during pageout. This guarantees that the
- * page won't move from the inactive queue. (However, any
- * other page on the inactive queue may move!)
+ * Start scanning the inactive queue for pages we can free.
+ * We keep scanning until we have enough free pages or
+ * we have scanned through the entire queue. If we
+ * encounter dirty pages, we start cleaning them.
*/
- pmap_page_protect(VM_PAGE_TO_PHYS(m), VM_PROT_NONE);
- m->flags |= PG_BUSY;
- /*
- * Try to collapse the object before making a pager for it.
- * We must unlock the page queues first.
- */
- vm_page_unlock_queues();
- if (object->pager == NULL)
- vm_object_collapse(object);
+ maxlaunder = (cnt.v_free_target - cnt.v_free_count);
+ maxscan = cnt.v_inactive_count;
+rescan1:
+ m = vm_page_queue_inactive.tqh_first;
+ while (m && (maxscan-- > 0) &&
+ (cnt.v_free_count < desired_free) ) {
+ vm_page_t next;
- object->paging_in_progress++;
- vm_object_unlock(object);
+ next = m->pageq.tqe_next;
- /*
- * Do a wakeup here in case the following operations block.
- */
- thread_wakeup((int) &cnt.v_free_count);
+ if( (m->flags & PG_INACTIVE) == 0) {
+ printf("vm_pageout_scan: page not inactive?");
+ continue;
+ }
- /*
- * If there is no pager for the page, use the default pager.
- * If there is no place to put the page at the moment,
- * leave it in the laundry and hope that there will be
- * paging space later.
- */
- if ((pager = object->pager) == NULL) {
- pager = vm_pager_allocate(PG_DFLT, (caddr_t)0, object->size,
- VM_PROT_ALL, (vm_offset_t)0);
- if (pager != NULL)
- vm_object_setpager(object, pager, 0, FALSE);
- }
- pageout_status = pager ? vm_pager_put(pager, m, FALSE) : VM_PAGER_FAIL;
- vm_object_lock(object);
- vm_page_lock_queues();
-
- switch (pageout_status) {
- case VM_PAGER_OK:
- case VM_PAGER_PEND:
- cnt.v_pgpgout++;
- m->flags &= ~PG_LAUNDRY;
- break;
- case VM_PAGER_BAD:
/*
- * Page outside of range of object. Right now we
- * essentially lose the changes by pretending it
- * worked.
- *
- * XXX dubious, what should we do?
+ * activate held pages
*/
- m->flags &= ~PG_LAUNDRY;
- m->flags |= PG_CLEAN;
- pmap_clear_modify(VM_PAGE_TO_PHYS(m));
- break;
- case VM_PAGER_AGAIN:
- {
- extern int lbolt;
+ if (m->hold_count != 0) {
+ vm_page_activate(m);
+ m = next;
+ continue;
+ }
/*
- * FAIL on a write is interpreted to mean a resource
- * shortage, so we put pause for awhile and try again.
- * XXX could get stuck here.
+ * don't mess with busy pages
*/
- (void) tsleep((caddr_t)&lbolt, PZERO|PCATCH, "pageout", 0);
- break;
- }
- case VM_PAGER_FAIL:
- case VM_PAGER_ERROR:
+ if (m->flags & PG_BUSY) {
+ m = next;
+ continue;
+ }
+
/*
- * If page couldn't be paged out, then reactivate
- * the page so it doesn't clog the inactive list.
- * (We will try paging out it again later).
+ * if the page is clean but has been referenced, then reactivate
+ * the page; but if we are very low on memory or the page has not
+ * been referenced, then we free it to the vm system.
*/
- vm_page_activate(m);
- cnt.v_reactivated++;
- break;
- }
+ if (m->flags & PG_CLEAN) {
+ if ((cnt.v_free_count > vm_pageout_free_min) /* XXX */
+ && pmap_is_referenced(VM_PAGE_TO_PHYS(m))) {
+ vm_page_activate(m);
+ } else if (!m->act_count) {
+ pmap_page_protect(VM_PAGE_TO_PHYS(m),
+ VM_PROT_NONE);
+ vm_page_free(m);
+ ++pages_freed;
+ } else {
+ m->act_count -= min(m->act_count, ACT_DECLINE);
+ TAILQ_REMOVE(&vm_page_queue_inactive, m, pageq);
+ TAILQ_INSERT_TAIL(&vm_page_queue_inactive, m, pageq);
+ }
+ } else if ((m->flags & PG_LAUNDRY) && maxlaunder > 0) {
+ int written;
+ if (pmap_is_referenced(VM_PAGE_TO_PHYS(m))) {
+ pmap_clear_reference(VM_PAGE_TO_PHYS(m));
+ vm_page_activate(m);
+ m = next;
+ continue;
+ }
+ /*
+ * If a page is dirty, then it is either
+ * being washed (but not yet cleaned)
+ * or it is still in the laundry. If it is
+ * still in the laundry, then we start the
+ * cleaning operation.
+ */
- pmap_clear_reference(VM_PAGE_TO_PHYS(m));
+ if (written = vm_pageout_clean(m,0)) {
+ maxlaunder -= written;
+ }
+ /*
+ * if the next page has been re-activated, start scanning again
+ */
+ if (next && (next->flags & PG_INACTIVE) == 0)
+ goto rescan1;
+ } else if (pmap_is_referenced(VM_PAGE_TO_PHYS(m))) {
+ pmap_clear_reference(VM_PAGE_TO_PHYS(m));
+ vm_page_activate(m);
+ }
+ m = next;
+ }
/*
- * If the operation is still going, leave the page busy
- * to block all other accesses. Also, leave the paging
- * in progress indicator set so that we don't attempt an
- * object collapse.
+ * now check malloc area or swap processes out if we are in low
+ * memory conditions
*/
- if (pageout_status != VM_PAGER_PEND) {
- m->flags &= ~PG_BUSY;
- PAGE_WAKEUP(m);
- object->paging_in_progress--;
+ if (cnt.v_free_count <= cnt.v_free_min) {
+ /*
+ * swap out inactive processes
+ */
+ swapout_threads();
}
-}
-
-#ifdef CLUSTERED_PAGEOUT
-#define PAGEOUTABLE(p) \
- ((((p)->flags & (PG_INACTIVE|PG_CLEAN|PG_LAUNDRY)) == \
- (PG_INACTIVE|PG_LAUNDRY)) && !pmap_is_referenced(VM_PAGE_TO_PHYS(p)))
-
-/*
- * Attempt to pageout as many contiguous (to ``m'') dirty pages as possible
- * from ``object''. Using information returned from the pager, we assemble
- * a sorted list of contiguous dirty pages and feed them to the pager in one
- * chunk. Called with paging queues and object locked. Also, object must
- * already have a pager.
- */
-void
-vm_pageout_cluster(m, object)
- vm_page_t m;
- vm_object_t object;
-{
- vm_offset_t offset, loff, hoff;
- vm_page_t plist[MAXPOCLUSTER], *plistp, p;
- int postatus, ix, count;
/*
- * Determine the range of pages that can be part of a cluster
- * for this object/offset. If it is only our single page, just
- * do it normally.
+ * Compute the page shortage. If we are still very low on memory
+ * be sure that we will move a minimal amount of pages from active
+ * to inactive.
*/
- vm_pager_cluster(object->pager, m->offset, &loff, &hoff);
- if (hoff - loff == PAGE_SIZE) {
- vm_pageout_page(m, object);
- return;
+
+ page_shortage = cnt.v_inactive_target -
+ (cnt.v_free_count + cnt.v_inactive_count);
+
+ if (page_shortage <= 0) {
+ if (pages_freed == 0) {
+ if( cnt.v_free_count < cnt.v_free_min) {
+ page_shortage = cnt.v_free_min - cnt.v_free_count;
+ } else if(((cnt.v_free_count + cnt.v_inactive_count) <
+ (cnt.v_free_min + cnt.v_inactive_target))) {
+ page_shortage = 1;
+ } else {
+ page_shortage = 0;
+ }
+ }
+
}
- plistp = plist;
+ maxscan = cnt.v_active_count;
+ m = vm_page_queue_active.tqh_first;
+ while (m && maxscan-- && (page_shortage > 0)) {
- /*
- * Target page is always part of the cluster.
- */
- pmap_page_protect(VM_PAGE_TO_PHYS(m), VM_PROT_NONE);
- m->flags |= PG_BUSY;
- plistp[atop(m->offset - loff)] = m;
- count = 1;
+ next = m->pageq.tqe_next;
- /*
- * Backup from the given page til we find one not fulfilling
- * the pageout criteria or we hit the lower bound for the
- * cluster. For each page determined to be part of the
- * cluster, unmap it and busy it out so it won't change.
- */
- ix = atop(m->offset - loff);
- offset = m->offset;
- while (offset > loff && count < MAXPOCLUSTER-1) {
- p = vm_page_lookup(object, offset - PAGE_SIZE);
- if (p == NULL || !PAGEOUTABLE(p))
- break;
- pmap_page_protect(VM_PAGE_TO_PHYS(p), VM_PROT_NONE);
- p->flags |= PG_BUSY;
- plistp[--ix] = p;
- offset -= PAGE_SIZE;
- count++;
+ /*
+ * Don't deactivate pages that are busy.
+ */
+ if ((m->flags & PG_BUSY) || (m->hold_count != 0)) {
+ m = next;
+ continue;
+ }
+
+ if (pmap_is_referenced(VM_PAGE_TO_PHYS(m))) {
+ pmap_clear_reference(VM_PAGE_TO_PHYS(m));
+ if (m->act_count < ACT_MAX)
+ m->act_count += ACT_ADVANCE;
+ TAILQ_REMOVE(&vm_page_queue_active, m, pageq);
+ TAILQ_INSERT_TAIL(&vm_page_queue_active, m, pageq);
+ TAILQ_REMOVE(&m->object->memq, m, listq);
+ TAILQ_INSERT_TAIL(&m->object->memq, m, listq);
+ } else {
+ m->act_count -= min(m->act_count, ACT_DECLINE);
+
+ /*
+ * if the page's act_count is zero, then we deactivate it
+ */
+ if (!m->act_count) {
+ vm_page_deactivate(m);
+ --page_shortage;
+ /*
+ * otherwise, since the page would be deactivated on a later
+ * pass, place it at the end of the queue so the other pages
+ * in memory get a chance to age.
+ */
+ } else {
+ TAILQ_REMOVE(&vm_page_queue_active, m, pageq);
+ TAILQ_INSERT_TAIL(&vm_page_queue_active, m, pageq);
+ TAILQ_REMOVE(&m->object->memq, m, listq);
+ TAILQ_INSERT_TAIL(&m->object->memq, m, listq);
+ }
+ }
+
+ m = next;
}
- plistp += atop(offset - loff);
- loff = offset;
/*
- * Now do the same moving forward from the target.
+ * if we have not freed any pages and we are desperate for memory,
+ * then we keep trying until we get some (any) memory.
*/
- ix = atop(m->offset - loff) + 1;
- offset = m->offset + PAGE_SIZE;
- while (offset < hoff && count < MAXPOCLUSTER) {
- p = vm_page_lookup(object, offset);
- if (p == NULL || !PAGEOUTABLE(p))
- break;
- pmap_page_protect(VM_PAGE_TO_PHYS(p), VM_PROT_NONE);
- p->flags |= PG_BUSY;
- plistp[ix++] = p;
- offset += PAGE_SIZE;
- count++;
+
+ if( !force_wakeup && (swap_pager_full || !force_wakeup ||
+ (pages_freed == 0 && (cnt.v_free_count < cnt.v_free_min)))){
+ vm_pager_sync();
+ force_wakeup = 1;
+ goto morefree;
}
- hoff = offset;
+ vm_page_pagesfreed += pages_freed;
+ return force_wakeup;
+}
- /*
- * Pageout the page.
- * Unlock everything and do a wakeup prior to the pager call
- * in case it blocks.
- */
- vm_page_unlock_queues();
- object->paging_in_progress++;
- vm_object_unlock(object);
-again:
- thread_wakeup((int) &cnt.v_free_count);
- postatus = vm_pager_put_pages(object->pager, plistp, count, FALSE);
- /*
- * XXX rethink this
- */
- if (postatus == VM_PAGER_AGAIN) {
- extern int lbolt;
+void
+vm_pagescan()
+{
+ int maxscan, pages_scanned, pages_referenced, nextscan, scantick = hz/20;
+ int m_ref, next_ref;
+ vm_page_t m, next;
- (void) tsleep((caddr_t)&lbolt, PZERO|PCATCH, "pageout", 0);
- goto again;
- } else if (postatus == VM_PAGER_BAD)
- panic("vm_pageout_cluster: VM_PAGER_BAD");
- vm_object_lock(object);
- vm_page_lock_queues();
+ (void) spl0();
+
+ nextscan = scantick;
+
+scanloop:
+
+ pages_scanned = 0;
+ pages_referenced = 0;
+ maxscan = min(cnt.v_active_count, MAXSCAN);
/*
- * Loop through the affected pages, reflecting the outcome of
- * the operation.
+ * Gather statistics on page usage.
*/
- for (ix = 0; ix < count; ix++) {
- p = *plistp++;
- switch (postatus) {
- case VM_PAGER_OK:
- case VM_PAGER_PEND:
- cnt.v_pgpgout++;
- p->flags &= ~PG_LAUNDRY;
- break;
- case VM_PAGER_FAIL:
- case VM_PAGER_ERROR:
- /*
- * Pageout failed, reactivate the target page so it
- * doesn't clog the inactive list. Other pages are
- * left as they are.
- */
- if (p == m) {
- vm_page_activate(p);
- cnt.v_reactivated++;
- }
- break;
- }
- pmap_clear_reference(VM_PAGE_TO_PHYS(p));
+ m = vm_page_queue_active.tqh_first;
+ while (m && (maxscan-- > 0)) {
+
+ ++pages_scanned;
+
+ next = m->pageq.tqe_next;
+
/*
- * If the operation is still going, leave the page busy
- * to block all other accesses.
+ * Don't mess with pages that are busy.
*/
- if (postatus != VM_PAGER_PEND) {
- p->flags &= ~PG_BUSY;
- PAGE_WAKEUP(p);
+ if ((m->flags & PG_BUSY) || (m->hold_count != 0)) {
+ TAILQ_REMOVE(&vm_page_queue_active, m, pageq);
+ TAILQ_INSERT_TAIL(&vm_page_queue_active, m, pageq);
+ m = next;
+ continue;
+ }
+ /*
+ * Advance pages that have been referenced, decline pages that
+ * have not.
+ */
+ if (pmap_is_referenced(VM_PAGE_TO_PHYS(m))) {
+ pmap_clear_reference(VM_PAGE_TO_PHYS(m));
+ pages_referenced++;
+ if (m->act_count < ACT_MAX)
+ m->act_count += ACT_ADVANCE;
+ TAILQ_REMOVE(&vm_page_queue_active, m, pageq);
+ TAILQ_INSERT_TAIL(&vm_page_queue_active, m, pageq);
+ TAILQ_REMOVE(&m->object->memq, m, listq);
+ TAILQ_INSERT_TAIL(&m->object->memq, m, listq);
+ } else {
+ m->act_count -= min(m->act_count, ACT_DECLINE);
+ /*
+ * if the page's act_count is zero and we are low on memory, then we deactivate it
+ */
+ if (!m->act_count &&
+ (cnt.v_free_count+cnt.v_inactive_count < cnt.v_free_target+cnt.v_inactive_target )) {
+ vm_page_deactivate(m);
+ /*
+ * otherwise, since the page would be deactivated on a later
+ * pass, place it at the end of the queue so the other pages
+ * in memory get a chance to age.
+ */
+ } else {
+ TAILQ_REMOVE(&vm_page_queue_active, m, pageq);
+ TAILQ_INSERT_TAIL(&vm_page_queue_active, m, pageq);
+ TAILQ_REMOVE(&m->object->memq, m, listq);
+ TAILQ_INSERT_TAIL(&m->object->memq, m, listq);
+ }
}
+ m = next;
}
- /*
- * If the operation is still going, leave the paging in progress
- * indicator set so that we don't attempt an object collapse.
- */
- if (postatus != VM_PAGER_PEND)
- object->paging_in_progress--;
+ if (pages_referenced) {
+ nextscan = (pages_scanned / pages_referenced) * scantick;
+ nextscan = max(nextscan, scantick);
+ nextscan = min(nextscan, hz);
+ } else
+ nextscan = hz;
+ tsleep((caddr_t) &vm_pagescanner, PVM, "scanw", nextscan);
+
+ goto scanloop;
}
-#endif
/*
* vm_pageout is the high level pageout daemon.
*/
-
-void vm_pageout()
+void
+vm_pageout()
{
+ extern npendingio, swiopend;
+ static nowakeup;
(void) spl0();
/*
* Initialize some paging parameters.
*/
- if (cnt.v_free_min == 0) {
- cnt.v_free_min = VM_PAGE_FREE_MIN;
- vm_page_free_min_min /= cnt.v_page_size;
- vm_page_free_min_max /= cnt.v_page_size;
- if (cnt.v_free_min < vm_page_free_min_min)
- cnt.v_free_min = vm_page_free_min_min;
- if (cnt.v_free_min > vm_page_free_min_max)
- cnt.v_free_min = vm_page_free_min_max;
- }
-
- if (cnt.v_free_target == 0)
- cnt.v_free_target = VM_PAGE_FREE_TARGET;
-
- if (cnt.v_free_target <= cnt.v_free_min)
- cnt.v_free_target = cnt.v_free_min + 1;
-
- /* XXX does not really belong here */
+vmretry:
+ cnt.v_free_min = 12;
+ cnt.v_free_reserved = 8;
+ if (cnt.v_free_min < 8)
+ cnt.v_free_min = 8;
+ if (cnt.v_free_min > 32)
+ cnt.v_free_min = 32;
+ vm_pageout_free_min = 4;
+ cnt.v_free_target = 2*cnt.v_free_min + cnt.v_free_reserved;
+ cnt.v_inactive_target = cnt.v_free_count / 12;
+ cnt.v_free_min += cnt.v_free_reserved;
+
+ /* XXX does not really belong here */
if (vm_page_max_wired == 0)
vm_page_max_wired = cnt.v_free_count / 3;
+
+ (void) swap_pager_alloc(0, 0, 0, 0);
+
/*
* The pageout daemon is never done, so loop
* forever.
*/
-
- simple_lock(&vm_pages_needed_lock);
while (TRUE) {
- thread_sleep((int) &vm_pages_needed, &vm_pages_needed_lock,
- FALSE);
- /*
- * Compute the inactive target for this scan.
- * We need to keep a reasonable amount of memory in the
- * inactive list to better simulate LRU behavior.
- */
- cnt.v_inactive_target =
- (cnt.v_active_count + cnt.v_inactive_count) / 3;
- if (cnt.v_inactive_target <= cnt.v_free_target)
- cnt.v_inactive_target = cnt.v_free_target + 1;
-
+ int force_wakeup;
+ extern struct loadavg averunnable;
+/*
+ cnt.v_free_min = 12 + averunnable.ldavg[0] / 1024;
+ cnt.v_free_target = 2*cnt.v_free_min + cnt.v_free_reserved;
+ cnt.v_inactive_target = cnt.v_free_target*2;
+*/
+
+ tsleep((caddr_t) &vm_pages_needed, PVM, "psleep", 0);
+
+ vm_pager_sync();
/*
- * Only make a scan if we are likely to do something.
- * Otherwise we might have been awakened by a pager
- * to clean up async pageouts.
+ * The force wakeup hack was added to eliminate delays and potential
+ * deadlock.  It was possible for the page daemon to indefinitely
+ * postpone waking up a process that was waiting on it for memory.
+ * The putmulti stuff seems to have aggravated the situation.
*/
- if (cnt.v_free_count < cnt.v_free_target ||
- cnt.v_inactive_count < cnt.v_inactive_target)
- vm_pageout_scan();
+ force_wakeup = vm_pageout_scan();
vm_pager_sync();
- simple_lock(&vm_pages_needed_lock);
- thread_wakeup((int) &cnt.v_free_count);
+ if( force_wakeup)
+ wakeup( (caddr_t) &cnt.v_free_count);
+ cnt.v_scan++;
+ wakeup((caddr_t) kmem_map);
}
}
+
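The rewritten scanner above ages pages with an act_count counter: a page found referenced gains ACT_ADVANCE (capped at ACT_MAX) and is requeued at the tail of the active queue, while an unreferenced page loses ACT_DECLINE and is deactivated once the count reaches zero. A minimal standalone sketch of that arithmetic follows; the constant values and the stripped-down page structure are placeholders (the real definitions live in vm_page.h), and only the aging logic mirrors the code above.

/*
 * Sketch of the act_count aging policy used by the new active-queue scan.
 * The constant values and the struct are stand-ins, not kernel definitions.
 */
#define ACT_DECLINE	1	/* assumed value */
#define ACT_ADVANCE	3	/* assumed value */
#define ACT_MAX		100	/* assumed value */

struct aging_page {
	int	act_count;	/* usage counter, 0..ACT_MAX */
	int	active;		/* 1 = still on the active queue */
};

static void
age_page(struct aging_page *p, int referenced)
{
	if (referenced) {
		/* referenced: credit the page and keep it active */
		if (p->act_count < ACT_MAX)
			p->act_count += ACT_ADVANCE;
	} else {
		/* unreferenced: debit the page, deactivate at zero */
		p->act_count -= (p->act_count < ACT_DECLINE ?
		    p->act_count : ACT_DECLINE);
		if (p->act_count == 0)
			p->active = 0;
	}
}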
diff --git a/sys/vm/vm_pageout.h b/sys/vm/vm_pageout.h
index a82a0ea..834aee5 100644
--- a/sys/vm/vm_pageout.h
+++ b/sys/vm/vm_pageout.h
@@ -72,7 +72,11 @@
extern int vm_pages_needed; /* should be some "event" structure */
simple_lock_data_t vm_pages_needed_lock;
+extern int vm_pageout_pages_needed;
+#define VM_PAGEOUT_ASYNC 0
+#define VM_PAGEOUT_SYNC 1
+#define VM_PAGEOUT_FORCE 2
/*
* Exported routines.
@@ -82,15 +86,27 @@ simple_lock_data_t vm_pages_needed_lock;
* Signal pageout-daemon and wait for it.
*/
-#define VM_WAIT { \
- simple_lock(&vm_pages_needed_lock); \
- thread_wakeup((int)&vm_pages_needed); \
- thread_sleep((int)&cnt.v_free_count, \
- &vm_pages_needed_lock, FALSE); \
- }
+#define VM_WAIT vm_wait()
+
+inline static void vm_wait() {
+ extern struct proc *curproc, *pageproc;
+ int s;
+ s = splhigh();
+ if (curproc == pageproc) {
+ vm_pageout_pages_needed = 1;
+ tsleep((caddr_t) &vm_pageout_pages_needed, PSWP, "vmwait", 0);
+ vm_pageout_pages_needed = 0;
+ } else {
+ wakeup((caddr_t) &vm_pages_needed);
+ tsleep((caddr_t) &cnt.v_free_count, PVM, "vmwait", 0);
+ }
+ splx(s);
+}
+
+
#ifdef KERNEL
void vm_pageout __P((void));
-void vm_pageout_scan __P((void));
+int vm_pageout_scan __P((void));
void vm_pageout_page __P((vm_page_t, vm_object_t));
void vm_pageout_cluster __P((vm_page_t, vm_object_t));
#endif
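With the header change above, VM_WAIT expands to the inline vm_wait(): an ordinary process wakes the pageout daemon and sleeps on cnt.v_free_count, while the pageout daemon itself sleeps on vm_pageout_pages_needed so it never waits on its own wakeup. A hedged sketch of the usual caller pattern; alloc_one_page() is a made-up wrapper, and a vm_page_alloc()-style allocator that returns NULL when no page is free is assumed.

/*
 * Sketch: typical allocation loop built on VM_WAIT.
 */
vm_page_t
alloc_one_page(vm_object_t object, vm_offset_t offset)
{
	vm_page_t m;

	while ((m = vm_page_alloc(object, offset)) == NULL) {
		/*
		 * No free pages: wake the pageout daemon and sleep until
		 * some are freed.  If the caller is the pageout daemon,
		 * vm_wait() sleeps on vm_pageout_pages_needed instead.
		 */
		VM_WAIT;
	}
	return (m);
}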
diff --git a/sys/vm/vm_pager.c b/sys/vm/vm_pager.c
index 7123abb..1e4b201 100644
--- a/sys/vm/vm_pager.c
+++ b/sys/vm/vm_pager.c
@@ -75,34 +75,14 @@
#include <vm/vm_page.h>
#include <vm/vm_kern.h>
-#ifdef SWAPPAGER
extern struct pagerops swappagerops;
-#endif
-
-#ifdef VNODEPAGER
extern struct pagerops vnodepagerops;
-#endif
-
-#ifdef DEVPAGER
extern struct pagerops devicepagerops;
-#endif
struct pagerops *pagertab[] = {
-#ifdef SWAPPAGER
&swappagerops, /* PG_SWAP */
-#else
- NULL,
-#endif
-#ifdef VNODEPAGER
&vnodepagerops, /* PG_VNODE */
-#else
- NULL,
-#endif
-#ifdef DEVPAGER
&devicepagerops, /* PG_DEV */
-#else
- NULL,
-#endif
};
int npagers = sizeof (pagertab) / sizeof (pagertab[0]);
@@ -118,6 +98,7 @@ struct pagerops *dfltpagerops = NULL; /* default pager */
*/
#define PAGER_MAP_SIZE (4 * 1024 * 1024)
+int pager_map_size = PAGER_MAP_SIZE;
vm_map_t pager_map;
boolean_t pager_map_wanted;
vm_offset_t pager_sva, pager_eva;
@@ -130,8 +111,10 @@ vm_pager_init()
/*
* Allocate a kernel submap for tracking get/put page mappings
*/
+/*
pager_map = kmem_suballoc(kernel_map, &pager_sva, &pager_eva,
PAGER_MAP_SIZE, FALSE);
+*/
/*
* Initialize known pagers
*/
@@ -173,38 +156,61 @@ vm_pager_deallocate(pager)
(*pager->pg_ops->pgo_dealloc)(pager);
}
+
int
-vm_pager_get_pages(pager, mlist, npages, sync)
+vm_pager_get_pages(pager, m, count, reqpage, sync)
vm_pager_t pager;
- vm_page_t *mlist;
- int npages;
+ vm_page_t *m;
+ int count;
+ int reqpage;
boolean_t sync;
{
- int rv;
+ extern boolean_t vm_page_zero_fill();
+ extern int vm_pageout_count;
+ int i;
if (pager == NULL) {
- rv = VM_PAGER_OK;
- while (npages--)
- if (!vm_page_zero_fill(*mlist)) {
- rv = VM_PAGER_FAIL;
- break;
- } else
- mlist++;
- return (rv);
+ for (i=0;i<count;i++) {
+ if( i != reqpage) {
+ PAGE_WAKEUP(m[i]);
+ vm_page_free(m[i]);
+ }
+ }
+ vm_page_zero_fill(m[reqpage]);
+ return VM_PAGER_OK;
+ }
+
+ if( pager->pg_ops->pgo_getpages == 0) {
+ for(i=0;i<count;i++) {
+ if( i != reqpage) {
+ PAGE_WAKEUP(m[i]);
+ vm_page_free(m[i]);
+ }
+ }
+ return(VM_PAGER_GET(pager, m[reqpage], sync));
+ } else {
+ return(VM_PAGER_GET_MULTI(pager, m, count, reqpage, sync));
}
- return ((*pager->pg_ops->pgo_getpages)(pager, mlist, npages, sync));
}
int
-vm_pager_put_pages(pager, mlist, npages, sync)
+vm_pager_put_pages(pager, m, count, sync, rtvals)
vm_pager_t pager;
- vm_page_t *mlist;
- int npages;
+ vm_page_t *m;
+ int count;
boolean_t sync;
+ int *rtvals;
{
- if (pager == NULL)
- panic("vm_pager_put_pages: null pager");
- return ((*pager->pg_ops->pgo_putpages)(pager, mlist, npages, sync));
+ int i;
+
+ if( pager->pg_ops->pgo_putpages)
+ return(VM_PAGER_PUT_MULTI(pager, m, count, sync, rtvals));
+ else {
+ for(i=0;i<count;i++) {
+ rtvals[i] = VM_PAGER_PUT( pager, m[i], sync);
+ }
+ return rtvals[0];
+ }
}
boolean_t
@@ -228,9 +234,10 @@ vm_pager_sync()
for (pgops = pagertab; pgops < &pagertab[npagers]; pgops++)
if (pgops)
- (*(*pgops)->pgo_putpages)(NULL, NULL, 0, FALSE);
+ (*(*pgops)->pgo_putpage)(NULL, NULL, 0);
}
+#if 0
void
vm_pager_cluster(pager, offset, loff, hoff)
vm_pager_t pager;
@@ -242,91 +249,25 @@ vm_pager_cluster(pager, offset, loff, hoff)
panic("vm_pager_cluster: null pager");
return ((*pager->pg_ops->pgo_cluster)(pager, offset, loff, hoff));
}
-
-void
-vm_pager_clusternull(pager, offset, loff, hoff)
- vm_pager_t pager;
- vm_offset_t offset;
- vm_offset_t *loff;
- vm_offset_t *hoff;
-{
- panic("vm_pager_nullcluster called");
-}
+#endif
vm_offset_t
-vm_pager_map_pages(mlist, npages, canwait)
- vm_page_t *mlist;
- int npages;
- boolean_t canwait;
+vm_pager_map_page(m)
+ vm_page_t m;
{
- vm_offset_t kva, va;
- vm_size_t size;
- vm_page_t m;
+ vm_offset_t kva;
- /*
- * Allocate space in the pager map, if none available return 0.
- * This is basically an expansion of kmem_alloc_wait with optional
- * blocking on no space.
- */
- size = npages * PAGE_SIZE;
- vm_map_lock(pager_map);
- while (vm_map_findspace(pager_map, 0, size, &kva)) {
- if (!canwait) {
- vm_map_unlock(pager_map);
- return (0);
- }
- pager_map_wanted = TRUE;
- vm_map_unlock(pager_map);
- (void) tsleep(pager_map, PVM, "pager_map", 0);
- vm_map_lock(pager_map);
- }
- vm_map_insert(pager_map, NULL, 0, kva, kva + size);
- vm_map_unlock(pager_map);
-
- for (va = kva; npages--; va += PAGE_SIZE) {
- m = *mlist++;
-#ifdef DEBUG
- if ((m->flags & PG_BUSY) == 0)
- panic("vm_pager_map_pages: page not busy");
- if (m->flags & PG_PAGEROWNED)
- panic("vm_pager_map_pages: page already in pager");
-#endif
-#ifdef DEBUG
- m->flags |= PG_PAGEROWNED;
-#endif
- pmap_enter(vm_map_pmap(pager_map), va, VM_PAGE_TO_PHYS(m),
- VM_PROT_DEFAULT, TRUE);
- }
- return (kva);
+ kva = kmem_alloc_wait(pager_map, PAGE_SIZE);
+ pmap_enter(vm_map_pmap(pager_map), kva, VM_PAGE_TO_PHYS(m),
+ VM_PROT_DEFAULT, TRUE);
+ return(kva);
}
void
-vm_pager_unmap_pages(kva, npages)
+vm_pager_unmap_page(kva)
vm_offset_t kva;
- int npages;
{
- vm_size_t size = npages * PAGE_SIZE;
-
-#ifdef DEBUG
- vm_offset_t va;
- vm_page_t m;
- int np = npages;
-
- for (va = kva; np--; va += PAGE_SIZE) {
- m = vm_pager_atop(va);
- if (m->flags & PG_PAGEROWNED)
- m->flags &= ~PG_PAGEROWNED;
- else
- printf("vm_pager_unmap_pages: %x(%x/%x) not owned\n",
- m, va, VM_PAGE_TO_PHYS(m));
- }
-#endif
- pmap_remove(vm_map_pmap(pager_map), kva, kva + size);
- vm_map_lock(pager_map);
- (void) vm_map_delete(pager_map, kva, kva + size);
- if (pager_map_wanted)
- wakeup(pager_map);
- vm_map_unlock(pager_map);
+ kmem_free_wakeup(pager_map, kva, PAGE_SIZE);
}
vm_page_t
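The cluster-oriented vm_pager_map_pages()/vm_pager_unmap_pages() pair is replaced above by single-page helpers that lease one page of kva from pager_map. A rough sketch of how a pager I/O path brackets a transfer with them; copy_from_backing_store() is a hypothetical stand-in for the actual I/O, not a routine in this tree.

/*
 * Sketch: map one page into kernel space, fill it, unmap it.
 */
int
pager_fill_one_page(vm_page_t m)
{
	vm_offset_t kva;
	int error;

	kva = vm_pager_map_page(m);	/* kmem_alloc_wait + pmap_enter */
	error = copy_from_backing_store((caddr_t)kva, PAGE_SIZE);
	vm_pager_unmap_page(kva);	/* kmem_free_wakeup */
	return (error);
}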
diff --git a/sys/vm/vm_pager.h b/sys/vm/vm_pager.h
index e4659c2..3e20e50 100644
--- a/sys/vm/vm_pager.h
+++ b/sys/vm/vm_pager.h
@@ -1,3 +1,4 @@
+
/*
* Copyright (c) 1990 University of Utah.
* Copyright (c) 1991, 1993
@@ -74,17 +75,26 @@ struct pagerops {
__P((caddr_t, vm_size_t, vm_prot_t, vm_offset_t));
void (*pgo_dealloc) /* Disassociate. */
__P((vm_pager_t));
+ int (*pgo_getpage)
+ __P((vm_pager_t, vm_page_t, boolean_t));
int (*pgo_getpages) /* Get (read) page. */
- __P((vm_pager_t, vm_page_t *, int, boolean_t));
+ __P((vm_pager_t, vm_page_t *, int, int, boolean_t));
+ int (*pgo_putpage)
+ __P((vm_pager_t, vm_page_t, boolean_t));
int (*pgo_putpages) /* Put (write) page. */
- __P((vm_pager_t, vm_page_t *, int, boolean_t));
+ __P((vm_pager_t, vm_page_t *, int, boolean_t, int *));
boolean_t (*pgo_haspage) /* Does pager have page? */
__P((vm_pager_t, vm_offset_t));
- void (*pgo_cluster) /* Return range of cluster. */
- __P((vm_pager_t, vm_offset_t,
- vm_offset_t *, vm_offset_t *));
};
+#define VM_PAGER_ALLOC(h, s, p, o) (*(pg)->pg_ops->pgo_alloc)(h, s, p, o)
+#define VM_PAGER_DEALLOC(pg) (*(pg)->pg_ops->pgo_dealloc)(pg)
+#define VM_PAGER_GET(pg, m, s) (*(pg)->pg_ops->pgo_getpage)(pg, m, s)
+#define VM_PAGER_GET_MULTI(pg, m, c, r, s) (*(pg)->pg_ops->pgo_getpages)(pg, m, c, r, s)
+#define VM_PAGER_PUT(pg, m, s) (*(pg)->pg_ops->pgo_putpage)(pg, m, s)
+#define VM_PAGER_PUT_MULTI(pg, m, c, s, rtval) (*(pg)->pg_ops->pgo_putpages)(pg, m, c, s, rtval)
+#define VM_PAGER_HASPAGE(pg, o) (*(pg)->pg_ops->pgo_haspage)(pg, o)
+
/*
* get/put return values
* OK operation was successful
@@ -107,21 +117,15 @@ extern struct pagerops *dfltpagerops;
vm_pager_t vm_pager_allocate
__P((int, caddr_t, vm_size_t, vm_prot_t, vm_offset_t));
vm_page_t vm_pager_atop __P((vm_offset_t));
-void vm_pager_cluster
- __P((vm_pager_t, vm_offset_t,
- vm_offset_t *, vm_offset_t *));
-void vm_pager_clusternull
- __P((vm_pager_t, vm_offset_t,
- vm_offset_t *, vm_offset_t *));
void vm_pager_deallocate __P((vm_pager_t));
int vm_pager_get_pages
- __P((vm_pager_t, vm_page_t *, int, boolean_t));
+ __P((vm_pager_t, vm_page_t *, int, int, boolean_t));
boolean_t vm_pager_has_page __P((vm_pager_t, vm_offset_t));
void vm_pager_init __P((void));
vm_pager_t vm_pager_lookup __P((struct pagerlst *, caddr_t));
vm_offset_t vm_pager_map_pages __P((vm_page_t *, int, boolean_t));
int vm_pager_put_pages
- __P((vm_pager_t, vm_page_t *, int, boolean_t));
+ __P((vm_pager_t, vm_page_t *, int, boolean_t, int *));
void vm_pager_sync __P((void));
void vm_pager_unmap_pages __P((vm_offset_t, int));
@@ -134,13 +138,16 @@ void vm_pager_unmap_pages __P((vm_offset_t, int));
({ \
vm_page_t ml[1]; \
ml[0] = (m); \
- vm_pager_get_pages(p, ml, 1, s); \
+ vm_pager_get_pages(p, ml, 1, 0, s); \
})
+
#define vm_pager_put(p, m, s) \
({ \
+ int rtval; \
vm_page_t ml[1]; \
ml[0] = (m); \
- vm_pager_put_pages(p, ml, 1, s); \
+ vm_pager_put_pages(p, ml, 1, s, &rtval); \
+ rtval; \
})
#endif
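The put side of the interface now reports one VM_PAGER_* status per page through an rtvals[] array instead of a single value for the whole cluster. A sketch of a caller under that convention; CLUSTER_MAX and the PG_LAUNDRY handling are illustrative assumptions, not code from the tree.

/*
 * Sketch: push a small cluster of dirty pages through the pager and
 * clear PG_LAUNDRY on the ones the pager accepted.
 */
#define CLUSTER_MAX	16	/* assumed local limit */

void
flush_cluster(vm_pager_t pager, vm_page_t *ma, int count)
{
	int rtvals[CLUSTER_MAX];
	int i;

	if (count > CLUSTER_MAX)
		count = CLUSTER_MAX;
	(void) vm_pager_put_pages(pager, ma, count, TRUE, rtvals);
	for (i = 0; i < count; i++)
		if (rtvals[i] == VM_PAGER_OK || rtvals[i] == VM_PAGER_PEND)
			ma[i]->flags &= ~PG_LAUNDRY;
}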
diff --git a/sys/vm/vm_param.h b/sys/vm/vm_param.h
index 2d2c715..4a785ce 100644
--- a/sys/vm/vm_param.h
+++ b/sys/vm/vm_param.h
@@ -84,14 +84,25 @@ typedef int boolean_t;
*/
#define DEFAULT_PAGE_SIZE 4096
+#if 0
+
/*
* All references to the size of a page should be done with PAGE_SIZE
* or PAGE_SHIFT. The fact they are variables is hidden here so that
* we can easily make them constant if we so desire.
*/
+#ifndef PAGE_SIZE
#define PAGE_SIZE cnt.v_page_size /* size of page */
+#endif
+#ifndef PAGE_MASK
#define PAGE_MASK page_mask /* size of page - 1 */
+#endif
+#ifndef PAGE_SHIFT
#define PAGE_SHIFT page_shift /* bits to shift for pages */
+#endif
+
+#endif
+
#ifdef KERNEL
extern vm_size_t page_mask;
extern int page_shift;
@@ -129,17 +140,34 @@ extern int page_shift;
* No rounding is used.
*/
#ifdef KERNEL
+
+#if 0
+
+#ifndef atop
#define atop(x) (((unsigned)(x)) >> PAGE_SHIFT)
+#endif
+#ifndef ptoa
#define ptoa(x) ((vm_offset_t)((x) << PAGE_SHIFT))
+#endif
/*
* Round off or truncate to the nearest page. These will work
* for either addresses or counts (i.e., 1 byte rounds to 1 page).
*/
+#ifndef round_page
#define round_page(x) \
((vm_offset_t)((((vm_offset_t)(x)) + PAGE_MASK) & ~PAGE_MASK))
+#endif
+#ifndef trunc_page
#define trunc_page(x) \
((vm_offset_t)(((vm_offset_t)(x)) & ~PAGE_MASK))
+#endif
+#ifndef num_pages
+#define num_pages(x) \
+ ((vm_offset_t)((((vm_offset_t)(x)) + PAGE_MASK) >> PAGE_SHIFT))
+#endif
+
+#endif
#define num_pages(x) \
((vm_offset_t)((((vm_offset_t)(x)) + PAGE_MASK) >> PAGE_SHIFT))
@@ -148,11 +176,13 @@ extern vm_offset_t first_addr; /* first physical page */
extern vm_offset_t last_addr; /* last physical page */
#else
+#if 0
/* out-of-kernel versions of round_page and trunc_page */
#define round_page(x) \
((((vm_offset_t)(x) + (vm_page_size - 1)) / vm_page_size) * vm_page_size)
#define trunc_page(x) \
((((vm_offset_t)(x)) / vm_page_size) * vm_page_size)
+#endif
#endif /* KERNEL */
#endif /* ASSEMBLER */
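The page-size macros above are plain mask-and-shift arithmetic. A small userland check of the same math for the default 4096-byte page; the EX_ names are local stand-ins so this compiles outside the kernel.

#include <assert.h>

#define EX_PAGE_SHIFT	12			/* 4096-byte page */
#define EX_PAGE_SIZE	(1UL << EX_PAGE_SHIFT)
#define EX_PAGE_MASK	(EX_PAGE_SIZE - 1)

#define ex_atop(x)		((unsigned long)(x) >> EX_PAGE_SHIFT)
#define ex_ptoa(x)		((unsigned long)(x) << EX_PAGE_SHIFT)
#define ex_trunc_page(x)	((unsigned long)(x) & ~EX_PAGE_MASK)
#define ex_round_page(x)	(((unsigned long)(x) + EX_PAGE_MASK) & ~EX_PAGE_MASK)

int
main(void)
{
	assert(ex_trunc_page(0x12345) == 0x12000);	/* chop down */
	assert(ex_round_page(0x12345) == 0x13000);	/* round up */
	assert(ex_atop(0x12345) == 0x12);		/* bytes -> pages */
	assert(ex_ptoa(0x12) == 0x12000);		/* pages -> bytes */
	return (0);
}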
diff --git a/sys/vm/vm_prot.h b/sys/vm/vm_prot.h
index b3bae43..ee009bc 100644
--- a/sys/vm/vm_prot.h
+++ b/sys/vm/vm_prot.h
@@ -75,7 +75,7 @@
* vm_prot_t VM protection values.
*/
-typedef int vm_prot_t;
+typedef u_char vm_prot_t;
/*
* Protection values, defined as bits within the vm_prot_t type
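Narrowing vm_prot_t from int to u_char works because the protection values are a three-bit set (read, write, execute), so a byte is plenty and every structure that embeds a protection field shrinks. A quick standalone check with stand-in EX_ names; the 0x01/0x02/0x04 encoding is assumed here rather than quoted from the header.

#include <assert.h>

typedef unsigned char ex_vm_prot_t;		/* the narrower type */

#define EX_VM_PROT_READ		((ex_vm_prot_t)0x01)	/* assumed values */
#define EX_VM_PROT_WRITE	((ex_vm_prot_t)0x02)
#define EX_VM_PROT_EXECUTE	((ex_vm_prot_t)0x04)
#define EX_VM_PROT_ALL \
	(EX_VM_PROT_READ | EX_VM_PROT_WRITE | EX_VM_PROT_EXECUTE)

int
main(void)
{
	assert(EX_VM_PROT_ALL == 0x07);		/* three bits fit in a byte */
	assert(sizeof(ex_vm_prot_t) == 1);
	return (0);
}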
diff --git a/sys/vm/vm_swap.c b/sys/vm/vm_swap.c
index 10b7523..5008a09 100644
--- a/sys/vm/vm_swap.c
+++ b/sys/vm/vm_swap.c
@@ -51,6 +51,7 @@
*/
int nswap, nswdev;
+int vm_swap_size;
#ifdef SEQSWAP
int niswdev; /* number of interleaved swap devices */
int niswap; /* size of interleaved swap area */
@@ -143,9 +144,8 @@ swapinit()
/*
* Now set up swap buffer headers.
*/
- bswlist.b_actf = sp;
for (i = 0; i < nswbuf - 1; i++, sp++) {
- sp->b_actf = sp + 1;
+ TAILQ_INSERT_HEAD(&bswlist, sp, b_freelist);
sp->b_rcred = sp->b_wcred = p->p_ucred;
sp->b_vnbufs.le_next = NOLIST;
}
@@ -390,12 +390,18 @@ swfree(p, index)
blk = niswap;
for (swp = &swdevt[niswdev]; swp != sp; swp++)
blk += swp->sw_nblks;
+#if 0
rmfree(swapmap, nblks, blk);
return (0);
+#endif
+ rlist_free(&swapmap, blk, blk + nblks - 1);
+ vm_swap_size += nblks;
+ return (0);
}
#endif
for (dvbase = 0; dvbase < nblks; dvbase += dmmax) {
blk = nblks - dvbase;
+
#ifdef SEQSWAP
if ((vsbase = index*dmmax + dvbase*niswdev) >= niswap)
panic("swfree");
@@ -405,6 +411,7 @@ swfree(p, index)
#endif
if (blk > dmmax)
blk = dmmax;
+#if 0
if (vsbase == 0) {
/*
* First of all chunks... initialize the swapmap.
@@ -422,6 +429,11 @@ swfree(p, index)
vsbase + ctod(CLSIZE));
} else
rmfree(swapmap, blk, vsbase);
+#endif
+ /* XXX -- we need to exclude the first cluster as above */
+ /* but for now, this will work fine... */
+ rlist_free(&swapmap, vsbase, vsbase + blk - 1);
+ vm_swap_size += blk;
}
return (0);
}
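swfree() now returns freed swap blocks through rlist_free() and keeps a running vm_swap_size total instead of calling rmfree() on a resource map. A sketch of that bookkeeping as a helper; the wrapper name is invented, and only the rlist_free() call shape and the size accounting are taken from the change above.

/*
 * Sketch: hand an inclusive range of swap blocks back to the swap
 * resource list and credit the global free-swap counter.
 */
extern struct rlist *swapmap;		/* range list of free swap blocks */
extern int vm_swap_size;		/* free swap, in blocks */
extern void rlist_free();		/* old-style declaration */

static void
swap_release_range(unsigned int start, unsigned int nblks)
{
	rlist_free(&swapmap, start, start + nblks - 1);	/* inclusive range */
	vm_swap_size += nblks;
}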
diff --git a/sys/vm/vm_unix.c b/sys/vm/vm_unix.c
index 3d49ea7..ee6ddf6 100644
--- a/sys/vm/vm_unix.c
+++ b/sys/vm/vm_unix.c
@@ -50,9 +50,12 @@
#include <vm/vm.h>
+extern int swap_pager_full;
+
struct obreak_args {
char *nsiz;
};
+
/* ARGSUSED */
int
obreak(p, uap, retval)
@@ -72,9 +75,11 @@ obreak(p, uap, retval)
old = round_page(old + ctob(vm->vm_dsize));
diff = new - old;
if (diff > 0) {
+ if (swap_pager_full) {
+ return(ENOMEM);
+ }
rv = vm_allocate(&vm->vm_map, &old, diff, FALSE);
if (rv != KERN_SUCCESS) {
- uprintf("sbrk: grow failed, return = %d\n", rv);
return(ENOMEM);
}
vm->vm_dsize += btoc(diff);
@@ -82,7 +87,6 @@ obreak(p, uap, retval)
diff = -diff;
rv = vm_deallocate(&vm->vm_map, new, diff);
if (rv != KERN_SUCCESS) {
- uprintf("sbrk: shrink failed, return = %d\n", rv);
return(ENOMEM);
}
vm->vm_dsize -= btoc(diff);
@@ -90,41 +94,10 @@ obreak(p, uap, retval)
return(0);
}
-/*
- * Enlarge the "stack segment" to include the specified
- * stack pointer for the process.
- */
-int
-grow(p, sp)
- struct proc *p;
- unsigned sp;
-{
- register struct vmspace *vm = p->p_vmspace;
- register int si;
-
- /*
- * For user defined stacks (from sendsig).
- */
- if (sp < (unsigned)vm->vm_maxsaddr)
- return (0);
- /*
- * For common case of already allocated (from trap).
- */
- if (sp >= USRSTACK - ctob(vm->vm_ssize))
- return (1);
- /*
- * Really need to check vs limit and increment stack size if ok.
- */
- si = clrnd(btoc(USRSTACK-sp) - vm->vm_ssize);
- if (vm->vm_ssize + si > btoc(p->p_rlimit[RLIMIT_STACK].rlim_cur))
- return (0);
- vm->vm_ssize += si;
- return (1);
-}
-
struct ovadvise_args {
int anom;
};
+
/* ARGSUSED */
int
ovadvise(p, uap, retval)
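With the obreak() change above, growing the data segment now fails up front with ENOMEM once the swap pager reports it is full, and the kernel no longer uprintf()s about grow or shrink failures, so callers see only the errno. A small userland illustration of checking for that condition:

#include <errno.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
	/* try to grow the data segment by 64MB; may fail with ENOMEM */
	if (sbrk(64 * 1024 * 1024) == (void *)-1 && errno == ENOMEM)
		fprintf(stderr, "sbrk: out of memory or swap\n");
	return (0);
}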
diff --git a/sys/vm/vm_user.c b/sys/vm/vm_user.c
index 20172c6..0f2c234 100644
--- a/sys/vm/vm_user.c
+++ b/sys/vm/vm_user.c
@@ -168,6 +168,7 @@ svm_protect(p, uap, retval)
return((int)rv);
}
+#endif
/*
* vm_inherit sets the inheritence of the specified range in the
* specified map.
@@ -203,7 +204,6 @@ vm_protect(map, start, size, set_maximum, new_protection)
return(vm_map_protect(map, trunc_page(start), round_page(start+size), new_protection, set_maximum));
}
-#endif
/*
* vm_allocate allocates "zero fill" memory in the specfied
@@ -255,6 +255,7 @@ vm_deallocate(map, start, size)
return(vm_map_remove(map, trunc_page(start), round_page(start+size)));
}
+#if 1
/*
* Similar to vm_allocate but assigns an explicit pager.
*/
@@ -310,3 +311,4 @@ vm_allocate_with_pager(map, addr, size, anywhere, pager, poffset, internal)
vm_object_setpager(object, pager, (vm_offset_t) 0, TRUE);
return(result);
}
+#endif
diff --git a/sys/vm/vnode_pager.c b/sys/vm/vnode_pager.c
index 9c2f826..b8e5a19 100644
--- a/sys/vm/vnode_pager.c
+++ b/sys/vm/vnode_pager.c
@@ -1,7 +1,8 @@
/*
* Copyright (c) 1990 University of Utah.
- * Copyright (c) 1991, 1993
- * The Regents of the University of California. All rights reserved.
+ * Copyright (c) 1991 The Regents of the University of California.
+ * All rights reserved.
+ * Copyright (c) 1993,1994 John S. Dyson
*
* This code is derived from software contributed to Berkeley by
* the Systems Programming Group of the University of Utah Computer
@@ -35,7 +36,8 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * @(#)vnode_pager.c 8.8 (Berkeley) 2/13/94
+ * from: @(#)vnode_pager.c 7.5 (Berkeley) 4/20/91
+ * $Id: vnode_pager.c,v 1.17 1994/04/05 03:23:53 davidg Exp $
*/
/*
@@ -46,6 +48,24 @@
* fix credential use (uses current process credentials now)
*/
+/*
+ * MODIFICATIONS:
+ * John S. Dyson 08 Dec 93
+ *
+ * This file, in conjunction with some vm_fault mods, eliminates the
+ * performance advantage of using the buffer cache and minimizes memory copies.
+ *
+ * 1) Supports multiple-block reads
+ * 2) Bypasses buffer cache for reads
+ *
+ * TODO:
+ *
+ * 1) Totally bypass buffer cache for reads
+ * (Currently will still sometimes use buffer cache for reads)
+ * 2) Bypass buffer cache for writes
+ * (Code does not support it, but mods are simple)
+ */
+
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
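The read path described above works on raw device addresses: a file offset is split into a filesystem logical block plus a byte offset within it, VOP_BMAP() maps the logical block to a device block, and the result is rescaled to bytes with DEV_BSIZE (this is what vnode_pager_addr() does further down). A standalone sketch of just that arithmetic, with the BMAP step passed in as a callback so it compiles outside the kernel; all names here are local stand-ins.

#define EX_DEV_BSIZE	512	/* device block size, assumed 512 bytes */

/*
 * Sketch: convert a byte offset within a file to a byte address on the
 * underlying device.  'bmap' stands in for VOP_BMAP() and returns the
 * device block for a logical file block, or -1 for a hole/error.
 */
static long
file_to_disk_addr(long file_off, long fs_bsize, long (*bmap)(long))
{
	long lblk = file_off / fs_bsize;	/* logical fs block */
	long boff = file_off % fs_bsize;	/* offset within that block */
	long dblk = bmap(lblk);			/* device block via "BMAP" */

	return (dblk < 0 ? -1 : dblk * EX_DEV_BSIZE + boff);
}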
@@ -58,51 +78,44 @@
#include <vm/vm_page.h>
#include <vm/vnode_pager.h>
-struct pagerlst vnode_pager_list; /* list of managed vnodes */
+#include <sys/buf.h>
+#include <miscfs/specfs/specdev.h>
-#ifdef DEBUG
-int vpagerdebug = 0x00;
-#define VDB_FOLLOW 0x01
-#define VDB_INIT 0x02
-#define VDB_IO 0x04
-#define VDB_FAIL 0x08
-#define VDB_ALLOC 0x10
-#define VDB_SIZE 0x20
-#endif
+int vnode_pager_putmulti();
-static vm_pager_t vnode_pager_alloc
- __P((caddr_t, vm_size_t, vm_prot_t, vm_offset_t));
-static void vnode_pager_cluster
- __P((vm_pager_t, vm_offset_t,
- vm_offset_t *, vm_offset_t *));
-static void vnode_pager_dealloc __P((vm_pager_t));
-static int vnode_pager_getpage
- __P((vm_pager_t, vm_page_t *, int, boolean_t));
-static boolean_t vnode_pager_haspage __P((vm_pager_t, vm_offset_t));
-static void vnode_pager_init __P((void));
-static int vnode_pager_io
- __P((vn_pager_t, vm_page_t *, int,
- boolean_t, enum uio_rw));
-static boolean_t vnode_pager_putpage
- __P((vm_pager_t, vm_page_t *, int, boolean_t));
+void vnode_pager_init();
+vm_pager_t vnode_pager_alloc(caddr_t, vm_offset_t, vm_prot_t, vm_offset_t);
+void vnode_pager_dealloc();
+int vnode_pager_getpage();
+int vnode_pager_getmulti();
+int vnode_pager_putpage();
+boolean_t vnode_pager_haspage();
struct pagerops vnodepagerops = {
vnode_pager_init,
vnode_pager_alloc,
vnode_pager_dealloc,
vnode_pager_getpage,
+ vnode_pager_getmulti,
vnode_pager_putpage,
- vnode_pager_haspage,
- vnode_pager_cluster
+ vnode_pager_putmulti,
+ vnode_pager_haspage
};
-static void
+static int vnode_pager_input(vn_pager_t vnp, vm_page_t *m, int count, int reqpage);
+static int vnode_pager_output(vn_pager_t vnp, vm_page_t *m, int count, int *rtvals);
+struct buf * getpbuf() ;
+void relpbuf(struct buf *bp) ;
+
+extern vm_map_t pager_map;
+
+struct pagerlst vnode_pager_list; /* list of managed vnodes */
+
+#define MAXBP (PAGE_SIZE/DEV_BSIZE);
+
+void
vnode_pager_init()
{
-#ifdef DEBUG
- if (vpagerdebug & VDB_FOLLOW)
- printf("vnode_pager_init()\n");
-#endif
TAILQ_INIT(&vnode_pager_list);
}
@@ -110,12 +123,12 @@ vnode_pager_init()
* Allocate (or lookup) pager for a vnode.
* Handle is a vnode pointer.
*/
-static vm_pager_t
-vnode_pager_alloc(handle, size, prot, foff)
+vm_pager_t
+vnode_pager_alloc(handle, size, prot, offset)
caddr_t handle;
vm_size_t size;
vm_prot_t prot;
- vm_offset_t foff;
+ vm_offset_t offset;
{
register vm_pager_t pager;
register vn_pager_t vnp;
@@ -124,10 +137,6 @@ vnode_pager_alloc(handle, size, prot, foff)
struct vnode *vp;
struct proc *p = curproc; /* XXX */
-#ifdef DEBUG
- if (vpagerdebug & (VDB_FOLLOW|VDB_ALLOC))
- printf("vnode_pager_alloc(%x, %x, %x)\n", handle, size, prot);
-#endif
/*
* Pageout to vnode, no can do yet.
*/
@@ -171,12 +180,12 @@ vnode_pager_alloc(handle, size, prot, foff)
vnp->vnp_flags = 0;
vnp->vnp_vp = vp;
vnp->vnp_size = vattr.va_size;
+
TAILQ_INSERT_TAIL(&vnode_pager_list, pager, pg_list);
pager->pg_handle = handle;
pager->pg_type = PG_VNODE;
- pager->pg_flags = 0;
pager->pg_ops = &vnodepagerops;
- pager->pg_data = vnp;
+ pager->pg_data = (caddr_t)vnp;
vp->v_vmdata = (caddr_t)pager;
} else {
/*
@@ -184,121 +193,104 @@ vnode_pager_alloc(handle, size, prot, foff)
* cache if found and also gain a reference to the object.
*/
object = vm_object_lookup(pager);
-#ifdef DEBUG
- vnp = (vn_pager_t)pager->pg_data;
-#endif
}
-#ifdef DEBUG
- if (vpagerdebug & VDB_ALLOC)
- printf("vnode_pager_setup: vp %x sz %x pager %x object %x\n",
- vp, vnp->vnp_size, pager, object);
-#endif
return(pager);
}
-static void
+void
vnode_pager_dealloc(pager)
vm_pager_t pager;
{
register vn_pager_t vnp = (vn_pager_t)pager->pg_data;
register struct vnode *vp;
-#ifdef NOTDEF
struct proc *p = curproc; /* XXX */
-#endif
-#ifdef DEBUG
- if (vpagerdebug & VDB_FOLLOW)
- printf("vnode_pager_dealloc(%x)\n", pager);
-#endif
if (vp = vnp->vnp_vp) {
vp->v_vmdata = NULL;
vp->v_flag &= ~VTEXT;
-#if NOTDEF
+#if 0
/* can hang if done at reboot on NFS FS */
(void) VOP_FSYNC(vp, p->p_ucred, p);
#endif
vrele(vp);
}
+
TAILQ_REMOVE(&vnode_pager_list, pager, pg_list);
free((caddr_t)vnp, M_VMPGDATA);
free((caddr_t)pager, M_VMPAGER);
}
-static int
-vnode_pager_getpage(pager, mlist, npages, sync)
+int
+vnode_pager_getmulti(pager, m, count, reqpage, sync)
+ vm_pager_t pager;
+ vm_page_t *m;
+ int count;
+ int reqpage;
+ boolean_t sync;
+{
+
+ return vnode_pager_input((vn_pager_t) pager->pg_data, m, count, reqpage);
+}
+
+int
+vnode_pager_getpage(pager, m, sync)
vm_pager_t pager;
- vm_page_t *mlist;
- int npages;
+ vm_page_t m;
boolean_t sync;
{
-#ifdef DEBUG
- if (vpagerdebug & VDB_FOLLOW)
- printf("vnode_pager_getpage(%x, %x, %x, %x)\n",
- pager, mlist, npages, sync);
-#endif
- return(vnode_pager_io((vn_pager_t)pager->pg_data,
- mlist, npages, sync, UIO_READ));
+ int err;
+ vm_page_t marray[1];
+ if (pager == NULL)
+ return FALSE;
+ marray[0] = m;
+
+ return vnode_pager_input((vn_pager_t)pager->pg_data, marray, 1, 0);
}
-static boolean_t
-vnode_pager_putpage(pager, mlist, npages, sync)
+boolean_t
+vnode_pager_putpage(pager, m, sync)
vm_pager_t pager;
- vm_page_t *mlist;
- int npages;
+ vm_page_t m;
boolean_t sync;
{
int err;
+ vm_page_t marray[1];
+ int rtvals[1];
-#ifdef DEBUG
- if (vpagerdebug & VDB_FOLLOW)
- printf("vnode_pager_putpage(%x, %x, %x, %x)\n",
- pager, mlist, npages, sync);
-#endif
if (pager == NULL)
- return (FALSE); /* ??? */
- err = vnode_pager_io((vn_pager_t)pager->pg_data,
- mlist, npages, sync, UIO_WRITE);
- /*
- * If the operation was successful, mark the pages clean.
- */
- if (err == VM_PAGER_OK) {
- while (npages--) {
- (*mlist)->flags |= PG_CLEAN;
- pmap_clear_modify(VM_PAGE_TO_PHYS(*mlist));
- mlist++;
- }
- }
- return(err);
+ return FALSE;
+ marray[0] = m;
+ vnode_pager_output((vn_pager_t)pager->pg_data, marray, 1, rtvals);
+ return rtvals[0];
+}
+
+int
+vnode_pager_putmulti(pager, m, c, sync, rtvals)
+ vm_pager_t pager;
+ vm_page_t *m;
+ int c;
+ boolean_t sync;
+ int *rtvals;
+{
+ return vnode_pager_output((vn_pager_t)pager->pg_data, m, c, rtvals);
}
-static boolean_t
+
+boolean_t
vnode_pager_haspage(pager, offset)
vm_pager_t pager;
vm_offset_t offset;
{
register vn_pager_t vnp = (vn_pager_t)pager->pg_data;
daddr_t bn;
+ int run;
int err;
-#ifdef DEBUG
- if (vpagerdebug & VDB_FOLLOW)
- printf("vnode_pager_haspage(%x, %x)\n", pager, offset);
-#endif
-
/*
* Offset beyond end of file, do not have the page
- * Lock the vnode first to make sure we have the most recent
- * version of the size.
*/
- VOP_LOCK(vnp->vnp_vp);
if (offset >= vnp->vnp_size) {
- VOP_UNLOCK(vnp->vnp_vp);
-#ifdef DEBUG
- if (vpagerdebug & (VDB_FAIL|VDB_SIZE))
- printf("vnode_pager_haspage: pg %x, off %x, size %x\n",
- pager, offset, vnp->vnp_size);
-#endif
return(FALSE);
}
@@ -311,53 +303,14 @@ vnode_pager_haspage(pager, offset)
*/
err = VOP_BMAP(vnp->vnp_vp,
offset / vnp->vnp_vp->v_mount->mnt_stat.f_iosize,
- (struct vnode **)0, &bn, NULL);
- VOP_UNLOCK(vnp->vnp_vp);
+ (struct vnode **)0, &bn, 0);
if (err) {
-#ifdef DEBUG
- if (vpagerdebug & VDB_FAIL)
- printf("vnode_pager_haspage: BMAP err %d, pg %x, off %x\n",
- err, pager, offset);
-#endif
return(TRUE);
}
return((long)bn < 0 ? FALSE : TRUE);
}
-static void
-vnode_pager_cluster(pager, offset, loffset, hoffset)
- vm_pager_t pager;
- vm_offset_t offset;
- vm_offset_t *loffset;
- vm_offset_t *hoffset;
-{
- vn_pager_t vnp = (vn_pager_t)pager->pg_data;
- vm_offset_t loff, hoff;
-
-#ifdef DEBUG
- if (vpagerdebug & VDB_FOLLOW)
- printf("vnode_pager_cluster(%x, %x) ", pager, offset);
-#endif
- loff = offset;
- if (loff >= vnp->vnp_size)
- panic("vnode_pager_cluster: bad offset");
- /*
- * XXX could use VOP_BMAP to get maxcontig value
- */
- hoff = loff + MAXBSIZE;
- if (hoff > round_page(vnp->vnp_size))
- hoff = round_page(vnp->vnp_size);
-
- *loffset = loff;
- *hoffset = hoff;
-#ifdef DEBUG
- if (vpagerdebug & VDB_FOLLOW)
- printf("returns [%x-%x]\n", loff, hoff);
-#endif
-}
-
/*
- * (XXX)
* Lets the VM system know about a change in size for a file.
* If this vnode is mapped into some address space (i.e. we have a pager
* for it) we adjust our own internal size and flush any cached pages in
@@ -399,19 +352,14 @@ vnode_pager_setsize(vp, nsize)
if (object == NULL)
return;
-#ifdef DEBUG
- if (vpagerdebug & (VDB_FOLLOW|VDB_SIZE))
- printf("vnode_pager_setsize: vp %x obj %x osz %d nsz %d\n",
- vp, object, vnp->vnp_size, nsize);
-#endif
/*
* File has shrunk.
* Toss any cached pages beyond the new EOF.
*/
- if (nsize < vnp->vnp_size) {
+ if (round_page(nsize) < round_page(vnp->vnp_size)) {
vm_object_lock(object);
vm_object_page_remove(object,
- (vm_offset_t)nsize, vnp->vnp_size);
+ (vm_offset_t)round_page(nsize), round_page(vnp->vnp_size));
vm_object_unlock(object);
}
vnp->vnp_size = (vm_offset_t)nsize;
@@ -425,24 +373,67 @@ vnode_pager_umount(mp)
register vm_pager_t pager, npager;
struct vnode *vp;
- for (pager = vnode_pager_list.tqh_first; pager != NULL; pager = npager){
+ pager = vnode_pager_list.tqh_first;
+ while( pager) {
/*
* Save the next pointer now since uncaching may
* terminate the object and render pager invalid
*/
- npager = pager->pg_list.tqe_next;
vp = ((vn_pager_t)pager->pg_data)->vnp_vp;
- if (mp == (struct mount *)0 || vp->v_mount == mp) {
- VOP_LOCK(vp);
+ npager = pager->pg_list.tqe_next;
+ if (mp == (struct mount *)0 || vp->v_mount == mp)
(void) vnode_pager_uncache(vp);
- VOP_UNLOCK(vp);
- }
+ pager = npager;
}
}
/*
* Remove vnode associated object from the object cache.
*
+ * Note: this routine may be invoked as a result of a pager put
+ * operation (possibly at object termination time), so we must be careful.
+ */
+boolean_t
+vnode_pager_uncache(vp)
+ register struct vnode *vp;
+{
+ register vm_object_t object;
+ boolean_t uncached, locked;
+ vm_pager_t pager;
+
+ /*
+ * Not a mapped vnode
+ */
+ pager = (vm_pager_t)vp->v_vmdata;
+ if (pager == NULL)
+ return (TRUE);
+ /*
+ * Unlock the vnode if it is currently locked.
+ * We do this since uncaching the object may result
+ * in its destruction which may initiate paging
+ * activity which may necessitate locking the vnode.
+ */
+ locked = VOP_ISLOCKED(vp);
+ if (locked)
+ VOP_UNLOCK(vp);
+ /*
+ * Must use vm_object_lookup() as it actually removes
+ * the object from the cache list.
+ */
+ object = vm_object_lookup(pager);
+ if (object) {
+ uncached = (object->ref_count <= 1);
+ pager_cache(object, FALSE);
+ } else
+ uncached = TRUE;
+ if (locked)
+ VOP_LOCK(vp);
+ return(uncached);
+}
+#if 0
+/*
+ * Remove vnode associated object from the object cache.
+ *
* XXX unlock the vnode if it is currently locked.
* We must do this since uncaching the object may result in its
* destruction which may initiate paging activity which may necessitate
@@ -462,14 +453,6 @@ vnode_pager_uncache(vp)
pager = (vm_pager_t)vp->v_vmdata;
if (pager == NULL)
return (TRUE);
-#ifdef DEBUG
- if (!VOP_ISLOCKED(vp)) {
- extern int (**nfsv2_vnodeop_p)();
-
- if (vp->v_op != nfsv2_vnodeop_p)
- panic("vnode_pager_uncache: vnode not locked!");
- }
-#endif
/*
* Must use vm_object_lookup() as it actually removes
* the object from the cache list.
@@ -484,97 +467,958 @@ vnode_pager_uncache(vp)
uncached = TRUE;
return(uncached);
}
+#endif
-static int
-vnode_pager_io(vnp, mlist, npages, sync, rw)
- register vn_pager_t vnp;
- vm_page_t *mlist;
- int npages;
- boolean_t sync;
- enum uio_rw rw;
+
+void
+vnode_pager_freepage(m)
+ vm_page_t m;
+{
+ PAGE_WAKEUP(m);
+ vm_page_free(m);
+}
+
+/*
+ * calculate the linear (byte) disk address of the specified
+ * virtual file address
+ */
+vm_offset_t
+vnode_pager_addr(vp, address)
+ struct vnode *vp;
+ vm_offset_t address;
+{
+ int rtaddress;
+ int bsize;
+ vm_offset_t block;
+ struct vnode *rtvp;
+ int err;
+ int vblock, voffset;
+ int run;
+
+ bsize = vp->v_mount->mnt_stat.f_iosize;
+ vblock = address / bsize;
+ voffset = address % bsize;
+
+ err = VOP_BMAP(vp,vblock,&rtvp,&block,0);
+
+ if( err)
+ rtaddress = -1;
+ else
+ rtaddress = block * DEV_BSIZE + voffset;
+
+ return rtaddress;
+}
+
+/*
+ * interrupt routine for I/O completion
+ */
+void
+vnode_pager_iodone(bp)
+ struct buf *bp;
{
+ bp->b_flags |= B_DONE;
+ wakeup((caddr_t)bp);
+}
+
+/*
+ * small block file system vnode pager input
+ */
+int
+vnode_pager_input_smlfs(vnp, m)
+ vn_pager_t vnp;
+ vm_page_t m;
+{
+ int i;
+ int s;
+ vm_offset_t paging_offset;
+ struct vnode *dp, *vp;
+ struct buf *bp;
+ vm_offset_t mapsize;
+ vm_offset_t foff;
+ vm_offset_t kva;
+ int fileaddr;
+ int block;
+ vm_offset_t bsize;
+ int error = 0;
+ int run;
+
+ paging_offset = m->object->paging_offset;
+ vp = vnp->vnp_vp;
+ bsize = vp->v_mount->mnt_stat.f_iosize;
+ foff = m->offset + paging_offset;
+
+ VOP_BMAP(vp, foff, &dp, 0, 0);
+
+ kva = vm_pager_map_page(m);
+
+ for(i=0;i<PAGE_SIZE/bsize;i++) {
+ /*
+ * calculate logical block and offset
+ */
+ block = foff / bsize + i;
+ s = splbio();
+ while (bp = incore(vp, block)) {
+ int amount;
+
+ /*
+ * wait until the buffer is avail or gone
+ */
+ if (bp->b_flags & B_BUSY) {
+ bp->b_flags |= B_WANTED;
+ tsleep ((caddr_t)bp, PVM, "vnwblk", 0);
+ continue;
+ }
+
+ amount = bsize;
+ if ((foff + bsize) > vnp->vnp_size)
+ amount = vnp->vnp_size - foff;
+
+ /*
+ * make sure that this page is in the buffer
+ */
+ if ((amount > 0) && amount <= bp->b_bcount) {
+ bp->b_flags |= B_BUSY;
+ splx(s);
+
+ /*
+ * copy the data from the buffer
+ */
+ bcopy(bp->b_un.b_addr, (caddr_t)kva + i * bsize, amount);
+ if (amount < bsize) {
+ bzero((caddr_t)kva + amount, bsize - amount);
+ }
+ bp->b_flags &= ~B_BUSY;
+ wakeup((caddr_t)bp);
+ goto nextblock;
+ }
+ break;
+ }
+ splx(s);
+ fileaddr = vnode_pager_addr(vp, foff + i * bsize);
+ if( fileaddr != -1) {
+ bp = getpbuf();
+ VHOLD(vp);
+
+ /* build a minimal buffer header */
+ bp->b_flags = B_BUSY | B_READ | B_CALL;
+ bp->b_iodone = vnode_pager_iodone;
+ bp->b_proc = curproc;
+ bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred;
+ if( bp->b_rcred != NOCRED)
+ crhold(bp->b_rcred);
+ if( bp->b_wcred != NOCRED)
+ crhold(bp->b_wcred);
+ bp->b_un.b_addr = (caddr_t) kva + i * bsize;
+ bp->b_blkno = fileaddr / DEV_BSIZE;
+ bgetvp(dp, bp);
+ bp->b_bcount = bsize;
+ bp->b_bufsize = bsize;
+
+ /* do the input */
+ VOP_STRATEGY(bp);
+
+ /* we definitely need to be at splbio here */
+
+ s = splbio();
+ while ((bp->b_flags & B_DONE) == 0) {
+ tsleep((caddr_t)bp, PVM, "vnsrd", 0);
+ }
+ splx(s);
+ if ((bp->b_flags & B_ERROR) != 0)
+ error = EIO;
+
+ /*
+ * free the buffer header back to the swap buffer pool
+ */
+ relpbuf(bp);
+ HOLDRELE(vp);
+ if( error)
+ break;
+ } else {
+ bzero((caddr_t) kva + i * bsize, bsize);
+ }
+nextblock:
+ }
+ vm_pager_unmap_page(kva);
+ if( error) {
+ return VM_PAGER_FAIL;
+ }
+ pmap_clear_modify(VM_PAGE_TO_PHYS(m));
+ m->flags |= PG_CLEAN;
+ m->flags &= ~PG_LAUNDRY;
+ return VM_PAGER_OK;
+
+}
+
+
+/*
+ * old style vnode pager input routine
+ */
+int
+vnode_pager_input_old(vnp, m)
+ vn_pager_t vnp;
+ vm_page_t m;
+{
+ int i;
struct uio auio;
struct iovec aiov;
+ int error;
+ int size;
+ vm_offset_t foff;
+ vm_offset_t kva;
+
+ error = 0;
+ foff = m->offset + m->object->paging_offset;
+ /*
+ * Return failure if beyond current EOF
+ */
+ if (foff >= vnp->vnp_size) {
+ return VM_PAGER_BAD;
+ } else {
+ size = PAGE_SIZE;
+ if (foff + size > vnp->vnp_size)
+ size = vnp->vnp_size - foff;
+/*
+ * Allocate a kernel virtual address and initialize so that
+ * we can use VOP_READ/WRITE routines.
+ */
+ kva = vm_pager_map_page(m);
+ aiov.iov_base = (caddr_t)kva;
+ aiov.iov_len = size;
+ auio.uio_iov = &aiov;
+ auio.uio_iovcnt = 1;
+ auio.uio_offset = foff;
+ auio.uio_segflg = UIO_SYSSPACE;
+ auio.uio_rw = UIO_READ;
+ auio.uio_resid = size;
+ auio.uio_procp = (struct proc *)0;
+
+ error = VOP_READ(vnp->vnp_vp, &auio, 0, curproc->p_ucred);
+ if (!error) {
+ register int count = size - auio.uio_resid;
+
+ if (count == 0)
+ error = EINVAL;
+ else if (count != PAGE_SIZE)
+ bzero((caddr_t)kva + count, PAGE_SIZE - count);
+ }
+ vm_pager_unmap_page(kva);
+ }
+ pmap_clear_modify(VM_PAGE_TO_PHYS(m));
+ m->flags |= PG_CLEAN;
+ m->flags &= ~PG_LAUNDRY;
+ return error?VM_PAGER_FAIL:VM_PAGER_OK;
+}
+
+/*
+ * generic vnode pager input routine
+ */
+int
+vnode_pager_input(vnp, m, count, reqpage)
+ register vn_pager_t vnp;
+ vm_page_t *m;
+ int count, reqpage;
+{
+ int i,j;
vm_offset_t kva, foff;
- int error, size;
+ int size;
struct proc *p = curproc; /* XXX */
+ vm_object_t object;
+ vm_offset_t paging_offset;
+ struct vnode *dp, *vp;
+ vm_offset_t mapsize;
+ int bsize;
+
+ int first, last;
+ int reqaddr, firstaddr;
+ int run;
+ int block, offset;
+
+ int nbp;
+ struct buf *bp;
+ int s;
+ int failflag;
+
+ int errtype=0; /* 0 is file type otherwise vm type */
+ int error = 0;
+
+ object = m[reqpage]->object; /* all vm_page_t items are in same object */
+ paging_offset = object->paging_offset;
+
+ vp = vnp->vnp_vp;
+ bsize = vp->v_mount->mnt_stat.f_iosize;
+
+ /* get the UNDERLYING device for the file with VOP_BMAP() */
+ /*
+ * originally, we did not check for an error return
+ * value -- assuming an fs always has a bmap entry point
+ * -- that assumption is wrong!!!
+ */
+ kva = 0;
+ mapsize = 0;
+ foff = m[reqpage]->offset + paging_offset;
+ if (!VOP_BMAP(vp, foff, &dp, 0, 0)) {
+ /*
+ * we do not block waiting for a kva here; if none is available
+ * we fall back to a conservative single-page kva (which may wait)
+ */
+ kva = kmem_alloc_pageable(pager_map, (mapsize = count*PAGE_SIZE));
+ if( !kva) {
+ for (i = 0; i < count; i++) {
+ if (i != reqpage) {
+ vnode_pager_freepage(m[i]);
+ }
+ }
+ m[0] = m[reqpage];
+ kva = kmem_alloc_wait(pager_map, mapsize = PAGE_SIZE);
+ reqpage = 0;
+ count = 1;
+ }
+ }
- /* XXX */
- vm_page_t m;
- if (npages != 1)
- panic("vnode_pager_io: cannot handle multiple pages");
- m = *mlist;
- /* XXX */
-
-#ifdef DEBUG
- if (vpagerdebug & VDB_FOLLOW)
- printf("vnode_pager_io(%x, %x, %c): vnode %x\n",
- vnp, m, rw == UIO_READ ? 'R' : 'W', vnp->vnp_vp);
-#endif
- foff = m->offset + m->object->paging_offset;
/*
- * Allocate a kernel virtual address and initialize so that
- * we can use VOP_READ/WRITE routines.
+ * if we can't get a kva or we can't bmap, use old VOP code
*/
- kva = vm_pager_map_pages(mlist, npages, sync);
- if (kva == NULL)
- return(VM_PAGER_AGAIN);
+ if (!kva) {
+ for (i = 0; i < count; i++) {
+ if (i != reqpage) {
+ vnode_pager_freepage(m[i]);
+ }
+ }
+ return vnode_pager_input_old(vnp, m[reqpage]);
/*
- * After all of the potentially blocking operations have been
- * performed, we can do the size checks:
- * read beyond EOF (returns error)
- * short read
+ * if the blocksize is smaller than a page size, then use
+ * special small filesystem code. NFS sometimes has a small
+ * blocksize, but it can handle large reads itself.
*/
- VOP_LOCK(vnp->vnp_vp);
- if (foff >= vnp->vnp_size) {
- VOP_UNLOCK(vnp->vnp_vp);
- vm_pager_unmap_pages(kva, npages);
-#ifdef DEBUG
- if (vpagerdebug & VDB_SIZE)
- printf("vnode_pager_io: vp %x, off %d size %d\n",
- vnp->vnp_vp, foff, vnp->vnp_size);
-#endif
- return(VM_PAGER_BAD);
+ } else if( (PAGE_SIZE / bsize) > 1 &&
+ (vp->v_mount->mnt_stat.f_type != MOUNT_NFS)) {
+
+ kmem_free_wakeup(pager_map, kva, mapsize);
+
+ for (i = 0; i < count; i++) {
+ if (i != reqpage) {
+ vnode_pager_freepage(m[i]);
+ }
+ }
+ return vnode_pager_input_smlfs(vnp, m[reqpage]);
+ }
+
+/*
+ * here on direct device I/O
+ */
+
+
+ /*
+ * This pathetic hack gets data from the buffer cache, if it's there.
+ * I believe that this is not really necessary, and the same ends
+ * could be achieved by defaulting to the normal vfs read behavior,
+ * but this might be more efficient, because it will NOT invoke
+ * read-aheads and one of the purposes of this code is to bypass the
+ * buffer cache and keep from flushing it by reading in a program.
+ */
+ /*
+ * calculate logical block and offset
+ */
+ block = foff / bsize;
+ offset = foff % bsize;
+ s = splbio();
+
+ /*
+ * if we have a buffer in core, then try to use it
+ */
+ while (bp = incore(vp, block)) {
+ int amount;
+
+ /*
+ * wait until the buffer is avail or gone
+ */
+ if (bp->b_flags & B_BUSY) {
+ bp->b_flags |= B_WANTED;
+ tsleep ((caddr_t)bp, PVM, "vnwblk", 0);
+ continue;
+ }
+
+ amount = PAGE_SIZE;
+ if ((foff + amount) > vnp->vnp_size)
+ amount = vnp->vnp_size - foff;
+
+ /*
+ * make sure that this page is in the buffer
+ */
+ if ((amount > 0) && (offset + amount) <= bp->b_bcount) {
+ bp->b_flags |= B_BUSY;
+ splx(s);
+
+ /*
+ * map the requested page
+ */
+ pmap_kenter(kva, VM_PAGE_TO_PHYS(m[reqpage]));
+ pmap_update();
+
+ /*
+ * copy the data from the buffer
+ */
+ bcopy(bp->b_un.b_addr + offset, (caddr_t)kva, amount);
+ if (amount < PAGE_SIZE) {
+ bzero((caddr_t)kva + amount, PAGE_SIZE - amount);
+ }
+ /*
+ * unmap the page and free the kva
+ */
+ pmap_remove(vm_map_pmap(pager_map), kva, kva + PAGE_SIZE);
+ kmem_free_wakeup(pager_map, kva, mapsize);
+ /*
+ * release the buffer back to the block subsystem
+ */
+ bp->b_flags &= ~B_BUSY;
+ wakeup((caddr_t)bp);
+ /*
+ * we did not have to do any work to get the requested
+ * we did not have to do any work to get the requested
+ * page, so the read-behind/read-ahead is not justified
+ for (i = 0; i < count; i++) {
+ if (i != reqpage) {
+ vnode_pager_freepage(m[i]);
+ }
+ }
+ count = 1;
+ reqpage = 0;
+ m[0] = m[reqpage];
+
+ /*
+ * sorry for the goto
+ */
+ goto finishup;
+ }
+ /*
+ * buffer is nowhere to be found, read from the disk
+ */
+ break;
+ }
+ splx(s);
+
+ reqaddr = vnode_pager_addr(vp, foff);
+ s = splbio();
+ /*
+ * Make sure that our I/O request is contiguous.
+ * Scan backward and stop for the first discontiguous
+ * entry or stop for a page being in buffer cache.
+ */
+ failflag = 0;
+ first = reqpage;
+ for (i = reqpage - 1; i >= 0; --i) {
+ if (failflag ||
+ incore(vp, (foff + (i - reqpage) * PAGE_SIZE) / bsize) ||
+ (vnode_pager_addr(vp, m[i]->offset + paging_offset))
+ != reqaddr + (i - reqpage) * PAGE_SIZE) {
+ vnode_pager_freepage(m[i]);
+ failflag = 1;
+ } else {
+ first = i;
+ }
+ }
+
+ /*
+ * Scan forward and stop for the first non-contiguous
+ * entry or stop for a page being in buffer cache.
+ */
+ failflag = 0;
+ last = reqpage + 1;
+ for (i = reqpage + 1; i < count; i++) {
+ if (failflag ||
+ incore(vp, (foff + (i - reqpage) * PAGE_SIZE) / bsize) ||
+ (vnode_pager_addr(vp, m[i]->offset + paging_offset))
+ != reqaddr + (i - reqpage) * PAGE_SIZE) {
+ vnode_pager_freepage(m[i]);
+ failflag = 1;
+ } else {
+ last = i + 1;
+ }
+ }
+ splx(s);
+
+ /*
+ * the first and last page have been calculated now, move input
+ * pages to be zero based...
+ */
+ count = last;
+ if (first != 0) {
+ for (i = first; i < count; i++) {
+ m[i - first] = m[i];
+ }
+ count -= first;
+ reqpage -= first;
}
- if (foff + PAGE_SIZE > vnp->vnp_size)
+
+ /*
+ * calculate the file virtual address for the transfer
+ */
+ foff = m[0]->offset + paging_offset;
+ /*
+ * and get the disk physical address (in bytes)
+ */
+ firstaddr = vnode_pager_addr(vp, foff);
+
+ /*
+ * calculate the size of the transfer
+ */
+ size = count * PAGE_SIZE;
+ if ((foff + size) > vnp->vnp_size)
size = vnp->vnp_size - foff;
- else
+
+ /*
+ * round up physical size for real devices
+ */
+ if( dp->v_type == VBLK || dp->v_type == VCHR)
+ size = (size + DEV_BSIZE - 1) & ~(DEV_BSIZE - 1);
+
+ /*
+ * and map the pages to be read into the kva
+ */
+ for (i = 0; i < count; i++)
+ pmap_kenter( kva + PAGE_SIZE * i, VM_PAGE_TO_PHYS(m[i]));
+
+ pmap_update();
+ bp = getpbuf();
+ VHOLD(vp);
+
+ /* build a minimal buffer header */
+ bp->b_flags = B_BUSY | B_READ | B_CALL;
+ bp->b_iodone = vnode_pager_iodone;
+ /* B_PHYS is not set, but it is nice to fill this in */
+ bp->b_proc = curproc;
+ bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred;
+ if( bp->b_rcred != NOCRED)
+ crhold(bp->b_rcred);
+ if( bp->b_wcred != NOCRED)
+ crhold(bp->b_wcred);
+ bp->b_un.b_addr = (caddr_t) kva;
+ bp->b_blkno = firstaddr / DEV_BSIZE;
+ bgetvp(dp, bp);
+ bp->b_bcount = size;
+ bp->b_bufsize = size;
+
+ /* do the input */
+ VOP_STRATEGY(bp);
+
+ s = splbio();
+ /* we definitely need to be at splbio here */
+
+ while ((bp->b_flags & B_DONE) == 0) {
+ tsleep((caddr_t)bp, PVM, "vnread", 0);
+ }
+ splx(s);
+ if ((bp->b_flags & B_ERROR) != 0)
+ error = EIO;
+
+ if (!error) {
+ if (size != count * PAGE_SIZE)
+ bzero((caddr_t)kva + size, PAGE_SIZE * count - size);
+ }
+
+ pmap_remove(vm_map_pmap(pager_map), kva, kva + PAGE_SIZE * count);
+ kmem_free_wakeup(pager_map, kva, mapsize);
+
+ /*
+ * free the buffer header back to the swap buffer pool
+ */
+ relpbuf(bp);
+ HOLDRELE(vp);
+
+finishup:
+ for (i = 0; i < count; i++) {
+ pmap_clear_modify(VM_PAGE_TO_PHYS(m[i]));
+ m[i]->flags |= PG_CLEAN;
+ m[i]->flags &= ~PG_LAUNDRY;
+ if (i != reqpage) {
+ /*
+ * whether or not to leave the page activated
+ * is up in the air, but we should put the page
+ * on a page queue somewhere. (it already is in
+ * the object).
+			 * Result: empirical results show that deactivating
+			 * pages is best.
+ */
+ /*
+ * just in case someone was asking for this
+ * page we now tell them that it is ok to use
+ */
+ if (!error) {
+ vm_page_deactivate(m[i]);
+ PAGE_WAKEUP(m[i]);
+ m[i]->flags &= ~PG_FAKE;
+ m[i]->act_count = 2;
+ } else {
+ vnode_pager_freepage(m[i]);
+ }
+ }
+ }
+ if (error) {
+ printf("vnode pager read error: %d\n", error);
+ }
+ if (errtype)
+ return error;
+ return (error ? VM_PAGER_FAIL : VM_PAGER_OK);
+}
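
The transfer-size arithmetic above (truncate the request at EOF, then round the physical size up to a DEV_BSIZE multiple for block and character devices) is easy to check in isolation. The following standalone sketch is not part of the patch; the constants and the transfer_size() helper are illustrative only:

#include <stdio.h>
#include <stddef.h>

#define PAGE_SIZE 4096		/* illustrative values, not the kernel's */
#define DEV_BSIZE 512

/* size of a 'count'-page transfer starting at file offset 'foff' */
static size_t
transfer_size(size_t foff, int count, size_t file_size, int is_device)
{
	size_t size = (size_t)count * PAGE_SIZE;

	if (foff + size > file_size)		/* truncate at EOF */
		size = file_size - foff;
	if (is_device)				/* round up for real devices */
		size = (size + DEV_BSIZE - 1) & ~(size_t)(DEV_BSIZE - 1);
	return (size);
}

int
main(void)
{
	/* 3 pages requested, the file ends 100 bytes into the third page */
	size_t sz = transfer_size(8192, 3, 8192 + 2 * PAGE_SIZE + 100, 1);

	printf("%zu\n", sz);	/* 8292 bytes of file, rounded up to 8704 */
	return (0);
}

The short read is then zero-filled out to the end of the last page, which is what the bzero() after the strategy call does.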
+
+/*
+ * old-style vnode pager output routine
+ */
+int
+vnode_pager_output_old(vnp, m)
+ register vn_pager_t vnp;
+ vm_page_t m;
+{
+ vm_offset_t foff;
+ vm_offset_t kva;
+ vm_offset_t size;
+ struct iovec aiov;
+ struct uio auio;
+ struct vnode *vp;
+ int error;
+
+ vp = vnp->vnp_vp;
+ foff = m->offset + m->object->paging_offset;
+ /*
+ * Return failure if beyond current EOF
+ */
+ if (foff >= vnp->vnp_size) {
+ return VM_PAGER_BAD;
+ } else {
size = PAGE_SIZE;
- aiov.iov_base = (caddr_t)kva;
- aiov.iov_len = size;
- auio.uio_iov = &aiov;
- auio.uio_iovcnt = 1;
- auio.uio_offset = foff;
- auio.uio_segflg = UIO_SYSSPACE;
- auio.uio_rw = rw;
- auio.uio_resid = size;
- auio.uio_procp = (struct proc *)0;
-#ifdef DEBUG
- if (vpagerdebug & VDB_IO)
- printf("vnode_pager_io: vp %x kva %x foff %x size %x",
- vnp->vnp_vp, kva, foff, size);
-#endif
- if (rw == UIO_READ)
- error = VOP_READ(vnp->vnp_vp, &auio, 0, p->p_ucred);
+ if (foff + size > vnp->vnp_size)
+ size = vnp->vnp_size - foff;
+		/*
+		 * Allocate a kernel virtual address and initialize so
+		 * that we can use VOP_WRITE routines.
+		 */
+ kva = vm_pager_map_page(m);
+ aiov.iov_base = (caddr_t)kva;
+ aiov.iov_len = size;
+ auio.uio_iov = &aiov;
+ auio.uio_iovcnt = 1;
+ auio.uio_offset = foff;
+ auio.uio_segflg = UIO_SYSSPACE;
+ auio.uio_rw = UIO_WRITE;
+ auio.uio_resid = size;
+ auio.uio_procp = (struct proc *)0;
+
+ error = VOP_WRITE(vp, &auio, 0, curproc->p_ucred);
+
+ if (!error) {
+ if ((size - auio.uio_resid) == 0) {
+ error = EINVAL;
+ }
+ }
+ vm_pager_unmap_page(kva);
+		return (error ? VM_PAGER_FAIL : VM_PAGER_OK);
+ }
+}
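
As a rough userland analogue of the old-style output path, the sketch below writes at most one page at the given file offset, clamps the last page at EOF, and treats a write that makes no progress as an error, mirroring the uio_resid check above. pwrite(2) stands in for the VOP_WRITE call; page_out_old() and its parameters are hypothetical:

#include <sys/types.h>
#include <errno.h>
#include <unistd.h>

#define PAGE_SIZE 4096			/* illustrative */

static int
page_out_old(int fd, const char *page, off_t foff, off_t file_size)
{
	size_t size = PAGE_SIZE;
	ssize_t n;

	if (foff >= file_size)		/* corresponds to the VM_PAGER_BAD case */
		return (EINVAL);
	if (foff + (off_t)size > file_size)
		size = (size_t)(file_size - foff);	/* clamp the last page at EOF */

	n = pwrite(fd, page, size, foff);
	if (n < 0)
		return (errno);
	if (n == 0)			/* no progress, like (size - uio_resid) == 0 */
		return (EINVAL);
	return (0);
}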
+
+/*
+ * vnode pager output on a small-block file system
+ */
+int
+vnode_pager_output_smlfs(vnp, m)
+ vn_pager_t vnp;
+ vm_page_t m;
+{
+ int i;
+ int s;
+ vm_offset_t paging_offset;
+ struct vnode *dp, *vp;
+ struct buf *bp;
+ vm_offset_t mapsize;
+ vm_offset_t foff;
+ vm_offset_t kva;
+ int fileaddr;
+ int block;
+ vm_offset_t bsize;
+ int run;
+ int error = 0;
+
+ paging_offset = m->object->paging_offset;
+ vp = vnp->vnp_vp;
+ bsize = vp->v_mount->mnt_stat.f_iosize;
+ foff = m->offset + paging_offset;
+
+ VOP_BMAP(vp, foff, &dp, 0, 0);
+ kva = vm_pager_map_page(m);
+ for(i = 0; !error && i < (PAGE_SIZE/bsize); i++) {
+ /*
+ * calculate logical block and offset
+ */
+ fileaddr = vnode_pager_addr(vp, foff + i * bsize);
+ if( fileaddr != -1) {
+ s = splbio();
+ if( bp = incore( vp, (foff/bsize) + i)) {
+ bp = getblk(vp, (foff/bsize) + i, bp->b_bufsize,0, 0);
+ bp->b_flags |= B_INVAL;
+ brelse(bp);
+ }
+ splx(s);
+
+ bp = getpbuf();
+ VHOLD(vp);
+
+ /* build a minimal buffer header */
+ bp->b_flags = B_BUSY | B_CALL | B_WRITE;
+ bp->b_iodone = vnode_pager_iodone;
+ bp->b_proc = curproc;
+ bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred;
+ if( bp->b_rcred != NOCRED)
+ crhold(bp->b_rcred);
+ if( bp->b_wcred != NOCRED)
+ crhold(bp->b_wcred);
+ bp->b_un.b_addr = (caddr_t) kva + i * bsize;
+ bp->b_blkno = fileaddr / DEV_BSIZE;
+ bgetvp(dp, bp);
+ ++dp->v_numoutput;
+ /* for NFS */
+ bp->b_dirtyoff = 0;
+ bp->b_dirtyend = bsize;
+ bp->b_bcount = bsize;
+ bp->b_bufsize = bsize;
+
+			/* do the output */
+ VOP_STRATEGY(bp);
+
+ /* we definitely need to be at splbio here */
+
+ s = splbio();
+ while ((bp->b_flags & B_DONE) == 0) {
+ tsleep((caddr_t)bp, PVM, "vnswrt", 0);
+ }
+ splx(s);
+ if ((bp->b_flags & B_ERROR) != 0)
+ error = EIO;
+
+ /*
+ * free the buffer header back to the swap buffer pool
+ */
+ relpbuf(bp);
+ HOLDRELE(vp);
+ }
+ }
+ vm_pager_unmap_page(kva);
+ if( error)
+ return VM_PAGER_FAIL;
else
- error = VOP_WRITE(vnp->vnp_vp, &auio, 0, p->p_ucred);
- VOP_UNLOCK(vnp->vnp_vp);
-#ifdef DEBUG
- if (vpagerdebug & VDB_IO) {
- if (error || auio.uio_resid)
- printf(" returns error %x, resid %x",
- error, auio.uio_resid);
- printf("\n");
+ return VM_PAGER_OK;
+}
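
The block loop above can be summarized as follows: a page spans PAGE_SIZE/bsize file system blocks, each block is translated to its own disk address, and unallocated blocks (address -1, i.e. holes) are skipped instead of written. In this sketch, block_to_diskaddr() and write_block() are hypothetical stand-ins for vnode_pager_addr() and the pbuf strategy call:

#define PAGE_SIZE 4096			/* illustrative */

/* hypothetical helpers; not real kernel interfaces */
extern long block_to_diskaddr(long fileblock);	/* returns -1 for a hole */
extern int write_block(long diskaddr, const char *buf, long bsize);

static int
page_out_smallblocks(const char *kva, long foff, long bsize)
{
	long i, nblocks = PAGE_SIZE / bsize;
	int error = 0;

	for (i = 0; !error && i < nblocks; i++) {
		long da = block_to_diskaddr(foff / bsize + i);

		if (da == -1)		/* hole: nothing allocated on disk */
			continue;
		error = write_block(da, kva + i * bsize, bsize);
	}
	return (error);
}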
+
+/*
+ * generic vnode pager output routine
+ */
+int
+vnode_pager_output(vnp, m, count, rtvals)
+ vn_pager_t vnp;
+ vm_page_t *m;
+ int count;
+ int *rtvals;
+{
+ int i,j;
+ vm_offset_t kva, foff;
+ int size;
+ struct proc *p = curproc; /* XXX */
+ vm_object_t object;
+ vm_offset_t paging_offset;
+ struct vnode *dp, *vp;
+ struct buf *bp;
+ vm_offset_t mapsize;
+ vm_offset_t reqaddr;
+ int run;
+ int bsize;
+ int s;
+
+ int error = 0;
+
+retryoutput:
+ object = m[0]->object; /* all vm_page_t items are in same object */
+ paging_offset = object->paging_offset;
+
+ vp = vnp->vnp_vp;
+ bsize = vp->v_mount->mnt_stat.f_iosize;
+
+ for(i=0;i<count;i++)
+ rtvals[i] = VM_PAGER_AGAIN;
+
+ /*
+ * if the filesystem does not have a bmap, then use the
+ * old code
+ */
+ if (VOP_BMAP(vp, m[0]->offset+paging_offset, &dp, 0, 0)) {
+
+ rtvals[0] = vnode_pager_output_old(vnp, m[0]);
+
+ pmap_clear_modify(VM_PAGE_TO_PHYS(m[0]));
+ m[0]->flags |= PG_CLEAN;
+ m[0]->flags &= ~PG_LAUNDRY;
+ return rtvals[0];
}
-#endif
- if (!error) {
- register int count = size - auio.uio_resid;
- if (count == 0)
- error = EINVAL;
- else if (count != PAGE_SIZE && rw == UIO_READ)
- bzero((void *)(kva + count), PAGE_SIZE - count);
+ /*
+ * if the filesystem has a small blocksize, then use
+ * the small block filesystem output code
+ */
+ if ((bsize < PAGE_SIZE) &&
+ (vp->v_mount->mnt_stat.f_type != MOUNT_NFS)) {
+
+ for(i=0;i<count;i++) {
+ rtvals[i] = vnode_pager_output_smlfs(vnp, m[i]);
+ if( rtvals[i] == VM_PAGER_OK) {
+ pmap_clear_modify(VM_PAGE_TO_PHYS(m[i]));
+ m[i]->flags |= PG_CLEAN;
+ m[i]->flags &= ~PG_LAUNDRY;
+ }
+ }
+ return rtvals[0];
}
- vm_pager_unmap_pages(kva, npages);
- return (error ? VM_PAGER_ERROR : VM_PAGER_OK);
+
+ /*
+ * get some kva for the output
+ */
+ kva = kmem_alloc_pageable(pager_map, (mapsize = count*PAGE_SIZE));
+ if( !kva) {
+ kva = kmem_alloc_pageable(pager_map, (mapsize = PAGE_SIZE));
+ count = 1;
+ if( !kva)
+ return rtvals[0];
+ }
+
+ for(i=0;i<count;i++) {
+ foff = m[i]->offset + paging_offset;
+ if (foff >= vnp->vnp_size) {
+ for(j=i;j<count;j++)
+ rtvals[j] = VM_PAGER_BAD;
+ count = i;
+ break;
+ }
+ }
+ if (count == 0) {
+ return rtvals[0];
+ }
+ foff = m[0]->offset + paging_offset;
+ reqaddr = vnode_pager_addr(vp, foff);
+ /*
+	 * Scan forward and stop at the first entry that is not
+	 * physically contiguous with the pages before it.
+ */
+ for (i = 1; i < count; i++) {
+ if ( vnode_pager_addr(vp, m[i]->offset + paging_offset)
+ != reqaddr + i * PAGE_SIZE) {
+ count = i;
+ break;
+ }
+ }
+
+ /*
+ * calculate the size of the transfer
+ */
+ size = count * PAGE_SIZE;
+ if ((foff + size) > vnp->vnp_size)
+ size = vnp->vnp_size - foff;
+
+ /*
+ * round up physical size for real devices
+ */
+ if( dp->v_type == VBLK || dp->v_type == VCHR)
+ size = (size + DEV_BSIZE - 1) & ~(DEV_BSIZE - 1);
+
+ /*
+	 * and map the pages to be written into the kva
+ */
+ for (i = 0; i < count; i++)
+ pmap_kenter( kva + PAGE_SIZE * i, VM_PAGE_TO_PHYS(m[i]));
+ pmap_update();
+/*
+ printf("vnode: writing foff: %d, devoff: %d, size: %d\n",
+ foff, reqaddr, size);
+*/
+ /*
+ * next invalidate the incore vfs_bio data
+ */
+ for (i = 0; i < count; i++) {
+ int filblock = (foff + i * PAGE_SIZE) / bsize;
+ struct buf *fbp;
+
+ s = splbio();
+ if( fbp = incore( vp, filblock)) {
+ /* printf("invalidating: %d\n", filblock); */
+ fbp = getblk(vp, filblock, fbp->b_bufsize,0,0);
+ fbp->b_flags |= B_INVAL;
+ brelse(fbp);
+ }
+ splx(s);
+ }
+
+
+ bp = getpbuf();
+ VHOLD(vp);
+ /* build a minimal buffer header */
+ bp->b_flags = B_BUSY | B_WRITE | B_CALL;
+ bp->b_iodone = vnode_pager_iodone;
+ /* B_PHYS is not set, but it is nice to fill this in */
+ bp->b_proc = curproc;
+ bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred;
+
+ if( bp->b_rcred != NOCRED)
+ crhold(bp->b_rcred);
+ if( bp->b_wcred != NOCRED)
+ crhold(bp->b_wcred);
+ bp->b_un.b_addr = (caddr_t) kva;
+ bp->b_blkno = reqaddr / DEV_BSIZE;
+ bgetvp(dp, bp);
+ ++dp->v_numoutput;
+
+ /* for NFS */
+ bp->b_dirtyoff = 0;
+ bp->b_dirtyend = size;
+
+ bp->b_bcount = size;
+ bp->b_bufsize = size;
+
+ /* do the output */
+ VOP_STRATEGY(bp);
+
+ s = splbio();
+
+ /* we definitely need to be at splbio here */
+
+ while ((bp->b_flags & B_DONE) == 0) {
+ tsleep((caddr_t)bp, PVM, "vnwrite", 0);
+ }
+ splx(s);
+
+ if ((bp->b_flags & B_ERROR) != 0)
+ error = EIO;
+
+ pmap_remove(vm_map_pmap(pager_map), kva, kva + PAGE_SIZE * count);
+ kmem_free_wakeup(pager_map, kva, mapsize);
+
+ /*
+ * free the buffer header back to the swap buffer pool
+ */
+ relpbuf(bp);
+ HOLDRELE(vp);
+
+ if( !error) {
+ for(i=0;i<count;i++) {
+ pmap_clear_modify(VM_PAGE_TO_PHYS(m[i]));
+ m[i]->flags |= PG_CLEAN;
+ m[i]->flags &= ~PG_LAUNDRY;
+ rtvals[i] = VM_PAGER_OK;
+ }
+ } else if( count != 1) {
+ error = 0;
+ count = 1;
+ goto retryoutput;
+ }
+
+ if (error) {
+ printf("vnode pager write error: %d\n", error);
+ }
+ return (error ? VM_PAGER_FAIL : VM_PAGER_OK);
}
+
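
The clustering decision in vnode_pager_output() reduces to a forward scan that measures how many pages are physically contiguous on disk; everything past the run keeps its VM_PAGER_AGAIN status and is handled on a later pass, and a failed clustered write is retried as a single-page write. A standalone sketch of the scan, with file_to_diskaddr() as a hypothetical stand-in for vnode_pager_addr():

#define PAGE_SIZE 4096			/* illustrative */

extern long file_to_diskaddr(long foff);	/* hypothetical bmap-style lookup */

/* how many of 'count' pages starting at file offset 'foff' form one run */
static int
contiguous_run(long foff, int count)
{
	long reqaddr = file_to_diskaddr(foff);
	int i;

	for (i = 1; i < count; i++) {
		if (file_to_diskaddr(foff + (long)i * PAGE_SIZE) !=
		    reqaddr + (long)i * PAGE_SIZE)
			break;
	}
	return (i);		/* these pages go out in a single transfer */
}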
diff --git a/sys/vm/vnode_pager.h b/sys/vm/vnode_pager.h
index 95c9545..b01dc54 100644
--- a/sys/vm/vnode_pager.h
+++ b/sys/vm/vnode_pager.h
@@ -53,7 +53,4 @@ typedef struct vnpager *vn_pager_t;
#define VN_PAGER_NULL ((vn_pager_t)0)
-#define VNP_PAGING 0x01 /* vnode used for pageout */
-#define VNP_CACHED 0x02 /* vnode is cached */
-
#endif /* _VNODE_PAGER_ */