Diffstat (limited to 'sys/vm')
-rw-r--r--  sys/vm/device_pager.c    366
-rw-r--r--  sys/vm/device_pager.h     53
-rw-r--r--  sys/vm/kern_lock.c       534
-rw-r--r--  sys/vm/lock.h            172
-rw-r--r--  sys/vm/pmap.h            122
-rw-r--r--  sys/vm/swap_pager.c     1833
-rw-r--r--  sys/vm/swap_pager.h       99
-rw-r--r--  sys/vm/vm.h               93
-rw-r--r--  sys/vm/vm_extern.h       135
-rw-r--r--  sys/vm/vm_fault.c       1305
-rw-r--r--  sys/vm/vm_glue.c         685
-rw-r--r--  sys/vm/vm_inherit.h       83
-rw-r--r--  sys/vm/vm_init.c         105
-rw-r--r--  sys/vm/vm_kern.c         454
-rw-r--r--  sys/vm/vm_kern.h          74
-rw-r--r--  sys/vm/vm_map.c         2681
-rw-r--r--  sys/vm/vm_map.h          228
-rw-r--r--  sys/vm/vm_meter.c        225
-rw-r--r--  sys/vm/vm_mmap.c         836
-rw-r--r--  sys/vm/vm_object.c      1645
-rw-r--r--  sys/vm/vm_object.h       173
-rw-r--r--  sys/vm/vm_page.c         879
-rw-r--r--  sys/vm/vm_page.h         268
-rw-r--r--  sys/vm/vm_pageout.c      790
-rw-r--r--  sys/vm/vm_pageout.h      112
-rw-r--r--  sys/vm/vm_pager.c        322
-rw-r--r--  sys/vm/vm_pager.h        154
-rw-r--r--  sys/vm/vm_param.h        189
-rw-r--r--  sys/vm/vm_prot.h         102
-rw-r--r--  sys/vm/vm_swap.c         439
-rw-r--r--  sys/vm/vm_unix.c         110
-rw-r--r--  sys/vm/vm_user.c         314
-rw-r--r--  sys/vm/vnode_pager.c    1424
-rw-r--r--  sys/vm/vnode_pager.h      56
34 files changed, 17060 insertions, 0 deletions
diff --git a/sys/vm/device_pager.c b/sys/vm/device_pager.c
new file mode 100644
index 0000000..b8083df
--- /dev/null
+++ b/sys/vm/device_pager.c
@@ -0,0 +1,366 @@
+/*
+ * Copyright (c) 1990 University of Utah.
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * the Systems Programming Group of the University of Utah Computer
+ * Science Department.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)device_pager.c 8.1 (Berkeley) 6/11/93
+ */
+
+/*
+ * Page to/from special files.
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/conf.h>
+#include <sys/mman.h>
+#include <sys/malloc.h>
+
+#include <vm/vm.h>
+#include <vm/vm_kern.h>
+#include <vm/vm_page.h>
+#include <vm/device_pager.h>
+
+struct pagerlst dev_pager_list; /* list of managed devices */
+struct pglist dev_pager_fakelist; /* list of available vm_page_t's */
+
+#ifdef DEBUG
+int dpagerdebug = 0;
+#define DDB_FOLLOW 0x01
+#define DDB_INIT 0x02
+#define DDB_ALLOC 0x04
+#define DDB_FAIL 0x08
+#endif
+
+static vm_pager_t dev_pager_alloc
+ __P((caddr_t, vm_size_t, vm_prot_t, vm_offset_t));
+static void dev_pager_dealloc __P((vm_pager_t));
+static int dev_pager_getpage
+ __P((vm_pager_t, vm_page_t, boolean_t));
+static boolean_t dev_pager_haspage __P((vm_pager_t, vm_offset_t));
+static void dev_pager_init __P((void));
+static int dev_pager_putpage
+ __P((vm_pager_t, vm_page_t, boolean_t));
+static vm_page_t dev_pager_getfake __P((vm_offset_t));
+static void dev_pager_putfake __P((vm_page_t));
+
+struct pagerops devicepagerops = {
+ dev_pager_init,
+ dev_pager_alloc,
+ dev_pager_dealloc,
+ dev_pager_getpage,
+ 0,
+ dev_pager_putpage,
+ 0,
+ dev_pager_haspage
+};
+
+static void
+dev_pager_init()
+{
+#ifdef DEBUG
+ if (dpagerdebug & DDB_FOLLOW)
+ printf("dev_pager_init()\n");
+#endif
+ TAILQ_INIT(&dev_pager_list);
+ TAILQ_INIT(&dev_pager_fakelist);
+}
+
+static vm_pager_t
+dev_pager_alloc(handle, size, prot, foff)
+ caddr_t handle;
+ vm_size_t size;
+ vm_prot_t prot;
+ vm_offset_t foff;
+{
+ dev_t dev;
+ vm_pager_t pager;
+ int (*mapfunc)();
+ vm_object_t object;
+ dev_pager_t devp;
+ unsigned int npages, off;
+
+#ifdef DEBUG
+ if (dpagerdebug & DDB_FOLLOW)
+ printf("dev_pager_alloc(%x, %x, %x, %x)\n",
+ handle, size, prot, foff);
+#endif
+#ifdef DIAGNOSTIC
+ /*
+ * Pageout to device, should never happen.
+ */
+ if (handle == NULL)
+ panic("dev_pager_alloc called");
+#endif
+
+ /*
+ * Make sure this device can be mapped.
+ */
+ dev = (dev_t)(u_long)handle;
+ mapfunc = cdevsw[major(dev)].d_mmap;
+ if (mapfunc == NULL || mapfunc == enodev || mapfunc == nullop)
+ return(NULL);
+
+ /*
+ * Offset should be page aligned.
+ */
+ if (foff & (PAGE_SIZE-1))
+ return(NULL);
+
+ /*
+ * Check that the specified range of the device allows the
+ * desired protection.
+ *
+ * XXX assumes VM_PROT_* == PROT_*
+ */
+ npages = atop(round_page(size));
+ for (off = foff; npages--; off += PAGE_SIZE)
+ if ((*mapfunc)(dev, off, (int)prot) == -1)
+ return(NULL);
+
+ /*
+ * Look up pager, creating as necessary.
+ */
+top:
+ pager = vm_pager_lookup(&dev_pager_list, handle);
+ if (pager == NULL) {
+ /*
+ * Allocate and initialize pager structs
+ */
+ pager = (vm_pager_t)malloc(sizeof *pager, M_VMPAGER, M_WAITOK);
+ if (pager == NULL)
+ return(NULL);
+ devp = (dev_pager_t)malloc(sizeof *devp, M_VMPGDATA, M_WAITOK);
+ if (devp == NULL) {
+ free((caddr_t)pager, M_VMPAGER);
+ return(NULL);
+ }
+ pager->pg_handle = handle;
+ pager->pg_ops = &devicepagerops;
+ pager->pg_type = PG_DEVICE;
+ pager->pg_data = (caddr_t)devp;
+ pager->pg_flags = 0;
+ TAILQ_INIT(&devp->devp_pglist);
+ /*
+ * Allocate object and associate it with the pager.
+ */
+ object = devp->devp_object = vm_object_allocate(0);
+ vm_object_enter(object, pager);
+ vm_object_setpager(object, pager, (vm_offset_t)foff, FALSE);
+ /*
+ * Finally, put it on the managed list so others can find it.
+ * First we re-lookup in case someone else beat us to this
+ * point (due to blocking in the various mallocs). If so,
+ * we free everything and start over.
+ */
+ if (vm_pager_lookup(&dev_pager_list, handle)) {
+ free((caddr_t)devp, M_VMPGDATA);
+ free((caddr_t)pager, M_VMPAGER);
+ goto top;
+ }
+ TAILQ_INSERT_TAIL(&dev_pager_list, pager, pg_list);
+#ifdef DEBUG
+ if (dpagerdebug & DDB_ALLOC) {
+ printf("dev_pager_alloc: pager %x devp %x object %x\n",
+ pager, devp, object);
+ vm_object_print(object, FALSE);
+ }
+#endif
+ } else {
+ /*
+ * vm_object_lookup() gains a reference and also
+ * removes the object from the cache.
+ */
+ object = vm_object_lookup(pager);
+#ifdef DIAGNOSTIC
+ devp = (dev_pager_t)pager->pg_data;
+ if (object != devp->devp_object)
+ panic("dev_pager_setup: bad object");
+#endif
+ }
+ return(pager);
+}
+
+static void
+dev_pager_dealloc(pager)
+ vm_pager_t pager;
+{
+ dev_pager_t devp;
+ vm_object_t object;
+ vm_page_t m;
+
+#ifdef DEBUG
+ if (dpagerdebug & DDB_FOLLOW)
+ printf("dev_pager_dealloc(%x)\n", pager);
+#endif
+ TAILQ_REMOVE(&dev_pager_list, pager, pg_list);
+ /*
+ * Get the object.
+ * Note: cannot use vm_object_lookup since object has already
+ * been removed from the hash chain.
+ */
+ devp = (dev_pager_t)pager->pg_data;
+ object = devp->devp_object;
+#ifdef DEBUG
+ if (dpagerdebug & DDB_ALLOC)
+ printf("dev_pager_dealloc: devp %x object %x\n", devp, object);
+#endif
+ /*
+ * Free up our fake pages.
+ */
+ while ((m = devp->devp_pglist.tqh_first) != NULL) {
+ TAILQ_REMOVE(&devp->devp_pglist, m, pageq);
+ dev_pager_putfake(m);
+ }
+ free((caddr_t)devp, M_VMPGDATA);
+ free((caddr_t)pager, M_VMPAGER);
+}
+
+static int
+dev_pager_getpage(pager, m, sync)
+ vm_pager_t pager;
+ vm_page_t m;
+ boolean_t sync;
+{
+ register vm_object_t object;
+ vm_offset_t offset, paddr;
+ vm_page_t page;
+ dev_t dev;
+ int s;
+ int (*mapfunc)(), prot;
+
+#ifdef DEBUG
+ if (dpagerdebug & DDB_FOLLOW)
+ printf("dev_pager_getpage(%x, %x)\n", pager, m);
+#endif
+
+ object = m->object;
+ dev = (dev_t)(u_long)pager->pg_handle;
+ offset = m->offset + object->paging_offset;
+ prot = PROT_READ; /* XXX should pass in? */
+ mapfunc = cdevsw[major(dev)].d_mmap;
+
+ if (mapfunc == NULL || mapfunc == enodev || mapfunc == nullop)
+ panic("dev_pager_getpage: no map function");
+
+ paddr = pmap_phys_address((*mapfunc)((dev_t)dev, (int)offset, prot));
+#ifdef DIAGNOSTIC
+ if (paddr == -1)
+ panic("dev_pager_getpage: map function returns error");
+#endif
+ /*
+ * Replace the passed in page with our own fake page and free
+ * up the original.
+ */
+ page = dev_pager_getfake(paddr);
+ TAILQ_INSERT_TAIL(&((dev_pager_t)pager->pg_data)->devp_pglist,
+ page, pageq);
+ vm_object_lock(object);
+ vm_page_lock_queues();
+ vm_page_free(m);
+ vm_page_unlock_queues();
+ s = splhigh();
+ vm_page_insert(page, object, offset);
+ splx(s);
+ PAGE_WAKEUP(m);
+ if (offset + PAGE_SIZE > object->size)
+ object->size = offset + PAGE_SIZE; /* XXX anal */
+ vm_object_unlock(object);
+
+ return(VM_PAGER_OK);
+}
+
+static int
+dev_pager_putpage(pager, m, sync)
+ vm_pager_t pager;
+ vm_page_t m;
+ boolean_t sync;
+{
+#ifdef DEBUG
+ if (dpagerdebug & DDB_FOLLOW)
+ printf("dev_pager_putpage(%x, %x)\n", pager, m);
+#endif
+ if (pager == NULL)
+ return 0;
+ panic("dev_pager_putpage called");
+}
+
+static boolean_t
+dev_pager_haspage(pager, offset)
+ vm_pager_t pager;
+ vm_offset_t offset;
+{
+#ifdef DEBUG
+ if (dpagerdebug & DDB_FOLLOW)
+ printf("dev_pager_haspage(%x, %x)\n", pager, offset);
+#endif
+ return(TRUE);
+}
+
+static vm_page_t
+dev_pager_getfake(paddr)
+ vm_offset_t paddr;
+{
+ vm_page_t m;
+ int i;
+
+ if (dev_pager_fakelist.tqh_first == NULL) {
+ m = (vm_page_t)malloc(PAGE_SIZE, M_VMPGDATA, M_WAITOK);
+ for (i = PAGE_SIZE / sizeof(*m); i > 0; i--) {
+ TAILQ_INSERT_TAIL(&dev_pager_fakelist, m, pageq);
+ m++;
+ }
+ }
+ m = dev_pager_fakelist.tqh_first;
+ TAILQ_REMOVE(&dev_pager_fakelist, m, pageq);
+
+ m->flags = PG_BUSY | PG_CLEAN | PG_FAKE | PG_FICTITIOUS;
+
+ m->wire_count = 1;
+ m->phys_addr = paddr;
+
+ return(m);
+}
+
+static void
+dev_pager_putfake(m)
+ vm_page_t m;
+{
+#ifdef DIAGNOSTIC
+ if (!(m->flags & PG_FICTITIOUS))
+ panic("dev_pager_putfake: bad page");
+#endif
+ TAILQ_INSERT_TAIL(&dev_pager_fakelist, m, pageq);
+}
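
dev_pager_alloc() and dev_pager_getpage() above do no address translation of their own: they call the character device's d_mmap entry in cdevsw once per page, treating a return of -1 as "unmappable" and otherwise feeding the machine-dependent return value to pmap_phys_address(). A minimal sketch of such an entry point is shown below; the device softc, its fields, and the page-frame return convention are assumptions for illustration, not part of this diff.

/*
 * Sketch of a d_mmap routine of the kind the device pager expects.
 * "examplefb_softc" and its fields are hypothetical.
 */
struct examplefb_softc {
	vm_offset_t	sc_physbase;	/* physical base of device memory */
	vm_size_t	sc_memsize;	/* size of the mappable window */
} examplefb_softc[1];

int
examplefb_mmap(dev, off, prot)
	dev_t dev;
	int off;
	int prot;
{
	struct examplefb_softc *sc = &examplefb_softc[minor(dev)];

	/* Offsets outside the device window are unmappable. */
	if (off < 0 || (vm_size_t)off >= sc->sc_memsize)
		return (-1);
	/*
	 * The return value is machine dependent; here we assume
	 * pmap_phys_address() wants a page frame number (atop of the
	 * physical address), which is a common arrangement.
	 */
	return (atop(sc->sc_physbase + off));
}
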
diff --git a/sys/vm/device_pager.h b/sys/vm/device_pager.h
new file mode 100644
index 0000000..88406229
--- /dev/null
+++ b/sys/vm/device_pager.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 1990 University of Utah.
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * the Systems Programming Group of the University of Utah Computer
+ * Science Department.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)device_pager.h 8.3 (Berkeley) 12/13/93
+ */
+
+#ifndef _DEVICE_PAGER_
+#define _DEVICE_PAGER_ 1
+
+/*
+ * Device pager private data.
+ */
+struct devpager {
+ struct pglist devp_pglist; /* list of pages allocated */
+ vm_object_t devp_object; /* object representing this device */
+};
+typedef struct devpager *dev_pager_t;
+
+#endif /* _DEVICE_PAGER_ */
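
The devpager structure is the pager-private data that device_pager.c stores in pager->pg_data and recovers with a cast; every fake page handed out by dev_pager_getfake() is threaded onto devp_pglist so dev_pager_dealloc() can find and release it later. A tiny illustrative walk of that list, using only the fields declared above (the function itself is hypothetical):

/* Sketch: count the fake pages currently owned by a device pager. */
int
dev_pager_count_fake(pager)
	vm_pager_t pager;
{
	dev_pager_t devp = (dev_pager_t)pager->pg_data;
	vm_page_t m;
	int n = 0;

	for (m = devp->devp_pglist.tqh_first; m != NULL;
	    m = m->pageq.tqe_next)
		n++;
	return (n);
}
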
diff --git a/sys/vm/kern_lock.c b/sys/vm/kern_lock.c
new file mode 100644
index 0000000..c4fa052
--- /dev/null
+++ b/sys/vm/kern_lock.c
@@ -0,0 +1,534 @@
+/*
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * The Mach Operating System project at Carnegie-Mellon University.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)kern_lock.c 8.1 (Berkeley) 6/11/93
+ *
+ *
+ * Copyright (c) 1987, 1990 Carnegie-Mellon University.
+ * All rights reserved.
+ *
+ * Authors: Avadis Tevanian, Jr., Michael Wayne Young
+ *
+ * Permission to use, copy, modify and distribute this software and
+ * its documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ *
+ * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
+ * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
+ * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
+ * School of Computer Science
+ * Carnegie Mellon University
+ * Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie the
+ * rights to redistribute these changes.
+ */
+
+/*
+ * Locking primitives implementation
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+
+#include <vm/vm.h>
+
+/* XXX */
+#include <sys/proc.h>
+typedef int *thread_t;
+#define current_thread() ((thread_t)&curproc->p_thread)
+/* XXX */
+
+#if NCPUS > 1
+
+/*
+ * Module: lock
+ * Function:
+ * Provide reader/writer synchronization.
+ * Implementation:
+ * Simple interlock on a bit. Readers first interlock
+ * increment the reader count, then let go. Writers hold
+ * the interlock (thus preventing further readers), and
+ * wait for already-accepted readers to go away.
+ */
+
+/*
+ * The simple-lock routines are the primitives out of which
+ * the lock package is built. The implementation is left
+ * to the machine-dependent code.
+ */
+
+#ifdef notdef
+/*
+ * A sample implementation of simple locks.
+ * assumes:
+ * boolean_t test_and_set(boolean_t *)
+ * indivisibly sets the boolean to TRUE
+ * and returns its old value
+ * and that setting a boolean to FALSE is indivisible.
+ */
+/*
+ * simple_lock_init initializes a simple lock. A simple lock
+ * may only be used for exclusive locks.
+ */
+
+void simple_lock_init(l)
+ simple_lock_t l;
+{
+ *(boolean_t *)l = FALSE;
+}
+
+void simple_lock(l)
+ simple_lock_t l;
+{
+ while (test_and_set((boolean_t *)l))
+ continue;
+}
+
+void simple_unlock(l)
+ simple_lock_t l;
+{
+ *(boolean_t *)l = FALSE;
+}
+
+boolean_t simple_lock_try(l)
+ simple_lock_t l;
+{
+ return (!test_and_set((boolean_t *)l));
+}
+#endif /* notdef */
+#endif /* NCPUS > 1 */
+
+#if NCPUS > 1
+int lock_wait_time = 100;
+#else /* NCPUS > 1 */
+
+ /*
+ * It is silly to spin on a uni-processor as if we
+ * thought something magical would happen to the
+ * want_write bit while we are executing.
+ */
+int lock_wait_time = 0;
+#endif /* NCPUS > 1 */
+
+
+/*
+ * Routine: lock_init
+ * Function:
+ * Initialize a lock; required before use.
+ * Note that clients declare the "struct lock"
+ * variables and then initialize them, rather
+ * than getting a new one from this module.
+ */
+void lock_init(l, can_sleep)
+ lock_t l;
+ boolean_t can_sleep;
+{
+ bzero(l, sizeof(lock_data_t));
+ simple_lock_init(&l->interlock);
+ l->want_write = FALSE;
+ l->want_upgrade = FALSE;
+ l->read_count = 0;
+ l->can_sleep = can_sleep;
+ l->thread = (char *)-1; /* XXX */
+ l->recursion_depth = 0;
+}
+
+void lock_sleepable(l, can_sleep)
+ lock_t l;
+ boolean_t can_sleep;
+{
+ simple_lock(&l->interlock);
+ l->can_sleep = can_sleep;
+ simple_unlock(&l->interlock);
+}
+
+
+/*
+ * Sleep locks. These use the same data structure and algorithm
+ * as the spin locks, but the process sleeps while it is waiting
+ * for the lock. These work on uniprocessor systems.
+ */
+
+void lock_write(l)
+ register lock_t l;
+{
+ register int i;
+
+ simple_lock(&l->interlock);
+
+ if (((thread_t)l->thread) == current_thread()) {
+ /*
+ * Recursive lock.
+ */
+ l->recursion_depth++;
+ simple_unlock(&l->interlock);
+ return;
+ }
+
+ /*
+ * Try to acquire the want_write bit.
+ */
+ while (l->want_write) {
+ if ((i = lock_wait_time) > 0) {
+ simple_unlock(&l->interlock);
+ while (--i > 0 && l->want_write)
+ continue;
+ simple_lock(&l->interlock);
+ }
+
+ if (l->can_sleep && l->want_write) {
+ l->waiting = TRUE;
+ thread_sleep((int) l, &l->interlock, FALSE);
+ simple_lock(&l->interlock);
+ }
+ }
+ l->want_write = TRUE;
+
+ /* Wait for readers (and upgrades) to finish */
+
+ while ((l->read_count != 0) || l->want_upgrade) {
+ if ((i = lock_wait_time) > 0) {
+ simple_unlock(&l->interlock);
+ while (--i > 0 && (l->read_count != 0 ||
+ l->want_upgrade))
+ continue;
+ simple_lock(&l->interlock);
+ }
+
+ if (l->can_sleep && (l->read_count != 0 || l->want_upgrade)) {
+ l->waiting = TRUE;
+ thread_sleep((int) l, &l->interlock, FALSE);
+ simple_lock(&l->interlock);
+ }
+ }
+ simple_unlock(&l->interlock);
+}
+
+void lock_done(l)
+ register lock_t l;
+{
+ simple_lock(&l->interlock);
+
+ if (l->read_count != 0)
+ l->read_count--;
+ else
+ if (l->recursion_depth != 0)
+ l->recursion_depth--;
+ else
+ if (l->want_upgrade)
+ l->want_upgrade = FALSE;
+ else
+ l->want_write = FALSE;
+
+ if (l->waiting) {
+ l->waiting = FALSE;
+ thread_wakeup((int) l);
+ }
+ simple_unlock(&l->interlock);
+}
+
+void lock_read(l)
+ register lock_t l;
+{
+ register int i;
+
+ simple_lock(&l->interlock);
+
+ if (((thread_t)l->thread) == current_thread()) {
+ /*
+ * Recursive lock.
+ */
+ l->read_count++;
+ simple_unlock(&l->interlock);
+ return;
+ }
+
+ while (l->want_write || l->want_upgrade) {
+ if ((i = lock_wait_time) > 0) {
+ simple_unlock(&l->interlock);
+ while (--i > 0 && (l->want_write || l->want_upgrade))
+ continue;
+ simple_lock(&l->interlock);
+ }
+
+ if (l->can_sleep && (l->want_write || l->want_upgrade)) {
+ l->waiting = TRUE;
+ thread_sleep((int) l, &l->interlock, FALSE);
+ simple_lock(&l->interlock);
+ }
+ }
+
+ l->read_count++;
+ simple_unlock(&l->interlock);
+}
+
+/*
+ * Routine: lock_read_to_write
+ * Function:
+ * Improves a read-only lock to one with
+ * write permission. If another reader has
+ * already requested an upgrade to a write lock,
+ * no lock is held upon return.
+ *
+ * Returns TRUE if the upgrade *failed*.
+ */
+boolean_t lock_read_to_write(l)
+ register lock_t l;
+{
+ register int i;
+
+ simple_lock(&l->interlock);
+
+ l->read_count--;
+
+ if (((thread_t)l->thread) == current_thread()) {
+ /*
+ * Recursive lock.
+ */
+ l->recursion_depth++;
+ simple_unlock(&l->interlock);
+ return(FALSE);
+ }
+
+ if (l->want_upgrade) {
+ /*
+ * Someone else has requested upgrade.
+ * Since we've released a read lock, wake
+ * him up.
+ */
+ if (l->waiting) {
+ l->waiting = FALSE;
+ thread_wakeup((int) l);
+ }
+
+ simple_unlock(&l->interlock);
+ return (TRUE);
+ }
+
+ l->want_upgrade = TRUE;
+
+ while (l->read_count != 0) {
+ if ((i = lock_wait_time) > 0) {
+ simple_unlock(&l->interlock);
+ while (--i > 0 && l->read_count != 0)
+ continue;
+ simple_lock(&l->interlock);
+ }
+
+ if (l->can_sleep && l->read_count != 0) {
+ l->waiting = TRUE;
+ thread_sleep((int) l, &l->interlock, FALSE);
+ simple_lock(&l->interlock);
+ }
+ }
+
+ simple_unlock(&l->interlock);
+ return (FALSE);
+}
+
+void lock_write_to_read(l)
+ register lock_t l;
+{
+ simple_lock(&l->interlock);
+
+ l->read_count++;
+ if (l->recursion_depth != 0)
+ l->recursion_depth--;
+ else
+ if (l->want_upgrade)
+ l->want_upgrade = FALSE;
+ else
+ l->want_write = FALSE;
+
+ if (l->waiting) {
+ l->waiting = FALSE;
+ thread_wakeup((int) l);
+ }
+
+ simple_unlock(&l->interlock);
+}
+
+
+/*
+ * Routine: lock_try_write
+ * Function:
+ * Tries to get a write lock.
+ *
+ * Returns FALSE if the lock is not held on return.
+ */
+
+boolean_t lock_try_write(l)
+ register lock_t l;
+{
+
+ simple_lock(&l->interlock);
+
+ if (((thread_t)l->thread) == current_thread()) {
+ /*
+ * Recursive lock
+ */
+ l->recursion_depth++;
+ simple_unlock(&l->interlock);
+ return(TRUE);
+ }
+
+ if (l->want_write || l->want_upgrade || l->read_count) {
+ /*
+ * Can't get lock.
+ */
+ simple_unlock(&l->interlock);
+ return(FALSE);
+ }
+
+ /*
+ * Have lock.
+ */
+
+ l->want_write = TRUE;
+ simple_unlock(&l->interlock);
+ return(TRUE);
+}
+
+/*
+ * Routine: lock_try_read
+ * Function:
+ * Tries to get a read lock.
+ *
+ * Returns FALSE if the lock is not held on return.
+ */
+
+boolean_t lock_try_read(l)
+ register lock_t l;
+{
+ simple_lock(&l->interlock);
+
+ if (((thread_t)l->thread) == current_thread()) {
+ /*
+ * Recursive lock
+ */
+ l->read_count++;
+ simple_unlock(&l->interlock);
+ return(TRUE);
+ }
+
+ if (l->want_write || l->want_upgrade) {
+ simple_unlock(&l->interlock);
+ return(FALSE);
+ }
+
+ l->read_count++;
+ simple_unlock(&l->interlock);
+ return(TRUE);
+}
+
+/*
+ * Routine: lock_try_read_to_write
+ * Function:
+ * Improves a read-only lock to one with
+ * write permission. If another reader has
+ * already requested an upgrade to a write lock,
+ * the read lock is still held upon return.
+ *
+ * Returns FALSE if the upgrade *failed*.
+ */
+boolean_t lock_try_read_to_write(l)
+ register lock_t l;
+{
+
+ simple_lock(&l->interlock);
+
+ if (((thread_t)l->thread) == current_thread()) {
+ /*
+ * Recursive lock
+ */
+ l->read_count--;
+ l->recursion_depth++;
+ simple_unlock(&l->interlock);
+ return(TRUE);
+ }
+
+ if (l->want_upgrade) {
+ simple_unlock(&l->interlock);
+ return(FALSE);
+ }
+ l->want_upgrade = TRUE;
+ l->read_count--;
+
+ while (l->read_count != 0) {
+ l->waiting = TRUE;
+ thread_sleep((int) l, &l->interlock, FALSE);
+ simple_lock(&l->interlock);
+ }
+
+ simple_unlock(&l->interlock);
+ return(TRUE);
+}
+
+/*
+ * Allow a process that has a lock for write to acquire it
+ * recursively (for read, write, or update).
+ */
+void lock_set_recursive(l)
+ lock_t l;
+{
+ simple_lock(&l->interlock);
+ if (!l->want_write) {
+ panic("lock_set_recursive: don't have write lock");
+ }
+ l->thread = (char *) current_thread();
+ simple_unlock(&l->interlock);
+}
+
+/*
+ * Prevent a lock from being re-acquired.
+ */
+void lock_clear_recursive(l)
+ lock_t l;
+{
+ simple_lock(&l->interlock);
+ if (((thread_t) l->thread) != current_thread()) {
+ panic("lock_clear_recursive: wrong thread");
+ }
+ if (l->recursion_depth == 0)
+ l->thread = (char *)-1; /* XXX */
+ simple_unlock(&l->interlock);
+}
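
Taken together, the routines above give the usual reader/writer discipline: lock_init() once, lock_read()/lock_write() to acquire, the lock_read_done()/lock_write_done() macros (both lock_done()) to release, and lock_read_to_write() to upgrade, keeping in mind that an upgrade return of TRUE means it failed and the read hold has already been dropped. A short usage sketch with a hypothetical lock follows:

struct lock example_lock;		/* hypothetical client lock */

void
example_lock_setup()
{
	lock_init(&example_lock, TRUE);	/* TRUE: blocked lockers may sleep */
}

/*
 * Sketch: inspect shared state under a read lock and upgrade to a
 * write lock only when a modification is actually required.
 */
void
example_update()
{
	lock_read(&example_lock);
	/* ... examine shared data ... */
	if (lock_read_to_write(&example_lock)) {
		/*
		 * Upgrade failed: the read hold is gone, so fall back to
		 * acquiring the write lock from scratch.
		 */
		lock_write(&example_lock);
	}
	/* ... modify shared data ... */
	lock_write_done(&example_lock);
}
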
diff --git a/sys/vm/lock.h b/sys/vm/lock.h
new file mode 100644
index 0000000..26bed1f
--- /dev/null
+++ b/sys/vm/lock.h
@@ -0,0 +1,172 @@
+/*
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * The Mach Operating System project at Carnegie-Mellon University.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)lock.h 8.1 (Berkeley) 6/11/93
+ *
+ *
+ * Copyright (c) 1987, 1990 Carnegie-Mellon University.
+ * All rights reserved.
+ *
+ * Authors: Avadis Tevanian, Jr., Michael Wayne Young
+ *
+ * Permission to use, copy, modify and distribute this software and
+ * its documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ *
+ * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
+ * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
+ * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
+ * School of Computer Science
+ * Carnegie Mellon University
+ * Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie the
+ * rights to redistribute these changes.
+ */
+
+/*
+ * Locking primitives definitions
+ */
+
+#ifndef _LOCK_H_
+#define _LOCK_H_
+
+#define NCPUS 1 /* XXX */
+
+/*
+ * A simple spin lock.
+ */
+
+struct slock {
+ int lock_data; /* in general 1 bit is sufficient */
+};
+
+typedef struct slock simple_lock_data_t;
+typedef struct slock *simple_lock_t;
+
+/*
+ * The general lock structure. Provides for multiple readers,
+ * upgrading from read to write, and sleeping until the lock
+ * can be gained.
+ */
+
+struct lock {
+#ifdef vax
+ /*
+ * Efficient VAX implementation -- see field description below.
+ */
+ unsigned int read_count:16,
+ want_upgrade:1,
+ want_write:1,
+ waiting:1,
+ can_sleep:1,
+ :0;
+
+ simple_lock_data_t interlock;
+#else /* vax */
+#ifdef ns32000
+ /*
+ * Efficient ns32000 implementation --
+ * see field description below.
+ */
+ simple_lock_data_t interlock;
+ unsigned int read_count:16,
+ want_upgrade:1,
+ want_write:1,
+ waiting:1,
+ can_sleep:1,
+ :0;
+
+#else /* ns32000 */
+ /* Only the "interlock" field is used for hardware exclusion;
+ * other fields are modified with normal instructions after
+ * acquiring the interlock bit.
+ */
+ simple_lock_data_t
+ interlock; /* Interlock for remaining fields */
+ boolean_t want_write; /* Writer is waiting, or locked for write */
+ boolean_t want_upgrade; /* Read-to-write upgrade waiting */
+ boolean_t waiting; /* Someone is sleeping on lock */
+ boolean_t can_sleep; /* Can attempts to lock go to sleep */
+ int read_count; /* Number of accepted readers */
+#endif /* ns32000 */
+#endif /* vax */
+ char *thread; /* Thread that has lock, if recursive locking allowed */
+ /* (should be thread_t, but we then have mutually
+ recursive definitions) */
+ int recursion_depth;/* Depth of recursion */
+};
+
+typedef struct lock lock_data_t;
+typedef struct lock *lock_t;
+
+#if NCPUS > 1
+__BEGIN_DECLS
+void simple_lock __P((simple_lock_t));
+void simple_lock_init __P((simple_lock_t));
+boolean_t simple_lock_try __P((simple_lock_t));
+void simple_unlock __P((simple_lock_t));
+__END_DECLS
+#else /* No multiprocessor locking is necessary. */
+#define simple_lock(l)
+#define simple_lock_init(l)
+#define simple_lock_try(l) (1) /* Always succeeds. */
+#define simple_unlock(l)
+#endif
+
+/* Sleep locks must work even if no multiprocessing. */
+
+#define lock_read_done(l) lock_done(l)
+#define lock_write_done(l) lock_done(l)
+
+void lock_clear_recursive __P((lock_t));
+void lock_done __P((lock_t));
+void lock_init __P((lock_t, boolean_t));
+void lock_read __P((lock_t));
+boolean_t lock_read_to_write __P((lock_t));
+void lock_set_recursive __P((lock_t));
+void lock_sleepable __P((lock_t, boolean_t));
+boolean_t lock_try_read __P((lock_t));
+boolean_t lock_try_read_to_write __P((lock_t));
+boolean_t lock_try_write __P((lock_t));
+void lock_write __P((lock_t));
+void lock_write_to_read __P((lock_t));
+#endif /* !_LOCK_H_ */
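
Note the recursion fields at the end of struct lock: a holder of the write lock can call lock_set_recursive() to record itself in the thread field, after which nested lock_write()/lock_read() calls on the same lock from that thread succeed immediately (bumping recursion_depth or read_count) instead of deadlocking, and lock_clear_recursive() turns the check back off. A brief sketch against a hypothetical lock:

extern struct lock obj_lock;	/* hypothetical lock, held for write below */

void
example_recursive_use()
{
	lock_write(&obj_lock);
	lock_set_recursive(&obj_lock);	/* caller must hold the write lock */

	lock_write(&obj_lock);		/* nested acquire: recursion_depth++ */
	/* ... call code that also takes obj_lock ... */
	lock_write_done(&obj_lock);	/* recursion_depth-- */

	lock_clear_recursive(&obj_lock);
	lock_write_done(&obj_lock);	/* finally drops want_write */
}
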
diff --git a/sys/vm/pmap.h b/sys/vm/pmap.h
new file mode 100644
index 0000000..63a83c9
--- /dev/null
+++ b/sys/vm/pmap.h
@@ -0,0 +1,122 @@
+/*
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * The Mach Operating System project at Carnegie-Mellon University.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)pmap.h 8.1 (Berkeley) 6/11/93
+ *
+ *
+ * Copyright (c) 1987, 1990 Carnegie-Mellon University.
+ * All rights reserved.
+ *
+ * Author: Avadis Tevanian, Jr.
+ *
+ * Permission to use, copy, modify and distribute this software and
+ * its documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ *
+ * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
+ * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
+ * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
+ * School of Computer Science
+ * Carnegie Mellon University
+ * Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie the
+ * rights to redistribute these changes.
+ */
+
+/*
+ * Machine address mapping definitions -- machine-independent
+ * section. [For machine-dependent section, see "machine/pmap.h".]
+ */
+
+#ifndef _PMAP_VM_
+#define _PMAP_VM_
+
+/*
+ * Each machine dependent implementation is expected to
+ * keep certain statistics. They may do this any way they
+ * choose, but are expected to return the statistics
+ * in the following structure.
+ */
+struct pmap_statistics {
+ long resident_count; /* # of pages mapped (total)*/
+ long wired_count; /* # of pages wired */
+};
+typedef struct pmap_statistics *pmap_statistics_t;
+
+#include <machine/pmap.h>
+
+#ifdef KERNEL
+__BEGIN_DECLS
+void *pmap_bootstrap_alloc __P((int));
+void pmap_bootstrap( /* machine dependent */ );
+void pmap_change_wiring __P((pmap_t, vm_offset_t, boolean_t));
+void pmap_clear_modify __P((vm_offset_t pa));
+void pmap_clear_reference __P((vm_offset_t pa));
+void pmap_collect __P((pmap_t));
+void pmap_copy __P((pmap_t,
+ pmap_t, vm_offset_t, vm_size_t, vm_offset_t));
+void pmap_copy_page __P((vm_offset_t, vm_offset_t));
+pmap_t pmap_create __P((vm_size_t));
+void pmap_destroy __P((pmap_t));
+void pmap_enter __P((pmap_t,
+ vm_offset_t, vm_offset_t, vm_prot_t, boolean_t));
+vm_offset_t pmap_extract __P((pmap_t, vm_offset_t));
+void pmap_init __P((vm_offset_t, vm_offset_t));
+boolean_t pmap_is_modified __P((vm_offset_t pa));
+boolean_t pmap_is_referenced __P((vm_offset_t pa));
+vm_offset_t pmap_map __P((vm_offset_t, vm_offset_t, vm_offset_t, int));
+void pmap_page_protect __P((vm_offset_t, vm_prot_t));
+void pmap_pageable __P((pmap_t,
+ vm_offset_t, vm_offset_t, boolean_t));
+vm_offset_t pmap_phys_address __P((int));
+void pmap_pinit __P((pmap_t));
+void pmap_protect __P((pmap_t,
+ vm_offset_t, vm_offset_t, vm_prot_t));
+void pmap_reference __P((pmap_t));
+void pmap_release __P((pmap_t));
+void pmap_remove __P((pmap_t, vm_offset_t, vm_offset_t));
+void pmap_update __P((void));
+void pmap_zero_page __P((vm_offset_t));
+__END_DECLS
+#endif
+
+#endif /* _PMAP_VM_ */
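
Everything machine-independent in the VM system manipulates translations only through the calls declared above; the fault path enters mappings and the pageout path queries and clears the hardware reference/modify state. A condensed sketch of that call pattern (the names and surrounding scenario are placeholders, not code from this tree):

/*
 * Sketch: map a resident page after a fault, then later consult and
 * clear its modify state on the pageout side.
 */
void
example_pmap_usage(pmap, va, pa)
	pmap_t pmap;
	vm_offset_t va;		/* page-aligned virtual address */
	vm_offset_t pa;		/* physical address of the backing page */
{
	/* Install va -> pa with read/write permission, not wired. */
	pmap_enter(pmap, va, pa, VM_PROT_READ | VM_PROT_WRITE, FALSE);

	/* ... later, during pageout ... */
	if (pmap_is_modified(pa)) {
		/* The page is dirty and must be written before reuse. */
		pmap_clear_modify(pa);
	}

	/* Remove every mapping of the physical page before freeing it. */
	pmap_page_protect(pa, VM_PROT_NONE);
}
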
diff --git a/sys/vm/swap_pager.c b/sys/vm/swap_pager.c
new file mode 100644
index 0000000..a534d42
--- /dev/null
+++ b/sys/vm/swap_pager.c
@@ -0,0 +1,1833 @@
+/*
+ * Copyright (c) 1994 John S. Dyson
+ * Copyright (c) 1990 University of Utah.
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * the Systems Programming Group of the University of Utah Computer
+ * Science Department.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * from: Utah $Hdr: swap_pager.c 1.4 91/04/30$
+ *
+ * @(#)swap_pager.c 8.9 (Berkeley) 3/21/94
+ */
+
+/*
+ * Quick hack to page to dedicated partition(s).
+ * TODO:
+ * Add multiprocessor locks
+ * Deal with async writes in a better fashion
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/buf.h>
+#include <sys/vnode.h>
+#include <sys/malloc.h>
+
+#include <miscfs/specfs/specdev.h>
+#include <sys/rlist.h>
+
+#include <vm/vm.h>
+#include <vm/vm_pager.h>
+#include <vm/vm_page.h>
+#include <vm/vm_pageout.h>
+#include <vm/swap_pager.h>
+
+#ifndef NPENDINGIO
+#define NPENDINGIO 16
+#endif
+
+extern int nswbuf;
+int nswiodone;
+extern int vm_pageout_rate_limit;
+static int cleandone;
+extern int hz;
+int swap_pager_full;
+extern vm_map_t pager_map;
+extern int vm_pageout_pages_needed;
+extern int vm_swap_size;
+extern struct vnode *swapdev_vp;
+
+#define MAX_PAGEOUT_CLUSTER 8
+
+TAILQ_HEAD(swpclean, swpagerclean);
+
+typedef struct swpagerclean *swp_clean_t;
+
+struct swpagerclean {
+ TAILQ_ENTRY(swpagerclean) spc_list;
+ int spc_flags;
+ struct buf *spc_bp;
+ sw_pager_t spc_swp;
+ vm_offset_t spc_kva;
+ vm_offset_t spc_altkva;
+ int spc_count;
+ vm_page_t spc_m[MAX_PAGEOUT_CLUSTER];
+} swcleanlist [NPENDINGIO] ;
+
+
+extern vm_map_t kernel_map;
+
+/* spc_flags values */
+#define SPC_ERROR 0x01
+
+#define SWB_EMPTY (-1)
+
+void swap_pager_init(void);
+vm_pager_t swap_pager_alloc(caddr_t, vm_size_t, vm_prot_t, vm_offset_t);
+void swap_pager_dealloc(vm_pager_t);
+boolean_t swap_pager_getpage(vm_pager_t, vm_page_t, boolean_t);
+boolean_t swap_pager_putpage(vm_pager_t, vm_page_t, boolean_t);
+boolean_t swap_pager_getmulti(vm_pager_t, vm_page_t *, int, int, boolean_t);
+boolean_t swap_pager_haspage(vm_pager_t, vm_offset_t);
+int swap_pager_io(sw_pager_t, vm_page_t *, int, int, int);
+void swap_pager_iodone(struct buf *);
+boolean_t swap_pager_clean();
+
+extern struct pagerops swappagerops;
+
+struct swpclean swap_pager_done; /* list of completed page cleans */
+struct swpclean swap_pager_inuse; /* list of pending page cleans */
+struct swpclean swap_pager_free; /* list of free pager clean structs */
+struct pagerlst swap_pager_list; /* list of "named" anon regions */
+struct pagerlst swap_pager_un_list; /* list of "unnamed" anon pagers */
+
+#define SWAP_FREE_NEEDED 0x1 /* need a swap block */
+int swap_pager_needflags;
+struct rlist *swapfrag;
+
+struct pagerlst *swp_qs[]={
+ &swap_pager_list, &swap_pager_un_list, (struct pagerlst *) 0
+};
+
+int swap_pager_putmulti();
+
+struct pagerops swappagerops = {
+ swap_pager_init,
+ swap_pager_alloc,
+ swap_pager_dealloc,
+ swap_pager_getpage,
+ swap_pager_getmulti,
+ swap_pager_putpage,
+ swap_pager_putmulti,
+ swap_pager_haspage
+};
+
+extern int nswbuf;
+
+int npendingio = NPENDINGIO;
+int pendingiowait;
+int require_swap_init;
+void swap_pager_finish();
+int dmmin, dmmax;
+extern int vm_page_count;
+
+struct buf * getpbuf() ;
+void relpbuf(struct buf *bp) ;
+
+static inline void swapsizecheck() {
+ if( vm_swap_size < 128*btodb(PAGE_SIZE)) {
+ if( swap_pager_full)
+ printf("swap_pager: out of space\n");
+ swap_pager_full = 1;
+ } else if( vm_swap_size > 192*btodb(PAGE_SIZE))
+ swap_pager_full = 0;
+}
+
+void
+swap_pager_init()
+{
+ extern int dmmin, dmmax;
+
+ dfltpagerops = &swappagerops;
+
+ TAILQ_INIT(&swap_pager_list);
+ TAILQ_INIT(&swap_pager_un_list);
+
+ /*
+ * Initialize clean lists
+ */
+ TAILQ_INIT(&swap_pager_inuse);
+ TAILQ_INIT(&swap_pager_done);
+ TAILQ_INIT(&swap_pager_free);
+
+ require_swap_init = 1;
+
+ /*
+ * Calculate the swap allocation constants.
+ */
+
+ dmmin = CLBYTES/DEV_BSIZE;
+ dmmax = btodb(SWB_NPAGES*PAGE_SIZE)*2;
+
+}
+
+/*
+ * Allocate a pager structure and associated resources.
+ * Note that if we are called from the pageout daemon (handle == NULL)
+ * we should not wait for memory as it could result in deadlock.
+ */
+vm_pager_t
+swap_pager_alloc(handle, size, prot, offset)
+ caddr_t handle;
+ register vm_size_t size;
+ vm_prot_t prot;
+ vm_offset_t offset;
+{
+ register vm_pager_t pager;
+ register sw_pager_t swp;
+ int waitok;
+ int i,j;
+
+ if (require_swap_init) {
+ swp_clean_t spc;
+ struct buf *bp;
+ /*
+ * kva's are allocated here so that we don't need to keep
+ * doing kmem_alloc pageables at runtime
+ */
+ for (i = 0, spc = swcleanlist; i < npendingio ; i++, spc++) {
+ spc->spc_kva = kmem_alloc_pageable(pager_map, PAGE_SIZE);
+ if (!spc->spc_kva) {
+ break;
+ }
+ spc->spc_bp = malloc( sizeof( *bp), M_TEMP, M_NOWAIT);
+ if (!spc->spc_bp) {
+ kmem_free_wakeup(pager_map, spc->spc_kva, PAGE_SIZE);
+ break;
+ }
+ spc->spc_flags = 0;
+ TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list);
+ }
+ require_swap_init = 0;
+ if( size == 0)
+ return(NULL);
+ }
+
+ /*
+ * If this is a "named" anonymous region, look it up and
+ * return the appropriate pager if it exists.
+ */
+ if (handle) {
+ pager = vm_pager_lookup(&swap_pager_list, handle);
+ if (pager != NULL) {
+ /*
+ * Use vm_object_lookup to gain a reference
+ * to the object and also to remove from the
+ * object cache.
+ */
+ if (vm_object_lookup(pager) == NULL)
+ panic("swap_pager_alloc: bad object");
+ return(pager);
+ }
+ }
+
+ if (swap_pager_full) {
+ return(NULL);
+ }
+
+ /*
+ * Pager doesn't exist, allocate swap management resources
+ * and initialize.
+ */
+ waitok = handle ? M_WAITOK : M_NOWAIT;
+ pager = (vm_pager_t)malloc(sizeof *pager, M_VMPAGER, waitok);
+ if (pager == NULL)
+ return(NULL);
+ swp = (sw_pager_t)malloc(sizeof *swp, M_VMPGDATA, waitok);
+ if (swp == NULL) {
+ free((caddr_t)pager, M_VMPAGER);
+ return(NULL);
+ }
+ size = round_page(size);
+ swp->sw_osize = size;
+ swp->sw_nblocks = (btodb(size) + btodb(SWB_NPAGES * PAGE_SIZE) - 1) / btodb(SWB_NPAGES*PAGE_SIZE);
+ swp->sw_blocks = (sw_blk_t)
+ malloc(swp->sw_nblocks*sizeof(*swp->sw_blocks),
+ M_VMPGDATA, waitok);
+ if (swp->sw_blocks == NULL) {
+ free((caddr_t)swp, M_VMPGDATA);
+ free((caddr_t)pager, M_VMPAGER);
+ return(NULL);
+ }
+
+ for (i = 0; i < swp->sw_nblocks; i++) {
+ swp->sw_blocks[i].swb_valid = 0;
+ swp->sw_blocks[i].swb_locked = 0;
+ for (j = 0; j < SWB_NPAGES; j++)
+ swp->sw_blocks[i].swb_block[j] = SWB_EMPTY;
+ }
+
+ swp->sw_poip = 0;
+ if (handle) {
+ vm_object_t object;
+
+ swp->sw_flags = SW_NAMED;
+ TAILQ_INSERT_TAIL(&swap_pager_list, pager, pg_list);
+ /*
+ * Consistent with other pagers: return with object
+ * referenced. Can't do this with handle == NULL
+ * since it might be the pageout daemon calling.
+ */
+ object = vm_object_allocate(size);
+ vm_object_enter(object, pager);
+ vm_object_setpager(object, pager, 0, FALSE);
+ } else {
+ swp->sw_flags = 0;
+ TAILQ_INSERT_TAIL(&swap_pager_un_list, pager, pg_list);
+ }
+ pager->pg_handle = handle;
+ pager->pg_ops = &swappagerops;
+ pager->pg_type = PG_SWAP;
+ pager->pg_data = (caddr_t)swp;
+
+ return(pager);
+}
+
+/*
+ * Returns the disk block associated with the pager and offset;
+ * additionally, as a side effect, returns a flag indicating
+ * whether the block has been written.
+ */
+
+static int *
+swap_pager_diskaddr(swp, offset, valid)
+ sw_pager_t swp;
+ vm_offset_t offset;
+ int *valid;
+{
+ register sw_blk_t swb;
+ int ix;
+
+ if (valid)
+ *valid = 0;
+ ix = offset / (SWB_NPAGES*PAGE_SIZE);
+ if (swp->sw_blocks == NULL || ix >= swp->sw_nblocks) {
+ return(FALSE);
+ }
+ swb = &swp->sw_blocks[ix];
+ ix = (offset % (SWB_NPAGES*PAGE_SIZE)) / PAGE_SIZE;
+ if (valid)
+ *valid = swb->swb_valid & (1<<ix);
+ return &swb->swb_block[ix];
+}
+
+/*
+ * Utility routine to set the valid (written) bit for
+ * a block associated with a pager and offset
+ */
+static void
+swap_pager_setvalid(swp, offset, valid)
+ sw_pager_t swp;
+ vm_offset_t offset;
+ int valid;
+{
+ register sw_blk_t swb;
+ int ix;
+
+ ix = offset / (SWB_NPAGES*PAGE_SIZE);
+ if (swp->sw_blocks == NULL || ix >= swp->sw_nblocks)
+ return;
+
+ swb = &swp->sw_blocks[ix];
+ ix = (offset % (SWB_NPAGES*PAGE_SIZE)) / PAGE_SIZE;
+ if (valid)
+ swb->swb_valid |= (1 << ix);
+ else
+ swb->swb_valid &= ~(1 << ix);
+ return;
+}
+
+/*
+ * this routine allocates swap space with a fragmentation
+ * minimization policy.
+ */
+int
+swap_pager_getswapspace( unsigned amount, unsigned *rtval) {
+ unsigned tmpalloc;
+ unsigned nblocksfrag = btodb(SWB_NPAGES*PAGE_SIZE);
+ if( amount < nblocksfrag) {
+ if( rlist_alloc(&swapfrag, amount, rtval))
+ return 1;
+ if( !rlist_alloc(&swapmap, nblocksfrag, &tmpalloc))
+ return 0;
+ rlist_free( &swapfrag, tmpalloc+amount, tmpalloc + nblocksfrag - 1);
+ *rtval = tmpalloc;
+ return 1;
+ }
+ if( !rlist_alloc(&swapmap, amount, rtval))
+ return 0;
+ else
+ return 1;
+}
+
+/*
+ * this routine frees swap space with a fragmentation
+ * minimization policy.
+ */
+void
+swap_pager_freeswapspace( unsigned from, unsigned to) {
+ unsigned nblocksfrag = btodb(SWB_NPAGES*PAGE_SIZE);
+ unsigned tmpalloc;
+ if( ((to + 1) - from) >= nblocksfrag) {
+ while( (from + nblocksfrag) <= to + 1) {
+ rlist_free(&swapmap, from, from + nblocksfrag - 1);
+ from += nblocksfrag;
+ }
+ }
+ if( from >= to)
+ return;
+ rlist_free(&swapfrag, from, to);
+ while( rlist_alloc(&swapfrag, nblocksfrag, &tmpalloc)) {
+ rlist_free(&swapmap, tmpalloc, tmpalloc + nblocksfrag-1);
+ }
+}
+/*
+ * this routine frees swap blocks from a specified pager
+ */
+void
+_swap_pager_freespace(swp, start, size)
+ sw_pager_t swp;
+ vm_offset_t start;
+ vm_offset_t size;
+{
+ vm_offset_t i;
+ int s;
+
+ s = splbio();
+ for (i = start; i < round_page(start + size - 1); i += PAGE_SIZE) {
+ int valid;
+ int *addr = swap_pager_diskaddr(swp, i, &valid);
+ if (addr && *addr != SWB_EMPTY) {
+ swap_pager_freeswapspace(*addr, *addr+btodb(PAGE_SIZE) - 1);
+ if( valid) {
+ vm_swap_size += btodb(PAGE_SIZE);
+ swap_pager_setvalid(swp, i, 0);
+ }
+ *addr = SWB_EMPTY;
+ }
+ }
+ swapsizecheck();
+ splx(s);
+}
+
+void
+swap_pager_freespace(pager, start, size)
+ vm_pager_t pager;
+ vm_offset_t start;
+ vm_offset_t size;
+{
+ _swap_pager_freespace((sw_pager_t) pager->pg_data, start, size);
+}
+
+/*
+ * swap_pager_reclaim frees up over-allocated space from all pagers.
+ * This eliminates internal fragmentation due to allocation of space
+ * for segments that are never swapped to. It has been written so that
+ * it does not block until the rlist_free operation occurs; it keeps
+ * the queues consistent.
+ */
+
+/*
+ * Maximum number of blocks (pages) to reclaim per pass
+ */
+#define MAXRECLAIM 256
+
+void
+swap_pager_reclaim()
+{
+ vm_pager_t p;
+ sw_pager_t swp;
+ int i, j, k;
+ int s;
+ int reclaimcount;
+ static int reclaims[MAXRECLAIM];
+ static int in_reclaim;
+
+/*
+ * allow only one process to be in the swap_pager_reclaim subroutine
+ */
+ s = splbio();
+ if (in_reclaim) {
+ tsleep((caddr_t) &in_reclaim, PSWP, "swrclm", 0);
+ splx(s);
+ return;
+ }
+ in_reclaim = 1;
+ reclaimcount = 0;
+
+ /* for each pager queue */
+ for (k = 0; swp_qs[k]; k++) {
+
+ p = swp_qs[k]->tqh_first;
+ while (p && (reclaimcount < MAXRECLAIM)) {
+
+ /*
+ * see if any blocks associated with a pager have been
+ * allocated but not used (written)
+ */
+ swp = (sw_pager_t) p->pg_data;
+ for (i = 0; i < swp->sw_nblocks; i++) {
+ sw_blk_t swb = &swp->sw_blocks[i];
+ if( swb->swb_locked)
+ continue;
+ for (j = 0; j < SWB_NPAGES; j++) {
+ if (swb->swb_block[j] != SWB_EMPTY &&
+ (swb->swb_valid & (1 << j)) == 0) {
+ reclaims[reclaimcount++] = swb->swb_block[j];
+ swb->swb_block[j] = SWB_EMPTY;
+ if (reclaimcount >= MAXRECLAIM)
+ goto rfinished;
+ }
+ }
+ }
+ p = p->pg_list.tqe_next;
+ }
+ }
+
+rfinished:
+
+/*
+ * free the blocks that have been added to the reclaim list
+ */
+ for (i = 0; i < reclaimcount; i++) {
+ swap_pager_freeswapspace(reclaims[i], reclaims[i]+btodb(PAGE_SIZE) - 1);
+ swapsizecheck();
+ wakeup((caddr_t) &in_reclaim);
+ }
+
+ splx(s);
+ in_reclaim = 0;
+ wakeup((caddr_t) &in_reclaim);
+}
+
+
+/*
+ * swap_pager_copy copies blocks from one pager to another and
+ * destroys the source pager
+ */
+
+void
+swap_pager_copy(srcpager, srcoffset, dstpager, dstoffset, offset)
+ vm_pager_t srcpager;
+ vm_offset_t srcoffset;
+ vm_pager_t dstpager;
+ vm_offset_t dstoffset;
+ vm_offset_t offset;
+{
+ sw_pager_t srcswp, dstswp;
+ vm_offset_t i;
+ int s;
+
+ srcswp = (sw_pager_t) srcpager->pg_data;
+ dstswp = (sw_pager_t) dstpager->pg_data;
+
+/*
+ * remove the source pager from the swap_pager internal queue
+ */
+ s = splbio();
+ if (srcswp->sw_flags & SW_NAMED) {
+ TAILQ_REMOVE(&swap_pager_list, srcpager, pg_list);
+ srcswp->sw_flags &= ~SW_NAMED;
+ } else {
+ TAILQ_REMOVE(&swap_pager_un_list, srcpager, pg_list);
+ }
+
+ while (srcswp->sw_poip) {
+ tsleep((caddr_t)srcswp, PVM, "spgout", 0);
+ }
+ splx(s);
+
+/*
+ * clean all of the pages that are currently active and finished
+ */
+ (void) swap_pager_clean();
+
+ s = splbio();
+/*
+ * clear source block before destination object
+ * (release allocated space)
+ */
+ for (i = 0; i < offset + srcoffset; i += PAGE_SIZE) {
+ int valid;
+ int *addr = swap_pager_diskaddr(srcswp, i, &valid);
+ if (addr && *addr != SWB_EMPTY) {
+ swap_pager_freeswapspace(*addr, *addr+btodb(PAGE_SIZE) - 1);
+ if( valid)
+ vm_swap_size += btodb(PAGE_SIZE);
+ swapsizecheck();
+ *addr = SWB_EMPTY;
+ }
+ }
+/*
+ * transfer source to destination
+ */
+ for (i = 0; i < dstswp->sw_osize; i += PAGE_SIZE) {
+ int srcvalid, dstvalid;
+ int *srcaddrp = swap_pager_diskaddr(srcswp, i + offset + srcoffset,
+ &srcvalid);
+ int *dstaddrp;
+ /*
+ * see if the source has space allocated
+ */
+ if (srcaddrp && *srcaddrp != SWB_EMPTY) {
+ /*
+ * if the source is valid and the dest has no space, then
+ * copy the allocation from the source to the dest.
+ */
+ if (srcvalid) {
+ dstaddrp = swap_pager_diskaddr(dstswp, i + dstoffset, &dstvalid);
+ /*
+ * if the dest already has a valid block, deallocate the
+ * source block without copying.
+ */
+ if (!dstvalid && dstaddrp && *dstaddrp != SWB_EMPTY) {
+ swap_pager_freeswapspace(*dstaddrp, *dstaddrp+btodb(PAGE_SIZE) - 1);
+ *dstaddrp = SWB_EMPTY;
+ }
+ if (dstaddrp && *dstaddrp == SWB_EMPTY) {
+ *dstaddrp = *srcaddrp;
+ *srcaddrp = SWB_EMPTY;
+ swap_pager_setvalid(dstswp, i + dstoffset, 1);
+ vm_swap_size -= btodb(PAGE_SIZE);
+ }
+ }
+ /*
+ * if the source is not empty at this point, then deallocate the space.
+ */
+ if (*srcaddrp != SWB_EMPTY) {
+ swap_pager_freeswapspace(*srcaddrp, *srcaddrp+btodb(PAGE_SIZE) - 1);
+ if( srcvalid)
+ vm_swap_size += btodb(PAGE_SIZE);
+ *srcaddrp = SWB_EMPTY;
+ }
+ }
+ }
+
+/*
+ * deallocate the rest of the source object
+ */
+ for (i = dstswp->sw_osize + offset + srcoffset; i < srcswp->sw_osize; i += PAGE_SIZE) {
+ int valid;
+ int *srcaddrp = swap_pager_diskaddr(srcswp, i, &valid);
+ if (srcaddrp && *srcaddrp != SWB_EMPTY) {
+ swap_pager_freeswapspace(*srcaddrp, *srcaddrp+btodb(PAGE_SIZE) - 1);
+ if( valid)
+ vm_swap_size += btodb(PAGE_SIZE);
+ *srcaddrp = SWB_EMPTY;
+ }
+ }
+
+ swapsizecheck();
+ splx(s);
+
+ free((caddr_t)srcswp->sw_blocks, M_VMPGDATA);
+ srcswp->sw_blocks = 0;
+ free((caddr_t)srcswp, M_VMPGDATA);
+ srcpager->pg_data = 0;
+ free((caddr_t)srcpager, M_VMPAGER);
+
+ return;
+}
+
+
+void
+swap_pager_dealloc(pager)
+ vm_pager_t pager;
+{
+ register int i,j;
+ register sw_blk_t bp;
+ register sw_pager_t swp;
+ int s;
+
+ /*
+ * Remove from list right away so lookups will fail if we
+ * block for pageout completion.
+ */
+ s = splbio();
+ swp = (sw_pager_t) pager->pg_data;
+ if (swp->sw_flags & SW_NAMED) {
+ TAILQ_REMOVE(&swap_pager_list, pager, pg_list);
+ swp->sw_flags &= ~SW_NAMED;
+ } else {
+ TAILQ_REMOVE(&swap_pager_un_list, pager, pg_list);
+ }
+ /*
+ * Wait for all pageouts to finish and remove
+ * all entries from cleaning list.
+ */
+
+ while (swp->sw_poip) {
+ tsleep((caddr_t)swp, PVM, "swpout", 0);
+ }
+ splx(s);
+
+
+ (void) swap_pager_clean();
+
+ /*
+ * Free left over swap blocks
+ */
+ s = splbio();
+ for (i = 0, bp = swp->sw_blocks; i < swp->sw_nblocks; i++, bp++) {
+ for (j = 0; j < SWB_NPAGES; j++)
+ if (bp->swb_block[j] != SWB_EMPTY) {
+ swap_pager_freeswapspace((unsigned)bp->swb_block[j],
+ (unsigned)bp->swb_block[j] + btodb(PAGE_SIZE) - 1);
+ if( bp->swb_valid & (1<<j))
+ vm_swap_size += btodb(PAGE_SIZE);
+ bp->swb_block[j] = SWB_EMPTY;
+ }
+ }
+ splx(s);
+ swapsizecheck();
+
+ /*
+ * Free swap management resources
+ */
+ free((caddr_t)swp->sw_blocks, M_VMPGDATA);
+ swp->sw_blocks = 0;
+ free((caddr_t)swp, M_VMPGDATA);
+ pager->pg_data = 0;
+ free((caddr_t)pager, M_VMPAGER);
+}
+
+/*
+ * swap_pager_getmulti can get multiple pages.
+ */
+int
+swap_pager_getmulti(pager, m, count, reqpage, sync)
+ vm_pager_t pager;
+ vm_page_t *m;
+ int count;
+ int reqpage;
+ boolean_t sync;
+{
+ if( reqpage >= count)
+ panic("swap_pager_getmulti: reqpage >= count\n");
+ return swap_pager_input((sw_pager_t) pager->pg_data, m, count, reqpage);
+}
+
+/*
+ * swap_pager_getpage gets individual pages
+ */
+int
+swap_pager_getpage(pager, m, sync)
+ vm_pager_t pager;
+ vm_page_t m;
+ boolean_t sync;
+{
+ vm_page_t marray[1];
+
+ marray[0] = m;
+ return swap_pager_input((sw_pager_t)pager->pg_data, marray, 1, 0);
+}
+
+int
+swap_pager_putmulti(pager, m, c, sync, rtvals)
+ vm_pager_t pager;
+ vm_page_t *m;
+ int c;
+ boolean_t sync;
+ int *rtvals;
+{
+ int flags;
+
+ if (pager == NULL) {
+ (void) swap_pager_clean();
+ return VM_PAGER_OK;
+ }
+
+ flags = B_WRITE;
+ if (!sync)
+ flags |= B_ASYNC;
+
+ return swap_pager_output((sw_pager_t)pager->pg_data, m, c, flags, rtvals);
+}
+
+/*
+ * swap_pager_putpage writes individual pages
+ */
+int
+swap_pager_putpage(pager, m, sync)
+ vm_pager_t pager;
+ vm_page_t m;
+ boolean_t sync;
+{
+ int flags;
+ vm_page_t marray[1];
+ int rtvals[1];
+
+
+ if (pager == NULL) {
+ (void) swap_pager_clean();
+ return VM_PAGER_OK;
+ }
+
+ marray[0] = m;
+ flags = B_WRITE;
+ if (!sync)
+ flags |= B_ASYNC;
+
+ swap_pager_output((sw_pager_t)pager->pg_data, marray, 1, flags, rtvals);
+
+ return rtvals[0];
+}
+
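+/*
+ * helper routines that convert a byte offset within the object into an
+ * index into the sw_blocks array and into a page slot within that
+ * swblock (each swblock covers SWB_NPAGES pages of swap).
+ */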
+static inline int
+const swap_pager_block_index(swp, offset)
+ sw_pager_t swp;
+ vm_offset_t offset;
+{
+ return (offset / (SWB_NPAGES*PAGE_SIZE));
+}
+
+static inline int
+const swap_pager_block_offset(swp, offset)
+ sw_pager_t swp;
+ vm_offset_t offset;
+{
+ return ((offset % (PAGE_SIZE*SWB_NPAGES)) / PAGE_SIZE);
+}
+
+/*
+ * _swap_pager_haspage returns TRUE if the pager has data that has
+ * been written out.
+ */
+static boolean_t
+_swap_pager_haspage(swp, offset)
+ sw_pager_t swp;
+ vm_offset_t offset;
+{
+ register sw_blk_t swb;
+ int ix;
+
+ ix = offset / (SWB_NPAGES*PAGE_SIZE);
+ if (swp->sw_blocks == NULL || ix >= swp->sw_nblocks) {
+ return(FALSE);
+ }
+ swb = &swp->sw_blocks[ix];
+ ix = (offset % (SWB_NPAGES*PAGE_SIZE)) / PAGE_SIZE;
+ if (swb->swb_block[ix] != SWB_EMPTY) {
+ if (swb->swb_valid & (1 << ix))
+ return TRUE;
+ }
+
+ return(FALSE);
+}
+
+/*
+ * swap_pager_haspage is the externally accessible version of
+ * _swap_pager_haspage above. this routine takes a vm_pager_t
+ * for an argument instead of sw_pager_t.
+ */
+boolean_t
+swap_pager_haspage(pager, offset)
+ vm_pager_t pager;
+ vm_offset_t offset;
+{
+ return _swap_pager_haspage((sw_pager_t) pager->pg_data, offset);
+}
+
+/*
+ * swap_pager_freepage is a convenience routine that clears the busy
+ * bit and deallocates a page.
+ */
+static void
+swap_pager_freepage(m)
+ vm_page_t m;
+{
+ PAGE_WAKEUP(m);
+ vm_page_free(m);
+}
+
+/*
+ * swap_pager_ridpages is a convenience routine that deallocates all
+ * but the required page. this is usually used in error returns that
+ * need to invalidate the "extra" readahead pages.
+ */
+static void
+swap_pager_ridpages(m, count, reqpage)
+ vm_page_t *m;
+ int count;
+ int reqpage;
+{
+ int i;
+ for (i = 0; i < count; i++)
+ if (i != reqpage)
+ swap_pager_freepage(m[i]);
+}
+
+int swapwritecount=0;
+
+/*
+ * swap_pager_iodone1 is the completion routine for both reads and async writes
+ */
+void
+swap_pager_iodone1(bp)
+ struct buf *bp;
+{
+ bp->b_flags |= B_DONE;
+ bp->b_flags &= ~B_ASYNC;
+ wakeup((caddr_t)bp);
+/*
+ if ((bp->b_flags & B_READ) == 0)
+ vwakeup(bp);
+*/
+}
+
+
+int
+swap_pager_input(swp, m, count, reqpage)
+ register sw_pager_t swp;
+ vm_page_t *m;
+ int count, reqpage;
+{
+ register struct buf *bp;
+ sw_blk_t swb[count];
+ register int s;
+ int i;
+ boolean_t rv;
+ vm_offset_t kva, off[count];
+ swp_clean_t spc;
+ vm_offset_t paging_offset;
+ vm_object_t object;
+ int reqaddr[count];
+
+ int first, last;
+ int failed;
+ int reqdskregion;
+
+ object = m[reqpage]->object;
+ paging_offset = object->paging_offset;
+ /*
+ * First determine if the page exists in the pager if this is
+ * a sync read. This quickly handles cases where we are
+ * following shadow chains looking for the top level object
+ * with the page.
+ */
+ if (swp->sw_blocks == NULL) {
+ swap_pager_ridpages(m, count, reqpage);
+ return(VM_PAGER_FAIL);
+ }
+
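+	/*
+	 * compute the swap block, page slot and disk address for each
+	 * page; pages that fall beyond the allocated block array are
+	 * trimmed off (or the whole request fails if the required page
+	 * is among them).
+	 */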
+ for(i = 0; i < count; i++) {
+ vm_offset_t foff = m[i]->offset + paging_offset;
+ int ix = swap_pager_block_index(swp, foff);
+ if (ix >= swp->sw_nblocks) {
+ int j;
+ if( i <= reqpage) {
+ swap_pager_ridpages(m, count, reqpage);
+ return(VM_PAGER_FAIL);
+ }
+ for(j = i; j < count; j++) {
+ swap_pager_freepage(m[j]);
+ }
+ count = i;
+ break;
+ }
+
+ swb[i] = &swp->sw_blocks[ix];
+ off[i] = swap_pager_block_offset(swp, foff);
+ reqaddr[i] = swb[i]->swb_block[off[i]];
+ }
+
+	/* make sure that the required page actually has valid swap space allocated */
+
+ if (reqaddr[reqpage] == SWB_EMPTY ||
+ (swb[reqpage]->swb_valid & (1 << off[reqpage])) == 0) {
+ swap_pager_ridpages(m, count, reqpage);
+ return(VM_PAGER_FAIL);
+ }
+
+
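+	/*
+	 * a clustered transfer must not cross a dmmax-sized swap region
+	 * boundary, so remember which region the required page lives in.
+	 */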
+ reqdskregion = reqaddr[reqpage] / dmmax;
+
+ /*
+ * search backwards for the first contiguous page to transfer
+ */
+ failed = 0;
+ first = 0;
+ for (i = reqpage - 1; i >= 0; --i) {
+ if ( failed || (reqaddr[i] == SWB_EMPTY) ||
+ (swb[i]->swb_valid & (1 << off[i])) == 0 ||
+ (reqaddr[i] != (reqaddr[reqpage] + (i - reqpage) * btodb(PAGE_SIZE))) ||
+ ((reqaddr[i] / dmmax) != reqdskregion)) {
+ failed = 1;
+ swap_pager_freepage(m[i]);
+ if (first == 0)
+ first = i + 1;
+ }
+ }
+ /*
+ * search forwards for the last contiguous page to transfer
+ */
+ failed = 0;
+ last = count;
+ for (i = reqpage + 1; i < count; i++) {
+ if ( failed || (reqaddr[i] == SWB_EMPTY) ||
+ (swb[i]->swb_valid & (1 << off[i])) == 0 ||
+ (reqaddr[i] != (reqaddr[reqpage] + (i - reqpage) * btodb(PAGE_SIZE))) ||
+ ((reqaddr[i] / dmmax) != reqdskregion)) {
+ failed = 1;
+ swap_pager_freepage(m[i]);
+ if (last == count)
+ last = i;
+ }
+ }
+
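+	/*
+	 * compact the arrays so that entries 0..count-1 describe the
+	 * surviving contiguous run, with reqpage adjusted to match.
+	 */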
+ count = last;
+ if (first != 0) {
+ for (i = first; i < count; i++) {
+ m[i-first] = m[i];
+ reqaddr[i-first] = reqaddr[i];
+ off[i-first] = off[i];
+ }
+ count -= first;
+ reqpage -= first;
+ }
+
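+	/*
+	 * keep the required page's swap block locked while the I/O is in
+	 * progress (it is unlocked again after the transfer completes).
+	 */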
+ ++swb[reqpage]->swb_locked;
+
+ /*
+ * at this point:
+ * "m" is a pointer to the array of vm_page_t for paging I/O
+ * "count" is the number of vm_page_t entries represented by "m"
+ * "object" is the vm_object_t for I/O
+ * "reqpage" is the index into "m" for the page actually faulted
+ */
+
+ spc = NULL; /* we might not use an spc data structure */
+ kva = 0;
+
+ /*
+ * we allocate a new kva for transfers > 1 page
+ * but for transfers == 1 page, the swap_pager_free list contains
+ * entries that have pre-allocated kva's (for efficiency).
+ */
+ if (count > 1) {
+ kva = kmem_alloc_pageable(pager_map, count*PAGE_SIZE);
+ }
+
+
+ if (!kva) {
+ /*
+ * if a kva has not been allocated, we can only do a one page transfer,
+ * so we free the other pages that might have been allocated by
+ * vm_fault.
+ */
+ swap_pager_ridpages(m, count, reqpage);
+ m[0] = m[reqpage];
+ reqaddr[0] = reqaddr[reqpage];
+
+ count = 1;
+ reqpage = 0;
+ /*
+ * get a swap pager clean data structure, block until we get it
+ */
+ if (swap_pager_free.tqh_first == NULL) {
+ s = splbio();
+ if( curproc == pageproc)
+ (void) swap_pager_clean();
+ else
+ wakeup((caddr_t) &vm_pages_needed);
+ while (swap_pager_free.tqh_first == NULL) {
+ swap_pager_needflags |= SWAP_FREE_NEEDED;
+ tsleep((caddr_t)&swap_pager_free,
+ PVM, "swpfre", 0);
+ if( curproc == pageproc)
+ (void) swap_pager_clean();
+ else
+ wakeup((caddr_t) &vm_pages_needed);
+ }
+ splx(s);
+ }
+ spc = swap_pager_free.tqh_first;
+ TAILQ_REMOVE(&swap_pager_free, spc, spc_list);
+ kva = spc->spc_kva;
+ }
+
+
+ /*
+ * map our page(s) into kva for input
+ */
+ for (i = 0; i < count; i++) {
+ pmap_kenter( kva + PAGE_SIZE * i, VM_PAGE_TO_PHYS(m[i]));
+ }
+ pmap_update();
+
+
+ /*
+ * Get a swap buffer header and perform the IO
+ */
+ if( spc) {
+ bp = spc->spc_bp;
+ bzero(bp, sizeof *bp);
+ bp->b_spc = spc;
+ } else {
+ bp = getpbuf();
+ }
+
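+	/*
+	 * set up the buffer for a synchronous read; swap_pager_iodone1
+	 * simply marks the buffer done and wakes us up below.
+	 */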
+ s = splbio();
+ bp->b_flags = B_BUSY | B_READ | B_CALL;
+ bp->b_iodone = swap_pager_iodone1;
+ bp->b_proc = &proc0; /* XXX (but without B_PHYS set this is ok) */
+ bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred;
+ crhold(bp->b_rcred);
+ crhold(bp->b_wcred);
+ bp->b_un.b_addr = (caddr_t) kva;
+ bp->b_blkno = reqaddr[0];
+ bp->b_bcount = PAGE_SIZE*count;
+ bp->b_bufsize = PAGE_SIZE*count;
+
+/*
+ VHOLD(swapdev_vp);
+ bp->b_vp = swapdev_vp;
+ if (swapdev_vp->v_type == VBLK)
+ bp->b_dev = swapdev_vp->v_rdev;
+*/
+ bgetvp( swapdev_vp, bp);
+
+ swp->sw_piip++;
+
+ /*
+ * perform the I/O
+ */
+ VOP_STRATEGY(bp);
+
+ /*
+ * wait for the sync I/O to complete
+ */
+ while ((bp->b_flags & B_DONE) == 0) {
+ tsleep((caddr_t)bp, PVM, "swread", 0);
+ }
+ rv = (bp->b_flags & B_ERROR) ? VM_PAGER_FAIL : VM_PAGER_OK;
+ bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_DIRTY|B_CALL|B_DONE);
+
+ --swp->sw_piip;
+ if (swp->sw_piip == 0)
+ wakeup((caddr_t) swp);
+
+ /*
+ * relpbuf does this, but we maintain our own buffer
+ * list also...
+ */
+ if (bp->b_vp)
+ brelvp(bp);
+
+ splx(s);
+ --swb[reqpage]->swb_locked;
+
+ /*
+ * remove the mapping for kernel virtual
+ */
+ pmap_remove(vm_map_pmap(pager_map), kva, kva + count * PAGE_SIZE);
+
+ if (spc) {
+ /*
+ * if we have used an spc, we need to free it.
+ */
+ if( bp->b_rcred != NOCRED)
+ crfree(bp->b_rcred);
+ if( bp->b_wcred != NOCRED)
+ crfree(bp->b_wcred);
+ TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list);
+ if (swap_pager_needflags & SWAP_FREE_NEEDED) {
+ swap_pager_needflags &= ~SWAP_FREE_NEEDED;
+ wakeup((caddr_t)&swap_pager_free);
+ }
+ } else {
+ /*
+ * free the kernel virtual addresses
+ */
+ kmem_free_wakeup(pager_map, kva, count * PAGE_SIZE);
+ /*
+ * release the physical I/O buffer
+ */
+ relpbuf(bp);
+ /*
+ * finish up input if everything is ok
+ */
+ if( rv == VM_PAGER_OK) {
+ for (i = 0; i < count; i++) {
+ pmap_clear_modify(VM_PAGE_TO_PHYS(m[i]));
+ m[i]->flags |= PG_CLEAN;
+ m[i]->flags &= ~PG_LAUNDRY;
+ if (i != reqpage) {
+ /*
+ * whether or not to leave the page activated
+ * is up in the air, but we should put the page
+ * on a page queue somewhere. (it already is in
+ * the object).
+				 * After some empirical results, it is best
+ * to deactivate the readahead pages.
+ */
+ vm_page_deactivate(m[i]);
+
+ /*
+ * just in case someone was asking for this
+ * page we now tell them that it is ok to use
+ */
+ m[i]->flags &= ~PG_FAKE;
+ PAGE_WAKEUP(m[i]);
+ }
+ }
+ if( swap_pager_full) {
+ _swap_pager_freespace( swp, m[0]->offset+paging_offset, count*PAGE_SIZE);
+ }
+ } else {
+ swap_pager_ridpages(m, count, reqpage);
+ }
+ }
+ return(rv);
+}
+
+int
+swap_pager_output(swp, m, count, flags, rtvals)
+ register sw_pager_t swp;
+ vm_page_t *m;
+ int count;
+ int flags;
+ int *rtvals;
+{
+ register struct buf *bp;
+ sw_blk_t swb[count];
+ register int s;
+ int i, j, ix;
+ boolean_t rv;
+ vm_offset_t kva, off, foff;
+ swp_clean_t spc;
+ vm_offset_t paging_offset;
+ vm_object_t object;
+ int reqaddr[count];
+ int failed;
+
+/*
+ if( count > 1)
+ printf("off: 0x%x, count: %d\n", m[0]->offset, count);
+*/
+ spc = NULL;
+
+ object = m[0]->object;
+ paging_offset = object->paging_offset;
+
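+	/*
+	 * look up (and if necessary allocate) swap space for each page and
+	 * record its disk address; once one page fails, the remainder of
+	 * the run is failed as well so it can be retried later.
+	 */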
+ failed = 0;
+ for(j=0;j<count;j++) {
+ foff = m[j]->offset + paging_offset;
+ ix = swap_pager_block_index(swp, foff);
+ swb[j] = 0;
+ if( swp->sw_blocks == NULL || ix >= swp->sw_nblocks) {
+ rtvals[j] = VM_PAGER_FAIL;
+ failed = 1;
+ continue;
+ } else {
+ rtvals[j] = VM_PAGER_OK;
+ }
+ swb[j] = &swp->sw_blocks[ix];
+ ++swb[j]->swb_locked;
+ if( failed) {
+ rtvals[j] = VM_PAGER_FAIL;
+ continue;
+ }
+ off = swap_pager_block_offset(swp, foff);
+ reqaddr[j] = swb[j]->swb_block[off];
+ if( reqaddr[j] == SWB_EMPTY) {
+ int blk;
+ int tries;
+ int ntoget;
+ tries = 0;
+ s = splbio();
+
+ /*
+ * if any other pages have been allocated in this block, we
+ * only try to get one page.
+ */
+ for (i = 0; i < SWB_NPAGES; i++) {
+ if (swb[j]->swb_block[i] != SWB_EMPTY)
+ break;
+ }
+
+
+ ntoget = (i == SWB_NPAGES) ? SWB_NPAGES : 1;
+ /*
+			 * this code is a little conservative, but works
+ * (the intent of this code is to allocate small chunks
+ * for small objects)
+ */
+ if( (m[j]->offset == 0) && (ntoget*PAGE_SIZE > object->size)) {
+ ntoget = (object->size + (PAGE_SIZE-1))/PAGE_SIZE;
+ }
+
+retrygetspace:
+ if (!swap_pager_full && ntoget > 1 &&
+ swap_pager_getswapspace(ntoget * btodb(PAGE_SIZE), &blk)) {
+
+ for (i = 0; i < ntoget; i++) {
+ swb[j]->swb_block[i] = blk + btodb(PAGE_SIZE) * i;
+ swb[j]->swb_valid = 0;
+ }
+
+ reqaddr[j] = swb[j]->swb_block[off];
+ } else if (!swap_pager_getswapspace(btodb(PAGE_SIZE),
+ &swb[j]->swb_block[off])) {
+ /*
+ * if the allocation has failed, we try to reclaim space and
+ * retry.
+ */
+ if (++tries == 1) {
+ swap_pager_reclaim();
+ goto retrygetspace;
+ }
+ rtvals[j] = VM_PAGER_AGAIN;
+ failed = 1;
+ } else {
+ reqaddr[j] = swb[j]->swb_block[off];
+ swb[j]->swb_valid &= ~(1<<off);
+ }
+ splx(s);
+ }
+ }
+
+	/*
+	 * scan forwards for the end of the physically contiguous run
+	 * starting at the first page; any page beyond it is marked
+	 * VM_PAGER_AGAIN so it will be retried later.
+	 */
+ failed = 0;
+ for (i = 0; i < count; i++) {
+ if( failed || (reqaddr[i] != reqaddr[0] + i*btodb(PAGE_SIZE)) ||
+ (reqaddr[i] / dmmax) != (reqaddr[0] / dmmax) ||
+ (rtvals[i] != VM_PAGER_OK)) {
+ failed = 1;
+ if( rtvals[i] == VM_PAGER_OK)
+ rtvals[i] = VM_PAGER_AGAIN;
+ }
+ }
+
+ for(i = 0; i < count; i++) {
+ if( rtvals[i] != VM_PAGER_OK) {
+ if( swb[i])
+ --swb[i]->swb_locked;
+ }
+ }
+
+ for(i = 0; i < count; i++)
+ if( rtvals[i] != VM_PAGER_OK)
+ break;
+
+ if( i == 0) {
+ return VM_PAGER_AGAIN;
+ }
+
+ count = i;
+ for(i=0;i<count;i++) {
+ if( reqaddr[i] == SWB_EMPTY)
+ printf("I/O to empty block????\n");
+ }
+
+
+ /*
+ * For synchronous writes, we clean up
+ * all completed async pageouts.
+ */
+ if ((flags & B_ASYNC) == 0) {
+ swap_pager_clean();
+ }
+
+ kva = 0;
+
+ /*
+ * we allocate a new kva for transfers > 1 page
+ * but for transfers == 1 page, the swap_pager_free list contains
+ * entries that have pre-allocated kva's (for efficiency).
+ */
+ if ( count > 1) {
+ kva = kmem_alloc_pageable(pager_map, count*PAGE_SIZE);
+ if( !kva) {
+ for (i = 0; i < count; i++) {
+ if( swb[i])
+ --swb[i]->swb_locked;
+ rtvals[i] = VM_PAGER_AGAIN;
+ }
+ return VM_PAGER_AGAIN;
+ }
+ }
+
+ /*
+ * get a swap pager clean data structure, block until we get it
+ */
+ if (swap_pager_free.tqh_first == NULL) {
+/*
+ if (flags & B_ASYNC) {
+ for(i=0;i<count;i++) {
+ rtvals[i] = VM_PAGER_AGAIN;
+ if( swb[i])
+ --swb[i]->swb_locked;
+ }
+ return VM_PAGER_AGAIN;
+ }
+*/
+
+ s = splbio();
+ if( curproc == pageproc)
+ (void) swap_pager_clean();
+ else
+ wakeup((caddr_t) &vm_pages_needed);
+ while (swap_pager_free.tqh_first == NULL) {
+ swap_pager_needflags |= SWAP_FREE_NEEDED;
+ tsleep((caddr_t)&swap_pager_free,
+ PVM, "swpfre", 0);
+ if( curproc == pageproc)
+ (void) swap_pager_clean();
+ else
+ wakeup((caddr_t) &vm_pages_needed);
+ }
+ splx(s);
+ }
+
+ spc = swap_pager_free.tqh_first;
+ TAILQ_REMOVE(&swap_pager_free, spc, spc_list);
+ if( !kva) {
+ kva = spc->spc_kva;
+ spc->spc_altkva = 0;
+ } else {
+ spc->spc_altkva = kva;
+ }
+
+ /*
+ * map our page(s) into kva for I/O
+ */
+ for (i = 0; i < count; i++) {
+ pmap_kenter( kva + PAGE_SIZE * i, VM_PAGE_TO_PHYS(m[i]));
+ }
+ pmap_update();
+
+ /*
+ * get the base I/O offset into the swap file
+ */
+ for(i=0;i<count;i++) {
+ foff = m[i]->offset + paging_offset;
+ off = swap_pager_block_offset(swp, foff);
+ /*
+ * if we are setting the valid bit anew,
+ * then diminish the swap free space
+ */
+ if( (swb[i]->swb_valid & (1 << off)) == 0)
+ vm_swap_size -= btodb(PAGE_SIZE);
+
+ /*
+ * set the valid bit
+ */
+ swb[i]->swb_valid |= (1 << off);
+ /*
+ * and unlock the data structure
+ */
+ --swb[i]->swb_locked;
+ }
+
+ s = splbio();
+ /*
+ * Get a swap buffer header and perform the IO
+ */
+ bp = spc->spc_bp;
+ bzero(bp, sizeof *bp);
+ bp->b_spc = spc;
+
+ bp->b_flags = B_BUSY;
+ bp->b_proc = &proc0; /* XXX (but without B_PHYS set this is ok) */
+ bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred;
+ crhold(bp->b_rcred);
+ crhold(bp->b_wcred);
+ bp->b_un.b_addr = (caddr_t) kva;
+ bp->b_blkno = reqaddr[0];
+ bgetvp( swapdev_vp, bp);
+/*
+ VHOLD(swapdev_vp);
+ bp->b_vp = swapdev_vp;
+ if (swapdev_vp->v_type == VBLK)
+ bp->b_dev = swapdev_vp->v_rdev;
+*/
+ bp->b_bcount = PAGE_SIZE*count;
+ bp->b_bufsize = PAGE_SIZE*count;
+ swapdev_vp->v_numoutput++;
+
+ /*
+ * If this is an async write we set up additional buffer fields
+ * and place a "cleaning" entry on the inuse queue.
+ */
+ if ( flags & B_ASYNC ) {
+ spc->spc_flags = 0;
+ spc->spc_swp = swp;
+ for(i=0;i<count;i++)
+ spc->spc_m[i] = m[i];
+ spc->spc_count = count;
+ /*
+ * the completion routine for async writes
+ */
+ bp->b_flags |= B_CALL;
+ bp->b_iodone = swap_pager_iodone;
+ bp->b_dirtyoff = 0;
+ bp->b_dirtyend = bp->b_bcount;
+ swp->sw_poip++;
+ TAILQ_INSERT_TAIL(&swap_pager_inuse, spc, spc_list);
+ } else {
+ swp->sw_poip++;
+ bp->b_flags |= B_CALL;
+ bp->b_iodone = swap_pager_iodone1;
+ }
+ /*
+ * perform the I/O
+ */
+ VOP_STRATEGY(bp);
+ if ((flags & (B_READ|B_ASYNC)) == B_ASYNC ) {
+ if ((bp->b_flags & B_DONE) == B_DONE) {
+ swap_pager_clean();
+ }
+ splx(s);
+ for(i=0;i<count;i++) {
+ rtvals[i] = VM_PAGER_PEND;
+ }
+ return VM_PAGER_PEND;
+ }
+
+ /*
+ * wait for the sync I/O to complete
+ */
+ while ((bp->b_flags & B_DONE) == 0) {
+ tsleep((caddr_t)bp, PVM, "swwrt", 0);
+ }
+ rv = (bp->b_flags & B_ERROR) ? VM_PAGER_FAIL : VM_PAGER_OK;
+ bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_DIRTY|B_CALL|B_DONE);
+
+ --swp->sw_poip;
+ if (swp->sw_poip == 0)
+ wakeup((caddr_t) swp);
+
+ if (bp->b_vp)
+ brelvp(bp);
+
+ splx(s);
+
+ /*
+ * remove the mapping for kernel virtual
+ */
+ pmap_remove(vm_map_pmap(pager_map), kva, kva + count * PAGE_SIZE);
+
+ /*
+ * if we have written the page, then indicate that the page
+ * is clean.
+ */
+ if (rv == VM_PAGER_OK) {
+ for(i=0;i<count;i++) {
+ if( rtvals[i] == VM_PAGER_OK) {
+ m[i]->flags |= PG_CLEAN;
+ m[i]->flags &= ~PG_LAUNDRY;
+ pmap_clear_modify(VM_PAGE_TO_PHYS(m[i]));
+ /*
+ * optimization, if a page has been read during the
+ * pageout process, we activate it.
+ */
+ if ( (m[i]->flags & PG_ACTIVE) == 0 &&
+ pmap_is_referenced(VM_PAGE_TO_PHYS(m[i])))
+ vm_page_activate(m[i]);
+ }
+ }
+ } else {
+ for(i=0;i<count;i++) {
+ rtvals[i] = rv;
+ m[i]->flags |= PG_LAUNDRY;
+ }
+ }
+
+ if( spc->spc_altkva)
+ kmem_free_wakeup(pager_map, kva, count * PAGE_SIZE);
+
+ if( bp->b_rcred != NOCRED)
+ crfree(bp->b_rcred);
+ if( bp->b_wcred != NOCRED)
+ crfree(bp->b_wcred);
+ TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list);
+ if (swap_pager_needflags & SWAP_FREE_NEEDED) {
+ swap_pager_needflags &= ~SWAP_FREE_NEEDED;
+ wakeup((caddr_t)&swap_pager_free);
+ }
+
+ return(rv);
+}
+
+boolean_t
+swap_pager_clean()
+{
+ register swp_clean_t spc, tspc;
+ register int s;
+
+ tspc = NULL;
+ if (swap_pager_done.tqh_first == NULL)
+ return FALSE;
+ for (;;) {
+ s = splbio();
+ /*
+ * Look up and removal from done list must be done
+ * at splbio() to avoid conflicts with swap_pager_iodone.
+ */
+ while (spc = swap_pager_done.tqh_first) {
+ if( spc->spc_altkva) {
+ pmap_remove(vm_map_pmap(pager_map), spc->spc_altkva, spc->spc_altkva + spc->spc_count * PAGE_SIZE);
+ kmem_free_wakeup(pager_map, spc->spc_altkva, spc->spc_count * PAGE_SIZE);
+ spc->spc_altkva = 0;
+ } else {
+ pmap_remove(vm_map_pmap(pager_map), spc->spc_kva, spc->spc_kva + PAGE_SIZE);
+ }
+ swap_pager_finish(spc);
+ TAILQ_REMOVE(&swap_pager_done, spc, spc_list);
+ goto doclean;
+ }
+
+ /*
+		 * No operations done, that's all we can do for now.
+ */
+
+ splx(s);
+ break;
+
+		/*
+		 * A completed pageout was taken off the done list above;
+		 * return its cleaning structure to the free list and wake
+		 * up anyone waiting for one.
+		 */
+doclean:
+ if (tspc && tspc == spc) {
+ tspc = NULL;
+ }
+ spc->spc_flags = 0;
+ TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list);
+ if (swap_pager_needflags & SWAP_FREE_NEEDED) {
+ swap_pager_needflags &= ~SWAP_FREE_NEEDED;
+ wakeup((caddr_t)&swap_pager_free);
+ }
+ ++cleandone;
+ splx(s);
+ }
+
+ return(tspc ? TRUE : FALSE);
+}
+
+void
+swap_pager_finish(spc)
+ register swp_clean_t spc;
+{
+ vm_object_t object = spc->spc_m[0]->object;
+ int i;
+
+ if ((object->paging_in_progress -= spc->spc_count) == 0)
+ thread_wakeup((int) object);
+
+ /*
+ * If no error mark as clean and inform the pmap system.
+ * If error, mark as dirty so we will try again.
+ * (XXX could get stuck doing this, should give up after awhile)
+ */
+ if (spc->spc_flags & SPC_ERROR) {
+ for(i=0;i<spc->spc_count;i++) {
+ printf("swap_pager_finish: clean of page %x failed\n",
+ VM_PAGE_TO_PHYS(spc->spc_m[i]));
+ spc->spc_m[i]->flags |= PG_LAUNDRY;
+ }
+ } else {
+ for(i=0;i<spc->spc_count;i++) {
+ pmap_clear_modify(VM_PAGE_TO_PHYS(spc->spc_m[i]));
+ spc->spc_m[i]->flags |= PG_CLEAN;
+ }
+ }
+
+
+ for(i=0;i<spc->spc_count;i++) {
+ /*
+		 * wake up any processes that are waiting on
+ * these pages.
+ */
+ PAGE_WAKEUP(spc->spc_m[i]);
+ }
+ nswiodone -= spc->spc_count;
+
+ return;
+}
+
+/*
+ * swap_pager_iodone
+ */
+void
+swap_pager_iodone(bp)
+ register struct buf *bp;
+{
+ register swp_clean_t spc;
+ int s;
+
+ s = splbio();
+ spc = (swp_clean_t) bp->b_spc;
+ TAILQ_REMOVE(&swap_pager_inuse, spc, spc_list);
+ TAILQ_INSERT_TAIL(&swap_pager_done, spc, spc_list);
+ if (bp->b_flags & B_ERROR) {
+ spc->spc_flags |= SPC_ERROR;
+ printf("error %d blkno %d sz %d ",
+ bp->b_error, bp->b_blkno, bp->b_bcount);
+ }
+
+/*
+ if ((bp->b_flags & B_READ) == 0)
+ vwakeup(bp);
+*/
+
+ bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_DIRTY|B_ASYNC);
+ if (bp->b_vp) {
+ brelvp(bp);
+ }
+ if( bp->b_rcred != NOCRED)
+ crfree(bp->b_rcred);
+ if( bp->b_wcred != NOCRED)
+ crfree(bp->b_wcred);
+
+ nswiodone += spc->spc_count;
+ if (--spc->spc_swp->sw_poip == 0) {
+ wakeup((caddr_t)spc->spc_swp);
+ }
+
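+	/*
+	 * wake up anyone waiting for a free cleaning structure, and nudge
+	 * the pageout daemon if the completed I/O may relieve a free page
+	 * shortage.
+	 */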
+ if ((swap_pager_needflags & SWAP_FREE_NEEDED) ||
+ swap_pager_inuse.tqh_first == 0) {
+ swap_pager_needflags &= ~SWAP_FREE_NEEDED;
+ wakeup((caddr_t)&swap_pager_free);
+ wakeup((caddr_t)&vm_pages_needed);
+ }
+
+ if (vm_pageout_pages_needed) {
+ wakeup((caddr_t)&vm_pageout_pages_needed);
+ }
+
+ if ((swap_pager_inuse.tqh_first == NULL) ||
+ (cnt.v_free_count < cnt.v_free_min &&
+ nswiodone + cnt.v_free_count >= cnt.v_free_min) ) {
+ wakeup((caddr_t)&vm_pages_needed);
+ }
+ splx(s);
+}
+
+int bswneeded;
+/* TAILQ_HEAD(swqueue, buf) bswlist; */
+/*
+ * allocate a physical buffer
+ */
+struct buf *
+getpbuf() {
+ int s;
+ struct buf *bp;
+
+ s = splbio();
+ /* get a bp from the swap buffer header pool */
+ while ((bp = bswlist.tqh_first) == NULL) {
+ bswneeded = 1;
+ tsleep((caddr_t)&bswneeded, PVM, "wswbuf", 0);
+ }
+ TAILQ_REMOVE(&bswlist, bp, b_freelist);
+
+ splx(s);
+
+ bzero(bp, sizeof *bp);
+ bp->b_rcred = NOCRED;
+ bp->b_wcred = NOCRED;
+ return bp;
+}
+
+/*
+ * allocate a physical buffer, if one is available
+ */
+struct buf *
+trypbuf() {
+ int s;
+ struct buf *bp;
+
+ s = splbio();
+ if ((bp = bswlist.tqh_first) == NULL) {
+ splx(s);
+ return NULL;
+ }
+ TAILQ_REMOVE(&bswlist, bp, b_freelist);
+ splx(s);
+
+ bzero(bp, sizeof *bp);
+ bp->b_rcred = NOCRED;
+ bp->b_wcred = NOCRED;
+ return bp;
+}
+
+/*
+ * release a physical buffer
+ */
+void
+relpbuf(bp)
+ struct buf *bp;
+{
+ int s;
+
+ s = splbio();
+
+ if (bp->b_rcred != NOCRED) {
+ crfree(bp->b_rcred);
+ bp->b_rcred = NOCRED;
+ }
+ if (bp->b_wcred != NOCRED) {
+ crfree(bp->b_wcred);
+ bp->b_wcred = NOCRED;
+ }
+
+ if (bp->b_vp)
+ brelvp(bp);
+
+ TAILQ_INSERT_HEAD(&bswlist, bp, b_freelist);
+
+ if (bswneeded) {
+ bswneeded = 0;
+ wakeup((caddr_t)&bswlist);
+ }
+ splx(s);
+}
+
+/*
+ * return true if any swap control structures can be allocated
+ */
+int
+swap_pager_ready() {
+ if( swap_pager_free.tqh_first)
+ return 1;
+ else
+ return 0;
+}
diff --git a/sys/vm/swap_pager.h b/sys/vm/swap_pager.h
new file mode 100644
index 0000000..853edd5
--- /dev/null
+++ b/sys/vm/swap_pager.h
@@ -0,0 +1,99 @@
+/*
+ * Copyright (c) 1990 University of Utah.
+ * Copyright (c) 1991 The Regents of the University of California.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * the Systems Programming Group of the University of Utah Computer
+ * Science Department.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * from: @(#)swap_pager.h 7.1 (Berkeley) 12/5/90
+ * $Id: swap_pager.h,v 1.9 1994/03/14 21:54:23 davidg Exp $
+ */
+
+/*
+ * Modifications to the block allocation data structure by John S. Dyson
+ * 18 Dec 93.
+ */
+
+#ifndef _SWAP_PAGER_
+#define _SWAP_PAGER_ 1
+
+/*
+ * SWB_NPAGES can be set to any value from 1 to 16 pages per allocation,
+ * however, due to the allocation spilling into non-swap pager backed memory,
+ * suggest keeping SWB_NPAGES small (1-4). If high performance is mandatory
+ * perhaps up to 8 pages might be in order????
+ * Above problem has been fixed, now we support 16 pages per block. Unused
+ * space is recovered by the swap pager now...
+ */
+#define SWB_NPAGES 8
+struct swblock {
+ unsigned short swb_valid; /* bitmask for valid pages */
+ unsigned short swb_locked; /* block locked */
+ int swb_block[SWB_NPAGES]; /* unfortunately int instead of daddr_t */
+};
+typedef struct swblock *sw_blk_t;
+
+/*
+ * Swap pager private data.
+ */
+struct swpager {
+ vm_size_t sw_osize; /* size of object we are backing (bytes) */
+ int sw_nblocks;/* number of blocks in list (sw_blk_t units) */
+ sw_blk_t sw_blocks; /* pointer to list of swap blocks */
+ short sw_flags; /* flags */
+ short sw_poip; /* pageouts in progress */
+ short sw_piip; /* pageins in progress */
+};
+typedef struct swpager *sw_pager_t;
+
+#define SW_WANTED 0x01
+#define SW_NAMED 0x02
+
+#ifdef KERNEL
+
+void swap_pager_init(void);
+vm_pager_t swap_pager_alloc(caddr_t, vm_size_t, vm_prot_t, vm_offset_t);
+void swap_pager_dealloc(vm_pager_t);
+boolean_t swap_pager_getpage(vm_pager_t, vm_page_t, boolean_t);
+boolean_t swap_pager_putpage(vm_pager_t, vm_page_t, boolean_t);
+boolean_t swap_pager_getmulti(vm_pager_t, vm_page_t *, int, int, boolean_t);
+boolean_t swap_pager_haspage(vm_pager_t, vm_offset_t);
+int swap_pager_io(sw_pager_t, vm_page_t *, int, int, int);
+void swap_pager_iodone(struct buf *);
+boolean_t swap_pager_clean();
+
+extern struct pagerops swappagerops;
+
+#endif
+
+#endif /* _SWAP_PAGER_ */
diff --git a/sys/vm/vm.h b/sys/vm/vm.h
new file mode 100644
index 0000000..bc18dd2
--- /dev/null
+++ b/sys/vm/vm.h
@@ -0,0 +1,93 @@
+/*
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)vm.h 8.2 (Berkeley) 12/13/93
+ */
+
+#ifndef VM_H
+#define VM_H
+
+typedef char vm_inherit_t; /* XXX: inheritance codes */
+
+union vm_map_object;
+typedef union vm_map_object vm_map_object_t;
+
+struct vm_map_entry;
+typedef struct vm_map_entry *vm_map_entry_t;
+
+struct vm_map;
+typedef struct vm_map *vm_map_t;
+
+struct vm_object;
+typedef struct vm_object *vm_object_t;
+
+struct vm_page;
+typedef struct vm_page *vm_page_t;
+
+struct pager_struct;
+typedef struct pager_struct *vm_pager_t;
+
+#include <sys/vmmeter.h>
+#include <sys/queue.h>
+#include <machine/cpufunc.h>
+#include <vm/vm_param.h>
+#include <vm/lock.h>
+#include <vm/vm_prot.h>
+#include <vm/vm_inherit.h>
+#include <vm/vm_map.h>
+#include <vm/vm_object.h>
+#include <vm/pmap.h>
+#include <vm/vm_extern.h>
+
+/*
+ * Shareable process virtual address space.
+ * May eventually be merged with vm_map.
+ * Several fields are temporary (text, data stuff).
+ */
+struct vmspace {
+ struct vm_map vm_map; /* VM address map */
+ struct pmap vm_pmap; /* private physical map */
+ int vm_refcnt; /* number of references */
+ caddr_t vm_shm; /* SYS5 shared memory private data XXX */
+/* we copy from vm_startcopy to the end of the structure on fork */
+#define vm_startcopy vm_rssize
+ segsz_t vm_rssize; /* current resident set size in pages */
+ segsz_t vm_swrss; /* resident set size before last swap */
+ segsz_t vm_tsize; /* text size (pages) XXX */
+ segsz_t vm_dsize; /* data size (pages) XXX */
+ segsz_t vm_ssize; /* stack size (pages) */
+ caddr_t vm_taddr; /* user virtual address of text XXX */
+ caddr_t vm_daddr; /* user virtual address of data XXX */
+ caddr_t vm_maxsaddr; /* user VA at max stack growth */
+ caddr_t vm_minsaddr; /* user VA at max stack growth */
+};
+#endif /* VM_H */
diff --git a/sys/vm/vm_extern.h b/sys/vm/vm_extern.h
new file mode 100644
index 0000000..bc62e42
--- /dev/null
+++ b/sys/vm/vm_extern.h
@@ -0,0 +1,135 @@
+/*-
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)vm_extern.h 8.2 (Berkeley) 1/12/94
+ */
+
+struct buf;
+struct loadavg;
+struct proc;
+struct vmspace;
+struct vmtotal;
+struct mount;
+struct vnode;
+
+#ifdef KGDB
+void chgkprot __P((caddr_t, int, int));
+#endif
+
+/*
+ * Try to get semi-meaningful wait messages into thread_sleep...
+ */
+extern void thread_sleep_(int, simple_lock_t, char *);
+#if __GNUC__ >= 2
+#define thread_sleep(a,b,c) thread_sleep_((a), (b), __FUNCTION__)
+#else
+#define thread_sleep(a,b,c) thread_sleep_((a), (b), "vmslp")
+#endif
+
+#ifdef KERNEL
+#ifdef TYPEDEF_FOR_UAP
+int getpagesize __P((struct proc *p, void *, int *));
+int madvise __P((struct proc *, void *, int *));
+int mincore __P((struct proc *, void *, int *));
+int mprotect __P((struct proc *, void *, int *));
+int msync __P((struct proc *, void *, int *));
+int munmap __P((struct proc *, void *, int *));
+int obreak __P((struct proc *, void *, int *));
+int sbrk __P((struct proc *, void *, int *));
+int smmap __P((struct proc *, void *, int *));
+int sstk __P((struct proc *, void *, int *));
+#endif
+
+void assert_wait __P((int, boolean_t));
+int grow __P((struct proc *, u_int));
+void iprintf __P((const char *, ...));
+int kernacc __P((caddr_t, int, int));
+int kinfo_loadavg __P((int, char *, int *, int, int *));
+int kinfo_meter __P((int, caddr_t, int *, int, int *));
+vm_offset_t kmem_alloc __P((vm_map_t, vm_size_t));
+vm_offset_t kmem_alloc_pageable __P((vm_map_t, vm_size_t));
+vm_offset_t kmem_alloc_wait __P((vm_map_t, vm_size_t));
+void kmem_free __P((vm_map_t, vm_offset_t, vm_size_t));
+void kmem_free_wakeup __P((vm_map_t, vm_offset_t, vm_size_t));
+void kmem_init __P((vm_offset_t, vm_offset_t));
+vm_offset_t kmem_malloc __P((vm_map_t, vm_size_t, boolean_t));
+vm_map_t kmem_suballoc __P((vm_map_t, vm_offset_t *, vm_offset_t *,
+ vm_size_t, boolean_t));
+void loadav __P((struct loadavg *));
+void munmapfd __P((int));
+int pager_cache __P((vm_object_t, boolean_t));
+void sched __P((void));
+int svm_allocate __P((struct proc *, void *, int *));
+int svm_deallocate __P((struct proc *, void *, int *));
+int svm_inherit __P((struct proc *, void *, int *));
+int svm_protect __P((struct proc *, void *, int *));
+void swapinit __P((void));
+int swapon __P((struct proc *, void *, int *));
+void swapout __P((struct proc *));
+void swapout_threads __P((void));
+int swfree __P((struct proc *, int));
+void swstrategy __P((struct buf *));
+void thread_block __P((char *));
+void thread_sleep __P((int, simple_lock_t, boolean_t));
+void thread_wakeup __P((int));
+int useracc __P((caddr_t, int, int));
+int vm_allocate __P((vm_map_t,
+ vm_offset_t *, vm_size_t, boolean_t));
+int vm_allocate_with_pager __P((vm_map_t, vm_offset_t *,
+ vm_size_t, boolean_t, vm_pager_t, vm_offset_t, boolean_t));
+int vm_deallocate __P((vm_map_t, vm_offset_t, vm_size_t));
+int vm_fault __P((vm_map_t, vm_offset_t, vm_prot_t, boolean_t));
+void vm_fault_copy_entry __P((vm_map_t,
+ vm_map_t, vm_map_entry_t, vm_map_entry_t));
+void vm_fault_unwire __P((vm_map_t, vm_offset_t, vm_offset_t));
+int vm_fault_wire __P((vm_map_t, vm_offset_t, vm_offset_t));
+int vm_fork __P((struct proc *, struct proc *, int));
+int vm_inherit __P((vm_map_t,
+ vm_offset_t, vm_size_t, vm_inherit_t));
+void vm_init_limits __P((struct proc *));
+void vm_mem_init __P((void));
+int vm_mmap __P((vm_map_t, vm_offset_t *, vm_size_t,
+ vm_prot_t, vm_prot_t, int, caddr_t, vm_offset_t));
+int vm_protect __P((vm_map_t,
+ vm_offset_t, vm_size_t, boolean_t, vm_prot_t));
+void vm_set_page_size __P((void));
+void vmmeter __P((void));
+struct vmspace *vmspace_alloc __P((vm_offset_t, vm_offset_t, int));
+struct vmspace *vmspace_fork __P((struct vmspace *));
+void vmspace_free __P((struct vmspace *));
+void vmtotal __P((struct vmtotal *));
+void vnode_pager_setsize __P((struct vnode *, u_long));
+void vnode_pager_umount __P((struct mount *));
+boolean_t vnode_pager_uncache __P((struct vnode *));
+void vslock __P((caddr_t, u_int));
+void vsunlock __P((caddr_t, u_int, int));
+#endif
diff --git a/sys/vm/vm_fault.c b/sys/vm/vm_fault.c
new file mode 100644
index 0000000..3ce2d6e
--- /dev/null
+++ b/sys/vm/vm_fault.c
@@ -0,0 +1,1305 @@
+/*
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ * Copyright (c) 1994 John S. Dyson
+ * All rights reserved.
+ * Copyright (c) 1994 David Greenman
+ * All rights reserved.
+ *
+ *
+ * This code is derived from software contributed to Berkeley by
+ * The Mach Operating System project at Carnegie-Mellon University.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)vm_fault.c 8.4 (Berkeley) 1/12/94
+ *
+ *
+ * Copyright (c) 1987, 1990 Carnegie-Mellon University.
+ * All rights reserved.
+ *
+ * Authors: Avadis Tevanian, Jr., Michael Wayne Young
+ *
+ * Permission to use, copy, modify and distribute this software and
+ * its documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ *
+ * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
+ * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
+ * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
+ * School of Computer Science
+ * Carnegie Mellon University
+ * Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie the
+ * rights to redistribute these changes.
+ */
+
+/*
+ * Page fault handling module.
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/resourcevar.h>
+
+#include <vm/vm.h>
+#include <vm/vm_page.h>
+#include <vm/vm_pageout.h>
+
+
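+/*
+ * size of the cluster read in at fault time: up to VM_FAULT_READ_BEHIND
+ * pages before and VM_FAULT_READ_AHEAD pages after the faulting page.
+ */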
+#define VM_FAULT_READ_AHEAD 4
+#define VM_FAULT_READ_AHEAD_MIN 1
+#define VM_FAULT_READ_BEHIND 3
+#define VM_FAULT_READ (VM_FAULT_READ_AHEAD+VM_FAULT_READ_BEHIND+1)
+extern int swap_pager_full;
+extern int vm_pageout_proc_limit;
+
+/*
+ * vm_fault:
+ *
+ *	Handle a page fault occurring at the given address,
+ * requiring the given permissions, in the map specified.
+ * If successful, the page is inserted into the
+ * associated physical map.
+ *
+ * NOTE: the given address should be truncated to the
+ * proper page address.
+ *
+ * KERN_SUCCESS is returned if the page fault is handled; otherwise,
+ * a standard error specifying why the fault is fatal is returned.
+ *
+ *
+ * The map in question must be referenced, and remains so.
+ * Caller may hold no locks.
+ */
+int
+vm_fault(map, vaddr, fault_type, change_wiring)
+ vm_map_t map;
+ vm_offset_t vaddr;
+ vm_prot_t fault_type;
+ boolean_t change_wiring;
+{
+ vm_object_t first_object;
+ vm_offset_t first_offset;
+ vm_map_entry_t entry;
+ register vm_object_t object;
+ register vm_offset_t offset;
+ vm_page_t m;
+ vm_page_t first_m;
+ vm_prot_t prot;
+ int result;
+ boolean_t wired;
+ boolean_t su;
+ boolean_t lookup_still_valid;
+ boolean_t page_exists;
+ vm_page_t old_m;
+ vm_object_t next_object;
+ vm_page_t marray[VM_FAULT_READ];
+ int reqpage;
+ int spl;
+ int hardfault=0;
+
+ cnt.v_faults++; /* needs lock XXX */
+/*
+ * Recovery actions
+ */
+#define FREE_PAGE(m) { \
+ PAGE_WAKEUP(m); \
+ vm_page_lock_queues(); \
+ vm_page_free(m); \
+ vm_page_unlock_queues(); \
+}
+
+#define RELEASE_PAGE(m) { \
+ PAGE_WAKEUP(m); \
+ vm_page_lock_queues(); \
+ vm_page_activate(m); \
+ vm_page_unlock_queues(); \
+}
+
+#define UNLOCK_MAP { \
+ if (lookup_still_valid) { \
+ vm_map_lookup_done(map, entry); \
+ lookup_still_valid = FALSE; \
+ } \
+}
+
+#define UNLOCK_THINGS { \
+ object->paging_in_progress--; \
+ if (object->paging_in_progress == 0) \
+ wakeup((caddr_t)object); \
+ vm_object_unlock(object); \
+ if (object != first_object) { \
+ vm_object_lock(first_object); \
+ FREE_PAGE(first_m); \
+ first_object->paging_in_progress--; \
+ if (first_object->paging_in_progress == 0) \
+ wakeup((caddr_t)first_object); \
+ vm_object_unlock(first_object); \
+ } \
+ UNLOCK_MAP; \
+}
+
+#define UNLOCK_AND_DEALLOCATE { \
+ UNLOCK_THINGS; \
+ vm_object_deallocate(first_object); \
+}
+
+
+ RetryFault: ;
+
+ /*
+ * Find the backing store object and offset into
+ * it to begin the search.
+ */
+
+ if ((result = vm_map_lookup(&map, vaddr, fault_type, &entry,
+ &first_object, &first_offset,
+ &prot, &wired, &su)) != KERN_SUCCESS) {
+ return(result);
+ }
+ lookup_still_valid = TRUE;
+
+ if (wired)
+ fault_type = prot;
+
+ first_m = NULL;
+
+ /*
+ * Make a reference to this object to
+ * prevent its disposal while we are messing with
+ * it. Once we have the reference, the map is free
+ * to be diddled. Since objects reference their
+ * shadows (and copies), they will stay around as well.
+ */
+
+ vm_object_lock(first_object);
+
+ first_object->ref_count++;
+ first_object->paging_in_progress++;
+
+ /*
+ * INVARIANTS (through entire routine):
+ *
+ * 1) At all times, we must either have the object
+ * lock or a busy page in some object to prevent
+ * some other thread from trying to bring in
+ * the same page.
+ *
+ * Note that we cannot hold any locks during the
+ * pager access or when waiting for memory, so
+ * we use a busy page then.
+ *
+ * Note also that we aren't as concerned about
+	 *	more than one thread attempting to pager_data_unlock
+ * the same page at once, so we don't hold the page
+ * as busy then, but do record the highest unlock
+ * value so far. [Unlock requests may also be delivered
+ * out of order.]
+ *
+ * 2) Once we have a busy page, we must remove it from
+ * the pageout queues, so that the pageout daemon
+ * will not grab it away.
+ *
+ * 3) To prevent another thread from racing us down the
+ * shadow chain and entering a new page in the top
+ * object before we do, we must keep a busy page in
+ * the top object while following the shadow chain.
+ *
+ * 4) We must increment paging_in_progress on any object
+ * for which we have a busy page, to prevent
+ * vm_object_collapse from removing the busy page
+ * without our noticing.
+ */
+
+ /*
+ * Search for the page at object/offset.
+ */
+
+ object = first_object;
+ offset = first_offset;
+
+ /*
+ * See whether this page is resident
+ */
+
+ while (TRUE) {
+ m = vm_page_lookup(object, offset);
+ if (m != NULL) {
+ /*
+ * If the page is being brought in,
+ * wait for it and then retry.
+ */
+ if (m->flags & PG_BUSY) {
+ UNLOCK_THINGS;
+ if (m->flags & PG_BUSY) {
+ m->flags |= PG_WANTED;
+ tsleep((caddr_t)m,PSWP,"vmpfw",0);
+ }
+ vm_object_deallocate(first_object);
+ goto RetryFault;
+ }
+
+ /*
+ * Remove the page from the pageout daemon's
+ * reach while we play with it.
+ */
+
+ vm_page_lock_queues();
+ spl = splimp();
+ if (m->flags & PG_INACTIVE) {
+ TAILQ_REMOVE(&vm_page_queue_inactive, m, pageq);
+ m->flags &= ~PG_INACTIVE;
+ cnt.v_inactive_count--;
+ cnt.v_reactivated++;
+ }
+
+ if (m->flags & PG_ACTIVE) {
+ TAILQ_REMOVE(&vm_page_queue_active, m, pageq);
+ m->flags &= ~PG_ACTIVE;
+ cnt.v_active_count--;
+ }
+ splx(spl);
+ vm_page_unlock_queues();
+
+ /*
+ * Mark page busy for other threads.
+ */
+ m->flags |= PG_BUSY;
+ break;
+ }
+
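+		/*
+		 * page is not resident: allocate a page here if this object
+		 * has a pager that may supply it (and we aren't just changing
+		 * wiring), or if this is the top-level object, which gets the
+		 * zero-fill page when nothing else provides one.
+		 */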
+ if (((object->pager != NULL) &&
+ (!change_wiring || wired))
+ || (object == first_object)) {
+
+#if 0
+ if (curproc && (vaddr < VM_MAXUSER_ADDRESS) &&
+ (curproc->p_rlimit[RLIMIT_RSS].rlim_max <
+ curproc->p_vmspace->vm_pmap.pm_stats.resident_count * NBPG)) {
+ UNLOCK_AND_DEALLOCATE;
+ vm_fault_free_pages(curproc);
+ goto RetryFault;
+ }
+#endif
+
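+			/*
+			 * out of swap: if this anonymous page has no backing
+			 * store anywhere, kill the faulting user process
+			 * rather than hang (low-numbered system processes
+			 * are spared).
+			 */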
+ if (swap_pager_full && !object->shadow && (!object->pager ||
+ (object->pager && object->pager->pg_type == PG_SWAP &&
+ !vm_pager_has_page(object->pager, offset+object->paging_offset)))) {
+ if (vaddr < VM_MAXUSER_ADDRESS && curproc && curproc->p_pid >= 48) /* XXX */ {
+ printf("Process %d killed by vm_fault -- out of swap\n", curproc->p_pid);
+ psignal(curproc, SIGKILL);
+ curproc->p_estcpu = 0;
+ curproc->p_nice = PRIO_MIN;
+ setpriority(curproc);
+ }
+ }
+
+ /*
+ * Allocate a new page for this object/offset
+ * pair.
+ */
+
+ m = vm_page_alloc(object, offset);
+
+ if (m == NULL) {
+ UNLOCK_AND_DEALLOCATE;
+ VM_WAIT;
+ goto RetryFault;
+ }
+ }
+
+ if (object->pager != NULL && (!change_wiring || wired)) {
+ int rv;
+ int faultcount;
+ int reqpage;
+
+ /*
+ * Now that we have a busy page, we can
+ * release the object lock.
+ */
+ vm_object_unlock(object);
+ /*
+			 * Now find out whether any other pages should be
+			 * paged in at this time.  vm_fault_additional_pages
+			 * checks whether the pages surrounding this fault
+			 * reside in the same object as the faulting page;
+			 * if so, they are faulted in as well.  The returned
+			 * array "marray" contains the vm_page_t structs, one
+			 * of which is the page passed in, and the reqpage
+			 * return value is its index within marray.
+ */
+ cnt.v_pageins++;
+ faultcount = vm_fault_additional_pages(first_object, first_offset,
+ m, VM_FAULT_READ_BEHIND, VM_FAULT_READ_AHEAD, marray, &reqpage);
+
+ /*
+ * Call the pager to retrieve the data, if any,
+ * after releasing the lock on the map.
+ */
+ UNLOCK_MAP;
+
+ rv = faultcount ?
+ vm_pager_get_pages(object->pager,
+ marray, faultcount, reqpage, TRUE): VM_PAGER_FAIL;
+ if (rv == VM_PAGER_OK) {
+ /*
+ * Found the page.
+ * Leave it busy while we play with it.
+ */
+ vm_object_lock(object);
+
+ /*
+ * Relookup in case pager changed page.
+ * Pager is responsible for disposition
+ * of old page if moved.
+ */
+ m = vm_page_lookup(object, offset);
+
+ cnt.v_pgpgin++;
+ m->flags &= ~PG_FAKE;
+ pmap_clear_modify(VM_PAGE_TO_PHYS(m));
+ hardfault++;
+ break;
+ }
+
+ /*
+ * Remove the bogus page (which does not
+ * exist at this object/offset); before
+ * doing so, we must get back our object
+ * lock to preserve our invariant.
+ *
+ * Also wake up any other thread that may want
+ * to bring in this page.
+ *
+ * If this is the top-level object, we must
+ * leave the busy page to prevent another
+ * thread from rushing past us, and inserting
+ * the page in that object at the same time
+ * that we are.
+ */
+
+ vm_object_lock(object);
+ /*
+ * Data outside the range of the pager; an error
+ */
+ if ((rv == VM_PAGER_ERROR) || (rv == VM_PAGER_BAD)) {
+ FREE_PAGE(m);
+ UNLOCK_AND_DEALLOCATE;
+ return(KERN_PROTECTION_FAILURE); /* XXX */
+ }
+ if (object != first_object) {
+ FREE_PAGE(m);
+ /*
+ * XXX - we cannot just fall out at this
+ * point, m has been freed and is invalid!
+ */
+ }
+ }
+
+ /*
+ * We get here if the object has no pager (or unwiring)
+ * or the pager doesn't have the page.
+ */
+ if (object == first_object)
+ first_m = m;
+
+ /*
+ * Move on to the next object. Lock the next
+ * object before unlocking the current one.
+ */
+
+ offset += object->shadow_offset;
+ next_object = object->shadow;
+ if (next_object == NULL) {
+ /*
+ * If there's no object left, fill the page
+ * in the top object with zeros.
+ */
+ if (object != first_object) {
+ object->paging_in_progress--;
+ if (object->paging_in_progress == 0)
+ wakeup((caddr_t) object);
+ vm_object_unlock(object);
+
+ object = first_object;
+ offset = first_offset;
+ m = first_m;
+ vm_object_lock(object);
+ }
+ first_m = NULL;
+
+ vm_page_zero_fill(m);
+ cnt.v_zfod++;
+ m->flags &= ~PG_FAKE;
+ break;
+ }
+ else {
+ vm_object_lock(next_object);
+ if (object != first_object) {
+ object->paging_in_progress--;
+ if (object->paging_in_progress == 0)
+ wakeup((caddr_t) object);
+ }
+ vm_object_unlock(object);
+ object = next_object;
+ object->paging_in_progress++;
+ }
+ }
+
+	if (((m->flags & (PG_ACTIVE|PG_INACTIVE)) != 0) ||
+ (m->flags & PG_BUSY) == 0)
+ panic("vm_fault: absent or active or inactive or not busy after main loop");
+
+ /*
+ * PAGE HAS BEEN FOUND.
+ * [Loop invariant still holds -- the object lock
+ * is held.]
+ */
+
+ old_m = m; /* save page that would be copied */
+
+ /*
+ * If the page is being written, but isn't
+ * already owned by the top-level object,
+ * we have to copy it into a new page owned
+ * by the top-level object.
+ */
+
+ if (object != first_object) {
+ /*
+ * We only really need to copy if we
+ * want to write it.
+ */
+
+ if (fault_type & VM_PROT_WRITE) {
+
+ /*
+ * If we try to collapse first_object at this
+ * point, we may deadlock when we try to get
+ * the lock on an intermediate object (since we
+ * have the bottom object locked). We can't
+ * unlock the bottom object, because the page
+ * we found may move (by collapse) if we do.
+ *
+ * Instead, we first copy the page. Then, when
+ * we have no more use for the bottom object,
+ * we unlock it and try to collapse.
+ *
+ * Note that we copy the page even if we didn't
+ * need to... that's the breaks.
+ */
+
+ /*
+ * We already have an empty page in
+ * first_object - use it.
+ */
+
+ vm_page_copy(m, first_m);
+ first_m->flags &= ~PG_FAKE;
+
+ /*
+ * If another map is truly sharing this
+ * page with us, we have to flush all
+ * uses of the original page, since we
+ * can't distinguish those which want the
+ * original from those which need the
+ * new copy.
+ *
+ * XXX If we know that only one map has
+ * access to this page, then we could
+ * avoid the pmap_page_protect() call.
+ */
+
+ vm_page_lock_queues();
+
+ vm_page_activate(m);
+ pmap_page_protect(VM_PAGE_TO_PHYS(m), VM_PROT_NONE);
+ if ((m->flags & PG_CLEAN) == 0)
+ m->flags |= PG_LAUNDRY;
+ vm_page_unlock_queues();
+
+ /*
+ * We no longer need the old page or object.
+ */
+ PAGE_WAKEUP(m);
+ object->paging_in_progress--;
+ if (object->paging_in_progress == 0)
+ wakeup((caddr_t) object);
+ vm_object_unlock(object);
+
+ /*
+ * Only use the new page below...
+ */
+
+ cnt.v_cow_faults++;
+ m = first_m;
+ object = first_object;
+ offset = first_offset;
+
+ /*
+ * Now that we've gotten the copy out of the
+ * way, let's try to collapse the top object.
+ */
+ vm_object_lock(object);
+ /*
+ * But we have to play ugly games with
+ * paging_in_progress to do that...
+ */
+ object->paging_in_progress--;
+ if (object->paging_in_progress == 0)
+ wakeup((caddr_t) object);
+ vm_object_collapse(object);
+ object->paging_in_progress++;
+ }
+ else {
+ prot &= ~VM_PROT_WRITE;
+ m->flags |= PG_COPYONWRITE;
+ }
+ }
+
+ if (m->flags & (PG_ACTIVE|PG_INACTIVE))
+ panic("vm_fault: active or inactive before copy object handling");
+
+ /*
+ * If the page is being written, but hasn't been
+ * copied to the copy-object, we have to copy it there.
+ */
+ RetryCopy:
+ if (first_object->copy != NULL) {
+ vm_object_t copy_object = first_object->copy;
+ vm_offset_t copy_offset;
+ vm_page_t copy_m;
+
+ /*
+ * We only need to copy if we want to write it.
+ */
+ if ((fault_type & VM_PROT_WRITE) == 0) {
+ prot &= ~VM_PROT_WRITE;
+ m->flags |= PG_COPYONWRITE;
+ }
+ else {
+ /*
+ * Try to get the lock on the copy_object.
+ */
+ if (!vm_object_lock_try(copy_object)) {
+ vm_object_unlock(object);
+ /* should spin a bit here... */
+ vm_object_lock(object);
+ goto RetryCopy;
+ }
+
+ /*
+ * Make another reference to the copy-object,
+ * to keep it from disappearing during the
+ * copy.
+ */
+ copy_object->ref_count++;
+
+ /*
+ * Does the page exist in the copy?
+ */
+ copy_offset = first_offset
+ - copy_object->shadow_offset;
+ copy_m = vm_page_lookup(copy_object, copy_offset);
+ if (page_exists = (copy_m != NULL)) {
+ if (copy_m->flags & PG_BUSY) {
+ /*
+ * If the page is being brought
+ * in, wait for it and then retry.
+ */
+ PAGE_ASSERT_WAIT(copy_m, !change_wiring);
+ RELEASE_PAGE(m);
+ copy_object->ref_count--;
+ vm_object_unlock(copy_object);
+ UNLOCK_THINGS;
+ thread_block("fltcpy");
+ vm_object_deallocate(first_object);
+ goto RetryFault;
+ }
+ }
+
+ /*
+ * If the page is not in memory (in the object)
+ * and the object has a pager, we have to check
+ * if the pager has the data in secondary
+ * storage.
+ */
+ if (!page_exists) {
+
+ /*
+ * If we don't allocate a (blank) page
+ * here... another thread could try
+ * to page it in, allocate a page, and
+ * then block on the busy page in its
+ * shadow (first_object). Then we'd
+ * trip over the busy page after we
+ * found that the copy_object's pager
+ * doesn't have the page...
+ */
+ copy_m = vm_page_alloc(copy_object, copy_offset);
+ if (copy_m == NULL) {
+ /*
+ * Wait for a page, then retry.
+ */
+ RELEASE_PAGE(m);
+ copy_object->ref_count--;
+ vm_object_unlock(copy_object);
+ UNLOCK_AND_DEALLOCATE;
+ VM_WAIT;
+ goto RetryFault;
+ }
+
+ if (copy_object->pager != NULL) {
+ vm_object_unlock(object);
+ vm_object_unlock(copy_object);
+ UNLOCK_MAP;
+
+ page_exists = vm_pager_has_page(
+ copy_object->pager,
+ (copy_offset + copy_object->paging_offset));
+
+ vm_object_lock(copy_object);
+
+ /*
+ * Since the map is unlocked, someone
+ * else could have copied this object
+ * and put a different copy_object
+ * between the two. Or, the last
+ * reference to the copy-object (other
+ * than the one we have) may have
+ * disappeared - if that has happened,
+ * we don't need to make the copy.
+ */
+ if (copy_object->shadow != object ||
+ copy_object->ref_count == 1) {
+ /*
+ * Gaah... start over!
+ */
+ FREE_PAGE(copy_m);
+ vm_object_unlock(copy_object);
+ vm_object_deallocate(copy_object);
+ /* may block */
+ vm_object_lock(object);
+ goto RetryCopy;
+ }
+ vm_object_lock(object);
+
+ if (page_exists) {
+ /*
+ * We didn't need the page
+ */
+ FREE_PAGE(copy_m);
+ }
+ }
+ }
+ if (!page_exists) {
+ /*
+ * Must copy page into copy-object.
+ */
+ vm_page_copy(m, copy_m);
+ copy_m->flags &= ~PG_FAKE;
+
+ /*
+ * Things to remember:
+ * 1. The copied page must be marked 'dirty'
+ * so it will be paged out to the copy
+ * object.
+ * 2. If the old page was in use by any users
+ * of the copy-object, it must be removed
+ * from all pmaps. (We can't know which
+ * pmaps use it.)
+ */
+ vm_page_lock_queues();
+
+ vm_page_activate(old_m);
+
+
+ pmap_page_protect(VM_PAGE_TO_PHYS(old_m),
+ VM_PROT_NONE);
+ if ((old_m->flags & PG_CLEAN) == 0)
+ old_m->flags |= PG_LAUNDRY;
+ copy_m->flags &= ~PG_CLEAN;
+ vm_page_activate(copy_m);
+ vm_page_unlock_queues();
+
+ PAGE_WAKEUP(copy_m);
+ }
+ /*
+ * The reference count on copy_object must be
+ * at least 2: one for our extra reference,
+ * and at least one from the outside world
+ * (we checked that when we last locked
+ * copy_object).
+ */
+ copy_object->ref_count--;
+ vm_object_unlock(copy_object);
+ m->flags &= ~PG_COPYONWRITE;
+ }
+ }
+
+ if (m->flags & (PG_ACTIVE | PG_INACTIVE))
+ panic("vm_fault: active or inactive before retrying lookup");
+
+ /*
+ * We must verify that the maps have not changed
+ * since our last lookup.
+ */
+
+ if (!lookup_still_valid) {
+ vm_object_t retry_object;
+ vm_offset_t retry_offset;
+ vm_prot_t retry_prot;
+
+ /*
+ * Since map entries may be pageable, make sure we can
+ * take a page fault on them.
+ */
+ vm_object_unlock(object);
+
+ /*
+ * To avoid trying to write_lock the map while another
+ * thread has it read_locked (in vm_map_pageable), we
+ * do not try for write permission. If the page is
+ * still writable, we will get write permission. If it
+ * is not, or has been marked needs_copy, we enter the
+ * mapping without write permission, and will merely
+ * take another fault.
+ */
+ result = vm_map_lookup(&map, vaddr,
+ fault_type & ~VM_PROT_WRITE, &entry,
+ &retry_object, &retry_offset, &retry_prot,
+ &wired, &su);
+
+ vm_object_lock(object);
+
+ /*
+ * If we don't need the page any longer, put it on the
+ * active list (the easiest thing to do here). If no
+ * one needs it, pageout will grab it eventually.
+ */
+
+ if (result != KERN_SUCCESS) {
+ RELEASE_PAGE(m);
+ UNLOCK_AND_DEALLOCATE;
+ return(result);
+ }
+
+ lookup_still_valid = TRUE;
+
+ if ((retry_object != first_object) ||
+ (retry_offset != first_offset)) {
+ RELEASE_PAGE(m);
+ UNLOCK_AND_DEALLOCATE;
+ goto RetryFault;
+ }
+
+ /*
+ * Check whether the protection has changed or the object
+ * has been copied while we left the map unlocked.
+ * Changing from read to write permission is OK - we leave
+ * the page write-protected, and catch the write fault.
+ * Changing from write to read permission means that we
+ * can't mark the page write-enabled after all.
+ */
+ prot &= retry_prot;
+ if (m->flags & PG_COPYONWRITE)
+ prot &= ~VM_PROT_WRITE;
+ }
+
+ /*
+ * (the various bits we're fiddling with here are locked by
+ * the object's lock)
+ */
+
+ /* XXX This distorts the meaning of the copy_on_write bit */
+
+ if (prot & VM_PROT_WRITE)
+ m->flags &= ~PG_COPYONWRITE;
+
+ /*
+ * It's critically important that a wired-down page be faulted
+ * only once in each map for which it is wired.
+ */
+
+ if (m->flags & (PG_ACTIVE | PG_INACTIVE))
+ panic("vm_fault: active or inactive before pmap_enter");
+
+ vm_object_unlock(object);
+
+ /*
+ * Put this page into the physical map.
+ * We had to do the unlock above because pmap_enter
+ * may cause other faults. We don't put the
+ * page back on the active queue until later so
+ * that the page-out daemon won't find us (yet).
+ */
+
+ pmap_enter(map->pmap, vaddr, VM_PAGE_TO_PHYS(m), prot, wired);
+
+ /*
+ * If the page is not wired down, then put it where the
+ * pageout daemon can find it.
+ */
+ vm_object_lock(object);
+ vm_page_lock_queues();
+ if (change_wiring) {
+ if (wired)
+ vm_page_wire(m);
+ else
+ vm_page_unwire(m);
+ }
+ else {
+ vm_page_activate(m);
+ }
+
+ if( curproc && curproc->p_stats) {
+ if (hardfault) {
+ curproc->p_stats->p_ru.ru_majflt++;
+ } else {
+ curproc->p_stats->p_ru.ru_minflt++;
+ }
+ }
+
+ vm_page_unlock_queues();
+
+ /*
+ * Unlock everything, and return
+ */
+
+ PAGE_WAKEUP(m);
+ UNLOCK_AND_DEALLOCATE;
+
+ return(KERN_SUCCESS);
+
+}
+
+/*
+ * vm_fault_wire:
+ *
+ * Wire down a range of virtual addresses in a map.
+ */
+int
+vm_fault_wire(map, start, end)
+ vm_map_t map;
+ vm_offset_t start, end;
+{
+
+ register vm_offset_t va;
+ register pmap_t pmap;
+ int rv;
+
+ pmap = vm_map_pmap(map);
+
+ /*
+ * Inform the physical mapping system that the
+ * range of addresses may not fault, so that
+ * page tables and such can be locked down as well.
+ */
+
+ pmap_pageable(pmap, start, end, FALSE);
+
+ /*
+ * We simulate a fault to get the page and enter it
+ * in the physical map.
+ */
+
+ for (va = start; va < end; va += PAGE_SIZE) {
+ rv = vm_fault(map, va, VM_PROT_NONE, TRUE);
+ if (rv) {
+ if (va != start)
+ vm_fault_unwire(map, start, va);
+ return(rv);
+ }
+ }
+ return(KERN_SUCCESS);
+}
+
+
+/*
+ * vm_fault_unwire:
+ *
+ * Unwire a range of virtual addresses in a map.
+ */
+void
+vm_fault_unwire(map, start, end)
+ vm_map_t map;
+ vm_offset_t start, end;
+{
+
+ register vm_offset_t va, pa;
+ register pmap_t pmap;
+
+ pmap = vm_map_pmap(map);
+
+ /*
+ * Since the pages are wired down, we must be able to
+ * get their mappings from the physical map system.
+ */
+
+ vm_page_lock_queues();
+
+ for (va = start; va < end; va += PAGE_SIZE) {
+ pa = pmap_extract(pmap, va);
+ if (pa == (vm_offset_t) 0) {
+ panic("unwire: page not in pmap");
+ }
+ pmap_change_wiring(pmap, va, FALSE);
+ vm_page_unwire(PHYS_TO_VM_PAGE(pa));
+ }
+ vm_page_unlock_queues();
+
+ /*
+ * Inform the physical mapping system that the range
+ * of addresses may fault, so that page tables and
+ * such may be unwired themselves.
+ */
+
+ pmap_pageable(pmap, start, end, TRUE);
+
+}
+
+/*
+ * Routine:
+ * vm_fault_copy_entry
+ * Function:
+ * Copy all of the pages from a wired-down map entry to another.
+ *
+ * In/out conditions:
+ * The source and destination maps must be locked for write.
+ * The source map entry must be wired down (or be a sharing map
+ * entry corresponding to a main map entry that is wired down).
+ */
+
+void
+vm_fault_copy_entry(dst_map, src_map, dst_entry, src_entry)
+ vm_map_t dst_map;
+ vm_map_t src_map;
+ vm_map_entry_t dst_entry;
+ vm_map_entry_t src_entry;
+{
+ vm_object_t dst_object;
+ vm_object_t src_object;
+ vm_offset_t dst_offset;
+ vm_offset_t src_offset;
+ vm_prot_t prot;
+ vm_offset_t vaddr;
+ vm_page_t dst_m;
+ vm_page_t src_m;
+
+#ifdef lint
+ src_map++;
+#endif /* lint */
+
+ src_object = src_entry->object.vm_object;
+ src_offset = src_entry->offset;
+
+ /*
+ * Create the top-level object for the destination entry.
+ * (Doesn't actually shadow anything - we copy the pages
+ * directly.)
+ */
+ dst_object = vm_object_allocate(
+ (vm_size_t) (dst_entry->end - dst_entry->start));
+
+ dst_entry->object.vm_object = dst_object;
+ dst_entry->offset = 0;
+
+ prot = dst_entry->max_protection;
+
+ /*
+ * Loop through all of the pages in the entry's range, copying
+ * each one from the source object (it should be there) to the
+ * destination object.
+ */
+ for (vaddr = dst_entry->start, dst_offset = 0;
+ vaddr < dst_entry->end;
+ vaddr += PAGE_SIZE, dst_offset += PAGE_SIZE) {
+
+ /*
+ * Allocate a page in the destination object
+ */
+ vm_object_lock(dst_object);
+ do {
+ dst_m = vm_page_alloc(dst_object, dst_offset);
+ if (dst_m == NULL) {
+ vm_object_unlock(dst_object);
+ VM_WAIT;
+ vm_object_lock(dst_object);
+ }
+ } while (dst_m == NULL);
+
+ /*
+ * Find the page in the source object, and copy it in.
+ * (Because the source is wired down, the page will be
+ * in memory.)
+ */
+ vm_object_lock(src_object);
+ src_m = vm_page_lookup(src_object, dst_offset + src_offset);
+ if (src_m == NULL)
+			panic("vm_fault_copy_entry: page missing");
+
+ vm_page_copy(src_m, dst_m);
+
+ /*
+ * Enter it in the pmap...
+ */
+ vm_object_unlock(src_object);
+ vm_object_unlock(dst_object);
+
+ pmap_enter(dst_map->pmap, vaddr, VM_PAGE_TO_PHYS(dst_m),
+ prot, FALSE);
+
+ /*
+ * Mark it no longer busy, and put it on the active list.
+ */
+ vm_object_lock(dst_object);
+ vm_page_lock_queues();
+ vm_page_activate(dst_m);
+ vm_page_unlock_queues();
+ PAGE_WAKEUP(dst_m);
+ vm_object_unlock(dst_object);
+ }
+}
+
+
+/*
+ * vm_fault_page_lookup:
+ *
+ * Look a page up in an object's shadow chain.  Returns 1 with *rtm set
+ * when the page is resident, or with *rtm left zero when a pager along
+ * the chain can supply it; returns 0 if neither is the case.
+ */
+
+int
+vm_fault_page_lookup(object, offset, rtobject, rtoffset, rtm)
+ vm_object_t object;
+ vm_offset_t offset;
+ vm_object_t *rtobject;
+ vm_offset_t *rtoffset;
+ vm_page_t *rtm;
+{
+ vm_page_t m;
+ vm_object_t first_object = object;
+
+ *rtm = 0;
+ *rtobject = 0;
+ *rtoffset = 0;
+
+
+ while (!(m=vm_page_lookup(object, offset))) {
+ if (object->pager) {
+ if (vm_pager_has_page(object->pager, object->paging_offset+offset)) {
+ *rtobject = object;
+ *rtoffset = offset;
+ return 1;
+ }
+ }
+
+ if (!object->shadow)
+ return 0;
+ else {
+ offset += object->shadow_offset;
+ object = object->shadow;
+ }
+ }
+ *rtobject = object;
+ *rtoffset = offset;
+ *rtm = m;
+ return 1;
+}
+
+/*
+ * This routine looks at the pages surrounding the requested page to find
+ * others that can be faulted in along with it.
+ *
+ * Inputs:
+ * first_object, first_offset, m, rbehind, rahead
+ *
+ * Outputs:
+ * marray (array of vm_page_t), reqpage (index of requested page)
+ *
+ * Return value:
+ * number of pages in marray
+ */
+int
+vm_fault_additional_pages(first_object, first_offset, m, rbehind, raheada, marray, reqpage)
+ vm_object_t first_object;
+ vm_offset_t first_offset;
+ vm_page_t m;
+ int rbehind;
+ int raheada;
+ vm_page_t *marray;
+ int *reqpage;
+{
+ int i;
+ vm_page_t tmpm;
+ vm_object_t object;
+ vm_offset_t offset, startoffset, endoffset, toffset, size;
+ vm_object_t rtobject;
+ vm_page_t rtm;
+ vm_offset_t rtoffset;
+ vm_offset_t offsetdiff;
+ int rahead;
+ int treqpage;
+
+ object = m->object;
+ offset = m->offset;
+
+ offsetdiff = offset - first_offset;
+
+ /*
+ * if the requested page is not available, then give up now
+ */
+
+ if (!vm_pager_has_page(object->pager, object->paging_offset+offset))
+ return 0;
+
+ /*
+ * if there is no getmulti routine for this pager, then just allow
+ * one page to be read.
+ */
+/*
+ if (!object->pager->pg_ops->pgo_getpages) {
+ *reqpage = 0;
+ marray[0] = m;
+ return 1;
+ }
+*/
+
+ /*
+ * try to do any readahead that we might have free pages for.
+ */
+ rahead = raheada;
+ if (rahead > (cnt.v_free_count - cnt.v_free_reserved)) {
+ rahead = cnt.v_free_count - cnt.v_free_reserved;
+ rbehind = 0;
+ }
+
+ if (cnt.v_free_count < cnt.v_free_min) {
+ if (rahead > VM_FAULT_READ_AHEAD_MIN)
+ rahead = VM_FAULT_READ_AHEAD_MIN;
+ rbehind = 0;
+ }
+
+ /*
+ * if we don't have any free pages, then just read one page.
+ */
+ if (rahead <= 0) {
+ *reqpage = 0;
+ marray[0] = m;
+ return 1;
+ }
+
+ /*
+	 * scan backward for the read-behind pages -- stop at a page that is
+	 * already in memory or is not backed by this object
+ */
+ toffset = offset - NBPG;
+ if( rbehind*NBPG > offset)
+ rbehind = offset / NBPG;
+ startoffset = offset - rbehind*NBPG;
+ while (toffset >= startoffset) {
+ if (!vm_fault_page_lookup(first_object, toffset - offsetdiff, &rtobject, &rtoffset, &rtm) ||
+ rtm != 0 || rtobject != object) {
+ startoffset = toffset + NBPG;
+ break;
+ }
+ if( toffset == 0)
+ break;
+ toffset -= NBPG;
+ }
+
+ /*
+	 * scan forward for the read-ahead pages -- stop at a page that is
+	 * already in memory or is not backed by this object
+ */
+ toffset = offset + NBPG;
+ endoffset = offset + (rahead+1)*NBPG;
+ while (toffset < object->size && toffset < endoffset) {
+ if (!vm_fault_page_lookup(first_object, toffset - offsetdiff, &rtobject, &rtoffset, &rtm) ||
+ rtm != 0 || rtobject != object) {
+ break;
+ }
+ toffset += NBPG;
+ }
+ endoffset = toffset;
+
+	/* calculate the number of pages in the cluster */
+ size = (endoffset - startoffset) / NBPG;
+
+	/* calculate the index of the requested page within the cluster */
+ treqpage = (offset - startoffset) / NBPG;
+
+ /* see if we have space (again) */
+ if (cnt.v_free_count >= cnt.v_free_reserved + size) {
+ bzero(marray, (rahead + rbehind + 1) * sizeof(vm_page_t));
+ /*
+ * get our pages and don't block for them
+ */
+ for (i = 0; i < size; i++) {
+ if (i != treqpage)
+ rtm = vm_page_alloc(object, startoffset + i * NBPG);
+ else
+ rtm = m;
+ marray[i] = rtm;
+ }
+
+ for (i = 0; i < size; i++) {
+ if (marray[i] == 0)
+ break;
+ }
+
+ /*
+ * if we could not get our block of pages, then
+ * free the readahead/readbehind pages.
+ */
+ if (i < size) {
+ for (i = 0; i < size; i++) {
+ if (i != treqpage && marray[i])
+ FREE_PAGE(marray[i]);
+ }
+ *reqpage = 0;
+ marray[0] = m;
+ return 1;
+ }
+
+ *reqpage = treqpage;
+ return size;
+ }
+ *reqpage = 0;
+ marray[0] = m;
+ return 1;
+}
+
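To make the clustering arithmetic in vm_fault_additional_pages() concrete, here is a worked example (an editorial illustration, not part of the patch). Assuming NBPG is 4096 and ignoring the per-page residency checks that the two scan loops perform, a fault at offset 10*NBPG with rbehind = 3 and rahead = 4 gives:

	startoffset = offset - rbehind*NBPG          =  7*NBPG
	endoffset   = offset + (rahead + 1)*NBPG     = 15*NBPG
	size        = (endoffset - startoffset)/NBPG =  8 pages
	treqpage    = (offset - startoffset)/NBPG    =  3

so marray[] describes object pages 7 through 14 and the faulting page lands at cluster index 3.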
diff --git a/sys/vm/vm_glue.c b/sys/vm/vm_glue.c
new file mode 100644
index 0000000..f181ab0
--- /dev/null
+++ b/sys/vm/vm_glue.c
@@ -0,0 +1,685 @@
+/*
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * The Mach Operating System project at Carnegie-Mellon University.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)vm_glue.c 8.6 (Berkeley) 1/5/94
+ *
+ *
+ * Copyright (c) 1987, 1990 Carnegie-Mellon University.
+ * All rights reserved.
+ *
+ * Permission to use, copy, modify and distribute this software and
+ * its documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ *
+ * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
+ * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
+ * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
+ * School of Computer Science
+ * Carnegie Mellon University
+ * Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie the
+ * rights to redistribute these changes.
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/resourcevar.h>
+#include <sys/buf.h>
+#include <sys/user.h>
+
+#include <sys/kernel.h>
+#include <sys/dkstat.h>
+
+#include <vm/vm.h>
+#include <vm/vm_page.h>
+#include <vm/vm_pageout.h>
+#include <vm/vm_kern.h>
+
+#include <machine/stdarg.h>
+
+extern char kstack[];
+int avefree = 0; /* XXX */
+int readbuffers = 0; /* XXX allow kgdb to read kernel buffer pool */
+/* vm_map_t upages_map; */
+
+void swapout(struct proc *p);
+int
+kernacc(addr, len, rw)
+ caddr_t addr;
+ int len, rw;
+{
+ boolean_t rv;
+ vm_offset_t saddr, eaddr;
+ vm_prot_t prot = rw == B_READ ? VM_PROT_READ : VM_PROT_WRITE;
+
+ saddr = trunc_page(addr);
+ eaddr = round_page(addr+len);
+ rv = vm_map_check_protection(kernel_map, saddr, eaddr, prot);
+ return(rv == TRUE);
+}
+
+int
+useracc(addr, len, rw)
+ caddr_t addr;
+ int len, rw;
+{
+ boolean_t rv;
+ vm_prot_t prot = rw == B_READ ? VM_PROT_READ : VM_PROT_WRITE;
+
+ /*
+ * XXX - specially disallow access to user page tables - they are
+ * in the map.
+ *
+ * XXX - don't specially disallow access to the user area - treat
+ * it as incorrectly as elsewhere.
+ *
+ * XXX - VM_MAXUSER_ADDRESS is an end address, not a max. It was
+ * only used (as an end address) in trap.c. Use it as an end
+ * address here too.
+ */
+ if ((vm_offset_t) addr >= VM_MAXUSER_ADDRESS
+ || (vm_offset_t) addr + len > VM_MAXUSER_ADDRESS
+ || (vm_offset_t) addr + len <= (vm_offset_t) addr) {
+ return (FALSE);
+ }
+
+ rv = vm_map_check_protection(&curproc->p_vmspace->vm_map,
+ trunc_page(addr), round_page(addr+len), prot);
+ return(rv == TRUE);
+}
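+
+/*
+ * Typical use (editorial illustration, not in the original source): a
+ * driver validates a user buffer before touching it, e.g.
+ *
+ *	if (useracc(base, len, B_READ) == 0)
+ *		return (EFAULT);
+ *
+ * where base and len describe a hypothetical user buffer.
+ */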
+
+#ifdef KGDB
+/*
+ * Change protections on kernel pages from addr to addr+len
+ * (presumably so debugger can plant a breakpoint).
+ * All addresses are assumed to reside in the Sysmap.
+ */
+chgkprot(addr, len, rw)
+ register caddr_t addr;
+ int len, rw;
+{
+ vm_prot_t prot = rw == B_READ ? VM_PROT_READ : VM_PROT_WRITE;
+
+ vm_map_protect(kernel_map, trunc_page(addr),
+ round_page(addr+len), prot, FALSE);
+}
+#endif
+void
+vslock(addr, len)
+ caddr_t addr;
+ u_int len;
+{
+ vm_map_pageable(&curproc->p_vmspace->vm_map, trunc_page(addr),
+ round_page(addr+len), FALSE);
+}
+
+void
+vsunlock(addr, len, dirtied)
+ caddr_t addr;
+ u_int len;
+ int dirtied;
+{
+#ifdef lint
+ dirtied++;
+#endif /* lint */
+ vm_map_pageable(&curproc->p_vmspace->vm_map, trunc_page(addr),
+ round_page(addr+len), TRUE);
+}
+
+/*
+ * Implement fork's actions on an address space.
+ * Here we arrange for the address space to be copied or referenced,
+ * allocate a user struct (pcb and kernel stack), then call the
+ * machine-dependent layer to fill those in and make the new process
+ * ready to run.
+ * NOTE: the kernel stack may be at a different location in the child
+ * process, and thus addresses of automatic variables may be invalid
+ * after cpu_fork returns in the child process. We do nothing here
+ * after cpu_fork returns.
+ */
+int
+vm_fork(p1, p2, isvfork)
+ register struct proc *p1, *p2;
+ int isvfork;
+{
+ register struct user *up;
+ vm_offset_t addr, ptaddr;
+ int i;
+ struct vm_map *vp;
+
+ while( cnt.v_free_count < cnt.v_free_min)
+ VM_WAIT;
+
+ /*
+ * avoid copying any of the parent's pagetables or other per-process
+ * objects that reside in the map by marking all of them non-inheritable
+ */
+ (void)vm_map_inherit(&p1->p_vmspace->vm_map,
+ UPT_MIN_ADDRESS - UPAGES * NBPG, VM_MAX_ADDRESS, VM_INHERIT_NONE);
+ p2->p_vmspace = vmspace_fork(p1->p_vmspace);
+
+#ifdef SYSVSHM
+ if (p1->p_vmspace->vm_shm)
+ shmfork(p1, p2, isvfork);
+#endif
+
+ /*
+ * Allocate a wired-down (for now) pcb and kernel stack for the process
+ */
+
+ addr = (vm_offset_t) kstack;
+
+ vp = &p2->p_vmspace->vm_map;
+
+ /* ream out old pagetables and kernel stack */
+ (void)vm_deallocate(vp, addr, UPT_MAX_ADDRESS - addr);
+
+ /* get new pagetables and kernel stack */
+ (void)vm_allocate(vp, &addr, UPT_MAX_ADDRESS - addr, FALSE);
+
+ /* force in the page table encompassing the UPAGES */
+ ptaddr = trunc_page((u_int)vtopte(addr));
+ vm_map_pageable(vp, ptaddr, ptaddr + NBPG, FALSE);
+
+ /* and force in (demand-zero) the UPAGES */
+ vm_map_pageable(vp, addr, addr + UPAGES * NBPG, FALSE);
+
+ /* get a kernel virtual address for the UPAGES for this proc */
+ up = (struct user *)kmem_alloc_pageable(kernel_map, UPAGES * NBPG);
+
+ /* and force-map the upages into the kernel pmap */
+ for (i = 0; i < UPAGES; i++)
+ pmap_enter(vm_map_pmap(kernel_map),
+ ((vm_offset_t) up) + NBPG * i,
+ pmap_extract(vp->pmap, addr + NBPG * i),
+ VM_PROT_READ|VM_PROT_WRITE, 1);
+
+ /* and allow the UPAGES page table entry to be paged (at the vm system level) */
+ vm_map_pageable(vp, ptaddr, ptaddr + NBPG, TRUE);
+
+ p2->p_addr = up;
+
+ /*
+	 * p_stats and p_sigacts currently point at fields in the user
+	 * struct, reached through p_addr rather than through the global &u.
+ * Copy p_sigacts and parts of p_stats; zero the rest
+ * of p_stats (statistics).
+ */
+ p2->p_stats = &up->u_stats;
+ p2->p_sigacts = &up->u_sigacts;
+ up->u_sigacts = *p1->p_sigacts;
+ bzero(&up->u_stats.pstat_startzero,
+ (unsigned) ((caddr_t)&up->u_stats.pstat_endzero -
+ (caddr_t)&up->u_stats.pstat_startzero));
+ bcopy(&p1->p_stats->pstat_startcopy, &up->u_stats.pstat_startcopy,
+ ((caddr_t)&up->u_stats.pstat_endcopy -
+ (caddr_t)&up->u_stats.pstat_startcopy));
+
+
+ /*
+ * cpu_fork will copy and update the kernel stack and pcb,
+ * and make the child ready to run. It marks the child
+ * so that it can return differently than the parent.
+ * It returns twice, once in the parent process and
+ * once in the child.
+ */
+ return (cpu_fork(p1, p2));
+}
+
+/*
+ * Set default limits for VM system.
+ * Called for proc 0, and then inherited by all others.
+ */
+void
+vm_init_limits(p)
+ register struct proc *p;
+{
+ int tmp;
+
+ /*
+ * Set up the initial limits on process VM.
+ * Set the maximum resident set size to be all
+ * of (reasonably) available memory. This causes
+ * any single, large process to start random page
+ * replacement once it fills memory.
+ */
+ p->p_rlimit[RLIMIT_STACK].rlim_cur = DFLSSIZ;
+ p->p_rlimit[RLIMIT_STACK].rlim_max = MAXSSIZ;
+ p->p_rlimit[RLIMIT_DATA].rlim_cur = DFLDSIZ;
+ p->p_rlimit[RLIMIT_DATA].rlim_max = MAXDSIZ;
+ tmp = ((2 * cnt.v_free_count) / 3) - 32;
+ if (cnt.v_free_count < 512)
+ tmp = cnt.v_free_count;
+ p->p_rlimit[RLIMIT_RSS].rlim_cur = ptoa(tmp);
+ p->p_rlimit[RLIMIT_RSS].rlim_max = RLIM_INFINITY;
+}
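+
+/*
+ * Worked example (editorial illustration, not in the original source):
+ * with cnt.v_free_count = 3072 free pages, tmp = (2*3072)/3 - 32 = 2016,
+ * so the soft RSS limit becomes ptoa(2016), roughly 8MB with 4K pages;
+ * with fewer than 512 free pages the limit is simply all of free memory.
+ */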
+
+#ifdef DEBUG
+int enableswap = 1;
+int swapdebug = 0;
+#define SDB_FOLLOW 1
+#define SDB_SWAPIN 2
+#define SDB_SWAPOUT 4
+#endif
+
+void
+faultin(p)
+struct proc *p;
+{
+ vm_offset_t i;
+ vm_offset_t vaddr, ptaddr;
+ vm_offset_t v, v1;
+ struct user *up;
+ int s;
+ int opflag;
+
+ if ((p->p_flag & P_INMEM) == 0) {
+ int rv0, rv1;
+ vm_map_t map;
+
+ ++p->p_lock;
+
+ map = &p->p_vmspace->vm_map;
+ /* force the page table encompassing the kernel stack (upages) */
+ ptaddr = trunc_page((u_int)vtopte(kstack));
+ vm_map_pageable(map, ptaddr, ptaddr + NBPG, FALSE);
+
+ /* wire in the UPAGES */
+ vm_map_pageable(map, (vm_offset_t) kstack,
+ (vm_offset_t) kstack + UPAGES * NBPG, FALSE);
+
+ /* and map them nicely into the kernel pmap */
+ for (i = 0; i < UPAGES; i++) {
+ vm_offset_t off = i * NBPG;
+ vm_offset_t pa = (vm_offset_t)
+ pmap_extract(&p->p_vmspace->vm_pmap,
+ (vm_offset_t) kstack + off);
+ pmap_enter(vm_map_pmap(kernel_map),
+ ((vm_offset_t)p->p_addr) + off,
+ pa, VM_PROT_READ|VM_PROT_WRITE, 1);
+ }
+
+ /* and let the page table pages go (at least above pmap level) */
+ vm_map_pageable(map, ptaddr, ptaddr + NBPG, TRUE);
+
+ s = splhigh();
+
+ if (p->p_stat == SRUN)
+ setrunqueue(p);
+
+ p->p_flag |= P_INMEM;
+
+		/* undo the p_lock hold acquired above */
+ --p->p_lock;
+ splx(s);
+
+ }
+
+}
+
+int swapinreq;
+int percentactive;
+/*
+ * This swapin algorithm attempts to swap-in processes only if there
+ * is enough space for them. Of course, if a process waits for a long
+ * time, it will be swapped in anyway.
+ */
+void
+scheduler()
+{
+ register struct proc *p;
+ register int pri;
+ struct proc *pp;
+ int ppri;
+ vm_offset_t addr;
+ int lastidle, lastrun;
+ int curidle, currun;
+ int forceload;
+ int percent;
+ int ntries;
+
+ lastidle = 0;
+ lastrun = 0;
+
+loop:
+ ntries = 0;
+ vmmeter();
+
+ curidle = cp_time[CP_IDLE];
+ currun = cp_time[CP_USER] + cp_time[CP_SYS] + cp_time[CP_NICE];
+ percent = (100*(currun-lastrun)) / ( 1 + (currun-lastrun) + (curidle-lastidle));
+ lastrun = currun;
+ lastidle = curidle;
+ if( percent > 100)
+ percent = 100;
+ percentactive = percent;
+
+ if( percentactive < 25)
+ forceload = 1;
+ else
+ forceload = 0;
+
+loop1:
+ pp = NULL;
+ ppri = INT_MIN;
+ for (p = (struct proc *)allproc; p != NULL; p = p->p_next) {
+ if (p->p_stat == SRUN && (p->p_flag & P_INMEM) == 0) {
+ int mempri;
+ pri = p->p_swtime + p->p_slptime - p->p_nice * 8;
+ mempri = pri > 0 ? pri : 0;
+ /*
+ * if this process is higher priority and there is
+ * enough space, then select this process instead
+ * of the previous selection.
+ */
+ if (pri > ppri &&
+ (((cnt.v_free_count + (mempri * (4*PAGE_SIZE) / PAGE_SIZE) >= (p->p_vmspace->vm_swrss)) || (ntries > 0 && forceload)))) {
+ pp = p;
+ ppri = pri;
+ }
+ }
+ }
+
+ if ((pp == NULL) && (ntries == 0) && forceload) {
+ ++ntries;
+ goto loop1;
+ }
+
+ /*
+ * Nothing to do, back to sleep
+ */
+ if ((p = pp) == NULL) {
+ tsleep((caddr_t)&proc0, PVM, "sched", 0);
+ goto loop;
+ }
+
+ /*
+	 * We would like to bring someone in (but only if there is space).
+ */
+/*
+ printf("swapin: %d, free: %d, res: %d, min: %d\n",
+ p->p_pid, cnt.v_free_count, cnt.v_free_reserved, cnt.v_free_min);
+*/
+ (void) splhigh();
+ if ((forceload && (cnt.v_free_count > (cnt.v_free_reserved + UPAGES + 1))) ||
+ (cnt.v_free_count >= cnt.v_free_min)) {
+ spl0();
+ faultin(p);
+ p->p_swtime = 0;
+ goto loop;
+ }
+ /*
+ * log the memory shortage
+ */
+ swapinreq += p->p_vmspace->vm_swrss;
+ /*
+	 * Not enough memory: jab the pageout daemon and wait until the
+ * coast is clear.
+ */
+ if( cnt.v_free_count < cnt.v_free_min) {
+ VM_WAIT;
+ } else {
+ tsleep((caddr_t)&proc0, PVM, "sched", 0);
+ }
+ (void) spl0();
+ goto loop;
+}
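+
+/*
+ * Worked example of the figures above (editorial illustration, not in the
+ * original source): if 40 ticks of user+sys+nice time and 60 idle ticks
+ * passed since the last iteration, percentactive = (100*40)/(1+40+60) = 39.
+ * A swapped-out runnable process with p_swtime = 10, p_slptime = 4 and
+ * p_nice = -5 gets pri = 10 + 4 - (-5*8) = 54, and is brought in only if
+ * cnt.v_free_count + 4*54 pages covers its remembered vm_swrss (or a
+ * forced load is in effect).
+ */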
+
+#define swappable(p) \
+ (((p)->p_lock == 0) && \
+ ((p)->p_flag & (P_TRACED|P_NOSWAP|P_SYSTEM|P_INMEM|P_WEXIT|P_PHYSIO)) == P_INMEM)
+
+extern int vm_pageout_free_min;
+/*
+ * Swapout is driven by the pageout daemon. Very simply, we find eligible
+ * procs and unwire their u-areas. We try to always "swap" at least one
+ * process in case we need the room for a swapin.
+ * If any procs have been sleeping/stopped for at least maxslp seconds,
+ * they are swapped. Else, we swap the longest-sleeping or stopped process,
+ * if any, otherwise the longest-resident process.
+ */
+void
+swapout_threads()
+{
+ register struct proc *p;
+ struct proc *outp, *outp2;
+ int outpri, outpri2;
+ int tpri;
+ int didswap = 0;
+ int swapneeded = swapinreq;
+ extern int maxslp;
+ int runnablenow;
+ int s;
+
+swapmore:
+ runnablenow = 0;
+ outp = outp2 = NULL;
+ outpri = outpri2 = INT_MIN;
+ for (p = (struct proc *)allproc; p != NULL; p = p->p_next) {
+ if (!swappable(p))
+ continue;
+ switch (p->p_stat) {
+ case SRUN:
+ ++runnablenow;
+ /*
+ * count the process as being in a runnable state
+ */
+ if ((tpri = p->p_swtime + p->p_nice * 8) > outpri2) {
+ outp2 = p;
+ outpri2 = tpri;
+ }
+ continue;
+
+ case SSLEEP:
+ case SSTOP:
+ /*
+			 * do not swap out a process that is waiting for VM
+			 * data structures; there is a possible deadlock.
+ */
+ if (!lock_try_write( &p->p_vmspace->vm_map.lock)) {
+ continue;
+ }
+ vm_map_unlock( &p->p_vmspace->vm_map);
+ if (p->p_slptime > maxslp) {
+ swapout(p);
+ didswap++;
+ } else if ((tpri = p->p_slptime + p->p_nice * 8) > outpri) {
+ outp = p;
+ outpri = tpri ;
+ }
+ continue;
+ }
+ }
+ /*
+	 * We swap out only if there are more than two runnable processes
+	 * or if another process needs some space to swap in.
+ */
+ if ((swapinreq || ((percentactive > 90) && (runnablenow > 2))) &&
+ (((cnt.v_free_count + cnt.v_inactive_count) <= (cnt.v_free_target + cnt.v_inactive_target)) ||
+ (cnt.v_free_count < cnt.v_free_min))) {
+ if ((p = outp) == 0) {
+ p = outp2;
+ }
+
+ if (p) {
+ swapout(p);
+ didswap = 1;
+ }
+ }
+
+ /*
+	 * if we previously found a process to swap out and we still need
+	 * to swap out more, then try again.
+ */
+#if 0
+ if( p && swapinreq)
+ goto swapmore;
+#endif
+
+ /*
+ * If we swapped something out, and another process needed memory,
+ * then wakeup the sched process.
+ */
+ if (didswap) {
+ if (swapneeded)
+ wakeup((caddr_t)&proc0);
+ swapinreq = 0;
+ }
+}
+
+void
+swapout(p)
+ register struct proc *p;
+{
+ vm_offset_t addr;
+ struct pmap *pmap = &p->p_vmspace->vm_pmap;
+ vm_map_t map = &p->p_vmspace->vm_map;
+ vm_offset_t ptaddr;
+ int i;
+
+ ++p->p_stats->p_ru.ru_nswap;
+ /*
+ * remember the process resident count
+ */
+ p->p_vmspace->vm_swrss =
+ p->p_vmspace->vm_pmap.pm_stats.resident_count;
+ /*
+ * and decrement the amount of needed space
+ */
+ swapinreq -= min(swapinreq, p->p_vmspace->vm_pmap.pm_stats.resident_count);
+
+ (void) splhigh();
+ p->p_flag &= ~P_INMEM;
+ if (p->p_stat == SRUN)
+ remrq(p);
+ (void) spl0();
+
+ ++p->p_lock;
+/* let the upages be paged */
+ pmap_remove(vm_map_pmap(kernel_map),
+ (vm_offset_t) p->p_addr, ((vm_offset_t) p->p_addr) + UPAGES * NBPG);
+
+ vm_map_pageable(map, (vm_offset_t) kstack,
+ (vm_offset_t) kstack + UPAGES * NBPG, TRUE);
+
+ --p->p_lock;
+ p->p_swtime = 0;
+}
+
+/*
+ * The rest of these routines fake thread handling
+ */
+
+#ifndef assert_wait
+void
+assert_wait(event, ruptible)
+ int event;
+ boolean_t ruptible;
+{
+#ifdef lint
+ ruptible++;
+#endif
+ curproc->p_thread = event;
+}
+#endif
+
+void
+thread_block(char *msg)
+{
+ if (curproc->p_thread)
+ tsleep((caddr_t)curproc->p_thread, PVM, msg, 0);
+}
+
+
+void
+thread_sleep_(event, lock, wmesg)
+ int event;
+ simple_lock_t lock;
+ char *wmesg;
+{
+
+ curproc->p_thread = event;
+ simple_unlock(lock);
+ if (curproc->p_thread) {
+ tsleep((caddr_t)event, PVM, wmesg, 0);
+ }
+}
+
+#ifndef thread_wakeup
+void
+thread_wakeup(event)
+ int event;
+{
+ wakeup((caddr_t)event);
+}
+#endif
+
+/*
+ * DEBUG stuff
+ */
+
+int indent = 0;
+
+#include <machine/stdarg.h> /* see subr_prf.c */
+
+/*ARGSUSED2*/
+void
+#if __STDC__
+iprintf(const char *fmt, ...)
+#else
+iprintf(fmt /* , va_alist */)
+ char *fmt;
+ /* va_dcl */
+#endif
+{
+ register int i;
+ va_list ap;
+
+ for (i = indent; i >= 8; i -= 8)
+ printf("\t");
+ while (--i >= 0)
+ printf(" ");
+ va_start(ap, fmt);
+ printf("%r", fmt, ap);
+ va_end(ap);
+}
diff --git a/sys/vm/vm_inherit.h b/sys/vm/vm_inherit.h
new file mode 100644
index 0000000..455f91c
--- /dev/null
+++ b/sys/vm/vm_inherit.h
@@ -0,0 +1,83 @@
+/*
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * The Mach Operating System project at Carnegie-Mellon University.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)vm_inherit.h 8.1 (Berkeley) 6/11/93
+ *
+ *
+ * Copyright (c) 1987, 1990 Carnegie-Mellon University.
+ * All rights reserved.
+ *
+ * Authors: Avadis Tevanian, Jr., Michael Wayne Young
+ *
+ * Permission to use, copy, modify and distribute this software and
+ * its documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ *
+ * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
+ * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
+ * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
+ * School of Computer Science
+ * Carnegie Mellon University
+ * Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie the
+ * rights to redistribute these changes.
+ */
+
+/*
+ * Virtual memory map inheritance definitions.
+ */
+
+#ifndef _VM_INHERIT_
+#define _VM_INHERIT_
+
+/*
+ * Enumeration of valid values for vm_inherit_t.
+ */
+
+#define VM_INHERIT_SHARE ((vm_inherit_t) 0) /* share with child */
+#define VM_INHERIT_COPY ((vm_inherit_t) 1) /* copy into child */
+#define VM_INHERIT_NONE ((vm_inherit_t) 2) /* absent from child */
+#define VM_INHERIT_DONATE_COPY ((vm_inherit_t) 3) /* copy and delete */
+
+#define VM_INHERIT_DEFAULT VM_INHERIT_COPY
+
+#endif /* _VM_INHERIT_ */
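As a usage sketch only (not part of this patch), a caller changes how a mapped range is passed to its children through vm_map_inherit(), just as vm_fork() in vm_glue.c above does to keep the page tables and u-area out of the child; the shmaddr and shmsize names below are hypothetical:

	/* Keep a hypothetical mapped region shared with future children. */
	(void) vm_map_inherit(&p->p_vmspace->vm_map, shmaddr,
	    shmaddr + shmsize, VM_INHERIT_SHARE);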
diff --git a/sys/vm/vm_init.c b/sys/vm/vm_init.c
new file mode 100644
index 0000000..a0eac70
--- /dev/null
+++ b/sys/vm/vm_init.c
@@ -0,0 +1,105 @@
+
+/*
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * The Mach Operating System project at Carnegie-Mellon University.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)vm_init.c 8.1 (Berkeley) 6/11/93
+ *
+ *
+ * Copyright (c) 1987, 1990 Carnegie-Mellon University.
+ * All rights reserved.
+ *
+ * Authors: Avadis Tevanian, Jr., Michael Wayne Young
+ *
+ * Permission to use, copy, modify and distribute this software and
+ * its documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ *
+ * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
+ * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
+ * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
+ * School of Computer Science
+ * Carnegie Mellon University
+ * Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie the
+ * rights to redistribute these changes.
+ */
+
+/*
+ * Initialize the Virtual Memory subsystem.
+ */
+
+#include <sys/param.h>
+
+#include <vm/vm.h>
+#include <vm/vm_page.h>
+#include <vm/vm_kern.h>
+
+/*
+ * vm_mem_init initializes the virtual memory system.
+ * This is done only by the first cpu up.
+ *
+ * The start and end addresses of physical memory are taken from the
+ * avail_start and avail_end globals rather than being passed in.
+ */
+
+void
+vm_mem_init()
+{
+ extern vm_offset_t avail_start, avail_end;
+ extern vm_offset_t virtual_avail, virtual_end;
+
+ /*
+ * Initializes resident memory structures.
+ * From here on, all physical memory is accounted for,
+ * and we use only virtual addresses.
+ */
+
+ vm_set_page_size();
+ virtual_avail = vm_page_startup(avail_start, avail_end, virtual_avail);
+ /*
+ * Initialize other VM packages
+ */
+ vm_object_init(virtual_end - VM_MIN_KERNEL_ADDRESS);
+ vm_map_startup();
+ kmem_init(virtual_avail, virtual_end);
+ pmap_init(avail_start, avail_end);
+ vm_pager_init();
+}
diff --git a/sys/vm/vm_kern.c b/sys/vm/vm_kern.c
new file mode 100644
index 0000000..55a0949
--- /dev/null
+++ b/sys/vm/vm_kern.c
@@ -0,0 +1,454 @@
+/*
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * The Mach Operating System project at Carnegie-Mellon University.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)vm_kern.c 8.3 (Berkeley) 1/12/94
+ *
+ *
+ * Copyright (c) 1987, 1990 Carnegie-Mellon University.
+ * All rights reserved.
+ *
+ * Authors: Avadis Tevanian, Jr., Michael Wayne Young
+ *
+ * Permission to use, copy, modify and distribute this software and
+ * its documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ *
+ * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
+ * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
+ * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
+ * School of Computer Science
+ * Carnegie Mellon University
+ * Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie the
+ * rights to redistribute these changes.
+ */
+
+/*
+ * Kernel memory management.
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+
+#include <vm/vm.h>
+#include <vm/vm_page.h>
+#include <vm/vm_pageout.h>
+#include <vm/vm_kern.h>
+
+/*
+ * kmem_alloc_pageable:
+ *
+ * Allocate pageable memory to the kernel's address map.
+ * map must be "kernel_map" below.
+ */
+
+vm_offset_t kmem_alloc_pageable(map, size)
+ vm_map_t map;
+ register vm_size_t size;
+{
+ vm_offset_t addr;
+ register int result;
+
+#if 0
+ if (map != kernel_map)
+ panic("kmem_alloc_pageable: not called with kernel_map");
+#endif
+
+ size = round_page(size);
+
+ addr = vm_map_min(map);
+ result = vm_map_find(map, NULL, (vm_offset_t) 0,
+ &addr, size, TRUE);
+ if (result != KERN_SUCCESS) {
+ return(0);
+ }
+
+ return(addr);
+}
+
+/*
+ * Allocate wired-down memory in the kernel's address map
+ * or a submap.
+ */
+vm_offset_t kmem_alloc(map, size)
+ register vm_map_t map;
+ register vm_size_t size;
+{
+ vm_offset_t addr;
+ register vm_offset_t offset;
+ extern vm_object_t kernel_object;
+ vm_offset_t i;
+
+ size = round_page(size);
+
+ /*
+ * Use the kernel object for wired-down kernel pages.
+ * Assume that no region of the kernel object is
+ * referenced more than once.
+ */
+
+ /*
+ * Locate sufficient space in the map. This will give us the
+ * final virtual address for the new memory, and thus will tell
+ * us the offset within the kernel map.
+ */
+ vm_map_lock(map);
+ if (vm_map_findspace(map, 0, size, &addr)) {
+ vm_map_unlock(map);
+ return (0);
+ }
+ offset = addr - VM_MIN_KERNEL_ADDRESS;
+ vm_object_reference(kernel_object);
+ vm_map_insert(map, kernel_object, offset, addr, addr + size);
+ vm_map_unlock(map);
+
+ /*
+ * Guarantee that there are pages already in this object
+ * before calling vm_map_pageable. This is to prevent the
+ * following scenario:
+ *
+ * 1) Threads have swapped out, so that there is a
+ * pager for the kernel_object.
+ * 2) The kmsg zone is empty, and so we are kmem_allocing
+ * a new page for it.
+ * 3) vm_map_pageable calls vm_fault; there is no page,
+ * but there is a pager, so we call
+ * pager_data_request. But the kmsg zone is empty,
+ * so we must kmem_alloc.
+ * 4) goto 1
+ * 5) Even if the kmsg zone is not empty: when we get
+ * the data back from the pager, it will be (very
+ * stale) non-zero data. kmem_alloc is defined to
+ * return zero-filled memory.
+ *
+ * We're intentionally not activating the pages we allocate
+ * to prevent a race with page-out. vm_map_pageable will wire
+ * the pages.
+ */
+
+ vm_object_lock(kernel_object);
+ for (i = 0 ; i < size; i+= PAGE_SIZE) {
+ vm_page_t mem;
+
+ while ((mem = vm_page_alloc(kernel_object, offset+i)) == NULL) {
+ vm_object_unlock(kernel_object);
+ VM_WAIT;
+ vm_object_lock(kernel_object);
+ }
+ vm_page_zero_fill(mem);
+ mem->flags &= ~PG_BUSY;
+ }
+ vm_object_unlock(kernel_object);
+
+ /*
+ * And finally, mark the data as non-pageable.
+ */
+
+ (void) vm_map_pageable(map, (vm_offset_t) addr, addr + size, FALSE);
+
+ /*
+ * Try to coalesce the map
+ */
+
+ vm_map_simplify(map, addr);
+
+ return(addr);
+}
+
+/*
+ * kmem_free:
+ *
+ * Release a region of kernel virtual memory allocated
+ * with kmem_alloc, and return the physical pages
+ * associated with that region.
+ */
+void kmem_free(map, addr, size)
+ vm_map_t map;
+ register vm_offset_t addr;
+ vm_size_t size;
+{
+ (void) vm_map_remove(map, trunc_page(addr), round_page(addr + size));
+}
+
+/*
+ * kmem_suballoc:
+ *
+ * Allocates a map to manage a subrange
+ * of the kernel virtual address space.
+ *
+ * Arguments are as follows:
+ *
+ * parent Map to take range from
+ * size Size of range to find
+ * min, max Returned endpoints of map
+ * pageable Can the region be paged
+ */
+vm_map_t kmem_suballoc(parent, min, max, size, pageable)
+ register vm_map_t parent;
+ vm_offset_t *min, *max;
+ register vm_size_t size;
+ boolean_t pageable;
+{
+ register int ret;
+ vm_map_t result;
+
+ size = round_page(size);
+
+ *min = (vm_offset_t) vm_map_min(parent);
+ ret = vm_map_find(parent, NULL, (vm_offset_t) 0,
+ min, size, TRUE);
+ if (ret != KERN_SUCCESS) {
+ printf("kmem_suballoc: bad status return of %d.\n", ret);
+ panic("kmem_suballoc");
+ }
+ *max = *min + size;
+ pmap_reference(vm_map_pmap(parent));
+ result = vm_map_create(vm_map_pmap(parent), *min, *max, pageable);
+ if (result == NULL)
+ panic("kmem_suballoc: cannot create submap");
+ if ((ret = vm_map_submap(parent, *min, *max, result)) != KERN_SUCCESS)
+ panic("kmem_suballoc: unable to change range to submap");
+ return(result);
+}
+
+/*
+ * Allocate wired-down memory in the kernel's address map for the higher
+ * level kernel memory allocator (kern/kern_malloc.c). We cannot use
+ * kmem_alloc() because we may need to allocate memory at interrupt
+ * level where we cannot block (canwait == FALSE).
+ *
+ * This routine has its own private kernel submap (kmem_map) and object
+ * (kmem_object). This, combined with the fact that only malloc uses
+ * this routine, ensures that we will never block in map or object waits.
+ *
+ * Note that this still only works in a uni-processor environment and
+ * when called at splhigh().
+ *
+ * We don't worry about expanding the map (adding entries) since entries
+ * for wired maps are statically allocated.
+ */
+vm_offset_t
+kmem_malloc(map, size, canwait)
+ register vm_map_t map;
+ register vm_size_t size;
+ boolean_t canwait;
+{
+ register vm_offset_t offset, i;
+ vm_map_entry_t entry;
+ vm_offset_t addr;
+ vm_page_t m;
+ extern vm_object_t kmem_object;
+
+ if (map != kmem_map && map != mb_map)
+		panic("kmem_malloc: map != {kmem,mb}_map");
+
+ size = round_page(size);
+ addr = vm_map_min(map);
+
+ /*
+ * Locate sufficient space in the map. This will give us the
+ * final virtual address for the new memory, and thus will tell
+ * us the offset within the kernel map.
+ */
+ vm_map_lock(map);
+ if (vm_map_findspace(map, 0, size, &addr)) {
+ vm_map_unlock(map);
+#if 0
+ if (canwait) /* XXX should wait */
+ panic("kmem_malloc: %s too small",
+ map == kmem_map ? "kmem_map" : "mb_map");
+#endif
+ if (canwait)
+ panic("kmem_malloc: map too small");
+ return (0);
+ }
+ offset = addr - vm_map_min(kmem_map);
+ vm_object_reference(kmem_object);
+ vm_map_insert(map, kmem_object, offset, addr, addr + size);
+
+ /*
+ * If we can wait, just mark the range as wired
+ * (will fault pages as necessary).
+ */
+ if (canwait) {
+ vm_map_unlock(map);
+ (void) vm_map_pageable(map, (vm_offset_t) addr, addr + size,
+ FALSE);
+ vm_map_simplify(map, addr);
+ return(addr);
+ }
+
+ /*
+ * If we cannot wait then we must allocate all memory up front,
+ * pulling it off the active queue to prevent pageout.
+ */
+ vm_object_lock(kmem_object);
+ for (i = 0; i < size; i += PAGE_SIZE) {
+ m = vm_page_alloc(kmem_object, offset + i);
+
+ /*
+ * Ran out of space, free everything up and return.
+ * Don't need to lock page queues here as we know
+ * that the pages we got aren't on any queues.
+ */
+ if (m == NULL) {
+ while (i != 0) {
+ i -= PAGE_SIZE;
+ m = vm_page_lookup(kmem_object, offset + i);
+ vm_page_free(m);
+ }
+ vm_object_unlock(kmem_object);
+ vm_map_delete(map, addr, addr + size);
+ vm_map_unlock(map);
+ return(0);
+ }
+#if 0
+ vm_page_zero_fill(m);
+#endif
+ m->flags &= ~PG_BUSY;
+ }
+ vm_object_unlock(kmem_object);
+
+ /*
+ * Mark map entry as non-pageable.
+ * Assert: vm_map_insert() will never be able to extend the previous
+ * entry so there will be a new entry exactly corresponding to this
+ * address range and it will have wired_count == 0.
+ */
+ if (!vm_map_lookup_entry(map, addr, &entry) ||
+ entry->start != addr || entry->end != addr + size ||
+ entry->wired_count)
+ panic("kmem_malloc: entry not found or misaligned");
+ entry->wired_count++;
+
+ /*
+ * Loop thru pages, entering them in the pmap.
+ * (We cannot add them to the wired count without
+ * wrapping the vm_page_queue_lock in splimp...)
+ */
+ for (i = 0; i < size; i += PAGE_SIZE) {
+ vm_object_lock(kmem_object);
+ m = vm_page_lookup(kmem_object, offset + i);
+ vm_object_unlock(kmem_object);
+ pmap_enter(map->pmap, addr + i, VM_PAGE_TO_PHYS(m),
+ VM_PROT_DEFAULT, TRUE);
+ }
+ vm_map_unlock(map);
+
+ vm_map_simplify(map, addr);
+ return(addr);
+}
+
+/*
+ * kmem_alloc_wait
+ *
+ * Allocates pageable memory from a sub-map of the kernel. If the submap
+ * has no room, the caller sleeps waiting for more memory in the submap.
+ *
+ */
+vm_offset_t kmem_alloc_wait(map, size)
+ vm_map_t map;
+ vm_size_t size;
+{
+ vm_offset_t addr;
+
+ size = round_page(size);
+
+ for (;;) {
+ /*
+ * To make this work for more than one map,
+ * use the map's lock to lock out sleepers/wakers.
+ */
+ vm_map_lock(map);
+ if (vm_map_findspace(map, 0, size, &addr) == 0)
+ break;
+ /* no space now; see if we can ever get space */
+ if (vm_map_max(map) - vm_map_min(map) < size) {
+ vm_map_unlock(map);
+ return (0);
+ }
+ assert_wait((int)map, TRUE);
+ vm_map_unlock(map);
+ thread_block("kmaw");
+ }
+ vm_map_insert(map, NULL, (vm_offset_t)0, addr, addr + size);
+ vm_map_unlock(map);
+ return (addr);
+}
+
+/*
+ * kmem_free_wakeup
+ *
+ * Returns memory to a submap of the kernel, and wakes up any threads
+ * waiting for memory in that map.
+ */
+void kmem_free_wakeup(map, addr, size)
+ vm_map_t map;
+ vm_offset_t addr;
+ vm_size_t size;
+{
+ vm_map_lock(map);
+ (void) vm_map_delete(map, trunc_page(addr), round_page(addr + size));
+ thread_wakeup((int)map);
+ vm_map_unlock(map);
+}
+
+/*
+ * Create the kernel map; insert a mapping covering kernel text, data, bss,
+ * and all space allocated thus far (`bootstrap' data). The new map will thus
+ * map the range between VM_MIN_KERNEL_ADDRESS and `start' as allocated, and
+ * the range between `start' and `end' as free.
+ */
+void kmem_init(start, end)
+ vm_offset_t start, end;
+{
+ register vm_map_t m;
+
+ m = vm_map_create(kernel_pmap, VM_MIN_KERNEL_ADDRESS, end, FALSE);
+ vm_map_lock(m);
+ /* N.B.: cannot use kgdb to debug, starting with this assignment ... */
+ kernel_map = m;
+ (void) vm_map_insert(m, NULL, (vm_offset_t)0,
+ VM_MIN_KERNEL_ADDRESS, start);
+ /* ... and ending with the completion of the above `insert' */
+ vm_map_unlock(m);
+}
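For illustration only (this sketch is not part of the patch), the routines above compose as follows: a subsystem carves a pageable submap out of kernel_map with kmem_suballoc() and then cycles kernel virtual addresses through the sleeping allocator pair; the map name and sizes below are hypothetical:

	vm_offset_t sub_min, sub_max, kva;
	vm_map_t my_map;

	/* Take a 64-page pageable range out of the kernel map. */
	my_map = kmem_suballoc(kernel_map, &sub_min, &sub_max,
	    64 * PAGE_SIZE, TRUE);
	/* Allocate one page of VA, sleeping if the submap is full... */
	kva = kmem_alloc_wait(my_map, PAGE_SIZE);
	/* ... and return it, waking any other sleepers. */
	kmem_free_wakeup(my_map, kva, PAGE_SIZE);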
diff --git a/sys/vm/vm_kern.h b/sys/vm/vm_kern.h
new file mode 100644
index 0000000..c032560
--- /dev/null
+++ b/sys/vm/vm_kern.h
@@ -0,0 +1,74 @@
+/*
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * The Mach Operating System project at Carnegie-Mellon University.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)vm_kern.h 8.1 (Berkeley) 6/11/93
+ *
+ *
+ * Copyright (c) 1987, 1990 Carnegie-Mellon University.
+ * All rights reserved.
+ *
+ * Authors: Avadis Tevanian, Jr., Michael Wayne Young
+ *
+ * Permission to use, copy, modify and distribute this software and
+ * its documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ *
+ * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
+ * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
+ * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
+ * School of Computer Science
+ * Carnegie Mellon University
+ * Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie the
+ * rights to redistribute these changes.
+ */
+
+/* Kernel memory management definitions. */
+
+vm_map_t buffer_map;
+vm_map_t kernel_map;
+vm_map_t kmem_map;
+vm_map_t mb_map;
+vm_map_t io_map;
+vm_map_t clean_map;
+vm_map_t pager_map;
+vm_map_t phys_map;
diff --git a/sys/vm/vm_map.c b/sys/vm/vm_map.c
new file mode 100644
index 0000000..ffffa96
--- /dev/null
+++ b/sys/vm/vm_map.c
@@ -0,0 +1,2681 @@
+/*
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * The Mach Operating System project at Carnegie-Mellon University.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)vm_map.c 8.3 (Berkeley) 1/12/94
+ *
+ *
+ * Copyright (c) 1987, 1990 Carnegie-Mellon University.
+ * All rights reserved.
+ *
+ * Authors: Avadis Tevanian, Jr., Michael Wayne Young
+ *
+ * Permission to use, copy, modify and distribute this software and
+ * its documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ *
+ * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
+ * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
+ * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
+ * School of Computer Science
+ * Carnegie Mellon University
+ * Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie the
+ * rights to redistribute these changes.
+ */
+
+/*
+ * Virtual memory mapping module.
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+
+#include <vm/vm.h>
+#include <vm/vm_page.h>
+#include <vm/vm_object.h>
+#include <vm/vm_kern.h>
+
+/*
+ * Virtual memory maps provide for the mapping, protection,
+ * and sharing of virtual memory objects. In addition,
+ * this module provides for an efficient virtual copy of
+ * memory from one map to another.
+ *
+ * Synchronization is required prior to most operations.
+ *
+ * Maps consist of an ordered doubly-linked list of simple
+ * entries; a single hint is used to speed up lookups.
+ *
+ * In order to properly represent the sharing of virtual
+ * memory regions among maps, the map structure is bi-level.
+ * Top-level ("address") maps refer to regions of sharable
+ * virtual memory. These regions are implemented as
+ * ("sharing") maps, which then refer to the actual virtual
+ * memory objects. When two address maps "share" memory,
+ * their top-level maps both have references to the same
+ * sharing map. When memory is virtual-copied from one
+ * address map to another, the references in the sharing
+ * maps are actually copied -- no copying occurs at the
+ * virtual memory object level.
+ *
+ * Since portions of maps are specified by start/end addresses,
+ * which may not align with existing map entries, all
+ * routines merely "clip" entries to these start/end values.
+ * [That is, an entry is split into two, bordering at a
+ * start or end value.] Note that these clippings may not
+ * always be necessary (as the two resulting entries are then
+ * not changed); however, the clipping is done for convenience.
+ * No attempt is currently made to "glue back together" two
+ * abutting entries.
+ *
+ * As mentioned above, virtual copy operations are performed
+ * by copying VM object references from one sharing map to
+ * another, and then marking both regions as copy-on-write.
+ * It is important to note that only one writeable reference
+ * to a VM object region exists in any map -- this means that
+ * shadow object creation can be delayed until a write operation
+ * occurs.
+ */
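+
+/*
+ * A concrete example of the clipping rule above: if an entry covers
+ * [A, C) and an operation is applied to the range [B, C) with
+ * A < B < C, vm_map_clip_start() splits the entry into [A, B) and
+ * [B, C), and the operation then touches only the second piece.  The
+ * exported routines below therefore all follow roughly this shape
+ * (a sketch only; the real loops also walk the entry list and deal
+ * with sharing maps):
+ *
+ *	vm_map_lock(map);
+ *	if (vm_map_lookup_entry(map, start, &entry)) {
+ *		vm_map_clip_start(map, entry, start);
+ *		vm_map_clip_end(map, entry, end);
+ *		(operate on the clipped entry)
+ *	}
+ *	vm_map_unlock(map);
+ */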
+
+/*
+ * vm_map_startup:
+ *
+ * Initialize the vm_map module. Must be called before
+ * any other vm_map routines.
+ *
+ * Map and entry structures are allocated from the general
+ * purpose memory pool with some exceptions:
+ *
+ * - The kernel map and kmem submap are allocated statically.
+ * - Kernel map entries are allocated out of a static pool.
+ *
+ * These restrictions are necessary since malloc() uses the
+ * maps and requires map entries.
+ */
+
+vm_offset_t kentry_data;
+vm_size_t kentry_data_size;
+vm_map_entry_t kentry_free;
+vm_map_t kmap_free;
+
+int kentry_count;
+static vm_offset_t mapvm=0;
+static int mapvmpgcnt=0;
+
+static void _vm_map_clip_end __P((vm_map_t, vm_map_entry_t, vm_offset_t));
+static void _vm_map_clip_start __P((vm_map_t, vm_map_entry_t, vm_offset_t));
+
+void vm_map_startup()
+{
+ register int i;
+ register vm_map_entry_t mep;
+ vm_map_t mp;
+
+ /*
+ * Static map structures for allocation before initialization of
+ * kernel map or kmem map. vm_map_create knows how to deal with them.
+ */
+ kmap_free = mp = (vm_map_t) kentry_data;
+ i = MAX_KMAP;
+ while (--i > 0) {
+ mp->header.next = (vm_map_entry_t) (mp + 1);
+ mp++;
+ }
+ mp++->header.next = NULL;
+
+ /*
+ * Form a free list of statically allocated kernel map entries
+ * with the rest.
+ */
+ kentry_free = mep = (vm_map_entry_t) mp;
+ i = (kentry_data_size - MAX_KMAP * sizeof *mp) / sizeof *mep;
+ while (--i > 0) {
+ mep->next = mep + 1;
+ mep++;
+ }
+ mep->next = NULL;
+}
+
+/*
+ * Allocate a vmspace structure, including a vm_map and pmap,
+ * and initialize those structures. The refcnt is set to 1.
+ * The remaining fields must be initialized by the caller.
+ */
+struct vmspace *
+vmspace_alloc(min, max, pageable)
+ vm_offset_t min, max;
+ int pageable;
+{
+ register struct vmspace *vm;
+
+ MALLOC(vm, struct vmspace *, sizeof(struct vmspace), M_VMMAP, M_WAITOK);
+ bzero(vm, (caddr_t) &vm->vm_startcopy - (caddr_t) vm);
+ vm_map_init(&vm->vm_map, min, max, pageable);
+ pmap_pinit(&vm->vm_pmap);
+ vm->vm_map.pmap = &vm->vm_pmap; /* XXX */
+ vm->vm_refcnt = 1;
+ return (vm);
+}
+
+void
+vmspace_free(vm)
+ register struct vmspace *vm;
+{
+
+ if (--vm->vm_refcnt == 0) {
+ /*
+ * Lock the map, to wait out all other references to it.
+ * Delete all of the mappings and pages they hold,
+ * then call the pmap module to reclaim anything left.
+ */
+ vm_map_lock(&vm->vm_map);
+ (void) vm_map_delete(&vm->vm_map, vm->vm_map.min_offset,
+ vm->vm_map.max_offset);
+ pmap_release(&vm->vm_pmap);
+ FREE(vm, M_VMMAP);
+ }
+}
+
+/*
+ * vm_map_create:
+ *
+ * Creates and returns a new empty VM map with
+ * the given physical map structure, and having
+ * the given lower and upper address bounds.
+ */
+vm_map_t vm_map_create(pmap, min, max, pageable)
+ pmap_t pmap;
+ vm_offset_t min, max;
+ boolean_t pageable;
+{
+ register vm_map_t result;
+ extern vm_map_t kmem_map;
+
+ if (kmem_map == NULL) {
+		result = kmap_free;
+		if (result == NULL)
+			panic("vm_map_create: out of maps");
+		kmap_free = (vm_map_t) result->header.next;
+ } else
+ MALLOC(result, vm_map_t, sizeof(struct vm_map),
+ M_VMMAP, M_WAITOK);
+
+ vm_map_init(result, min, max, pageable);
+ result->pmap = pmap;
+ return(result);
+}
+
+/*
+ * Initialize an existing vm_map structure
+ * such as that in the vmspace structure.
+ * The pmap is set elsewhere.
+ */
+void
+vm_map_init(map, min, max, pageable)
+ register struct vm_map *map;
+ vm_offset_t min, max;
+ boolean_t pageable;
+{
+ map->header.next = map->header.prev = &map->header;
+ map->nentries = 0;
+ map->size = 0;
+ map->ref_count = 1;
+ map->is_main_map = TRUE;
+ map->min_offset = min;
+ map->max_offset = max;
+ map->entries_pageable = pageable;
+ map->first_free = &map->header;
+ map->hint = &map->header;
+ map->timestamp = 0;
+ lock_init(&map->lock, TRUE);
+ simple_lock_init(&map->ref_lock);
+ simple_lock_init(&map->hint_lock);
+}
+
+/*
+ * vm_map_entry_create: [ internal use only ]
+ *
+ * Allocates a VM map entry for insertion.
+ * No entry fields are filled in.  This routine is
+ * for internal use only.
+ */
+static struct vm_map_entry *mappool;
+static int mappoolcnt;
+void vm_map_entry_dispose(vm_map_t map, vm_map_entry_t entry);
+
+vm_map_entry_t
+vm_map_entry_create(map)
+ vm_map_t map;
+{
+ vm_map_entry_t entry;
+ int s;
+ int i;
+#define KENTRY_LOW_WATER 64
+#define MAPENTRY_LOW_WATER 64
+
+ /*
+ * This is a *very* nasty (and sort of incomplete) hack!!!!
+ */
+ if (kentry_count < KENTRY_LOW_WATER) {
+ if (mapvmpgcnt && mapvm) {
+ vm_page_t m;
+ if (m = vm_page_alloc(kmem_object, mapvm-vm_map_min(kmem_map))) {
+ int newentries;
+ newentries = (NBPG/sizeof (struct vm_map_entry));
+ vm_page_wire(m);
+ m->flags &= ~PG_BUSY;
+ pmap_enter(vm_map_pmap(kmem_map), mapvm,
+ VM_PAGE_TO_PHYS(m), VM_PROT_DEFAULT, 1);
+
+ entry = (vm_map_entry_t) mapvm;
+ mapvm += NBPG;
+ --mapvmpgcnt;
+
+ for (i = 0; i < newentries; i++) {
+ vm_map_entry_dispose(kernel_map, entry);
+ entry++;
+ }
+ }
+ }
+ }
+
+ if (map == kernel_map || map == kmem_map || map == pager_map) {
+
+ if (entry = kentry_free) {
+ kentry_free = entry->next;
+ --kentry_count;
+ return entry;
+ }
+
+ if (entry = mappool) {
+ mappool = entry->next;
+ --mappoolcnt;
+ return entry;
+ }
+
+ } else {
+ if (entry = mappool) {
+ mappool = entry->next;
+ --mappoolcnt;
+ return entry;
+ }
+
+ MALLOC(entry, vm_map_entry_t, sizeof(struct vm_map_entry),
+ M_VMMAPENT, M_WAITOK);
+ }
+	if (entry == NULL)
+		panic("vm_map_entry_create: out of map entries");
+
+ return(entry);
+}
+
+/*
+ * vm_map_entry_dispose: [ internal use only ]
+ *
+ * Inverse of vm_map_entry_create.
+ */
+void
+vm_map_entry_dispose(map, entry)
+ vm_map_t map;
+ vm_map_entry_t entry;
+{
+ extern vm_map_t kernel_map, kmem_map, pager_map;
+ int s;
+
+ if (map == kernel_map || map == kmem_map || map == pager_map ||
+ kentry_count < KENTRY_LOW_WATER) {
+ entry->next = kentry_free;
+ kentry_free = entry;
+ ++kentry_count;
+ } else {
+ if (mappoolcnt < MAPENTRY_LOW_WATER) {
+ entry->next = mappool;
+ mappool = entry;
+ ++mappoolcnt;
+ return;
+ }
+
+ FREE(entry, M_VMMAPENT);
+ }
+}
+
+/*
+ * vm_map_entry_{un,}link:
+ *
+ * Insert/remove entries from maps.
+ */
+#define vm_map_entry_link(map, after_where, entry) \
+ { \
+ (map)->nentries++; \
+ (entry)->prev = (after_where); \
+ (entry)->next = (after_where)->next; \
+ (entry)->prev->next = (entry); \
+ (entry)->next->prev = (entry); \
+ }
+#define vm_map_entry_unlink(map, entry) \
+ { \
+ (map)->nentries--; \
+ (entry)->next->prev = (entry)->prev; \
+ (entry)->prev->next = (entry)->next; \
+ }
+
+/*
+ * vm_map_reference:
+ *
+ * Creates another valid reference to the given map.
+ *
+ */
+void vm_map_reference(map)
+ register vm_map_t map;
+{
+ if (map == NULL)
+ return;
+
+ simple_lock(&map->ref_lock);
+ map->ref_count++;
+ simple_unlock(&map->ref_lock);
+}
+
+/*
+ * vm_map_deallocate:
+ *
+ * Removes a reference from the specified map,
+ * destroying it if no references remain.
+ * The map should not be locked.
+ */
+void vm_map_deallocate(map)
+ register vm_map_t map;
+{
+ register int c;
+
+ if (map == NULL)
+ return;
+
+ simple_lock(&map->ref_lock);
+ c = --map->ref_count;
+ simple_unlock(&map->ref_lock);
+
+ if (c > 0) {
+ return;
+ }
+
+ /*
+ * Lock the map, to wait out all other references
+ * to it.
+ */
+
+ vm_map_lock(map);
+
+ (void) vm_map_delete(map, map->min_offset, map->max_offset);
+
+ pmap_destroy(map->pmap);
+
+ FREE(map, M_VMMAP);
+}
+
+/*
+ * vm_map_insert:
+ *
+ * Inserts the given whole VM object into the target
+ * map at the specified address range. The object's
+ * size should match that of the address range.
+ *
+ * Requires that the map be locked, and leaves it so.
+ */
+int
+vm_map_insert(map, object, offset, start, end)
+ vm_map_t map;
+ vm_object_t object;
+ vm_offset_t offset;
+ vm_offset_t start;
+ vm_offset_t end;
+{
+ register vm_map_entry_t new_entry;
+ register vm_map_entry_t prev_entry;
+ vm_map_entry_t temp_entry;
+
+ /*
+ * Check that the start and end points are not bogus.
+ */
+
+ if ((start < map->min_offset) || (end > map->max_offset) ||
+ (start >= end))
+ return(KERN_INVALID_ADDRESS);
+
+ /*
+ * Find the entry prior to the proposed
+ * starting address; if it's part of an
+ * existing entry, this range is bogus.
+ */
+
+ if (vm_map_lookup_entry(map, start, &temp_entry))
+ return(KERN_NO_SPACE);
+
+ prev_entry = temp_entry;
+
+ /*
+ * Assert that the next entry doesn't overlap the
+ * end point.
+ */
+
+ if ((prev_entry->next != &map->header) &&
+ (prev_entry->next->start < end))
+ return(KERN_NO_SPACE);
+
+ /*
+ * See if we can avoid creating a new entry by
+ * extending one of our neighbors.
+ */
+
+ if (object == NULL) {
+ if ((prev_entry != &map->header) &&
+ (prev_entry->end == start) &&
+ (map->is_main_map) &&
+ (prev_entry->is_a_map == FALSE) &&
+ (prev_entry->is_sub_map == FALSE) &&
+ (prev_entry->inheritance == VM_INHERIT_DEFAULT) &&
+ (prev_entry->protection == VM_PROT_DEFAULT) &&
+ (prev_entry->max_protection == VM_PROT_DEFAULT) &&
+ (prev_entry->wired_count == 0)) {
+
+ if (vm_object_coalesce(prev_entry->object.vm_object,
+ NULL,
+ prev_entry->offset,
+ (vm_offset_t) 0,
+ (vm_size_t)(prev_entry->end
+ - prev_entry->start),
+ (vm_size_t)(end - prev_entry->end))) {
+ /*
+ * Coalesced the two objects - can extend
+ * the previous map entry to include the
+ * new range.
+ */
+ map->size += (end - prev_entry->end);
+ prev_entry->end = end;
+ return(KERN_SUCCESS);
+ }
+ }
+ }
+
+ /*
+ * Create a new entry
+ */
+
+ new_entry = vm_map_entry_create(map);
+ new_entry->start = start;
+ new_entry->end = end;
+
+ new_entry->is_a_map = FALSE;
+ new_entry->is_sub_map = FALSE;
+ new_entry->object.vm_object = object;
+ new_entry->offset = offset;
+
+ new_entry->copy_on_write = FALSE;
+ new_entry->needs_copy = FALSE;
+
+ if (map->is_main_map) {
+ new_entry->inheritance = VM_INHERIT_DEFAULT;
+ new_entry->protection = VM_PROT_DEFAULT;
+ new_entry->max_protection = VM_PROT_DEFAULT;
+ new_entry->wired_count = 0;
+ }
+
+ /*
+ * Insert the new entry into the list
+ */
+
+ vm_map_entry_link(map, prev_entry, new_entry);
+ map->size += new_entry->end - new_entry->start;
+
+ /*
+ * Update the free space hint
+ */
+
+ if ((map->first_free == prev_entry) && (prev_entry->end >= new_entry->start))
+ map->first_free = new_entry;
+
+ return(KERN_SUCCESS);
+}
+
+/*
+ * SAVE_HINT:
+ *
+ * Saves the specified entry as the hint for
+ * future lookups. Performs necessary interlocks.
+ */
+#define SAVE_HINT(map,value) \
+ simple_lock(&(map)->hint_lock); \
+ (map)->hint = (value); \
+ simple_unlock(&(map)->hint_lock);
+
+/*
+ * vm_map_lookup_entry: [ internal use only ]
+ *
+ * Finds the map entry containing (or
+ * immediately preceding) the specified address
+ * in the given map; the entry is returned
+ * in the "entry" parameter. The boolean
+ * result indicates whether the address is
+ * actually contained in the map.
+ */
+boolean_t vm_map_lookup_entry(map, address, entry)
+ register vm_map_t map;
+ register vm_offset_t address;
+ vm_map_entry_t *entry; /* OUT */
+{
+ register vm_map_entry_t cur;
+ register vm_map_entry_t last;
+
+ /*
+ * Start looking either from the head of the
+ * list, or from the hint.
+ */
+
+ simple_lock(&map->hint_lock);
+ cur = map->hint;
+ simple_unlock(&map->hint_lock);
+
+ if (cur == &map->header)
+ cur = cur->next;
+
+ if (address >= cur->start) {
+ /*
+ * Go from hint to end of list.
+ *
+ * But first, make a quick check to see if
+ * we are already looking at the entry we
+ * want (which is usually the case).
+ * Note also that we don't need to save the hint
+ * here... it is the same hint (unless we are
+ * at the header, in which case the hint didn't
+ * buy us anything anyway).
+ */
+ last = &map->header;
+ if ((cur != last) && (cur->end > address)) {
+ *entry = cur;
+ return(TRUE);
+ }
+ }
+ else {
+ /*
+ * Go from start to hint, *inclusively*
+ */
+ last = cur->next;
+ cur = map->header.next;
+ }
+
+ /*
+ * Search linearly
+ */
+
+ while (cur != last) {
+ if (cur->end > address) {
+ if (address >= cur->start) {
+ /*
+ * Save this lookup for future
+ * hints, and return
+ */
+
+ *entry = cur;
+ SAVE_HINT(map, cur);
+ return(TRUE);
+ }
+ break;
+ }
+ cur = cur->next;
+ }
+ *entry = cur->prev;
+ SAVE_HINT(map, *entry);
+ return(FALSE);
+}
+
+/*
+ * Find sufficient space for `length' bytes in the given map, starting at
+ * `start'. The map must be locked. Returns 0 on success, 1 on no space.
+ */
+int
+vm_map_findspace(map, start, length, addr)
+ register vm_map_t map;
+ register vm_offset_t start;
+ vm_size_t length;
+ vm_offset_t *addr;
+{
+ register vm_map_entry_t entry, next;
+ register vm_offset_t end;
+
+ if (start < map->min_offset)
+ start = map->min_offset;
+ if (start > map->max_offset)
+ return (1);
+
+ /*
+ * Look for the first possible address; if there's already
+ * something at this address, we have to start after it.
+ */
+ if (start == map->min_offset) {
+ if ((entry = map->first_free) != &map->header)
+ start = entry->end;
+ } else {
+ vm_map_entry_t tmp;
+ if (vm_map_lookup_entry(map, start, &tmp))
+ start = tmp->end;
+ entry = tmp;
+ }
+
+ /*
+ * Look through the rest of the map, trying to fit a new region in
+ * the gap between existing regions, or after the very last region.
+ */
+ for (;; start = (entry = next)->end) {
+ /*
+ * Find the end of the proposed new region. Be sure we didn't
+ * go beyond the end of the map, or wrap around the address;
+ * if so, we lose. Otherwise, if this is the last entry, or
+ * if the proposed new region fits before the next entry, we
+ * win.
+ */
+ end = start + length;
+ if (end > map->max_offset || end < start)
+ return (1);
+ next = entry->next;
+ if (next == &map->header || next->start >= end)
+ break;
+ }
+ SAVE_HINT(map, entry);
+ *addr = start;
+ return (0);
+}
+
+/*
+ * vm_map_find finds an unallocated region in the target address
+ * map with the given length. The search is defined to be
+ * first-fit from the specified address; the region found is
+ * returned in the same parameter.
+ *
+ */
+int
+vm_map_find(map, object, offset, addr, length, find_space)
+ vm_map_t map;
+ vm_object_t object;
+ vm_offset_t offset;
+ vm_offset_t *addr; /* IN/OUT */
+ vm_size_t length;
+ boolean_t find_space;
+{
+ register vm_offset_t start;
+ int result;
+
+ start = *addr;
+ vm_map_lock(map);
+ if (find_space) {
+ if (vm_map_findspace(map, start, length, addr)) {
+ vm_map_unlock(map);
+ return (KERN_NO_SPACE);
+ }
+ start = *addr;
+ }
+ result = vm_map_insert(map, object, offset, start, start + length);
+ vm_map_unlock(map);
+ return (result);
+}
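+
+/*
+ * As an example, a kernel caller that only needs pageable kernel
+ * virtual address space (much as kmem_alloc_pageable() in vm_kern.c
+ * does) can pass a null object and let vm_map_find() pick the
+ * address, roughly:
+ *
+ *	vm_offset_t addr;
+ *
+ *	addr = vm_map_min(kernel_map);
+ *	if (vm_map_find(kernel_map, NULL, (vm_offset_t) 0,
+ *	    &addr, round_page(size), TRUE) != KERN_SUCCESS)
+ *		return (0);
+ *
+ * Here `size' and the error handling are only illustrative.
+ */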
+
+/*
+ * vm_map_simplify_entry: [ internal use only ]
+ *
+ * Simplify the given map entry by:
+ * removing extra sharing maps
+ * [XXX maybe later] merging with a neighbor
+ */
+void vm_map_simplify_entry(map, entry)
+ vm_map_t map;
+ vm_map_entry_t entry;
+{
+#ifdef lint
+ map++;
+#endif
+
+ /*
+ * If this entry corresponds to a sharing map, then
+ * see if we can remove the level of indirection.
+ * If it's not a sharing map, then it points to
+ * a VM object, so see if we can merge with either
+ * of our neighbors.
+ */
+
+ if (entry->is_sub_map)
+ return;
+ if (entry->is_a_map) {
+#if 0
+ vm_map_t my_share_map;
+ int count;
+
+ my_share_map = entry->object.share_map;
+ simple_lock(&my_share_map->ref_lock);
+ count = my_share_map->ref_count;
+ simple_unlock(&my_share_map->ref_lock);
+
+ if (count == 1) {
+ /* Can move the region from
+ * entry->start to entry->end (+ entry->offset)
+ * in my_share_map into place of entry.
+ * Later.
+ */
+ }
+#endif
+ }
+ else {
+ /*
+ * Try to merge with our neighbors.
+ *
+ * Conditions for merge are:
+ *
+ * 1. entries are adjacent.
+ * 2. both entries point to objects
+ * with null pagers.
+ *
+ * If a merge is possible, we replace the two
+ * entries with a single entry, then merge
+ * the two objects into a single object.
+ *
+ * Now, all that is left to do is write the
+ * code!
+ */
+ }
+}
+
+/*
+ * vm_map_clip_start: [ internal use only ]
+ *
+ * Asserts that the given entry begins at or after
+ * the specified address; if necessary,
+ * it splits the entry into two.
+ */
+#define vm_map_clip_start(map, entry, startaddr) \
+{ \
+ if (startaddr > entry->start) \
+ _vm_map_clip_start(map, entry, startaddr); \
+}
+
+/*
+ * This routine is called only when it is known that
+ * the entry must be split.
+ */
+static void _vm_map_clip_start(map, entry, start)
+ register vm_map_t map;
+ register vm_map_entry_t entry;
+ register vm_offset_t start;
+{
+ register vm_map_entry_t new_entry;
+
+ /*
+ * See if we can simplify this entry first
+ */
+
+ /* vm_map_simplify_entry(map, entry); */
+
+ /*
+ * Split off the front portion --
+ * note that we must insert the new
+ * entry BEFORE this one, so that
+ * this entry has the specified starting
+ * address.
+ */
+
+ new_entry = vm_map_entry_create(map);
+ *new_entry = *entry;
+
+ new_entry->end = start;
+ entry->offset += (start - entry->start);
+ entry->start = start;
+
+ vm_map_entry_link(map, entry->prev, new_entry);
+
+ if (entry->is_a_map || entry->is_sub_map)
+ vm_map_reference(new_entry->object.share_map);
+ else
+ vm_object_reference(new_entry->object.vm_object);
+}
+
+/*
+ * vm_map_clip_end: [ internal use only ]
+ *
+ * Asserts that the given entry ends at or before
+ * the specified address; if necessary,
+ * it splits the entry into two.
+ */
+
+#define vm_map_clip_end(map, entry, endaddr) \
+{ \
+ if (endaddr < entry->end) \
+ _vm_map_clip_end(map, entry, endaddr); \
+}
+
+/*
+ * This routine is called only when it is known that
+ * the entry must be split.
+ */
+static void _vm_map_clip_end(map, entry, end)
+ register vm_map_t map;
+ register vm_map_entry_t entry;
+ register vm_offset_t end;
+{
+ register vm_map_entry_t new_entry;
+
+ /*
+ * Create a new entry and insert it
+ * AFTER the specified entry
+ */
+
+ new_entry = vm_map_entry_create(map);
+ *new_entry = *entry;
+
+ new_entry->start = entry->end = end;
+ new_entry->offset += (end - entry->start);
+
+ vm_map_entry_link(map, entry, new_entry);
+
+ if (entry->is_a_map || entry->is_sub_map)
+ vm_map_reference(new_entry->object.share_map);
+ else
+ vm_object_reference(new_entry->object.vm_object);
+}
+
+/*
+ * VM_MAP_RANGE_CHECK: [ internal use only ]
+ *
+ * Asserts that the starting and ending region
+ * addresses fall within the valid range of the map.
+ */
+#define VM_MAP_RANGE_CHECK(map, start, end) \
+ { \
+ if (start < vm_map_min(map)) \
+ start = vm_map_min(map); \
+ if (end > vm_map_max(map)) \
+ end = vm_map_max(map); \
+ if (start > end) \
+ start = end; \
+ }
+
+/*
+ * vm_map_submap: [ kernel use only ]
+ *
+ * Mark the given range as handled by a subordinate map.
+ *
+ * This range must have been created with vm_map_find,
+ * and no other operations may have been performed on this
+ * range prior to calling vm_map_submap.
+ *
+ * Only a limited number of operations can be performed
+ * within this range after calling vm_map_submap:
+ * vm_fault
+ * [Don't try vm_map_copy!]
+ *
+ * To remove a submapping, one must first remove the
+ * range from the superior map, and then destroy the
+ * submap (if desired). [Better yet, don't try it.]
+ */
+int
+vm_map_submap(map, start, end, submap)
+ register vm_map_t map;
+ register vm_offset_t start;
+ register vm_offset_t end;
+ vm_map_t submap;
+{
+ vm_map_entry_t entry;
+ register int result = KERN_INVALID_ARGUMENT;
+
+ vm_map_lock(map);
+
+ VM_MAP_RANGE_CHECK(map, start, end);
+
+ if (vm_map_lookup_entry(map, start, &entry)) {
+ vm_map_clip_start(map, entry, start);
+ }
+ else
+ entry = entry->next;
+
+ vm_map_clip_end(map, entry, end);
+
+ if ((entry->start == start) && (entry->end == end) &&
+ (!entry->is_a_map) &&
+ (entry->object.vm_object == NULL) &&
+ (!entry->copy_on_write)) {
+ entry->is_a_map = FALSE;
+ entry->is_sub_map = TRUE;
+ vm_map_reference(entry->object.sub_map = submap);
+ result = KERN_SUCCESS;
+ }
+ vm_map_unlock(map);
+
+ return(result);
+}
+
+/*
+ * vm_map_protect:
+ *
+ * Sets the protection of the specified address
+ * region in the target map. If "set_max" is
+ * specified, the maximum protection is to be set;
+ * otherwise, only the current protection is affected.
+ */
+int
+vm_map_protect(map, start, end, new_prot, set_max)
+ register vm_map_t map;
+ register vm_offset_t start;
+ register vm_offset_t end;
+ register vm_prot_t new_prot;
+ register boolean_t set_max;
+{
+ register vm_map_entry_t current;
+ vm_map_entry_t entry;
+
+ vm_map_lock(map);
+
+ VM_MAP_RANGE_CHECK(map, start, end);
+
+ if (vm_map_lookup_entry(map, start, &entry)) {
+ vm_map_clip_start(map, entry, start);
+ }
+ else
+ entry = entry->next;
+
+ /*
+ * Make a first pass to check for protection
+ * violations.
+ */
+
+ current = entry;
+ while ((current != &map->header) && (current->start < end)) {
+		if (current->is_sub_map) {
+			vm_map_unlock(map);
+			return(KERN_INVALID_ARGUMENT);
+		}
+ if ((new_prot & current->max_protection) != new_prot) {
+ vm_map_unlock(map);
+ return(KERN_PROTECTION_FAILURE);
+ }
+
+ current = current->next;
+ }
+
+ /*
+ * Go back and fix up protections.
+ * [Note that clipping is not necessary the second time.]
+ */
+
+ current = entry;
+
+ while ((current != &map->header) && (current->start < end)) {
+ vm_prot_t old_prot;
+
+ vm_map_clip_end(map, current, end);
+
+ old_prot = current->protection;
+ if (set_max)
+ current->protection =
+ (current->max_protection = new_prot) &
+ old_prot;
+ else
+ current->protection = new_prot;
+
+ /*
+ * Update physical map if necessary.
+ * Worry about copy-on-write here -- CHECK THIS XXX
+ */
+
+ if (current->protection != old_prot) {
+
+#define MASK(entry) ((entry)->copy_on_write ? ~VM_PROT_WRITE : \
+ VM_PROT_ALL)
+#define max(a,b) ((a) > (b) ? (a) : (b))
+
+ if (current->is_a_map) {
+ vm_map_entry_t share_entry;
+ vm_offset_t share_end;
+
+ vm_map_lock(current->object.share_map);
+ (void) vm_map_lookup_entry(
+ current->object.share_map,
+ current->offset,
+ &share_entry);
+ share_end = current->offset +
+ (current->end - current->start);
+ while ((share_entry !=
+ &current->object.share_map->header) &&
+ (share_entry->start < share_end)) {
+
+ pmap_protect(map->pmap,
+ (max(share_entry->start,
+ current->offset) -
+ current->offset +
+ current->start),
+ min(share_entry->end,
+ share_end) -
+ current->offset +
+ current->start,
+ current->protection &
+ MASK(share_entry));
+
+ share_entry = share_entry->next;
+ }
+ vm_map_unlock(current->object.share_map);
+ }
+ else
+ pmap_protect(map->pmap, current->start,
+ current->end,
+				current->protection & MASK(current));
+#undef max
+#undef MASK
+ }
+ current = current->next;
+ }
+
+ vm_map_unlock(map);
+ return(KERN_SUCCESS);
+}
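+
+/*
+ * A typical caller (e.g. the mprotect path in vm_mmap.c) rounds the
+ * user-supplied range to page boundaries and changes only the current
+ * protection, roughly:
+ *
+ *	rv = vm_map_protect(map, trunc_page(addr),
+ *	    round_page(addr + len), prot, FALSE);
+ *
+ * set_max is passed as TRUE only when the maximum protection itself
+ * is to be changed; lowering it clamps the current protection, as the
+ * code above shows.  The rounding shown is the usual trunc_page()/
+ * round_page() treatment; exact call sites may differ.
+ */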
+
+/*
+ * vm_map_inherit:
+ *
+ * Sets the inheritance of the specified address
+ * range in the target map. Inheritance
+ * affects how the map will be shared with
+ * child maps at the time of vm_map_fork.
+ */
+int
+vm_map_inherit(map, start, end, new_inheritance)
+ register vm_map_t map;
+ register vm_offset_t start;
+ register vm_offset_t end;
+ register vm_inherit_t new_inheritance;
+{
+ register vm_map_entry_t entry;
+ vm_map_entry_t temp_entry;
+
+ switch (new_inheritance) {
+ case VM_INHERIT_NONE:
+ case VM_INHERIT_COPY:
+ case VM_INHERIT_SHARE:
+ break;
+ default:
+ return(KERN_INVALID_ARGUMENT);
+ }
+
+ vm_map_lock(map);
+
+ VM_MAP_RANGE_CHECK(map, start, end);
+
+ if (vm_map_lookup_entry(map, start, &temp_entry)) {
+ entry = temp_entry;
+ vm_map_clip_start(map, entry, start);
+ }
+ else
+ entry = temp_entry->next;
+
+ while ((entry != &map->header) && (entry->start < end)) {
+ vm_map_clip_end(map, entry, end);
+
+ entry->inheritance = new_inheritance;
+
+ entry = entry->next;
+ }
+
+ vm_map_unlock(map);
+ return(KERN_SUCCESS);
+}
+
+/*
+ * vm_map_pageable:
+ *
+ * Sets the pageability of the specified address
+ * range in the target map. Regions specified
+ * as not pageable require locked-down physical
+ * memory and physical page maps.
+ *
+ * The map must not be locked, but a reference
+ * must remain to the map throughout the call.
+ */
+int
+vm_map_pageable(map, start, end, new_pageable)
+ register vm_map_t map;
+ register vm_offset_t start;
+ register vm_offset_t end;
+ register boolean_t new_pageable;
+{
+ register vm_map_entry_t entry;
+ vm_map_entry_t start_entry;
+ register vm_offset_t failed = 0;
+ int rv;
+
+ vm_map_lock(map);
+
+ VM_MAP_RANGE_CHECK(map, start, end);
+
+ /*
+ * Only one pageability change may take place at one
+ * time, since vm_fault assumes it will be called
+ * only once for each wiring/unwiring. Therefore, we
+ * have to make sure we're actually changing the pageability
+ * for the entire region. We do so before making any changes.
+ */
+
+ if (vm_map_lookup_entry(map, start, &start_entry) == FALSE) {
+ vm_map_unlock(map);
+ return(KERN_INVALID_ADDRESS);
+ }
+ entry = start_entry;
+
+ /*
+ * Actions are rather different for wiring and unwiring,
+ * so we have two separate cases.
+ */
+
+ if (new_pageable) {
+
+ vm_map_clip_start(map, entry, start);
+
+ /*
+ * Unwiring. First ensure that the range to be
+ * unwired is really wired down and that there
+ * are no holes.
+ */
+ while ((entry != &map->header) && (entry->start < end)) {
+
+ if (entry->wired_count == 0 ||
+ (entry->end < end &&
+ (entry->next == &map->header ||
+ entry->next->start > entry->end))) {
+ vm_map_unlock(map);
+ return(KERN_INVALID_ARGUMENT);
+ }
+ entry = entry->next;
+ }
+
+ /*
+ * Now decrement the wiring count for each region.
+ * If a region becomes completely unwired,
+ * unwire its physical pages and mappings.
+ */
+ lock_set_recursive(&map->lock);
+
+ entry = start_entry;
+ while ((entry != &map->header) && (entry->start < end)) {
+ vm_map_clip_end(map, entry, end);
+
+ entry->wired_count--;
+ if (entry->wired_count == 0)
+ vm_fault_unwire(map, entry->start, entry->end);
+
+ entry = entry->next;
+ }
+ lock_clear_recursive(&map->lock);
+ }
+
+ else {
+ /*
+ * Wiring. We must do this in two passes:
+ *
+ * 1. Holding the write lock, we create any shadow
+ * or zero-fill objects that need to be created.
+ * Then we clip each map entry to the region to be
+ * wired and increment its wiring count. We
+ * create objects before clipping the map entries
+ * to avoid object proliferation.
+ *
+ * 2. We downgrade to a read lock, and call
+ * vm_fault_wire to fault in the pages for any
+ * newly wired area (wired_count is 1).
+ *
+ * Downgrading to a read lock for vm_fault_wire avoids
+ * a possible deadlock with another thread that may have
+ * faulted on one of the pages to be wired (it would mark
+ * the page busy, blocking us, then in turn block on the
+ * map lock that we hold). Because of problems in the
+ * recursive lock package, we cannot upgrade to a write
+ * lock in vm_map_lookup. Thus, any actions that require
+ * the write lock must be done beforehand. Because we
+ * keep the read lock on the map, the copy-on-write status
+ * of the entries we modify here cannot change.
+ */
+
+ /*
+ * Pass 1.
+ */
+ while ((entry != &map->header) && (entry->start < end)) {
+ if (entry->wired_count == 0) {
+
+ /*
+ * Perform actions of vm_map_lookup that need
+ * the write lock on the map: create a shadow
+ * object for a copy-on-write region, or an
+ * object for a zero-fill region.
+ *
+ * We don't have to do this for entries that
+ * point to sharing maps, because we won't hold
+ * the lock on the sharing map.
+ */
+ if (!entry->is_a_map) {
+ if (entry->needs_copy &&
+ ((entry->protection & VM_PROT_WRITE) != 0)) {
+
+ vm_object_shadow(&entry->object.vm_object,
+ &entry->offset,
+ (vm_size_t)(entry->end
+ - entry->start));
+ entry->needs_copy = FALSE;
+ }
+ else if (entry->object.vm_object == NULL) {
+ entry->object.vm_object =
+ vm_object_allocate((vm_size_t)(entry->end
+ - entry->start));
+ entry->offset = (vm_offset_t)0;
+ }
+ }
+ }
+ vm_map_clip_start(map, entry, start);
+ vm_map_clip_end(map, entry, end);
+ entry->wired_count++;
+
+ /*
+ * Check for holes
+ */
+ if (entry->end < end &&
+ (entry->next == &map->header ||
+ entry->next->start > entry->end)) {
+ /*
+ * Found one. Object creation actions
+ * do not need to be undone, but the
+ * wired counts need to be restored.
+ */
+ while (entry != &map->header && entry->end > start) {
+ entry->wired_count--;
+ entry = entry->prev;
+ }
+ vm_map_unlock(map);
+ return(KERN_INVALID_ARGUMENT);
+ }
+ entry = entry->next;
+ }
+
+ /*
+ * Pass 2.
+ */
+
+ /*
+ * HACK HACK HACK HACK
+ *
+ * If we are wiring in the kernel map or a submap of it,
+ * unlock the map to avoid deadlocks. We trust that the
+ * kernel threads are well-behaved, and therefore will
+ * not do anything destructive to this region of the map
+ * while we have it unlocked. We cannot trust user threads
+ * to do the same.
+ *
+ * HACK HACK HACK HACK
+ */
+ if (vm_map_pmap(map) == kernel_pmap) {
+ vm_map_unlock(map); /* trust me ... */
+ }
+ else {
+ lock_set_recursive(&map->lock);
+ lock_write_to_read(&map->lock);
+ }
+
+ rv = 0;
+ entry = start_entry;
+ while (entry != &map->header && entry->start < end) {
+ /*
+ * If vm_fault_wire fails for any page we need to
+ * undo what has been done. We decrement the wiring
+ * count for those pages which have not yet been
+ * wired (now) and unwire those that have (later).
+ *
+ * XXX this violates the locking protocol on the map,
+ * needs to be fixed.
+ */
+ if (rv)
+ entry->wired_count--;
+ else if (entry->wired_count == 1) {
+ rv = vm_fault_wire(map, entry->start, entry->end);
+ if (rv) {
+ failed = entry->start;
+ entry->wired_count--;
+ }
+ }
+ entry = entry->next;
+ }
+
+ if (vm_map_pmap(map) == kernel_pmap) {
+ vm_map_lock(map);
+ }
+ else {
+ lock_clear_recursive(&map->lock);
+ }
+ if (rv) {
+ vm_map_unlock(map);
+ (void) vm_map_pageable(map, start, failed, TRUE);
+ return(rv);
+ }
+ }
+
+ vm_map_unlock(map);
+
+ return(KERN_SUCCESS);
+}
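+
+/*
+ * Note the sense of the flag: new_pageable == TRUE unwires (makes the
+ * range pageable again), FALSE wires it down.  Wiring a user buffer
+ * for I/O, as vslock()/vsunlock() in vm_glue.c do, therefore looks
+ * roughly like:
+ *
+ *	vm_map_pageable(map, trunc_page(addr),
+ *	    round_page(addr + len), FALSE);		(wire)
+ *	...
+ *	vm_map_pageable(map, trunc_page(addr),
+ *	    round_page(addr + len), TRUE);		(unwire)
+ *
+ * This is only a sketch; the real helpers pass the current process'
+ * map and handle errors.
+ */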
+
+/*
+ * vm_map_clean
+ *
+ * Push any dirty cached pages in the address range to their pager.
+ * If syncio is TRUE, dirty pages are written synchronously.
+ * If invalidate is TRUE, any cached pages are freed as well.
+ *
+ * Returns an error if any part of the specified range is not mapped.
+ */
+int
+vm_map_clean(map, start, end, syncio, invalidate)
+ vm_map_t map;
+ vm_offset_t start;
+ vm_offset_t end;
+ boolean_t syncio;
+ boolean_t invalidate;
+{
+ register vm_map_entry_t current;
+ vm_map_entry_t entry;
+ vm_size_t size;
+ vm_object_t object;
+ vm_offset_t offset;
+
+ vm_map_lock_read(map);
+ VM_MAP_RANGE_CHECK(map, start, end);
+ if (!vm_map_lookup_entry(map, start, &entry)) {
+ vm_map_unlock_read(map);
+ return(KERN_INVALID_ADDRESS);
+ }
+
+ /*
+ * Make a first pass to check for holes.
+ */
+ for (current = entry; current->start < end; current = current->next) {
+ if (current->is_sub_map) {
+ vm_map_unlock_read(map);
+ return(KERN_INVALID_ARGUMENT);
+ }
+ if (end > current->end &&
+ (current->next == &map->header ||
+ current->end != current->next->start)) {
+ vm_map_unlock_read(map);
+ return(KERN_INVALID_ADDRESS);
+ }
+ }
+
+ /*
+ * Make a second pass, cleaning/uncaching pages from the indicated
+ * objects as we go.
+ */
+ for (current = entry; current->start < end; current = current->next) {
+ offset = current->offset + (start - current->start);
+ size = (end <= current->end ? end : current->end) - start;
+ if (current->is_a_map) {
+ register vm_map_t smap;
+ vm_map_entry_t tentry;
+ vm_size_t tsize;
+
+ smap = current->object.share_map;
+ vm_map_lock_read(smap);
+ (void) vm_map_lookup_entry(smap, offset, &tentry);
+ tsize = tentry->end - offset;
+ if (tsize < size)
+ size = tsize;
+ object = tentry->object.vm_object;
+ offset = tentry->offset + (offset - tentry->start);
+ vm_object_lock(object);
+ vm_map_unlock_read(smap);
+ } else {
+ object = current->object.vm_object;
+ vm_object_lock(object);
+ }
+ /*
+ * Flush pages if writing is allowed.
+ * XXX should we continue on an error?
+ */
+ if ((current->protection & VM_PROT_WRITE) &&
+ !vm_object_page_clean(object, offset, offset+size,
+ syncio, FALSE)) {
+ vm_object_unlock(object);
+ vm_map_unlock_read(map);
+ return(KERN_FAILURE);
+ }
+ if (invalidate)
+ vm_object_page_remove(object, offset, offset+size);
+ vm_object_unlock(object);
+ start += size;
+ }
+
+ vm_map_unlock_read(map);
+ return(KERN_SUCCESS);
+}
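+
+/*
+ * This is the workhorse behind an msync-style operation: the caller
+ * page-aligns the range and chooses whether the write-back is
+ * synchronous and whether cached pages should also be discarded,
+ * roughly:
+ *
+ *	rv = vm_map_clean(map, trunc_page(addr),
+ *	    round_page(addr + len), syncio, invalidate);
+ *
+ * (Sketch only; see the msync handling in vm_mmap.c.)
+ */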
+
+/*
+ * vm_map_entry_unwire: [ internal use only ]
+ *
+ * Make the region specified by this entry pageable.
+ *
+ * The map in question should be locked.
+ * [This is the reason for this routine's existence.]
+ */
+void vm_map_entry_unwire(map, entry)
+ vm_map_t map;
+ register vm_map_entry_t entry;
+{
+ vm_fault_unwire(map, entry->start, entry->end);
+ entry->wired_count = 0;
+}
+
+/*
+ * vm_map_entry_delete: [ internal use only ]
+ *
+ * Deallocate the given entry from the target map.
+ */
+void vm_map_entry_delete(map, entry)
+ register vm_map_t map;
+ register vm_map_entry_t entry;
+{
+ if (entry->wired_count != 0)
+ vm_map_entry_unwire(map, entry);
+
+ vm_map_entry_unlink(map, entry);
+ map->size -= entry->end - entry->start;
+
+ if (entry->is_a_map || entry->is_sub_map)
+ vm_map_deallocate(entry->object.share_map);
+ else
+ vm_object_deallocate(entry->object.vm_object);
+
+ vm_map_entry_dispose(map, entry);
+}
+
+/*
+ * vm_map_delete: [ internal use only ]
+ *
+ * Deallocates the given address range from the target
+ * map.
+ *
+ * When called with a sharing map, removes pages from
+ * that region from all physical maps.
+ */
+int
+vm_map_delete(map, start, end)
+ register vm_map_t map;
+ vm_offset_t start;
+ register vm_offset_t end;
+{
+ register vm_map_entry_t entry;
+ vm_map_entry_t first_entry;
+
+ /*
+ * Find the start of the region, and clip it
+ */
+
+ if (!vm_map_lookup_entry(map, start, &first_entry))
+ entry = first_entry->next;
+ else {
+ entry = first_entry;
+ vm_map_clip_start(map, entry, start);
+
+ /*
+ * Fix the lookup hint now, rather than each
+ * time through the loop.
+ */
+
+ SAVE_HINT(map, entry->prev);
+ }
+
+ /*
+ * Save the free space hint
+ */
+
+ if (map->first_free->start >= start)
+ map->first_free = entry->prev;
+
+ /*
+ * Step through all entries in this region
+ */
+
+ while ((entry != &map->header) && (entry->start < end)) {
+ vm_map_entry_t next;
+ register vm_offset_t s, e;
+ register vm_object_t object;
+
+ vm_map_clip_end(map, entry, end);
+
+ next = entry->next;
+ s = entry->start;
+ e = entry->end;
+
+ /*
+ * Unwire before removing addresses from the pmap;
+ * otherwise, unwiring will put the entries back in
+ * the pmap.
+ */
+
+ object = entry->object.vm_object;
+ if (entry->wired_count != 0)
+ vm_map_entry_unwire(map, entry);
+
+ /*
+ * If this is a sharing map, we must remove
+ * *all* references to this data, since we can't
+ * find all of the physical maps which are sharing
+ * it.
+ */
+
+ if (object == kernel_object || object == kmem_object)
+ vm_object_page_remove(object, entry->offset,
+ entry->offset + (e - s));
+ else if (!map->is_main_map)
+ vm_object_pmap_remove(object,
+ entry->offset,
+ entry->offset + (e - s));
+ else
+ pmap_remove(map->pmap, s, e);
+
+ /*
+ * Delete the entry (which may delete the object)
+ * only after removing all pmap entries pointing
+ * to its pages. (Otherwise, its page frames may
+ * be reallocated, and any modify bits will be
+ * set in the wrong object!)
+ */
+
+ vm_map_entry_delete(map, entry);
+ entry = next;
+ }
+ return(KERN_SUCCESS);
+}
+
+/*
+ * vm_map_remove:
+ *
+ * Remove the given address range from the target map.
+ * This is the exported form of vm_map_delete.
+ */
+int
+vm_map_remove(map, start, end)
+ register vm_map_t map;
+ register vm_offset_t start;
+ register vm_offset_t end;
+{
+ register int result;
+
+ vm_map_lock(map);
+ VM_MAP_RANGE_CHECK(map, start, end);
+ result = vm_map_delete(map, start, end);
+ vm_map_unlock(map);
+
+ return(result);
+}
+
+/*
+ * vm_map_check_protection:
+ *
+ * Assert that the target map allows the specified
+ * privilege on the entire address region given.
+ * The entire region must be allocated.
+ */
+boolean_t vm_map_check_protection(map, start, end, protection)
+ register vm_map_t map;
+ register vm_offset_t start;
+ register vm_offset_t end;
+ register vm_prot_t protection;
+{
+ register vm_map_entry_t entry;
+ vm_map_entry_t tmp_entry;
+
+ if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
+ return(FALSE);
+ }
+
+ entry = tmp_entry;
+
+ while (start < end) {
+ if (entry == &map->header) {
+ return(FALSE);
+ }
+
+ /*
+ * No holes allowed!
+ */
+
+ if (start < entry->start) {
+ return(FALSE);
+ }
+
+ /*
+ * Check protection associated with entry.
+ */
+
+ if ((entry->protection & protection) != protection) {
+ return(FALSE);
+ }
+
+ /* go to next entry */
+
+ start = entry->end;
+ entry = entry->next;
+ }
+ return(TRUE);
+}
+
+/*
+ * vm_map_copy_entry:
+ *
+ * Copies the contents of the source entry to the destination
+ * entry. The entries *must* be aligned properly.
+ */
+void vm_map_copy_entry(src_map, dst_map, src_entry, dst_entry)
+ vm_map_t src_map, dst_map;
+ register vm_map_entry_t src_entry, dst_entry;
+{
+ vm_object_t temp_object;
+
+ if (src_entry->is_sub_map || dst_entry->is_sub_map)
+ return;
+
+ if (dst_entry->object.vm_object != NULL &&
+ (dst_entry->object.vm_object->flags & OBJ_INTERNAL) == 0)
+ printf("vm_map_copy_entry: copying over permanent data!\n");
+
+ /*
+ * If our destination map was wired down,
+ * unwire it now.
+ */
+
+ if (dst_entry->wired_count != 0)
+ vm_map_entry_unwire(dst_map, dst_entry);
+
+ /*
+ * If we're dealing with a sharing map, we
+ * must remove the destination pages from
+ * all maps (since we cannot know which maps
+ * this sharing map belongs in).
+ */
+
+ if (dst_map->is_main_map)
+ pmap_remove(dst_map->pmap, dst_entry->start, dst_entry->end);
+ else
+ vm_object_pmap_remove(dst_entry->object.vm_object,
+ dst_entry->offset,
+ dst_entry->offset +
+ (dst_entry->end - dst_entry->start));
+
+ if (src_entry->wired_count == 0) {
+
+ boolean_t src_needs_copy;
+
+ /*
+ * If the source entry is marked needs_copy,
+ * it is already write-protected.
+ */
+ if (!src_entry->needs_copy) {
+
+ boolean_t su;
+
+ /*
+ * If the source entry has only one mapping,
+ * we can just protect the virtual address
+ * range.
+ */
+ if (!(su = src_map->is_main_map)) {
+ simple_lock(&src_map->ref_lock);
+ su = (src_map->ref_count == 1);
+ simple_unlock(&src_map->ref_lock);
+ }
+
+ if (su) {
+ pmap_protect(src_map->pmap,
+ src_entry->start,
+ src_entry->end,
+ src_entry->protection & ~VM_PROT_WRITE);
+ }
+ else {
+ vm_object_pmap_copy(src_entry->object.vm_object,
+ src_entry->offset,
+ src_entry->offset + (src_entry->end
+ -src_entry->start));
+ }
+ }
+
+ /*
+ * Make a copy of the object.
+ */
+ temp_object = dst_entry->object.vm_object;
+ vm_object_copy(src_entry->object.vm_object,
+ src_entry->offset,
+ (vm_size_t)(src_entry->end -
+ src_entry->start),
+ &dst_entry->object.vm_object,
+ &dst_entry->offset,
+ &src_needs_copy);
+ /*
+ * If we didn't get a copy-object now, mark the
+ * source map entry so that a shadow will be created
+ * to hold its changed pages.
+ */
+ if (src_needs_copy)
+ src_entry->needs_copy = TRUE;
+
+ /*
+ * The destination always needs to have a shadow
+ * created.
+ */
+ dst_entry->needs_copy = TRUE;
+
+ /*
+ * Mark the entries copy-on-write, so that write-enabling
+ * the entry won't make copy-on-write pages writable.
+ */
+ src_entry->copy_on_write = TRUE;
+ dst_entry->copy_on_write = TRUE;
+ /*
+ * Get rid of the old object.
+ */
+ vm_object_deallocate(temp_object);
+
+ pmap_copy(dst_map->pmap, src_map->pmap, dst_entry->start,
+ dst_entry->end - dst_entry->start, src_entry->start);
+ }
+ else {
+ /*
+ * Of course, wired down pages can't be set copy-on-write.
+ * Cause wired pages to be copied into the new
+ * map by simulating faults (the new pages are
+ * pageable)
+ */
+ vm_fault_copy_entry(dst_map, src_map, dst_entry, src_entry);
+ }
+}
+
+/*
+ * vm_map_copy:
+ *
+ * Perform a virtual memory copy from the source
+ * address map/range to the destination map/range.
+ *
+ * If src_destroy or dst_alloc is requested,
+ * the source and destination regions should be
+ * disjoint, not only in the top-level map, but
+ * in the sharing maps as well. [The best way
+ * to guarantee this is to use a new intermediate
+ * map to make copies. This also reduces map
+ * fragmentation.]
+ */
+int
+vm_map_copy(dst_map, src_map,
+ dst_addr, len, src_addr,
+ dst_alloc, src_destroy)
+ vm_map_t dst_map;
+ vm_map_t src_map;
+ vm_offset_t dst_addr;
+ vm_size_t len;
+ vm_offset_t src_addr;
+ boolean_t dst_alloc;
+ boolean_t src_destroy;
+{
+ register
+ vm_map_entry_t src_entry;
+ register
+ vm_map_entry_t dst_entry;
+ vm_map_entry_t tmp_entry;
+ vm_offset_t src_start;
+ vm_offset_t src_end;
+ vm_offset_t dst_start;
+ vm_offset_t dst_end;
+ vm_offset_t src_clip;
+ vm_offset_t dst_clip;
+ int result;
+ boolean_t old_src_destroy;
+
+ /*
+ * XXX While we figure out why src_destroy screws up,
+ * we'll do it by explicitly vm_map_delete'ing at the end.
+ */
+
+ old_src_destroy = src_destroy;
+ src_destroy = FALSE;
+
+ /*
+ * Compute start and end of region in both maps
+ */
+
+ src_start = src_addr;
+ src_end = src_start + len;
+ dst_start = dst_addr;
+ dst_end = dst_start + len;
+
+ /*
+ * Check that the region can exist in both source
+ * and destination.
+ */
+
+ if ((dst_end < dst_start) || (src_end < src_start))
+ return(KERN_NO_SPACE);
+
+ /*
+ * Lock the maps in question -- we avoid deadlock
+ * by ordering lock acquisition by map value
+ */
+
+ if (src_map == dst_map) {
+ vm_map_lock(src_map);
+ }
+ else if ((int) src_map < (int) dst_map) {
+ vm_map_lock(src_map);
+ vm_map_lock(dst_map);
+ } else {
+ vm_map_lock(dst_map);
+ vm_map_lock(src_map);
+ }
+
+ result = KERN_SUCCESS;
+
+ /*
+ * Check protections... source must be completely readable and
+ * destination must be completely writable. [Note that if we're
+ * allocating the destination region, we don't have to worry
+ * about protection, but instead about whether the region
+ * exists.]
+ */
+
+ if (src_map->is_main_map && dst_map->is_main_map) {
+ if (!vm_map_check_protection(src_map, src_start, src_end,
+ VM_PROT_READ)) {
+ result = KERN_PROTECTION_FAILURE;
+ goto Return;
+ }
+
+ if (dst_alloc) {
+ /* XXX Consider making this a vm_map_find instead */
+ if ((result = vm_map_insert(dst_map, NULL,
+ (vm_offset_t) 0, dst_start, dst_end)) != KERN_SUCCESS)
+ goto Return;
+ }
+ else if (!vm_map_check_protection(dst_map, dst_start, dst_end,
+ VM_PROT_WRITE)) {
+ result = KERN_PROTECTION_FAILURE;
+ goto Return;
+ }
+ }
+
+ /*
+ * Find the start entries and clip.
+ *
+ * Note that checking protection asserts that the
+ * lookup cannot fail.
+ *
+ * Also note that we wait to do the second lookup
+ * until we have done the first clip, as the clip
+ * may affect which entry we get!
+ */
+
+ (void) vm_map_lookup_entry(src_map, src_addr, &tmp_entry);
+ src_entry = tmp_entry;
+ vm_map_clip_start(src_map, src_entry, src_start);
+
+ (void) vm_map_lookup_entry(dst_map, dst_addr, &tmp_entry);
+ dst_entry = tmp_entry;
+ vm_map_clip_start(dst_map, dst_entry, dst_start);
+
+ /*
+ * If both source and destination entries are the same,
+ * retry the first lookup, as it may have changed.
+ */
+
+ if (src_entry == dst_entry) {
+ (void) vm_map_lookup_entry(src_map, src_addr, &tmp_entry);
+ src_entry = tmp_entry;
+ }
+
+ /*
+ * If source and destination entries are still the same,
+ * a null copy is being performed.
+ */
+
+ if (src_entry == dst_entry)
+ goto Return;
+
+ /*
+ * Go through entries until we get to the end of the
+ * region.
+ */
+
+ while (src_start < src_end) {
+ /*
+ * Clip the entries to the endpoint of the entire region.
+ */
+
+ vm_map_clip_end(src_map, src_entry, src_end);
+ vm_map_clip_end(dst_map, dst_entry, dst_end);
+
+ /*
+ * Clip each entry to the endpoint of the other entry.
+ */
+
+ src_clip = src_entry->start + (dst_entry->end - dst_entry->start);
+ vm_map_clip_end(src_map, src_entry, src_clip);
+
+ dst_clip = dst_entry->start + (src_entry->end - src_entry->start);
+ vm_map_clip_end(dst_map, dst_entry, dst_clip);
+
+ /*
+ * Both entries now match in size and relative endpoints.
+ *
+ * If both entries refer to a VM object, we can
+ * deal with them now.
+ */
+
+ if (!src_entry->is_a_map && !dst_entry->is_a_map) {
+ vm_map_copy_entry(src_map, dst_map, src_entry,
+ dst_entry);
+ }
+ else {
+ register vm_map_t new_dst_map;
+ vm_offset_t new_dst_start;
+ vm_size_t new_size;
+ vm_map_t new_src_map;
+ vm_offset_t new_src_start;
+
+ /*
+ * We have to follow at least one sharing map.
+ */
+
+ new_size = (dst_entry->end - dst_entry->start);
+
+ if (src_entry->is_a_map) {
+ new_src_map = src_entry->object.share_map;
+ new_src_start = src_entry->offset;
+ }
+ else {
+ new_src_map = src_map;
+ new_src_start = src_entry->start;
+ lock_set_recursive(&src_map->lock);
+ }
+
+ if (dst_entry->is_a_map) {
+ vm_offset_t new_dst_end;
+
+ new_dst_map = dst_entry->object.share_map;
+ new_dst_start = dst_entry->offset;
+
+ /*
+ * Since the destination sharing entries
+ * will be merely deallocated, we can
+ * do that now, and replace the region
+ * with a null object. [This prevents
+ * splitting the source map to match
+ * the form of the destination map.]
+ * Note that we can only do so if the
+ * source and destination do not overlap.
+ */
+
+ new_dst_end = new_dst_start + new_size;
+
+ if (new_dst_map != new_src_map) {
+ vm_map_lock(new_dst_map);
+ (void) vm_map_delete(new_dst_map,
+ new_dst_start,
+ new_dst_end);
+ (void) vm_map_insert(new_dst_map,
+ NULL,
+ (vm_offset_t) 0,
+ new_dst_start,
+ new_dst_end);
+ vm_map_unlock(new_dst_map);
+ }
+ }
+ else {
+ new_dst_map = dst_map;
+ new_dst_start = dst_entry->start;
+ lock_set_recursive(&dst_map->lock);
+ }
+
+ /*
+ * Recursively copy the sharing map.
+ */
+
+ (void) vm_map_copy(new_dst_map, new_src_map,
+ new_dst_start, new_size, new_src_start,
+ FALSE, FALSE);
+
+ if (dst_map == new_dst_map)
+ lock_clear_recursive(&dst_map->lock);
+ if (src_map == new_src_map)
+ lock_clear_recursive(&src_map->lock);
+ }
+
+ /*
+ * Update variables for next pass through the loop.
+ */
+
+ src_start = src_entry->end;
+ src_entry = src_entry->next;
+ dst_start = dst_entry->end;
+ dst_entry = dst_entry->next;
+
+ /*
+ * If the source is to be destroyed, here is the
+ * place to do it.
+ */
+
+ if (src_destroy && src_map->is_main_map &&
+ dst_map->is_main_map)
+ vm_map_entry_delete(src_map, src_entry->prev);
+ }
+
+ /*
+ * Update the physical maps as appropriate
+ */
+
+ if (src_map->is_main_map && dst_map->is_main_map) {
+ if (src_destroy)
+ pmap_remove(src_map->pmap, src_addr, src_addr + len);
+ }
+
+ /*
+ * Unlock the maps
+ */
+
+ Return: ;
+
+ if (old_src_destroy)
+ vm_map_delete(src_map, src_addr, src_addr + len);
+
+ vm_map_unlock(src_map);
+ if (src_map != dst_map)
+ vm_map_unlock(dst_map);
+
+ return(result);
+}
+
+/*
+ * vmspace_fork:
+ * Create a new process vmspace structure and vm_map
+ * based on those of an existing process. The new map
+ * is based on the old map, according to the inheritance
+ * values on the regions in that map.
+ *
+ * The source map must not be locked.
+ */
+struct vmspace *
+vmspace_fork(vm1)
+ register struct vmspace *vm1;
+{
+ register struct vmspace *vm2;
+ vm_map_t old_map = &vm1->vm_map;
+ vm_map_t new_map;
+ vm_map_entry_t old_entry;
+ vm_map_entry_t new_entry;
+ pmap_t new_pmap;
+
+ vm_map_lock(old_map);
+
+ vm2 = vmspace_alloc(old_map->min_offset, old_map->max_offset,
+ old_map->entries_pageable);
+ bcopy(&vm1->vm_startcopy, &vm2->vm_startcopy,
+ (caddr_t) (vm1 + 1) - (caddr_t) &vm1->vm_startcopy);
+ new_pmap = &vm2->vm_pmap; /* XXX */
+ new_map = &vm2->vm_map; /* XXX */
+
+ old_entry = old_map->header.next;
+
+ while (old_entry != &old_map->header) {
+ if (old_entry->is_sub_map)
+ panic("vm_map_fork: encountered a submap");
+
+ switch (old_entry->inheritance) {
+ case VM_INHERIT_NONE:
+ break;
+
+ case VM_INHERIT_SHARE:
+ /*
+ * If we don't already have a sharing map:
+ */
+
+ if (!old_entry->is_a_map) {
+ vm_map_t new_share_map;
+ vm_map_entry_t new_share_entry;
+
+ /*
+ * Create a new sharing map
+ */
+
+ new_share_map = vm_map_create(NULL,
+ old_entry->start,
+ old_entry->end,
+ TRUE);
+ new_share_map->is_main_map = FALSE;
+
+ /*
+ * Create the only sharing entry from the
+ * old task map entry.
+ */
+
+ new_share_entry =
+ vm_map_entry_create(new_share_map);
+ *new_share_entry = *old_entry;
+ new_share_entry->wired_count = 0;
+
+ /*
+ * Insert the entry into the new sharing
+ * map
+ */
+
+ vm_map_entry_link(new_share_map,
+ new_share_map->header.prev,
+ new_share_entry);
+
+ /*
+ * Fix up the task map entry to refer
+ * to the sharing map now.
+ */
+
+ old_entry->is_a_map = TRUE;
+ old_entry->object.share_map = new_share_map;
+ old_entry->offset = old_entry->start;
+ }
+
+ /*
+ * Clone the entry, referencing the sharing map.
+ */
+
+ new_entry = vm_map_entry_create(new_map);
+ *new_entry = *old_entry;
+ new_entry->wired_count = 0;
+ vm_map_reference(new_entry->object.share_map);
+
+ /*
+ * Insert the entry into the new map -- we
+ * know we're inserting at the end of the new
+ * map.
+ */
+
+ vm_map_entry_link(new_map, new_map->header.prev,
+ new_entry);
+
+ /*
+ * Update the physical map
+ */
+
+ pmap_copy(new_map->pmap, old_map->pmap,
+ new_entry->start,
+ (old_entry->end - old_entry->start),
+ old_entry->start);
+ break;
+
+ case VM_INHERIT_COPY:
+ /*
+ * Clone the entry and link into the map.
+ */
+
+ new_entry = vm_map_entry_create(new_map);
+ *new_entry = *old_entry;
+ new_entry->wired_count = 0;
+ new_entry->object.vm_object = NULL;
+ new_entry->is_a_map = FALSE;
+ vm_map_entry_link(new_map, new_map->header.prev,
+ new_entry);
+ if (old_entry->is_a_map) {
+ int check;
+
+ check = vm_map_copy(new_map,
+ old_entry->object.share_map,
+ new_entry->start,
+ (vm_size_t)(new_entry->end -
+ new_entry->start),
+ old_entry->offset,
+ FALSE, FALSE);
+ if (check != KERN_SUCCESS)
+ printf("vm_map_fork: copy in share_map region failed\n");
+ }
+ else {
+ vm_map_copy_entry(old_map, new_map, old_entry,
+ new_entry);
+ }
+ break;
+ }
+ old_entry = old_entry->next;
+ }
+
+ new_map->size = old_map->size;
+ vm_map_unlock(old_map);
+
+ return(vm2);
+}
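+
+/*
+ * The fork path (vm_fork() in vm_glue.c) is the intended caller; in
+ * outline the child's address space is set up with little more than
+ *
+ *	p2->p_vmspace = vmspace_fork(p1->p_vmspace);
+ *
+ * after which each region is shared or copied according to the
+ * inheritance values handled above.
+ */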
+
+/*
+ * vm_map_lookup:
+ *
+ * Finds the VM object, offset, and
+ * protection for a given virtual address in the
+ * specified map, assuming a page fault of the
+ * type specified.
+ *
+ * Leaves the map in question locked for read; return
+ * values are guaranteed until a vm_map_lookup_done
+ * call is performed. Note that the map argument
+ * is in/out; the returned map must be used in
+ * the call to vm_map_lookup_done.
+ *
+ * A handle (out_entry) is returned for use in
+ * vm_map_lookup_done, to make that fast.
+ *
+ * If a lookup is requested with "write protection"
+ * specified, the map may be changed to perform virtual
+ * copying operations, although the data referenced will
+ * remain the same.
+ */
+int
+vm_map_lookup(var_map, vaddr, fault_type, out_entry,
+ object, offset, out_prot, wired, single_use)
+ vm_map_t *var_map; /* IN/OUT */
+ register vm_offset_t vaddr;
+ register vm_prot_t fault_type;
+
+ vm_map_entry_t *out_entry; /* OUT */
+ vm_object_t *object; /* OUT */
+ vm_offset_t *offset; /* OUT */
+ vm_prot_t *out_prot; /* OUT */
+ boolean_t *wired; /* OUT */
+ boolean_t *single_use; /* OUT */
+{
+ vm_map_t share_map;
+ vm_offset_t share_offset;
+ register vm_map_entry_t entry;
+ register vm_map_t map = *var_map;
+ register vm_prot_t prot;
+ register boolean_t su;
+
+ RetryLookup: ;
+
+ /*
+ * Lookup the faulting address.
+ */
+
+ vm_map_lock_read(map);
+
+#define RETURN(why) \
+ { \
+ vm_map_unlock_read(map); \
+ return(why); \
+ }
+
+ /*
+ * If the map has an interesting hint, try it before calling
+ * full blown lookup routine.
+ */
+
+ simple_lock(&map->hint_lock);
+ entry = map->hint;
+ simple_unlock(&map->hint_lock);
+
+ *out_entry = entry;
+
+ if ((entry == &map->header) ||
+ (vaddr < entry->start) || (vaddr >= entry->end)) {
+ vm_map_entry_t tmp_entry;
+
+ /*
+ * Entry was either not a valid hint, or the vaddr
+ * was not contained in the entry, so do a full lookup.
+ */
+ if (!vm_map_lookup_entry(map, vaddr, &tmp_entry))
+ RETURN(KERN_INVALID_ADDRESS);
+
+ entry = tmp_entry;
+ *out_entry = entry;
+ }
+
+ /*
+ * Handle submaps.
+ */
+
+ if (entry->is_sub_map) {
+ vm_map_t old_map = map;
+
+ *var_map = map = entry->object.sub_map;
+ vm_map_unlock_read(old_map);
+ goto RetryLookup;
+ }
+
+ /*
+ * Check whether this task is allowed to have
+ * this page.
+ */
+
+ prot = entry->protection;
+ if ((fault_type & (prot)) != fault_type)
+ RETURN(KERN_PROTECTION_FAILURE);
+
+ /*
+ * If this page is not pageable, we have to get
+ * it for all possible accesses.
+ */
+
+ if (*wired = (entry->wired_count != 0))
+ prot = fault_type = entry->protection;
+
+ /*
+ * If we don't already have a VM object, track
+ * it down.
+ */
+
+ if (su = !entry->is_a_map) {
+ share_map = map;
+ share_offset = vaddr;
+ }
+ else {
+ vm_map_entry_t share_entry;
+
+ /*
+ * Compute the sharing map, and offset into it.
+ */
+
+ share_map = entry->object.share_map;
+ share_offset = (vaddr - entry->start) + entry->offset;
+
+ /*
+ * Look for the backing store object and offset
+ */
+
+ vm_map_lock_read(share_map);
+
+ if (!vm_map_lookup_entry(share_map, share_offset,
+ &share_entry)) {
+ vm_map_unlock_read(share_map);
+ RETURN(KERN_INVALID_ADDRESS);
+ }
+ entry = share_entry;
+ }
+
+ /*
+ * If the entry was copy-on-write, we either ...
+ */
+
+ if (entry->needs_copy) {
+ /*
+ * If we want to write the page, we may as well
+ * handle that now since we've got the sharing
+ * map locked.
+ *
+ * If we don't need to write the page, we just
+ * demote the permissions allowed.
+ */
+
+ if (fault_type & VM_PROT_WRITE) {
+ /*
+ * Make a new object, and place it in the
+ * object chain. Note that no new references
+ * have appeared -- one just moved from the
+ * share map to the new object.
+ */
+
+ if (lock_read_to_write(&share_map->lock)) {
+ if (share_map != map)
+ vm_map_unlock_read(map);
+ goto RetryLookup;
+ }
+
+ vm_object_shadow(
+ &entry->object.vm_object,
+ &entry->offset,
+ (vm_size_t) (entry->end - entry->start));
+
+ entry->needs_copy = FALSE;
+
+ lock_write_to_read(&share_map->lock);
+ }
+ else {
+ /*
+ * We're attempting to read a copy-on-write
+ * page -- don't allow writes.
+ */
+
+ prot &= (~VM_PROT_WRITE);
+ }
+ }
+
+ /*
+ * Create an object if necessary.
+ */
+ if (entry->object.vm_object == NULL) {
+
+ if (lock_read_to_write(&share_map->lock)) {
+ if (share_map != map)
+ vm_map_unlock_read(map);
+ goto RetryLookup;
+ }
+
+ entry->object.vm_object = vm_object_allocate(
+ (vm_size_t)(entry->end - entry->start));
+ entry->offset = 0;
+ lock_write_to_read(&share_map->lock);
+ }
+
+ /*
+ * Return the object/offset from this entry. If the entry
+ * was copy-on-write or empty, it has been fixed up.
+ */
+
+ *offset = (share_offset - entry->start) + entry->offset;
+ *object = entry->object.vm_object;
+
+ /*
+ * Return whether this is the only map sharing this data.
+ */
+
+ if (!su) {
+ simple_lock(&share_map->ref_lock);
+ su = (share_map->ref_count == 1);
+ simple_unlock(&share_map->ref_lock);
+ }
+
+ *out_prot = prot;
+ *single_use = su;
+
+ return(KERN_SUCCESS);
+
+#undef RETURN
+}
+
+/*
+ * vm_map_lookup_done:
+ *
+ * Releases locks acquired by a vm_map_lookup
+ * (according to the handle returned by that lookup).
+ */
+
+void vm_map_lookup_done(map, entry)
+ register vm_map_t map;
+ vm_map_entry_t entry;
+{
+ /*
+ * If this entry references a map, unlock it first.
+ */
+
+ if (entry->is_a_map)
+ vm_map_unlock_read(entry->object.share_map);
+
+ /*
+ * Unlock the main-level map
+ */
+
+ vm_map_unlock_read(map);
+}
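+
+/*
+ * Illustrative sketch (hypothetical, compiled out): the usual calling
+ * pattern for vm_map_lookup/vm_map_lookup_done, roughly as a page
+ * fault handler would use it.  The function name and the read fault
+ * type are made up for the example; only the argument conventions and
+ * the lookup/lookup_done pairing come from the routines above.
+ */
+#ifdef notdef
+static int
+vm_map_lookup_example(map, va)
+ vm_map_t map;
+ vm_offset_t va;
+{
+ vm_map_entry_t entry;
+ vm_object_t object;
+ vm_offset_t offset;
+ vm_prot_t prot;
+ boolean_t wired, su;
+ int result;
+
+ result = vm_map_lookup(&map, va, VM_PROT_READ, &entry,
+     &object, &offset, &prot, &wired, &su);
+ if (result != KERN_SUCCESS)
+  return (result);
+ /*
+  * "map" may now refer to a submap, and object/offset name the
+  * backing store for va.  The read lock(s) taken by the lookup
+  * are still held and must be released with vm_map_lookup_done.
+  */
+ vm_map_lookup_done(map, entry);
+ return (KERN_SUCCESS);
+}
+#endif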
+
+/*
+ * Routine: vm_map_simplify
+ * Purpose:
+ * Attempt to simplify the map representation in
+ * the vicinity of the given starting address.
+ * Note:
+ * This routine is intended primarily to keep the
+ * kernel maps more compact -- they generally don't
+ * benefit from the "expand a map entry" technology
+ * at allocation time because the adjacent entry
+ * is often wired down.
+ */
+void vm_map_simplify(map, start)
+ vm_map_t map;
+ vm_offset_t start;
+{
+ vm_map_entry_t this_entry;
+ vm_map_entry_t prev_entry;
+
+ vm_map_lock(map);
+ if (
+ (vm_map_lookup_entry(map, start, &this_entry)) &&
+ ((prev_entry = this_entry->prev) != &map->header) &&
+
+ (prev_entry->end == start) &&
+ (map->is_main_map) &&
+
+ (prev_entry->is_a_map == FALSE) &&
+ (prev_entry->is_sub_map == FALSE) &&
+
+ (this_entry->is_a_map == FALSE) &&
+ (this_entry->is_sub_map == FALSE) &&
+
+ (prev_entry->inheritance == this_entry->inheritance) &&
+ (prev_entry->protection == this_entry->protection) &&
+ (prev_entry->max_protection == this_entry->max_protection) &&
+ (prev_entry->wired_count == this_entry->wired_count) &&
+
+ (prev_entry->copy_on_write == this_entry->copy_on_write) &&
+ (prev_entry->needs_copy == this_entry->needs_copy) &&
+
+ (prev_entry->object.vm_object == this_entry->object.vm_object) &&
+ ((prev_entry->offset + (prev_entry->end - prev_entry->start))
+ == this_entry->offset)
+ ) {
+ if (map->first_free == this_entry)
+ map->first_free = prev_entry;
+
+ if (!this_entry->object.vm_object->paging_in_progress) {
+ SAVE_HINT(map, prev_entry);
+ vm_map_entry_unlink(map, this_entry);
+ prev_entry->end = this_entry->end;
+ vm_object_deallocate(this_entry->object.vm_object);
+ vm_map_entry_dispose(map, this_entry);
+ }
+ }
+ vm_map_unlock(map);
+}
+
+/*
+ * vm_map_print: [ debug ]
+ */
+void vm_map_print(map, full)
+ register vm_map_t map;
+ boolean_t full;
+{
+ register vm_map_entry_t entry;
+ extern int indent;
+
+ iprintf("%s map 0x%x: pmap=0x%x,ref=%d,nentries=%d,version=%d\n",
+ (map->is_main_map ? "Task" : "Share"),
+ (int) map, (int) (map->pmap), map->ref_count, map->nentries,
+ map->timestamp);
+
+ if (!full && indent)
+ return;
+
+ indent += 2;
+ for (entry = map->header.next; entry != &map->header;
+ entry = entry->next) {
+ iprintf("map entry 0x%x: start=0x%x, end=0x%x, ",
+ (int) entry, (int) entry->start, (int) entry->end);
+ if (map->is_main_map) {
+ static char *inheritance_name[4] =
+ { "share", "copy", "none", "donate_copy"};
+ printf("prot=%x/%x/%s, ",
+ entry->protection,
+ entry->max_protection,
+ inheritance_name[entry->inheritance]);
+ if (entry->wired_count != 0)
+ printf("wired, ");
+ }
+
+ if (entry->is_a_map || entry->is_sub_map) {
+ printf("share=0x%x, offset=0x%x\n",
+ (int) entry->object.share_map,
+ (int) entry->offset);
+ if ((entry->prev == &map->header) ||
+ (!entry->prev->is_a_map) ||
+ (entry->prev->object.share_map !=
+ entry->object.share_map)) {
+ indent += 2;
+ vm_map_print(entry->object.share_map, full);
+ indent -= 2;
+ }
+
+ }
+ else {
+ printf("object=0x%x, offset=0x%x",
+ (int) entry->object.vm_object,
+ (int) entry->offset);
+ if (entry->copy_on_write)
+ printf(", copy (%s)",
+ entry->needs_copy ? "needed" : "done");
+ printf("\n");
+
+ if ((entry->prev == &map->header) ||
+ (entry->prev->is_a_map) ||
+ (entry->prev->object.vm_object !=
+ entry->object.vm_object)) {
+ indent += 2;
+ vm_object_print(entry->object.vm_object, full);
+ indent -= 2;
+ }
+ }
+ }
+ indent -= 2;
+}
diff --git a/sys/vm/vm_map.h b/sys/vm/vm_map.h
new file mode 100644
index 0000000..ee253ef
--- /dev/null
+++ b/sys/vm/vm_map.h
@@ -0,0 +1,228 @@
+/*
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * The Mach Operating System project at Carnegie-Mellon University.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)vm_map.h 8.3 (Berkeley) 3/15/94
+ *
+ *
+ * Copyright (c) 1987, 1990 Carnegie-Mellon University.
+ * All rights reserved.
+ *
+ * Authors: Avadis Tevanian, Jr., Michael Wayne Young
+ *
+ * Permission to use, copy, modify and distribute this software and
+ * its documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ *
+ * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
+ * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
+ * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
+ * School of Computer Science
+ * Carnegie Mellon University
+ * Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie the
+ * rights to redistribute these changes.
+ */
+
+/*
+ * Virtual memory map module definitions.
+ */
+
+#ifndef _VM_MAP_
+#define _VM_MAP_
+
+/*
+ * Types defined:
+ *
+ * vm_map_t the high-level address map data structure.
+ * vm_map_entry_t an entry in an address map.
+ * vm_map_version_t a timestamp of a map, for use with vm_map_lookup
+ */
+
+/*
+ * Objects which live in maps may be either VM objects, or
+ * another map (called a "sharing map") which denotes read-write
+ * sharing with other maps.
+ */
+
+union vm_map_object {
+ struct vm_object *vm_object; /* a VM object */
+ struct vm_map *share_map; /* share map */
+ struct vm_map *sub_map; /* belongs to another map */
+};
+
+/*
+ * Address map entries consist of start and end addresses,
+ * a VM object (or sharing map) and offset into that object,
+ * and user-exported inheritance and protection information.
+ * Also included is control information for virtual copy operations.
+ */
+struct vm_map_entry {
+ struct vm_map_entry *prev; /* previous entry */
+ struct vm_map_entry *next; /* next entry */
+ vm_offset_t start; /* start address */
+ vm_offset_t end; /* end address */
+ union vm_map_object object; /* object I point to */
+ vm_offset_t offset; /* offset into object */
+ boolean_t is_a_map:1, /* Is "object" a map? */
+ is_sub_map:1, /* Is "object" a submap? */
+ /* Only in sharing maps: */
+ copy_on_write:1,/* is data copy-on-write */
+ needs_copy:1; /* does object need to be copied */
+ /* Only in task maps: */
+ vm_prot_t protection; /* protection code */
+ vm_prot_t max_protection; /* maximum protection */
+ vm_inherit_t inheritance; /* inheritance */
+ int wired_count; /* can be paged if = 0 */
+};
+
+/*
+ * Maps are doubly-linked lists of map entries, kept sorted
+ * by address. A single hint is provided to start
+ * searches again from the last successful search,
+ * insertion, or removal.
+ */
+struct vm_map {
+ struct pmap * pmap; /* Physical map */
+ lock_data_t lock; /* Lock for map data */
+ struct vm_map_entry header; /* List of entries */
+ int nentries; /* Number of entries */
+ vm_size_t size; /* virtual size */
+ boolean_t is_main_map; /* Am I a main map? */
+ int ref_count; /* Reference count */
+ simple_lock_data_t ref_lock; /* Lock for ref_count field */
+ vm_map_entry_t hint; /* hint for quick lookups */
+ simple_lock_data_t hint_lock; /* lock for hint storage */
+ vm_map_entry_t first_free; /* First free space hint */
+ boolean_t entries_pageable; /* map entries pageable?? */
+ unsigned int timestamp; /* Version number */
+#define min_offset header.start
+#define max_offset header.end
+};
+
+/*
+ * Map versions are used to validate a previous lookup attempt.
+ *
+ * Since lookup operations may involve both a main map and
+ * a sharing map, it is necessary to have a timestamp from each.
+ * [If the main map timestamp has changed, the share_map and
+ * associated timestamp are no longer valid; the map version
+ * does not include a reference for the imbedded share_map.]
+ */
+typedef struct {
+ int main_timestamp;
+ vm_map_t share_map;
+ int share_timestamp;
+} vm_map_version_t;
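+
+/*
+ * Illustrative fragment (hypothetical; nothing in this header defines
+ * it): a saved vm_map_version_t is stale as soon as either timestamp
+ * has moved on, per the note above.
+ */
+#ifdef notdef
+#define vm_map_version_stale(map, v) \
+ ((v).main_timestamp != (map)->timestamp || \
+  ((v).share_map != NULL && \
+   (v).share_timestamp != (v).share_map->timestamp))
+#endif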
+
+/*
+ * Macros: vm_map_lock, etc.
+ * Function:
+ * Perform locking on the data portion of a map.
+ */
+
+#define vm_map_lock(map) { \
+ lock_write(&(map)->lock); \
+ (map)->timestamp++; \
+}
+#define vm_map_unlock(map) lock_write_done(&(map)->lock)
+#define vm_map_lock_read(map) lock_read(&(map)->lock)
+#define vm_map_unlock_read(map) lock_read_done(&(map)->lock)
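+
+/*
+ * Illustrative fragment (hypothetical usage, not code in this file):
+ * the usual discipline is a read lock around lookups and the write
+ * lock -- which also advances the timestamp -- around anything that
+ * edits the entry list:
+ *
+ * vm_map_lock_read(map);
+ * (void) vm_map_lookup_entry(map, addr, &entry);
+ * vm_map_unlock_read(map);
+ *
+ * vm_map_lock(map);
+ * ... insert or remove entries ...
+ * vm_map_unlock(map);
+ */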
+
+/*
+ * Functions implemented as macros
+ */
+#define vm_map_min(map) ((map)->min_offset)
+#define vm_map_max(map) ((map)->max_offset)
+#define vm_map_pmap(map) ((map)->pmap)
+
+/* XXX: number of kernel maps and entries to statically allocate */
+#define MAX_KMAP 10
+#define MAX_KMAPENT 128
+
+#ifdef KERNEL
+boolean_t vm_map_check_protection __P((vm_map_t,
+ vm_offset_t, vm_offset_t, vm_prot_t));
+int vm_map_copy __P((vm_map_t, vm_map_t, vm_offset_t,
+ vm_size_t, vm_offset_t, boolean_t, boolean_t));
+void vm_map_copy_entry __P((vm_map_t,
+ vm_map_t, vm_map_entry_t, vm_map_entry_t));
+struct pmap;
+vm_map_t vm_map_create __P((struct pmap *,
+ vm_offset_t, vm_offset_t, boolean_t));
+void vm_map_deallocate __P((vm_map_t));
+int vm_map_delete __P((vm_map_t, vm_offset_t, vm_offset_t));
+vm_map_entry_t vm_map_entry_create __P((vm_map_t));
+void vm_map_entry_delete __P((vm_map_t, vm_map_entry_t));
+void vm_map_entry_dispose __P((vm_map_t, vm_map_entry_t));
+void vm_map_entry_unwire __P((vm_map_t, vm_map_entry_t));
+int vm_map_find __P((vm_map_t, vm_object_t,
+ vm_offset_t, vm_offset_t *, vm_size_t, boolean_t));
+int vm_map_findspace __P((vm_map_t,
+ vm_offset_t, vm_size_t, vm_offset_t *));
+int vm_map_inherit __P((vm_map_t,
+ vm_offset_t, vm_offset_t, vm_inherit_t));
+void vm_map_init __P((struct vm_map *,
+ vm_offset_t, vm_offset_t, boolean_t));
+int vm_map_insert __P((vm_map_t,
+ vm_object_t, vm_offset_t, vm_offset_t, vm_offset_t));
+int vm_map_lookup __P((vm_map_t *, vm_offset_t, vm_prot_t,
+ vm_map_entry_t *, vm_object_t *, vm_offset_t *, vm_prot_t *,
+ boolean_t *, boolean_t *));
+void vm_map_lookup_done __P((vm_map_t, vm_map_entry_t));
+boolean_t vm_map_lookup_entry __P((vm_map_t,
+ vm_offset_t, vm_map_entry_t *));
+int vm_map_pageable __P((vm_map_t,
+ vm_offset_t, vm_offset_t, boolean_t));
+int vm_map_clean __P((vm_map_t,
+ vm_offset_t, vm_offset_t, boolean_t, boolean_t));
+void vm_map_print __P((vm_map_t, boolean_t));
+int vm_map_protect __P((vm_map_t,
+ vm_offset_t, vm_offset_t, vm_prot_t, boolean_t));
+void vm_map_reference __P((vm_map_t));
+int vm_map_remove __P((vm_map_t, vm_offset_t, vm_offset_t));
+void vm_map_simplify __P((vm_map_t, vm_offset_t));
+void vm_map_simplify_entry __P((vm_map_t, vm_map_entry_t));
+void vm_map_startup __P((void));
+int vm_map_submap __P((vm_map_t,
+ vm_offset_t, vm_offset_t, vm_map_t));
+#endif
+#endif /* _VM_MAP_ */
diff --git a/sys/vm/vm_meter.c b/sys/vm/vm_meter.c
new file mode 100644
index 0000000..2a8029b
--- /dev/null
+++ b/sys/vm/vm_meter.c
@@ -0,0 +1,225 @@
+/*
+ * Copyright (c) 1982, 1986, 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)vm_meter.c 8.4 (Berkeley) 1/4/94
+ */
+
+#include <sys/param.h>
+#include <sys/proc.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <vm/vm.h>
+#include <sys/sysctl.h>
+
+struct loadavg averunnable; /* load average, of runnable procs */
+
+int maxslp = MAXSLP;
+int saferss = SAFERSS;
+
+void
+vmmeter()
+{
+
+ if (time.tv_sec % 5 == 0)
+ loadav(&averunnable);
+ if (proc0.p_slptime > maxslp/2)
+ wakeup((caddr_t)&proc0);
+}
+
+/*
+ * Constants for averages over 1, 5, and 15 minutes
+ * when sampling at 5 second intervals.
+ */
+fixpt_t cexp[3] = {
+ 0.9200444146293232 * FSCALE, /* exp(-1/12) */
+ 0.9834714538216174 * FSCALE, /* exp(-1/60) */
+ 0.9944598480048967 * FSCALE, /* exp(-1/180) */
+};
+
+/*
+ * Compute a tenex style load average of a quantity on
+ * 1, 5 and 15 minute intervals.
+ */
+void
+loadav(avg)
+ register struct loadavg *avg;
+{
+ register int i, nrun;
+ register struct proc *p;
+
+ for (nrun = 0, p = (struct proc *)allproc; p != NULL; p = p->p_next) {
+ switch (p->p_stat) {
+ case SSLEEP:
+ if (p->p_priority > PZERO || p->p_slptime != 0)
+ continue;
+ /* fall through */
+ case SRUN:
+ case SIDL:
+ nrun++;
+ }
+ }
+ for (i = 0; i < 3; i++)
+ avg->ldavg[i] = (cexp[i] * avg->ldavg[i] +
+ nrun * FSCALE * (FSCALE - cexp[i])) >> FSHIFT;
+}
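+
+/*
+ * Worked example (illustrative numbers only): each 5-second sample
+ * computes, in fixed point with scale FSCALE,
+ *
+ * ldavg' = c * ldavg + nrun * (1 - c) * FSCALE
+ *
+ * where c is about 0.92 for the 1-minute average.  If nrun holds
+ * steady at 2, ldavg converges to 2 * FSCALE, i.e. a reported load
+ * of 2.00; once the runnable processes go away, the 1-minute figure
+ * decays by roughly 8% per sample.
+ */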
+
+/*
+ * Attributes associated with virtual memory.
+ */
+int
+vm_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p)
+ int *name;
+ u_int namelen;
+ void *oldp;
+ size_t *oldlenp;
+ void *newp;
+ size_t newlen;
+ struct proc *p;
+{
+ struct vmtotal vmtotals;
+
+ /* all sysctl names at this level are terminal */
+ if (namelen != 1)
+ return (ENOTDIR); /* overloaded */
+
+ switch (name[0]) {
+ case VM_LOADAVG:
+ averunnable.fscale = FSCALE;
+ return (sysctl_rdstruct(oldp, oldlenp, newp, &averunnable,
+ sizeof(averunnable)));
+ case VM_METER:
+ vmtotal(&vmtotals);
+ return (sysctl_rdstruct(oldp, oldlenp, newp, &vmtotals,
+ sizeof(vmtotals)));
+ default:
+ return (EOPNOTSUPP);
+ }
+ /* NOTREACHED */
+}
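+
+/*
+ * Illustrative userland sketch (not part of this file): VM_LOADAVG is
+ * read through sysctl(3) and converted out of fixed point with the
+ * fscale value filled in above:
+ *
+ * int mib[2] = { CTL_VM, VM_LOADAVG };
+ * struct loadavg la;
+ * size_t len = sizeof(la);
+ *
+ * if (sysctl(mib, 2, &la, &len, NULL, 0) == 0)
+ *  printf("%.2f\n", (double)la.ldavg[0] / la.fscale);
+ */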
+
+/*
+ * Calculate the current state of the system.
+ * Done on demand from getkerninfo().
+ */
+void
+vmtotal(totalp)
+ register struct vmtotal *totalp;
+{
+ register struct proc *p;
+ register vm_map_entry_t entry;
+ register vm_object_t object;
+ register vm_map_t map;
+ int paging;
+
+ bzero(totalp, sizeof *totalp);
+ /*
+ * Mark all objects as inactive.
+ */
+ simple_lock(&vm_object_list_lock);
+ for (object = vm_object_list.tqh_first;
+ object != NULL;
+ object = object->object_list.tqe_next)
+ object->flags &= ~OBJ_ACTIVE;
+ simple_unlock(&vm_object_list_lock);
+ /*
+ * Calculate process statistics.
+ */
+ for (p = (struct proc *)allproc; p != NULL; p = p->p_next) {
+ if (p->p_flag & P_SYSTEM)
+ continue;
+ switch (p->p_stat) {
+ case 0:
+ continue;
+
+ case SSLEEP:
+ case SSTOP:
+ if (p->p_flag & P_INMEM) {
+ if (p->p_priority <= PZERO)
+ totalp->t_dw++;
+ else if (p->p_slptime < maxslp)
+ totalp->t_sl++;
+ } else if (p->p_slptime < maxslp)
+ totalp->t_sw++;
+ if (p->p_slptime >= maxslp)
+ continue;
+ break;
+
+ case SRUN:
+ case SIDL:
+ if (p->p_flag & P_INMEM)
+ totalp->t_rq++;
+ else
+ totalp->t_sw++;
+ if (p->p_stat == SIDL)
+ continue;
+ break;
+ }
+ /*
+ * Note active objects.
+ */
+ paging = 0;
+ for (map = &p->p_vmspace->vm_map, entry = map->header.next;
+ entry != &map->header; entry = entry->next) {
+ if (entry->is_a_map || entry->is_sub_map ||
+ entry->object.vm_object == NULL)
+ continue;
+ entry->object.vm_object->flags |= OBJ_ACTIVE;
+ paging |= entry->object.vm_object->paging_in_progress;
+ }
+ if (paging)
+ totalp->t_pw++;
+ }
+ /*
+ * Calculate object memory usage statistics.
+ */
+ simple_lock(&vm_object_list_lock);
+ for (object = vm_object_list.tqh_first;
+ object != NULL;
+ object = object->object_list.tqe_next) {
+ totalp->t_vm += num_pages(object->size);
+ totalp->t_rm += object->resident_page_count;
+ if (object->flags & OBJ_ACTIVE) {
+ totalp->t_avm += num_pages(object->size);
+ totalp->t_arm += object->resident_page_count;
+ }
+ if (object->ref_count > 1) {
+ /* shared object */
+ totalp->t_vmshr += num_pages(object->size);
+ totalp->t_rmshr += object->resident_page_count;
+ if (object->flags & OBJ_ACTIVE) {
+ totalp->t_avmshr += num_pages(object->size);
+ totalp->t_armshr += object->resident_page_count;
+ }
+ }
+ }
+ totalp->t_free = cnt.v_free_count;
+}
diff --git a/sys/vm/vm_mmap.c b/sys/vm/vm_mmap.c
new file mode 100644
index 0000000..2e7204a
--- /dev/null
+++ b/sys/vm/vm_mmap.c
@@ -0,0 +1,836 @@
+/*
+ * Copyright (c) 1988 University of Utah.
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * the Systems Programming Group of the University of Utah Computer
+ * Science Department.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * from: Utah $Hdr: vm_mmap.c 1.6 91/10/21$
+ *
+ * @(#)vm_mmap.c 8.4 (Berkeley) 1/12/94
+ */
+
+/*
+ * Mapped file (mmap) interface to VM
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/filedesc.h>
+#include <sys/resourcevar.h>
+#include <sys/proc.h>
+#include <sys/vnode.h>
+#include <sys/file.h>
+#include <sys/mman.h>
+#include <sys/conf.h>
+
+#include <miscfs/specfs/specdev.h>
+
+#include <vm/vm.h>
+#include <vm/vm_pager.h>
+#include <vm/vm_prot.h>
+
+#ifdef DEBUG
+int mmapdebug = 0;
+#define MDB_FOLLOW 0x01
+#define MDB_SYNC 0x02
+#define MDB_MAPIT 0x04
+#endif
+
+struct sbrk_args {
+ int incr;
+};
+/* ARGSUSED */
+int
+sbrk(p, uap, retval)
+ struct proc *p;
+ struct sbrk_args *uap;
+ int *retval;
+{
+
+ /* Not yet implemented */
+ return (EOPNOTSUPP);
+}
+
+struct sstk_args {
+ int incr;
+};
+/* ARGSUSED */
+int
+sstk(p, uap, retval)
+ struct proc *p;
+ struct sstk_args *uap;
+ int *retval;
+{
+
+ /* Not yet implemented */
+ return (EOPNOTSUPP);
+}
+
+#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
+struct getpagesize_args {
+ int dummy;
+};
+/* ARGSUSED */
+int
+ogetpagesize(p, uap, retval)
+ struct proc *p;
+ struct getpagesize_args *uap;
+ int *retval;
+{
+
+ *retval = PAGE_SIZE;
+ return (0);
+}
+#endif /* COMPAT_43 || COMPAT_SUNOS */
+
+struct mmap_args {
+ caddr_t addr;
+ size_t len;
+ int prot;
+ int flags;
+ int fd;
+ long pad;
+ off_t pos;
+};
+
+#ifdef COMPAT_43
+struct ommap_args {
+ caddr_t addr;
+ int len;
+ int prot;
+ int flags;
+ int fd;
+ long pos;
+};
+int
+ommap(p, uap, retval)
+ struct proc *p;
+ register struct ommap_args *uap;
+ int *retval;
+{
+ struct mmap_args nargs;
+ static const char cvtbsdprot[8] = {
+ 0,
+ PROT_EXEC,
+ PROT_WRITE,
+ PROT_EXEC|PROT_WRITE,
+ PROT_READ,
+ PROT_EXEC|PROT_READ,
+ PROT_WRITE|PROT_READ,
+ PROT_EXEC|PROT_WRITE|PROT_READ,
+ };
+#define OMAP_ANON 0x0002
+#define OMAP_COPY 0x0020
+#define OMAP_SHARED 0x0010
+#define OMAP_FIXED 0x0100
+#define OMAP_INHERIT 0x0800
+
+ nargs.addr = uap->addr;
+ nargs.len = uap->len;
+ nargs.prot = cvtbsdprot[uap->prot&0x7];
+ nargs.flags = 0;
+ if (uap->flags & OMAP_ANON)
+ nargs.flags |= MAP_ANON;
+ if (uap->flags & OMAP_COPY)
+ nargs.flags |= MAP_COPY;
+ if (uap->flags & OMAP_SHARED)
+ nargs.flags |= MAP_SHARED;
+ else
+ nargs.flags |= MAP_PRIVATE;
+ if (uap->flags & OMAP_FIXED)
+ nargs.flags |= MAP_FIXED;
+ if (uap->flags & OMAP_INHERIT)
+ nargs.flags |= MAP_INHERIT;
+ nargs.fd = uap->fd;
+ nargs.pos = uap->pos;
+ return (mmap(p, &nargs, retval));
+}
+#endif
+
+int
+mmap(p, uap, retval)
+ struct proc *p;
+ register struct mmap_args *uap;
+ int *retval;
+{
+ register struct filedesc *fdp = p->p_fd;
+ register struct file *fp;
+ struct vnode *vp;
+ vm_offset_t addr;
+ vm_size_t size;
+ vm_prot_t prot, maxprot;
+ caddr_t handle;
+ int flags, error;
+
+ prot = uap->prot & VM_PROT_ALL;
+ flags = uap->flags;
+#ifdef DEBUG
+ if (mmapdebug & MDB_FOLLOW)
+ printf("mmap(%d): addr %x len %x pro %x flg %x fd %d pos %x\n",
+ p->p_pid, uap->addr, uap->len, prot,
+ flags, uap->fd, (vm_offset_t)uap->pos);
+#endif
+ /*
+ * Address (if FIXED) must be page aligned.
+ * Size is implicitly rounded to a page boundary.
+ */
+ addr = (vm_offset_t) uap->addr;
+ if (((flags & MAP_FIXED) && (addr & PAGE_MASK)) ||
+ (ssize_t)uap->len < 0 || ((flags & MAP_ANON) && uap->fd != -1))
+ return (EINVAL);
+ size = (vm_size_t) round_page(uap->len);
+ /*
+ * Check for illegal addresses. Watch out for address wrap...
+ * Note that VM_*_ADDRESS are not constants due to casts (argh).
+ */
+ if (flags & MAP_FIXED) {
+ if (VM_MAXUSER_ADDRESS > 0 && addr + size >= VM_MAXUSER_ADDRESS)
+ return (EINVAL);
+#ifndef i386
+ if (VM_MIN_ADDRESS > 0 && addr < VM_MIN_ADDRESS)
+ return (EINVAL);
+#endif
+ if (addr > addr + size)
+ return (EINVAL);
+ }
+ /*
+ * XXX if no hint provided for a non-fixed mapping place it after
+ * the end of the largest possible heap.
+ *
+ * There should really be a pmap call to determine a reasonable
+ * location.
+ */
+ if (addr == 0 && (flags & MAP_FIXED) == 0)
+ addr = round_page(p->p_vmspace->vm_daddr + MAXDSIZ);
+ if (flags & MAP_ANON) {
+ /*
+ * Mapping blank space is trivial.
+ */
+ handle = NULL;
+ maxprot = VM_PROT_ALL;
+ } else {
+ /*
+ * Mapping file, get fp for validation.
+ * Obtain vnode and make sure it is of appropriate type.
+ */
+ if (((unsigned)uap->fd) >= fdp->fd_nfiles ||
+ (fp = fdp->fd_ofiles[uap->fd]) == NULL)
+ return (EBADF);
+ if (fp->f_type != DTYPE_VNODE)
+ return (EINVAL);
+ vp = (struct vnode *)fp->f_data;
+ if (vp->v_type != VREG && vp->v_type != VCHR)
+ return (EINVAL);
+ /*
+ * XXX hack to handle use of /dev/zero to map anon
+ * memory (ala SunOS).
+ */
+ if (vp->v_type == VCHR && iszerodev(vp->v_rdev)) {
+ handle = NULL;
+ maxprot = VM_PROT_ALL;
+ flags |= MAP_ANON;
+ } else {
+ /*
+ * Ensure that file and memory protections are
+ * compatible. Note that we only worry about
+ * writability if mapping is shared; in this case,
+ * current and max prot are dictated by the open file.
+ * XXX use the vnode instead? Problem is: what
+ * credentials do we use for determination?
+ * What if proc does a setuid?
+ */
+ maxprot = VM_PROT_EXECUTE; /* ??? */
+ if (fp->f_flag & FREAD)
+ maxprot |= VM_PROT_READ;
+ else if (prot & PROT_READ)
+ return (EACCES);
+ if (flags & MAP_SHARED) {
+ if (fp->f_flag & FWRITE)
+ maxprot |= VM_PROT_WRITE;
+ else if (prot & PROT_WRITE)
+ return (EACCES);
+ } else
+ maxprot |= VM_PROT_WRITE;
+ handle = (caddr_t)vp;
+ }
+ }
+ error = vm_mmap(&p->p_vmspace->vm_map, &addr, size, prot, maxprot,
+ flags, handle, (vm_offset_t)uap->pos);
+ if (error == 0)
+ *retval = (int)addr;
+ return (error);
+}
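+
+/*
+ * Illustrative userland sketch (not part of this file): per the
+ * maxprot computation above, a writable MAP_SHARED mapping needs a
+ * descriptor opened for writing, while MAP_PRIVATE permits PROT_WRITE
+ * even on a read-only descriptor:
+ *
+ * fd = open(path, O_RDWR);
+ * p = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
+ *
+ * The same MAP_SHARED call on an O_RDONLY descriptor fails with
+ * EACCES.
+ */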
+
+struct msync_args {
+ caddr_t addr;
+ int len;
+};
+int
+msync(p, uap, retval)
+ struct proc *p;
+ struct msync_args *uap;
+ int *retval;
+{
+ vm_offset_t addr;
+ vm_size_t size;
+ vm_map_t map;
+ int rv;
+ boolean_t syncio, invalidate;
+
+#ifdef DEBUG
+ if (mmapdebug & (MDB_FOLLOW|MDB_SYNC))
+ printf("msync(%d): addr %x len %x\n",
+ p->p_pid, uap->addr, uap->len);
+#endif
+ if (((int)uap->addr & PAGE_MASK) || uap->addr + uap->len < uap->addr)
+ return (EINVAL);
+ map = &p->p_vmspace->vm_map;
+ addr = (vm_offset_t)uap->addr;
+ size = (vm_size_t)uap->len;
+ /*
+ * XXX Gak! If size is zero we are supposed to sync "all modified
+ * pages with the region containing addr". Unfortunately, we
+ * don't really keep track of individual mmaps so we approximate
+ * by flushing the range of the map entry containing addr.
+ * This can be incorrect if the region splits or is coalesced
+ * with a neighbor.
+ */
+ if (size == 0) {
+ vm_map_entry_t entry;
+
+ vm_map_lock_read(map);
+ rv = vm_map_lookup_entry(map, addr, &entry);
+ vm_map_unlock_read(map);
+ if (rv == FALSE)
+ return (EINVAL);
+ addr = entry->start;
+ size = entry->end - entry->start;
+ }
+#ifdef DEBUG
+ if (mmapdebug & MDB_SYNC)
+ printf("msync: cleaning/flushing address range [%x-%x)\n",
+ addr, addr+size);
+#endif
+ /*
+ * Could pass this in as a third flag argument to implement
+ * Sun's MS_ASYNC.
+ */
+ syncio = TRUE;
+ /*
+ * XXX bummer, gotta flush all cached pages to ensure
+ * consistency with the file system cache. Otherwise, we could
+ * pass this in to implement Sun's MS_INVALIDATE.
+ */
+ invalidate = TRUE;
+ /*
+ * Clean the pages and interpret the return value.
+ */
+ rv = vm_map_clean(map, addr, addr+size, syncio, invalidate);
+ switch (rv) {
+ case KERN_SUCCESS:
+ break;
+ case KERN_INVALID_ADDRESS:
+ return (EINVAL); /* Sun returns ENOMEM? */
+ case KERN_FAILURE:
+ return (EIO);
+ default:
+ return (EINVAL);
+ }
+ return (0);
+}
+
+struct munmap_args {
+ caddr_t addr;
+ int len;
+};
+int
+munmap(p, uap, retval)
+ register struct proc *p;
+ register struct munmap_args *uap;
+ int *retval;
+{
+ vm_offset_t addr;
+ vm_size_t size;
+ vm_map_t map;
+
+#ifdef DEBUG
+ if (mmapdebug & MDB_FOLLOW)
+ printf("munmap(%d): addr %x len %x\n",
+ p->p_pid, uap->addr, uap->len);
+#endif
+
+ addr = (vm_offset_t) uap->addr;
+ if ((addr & PAGE_MASK) || uap->len < 0)
+ return(EINVAL);
+ size = (vm_size_t) round_page(uap->len);
+ if (size == 0)
+ return(0);
+ /*
+ * Check for illegal addresses. Watch out for address wrap...
+ * Note that VM_*_ADDRESS are not constants due to casts (argh).
+ */
+ if (VM_MAXUSER_ADDRESS > 0 && addr + size >= VM_MAXUSER_ADDRESS)
+ return (EINVAL);
+#ifndef i386
+ if (VM_MIN_ADDRESS > 0 && addr < VM_MIN_ADDRESS)
+ return (EINVAL);
+#endif
+ if (addr > addr + size)
+ return (EINVAL);
+ map = &p->p_vmspace->vm_map;
+ /*
+ * Make sure entire range is allocated.
+ */
+ if (!vm_map_check_protection(map, addr, addr + size, VM_PROT_NONE))
+ return(EINVAL);
+ /* returns nothing but KERN_SUCCESS anyway */
+ (void) vm_map_remove(map, addr, addr+size);
+ return(0);
+}
+
+void
+munmapfd(fd)
+ int fd;
+{
+#ifdef DEBUG
+ if (mmapdebug & MDB_FOLLOW)
+ printf("munmapfd(%d): fd %d\n", curproc->p_pid, fd);
+#endif
+
+ /*
+ * XXX should vm_deallocate any regions mapped to this file
+ */
+ curproc->p_fd->fd_ofileflags[fd] &= ~UF_MAPPED;
+}
+
+struct mprotect_args {
+ caddr_t addr;
+ int len;
+ int prot;
+};
+int
+mprotect(p, uap, retval)
+ struct proc *p;
+ struct mprotect_args *uap;
+ int *retval;
+{
+ vm_offset_t addr;
+ vm_size_t size;
+ register vm_prot_t prot;
+
+#ifdef DEBUG
+ if (mmapdebug & MDB_FOLLOW)
+ printf("mprotect(%d): addr %x len %x prot %d\n",
+ p->p_pid, uap->addr, uap->len, uap->prot);
+#endif
+
+ addr = (vm_offset_t)uap->addr;
+ if ((addr & PAGE_MASK) || uap->len < 0)
+ return(EINVAL);
+ size = (vm_size_t)uap->len;
+ prot = uap->prot & VM_PROT_ALL;
+
+ switch (vm_map_protect(&p->p_vmspace->vm_map, addr, addr+size, prot,
+ FALSE)) {
+ case KERN_SUCCESS:
+ return (0);
+ case KERN_PROTECTION_FAILURE:
+ return (EACCES);
+ }
+ return (EINVAL);
+}
+
+struct madvise_args {
+ caddr_t addr;
+ int len;
+ int behav;
+};
+/* ARGSUSED */
+int
+madvise(p, uap, retval)
+ struct proc *p;
+ struct madvise_args *uap;
+ int *retval;
+{
+
+ /* Not yet implemented */
+ return (EOPNOTSUPP);
+}
+
+struct mincore_args {
+ caddr_t addr;
+ int len;
+ char *vec;
+};
+/* ARGSUSED */
+int
+mincore(p, uap, retval)
+ struct proc *p;
+ struct mincore_args *uap;
+ int *retval;
+{
+
+ /* Not yet implemented */
+ return (EOPNOTSUPP);
+}
+
+struct mlock_args {
+ caddr_t addr;
+ size_t len;
+};
+int
+mlock(p, uap, retval)
+ struct proc *p;
+ struct mlock_args *uap;
+ int *retval;
+{
+ vm_offset_t addr;
+ vm_size_t size;
+ int error;
+ extern int vm_page_max_wired;
+
+#ifdef DEBUG
+ if (mmapdebug & MDB_FOLLOW)
+ printf("mlock(%d): addr %x len %x\n",
+ p->p_pid, uap->addr, uap->len);
+#endif
+ addr = (vm_offset_t)uap->addr;
+ if ((addr & PAGE_MASK) || uap->addr + uap->len < uap->addr)
+ return (EINVAL);
+ size = round_page((vm_size_t)uap->len);
+ if (atop(size) + cnt.v_wire_count > vm_page_max_wired)
+ return (EAGAIN);
+#ifdef pmap_wired_count
+ if (size + ptoa(pmap_wired_count(vm_map_pmap(&p->p_vmspace->vm_map))) >
+ p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur)
+ return (EAGAIN);
+#else
+ if (error = suser(p->p_ucred, &p->p_acflag))
+ return (error);
+#endif
+
+ error = vm_map_pageable(&p->p_vmspace->vm_map, addr, addr+size, FALSE);
+ return (error == KERN_SUCCESS ? 0 : ENOMEM);
+}
+
+struct munlock_args {
+ caddr_t addr;
+ size_t len;
+};
+int
+munlock(p, uap, retval)
+ struct proc *p;
+ struct munlock_args *uap;
+ int *retval;
+{
+ vm_offset_t addr;
+ vm_size_t size;
+ int error;
+
+#ifdef DEBUG
+ if (mmapdebug & MDB_FOLLOW)
+ printf("munlock(%d): addr %x len %x\n",
+ p->p_pid, uap->addr, uap->len);
+#endif
+ addr = (vm_offset_t)uap->addr;
+ if ((addr & PAGE_MASK) || uap->addr + uap->len < uap->addr)
+ return (EINVAL);
+#ifndef pmap_wired_count
+ if (error = suser(p->p_ucred, &p->p_acflag))
+ return (error);
+#endif
+ size = round_page((vm_size_t)uap->len);
+
+ error = vm_map_pageable(&p->p_vmspace->vm_map, addr, addr+size, TRUE);
+ return (error == KERN_SUCCESS ? 0 : ENOMEM);
+}
+
+/*
+ * Internal version of mmap.
+ * Currently used by mmap, exec, and sys5 shared memory.
+ * Handle is either a vnode pointer or NULL for MAP_ANON.
+ */
+int
+vm_mmap(map, addr, size, prot, maxprot, flags, handle, foff)
+ register vm_map_t map;
+ register vm_offset_t *addr;
+ register vm_size_t size;
+ vm_prot_t prot, maxprot;
+ register int flags;
+ caddr_t handle; /* XXX should be vp */
+ vm_offset_t foff;
+{
+ register vm_pager_t pager;
+ boolean_t fitit;
+ vm_object_t object;
+ struct vnode *vp = NULL;
+ int type;
+ int rv = KERN_SUCCESS;
+
+ if (size == 0)
+ return (0);
+
+ if ((flags & MAP_FIXED) == 0) {
+ fitit = TRUE;
+ *addr = round_page(*addr);
+ } else {
+ fitit = FALSE;
+ (void)vm_deallocate(map, *addr, size);
+ }
+
+ /*
+ * Lookup/allocate pager. All except an unnamed anonymous lookup
+ * gain a reference to ensure continued existence of the object.
+ * (XXX the exception is to appease the pageout daemon)
+ */
+ if (flags & MAP_ANON)
+ type = PG_DFLT;
+ else {
+ vp = (struct vnode *)handle;
+ if (vp->v_type == VCHR) {
+ type = PG_DEVICE;
+ handle = (caddr_t)vp->v_rdev;
+ } else
+ type = PG_VNODE;
+ }
+ pager = vm_pager_allocate(type, handle, size, prot, foff);
+ if (pager == NULL)
+ return (type == PG_DEVICE ? EINVAL : ENOMEM);
+ /*
+ * Find object and release extra reference gained by lookup
+ */
+ object = vm_object_lookup(pager);
+ vm_object_deallocate(object);
+
+ /*
+ * Anonymous memory.
+ */
+ if (flags & MAP_ANON) {
+ rv = vm_allocate_with_pager(map, addr, size, fitit,
+ pager, foff, TRUE);
+ if (rv != KERN_SUCCESS) {
+ if (handle == NULL)
+ vm_pager_deallocate(pager);
+ else
+ vm_object_deallocate(object);
+ goto out;
+ }
+ /*
+ * Don't cache anonymous objects.
+ * Loses the reference gained by vm_pager_allocate.
+ * Note that object will be NULL when handle == NULL,
+ * this is ok since vm_allocate_with_pager has made
+ * sure that these objects are uncached.
+ */
+ (void) pager_cache(object, FALSE);
+#ifdef DEBUG
+ if (mmapdebug & MDB_MAPIT)
+ printf("vm_mmap(%d): ANON *addr %x size %x pager %x\n",
+ curproc->p_pid, *addr, size, pager);
+#endif
+ }
+ /*
+ * Must be a mapped file.
+ * Distinguish between character special and regular files.
+ */
+ else if (vp->v_type == VCHR) {
+ rv = vm_allocate_with_pager(map, addr, size, fitit,
+ pager, foff, FALSE);
+ /*
+ * Uncache the object and lose the reference gained
+ * by vm_pager_allocate(). If the call to
+ * vm_allocate_with_pager() was successful, then we
+ * gained an additional reference ensuring the object
+ * will continue to exist. If the call failed then
+ * the deallocate call below will terminate the
+ * object which is fine.
+ */
+ (void) pager_cache(object, FALSE);
+ if (rv != KERN_SUCCESS)
+ goto out;
+ }
+ /*
+ * A regular file
+ */
+ else {
+#ifdef DEBUG
+ if (object == NULL)
+ printf("vm_mmap: no object: vp %x, pager %x\n",
+ vp, pager);
+#endif
+ /*
+ * Map it directly.
+ * Allows modifications to go out to the vnode.
+ */
+ if (flags & MAP_SHARED) {
+ rv = vm_allocate_with_pager(map, addr, size,
+ fitit, pager,
+ foff, FALSE);
+ if (rv != KERN_SUCCESS) {
+ vm_object_deallocate(object);
+ goto out;
+ }
+ /*
+ * Don't cache the object. This is the easiest way
+ * of ensuring that data gets back to the filesystem
+ * because vnode_pager_deallocate() will fsync the
+ * vnode. pager_cache() will lose the extra ref.
+ */
+ if (prot & VM_PROT_WRITE)
+ pager_cache(object, FALSE);
+ else
+ vm_object_deallocate(object);
+ }
+ /*
+ * Copy-on-write of file. Two flavors.
+ * MAP_COPY is true COW, you essentially get a snapshot of
+ * the region at the time of mapping. MAP_PRIVATE means only
+ * that your changes are not reflected back to the object.
+ * Changes made by others will be seen.
+ */
+ else {
+ vm_map_t tmap;
+ vm_offset_t off;
+
+ /* locate and allocate the target address space */
+ rv = vm_map_find(map, NULL, (vm_offset_t)0,
+ addr, size, fitit);
+ if (rv != KERN_SUCCESS) {
+ vm_object_deallocate(object);
+ goto out;
+ }
+ tmap = vm_map_create(pmap_create(size), VM_MIN_ADDRESS,
+ VM_MIN_ADDRESS+size, TRUE);
+ off = VM_MIN_ADDRESS;
+ rv = vm_allocate_with_pager(tmap, &off, size,
+ TRUE, pager,
+ foff, FALSE);
+ if (rv != KERN_SUCCESS) {
+ vm_object_deallocate(object);
+ vm_map_deallocate(tmap);
+ goto out;
+ }
+ /*
+ * (XXX)
+ * MAP_PRIVATE implies that we see changes made by
+ * others. To ensure that, we need to guarantee that
+ * no copy object is created (otherwise original
+ * pages would be pushed to the copy object and we
+ * would never see changes made by others). We
+ * totally sleeze it right now by marking the object
+ * internal temporarily.
+ */
+ if ((flags & MAP_COPY) == 0)
+ object->flags |= OBJ_INTERNAL;
+ rv = vm_map_copy(map, tmap, *addr, size, off,
+ FALSE, FALSE);
+ object->flags &= ~OBJ_INTERNAL;
+ /*
+ * (XXX)
+ * My oh my, this only gets worse...
+ * Force creation of a shadow object so that
+ * vm_map_fork will do the right thing.
+ */
+ if ((flags & MAP_COPY) == 0) {
+ vm_map_t tmap;
+ vm_map_entry_t tentry;
+ vm_object_t tobject;
+ vm_offset_t toffset;
+ vm_prot_t tprot;
+ boolean_t twired, tsu;
+
+ tmap = map;
+ vm_map_lookup(&tmap, *addr, VM_PROT_WRITE,
+ &tentry, &tobject, &toffset,
+ &tprot, &twired, &tsu);
+ vm_map_lookup_done(tmap, tentry);
+ }
+ /*
+ * (XXX)
+ * Map copy code cannot detect sharing unless a
+ * sharing map is involved. So we cheat and write
+ * protect everything ourselves.
+ */
+ vm_object_pmap_copy(object, foff, foff + size);
+ vm_object_deallocate(object);
+ vm_map_deallocate(tmap);
+ if (rv != KERN_SUCCESS)
+ goto out;
+ }
+#ifdef DEBUG
+ if (mmapdebug & MDB_MAPIT)
+ printf("vm_mmap(%d): FILE *addr %x size %x pager %x\n",
+ curproc->p_pid, *addr, size, pager);
+#endif
+ }
+ /*
+ * Correct protection (default is VM_PROT_ALL).
+ * If maxprot is different than prot, we must set both explicitly.
+ */
+ rv = KERN_SUCCESS;
+ if (maxprot != VM_PROT_ALL)
+ rv = vm_map_protect(map, *addr, *addr+size, maxprot, TRUE);
+ if (rv == KERN_SUCCESS && prot != maxprot)
+ rv = vm_map_protect(map, *addr, *addr+size, prot, FALSE);
+ if (rv != KERN_SUCCESS) {
+ (void) vm_deallocate(map, *addr, size);
+ goto out;
+ }
+ /*
+ * Shared memory is also shared with children.
+ */
+ if (flags & MAP_SHARED) {
+ rv = vm_map_inherit(map, *addr, *addr+size, VM_INHERIT_SHARE);
+ if (rv != KERN_SUCCESS) {
+ (void) vm_deallocate(map, *addr, size);
+ goto out;
+ }
+ }
+out:
+#ifdef DEBUG
+ if (mmapdebug & MDB_MAPIT)
+ printf("vm_mmap: rv %d\n", rv);
+#endif
+ switch (rv) {
+ case KERN_SUCCESS:
+ return (0);
+ case KERN_INVALID_ADDRESS:
+ case KERN_NO_SPACE:
+ return (ENOMEM);
+ case KERN_PROTECTION_FAILURE:
+ return (EACCES);
+ default:
+ return (EINVAL);
+ }
+}
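+
+/*
+ * Illustrative sketch (hypothetical caller, compiled out): mapping
+ * "size" bytes of anonymous memory at a fixed address in a process
+ * map through the internal interface above.  Only the argument
+ * conventions documented for vm_mmap are assumed.
+ */
+#ifdef notdef
+static int
+vm_mmap_anon_example(map, addr, size)
+ vm_map_t map;
+ vm_offset_t addr;
+ vm_size_t size;
+{
+
+ return (vm_mmap(map, &addr, size, VM_PROT_ALL, VM_PROT_ALL,
+     MAP_ANON | MAP_FIXED, (caddr_t)NULL, (vm_offset_t)0));
+}
+#endif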
diff --git a/sys/vm/vm_object.c b/sys/vm/vm_object.c
new file mode 100644
index 0000000..a6419dc
--- /dev/null
+++ b/sys/vm/vm_object.c
@@ -0,0 +1,1645 @@
+/*
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * The Mach Operating System project at Carnegie-Mellon University.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)vm_object.c 8.5 (Berkeley) 3/22/94
+ *
+ *
+ * Copyright (c) 1987, 1990 Carnegie-Mellon University.
+ * All rights reserved.
+ *
+ * Authors: Avadis Tevanian, Jr., Michael Wayne Young
+ *
+ * Permission to use, copy, modify and distribute this software and
+ * its documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ *
+ * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
+ * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
+ * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
+ * School of Computer Science
+ * Carnegie Mellon University
+ * Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie the
+ * rights to redistribute these changes.
+ */
+
+/*
+ * Virtual memory object module.
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+
+#include <vm/vm.h>
+#include <vm/vm_page.h>
+#include <vm/vm_pageout.h>
+
+static void _vm_object_allocate(vm_size_t, vm_object_t);
+void vm_object_deactivate_pages(vm_object_t);
+void vm_object_cache_trim(void);
+void vm_object_remove(vm_pager_t);
+
+/*
+ * Virtual memory objects maintain the actual data
+ * associated with allocated virtual memory. A given
+ * page of memory exists within exactly one object.
+ *
+ * An object is only deallocated when all "references"
+ * are given up. Only one "reference" to a given
+ * region of an object should be writeable.
+ *
+ * Associated with each object is a list of all resident
+ * memory pages belonging to that object; this list is
+ * maintained by the "vm_page" module, and locked by the object's
+ * lock.
+ *
+ * Each object also records a "pager" routine which is
+ * used to retrieve (and store) pages to the proper backing
+ * storage. In addition, objects may be backed by other
+ * objects from which they were virtual-copied.
+ *
+ * The only items within the object structure which are
+ * modified after time of creation are:
+ * reference count locked by object's lock
+ * pager routine locked by object's lock
+ *
+ */
+
+
+struct vm_object kernel_object_store;
+struct vm_object kmem_object_store;
+
+extern int vm_cache_max;
+#define VM_OBJECT_HASH_COUNT 157
+
+struct vm_object_hash_head vm_object_hashtable[VM_OBJECT_HASH_COUNT];
+
+long object_collapses = 0;
+long object_bypasses = 0;
+
+static void
+_vm_object_allocate(size, object)
+ vm_size_t size;
+ register vm_object_t object;
+{
+ bzero(object, sizeof *object);
+ TAILQ_INIT(&object->memq);
+ vm_object_lock_init(object);
+ object->ref_count = 1;
+ object->resident_page_count = 0;
+ object->size = size;
+ object->flags = OBJ_INTERNAL; /* vm_allocate_with_pager will reset */
+ object->paging_in_progress = 0;
+ object->copy = NULL;
+
+ /*
+ * Object starts out read-write, with no pager.
+ */
+
+ object->pager = NULL;
+ object->paging_offset = 0;
+ object->shadow = NULL;
+ object->shadow_offset = (vm_offset_t) 0;
+
+ simple_lock(&vm_object_list_lock);
+ TAILQ_INSERT_TAIL(&vm_object_list, object, object_list);
+ vm_object_count++;
+ cnt.v_nzfod += atop(size);
+ simple_unlock(&vm_object_list_lock);
+}
+
+/*
+ * vm_object_init:
+ *
+ * Initialize the VM objects module.
+ */
+void
+vm_object_init(vm_offset_t nothing)
+{
+ register int i;
+
+ TAILQ_INIT(&vm_object_cached_list);
+ TAILQ_INIT(&vm_object_list);
+ vm_object_count = 0;
+ simple_lock_init(&vm_cache_lock);
+ simple_lock_init(&vm_object_list_lock);
+
+ for (i = 0; i < VM_OBJECT_HASH_COUNT; i++)
+ TAILQ_INIT(&vm_object_hashtable[i]);
+
+ kernel_object = &kernel_object_store;
+ _vm_object_allocate(VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS,
+ kernel_object);
+
+ kmem_object = &kmem_object_store;
+ _vm_object_allocate(VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS,
+ kmem_object);
+}
+
+/*
+ * vm_object_allocate:
+ *
+ * Returns a new object with the given size.
+ */
+
+vm_object_t
+vm_object_allocate(size)
+ vm_size_t size;
+{
+ register vm_object_t result;
+ int s;
+
+ result = (vm_object_t)
+ malloc((u_long)sizeof *result, M_VMOBJ, M_WAITOK);
+
+
+ _vm_object_allocate(size, result);
+
+ return(result);
+}
+
+
+/*
+ * vm_object_reference:
+ *
+ * Gets another reference to the given object.
+ */
+inline void
+vm_object_reference(object)
+ register vm_object_t object;
+{
+ if (object == NULL)
+ return;
+
+ vm_object_lock(object);
+ object->ref_count++;
+ vm_object_unlock(object);
+}
+
+/*
+ * vm_object_deallocate:
+ *
+ * Release a reference to the specified object,
+ * gained either through a vm_object_allocate
+ * or a vm_object_reference call. When all references
+ * are gone, storage associated with this object
+ * may be relinquished.
+ *
+ * No object may be locked.
+ */
+void
+vm_object_deallocate(object)
+ vm_object_t object;
+{
+ vm_object_t temp;
+
+ while (object != NULL) {
+
+ /*
+ * The cache holds a reference (uncounted) to
+ * the object; we must lock it before removing
+ * the object.
+ */
+
+ vm_object_cache_lock();
+
+ /*
+ * Lose the reference
+ */
+ vm_object_lock(object);
+ if (--(object->ref_count) != 0) {
+
+ vm_object_unlock(object);
+ /*
+ * If there are still references, then
+ * we are done.
+ */
+ vm_object_cache_unlock();
+ return;
+ }
+
+ /*
+ * See if this object can persist. If so, enter
+ * it in the cache, then deactivate all of its
+ * pages.
+ */
+
+ if (object->flags & OBJ_CANPERSIST) {
+
+ TAILQ_INSERT_TAIL(&vm_object_cached_list, object,
+ cached_list);
+ vm_object_cached++;
+ vm_object_cache_unlock();
+
+/*
+ * this code segment was removed because it kills performance with
+ * large, repetitively used binaries. The functionality now resides
+ * in the pageout daemon
+ * vm_object_deactivate_pages(object);
+ */
+ vm_object_unlock(object);
+
+ vm_object_cache_trim();
+ return;
+ }
+
+ /*
+ * Make sure no one can look us up now.
+ */
+ vm_object_remove(object->pager);
+ vm_object_cache_unlock();
+
+ temp = object->shadow;
+ vm_object_terminate(object);
+ /* unlocks and deallocates object */
+ object = temp;
+ }
+}
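+
+/*
+ * Illustrative sketch (hypothetical, compiled out): the reference
+ * discipline the routines above expect.  vm_object_lookup() hands
+ * back a counted reference, so each successful lookup and each
+ * vm_object_reference() must eventually be paired with a
+ * vm_object_deallocate(); when the count finally reaches zero the
+ * object is cached if OBJ_CANPERSIST, otherwise terminated.
+ */
+#ifdef notdef
+static void
+vm_object_ref_example(pager)
+ vm_pager_t pager;
+{
+ vm_object_t object;
+
+ object = vm_object_lookup(pager);
+ if (object == NULL)
+  return;
+ vm_object_reference(object); /* second reference for a long-lived user */
+ vm_object_deallocate(object); /* drop the lookup reference */
+ vm_object_deallocate(object); /* drop the second reference */
+}
+#endif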
+
+/*
+ * vm_object_terminate actually destroys the specified object, freeing
+ * up all previously used resources.
+ *
+ * The object must be locked.
+ */
+void
+vm_object_terminate(object)
+ register vm_object_t object;
+{
+ register vm_page_t p;
+ vm_object_t shadow_object;
+ int s;
+
+ /*
+ * Detach the object from its shadow if we are the shadow's
+ * copy.
+ */
+ if ((shadow_object = object->shadow) != NULL) {
+ vm_object_lock(shadow_object);
+ if (shadow_object->copy == object)
+ shadow_object->copy = NULL;
+/*
+ else if (shadow_object->copy != NULL)
+ panic("vm_object_terminate: copy/shadow inconsistency");
+*/
+ vm_object_unlock(shadow_object);
+ }
+
+ /*
+ * Wait until the pageout daemon is through
+ * with the object.
+ */
+
+ while (object->paging_in_progress) {
+ vm_object_sleep((int)object, object, FALSE);
+ vm_object_lock(object);
+ }
+
+ /*
+ * While the paging system is locked,
+ * pull the object's pages off the active
+ * and inactive queues. This keeps the
+ * pageout daemon from playing with them
+ * during vm_pager_deallocate.
+ *
+ * We can't free the pages yet, because the
+ * object's pager may have to write them out
+ * before deallocating the paging space.
+ */
+
+ for( p = object->memq.tqh_first; p; p=p->listq.tqe_next) {
+ VM_PAGE_CHECK(p);
+
+ vm_page_lock_queues();
+ s = splimp();
+ if (p->flags & PG_ACTIVE) {
+ TAILQ_REMOVE(&vm_page_queue_active, p, pageq);
+ p->flags &= ~PG_ACTIVE;
+ cnt.v_active_count--;
+ }
+
+ if (p->flags & PG_INACTIVE) {
+ TAILQ_REMOVE(&vm_page_queue_inactive, p, pageq);
+ p->flags &= ~PG_INACTIVE;
+ cnt.v_inactive_count--;
+ }
+ splx(s);
+ vm_page_unlock_queues();
+ }
+
+ vm_object_unlock(object);
+
+ if (object->paging_in_progress != 0)
+ panic("vm_object_terminate: pageout in progress");
+
+ /*
+ * Clean and free the pages, as appropriate.
+ * All references to the object are gone,
+ * so we don't need to lock it.
+ */
+
+ if ((object->flags & OBJ_INTERNAL) == 0) {
+ vm_object_lock(object);
+ (void) vm_object_page_clean(object, 0, 0, TRUE, TRUE);
+ vm_object_unlock(object);
+ }
+
+ /*
+ * Now free the pages.
+ * For internal objects, this also removes them from paging queues.
+ */
+ while ((p = object->memq.tqh_first) != NULL) {
+ VM_PAGE_CHECK(p);
+ vm_page_lock_queues();
+ vm_page_free(p);
+ cnt.v_pfree++;
+ vm_page_unlock_queues();
+ }
+
+ /*
+ * Let the pager know object is dead.
+ */
+
+ if (object->pager != NULL)
+ vm_pager_deallocate(object->pager);
+
+
+ simple_lock(&vm_object_list_lock);
+ TAILQ_REMOVE(&vm_object_list, object, object_list);
+ vm_object_count--;
+ simple_unlock(&vm_object_list_lock);
+
+ /*
+ * Free the space for the object.
+ */
+
+ free((caddr_t)object, M_VMOBJ);
+}
+
+/*
+ * vm_object_page_clean
+ *
+ * Clean all dirty pages in the specified range of object.
+ * Leaves page on whatever queue it is currently on.
+ *
+ * Odd semantics: if start == end, we clean everything.
+ *
+ * The object must be locked.
+ */
+#if 1
+boolean_t
+vm_object_page_clean(object, start, end, syncio, de_queue)
+ register vm_object_t object;
+ register vm_offset_t start;
+ register vm_offset_t end;
+ boolean_t syncio;
+ boolean_t de_queue;
+{
+ register vm_page_t p, nextp;
+ int s;
+ int size;
+
+ if (object->pager == NULL)
+ return 1;
+
+ if (start != end) {
+ start = trunc_page(start);
+ end = round_page(end);
+ }
+ size = end - start;
+
+again:
+ /*
+ * Wait until the pageout daemon is through with the object.
+ */
+ while (object->paging_in_progress) {
+ vm_object_sleep((int)object, object, FALSE);
+ }
+
+ nextp = object->memq.tqh_first;
+ while ( (p = nextp) && ((start == end) || (size != 0) ) ) {
+ nextp = p->listq.tqe_next;
+ if (start == end || (p->offset >= start && p->offset < end)) {
+ if (p->flags & PG_BUSY)
+ continue;
+
+ size -= PAGE_SIZE;
+
+ if ((p->flags & PG_CLEAN)
+ && pmap_is_modified(VM_PAGE_TO_PHYS(p)))
+ p->flags &= ~PG_CLEAN;
+
+ if ((p->flags & PG_CLEAN) == 0) {
+ vm_pageout_clean(p,VM_PAGEOUT_FORCE);
+ goto again;
+ }
+ }
+ }
+ wakeup((caddr_t)object);
+ return 1;
+}
+#endif
+/*
+ * vm_object_page_clean
+ *
+ * Clean all dirty pages in the specified range of object.
+ * If syncio is TRUE, page cleaning is done synchronously.
+ * If de_queue is TRUE, pages are removed from any paging queue
+ * they were on, otherwise they are left on whatever queue they
+ * were on before the cleaning operation began.
+ *
+ * Odd semantics: if start == end, we clean everything.
+ *
+ * The object must be locked.
+ *
+ * Returns TRUE if all was well, FALSE if there was a pager error
+ * somewhere. We attempt to clean (and dequeue) all pages regardless
+ * of where an error occurs.
+ */
+#if 0
+boolean_t
+vm_object_page_clean(object, start, end, syncio, de_queue)
+ register vm_object_t object;
+ register vm_offset_t start;
+ register vm_offset_t end;
+ boolean_t syncio;
+ boolean_t de_queue;
+{
+ register vm_page_t p;
+ int onqueue;
+ boolean_t noerror = TRUE;
+
+ if (object == NULL)
+ return (TRUE);
+
+ /*
+ * If it is an internal object and there is no pager, attempt to
+ * allocate one. Note that vm_object_collapse may relocate one
+ * from a collapsed object so we must recheck afterward.
+ */
+ if ((object->flags & OBJ_INTERNAL) && object->pager == NULL) {
+ vm_object_collapse(object);
+ if (object->pager == NULL) {
+ vm_pager_t pager;
+
+ vm_object_unlock(object);
+ pager = vm_pager_allocate(PG_DFLT, (caddr_t)0,
+ object->size, VM_PROT_ALL,
+ (vm_offset_t)0);
+ if (pager)
+ vm_object_setpager(object, pager, 0, FALSE);
+ vm_object_lock(object);
+ }
+ }
+ if (object->pager == NULL)
+ return (FALSE);
+
+again:
+ /*
+ * Wait until the pageout daemon is through with the object.
+ */
+ while (object->paging_in_progress) {
+ vm_object_sleep((int)object, object, FALSE);
+ vm_object_lock(object);
+ }
+ /*
+ * Loop through the object page list cleaning as necessary.
+ */
+ for (p = object->memq.tqh_first; p != NULL; p = p->listq.tqe_next) {
+ onqueue = 0;
+ if ((start == end || p->offset >= start && p->offset < end) &&
+ !(p->flags & PG_FICTITIOUS)) {
+ if ((p->flags & PG_CLEAN) &&
+ pmap_is_modified(VM_PAGE_TO_PHYS(p)))
+ p->flags &= ~PG_CLEAN;
+ /*
+ * Remove the page from any paging queue.
+ * This needs to be done if either we have been
+ * explicitly asked to do so or it is about to
+ * be cleaned (see comment below).
+ */
+ if (de_queue || !(p->flags & PG_CLEAN)) {
+ vm_page_lock_queues();
+ if (p->flags & PG_ACTIVE) {
+ TAILQ_REMOVE(&vm_page_queue_active,
+ p, pageq);
+ p->flags &= ~PG_ACTIVE;
+ cnt.v_active_count--;
+ onqueue = 1;
+ } else if (p->flags & PG_INACTIVE) {
+ TAILQ_REMOVE(&vm_page_queue_inactive,
+ p, pageq);
+ p->flags &= ~PG_INACTIVE;
+ cnt.v_inactive_count--;
+ onqueue = -1;
+ } else
+ onqueue = 0;
+ vm_page_unlock_queues();
+ }
+ /*
+ * To ensure the state of the page doesn't change
+ * during the clean operation we do two things.
+ * First we set the busy bit and write-protect all
+ * mappings to ensure that write accesses to the
+ * page block (in vm_fault). Second, we remove
+ * the page from any paging queue to foil the
+ * pageout daemon (vm_pageout_scan).
+ */
+ pmap_page_protect(VM_PAGE_TO_PHYS(p), VM_PROT_READ);
+ if (!(p->flags & PG_CLEAN)) {
+ p->flags |= PG_BUSY;
+ object->paging_in_progress++;
+ vm_object_unlock(object);
+ /*
+ * XXX if put fails we mark the page as
+ * clean to avoid an infinite loop.
+				 * Will lose changes to the page.
+ */
+ if (vm_pager_put(object->pager, p, syncio)) {
+ printf("%s: pager_put error\n",
+ "vm_object_page_clean");
+ p->flags |= PG_CLEAN;
+ noerror = FALSE;
+ }
+ vm_object_lock(object);
+ object->paging_in_progress--;
+ if (!de_queue && onqueue) {
+ vm_page_lock_queues();
+ if (onqueue > 0)
+ vm_page_activate(p);
+ else
+ vm_page_deactivate(p);
+ vm_page_unlock_queues();
+ }
+ p->flags &= ~PG_BUSY;
+ PAGE_WAKEUP(p);
+ goto again;
+ }
+ }
+ }
+ return (noerror);
+}
+#endif
+
+/*
+ * vm_object_deactivate_pages
+ *
+ * Deactivate all pages in the specified object. (Keep its pages
+ * in memory even though it is no longer referenced.)
+ *
+ * The object must be locked.
+ */
+void
+vm_object_deactivate_pages(object)
+ register vm_object_t object;
+{
+ register vm_page_t p, next;
+
+ for (p = object->memq.tqh_first; p != NULL; p = next) {
+ next = p->listq.tqe_next;
+ vm_page_lock_queues();
+ vm_page_deactivate(p);
+ vm_page_unlock_queues();
+ }
+}
+
+/*
+ * Trim the object cache to size.
+ */
+void
+vm_object_cache_trim()
+{
+ register vm_object_t object;
+
+ vm_object_cache_lock();
+ while (vm_object_cached > vm_cache_max) {
+ object = vm_object_cached_list.tqh_first;
+ vm_object_cache_unlock();
+
+ if (object != vm_object_lookup(object->pager))
+			panic("vm_object_cache_trim: I'm sooo confused.");
+
+ pager_cache(object, FALSE);
+
+ vm_object_cache_lock();
+ }
+ vm_object_cache_unlock();
+}
+
+
+/*
+ * vm_object_pmap_copy:
+ *
+ * Makes all physical pages in the specified
+ * object range copy-on-write. No writeable
+ * references to these pages should remain.
+ *
+ * The object must *not* be locked.
+ */
+void vm_object_pmap_copy(object, start, end)
+ register vm_object_t object;
+ register vm_offset_t start;
+ register vm_offset_t end;
+{
+ register vm_page_t p;
+
+ if (object == NULL)
+ return;
+
+ vm_object_lock(object);
+ for (p = object->memq.tqh_first; p != NULL; p = p->listq.tqe_next) {
+ if ((start <= p->offset) && (p->offset < end)) {
+ pmap_page_protect(VM_PAGE_TO_PHYS(p), VM_PROT_READ);
+ p->flags |= PG_COPYONWRITE;
+ }
+ }
+ vm_object_unlock(object);
+}
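+
+/*
+ * Illustrative call (editor's sketch, not part of the original source):
+ * to make every resident page of a hypothetical object "obj" in the
+ * byte range [off, off + len) copy-on-write, a caller could do
+ *
+ *	vm_object_pmap_copy(obj, trunc_page(off), round_page(off + len));
+ *
+ * The routine takes the object lock itself, so the caller must not
+ * already hold it.
+ */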
+
+/*
+ * vm_object_pmap_remove:
+ *
+ * Removes all physical pages in the specified
+ * object range from all physical maps.
+ *
+ * The object must *not* be locked.
+ */
+void
+vm_object_pmap_remove(object, start, end)
+ register vm_object_t object;
+ register vm_offset_t start;
+ register vm_offset_t end;
+{
+ register vm_page_t p;
+
+ if (object == NULL)
+ return;
+
+ vm_object_lock(object);
+again:
+ for (p = object->memq.tqh_first; p != NULL; p = p->listq.tqe_next) {
+ if ((start <= p->offset) && (p->offset < end)) {
+ if (p->flags & PG_BUSY) {
+ p->flags |= PG_WANTED;
+ tsleep((caddr_t) p, PVM, "vmopmr", 0);
+ goto again;
+ }
+ pmap_page_protect(VM_PAGE_TO_PHYS(p), VM_PROT_NONE);
+ if ((p->flags & PG_CLEAN) == 0)
+ p->flags |= PG_LAUNDRY;
+ }
+ }
+ vm_object_unlock(object);
+}
+
+/*
+ * vm_object_copy:
+ *
+ * Create a new object which is a copy of an existing
+ * object, and mark all of the pages in the existing
+ * object 'copy-on-write'. The new object has one reference.
+ * Returns the new object.
+ *
+ * May defer the copy until later if the object is not backed
+ * up by a non-default pager.
+ */
+void vm_object_copy(src_object, src_offset, size,
+ dst_object, dst_offset, src_needs_copy)
+ register vm_object_t src_object;
+ vm_offset_t src_offset;
+ vm_size_t size;
+ vm_object_t *dst_object; /* OUT */
+ vm_offset_t *dst_offset; /* OUT */
+ boolean_t *src_needs_copy; /* OUT */
+{
+ register vm_object_t new_copy;
+ register vm_object_t old_copy;
+ vm_offset_t new_start, new_end;
+
+ register vm_page_t p;
+
+ if (src_object == NULL) {
+ /*
+ * Nothing to copy
+ */
+ *dst_object = NULL;
+ *dst_offset = 0;
+ *src_needs_copy = FALSE;
+ return;
+ }
+
+
+ /*
+ * If the object's pager is null_pager or the
+ * default pager, we don't have to make a copy
+ * of it. Instead, we set the needs copy flag and
+ * make a shadow later.
+ */
+
+ vm_object_lock(src_object);
+
+ /*
+ * Try to collapse the object before copying it.
+ */
+
+ vm_object_collapse(src_object);
+
+ if (src_object->pager == NULL ||
+ src_object->pager->pg_type == PG_SWAP ||
+ (src_object->flags & OBJ_INTERNAL)) {
+
+ /*
+ * Make another reference to the object
+ */
+ src_object->ref_count++;
+
+ /*
+ * Mark all of the pages copy-on-write.
+ */
+ for (p = src_object->memq.tqh_first; p; p = p->listq.tqe_next)
+ if (src_offset <= p->offset &&
+ p->offset < src_offset + size)
+ p->flags |= PG_COPYONWRITE;
+ vm_object_unlock(src_object);
+
+ *dst_object = src_object;
+ *dst_offset = src_offset;
+
+ /*
+ * Must make a shadow when write is desired
+ */
+ *src_needs_copy = TRUE;
+ return;
+ }
+
+
+ /*
+ * If the object has a pager, the pager wants to
+ * see all of the changes. We need a copy-object
+ * for the changed pages.
+ *
+ * If there is a copy-object, and it is empty,
+ * no changes have been made to the object since the
+ * copy-object was made. We can use the same copy-
+ * object.
+ */
+
+ Retry1:
+ old_copy = src_object->copy;
+ if (old_copy != NULL) {
+ /*
+ * Try to get the locks (out of order)
+ */
+ if (!vm_object_lock_try(old_copy)) {
+ vm_object_unlock(src_object);
+
+ /* should spin a bit here... */
+ vm_object_lock(src_object);
+ goto Retry1;
+ }
+
+ if (old_copy->resident_page_count == 0 &&
+ old_copy->pager == NULL) {
+ /*
+ * Return another reference to
+ * the existing copy-object.
+ */
+ old_copy->ref_count++;
+ vm_object_unlock(old_copy);
+ vm_object_unlock(src_object);
+ *dst_object = old_copy;
+ *dst_offset = src_offset;
+ *src_needs_copy = FALSE;
+ return;
+ }
+ vm_object_unlock(old_copy);
+ }
+ vm_object_unlock(src_object);
+
+ /*
+ * If the object has a pager, the pager wants
+ * to see all of the changes. We must make
+ * a copy-object and put the changed pages there.
+ *
+ * The copy-object is always made large enough to
+ * completely shadow the original object, since
+ * it may have several users who want to shadow
+ * the original object at different points.
+ */
+
+ new_copy = vm_object_allocate(src_object->size);
+
+ Retry2:
+ vm_object_lock(src_object);
+ /*
+ * Copy object may have changed while we were unlocked
+ */
+ old_copy = src_object->copy;
+ if (old_copy != NULL) {
+ /*
+ * Try to get the locks (out of order)
+ */
+ if (!vm_object_lock_try(old_copy)) {
+ vm_object_unlock(src_object);
+ goto Retry2;
+ }
+
+ /*
+ * Consistency check
+ */
+ if (old_copy->shadow != src_object ||
+ old_copy->shadow_offset != (vm_offset_t) 0)
+ panic("vm_object_copy: copy/shadow inconsistency");
+
+ /*
+ * Make the old copy-object shadow the new one.
+ * It will receive no more pages from the original
+ * object.
+ */
+
+ src_object->ref_count--; /* remove ref. from old_copy */
+ old_copy->shadow = new_copy;
+ new_copy->ref_count++; /* locking not needed - we
+ have the only pointer */
+ vm_object_unlock(old_copy); /* done with old_copy */
+ }
+
+ new_start = (vm_offset_t) 0; /* always shadow original at 0 */
+ new_end = (vm_offset_t) new_copy->size; /* for the whole object */
+
+ /*
+ * Point the new copy at the existing object.
+ */
+
+ new_copy->shadow = src_object;
+ new_copy->shadow_offset = new_start;
+ src_object->ref_count++;
+ src_object->copy = new_copy;
+
+ /*
+ * Mark all the affected pages of the existing object
+ * copy-on-write.
+ */
+ for (p = src_object->memq.tqh_first; p != NULL; p = p->listq.tqe_next)
+ if ((new_start <= p->offset) && (p->offset < new_end))
+ p->flags |= PG_COPYONWRITE;
+
+ vm_object_unlock(src_object);
+
+ *dst_object = new_copy;
+ *dst_offset = src_offset - new_start;
+ *src_needs_copy = FALSE;
+}
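+
+/*
+ * Illustrative call (editor's sketch, not part of the original source):
+ * dst_object, dst_offset and src_needs_copy are OUT parameters, so a
+ * hypothetical caller passes their addresses:
+ *
+ *	vm_object_t new_obj;
+ *	vm_offset_t new_off;
+ *	boolean_t needs_copy;
+ *
+ *	vm_object_copy(src_obj, src_off, len, &new_obj, &new_off,
+ *	    &needs_copy);
+ *
+ * If needs_copy comes back TRUE the copy was deferred; the caller is
+ * expected to shadow the object before the first write (see
+ * vm_object_shadow below).  "src_obj", "src_off" and "len" are made-up
+ * names used only for illustration.
+ */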
+
+/*
+ * vm_object_shadow:
+ *
+ * Create a new object which is backed by the
+ * specified existing object range. The source
+ * object reference is deallocated.
+ *
+ * The new object and offset into that object
+ * are returned in the source parameters.
+ */
+
+void
+vm_object_shadow(object, offset, length)
+ vm_object_t *object; /* IN/OUT */
+ vm_offset_t *offset; /* IN/OUT */
+ vm_size_t length;
+{
+ register vm_object_t source;
+ register vm_object_t result;
+
+ source = *object;
+
+ /*
+ * Allocate a new object with the given length
+ */
+
+ if ((result = vm_object_allocate(length)) == NULL)
+ panic("vm_object_shadow: no object for shadowing");
+
+ /*
+ * The new object shadows the source object, adding
+ * a reference to it. Our caller changes his reference
+ * to point to the new object, removing a reference to
+ * the source object. Net result: no change of reference
+ * count.
+ */
+ result->shadow = source;
+
+ /*
+ * Store the offset into the source object,
+ * and fix up the offset into the new object.
+ */
+
+ result->shadow_offset = *offset;
+
+ /*
+	 *	Return the new object and offset
+ */
+
+ *offset = 0;
+ *object = result;
+}
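+
+/*
+ * Illustrative call (editor's sketch, not part of the original source):
+ * both parameters are IN/OUT, so the caller's own variables are updated
+ * in place:
+ *
+ *	vm_object_shadow(&obj, &off, len);	("obj", "off", "len" are
+ *						 hypothetical caller state)
+ *
+ * Afterwards "obj" names the new shadow object and "off" is 0; the
+ * caller's original reference has effectively been handed to the
+ * shadow, so the net reference count is unchanged, as described above.
+ */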
+
+/*
+ * Set the specified object's pager to the specified pager.
+ */
+
+void
+vm_object_setpager(object, pager, paging_offset,
+ read_only)
+ vm_object_t object;
+ vm_pager_t pager;
+ vm_offset_t paging_offset;
+ boolean_t read_only;
+{
+#ifdef lint
+ read_only++; /* No longer used */
+#endif /* lint */
+
+ vm_object_lock(object); /* XXX ? */
+ if (object->pager && object->pager != pager) {
+ panic("!!!pager already allocated!!!\n");
+ }
+ object->pager = pager;
+ object->paging_offset = paging_offset;
+ vm_object_unlock(object); /* XXX ? */
+}
+
+/*
+ * vm_object_hash hashes the pager/id pair.
+ */
+
+#define vm_object_hash(pager) \
+ (((unsigned)pager >> 5)%VM_OBJECT_HASH_COUNT)
+
+/*
+ * vm_object_lookup looks in the object cache for an object with the
+ * specified pager and paging id.
+ */
+
+vm_object_t vm_object_lookup(pager)
+ vm_pager_t pager;
+{
+ register vm_object_hash_entry_t entry;
+ vm_object_t object;
+
+ vm_object_cache_lock();
+
+ for (entry = vm_object_hashtable[vm_object_hash(pager)].tqh_first;
+ entry != NULL;
+ entry = entry->hash_links.tqe_next) {
+ object = entry->object;
+ if (object->pager == pager) {
+ vm_object_lock(object);
+ if (object->ref_count == 0) {
+ TAILQ_REMOVE(&vm_object_cached_list, object,
+ cached_list);
+ vm_object_cached--;
+ }
+ object->ref_count++;
+ vm_object_unlock(object);
+ vm_object_cache_unlock();
+ return(object);
+ }
+ }
+
+ vm_object_cache_unlock();
+ return(NULL);
+}
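+
+/*
+ * Illustrative lookup-or-create sequence (editor's sketch, not part of
+ * the original source): code that wants the object associated with a
+ * pager, creating and caching one if necessary, could do something like
+ *
+ *	object = vm_object_lookup(pager);
+ *	if (object == NULL) {
+ *		object = vm_object_allocate(size);
+ *		vm_object_enter(object, pager);
+ *		vm_object_setpager(object, pager, 0, FALSE);
+ *	}
+ *
+ * "pager" and "size" are caller-supplied; the sequence the real pagers
+ * use may differ in detail.
+ */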
+
+/*
+ * vm_object_enter enters the specified object/pager/id into
+ * the hash table.
+ */
+
+void vm_object_enter(object, pager)
+ vm_object_t object;
+ vm_pager_t pager;
+{
+ struct vm_object_hash_head *bucket;
+ register vm_object_hash_entry_t entry;
+
+ /*
+ * We don't cache null objects, and we can't cache
+ * objects with the null pager.
+ */
+
+ if (object == NULL)
+ return;
+ if (pager == NULL)
+ return;
+
+ bucket = &vm_object_hashtable[vm_object_hash(pager)];
+ entry = (vm_object_hash_entry_t)
+ malloc((u_long)sizeof *entry, M_VMOBJHASH, M_WAITOK);
+ entry->object = object;
+ object->flags |= OBJ_CANPERSIST;
+
+ vm_object_cache_lock();
+ TAILQ_INSERT_TAIL(bucket, entry, hash_links);
+ vm_object_cache_unlock();
+}
+
+/*
+ * vm_object_remove:
+ *
+ * Remove the pager from the hash table.
+ * Note: This assumes that the object cache
+ * is locked. XXX this should be fixed
+ * by reorganizing vm_object_deallocate.
+ */
+void
+vm_object_remove(pager)
+ register vm_pager_t pager;
+{
+ struct vm_object_hash_head *bucket;
+ register vm_object_hash_entry_t entry;
+ register vm_object_t object;
+
+ bucket = &vm_object_hashtable[vm_object_hash(pager)];
+
+ for (entry = bucket->tqh_first;
+ entry != NULL;
+ entry = entry->hash_links.tqe_next) {
+ object = entry->object;
+ if (object->pager == pager) {
+ TAILQ_REMOVE(bucket, entry, hash_links);
+ free((caddr_t)entry, M_VMOBJHASH);
+ break;
+ }
+ }
+}
+
+boolean_t vm_object_collapse_allowed = TRUE;
+/*
+ * vm_object_collapse:
+ *
+ * Collapse an object with the object backing it.
+ * Pages in the backing object are moved into the
+ * parent, and the backing object is deallocated.
+ *
+ * Requires that the object be locked and the page
+ * queues be unlocked.
+ *
+ * This routine has significant changes by John S. Dyson
+ * to fix some swap memory leaks. 18 Dec 93
+ *
+ */
+void
+vm_object_collapse(object)
+ register vm_object_t object;
+
+{
+ register vm_object_t backing_object;
+ register vm_offset_t backing_offset;
+ register vm_size_t size;
+ register vm_offset_t new_offset;
+ register vm_page_t p, pp;
+
+ if (!vm_object_collapse_allowed)
+ return;
+
+ while (TRUE) {
+ /*
+ * Verify that the conditions are right for collapse:
+ *
+ * The object exists and no pages in it are currently
+ * being paged out.
+ */
+ if (object == NULL ||
+ object->paging_in_progress != 0)
+ return;
+
+ /*
+ * There is a backing object, and
+ */
+
+ if ((backing_object = object->shadow) == NULL)
+ return;
+
+ vm_object_lock(backing_object);
+ /*
+ * ...
+ * The backing object is not read_only,
+ * and no pages in the backing object are
+ * currently being paged out.
+ * The backing object is internal.
+ */
+
+ if ((backing_object->flags & OBJ_INTERNAL) == 0 ||
+ backing_object->paging_in_progress != 0) {
+ vm_object_unlock(backing_object);
+ return;
+ }
+
+ /*
+ * The backing object can't be a copy-object:
+ * the shadow_offset for the copy-object must stay
+ * as 0. Furthermore (for the 'we have all the
+ * pages' case), if we bypass backing_object and
+ * just shadow the next object in the chain, old
+ * pages from that object would then have to be copied
+ * BOTH into the (former) backing_object and into the
+ * parent object.
+ */
+ if (backing_object->shadow != NULL &&
+ backing_object->shadow->copy == backing_object) {
+ vm_object_unlock(backing_object);
+ return;
+ }
+
+ /*
+ * we can deal only with the swap pager
+ */
+ if ((object->pager &&
+ object->pager->pg_type != PG_SWAP) ||
+ (backing_object->pager &&
+ backing_object->pager->pg_type != PG_SWAP)) {
+ vm_object_unlock(backing_object);
+ return;
+ }
+
+
+ /*
+ * We know that we can either collapse the backing
+ * object (if the parent is the only reference to
+ * it) or (perhaps) remove the parent's reference
+ * to it.
+ */
+
+ backing_offset = object->shadow_offset;
+ size = object->size;
+
+ /*
+ * If there is exactly one reference to the backing
+ * object, we can collapse it into the parent.
+ */
+
+ if (backing_object->ref_count == 1) {
+
+ /*
+ * We can collapse the backing object.
+ *
+ * Move all in-memory pages from backing_object
+ * to the parent. Pages that have been paged out
+ * will be overwritten by any of the parent's
+ * pages that shadow them.
+ */
+
+ while (p = backing_object->memq.tqh_first) {
+
+ new_offset = (p->offset - backing_offset);
+
+ /*
+ * If the parent has a page here, or if
+ * this page falls outside the parent,
+ * dispose of it.
+ *
+ * Otherwise, move it as planned.
+ */
+
+ if (p->offset < backing_offset ||
+ new_offset >= size) {
+ vm_page_lock_queues();
+ vm_page_free(p);
+ vm_page_unlock_queues();
+ } else {
+ pp = vm_page_lookup(object, new_offset);
+ if (pp != NULL || (object->pager && vm_pager_has_page(object->pager,
+ object->paging_offset + new_offset))) {
+ vm_page_lock_queues();
+ vm_page_free(p);
+ vm_page_unlock_queues();
+ } else {
+ vm_page_rename(p, object, new_offset);
+ }
+ }
+ }
+
+ /*
+ * Move the pager from backing_object to object.
+ */
+
+ if (backing_object->pager) {
+ backing_object->paging_in_progress++;
+ if (object->pager) {
+ vm_pager_t bopager;
+ object->paging_in_progress++;
+ /*
+ * copy shadow object pages into ours
+ * and destroy unneeded pages in shadow object.
+ */
+					bopager = backing_object->pager;
+					vm_object_remove(backing_object->pager);
+					backing_object->pager = NULL;
+ swap_pager_copy(
+ bopager, backing_object->paging_offset,
+ object->pager, object->paging_offset,
+ object->shadow_offset);
+ object->paging_in_progress--;
+ if (object->paging_in_progress == 0)
+ wakeup((caddr_t)object);
+ } else {
+ object->paging_in_progress++;
+ /*
+					 * grab the shadow object's pager
+ */
+ object->pager = backing_object->pager;
+ object->paging_offset = backing_object->paging_offset + backing_offset;
+ vm_object_remove(backing_object->pager);
+ backing_object->pager = NULL;
+ /*
+ * free unnecessary blocks
+ */
+ swap_pager_freespace(object->pager, 0, object->paging_offset);
+ object->paging_in_progress--;
+ if (object->paging_in_progress == 0)
+ wakeup((caddr_t)object);
+ }
+ backing_object->paging_in_progress--;
+ if (backing_object->paging_in_progress == 0)
+ wakeup((caddr_t)backing_object);
+ }
+
+
+ /*
+ * Object now shadows whatever backing_object did.
+ * Note that the reference to backing_object->shadow
+ * moves from within backing_object to within object.
+ */
+
+ object->shadow = backing_object->shadow;
+ object->shadow_offset += backing_object->shadow_offset;
+ if (object->shadow != NULL &&
+ object->shadow->copy != NULL) {
+ panic("vm_object_collapse: we collapsed a copy-object!");
+ }
+ /*
+ * Discard backing_object.
+ *
+ * Since the backing object has no pages, no
+ * pager left, and no object references within it,
+ * all that is necessary is to dispose of it.
+ */
+
+ vm_object_unlock(backing_object);
+
+ simple_lock(&vm_object_list_lock);
+ TAILQ_REMOVE(&vm_object_list, backing_object,
+ object_list);
+ vm_object_count--;
+ simple_unlock(&vm_object_list_lock);
+
+ free((caddr_t)backing_object, M_VMOBJ);
+
+ object_collapses++;
+ }
+ else {
+ /*
+ * If all of the pages in the backing object are
+ * shadowed by the parent object, the parent
+ * object no longer has to shadow the backing
+ * object; it can shadow the next one in the
+ * chain.
+ *
+ * The backing object must not be paged out - we'd
+ * have to check all of the paged-out pages, as
+ * well.
+ */
+
+ if (backing_object->pager != NULL) {
+ vm_object_unlock(backing_object);
+ return;
+ }
+
+ /*
+ * Should have a check for a 'small' number
+ * of pages here.
+ */
+
+			for (p = backing_object->memq.tqh_first; p; p = p->listq.tqe_next) {
+ new_offset = (p->offset - backing_offset);
+
+ /*
+ * If the parent has a page here, or if
+ * this page falls outside the parent,
+ * keep going.
+ *
+ * Otherwise, the backing_object must be
+ * left in the chain.
+ */
+
+ if (p->offset >= backing_offset &&
+ new_offset <= size &&
+ ((pp = vm_page_lookup(object, new_offset)) == NULL || (pp->flags & PG_FAKE)) &&
+ (!object->pager || !vm_pager_has_page(object->pager, object->paging_offset+new_offset))) {
+ /*
+ * Page still needed.
+ * Can't go any further.
+ */
+ vm_object_unlock(backing_object);
+ return;
+ }
+ }
+
+ /*
+ * Make the parent shadow the next object
+ * in the chain. Deallocating backing_object
+ * will not remove it, since its reference
+ * count is at least 2.
+ */
+
+ vm_object_reference(object->shadow = backing_object->shadow);
+ object->shadow_offset += backing_object->shadow_offset;
+
+ /*
+ * Backing object might have had a copy pointer
+ * to us. If it did, clear it.
+ */
+ if (backing_object->copy == object) {
+ backing_object->copy = NULL;
+ }
+
+			/*
+			 * Drop the reference count on backing_object.
+			 * Since its ref_count was at least 2, it
+			 * will not vanish; so we don't need to call
+			 * vm_object_deallocate.
+			 */
+ if (backing_object->ref_count == 1)
+ printf("should have called obj deallocate\n");
+ backing_object->ref_count--;
+ vm_object_unlock(backing_object);
+
+ object_bypasses ++;
+
+ }
+
+ /*
+ * Try again with this object's new backing object.
+ */
+ }
+}
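+
+/*
+ * Illustrative context (editor's sketch, not part of the original
+ * source): collapsing is an opportunistic optimization, so callers in
+ * this file simply invoke it with the object locked and carry on
+ * whether or not anything was collapsed:
+ *
+ *	vm_object_lock(object);
+ *	vm_object_collapse(object);
+ *	... continue using object ...
+ *
+ * as vm_object_copy() above and vm_object_coalesce() below both do.
+ */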
+
+/*
+ * vm_object_page_remove: [internal]
+ *
+ * Removes all physical pages in the specified
+ * object range from the object's list of pages.
+ *
+ * The object must be locked.
+ */
+void
+vm_object_page_remove(object, start, end)
+ register vm_object_t object;
+ register vm_offset_t start;
+ register vm_offset_t end;
+{
+ register vm_page_t p, next;
+ vm_offset_t size;
+ int cnt;
+ int s;
+
+ if (object == NULL)
+ return;
+
+ start = trunc_page(start);
+ end = round_page(end);
+again:
+ size = end-start;
+ if (size > 4*PAGE_SIZE || size >= object->size/4) {
+ for (p = object->memq.tqh_first; (p != NULL && size > 0); p = next) {
+ next = p->listq.tqe_next;
+ if ((start <= p->offset) && (p->offset < end)) {
+ if (p->flags & PG_BUSY) {
+ p->flags |= PG_WANTED;
+ tsleep((caddr_t) p, PVM, "vmopar", 0);
+ goto again;
+ }
+ pmap_page_protect(VM_PAGE_TO_PHYS(p), VM_PROT_NONE);
+ vm_page_lock_queues();
+ vm_page_free(p);
+ vm_page_unlock_queues();
+ size -= PAGE_SIZE;
+ }
+ }
+ } else {
+ while (size > 0) {
+ while (p = vm_page_lookup(object, start)) {
+ if (p->flags & PG_BUSY) {
+ p->flags |= PG_WANTED;
+ tsleep((caddr_t) p, PVM, "vmopar", 0);
+ goto again;
+ }
+ pmap_page_protect(VM_PAGE_TO_PHYS(p), VM_PROT_NONE);
+ vm_page_lock_queues();
+ vm_page_free(p);
+ vm_page_unlock_queues();
+ }
+ start += PAGE_SIZE;
+ size -= PAGE_SIZE;
+ }
+ }
+}
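+
+/*
+ * Illustrative call (editor's note, not part of the original source):
+ * vm_object_coalesce() below uses this routine to throw away any pages
+ * left over from a previous deallocation before growing the object:
+ *
+ *	vm_object_page_remove(prev_object,
+ *	    prev_offset + prev_size,
+ *	    prev_offset + prev_size + next_size);
+ *
+ * The caller must hold the object lock; busy pages are slept on and the
+ * scan restarted, as the code above shows.
+ */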
+
+/*
+ * Routine: vm_object_coalesce
+ * Function: Coalesces two objects backing up adjoining
+ * regions of memory into a single object.
+ *
+ * returns TRUE if objects were combined.
+ *
+ * NOTE: Only works at the moment if the second object is NULL -
+ * if it's not, which object do we lock first?
+ *
+ * Parameters:
+ * prev_object First object to coalesce
+ * prev_offset Offset into prev_object
+ *	next_object	Second object to coalesce
+ * next_offset Offset into next_object
+ *
+ * prev_size Size of reference to prev_object
+ * next_size Size of reference to next_object
+ *
+ * Conditions:
+ * The object must *not* be locked.
+ */
+boolean_t vm_object_coalesce(prev_object, next_object,
+ prev_offset, next_offset,
+ prev_size, next_size)
+
+ register vm_object_t prev_object;
+ vm_object_t next_object;
+ vm_offset_t prev_offset, next_offset;
+ vm_size_t prev_size, next_size;
+{
+ vm_size_t newsize;
+
+#ifdef lint
+ next_offset++;
+#endif
+
+ if (next_object != NULL) {
+ return(FALSE);
+ }
+
+ if (prev_object == NULL) {
+ return(TRUE);
+ }
+
+ vm_object_lock(prev_object);
+
+ /*
+ * Try to collapse the object first
+ */
+ vm_object_collapse(prev_object);
+
+ /*
+ * Can't coalesce if:
+ * . more than one reference
+ * . paged out
+ * . shadows another object
+ * . has a copy elsewhere
+ * (any of which mean that the pages not mapped to
+ * prev_entry may be in use anyway)
+ */
+
+ if (prev_object->ref_count > 1 ||
+ prev_object->pager != NULL ||
+ prev_object->shadow != NULL ||
+ prev_object->copy != NULL) {
+ vm_object_unlock(prev_object);
+ return(FALSE);
+ }
+
+ /*
+ * Remove any pages that may still be in the object from
+ * a previous deallocation.
+ */
+
+ vm_object_page_remove(prev_object,
+ prev_offset + prev_size,
+ prev_offset + prev_size + next_size);
+
+ /*
+ * Extend the object if necessary.
+ */
+ newsize = prev_offset + prev_size + next_size;
+ if (newsize > prev_object->size)
+ prev_object->size = newsize;
+
+ vm_object_unlock(prev_object);
+ return(TRUE);
+}
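+
+/*
+ * Illustrative call (editor's sketch, not part of the original source):
+ * map code extending a previous allocation by "grow" bytes could ask
+ *
+ *	if (vm_object_coalesce(prev_obj, NULL, prev_off, (vm_offset_t)0,
+ *	    prev_size, grow))
+ *		... reuse prev_obj for the new range ...
+ *
+ * All names here are hypothetical.  Only the next_object == NULL case
+ * is handled at present, so the second argument is always NULL.
+ */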
+
+/*
+ * returns page after looking up in shadow chain
+ */
+
+vm_page_t
+vm_object_page_lookup(object, offset)
+ vm_object_t object;
+ vm_offset_t offset;
+{
+ vm_page_t m;
+ if (!(m=vm_page_lookup(object, offset))) {
+ if (!object->shadow)
+ return 0;
+ else
+ return vm_object_page_lookup(object->shadow, offset + object->shadow_offset);
+ }
+ return m;
+}
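+
+/*
+ * Illustrative call (editor's note, not part of the original source):
+ * unlike vm_page_lookup(), this walks the shadow chain, so a caller can
+ * ask for the page that actually backs an offset of a shadowed object:
+ *
+ *	m = vm_object_page_lookup(first_object, offset);
+ *
+ * A NULL result means the page is not resident anywhere in the chain;
+ * the shadow offset is added at each level, as the recursion shows.
+ */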
+
+#define DEBUG
+#if defined(DEBUG) || (NDDB > 0)
+/*
+ * vm_object_print: [ debug ]
+ */
+void vm_object_print(object, full)
+ vm_object_t object;
+ boolean_t full;
+{
+ register vm_page_t p;
+ extern indent;
+
+ register int count;
+
+ if (object == NULL)
+ return;
+
+ iprintf("Object 0x%x: size=0x%x, res=%d, ref=%d, ",
+ (int) object, (int) object->size,
+ object->resident_page_count, object->ref_count);
+ printf("pager=0x%x+0x%x, shadow=(0x%x)+0x%x\n",
+ (int) object->pager, (int) object->paging_offset,
+ (int) object->shadow, (int) object->shadow_offset);
+ printf("cache: next=0x%x, prev=0x%x\n",
+ object->cached_list.tqe_next, object->cached_list.tqe_prev);
+
+ if (!full)
+ return;
+
+ indent += 2;
+ count = 0;
+ for (p = object->memq.tqh_first; p != NULL; p = p->listq.tqe_next) {
+ if (count == 0)
+ iprintf("memory:=");
+ else if (count == 6) {
+ printf("\n");
+ iprintf(" ...");
+ count = 0;
+ } else
+ printf(",");
+ count++;
+
+ printf("(off=0x%x,page=0x%x)", p->offset, VM_PAGE_TO_PHYS(p));
+ }
+ if (count != 0)
+ printf("\n");
+ indent -= 2;
+}
+#endif /* defined(DEBUG) || (NDDB > 0) */
diff --git a/sys/vm/vm_object.h b/sys/vm/vm_object.h
new file mode 100644
index 0000000..5e220ac
--- /dev/null
+++ b/sys/vm/vm_object.h
@@ -0,0 +1,173 @@
+/*
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * The Mach Operating System project at Carnegie-Mellon University.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)vm_object.h 8.3 (Berkeley) 1/12/94
+ *
+ *
+ * Copyright (c) 1987, 1990 Carnegie-Mellon University.
+ * All rights reserved.
+ *
+ * Authors: Avadis Tevanian, Jr., Michael Wayne Young
+ *
+ * Permission to use, copy, modify and distribute this software and
+ * its documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ *
+ * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
+ * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
+ * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
+ * School of Computer Science
+ * Carnegie Mellon University
+ * Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie the
+ * rights to redistribute these changes.
+ */
+
+/*
+ * Virtual memory object module definitions.
+ */
+
+#ifndef _VM_OBJECT_
+#define _VM_OBJECT_
+
+#include <vm/vm_page.h>
+#include <vm/vm_pager.h>
+
+/*
+ * Types defined:
+ *
+ * vm_object_t Virtual memory object.
+ */
+
+struct vm_object {
+ struct pglist memq; /* Resident memory */
+ TAILQ_ENTRY(vm_object) object_list; /* list of all objects */
+ u_short flags; /* see below */
+ u_short paging_in_progress; /* Paging (in or out) so
+ don't collapse or destroy */
+ simple_lock_data_t Lock; /* Synchronization */
+ int ref_count; /* How many refs?? */
+ vm_size_t size; /* Object size */
+ int resident_page_count;
+ /* number of resident pages */
+ struct vm_object *copy; /* Object that holds copies of
+ my changed pages */
+ vm_pager_t pager; /* Where to get data */
+ vm_offset_t paging_offset; /* Offset into paging space */
+ struct vm_object *shadow; /* My shadow */
+ vm_offset_t shadow_offset; /* Offset in shadow */
+ TAILQ_ENTRY(vm_object) cached_list; /* for persistence */
+};
+/*
+ * Flags
+ */
+#define OBJ_CANPERSIST 0x0001 /* allow to persist */
+#define OBJ_INTERNAL 0x0002 /* internally created object */
+#define OBJ_ACTIVE 0x0004 /* used to mark active objects */
+
+TAILQ_HEAD(vm_object_hash_head, vm_object_hash_entry);
+
+struct vm_object_hash_entry {
+ TAILQ_ENTRY(vm_object_hash_entry) hash_links; /* hash chain links */
+	vm_object_t		object;		/* object represented */
+};
+
+typedef struct vm_object_hash_entry *vm_object_hash_entry_t;
+
+#ifdef KERNEL
+TAILQ_HEAD(object_q, vm_object);
+
+struct object_q vm_object_cached_list; /* list of objects persisting */
+int vm_object_cached; /* size of cached list */
+simple_lock_data_t vm_cache_lock; /* lock for object cache */
+
+struct object_q vm_object_list; /* list of allocated objects */
+long vm_object_count; /* count of all objects */
+simple_lock_data_t vm_object_list_lock;
+ /* lock for object list and count */
+
+vm_object_t kernel_object; /* the single kernel object */
+vm_object_t kmem_object;
+
+#define vm_object_cache_lock() simple_lock(&vm_cache_lock)
+#define vm_object_cache_unlock() simple_unlock(&vm_cache_lock)
+#endif /* KERNEL */
+
+#define vm_object_lock_init(object) simple_lock_init(&(object)->Lock)
+#define vm_object_lock(object) simple_lock(&(object)->Lock)
+#define vm_object_unlock(object) simple_unlock(&(object)->Lock)
+#define vm_object_lock_try(object) simple_lock_try(&(object)->Lock)
+#define vm_object_sleep(event, object, interruptible) \
+ thread_sleep((event), &(object)->Lock, (interruptible))
+
+#ifdef KERNEL
+vm_object_t vm_object_allocate __P((vm_size_t));
+void vm_object_cache_clear __P((void));
+void vm_object_cache_trim __P((void));
+boolean_t vm_object_coalesce __P((vm_object_t, vm_object_t,
+		    vm_offset_t, vm_offset_t, vm_size_t, vm_size_t));
+void vm_object_collapse __P((vm_object_t));
+void vm_object_copy __P((vm_object_t, vm_offset_t, vm_size_t,
+ vm_object_t *, vm_offset_t *, boolean_t *));
+void vm_object_deactivate_pages __P((vm_object_t));
+void vm_object_deallocate __P((vm_object_t));
+void vm_object_enter __P((vm_object_t, vm_pager_t));
+void vm_object_init __P((vm_size_t));
+vm_object_t vm_object_lookup __P((vm_pager_t));
+boolean_t vm_object_page_clean __P((vm_object_t,
+ vm_offset_t, vm_offset_t, boolean_t, boolean_t));
+void vm_object_page_remove __P((vm_object_t,
+ vm_offset_t, vm_offset_t));
+void vm_object_pmap_copy __P((vm_object_t,
+ vm_offset_t, vm_offset_t));
+void vm_object_pmap_remove __P((vm_object_t,
+ vm_offset_t, vm_offset_t));
+void vm_object_print __P((vm_object_t, boolean_t));
+void vm_object_reference __P((vm_object_t));
+void vm_object_remove __P((vm_pager_t));
+void vm_object_setpager __P((vm_object_t,
+ vm_pager_t, vm_offset_t, boolean_t));
+void vm_object_shadow __P((vm_object_t *,
+ vm_offset_t *, vm_size_t));
+void vm_object_terminate __P((vm_object_t));
+#endif
+#endif /* _VM_OBJECT_ */
diff --git a/sys/vm/vm_page.c b/sys/vm/vm_page.c
new file mode 100644
index 0000000..4304100
--- /dev/null
+++ b/sys/vm/vm_page.c
@@ -0,0 +1,879 @@
+/*
+ * Copyright (c) 1991 Regents of the University of California.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * The Mach Operating System project at Carnegie-Mellon University.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * from: @(#)vm_page.c 7.4 (Berkeley) 5/7/91
+ * $Id: vm_page.c,v 1.2 1994/05/25 09:20:05 rgrimes Exp $
+ */
+
+/*
+ * Copyright (c) 1987, 1990 Carnegie-Mellon University.
+ * All rights reserved.
+ *
+ * Authors: Avadis Tevanian, Jr., Michael Wayne Young
+ *
+ * Permission to use, copy, modify and distribute this software and
+ * its documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ *
+ * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
+ * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
+ * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
+ * School of Computer Science
+ * Carnegie Mellon University
+ * Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie the
+ * rights to redistribute these changes.
+ */
+
+/*
+ * Resident memory management module.
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+
+#include <vm/vm.h>
+#include <vm/vm_page.h>
+#include <vm/vm_map.h>
+#include <vm/vm_pageout.h>
+
+/*
+ * Associated with page of user-allocatable memory is a
+ * page structure.
+ */
+
+struct pglist *vm_page_buckets; /* Array of buckets */
+int vm_page_bucket_count = 0; /* How big is array? */
+int vm_page_hash_mask; /* Mask for hash function */
+simple_lock_data_t bucket_lock; /* lock for all buckets XXX */
+
+struct pglist vm_page_queue_free;
+struct pglist vm_page_queue_active;
+struct pglist vm_page_queue_inactive;
+simple_lock_data_t vm_page_queue_lock;
+simple_lock_data_t vm_page_queue_free_lock;
+
+/* has physical page allocation been initialized? */
+boolean_t vm_page_startup_initialized;
+
+vm_page_t vm_page_array;
+long first_page;
+long last_page;
+vm_offset_t first_phys_addr;
+vm_offset_t last_phys_addr;
+vm_size_t page_mask;
+int page_shift;
+
+/*
+ * vm_set_page_size:
+ *
+ * Sets the page size, perhaps based upon the memory
+ * size. Must be called before any use of page-size
+ * dependent functions.
+ *
+ * Sets page_shift and page_mask from cnt.v_page_size.
+ */
+void vm_set_page_size()
+{
+
+ if (cnt.v_page_size == 0)
+ cnt.v_page_size = DEFAULT_PAGE_SIZE;
+ page_mask = cnt.v_page_size - 1;
+ if ((page_mask & cnt.v_page_size) != 0)
+ panic("vm_set_page_size: page size not a power of two");
+ for (page_shift = 0; ; page_shift++)
+ if ((1 << page_shift) == cnt.v_page_size)
+ break;
+}
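+
+/*
+ * Worked example (editor's note, not part of the original source):
+ * with cnt.v_page_size = 4096 this leaves page_mask = 0xfff and
+ * page_shift = 12, since 1 << 12 == 4096.  A size that is not a power
+ * of two trips the panic above.
+ */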
+
+/*
+ * vm_page_startup:
+ *
+ * Initializes the resident memory module.
+ *
+ * Allocates memory for the page cells, and
+ * for the object/offset-to-page hash table headers.
+ * Each page cell is initialized and placed on the free list.
+ */
+
+vm_offset_t
+vm_page_startup(starta, enda, vaddr)
+ register vm_offset_t starta;
+ vm_offset_t enda;
+ register vm_offset_t vaddr;
+{
+ register vm_offset_t mapped;
+ register vm_page_t m;
+ register struct pglist *bucket;
+ vm_size_t npages, page_range;
+ register vm_offset_t new_start;
+ int i;
+ vm_offset_t pa;
+ int nblocks;
+ vm_offset_t first_managed_page;
+ int size;
+
+ extern vm_offset_t kentry_data;
+ extern vm_size_t kentry_data_size;
+ extern vm_offset_t phys_avail[];
+/* the biggest memory array is the second group of pages */
+ vm_offset_t start;
+ vm_offset_t biggestone, biggestsize;
+
+ vm_offset_t total;
+
+ total = 0;
+ biggestsize = 0;
+ biggestone = 0;
+ nblocks = 0;
+ vaddr = round_page(vaddr);
+
+ for (i = 0; phys_avail[i + 1]; i += 2) {
+ phys_avail[i] = round_page(phys_avail[i]);
+ phys_avail[i+1] = trunc_page(phys_avail[i+1]);
+ }
+
+ for (i = 0; phys_avail[i + 1]; i += 2) {
+ int size = phys_avail[i+1] - phys_avail[i];
+ if (size > biggestsize) {
+ biggestone = i;
+ biggestsize = size;
+ }
+ ++nblocks;
+ total += size;
+ }
+
+ start = phys_avail[biggestone];
+
+
+ /*
+ * Initialize the locks
+ */
+
+ simple_lock_init(&vm_page_queue_free_lock);
+ simple_lock_init(&vm_page_queue_lock);
+
+ /*
+ * Initialize the queue headers for the free queue,
+ * the active queue and the inactive queue.
+ */
+
+ TAILQ_INIT(&vm_page_queue_free);
+ TAILQ_INIT(&vm_page_queue_active);
+ TAILQ_INIT(&vm_page_queue_inactive);
+
+ /*
+ * Allocate (and initialize) the hash table buckets.
+ *
+ * The number of buckets MUST BE a power of 2, and
+ * the actual value is the next power of 2 greater
+ * than the number of physical pages in the system.
+ *
+ * Note:
+ * This computation can be tweaked if desired.
+ */
+ vm_page_buckets = (struct pglist *)vaddr;
+ bucket = vm_page_buckets;
+ if (vm_page_bucket_count == 0) {
+ vm_page_bucket_count = 1;
+ while (vm_page_bucket_count < atop(total))
+ vm_page_bucket_count <<= 1;
+ }
+
+
+ vm_page_hash_mask = vm_page_bucket_count - 1;
+
+ /*
+ * Validate these addresses.
+ */
+
+ new_start = start + vm_page_bucket_count * sizeof(struct pglist);
+ new_start = round_page(new_start);
+ mapped = vaddr;
+ vaddr = pmap_map(mapped, start, new_start,
+ VM_PROT_READ|VM_PROT_WRITE);
+ start = new_start;
+ bzero((caddr_t) mapped, vaddr - mapped);
+ mapped = vaddr;
+
+ for (i = 0; i< vm_page_bucket_count; i++) {
+ TAILQ_INIT(bucket);
+ bucket++;
+ }
+
+ simple_lock_init(&bucket_lock);
+
+ /*
+ * round (or truncate) the addresses to our page size.
+ */
+
+ /*
+ * Pre-allocate maps and map entries that cannot be dynamically
+ * allocated via malloc(). The maps include the kernel_map and
+ * kmem_map which must be initialized before malloc() will
+ * work (obviously). Also could include pager maps which would
+ * be allocated before kmeminit.
+ *
+ * Allow some kernel map entries... this should be plenty
+ * since people shouldn't be cluttering up the kernel
+ * map (they should use their own maps).
+ */
+
+ kentry_data_size = MAX_KMAP * sizeof(struct vm_map) +
+ MAX_KMAPENT * sizeof(struct vm_map_entry);
+ kentry_data_size = round_page(kentry_data_size);
+ kentry_data = (vm_offset_t) vaddr;
+ vaddr += kentry_data_size;
+
+ /*
+ * Validate these zone addresses.
+ */
+
+ new_start = start + (vaddr - mapped);
+ pmap_map(mapped, start, new_start, VM_PROT_READ|VM_PROT_WRITE);
+ bzero((caddr_t) mapped, (vaddr - mapped));
+ start = round_page(new_start);
+
+ /*
+ * Compute the number of pages of memory that will be
+ * available for use (taking into account the overhead
+ * of a page structure per page).
+ */
+
+ npages = (total - (start - phys_avail[biggestone])) / (PAGE_SIZE + sizeof(struct vm_page));
+ first_page = phys_avail[0] / PAGE_SIZE;
+
+ page_range = (phys_avail[(nblocks-1)*2 + 1] - phys_avail[0]) / PAGE_SIZE;
+ /*
+ * Initialize the mem entry structures now, and
+ * put them in the free queue.
+ */
+
+ vm_page_array = (vm_page_t) vaddr;
+ mapped = vaddr;
+
+
+ /*
+ * Validate these addresses.
+ */
+
+ new_start = round_page(start + page_range * sizeof (struct vm_page));
+ mapped = pmap_map(mapped, start, new_start,
+ VM_PROT_READ|VM_PROT_WRITE);
+ start = new_start;
+
+ first_managed_page = start / PAGE_SIZE;
+
+ /*
+ * Clear all of the page structures
+ */
+ bzero((caddr_t)vm_page_array, page_range * sizeof(struct vm_page));
+
+ cnt.v_page_count = 0;
+ cnt.v_free_count= 0;
+ for (i = 0; phys_avail[i + 1] && npages > 0; i += 2) {
+ if (i == biggestone)
+ pa = ptoa(first_managed_page);
+ else
+ pa = phys_avail[i];
+ while (pa < phys_avail[i + 1] && npages-- > 0) {
+ ++cnt.v_page_count;
+ ++cnt.v_free_count;
+ m = PHYS_TO_VM_PAGE(pa);
+ m->flags = 0;
+ m->object = 0;
+ m->phys_addr = pa;
+ m->hold_count = 0;
+ TAILQ_INSERT_TAIL(&vm_page_queue_free, m, pageq);
+ pa += PAGE_SIZE;
+ }
+ }
+
+ /*
+ * Initialize vm_pages_needed lock here - don't wait for pageout
+ * daemon XXX
+ */
+ simple_lock_init(&vm_pages_needed_lock);
+
+ return(mapped);
+}
+
+/*
+ * vm_page_hash:
+ *
+ * Distributes the object/offset key pair among hash buckets.
+ *
+ * NOTE: This macro depends on vm_page_bucket_count being a power of 2.
+ */
+inline const int
+vm_page_hash(object, offset)
+ vm_object_t object;
+ vm_offset_t offset;
+{
+ return ((unsigned)object + offset/NBPG) & vm_page_hash_mask;
+}
+
+/*
+ * vm_page_insert: [ internal use only ]
+ *
+ * Inserts the given mem entry into the object/object-page
+ * table and object list.
+ *
+ * The object and page must be locked.
+ */
+
+void vm_page_insert(mem, object, offset)
+ register vm_page_t mem;
+ register vm_object_t object;
+ register vm_offset_t offset;
+{
+ register struct pglist *bucket;
+ int s;
+
+ VM_PAGE_CHECK(mem);
+
+ if (mem->flags & PG_TABLED)
+ panic("vm_page_insert: already inserted");
+
+ /*
+ * Record the object/offset pair in this page
+ */
+
+ mem->object = object;
+ mem->offset = offset;
+
+ /*
+ * Insert it into the object_object/offset hash table
+ */
+
+ bucket = &vm_page_buckets[vm_page_hash(object, offset)];
+ s = splimp();
+ simple_lock(&bucket_lock);
+ TAILQ_INSERT_TAIL(bucket, mem, hashq);
+ simple_unlock(&bucket_lock);
+ (void) splx(s);
+
+ /*
+ * Now link into the object's list of backed pages.
+ */
+
+ TAILQ_INSERT_TAIL(&object->memq, mem, listq);
+ mem->flags |= PG_TABLED;
+
+ /*
+ * And show that the object has one more resident
+ * page.
+ */
+
+ object->resident_page_count++;
+}
+
+/*
+ * vm_page_remove: [ internal use only ]
+ * NOTE: used by device pager as well -wfj
+ *
+ * Removes the given mem entry from the object/offset-page
+ * table and the object page list.
+ *
+ * The object and page must be locked.
+ */
+
+void vm_page_remove(mem)
+ register vm_page_t mem;
+{
+ register struct pglist *bucket;
+ int s;
+
+ VM_PAGE_CHECK(mem);
+
+ if (!(mem->flags & PG_TABLED))
+ return;
+
+ /*
+ * Remove from the object_object/offset hash table
+ */
+
+ bucket = &vm_page_buckets[vm_page_hash(mem->object, mem->offset)];
+ s = splimp();
+ simple_lock(&bucket_lock);
+ TAILQ_REMOVE(bucket, mem, hashq);
+ simple_unlock(&bucket_lock);
+ (void) splx(s);
+
+ /*
+ * Now remove from the object's list of backed pages.
+ */
+
+ TAILQ_REMOVE(&mem->object->memq, mem, listq);
+
+ /*
+ * And show that the object has one fewer resident
+ * page.
+ */
+
+ mem->object->resident_page_count--;
+
+ mem->flags &= ~PG_TABLED;
+}
+
+/*
+ * vm_page_lookup:
+ *
+ * Returns the page associated with the object/offset
+ * pair specified; if none is found, NULL is returned.
+ *
+ * The object must be locked. No side effects.
+ */
+
+vm_page_t vm_page_lookup(object, offset)
+ register vm_object_t object;
+ register vm_offset_t offset;
+{
+ register vm_page_t mem;
+ register struct pglist *bucket;
+ int s;
+
+ /*
+ * Search the hash table for this object/offset pair
+ */
+
+ bucket = &vm_page_buckets[vm_page_hash(object, offset)];
+
+ s = splimp();
+ simple_lock(&bucket_lock);
+ for (mem = bucket->tqh_first; mem != NULL; mem = mem->hashq.tqe_next) {
+ VM_PAGE_CHECK(mem);
+ if ((mem->object == object) && (mem->offset == offset)) {
+ simple_unlock(&bucket_lock);
+ splx(s);
+ return(mem);
+ }
+ }
+
+ simple_unlock(&bucket_lock);
+ splx(s);
+ return(NULL);
+}
+
+/*
+ * vm_page_rename:
+ *
+ * Move the given memory entry from its
+ * current object to the specified target object/offset.
+ *
+ * The object must be locked.
+ */
+void vm_page_rename(mem, new_object, new_offset)
+ register vm_page_t mem;
+ register vm_object_t new_object;
+ vm_offset_t new_offset;
+{
+ if (mem->object == new_object)
+ return;
+
+ vm_page_lock_queues(); /* keep page from moving out from
+ under pageout daemon */
+ vm_page_remove(mem);
+ vm_page_insert(mem, new_object, new_offset);
+ vm_page_unlock_queues();
+}
+
+/*
+ * vm_page_alloc:
+ *
+ * Allocate and return a memory cell associated
+ * with this VM object/offset pair.
+ *
+ * Object must be locked.
+ */
+vm_page_t
+vm_page_alloc(object, offset)
+ vm_object_t object;
+ vm_offset_t offset;
+{
+ register vm_page_t mem;
+ int s;
+
+ s = splimp();
+ simple_lock(&vm_page_queue_free_lock);
+ if ( object != kernel_object &&
+ object != kmem_object &&
+ curproc != pageproc && curproc != &proc0 &&
+ cnt.v_free_count < cnt.v_free_reserved) {
+
+ simple_unlock(&vm_page_queue_free_lock);
+ splx(s);
+ /*
+ * this wakeup seems unnecessary, but there is code that
+ * might just check to see if there are free pages, and
+ * punt if there aren't. VM_WAIT does this too, but
+ * redundant wakeups aren't that bad...
+ */
+ if (curproc != pageproc)
+ wakeup((caddr_t) &vm_pages_needed);
+ return(NULL);
+ }
+ if (( mem = vm_page_queue_free.tqh_first) == 0) {
+ simple_unlock(&vm_page_queue_free_lock);
+ printf("No pages???\n");
+ splx(s);
+ /*
+ * comment above re: wakeups applies here too...
+ */
+ if (curproc != pageproc)
+ wakeup((caddr_t) &vm_pages_needed);
+ return(NULL);
+ }
+
+ TAILQ_REMOVE(&vm_page_queue_free, mem, pageq);
+
+ cnt.v_free_count--;
+ simple_unlock(&vm_page_queue_free_lock);
+
+ VM_PAGE_INIT(mem, object, offset);
+ splx(s);
+
+/*
+ * Don't wake up too often: only wake the pageout daemon when we would
+ * be nearly out of memory.
+ */
+ if (curproc != pageproc &&
+ (cnt.v_free_count < cnt.v_free_reserved))
+ wakeup((caddr_t) &vm_pages_needed);
+
+ return(mem);
+}
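+
+/*
+ * Illustrative allocation loop (editor's sketch, not part of the
+ * original source): callers that may sleep usually retry until a page
+ * shows up, waiting via VM_WAIT (mentioned in the comment above)
+ * between attempts:
+ *
+ *	while ((m = vm_page_alloc(object, offset)) == NULL)
+ *		VM_WAIT;
+ *
+ * "object" and "offset" stand for whatever the caller is faulting in.
+ */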
+
+/*
+ * vm_page_free:
+ *
+ * Returns the given page to the free list,
+ * disassociating it with any VM object.
+ *
+ * Object and page must be locked prior to entry.
+ */
+void vm_page_free(mem)
+ register vm_page_t mem;
+{
+ int s;
+ s = splimp();
+ vm_page_remove(mem);
+ if (mem->flags & PG_ACTIVE) {
+ TAILQ_REMOVE(&vm_page_queue_active, mem, pageq);
+ mem->flags &= ~PG_ACTIVE;
+ cnt.v_active_count--;
+ }
+
+ if (mem->flags & PG_INACTIVE) {
+ TAILQ_REMOVE(&vm_page_queue_inactive, mem, pageq);
+ mem->flags &= ~PG_INACTIVE;
+ cnt.v_inactive_count--;
+ }
+
+ if (!(mem->flags & PG_FICTITIOUS)) {
+
+ simple_lock(&vm_page_queue_free_lock);
+ if (mem->wire_count) {
+ cnt.v_wire_count--;
+ mem->wire_count = 0;
+ }
+ TAILQ_INSERT_TAIL(&vm_page_queue_free, mem, pageq);
+
+ cnt.v_free_count++;
+ simple_unlock(&vm_page_queue_free_lock);
+ splx(s);
+ /*
+ * if pageout daemon needs pages, then tell it that there
+ * are some free.
+ */
+ if (vm_pageout_pages_needed)
+ wakeup((caddr_t)&vm_pageout_pages_needed);
+
+ /*
+ * wakeup processes that are waiting on memory if we
+ * hit a high water mark.
+ */
+ if (cnt.v_free_count == cnt.v_free_min) {
+ wakeup((caddr_t)&cnt.v_free_count);
+ }
+
+ /*
+ * wakeup scheduler process if we have lots of memory.
+ * this process will swapin processes.
+ */
+ if (cnt.v_free_count == cnt.v_free_target) {
+ wakeup((caddr_t)&proc0);
+ }
+ } else {
+ splx(s);
+ }
+ wakeup((caddr_t) mem);
+}
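+
+/*
+ * Illustrative call (editor's note, not part of the original source):
+ * callers free a page with the page queues locked, exactly as
+ * vm_object_page_remove() does in vm_object.c:
+ *
+ *	vm_page_lock_queues();
+ *	vm_page_free(p);
+ *	vm_page_unlock_queues();
+ *
+ * The object lock must also be held, per the comment above.
+ */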
+
+
+/*
+ * vm_page_wire:
+ *
+ * Mark this page as wired down by yet
+ * another map, removing it from paging queues
+ * as necessary.
+ *
+ * The page queues must be locked.
+ */
+void vm_page_wire(mem)
+ register vm_page_t mem;
+{
+ int s;
+ VM_PAGE_CHECK(mem);
+
+ if (mem->wire_count == 0) {
+ s = splimp();
+ if (mem->flags & PG_ACTIVE) {
+ TAILQ_REMOVE(&vm_page_queue_active, mem, pageq);
+ cnt.v_active_count--;
+ mem->flags &= ~PG_ACTIVE;
+ }
+ if (mem->flags & PG_INACTIVE) {
+ TAILQ_REMOVE(&vm_page_queue_inactive, mem, pageq);
+ cnt.v_inactive_count--;
+ mem->flags &= ~PG_INACTIVE;
+ }
+ splx(s);
+ cnt.v_wire_count++;
+ }
+ mem->wire_count++;
+}
+
+/*
+ * vm_page_unwire:
+ *
+ * Release one wiring of this page, potentially
+ * enabling it to be paged again.
+ *
+ * The page queues must be locked.
+ */
+void vm_page_unwire(mem)
+ register vm_page_t mem;
+{
+ int s;
+ VM_PAGE_CHECK(mem);
+
+ s = splimp();
+
+ if( mem->wire_count)
+ mem->wire_count--;
+ if (mem->wire_count == 0) {
+ TAILQ_INSERT_TAIL(&vm_page_queue_active, mem, pageq);
+ cnt.v_active_count++;
+ mem->flags |= PG_ACTIVE;
+ cnt.v_wire_count--;
+ }
+ splx(s);
+}
+
+#if 0
+/*
+ * vm_page_deactivate:
+ *
+ * Returns the given page to the inactive list,
+ * indicating that no physical maps have access
+ * to this page. [Used by the physical mapping system.]
+ *
+ * The page queues must be locked.
+ */
+void
+vm_page_deactivate(m)
+ register vm_page_t m;
+{
+ int spl;
+ VM_PAGE_CHECK(m);
+
+ /*
+ * Only move active pages -- ignore locked or already
+ * inactive ones.
+ *
+ * XXX: sometimes we get pages which aren't wired down
+ * or on any queue - we need to put them on the inactive
+ * queue also, otherwise we lose track of them.
+ * Paul Mackerras (paulus@cs.anu.edu.au) 9-Jan-93.
+ */
+
+ spl = splimp();
+ if (!(m->flags & PG_INACTIVE) && m->wire_count == 0 &&
+ m->hold_count == 0) {
+
+ pmap_clear_reference(VM_PAGE_TO_PHYS(m));
+ if (m->flags & PG_ACTIVE) {
+ TAILQ_REMOVE(&vm_page_queue_active, m, pageq);
+ m->flags &= ~PG_ACTIVE;
+ cnt.v_active_count--;
+ }
+ TAILQ_INSERT_TAIL(&vm_page_queue_inactive, m, pageq);
+ m->flags |= PG_INACTIVE;
+ cnt.v_inactive_count++;
+#define NOT_DEACTIVATE_PROTECTS
+#ifndef NOT_DEACTIVATE_PROTECTS
+ pmap_page_protect(VM_PAGE_TO_PHYS(m), VM_PROT_NONE);
+#else
+ if ((m->flags & PG_CLEAN) &&
+ pmap_is_modified(VM_PAGE_TO_PHYS(m)))
+ m->flags &= ~PG_CLEAN;
+#endif
+ if ((m->flags & PG_CLEAN) == 0)
+ m->flags |= PG_LAUNDRY;
+ }
+ splx(spl);
+}
+#endif
+#if 1
+/*
+ * vm_page_deactivate:
+ *
+ * Returns the given page to the inactive list,
+ * indicating that no physical maps have access
+ * to this page. [Used by the physical mapping system.]
+ *
+ * The page queues must be locked.
+ */
+void vm_page_deactivate(m)
+ register vm_page_t m;
+{
+ int s;
+ VM_PAGE_CHECK(m);
+
+ s = splimp();
+ /*
+ * Only move active pages -- ignore locked or already
+ * inactive ones.
+ */
+
+ if ((m->flags & PG_ACTIVE) && (m->hold_count == 0)) {
+ pmap_clear_reference(VM_PAGE_TO_PHYS(m));
+ TAILQ_REMOVE(&vm_page_queue_active, m, pageq);
+ TAILQ_INSERT_TAIL(&vm_page_queue_inactive, m, pageq);
+ m->flags &= ~PG_ACTIVE;
+ m->flags |= PG_INACTIVE;
+ cnt.v_active_count--;
+ cnt.v_inactive_count++;
+#define NOT_DEACTIVATE_PROTECTS
+#ifndef NOT_DEACTIVATE_PROTECTS
+ pmap_page_protect(VM_PAGE_TO_PHYS(m), VM_PROT_NONE);
+#else
+ if (pmap_is_modified(VM_PAGE_TO_PHYS(m)))
+ m->flags &= ~PG_CLEAN;
+#endif
+ if (m->flags & PG_CLEAN)
+ m->flags &= ~PG_LAUNDRY;
+ else
+ m->flags |= PG_LAUNDRY;
+ }
+ splx(s);
+}
+#endif
+/*
+ * vm_page_activate:
+ *
+ * Put the specified page on the active list (if appropriate).
+ *
+ * The page queues must be locked.
+ */
+
+void vm_page_activate(m)
+ register vm_page_t m;
+{
+ int s;
+ VM_PAGE_CHECK(m);
+
+ s = splimp();
+ if (m->flags & PG_INACTIVE) {
+ TAILQ_REMOVE(&vm_page_queue_inactive, m, pageq);
+ cnt.v_inactive_count--;
+ m->flags &= ~PG_INACTIVE;
+ }
+ if (m->wire_count == 0) {
+ if (m->flags & PG_ACTIVE)
+ panic("vm_page_activate: already active");
+
+ TAILQ_INSERT_TAIL(&vm_page_queue_active, m, pageq);
+ m->flags |= PG_ACTIVE;
+ TAILQ_REMOVE(&m->object->memq, m, listq);
+ TAILQ_INSERT_TAIL(&m->object->memq, m, listq);
+ m->act_count = 1;
+ cnt.v_active_count++;
+ }
+ splx(s);
+}
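+
+/*
+ * Illustrative call (editor's sketch, not part of the original source):
+ * activation and deactivation are queue operations, so callers bracket
+ * them with the page-queue lock:
+ *
+ *	vm_page_lock_queues();
+ *	vm_page_activate(m);		(or vm_page_deactivate(m))
+ *	vm_page_unlock_queues();
+ *
+ * as the page-cleaning code in vm_object.c does when it returns a page
+ * to the queue it came from.
+ */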
+
+/*
+ * vm_page_zero_fill:
+ *
+ * Zero-fill the specified page.
+ * Written as a standard pagein routine, to
+ * be used by the zero-fill object.
+ */
+
+boolean_t
+vm_page_zero_fill(m)
+ vm_page_t m;
+{
+ VM_PAGE_CHECK(m);
+
+ pmap_zero_page(VM_PAGE_TO_PHYS(m));
+ return(TRUE);
+}
+
+/*
+ * vm_page_copy:
+ *
+ * Copy one page to another
+ */
+void
+vm_page_copy(src_m, dest_m)
+ vm_page_t src_m;
+ vm_page_t dest_m;
+{
+ VM_PAGE_CHECK(src_m);
+ VM_PAGE_CHECK(dest_m);
+
+ pmap_copy_page(VM_PAGE_TO_PHYS(src_m), VM_PAGE_TO_PHYS(dest_m));
+}
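+
+/*
+ * Illustrative copy-on-write step (editor's sketch, not part of the
+ * original source): conceptually, the fault path allocates a fresh page
+ * in the front object and copies the shadowed one into it:
+ *
+ *	new_m = vm_page_alloc(object, offset);
+ *	vm_page_copy(old_m, new_m);
+ *
+ * "old_m", "new_m", "object" and "offset" are made-up names; the real
+ * fault handler does considerably more bookkeeping than this.
+ */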
diff --git a/sys/vm/vm_page.h b/sys/vm/vm_page.h
new file mode 100644
index 0000000..e8049c4
--- /dev/null
+++ b/sys/vm/vm_page.h
@@ -0,0 +1,268 @@
+/*
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * The Mach Operating System project at Carnegie-Mellon University.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)vm_page.h 8.2 (Berkeley) 12/13/93
+ *
+ *
+ * Copyright (c) 1987, 1990 Carnegie-Mellon University.
+ * All rights reserved.
+ *
+ * Authors: Avadis Tevanian, Jr., Michael Wayne Young
+ *
+ * Permission to use, copy, modify and distribute this software and
+ * its documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ *
+ * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
+ * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
+ * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
+ * School of Computer Science
+ * Carnegie Mellon University
+ * Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie the
+ * rights to redistribute these changes.
+ */
+
+/*
+ * Resident memory system definitions.
+ */
+
+#ifndef _VM_PAGE_
+#define _VM_PAGE_
+
+/*
+ * Management of resident (logical) pages.
+ *
+ * A small structure is kept for each resident
+ * page, indexed by page number. Each structure
+ * is an element of several lists:
+ *
+ * A hash table bucket used to quickly
+ * perform object/offset lookups
+ *
+ * A list of all pages for a given object,
+ * so they can be quickly deactivated at
+ * time of deallocation.
+ *
+ * An ordered list of pages due for pageout.
+ *
+ * In addition, the structure contains the object
+ * and offset to which this page belongs (for pageout),
+ * and sundry status bits.
+ *
+ * Fields in this structure are locked either by the lock on the
+ * object that the page belongs to (O) or by the lock on the page
+ * queues (P).
+ */
+
+TAILQ_HEAD(pglist, vm_page);
+
+struct vm_page {
+ TAILQ_ENTRY(vm_page) pageq; /* queue info for FIFO
+ * queue or free list (P) */
+ TAILQ_ENTRY(vm_page) hashq; /* hash table links (O)*/
+ TAILQ_ENTRY(vm_page) listq; /* pages in same object (O)*/
+
+ vm_object_t object; /* which object am I in (O,P)*/
+ vm_offset_t offset; /* offset into object (O,P) */
+
+ u_short wire_count; /* wired down maps refs (P) */
+ u_short flags; /* see below */
+ short hold_count; /* page hold count */
+ u_short act_count; /* page usage count */
+
+ vm_offset_t phys_addr; /* physical address of page */
+};
+
+/*
+ * These are the flags defined for vm_page.
+ *
+ * Note: PG_FILLED and PG_DIRTY are added for the filesystems.
+ */
+#define PG_INACTIVE 0x0001 /* page is in inactive list (P) */
+#define PG_ACTIVE 0x0002 /* page is in active list (P) */
+#define PG_LAUNDRY 0x0004 /* page is being cleaned now (P)*/
+#define PG_CLEAN 0x0008 /* page has not been modified */
+#define PG_BUSY 0x0010 /* page is in transit (O) */
+#define PG_WANTED 0x0020 /* someone is waiting for page (O) */
+#define PG_TABLED 0x0040 /* page is in VP table (O) */
+#define PG_COPYONWRITE 0x0080 /* must copy page before changing (O) */
+#define PG_FICTITIOUS 0x0100 /* physical page doesn't exist (O) */
+#define PG_FAKE 0x0200 /* page is placeholder for pagein (O) */
+#define PG_FILLED 0x0400 /* client flag to set when filled */
+#define PG_DIRTY 0x0800 /* client flag to set when dirty */
+#define PG_PAGEROWNED 0x4000 /* DEBUG: async paging op in progress */
+#define PG_PTPAGE 0x8000 /* DEBUG: is a user page table page */
+
+#if VM_PAGE_DEBUG
+#define VM_PAGE_CHECK(mem) { \
+ if ((((unsigned int) mem) < ((unsigned int) &vm_page_array[0])) || \
+ (((unsigned int) mem) > \
+ ((unsigned int) &vm_page_array[last_page-first_page])) || \
+ ((mem->flags & (PG_ACTIVE | PG_INACTIVE)) == \
+ (PG_ACTIVE | PG_INACTIVE))) \
+ panic("vm_page_check: not valid!"); \
+}
+#else /* VM_PAGE_DEBUG */
+#define VM_PAGE_CHECK(mem)
+#endif /* VM_PAGE_DEBUG */
+
+#ifdef KERNEL
+/*
+ * Each pageable resident page falls into one of three lists:
+ *
+ * free
+ * Available for allocation now.
+ * inactive
+ * Not referenced in any map, but still has an
+ * object/offset-page mapping, and may be dirty.
+ * This is the list of pages that should be
+ * paged out next.
+ * active
+ * A list of pages which have been placed in
+ * at least one physical map. This list is
+ * ordered, in LRU-like fashion.
+ */
+
+extern
+struct pglist vm_page_queue_free; /* memory free queue */
+extern
+struct pglist vm_page_queue_active; /* active memory queue */
+extern
+struct pglist vm_page_queue_inactive; /* inactive memory queue */
+
+extern
+vm_page_t vm_page_array; /* First resident page in table */
+extern
+long first_page; /* first physical page number */
+ /* ... represented in vm_page_array */
+extern
+long last_page; /* last physical page number */
+ /* ... represented in vm_page_array */
+ /* [INCLUSIVE] */
+extern
+vm_offset_t first_phys_addr; /* physical address for first_page */
+extern
+vm_offset_t last_phys_addr; /* physical address for last_page */
+
+#define VM_PAGE_TO_PHYS(entry) ((entry)->phys_addr)
+
+#define IS_VM_PHYSADDR(pa) \
+ ((pa) >= first_phys_addr && (pa) <= last_phys_addr)
+
+#define PHYS_TO_VM_PAGE(pa) \
+ (&vm_page_array[atop(pa) - first_page ])
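+
+/*
+ * Illustrative sketch (hypothetical helper, not part of the interface
+ * above): VM_PAGE_TO_PHYS() and PHYS_TO_VM_PAGE() convert between a
+ * managed physical address and its vm_page structure, with
+ * IS_VM_PHYSADDR() guarding against addresses outside the managed range.
+ */
+#if 0
+static inline vm_page_t
+example_pa_to_page(pa)
+	vm_offset_t pa;
+{
+	if (!IS_VM_PHYSADDR(pa))
+		return (NULL);			/* not a managed page */
+	return (PHYS_TO_VM_PAGE(pa));		/* inverse of VM_PAGE_TO_PHYS() */
+}
+#endif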
+
+extern
+simple_lock_data_t vm_page_queue_lock; /* lock on active and inactive
+ page queues */
+extern /* lock on free page queue */
+simple_lock_data_t vm_page_queue_free_lock;
+
+/*
+ * Functions implemented as macros
+ */
+
+#define PAGE_ASSERT_WAIT(m, interruptible) { \
+ (m)->flags |= PG_WANTED; \
+ assert_wait((int) (m), (interruptible)); \
+ }
+
+#define PAGE_WAKEUP(m) { \
+ (m)->flags &= ~PG_BUSY; \
+ if ((m)->flags & PG_WANTED) { \
+ (m)->flags &= ~PG_WANTED; \
+ wakeup((caddr_t) (m)); \
+ } \
+ }
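+
+/*
+ * Illustrative sketch (hypothetical helper): the conventional way a
+ * consumer waits for a busy page with the flags used by PAGE_WAKEUP()
+ * above.  The sleep channel is the page itself, matching the
+ * wakeup((caddr_t)(m)) issued by PAGE_WAKEUP(); "pgwait" is an arbitrary
+ * wait message.  The caller is assumed to recheck the page after waking,
+ * since it may have been freed or renamed while asleep.
+ */
+#if 0
+static inline void
+example_wait_for_page(m)
+	vm_page_t m;
+{
+	while (m->flags & PG_BUSY) {
+		m->flags |= PG_WANTED;
+		tsleep((caddr_t) m, PVM, "pgwait", 0);
+	}
+}
+#endif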
+
+#define vm_page_lock_queues() simple_lock(&vm_page_queue_lock)
+#define vm_page_unlock_queues() simple_unlock(&vm_page_queue_lock)
+
+#define vm_page_set_modified(m) { (m)->flags &= ~PG_CLEAN; }
+
+#define VM_PAGE_INIT(mem, object, offset) { \
+ (mem)->flags = PG_BUSY | PG_CLEAN | PG_FAKE; \
+ vm_page_insert((mem), (object), (offset)); \
+ (mem)->wire_count = 0; \
+ (mem)->hold_count = 0; \
+ (mem)->act_count = 0; \
+}
+
+void vm_page_activate __P((vm_page_t));
+vm_page_t vm_page_alloc __P((vm_object_t, vm_offset_t));
+void vm_page_copy __P((vm_page_t, vm_page_t));
+void vm_page_deactivate __P((vm_page_t));
+void vm_page_free __P((vm_page_t));
+void vm_page_insert __P((vm_page_t, vm_object_t, vm_offset_t));
+vm_page_t vm_page_lookup __P((vm_object_t, vm_offset_t));
+void vm_page_remove __P((vm_page_t));
+void vm_page_rename __P((vm_page_t, vm_object_t, vm_offset_t));
+vm_offset_t vm_page_startup __P((vm_offset_t, vm_offset_t, vm_offset_t));
+void vm_page_unwire __P((vm_page_t));
+void vm_page_wire __P((vm_page_t));
+boolean_t vm_page_zero_fill __P((vm_page_t));
+
+
+/*
+ * Keep a page from being freed by the page daemon.  This has much the
+ * same effect as wiring, but with much lower overhead; it should be
+ * used only for *very* temporary holding ("wiring").
+ */
+static inline void
+vm_page_hold(mem)
+ vm_page_t mem;
+{
+ mem->hold_count++;
+}
+
+static inline void
+vm_page_unhold(mem)
+ vm_page_t mem;
+{
+ if( --mem->hold_count < 0)
+ panic("vm_page_unhold: hold count < 0!!!");
+}
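+
+/*
+ * Illustrative sketch (hypothetical caller): hold a page across a short
+ * access so the page daemon cannot free it, without paying the cost of
+ * wiring.  vm_page_lookup() and the hold primitives are assumed to be
+ * used with the object locked.
+ */
+#if 0
+static void
+example_touch_page(object, offset)
+	vm_object_t object;
+	vm_offset_t offset;
+{
+	vm_page_t m;
+
+	if ((m = vm_page_lookup(object, offset)) == NULL)
+		return;
+	vm_page_hold(m);	/* keep the page daemon away */
+	/* ... briefly examine or copy the page contents ... */
+	vm_page_unhold(m);	/* page may be freed again */
+}
+#endif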
+
+#endif /* KERNEL */
+#endif /* !_VM_PAGE_ */
diff --git a/sys/vm/vm_pageout.c b/sys/vm/vm_pageout.c
new file mode 100644
index 0000000..cabb102
--- /dev/null
+++ b/sys/vm/vm_pageout.c
@@ -0,0 +1,790 @@
+/*
+ * Copyright (c) 1991 Regents of the University of California.
+ * All rights reserved.
+ * Copyright (c) 1994 John S. Dyson
+ * All rights reserved.
+ * Copyright (c) 1994 David Greenman
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * The Mach Operating System project at Carnegie-Mellon University.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)vm_pageout.c 7.4 (Berkeley) 5/7/91
+ *
+ *
+ * Copyright (c) 1987, 1990 Carnegie-Mellon University.
+ * All rights reserved.
+ *
+ * Authors: Avadis Tevanian, Jr., Michael Wayne Young
+ *
+ * Permission to use, copy, modify and distribute this software and
+ * its documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ *
+ * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
+ * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
+ * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
+ * School of Computer Science
+ * Carnegie Mellon University
+ * Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie the
+ * rights to redistribute these changes.
+ *
+ * $Id: vm_pageout.c,v 1.3 1994/06/06 11:56:27 davidg Exp $
+ */
+
+/*
+ * The proverbial page-out daemon.
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/resourcevar.h>
+#include <sys/malloc.h>
+
+#include <vm/vm.h>
+#include <vm/vm_page.h>
+#include <vm/vm_pageout.h>
+
+extern vm_map_t kmem_map;
+int vm_pages_needed; /* Event on which pageout daemon sleeps */
+int vm_pagescanner; /* Event on which pagescanner sleeps */
+int vm_pageout_free_min = 0; /* Stop pageout to wait for pagers at this free level */
+
+int vm_pageout_pages_needed = 0; /* flag saying that the pageout daemon needs pages */
+int vm_page_pagesfreed;
+
+extern int npendingio;
+extern int hz;
+int vm_pageout_proc_limit;
+extern int nswiodone;
+extern int swap_pager_full;
+extern int swap_pager_ready();
+
+#define MAXREF 32767
+
+#define MAXSCAN 512 /* maximum number of pages to scan in active queue */
+ /* set the "clock" hands to be (MAXSCAN * 4096) Bytes */
+#define ACT_DECLINE 1
+#define ACT_ADVANCE 3
+#define ACT_MAX 300
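+
+/*
+ * Example of the aging arithmetic implied by these constants: a page
+ * referenced on every pass gains ACT_ADVANCE (3) per pass, saturating at
+ * ACT_MAX (300); an unreferenced page loses ACT_DECLINE (1) per pass and
+ * becomes a candidate for deactivation once its act_count reaches zero,
+ * so a fully aged page survives roughly 300 unreferenced passes.
+ */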
+
+#define LOWATER ((2048*1024)/NBPG)
+
+#define VM_PAGEOUT_PAGE_COUNT 8
+static vm_offset_t vm_space_needed;
+int vm_pageout_req_do_stats;
+
+int vm_page_max_wired = 0; /* XXX max # of wired pages system-wide */
+
+
+/*
+ * vm_pageout_clean:
+ * cleans a vm_page
+ */
+int
+vm_pageout_clean(m, sync)
+ register vm_page_t m;
+ int sync;
+{
+ /*
+ * Clean the page and remove it from the
+ * laundry.
+ *
+ * We set the busy bit to cause
+ * potential page faults on this page to
+ * block.
+ *
+ * And we set pageout-in-progress to keep
+ * the object from disappearing during
+ * pageout. This guarantees that the
+ * page won't move from the inactive
+ * queue. (However, any other page on
+ * the inactive queue may move!)
+ */
+
+ register vm_object_t object;
+ register vm_pager_t pager;
+ int pageout_status[VM_PAGEOUT_PAGE_COUNT];
+ vm_page_t ms[VM_PAGEOUT_PAGE_COUNT];
+ int pageout_count;
+ int anyok=0;
+ int i;
+ vm_offset_t offset = m->offset;
+
+ object = m->object;
+ if (!object) {
+ printf("pager: object missing\n");
+ return 0;
+ }
+
+ /*
+ * Try to collapse the object before
+ * making a pager for it. We must
+ * unlock the page queues first.
+ * We try to defer the creation of a pager
+ * until all shadows are not paging. This
+ * allows vm_object_collapse to work better and
+ * helps control swap space size.
+ * (J. Dyson 11 Nov 93)
+ */
+
+ if (!object->pager &&
+ cnt.v_free_count < vm_pageout_free_min)
+ return 0;
+
+ if (!object->pager &&
+ object->shadow &&
+ object->shadow->paging_in_progress)
+ return 0;
+
+ if( !sync) {
+ if (object->shadow) {
+ vm_object_collapse(object);
+ if (!vm_page_lookup(object, offset))
+ return 0;
+ }
+
+ if ((m->flags & PG_BUSY) || (m->hold_count != 0)) {
+ return 0;
+ }
+ }
+
+ pageout_count = 1;
+ ms[0] = m;
+
+ if( pager = object->pager) {
+ for(i=1;i<VM_PAGEOUT_PAGE_COUNT;i++) {
+ if( ms[i] = vm_page_lookup( object, offset+i*NBPG)) {
+ if((((ms[i]->flags & (PG_CLEAN|PG_INACTIVE|PG_BUSY)) == PG_INACTIVE)
+ || (( ms[i]->flags & PG_CLEAN) == 0 && sync == VM_PAGEOUT_FORCE))
+ && (ms[i]->wire_count == 0)
+ && (ms[i]->hold_count == 0))
+ pageout_count++;
+ else
+ break;
+ } else
+ break;
+ }
+ for(i=0;i<pageout_count;i++) {
+ ms[i]->flags |= PG_BUSY;
+ pmap_page_protect(VM_PAGE_TO_PHYS(ms[i]), VM_PROT_READ);
+ }
+ object->paging_in_progress += pageout_count;
+ cnt.v_pageouts += pageout_count;
+ } else {
+
+ m->flags |= PG_BUSY;
+
+ pmap_page_protect(VM_PAGE_TO_PHYS(m), VM_PROT_READ);
+
+ cnt.v_pageouts++;
+
+ object->paging_in_progress++;
+
+ pager = vm_pager_allocate(PG_DFLT, (caddr_t)0,
+ object->size, VM_PROT_ALL, 0);
+ if (pager != NULL) {
+ vm_object_setpager(object, pager, 0, FALSE);
+ }
+ }
+
+ /*
+ * If there is no pager for the page,
+ * use the default pager. If there's
+ * no place to put the page at the
+ * moment, leave it in the laundry and
+ * hope that there will be paging space
+ * later.
+ */
+
+ if ((pager && pager->pg_type == PG_SWAP) ||
+ cnt.v_free_count >= vm_pageout_free_min) {
+ if( pageout_count == 1) {
+ pageout_status[0] = pager ?
+ vm_pager_put(pager, m,
+ ((sync || (object == kernel_object)) ? TRUE: FALSE)) :
+ VM_PAGER_FAIL;
+ } else {
+ if( !pager) {
+ for(i=0;i<pageout_count;i++)
+ pageout_status[i] = VM_PAGER_FAIL;
+ } else {
+ vm_pager_put_pages(pager, ms, pageout_count,
+ ((sync || (object == kernel_object)) ? TRUE : FALSE),
+ pageout_status);
+ }
+ }
+
+ } else {
+ for(i=0;i<pageout_count;i++)
+ pageout_status[i] = VM_PAGER_FAIL;
+ }
+
+ for(i=0;i<pageout_count;i++) {
+ switch (pageout_status[i]) {
+ case VM_PAGER_OK:
+ ms[i]->flags &= ~PG_LAUNDRY;
+ ++anyok;
+ break;
+ case VM_PAGER_PEND:
+ ms[i]->flags &= ~PG_LAUNDRY;
+ ++anyok;
+ break;
+ case VM_PAGER_BAD:
+ /*
+ * Page outside of range of object.
+ * Right now we essentially lose the
+ * changes by pretending it worked.
+ */
+ ms[i]->flags &= ~PG_LAUNDRY;
+ ms[i]->flags |= PG_CLEAN;
+ pmap_clear_modify(VM_PAGE_TO_PHYS(ms[i]));
+ break;
+ case VM_PAGER_ERROR:
+ case VM_PAGER_FAIL:
+ /*
+			 * If the page couldn't be paged out, then
+			 * reactivate it so it doesn't
+			 * clog the inactive list.  (We will
+			 * try paging it out again later).
+ */
+ if (ms[i]->flags & PG_INACTIVE)
+ vm_page_activate(ms[i]);
+ break;
+ case VM_PAGER_AGAIN:
+ break;
+ }
+
+
+ /*
+ * If the operation is still going, leave
+ * the page busy to block all other accesses.
+ * Also, leave the paging in progress
+ * indicator set so that we don't attempt an
+ * object collapse.
+ */
+ if (pageout_status[i] != VM_PAGER_PEND) {
+ PAGE_WAKEUP(ms[i]);
+ if (--object->paging_in_progress == 0)
+ wakeup((caddr_t) object);
+ if (pmap_is_referenced(VM_PAGE_TO_PHYS(ms[i]))) {
+ pmap_clear_reference(VM_PAGE_TO_PHYS(ms[i]));
+ if( ms[i]->flags & PG_INACTIVE)
+ vm_page_activate(ms[i]);
+ }
+ }
+ }
+ return anyok;
+}
+
+/*
+ * vm_pageout_object_deactivate_pages
+ *
+ *	deactivate enough pages to satisfy the inactive target
+ *	requirements or, if vm_pageout_proc_limit is set, then
+ *	deactivate all of the pages in the object and its
+ *	shadows.
+ *
+ * The object and map must be locked.
+ */
+int
+vm_pageout_object_deactivate_pages(map, object, count)
+ vm_map_t map;
+ vm_object_t object;
+ int count;
+{
+ register vm_page_t p, next;
+ int rcount;
+ int s;
+ int dcount;
+
+ dcount = 0;
+ if (count == 0)
+ count = 1;
+
+ if (object->shadow) {
+ int scount = count;
+ if( object->shadow->ref_count > 1)
+ scount /= object->shadow->ref_count;
+ if( scount)
+ dcount += vm_pageout_object_deactivate_pages(map, object->shadow, scount);
+ }
+
+ if (object->paging_in_progress)
+ return dcount;
+
+ /*
+	 * scan the object's entire memory queue
+ */
+ rcount = object->resident_page_count;
+ p = object->memq.tqh_first;
+ while (p && (rcount-- > 0)) {
+ next = p->listq.tqe_next;
+ vm_page_lock_queues();
+ /*
+		 * if a page is active, not wired, and is in the process's pmap,
+ * then deactivate the page.
+ */
+ if ((p->flags & (PG_ACTIVE|PG_BUSY)) == PG_ACTIVE &&
+ p->wire_count == 0 &&
+ p->hold_count == 0 &&
+ pmap_page_exists(vm_map_pmap(map), VM_PAGE_TO_PHYS(p))) {
+ if (!pmap_is_referenced(VM_PAGE_TO_PHYS(p))) {
+ p->act_count -= min(p->act_count, ACT_DECLINE);
+ /*
+				 * if the page's act_count has reached zero, deactivate it
+ */
+ if (!p->act_count) {
+ vm_page_deactivate(p);
+ pmap_page_protect(VM_PAGE_TO_PHYS(p),
+ VM_PROT_NONE);
+ /*
+				 * otherwise, the page will be deactivated on a later
+				 * go-around; move it to the end of the queue so the
+				 * other pages in memory age as well.
+ */
+ } else {
+ TAILQ_REMOVE(&vm_page_queue_active, p, pageq);
+ TAILQ_INSERT_TAIL(&vm_page_queue_active, p, pageq);
+ TAILQ_REMOVE(&object->memq, p, listq);
+ TAILQ_INSERT_TAIL(&object->memq, p, listq);
+ }
+ /*
+ * see if we are done yet
+ */
+ if (p->flags & PG_INACTIVE) {
+ --count;
+ ++dcount;
+ if (count <= 0 &&
+ cnt.v_inactive_count > cnt.v_inactive_target) {
+ vm_page_unlock_queues();
+ return dcount;
+ }
+ }
+
+ } else {
+ /*
+ * Move the page to the bottom of the queue.
+ */
+ pmap_clear_reference(VM_PAGE_TO_PHYS(p));
+ if (p->act_count < ACT_MAX)
+ p->act_count += ACT_ADVANCE;
+
+ TAILQ_REMOVE(&vm_page_queue_active, p, pageq);
+ TAILQ_INSERT_TAIL(&vm_page_queue_active, p, pageq);
+ TAILQ_REMOVE(&object->memq, p, listq);
+ TAILQ_INSERT_TAIL(&object->memq, p, listq);
+ }
+ }
+
+ vm_page_unlock_queues();
+ p = next;
+ }
+ return dcount;
+}
+
+
+/*
+ * Deactivate some number of pages in a map.  Try to do it fairly, but
+ * that is really hard to do.
+ */
+
+void
+vm_pageout_map_deactivate_pages(map, entry, count, freeer)
+ vm_map_t map;
+ vm_map_entry_t entry;
+ int *count;
+ int (*freeer)(vm_map_t, vm_object_t, int);
+{
+ vm_map_t tmpm;
+ vm_map_entry_t tmpe;
+ vm_object_t obj;
+ if (*count <= 0)
+ return;
+ vm_map_reference(map);
+ if (!lock_try_read(&map->lock)) {
+ vm_map_deallocate(map);
+ return;
+ }
+ if (entry == 0) {
+ tmpe = map->header.next;
+ while (tmpe != &map->header && *count > 0) {
+ vm_pageout_map_deactivate_pages(map, tmpe, count, freeer);
+ tmpe = tmpe->next;
+ };
+ } else if (entry->is_sub_map || entry->is_a_map) {
+ tmpm = entry->object.share_map;
+ tmpe = tmpm->header.next;
+ while (tmpe != &tmpm->header && *count > 0) {
+ vm_pageout_map_deactivate_pages(tmpm, tmpe, count, freeer);
+ tmpe = tmpe->next;
+ };
+ } else if (obj = entry->object.vm_object) {
+ *count -= (*freeer)(map, obj, *count);
+ }
+ lock_read_done(&map->lock);
+ vm_map_deallocate(map);
+ return;
+}
+
+/*
+ * vm_pageout_scan does the dirty work for the pageout daemon.
+ */
+int
+vm_pageout_scan()
+{
+ vm_page_t m;
+ int page_shortage, maxscan, maxlaunder;
+ int pages_freed, free, nproc;
+ int desired_free;
+ vm_page_t next;
+ struct proc *p;
+ vm_object_t object;
+ int s;
+ int force_wakeup = 0;
+
+morefree:
+ /*
+	 * scan the processes: if a process exceeds its RSS rlimit or is
+	 * swapped out, deactivate some of its pages
+ */
+
+rescanproc1:
+ for (p = (struct proc *)allproc; p != NULL; p = p->p_next) {
+ vm_offset_t size;
+ int overage;
+ vm_offset_t limit;
+
+ /*
+		 * skip system processes and processes that are
+		 * already exiting
+ */
+ if (p->p_flag & (P_SYSTEM|P_WEXIT)) {
+ continue;
+ }
+
+ /*
+ * if the process is in a non-running type state,
+ * don't touch it.
+ */
+ if (p->p_stat != SRUN && p->p_stat != SSLEEP) {
+ continue;
+ }
+
+ /*
+ * get a limit
+ */
+ limit = min(p->p_rlimit[RLIMIT_RSS].rlim_cur,
+ p->p_rlimit[RLIMIT_RSS].rlim_max);
+
+ /*
+		 * let processes that are swapped out really be swapped out:
+		 * set the limit to nothing (this will force a swap-out)
+ */
+ if ((p->p_flag & P_INMEM) == 0)
+ limit = 0;
+
+ size = p->p_vmspace->vm_pmap.pm_stats.resident_count * NBPG;
+ if (size >= limit) {
+ overage = (size - limit) / NBPG;
+ vm_pageout_map_deactivate_pages(&p->p_vmspace->vm_map,
+ (vm_map_entry_t) 0, &overage, vm_pageout_object_deactivate_pages);
+ }
+
+ }
+
+ if (((cnt.v_free_count + cnt.v_inactive_count) >=
+ (cnt.v_inactive_target + cnt.v_free_target)) &&
+ (cnt.v_free_count >= cnt.v_free_target))
+ return force_wakeup;
+
+ pages_freed = 0;
+ desired_free = cnt.v_free_target;
+
+ /*
+ * Start scanning the inactive queue for pages we can free.
+ * We keep scanning until we have enough free pages or
+ * we have scanned through the entire queue. If we
+ * encounter dirty pages, we start cleaning them.
+ */
+
+ maxlaunder = (cnt.v_free_target - cnt.v_free_count);
+ maxscan = cnt.v_inactive_count;
+rescan1:
+ m = vm_page_queue_inactive.tqh_first;
+ while (m && (maxscan-- > 0) &&
+ (cnt.v_free_count < desired_free) ) {
+ vm_page_t next;
+
+ next = m->pageq.tqe_next;
+
+ if( (m->flags & PG_INACTIVE) == 0) {
+			printf("vm_pageout_scan: page not inactive?\n");
+			m = next;
+			continue;
+ }
+
+ /*
+ * activate held pages
+ */
+ if (m->hold_count != 0) {
+ vm_page_activate(m);
+ m = next;
+ continue;
+ }
+
+ /*
+		 * don't mess with busy pages
+ */
+ if (m->flags & PG_BUSY) {
+ m = next;
+ continue;
+ }
+
+ /*
+		 * if the page is clean but has been referenced, then
+		 * reactivate the page; but if we are very low on memory
+		 * or the page has not been referenced, then we free it to the
+		 * vm system.
+ */
+ if (m->flags & PG_CLEAN) {
+ if ((cnt.v_free_count > vm_pageout_free_min) /* XXX */
+ && pmap_is_referenced(VM_PAGE_TO_PHYS(m))) {
+ vm_page_activate(m);
+ } else if (!m->act_count) {
+ pmap_page_protect(VM_PAGE_TO_PHYS(m),
+ VM_PROT_NONE);
+ vm_page_free(m);
+ ++pages_freed;
+ } else {
+ m->act_count -= min(m->act_count, ACT_DECLINE);
+ TAILQ_REMOVE(&vm_page_queue_inactive, m, pageq);
+ TAILQ_INSERT_TAIL(&vm_page_queue_inactive, m, pageq);
+ }
+ } else if ((m->flags & PG_LAUNDRY) && maxlaunder > 0) {
+ int written;
+ if (pmap_is_referenced(VM_PAGE_TO_PHYS(m))) {
+ pmap_clear_reference(VM_PAGE_TO_PHYS(m));
+ vm_page_activate(m);
+ m = next;
+ continue;
+ }
+ /*
+ * If a page is dirty, then it is either
+ * being washed (but not yet cleaned)
+ * or it is still in the laundry. If it is
+ * still in the laundry, then we start the
+ * cleaning operation.
+ */
+
+ if (written = vm_pageout_clean(m,0)) {
+ maxlaunder -= written;
+ }
+ /*
+ * if the next page has been re-activated, start scanning again
+ */
+ if (next && (next->flags & PG_INACTIVE) == 0)
+ goto rescan1;
+ } else if (pmap_is_referenced(VM_PAGE_TO_PHYS(m))) {
+ pmap_clear_reference(VM_PAGE_TO_PHYS(m));
+ vm_page_activate(m);
+ }
+ m = next;
+ }
+
+ /*
+	 * if we are in a low memory condition, swap out
+	 * inactive processes
+ */
+ if (cnt.v_free_count <= cnt.v_free_min) {
+ /*
+ * swap out inactive processes
+ */
+ swapout_threads();
+ }
+
+ /*
+ * Compute the page shortage. If we are still very low on memory
+	 * be sure that we will move at least a minimal number of pages
+	 * from active to inactive.
+ */
+
+ page_shortage = cnt.v_inactive_target -
+ (cnt.v_free_count + cnt.v_inactive_count);
+
+ if (page_shortage <= 0) {
+ if (pages_freed == 0) {
+ if( cnt.v_free_count < cnt.v_free_min) {
+ page_shortage = cnt.v_free_min - cnt.v_free_count;
+ } else if(((cnt.v_free_count + cnt.v_inactive_count) <
+ (cnt.v_free_min + cnt.v_inactive_target))) {
+ page_shortage = 1;
+ } else {
+ page_shortage = 0;
+ }
+ }
+
+ }
+
+ maxscan = cnt.v_active_count;
+ m = vm_page_queue_active.tqh_first;
+ while (m && maxscan-- && (page_shortage > 0)) {
+
+ next = m->pageq.tqe_next;
+
+ /*
+ * Don't deactivate pages that are busy.
+ */
+ if ((m->flags & PG_BUSY) || (m->hold_count != 0)) {
+ m = next;
+ continue;
+ }
+
+ if (pmap_is_referenced(VM_PAGE_TO_PHYS(m))) {
+ pmap_clear_reference(VM_PAGE_TO_PHYS(m));
+ if (m->act_count < ACT_MAX)
+ m->act_count += ACT_ADVANCE;
+ TAILQ_REMOVE(&vm_page_queue_active, m, pageq);
+ TAILQ_INSERT_TAIL(&vm_page_queue_active, m, pageq);
+ TAILQ_REMOVE(&m->object->memq, m, listq);
+ TAILQ_INSERT_TAIL(&m->object->memq, m, listq);
+ } else {
+ m->act_count -= min(m->act_count, ACT_DECLINE);
+
+ /*
+			 * if the page's act_count has reached zero, deactivate it
+ */
+ if (!m->act_count) {
+ vm_page_deactivate(m);
+ --page_shortage;
+ /*
+			 * otherwise, the page will be deactivated on a later
+			 * go-around; move it to the end of the queue so the
+			 * other pages in memory age as well.
+ */
+ } else {
+ TAILQ_REMOVE(&vm_page_queue_active, m, pageq);
+ TAILQ_INSERT_TAIL(&vm_page_queue_active, m, pageq);
+ TAILQ_REMOVE(&m->object->memq, m, listq);
+ TAILQ_INSERT_TAIL(&m->object->memq, m, listq);
+ }
+ }
+
+ m = next;
+ }
+
+ /*
+	 * if we have not freed any pages and we are desperate for memory,
+	 * then we keep trying until we get some (any) memory.
+ */
+
+ if( !force_wakeup && (swap_pager_full || !force_wakeup ||
+ (pages_freed == 0 && (cnt.v_free_count < cnt.v_free_min)))){
+ vm_pager_sync();
+ force_wakeup = 1;
+ goto morefree;
+ }
+ vm_page_pagesfreed += pages_freed;
+ return force_wakeup;
+}
+
+/*
+ * vm_pageout is the high level pageout daemon.
+ */
+void
+vm_pageout()
+{
+	extern int npendingio, swiopend;
+	static int nowakeup;
+ (void) spl0();
+
+ /*
+ * Initialize some paging parameters.
+ */
+
+vmretry:
+ cnt.v_free_min = 12;
+ cnt.v_free_reserved = 8;
+ if (cnt.v_free_min < 8)
+ cnt.v_free_min = 8;
+ if (cnt.v_free_min > 32)
+ cnt.v_free_min = 32;
+ vm_pageout_free_min = 4;
+ cnt.v_free_target = 2*cnt.v_free_min + cnt.v_free_reserved;
+ cnt.v_inactive_target = cnt.v_free_count / 12;
+ cnt.v_free_min += cnt.v_free_reserved;
+
+ /* XXX does not really belong here */
+ if (vm_page_max_wired == 0)
+ vm_page_max_wired = cnt.v_free_count / 3;
+
+
+ (void) swap_pager_alloc(0, 0, 0, 0);
+
+ /*
+ * The pageout daemon is never done, so loop
+ * forever.
+ */
+ while (TRUE) {
+ int force_wakeup;
+ extern struct loadavg averunnable;
+/*
+ cnt.v_free_min = 12 + averunnable.ldavg[0] / 1024;
+ cnt.v_free_target = 2*cnt.v_free_min + cnt.v_free_reserved;
+ cnt.v_inactive_target = cnt.v_free_target*2;
+*/
+
+ tsleep((caddr_t) &vm_pages_needed, PVM, "psleep", 0);
+
+ vm_pager_sync();
+ /*
+		 * The force-wakeup hack was added to eliminate delays and potential
+		 * deadlock.  It was possible for the page daemon to indefinitely
+		 * postpone waking up a process that might be waiting for memory.
+		 * The putmulti stuff seems to have aggravated the situation.
+ */
+ force_wakeup = vm_pageout_scan();
+ vm_pager_sync();
+ if( force_wakeup)
+ wakeup( (caddr_t) &cnt.v_free_count);
+ cnt.v_scan++;
+ wakeup((caddr_t) kmem_map);
+ }
+}
+
diff --git a/sys/vm/vm_pageout.h b/sys/vm/vm_pageout.h
new file mode 100644
index 0000000..834aee5
--- /dev/null
+++ b/sys/vm/vm_pageout.h
@@ -0,0 +1,112 @@
+/*
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * The Mach Operating System project at Carnegie-Mellon University.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)vm_pageout.h 8.2 (Berkeley) 1/12/94
+ *
+ *
+ * Copyright (c) 1987, 1990 Carnegie-Mellon University.
+ * All rights reserved.
+ *
+ * Author: Avadis Tevanian, Jr.
+ *
+ * Permission to use, copy, modify and distribute this software and
+ * its documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ *
+ * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
+ * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
+ * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
+ * School of Computer Science
+ * Carnegie Mellon University
+ * Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie the
+ * rights to redistribute these changes.
+ */
+
+/*
+ * Header file for pageout daemon.
+ */
+
+/*
+ * Exported data structures.
+ */
+
+extern int vm_pages_needed; /* should be some "event" structure */
+simple_lock_data_t vm_pages_needed_lock;
+extern int vm_pageout_pages_needed;
+
+#define VM_PAGEOUT_ASYNC 0
+#define VM_PAGEOUT_SYNC 1
+#define VM_PAGEOUT_FORCE 2
+
+/*
+ * Exported routines.
+ */
+
+/*
+ * Signal pageout-daemon and wait for it.
+ */
+
+#define VM_WAIT vm_wait()
+
+inline static void vm_wait() {
+ extern struct proc *curproc, *pageproc;
+ int s;
+ s = splhigh();
+ if (curproc == pageproc) {
+ vm_pageout_pages_needed = 1;
+ tsleep((caddr_t) &vm_pageout_pages_needed, PSWP, "vmwait", 0);
+ vm_pageout_pages_needed = 0;
+ } else {
+ wakeup((caddr_t) &vm_pages_needed);
+ tsleep((caddr_t) &cnt.v_free_count, PVM, "vmwait", 0);
+ }
+ splx(s);
+}
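+
+/*
+ * Illustrative sketch (hypothetical caller): the usual allocation retry
+ * loop built on VM_WAIT: wake the pageout daemon, sleep until pages have
+ * been freed, then try again.  vm_page_alloc() is declared in vm_page.h.
+ */
+#if 0
+static vm_page_t
+example_alloc_page(object, offset)
+	vm_object_t object;
+	vm_offset_t offset;
+{
+	vm_page_t m;
+
+	while ((m = vm_page_alloc(object, offset)) == NULL)
+		VM_WAIT;	/* expands to vm_wait() above */
+	return (m);
+}
+#endif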
+
+
+#ifdef KERNEL
+void vm_pageout __P((void));
+int vm_pageout_scan __P((void));
+void vm_pageout_page __P((vm_page_t, vm_object_t));
+void vm_pageout_cluster __P((vm_page_t, vm_object_t));
+#endif
diff --git a/sys/vm/vm_pager.c b/sys/vm/vm_pager.c
new file mode 100644
index 0000000..1e4b201
--- /dev/null
+++ b/sys/vm/vm_pager.c
@@ -0,0 +1,322 @@
+/*
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * The Mach Operating System project at Carnegie-Mellon University.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)vm_pager.c 8.6 (Berkeley) 1/12/94
+ *
+ *
+ * Copyright (c) 1987, 1990 Carnegie-Mellon University.
+ * All rights reserved.
+ *
+ * Authors: Avadis Tevanian, Jr., Michael Wayne Young
+ *
+ * Permission to use, copy, modify and distribute this software and
+ * its documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ *
+ * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
+ * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
+ * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
+ * School of Computer Science
+ * Carnegie Mellon University
+ * Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie the
+ * rights to redistribute these changes.
+ */
+
+/*
+ * Paging space routine stubs. Emulates a matchmaker-like interface
+ * for builtin pagers.
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+
+#include <vm/vm.h>
+#include <vm/vm_page.h>
+#include <vm/vm_kern.h>
+
+extern struct pagerops swappagerops;
+extern struct pagerops vnodepagerops;
+extern struct pagerops devicepagerops;
+
+struct pagerops *pagertab[] = {
+ &swappagerops, /* PG_SWAP */
+ &vnodepagerops, /* PG_VNODE */
+ &devicepagerops, /* PG_DEV */
+};
+int npagers = sizeof (pagertab) / sizeof (pagertab[0]);
+
+struct pagerops *dfltpagerops = NULL; /* default pager */
+
+/*
+ * Kernel address space for mapping pages.
+ * Used by pagers where KVAs are needed for IO.
+ *
+ * XXX needs to be large enough to support the number of pending async
+ * cleaning requests (NPENDINGIO == 64) * the maximum swap cluster size
+ * (MAXPHYS == 64k) if you want to get the most efficiency.
+ */
+#define PAGER_MAP_SIZE (4 * 1024 * 1024)
+
+int pager_map_size = PAGER_MAP_SIZE;
+vm_map_t pager_map;
+boolean_t pager_map_wanted;
+vm_offset_t pager_sva, pager_eva;
+
+void
+vm_pager_init()
+{
+ struct pagerops **pgops;
+
+ /*
+ * Allocate a kernel submap for tracking get/put page mappings
+ */
+/*
+ pager_map = kmem_suballoc(kernel_map, &pager_sva, &pager_eva,
+ PAGER_MAP_SIZE, FALSE);
+*/
+ /*
+ * Initialize known pagers
+ */
+ for (pgops = pagertab; pgops < &pagertab[npagers]; pgops++)
+ if (pgops)
+ (*(*pgops)->pgo_init)();
+ if (dfltpagerops == NULL)
+ panic("no default pager");
+}
+
+/*
+ * Allocate an instance of a pager of the given type.
+ * Size, protection and offset parameters are passed in for pagers that
+ * need to perform page-level validation (e.g. the device pager).
+ */
+vm_pager_t
+vm_pager_allocate(type, handle, size, prot, off)
+ int type;
+ caddr_t handle;
+ vm_size_t size;
+ vm_prot_t prot;
+ vm_offset_t off;
+{
+ struct pagerops *ops;
+
+ ops = (type == PG_DFLT) ? dfltpagerops : pagertab[type];
+ if (ops)
+ return ((*ops->pgo_alloc)(handle, size, prot, off));
+ return (NULL);
+}
+
+void
+vm_pager_deallocate(pager)
+ vm_pager_t pager;
+{
+ if (pager == NULL)
+ panic("vm_pager_deallocate: null pager");
+
+ (*pager->pg_ops->pgo_dealloc)(pager);
+}
+
+
+int
+vm_pager_get_pages(pager, m, count, reqpage, sync)
+ vm_pager_t pager;
+ vm_page_t *m;
+ int count;
+ int reqpage;
+ boolean_t sync;
+{
+ extern boolean_t vm_page_zero_fill();
+ extern int vm_pageout_count;
+ int i;
+
+ if (pager == NULL) {
+ for (i=0;i<count;i++) {
+ if( i != reqpage) {
+ PAGE_WAKEUP(m[i]);
+ vm_page_free(m[i]);
+ }
+ }
+ vm_page_zero_fill(m[reqpage]);
+ return VM_PAGER_OK;
+ }
+
+ if( pager->pg_ops->pgo_getpages == 0) {
+ for(i=0;i<count;i++) {
+ if( i != reqpage) {
+ PAGE_WAKEUP(m[i]);
+ vm_page_free(m[i]);
+ }
+ }
+ return(VM_PAGER_GET(pager, m[reqpage], sync));
+ } else {
+ return(VM_PAGER_GET_MULTI(pager, m, count, reqpage, sync));
+ }
+}
+
+int
+vm_pager_put_pages(pager, m, count, sync, rtvals)
+ vm_pager_t pager;
+ vm_page_t *m;
+ int count;
+ boolean_t sync;
+ int *rtvals;
+{
+ int i;
+
+ if( pager->pg_ops->pgo_putpages)
+ return(VM_PAGER_PUT_MULTI(pager, m, count, sync, rtvals));
+ else {
+ for(i=0;i<count;i++) {
+ rtvals[i] = VM_PAGER_PUT( pager, m[i], sync);
+ }
+ return rtvals[0];
+ }
+}
+
+boolean_t
+vm_pager_has_page(pager, offset)
+ vm_pager_t pager;
+ vm_offset_t offset;
+{
+ if (pager == NULL)
+ panic("vm_pager_has_page: null pager");
+ return ((*pager->pg_ops->pgo_haspage)(pager, offset));
+}
+
+/*
+ * Called by pageout daemon before going back to sleep.
+ * Gives pagers a chance to clean up any completed async paging operations.
+ */
+void
+vm_pager_sync()
+{
+ struct pagerops **pgops;
+
+ for (pgops = pagertab; pgops < &pagertab[npagers]; pgops++)
+ if (pgops)
+ (*(*pgops)->pgo_putpage)(NULL, NULL, 0);
+}
+
+#if 0
+void
+vm_pager_cluster(pager, offset, loff, hoff)
+ vm_pager_t pager;
+ vm_offset_t offset;
+ vm_offset_t *loff;
+ vm_offset_t *hoff;
+{
+ if (pager == NULL)
+ panic("vm_pager_cluster: null pager");
+ return ((*pager->pg_ops->pgo_cluster)(pager, offset, loff, hoff));
+}
+#endif
+
+vm_offset_t
+vm_pager_map_page(m)
+ vm_page_t m;
+{
+ vm_offset_t kva;
+
+ kva = kmem_alloc_wait(pager_map, PAGE_SIZE);
+ pmap_enter(vm_map_pmap(pager_map), kva, VM_PAGE_TO_PHYS(m),
+ VM_PROT_DEFAULT, TRUE);
+ return(kva);
+}
+
+void
+vm_pager_unmap_page(kva)
+ vm_offset_t kva;
+{
+ kmem_free_wakeup(pager_map, kva, PAGE_SIZE);
+}
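+
+/*
+ * Illustrative sketch (hypothetical helper): the map/unmap pair above
+ * gives a pager a temporary kernel mapping of a page so its contents can
+ * be touched.  This assumes the pager_map submap has been set up (its
+ * creation is commented out in vm_pager_init() above) and that bzero()
+ * is available from systm.h.
+ */
+#if 0
+static void
+example_zero_page(m)
+	vm_page_t m;
+{
+	vm_offset_t kva;
+
+	kva = vm_pager_map_page(m);	/* enter the page into pager_map */
+	bzero((caddr_t) kva, PAGE_SIZE);
+	vm_pager_unmap_page(kva);	/* release the kernel va */
+}
+#endif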
+
+vm_page_t
+vm_pager_atop(kva)
+ vm_offset_t kva;
+{
+ vm_offset_t pa;
+
+ pa = pmap_extract(vm_map_pmap(pager_map), kva);
+ if (pa == 0)
+ panic("vm_pager_atop");
+ return (PHYS_TO_VM_PAGE(pa));
+}
+
+vm_pager_t
+vm_pager_lookup(pglist, handle)
+ register struct pagerlst *pglist;
+ caddr_t handle;
+{
+ register vm_pager_t pager;
+
+ for (pager = pglist->tqh_first; pager; pager = pager->pg_list.tqe_next)
+ if (pager->pg_handle == handle)
+ return (pager);
+ return (NULL);
+}
+
+/*
+ * This routine gains a reference to the object.
+ * Explicit deallocation is necessary.
+ */
+int
+pager_cache(object, should_cache)
+ vm_object_t object;
+ boolean_t should_cache;
+{
+ if (object == NULL)
+ return (KERN_INVALID_ARGUMENT);
+
+ vm_object_cache_lock();
+ vm_object_lock(object);
+ if (should_cache)
+ object->flags |= OBJ_CANPERSIST;
+ else
+ object->flags &= ~OBJ_CANPERSIST;
+ vm_object_unlock(object);
+ vm_object_cache_unlock();
+
+ vm_object_deallocate(object);
+
+ return (KERN_SUCCESS);
+}
diff --git a/sys/vm/vm_pager.h b/sys/vm/vm_pager.h
new file mode 100644
index 0000000..3e20e50
--- /dev/null
+++ b/sys/vm/vm_pager.h
@@ -0,0 +1,154 @@
+
+/*
+ * Copyright (c) 1990 University of Utah.
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * the Systems Programming Group of the University of Utah Computer
+ * Science Department.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)vm_pager.h 8.4 (Berkeley) 1/12/94
+ */
+
+/*
+ * Pager routine interface definition.
+ * For BSD we use a cleaner version of the internal pager interface.
+ */
+
+#ifndef _VM_PAGER_
+#define _VM_PAGER_
+
+TAILQ_HEAD(pagerlst, pager_struct);
+
+struct pager_struct {
+ TAILQ_ENTRY(pager_struct) pg_list; /* links for list management */
+ caddr_t pg_handle; /* ext. handle (vp, dev, fp) */
+ int pg_type; /* type of pager */
+ int pg_flags; /* flags */
+ struct pagerops *pg_ops; /* pager operations */
+ void *pg_data; /* private pager data */
+};
+
+/* pager types */
+#define PG_DFLT -1
+#define PG_SWAP 0
+#define PG_VNODE 1
+#define PG_DEVICE 2
+
+/* flags */
+#define PG_CLUSTERGET 1
+#define PG_CLUSTERPUT 2
+
+struct pagerops {
+ void (*pgo_init) /* Initialize pager. */
+ __P((void));
+ vm_pager_t (*pgo_alloc) /* Allocate pager. */
+ __P((caddr_t, vm_size_t, vm_prot_t, vm_offset_t));
+ void (*pgo_dealloc) /* Disassociate. */
+ __P((vm_pager_t));
+ int (*pgo_getpage)
+ __P((vm_pager_t, vm_page_t, boolean_t));
+ int (*pgo_getpages) /* Get (read) page. */
+ __P((vm_pager_t, vm_page_t *, int, int, boolean_t));
+ int (*pgo_putpage)
+ __P((vm_pager_t, vm_page_t, boolean_t));
+ int (*pgo_putpages) /* Put (write) page. */
+ __P((vm_pager_t, vm_page_t *, int, boolean_t, int *));
+ boolean_t (*pgo_haspage) /* Does pager have page? */
+ __P((vm_pager_t, vm_offset_t));
+};
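+
+/*
+ * Illustrative sketch (hypothetical pager): a minimal pagerops vector as
+ * a new pager might supply it; the ex_* functions are assumptions, not
+ * part of any real pager.  The optional multi-page entries may be left
+ * zero, in which case vm_pager_get_pages()/vm_pager_put_pages() fall
+ * back on the single-page entry points.  The table of real pagers lives
+ * in vm_pager.c.
+ */
+#if 0
+struct pagerops examplepagerops = {
+	ex_init,	/* pgo_init */
+	ex_alloc,	/* pgo_alloc */
+	ex_dealloc,	/* pgo_dealloc */
+	ex_getpage,	/* pgo_getpage */
+	0,		/* pgo_getpages (optional) */
+	ex_putpage,	/* pgo_putpage */
+	0,		/* pgo_putpages (optional) */
+	ex_haspage	/* pgo_haspage */
+};
+#endif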
+
+#define VM_PAGER_ALLOC(h, s, p, o) (*(pg)->pg_ops->pgo_alloc)(h, s, p, o)
+#define VM_PAGER_DEALLOC(pg) (*(pg)->pg_ops->pgo_dealloc)(pg)
+#define VM_PAGER_GET(pg, m, s) (*(pg)->pg_ops->pgo_getpage)(pg, m, s)
+#define VM_PAGER_GET_MULTI(pg, m, c, r, s) (*(pg)->pg_ops->pgo_getpages)(pg, m, c, r, s)
+#define VM_PAGER_PUT(pg, m, s) (*(pg)->pg_ops->pgo_putpage)(pg, m, s)
+#define VM_PAGER_PUT_MULTI(pg, m, c, s, rtval) (*(pg)->pg_ops->pgo_putpages)(pg, m, c, s, rtval)
+#define VM_PAGER_HASPAGE(pg, o) (*(pg)->pg_ops->pgo_haspage)(pg, o)
+
+/*
+ * get/put return values
+ * OK operation was successful
+ * BAD specified data was out of the accepted range
+ * FAIL specified data was in range, but doesn't exist
+ *	PEND	operation was initiated but not completed
+ * ERROR error while accessing data that is in range and exists
+ * AGAIN temporary resource shortage prevented operation from happening
+ */
+#define VM_PAGER_OK 0
+#define VM_PAGER_BAD 1
+#define VM_PAGER_FAIL 2
+#define VM_PAGER_PEND 3
+#define VM_PAGER_ERROR 4
+#define VM_PAGER_AGAIN 5
+
+#ifdef KERNEL
+extern struct pagerops *dfltpagerops;
+
+vm_pager_t vm_pager_allocate
+ __P((int, caddr_t, vm_size_t, vm_prot_t, vm_offset_t));
+vm_page_t vm_pager_atop __P((vm_offset_t));
+void vm_pager_deallocate __P((vm_pager_t));
+int vm_pager_get_pages
+ __P((vm_pager_t, vm_page_t *, int, int, boolean_t));
+boolean_t vm_pager_has_page __P((vm_pager_t, vm_offset_t));
+void vm_pager_init __P((void));
+vm_pager_t vm_pager_lookup __P((struct pagerlst *, caddr_t));
+vm_offset_t vm_pager_map_pages __P((vm_page_t *, int, boolean_t));
+int vm_pager_put_pages
+ __P((vm_pager_t, vm_page_t *, int, boolean_t, int *));
+void vm_pager_sync __P((void));
+void vm_pager_unmap_pages __P((vm_offset_t, int));
+
+#define vm_pager_cancluster(p, b) ((p)->pg_flags & (b))
+
+/*
+ * XXX compat with old interface
+ */
+#define vm_pager_get(p, m, s) \
+({ \
+ vm_page_t ml[1]; \
+ ml[0] = (m); \
+ vm_pager_get_pages(p, ml, 1, 0, s); \
+})
+
+#define vm_pager_put(p, m, s) \
+({ \
+ int rtval; \
+ vm_page_t ml[1]; \
+ ml[0] = (m); \
+ vm_pager_put_pages(p, ml, 1, s, &rtval); \
+ rtval; \
+})
+#endif
+
+#endif /* _VM_PAGER_ */
diff --git a/sys/vm/vm_param.h b/sys/vm/vm_param.h
new file mode 100644
index 0000000..4a785ce
--- /dev/null
+++ b/sys/vm/vm_param.h
@@ -0,0 +1,189 @@
+/*
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * The Mach Operating System project at Carnegie-Mellon University.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)vm_param.h 8.1 (Berkeley) 6/11/93
+ *
+ *
+ * Copyright (c) 1987, 1990 Carnegie-Mellon University.
+ * All rights reserved.
+ *
+ * Authors: Avadis Tevanian, Jr., Michael Wayne Young
+ *
+ * Permission to use, copy, modify and distribute this software and
+ * its documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ *
+ * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
+ * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
+ * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
+ * School of Computer Science
+ * Carnegie Mellon University
+ * Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie the
+ * rights to redistribute these changes.
+ */
+
+/*
+ * Machine independent virtual memory parameters.
+ */
+
+#ifndef _VM_PARAM_
+#define _VM_PARAM_
+
+#include <machine/vmparam.h>
+
+/*
+ * This belongs in types.h, but breaks too many existing programs.
+ */
+typedef int boolean_t;
+#define TRUE 1
+#define FALSE 0
+
+/*
+ * The machine-independent pages are referred to as PAGES.  A page
+ * is some number of hardware pages, depending on the target machine.
+ */
+#define DEFAULT_PAGE_SIZE 4096
+
+#if 0
+
+/*
+ * All references to the size of a page should be done with PAGE_SIZE
+ * or PAGE_SHIFT. The fact they are variables is hidden here so that
+ * we can easily make them constant if we so desire.
+ */
+#ifndef PAGE_SIZE
+#define PAGE_SIZE cnt.v_page_size /* size of page */
+#endif
+#ifndef PAGE_MASK
+#define PAGE_MASK page_mask /* size of page - 1 */
+#endif
+#ifndef PAGE_SHIFT
+#define PAGE_SHIFT page_shift /* bits to shift for pages */
+#endif
+
+#endif
+
+#ifdef KERNEL
+extern vm_size_t page_mask;
+extern int page_shift;
+#endif
+
+/*
+ * CTL_VM identifiers
+ */
+#define VM_METER 1 /* struct vmmeter */
+#define VM_LOADAVG 2 /* struct loadavg */
+#define VM_MAXID 3 /* number of valid vm ids */
+
+#define CTL_VM_NAMES { \
+ { 0, 0 }, \
+ { "vmmeter", CTLTYPE_STRUCT }, \
+ { "loadavg", CTLTYPE_STRUCT }, \
+}
+
+/*
+ * Return values from the VM routines.
+ */
+#define KERN_SUCCESS 0
+#define KERN_INVALID_ADDRESS 1
+#define KERN_PROTECTION_FAILURE 2
+#define KERN_NO_SPACE 3
+#define KERN_INVALID_ARGUMENT 4
+#define KERN_FAILURE 5
+#define KERN_RESOURCE_SHORTAGE 6
+#define KERN_NOT_RECEIVER 7
+#define KERN_NO_ACCESS 8
+
+#ifndef ASSEMBLER
+/*
+ * Convert addresses to pages and vice versa.
+ * No rounding is used.
+ */
+#ifdef KERNEL
+
+#if 0
+
+#ifndef atop
+#define atop(x) (((unsigned)(x)) >> PAGE_SHIFT)
+#endif
+#ifndef ptoa
+#define ptoa(x) ((vm_offset_t)((x) << PAGE_SHIFT))
+#endif
+
+/*
+ * Round off or truncate to the nearest page. These will work
+ * for either addresses or counts (i.e., 1 byte rounds to 1 page).
+ */
+#ifndef round_page
+#define round_page(x) \
+ ((vm_offset_t)((((vm_offset_t)(x)) + PAGE_MASK) & ~PAGE_MASK))
+#endif
+#ifndef trunc_page
+#define trunc_page(x) \
+ ((vm_offset_t)(((vm_offset_t)(x)) & ~PAGE_MASK))
+#endif
+#ifndef num_pages
+#define num_pages(x) \
+ ((vm_offset_t)((((vm_offset_t)(x)) + PAGE_MASK) >> PAGE_SHIFT))
+#endif
+
+#endif
+#define num_pages(x) \
+ ((vm_offset_t)((((vm_offset_t)(x)) + PAGE_MASK) >> PAGE_SHIFT))
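+
+/*
+ * Worked example, assuming a 4096-byte page: round_page(4097) == 8192,
+ * trunc_page(4097) == 4096 and num_pages(4097) == 2, while an exact
+ * multiple such as 8192 is left unchanged by round_page()/trunc_page()
+ * and counts as exactly two pages.
+ */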
+
+extern vm_size_t mem_size; /* size of physical memory (bytes) */
+extern vm_offset_t first_addr; /* first physical page */
+extern vm_offset_t last_addr; /* last physical page */
+
+#else
+#if 0
+/* out-of-kernel versions of round_page and trunc_page */
+#define round_page(x) \
+ ((((vm_offset_t)(x) + (vm_page_size - 1)) / vm_page_size) * vm_page_size)
+#define trunc_page(x) \
+ ((((vm_offset_t)(x)) / vm_page_size) * vm_page_size)
+#endif
+
+#endif /* KERNEL */
+#endif /* ASSEMBLER */
+#endif /* _VM_PARAM_ */
diff --git a/sys/vm/vm_prot.h b/sys/vm/vm_prot.h
new file mode 100644
index 0000000..ee009bc
--- /dev/null
+++ b/sys/vm/vm_prot.h
@@ -0,0 +1,102 @@
+/*
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * The Mach Operating System project at Carnegie-Mellon University.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)vm_prot.h 8.1 (Berkeley) 6/11/93
+ *
+ *
+ * Copyright (c) 1987, 1990 Carnegie-Mellon University.
+ * All rights reserved.
+ *
+ * Authors: Avadis Tevanian, Jr., Michael Wayne Young
+ *
+ * Permission to use, copy, modify and distribute this software and
+ * its documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ *
+ * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
+ * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
+ * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
+ * School of Computer Science
+ * Carnegie Mellon University
+ * Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie the
+ * rights to redistribute these changes.
+ */
+
+/*
+ * Virtual memory protection definitions.
+ */
+
+#ifndef _VM_PROT_
+#define _VM_PROT_
+
+/*
+ * Types defined:
+ *
+ * vm_prot_t VM protection values.
+ */
+
+typedef u_char vm_prot_t;
+
+/*
+ * Protection values, defined as bits within the vm_prot_t type
+ */
+
+#define VM_PROT_NONE ((vm_prot_t) 0x00)
+
+#define VM_PROT_READ ((vm_prot_t) 0x01) /* read permission */
+#define VM_PROT_WRITE ((vm_prot_t) 0x02) /* write permission */
+#define VM_PROT_EXECUTE ((vm_prot_t) 0x04) /* execute permission */
+
+/*
+ * The default protection for newly-created virtual memory
+ */
+
+#define VM_PROT_DEFAULT (VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE)
+
+/*
+ * The maximum privileges possible, for parameter checking.
+ */
+
+#define VM_PROT_ALL (VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE)
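+
+/*
+ * Example (hypothetical use): protections combine and test as bit masks;
+ * for instance, a copy-on-write mapping is entered with the write bit
+ * stripped,
+ *
+ *	prot = VM_PROT_DEFAULT & ~VM_PROT_WRITE;
+ *
+ * which leaves VM_PROT_READ|VM_PROT_EXECUTE.
+ */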
+
+#endif /* _VM_PROT_ */
diff --git a/sys/vm/vm_swap.c b/sys/vm/vm_swap.c
new file mode 100644
index 0000000..5008a09
--- /dev/null
+++ b/sys/vm/vm_swap.c
@@ -0,0 +1,439 @@
+/*
+ * Copyright (c) 1982, 1986, 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)vm_swap.c 8.5 (Berkeley) 2/17/94
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/buf.h>
+#include <sys/conf.h>
+#include <sys/proc.h>
+#include <sys/namei.h>
+#include <sys/dmap.h> /* XXX */
+#include <sys/vnode.h>
+#include <sys/map.h>
+#include <sys/file.h>
+
+#include <miscfs/specfs/specdev.h>
+
+/*
+ * Indirect driver for multi-controller paging.
+ */
+
+int nswap, nswdev;
+int vm_swap_size;
+#ifdef SEQSWAP
+int niswdev; /* number of interleaved swap devices */
+int niswap; /* size of interleaved swap area */
+#endif
+
+/*
+ * Set up swap devices.
+ * Initialize linked list of free swap
+ * headers. These do not actually point
+ * to buffers, but rather to pages that
+ * are being swapped in and out.
+ */
+void
+swapinit()
+{
+ register int i;
+ register struct buf *sp = swbuf;
+ register struct proc *p = &proc0; /* XXX */
+ struct swdevt *swp;
+ int error;
+
+ /*
+ * Count swap devices, and adjust total swap space available.
+ * Some of the space will not be countable until later (dynamically
+ * configurable devices) and some of the counted space will not be
+ * available until a swapon() system call is issued; both usually
+ * happen when the system goes multi-user.
+ *
+ * If using NFS for swap, swdevt[0] will already be bdevvp'd. XXX
+ */
+#ifdef SEQSWAP
+ nswdev = niswdev = 0;
+ nswap = niswap = 0;
+ /*
+ * All interleaved devices must come first
+ */
+ for (swp = swdevt; swp->sw_dev != NODEV || swp->sw_vp != NULL; swp++) {
+ if (swp->sw_flags & SW_SEQUENTIAL)
+ break;
+ niswdev++;
+ if (swp->sw_nblks > niswap)
+ niswap = swp->sw_nblks;
+ }
+ niswap = roundup(niswap, dmmax);
+ niswap *= niswdev;
+ if (swdevt[0].sw_vp == NULL &&
+ bdevvp(swdevt[0].sw_dev, &swdevt[0].sw_vp))
+ panic("swapvp");
+ /*
+ * The remainder must be sequential
+ */
+ for ( ; swp->sw_dev != NODEV; swp++) {
+ if ((swp->sw_flags & SW_SEQUENTIAL) == 0)
+ panic("binit: mis-ordered swap devices");
+ nswdev++;
+ if (swp->sw_nblks > 0) {
+ if (swp->sw_nblks % dmmax)
+ swp->sw_nblks -= (swp->sw_nblks % dmmax);
+ nswap += swp->sw_nblks;
+ }
+ }
+ nswdev += niswdev;
+ if (nswdev == 0)
+ panic("swapinit");
+ nswap += niswap;
+#else
+ nswdev = 0;
+ nswap = 0;
+ for (swp = swdevt; swp->sw_dev != NODEV || swp->sw_vp != NULL; swp++) {
+ nswdev++;
+ if (swp->sw_nblks > nswap)
+ nswap = swp->sw_nblks;
+ }
+ if (nswdev == 0)
+ panic("swapinit");
+ if (nswdev > 1)
+ nswap = ((nswap + dmmax - 1) / dmmax) * dmmax;
+ nswap *= nswdev;
+ if (swdevt[0].sw_vp == NULL &&
+ bdevvp(swdevt[0].sw_dev, &swdevt[0].sw_vp))
+ panic("swapvp");
+#endif
+ if (nswap == 0)
+ printf("WARNING: no swap space found\n");
+ else if (error = swfree(p, 0)) {
+ printf("swfree errno %d\n", error); /* XXX */
+ panic("swapinit swfree 0");
+ }
+
+ /*
+ * Now set up swap buffer headers.
+ */
+ for (i = 0; i < nswbuf - 1; i++, sp++) {
+ TAILQ_INSERT_HEAD(&bswlist, sp, b_freelist);
+ sp->b_rcred = sp->b_wcred = p->p_ucred;
+ sp->b_vnbufs.le_next = NOLIST;
+ }
+ sp->b_rcred = sp->b_wcred = p->p_ucred;
+ sp->b_vnbufs.le_next = NOLIST;
+ sp->b_actf = NULL;
+}
+
+void
+swstrategy(bp)
+ register struct buf *bp;
+{
+ int sz, off, seg, index;
+ register struct swdevt *sp;
+ struct vnode *vp;
+
+#ifdef GENERIC
+ /*
+ * A mini-root gets copied into the front of the swap
+ * and we run over top of the swap area just long
+ * enough for us to do a mkfs and restor of the real
+ * root (sure beats rewriting standalone restor).
+ */
+#define MINIROOTSIZE 4096
+ if (rootdev == dumpdev)
+ bp->b_blkno += MINIROOTSIZE;
+#endif
+ sz = howmany(bp->b_bcount, DEV_BSIZE);
+ if (bp->b_blkno + sz > nswap) {
+ bp->b_error = EINVAL;
+ bp->b_flags |= B_ERROR;
+ biodone(bp);
+ return;
+ }
+ if (nswdev > 1) {
+#ifdef SEQSWAP
+ if (bp->b_blkno < niswap) {
+ if (niswdev > 1) {
+ off = bp->b_blkno % dmmax;
+ if (off+sz > dmmax) {
+ bp->b_error = EINVAL;
+ bp->b_flags |= B_ERROR;
+ biodone(bp);
+ return;
+ }
+ seg = bp->b_blkno / dmmax;
+ index = seg % niswdev;
+ seg /= niswdev;
+ bp->b_blkno = seg*dmmax + off;
+ } else
+ index = 0;
+ } else {
+ register struct swdevt *swp;
+
+ bp->b_blkno -= niswap;
+ for (index = niswdev, swp = &swdevt[niswdev];
+ swp->sw_dev != NODEV;
+ swp++, index++) {
+ if (bp->b_blkno < swp->sw_nblks)
+ break;
+ bp->b_blkno -= swp->sw_nblks;
+ }
+ if (swp->sw_dev == NODEV ||
+ bp->b_blkno+sz > swp->sw_nblks) {
+ bp->b_error = swp->sw_dev == NODEV ?
+ ENODEV : EINVAL;
+ bp->b_flags |= B_ERROR;
+ biodone(bp);
+ return;
+ }
+ }
+#else
+ off = bp->b_blkno % dmmax;
+ if (off+sz > dmmax) {
+ bp->b_error = EINVAL;
+ bp->b_flags |= B_ERROR;
+ biodone(bp);
+ return;
+ }
+ seg = bp->b_blkno / dmmax;
+ index = seg % nswdev;
+ seg /= nswdev;
+ bp->b_blkno = seg*dmmax + off;
+#endif
+ } else
+ index = 0;
+ sp = &swdevt[index];
+ if ((bp->b_dev = sp->sw_dev) == NODEV)
+ panic("swstrategy");
+ if (sp->sw_vp == NULL) {
+ bp->b_error = ENODEV;
+ bp->b_flags |= B_ERROR;
+ biodone(bp);
+ return;
+ }
+ VHOLD(sp->sw_vp);
+ if ((bp->b_flags & B_READ) == 0) {
+ if (vp = bp->b_vp) {
+ vp->v_numoutput--;
+ if ((vp->v_flag & VBWAIT) && vp->v_numoutput <= 0) {
+ vp->v_flag &= ~VBWAIT;
+ wakeup((caddr_t)&vp->v_numoutput);
+ }
+ }
+ sp->sw_vp->v_numoutput++;
+ }
+ if (bp->b_vp != NULL)
+ brelvp(bp);
+ bp->b_vp = sp->sw_vp;
+ VOP_STRATEGY(bp);
+}
+
+/*
+ * System call swapon(name) enables swapping on device name,
+ * which must be in the swdevsw. Return EBUSY
+ * if already swapping on this device.
+ */
+struct swapon_args {
+ char *name;
+};
+/* ARGSUSED */
+int
+swapon(p, uap, retval)
+ struct proc *p;
+ struct swapon_args *uap;
+ int *retval;
+{
+ register struct vnode *vp;
+ register struct swdevt *sp;
+ dev_t dev;
+ int error;
+ struct nameidata nd;
+
+ if (error = suser(p->p_ucred, &p->p_acflag))
+ return (error);
+ NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->name, p);
+ if (error = namei(&nd))
+ return (error);
+ vp = nd.ni_vp;
+ if (vp->v_type != VBLK) {
+ vrele(vp);
+ return (ENOTBLK);
+ }
+ dev = (dev_t)vp->v_rdev;
+ if (major(dev) >= nblkdev) {
+ vrele(vp);
+ return (ENXIO);
+ }
+ for (sp = &swdevt[0]; sp->sw_dev != NODEV; sp++) {
+ if (sp->sw_dev == dev) {
+ if (sp->sw_flags & SW_FREED) {
+ vrele(vp);
+ return (EBUSY);
+ }
+ sp->sw_vp = vp;
+ if (error = swfree(p, sp - swdevt)) {
+ vrele(vp);
+ return (error);
+ }
+ return (0);
+ }
+#ifdef SEQSWAP
+ /*
+ * If we have reached a non-freed sequential device without
+ * finding what we are looking for, it is an error.
+ * That is because all interleaved devices must come first
+ * and sequential devices must be freed in order.
+ */
+ if ((sp->sw_flags & (SW_SEQUENTIAL|SW_FREED)) == SW_SEQUENTIAL)
+ break;
+#endif
+ }
+ vrele(vp);
+ return (EINVAL);
+}
+
+/*
+ * Swfree(index) frees the index'th portion of the swap map.
+ * Each of the nswdev devices provides 1/nswdev'th of the swap
+ * space, which is laid out with blocks of dmmax pages circularly
+ * among the devices.
+ */
+int
+swfree(p, index)
+ struct proc *p;
+ int index;
+{
+ register struct swdevt *sp;
+ register swblk_t vsbase;
+ register long blk;
+ struct vnode *vp;
+ register swblk_t dvbase;
+ register int nblks;
+ int error;
+
+ sp = &swdevt[index];
+ vp = sp->sw_vp;
+ if (error = VOP_OPEN(vp, FREAD|FWRITE, p->p_ucred, p))
+ return (error);
+ sp->sw_flags |= SW_FREED;
+ nblks = sp->sw_nblks;
+ /*
+ * Some devices may not exist until after boot time.
+ * If so, their nblk count will be 0.
+ */
+ if (nblks <= 0) {
+ int perdev;
+ dev_t dev = sp->sw_dev;
+
+ if (bdevsw[major(dev)].d_psize == 0 ||
+ (nblks = (*bdevsw[major(dev)].d_psize)(dev)) == -1) {
+ (void) VOP_CLOSE(vp, FREAD|FWRITE, p->p_ucred, p);
+ sp->sw_flags &= ~SW_FREED;
+ return (ENXIO);
+ }
+#ifdef SEQSWAP
+ if (index < niswdev) {
+ perdev = niswap / niswdev;
+ if (nblks > perdev)
+ nblks = perdev;
+ } else {
+ if (nblks % dmmax)
+ nblks -= (nblks % dmmax);
+ nswap += nblks;
+ }
+#else
+ perdev = nswap / nswdev;
+ if (nblks > perdev)
+ nblks = perdev;
+#endif
+ sp->sw_nblks = nblks;
+ }
+ if (nblks == 0) {
+ (void) VOP_CLOSE(vp, FREAD|FWRITE, p->p_ucred, p);
+ sp->sw_flags &= ~SW_FREED;
+ return (0); /* XXX error? */
+ }
+#ifdef SEQSWAP
+ if (sp->sw_flags & SW_SEQUENTIAL) {
+ register struct swdevt *swp;
+
+ blk = niswap;
+ for (swp = &swdevt[niswdev]; swp != sp; swp++)
+ blk += swp->sw_nblks;
+#if 0
+ rmfree(swapmap, nblks, blk);
+ return (0);
+#endif
+ rlist_free(&swapmap, blk, blk + nblks - 1);
+ vm_swap_size += nblks;
+ return (0);
+ }
+#endif
+ for (dvbase = 0; dvbase < nblks; dvbase += dmmax) {
+ blk = nblks - dvbase;
+
+#ifdef SEQSWAP
+ if ((vsbase = index*dmmax + dvbase*niswdev) >= niswap)
+ panic("swfree");
+#else
+ if ((vsbase = index*dmmax + dvbase*nswdev) >= nswap)
+ panic("swfree");
+#endif
+ if (blk > dmmax)
+ blk = dmmax;
+#if 0
+ if (vsbase == 0) {
+ /*
+ * First of all chunks... initialize the swapmap.
+ * Don't use the first cluster of the device
+ * in case it starts with a label or boot block.
+ */
+ rminit(swapmap, blk - ctod(CLSIZE),
+ vsbase + ctod(CLSIZE), "swap", nswapmap);
+ } else if (dvbase == 0) {
+ /*
+ * Don't use the first cluster of the device
+ * in case it starts with a label or boot block.
+ */
+ rmfree(swapmap, blk - ctod(CLSIZE),
+ vsbase + ctod(CLSIZE));
+ } else
+ rmfree(swapmap, blk, vsbase);
+#endif
+ /* XXX -- we need to exclude the first cluster as above */
+ /* but for now, this will work fine... */
+ rlist_free(&swapmap, vsbase, vsbase + blk - 1);
+ vm_swap_size += blk;
+ }
+ return (0);
+}
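The striping arithmetic in swstrategy() above maps a logical swap block onto one of the interleaved devices by splitting the block number into dmmax-sized chunks and handing out chunks round-robin. A stand-alone sketch of the same arithmetic, with assumed values standing in for dmmax and nswdev:

#include <stdio.h>

#define DMMAX	32	/* blocks per interleave chunk (assumed value) */
#define NSWDEV	3	/* number of interleaved swap devices (assumed value) */

/*
 * Mirrors the non-SEQSWAP case in swstrategy(): split the logical block
 * number into (chunk, offset), pick the device round-robin by chunk, and
 * compact the remaining chunks onto that device.
 */
static void
swap_stripe(long blkno, int *index, long *devblk)
{
	long off = blkno % DMMAX;
	long seg = blkno / DMMAX;

	*index = seg % NSWDEV;		/* which swap device */
	seg /= NSWDEV;			/* chunk number within that device */
	*devblk = seg * DMMAX + off;	/* device-relative block */
}

int
main(void)
{
	long blkno, devblk;
	int index;

	for (blkno = 0; blkno < 6 * DMMAX; blkno += DMMAX) {
		swap_stripe(blkno, &index, &devblk);
		printf("logical block %4ld -> device %d, block %ld\n",
		    blkno, index, devblk);
	}
	return (0);
}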
diff --git a/sys/vm/vm_unix.c b/sys/vm/vm_unix.c
new file mode 100644
index 0000000..ee6ddf6
--- /dev/null
+++ b/sys/vm/vm_unix.c
@@ -0,0 +1,110 @@
+/*
+ * Copyright (c) 1988 University of Utah.
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * the Systems Programming Group of the University of Utah Computer
+ * Science Department.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * from: Utah $Hdr: vm_unix.c 1.1 89/11/07$
+ *
+ * @(#)vm_unix.c 8.1 (Berkeley) 6/11/93
+ */
+
+/*
+ * Traditional sbrk/grow interface to VM
+ */
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/resourcevar.h>
+
+#include <vm/vm.h>
+
+extern int swap_pager_full;
+
+struct obreak_args {
+ char *nsiz;
+};
+
+/* ARGSUSED */
+int
+obreak(p, uap, retval)
+ struct proc *p;
+ struct obreak_args *uap;
+ int *retval;
+{
+ register struct vmspace *vm = p->p_vmspace;
+ vm_offset_t new, old;
+ int rv;
+ register int diff;
+
+ old = (vm_offset_t)vm->vm_daddr;
+ new = round_page(uap->nsiz);
+ if ((int)(new - old) > p->p_rlimit[RLIMIT_DATA].rlim_cur)
+ return(ENOMEM);
+ old = round_page(old + ctob(vm->vm_dsize));
+ diff = new - old;
+ if (diff > 0) {
+ if (swap_pager_full) {
+ return(ENOMEM);
+ }
+ rv = vm_allocate(&vm->vm_map, &old, diff, FALSE);
+ if (rv != KERN_SUCCESS) {
+ return(ENOMEM);
+ }
+ vm->vm_dsize += btoc(diff);
+ } else if (diff < 0) {
+ diff = -diff;
+ rv = vm_deallocate(&vm->vm_map, new, diff);
+ if (rv != KERN_SUCCESS) {
+ return(ENOMEM);
+ }
+ vm->vm_dsize -= btoc(diff);
+ }
+ return(0);
+}
+
+struct ovadvise_args {
+ int anom;
+};
+
+/* ARGSUSED */
+int
+ovadvise(p, uap, retval)
+ struct proc *p;
+ struct ovadvise_args *uap;
+ int *retval;
+{
+
+ return (EINVAL);
+}
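obreak() above rounds both the requested break address and the current end of the data segment to page boundaries and then grows or shrinks the map by the difference. A rough stand-alone illustration of that decision, assuming a 4K page size and expressing the data size in bytes (the kernel stores it in clicks and converts with ctob/btoc):

#include <stdio.h>

#define PAGE_SIZE	4096UL	/* assumed */
#define round_page(x)	(((unsigned long)(x) + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1))

int
main(void)
{
	unsigned long daddr = 0x10000000UL;		/* start of data segment (example) */
	unsigned long dsize = 5 * PAGE_SIZE;		/* current data size in bytes */
	unsigned long nsiz = daddr + 7 * PAGE_SIZE + 100; /* requested new break */
	unsigned long new, old;
	long diff;

	new = round_page(nsiz);
	old = round_page(daddr + dsize);
	diff = (long)(new - old);

	if (diff > 0)
		printf("grow data segment by %ld bytes\n", diff);
	else if (diff < 0)
		printf("shrink data segment by %ld bytes\n", -diff);
	else
		printf("no change\n");
	return (0);
}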
diff --git a/sys/vm/vm_user.c b/sys/vm/vm_user.c
new file mode 100644
index 0000000..0f2c234
--- /dev/null
+++ b/sys/vm/vm_user.c
@@ -0,0 +1,314 @@
+/*
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * The Mach Operating System project at Carnegie-Mellon University.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)vm_user.c 8.2 (Berkeley) 1/12/94
+ *
+ *
+ * Copyright (c) 1987, 1990 Carnegie-Mellon University.
+ * All rights reserved.
+ *
+ * Authors: Avadis Tevanian, Jr., Michael Wayne Young
+ *
+ * Permission to use, copy, modify and distribute this software and
+ * its documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ *
+ * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
+ * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
+ * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
+ * School of Computer Science
+ * Carnegie Mellon University
+ * Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie the
+ * rights to redistribute these changes.
+ */
+
+/*
+ * User-exported virtual memory functions.
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+
+#include <vm/vm.h>
+
+simple_lock_data_t vm_alloc_lock; /* XXX */
+
+#ifdef MACHVMCOMPAT
+/*
+ * BSD style syscall interfaces to MACH calls
+ * All return MACH return values.
+ */
+struct svm_allocate_args {
+ vm_map_t map;
+ vm_offset_t *addr;
+ vm_size_t size;
+ boolean_t anywhere;
+};
+/* ARGSUSED */
+int
+svm_allocate(p, uap, retval)
+ struct proc *p;
+ struct svm_allocate_args *uap;
+ int *retval;
+{
+ vm_offset_t addr;
+ int rv;
+
+ uap->map = p->p_map; /* XXX */
+
+ if (copyin((caddr_t)uap->addr, (caddr_t)&addr, sizeof (addr)))
+ rv = KERN_INVALID_ARGUMENT;
+ else
+ rv = vm_allocate(uap->map, &addr, uap->size, uap->anywhere);
+ if (rv == KERN_SUCCESS) {
+ if (copyout((caddr_t)&addr, (caddr_t)uap->addr, sizeof(addr)))
+ rv = KERN_INVALID_ARGUMENT;
+ }
+ return((int)rv);
+}
+
+struct svm_deallocate_args {
+ vm_map_t map;
+ vm_offset_t addr;
+ vm_size_t size;
+};
+/* ARGSUSED */
+int
+svm_deallocate(p, uap, retval)
+ struct proc *p;
+ struct svm_deallocate_args *uap;
+ int *retval;
+{
+ int rv;
+
+ uap->map = p->p_map; /* XXX */
+ rv = vm_deallocate(uap->map, uap->addr, uap->size);
+ return((int)rv);
+}
+
+struct svm_inherit_args {
+ vm_map_t map;
+ vm_offset_t addr;
+ vm_size_t size;
+ vm_inherit_t inherit;
+};
+/* ARGSUSED */
+int
+svm_inherit(p, uap, retval)
+ struct proc *p;
+ struct svm_inherit_args *uap;
+ int *retval;
+{
+ int rv;
+
+ uap->map = p->p_map; /* XXX */
+ rv = vm_inherit(uap->map, uap->addr, uap->size, uap->inherit);
+ return((int)rv);
+}
+
+struct svm_protect_args {
+ vm_map_t map;
+ vm_offset_t addr;
+ vm_size_t size;
+ boolean_t setmax;
+ vm_prot_t prot;
+};
+/* ARGSUSED */
+int
+svm_protect(p, uap, retval)
+ struct proc *p;
+ struct svm_protect_args *uap;
+ int *retval;
+{
+ int rv;
+
+ uap->map = p->p_map; /* XXX */
+ rv = vm_protect(uap->map, uap->addr, uap->size, uap->setmax, uap->prot);
+ return((int)rv);
+}
+
+#endif
+/*
+ * vm_inherit sets the inheritance of the specified range in the
+ * specified map.
+ */
+int
+vm_inherit(map, start, size, new_inheritance)
+ register vm_map_t map;
+ vm_offset_t start;
+ vm_size_t size;
+ vm_inherit_t new_inheritance;
+{
+ if (map == NULL)
+ return(KERN_INVALID_ARGUMENT);
+
+ return(vm_map_inherit(map, trunc_page(start), round_page(start+size), new_inheritance));
+}
+
+/*
+ * vm_protect sets the protection of the specified range in the
+ * specified map.
+ */
+
+int
+vm_protect(map, start, size, set_maximum, new_protection)
+ register vm_map_t map;
+ vm_offset_t start;
+ vm_size_t size;
+ boolean_t set_maximum;
+ vm_prot_t new_protection;
+{
+ if (map == NULL)
+ return(KERN_INVALID_ARGUMENT);
+
+ return(vm_map_protect(map, trunc_page(start), round_page(start+size), new_protection, set_maximum));
+}
+
+/*
+ * vm_allocate allocates "zero fill" memory in the specified
+ * map.
+ */
+int
+vm_allocate(map, addr, size, anywhere)
+ register vm_map_t map;
+ register vm_offset_t *addr;
+ register vm_size_t size;
+ boolean_t anywhere;
+{
+ int result;
+
+ if (map == NULL)
+ return(KERN_INVALID_ARGUMENT);
+ if (size == 0) {
+ *addr = 0;
+ return(KERN_SUCCESS);
+ }
+
+ if (anywhere)
+ *addr = vm_map_min(map);
+ else
+ *addr = trunc_page(*addr);
+ size = round_page(size);
+
+ result = vm_map_find(map, NULL, (vm_offset_t) 0, addr, size, anywhere);
+
+ return(result);
+}
+
+/*
+ * vm_deallocate deallocates the specified range of addresses in the
+ * specified address map.
+ */
+int
+vm_deallocate(map, start, size)
+ register vm_map_t map;
+ vm_offset_t start;
+ vm_size_t size;
+{
+ if (map == NULL)
+ return(KERN_INVALID_ARGUMENT);
+
+ if (size == (vm_offset_t) 0)
+ return(KERN_SUCCESS);
+
+ return(vm_map_remove(map, trunc_page(start), round_page(start+size)));
+}
+
+#if 1
+/*
+ * Similar to vm_allocate but assigns an explicit pager.
+ */
+int
+vm_allocate_with_pager(map, addr, size, anywhere, pager, poffset, internal)
+ register vm_map_t map;
+ register vm_offset_t *addr;
+ register vm_size_t size;
+ boolean_t anywhere;
+ vm_pager_t pager;
+ vm_offset_t poffset;
+ boolean_t internal;
+{
+ register vm_object_t object;
+ register int result;
+
+ if (map == NULL)
+ return(KERN_INVALID_ARGUMENT);
+
+ *addr = trunc_page(*addr);
+ size = round_page(size);
+
+ /*
+ * Lookup the pager/paging-space in the object cache.
+ * If it's not there, then create a new object and cache
+ * it.
+ */
+ object = vm_object_lookup(pager);
+ cnt.v_lookups++;
+ if (object == NULL) {
+ object = vm_object_allocate(size);
+ /*
+ * From Mike Hibler: "unnamed anonymous objects should never
+ * be on the hash list ... For now you can just change
+ * vm_allocate_with_pager to not do vm_object_enter if this
+ * is an internal object ..."
+ */
+ if (!internal)
+ vm_object_enter(object, pager);
+ } else
+ cnt.v_hits++;
+ if (internal)
+ object->flags |= OBJ_INTERNAL;
+ else {
+ object->flags &= ~OBJ_INTERNAL;
+ cnt.v_nzfod -= atop(size);
+ }
+
+ result = vm_map_find(map, object, poffset, addr, size, anywhere);
+ if (result != KERN_SUCCESS)
+ vm_object_deallocate(object);
+ else if (pager != NULL)
+ vm_object_setpager(object, pager, (vm_offset_t) 0, TRUE);
+ return(result);
+}
+#endif
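The user-exported calls above (vm_inherit, vm_protect, vm_deallocate) all widen the caller's byte range to whole pages with trunc_page/round_page before handing it to the vm_map layer. A small stand-alone illustration of that widening, assuming a 4K page size:

#include <stdio.h>

#define PAGE_SIZE	4096UL	/* assumed */
#define trunc_page(x)	((unsigned long)(x) & ~(PAGE_SIZE - 1))
#define round_page(x)	(((unsigned long)(x) + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1))

int
main(void)
{
	unsigned long start = 0x20001234UL;	/* arbitrary example address */
	unsigned long size = 10000UL;		/* arbitrary example length */
	unsigned long lo = trunc_page(start);
	unsigned long hi = round_page(start + size);

	/* [start, start+size) becomes [trunc_page(start), round_page(start+size)) */
	printf("requested     [0x%lx, 0x%lx)\n", start, start + size);
	printf("page-aligned  [0x%lx, 0x%lx) = %lu pages\n",
	    lo, hi, (hi - lo) / PAGE_SIZE);
	return (0);
}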
diff --git a/sys/vm/vnode_pager.c b/sys/vm/vnode_pager.c
new file mode 100644
index 0000000..b8e5a19
--- /dev/null
+++ b/sys/vm/vnode_pager.c
@@ -0,0 +1,1424 @@
+/*
+ * Copyright (c) 1990 University of Utah.
+ * Copyright (c) 1991 The Regents of the University of California.
+ * All rights reserved.
+ * Copyright (c) 1993,1994 John S. Dyson
+ *
+ * This code is derived from software contributed to Berkeley by
+ * the Systems Programming Group of the University of Utah Computer
+ * Science Department.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * from: @(#)vnode_pager.c 7.5 (Berkeley) 4/20/91
+ * $Id: vnode_pager.c,v 1.17 1994/04/05 03:23:53 davidg Exp $
+ */
+
+/*
+ * Page to/from files (vnodes).
+ *
+ * TODO:
+ * pageouts
+ * fix credential use (uses current process credentials now)
+ */
+
+/*
+ * MODIFICATIONS:
+ * John S. Dyson 08 Dec 93
+ *
+ * This file in conjunction with some vm_fault mods, eliminate the performance
+ * advantage for using the buffer cache and minimize memory copies.
+ *
+ * 1) Supports multiple - block reads
+ * 2) Bypasses buffer cache for reads
+ *
+ * TODO:
+ *
+ * 1) Totally bypass buffer cache for reads
+ * (Currently will still sometimes use buffer cache for reads)
+ * 2) Bypass buffer cache for writes
+ * (Code does not support it, but mods are simple)
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/malloc.h>
+#include <sys/vnode.h>
+#include <sys/uio.h>
+#include <sys/mount.h>
+
+#include <vm/vm.h>
+#include <vm/vm_page.h>
+#include <vm/vnode_pager.h>
+
+#include <sys/buf.h>
+#include <miscfs/specfs/specdev.h>
+
+int vnode_pager_putmulti();
+
+void vnode_pager_init();
+vm_pager_t vnode_pager_alloc(caddr_t, vm_offset_t, vm_prot_t, vm_offset_t);
+void vnode_pager_dealloc();
+int vnode_pager_getpage();
+int vnode_pager_getmulti();
+int vnode_pager_putpage();
+boolean_t vnode_pager_haspage();
+
+struct pagerops vnodepagerops = {
+ vnode_pager_init,
+ vnode_pager_alloc,
+ vnode_pager_dealloc,
+ vnode_pager_getpage,
+ vnode_pager_getmulti,
+ vnode_pager_putpage,
+ vnode_pager_putmulti,
+ vnode_pager_haspage
+};
+
+static int vnode_pager_input(vn_pager_t vnp, vm_page_t *m, int count, int reqpage);
+static int vnode_pager_output(vn_pager_t vnp, vm_page_t *m, int count, int *rtvals);
+struct buf *getpbuf();
+void relpbuf(struct buf *bp);
+
+extern vm_map_t pager_map;
+
+struct pagerlst vnode_pager_list; /* list of managed vnodes */
+
+#define MAXBP (PAGE_SIZE/DEV_BSIZE);
+
+void
+vnode_pager_init()
+{
+ TAILQ_INIT(&vnode_pager_list);
+}
+
+/*
+ * Allocate (or lookup) pager for a vnode.
+ * Handle is a vnode pointer.
+ */
+vm_pager_t
+vnode_pager_alloc(handle, size, prot, offset)
+ caddr_t handle;
+ vm_size_t size;
+ vm_prot_t prot;
+ vm_offset_t offset;
+{
+ register vm_pager_t pager;
+ register vn_pager_t vnp;
+ vm_object_t object;
+ struct vattr vattr;
+ struct vnode *vp;
+ struct proc *p = curproc; /* XXX */
+
+ /*
+ * Pageout to vnode, no can do yet.
+ */
+ if (handle == NULL)
+ return(NULL);
+
+ /*
+ * Vnodes keep a pointer to any associated pager so no need to
+ * lookup with vm_pager_lookup.
+ */
+ vp = (struct vnode *)handle;
+ pager = (vm_pager_t)vp->v_vmdata;
+ if (pager == NULL) {
+ /*
+ * Allocate pager structures
+ */
+ pager = (vm_pager_t)malloc(sizeof *pager, M_VMPAGER, M_WAITOK);
+ if (pager == NULL)
+ return(NULL);
+ vnp = (vn_pager_t)malloc(sizeof *vnp, M_VMPGDATA, M_WAITOK);
+ if (vnp == NULL) {
+ free((caddr_t)pager, M_VMPAGER);
+ return(NULL);
+ }
+ /*
+ * And an object of the appropriate size
+ */
+ if (VOP_GETATTR(vp, &vattr, p->p_ucred, p) == 0) {
+ object = vm_object_allocate(round_page(vattr.va_size));
+ vm_object_enter(object, pager);
+ vm_object_setpager(object, pager, 0, TRUE);
+ } else {
+ free((caddr_t)vnp, M_VMPGDATA);
+ free((caddr_t)pager, M_VMPAGER);
+ return(NULL);
+ }
+ /*
+ * Hold a reference to the vnode and initialize pager data.
+ */
+ VREF(vp);
+ vnp->vnp_flags = 0;
+ vnp->vnp_vp = vp;
+ vnp->vnp_size = vattr.va_size;
+
+ TAILQ_INSERT_TAIL(&vnode_pager_list, pager, pg_list);
+ pager->pg_handle = handle;
+ pager->pg_type = PG_VNODE;
+ pager->pg_ops = &vnodepagerops;
+ pager->pg_data = (caddr_t)vnp;
+ vp->v_vmdata = (caddr_t)pager;
+ } else {
+ /*
+ * vm_object_lookup() will remove the object from the
+ * cache if found and also gain a reference to the object.
+ */
+ object = vm_object_lookup(pager);
+ }
+ return(pager);
+}
+
+void
+vnode_pager_dealloc(pager)
+ vm_pager_t pager;
+{
+ register vn_pager_t vnp = (vn_pager_t)pager->pg_data;
+ register struct vnode *vp;
+ struct proc *p = curproc; /* XXX */
+
+ if (vp = vnp->vnp_vp) {
+ vp->v_vmdata = NULL;
+ vp->v_flag &= ~VTEXT;
+#if 0
+ /* can hang if done at reboot on NFS FS */
+ (void) VOP_FSYNC(vp, p->p_ucred, p);
+#endif
+ vrele(vp);
+ }
+
+ TAILQ_REMOVE(&vnode_pager_list, pager, pg_list);
+ free((caddr_t)vnp, M_VMPGDATA);
+ free((caddr_t)pager, M_VMPAGER);
+}
+
+int
+vnode_pager_getmulti(pager, m, count, reqpage, sync)
+ vm_pager_t pager;
+ vm_page_t *m;
+ int count;
+ int reqpage;
+ boolean_t sync;
+{
+
+ return vnode_pager_input((vn_pager_t) pager->pg_data, m, count, reqpage);
+}
+
+int
+vnode_pager_getpage(pager, m, sync)
+ vm_pager_t pager;
+ vm_page_t m;
+ boolean_t sync;
+{
+
+ int err;
+ vm_page_t marray[1];
+ if (pager == NULL)
+ return FALSE;
+ marray[0] = m;
+
+ return vnode_pager_input((vn_pager_t)pager->pg_data, marray, 1, 0);
+}
+
+boolean_t
+vnode_pager_putpage(pager, m, sync)
+ vm_pager_t pager;
+ vm_page_t m;
+ boolean_t sync;
+{
+ int err;
+ vm_page_t marray[1];
+ int rtvals[1];
+
+ if (pager == NULL)
+ return FALSE;
+ marray[0] = m;
+ vnode_pager_output((vn_pager_t)pager->pg_data, marray, 1, rtvals);
+ return rtvals[0];
+}
+
+int
+vnode_pager_putmulti(pager, m, c, sync, rtvals)
+ vm_pager_t pager;
+ vm_page_t *m;
+ int c;
+ boolean_t sync;
+ int *rtvals;
+{
+ return vnode_pager_output((vn_pager_t)pager->pg_data, m, c, rtvals);
+}
+
+
+boolean_t
+vnode_pager_haspage(pager, offset)
+ vm_pager_t pager;
+ vm_offset_t offset;
+{
+ register vn_pager_t vnp = (vn_pager_t)pager->pg_data;
+ daddr_t bn;
+ int run;
+ int err;
+
+ /*
+ * Offset beyond end of file, do not have the page
+ */
+ if (offset >= vnp->vnp_size) {
+ return(FALSE);
+ }
+
+ /*
+ * Read the index to find the disk block to read
+ * from. If there is no block, report that we don't
+ * have this data.
+ *
+ * Assumes that the vnode has the whole page or nothing.
+ */
+ err = VOP_BMAP(vnp->vnp_vp,
+ offset / vnp->vnp_vp->v_mount->mnt_stat.f_iosize,
+ (struct vnode **)0, &bn, 0);
+ if (err) {
+ return(TRUE);
+ }
+ return((long)bn < 0 ? FALSE : TRUE);
+}
+
+/*
+ * Lets the VM system know about a change in size for a file.
+ * If this vnode is mapped into some address space (i.e. we have a pager
+ * for it) we adjust our own internal size and flush any cached pages in
+ * the associated object that are affected by the size change.
+ *
+ * Note: this routine may be invoked as a result of a pager put
+ * operation (possibly at object termination time), so we must be careful.
+ */
+void
+vnode_pager_setsize(vp, nsize)
+ struct vnode *vp;
+ u_long nsize;
+{
+ register vn_pager_t vnp;
+ register vm_object_t object;
+ vm_pager_t pager;
+
+ /*
+ * Not a mapped vnode
+ */
+ if (vp == NULL || vp->v_type != VREG || vp->v_vmdata == NULL)
+ return;
+ /*
+ * Hasn't changed size
+ */
+ pager = (vm_pager_t)vp->v_vmdata;
+ vnp = (vn_pager_t)pager->pg_data;
+ if (nsize == vnp->vnp_size)
+ return;
+ /*
+ * No object.
+ * This can happen during object termination since
+ * vm_object_page_clean is called after the object
+ * has been removed from the hash table, and clean
+ * may cause vnode write operations which can wind
+ * up back here.
+ */
+ object = vm_object_lookup(pager);
+ if (object == NULL)
+ return;
+
+ /*
+ * File has shrunk.
+ * Toss any cached pages beyond the new EOF.
+ */
+ if (round_page(nsize) < round_page(vnp->vnp_size)) {
+ vm_object_lock(object);
+ vm_object_page_remove(object,
+ (vm_offset_t)round_page(nsize), round_page(vnp->vnp_size));
+ vm_object_unlock(object);
+ }
+ vnp->vnp_size = (vm_offset_t)nsize;
+ vm_object_deallocate(object);
+}
+
+void
+vnode_pager_umount(mp)
+ register struct mount *mp;
+{
+ register vm_pager_t pager, npager;
+ struct vnode *vp;
+
+ pager = vnode_pager_list.tqh_first;
+ while( pager) {
+ /*
+ * Save the next pointer now since uncaching may
+ * terminate the object and render pager invalid
+ */
+ vp = ((vn_pager_t)pager->pg_data)->vnp_vp;
+ npager = pager->pg_list.tqe_next;
+ if (mp == (struct mount *)0 || vp->v_mount == mp)
+ (void) vnode_pager_uncache(vp);
+ pager = npager;
+ }
+}
+
+/*
+ * Remove vnode associated object from the object cache.
+ *
+ * Note: this routine may be invoked as a result of a pager put
+ * operation (possibly at object termination time), so we must be careful.
+ */
+boolean_t
+vnode_pager_uncache(vp)
+ register struct vnode *vp;
+{
+ register vm_object_t object;
+ boolean_t uncached, locked;
+ vm_pager_t pager;
+
+ /*
+ * Not a mapped vnode
+ */
+ pager = (vm_pager_t)vp->v_vmdata;
+ if (pager == NULL)
+ return (TRUE);
+ /*
+ * Unlock the vnode if it is currently locked.
+ * We do this since uncaching the object may result
+ * in its destruction which may initiate paging
+ * activity which may necessitate locking the vnode.
+ */
+ locked = VOP_ISLOCKED(vp);
+ if (locked)
+ VOP_UNLOCK(vp);
+ /*
+ * Must use vm_object_lookup() as it actually removes
+ * the object from the cache list.
+ */
+ object = vm_object_lookup(pager);
+ if (object) {
+ uncached = (object->ref_count <= 1);
+ pager_cache(object, FALSE);
+ } else
+ uncached = TRUE;
+ if (locked)
+ VOP_LOCK(vp);
+ return(uncached);
+}
+#if 0
+/*
+ * Remove vnode associated object from the object cache.
+ *
+ * XXX unlock the vnode if it is currently locked.
+ * We must do this since uncaching the object may result in its
+ * destruction which may initiate paging activity which may necessitate
+ * re-locking the vnode.
+ */
+boolean_t
+vnode_pager_uncache(vp)
+ register struct vnode *vp;
+{
+ register vm_object_t object;
+ boolean_t uncached;
+ vm_pager_t pager;
+
+ /*
+ * Not a mapped vnode
+ */
+ pager = (vm_pager_t)vp->v_vmdata;
+ if (pager == NULL)
+ return (TRUE);
+ /*
+ * Must use vm_object_lookup() as it actually removes
+ * the object from the cache list.
+ */
+ object = vm_object_lookup(pager);
+ if (object) {
+ uncached = (object->ref_count <= 1);
+ VOP_UNLOCK(vp);
+ pager_cache(object, FALSE);
+ VOP_LOCK(vp);
+ } else
+ uncached = TRUE;
+ return(uncached);
+}
+#endif
+
+
+void
+vnode_pager_freepage(m)
+ vm_page_t m;
+{
+ PAGE_WAKEUP(m);
+ vm_page_free(m);
+}
+
+/*
+ * calculate the linear (byte) disk address of specified virtual
+ * file address
+ */
+vm_offset_t
+vnode_pager_addr(vp, address)
+ struct vnode *vp;
+ vm_offset_t address;
+{
+ int rtaddress;
+ int bsize;
+ vm_offset_t block;
+ struct vnode *rtvp;
+ int err;
+ int vblock, voffset;
+ int run;
+
+ bsize = vp->v_mount->mnt_stat.f_iosize;
+ vblock = address / bsize;
+ voffset = address % bsize;
+
+ err = VOP_BMAP(vp,vblock,&rtvp,&block,0);
+
+ if( err)
+ rtaddress = -1;
+ else
+ rtaddress = block * DEV_BSIZE + voffset;
+
+ return rtaddress;
+}
+
+/*
+ * interrupt routine for I/O completion
+ */
+void
+vnode_pager_iodone(bp)
+ struct buf *bp;
+{
+ bp->b_flags |= B_DONE;
+ wakeup((caddr_t)bp);
+}
+
+/*
+ * small block file system vnode pager input
+ */
+int
+vnode_pager_input_smlfs(vnp, m)
+ vn_pager_t vnp;
+ vm_page_t m;
+{
+ int i;
+ int s;
+ vm_offset_t paging_offset;
+ struct vnode *dp, *vp;
+ struct buf *bp;
+ vm_offset_t mapsize;
+ vm_offset_t foff;
+ vm_offset_t kva;
+ int fileaddr;
+ int block;
+ vm_offset_t bsize;
+ int error = 0;
+ int run;
+
+ paging_offset = m->object->paging_offset;
+ vp = vnp->vnp_vp;
+ bsize = vp->v_mount->mnt_stat.f_iosize;
+ foff = m->offset + paging_offset;
+
+ VOP_BMAP(vp, foff, &dp, 0, 0);
+
+ kva = vm_pager_map_page(m);
+
+ for(i=0;i<PAGE_SIZE/bsize;i++) {
+ /*
+ * calculate logical block and offset
+ */
+ block = foff / bsize + i;
+ s = splbio();
+ while (bp = incore(vp, block)) {
+ int amount;
+
+ /*
+ * wait until the buffer is avail or gone
+ */
+ if (bp->b_flags & B_BUSY) {
+ bp->b_flags |= B_WANTED;
+ tsleep ((caddr_t)bp, PVM, "vnwblk", 0);
+ continue;
+ }
+
+ amount = bsize;
+ if ((foff + bsize) > vnp->vnp_size)
+ amount = vnp->vnp_size - foff;
+
+ /*
+ * make sure that this page is in the buffer
+ */
+ if ((amount > 0) && amount <= bp->b_bcount) {
+ bp->b_flags |= B_BUSY;
+ splx(s);
+
+ /*
+ * copy the data from the buffer
+ */
+ bcopy(bp->b_un.b_addr, (caddr_t)kva + i * bsize, amount);
+ if (amount < bsize) {
+ bzero((caddr_t)kva + amount, bsize - amount);
+ }
+ bp->b_flags &= ~B_BUSY;
+ wakeup((caddr_t)bp);
+ goto nextblock;
+ }
+ break;
+ }
+ splx(s);
+ fileaddr = vnode_pager_addr(vp, foff + i * bsize);
+ if( fileaddr != -1) {
+ bp = getpbuf();
+ VHOLD(vp);
+
+ /* build a minimal buffer header */
+ bp->b_flags = B_BUSY | B_READ | B_CALL;
+ bp->b_iodone = vnode_pager_iodone;
+ bp->b_proc = curproc;
+ bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred;
+ if( bp->b_rcred != NOCRED)
+ crhold(bp->b_rcred);
+ if( bp->b_wcred != NOCRED)
+ crhold(bp->b_wcred);
+ bp->b_un.b_addr = (caddr_t) kva + i * bsize;
+ bp->b_blkno = fileaddr / DEV_BSIZE;
+ bgetvp(dp, bp);
+ bp->b_bcount = bsize;
+ bp->b_bufsize = bsize;
+
+ /* do the input */
+ VOP_STRATEGY(bp);
+
+ /* we definitely need to be at splbio here */
+
+ s = splbio();
+ while ((bp->b_flags & B_DONE) == 0) {
+ tsleep((caddr_t)bp, PVM, "vnsrd", 0);
+ }
+ splx(s);
+ if ((bp->b_flags & B_ERROR) != 0)
+ error = EIO;
+
+ /*
+ * free the buffer header back to the swap buffer pool
+ */
+ relpbuf(bp);
+ HOLDRELE(vp);
+ if( error)
+ break;
+ } else {
+ bzero((caddr_t) kva + i * bsize, bsize);
+ }
+nextblock: ;
+ }
+ vm_pager_unmap_page(kva);
+ if( error) {
+ return VM_PAGER_FAIL;
+ }
+ pmap_clear_modify(VM_PAGE_TO_PHYS(m));
+ m->flags |= PG_CLEAN;
+ m->flags &= ~PG_LAUNDRY;
+ return VM_PAGER_OK;
+
+}
+
+
+/*
+ * old style vnode pager input routine
+ */
+int
+vnode_pager_input_old(vnp, m)
+ vn_pager_t vnp;
+ vm_page_t m;
+{
+ int i;
+ struct uio auio;
+ struct iovec aiov;
+ int error;
+ int size;
+ vm_offset_t foff;
+ vm_offset_t kva;
+
+ error = 0;
+ foff = m->offset + m->object->paging_offset;
+ /*
+ * Return failure if beyond current EOF
+ */
+ if (foff >= vnp->vnp_size) {
+ return VM_PAGER_BAD;
+ } else {
+ size = PAGE_SIZE;
+ if (foff + size > vnp->vnp_size)
+ size = vnp->vnp_size - foff;
+/*
+ * Allocate a kernel virtual address and initialize so that
+ * we can use VOP_READ/WRITE routines.
+ */
+ kva = vm_pager_map_page(m);
+ aiov.iov_base = (caddr_t)kva;
+ aiov.iov_len = size;
+ auio.uio_iov = &aiov;
+ auio.uio_iovcnt = 1;
+ auio.uio_offset = foff;
+ auio.uio_segflg = UIO_SYSSPACE;
+ auio.uio_rw = UIO_READ;
+ auio.uio_resid = size;
+ auio.uio_procp = (struct proc *)0;
+
+ error = VOP_READ(vnp->vnp_vp, &auio, 0, curproc->p_ucred);
+ if (!error) {
+ register int count = size - auio.uio_resid;
+
+ if (count == 0)
+ error = EINVAL;
+ else if (count != PAGE_SIZE)
+ bzero((caddr_t)kva + count, PAGE_SIZE - count);
+ }
+ vm_pager_unmap_page(kva);
+ }
+ pmap_clear_modify(VM_PAGE_TO_PHYS(m));
+ m->flags |= PG_CLEAN;
+ m->flags &= ~PG_LAUNDRY;
+ return error?VM_PAGER_FAIL:VM_PAGER_OK;
+}
+
+/*
+ * generic vnode pager input routine
+ */
+int
+vnode_pager_input(vnp, m, count, reqpage)
+ register vn_pager_t vnp;
+ vm_page_t *m;
+ int count, reqpage;
+{
+ int i,j;
+ vm_offset_t kva, foff;
+ int size;
+ struct proc *p = curproc; /* XXX */
+ vm_object_t object;
+ vm_offset_t paging_offset;
+ struct vnode *dp, *vp;
+ vm_offset_t mapsize;
+ int bsize;
+
+ int first, last;
+ int reqaddr, firstaddr;
+ int run;
+ int block, offset;
+
+ int nbp;
+ struct buf *bp;
+ int s;
+ int failflag;
+
+ int errtype=0; /* 0 is file type otherwise vm type */
+ int error = 0;
+
+ object = m[reqpage]->object; /* all vm_page_t items are in same object */
+ paging_offset = object->paging_offset;
+
+ vp = vnp->vnp_vp;
+ bsize = vp->v_mount->mnt_stat.f_iosize;
+
+ /* get the UNDERLYING device for the file with VOP_BMAP() */
+ /*
+ * originally, we did not check for an error return
+ * value -- assuming an fs always has a bmap entry point
+ * -- that assumption is wrong!!!
+ */
+ kva = 0;
+ mapsize = 0;
+ foff = m[reqpage]->offset + paging_offset;
+ if (!VOP_BMAP(vp, foff, &dp, 0, 0)) {
+ /*
+ * we do not block waiting for a kva; if none is available we fall
+ * back to conservative single-page behavior below
+ */
+ kva = kmem_alloc_pageable(pager_map, (mapsize = count*PAGE_SIZE));
+ if( !kva) {
+ for (i = 0; i < count; i++) {
+ if (i != reqpage) {
+ vnode_pager_freepage(m[i]);
+ }
+ }
+ m[0] = m[reqpage];
+ kva = kmem_alloc_wait(pager_map, mapsize = PAGE_SIZE);
+ reqpage = 0;
+ count = 1;
+ }
+ }
+
+ /*
+ * if we can't get a kva or we can't bmap, use old VOP code
+ */
+ if (!kva) {
+ for (i = 0; i < count; i++) {
+ if (i != reqpage) {
+ vnode_pager_freepage(m[i]);
+ }
+ }
+ return vnode_pager_input_old(vnp, m[reqpage]);
+ /*
+ * if the blocksize is smaller than a page size, then use
+ * special small filesystem code. NFS sometimes has a small
+ * blocksize, but it can handle large reads itself.
+ */
+ } else if( (PAGE_SIZE / bsize) > 1 &&
+ (vp->v_mount->mnt_stat.f_type != MOUNT_NFS)) {
+
+ kmem_free_wakeup(pager_map, kva, mapsize);
+
+ for (i = 0; i < count; i++) {
+ if (i != reqpage) {
+ vnode_pager_freepage(m[i]);
+ }
+ }
+ return vnode_pager_input_smlfs(vnp, m[reqpage]);
+ }
+
+/*
+ * here on direct device I/O
+ */
+
+
+ /*
+ * This pathetic hack gets data from the buffer cache, if it's there.
+ * I believe that this is not really necessary, and the same ends could
+ * be achieved by defaulting to the normal vfs read behavior, but this
+ * might be more efficient, because this path will NOT invoke read-aheads,
+ * and one of the purposes of this code is to bypass the buffer
+ * cache and keep it from being flushed by reading in a program.
+ */
+ /*
+ * calculate logical block and offset
+ */
+ block = foff / bsize;
+ offset = foff % bsize;
+ s = splbio();
+
+ /*
+ * if we have a buffer in core, then try to use it
+ */
+ while (bp = incore(vp, block)) {
+ int amount;
+
+ /*
+ * wait until the buffer is avail or gone
+ */
+ if (bp->b_flags & B_BUSY) {
+ bp->b_flags |= B_WANTED;
+ tsleep ((caddr_t)bp, PVM, "vnwblk", 0);
+ continue;
+ }
+
+ amount = PAGE_SIZE;
+ if ((foff + amount) > vnp->vnp_size)
+ amount = vnp->vnp_size - foff;
+
+ /*
+ * make sure that this page is in the buffer
+ */
+ if ((amount > 0) && (offset + amount) <= bp->b_bcount) {
+ bp->b_flags |= B_BUSY;
+ splx(s);
+
+ /*
+ * map the requested page
+ */
+ pmap_kenter(kva, VM_PAGE_TO_PHYS(m[reqpage]));
+ pmap_update();
+
+ /*
+ * copy the data from the buffer
+ */
+ bcopy(bp->b_un.b_addr + offset, (caddr_t)kva, amount);
+ if (amount < PAGE_SIZE) {
+ bzero((caddr_t)kva + amount, PAGE_SIZE - amount);
+ }
+ /*
+ * unmap the page and free the kva
+ */
+ pmap_remove(vm_map_pmap(pager_map), kva, kva + PAGE_SIZE);
+ kmem_free_wakeup(pager_map, kva, mapsize);
+ /*
+ * release the buffer back to the block subsystem
+ */
+ bp->b_flags &= ~B_BUSY;
+ wakeup((caddr_t)bp);
+ /*
+ * we did not have to do any work to get the requested
+ * page, the read behind/ahead does not justify a read
+ */
+ for (i = 0; i < count; i++) {
+ if (i != reqpage) {
+ vnode_pager_freepage(m[i]);
+ }
+ }
+ m[0] = m[reqpage];
+ reqpage = 0;
+ count = 1;
+
+ /*
+ * sorry for the goto
+ */
+ goto finishup;
+ }
+ /*
+ * buffer is nowhere to be found, read from the disk
+ */
+ break;
+ }
+ splx(s);
+
+ reqaddr = vnode_pager_addr(vp, foff);
+ s = splbio();
+ /*
+ * Make sure that our I/O request is contiguous.
+ * Scan backward and stop for the first discontiguous
+ * entry or stop for a page being in buffer cache.
+ */
+ failflag = 0;
+ first = reqpage;
+ for (i = reqpage - 1; i >= 0; --i) {
+ if (failflag ||
+ incore(vp, (foff + (i - reqpage) * PAGE_SIZE) / bsize) ||
+ (vnode_pager_addr(vp, m[i]->offset + paging_offset))
+ != reqaddr + (i - reqpage) * PAGE_SIZE) {
+ vnode_pager_freepage(m[i]);
+ failflag = 1;
+ } else {
+ first = i;
+ }
+ }
+
+ /*
+ * Scan forward and stop for the first non-contiguous
+ * entry or stop for a page being in buffer cache.
+ */
+ failflag = 0;
+ last = reqpage + 1;
+ for (i = reqpage + 1; i < count; i++) {
+ if (failflag ||
+ incore(vp, (foff + (i - reqpage) * PAGE_SIZE) / bsize) ||
+ (vnode_pager_addr(vp, m[i]->offset + paging_offset))
+ != reqaddr + (i - reqpage) * PAGE_SIZE) {
+ vnode_pager_freepage(m[i]);
+ failflag = 1;
+ } else {
+ last = i + 1;
+ }
+ }
+ splx(s);
+
+ /*
+ * the first and last page have been calculated now, move input
+ * pages to be zero based...
+ */
+ count = last;
+ if (first != 0) {
+ for (i = first; i < count; i++) {
+ m[i - first] = m[i];
+ }
+ count -= first;
+ reqpage -= first;
+ }
+
+ /*
+ * calculate the file virtual address for the transfer
+ */
+ foff = m[0]->offset + paging_offset;
+ /*
+ * and get the disk physical address (in bytes)
+ */
+ firstaddr = vnode_pager_addr(vp, foff);
+
+ /*
+ * calculate the size of the transfer
+ */
+ size = count * PAGE_SIZE;
+ if ((foff + size) > vnp->vnp_size)
+ size = vnp->vnp_size - foff;
+
+ /*
+ * round up physical size for real devices
+ */
+ if( dp->v_type == VBLK || dp->v_type == VCHR)
+ size = (size + DEV_BSIZE - 1) & ~(DEV_BSIZE - 1);
+
+ /*
+ * and map the pages to be read into the kva
+ */
+ for (i = 0; i < count; i++)
+ pmap_kenter( kva + PAGE_SIZE * i, VM_PAGE_TO_PHYS(m[i]));
+
+ pmap_update();
+ bp = getpbuf();
+ VHOLD(vp);
+
+ /* build a minimal buffer header */
+ bp->b_flags = B_BUSY | B_READ | B_CALL;
+ bp->b_iodone = vnode_pager_iodone;
+ /* B_PHYS is not set, but it is nice to fill this in */
+ bp->b_proc = curproc;
+ bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred;
+ if( bp->b_rcred != NOCRED)
+ crhold(bp->b_rcred);
+ if( bp->b_wcred != NOCRED)
+ crhold(bp->b_wcred);
+ bp->b_un.b_addr = (caddr_t) kva;
+ bp->b_blkno = firstaddr / DEV_BSIZE;
+ bgetvp(dp, bp);
+ bp->b_bcount = size;
+ bp->b_bufsize = size;
+
+ /* do the input */
+ VOP_STRATEGY(bp);
+
+ s = splbio();
+ /* we definitely need to be at splbio here */
+
+ while ((bp->b_flags & B_DONE) == 0) {
+ tsleep((caddr_t)bp, PVM, "vnread", 0);
+ }
+ splx(s);
+ if ((bp->b_flags & B_ERROR) != 0)
+ error = EIO;
+
+ if (!error) {
+ if (size != count * PAGE_SIZE)
+ bzero((caddr_t)kva + size, PAGE_SIZE * count - size);
+ }
+
+ pmap_remove(vm_map_pmap(pager_map), kva, kva + PAGE_SIZE * count);
+ kmem_free_wakeup(pager_map, kva, mapsize);
+
+ /*
+ * free the buffer header back to the swap buffer pool
+ */
+ relpbuf(bp);
+ HOLDRELE(vp);
+
+finishup:
+ for (i = 0; i < count; i++) {
+ pmap_clear_modify(VM_PAGE_TO_PHYS(m[i]));
+ m[i]->flags |= PG_CLEAN;
+ m[i]->flags &= ~PG_LAUNDRY;
+ if (i != reqpage) {
+ /*
+ * whether or not to leave the page activated
+ * is up in the air, but we should put the page
+ * on a page queue somewhere. (it already is in
+ * the object).
+ * Result: empirical evidence shows that
+ * deactivating pages is best.
+ */
+ /*
+ * just in case someone was asking for this
+ * page we now tell them that it is ok to use
+ */
+ if (!error) {
+ vm_page_deactivate(m[i]);
+ PAGE_WAKEUP(m[i]);
+ m[i]->flags &= ~PG_FAKE;
+ m[i]->act_count = 2;
+ } else {
+ vnode_pager_freepage(m[i]);
+ }
+ }
+ }
+ if (error) {
+ printf("vnode pager read error: %d\n", error);
+ }
+ if (errtype)
+ return error;
+ return (error ? VM_PAGER_FAIL : VM_PAGER_OK);
+}
+
+/*
+ * old-style vnode pager output routine
+ */
+int
+vnode_pager_output_old(vnp, m)
+ register vn_pager_t vnp;
+ vm_page_t m;
+{
+ vm_offset_t foff;
+ vm_offset_t kva;
+ vm_offset_t size;
+ struct iovec aiov;
+ struct uio auio;
+ struct vnode *vp;
+ int error;
+
+ vp = vnp->vnp_vp;
+ foff = m->offset + m->object->paging_offset;
+ /*
+ * Return failure if beyond current EOF
+ */
+ if (foff >= vnp->vnp_size) {
+ return VM_PAGER_BAD;
+ } else {
+ size = PAGE_SIZE;
+ if (foff + size > vnp->vnp_size)
+ size = vnp->vnp_size - foff;
+/*
+ * Allocate a kernel virtual address and initialize so that
+ * we can use VOP_WRITE routines.
+ */
+ kva = vm_pager_map_page(m);
+ aiov.iov_base = (caddr_t)kva;
+ aiov.iov_len = size;
+ auio.uio_iov = &aiov;
+ auio.uio_iovcnt = 1;
+ auio.uio_offset = foff;
+ auio.uio_segflg = UIO_SYSSPACE;
+ auio.uio_rw = UIO_WRITE;
+ auio.uio_resid = size;
+ auio.uio_procp = (struct proc *)0;
+
+ error = VOP_WRITE(vp, &auio, 0, curproc->p_ucred);
+
+ if (!error) {
+ if ((size - auio.uio_resid) == 0) {
+ error = EINVAL;
+ }
+ }
+ vm_pager_unmap_page(kva);
+ return error?VM_PAGER_FAIL:VM_PAGER_OK;
+ }
+}
+
+/*
+ * vnode pager output on a small-block file system
+ */
+int
+vnode_pager_output_smlfs(vnp, m)
+ vn_pager_t vnp;
+ vm_page_t m;
+{
+ int i;
+ int s;
+ vm_offset_t paging_offset;
+ struct vnode *dp, *vp;
+ struct buf *bp;
+ vm_offset_t mapsize;
+ vm_offset_t foff;
+ vm_offset_t kva;
+ int fileaddr;
+ int block;
+ vm_offset_t bsize;
+ int run;
+ int error = 0;
+
+ paging_offset = m->object->paging_offset;
+ vp = vnp->vnp_vp;
+ bsize = vp->v_mount->mnt_stat.f_iosize;
+ foff = m->offset + paging_offset;
+
+ VOP_BMAP(vp, foff, &dp, 0, 0);
+ kva = vm_pager_map_page(m);
+ for(i = 0; !error && i < (PAGE_SIZE/bsize); i++) {
+ /*
+ * calculate logical block and offset
+ */
+ fileaddr = vnode_pager_addr(vp, foff + i * bsize);
+ if( fileaddr != -1) {
+ s = splbio();
+ if( bp = incore( vp, (foff/bsize) + i)) {
+ bp = getblk(vp, (foff/bsize) + i, bp->b_bufsize,0, 0);
+ bp->b_flags |= B_INVAL;
+ brelse(bp);
+ }
+ splx(s);
+
+ bp = getpbuf();
+ VHOLD(vp);
+
+ /* build a minimal buffer header */
+ bp->b_flags = B_BUSY | B_CALL | B_WRITE;
+ bp->b_iodone = vnode_pager_iodone;
+ bp->b_proc = curproc;
+ bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred;
+ if( bp->b_rcred != NOCRED)
+ crhold(bp->b_rcred);
+ if( bp->b_wcred != NOCRED)
+ crhold(bp->b_wcred);
+ bp->b_un.b_addr = (caddr_t) kva + i * bsize;
+ bp->b_blkno = fileaddr / DEV_BSIZE;
+ bgetvp(dp, bp);
+ ++dp->v_numoutput;
+ /* for NFS */
+ bp->b_dirtyoff = 0;
+ bp->b_dirtyend = bsize;
+ bp->b_bcount = bsize;
+ bp->b_bufsize = bsize;
+
+ /* do the output */
+ VOP_STRATEGY(bp);
+
+ /* we definitely need to be at splbio here */
+
+ s = splbio();
+ while ((bp->b_flags & B_DONE) == 0) {
+ tsleep((caddr_t)bp, PVM, "vnswrt", 0);
+ }
+ splx(s);
+ if ((bp->b_flags & B_ERROR) != 0)
+ error = EIO;
+
+ /*
+ * free the buffer header back to the swap buffer pool
+ */
+ relpbuf(bp);
+ HOLDRELE(vp);
+ }
+ }
+ vm_pager_unmap_page(kva);
+ if( error)
+ return VM_PAGER_FAIL;
+ else
+ return VM_PAGER_OK;
+}
+
+/*
+ * generic vnode pager output routine
+ */
+int
+vnode_pager_output(vnp, m, count, rtvals)
+ vn_pager_t vnp;
+ vm_page_t *m;
+ int count;
+ int *rtvals;
+{
+ int i,j;
+ vm_offset_t kva, foff;
+ int size;
+ struct proc *p = curproc; /* XXX */
+ vm_object_t object;
+ vm_offset_t paging_offset;
+ struct vnode *dp, *vp;
+ struct buf *bp;
+ vm_offset_t mapsize;
+ vm_offset_t reqaddr;
+ int run;
+ int bsize;
+ int s;
+
+ int error = 0;
+
+retryoutput:
+ object = m[0]->object; /* all vm_page_t items are in same object */
+ paging_offset = object->paging_offset;
+
+ vp = vnp->vnp_vp;
+ bsize = vp->v_mount->mnt_stat.f_iosize;
+
+ for(i=0;i<count;i++)
+ rtvals[i] = VM_PAGER_AGAIN;
+
+ /*
+ * if the filesystem does not have a bmap, then use the
+ * old code
+ */
+ if (VOP_BMAP(vp, m[0]->offset+paging_offset, &dp, 0, 0)) {
+
+ rtvals[0] = vnode_pager_output_old(vnp, m[0]);
+
+ pmap_clear_modify(VM_PAGE_TO_PHYS(m[0]));
+ m[0]->flags |= PG_CLEAN;
+ m[0]->flags &= ~PG_LAUNDRY;
+ return rtvals[0];
+ }
+
+ /*
+ * if the filesystem has a small blocksize, then use
+ * the small block filesystem output code
+ */
+ if ((bsize < PAGE_SIZE) &&
+ (vp->v_mount->mnt_stat.f_type != MOUNT_NFS)) {
+
+ for(i=0;i<count;i++) {
+ rtvals[i] = vnode_pager_output_smlfs(vnp, m[i]);
+ if( rtvals[i] == VM_PAGER_OK) {
+ pmap_clear_modify(VM_PAGE_TO_PHYS(m[i]));
+ m[i]->flags |= PG_CLEAN;
+ m[i]->flags &= ~PG_LAUNDRY;
+ }
+ }
+ return rtvals[0];
+ }
+
+ /*
+ * get some kva for the output
+ */
+ kva = kmem_alloc_pageable(pager_map, (mapsize = count*PAGE_SIZE));
+ if( !kva) {
+ kva = kmem_alloc_pageable(pager_map, (mapsize = PAGE_SIZE));
+ count = 1;
+ if( !kva)
+ return rtvals[0];
+ }
+
+ for(i=0;i<count;i++) {
+ foff = m[i]->offset + paging_offset;
+ if (foff >= vnp->vnp_size) {
+ for(j=i;j<count;j++)
+ rtvals[j] = VM_PAGER_BAD;
+ count = i;
+ break;
+ }
+ }
+ if (count == 0) {
+ return rtvals[0];
+ }
+ foff = m[0]->offset + paging_offset;
+ reqaddr = vnode_pager_addr(vp, foff);
+ /*
+ * Scan forward and stop for the first non-contiguous
+ * entry or stop for a page being in buffer cache.
+ */
+ for (i = 1; i < count; i++) {
+ if ( vnode_pager_addr(vp, m[i]->offset + paging_offset)
+ != reqaddr + i * PAGE_SIZE) {
+ count = i;
+ break;
+ }
+ }
+
+ /*
+ * calculate the size of the transfer
+ */
+ size = count * PAGE_SIZE;
+ if ((foff + size) > vnp->vnp_size)
+ size = vnp->vnp_size - foff;
+
+ /*
+ * round up physical size for real devices
+ */
+ if( dp->v_type == VBLK || dp->v_type == VCHR)
+ size = (size + DEV_BSIZE - 1) & ~(DEV_BSIZE - 1);
+
+ /*
+ * and map the pages to be read into the kva
+ */
+ for (i = 0; i < count; i++)
+ pmap_kenter( kva + PAGE_SIZE * i, VM_PAGE_TO_PHYS(m[i]));
+ pmap_update();
+/*
+ printf("vnode: writing foff: %d, devoff: %d, size: %d\n",
+ foff, reqaddr, size);
+*/
+ /*
+ * next invalidate the incore vfs_bio data
+ */
+ for (i = 0; i < count; i++) {
+ int filblock = (foff + i * PAGE_SIZE) / bsize;
+ struct buf *fbp;
+
+ s = splbio();
+ if( fbp = incore( vp, filblock)) {
+ /* printf("invalidating: %d\n", filblock); */
+ fbp = getblk(vp, filblock, fbp->b_bufsize,0,0);
+ fbp->b_flags |= B_INVAL;
+ brelse(fbp);
+ }
+ splx(s);
+ }
+
+
+ bp = getpbuf();
+ VHOLD(vp);
+ /* build a minimal buffer header */
+ bp->b_flags = B_BUSY | B_WRITE | B_CALL;
+ bp->b_iodone = vnode_pager_iodone;
+ /* B_PHYS is not set, but it is nice to fill this in */
+ bp->b_proc = curproc;
+ bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred;
+
+ if( bp->b_rcred != NOCRED)
+ crhold(bp->b_rcred);
+ if( bp->b_wcred != NOCRED)
+ crhold(bp->b_wcred);
+ bp->b_un.b_addr = (caddr_t) kva;
+ bp->b_blkno = reqaddr / DEV_BSIZE;
+ bgetvp(dp, bp);
+ ++dp->v_numoutput;
+
+ /* for NFS */
+ bp->b_dirtyoff = 0;
+ bp->b_dirtyend = size;
+
+ bp->b_bcount = size;
+ bp->b_bufsize = size;
+
+ /* do the output */
+ VOP_STRATEGY(bp);
+
+ s = splbio();
+
+ /* we definitely need to be at splbio here */
+
+ while ((bp->b_flags & B_DONE) == 0) {
+ tsleep((caddr_t)bp, PVM, "vnwrite", 0);
+ }
+ splx(s);
+
+ if ((bp->b_flags & B_ERROR) != 0)
+ error = EIO;
+
+ pmap_remove(vm_map_pmap(pager_map), kva, kva + PAGE_SIZE * count);
+ kmem_free_wakeup(pager_map, kva, mapsize);
+
+ /*
+ * free the buffer header back to the swap buffer pool
+ */
+ relpbuf(bp);
+ HOLDRELE(vp);
+
+ if( !error) {
+ for(i=0;i<count;i++) {
+ pmap_clear_modify(VM_PAGE_TO_PHYS(m[i]));
+ m[i]->flags |= PG_CLEAN;
+ m[i]->flags &= ~PG_LAUNDRY;
+ rtvals[i] = VM_PAGER_OK;
+ }
+ } else if( count != 1) {
+ error = 0;
+ count = 1;
+ goto retryoutput;
+ }
+
+ if (error) {
+ printf("vnode pager write error: %d\n", error);
+ }
+ return (error ? VM_PAGER_FAIL : VM_PAGER_OK);
+}
+
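vnode_pager_addr() above converts a byte offset within a file to a byte address on the underlying device: divide by the filesystem block size to get the logical block and in-block offset, translate the logical block with VOP_BMAP, then scale the device block by DEV_BSIZE. A stand-alone sketch of that arithmetic, using an invented block map in place of VOP_BMAP:

#include <stdio.h>

#define DEV_BSIZE	512L	/* device block size */
#define FS_BSIZE	8192L	/* filesystem block size (assumed f_iosize) */

/* Invented stand-in for VOP_BMAP(): file block n lives at device block 1000 + 16*n. */
static long
fake_bmap(long fileblock)
{
	return (1000 + (FS_BSIZE / DEV_BSIZE) * fileblock);
}

/* Mirrors vnode_pager_addr(): file byte offset -> device byte address. */
static long
vnode_pager_addr_sketch(long address)
{
	long vblock = address / FS_BSIZE;	/* logical filesystem block */
	long voffset = address % FS_BSIZE;	/* offset within that block */
	long block = fake_bmap(vblock);		/* device block from the block map */

	if (block < 0)
		return (-1);			/* hole: no backing block */
	return (block * DEV_BSIZE + voffset);
}

int
main(void)
{
	long foff = 3 * FS_BSIZE + 100;		/* example file offset */

	printf("file offset %ld -> device byte address %ld\n",
	    foff, vnode_pager_addr_sketch(foff));
	return (0);
}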
diff --git a/sys/vm/vnode_pager.h b/sys/vm/vnode_pager.h
new file mode 100644
index 0000000..b01dc54
--- /dev/null
+++ b/sys/vm/vnode_pager.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 1990 University of Utah.
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * the Systems Programming Group of the University of Utah Computer
+ * Science Department.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)vnode_pager.h 8.1 (Berkeley) 6/11/93
+ */
+
+#ifndef _VNODE_PAGER_
+#define _VNODE_PAGER_ 1
+
+/*
+ * VNODE pager private data.
+ */
+struct vnpager {
+ int vnp_flags; /* flags */
+ struct vnode *vnp_vp; /* vnode */
+ vm_size_t vnp_size; /* vnode current size */
+};
+typedef struct vnpager *vn_pager_t;
+
+#define VN_PAGER_NULL ((vn_pager_t)0)
+
+#endif /* _VNODE_PAGER_ */