summaryrefslogtreecommitdiffstats
path: root/sys/vm/swap_pager.c
diff options
context:
space:
mode:
authorrgrimes <rgrimes@FreeBSD.org>1994-05-24 10:09:53 +0000
committerrgrimes <rgrimes@FreeBSD.org>1994-05-24 10:09:53 +0000
commit8fb65ce818b3e3c6f165b583b910af24000768a5 (patch)
treeba751e4f2166aefec707c9d7401c7ff432506642 /sys/vm/swap_pager.c
parenta6ce65d368e623088a4c1a29865889f431b15420 (diff)
downloadFreeBSD-src-8fb65ce818b3e3c6f165b583b910af24000768a5.zip
FreeBSD-src-8fb65ce818b3e3c6f165b583b910af24000768a5.tar.gz
BSD 4.4 Lite Kernel Sources
Diffstat (limited to 'sys/vm/swap_pager.c')
-rw-r--r--sys/vm/swap_pager.c1009
1 files changed, 1009 insertions, 0 deletions
diff --git a/sys/vm/swap_pager.c b/sys/vm/swap_pager.c
new file mode 100644
index 0000000..899a6cf
--- /dev/null
+++ b/sys/vm/swap_pager.c
@@ -0,0 +1,1009 @@
+/*
+ * Copyright (c) 1990 University of Utah.
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * the Systems Programming Group of the University of Utah Computer
+ * Science Department.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * from: Utah $Hdr: swap_pager.c 1.4 91/04/30$
+ *
+ * @(#)swap_pager.c 8.9 (Berkeley) 3/21/94
+ */
+
+/*
+ * Quick hack to page to dedicated partition(s).
+ * TODO:
+ * Add multiprocessor locks
+ * Deal with async writes in a better fashion
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/buf.h>
+#include <sys/map.h>
+#include <sys/vnode.h>
+#include <sys/malloc.h>
+
+#include <miscfs/specfs/specdev.h>
+
+#include <vm/vm.h>
+#include <vm/vm_page.h>
+#include <vm/vm_pageout.h>
+#include <vm/swap_pager.h>
+
+#define NSWSIZES 16 /* size of swtab */
+#define MAXDADDRS 64 /* max # of disk addrs for fixed allocations */
+#ifndef NPENDINGIO
+#define NPENDINGIO 64 /* max # of pending cleans */
+#endif
+
+#ifdef DEBUG
+int swpagerdebug = 0x100;
+#define SDB_FOLLOW 0x001
+#define SDB_INIT 0x002
+#define SDB_ALLOC 0x004
+#define SDB_IO 0x008
+#define SDB_WRITE 0x010
+#define SDB_FAIL 0x020
+#define SDB_ALLOCBLK 0x040
+#define SDB_FULL 0x080
+#define SDB_ANOM 0x100
+#define SDB_ANOMPANIC 0x200
+#define SDB_CLUSTER 0x400
+#define SDB_PARANOIA 0x800
+#endif
+
+TAILQ_HEAD(swpclean, swpagerclean);
+
+struct swpagerclean {
+ TAILQ_ENTRY(swpagerclean) spc_list;
+ int spc_flags;
+ struct buf *spc_bp;
+ sw_pager_t spc_swp;
+ vm_offset_t spc_kva;
+ vm_page_t spc_m;
+ int spc_npages;
+} swcleanlist[NPENDINGIO];
+typedef struct swpagerclean *swp_clean_t;
+
+/* spc_flags values */
+#define SPC_FREE 0x00
+#define SPC_BUSY 0x01
+#define SPC_DONE 0x02
+#define SPC_ERROR 0x04
+
+struct swtab {
+ vm_size_t st_osize; /* size of object (bytes) */
+ int st_bsize; /* vs. size of swap block (DEV_BSIZE units) */
+#ifdef DEBUG
+ u_long st_inuse; /* number in this range in use */
+ u_long st_usecnt; /* total used of this size */
+#endif
+} swtab[NSWSIZES+1];
+
+#ifdef DEBUG
+int swap_pager_poip; /* pageouts in progress */
+int swap_pager_piip; /* pageins in progress */
+#endif
+
+int swap_pager_maxcluster; /* maximum cluster size */
+int swap_pager_npendingio; /* number of pager clean structs */
+
+struct swpclean swap_pager_inuse; /* list of pending page cleans */
+struct swpclean swap_pager_free; /* list of free pager clean structs */
+struct pagerlst swap_pager_list; /* list of "named" anon regions */
+
+static void swap_pager_init __P((void));
+static vm_pager_t swap_pager_alloc
+ __P((caddr_t, vm_size_t, vm_prot_t, vm_offset_t));
+static void swap_pager_clean __P((int));
+#ifdef DEBUG
+static void swap_pager_clean_check __P((vm_page_t *, int, int));
+#endif
+static void swap_pager_cluster
+ __P((vm_pager_t, vm_offset_t,
+ vm_offset_t *, vm_offset_t *));
+static void swap_pager_dealloc __P((vm_pager_t));
+static int swap_pager_getpage
+ __P((vm_pager_t, vm_page_t *, int, boolean_t));
+static boolean_t swap_pager_haspage __P((vm_pager_t, vm_offset_t));
+static int swap_pager_io __P((sw_pager_t, vm_page_t *, int, int));
+static void swap_pager_iodone __P((struct buf *));
+static int swap_pager_putpage
+ __P((vm_pager_t, vm_page_t *, int, boolean_t));
+
+struct pagerops swappagerops = {
+ swap_pager_init,
+ swap_pager_alloc,
+ swap_pager_dealloc,
+ swap_pager_getpage,
+ swap_pager_putpage,
+ swap_pager_haspage,
+ swap_pager_cluster
+};
+
+static void
+swap_pager_init()
+{
+ register swp_clean_t spc;
+ register int i, bsize;
+ extern int dmmin, dmmax;
+ int maxbsize;
+
+#ifdef DEBUG
+ if (swpagerdebug & (SDB_FOLLOW|SDB_INIT))
+ printf("swpg_init()\n");
+#endif
+ dfltpagerops = &swappagerops;
+ TAILQ_INIT(&swap_pager_list);
+
+ /*
+ * Allocate async IO structures.
+ *
+ * XXX it would be nice if we could do this dynamically based on
+ * the value of nswbuf (since we are ultimately limited by that)
+ * but neither nswbuf nor malloc has been initialized yet. So the
+ * structs are statically allocated above.
+ */
+ swap_pager_npendingio = NPENDINGIO;
+
+ /*
+ * Initialize clean lists
+ */
+ TAILQ_INIT(&swap_pager_inuse);
+ TAILQ_INIT(&swap_pager_free);
+ for (i = 0, spc = swcleanlist; i < swap_pager_npendingio; i++, spc++) {
+ TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list);
+ spc->spc_flags = SPC_FREE;
+ }
+
+ /*
+ * Calculate the swap allocation constants.
+ */
+ if (dmmin == 0) {
+ dmmin = DMMIN;
+ if (dmmin < CLBYTES/DEV_BSIZE)
+ dmmin = CLBYTES/DEV_BSIZE;
+ }
+ if (dmmax == 0)
+ dmmax = DMMAX;
+
+ /*
+ * Fill in our table of object size vs. allocation size
+ */
+ bsize = btodb(PAGE_SIZE);
+ if (bsize < dmmin)
+ bsize = dmmin;
+ maxbsize = btodb(sizeof(sw_bm_t) * NBBY * PAGE_SIZE);
+ if (maxbsize > dmmax)
+ maxbsize = dmmax;
+ for (i = 0; i < NSWSIZES; i++) {
+ swtab[i].st_osize = (vm_size_t) (MAXDADDRS * dbtob(bsize));
+ swtab[i].st_bsize = bsize;
+ if (bsize <= btodb(MAXPHYS))
+ swap_pager_maxcluster = dbtob(bsize);
+#ifdef DEBUG
+ if (swpagerdebug & SDB_INIT)
+ printf("swpg_init: ix %d, size %x, bsize %x\n",
+ i, swtab[i].st_osize, swtab[i].st_bsize);
+#endif
+ if (bsize >= maxbsize)
+ break;
+ bsize *= 2;
+ }
+ swtab[i].st_osize = 0;
+ swtab[i].st_bsize = bsize;
+}
+
+/*
+ * Allocate a pager structure and associated resources.
+ * Note that if we are called from the pageout daemon (handle == NULL)
+ * we should not wait for memory as it could result in deadlock.
+ */
+static vm_pager_t
+swap_pager_alloc(handle, size, prot, foff)
+	caddr_t handle;
+	register vm_size_t size;
+	vm_prot_t prot;
+	vm_offset_t foff;
+{
+	register vm_pager_t pager;
+	register sw_pager_t swp;
+	struct swtab *swt;
+	int waitok;
+
+#ifdef DEBUG
+	if (swpagerdebug & (SDB_FOLLOW|SDB_ALLOC))
+		printf("swpg_alloc(%x, %x, %x)\n", handle, size, prot);
+#endif
+	/*
+	 * If this is a "named" anonymous region, look it up and
+	 * return the appropriate pager if it exists.
+	 */
+	if (handle) {
+		pager = vm_pager_lookup(&swap_pager_list, handle);
+		if (pager != NULL) {
+			/*
+			 * Use vm_object_lookup to gain a reference
+			 * to the object and also to remove from the
+			 * object cache.
+			 */
+			if (vm_object_lookup(pager) == NULL)
+				panic("swap_pager_alloc: bad object");
+			return(pager);
+		}
+	}
+	/*
+	 * Pager doesn't exist, allocate swap management resources
+	 * and initialize.  Only a named request (handle != NULL) may sleep.
+	 */
+	waitok = handle ? M_WAITOK : M_NOWAIT;
+	pager = (vm_pager_t)malloc(sizeof *pager, M_VMPAGER, waitok);
+	if (pager == NULL)
+		return(NULL);
+	swp = (sw_pager_t)malloc(sizeof *swp, M_VMPGDATA, waitok);
+	if (swp == NULL) {
+#ifdef DEBUG
+		if (swpagerdebug & SDB_FAIL)
+			printf("swpg_alloc: swpager malloc failed\n");
+#endif
+		free((caddr_t)pager, M_VMPAGER);
+		return(NULL);
+	}
+	size = round_page(size);
+	for (swt = swtab; swt->st_osize; swt++)
+		if (size <= swt->st_osize)
+			break;
+#ifdef DEBUG
+	swt->st_inuse++;
+	swt->st_usecnt++;
+#endif
+	swp->sw_osize = size;
+	swp->sw_bsize = swt->st_bsize;
+	swp->sw_nblocks = (btodb(size) + swp->sw_bsize - 1) / swp->sw_bsize;
+	swp->sw_blocks = (sw_blk_t)
+		malloc(swp->sw_nblocks*sizeof(*swp->sw_blocks),
+		       M_VMPGDATA, waitok);
+	if (swp->sw_blocks == NULL) {
+		free((caddr_t)swp, M_VMPGDATA);
+		free((caddr_t)pager, M_VMPAGER);
+#ifdef DEBUG
+		if (swpagerdebug & SDB_FAIL)
+			printf("swpg_alloc: sw_blocks malloc failed\n");
+		swt->st_inuse--;
+		swt->st_usecnt--;
+#endif
+		return(NULL);
+	}
+	bzero((caddr_t)swp->sw_blocks,
+	      swp->sw_nblocks * sizeof(*swp->sw_blocks));
+	swp->sw_poip = 0;
+	if (handle) {
+		vm_object_t object;
+
+		swp->sw_flags = SW_NAMED;
+		TAILQ_INSERT_TAIL(&swap_pager_list, pager, pg_list);
+		/*
+		 * Consistent with other pagers: return with object
+		 * referenced.  Can't do this with handle == NULL
+		 * since it might be the pageout daemon calling.
+		 */
+		object = vm_object_allocate(size);
+		vm_object_enter(object, pager);
+		vm_object_setpager(object, pager, 0, FALSE);
+	} else {
+		swp->sw_flags = 0;
+		pager->pg_list.tqe_next = NULL;
+		pager->pg_list.tqe_prev = NULL;
+	}
+	pager->pg_handle = handle;
+	pager->pg_ops = &swappagerops;
+	pager->pg_type = PG_SWAP;
+	pager->pg_flags = PG_CLUSTERPUT;
+	pager->pg_data = swp;
+
+#ifdef DEBUG
+	if (swpagerdebug & SDB_ALLOC)
+		printf("swpg_alloc: pg_data %x, %x of %x at %x\n",
+		       swp, swp->sw_nblocks, swp->sw_bsize, swp->sw_blocks);
+#endif
+	return(pager);
+}
+
+static void
+swap_pager_dealloc(pager)
+ vm_pager_t pager;
+{
+ register int i;
+ register sw_blk_t bp;
+ register sw_pager_t swp;
+ struct swtab *swt;
+ int s;
+
+#ifdef DEBUG
+ /* save panic time state */
+ if ((swpagerdebug & SDB_ANOMPANIC) && panicstr)
+ return;
+ if (swpagerdebug & (SDB_FOLLOW|SDB_ALLOC))
+ printf("swpg_dealloc(%x)\n", pager);
+#endif
+ /*
+ * Remove from list right away so lookups will fail if we
+ * block for pageout completion.
+ */
+ swp = (sw_pager_t) pager->pg_data;
+ if (swp->sw_flags & SW_NAMED) {
+ TAILQ_REMOVE(&swap_pager_list, pager, pg_list);
+ swp->sw_flags &= ~SW_NAMED;
+ }
+#ifdef DEBUG
+ for (swt = swtab; swt->st_osize; swt++)
+ if (swp->sw_osize <= swt->st_osize)
+ break;
+ swt->st_inuse--;
+#endif
+
+ /*
+ * Wait for all pageouts to finish and remove
+ * all entries from cleaning list.
+ */
+ s = splbio();
+ while (swp->sw_poip) {
+ swp->sw_flags |= SW_WANTED;
+ (void) tsleep(swp, PVM, "swpgdealloc", 0);
+ }
+ splx(s);
+ swap_pager_clean(B_WRITE);
+
+ /*
+ * Free left over swap blocks
+ */
+ for (i = 0, bp = swp->sw_blocks; i < swp->sw_nblocks; i++, bp++)
+ if (bp->swb_block) {
+#ifdef DEBUG
+ if (swpagerdebug & (SDB_ALLOCBLK|SDB_FULL))
+ printf("swpg_dealloc: blk %x\n",
+ bp->swb_block);
+#endif
+ rmfree(swapmap, swp->sw_bsize, bp->swb_block);
+ }
+ /*
+ * Free swap management resources
+ */
+ free((caddr_t)swp->sw_blocks, M_VMPGDATA);
+ free((caddr_t)swp, M_VMPGDATA);
+ free((caddr_t)pager, M_VMPAGER);
+}
+
+static int
+swap_pager_getpage(pager, mlist, npages, sync)
+ vm_pager_t pager;
+ vm_page_t *mlist;
+ int npages;
+ boolean_t sync;
+{
+#ifdef DEBUG
+ if (swpagerdebug & SDB_FOLLOW)
+ printf("swpg_getpage(%x, %x, %x, %x)\n",
+ pager, mlist, npages, sync);
+#endif
+ return(swap_pager_io((sw_pager_t)pager->pg_data,
+ mlist, npages, B_READ));
+}
+
+static int
+swap_pager_putpage(pager, mlist, npages, sync)
+ vm_pager_t pager;
+ vm_page_t *mlist;
+ int npages;
+ boolean_t sync;
+{
+ int flags;
+
+#ifdef DEBUG
+ if (swpagerdebug & SDB_FOLLOW)
+ printf("swpg_putpage(%x, %x, %x, %x)\n",
+ pager, mlist, npages, sync);
+#endif
+ if (pager == NULL) {
+ swap_pager_clean(B_WRITE);
+ return (VM_PAGER_OK); /* ??? */
+ }
+ flags = B_WRITE;
+ if (!sync)
+ flags |= B_ASYNC;
+ return(swap_pager_io((sw_pager_t)pager->pg_data,
+ mlist, npages, flags));
+}
+
+static boolean_t
+swap_pager_haspage(pager, offset)
+ vm_pager_t pager;
+ vm_offset_t offset;
+{
+ register sw_pager_t swp;
+ register sw_blk_t swb;
+ int ix;
+
+#ifdef DEBUG
+ if (swpagerdebug & (SDB_FOLLOW|SDB_ALLOCBLK))
+ printf("swpg_haspage(%x, %x) ", pager, offset);
+#endif
+ swp = (sw_pager_t) pager->pg_data;
+ ix = offset / dbtob(swp->sw_bsize);
+ if (swp->sw_blocks == NULL || ix >= swp->sw_nblocks) {
+#ifdef DEBUG
+ if (swpagerdebug & (SDB_FAIL|SDB_FOLLOW|SDB_ALLOCBLK))
+ printf("swpg_haspage: %x bad offset %x, ix %x\n",
+ swp->sw_blocks, offset, ix);
+#endif
+ return(FALSE);
+ }
+ swb = &swp->sw_blocks[ix];
+ if (swb->swb_block)
+ ix = atop(offset % dbtob(swp->sw_bsize));
+#ifdef DEBUG
+ if (swpagerdebug & SDB_ALLOCBLK)
+ printf("%x blk %x+%x ", swp->sw_blocks, swb->swb_block, ix);
+ if (swpagerdebug & (SDB_FOLLOW|SDB_ALLOCBLK))
+ printf("-> %c\n",
+ "FT"[swb->swb_block && (swb->swb_mask & (1 << ix))]);
+#endif
+ if (swb->swb_block && (swb->swb_mask & (1 << ix)))
+ return(TRUE);
+ return(FALSE);
+}
+
+static void
+swap_pager_cluster(pager, offset, loffset, hoffset)
+ vm_pager_t pager;
+ vm_offset_t offset;
+ vm_offset_t *loffset;
+ vm_offset_t *hoffset;
+{
+ sw_pager_t swp;
+ register int bsize;
+ vm_offset_t loff, hoff;
+
+#ifdef DEBUG
+ if (swpagerdebug & (SDB_FOLLOW|SDB_CLUSTER))
+ printf("swpg_cluster(%x, %x) ", pager, offset);
+#endif
+ swp = (sw_pager_t) pager->pg_data;
+ bsize = dbtob(swp->sw_bsize);
+ if (bsize > swap_pager_maxcluster)
+ bsize = swap_pager_maxcluster;
+
+ loff = offset - (offset % bsize);
+ if (loff >= swp->sw_osize)
+ panic("swap_pager_cluster: bad offset");
+
+ hoff = loff + bsize;
+ if (hoff > swp->sw_osize)
+ hoff = swp->sw_osize;
+
+ *loffset = loff;
+ *hoffset = hoff;
+#ifdef DEBUG
+ if (swpagerdebug & (SDB_FOLLOW|SDB_CLUSTER))
+ printf("returns [%x-%x]\n", loff, hoff);
+#endif
+}
+
+/*
+ * Scaled down version of swap().
+ * Assumes that PAGE_SIZE < MAXPHYS; i.e. only one operation needed.
+ * BOGUS: lower level IO routines expect a KVA so we have to map our
+ * provided physical page into the KVA to keep them happy.
+ */
+static int
+swap_pager_io(swp, mlist, npages, flags)
+	register sw_pager_t swp;
+	vm_page_t *mlist;
+	int npages;
+	int flags;
+{
+	register struct buf *bp;
+	register sw_blk_t swb;
+	register int s;
+	int ix, mask;
+	int rv;
+	vm_offset_t kva, off;
+	swp_clean_t spc;
+	vm_page_t m;
+
+#ifdef DEBUG
+	/* save panic time state */
+	if ((swpagerdebug & SDB_ANOMPANIC) && panicstr)
+		return (VM_PAGER_FAIL); /* XXX: correct return? */
+	if (swpagerdebug & (SDB_FOLLOW|SDB_IO))
+		printf("swpg_io(%x, %x, %x, %x)\n", swp, mlist, npages, flags);
+	if (flags & B_READ) {
+		if (flags & B_ASYNC)
+			panic("swap_pager_io: cannot do ASYNC reads");
+		if (npages != 1)
+			panic("swap_pager_io: cannot do clustered reads");
+	}
+#endif
+
+	/*
+	 * First determine if the page exists in the pager if this is
+	 * a sync read.  This quickly handles cases where we are
+	 * following shadow chains looking for the top level object
+	 * with the page.
+	 */
+	m = *mlist;
+	off = m->offset + m->object->paging_offset;
+	ix = off / dbtob(swp->sw_bsize);
+	if (swp->sw_blocks == NULL || ix >= swp->sw_nblocks) {
+#ifdef DEBUG
+		if ((flags & B_READ) == 0 && (swpagerdebug & SDB_ANOM)) {
+			printf("swap_pager_io: no swap block on write\n");
+			return(VM_PAGER_BAD);
+		}
+#endif
+		return(VM_PAGER_FAIL);
+	}
+	swb = &swp->sw_blocks[ix];
+	off = off % dbtob(swp->sw_bsize);
+	if ((flags & B_READ) &&
+	    (swb->swb_block == 0 || (swb->swb_mask & (1 << atop(off))) == 0))
+		return(VM_PAGER_FAIL);
+
+	/*
+	 * For reads (pageins) and synchronous writes, we clean up
+	 * all completed async pageouts.
+	 */
+	if ((flags & B_ASYNC) == 0) {
+		s = splbio();
+		swap_pager_clean(flags&B_READ);
+#ifdef DEBUG
+		if (swpagerdebug & SDB_PARANOIA)
+			swap_pager_clean_check(mlist, npages, flags&B_READ);
+#endif
+		splx(s);
+	}
+	/*
+	 * For async writes (pageouts), we cleanup completed pageouts so
+	 * that all available resources are freed.  Also tells us if this
+	 * page is already being cleaned.  If it is, or no resources
+	 * are available, we try again later.
+	 */
+	else {
+		swap_pager_clean(B_WRITE);
+#ifdef DEBUG
+		if (swpagerdebug & SDB_PARANOIA)
+			swap_pager_clean_check(mlist, npages, B_WRITE);
+#endif
+		if (swap_pager_free.tqh_first == NULL) {
+#ifdef DEBUG
+			if (swpagerdebug & SDB_FAIL)
+				printf("%s: no available io headers\n",
+				       "swap_pager_io");
+#endif
+			return(VM_PAGER_AGAIN);
+		}
+	}
+
+	/*
+	 * Allocate a swap block if necessary.
+	 */
+	if (swb->swb_block == 0) {
+		swb->swb_block = rmalloc(swapmap, swp->sw_bsize);
+		if (swb->swb_block == 0) {
+#ifdef DEBUG
+			if (swpagerdebug & SDB_FAIL)
+				printf("swpg_io: rmalloc of %x failed\n",
+				       swp->sw_bsize);
+#endif
+			/*
+			 * XXX this is technically a resource shortage that
+			 * should return AGAIN, but the situation isn't likely
+			 * to be remedied just by delaying a little while and
+			 * trying again (the pageout daemon's current response
+			 * to AGAIN) so we just return FAIL.
+			 */
+			return(VM_PAGER_FAIL);
+		}
+#ifdef DEBUG
+		if (swpagerdebug & (SDB_FULL|SDB_ALLOCBLK))
+			printf("swpg_io: %x alloc blk %x at ix %x\n",
+			       swp->sw_blocks, swb->swb_block, ix);
+#endif
+	}
+
+	/*
+	 * Allocate a kernel virtual address and initialize so that PTE
+	 * is available for lower level IO drivers.
+	 */
+	kva = vm_pager_map_pages(mlist, npages, !(flags & B_ASYNC));
+	if (kva == 0) {
+#ifdef DEBUG
+		if (swpagerdebug & SDB_FAIL)
+			printf("%s: no KVA space to map pages\n",
+			       "swap_pager_io");
+#endif
+		return(VM_PAGER_AGAIN);
+	}
+
+	/*
+	 * Get a swap buffer header and initialize it.
+	 */
+	s = splbio();
+	while (bswlist.b_actf == NULL) {
+#ifdef DEBUG
+		if (swpagerdebug & SDB_ANOM)
+			printf("swap_pager_io: wait on swbuf for %x (%d)\n",
+			       m, flags);
+#endif
+		bswlist.b_flags |= B_WANTED;
+		tsleep((caddr_t)&bswlist, PSWP+1, "swpgiobuf", 0);
+	}
+	bp = bswlist.b_actf;
+	bswlist.b_actf = bp->b_actf;
+	splx(s);
+	bp->b_flags = B_BUSY | (flags & B_READ);
+	bp->b_proc = &proc0; /* XXX (but without B_PHYS set this is ok) */
+	bp->b_data = (caddr_t)kva;
+	bp->b_blkno = swb->swb_block + btodb(off);
+	VHOLD(swapdev_vp);
+	bp->b_vp = swapdev_vp;
+	if (swapdev_vp->v_type == VBLK)
+		bp->b_dev = swapdev_vp->v_rdev;
+	bp->b_bcount = npages * PAGE_SIZE;
+
+	/*
+	 * For writes we set up additional buffer fields, record a pageout
+	 * in progress and mark that these swap blocks are now allocated.
+	 */
+	if ((bp->b_flags & B_READ) == 0) {
+		bp->b_dirtyoff = 0;
+		bp->b_dirtyend = npages * PAGE_SIZE;
+		swapdev_vp->v_numoutput++;
+		s = splbio();
+		swp->sw_poip++;
+		splx(s);
+		mask = (~(~0 << npages)) << atop(off);
+#ifdef DEBUG
+		swap_pager_poip++;
+		if (swpagerdebug & SDB_WRITE)
+			printf("swpg_io: write: bp=%x swp=%x poip=%d\n",
+			       bp, swp, swp->sw_poip);
+		if ((swpagerdebug & SDB_ALLOCBLK) &&
+		    (swb->swb_mask & mask) != mask)
+			printf("swpg_io: %x write %d pages at %x+%x\n",
+			       swp->sw_blocks, npages, swb->swb_block,
+			       atop(off));
+		if (swpagerdebug & SDB_CLUSTER)
+			printf("swpg_io: off=%x, npg=%x, mask=%x, bmask=%x\n",
+			       off, npages, mask, swb->swb_mask);
+#endif
+		swb->swb_mask |= mask;
+	}
+	/*
+	 * If this is an async write we set up still more buffer fields
+	 * and place a "cleaning" entry on the inuse queue.
+	 */
+	if ((flags & (B_READ|B_ASYNC)) == B_ASYNC) {
+#ifdef DEBUG
+		if (swap_pager_free.tqh_first == NULL)
+			panic("swpg_io: lost spc");
+#endif
+		spc = swap_pager_free.tqh_first;
+		TAILQ_REMOVE(&swap_pager_free, spc, spc_list);
+#ifdef DEBUG
+		if (spc->spc_flags != SPC_FREE)
+			panic("swpg_io: bad free spc");
+#endif
+		spc->spc_flags = SPC_BUSY;
+		spc->spc_bp = bp;
+		spc->spc_swp = swp;
+		spc->spc_kva = kva;
+		/*
+		 * Record the first page.  This allows swap_pager_clean
+		 * to efficiently handle the common case of a single page.
+		 * For clusters, it allows us to locate the object easily
+		 * and we then reconstruct the rest of the mlist from spc_kva.
+		 */
+		spc->spc_m = m;
+		spc->spc_npages = npages;
+		bp->b_flags |= B_CALL;
+		bp->b_iodone = swap_pager_iodone;
+		s = splbio();
+		TAILQ_INSERT_TAIL(&swap_pager_inuse, spc, spc_list);
+		splx(s);
+	}
+
+	/*
+	 * Finally, start the IO operation.
+	 * If it is async we are all done, otherwise we must wait for
+	 * completion and cleanup afterwards.
+	 */
+#ifdef DEBUG
+	if (swpagerdebug & SDB_IO)
+		printf("swpg_io: IO start: bp %x, db %x, va %x, pa %x\n",
+		       bp, swb->swb_block+btodb(off), kva, VM_PAGE_TO_PHYS(m));
+#endif
+	VOP_STRATEGY(bp);
+	if ((flags & (B_READ|B_ASYNC)) == B_ASYNC) {
+#ifdef DEBUG
+		if (swpagerdebug & SDB_IO)
+			printf("swpg_io: IO started: bp %x\n", bp);
+#endif
+		return(VM_PAGER_PEND);
+	}
+	s = splbio();
+#ifdef DEBUG
+	/* A sync pageout was already added to swap_pager_poip when the */
+	/* write was set up above; only pageins are counted here. */
+	if (flags & B_READ)
+		swap_pager_piip++;
+#endif
+	while ((bp->b_flags & B_DONE) == 0)
+		(void) tsleep(bp, PVM, "swpgio", 0);
+	if ((flags & B_READ) == 0)
+		--swp->sw_poip;
+#ifdef DEBUG
+	if (flags & B_READ)
+		--swap_pager_piip;
+	else
+		--swap_pager_poip;
+#endif
+	rv = (bp->b_flags & B_ERROR) ? VM_PAGER_ERROR : VM_PAGER_OK;
+	bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_PAGET|B_UAREA|B_DIRTY);
+	bp->b_actf = bswlist.b_actf;
+	bswlist.b_actf = bp;
+	if (bp->b_vp)
+		brelvp(bp);
+	if (bswlist.b_flags & B_WANTED) {
+		bswlist.b_flags &= ~B_WANTED;
+		wakeup(&bswlist);
+	}
+	if ((flags & B_READ) == 0 && rv == VM_PAGER_OK) {
+		m->flags |= PG_CLEAN;
+		pmap_clear_modify(VM_PAGE_TO_PHYS(m));
+	}
+	splx(s);
+#ifdef DEBUG
+	if (swpagerdebug & SDB_IO)
+		printf("swpg_io: IO done: bp %x, rv %d\n", bp, rv);
+	if ((swpagerdebug & SDB_FAIL) && rv == VM_PAGER_ERROR)
+		printf("swpg_io: IO error\n");
+#endif
+	vm_pager_unmap_pages(kva, npages);
+	return(rv);
+}
+
+static void
+swap_pager_clean(rw)
+ int rw;
+{
+ register swp_clean_t spc;
+ register int s, i;
+ vm_object_t object;
+ vm_page_t m;
+
+#ifdef DEBUG
+ /* save panic time state */
+ if ((swpagerdebug & SDB_ANOMPANIC) && panicstr)
+ return;
+ if (swpagerdebug & SDB_FOLLOW)
+ printf("swpg_clean(%x)\n", rw);
+#endif
+
+ for (;;) {
+ /*
+ * Look up and removal from inuse list must be done
+ * at splbio() to avoid conflicts with swap_pager_iodone.
+ */
+ s = splbio();
+ for (spc = swap_pager_inuse.tqh_first;
+ spc != NULL;
+ spc = spc->spc_list.tqe_next) {
+ /*
+ * If the operation is done, remove it from the
+ * list and process it.
+ *
+ * XXX if we can't get the object lock we also
+ * leave it on the list and try again later.
+ * Is there something better we could do?
+ */
+ if ((spc->spc_flags & SPC_DONE) &&
+ vm_object_lock_try(spc->spc_m->object)) {
+ TAILQ_REMOVE(&swap_pager_inuse, spc, spc_list);
+ break;
+ }
+ }
+ splx(s);
+
+ /*
+ * No operations done, that's all we can do for now.
+ */
+ if (spc == NULL)
+ break;
+
+ /*
+ * Found a completed operation so finish it off.
+ * Note: no longer at splbio since entry is off the list.
+ */
+ m = spc->spc_m;
+ object = m->object;
+
+ /*
+ * Process each page in the cluster.
+ * The first page is explicitly kept in the cleaning
+ * entry, others must be reconstructed from the KVA.
+ */
+ for (i = 0; i < spc->spc_npages; i++) {
+ if (i)
+ m = vm_pager_atop(spc->spc_kva + ptoa(i));
+ /*
+ * If no error mark as clean and inform the pmap
+ * system. If there was an error, mark as dirty
+ * so we will try again.
+ *
+ * XXX could get stuck doing this, should give up
+ * after awhile.
+ */
+ if (spc->spc_flags & SPC_ERROR) {
+ printf("%s: clean of page %x failed\n",
+ "swap_pager_clean",
+ VM_PAGE_TO_PHYS(m));
+ m->flags |= PG_LAUNDRY;
+ } else {
+ m->flags |= PG_CLEAN;
+ pmap_clear_modify(VM_PAGE_TO_PHYS(m));
+ }
+ m->flags &= ~PG_BUSY;
+ PAGE_WAKEUP(m);
+ }
+
+ /*
+ * Done with the object, decrement the paging count
+ * and unlock it.
+ */
+ if (--object->paging_in_progress == 0)
+ wakeup(object);
+ vm_object_unlock(object);
+
+ /*
+ * Free up KVM used and put the entry back on the list.
+ */
+ vm_pager_unmap_pages(spc->spc_kva, spc->spc_npages);
+ spc->spc_flags = SPC_FREE;
+ TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list);
+#ifdef DEBUG
+ if (swpagerdebug & SDB_WRITE)
+ printf("swpg_clean: free spc %x\n", spc);
+#endif
+ }
+}
+
+#ifdef DEBUG
+static void
+swap_pager_clean_check(mlist, npages, rw)
+ vm_page_t *mlist;
+ int npages;
+ int rw;
+{
+ register swp_clean_t spc;
+ boolean_t bad;
+ int i, j, s;
+ vm_page_t m;
+
+ if (panicstr)
+ return;
+
+ bad = FALSE;
+ s = splbio();
+ for (spc = swap_pager_inuse.tqh_first;
+ spc != NULL;
+ spc = spc->spc_list.tqe_next) {
+ for (j = 0; j < spc->spc_npages; j++) {
+ m = vm_pager_atop(spc->spc_kva + ptoa(j));
+ for (i = 0; i < npages; i++)
+ if (m == mlist[i]) {
+ if (swpagerdebug & SDB_ANOM)
+ printf(
+ "swpg_clean_check: %s: page %x on list, flags %x\n",
+ rw == B_WRITE ? "write" : "read", mlist[i], spc->spc_flags);
+ bad = TRUE;
+ }
+ }
+ }
+ splx(s);
+ if (bad)
+ panic("swpg_clean_check");
+}
+#endif
+
+static void
+swap_pager_iodone(bp)
+	register struct buf *bp;
+{
+	/* b_iodone hook installed by swap_pager_io for async pageouts. */
+	register swp_clean_t spc;
+	int s;
+
+#ifdef DEBUG
+	/* save panic time state */
+	if ((swpagerdebug & SDB_ANOMPANIC) && panicstr)
+		return;
+	if (swpagerdebug & SDB_FOLLOW)
+		printf("swpg_iodone(%x)\n", bp);
+#endif
+	s = splbio();
+	for (spc = swap_pager_inuse.tqh_first;
+	     spc != NULL;
+	     spc = spc->spc_list.tqe_next)
+		if (spc->spc_bp == bp)
+			break;
+	/* Checked in every kernel, not just DEBUG: spc is dereferenced below. */
+	if (spc == NULL)
+		panic("swap_pager_iodone: bp not found");
+
+	/* Flag the entry as finished; swap_pager_clean picks up SPC_DONE. */
+	spc->spc_flags &= ~SPC_BUSY;
+	spc->spc_flags |= SPC_DONE;
+	if (bp->b_flags & B_ERROR)
+		spc->spc_flags |= SPC_ERROR;
+	spc->spc_bp = NULL;
+	/* bp itself is put back on bswlist further down. */
+
+#ifdef DEBUG
+	--swap_pager_poip;
+	if (swpagerdebug & SDB_WRITE)
+		printf("swpg_iodone: bp=%x swp=%x flags=%x spc=%x poip=%x\n",
+		       bp, spc->spc_swp, spc->spc_swp->sw_flags,
+		       spc, spc->spc_swp->sw_poip);
+#endif
+
+	spc->spc_swp->sw_poip--;
+	if (spc->spc_swp->sw_flags & SW_WANTED) {
+		spc->spc_swp->sw_flags &= ~SW_WANTED;
+		wakeup(spc->spc_swp);
+	}
+
+	bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_PAGET|B_UAREA|B_DIRTY);
+	bp->b_actf = bswlist.b_actf;
+	bswlist.b_actf = bp;
+	if (bp->b_vp)
+		brelvp(bp);
+	if (bswlist.b_flags & B_WANTED) {
+		bswlist.b_flags &= ~B_WANTED;
+		wakeup(&bswlist);
+	}
+	wakeup(&vm_pages_needed);
+	splx(s);
+}
OpenPOWER on IntegriCloud