author     markj <markj@FreeBSD.org>    2017-05-23 07:27:30 +0000
committer  markj <markj@FreeBSD.org>    2017-05-23 07:27:30 +0000
commit     b1d352b152cd31bddbbb83670b7f45a213ca52cb (patch)
tree       d6aa7d62302443402a45530a50b241fd0f5128b9 /sys
parent     a6749049ae872846ba11cc9c7c14e8f8d61425f1 (diff)
MFC r308474, r308691, r309203, r309365, r309703, r309898, r310720,
r308489, r308706: Add PQ_LAUNDRY and remove PG_CACHED pages.
Diffstat (limited to 'sys')
-rw-r--r--  sys/amd64/amd64/pmap.c | 42
-rw-r--r--  sys/arm64/arm64/pmap.c | 31
-rw-r--r--  sys/cddl/compat/opensolaris/sys/vnode.h | 3
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c | 4
-rw-r--r--  sys/fs/tmpfs/tmpfs_subr.c | 9
-rw-r--r--  sys/i386/i386/pmap.c | 42
-rw-r--r--  sys/kern/kern_exec.c | 2
-rw-r--r--  sys/kern/uipc_shm.c | 9
-rw-r--r--  sys/sys/vmmeter.h | 24
-rw-r--r--  sys/vm/_vm_radix.h | 4
-rw-r--r--  sys/vm/swap_pager.c | 33
-rw-r--r--  sys/vm/vm_fault.c | 12
-rw-r--r--  sys/vm/vm_map.c | 4
-rw-r--r--  sys/vm/vm_meter.c | 38
-rw-r--r--  sys/vm/vm_mmap.c | 3
-rw-r--r--  sys/vm/vm_object.c | 56
-rw-r--r--  sys/vm/vm_object.h | 19
-rw-r--r--  sys/vm/vm_page.c | 764
-rw-r--r--  sys/vm/vm_page.h | 49
-rw-r--r--  sys/vm/vm_pageout.c | 693
-rw-r--r--  sys/vm/vm_phys.c | 2
-rw-r--r--  sys/vm/vm_radix.c | 75
-rw-r--r--  sys/vm/vm_radix.h | 2
-rw-r--r--  sys/vm/vm_reserv.c | 109
-rw-r--r--  sys/vm/vm_reserv.h | 3
-rw-r--r--  sys/vm/vnode_pager.c | 10
26 files changed, 938 insertions, 1104 deletions
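
In outline, this change retires the PG_CACHED page cache (PQ_CACHE, VM_ALLOC_IFCACHED/VM_ALLOC_IFNOTCACHED, the vm_page_cache_* routines) and adds a dedicated laundry queue, PQ_LAUNDRY, for dirty pages awaiting pageout. The sketch below is illustrative only and is not part of the commit (the helper name is hypothetical); it uses vm_page_launder() and vm_page_deactivate_noreuse() just as the swap pager hunks below do, to show how a pager now disposes of a page instead of caching it:

/*
 * Illustrative sketch: dispose of a page after pager I/O under the
 * PQ_LAUNDRY scheme.  A dirty page is queued for laundering; a clean
 * page is deactivated without reuse credit, since a page that was just
 * written to swap is not expected to be reused soon.
 */
static void
example_dispose_page(vm_page_t m, bool dirty)
{

	vm_page_lock(m);
	if (dirty)
		vm_page_launder(m);		/* enqueue on PQ_LAUNDRY */
	else
		vm_page_deactivate_noreuse(m);	/* near the head of PQ_INACTIVE */
	vm_page_unlock(m);
}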
diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c
index 45d4c1e..551413f 100644
--- a/sys/amd64/amd64/pmap.c
+++ b/sys/amd64/amd64/pmap.c
@@ -614,7 +614,6 @@ static vm_page_t pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va,
static void pmap_fill_ptp(pt_entry_t *firstpte, pt_entry_t newpte);
static int pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte);
static void pmap_kenter_attr(vm_offset_t va, vm_paddr_t pa, int mode);
-static vm_page_t pmap_lookup_pt_page(pmap_t pmap, vm_offset_t va);
static void pmap_pde_attr(pd_entry_t *pde, int cache_bits, int mask);
static void pmap_promote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va,
struct rwlock **lockp);
@@ -625,7 +624,7 @@ static int pmap_remove_pde(pmap_t pmap, pd_entry_t *pdq, vm_offset_t sva,
struct spglist *free, struct rwlock **lockp);
static int pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_offset_t sva,
pd_entry_t ptepde, struct spglist *free, struct rwlock **lockp);
-static void pmap_remove_pt_page(pmap_t pmap, vm_page_t mpte);
+static vm_page_t pmap_remove_pt_page(pmap_t pmap, vm_offset_t va);
static void pmap_remove_page(pmap_t pmap, vm_offset_t va, pd_entry_t *pde,
struct spglist *free);
static boolean_t pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va,
@@ -2218,29 +2217,17 @@ pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte)
}
/*
- * Looks for a page table page mapping the specified virtual address in the
- * specified pmap's collection of idle page table pages. Returns NULL if there
- * is no page table page corresponding to the specified virtual address.
+ * Removes the page table page mapping the specified virtual address from the
+ * specified pmap's collection of idle page table pages, and returns it.
+ * Otherwise, returns NULL if there is no page table page corresponding to the
+ * specified virtual address.
*/
static __inline vm_page_t
-pmap_lookup_pt_page(pmap_t pmap, vm_offset_t va)
+pmap_remove_pt_page(pmap_t pmap, vm_offset_t va)
{
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
- return (vm_radix_lookup(&pmap->pm_root, pmap_pde_pindex(va)));
-}
-
-/*
- * Removes the specified page table page from the specified pmap's collection
- * of idle page table pages. The specified page table page must be a member of
- * the pmap's collection.
- */
-static __inline void
-pmap_remove_pt_page(pmap_t pmap, vm_page_t mpte)
-{
-
- PMAP_LOCK_ASSERT(pmap, MA_OWNED);
- vm_radix_remove(&pmap->pm_root, mpte->pindex);
+ return (vm_radix_remove(&pmap->pm_root, pmap_pde_pindex(va)));
}
/*
@@ -3460,10 +3447,8 @@ pmap_demote_pde_locked(pmap_t pmap, pd_entry_t *pde, vm_offset_t va,
oldpde = *pde;
KASSERT((oldpde & (PG_PS | PG_V)) == (PG_PS | PG_V),
("pmap_demote_pde: oldpde is missing PG_PS and/or PG_V"));
- if ((oldpde & PG_A) != 0 && (mpte = pmap_lookup_pt_page(pmap, va)) !=
- NULL)
- pmap_remove_pt_page(pmap, mpte);
- else {
+ if ((oldpde & PG_A) == 0 || (mpte = pmap_remove_pt_page(pmap, va)) ==
+ NULL) {
KASSERT((oldpde & PG_W) == 0,
("pmap_demote_pde: page table page for a wired mapping"
" is missing"));
@@ -3577,11 +3562,10 @@ pmap_remove_kernel_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va)
KASSERT(pmap == kernel_pmap, ("pmap %p is not kernel_pmap", pmap));
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
- mpte = pmap_lookup_pt_page(pmap, va);
+ mpte = pmap_remove_pt_page(pmap, va);
if (mpte == NULL)
panic("pmap_remove_kernel_pde: Missing pt page.");
- pmap_remove_pt_page(pmap, mpte);
mptepa = VM_PAGE_TO_PHYS(mpte);
newpde = mptepa | X86_PG_M | X86_PG_A | X86_PG_RW | X86_PG_V;
@@ -3668,9 +3652,8 @@ pmap_remove_pde(pmap_t pmap, pd_entry_t *pdq, vm_offset_t sva,
if (pmap == kernel_pmap) {
pmap_remove_kernel_pde(pmap, pdq, sva);
} else {
- mpte = pmap_lookup_pt_page(pmap, sva);
+ mpte = pmap_remove_pt_page(pmap, sva);
if (mpte != NULL) {
- pmap_remove_pt_page(pmap, mpte);
pmap_resident_count_dec(pmap, 1);
KASSERT(mpte->wire_count == NPTEPG,
("pmap_remove_pde: pte page wire count error"));
@@ -5533,9 +5516,8 @@ pmap_remove_pages(pmap_t pmap)
TAILQ_EMPTY(&mt->md.pv_list))
vm_page_aflag_clear(mt, PGA_WRITEABLE);
}
- mpte = pmap_lookup_pt_page(pmap, pv->pv_va);
+ mpte = pmap_remove_pt_page(pmap, pv->pv_va);
if (mpte != NULL) {
- pmap_remove_pt_page(pmap, mpte);
pmap_resident_count_dec(pmap, 1);
KASSERT(mpte->wire_count == NPTEPG,
("pmap_remove_pages: pte page wire count error"));
diff --git a/sys/arm64/arm64/pmap.c b/sys/arm64/arm64/pmap.c
index 6839758..5e2e7fc 100644
--- a/sys/arm64/arm64/pmap.c
+++ b/sys/arm64/arm64/pmap.c
@@ -2514,29 +2514,17 @@ pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte)
}
/*
- * Looks for a page table page mapping the specified virtual address in the
- * specified pmap's collection of idle page table pages. Returns NULL if there
- * is no page table page corresponding to the specified virtual address.
+ * Removes the page table page mapping the specified virtual address from the
+ * specified pmap's collection of idle page table pages, and returns it.
+ * Otherwise, returns NULL if there is no page table page corresponding to the
+ * specified virtual address.
*/
static __inline vm_page_t
-pmap_lookup_pt_page(pmap_t pmap, vm_offset_t va)
+pmap_remove_pt_page(pmap_t pmap, vm_offset_t va)
{
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
- return (vm_radix_lookup(&pmap->pm_root, pmap_l2_pindex(va)));
-}
-
-/*
- * Removes the specified page table page from the specified pmap's collection
- * of idle page table pages. The specified page table page must be a member of
- * the pmap's collection.
- */
-static __inline void
-pmap_remove_pt_page(pmap_t pmap, vm_page_t mpte)
-{
-
- PMAP_LOCK_ASSERT(pmap, MA_OWNED);
- vm_radix_remove(&pmap->pm_root, mpte->pindex);
+ return (vm_radix_remove(&pmap->pm_root, pmap_l2_pindex(va)));
}
/*
@@ -3605,10 +3593,9 @@ pmap_remove_pages(pmap_t pmap)
TAILQ_EMPTY(&mt->md.pv_list))
vm_page_aflag_clear(mt, PGA_WRITEABLE);
}
- ml3 = pmap_lookup_pt_page(pmap,
+ ml3 = pmap_remove_pt_page(pmap,
pv->pv_va);
if (ml3 != NULL) {
- pmap_remove_pt_page(pmap, ml3);
pmap_resident_count_dec(pmap,1);
KASSERT(ml3->wire_count == NL3PG,
("pmap_remove_pages: l3 page wire count error"));
@@ -4381,9 +4368,7 @@ pmap_demote_l2_locked(pmap_t pmap, pt_entry_t *l2, vm_offset_t va,
return (NULL);
}
- if ((ml3 = pmap_lookup_pt_page(pmap, va)) != NULL) {
- pmap_remove_pt_page(pmap, ml3);
- } else {
+ if ((ml3 = pmap_remove_pt_page(pmap, va)) == NULL) {
ml3 = vm_page_alloc(NULL, pmap_l2_pindex(va),
(VIRT_IN_DMAP(va) ? VM_ALLOC_INTERRUPT : VM_ALLOC_NORMAL) |
VM_ALLOC_NOOBJ | VM_ALLOC_WIRED);
diff --git a/sys/cddl/compat/opensolaris/sys/vnode.h b/sys/cddl/compat/opensolaris/sys/vnode.h
index e7a92ae..d15cd88 100644
--- a/sys/cddl/compat/opensolaris/sys/vnode.h
+++ b/sys/cddl/compat/opensolaris/sys/vnode.h
@@ -75,8 +75,7 @@ vn_is_readonly(vnode_t *vp)
#define vn_mountedvfs(vp) ((vp)->v_mountedhere)
#define vn_has_cached_data(vp) \
((vp)->v_object != NULL && \
- ((vp)->v_object->resident_page_count > 0 || \
- !vm_object_cache_is_empty((vp)->v_object)))
+ (vp)->v_object->resident_page_count > 0)
#define vn_exists(vp) do { } while (0)
#define vn_invalid(vp) do { } while (0)
#define vn_renamepath(tdvp, svp, tnm, lentnm) do { } while (0)
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c
index 3a44201..b715a48 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c
@@ -426,10 +426,6 @@ page_busy(vnode_t *vp, int64_t start, int64_t off, int64_t nbytes)
continue;
}
vm_page_sbusy(pp);
- } else if (pp == NULL) {
- pp = vm_page_alloc(obj, OFF_TO_IDX(start),
- VM_ALLOC_SYSTEM | VM_ALLOC_IFCACHED |
- VM_ALLOC_SBUSY);
} else {
ASSERT(pp != NULL && !pp->valid);
pp = NULL;
diff --git a/sys/fs/tmpfs/tmpfs_subr.c b/sys/fs/tmpfs/tmpfs_subr.c
index f507807..2aa879a 100644
--- a/sys/fs/tmpfs/tmpfs_subr.c
+++ b/sys/fs/tmpfs/tmpfs_subr.c
@@ -1401,12 +1401,9 @@ retry:
VM_WAIT;
VM_OBJECT_WLOCK(uobj);
goto retry;
- } else if (m->valid != VM_PAGE_BITS_ALL)
- rv = vm_pager_get_pages(uobj, &m, 1,
- NULL, NULL);
- else
- /* A cached page was reactivated. */
- rv = VM_PAGER_OK;
+ }
+ rv = vm_pager_get_pages(uobj, &m, 1, NULL,
+ NULL);
vm_page_lock(m);
if (rv == VM_PAGER_OK) {
vm_page_deactivate(m);
diff --git a/sys/i386/i386/pmap.c b/sys/i386/i386/pmap.c
index db71c4d..1da9241 100644
--- a/sys/i386/i386/pmap.c
+++ b/sys/i386/i386/pmap.c
@@ -306,7 +306,6 @@ static boolean_t pmap_is_modified_pvh(struct md_page *pvh);
static boolean_t pmap_is_referenced_pvh(struct md_page *pvh);
static void pmap_kenter_attr(vm_offset_t va, vm_paddr_t pa, int mode);
static void pmap_kenter_pde(vm_offset_t va, pd_entry_t newpde);
-static vm_page_t pmap_lookup_pt_page(pmap_t pmap, vm_offset_t va);
static void pmap_pde_attr(pd_entry_t *pde, int cache_bits);
static void pmap_promote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va);
static boolean_t pmap_protect_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t sva,
@@ -316,7 +315,7 @@ static void pmap_remove_pde(pmap_t pmap, pd_entry_t *pdq, vm_offset_t sva,
struct spglist *free);
static int pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_offset_t sva,
struct spglist *free);
-static void pmap_remove_pt_page(pmap_t pmap, vm_page_t mpte);
+static vm_page_t pmap_remove_pt_page(pmap_t pmap, vm_offset_t va);
static void pmap_remove_page(struct pmap *pmap, vm_offset_t va,
struct spglist *free);
static void pmap_remove_entry(struct pmap *pmap, vm_page_t m,
@@ -1727,29 +1726,17 @@ pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte)
}
/*
- * Looks for a page table page mapping the specified virtual address in the
- * specified pmap's collection of idle page table pages. Returns NULL if there
- * is no page table page corresponding to the specified virtual address.
+ * Removes the page table page mapping the specified virtual address from the
+ * specified pmap's collection of idle page table pages, and returns it.
+ * Otherwise, returns NULL if there is no page table page corresponding to the
+ * specified virtual address.
*/
static __inline vm_page_t
-pmap_lookup_pt_page(pmap_t pmap, vm_offset_t va)
+pmap_remove_pt_page(pmap_t pmap, vm_offset_t va)
{
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
- return (vm_radix_lookup(&pmap->pm_root, va >> PDRSHIFT));
-}
-
-/*
- * Removes the specified page table page from the specified pmap's collection
- * of idle page table pages. The specified page table page must be a member of
- * the pmap's collection.
- */
-static __inline void
-pmap_remove_pt_page(pmap_t pmap, vm_page_t mpte)
-{
-
- PMAP_LOCK_ASSERT(pmap, MA_OWNED);
- vm_radix_remove(&pmap->pm_root, mpte->pindex);
+ return (vm_radix_remove(&pmap->pm_root, va >> PDRSHIFT));
}
/*
@@ -2645,10 +2632,8 @@ pmap_demote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va)
oldpde = *pde;
KASSERT((oldpde & (PG_PS | PG_V)) == (PG_PS | PG_V),
("pmap_demote_pde: oldpde is missing PG_PS and/or PG_V"));
- if ((oldpde & PG_A) != 0 && (mpte = pmap_lookup_pt_page(pmap, va)) !=
- NULL)
- pmap_remove_pt_page(pmap, mpte);
- else {
+ if ((oldpde & PG_A) == 0 || (mpte = pmap_remove_pt_page(pmap, va)) ==
+ NULL) {
KASSERT((oldpde & PG_W) == 0,
("pmap_demote_pde: page table page for a wired mapping"
" is missing"));
@@ -2786,11 +2771,10 @@ pmap_remove_kernel_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va)
vm_page_t mpte;
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
- mpte = pmap_lookup_pt_page(pmap, va);
+ mpte = pmap_remove_pt_page(pmap, va);
if (mpte == NULL)
panic("pmap_remove_kernel_pde: Missing pt page.");
- pmap_remove_pt_page(pmap, mpte);
mptepa = VM_PAGE_TO_PHYS(mpte);
newpde = mptepa | PG_M | PG_A | PG_RW | PG_V;
@@ -2872,9 +2856,8 @@ pmap_remove_pde(pmap_t pmap, pd_entry_t *pdq, vm_offset_t sva,
if (pmap == kernel_pmap) {
pmap_remove_kernel_pde(pmap, pdq, sva);
} else {
- mpte = pmap_lookup_pt_page(pmap, sva);
+ mpte = pmap_remove_pt_page(pmap, sva);
if (mpte != NULL) {
- pmap_remove_pt_page(pmap, mpte);
pmap->pm_stats.resident_count--;
KASSERT(mpte->wire_count == NPTEPG,
("pmap_remove_pde: pte page wire count error"));
@@ -4616,9 +4599,8 @@ pmap_remove_pages(pmap_t pmap)
if (TAILQ_EMPTY(&mt->md.pv_list))
vm_page_aflag_clear(mt, PGA_WRITEABLE);
}
- mpte = pmap_lookup_pt_page(pmap, pv->pv_va);
+ mpte = pmap_remove_pt_page(pmap, pv->pv_va);
if (mpte != NULL) {
- pmap_remove_pt_page(pmap, mpte);
pmap->pm_stats.resident_count--;
KASSERT(mpte->wire_count == NPTEPG,
("pmap_remove_pages: pte page wire count error"));
diff --git a/sys/kern/kern_exec.c b/sys/kern/kern_exec.c
index 7d9adb0..1a41aac 100644
--- a/sys/kern/kern_exec.c
+++ b/sys/kern/kern_exec.c
@@ -1006,7 +1006,7 @@ exec_map_first_page(imgp)
break;
} else {
ma[i] = vm_page_alloc(object, i,
- VM_ALLOC_NORMAL | VM_ALLOC_IFNOTCACHED);
+ VM_ALLOC_NORMAL);
if (ma[i] == NULL)
break;
}
diff --git a/sys/kern/uipc_shm.c b/sys/kern/uipc_shm.c
index 0a45380..0aee62f 100644
--- a/sys/kern/uipc_shm.c
+++ b/sys/kern/uipc_shm.c
@@ -455,12 +455,9 @@ retry:
VM_WAIT;
VM_OBJECT_WLOCK(object);
goto retry;
- } else if (m->valid != VM_PAGE_BITS_ALL)
- rv = vm_pager_get_pages(object, &m, 1,
- NULL, NULL);
- else
- /* A cached page was reactivated. */
- rv = VM_PAGER_OK;
+ }
+ rv = vm_pager_get_pages(object, &m, 1, NULL,
+ NULL);
vm_page_lock(m);
if (rv == VM_PAGER_OK) {
vm_page_deactivate(m);
diff --git a/sys/sys/vmmeter.h b/sys/sys/vmmeter.h
index 55d3053..517be2d 100644
--- a/sys/sys/vmmeter.h
+++ b/sys/sys/vmmeter.h
@@ -75,9 +75,10 @@ struct vmmeter {
u_int v_vnodepgsin; /* (p) vnode_pager pages paged in */
u_int v_vnodepgsout; /* (p) vnode pager pages paged out */
u_int v_intrans; /* (p) intransit blocking page faults */
- u_int v_reactivated; /* (f) pages reactivated from free list */
+ u_int v_reactivated; /* (p) pages reactivated by the pagedaemon */
u_int v_pdwakeups; /* (p) times daemon has awaken from sleep */
u_int v_pdpages; /* (p) pages analyzed by daemon */
+ u_int v_pdshortfalls; /* (p) page reclamation shortfalls */
u_int v_tcached; /* (p) total pages cached */
u_int v_dfree; /* (p) pages freed by daemon */
@@ -96,6 +97,7 @@ struct vmmeter {
u_int v_active_count; /* (q) pages active */
u_int v_inactive_target; /* (c) pages desired inactive */
u_int v_inactive_count; /* (q) pages inactive */
+ u_int v_laundry_count; /* (q) pages eligible for laundering */
u_int v_cache_count; /* (f) pages on cache queue */
u_int v_pageout_free_min; /* (c) min pages reserved for kernel */
u_int v_interrupt_free_min; /* (c) reserved pages for int code */
@@ -111,7 +113,6 @@ struct vmmeter {
u_int v_vforkpages; /* (p) VM pages affected by vfork() */
u_int v_rforkpages; /* (p) VM pages affected by rfork() */
u_int v_kthreadpages; /* (p) VM pages affected by fork() by kernel */
- u_int v_spare[2];
};
#ifdef _KERNEL
@@ -184,6 +185,25 @@ vm_paging_needed(void)
(u_int)vm_pageout_wakeup_thresh);
}
+/*
+ * Return the number of pages we need to launder.
+ * A positive number indicates that we have a shortfall of clean pages.
+ */
+static inline int
+vm_laundry_target(void)
+{
+
+ return (vm_paging_target());
+}
+
+/*
+ * Obtain the value of a per-CPU counter.
+ */
+#define VM_METER_PCPU_CNT(member) \
+ vm_meter_cnt(__offsetof(struct vmmeter, member))
+
+u_int vm_meter_cnt(size_t);
+
#endif
/* systemwide totals computed every five seconds */
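
The VM_METER_PCPU_CNT() macro added above lets kernel code read a per-CPU vmmeter statistic summed across all CPUs through vm_meter_cnt(). A minimal usage sketch (the surrounding function is hypothetical; the macro and the v_pdshortfalls field are introduced in this hunk):

/* Illustrative only: total page reclamation shortfalls across all CPUs. */
static u_int
example_pdshortfalls(void)
{

	return (VM_METER_PCPU_CNT(v_pdshortfalls));
}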
diff --git a/sys/vm/_vm_radix.h b/sys/vm/_vm_radix.h
index 1d06d0a..f066462 100644
--- a/sys/vm/_vm_radix.h
+++ b/sys/vm/_vm_radix.h
@@ -36,12 +36,8 @@
*/
struct vm_radix {
uintptr_t rt_root;
- uint8_t rt_flags;
};
-#define RT_INSERT_INPROG 0x01
-#define RT_TRIE_MODIFIED 0x02
-
#ifdef _KERNEL
static __inline boolean_t
diff --git a/sys/vm/swap_pager.c b/sys/vm/swap_pager.c
index 30f6d97..0167117 100644
--- a/sys/vm/swap_pager.c
+++ b/sys/vm/swap_pager.c
@@ -1126,7 +1126,7 @@ swap_pager_getpages(vm_object_t object, vm_page_t *m, int count, int *rbehind,
if (shift != 0) {
for (i = 1; i <= shift; i++) {
p = vm_page_alloc(object, m[0]->pindex - i,
- VM_ALLOC_NORMAL | VM_ALLOC_IFNOTCACHED);
+ VM_ALLOC_NORMAL);
if (p == NULL) {
/* Shift allocated pages to the left. */
for (j = 0; j < i - 1; j++)
@@ -1144,8 +1144,7 @@ swap_pager_getpages(vm_object_t object, vm_page_t *m, int count, int *rbehind,
if (rahead != NULL) {
for (i = 0; i < *rahead; i++) {
p = vm_page_alloc(object,
- m[reqcount - 1]->pindex + i + 1,
- VM_ALLOC_NORMAL | VM_ALLOC_IFNOTCACHED);
+ m[reqcount - 1]->pindex + i + 1, VM_ALLOC_NORMAL);
if (p == NULL)
break;
bp->b_pages[shift + reqcount + i] = p;
@@ -1549,17 +1548,18 @@ swp_pager_async_iodone(struct buf *bp)
* For write success, clear the dirty
* status, then finish the I/O ( which decrements the
* busy count and possibly wakes waiter's up ).
+ * A page is only written to swap after a period of
+ * inactivity. Therefore, we do not expect it to be
+ * reused.
*/
KASSERT(!pmap_page_is_write_mapped(m),
("swp_pager_async_iodone: page %p is not write"
" protected", m));
vm_page_undirty(m);
+ vm_page_lock(m);
+ vm_page_deactivate_noreuse(m);
+ vm_page_unlock(m);
vm_page_sunbusy(m);
- if (vm_page_count_severe()) {
- vm_page_lock(m);
- vm_page_try_to_cache(m);
- vm_page_unlock(m);
- }
}
}
@@ -1635,12 +1635,15 @@ swap_pager_isswapped(vm_object_t object, struct swdevt *sp)
/*
* SWP_PAGER_FORCE_PAGEIN() - force a swap block to be paged in
*
- * This routine dissociates the page at the given index within a
- * swap block from its backing store, paging it in if necessary.
- * If the page is paged in, it is placed in the inactive queue,
- * since it had its backing store ripped out from under it.
- * We also attempt to swap in all other pages in the swap block,
- * we only guarantee that the one at the specified index is
+ * This routine dissociates the page at the given index within an object
+ * from its backing store, paging it in if it does not reside in memory.
+ * If the page is paged in, it is marked dirty and placed in the laundry
+ * queue. The page is marked dirty because it no longer has backing
+ * store. It is placed in the laundry queue because it has not been
+ * accessed recently. Otherwise, it would already reside in memory.
+ *
+ * We also attempt to swap in all other pages in the swap block.
+ * However, we only guarantee that the one at the specified index is
* paged in.
*
* XXX - The code to page the whole block in doesn't work, so we
@@ -1669,7 +1672,7 @@ swp_pager_force_pagein(vm_object_t object, vm_pindex_t pindex)
vm_object_pip_wakeup(object);
vm_page_dirty(m);
vm_page_lock(m);
- vm_page_deactivate(m);
+ vm_page_launder(m);
vm_page_unlock(m);
vm_page_xunbusy(m);
vm_pager_page_unswapped(m);
diff --git a/sys/vm/vm_fault.c b/sys/vm/vm_fault.c
index ba0c775..2a90c15 100644
--- a/sys/vm/vm_fault.c
+++ b/sys/vm/vm_fault.c
@@ -485,11 +485,12 @@ int
vm_fault_hold(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type,
int fault_flags, vm_page_t *m_hold)
{
- vm_prot_t prot;
- vm_object_t next_object;
struct faultstate fs;
struct vnode *vp;
+ vm_object_t next_object, retry_object;
vm_offset_t e_end, e_start;
+ vm_pindex_t retry_pindex;
+ vm_prot_t prot, retry_prot;
int ahead, alloc_req, behind, cluster_offset, error, era, faultcount;
int locked, nera, result, rv;
u_char behavior;
@@ -755,8 +756,7 @@ RetryFault:;
unlock_and_deallocate(&fs);
VM_WAITPFAULT;
goto RetryFault;
- } else if (fs.m->valid == VM_PAGE_BITS_ALL)
- break;
+ }
}
readrest:
@@ -1143,10 +1143,6 @@ readrest:
* lookup.
*/
if (!fs.lookup_still_valid) {
- vm_object_t retry_object;
- vm_pindex_t retry_pindex;
- vm_prot_t retry_prot;
-
if (!vm_map_trylock_read(fs.map)) {
release_page(&fs);
unlock_and_deallocate(&fs);
diff --git a/sys/vm/vm_map.c b/sys/vm/vm_map.c
index cd72cf8..2296fb1 100644
--- a/sys/vm/vm_map.c
+++ b/sys/vm/vm_map.c
@@ -1858,9 +1858,7 @@ vm_map_submap(
* limited number of page mappings are created at the low-end of the
* specified address range. (For this purpose, a superpage mapping
* counts as one page mapping.) Otherwise, all resident pages within
- * the specified address range are mapped. Because these mappings are
- * being created speculatively, cached pages are not reactivated and
- * mapped.
+ * the specified address range are mapped.
*/
static void
vm_map_pmap_enter(vm_map_t map, vm_offset_t addr, vm_prot_t prot,
diff --git a/sys/vm/vm_meter.c b/sys/vm/vm_meter.c
index 5aa6085..6e2199e 100644
--- a/sys/vm/vm_meter.c
+++ b/sys/vm/vm_meter.c
@@ -209,29 +209,37 @@ vmtotal(SYSCTL_HANDLER_ARGS)
}
/*
- * vcnt() - accumulate statistics from all cpus and the global cnt
- * structure.
+ * vm_meter_cnt() - accumulate statistics from all cpus and the global cnt
+ * structure.
*
* The vmmeter structure is now per-cpu as well as global. Those
* statistics which can be kept on a per-cpu basis (to avoid cache
* stalls between cpus) can be moved to the per-cpu vmmeter. Remaining
* statistics, such as v_free_reserved, are left in the global
* structure.
- *
- * (sysctl_oid *oidp, void *arg1, int arg2, struct sysctl_req *req)
*/
-static int
-vcnt(SYSCTL_HANDLER_ARGS)
+u_int
+vm_meter_cnt(size_t offset)
{
- int count = *(int *)arg1;
- int offset = (char *)arg1 - (char *)&vm_cnt;
+ struct pcpu *pcpu;
+ u_int count;
int i;
+ count = *(u_int *)((char *)&vm_cnt + offset);
CPU_FOREACH(i) {
- struct pcpu *pcpu = pcpu_find(i);
- count += *(int *)((char *)&pcpu->pc_cnt + offset);
+ pcpu = pcpu_find(i);
+ count += *(u_int *)((char *)&pcpu->pc_cnt + offset);
}
- return (SYSCTL_OUT(req, &count, sizeof(int)));
+ return (count);
+}
+
+static int
+cnt_sysctl(SYSCTL_HANDLER_ARGS)
+{
+ u_int count;
+
+ count = vm_meter_cnt((char *)arg1 - (char *)&vm_cnt);
+ return (SYSCTL_OUT(req, &count, sizeof(count)));
}
SYSCTL_PROC(_vm, VM_TOTAL, vmtotal, CTLTYPE_OPAQUE|CTLFLAG_RD|CTLFLAG_MPSAFE,
@@ -246,8 +254,8 @@ SYSCTL_NODE(_vm_stats, OID_AUTO, misc, CTLFLAG_RW, 0, "VM meter misc stats");
#define VM_STATS(parent, var, descr) \
SYSCTL_PROC(parent, OID_AUTO, var, \
- CTLTYPE_UINT | CTLFLAG_RD | CTLFLAG_MPSAFE, &vm_cnt.var, 0, vcnt, \
- "IU", descr)
+ CTLTYPE_UINT | CTLFLAG_RD | CTLFLAG_MPSAFE, &vm_cnt.var, 0, \
+ cnt_sysctl, "IU", descr)
#define VM_STATS_VM(var, descr) VM_STATS(_vm_stats_vm, var, descr)
#define VM_STATS_SYS(var, descr) VM_STATS(_vm_stats_sys, var, descr)
@@ -271,9 +279,10 @@ VM_STATS_VM(v_vnodeout, "Vnode pager pageouts");
VM_STATS_VM(v_vnodepgsin, "Vnode pages paged in");
VM_STATS_VM(v_vnodepgsout, "Vnode pages paged out");
VM_STATS_VM(v_intrans, "In transit page faults");
-VM_STATS_VM(v_reactivated, "Pages reactivated from free list");
+VM_STATS_VM(v_reactivated, "Pages reactivated by pagedaemon");
VM_STATS_VM(v_pdwakeups, "Pagedaemon wakeups");
VM_STATS_VM(v_pdpages, "Pages analyzed by pagedaemon");
+VM_STATS_VM(v_pdshortfalls, "Page reclamation shortfalls");
VM_STATS_VM(v_tcached, "Total pages cached");
VM_STATS_VM(v_dfree, "Pages freed by pagedaemon");
VM_STATS_VM(v_pfree, "Pages freed by exiting processes");
@@ -288,6 +297,7 @@ VM_STATS_VM(v_wire_count, "Wired pages");
VM_STATS_VM(v_active_count, "Active pages");
VM_STATS_VM(v_inactive_target, "Desired inactive pages");
VM_STATS_VM(v_inactive_count, "Inactive pages");
+VM_STATS_VM(v_laundry_count, "Pages eligible for laundering");
VM_STATS_VM(v_cache_count, "Pages on cache queue");
VM_STATS_VM(v_pageout_free_min, "Min pages reserved for kernel");
VM_STATS_VM(v_interrupt_free_min, "Reserved pages for interrupt code");
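
With VM_STATS_VM(v_laundry_count, ...) registered above, the new counter is exported to userland as the sysctl vm.stats.vm.v_laundry_count. A small userland sketch (not part of this commit) reading it with sysctlbyname(3):

#include <sys/types.h>
#include <sys/sysctl.h>
#include <stdio.h>

int
main(void)
{
	u_int laundry;
	size_t len = sizeof(laundry);

	/* Registered by VM_STATS_VM(v_laundry_count, ...) above. */
	if (sysctlbyname("vm.stats.vm.v_laundry_count", &laundry, &len,
	    NULL, 0) == -1) {
		perror("sysctlbyname");
		return (1);
	}
	printf("pages eligible for laundering: %u\n", laundry);
	return (0);
}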
diff --git a/sys/vm/vm_mmap.c b/sys/vm/vm_mmap.c
index aae3771..68c2108 100644
--- a/sys/vm/vm_mmap.c
+++ b/sys/vm/vm_mmap.c
@@ -849,9 +849,6 @@ RestartScan:
pindex = OFF_TO_IDX(current->offset +
(addr - current->start));
m = vm_page_lookup(object, pindex);
- if (m == NULL &&
- vm_page_is_cached(object, pindex))
- mincoreinfo = MINCORE_INCORE;
if (m != NULL && m->valid == 0)
m = NULL;
if (m != NULL)
diff --git a/sys/vm/vm_object.c b/sys/vm/vm_object.c
index 2e9d16f..6db1ac4 100644
--- a/sys/vm/vm_object.c
+++ b/sys/vm/vm_object.c
@@ -178,9 +178,6 @@ vm_object_zdtor(void *mem, int size, void *arg)
("object %p has reservations",
object));
#endif
- KASSERT(vm_object_cache_is_empty(object),
- ("object %p has cached pages",
- object));
KASSERT(object->paging_in_progress == 0,
("object %p paging_in_progress = %d",
object, object->paging_in_progress));
@@ -208,12 +205,9 @@ vm_object_zinit(void *mem, int size, int flags)
object->type = OBJT_DEAD;
object->ref_count = 0;
object->rtree.rt_root = 0;
- object->rtree.rt_flags = 0;
object->paging_in_progress = 0;
object->resident_page_count = 0;
object->shadow_count = 0;
- object->cache.rt_root = 0;
- object->cache.rt_flags = 0;
mtx_lock(&vm_object_list_mtx);
TAILQ_INSERT_TAIL(&vm_object_list, object, object_list);
@@ -792,8 +786,6 @@ vm_object_terminate(vm_object_t object)
if (__predict_false(!LIST_EMPTY(&object->rvq)))
vm_reserv_break_all(object);
#endif
- if (__predict_false(!vm_object_cache_is_empty(object)))
- vm_page_cache_free(object, 0, 0);
KASSERT(object->cred == NULL || object->type == OBJT_DEFAULT ||
object->type == OBJT_SWAP,
@@ -1135,13 +1127,6 @@ shadowlookup:
} else if ((tobject->flags & OBJ_UNMANAGED) != 0)
goto unlock_tobject;
m = vm_page_lookup(tobject, tpindex);
- if (m == NULL && advise == MADV_WILLNEED) {
- /*
- * If the page is cached, reactivate it.
- */
- m = vm_page_alloc(tobject, tpindex, VM_ALLOC_IFCACHED |
- VM_ALLOC_NOBUSY);
- }
if (m == NULL) {
/*
* There may be swap even if there is no backing page
@@ -1371,7 +1356,7 @@ retry:
goto retry;
}
- /* vm_page_rename() will handle dirty and cache. */
+ /* vm_page_rename() will dirty the page. */
if (vm_page_rename(m, new_object, idx)) {
VM_OBJECT_WUNLOCK(new_object);
VM_OBJECT_WUNLOCK(orig_object);
@@ -1406,19 +1391,6 @@ retry:
swap_pager_copy(orig_object, new_object, offidxstart, 0);
TAILQ_FOREACH(m, &new_object->memq, listq)
vm_page_xunbusy(m);
-
- /*
- * Transfer any cached pages from orig_object to new_object.
- * If swap_pager_copy() found swapped out pages within the
- * specified range of orig_object, then it changed
- * new_object's type to OBJT_SWAP when it transferred those
- * pages to new_object. Otherwise, new_object's type
- * should still be OBJT_DEFAULT and orig_object should not
- * contain any cached pages within the specified range.
- */
- if (__predict_false(!vm_object_cache_is_empty(orig_object)))
- vm_page_cache_transfer(orig_object, offidxstart,
- new_object);
}
VM_OBJECT_WUNLOCK(orig_object);
VM_OBJECT_WUNLOCK(new_object);
@@ -1471,6 +1443,13 @@ vm_object_scan_all_shadowed(vm_object_t object)
backing_object = object->backing_object;
+ /*
+ * Initial conditions:
+ *
+ * We do not want to have to test for the existence of swap
+ * pages in the backing object. XXX but with the new swapper this
+ * would be pretty easy to do.
+ */
if (backing_object->type != OBJT_DEFAULT &&
backing_object->type != OBJT_SWAP)
return (false);
@@ -1622,8 +1601,7 @@ vm_object_collapse_scan(vm_object_t object, int op)
* backing object to the main object.
*
* If the page was mapped to a process, it can remain mapped
- * through the rename. vm_page_rename() will handle dirty and
- * cache.
+ * through the rename. vm_page_rename() will dirty the page.
*/
if (vm_page_rename(p, object, new_pindex)) {
next = vm_object_collapse_scan_wait(object, NULL, next,
@@ -1758,13 +1736,6 @@ vm_object_collapse(vm_object_t object)
backing_object,
object,
OFF_TO_IDX(object->backing_object_offset), TRUE);
-
- /*
- * Free any cached pages from backing_object.
- */
- if (__predict_false(
- !vm_object_cache_is_empty(backing_object)))
- vm_page_cache_free(backing_object, 0, 0);
}
/*
* Object now shadows whatever backing_object did.
@@ -1893,7 +1864,7 @@ vm_object_page_remove(vm_object_t object, vm_pindex_t start, vm_pindex_t end,
(options & (OBJPR_CLEANONLY | OBJPR_NOTMAPPED)) == OBJPR_NOTMAPPED,
("vm_object_page_remove: illegal options for object %p", object));
if (object->resident_page_count == 0)
- goto skipmemq;
+ return;
vm_object_pip_add(object, 1);
again:
p = vm_page_find_least(object, start);
@@ -1950,9 +1921,6 @@ next:
vm_page_unlock(p);
}
vm_object_pip_wakeup(object);
-skipmemq:
- if (__predict_false(!vm_object_cache_is_empty(object)))
- vm_page_cache_free(object, start, end);
}
/*
@@ -2333,9 +2301,9 @@ sysctl_vm_object_list(SYSCTL_HANDLER_ARGS)
* sysctl is only meant to give an
* approximation of the system anyway.
*/
- if (m->queue == PQ_ACTIVE)
+ if (vm_page_active(m))
kvo.kvo_active++;
- else if (m->queue == PQ_INACTIVE)
+ else if (vm_page_inactive(m))
kvo.kvo_inactive++;
}
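
The sysctl_vm_object_list() hunk above replaces direct comparisons against m->queue with the vm_page_active() and vm_page_inactive() predicates; together with vm_page_in_laundry() (used later in the vm_page.c hunks), queue membership is now tested through these helpers. An illustrative classification routine (hypothetical name, not part of the commit):

/* Illustrative only: name the queue a page currently belongs to. */
static const char *
example_page_queue_name(vm_page_t m)
{

	if (vm_page_active(m))
		return ("active");
	if (vm_page_inactive(m))
		return ("inactive");
	if (vm_page_in_laundry(m))
		return ("laundry");
	return ("none");
}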
diff --git a/sys/vm/vm_object.h b/sys/vm/vm_object.h
index 5b65d76..9b2192e 100644
--- a/sys/vm/vm_object.h
+++ b/sys/vm/vm_object.h
@@ -79,17 +79,6 @@
*
* vm_object_t Virtual memory object.
*
- * The root of cached pages pool is protected by both the per-object lock
- * and the free pages queue mutex.
- * On insert in the cache radix trie, the per-object lock is expected
- * to be already held and the free pages queue mutex will be
- * acquired during the operation too.
- * On remove and lookup from the cache radix trie, only the free
- * pages queue mutex is expected to be locked.
- * These rules allow for reliably checking for the presence of cached
- * pages with only the per-object lock held, thereby reducing contention
- * for the free pages queue mutex.
- *
* List of locks
* (c) const until freed
* (o) per-object lock
@@ -118,7 +107,6 @@ struct vm_object {
vm_ooffset_t backing_object_offset;/* Offset in backing object */
TAILQ_ENTRY(vm_object) pager_object_list; /* list of all objects of this pager type */
LIST_HEAD(, vm_reserv) rvq; /* list of reservations */
- struct vm_radix cache; /* (o + f) root of the cache page radix trie */
void *handle;
union {
/*
@@ -306,13 +294,6 @@ void vm_object_pip_wakeup(vm_object_t object);
void vm_object_pip_wakeupn(vm_object_t object, short i);
void vm_object_pip_wait(vm_object_t object, char *waitid);
-static __inline boolean_t
-vm_object_cache_is_empty(vm_object_t object)
-{
-
- return (vm_radix_is_empty(&object->cache));
-}
-
void umtx_shm_object_init(vm_object_t object);
void umtx_shm_object_terminated(vm_object_t object);
extern int umtx_shm_vnobj_persistent;
diff --git a/sys/vm/vm_page.c b/sys/vm/vm_page.c
index 7c77b22..6d8b364 100644
--- a/sys/vm/vm_page.c
+++ b/sys/vm/vm_page.c
@@ -155,8 +155,7 @@ static int vm_pageout_pages_needed;
static uma_zone_t fakepg_zone;
-static struct vnode *vm_page_alloc_init(vm_page_t m);
-static void vm_page_cache_turn_free(vm_page_t m);
+static void vm_page_alloc_check(vm_page_t m);
static void vm_page_clear_dirty_mask(vm_page_t m, vm_page_bits_t pagebits);
static void vm_page_enqueue(uint8_t queue, vm_page_t m);
static void vm_page_free_wakeup(void);
@@ -391,6 +390,10 @@ vm_page_domain_init(struct vm_domain *vmd)
"vm active pagequeue";
*__DECONST(u_int **, &vmd->vmd_pagequeues[PQ_ACTIVE].pq_vcnt) =
&vm_cnt.v_active_count;
+ *__DECONST(char **, &vmd->vmd_pagequeues[PQ_LAUNDRY].pq_name) =
+ "vm laundry pagequeue";
+ *__DECONST(int **, &vmd->vmd_pagequeues[PQ_LAUNDRY].pq_vcnt) =
+ &vm_cnt.v_laundry_count;
vmd->vmd_page_count = 0;
vmd->vmd_free_count = 0;
vmd->vmd_segs = 0;
@@ -1136,9 +1139,7 @@ void
vm_page_dirty_KBI(vm_page_t m)
{
- /* These assertions refer to this operation by its public name. */
- KASSERT((m->flags & PG_CACHED) == 0,
- ("vm_page_dirty: page in cache!"));
+ /* Refer to this operation by its public name. */
KASSERT(m->valid == VM_PAGE_BITS_ALL,
("vm_page_dirty: page is invalid!"));
m->dirty = VM_PAGE_BITS_ALL;
@@ -1262,9 +1263,8 @@ vm_page_insert_radixdone(vm_page_t m, vm_object_t object, vm_page_t mpred)
/*
* vm_page_remove:
*
- * Removes the given mem entry from the object/offset-page
- * table and the object page list, but do not invalidate/terminate
- * the backing store.
+ * Removes the specified page from its containing object, but does not
+ * invalidate any backing storage.
*
* The object must be locked. The page must be locked if it is managed.
*/
@@ -1272,6 +1272,7 @@ void
vm_page_remove(vm_page_t m)
{
vm_object_t object;
+ vm_page_t mrem;
if ((m->oflags & VPO_UNMANAGED) == 0)
vm_page_assert_locked(m);
@@ -1280,11 +1281,12 @@ vm_page_remove(vm_page_t m)
VM_OBJECT_ASSERT_WLOCKED(object);
if (vm_page_xbusied(m))
vm_page_xunbusy_maybelocked(m);
+ mrem = vm_radix_remove(&object->rtree, m->pindex);
+ KASSERT(mrem == m, ("removed page %p, expected page %p", mrem, m));
/*
* Now remove from the object's list of backed pages.
*/
- vm_radix_remove(&object->rtree, m->pindex);
TAILQ_REMOVE(&object->memq, m, listq);
/*
@@ -1433,9 +1435,7 @@ vm_page_replace(vm_page_t mnew, vm_object_t object, vm_pindex_t pindex)
*
* Note: we *always* dirty the page. It is necessary both for the
* fact that we moved it, and because we may be invalidating
- * swap. If the page is on the cache, we have to deactivate it
- * or vm_page_dirty() will panic. Dirty pages are not allowed
- * on the cache.
+ * swap.
*
* The objects must be locked.
*/
@@ -1481,142 +1481,6 @@ vm_page_rename(vm_page_t m, vm_object_t new_object, vm_pindex_t new_pindex)
}
/*
- * Convert all of the given object's cached pages that have a
- * pindex within the given range into free pages. If the value
- * zero is given for "end", then the range's upper bound is
- * infinity. If the given object is backed by a vnode and it
- * transitions from having one or more cached pages to none, the
- * vnode's hold count is reduced.
- */
-void
-vm_page_cache_free(vm_object_t object, vm_pindex_t start, vm_pindex_t end)
-{
- vm_page_t m;
- boolean_t empty;
-
- mtx_lock(&vm_page_queue_free_mtx);
- if (__predict_false(vm_radix_is_empty(&object->cache))) {
- mtx_unlock(&vm_page_queue_free_mtx);
- return;
- }
- while ((m = vm_radix_lookup_ge(&object->cache, start)) != NULL) {
- if (end != 0 && m->pindex >= end)
- break;
- vm_radix_remove(&object->cache, m->pindex);
- vm_page_cache_turn_free(m);
- }
- empty = vm_radix_is_empty(&object->cache);
- mtx_unlock(&vm_page_queue_free_mtx);
- if (object->type == OBJT_VNODE && empty)
- vdrop(object->handle);
-}
-
-/*
- * Returns the cached page that is associated with the given
- * object and offset. If, however, none exists, returns NULL.
- *
- * The free page queue must be locked.
- */
-static inline vm_page_t
-vm_page_cache_lookup(vm_object_t object, vm_pindex_t pindex)
-{
-
- mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
- return (vm_radix_lookup(&object->cache, pindex));
-}
-
-/*
- * Remove the given cached page from its containing object's
- * collection of cached pages.
- *
- * The free page queue must be locked.
- */
-static void
-vm_page_cache_remove(vm_page_t m)
-{
-
- mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
- KASSERT((m->flags & PG_CACHED) != 0,
- ("vm_page_cache_remove: page %p is not cached", m));
- vm_radix_remove(&m->object->cache, m->pindex);
- m->object = NULL;
- vm_cnt.v_cache_count--;
-}
-
-/*
- * Transfer all of the cached pages with offset greater than or
- * equal to 'offidxstart' from the original object's cache to the
- * new object's cache. However, any cached pages with offset
- * greater than or equal to the new object's size are kept in the
- * original object. Initially, the new object's cache must be
- * empty. Offset 'offidxstart' in the original object must
- * correspond to offset zero in the new object.
- *
- * The new object must be locked.
- */
-void
-vm_page_cache_transfer(vm_object_t orig_object, vm_pindex_t offidxstart,
- vm_object_t new_object)
-{
- vm_page_t m;
-
- /*
- * Insertion into an object's collection of cached pages
- * requires the object to be locked. In contrast, removal does
- * not.
- */
- VM_OBJECT_ASSERT_WLOCKED(new_object);
- KASSERT(vm_radix_is_empty(&new_object->cache),
- ("vm_page_cache_transfer: object %p has cached pages",
- new_object));
- mtx_lock(&vm_page_queue_free_mtx);
- while ((m = vm_radix_lookup_ge(&orig_object->cache,
- offidxstart)) != NULL) {
- /*
- * Transfer all of the pages with offset greater than or
- * equal to 'offidxstart' from the original object's
- * cache to the new object's cache.
- */
- if ((m->pindex - offidxstart) >= new_object->size)
- break;
- vm_radix_remove(&orig_object->cache, m->pindex);
- /* Update the page's object and offset. */
- m->object = new_object;
- m->pindex -= offidxstart;
- if (vm_radix_insert(&new_object->cache, m))
- vm_page_cache_turn_free(m);
- }
- mtx_unlock(&vm_page_queue_free_mtx);
-}
-
-/*
- * Returns TRUE if a cached page is associated with the given object and
- * offset, and FALSE otherwise.
- *
- * The object must be locked.
- */
-boolean_t
-vm_page_is_cached(vm_object_t object, vm_pindex_t pindex)
-{
- vm_page_t m;
-
- /*
- * Insertion into an object's collection of cached pages requires the
- * object to be locked. Therefore, if the object is locked and the
- * object's collection is empty, there is no need to acquire the free
- * page queues lock in order to prove that the specified page doesn't
- * exist.
- */
- VM_OBJECT_ASSERT_WLOCKED(object);
- if (__predict_true(vm_object_cache_is_empty(object)))
- return (FALSE);
- mtx_lock(&vm_page_queue_free_mtx);
- m = vm_page_cache_lookup(object, pindex);
- mtx_unlock(&vm_page_queue_free_mtx);
- return (m != NULL);
-}
-
-/*
* vm_page_alloc:
*
* Allocate and return a page that is associated with the specified
@@ -1632,9 +1496,6 @@ vm_page_is_cached(vm_object_t object, vm_pindex_t pindex)
* optional allocation flags:
* VM_ALLOC_COUNT(number) the number of additional pages that the caller
* intends to allocate
- * VM_ALLOC_IFCACHED return page only if it is cached
- * VM_ALLOC_IFNOTCACHED return NULL, do not reactivate if the page
- * is cached
* VM_ALLOC_NOBUSY do not exclusive busy the page
* VM_ALLOC_NODUMP do not include the page in a kernel core dump
* VM_ALLOC_NOOBJ page is not associated with an object and
@@ -1648,21 +1509,21 @@ vm_page_is_cached(vm_object_t object, vm_pindex_t pindex)
vm_page_t
vm_page_alloc(vm_object_t object, vm_pindex_t pindex, int req)
{
- struct vnode *vp = NULL;
- vm_object_t m_object;
vm_page_t m, mpred;
int flags, req_class;
- mpred = 0; /* XXX: pacify gcc */
+ mpred = NULL; /* XXX: pacify gcc */
KASSERT((object != NULL) == ((req & VM_ALLOC_NOOBJ) == 0) &&
(object != NULL || (req & VM_ALLOC_SBUSY) == 0) &&
((req & (VM_ALLOC_NOBUSY | VM_ALLOC_SBUSY)) !=
(VM_ALLOC_NOBUSY | VM_ALLOC_SBUSY)),
- ("vm_page_alloc: inconsistent object(%p)/req(%x)", (void *)object,
- req));
+ ("vm_page_alloc: inconsistent object(%p)/req(%x)", object, req));
if (object != NULL)
VM_OBJECT_ASSERT_WLOCKED(object);
+ if (__predict_false((req & VM_ALLOC_IFCACHED) != 0))
+ return (NULL);
+
req_class = req & VM_ALLOC_CLASS_MASK;
/*
@@ -1678,45 +1539,27 @@ vm_page_alloc(vm_object_t object, vm_pindex_t pindex, int req)
}
/*
- * The page allocation request can came from consumers which already
- * hold the free page queue mutex, like vm_page_insert() in
- * vm_page_cache().
+ * Allocate a page if the number of free pages exceeds the minimum
+ * for the request class.
*/
- mtx_lock_flags(&vm_page_queue_free_mtx, MTX_RECURSE);
+ mtx_lock(&vm_page_queue_free_mtx);
if (vm_cnt.v_free_count + vm_cnt.v_cache_count > vm_cnt.v_free_reserved ||
(req_class == VM_ALLOC_SYSTEM &&
vm_cnt.v_free_count + vm_cnt.v_cache_count > vm_cnt.v_interrupt_free_min) ||
(req_class == VM_ALLOC_INTERRUPT &&
vm_cnt.v_free_count + vm_cnt.v_cache_count > 0)) {
/*
- * Allocate from the free queue if the number of free pages
- * exceeds the minimum for the request class.
+ * Can we allocate the page from a reservation?
*/
- if (object != NULL &&
- (m = vm_page_cache_lookup(object, pindex)) != NULL) {
- if ((req & VM_ALLOC_IFNOTCACHED) != 0) {
- mtx_unlock(&vm_page_queue_free_mtx);
- return (NULL);
- }
- if (vm_phys_unfree_page(m))
- vm_phys_set_pool(VM_FREEPOOL_DEFAULT, m, 0);
#if VM_NRESERVLEVEL > 0
- else if (!vm_reserv_reactivate_page(m))
-#else
- else
-#endif
- panic("vm_page_alloc: cache page %p is missing"
- " from the free queue", m);
- } else if ((req & VM_ALLOC_IFCACHED) != 0) {
- mtx_unlock(&vm_page_queue_free_mtx);
- return (NULL);
-#if VM_NRESERVLEVEL > 0
- } else if (object == NULL || (object->flags & (OBJ_COLORED |
+ if (object == NULL || (object->flags & (OBJ_COLORED |
OBJ_FICTITIOUS)) != OBJ_COLORED || (m =
- vm_reserv_alloc_page(object, pindex, mpred)) == NULL) {
-#else
- } else {
+ vm_reserv_alloc_page(object, pindex, mpred)) == NULL)
#endif
+ {
+ /*
+ * If not, allocate it from the free page queues.
+ */
m = vm_phys_alloc_pages(object != NULL ?
VM_FREEPOOL_DEFAULT : VM_FREEPOOL_DIRECT, 0);
#if VM_NRESERVLEVEL > 0
@@ -1742,37 +1585,11 @@ vm_page_alloc(vm_object_t object, vm_pindex_t pindex, int req)
* At this point we had better have found a good page.
*/
KASSERT(m != NULL, ("vm_page_alloc: missing page"));
- KASSERT(m->queue == PQ_NONE,
- ("vm_page_alloc: page %p has unexpected queue %d", m, m->queue));
- KASSERT(m->wire_count == 0, ("vm_page_alloc: page %p is wired", m));
- KASSERT(m->hold_count == 0, ("vm_page_alloc: page %p is held", m));
- KASSERT(!vm_page_busied(m), ("vm_page_alloc: page %p is busy", m));
- KASSERT(m->dirty == 0, ("vm_page_alloc: page %p is dirty", m));
- KASSERT(pmap_page_get_memattr(m) == VM_MEMATTR_DEFAULT,
- ("vm_page_alloc: page %p has unexpected memattr %d", m,
- pmap_page_get_memattr(m)));
- if ((m->flags & PG_CACHED) != 0) {
- KASSERT((m->flags & PG_ZERO) == 0,
- ("vm_page_alloc: cached page %p is PG_ZERO", m));
- KASSERT(m->valid != 0,
- ("vm_page_alloc: cached page %p is invalid", m));
- if (m->object == object && m->pindex == pindex)
- vm_cnt.v_reactivated++;
- else
- m->valid = 0;
- m_object = m->object;
- vm_page_cache_remove(m);
- if (m_object->type == OBJT_VNODE &&
- vm_object_cache_is_empty(m_object))
- vp = m_object->handle;
- } else {
- KASSERT(m->valid == 0,
- ("vm_page_alloc: free page %p is valid", m));
- vm_phys_freecnt_adj(m, -1);
- if ((m->flags & PG_ZERO) != 0)
- vm_page_zero_count--;
- }
+ vm_phys_freecnt_adj(m, -1);
+ if ((m->flags & PG_ZERO) != 0)
+ vm_page_zero_count--;
mtx_unlock(&vm_page_queue_free_mtx);
+ vm_page_alloc_check(m);
/*
* Initialize the page. Only the PG_ZERO flag is inherited.
@@ -1804,18 +1621,16 @@ vm_page_alloc(vm_object_t object, vm_pindex_t pindex, int req)
if (object != NULL) {
if (vm_page_insert_after(m, object, pindex, mpred)) {
- /* See the comment below about hold count. */
- if (vp != NULL)
- vdrop(vp);
pagedaemon_wakeup();
if (req & VM_ALLOC_WIRED) {
atomic_subtract_int(&vm_cnt.v_wire_count, 1);
m->wire_count = 0;
}
- m->object = NULL;
+ KASSERT(m->object == NULL, ("page %p has object", m));
m->oflags = VPO_UNMANAGED;
m->busy_lock = VPB_UNBUSIED;
- vm_page_free(m);
+ /* Don't change PG_ZERO. */
+ vm_page_free_toq(m);
return (NULL);
}
@@ -1827,15 +1642,6 @@ vm_page_alloc(vm_object_t object, vm_pindex_t pindex, int req)
m->pindex = pindex;
/*
- * The following call to vdrop() must come after the above call
- * to vm_page_insert() in case both affect the same object and
- * vnode. Otherwise, the affected vnode's hold count could
- * temporarily become zero.
- */
- if (vp != NULL)
- vdrop(vp);
-
- /*
* Don't wakeup too often - wakeup the pageout daemon when
* we would be nearly out of memory.
*/
@@ -1845,16 +1651,6 @@ vm_page_alloc(vm_object_t object, vm_pindex_t pindex, int req)
return (m);
}
-static void
-vm_page_alloc_contig_vdrop(struct spglist *lst)
-{
-
- while (!SLIST_EMPTY(lst)) {
- vdrop((struct vnode *)SLIST_FIRST(lst)-> plinks.s.pv);
- SLIST_REMOVE_HEAD(lst, plinks.s.ss);
- }
-}
-
/*
* vm_page_alloc_contig:
*
@@ -1876,6 +1672,8 @@ vm_page_alloc_contig_vdrop(struct spglist *lst)
* memory attribute setting for the physical pages cannot be configured
* to VM_MEMATTR_DEFAULT.
*
+ * The specified object may not contain fictitious pages.
+ *
* The caller must always specify an allocation class.
*
* allocation classes:
@@ -1899,22 +1697,21 @@ vm_page_alloc_contig(vm_object_t object, vm_pindex_t pindex, int req,
u_long npages, vm_paddr_t low, vm_paddr_t high, u_long alignment,
vm_paddr_t boundary, vm_memattr_t memattr)
{
- struct vnode *drop;
- struct spglist deferred_vdrop_list;
- vm_page_t m, m_tmp, m_ret;
- u_int flags;
+ vm_page_t m, m_ret, mpred;
+ u_int busy_lock, flags, oflags;
int req_class;
+ mpred = NULL; /* XXX: pacify gcc */
KASSERT((object != NULL) == ((req & VM_ALLOC_NOOBJ) == 0) &&
(object != NULL || (req & VM_ALLOC_SBUSY) == 0) &&
((req & (VM_ALLOC_NOBUSY | VM_ALLOC_SBUSY)) !=
(VM_ALLOC_NOBUSY | VM_ALLOC_SBUSY)),
- ("vm_page_alloc: inconsistent object(%p)/req(%x)", (void *)object,
+ ("vm_page_alloc_contig: inconsistent object(%p)/req(%x)", object,
req));
if (object != NULL) {
VM_OBJECT_ASSERT_WLOCKED(object);
- KASSERT(object->type == OBJT_PHYS,
- ("vm_page_alloc_contig: object %p isn't OBJT_PHYS",
+ KASSERT((object->flags & OBJ_FICTITIOUS) == 0,
+ ("vm_page_alloc_contig: object %p has fictitious pages",
object));
}
KASSERT(npages > 0, ("vm_page_alloc_contig: npages is zero"));
@@ -1926,19 +1723,34 @@ vm_page_alloc_contig(vm_object_t object, vm_pindex_t pindex, int req,
if (curproc == pageproc && req_class != VM_ALLOC_INTERRUPT)
req_class = VM_ALLOC_SYSTEM;
- SLIST_INIT(&deferred_vdrop_list);
+ if (object != NULL) {
+ mpred = vm_radix_lookup_le(&object->rtree, pindex);
+ KASSERT(mpred == NULL || mpred->pindex != pindex,
+ ("vm_page_alloc_contig: pindex already allocated"));
+ }
+
+ /*
+ * Can we allocate the pages without the number of free pages falling
+ * below the lower bound for the allocation class?
+ */
mtx_lock(&vm_page_queue_free_mtx);
if (vm_cnt.v_free_count + vm_cnt.v_cache_count >= npages +
vm_cnt.v_free_reserved || (req_class == VM_ALLOC_SYSTEM &&
vm_cnt.v_free_count + vm_cnt.v_cache_count >= npages +
vm_cnt.v_interrupt_free_min) || (req_class == VM_ALLOC_INTERRUPT &&
vm_cnt.v_free_count + vm_cnt.v_cache_count >= npages)) {
+ /*
+ * Can we allocate the pages from a reservation?
+ */
#if VM_NRESERVLEVEL > 0
retry:
if (object == NULL || (object->flags & OBJ_COLORED) == 0 ||
(m_ret = vm_reserv_alloc_contig(object, pindex, npages,
- low, high, alignment, boundary)) == NULL)
+ low, high, alignment, boundary, mpred)) == NULL)
#endif
+ /*
+ * If not, allocate them from the free page queues.
+ */
m_ret = vm_phys_alloc_contig(npages, low, high,
alignment, boundary);
} else {
@@ -1948,17 +1760,7 @@ retry:
return (NULL);
}
if (m_ret != NULL)
- for (m = m_ret; m < &m_ret[npages]; m++) {
- drop = vm_page_alloc_init(m);
- if (drop != NULL) {
- /*
- * Enqueue the vnode for deferred vdrop().
- */
- m->plinks.s.pv = drop;
- SLIST_INSERT_HEAD(&deferred_vdrop_list, m,
- plinks.s.ss);
- }
- }
+ vm_phys_freecnt_adj(m_ret, -npages);
else {
#if VM_NRESERVLEVEL > 0
if (vm_reserv_reclaim_contig(npages, low, high, alignment,
@@ -1966,9 +1768,14 @@ retry:
goto retry;
#endif
}
+ for (m = m_ret; m < &m_ret[npages]; m++)
+ if ((m->flags & PG_ZERO) != 0)
+ vm_page_zero_count--;
mtx_unlock(&vm_page_queue_free_mtx);
if (m_ret == NULL)
return (NULL);
+ for (m = m_ret; m < &m_ret[npages]; m++)
+ vm_page_alloc_check(m);
/*
* Initialize the pages. Only the PG_ZERO flag is inherited.
@@ -1978,6 +1785,13 @@ retry:
flags = PG_ZERO;
if ((req & VM_ALLOC_NODUMP) != 0)
flags |= PG_NODUMP;
+ oflags = object == NULL || (object->flags & OBJ_UNMANAGED) != 0 ?
+ VPO_UNMANAGED : 0;
+ busy_lock = VPB_UNBUSIED;
+ if ((req & (VM_ALLOC_NOBUSY | VM_ALLOC_NOOBJ | VM_ALLOC_SBUSY)) == 0)
+ busy_lock = VPB_SINGLE_EXCLUSIVER;
+ if ((req & VM_ALLOC_SBUSY) != 0)
+ busy_lock = VPB_SHARERS_WORD(1);
if ((req & VM_ALLOC_WIRED) != 0)
atomic_add_int(&vm_cnt.v_wire_count, npages);
if (object != NULL) {
@@ -1988,98 +1802,61 @@ retry:
for (m = m_ret; m < &m_ret[npages]; m++) {
m->aflags = 0;
m->flags = (m->flags | PG_NODUMP) & flags;
- m->busy_lock = VPB_UNBUSIED;
- if (object != NULL) {
- if ((req & (VM_ALLOC_NOBUSY | VM_ALLOC_SBUSY)) == 0)
- m->busy_lock = VPB_SINGLE_EXCLUSIVER;
- if ((req & VM_ALLOC_SBUSY) != 0)
- m->busy_lock = VPB_SHARERS_WORD(1);
- }
+ m->busy_lock = busy_lock;
if ((req & VM_ALLOC_WIRED) != 0)
m->wire_count = 1;
- /* Unmanaged pages don't use "act_count". */
- m->oflags = VPO_UNMANAGED;
+ m->act_count = 0;
+ m->oflags = oflags;
if (object != NULL) {
- if (vm_page_insert(m, object, pindex)) {
- vm_page_alloc_contig_vdrop(
- &deferred_vdrop_list);
- if (vm_paging_needed())
- pagedaemon_wakeup();
+ if (vm_page_insert_after(m, object, pindex, mpred)) {
+ pagedaemon_wakeup();
if ((req & VM_ALLOC_WIRED) != 0)
- atomic_subtract_int(&vm_cnt.v_wire_count,
- npages);
- for (m_tmp = m, m = m_ret;
- m < &m_ret[npages]; m++) {
- if ((req & VM_ALLOC_WIRED) != 0)
+ atomic_subtract_int(
+ &vm_cnt.v_wire_count, npages);
+ KASSERT(m->object == NULL,
+ ("page %p has object", m));
+ mpred = m;
+ for (m = m_ret; m < &m_ret[npages]; m++) {
+ if (m <= mpred &&
+ (req & VM_ALLOC_WIRED) != 0)
m->wire_count = 0;
- if (m >= m_tmp) {
- m->object = NULL;
- m->oflags |= VPO_UNMANAGED;
- }
+ m->oflags = VPO_UNMANAGED;
m->busy_lock = VPB_UNBUSIED;
- vm_page_free(m);
+ /* Don't change PG_ZERO. */
+ vm_page_free_toq(m);
}
return (NULL);
}
+ mpred = m;
} else
m->pindex = pindex;
if (memattr != VM_MEMATTR_DEFAULT)
pmap_page_set_memattr(m, memattr);
pindex++;
}
- vm_page_alloc_contig_vdrop(&deferred_vdrop_list);
if (vm_paging_needed())
pagedaemon_wakeup();
return (m_ret);
}
/*
- * Initialize a page that has been freshly dequeued from a freelist.
- * The caller has to drop the vnode returned, if it is not NULL.
- *
- * This function may only be used to initialize unmanaged pages.
- *
- * To be called with vm_page_queue_free_mtx held.
+ * Check a page that has been freshly dequeued from a freelist.
*/
-static struct vnode *
-vm_page_alloc_init(vm_page_t m)
+static void
+vm_page_alloc_check(vm_page_t m)
{
- struct vnode *drop;
- vm_object_t m_object;
+ KASSERT(m->object == NULL, ("page %p has object", m));
KASSERT(m->queue == PQ_NONE,
- ("vm_page_alloc_init: page %p has unexpected queue %d",
- m, m->queue));
- KASSERT(m->wire_count == 0,
- ("vm_page_alloc_init: page %p is wired", m));
- KASSERT(m->hold_count == 0,
- ("vm_page_alloc_init: page %p is held", m));
- KASSERT(!vm_page_busied(m),
- ("vm_page_alloc_init: page %p is busy", m));
- KASSERT(m->dirty == 0,
- ("vm_page_alloc_init: page %p is dirty", m));
+ ("page %p has unexpected queue %d", m, m->queue));
+ KASSERT(m->wire_count == 0, ("page %p is wired", m));
+ KASSERT(m->hold_count == 0, ("page %p is held", m));
+ KASSERT(!vm_page_busied(m), ("page %p is busy", m));
+ KASSERT(m->dirty == 0, ("page %p is dirty", m));
KASSERT(pmap_page_get_memattr(m) == VM_MEMATTR_DEFAULT,
- ("vm_page_alloc_init: page %p has unexpected memattr %d",
+ ("page %p has unexpected memattr %d",
m, pmap_page_get_memattr(m)));
- mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
- drop = NULL;
- if ((m->flags & PG_CACHED) != 0) {
- KASSERT((m->flags & PG_ZERO) == 0,
- ("vm_page_alloc_init: cached page %p is PG_ZERO", m));
- m->valid = 0;
- m_object = m->object;
- vm_page_cache_remove(m);
- if (m_object->type == OBJT_VNODE &&
- vm_object_cache_is_empty(m_object))
- drop = m_object->handle;
- } else {
- KASSERT(m->valid == 0,
- ("vm_page_alloc_init: free page %p is valid", m));
- vm_phys_freecnt_adj(m, -1);
- if ((m->flags & PG_ZERO) != 0)
- vm_page_zero_count--;
- }
- return (drop);
+ KASSERT(m->valid == 0, ("free page %p is valid", m));
}
/*
@@ -2105,7 +1882,6 @@ vm_page_alloc_init(vm_page_t m)
vm_page_t
vm_page_alloc_freelist(int flind, int req)
{
- struct vnode *drop;
vm_page_t m;
u_int flags;
int req_class;
@@ -2121,7 +1897,7 @@ vm_page_alloc_freelist(int flind, int req)
/*
* Do not allocate reserved pages unless the req has asked for it.
*/
- mtx_lock_flags(&vm_page_queue_free_mtx, MTX_RECURSE);
+ mtx_lock(&vm_page_queue_free_mtx);
if (vm_cnt.v_free_count + vm_cnt.v_cache_count > vm_cnt.v_free_reserved ||
(req_class == VM_ALLOC_SYSTEM &&
vm_cnt.v_free_count + vm_cnt.v_cache_count > vm_cnt.v_interrupt_free_min) ||
@@ -2139,8 +1915,11 @@ vm_page_alloc_freelist(int flind, int req)
mtx_unlock(&vm_page_queue_free_mtx);
return (NULL);
}
- drop = vm_page_alloc_init(m);
+ vm_phys_freecnt_adj(m, -1);
+ if ((m->flags & PG_ZERO) != 0)
+ vm_page_zero_count--;
mtx_unlock(&vm_page_queue_free_mtx);
+ vm_page_alloc_check(m);
/*
* Initialize the page. Only the PG_ZERO flag is inherited.
@@ -2160,8 +1939,6 @@ vm_page_alloc_freelist(int flind, int req)
}
/* Unmanaged pages don't use "act_count". */
m->oflags = VPO_UNMANAGED;
- if (drop != NULL)
- vdrop(drop);
if (vm_paging_needed())
pagedaemon_wakeup();
return (m);
@@ -2284,41 +2061,11 @@ retry:
}
KASSERT((m->flags & PG_UNHOLDFREE) == 0,
("page %p is PG_UNHOLDFREE", m));
- /* Don't care: PG_NODUMP, PG_WINATCFLS, PG_ZERO. */
+ /* Don't care: PG_NODUMP, PG_ZERO. */
if (object->type != OBJT_DEFAULT &&
object->type != OBJT_SWAP &&
- object->type != OBJT_VNODE)
+ object->type != OBJT_VNODE) {
run_ext = 0;
- else if ((m->flags & PG_CACHED) != 0 ||
- m != vm_page_lookup(object, m->pindex)) {
- /*
- * The page is cached or recently converted
- * from cached to free.
- */
-#if VM_NRESERVLEVEL > 0
- if (level >= 0) {
- /*
- * The page is reserved. Extend the
- * current run by one page.
- */
- run_ext = 1;
- } else
-#endif
- if ((order = m->order) < VM_NFREEORDER) {
- /*
- * The page is enqueued in the
- * physical memory allocator's cache/
- * free page queues. Moreover, it is
- * the first page in a power-of-two-
- * sized run of contiguous cache/free
- * pages. Add these pages to the end
- * of the current run, and jump
- * ahead.
- */
- run_ext = 1 << order;
- m_inc = 1 << order;
- } else
- run_ext = 0;
#if VM_NRESERVLEVEL > 0
} else if ((options & VPSC_NOSUPER) != 0 &&
(level = vm_reserv_level_iffullpop(m)) >= 0) {
@@ -2351,18 +2098,18 @@ unlock:
} else if (level >= 0) {
/*
* The page is reserved but not yet allocated. In
- * other words, it is still cached or free. Extend
- * the current run by one page.
+ * other words, it is still free. Extend the current
+ * run by one page.
*/
run_ext = 1;
#endif
} else if ((order = m->order) < VM_NFREEORDER) {
/*
* The page is enqueued in the physical memory
- * allocator's cache/free page queues. Moreover, it
- * is the first page in a power-of-two-sized run of
- * contiguous cache/free pages. Add these pages to
- * the end of the current run, and jump ahead.
+ * allocator's free page queues. Moreover, it is the
+ * first page in a power-of-two-sized run of
+ * contiguous free pages. Add these pages to the end
+ * of the current run, and jump ahead.
*/
run_ext = 1 << order;
m_inc = 1 << order;
@@ -2370,16 +2117,15 @@ unlock:
/*
* Skip the page for one of the following reasons: (1)
* It is enqueued in the physical memory allocator's
- * cache/free page queues. However, it is not the
- * first page in a run of contiguous cache/free pages.
- * (This case rarely occurs because the scan is
- * performed in ascending order.) (2) It is not
- * reserved, and it is transitioning from free to
- * allocated. (Conversely, the transition from
- * allocated to free for managed pages is blocked by
- * the page lock.) (3) It is allocated but not
- * contained by an object and not wired, e.g.,
- * allocated by Xen's balloon driver.
+ * free page queues. However, it is not the first
+ * page in a run of contiguous free pages. (This case
+ * rarely occurs because the scan is performed in
+ * ascending order.) (2) It is not reserved, and it is
+ * transitioning from free to allocated. (Conversely,
+ * the transition from allocated to free for managed
+ * pages is blocked by the page lock.) (3) It is
+ * allocated but not contained by an object and not
+ * wired, e.g., allocated by Xen's balloon driver.
*/
run_ext = 0;
}
@@ -2480,20 +2226,12 @@ retry:
}
KASSERT((m->flags & PG_UNHOLDFREE) == 0,
("page %p is PG_UNHOLDFREE", m));
- /* Don't care: PG_NODUMP, PG_WINATCFLS, PG_ZERO. */
+ /* Don't care: PG_NODUMP, PG_ZERO. */
if (object->type != OBJT_DEFAULT &&
object->type != OBJT_SWAP &&
object->type != OBJT_VNODE)
error = EINVAL;
- else if ((m->flags & PG_CACHED) != 0 ||
- m != vm_page_lookup(object, m->pindex)) {
- /*
- * The page is cached or recently converted
- * from cached to free.
- */
- VM_OBJECT_WUNLOCK(object);
- goto cached;
- } else if (object->memattr != VM_MEMATTR_DEFAULT)
+ else if (object->memattr != VM_MEMATTR_DEFAULT)
error = EINVAL;
else if (m->queue != PQ_NONE && !vm_page_busied(m)) {
KASSERT(pmap_page_get_memattr(m) ==
@@ -2594,17 +2332,16 @@ retry:
unlock:
VM_OBJECT_WUNLOCK(object);
} else {
-cached:
mtx_lock(&vm_page_queue_free_mtx);
order = m->order;
if (order < VM_NFREEORDER) {
/*
* The page is enqueued in the physical memory
- * allocator's cache/free page queues.
- * Moreover, it is the first page in a power-
- * of-two-sized run of contiguous cache/free
- * pages. Jump ahead to the last page within
- * that run, and continue from there.
+ * allocator's free page queues. Moreover, it
+ * is the first page in a power-of-two-sized
+ * run of contiguous free pages. Jump ahead
+ * to the last page within that run, and
+ * continue from there.
*/
m += (1 << order) - 1;
}
@@ -2653,9 +2390,9 @@ CTASSERT(powerof2(NRUNS));
* conditions by relocating the virtual pages using that physical memory.
* Returns true if reclamation is successful and false otherwise. Since
* relocation requires the allocation of physical pages, reclamation may
- * fail due to a shortage of cache/free pages. When reclamation fails,
- * callers are expected to perform VM_WAIT before retrying a failed
- * allocation operation, e.g., vm_page_alloc_contig().
+ * fail due to a shortage of free pages. When reclamation fails, callers
+ * are expected to perform VM_WAIT before retrying a failed allocation
+ * operation, e.g., vm_page_alloc_contig().
*
* The caller must always specify an allocation class through "req".
*
@@ -2690,8 +2427,8 @@ vm_page_reclaim_contig(int req, u_long npages, vm_paddr_t low, vm_paddr_t high,
req_class = VM_ALLOC_SYSTEM;
/*
- * Return if the number of cached and free pages cannot satisfy the
- * requested allocation.
+ * Return if the number of free pages cannot satisfy the requested
+ * allocation.
*/
count = vm_cnt.v_free_count + vm_cnt.v_cache_count;
if (count < npages + vm_cnt.v_free_reserved || (count < npages +
@@ -2809,7 +2546,10 @@ struct vm_pagequeue *
vm_page_pagequeue(vm_page_t m)
{
- return (&vm_phys_domain(m)->vmd_pagequeues[m->queue]);
+ if (vm_page_in_laundry(m))
+ return (&vm_dom[0].vmd_pagequeues[m->queue]);
+ else
+ return (&vm_phys_domain(m)->vmd_pagequeues[m->queue]);
}
/*
@@ -2871,7 +2611,10 @@ vm_page_enqueue(uint8_t queue, vm_page_t m)
KASSERT(queue < PQ_COUNT,
("vm_page_enqueue: invalid queue %u request for page %p",
queue, m));
- pq = &vm_phys_domain(m)->vmd_pagequeues[queue];
+ if (queue == PQ_LAUNDRY)
+ pq = &vm_dom[0].vmd_pagequeues[queue];
+ else
+ pq = &vm_phys_domain(m)->vmd_pagequeues[queue];
vm_pagequeue_lock(pq);
m->queue = queue;
TAILQ_INSERT_TAIL(&pq->pq_pl, m, plinks.q);
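
Only one laundry thread is created by this patch (for domain 0, per the kthread_add() call further down), so PQ_LAUNDRY pages are always placed on domain 0's queue even on NUMA systems, while PQ_ACTIVE and PQ_INACTIVE remain per-domain. A minimal userland sketch of that routing decision follows; vm_dom, the PQ_* values, and the struct layouts are pared-down stand-ins rather than the kernel definitions, and pagequeue_for() is a hypothetical helper name.

#include <stdio.h>

#define PQ_INACTIVE	0
#define PQ_ACTIVE	1
#define PQ_LAUNDRY	2
#define PQ_COUNT	3
#define MAXMEMDOM	2

struct vm_pagequeue { int pq_cnt; };
struct vm_domain { struct vm_pagequeue vmd_pagequeues[PQ_COUNT]; };

static struct vm_domain vm_dom[MAXMEMDOM];

/*
 * Mirrors the selection in vm_page_pagequeue() and vm_page_enqueue():
 * laundry pages use domain 0's queue, all other queues are per-domain.
 */
static struct vm_pagequeue *
pagequeue_for(int queue, int page_domain)
{

	if (queue == PQ_LAUNDRY)
		return (&vm_dom[0].vmd_pagequeues[queue]);
	return (&vm_dom[page_domain].vmd_pagequeues[queue]);
}

int
main(void)
{

	/* An inactive page stays on its own domain's queue... */
	printf("inactive, domain 1 -> domain %d queue\n",
	    pagequeue_for(PQ_INACTIVE, 1) ==
	    &vm_dom[1].vmd_pagequeues[PQ_INACTIVE] ? 1 : 0);
	/* ...but a laundry page always lands on domain 0's queue. */
	printf("laundry, domain 1 -> domain %d queue\n",
	    pagequeue_for(PQ_LAUNDRY, 1) ==
	    &vm_dom[0].vmd_pagequeues[PQ_LAUNDRY] ? 0 : 1);
	return (0);
}
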
@@ -2955,9 +2698,8 @@ vm_page_activate(vm_page_t m)
/*
* vm_page_free_wakeup:
*
- * Helper routine for vm_page_free_toq() and vm_page_cache(). This
- * routine is called when a page has been added to the cache or free
- * queues.
+ * Helper routine for vm_page_free_toq(). This routine is called
+ * when a page is added to the free queues.
*
* The page queues must be locked.
*/
@@ -2987,27 +2729,6 @@ vm_page_free_wakeup(void)
}
/*
- * Turn a cached page into a free page, by changing its attributes.
- * Keep the statistics up-to-date.
- *
- * The free page queue must be locked.
- */
-static void
-vm_page_cache_turn_free(vm_page_t m)
-{
-
- mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
-
- m->object = NULL;
- m->valid = 0;
- KASSERT((m->flags & PG_CACHED) != 0,
- ("vm_page_cache_turn_free: page %p is not cached", m));
- m->flags &= ~PG_CACHED;
- vm_cnt.v_cache_count--;
- vm_phys_freecnt_adj(m, 1);
-}
-
-/*
* vm_page_free_toq:
*
* Returns the given page to the free list,
@@ -3066,8 +2787,8 @@ vm_page_free_toq(vm_page_t m)
pmap_page_set_memattr(m, VM_MEMATTR_DEFAULT);
/*
- * Insert the page into the physical memory allocator's
- * cache/free page queues.
+ * Insert the page into the physical memory allocator's free
+ * page queues.
*/
mtx_lock(&vm_page_queue_free_mtx);
vm_phys_freecnt_adj(m, 1);
@@ -3159,11 +2880,8 @@ vm_page_unwire(vm_page_t m, uint8_t queue)
if (m->wire_count == 0) {
atomic_subtract_int(&vm_cnt.v_wire_count, 1);
if ((m->oflags & VPO_UNMANAGED) == 0 &&
- m->object != NULL && queue != PQ_NONE) {
- if (queue == PQ_INACTIVE)
- m->flags &= ~PG_WINATCFLS;
+ m->object != NULL && queue != PQ_NONE)
vm_page_enqueue(queue, m);
- }
return (TRUE);
} else
return (FALSE);
@@ -3174,21 +2892,10 @@ vm_page_unwire(vm_page_t m, uint8_t queue)
/*
* Move the specified page to the inactive queue.
*
- * Many pages placed on the inactive queue should actually go
- * into the cache, but it is difficult to figure out which. What
- * we do instead, if the inactive target is well met, is to put
- * clean pages at the head of the inactive queue instead of the tail.
- * This will cause them to be moved to the cache more quickly and
- * if not actively re-referenced, reclaimed more quickly. If we just
- * stick these pages at the end of the inactive queue, heavy filesystem
- * meta-data accesses can cause an unnecessary paging load on memory bound
- * processes. This optimization causes one-time-use metadata to be
- * reused more quickly.
- *
- * Normally noreuse is FALSE, resulting in LRU operation. noreuse is set
- * to TRUE if we want this page to be 'as if it were placed in the cache',
- * except without unmapping it from the process address space. In
- * practice this is implemented by inserting the page at the head of the
+ * Normally, "noreuse" is FALSE, resulting in LRU ordering of the inactive
+ * queue. However, setting "noreuse" to TRUE will accelerate the specified
+ * page's reclamation, but it will not unmap the page from any address space.
+ * This is implemented by inserting the page near the head of the inactive
* queue, using a marker page to guide FIFO insertion ordering.
*
* The page must be locked.
@@ -3216,7 +2923,6 @@ _vm_page_deactivate(vm_page_t m, boolean_t noreuse)
} else {
if (queue != PQ_NONE)
vm_page_dequeue(m);
- m->flags &= ~PG_WINATCFLS;
vm_pagequeue_lock(pq);
}
m->queue = PQ_INACTIVE;
@@ -3256,24 +2962,25 @@ vm_page_deactivate_noreuse(vm_page_t m)
}
/*
- * vm_page_try_to_cache:
+ * vm_page_launder
*
- * Returns 0 on failure, 1 on success
+ * Put a page in the laundry.
*/
-int
-vm_page_try_to_cache(vm_page_t m)
+void
+vm_page_launder(vm_page_t m)
{
+ int queue;
- vm_page_lock_assert(m, MA_OWNED);
- VM_OBJECT_ASSERT_WLOCKED(m->object);
- if (m->dirty || m->hold_count || m->wire_count ||
- (m->oflags & VPO_UNMANAGED) != 0 || vm_page_busied(m))
- return (0);
- pmap_remove_all(m);
- if (m->dirty)
- return (0);
- vm_page_cache(m);
- return (1);
+ vm_page_assert_locked(m);
+ if ((queue = m->queue) != PQ_LAUNDRY) {
+ if (m->wire_count == 0 && (m->oflags & VPO_UNMANAGED) == 0) {
+ if (queue != PQ_NONE)
+ vm_page_dequeue(m);
+ vm_page_enqueue(PQ_LAUNDRY, m);
+ } else
+ KASSERT(queue == PQ_NONE,
+ ("wired page %p is queued", m));
+ }
}
/*
@@ -3300,112 +3007,6 @@ vm_page_try_to_free(vm_page_t m)
}
/*
- * vm_page_cache
- *
- * Put the specified page onto the page cache queue (if appropriate).
- *
- * The object and page must be locked.
- */
-void
-vm_page_cache(vm_page_t m)
-{
- vm_object_t object;
- boolean_t cache_was_empty;
-
- vm_page_lock_assert(m, MA_OWNED);
- object = m->object;
- VM_OBJECT_ASSERT_WLOCKED(object);
- if (vm_page_busied(m) || (m->oflags & VPO_UNMANAGED) ||
- m->hold_count || m->wire_count)
- panic("vm_page_cache: attempting to cache busy page");
- KASSERT(!pmap_page_is_mapped(m),
- ("vm_page_cache: page %p is mapped", m));
- KASSERT(m->dirty == 0, ("vm_page_cache: page %p is dirty", m));
- if (m->valid == 0 || object->type == OBJT_DEFAULT ||
- (object->type == OBJT_SWAP &&
- !vm_pager_has_page(object, m->pindex, NULL, NULL))) {
- /*
- * Hypothesis: A cache-eligible page belonging to a
- * default object or swap object but without a backing
- * store must be zero filled.
- */
- vm_page_free(m);
- return;
- }
- KASSERT((m->flags & PG_CACHED) == 0,
- ("vm_page_cache: page %p is already cached", m));
-
- /*
- * Remove the page from the paging queues.
- */
- vm_page_remque(m);
-
- /*
- * Remove the page from the object's collection of resident
- * pages.
- */
- vm_radix_remove(&object->rtree, m->pindex);
- TAILQ_REMOVE(&object->memq, m, listq);
- object->resident_page_count--;
-
- /*
- * Restore the default memory attribute to the page.
- */
- if (pmap_page_get_memattr(m) != VM_MEMATTR_DEFAULT)
- pmap_page_set_memattr(m, VM_MEMATTR_DEFAULT);
-
- /*
- * Insert the page into the object's collection of cached pages
- * and the physical memory allocator's cache/free page queues.
- */
- m->flags &= ~PG_ZERO;
- mtx_lock(&vm_page_queue_free_mtx);
- cache_was_empty = vm_radix_is_empty(&object->cache);
- if (vm_radix_insert(&object->cache, m)) {
- mtx_unlock(&vm_page_queue_free_mtx);
- if (object->type == OBJT_VNODE &&
- object->resident_page_count == 0)
- vdrop(object->handle);
- m->object = NULL;
- vm_page_free(m);
- return;
- }
-
- /*
- * The above call to vm_radix_insert() could reclaim the one pre-
- * existing cached page from this object, resulting in a call to
- * vdrop().
- */
- if (!cache_was_empty)
- cache_was_empty = vm_radix_is_singleton(&object->cache);
-
- m->flags |= PG_CACHED;
- vm_cnt.v_cache_count++;
- PCPU_INC(cnt.v_tcached);
-#if VM_NRESERVLEVEL > 0
- if (!vm_reserv_free_page(m)) {
-#else
- if (TRUE) {
-#endif
- vm_phys_free_pages(m, 0);
- }
- vm_page_free_wakeup();
- mtx_unlock(&vm_page_queue_free_mtx);
-
- /*
- * Increment the vnode's hold count if this is the object's only
- * cached page. Decrement the vnode's hold count if this was
- * the object's only resident page.
- */
- if (object->type == OBJT_VNODE) {
- if (cache_was_empty && object->resident_page_count != 0)
- vhold(object->handle);
- else if (!cache_was_empty && object->resident_page_count == 0)
- vdrop(object->handle);
- }
-}
-
-/*
* vm_page_advise
*
* Deactivate or do nothing, as appropriate.
@@ -3421,16 +3022,9 @@ vm_page_advise(vm_page_t m, int advice)
if (advice == MADV_FREE)
/*
* Mark the page clean. This will allow the page to be freed
- * up by the system. However, such pages are often reused
- * quickly by malloc() so we do not do anything that would
- * cause a page fault if we can help it.
- *
- * Specifically, we do not try to actually free the page now
- * nor do we try to put it in the cache (which would cause a
- * page fault on reuse).
- *
- * But we do make the page as freeable as we can without
- * actually taking the step of unmapping it.
+ * without first paging it out. MADV_FREE pages are often
+ * quickly reused by malloc(3), so we do not do anything that
+ * would result in a page fault on a later access.
*/
vm_page_undirty(m);
else if (advice != MADV_DONTNEED)
@@ -3448,11 +3042,13 @@ vm_page_advise(vm_page_t m, int advice)
/*
* Place clean pages near the head of the inactive queue rather than
* the tail, thus defeating the queue's LRU operation and ensuring that
- * the page will be reused quickly. Dirty pages are given a chance to
- * cycle once through the inactive queue before becoming eligible for
- * laundering.
+ * the page will be reused quickly. Dirty pages not already in the
+ * laundry are moved there.
*/
- _vm_page_deactivate(m, m->dirty == 0);
+ if (m->dirty == 0)
+ vm_page_deactivate_noreuse(m);
+ else
+ vm_page_launder(m);
}
/*
@@ -3517,8 +3113,7 @@ retrylookup:
VM_WAIT;
VM_OBJECT_WLOCK(object);
goto retrylookup;
- } else if (m->valid != 0)
- return (m);
+ }
if (allocflags & VM_ALLOC_ZERO && (m->flags & PG_ZERO) == 0)
pmap_zero_page(m);
return (m);
@@ -3961,6 +3556,7 @@ DB_SHOW_COMMAND(page, vm_page_print_page_info)
db_printf("vm_cnt.v_cache_count: %d\n", vm_cnt.v_cache_count);
db_printf("vm_cnt.v_inactive_count: %d\n", vm_cnt.v_inactive_count);
db_printf("vm_cnt.v_active_count: %d\n", vm_cnt.v_active_count);
+ db_printf("vm_cnt.v_laundry_count: %d\n", vm_cnt.v_laundry_count);
db_printf("vm_cnt.v_wire_count: %d\n", vm_cnt.v_wire_count);
db_printf("vm_cnt.v_free_reserved: %d\n", vm_cnt.v_free_reserved);
db_printf("vm_cnt.v_free_min: %d\n", vm_cnt.v_free_min);
@@ -3975,12 +3571,14 @@ DB_SHOW_COMMAND(pageq, vm_page_print_pageq_info)
db_printf("pq_free %d pq_cache %d\n",
vm_cnt.v_free_count, vm_cnt.v_cache_count);
for (dom = 0; dom < vm_ndomains; dom++) {
- db_printf("dom %d page_cnt %d free %d pq_act %d pq_inact %d\n",
+ db_printf(
+ "dom %d page_cnt %d free %d pq_act %d pq_inact %d pq_laund %d\n",
dom,
vm_dom[dom].vmd_page_count,
vm_dom[dom].vmd_free_count,
vm_dom[dom].vmd_pagequeues[PQ_ACTIVE].pq_cnt,
- vm_dom[dom].vmd_pagequeues[PQ_INACTIVE].pq_cnt);
+ vm_dom[dom].vmd_pagequeues[PQ_INACTIVE].pq_cnt,
+ vm_dom[dom].vmd_pagequeues[PQ_LAUNDRY].pq_cnt);
}
}
diff --git a/sys/vm/vm_page.h b/sys/vm/vm_page.h
index 85c6ac5..1ee8dde 100644
--- a/sys/vm/vm_page.h
+++ b/sys/vm/vm_page.h
@@ -206,7 +206,8 @@ struct vm_page {
#define PQ_NONE 255
#define PQ_INACTIVE 0
#define PQ_ACTIVE 1
-#define PQ_COUNT 2
+#define PQ_LAUNDRY 2
+#define PQ_COUNT 3
TAILQ_HEAD(pglist, vm_page);
SLIST_HEAD(spglist, vm_page);
@@ -228,6 +229,7 @@ struct vm_domain {
boolean_t vmd_oom;
int vmd_oom_seq;
int vmd_last_active_scan;
+ struct vm_page vmd_laundry_marker;
struct vm_page vmd_marker; /* marker for pagedaemon private use */
struct vm_page vmd_inacthead; /* marker for LRU-defeating insertions */
};
@@ -236,6 +238,7 @@ extern struct vm_domain vm_dom[MAXMEMDOM];
#define vm_pagequeue_assert_locked(pq) mtx_assert(&(pq)->pq_mutex, MA_OWNED)
#define vm_pagequeue_lock(pq) mtx_lock(&(pq)->pq_mutex)
+#define vm_pagequeue_lockptr(pq) (&(pq)->pq_mutex)
#define vm_pagequeue_unlock(pq) mtx_unlock(&(pq)->pq_mutex)
#ifdef _KERNEL
@@ -323,11 +326,9 @@ extern struct mtx_padalign pa_lock[];
* Page flags. If changed at any other time than page allocation or
* freeing, the modification must be protected by the vm_page lock.
*/
-#define PG_CACHED 0x0001 /* page is cached */
#define PG_FICTITIOUS 0x0004 /* physical page doesn't exist */
#define PG_ZERO 0x0008 /* page is zeroed */
#define PG_MARKER 0x0010 /* special queue marker page */
-#define PG_WINATCFLS 0x0040 /* flush dirty page on inactive q */
#define PG_NODUMP 0x0080 /* don't include this page in a dump */
#define PG_UNHOLDFREE 0x0100 /* delayed free of a held page */
@@ -351,19 +352,16 @@ extern struct mtx_padalign pa_lock[];
* free
* Available for allocation now.
*
- * cache
- * Almost available for allocation. Still associated with
- * an object, but clean and immediately freeable.
- *
- * The following lists are LRU sorted:
- *
* inactive
* Low activity, candidates for reclamation.
+ * This list is approximately LRU ordered.
+ *
+ * laundry
* This is the list of pages that should be
* paged out next.
*
* active
- * Pages that are "active" i.e. they have been
+ * Pages that are "active", i.e., they have been
* recently referenced.
*
*/
@@ -407,8 +405,8 @@ vm_page_t PHYS_TO_VM_PAGE(vm_paddr_t pa);
#define VM_ALLOC_ZERO 0x0040 /* (acfg) Try to obtain a zeroed page */
#define VM_ALLOC_NOOBJ 0x0100 /* (acg) No associated object */
#define VM_ALLOC_NOBUSY 0x0200 /* (acg) Do not busy the page */
-#define VM_ALLOC_IFCACHED 0x0400 /* (ag) Fail if page is not cached */
-#define VM_ALLOC_IFNOTCACHED 0x0800 /* (ag) Fail if page is cached */
+#define VM_ALLOC_IFCACHED 0x0400
+#define VM_ALLOC_IFNOTCACHED 0x0800
#define VM_ALLOC_IGN_SBUSY 0x1000 /* (g) Ignore shared busy flag */
#define VM_ALLOC_NODUMP 0x2000 /* (ag) don't include in dump */
#define VM_ALLOC_SBUSY 0x4000 /* (acg) Shared busy the page */
@@ -451,10 +449,6 @@ vm_page_t vm_page_alloc_contig(vm_object_t object, vm_pindex_t pindex, int req,
vm_paddr_t boundary, vm_memattr_t memattr);
vm_page_t vm_page_alloc_freelist(int, int);
vm_page_t vm_page_grab (vm_object_t, vm_pindex_t, int);
-void vm_page_cache(vm_page_t);
-void vm_page_cache_free(vm_object_t, vm_pindex_t, vm_pindex_t);
-void vm_page_cache_transfer(vm_object_t, vm_pindex_t, vm_object_t);
-int vm_page_try_to_cache (vm_page_t);
int vm_page_try_to_free (vm_page_t);
void vm_page_deactivate (vm_page_t);
void vm_page_deactivate_noreuse(vm_page_t);
@@ -464,7 +458,7 @@ vm_page_t vm_page_find_least(vm_object_t, vm_pindex_t);
vm_page_t vm_page_getfake(vm_paddr_t paddr, vm_memattr_t memattr);
void vm_page_initfake(vm_page_t m, vm_paddr_t paddr, vm_memattr_t memattr);
int vm_page_insert (vm_page_t, vm_object_t, vm_pindex_t);
-boolean_t vm_page_is_cached(vm_object_t object, vm_pindex_t pindex);
+void vm_page_launder(vm_page_t m);
vm_page_t vm_page_lookup (vm_object_t, vm_pindex_t);
vm_page_t vm_page_next(vm_page_t m);
int vm_page_pa_tryrelock(pmap_t, vm_paddr_t, vm_paddr_t *);
@@ -698,5 +692,26 @@ vm_page_replace_checked(vm_page_t mnew, vm_object_t object, vm_pindex_t pindex,
(void)mret;
}
+static inline bool
+vm_page_active(vm_page_t m)
+{
+
+ return (m->queue == PQ_ACTIVE);
+}
+
+static inline bool
+vm_page_inactive(vm_page_t m)
+{
+
+ return (m->queue == PQ_INACTIVE);
+}
+
+static inline bool
+vm_page_in_laundry(vm_page_t m)
+{
+
+ return (m->queue == PQ_LAUNDRY);
+}
+
#endif /* _KERNEL */
#endif /* !_VM_PAGE_ */
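
With the laundry queue added, PQ_COUNT grows to 3 and callers test queue membership through the new inline predicates instead of comparing m->queue against queue indices directly (see the vm_pageout.c changes below). A self-contained sketch of the same pattern, using a stripped-down page structure in place of struct vm_page:

#include <stdbool.h>
#include <stdio.h>

#define PQ_NONE		255
#define PQ_INACTIVE	0
#define PQ_ACTIVE	1
#define PQ_LAUNDRY	2
#define PQ_COUNT	3

struct page { unsigned char queue; };	/* stand-in for struct vm_page */

static inline bool
page_active(const struct page *m)
{

	return (m->queue == PQ_ACTIVE);
}

static inline bool
page_inactive(const struct page *m)
{

	return (m->queue == PQ_INACTIVE);
}

static inline bool
page_in_laundry(const struct page *m)
{

	return (m->queue == PQ_LAUNDRY);
}

int
main(void)
{
	struct page m = { .queue = PQ_LAUNDRY };

	printf("active %d inactive %d laundry %d\n",
	    page_active(&m), page_inactive(&m), page_in_laundry(&m));
	return (0);
}
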
diff --git a/sys/vm/vm_pageout.c b/sys/vm/vm_pageout.c
index cd8fe45..cd7bfb6 100644
--- a/sys/vm/vm_pageout.c
+++ b/sys/vm/vm_pageout.c
@@ -119,7 +119,7 @@ __FBSDID("$FreeBSD$");
/* the kernel process "vm_pageout"*/
static void vm_pageout(void);
static void vm_pageout_init(void);
-static int vm_pageout_clean(vm_page_t m);
+static int vm_pageout_clean(vm_page_t m, int *numpagedout);
static int vm_pageout_cluster(vm_page_t m);
static bool vm_pageout_scan(struct vm_domain *vmd, int pass);
static void vm_pageout_mightbe_oom(struct vm_domain *vmd, int page_shortage,
@@ -154,6 +154,9 @@ static struct kproc_desc vm_kp = {
SYSINIT(vmdaemon, SI_SUB_KTHREAD_VM, SI_ORDER_FIRST, kproc_start, &vm_kp);
#endif
+/* Pagedaemon activity rates, in subdivisions of one second. */
+#define VM_LAUNDER_RATE 10
+#define VM_INACT_SCAN_RATE 2
int vm_pageout_deficit; /* Estimated number of pages deficit */
int vm_pageout_wakeup_thresh;
@@ -161,6 +164,13 @@ static int vm_pageout_oom_seq = 12;
bool vm_pageout_wanted; /* Event on which pageout daemon sleeps */
bool vm_pages_needed; /* Are threads waiting for free pages? */
+/* Pending request for dirty page laundering. */
+static enum {
+ VM_LAUNDRY_IDLE,
+ VM_LAUNDRY_BACKGROUND,
+ VM_LAUNDRY_SHORTFALL
+} vm_laundry_request = VM_LAUNDRY_IDLE;
+
#if !defined(NO_SWAPPING)
static int vm_pageout_req_swapout; /* XXX */
static int vm_daemon_needed;
@@ -168,9 +178,7 @@ static struct mtx vm_daemon_mtx;
/* Allow for use by vm_pageout before vm_daemon is initialized. */
MTX_SYSINIT(vm_daemon, &vm_daemon_mtx, "vm daemon", MTX_DEF);
#endif
-static int vm_max_launder = 32;
static int vm_pageout_update_period;
-static int defer_swap_pageouts;
static int disable_swap_pageouts;
static int lowmem_period = 10;
static time_t lowmem_uptime;
@@ -193,9 +201,6 @@ SYSCTL_INT(_vm, OID_AUTO, pageout_wakeup_thresh,
CTLFLAG_RW, &vm_pageout_wakeup_thresh, 0,
"free page threshold for waking up the pageout daemon");
-SYSCTL_INT(_vm, OID_AUTO, max_launder,
- CTLFLAG_RW, &vm_max_launder, 0, "Limit dirty flushes in pageout");
-
SYSCTL_INT(_vm, OID_AUTO, pageout_update_period,
CTLFLAG_RW, &vm_pageout_update_period, 0,
"Maximum active LRU update period");
@@ -215,9 +220,6 @@ SYSCTL_INT(_vm, OID_AUTO, swap_idle_enabled,
CTLFLAG_RW, &vm_swap_idle_enabled, 0, "Allow swapout on idle criteria");
#endif
-SYSCTL_INT(_vm, OID_AUTO, defer_swapspace_pageouts,
- CTLFLAG_RW, &defer_swap_pageouts, 0, "Give preference to dirty pages in mem");
-
SYSCTL_INT(_vm, OID_AUTO, disable_swapspace_pageouts,
CTLFLAG_RW, &disable_swap_pageouts, 0, "Disallow swapout of dirty pages");
@@ -229,6 +231,25 @@ SYSCTL_INT(_vm, OID_AUTO, pageout_oom_seq,
CTLFLAG_RW, &vm_pageout_oom_seq, 0,
"back-to-back calls to oom detector to start OOM");
+static int act_scan_laundry_weight = 3;
+SYSCTL_INT(_vm, OID_AUTO, act_scan_laundry_weight, CTLFLAG_RW,
+ &act_scan_laundry_weight, 0,
+ "weight given to clean vs. dirty pages in active queue scans");
+
+static u_int vm_background_launder_target;
+SYSCTL_UINT(_vm, OID_AUTO, background_launder_target, CTLFLAG_RW,
+ &vm_background_launder_target, 0,
+ "background laundering target, in pages");
+
+static u_int vm_background_launder_rate = 4096;
+SYSCTL_UINT(_vm, OID_AUTO, background_launder_rate, CTLFLAG_RW,
+ &vm_background_launder_rate, 0,
+ "background laundering rate, in kilobytes per second");
+
+static u_int vm_background_launder_max = 20 * 1024;
+SYSCTL_UINT(_vm, OID_AUTO, background_launder_max, CTLFLAG_RW,
+ &vm_background_launder_max, 0, "background laundering cap, in kilobytes");
+
#define VM_PAGEOUT_PAGE_COUNT 16
int vm_pageout_page_count = VM_PAGEOUT_PAGE_COUNT;
@@ -236,7 +257,11 @@ int vm_page_max_wired; /* XXX max # of wired pages system-wide */
SYSCTL_INT(_vm, OID_AUTO, max_wired,
CTLFLAG_RW, &vm_page_max_wired, 0, "System-wide limit to wired page count");
+static u_int isqrt(u_int num);
static boolean_t vm_pageout_fallback_object_lock(vm_page_t, vm_page_t *);
+static int vm_pageout_launder(struct vm_domain *vmd, int launder,
+ bool in_shortfall);
+static void vm_pageout_laundry_worker(void *arg);
#if !defined(NO_SWAPPING)
static void vm_pageout_map_deactivate_pages(vm_map_t, long);
static void vm_pageout_object_deactivate_pages(pmap_t, vm_object_t, long);
@@ -387,7 +412,7 @@ vm_pageout_cluster(vm_page_t m)
/*
* We can cluster only if the page is not clean, busy, or held, and
- * the page is inactive.
+ * the page is in the laundry queue.
*
* During heavy mmap/modification loads the pageout
* daemon can really fragment the underlying file
@@ -413,7 +438,7 @@ more:
break;
}
vm_page_lock(p);
- if (p->queue != PQ_INACTIVE ||
+ if (!vm_page_in_laundry(p) ||
p->hold_count != 0) { /* may be undergoing I/O */
vm_page_unlock(p);
ib = 0;
@@ -439,7 +464,7 @@ more:
if (p->dirty == 0)
break;
vm_page_lock(p);
- if (p->queue != PQ_INACTIVE ||
+ if (!vm_page_in_laundry(p) ||
p->hold_count != 0) { /* may be undergoing I/O */
vm_page_unlock(p);
break;
@@ -519,23 +544,33 @@ vm_pageout_flush(vm_page_t *mc, int count, int flags, int mreq, int *prunlen,
("vm_pageout_flush: page %p is not write protected", mt));
switch (pageout_status[i]) {
case VM_PAGER_OK:
+ vm_page_lock(mt);
+ if (vm_page_in_laundry(mt))
+ vm_page_deactivate_noreuse(mt);
+ vm_page_unlock(mt);
+ /* FALLTHROUGH */
case VM_PAGER_PEND:
numpagedout++;
break;
case VM_PAGER_BAD:
/*
- * Page outside of range of object. Right now we
- * essentially lose the changes by pretending it
- * worked.
+ * The page is outside the object's range. We pretend
+ * that the page out worked and clean the page, so the
+ * changes will be lost if the page is reclaimed by
+ * the page daemon.
*/
vm_page_undirty(mt);
+ vm_page_lock(mt);
+ if (vm_page_in_laundry(mt))
+ vm_page_deactivate_noreuse(mt);
+ vm_page_unlock(mt);
break;
case VM_PAGER_ERROR:
case VM_PAGER_FAIL:
/*
- * If page couldn't be paged out, then reactivate the
- * page so it doesn't clog the inactive list. (We
- * will try paging out it again later).
+ * If the page couldn't be paged out, then reactivate
+ * it so that it doesn't clog the laundry and inactive
+ * queues. (We will try paging it out again later).
*/
vm_page_lock(mt);
vm_page_activate(mt);
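
The switch above gives each flushed page a disposition based on the pager's status. A small stand-in that mirrors only what the shown hunk does per status; the enum values below are invented placeholders for the constants in vm/vm_pager.h, and flush_disposition() is a hypothetical name:

#include <stdio.h>

enum pager_status { VM_PAGER_OK, VM_PAGER_PEND, VM_PAGER_BAD,
    VM_PAGER_ERROR, VM_PAGER_FAIL };

/* Per-status handling in vm_pageout_flush() after this change. */
static const char *
flush_disposition(enum pager_status st)
{

	switch (st) {
	case VM_PAGER_OK:
		return ("count it; deactivate (noreuse) if still in laundry");
	case VM_PAGER_PEND:
		return ("count it; completion is asynchronous");
	case VM_PAGER_BAD:
		return ("mark clean; deactivate (noreuse) if still in laundry");
	case VM_PAGER_ERROR:
	case VM_PAGER_FAIL:
		return ("reactivate so it does not clog the queues");
	}
	return ("unreachable");
}

int
main(void)
{

	printf("VM_PAGER_OK   -> %s\n", flush_disposition(VM_PAGER_OK));
	printf("VM_PAGER_BAD  -> %s\n", flush_disposition(VM_PAGER_BAD));
	printf("VM_PAGER_FAIL -> %s\n", flush_disposition(VM_PAGER_FAIL));
	return (0);
}
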
@@ -617,10 +652,10 @@ vm_pageout_object_deactivate_pages(pmap_t pmap, vm_object_t first_object,
act_delta = 1;
vm_page_aflag_clear(p, PGA_REFERENCED);
}
- if (p->queue != PQ_ACTIVE && act_delta != 0) {
+ if (!vm_page_active(p) && act_delta != 0) {
vm_page_activate(p);
p->act_count += act_delta;
- } else if (p->queue == PQ_ACTIVE) {
+ } else if (vm_page_active(p)) {
if (act_delta == 0) {
p->act_count -= min(p->act_count,
ACT_DECLINE);
@@ -636,7 +671,7 @@ vm_pageout_object_deactivate_pages(pmap_t pmap, vm_object_t first_object,
p->act_count += ACT_ADVANCE;
vm_page_requeue(p);
}
- } else if (p->queue == PQ_INACTIVE)
+ } else if (vm_page_inactive(p))
pmap_remove_all(p);
vm_page_unlock(p);
}
@@ -739,7 +774,7 @@ vm_pageout_map_deactivate_pages(map, desired)
* Returns 0 on success and an errno otherwise.
*/
static int
-vm_pageout_clean(vm_page_t m)
+vm_pageout_clean(vm_page_t m, int *numpagedout)
{
struct vnode *vp;
struct mount *mp;
@@ -797,7 +832,7 @@ vm_pageout_clean(vm_page_t m)
* (3) reallocated to a different offset, or
* (4) cleaned.
*/
- if (m->queue != PQ_INACTIVE || m->object != object ||
+ if (!vm_page_in_laundry(m) || m->object != object ||
m->pindex != pindex || m->dirty == 0) {
vm_page_unlock(m);
error = ENXIO;
@@ -821,7 +856,7 @@ vm_pageout_clean(vm_page_t m)
* laundry. If it is still in the laundry, then we
* start the cleaning operation.
*/
- if (vm_pageout_cluster(m) == 0)
+ if ((*numpagedout = vm_pageout_cluster(m)) == 0)
error = EIO;
unlock_all:
@@ -840,11 +875,390 @@ unlock_mp:
}
/*
+ * Attempt to launder the specified number of pages.
+ *
+ * Returns the number of pages successfully laundered.
+ */
+static int
+vm_pageout_launder(struct vm_domain *vmd, int launder, bool in_shortfall)
+{
+ struct vm_pagequeue *pq;
+ vm_object_t object;
+ vm_page_t m, next;
+ int act_delta, error, maxscan, numpagedout, starting_target;
+ int vnodes_skipped;
+ bool pageout_ok, queue_locked;
+
+ starting_target = launder;
+ vnodes_skipped = 0;
+
+ /*
+ * Scan the laundry queue for pages eligible to be laundered. We stop
+ * once the target number of dirty pages have been laundered, or once
+ * we've reached the end of the queue. A single iteration of this loop
+ * may cause more than one page to be laundered because of clustering.
+ *
+ * maxscan ensures that we don't re-examine requeued pages. Any
+ * additional pages written as part of a cluster are subtracted from
+ * maxscan since they must be taken from the laundry queue.
+ */
+ pq = &vmd->vmd_pagequeues[PQ_LAUNDRY];
+ maxscan = pq->pq_cnt;
+
+ vm_pagequeue_lock(pq);
+ queue_locked = true;
+ for (m = TAILQ_FIRST(&pq->pq_pl);
+ m != NULL && maxscan-- > 0 && launder > 0;
+ m = next) {
+ vm_pagequeue_assert_locked(pq);
+ KASSERT(queue_locked, ("unlocked laundry queue"));
+ KASSERT(vm_page_in_laundry(m),
+ ("page %p has an inconsistent queue", m));
+ next = TAILQ_NEXT(m, plinks.q);
+ if ((m->flags & PG_MARKER) != 0)
+ continue;
+ KASSERT((m->flags & PG_FICTITIOUS) == 0,
+ ("PG_FICTITIOUS page %p cannot be in laundry queue", m));
+ KASSERT((m->oflags & VPO_UNMANAGED) == 0,
+ ("VPO_UNMANAGED page %p cannot be in laundry queue", m));
+ if (!vm_pageout_page_lock(m, &next) || m->hold_count != 0) {
+ vm_page_unlock(m);
+ continue;
+ }
+ object = m->object;
+ if ((!VM_OBJECT_TRYWLOCK(object) &&
+ (!vm_pageout_fallback_object_lock(m, &next) ||
+ m->hold_count != 0)) || vm_page_busied(m)) {
+ VM_OBJECT_WUNLOCK(object);
+ vm_page_unlock(m);
+ continue;
+ }
+
+ /*
+ * Unlock the laundry queue, invalidating the 'next' pointer.
+ * Use a marker to remember our place in the laundry queue.
+ */
+ TAILQ_INSERT_AFTER(&pq->pq_pl, m, &vmd->vmd_laundry_marker,
+ plinks.q);
+ vm_pagequeue_unlock(pq);
+ queue_locked = false;
+
+ /*
+ * Invalid pages can be easily freed. They cannot be
+ * mapped; vm_page_free() asserts this.
+ */
+ if (m->valid == 0)
+ goto free_page;
+
+ /*
+ * If the page has been referenced and the object is not dead,
+ * reactivate or requeue the page depending on whether the
+ * object is mapped.
+ */
+ if ((m->aflags & PGA_REFERENCED) != 0) {
+ vm_page_aflag_clear(m, PGA_REFERENCED);
+ act_delta = 1;
+ } else
+ act_delta = 0;
+ if (object->ref_count != 0)
+ act_delta += pmap_ts_referenced(m);
+ else {
+ KASSERT(!pmap_page_is_mapped(m),
+ ("page %p is mapped", m));
+ }
+ if (act_delta != 0) {
+ if (object->ref_count != 0) {
+ PCPU_INC(cnt.v_reactivated);
+ vm_page_activate(m);
+
+ /*
+ * Increase the activation count if the page
+ * was referenced while in the laundry queue.
+ * This makes it less likely that the page will
+ * be returned prematurely to the inactive
+ * queue.
+ */
+ m->act_count += act_delta + ACT_ADVANCE;
+
+ /*
+ * If this was a background laundering, count
+ * activated pages towards our target. The
+ * purpose of background laundering is to ensure
+ * that pages are eventually cycled through the
+ * laundry queue, and an activation is a valid
+ * way out.
+ */
+ if (!in_shortfall)
+ launder--;
+ goto drop_page;
+ } else if ((object->flags & OBJ_DEAD) == 0)
+ goto requeue_page;
+ }
+
+ /*
+ * If the page appears to be clean at the machine-independent
+ * layer, then remove all of its mappings from the pmap in
+ * anticipation of freeing it. If, however, any of the page's
+ * mappings allow write access, then the page may still be
+ * modified until the last of those mappings are removed.
+ */
+ if (object->ref_count != 0) {
+ vm_page_test_dirty(m);
+ if (m->dirty == 0)
+ pmap_remove_all(m);
+ }
+
+ /*
+ * Clean pages are freed, and dirty pages are paged out unless
+ * they belong to a dead object. Requeueing dirty pages from
+ * dead objects is pointless, as they are being paged out and
+ * freed by the thread that destroyed the object.
+ */
+ if (m->dirty == 0) {
+free_page:
+ vm_page_free(m);
+ PCPU_INC(cnt.v_dfree);
+ } else if ((object->flags & OBJ_DEAD) == 0) {
+ if (object->type != OBJT_SWAP &&
+ object->type != OBJT_DEFAULT)
+ pageout_ok = true;
+ else if (disable_swap_pageouts)
+ pageout_ok = false;
+ else
+ pageout_ok = true;
+ if (!pageout_ok) {
+requeue_page:
+ vm_pagequeue_lock(pq);
+ queue_locked = true;
+ vm_page_requeue_locked(m);
+ goto drop_page;
+ }
+
+ /*
+ * Form a cluster with adjacent, dirty pages from the
+ * same object, and page out that entire cluster.
+ *
+ * The adjacent, dirty pages must also be in the
+ * laundry. However, their mappings are not checked
+ * for new references. Consequently, a recently
+ * referenced page may be paged out. However, that
+ * page will not be prematurely reclaimed. After page
+ * out, the page will be placed in the inactive queue,
+ * where any new references will be detected and the
+ * page reactivated.
+ */
+ error = vm_pageout_clean(m, &numpagedout);
+ if (error == 0) {
+ launder -= numpagedout;
+ maxscan -= numpagedout - 1;
+ } else if (error == EDEADLK) {
+ pageout_lock_miss++;
+ vnodes_skipped++;
+ }
+ goto relock_queue;
+ }
+drop_page:
+ vm_page_unlock(m);
+ VM_OBJECT_WUNLOCK(object);
+relock_queue:
+ if (!queue_locked) {
+ vm_pagequeue_lock(pq);
+ queue_locked = true;
+ }
+ next = TAILQ_NEXT(&vmd->vmd_laundry_marker, plinks.q);
+ TAILQ_REMOVE(&pq->pq_pl, &vmd->vmd_laundry_marker, plinks.q);
+ }
+ vm_pagequeue_unlock(pq);
+
+ /*
+ * Wakeup the sync daemon if we skipped a vnode in a writeable object
+ * and we didn't launder enough pages.
+ */
+ if (vnodes_skipped > 0 && launder > 0)
+ (void)speedup_syncer();
+
+ return (starting_target - launder);
+}
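
The launder/maxscan bookkeeping in the loop above is easier to see with numbers: every page written by vm_pageout_clean() counts against the launder target, and every clustered page beyond the one at the scan position also counts against maxscan, since those pages were pulled out of the laundry queue ahead of the scan. A tiny arithmetic model, with made-up starting values:

#include <stdio.h>

int
main(void)
{
	int launder = 32;	/* hypothetical laundering target, in pages */
	int maxscan = 100;	/* hypothetical laundry queue length */
	int numpagedout;

	/* The page at the scan position consumes one maxscan slot. */
	maxscan--;
	/* vm_pageout_clean() writes a 16-page cluster around that page. */
	numpagedout = 16;
	launder -= numpagedout;
	maxscan -= numpagedout - 1;
	printf("after first cluster:  launder=%d maxscan=%d\n",
	    launder, maxscan);

	maxscan--;
	numpagedout = 16;
	launder -= numpagedout;
	maxscan -= numpagedout - 1;
	printf("after second cluster: launder=%d maxscan=%d\n",
	    launder, maxscan);	/* target met: launder == 0 */
	return (0);
}
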
+
+/*
+ * Compute the integer square root.
+ */
+static u_int
+isqrt(u_int num)
+{
+ u_int bit, root, tmp;
+
+ bit = 1u << ((NBBY * sizeof(u_int)) - 2);
+ while (bit > num)
+ bit >>= 2;
+ root = 0;
+ while (bit != 0) {
+ tmp = root + bit;
+ root >>= 1;
+ if (num >= tmp) {
+ num -= tmp;
+ root += bit;
+ }
+ bit >>= 2;
+ }
+ return (root);
+}
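
isqrt() above is the standard digit-by-digit binary integer square root: bit walks down through powers of four and root accumulates one result bit per step, so the routine needs no floating point and finishes in at most sizeof(u_int) * NBBY / 2 iterations. The same algorithm, copied into a standalone userland program so it can be sanity-checked (CHAR_BIT replaces the kernel's NBBY):

#include <limits.h>
#include <stdio.h>

/* Same digit-by-digit algorithm as the kernel's isqrt(). */
static unsigned int
isqrt(unsigned int num)
{
	unsigned int bit, root, tmp;

	/* Highest power of four representable in an unsigned int. */
	bit = 1u << (sizeof(unsigned int) * CHAR_BIT - 2);
	while (bit > num)
		bit >>= 2;
	root = 0;
	while (bit != 0) {
		tmp = root + bit;
		root >>= 1;
		if (num >= tmp) {
			num -= tmp;
			root += bit;
		}
		bit >>= 2;
	}
	return (root);
}

int
main(void)
{
	unsigned int n;

	/* Prints floor(sqrt(n)) for a few sample wakeup deltas. */
	for (n = 0; n <= 20; n++)
		printf("isqrt(%u) = %u\n", n, isqrt(n));
	return (0);
}
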
+
+/*
+ * Perform the work of the laundry thread: periodically wake up and determine
+ * whether any pages need to be laundered. If so, determine the number of pages
+ * that need to be laundered, and launder them.
+ */
+static void
+vm_pageout_laundry_worker(void *arg)
+{
+ struct vm_domain *domain;
+ struct vm_pagequeue *pq;
+ uint64_t nclean, ndirty;
+ u_int last_launder, wakeups;
+ int domidx, last_target, launder, shortfall, shortfall_cycle, target;
+ bool in_shortfall;
+
+ domidx = (uintptr_t)arg;
+ domain = &vm_dom[domidx];
+ pq = &domain->vmd_pagequeues[PQ_LAUNDRY];
+ KASSERT(domain->vmd_segs != 0, ("domain without segments"));
+ vm_pageout_init_marker(&domain->vmd_laundry_marker, PQ_LAUNDRY);
+
+ shortfall = 0;
+ in_shortfall = false;
+ shortfall_cycle = 0;
+ target = 0;
+ last_launder = 0;
+
+ /*
+ * The pageout laundry worker is never done, so loop forever.
+ */
+ for (;;) {
+ KASSERT(target >= 0, ("negative target %d", target));
+ KASSERT(shortfall_cycle >= 0,
+ ("negative cycle %d", shortfall_cycle));
+ launder = 0;
+ wakeups = VM_METER_PCPU_CNT(v_pdwakeups);
+
+ /*
+ * First determine whether we need to launder pages to meet a
+ * shortage of free pages.
+ */
+ if (shortfall > 0) {
+ in_shortfall = true;
+ shortfall_cycle = VM_LAUNDER_RATE / VM_INACT_SCAN_RATE;
+ target = shortfall;
+ } else if (!in_shortfall)
+ goto trybackground;
+ else if (shortfall_cycle == 0 || vm_laundry_target() <= 0) {
+ /*
+ * We recently entered shortfall and began laundering
+ * pages. If we have completed that laundering run
+ * (and we are no longer in shortfall) or we have met
+ * our laundry target through other activity, then we
+ * can stop laundering pages.
+ */
+ in_shortfall = false;
+ target = 0;
+ goto trybackground;
+ }
+ last_launder = wakeups;
+ launder = target / shortfall_cycle--;
+ goto dolaundry;
+
+ /*
+ * There's no immediate need to launder any pages; see if we
+ * meet the conditions to perform background laundering:
+ *
+ * 1. The ratio of dirty to clean inactive pages exceeds the
+ * background laundering threshold and the pagedaemon has
+ * been woken up to reclaim pages since our last
+ * laundering, or
+ * 2. we haven't yet reached the target of the current
+ * background laundering run.
+ *
+ * The background laundering threshold is not a constant.
+ * Instead, it is a slowly growing function of the number of
+ * page daemon wakeups since the last laundering. Thus, as the
+ * ratio of dirty to clean inactive pages grows, the amount of
+ * memory pressure required to trigger laundering decreases.
+ */
+trybackground:
+ nclean = vm_cnt.v_inactive_count + vm_cnt.v_free_count;
+ ndirty = vm_cnt.v_laundry_count;
+ if (target == 0 && wakeups != last_launder &&
+ ndirty * isqrt(wakeups - last_launder) >= nclean) {
+ target = vm_background_launder_target;
+ }
+
+ /*
+ * We have a non-zero background laundering target. If we've
+ * laundered up to our maximum without observing a page daemon
+ * wakeup, just stop. This is a safety belt that ensures we
+ * don't launder an excessive amount if memory pressure is low
+ * and the ratio of dirty to clean pages is large. Otherwise,
+ * proceed at the background laundering rate.
+ */
+ if (target > 0) {
+ if (wakeups != last_launder) {
+ last_launder = wakeups;
+ last_target = target;
+ } else if (last_target - target >=
+ vm_background_launder_max * PAGE_SIZE / 1024) {
+ target = 0;
+ }
+ launder = vm_background_launder_rate * PAGE_SIZE / 1024;
+ launder /= VM_LAUNDER_RATE;
+ if (launder > target)
+ launder = target;
+ }
+
+dolaundry:
+ if (launder > 0) {
+ /*
+ * Because of I/O clustering, the number of laundered
+ * pages could exceed "target" by the maximum size of
+ * a cluster minus one.
+ */
+ target -= min(vm_pageout_launder(domain, launder,
+ in_shortfall), target);
+ pause("laundp", hz / VM_LAUNDER_RATE);
+ }
+
+ /*
+ * If we're not currently laundering pages and the page daemon
+ * hasn't posted a new request, sleep until the page daemon
+ * kicks us.
+ */
+ vm_pagequeue_lock(pq);
+ if (target == 0 && vm_laundry_request == VM_LAUNDRY_IDLE)
+ (void)mtx_sleep(&vm_laundry_request,
+ vm_pagequeue_lockptr(pq), PVM, "launds", 0);
+
+ /*
+ * If the pagedaemon has indicated that it's in shortfall, start
+ * a shortfall laundering unless we're already in the middle of
+ * one. This may preempt a background laundering.
+ */
+ if (vm_laundry_request == VM_LAUNDRY_SHORTFALL &&
+ (!in_shortfall || shortfall_cycle == 0)) {
+ shortfall = vm_laundry_target() + vm_pageout_deficit;
+ target = 0;
+ } else
+ shortfall = 0;
+
+ if (target == 0)
+ vm_laundry_request = VM_LAUNDRY_IDLE;
+ vm_pagequeue_unlock(pq);
+ }
+}
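
A compact userland model of the two policies in the worker above: the background trigger fires once the dirty page count, weighted by the square root of the page-daemon wakeups seen since the last laundering, catches up with the clean page count, and a shortfall run spreads its target over the VM_LAUNDER_RATE / VM_INACT_SCAN_RATE (= 5) laundering cycles that fit in one inactive-scan period. The page counts below are invented and int_sqrt() is a simple stand-in for isqrt(); only the two formulas are taken from the patch.

#include <stdio.h>

#define VM_LAUNDER_RATE		10	/* laundering cycles per second */
#define VM_INACT_SCAN_RATE	2	/* inactive queue scans per second */

/* Simple stand-in for the kernel's isqrt(). */
static unsigned int
int_sqrt(unsigned int n)
{
	unsigned int r;

	for (r = 0; (r + 1) * (r + 1) <= n; r++)
		;
	return (r);
}

int
main(void)
{
	unsigned int nclean = 100000, ndirty = 30000, wakeups;
	int target = 5000, shortfall_cycle, launder;

	/*
	 * Background trigger: ndirty * isqrt(wakeups since the last
	 * laundering) >= nclean, so the threshold shrinks as wakeups
	 * accumulate.
	 */
	for (wakeups = 1; wakeups <= 16; wakeups *= 2)
		printf("wakeups=%2u: %s\n", wakeups,
		    ndirty * int_sqrt(wakeups) >= nclean ?
		    "start background run" : "stay idle");

	/*
	 * Shortfall pacing: launder = target / shortfall_cycle--, which
	 * spreads the remaining target evenly over the remaining cycles.
	 */
	shortfall_cycle = VM_LAUNDER_RATE / VM_INACT_SCAN_RATE;
	while (shortfall_cycle > 0 && target > 0) {
		launder = target / shortfall_cycle--;
		target -= launder;	/* assume the request is met */
		printf("launder %d pages, %d remaining\n", launder, target);
	}
	return (0);
}
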
+
+/*
* vm_pageout_scan does the dirty work for the pageout daemon.
*
- * pass 0 - Update active LRU/deactivate pages
- * pass 1 - Free inactive pages
- * pass 2 - Launder dirty pages
+ * pass == 0: Update active LRU/deactivate pages
+ * pass >= 1: Free inactive pages
*
* Returns true if pass was zero or enough pages were freed by the inactive
* queue scan to meet the target.
@@ -856,10 +1270,9 @@ vm_pageout_scan(struct vm_domain *vmd, int pass)
struct vm_pagequeue *pq;
vm_object_t object;
long min_scan;
- int act_delta, addl_page_shortage, deficit, error, inactq_shortage;
- int maxlaunder, maxscan, page_shortage, scan_tick, scanned;
- int starting_page_shortage, vnodes_skipped;
- boolean_t pageout_ok, queue_locked;
+ int act_delta, addl_page_shortage, deficit, inactq_shortage, maxscan;
+ int page_shortage, scan_tick, scanned, starting_page_shortage;
+ boolean_t queue_locked;
/*
* If we need to reclaim memory ask kernel caches to return
@@ -901,23 +1314,6 @@ vm_pageout_scan(struct vm_domain *vmd, int pass)
starting_page_shortage = page_shortage;
/*
- * maxlaunder limits the number of dirty pages we flush per scan.
- * For most systems a smaller value (16 or 32) is more robust under
- * extreme memory and disk pressure because any unnecessary writes
- * to disk can result in extreme performance degredation. However,
- * systems with excessive dirty pages (especially when MAP_NOSYNC is
- * used) will die horribly with limited laundering. If the pageout
- * daemon cannot clean enough pages in the first pass, we let it go
- * all out in succeeding passes.
- */
- if ((maxlaunder = vm_max_launder) <= 1)
- maxlaunder = 1;
- if (pass > 1)
- maxlaunder = 10000;
-
- vnodes_skipped = 0;
-
- /*
* Start scanning the inactive queue for pages that we can free. The
* scan will stop when we reach the target or we have scanned the
* entire queue. (Note that m->act_count is not used to make
@@ -932,7 +1328,7 @@ vm_pageout_scan(struct vm_domain *vmd, int pass)
m = next) {
vm_pagequeue_assert_locked(pq);
KASSERT(queue_locked, ("unlocked inactive queue"));
- KASSERT(m->queue == PQ_INACTIVE, ("Inactive queue %p", m));
+ KASSERT(vm_page_inactive(m), ("Inactive queue %p", m));
PCPU_INC(cnt.v_pdpages);
next = TAILQ_NEXT(m, plinks.q);
@@ -995,11 +1391,15 @@ unlock_page:
KASSERT(m->hold_count == 0, ("Held page %p", m));
/*
- * We unlock the inactive page queue, invalidating the
- * 'next' pointer. Use our marker to remember our
- * place.
+ * Dequeue the inactive page and unlock the inactive page
+ * queue, invalidating the 'next' pointer. Dequeueing the
+ * page here avoids a later reacquisition (and release) of
+ * the inactive page queue lock when vm_page_activate(),
+ * vm_page_free(), or vm_page_launder() is called. Use a
+ * marker to remember our place in the inactive queue.
*/
TAILQ_INSERT_AFTER(&pq->pq_pl, m, &vmd->vmd_marker, plinks.q);
+ vm_page_dequeue_locked(m);
vm_pagequeue_unlock(pq);
queue_locked = FALSE;
@@ -1028,6 +1428,7 @@ unlock_page:
}
if (act_delta != 0) {
if (object->ref_count != 0) {
+ PCPU_INC(cnt.v_reactivated);
vm_page_activate(m);
/*
@@ -1039,8 +1440,14 @@ unlock_page:
*/
m->act_count += act_delta + ACT_ADVANCE;
goto drop_page;
- } else if ((object->flags & OBJ_DEAD) == 0)
- goto requeue_page;
+ } else if ((object->flags & OBJ_DEAD) == 0) {
+ vm_pagequeue_lock(pq);
+ queue_locked = TRUE;
+ m->queue = PQ_INACTIVE;
+ TAILQ_INSERT_TAIL(&pq->pq_pl, m, plinks.q);
+ vm_pagequeue_cnt_inc(pq);
+ goto drop_page;
+ }
}
/*
@@ -1056,83 +1463,23 @@ unlock_page:
pmap_remove_all(m);
}
+ /*
+ * Clean pages can be freed, but dirty pages must be sent back
+ * to the laundry, unless they belong to a dead object.
+ * Requeueing dirty pages from dead objects is pointless, as
+ * they are being paged out and freed by the thread that
+ * destroyed the object.
+ */
if (m->dirty == 0) {
- /*
- * Clean pages can be freed.
- */
free_page:
vm_page_free(m);
PCPU_INC(cnt.v_dfree);
--page_shortage;
- } else if ((object->flags & OBJ_DEAD) != 0) {
- /*
- * Leave dirty pages from dead objects at the front of
- * the queue. They are being paged out and freed by
- * the thread that destroyed the object. They will
- * leave the queue shortly after the scan finishes, so
- * they should be discounted from the inactive count.
- */
- addl_page_shortage++;
- } else if ((m->flags & PG_WINATCFLS) == 0 && pass < 2) {
- /*
- * Dirty pages need to be paged out, but flushing
- * a page is extremely expensive versus freeing
- * a clean page. Rather then artificially limiting
- * the number of pages we can flush, we instead give
- * dirty pages extra priority on the inactive queue
- * by forcing them to be cycled through the queue
- * twice before being flushed, after which the
- * (now clean) page will cycle through once more
- * before being freed. This significantly extends
- * the thrash point for a heavily loaded machine.
- */
- m->flags |= PG_WINATCFLS;
-requeue_page:
- vm_pagequeue_lock(pq);
- queue_locked = TRUE;
- vm_page_requeue_locked(m);
- } else if (maxlaunder > 0) {
- /*
- * We always want to try to flush some dirty pages if
- * we encounter them, to keep the system stable.
- * Normally this number is small, but under extreme
- * pressure where there are insufficient clean pages
- * on the inactive queue, we may have to go all out.
- */
-
- if (object->type != OBJT_SWAP &&
- object->type != OBJT_DEFAULT)
- pageout_ok = TRUE;
- else if (disable_swap_pageouts)
- pageout_ok = FALSE;
- else if (defer_swap_pageouts)
- pageout_ok = vm_page_count_min();
- else
- pageout_ok = TRUE;
- if (!pageout_ok)
- goto requeue_page;
- error = vm_pageout_clean(m);
- /*
- * Decrement page_shortage on success to account for
- * the (future) cleaned page. Otherwise we could wind
- * up laundering or cleaning too many pages.
- */
- if (error == 0) {
- page_shortage--;
- maxlaunder--;
- } else if (error == EDEADLK) {
- pageout_lock_miss++;
- vnodes_skipped++;
- } else if (error == EBUSY) {
- addl_page_shortage++;
- }
- vm_page_lock_assert(m, MA_NOTOWNED);
- goto relock_queue;
- }
+ } else if ((object->flags & OBJ_DEAD) == 0)
+ vm_page_launder(m);
drop_page:
vm_page_unlock(m);
VM_OBJECT_WUNLOCK(object);
-relock_queue:
if (!queue_locked) {
vm_pagequeue_lock(pq);
queue_locked = TRUE;
@@ -1142,6 +1489,24 @@ relock_queue:
}
vm_pagequeue_unlock(pq);
+ /*
+ * Wake up the laundry thread so that it can perform any needed
+ * laundering. If we didn't meet our target, we're in shortfall and
+ * need to launder more aggressively.
+ */
+ if (vm_laundry_request == VM_LAUNDRY_IDLE &&
+ starting_page_shortage > 0) {
+ pq = &vm_dom[0].vmd_pagequeues[PQ_LAUNDRY];
+ vm_pagequeue_lock(pq);
+ if (page_shortage > 0) {
+ vm_laundry_request = VM_LAUNDRY_SHORTFALL;
+ PCPU_INC(cnt.v_pdshortfalls);
+ } else if (vm_laundry_request != VM_LAUNDRY_SHORTFALL)
+ vm_laundry_request = VM_LAUNDRY_BACKGROUND;
+ wakeup(&vm_laundry_request);
+ vm_pagequeue_unlock(pq);
+ }
+
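
Taken together with the laundry worker above, vm_laundry_request implements a small three-state handshake: the page daemon posts VM_LAUNDRY_SHORTFALL when the inactive scan missed its free-page target, posts VM_LAUNDRY_BACKGROUND when it merely woke up and met the target, and the laundry thread drops the request back to VM_LAUNDRY_IDLE once it has no laundering target left. A single-threaded sketch of just those transitions (the queue lock, wakeup(), and mtx_sleep() are omitted, and the function names are invented):

#include <stdio.h>

enum laundry_req { IDLE, BACKGROUND, SHORTFALL };

static enum laundry_req req = IDLE;

/* Page daemon side, mirroring the end of vm_pageout_scan(). */
static void
pageout_post(int starting_shortage, int remaining_shortage)
{

	if (req != IDLE || starting_shortage <= 0)
		return;
	if (remaining_shortage > 0)
		req = SHORTFALL;	/* the scan fell short */
	else
		req = BACKGROUND;	/* routine wakeup, target was met */
}

/* Laundry worker side, mirroring the bottom of its main loop. */
static void
laundry_ack(int target)
{

	if (target == 0)
		req = IDLE;
}

int
main(void)
{

	pageout_post(100, 40);	/* 40 of 100 pages still short */
	printf("request = %d (2 = SHORTFALL)\n", req);
	laundry_ack(0);		/* shortfall run finished */
	printf("request = %d (0 = IDLE)\n", req);
	return (0);
}
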
#if !defined(NO_SWAPPING)
/*
* Wakeup the swapout daemon if we didn't free the targeted number of
@@ -1152,14 +1517,6 @@ relock_queue:
#endif
/*
- * Wakeup the sync daemon if we skipped a vnode in a writeable object
- * and we didn't free enough pages.
- */
- if (vnodes_skipped > 0 && page_shortage > vm_cnt.v_free_target -
- vm_cnt.v_free_min)
- (void)speedup_syncer();
-
- /*
* If the inactive queue scan fails repeatedly to meet its
* target, kill the largest process.
*/
@@ -1167,10 +1524,20 @@ relock_queue:
/*
* Compute the number of pages we want to try to move from the
- * active queue to the inactive queue.
+ * active queue to either the inactive or laundry queue.
+ *
+ * When scanning active pages, we make clean pages count more heavily
+ * towards the page shortage than dirty pages. This is because dirty
+ * pages must be laundered before they can be reused and thus have less
+ * utility when attempting to quickly alleviate a shortage. However,
+ * this weighting also causes the scan to deactivate dirty pages more
+ * more aggressively, improving the effectiveness of clustering and
+ * ensuring that they can eventually be reused.
*/
- inactq_shortage = vm_cnt.v_inactive_target - vm_cnt.v_inactive_count +
+ inactq_shortage = vm_cnt.v_inactive_target - (vm_cnt.v_inactive_count +
+ vm_cnt.v_laundry_count / act_scan_laundry_weight) +
vm_paging_target() + deficit + addl_page_shortage;
+ inactq_shortage *= act_scan_laundry_weight;
pq = &vmd->vmd_pagequeues[PQ_ACTIVE];
vm_pagequeue_lock(pq);
@@ -1254,14 +1621,44 @@ relock_queue:
m->act_count -= min(m->act_count, ACT_DECLINE);
/*
- * Move this page to the tail of the active or inactive
+ * Move this page to the tail of the active, inactive or laundry
* queue depending on usage.
*/
if (m->act_count == 0) {
/* Dequeue to avoid later lock recursion. */
vm_page_dequeue_locked(m);
- vm_page_deactivate(m);
- inactq_shortage--;
+
+ /*
+ * When not short for inactive pages, let dirty pages go
+ * through the inactive queue before moving to the
+ * laundry queues. This gives them some extra time to
+ * be reactivated, potentially avoiding an expensive
+ * pageout. During a page shortage, the inactive queue
+ * is necessarily small, so we may move dirty pages
+ * directly to the laundry queue.
+ */
+ if (inactq_shortage <= 0)
+ vm_page_deactivate(m);
+ else {
+ /*
+ * Calling vm_page_test_dirty() here would
+ * require acquisition of the object's write
+ * lock. However, during a page shortage,
+ * directing dirty pages into the laundry
+ * queue is only an optimization and not a
+ * requirement. Therefore, we simply rely on
+ * the opportunistic updates to the page's
+ * dirty field by the pmap.
+ */
+ if (m->dirty == 0) {
+ vm_page_deactivate(m);
+ inactq_shortage -=
+ act_scan_laundry_weight;
+ } else {
+ vm_page_launder(m);
+ inactq_shortage--;
+ }
+ }
} else
vm_page_requeue_locked(m);
vm_page_unlock(m);
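
The act_scan_laundry_weight accounting works in scaled units: the starting shortage discounts laundry pages by the weight and is then multiplied by it, after which each clean page deactivated pays off act_scan_laundry_weight units and each dirty page sent to the laundry pays off one. With the default weight of 3, a dirty page is worth a third of a clean one, so the scan keeps deactivating (and feeding the laundry queue) longer when it encounters mostly dirty pages. A worked example with invented queue sizes:

#include <stdio.h>

int
main(void)
{
	/* Invented page counts. */
	int v_inactive_target = 20000, v_inactive_count = 12000;
	int v_laundry_count = 6000, paging_target = 0, deficit = 0;
	int addl_page_shortage = 0, act_scan_laundry_weight = 3;
	int inactq_shortage, clean_moved, dirty_moved;

	inactq_shortage = v_inactive_target - (v_inactive_count +
	    v_laundry_count / act_scan_laundry_weight) +
	    paging_target + deficit + addl_page_shortage;
	inactq_shortage *= act_scan_laundry_weight;
	printf("scaled shortage: %d units\n", inactq_shortage);

	/*
	 * Paying it off: each clean page deactivated is worth the full
	 * weight, each dirty page laundered is worth one unit.
	 */
	clean_moved = 2000;
	dirty_moved = 4000;
	inactq_shortage -= clean_moved * act_scan_laundry_weight +
	    dirty_moved;
	printf("after moving %d clean and %d dirty pages: %d units left\n",
	    clean_moved, dirty_moved, inactq_shortage);
	return (0);
}
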
@@ -1569,14 +1966,14 @@ vm_pageout_worker(void *arg)
* thread during the previous scan, which must have
* been a level 0 scan, or vm_pageout_wanted was
* already set and the scan failed to free enough
- * pages. If we haven't yet performed a level >= 2
- * scan (unlimited dirty cleaning), then upgrade the
- * level and scan again now. Otherwise, sleep a bit
- * and try again later.
+ * pages. If we haven't yet performed a level >= 1
+ * (page reclamation) scan, then increase the level
+ * and scan again now. Otherwise, sleep a bit and
+ * try again later.
*/
mtx_unlock(&vm_page_queue_free_mtx);
- if (pass > 1)
- pause("psleep", hz / 2);
+ if (pass >= 1)
+ pause("psleep", hz / VM_INACT_SCAN_RATE);
pass++;
} else {
/*
@@ -1647,6 +2044,14 @@ vm_pageout_init(void)
/* XXX does not really belong here */
if (vm_page_max_wired == 0)
vm_page_max_wired = vm_cnt.v_free_count / 3;
+
+ /*
+ * Target amount of memory to move out of the laundry queue during a
+ * background laundering. This is proportional to the amount of system
+ * memory.
+ */
+ vm_background_launder_target = (vm_cnt.v_free_target -
+ vm_cnt.v_free_min) / 10;
}
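
For a sense of scale under hypothetical thresholds: with v_free_target at 32768 pages and v_free_min at 8192 pages of 4 KB each, the computation above yields a background laundering target of roughly 2400 pages, on the order of 10 MB per background run.

#include <stdio.h>

int
main(void)
{
	/* Hypothetical thresholds, in 4 KB pages. */
	int v_free_target = 32768, v_free_min = 8192, page_size = 4096;
	int target = (v_free_target - v_free_min) / 10;

	printf("vm_background_launder_target = %d pages (~%d KB)\n",
	    target, target * (page_size / 1024));
	return (0);
}
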
/*
@@ -1661,6 +2066,10 @@ vm_pageout(void)
#endif
swap_pager_swap_init();
+ error = kthread_add(vm_pageout_laundry_worker, NULL, curproc, NULL,
+ 0, 0, "laundry: dom0");
+ if (error != 0)
+ panic("starting laundry for domain 0, error %d", error);
#ifdef VM_NUMA_ALLOC
for (i = 1; i < vm_ndomains; i++) {
error = kthread_add(vm_pageout_worker, (void *)(uintptr_t)i,
diff --git a/sys/vm/vm_phys.c b/sys/vm/vm_phys.c
index ab48f58..484417b 100644
--- a/sys/vm/vm_phys.c
+++ b/sys/vm/vm_phys.c
@@ -1314,7 +1314,7 @@ vm_phys_zero_pages_idle(void)
for (;;) {
TAILQ_FOREACH_REVERSE(m, &fl[oind].pl, pglist, plinks.q) {
for (m_tmp = m; m_tmp < &m[1 << oind]; m_tmp++) {
- if ((m_tmp->flags & (PG_CACHED | PG_ZERO)) == 0) {
+ if ((m_tmp->flags & PG_ZERO) == 0) {
vm_phys_unfree_page(m_tmp);
vm_phys_freecnt_adj(m, -1);
mtx_unlock(&vm_page_queue_free_mtx);
diff --git a/sys/vm/vm_radix.c b/sys/vm/vm_radix.c
index 80c8bd0..4f0a575 100644
--- a/sys/vm/vm_radix.c
+++ b/sys/vm/vm_radix.c
@@ -339,8 +339,6 @@ vm_radix_insert(struct vm_radix *rtree, vm_page_t page)
index = page->pindex;
-restart:
-
/*
* The owner of record for root is not really important because it
* will never be used.
@@ -358,32 +356,10 @@ restart:
panic("%s: key %jx is already present",
__func__, (uintmax_t)index);
clev = vm_radix_keydiff(m->pindex, index);
-
- /*
- * During node allocation the trie that is being
- * walked can be modified because of recursing radix
- * trie operations.
- * If this is the case, the recursing functions signal
- * such situation and the insert operation must
- * start from scratch again.
- * The freed radix node will then be in the UMA
- * caches very likely to avoid the same situation
- * to happen.
- */
- rtree->rt_flags |= RT_INSERT_INPROG;
tmp = vm_radix_node_get(vm_radix_trimkey(index,
clev + 1), 2, clev);
- rtree->rt_flags &= ~RT_INSERT_INPROG;
- if (tmp == NULL) {
- rtree->rt_flags &= ~RT_TRIE_MODIFIED;
+ if (tmp == NULL)
return (ENOMEM);
- }
- if ((rtree->rt_flags & RT_TRIE_MODIFIED) != 0) {
- rtree->rt_flags &= ~RT_TRIE_MODIFIED;
- tmp->rn_count = 0;
- vm_radix_node_put(tmp);
- goto restart;
- }
*parentp = tmp;
vm_radix_addpage(tmp, index, clev, page);
vm_radix_addpage(tmp, m->pindex, clev, m);
@@ -407,21 +383,9 @@ restart:
*/
newind = rnode->rn_owner;
clev = vm_radix_keydiff(newind, index);
-
- /* See the comments above. */
- rtree->rt_flags |= RT_INSERT_INPROG;
tmp = vm_radix_node_get(vm_radix_trimkey(index, clev + 1), 2, clev);
- rtree->rt_flags &= ~RT_INSERT_INPROG;
- if (tmp == NULL) {
- rtree->rt_flags &= ~RT_TRIE_MODIFIED;
+ if (tmp == NULL)
return (ENOMEM);
- }
- if ((rtree->rt_flags & RT_TRIE_MODIFIED) != 0) {
- rtree->rt_flags &= ~RT_TRIE_MODIFIED;
- tmp->rn_count = 0;
- vm_radix_node_put(tmp);
- goto restart;
- }
*parentp = tmp;
vm_radix_addpage(tmp, index, clev, page);
slot = vm_radix_slot(newind, clev);
@@ -696,51 +660,37 @@ descend:
}
/*
- * Remove the specified index from the tree.
- * Panics if the key is not present.
+ * Remove the specified index from the trie, and return the value stored at
+ * that index. If the index is not present, return NULL.
*/
-void
+vm_page_t
vm_radix_remove(struct vm_radix *rtree, vm_pindex_t index)
{
struct vm_radix_node *rnode, *parent;
vm_page_t m;
int i, slot;
- /*
- * Detect if a page is going to be removed from a trie which is
- * already undergoing another trie operation.
- * Right now this is only possible for vm_radix_remove() recursing
- * into vm_radix_insert().
- * If this is the case, the caller must be notified about this
- * situation. It will also takecare to update the RT_TRIE_MODIFIED
- * accordingly.
- * The RT_TRIE_MODIFIED bit is set here because the remove operation
- * will always succeed.
- */
- if ((rtree->rt_flags & RT_INSERT_INPROG) != 0)
- rtree->rt_flags |= RT_TRIE_MODIFIED;
-
rnode = vm_radix_getroot(rtree);
if (vm_radix_isleaf(rnode)) {
m = vm_radix_topage(rnode);
if (m->pindex != index)
- panic("%s: invalid key found", __func__);
+ return (NULL);
vm_radix_setroot(rtree, NULL);
- return;
+ return (m);
}
parent = NULL;
for (;;) {
if (rnode == NULL)
- panic("vm_radix_remove: impossible to locate the key");
+ return (NULL);
slot = vm_radix_slot(index, rnode->rn_clev);
if (vm_radix_isleaf(rnode->rn_child[slot])) {
m = vm_radix_topage(rnode->rn_child[slot]);
if (m->pindex != index)
- panic("%s: invalid key found", __func__);
+ return (NULL);
rnode->rn_child[slot] = NULL;
rnode->rn_count--;
if (rnode->rn_count > 1)
- break;
+ return (m);
for (i = 0; i < VM_RADIX_COUNT; i++)
if (rnode->rn_child[i] != NULL)
break;
@@ -757,7 +707,7 @@ vm_radix_remove(struct vm_radix *rtree, vm_pindex_t index)
rnode->rn_count--;
rnode->rn_child[i] = NULL;
vm_radix_node_put(rnode);
- break;
+ return (m);
}
parent = rnode;
rnode = rnode->rn_child[slot];
@@ -774,9 +724,6 @@ vm_radix_reclaim_allnodes(struct vm_radix *rtree)
{
struct vm_radix_node *root;
- KASSERT((rtree->rt_flags & RT_INSERT_INPROG) == 0,
- ("vm_radix_reclaim_allnodes: unexpected trie recursion"));
-
root = vm_radix_getroot(rtree);
if (root == NULL)
return;
diff --git a/sys/vm/vm_radix.h b/sys/vm/vm_radix.h
index 63d27d4..b8a722d 100644
--- a/sys/vm/vm_radix.h
+++ b/sys/vm/vm_radix.h
@@ -42,7 +42,7 @@ vm_page_t vm_radix_lookup(struct vm_radix *rtree, vm_pindex_t index);
vm_page_t vm_radix_lookup_ge(struct vm_radix *rtree, vm_pindex_t index);
vm_page_t vm_radix_lookup_le(struct vm_radix *rtree, vm_pindex_t index);
void vm_radix_reclaim_allnodes(struct vm_radix *rtree);
-void vm_radix_remove(struct vm_radix *rtree, vm_pindex_t index);
+vm_page_t vm_radix_remove(struct vm_radix *rtree, vm_pindex_t index);
vm_page_t vm_radix_replace(struct vm_radix *rtree, vm_page_t newpage);
#endif /* _KERNEL */
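
Under the new contract, vm_radix_remove() is also a lookup: it hands back the page that was stored at the index, or NULL when nothing was, where the old version panicked on a missing key. A toy userland stand-in (a fixed array instead of a trie, with invented names) that demonstrates only that return-value contract:

#include <stdio.h>

#define NSLOTS	16

static int values[NSLOTS];
static int present[NSLOTS];

/*
 * Same shape as the new vm_radix_remove(): return the removed value,
 * or NULL when the key is not present, instead of panicking.
 */
static int *
toy_remove(unsigned int index)
{

	if (index >= NSLOTS || !present[index])
		return (NULL);
	present[index] = 0;
	return (&values[index]);
}

int
main(void)
{
	int *p;

	values[3] = 42;
	present[3] = 1;
	p = toy_remove(3);
	printf("first remove:  %s\n", p != NULL ? "found" : "NULL");
	p = toy_remove(3);
	printf("second remove: %s\n", p != NULL ? "found" : "NULL");
	return (0);
}
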
diff --git a/sys/vm/vm_reserv.c b/sys/vm/vm_reserv.c
index 8bb1788..7e2bfb6 100644
--- a/sys/vm/vm_reserv.c
+++ b/sys/vm/vm_reserv.c
@@ -62,7 +62,7 @@ __FBSDID("$FreeBSD$");
/*
* The reservation system supports the speculative allocation of large physical
- * pages ("superpages"). Speculative allocation enables the fully-automatic
+ * pages ("superpages"). Speculative allocation enables the fully automatic
* utilization of superpages by the virtual memory system. In other words, no
* programmatic directives are required to use superpages.
*/
@@ -155,11 +155,11 @@ popmap_is_set(popmap_t popmap[], int i)
* physical pages for the range [pindex, pindex + VM_LEVEL_0_NPAGES) of offsets
* within that object. The reservation's "popcnt" tracks the number of these
* small physical pages that are in use at any given time. When and if the
- * reservation is not fully utilized, it appears in the queue of partially-
+ * reservation is not fully utilized, it appears in the queue of partially
* populated reservations. The reservation always appears on the containing
* object's list of reservations.
*
- * A partially-populated reservation can be broken and reclaimed at any time.
+ * A partially populated reservation can be broken and reclaimed at any time.
*/
struct vm_reserv {
TAILQ_ENTRY(vm_reserv) partpopq;
@@ -196,11 +196,11 @@ struct vm_reserv {
static vm_reserv_t vm_reserv_array;
/*
- * The partially-populated reservation queue
+ * The partially populated reservation queue
*
- * This queue enables the fast recovery of an unused cached or free small page
- * from a partially-populated reservation. The reservation at the head of
- * this queue is the least-recently-changed, partially-populated reservation.
+ * This queue enables the fast recovery of an unused free small page from a
+ * partially populated reservation. The reservation at the head of this queue
+ * is the least recently changed, partially populated reservation.
*
* Access to this queue is synchronized by the free page queue lock.
*/
@@ -225,7 +225,7 @@ SYSCTL_PROC(_vm_reserv, OID_AUTO, fullpop, CTLTYPE_INT | CTLFLAG_RD, NULL, 0,
static int sysctl_vm_reserv_partpopq(SYSCTL_HANDLER_ARGS);
SYSCTL_OID(_vm_reserv, OID_AUTO, partpopq, CTLTYPE_STRING | CTLFLAG_RD, NULL, 0,
- sysctl_vm_reserv_partpopq, "A", "Partially-populated reservation queues");
+ sysctl_vm_reserv_partpopq, "A", "Partially populated reservation queues");
static long vm_reserv_reclaimed;
SYSCTL_LONG(_vm_reserv, OID_AUTO, reclaimed, CTLFLAG_RD,
@@ -267,7 +267,7 @@ sysctl_vm_reserv_fullpop(SYSCTL_HANDLER_ARGS)
}
/*
- * Describes the current state of the partially-populated reservation queue.
+ * Describes the current state of the partially populated reservation queue.
*/
static int
sysctl_vm_reserv_partpopq(SYSCTL_HANDLER_ARGS)
@@ -301,7 +301,7 @@ sysctl_vm_reserv_partpopq(SYSCTL_HANDLER_ARGS)
/*
* Reduces the given reservation's population count. If the population count
* becomes zero, the reservation is destroyed. Additionally, moves the
- * reservation to the tail of the partially-populated reservation queue if the
+ * reservation to the tail of the partially populated reservation queue if the
* population count is non-zero.
*
* The free page queue lock must be held.
@@ -363,7 +363,7 @@ vm_reserv_has_pindex(vm_reserv_t rv, vm_pindex_t pindex)
/*
* Increases the given reservation's population count. Moves the reservation
- * to the tail of the partially-populated reservation queue.
+ * to the tail of the partially populated reservation queue.
*
* The free page queue must be locked.
*/
@@ -404,14 +404,18 @@ vm_reserv_populate(vm_reserv_t rv, int index)
* physical address boundary that is a multiple of that value. Both
* "alignment" and "boundary" must be a power of two.
*
+ * The page "mpred" must immediately precede the offset "pindex" within the
+ * specified object.
+ *
* The object and free page queue must be locked.
*/
vm_page_t
vm_reserv_alloc_contig(vm_object_t object, vm_pindex_t pindex, u_long npages,
- vm_paddr_t low, vm_paddr_t high, u_long alignment, vm_paddr_t boundary)
+ vm_paddr_t low, vm_paddr_t high, u_long alignment, vm_paddr_t boundary,
+ vm_page_t mpred)
{
vm_paddr_t pa, size;
- vm_page_t m, m_ret, mpred, msucc;
+ vm_page_t m, m_ret, msucc;
vm_pindex_t first, leftcap, rightcap;
vm_reserv_t rv;
u_long allocpages, maxpages, minpages;
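
The new "mpred" parameter and the precondition spelled out above shift the predecessor lookup from vm_reserv_alloc_contig() into its caller, so the same radix lookup is not repeated. The fragment below only illustrates the new calling convention; it is not compilable on its own and is not copied from the real caller, but the two helpers it names are the ones visible in this diff.

        /*
         * Caller side: look up the page that immediately precedes
         * "pindex" in the object once...
         */
        mpred = vm_radix_lookup_le(&object->rtree, pindex);

        /*
         * ...and pass it in, rather than having vm_reserv_alloc_contig()
         * repeat the lookup internally.  mpred may be NULL if the object
         * has no resident page before pindex.
         */
        m_ret = vm_reserv_alloc_contig(object, pindex, npages, low, high,
            alignment, boundary, mpred);
        if (m_ret == NULL) {
                /*
                 * No reservation satisfied the request; the caller falls
                 * back to its ordinary contiguous allocation path.
                 */
        }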
@@ -448,10 +452,11 @@ vm_reserv_alloc_contig(vm_object_t object, vm_pindex_t pindex, u_long npages,
/*
* Look for an existing reservation.
*/
- mpred = vm_radix_lookup_le(&object->rtree, pindex);
if (mpred != NULL) {
+ KASSERT(mpred->object == object,
+ ("vm_reserv_alloc_contig: object doesn't contain mpred"));
KASSERT(mpred->pindex < pindex,
- ("vm_reserv_alloc_contig: pindex already allocated"));
+ ("vm_reserv_alloc_contig: mpred doesn't precede pindex"));
rv = vm_reserv_from_page(mpred);
if (rv->object == object && vm_reserv_has_pindex(rv, pindex))
goto found;
@@ -460,7 +465,7 @@ vm_reserv_alloc_contig(vm_object_t object, vm_pindex_t pindex, u_long npages,
msucc = TAILQ_FIRST(&object->memq);
if (msucc != NULL) {
KASSERT(msucc->pindex > pindex,
- ("vm_reserv_alloc_contig: pindex already allocated"));
+ ("vm_reserv_alloc_contig: msucc doesn't succeed pindex"));
rv = vm_reserv_from_page(msucc);
if (rv->object == object && vm_reserv_has_pindex(rv, pindex))
goto found;
@@ -597,7 +602,7 @@ found:
}
/*
- * Allocates a page from an existing or newly-created reservation.
+ * Allocates a page from an existing or newly created reservation.
*
* The page "mpred" must immediately precede the offset "pindex" within the
* specified object.
@@ -721,12 +726,12 @@ found:
}
/*
- * Breaks the given reservation. Except for the specified cached or free
- * page, all cached and free pages in the reservation are returned to the
- * physical memory allocator. The reservation's population count and map are
- * reset to their initial state.
+ * Breaks the given reservation. Except for the specified free page, all free
+ * pages in the reservation are returned to the physical memory allocator.
+ * The reservation's population count and map are reset to their initial
+ * state.
*
- * The given reservation must not be in the partially-populated reservation
+ * The given reservation must not be in the partially populated reservation
* queue. The free page queue lock must be held.
*/
static void
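
The break operation documented above hands back every page the reservation still holds free, except one page the caller asks to keep, and then wipes the population bookkeeping. A sketch in the same simplified model as the earlier snippets, reusing struct resv_pop and NPAGES from there; phys_free_page() is a hypothetical stand-in for returning a page to the physical memory allocator (the kernel can batch this into contiguous runs rather than single pages).

static void
resv_break(struct resv_pop *rv, int keep_index)
{
        int i;

        for (i = 0; i < NPAGES; i++) {
                if (i == keep_index)
                        continue;               /* caller keeps this page */
                if ((rv->popmap[i / 64] & (1ULL << (i % 64))) == 0)
                        phys_free_page(rv, i);  /* hypothetical helper */
        }
        /* Reset the population count and map to their initial state. */
        memset(rv->popmap, 0, sizeof(rv->popmap));
        rv->popcnt = 0;
}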
@@ -895,7 +900,7 @@ vm_reserv_level(vm_page_t m)
}
/*
- * Returns a reservation level if the given page belongs to a fully-populated
+ * Returns a reservation level if the given page belongs to a fully populated
* reservation and -1 otherwise.
*/
int
@@ -908,47 +913,8 @@ vm_reserv_level_iffullpop(vm_page_t m)
}
/*
- * Prepare for the reactivation of a cached page.
- *
- * First, suppose that the given page "m" was allocated individually, i.e., not
- * as part of a reservation, and cached. Then, suppose a reservation
- * containing "m" is allocated by the same object. Although "m" and the
- * reservation belong to the same object, "m"'s pindex may not match the
- * reservation's.
- *
- * The free page queue must be locked.
- */
-boolean_t
-vm_reserv_reactivate_page(vm_page_t m)
-{
- vm_reserv_t rv;
- int index;
-
- mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
- rv = vm_reserv_from_page(m);
- if (rv->object == NULL)
- return (FALSE);
- KASSERT((m->flags & PG_CACHED) != 0,
- ("vm_reserv_reactivate_page: page %p is not cached", m));
- if (m->object == rv->object &&
- m->pindex - rv->pindex == (index = VM_RESERV_INDEX(m->object,
- m->pindex)))
- vm_reserv_populate(rv, index);
- else {
- KASSERT(rv->inpartpopq,
- ("vm_reserv_reactivate_page: reserv %p's inpartpopq is FALSE",
- rv));
- TAILQ_REMOVE(&vm_rvq_partpop, rv, partpopq);
- rv->inpartpopq = FALSE;
- /* Don't release "m" to the physical memory allocator. */
- vm_reserv_break(rv, m);
- }
- return (TRUE);
-}
-
-/*
- * Breaks the given partially-populated reservation, releasing its cached and
- * free pages to the physical memory allocator.
+ * Breaks the given partially populated reservation, releasing its free pages
+ * to the physical memory allocator.
*
* The free page queue lock must be held.
*/
@@ -966,9 +932,9 @@ vm_reserv_reclaim(vm_reserv_t rv)
}
/*
- * Breaks the reservation at the head of the partially-populated reservation
- * queue, releasing its cached and free pages to the physical memory
- * allocator. Returns TRUE if a reservation is broken and FALSE otherwise.
+ * Breaks the reservation at the head of the partially populated reservation
+ * queue, releasing its free pages to the physical memory allocator. Returns
+ * TRUE if a reservation is broken and FALSE otherwise.
*
* The free page queue lock must be held.
*/
@@ -986,11 +952,10 @@ vm_reserv_reclaim_inactive(void)
}
/*
- * Searches the partially-populated reservation queue for the least recently
- * active reservation with unused pages, i.e., cached or free, that satisfy the
- * given request for contiguous physical memory. If a satisfactory reservation
- * is found, it is broken. Returns TRUE if a reservation is broken and FALSE
- * otherwise.
+ * Searches the partially populated reservation queue for the least recently
+ * changed reservation with free pages that satisfy the given request for
+ * contiguous physical memory. If a satisfactory reservation is found, it is
+ * broken. Returns TRUE if a reservation is broken and FALSE otherwise.
*
* The free page queue lock must be held.
*/
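
The search described in the last comment above reduces to a per-reservation question: does this reservation contain a long enough run of free pages? In the simplified model of the earlier sketches (struct resv_pop and NPAGES reused), that check might look like the following; the kernel additionally requires the run's physical addresses to fall within [low, high] and to satisfy the alignment and boundary constraints, and it walks the partially populated queue from the head, breaking the first reservation that qualifies.

static int
resv_find_free_run(const struct resv_pop *rv, int npages)
{
        int i, run;

        run = 0;
        for (i = 0; i < NPAGES; i++) {
                if ((rv->popmap[i / 64] & (1ULL << (i % 64))) == 0) {
                        if (++run == npages)
                                return (i - npages + 1);  /* run start */
                } else
                        run = 0;
        }
        return (-1);            /* no satisfactory run in this one */
}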
diff --git a/sys/vm/vm_reserv.h b/sys/vm/vm_reserv.h
index 52f6ab2..8b33b48 100644
--- a/sys/vm/vm_reserv.h
+++ b/sys/vm/vm_reserv.h
@@ -47,7 +47,7 @@
*/
vm_page_t vm_reserv_alloc_contig(vm_object_t object, vm_pindex_t pindex,
u_long npages, vm_paddr_t low, vm_paddr_t high,
- u_long alignment, vm_paddr_t boundary);
+ u_long alignment, vm_paddr_t boundary, vm_page_t mpred);
vm_page_t vm_reserv_alloc_page(vm_object_t object, vm_pindex_t pindex,
vm_page_t mpred);
void vm_reserv_break_all(vm_object_t object);
@@ -56,7 +56,6 @@ void vm_reserv_init(void);
bool vm_reserv_is_page_free(vm_page_t m);
int vm_reserv_level(vm_page_t m);
int vm_reserv_level_iffullpop(vm_page_t m);
-boolean_t vm_reserv_reactivate_page(vm_page_t m);
boolean_t vm_reserv_reclaim_contig(u_long npages, vm_paddr_t low,
vm_paddr_t high, u_long alignment, vm_paddr_t boundary);
boolean_t vm_reserv_reclaim_inactive(void);
diff --git a/sys/vm/vnode_pager.c b/sys/vm/vnode_pager.c
index f9dfbf0..3349101 100644
--- a/sys/vm/vnode_pager.c
+++ b/sys/vm/vnode_pager.c
@@ -466,10 +466,6 @@ vnode_pager_setsize(struct vnode *vp, vm_ooffset_t nsize)
* replacement from working properly.
*/
vm_page_clear_dirty(m, base, PAGE_SIZE - base);
- } else if ((nsize & PAGE_MASK) &&
- vm_page_is_cached(object, OFF_TO_IDX(nsize))) {
- vm_page_cache_free(object, OFF_TO_IDX(nsize),
- nobjsize);
}
}
object->un_pager.vnp.vnp_size = nsize;
@@ -894,8 +890,7 @@ vnode_pager_generic_getpages(struct vnode *vp, vm_page_t *m, int count,
for (tpindex = m[0]->pindex - 1;
tpindex >= startpindex && tpindex < m[0]->pindex;
tpindex--, i++) {
- p = vm_page_alloc(object, tpindex, VM_ALLOC_NORMAL |
- VM_ALLOC_IFNOTCACHED);
+ p = vm_page_alloc(object, tpindex, VM_ALLOC_NORMAL);
if (p == NULL) {
/* Shift the array. */
for (int j = 0; j < i; j++)
@@ -932,8 +927,7 @@ vnode_pager_generic_getpages(struct vnode *vp, vm_page_t *m, int count,
for (tpindex = m[count - 1]->pindex + 1;
tpindex < endpindex; i++, tpindex++) {
- p = vm_page_alloc(object, tpindex, VM_ALLOC_NORMAL |
- VM_ALLOC_IFNOTCACHED);
+ p = vm_page_alloc(object, tpindex, VM_ALLOC_NORMAL);
if (p == NULL)
break;
bp->b_pages[i] = p;