Diffstat (limited to 'sys')
-rw-r--r--  sys/amd64/amd64/pmap.c    |  65
-rw-r--r--  sys/amd64/include/pmap.h  |   4
-rw-r--r--  sys/conf/files            |   1
-rw-r--r--  sys/i386/i386/pmap.c      |  63
-rw-r--r--  sys/i386/include/pmap.h   |   4
-rw-r--r--  sys/i386/xen/pmap.c       |   7
-rw-r--r--  sys/vm/_vm_radix.h        |  50
-rw-r--r--  sys/vm/vm_object.c        |  13
-rw-r--r--  sys/vm/vm_object.h        |  12
-rw-r--r--  sys/vm/vm_page.c          | 327
-rw-r--r--  sys/vm/vm_page.h          |   3
-rw-r--r--  sys/vm/vm_radix.c         | 776
-rw-r--r--  sys/vm/vm_radix.h         |  46
-rw-r--r--  sys/vm/vm_reserv.c        |  71
14 files changed, 994 insertions(+), 448 deletions(-)
diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c
index 4777c08..55d2cff 100644
--- a/sys/amd64/amd64/pmap.c
+++ b/sys/amd64/amd64/pmap.c
@@ -131,6 +131,7 @@ __FBSDID("$FreeBSD$");
#include <vm/vm_extern.h>
#include <vm/vm_pageout.h>
#include <vm/vm_pager.h>
+#include <vm/vm_radix.h>
#include <vm/vm_reserv.h>
#include <vm/uma.h>
@@ -1497,7 +1498,8 @@ pmap_free_zero_pages(vm_page_t free)
while (free != NULL) {
m = free;
- free = m->right;
+ free = (void *)m->object;
+ m->object = NULL;
/* Preserve the page's PG_ZERO setting. */
vm_page_free_toq(m);
}
@@ -1516,7 +1518,7 @@ pmap_add_delayed_free_list(vm_page_t m, vm_page_t *free, boolean_t set_PG_ZERO)
m->flags |= PG_ZERO;
else
m->flags &= ~PG_ZERO;
- m->right = *free;
+ m->object = (void *)*free;
*free = m;
}
@@ -1526,31 +1528,12 @@ pmap_add_delayed_free_list(vm_page_t m, vm_page_t *free, boolean_t set_PG_ZERO)
* for mapping a distinct range of virtual addresses. The pmap's collection is
* ordered by this virtual address range.
*/
-static void
+static __inline void
pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte)
{
- vm_page_t root;
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
- root = pmap->pm_root;
- if (root == NULL) {
- mpte->left = NULL;
- mpte->right = NULL;
- } else {
- root = vm_page_splay(mpte->pindex, root);
- if (mpte->pindex < root->pindex) {
- mpte->left = root->left;
- mpte->right = root;
- root->left = NULL;
- } else if (mpte->pindex == root->pindex)
- panic("pmap_insert_pt_page: pindex already inserted");
- else {
- mpte->right = root->right;
- mpte->left = root;
- root->right = NULL;
- }
- }
- pmap->pm_root = mpte;
+ vm_radix_insert(&pmap->pm_root, mpte);
}
/*
@@ -1558,19 +1541,12 @@ pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte)
* specified pmap's collection of idle page table pages. Returns NULL if there
* is no page table page corresponding to the specified virtual address.
*/
-static vm_page_t
+static __inline vm_page_t
pmap_lookup_pt_page(pmap_t pmap, vm_offset_t va)
{
- vm_page_t mpte;
- vm_pindex_t pindex = pmap_pde_pindex(va);
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
- if ((mpte = pmap->pm_root) != NULL && mpte->pindex != pindex) {
- mpte = vm_page_splay(pindex, mpte);
- if ((pmap->pm_root = mpte)->pindex != pindex)
- mpte = NULL;
- }
- return (mpte);
+ return (vm_radix_lookup(&pmap->pm_root, pmap_pde_pindex(va)));
}
/*
@@ -1578,25 +1554,12 @@ pmap_lookup_pt_page(pmap_t pmap, vm_offset_t va)
* of idle page table pages. The specified page table page must be a member of
* the pmap's collection.
*/
-static void
+static __inline void
pmap_remove_pt_page(pmap_t pmap, vm_page_t mpte)
{
- vm_page_t root;
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
- if (mpte != pmap->pm_root) {
- root = vm_page_splay(mpte->pindex, pmap->pm_root);
- KASSERT(mpte == root,
- ("pmap_remove_pt_page: mpte %p is missing from pmap %p",
- mpte, pmap));
- }
- if (mpte->left == NULL)
- root = mpte->right;
- else {
- root = vm_page_splay(mpte->pindex, mpte->left);
- root->right = mpte->right;
- }
- pmap->pm_root = root;
+ vm_radix_remove(&pmap->pm_root, mpte->pindex);
}
/*
@@ -1693,7 +1656,7 @@ pmap_pinit0(pmap_t pmap)
PMAP_LOCK_INIT(pmap);
pmap->pm_pml4 = (pml4_entry_t *)PHYS_TO_DMAP(KPML4phys);
- pmap->pm_root = NULL;
+ pmap->pm_root.rt_root = 0;
CPU_ZERO(&pmap->pm_active);
PCPU_SET(curpmap, pmap);
TAILQ_INIT(&pmap->pm_pvchunk);
@@ -1734,7 +1697,7 @@ pmap_pinit(pmap_t pmap)
/* install self-referential address mapping entry(s) */
pmap->pm_pml4[PML4PML4I] = VM_PAGE_TO_PHYS(pml4pg) | PG_V | PG_RW | PG_A | PG_M;
- pmap->pm_root = NULL;
+ pmap->pm_root.rt_root = 0;
CPU_ZERO(&pmap->pm_active);
TAILQ_INIT(&pmap->pm_pvchunk);
bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
@@ -1976,7 +1939,7 @@ pmap_release(pmap_t pmap)
KASSERT(pmap->pm_stats.resident_count == 0,
("pmap_release: pmap resident count %ld != 0",
pmap->pm_stats.resident_count));
- KASSERT(pmap->pm_root == NULL,
+ KASSERT(vm_radix_is_empty(&pmap->pm_root),
("pmap_release: pmap has reserved page table page(s)"));
m = PHYS_TO_VM_PAGE(pmap->pm_pml4[PML4PML4I] & PG_FRAME);
@@ -2273,7 +2236,7 @@ reclaim_pv_chunk(pmap_t locked_pmap, struct rwlock **lockp)
}
if (m_pc == NULL && free != NULL) {
m_pc = free;
- free = m_pc->right;
+ free = (void *)m_pc->object;
/* Recycle a freed page table page. */
m_pc->wire_count = 1;
atomic_add_int(&cnt.v_wire_count, 1);
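With the splay-tree left/right pointers removed from struct vm_page (see the vm_page.h hunk below), the delayed-free list of page-table pages is now threaded through the page's otherwise unused object field, which is what the free = (void *)m->object and m->object = (void *)*free changes above implement. A minimal sketch of that linkage, assuming the usual kernel types; the push/drain helpers are illustrative, not part of the commit:

    /* Queue a page-table page for release after the TLB shootdown. */
    static void
    delayed_free_push(vm_page_t *free, vm_page_t m)
    {

            m->object = (void *)*free;      /* the object field doubles as the link */
            *free = m;
    }

    /* Release every queued page, as pmap_free_zero_pages() does above. */
    static void
    delayed_free_drain(vm_page_t *free)
    {
            vm_page_t m;

            while ((m = *free) != NULL) {
                    *free = (void *)m->object;      /* follow the link */
                    m->object = NULL;               /* detach before freeing */
                    vm_page_free_toq(m);
            }
    }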
diff --git a/sys/amd64/include/pmap.h b/sys/amd64/include/pmap.h
index 0fc8867..6d76ec3 100644
--- a/sys/amd64/include/pmap.h
+++ b/sys/amd64/include/pmap.h
@@ -150,6 +150,8 @@
#include <sys/_lock.h>
#include <sys/_mutex.h>
+#include <vm/_vm_radix.h>
+
typedef u_int64_t pd_entry_t;
typedef u_int64_t pt_entry_t;
typedef u_int64_t pdp_entry_t;
@@ -250,7 +252,7 @@ struct pmap {
cpuset_t pm_active; /* active on cpus */
/* spare u_int here due to padding */
struct pmap_statistics pm_stats; /* pmap statistics */
- vm_page_t pm_root; /* spare page table pages */
+ struct vm_radix pm_root; /* spare page table pages */
};
typedef struct pmap *pmap_t;
diff --git a/sys/conf/files b/sys/conf/files
index bba5c9d..04e66a1 100644
--- a/sys/conf/files
+++ b/sys/conf/files
@@ -3630,6 +3630,7 @@ vm/vm_page.c standard
vm/vm_pageout.c standard
vm/vm_pager.c standard
vm/vm_phys.c standard
+vm/vm_radix.c standard
vm/vm_reserv.c standard
vm/vm_unix.c standard
vm/vm_zeroidle.c standard
diff --git a/sys/i386/i386/pmap.c b/sys/i386/i386/pmap.c
index 4e66dbd..6499986 100644
--- a/sys/i386/i386/pmap.c
+++ b/sys/i386/i386/pmap.c
@@ -133,6 +133,7 @@ __FBSDID("$FreeBSD$");
#include <vm/vm_extern.h>
#include <vm/vm_pageout.h>
#include <vm/vm_pager.h>
+#include <vm/vm_radix.h>
#include <vm/vm_reserv.h>
#include <vm/uma.h>
@@ -1573,7 +1574,8 @@ pmap_free_zero_pages(vm_page_t free)
while (free != NULL) {
m = free;
- free = m->right;
+ free = (void *)m->object;
+ m->object = NULL;
/* Preserve the page's PG_ZERO setting. */
vm_page_free_toq(m);
}
@@ -1592,7 +1594,7 @@ pmap_add_delayed_free_list(vm_page_t m, vm_page_t *free, boolean_t set_PG_ZERO)
m->flags |= PG_ZERO;
else
m->flags &= ~PG_ZERO;
- m->right = *free;
+ m->object = (void *)*free;
*free = m;
}
@@ -1602,31 +1604,12 @@ pmap_add_delayed_free_list(vm_page_t m, vm_page_t *free, boolean_t set_PG_ZERO)
* for mapping a distinct range of virtual addresses. The pmap's collection is
* ordered by this virtual address range.
*/
-static void
+static __inline void
pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte)
{
- vm_page_t root;
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
- root = pmap->pm_root;
- if (root == NULL) {
- mpte->left = NULL;
- mpte->right = NULL;
- } else {
- root = vm_page_splay(mpte->pindex, root);
- if (mpte->pindex < root->pindex) {
- mpte->left = root->left;
- mpte->right = root;
- root->left = NULL;
- } else if (mpte->pindex == root->pindex)
- panic("pmap_insert_pt_page: pindex already inserted");
- else {
- mpte->right = root->right;
- mpte->left = root;
- root->right = NULL;
- }
- }
- pmap->pm_root = mpte;
+ vm_radix_insert(&pmap->pm_root, mpte);
}
/*
@@ -1634,19 +1617,12 @@ pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte)
* specified pmap's collection of idle page table pages. Returns NULL if there
* is no page table page corresponding to the specified virtual address.
*/
-static vm_page_t
+static __inline vm_page_t
pmap_lookup_pt_page(pmap_t pmap, vm_offset_t va)
{
- vm_page_t mpte;
- vm_pindex_t pindex = va >> PDRSHIFT;
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
- if ((mpte = pmap->pm_root) != NULL && mpte->pindex != pindex) {
- mpte = vm_page_splay(pindex, mpte);
- if ((pmap->pm_root = mpte)->pindex != pindex)
- mpte = NULL;
- }
- return (mpte);
+ return (vm_radix_lookup(&pmap->pm_root, va >> PDRSHIFT));
}
/*
@@ -1654,21 +1630,12 @@ pmap_lookup_pt_page(pmap_t pmap, vm_offset_t va)
* of idle page table pages. The specified page table page must be a member of
* the pmap's collection.
*/
-static void
+static __inline void
pmap_remove_pt_page(pmap_t pmap, vm_page_t mpte)
{
- vm_page_t root;
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
- if (mpte != pmap->pm_root)
- vm_page_splay(mpte->pindex, pmap->pm_root);
- if (mpte->left == NULL)
- root = mpte->right;
- else {
- root = vm_page_splay(mpte->pindex, mpte->left);
- root->right = mpte->right;
- }
- pmap->pm_root = root;
+ vm_radix_remove(&pmap->pm_root, mpte->pindex);
}
/*
@@ -1755,7 +1722,7 @@ pmap_pinit0(pmap_t pmap)
#ifdef PAE
pmap->pm_pdpt = (pdpt_entry_t *)(KERNBASE + (vm_offset_t)IdlePDPT);
#endif
- pmap->pm_root = NULL;
+ pmap->pm_root.rt_root = 0;
CPU_ZERO(&pmap->pm_active);
PCPU_SET(curpmap, pmap);
TAILQ_INIT(&pmap->pm_pvchunk);
@@ -1794,9 +1761,9 @@ pmap_pinit(pmap_t pmap)
KASSERT(pmap_kextract((vm_offset_t)pmap->pm_pdpt) < (4ULL<<30),
("pmap_pinit: pdpt above 4g"));
#endif
- pmap->pm_root = NULL;
+ pmap->pm_root.rt_root = 0;
}
- KASSERT(pmap->pm_root == NULL,
+ KASSERT(vm_radix_is_empty(&pmap->pm_root),
("pmap_pinit: pmap has reserved page table page(s)"));
/*
@@ -2060,7 +2027,7 @@ pmap_release(pmap_t pmap)
KASSERT(pmap->pm_stats.resident_count == 0,
("pmap_release: pmap resident count %ld != 0",
pmap->pm_stats.resident_count));
- KASSERT(pmap->pm_root == NULL,
+ KASSERT(vm_radix_is_empty(&pmap->pm_root),
("pmap_release: pmap has reserved page table page(s)"));
pmap_lazyfix(pmap);
@@ -2343,7 +2310,7 @@ out:
}
if (m_pc == NULL && pv_vafree != 0 && free != NULL) {
m_pc = free;
- free = m_pc->right;
+ free = (void *)m_pc->object;
/* Recycle a freed page table page. */
m_pc->wire_count = 1;
atomic_add_int(&cnt.v_wire_count, 1);
diff --git a/sys/i386/include/pmap.h b/sys/i386/include/pmap.h
index 1de47f2..8c20e1b 100644
--- a/sys/i386/include/pmap.h
+++ b/sys/i386/include/pmap.h
@@ -159,6 +159,8 @@
#include <sys/_lock.h>
#include <sys/_mutex.h>
+#include <vm/_vm_radix.h>
+
#ifdef PAE
typedef uint64_t pdpt_entry_t;
@@ -441,7 +443,7 @@ struct pmap {
pdpt_entry_t *pm_pdpt; /* KVA of page directory pointer
table */
#endif
- vm_page_t pm_root; /* spare page table pages */
+ struct vm_radix pm_root; /* spare page table pages */
};
typedef struct pmap *pmap_t;
diff --git a/sys/i386/xen/pmap.c b/sys/i386/xen/pmap.c
index e8115b0..0f7a80f 100644
--- a/sys/i386/xen/pmap.c
+++ b/sys/i386/xen/pmap.c
@@ -1335,7 +1335,8 @@ pmap_free_zero_pages(vm_page_t free)
while (free != NULL) {
m = free;
- free = m->right;
+ free = (void *)m->object;
+ m->object = NULL;
vm_page_free_zero(m);
}
}
@@ -1393,7 +1394,7 @@ _pmap_unwire_ptp(pmap_t pmap, vm_page_t m, vm_page_t *free)
* Put page on a list so that it is released after
* *ALL* TLB shootdown is done
*/
- m->right = *free;
+ m->object = (void *)*free;
*free = m;
}
@@ -2090,7 +2091,7 @@ out:
}
if (m_pc == NULL && pv_vafree != 0 && free != NULL) {
m_pc = free;
- free = m_pc->right;
+ free = (void *)m_pc->object;
/* Recycle a freed page table page. */
m_pc->wire_count = 1;
atomic_add_int(&cnt.v_wire_count, 1);
diff --git a/sys/vm/_vm_radix.h b/sys/vm/_vm_radix.h
new file mode 100644
index 0000000..07b035e
--- /dev/null
+++ b/sys/vm/_vm_radix.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2013 EMC Corp.
+ * Copyright (c) 2011 Jeffrey Roberson <jeff@freebsd.org>
+ * Copyright (c) 2008 Mayur Shardul <mayur.shardul@gmail.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ */
+
+#ifndef __VM_RADIX_H_
+#define __VM_RADIX_H_
+
+/*
+ * Radix tree root.
+ */
+struct vm_radix {
+ uintptr_t rt_root;
+};
+
+#ifdef _KERNEL
+
+static __inline boolean_t
+vm_radix_is_empty(struct vm_radix *rtree)
+{
+
+ return (rtree->rt_root == 0);
+}
+
+#endif /* _KERNEL */
+#endif /* !__VM_RADIX_H_ */
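Because the root is a single word, a trie embedded in another structure needs no constructor: zeroing rt_root, as the pmap_pinit() and vm_object_zinit() hunks elsewhere in this commit do, yields an empty trie, and vm_radix_is_empty() reduces to a comparison with 0. A minimal sketch under that assumption, using a hypothetical container structure:

    struct pte_cache {                      /* hypothetical container */
            struct vm_radix pc_root;
    };

    static void
    pte_cache_init(struct pte_cache *pc)
    {

            pc->pc_root.rt_root = 0;        /* empty trie, no allocation needed */
            KASSERT(vm_radix_is_empty(&pc->pc_root),
                ("pte_cache_init: trie not empty"));
    }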
diff --git a/sys/vm/vm_object.c b/sys/vm/vm_object.c
index 684fdf4..3d9ec61 100644
--- a/sys/vm/vm_object.c
+++ b/sys/vm/vm_object.c
@@ -94,6 +94,7 @@ __FBSDID("$FreeBSD$");
#include <vm/swap_pager.h>
#include <vm/vm_kern.h>
#include <vm/vm_extern.h>
+#include <vm/vm_radix.h>
#include <vm/vm_reserv.h>
#include <vm/uma.h>
@@ -167,8 +168,8 @@ vm_object_zdtor(void *mem, int size, void *arg)
object = (vm_object_t)mem;
KASSERT(TAILQ_EMPTY(&object->memq),
("object %p has resident pages in its memq", object));
- KASSERT(object->root == NULL,
- ("object %p has resident pages in its tree", object));
+ KASSERT(vm_radix_is_empty(&object->rtree),
+ ("object %p has resident pages in its trie", object));
#if VM_NRESERVLEVEL > 0
KASSERT(LIST_EMPTY(&object->rvq),
("object %p has reservations",
@@ -199,11 +200,11 @@ vm_object_zinit(void *mem, int size, int flags)
rw_init_flags(&object->lock, "vm object", RW_DUPOK);
/* These are true for any object that has been freed */
- object->root = NULL;
+ object->rtree.rt_root = 0;
object->paging_in_progress = 0;
object->resident_page_count = 0;
object->shadow_count = 0;
- object->cache = NULL;
+ object->cache.rt_root = 0;
return (0);
}
@@ -295,6 +296,8 @@ vm_object_init(void)
NULL,
#endif
vm_object_zinit, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM|UMA_ZONE_NOFREE);
+
+ vm_radix_init();
}
void
@@ -742,7 +745,7 @@ vm_object_terminate(vm_object_t object)
* modified by the preceding loop.
*/
if (object->resident_page_count != 0) {
- object->root = NULL;
+ vm_radix_reclaim_allnodes(&object->rtree);
TAILQ_INIT(&object->memq);
object->resident_page_count = 0;
if (object->type == OBJT_VNODE)
diff --git a/sys/vm/vm_object.h b/sys/vm/vm_object.h
index dac90fc..2f2c961 100644
--- a/sys/vm/vm_object.h
+++ b/sys/vm/vm_object.h
@@ -72,6 +72,8 @@
#include <sys/_mutex.h>
#include <sys/_rwlock.h>
+#include <vm/_vm_radix.h>
+
/*
* Types defined:
*
@@ -79,10 +81,10 @@
*
* The root of cached pages pool is protected by both the per-object lock
* and the free pages queue mutex.
- * On insert in the cache splay tree, the per-object lock is expected
+ * On insert in the cache radix trie, the per-object lock is expected
* to be already held and the free pages queue mutex will be
* acquired during the operation too.
- * On remove and lookup from the cache splay tree, only the free
+ * On remove and lookup from the cache radix trie, only the free
* pages queue mutex is expected to be locked.
* These rules allow for reliably checking for the presence of cached
* pages with only the per-object lock held, thereby reducing contention
@@ -101,7 +103,7 @@ struct vm_object {
LIST_HEAD(, vm_object) shadow_head; /* objects that this is a shadow for */
LIST_ENTRY(vm_object) shadow_list; /* chain of shadow objects */
TAILQ_HEAD(, vm_page) memq; /* list of resident pages */
- vm_page_t root; /* root of the resident page splay tree */
+ struct vm_radix rtree; /* root of the resident page radix trie*/
vm_pindex_t size; /* Object size */
int generation; /* generation ID */
int ref_count; /* How many refs?? */
@@ -116,7 +118,7 @@ struct vm_object {
vm_ooffset_t backing_object_offset;/* Offset in backing object */
TAILQ_ENTRY(vm_object) pager_object_list; /* list of all objects of this pager type */
LIST_HEAD(, vm_reserv) rvq; /* list of reservations */
- vm_page_t cache; /* (o + f) root of the cache page splay tree */
+ struct vm_radix cache; /* (o + f) root of the cache page radix trie */
void *handle;
union {
/*
@@ -246,7 +248,7 @@ static __inline boolean_t
vm_object_cache_is_empty(vm_object_t object)
{
- return (object->cache == NULL);
+ return (vm_radix_is_empty(&object->cache));
}
vm_object_t vm_object_allocate (objtype_t, vm_pindex_t);
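The locking rules quoted in the comment above translate into a simple discipline for the cache trie, and the vm_page.c hunks below follow it: insertion runs with the object write-locked and additionally holds the free-pages queue mutex, while lookup and removal rely on the free-pages queue mutex alone. A sketch of that discipline, assuming the lock names used elsewhere in this diff:

    /* Insert into the cache trie: object lock held, free queue mutex taken. */
    VM_OBJECT_ASSERT_WLOCKED(object);
    mtx_lock(&vm_page_queue_free_mtx);
    vm_radix_insert(&object->cache, m);             /* keyed by m->pindex */
    mtx_unlock(&vm_page_queue_free_mtx);

    /* Lookup and removal: the free queue mutex alone is sufficient. */
    mtx_lock(&vm_page_queue_free_mtx);
    m = vm_radix_lookup(&object->cache, pindex);
    if (m != NULL)
            vm_radix_remove(&object->cache, m->pindex);
    mtx_unlock(&vm_page_queue_free_mtx);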
diff --git a/sys/vm/vm_page.c b/sys/vm/vm_page.c
index 08f58e1..3e4cce9 100644
--- a/sys/vm/vm_page.c
+++ b/sys/vm/vm_page.c
@@ -110,6 +110,7 @@ __FBSDID("$FreeBSD$");
#include <vm/vm_pageout.h>
#include <vm/vm_pager.h>
#include <vm/vm_phys.h>
+#include <vm/vm_radix.h>
#include <vm/vm_reserv.h>
#include <vm/vm_extern.h>
#include <vm/uma.h>
@@ -794,63 +795,6 @@ vm_page_dirty_KBI(vm_page_t m)
}
/*
- * vm_page_splay:
- *
- * Implements Sleator and Tarjan's top-down splay algorithm. Returns
- * the vm_page containing the given pindex. If, however, that
- * pindex is not found in the vm_object, returns a vm_page that is
- * adjacent to the pindex, coming before or after it.
- */
-vm_page_t
-vm_page_splay(vm_pindex_t pindex, vm_page_t root)
-{
- struct vm_page dummy;
- vm_page_t lefttreemax, righttreemin, y;
-
- if (root == NULL)
- return (root);
- lefttreemax = righttreemin = &dummy;
- for (;; root = y) {
- if (pindex < root->pindex) {
- if ((y = root->left) == NULL)
- break;
- if (pindex < y->pindex) {
- /* Rotate right. */
- root->left = y->right;
- y->right = root;
- root = y;
- if ((y = root->left) == NULL)
- break;
- }
- /* Link into the new root's right tree. */
- righttreemin->left = root;
- righttreemin = root;
- } else if (pindex > root->pindex) {
- if ((y = root->right) == NULL)
- break;
- if (pindex > y->pindex) {
- /* Rotate left. */
- root->right = y->left;
- y->left = root;
- root = y;
- if ((y = root->right) == NULL)
- break;
- }
- /* Link into the new root's left tree. */
- lefttreemax->right = root;
- lefttreemax = root;
- } else
- break;
- }
- /* Assemble the new root. */
- lefttreemax->right = root->left;
- righttreemin->left = root->right;
- root->left = dummy.right;
- root->right = dummy.left;
- return (root);
-}
-
-/*
* vm_page_insert: [ internal use only ]
*
* Inserts the given mem entry into the object and object list.
@@ -865,7 +809,7 @@ vm_page_splay(vm_pindex_t pindex, vm_page_t root)
void
vm_page_insert(vm_page_t m, vm_object_t object, vm_pindex_t pindex)
{
- vm_page_t root;
+ vm_page_t neighbor;
VM_OBJECT_ASSERT_WLOCKED(object);
if (m->object != NULL)
@@ -880,28 +824,19 @@ vm_page_insert(vm_page_t m, vm_object_t object, vm_pindex_t pindex)
/*
* Now link into the object's ordered list of backed pages.
*/
- root = object->root;
- if (root == NULL) {
- m->left = NULL;
- m->right = NULL;
+ if (object->resident_page_count == 0) {
TAILQ_INSERT_TAIL(&object->memq, m, listq);
} else {
- root = vm_page_splay(pindex, root);
- if (pindex < root->pindex) {
- m->left = root->left;
- m->right = root;
- root->left = NULL;
- TAILQ_INSERT_BEFORE(root, m, listq);
- } else if (pindex == root->pindex)
- panic("vm_page_insert: offset already allocated");
- else {
- m->right = root->right;
- m->left = root;
- root->right = NULL;
- TAILQ_INSERT_AFTER(&object->memq, root, m, listq);
- }
+ neighbor = vm_radix_lookup_le(&object->rtree, pindex);
+ if (neighbor != NULL) {
+ KASSERT(pindex > neighbor->pindex,
+ ("vm_page_insert: offset %ju less than %ju",
+ (uintmax_t)pindex, (uintmax_t)neighbor->pindex));
+ TAILQ_INSERT_AFTER(&object->memq, neighbor, m, listq);
+ } else
+ TAILQ_INSERT_HEAD(&object->memq, m, listq);
}
- object->root = m;
+ vm_radix_insert(&object->rtree, m);
/*
* Show that the object has one more resident page.
@@ -937,7 +872,6 @@ void
vm_page_remove(vm_page_t m)
{
vm_object_t object;
- vm_page_t next, prev, root;
if ((m->oflags & VPO_UNMANAGED) == 0)
vm_page_lock_assert(m, MA_OWNED);
@@ -952,42 +886,7 @@ vm_page_remove(vm_page_t m)
/*
* Now remove from the object's list of backed pages.
*/
- if ((next = TAILQ_NEXT(m, listq)) != NULL && next->left == m) {
- /*
- * Since the page's successor in the list is also its parent
- * in the tree, its right subtree must be empty.
- */
- next->left = m->left;
- KASSERT(m->right == NULL,
- ("vm_page_remove: page %p has right child", m));
- } else if ((prev = TAILQ_PREV(m, pglist, listq)) != NULL &&
- prev->right == m) {
- /*
- * Since the page's predecessor in the list is also its parent
- * in the tree, its left subtree must be empty.
- */
- KASSERT(m->left == NULL,
- ("vm_page_remove: page %p has left child", m));
- prev->right = m->right;
- } else {
- if (m != object->root)
- vm_page_splay(m->pindex, object->root);
- if (m->left == NULL)
- root = m->right;
- else if (m->right == NULL)
- root = m->left;
- else {
- /*
- * Move the page's successor to the root, because
- * pages are usually removed in ascending order.
- */
- if (m->right != next)
- vm_page_splay(m->pindex, m->right);
- next->left = m->left;
- root = next;
- }
- object->root = root;
- }
+ vm_radix_remove(&object->rtree, m->pindex);
TAILQ_REMOVE(&object->memq, m, listq);
/*
@@ -1015,15 +914,9 @@ vm_page_remove(vm_page_t m)
vm_page_t
vm_page_lookup(vm_object_t object, vm_pindex_t pindex)
{
- vm_page_t m;
VM_OBJECT_ASSERT_WLOCKED(object);
- if ((m = object->root) != NULL && m->pindex != pindex) {
- m = vm_page_splay(pindex, m);
- if ((object->root = m)->pindex != pindex)
- m = NULL;
- }
- return (m);
+ return (vm_radix_lookup(&object->rtree, pindex));
}
/*
@@ -1040,13 +933,8 @@ vm_page_find_least(vm_object_t object, vm_pindex_t pindex)
vm_page_t m;
VM_OBJECT_ASSERT_WLOCKED(object);
- if ((m = TAILQ_FIRST(&object->memq)) != NULL) {
- if (m->pindex < pindex) {
- m = vm_page_splay(pindex, object->root);
- if ((object->root = m)->pindex < pindex)
- m = TAILQ_NEXT(m, listq);
- }
- }
+ if ((m = TAILQ_FIRST(&object->memq)) != NULL && m->pindex < pindex)
+ m = vm_radix_lookup_ge(&object->rtree, pindex);
return (m);
}
@@ -1126,45 +1014,18 @@ vm_page_rename(vm_page_t m, vm_object_t new_object, vm_pindex_t new_pindex)
void
vm_page_cache_free(vm_object_t object, vm_pindex_t start, vm_pindex_t end)
{
- vm_page_t m, m_next;
+ vm_page_t m;
boolean_t empty;
mtx_lock(&vm_page_queue_free_mtx);
- if (__predict_false(vm_object_cache_is_empty(object))) {
+ if (__predict_false(vm_radix_is_empty(&object->cache))) {
mtx_unlock(&vm_page_queue_free_mtx);
return;
}
- m = object->cache = vm_page_splay(start, object->cache);
- if (m->pindex < start) {
- if (m->right == NULL)
- m = NULL;
- else {
- m_next = vm_page_splay(start, m->right);
- m_next->left = m;
- m->right = NULL;
- m = object->cache = m_next;
- }
- }
-
- /*
- * At this point, "m" is either (1) a reference to the page
- * with the least pindex that is greater than or equal to
- * "start" or (2) NULL.
- */
- for (; m != NULL && (m->pindex < end || end == 0); m = m_next) {
- /*
- * Find "m"'s successor and remove "m" from the
- * object's cache.
- */
- if (m->right == NULL) {
- object->cache = m->left;
- m_next = NULL;
- } else {
- m_next = vm_page_splay(start, m->right);
- m_next->left = m->left;
- object->cache = m_next;
- }
- /* Convert "m" to a free page. */
+ while ((m = vm_radix_lookup_ge(&object->cache, start)) != NULL) {
+ if (end != 0 && m->pindex >= end)
+ break;
+ vm_radix_remove(&object->cache, m->pindex);
m->object = NULL;
m->valid = 0;
/* Clear PG_CACHED and set PG_FREE. */
@@ -1174,7 +1035,7 @@ vm_page_cache_free(vm_object_t object, vm_pindex_t start, vm_pindex_t end)
cnt.v_cache_count--;
cnt.v_free_count++;
}
- empty = vm_object_cache_is_empty(object);
+ empty = vm_radix_is_empty(&object->cache);
mtx_unlock(&vm_page_queue_free_mtx);
if (object->type == OBJT_VNODE && empty)
vdrop(object->handle);
@@ -1189,15 +1050,9 @@ vm_page_cache_free(vm_object_t object, vm_pindex_t start, vm_pindex_t end)
static inline vm_page_t
vm_page_cache_lookup(vm_object_t object, vm_pindex_t pindex)
{
- vm_page_t m;
mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
- if ((m = object->cache) != NULL && m->pindex != pindex) {
- m = vm_page_splay(pindex, m);
- if ((object->cache = m)->pindex != pindex)
- m = NULL;
- }
- return (m);
+ return (vm_radix_lookup(&object->cache, pindex));
}
/*
@@ -1209,28 +1064,11 @@ vm_page_cache_lookup(vm_object_t object, vm_pindex_t pindex)
static void
vm_page_cache_remove(vm_page_t m)
{
- vm_object_t object;
- vm_page_t root;
mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
KASSERT((m->flags & PG_CACHED) != 0,
("vm_page_cache_remove: page %p is not cached", m));
- object = m->object;
- if (m != object->cache) {
- root = vm_page_splay(m->pindex, object->cache);
- KASSERT(root == m,
- ("vm_page_cache_remove: page %p is not cached in object %p",
- m, object));
- }
- if (m->left == NULL)
- root = m->right;
- else if (m->right == NULL)
- root = m->left;
- else {
- root = vm_page_splay(m->pindex, m->left);
- root->right = m->right;
- }
- object->cache = root;
+ vm_radix_remove(&m->object->cache, m->pindex);
m->object = NULL;
cnt.v_cache_count--;
}
@@ -1250,7 +1088,7 @@ void
vm_page_cache_transfer(vm_object_t orig_object, vm_pindex_t offidxstart,
vm_object_t new_object)
{
- vm_page_t m, m_next;
+ vm_page_t m;
/*
* Insertion into an object's collection of cached pages
@@ -1258,53 +1096,24 @@ vm_page_cache_transfer(vm_object_t orig_object, vm_pindex_t offidxstart,
* not.
*/
VM_OBJECT_ASSERT_WLOCKED(new_object);
- KASSERT(vm_object_cache_is_empty(new_object),
+ KASSERT(vm_radix_is_empty(&new_object->cache),
("vm_page_cache_transfer: object %p has cached pages",
new_object));
mtx_lock(&vm_page_queue_free_mtx);
- if ((m = orig_object->cache) != NULL) {
+ while ((m = vm_radix_lookup_ge(&orig_object->cache,
+ offidxstart)) != NULL) {
/*
* Transfer all of the pages with offset greater than or
* equal to 'offidxstart' from the original object's
* cache to the new object's cache.
*/
- m = vm_page_splay(offidxstart, m);
- if (m->pindex < offidxstart) {
- orig_object->cache = m;
- new_object->cache = m->right;
- m->right = NULL;
- } else {
- orig_object->cache = m->left;
- new_object->cache = m;
- m->left = NULL;
- }
- while ((m = new_object->cache) != NULL) {
- if ((m->pindex - offidxstart) >= new_object->size) {
- /*
- * Return all of the cached pages with
- * offset greater than or equal to the
- * new object's size to the original
- * object's cache.
- */
- new_object->cache = m->left;
- m->left = orig_object->cache;
- orig_object->cache = m;
- break;
- }
- m_next = vm_page_splay(m->pindex, m->right);
- /* Update the page's object and offset. */
- m->object = new_object;
- m->pindex -= offidxstart;
- if (m_next == NULL)
- break;
- m->right = NULL;
- m_next->left = m;
- new_object->cache = m_next;
- }
- KASSERT(vm_object_cache_is_empty(new_object) ||
- new_object->type == OBJT_SWAP,
- ("vm_page_cache_transfer: object %p's type is incompatible"
- " with cached pages", new_object));
+ if ((m->pindex - offidxstart) >= new_object->size)
+ break;
+ vm_radix_remove(&orig_object->cache, m->pindex);
+ /* Update the page's object and offset. */
+ m->object = new_object;
+ m->pindex -= offidxstart;
+ vm_radix_insert(&new_object->cache, m);
}
mtx_unlock(&vm_page_queue_free_mtx);
}
@@ -2324,7 +2133,7 @@ void
vm_page_cache(vm_page_t m)
{
vm_object_t object;
- vm_page_t next, prev, root;
+ boolean_t cache_was_empty;
vm_page_lock_assert(m, MA_OWNED);
object = m->object;
@@ -2359,42 +2168,7 @@ vm_page_cache(vm_page_t m)
* Remove the page from the object's collection of resident
* pages.
*/
- if ((next = TAILQ_NEXT(m, listq)) != NULL && next->left == m) {
- /*
- * Since the page's successor in the list is also its parent
- * in the tree, its right subtree must be empty.
- */
- next->left = m->left;
- KASSERT(m->right == NULL,
- ("vm_page_cache: page %p has right child", m));
- } else if ((prev = TAILQ_PREV(m, pglist, listq)) != NULL &&
- prev->right == m) {
- /*
- * Since the page's predecessor in the list is also its parent
- * in the tree, its left subtree must be empty.
- */
- KASSERT(m->left == NULL,
- ("vm_page_cache: page %p has left child", m));
- prev->right = m->right;
- } else {
- if (m != object->root)
- vm_page_splay(m->pindex, object->root);
- if (m->left == NULL)
- root = m->right;
- else if (m->right == NULL)
- root = m->left;
- else {
- /*
- * Move the page's successor to the root, because
- * pages are usually removed in ascending order.
- */
- if (m->right != next)
- vm_page_splay(m->pindex, m->right);
- next->left = m->left;
- root = next;
- }
- object->root = root;
- }
+ vm_radix_remove(&object->rtree, m->pindex);
TAILQ_REMOVE(&object->memq, m, listq);
object->resident_page_count--;
@@ -2412,25 +2186,8 @@ vm_page_cache(vm_page_t m)
mtx_lock(&vm_page_queue_free_mtx);
m->flags |= PG_CACHED;
cnt.v_cache_count++;
- root = object->cache;
- if (root == NULL) {
- m->left = NULL;
- m->right = NULL;
- } else {
- root = vm_page_splay(m->pindex, root);
- if (m->pindex < root->pindex) {
- m->left = root->left;
- m->right = root;
- root->left = NULL;
- } else if (__predict_false(m->pindex == root->pindex))
- panic("vm_page_cache: offset already cached");
- else {
- m->right = root->right;
- m->left = root;
- root->right = NULL;
- }
- }
- object->cache = m;
+ cache_was_empty = vm_radix_is_empty(&object->cache);
+ vm_radix_insert(&object->cache, m);
#if VM_NRESERVLEVEL > 0
if (!vm_reserv_free_page(m)) {
#else
@@ -2448,9 +2205,9 @@ vm_page_cache(vm_page_t m)
* the object's only resident page.
*/
if (object->type == OBJT_VNODE) {
- if (root == NULL && object->resident_page_count != 0)
+ if (cache_was_empty && object->resident_page_count != 0)
vhold(object->handle);
- else if (root != NULL && object->resident_page_count == 0)
+ else if (!cache_was_empty && object->resident_page_count == 0)
vdrop(object->handle);
}
}
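Range operations that used to splay the tree are now expressed as a lookup_ge()/remove() loop, as in the vm_page_cache_free() and vm_page_cache_transfer() hunks above. The shape of that idiom, restated as a free-standing sketch; the half-open [start, end) bounds mirror the callers above:

    /* Walk every cached page with start <= pindex < end and drop it. */
    while ((m = vm_radix_lookup_ge(&object->cache, start)) != NULL) {
            if (end != 0 && m->pindex >= end)
                    break;                          /* past the requested range */
            vm_radix_remove(&object->cache, m->pindex);
            /* ... convert "m" to a free page, as vm_page_cache_free() does ... */
    }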
diff --git a/sys/vm/vm_page.h b/sys/vm/vm_page.h
index 7a3f138..d2393dc 100644
--- a/sys/vm/vm_page.h
+++ b/sys/vm/vm_page.h
@@ -128,8 +128,6 @@ typedef uint64_t vm_page_bits_t;
struct vm_page {
TAILQ_ENTRY(vm_page) pageq; /* page queue or free list (Q) */
TAILQ_ENTRY(vm_page) listq; /* pages in same object (O) */
- struct vm_page *left; /* splay tree link (O) */
- struct vm_page *right; /* splay tree link (O) */
vm_object_t object; /* which object am I in (O,P)*/
vm_pindex_t pindex; /* offset into object (O,P) */
@@ -404,7 +402,6 @@ void vm_page_requeue(vm_page_t m);
void vm_page_requeue_locked(vm_page_t m);
void vm_page_set_valid_range(vm_page_t m, int base, int size);
void vm_page_sleep(vm_page_t m, const char *msg);
-vm_page_t vm_page_splay(vm_pindex_t, vm_page_t);
vm_offset_t vm_page_startup(vm_offset_t vaddr);
void vm_page_unhold_pages(vm_page_t *ma, int count);
void vm_page_unwire (vm_page_t, int);
diff --git a/sys/vm/vm_radix.c b/sys/vm/vm_radix.c
new file mode 100644
index 0000000..3b71fec
--- /dev/null
+++ b/sys/vm/vm_radix.c
@@ -0,0 +1,776 @@
+/*
+ * Copyright (c) 2013 EMC Corp.
+ * Copyright (c) 2011 Jeffrey Roberson <jeff@freebsd.org>
+ * Copyright (c) 2008 Mayur Shardul <mayur.shardul@gmail.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ */
+
+/*
+ * Path-compressed radix trie implementation.
+ * The following code is not generalized into a general purpose library
+ * because there are way too many parameters embedded that should really
+ * be decided by the library consumers. At the same time, consumers
+ * of this code must achieve the highest possible performance.
+ *
+ * The implementation takes into account the following rationale:
+ * - Size of the nodes should be as small as possible but still big enough
+ * to avoid a large maximum depth for the trie. This is a balance
+ * between the necessity to not wire too much physical memory for the nodes
+ * and the necessity to avoid too much cache pollution during the trie
+ * operations.
+ * - There is not a huge bias toward the number of lookup operations over
+ * the number of insert and remove operations. This implies that
+ * optimizations supposedly helping one operation but hurting the
+ * other must be evaluated carefully.
+ * - On average not many nodes are expected to be fully populated, hence
+ * level compression may just complicate things.
+ */
+
+#include <sys/cdefs.h>
+
+#include "opt_ddb.h"
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/vmmeter.h>
+
+#include <vm/uma.h>
+#include <vm/vm.h>
+#include <vm/vm_param.h>
+#include <vm/vm_page.h>
+#include <vm/vm_radix.h>
+
+#ifdef DDB
+#include <ddb/ddb.h>
+#endif
+
+/*
+ * These widths should allow the pointers to a node's children to fit within
+ * a single cache line. The extra levels from a narrow width should not be
+ * a problem thanks to path compression.
+ */
+#ifdef __LP64__
+#define VM_RADIX_WIDTH 4
+#else
+#define VM_RADIX_WIDTH 3
+#endif
+
+#define VM_RADIX_COUNT (1 << VM_RADIX_WIDTH)
+#define VM_RADIX_MASK (VM_RADIX_COUNT - 1)
+#define VM_RADIX_LIMIT \
+ (howmany((sizeof(vm_pindex_t) * NBBY), VM_RADIX_WIDTH) - 1)
+
+/* Flag bits stored in node pointers. */
+#define VM_RADIX_ISLEAF 0x1
+#define VM_RADIX_FLAGS 0x1
+#define VM_RADIX_PAD VM_RADIX_FLAGS
+
+/* Returns one unit associated with specified level. */
+#define VM_RADIX_UNITLEVEL(lev) \
+ ((vm_pindex_t)1 << ((VM_RADIX_LIMIT - (lev)) * VM_RADIX_WIDTH))
+
+struct vm_radix_node {
+ void *rn_child[VM_RADIX_COUNT]; /* Child nodes. */
+ vm_pindex_t rn_owner; /* Owner of record. */
+ uint16_t rn_count; /* Valid children. */
+ uint16_t rn_clev; /* Current level. */
+};
+
+static uma_zone_t vm_radix_node_zone;
+
+/*
+ * Allocate a radix node. Pre-allocation should ensure that the request
+ * will always be satisfied.
+ */
+static __inline struct vm_radix_node *
+vm_radix_node_get(vm_pindex_t owner, uint16_t count, uint16_t clevel)
+{
+ struct vm_radix_node *rnode;
+
+ rnode = uma_zalloc(vm_radix_node_zone, M_NOWAIT);
+
+ /*
+ * The required number of nodes should already be pre-allocated
+ * by vm_radix_prealloc(). However, UMA can hold a few nodes
+ * in per-CPU buckets, which will not be accessible by the
+ * current CPU. Thus, the allocation could return NULL when
+ * the pre-allocated pool is close to exhaustion. Anyway,
+ * in practice this should never occur because a new node
+ * is not always required for insert. Thus, the pre-allocated
+ * pool should have some extra pages that prevent this from
+ * becoming a problem.
+ */
+ if (rnode == NULL)
+ panic("%s: uma_zalloc() returned NULL for a new node",
+ __func__);
+ rnode->rn_owner = owner;
+ rnode->rn_count = count;
+ rnode->rn_clev = clevel;
+ return (rnode);
+}
+
+/*
+ * Free radix node.
+ */
+static __inline void
+vm_radix_node_put(struct vm_radix_node *rnode)
+{
+
+ uma_zfree(vm_radix_node_zone, rnode);
+}
+
+/*
+ * Return the position in the array for a given level.
+ */
+static __inline int
+vm_radix_slot(vm_pindex_t index, uint16_t level)
+{
+
+ return ((index >> ((VM_RADIX_LIMIT - level) * VM_RADIX_WIDTH)) &
+ VM_RADIX_MASK);
+}
+
+/* Trims the key after the specified level. */
+static __inline vm_pindex_t
+vm_radix_trimkey(vm_pindex_t index, uint16_t level)
+{
+ vm_pindex_t ret;
+
+ ret = index;
+ if (level < VM_RADIX_LIMIT) {
+ ret >>= (VM_RADIX_LIMIT - level) * VM_RADIX_WIDTH;
+ ret <<= (VM_RADIX_LIMIT - level) * VM_RADIX_WIDTH;
+ }
+ return (ret);
+}
+
+/*
+ * Get the root node for a radix tree.
+ */
+static __inline struct vm_radix_node *
+vm_radix_getroot(struct vm_radix *rtree)
+{
+
+ return ((struct vm_radix_node *)(rtree->rt_root & ~VM_RADIX_FLAGS));
+}
+
+/*
+ * Set the root node for a radix tree.
+ */
+static __inline void
+vm_radix_setroot(struct vm_radix *rtree, struct vm_radix_node *rnode)
+{
+
+ rtree->rt_root = (uintptr_t)rnode;
+}
+
+/*
+ * Returns the associated page extracted from rnode if available,
+ * and NULL otherwise.
+ */
+static __inline vm_page_t
+vm_radix_node_page(struct vm_radix_node *rnode)
+{
+
+ return ((((uintptr_t)rnode & VM_RADIX_ISLEAF) != 0) ?
+ (vm_page_t)((uintptr_t)rnode & ~VM_RADIX_FLAGS) : NULL);
+}
+
+/*
+ * Adds the page as a child of the provided node.
+ */
+static __inline void
+vm_radix_addpage(struct vm_radix_node *rnode, vm_pindex_t index, uint16_t clev,
+ vm_page_t page)
+{
+ int slot;
+
+ slot = vm_radix_slot(index, clev);
+ rnode->rn_child[slot] = (void *)((uintptr_t)page | VM_RADIX_ISLEAF);
+}
+
+/*
+ * Returns the slot where two keys differ.
+ * It cannot accept 2 equal keys.
+ */
+static __inline uint16_t
+vm_radix_keydiff(vm_pindex_t index1, vm_pindex_t index2)
+{
+ uint16_t clev;
+
+ KASSERT(index1 != index2, ("%s: passing the same key value %jx",
+ __func__, (uintmax_t)index1));
+
+ index1 ^= index2;
+ for (clev = 0; clev <= VM_RADIX_LIMIT ; clev++)
+ if (vm_radix_slot(index1, clev))
+ return (clev);
+ panic("%s: cannot reach this point", __func__);
+ return (0);
+}
+
+/*
+ * Returns TRUE if it can be determined that key does not belong to the
+ * specified rnode. Otherwise, returns FALSE.
+ */
+static __inline boolean_t
+vm_radix_keybarr(struct vm_radix_node *rnode, vm_pindex_t idx)
+{
+
+ if (rnode->rn_clev > 0) {
+ idx = vm_radix_trimkey(idx, rnode->rn_clev - 1);
+ idx -= rnode->rn_owner;
+ if (idx != 0)
+ return (TRUE);
+ }
+ return (FALSE);
+}
+
+/*
+ * Adjusts the idx key to the first upper level available, based on a valid
+ * initial level and map of available levels.
+ * Returns a value bigger than 0 to signal that there are not valid levels
+ * available.
+ */
+static __inline int
+vm_radix_addlev(vm_pindex_t *idx, boolean_t *levels, uint16_t ilev)
+{
+ vm_pindex_t wrapidx;
+
+ for (; levels[ilev] == FALSE ||
+ vm_radix_slot(*idx, ilev) == (VM_RADIX_COUNT - 1); ilev--)
+ if (ilev == 0)
+ break;
+ KASSERT(ilev > 0 || levels[0],
+ ("%s: levels back-scanning problem", __func__));
+ if (ilev == 0 && vm_radix_slot(*idx, ilev) == (VM_RADIX_COUNT - 1))
+ return (1);
+ wrapidx = *idx;
+ *idx = vm_radix_trimkey(*idx, ilev);
+ *idx += VM_RADIX_UNITLEVEL(ilev);
+ return (*idx < wrapidx);
+}
+
+/*
+ * Adjusts the idx key to the first lower level available, based on a valid
+ * initial level and map of available levels.
+ * Returns a value bigger than 0 to signal that there are not valid levels
+ * available.
+ */
+static __inline int
+vm_radix_declev(vm_pindex_t *idx, boolean_t *levels, uint16_t ilev)
+{
+ vm_pindex_t wrapidx;
+
+ for (; levels[ilev] == FALSE ||
+ vm_radix_slot(*idx, ilev) == 0; ilev--)
+ if (ilev == 0)
+ break;
+ KASSERT(ilev > 0 || levels[0],
+ ("%s: levels back-scanning problem", __func__));
+ if (ilev == 0 && vm_radix_slot(*idx, ilev) == 0)
+ return (1);
+ wrapidx = *idx;
+ *idx = vm_radix_trimkey(*idx, ilev);
+ *idx |= VM_RADIX_UNITLEVEL(ilev) - 1;
+ *idx -= VM_RADIX_UNITLEVEL(ilev);
+ return (*idx > wrapidx);
+}
+
+/*
+ * Internal helper for vm_radix_reclaim_allnodes().
+ * This function is recursive.
+ */
+static void
+vm_radix_reclaim_allnodes_int(struct vm_radix_node *rnode)
+{
+ int slot;
+
+ for (slot = 0; slot < VM_RADIX_COUNT && rnode->rn_count != 0; slot++) {
+ if (rnode->rn_child[slot] == NULL)
+ continue;
+ if (vm_radix_node_page(rnode->rn_child[slot]) == NULL)
+ vm_radix_reclaim_allnodes_int(rnode->rn_child[slot]);
+ rnode->rn_child[slot] = NULL;
+ rnode->rn_count--;
+ }
+ vm_radix_node_put(rnode);
+}
+
+#ifdef INVARIANTS
+/*
+ * Radix node zone destructor.
+ */
+static void
+vm_radix_node_zone_dtor(void *mem, int size __unused, void *arg __unused)
+{
+ struct vm_radix_node *rnode;
+ int slot;
+
+ rnode = mem;
+ KASSERT(rnode->rn_count == 0,
+ ("vm_radix_node_put: rnode %p has %d children", rnode,
+ rnode->rn_count));
+ for (slot = 0; slot < VM_RADIX_COUNT; slot++)
+ KASSERT(rnode->rn_child[slot] == NULL,
+ ("vm_radix_node_put: rnode %p has a child", rnode));
+}
+#endif
+
+/*
+ * Radix node zone initializer.
+ */
+static int
+vm_radix_node_zone_init(void *mem, int size __unused, int flags __unused)
+{
+ struct vm_radix_node *rnode;
+
+ rnode = mem;
+ memset(rnode->rn_child, 0, sizeof(rnode->rn_child));
+ return (0);
+}
+
+/*
+ * Pre-allocate intermediate nodes from the UMA slab zone.
+ */
+static void
+vm_radix_prealloc(void *arg __unused)
+{
+
+ if (!uma_zone_reserve_kva(vm_radix_node_zone, cnt.v_page_count))
+ panic("%s: unable to create new zone", __func__);
+ uma_prealloc(vm_radix_node_zone, cnt.v_page_count);
+}
+SYSINIT(vm_radix_prealloc, SI_SUB_KMEM, SI_ORDER_SECOND, vm_radix_prealloc,
+ NULL);
+
+/*
+ * Initialize the UMA slab zone.
+ * Until vm_radix_prealloc() is called, the zone will be served by the
+ * UMA boot-time pre-allocated pool of pages.
+ */
+void
+vm_radix_init(void)
+{
+
+ vm_radix_node_zone = uma_zcreate("RADIX NODE",
+ sizeof(struct vm_radix_node), NULL,
+#ifdef INVARIANTS
+ vm_radix_node_zone_dtor,
+#else
+ NULL,
+#endif
+ vm_radix_node_zone_init, NULL, VM_RADIX_PAD, UMA_ZONE_VM |
+ UMA_ZONE_NOFREE);
+}
+
+/*
+ * Inserts the key-value pair into the trie.
+ * Panics if the key already exists.
+ */
+void
+vm_radix_insert(struct vm_radix *rtree, vm_page_t page)
+{
+ vm_pindex_t index, newind;
+ struct vm_radix_node *rnode, *tmp, *tmp2;
+ vm_page_t m;
+ int slot;
+ uint16_t clev;
+
+ index = page->pindex;
+
+ /*
+ * The owner of record for root is not really important because it
+ * will never be used.
+ */
+ rnode = vm_radix_getroot(rtree);
+ if (rnode == NULL) {
+ rnode = vm_radix_node_get(0, 1, 0);
+ vm_radix_setroot(rtree, rnode);
+ vm_radix_addpage(rnode, index, 0, page);
+ return;
+ }
+ while (rnode != NULL) {
+ if (vm_radix_keybarr(rnode, index))
+ break;
+ slot = vm_radix_slot(index, rnode->rn_clev);
+ m = vm_radix_node_page(rnode->rn_child[slot]);
+ if (m != NULL) {
+ if (m->pindex == index)
+ panic("%s: key %jx is already present",
+ __func__, (uintmax_t)index);
+ clev = vm_radix_keydiff(m->pindex, index);
+ tmp = vm_radix_node_get(vm_radix_trimkey(index,
+ clev - 1), 2, clev);
+ rnode->rn_child[slot] = tmp;
+ vm_radix_addpage(tmp, index, clev, page);
+ vm_radix_addpage(tmp, m->pindex, clev, m);
+ return;
+ }
+ if (rnode->rn_child[slot] == NULL) {
+ rnode->rn_count++;
+ vm_radix_addpage(rnode, index, rnode->rn_clev, page);
+ return;
+ }
+ rnode = rnode->rn_child[slot];
+ }
+ if (rnode == NULL)
+ panic("%s: path traversal ended unexpectedly", __func__);
+
+ /*
+ * Scan the trie from the top and find the parent to insert
+ * the new object.
+ */
+ newind = rnode->rn_owner;
+ clev = vm_radix_keydiff(newind, index);
+ slot = VM_RADIX_COUNT;
+ for (rnode = vm_radix_getroot(rtree); ; rnode = tmp) {
+ KASSERT(rnode != NULL, ("%s: edge cannot be NULL in the scan",
+ __func__));
+ KASSERT(clev >= rnode->rn_clev,
+ ("%s: unexpected trie depth: clev: %d, rnode->rn_clev: %d",
+ __func__, clev, rnode->rn_clev));
+ slot = vm_radix_slot(index, rnode->rn_clev);
+ tmp = rnode->rn_child[slot];
+ KASSERT(tmp != NULL && vm_radix_node_page(tmp) == NULL,
+ ("%s: unexpected lookup interruption", __func__));
+ if (tmp->rn_clev > clev)
+ break;
+ }
+ KASSERT(rnode != NULL && tmp != NULL && slot < VM_RADIX_COUNT,
+ ("%s: invalid scan parameters rnode: %p, tmp: %p, slot: %d",
+ __func__, (void *)rnode, (void *)tmp, slot));
+
+ /*
+ * A new node is needed because the right insertion level is reached.
+ * Setup the new intermediate node and add the 2 children: the
+ * new object and the older edge.
+ */
+ tmp2 = vm_radix_node_get(vm_radix_trimkey(index, clev - 1), 2,
+ clev);
+ rnode->rn_child[slot] = tmp2;
+ vm_radix_addpage(tmp2, index, clev, page);
+ slot = vm_radix_slot(newind, clev);
+ tmp2->rn_child[slot] = tmp;
+}
+
+/*
+ * Returns the value stored at the index. If the index is not present,
+ * NULL is returned.
+ */
+vm_page_t
+vm_radix_lookup(struct vm_radix *rtree, vm_pindex_t index)
+{
+ struct vm_radix_node *rnode;
+ vm_page_t m;
+ int slot;
+
+ rnode = vm_radix_getroot(rtree);
+ while (rnode != NULL) {
+ if (vm_radix_keybarr(rnode, index))
+ return (NULL);
+ slot = vm_radix_slot(index, rnode->rn_clev);
+ rnode = rnode->rn_child[slot];
+ m = vm_radix_node_page(rnode);
+ if (m != NULL) {
+ if (m->pindex == index)
+ return (m);
+ else
+ return (NULL);
+ }
+ }
+ return (NULL);
+}
+
+/*
+ * Look up the nearest entry at a position bigger than or equal to index.
+ */
+vm_page_t
+vm_radix_lookup_ge(struct vm_radix *rtree, vm_pindex_t index)
+{
+ vm_pindex_t inc;
+ vm_page_t m;
+ struct vm_radix_node *rnode;
+ int slot;
+ uint16_t difflev;
+ boolean_t maplevels[VM_RADIX_LIMIT + 1];
+#ifdef INVARIANTS
+ int loops = 0;
+#endif
+
+restart:
+ KASSERT(++loops < 1000, ("%s: too many loops", __func__));
+ for (difflev = 0; difflev < (VM_RADIX_LIMIT + 1); difflev++)
+ maplevels[difflev] = FALSE;
+ rnode = vm_radix_getroot(rtree);
+ while (rnode != NULL) {
+ maplevels[rnode->rn_clev] = TRUE;
+
+ /*
+ * If the keys differ before the current bisection node,
+ * the search key may roll back to the earliest
+ * available bisection node, or to the smaller value
+ * in the current domain (if the owner is bigger than the
+ * search key).
+ * The maplevels array records whether a node has been seen
+ * at a given level. This aids the search for a valid
+ * bisection node.
+ */
+ if (vm_radix_keybarr(rnode, index)) {
+ difflev = vm_radix_keydiff(index, rnode->rn_owner);
+ if (index > rnode->rn_owner) {
+ if (vm_radix_addlev(&index, maplevels,
+ difflev) > 0)
+ break;
+ } else
+ index = vm_radix_trimkey(rnode->rn_owner,
+ difflev);
+ goto restart;
+ }
+ slot = vm_radix_slot(index, rnode->rn_clev);
+ m = vm_radix_node_page(rnode->rn_child[slot]);
+ if (m != NULL && m->pindex >= index)
+ return (m);
+ if (rnode->rn_child[slot] != NULL && m == NULL) {
+ rnode = rnode->rn_child[slot];
+ continue;
+ }
+
+ /*
+ * Look for an available edge or page within the current
+ * bisection node.
+ */
+ if (slot < (VM_RADIX_COUNT - 1)) {
+ inc = VM_RADIX_UNITLEVEL(rnode->rn_clev);
+ index = vm_radix_trimkey(index, rnode->rn_clev);
+ index += inc;
+ slot++;
+ for (;; index += inc, slot++) {
+ m = vm_radix_node_page(rnode->rn_child[slot]);
+ if (m != NULL && m->pindex >= index)
+ return (m);
+ if ((rnode->rn_child[slot] != NULL &&
+ m == NULL) || slot == (VM_RADIX_COUNT - 1))
+ break;
+ }
+ }
+
+ /*
+ * If a valid page or edge bigger than the search slot is
+ * found in the traversal, skip to the next higher-level key.
+ */
+ if (slot == (VM_RADIX_COUNT - 1) &&
+ (rnode->rn_child[slot] == NULL || m != NULL)) {
+ if (rnode->rn_clev == 0 || vm_radix_addlev(&index,
+ maplevels, rnode->rn_clev - 1) > 0)
+ break;
+ goto restart;
+ }
+ rnode = rnode->rn_child[slot];
+ }
+ return (NULL);
+}
+
+/*
+ * Look up the nearest entry at a position less than or equal to index.
+ */
+vm_page_t
+vm_radix_lookup_le(struct vm_radix *rtree, vm_pindex_t index)
+{
+ vm_pindex_t inc;
+ vm_page_t m;
+ struct vm_radix_node *rnode;
+ int slot;
+ uint16_t difflev;
+ boolean_t maplevels[VM_RADIX_LIMIT + 1];
+#ifdef INVARIANTS
+ int loops = 0;
+#endif
+
+restart:
+ KASSERT(++loops < 1000, ("%s: too many loops", __func__));
+ for (difflev = 0; difflev < (VM_RADIX_LIMIT + 1); difflev++)
+ maplevels[difflev] = FALSE;
+ rnode = vm_radix_getroot(rtree);
+ while (rnode != NULL) {
+ maplevels[rnode->rn_clev] = TRUE;
+
+ /*
+ * If the keys differ before the current bisection node,
+ * the search key may roll back to the earliest
+ * available bisection node, or to the higher value
+ * in the current domain (if the owner is smaller than the
+ * search key).
+ * The maplevels array records whether a node has been seen
+ * at a given level. This aids the search for a valid
+ * bisection node.
+ */
+ if (vm_radix_keybarr(rnode, index)) {
+ difflev = vm_radix_keydiff(index, rnode->rn_owner);
+ if (index > rnode->rn_owner) {
+ index = vm_radix_trimkey(rnode->rn_owner,
+ difflev);
+ index |= VM_RADIX_UNITLEVEL(difflev) - 1;
+ } else if (vm_radix_declev(&index, maplevels,
+ difflev) > 0)
+ break;
+ goto restart;
+ }
+ slot = vm_radix_slot(index, rnode->rn_clev);
+ m = vm_radix_node_page(rnode->rn_child[slot]);
+ if (m != NULL && m->pindex <= index)
+ return (m);
+ if (rnode->rn_child[slot] != NULL && m == NULL) {
+ rnode = rnode->rn_child[slot];
+ continue;
+ }
+
+ /*
+ * Look for an available edge or page within the current
+ * bisection node.
+ */
+ if (slot > 0) {
+ inc = VM_RADIX_UNITLEVEL(rnode->rn_clev);
+ index = vm_radix_trimkey(index, rnode->rn_clev);
+ index |= inc - 1;
+ index -= inc;
+ slot--;
+ for (;; index -= inc, slot--) {
+ m = vm_radix_node_page(rnode->rn_child[slot]);
+ if (m != NULL && m->pindex <= index)
+ return (m);
+ if ((rnode->rn_child[slot] != NULL &&
+ m == NULL) || slot == 0)
+ break;
+ }
+ }
+
+ /*
+ * If a valid page or edge smaller than the search slot is
+ * found in the traversal, skip to the next higher-level key.
+ */
+ if (slot == 0 && (rnode->rn_child[slot] == NULL || m != NULL)) {
+ if (rnode->rn_clev == 0 || vm_radix_declev(&index,
+ maplevels, rnode->rn_clev - 1) > 0)
+ break;
+ goto restart;
+ }
+ rnode = rnode->rn_child[slot];
+ }
+ return (NULL);
+}
+
+/*
+ * Remove the specified index from the tree.
+ * Panics if the key is not present.
+ */
+void
+vm_radix_remove(struct vm_radix *rtree, vm_pindex_t index)
+{
+ struct vm_radix_node *rnode, *parent;
+ vm_page_t m;
+ int i, slot;
+
+ parent = NULL;
+ rnode = vm_radix_getroot(rtree);
+ for (;;) {
+ if (rnode == NULL)
+ panic("vm_radix_remove: impossible to locate the key");
+ slot = vm_radix_slot(index, rnode->rn_clev);
+ m = vm_radix_node_page(rnode->rn_child[slot]);
+ if (m != NULL && m->pindex == index) {
+ rnode->rn_child[slot] = NULL;
+ rnode->rn_count--;
+ if (rnode->rn_count > 1)
+ break;
+ if (parent == NULL) {
+ if (rnode->rn_count == 0) {
+ vm_radix_node_put(rnode);
+ vm_radix_setroot(rtree, NULL);
+ }
+ break;
+ }
+ for (i = 0; i < VM_RADIX_COUNT; i++)
+ if (rnode->rn_child[i] != NULL)
+ break;
+ KASSERT(i != VM_RADIX_COUNT,
+ ("%s: invalid node configuration", __func__));
+ slot = vm_radix_slot(index, parent->rn_clev);
+ KASSERT(parent->rn_child[slot] == rnode,
+ ("%s: invalid child value", __func__));
+ parent->rn_child[slot] = rnode->rn_child[i];
+ rnode->rn_count--;
+ rnode->rn_child[i] = NULL;
+ vm_radix_node_put(rnode);
+ break;
+ }
+ if (m != NULL && m->pindex != index)
+ panic("%s: invalid key found", __func__);
+ parent = rnode;
+ rnode = rnode->rn_child[slot];
+ }
+}
+
+/*
+ * Remove and free all the nodes from the radix tree.
+ * This function is recursive but there is a tight control on it as the
+ * maximum depth of the tree is fixed.
+ */
+void
+vm_radix_reclaim_allnodes(struct vm_radix *rtree)
+{
+ struct vm_radix_node *root;
+
+ root = vm_radix_getroot(rtree);
+ if (root == NULL)
+ return;
+ vm_radix_reclaim_allnodes_int(root);
+ vm_radix_setroot(rtree, NULL);
+}
+
+#ifdef DDB
+/*
+ * Show details about the given radix node.
+ */
+DB_SHOW_COMMAND(radixnode, db_show_radixnode)
+{
+ struct vm_radix_node *rnode;
+ int i;
+
+ if (!have_addr)
+ return;
+ rnode = (struct vm_radix_node *)addr;
+ db_printf("radixnode %p, owner %jx, children count %u, level %u:\n",
+ (void *)rnode, (uintmax_t)rnode->rn_owner, rnode->rn_count,
+ rnode->rn_clev);
+ for (i = 0; i < VM_RADIX_COUNT; i++)
+ if (rnode->rn_child[i] != NULL)
+ db_printf("slot: %d, val: %p, page: %p, clev: %d\n",
+ i, (void *)rnode->rn_child[i],
+ (void *)vm_radix_node_page(rnode->rn_child[i]),
+ rnode->rn_clev);
+}
+#endif /* DDB */
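Two details above are easy to check with plain arithmetic: on LP64 the constants give 16-way nodes (VM_RADIX_WIDTH 4, VM_RADIX_COUNT 16, VM_RADIX_LIMIT 15), vm_radix_slot() peels the key apart one 4-bit digit at a time from the most significant digit down, and leaves are told apart from internal nodes by the VM_RADIX_ISLEAF bit set in the child pointer (see vm_radix_addpage() and vm_radix_node_page()). A small user-space sketch of the slot arithmetic, using standalone copies of the macros rather than the kernel headers:

    #include <stdint.h>
    #include <stdio.h>

    #define WIDTH   4                       /* VM_RADIX_WIDTH on LP64 */
    #define LIMIT   (64 / WIDTH - 1)        /* VM_RADIX_LIMIT == 15 */
    #define MASK    ((1 << WIDTH) - 1)      /* VM_RADIX_MASK == 0xf */

    /* Standalone copy of vm_radix_slot(). */
    static int
    slot(uint64_t index, int level)
    {

            return ((index >> ((LIMIT - level) * WIDTH)) & MASK);
    }

    int
    main(void)
    {
            uint64_t pindex = 0x12345;
            int lev;

            /* Levels 0..10 yield slot 0; levels 11..15 yield 1, 2, 3, 4, 5. */
            for (lev = 0; lev <= LIMIT; lev++)
                    printf("level %2d -> slot %x\n", lev, slot(pindex, lev));
            return (0);
    }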
diff --git a/sys/vm/vm_radix.h b/sys/vm/vm_radix.h
new file mode 100644
index 0000000..4ac0a06
--- /dev/null
+++ b/sys/vm/vm_radix.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2013 EMC Corp.
+ * Copyright (c) 2011 Jeffrey Roberson <jeff@freebsd.org>
+ * Copyright (c) 2008 Mayur Shardul <mayur.shardul@gmail.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ */
+
+#ifndef _VM_RADIX_H_
+#define _VM_RADIX_H_
+
+#include <vm/_vm_radix.h>
+
+#ifdef _KERNEL
+
+void vm_radix_init(void);
+void vm_radix_insert(struct vm_radix *rtree, vm_page_t page);
+vm_page_t vm_radix_lookup(struct vm_radix *rtree, vm_pindex_t index);
+vm_page_t vm_radix_lookup_ge(struct vm_radix *rtree, vm_pindex_t index);
+vm_page_t vm_radix_lookup_le(struct vm_radix *rtree, vm_pindex_t index);
+void vm_radix_reclaim_allnodes(struct vm_radix *rtree);
+void vm_radix_remove(struct vm_radix *rtree, vm_pindex_t index);
+
+#endif /* _KERNEL */
+#endif /* !_VM_RADIX_H_ */
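The ordered lookups are what let the vm_page_insert() hunk above and the vm_reserv.c hunks below drop their splay-based neighbor searches: vm_radix_lookup_le() returns the resident page with the greatest pindex less than or equal to the key, and its list successor (or the list head when there is no predecessor) is the other neighbor. A sketch of that pattern, matching the code in this diff:

    /* Find the resident pages bracketing a pindex that is not yet inserted. */
    mpred = vm_radix_lookup_le(&object->rtree, pindex);
    if (mpred != NULL) {
            KASSERT(mpred->pindex < pindex,
                ("pindex %ju already present", (uintmax_t)pindex));
            msucc = TAILQ_NEXT(mpred, listq);
    } else
            msucc = TAILQ_FIRST(&object->memq);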
diff --git a/sys/vm/vm_reserv.c b/sys/vm/vm_reserv.c
index 98b0de2..4d36385 100644
--- a/sys/vm/vm_reserv.c
+++ b/sys/vm/vm_reserv.c
@@ -57,6 +57,7 @@ __FBSDID("$FreeBSD$");
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_phys.h>
+#include <vm/vm_radix.h>
#include <vm/vm_reserv.h>
/*
@@ -342,33 +343,22 @@ vm_reserv_alloc_contig(vm_object_t object, vm_pindex_t pindex, u_long npages,
/*
* Look for an existing reservation.
*/
- msucc = NULL;
- mpred = object->root;
- while (mpred != NULL) {
- KASSERT(mpred->pindex != pindex,
+ mpred = vm_radix_lookup_le(&object->rtree, pindex);
+ if (mpred != NULL) {
+ KASSERT(mpred->pindex < pindex,
("vm_reserv_alloc_contig: pindex already allocated"));
rv = vm_reserv_from_page(mpred);
if (rv->object == object && vm_reserv_has_pindex(rv, pindex))
goto found;
- else if (mpred->pindex < pindex) {
- if (msucc != NULL ||
- (msucc = TAILQ_NEXT(mpred, listq)) == NULL)
- break;
- KASSERT(msucc->pindex != pindex,
- ("vm_reserv_alloc_contig: pindex already allocated"));
- rv = vm_reserv_from_page(msucc);
- if (rv->object == object &&
- vm_reserv_has_pindex(rv, pindex))
- goto found;
- else if (pindex < msucc->pindex)
- break;
- } else if (msucc == NULL) {
- msucc = mpred;
- mpred = TAILQ_PREV(msucc, pglist, listq);
- continue;
- }
- msucc = NULL;
- mpred = object->root = vm_page_splay(pindex, object->root);
+ msucc = TAILQ_NEXT(mpred, listq);
+ } else
+ msucc = TAILQ_FIRST(&object->memq);
+ if (msucc != NULL) {
+ KASSERT(msucc->pindex > pindex,
+ ("vm_reserv_alloc_page: pindex already allocated"));
+ rv = vm_reserv_from_page(msucc);
+ if (rv->object == object && vm_reserv_has_pindex(rv, pindex))
+ goto found;
}
/*
@@ -508,33 +498,22 @@ vm_reserv_alloc_page(vm_object_t object, vm_pindex_t pindex)
/*
* Look for an existing reservation.
*/
- msucc = NULL;
- mpred = object->root;
- while (mpred != NULL) {
- KASSERT(mpred->pindex != pindex,
+ mpred = vm_radix_lookup_le(&object->rtree, pindex);
+ if (mpred != NULL) {
+ KASSERT(mpred->pindex < pindex,
("vm_reserv_alloc_page: pindex already allocated"));
rv = vm_reserv_from_page(mpred);
if (rv->object == object && vm_reserv_has_pindex(rv, pindex))
goto found;
- else if (mpred->pindex < pindex) {
- if (msucc != NULL ||
- (msucc = TAILQ_NEXT(mpred, listq)) == NULL)
- break;
- KASSERT(msucc->pindex != pindex,
- ("vm_reserv_alloc_page: pindex already allocated"));
- rv = vm_reserv_from_page(msucc);
- if (rv->object == object &&
- vm_reserv_has_pindex(rv, pindex))
- goto found;
- else if (pindex < msucc->pindex)
- break;
- } else if (msucc == NULL) {
- msucc = mpred;
- mpred = TAILQ_PREV(msucc, pglist, listq);
- continue;
- }
- msucc = NULL;
- mpred = object->root = vm_page_splay(pindex, object->root);
+ msucc = TAILQ_NEXT(mpred, listq);
+ } else
+ msucc = TAILQ_FIRST(&object->memq);
+ if (msucc != NULL) {
+ KASSERT(msucc->pindex > pindex,
+ ("vm_reserv_alloc_page: pindex already allocated"));
+ rv = vm_reserv_from_page(msucc);
+ if (rv->object == object && vm_reserv_has_pindex(rv, pindex))
+ goto found;
}
/*