path: root/sys/vm/vm_radix.c
Diffstat (limited to 'sys/vm/vm_radix.c')
-rw-r--r--  sys/vm/vm_radix.c  956
1 file changed, 956 insertions, 0 deletions
diff --git a/sys/vm/vm_radix.c b/sys/vm/vm_radix.c
new file mode 100644
index 0000000..f29f5d3
--- /dev/null
+++ b/sys/vm/vm_radix.c
@@ -0,0 +1,956 @@
+/*
+ * Copyright (c) 2011 Jeffrey Roberson <jeff@freebsd.org>
+ * Copyright (c) 2008 Mayur Shardul <mayur.shardul@gmail.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ */
+
+/*
+ * Radix tree implementation.
+ */
+
+#include <sys/cdefs.h>
+
+#include <sys/param.h>
+#include <sys/conf.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/malloc.h>
+#include <sys/queue.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/ktr.h>
+#include <vm/uma.h>
+#include <vm/vm.h>
+#include <vm/vm_param.h>
+#include <vm/vm_extern.h>
+#include <vm/vm_kern.h>
+#include <vm/vm_page.h>
+#ifndef UMA_MD_SMALL_ALLOC
+#include <vm/vm_map.h>
+#endif
+#include <vm/vm_radix.h>
+#include <vm/vm_object.h>
+
+#include <sys/kdb.h>
+
+#ifndef UMA_MD_SMALL_ALLOC
+#define VM_RADIX_RNODE_MAP_SCALE (1024 * 1024 / 2)
+#define VM_RADIX_WIDTH 4
+
+/*
+ * Bits of height in root.
+ * This is the smallest mask of the form (2^n - 1) able to contain
+ * VM_RADIX_LIMIT.
+ */
+#define VM_RADIX_HEIGHT 0x1f
+#else
+#define VM_RADIX_WIDTH 5
+
+/* See the comment above. */
+#define VM_RADIX_HEIGHT 0xf
+#endif
+
+#define VM_RADIX_COUNT (1 << VM_RADIX_WIDTH)
+#define VM_RADIX_MASK (VM_RADIX_COUNT - 1)
+#define VM_RADIX_MAXVAL ((vm_pindex_t)-1)
+#define VM_RADIX_LIMIT howmany((sizeof(vm_pindex_t) * NBBY), VM_RADIX_WIDTH)
+
+/* Flag bits stored in node pointers. */
+#define VM_RADIX_FLAGS 0x3
+
+/* Calculates maximum value for a tree of height h. */
+#define VM_RADIX_MAX(h) \
+ ((h) == VM_RADIX_LIMIT ? VM_RADIX_MAXVAL : \
+ (((vm_pindex_t)1 << ((h) * VM_RADIX_WIDTH)) - 1))
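+
+/*
+ * A worked example of the constants above, assuming VM_RADIX_WIDTH == 4
+ * and a 64-bit vm_pindex_t: VM_RADIX_COUNT == 16, VM_RADIX_MASK == 0xf,
+ * VM_RADIX_LIMIT == howmany(64, 4) == 16 levels, and VM_RADIX_MAX(2) ==
+ * (1 << 8) - 1 == 0xff, so a tree of height 2 covers indexes 0-255.
+ */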
+
+/*
+ * On 32-bit architectures KTR cannot handle 64-bit values.
+ * Add macros for splitting a 64-bit value into two 32-bit quantities and
+ * provide the matching format strings.
+ * Note that braces are not used because they would break compilation.
+ * Also note that arguments are cast to u_long in order to follow the KTR
+ * convention.
+ */
+#ifdef KTR
+#define KFRMT64(x) __STRING(x)"l 0x%08lx, "__STRING(x)"h 0x%08lx"
+#define KSPLT64L(x) ((u_long)((x) & 0xFFFFFFFF))
+#define KSPLT64H(x) ((u_long)(((x) >> 32) & 0xFFFFFFFF))
+#endif
+
+struct vm_radix_node {
+ void *rn_child[VM_RADIX_COUNT]; /* Child nodes. */
+ volatile uint32_t rn_count; /* Valid children. */
+};
+
+CTASSERT(VM_RADIX_HEIGHT >= VM_RADIX_LIMIT);
+CTASSERT(sizeof(struct vm_radix_node) < PAGE_SIZE);
+
+static uma_zone_t vm_radix_node_zone;
+
+#ifndef UMA_MD_SMALL_ALLOC
+static vm_map_t rnode_map;
+static u_long rnode_map_scale;
+
+static void *
+vm_radix_node_zone_allocf(uma_zone_t zone, int size, uint8_t *flags, int wait)
+{
+ vm_offset_t addr;
+ vm_page_t m;
+ int pflags;
+
+ /* Inform UMA that this allocator uses rnode_map. */
+ *flags = UMA_SLAB_KERNEL;
+
+ pflags = VM_ALLOC_WIRED | VM_ALLOC_NOOBJ;
+
+	/*
+	 * kmem_alloc_nofault() can fail regardless of the wait flags,
+	 * so just assume M_NOWAIT semantics and act accordingly.
+	 */
+ pflags |= ((wait & M_USE_RESERVE) != 0) ? VM_ALLOC_INTERRUPT :
+ VM_ALLOC_SYSTEM;
+ if ((wait & M_ZERO) != 0)
+ pflags |= VM_ALLOC_ZERO;
+ addr = kmem_alloc_nofault(rnode_map, size);
+ if (addr == 0)
+ return (NULL);
+
+ /* Just one page allocation is assumed here. */
+ m = vm_page_alloc(NULL, OFF_TO_IDX(addr - VM_MIN_KERNEL_ADDRESS),
+ pflags);
+ if (m == NULL) {
+ kmem_free(rnode_map, addr, size);
+ return (NULL);
+ }
+ if ((wait & M_ZERO) != 0 && (m->flags & PG_ZERO) == 0)
+ pmap_zero_page(m);
+ pmap_qenter(addr, &m, 1);
+ return ((void *)addr);
+}
+
+static void
+vm_radix_node_zone_freef(void *item, int size, uint8_t flags)
+{
+ vm_page_t m;
+ vm_offset_t voitem;
+
+ MPASS((flags & UMA_SLAB_KERNEL) != 0);
+
+ /* Just one page allocation is assumed here. */
+ voitem = (vm_offset_t)item;
+ m = PHYS_TO_VM_PAGE(pmap_kextract(voitem));
+ pmap_qremove(voitem, 1);
+ vm_page_lock(m);
+ vm_page_unwire(m, 0);
+ vm_page_free(m);
+ vm_page_unlock(m);
+ kmem_free(rnode_map, voitem, size);
+}
+
+static void
+init_vm_radix_alloc(void *dummy __unused)
+{
+
+ uma_zone_set_max(vm_radix_node_zone, rnode_map_scale);
+ uma_zone_set_allocf(vm_radix_node_zone, vm_radix_node_zone_allocf);
+ uma_zone_set_freef(vm_radix_node_zone, vm_radix_node_zone_freef);
+}
+SYSINIT(vm_radix, SI_SUB_KMEM, SI_ORDER_SECOND, init_vm_radix_alloc, NULL);
+#endif
+
+/*
+ * Radix node zone destructor.
+ */
+#ifdef INVARIANTS
+static void
+vm_radix_node_zone_dtor(void *mem, int size, void *arg)
+{
+ struct vm_radix_node *rnode;
+
+ rnode = mem;
+ KASSERT(rnode->rn_count == 0,
+ ("vm_radix_node_put: Freeing a node with %d children\n",
+ rnode->rn_count));
+}
+#endif
+
+/*
+ * Allocate a radix node.  All elements are initialized to 0.
+ */
+static __inline struct vm_radix_node *
+vm_radix_node_get(void)
+{
+
+ return (uma_zalloc(vm_radix_node_zone, M_NOWAIT | M_ZERO));
+}
+
+/*
+ * Free a radix node.
+ */
+static __inline void
+vm_radix_node_put(struct vm_radix_node *rnode)
+{
+
+ uma_zfree(vm_radix_node_zone, rnode);
+}
+
+/*
+ * Return the position in the array for a given level.
+ */
+static __inline int
+vm_radix_slot(vm_pindex_t index, int level)
+{
+
+ return ((index >> (level * VM_RADIX_WIDTH)) & VM_RADIX_MASK);
+}
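+
+/*
+ * For example, with VM_RADIX_WIDTH == 4, vm_radix_slot(0x1a5, 1) ==
+ * (0x1a5 >> 4) & 0xf == 0xa: index 0x1a5 decomposes into slot 5 at
+ * level 0, slot 0xa at level 1 and slot 1 at level 2.
+ */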
+
+/*
+ * Initialize the radix node submap (for architectures not supporting
+ * direct-mapping) and the radix node zone.
+ *
+ * WITNESS reports a lock order reversal, for architectures not
+ * supporting direct-mapping, between the "system map" lock
+ * and the "vm object" lock.  This is because the well established
+ * ordering "system map" -> "vm object" is not honoured in this case:
+ * allocating from the radix node submap ends up adding a mapping entry
+ * to it, so it is necessary to lock the submap.  However, the radix
+ * node submap is a leaf and self-contained, thus a deadlock cannot
+ * happen here and adding MTX_NOWITNESS to all map locks would be
+ * largely sub-optimal.
+ */
+void
+vm_radix_init(void)
+{
+#ifndef UMA_MD_SMALL_ALLOC
+ vm_offset_t maxaddr, minaddr;
+
+ rnode_map_scale = VM_RADIX_RNODE_MAP_SCALE;
+ TUNABLE_ULONG_FETCH("hw.rnode_map_scale", &rnode_map_scale);
+ rnode_map = kmem_suballoc(kernel_map, &minaddr, &maxaddr,
+ rnode_map_scale * sizeof(struct vm_radix_node), FALSE);
+ rnode_map->system_map = 1;
+#endif
+
+ vm_radix_node_zone = uma_zcreate("RADIX NODE",
+ sizeof(struct vm_radix_node), NULL,
+#ifdef INVARIANTS
+ vm_radix_node_zone_dtor,
+#else
+ NULL,
+#endif
+ NULL, NULL, VM_RADIX_HEIGHT, UMA_ZONE_VM);
+}
+
+/*
+ * Extract the root node and height from a radix tree with a single load.
+ */
+static __inline int
+vm_radix_height(struct vm_radix *rtree, struct vm_radix_node **rnode)
+{
+ uintptr_t root;
+ int height;
+
+ root = rtree->rt_root;
+ height = root & VM_RADIX_HEIGHT;
+ *rnode = (struct vm_radix_node *)(root - height);
+ return (height);
+}
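+
+/*
+ * A note on the encoding used above: the height lives in the low
+ * VM_RADIX_HEIGHT bits of rt_root, which works because the node zone
+ * is created with an alignment mask of VM_RADIX_HEIGHT.  For instance
+ * (hypothetical address), a root node at 0xf000 with height 3 is stored
+ * as 0xf003, and subtracting the extracted height recovers the pointer.
+ */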
+
+/*
+ * Set the root node and height for a radix tree.
+ */
+static inline void
+vm_radix_setroot(struct vm_radix *rtree, struct vm_radix_node *rnode,
+ int height)
+{
+ uintptr_t root;
+
+ root = (uintptr_t)rnode | height;
+ rtree->rt_root = root;
+}
+
+static inline void
+vm_radix_unwind_heightup(struct vm_radix *rtree, struct vm_radix_node *root,
+ struct vm_radix_node *iroot, int ilevel)
+{
+ struct vm_radix_node *rnode;
+
+ CTR4(KTR_VM, "unwind: tree %p, root %p, iroot %p, ilevel %d",
+ rtree, root, iroot, ilevel);
+ while (iroot != root && root != NULL) {
+ rnode = root;
+ MPASS(rnode->rn_count == 0 || rnode->rn_count == 1);
+ rnode->rn_count = 0;
+ root = rnode->rn_child[0];
+ vm_radix_node_put(rnode);
+ }
+ vm_radix_setroot(rtree, iroot, ilevel);
+}
+
+static inline void *
+vm_radix_match(void *child, int color)
+{
+ uintptr_t c;
+
+ c = (uintptr_t)child;
+
+ if ((c & color) == 0)
+ return (NULL);
+ return ((void *)(c & ~VM_RADIX_FLAGS));
+}
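+
+/*
+ * For example, a child stored as (page | VM_RADIX_BLACK) is returned,
+ * with its flag bits masked off, only for lookups whose color shares a
+ * bit with VM_RADIX_BLACK; a color of 0 never matches.
+ */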
+
+static void
+vm_radix_reclaim_allnodes_internal(struct vm_radix_node *rnode, int level)
+{
+ int slot;
+
+ MPASS(rnode != NULL && level >= 0);
+
+ /*
+	 * Level 0 just contains pages as children, thus make it a
+	 * special case: free the node and return.
+ */
+ if (level == 0) {
+ CTR2(KTR_VM, "reclaiming: node %p, level %d", rnode, level);
+ rnode->rn_count = 0;
+ vm_radix_node_put(rnode);
+ return;
+ }
+ for (slot = 0; slot < VM_RADIX_COUNT && rnode->rn_count != 0; slot++) {
+ if (rnode->rn_child[slot] == NULL)
+ continue;
+ CTR3(KTR_VM,
+ "reclaiming: node %p, level %d recursing in slot %d",
+ rnode, level, slot);
+ vm_radix_reclaim_allnodes_internal(rnode->rn_child[slot],
+ level - 1);
+ rnode->rn_count--;
+ }
+ MPASS(rnode->rn_count == 0);
+ CTR2(KTR_VM, "reclaiming: node %p, level %d", rnode, level);
+ vm_radix_node_put(rnode);
+}
+
+/*
+ * Remove the specified index from the tree. If possible the height of the
+ * tree is adjusted after deletion.  May be used to clean up intermediate
+ * nodes of the path if the key is not entirely present.
+ */
+static void
+vm_radix_sweep(struct vm_radix *rtree, vm_pindex_t index, int color, int hard)
+{
+ struct vm_radix_node *stack[VM_RADIX_LIMIT];
+ struct vm_radix_node *rnode, *root;
+ int level;
+ int slot;
+
+ level = vm_radix_height(rtree, &root);
+ KASSERT(index <= VM_RADIX_MAX(level),
+ ("vm_radix_sweep: %p index out of range %jd.", rtree,
+ VM_RADIX_MAX(level)));
+ rnode = root;
+ level--;
+ /*
+ * Find the node and record the path in stack.
+ */
+ while (level && rnode) {
+ stack[level] = rnode;
+ slot = vm_radix_slot(index, level);
+ CTR6(KTR_VM,
+ "remove: tree %p, " KFRMT64(index) ", level %d, slot %d, rnode %p",
+ rtree, KSPLT64L(index), KSPLT64H(index), level, slot,
+ rnode);
+ CTR4(KTR_VM, "remove: tree %p, rnode %p, child %p, count %u",
+ rtree, rnode, rnode->rn_child[slot], rnode->rn_count);
+ rnode = rnode->rn_child[slot];
+ level--;
+ }
+ if (rnode == NULL) {
+ if (hard)
+ panic("vm_radix_sweep: index not present.\n");
+
+ /*
+		 * Almost certainly we got the half-constructed tree that
+		 * was expected: this soft variant is used to clean up
+		 * after a failed insert.
+ */
+ KASSERT(level < (VM_RADIX_LIMIT - 1),
+ ("vm_radix_sweep: level %d not consistent.\n", level));
+ ++level;
+ rnode = stack[level];
+ slot = vm_radix_slot(index, level);
+ } else {
+ slot = vm_radix_slot(index, 0);
+ if (hard &&
+ vm_radix_match(rnode->rn_child[slot], color) == NULL)
+ panic("vm_radix_sweep: index not present as leaf.\n");
+ }
+
+ for (;;) {
+ CTR6(KTR_VM,
+"remove: resetting tree %p, " KFRMT64(index) ", level %d, slot %d, rnode %p",
+ rtree, KSPLT64L(index), KSPLT64H(index), level, slot,
+ rnode);
+ CTR4(KTR_VM,
+ "remove: resetting tree %p, rnode %p, child %p, count %u",
+ rtree, rnode,
+ (rnode != NULL) ? rnode->rn_child[slot] : NULL,
+ (rnode != NULL) ? rnode->rn_count : 0);
+ rnode->rn_child[slot] = NULL;
+		/*
+		 * Use atomics for the last level since red and black
+		 * removals will both adjust it.
+		 * Use a release memory barrier here so that rn_count
+		 * cannot appear to reach 0 before the child pointer
+		 * update is visible; a concurrent black removal may
+		 * otherwise reclaim the radix node itself before the
+		 * pointer has been fetched.
+		 */
+ MPASS(rnode->rn_count != 0);
+ if (level == 0)
+ atomic_add_rel_32(&rnode->rn_count, -1);
+ else
+ rnode->rn_count--;
+
+ /*
+ * Only allow black removes to prune the tree.
+ */
+ if ((color & VM_RADIX_BLACK) == 0 || rnode->rn_count > 0)
+ break;
+ vm_radix_node_put(rnode);
+ if (rnode == root) {
+ vm_radix_setroot(rtree, NULL, 0);
+ break;
+ }
+ rnode = stack[++level];
+ slot = vm_radix_slot(index, level);
+	}
+}
+
+/*
+ * Inserts the key-value pair into the radix tree.  Returns errno.
+ * Panics if the key already exists.
+ */
+int
+vm_radix_insert(struct vm_radix *rtree, vm_pindex_t index, void *val)
+{
+ struct vm_radix_node *iroot, *rnode, *root;
+ int ilevel, level, slot;
+
+ CTR4(KTR_VM,
+ "insert: tree %p, " KFRMT64(index) ", val %p", rtree,
+ KSPLT64L(index), KSPLT64H(index), val);
+ if (index == -1)
+ panic("vm_radix_insert: -1 is not a valid index.\n");
+ level = vm_radix_height(rtree, &root);
+ /*
+ * Increase the height by adding nodes at the root until
+ * there is sufficient space.
+ */
+ ilevel = level;
+ iroot = root;
+ while (level == 0 || index > VM_RADIX_MAX(level)) {
+ CTR5(KTR_VM,
+ "insert: expanding " KFRMT64(index) ">" KFRMT64(mxl) ", height %d",
+ KSPLT64L(index), KSPLT64H(index),
+ KSPLT64L(VM_RADIX_MAX(level)),
+ KSPLT64H(VM_RADIX_MAX(level)), level);
+ level++;
+ KASSERT(level <= VM_RADIX_LIMIT,
+ ("vm_radix_insert: Tree %p height %d too tall",
+ rtree, level));
+ /*
+ * Only allocate tree nodes if they are needed.
+ */
+ if (root == NULL || root->rn_count != 0) {
+ rnode = vm_radix_node_get();
+ if (rnode == NULL) {
+ CTR5(KTR_VM,
+ "insert: tree %p, root %p, " KFRMT64(index) ", level %d ENOMEM",
+ rtree, root, KSPLT64L(index),
+ KSPLT64H(index), level);
+ vm_radix_unwind_heightup(rtree, root, iroot,
+ ilevel);
+ return (ENOMEM);
+ }
+ /*
+ * Store the new pointer with a memory barrier so
+ * that it is visible before the new root.
+ */
+ if (root) {
+ atomic_store_rel_ptr((volatile uintptr_t *)
+ &rnode->rn_child[0], (uintptr_t)root);
+ rnode->rn_count = 1;
+ }
+ root = rnode;
+ }
+ vm_radix_setroot(rtree, root, level);
+ }
+
+ /* Now that the tree is tall enough, fill in the path to the index. */
+ rnode = root;
+ for (level = level - 1; level > 0; level--) {
+ slot = vm_radix_slot(index, level);
+		/* Add the required intermediate nodes. */
+ if (rnode->rn_child[slot] == NULL) {
+
+ /*
+			 * In case of a failed allocation, vm_radix_sweep()
+			 * will unwind rn_count appropriately.
+ */
+ rnode->rn_count++;
+ rnode->rn_child[slot] = vm_radix_node_get();
+ if (rnode->rn_child[slot] == NULL) {
+ CTR6(KTR_VM,
+ "insert: tree %p, " KFRMT64(index) ", level %d, slot %d, rnode %p ENOMEM",
+ rtree, KSPLT64L(index), KSPLT64H(index),
+ level, slot, rnode);
+ CTR4(KTR_VM,
+ "insert: tree %p, rnode %p, child %p, count %u ENOMEM",
+ rtree, rnode, rnode->rn_child[slot],
+ rnode->rn_count);
+ vm_radix_sweep(rtree, index, VM_RADIX_BLACK, 0);
+
+ /*
+ * vm_radix_sweep() may have changed the shape
+ * of the tree, refetch the root.
+ */
+ vm_radix_height(rtree, &root);
+ vm_radix_unwind_heightup(rtree, root, iroot,
+ ilevel);
+ return (ENOMEM);
+ }
+ }
+ CTR6(KTR_VM,
+ "insert: tree %p, " KFRMT64(index) ", level %d, slot %d, rnode %p",
+ rtree, KSPLT64L(index), KSPLT64H(index), level, slot,
+ rnode);
+ CTR4(KTR_VM,
+ "insert: tree %p, rnode %p, child %p, count %u",
+ rtree, rnode, rnode->rn_child[slot], rnode->rn_count);
+ rnode = rnode->rn_child[slot];
+ }
+
+ slot = vm_radix_slot(index, 0);
+ MPASS(rnode != NULL);
+ KASSERT(rnode->rn_child[slot] == NULL,
+ ("vm_radix_insert: Duplicate value %p at index: %lu\n",
+ rnode->rn_child[slot], (u_long)index));
+ val = (void *)((uintptr_t)val | VM_RADIX_BLACK);
+ rnode->rn_child[slot] = val;
+ atomic_add_32(&rnode->rn_count, 1);
+ CTR5(KTR_VM,
+ "insert: tree %p, " KFRMT64(index) ", level %d, slot %d",
+ rtree, KSPLT64L(index), KSPLT64H(index), level, slot);
+ CTR3(KTR_VM, "insert: slot %d, rnode %p, count %u", slot, rnode,
+ rnode->rn_count);
+
+	return (0);
+}
+
+/*
+ * Returns the value stored at the index. If the index is not present
+ * NULL is returned.
+ */
+void *
+vm_radix_lookup(struct vm_radix *rtree, vm_pindex_t index, int color)
+{
+ struct vm_radix_node *rnode;
+ int slot;
+ int level;
+
+ level = vm_radix_height(rtree, &rnode);
+ if (index > VM_RADIX_MAX(level))
+		return (NULL);
+ level--;
+ while (rnode) {
+ slot = vm_radix_slot(index, level);
+ CTR6(KTR_VM,
+ "lookup: tree %p, " KFRMT64(index) ", level %d, slot %d, rnode %p",
+ rtree, KSPLT64L(index), KSPLT64H(index), level, slot,
+ rnode);
+ CTR2(KTR_VM, "lookup: rnode %p, child %p", rnode,
+ rnode->rn_child[slot]);
+ if (level == 0)
+			return (vm_radix_match(rnode->rn_child[slot],
+			    color));
+ rnode = rnode->rn_child[slot];
+ level--;
+ }
+ CTR3(KTR_VM, "lookup: tree %p, " KFRMT64(index) " failed", rtree,
+ KSPLT64L(index), KSPLT64H(index));
+
+	return (NULL);
+}
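+
+/*
+ * An illustrative sketch (disabled, following the #if 0 convention used
+ * further below): how a caller might pair vm_radix_insert() with
+ * vm_radix_lookup() and vm_radix_remove().  The page argument is a
+ * hypothetical example value.
+ */
+#if 0
+static void
+vm_radix_usage_example(struct vm_radix *rtree, vm_page_t m)
+{
+
+	/* Node allocations use M_NOWAIT, so insertion can fail. */
+	if (vm_radix_insert(rtree, m->pindex, m) != 0)
+		panic("vm_radix_usage_example: out of radix nodes");
+
+	/* vm_radix_insert() stores entries with the black color. */
+	MPASS(vm_radix_lookup(rtree, m->pindex, VM_RADIX_BLACK) == m);
+	vm_radix_remove(rtree, m->pindex, VM_RADIX_BLACK);
+}
+#endif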
+
+void *
+vm_radix_color(struct vm_radix *rtree, vm_pindex_t index, int color)
+{
+ struct vm_radix_node *rnode;
+ uintptr_t child;
+ int slot;
+ int level;
+
+ level = vm_radix_height(rtree, &rnode);
+ if (index > VM_RADIX_MAX(level))
+		return (NULL);
+ level--;
+ while (rnode) {
+ slot = vm_radix_slot(index, level);
+ CTR6(KTR_VM,
+ "color: tree %p, " KFRMT64(index) ", level %d, slot %d, rnode %p",
+ rtree, KSPLT64L(index), KSPLT64H(index), level, slot,
+ rnode);
+ CTR2(KTR_VM, "color: rnode %p, child %p", rnode,
+ rnode->rn_child[slot]);
+ if (level == 0)
+ break;
+ rnode = rnode->rn_child[slot];
+ level--;
+ }
+ if (rnode == NULL || rnode->rn_child[slot] == NULL)
+ return (NULL);
+ child = (uintptr_t)rnode->rn_child[slot];
+ child &= ~VM_RADIX_FLAGS;
+ rnode->rn_child[slot] = (void *)(child | color);
+
+	return ((void *)child);
+}
+
+/*
+ * Find the first leaf node holding a valid child between *startp and end.
+ * Return the index of the first valid item in the leaf via *startp.
+ */
+static struct vm_radix_node *
+vm_radix_leaf(struct vm_radix *rtree, vm_pindex_t *startp, vm_pindex_t end)
+{
+ struct vm_radix_node *rnode;
+ vm_pindex_t start;
+ vm_pindex_t inc;
+ int slot;
+ int level;
+
+ start = *startp;
+restart:
+ level = vm_radix_height(rtree, &rnode);
+ if (start > VM_RADIX_MAX(level) || (end && start >= end)) {
+ rnode = NULL;
+ goto out;
+ }
+ /*
+ * Search the tree from the top for any leaf node holding an index
+ * between start and end.
+ */
+ for (level--; level; level--) {
+ slot = vm_radix_slot(start, level);
+ CTR6(KTR_VM,
+ "leaf: tree %p, " KFRMT64(start) ", level %d, slot %d, rnode %p",
+ rtree, KSPLT64L(start), KSPLT64H(start), level, slot,
+ rnode);
+ CTR2(KTR_VM, "leaf: rnode %p, child %p", rnode,
+ rnode->rn_child[slot]);
+ if (rnode->rn_child[slot] != NULL) {
+ rnode = rnode->rn_child[slot];
+ continue;
+ }
+ /*
+ * Calculate how much to increment our index by
+ * based on the tree level. We must truncate the
+		 * lower bits to start from the beginning of the
+ * next leaf.
+ */
+ inc = 1LL << (level * VM_RADIX_WIDTH);
+ start &= ~VM_RADIX_MAX(level);
+
+		/* Avoid the start address wrapping around. */
+ if ((VM_RADIX_MAXVAL - start) < inc) {
+ rnode = NULL;
+ goto out;
+ }
+ start += inc;
+ slot++;
+ CTR6(KTR_VM,
+ "leaf: " KFRMT64(start) ", " KFRMT64(end) ", " KFRMT64(inc),
+ KSPLT64L(start), KSPLT64H(start), KSPLT64L(end),
+ KSPLT64H(end), KSPLT64L(inc), KSPLT64H(inc));
+ CTR2(KTR_VM, "leaf: level %d, slot %d", level, slot);
+ for (; slot < VM_RADIX_COUNT; slot++, start += inc) {
+ if (end != 0 && start >= end) {
+ rnode = NULL;
+ goto out;
+ }
+ if (rnode->rn_child[slot]) {
+ rnode = rnode->rn_child[slot];
+ break;
+ }
+ if ((VM_RADIX_MAXVAL - start) < inc) {
+ rnode = NULL;
+ goto out;
+ }
+ }
+ if (slot == VM_RADIX_COUNT)
+ goto restart;
+ }
+
+out:
+ *startp = start;
+ return (rnode);
+}
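+
+/*
+ * A worked example of the skip logic above, assuming VM_RADIX_WIDTH == 4:
+ * if the level 1 subtree covering indexes 0x20-0x2f has no children,
+ * start is truncated to 0x20 and advanced by 1 << 4, so the search
+ * resumes at index 0x30.
+ */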
+
+/*
+ * Looks up as many as cnt values between start and end, and stores
+ * them in the caller-allocated array out.  The next index can be used
+ * to restart the scan.  This optimizes forward scans in the tree.
+ * If end is 0, *exhausted is set to 1 once the entire index space has
+ * been scanned.
+ */
+int
+vm_radix_lookupn(struct vm_radix *rtree, vm_pindex_t start,
+ vm_pindex_t end, int color, void **out, int cnt, vm_pindex_t *next,
+ u_int *exhausted)
+{
+ struct vm_radix_node *rnode;
+ void *val;
+ int slot;
+ int outidx;
+
+ CTR5(KTR_VM, "lookupn: tree %p, " KFRMT64(start) ", " KFRMT64(end),
+ rtree, KSPLT64L(start), KSPLT64H(start), KSPLT64L(end),
+ KSPLT64H(end));
+ if (end == 0)
+ *exhausted = 0;
+ if (rtree->rt_root == 0)
+ return (0);
+ outidx = 0;
+ while ((rnode = vm_radix_leaf(rtree, &start, end)) != NULL) {
+ slot = vm_radix_slot(start, 0);
+ for (; slot < VM_RADIX_COUNT; slot++, start++) {
+ if (end != 0 && start >= end)
+ goto out;
+ val = vm_radix_match(rnode->rn_child[slot], color);
+ if (val == NULL) {
+
+			/*
+			 * The start address can wrap at the
+			 * VM_RADIX_MAXVAL value.
+			 * We need to make sure that start always
+			 * points to the next chunk (even when
+			 * wrapping) in order to stay consistent with
+			 * the default scanning behaviour.  Also,
+			 * because of the nature of the wrapping, the
+			 * wrap-up checks must be done after all the
+			 * other checks on start have completed.
+			 */
+ if ((VM_RADIX_MAXVAL - start) == 0) {
+ start++;
+ if (end == 0)
+ *exhausted = 1;
+ goto out;
+ }
+ continue;
+ }
+ CTR5(KTR_VM,
+	    "lookupn: tree %p " KFRMT64(start) " slot %d found child %p",
+ rtree, KSPLT64L(start), KSPLT64H(start), slot, val);
+ out[outidx] = val;
+ if (++outidx == cnt ||
+ (VM_RADIX_MAXVAL - start) == 0) {
+ start++;
+ if ((VM_RADIX_MAXVAL - start) == 0 && end == 0)
+ *exhausted = 1;
+ goto out;
+ }
+ }
+ MPASS((VM_RADIX_MAXVAL - start) != 0);
+ if (end != 0 && start >= end)
+ break;
+ }
+out:
+ *next = start;
+ return (outidx);
+}
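+
+/*
+ * An illustrative sketch (disabled): draining all the black entries in
+ * [start, end) in batches through vm_radix_lookupn(), restarting each
+ * pass from the returned next index.  The batch size of 16 is an
+ * arbitrary example value.
+ */
+#if 0
+static void
+vm_radix_scan_example(struct vm_radix *rtree, vm_pindex_t start,
+    vm_pindex_t end)
+{
+	void *batch[16];
+	vm_pindex_t next;
+	u_int exhausted;
+	int i, n;
+
+	exhausted = 0;
+	do {
+		n = vm_radix_lookupn(rtree, start, end, VM_RADIX_BLACK,
+		    batch, 16, &next, &exhausted);
+		for (i = 0; i < n; i++)
+			(void)batch[i];		/* Consume each value. */
+		start = next;
+	} while (n == 16 && (end == 0 ? exhausted == 0 : start < end));
+}
+#endif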
+
+#if 0
+void
+vm_radix_foreach(struct vm_radix *rtree, vm_pindex_t start, vm_pindex_t end,
+ int color, void (*iter)(void *))
+{
+ struct vm_radix_node *rnode;
+ void *val;
+ int slot;
+
+ if (rtree->rt_root == 0)
+ return;
+ while ((rnode = vm_radix_leaf(rtree, &start, end)) != NULL) {
+ slot = vm_radix_slot(start, 0);
+ for (; slot < VM_RADIX_COUNT; slot++, start++) {
+ if (end != 0 && start >= end)
+ return;
+ val = vm_radix_match(rnode->rn_child[slot], color);
+ if (val)
+ iter(val);
+ }
+ if (end != 0 && start >= end)
+ return;
+ }
+}
+#endif
+
+/*
+ * Look up any entry at a position less than or equal to index.
+ */
+void *
+vm_radix_lookup_le(struct vm_radix *rtree, vm_pindex_t index, int color)
+{
+ struct vm_radix_node *rnode;
+ struct vm_radix_node *child;
+ vm_pindex_t max;
+ vm_pindex_t inc;
+ void *val;
+ int slot;
+ int level;
+
+ CTR3(KTR_VM, "lookup_le: tree %p, " KFRMT64(index), rtree,
+ KSPLT64L(index), KSPLT64H(index));
+restart:
+ level = vm_radix_height(rtree, &rnode);
+ if (rnode == NULL)
+ return (NULL);
+ max = VM_RADIX_MAX(level);
+ if (index > max || index == 0)
+ index = max;
+ /*
+ * Search the tree from the top for any leaf node holding an index
+ * lower than 'index'.
+ */
+ level--;
+ while (rnode) {
+ slot = vm_radix_slot(index, level);
+ CTR6(KTR_VM,
+ "lookup_le: tree %p, " KFRMT64(index) ", level %d, slot %d, rnode %p",
+ rtree, KSPLT64L(index), KSPLT64H(index), level, slot,
+ rnode);
+ CTR2(KTR_VM, "lookup_le: rnode %p, child %p", rnode,
+ rnode->rn_child[slot]);
+ if (level == 0)
+ break;
+ /*
+ * If we don't have an exact match we must start our search
+ * from the next leaf and adjust our index appropriately.
+ */
+ if ((child = rnode->rn_child[slot]) == NULL) {
+ /*
+ * Calculate how much to decrement our index by
+ * based on the tree level. We must set the
+ * lower bits to start from the end of the next
+ * leaf.
+ */
+ inc = 1LL << (level * VM_RADIX_WIDTH);
+ index |= VM_RADIX_MAX(level);
+ index -= inc;
+ slot--;
+ CTR6(KTR_VM,
+	    "lookup_le: " KFRMT64(index) ", " KFRMT64(inc) ", level %d slot %d",
+ KSPLT64L(index), KSPLT64H(index), KSPLT64L(inc),
+ KSPLT64H(inc), level, slot);
+ for (; slot >= 0; slot--, index -= inc) {
+ child = rnode->rn_child[slot];
+ if (child)
+ break;
+ }
+ }
+ rnode = child;
+ level--;
+ }
+ if (rnode) {
+ for (; slot >= 0; slot--, index--) {
+ val = vm_radix_match(rnode->rn_child[slot], color);
+ if (val)
+ return (val);
+ }
+ }
+ if (index != -1)
+ goto restart;
+ return (NULL);
+}
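+
+/*
+ * A usage note for vm_radix_lookup_le() above: with entries at indexes
+ * 5 and 9, a lookup at index 7 returns the entry at index 5, while an
+ * index of 0 is treated as a request for the highest entry in the tree.
+ */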
+
+/*
+ * Remove an entry from the tree. Expects the entry to be already present.
+ */
+void
+vm_radix_remove(struct vm_radix *rtree, vm_pindex_t index, int color)
+{
+
+ vm_radix_sweep(rtree, index, color, 1);
+}
+
+/*
+ * Remove and free all the nodes from the radix tree.
+ * This function is recursive but there is tight control on it as the
+ * maximum depth of the tree is fixed.
+ */
+void
+vm_radix_reclaim_allnodes(struct vm_radix *rtree)
+{
+ struct vm_radix_node *root;
+ int level;
+
+ if (rtree->rt_root == 0)
+ return;
+ level = vm_radix_height(rtree, &root);
+ vm_radix_reclaim_allnodes_internal(root, level - 1);
+ rtree->rt_root = 0;
+}
+
+#ifdef notyet
+/*
+ * Attempts to reduce the height of the tree.
+ */
+void
+vm_radix_shrink(struct vm_radix *rtree)
+{
+ struct vm_radix_node *tmp, *root;
+ int level;
+
+ if (rtree->rt_root == 0)
+ return;
+ level = vm_radix_height(rtree, &root);
+
+ /* Adjust the height of the tree. */
+ while (root->rn_count == 1 && root->rn_child[0] != NULL) {
+ tmp = root;
+ root->rn_count--;
+ root = root->rn_child[0];
+ level--;
+ vm_radix_node_put(tmp);
+ }
+ /* Finally see if we have an empty tree. */
+ if (root->rn_count == 0) {
+ vm_radix_node_put(root);
+ root = NULL;
+ level--;
+ }
+ vm_radix_setroot(rtree, root, level);
+}
+#endif