-rw-r--r--  sys/conf/options.ia64    2
-rw-r--r--  sys/ia64/ia64/pmap.c     657
-rw-r--r--  sys/ia64/include/pmap.h  21
3 files changed, 447 insertions, 233 deletions
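
The change below retires the UMA "PV ENTRY" zone and its vm.pmap.shpgperproc / vm.pmap.pv_entries tunables: pv entries are now carved out of page-sized struct pv_chunk allocations linked per pmap, the page-queues lock is replaced by a dedicated global pv list rwlock (pvh_global_lock), and pmap_pv_reclaim() frees chunks when vm_page_alloc() fails. As orientation, the sketch below (not part of the patch) shows the chunk addressing the new code leans on: pv_to_chunk() recovers a chunk by masking a pv pointer, and free_pv_entry()/get_pv_entry() convert between an entry index and a (pc_map word, bit) pair. The 8 KB page size and the LP64 offsets used here are assumptions for the demo.

/*
 * Illustrative sketch, not part of the patch: the chunk addressing used
 * by the new pv code.  A pv_chunk occupies exactly one page-aligned
 * 8 KB page, so pv_to_chunk() only has to mask a pv pointer, and an
 * entry's index maps to a (pc_map word, bit) position.
 */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define PAGE_SIZE       8192UL          /* ia64 base page size (assumed) */
#define PAGE_MASK       (PAGE_SIZE - 1)
#define NBBY            8

static void *
pv_to_chunk(void *pv)
{
        /* Chunks are page-sized and page-aligned: drop the low bits. */
        return ((void *)((uintptr_t)pv & ~(uintptr_t)PAGE_MASK));
}

int
main(void)
{
        void *chunk, *pv;
        int idx, field, bit;

        chunk = aligned_alloc(PAGE_SIZE, PAGE_SIZE);    /* stands in for the wired chunk page */
        idx = 100;                                      /* hypothetical pc_pventry[] index */

        /* free_pv_entry(): entry index -> bitmap word and bit. */
        field = idx / (sizeof(unsigned long) * NBBY);
        bit = idx % (sizeof(unsigned long) * NBBY);

        /* get_pv_entry()/pmap_pv_reclaim(): word and bit -> index. */
        pv = (char *)chunk + 104 + idx * 24;            /* illustrative LP64 offsets */
        printf("idx %d -> field %d bit %d -> idx %d; chunk %s\n", idx, field,
            bit, (int)(field * sizeof(unsigned long) * NBBY + bit),
            pv_to_chunk(pv) == chunk ? "recovered" : "lost");
        free(chunk);
        return (0);
}
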
diff --git a/sys/conf/options.ia64 b/sys/conf/options.ia64
index 6eefc8c..750aeca 100644
--- a/sys/conf/options.ia64
+++ b/sys/conf/options.ia64
@@ -11,6 +11,8 @@ UWX_TRACE_ENABLE opt_global.h
COMPAT_FREEBSD32 opt_compat.h
+PV_STATS opt_pmap.h
+
EXCEPTION_TRACING opt_xtrace.h
VGA_ALT_SEQACCESS opt_vga.h
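
The options.ia64 hunk above routes PV_STATS through opt_pmap.h, which pmap.c now includes. A minimal sketch of what the option toggles, assuming the usual config(8) behavior where "options PV_STATS" in a kernel configuration makes the generated opt_pmap.h define PV_STATS:

/*
 * Sketch (assumed, not part of the patch): with PV_STATS defined, the
 * statements wrapped in PV_STAT() are compiled in; without it they
 * expand to empty do/while blocks and the counters cost nothing.
 */
#ifdef PV_STATS
#define PV_STAT(x)      do { x ; } while (0)
#else
#define PV_STAT(x)      do { } while (0)
#endif

static int pc_chunk_allocs;

static void
pv_stat_example(void)
{
        PV_STAT(pc_chunk_allocs++);     /* as used in get_pv_entry() */
}
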
diff --git a/sys/ia64/ia64/pmap.c b/sys/ia64/ia64/pmap.c
index d824405..dd312c0 100644
--- a/sys/ia64/ia64/pmap.c
+++ b/sys/ia64/ia64/pmap.c
@@ -48,12 +48,15 @@
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
+#include "opt_pmap.h"
+
#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mman.h>
#include <sys/mutex.h>
#include <sys/proc.h>
+#include <sys/rwlock.h>
#include <sys/smp.h>
#include <sys/sysctl.h>
#include <sys/systm.h>
@@ -108,16 +111,18 @@ __FBSDID("$FreeBSD$");
/* XXX move to a header. */
extern uint64_t ia64_gateway_page[];
-#ifndef PMAP_SHPGPERPROC
-#define PMAP_SHPGPERPROC 200
-#endif
-
#if !defined(DIAGNOSTIC)
#define PMAP_INLINE __inline
#else
#define PMAP_INLINE
#endif
+#ifdef PV_STATS
+#define PV_STAT(x) do { x ; } while (0)
+#else
+#define PV_STAT(x) do { } while (0)
+#endif
+
#define pmap_accessed(lpte) ((lpte)->pte & PTE_ACCESSED)
#define pmap_dirty(lpte) ((lpte)->pte & PTE_DIRTY)
#define pmap_exec(lpte) ((lpte)->pte & PTE_AR_RX)
@@ -187,10 +192,21 @@ static uint64_t *pmap_ridmap;
struct mtx pmap_ridmutex;
/*
+ * Isolate the global pv list lock from data and other locks to prevent false
+ * sharing within the cache.
+ */
+static struct {
+ struct rwlock lock;
+ char padding[CACHE_LINE_SIZE - sizeof(struct rwlock)];
+} pvh_global __aligned(CACHE_LINE_SIZE);
+
+#define pvh_global_lock pvh_global.lock
+
+/*
* Data for the pv entry allocation mechanism
*/
-static uma_zone_t pvzone;
-static int pv_entry_count = 0, pv_entry_max = 0, pv_entry_high_water = 0;
+static TAILQ_HEAD(pch, pv_chunk) pv_chunks = TAILQ_HEAD_INITIALIZER(pv_chunks);
+static int pv_entry_count;
/*
* Data for allocating PTEs for user processes.
@@ -224,8 +240,10 @@ SYSCTL_PROC(_machdep_vhpt, OID_AUTO, population, CTLTYPE_INT | CTLFLAG_RD,
static struct ia64_lpte *pmap_find_vhpt(vm_offset_t va);
-static PMAP_INLINE void free_pv_entry(pv_entry_t pv);
-static pv_entry_t get_pv_entry(pmap_t locked_pmap);
+static void free_pv_chunk(struct pv_chunk *pc);
+static void free_pv_entry(pmap_t pmap, pv_entry_t pv);
+static pv_entry_t get_pv_entry(pmap_t pmap, boolean_t try);
+static vm_page_t pmap_pv_reclaim(pmap_t locked_pmap);
static void pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va,
vm_page_t m, vm_prot_t prot);
@@ -402,9 +420,14 @@ pmap_bootstrap()
PMAP_LOCK_INIT(kernel_pmap);
for (i = 0; i < IA64_VM_MINKERN_REGION; i++)
kernel_pmap->pm_rid[i] = 0;
- TAILQ_INIT(&kernel_pmap->pm_pvlist);
+ TAILQ_INIT(&kernel_pmap->pm_pvchunk);
PCPU_SET(md.current_pmap, kernel_pmap);
+ /*
+ * Initialize the global pv list lock.
+ */
+ rw_init(&pvh_global_lock, "pmap pv global");
+
/* Region 5 is mapped via the VHPT. */
ia64_set_rr(IA64_RR_BASE(5), (5 << 8) | (PAGE_SHIFT << 2) | 1);
@@ -449,7 +472,6 @@ pmap_page_init(vm_page_t m)
{
TAILQ_INIT(&m->md.pv_list);
- m->md.pv_list_count = 0;
m->md.memattr = VM_MEMATTR_DEFAULT;
}
@@ -461,19 +483,6 @@ pmap_page_init(vm_page_t m)
void
pmap_init(void)
{
- int shpgperproc = PMAP_SHPGPERPROC;
-
- /*
- * Initialize the address space (zone) for the pv entries. Set a
- * high water mark so that the system can recover from excessive
- * numbers of pv entries.
- */
- pvzone = uma_zcreate("PV ENTRY", sizeof(struct pv_entry), NULL, NULL,
- NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM | UMA_ZONE_NOFREE);
- TUNABLE_INT_FETCH("vm.pmap.shpgperproc", &shpgperproc);
- pv_entry_max = shpgperproc * maxproc + cnt.v_page_count;
- TUNABLE_INT_FETCH("vm.pmap.pv_entries", &pv_entry_max);
- pv_entry_high_water = 9 * (pv_entry_max / 10);
ptezone = uma_zcreate("PT ENTRY", sizeof (struct ia64_lpte),
NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM|UMA_ZONE_NOFREE);
@@ -615,7 +624,7 @@ pmap_pinit(struct pmap *pmap)
PMAP_LOCK_INIT(pmap);
for (i = 0; i < IA64_VM_MINKERN_REGION; i++)
pmap->pm_rid[i] = pmap_allocate_rid();
- TAILQ_INIT(&pmap->pm_pvlist);
+ TAILQ_INIT(&pmap->pm_pvchunk);
bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
return (1);
}
@@ -683,14 +692,222 @@ pmap_growkernel(vm_offset_t addr)
* page management routines.
***************************************************/
+CTASSERT(sizeof(struct pv_chunk) == PAGE_SIZE);
+CTASSERT(_NPCM == 6);
+CTASSERT(_NPCPV == 337);
+
+static __inline struct pv_chunk *
+pv_to_chunk(pv_entry_t pv)
+{
+
+ return ((struct pv_chunk *)((uintptr_t)pv & ~(uintptr_t)PAGE_MASK));
+}
+
+#define PV_PMAP(pv) (pv_to_chunk(pv)->pc_pmap)
+
+#define PC_FREE0_4 0xfffffffffffffffful
+#define PC_FREE5 0x000000000001fffful
+
+static const u_long pc_freemask[_NPCM] = {
+ PC_FREE0_4, PC_FREE0_4, PC_FREE0_4,
+ PC_FREE0_4, PC_FREE0_4, PC_FREE5
+};
+
+static SYSCTL_NODE(_vm, OID_AUTO, pmap, CTLFLAG_RD, 0, "VM/pmap parameters");
+
+SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_count, CTLFLAG_RD, &pv_entry_count, 0,
+ "Current number of pv entries");
+
+#ifdef PV_STATS
+static int pc_chunk_count, pc_chunk_allocs, pc_chunk_frees, pc_chunk_tryfail;
+
+SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_count, CTLFLAG_RD, &pc_chunk_count, 0,
+ "Current number of pv entry chunks");
+SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_allocs, CTLFLAG_RD, &pc_chunk_allocs, 0,
+ "Current number of pv entry chunks allocated");
+SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_frees, CTLFLAG_RD, &pc_chunk_frees, 0,
+ "Current number of pv entry chunks frees");
+SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_tryfail, CTLFLAG_RD, &pc_chunk_tryfail, 0,
+ "Number of times tried to get a chunk page but failed.");
+
+static long pv_entry_frees, pv_entry_allocs;
+static int pv_entry_spare;
+
+SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_frees, CTLFLAG_RD, &pv_entry_frees, 0,
+ "Current number of pv entry frees");
+SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_allocs, CTLFLAG_RD, &pv_entry_allocs, 0,
+ "Current number of pv entry allocs");
+SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_spare, CTLFLAG_RD, &pv_entry_spare, 0,
+ "Current number of spare pv entries");
+#endif
+
+/*
+ * We are in a serious low memory condition. Resort to
+ * drastic measures to free some pages so we can allocate
+ * another pv entry chunk.
+ */
+static vm_page_t
+pmap_pv_reclaim(pmap_t locked_pmap)
+{
+ struct pch newtail;
+ struct pv_chunk *pc;
+ struct ia64_lpte *pte;
+ pmap_t pmap;
+ pv_entry_t pv;
+ vm_offset_t va;
+ vm_page_t m, m_pc;
+ u_long inuse;
+ int bit, field, freed, idx;
+
+ PMAP_LOCK_ASSERT(locked_pmap, MA_OWNED);
+ pmap = NULL;
+ m_pc = NULL;
+ TAILQ_INIT(&newtail);
+ while ((pc = TAILQ_FIRST(&pv_chunks)) != NULL) {
+ TAILQ_REMOVE(&pv_chunks, pc, pc_lru);
+ if (pmap != pc->pc_pmap) {
+ if (pmap != NULL) {
+ if (pmap != locked_pmap) {
+ pmap_switch(locked_pmap);
+ PMAP_UNLOCK(pmap);
+ }
+ }
+ pmap = pc->pc_pmap;
+ /* Avoid deadlock and lock recursion. */
+ if (pmap > locked_pmap)
+ PMAP_LOCK(pmap);
+ else if (pmap != locked_pmap && !PMAP_TRYLOCK(pmap)) {
+ pmap = NULL;
+ TAILQ_INSERT_TAIL(&newtail, pc, pc_lru);
+ continue;
+ }
+ pmap_switch(pmap);
+ }
+
+ /*
+ * Destroy every non-wired, 8 KB page mapping in the chunk.
+ */
+ freed = 0;
+ for (field = 0; field < _NPCM; field++) {
+ for (inuse = ~pc->pc_map[field] & pc_freemask[field];
+ inuse != 0; inuse &= ~(1UL << bit)) {
+ bit = ffsl(inuse) - 1;
+ idx = field * sizeof(inuse) * NBBY + bit;
+ pv = &pc->pc_pventry[idx];
+ va = pv->pv_va;
+ pte = pmap_find_vhpt(va);
+ KASSERT(pte != NULL, ("pte"));
+ if (pmap_wired(pte))
+ continue;
+ pmap_remove_vhpt(va);
+ pmap_invalidate_page(va);
+ m = PHYS_TO_VM_PAGE(pmap_ppn(pte));
+ if (pmap_accessed(pte))
+ vm_page_aflag_set(m, PGA_REFERENCED);
+ if (pmap_dirty(pte))
+ vm_page_dirty(m);
+ pmap_free_pte(pte, va);
+ TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
+ if (TAILQ_EMPTY(&m->md.pv_list))
+ vm_page_aflag_clear(m, PGA_WRITEABLE);
+ pc->pc_map[field] |= 1UL << bit;
+ freed++;
+ }
+ }
+ if (freed == 0) {
+ TAILQ_INSERT_TAIL(&newtail, pc, pc_lru);
+ continue;
+ }
+ /* Every freed mapping is for an 8 KB page. */
+ pmap->pm_stats.resident_count -= freed;
+ PV_STAT(pv_entry_frees += freed);
+ PV_STAT(pv_entry_spare += freed);
+ pv_entry_count -= freed;
+ TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
+ for (field = 0; field < _NPCM; field++)
+ if (pc->pc_map[field] != pc_freemask[field]) {
+ TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc,
+ pc_list);
+ TAILQ_INSERT_TAIL(&newtail, pc, pc_lru);
+
+ /*
+ * One freed pv entry in locked_pmap is
+ * sufficient.
+ */
+ if (pmap == locked_pmap)
+ goto out;
+ break;
+ }
+ if (field == _NPCM) {
+ PV_STAT(pv_entry_spare -= _NPCPV);
+ PV_STAT(pc_chunk_count--);
+ PV_STAT(pc_chunk_frees++);
+ /* Entire chunk is free; return it. */
+ m_pc = PHYS_TO_VM_PAGE(IA64_RR_MASK((vm_offset_t)pc));
+ break;
+ }
+ }
+out:
+ TAILQ_CONCAT(&pv_chunks, &newtail, pc_lru);
+ if (pmap != NULL) {
+ if (pmap != locked_pmap) {
+ pmap_switch(locked_pmap);
+ PMAP_UNLOCK(pmap);
+ }
+ }
+ return (m_pc);
+}
+
/*
* free the pv_entry back to the free list
*/
-static PMAP_INLINE void
-free_pv_entry(pv_entry_t pv)
+static void
+free_pv_entry(pmap_t pmap, pv_entry_t pv)
{
+ struct pv_chunk *pc;
+ int bit, field, idx;
+
+ rw_assert(&pvh_global_lock, RA_WLOCKED);
+ PMAP_LOCK_ASSERT(pmap, MA_OWNED);
+ PV_STAT(pv_entry_frees++);
+ PV_STAT(pv_entry_spare++);
pv_entry_count--;
- uma_zfree(pvzone, pv);
+ pc = pv_to_chunk(pv);
+ idx = pv - &pc->pc_pventry[0];
+ field = idx / (sizeof(u_long) * NBBY);
+ bit = idx % (sizeof(u_long) * NBBY);
+ pc->pc_map[field] |= 1ul << bit;
+ for (idx = 0; idx < _NPCM; idx++)
+ if (pc->pc_map[idx] != pc_freemask[idx]) {
+ /*
+ * 98% of the time, pc is already at the head of the
+ * list. If it isn't already, move it to the head.
+ */
+ if (__predict_false(TAILQ_FIRST(&pmap->pm_pvchunk) !=
+ pc)) {
+ TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
+ TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc,
+ pc_list);
+ }
+ return;
+ }
+ TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
+ free_pv_chunk(pc);
+}
+
+static void
+free_pv_chunk(struct pv_chunk *pc)
+{
+ vm_page_t m;
+
+ TAILQ_REMOVE(&pv_chunks, pc, pc_lru);
+ PV_STAT(pv_entry_spare -= _NPCPV);
+ PV_STAT(pc_chunk_count--);
+ PV_STAT(pc_chunk_frees++);
+ /* entire chunk is free, return it */
+ m = PHYS_TO_VM_PAGE(IA64_RR_MASK((vm_offset_t)pc));
+ vm_page_unwire(m, 0);
+ vm_page_free(m);
}
/*
@@ -698,82 +915,67 @@ free_pv_entry(pv_entry_t pv)
* when needed.
*/
static pv_entry_t
-get_pv_entry(pmap_t locked_pmap)
+get_pv_entry(pmap_t pmap, boolean_t try)
{
- static const struct timeval printinterval = { 60, 0 };
- static struct timeval lastprint;
- struct vpgqueues *vpq;
- struct ia64_lpte *pte;
- pmap_t oldpmap, pmap;
- pv_entry_t allocated_pv, next_pv, pv;
- vm_offset_t va;
+ struct pv_chunk *pc;
+ pv_entry_t pv;
vm_page_t m;
+ int bit, field, idx;
- PMAP_LOCK_ASSERT(locked_pmap, MA_OWNED);
- mtx_assert(&vm_page_queue_mtx, MA_OWNED);
- allocated_pv = uma_zalloc(pvzone, M_NOWAIT);
- if (allocated_pv != NULL) {
- pv_entry_count++;
- if (pv_entry_count > pv_entry_high_water)
- pagedaemon_wakeup();
- else
- return (allocated_pv);
- }
-
- /*
- * Reclaim pv entries: At first, destroy mappings to inactive
- * pages. After that, if a pv entry is still needed, destroy
- * mappings to active pages.
- */
- if (ratecheck(&lastprint, &printinterval))
- printf("Approaching the limit on PV entries, "
- "increase the vm.pmap.shpgperproc tunable.\n");
- vpq = &vm_page_queues[PQ_INACTIVE];
+ rw_assert(&pvh_global_lock, RA_WLOCKED);
+ PMAP_LOCK_ASSERT(pmap, MA_OWNED);
+ PV_STAT(pv_entry_allocs++);
+ pv_entry_count++;
retry:
- TAILQ_FOREACH(m, &vpq->pl, pageq) {
- if ((m->flags & PG_MARKER) != 0 || m->hold_count || m->busy)
- continue;
- TAILQ_FOREACH_SAFE(pv, &m->md.pv_list, pv_list, next_pv) {
- va = pv->pv_va;
- pmap = pv->pv_pmap;
- /* Avoid deadlock and lock recursion. */
- if (pmap > locked_pmap)
- PMAP_LOCK(pmap);
- else if (pmap != locked_pmap && !PMAP_TRYLOCK(pmap))
- continue;
- pmap->pm_stats.resident_count--;
- oldpmap = pmap_switch(pmap);
- pte = pmap_find_vhpt(va);
- KASSERT(pte != NULL, ("pte"));
- pmap_remove_vhpt(va);
- pmap_invalidate_page(va);
- pmap_switch(oldpmap);
- if (pmap_accessed(pte))
- vm_page_aflag_set(m, PGA_REFERENCED);
- if (pmap_dirty(pte))
- vm_page_dirty(m);
- pmap_free_pte(pte, va);
- TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist);
- m->md.pv_list_count--;
- TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
- if (pmap != locked_pmap)
- PMAP_UNLOCK(pmap);
- if (allocated_pv == NULL)
- allocated_pv = pv;
- else
- free_pv_entry(pv);
+ pc = TAILQ_FIRST(&pmap->pm_pvchunk);
+ if (pc != NULL) {
+ for (field = 0; field < _NPCM; field++) {
+ if (pc->pc_map[field]) {
+ bit = ffsl(pc->pc_map[field]) - 1;
+ break;
+ }
+ }
+ if (field < _NPCM) {
+ idx = field * sizeof(pc->pc_map[field]) * NBBY + bit;
+ pv = &pc->pc_pventry[idx];
+ pc->pc_map[field] &= ~(1ul << bit);
+ /* If this was the last item, move it to tail */
+ for (field = 0; field < _NPCM; field++)
+ if (pc->pc_map[field] != 0) {
+ PV_STAT(pv_entry_spare--);
+ return (pv); /* not full, return */
+ }
+ TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
+ TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc, pc_list);
+ PV_STAT(pv_entry_spare--);
+ return (pv);
}
- if (TAILQ_EMPTY(&m->md.pv_list))
- vm_page_aflag_clear(m, PGA_WRITEABLE);
}
- if (allocated_pv == NULL) {
- if (vpq == &vm_page_queues[PQ_INACTIVE]) {
- vpq = &vm_page_queues[PQ_ACTIVE];
- goto retry;
+ /* No free items, allocate another chunk */
+ m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ |
+ VM_ALLOC_WIRED);
+ if (m == NULL) {
+ if (try) {
+ pv_entry_count--;
+ PV_STAT(pc_chunk_tryfail++);
+ return (NULL);
}
- panic("get_pv_entry: increase the vm.pmap.shpgperproc tunable");
+ m = pmap_pv_reclaim(pmap);
+ if (m == NULL)
+ goto retry;
}
- return (allocated_pv);
+ PV_STAT(pc_chunk_count++);
+ PV_STAT(pc_chunk_allocs++);
+ pc = (struct pv_chunk *)IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(m));
+ pc->pc_pmap = pmap;
+ pc->pc_map[0] = pc_freemask[0] & ~1ul; /* preallocated bit 0 */
+ for (field = 1; field < _NPCM; field++)
+ pc->pc_map[field] = pc_freemask[field];
+ TAILQ_INSERT_TAIL(&pv_chunks, pc, pc_lru);
+ pv = &pc->pc_pventry[0];
+ TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list);
+ PV_STAT(pv_entry_spare += _NPCPV - 1);
+ return (pv);
}
/*
@@ -785,15 +987,10 @@ pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va, vm_page_t m)
pv_entry_t pv;
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
- mtx_assert(&vm_page_queue_mtx, MA_OWNED);
- if (pv_entry_count < pv_entry_high_water &&
- (pv = uma_zalloc(pvzone, M_NOWAIT)) != NULL) {
- pv_entry_count++;
+ rw_assert(&pvh_global_lock, RA_WLOCKED);
+ if ((pv = get_pv_entry(pmap, TRUE)) != NULL) {
pv->pv_va = va;
- pv->pv_pmap = pmap;
- TAILQ_INSERT_TAIL(&pmap->pm_pvlist, pv, pv_plist);
TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
- m->md.pv_list_count++;
return (TRUE);
} else
return (FALSE);
@@ -899,28 +1096,21 @@ pmap_find_vhpt(vm_offset_t va)
static int
pmap_remove_entry(pmap_t pmap, vm_page_t m, vm_offset_t va, pv_entry_t pv)
{
+
+ rw_assert(&pvh_global_lock, RA_WLOCKED);
if (!pv) {
- if (m->md.pv_list_count < pmap->pm_stats.resident_count) {
- TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
- if (pmap == pv->pv_pmap && va == pv->pv_va)
- break;
- }
- } else {
- TAILQ_FOREACH(pv, &pmap->pm_pvlist, pv_plist) {
- if (va == pv->pv_va)
- break;
- }
+ TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
+ if (pmap == PV_PMAP(pv) && va == pv->pv_va)
+ break;
}
}
if (pv) {
TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
- m->md.pv_list_count--;
if (TAILQ_FIRST(&m->md.pv_list) == NULL)
vm_page_aflag_clear(m, PGA_WRITEABLE);
- TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist);
- free_pv_entry(pv);
+ free_pv_entry(pmap, pv);
return 0;
} else {
return ENOENT;
@@ -936,15 +1126,10 @@ pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t m)
{
pv_entry_t pv;
- pv = get_pv_entry(pmap);
- pv->pv_pmap = pmap;
+ rw_assert(&pvh_global_lock, RA_WLOCKED);
+ pv = get_pv_entry(pmap, FALSE);
pv->pv_va = va;
-
- PMAP_LOCK_ASSERT(pmap, MA_OWNED);
- mtx_assert(&vm_page_queue_mtx, MA_OWNED);
- TAILQ_INSERT_TAIL(&pmap->pm_pvlist, pv, pv_plist);
TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
- m->md.pv_list_count++;
}
/*
@@ -1317,53 +1502,32 @@ pmap_map(vm_offset_t *virt, vm_offset_t start, vm_offset_t end, int prot)
*
* It is assumed that the start and end are properly
* rounded to the page size.
+ *
+ * Sparsely used ranges are inefficiently removed. The VHPT is
+ * probed for every page within the range. XXX
*/
void
pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
{
pmap_t oldpmap;
vm_offset_t va;
- pv_entry_t npv, pv;
struct ia64_lpte *pte;
+ /*
+ * Perform an unsynchronized read. This is, however, safe.
+ */
if (pmap->pm_stats.resident_count == 0)
return;
- vm_page_lock_queues();
+ rw_wlock(&pvh_global_lock);
PMAP_LOCK(pmap);
oldpmap = pmap_switch(pmap);
-
- /*
- * special handling of removing one page. a very
- * common operation and easy to short circuit some
- * code.
- */
- if (sva + PAGE_SIZE == eva) {
- pte = pmap_find_vhpt(sva);
+ for (va = sva; va < eva; va += PAGE_SIZE) {
+ pte = pmap_find_vhpt(va);
if (pte != NULL)
- pmap_remove_pte(pmap, pte, sva, 0, 1);
- goto out;
- }
-
- if (pmap->pm_stats.resident_count < ((eva - sva) >> PAGE_SHIFT)) {
- TAILQ_FOREACH_SAFE(pv, &pmap->pm_pvlist, pv_plist, npv) {
- va = pv->pv_va;
- if (va >= sva && va < eva) {
- pte = pmap_find_vhpt(va);
- KASSERT(pte != NULL, ("pte"));
- pmap_remove_pte(pmap, pte, va, pv, 1);
- }
- }
- } else {
- for (va = sva; va < eva; va += PAGE_SIZE) {
- pte = pmap_find_vhpt(va);
- if (pte != NULL)
- pmap_remove_pte(pmap, pte, va, 0, 1);
- }
+ pmap_remove_pte(pmap, pte, va, 0, 1);
}
-
-out:
- vm_page_unlock_queues();
+ rw_wunlock(&pvh_global_lock);
pmap_switch(oldpmap);
PMAP_UNLOCK(pmap);
}
@@ -1389,10 +1553,10 @@ pmap_remove_all(vm_page_t m)
KASSERT((m->oflags & VPO_UNMANAGED) == 0,
("pmap_remove_all: page %p is not managed", m));
- vm_page_lock_queues();
+ rw_wlock(&pvh_global_lock);
while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
struct ia64_lpte *pte;
- pmap_t pmap = pv->pv_pmap;
+ pmap_t pmap = PV_PMAP(pv);
vm_offset_t va = pv->pv_va;
PMAP_LOCK(pmap);
@@ -1406,7 +1570,7 @@ pmap_remove_all(vm_page_t m)
PMAP_UNLOCK(pmap);
}
vm_page_aflag_clear(m, PGA_WRITEABLE);
- vm_page_unlock_queues();
+ rw_wunlock(&pvh_global_lock);
}
/*
@@ -1485,7 +1649,7 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_prot_t access, vm_page_t m,
struct ia64_lpte *pte;
boolean_t icache_inval, managed;
- vm_page_lock_queues();
+ rw_wlock(&pvh_global_lock);
PMAP_LOCK(pmap);
oldpmap = pmap_switch(pmap);
@@ -1500,9 +1664,9 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_prot_t access, vm_page_t m,
while ((pte = pmap_find_pte(va)) == NULL) {
pmap_switch(oldpmap);
PMAP_UNLOCK(pmap);
- vm_page_unlock_queues();
+ rw_wunlock(&pvh_global_lock);
VM_WAIT;
- vm_page_lock_queues();
+ rw_wlock(&pvh_global_lock);
PMAP_LOCK(pmap);
oldpmap = pmap_switch(pmap);
}
@@ -1591,7 +1755,7 @@ validate:
if ((prot & VM_PROT_WRITE) != 0 && managed)
vm_page_aflag_set(m, PGA_WRITEABLE);
- vm_page_unlock_queues();
+ rw_wunlock(&pvh_global_lock);
pmap_switch(oldpmap);
PMAP_UNLOCK(pmap);
}
@@ -1619,14 +1783,14 @@ pmap_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end,
VM_OBJECT_LOCK_ASSERT(m_start->object, MA_OWNED);
psize = atop(end - start);
m = m_start;
- vm_page_lock_queues();
+ rw_wlock(&pvh_global_lock);
PMAP_LOCK(pmap);
oldpmap = pmap_switch(pmap);
while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) {
pmap_enter_quick_locked(pmap, start + ptoa(diff), m, prot);
m = TAILQ_NEXT(m, listq);
}
- vm_page_unlock_queues();
+ rw_wunlock(&pvh_global_lock);
pmap_switch(oldpmap);
PMAP_UNLOCK(pmap);
}
@@ -1645,11 +1809,11 @@ pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot)
{
pmap_t oldpmap;
- vm_page_lock_queues();
+ rw_wlock(&pvh_global_lock);
PMAP_LOCK(pmap);
oldpmap = pmap_switch(pmap);
pmap_enter_quick_locked(pmap, va, m, prot);
- vm_page_unlock_queues();
+ rw_wunlock(&pvh_global_lock);
pmap_switch(oldpmap);
PMAP_UNLOCK(pmap);
}
@@ -1664,7 +1828,7 @@ pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m,
KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva ||
(m->oflags & VPO_UNMANAGED) != 0,
("pmap_enter_quick_locked: managed mapping within the clean submap"));
- mtx_assert(&vm_page_queue_mtx, MA_OWNED);
+ rw_assert(&pvh_global_lock, RA_WLOCKED);
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
if ((pte = pmap_find_pte(va)) == NULL)
@@ -1845,9 +2009,9 @@ pmap_page_exists_quick(pmap_t pmap, vm_page_t m)
KASSERT((m->oflags & VPO_UNMANAGED) == 0,
("pmap_page_exists_quick: page %p is not managed", m));
rv = FALSE;
- vm_page_lock_queues();
+ rw_wlock(&pvh_global_lock);
TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
- if (pv->pv_pmap == pmap) {
+ if (PV_PMAP(pv) == pmap) {
rv = TRUE;
break;
}
@@ -1855,7 +2019,7 @@ pmap_page_exists_quick(pmap_t pmap, vm_page_t m)
if (loops >= 16)
break;
}
- vm_page_unlock_queues();
+ rw_wunlock(&pvh_global_lock);
return (rv);
}
@@ -1876,9 +2040,9 @@ pmap_page_wired_mappings(vm_page_t m)
count = 0;
if ((m->oflags & VPO_UNMANAGED) != 0)
return (count);
- vm_page_lock_queues();
+ rw_wlock(&pvh_global_lock);
TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
- pmap = pv->pv_pmap;
+ pmap = PV_PMAP(pv);
PMAP_LOCK(pmap);
oldpmap = pmap_switch(pmap);
pte = pmap_find_vhpt(pv->pv_va);
@@ -1888,7 +2052,7 @@ pmap_page_wired_mappings(vm_page_t m)
pmap_switch(oldpmap);
PMAP_UNLOCK(pmap);
}
- vm_page_unlock_queues();
+ rw_wunlock(&pvh_global_lock);
return (count);
}
@@ -1903,33 +2067,62 @@ pmap_page_wired_mappings(vm_page_t m)
void
pmap_remove_pages(pmap_t pmap)
{
- pmap_t oldpmap;
- pv_entry_t pv, npv;
+ struct pv_chunk *pc, *npc;
+ struct ia64_lpte *pte;
+ pv_entry_t pv;
+ vm_offset_t va;
+ vm_page_t m;
+ u_long inuse, bitmask;
+ int allfree, bit, field, idx;
if (pmap != vmspace_pmap(curthread->td_proc->p_vmspace)) {
printf("warning: %s called with non-current pmap\n",
__func__);
return;
}
-
- vm_page_lock_queues();
+ rw_wlock(&pvh_global_lock);
PMAP_LOCK(pmap);
- oldpmap = pmap_switch(pmap);
-
- for (pv = TAILQ_FIRST(&pmap->pm_pvlist); pv; pv = npv) {
- struct ia64_lpte *pte;
-
- npv = TAILQ_NEXT(pv, pv_plist);
-
- pte = pmap_find_vhpt(pv->pv_va);
- KASSERT(pte != NULL, ("pte"));
- if (!pmap_wired(pte))
- pmap_remove_pte(pmap, pte, pv->pv_va, pv, 1);
+ TAILQ_FOREACH_SAFE(pc, &pmap->pm_pvchunk, pc_list, npc) {
+ allfree = 1;
+ for (field = 0; field < _NPCM; field++) {
+ inuse = ~pc->pc_map[field] & pc_freemask[field];
+ while (inuse != 0) {
+ bit = ffsl(inuse) - 1;
+ bitmask = 1UL << bit;
+ idx = field * sizeof(inuse) * NBBY + bit;
+ pv = &pc->pc_pventry[idx];
+ inuse &= ~bitmask;
+ va = pv->pv_va;
+ pte = pmap_find_vhpt(va);
+ KASSERT(pte != NULL, ("pte"));
+ if (pmap_wired(pte)) {
+ allfree = 0;
+ continue;
+ }
+ pmap_remove_vhpt(va);
+ pmap_invalidate_page(va);
+ m = PHYS_TO_VM_PAGE(pmap_ppn(pte));
+ if (pmap_dirty(pte))
+ vm_page_dirty(m);
+ pmap_free_pte(pte, va);
+ /* Mark free */
+ PV_STAT(pv_entry_frees++);
+ PV_STAT(pv_entry_spare++);
+ pv_entry_count--;
+ pc->pc_map[field] |= bitmask;
+ pmap->pm_stats.resident_count--;
+ TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
+ if (TAILQ_EMPTY(&m->md.pv_list))
+ vm_page_aflag_clear(m, PGA_WRITEABLE);
+ }
+ }
+ if (allfree) {
+ TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
+ free_pv_chunk(pc);
+ }
}
-
- pmap_switch(oldpmap);
+ rw_wunlock(&pvh_global_lock);
PMAP_UNLOCK(pmap);
- vm_page_unlock_queues();
}
/*
@@ -1948,16 +2141,17 @@ int
pmap_ts_referenced(vm_page_t m)
{
struct ia64_lpte *pte;
- pmap_t oldpmap;
+ pmap_t oldpmap, pmap;
pv_entry_t pv;
int count = 0;
KASSERT((m->oflags & VPO_UNMANAGED) == 0,
("pmap_ts_referenced: page %p is not managed", m));
- vm_page_lock_queues();
+ rw_wlock(&pvh_global_lock);
TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
- PMAP_LOCK(pv->pv_pmap);
- oldpmap = pmap_switch(pv->pv_pmap);
+ pmap = PV_PMAP(pv);
+ PMAP_LOCK(pmap);
+ oldpmap = pmap_switch(pmap);
pte = pmap_find_vhpt(pv->pv_va);
KASSERT(pte != NULL, ("pte"));
if (pmap_accessed(pte)) {
@@ -1966,9 +2160,9 @@ pmap_ts_referenced(vm_page_t m)
pmap_invalidate_page(pv->pv_va);
}
pmap_switch(oldpmap);
- PMAP_UNLOCK(pv->pv_pmap);
+ PMAP_UNLOCK(pmap);
}
- vm_page_unlock_queues();
+ rw_wunlock(&pvh_global_lock);
return (count);
}
@@ -1982,7 +2176,7 @@ boolean_t
pmap_is_modified(vm_page_t m)
{
struct ia64_lpte *pte;
- pmap_t oldpmap;
+ pmap_t oldpmap, pmap;
pv_entry_t pv;
boolean_t rv;
@@ -1999,19 +2193,20 @@ pmap_is_modified(vm_page_t m)
if ((m->oflags & VPO_BUSY) == 0 &&
(m->aflags & PGA_WRITEABLE) == 0)
return (rv);
- vm_page_lock_queues();
+ rw_wlock(&pvh_global_lock);
TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
- PMAP_LOCK(pv->pv_pmap);
- oldpmap = pmap_switch(pv->pv_pmap);
+ pmap = PV_PMAP(pv);
+ PMAP_LOCK(pmap);
+ oldpmap = pmap_switch(pmap);
pte = pmap_find_vhpt(pv->pv_va);
pmap_switch(oldpmap);
KASSERT(pte != NULL, ("pte"));
rv = pmap_dirty(pte) ? TRUE : FALSE;
- PMAP_UNLOCK(pv->pv_pmap);
+ PMAP_UNLOCK(pmap);
if (rv)
break;
}
- vm_page_unlock_queues();
+ rw_wunlock(&pvh_global_lock);
return (rv);
}
@@ -2042,26 +2237,27 @@ boolean_t
pmap_is_referenced(vm_page_t m)
{
struct ia64_lpte *pte;
- pmap_t oldpmap;
+ pmap_t oldpmap, pmap;
pv_entry_t pv;
boolean_t rv;
KASSERT((m->oflags & VPO_UNMANAGED) == 0,
("pmap_is_referenced: page %p is not managed", m));
rv = FALSE;
- vm_page_lock_queues();
+ rw_wlock(&pvh_global_lock);
TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
- PMAP_LOCK(pv->pv_pmap);
- oldpmap = pmap_switch(pv->pv_pmap);
+ pmap = PV_PMAP(pv);
+ PMAP_LOCK(pmap);
+ oldpmap = pmap_switch(pmap);
pte = pmap_find_vhpt(pv->pv_va);
pmap_switch(oldpmap);
KASSERT(pte != NULL, ("pte"));
rv = pmap_accessed(pte) ? TRUE : FALSE;
- PMAP_UNLOCK(pv->pv_pmap);
+ PMAP_UNLOCK(pmap);
if (rv)
break;
}
- vm_page_unlock_queues();
+ rw_wunlock(&pvh_global_lock);
return (rv);
}
@@ -2072,7 +2268,7 @@ void
pmap_clear_modify(vm_page_t m)
{
struct ia64_lpte *pte;
- pmap_t oldpmap;
+ pmap_t oldpmap, pmap;
pv_entry_t pv;
KASSERT((m->oflags & VPO_UNMANAGED) == 0,
@@ -2088,10 +2284,11 @@ pmap_clear_modify(vm_page_t m)
*/
if ((m->aflags & PGA_WRITEABLE) == 0)
return;
- vm_page_lock_queues();
+ rw_wlock(&pvh_global_lock);
TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
- PMAP_LOCK(pv->pv_pmap);
- oldpmap = pmap_switch(pv->pv_pmap);
+ pmap = PV_PMAP(pv);
+ PMAP_LOCK(pmap);
+ oldpmap = pmap_switch(pmap);
pte = pmap_find_vhpt(pv->pv_va);
KASSERT(pte != NULL, ("pte"));
if (pmap_dirty(pte)) {
@@ -2099,9 +2296,9 @@ pmap_clear_modify(vm_page_t m)
pmap_invalidate_page(pv->pv_va);
}
pmap_switch(oldpmap);
- PMAP_UNLOCK(pv->pv_pmap);
+ PMAP_UNLOCK(pmap);
}
- vm_page_unlock_queues();
+ rw_wunlock(&pvh_global_lock);
}
/*
@@ -2113,15 +2310,16 @@ void
pmap_clear_reference(vm_page_t m)
{
struct ia64_lpte *pte;
- pmap_t oldpmap;
+ pmap_t oldpmap, pmap;
pv_entry_t pv;
KASSERT((m->oflags & VPO_UNMANAGED) == 0,
("pmap_clear_reference: page %p is not managed", m));
- vm_page_lock_queues();
+ rw_wlock(&pvh_global_lock);
TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
- PMAP_LOCK(pv->pv_pmap);
- oldpmap = pmap_switch(pv->pv_pmap);
+ pmap = PV_PMAP(pv);
+ PMAP_LOCK(pmap);
+ oldpmap = pmap_switch(pmap);
pte = pmap_find_vhpt(pv->pv_va);
KASSERT(pte != NULL, ("pte"));
if (pmap_accessed(pte)) {
@@ -2129,9 +2327,9 @@ pmap_clear_reference(vm_page_t m)
pmap_invalidate_page(pv->pv_va);
}
pmap_switch(oldpmap);
- PMAP_UNLOCK(pv->pv_pmap);
+ PMAP_UNLOCK(pmap);
}
- vm_page_unlock_queues();
+ rw_wunlock(&pvh_global_lock);
}
/*
@@ -2157,9 +2355,9 @@ pmap_remove_write(vm_page_t m)
if ((m->oflags & VPO_BUSY) == 0 &&
(m->aflags & PGA_WRITEABLE) == 0)
return;
- vm_page_lock_queues();
+ rw_wlock(&pvh_global_lock);
TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
- pmap = pv->pv_pmap;
+ pmap = PV_PMAP(pv);
PMAP_LOCK(pmap);
oldpmap = pmap_switch(pmap);
pte = pmap_find_vhpt(pv->pv_va);
@@ -2179,7 +2377,7 @@ pmap_remove_write(vm_page_t m)
PMAP_UNLOCK(pmap);
}
vm_page_aflag_clear(m, PGA_WRITEABLE);
- vm_page_unlock_queues();
+ rw_wunlock(&pvh_global_lock);
}
/*
@@ -2249,23 +2447,24 @@ void
pmap_page_set_memattr(vm_page_t m, vm_memattr_t ma)
{
struct ia64_lpte *pte;
- pmap_t oldpmap;
+ pmap_t oldpmap, pmap;
pv_entry_t pv;
void *va;
- vm_page_lock_queues();
+ rw_wlock(&pvh_global_lock);
m->md.memattr = ma;
TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
- PMAP_LOCK(pv->pv_pmap);
- oldpmap = pmap_switch(pv->pv_pmap);
+ pmap = PV_PMAP(pv);
+ PMAP_LOCK(pmap);
+ oldpmap = pmap_switch(pmap);
pte = pmap_find_vhpt(pv->pv_va);
KASSERT(pte != NULL, ("pte"));
pmap_pte_attr(pte, ma);
pmap_invalidate_page(pv->pv_va);
pmap_switch(oldpmap);
- PMAP_UNLOCK(pv->pv_pmap);
+ PMAP_UNLOCK(pmap);
}
- vm_page_unlock_queues();
+ rw_wunlock(&pvh_global_lock);
if (ma == VM_MEMATTR_UNCACHEABLE) {
#ifdef SMP
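
That concludes the pmap.c conversion. The trickiest recurring piece is the inverted-bitmap walk that pmap_pv_reclaim() and pmap_remove_pages() use to visit every allocated entry in a chunk. The standalone program below (not from the patch; constants copied from it, ffsl() taken from <strings.h> on FreeBSD) reproduces that walk in userland:

/*
 * Runnable sketch of the inverted-bitmap walk: invert the free map,
 * mask it to the valid bits, and visit each set bit with ffsl().
 */
#include <stdio.h>
#include <string.h>
#include <strings.h>

#define NPCM            6
#define PC_FREE0_4      0xfffffffffffffffful
#define PC_FREE5        0x000000000001fffful

int
main(void)
{
        const unsigned long pc_freemask[NPCM] = {
                PC_FREE0_4, PC_FREE0_4, PC_FREE0_4,
                PC_FREE0_4, PC_FREE0_4, PC_FREE5
        };
        unsigned long pc_map[NPCM], inuse;
        int bit = 0, field;

        /* Start fully free, then mark entries 3 and 100 as allocated. */
        memcpy(pc_map, pc_freemask, sizeof(pc_map));
        pc_map[0] &= ~(1UL << 3);
        pc_map[1] &= ~(1UL << 36);

        for (field = 0; field < NPCM; field++) {
                for (inuse = ~pc_map[field] & pc_freemask[field];
                    inuse != 0; inuse &= ~(1UL << bit)) {
                        bit = ffsl(inuse) - 1;
                        /* Prints index 3, then index 100. */
                        printf("in-use pv entry at index %d\n",
                            field * 64 + bit);
                }
        }
        return (0);
}
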
diff --git a/sys/ia64/include/pmap.h b/sys/ia64/include/pmap.h
index 3c4539f..fca4c78 100644
--- a/sys/ia64/include/pmap.h
+++ b/sys/ia64/include/pmap.h
@@ -64,16 +64,16 @@
* Pmap stuff
*/
struct pv_entry;
+struct pv_chunk;
struct md_page {
- int pv_list_count;
TAILQ_HEAD(,pv_entry) pv_list;
vm_memattr_t memattr;
};
struct pmap {
struct mtx pm_mtx;
- TAILQ_HEAD(,pv_entry) pm_pvlist; /* list of mappings in pmap */
+ TAILQ_HEAD(,pv_chunk) pm_pvchunk; /* list of mappings in pmap */
uint32_t pm_rid[IA64_VM_MINKERN_REGION];
struct pmap_statistics pm_stats; /* pmap statistics */
};
@@ -101,12 +101,25 @@ extern struct pmap kernel_pmap_store;
* mappings of that page. An entry is a pv_entry_t, the list is pv_list.
*/
typedef struct pv_entry {
- pmap_t pv_pmap; /* pmap where mapping lies */
vm_offset_t pv_va; /* virtual address for mapping */
TAILQ_ENTRY(pv_entry) pv_list;
- TAILQ_ENTRY(pv_entry) pv_plist;
} *pv_entry_t;
+/*
+ * pv_entries are allocated in chunks per-process. This avoids the
+ * need to track per-pmap assignments.
+ */
+#define _NPCM 6
+#define _NPCPV 337
+struct pv_chunk {
+ pmap_t pc_pmap;
+ TAILQ_ENTRY(pv_chunk) pc_list;
+ u_long pc_map[_NPCM]; /* bitmap; 1 = free */
+ TAILQ_ENTRY(pv_chunk) pc_lru;
+ u_long pc_spare[2];
+ struct pv_entry pc_pventry[_NPCPV];
+};
+
#ifdef _KERNEL
extern vm_paddr_t phys_avail[];
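
The CTASSERTs added to pmap.c (sizeof(struct pv_chunk) == PAGE_SIZE, _NPCM == 6, _NPCPV == 337) are consequences of the structure layout above: a 104-byte header leaves 8088 bytes, exactly 337 entries of 24 bytes, and 337 bits need five full 64-bit words plus 17 bits, which is where PC_FREE5 == 0x1ffff comes from. A standalone check of that arithmetic, assuming LP64 member sizes and the ia64 8 KB base page, with the TAILQ_ENTRYs spelled out as pointer pairs:

/*
 * Standalone check (not part of the patch) of the pv_chunk sizing
 * behind the CTASSERTs in pmap.c.
 */
#include <assert.h>
#include <stdio.h>

#define PAGE_SIZE       8192            /* ia64 base page size (assumed) */
#define NPCM            6               /* _NPCM */
#define NPCPV           337             /* _NPCPV */

struct pv_entry {                       /* 24 bytes on LP64 */
        unsigned long   pv_va;
        void            *pv_list_next, *pv_list_prev;
};

struct pv_chunk {
        void            *pc_pmap;
        void            *pc_list_next, *pc_list_prev;
        unsigned long   pc_map[NPCM];   /* 1 bit per entry; 1 = free */
        void            *pc_lru_next, *pc_lru_prev;
        unsigned long   pc_spare[2];
        struct pv_entry pc_pventry[NPCPV];
};

int
main(void)
{
        /* 104-byte header + 337 * 24-byte entries fill one 8 KB page. */
        assert(sizeof(struct pv_chunk) == PAGE_SIZE);
        /* 337 entries need 6 64-bit bitmap words ... */
        assert((NPCPV + 63) / 64 == NPCM);
        /* ... and the last word uses 337 - 5 * 64 = 17 bits: PC_FREE5. */
        printf("tail freemask = %#lx\n", (1UL << (NPCPV - 5 * 64)) - 1);
        return (0);
}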