-rw-r--r--   sys/conf/options.ia64     |   2
-rw-r--r--   sys/ia64/ia64/pmap.c      | 657
-rw-r--r--   sys/ia64/include/pmap.h   |  21
3 files changed, 447 insertions, 233 deletions
diff --git a/sys/conf/options.ia64 b/sys/conf/options.ia64 index 6eefc8c..750aeca 100644 --- a/sys/conf/options.ia64 +++ b/sys/conf/options.ia64 @@ -11,6 +11,8 @@ UWX_TRACE_ENABLE opt_global.h COMPAT_FREEBSD32 opt_compat.h +PV_STATS opt_pmap.h + EXCEPTION_TRACING opt_xtrace.h VGA_ALT_SEQACCESS opt_vga.h diff --git a/sys/ia64/ia64/pmap.c b/sys/ia64/ia64/pmap.c index d824405..dd312c0 100644 --- a/sys/ia64/ia64/pmap.c +++ b/sys/ia64/ia64/pmap.c @@ -48,12 +48,15 @@ #include <sys/cdefs.h> __FBSDID("$FreeBSD$"); +#include "opt_pmap.h" + #include <sys/param.h> #include <sys/kernel.h> #include <sys/lock.h> #include <sys/mman.h> #include <sys/mutex.h> #include <sys/proc.h> +#include <sys/rwlock.h> #include <sys/smp.h> #include <sys/sysctl.h> #include <sys/systm.h> @@ -108,16 +111,18 @@ __FBSDID("$FreeBSD$"); /* XXX move to a header. */ extern uint64_t ia64_gateway_page[]; -#ifndef PMAP_SHPGPERPROC -#define PMAP_SHPGPERPROC 200 -#endif - #if !defined(DIAGNOSTIC) #define PMAP_INLINE __inline #else #define PMAP_INLINE #endif +#ifdef PV_STATS +#define PV_STAT(x) do { x ; } while (0) +#else +#define PV_STAT(x) do { } while (0) +#endif + #define pmap_accessed(lpte) ((lpte)->pte & PTE_ACCESSED) #define pmap_dirty(lpte) ((lpte)->pte & PTE_DIRTY) #define pmap_exec(lpte) ((lpte)->pte & PTE_AR_RX) @@ -187,10 +192,21 @@ static uint64_t *pmap_ridmap; struct mtx pmap_ridmutex; /* + * Isolate the global pv list lock from data and other locks to prevent false + * sharing within the cache. + */ +static struct { + struct rwlock lock; + char padding[CACHE_LINE_SIZE - sizeof(struct rwlock)]; +} pvh_global __aligned(CACHE_LINE_SIZE); + +#define pvh_global_lock pvh_global.lock + +/* * Data for the pv entry allocation mechanism */ -static uma_zone_t pvzone; -static int pv_entry_count = 0, pv_entry_max = 0, pv_entry_high_water = 0; +static TAILQ_HEAD(pch, pv_chunk) pv_chunks = TAILQ_HEAD_INITIALIZER(pv_chunks); +static int pv_entry_count; /* * Data for allocating PTEs for user processes. @@ -224,8 +240,10 @@ SYSCTL_PROC(_machdep_vhpt, OID_AUTO, population, CTLTYPE_INT | CTLFLAG_RD, static struct ia64_lpte *pmap_find_vhpt(vm_offset_t va); -static PMAP_INLINE void free_pv_entry(pv_entry_t pv); -static pv_entry_t get_pv_entry(pmap_t locked_pmap); +static void free_pv_chunk(struct pv_chunk *pc); +static void free_pv_entry(pmap_t pmap, pv_entry_t pv); +static pv_entry_t get_pv_entry(pmap_t pmap, boolean_t try); +static vm_page_t pmap_pv_reclaim(pmap_t locked_pmap); static void pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot); @@ -402,9 +420,14 @@ pmap_bootstrap() PMAP_LOCK_INIT(kernel_pmap); for (i = 0; i < IA64_VM_MINKERN_REGION; i++) kernel_pmap->pm_rid[i] = 0; - TAILQ_INIT(&kernel_pmap->pm_pvlist); + TAILQ_INIT(&kernel_pmap->pm_pvchunk); PCPU_SET(md.current_pmap, kernel_pmap); + /* + * Initialize the global pv list lock. + */ + rw_init(&pvh_global_lock, "pmap pv global"); + /* Region 5 is mapped via the VHPT. */ ia64_set_rr(IA64_RR_BASE(5), (5 << 8) | (PAGE_SHIFT << 2) | 1); @@ -449,7 +472,6 @@ pmap_page_init(vm_page_t m) { TAILQ_INIT(&m->md.pv_list); - m->md.pv_list_count = 0; m->md.memattr = VM_MEMATTR_DEFAULT; } @@ -461,19 +483,6 @@ pmap_page_init(vm_page_t m) void pmap_init(void) { - int shpgperproc = PMAP_SHPGPERPROC; - - /* - * Initialize the address space (zone) for the pv entries. Set a - * high water mark so that the system can recover from excessive - * numbers of pv entries. 
- */ - pvzone = uma_zcreate("PV ENTRY", sizeof(struct pv_entry), NULL, NULL, - NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM | UMA_ZONE_NOFREE); - TUNABLE_INT_FETCH("vm.pmap.shpgperproc", &shpgperproc); - pv_entry_max = shpgperproc * maxproc + cnt.v_page_count; - TUNABLE_INT_FETCH("vm.pmap.pv_entries", &pv_entry_max); - pv_entry_high_water = 9 * (pv_entry_max / 10); ptezone = uma_zcreate("PT ENTRY", sizeof (struct ia64_lpte), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM|UMA_ZONE_NOFREE); @@ -615,7 +624,7 @@ pmap_pinit(struct pmap *pmap) PMAP_LOCK_INIT(pmap); for (i = 0; i < IA64_VM_MINKERN_REGION; i++) pmap->pm_rid[i] = pmap_allocate_rid(); - TAILQ_INIT(&pmap->pm_pvlist); + TAILQ_INIT(&pmap->pm_pvchunk); bzero(&pmap->pm_stats, sizeof pmap->pm_stats); return (1); } @@ -683,14 +692,222 @@ pmap_growkernel(vm_offset_t addr) * page management routines. ***************************************************/ +CTASSERT(sizeof(struct pv_chunk) == PAGE_SIZE); +CTASSERT(_NPCM == 6); +CTASSERT(_NPCPV == 337); + +static __inline struct pv_chunk * +pv_to_chunk(pv_entry_t pv) +{ + + return ((struct pv_chunk *)((uintptr_t)pv & ~(uintptr_t)PAGE_MASK)); +} + +#define PV_PMAP(pv) (pv_to_chunk(pv)->pc_pmap) + +#define PC_FREE0_4 0xfffffffffffffffful +#define PC_FREE5 0x000000000001fffful + +static const u_long pc_freemask[_NPCM] = { + PC_FREE0_4, PC_FREE0_4, PC_FREE0_4, + PC_FREE0_4, PC_FREE0_4, PC_FREE5 +}; + +static SYSCTL_NODE(_vm, OID_AUTO, pmap, CTLFLAG_RD, 0, "VM/pmap parameters"); + +SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_count, CTLFLAG_RD, &pv_entry_count, 0, + "Current number of pv entries"); + +#ifdef PV_STATS +static int pc_chunk_count, pc_chunk_allocs, pc_chunk_frees, pc_chunk_tryfail; + +SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_count, CTLFLAG_RD, &pc_chunk_count, 0, + "Current number of pv entry chunks"); +SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_allocs, CTLFLAG_RD, &pc_chunk_allocs, 0, + "Current number of pv entry chunks allocated"); +SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_frees, CTLFLAG_RD, &pc_chunk_frees, 0, + "Current number of pv entry chunks frees"); +SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_tryfail, CTLFLAG_RD, &pc_chunk_tryfail, 0, + "Number of times tried to get a chunk page but failed."); + +static long pv_entry_frees, pv_entry_allocs; +static int pv_entry_spare; + +SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_frees, CTLFLAG_RD, &pv_entry_frees, 0, + "Current number of pv entry frees"); +SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_allocs, CTLFLAG_RD, &pv_entry_allocs, 0, + "Current number of pv entry allocs"); +SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_spare, CTLFLAG_RD, &pv_entry_spare, 0, + "Current number of spare pv entries"); +#endif + +/* + * We are in a serious low memory condition. Resort to + * drastic measures to free some pages so we can allocate + * another pv entry chunk. + */ +static vm_page_t +pmap_pv_reclaim(pmap_t locked_pmap) +{ + struct pch newtail; + struct pv_chunk *pc; + struct ia64_lpte *pte; + pmap_t pmap; + pv_entry_t pv; + vm_offset_t va; + vm_page_t m, m_pc; + u_long inuse; + int bit, field, freed, idx; + + PMAP_LOCK_ASSERT(locked_pmap, MA_OWNED); + pmap = NULL; + m_pc = NULL; + TAILQ_INIT(&newtail); + while ((pc = TAILQ_FIRST(&pv_chunks)) != NULL) { + TAILQ_REMOVE(&pv_chunks, pc, pc_lru); + if (pmap != pc->pc_pmap) { + if (pmap != NULL) { + if (pmap != locked_pmap) { + pmap_switch(locked_pmap); + PMAP_UNLOCK(pmap); + } + } + pmap = pc->pc_pmap; + /* Avoid deadlock and lock recursion. 
*/ + if (pmap > locked_pmap) + PMAP_LOCK(pmap); + else if (pmap != locked_pmap && !PMAP_TRYLOCK(pmap)) { + pmap = NULL; + TAILQ_INSERT_TAIL(&newtail, pc, pc_lru); + continue; + } + pmap_switch(pmap); + } + + /* + * Destroy every non-wired, 8 KB page mapping in the chunk. + */ + freed = 0; + for (field = 0; field < _NPCM; field++) { + for (inuse = ~pc->pc_map[field] & pc_freemask[field]; + inuse != 0; inuse &= ~(1UL << bit)) { + bit = ffsl(inuse) - 1; + idx = field * sizeof(inuse) * NBBY + bit; + pv = &pc->pc_pventry[idx]; + va = pv->pv_va; + pte = pmap_find_vhpt(va); + KASSERT(pte != NULL, ("pte")); + if (pmap_wired(pte)) + continue; + pmap_remove_vhpt(va); + pmap_invalidate_page(va); + m = PHYS_TO_VM_PAGE(pmap_ppn(pte)); + if (pmap_accessed(pte)) + vm_page_aflag_set(m, PGA_REFERENCED); + if (pmap_dirty(pte)) + vm_page_dirty(m); + pmap_free_pte(pte, va); + TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); + if (TAILQ_EMPTY(&m->md.pv_list)) + vm_page_aflag_clear(m, PGA_WRITEABLE); + pc->pc_map[field] |= 1UL << bit; + freed++; + } + } + if (freed == 0) { + TAILQ_INSERT_TAIL(&newtail, pc, pc_lru); + continue; + } + /* Every freed mapping is for a 8 KB page. */ + pmap->pm_stats.resident_count -= freed; + PV_STAT(pv_entry_frees += freed); + PV_STAT(pv_entry_spare += freed); + pv_entry_count -= freed; + TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); + for (field = 0; field < _NPCM; field++) + if (pc->pc_map[field] != pc_freemask[field]) { + TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, + pc_list); + TAILQ_INSERT_TAIL(&newtail, pc, pc_lru); + + /* + * One freed pv entry in locked_pmap is + * sufficient. + */ + if (pmap == locked_pmap) + goto out; + break; + } + if (field == _NPCM) { + PV_STAT(pv_entry_spare -= _NPCPV); + PV_STAT(pc_chunk_count--); + PV_STAT(pc_chunk_frees++); + /* Entire chunk is free; return it. */ + m_pc = PHYS_TO_VM_PAGE(IA64_RR_MASK((vm_offset_t)pc)); + break; + } + } +out: + TAILQ_CONCAT(&pv_chunks, &newtail, pc_lru); + if (pmap != NULL) { + if (pmap != locked_pmap) { + pmap_switch(locked_pmap); + PMAP_UNLOCK(pmap); + } + } + return (m_pc); +} + /* * free the pv_entry back to the free list */ -static PMAP_INLINE void -free_pv_entry(pv_entry_t pv) +static void +free_pv_entry(pmap_t pmap, pv_entry_t pv) { + struct pv_chunk *pc; + int bit, field, idx; + + rw_assert(&pvh_global_lock, RA_WLOCKED); + PMAP_LOCK_ASSERT(pmap, MA_OWNED); + PV_STAT(pv_entry_frees++); + PV_STAT(pv_entry_spare++); pv_entry_count--; - uma_zfree(pvzone, pv); + pc = pv_to_chunk(pv); + idx = pv - &pc->pc_pventry[0]; + field = idx / (sizeof(u_long) * NBBY); + bit = idx % (sizeof(u_long) * NBBY); + pc->pc_map[field] |= 1ul << bit; + for (idx = 0; idx < _NPCM; idx++) + if (pc->pc_map[idx] != pc_freemask[idx]) { + /* + * 98% of the time, pc is already at the head of the + * list. If it isn't already, move it to the head. + */ + if (__predict_false(TAILQ_FIRST(&pmap->pm_pvchunk) != + pc)) { + TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); + TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, + pc_list); + } + return; + } + TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); + free_pv_chunk(pc); +} + +static void +free_pv_chunk(struct pv_chunk *pc) +{ + vm_page_t m; + + TAILQ_REMOVE(&pv_chunks, pc, pc_lru); + PV_STAT(pv_entry_spare -= _NPCPV); + PV_STAT(pc_chunk_count--); + PV_STAT(pc_chunk_frees++); + /* entire chunk is free, return it */ + m = PHYS_TO_VM_PAGE(IA64_RR_MASK((vm_offset_t)pc)); + vm_page_unwire(m, 0); + vm_page_free(m); } /* @@ -698,82 +915,67 @@ free_pv_entry(pv_entry_t pv) * when needed. 
*/ static pv_entry_t -get_pv_entry(pmap_t locked_pmap) +get_pv_entry(pmap_t pmap, boolean_t try) { - static const struct timeval printinterval = { 60, 0 }; - static struct timeval lastprint; - struct vpgqueues *vpq; - struct ia64_lpte *pte; - pmap_t oldpmap, pmap; - pv_entry_t allocated_pv, next_pv, pv; - vm_offset_t va; + struct pv_chunk *pc; + pv_entry_t pv; vm_page_t m; + int bit, field, idx; - PMAP_LOCK_ASSERT(locked_pmap, MA_OWNED); - mtx_assert(&vm_page_queue_mtx, MA_OWNED); - allocated_pv = uma_zalloc(pvzone, M_NOWAIT); - if (allocated_pv != NULL) { - pv_entry_count++; - if (pv_entry_count > pv_entry_high_water) - pagedaemon_wakeup(); - else - return (allocated_pv); - } - - /* - * Reclaim pv entries: At first, destroy mappings to inactive - * pages. After that, if a pv entry is still needed, destroy - * mappings to active pages. - */ - if (ratecheck(&lastprint, &printinterval)) - printf("Approaching the limit on PV entries, " - "increase the vm.pmap.shpgperproc tunable.\n"); - vpq = &vm_page_queues[PQ_INACTIVE]; + rw_assert(&pvh_global_lock, RA_WLOCKED); + PMAP_LOCK_ASSERT(pmap, MA_OWNED); + PV_STAT(pv_entry_allocs++); + pv_entry_count++; retry: - TAILQ_FOREACH(m, &vpq->pl, pageq) { - if ((m->flags & PG_MARKER) != 0 || m->hold_count || m->busy) - continue; - TAILQ_FOREACH_SAFE(pv, &m->md.pv_list, pv_list, next_pv) { - va = pv->pv_va; - pmap = pv->pv_pmap; - /* Avoid deadlock and lock recursion. */ - if (pmap > locked_pmap) - PMAP_LOCK(pmap); - else if (pmap != locked_pmap && !PMAP_TRYLOCK(pmap)) - continue; - pmap->pm_stats.resident_count--; - oldpmap = pmap_switch(pmap); - pte = pmap_find_vhpt(va); - KASSERT(pte != NULL, ("pte")); - pmap_remove_vhpt(va); - pmap_invalidate_page(va); - pmap_switch(oldpmap); - if (pmap_accessed(pte)) - vm_page_aflag_set(m, PGA_REFERENCED); - if (pmap_dirty(pte)) - vm_page_dirty(m); - pmap_free_pte(pte, va); - TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist); - m->md.pv_list_count--; - TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); - if (pmap != locked_pmap) - PMAP_UNLOCK(pmap); - if (allocated_pv == NULL) - allocated_pv = pv; - else - free_pv_entry(pv); + pc = TAILQ_FIRST(&pmap->pm_pvchunk); + if (pc != NULL) { + for (field = 0; field < _NPCM; field++) { + if (pc->pc_map[field]) { + bit = ffsl(pc->pc_map[field]) - 1; + break; + } + } + if (field < _NPCM) { + idx = field * sizeof(pc->pc_map[field]) * NBBY + bit; + pv = &pc->pc_pventry[idx]; + pc->pc_map[field] &= ~(1ul << bit); + /* If this was the last item, move it to tail */ + for (field = 0; field < _NPCM; field++) + if (pc->pc_map[field] != 0) { + PV_STAT(pv_entry_spare--); + return (pv); /* not full, return */ + } + TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); + TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc, pc_list); + PV_STAT(pv_entry_spare--); + return (pv); } - if (TAILQ_EMPTY(&m->md.pv_list)) - vm_page_aflag_clear(m, PGA_WRITEABLE); } - if (allocated_pv == NULL) { - if (vpq == &vm_page_queues[PQ_INACTIVE]) { - vpq = &vm_page_queues[PQ_ACTIVE]; - goto retry; + /* No free items, allocate another chunk */ + m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | + VM_ALLOC_WIRED); + if (m == NULL) { + if (try) { + pv_entry_count--; + PV_STAT(pc_chunk_tryfail++); + return (NULL); } - panic("get_pv_entry: increase the vm.pmap.shpgperproc tunable"); + m = pmap_pv_reclaim(pmap); + if (m == NULL) + goto retry; } - return (allocated_pv); + PV_STAT(pc_chunk_count++); + PV_STAT(pc_chunk_allocs++); + pc = (struct pv_chunk *)IA64_PHYS_TO_RR7(VM_PAGE_TO_PHYS(m)); + pc->pc_pmap = pmap; + pc->pc_map[0] = 
pc_freemask[0] & ~1ul; /* preallocated bit 0 */ + for (field = 1; field < _NPCM; field++) + pc->pc_map[field] = pc_freemask[field]; + TAILQ_INSERT_TAIL(&pv_chunks, pc, pc_lru); + pv = &pc->pc_pventry[0]; + TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list); + PV_STAT(pv_entry_spare += _NPCPV - 1); + return (pv); } /* @@ -785,15 +987,10 @@ pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va, vm_page_t m) pv_entry_t pv; PMAP_LOCK_ASSERT(pmap, MA_OWNED); - mtx_assert(&vm_page_queue_mtx, MA_OWNED); - if (pv_entry_count < pv_entry_high_water && - (pv = uma_zalloc(pvzone, M_NOWAIT)) != NULL) { - pv_entry_count++; + rw_assert(&pvh_global_lock, RA_WLOCKED); + if ((pv = get_pv_entry(pmap, TRUE)) != NULL) { pv->pv_va = va; - pv->pv_pmap = pmap; - TAILQ_INSERT_TAIL(&pmap->pm_pvlist, pv, pv_plist); TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list); - m->md.pv_list_count++; return (TRUE); } else return (FALSE); @@ -899,28 +1096,21 @@ pmap_find_vhpt(vm_offset_t va) static int pmap_remove_entry(pmap_t pmap, vm_page_t m, vm_offset_t va, pv_entry_t pv) { + + rw_assert(&pvh_global_lock, RA_WLOCKED); if (!pv) { - if (m->md.pv_list_count < pmap->pm_stats.resident_count) { - TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { - if (pmap == pv->pv_pmap && va == pv->pv_va) - break; - } - } else { - TAILQ_FOREACH(pv, &pmap->pm_pvlist, pv_plist) { - if (va == pv->pv_va) - break; - } + TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { + if (pmap == PV_PMAP(pv) && va == pv->pv_va) + break; } } if (pv) { TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); - m->md.pv_list_count--; if (TAILQ_FIRST(&m->md.pv_list) == NULL) vm_page_aflag_clear(m, PGA_WRITEABLE); - TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist); - free_pv_entry(pv); + free_pv_entry(pmap, pv); return 0; } else { return ENOENT; @@ -936,15 +1126,10 @@ pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t m) { pv_entry_t pv; - pv = get_pv_entry(pmap); - pv->pv_pmap = pmap; + rw_assert(&pvh_global_lock, RA_WLOCKED); + pv = get_pv_entry(pmap, FALSE); pv->pv_va = va; - - PMAP_LOCK_ASSERT(pmap, MA_OWNED); - mtx_assert(&vm_page_queue_mtx, MA_OWNED); - TAILQ_INSERT_TAIL(&pmap->pm_pvlist, pv, pv_plist); TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list); - m->md.pv_list_count++; } /* @@ -1317,53 +1502,32 @@ pmap_map(vm_offset_t *virt, vm_offset_t start, vm_offset_t end, int prot) * * It is assumed that the start and end are properly * rounded to the page size. + * + * Sparsely used ranges are inefficiently removed. The VHPT is + * probed for every page within the range. XXX */ void pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) { pmap_t oldpmap; vm_offset_t va; - pv_entry_t npv, pv; struct ia64_lpte *pte; + /* + * Perform an unsynchronized read. This is, however, safe. + */ if (pmap->pm_stats.resident_count == 0) return; - vm_page_lock_queues(); + rw_wlock(&pvh_global_lock); PMAP_LOCK(pmap); oldpmap = pmap_switch(pmap); - - /* - * special handling of removing one page. a very - * common operation and easy to short circuit some - * code. 
- */ - if (sva + PAGE_SIZE == eva) { - pte = pmap_find_vhpt(sva); + for (va = sva; va < eva; va += PAGE_SIZE) { + pte = pmap_find_vhpt(va); if (pte != NULL) - pmap_remove_pte(pmap, pte, sva, 0, 1); - goto out; - } - - if (pmap->pm_stats.resident_count < ((eva - sva) >> PAGE_SHIFT)) { - TAILQ_FOREACH_SAFE(pv, &pmap->pm_pvlist, pv_plist, npv) { - va = pv->pv_va; - if (va >= sva && va < eva) { - pte = pmap_find_vhpt(va); - KASSERT(pte != NULL, ("pte")); - pmap_remove_pte(pmap, pte, va, pv, 1); - } - } - } else { - for (va = sva; va < eva; va += PAGE_SIZE) { - pte = pmap_find_vhpt(va); - if (pte != NULL) - pmap_remove_pte(pmap, pte, va, 0, 1); - } + pmap_remove_pte(pmap, pte, va, 0, 1); } - -out: - vm_page_unlock_queues(); + rw_wunlock(&pvh_global_lock); pmap_switch(oldpmap); PMAP_UNLOCK(pmap); } @@ -1389,10 +1553,10 @@ pmap_remove_all(vm_page_t m) KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_remove_all: page %p is not managed", m)); - vm_page_lock_queues(); + rw_wlock(&pvh_global_lock); while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) { struct ia64_lpte *pte; - pmap_t pmap = pv->pv_pmap; + pmap_t pmap = PV_PMAP(pv); vm_offset_t va = pv->pv_va; PMAP_LOCK(pmap); @@ -1406,7 +1570,7 @@ pmap_remove_all(vm_page_t m) PMAP_UNLOCK(pmap); } vm_page_aflag_clear(m, PGA_WRITEABLE); - vm_page_unlock_queues(); + rw_wunlock(&pvh_global_lock); } /* @@ -1485,7 +1649,7 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_prot_t access, vm_page_t m, struct ia64_lpte *pte; boolean_t icache_inval, managed; - vm_page_lock_queues(); + rw_wlock(&pvh_global_lock); PMAP_LOCK(pmap); oldpmap = pmap_switch(pmap); @@ -1500,9 +1664,9 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_prot_t access, vm_page_t m, while ((pte = pmap_find_pte(va)) == NULL) { pmap_switch(oldpmap); PMAP_UNLOCK(pmap); - vm_page_unlock_queues(); + rw_wunlock(&pvh_global_lock); VM_WAIT; - vm_page_lock_queues(); + rw_wlock(&pvh_global_lock); PMAP_LOCK(pmap); oldpmap = pmap_switch(pmap); } @@ -1591,7 +1755,7 @@ validate: if ((prot & VM_PROT_WRITE) != 0 && managed) vm_page_aflag_set(m, PGA_WRITEABLE); - vm_page_unlock_queues(); + rw_wunlock(&pvh_global_lock); pmap_switch(oldpmap); PMAP_UNLOCK(pmap); } @@ -1619,14 +1783,14 @@ pmap_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end, VM_OBJECT_LOCK_ASSERT(m_start->object, MA_OWNED); psize = atop(end - start); m = m_start; - vm_page_lock_queues(); + rw_wlock(&pvh_global_lock); PMAP_LOCK(pmap); oldpmap = pmap_switch(pmap); while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) { pmap_enter_quick_locked(pmap, start + ptoa(diff), m, prot); m = TAILQ_NEXT(m, listq); } - vm_page_unlock_queues(); + rw_wunlock(&pvh_global_lock); pmap_switch(oldpmap); PMAP_UNLOCK(pmap); } @@ -1645,11 +1809,11 @@ pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot) { pmap_t oldpmap; - vm_page_lock_queues(); + rw_wlock(&pvh_global_lock); PMAP_LOCK(pmap); oldpmap = pmap_switch(pmap); pmap_enter_quick_locked(pmap, va, m, prot); - vm_page_unlock_queues(); + rw_wunlock(&pvh_global_lock); pmap_switch(oldpmap); PMAP_UNLOCK(pmap); } @@ -1664,7 +1828,7 @@ pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m, KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva || (m->oflags & VPO_UNMANAGED) != 0, ("pmap_enter_quick_locked: managed mapping within the clean submap")); - mtx_assert(&vm_page_queue_mtx, MA_OWNED); + rw_assert(&pvh_global_lock, RA_WLOCKED); PMAP_LOCK_ASSERT(pmap, MA_OWNED); if ((pte = pmap_find_pte(va)) == NULL) @@ -1845,9 +2009,9 @@ pmap_page_exists_quick(pmap_t pmap, vm_page_t m) 
KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_page_exists_quick: page %p is not managed", m)); rv = FALSE; - vm_page_lock_queues(); + rw_wlock(&pvh_global_lock); TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { - if (pv->pv_pmap == pmap) { + if (PV_PMAP(pv) == pmap) { rv = TRUE; break; } @@ -1855,7 +2019,7 @@ pmap_page_exists_quick(pmap_t pmap, vm_page_t m) if (loops >= 16) break; } - vm_page_unlock_queues(); + rw_wunlock(&pvh_global_lock); return (rv); } @@ -1876,9 +2040,9 @@ pmap_page_wired_mappings(vm_page_t m) count = 0; if ((m->oflags & VPO_UNMANAGED) != 0) return (count); - vm_page_lock_queues(); + rw_wlock(&pvh_global_lock); TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { - pmap = pv->pv_pmap; + pmap = PV_PMAP(pv); PMAP_LOCK(pmap); oldpmap = pmap_switch(pmap); pte = pmap_find_vhpt(pv->pv_va); @@ -1888,7 +2052,7 @@ pmap_page_wired_mappings(vm_page_t m) pmap_switch(oldpmap); PMAP_UNLOCK(pmap); } - vm_page_unlock_queues(); + rw_wunlock(&pvh_global_lock); return (count); } @@ -1903,33 +2067,62 @@ pmap_page_wired_mappings(vm_page_t m) void pmap_remove_pages(pmap_t pmap) { - pmap_t oldpmap; - pv_entry_t pv, npv; + struct pv_chunk *pc, *npc; + struct ia64_lpte *pte; + pv_entry_t pv; + vm_offset_t va; + vm_page_t m; + u_long inuse, bitmask; + int allfree, bit, field, idx; if (pmap != vmspace_pmap(curthread->td_proc->p_vmspace)) { printf("warning: %s called with non-current pmap\n", __func__); return; } - - vm_page_lock_queues(); + rw_wlock(&pvh_global_lock); PMAP_LOCK(pmap); - oldpmap = pmap_switch(pmap); - - for (pv = TAILQ_FIRST(&pmap->pm_pvlist); pv; pv = npv) { - struct ia64_lpte *pte; - - npv = TAILQ_NEXT(pv, pv_plist); - - pte = pmap_find_vhpt(pv->pv_va); - KASSERT(pte != NULL, ("pte")); - if (!pmap_wired(pte)) - pmap_remove_pte(pmap, pte, pv->pv_va, pv, 1); + TAILQ_FOREACH_SAFE(pc, &pmap->pm_pvchunk, pc_list, npc) { + allfree = 1; + for (field = 0; field < _NPCM; field++) { + inuse = ~pc->pc_map[field] & pc_freemask[field]; + while (inuse != 0) { + bit = ffsl(inuse) - 1; + bitmask = 1UL << bit; + idx = field * sizeof(inuse) * NBBY + bit; + pv = &pc->pc_pventry[idx]; + inuse &= ~bitmask; + va = pv->pv_va; + pte = pmap_find_vhpt(va); + KASSERT(pte != NULL, ("pte")); + if (pmap_wired(pte)) { + allfree = 0; + continue; + } + pmap_remove_vhpt(va); + pmap_invalidate_page(va); + m = PHYS_TO_VM_PAGE(pmap_ppn(pte)); + if (pmap_dirty(pte)) + vm_page_dirty(m); + pmap_free_pte(pte, va); + /* Mark free */ + PV_STAT(pv_entry_frees++); + PV_STAT(pv_entry_spare++); + pv_entry_count--; + pc->pc_map[field] |= bitmask; + pmap->pm_stats.resident_count--; + TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); + if (TAILQ_EMPTY(&m->md.pv_list)) + vm_page_aflag_clear(m, PGA_WRITEABLE); + } + } + if (allfree) { + TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); + free_pv_chunk(pc); + } } - - pmap_switch(oldpmap); + rw_wunlock(&pvh_global_lock); PMAP_UNLOCK(pmap); - vm_page_unlock_queues(); } /* @@ -1948,16 +2141,17 @@ int pmap_ts_referenced(vm_page_t m) { struct ia64_lpte *pte; - pmap_t oldpmap; + pmap_t oldpmap, pmap; pv_entry_t pv; int count = 0; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_ts_referenced: page %p is not managed", m)); - vm_page_lock_queues(); + rw_wlock(&pvh_global_lock); TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { - PMAP_LOCK(pv->pv_pmap); - oldpmap = pmap_switch(pv->pv_pmap); + pmap = PV_PMAP(pv); + PMAP_LOCK(pmap); + oldpmap = pmap_switch(pmap); pte = pmap_find_vhpt(pv->pv_va); KASSERT(pte != NULL, ("pte")); if (pmap_accessed(pte)) { @@ -1966,9 +2160,9 @@ pmap_ts_referenced(vm_page_t m) 
pmap_invalidate_page(pv->pv_va); } pmap_switch(oldpmap); - PMAP_UNLOCK(pv->pv_pmap); + PMAP_UNLOCK(pmap); } - vm_page_unlock_queues(); + rw_wunlock(&pvh_global_lock); return (count); } @@ -1982,7 +2176,7 @@ boolean_t pmap_is_modified(vm_page_t m) { struct ia64_lpte *pte; - pmap_t oldpmap; + pmap_t oldpmap, pmap; pv_entry_t pv; boolean_t rv; @@ -1999,19 +2193,20 @@ pmap_is_modified(vm_page_t m) if ((m->oflags & VPO_BUSY) == 0 && (m->aflags & PGA_WRITEABLE) == 0) return (rv); - vm_page_lock_queues(); + rw_wlock(&pvh_global_lock); TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { - PMAP_LOCK(pv->pv_pmap); - oldpmap = pmap_switch(pv->pv_pmap); + pmap = PV_PMAP(pv); + PMAP_LOCK(pmap); + oldpmap = pmap_switch(pmap); pte = pmap_find_vhpt(pv->pv_va); pmap_switch(oldpmap); KASSERT(pte != NULL, ("pte")); rv = pmap_dirty(pte) ? TRUE : FALSE; - PMAP_UNLOCK(pv->pv_pmap); + PMAP_UNLOCK(pmap); if (rv) break; } - vm_page_unlock_queues(); + rw_wunlock(&pvh_global_lock); return (rv); } @@ -2042,26 +2237,27 @@ boolean_t pmap_is_referenced(vm_page_t m) { struct ia64_lpte *pte; - pmap_t oldpmap; + pmap_t oldpmap, pmap; pv_entry_t pv; boolean_t rv; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_is_referenced: page %p is not managed", m)); rv = FALSE; - vm_page_lock_queues(); + rw_wlock(&pvh_global_lock); TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { - PMAP_LOCK(pv->pv_pmap); - oldpmap = pmap_switch(pv->pv_pmap); + pmap = PV_PMAP(pv); + PMAP_LOCK(pmap); + oldpmap = pmap_switch(pmap); pte = pmap_find_vhpt(pv->pv_va); pmap_switch(oldpmap); KASSERT(pte != NULL, ("pte")); rv = pmap_accessed(pte) ? TRUE : FALSE; - PMAP_UNLOCK(pv->pv_pmap); + PMAP_UNLOCK(pmap); if (rv) break; } - vm_page_unlock_queues(); + rw_wunlock(&pvh_global_lock); return (rv); } @@ -2072,7 +2268,7 @@ void pmap_clear_modify(vm_page_t m) { struct ia64_lpte *pte; - pmap_t oldpmap; + pmap_t oldpmap, pmap; pv_entry_t pv; KASSERT((m->oflags & VPO_UNMANAGED) == 0, @@ -2088,10 +2284,11 @@ pmap_clear_modify(vm_page_t m) */ if ((m->aflags & PGA_WRITEABLE) == 0) return; - vm_page_lock_queues(); + rw_wlock(&pvh_global_lock); TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { - PMAP_LOCK(pv->pv_pmap); - oldpmap = pmap_switch(pv->pv_pmap); + pmap = PV_PMAP(pv); + PMAP_LOCK(pmap); + oldpmap = pmap_switch(pmap); pte = pmap_find_vhpt(pv->pv_va); KASSERT(pte != NULL, ("pte")); if (pmap_dirty(pte)) { @@ -2099,9 +2296,9 @@ pmap_clear_modify(vm_page_t m) pmap_invalidate_page(pv->pv_va); } pmap_switch(oldpmap); - PMAP_UNLOCK(pv->pv_pmap); + PMAP_UNLOCK(pmap); } - vm_page_unlock_queues(); + rw_wunlock(&pvh_global_lock); } /* @@ -2113,15 +2310,16 @@ void pmap_clear_reference(vm_page_t m) { struct ia64_lpte *pte; - pmap_t oldpmap; + pmap_t oldpmap, pmap; pv_entry_t pv; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_clear_reference: page %p is not managed", m)); - vm_page_lock_queues(); + rw_wlock(&pvh_global_lock); TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { - PMAP_LOCK(pv->pv_pmap); - oldpmap = pmap_switch(pv->pv_pmap); + pmap = PV_PMAP(pv); + PMAP_LOCK(pmap); + oldpmap = pmap_switch(pmap); pte = pmap_find_vhpt(pv->pv_va); KASSERT(pte != NULL, ("pte")); if (pmap_accessed(pte)) { @@ -2129,9 +2327,9 @@ pmap_clear_reference(vm_page_t m) pmap_invalidate_page(pv->pv_va); } pmap_switch(oldpmap); - PMAP_UNLOCK(pv->pv_pmap); + PMAP_UNLOCK(pmap); } - vm_page_unlock_queues(); + rw_wunlock(&pvh_global_lock); } /* @@ -2157,9 +2355,9 @@ pmap_remove_write(vm_page_t m) if ((m->oflags & VPO_BUSY) == 0 && (m->aflags & PGA_WRITEABLE) == 0) return; - vm_page_lock_queues(); + 
rw_wlock(&pvh_global_lock); TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { - pmap = pv->pv_pmap; + pmap = PV_PMAP(pv); PMAP_LOCK(pmap); oldpmap = pmap_switch(pmap); pte = pmap_find_vhpt(pv->pv_va); @@ -2179,7 +2377,7 @@ pmap_remove_write(vm_page_t m) PMAP_UNLOCK(pmap); } vm_page_aflag_clear(m, PGA_WRITEABLE); - vm_page_unlock_queues(); + rw_wunlock(&pvh_global_lock); } /* @@ -2249,23 +2447,24 @@ void pmap_page_set_memattr(vm_page_t m, vm_memattr_t ma) { struct ia64_lpte *pte; - pmap_t oldpmap; + pmap_t oldpmap, pmap; pv_entry_t pv; void *va; - vm_page_lock_queues(); + rw_wlock(&pvh_global_lock); m->md.memattr = ma; TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { - PMAP_LOCK(pv->pv_pmap); - oldpmap = pmap_switch(pv->pv_pmap); + pmap = PV_PMAP(pv); + PMAP_LOCK(pmap); + oldpmap = pmap_switch(pmap); pte = pmap_find_vhpt(pv->pv_va); KASSERT(pte != NULL, ("pte")); pmap_pte_attr(pte, ma); pmap_invalidate_page(pv->pv_va); pmap_switch(oldpmap); - PMAP_UNLOCK(pv->pv_pmap); + PMAP_UNLOCK(pmap); } - vm_page_unlock_queues(); + rw_wunlock(&pvh_global_lock); if (ma == VM_MEMATTR_UNCACHEABLE) { #ifdef SMP diff --git a/sys/ia64/include/pmap.h b/sys/ia64/include/pmap.h index 3c4539f..fca4c78 100644 --- a/sys/ia64/include/pmap.h +++ b/sys/ia64/include/pmap.h @@ -64,16 +64,16 @@ * Pmap stuff */ struct pv_entry; +struct pv_chunk; struct md_page { - int pv_list_count; TAILQ_HEAD(,pv_entry) pv_list; vm_memattr_t memattr; }; struct pmap { struct mtx pm_mtx; - TAILQ_HEAD(,pv_entry) pm_pvlist; /* list of mappings in pmap */ + TAILQ_HEAD(,pv_chunk) pm_pvchunk; /* list of mappings in pmap */ uint32_t pm_rid[IA64_VM_MINKERN_REGION]; struct pmap_statistics pm_stats; /* pmap statistics */ }; @@ -101,12 +101,25 @@ extern struct pmap kernel_pmap_store; * mappings of that page. An entry is a pv_entry_t, the list is pv_list. */ typedef struct pv_entry { - pmap_t pv_pmap; /* pmap where mapping lies */ vm_offset_t pv_va; /* virtual address for mapping */ TAILQ_ENTRY(pv_entry) pv_list; - TAILQ_ENTRY(pv_entry) pv_plist; } *pv_entry_t; +/* + * pv_entries are allocated in chunks per-process. This avoids the + * need to track per-pmap assignments. + */ +#define _NPCM 6 +#define _NPCPV 337 +struct pv_chunk { + pmap_t pc_pmap; + TAILQ_ENTRY(pv_chunk) pc_list; + u_long pc_map[_NPCM]; /* bitmap; 1 = free */ + TAILQ_ENTRY(pv_chunk) pc_lru; + u_long pc_spare[2]; + struct pv_entry pc_pventry[_NPCPV]; +}; + #ifdef _KERNEL extern vm_paddr_t phys_avail[]; |
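
For readers unfamiliar with the chunked pv entry scheme this change adopts, the core of it is the per-chunk free bitmap: each wired page holds a struct pv_chunk header followed by 337 pv_entry slots, and pc_map records which slots are free (bit set = free). The sketch below is a minimal userland illustration of that bookkeeping and is not part of the commit; the constants mirror the patch, while the helper names and the use of __builtin_ffsl() in place of the kernel's ffsl() are illustrative assumptions.

/*
 * Minimal, self-contained sketch of the pc_map bookkeeping used by
 * get_pv_entry()/free_pv_entry() in the patch.  The chunk constants
 * follow the patch; the alloc/free helpers are illustrative only and
 * touch no kernel structures.
 */
#include <stdio.h>
#include <string.h>

#define NBBY    8
#define _NPCM   6       /* longs in the bitmap */
#define _NPCPV  337     /* pv entries per chunk */

#define PC_FREE0_4 0xfffffffffffffffful
#define PC_FREE5   0x000000000001fffful  /* 337 - 5*64 = 17 trailing bits */

static const unsigned long pc_freemask[_NPCM] = {
	PC_FREE0_4, PC_FREE0_4, PC_FREE0_4,
	PC_FREE0_4, PC_FREE0_4, PC_FREE5
};

/* Take the lowest free slot out of the bitmap; -1 if the chunk is full. */
static int
chunk_alloc_idx(unsigned long map[_NPCM])
{
	int bit, field;

	for (field = 0; field < _NPCM; field++) {
		if (map[field] != 0) {
			bit = __builtin_ffsl(map[field]) - 1; /* kernel uses ffsl() */
			map[field] &= ~(1ul << bit);
			return (field * (int)sizeof(map[0]) * NBBY + bit);
		}
	}
	return (-1);
}

/* Return a slot to the bitmap, as free_pv_entry() does. */
static void
chunk_free_idx(unsigned long map[_NPCM], int idx)
{
	map[idx / ((int)sizeof(map[0]) * NBBY)] |=
	    1ul << (idx % ((int)sizeof(map[0]) * NBBY));
}

int
main(void)
{
	unsigned long map[_NPCM];
	int i, idx;

	memcpy(map, pc_freemask, sizeof(map));   /* all 337 slots free */
	for (i = 0; i < 3; i++) {
		idx = chunk_alloc_idx(map);
		printf("allocated pv entry index %d\n", idx);   /* 0, 1, 2 */
	}
	chunk_free_idx(map, 1);
	printf("next allocation after free: %d\n", chunk_alloc_idx(map)); /* 1 */
	return (0);
}

Note that a freshly allocated chunk in the patch starts with pc_map[0] = pc_freemask[0] & ~1ul, because entry 0 is handed out immediately; the sketch simply starts fully free.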
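pmap_pv_reclaim() also encodes a lock-ordering rule when it must take a second pmap lock while one is already held: it blocks only if the new lock sorts after the held one by address, and otherwise falls back to a try-lock, skipping the chunk on failure. The following userland sketch, again not part of the commit, illustrates that rule with pthread mutexes standing in for PMAP_LOCK()/PMAP_TRYLOCK(); all names in it are hypothetical.

/*
 * Sketch of the deadlock-avoidance rule in pmap_pv_reclaim(): locks are
 * acquired in ascending address order, so a blocking acquire is only
 * safe when the second lock sorts after the one already held; otherwise
 * the lock is taken opportunistically with a trylock.
 */
#include <pthread.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static bool
lock_second(pthread_mutex_t *held, pthread_mutex_t *want)
{
	if (want == held)
		return (true);                  /* already owned: avoid recursion */
	if ((uintptr_t)want > (uintptr_t)held) {
		pthread_mutex_lock(want);       /* ascending order: safe to block */
		return (true);
	}
	/* Descending order would risk deadlock; only try, never block. */
	return (pthread_mutex_trylock(want) == 0);
}

int
main(void)
{
	pthread_mutex_t a = PTHREAD_MUTEX_INITIALIZER;
	pthread_mutex_t b = PTHREAD_MUTEX_INITIALIZER;

	pthread_mutex_lock(&a);
	printf("second lock %s\n", lock_second(&a, &b) ? "taken" : "skipped");
	return (0);
}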