Diffstat (limited to 'sys')
-rw-r--r--   sys/amd64/amd64/pmap.c              | 122
-rw-r--r--   sys/arm/arm/pmap-v6.c               |   8
-rw-r--r--   sys/arm/arm/pmap.c                  |   8
-rw-r--r--   sys/i386/i386/pmap.c                | 106
-rw-r--r--   sys/i386/xen/pmap.c                 |  66
-rw-r--r--   sys/ia64/ia64/pmap.c                |  44
-rw-r--r--   sys/mips/mips/pmap.c                |   8
-rw-r--r--   sys/powerpc/powerpc/mmu_if.m        |  19
-rw-r--r--   sys/powerpc/powerpc/pmap_dispatch.c |   9
-rw-r--r--   sys/sparc64/sparc64/pmap.c          |   8
-rw-r--r--   sys/vm/pmap.h                       |   2
-rw-r--r--   sys/vm/vm_map.c                     |  21
-rw-r--r--   sys/vm/vm_page.c                    |   9
13 files changed, 419 insertions, 11 deletions
diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c
index 7fb1277..851f92a 100644
--- a/sys/amd64/amd64/pmap.c
+++ b/sys/amd64/amd64/pmap.c
@@ -4967,6 +4967,128 @@ out:
 }
 
 /*
+ * Apply the given advice to the specified range of addresses within the
+ * given pmap.  Depending on the advice, clear the referenced and/or
+ * modified flags in each mapping and set the mapped page's dirty field.
+ */
+void
+pmap_advise(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, int advice)
+{
+    struct rwlock *lock;
+    pml4_entry_t *pml4e;
+    pdp_entry_t *pdpe;
+    pd_entry_t oldpde, *pde;
+    pt_entry_t *pte;
+    vm_offset_t va_next;
+    vm_page_t m;
+    boolean_t anychanged, pv_lists_locked;
+
+    if (advice != MADV_DONTNEED && advice != MADV_FREE)
+        return;
+    pv_lists_locked = FALSE;
+resume:
+    anychanged = FALSE;
+    PMAP_LOCK(pmap);
+    for (; sva < eva; sva = va_next) {
+        pml4e = pmap_pml4e(pmap, sva);
+        if ((*pml4e & PG_V) == 0) {
+            va_next = (sva + NBPML4) & ~PML4MASK;
+            if (va_next < sva)
+                va_next = eva;
+            continue;
+        }
+        pdpe = pmap_pml4e_to_pdpe(pml4e, sva);
+        if ((*pdpe & PG_V) == 0) {
+            va_next = (sva + NBPDP) & ~PDPMASK;
+            if (va_next < sva)
+                va_next = eva;
+            continue;
+        }
+        va_next = (sva + NBPDR) & ~PDRMASK;
+        if (va_next < sva)
+            va_next = eva;
+        pde = pmap_pdpe_to_pde(pdpe, sva);
+        oldpde = *pde;
+        if ((oldpde & PG_V) == 0)
+            continue;
+        else if ((oldpde & PG_PS) != 0) {
+            if ((oldpde & PG_MANAGED) == 0)
+                continue;
+            if (!pv_lists_locked) {
+                pv_lists_locked = TRUE;
+                if (!rw_try_rlock(&pvh_global_lock)) {
+                    if (anychanged)
+                        pmap_invalidate_all(pmap);
+                    PMAP_UNLOCK(pmap);
+                    rw_rlock(&pvh_global_lock);
+                    goto resume;
+                }
+            }
+            lock = NULL;
+            if (!pmap_demote_pde_locked(pmap, pde, sva, &lock)) {
+                if (lock != NULL)
+                    rw_wunlock(lock);
+
+                /*
+                 * The large page mapping was destroyed.
+                 */
+                continue;
+            }
+
+            /*
+             * Unless the page mappings are wired, remove the
+             * mapping to a single page so that a subsequent
+             * access may repromote.  Since the underlying page
+             * table page is fully populated, this removal never
+             * frees a page table page.
+             */
+            if ((oldpde & PG_W) == 0) {
+                pte = pmap_pde_to_pte(pde, sva);
+                KASSERT((*pte & PG_V) != 0,
+                    ("pmap_advise: invalid PTE"));
+                pmap_remove_pte(pmap, pte, sva, *pde, NULL,
+                    &lock);
+                anychanged = TRUE;
+            }
+            if (lock != NULL)
+                rw_wunlock(lock);
+        }
+        if (va_next > eva)
+            va_next = eva;
+        for (pte = pmap_pde_to_pte(pde, sva); sva != va_next; pte++,
+            sva += PAGE_SIZE) {
+            if ((*pte & (PG_MANAGED | PG_V)) != (PG_MANAGED |
+                PG_V))
+                continue;
+            else if ((*pte & (PG_M | PG_RW)) == (PG_M | PG_RW)) {
+                if (advice == MADV_DONTNEED) {
+                    /*
+                     * Future calls to pmap_is_modified()
+                     * can be avoided by making the page
+                     * dirty now.
+                     */
+                    m = PHYS_TO_VM_PAGE(*pte & PG_FRAME);
+                    vm_page_dirty(m);
+                }
+                atomic_clear_long(pte, PG_M | PG_A);
+            } else if ((*pte & PG_A) != 0)
+                atomic_clear_long(pte, PG_A);
+            else
+                continue;
+            if ((*pte & PG_G) != 0)
+                pmap_invalidate_page(pmap, sva);
+            else
+                anychanged = TRUE;
+        }
+    }
+    if (anychanged)
+        pmap_invalidate_all(pmap);
+    if (pv_lists_locked)
+        rw_runlock(&pvh_global_lock);
+    PMAP_UNLOCK(pmap);
+}
+
+/*
  * Clear the modify bits on the specified physical page.
  */
 void
diff --git a/sys/arm/arm/pmap-v6.c b/sys/arm/arm/pmap-v6.c
index 08763b8..b9447d3 100644
--- a/sys/arm/arm/pmap-v6.c
+++ b/sys/arm/arm/pmap-v6.c
@@ -4767,6 +4767,14 @@ pmap_is_modified(vm_page_t m)
 }
 
 /*
+ * This function is advisory.
+ */
+void
+pmap_advise(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, int advice)
+{
+}
+
+/*
  * Clear the modify bits on the specified physical page.
  */
 void
diff --git a/sys/arm/arm/pmap.c b/sys/arm/arm/pmap.c
index f232b07..bc7912d 100644
--- a/sys/arm/arm/pmap.c
+++ b/sys/arm/arm/pmap.c
@@ -4516,6 +4516,14 @@ pmap_page_wired_mappings(vm_page_t m)
 }
 
 /*
+ * This function is advisory.
+ */
+void
+pmap_advise(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, int advice)
+{
+}
+
+/*
  * pmap_ts_referenced:
  *
  *    Return the count of reference bits for a page, clearing all of them.
diff --git a/sys/i386/i386/pmap.c b/sys/i386/i386/pmap.c
index 8fa9026..f09abee 100644
--- a/sys/i386/i386/pmap.c
+++ b/sys/i386/i386/pmap.c
@@ -4834,6 +4834,112 @@ out:
 }
 
 /*
+ * Apply the given advice to the specified range of addresses within the
+ * given pmap.  Depending on the advice, clear the referenced and/or
+ * modified flags in each mapping and set the mapped page's dirty field.
+ */
+void
+pmap_advise(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, int advice)
+{
+    pd_entry_t oldpde, *pde;
+    pt_entry_t *pte;
+    vm_offset_t pdnxt;
+    vm_page_t m;
+    boolean_t anychanged, pv_lists_locked;
+
+    if (advice != MADV_DONTNEED && advice != MADV_FREE)
+        return;
+    if (pmap_is_current(pmap))
+        pv_lists_locked = FALSE;
+    else {
+        pv_lists_locked = TRUE;
+resume:
+        rw_wlock(&pvh_global_lock);
+        sched_pin();
+    }
+    anychanged = FALSE;
+    PMAP_LOCK(pmap);
+    for (; sva < eva; sva = pdnxt) {
+        pdnxt = (sva + NBPDR) & ~PDRMASK;
+        if (pdnxt < sva)
+            pdnxt = eva;
+        pde = pmap_pde(pmap, sva);
+        oldpde = *pde;
+        if ((oldpde & PG_V) == 0)
+            continue;
+        else if ((oldpde & PG_PS) != 0) {
+            if ((oldpde & PG_MANAGED) == 0)
+                continue;
+            if (!pv_lists_locked) {
+                pv_lists_locked = TRUE;
+                if (!rw_try_wlock(&pvh_global_lock)) {
+                    if (anychanged)
+                        pmap_invalidate_all(pmap);
+                    PMAP_UNLOCK(pmap);
+                    goto resume;
+                }
+                sched_pin();
+            }
+            if (!pmap_demote_pde(pmap, pde, sva)) {
+                /*
+                 * The large page mapping was destroyed.
+                 */
+                continue;
+            }
+
+            /*
+             * Unless the page mappings are wired, remove the
+             * mapping to a single page so that a subsequent
+             * access may repromote.  Since the underlying page
+             * table page is fully populated, this removal never
+             * frees a page table page.
+             */
+            if ((oldpde & PG_W) == 0) {
+                pte = pmap_pte_quick(pmap, sva);
+                KASSERT((*pte & PG_V) != 0,
+                    ("pmap_advise: invalid PTE"));
+                pmap_remove_pte(pmap, pte, sva, NULL);
+                anychanged = TRUE;
+            }
+        }
+        if (pdnxt > eva)
+            pdnxt = eva;
+        for (pte = pmap_pte_quick(pmap, sva); sva != pdnxt; pte++,
+            sva += PAGE_SIZE) {
+            if ((*pte & (PG_MANAGED | PG_V)) != (PG_MANAGED |
+                PG_V))
+                continue;
+            else if ((*pte & (PG_M | PG_RW)) == (PG_M | PG_RW)) {
+                if (advice == MADV_DONTNEED) {
+                    /*
+                     * Future calls to pmap_is_modified()
+                     * can be avoided by making the page
+                     * dirty now.
+                     */
+                    m = PHYS_TO_VM_PAGE(*pte & PG_FRAME);
+                    vm_page_dirty(m);
+                }
+                atomic_clear_int((u_int *)pte, PG_M | PG_A);
+            } else if ((*pte & PG_A) != 0)
+                atomic_clear_int((u_int *)pte, PG_A);
+            else
+                continue;
+            if ((*pte & PG_G) != 0)
+                pmap_invalidate_page(pmap, sva);
+            else
+                anychanged = TRUE;
+        }
+    }
+    if (anychanged)
+        pmap_invalidate_all(pmap);
+    if (pv_lists_locked) {
+        sched_unpin();
+        rw_wunlock(&pvh_global_lock);
+    }
+    PMAP_UNLOCK(pmap);
+}
+
+/*
  * Clear the modify bits on the specified physical page.
  */
 void
diff --git a/sys/i386/xen/pmap.c b/sys/i386/xen/pmap.c
index c34fe29..3abe7ef 100644
--- a/sys/i386/xen/pmap.c
+++ b/sys/i386/xen/pmap.c
@@ -3914,6 +3914,72 @@ pmap_ts_referenced(vm_page_t m)
 }
 
 /*
+ * Apply the given advice to the specified range of addresses within the
+ * given pmap.  Depending on the advice, clear the referenced and/or
+ * modified flags in each mapping and set the mapped page's dirty field.
+ */
+void
+pmap_advise(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, int advice)
+{
+    pd_entry_t oldpde;
+    pt_entry_t *pte;
+    vm_offset_t pdnxt;
+    vm_page_t m;
+    boolean_t anychanged;
+
+    if (advice != MADV_DONTNEED && advice != MADV_FREE)
+        return;
+    anychanged = FALSE;
+    rw_wlock(&pvh_global_lock);
+    sched_pin();
+    PMAP_LOCK(pmap);
+    for (; sva < eva; sva = pdnxt) {
+        pdnxt = (sva + NBPDR) & ~PDRMASK;
+        if (pdnxt < sva)
+            pdnxt = eva;
+        oldpde = pmap->pm_pdir[sva >> PDRSHIFT];
+        if ((oldpde & (PG_PS | PG_V)) != PG_V)
+            continue;
+        if (pdnxt > eva)
+            pdnxt = eva;
+        for (pte = pmap_pte_quick(pmap, sva); sva != pdnxt; pte++,
+            sva += PAGE_SIZE) {
+            if ((*pte & (PG_MANAGED | PG_V)) != (PG_MANAGED |
+                PG_V))
+                continue;
+            else if ((*pte & (PG_M | PG_RW)) == (PG_M | PG_RW)) {
+                if (advice == MADV_DONTNEED) {
+                    /*
+                     * Future calls to pmap_is_modified()
+                     * can be avoided by making the page
+                     * dirty now.
+                     */
+                    m = PHYS_TO_VM_PAGE(xpmap_mtop(*pte) &
+                        PG_FRAME);
+                    vm_page_dirty(m);
+                }
+                PT_SET_VA_MA(pte, *pte & ~(PG_M | PG_A), TRUE);
+            } else if ((*pte & PG_A) != 0)
+                PT_SET_VA_MA(pte, *pte & ~PG_A, TRUE);
+            else
+                continue;
+            if ((*pte & PG_G) != 0)
+                pmap_invalidate_page(pmap, sva);
+            else
+                anychanged = TRUE;
+        }
+    }
+    PT_UPDATES_FLUSH();
+    if (*PMAP1)
+        PT_SET_VA_MA(PMAP1, 0, TRUE);
+    if (anychanged)
+        pmap_invalidate_all(pmap);
+    sched_unpin();
+    rw_wunlock(&pvh_global_lock);
+    PMAP_UNLOCK(pmap);
+}
+
+/*
  * Clear the modify bits on the specified physical page.
  */
 void
diff --git a/sys/ia64/ia64/pmap.c b/sys/ia64/ia64/pmap.c
index b23c77c..442149f 100644
--- a/sys/ia64/ia64/pmap.c
+++ b/sys/ia64/ia64/pmap.c
@@ -2310,6 +2310,50 @@ pmap_is_referenced(vm_page_t m)
 }
 
 /*
+ * Apply the given advice to the specified range of addresses within the
+ * given pmap.  Depending on the advice, clear the referenced and/or
+ * modified flags in each mapping and set the mapped page's dirty field.
+ */
+void
+pmap_advise(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, int advice)
+{
+    struct ia64_lpte *pte;
+    pmap_t oldpmap;
+    vm_page_t m;
+
+    PMAP_LOCK(pmap);
+    oldpmap = pmap_switch(pmap);
+    for (; sva < eva; sva += PAGE_SIZE) {
+        /* If page is invalid, skip this page. */
+        pte = pmap_find_vhpt(sva);
+        if (pte == NULL)
+            continue;
+
+        /* If it isn't managed, skip it too. */
+        if (!pmap_managed(pte))
+            continue;
+
+        /* Clear its modified and referenced bits. */
+        if (pmap_dirty(pte)) {
+            if (advice == MADV_DONTNEED) {
+                /*
+                 * Future calls to pmap_is_modified() can be
+                 * avoided by making the page dirty now.
+                 */
+                m = PHYS_TO_VM_PAGE(pmap_ppn(pte));
+                vm_page_dirty(m);
+            }
+            pmap_clear_dirty(pte);
+        } else if (!pmap_accessed(pte))
+            continue;
+        pmap_clear_accessed(pte);
+        pmap_invalidate_page(sva);
+    }
+    pmap_switch(oldpmap);
+    PMAP_UNLOCK(pmap);
+}
+
+/*
  * Clear the modify bits on the specified physical page.
  */
 void
diff --git a/sys/mips/mips/pmap.c b/sys/mips/mips/pmap.c
index fb22a89..90994cc 100644
--- a/sys/mips/mips/pmap.c
+++ b/sys/mips/mips/pmap.c
@@ -2914,6 +2914,14 @@ pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr)
 }
 
 /*
+ * This function is advisory.
+ */
+void
+pmap_advise(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, int advice)
+{
+}
+
+/*
  * Clear the modify bits on the specified physical page.
  */
 void
diff --git a/sys/powerpc/powerpc/mmu_if.m b/sys/powerpc/powerpc/mmu_if.m
index 0382bd8..f9f37cb 100644
--- a/sys/powerpc/powerpc/mmu_if.m
+++ b/sys/powerpc/powerpc/mmu_if.m
@@ -133,6 +133,25 @@ CODE {
 
 
 /**
+ * @brief Apply the given advice to the specified range of addresses within
+ * the given pmap.  Depending on the advice, clear the referenced and/or
+ * modified flags in each mapping and set the mapped page's dirty field.
+ *
+ * @param _pmap      physical map
+ * @param _start     virtual range start
+ * @param _end       virtual range end
+ * @param _advice    advice to apply
+ */
+METHOD void advise {
+    mmu_t        _mmu;
+    pmap_t       _pmap;
+    vm_offset_t  _start;
+    vm_offset_t  _end;
+    int          _advice;
+};
+
+
+/**
  * @brief Change the wiring attribute for the page in the given physical
  * map and virtual address.
 *
diff --git a/sys/powerpc/powerpc/pmap_dispatch.c b/sys/powerpc/powerpc/pmap_dispatch.c
index 7fd98f4..24e6076 100644
--- a/sys/powerpc/powerpc/pmap_dispatch.c
+++ b/sys/powerpc/powerpc/pmap_dispatch.c
@@ -91,6 +91,15 @@ RB_GENERATE(pvo_tree, pvo_entry, pvo_plink, pvo_vaddr_compare);
 
 
 void
+pmap_advise(pmap_t pmap, vm_offset_t start, vm_offset_t end, int advice)
+{
+
+    CTR5(KTR_PMAP, "%s(%p, %#x, %#x, %d)", __func__, pmap, start, end,
+        advice);
+    MMU_ADVISE(mmu_obj, pmap, start, end, advice);
+}
+
+void
 pmap_change_wiring(pmap_t pmap, vm_offset_t va, boolean_t wired)
 {
 
diff --git a/sys/sparc64/sparc64/pmap.c b/sys/sparc64/sparc64/pmap.c
index 47f2c49..a84e4c3 100644
--- a/sys/sparc64/sparc64/pmap.c
+++ b/sys/sparc64/sparc64/pmap.c
@@ -2126,6 +2126,14 @@ pmap_is_referenced(vm_page_t m)
     return (rv);
 }
 
+/*
+ * This function is advisory.
+ */
+void
+pmap_advise(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, int advice)
+{
+}
+
 void
 pmap_clear_modify(vm_page_t m)
 {
diff --git a/sys/vm/pmap.h b/sys/vm/pmap.h
index c0f80a7..911298f 100644
--- a/sys/vm/pmap.h
+++ b/sys/vm/pmap.h
@@ -98,6 +98,8 @@ struct thread;
 extern vm_offset_t kernel_vm_end;
 
 void         pmap_activate(struct thread *td);
+void         pmap_advise(pmap_t pmap, vm_offset_t sva, vm_offset_t eva,
+                 int advice);
 void         pmap_align_superpage(vm_object_t, vm_ooffset_t, vm_offset_t *,
                  vm_size_t);
 void         pmap_change_wiring(pmap_t, vm_offset_t, boolean_t);
diff --git a/sys/vm/vm_map.c b/sys/vm/vm_map.c
index 71c44ad..1be62af 100644
--- a/sys/vm/vm_map.c
+++ b/sys/vm/vm_map.c
@@ -2125,7 +2125,7 @@ vm_map_madvise(
         (current != &map->header) && (current->start < end);
         current = current->next
     ) {
-        vm_offset_t useStart;
+        vm_offset_t useEnd, useStart;
 
         if (current->eflags & MAP_ENTRY_IS_SUB_MAP)
             continue;
@@ -2133,17 +2133,34 @@ vm_map_madvise(
         pstart = OFF_TO_IDX(current->offset);
         pend = pstart + atop(current->end - current->start);
         useStart = current->start;
+        useEnd = current->end;
 
         if (current->start < start) {
             pstart += atop(start - current->start);
             useStart = start;
         }
-        if (current->end > end)
+        if (current->end > end) {
             pend -= atop(current->end - end);
+            useEnd = end;
+        }
 
         if (pstart >= pend)
             continue;
 
+        /*
+         * Perform the pmap_advise() before clearing
+         * PGA_REFERENCED in vm_page_advise().  Otherwise, a
+         * concurrent pmap operation, such as pmap_remove(),
+         * could clear a reference in the pmap and set
+         * PGA_REFERENCED on the page before the pmap_advise()
+         * had completed.  Consequently, the page would appear
+         * referenced based upon an old reference that
+         * occurred before this pmap_advise() ran.
+         */
+        if (behav == MADV_DONTNEED || behav == MADV_FREE)
+            pmap_advise(map->pmap, useStart, useEnd,
+                behav);
+
         vm_object_madvise(current->object.vm_object, pstart,
             pend, behav);
         if (behav == MADV_WILLNEED) {
diff --git a/sys/vm/vm_page.c b/sys/vm/vm_page.c
index 7d8ecfa..7b4b57c 100644
--- a/sys/vm/vm_page.c
+++ b/sys/vm/vm_page.c
@@ -2634,7 +2634,6 @@ vm_page_advise(vm_page_t m, int advice)
          * But we do make the page is freeable as we can without
          * actually taking the step of unmapping it.
          */
-        pmap_clear_modify(m);
         m->dirty = 0;
         m->act_count = 0;
     } else if (advice != MADV_DONTNEED)
@@ -2654,15 +2653,7 @@ vm_page_advise(vm_page_t m, int advice)
     /*
     * Clear any references to the page.  Otherwise, the page daemon will
     * immediately reactivate the page.
-    *
-    * Perform the pmap_clear_reference() first.  Otherwise, a concurrent
-    * pmap operation, such as pmap_remove(), could clear a reference in
-    * the pmap and set PGA_REFERENCED on the page before the
-    * pmap_clear_reference() had completed.  Consequently, the page would
-    * appear referenced based upon an old reference that occurred before
-    * this function ran.
     */
-   pmap_clear_reference(m);
    vm_page_aflag_clear(m, PGA_REFERENCED);
 
    if (advice != MADV_FREE && m->dirty == 0 && pmap_is_modified(m))