-rw-r--r--  sys/amd64/amd64/pmap.c               | 122
-rw-r--r--  sys/arm/arm/pmap-v6.c                |   8
-rw-r--r--  sys/arm/arm/pmap.c                   |   8
-rw-r--r--  sys/i386/i386/pmap.c                 | 106
-rw-r--r--  sys/i386/xen/pmap.c                  |  66
-rw-r--r--  sys/ia64/ia64/pmap.c                 |  44
-rw-r--r--  sys/mips/mips/pmap.c                 |   8
-rw-r--r--  sys/powerpc/powerpc/mmu_if.m         |  19
-rw-r--r--  sys/powerpc/powerpc/pmap_dispatch.c  |   9
-rw-r--r--  sys/sparc64/sparc64/pmap.c           |   8
-rw-r--r--  sys/vm/pmap.h                        |   2
-rw-r--r--  sys/vm/vm_map.c                      |  21
-rw-r--r--  sys/vm/vm_page.c                     |   9
13 files changed, 419 insertions, 11 deletions
diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c
index 7fb1277..851f92a 100644
--- a/sys/amd64/amd64/pmap.c
+++ b/sys/amd64/amd64/pmap.c
@@ -4967,6 +4967,128 @@ out:
}
/*
+ * Apply the given advice to the specified range of addresses within the
+ * given pmap. Depending on the advice, clear the referenced and/or
+ * modified flags in each mapping and set the mapped page's dirty field.
+ */
+void
+pmap_advise(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, int advice)
+{
+ struct rwlock *lock;
+ pml4_entry_t *pml4e;
+ pdp_entry_t *pdpe;
+ pd_entry_t oldpde, *pde;
+ pt_entry_t *pte;
+ vm_offset_t va_next;
+ vm_page_t m;
+ boolean_t anychanged, pv_lists_locked;
+
+ if (advice != MADV_DONTNEED && advice != MADV_FREE)
+ return;
+ pv_lists_locked = FALSE;
+resume:
+ anychanged = FALSE;
+ PMAP_LOCK(pmap);
+ for (; sva < eva; sva = va_next) {
+ pml4e = pmap_pml4e(pmap, sva);
+ if ((*pml4e & PG_V) == 0) {
+ va_next = (sva + NBPML4) & ~PML4MASK;
+ if (va_next < sva)
+ va_next = eva;
+ continue;
+ }
+ pdpe = pmap_pml4e_to_pdpe(pml4e, sva);
+ if ((*pdpe & PG_V) == 0) {
+ va_next = (sva + NBPDP) & ~PDPMASK;
+ if (va_next < sva)
+ va_next = eva;
+ continue;
+ }
+ va_next = (sva + NBPDR) & ~PDRMASK;
+ if (va_next < sva)
+ va_next = eva;
+ pde = pmap_pdpe_to_pde(pdpe, sva);
+ oldpde = *pde;
+ if ((oldpde & PG_V) == 0)
+ continue;
+ else if ((oldpde & PG_PS) != 0) {
+ if ((oldpde & PG_MANAGED) == 0)
+ continue;
+ if (!pv_lists_locked) {
+ pv_lists_locked = TRUE;
+ if (!rw_try_rlock(&pvh_global_lock)) {
+ if (anychanged)
+ pmap_invalidate_all(pmap);
+ PMAP_UNLOCK(pmap);
+ rw_rlock(&pvh_global_lock);
+ goto resume;
+ }
+ }
+ lock = NULL;
+ if (!pmap_demote_pde_locked(pmap, pde, sva, &lock)) {
+ if (lock != NULL)
+ rw_wunlock(lock);
+
+ /*
+ * The large page mapping was destroyed.
+ */
+ continue;
+ }
+
+ /*
+ * Unless the page mappings are wired, remove the
+ * mapping to a single page so that a subsequent
+ * access may repromote. Since the underlying page
+ * table page is fully populated, this removal never
+ * frees a page table page.
+ */
+ if ((oldpde & PG_W) == 0) {
+ pte = pmap_pde_to_pte(pde, sva);
+ KASSERT((*pte & PG_V) != 0,
+ ("pmap_advise: invalid PTE"));
+ pmap_remove_pte(pmap, pte, sva, *pde, NULL,
+ &lock);
+ anychanged = TRUE;
+ }
+ if (lock != NULL)
+ rw_wunlock(lock);
+ }
+ if (va_next > eva)
+ va_next = eva;
+ for (pte = pmap_pde_to_pte(pde, sva); sva != va_next; pte++,
+ sva += PAGE_SIZE) {
+ if ((*pte & (PG_MANAGED | PG_V)) != (PG_MANAGED |
+ PG_V))
+ continue;
+ else if ((*pte & (PG_M | PG_RW)) == (PG_M | PG_RW)) {
+ if (advice == MADV_DONTNEED) {
+ /*
+ * Future calls to pmap_is_modified()
+ * can be avoided by making the page
+ * dirty now.
+ */
+ m = PHYS_TO_VM_PAGE(*pte & PG_FRAME);
+ vm_page_dirty(m);
+ }
+ atomic_clear_long(pte, PG_M | PG_A);
+ } else if ((*pte & PG_A) != 0)
+ atomic_clear_long(pte, PG_A);
+ else
+ continue;
+ if ((*pte & PG_G) != 0)
+ pmap_invalidate_page(pmap, sva);
+ else
+ anychanged = TRUE;
+ }
+ }
+ if (anychanged)
+ pmap_invalidate_all(pmap);
+ if (pv_lists_locked)
+ rw_runlock(&pvh_global_lock);
+ PMAP_UNLOCK(pmap);
+}
+
+/*
* Clear the modify bits on the specified physical page.
*/
void
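[Editor's note: the try-lock-and-restart dance around pvh_global_lock in the amd64 pmap_advise() above exists because the global pv list lock must be acquired before the per-pmap lock. Once the walk already holds the pmap lock, it can only take the global lock opportunistically; on failure it backs out, blocks on the global lock, and restarts at the current address. The following is a minimal self-contained userspace sketch of that idiom using POSIX locks; the names (global_lock, object_lock, walk_range) are invented for illustration and are not part of this change.]

/*
 * Sketch of the try-lock-or-restart idiom: lock order is
 * "global rwlock before object mutex", but the walker discovers it
 * needs the global lock only after the object mutex is held.
 */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_rwlock_t global_lock = PTHREAD_RWLOCK_INITIALIZER;
static pthread_mutex_t object_lock = PTHREAD_MUTEX_INITIALIZER;

static void
walk_range(int *items, int nitems)
{
	bool global_held = false;
	int i = 0;

resume:
	pthread_mutex_lock(&object_lock);
	for (; i < nitems; i++) {
		if (items[i] < 0 && !global_held) {
			/* Need the global lock; try without blocking. */
			if (pthread_rwlock_tryrdlock(&global_lock) != 0) {
				/*
				 * Back out so the locks can be taken in the
				 * correct order, then restart at item i.
				 */
				pthread_mutex_unlock(&object_lock);
				pthread_rwlock_rdlock(&global_lock);
				global_held = true;
				goto resume;
			}
			global_held = true;
		}
		/* ... process items[i] ... */
	}
	pthread_mutex_unlock(&object_lock);
	if (global_held)
		pthread_rwlock_unlock(&global_lock);
}

int
main(void)
{
	int items[] = { 1, -2, 3 };

	walk_range(items, 3);
	printf("done\n");
	return (0);
}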
diff --git a/sys/arm/arm/pmap-v6.c b/sys/arm/arm/pmap-v6.c
index 08763b8..b9447d3 100644
--- a/sys/arm/arm/pmap-v6.c
+++ b/sys/arm/arm/pmap-v6.c
@@ -4767,6 +4767,14 @@ pmap_is_modified(vm_page_t m)
}
/*
+ * This function is advisory.
+ */
+void
+pmap_advise(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, int advice)
+{
+}
+
+/*
* Clear the modify bits on the specified physical page.
*/
void
diff --git a/sys/arm/arm/pmap.c b/sys/arm/arm/pmap.c
index f232b07..bc7912d 100644
--- a/sys/arm/arm/pmap.c
+++ b/sys/arm/arm/pmap.c
@@ -4516,6 +4516,14 @@ pmap_page_wired_mappings(vm_page_t m)
}
/*
+ * This function is advisory.
+ */
+void
+pmap_advise(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, int advice)
+{
+}
+
+/*
* pmap_ts_referenced:
*
* Return the count of reference bits for a page, clearing all of them.
diff --git a/sys/i386/i386/pmap.c b/sys/i386/i386/pmap.c
index 8fa9026..f09abee 100644
--- a/sys/i386/i386/pmap.c
+++ b/sys/i386/i386/pmap.c
@@ -4834,6 +4834,112 @@ out:
}
/*
+ * Apply the given advice to the specified range of addresses within the
+ * given pmap. Depending on the advice, clear the referenced and/or
+ * modified flags in each mapping and set the mapped page's dirty field.
+ */
+void
+pmap_advise(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, int advice)
+{
+ pd_entry_t oldpde, *pde;
+ pt_entry_t *pte;
+ vm_offset_t pdnxt;
+ vm_page_t m;
+ boolean_t anychanged, pv_lists_locked;
+
+ if (advice != MADV_DONTNEED && advice != MADV_FREE)
+ return;
+ if (pmap_is_current(pmap))
+ pv_lists_locked = FALSE;
+ else {
+ pv_lists_locked = TRUE;
+resume:
+ rw_wlock(&pvh_global_lock);
+ sched_pin();
+ }
+ anychanged = FALSE;
+ PMAP_LOCK(pmap);
+ for (; sva < eva; sva = pdnxt) {
+ pdnxt = (sva + NBPDR) & ~PDRMASK;
+ if (pdnxt < sva)
+ pdnxt = eva;
+ pde = pmap_pde(pmap, sva);
+ oldpde = *pde;
+ if ((oldpde & PG_V) == 0)
+ continue;
+ else if ((oldpde & PG_PS) != 0) {
+ if ((oldpde & PG_MANAGED) == 0)
+ continue;
+ if (!pv_lists_locked) {
+ pv_lists_locked = TRUE;
+ if (!rw_try_wlock(&pvh_global_lock)) {
+ if (anychanged)
+ pmap_invalidate_all(pmap);
+ PMAP_UNLOCK(pmap);
+ goto resume;
+ }
+ sched_pin();
+ }
+ if (!pmap_demote_pde(pmap, pde, sva)) {
+ /*
+ * The large page mapping was destroyed.
+ */
+ continue;
+ }
+
+ /*
+ * Unless the page mappings are wired, remove the
+ * mapping to a single page so that a subsequent
+ * access may repromote. Since the underlying page
+ * table page is fully populated, this removal never
+ * frees a page table page.
+ */
+ if ((oldpde & PG_W) == 0) {
+ pte = pmap_pte_quick(pmap, sva);
+ KASSERT((*pte & PG_V) != 0,
+ ("pmap_advise: invalid PTE"));
+ pmap_remove_pte(pmap, pte, sva, NULL);
+ anychanged = TRUE;
+ }
+ }
+ if (pdnxt > eva)
+ pdnxt = eva;
+ for (pte = pmap_pte_quick(pmap, sva); sva != pdnxt; pte++,
+ sva += PAGE_SIZE) {
+ if ((*pte & (PG_MANAGED | PG_V)) != (PG_MANAGED |
+ PG_V))
+ continue;
+ else if ((*pte & (PG_M | PG_RW)) == (PG_M | PG_RW)) {
+ if (advice == MADV_DONTNEED) {
+ /*
+ * Future calls to pmap_is_modified()
+ * can be avoided by making the page
+ * dirty now.
+ */
+ m = PHYS_TO_VM_PAGE(*pte & PG_FRAME);
+ vm_page_dirty(m);
+ }
+ atomic_clear_int((u_int *)pte, PG_M | PG_A);
+ } else if ((*pte & PG_A) != 0)
+ atomic_clear_int((u_int *)pte, PG_A);
+ else
+ continue;
+ if ((*pte & PG_G) != 0)
+ pmap_invalidate_page(pmap, sva);
+ else
+ anychanged = TRUE;
+ }
+ }
+ if (anychanged)
+ pmap_invalidate_all(pmap);
+ if (pv_lists_locked) {
+ sched_unpin();
+ rw_wunlock(&pvh_global_lock);
+ }
+ PMAP_UNLOCK(pmap);
+}
+
+/*
* Clear the modify bits on the specified physical page.
*/
void
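[Editor's note: the inner per-PTE loop is structurally the same in the amd64, i386, and Xen versions: a dirty, writable mapping optionally transfers its modification to the page (MADV_DONTNEED only) and then has both the modified and accessed bits cleared; a merely accessed mapping has only the accessed bit cleared; anything else is skipped, and only mappings that actually lost a bit are invalidated. A standalone model of that decision, using made-up flag names rather than real PTE bits, might look like this.]

#include <stdbool.h>
#include <stdio.h>

#define	F_VALID		0x01	/* mapping is valid */
#define	F_MANAGED	0x02	/* page has dirty/reference tracking */
#define	F_WRITABLE	0x04	/* mapping allows writes */
#define	F_MODIFIED	0x08	/* hardware set the dirty bit */
#define	F_ACCESSED	0x10	/* hardware set the referenced bit */

enum advice { ADV_DONTNEED, ADV_FREE };

struct page {
	bool dirty;
};

/*
 * Returns true when the caller must invalidate the TLB entry for this
 * mapping, i.e., a referenced or modified bit was actually cleared.
 */
static bool
advise_pte(unsigned *pte, struct page *pg, enum advice adv)
{
	if ((*pte & (F_VALID | F_MANAGED)) != (F_VALID | F_MANAGED))
		return (false);
	if ((*pte & (F_MODIFIED | F_WRITABLE)) == (F_MODIFIED | F_WRITABLE)) {
		if (adv == ADV_DONTNEED) {
			/* Remember the modification before discarding it. */
			pg->dirty = true;
		}
		*pte &= ~(F_MODIFIED | F_ACCESSED);
	} else if ((*pte & F_ACCESSED) != 0)
		*pte &= ~F_ACCESSED;
	else
		return (false);	/* nothing cleared, nothing to invalidate */
	return (true);
}

int
main(void)
{
	unsigned pte = F_VALID | F_MANAGED | F_WRITABLE | F_MODIFIED;
	struct page pg = { .dirty = false };

	if (advise_pte(&pte, &pg, ADV_DONTNEED))
		printf("invalidate; page dirty=%d, pte=%#x\n", pg.dirty, pte);
	return (0);
}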
diff --git a/sys/i386/xen/pmap.c b/sys/i386/xen/pmap.c
index c34fe29..3abe7ef 100644
--- a/sys/i386/xen/pmap.c
+++ b/sys/i386/xen/pmap.c
@@ -3914,6 +3914,72 @@ pmap_ts_referenced(vm_page_t m)
}
/*
+ * Apply the given advice to the specified range of addresses within the
+ * given pmap. Depending on the advice, clear the referenced and/or
+ * modified flags in each mapping and set the mapped page's dirty field.
+ */
+void
+pmap_advise(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, int advice)
+{
+ pd_entry_t oldpde;
+ pt_entry_t *pte;
+ vm_offset_t pdnxt;
+ vm_page_t m;
+ boolean_t anychanged;
+
+ if (advice != MADV_DONTNEED && advice != MADV_FREE)
+ return;
+ anychanged = FALSE;
+ rw_wlock(&pvh_global_lock);
+ sched_pin();
+ PMAP_LOCK(pmap);
+ for (; sva < eva; sva = pdnxt) {
+ pdnxt = (sva + NBPDR) & ~PDRMASK;
+ if (pdnxt < sva)
+ pdnxt = eva;
+ oldpde = pmap->pm_pdir[sva >> PDRSHIFT];
+ if ((oldpde & (PG_PS | PG_V)) != PG_V)
+ continue;
+ if (pdnxt > eva)
+ pdnxt = eva;
+ for (pte = pmap_pte_quick(pmap, sva); sva != pdnxt; pte++,
+ sva += PAGE_SIZE) {
+ if ((*pte & (PG_MANAGED | PG_V)) != (PG_MANAGED |
+ PG_V))
+ continue;
+ else if ((*pte & (PG_M | PG_RW)) == (PG_M | PG_RW)) {
+ if (advice == MADV_DONTNEED) {
+ /*
+ * Future calls to pmap_is_modified()
+ * can be avoided by making the page
+ * dirty now.
+ */
+ m = PHYS_TO_VM_PAGE(xpmap_mtop(*pte) &
+ PG_FRAME);
+ vm_page_dirty(m);
+ }
+ PT_SET_VA_MA(pte, *pte & ~(PG_M | PG_A), TRUE);
+ } else if ((*pte & PG_A) != 0)
+ PT_SET_VA_MA(pte, *pte & ~PG_A, TRUE);
+ else
+ continue;
+ if ((*pte & PG_G) != 0)
+ pmap_invalidate_page(pmap, sva);
+ else
+ anychanged = TRUE;
+ }
+ }
+ PT_UPDATES_FLUSH();
+ if (*PMAP1)
+ PT_SET_VA_MA(PMAP1, 0, TRUE);
+ if (anychanged)
+ pmap_invalidate_all(pmap);
+ sched_unpin();
+ rw_wunlock(&pvh_global_lock);
+ PMAP_UNLOCK(pmap);
+}
+
+/*
* Clear the modify bits on the specified physical page.
*/
void
diff --git a/sys/ia64/ia64/pmap.c b/sys/ia64/ia64/pmap.c
index b23c77c..442149f 100644
--- a/sys/ia64/ia64/pmap.c
+++ b/sys/ia64/ia64/pmap.c
@@ -2310,6 +2310,50 @@ pmap_is_referenced(vm_page_t m)
}
/*
+ * Apply the given advice to the specified range of addresses within the
+ * given pmap. Depending on the advice, clear the referenced and/or
+ * modified flags in each mapping and set the mapped page's dirty field.
+ */
+void
+pmap_advise(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, int advice)
+{
+ struct ia64_lpte *pte;
+ pmap_t oldpmap;
+ vm_page_t m;
+
+ PMAP_LOCK(pmap);
+ oldpmap = pmap_switch(pmap);
+ for (; sva < eva; sva += PAGE_SIZE) {
+ /* If page is invalid, skip this page. */
+ pte = pmap_find_vhpt(sva);
+ if (pte == NULL)
+ continue;
+
+ /* If it isn't managed, skip it too. */
+ if (!pmap_managed(pte))
+ continue;
+
+ /* Clear its modified and referenced bits. */
+ if (pmap_dirty(pte)) {
+ if (advice == MADV_DONTNEED) {
+ /*
+ * Future calls to pmap_is_modified() can be
+ * avoided by making the page dirty now.
+ */
+ m = PHYS_TO_VM_PAGE(pmap_ppn(pte));
+ vm_page_dirty(m);
+ }
+ pmap_clear_dirty(pte);
+ } else if (!pmap_accessed(pte))
+ continue;
+ pmap_clear_accessed(pte);
+ pmap_invalidate_page(sva);
+ }
+ pmap_switch(oldpmap);
+ PMAP_UNLOCK(pmap);
+}
+
+/*
* Clear the modify bits on the specified physical page.
*/
void
diff --git a/sys/mips/mips/pmap.c b/sys/mips/mips/pmap.c
index fb22a89..90994cc 100644
--- a/sys/mips/mips/pmap.c
+++ b/sys/mips/mips/pmap.c
@@ -2914,6 +2914,14 @@ pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr)
}
/*
+ * This function is advisory.
+ */
+void
+pmap_advise(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, int advice)
+{
+}
+
+/*
* Clear the modify bits on the specified physical page.
*/
void
diff --git a/sys/powerpc/powerpc/mmu_if.m b/sys/powerpc/powerpc/mmu_if.m
index 0382bd8..f9f37cb 100644
--- a/sys/powerpc/powerpc/mmu_if.m
+++ b/sys/powerpc/powerpc/mmu_if.m
@@ -133,6 +133,25 @@ CODE {
/**
+ * @brief Apply the given advice to the specified range of addresses within
+ * the given pmap. Depending on the advice, clear the referenced and/or
+ * modified flags in each mapping and set the mapped page's dirty field.
+ *
+ * @param _pmap physical map
+ * @param _start virtual range start
+ * @param _end virtual range end
+ * @param _advice advice to apply
+ */
+METHOD void advise {
+ mmu_t _mmu;
+ pmap_t _pmap;
+ vm_offset_t _start;
+ vm_offset_t _end;
+ int _advice;
+};
+
+
+/**
* @brief Change the wiring attribute for the page in the given physical
* map and virtual address.
*
diff --git a/sys/powerpc/powerpc/pmap_dispatch.c b/sys/powerpc/powerpc/pmap_dispatch.c
index 7fd98f4..24e6076 100644
--- a/sys/powerpc/powerpc/pmap_dispatch.c
+++ b/sys/powerpc/powerpc/pmap_dispatch.c
@@ -91,6 +91,15 @@ RB_GENERATE(pvo_tree, pvo_entry, pvo_plink, pvo_vaddr_compare);
void
+pmap_advise(pmap_t pmap, vm_offset_t start, vm_offset_t end, int advice)
+{
+
+ CTR5(KTR_PMAP, "%s(%p, %#x, %#x, %d)", __func__, pmap, start, end,
+ advice);
+ MMU_ADVISE(mmu_obj, pmap, start, end, advice);
+}
+
+void
pmap_change_wiring(pmap_t pmap, vm_offset_t va, boolean_t wired)
{
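[Editor's note: on powerpc, pmap_advise() is pure dispatch: it logs a trace record and forwards through MMU_ADVISE() to whichever MMU backend implements the advise method declared in mmu_if.m, using glue generated by kobj(9). The self-contained userspace sketch below mimics that shape with a plain function-pointer table; struct mmu_methods, foo_mmu_advise, and the fixed backend selection are invented for illustration and are not the real generated interface.]

#include <stdio.h>

typedef unsigned long vm_offset_t;
typedef struct pmap *pmap_t;

struct mmu_methods {
	void (*advise)(pmap_t pmap, vm_offset_t start, vm_offset_t end,
	    int advice);
};

/* A backend implementation of the advise method. */
static void
foo_mmu_advise(pmap_t pmap, vm_offset_t start, vm_offset_t end, int advice)
{

	printf("advise [%#lx, %#lx) advice %d\n", start, end, advice);
}

static const struct mmu_methods foo_mmu = {
	.advise = foo_mmu_advise,
};

/* Backend chosen at boot in the real kernel; fixed here for the example. */
static const struct mmu_methods *mmu_obj = &foo_mmu;

/* Machine-independent entry point, as in pmap_dispatch.c. */
void
pmap_advise(pmap_t pmap, vm_offset_t start, vm_offset_t end, int advice)
{

	mmu_obj->advise(pmap, start, end, advice);
}

int
main(void)
{

	pmap_advise(NULL, 0x1000, 0x2000, 4);	/* 4 == MADV_DONTNEED */
	return (0);
}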
diff --git a/sys/sparc64/sparc64/pmap.c b/sys/sparc64/sparc64/pmap.c
index 47f2c49..a84e4c3 100644
--- a/sys/sparc64/sparc64/pmap.c
+++ b/sys/sparc64/sparc64/pmap.c
@@ -2126,6 +2126,14 @@ pmap_is_referenced(vm_page_t m)
return (rv);
}
+/*
+ * This function is advisory.
+ */
+void
+pmap_advise(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, int advice)
+{
+}
+
void
pmap_clear_modify(vm_page_t m)
{
diff --git a/sys/vm/pmap.h b/sys/vm/pmap.h
index c0f80a7..911298f 100644
--- a/sys/vm/pmap.h
+++ b/sys/vm/pmap.h
@@ -98,6 +98,8 @@ struct thread;
extern vm_offset_t kernel_vm_end;
void pmap_activate(struct thread *td);
+void pmap_advise(pmap_t pmap, vm_offset_t sva, vm_offset_t eva,
+ int advice);
void pmap_align_superpage(vm_object_t, vm_ooffset_t, vm_offset_t *,
vm_size_t);
void pmap_change_wiring(pmap_t, vm_offset_t, boolean_t);
diff --git a/sys/vm/vm_map.c b/sys/vm/vm_map.c
index 71c44ad..1be62af 100644
--- a/sys/vm/vm_map.c
+++ b/sys/vm/vm_map.c
@@ -2125,7 +2125,7 @@ vm_map_madvise(
(current != &map->header) && (current->start < end);
current = current->next
) {
- vm_offset_t useStart;
+ vm_offset_t useEnd, useStart;
if (current->eflags & MAP_ENTRY_IS_SUB_MAP)
continue;
@@ -2133,17 +2133,34 @@ vm_map_madvise(
pstart = OFF_TO_IDX(current->offset);
pend = pstart + atop(current->end - current->start);
useStart = current->start;
+ useEnd = current->end;
if (current->start < start) {
pstart += atop(start - current->start);
useStart = start;
}
- if (current->end > end)
+ if (current->end > end) {
pend -= atop(current->end - end);
+ useEnd = end;
+ }
if (pstart >= pend)
continue;
+ /*
+ * Perform the pmap_advise() before clearing
+ * PGA_REFERENCED in vm_page_advise(). Otherwise, a
+ * concurrent pmap operation, such as pmap_remove(),
+ * could clear a reference in the pmap and set
+ * PGA_REFERENCED on the page before the pmap_advise()
+ * had completed. Consequently, the page would appear
+ * referenced based upon an old reference that
+ * occurred before this pmap_advise() ran.
+ */
+ if (behav == MADV_DONTNEED || behav == MADV_FREE)
+ pmap_advise(map->pmap, useStart, useEnd,
+ behav);
+
vm_object_madvise(current->object.vm_object, pstart,
pend, behav);
if (behav == MADV_WILLNEED) {
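[Editor's note: the ordering comment added above can be restated with a toy model: a stale referenced bit in some pmap, plus a concurrent pmap_remove()-style operation that folds that bit into the page's summary flag. If the summary flag is cleared before the pmap bits, the stale reference can resurface; clearing the pmap bits first (pmap_advise() before vm_page_advise() clears PGA_REFERENCED) closes that window. The sketch below uses invented names and plain C11 atomics and runs the two interleavings sequentially for clarity.]

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static _Atomic bool pmap_ref;		/* referenced bit in a pmap */
static _Atomic bool pga_referenced;	/* PGA_REFERENCED-like summary flag */

/* A concurrent pmap_remove()-style operation transfers the bit. */
static void
concurrent_remove(void)
{

	if (atomic_exchange(&pmap_ref, false))
		atomic_store(&pga_referenced, true);
}

int
main(void)
{

	/* Wrong order: clear the summary flag before the pmap bit. */
	pmap_ref = true;
	pga_referenced = false;
	atomic_store(&pga_referenced, false);	/* summary cleared first */
	concurrent_remove();			/* stale bit resurfaces */
	atomic_store(&pmap_ref, false);		/* pmap cleared too late */
	printf("wrong order:   referenced=%d\n", (int)pga_referenced);

	/* Order used above: pmap bits first, then the summary flag. */
	pmap_ref = true;
	pga_referenced = false;
	atomic_store(&pmap_ref, false);		/* pmap_advise() */
	concurrent_remove();			/* finds nothing to move */
	atomic_store(&pga_referenced, false);	/* vm_page_advise() */
	printf("correct order: referenced=%d\n", (int)pga_referenced);
	return (0);
}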
diff --git a/sys/vm/vm_page.c b/sys/vm/vm_page.c
index 7d8ecfa..7b4b57c 100644
--- a/sys/vm/vm_page.c
+++ b/sys/vm/vm_page.c
@@ -2634,7 +2634,6 @@ vm_page_advise(vm_page_t m, int advice)
* But we do make the page as freeable as we can without
* actually taking the step of unmapping it.
*/
- pmap_clear_modify(m);
m->dirty = 0;
m->act_count = 0;
} else if (advice != MADV_DONTNEED)
@@ -2654,15 +2653,7 @@ vm_page_advise(vm_page_t m, int advice)
/*
* Clear any references to the page. Otherwise, the page daemon will
* immediately reactivate the page.
- *
- * Perform the pmap_clear_reference() first. Otherwise, a concurrent
- * pmap operation, such as pmap_remove(), could clear a reference in
- * the pmap and set PGA_REFERENCED on the page before the
- * pmap_clear_reference() had completed. Consequently, the page would
- * appear referenced based upon an old reference that occurred before
- * this function ran.
*/
- pmap_clear_reference(m);
vm_page_aflag_clear(m, PGA_REFERENCED);
if (advice != MADV_FREE && m->dirty == 0 && pmap_is_modified(m))
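[Editor's note: from userspace, the path this change speeds up is an ordinary madvise(2) call with MADV_DONTNEED or MADV_FREE on a mapped range, which reaches vm_map_madvise() and now calls pmap_advise() before vm_object_madvise(). A minimal example using only standard mmap/madvise calls, with error handling kept short, is shown below.]

#include <sys/mman.h>
#include <err.h>
#include <string.h>

int
main(void)
{
	size_t len = 64 * 4096;
	char *p;

	p = mmap(NULL, len, PROT_READ | PROT_WRITE,
	    MAP_ANON | MAP_PRIVATE, -1, 0);
	if (p == MAP_FAILED)
		err(1, "mmap");

	memset(p, 0xa5, len);		/* dirty the pages */

	/* Tell the VM system the contents are no longer needed. */
	if (madvise(p, len, MADV_FREE) == -1)
		err(1, "madvise");

	munmap(p, len);
	return (0);
}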