author	alc <alc@FreeBSD.org>	2014-07-06 17:42:38 +0000
committer	alc <alc@FreeBSD.org>	2014-07-06 17:42:38 +0000
commit	d74e85dbb9aba51f1ffc63a0ebae250559a8fcd9 (patch)
tree	a294db7e9041f651c7a421baf7d2bc56d5447086 /sys
parent	3437bf817836057606a700b02d68b0ed46582823 (diff)
Introduce pmap_unwire(). It will replace pmap_change_wiring(). There are
several reasons for this change:

pmap_change_wiring() has never (in my memory) been used to set the wired
attribute on a virtual page. We have always used pmap_enter() to do that.
Moreover, it is not really safe to use pmap_change_wiring() to set the
wired attribute on a virtual page. The description of pmap_change_wiring()
says that it assumes the existence of a mapping in the pmap. However,
non-wired mappings may be reclaimed by the pmap at any time. (See
pmap_collect().) Many implementations of pmap_change_wiring() will crash
if the mapping does not exist.

pmap_unwire() accepts a range of virtual addresses, whereas
pmap_change_wiring() acts upon a single virtual page. Since we are
typically unwiring a range of virtual addresses, pmap_unwire() will be
more efficient. Moreover, pmap_unwire() allows us to unwire superpage
mappings. Previously, we were forced to demote the superpage mapping,
because pmap_change_wiring() only allowed us to express the unwiring of a
single base page mapping at a time. This added to the overhead of unwiring
for large ranges of addresses, including the implicit unwiring that occurs
at process termination.

Implementations for arm and powerpc will follow.

Discussed with:	jeff, marcel
Reviewed by:	kib
Sponsored by:	EMC / Isilon Storage Division
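As a hedged illustration of the interface change (the snippet below is not
part of this commit, and the variable names pmap, start, and end are
hypothetical; only the two function signatures come from the patch), a
machine-independent caller that previously unwired a region one base page
at a time can now issue a single call:

/*
 * Illustrative sketch only: unwiring the range [start, end) of a pmap.
 */
vm_offset_t va;

/* Before: one call per base page, which forced superpage demotion. */
for (va = start; va < end; va += PAGE_SIZE)
	pmap_change_wiring(pmap, va, FALSE);

/* After: one call for the whole range; a superpage mapping that is
 * fully covered by [start, end) is unwired without being demoted. */
pmap_unwire(pmap, start, end);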
Diffstat (limited to 'sys')
-rw-r--r--	sys/amd64/amd64/pmap.c	96
-rw-r--r--	sys/i386/i386/pmap.c	93
-rw-r--r--	sys/i386/xen/pmap.c	51
-rw-r--r--	sys/ia64/ia64/pmap.c	32
-rw-r--r--	sys/mips/mips/pmap.c	49
-rw-r--r--	sys/sparc64/sparc64/pmap.c	40
-rw-r--r--	sys/vm/pmap.h	1
7 files changed, 362 insertions, 0 deletions
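Every per-architecture implementation below instantiates the same basic
loop. As a hedged distillation (lookup_pte(), pte_valid(), pte_wired(),
and pte_clear_wired() are placeholder names, not real FreeBSD interfaces),
the common shape is:

/*
 * Distilled shape of the pmap_unwire() implementations in this commit.
 * The four pte_* helpers are hypothetical stand-ins for each
 * architecture's page-table accessors.
 */
void
pmap_unwire(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
{
	pt_entry_t *pte;

	PMAP_LOCK(pmap);
	for (; sva < eva; sva += PAGE_SIZE) {
		pte = lookup_pte(pmap, sva);
		if (pte == NULL || !pte_valid(pte))
			continue;	/* Invalid mappings are ignored. */
		if (!pte_wired(pte))
			panic("pmap_unwire: pte is missing the wired bit");
		/*
		 * Cleared atomically where the hardware may set the
		 * modified/accessed bits concurrently.
		 */
		pte_clear_wired(pte);
		pmap->pm_stats.wired_count--;
	}
	PMAP_UNLOCK(pmap);
}

The superpage-capable implementations (amd64, i386) additionally clear the
wired bit on an entire page-directory entry when the range covers the whole
large page, demoting it to base-page mappings only on partial coverage.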
diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c
index 7492960..3edaae9 100644
--- a/sys/amd64/amd64/pmap.c
+++ b/sys/amd64/amd64/pmap.c
@@ -4736,6 +4736,102 @@ out:
}
/*
+ * Clear the wired attribute from the mappings for the specified range of
+ * addresses in the given pmap. Every valid mapping within that range
+ * must have the wired attribute set. In contrast, invalid mappings
+ * cannot have the wired attribute set, so they are ignored.
+ *
+ * The wired attribute of the page table entry is not a hardware feature,
+ * so there is no need to invalidate any TLB entries.
+ */
+void
+pmap_unwire(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
+{
+ vm_offset_t va_next;
+ pml4_entry_t *pml4e;
+ pdp_entry_t *pdpe;
+ pd_entry_t *pde;
+ pt_entry_t *pte, PG_V;
+ boolean_t pv_lists_locked;
+
+ PG_V = pmap_valid_bit(pmap);
+ pv_lists_locked = FALSE;
+resume:
+ PMAP_LOCK(pmap);
+ for (; sva < eva; sva = va_next) {
+ pml4e = pmap_pml4e(pmap, sva);
+ if ((*pml4e & PG_V) == 0) {
+ va_next = (sva + NBPML4) & ~PML4MASK;
+ if (va_next < sva)
+ va_next = eva;
+ continue;
+ }
+ pdpe = pmap_pml4e_to_pdpe(pml4e, sva);
+ if ((*pdpe & PG_V) == 0) {
+ va_next = (sva + NBPDP) & ~PDPMASK;
+ if (va_next < sva)
+ va_next = eva;
+ continue;
+ }
+ va_next = (sva + NBPDR) & ~PDRMASK;
+ if (va_next < sva)
+ va_next = eva;
+ pde = pmap_pdpe_to_pde(pdpe, sva);
+ if ((*pde & PG_V) == 0)
+ continue;
+ if ((*pde & PG_PS) != 0) {
+ if ((*pde & PG_W) == 0)
+ panic("pmap_unwire: pde %#jx is missing PG_W",
+ (uintmax_t)*pde);
+
+ /*
+ * Are we unwiring the entire large page? If not,
+ * demote the mapping and fall through.
+ */
+ if (sva + NBPDR == va_next && eva >= va_next) {
+ atomic_clear_long(pde, PG_W);
+ pmap->pm_stats.wired_count -= NBPDR /
+ PAGE_SIZE;
+ continue;
+ } else {
+ if (!pv_lists_locked) {
+ pv_lists_locked = TRUE;
+ if (!rw_try_rlock(&pvh_global_lock)) {
+ PMAP_UNLOCK(pmap);
+ rw_rlock(&pvh_global_lock);
+ /* Repeat sva. */
+ goto resume;
+ }
+ }
+ if (!pmap_demote_pde(pmap, pde, sva))
+ panic("pmap_unwire: demotion failed");
+ }
+ }
+ if (va_next > eva)
+ va_next = eva;
+ for (pte = pmap_pde_to_pte(pde, sva); sva != va_next; pte++,
+ sva += PAGE_SIZE) {
+ if ((*pte & PG_V) == 0)
+ continue;
+ if ((*pte & PG_W) == 0)
+ panic("pmap_unwire: pte %#jx is missing PG_W",
+ (uintmax_t)*pte);
+
+ /*
+ * PG_W must be cleared atomically. Although the pmap
+ * lock synchronizes access to PG_W, another processor
+ * could be setting PG_M and/or PG_A concurrently.
+ */
+ atomic_clear_long(pte, PG_W);
+ pmap->pm_stats.wired_count--;
+ }
+ }
+ if (pv_lists_locked)
+ rw_runlock(&pvh_global_lock);
+ PMAP_UNLOCK(pmap);
+}
+
+/*
* Copy the range specified by src_addr/len
* from the source map to the range dst_addr/len
* in the destination map.
diff --git a/sys/i386/i386/pmap.c b/sys/i386/i386/pmap.c
index 3085157..c0ad7df 100644
--- a/sys/i386/i386/pmap.c
+++ b/sys/i386/i386/pmap.c
@@ -4012,6 +4012,99 @@ out:
PMAP_UNLOCK(pmap);
}
+/*
+ * Clear the wired attribute from the mappings for the specified range of
+ * addresses in the given pmap. Every valid mapping within that range
+ * must have the wired attribute set. In contrast, invalid mappings
+ * cannot have the wired attribute set, so they are ignored.
+ *
+ * The wired attribute of the page table entry is not a hardware feature,
+ * so there is no need to invalidate any TLB entries.
+ */
+void
+pmap_unwire(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
+{
+ vm_offset_t pdnxt;
+ pd_entry_t *pde;
+ pt_entry_t *pte;
+ boolean_t pv_lists_locked;
+
+ if (pmap_is_current(pmap))
+ pv_lists_locked = FALSE;
+ else {
+ pv_lists_locked = TRUE;
+resume:
+ rw_wlock(&pvh_global_lock);
+ sched_pin();
+ }
+ PMAP_LOCK(pmap);
+ for (; sva < eva; sva = pdnxt) {
+ pdnxt = (sva + NBPDR) & ~PDRMASK;
+ if (pdnxt < sva)
+ pdnxt = eva;
+ pde = pmap_pde(pmap, sva);
+ if ((*pde & PG_V) == 0)
+ continue;
+ if ((*pde & PG_PS) != 0) {
+ if ((*pde & PG_W) == 0)
+ panic("pmap_unwire: pde %#jx is missing PG_W",
+ (uintmax_t)*pde);
+
+ /*
+ * Are we unwiring the entire large page? If not,
+ * demote the mapping and fall through.
+ */
+ if (sva + NBPDR == pdnxt && eva >= pdnxt) {
+ /*
+ * Regardless of whether a pde (or pte) is 32
+ * or 64 bits in size, PG_W is among the least
+ * significant 32 bits.
+ */
+ atomic_clear_int((u_int *)pde, PG_W);
+ pmap->pm_stats.wired_count -= NBPDR /
+ PAGE_SIZE;
+ continue;
+ } else {
+ if (!pv_lists_locked) {
+ pv_lists_locked = TRUE;
+ if (!rw_try_wlock(&pvh_global_lock)) {
+ PMAP_UNLOCK(pmap);
+ /* Repeat sva. */
+ goto resume;
+ }
+ sched_pin();
+ }
+ if (!pmap_demote_pde(pmap, pde, sva))
+ panic("pmap_unwire: demotion failed");
+ }
+ }
+ if (pdnxt > eva)
+ pdnxt = eva;
+ for (pte = pmap_pte_quick(pmap, sva); sva != pdnxt; pte++,
+ sva += PAGE_SIZE) {
+ if ((*pte & PG_V) == 0)
+ continue;
+ if ((*pte & PG_W) == 0)
+ panic("pmap_unwire: pte %#jx is missing PG_W",
+ (uintmax_t)*pte);
+
+ /*
+ * PG_W must be cleared atomically. Although the pmap
+ * lock synchronizes access to PG_W, another processor
+ * could be setting PG_M and/or PG_A concurrently.
+ *
+ * PG_W is among the least significant 32 bits.
+ */
+ atomic_clear_int((u_int *)pte, PG_W);
+ pmap->pm_stats.wired_count--;
+ }
+ }
+ if (pv_lists_locked) {
+ sched_unpin();
+ rw_wunlock(&pvh_global_lock);
+ }
+ PMAP_UNLOCK(pmap);
+}
/*
diff --git a/sys/i386/xen/pmap.c b/sys/i386/xen/pmap.c
index fdc64bc..54ee3a6 100644
--- a/sys/i386/xen/pmap.c
+++ b/sys/i386/xen/pmap.c
@@ -3199,6 +3199,57 @@ pmap_change_wiring(pmap_t pmap, vm_offset_t va, boolean_t wired)
rw_wunlock(&pvh_global_lock);
}
+/*
+ * Clear the wired attribute from the mappings for the specified range of
+ * addresses in the given pmap. Every valid mapping within that range
+ * must have the wired attribute set. In contrast, invalid mappings
+ * cannot have the wired attribute set, so they are ignored.
+ *
+ * The wired attribute of the page table entry is not a hardware feature,
+ * so there is no need to invalidate any TLB entries.
+ */
+void
+pmap_unwire(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
+{
+ vm_offset_t pdnxt;
+ pd_entry_t *pde;
+ pt_entry_t *pte;
+
+ CTR3(KTR_PMAP, "pmap_unwire: pmap=%p sva=0x%x eva=0x%x", pmap, sva,
+ eva);
+ rw_wlock(&pvh_global_lock);
+ sched_pin();
+ PMAP_LOCK(pmap);
+ for (; sva < eva; sva = pdnxt) {
+ pdnxt = (sva + NBPDR) & ~PDRMASK;
+ if (pdnxt < sva)
+ pdnxt = eva;
+ pde = pmap_pde(pmap, sva);
+ if ((*pde & PG_V) == 0)
+ continue;
+ if ((*pde & PG_PS) != 0)
+ panic("pmap_unwire: unexpected PG_PS in pde %#jx",
+ (uintmax_t)*pde);
+ if (pdnxt > eva)
+ pdnxt = eva;
+ for (pte = pmap_pte_quick(pmap, sva); sva != pdnxt; pte++,
+ sva += PAGE_SIZE) {
+ if ((*pte & PG_V) == 0)
+ continue;
+ if ((*pte & PG_W) == 0)
+ panic("pmap_unwire: pte %#jx is missing PG_W",
+ (uintmax_t)*pte);
+ PT_SET_VA_MA(pte, *pte & ~PG_W, FALSE);
+ pmap->pm_stats.wired_count--;
+ }
+ }
+ if (*PMAP1)
+ PT_CLEAR_VA(PMAP1, FALSE);
+ PT_UPDATES_FLUSH();
+ sched_unpin();
+ rw_wunlock(&pvh_global_lock);
+ PMAP_UNLOCK(pmap);
+}
/*
diff --git a/sys/ia64/ia64/pmap.c b/sys/ia64/ia64/pmap.c
index 71d4202..307ccfd 100644
--- a/sys/ia64/ia64/pmap.c
+++ b/sys/ia64/ia64/pmap.c
@@ -1974,6 +1974,38 @@ pmap_change_wiring(pmap_t pmap, vm_offset_t va, boolean_t wired)
}
/*
+ * Clear the wired attribute from the mappings for the specified range of
+ * addresses in the given pmap. Every valid mapping within that range
+ * must have the wired attribute set. In contrast, invalid mappings
+ * cannot have the wired attribute set, so they are ignored.
+ *
+ * The wired attribute of the page table entry is not a hardware feature,
+ * so there is no need to invalidate any TLB entries.
+ */
+void
+pmap_unwire(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
+{
+ pmap_t oldpmap;
+ struct ia64_lpte *pte;
+
+ CTR4(KTR_PMAP, "%s(%p, %#x, %#x)", __func__, pmap, sva, eva);
+
+ PMAP_LOCK(pmap);
+ oldpmap = pmap_switch(pmap);
+ for (; sva < eva; sva += PAGE_SIZE) {
+ pte = pmap_find_vhpt(sva);
+ if (pte == NULL)
+ continue;
+ if (!pmap_wired(pte))
+ panic("pmap_unwire: pte %p isn't wired", pte);
+ pmap->pm_stats.wired_count--;
+ pmap_clear_wired(pte);
+ }
+ pmap_switch(oldpmap);
+ PMAP_UNLOCK(pmap);
+}
+
+/*
* Copy the range specified by src_addr/len
* from the source map to the range dst_addr/len
* in the destination map.
diff --git a/sys/mips/mips/pmap.c b/sys/mips/mips/pmap.c
index 7a262c8..5e766b2 100644
--- a/sys/mips/mips/pmap.c
+++ b/sys/mips/mips/pmap.c
@@ -2456,6 +2456,55 @@ pmap_change_wiring(pmap_t pmap, vm_offset_t va, boolean_t wired)
}
/*
+ * Clear the wired attribute from the mappings for the specified range of
+ * addresses in the given pmap. Every valid mapping within that range
+ * must have the wired attribute set. In contrast, invalid mappings
+ * cannot have the wired attribute set, so they are ignored.
+ *
+ * The wired attribute of the page table entry is not a hardware feature,
+ * so there is no need to invalidate any TLB entries.
+ */
+void
+pmap_unwire(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
+{
+ pd_entry_t *pde, *pdpe;
+ pt_entry_t *pte;
+ vm_offset_t va_next;
+
+ PMAP_LOCK(pmap);
+ for (; sva < eva; sva = va_next) {
+ pdpe = pmap_segmap(pmap, sva);
+#ifdef __mips_n64
+ if (*pdpe == NULL) {
+ va_next = (sva + NBSEG) & ~SEGMASK;
+ if (va_next < sva)
+ va_next = eva;
+ continue;
+ }
+#endif
+ va_next = (sva + NBPDR) & ~PDRMASK;
+ if (va_next < sva)
+ va_next = eva;
+ pde = pmap_pdpe_to_pde(pdpe, sva);
+ if (*pde == NULL)
+ continue;
+ if (va_next > eva)
+ va_next = eva;
+ for (pte = pmap_pde_to_pte(pde, sva); sva != va_next; pte++,
+ sva += PAGE_SIZE) {
+ if (!pte_test(pte, PTE_V))
+ continue;
+ if (!pte_test(pte, PTE_W))
+ panic("pmap_unwire: pte %#jx is missing PG_W",
+ (uintmax_t)*pte);
+ pte_clear(pte, PTE_W);
+ pmap->pm_stats.wired_count--;
+ }
+ }
+ PMAP_UNLOCK(pmap);
+}
+
+/*
* Copy the range specified by src_addr/len
* from the source map to the range dst_addr/len
* in the destination map.
diff --git a/sys/sparc64/sparc64/pmap.c b/sys/sparc64/sparc64/pmap.c
index 28fcb1f..af49fbd 100644
--- a/sys/sparc64/sparc64/pmap.c
+++ b/sys/sparc64/sparc64/pmap.c
@@ -141,6 +141,8 @@ static void pmap_bootstrap_set_tte(struct tte *tp, u_long vpn, u_long data);
static void pmap_cache_remove(vm_page_t m, vm_offset_t va);
static int pmap_protect_tte(struct pmap *pm1, struct pmap *pm2,
struct tte *tp, vm_offset_t va);
+static int pmap_unwire_tte(pmap_t pm, pmap_t pm2, struct tte *tp,
+ vm_offset_t va);
/*
* Map the given physical page at the specified virtual address in the
@@ -1690,6 +1692,44 @@ pmap_change_wiring(pmap_t pm, vm_offset_t va, boolean_t wired)
}
static int
+pmap_unwire_tte(pmap_t pm, pmap_t pm2, struct tte *tp, vm_offset_t va)
+{
+
+ PMAP_LOCK_ASSERT(pm, MA_OWNED);
+ if ((tp->tte_data & TD_WIRED) == 0)
+ panic("pmap_unwire_tte: tp %p is missing TD_WIRED", tp);
+ atomic_clear_long(&tp->tte_data, TD_WIRED);
+ pm->pm_stats.wired_count--;
+ return (1);
+}
+
+/*
+ * Clear the wired attribute from the mappings for the specified range of
+ * addresses in the given pmap. Every valid mapping within that range must
+ * have the wired attribute set. In contrast, invalid mappings cannot have
+ * the wired attribute set, so they are ignored.
+ *
+ * The wired attribute of the translation table entry is not a hardware
+ * feature, so there is no need to invalidate any TLB entries.
+ */
+void
+pmap_unwire(pmap_t pm, vm_offset_t sva, vm_offset_t eva)
+{
+ vm_offset_t va;
+ struct tte *tp;
+
+ PMAP_LOCK(pm);
+ if (eva - sva > PMAP_TSB_THRESH)
+ tsb_foreach(pm, NULL, sva, eva, pmap_unwire_tte);
+ else {
+ for (va = sva; va < eva; va += PAGE_SIZE)
+ if ((tp = tsb_tte_lookup(pm, va)) != NULL)
+ pmap_unwire_tte(pm, NULL, tp, va);
+ }
+ PMAP_UNLOCK(pm);
+}
+
+static int
pmap_copy_tte(pmap_t src_pmap, pmap_t dst_pmap, struct tte *tp,
vm_offset_t va)
{
diff --git a/sys/vm/pmap.h b/sys/vm/pmap.h
index 0c45e33..7c9d8c1 100644
--- a/sys/vm/pmap.h
+++ b/sys/vm/pmap.h
@@ -142,6 +142,7 @@ void pmap_remove_pages(pmap_t);
void pmap_remove_write(vm_page_t m);
void pmap_sync_icache(pmap_t, vm_offset_t, vm_size_t);
boolean_t pmap_ts_referenced(vm_page_t m);
+void pmap_unwire(pmap_t pmap, vm_offset_t start, vm_offset_t end);
void pmap_zero_page(vm_page_t);
void pmap_zero_page_area(vm_page_t, int off, int size);
void pmap_zero_page_idle(vm_page_t);