From 798eea16149d6a39c6fb5f721410f61b5bb1134a Mon Sep 17 00:00:00 2001 From: kib Date: Mon, 1 Sep 2014 07:58:15 +0000 Subject: Fix a leak of the wired pages when unwiring the PROT_NONE-mapped wired region. Rework the handling of unwire to do it in batch, both at pmap and object level. All commits below are by alc. MFC r268327: Introduce pmap_unwire(). MFC r268591: Implement pmap_unwire() for powerpc. MFC r268776: Implement pmap_unwire() for arm. MFC r268806: pmap_unwire(9) man page. MFC r269134: When unwiring a region of an address space, do not assume that the underlying physical pages are mapped by the pmap. This fixes a leak of the wired pages on the unwiring of the region mapped with no access allowed. MFC r269339: In the implementation of the new function pmap_unwire(), the call to MOEA64_PVO_TO_PTE() must be performed before any changes are made to the PVO. Otherwise, MOEA64_PVO_TO_PTE() will panic. MFC r269365: Correct a long-standing problem in moea{,64}_pvo_enter() that was revealed by the combination of r268591 and r269134: When we attempt to add the wired attribute to an existing mapping, moea{,64}_pvo_enter() do nothing. (They only set the wired attribute on newly created mappings.) MFC r269433: Handle wiring failures in vm_map_wire() with the new functions pmap_unwire() and vm_object_unwire(). Retire vm_fault_{un,}wire(), since they are no longer used. MFC r269438: Rewrite a loop in vm_map_wire() so that gcc doesn't think that the variable "rv" is uninitialized. MFC r269485: Retire pmap_change_wiring(). 
Reviewed by: alc --- sys/amd64/amd64/pmap.c | 114 ++++++++++++++++++++++---------- sys/arm/arm/pmap-v6.c | 95 ++++++++++++++++---------- sys/arm/arm/pmap.c | 51 +++++++++----- sys/i386/i386/pmap.c | 117 +++++++++++++++++++++----------- sys/i386/xen/pmap.c | 62 ++++++++++------- sys/ia64/ia64/pmap.c | 31 +++++---- sys/mips/mips/pmap.c | 60 +++++++++++------ sys/powerpc/aim/mmu_oea.c | 42 +++++++----- sys/powerpc/aim/mmu_oea64.c | 95 ++++++++++++++------------ sys/powerpc/booke/pmap.c | 35 +++++----- sys/powerpc/powerpc/mmu_if.m | 32 ++++----- sys/powerpc/powerpc/pmap_dispatch.c | 16 ++--- sys/sparc64/sparc64/pmap.c | 43 ++++++++---- sys/vm/pmap.h | 2 +- sys/vm/vm_extern.h | 2 - sys/vm/vm_fault.c | 62 ----------------- sys/vm/vm_map.c | 128 +++++++++++++++++++++++------------- sys/vm/vm_object.c | 72 ++++++++++++++++++++ sys/vm/vm_object.h | 2 + 19 files changed, 658 insertions(+), 403 deletions(-) (limited to 'sys') diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c index 1eb7768..2cb769d 100644 --- a/sys/amd64/amd64/pmap.c +++ b/sys/amd64/amd64/pmap.c @@ -4704,52 +4704,96 @@ pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, vm_object_t object, } /* - * Routine: pmap_change_wiring - * Function: Change the wiring attribute for a map/virtual-address - * pair. - * In/out conditions: - * The mapping must already exist in the pmap. + * Clear the wired attribute from the mappings for the specified range of + * addresses in the given pmap. Every valid mapping within that range + * must have the wired attribute set. In contrast, invalid mappings + * cannot have the wired attribute set, so they are ignored. + * + * The wired attribute of the page table entry is not a hardware feature, + * so there is no need to invalidate any TLB entries. 
*/ void -pmap_change_wiring(pmap_t pmap, vm_offset_t va, boolean_t wired) +pmap_unwire(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) { + vm_offset_t va_next; + pml4_entry_t *pml4e; + pdp_entry_t *pdpe; pd_entry_t *pde; - pt_entry_t *pte; + pt_entry_t *pte, PG_V; boolean_t pv_lists_locked; + PG_V = pmap_valid_bit(pmap); pv_lists_locked = FALSE; - - /* - * Wiring is not a hardware characteristic so there is no need to - * invalidate TLB. - */ -retry: +resume: PMAP_LOCK(pmap); - pde = pmap_pde(pmap, va); - if ((*pde & PG_PS) != 0) { - if (!wired != ((*pde & PG_W) == 0)) { - if (!pv_lists_locked) { - pv_lists_locked = TRUE; - if (!rw_try_rlock(&pvh_global_lock)) { - PMAP_UNLOCK(pmap); - rw_rlock(&pvh_global_lock); - goto retry; + for (; sva < eva; sva = va_next) { + pml4e = pmap_pml4e(pmap, sva); + if ((*pml4e & PG_V) == 0) { + va_next = (sva + NBPML4) & ~PML4MASK; + if (va_next < sva) + va_next = eva; + continue; + } + pdpe = pmap_pml4e_to_pdpe(pml4e, sva); + if ((*pdpe & PG_V) == 0) { + va_next = (sva + NBPDP) & ~PDPMASK; + if (va_next < sva) + va_next = eva; + continue; + } + va_next = (sva + NBPDR) & ~PDRMASK; + if (va_next < sva) + va_next = eva; + pde = pmap_pdpe_to_pde(pdpe, sva); + if ((*pde & PG_V) == 0) + continue; + if ((*pde & PG_PS) != 0) { + if ((*pde & PG_W) == 0) + panic("pmap_unwire: pde %#jx is missing PG_W", + (uintmax_t)*pde); + + /* + * Are we unwiring the entire large page? If not, + * demote the mapping and fall through. + */ + if (sva + NBPDR == va_next && eva >= va_next) { + atomic_clear_long(pde, PG_W); + pmap->pm_stats.wired_count -= NBPDR / + PAGE_SIZE; + continue; + } else { + if (!pv_lists_locked) { + pv_lists_locked = TRUE; + if (!rw_try_rlock(&pvh_global_lock)) { + PMAP_UNLOCK(pmap); + rw_rlock(&pvh_global_lock); + /* Repeat sva. 
*/ + goto resume; + } } + if (!pmap_demote_pde(pmap, pde, sva)) + panic("pmap_unwire: demotion failed"); } - if (!pmap_demote_pde(pmap, pde, va)) - panic("pmap_change_wiring: demotion failed"); - } else - goto out; - } - pte = pmap_pde_to_pte(pde, va); - if (wired && (*pte & PG_W) == 0) { - pmap->pm_stats.wired_count++; - atomic_set_long(pte, PG_W); - } else if (!wired && (*pte & PG_W) != 0) { - pmap->pm_stats.wired_count--; - atomic_clear_long(pte, PG_W); + } + if (va_next > eva) + va_next = eva; + for (pte = pmap_pde_to_pte(pde, sva); sva != va_next; pte++, + sva += PAGE_SIZE) { + if ((*pte & PG_V) == 0) + continue; + if ((*pte & PG_W) == 0) + panic("pmap_unwire: pte %#jx is missing PG_W", + (uintmax_t)*pte); + + /* + * PG_W must be cleared atomically. Although the pmap + * lock synchronizes access to PG_W, another processor + * could be setting PG_M and/or PG_A concurrently. + */ + atomic_clear_long(pte, PG_W); + pmap->pm_stats.wired_count--; + } } -out: if (pv_lists_locked) rw_runlock(&pvh_global_lock); PMAP_UNLOCK(pmap); diff --git a/sys/arm/arm/pmap-v6.c b/sys/arm/arm/pmap-v6.c index 197a2eb..b2be785 100644 --- a/sys/arm/arm/pmap-v6.c +++ b/sys/arm/arm/pmap-v6.c @@ -3264,53 +3264,76 @@ pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot) } /* - * Routine: pmap_change_wiring - * Function: Change the wiring attribute for a map/virtual-address - * pair. - * In/out conditions: - * The mapping must already exist in the pmap. + * Clear the wired attribute from the mappings for the specified range of + * addresses in the given pmap. Every valid mapping within that range + * must have the wired attribute set. In contrast, invalid mappings + * cannot have the wired attribute set, so they are ignored. + * + * XXX Wired mappings of unmanaged pages cannot be counted by this pmap + * implementation. 
*/ void -pmap_change_wiring(pmap_t pmap, vm_offset_t va, boolean_t wired) +pmap_unwire(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) { struct l2_bucket *l2b; struct md_page *pvh; - struct pv_entry *pve; - pd_entry_t *pl1pd, l1pd; + pd_entry_t l1pd; pt_entry_t *ptep, pte; + pv_entry_t pv; + vm_offset_t next_bucket; + vm_paddr_t pa; vm_page_t m; - + rw_wlock(&pvh_global_lock); PMAP_LOCK(pmap); - pl1pd = &pmap->pm_l1->l1_kva[L1_IDX(va)]; - l1pd = *pl1pd; - if ((l1pd & L1_TYPE_MASK) == L1_S_PROTO) { - m = PHYS_TO_VM_PAGE(l1pd & L1_S_FRAME); - KASSERT((m != NULL) && ((m->oflags & VPO_UNMANAGED) == 0), - ("pmap_change_wiring: unmanaged superpage should not " - "be changed")); - KASSERT(pmap != pmap_kernel(), - ("pmap_change_wiring: managed kernel superpage " - "should not exist")); - pvh = pa_to_pvh(l1pd & L1_S_FRAME); - pve = pmap_find_pv(pvh, pmap, trunc_1mpage(va)); - if (!wired != ((pve->pv_flags & PVF_WIRED) == 0)) { - if (!pmap_demote_section(pmap, va)) - panic("pmap_change_wiring: demotion failed"); - } else - goto out; + while (sva < eva) { + next_bucket = L2_NEXT_BUCKET(sva); + l1pd = pmap->pm_l1->l1_kva[L1_IDX(sva)]; + if ((l1pd & L1_TYPE_MASK) == L1_S_PROTO) { + pa = l1pd & L1_S_FRAME; + m = PHYS_TO_VM_PAGE(pa); + KASSERT(m != NULL && (m->oflags & VPO_UNMANAGED) == 0, + ("pmap_unwire: unmanaged 1mpage %p", m)); + pvh = pa_to_pvh(pa); + pv = pmap_find_pv(pvh, pmap, trunc_1mpage(sva)); + if ((pv->pv_flags & PVF_WIRED) == 0) + panic("pmap_unwire: pv %p isn't wired", pv); + + /* + * Are we unwiring the entire large page? If not, + * demote the mapping and fall through. 
+ */ + if (sva + L1_S_SIZE == next_bucket && + eva >= next_bucket) { + pv->pv_flags &= ~PVF_WIRED; + pmap->pm_stats.wired_count -= L2_PTE_NUM_TOTAL; + sva = next_bucket; + continue; + } else if (!pmap_demote_section(pmap, sva)) + panic("pmap_unwire: demotion failed"); + } + if (next_bucket > eva) + next_bucket = eva; + l2b = pmap_get_l2_bucket(pmap, sva); + if (l2b == NULL) { + sva = next_bucket; + continue; + } + for (ptep = &l2b->l2b_kva[l2pte_index(sva)]; sva < next_bucket; + sva += PAGE_SIZE, ptep++) { + if ((pte = *ptep) == 0 || + (m = PHYS_TO_VM_PAGE(l2pte_pa(pte))) == NULL || + (m->oflags & VPO_UNMANAGED) != 0) + continue; + pv = pmap_find_pv(&m->md, pmap, sva); + if ((pv->pv_flags & PVF_WIRED) == 0) + panic("pmap_unwire: pv %p isn't wired", pv); + pv->pv_flags &= ~PVF_WIRED; + pmap->pm_stats.wired_count--; + } } - l2b = pmap_get_l2_bucket(pmap, va); - KASSERT(l2b, ("No l2b bucket in pmap_change_wiring")); - ptep = &l2b->l2b_kva[l2pte_index(va)]; - pte = *ptep; - m = PHYS_TO_VM_PAGE(l2pte_pa(pte)); - if (m != NULL) - pmap_modify_pv(m, pmap, va, PVF_WIRED, - wired == TRUE ? PVF_WIRED : 0); -out: rw_wunlock(&pvh_global_lock); - PMAP_UNLOCK(pmap); + PMAP_UNLOCK(pmap); } diff --git a/sys/arm/arm/pmap.c b/sys/arm/arm/pmap.c index 802d5ee..f4e44d7 100644 --- a/sys/arm/arm/pmap.c +++ b/sys/arm/arm/pmap.c @@ -3542,28 +3542,47 @@ pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot) } /* - * Routine: pmap_change_wiring - * Function: Change the wiring attribute for a map/virtual-address - * pair. - * In/out conditions: - * The mapping must already exist in the pmap. + * Clear the wired attribute from the mappings for the specified range of + * addresses in the given pmap. Every valid mapping within that range + * must have the wired attribute set. In contrast, invalid mappings + * cannot have the wired attribute set, so they are ignored. + * + * XXX Wired mappings of unmanaged pages cannot be counted by this pmap + * implementation. 
*/ void -pmap_change_wiring(pmap_t pmap, vm_offset_t va, boolean_t wired) +pmap_unwire(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) { struct l2_bucket *l2b; pt_entry_t *ptep, pte; - vm_page_t pg; - + pv_entry_t pv; + vm_offset_t next_bucket; + vm_page_t m; + rw_wlock(&pvh_global_lock); - PMAP_LOCK(pmap); - l2b = pmap_get_l2_bucket(pmap, va); - KASSERT(l2b, ("No l2b bucket in pmap_change_wiring")); - ptep = &l2b->l2b_kva[l2pte_index(va)]; - pte = *ptep; - pg = PHYS_TO_VM_PAGE(l2pte_pa(pte)); - if (pg) - pmap_modify_pv(pg, pmap, va, PVF_WIRED, wired ? PVF_WIRED : 0); + PMAP_LOCK(pmap); + while (sva < eva) { + next_bucket = L2_NEXT_BUCKET(sva); + if (next_bucket > eva) + next_bucket = eva; + l2b = pmap_get_l2_bucket(pmap, sva); + if (l2b == NULL) { + sva = next_bucket; + continue; + } + for (ptep = &l2b->l2b_kva[l2pte_index(sva)]; sva < next_bucket; + sva += PAGE_SIZE, ptep++) { + if ((pte = *ptep) == 0 || + (m = PHYS_TO_VM_PAGE(l2pte_pa(pte))) == NULL || + (m->oflags & VPO_UNMANAGED) != 0) + continue; + pv = pmap_find_pv(m, pmap, sva); + if ((pv->pv_flags & PVF_WIRED) == 0) + panic("pmap_unwire: pv %p isn't wired", pv); + pv->pv_flags &= ~PVF_WIRED; + pmap->pm_stats.wired_count--; + } + } rw_wunlock(&pvh_global_lock); PMAP_UNLOCK(pmap); } diff --git a/sys/i386/i386/pmap.c b/sys/i386/i386/pmap.c index 0336295..d5c2cbe 100644 --- a/sys/i386/i386/pmap.c +++ b/sys/i386/i386/pmap.c @@ -3968,59 +3968,100 @@ pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, vm_object_t object, } /* - * Routine: pmap_change_wiring - * Function: Change the wiring attribute for a map/virtual-address - * pair. - * In/out conditions: - * The mapping must already exist in the pmap. + * Clear the wired attribute from the mappings for the specified range of + * addresses in the given pmap. Every valid mapping within that range + * must have the wired attribute set. In contrast, invalid mappings + * cannot have the wired attribute set, so they are ignored. 
+ * + * The wired attribute of the page table entry is not a hardware feature, + * so there is no need to invalidate any TLB entries. */ void -pmap_change_wiring(pmap_t pmap, vm_offset_t va, boolean_t wired) +pmap_unwire(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) { + vm_offset_t pdnxt; pd_entry_t *pde; pt_entry_t *pte; - boolean_t are_queues_locked; + boolean_t pv_lists_locked; - are_queues_locked = FALSE; -retry: + if (pmap_is_current(pmap)) + pv_lists_locked = FALSE; + else { + pv_lists_locked = TRUE; +resume: + rw_wlock(&pvh_global_lock); + sched_pin(); + } PMAP_LOCK(pmap); - pde = pmap_pde(pmap, va); - if ((*pde & PG_PS) != 0) { - if (!wired != ((*pde & PG_W) == 0)) { - if (!are_queues_locked) { - are_queues_locked = TRUE; - if (!rw_try_wlock(&pvh_global_lock)) { - PMAP_UNLOCK(pmap); - rw_wlock(&pvh_global_lock); - goto retry; + for (; sva < eva; sva = pdnxt) { + pdnxt = (sva + NBPDR) & ~PDRMASK; + if (pdnxt < sva) + pdnxt = eva; + pde = pmap_pde(pmap, sva); + if ((*pde & PG_V) == 0) + continue; + if ((*pde & PG_PS) != 0) { + if ((*pde & PG_W) == 0) + panic("pmap_unwire: pde %#jx is missing PG_W", + (uintmax_t)*pde); + + /* + * Are we unwiring the entire large page? If not, + * demote the mapping and fall through. + */ + if (sva + NBPDR == pdnxt && eva >= pdnxt) { + /* + * Regardless of whether a pde (or pte) is 32 + * or 64 bits in size, PG_W is among the least + * significant 32 bits. + */ + atomic_clear_int((u_int *)pde, PG_W); + pmap->pm_stats.wired_count -= NBPDR / + PAGE_SIZE; + continue; + } else { + if (!pv_lists_locked) { + pv_lists_locked = TRUE; + if (!rw_try_wlock(&pvh_global_lock)) { + PMAP_UNLOCK(pmap); + /* Repeat sva. 
*/ + goto resume; + } + sched_pin(); } + if (!pmap_demote_pde(pmap, pde, sva)) + panic("pmap_unwire: demotion failed"); } - if (!pmap_demote_pde(pmap, pde, va)) - panic("pmap_change_wiring: demotion failed"); - } else - goto out; - } - pte = pmap_pte(pmap, va); - - if (wired && !pmap_pte_w(pte)) - pmap->pm_stats.wired_count++; - else if (!wired && pmap_pte_w(pte)) - pmap->pm_stats.wired_count--; + } + if (pdnxt > eva) + pdnxt = eva; + for (pte = pmap_pte_quick(pmap, sva); sva != pdnxt; pte++, + sva += PAGE_SIZE) { + if ((*pte & PG_V) == 0) + continue; + if ((*pte & PG_W) == 0) + panic("pmap_unwire: pte %#jx is missing PG_W", + (uintmax_t)*pte); - /* - * Wiring is not a hardware characteristic so there is no need to - * invalidate TLB. - */ - pmap_pte_set_w(pte, wired); - pmap_pte_release(pte); -out: - if (are_queues_locked) + /* + * PG_W must be cleared atomically. Although the pmap + * lock synchronizes access to PG_W, another processor + * could be setting PG_M and/or PG_A concurrently. + * + * PG_W is among the least significant 32 bits. + */ + atomic_clear_int((u_int *)pte, PG_W); + pmap->pm_stats.wired_count--; + } + } + if (pv_lists_locked) { + sched_unpin(); rw_wunlock(&pvh_global_lock); + } PMAP_UNLOCK(pmap); } - /* * Copy the range specified by src_addr/len * from the source map to the range dst_addr/len diff --git a/sys/i386/xen/pmap.c b/sys/i386/xen/pmap.c index 1ed2c03..48139a1 100644 --- a/sys/i386/xen/pmap.c +++ b/sys/i386/xen/pmap.c @@ -3169,40 +3169,58 @@ pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, vm_object_t object, } /* - * Routine: pmap_change_wiring - * Function: Change the wiring attribute for a map/virtual-address - * pair. - * In/out conditions: - * The mapping must already exist in the pmap. + * Clear the wired attribute from the mappings for the specified range of + * addresses in the given pmap. Every valid mapping within that range + * must have the wired attribute set. 
In contrast, invalid mappings + * cannot have the wired attribute set, so they are ignored. + * + * The wired attribute of the page table entry is not a hardware feature, + * so there is no need to invalidate any TLB entries. */ void -pmap_change_wiring(pmap_t pmap, vm_offset_t va, boolean_t wired) +pmap_unwire(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) { + vm_offset_t pdnxt; + pd_entry_t *pde; pt_entry_t *pte; + CTR3(KTR_PMAP, "pmap_unwire: pmap=%p sva=0x%x eva=0x%x", pmap, sva, + eva); rw_wlock(&pvh_global_lock); + sched_pin(); PMAP_LOCK(pmap); - pte = pmap_pte(pmap, va); - - if (wired && !pmap_pte_w(pte)) { - PT_SET_VA_MA((pte), *(pte) | PG_W, TRUE); - pmap->pm_stats.wired_count++; - } else if (!wired && pmap_pte_w(pte)) { - PT_SET_VA_MA((pte), *(pte) & ~PG_W, TRUE); - pmap->pm_stats.wired_count--; + for (; sva < eva; sva = pdnxt) { + pdnxt = (sva + NBPDR) & ~PDRMASK; + if (pdnxt < sva) + pdnxt = eva; + pde = pmap_pde(pmap, sva); + if ((*pde & PG_V) == 0) + continue; + if ((*pde & PG_PS) != 0) + panic("pmap_unwire: unexpected PG_PS in pde %#jx", + (uintmax_t)*pde); + if (pdnxt > eva) + pdnxt = eva; + for (pte = pmap_pte_quick(pmap, sva); sva != pdnxt; pte++, + sva += PAGE_SIZE) { + if ((*pte & PG_V) == 0) + continue; + if ((*pte & PG_W) == 0) + panic("pmap_unwire: pte %#jx is missing PG_W", + (uintmax_t)*pte); + PT_SET_VA_MA(pte, *pte & ~PG_W, FALSE); + pmap->pm_stats.wired_count--; + } } - - /* - * Wiring is not a hardware characteristic so there is no need to - * invalidate TLB. 
- */ - pmap_pte_release(pte); - PMAP_UNLOCK(pmap); + if (*PMAP1) + PT_CLEAR_VA(PMAP1, FALSE); + PT_UPDATES_FLUSH(); + sched_unpin(); rw_wunlock(&pvh_global_lock); + PMAP_UNLOCK(pmap); } - /* * Copy the range specified by src_addr/len * from the source map to the range dst_addr/len diff --git a/sys/ia64/ia64/pmap.c b/sys/ia64/ia64/pmap.c index 602090a..90a9615 100644 --- a/sys/ia64/ia64/pmap.c +++ b/sys/ia64/ia64/pmap.c @@ -1946,34 +1946,33 @@ pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, vm_object_t object, } /* - * Routine: pmap_change_wiring - * Function: Change the wiring attribute for a map/virtual-address - * pair. - * In/out conditions: - * The mapping must already exist in the pmap. + * Clear the wired attribute from the mappings for the specified range of + * addresses in the given pmap. Every valid mapping within that range + * must have the wired attribute set. In contrast, invalid mappings + * cannot have the wired attribute set, so they are ignored. + * + * The wired attribute of the page table entry is not a hardware feature, + * so there is no need to invalidate any TLB entries. 
*/ void -pmap_change_wiring(pmap_t pmap, vm_offset_t va, boolean_t wired) +pmap_unwire(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) { pmap_t oldpmap; struct ia64_lpte *pte; - CTR4(KTR_PMAP, "%s(pm=%p, va=%#lx, wired=%u)", __func__, pmap, va, - wired); + CTR4(KTR_PMAP, "%s(%p, %#x, %#x)", __func__, pmap, sva, eva); PMAP_LOCK(pmap); oldpmap = pmap_switch(pmap); - - pte = pmap_find_vhpt(va); - KASSERT(pte != NULL, ("pte")); - if (wired && !pmap_wired(pte)) { - pmap->pm_stats.wired_count++; - pmap_set_wired(pte); - } else if (!wired && pmap_wired(pte)) { + for (; sva < eva; sva += PAGE_SIZE) { + pte = pmap_find_vhpt(sva); + if (pte == NULL) + continue; + if (!pmap_wired(pte)) + panic("pmap_unwire: pte %p isn't wired", pte); pmap->pm_stats.wired_count--; pmap_clear_wired(pte); } - pmap_switch(oldpmap); PMAP_UNLOCK(pmap); } diff --git a/sys/mips/mips/pmap.c b/sys/mips/mips/pmap.c index 973791b..caee47c 100644 --- a/sys/mips/mips/pmap.c +++ b/sys/mips/mips/pmap.c @@ -2426,33 +2426,51 @@ pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, } /* - * Routine: pmap_change_wiring - * Function: Change the wiring attribute for a map/virtual-address - * pair. - * In/out conditions: - * The mapping must already exist in the pmap. + * Clear the wired attribute from the mappings for the specified range of + * addresses in the given pmap. Every valid mapping within that range + * must have the wired attribute set. In contrast, invalid mappings + * cannot have the wired attribute set, so they are ignored. + * + * The wired attribute of the page table entry is not a hardware feature, + * so there is no need to invalidate any TLB entries. 
*/ void -pmap_change_wiring(pmap_t pmap, vm_offset_t va, boolean_t wired) +pmap_unwire(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) { + pd_entry_t *pde, *pdpe; pt_entry_t *pte; + vm_offset_t va_next; PMAP_LOCK(pmap); - pte = pmap_pte(pmap, va); - - if (wired && !pte_test(pte, PTE_W)) - pmap->pm_stats.wired_count++; - else if (!wired && pte_test(pte, PTE_W)) - pmap->pm_stats.wired_count--; - - /* - * Wiring is not a hardware characteristic so there is no need to - * invalidate TLB. - */ - if (wired) - pte_set(pte, PTE_W); - else - pte_clear(pte, PTE_W); + for (; sva < eva; sva = va_next) { + pdpe = pmap_segmap(pmap, sva); +#ifdef __mips_n64 + if (*pdpe == NULL) { + va_next = (sva + NBSEG) & ~SEGMASK; + if (va_next < sva) + va_next = eva; + continue; + } +#endif + va_next = (sva + NBPDR) & ~PDRMASK; + if (va_next < sva) + va_next = eva; + pde = pmap_pdpe_to_pde(pdpe, sva); + if (*pde == NULL) + continue; + if (va_next > eva) + va_next = eva; + for (pte = pmap_pde_to_pte(pde, sva); sva != va_next; pte++, + sva += PAGE_SIZE) { + if (!pte_test(pte, PTE_V)) + continue; + if (!pte_test(pte, PTE_W)) + panic("pmap_unwire: pte %#jx is missing PG_W", + (uintmax_t)*pte); + pte_clear(pte, PTE_W); + pmap->pm_stats.wired_count--; + } + } PMAP_UNLOCK(pmap); } diff --git a/sys/powerpc/aim/mmu_oea.c b/sys/powerpc/aim/mmu_oea.c index c7811ef..f2cdf7a 100644 --- a/sys/powerpc/aim/mmu_oea.c +++ b/sys/powerpc/aim/mmu_oea.c @@ -269,7 +269,6 @@ int moea_pte_spill(vm_offset_t); /* * Kernel MMU interface */ -void moea_change_wiring(mmu_t, pmap_t, vm_offset_t, boolean_t); void moea_clear_modify(mmu_t, vm_page_t); void moea_copy_page(mmu_t, vm_page_t, vm_page_t); void moea_copy_pages(mmu_t mmu, vm_page_t *ma, vm_offset_t a_offset, @@ -298,6 +297,7 @@ void moea_release(mmu_t, pmap_t); void moea_remove(mmu_t, pmap_t, vm_offset_t, vm_offset_t); void moea_remove_all(mmu_t, vm_page_t); void moea_remove_write(mmu_t, vm_page_t); +void moea_unwire(mmu_t, pmap_t, vm_offset_t, vm_offset_t); void 
moea_zero_page(mmu_t, vm_page_t); void moea_zero_page_area(mmu_t, vm_page_t, int, int); void moea_zero_page_idle(mmu_t, vm_page_t); @@ -319,7 +319,6 @@ vm_offset_t moea_dumpsys_map(mmu_t mmu, struct pmap_md *md, vm_size_t ofs, struct pmap_md * moea_scan_md(mmu_t mmu, struct pmap_md *prev); static mmu_method_t moea_methods[] = { - MMUMETHOD(mmu_change_wiring, moea_change_wiring), MMUMETHOD(mmu_clear_modify, moea_clear_modify), MMUMETHOD(mmu_copy_page, moea_copy_page), MMUMETHOD(mmu_copy_pages, moea_copy_pages), @@ -346,6 +345,7 @@ static mmu_method_t moea_methods[] = { MMUMETHOD(mmu_remove_all, moea_remove_all), MMUMETHOD(mmu_remove_write, moea_remove_write), MMUMETHOD(mmu_sync_icache, moea_sync_icache), + MMUMETHOD(mmu_unwire, moea_unwire), MMUMETHOD(mmu_zero_page, moea_zero_page), MMUMETHOD(mmu_zero_page_area, moea_zero_page_area), MMUMETHOD(mmu_zero_page_idle, moea_zero_page_idle), @@ -1015,23 +1015,19 @@ moea_deactivate(mmu_t mmu, struct thread *td) } void -moea_change_wiring(mmu_t mmu, pmap_t pm, vm_offset_t va, boolean_t wired) +moea_unwire(mmu_t mmu, pmap_t pm, vm_offset_t sva, vm_offset_t eva) { - struct pvo_entry *pvo; + struct pvo_entry key, *pvo; PMAP_LOCK(pm); - pvo = moea_pvo_find_va(pm, va & ~ADDR_POFF, NULL); - - if (pvo != NULL) { - if (wired) { - if ((pvo->pvo_vaddr & PVO_WIRED) == 0) - pm->pm_stats.wired_count++; - pvo->pvo_vaddr |= PVO_WIRED; - } else { - if ((pvo->pvo_vaddr & PVO_WIRED) != 0) - pm->pm_stats.wired_count--; - pvo->pvo_vaddr &= ~PVO_WIRED; - } + key.pvo_vaddr = sva; + for (pvo = RB_NFIND(pvo_tree, &pm->pmap_pvo, &key); + pvo != NULL && PVO_VADDR(pvo) < eva; + pvo = RB_NEXT(pvo_tree, &pm->pmap_pvo, pvo)) { + if ((pvo->pvo_vaddr & PVO_WIRED) == 0) + panic("moea_unwire: pvo %p is missing PVO_WIRED", pvo); + pvo->pvo_vaddr &= ~PVO_WIRED; + pm->pm_stats.wired_count--; } PMAP_UNLOCK(pm); } @@ -1941,7 +1937,21 @@ moea_pvo_enter(pmap_t pm, uma_zone_t zone, struct pvo_head *pvo_head, if ((pvo->pvo_pte.pte.pte_lo & PTE_RPGN) == pa && 
(pvo->pvo_pte.pte.pte_lo & PTE_PP) == (pte_lo & PTE_PP)) { + /* + * The PTE is not changing. Instead, this may + * be a request to change the mapping's wired + * attribute. + */ mtx_unlock(&moea_table_mutex); + if ((flags & PVO_WIRED) != 0 && + (pvo->pvo_vaddr & PVO_WIRED) == 0) { + pvo->pvo_vaddr |= PVO_WIRED; + pm->pm_stats.wired_count++; + } else if ((flags & PVO_WIRED) == 0 && + (pvo->pvo_vaddr & PVO_WIRED) != 0) { + pvo->pvo_vaddr &= ~PVO_WIRED; + pm->pm_stats.wired_count--; + } return (0); } moea_pvo_remove(pvo, -1); diff --git a/sys/powerpc/aim/mmu_oea64.c b/sys/powerpc/aim/mmu_oea64.c index ceca204..ef66064 100644 --- a/sys/powerpc/aim/mmu_oea64.c +++ b/sys/powerpc/aim/mmu_oea64.c @@ -283,7 +283,6 @@ static void moea64_syncicache(mmu_t, pmap_t pmap, vm_offset_t va, /* * Kernel MMU interface */ -void moea64_change_wiring(mmu_t, pmap_t, vm_offset_t, boolean_t); void moea64_clear_modify(mmu_t, vm_page_t); void moea64_copy_page(mmu_t, vm_page_t, vm_page_t); void moea64_copy_pages(mmu_t mmu, vm_page_t *ma, vm_offset_t a_offset, @@ -313,6 +312,7 @@ void moea64_remove(mmu_t, pmap_t, vm_offset_t, vm_offset_t); void moea64_remove_pages(mmu_t, pmap_t); void moea64_remove_all(mmu_t, vm_page_t); void moea64_remove_write(mmu_t, vm_page_t); +void moea64_unwire(mmu_t, pmap_t, vm_offset_t, vm_offset_t); void moea64_zero_page(mmu_t, vm_page_t); void moea64_zero_page_area(mmu_t, vm_page_t, int, int); void moea64_zero_page_idle(mmu_t, vm_page_t); @@ -332,7 +332,6 @@ vm_offset_t moea64_dumpsys_map(mmu_t mmu, struct pmap_md *md, vm_size_t ofs, struct pmap_md * moea64_scan_md(mmu_t mmu, struct pmap_md *prev); static mmu_method_t moea64_methods[] = { - MMUMETHOD(mmu_change_wiring, moea64_change_wiring), MMUMETHOD(mmu_clear_modify, moea64_clear_modify), MMUMETHOD(mmu_copy_page, moea64_copy_page), MMUMETHOD(mmu_copy_pages, moea64_copy_pages), @@ -360,6 +359,7 @@ static mmu_method_t moea64_methods[] = { MMUMETHOD(mmu_remove_all, moea64_remove_all), MMUMETHOD(mmu_remove_write, 
moea64_remove_write), MMUMETHOD(mmu_sync_icache, moea64_sync_icache), + MMUMETHOD(mmu_unwire, moea64_unwire), MMUMETHOD(mmu_zero_page, moea64_zero_page), MMUMETHOD(mmu_zero_page_area, moea64_zero_page_area), MMUMETHOD(mmu_zero_page_idle, moea64_zero_page_idle), @@ -1025,55 +1025,38 @@ moea64_deactivate(mmu_t mmu, struct thread *td) } void -moea64_change_wiring(mmu_t mmu, pmap_t pm, vm_offset_t va, boolean_t wired) +moea64_unwire(mmu_t mmu, pmap_t pm, vm_offset_t sva, vm_offset_t eva) { - struct pvo_entry *pvo; + struct pvo_entry key, *pvo; uintptr_t pt; - uint64_t vsid; - int i, ptegidx; - LOCK_TABLE_WR(); + LOCK_TABLE_RD(); PMAP_LOCK(pm); - pvo = moea64_pvo_find_va(pm, va & ~ADDR_POFF); - - if (pvo != NULL) { + key.pvo_vaddr = sva; + for (pvo = RB_NFIND(pvo_tree, &pm->pmap_pvo, &key); + pvo != NULL && PVO_VADDR(pvo) < eva; + pvo = RB_NEXT(pvo_tree, &pm->pmap_pvo, pvo)) { pt = MOEA64_PVO_TO_PTE(mmu, pvo); - - if (wired) { - if ((pvo->pvo_vaddr & PVO_WIRED) == 0) - pm->pm_stats.wired_count++; - pvo->pvo_vaddr |= PVO_WIRED; - pvo->pvo_pte.lpte.pte_hi |= LPTE_WIRED; - } else { - if ((pvo->pvo_vaddr & PVO_WIRED) != 0) - pm->pm_stats.wired_count--; - pvo->pvo_vaddr &= ~PVO_WIRED; - pvo->pvo_pte.lpte.pte_hi &= ~LPTE_WIRED; - } - + if ((pvo->pvo_vaddr & PVO_WIRED) == 0) + panic("moea64_unwire: pvo %p is missing PVO_WIRED", + pvo); + pvo->pvo_vaddr &= ~PVO_WIRED; + if ((pvo->pvo_pte.lpte.pte_hi & LPTE_WIRED) == 0) + panic("moea64_unwire: pte %p is missing LPTE_WIRED", + &pvo->pvo_pte.lpte); + pvo->pvo_pte.lpte.pte_hi &= ~LPTE_WIRED; if (pt != -1) { - /* Update wiring flag in page table. */ - MOEA64_PTE_CHANGE(mmu, pt, &pvo->pvo_pte.lpte, - pvo->pvo_vpn); - } else if (wired) { /* - * If we are wiring the page, and it wasn't in the - * page table before, add it. + * The PTE's wired attribute is not a hardware + * feature, so there is no need to invalidate any TLB + * entries. 
*/ - vsid = PVO_VSID(pvo); - ptegidx = va_to_pteg(vsid, PVO_VADDR(pvo), - pvo->pvo_vaddr & PVO_LARGE); - - i = MOEA64_PTE_INSERT(mmu, ptegidx, &pvo->pvo_pte.lpte); - - if (i >= 0) { - PVO_PTEGIDX_CLR(pvo); - PVO_PTEGIDX_SET(pvo, i); - } + MOEA64_PTE_CHANGE(mmu, pt, &pvo->pvo_pte.lpte, + pvo->pvo_vpn); } - + pm->pm_stats.wired_count--; } - UNLOCK_TABLE_WR(); + UNLOCK_TABLE_RD(); PMAP_UNLOCK(pm); } @@ -2207,6 +2190,7 @@ moea64_pvo_enter(mmu_t mmu, pmap_t pm, uma_zone_t zone, uint64_t pte_lo, int flags, int8_t psind __unused) { struct pvo_entry *pvo; + uintptr_t pt; uint64_t vsid; int first; u_int ptegidx; @@ -2249,13 +2233,42 @@ moea64_pvo_enter(mmu_t mmu, pmap_t pm, uma_zone_t zone, if ((pvo->pvo_pte.lpte.pte_lo & LPTE_RPGN) == pa && (pvo->pvo_pte.lpte.pte_lo & (LPTE_NOEXEC | LPTE_PP)) == (pte_lo & (LPTE_NOEXEC | LPTE_PP))) { + /* + * The physical page and protection are not + * changing. Instead, this may be a request + * to change the mapping's wired attribute. + */ + pt = -1; + if ((flags & PVO_WIRED) != 0 && + (pvo->pvo_vaddr & PVO_WIRED) == 0) { + pt = MOEA64_PVO_TO_PTE(mmu, pvo); + pvo->pvo_vaddr |= PVO_WIRED; + pvo->pvo_pte.lpte.pte_hi |= LPTE_WIRED; + pm->pm_stats.wired_count++; + } else if ((flags & PVO_WIRED) == 0 && + (pvo->pvo_vaddr & PVO_WIRED) != 0) { + pt = MOEA64_PVO_TO_PTE(mmu, pvo); + pvo->pvo_vaddr &= ~PVO_WIRED; + pvo->pvo_pte.lpte.pte_hi &= ~LPTE_WIRED; + pm->pm_stats.wired_count--; + } if (!(pvo->pvo_pte.lpte.pte_hi & LPTE_VALID)) { + KASSERT(pt == -1, + ("moea64_pvo_enter: valid pt")); /* Re-insert if spilled */ i = MOEA64_PTE_INSERT(mmu, ptegidx, &pvo->pvo_pte.lpte); if (i >= 0) PVO_PTEGIDX_SET(pvo, i); moea64_pte_overflow--; + } else if (pt != -1) { + /* + * The PTE's wired attribute is not a + * hardware feature, so there is no + * need to invalidate any TLB entries. 
+ */ + MOEA64_PTE_CHANGE(mmu, pt, + &pvo->pvo_pte.lpte, pvo->pvo_vpn); } return (0); } diff --git a/sys/powerpc/booke/pmap.c b/sys/powerpc/booke/pmap.c index 0862b99..a65eff6 100644 --- a/sys/powerpc/booke/pmap.c +++ b/sys/powerpc/booke/pmap.c @@ -266,7 +266,6 @@ void pmap_bootstrap_ap(volatile uint32_t *); /* * Kernel MMU interface */ -static void mmu_booke_change_wiring(mmu_t, pmap_t, vm_offset_t, boolean_t); static void mmu_booke_clear_modify(mmu_t, vm_page_t); static void mmu_booke_copy(mmu_t, pmap_t, pmap_t, vm_offset_t, vm_size_t, vm_offset_t); @@ -306,6 +305,7 @@ static void mmu_booke_release(mmu_t, pmap_t); static void mmu_booke_remove(mmu_t, pmap_t, vm_offset_t, vm_offset_t); static void mmu_booke_remove_all(mmu_t, vm_page_t); static void mmu_booke_remove_write(mmu_t, vm_page_t); +static void mmu_booke_unwire(mmu_t, pmap_t, vm_offset_t, vm_offset_t); static void mmu_booke_zero_page(mmu_t, vm_page_t); static void mmu_booke_zero_page_area(mmu_t, vm_page_t, int, int); static void mmu_booke_zero_page_idle(mmu_t, vm_page_t); @@ -330,7 +330,6 @@ static struct pmap_md *mmu_booke_scan_md(mmu_t, struct pmap_md *); static mmu_method_t mmu_booke_methods[] = { /* pmap dispatcher interface */ - MMUMETHOD(mmu_change_wiring, mmu_booke_change_wiring), MMUMETHOD(mmu_clear_modify, mmu_booke_clear_modify), MMUMETHOD(mmu_copy, mmu_booke_copy), MMUMETHOD(mmu_copy_page, mmu_booke_copy_page), @@ -361,6 +360,7 @@ static mmu_method_t mmu_booke_methods[] = { MMUMETHOD(mmu_remove_all, mmu_booke_remove_all), MMUMETHOD(mmu_remove_write, mmu_booke_remove_write), MMUMETHOD(mmu_sync_icache, mmu_booke_sync_icache), + MMUMETHOD(mmu_unwire, mmu_booke_unwire), MMUMETHOD(mmu_zero_page, mmu_booke_zero_page), MMUMETHOD(mmu_zero_page_area, mmu_booke_zero_page_area), MMUMETHOD(mmu_zero_page_idle, mmu_booke_zero_page_idle), @@ -2432,28 +2432,33 @@ mmu_booke_ts_referenced(mmu_t mmu, vm_page_t m) } /* - * Change wiring attribute for a map/virtual-address pair. 
+ * Clear the wired attribute from the mappings for the specified range of + * addresses in the given pmap. Every valid mapping within that range must + * have the wired attribute set. In contrast, invalid mappings cannot have + * the wired attribute set, so they are ignored. + * + * The wired attribute of the page table entry is not a hardware feature, so + * there is no need to invalidate any TLB entries. */ static void -mmu_booke_change_wiring(mmu_t mmu, pmap_t pmap, vm_offset_t va, boolean_t wired) +mmu_booke_unwire(mmu_t mmu, pmap_t pmap, vm_offset_t sva, vm_offset_t eva) { + vm_offset_t va; pte_t *pte; PMAP_LOCK(pmap); - if ((pte = pte_find(mmu, pmap, va)) != NULL) { - if (wired) { - if (!PTE_ISWIRED(pte)) { - pte->flags |= PTE_WIRED; - pmap->pm_stats.wired_count++; - } - } else { - if (PTE_ISWIRED(pte)) { - pte->flags &= ~PTE_WIRED; - pmap->pm_stats.wired_count--; - } + for (va = sva; va < eva; va += PAGE_SIZE) { + if ((pte = pte_find(mmu, pmap, va)) != NULL && + PTE_ISVALID(pte)) { + if (!PTE_ISWIRED(pte)) + panic("mmu_booke_unwire: pte %p isn't wired", + pte); + pte->flags &= ~PTE_WIRED; + pmap->pm_stats.wired_count--; } } PMAP_UNLOCK(pmap); + } /* diff --git a/sys/powerpc/powerpc/mmu_if.m b/sys/powerpc/powerpc/mmu_if.m index 65a4046..5c44b71 100644 --- a/sys/powerpc/powerpc/mmu_if.m +++ b/sys/powerpc/powerpc/mmu_if.m @@ -152,22 +152,6 @@ METHOD void advise { /** - * @brief Change the wiring attribute for the page in the given physical - * map and virtual address. 
- * - * @param _pmap physical map of page - * @param _va page virtual address - * @param _wired TRUE to increment wired count, FALSE to decrement - */ -METHOD void change_wiring { - mmu_t _mmu; - pmap_t _pmap; - vm_offset_t _va; - boolean_t _wired; -}; - - -/** * @brief Clear the 'modified' bit on the given physical page * * @param _pg physical page @@ -630,6 +614,22 @@ METHOD void remove_pages { /** + * @brief Clear the wired attribute from the mappings for the specified range + * of addresses in the given pmap. + * + * @param _pmap physical map + * @param _start virtual range start + * @param _end virtual range end + */ +METHOD void unwire { + mmu_t _mmu; + pmap_t _pmap; + vm_offset_t _start; + vm_offset_t _end; +}; + + +/** * @brief Zero a physical page. It is not assumed that the page is mapped, * so a temporary (or direct) mapping may need to be used. * diff --git a/sys/powerpc/powerpc/pmap_dispatch.c b/sys/powerpc/powerpc/pmap_dispatch.c index 1957692..7f3f913 100644 --- a/sys/powerpc/powerpc/pmap_dispatch.c +++ b/sys/powerpc/powerpc/pmap_dispatch.c @@ -100,14 +100,6 @@ pmap_advise(pmap_t pmap, vm_offset_t start, vm_offset_t end, int advice) } void -pmap_change_wiring(pmap_t pmap, vm_offset_t va, boolean_t wired) -{ - - CTR4(KTR_PMAP, "%s(%p, %#x, %u)", __func__, pmap, va, wired); - MMU_CHANGE_WIRING(mmu_obj, pmap, va, wired); -} - -void pmap_clear_modify(vm_page_t m) { @@ -361,6 +353,14 @@ pmap_remove_write(vm_page_t m) } void +pmap_unwire(pmap_t pmap, vm_offset_t start, vm_offset_t end) +{ + + CTR4(KTR_PMAP, "%s(%p, %#x, %#x)", __func__, pmap, start, end); + MMU_UNWIRE(mmu_obj, pmap, start, end); +} + +void pmap_zero_page(vm_page_t m) { diff --git a/sys/sparc64/sparc64/pmap.c b/sys/sparc64/sparc64/pmap.c index 54e0b22..9073760 100644 --- a/sys/sparc64/sparc64/pmap.c +++ b/sys/sparc64/sparc64/pmap.c @@ -141,6 +141,8 @@ static void pmap_bootstrap_set_tte(struct tte *tp, u_long vpn, u_long data); static void pmap_cache_remove(vm_page_t m, vm_offset_t va); 
static int pmap_protect_tte(struct pmap *pm1, struct pmap *pm2, struct tte *tp, vm_offset_t va); +static int pmap_unwire_tte(pmap_t pm, pmap_t pm2, struct tte *tp, + vm_offset_t va); /* * Map the given physical page at the specified virtual address in the @@ -1668,27 +1670,40 @@ pmap_object_init_pt(pmap_t pm, vm_offset_t addr, vm_object_t object, ("pmap_object_init_pt: non-device object")); } +static int +pmap_unwire_tte(pmap_t pm, pmap_t pm2, struct tte *tp, vm_offset_t va) +{ + + PMAP_LOCK_ASSERT(pm, MA_OWNED); + if ((tp->tte_data & TD_WIRED) == 0) + panic("pmap_unwire_tte: tp %p is missing TD_WIRED", tp); + atomic_clear_long(&tp->tte_data, TD_WIRED); + pm->pm_stats.wired_count--; + return (1); +} + /* - * Change the wiring attribute for a map/virtual-address pair. - * The mapping must already exist in the pmap. + * Clear the wired attribute from the mappings for the specified range of + * addresses in the given pmap. Every valid mapping within that range must + * have the wired attribute set. In contrast, invalid mappings cannot have + * the wired attribute set, so they are ignored. + * + * The wired attribute of the translation table entry is not a hardware + * feature, so there is no need to invalidate any TLB entries. 
*/ void -pmap_change_wiring(pmap_t pm, vm_offset_t va, boolean_t wired) +pmap_unwire(pmap_t pm, vm_offset_t sva, vm_offset_t eva) { + vm_offset_t va; struct tte *tp; - u_long data; PMAP_LOCK(pm); - if ((tp = tsb_tte_lookup(pm, va)) != NULL) { - if (wired) { - data = atomic_set_long(&tp->tte_data, TD_WIRED); - if ((data & TD_WIRED) == 0) - pm->pm_stats.wired_count++; - } else { - data = atomic_clear_long(&tp->tte_data, TD_WIRED); - if ((data & TD_WIRED) != 0) - pm->pm_stats.wired_count--; - } + if (eva - sva > PMAP_TSB_THRESH) + tsb_foreach(pm, NULL, sva, eva, pmap_unwire_tte); + else { + for (va = sva; va < eva; va += PAGE_SIZE) + if ((tp = tsb_tte_lookup(pm, va)) != NULL) + pmap_unwire_tte(pm, NULL, tp, va); } PMAP_UNLOCK(pm); } diff --git a/sys/vm/pmap.h b/sys/vm/pmap.h index 2e7b19d..d73babc 100644 --- a/sys/vm/pmap.h +++ b/sys/vm/pmap.h @@ -109,7 +109,6 @@ void pmap_advise(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, int advice); void pmap_align_superpage(vm_object_t, vm_ooffset_t, vm_offset_t *, vm_size_t); -void pmap_change_wiring(pmap_t, vm_offset_t, boolean_t); void pmap_clear_modify(vm_page_t m); void pmap_copy(pmap_t, pmap_t, vm_offset_t, vm_size_t, vm_offset_t); void pmap_copy_page(vm_page_t, vm_page_t); @@ -149,6 +148,7 @@ void pmap_remove_pages(pmap_t); void pmap_remove_write(vm_page_t m); void pmap_sync_icache(pmap_t, vm_offset_t, vm_size_t); boolean_t pmap_ts_referenced(vm_page_t m); +void pmap_unwire(pmap_t pmap, vm_offset_t start, vm_offset_t end); void pmap_zero_page(vm_page_t); void pmap_zero_page_area(vm_page_t, int off, int size); void pmap_zero_page_idle(vm_page_t); diff --git a/sys/vm/vm_extern.h b/sys/vm/vm_extern.h index 39a1a21..f639a22 100644 --- a/sys/vm/vm_extern.h +++ b/sys/vm/vm_extern.h @@ -81,8 +81,6 @@ int vm_fault_hold(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type, int fault_flags, vm_page_t *m_hold); int vm_fault_quick_hold_pages(vm_map_t map, vm_offset_t addr, vm_size_t len, vm_prot_t prot, vm_page_t *ma, int 
max_count); -void vm_fault_unwire(vm_map_t, vm_offset_t, vm_offset_t, boolean_t); -int vm_fault_wire(vm_map_t, vm_offset_t, vm_offset_t, boolean_t); int vm_forkproc(struct thread *, struct proc *, struct thread *, struct vmspace *, int); void vm_waitproc(struct proc *); int vm_mmap(vm_map_t, vm_offset_t *, vm_size_t, vm_prot_t, vm_prot_t, int, objtype_t, void *, vm_ooffset_t); diff --git a/sys/vm/vm_fault.c b/sys/vm/vm_fault.c index 09f7423..a3a4c36 100644 --- a/sys/vm/vm_fault.c +++ b/sys/vm/vm_fault.c @@ -1203,68 +1203,6 @@ error: } /* - * vm_fault_wire: - * - * Wire down a range of virtual addresses in a map. - */ -int -vm_fault_wire(vm_map_t map, vm_offset_t start, vm_offset_t end, - boolean_t fictitious) -{ - vm_offset_t va; - int rv; - - /* - * We simulate a fault to get the page and enter it in the physical - * map. For user wiring, we only ask for read access on currently - * read-only sections. - */ - for (va = start; va < end; va += PAGE_SIZE) { - rv = vm_fault(map, va, VM_PROT_NONE, VM_FAULT_CHANGE_WIRING); - if (rv) { - if (va != start) - vm_fault_unwire(map, start, va, fictitious); - return (rv); - } - } - return (KERN_SUCCESS); -} - -/* - * vm_fault_unwire: - * - * Unwire a range of virtual addresses in a map. - */ -void -vm_fault_unwire(vm_map_t map, vm_offset_t start, vm_offset_t end, - boolean_t fictitious) -{ - vm_paddr_t pa; - vm_offset_t va; - vm_page_t m; - pmap_t pmap; - - pmap = vm_map_pmap(map); - - /* - * Since the pages are wired down, we must be able to get their - * mappings from the physical map system. 
- */ - for (va = start; va < end; va += PAGE_SIZE) { - pa = pmap_extract(pmap, va); - if (pa != 0) { - pmap_change_wiring(pmap, va, FALSE); - if (!fictitious) { - m = PHYS_TO_VM_PAGE(pa); - vm_page_lock(m); - vm_page_unwire(m, TRUE); - vm_page_unlock(m); - } - } - } -} - -/* * Routine: * vm_fault_copy_entry * Function: diff --git a/sys/vm/vm_map.c b/sys/vm/vm_map.c index 891b68c..15611bf 100644 --- a/sys/vm/vm_map.c +++ b/sys/vm/vm_map.c @@ -132,6 +132,7 @@ static void _vm_map_init(vm_map_t map, pmap_t pmap, vm_offset_t min, vm_offset_t max); static void vm_map_entry_deallocate(vm_map_entry_t entry, boolean_t system_map); static void vm_map_entry_dispose(vm_map_t map, vm_map_entry_t entry); +static void vm_map_entry_unwire(vm_map_t map, vm_map_entry_t entry); #ifdef INVARIANTS static void vm_map_zdtor(void *mem, int size, void *arg); static void vmspace_zdtor(void *mem, int size, void *arg); @@ -139,6 +140,8 @@ static void vmspace_zdtor(void *mem, int size, void *arg); static int vm_map_stack_locked(vm_map_t map, vm_offset_t addrbos, vm_size_t max_ssize, vm_size_t growsize, vm_prot_t prot, vm_prot_t max, int cow); +static void vm_map_wire_entry_failure(vm_map_t map, vm_map_entry_t entry, + vm_offset_t failed_addr); #define ENTRY_CHARGED(e) ((e)->cred != NULL || \ ((e)->object.vm_object != NULL && (e)->object.vm_object->cred != NULL && \ @@ -2407,16 +2410,10 @@ done: (entry->eflags & MAP_ENTRY_USER_WIRED))) { if (user_unwire) entry->eflags &= ~MAP_ENTRY_USER_WIRED; - entry->wired_count--; - if (entry->wired_count == 0) { - /* - * Retain the map lock. 
- */ - vm_fault_unwire(map, entry->start, entry->end, - entry->object.vm_object != NULL && - (entry->object.vm_object->flags & - OBJ_FICTITIOUS) != 0); - } + if (entry->wired_count == 1) + vm_map_entry_unwire(map, entry); + else + entry->wired_count--; } KASSERT((entry->eflags & MAP_ENTRY_IN_TRANSITION) != 0, ("vm_map_unwire: in-transition flag missing %p", entry)); @@ -2437,6 +2434,42 @@ done: } /* + * vm_map_wire_entry_failure: + * + * Handle a wiring failure on the given entry. + * + * The map should be locked. + */ +static void +vm_map_wire_entry_failure(vm_map_t map, vm_map_entry_t entry, + vm_offset_t failed_addr) +{ + + VM_MAP_ASSERT_LOCKED(map); + KASSERT((entry->eflags & MAP_ENTRY_IN_TRANSITION) != 0 && + entry->wired_count == 1, + ("vm_map_wire_entry_failure: entry %p isn't being wired", entry)); + KASSERT(failed_addr < entry->end, + ("vm_map_wire_entry_failure: entry %p was fully wired", entry)); + + /* + * If any pages at the start of this entry were successfully wired, + * then unwire them. + */ + if (failed_addr > entry->start) { + pmap_unwire(map->pmap, entry->start, failed_addr); + vm_object_unwire(entry->object.vm_object, entry->offset, + failed_addr - entry->start, PQ_ACTIVE); + } + + /* + * Assign an out-of-range value to represent the failure to wire this + * entry. + */ + entry->wired_count = -1; +} + +/* * vm_map_wire: * * Implements both kernel and user wiring. 
@@ -2446,10 +2479,10 @@ vm_map_wire(vm_map_t map, vm_offset_t start, vm_offset_t end, int flags) { vm_map_entry_t entry, first_entry, tmp_entry; - vm_offset_t saved_end, saved_start; + vm_offset_t faddr, saved_end, saved_start; unsigned int last_timestamp; int rv; - boolean_t fictitious, need_wakeup, result, user_wire; + boolean_t need_wakeup, result, user_wire; vm_prot_t prot; if (start == end) @@ -2542,17 +2575,24 @@ vm_map_wire(vm_map_t map, vm_offset_t start, vm_offset_t end, entry->wired_count++; saved_start = entry->start; saved_end = entry->end; - fictitious = entry->object.vm_object != NULL && - (entry->object.vm_object->flags & - OBJ_FICTITIOUS) != 0; + /* * Release the map lock, relying on the in-transition * mark. Mark the map busy for fork. */ vm_map_busy(map); vm_map_unlock(map); - rv = vm_fault_wire(map, saved_start, saved_end, - fictitious); + + faddr = saved_start; + do { + /* + * Simulate a fault to get the page and enter + * it into the physical map. + */ + if ((rv = vm_fault(map, faddr, VM_PROT_NONE, + VM_FAULT_CHANGE_WIRING)) != KERN_SUCCESS) + break; + } while ((faddr += PAGE_SIZE) < saved_end); vm_map_lock(map); vm_map_unbusy(map); if (last_timestamp + 1 != map->timestamp) { @@ -2571,23 +2611,22 @@ vm_map_wire(vm_map_t map, vm_offset_t start, vm_offset_t end, first_entry = NULL; entry = tmp_entry; while (entry->end < saved_end) { - if (rv != KERN_SUCCESS) { - KASSERT(entry->wired_count == 1, - ("vm_map_wire: bad count")); - entry->wired_count = -1; - } + /* + * In case of failure, handle entries + * that were not fully wired here; + * fully wired entries are handled + * later. + */ + if (rv != KERN_SUCCESS && + faddr < entry->end) + vm_map_wire_entry_failure(map, + entry, faddr); entry = entry->next; } } last_timestamp = map->timestamp; if (rv != KERN_SUCCESS) { - KASSERT(entry->wired_count == 1, - ("vm_map_wire: bad count")); - /* - * Assign an out-of-range value to represent - * the failure to wire this entry. 
- */ - entry->wired_count = -1; + vm_map_wire_entry_failure(map, entry, faddr); end = entry->end; goto done; } @@ -2649,19 +2688,16 @@ done: * unnecessary. */ entry->wired_count = 0; - } else { - if (!user_wire || - (entry->eflags & MAP_ENTRY_USER_WIRED) == 0) + } else if (!user_wire || + (entry->eflags & MAP_ENTRY_USER_WIRED) == 0) { + /* + * Undo the wiring. Wiring succeeded on this entry + * but failed on a later entry. + */ + if (entry->wired_count == 1) + vm_map_entry_unwire(map, entry); + else entry->wired_count--; - if (entry->wired_count == 0) { - /* - * Retain the map lock. - */ - vm_fault_unwire(map, entry->start, entry->end, - entry->object.vm_object != NULL && - (entry->object.vm_object->flags & - OBJ_FICTITIOUS) != 0); - } } next_entry_done: KASSERT((entry->eflags & MAP_ENTRY_IN_TRANSITION) != 0, @@ -2797,9 +2833,13 @@ vm_map_sync( static void vm_map_entry_unwire(vm_map_t map, vm_map_entry_t entry) { - vm_fault_unwire(map, entry->start, entry->end, - entry->object.vm_object != NULL && - (entry->object.vm_object->flags & OBJ_FICTITIOUS) != 0); + + VM_MAP_ASSERT_LOCKED(map); + KASSERT(entry->wired_count > 0, + ("vm_map_entry_unwire: entry %p isn't wired", entry)); + pmap_unwire(map->pmap, entry->start, entry->end); + vm_object_unwire(entry->object.vm_object, entry->offset, entry->end - + entry->start, PQ_ACTIVE); entry->wired_count = 0; } diff --git a/sys/vm/vm_object.c b/sys/vm/vm_object.c index 6cfb0d4..94c3d30 100644 --- a/sys/vm/vm_object.c +++ b/sys/vm/vm_object.c @@ -2203,6 +2203,78 @@ vm_object_set_writeable_dirty(vm_object_t object) vm_object_set_flag(object, OBJ_MIGHTBEDIRTY); } +/* + * vm_object_unwire: + * + * For each page offset within the specified range of the given object, + * find the highest-level page in the shadow chain and unwire it. A page + * must exist at every page offset, and the highest-level page must be + * wired. 
+ */ +void +vm_object_unwire(vm_object_t object, vm_ooffset_t offset, vm_size_t length, + uint8_t queue) +{ + vm_object_t tobject; + vm_page_t m, tm; + vm_pindex_t end_pindex, pindex, tpindex; + int depth, locked_depth; + + KASSERT((offset & PAGE_MASK) == 0, + ("vm_object_unwire: offset is not page aligned")); + KASSERT((length & PAGE_MASK) == 0, + ("vm_object_unwire: length is not a multiple of PAGE_SIZE")); + /* The wired count of a fictitious page never changes. */ + if ((object->flags & OBJ_FICTITIOUS) != 0) + return; + pindex = OFF_TO_IDX(offset); + end_pindex = pindex + atop(length); + locked_depth = 1; + VM_OBJECT_RLOCK(object); + m = vm_page_find_least(object, pindex); + while (pindex < end_pindex) { + if (m == NULL || pindex < m->pindex) { + /* + * The first object in the shadow chain doesn't + * contain a page at the current index. Therefore, + * the page must exist in a backing object. + */ + tobject = object; + tpindex = pindex; + depth = 0; + do { + tpindex += + OFF_TO_IDX(tobject->backing_object_offset); + tobject = tobject->backing_object; + KASSERT(tobject != NULL, + ("vm_object_unwire: missing page")); + if ((tobject->flags & OBJ_FICTITIOUS) != 0) + goto next_page; + depth++; + if (depth == locked_depth) { + locked_depth++; + VM_OBJECT_RLOCK(tobject); + } + } while ((tm = vm_page_lookup(tobject, tpindex)) == + NULL); + } else { + tm = m; + m = TAILQ_NEXT(m, listq); + } + vm_page_lock(tm); + vm_page_unwire(tm, queue); + vm_page_unlock(tm); +next_page: + pindex++; + } + /* Release the accumulated object locks. 
*/ + for (depth = 0; depth < locked_depth; depth++) { + tobject = object->backing_object; + VM_OBJECT_RUNLOCK(object); + object = tobject; + } +} + #include "opt_ddb.h" #ifdef DDB #include diff --git a/sys/vm/vm_object.h b/sys/vm/vm_object.h index 4b21e55..48ba743 100644 --- a/sys/vm/vm_object.h +++ b/sys/vm/vm_object.h @@ -295,6 +295,8 @@ void vm_object_shadow (vm_object_t *, vm_ooffset_t *, vm_size_t); void vm_object_split(vm_map_entry_t); boolean_t vm_object_sync(vm_object_t, vm_ooffset_t, vm_size_t, boolean_t, boolean_t); +void vm_object_unwire(vm_object_t object, vm_ooffset_t offset, + vm_size_t length, uint8_t queue); #endif /* _KERNEL */ #endif /* _VM_OBJECT_ */ -- cgit v1.1