Diffstat (limited to 'sys')
44 files changed, 579 insertions, 133 deletions
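Most of the churn below is one mechanical conversion: callers that used the global page queues mutex around vm_page_hold()/vm_page_unhold() now take the new per-page lock instead. A minimal caller-side sketch using only interfaces that appear in this diff; the wrapping function is hypothetical:

#include <sys/param.h>
#include <vm/vm.h>
#include <vm/vm_page.h>

/*
 * Hypothetical caller illustrating the conversion applied throughout
 * this diff: holding a page now takes the per-page lock instead of the
 * global page queues mutex.
 */
static void
example_hold_page(vm_page_t m)
{
	/*
	 * Before this change:
	 *	vm_page_lock_queues();
	 *	vm_page_hold(m);
	 *	vm_page_unlock_queues();
	 */
	vm_page_lock(m);	/* per-page mutex, hashed by physical address */
	vm_page_hold(m);
	vm_page_unlock(m);
}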
diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c index d07292d..045b634 100644 --- a/sys/amd64/amd64/pmap.c +++ b/sys/amd64/amd64/pmap.c @@ -793,7 +793,6 @@ static u_long pmap_pdpe_demotions; SYSCTL_ULONG(_vm_pmap_pdpe, OID_AUTO, demotions, CTLFLAG_RD, &pmap_pdpe_demotions, 0, "1GB page demotions"); - /*************************************************** * Low level helper routines..... ***************************************************/ @@ -1200,15 +1199,20 @@ pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot) { pd_entry_t pde, *pdep; pt_entry_t pte; + vm_paddr_t pa; vm_page_t m; + pa = 0; m = NULL; - vm_page_lock_queues(); PMAP_LOCK(pmap); +retry: pdep = pmap_pde(pmap, va); if (pdep != NULL && (pde = *pdep)) { if (pde & PG_PS) { if ((pde & PG_RW) || (prot & VM_PROT_WRITE) == 0) { + if (vm_page_pa_tryrelock(pmap, (pde & PG_PS_FRAME) | + (va & PDRMASK), &pa)) + goto retry; m = PHYS_TO_VM_PAGE((pde & PG_PS_FRAME) | (va & PDRMASK)); vm_page_hold(m); @@ -1217,12 +1221,14 @@ pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot) pte = *pmap_pde_to_pte(pdep, va); if ((pte & PG_V) && ((pte & PG_RW) || (prot & VM_PROT_WRITE) == 0)) { + if (vm_page_pa_tryrelock(pmap, pte & PG_FRAME, &pa)) + goto retry; m = PHYS_TO_VM_PAGE(pte & PG_FRAME); vm_page_hold(m); } } } - vm_page_unlock_queues(); + PA_UNLOCK_COND(pa); PMAP_UNLOCK(pmap); return (m); } @@ -3143,9 +3149,8 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_prot_t access, vm_page_t m, * In the case that a page table page is not * resident, we are creating it here. */ - if (va < VM_MAXUSER_ADDRESS) { + if (va < VM_MAXUSER_ADDRESS) mpte = pmap_allocpte(pmap, va, M_WAITOK); - } pde = pmap_pde(pmap, va); if (pde != NULL && (*pde & PG_V) != 0) { @@ -3393,7 +3398,7 @@ pmap_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end, mpte); m = TAILQ_NEXT(m, listq); } - PMAP_UNLOCK(pmap); + PMAP_UNLOCK(pmap); } /* diff --git a/sys/amd64/include/pmap.h b/sys/amd64/include/pmap.h index 11a5628..6f0b188 100644 --- a/sys/amd64/include/pmap.h +++ b/sys/amd64/include/pmap.h @@ -245,6 +245,8 @@ struct pmap { pml4_entry_t *pm_pml4; /* KVA of level 4 page table */ TAILQ_HEAD(,pv_chunk) pm_pvchunk; /* list of mappings in pmap */ u_int pm_active; /* active on cpus */ + uint32_t pm_gen_count; /* generation count (pmap lock dropped) */ + u_int pm_retries; /* spare u_int here due to padding */ struct pmap_statistics pm_stats; /* pmap statistics */ vm_page_t pm_root; /* spare page table pages */ diff --git a/sys/amd64/include/vmparam.h b/sys/amd64/include/vmparam.h index 0b5004c..6dbe371 100644 --- a/sys/amd64/include/vmparam.h +++ b/sys/amd64/include/vmparam.h @@ -145,6 +145,10 @@ #define VM_LEVEL_0_ORDER 9 #endif +#ifdef SMP +#define PA_LOCK_COUNT 256 +#endif + /* * Virtual addresses of things. Derived from the page directory and * page table indexes from pmap.h for precision. 
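The pmap_extract_and_hold() hunk above, and the equivalent hunks for the other architectures below, all follow the same retry pattern: with the pmap lock held, vm_page_pa_tryrelock() trylocks the pa_lock for the translated physical address; on failure it drops and reacquires the pmap lock, and a non-zero return tells the caller to redo the lookup because the pmap may have changed meanwhile. A condensed, machine-independent sketch of that pattern; pmap_lookup_pa() is a hypothetical stand-in for the architecture-specific page-table walk, everything else is the interface added by this change:

#include <sys/param.h>
#include <vm/vm.h>
#include <vm/pmap.h>
#include <vm/vm_page.h>

static vm_page_t
example_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot)
{
	vm_paddr_t locked_pa, pa;
	vm_page_t m;

	locked_pa = 0;
	m = NULL;
	PMAP_LOCK(pmap);
retry:
	if (pmap_lookup_pa(pmap, va, prot, &pa) == 0) {
		/*
		 * Trylock the pa_lock covering this physical address.
		 * If the trylock fails, the pmap lock is dropped and
		 * retaken, so the translation must be looked up again.
		 */
		if (vm_page_pa_tryrelock(pmap, pa, &locked_pa))
			goto retry;
		m = PHYS_TO_VM_PAGE(pa);
		vm_page_hold(m);	/* hold_count is now covered by the page lock */
	}
	PA_UNLOCK_COND(locked_pa);	/* drop the pa_lock only if one was taken */
	PMAP_UNLOCK(pmap);
	return (m);
}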
diff --git a/sys/arm/arm/pmap.c b/sys/arm/arm/pmap.c index 366b43f..8fe6424 100644 --- a/sys/arm/arm/pmap.c +++ b/sys/arm/arm/pmap.c @@ -3740,13 +3740,14 @@ pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot) struct l2_dtable *l2; pd_entry_t l1pd; pt_entry_t *ptep, pte; - vm_paddr_t pa; + vm_paddr_t pa, paddr; vm_page_t m = NULL; u_int l1idx; l1idx = L1_IDX(va); + paddr = 0; - vm_page_lock_queues(); PMAP_LOCK(pmap); +retry: l1pd = pmap->pm_l1->l1_kva[l1idx]; if (l1pte_section_p(l1pd)) { /* @@ -3758,6 +3759,8 @@ pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot) pa = (l1pd & L1_SUP_FRAME) | (va & L1_SUP_OFFSET); else pa = (l1pd & L1_S_FRAME) | (va & L1_S_OFFSET); + if (vm_page_pa_tryrelock(pmap, pa & PG_FRAME, &paddr)) + goto retry; if (l1pd & L1_S_PROT_W || (prot & VM_PROT_WRITE) == 0) { m = PHYS_TO_VM_PAGE(pa); vm_page_hold(m); @@ -3774,7 +3777,6 @@ pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot) if (l2 == NULL || (ptep = l2->l2_bucket[L2_BUCKET(l1idx)].l2b_kva) == NULL) { PMAP_UNLOCK(pmap); - vm_page_unlock_queues(); return (NULL); } @@ -3783,7 +3785,6 @@ pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot) if (pte == 0) { PMAP_UNLOCK(pmap); - vm_page_unlock_queues(); return (NULL); } if (pte & L2_S_PROT_W || (prot & VM_PROT_WRITE) == 0) { @@ -3796,13 +3797,15 @@ pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot) pa = (pte & L2_S_FRAME) | (va & L2_S_OFFSET); break; } + if (vm_page_pa_tryrelock(pmap, pa & PG_FRAME, &paddr)) + goto retry; m = PHYS_TO_VM_PAGE(pa); vm_page_hold(m); } } PMAP_UNLOCK(pmap); - vm_page_unlock_queues(); + PA_UNLOCK_COND(paddr); return (m); } diff --git a/sys/arm/include/pmap.h b/sys/arm/include/pmap.h index 8ee7bac..287c4c1 100644 --- a/sys/arm/include/pmap.h +++ b/sys/arm/include/pmap.h @@ -134,6 +134,8 @@ struct pmap { struct l1_ttable *pm_l1; struct l2_dtable *pm_l2[L2_SIZE]; pd_entry_t *pm_pdir; /* KVA of page directory */ + uint32_t pm_gen_count; /* generation count (pmap lock dropped) */ + u_int pm_retries; int pm_active; /* active on cpus */ struct pmap_statistics pm_stats; /* pmap statictics */ TAILQ_HEAD(,pv_entry) pm_pvlist; /* list of mappings in pmap */ diff --git a/sys/dev/drm/via_dmablit.c b/sys/dev/drm/via_dmablit.c index ea449f1..3a66b26 100644 --- a/sys/dev/drm/via_dmablit.c +++ b/sys/dev/drm/via_dmablit.c @@ -248,10 +248,12 @@ via_lock_all_dma_pages(drm_via_sg_info_t *vsg, drm_via_dmablit_t *xfer) (vm_offset_t)xfer->mem_addr + IDX_TO_OFF(i), VM_PROT_RW); if (m == NULL) break; + vm_page_lock(m); vm_page_lock_queues(); vm_page_wire(m); vm_page_unhold(m); vm_page_unlock_queues(); + vm_page_unlock(m); vsg->pages[i] = m; } vsg->state = dr_via_pages_locked; diff --git a/sys/i386/i386/pmap.c b/sys/i386/i386/pmap.c index d8b9686..661fade 100644 --- a/sys/i386/i386/pmap.c +++ b/sys/i386/i386/pmap.c @@ -1346,14 +1346,19 @@ pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot) pd_entry_t pde; pt_entry_t pte; vm_page_t m; + vm_paddr_t pa; + pa = 0; m = NULL; - vm_page_lock_queues(); PMAP_LOCK(pmap); +retry: pde = *pmap_pde(pmap, va); if (pde != 0) { if (pde & PG_PS) { if ((pde & PG_RW) || (prot & VM_PROT_WRITE) == 0) { + if (vm_page_pa_tryrelock(pmap, (pde & PG_PS_FRAME) | + (va & PDRMASK), &pa)) + goto retry; m = PHYS_TO_VM_PAGE((pde & PG_PS_FRAME) | (va & PDRMASK)); vm_page_hold(m); @@ -1363,13 +1368,15 @@ pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot) pte = *pmap_pte_quick(pmap, va); if (pte != 0 && ((pte & PG_RW) || (prot & VM_PROT_WRITE) == 
0)) { + if (vm_page_pa_tryrelock(pmap, pte & PG_FRAME, &pa)) + goto retry; m = PHYS_TO_VM_PAGE(pte & PG_FRAME); vm_page_hold(m); } sched_unpin(); } } - vm_page_unlock_queues(); + PA_UNLOCK_COND(pa); PMAP_UNLOCK(pmap); return (m); } diff --git a/sys/i386/include/pmap.h b/sys/i386/include/pmap.h index ae7d79d..f45a9df 100644 --- a/sys/i386/include/pmap.h +++ b/sys/i386/include/pmap.h @@ -420,11 +420,14 @@ struct pmap { u_int pm_active; /* active on cpus */ struct pmap_statistics pm_stats; /* pmap statistics */ LIST_ENTRY(pmap) pm_list; /* List of all pmaps */ + uint32_t pm_gen_count; /* generation count (pmap lock dropped) */ + u_int pm_retries; #ifdef PAE pdpt_entry_t *pm_pdpt; /* KVA of page director pointer table */ #endif vm_page_t pm_root; /* spare page table pages */ + }; typedef struct pmap *pmap_t; diff --git a/sys/i386/xen/pmap.c b/sys/i386/xen/pmap.c index 18619c3..ec96ef0 100644 --- a/sys/i386/xen/pmap.c +++ b/sys/i386/xen/pmap.c @@ -1219,14 +1219,19 @@ pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot) pd_entry_t pde; pt_entry_t pte; vm_page_t m; + vm_paddr_t pa; + pa = 0; m = NULL; - vm_page_lock_queues(); PMAP_LOCK(pmap); +retry: pde = PT_GET(pmap_pde(pmap, va)); if (pde != 0) { if (pde & PG_PS) { if ((pde & PG_RW) || (prot & VM_PROT_WRITE) == 0) { + if (vm_page_pa_tryrelock(pmap, (pde & PG_PS_FRAME) | + (va & PDRMASK), &pa)) + goto retry; m = PHYS_TO_VM_PAGE((pde & PG_PS_FRAME) | (va & PDRMASK)); vm_page_hold(m); @@ -1238,13 +1243,15 @@ pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot) PT_SET_MA(PADDR1, 0); if ((pte & PG_V) && ((pte & PG_RW) || (prot & VM_PROT_WRITE) == 0)) { + if (vm_page_pa_tryrelock(pmap, pte & PG_FRAME, &pa)) + goto retry; m = PHYS_TO_VM_PAGE(pte & PG_FRAME); vm_page_hold(m); } sched_unpin(); } } - vm_page_unlock_queues(); + PA_UNLOCK_COND(pa); PMAP_UNLOCK(pmap); return (m); } diff --git a/sys/ia64/ia64/pmap.c b/sys/ia64/ia64/pmap.c index 91b2e07..7cc18c1 100644 --- a/sys/ia64/ia64/pmap.c +++ b/sys/ia64/ia64/pmap.c @@ -1028,18 +1028,22 @@ pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot) struct ia64_lpte *pte; pmap_t oldpmap; vm_page_t m; + vm_paddr_t pa; + pa = 0; m = NULL; - vm_page_lock_queues(); PMAP_LOCK(pmap); oldpmap = pmap_switch(pmap); +retry: pte = pmap_find_vhpt(va); if (pte != NULL && pmap_present(pte) && (pmap_prot(pte) & prot) == prot) { m = PHYS_TO_VM_PAGE(pmap_ppn(pte)); + if (vm_page_pa_tryrelock(pmap, pmap_ppn(pte), &pa)) + goto retry; vm_page_hold(m); } - vm_page_unlock_queues(); + PA_UNLOCK_COND(pa); pmap_switch(oldpmap); PMAP_UNLOCK(pmap); return (m); diff --git a/sys/ia64/include/pmap.h b/sys/ia64/include/pmap.h index 44079c8..ff059fd 100644 --- a/sys/ia64/include/pmap.h +++ b/sys/ia64/include/pmap.h @@ -77,6 +77,8 @@ struct pmap { TAILQ_HEAD(,pv_entry) pm_pvlist; /* list of mappings in pmap */ u_int32_t pm_rid[5]; /* base RID for pmap */ struct pmap_statistics pm_stats; /* pmap statistics */ + uint32_t pm_gen_count; /* generation count (pmap lock dropped) */ + u_int pm_retries; }; typedef struct pmap *pmap_t; diff --git a/sys/kern/kern_exec.c b/sys/kern/kern_exec.c index ed22519..bb92972 100644 --- a/sys/kern/kern_exec.c +++ b/sys/kern/kern_exec.c @@ -957,9 +957,9 @@ exec_map_first_page(imgp) return (EIO); } } - vm_page_lock_queues(); + vm_page_lock(ma[0]); vm_page_hold(ma[0]); - vm_page_unlock_queues(); + vm_page_unlock(ma[0]); vm_page_wakeup(ma[0]); VM_OBJECT_UNLOCK(object); @@ -979,9 +979,9 @@ exec_unmap_first_page(imgp) m = sf_buf_page(imgp->firstpage); 
sf_buf_free(imgp->firstpage); imgp->firstpage = NULL; - vm_page_lock_queues(); + vm_page_lock(m); vm_page_unhold(m); - vm_page_unlock_queues(); + vm_page_unlock(m); } } diff --git a/sys/kern/subr_witness.c b/sys/kern/subr_witness.c index 5b7d565..ef1bc39 100644 --- a/sys/kern/subr_witness.c +++ b/sys/kern/subr_witness.c @@ -597,6 +597,15 @@ static struct witness_order_list_entry order_lists[] = { { "cdev", &lock_class_mtx_sleep }, { NULL, NULL }, /* + * VM + * + */ + { "vm object", &lock_class_mtx_sleep }, + { "page lock", &lock_class_mtx_sleep }, + { "vm page queue mutex", &lock_class_mtx_sleep }, + { "pmap", &lock_class_mtx_sleep }, + { NULL, NULL }, + /* * kqueue/VFS interaction */ { "kqueue", &lock_class_mtx_sleep }, diff --git a/sys/kern/sys_pipe.c b/sys/kern/sys_pipe.c index e098648..7130c26 100644 --- a/sys/kern/sys_pipe.c +++ b/sys/kern/sys_pipe.c @@ -773,10 +773,12 @@ pipe_build_write_buffer(wpipe, uio) */ race: if (vm_fault_quick((caddr_t)addr, VM_PROT_READ) < 0) { - vm_page_lock_queues(); - for (j = 0; j < i; j++) + + for (j = 0; j < i; j++) { + vm_page_lock(wpipe->pipe_map.ms[j]); vm_page_unhold(wpipe->pipe_map.ms[j]); - vm_page_unlock_queues(); + vm_page_unlock(wpipe->pipe_map.ms[j]); + } return (EFAULT); } wpipe->pipe_map.ms[i] = pmap_extract_and_hold(pmap, addr, @@ -816,11 +818,11 @@ pipe_destroy_write_buffer(wpipe) int i; PIPE_LOCK_ASSERT(wpipe, MA_OWNED); - vm_page_lock_queues(); for (i = 0; i < wpipe->pipe_map.npages; i++) { + vm_page_lock(wpipe->pipe_map.ms[i]); vm_page_unhold(wpipe->pipe_map.ms[i]); + vm_page_unlock(wpipe->pipe_map.ms[i]); } - vm_page_unlock_queues(); wpipe->pipe_map.npages = 0; } diff --git a/sys/kern/sys_process.c b/sys/kern/sys_process.c index f5671d9..d8cc4f0 100644 --- a/sys/kern/sys_process.c +++ b/sys/kern/sys_process.c @@ -328,9 +328,9 @@ proc_rwmem(struct proc *p, struct uio *uio) /* * Hold the page in memory. */ - vm_page_lock_queues(); + vm_page_lock(m); vm_page_hold(m); - vm_page_unlock_queues(); + vm_page_unlock(m); /* * We're done with tmap now. @@ -349,9 +349,9 @@ proc_rwmem(struct proc *p, struct uio *uio) /* * Release the page. */ - vm_page_lock_queues(); + vm_page_lock(m); vm_page_unhold(m); - vm_page_unlock_queues(); + vm_page_unlock(m); } while (error == 0 && uio->uio_resid > 0); diff --git a/sys/kern/uipc_cow.c b/sys/kern/uipc_cow.c index 52988dd..5c0dcd2 100644 --- a/sys/kern/uipc_cow.c +++ b/sys/kern/uipc_cow.c @@ -128,10 +128,12 @@ socow_setup(struct mbuf *m0, struct uio *uio) /* * set up COW */ + vm_page_lock(pp); vm_page_lock_queues(); if (vm_page_cowsetup(pp) != 0) { vm_page_unhold(pp); vm_page_unlock_queues(); + vm_page_unlock(pp); return (0); } @@ -141,7 +143,7 @@ socow_setup(struct mbuf *m0, struct uio *uio) vm_page_wire(pp); vm_page_unhold(pp); vm_page_unlock_queues(); - + vm_page_unlock(pp); /* * Allocate an sf buf */ diff --git a/sys/kern/vfs_bio.c b/sys/kern/vfs_bio.c index 156b676..ea846a5 100644 --- a/sys/kern/vfs_bio.c +++ b/sys/kern/vfs_bio.c @@ -3860,12 +3860,12 @@ vmapbuf(struct buf *bp) retry: if (vm_fault_quick(addr >= bp->b_data ? 
addr : bp->b_data, prot) < 0) { - vm_page_lock_queues(); for (i = 0; i < pidx; ++i) { + vm_page_lock(bp->b_pages[i]); vm_page_unhold(bp->b_pages[i]); + vm_page_unlock(bp->b_pages[i]); bp->b_pages[i] = NULL; } - vm_page_unlock_queues(); return(-1); } m = pmap_extract_and_hold(pmap, (vm_offset_t)addr, prot); @@ -3896,11 +3896,12 @@ vunmapbuf(struct buf *bp) npages = bp->b_npages; pmap_qremove(trunc_page((vm_offset_t)bp->b_data), npages); - vm_page_lock_queues(); - for (pidx = 0; pidx < npages; pidx++) + for (pidx = 0; pidx < npages; pidx++) { + vm_page_lock(bp->b_pages[pidx]); vm_page_unhold(bp->b_pages[pidx]); - vm_page_unlock_queues(); - + vm_page_unlock(bp->b_pages[pidx]); + } + bp->b_data = bp->b_saveaddr; } diff --git a/sys/mips/include/pmap.h b/sys/mips/include/pmap.h index 80772d9..f4df6ca 100644 --- a/sys/mips/include/pmap.h +++ b/sys/mips/include/pmap.h @@ -88,6 +88,8 @@ struct pmap { pd_entry_t *pm_segtab; /* KVA of segment table */ TAILQ_HEAD(, pv_entry) pm_pvlist; /* list of mappings in * pmap */ + uint32_t pm_gen_count; /* generation count (pmap lock dropped) */ + u_int pm_retries; int pm_active; /* active on cpus */ struct { u_int32_t asid:ASID_BITS; /* TLB address space tag */ diff --git a/sys/mips/mips/pmap.c b/sys/mips/mips/pmap.c index e7c3239..826177f 100644 --- a/sys/mips/mips/pmap.c +++ b/sys/mips/mips/pmap.c @@ -147,7 +147,6 @@ unsigned pmap_max_asid; /* max ASID supported by the system */ #define PMAP_ASID_RESERVED 0 - vm_offset_t kernel_vm_end; static struct tlb tlbstash[MAXCPU][MIPS_MAX_TLB_ENTRIES]; @@ -710,18 +709,22 @@ pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot) { pt_entry_t pte; vm_page_t m; + vm_paddr_t pa; m = NULL; - vm_page_lock_queues(); + pa = 0; PMAP_LOCK(pmap); - +retry: pte = *pmap_pte(pmap, va); if (pte != 0 && pmap_pte_v(&pte) && ((pte & PTE_RW) || (prot & VM_PROT_WRITE) == 0)) { + if (vm_page_pa_tryrelock(pmap, mips_tlbpfn_to_paddr(pte), &pa)) + goto retry; + m = PHYS_TO_VM_PAGE(mips_tlbpfn_to_paddr(pte)); vm_page_hold(m); } - vm_page_unlock_queues(); + PA_UNLOCK_COND(pa); PMAP_UNLOCK(pmap); return (m); } diff --git a/sys/net/bpf_zerocopy.c b/sys/net/bpf_zerocopy.c index a1dd923..3cf4b8d 100644 --- a/sys/net/bpf_zerocopy.c +++ b/sys/net/bpf_zerocopy.c @@ -168,10 +168,12 @@ zbuf_sfbuf_get(struct vm_map *map, vm_offset_t uaddr) VM_PROT_WRITE); if (pp == NULL) return (NULL); + vm_page_lock(pp); vm_page_lock_queues(); vm_page_wire(pp); vm_page_unhold(pp); vm_page_unlock_queues(); + vm_page_unlock(pp); sf = sf_buf_alloc(pp, SFB_NOWAIT); if (sf == NULL) { zbuf_page_free(pp); diff --git a/sys/powerpc/aim/mmu_oea.c b/sys/powerpc/aim/mmu_oea.c index 24e7b4e..95936ed 100644 --- a/sys/powerpc/aim/mmu_oea.c +++ b/sys/powerpc/aim/mmu_oea.c @@ -1241,18 +1241,22 @@ moea_extract_and_hold(mmu_t mmu, pmap_t pmap, vm_offset_t va, vm_prot_t prot) { struct pvo_entry *pvo; vm_page_t m; - + vm_paddr_t pa; + m = NULL; - vm_page_lock_queues(); + pa = 0; PMAP_LOCK(pmap); +retry: pvo = moea_pvo_find_va(pmap, va & ~ADDR_POFF, NULL); if (pvo != NULL && (pvo->pvo_pte.pte.pte_hi & PTE_VALID) && ((pvo->pvo_pte.pte.pte_lo & PTE_PP) == PTE_RW || (prot & VM_PROT_WRITE) == 0)) { + if (vm_page_pa_tryrelock(pmap, pvo->pvo_pte.pte.pte_lo & PTE_RPGN, &pa)) + goto retry; m = PHYS_TO_VM_PAGE(pvo->pvo_pte.pte.pte_lo & PTE_RPGN); vm_page_hold(m); } - vm_page_unlock_queues(); + PA_UNLOCK_COND(pa); PMAP_UNLOCK(pmap); return (m); } diff --git a/sys/powerpc/aim/mmu_oea64.c b/sys/powerpc/aim/mmu_oea64.c index 2571587..0483b41 100644 --- a/sys/powerpc/aim/mmu_oea64.c +++ 
b/sys/powerpc/aim/mmu_oea64.c @@ -1374,18 +1374,23 @@ moea64_extract_and_hold(mmu_t mmu, pmap_t pmap, vm_offset_t va, vm_prot_t prot) { struct pvo_entry *pvo; vm_page_t m; + vm_paddr_t pa; m = NULL; - vm_page_lock_queues(); + pa = 0; PMAP_LOCK(pmap); +retry: pvo = moea64_pvo_find_va(pmap, va & ~ADDR_POFF, NULL); if (pvo != NULL && (pvo->pvo_pte.lpte.pte_hi & LPTE_VALID) && ((pvo->pvo_pte.lpte.pte_lo & LPTE_PP) == LPTE_RW || (prot & VM_PROT_WRITE) == 0)) { + if (vm_page_pa_tryrelock(pmap, + pvo->pvo_pte.lpte.pte_lo & LPTE_RPGN, &pa)) + goto retry; m = PHYS_TO_VM_PAGE(pvo->pvo_pte.lpte.pte_lo & LPTE_RPGN); vm_page_hold(m); } - vm_page_unlock_queues(); + PA_UNLOCK_COND(pa); PMAP_UNLOCK(pmap); return (m); } diff --git a/sys/powerpc/booke/pmap.c b/sys/powerpc/booke/pmap.c index 549eaaa..0b0fce4 100644 --- a/sys/powerpc/booke/pmap.c +++ b/sys/powerpc/booke/pmap.c @@ -2034,11 +2034,12 @@ mmu_booke_extract_and_hold(mmu_t mmu, pmap_t pmap, vm_offset_t va, pte_t *pte; vm_page_t m; uint32_t pte_wbit; - + vm_paddr_t pa; + m = NULL; - vm_page_lock_queues(); + pa = 0; PMAP_LOCK(pmap); - +retry: pte = pte_find(mmu, pmap, va); if ((pte != NULL) && PTE_ISVALID(pte)) { if (pmap == kernel_pmap) @@ -2047,12 +2048,14 @@ mmu_booke_extract_and_hold(mmu_t mmu, pmap_t pmap, vm_offset_t va, pte_wbit = PTE_UW; if ((pte->flags & pte_wbit) || ((prot & VM_PROT_WRITE) == 0)) { + if (vm_page_pa_tryrelock(pmap, PTE_PA(pte), &pa)) + goto retry; m = PHYS_TO_VM_PAGE(PTE_PA(pte)); vm_page_hold(m); } } - vm_page_unlock_queues(); + PA_UNLOCK_COND(pa); PMAP_UNLOCK(pmap); return (m); } diff --git a/sys/powerpc/include/pmap.h b/sys/powerpc/include/pmap.h index a23052e..a49bd6c 100644 --- a/sys/powerpc/include/pmap.h +++ b/sys/powerpc/include/pmap.h @@ -88,6 +88,8 @@ struct pmap { struct mtx pm_mtx; u_int pm_sr[16]; u_int pm_active; + uint32_t pm_gen_count; /* generation count (pmap lock dropped) */ + u_int pm_retries; u_int pm_context; struct pmap *pmap_phys; diff --git a/sys/sparc64/include/pmap.h b/sys/sparc64/include/pmap.h index 83c8190..388f751 100644 --- a/sys/sparc64/include/pmap.h +++ b/sys/sparc64/include/pmap.h @@ -62,6 +62,8 @@ struct pmap { struct tte *pm_tsb; vm_object_t pm_tsb_obj; u_int pm_active; + uint32_t pm_gen_count; /* generation count (pmap lock dropped) */ + u_int pm_retries; u_int pm_context[MAXCPU]; struct pmap_statistics pm_stats; }; diff --git a/sys/sparc64/sparc64/pmap.c b/sys/sparc64/sparc64/pmap.c index 22ae860..5d69c13 100644 --- a/sys/sparc64/sparc64/pmap.c +++ b/sys/sparc64/sparc64/pmap.c @@ -694,13 +694,17 @@ pmap_extract_and_hold(pmap_t pm, vm_offset_t va, vm_prot_t prot) { struct tte *tp; vm_page_t m; + vm_paddr_t pa; m = NULL; - vm_page_lock_queues(); + pa = 0; + PMAP_LOCK(pm); +retry: if (pm == kernel_pmap) { if (va >= VM_MIN_DIRECT_ADDRESS) { tp = NULL; m = PHYS_TO_VM_PAGE(TLB_DIRECT_TO_PHYS(va)); + (void)vm_page_pa_tryrelock(pm, TLB_DIRECT_TO_PHYS(va), &pa); vm_page_hold(m); } else { tp = tsb_kvtotte(va); @@ -708,17 +712,17 @@ pmap_extract_and_hold(pmap_t pm, vm_offset_t va, vm_prot_t prot) tp = NULL; } } else { - PMAP_LOCK(pm); tp = tsb_tte_lookup(pm, va); } if (tp != NULL && ((tp->tte_data & TD_SW) || (prot & VM_PROT_WRITE) == 0)) { + if (vm_page_pa_tryrelock(pm, TTE_GET_PA(tp), &pa)) + goto retry; m = PHYS_TO_VM_PAGE(TTE_GET_PA(tp)); vm_page_hold(m); } - vm_page_unlock_queues(); - if (pm != kernel_pmap) - PMAP_UNLOCK(pm); + PA_UNLOCK_COND(pa); + PMAP_UNLOCK(pm); return (m); } diff --git a/sys/sun4v/include/pmap.h b/sys/sun4v/include/pmap.h index 90ae4c4..6acebb4 100644 --- 
a/sys/sun4v/include/pmap.h +++ b/sys/sun4v/include/pmap.h @@ -75,6 +75,8 @@ struct pmap { struct tte_hash *pm_hash; TAILQ_HEAD(,pv_entry) pm_pvlist; /* list of mappings in pmap */ struct hv_tsb_info pm_tsb; + uint32_t pm_gen_count; /* generation count (pmap lock dropped) */ + u_int pm_retries; pmap_cpumask_t pm_active; /* mask of cpus currently using pmap */ pmap_cpumask_t pm_tlbactive; /* mask of cpus that have used this pmap */ struct pmap_statistics pm_stats; diff --git a/sys/sun4v/sun4v/pmap.c b/sys/sun4v/sun4v/pmap.c index cbd8106..0c84421 100644 --- a/sys/sun4v/sun4v/pmap.c +++ b/sys/sun4v/sun4v/pmap.c @@ -1275,17 +1275,21 @@ pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot) { tte_t tte_data; vm_page_t m; + vm_paddr_t pa; m = NULL; - vm_page_lock_queues(); + pa = 0; PMAP_LOCK(pmap); +retry: tte_data = tte_hash_lookup(pmap->pm_hash, va); if (tte_data != 0 && ((tte_data & VTD_SW_W) || (prot & VM_PROT_WRITE) == 0)) { + if (vm_page_pa_tryrelock(pmap, TTE_GET_PA(tte_data), &pa)) + goto retry; m = PHYS_TO_VM_PAGE(TTE_GET_PA(tte_data)); vm_page_hold(m); } - vm_page_unlock_queues(); + PA_UNLOCK_COND(pa); PMAP_UNLOCK(pmap); return (m); diff --git a/sys/sys/param.h b/sys/sys/param.h index 9f87ee2..11c15ab 100644 --- a/sys/sys/param.h +++ b/sys/sys/param.h @@ -58,7 +58,7 @@ * in the range 5 to 9. */ #undef __FreeBSD_version -#define __FreeBSD_version 900010 /* Master, propagated to newvers */ +#define __FreeBSD_version 900011 /* Master, propagated to newvers */ #ifndef LOCORE #include <sys/types.h> diff --git a/sys/vm/device_pager.c b/sys/vm/device_pager.c index 9002e77..996f740 100644 --- a/sys/vm/device_pager.c +++ b/sys/vm/device_pager.c @@ -251,12 +251,16 @@ dev_pager_getpages(object, m, count, reqpage) VM_OBJECT_LOCK(object); dev_pager_updatefake(page, paddr, memattr); if (count > 1) { - vm_page_lock_queues(); + for (i = 0; i < count; i++) { - if (i != reqpage) + if (i != reqpage) { + vm_page_lock(m[i]); + vm_page_lock_queues(); vm_page_free(m[i]); + vm_page_unlock_queues(); + vm_page_unlock(m[i]); + } } - vm_page_unlock_queues(); } } else { /* @@ -266,10 +270,13 @@ dev_pager_getpages(object, m, count, reqpage) page = dev_pager_getfake(paddr, memattr); VM_OBJECT_LOCK(object); TAILQ_INSERT_TAIL(&object->un_pager.devp.devp_pglist, page, pageq); - vm_page_lock_queues(); - for (i = 0; i < count; i++) + for (i = 0; i < count; i++) { + vm_page_lock(m[i]); + vm_page_lock_queues(); vm_page_free(m[i]); - vm_page_unlock_queues(); + vm_page_unlock_queues(); + vm_page_unlock(m[i]); + } vm_page_insert(page, object, offset); m[reqpage] = page; } diff --git a/sys/vm/sg_pager.c b/sys/vm/sg_pager.c index a17fe82..845401a 100644 --- a/sys/vm/sg_pager.c +++ b/sys/vm/sg_pager.c @@ -198,10 +198,13 @@ sg_pager_getpages(vm_object_t object, vm_page_t *m, int count, int reqpage) TAILQ_INSERT_TAIL(&object->un_pager.sgp.sgp_pglist, page, pageq); /* Free the original pages and insert this fake page into the object. 
*/ - vm_page_lock_queues(); - for (i = 0; i < count; i++) + for (i = 0; i < count; i++) { + vm_page_lock(m[i]); + vm_page_lock_queues(); vm_page_free(m[i]); - vm_page_unlock_queues(); + vm_page_unlock_queues(); + vm_page_unlock(m[i]); + } vm_page_insert(page, object, offset); m[reqpage] = page; page->valid = VM_PAGE_BITS_ALL; diff --git a/sys/vm/swap_pager.c b/sys/vm/swap_pager.c index f47719b..6017a52 100644 --- a/sys/vm/swap_pager.c +++ b/sys/vm/swap_pager.c @@ -1137,12 +1137,21 @@ swap_pager_getpages(vm_object_t object, vm_page_t *m, int count, int reqpage) if (0 < i || j < count) { int k; - vm_page_lock_queues(); - for (k = 0; k < i; ++k) + + for (k = 0; k < i; ++k) { + vm_page_lock(m[k]); + vm_page_lock_queues(); swp_pager_free_nrpage(m[k]); - for (k = j; k < count; ++k) + vm_page_unlock_queues(); + vm_page_unlock(m[k]); + } + for (k = j; k < count; ++k) { + vm_page_lock(m[k]); + vm_page_lock_queues(); swp_pager_free_nrpage(m[k]); - vm_page_unlock_queues(); + vm_page_unlock_queues(); + vm_page_unlock(m[k]); + } } /* @@ -1497,7 +1506,7 @@ swp_pager_async_iodone(struct buf *bp) object = bp->b_pages[0]->object; VM_OBJECT_LOCK(object); } - vm_page_lock_queues(); + /* * cleanup pages. If an error occurs writing to swap, we are in * very serious trouble. If it happens to be a disk error, though, @@ -1509,6 +1518,8 @@ swp_pager_async_iodone(struct buf *bp) for (i = 0; i < bp->b_npages; ++i) { vm_page_t m = bp->b_pages[i]; + vm_page_lock(m); + vm_page_lock_queues(); m->oflags &= ~VPO_SWAPINPROG; if (bp->b_ioflags & BIO_ERROR) { @@ -1605,8 +1616,9 @@ swp_pager_async_iodone(struct buf *bp) if (vm_page_count_severe()) vm_page_try_to_cache(m); } + vm_page_unlock_queues(); + vm_page_unlock(m); } - vm_page_unlock_queues(); /* * adjust pip. NOTE: the original parent may still have its own @@ -1702,10 +1714,12 @@ swp_pager_force_pagein(vm_object_t object, vm_pindex_t pindex) m = vm_page_grab(object, pindex, VM_ALLOC_NORMAL|VM_ALLOC_RETRY); if (m->valid == VM_PAGE_BITS_ALL) { vm_object_pip_subtract(object, 1); + vm_page_lock(m); vm_page_lock_queues(); vm_page_activate(m); vm_page_dirty(m); vm_page_unlock_queues(); + vm_page_unlock(m); vm_page_wakeup(m); vm_pager_page_unswapped(m); return; @@ -1714,10 +1728,12 @@ swp_pager_force_pagein(vm_object_t object, vm_pindex_t pindex) if (swap_pager_getpages(object, &m, 1, 0) != VM_PAGER_OK) panic("swap_pager_force_pagein: read from swap failed");/*XXX*/ vm_object_pip_subtract(object, 1); + vm_page_lock(m); vm_page_lock_queues(); vm_page_dirty(m); vm_page_dontneed(m); vm_page_unlock_queues(); + vm_page_unlock(m); vm_page_wakeup(m); vm_pager_page_unswapped(m); } diff --git a/sys/vm/uma_core.c b/sys/vm/uma_core.c index a2d5633..00a8276 100644 --- a/sys/vm/uma_core.c +++ b/sys/vm/uma_core.c @@ -1022,10 +1022,12 @@ obj_alloc(uma_zone_t zone, int bytes, u_int8_t *flags, int wait) while (pages != startpages) { pages--; p = TAILQ_LAST(&object->memq, pglist); + vm_page_lock(p); vm_page_lock_queues(); vm_page_unwire(p, 0); vm_page_free(p); vm_page_unlock_queues(); + vm_page_unlock(p); } retkva = 0; goto done; diff --git a/sys/vm/vm_contig.c b/sys/vm/vm_contig.c index 78d7e28..7220055 100644 --- a/sys/vm/vm_contig.c +++ b/sys/vm/vm_contig.c @@ -257,9 +257,11 @@ retry: i -= PAGE_SIZE; m = vm_page_lookup(object, OFF_TO_IDX(offset + i)); + vm_page_lock(m); vm_page_lock_queues(); vm_page_free(m); vm_page_unlock_queues(); + vm_page_unlock(m); } VM_OBJECT_UNLOCK(object); vm_map_delete(map, addr, addr + size); diff --git a/sys/vm/vm_fault.c b/sys/vm/vm_fault.c index 
058cbb0..de74915 100644 --- a/sys/vm/vm_fault.c +++ b/sys/vm/vm_fault.c @@ -137,9 +137,11 @@ release_page(struct faultstate *fs) { vm_page_wakeup(fs->m); + vm_page_lock(fs->m); vm_page_lock_queues(); vm_page_deactivate(fs->m); vm_page_unlock_queues(); + vm_page_unlock(fs->m); fs->m = NULL; } @@ -161,9 +163,11 @@ unlock_and_deallocate(struct faultstate *fs) VM_OBJECT_UNLOCK(fs->object); if (fs->object != fs->first_object) { VM_OBJECT_LOCK(fs->first_object); + vm_page_lock(fs->first_m); vm_page_lock_queues(); vm_page_free(fs->first_m); vm_page_unlock_queues(); + vm_page_unlock(fs->first_m); vm_object_pip_wakeup(fs->first_object); VM_OBJECT_UNLOCK(fs->first_object); fs->first_m = NULL; @@ -305,12 +309,14 @@ RetryFault:; * removes the page from the backing object, * which is not what we want. */ + vm_page_lock(fs.m); vm_page_lock_queues(); if ((fs.m->cow) && (fault_type & VM_PROT_WRITE) && (fs.object == fs.first_object)) { vm_page_cowfault(fs.m); vm_page_unlock_queues(); + vm_page_unlock(fs.m); unlock_and_deallocate(&fs); goto RetryFault; } @@ -333,12 +339,15 @@ RetryFault:; */ if ((fs.m->oflags & VPO_BUSY) || fs.m->busy) { vm_page_unlock_queues(); + vm_page_unlock(fs.m); VM_OBJECT_UNLOCK(fs.object); if (fs.object != fs.first_object) { VM_OBJECT_LOCK(fs.first_object); + vm_page_lock(fs.first_m); vm_page_lock_queues(); vm_page_free(fs.first_m); vm_page_unlock_queues(); + vm_page_unlock(fs.first_m); vm_object_pip_wakeup(fs.first_object); VM_OBJECT_UNLOCK(fs.first_object); fs.first_m = NULL; @@ -358,6 +367,7 @@ RetryFault:; } vm_pageq_remove(fs.m); vm_page_unlock_queues(); + vm_page_unlock(fs.m); /* * Mark page busy for other processes, and the @@ -481,17 +491,25 @@ readrest: continue; if (!are_queues_locked) { are_queues_locked = TRUE; + vm_page_lock(mt); + vm_page_lock_queues(); + } else { + vm_page_unlock_queues(); + vm_page_lock(mt); vm_page_lock_queues(); } if (mt->hold_count || - mt->wire_count) + mt->wire_count) { + vm_page_unlock(mt); continue; + } pmap_remove_all(mt); if (mt->dirty) { vm_page_deactivate(mt); } else { vm_page_cache(mt); } + vm_page_unlock(mt); } if (are_queues_locked) vm_page_unlock_queues(); @@ -623,17 +641,21 @@ vnode_locked: */ if (((fs.map != kernel_map) && (rv == VM_PAGER_ERROR)) || (rv == VM_PAGER_BAD)) { + vm_page_lock(fs.m); vm_page_lock_queues(); vm_page_free(fs.m); vm_page_unlock_queues(); + vm_page_unlock(fs.m); fs.m = NULL; unlock_and_deallocate(&fs); return ((rv == VM_PAGER_ERROR) ? KERN_FAILURE : KERN_PROTECTION_FAILURE); } if (fs.object != fs.first_object) { + vm_page_lock(fs.m); vm_page_lock_queues(); vm_page_free(fs.m); vm_page_unlock_queues(); + vm_page_unlock(fs.m); fs.m = NULL; /* * XXX - we cannot just fall out at this @@ -746,18 +768,24 @@ vnode_locked: * We don't chase down the shadow chain */ fs.object == fs.first_object->backing_object) { + vm_page_lock(fs.first_m); vm_page_lock_queues(); /* * get rid of the unnecessary page */ vm_page_free(fs.first_m); + vm_page_unlock_queues(); + vm_page_unlock(fs.first_m); /* * grab the page and put it into the * process'es object. The page is * automatically made dirty. 
*/ + vm_page_lock(fs.m); + vm_page_lock_queues(); vm_page_rename(fs.m, fs.first_object, fs.first_pindex); vm_page_unlock_queues(); + vm_page_unlock(fs.m); vm_page_busy(fs.m); fs.first_m = fs.m; fs.m = NULL; @@ -770,10 +798,17 @@ vnode_locked: fs.first_m->valid = VM_PAGE_BITS_ALL; if (wired && (fault_flags & VM_FAULT_CHANGE_WIRING) == 0) { + vm_page_lock(fs.first_m); vm_page_lock_queues(); vm_page_wire(fs.first_m); + vm_page_unlock_queues(); + vm_page_unlock(fs.first_m); + + vm_page_lock(fs.m); + vm_page_lock_queues(); vm_page_unwire(fs.m, FALSE); vm_page_unlock_queues(); + vm_page_unlock(fs.m); } /* * We no longer need the old page or object. @@ -923,6 +958,7 @@ vnode_locked: if ((fault_flags & VM_FAULT_CHANGE_WIRING) == 0 && wired == 0) vm_fault_prefault(fs.map->pmap, vaddr, fs.entry); VM_OBJECT_LOCK(fs.object); + vm_page_lock(fs.m); vm_page_lock_queues(); /* @@ -938,6 +974,7 @@ vnode_locked: vm_page_activate(fs.m); } vm_page_unlock_queues(); + vm_page_unlock(fs.m); vm_page_wakeup(fs.m); /* @@ -1014,9 +1051,11 @@ vm_fault_prefault(pmap_t pmap, vm_offset_t addra, vm_map_entry_t entry) } if (m->valid == VM_PAGE_BITS_ALL && (m->flags & PG_FICTITIOUS) == 0) { + vm_page_lock(m); vm_page_lock_queues(); pmap_enter_quick(pmap, addr, m, entry->protection); vm_page_unlock_queues(); + vm_page_unlock(m); } VM_OBJECT_UNLOCK(lobject); } @@ -1092,9 +1131,11 @@ vm_fault_unwire(vm_map_t map, vm_offset_t start, vm_offset_t end, if (pa != 0) { pmap_change_wiring(pmap, va, FALSE); if (!fictitious) { + vm_page_lock(PHYS_TO_VM_PAGE(pa)); vm_page_lock_queues(); vm_page_unwire(PHYS_TO_VM_PAGE(pa), 1); vm_page_unlock_queues(); + vm_page_unlock(PHYS_TO_VM_PAGE(pa)); } } } @@ -1237,13 +1278,26 @@ vm_fault_copy_entry(vm_map_t dst_map, vm_map_t src_map, * Mark it no longer busy, and put it on the active list. 
*/ VM_OBJECT_LOCK(dst_object); - vm_page_lock_queues(); + if (upgrade) { + vm_page_lock(src_m); + vm_page_lock_queues(); vm_page_unwire(src_m, 0); + vm_page_unlock_queues(); + vm_page_lock(src_m); + + vm_page_lock(dst_m); + vm_page_lock_queues(); vm_page_wire(dst_m); - } else + vm_page_unlock_queues(); + vm_page_lock(dst_m); + } else { + vm_page_lock(dst_m); + vm_page_lock_queues(); vm_page_activate(dst_m); - vm_page_unlock_queues(); + vm_page_unlock_queues(); + vm_page_lock(dst_m); + } vm_page_wakeup(dst_m); } VM_OBJECT_UNLOCK(dst_object); diff --git a/sys/vm/vm_glue.c b/sys/vm/vm_glue.c index 4eeaa4d..288c5d7 100644 --- a/sys/vm/vm_glue.c +++ b/sys/vm/vm_glue.c @@ -257,16 +257,18 @@ vm_imgact_hold_page(vm_object_t object, vm_ooffset_t offset) if (m == NULL) goto out; if (rv != VM_PAGER_OK) { + vm_page_lock(m); vm_page_lock_queues(); vm_page_free(m); vm_page_unlock_queues(); + vm_page_unlock(m); m = NULL; goto out; } } - vm_page_lock_queues(); + vm_page_lock(m); vm_page_hold(m); - vm_page_unlock_queues(); + vm_page_unlock(m); vm_page_wakeup(m); out: VM_OBJECT_UNLOCK(object); @@ -300,9 +302,9 @@ vm_imgact_unmap_page(struct sf_buf *sf) m = sf_buf_page(sf); sf_buf_free(sf); sched_unpin(); - vm_page_lock_queues(); + vm_page_lock(m); vm_page_unhold(m); - vm_page_unlock_queues(); + vm_page_unlock(m); } void @@ -434,10 +436,12 @@ vm_thread_stack_dispose(vm_object_t ksobj, vm_offset_t ks, int pages) m = vm_page_lookup(ksobj, i); if (m == NULL) panic("vm_thread_dispose: kstack already missing?"); + vm_page_lock(m); vm_page_lock_queues(); vm_page_unwire(m, 0); vm_page_free(m); vm_page_unlock_queues(); + vm_page_unlock(m); } VM_OBJECT_UNLOCK(ksobj); vm_object_deallocate(ksobj); @@ -524,9 +528,11 @@ vm_thread_swapout(struct thread *td) if (m == NULL) panic("vm_thread_swapout: kstack already missing?"); vm_page_dirty(m); + vm_page_lock(m); vm_page_lock_queues(); vm_page_unwire(m, 0); vm_page_unlock_queues(); + vm_page_unlock(m); } VM_OBJECT_UNLOCK(ksobj); } diff --git a/sys/vm/vm_kern.c b/sys/vm/vm_kern.c index 739d289..ead6d0d 100644 --- a/sys/vm/vm_kern.c +++ b/sys/vm/vm_kern.c @@ -380,10 +380,12 @@ retry: i -= PAGE_SIZE; m = vm_page_lookup(kmem_object, OFF_TO_IDX(offset + i)); + vm_page_lock(m); vm_page_lock_queues(); vm_page_unwire(m, 0); vm_page_free(m); vm_page_unlock_queues(); + vm_page_unlock(m); } VM_OBJECT_UNLOCK(kmem_object); vm_map_delete(map, addr, addr + size); diff --git a/sys/vm/vm_mmap.c b/sys/vm/vm_mmap.c index 88ed3d5..f9b3db3 100644 --- a/sys/vm/vm_mmap.c +++ b/sys/vm/vm_mmap.c @@ -866,6 +866,7 @@ RestartScan: */ if (m != NULL && m->valid != 0) { mincoreinfo = MINCORE_INCORE; + vm_page_lock(m); vm_page_lock_queues(); if (m->dirty || pmap_is_modified(m)) @@ -874,6 +875,7 @@ RestartScan: pmap_is_referenced(m)) mincoreinfo |= MINCORE_REFERENCED_OTHER; vm_page_unlock_queues(); + vm_page_unlock(m); } VM_OBJECT_UNLOCK(current->object.vm_object); } diff --git a/sys/vm/vm_object.c b/sys/vm/vm_object.c index c25ab77..47ef973 100644 --- a/sys/vm/vm_object.c +++ b/sys/vm/vm_object.c @@ -717,19 +717,21 @@ vm_object_terminate(vm_object_t object) * removes them from paging queues. Don't free wired pages, just * remove them from the object. 
*/ - vm_page_lock_queues(); while ((p = TAILQ_FIRST(&object->memq)) != NULL) { KASSERT(!p->busy && (p->oflags & VPO_BUSY) == 0, ("vm_object_terminate: freeing busy page %p " "p->busy = %d, p->oflags %x\n", p, p->busy, p->oflags)); + vm_page_lock(p); + vm_page_lock_queues(); if (p->wire_count == 0) { vm_page_free(p); cnt.v_pfree++; } else { vm_page_remove(p); } + vm_page_unlock_queues(); + vm_page_unlock(p); } - vm_page_unlock_queues(); #if VM_NRESERVLEVEL > 0 if (__predict_false(!LIST_EMPTY(&object->rvq))) @@ -789,7 +791,6 @@ vm_object_page_clean(vm_object_t object, vm_pindex_t start, vm_pindex_t end, int tend = end; } - vm_page_lock_queues(); /* * If the caller is smart and only msync()s a range he knows is * dirty, we may be able to avoid an object scan. This results in @@ -818,8 +819,12 @@ vm_object_page_clean(vm_object_t object, vm_pindex_t start, vm_pindex_t end, int ++tscan; continue; } + vm_page_lock(p); + vm_page_lock_queues(); vm_page_test_dirty(p); if (p->dirty == 0) { + vm_page_unlock_queues(); + vm_page_unlock(p); if (--scanlimit == 0) break; ++tscan; @@ -830,6 +835,8 @@ vm_object_page_clean(vm_object_t object, vm_pindex_t start, vm_pindex_t end, int * this is a nosync page, we can't continue. */ if ((flags & OBJPC_NOSYNC) && (p->oflags & VPO_NOSYNC)) { + vm_page_unlock_queues(); + vm_page_unlock(p); if (--scanlimit == 0) break; ++tscan; @@ -842,6 +849,8 @@ vm_object_page_clean(vm_object_t object, vm_pindex_t start, vm_pindex_t end, int * page (i.e. had to sleep). */ tscan += vm_object_page_collect_flush(object, p, curgeneration, pagerflags); + vm_page_unlock_queues(); + vm_page_unlock(p); } /* @@ -871,8 +880,13 @@ vm_object_page_clean(vm_object_t object, vm_pindex_t start, vm_pindex_t end, int p->oflags |= VPO_CLEANCHK; if ((flags & OBJPC_NOSYNC) && (p->oflags & VPO_NOSYNC)) clearobjflags = 0; - else + else { + vm_page_lock(p); + vm_page_lock_queues(); pmap_remove_write(p); + vm_page_unlock_queues(); + vm_page_unlock(p); + } } if (clearobjflags && (tstart == 0) && (tend == object->size)) @@ -895,8 +909,12 @@ again: continue; } + vm_page_lock(p); + vm_page_lock_queues(); vm_page_test_dirty(p); if (p->dirty == 0) { + vm_page_unlock_queues(); + vm_page_unlock(p); p->oflags &= ~VPO_CLEANCHK; continue; } @@ -907,28 +925,37 @@ again: * not cleared in this case so we do not have to set them. */ if ((flags & OBJPC_NOSYNC) && (p->oflags & VPO_NOSYNC)) { + vm_page_unlock_queues(); + vm_page_unlock(p); p->oflags &= ~VPO_CLEANCHK; continue; } n = vm_object_page_collect_flush(object, p, curgeneration, pagerflags); - if (n == 0) + if (n == 0) { + vm_page_unlock_queues(); + vm_page_unlock(p); goto rescan; + } - if (object->generation != curgeneration) + if (object->generation != curgeneration) { + vm_page_unlock_queues(); + vm_page_unlock(p); goto rescan; + } /* * Try to optimize the next page. If we can't we pick up * our (random) scan where we left off. 
*/ if (msync_flush_flags & MSYNC_FLUSH_SOFTSEQ) { + vm_page_unlock_queues(); + vm_page_unlock(p); if ((p = vm_page_lookup(object, pi + n)) != NULL) goto again; } } - vm_page_unlock_queues(); #if 0 VOP_FSYNC(vp, (pagerflags & VM_PAGER_PUT_SYNC)?MNT_WAIT:0, curproc); #endif @@ -951,8 +978,10 @@ vm_object_page_collect_flush(vm_object_t object, vm_page_t p, int curgeneration, vm_page_t ma[vm_pageout_page_count]; mtx_assert(&vm_page_queue_mtx, MA_OWNED); + vm_page_lock_assert(p, MA_OWNED); pi = p->pindex; while (vm_page_sleep_if_busy(p, TRUE, "vpcwai")) { + vm_page_lock(p); vm_page_lock_queues(); if (object->generation != curgeneration) { return(0); @@ -968,11 +997,16 @@ vm_object_page_collect_flush(vm_object_t object, vm_page_t p, int curgeneration, (tp->oflags & VPO_CLEANCHK) == 0) || (tp->busy != 0)) break; + vm_page_unlock_queues(); + vm_page_lock(tp); + vm_page_lock_queues(); vm_page_test_dirty(tp); if (tp->dirty == 0) { + vm_page_unlock(tp); tp->oflags &= ~VPO_CLEANCHK; break; } + vm_page_unlock(tp); maf[ i - 1 ] = tp; maxf++; continue; @@ -992,11 +1026,16 @@ vm_object_page_collect_flush(vm_object_t object, vm_page_t p, int curgeneration, (tp->oflags & VPO_CLEANCHK) == 0) || (tp->busy != 0)) break; + vm_page_unlock_queues(); + vm_page_lock(tp); + vm_page_lock_queues(); vm_page_test_dirty(tp); if (tp->dirty == 0) { + vm_page_unlock(tp); tp->oflags &= ~VPO_CLEANCHK; break; } + vm_page_unlock(tp); mab[ i - 1 ] = tp; maxb++; continue; @@ -1022,7 +1061,11 @@ vm_object_page_collect_flush(vm_object_t object, vm_page_t p, int curgeneration, vm_pageout_flush(ma, runlen, pagerflags); for (i = 0; i < runlen; i++) { if (ma[i]->dirty) { + vm_page_unlock_queues(); + vm_page_lock(ma[i]); + vm_page_lock_queues(); pmap_remove_write(ma[i]); + vm_page_unlock(ma[i]); ma[i]->oflags |= VPO_CLEANCHK; /* @@ -1195,9 +1238,11 @@ shadowlookup: /* * If the page is not in a normal state, skip it. */ + vm_page_lock(m); vm_page_lock_queues(); if (m->hold_count != 0 || m->wire_count != 0) { vm_page_unlock_queues(); + vm_page_unlock(m); goto unlock_tobject; } if ((m->oflags & VPO_BUSY) || m->busy) { @@ -1209,6 +1254,7 @@ shadowlookup: */ vm_page_flag_set(m, PG_REFERENCED); vm_page_unlock_queues(); + vm_page_unlock(m); if (object != tobject) VM_OBJECT_UNLOCK(object); m->oflags |= VPO_WANTED; @@ -1243,6 +1289,7 @@ shadowlookup: vm_page_dontneed(m); } vm_page_unlock_queues(); + vm_page_unlock(m); if (advise == MADV_FREE && tobject->type == OBJT_SWAP) swap_pager_freespace(tobject, tpindex, 1); unlock_tobject: @@ -1405,7 +1452,6 @@ retry: m = TAILQ_NEXT(m, listq); } } - vm_page_lock_queues(); for (; m != NULL && (idx = m->pindex - offidxstart) < size; m = m_next) { m_next = TAILQ_NEXT(m, listq); @@ -1425,11 +1471,14 @@ retry: VM_OBJECT_LOCK(new_object); goto retry; } + vm_page_lock(m); + vm_page_lock_queues(); vm_page_rename(m, new_object, idx); + vm_page_unlock_queues(); + vm_page_unlock(m); /* page automatically made dirty by rename and cache handled */ vm_page_busy(m); } - vm_page_unlock_queues(); if (orig_object->type == OBJT_SWAP) { /* * swap_pager_copy() can sleep, in which case the orig_object's @@ -1597,6 +1646,7 @@ vm_object_backing_scan(vm_object_t object, int op) * Page is out of the parent object's range, we * can simply destroy it. 
*/ + vm_page_lock(p); vm_page_lock_queues(); KASSERT(!pmap_page_is_mapped(p), ("freeing mapped page %p", p)); @@ -1605,6 +1655,7 @@ vm_object_backing_scan(vm_object_t object, int op) else vm_page_remove(p); vm_page_unlock_queues(); + vm_page_unlock(p); p = next; continue; } @@ -1621,6 +1672,7 @@ vm_object_backing_scan(vm_object_t object, int op) * * Leave the parent's page alone */ + vm_page_lock(p); vm_page_lock_queues(); KASSERT(!pmap_page_is_mapped(p), ("freeing mapped page %p", p)); @@ -1629,6 +1681,7 @@ vm_object_backing_scan(vm_object_t object, int op) else vm_page_remove(p); vm_page_unlock_queues(); + vm_page_unlock(p); p = next; continue; } @@ -1648,9 +1701,11 @@ vm_object_backing_scan(vm_object_t object, int op) * If the page was mapped to a process, it can remain * mapped through the rename. */ + vm_page_lock(p); vm_page_lock_queues(); vm_page_rename(p, object, new_pindex); vm_page_unlock_queues(); + vm_page_unlock(p); /* page automatically made dirty by rename */ } p = next; @@ -1915,7 +1970,7 @@ again: p = TAILQ_NEXT(p, listq); } } - vm_page_lock_queues(); + /* * Assert: the variable p is either (1) the page with the * least pindex greater than or equal to the parameter pindex @@ -1934,6 +1989,8 @@ again: * cannot be freed. They can, however, be invalidated * if "clean_only" is FALSE. */ + vm_page_lock(p); + vm_page_lock_queues(); if ((wirings = p->wire_count) != 0 && (wirings = pmap_page_wired_mappings(p)) != p->wire_count) { /* Fictitious pages do not have managed mappings. */ @@ -1945,6 +2002,8 @@ again: p->valid = 0; vm_page_undirty(p); } + vm_page_unlock_queues(); + vm_page_unlock(p); continue; } if (vm_page_sleep_if_busy(p, TRUE, "vmopar")) @@ -1953,16 +2012,20 @@ again: ("vm_object_page_remove: page %p is fictitious", p)); if (clean_only && p->valid) { pmap_remove_write(p); - if (p->dirty) + if (p->dirty) { + vm_page_unlock_queues(); + vm_page_unlock(p); continue; + } } pmap_remove_all(p); /* Account for removal of managed, wired mappings. 
*/ if (wirings != 0) p->wire_count -= wirings; vm_page_free(p); + vm_page_unlock_queues(); + vm_page_unlock(p); } - vm_page_unlock_queues(); vm_object_pip_wakeup(object); skipmemq: if (__predict_false(object->cache != NULL)) @@ -1997,9 +2060,11 @@ vm_object_populate(vm_object_t object, vm_pindex_t start, vm_pindex_t end) if (m == NULL) break; if (rv != VM_PAGER_OK) { + vm_page_lock(m); vm_page_lock_queues(); vm_page_free(m); vm_page_unlock_queues(); + vm_page_unlock(m); break; } } diff --git a/sys/vm/vm_page.c b/sys/vm/vm_page.c index 7c149c9..4c0d385 100644 --- a/sys/vm/vm_page.c +++ b/sys/vm/vm_page.c @@ -115,6 +115,7 @@ __FBSDID("$FreeBSD$"); #include <sys/vnode.h> #include <vm/vm.h> +#include <vm/pmap.h> #include <vm/vm_param.h> #include <vm/vm_kern.h> #include <vm/vm_object.h> @@ -129,6 +130,24 @@ __FBSDID("$FreeBSD$"); #include <machine/md_var.h> +#if defined(__amd64__) || defined (__i386__) +extern struct sysctl_oid_list sysctl__vm_pmap_children; +#else +SYSCTL_NODE(_vm, OID_AUTO, pmap, CTLFLAG_RD, 0, "VM/pmap parameters"); +#endif + +static uint64_t pmap_tryrelock_calls; +SYSCTL_QUAD(_vm_pmap, OID_AUTO, tryrelock_calls, CTLFLAG_RD, + &pmap_tryrelock_calls, 0, "Number of tryrelock calls"); + +static int pmap_tryrelock_restart; +SYSCTL_INT(_vm_pmap, OID_AUTO, tryrelock_restart, CTLFLAG_RD, + &pmap_tryrelock_restart, 0, "Number of tryrelock restarts"); + +static int pmap_tryrelock_race; +SYSCTL_INT(_vm_pmap, OID_AUTO, tryrelock_race, CTLFLAG_RD, + &pmap_tryrelock_race, 0, "Number of tryrelock pmap race cases"); + /* * Associated with page of user-allocatable memory is a * page structure. @@ -138,6 +157,8 @@ struct vpgqueues vm_page_queues[PQ_COUNT]; struct vpglocks vm_page_queue_lock; struct vpglocks vm_page_queue_free_lock; +struct vpglocks pa_lock[PA_LOCK_COUNT] __aligned(CACHE_LINE_SIZE); + vm_page_t vm_page_array = 0; int vm_page_array_size = 0; long first_page = 0; @@ -158,6 +179,43 @@ CTASSERT(sizeof(u_long) >= 8); #endif /* + * Try to acquire a physical address lock while a pmap is locked. If we + * fail to trylock we unlock and lock the pmap directly and cache the + * locked pa in *locked. The caller should then restart their loop in case + * the virtual to physical mapping has changed. + */ +int +vm_page_pa_tryrelock(pmap_t pmap, vm_paddr_t pa, vm_paddr_t *locked) +{ + vm_paddr_t lockpa; + uint32_t gen_count; + + gen_count = pmap->pm_gen_count; + atomic_add_long((volatile long *)&pmap_tryrelock_calls, 1); + lockpa = *locked; + *locked = pa; + if (lockpa) { + PA_LOCK_ASSERT(lockpa, MA_OWNED); + if (PA_LOCKPTR(pa) == PA_LOCKPTR(lockpa)) + return (0); + PA_UNLOCK(lockpa); + } + if (PA_TRYLOCK(pa)) + return (0); + PMAP_UNLOCK(pmap); + atomic_add_int((volatile int *)&pmap_tryrelock_restart, 1); + PA_LOCK(pa); + PMAP_LOCK(pmap); + + if (pmap->pm_gen_count != gen_count + 1) { + pmap->pm_retries++; + atomic_add_int((volatile int *)&pmap_tryrelock_race, 1); + return (EAGAIN); + } + return (0); +} + +/* * vm_set_page_size: * * Sets the page size, perhaps based upon the memory @@ -271,6 +329,11 @@ vm_page_startup(vm_offset_t vaddr) mtx_init(&vm_page_queue_free_mtx, "vm page queue free mutex", NULL, MTX_DEF); + /* Setup page locks. */ + for (i = 0; i < PA_LOCK_COUNT; i++) + mtx_init(&pa_lock[i].data, "page lock", NULL, + MTX_DEF | MTX_RECURSE | MTX_DUPOK); + /* * Initialize the queue headers for the hold queue, the active queue, * and the inactive queue. 
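The pa_lock[] array initialized above is indexed by hashing the physical address, per the pa_index()/PA_LOCKPTR() macros added to vm_page.h further down: pages within one superpage-sized (PDRSHIFT) region share a mutex, and regions hash modulo PA_LOCK_COUNT (256 on amd64 SMP per the vmparam.h hunk, 32 by default on other SMP kernels, 1 on UP per vm_param.h). A small standalone illustration of that hashing, not taken from the diff:

#include <stdio.h>

#define PDRSHIFT	21	/* 2MB superpage; the generic default added to vm_page.h */
#define PA_LOCK_COUNT	256	/* amd64 SMP value from the vmparam.h hunk above */

/* Mirror of pa_index()/PA_LOCKPTR(): superpage number modulo the lock count. */
static unsigned
pa_lock_index(unsigned long long pa)
{
	return ((unsigned)((pa >> PDRSHIFT) % PA_LOCK_COUNT));
}

int
main(void)
{
	/* Two pages in the same 2MB region share a lock (prints "1 1"). */
	printf("%u %u\n", pa_lock_index(0x200000ULL), pa_lock_index(0x201000ULL));
	/* Pages in different regions usually do not (prints "1 2"). */
	printf("%u %u\n", pa_lock_index(0x200000ULL), pa_lock_index(0x400000ULL));
	return (0);
}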
@@ -489,7 +552,7 @@ void vm_page_hold(vm_page_t mem) { - mtx_assert(&vm_page_queue_mtx, MA_OWNED); + vm_page_lock_assert(mem, MA_OWNED); mem->hold_count++; } @@ -497,7 +560,7 @@ void vm_page_unhold(vm_page_t mem) { - mtx_assert(&vm_page_queue_mtx, MA_OWNED); + vm_page_lock_assert(mem, MA_OWNED); --mem->hold_count; KASSERT(mem->hold_count >= 0, ("vm_page_unhold: hold count < 0!!!")); if (mem->hold_count == 0 && VM_PAGE_INQUEUE2(mem, PQ_HOLD)) @@ -542,10 +605,13 @@ vm_page_sleep(vm_page_t m, const char *msg) { VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED); + if (!mtx_owned(vm_page_lockptr(m))) + vm_page_lock(m); if (!mtx_owned(&vm_page_queue_mtx)) vm_page_lock_queues(); vm_page_flag_set(m, PG_REFERENCED); vm_page_unlock_queues(); + vm_page_unlock(m); /* * It's possible that while we sleep, the page will get @@ -1425,6 +1491,7 @@ vm_page_free_toq(vm_page_t m) panic("vm_page_free: freeing wired page"); } if (m->hold_count != 0) { + vm_page_lock_assert(m, MA_OWNED); m->flags &= ~PG_ZERO; vm_page_enqueue(PQ_HOLD, m); } else { diff --git a/sys/vm/vm_page.h b/sys/vm/vm_page.h index 662af98..35a81f8 100644 --- a/sys/vm/vm_page.h +++ b/sys/vm/vm_page.h @@ -177,9 +177,35 @@ struct vpglocks { } __aligned(CACHE_LINE_SIZE); extern struct vpglocks vm_page_queue_free_lock; +extern struct vpglocks pa_lock[]; -#define vm_page_queue_free_mtx vm_page_queue_free_lock.data +#if defined(__arm__) +#define PDRSHIFT PDR_SHIFT +#elif !defined(PDRSHIFT) +#define PDRSHIFT 21 +#endif +#define pa_index(pa) ((pa) >> PDRSHIFT) +#define PA_LOCKPTR(pa) &pa_lock[pa_index((pa)) % PA_LOCK_COUNT].data +#define PA_LOCKOBJPTR(pa) ((struct lock_object *)PA_LOCKPTR((pa))) +#define PA_LOCK(pa) mtx_lock(PA_LOCKPTR(pa)) +#define PA_TRYLOCK(pa) mtx_trylock(PA_LOCKPTR(pa)) +#define PA_UNLOCK(pa) mtx_unlock(PA_LOCKPTR(pa)) +#define PA_UNLOCK_COND(pa) \ + do { \ + if (pa) \ + PA_UNLOCK(pa); \ + } while (0) + +#define PA_LOCK_ASSERT(pa, a) mtx_assert(PA_LOCKPTR(pa), (a)) + +#define vm_page_lockptr(m) (PA_LOCKPTR(VM_PAGE_TO_PHYS((m)))) +#define vm_page_lock(m) mtx_lock(vm_page_lockptr((m))) +#define vm_page_unlock(m) mtx_unlock(vm_page_lockptr((m))) +#define vm_page_trylock(m) mtx_trylock(vm_page_lockptr((m))) +#define vm_page_lock_assert(m, a) mtx_assert(vm_page_lockptr((m)), (a)) + +#define vm_page_queue_free_mtx vm_page_queue_free_lock.data /* * These are the flags defined for vm_page. * @@ -324,6 +350,7 @@ void vm_page_dontneed(vm_page_t); void vm_page_deactivate (vm_page_t); void vm_page_insert (vm_page_t, vm_object_t, vm_pindex_t); vm_page_t vm_page_lookup (vm_object_t, vm_pindex_t); +int vm_page_pa_tryrelock(pmap_t, vm_paddr_t, vm_paddr_t *); void vm_page_remove (vm_page_t); void vm_page_rename (vm_page_t, vm_object_t, vm_pindex_t); void vm_page_requeue(vm_page_t m); diff --git a/sys/vm/vm_pageout.c b/sys/vm/vm_pageout.c index 258da8b..95517de 100644 --- a/sys/vm/vm_pageout.c +++ b/sys/vm/vm_pageout.c @@ -252,7 +252,9 @@ vm_pageout_fallback_object_lock(vm_page_t m, vm_page_t *next) TAILQ_INSERT_AFTER(&vm_page_queues[queue].pl, m, &marker, pageq); vm_page_unlock_queues(); + vm_page_unlock(m); VM_OBJECT_LOCK(object); + vm_page_lock(m); vm_page_lock_queues(); /* Page queue might have changed. */ @@ -275,8 +277,7 @@ vm_pageout_fallback_object_lock(vm_page_t m, vm_page_t *next) * late and we cannot do anything that will mess with the page. 
*/ static int -vm_pageout_clean(m) - vm_page_t m; +vm_pageout_clean(vm_page_t m) { vm_object_t object; vm_page_t mc[2*vm_pageout_page_count]; @@ -284,7 +285,8 @@ vm_pageout_clean(m) int ib, is, page_base; vm_pindex_t pindex = m->pindex; - mtx_assert(&vm_page_queue_mtx, MA_OWNED); + vm_page_lock_assert(m, MA_NOTOWNED); + vm_page_lock(m); VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED); /* @@ -301,6 +303,7 @@ vm_pageout_clean(m) */ if ((m->hold_count != 0) || ((m->busy != 0) || (m->oflags & VPO_BUSY))) { + vm_page_unlock(m); return 0; } @@ -347,13 +350,19 @@ more: ib = 0; break; } + vm_page_lock(p); + vm_page_lock_queues(); vm_page_test_dirty(p); if (p->dirty == 0 || p->queue != PQ_INACTIVE || p->hold_count != 0) { /* may be undergoing I/O */ + vm_page_unlock(p); + vm_page_unlock_queues(); ib = 0; break; } + vm_page_unlock_queues(); + vm_page_unlock(p); mc[--page_base] = p; ++pageout_count; ++ib; @@ -374,12 +383,18 @@ more: if ((p->oflags & VPO_BUSY) || p->busy) { break; } + vm_page_lock(p); + vm_page_lock_queues(); vm_page_test_dirty(p); if (p->dirty == 0 || p->queue != PQ_INACTIVE || p->hold_count != 0) { /* may be undergoing I/O */ + vm_page_unlock_queues(); + vm_page_unlock(p); break; } + vm_page_unlock_queues(); + vm_page_unlock(p); mc[page_base + pageout_count] = p; ++pageout_count; ++is; @@ -393,6 +408,7 @@ more: if (ib && pageout_count < vm_pageout_page_count) goto more; + vm_page_unlock(m); /* * we allow reads during pageouts... */ @@ -416,7 +432,6 @@ vm_pageout_flush(vm_page_t *mc, int count, int flags) int numpagedout = 0; int i; - mtx_assert(&vm_page_queue_mtx, MA_OWNED); VM_OBJECT_LOCK_ASSERT(object, MA_OWNED); /* * Initiate I/O. Bump the vm_page_t->busy counter and @@ -433,17 +448,21 @@ vm_pageout_flush(vm_page_t *mc, int count, int flags) ("vm_pageout_flush: partially invalid page %p index %d/%d", mc[i], i, count)); vm_page_io_start(mc[i]); + vm_page_lock(mc[i]); + vm_page_lock_queues(); pmap_remove_write(mc[i]); + vm_page_unlock(mc[i]); + vm_page_unlock_queues(); } - vm_page_unlock_queues(); vm_object_pip_add(object, count); vm_pager_put_pages(object, mc, count, flags, pageout_status); - vm_page_lock_queues(); for (i = 0; i < count; i++) { vm_page_t mt = mc[i]; + vm_page_lock(mt); + vm_page_lock_queues(); KASSERT(pageout_status[i] == VM_PAGER_PEND || (mt->flags & PG_WRITEABLE) == 0, ("vm_pageout_flush: page %p is not write protected", mt)); @@ -485,6 +504,8 @@ vm_pageout_flush(vm_page_t *mc, int count, int flags) if (vm_page_count_severe()) vm_page_try_to_cache(mt); } + vm_page_unlock_queues(); + vm_page_unlock(mt); } return numpagedout; } @@ -527,10 +548,12 @@ vm_pageout_object_deactivate_pages(pmap, first_object, desired) * scan the objects entire memory queue */ p = TAILQ_FIRST(&object->memq); - vm_page_lock_queues(); while (p != NULL) { + vm_page_lock(p); + vm_page_lock_queues(); if (pmap_resident_count(pmap) <= desired) { vm_page_unlock_queues(); + vm_page_unlock(p); goto unlock_return; } next = TAILQ_NEXT(p, listq); @@ -540,6 +563,8 @@ vm_pageout_object_deactivate_pages(pmap, first_object, desired) p->busy != 0 || (p->oflags & VPO_BUSY) || !pmap_page_exists_quick(pmap, p)) { + vm_page_unlock_queues(); + vm_page_unlock(p); p = next; continue; } @@ -573,9 +598,10 @@ vm_pageout_object_deactivate_pages(pmap, first_object, desired) } else if (p->queue == PQ_INACTIVE) { pmap_remove_all(p); } + vm_page_unlock_queues(); + vm_page_unlock(p); p = next; } - vm_page_unlock_queues(); if ((backing_object = object->backing_object) == NULL) goto unlock_return; 
VM_OBJECT_LOCK(backing_object); @@ -742,7 +768,6 @@ rescan0: } next = TAILQ_NEXT(m, pageq); - object = m->object; /* * skip marker pages @@ -758,18 +783,26 @@ rescan0: addl_page_shortage++; continue; } + + if (!vm_page_trylock(m) || (object = m->object) == NULL) { + addl_page_shortage++; + continue; + } + /* * Don't mess with busy pages, keep in the front of the * queue, most likely are being paged out. */ if (!VM_OBJECT_TRYLOCK(object) && (!vm_pageout_fallback_object_lock(m, &next) || - m->hold_count != 0)) { + m->hold_count != 0)) { VM_OBJECT_UNLOCK(object); + vm_page_unlock(m); addl_page_shortage++; continue; } if (m->busy || (m->oflags & VPO_BUSY)) { + vm_page_unlock(m); VM_OBJECT_UNLOCK(object); addl_page_shortage++; continue; @@ -798,6 +831,7 @@ rescan0: vm_page_activate(m); VM_OBJECT_UNLOCK(object); m->act_count += (actcount + ACT_ADVANCE); + vm_page_unlock(m); continue; } @@ -813,6 +847,7 @@ rescan0: vm_page_activate(m); VM_OBJECT_UNLOCK(object); m->act_count += (actcount + ACT_ADVANCE + 1); + vm_page_unlock(m); continue; } @@ -898,6 +933,7 @@ rescan0: * Those objects are in a "rundown" state. */ if (!swap_pageouts_ok || (object->flags & OBJ_DEAD)) { + vm_page_unlock(m); VM_OBJECT_UNLOCK(object); vm_page_requeue(m); continue; @@ -937,6 +973,8 @@ rescan0: * of time. */ if (object->type == OBJT_VNODE) { + vm_page_unlock_queues(); + vm_page_unlock(m); vp = object->handle; if (vp->v_type == VREG && vn_start_write(vp, &mp, V_NOWAIT) != 0) { @@ -944,11 +982,11 @@ rescan0: ++pageout_lock_miss; if (object->flags & OBJ_MIGHTBEDIRTY) vnodes_skipped++; + vm_page_lock_queues(); goto unlock_and_continue; } KASSERT(mp != NULL, ("vp %p with NULL v_mount", vp)); - vm_page_unlock_queues(); vm_object_reference_locked(object); VM_OBJECT_UNLOCK(object); vfslocked = VFS_LOCK_GIANT(vp->v_mount); @@ -963,6 +1001,7 @@ rescan0: goto unlock_and_continue; } VM_OBJECT_LOCK(object); + vm_page_lock(m); vm_page_lock_queues(); /* * The page might have been moved to another @@ -973,6 +1012,7 @@ rescan0: if (VM_PAGE_GETQUEUE(m) != PQ_INACTIVE || m->object != object || TAILQ_NEXT(m, pageq) != &marker) { + vm_page_unlock(m); if (object->flags & OBJ_MIGHTBEDIRTY) vnodes_skipped++; goto unlock_and_continue; @@ -985,6 +1025,7 @@ rescan0: * statistics are more correct if we don't. */ if (m->busy || (m->oflags & VPO_BUSY)) { + vm_page_unlock(m); goto unlock_and_continue; } @@ -993,12 +1034,14 @@ rescan0: * be undergoing I/O, so skip it */ if (m->hold_count) { + vm_page_unlock(m); vm_page_requeue(m); if (object->flags & OBJ_MIGHTBEDIRTY) vnodes_skipped++; goto unlock_and_continue; } } + vm_page_unlock(m); /* * If a page is dirty, then it is either being washed @@ -1010,11 +1053,14 @@ rescan0: * the (future) cleaned page. Otherwise we could wind * up laundering or cleaning too many pages. 
*/ + vm_page_unlock_queues(); if (vm_pageout_clean(m) != 0) { --page_shortage; --maxlaunder; } + vm_page_lock_queues(); unlock_and_continue: + vm_page_lock_assert(m, MA_NOTOWNED); VM_OBJECT_UNLOCK(object); if (mp != NULL) { vm_page_unlock_queues(); @@ -1028,8 +1074,10 @@ unlock_and_continue: next = TAILQ_NEXT(&marker, pageq); TAILQ_REMOVE(&vm_page_queues[PQ_INACTIVE].pl, &marker, pageq); + vm_page_lock_assert(m, MA_NOTOWNED); continue; } + vm_page_unlock(m); VM_OBJECT_UNLOCK(object); } @@ -1048,6 +1096,7 @@ unlock_and_continue: */ pcount = cnt.v_active_count; m = TAILQ_FIRST(&vm_page_queues[PQ_ACTIVE].pl); + mtx_assert(&vm_page_queue_mtx, MA_OWNED); while ((m != NULL) && (pcount-- > 0) && (page_shortage > 0)) { @@ -1060,9 +1109,14 @@ unlock_and_continue: m = next; continue; } + if (!vm_page_trylock(m) || (object = m->object) == NULL) { + m = next; + continue; + } if (!VM_OBJECT_TRYLOCK(object) && !vm_pageout_fallback_object_lock(m, &next)) { VM_OBJECT_UNLOCK(object); + vm_page_unlock(m); m = next; continue; } @@ -1073,6 +1127,7 @@ unlock_and_continue: if ((m->busy != 0) || (m->oflags & VPO_BUSY) || (m->hold_count != 0)) { + vm_page_unlock(m); VM_OBJECT_UNLOCK(object); vm_page_requeue(m); m = next; @@ -1132,6 +1187,7 @@ unlock_and_continue: vm_page_requeue(m); } } + vm_page_unlock(m); VM_OBJECT_UNLOCK(object); m = next; } @@ -1317,9 +1373,15 @@ vm_pageout_page_stats() m = next; continue; } + vm_page_lock_assert(m, MA_NOTOWNED); + if (vm_page_trylock(m) == 0 || (object = m->object) == NULL) { + m = next; + continue; + } if (!VM_OBJECT_TRYLOCK(object) && !vm_pageout_fallback_object_lock(m, &next)) { VM_OBJECT_UNLOCK(object); + vm_page_unlock(m); m = next; continue; } @@ -1330,6 +1392,7 @@ vm_pageout_page_stats() if ((m->busy != 0) || (m->oflags & VPO_BUSY) || (m->hold_count != 0)) { + vm_page_unlock(m); VM_OBJECT_UNLOCK(object); vm_page_requeue(m); m = next; @@ -1366,6 +1429,7 @@ vm_pageout_page_stats() vm_page_requeue(m); } } + vm_page_unlock(m); VM_OBJECT_UNLOCK(object); m = next; } diff --git a/sys/vm/vm_param.h b/sys/vm/vm_param.h index 2ff2603..c404989 100644 --- a/sys/vm/vm_param.h +++ b/sys/vm/vm_param.h @@ -126,6 +126,14 @@ struct xswdev { #define KERN_NOT_RECEIVER 7 #define KERN_NO_ACCESS 8 +#ifndef PA_LOCK_COUNT +#ifdef SMP +#define PA_LOCK_COUNT 32 +#else +#define PA_LOCK_COUNT 1 +#endif /* !SMP */ +#endif /* !PA_LOCK_COUNT */ + #ifndef ASSEMBLER #ifdef _KERNEL #define num_pages(x) \ diff --git a/sys/vm/vnode_pager.c b/sys/vm/vnode_pager.c index aedc794..eb21c60 100644 --- a/sys/vm/vnode_pager.c +++ b/sys/vm/vnode_pager.c @@ -429,9 +429,11 @@ vnode_pager_setsize(vp, nsize) * bits. This would prevent bogus_page * replacement from working properly. 
 */
+ vm_page_lock(m);
 vm_page_lock_queues();
 vm_page_clear_dirty(m, base, PAGE_SIZE - base);
 vm_page_unlock_queues();
+ vm_page_unlock(m);
 } else if ((nsize & PAGE_MASK) &&
 __predict_false(object->cache != NULL)) {
 vm_page_cache_free(object, OFF_TO_IDX(nsize),
@@ -719,11 +721,15 @@ vnode_pager_generic_getpages(vp, m, bytecount, reqpage)
 error = VOP_BMAP(vp, foff / bsize, &bo, &reqblock, NULL, NULL);
 if (error == EOPNOTSUPP) {
 VM_OBJECT_LOCK(object);
- vm_page_lock_queues();
+
 for (i = 0; i < count; i++)
- if (i != reqpage)
+ if (i != reqpage) {
+ vm_page_lock(m[i]);
+ vm_page_lock_queues();
 vm_page_free(m[i]);
- vm_page_unlock_queues();
+ vm_page_unlock_queues();
+ vm_page_unlock(m[i]);
+ }
 PCPU_INC(cnt.v_vnodein);
 PCPU_INC(cnt.v_vnodepgsin);
 error = vnode_pager_input_old(object, m[reqpage]);
@@ -731,11 +737,14 @@ vnode_pager_generic_getpages(vp, m, bytecount, reqpage)
 return (error);
 } else if (error != 0) {
 VM_OBJECT_LOCK(object);
- vm_page_lock_queues();
 for (i = 0; i < count; i++)
- if (i != reqpage)
+ if (i != reqpage) {
+ vm_page_lock(m[i]);
+ vm_page_lock_queues();
 vm_page_free(m[i]);
- vm_page_unlock_queues();
+ vm_page_unlock_queues();
+ vm_page_unlock(m[i]);
+ }

 VM_OBJECT_UNLOCK(object);
 return (VM_PAGER_ERROR);
@@ -747,11 +756,14 @@
 } else if ((PAGE_SIZE / bsize) > 1 &&
 (vp->v_mount->mnt_stat.f_type != nfs_mount_type)) {
 VM_OBJECT_LOCK(object);
- vm_page_lock_queues();
 for (i = 0; i < count; i++)
- if (i != reqpage)
+ if (i != reqpage) {
+ vm_page_lock(m[i]);
+ vm_page_lock_queues();
 vm_page_free(m[i]);
- vm_page_unlock_queues();
+ vm_page_unlock_queues();
+ vm_page_unlock(m[i]);
+ }
 VM_OBJECT_UNLOCK(object);
 PCPU_INC(cnt.v_vnodein);
 PCPU_INC(cnt.v_vnodepgsin);
@@ -765,11 +777,14 @@
 */
 VM_OBJECT_LOCK(object);
 if (m[reqpage]->valid == VM_PAGE_BITS_ALL) {
- vm_page_lock_queues();
 for (i = 0; i < count; i++)
- if (i != reqpage)
+ if (i != reqpage) {
+ vm_page_lock(m[i]);
+ vm_page_lock_queues();
 vm_page_free(m[i]);
- vm_page_unlock_queues();
+ vm_page_unlock_queues();
+ vm_page_unlock(m[i]);
+ }
 VM_OBJECT_UNLOCK(object);
 return VM_PAGER_OK;
 } else if (reqblock == -1) {
@@ -777,11 +792,14 @@
 KASSERT(m[reqpage]->dirty == 0,
 ("vnode_pager_generic_getpages: page %p is dirty", m));
 m[reqpage]->valid = VM_PAGE_BITS_ALL;
- vm_page_lock_queues();
 for (i = 0; i < count; i++)
- if (i != reqpage)
+ if (i != reqpage) {
+ vm_page_lock(m[i]);
+ vm_page_lock_queues();
 vm_page_free(m[i]);
- vm_page_unlock_queues();
+ vm_page_unlock_queues();
+ vm_page_unlock(m[i]);
+ }
 VM_OBJECT_UNLOCK(object);
 return (VM_PAGER_OK);
 }
@@ -800,11 +818,14 @@
 if (vnode_pager_addr(vp, IDX_TO_OFF(m[i]->pindex), &firstaddr,
 &runpg) != 0) {
 VM_OBJECT_LOCK(object);
- vm_page_lock_queues();
 for (; i < count; i++)
- if (i != reqpage)
+ if (i != reqpage) {
+ vm_page_lock(m[i]);
+ vm_page_lock_queues();
 vm_page_free(m[i]);
- vm_page_unlock_queues();
+ vm_page_unlock_queues();
+ vm_page_unlock(m[i]);
+ }
 VM_OBJECT_UNLOCK(object);
 return (VM_PAGER_ERROR);
 }
@@ -818,9 +839,11 @@
 (object->un_pager.vnp.vnp_size >> 32),
 (uintmax_t)object->un_pager.vnp.vnp_size);
 }
+ vm_page_lock(m[i]);
 vm_page_lock_queues();
 vm_page_free(m[i]);
 vm_page_unlock_queues();
+ vm_page_unlock(m[i]);
 VM_OBJECT_UNLOCK(object);
 runend = i + 1;
 first = runend;
@@ -829,18 +852,24 @@ vnode_pager_generic_getpages(vp, m, bytecount, reqpage)
 runend = i + runpg;
 if (runend <= reqpage) {
 VM_OBJECT_LOCK(object);
- vm_page_lock_queues();
- for (j = i; j < runend; j++)
+ for (j = i; j < runend; j++) {
+ vm_page_lock(m[j]);
+ vm_page_lock_queues();
 vm_page_free(m[j]);
- vm_page_unlock_queues();
+ vm_page_unlock_queues();
+ vm_page_unlock(m[j]);
+ }
 VM_OBJECT_UNLOCK(object);
 } else {
 if (runpg < (count - first)) {
 VM_OBJECT_LOCK(object);
- vm_page_lock_queues();
- for (i = first + runpg; i < count; i++)
+ for (i = first + runpg; i < count; i++) {
+ vm_page_lock(m[i]);
+ vm_page_lock_queues();
 vm_page_free(m[i]);
- vm_page_unlock_queues();
+ vm_page_unlock_queues();
+ vm_page_unlock(m[i]);
+ }
 VM_OBJECT_UNLOCK(object);
 count = first + runpg;
 }
@@ -931,13 +960,14 @@ vnode_pager_generic_getpages(vp, m, bytecount, reqpage)
 relpbuf(bp, &vnode_pbuf_freecnt);

 VM_OBJECT_LOCK(object);
- vm_page_lock_queues();
 for (i = 0, tfoff = foff; i < count; i++, tfoff = nextoff) {
 vm_page_t mt;

 nextoff = tfoff + PAGE_SIZE;
 mt = m[i];
+ vm_page_lock(mt);
+ vm_page_lock_queues();
 if (nextoff <= object->un_pager.vnp.vnp_size) {
 /*
 * Read filled up entire page.
@@ -989,8 +1019,9 @@ vnode_pager_generic_getpages(vp, m, bytecount, reqpage)
 vm_page_free(mt);
 }
 }
+ vm_page_unlock_queues();
+ vm_page_unlock(mt);
 }
- vm_page_unlock_queues();
 VM_OBJECT_UNLOCK(object);
 if (error) {
 printf("vnode_pager_getpages: I/O read error\n");
@@ -1113,10 +1144,12 @@ vnode_pager_generic_putpages(vp, m, bytecount, flags, rtvals)
 maxsize = object->un_pager.vnp.vnp_size - poffset;
 ncount = btoc(maxsize);
 if ((pgoff = (int)maxsize & PAGE_MASK) != 0) {
+ vm_page_lock(m[ncount - 1]);
 vm_page_lock_queues();
 vm_page_clear_dirty(m[ncount - 1], pgoff,
 PAGE_SIZE - pgoff);
 vm_page_unlock_queues();
+ vm_page_unlock(m[ncount - 1]);
 }
 } else {
 maxsize = 0;
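
The vm_pageout.c hunks above convert the inactive and active queue scans from doing all of their work under the global page queue lock to first taking each page's own lock with vm_page_trylock() and simply skipping the page when that fails. Below is a minimal user-space sketch of that try-lock-and-skip pattern, assuming POSIX threads; struct page, NPAGES and scan_inactive() are invented names for illustration, not the kernel's API.

/*
 * Sketch only: scan an array of items, skipping any item whose per-item
 * lock is currently contended, the way the pageout scan skips pages it
 * cannot trylock (and counts them, like addl_page_shortage++).
 */
#include <pthread.h>
#include <stdio.h>

#define NPAGES 8

struct page {
	pthread_mutex_t lock;   /* stand-in for the per-page lock */
	int             dirty;  /* some per-page state to inspect */
};

static struct page pages[NPAGES];

static int
scan_inactive(void)
{
	int skipped = 0;

	for (int i = 0; i < NPAGES; i++) {
		if (pthread_mutex_trylock(&pages[i].lock) != 0) {
			skipped++;      /* contended: skip, do not sleep */
			continue;
		}
		/* ... examine or launder pages[i] while it is locked ... */
		pages[i].dirty = 0;
		pthread_mutex_unlock(&pages[i].lock);
	}
	return (skipped);
}

int
main(void)
{
	for (int i = 0; i < NPAGES; i++)
		pthread_mutex_init(&pages[i].lock, NULL);
	printf("skipped %d pages\n", scan_inactive());
	return (0);
}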
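The vm_param.h hunk only sizes the page-lock array: 32 locks on SMP kernels, 1 otherwise, unless an architecture overrides PA_LOCK_COUNT. The code that maps a physical address to one of those locks is not part of the hunks shown here, so the following is a user-space sketch of the idea, assuming a 4 KiB page: hash the physical address down to one of PA_LOCK_COUNT mutexes so that unrelated pages rarely share a lock while the memory cost stays fixed. PA_LOCK_HASH, pa_lock() and pa_unlock() are illustrative names only.

/* Sketch of a hashed array of physical-address locks, not kernel code. */
#include <pthread.h>
#include <stdint.h>

#define PAGE_SHIFT    12
#define PA_LOCK_COUNT 32	/* the SMP default added to vm_param.h */

static pthread_mutex_t pa_lock_array[PA_LOCK_COUNT];

/* Map a physical address to one of the PA_LOCK_COUNT buckets. */
#define PA_LOCK_HASH(pa) (((pa) >> PAGE_SHIFT) % PA_LOCK_COUNT)

static void
pa_lock_init(void)
{
	for (int i = 0; i < PA_LOCK_COUNT; i++)
		pthread_mutex_init(&pa_lock_array[i], NULL);
}

static void
pa_lock(uint64_t pa)
{
	/* Pages whose addresses hash to the same bucket share this lock. */
	pthread_mutex_lock(&pa_lock_array[PA_LOCK_HASH(pa)]);
}

static void
pa_unlock(uint64_t pa)
{
	pthread_mutex_unlock(&pa_lock_array[PA_LOCK_HASH(pa)]);
}

int
main(void)
{
	pa_lock_init();
	pa_lock(0x1000);	/* page 1 -> bucket 1 */
	pa_lock(0x2000);	/* page 2 -> bucket 2, no contention */
	pa_unlock(0x2000);
	pa_unlock(0x1000);
	return (0);
}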
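In the vnode_pager_generic_putpages() hunk, ncount = btoc(maxsize) is the number of pages that still hold file data and pgoff = maxsize & PAGE_MASK is where that data ends inside the last of them, so vm_page_clear_dirty() touches only the tail of the last page beyond end of file. A small worked example, assuming a 4096-byte page; the btoc() definition is a simplification of the kernel macro and the concrete byte counts are made up:

/* Worked example of the putpages tail arithmetic, illustrative values only. */
#include <stdio.h>

#define PAGE_SIZE 4096
#define PAGE_MASK (PAGE_SIZE - 1)
#define btoc(x)   (((x) + PAGE_MASK) >> 12)	/* bytes, rounded up to pages */

int
main(void)
{
	long maxsize = 10000;                  /* file bytes left past poffset */
	long ncount = btoc(maxsize);           /* pages with file data: 3 */
	int pgoff = (int)maxsize & PAGE_MASK;  /* valid bytes in the last page: 1808 */

	if (pgoff != 0)
		printf("clear dirty bits of page %ld from offset %d for %d bytes\n",
		    ncount - 1, pgoff, PAGE_SIZE - pgoff);
	return (0);
}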