diff options
Diffstat (limited to 'mm/mlock.c')
-rw-r--r-- | mm/mlock.c | 62 |
1 files changed, 43 insertions, 19 deletions
@@ -133,7 +133,10 @@ static void __munlock_isolation_failed(struct page *page) /** * munlock_vma_page - munlock a vma page - * @page - page to be unlocked + * @page - page to be unlocked, either a normal page or THP page head + * + * returns the size of the page as a page mask (0 for normal page, + * HPAGE_PMD_NR - 1 for THP head page) * * called from munlock()/munmap() path with page supposedly on the LRU. * When we munlock a page, because the vma where we found the page is being @@ -148,21 +151,30 @@ static void __munlock_isolation_failed(struct page *page) */ unsigned int munlock_vma_page(struct page *page) { - unsigned int page_mask = 0; + unsigned int nr_pages; BUG_ON(!PageLocked(page)); if (TestClearPageMlocked(page)) { - unsigned int nr_pages = hpage_nr_pages(page); + nr_pages = hpage_nr_pages(page); mod_zone_page_state(page_zone(page), NR_MLOCK, -nr_pages); - page_mask = nr_pages - 1; if (!isolate_lru_page(page)) __munlock_isolated_page(page); else __munlock_isolation_failed(page); + } else { + nr_pages = hpage_nr_pages(page); } - return page_mask; + /* + * Regardless of the original PageMlocked flag, we determine nr_pages + * after touching the flag. This leaves a possible race with a THP page + * split, such that a whole THP page was munlocked, but nr_pages == 1. + * Returning a smaller mask due to that is OK, the worst that can + * happen is subsequent useless scanning of the former tail pages. + * The NR_MLOCK accounting can however become broken. + */ + return nr_pages - 1; } /** @@ -286,10 +298,12 @@ static void __munlock_pagevec(struct pagevec *pvec, struct zone *zone) { int i; int nr = pagevec_count(pvec); - int delta_munlocked = -nr; + int delta_munlocked; struct pagevec pvec_putback; int pgrescued = 0; + pagevec_init(&pvec_putback, 0); + /* Phase 1: page isolation */ spin_lock_irq(&zone->lru_lock); for (i = 0; i < nr; i++) { @@ -318,18 +332,21 @@ skip_munlock: /* * We won't be munlocking this page in the next phase * but we still need to release the follow_page_mask() - * pin. + * pin. We cannot do it under lru_lock however. If it's + * the last pin, __page_cache_release would deadlock. */ + pagevec_add(&pvec_putback, pvec->pages[i]); pvec->pages[i] = NULL; - put_page(page); - delta_munlocked++; } } + delta_munlocked = -nr + pagevec_count(&pvec_putback); __mod_zone_page_state(zone, NR_MLOCK, delta_munlocked); spin_unlock_irq(&zone->lru_lock); + /* Now we can release pins of pages that we are not munlocking */ + pagevec_release(&pvec_putback); + /* Phase 2: page munlock */ - pagevec_init(&pvec_putback, 0); for (i = 0; i < nr; i++) { struct page *page = pvec->pages[i]; @@ -440,7 +457,8 @@ void munlock_vma_pages_range(struct vm_area_struct *vma, while (start < end) { struct page *page = NULL; - unsigned int page_mask, page_increm; + unsigned int page_mask; + unsigned long page_increm; struct pagevec pvec; struct zone *zone; int zoneid; @@ -490,7 +508,9 @@ void munlock_vma_pages_range(struct vm_area_struct *vma, goto next; } } - page_increm = 1 + (~(start >> PAGE_SHIFT) & page_mask); + /* It's a bug to munlock in the middle of a THP page */ + VM_BUG_ON((start >> PAGE_SHIFT) & page_mask); + page_increm = 1 + page_mask; start += page_increm * PAGE_SIZE; next: cond_resched(); @@ -689,19 +709,21 @@ SYSCALL_DEFINE2(mlock, unsigned long, start, size_t, len) lru_add_drain_all(); /* flush pagevec */ - down_write(¤t->mm->mmap_sem); len = PAGE_ALIGN(len + (start & ~PAGE_MASK)); start &= PAGE_MASK; - locked = len >> PAGE_SHIFT; - locked += current->mm->locked_vm; - lock_limit = rlimit(RLIMIT_MEMLOCK); lock_limit >>= PAGE_SHIFT; + locked = len >> PAGE_SHIFT; + + down_write(¤t->mm->mmap_sem); + + locked += current->mm->locked_vm; /* check against resource limits */ if ((locked <= lock_limit) || capable(CAP_IPC_LOCK)) error = do_mlock(start, len, 1); + up_write(¤t->mm->mmap_sem); if (!error) error = __mm_populate(start, len, 0); @@ -712,11 +734,13 @@ SYSCALL_DEFINE2(munlock, unsigned long, start, size_t, len) { int ret; - down_write(¤t->mm->mmap_sem); len = PAGE_ALIGN(len + (start & ~PAGE_MASK)); start &= PAGE_MASK; + + down_write(¤t->mm->mmap_sem); ret = do_mlock(start, len, 0); up_write(¤t->mm->mmap_sem); + return ret; } @@ -761,12 +785,12 @@ SYSCALL_DEFINE1(mlockall, int, flags) if (flags & MCL_CURRENT) lru_add_drain_all(); /* flush pagevec */ - down_write(¤t->mm->mmap_sem); - lock_limit = rlimit(RLIMIT_MEMLOCK); lock_limit >>= PAGE_SHIFT; ret = -ENOMEM; + down_write(¤t->mm->mmap_sem); + if (!(flags & MCL_CURRENT) || (current->mm->total_vm <= lock_limit) || capable(CAP_IPC_LOCK)) ret = do_mlockall(flags); |