diff options
Diffstat (limited to 'mm/swapfile.c')
-rw-r--r-- | mm/swapfile.c | 113 |
1 files changed, 76 insertions, 37 deletions
diff --git a/mm/swapfile.c b/mm/swapfile.c index f071648..eade24d 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -506,9 +506,19 @@ unsigned int count_swap_pages(int type, int free) * just let do_wp_page work it out if a write is requested later - to * force COW, vm_page_prot omits write permission from any private vma. */ -static void unuse_pte(struct vm_area_struct *vma, pte_t *pte, +static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd, unsigned long addr, swp_entry_t entry, struct page *page) { + spinlock_t *ptl; + pte_t *pte; + int found = 1; + + pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); + if (unlikely(!pte_same(*pte, swp_entry_to_pte(entry)))) { + found = 0; + goto out; + } + inc_mm_counter(vma->vm_mm, anon_rss); get_page(page); set_pte_at(vma->vm_mm, addr, pte, @@ -520,6 +530,9 @@ static void unuse_pte(struct vm_area_struct *vma, pte_t *pte, * immediately swapped out again after swapon. */ activate_page(page); +out: + pte_unmap_unlock(pte, ptl); + return found; } static int unuse_pte_range(struct vm_area_struct *vma, pmd_t *pmd, @@ -528,22 +541,33 @@ static int unuse_pte_range(struct vm_area_struct *vma, pmd_t *pmd, { pte_t swp_pte = swp_entry_to_pte(entry); pte_t *pte; - spinlock_t *ptl; int found = 0; - pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); + /* + * We don't actually need pte lock while scanning for swp_pte: since + * we hold page lock and mmap_sem, swp_pte cannot be inserted into the + * page table while we're scanning; though it could get zapped, and on + * some architectures (e.g. x86_32 with PAE) we might catch a glimpse + * of unmatched parts which look like swp_pte, so unuse_pte must + * recheck under pte lock. Scanning without pte lock lets it be + * preemptible whenever CONFIG_PREEMPT but not CONFIG_HIGHPTE. + */ + pte = pte_offset_map(pmd, addr); do { /* * swapoff spends a _lot_ of time in this loop! * Test inline before going to call unuse_pte. */ if (unlikely(pte_same(*pte, swp_pte))) { - unuse_pte(vma, pte++, addr, entry, page); - found = 1; - break; + pte_unmap(pte); + found = unuse_pte(vma, pmd, addr, entry, page); + if (found) + goto out; + pte = pte_offset_map(pmd, addr); } } while (pte++, addr += PAGE_SIZE, addr != end); - pte_unmap_unlock(pte - 1, ptl); + pte_unmap(pte - 1); +out: return found; } @@ -730,7 +754,8 @@ static int try_to_unuse(unsigned int type) */ swap_map = &si->swap_map[i]; entry = swp_entry(type, i); - page = read_swap_cache_async(entry, NULL, 0); + page = read_swap_cache_async(entry, + GFP_HIGHUSER_MOVABLE, NULL, 0); if (!page) { /* * Either swap_duplicate() failed because entry @@ -789,7 +814,7 @@ static int try_to_unuse(unsigned int type) atomic_inc(&new_start_mm->mm_users); atomic_inc(&prev_mm->mm_users); spin_lock(&mmlist_lock); - while (*swap_map > 1 && !retval && + while (*swap_map > 1 && !retval && !shmem && (p = p->next) != &start_mm->mmlist) { mm = list_entry(p, struct mm_struct, mmlist); if (!atomic_inc_not_zero(&mm->mm_users)) @@ -821,6 +846,13 @@ static int try_to_unuse(unsigned int type) mmput(start_mm); start_mm = new_start_mm; } + if (shmem) { + /* page has already been unlocked and released */ + if (shmem > 0) + continue; + retval = shmem; + break; + } if (retval) { unlock_page(page); page_cache_release(page); @@ -859,12 +891,6 @@ static int try_to_unuse(unsigned int type) * read from disk into another page. Splitting into two * pages would be incorrect if swap supported "shared * private" pages, but they are handled by tmpfs files. - * - * Note shmem_unuse already deleted a swappage from - * the swap cache, unless the move to filepage failed: - * in which case it left swappage in cache, lowered its - * swap count to pass quickly through the loops above, - * and now we must reincrement count to try again later. */ if ((*swap_map > 1) && PageDirty(page) && PageSwapCache(page)) { struct writeback_control wbc = { @@ -875,12 +901,8 @@ static int try_to_unuse(unsigned int type) lock_page(page); wait_on_page_writeback(page); } - if (PageSwapCache(page)) { - if (shmem) - swap_duplicate(entry); - else - delete_from_swap_cache(page); - } + if (PageSwapCache(page)) + delete_from_swap_cache(page); /* * So we could skip searching mms once swap count went @@ -1768,31 +1790,48 @@ get_swap_info_struct(unsigned type) */ int valid_swaphandles(swp_entry_t entry, unsigned long *offset) { + struct swap_info_struct *si; int our_page_cluster = page_cluster; - int ret = 0, i = 1 << our_page_cluster; - unsigned long toff; - struct swap_info_struct *swapdev = swp_type(entry) + swap_info; + pgoff_t target, toff; + pgoff_t base, end; + int nr_pages = 0; if (!our_page_cluster) /* no readahead */ return 0; - toff = (swp_offset(entry) >> our_page_cluster) << our_page_cluster; - if (!toff) /* first page is swap header */ - toff++, i--; - *offset = toff; + + si = &swap_info[swp_type(entry)]; + target = swp_offset(entry); + base = (target >> our_page_cluster) << our_page_cluster; + end = base + (1 << our_page_cluster); + if (!base) /* first page is swap header */ + base++; spin_lock(&swap_lock); - do { - /* Don't read-ahead past the end of the swap area */ - if (toff >= swapdev->max) + if (end > si->max) /* don't go beyond end of map */ + end = si->max; + + /* Count contiguous allocated slots above our target */ + for (toff = target; ++toff < end; nr_pages++) { + /* Don't read in free or bad pages */ + if (!si->swap_map[toff]) + break; + if (si->swap_map[toff] == SWAP_MAP_BAD) break; + } + /* Count contiguous allocated slots below our target */ + for (toff = target; --toff >= base; nr_pages++) { /* Don't read in free or bad pages */ - if (!swapdev->swap_map[toff]) + if (!si->swap_map[toff]) break; - if (swapdev->swap_map[toff] == SWAP_MAP_BAD) + if (si->swap_map[toff] == SWAP_MAP_BAD) break; - toff++; - ret++; - } while (--i); + } spin_unlock(&swap_lock); - return ret; + + /* + * Indicate starting offset, and return number of pages to get: + * if only 1, say 0, since there's then no readahead to be done. + */ + *offset = ++toff; + return nr_pages? ++nr_pages: 0; } |