diff options
Diffstat (limited to 'mm/filemap.c')
-rw-r--r-- | mm/filemap.c | 221 |
1 files changed, 167 insertions, 54 deletions
diff --git a/mm/filemap.c b/mm/filemap.c index 594d73f..923fc2e 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -35,6 +35,7 @@ #include <linux/hugetlb.h> #include <linux/memcontrol.h> #include <linux/cleancache.h> +#include <linux/shmem_fs.h> #include <linux/rmap.h> #include "internal.h" @@ -134,7 +135,7 @@ static int page_cache_tree_insert(struct address_space *mapping, *shadowp = p; } __radix_tree_replace(&mapping->page_tree, node, slot, page, - workingset_update_node, mapping); + workingset_lookup_update(mapping)); mapping->nrpages++; return 0; } @@ -162,9 +163,12 @@ static void page_cache_tree_delete(struct address_space *mapping, radix_tree_clear_tags(&mapping->page_tree, node, slot); __radix_tree_replace(&mapping->page_tree, node, slot, shadow, - workingset_update_node, mapping); + workingset_lookup_update(mapping)); } + page->mapping = NULL; + /* Leave page->index set: truncation lookup relies upon it */ + if (shadow) { mapping->nrexceptional += nr; /* @@ -178,17 +182,11 @@ static void page_cache_tree_delete(struct address_space *mapping, mapping->nrpages -= nr; } -/* - * Delete a page from the page cache and free it. Caller has to make - * sure the page is locked and that nobody else uses it - or that usage - * is safe. The caller must hold the mapping's tree_lock. - */ -void __delete_from_page_cache(struct page *page, void *shadow) +static void unaccount_page_cache_page(struct address_space *mapping, + struct page *page) { - struct address_space *mapping = page->mapping; - int nr = hpage_nr_pages(page); + int nr; - trace_mm_filemap_delete_from_page_cache(page); /* * if we're uptodate, flush out into the cleancache, otherwise * invalidate any existing cleancache entries. We can't leave @@ -224,15 +222,12 @@ void __delete_from_page_cache(struct page *page, void *shadow) } } - page_cache_tree_delete(mapping, page, shadow); - - page->mapping = NULL; - /* Leave page->index set: truncation lookup relies upon it */ - /* hugetlb pages do not participate in page cache accounting. */ if (PageHuge(page)) return; + nr = hpage_nr_pages(page); + __mod_node_page_state(page_pgdat(page), NR_FILE_PAGES, -nr); if (PageSwapBacked(page)) { __mod_node_page_state(page_pgdat(page), NR_SHMEM, -nr); @@ -243,17 +238,51 @@ void __delete_from_page_cache(struct page *page, void *shadow) } /* - * At this point page must be either written or cleaned by truncate. - * Dirty page here signals a bug and loss of unwritten data. + * At this point page must be either written or cleaned by + * truncate. Dirty page here signals a bug and loss of + * unwritten data. * - * This fixes dirty accounting after removing the page entirely but - * leaves PageDirty set: it has no effect for truncated page and - * anyway will be cleared before returning page into buddy allocator. + * This fixes dirty accounting after removing the page entirely + * but leaves PageDirty set: it has no effect for truncated + * page and anyway will be cleared before returning page into + * buddy allocator. */ if (WARN_ON_ONCE(PageDirty(page))) account_page_cleaned(page, mapping, inode_to_wb(mapping->host)); } +/* + * Delete a page from the page cache and free it. Caller has to make + * sure the page is locked and that nobody else uses it - or that usage + * is safe. The caller must hold the mapping's tree_lock. + */ +void __delete_from_page_cache(struct page *page, void *shadow) +{ + struct address_space *mapping = page->mapping; + + trace_mm_filemap_delete_from_page_cache(page); + + unaccount_page_cache_page(mapping, page); + page_cache_tree_delete(mapping, page, shadow); +} + +static void page_cache_free_page(struct address_space *mapping, + struct page *page) +{ + void (*freepage)(struct page *); + + freepage = mapping->a_ops->freepage; + if (freepage) + freepage(page); + + if (PageTransHuge(page) && !PageHuge(page)) { + page_ref_sub(page, HPAGE_PMD_NR); + VM_BUG_ON_PAGE(page_count(page) <= 0, page); + } else { + put_page(page); + } +} + /** * delete_from_page_cache - delete page from page cache * @page: the page which the kernel is trying to remove from page cache @@ -266,27 +295,98 @@ void delete_from_page_cache(struct page *page) { struct address_space *mapping = page_mapping(page); unsigned long flags; - void (*freepage)(struct page *); BUG_ON(!PageLocked(page)); - - freepage = mapping->a_ops->freepage; - spin_lock_irqsave(&mapping->tree_lock, flags); __delete_from_page_cache(page, NULL); spin_unlock_irqrestore(&mapping->tree_lock, flags); - if (freepage) - freepage(page); + page_cache_free_page(mapping, page); +} +EXPORT_SYMBOL(delete_from_page_cache); - if (PageTransHuge(page) && !PageHuge(page)) { - page_ref_sub(page, HPAGE_PMD_NR); - VM_BUG_ON_PAGE(page_count(page) <= 0, page); - } else { - put_page(page); +/* + * page_cache_tree_delete_batch - delete several pages from page cache + * @mapping: the mapping to which pages belong + * @pvec: pagevec with pages to delete + * + * The function walks over mapping->page_tree and removes pages passed in @pvec + * from the radix tree. The function expects @pvec to be sorted by page index. + * It tolerates holes in @pvec (radix tree entries at those indices are not + * modified). The function expects only THP head pages to be present in the + * @pvec and takes care to delete all corresponding tail pages from the radix + * tree as well. + * + * The function expects mapping->tree_lock to be held. + */ +static void +page_cache_tree_delete_batch(struct address_space *mapping, + struct pagevec *pvec) +{ + struct radix_tree_iter iter; + void **slot; + int total_pages = 0; + int i = 0, tail_pages = 0; + struct page *page; + pgoff_t start; + + start = pvec->pages[0]->index; + radix_tree_for_each_slot(slot, &mapping->page_tree, &iter, start) { + if (i >= pagevec_count(pvec) && !tail_pages) + break; + page = radix_tree_deref_slot_protected(slot, + &mapping->tree_lock); + if (radix_tree_exceptional_entry(page)) + continue; + if (!tail_pages) { + /* + * Some page got inserted in our range? Skip it. We + * have our pages locked so they are protected from + * being removed. + */ + if (page != pvec->pages[i]) + continue; + WARN_ON_ONCE(!PageLocked(page)); + if (PageTransHuge(page) && !PageHuge(page)) + tail_pages = HPAGE_PMD_NR - 1; + page->mapping = NULL; + /* + * Leave page->index set: truncation lookup relies + * upon it + */ + i++; + } else { + tail_pages--; + } + radix_tree_clear_tags(&mapping->page_tree, iter.node, slot); + __radix_tree_replace(&mapping->page_tree, iter.node, slot, NULL, + workingset_lookup_update(mapping)); + total_pages++; } + mapping->nrpages -= total_pages; +} + +void delete_from_page_cache_batch(struct address_space *mapping, + struct pagevec *pvec) +{ + int i; + unsigned long flags; + + if (!pagevec_count(pvec)) + return; + + spin_lock_irqsave(&mapping->tree_lock, flags); + for (i = 0; i < pagevec_count(pvec); i++) { + trace_mm_filemap_delete_from_page_cache(pvec->pages[i]); + + unaccount_page_cache_page(mapping, pvec->pages[i]); + } + page_cache_tree_delete_batch(mapping, pvec); + spin_unlock_irqrestore(&mapping->tree_lock, flags); + + for (i = 0; i < pagevec_count(pvec); i++) + page_cache_free_page(mapping, pvec->pages[i]); } -EXPORT_SYMBOL(delete_from_page_cache); int filemap_check_errors(struct address_space *mapping) { @@ -419,20 +519,18 @@ static void __filemap_fdatawait_range(struct address_space *mapping, if (end_byte < start_byte) return; - pagevec_init(&pvec, 0); - while ((index <= end) && - (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, - PAGECACHE_TAG_WRITEBACK, - min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1)) != 0) { + pagevec_init(&pvec); + while (index <= end) { unsigned i; + nr_pages = pagevec_lookup_range_tag(&pvec, mapping, &index, + end, PAGECACHE_TAG_WRITEBACK); + if (!nr_pages) + break; + for (i = 0; i < nr_pages; i++) { struct page *page = pvec.pages[i]; - /* until radix tree lookup accepts end_index */ - if (page->index > end) - continue; - wait_on_page_writeback(page); ClearPageError(page); } @@ -1754,9 +1852,10 @@ repeat: EXPORT_SYMBOL(find_get_pages_contig); /** - * find_get_pages_tag - find and return pages that match @tag + * find_get_pages_range_tag - find and return pages in given range matching @tag * @mapping: the address_space to search * @index: the starting page index + * @end: The final page index (inclusive) * @tag: the tag index * @nr_pages: the maximum number of pages * @pages: where the resulting pages are placed @@ -1764,8 +1863,9 @@ EXPORT_SYMBOL(find_get_pages_contig); * Like find_get_pages, except we only return pages which are tagged with * @tag. We update @index to index the next page for the traversal. */ -unsigned find_get_pages_tag(struct address_space *mapping, pgoff_t *index, - int tag, unsigned int nr_pages, struct page **pages) +unsigned find_get_pages_range_tag(struct address_space *mapping, pgoff_t *index, + pgoff_t end, int tag, unsigned int nr_pages, + struct page **pages) { struct radix_tree_iter iter; void **slot; @@ -1778,6 +1878,9 @@ unsigned find_get_pages_tag(struct address_space *mapping, pgoff_t *index, radix_tree_for_each_tagged(slot, &mapping->page_tree, &iter, *index, tag) { struct page *head, *page; + + if (iter.index > end) + break; repeat: page = radix_tree_deref_slot(slot); if (unlikely(!page)) @@ -1819,18 +1922,28 @@ repeat: } pages[ret] = page; - if (++ret == nr_pages) - break; + if (++ret == nr_pages) { + *index = pages[ret - 1]->index + 1; + goto out; + } } + /* + * We come here when we got at @end. We take care to not overflow the + * index @index as it confuses some of the callers. This breaks the + * iteration when there is page at index -1 but that is already broken + * anyway. + */ + if (end == (pgoff_t)-1) + *index = (pgoff_t)-1; + else + *index = end + 1; +out: rcu_read_unlock(); - if (ret) - *index = pages[ret - 1]->index + 1; - return ret; } -EXPORT_SYMBOL(find_get_pages_tag); +EXPORT_SYMBOL(find_get_pages_range_tag); /** * find_get_entries_tag - find and return entries that match @tag @@ -2159,7 +2272,7 @@ no_cached_page: * Ok, it wasn't cached, so we need to create a new * page.. */ - page = page_cache_alloc_cold(mapping); + page = page_cache_alloc(mapping); if (!page) { error = -ENOMEM; goto out; @@ -2271,7 +2384,7 @@ static int page_cache_read(struct file *file, pgoff_t offset, gfp_t gfp_mask) int ret; do { - page = __page_cache_alloc(gfp_mask|__GFP_COLD); + page = __page_cache_alloc(gfp_mask); if (!page) return -ENOMEM; @@ -2675,7 +2788,7 @@ static struct page *do_read_cache_page(struct address_space *mapping, repeat: page = find_get_page(mapping, index); if (!page) { - page = __page_cache_alloc(gfp | __GFP_COLD); + page = __page_cache_alloc(gfp); if (!page) return ERR_PTR(-ENOMEM); err = add_to_page_cache_lru(page, mapping, index, gfp); |