diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2013-03-02 16:41:54 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2013-03-02 16:41:54 -0800 |
commit | b695188dd39162a1a6bff11fdbcc4c0b65b933ab (patch) | |
tree | a3df7c052d38b5bfaf335fbf3130abcc5c6ca577 /fs/btrfs/extent_io.c | |
parent | 48476df99894492a0f7239f2f3c9a2dde4ff38e2 (diff) | |
parent | 180e001cd5fc2950dc6a7997dde5b65c954d0e79 (diff) | |
download | op-kernel-dev-b695188dd39162a1a6bff11fdbcc4c0b65b933ab.zip op-kernel-dev-b695188dd39162a1a6bff11fdbcc4c0b65b933ab.tar.gz |
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs
Pull btrfs update from Chris Mason:
"The biggest feature in the pull is the new (and still experimental)
raid56 code that David Woodhouse started long ago. I'm still working
on the parity logging setup that will avoid inconsistent parity after
a crash, so this is only for testing right now. But, I'd really like
to get it out to a broader audience to hammer out any performance
issues or other problems.
scrub does not yet correct errors on raid5/6 either.
Josef has another pass at fsync performance. The big change here is
to combine waiting for metadata with waiting for data, which is a big
latency win. It is also step one toward using atomics from the
hardware during a commit.
Mark Fasheh has a new way to use btrfs send/receive to send only the
metadata changes. SUSE is using this to make snapper more efficient
at finding changes between snapshosts.
Snapshot-aware defrag is also included.
Otherwise we have a large number of fixes and cleanups. Eric Sandeen
wins the award for removing the most lines, and I'm hoping we steal
this idea from XFS over and over again."
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs: (118 commits)
btrfs: fixup/remove module.h usage as required
Btrfs: delete inline extents when we find them during logging
btrfs: try harder to allocate raid56 stripe cache
Btrfs: cleanup to make the function btrfs_delalloc_reserve_metadata more logic
Btrfs: don't call btrfs_qgroup_free if just btrfs_qgroup_reserve fails
Btrfs: remove reduplicate check about root in the function btrfs_clean_quota_tree
Btrfs: return ENOMEM rather than use BUG_ON when btrfs_alloc_path fails
Btrfs: fix missing deleted items in btrfs_clean_quota_tree
btrfs: use only inline_pages from extent buffer
Btrfs: fix wrong reserved space when deleting a snapshot/subvolume
Btrfs: fix wrong reserved space in qgroup during snap/subv creation
Btrfs: remove unnecessary dget_parent/dput when creating the pending snapshot
btrfs: remove a printk from scan_one_device
Btrfs: fix NULL pointer after aborting a transaction
Btrfs: fix memory leak of log roots
Btrfs: copy everything if we've created an inline extent
btrfs: cleanup for open-coded alignment
Btrfs: do not change inode flags in rename
Btrfs: use reserved space for creating a snapshot
clear chunk_alloc flag on retryable failure
...
Diffstat (limited to 'fs/btrfs/extent_io.c')
-rw-r--r-- | fs/btrfs/extent_io.c | 138 |
1 files changed, 65 insertions, 73 deletions
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 1b319df..f173c5a 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -4,7 +4,6 @@ #include <linux/mm.h> #include <linux/pagemap.h> #include <linux/page-flags.h> -#include <linux/module.h> #include <linux/spinlock.h> #include <linux/blkdev.h> #include <linux/swap.h> @@ -1834,7 +1833,7 @@ int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end, */ static void check_page_uptodate(struct extent_io_tree *tree, struct page *page) { - u64 start = (u64)page->index << PAGE_CACHE_SHIFT; + u64 start = page_offset(page); u64 end = start + PAGE_CACHE_SIZE - 1; if (test_range_bit(tree, start, end, EXTENT_UPTODATE, 1, NULL)) SetPageUptodate(page); @@ -1846,7 +1845,7 @@ static void check_page_uptodate(struct extent_io_tree *tree, struct page *page) */ static void check_page_locked(struct extent_io_tree *tree, struct page *page) { - u64 start = (u64)page->index << PAGE_CACHE_SHIFT; + u64 start = page_offset(page); u64 end = start + PAGE_CACHE_SIZE - 1; if (!test_range_bit(tree, start, end, EXTENT_LOCKED, 0, NULL)) unlock_page(page); @@ -1895,13 +1894,11 @@ static int free_io_failure(struct inode *inode, struct io_failure_record *rec, if (ret) err = ret; - if (did_repair) { - ret = clear_extent_bits(&BTRFS_I(inode)->io_tree, rec->start, - rec->start + rec->len - 1, - EXTENT_DAMAGED, GFP_NOFS); - if (ret && !err) - err = ret; - } + ret = clear_extent_bits(&BTRFS_I(inode)->io_tree, rec->start, + rec->start + rec->len - 1, + EXTENT_DAMAGED, GFP_NOFS); + if (ret && !err) + err = ret; kfree(rec); return err; @@ -1932,10 +1929,15 @@ int repair_io_failure(struct btrfs_fs_info *fs_info, u64 start, u64 map_length = 0; u64 sector; struct btrfs_bio *bbio = NULL; + struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree; int ret; BUG_ON(!mirror_num); + /* we can't repair anything in raid56 yet */ + if (btrfs_is_parity_mirror(map_tree, logical, length, mirror_num)) + return 0; + bio = bio_alloc(GFP_NOFS, 1); if (!bio) return -EIO; @@ -1960,7 +1962,7 @@ int repair_io_failure(struct btrfs_fs_info *fs_info, u64 start, return -EIO; } bio->bi_bdev = dev->bdev; - bio_add_page(bio, page, length, start-page_offset(page)); + bio_add_page(bio, page, length, start - page_offset(page)); btrfsic_submit_bio(WRITE_SYNC, bio); wait_for_completion(&compl); @@ -2052,6 +2054,7 @@ static int clean_io_failure(u64 start, struct page *page) failrec->failed_mirror); did_repair = !ret; } + ret = 0; } out: @@ -2293,8 +2296,7 @@ static void end_bio_extent_writepage(struct bio *bio, int err) struct page *page = bvec->bv_page; tree = &BTRFS_I(page->mapping->host)->io_tree; - start = ((u64)page->index << PAGE_CACHE_SHIFT) + - bvec->bv_offset; + start = page_offset(page) + bvec->bv_offset; end = start + bvec->bv_len - 1; if (bvec->bv_offset == 0 && bvec->bv_len == PAGE_CACHE_SIZE) @@ -2353,8 +2355,7 @@ static void end_bio_extent_readpage(struct bio *bio, int err) (long int)bio->bi_bdev); tree = &BTRFS_I(page->mapping->host)->io_tree; - start = ((u64)page->index << PAGE_CACHE_SHIFT) + - bvec->bv_offset; + start = page_offset(page) + bvec->bv_offset; end = start + bvec->bv_len - 1; if (bvec->bv_offset == 0 && bvec->bv_len == PAGE_CACHE_SIZE) @@ -2471,7 +2472,7 @@ static int __must_check submit_one_bio(int rw, struct bio *bio, struct extent_io_tree *tree = bio->bi_private; u64 start; - start = ((u64)page->index << PAGE_CACHE_SHIFT) + bvec->bv_offset; + start = page_offset(page) + bvec->bv_offset; bio->bi_private = NULL; @@ -2489,13 +2490,13 @@ static int __must_check submit_one_bio(int rw, struct bio *bio, return ret; } -static int merge_bio(struct extent_io_tree *tree, struct page *page, +static int merge_bio(int rw, struct extent_io_tree *tree, struct page *page, unsigned long offset, size_t size, struct bio *bio, unsigned long bio_flags) { int ret = 0; if (tree->ops && tree->ops->merge_bio_hook) - ret = tree->ops->merge_bio_hook(page, offset, size, bio, + ret = tree->ops->merge_bio_hook(rw, page, offset, size, bio, bio_flags); BUG_ON(ret < 0); return ret; @@ -2530,7 +2531,7 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree, sector; if (prev_bio_flags != bio_flags || !contig || - merge_bio(tree, page, offset, page_size, bio, bio_flags) || + merge_bio(rw, tree, page, offset, page_size, bio, bio_flags) || bio_add_page(bio, page, page_size, offset) < page_size) { ret = submit_one_bio(rw, bio, mirror_num, prev_bio_flags); @@ -2595,7 +2596,7 @@ static int __extent_read_full_page(struct extent_io_tree *tree, unsigned long *bio_flags) { struct inode *inode = page->mapping->host; - u64 start = (u64)page->index << PAGE_CACHE_SHIFT; + u64 start = page_offset(page); u64 page_end = start + PAGE_CACHE_SIZE - 1; u64 end; u64 cur = start; @@ -2648,6 +2649,8 @@ static int __extent_read_full_page(struct extent_io_tree *tree, } } while (cur <= end) { + unsigned long pnr = (last_byte >> PAGE_CACHE_SHIFT) + 1; + if (cur >= last_byte) { char *userpage; struct extent_state *cached = NULL; @@ -2682,7 +2685,7 @@ static int __extent_read_full_page(struct extent_io_tree *tree, iosize = min(extent_map_end(em) - cur, end - cur + 1); cur_end = min(extent_map_end(em) - 1, end); - iosize = (iosize + blocksize - 1) & ~((u64)blocksize - 1); + iosize = ALIGN(iosize, blocksize); if (this_bio_flag & EXTENT_BIO_COMPRESSED) { disk_io_size = em->block_len; sector = em->block_start >> 9; @@ -2735,26 +2738,17 @@ static int __extent_read_full_page(struct extent_io_tree *tree, continue; } - ret = 0; - if (tree->ops && tree->ops->readpage_io_hook) { - ret = tree->ops->readpage_io_hook(page, cur, - cur + iosize - 1); - } - if (!ret) { - unsigned long pnr = (last_byte >> PAGE_CACHE_SHIFT) + 1; - pnr -= page->index; - ret = submit_extent_page(READ, tree, page, + pnr -= page->index; + ret = submit_extent_page(READ, tree, page, sector, disk_io_size, pg_offset, bdev, bio, pnr, end_bio_extent_readpage, mirror_num, *bio_flags, this_bio_flag); - if (!ret) { - nr++; - *bio_flags = this_bio_flag; - } - } - if (ret) { + if (!ret) { + nr++; + *bio_flags = this_bio_flag; + } else { SetPageError(page); unlock_extent(tree, cur, cur + iosize - 1); } @@ -2806,7 +2800,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, struct inode *inode = page->mapping->host; struct extent_page_data *epd = data; struct extent_io_tree *tree = epd->tree; - u64 start = (u64)page->index << PAGE_CACHE_SHIFT; + u64 start = page_offset(page); u64 delalloc_start; u64 page_end = start + PAGE_CACHE_SIZE - 1; u64 end; @@ -2982,7 +2976,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, BUG_ON(extent_map_end(em) <= cur); BUG_ON(end < cur); iosize = min(extent_map_end(em) - cur, end - cur + 1); - iosize = (iosize + blocksize - 1) & ~((u64)blocksize - 1); + iosize = ALIGN(iosize, blocksize); sector = (em->block_start + extent_offset) >> 9; bdev = em->bdev; block_start = em->block_start; @@ -3124,12 +3118,9 @@ static int lock_extent_buffer_for_io(struct extent_buffer *eb, set_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags); spin_unlock(&eb->refs_lock); btrfs_set_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN); - spin_lock(&fs_info->delalloc_lock); - if (fs_info->dirty_metadata_bytes >= eb->len) - fs_info->dirty_metadata_bytes -= eb->len; - else - WARN_ON(1); - spin_unlock(&fs_info->delalloc_lock); + __percpu_counter_add(&fs_info->dirty_metadata_bytes, + -eb->len, + fs_info->dirty_metadata_batch); ret = 1; } else { spin_unlock(&eb->refs_lock); @@ -3446,15 +3437,9 @@ retry: * swizzled back from swapper_space to tmpfs file * mapping */ - if (tree->ops && - tree->ops->write_cache_pages_lock_hook) { - tree->ops->write_cache_pages_lock_hook(page, - data, flush_fn); - } else { - if (!trylock_page(page)) { - flush_fn(data); - lock_page(page); - } + if (!trylock_page(page)) { + flush_fn(data); + lock_page(page); } if (unlikely(page->mapping != mapping)) { @@ -3674,11 +3659,11 @@ int extent_invalidatepage(struct extent_io_tree *tree, struct page *page, unsigned long offset) { struct extent_state *cached_state = NULL; - u64 start = ((u64)page->index << PAGE_CACHE_SHIFT); + u64 start = page_offset(page); u64 end = start + PAGE_CACHE_SIZE - 1; size_t blocksize = page->mapping->host->i_sb->s_blocksize; - start += (offset + blocksize - 1) & ~(blocksize - 1); + start += ALIGN(offset, blocksize); if (start > end) return 0; @@ -3700,7 +3685,7 @@ int try_release_extent_state(struct extent_map_tree *map, struct extent_io_tree *tree, struct page *page, gfp_t mask) { - u64 start = (u64)page->index << PAGE_CACHE_SHIFT; + u64 start = page_offset(page); u64 end = start + PAGE_CACHE_SIZE - 1; int ret = 1; @@ -3739,7 +3724,7 @@ int try_release_extent_mapping(struct extent_map_tree *map, gfp_t mask) { struct extent_map *em; - u64 start = (u64)page->index << PAGE_CACHE_SHIFT; + u64 start = page_offset(page); u64 end = start + PAGE_CACHE_SIZE - 1; if ((mask & __GFP_WAIT) && @@ -3797,7 +3782,7 @@ static struct extent_map *get_extent_skip_holes(struct inode *inode, len = last - offset; if (len == 0) break; - len = (len + sectorsize - 1) & ~(sectorsize - 1); + len = ALIGN(len, sectorsize); em = get_extent(inode, NULL, 0, offset, len, 0); if (IS_ERR_OR_NULL(em)) return em; @@ -3995,8 +3980,6 @@ static void __free_extent_buffer(struct extent_buffer *eb) list_del(&eb->leak_list); spin_unlock_irqrestore(&leak_lock, flags); #endif - if (eb->pages && eb->pages != eb->inline_pages) - kfree(eb->pages); kmem_cache_free(extent_buffer_cache, eb); } @@ -4037,19 +4020,12 @@ static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree, atomic_set(&eb->refs, 1); atomic_set(&eb->io_pages, 0); - if (len > MAX_INLINE_EXTENT_BUFFER_SIZE) { - struct page **pages; - int num_pages = (len + PAGE_CACHE_SIZE - 1) >> - PAGE_CACHE_SHIFT; - pages = kzalloc(num_pages, mask); - if (!pages) { - __free_extent_buffer(eb); - return NULL; - } - eb->pages = pages; - } else { - eb->pages = eb->inline_pages; - } + /* + * Sanity checks, currently the maximum is 64k covered by 16x 4k pages + */ + BUILD_BUG_ON(BTRFS_MAX_METADATA_BLOCKSIZE + > MAX_INLINE_EXTENT_BUFFER_SIZE); + BUG_ON(len > MAX_INLINE_EXTENT_BUFFER_SIZE); return eb; } @@ -4180,6 +4156,7 @@ static inline void btrfs_release_extent_buffer(struct extent_buffer *eb) static void check_buffer_tree_ref(struct extent_buffer *eb) { + int refs; /* the ref bit is tricky. We have to make sure it is set * if we have the buffer dirty. Otherwise the * code to free a buffer can end up dropping a dirty @@ -4200,6 +4177,10 @@ static void check_buffer_tree_ref(struct extent_buffer *eb) * So bump the ref count first, then set the bit. If someone * beat us to it, drop the ref we added. */ + refs = atomic_read(&eb->refs); + if (refs >= 2 && test_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags)) + return; + spin_lock(&eb->refs_lock); if (!test_and_set_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags)) atomic_inc(&eb->refs); @@ -4401,9 +4382,20 @@ static int release_extent_buffer(struct extent_buffer *eb, gfp_t mask) void free_extent_buffer(struct extent_buffer *eb) { + int refs; + int old; if (!eb) return; + while (1) { + refs = atomic_read(&eb->refs); + if (refs <= 3) + break; + old = atomic_cmpxchg(&eb->refs, refs, refs - 1); + if (old == refs) + return; + } + spin_lock(&eb->refs_lock); if (atomic_read(&eb->refs) == 2 && test_bit(EXTENT_BUFFER_DUMMY, &eb->bflags)) |