summary | refs | log | tree | commit | diff | stats
path: root/fs/btrfs/extent_io.c
diff options
context:
space:
mode:
author: Linus Torvalds <torvalds@linux-foundation.org> 2011-05-27 13:57:12 -0700
committer: Linus Torvalds <torvalds@linux-foundation.org> 2011-05-27 13:57:12 -0700
commit: a0c3061093c8b49facef95dc09a618c6e0d17cb5 (patch)
tree: 1d6ff7c06134b71a8bd0721395386e82e46e60c8 /fs/btrfs/extent_io.c
parent: 10799db60cbc4f990dd69eb49883477095c66af7 (diff)
parent: 174ba50915b08dcfd07c8b5fb795b46a165fa09a (diff)
download: op-kernel-dev-a0c3061093c8b49facef95dc09a618c6e0d17cb5.zip
download: op-kernel-dev-a0c3061093c8b49facef95dc09a618c6e0d17cb5.tar.gz
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/btrfs-unstable
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/btrfs-unstable: (58 commits)
  Btrfs: use the device_list_mutex during write_dev_supers
  Btrfs: setup free ino caching in a more asynchronous way
  btrfs scrub: don't coalesce pages that are logically discontiguous
  Btrfs: return -ENOMEM in clear_extent_bit
  Btrfs: add mount -o auto_defrag
  Btrfs: using rcu lock in the reader side of devices list
  Btrfs: drop unnecessary device lock
  Btrfs: fix the race between remove dev and alloc chunk
  Btrfs: fix the race between reading and updating devices
  Btrfs: fix bh leak on __btrfs_open_devices path
  Btrfs: fix unsafe usage of merge_state
  Btrfs: allocate extent state and check the result properly
  fs/btrfs: Add missing btrfs_free_path
  Btrfs: check return value of btrfs_inc_extent_ref()
  Btrfs: return error to caller if read_one_inode() fails
  Btrfs: BUG_ON is deleted from the caller of btrfs_truncate_item & btrfs_extend_item
  Btrfs: return error code to caller when btrfs_del_item fails
  Btrfs: return error code to caller when btrfs_previous_item fails
  btrfs: fix typo 'testeing' -> 'testing'
  btrfs: typo: 'btrfS' -> 'btrfs'
  ...
Diffstat (limited to 'fs/btrfs/extent_io.c')
-rw-r--r-- fs/btrfs/extent_io.c | 324
1 files changed, 60 insertions, 264 deletions
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 4f98932..c5d9fbb 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -103,7 +103,7 @@ void extent_io_exit(void)
}
void extent_io_tree_init(struct extent_io_tree *tree,
- struct address_space *mapping, gfp_t mask)
+ struct address_space *mapping)
{
tree->state = RB_ROOT;
INIT_RADIX_TREE(&tree->buffer, GFP_ATOMIC);
@@ -441,6 +441,15 @@ static int clear_state_bit(struct extent_io_tree *tree,
return ret;
}
+static struct extent_state *
+alloc_extent_state_atomic(struct extent_state *prealloc)
+{
+ if (!prealloc)
+ prealloc = alloc_extent_state(GFP_ATOMIC);
+
+ return prealloc;
+}
+
/*
* clear some bits on a range in the tree. This may require splitting
* or inserting elements in the tree, so the gfp mask is used to
@@ -531,8 +540,8 @@ hit_next:
*/
if (state->start < start) {
- if (!prealloc)
- prealloc = alloc_extent_state(GFP_ATOMIC);
+ prealloc = alloc_extent_state_atomic(prealloc);
+ BUG_ON(!prealloc);
err = split_state(tree, state, prealloc, start);
BUG_ON(err == -EEXIST);
prealloc = NULL;
@@ -553,8 +562,8 @@ hit_next:
* on the first half
*/
if (state->start <= end && state->end > end) {
- if (!prealloc)
- prealloc = alloc_extent_state(GFP_ATOMIC);
+ prealloc = alloc_extent_state_atomic(prealloc);
+ BUG_ON(!prealloc);
err = split_state(tree, state, prealloc, end + 1);
BUG_ON(err == -EEXIST);
if (wake)
@@ -727,8 +736,7 @@ int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
again:
if (!prealloc && (mask & __GFP_WAIT)) {
prealloc = alloc_extent_state(mask);
- if (!prealloc)
- return -ENOMEM;
+ BUG_ON(!prealloc);
}
spin_lock(&tree->lock);
@@ -745,6 +753,8 @@ again:
*/
node = tree_search(tree, start);
if (!node) {
+ prealloc = alloc_extent_state_atomic(prealloc);
+ BUG_ON(!prealloc);
err = insert_state(tree, prealloc, start, end, &bits);
prealloc = NULL;
BUG_ON(err == -EEXIST);
@@ -773,20 +783,18 @@ hit_next:
if (err)
goto out;
+ next_node = rb_next(node);
cache_state(state, cached_state);
merge_state(tree, state);
if (last_end == (u64)-1)
goto out;
start = last_end + 1;
- if (start < end && prealloc && !need_resched()) {
- next_node = rb_next(node);
- if (next_node) {
- state = rb_entry(next_node, struct extent_state,
- rb_node);
- if (state->start == start)
- goto hit_next;
- }
+ if (next_node && start < end && prealloc && !need_resched()) {
+ state = rb_entry(next_node, struct extent_state,
+ rb_node);
+ if (state->start == start)
+ goto hit_next;
}
goto search_again;
}
@@ -813,6 +821,9 @@ hit_next:
err = -EEXIST;
goto out;
}
+
+ prealloc = alloc_extent_state_atomic(prealloc);
+ BUG_ON(!prealloc);
err = split_state(tree, state, prealloc, start);
BUG_ON(err == -EEXIST);
prealloc = NULL;
@@ -843,14 +854,25 @@ hit_next:
this_end = end;
else
this_end = last_start - 1;
+
+ prealloc = alloc_extent_state_atomic(prealloc);
+ BUG_ON(!prealloc);
+
+ /*
+ * Avoid to free 'prealloc' if it can be merged with
+ * the later extent.
+ */
+ atomic_inc(&prealloc->refs);
err = insert_state(tree, prealloc, start, this_end,
&bits);
BUG_ON(err == -EEXIST);
if (err) {
+ free_extent_state(prealloc);
prealloc = NULL;
goto out;
}
cache_state(prealloc, cached_state);
+ free_extent_state(prealloc);
prealloc = NULL;
start = this_end + 1;
goto search_again;
@@ -867,6 +889,9 @@ hit_next:
err = -EEXIST;
goto out;
}
+
+ prealloc = alloc_extent_state_atomic(prealloc);
+ BUG_ON(!prealloc);
err = split_state(tree, state, prealloc, end + 1);
BUG_ON(err == -EEXIST);
@@ -943,13 +968,6 @@ int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end,
NULL, mask);
}
-static int clear_extent_new(struct extent_io_tree *tree, u64 start, u64 end,
- gfp_t mask)
-{
- return clear_extent_bit(tree, start, end, EXTENT_NEW, 0, 0,
- NULL, mask);
-}
-
int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end,
struct extent_state **cached_state, gfp_t mask)
{
@@ -965,11 +983,6 @@ static int clear_extent_uptodate(struct extent_io_tree *tree, u64 start,
cached_state, mask);
}
-int wait_on_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end)
-{
- return wait_extent_bit(tree, start, end, EXTENT_WRITEBACK);
-}
-
/*
* either insert or lock state struct between start and end use mask to tell
* us if waiting is desired.
@@ -1030,25 +1043,6 @@ int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask)
}
/*
- * helper function to set pages and extents in the tree dirty
- */
-int set_range_dirty(struct extent_io_tree *tree, u64 start, u64 end)
-{
- unsigned long index = start >> PAGE_CACHE_SHIFT;
- unsigned long end_index = end >> PAGE_CACHE_SHIFT;
- struct page *page;
-
- while (index <= end_index) {
- page = find_get_page(tree->mapping, index);
- BUG_ON(!page);
- __set_page_dirty_nobuffers(page);
- page_cache_release(page);
- index++;
- }
- return 0;
-}
-
-/*
* helper function to set both pages and extents in the tree writeback
*/
static int set_range_writeback(struct extent_io_tree *tree, u64 start, u64 end)
@@ -1821,46 +1815,6 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
bio_put(bio);
}
-/*
- * IO done from prepare_write is pretty simple, we just unlock
- * the structs in the extent tree when done, and set the uptodate bits
- * as appropriate.
- */
-static void end_bio_extent_preparewrite(struct bio *bio, int err)
-{
- const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
- struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
- struct extent_io_tree *tree;
- u64 start;
- u64 end;
-
- do {
- struct page *page = bvec->bv_page;
- struct extent_state *cached = NULL;
- tree = &BTRFS_I(page->mapping->host)->io_tree;
-
- start = ((u64)page->index << PAGE_CACHE_SHIFT) +
- bvec->bv_offset;
- end = start + bvec->bv_len - 1;
-
- if (--bvec >= bio->bi_io_vec)
- prefetchw(&bvec->bv_page->flags);
-
- if (uptodate) {
- set_extent_uptodate(tree, start, end, &cached,
- GFP_ATOMIC);
- } else {
- ClearPageUptodate(page);
- SetPageError(page);
- }
-
- unlock_extent_cached(tree, start, end, &cached, GFP_ATOMIC);
-
- } while (bvec >= bio->bi_io_vec);
-
- bio_put(bio);
-}
-
struct bio *
btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs,
gfp_t gfp_flags)
@@ -2009,7 +1963,7 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
struct btrfs_ordered_extent *ordered;
int ret;
int nr = 0;
- size_t page_offset = 0;
+ size_t pg_offset = 0;
size_t iosize;
size_t disk_io_size;
size_t blocksize = inode->i_sb->s_blocksize;
@@ -2052,9 +2006,9 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
char *userpage;
struct extent_state *cached = NULL;
- iosize = PAGE_CACHE_SIZE - page_offset;
+ iosize = PAGE_CACHE_SIZE - pg_offset;
userpage = kmap_atomic(page, KM_USER0);
- memset(userpage + page_offset, 0, iosize);
+ memset(userpage + pg_offset, 0, iosize);
flush_dcache_page(page);
kunmap_atomic(userpage, KM_USER0);
set_extent_uptodate(tree, cur, cur + iosize - 1,
@@ -2063,9 +2017,9 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
&cached, GFP_NOFS);
break;
}
- em = get_extent(inode, page, page_offset, cur,
+ em = get_extent(inode, page, pg_offset, cur,
end - cur + 1, 0);
- if (IS_ERR(em) || !em) {
+ if (IS_ERR_OR_NULL(em)) {
SetPageError(page);
unlock_extent(tree, cur, end, GFP_NOFS);
break;
@@ -2103,7 +2057,7 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
struct extent_state *cached = NULL;
userpage = kmap_atomic(page, KM_USER0);
- memset(userpage + page_offset, 0, iosize);
+ memset(userpage + pg_offset, 0, iosize);
flush_dcache_page(page);
kunmap_atomic(userpage, KM_USER0);
@@ -2112,7 +2066,7 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
unlock_extent_cached(tree, cur, cur + iosize - 1,
&cached, GFP_NOFS);
cur = cur + iosize;
- page_offset += iosize;
+ pg_offset += iosize;
continue;
}
/* the get_extent function already copied into the page */
@@ -2121,7 +2075,7 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
check_page_uptodate(tree, page);
unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS);
cur = cur + iosize;
- page_offset += iosize;
+ pg_offset += iosize;
continue;
}
/* we have an inline extent but it didn't get marked up
@@ -2131,7 +2085,7 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
SetPageError(page);
unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS);
cur = cur + iosize;
- page_offset += iosize;
+ pg_offset += iosize;
continue;
}
@@ -2144,7 +2098,7 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
unsigned long pnr = (last_byte >> PAGE_CACHE_SHIFT) + 1;
pnr -= page->index;
ret = submit_extent_page(READ, tree, page,
- sector, disk_io_size, page_offset,
+ sector, disk_io_size, pg_offset,
bdev, bio, pnr,
end_bio_extent_readpage, mirror_num,
*bio_flags,
@@ -2155,7 +2109,7 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
if (ret)
SetPageError(page);
cur = cur + iosize;
- page_offset += iosize;
+ pg_offset += iosize;
}
out:
if (!nr) {
@@ -2351,7 +2305,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
}
em = epd->get_extent(inode, page, pg_offset, cur,
end - cur + 1, 1);
- if (IS_ERR(em) || !em) {
+ if (IS_ERR_OR_NULL(em)) {
SetPageError(page);
break;
}
@@ -2730,128 +2684,6 @@ int extent_invalidatepage(struct extent_io_tree *tree,
}
/*
- * simple commit_write call, set_range_dirty is used to mark both
- * the pages and the extent records as dirty
- */
-int extent_commit_write(struct extent_io_tree *tree,
- struct inode *inode, struct page *page,
- unsigned from, unsigned to)
-{
- loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
-
- set_page_extent_mapped(page);
- set_page_dirty(page);
-
- if (pos > inode->i_size) {
- i_size_write(inode, pos);
- mark_inode_dirty(inode);
- }
- return 0;
-}
-
-int extent_prepare_write(struct extent_io_tree *tree,
- struct inode *inode, struct page *page,
- unsigned from, unsigned to, get_extent_t *get_extent)
-{
- u64 page_start = (u64)page->index << PAGE_CACHE_SHIFT;
- u64 page_end = page_start + PAGE_CACHE_SIZE - 1;
- u64 block_start;
- u64 orig_block_start;
- u64 block_end;
- u64 cur_end;
- struct extent_map *em;
- unsigned blocksize = 1 << inode->i_blkbits;
- size_t page_offset = 0;
- size_t block_off_start;
- size_t block_off_end;
- int err = 0;
- int iocount = 0;
- int ret = 0;
- int isnew;
-
- set_page_extent_mapped(page);
-
- block_start = (page_start + from) & ~((u64)blocksize - 1);
- block_end = (page_start + to - 1) | (blocksize - 1);
- orig_block_start = block_start;
-
- lock_extent(tree, page_start, page_end, GFP_NOFS);
- while (block_start <= block_end) {
- em = get_extent(inode, page, page_offset, block_start,
- block_end - block_start + 1, 1);
- if (IS_ERR(em) || !em)
- goto err;
-
- cur_end = min(block_end, extent_map_end(em) - 1);
- block_off_start = block_start & (PAGE_CACHE_SIZE - 1);
- block_off_end = block_off_start + blocksize;
- isnew = clear_extent_new(tree, block_start, cur_end, GFP_NOFS);
-
- if (!PageUptodate(page) && isnew &&
- (block_off_end > to || block_off_start < from)) {
- void *kaddr;
-
- kaddr = kmap_atomic(page, KM_USER0);
- if (block_off_end > to)
- memset(kaddr + to, 0, block_off_end - to);
- if (block_off_start < from)
- memset(kaddr + block_off_start, 0,
- from - block_off_start);
- flush_dcache_page(page);
- kunmap_atomic(kaddr, KM_USER0);
- }
- if ((em->block_start != EXTENT_MAP_HOLE &&
- em->block_start != EXTENT_MAP_INLINE) &&
- !isnew && !PageUptodate(page) &&
- (block_off_end > to || block_off_start < from) &&
- !test_range_bit(tree, block_start, cur_end,
- EXTENT_UPTODATE, 1, NULL)) {
- u64 sector;
- u64 extent_offset = block_start - em->start;
- size_t iosize;
- sector = (em->block_start + extent_offset) >> 9;
- iosize = (cur_end - block_start + blocksize) &
- ~((u64)blocksize - 1);
- /*
- * we've already got the extent locked, but we
- * need to split the state such that our end_bio
- * handler can clear the lock.
- */
- set_extent_bit(tree, block_start,
- block_start + iosize - 1,
- EXTENT_LOCKED, 0, NULL, NULL, GFP_NOFS);
- ret = submit_extent_page(READ, tree, page,
- sector, iosize, page_offset, em->bdev,
- NULL, 1,
- end_bio_extent_preparewrite, 0,
- 0, 0);
- if (ret && !err)
- err = ret;
- iocount++;
- block_start = block_start + iosize;
- } else {
- struct extent_state *cached = NULL;
-
- set_extent_uptodate(tree, block_start, cur_end, &cached,
- GFP_NOFS);
- unlock_extent_cached(tree, block_start, cur_end,
- &cached, GFP_NOFS);
- block_start = cur_end + 1;
- }
- page_offset = block_start & (PAGE_CACHE_SIZE - 1);
- free_extent_map(em);
- }
- if (iocount) {
- wait_extent_bit(tree, orig_block_start,
- block_end, EXTENT_LOCKED);
- }
- check_page_uptodate(tree, page);
-err:
- /* FIXME, zero out newly allocated blocks on error */
- return err;
-}
-
-/*
* a helper for releasepage, this tests for areas of the page that
* are locked or under IO and drops the related state bits if it is safe
* to drop the page.
@@ -2909,7 +2741,7 @@ int try_release_extent_mapping(struct extent_map_tree *map,
len = end - start + 1;
write_lock(&map->lock);
em = lookup_extent_mapping(map, start, len);
- if (!em || IS_ERR(em)) {
+ if (IS_ERR_OR_NULL(em)) {
write_unlock(&map->lock);
break;
}
@@ -2937,33 +2769,6 @@ int try_release_extent_mapping(struct extent_map_tree *map,
return try_release_extent_state(map, tree, page, mask);
}
-sector_t extent_bmap(struct address_space *mapping, sector_t iblock,
- get_extent_t *get_extent)
-{
- struct inode *inode = mapping->host;
- struct extent_state *cached_state = NULL;
- u64 start = iblock << inode->i_blkbits;
- sector_t sector = 0;
- size_t blksize = (1 << inode->i_blkbits);
- struct extent_map *em;
-
- lock_extent_bits(&BTRFS_I(inode)->io_tree, start, start + blksize - 1,
- 0, &cached_state, GFP_NOFS);
- em = get_extent(inode, NULL, 0, start, blksize, 0);
- unlock_extent_cached(&BTRFS_I(inode)->io_tree, start,
- start + blksize - 1, &cached_state, GFP_NOFS);
- if (!em || IS_ERR(em))
- return 0;
-
- if (em->block_start > EXTENT_MAP_LAST_BYTE)
- goto out;
-
- sector = (em->block_start + start - em->start) >> inode->i_blkbits;
-out:
- free_extent_map(em);
- return sector;
-}
-
/*
* helper function for fiemap, which doesn't want to see any holes.
* This maps until we find something past 'last'
@@ -2986,7 +2791,7 @@ static struct extent_map *get_extent_skip_holes(struct inode *inode,
break;
len = (len + sectorsize - 1) & ~(sectorsize - 1);
em = get_extent(inode, NULL, 0, offset, len, 0);
- if (!em || IS_ERR(em))
+ if (IS_ERR_OR_NULL(em))
return em;
/* if this isn't a hole return it */
@@ -3040,7 +2845,7 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
* because there might be preallocation past i_size
*/
ret = btrfs_lookup_file_extent(NULL, BTRFS_I(inode)->root,
- path, inode->i_ino, -1, 0);
+ path, btrfs_ino(inode), -1, 0);
if (ret < 0) {
btrfs_free_path(path);
return ret;
@@ -3053,7 +2858,7 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
found_type = btrfs_key_type(&found_key);
/* No extents, but there might be delalloc bits */
- if (found_key.objectid != inode->i_ino ||
+ if (found_key.objectid != btrfs_ino(inode) ||
found_type != BTRFS_EXTENT_DATA_KEY) {
/* have to trust i_size as the end */
last = (u64)-1;
@@ -3276,8 +3081,7 @@ static inline void btrfs_release_extent_buffer(struct extent_buffer *eb)
struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
u64 start, unsigned long len,
- struct page *page0,
- gfp_t mask)
+ struct page *page0)
{
unsigned long num_pages = num_extent_pages(start, len);
unsigned long i;
@@ -3298,7 +3102,7 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
}
rcu_read_unlock();
- eb = __alloc_extent_buffer(tree, start, len, mask);
+ eb = __alloc_extent_buffer(tree, start, len, GFP_NOFS);
if (!eb)
return NULL;
@@ -3315,7 +3119,7 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
i = 0;
}
for (; i < num_pages; i++, index++) {
- p = find_or_create_page(mapping, index, mask | __GFP_HIGHMEM);
+ p = find_or_create_page(mapping, index, GFP_NOFS | __GFP_HIGHMEM);
if (!p) {
WARN_ON(1);
goto free_eb;
@@ -3387,8 +3191,7 @@ free_eb:
}
struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree,
- u64 start, unsigned long len,
- gfp_t mask)
+ u64 start, unsigned long len)
{
struct extent_buffer *eb;
@@ -3449,13 +3252,6 @@ int clear_extent_buffer_dirty(struct extent_io_tree *tree,
return 0;
}
-int wait_on_extent_buffer_writeback(struct extent_io_tree *tree,
- struct extent_buffer *eb)
-{
- return wait_on_extent_writeback(tree, eb->start,
- eb->start + eb->len - 1);
-}
-
int set_extent_buffer_dirty(struct extent_io_tree *tree,
struct extent_buffer *eb)
{
OpenPOWER on IntegriCloud