diff options
Diffstat (limited to 'fs/btrfs/inode.c')
-rw-r--r-- | fs/btrfs/inode.c | 987 |
1 files changed, 774 insertions, 213 deletions
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 9096fd0..dae12dc 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -62,7 +62,7 @@ static const struct inode_operations btrfs_special_inode_operations; static const struct inode_operations btrfs_file_inode_operations; static const struct address_space_operations btrfs_aops; static const struct address_space_operations btrfs_symlink_aops; -static struct file_operations btrfs_dir_file_operations; +static const struct file_operations btrfs_dir_file_operations; static struct extent_io_ops btrfs_extent_io_ops; static struct kmem_cache *btrfs_inode_cachep; @@ -231,7 +231,8 @@ static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans, } ret = btrfs_drop_extents(trans, root, inode, start, - aligned_end, aligned_end, start, &hint_byte); + aligned_end, aligned_end, start, + &hint_byte, 1); BUG_ON(ret); if (isize > actual_end) @@ -240,7 +241,7 @@ static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans, inline_len, compressed_size, compressed_pages); BUG_ON(ret); - btrfs_drop_extent_cache(inode, start, aligned_end, 0); + btrfs_drop_extent_cache(inode, start, aligned_end - 1, 0); return 0; } @@ -423,9 +424,12 @@ again: * and free up our temp pages. */ extent_clear_unlock_delalloc(inode, - &BTRFS_I(inode)->io_tree, - start, end, NULL, 1, 0, - 0, 1, 1, 1); + &BTRFS_I(inode)->io_tree, + start, end, NULL, + EXTENT_CLEAR_UNLOCK_PAGE | EXTENT_CLEAR_DIRTY | + EXTENT_CLEAR_DELALLOC | + EXTENT_CLEAR_ACCOUNTING | + EXTENT_SET_WRITEBACK | EXTENT_END_WRITEBACK); ret = 0; goto free_pages_out; } @@ -611,9 +615,9 @@ static noinline int submit_compressed_extents(struct inode *inode, set_bit(EXTENT_FLAG_COMPRESSED, &em->flags); while (1) { - spin_lock(&em_tree->lock); + write_lock(&em_tree->lock); ret = add_extent_mapping(em_tree, em); - spin_unlock(&em_tree->lock); + write_unlock(&em_tree->lock); if (ret != -EEXIST) { free_extent_map(em); break; @@ -636,11 +640,14 @@ static noinline int submit_compressed_extents(struct inode *inode, * clear dirty, set writeback and unlock the pages. */ extent_clear_unlock_delalloc(inode, - &BTRFS_I(inode)->io_tree, - async_extent->start, - async_extent->start + - async_extent->ram_size - 1, - NULL, 1, 1, 0, 1, 1, 0); + &BTRFS_I(inode)->io_tree, + async_extent->start, + async_extent->start + + async_extent->ram_size - 1, + NULL, EXTENT_CLEAR_UNLOCK_PAGE | + EXTENT_CLEAR_UNLOCK | + EXTENT_CLEAR_DELALLOC | + EXTENT_CLEAR_DIRTY | EXTENT_SET_WRITEBACK); ret = btrfs_submit_compressed_write(inode, async_extent->start, @@ -711,9 +718,15 @@ static noinline int cow_file_range(struct inode *inode, start, end, 0, NULL); if (ret == 0) { extent_clear_unlock_delalloc(inode, - &BTRFS_I(inode)->io_tree, - start, end, NULL, 1, 1, - 1, 1, 1, 1); + &BTRFS_I(inode)->io_tree, + start, end, NULL, + EXTENT_CLEAR_UNLOCK_PAGE | + EXTENT_CLEAR_UNLOCK | + EXTENT_CLEAR_DELALLOC | + EXTENT_CLEAR_ACCOUNTING | + EXTENT_CLEAR_DIRTY | + EXTENT_SET_WRITEBACK | + EXTENT_END_WRITEBACK); *nr_written = *nr_written + (end - start + PAGE_CACHE_SIZE) / PAGE_CACHE_SIZE; *page_started = 1; @@ -725,9 +738,20 @@ static noinline int cow_file_range(struct inode *inode, BUG_ON(disk_num_bytes > btrfs_super_total_bytes(&root->fs_info->super_copy)); + + read_lock(&BTRFS_I(inode)->extent_tree.lock); + em = search_extent_mapping(&BTRFS_I(inode)->extent_tree, + start, num_bytes); + if (em) { + alloc_hint = em->block_start; + free_extent_map(em); + } + read_unlock(&BTRFS_I(inode)->extent_tree.lock); btrfs_drop_extent_cache(inode, start, start + num_bytes - 1, 0); while (disk_num_bytes > 0) { + unsigned long op; + cur_alloc_size = min(disk_num_bytes, root->fs_info->max_extent); ret = btrfs_reserve_extent(trans, root, cur_alloc_size, root->sectorsize, 0, alloc_hint, @@ -737,7 +761,6 @@ static noinline int cow_file_range(struct inode *inode, em = alloc_extent_map(GFP_NOFS); em->start = start; em->orig_start = em->start; - ram_size = ins.offset; em->len = ins.offset; @@ -747,9 +770,9 @@ static noinline int cow_file_range(struct inode *inode, set_bit(EXTENT_FLAG_PINNED, &em->flags); while (1) { - spin_lock(&em_tree->lock); + write_lock(&em_tree->lock); ret = add_extent_mapping(em_tree, em); - spin_unlock(&em_tree->lock); + write_unlock(&em_tree->lock); if (ret != -EEXIST) { free_extent_map(em); break; @@ -776,11 +799,17 @@ static noinline int cow_file_range(struct inode *inode, /* we're not doing compressed IO, don't unlock the first * page (which the caller expects to stay locked), don't * clear any dirty bits and don't set any writeback bits + * + * Do set the Private2 bit so we know this page was properly + * setup for writepage */ + op = unlock ? EXTENT_CLEAR_UNLOCK_PAGE : 0; + op |= EXTENT_CLEAR_UNLOCK | EXTENT_CLEAR_DELALLOC | + EXTENT_SET_PRIVATE2; + extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree, start, start + ram_size - 1, - locked_page, unlock, 1, - 1, 0, 0, 0); + locked_page, op); disk_num_bytes -= cur_alloc_size; num_bytes -= cur_alloc_size; alloc_hint = ins.objectid + ins.offset; @@ -852,8 +881,8 @@ static int cow_file_range_async(struct inode *inode, struct page *locked_page, u64 cur_end; int limit = 10 * 1024 * 1042; - clear_extent_bit(&BTRFS_I(inode)->io_tree, start, end, EXTENT_LOCKED | - EXTENT_DELALLOC, 1, 0, GFP_NOFS); + clear_extent_bit(&BTRFS_I(inode)->io_tree, start, end, EXTENT_LOCKED, + 1, 0, NULL, GFP_NOFS); while (start < end) { async_cow = kmalloc(sizeof(*async_cow), GFP_NOFS); async_cow->inode = inode; @@ -994,6 +1023,7 @@ next_slot: if (found_key.offset > cur_offset) { extent_end = found_key.offset; + extent_type = 0; goto out_check; } @@ -1080,9 +1110,9 @@ out_check: em->bdev = root->fs_info->fs_devices->latest_bdev; set_bit(EXTENT_FLAG_PINNED, &em->flags); while (1) { - spin_lock(&em_tree->lock); + write_lock(&em_tree->lock); ret = add_extent_mapping(em_tree, em); - spin_unlock(&em_tree->lock); + write_unlock(&em_tree->lock); if (ret != -EEXIST) { free_extent_map(em); break; @@ -1100,8 +1130,10 @@ out_check: BUG_ON(ret); extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree, - cur_offset, cur_offset + num_bytes - 1, - locked_page, 1, 1, 1, 0, 0, 0); + cur_offset, cur_offset + num_bytes - 1, + locked_page, EXTENT_CLEAR_UNLOCK_PAGE | + EXTENT_CLEAR_UNLOCK | EXTENT_CLEAR_DELALLOC | + EXTENT_SET_PRIVATE2); cur_offset = extent_end; if (cur_offset > end) break; @@ -1147,6 +1179,89 @@ static int run_delalloc_range(struct inode *inode, struct page *locked_page, return ret; } +static int btrfs_split_extent_hook(struct inode *inode, + struct extent_state *orig, u64 split) +{ + struct btrfs_root *root = BTRFS_I(inode)->root; + u64 size; + + if (!(orig->state & EXTENT_DELALLOC)) + return 0; + + size = orig->end - orig->start + 1; + if (size > root->fs_info->max_extent) { + u64 num_extents; + u64 new_size; + + new_size = orig->end - split + 1; + num_extents = div64_u64(size + root->fs_info->max_extent - 1, + root->fs_info->max_extent); + + /* + * if we break a large extent up then leave oustanding_extents + * be, since we've already accounted for the large extent. + */ + if (div64_u64(new_size + root->fs_info->max_extent - 1, + root->fs_info->max_extent) < num_extents) + return 0; + } + + spin_lock(&BTRFS_I(inode)->accounting_lock); + BTRFS_I(inode)->outstanding_extents++; + spin_unlock(&BTRFS_I(inode)->accounting_lock); + + return 0; +} + +/* + * extent_io.c merge_extent_hook, used to track merged delayed allocation + * extents so we can keep track of new extents that are just merged onto old + * extents, such as when we are doing sequential writes, so we can properly + * account for the metadata space we'll need. + */ +static int btrfs_merge_extent_hook(struct inode *inode, + struct extent_state *new, + struct extent_state *other) +{ + struct btrfs_root *root = BTRFS_I(inode)->root; + u64 new_size, old_size; + u64 num_extents; + + /* not delalloc, ignore it */ + if (!(other->state & EXTENT_DELALLOC)) + return 0; + + old_size = other->end - other->start + 1; + if (new->start < other->start) + new_size = other->end - new->start + 1; + else + new_size = new->end - other->start + 1; + + /* we're not bigger than the max, unreserve the space and go */ + if (new_size <= root->fs_info->max_extent) { + spin_lock(&BTRFS_I(inode)->accounting_lock); + BTRFS_I(inode)->outstanding_extents--; + spin_unlock(&BTRFS_I(inode)->accounting_lock); + return 0; + } + + /* + * If we grew by another max_extent, just return, we want to keep that + * reserved amount. + */ + num_extents = div64_u64(old_size + root->fs_info->max_extent - 1, + root->fs_info->max_extent); + if (div64_u64(new_size + root->fs_info->max_extent - 1, + root->fs_info->max_extent) > num_extents) + return 0; + + spin_lock(&BTRFS_I(inode)->accounting_lock); + BTRFS_I(inode)->outstanding_extents--; + spin_unlock(&BTRFS_I(inode)->accounting_lock); + + return 0; +} + /* * extent_io.c set_bit_hook, used to track delayed allocation * bytes in this file, and to maintain the list of inodes that @@ -1155,6 +1270,7 @@ static int run_delalloc_range(struct inode *inode, struct page *locked_page, static int btrfs_set_bit_hook(struct inode *inode, u64 start, u64 end, unsigned long old, unsigned long bits) { + /* * set_bit and clear bit hooks normally require _irqsave/restore * but in this case, we are only testeing for the DELALLOC @@ -1162,6 +1278,10 @@ static int btrfs_set_bit_hook(struct inode *inode, u64 start, u64 end, */ if (!(old & EXTENT_DELALLOC) && (bits & EXTENT_DELALLOC)) { struct btrfs_root *root = BTRFS_I(inode)->root; + + spin_lock(&BTRFS_I(inode)->accounting_lock); + BTRFS_I(inode)->outstanding_extents++; + spin_unlock(&BTRFS_I(inode)->accounting_lock); btrfs_delalloc_reserve_space(root, inode, end - start + 1); spin_lock(&root->fs_info->delalloc_lock); BTRFS_I(inode)->delalloc_bytes += end - start + 1; @@ -1178,22 +1298,31 @@ static int btrfs_set_bit_hook(struct inode *inode, u64 start, u64 end, /* * extent_io.c clear_bit_hook, see set_bit_hook for why */ -static int btrfs_clear_bit_hook(struct inode *inode, u64 start, u64 end, - unsigned long old, unsigned long bits) +static int btrfs_clear_bit_hook(struct inode *inode, + struct extent_state *state, unsigned long bits) { /* * set_bit and clear bit hooks normally require _irqsave/restore * but in this case, we are only testeing for the DELALLOC * bit, which is only set or cleared with irqs on */ - if ((old & EXTENT_DELALLOC) && (bits & EXTENT_DELALLOC)) { + if ((state->state & EXTENT_DELALLOC) && (bits & EXTENT_DELALLOC)) { struct btrfs_root *root = BTRFS_I(inode)->root; + if (bits & EXTENT_DO_ACCOUNTING) { + spin_lock(&BTRFS_I(inode)->accounting_lock); + BTRFS_I(inode)->outstanding_extents--; + spin_unlock(&BTRFS_I(inode)->accounting_lock); + btrfs_unreserve_metadata_for_delalloc(root, inode, 1); + } + spin_lock(&root->fs_info->delalloc_lock); - if (end - start + 1 > root->fs_info->delalloc_bytes) { + if (state->end - state->start + 1 > + root->fs_info->delalloc_bytes) { printk(KERN_INFO "btrfs warning: delalloc account " "%llu %llu\n", - (unsigned long long)end - start + 1, + (unsigned long long) + state->end - state->start + 1, (unsigned long long) root->fs_info->delalloc_bytes); btrfs_delalloc_free_space(root, inode, (u64)-1); @@ -1201,9 +1330,12 @@ static int btrfs_clear_bit_hook(struct inode *inode, u64 start, u64 end, BTRFS_I(inode)->delalloc_bytes = 0; } else { btrfs_delalloc_free_space(root, inode, - end - start + 1); - root->fs_info->delalloc_bytes -= end - start + 1; - BTRFS_I(inode)->delalloc_bytes -= end - start + 1; + state->end - + state->start + 1); + root->fs_info->delalloc_bytes -= state->end - + state->start + 1; + BTRFS_I(inode)->delalloc_bytes -= state->end - + state->start + 1; } if (BTRFS_I(inode)->delalloc_bytes == 0 && !list_empty(&BTRFS_I(inode)->delalloc_inodes)) { @@ -1374,10 +1506,8 @@ again: lock_extent(&BTRFS_I(inode)->io_tree, page_start, page_end, GFP_NOFS); /* already ordered? We're done */ - if (test_range_bit(&BTRFS_I(inode)->io_tree, page_start, page_end, - EXTENT_ORDERED, 0)) { + if (PagePrivate2(page)) goto out; - } ordered = btrfs_lookup_ordered_extent(inode, page_start); if (ordered) { @@ -1413,11 +1543,9 @@ static int btrfs_writepage_start_hook(struct page *page, u64 start, u64 end) struct inode *inode = page->mapping->host; struct btrfs_writepage_fixup *fixup; struct btrfs_root *root = BTRFS_I(inode)->root; - int ret; - ret = test_range_bit(&BTRFS_I(inode)->io_tree, start, end, - EXTENT_ORDERED, 0); - if (ret) + /* this page is properly in the ordered list */ + if (TestClearPagePrivate2(page)) return 0; if (PageChecked(page)) @@ -1455,9 +1583,19 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans, BUG_ON(!path); path->leave_spinning = 1; + + /* + * we may be replacing one extent in the tree with another. + * The new extent is pinned in the extent map, and we don't want + * to drop it from the cache until it is completely in the btree. + * + * So, tell btrfs_drop_extents to leave this extent in the cache. + * the caller is expected to unpin it and allow it to be merged + * with the others. + */ ret = btrfs_drop_extents(trans, root, inode, file_pos, file_pos + num_bytes, locked_end, - file_pos, &hint); + file_pos, &hint, 0); BUG_ON(ret); ins.objectid = inode->i_ino; @@ -1485,7 +1623,6 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans, btrfs_mark_buffer_dirty(leaf); inode_add_bytes(inode, num_bytes); - btrfs_drop_extent_cache(inode, file_pos, file_pos + num_bytes - 1, 0); ins.objectid = disk_bytenr; ins.offset = disk_num_bytes; @@ -1596,6 +1733,9 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) ordered_extent->len, compressed, 0, 0, BTRFS_FILE_EXTENT_REG); + unpin_extent_cache(&BTRFS_I(inode)->extent_tree, + ordered_extent->file_offset, + ordered_extent->len); BUG_ON(ret); } unlock_extent(io_tree, ordered_extent->file_offset, @@ -1623,6 +1763,7 @@ nocow: static int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end, struct extent_state *state, int uptodate) { + ClearPagePrivate2(page); return btrfs_finish_ordered_io(page->mapping->host, start, end); } @@ -1669,13 +1810,13 @@ static int btrfs_io_failed_hook(struct bio *failed_bio, failrec->last_mirror = 0; failrec->bio_flags = 0; - spin_lock(&em_tree->lock); + read_lock(&em_tree->lock); em = lookup_extent_mapping(em_tree, start, failrec->len); if (em->start > start || em->start + em->len < start) { free_extent_map(em); em = NULL; } - spin_unlock(&em_tree->lock); + read_unlock(&em_tree->lock); if (!em || IS_ERR(em)) { kfree(failrec); @@ -1794,7 +1935,7 @@ static int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end, return 0; if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID && - test_range_bit(io_tree, start, end, EXTENT_NODATASUM, 1)) { + test_range_bit(io_tree, start, end, EXTENT_NODATASUM, 1, NULL)) { clear_extent_bits(io_tree, start, end, EXTENT_NODATASUM, GFP_NOFS); return 0; @@ -2352,6 +2493,69 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry) return ret; } +int btrfs_unlink_subvol(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct inode *dir, u64 objectid, + const char *name, int name_len) +{ + struct btrfs_path *path; + struct extent_buffer *leaf; + struct btrfs_dir_item *di; + struct btrfs_key key; + u64 index; + int ret; + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + di = btrfs_lookup_dir_item(trans, root, path, dir->i_ino, + name, name_len, -1); + BUG_ON(!di || IS_ERR(di)); + + leaf = path->nodes[0]; + btrfs_dir_item_key_to_cpu(leaf, di, &key); + WARN_ON(key.type != BTRFS_ROOT_ITEM_KEY || key.objectid != objectid); + ret = btrfs_delete_one_dir_name(trans, root, path, di); + BUG_ON(ret); + btrfs_release_path(root, path); + + ret = btrfs_del_root_ref(trans, root->fs_info->tree_root, + objectid, root->root_key.objectid, + dir->i_ino, &index, name, name_len); + if (ret < 0) { + BUG_ON(ret != -ENOENT); + di = btrfs_search_dir_index_item(root, path, dir->i_ino, + name, name_len); + BUG_ON(!di || IS_ERR(di)); + + leaf = path->nodes[0]; + btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); + btrfs_release_path(root, path); + index = key.offset; + } + + di = btrfs_lookup_dir_index_item(trans, root, path, dir->i_ino, + index, name, name_len, -1); + BUG_ON(!di || IS_ERR(di)); + + leaf = path->nodes[0]; + btrfs_dir_item_key_to_cpu(leaf, di, &key); + WARN_ON(key.type != BTRFS_ROOT_ITEM_KEY || key.objectid != objectid); + ret = btrfs_delete_one_dir_name(trans, root, path, di); + BUG_ON(ret); + btrfs_release_path(root, path); + + btrfs_i_size_write(dir, dir->i_size - name_len * 2); + dir->i_mtime = dir->i_ctime = CURRENT_TIME; + ret = btrfs_update_inode(trans, root, dir); + BUG_ON(ret); + dir->i_sb->s_dirt = 1; + + btrfs_free_path(path); + return 0; +} + static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) { struct inode *inode = dentry->d_inode; @@ -2361,29 +2565,31 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) struct btrfs_trans_handle *trans; unsigned long nr = 0; - /* - * the FIRST_FREE_OBJECTID check makes sure we don't try to rmdir - * the root of a subvolume or snapshot - */ if (inode->i_size > BTRFS_EMPTY_DIR_SIZE || - inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) { + inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) return -ENOTEMPTY; - } trans = btrfs_start_transaction(root, 1); btrfs_set_trans_block_group(trans, dir); + if (unlikely(inode->i_ino == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) { + err = btrfs_unlink_subvol(trans, root, dir, + BTRFS_I(inode)->location.objectid, + dentry->d_name.name, + dentry->d_name.len); + goto out; + } + err = btrfs_orphan_add(trans, inode); if (err) - goto fail_trans; + goto out; /* now the directory is empty */ err = btrfs_unlink_inode(trans, root, dir, dentry->d_inode, dentry->d_name.name, dentry->d_name.len); if (!err) btrfs_i_size_write(inode, 0); - -fail_trans: +out: nr = trans->blocks_used; ret = btrfs_end_transaction_throttle(trans, root); btrfs_btree_balance_dirty(root, nr); @@ -2826,12 +3032,22 @@ static int btrfs_truncate_page(struct address_space *mapping, loff_t from) if ((offset & (blocksize - 1)) == 0) goto out; + ret = btrfs_check_data_free_space(root, inode, PAGE_CACHE_SIZE); + if (ret) + goto out; + + ret = btrfs_reserve_metadata_for_delalloc(root, inode, 1); + if (ret) + goto out; ret = -ENOMEM; again: page = grab_cache_page(mapping, index); - if (!page) + if (!page) { + btrfs_free_reserved_data_space(root, inode, PAGE_CACHE_SIZE); + btrfs_unreserve_metadata_for_delalloc(root, inode, 1); goto out; + } page_start = page_offset(page); page_end = page_start + PAGE_CACHE_SIZE - 1; @@ -2864,7 +3080,16 @@ again: goto again; } - btrfs_set_extent_delalloc(inode, page_start, page_end); + clear_extent_bits(&BTRFS_I(inode)->io_tree, page_start, page_end, + EXTENT_DIRTY | EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING, + GFP_NOFS); + + ret = btrfs_set_extent_delalloc(inode, page_start, page_end); + if (ret) { + unlock_extent(io_tree, page_start, page_end, GFP_NOFS); + goto out_unlock; + } + ret = 0; if (offset != PAGE_CACHE_SIZE) { kaddr = kmap(page); @@ -2877,6 +3102,9 @@ again: unlock_extent(io_tree, page_start, page_end, GFP_NOFS); out_unlock: + if (ret) + btrfs_free_reserved_data_space(root, inode, PAGE_CACHE_SIZE); + btrfs_unreserve_metadata_for_delalloc(root, inode, 1); unlock_page(page); page_cache_release(page); out: @@ -2895,17 +3123,15 @@ int btrfs_cont_expand(struct inode *inode, loff_t size) u64 last_byte; u64 cur_offset; u64 hole_size; - int err; + int err = 0; if (size <= hole_start) return 0; - err = btrfs_check_metadata_free_space(root); + err = btrfs_truncate_page(inode->i_mapping, inode->i_size); if (err) return err; - btrfs_truncate_page(inode->i_mapping, inode->i_size); - while (1) { struct btrfs_ordered_extent *ordered; btrfs_wait_ordered_range(inode, hole_start, @@ -2935,15 +3161,21 @@ int btrfs_cont_expand(struct inode *inode, loff_t size) cur_offset, cur_offset + hole_size, block_end, - cur_offset, &hint_byte); + cur_offset, &hint_byte, 1); + if (err) + break; + + err = btrfs_reserve_metadata_space(root, 1); if (err) break; + err = btrfs_insert_file_extent(trans, root, inode->i_ino, cur_offset, 0, 0, hole_size, 0, hole_size, 0, 0, 0); btrfs_drop_extent_cache(inode, hole_start, last_byte - 1, 0); + btrfs_unreserve_metadata_space(root, 1); } free_extent_map(em); cur_offset = last_byte; @@ -3003,6 +3235,11 @@ void btrfs_delete_inode(struct inode *inode) } btrfs_wait_ordered_range(inode, 0, (u64)-1); + if (inode->i_nlink > 0) { + BUG_ON(btrfs_root_refs(&root->root_item) != 0); + goto no_delete; + } + btrfs_i_size_write(inode, 0); trans = btrfs_join_transaction(root, 1); @@ -3070,29 +3307,67 @@ out_err: * is kind of like crossing a mount point. */ static int fixup_tree_root_location(struct btrfs_root *root, - struct btrfs_key *location, - struct btrfs_root **sub_root, - struct dentry *dentry) + struct inode *dir, + struct dentry *dentry, + struct btrfs_key *location, + struct btrfs_root **sub_root) { - struct btrfs_root_item *ri; + struct btrfs_path *path; + struct btrfs_root *new_root; + struct btrfs_root_ref *ref; + struct extent_buffer *leaf; + int ret; + int err = 0; - if (btrfs_key_type(location) != BTRFS_ROOT_ITEM_KEY) - return 0; - if (location->objectid == BTRFS_ROOT_TREE_OBJECTID) - return 0; + path = btrfs_alloc_path(); + if (!path) { + err = -ENOMEM; + goto out; + } - *sub_root = btrfs_read_fs_root(root->fs_info, location, - dentry->d_name.name, - dentry->d_name.len); - if (IS_ERR(*sub_root)) - return PTR_ERR(*sub_root); + err = -ENOENT; + ret = btrfs_find_root_ref(root->fs_info->tree_root, path, + BTRFS_I(dir)->root->root_key.objectid, + location->objectid); + if (ret) { + if (ret < 0) + err = ret; + goto out; + } - ri = &(*sub_root)->root_item; - location->objectid = btrfs_root_dirid(ri); - btrfs_set_key_type(location, BTRFS_INODE_ITEM_KEY); - location->offset = 0; + leaf = path->nodes[0]; + ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_root_ref); + if (btrfs_root_ref_dirid(leaf, ref) != dir->i_ino || + btrfs_root_ref_name_len(leaf, ref) != dentry->d_name.len) + goto out; - return 0; + ret = memcmp_extent_buffer(leaf, dentry->d_name.name, + (unsigned long)(ref + 1), + dentry->d_name.len); + if (ret) + goto out; + + btrfs_release_path(root->fs_info->tree_root, path); + + new_root = btrfs_read_fs_root_no_name(root->fs_info, location); + if (IS_ERR(new_root)) { + err = PTR_ERR(new_root); + goto out; + } + + if (btrfs_root_refs(&new_root->root_item) == 0) { + err = -ENOENT; + goto out; + } + + *sub_root = new_root; + location->objectid = btrfs_root_dirid(&new_root->root_item); + location->type = BTRFS_INODE_ITEM_KEY; + location->offset = 0; + err = 0; +out: + btrfs_free_path(path); + return err; } static void inode_tree_add(struct inode *inode) @@ -3101,11 +3376,13 @@ static void inode_tree_add(struct inode *inode) struct btrfs_inode *entry; struct rb_node **p; struct rb_node *parent; - again: p = &root->inode_tree.rb_node; parent = NULL; + if (hlist_unhashed(&inode->i_hash)) + return; + spin_lock(&root->inode_lock); while (*p) { parent = *p; @@ -3132,13 +3409,87 @@ again: static void inode_tree_del(struct inode *inode) { struct btrfs_root *root = BTRFS_I(inode)->root; + int empty = 0; spin_lock(&root->inode_lock); if (!RB_EMPTY_NODE(&BTRFS_I(inode)->rb_node)) { rb_erase(&BTRFS_I(inode)->rb_node, &root->inode_tree); RB_CLEAR_NODE(&BTRFS_I(inode)->rb_node); + empty = RB_EMPTY_ROOT(&root->inode_tree); } spin_unlock(&root->inode_lock); + + if (empty && btrfs_root_refs(&root->root_item) == 0) { + synchronize_srcu(&root->fs_info->subvol_srcu); + spin_lock(&root->inode_lock); + empty = RB_EMPTY_ROOT(&root->inode_tree); + spin_unlock(&root->inode_lock); + if (empty) + btrfs_add_dead_root(root); + } +} + +int btrfs_invalidate_inodes(struct btrfs_root *root) +{ + struct rb_node *node; + struct rb_node *prev; + struct btrfs_inode *entry; + struct inode *inode; + u64 objectid = 0; + + WARN_ON(btrfs_root_refs(&root->root_item) != 0); + + spin_lock(&root->inode_lock); +again: + node = root->inode_tree.rb_node; + prev = NULL; + while (node) { + prev = node; + entry = rb_entry(node, struct btrfs_inode, rb_node); + + if (objectid < entry->vfs_inode.i_ino) + node = node->rb_left; + else if (objectid > entry->vfs_inode.i_ino) + node = node->rb_right; + else + break; + } + if (!node) { + while (prev) { + entry = rb_entry(prev, struct btrfs_inode, rb_node); + if (objectid <= entry->vfs_inode.i_ino) { + node = prev; + break; + } + prev = rb_next(prev); + } + } + while (node) { + entry = rb_entry(node, struct btrfs_inode, rb_node); + objectid = entry->vfs_inode.i_ino + 1; + inode = igrab(&entry->vfs_inode); + if (inode) { + spin_unlock(&root->inode_lock); + if (atomic_read(&inode->i_count) > 1) + d_prune_aliases(inode); + /* + * btrfs_drop_inode will remove it from + * the inode cache when its usage count + * hits zero. + */ + iput(inode); + cond_resched(); + spin_lock(&root->inode_lock); + goto again; + } + + if (cond_resched_lock(&root->inode_lock)) + goto again; + + node = rb_next(node); + } + spin_unlock(&root->inode_lock); + return 0; } static noinline void init_btrfs_i(struct inode *inode) @@ -3148,6 +3499,7 @@ static noinline void init_btrfs_i(struct inode *inode) bi->generation = 0; bi->sequence = 0; bi->last_trans = 0; + bi->last_sub_trans = 0; bi->logged_trans = 0; bi->delalloc_bytes = 0; bi->reserved_bytes = 0; @@ -3225,15 +3577,41 @@ struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location, return inode; } +static struct inode *new_simple_dir(struct super_block *s, + struct btrfs_key *key, + struct btrfs_root *root) +{ + struct inode *inode = new_inode(s); + + if (!inode) + return ERR_PTR(-ENOMEM); + + init_btrfs_i(inode); + + BTRFS_I(inode)->root = root; + memcpy(&BTRFS_I(inode)->location, key, sizeof(*key)); + BTRFS_I(inode)->dummy_inode = 1; + + inode->i_ino = BTRFS_EMPTY_SUBVOL_DIR_OBJECTID; + inode->i_op = &simple_dir_inode_operations; + inode->i_fop = &simple_dir_operations; + inode->i_mode = S_IFDIR | S_IRUGO | S_IWUSR | S_IXUGO; + inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; + + return inode; +} + struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry) { struct inode *inode; - struct btrfs_inode *bi = BTRFS_I(dir); - struct btrfs_root *root = bi->root; + struct btrfs_root *root = BTRFS_I(dir)->root; struct btrfs_root *sub_root = root; struct btrfs_key location; + int index; int ret; + dentry->d_op = &btrfs_dentry_operations; + if (dentry->d_name.len > BTRFS_NAME_LEN) return ERR_PTR(-ENAMETOOLONG); @@ -3242,29 +3620,52 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry) if (ret < 0) return ERR_PTR(ret); - inode = NULL; - if (location.objectid) { - ret = fixup_tree_root_location(root, &location, &sub_root, - dentry); - if (ret < 0) - return ERR_PTR(ret); - if (ret > 0) - return ERR_PTR(-ENOENT); + if (location.objectid == 0) + return NULL; + + if (location.type == BTRFS_INODE_ITEM_KEY) { + inode = btrfs_iget(dir->i_sb, &location, root); + return inode; + } + + BUG_ON(location.type != BTRFS_ROOT_ITEM_KEY); + + index = srcu_read_lock(&root->fs_info->subvol_srcu); + ret = fixup_tree_root_location(root, dir, dentry, + &location, &sub_root); + if (ret < 0) { + if (ret != -ENOENT) + inode = ERR_PTR(ret); + else + inode = new_simple_dir(dir->i_sb, &location, sub_root); + } else { inode = btrfs_iget(dir->i_sb, &location, sub_root); - if (IS_ERR(inode)) - return ERR_CAST(inode); } + srcu_read_unlock(&root->fs_info->subvol_srcu, index); + return inode; } +static int btrfs_dentry_delete(struct dentry *dentry) +{ + struct btrfs_root *root; + + if (!dentry->d_inode && !IS_ROOT(dentry)) + dentry = dentry->d_parent; + + if (dentry->d_inode) { + root = BTRFS_I(dentry->d_inode)->root; + if (btrfs_root_refs(&root->root_item) == 0) + return 1; + } + return 0; +} + static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) { struct inode *inode; - if (dentry->d_name.len > BTRFS_NAME_LEN) - return ERR_PTR(-ENAMETOOLONG); - inode = btrfs_lookup_dentry(dir, dentry); if (IS_ERR(inode)) return ERR_CAST(inode); @@ -3603,9 +4004,6 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, if (ret != 0) goto fail; - if (objectid > root->highest_inode) - root->highest_inode = objectid; - inode->i_uid = current_fsuid(); if (dir && (dir->i_mode & S_ISGID)) { @@ -3673,26 +4071,35 @@ int btrfs_add_link(struct btrfs_trans_handle *trans, struct inode *parent_inode, struct inode *inode, const char *name, int name_len, int add_backref, u64 index) { - int ret; + int ret = 0; struct btrfs_key key; struct btrfs_root *root = BTRFS_I(parent_inode)->root; - key.objectid = inode->i_ino; - btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); - key.offset = 0; + if (unlikely(inode->i_ino == BTRFS_FIRST_FREE_OBJECTID)) { + memcpy(&key, &BTRFS_I(inode)->root->root_key, sizeof(key)); + } else { + key.objectid = inode->i_ino; + btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); + key.offset = 0; + } + + if (unlikely(inode->i_ino == BTRFS_FIRST_FREE_OBJECTID)) { + ret = btrfs_add_root_ref(trans, root->fs_info->tree_root, + key.objectid, root->root_key.objectid, + parent_inode->i_ino, + index, name, name_len); + } else if (add_backref) { + ret = btrfs_insert_inode_ref(trans, root, + name, name_len, inode->i_ino, + parent_inode->i_ino, index); + } - ret = btrfs_insert_dir_item(trans, root, name, name_len, - parent_inode->i_ino, - &key, btrfs_inode_type(inode), - index); if (ret == 0) { - if (add_backref) { - ret = btrfs_insert_inode_ref(trans, root, - name, name_len, - inode->i_ino, - parent_inode->i_ino, - index); - } + ret = btrfs_insert_dir_item(trans, root, name, name_len, + parent_inode->i_ino, &key, + btrfs_inode_type(inode), index); + BUG_ON(ret); + btrfs_i_size_write(parent_inode, parent_inode->i_size + name_len * 2); parent_inode->i_mtime = parent_inode->i_ctime = CURRENT_TIME; @@ -3732,11 +4139,18 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry, if (!new_valid_dev(rdev)) return -EINVAL; - err = btrfs_check_metadata_free_space(root); + /* + * 2 for inode item and ref + * 2 for dir items + * 1 for xattr if selinux is on + */ + err = btrfs_reserve_metadata_space(root, 5); if (err) - goto fail; + return err; trans = btrfs_start_transaction(root, 1); + if (!trans) + goto fail; btrfs_set_trans_block_group(trans, dir); err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid); @@ -3774,6 +4188,7 @@ out_unlock: nr = trans->blocks_used; btrfs_end_transaction_throttle(trans, root); fail: + btrfs_unreserve_metadata_space(root, 5); if (drop_inode) { inode_dec_link_count(inode); iput(inode); @@ -3794,10 +4209,18 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, u64 objectid; u64 index = 0; - err = btrfs_check_metadata_free_space(root); + /* + * 2 for inode item and ref + * 2 for dir items + * 1 for xattr if selinux is on + */ + err = btrfs_reserve_metadata_space(root, 5); if (err) - goto fail; + return err; + trans = btrfs_start_transaction(root, 1); + if (!trans) + goto fail; btrfs_set_trans_block_group(trans, dir); err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid); @@ -3838,6 +4261,7 @@ out_unlock: nr = trans->blocks_used; btrfs_end_transaction_throttle(trans, root); fail: + btrfs_unreserve_metadata_space(root, 5); if (drop_inode) { inode_dec_link_count(inode); iput(inode); @@ -3860,10 +4284,16 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, if (inode->i_nlink == 0) return -ENOENT; - btrfs_inc_nlink(inode); - err = btrfs_check_metadata_free_space(root); + /* + * 1 item for inode ref + * 2 items for dir items + */ + err = btrfs_reserve_metadata_space(root, 3); if (err) - goto fail; + return err; + + btrfs_inc_nlink(inode); + err = btrfs_set_inode_index(dir, &index); if (err) goto fail; @@ -3875,20 +4305,19 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, err = btrfs_add_nondir(trans, dentry, inode, 1, index); - if (err) - drop_inode = 1; - - btrfs_update_inode_block_group(trans, dir); - err = btrfs_update_inode(trans, root, inode); - - if (err) + if (err) { drop_inode = 1; + } else { + btrfs_update_inode_block_group(trans, dir); + err = btrfs_update_inode(trans, root, inode); + BUG_ON(err); + btrfs_log_new_name(trans, inode, NULL, dentry->d_parent); + } nr = trans->blocks_used; - - btrfs_log_new_name(trans, inode, NULL, dentry->d_parent); btrfs_end_transaction_throttle(trans, root); fail: + btrfs_unreserve_metadata_space(root, 3); if (drop_inode) { inode_dec_link_count(inode); iput(inode); @@ -3908,17 +4337,21 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) u64 index = 0; unsigned long nr = 1; - err = btrfs_check_metadata_free_space(root); + /* + * 2 items for inode and ref + * 2 items for dir items + * 1 for xattr if selinux is on + */ + err = btrfs_reserve_metadata_space(root, 5); if (err) - goto out_unlock; + return err; trans = btrfs_start_transaction(root, 1); - btrfs_set_trans_block_group(trans, dir); - - if (IS_ERR(trans)) { - err = PTR_ERR(trans); + if (!trans) { + err = -ENOMEM; goto out_unlock; } + btrfs_set_trans_block_group(trans, dir); err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid); if (err) { @@ -3967,6 +4400,7 @@ out_fail: btrfs_end_transaction_throttle(trans, root); out_unlock: + btrfs_unreserve_metadata_space(root, 5); if (drop_on_err) iput(inode); btrfs_btree_balance_dirty(root, nr); @@ -4064,11 +4498,11 @@ struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page, int compressed; again: - spin_lock(&em_tree->lock); + read_lock(&em_tree->lock); em = lookup_extent_mapping(em_tree, start, len); if (em) em->bdev = root->fs_info->fs_devices->latest_bdev; - spin_unlock(&em_tree->lock); + read_unlock(&em_tree->lock); if (em) { if (em->start > start || em->start + em->len <= start) @@ -4215,6 +4649,11 @@ again: map = kmap(page); read_extent_buffer(leaf, map + pg_offset, ptr, copy_size); + if (pg_offset + copy_size < PAGE_CACHE_SIZE) { + memset(map + pg_offset + copy_size, 0, + PAGE_CACHE_SIZE - pg_offset - + copy_size); + } kunmap(page); } flush_dcache_page(page); @@ -4259,7 +4698,7 @@ insert: } err = 0; - spin_lock(&em_tree->lock); + write_lock(&em_tree->lock); ret = add_extent_mapping(em_tree, em); /* it is possible that someone inserted the extent into the tree * while we had the lock dropped. It is also possible that @@ -4299,7 +4738,7 @@ insert: err = 0; } } - spin_unlock(&em_tree->lock); + write_unlock(&em_tree->lock); out: if (path) btrfs_free_path(path); @@ -4398,13 +4837,21 @@ static void btrfs_invalidatepage(struct page *page, unsigned long offset) u64 page_start = page_offset(page); u64 page_end = page_start + PAGE_CACHE_SIZE - 1; + + /* + * we have the page locked, so new writeback can't start, + * and the dirty bit won't be cleared while we are here. + * + * Wait for IO on this page so that we can safely clear + * the PagePrivate2 bit and do ordered accounting + */ wait_on_page_writeback(page); + tree = &BTRFS_I(page->mapping->host)->io_tree; if (offset) { btrfs_releasepage(page, GFP_NOFS); return; } - lock_extent(tree, page_start, page_end, GFP_NOFS); ordered = btrfs_lookup_ordered_extent(page->mapping->host, page_offset(page)); @@ -4415,16 +4862,22 @@ static void btrfs_invalidatepage(struct page *page, unsigned long offset) */ clear_extent_bit(tree, page_start, page_end, EXTENT_DIRTY | EXTENT_DELALLOC | - EXTENT_LOCKED, 1, 0, GFP_NOFS); - btrfs_finish_ordered_io(page->mapping->host, - page_start, page_end); + EXTENT_LOCKED | EXTENT_DO_ACCOUNTING, 1, 0, + NULL, GFP_NOFS); + /* + * whoever cleared the private bit is responsible + * for the finish_ordered_io + */ + if (TestClearPagePrivate2(page)) { + btrfs_finish_ordered_io(page->mapping->host, + page_start, page_end); + } btrfs_put_ordered_extent(ordered); lock_extent(tree, page_start, page_end, GFP_NOFS); } clear_extent_bit(tree, page_start, page_end, EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC | - EXTENT_ORDERED, - 1, 1, GFP_NOFS); + EXTENT_DO_ACCOUNTING, 1, 1, NULL, GFP_NOFS); __btrfs_releasepage(page, GFP_NOFS); ClearPageChecked(page); @@ -4473,6 +4926,13 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) goto out; } + ret = btrfs_reserve_metadata_for_delalloc(root, inode, 1); + if (ret) { + btrfs_free_reserved_data_space(root, inode, PAGE_CACHE_SIZE); + ret = VM_FAULT_SIGBUS; + goto out; + } + ret = VM_FAULT_NOPAGE; /* make the VM retry the fault */ again: lock_page(page); @@ -4504,7 +4964,24 @@ again: goto again; } - btrfs_set_extent_delalloc(inode, page_start, page_end); + /* + * XXX - page_mkwrite gets called every time the page is dirtied, even + * if it was already dirty, so for space accounting reasons we need to + * clear any delalloc bits for the range we are fixing to save. There + * is probably a better way to do this, but for now keep consistent with + * prepare_pages in the normal write path. + */ + clear_extent_bits(&BTRFS_I(inode)->io_tree, page_start, page_end, + EXTENT_DIRTY | EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING, + GFP_NOFS); + + ret = btrfs_set_extent_delalloc(inode, page_start, page_end); + if (ret) { + unlock_extent(io_tree, page_start, page_end, GFP_NOFS); + ret = VM_FAULT_SIGBUS; + btrfs_free_reserved_data_space(root, inode, PAGE_CACHE_SIZE); + goto out_unlock; + } ret = 0; /* page is wholly or partially inside EOF */ @@ -4521,11 +4998,17 @@ again: } ClearPageChecked(page); set_page_dirty(page); + SetPageUptodate(page); + + BTRFS_I(inode)->last_trans = root->fs_info->generation; + BTRFS_I(inode)->last_sub_trans = BTRFS_I(inode)->root->log_transid; - BTRFS_I(inode)->last_trans = root->fs_info->generation + 1; unlock_extent(io_tree, page_start, page_end, GFP_NOFS); out_unlock: + btrfs_unreserve_metadata_for_delalloc(root, inode, 1); + if (!ret) + return VM_FAULT_LOCKED; unlock_page(page); out: return ret; @@ -4544,7 +5027,9 @@ static void btrfs_truncate(struct inode *inode) if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) return; - btrfs_truncate_page(inode->i_mapping, inode->i_size); + ret = btrfs_truncate_page(inode->i_mapping, inode->i_size); + if (ret) + return; btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1); trans = btrfs_start_transaction(root, 1); @@ -4594,11 +5079,11 @@ out: * create a new subvolume directory/inode (helper for the ioctl). */ int btrfs_create_subvol_root(struct btrfs_trans_handle *trans, - struct btrfs_root *new_root, struct dentry *dentry, + struct btrfs_root *new_root, u64 new_dirid, u64 alloc_hint) { struct inode *inode; - int error; + int err; u64 index = 0; inode = btrfs_new_inode(trans, new_root, NULL, "..", 2, new_dirid, @@ -4611,11 +5096,10 @@ int btrfs_create_subvol_root(struct btrfs_trans_handle *trans, inode->i_nlink = 1; btrfs_i_size_write(inode, 0); - error = btrfs_update_inode(trans, new_root, inode); - if (error) - return error; + err = btrfs_update_inode(trans, new_root, inode); + BUG_ON(err); - d_instantiate(dentry, inode); + iput(inode); return 0; } @@ -4640,7 +5124,11 @@ struct inode *btrfs_alloc_inode(struct super_block *sb) if (!ei) return NULL; ei->last_trans = 0; + ei->last_sub_trans = 0; ei->logged_trans = 0; + ei->outstanding_extents = 0; + ei->reserved_extents = 0; + spin_lock_init(&ei->accounting_lock); btrfs_ordered_inode_tree_init(&ei->ordered_tree); INIT_LIST_HEAD(&ei->i_orphan); INIT_LIST_HEAD(&ei->ordered_operations); @@ -4693,6 +5181,16 @@ void btrfs_destroy_inode(struct inode *inode) kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode)); } +void btrfs_drop_inode(struct inode *inode) +{ + struct btrfs_root *root = BTRFS_I(inode)->root; + + if (inode->i_nlink > 0 && btrfs_root_refs(&root->root_item) == 0) + generic_delete_inode(inode); + else + generic_drop_inode(inode); +} + static void init_once(void *foo) { struct btrfs_inode *ei = (struct btrfs_inode *) foo; @@ -4761,31 +5259,37 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, { struct btrfs_trans_handle *trans; struct btrfs_root *root = BTRFS_I(old_dir)->root; + struct btrfs_root *dest = BTRFS_I(new_dir)->root; struct inode *new_inode = new_dentry->d_inode; struct inode *old_inode = old_dentry->d_inode; struct timespec ctime = CURRENT_TIME; u64 index = 0; + u64 root_objectid; int ret; - /* we're not allowed to rename between subvolumes */ - if (BTRFS_I(old_inode)->root->root_key.objectid != - BTRFS_I(new_dir)->root->root_key.objectid) + if (new_dir->i_ino == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID) + return -EPERM; + + /* we only allow rename subvolume link between subvolumes */ + if (old_inode->i_ino != BTRFS_FIRST_FREE_OBJECTID && root != dest) return -EXDEV; + if (old_inode->i_ino == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID || + (new_inode && new_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID)) + return -ENOTEMPTY; + if (S_ISDIR(old_inode->i_mode) && new_inode && - new_inode->i_size > BTRFS_EMPTY_DIR_SIZE) { + new_inode->i_size > BTRFS_EMPTY_DIR_SIZE) return -ENOTEMPTY; - } - /* to rename a snapshot or subvolume, we need to juggle the - * backrefs. This isn't coded yet + /* + * 2 items for dir items + * 1 item for orphan entry + * 1 item for ref */ - if (old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) - return -EXDEV; - - ret = btrfs_check_metadata_free_space(root); + ret = btrfs_reserve_metadata_space(root, 4); if (ret) - goto out_unlock; + return ret; /* * we're using rename to replace one file with another. @@ -4796,8 +5300,40 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, old_inode->i_size > BTRFS_ORDERED_OPERATIONS_FLUSH_LIMIT) filemap_flush(old_inode->i_mapping); + /* close the racy window with snapshot create/destroy ioctl */ + if (old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) + down_read(&root->fs_info->subvol_sem); + trans = btrfs_start_transaction(root, 1); + btrfs_set_trans_block_group(trans, new_dir); + + if (dest != root) + btrfs_record_root_in_trans(trans, dest); + ret = btrfs_set_inode_index(new_dir, &index); + if (ret) + goto out_fail; + + if (unlikely(old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID)) { + /* force full log commit if subvolume involved. */ + root->fs_info->last_trans_log_full_commit = trans->transid; + } else { + ret = btrfs_insert_inode_ref(trans, dest, + new_dentry->d_name.name, + new_dentry->d_name.len, + old_inode->i_ino, + new_dir->i_ino, index); + if (ret) + goto out_fail; + /* + * this is an ugly little race, but the rename is required + * to make sure that if we crash, the inode is either at the + * old name or the new one. pinning the log transaction lets + * us make sure we don't allow a log commit to come in after + * we unlink the name but before we add the new name back in. + */ + btrfs_pin_log_trans(root); + } /* * make sure the inode gets flushed if it is replacing * something. @@ -4807,18 +5343,6 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, btrfs_add_ordered_operation(trans, root, old_inode); } - /* - * this is an ugly little race, but the rename is required to make - * sure that if we crash, the inode is either at the old name - * or the new one. pinning the log transaction lets us make sure - * we don't allow a log commit to come in after we unlink the - * name but before we add the new name back in. - */ - btrfs_pin_log_trans(root); - - btrfs_set_trans_block_group(trans, new_dir); - - btrfs_inc_nlink(old_dentry->d_inode); old_dir->i_ctime = old_dir->i_mtime = ctime; new_dir->i_ctime = new_dir->i_mtime = ctime; old_inode->i_ctime = ctime; @@ -4826,47 +5350,60 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, if (old_dentry->d_parent != new_dentry->d_parent) btrfs_record_unlink_dir(trans, old_dir, old_inode, 1); - ret = btrfs_unlink_inode(trans, root, old_dir, old_dentry->d_inode, - old_dentry->d_name.name, - old_dentry->d_name.len); - if (ret) - goto out_fail; + if (unlikely(old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID)) { + root_objectid = BTRFS_I(old_inode)->root->root_key.objectid; + ret = btrfs_unlink_subvol(trans, root, old_dir, root_objectid, + old_dentry->d_name.name, + old_dentry->d_name.len); + } else { + btrfs_inc_nlink(old_dentry->d_inode); + ret = btrfs_unlink_inode(trans, root, old_dir, + old_dentry->d_inode, + old_dentry->d_name.name, + old_dentry->d_name.len); + } + BUG_ON(ret); if (new_inode) { new_inode->i_ctime = CURRENT_TIME; - ret = btrfs_unlink_inode(trans, root, new_dir, - new_dentry->d_inode, - new_dentry->d_name.name, - new_dentry->d_name.len); - if (ret) - goto out_fail; + if (unlikely(new_inode->i_ino == + BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) { + root_objectid = BTRFS_I(new_inode)->location.objectid; + ret = btrfs_unlink_subvol(trans, dest, new_dir, + root_objectid, + new_dentry->d_name.name, + new_dentry->d_name.len); + BUG_ON(new_inode->i_nlink == 0); + } else { + ret = btrfs_unlink_inode(trans, dest, new_dir, + new_dentry->d_inode, + new_dentry->d_name.name, + new_dentry->d_name.len); + } + BUG_ON(ret); if (new_inode->i_nlink == 0) { ret = btrfs_orphan_add(trans, new_dentry->d_inode); - if (ret) - goto out_fail; + BUG_ON(ret); } - } - ret = btrfs_set_inode_index(new_dir, &index); - if (ret) - goto out_fail; - ret = btrfs_add_link(trans, new_dentry->d_parent->d_inode, - old_inode, new_dentry->d_name.name, - new_dentry->d_name.len, 1, index); - if (ret) - goto out_fail; + ret = btrfs_add_link(trans, new_dir, old_inode, + new_dentry->d_name.name, + new_dentry->d_name.len, 0, index); + BUG_ON(ret); - btrfs_log_new_name(trans, old_inode, old_dir, - new_dentry->d_parent); + if (old_inode->i_ino != BTRFS_FIRST_FREE_OBJECTID) { + btrfs_log_new_name(trans, old_inode, old_dir, + new_dentry->d_parent); + btrfs_end_log_trans(root); + } out_fail: - - /* this btrfs_end_log_trans just allows the current - * log-sub transaction to complete - */ - btrfs_end_log_trans(root); btrfs_end_transaction_throttle(trans, root); -out_unlock: + + if (old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) + up_read(&root->fs_info->subvol_sem); + + btrfs_unreserve_metadata_space(root, 4); return ret; } @@ -4938,11 +5475,18 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, if (name_len > BTRFS_MAX_INLINE_DATA_SIZE(root)) return -ENAMETOOLONG; - err = btrfs_check_metadata_free_space(root); + /* + * 2 items for inode item and ref + * 2 items for dir items + * 1 item for xattr if selinux is on + */ + err = btrfs_reserve_metadata_space(root, 5); if (err) - goto out_fail; + return err; trans = btrfs_start_transaction(root, 1); + if (!trans) + goto out_fail; btrfs_set_trans_block_group(trans, dir); err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid); @@ -5023,6 +5567,7 @@ out_unlock: nr = trans->blocks_used; btrfs_end_transaction_throttle(trans, root); out_fail: + btrfs_unreserve_metadata_space(root, 5); if (drop_inode) { inode_dec_link_count(inode); iput(inode); @@ -5044,6 +5589,11 @@ static int prealloc_file_range(struct btrfs_trans_handle *trans, while (num_bytes > 0) { alloc_size = min(num_bytes, root->fs_info->max_extent); + + ret = btrfs_reserve_metadata_space(root, 1); + if (ret) + goto out; + ret = btrfs_reserve_extent(trans, root, alloc_size, root->sectorsize, 0, alloc_hint, (u64)-1, &ins, 1); @@ -5058,9 +5608,12 @@ static int prealloc_file_range(struct btrfs_trans_handle *trans, 0, 0, 0, BTRFS_FILE_EXTENT_PREALLOC); BUG_ON(ret); + btrfs_drop_extent_cache(inode, cur_offset, + cur_offset + ins.offset -1, 0); num_bytes -= ins.offset; cur_offset += ins.offset; alloc_hint = ins.objectid + ins.offset; + btrfs_unreserve_metadata_space(root, 1); } out: if (cur_offset > start) { @@ -5223,7 +5776,8 @@ static const struct inode_operations btrfs_dir_ro_inode_operations = { .lookup = btrfs_lookup, .permission = btrfs_permission, }; -static struct file_operations btrfs_dir_file_operations = { + +static const struct file_operations btrfs_dir_file_operations = { .llseek = generic_file_llseek, .read = generic_read_dir, .readdir = btrfs_real_readdir, @@ -5245,6 +5799,8 @@ static struct extent_io_ops btrfs_extent_io_ops = { .readpage_io_failed_hook = btrfs_io_failed_hook, .set_bit_hook = btrfs_set_bit_hook, .clear_bit_hook = btrfs_clear_bit_hook, + .merge_extent_hook = btrfs_merge_extent_hook, + .split_extent_hook = btrfs_split_extent_hook, }; /* @@ -5269,6 +5825,7 @@ static const struct address_space_operations btrfs_aops = { .invalidatepage = btrfs_invalidatepage, .releasepage = btrfs_releasepage, .set_page_dirty = btrfs_set_page_dirty, + .error_remove_page = generic_error_remove_page, }; static const struct address_space_operations btrfs_symlink_aops = { @@ -5309,3 +5866,7 @@ static const struct inode_operations btrfs_symlink_inode_operations = { .listxattr = btrfs_listxattr, .removexattr = btrfs_removexattr, }; + +const struct dentry_operations btrfs_dentry_operations = { + .d_delete = btrfs_dentry_delete, +}; |