diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2014-04-04 15:31:36 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2014-04-04 15:31:36 -0700 |
commit | 53c566625fb872e7826a237f0f5c21458028e94a (patch) | |
tree | 8ef9990ed2124f085442bc5a44c3f5212bf4002d /fs/btrfs/send.c | |
parent | 34917f9713905a937816ebb7ee5f25bef7a6441c (diff) | |
parent | 00fdf13a2e9f313a044288aa59d3b8ec29ff904a (diff) | |
download | op-kernel-dev-53c566625fb872e7826a237f0f5c21458028e94a.zip op-kernel-dev-53c566625fb872e7826a237f0f5c21458028e94a.tar.gz |
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs
Pull btrfs changes from Chris Mason:
"This is a pretty long stream of bug fixes and performance fixes.
Qu Wenruo has replaced the btrfs async threads with regular kernel
workqueues. We'll keep an eye out for performance differences, but
it's nice to be using more generic code for this.
We still have some corruption fixes and other patches coming in for
the merge window, but this batch is tested and ready to go"
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs: (108 commits)
Btrfs: fix a crash of clone with inline extents's split
btrfs: fix uninit variable warning
Btrfs: take into account total references when doing backref lookup
Btrfs: part 2, fix incremental send's decision to delay a dir move/rename
Btrfs: fix incremental send's decision to delay a dir move/rename
Btrfs: remove unnecessary inode generation lookup in send
Btrfs: fix race when updating existing ref head
btrfs: Add trace for btrfs_workqueue alloc/destroy
Btrfs: less fs tree lock contention when using autodefrag
Btrfs: return EPERM when deleting a default subvolume
Btrfs: add missing kfree in btrfs_destroy_workqueue
Btrfs: cache extent states in defrag code path
Btrfs: fix deadlock with nested trans handles
Btrfs: fix possible empty list access when flushing the delalloc inodes
Btrfs: split the global ordered extents mutex
Btrfs: don't flush all delalloc inodes when we doesn't get s_umount lock
Btrfs: reclaim delalloc metadata more aggressively
Btrfs: remove unnecessary lock in may_commit_transaction()
Btrfs: remove the unnecessary flush when preparing the pages
Btrfs: just do dirty page flush for the inode with compression before direct IO
...
Diffstat (limited to 'fs/btrfs/send.c')
-rw-r--r-- | fs/btrfs/send.c | 821 |
1 files changed, 557 insertions, 264 deletions
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 9dde971..9b6da9d 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -51,15 +51,18 @@ struct fs_path { struct { char *start; char *end; - char *prepared; char *buf; - int buf_len; - unsigned int reversed:1; - unsigned int virtual_mem:1; + unsigned short buf_len:15; + unsigned short reversed:1; char inline_buf[]; }; - char pad[PAGE_SIZE]; + /* + * Average path length does not exceed 200 bytes, we'll have + * better packing in the slab and higher chance to satisfy + * a allocation later during send. + */ + char pad[256]; }; }; #define FS_PATH_INLINE_SIZE \ @@ -109,6 +112,7 @@ struct send_ctx { int cur_inode_deleted; u64 cur_inode_size; u64 cur_inode_mode; + u64 cur_inode_rdev; u64 cur_inode_last_extent; u64 send_progress; @@ -120,6 +124,8 @@ struct send_ctx { struct list_head name_cache_list; int name_cache_size; + struct file_ra_state ra; + char *read_buf; /* @@ -175,6 +181,47 @@ struct send_ctx { * own move/rename can be performed. */ struct rb_root waiting_dir_moves; + + /* + * A directory that is going to be rm'ed might have a child directory + * which is in the pending directory moves index above. In this case, + * the directory can only be removed after the move/rename of its child + * is performed. Example: + * + * Parent snapshot: + * + * . (ino 256) + * |-- a/ (ino 257) + * |-- b/ (ino 258) + * |-- c/ (ino 259) + * | |-- x/ (ino 260) + * | + * |-- y/ (ino 261) + * + * Send snapshot: + * + * . (ino 256) + * |-- a/ (ino 257) + * |-- b/ (ino 258) + * |-- YY/ (ino 261) + * |-- x/ (ino 260) + * + * Sequence of steps that lead to the send snapshot: + * rm -f /a/b/c/foo.txt + * mv /a/b/y /a/b/YY + * mv /a/b/c/x /a/b/YY + * rmdir /a/b/c + * + * When the child is processed, its move/rename is delayed until its + * parent is processed (as explained above), but all other operations + * like update utimes, chown, chgrp, etc, are performed and the paths + * that it uses for those operations must use the orphanized name of + * its parent (the directory we're going to rm later), so we need to + * memorize that name. + * + * Indexed by the inode number of the directory to be deleted. + */ + struct rb_root orphan_dirs; }; struct pending_dir_move { @@ -189,6 +236,18 @@ struct pending_dir_move { struct waiting_dir_move { struct rb_node node; u64 ino; + /* + * There might be some directory that could not be removed because it + * was waiting for this directory inode to be moved first. Therefore + * after this directory is moved, we can try to rmdir the ino rmdir_ino. + */ + u64 rmdir_ino; +}; + +struct orphan_dir_info { + struct rb_node node; + u64 ino; + u64 gen; }; struct name_cache_entry { @@ -214,6 +273,11 @@ struct name_cache_entry { static int is_waiting_for_move(struct send_ctx *sctx, u64 ino); +static struct waiting_dir_move * +get_waiting_dir_move(struct send_ctx *sctx, u64 ino); + +static int is_waiting_for_rm(struct send_ctx *sctx, u64 dir_ino); + static int need_send_hole(struct send_ctx *sctx) { return (sctx->parent_root && !sctx->cur_inode_new && @@ -242,7 +306,6 @@ static struct fs_path *fs_path_alloc(void) if (!p) return NULL; p->reversed = 0; - p->virtual_mem = 0; p->buf = p->inline_buf; p->buf_len = FS_PATH_INLINE_SIZE; fs_path_reset(p); @@ -265,12 +328,8 @@ static void fs_path_free(struct fs_path *p) { if (!p) return; - if (p->buf != p->inline_buf) { - if (p->virtual_mem) - vfree(p->buf); - else - kfree(p->buf); - } + if (p->buf != p->inline_buf) + kfree(p->buf); kfree(p); } @@ -292,40 +351,23 @@ static int fs_path_ensure_buf(struct fs_path *p, int len) path_len = p->end - p->start; old_buf_len = p->buf_len; - len = PAGE_ALIGN(len); - - if (p->buf == p->inline_buf) { - tmp_buf = kmalloc(len, GFP_NOFS | __GFP_NOWARN); - if (!tmp_buf) { - tmp_buf = vmalloc(len); - if (!tmp_buf) - return -ENOMEM; - p->virtual_mem = 1; - } - memcpy(tmp_buf, p->buf, p->buf_len); - p->buf = tmp_buf; - p->buf_len = len; - } else { - if (p->virtual_mem) { - tmp_buf = vmalloc(len); - if (!tmp_buf) - return -ENOMEM; - memcpy(tmp_buf, p->buf, p->buf_len); - vfree(p->buf); - } else { - tmp_buf = krealloc(p->buf, len, GFP_NOFS); - if (!tmp_buf) { - tmp_buf = vmalloc(len); - if (!tmp_buf) - return -ENOMEM; - memcpy(tmp_buf, p->buf, p->buf_len); - kfree(p->buf); - p->virtual_mem = 1; - } - } - p->buf = tmp_buf; - p->buf_len = len; - } + + /* + * First time the inline_buf does not suffice + */ + if (p->buf == p->inline_buf) + tmp_buf = kmalloc(len, GFP_NOFS); + else + tmp_buf = krealloc(p->buf, len, GFP_NOFS); + if (!tmp_buf) + return -ENOMEM; + p->buf = tmp_buf; + /* + * The real size of the buffer is bigger, this will let the fast path + * happen most of the time + */ + p->buf_len = ksize(p->buf); + if (p->reversed) { tmp_buf = p->buf + old_buf_len - path_len - 1; p->end = p->buf + p->buf_len - 1; @@ -338,7 +380,8 @@ static int fs_path_ensure_buf(struct fs_path *p, int len) return 0; } -static int fs_path_prepare_for_add(struct fs_path *p, int name_len) +static int fs_path_prepare_for_add(struct fs_path *p, int name_len, + char **prepared) { int ret; int new_len; @@ -354,11 +397,11 @@ static int fs_path_prepare_for_add(struct fs_path *p, int name_len) if (p->start != p->end) *--p->start = '/'; p->start -= name_len; - p->prepared = p->start; + *prepared = p->start; } else { if (p->start != p->end) *p->end++ = '/'; - p->prepared = p->end; + *prepared = p->end; p->end += name_len; *p->end = 0; } @@ -370,12 +413,12 @@ out: static int fs_path_add(struct fs_path *p, const char *name, int name_len) { int ret; + char *prepared; - ret = fs_path_prepare_for_add(p, name_len); + ret = fs_path_prepare_for_add(p, name_len, &prepared); if (ret < 0) goto out; - memcpy(p->prepared, name, name_len); - p->prepared = NULL; + memcpy(prepared, name, name_len); out: return ret; @@ -384,12 +427,12 @@ out: static int fs_path_add_path(struct fs_path *p, struct fs_path *p2) { int ret; + char *prepared; - ret = fs_path_prepare_for_add(p, p2->end - p2->start); + ret = fs_path_prepare_for_add(p, p2->end - p2->start, &prepared); if (ret < 0) goto out; - memcpy(p->prepared, p2->start, p2->end - p2->start); - p->prepared = NULL; + memcpy(prepared, p2->start, p2->end - p2->start); out: return ret; @@ -400,13 +443,13 @@ static int fs_path_add_from_extent_buffer(struct fs_path *p, unsigned long off, int len) { int ret; + char *prepared; - ret = fs_path_prepare_for_add(p, len); + ret = fs_path_prepare_for_add(p, len, &prepared); if (ret < 0) goto out; - read_extent_buffer(eb, p->prepared, off, len); - p->prepared = NULL; + read_extent_buffer(eb, prepared, off, len); out: return ret; @@ -915,9 +958,7 @@ static int iterate_dir_item(struct btrfs_root *root, struct btrfs_path *path, struct btrfs_dir_item *di; struct btrfs_key di_key; char *buf = NULL; - char *buf2 = NULL; - int buf_len; - int buf_virtual = 0; + const int buf_len = PATH_MAX; u32 name_len; u32 data_len; u32 cur; @@ -927,7 +968,6 @@ static int iterate_dir_item(struct btrfs_root *root, struct btrfs_path *path, int num; u8 type; - buf_len = PAGE_SIZE; buf = kmalloc(buf_len, GFP_NOFS); if (!buf) { ret = -ENOMEM; @@ -949,30 +989,12 @@ static int iterate_dir_item(struct btrfs_root *root, struct btrfs_path *path, type = btrfs_dir_type(eb, di); btrfs_dir_item_key_to_cpu(eb, di, &di_key); + /* + * Path too long + */ if (name_len + data_len > buf_len) { - buf_len = PAGE_ALIGN(name_len + data_len); - if (buf_virtual) { - buf2 = vmalloc(buf_len); - if (!buf2) { - ret = -ENOMEM; - goto out; - } - vfree(buf); - } else { - buf2 = krealloc(buf, buf_len, GFP_NOFS); - if (!buf2) { - buf2 = vmalloc(buf_len); - if (!buf2) { - ret = -ENOMEM; - goto out; - } - kfree(buf); - buf_virtual = 1; - } - } - - buf = buf2; - buf2 = NULL; + ret = -ENAMETOOLONG; + goto out; } read_extent_buffer(eb, buf, (unsigned long)(di + 1), @@ -995,10 +1017,7 @@ static int iterate_dir_item(struct btrfs_root *root, struct btrfs_path *path, } out: - if (buf_virtual) - vfree(buf); - else - kfree(buf); + kfree(buf); return ret; } @@ -1292,8 +1311,6 @@ static int find_extent_clone(struct send_ctx *sctx, extent_item_pos = logical - found_key.objectid; else extent_item_pos = 0; - - extent_item_pos = logical - found_key.objectid; ret = iterate_extent_inodes(sctx->send_root->fs_info, found_key.objectid, extent_item_pos, 1, __iterate_backrefs, backref_ctx); @@ -1418,11 +1435,7 @@ static int gen_unique_name(struct send_ctx *sctx, while (1) { len = snprintf(tmp, sizeof(tmp), "o%llu-%llu-%llu", ino, gen, idx); - if (len >= sizeof(tmp)) { - /* should really not happen */ - ret = -EOVERFLOW; - goto out; - } + ASSERT(len < sizeof(tmp)); di = btrfs_lookup_dir_item(NULL, sctx->send_root, path, BTRFS_FIRST_FREE_OBJECTID, @@ -1898,13 +1911,20 @@ static void name_cache_delete(struct send_ctx *sctx, nce_head = radix_tree_lookup(&sctx->name_cache, (unsigned long)nce->ino); - BUG_ON(!nce_head); + if (!nce_head) { + btrfs_err(sctx->send_root->fs_info, + "name_cache_delete lookup failed ino %llu cache size %d, leaking memory", + nce->ino, sctx->name_cache_size); + } list_del(&nce->radix_list); list_del(&nce->list); sctx->name_cache_size--; - if (list_empty(nce_head)) { + /* + * We may not get to the final release of nce_head if the lookup fails + */ + if (nce_head && list_empty(nce_head)) { radix_tree_delete(&sctx->name_cache, (unsigned long)nce->ino); kfree(nce_head); } @@ -1977,7 +1997,6 @@ static void name_cache_free(struct send_ctx *sctx) */ static int __get_cur_name_and_parent(struct send_ctx *sctx, u64 ino, u64 gen, - int skip_name_cache, u64 *parent_ino, u64 *parent_gen, struct fs_path *dest) @@ -1987,8 +2006,6 @@ static int __get_cur_name_and_parent(struct send_ctx *sctx, struct btrfs_path *path = NULL; struct name_cache_entry *nce = NULL; - if (skip_name_cache) - goto get_ref; /* * First check if we already did a call to this function with the same * ino/gen. If yes, check if the cache entry is still up-to-date. If yes @@ -2033,12 +2050,11 @@ static int __get_cur_name_and_parent(struct send_ctx *sctx, goto out_cache; } -get_ref: /* * Depending on whether the inode was already processed or not, use * send_root or parent_root for ref lookup. */ - if (ino < sctx->send_progress && !skip_name_cache) + if (ino < sctx->send_progress) ret = get_first_ref(sctx->send_root, ino, parent_ino, parent_gen, dest); else @@ -2062,8 +2078,6 @@ get_ref: goto out; ret = 1; } - if (skip_name_cache) - goto out; out_cache: /* @@ -2131,9 +2145,6 @@ static int get_cur_path(struct send_ctx *sctx, u64 ino, u64 gen, u64 parent_inode = 0; u64 parent_gen = 0; int stop = 0; - u64 start_ino = ino; - u64 start_gen = gen; - int skip_name_cache = 0; name = fs_path_alloc(); if (!name) { @@ -2141,31 +2152,33 @@ static int get_cur_path(struct send_ctx *sctx, u64 ino, u64 gen, goto out; } - if (is_waiting_for_move(sctx, ino)) - skip_name_cache = 1; - -again: dest->reversed = 1; fs_path_reset(dest); while (!stop && ino != BTRFS_FIRST_FREE_OBJECTID) { fs_path_reset(name); - ret = __get_cur_name_and_parent(sctx, ino, gen, skip_name_cache, - &parent_inode, &parent_gen, name); + if (is_waiting_for_rm(sctx, ino)) { + ret = gen_unique_name(sctx, ino, gen, name); + if (ret < 0) + goto out; + ret = fs_path_add_path(dest, name); + break; + } + + if (is_waiting_for_move(sctx, ino)) { + ret = get_first_ref(sctx->parent_root, ino, + &parent_inode, &parent_gen, name); + } else { + ret = __get_cur_name_and_parent(sctx, ino, gen, + &parent_inode, + &parent_gen, name); + if (ret) + stop = 1; + } + if (ret < 0) goto out; - if (ret) - stop = 1; - - if (!skip_name_cache && - is_waiting_for_move(sctx, parent_inode)) { - ino = start_ino; - gen = start_gen; - stop = 0; - skip_name_cache = 1; - goto again; - } ret = fs_path_add_path(dest, name); if (ret < 0) @@ -2429,10 +2442,16 @@ verbose_printk("btrfs: send_create_inode %llu\n", ino); if (!p) return -ENOMEM; - ret = get_inode_info(sctx->send_root, ino, NULL, &gen, &mode, NULL, - NULL, &rdev); - if (ret < 0) - goto out; + if (ino != sctx->cur_ino) { + ret = get_inode_info(sctx->send_root, ino, NULL, &gen, &mode, + NULL, NULL, &rdev); + if (ret < 0) + goto out; + } else { + gen = sctx->cur_inode_gen; + mode = sctx->cur_inode_mode; + rdev = sctx->cur_inode_rdev; + } if (S_ISREG(mode)) { cmd = BTRFS_SEND_C_MKFILE; @@ -2512,17 +2531,26 @@ static int did_create_dir(struct send_ctx *sctx, u64 dir) key.objectid = dir; key.type = BTRFS_DIR_INDEX_KEY; key.offset = 0; + ret = btrfs_search_slot(NULL, sctx->send_root, &key, path, 0, 0); + if (ret < 0) + goto out; + while (1) { - ret = btrfs_search_slot_for_read(sctx->send_root, &key, path, - 1, 0); - if (ret < 0) - goto out; - if (!ret) { - eb = path->nodes[0]; - slot = path->slots[0]; - btrfs_item_key_to_cpu(eb, &found_key, slot); + eb = path->nodes[0]; + slot = path->slots[0]; + if (slot >= btrfs_header_nritems(eb)) { + ret = btrfs_next_leaf(sctx->send_root, path); + if (ret < 0) { + goto out; + } else if (ret > 0) { + ret = 0; + break; + } + continue; } - if (ret || found_key.objectid != key.objectid || + + btrfs_item_key_to_cpu(eb, &found_key, slot); + if (found_key.objectid != key.objectid || found_key.type != key.type) { ret = 0; goto out; @@ -2537,8 +2565,7 @@ static int did_create_dir(struct send_ctx *sctx, u64 dir) goto out; } - key.offset = found_key.offset + 1; - btrfs_release_path(path); + path->slots[0]++; } out: @@ -2590,7 +2617,7 @@ struct recorded_ref { * everything mixed. So we first record all refs and later process them. * This function is a helper to record one ref. */ -static int record_ref(struct list_head *head, u64 dir, +static int __record_ref(struct list_head *head, u64 dir, u64 dir_gen, struct fs_path *path) { struct recorded_ref *ref; @@ -2676,12 +2703,78 @@ out: return ret; } +static struct orphan_dir_info * +add_orphan_dir_info(struct send_ctx *sctx, u64 dir_ino) +{ + struct rb_node **p = &sctx->orphan_dirs.rb_node; + struct rb_node *parent = NULL; + struct orphan_dir_info *entry, *odi; + + odi = kmalloc(sizeof(*odi), GFP_NOFS); + if (!odi) + return ERR_PTR(-ENOMEM); + odi->ino = dir_ino; + odi->gen = 0; + + while (*p) { + parent = *p; + entry = rb_entry(parent, struct orphan_dir_info, node); + if (dir_ino < entry->ino) { + p = &(*p)->rb_left; + } else if (dir_ino > entry->ino) { + p = &(*p)->rb_right; + } else { + kfree(odi); + return entry; + } + } + + rb_link_node(&odi->node, parent, p); + rb_insert_color(&odi->node, &sctx->orphan_dirs); + return odi; +} + +static struct orphan_dir_info * +get_orphan_dir_info(struct send_ctx *sctx, u64 dir_ino) +{ + struct rb_node *n = sctx->orphan_dirs.rb_node; + struct orphan_dir_info *entry; + + while (n) { + entry = rb_entry(n, struct orphan_dir_info, node); + if (dir_ino < entry->ino) + n = n->rb_left; + else if (dir_ino > entry->ino) + n = n->rb_right; + else + return entry; + } + return NULL; +} + +static int is_waiting_for_rm(struct send_ctx *sctx, u64 dir_ino) +{ + struct orphan_dir_info *odi = get_orphan_dir_info(sctx, dir_ino); + + return odi != NULL; +} + +static void free_orphan_dir_info(struct send_ctx *sctx, + struct orphan_dir_info *odi) +{ + if (!odi) + return; + rb_erase(&odi->node, &sctx->orphan_dirs); + kfree(odi); +} + /* * Returns 1 if a directory can be removed at this point in time. * We check this by iterating all dir items and checking if the inode behind * the dir item was already processed. */ -static int can_rmdir(struct send_ctx *sctx, u64 dir, u64 send_progress) +static int can_rmdir(struct send_ctx *sctx, u64 dir, u64 dir_gen, + u64 send_progress) { int ret = 0; struct btrfs_root *root = sctx->parent_root; @@ -2704,31 +2797,52 @@ static int can_rmdir(struct send_ctx *sctx, u64 dir, u64 send_progress) key.objectid = dir; key.type = BTRFS_DIR_INDEX_KEY; key.offset = 0; + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); + if (ret < 0) + goto out; while (1) { - ret = btrfs_search_slot_for_read(root, &key, path, 1, 0); - if (ret < 0) - goto out; - if (!ret) { - btrfs_item_key_to_cpu(path->nodes[0], &found_key, - path->slots[0]); + struct waiting_dir_move *dm; + + if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) { + ret = btrfs_next_leaf(root, path); + if (ret < 0) + goto out; + else if (ret > 0) + break; + continue; } - if (ret || found_key.objectid != key.objectid || - found_key.type != key.type) { + btrfs_item_key_to_cpu(path->nodes[0], &found_key, + path->slots[0]); + if (found_key.objectid != key.objectid || + found_key.type != key.type) break; - } di = btrfs_item_ptr(path->nodes[0], path->slots[0], struct btrfs_dir_item); btrfs_dir_item_key_to_cpu(path->nodes[0], di, &loc); + dm = get_waiting_dir_move(sctx, loc.objectid); + if (dm) { + struct orphan_dir_info *odi; + + odi = add_orphan_dir_info(sctx, dir); + if (IS_ERR(odi)) { + ret = PTR_ERR(odi); + goto out; + } + odi->gen = dir_gen; + dm->rmdir_ino = dir; + ret = 0; + goto out; + } + if (loc.objectid > send_progress) { ret = 0; goto out; } - btrfs_release_path(path); - key.offset = found_key.offset + 1; + path->slots[0]++; } ret = 1; @@ -2740,19 +2854,9 @@ out: static int is_waiting_for_move(struct send_ctx *sctx, u64 ino) { - struct rb_node *n = sctx->waiting_dir_moves.rb_node; - struct waiting_dir_move *entry; + struct waiting_dir_move *entry = get_waiting_dir_move(sctx, ino); - while (n) { - entry = rb_entry(n, struct waiting_dir_move, node); - if (ino < entry->ino) - n = n->rb_left; - else if (ino > entry->ino) - n = n->rb_right; - else - return 1; - } - return 0; + return entry != NULL; } static int add_waiting_dir_move(struct send_ctx *sctx, u64 ino) @@ -2765,6 +2869,7 @@ static int add_waiting_dir_move(struct send_ctx *sctx, u64 ino) if (!dm) return -ENOMEM; dm->ino = ino; + dm->rmdir_ino = 0; while (*p) { parent = *p; @@ -2784,31 +2889,41 @@ static int add_waiting_dir_move(struct send_ctx *sctx, u64 ino) return 0; } -static int del_waiting_dir_move(struct send_ctx *sctx, u64 ino) +static struct waiting_dir_move * +get_waiting_dir_move(struct send_ctx *sctx, u64 ino) { struct rb_node *n = sctx->waiting_dir_moves.rb_node; struct waiting_dir_move *entry; while (n) { entry = rb_entry(n, struct waiting_dir_move, node); - if (ino < entry->ino) { + if (ino < entry->ino) n = n->rb_left; - } else if (ino > entry->ino) { + else if (ino > entry->ino) n = n->rb_right; - } else { - rb_erase(&entry->node, &sctx->waiting_dir_moves); - kfree(entry); - return 0; - } + else + return entry; } - return -ENOENT; + return NULL; +} + +static void free_waiting_dir_move(struct send_ctx *sctx, + struct waiting_dir_move *dm) +{ + if (!dm) + return; + rb_erase(&dm->node, &sctx->waiting_dir_moves); + kfree(dm); } -static int add_pending_dir_move(struct send_ctx *sctx, u64 parent_ino) +static int add_pending_dir_move(struct send_ctx *sctx, + u64 ino, + u64 ino_gen, + u64 parent_ino) { struct rb_node **p = &sctx->pending_dir_moves.rb_node; struct rb_node *parent = NULL; - struct pending_dir_move *entry, *pm; + struct pending_dir_move *entry = NULL, *pm; struct recorded_ref *cur; int exists = 0; int ret; @@ -2817,8 +2932,8 @@ static int add_pending_dir_move(struct send_ctx *sctx, u64 parent_ino) if (!pm) return -ENOMEM; pm->parent_ino = parent_ino; - pm->ino = sctx->cur_ino; - pm->gen = sctx->cur_inode_gen; + pm->ino = ino; + pm->gen = ino_gen; INIT_LIST_HEAD(&pm->list); INIT_LIST_HEAD(&pm->update_refs); RB_CLEAR_NODE(&pm->node); @@ -2888,19 +3003,52 @@ static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm) { struct fs_path *from_path = NULL; struct fs_path *to_path = NULL; + struct fs_path *name = NULL; u64 orig_progress = sctx->send_progress; struct recorded_ref *cur; + u64 parent_ino, parent_gen; + struct waiting_dir_move *dm = NULL; + u64 rmdir_ino = 0; int ret; + name = fs_path_alloc(); from_path = fs_path_alloc(); - if (!from_path) - return -ENOMEM; + if (!name || !from_path) { + ret = -ENOMEM; + goto out; + } - sctx->send_progress = pm->ino; - ret = get_cur_path(sctx, pm->ino, pm->gen, from_path); + dm = get_waiting_dir_move(sctx, pm->ino); + ASSERT(dm); + rmdir_ino = dm->rmdir_ino; + free_waiting_dir_move(sctx, dm); + + ret = get_first_ref(sctx->parent_root, pm->ino, + &parent_ino, &parent_gen, name); if (ret < 0) goto out; + if (parent_ino == sctx->cur_ino) { + /* child only renamed, not moved */ + ASSERT(parent_gen == sctx->cur_inode_gen); + ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, + from_path); + if (ret < 0) + goto out; + ret = fs_path_add_path(from_path, name); + if (ret < 0) + goto out; + } else { + /* child moved and maybe renamed too */ + sctx->send_progress = pm->ino; + ret = get_cur_path(sctx, pm->ino, pm->gen, from_path); + if (ret < 0) + goto out; + } + + fs_path_free(name); + name = NULL; + to_path = fs_path_alloc(); if (!to_path) { ret = -ENOMEM; @@ -2908,9 +3056,6 @@ static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm) } sctx->send_progress = sctx->cur_ino + 1; - ret = del_waiting_dir_move(sctx, pm->ino); - ASSERT(ret == 0); - ret = get_cur_path(sctx, pm->ino, pm->gen, to_path); if (ret < 0) goto out; @@ -2919,6 +3064,35 @@ static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm) if (ret < 0) goto out; + if (rmdir_ino) { + struct orphan_dir_info *odi; + + odi = get_orphan_dir_info(sctx, rmdir_ino); + if (!odi) { + /* already deleted */ + goto finish; + } + ret = can_rmdir(sctx, rmdir_ino, odi->gen, sctx->cur_ino + 1); + if (ret < 0) + goto out; + if (!ret) + goto finish; + + name = fs_path_alloc(); + if (!name) { + ret = -ENOMEM; + goto out; + } + ret = get_cur_path(sctx, rmdir_ino, odi->gen, name); + if (ret < 0) + goto out; + ret = send_rmdir(sctx, name); + if (ret < 0) + goto out; + free_orphan_dir_info(sctx, odi); + } + +finish: ret = send_utimes(sctx, pm->ino, pm->gen); if (ret < 0) goto out; @@ -2928,12 +3102,15 @@ static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm) * and old parent(s). */ list_for_each_entry(cur, &pm->update_refs, list) { + if (cur->dir == rmdir_ino) + continue; ret = send_utimes(sctx, cur->dir, cur->dir_gen); if (ret < 0) goto out; } out: + fs_path_free(name); fs_path_free(from_path); fs_path_free(to_path); sctx->send_progress = orig_progress; @@ -3005,17 +3182,19 @@ static int wait_for_parent_move(struct send_ctx *sctx, int ret; u64 ino = parent_ref->dir; u64 parent_ino_before, parent_ino_after; - u64 new_gen, old_gen; + u64 old_gen; struct fs_path *path_before = NULL; struct fs_path *path_after = NULL; int len1, len2; - - if (parent_ref->dir <= sctx->cur_ino) - return 0; + int register_upper_dirs; + u64 gen; if (is_waiting_for_move(sctx, ino)) return 1; + if (parent_ref->dir <= sctx->cur_ino) + return 0; + ret = get_inode_info(sctx->parent_root, ino, NULL, &old_gen, NULL, NULL, NULL, NULL); if (ret == -ENOENT) @@ -3023,12 +3202,7 @@ static int wait_for_parent_move(struct send_ctx *sctx, else if (ret < 0) return ret; - ret = get_inode_info(sctx->send_root, ino, NULL, &new_gen, - NULL, NULL, NULL, NULL); - if (ret < 0) - return ret; - - if (new_gen != old_gen) + if (parent_ref->dir_gen != old_gen) return 0; path_before = fs_path_alloc(); @@ -3051,7 +3225,7 @@ static int wait_for_parent_move(struct send_ctx *sctx, } ret = get_first_ref(sctx->send_root, ino, &parent_ino_after, - NULL, path_after); + &gen, path_after); if (ret == -ENOENT) { ret = 0; goto out; @@ -3061,13 +3235,67 @@ static int wait_for_parent_move(struct send_ctx *sctx, len1 = fs_path_len(path_before); len2 = fs_path_len(path_after); - if ((parent_ino_before != parent_ino_after) && (len1 != len2 || - memcmp(path_before->start, path_after->start, len1))) { + if (parent_ino_before != parent_ino_after || len1 != len2 || + memcmp(path_before->start, path_after->start, len1)) { ret = 1; goto out; } ret = 0; + /* + * Ok, our new most direct ancestor has a higher inode number but + * wasn't moved/renamed. So maybe some of the new ancestors higher in + * the hierarchy have an higher inode number too *and* were renamed + * or moved - in this case we need to wait for the ancestor's rename + * or move operation before we can do the move/rename for the current + * inode. + */ + register_upper_dirs = 0; + ino = parent_ino_after; +again: + while ((ret == 0 || register_upper_dirs) && ino > sctx->cur_ino) { + u64 parent_gen; + + fs_path_reset(path_before); + fs_path_reset(path_after); + + ret = get_first_ref(sctx->send_root, ino, &parent_ino_after, + &parent_gen, path_after); + if (ret < 0) + goto out; + ret = get_first_ref(sctx->parent_root, ino, &parent_ino_before, + NULL, path_before); + if (ret == -ENOENT) { + ret = 0; + break; + } else if (ret < 0) { + goto out; + } + + len1 = fs_path_len(path_before); + len2 = fs_path_len(path_after); + if (parent_ino_before != parent_ino_after || len1 != len2 || + memcmp(path_before->start, path_after->start, len1)) { + ret = 1; + if (register_upper_dirs) { + break; + } else { + register_upper_dirs = 1; + ino = parent_ref->dir; + gen = parent_ref->dir_gen; + goto again; + } + } else if (register_upper_dirs) { + ret = add_pending_dir_move(sctx, ino, gen, + parent_ino_after); + if (ret < 0 && ret != -EEXIST) + goto out; + } + + ino = parent_ino_after; + gen = parent_gen; + } + out: fs_path_free(path_before); fs_path_free(path_after); @@ -3089,6 +3317,7 @@ static int process_recorded_refs(struct send_ctx *sctx, int *pending_move) u64 ow_gen; int did_overwrite = 0; int is_orphan = 0; + u64 last_dir_ino_rm = 0; verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino); @@ -3227,9 +3456,14 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino); * dirs, we always have one new and one deleted * ref. The deleted ref is ignored later. */ - if (wait_for_parent_move(sctx, cur)) { + ret = wait_for_parent_move(sctx, cur); + if (ret < 0) + goto out; + if (ret) { ret = add_pending_dir_move(sctx, - cur->dir); + sctx->cur_ino, + sctx->cur_inode_gen, + cur->dir); *pending_move = 1; } else { ret = send_rename(sctx, valid_path, @@ -3259,7 +3493,8 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino); * later, we do this check again and rmdir it then if possible. * See the use of check_dirs for more details. */ - ret = can_rmdir(sctx, sctx->cur_ino, sctx->cur_ino); + ret = can_rmdir(sctx, sctx->cur_ino, sctx->cur_inode_gen, + sctx->cur_ino); if (ret < 0) goto out; if (ret) { @@ -3350,8 +3585,10 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino); ret = send_utimes(sctx, cur->dir, cur->dir_gen); if (ret < 0) goto out; - } else if (ret == inode_state_did_delete) { - ret = can_rmdir(sctx, cur->dir, sctx->cur_ino); + } else if (ret == inode_state_did_delete && + cur->dir != last_dir_ino_rm) { + ret = can_rmdir(sctx, cur->dir, cur->dir_gen, + sctx->cur_ino); if (ret < 0) goto out; if (ret) { @@ -3362,6 +3599,7 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino); ret = send_rmdir(sctx, valid_path); if (ret < 0) goto out; + last_dir_ino_rm = cur->dir; } } } @@ -3375,9 +3613,8 @@ out: return ret; } -static int __record_new_ref(int num, u64 dir, int index, - struct fs_path *name, - void *ctx) +static int record_ref(struct btrfs_root *root, int num, u64 dir, int index, + struct fs_path *name, void *ctx, struct list_head *refs) { int ret = 0; struct send_ctx *sctx = ctx; @@ -3388,7 +3625,7 @@ static int __record_new_ref(int num, u64 dir, int index, if (!p) return -ENOMEM; - ret = get_inode_info(sctx->send_root, dir, NULL, &gen, NULL, NULL, + ret = get_inode_info(root, dir, NULL, &gen, NULL, NULL, NULL, NULL); if (ret < 0) goto out; @@ -3400,7 +3637,7 @@ static int __record_new_ref(int num, u64 dir, int index, if (ret < 0) goto out; - ret = record_ref(&sctx->new_refs, dir, gen, p); + ret = __record_ref(refs, dir, gen, p); out: if (ret) @@ -3408,37 +3645,23 @@ out: return ret; } +static int __record_new_ref(int num, u64 dir, int index, + struct fs_path *name, + void *ctx) +{ + struct send_ctx *sctx = ctx; + return record_ref(sctx->send_root, num, dir, index, name, + ctx, &sctx->new_refs); +} + + static int __record_deleted_ref(int num, u64 dir, int index, struct fs_path *name, void *ctx) { - int ret = 0; struct send_ctx *sctx = ctx; - struct fs_path *p; - u64 gen; - - p = fs_path_alloc(); - if (!p) - return -ENOMEM; - - ret = get_inode_info(sctx->parent_root, dir, NULL, &gen, NULL, NULL, - NULL, NULL); - if (ret < 0) - goto out; - - ret = get_cur_path(sctx, dir, gen, p); - if (ret < 0) - goto out; - ret = fs_path_add_path(p, name); - if (ret < 0) - goto out; - - ret = record_ref(&sctx->deleted_refs, dir, gen, p); - -out: - if (ret) - fs_path_free(p); - return ret; + return record_ref(sctx->parent_root, num, dir, index, name, + ctx, &sctx->deleted_refs); } static int record_new_ref(struct send_ctx *sctx) @@ -3619,21 +3842,31 @@ static int process_all_refs(struct send_ctx *sctx, root = sctx->parent_root; cb = __record_deleted_ref; } else { - BUG(); + btrfs_err(sctx->send_root->fs_info, + "Wrong command %d in process_all_refs", cmd); + ret = -EINVAL; + goto out; } key.objectid = sctx->cmp_key->objectid; key.type = BTRFS_INODE_REF_KEY; key.offset = 0; - while (1) { - ret = btrfs_search_slot_for_read(root, &key, path, 1, 0); - if (ret < 0) - goto out; - if (ret) - break; + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); + if (ret < 0) + goto out; + while (1) { eb = path->nodes[0]; slot = path->slots[0]; + if (slot >= btrfs_header_nritems(eb)) { + ret = btrfs_next_leaf(root, path); + if (ret < 0) + goto out; + else if (ret > 0) + break; + continue; + } + btrfs_item_key_to_cpu(eb, &found_key, slot); if (found_key.objectid != key.objectid || @@ -3642,11 +3875,10 @@ static int process_all_refs(struct send_ctx *sctx, break; ret = iterate_inode_ref(root, path, &found_key, 0, cb, sctx); - btrfs_release_path(path); if (ret < 0) goto out; - key.offset = found_key.offset + 1; + path->slots[0]++; } btrfs_release_path(path); @@ -3927,19 +4159,25 @@ static int process_all_new_xattrs(struct send_ctx *sctx) key.objectid = sctx->cmp_key->objectid; key.type = BTRFS_XATTR_ITEM_KEY; key.offset = 0; - while (1) { - ret = btrfs_search_slot_for_read(root, &key, path, 1, 0); - if (ret < 0) - goto out; - if (ret) { - ret = 0; - goto out; - } + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); + if (ret < 0) + goto out; + while (1) { eb = path->nodes[0]; slot = path->slots[0]; - btrfs_item_key_to_cpu(eb, &found_key, slot); + if (slot >= btrfs_header_nritems(eb)) { + ret = btrfs_next_leaf(root, path); + if (ret < 0) { + goto out; + } else if (ret > 0) { + ret = 0; + break; + } + continue; + } + btrfs_item_key_to_cpu(eb, &found_key, slot); if (found_key.objectid != key.objectid || found_key.type != key.type) { ret = 0; @@ -3951,8 +4189,7 @@ static int process_all_new_xattrs(struct send_ctx *sctx) if (ret < 0) goto out; - btrfs_release_path(path); - key.offset = found_key.offset + 1; + path->slots[0]++; } out: @@ -3991,6 +4228,13 @@ static ssize_t fill_read_buf(struct send_ctx *sctx, u64 offset, u32 len) goto out; last_index = (offset + len - 1) >> PAGE_CACHE_SHIFT; + + /* initial readahead */ + memset(&sctx->ra, 0, sizeof(struct file_ra_state)); + file_ra_state_init(&sctx->ra, inode->i_mapping); + btrfs_force_ra(inode->i_mapping, &sctx->ra, NULL, index, + last_index - index + 1); + while (index <= last_index) { unsigned cur_len = min_t(unsigned, len, PAGE_CACHE_SIZE - pg_offset); @@ -4763,18 +5007,19 @@ static int finish_inode_if_needed(struct send_ctx *sctx, int at_end) ret = apply_children_dir_moves(sctx); if (ret) goto out; + /* + * Need to send that every time, no matter if it actually + * changed between the two trees as we have done changes to + * the inode before. If our inode is a directory and it's + * waiting to be moved/renamed, we will send its utimes when + * it's moved/renamed, therefore we don't need to do it here. + */ + sctx->send_progress = sctx->cur_ino + 1; + ret = send_utimes(sctx, sctx->cur_ino, sctx->cur_inode_gen); + if (ret < 0) + goto out; } - /* - * Need to send that every time, no matter if it actually - * changed between the two trees as we have done changes to - * the inode before. - */ - sctx->send_progress = sctx->cur_ino + 1; - ret = send_utimes(sctx, sctx->cur_ino, sctx->cur_inode_gen); - if (ret < 0) - goto out; - out: return ret; } @@ -4840,6 +5085,8 @@ static int changed_inode(struct send_ctx *sctx, sctx->left_path->nodes[0], left_ii); sctx->cur_inode_mode = btrfs_inode_mode( sctx->left_path->nodes[0], left_ii); + sctx->cur_inode_rdev = btrfs_inode_rdev( + sctx->left_path->nodes[0], left_ii); if (sctx->cur_ino != BTRFS_FIRST_FREE_OBJECTID) ret = send_create_inode_if_needed(sctx); } else if (result == BTRFS_COMPARE_TREE_DELETED) { @@ -4884,6 +5131,8 @@ static int changed_inode(struct send_ctx *sctx, sctx->left_path->nodes[0], left_ii); sctx->cur_inode_mode = btrfs_inode_mode( sctx->left_path->nodes[0], left_ii); + sctx->cur_inode_rdev = btrfs_inode_rdev( + sctx->left_path->nodes[0], left_ii); ret = send_create_inode_if_needed(sctx); if (ret < 0) goto out; @@ -5118,6 +5367,7 @@ out: static int full_send_tree(struct send_ctx *sctx) { int ret; + struct btrfs_trans_handle *trans = NULL; struct btrfs_root *send_root = sctx->send_root; struct btrfs_key key; struct btrfs_key found_key; @@ -5139,6 +5389,19 @@ static int full_send_tree(struct send_ctx *sctx) key.type = BTRFS_INODE_ITEM_KEY; key.offset = 0; +join_trans: + /* + * We need to make sure the transaction does not get committed + * while we do anything on commit roots. Join a transaction to prevent + * this. + */ + trans = btrfs_join_transaction(send_root); + if (IS_ERR(trans)) { + ret = PTR_ERR(trans); + trans = NULL; + goto out; + } + /* * Make sure the tree has not changed after re-joining. We detect this * by comparing start_ctransid and ctransid. They should always match. @@ -5162,6 +5425,19 @@ static int full_send_tree(struct send_ctx *sctx) goto out_finish; while (1) { + /* + * When someone want to commit while we iterate, end the + * joined transaction and rejoin. + */ + if (btrfs_should_end_transaction(trans, send_root)) { + ret = btrfs_end_transaction(trans, send_root); + trans = NULL; + if (ret < 0) + goto out; + btrfs_release_path(path); + goto join_trans; + } + eb = path->nodes[0]; slot = path->slots[0]; btrfs_item_key_to_cpu(eb, &found_key, slot); @@ -5189,6 +5465,12 @@ out_finish: out: btrfs_free_path(path); + if (trans) { + if (!ret) + ret = btrfs_end_transaction(trans, send_root); + else + btrfs_end_transaction(trans, send_root); + } return ret; } @@ -5340,6 +5622,7 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_) sctx->pending_dir_moves = RB_ROOT; sctx->waiting_dir_moves = RB_ROOT; + sctx->orphan_dirs = RB_ROOT; sctx->clone_roots = vzalloc(sizeof(struct clone_root) * (arg->clone_sources_count + 1)); @@ -5477,6 +5760,16 @@ out: kfree(dm); } + WARN_ON(sctx && !ret && !RB_EMPTY_ROOT(&sctx->orphan_dirs)); + while (sctx && !RB_EMPTY_ROOT(&sctx->orphan_dirs)) { + struct rb_node *n; + struct orphan_dir_info *odi; + + n = rb_first(&sctx->orphan_dirs); + odi = rb_entry(n, struct orphan_dir_info, node); + free_orphan_dir_info(sctx, odi); + } + if (sort_clone_roots) { for (i = 0; i < sctx->clone_roots_cnt; i++) btrfs_root_dec_send_in_progress( |