summaryrefslogtreecommitdiffstats
path: root/fs/btrfs
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs')
-rw-r--r--fs/btrfs/Kconfig12
-rw-r--r--fs/btrfs/Makefile7
-rw-r--r--fs/btrfs/acl.c2
-rw-r--r--fs/btrfs/async-thread.c27
-rw-r--r--fs/btrfs/async-thread.h2
-rw-r--r--fs/btrfs/backref.c101
-rw-r--r--fs/btrfs/backref.h2
-rw-r--r--fs/btrfs/btrfs_inode.h46
-rw-r--r--fs/btrfs/check-integrity.c438
-rw-r--r--fs/btrfs/compat.h7
-rw-r--r--fs/btrfs/compression.c14
-rw-r--r--fs/btrfs/ctree.c357
-rw-r--r--fs/btrfs/ctree.h212
-rw-r--r--fs/btrfs/delayed-inode.c65
-rw-r--r--fs/btrfs/delayed-ref.c8
-rw-r--r--fs/btrfs/dev-replace.c37
-rw-r--r--fs/btrfs/dir-item.c8
-rw-r--r--fs/btrfs/disk-io.c413
-rw-r--r--fs/btrfs/disk-io.h17
-rw-r--r--fs/btrfs/export.c1
-rw-r--r--fs/btrfs/extent-tree.c409
-rw-r--r--fs/btrfs/extent_io.c821
-rw-r--r--fs/btrfs/extent_io.h43
-rw-r--r--fs/btrfs/extent_map.h8
-rw-r--r--fs/btrfs/file-item.c92
-rw-r--r--fs/btrfs/file.c180
-rw-r--r--fs/btrfs/free-space-cache.c617
-rw-r--r--fs/btrfs/free-space-cache.h20
-rw-r--r--fs/btrfs/inode-item.c2
-rw-r--r--fs/btrfs/inode-map.c13
-rw-r--r--fs/btrfs/inode.c841
-rw-r--r--fs/btrfs/ioctl.c803
-rw-r--r--fs/btrfs/lzo.c4
-rw-r--r--fs/btrfs/ordered-data.c98
-rw-r--r--fs/btrfs/ordered-data.h14
-rw-r--r--fs/btrfs/print-tree.c109
-rw-r--r--fs/btrfs/qgroup.c69
-rw-r--r--fs/btrfs/raid56.c15
-rw-r--r--fs/btrfs/relocation.c180
-rw-r--r--fs/btrfs/root-tree.c29
-rw-r--r--fs/btrfs/scrub.c200
-rw-r--r--fs/btrfs/send.c435
-rw-r--r--fs/btrfs/super.c193
-rw-r--r--fs/btrfs/tests/btrfs-tests.c74
-rw-r--r--fs/btrfs/tests/btrfs-tests.h59
-rw-r--r--fs/btrfs/tests/extent-buffer-tests.c229
-rw-r--r--fs/btrfs/tests/extent-io-tests.c276
-rw-r--r--fs/btrfs/tests/free-space-tests.c395
-rw-r--r--fs/btrfs/tests/inode-tests.c955
-rw-r--r--fs/btrfs/transaction.c122
-rw-r--r--fs/btrfs/transaction.h4
-rw-r--r--fs/btrfs/tree-defrag.c5
-rw-r--r--fs/btrfs/tree-log.c219
-rw-r--r--fs/btrfs/uuid-tree.c354
-rw-r--r--fs/btrfs/volumes.c647
-rw-r--r--fs/btrfs/volumes.h36
56 files changed, 6899 insertions, 3447 deletions
diff --git a/fs/btrfs/Kconfig b/fs/btrfs/Kconfig
index 2b3b832..f9d5094 100644
--- a/fs/btrfs/Kconfig
+++ b/fs/btrfs/Kconfig
@@ -59,7 +59,8 @@ config BTRFS_FS_RUN_SANITY_TESTS
help
This will run some basic sanity tests on the free space cache
code to make sure it is acting as it should. These are mostly
- regression tests and are only really interesting to btrfs devlopers.
+ regression tests and are only really interesting to btrfs
+ developers.
If unsure, say N.
@@ -72,3 +73,12 @@ config BTRFS_DEBUG
performance, or export extra information via sysfs.
If unsure, say N.
+
+config BTRFS_ASSERT
+ bool "Btrfs assert support"
+ depends on BTRFS_FS
+ help
+ Enable run-time assertion checking. This will result in panics if
+ any of the assertions trip. This is meant for btrfs developers only.
+
+ If unsure, say N.
diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile
index 3932224..1a44e42 100644
--- a/fs/btrfs/Makefile
+++ b/fs/btrfs/Makefile
@@ -8,7 +8,12 @@ btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \
export.o tree-log.o free-space-cache.o zlib.o lzo.o \
compression.o delayed-ref.o relocation.o delayed-inode.o scrub.o \
- reada.o backref.o ulist.o qgroup.o send.o dev-replace.o raid56.o
+ reada.o backref.o ulist.o qgroup.o send.o dev-replace.o raid56.o \
+ uuid-tree.o
btrfs-$(CONFIG_BTRFS_FS_POSIX_ACL) += acl.o
btrfs-$(CONFIG_BTRFS_FS_CHECK_INTEGRITY) += check-integrity.o
+
+btrfs-$(CONFIG_BTRFS_FS_RUN_SANITY_TESTS) += tests/free-space-tests.o \
+ tests/extent-buffer-tests.o tests/btrfs-tests.o \
+ tests/extent-io-tests.o tests/inode-tests.o
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c
index e15d2b0..0890c83 100644
--- a/fs/btrfs/acl.c
+++ b/fs/btrfs/acl.c
@@ -229,7 +229,7 @@ int btrfs_init_acl(struct btrfs_trans_handle *trans,
if (ret > 0) {
/* we need an acl */
ret = btrfs_set_acl(trans, inode, acl, ACL_TYPE_ACCESS);
- } else {
+ } else if (ret < 0) {
cache_no_acl(inode);
}
} else {
diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c
index 58b7d14..8aec751 100644
--- a/fs/btrfs/async-thread.c
+++ b/fs/btrfs/async-thread.c
@@ -107,7 +107,8 @@ static void check_idle_worker(struct btrfs_worker_thread *worker)
worker->idle = 1;
/* the list may be empty if the worker is just starting */
- if (!list_empty(&worker->worker_list)) {
+ if (!list_empty(&worker->worker_list) &&
+ !worker->workers->stopping) {
list_move(&worker->worker_list,
&worker->workers->idle_list);
}
@@ -127,7 +128,8 @@ static void check_busy_worker(struct btrfs_worker_thread *worker)
spin_lock_irqsave(&worker->workers->lock, flags);
worker->idle = 0;
- if (!list_empty(&worker->worker_list)) {
+ if (!list_empty(&worker->worker_list) &&
+ !worker->workers->stopping) {
list_move_tail(&worker->worker_list,
&worker->workers->worker_list);
}
@@ -260,7 +262,7 @@ static struct btrfs_work *get_next_work(struct btrfs_worker_thread *worker,
struct btrfs_work *work = NULL;
struct list_head *cur = NULL;
- if(!list_empty(prio_head))
+ if (!list_empty(prio_head))
cur = prio_head->next;
smp_mb();
@@ -412,6 +414,7 @@ void btrfs_stop_workers(struct btrfs_workers *workers)
int can_stop;
spin_lock_irq(&workers->lock);
+ workers->stopping = 1;
list_splice_init(&workers->idle_list, &workers->worker_list);
while (!list_empty(&workers->worker_list)) {
cur = workers->worker_list.next;
@@ -455,6 +458,7 @@ void btrfs_init_workers(struct btrfs_workers *workers, char *name, int max,
workers->ordered = 0;
workers->atomic_start_pending = 0;
workers->atomic_worker_start = async_helper;
+ workers->stopping = 0;
}
/*
@@ -480,15 +484,19 @@ static int __btrfs_start_workers(struct btrfs_workers *workers)
atomic_set(&worker->num_pending, 0);
atomic_set(&worker->refs, 1);
worker->workers = workers;
- worker->task = kthread_run(worker_loop, worker,
- "btrfs-%s-%d", workers->name,
- workers->num_workers + 1);
+ worker->task = kthread_create(worker_loop, worker,
+ "btrfs-%s-%d", workers->name,
+ workers->num_workers + 1);
if (IS_ERR(worker->task)) {
ret = PTR_ERR(worker->task);
- kfree(worker);
goto fail;
}
+
spin_lock_irq(&workers->lock);
+ if (workers->stopping) {
+ spin_unlock_irq(&workers->lock);
+ goto fail_kthread;
+ }
list_add_tail(&worker->worker_list, &workers->idle_list);
worker->idle = 1;
workers->num_workers++;
@@ -496,8 +504,13 @@ static int __btrfs_start_workers(struct btrfs_workers *workers)
WARN_ON(workers->num_workers_starting < 0);
spin_unlock_irq(&workers->lock);
+ wake_up_process(worker->task);
return 0;
+
+fail_kthread:
+ kthread_stop(worker->task);
fail:
+ kfree(worker);
spin_lock_irq(&workers->lock);
workers->num_workers_starting--;
spin_unlock_irq(&workers->lock);
diff --git a/fs/btrfs/async-thread.h b/fs/btrfs/async-thread.h
index 063698b..1f26792 100644
--- a/fs/btrfs/async-thread.h
+++ b/fs/btrfs/async-thread.h
@@ -107,6 +107,8 @@ struct btrfs_workers {
/* extra name for this worker, used for current->name */
char *name;
+
+ int stopping;
};
void btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work);
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index 8bc5e8c..3775947 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -119,6 +119,26 @@ struct __prelim_ref {
u64 wanted_disk_byte;
};
+static struct kmem_cache *btrfs_prelim_ref_cache;
+
+int __init btrfs_prelim_ref_init(void)
+{
+ btrfs_prelim_ref_cache = kmem_cache_create("btrfs_prelim_ref",
+ sizeof(struct __prelim_ref),
+ 0,
+ SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD,
+ NULL);
+ if (!btrfs_prelim_ref_cache)
+ return -ENOMEM;
+ return 0;
+}
+
+void btrfs_prelim_ref_exit(void)
+{
+ if (btrfs_prelim_ref_cache)
+ kmem_cache_destroy(btrfs_prelim_ref_cache);
+}
+
/*
* the rules for all callers of this function are:
* - obtaining the parent is the goal
@@ -160,12 +180,15 @@ struct __prelim_ref {
static int __add_prelim_ref(struct list_head *head, u64 root_id,
struct btrfs_key *key, int level,
- u64 parent, u64 wanted_disk_byte, int count)
+ u64 parent, u64 wanted_disk_byte, int count,
+ gfp_t gfp_mask)
{
struct __prelim_ref *ref;
- /* in case we're adding delayed refs, we're holding the refs spinlock */
- ref = kmalloc(sizeof(*ref), GFP_ATOMIC);
+ if (root_id == BTRFS_DATA_RELOC_TREE_OBJECTID)
+ return 0;
+
+ ref = kmem_cache_alloc(btrfs_prelim_ref_cache, gfp_mask);
if (!ref)
return -ENOMEM;
@@ -295,17 +318,15 @@ static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info,
ret = btrfs_search_old_slot(root, &ref->key_for_search, path, time_seq);
pr_debug("search slot in root %llu (level %d, ref count %d) returned "
"%d for key (%llu %u %llu)\n",
- (unsigned long long)ref->root_id, level, ref->count, ret,
- (unsigned long long)ref->key_for_search.objectid,
- ref->key_for_search.type,
- (unsigned long long)ref->key_for_search.offset);
+ ref->root_id, level, ref->count, ret,
+ ref->key_for_search.objectid, ref->key_for_search.type,
+ ref->key_for_search.offset);
if (ret < 0)
goto out;
eb = path->nodes[level];
while (!eb) {
- if (!level) {
- WARN_ON(1);
+ if (WARN_ON(!level)) {
ret = 1;
goto out;
}
@@ -365,11 +386,12 @@ static int __resolve_indirect_refs(struct btrfs_fs_info *fs_info,
node = ulist_next(parents, &uiter);
ref->parent = node ? node->val : 0;
ref->inode_list = node ?
- (struct extent_inode_elem *)(uintptr_t)node->aux : 0;
+ (struct extent_inode_elem *)(uintptr_t)node->aux : NULL;
/* additional parents require new refs being added here */
while ((node = ulist_next(parents, &uiter))) {
- new_ref = kmalloc(sizeof(*new_ref), GFP_NOFS);
+ new_ref = kmem_cache_alloc(btrfs_prelim_ref_cache,
+ GFP_NOFS);
if (!new_ref) {
ret = -ENOMEM;
goto out;
@@ -493,7 +515,7 @@ static void __merge_refs(struct list_head *head, int mode)
ref1->count += ref2->count;
list_del(&ref2->list);
- kfree(ref2);
+ kmem_cache_free(btrfs_prelim_ref_cache, ref2);
}
}
@@ -548,7 +570,7 @@ static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq,
ref = btrfs_delayed_node_to_tree_ref(node);
ret = __add_prelim_ref(prefs, ref->root, &op_key,
ref->level + 1, 0, node->bytenr,
- node->ref_mod * sgn);
+ node->ref_mod * sgn, GFP_ATOMIC);
break;
}
case BTRFS_SHARED_BLOCK_REF_KEY: {
@@ -558,7 +580,7 @@ static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq,
ret = __add_prelim_ref(prefs, ref->root, NULL,
ref->level + 1, ref->parent,
node->bytenr,
- node->ref_mod * sgn);
+ node->ref_mod * sgn, GFP_ATOMIC);
break;
}
case BTRFS_EXTENT_DATA_REF_KEY: {
@@ -570,7 +592,7 @@ static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq,
key.offset = ref->offset;
ret = __add_prelim_ref(prefs, ref->root, &key, 0, 0,
node->bytenr,
- node->ref_mod * sgn);
+ node->ref_mod * sgn, GFP_ATOMIC);
break;
}
case BTRFS_SHARED_DATA_REF_KEY: {
@@ -583,7 +605,7 @@ static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq,
key.offset = ref->offset;
ret = __add_prelim_ref(prefs, ref->root, &key, 0,
ref->parent, node->bytenr,
- node->ref_mod * sgn);
+ node->ref_mod * sgn, GFP_ATOMIC);
break;
}
default:
@@ -657,7 +679,7 @@ static int __add_inline_refs(struct btrfs_fs_info *fs_info,
case BTRFS_SHARED_BLOCK_REF_KEY:
ret = __add_prelim_ref(prefs, 0, NULL,
*info_level + 1, offset,
- bytenr, 1);
+ bytenr, 1, GFP_NOFS);
break;
case BTRFS_SHARED_DATA_REF_KEY: {
struct btrfs_shared_data_ref *sdref;
@@ -666,13 +688,13 @@ static int __add_inline_refs(struct btrfs_fs_info *fs_info,
sdref = (struct btrfs_shared_data_ref *)(iref + 1);
count = btrfs_shared_data_ref_count(leaf, sdref);
ret = __add_prelim_ref(prefs, 0, NULL, 0, offset,
- bytenr, count);
+ bytenr, count, GFP_NOFS);
break;
}
case BTRFS_TREE_BLOCK_REF_KEY:
ret = __add_prelim_ref(prefs, offset, NULL,
*info_level + 1, 0,
- bytenr, 1);
+ bytenr, 1, GFP_NOFS);
break;
case BTRFS_EXTENT_DATA_REF_KEY: {
struct btrfs_extent_data_ref *dref;
@@ -687,7 +709,7 @@ static int __add_inline_refs(struct btrfs_fs_info *fs_info,
key.offset = btrfs_extent_data_ref_offset(leaf, dref);
root = btrfs_extent_data_ref_root(leaf, dref);
ret = __add_prelim_ref(prefs, root, &key, 0, 0,
- bytenr, count);
+ bytenr, count, GFP_NOFS);
break;
}
default:
@@ -738,7 +760,7 @@ static int __add_keyed_refs(struct btrfs_fs_info *fs_info,
case BTRFS_SHARED_BLOCK_REF_KEY:
ret = __add_prelim_ref(prefs, 0, NULL,
info_level + 1, key.offset,
- bytenr, 1);
+ bytenr, 1, GFP_NOFS);
break;
case BTRFS_SHARED_DATA_REF_KEY: {
struct btrfs_shared_data_ref *sdref;
@@ -748,13 +770,13 @@ static int __add_keyed_refs(struct btrfs_fs_info *fs_info,
struct btrfs_shared_data_ref);
count = btrfs_shared_data_ref_count(leaf, sdref);
ret = __add_prelim_ref(prefs, 0, NULL, 0, key.offset,
- bytenr, count);
+ bytenr, count, GFP_NOFS);
break;
}
case BTRFS_TREE_BLOCK_REF_KEY:
ret = __add_prelim_ref(prefs, key.offset, NULL,
info_level + 1, 0,
- bytenr, 1);
+ bytenr, 1, GFP_NOFS);
break;
case BTRFS_EXTENT_DATA_REF_KEY: {
struct btrfs_extent_data_ref *dref;
@@ -770,7 +792,7 @@ static int __add_keyed_refs(struct btrfs_fs_info *fs_info,
key.offset = btrfs_extent_data_ref_offset(leaf, dref);
root = btrfs_extent_data_ref_root(leaf, dref);
ret = __add_prelim_ref(prefs, root, &key, 0, 0,
- bytenr, count);
+ bytenr, count, GFP_NOFS);
break;
}
default:
@@ -911,7 +933,6 @@ again:
while (!list_empty(&prefs)) {
ref = list_first_entry(&prefs, struct __prelim_ref, list);
- list_del(&ref->list);
WARN_ON(ref->count < 0);
if (ref->count && ref->root_id && ref->parent == 0) {
/* no parent == root of tree */
@@ -935,8 +956,10 @@ again:
}
ret = find_extent_in_eb(eb, bytenr,
*extent_item_pos, &eie);
- ref->inode_list = eie;
free_extent_buffer(eb);
+ if (ret < 0)
+ goto out;
+ ref->inode_list = eie;
}
ret = ulist_add_merge(refs, ref->parent,
(uintptr_t)ref->inode_list,
@@ -954,7 +977,8 @@ again:
eie->next = ref->inode_list;
}
}
- kfree(ref);
+ list_del(&ref->list);
+ kmem_cache_free(btrfs_prelim_ref_cache, ref);
}
out:
@@ -962,13 +986,13 @@ out:
while (!list_empty(&prefs)) {
ref = list_first_entry(&prefs, struct __prelim_ref, list);
list_del(&ref->list);
- kfree(ref);
+ kmem_cache_free(btrfs_prelim_ref_cache, ref);
}
while (!list_empty(&prefs_delayed)) {
ref = list_first_entry(&prefs_delayed, struct __prelim_ref,
list);
list_del(&ref->list);
- kfree(ref);
+ kmem_cache_free(btrfs_prelim_ref_cache, ref);
}
return ret;
@@ -1326,8 +1350,7 @@ int extent_from_logical(struct btrfs_fs_info *fs_info, u64 logical,
found_key->type != BTRFS_METADATA_ITEM_KEY) ||
found_key->objectid > logical ||
found_key->objectid + size <= logical) {
- pr_debug("logical %llu is not within any extent\n",
- (unsigned long long)logical);
+ pr_debug("logical %llu is not within any extent\n", logical);
return -ENOENT;
}
@@ -1340,11 +1363,8 @@ int extent_from_logical(struct btrfs_fs_info *fs_info, u64 logical,
pr_debug("logical %llu is at position %llu within the extent (%llu "
"EXTENT_ITEM %llu) flags %#llx size %u\n",
- (unsigned long long)logical,
- (unsigned long long)(logical - found_key->objectid),
- (unsigned long long)found_key->objectid,
- (unsigned long long)found_key->offset,
- (unsigned long long)flags, item_size);
+ logical, logical - found_key->objectid, found_key->objectid,
+ found_key->offset, flags, item_size);
WARN_ON(!flags_ret);
if (flags_ret) {
@@ -1516,7 +1536,7 @@ int iterate_extent_inodes(struct btrfs_fs_info *fs_info,
while (!ret && (root_node = ulist_next(roots, &root_uiter))) {
pr_debug("root %llu references leaf %llu, data list "
"%#llx\n", root_node->val, ref_node->val,
- (long long)ref_node->aux);
+ ref_node->aux);
ret = iterate_leaf_refs((struct extent_inode_elem *)
(uintptr_t)ref_node->aux,
root_node->val,
@@ -1601,16 +1621,15 @@ static int iterate_inode_refs(u64 inum, struct btrfs_root *fs_root,
btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
btrfs_release_path(path);
- item = btrfs_item_nr(eb, slot);
+ item = btrfs_item_nr(slot);
iref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
for (cur = 0; cur < btrfs_item_size(eb, item); cur += len) {
name_len = btrfs_inode_ref_name_len(eb, iref);
/* path must be released before calling iterate()! */
pr_debug("following ref at offset %u for inode %llu in "
- "tree %llu\n", cur,
- (unsigned long long)found_key.objectid,
- (unsigned long long)fs_root->objectid);
+ "tree %llu\n", cur, found_key.objectid,
+ fs_root->objectid);
ret = iterate(parent, name_len,
(unsigned long)(iref + 1), eb, ctx);
if (ret)
diff --git a/fs/btrfs/backref.h b/fs/btrfs/backref.h
index 8f2e767..a910b27 100644
--- a/fs/btrfs/backref.h
+++ b/fs/btrfs/backref.h
@@ -72,4 +72,6 @@ int btrfs_find_one_extref(struct btrfs_root *root, u64 inode_objectid,
struct btrfs_inode_extref **ret_extref,
u64 *found_off);
+int __init btrfs_prelim_ref_init(void);
+void btrfs_prelim_ref_exit(void);
#endif
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index 08b286b..ac0b39d 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -19,6 +19,7 @@
#ifndef __BTRFS_I__
#define __BTRFS_I__
+#include <linux/hash.h>
#include "extent_map.h"
#include "extent_io.h"
#include "ordered-data.h"
@@ -179,6 +180,25 @@ static inline struct btrfs_inode *BTRFS_I(struct inode *inode)
return container_of(inode, struct btrfs_inode, vfs_inode);
}
+static inline unsigned long btrfs_inode_hash(u64 objectid,
+ const struct btrfs_root *root)
+{
+ u64 h = objectid ^ (root->objectid * GOLDEN_RATIO_PRIME);
+
+#if BITS_PER_LONG == 32
+ h = (h >> 32) ^ (h & 0xffffffff);
+#endif
+
+ return (unsigned long)h;
+}
+
+static inline void btrfs_insert_inode_hash(struct inode *inode)
+{
+ unsigned long h = btrfs_inode_hash(inode->i_ino, BTRFS_I(inode)->root);
+
+ __insert_inode_hash(inode, h);
+}
+
static inline u64 btrfs_ino(struct inode *inode)
{
u64 ino = BTRFS_I(inode)->location.objectid;
@@ -213,11 +233,35 @@ static inline bool btrfs_is_free_space_inode(struct inode *inode)
static inline int btrfs_inode_in_log(struct inode *inode, u64 generation)
{
if (BTRFS_I(inode)->logged_trans == generation &&
- BTRFS_I(inode)->last_sub_trans <= BTRFS_I(inode)->last_log_commit)
+ BTRFS_I(inode)->last_sub_trans <=
+ BTRFS_I(inode)->last_log_commit &&
+ BTRFS_I(inode)->last_sub_trans <=
+ BTRFS_I(inode)->root->last_log_commit)
return 1;
return 0;
}
+struct btrfs_dio_private {
+ struct inode *inode;
+ u64 logical_offset;
+ u64 disk_bytenr;
+ u64 bytes;
+ void *private;
+
+ /* number of bios pending for this dio */
+ atomic_t pending_bios;
+
+ /* IO errors */
+ int errors;
+
+ /* orig_bio is our btrfs_io_bio */
+ struct bio *orig_bio;
+
+ /* dio_bio came from fs/direct-io.c */
+ struct bio *dio_bio;
+ u8 csum[0];
+};
+
/*
* Disable DIO read nolock optimization, so new dio readers will be forced
* to grab i_mutex. It is used to avoid the endless truncate due to
diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c
index 1431a69..e0aab44 100644
--- a/fs/btrfs/check-integrity.c
+++ b/fs/btrfs/check-integrity.c
@@ -701,15 +701,13 @@ static int btrfsic_process_superblock(struct btrfsic_state *state,
next_bytenr = btrfs_super_root(selected_super);
if (state->print_mask &
BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION)
- printk(KERN_INFO "root@%llu\n",
- (unsigned long long)next_bytenr);
+ printk(KERN_INFO "root@%llu\n", next_bytenr);
break;
case 1:
next_bytenr = btrfs_super_chunk_root(selected_super);
if (state->print_mask &
BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION)
- printk(KERN_INFO "chunk@%llu\n",
- (unsigned long long)next_bytenr);
+ printk(KERN_INFO "chunk@%llu\n", next_bytenr);
break;
case 2:
next_bytenr = btrfs_super_log_root(selected_super);
@@ -717,8 +715,7 @@ static int btrfsic_process_superblock(struct btrfsic_state *state,
continue;
if (state->print_mask &
BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION)
- printk(KERN_INFO "log@%llu\n",
- (unsigned long long)next_bytenr);
+ printk(KERN_INFO "log@%llu\n", next_bytenr);
break;
}
@@ -727,7 +724,7 @@ static int btrfsic_process_superblock(struct btrfsic_state *state,
next_bytenr, state->metablock_size);
if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES)
printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n",
- (unsigned long long)next_bytenr, num_copies);
+ next_bytenr, num_copies);
for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) {
struct btrfsic_block *next_block;
@@ -742,8 +739,7 @@ static int btrfsic_process_superblock(struct btrfsic_state *state,
printk(KERN_INFO "btrfsic:"
" btrfsic_map_block(root @%llu,"
" mirror %d) failed!\n",
- (unsigned long long)next_bytenr,
- mirror_num);
+ next_bytenr, mirror_num);
kfree(selected_super);
return -1;
}
@@ -767,7 +763,6 @@ static int btrfsic_process_superblock(struct btrfsic_state *state,
if (ret < (int)PAGE_CACHE_SIZE) {
printk(KERN_INFO
"btrfsic: read @logical %llu failed!\n",
- (unsigned long long)
tmp_next_block_ctx.start);
btrfsic_release_block_ctx(&tmp_next_block_ctx);
kfree(selected_super);
@@ -813,7 +808,7 @@ static int btrfsic_process_superblock_dev_mirror(
(bh->b_data + (dev_bytenr & 4095));
if (btrfs_super_bytenr(super_tmp) != dev_bytenr ||
- super_tmp->magic != cpu_to_le64(BTRFS_MAGIC) ||
+ btrfs_super_magic(super_tmp) != BTRFS_MAGIC ||
memcmp(device->uuid, super_tmp->dev_item.uuid, BTRFS_UUID_SIZE) ||
btrfs_super_nodesize(super_tmp) != state->metablock_size ||
btrfs_super_leafsize(super_tmp) != state->metablock_size ||
@@ -847,10 +842,8 @@ static int btrfsic_process_superblock_dev_mirror(
printk_in_rcu(KERN_INFO "New initial S-block (bdev %p, %s)"
" @%llu (%s/%llu/%d)\n",
superblock_bdev,
- rcu_str_deref(device->name),
- (unsigned long long)dev_bytenr,
- dev_state->name,
- (unsigned long long)dev_bytenr,
+ rcu_str_deref(device->name), dev_bytenr,
+ dev_state->name, dev_bytenr,
superblock_mirror_num);
list_add(&superblock_tmp->all_blocks_node,
&state->all_blocks_list);
@@ -880,20 +873,20 @@ static int btrfsic_process_superblock_dev_mirror(
tmp_disk_key.offset = 0;
switch (pass) {
case 0:
- tmp_disk_key.objectid =
- cpu_to_le64(BTRFS_ROOT_TREE_OBJECTID);
+ btrfs_set_disk_key_objectid(&tmp_disk_key,
+ BTRFS_ROOT_TREE_OBJECTID);
additional_string = "initial root ";
next_bytenr = btrfs_super_root(super_tmp);
break;
case 1:
- tmp_disk_key.objectid =
- cpu_to_le64(BTRFS_CHUNK_TREE_OBJECTID);
+ btrfs_set_disk_key_objectid(&tmp_disk_key,
+ BTRFS_CHUNK_TREE_OBJECTID);
additional_string = "initial chunk ";
next_bytenr = btrfs_super_chunk_root(super_tmp);
break;
case 2:
- tmp_disk_key.objectid =
- cpu_to_le64(BTRFS_TREE_LOG_OBJECTID);
+ btrfs_set_disk_key_objectid(&tmp_disk_key,
+ BTRFS_TREE_LOG_OBJECTID);
additional_string = "initial log ";
next_bytenr = btrfs_super_log_root(super_tmp);
if (0 == next_bytenr)
@@ -906,7 +899,7 @@ static int btrfsic_process_superblock_dev_mirror(
next_bytenr, state->metablock_size);
if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES)
printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n",
- (unsigned long long)next_bytenr, num_copies);
+ next_bytenr, num_copies);
for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) {
struct btrfsic_block *next_block;
struct btrfsic_block_data_ctx tmp_next_block_ctx;
@@ -918,8 +911,7 @@ static int btrfsic_process_superblock_dev_mirror(
mirror_num)) {
printk(KERN_INFO "btrfsic: btrfsic_map_block("
"bytenr @%llu, mirror %d) failed!\n",
- (unsigned long long)next_bytenr,
- mirror_num);
+ next_bytenr, mirror_num);
brelse(bh);
return -1;
}
@@ -1003,19 +995,17 @@ continue_with_new_stack_frame:
(struct btrfs_leaf *)sf->hdr;
if (-1 == sf->i) {
- sf->nr = le32_to_cpu(leafhdr->header.nritems);
+ sf->nr = btrfs_stack_header_nritems(&leafhdr->header);
if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
printk(KERN_INFO
"leaf %llu items %d generation %llu"
" owner %llu\n",
- (unsigned long long)
- sf->block_ctx->start,
- sf->nr,
- (unsigned long long)
- le64_to_cpu(leafhdr->header.generation),
- (unsigned long long)
- le64_to_cpu(leafhdr->header.owner));
+ sf->block_ctx->start, sf->nr,
+ btrfs_stack_header_generation(
+ &leafhdr->header),
+ btrfs_stack_header_owner(
+ &leafhdr->header));
}
continue_with_current_leaf_stack_frame:
@@ -1047,10 +1037,10 @@ leaf_item_out_of_bounce_error:
&disk_item,
disk_item_offset,
sizeof(struct btrfs_item));
- item_offset = le32_to_cpu(disk_item.offset);
- item_size = le32_to_cpu(disk_item.size);
+ item_offset = btrfs_stack_item_offset(&disk_item);
+ item_size = btrfs_stack_item_size(&disk_item);
disk_key = &disk_item.key;
- type = disk_key->type;
+ type = btrfs_disk_key_type(disk_key);
if (BTRFS_ROOT_ITEM_KEY == type) {
struct btrfs_root_item root_item;
@@ -1066,7 +1056,7 @@ leaf_item_out_of_bounce_error:
sf->block_ctx, &root_item,
root_item_offset,
item_size);
- next_bytenr = le64_to_cpu(root_item.bytenr);
+ next_bytenr = btrfs_root_bytenr(&root_item);
sf->error =
btrfsic_create_link_to_next_block(
@@ -1081,8 +1071,8 @@ leaf_item_out_of_bounce_error:
&sf->num_copies,
&sf->mirror_num,
disk_key,
- le64_to_cpu(root_item.
- generation));
+ btrfs_root_generation(
+ &root_item));
if (sf->error)
goto one_stack_frame_backwards;
@@ -1130,18 +1120,17 @@ leaf_item_out_of_bounce_error:
struct btrfs_node *const nodehdr = (struct btrfs_node *)sf->hdr;
if (-1 == sf->i) {
- sf->nr = le32_to_cpu(nodehdr->header.nritems);
+ sf->nr = btrfs_stack_header_nritems(&nodehdr->header);
if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
printk(KERN_INFO "node %llu level %d items %d"
" generation %llu owner %llu\n",
- (unsigned long long)
sf->block_ctx->start,
nodehdr->header.level, sf->nr,
- (unsigned long long)
- le64_to_cpu(nodehdr->header.generation),
- (unsigned long long)
- le64_to_cpu(nodehdr->header.owner));
+ btrfs_stack_header_generation(
+ &nodehdr->header),
+ btrfs_stack_header_owner(
+ &nodehdr->header));
}
continue_with_current_node_stack_frame:
@@ -1168,7 +1157,7 @@ continue_with_current_node_stack_frame:
btrfsic_read_from_block_data(
sf->block_ctx, &key_ptr, key_ptr_offset,
sizeof(struct btrfs_key_ptr));
- next_bytenr = le64_to_cpu(key_ptr.blockptr);
+ next_bytenr = btrfs_stack_key_blockptr(&key_ptr);
sf->error = btrfsic_create_link_to_next_block(
state,
@@ -1182,7 +1171,7 @@ continue_with_current_node_stack_frame:
&sf->num_copies,
&sf->mirror_num,
&key_ptr.key,
- le64_to_cpu(key_ptr.generation));
+ btrfs_stack_key_generation(&key_ptr));
if (sf->error)
goto one_stack_frame_backwards;
@@ -1247,8 +1236,7 @@ static void btrfsic_read_from_block_data(
unsigned long i = (start_offset + offset) >> PAGE_CACHE_SHIFT;
WARN_ON(offset + len > block_ctx->len);
- offset_in_page = (start_offset + offset) &
- ((unsigned long)PAGE_CACHE_SIZE - 1);
+ offset_in_page = (start_offset + offset) & (PAGE_CACHE_SIZE - 1);
while (len > 0) {
cur = min(len, ((size_t)PAGE_CACHE_SIZE - offset_in_page));
@@ -1290,7 +1278,7 @@ static int btrfsic_create_link_to_next_block(
next_bytenr, state->metablock_size);
if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES)
printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n",
- (unsigned long long)next_bytenr, *num_copiesp);
+ next_bytenr, *num_copiesp);
*mirror_nump = 1;
}
@@ -1307,7 +1295,7 @@ static int btrfsic_create_link_to_next_block(
if (ret) {
printk(KERN_INFO
"btrfsic: btrfsic_map_block(@%llu, mirror=%d) failed!\n",
- (unsigned long long)next_bytenr, *mirror_nump);
+ next_bytenr, *mirror_nump);
btrfsic_release_block_ctx(next_block_ctx);
*next_blockp = NULL;
return -1;
@@ -1335,20 +1323,16 @@ static int btrfsic_create_link_to_next_block(
"Referenced block @%llu (%s/%llu/%d)"
" found in hash table, %c,"
" bytenr mismatch (!= stored %llu).\n",
- (unsigned long long)next_bytenr,
- next_block_ctx->dev->name,
- (unsigned long long)next_block_ctx->dev_bytenr,
- *mirror_nump,
+ next_bytenr, next_block_ctx->dev->name,
+ next_block_ctx->dev_bytenr, *mirror_nump,
btrfsic_get_block_type(state, next_block),
- (unsigned long long)next_block->logical_bytenr);
+ next_block->logical_bytenr);
} else if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
printk(KERN_INFO
"Referenced block @%llu (%s/%llu/%d)"
" found in hash table, %c.\n",
- (unsigned long long)next_bytenr,
- next_block_ctx->dev->name,
- (unsigned long long)next_block_ctx->dev_bytenr,
- *mirror_nump,
+ next_bytenr, next_block_ctx->dev->name,
+ next_block_ctx->dev_bytenr, *mirror_nump,
btrfsic_get_block_type(state, next_block));
next_block->logical_bytenr = next_bytenr;
@@ -1400,7 +1384,7 @@ static int btrfsic_create_link_to_next_block(
if (ret < (int)next_block_ctx->len) {
printk(KERN_INFO
"btrfsic: read block @logical %llu failed!\n",
- (unsigned long long)next_bytenr);
+ next_bytenr);
btrfsic_release_block_ctx(next_block_ctx);
*next_blockp = NULL;
return -1;
@@ -1444,12 +1428,12 @@ static int btrfsic_handle_extent_data(
file_extent_item_offset,
offsetof(struct btrfs_file_extent_item, disk_num_bytes));
if (BTRFS_FILE_EXTENT_REG != file_extent_item.type ||
- ((u64)0) == le64_to_cpu(file_extent_item.disk_bytenr)) {
+ btrfs_stack_file_extent_disk_bytenr(&file_extent_item) == 0) {
if (state->print_mask & BTRFSIC_PRINT_MASK_VERY_VERBOSE)
printk(KERN_INFO "extent_data: type %u, disk_bytenr = %llu\n",
file_extent_item.type,
- (unsigned long long)
- le64_to_cpu(file_extent_item.disk_bytenr));
+ btrfs_stack_file_extent_disk_bytenr(
+ &file_extent_item));
return 0;
}
@@ -1463,20 +1447,19 @@ static int btrfsic_handle_extent_data(
btrfsic_read_from_block_data(block_ctx, &file_extent_item,
file_extent_item_offset,
sizeof(struct btrfs_file_extent_item));
- next_bytenr = le64_to_cpu(file_extent_item.disk_bytenr) +
- le64_to_cpu(file_extent_item.offset);
- generation = le64_to_cpu(file_extent_item.generation);
- num_bytes = le64_to_cpu(file_extent_item.num_bytes);
- generation = le64_to_cpu(file_extent_item.generation);
+ next_bytenr = btrfs_stack_file_extent_disk_bytenr(&file_extent_item) +
+ btrfs_stack_file_extent_offset(&file_extent_item);
+ generation = btrfs_stack_file_extent_generation(&file_extent_item);
+ num_bytes = btrfs_stack_file_extent_num_bytes(&file_extent_item);
+ generation = btrfs_stack_file_extent_generation(&file_extent_item);
if (state->print_mask & BTRFSIC_PRINT_MASK_VERY_VERBOSE)
printk(KERN_INFO "extent_data: type %u, disk_bytenr = %llu,"
" offset = %llu, num_bytes = %llu\n",
file_extent_item.type,
- (unsigned long long)
- le64_to_cpu(file_extent_item.disk_bytenr),
- (unsigned long long)le64_to_cpu(file_extent_item.offset),
- (unsigned long long)num_bytes);
+ btrfs_stack_file_extent_disk_bytenr(&file_extent_item),
+ btrfs_stack_file_extent_offset(&file_extent_item),
+ num_bytes);
while (num_bytes > 0) {
u32 chunk_len;
int num_copies;
@@ -1492,7 +1475,7 @@ static int btrfsic_handle_extent_data(
next_bytenr, state->datablock_size);
if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES)
printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n",
- (unsigned long long)next_bytenr, num_copies);
+ next_bytenr, num_copies);
for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) {
struct btrfsic_block_data_ctx next_block_ctx;
struct btrfsic_block *next_block;
@@ -1504,8 +1487,7 @@ static int btrfsic_handle_extent_data(
if (state->print_mask & BTRFSIC_PRINT_MASK_VERY_VERBOSE)
printk(KERN_INFO
"\tdisk_bytenr = %llu, num_bytes %u\n",
- (unsigned long long)next_bytenr,
- chunk_len);
+ next_bytenr, chunk_len);
ret = btrfsic_map_block(state, next_bytenr,
chunk_len, &next_block_ctx,
mirror_num);
@@ -1513,8 +1495,7 @@ static int btrfsic_handle_extent_data(
printk(KERN_INFO
"btrfsic: btrfsic_map_block(@%llu,"
" mirror=%d) failed!\n",
- (unsigned long long)next_bytenr,
- mirror_num);
+ next_bytenr, mirror_num);
return -1;
}
@@ -1543,12 +1524,10 @@ static int btrfsic_handle_extent_data(
" found in hash table, D,"
" bytenr mismatch"
" (!= stored %llu).\n",
- (unsigned long long)next_bytenr,
+ next_bytenr,
next_block_ctx.dev->name,
- (unsigned long long)
next_block_ctx.dev_bytenr,
mirror_num,
- (unsigned long long)
next_block->logical_bytenr);
}
next_block->logical_bytenr = next_bytenr;
@@ -1675,7 +1654,7 @@ static int btrfsic_read_block(struct btrfsic_state *state,
if (block_ctx->dev_bytenr & ((u64)PAGE_CACHE_SIZE - 1)) {
printk(KERN_INFO
"btrfsic: read_block() with unaligned bytenr %llu\n",
- (unsigned long long)block_ctx->dev_bytenr);
+ block_ctx->dev_bytenr);
return -1;
}
@@ -1772,10 +1751,8 @@ static void btrfsic_dump_database(struct btrfsic_state *state)
printk(KERN_INFO "%c-block @%llu (%s/%llu/%d)\n",
btrfsic_get_block_type(state, b_all),
- (unsigned long long)b_all->logical_bytenr,
- b_all->dev_state->name,
- (unsigned long long)b_all->dev_bytenr,
- b_all->mirror_num);
+ b_all->logical_bytenr, b_all->dev_state->name,
+ b_all->dev_bytenr, b_all->mirror_num);
list_for_each(elem_ref_to, &b_all->ref_to_list) {
const struct btrfsic_block_link *const l =
@@ -1787,16 +1764,13 @@ static void btrfsic_dump_database(struct btrfsic_state *state)
" refers %u* to"
" %c @%llu (%s/%llu/%d)\n",
btrfsic_get_block_type(state, b_all),
- (unsigned long long)b_all->logical_bytenr,
- b_all->dev_state->name,
- (unsigned long long)b_all->dev_bytenr,
- b_all->mirror_num,
+ b_all->logical_bytenr, b_all->dev_state->name,
+ b_all->dev_bytenr, b_all->mirror_num,
l->ref_cnt,
btrfsic_get_block_type(state, l->block_ref_to),
- (unsigned long long)
l->block_ref_to->logical_bytenr,
l->block_ref_to->dev_state->name,
- (unsigned long long)l->block_ref_to->dev_bytenr,
+ l->block_ref_to->dev_bytenr,
l->block_ref_to->mirror_num);
}
@@ -1810,16 +1784,12 @@ static void btrfsic_dump_database(struct btrfsic_state *state)
" is ref %u* from"
" %c @%llu (%s/%llu/%d)\n",
btrfsic_get_block_type(state, b_all),
- (unsigned long long)b_all->logical_bytenr,
- b_all->dev_state->name,
- (unsigned long long)b_all->dev_bytenr,
- b_all->mirror_num,
+ b_all->logical_bytenr, b_all->dev_state->name,
+ b_all->dev_bytenr, b_all->mirror_num,
l->ref_cnt,
btrfsic_get_block_type(state, l->block_ref_from),
- (unsigned long long)
l->block_ref_from->logical_bytenr,
l->block_ref_from->dev_state->name,
- (unsigned long long)
l->block_ref_from->dev_bytenr,
l->block_ref_from->mirror_num);
}
@@ -1896,8 +1866,8 @@ again:
struct list_head *tmp_ref_to;
if (block->is_superblock) {
- bytenr = le64_to_cpu(((struct btrfs_super_block *)
- mapped_datav[0])->bytenr);
+ bytenr = btrfs_super_bytenr((struct btrfs_super_block *)
+ mapped_datav[0]);
if (num_pages * PAGE_CACHE_SIZE <
BTRFS_SUPER_INFO_SIZE) {
printk(KERN_INFO
@@ -1923,36 +1893,33 @@ again:
return;
}
processed_len = state->metablock_size;
- bytenr = le64_to_cpu(((struct btrfs_header *)
- mapped_datav[0])->bytenr);
+ bytenr = btrfs_stack_header_bytenr(
+ (struct btrfs_header *)
+ mapped_datav[0]);
btrfsic_cmp_log_and_dev_bytenr(state, bytenr,
dev_state,
dev_bytenr);
}
- if (block->logical_bytenr != bytenr) {
+ if (block->logical_bytenr != bytenr &&
+ !(!block->is_metadata &&
+ block->logical_bytenr == 0))
printk(KERN_INFO
"Written block @%llu (%s/%llu/%d)"
" found in hash table, %c,"
" bytenr mismatch"
" (!= stored %llu).\n",
- (unsigned long long)bytenr,
- dev_state->name,
- (unsigned long long)dev_bytenr,
+ bytenr, dev_state->name, dev_bytenr,
block->mirror_num,
btrfsic_get_block_type(state, block),
- (unsigned long long)
block->logical_bytenr);
- block->logical_bytenr = bytenr;
- } else if (state->print_mask &
- BTRFSIC_PRINT_MASK_VERBOSE)
+ else if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
printk(KERN_INFO
"Written block @%llu (%s/%llu/%d)"
" found in hash table, %c.\n",
- (unsigned long long)bytenr,
- dev_state->name,
- (unsigned long long)dev_bytenr,
+ bytenr, dev_state->name, dev_bytenr,
block->mirror_num,
btrfsic_get_block_type(state, block));
+ block->logical_bytenr = bytenr;
} else {
if (num_pages * PAGE_CACHE_SIZE <
state->datablock_size) {
@@ -1966,9 +1933,7 @@ again:
printk(KERN_INFO
"Written block @%llu (%s/%llu/%d)"
" found in hash table, %c.\n",
- (unsigned long long)bytenr,
- dev_state->name,
- (unsigned long long)dev_bytenr,
+ bytenr, dev_state->name, dev_bytenr,
block->mirror_num,
btrfsic_get_block_type(state, block));
}
@@ -1985,21 +1950,14 @@ again:
" new(gen=%llu),"
" which is referenced by most recent superblock"
" (superblockgen=%llu)!\n",
- btrfsic_get_block_type(state, block),
- (unsigned long long)bytenr,
- dev_state->name,
- (unsigned long long)dev_bytenr,
- block->mirror_num,
- (unsigned long long)block->generation,
- (unsigned long long)
- le64_to_cpu(block->disk_key.objectid),
+ btrfsic_get_block_type(state, block), bytenr,
+ dev_state->name, dev_bytenr, block->mirror_num,
+ block->generation,
+ btrfs_disk_key_objectid(&block->disk_key),
block->disk_key.type,
- (unsigned long long)
- le64_to_cpu(block->disk_key.offset),
- (unsigned long long)
- le64_to_cpu(((struct btrfs_header *)
- mapped_datav[0])->generation),
- (unsigned long long)
+ btrfs_disk_key_offset(&block->disk_key),
+ btrfs_stack_header_generation(
+ (struct btrfs_header *) mapped_datav[0]),
state->max_superblock_generation);
btrfsic_dump_tree(state);
}
@@ -2008,15 +1966,12 @@ again:
printk(KERN_INFO "btrfs: attempt to overwrite %c-block"
" @%llu (%s/%llu/%d), oldgen=%llu, newgen=%llu,"
" which is not yet iodone!\n",
- btrfsic_get_block_type(state, block),
- (unsigned long long)bytenr,
- dev_state->name,
- (unsigned long long)dev_bytenr,
- block->mirror_num,
- (unsigned long long)block->generation,
- (unsigned long long)
- le64_to_cpu(((struct btrfs_header *)
- mapped_datav[0])->generation));
+ btrfsic_get_block_type(state, block), bytenr,
+ dev_state->name, dev_bytenr, block->mirror_num,
+ block->generation,
+ btrfs_stack_header_generation(
+ (struct btrfs_header *)
+ mapped_datav[0]));
/* it would not be safe to go on */
btrfsic_dump_tree(state);
goto continue_loop;
@@ -2056,7 +2011,7 @@ again:
if (ret) {
printk(KERN_INFO
"btrfsic: btrfsic_map_block(root @%llu)"
- " failed!\n", (unsigned long long)bytenr);
+ " failed!\n", bytenr);
goto continue_loop;
}
block_ctx.datav = mapped_datav;
@@ -2140,7 +2095,7 @@ again:
printk(KERN_INFO
"btrfsic: btrfsic_process_metablock"
"(root @%llu) failed!\n",
- (unsigned long long)dev_bytenr);
+ dev_bytenr);
} else {
block->is_metadata = 0;
block->mirror_num = 0; /* unknown */
@@ -2168,8 +2123,7 @@ again:
if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
printk(KERN_INFO "Written block (%s/%llu/?)"
" !found in hash table, D.\n",
- dev_state->name,
- (unsigned long long)dev_bytenr);
+ dev_state->name, dev_bytenr);
if (!state->include_extent_data) {
/* ignore that written D block */
goto continue_loop;
@@ -2184,17 +2138,16 @@ again:
block_ctx.pagev = NULL;
} else {
processed_len = state->metablock_size;
- bytenr = le64_to_cpu(((struct btrfs_header *)
- mapped_datav[0])->bytenr);
+ bytenr = btrfs_stack_header_bytenr(
+ (struct btrfs_header *)
+ mapped_datav[0]);
btrfsic_cmp_log_and_dev_bytenr(state, bytenr, dev_state,
dev_bytenr);
if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
printk(KERN_INFO
"Written block @%llu (%s/%llu/?)"
" !found in hash table, M.\n",
- (unsigned long long)bytenr,
- dev_state->name,
- (unsigned long long)dev_bytenr);
+ bytenr, dev_state->name, dev_bytenr);
ret = btrfsic_map_block(state, bytenr, processed_len,
&block_ctx, 0);
@@ -2202,7 +2155,7 @@ again:
printk(KERN_INFO
"btrfsic: btrfsic_map_block(root @%llu)"
" failed!\n",
- (unsigned long long)dev_bytenr);
+ dev_bytenr);
goto continue_loop;
}
}
@@ -2267,10 +2220,8 @@ again:
printk(KERN_INFO
"New written %c-block @%llu (%s/%llu/%d)\n",
is_metadata ? 'M' : 'D',
- (unsigned long long)block->logical_bytenr,
- block->dev_state->name,
- (unsigned long long)block->dev_bytenr,
- block->mirror_num);
+ block->logical_bytenr, block->dev_state->name,
+ block->dev_bytenr, block->mirror_num);
list_add(&block->all_blocks_node, &state->all_blocks_list);
btrfsic_block_hashtable_add(block, &state->block_hashtable);
@@ -2281,7 +2232,7 @@ again:
printk(KERN_INFO
"btrfsic: process_metablock(root @%llu)"
" failed!\n",
- (unsigned long long)dev_bytenr);
+ dev_bytenr);
}
btrfsic_release_block_ctx(&block_ctx);
}
@@ -2319,10 +2270,8 @@ static void btrfsic_bio_end_io(struct bio *bp, int bio_error_status)
"bio_end_io(err=%d) for %c @%llu (%s/%llu/%d)\n",
bio_error_status,
btrfsic_get_block_type(dev_state->state, block),
- (unsigned long long)block->logical_bytenr,
- dev_state->name,
- (unsigned long long)block->dev_bytenr,
- block->mirror_num);
+ block->logical_bytenr, dev_state->name,
+ block->dev_bytenr, block->mirror_num);
next_block = block->next_in_same_bio;
block->iodone_w_error = iodone_w_error;
if (block->submit_bio_bh_rw & REQ_FLUSH) {
@@ -2332,7 +2281,6 @@ static void btrfsic_bio_end_io(struct bio *bp, int bio_error_status)
printk(KERN_INFO
"bio_end_io() new %s flush_gen=%llu\n",
dev_state->name,
- (unsigned long long)
dev_state->last_flush_gen);
}
if (block->submit_bio_bh_rw & REQ_FUA)
@@ -2358,10 +2306,8 @@ static void btrfsic_bh_end_io(struct buffer_head *bh, int uptodate)
"bh_end_io(error=%d) for %c @%llu (%s/%llu/%d)\n",
iodone_w_error,
btrfsic_get_block_type(dev_state->state, block),
- (unsigned long long)block->logical_bytenr,
- block->dev_state->name,
- (unsigned long long)block->dev_bytenr,
- block->mirror_num);
+ block->logical_bytenr, block->dev_state->name,
+ block->dev_bytenr, block->mirror_num);
block->iodone_w_error = iodone_w_error;
if (block->submit_bio_bh_rw & REQ_FLUSH) {
@@ -2370,8 +2316,7 @@ static void btrfsic_bh_end_io(struct buffer_head *bh, int uptodate)
BTRFSIC_PRINT_MASK_END_IO_BIO_BH))
printk(KERN_INFO
"bh_end_io() new %s flush_gen=%llu\n",
- dev_state->name,
- (unsigned long long)dev_state->last_flush_gen);
+ dev_state->name, dev_state->last_flush_gen);
}
if (block->submit_bio_bh_rw & REQ_FUA)
block->flush_gen = 0; /* FUA completed means block is on disk */
@@ -2396,26 +2341,20 @@ static int btrfsic_process_written_superblock(
printk(KERN_INFO
"btrfsic: superblock @%llu (%s/%llu/%d)"
" with old gen %llu <= %llu\n",
- (unsigned long long)superblock->logical_bytenr,
+ superblock->logical_bytenr,
superblock->dev_state->name,
- (unsigned long long)superblock->dev_bytenr,
- superblock->mirror_num,
- (unsigned long long)
+ superblock->dev_bytenr, superblock->mirror_num,
btrfs_super_generation(super_hdr),
- (unsigned long long)
state->max_superblock_generation);
} else {
if (state->print_mask & BTRFSIC_PRINT_MASK_SUPERBLOCK_WRITE)
printk(KERN_INFO
"btrfsic: got new superblock @%llu (%s/%llu/%d)"
" with new gen %llu > %llu\n",
- (unsigned long long)superblock->logical_bytenr,
+ superblock->logical_bytenr,
superblock->dev_state->name,
- (unsigned long long)superblock->dev_bytenr,
- superblock->mirror_num,
- (unsigned long long)
+ superblock->dev_bytenr, superblock->mirror_num,
btrfs_super_generation(super_hdr),
- (unsigned long long)
state->max_superblock_generation);
state->max_superblock_generation =
@@ -2432,43 +2371,41 @@ static int btrfsic_process_written_superblock(
int num_copies;
int mirror_num;
const char *additional_string = NULL;
- struct btrfs_disk_key tmp_disk_key;
+ struct btrfs_disk_key tmp_disk_key = {0};
- tmp_disk_key.type = BTRFS_ROOT_ITEM_KEY;
- tmp_disk_key.offset = 0;
+ btrfs_set_disk_key_objectid(&tmp_disk_key,
+ BTRFS_ROOT_ITEM_KEY);
+ btrfs_set_disk_key_objectid(&tmp_disk_key, 0);
switch (pass) {
case 0:
- tmp_disk_key.objectid =
- cpu_to_le64(BTRFS_ROOT_TREE_OBJECTID);
+ btrfs_set_disk_key_objectid(&tmp_disk_key,
+ BTRFS_ROOT_TREE_OBJECTID);
additional_string = "root ";
next_bytenr = btrfs_super_root(super_hdr);
if (state->print_mask &
BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION)
- printk(KERN_INFO "root@%llu\n",
- (unsigned long long)next_bytenr);
+ printk(KERN_INFO "root@%llu\n", next_bytenr);
break;
case 1:
- tmp_disk_key.objectid =
- cpu_to_le64(BTRFS_CHUNK_TREE_OBJECTID);
+ btrfs_set_disk_key_objectid(&tmp_disk_key,
+ BTRFS_CHUNK_TREE_OBJECTID);
additional_string = "chunk ";
next_bytenr = btrfs_super_chunk_root(super_hdr);
if (state->print_mask &
BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION)
- printk(KERN_INFO "chunk@%llu\n",
- (unsigned long long)next_bytenr);
+ printk(KERN_INFO "chunk@%llu\n", next_bytenr);
break;
case 2:
- tmp_disk_key.objectid =
- cpu_to_le64(BTRFS_TREE_LOG_OBJECTID);
+ btrfs_set_disk_key_objectid(&tmp_disk_key,
+ BTRFS_TREE_LOG_OBJECTID);
additional_string = "log ";
next_bytenr = btrfs_super_log_root(super_hdr);
if (0 == next_bytenr)
continue;
if (state->print_mask &
BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION)
- printk(KERN_INFO "log@%llu\n",
- (unsigned long long)next_bytenr);
+ printk(KERN_INFO "log@%llu\n", next_bytenr);
break;
}
@@ -2477,7 +2414,7 @@ static int btrfsic_process_written_superblock(
next_bytenr, BTRFS_SUPER_INFO_SIZE);
if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES)
printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n",
- (unsigned long long)next_bytenr, num_copies);
+ next_bytenr, num_copies);
for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) {
int was_created;
@@ -2493,8 +2430,7 @@ static int btrfsic_process_written_superblock(
printk(KERN_INFO
"btrfsic: btrfsic_map_block(@%llu,"
" mirror=%d) failed!\n",
- (unsigned long long)next_bytenr,
- mirror_num);
+ next_bytenr, mirror_num);
return -1;
}
@@ -2528,10 +2464,8 @@ static int btrfsic_process_written_superblock(
}
}
- if (-1 == btrfsic_check_all_ref_blocks(state, superblock, 0)) {
- WARN_ON(1);
+ if (WARN_ON(-1 == btrfsic_check_all_ref_blocks(state, superblock, 0)))
btrfsic_dump_tree(state);
- }
return 0;
}
@@ -2579,26 +2513,22 @@ static int btrfsic_check_all_ref_blocks(struct btrfsic_state *state,
" %u* refers to %c @%llu (%s/%llu/%d)\n",
recursion_level,
btrfsic_get_block_type(state, block),
- (unsigned long long)block->logical_bytenr,
- block->dev_state->name,
- (unsigned long long)block->dev_bytenr,
- block->mirror_num,
+ block->logical_bytenr, block->dev_state->name,
+ block->dev_bytenr, block->mirror_num,
l->ref_cnt,
btrfsic_get_block_type(state, l->block_ref_to),
- (unsigned long long)
l->block_ref_to->logical_bytenr,
l->block_ref_to->dev_state->name,
- (unsigned long long)l->block_ref_to->dev_bytenr,
+ l->block_ref_to->dev_bytenr,
l->block_ref_to->mirror_num);
if (l->block_ref_to->never_written) {
printk(KERN_INFO "btrfs: attempt to write superblock"
" which references block %c @%llu (%s/%llu/%d)"
" which is never written!\n",
btrfsic_get_block_type(state, l->block_ref_to),
- (unsigned long long)
l->block_ref_to->logical_bytenr,
l->block_ref_to->dev_state->name,
- (unsigned long long)l->block_ref_to->dev_bytenr,
+ l->block_ref_to->dev_bytenr,
l->block_ref_to->mirror_num);
ret = -1;
} else if (!l->block_ref_to->is_iodone) {
@@ -2606,10 +2536,9 @@ static int btrfsic_check_all_ref_blocks(struct btrfsic_state *state,
" which references block %c @%llu (%s/%llu/%d)"
" which is not yet iodone!\n",
btrfsic_get_block_type(state, l->block_ref_to),
- (unsigned long long)
l->block_ref_to->logical_bytenr,
l->block_ref_to->dev_state->name,
- (unsigned long long)l->block_ref_to->dev_bytenr,
+ l->block_ref_to->dev_bytenr,
l->block_ref_to->mirror_num);
ret = -1;
} else if (l->block_ref_to->iodone_w_error) {
@@ -2617,10 +2546,9 @@ static int btrfsic_check_all_ref_blocks(struct btrfsic_state *state,
" which references block %c @%llu (%s/%llu/%d)"
" which has write error!\n",
btrfsic_get_block_type(state, l->block_ref_to),
- (unsigned long long)
l->block_ref_to->logical_bytenr,
l->block_ref_to->dev_state->name,
- (unsigned long long)l->block_ref_to->dev_bytenr,
+ l->block_ref_to->dev_bytenr,
l->block_ref_to->mirror_num);
ret = -1;
} else if (l->parent_generation !=
@@ -2634,13 +2562,12 @@ static int btrfsic_check_all_ref_blocks(struct btrfsic_state *state,
" with generation %llu !="
" parent generation %llu!\n",
btrfsic_get_block_type(state, l->block_ref_to),
- (unsigned long long)
l->block_ref_to->logical_bytenr,
l->block_ref_to->dev_state->name,
- (unsigned long long)l->block_ref_to->dev_bytenr,
+ l->block_ref_to->dev_bytenr,
l->block_ref_to->mirror_num,
- (unsigned long long)l->block_ref_to->generation,
- (unsigned long long)l->parent_generation);
+ l->block_ref_to->generation,
+ l->parent_generation);
ret = -1;
} else if (l->block_ref_to->flush_gen >
l->block_ref_to->dev_state->last_flush_gen) {
@@ -2650,13 +2577,10 @@ static int btrfsic_check_all_ref_blocks(struct btrfsic_state *state,
" (block flush_gen=%llu,"
" dev->flush_gen=%llu)!\n",
btrfsic_get_block_type(state, l->block_ref_to),
- (unsigned long long)
l->block_ref_to->logical_bytenr,
l->block_ref_to->dev_state->name,
- (unsigned long long)l->block_ref_to->dev_bytenr,
- l->block_ref_to->mirror_num,
- (unsigned long long)block->flush_gen,
- (unsigned long long)
+ l->block_ref_to->dev_bytenr,
+ l->block_ref_to->mirror_num, block->flush_gen,
l->block_ref_to->dev_state->last_flush_gen);
ret = -1;
} else if (-1 == btrfsic_check_all_ref_blocks(state,
@@ -2701,16 +2625,12 @@ static int btrfsic_is_block_ref_by_superblock(
" is ref %u* from %c @%llu (%s/%llu/%d)\n",
recursion_level,
btrfsic_get_block_type(state, block),
- (unsigned long long)block->logical_bytenr,
- block->dev_state->name,
- (unsigned long long)block->dev_bytenr,
- block->mirror_num,
+ block->logical_bytenr, block->dev_state->name,
+ block->dev_bytenr, block->mirror_num,
l->ref_cnt,
btrfsic_get_block_type(state, l->block_ref_from),
- (unsigned long long)
l->block_ref_from->logical_bytenr,
l->block_ref_from->dev_state->name,
- (unsigned long long)
l->block_ref_from->dev_bytenr,
l->block_ref_from->mirror_num);
if (l->block_ref_from->is_superblock &&
@@ -2737,14 +2657,12 @@ static void btrfsic_print_add_link(const struct btrfsic_state *state,
" to %c @%llu (%s/%llu/%d).\n",
l->ref_cnt,
btrfsic_get_block_type(state, l->block_ref_from),
- (unsigned long long)l->block_ref_from->logical_bytenr,
+ l->block_ref_from->logical_bytenr,
l->block_ref_from->dev_state->name,
- (unsigned long long)l->block_ref_from->dev_bytenr,
- l->block_ref_from->mirror_num,
+ l->block_ref_from->dev_bytenr, l->block_ref_from->mirror_num,
btrfsic_get_block_type(state, l->block_ref_to),
- (unsigned long long)l->block_ref_to->logical_bytenr,
- l->block_ref_to->dev_state->name,
- (unsigned long long)l->block_ref_to->dev_bytenr,
+ l->block_ref_to->logical_bytenr,
+ l->block_ref_to->dev_state->name, l->block_ref_to->dev_bytenr,
l->block_ref_to->mirror_num);
}
@@ -2756,14 +2674,12 @@ static void btrfsic_print_rem_link(const struct btrfsic_state *state,
" to %c @%llu (%s/%llu/%d).\n",
l->ref_cnt,
btrfsic_get_block_type(state, l->block_ref_from),
- (unsigned long long)l->block_ref_from->logical_bytenr,
+ l->block_ref_from->logical_bytenr,
l->block_ref_from->dev_state->name,
- (unsigned long long)l->block_ref_from->dev_bytenr,
- l->block_ref_from->mirror_num,
+ l->block_ref_from->dev_bytenr, l->block_ref_from->mirror_num,
btrfsic_get_block_type(state, l->block_ref_to),
- (unsigned long long)l->block_ref_to->logical_bytenr,
- l->block_ref_to->dev_state->name,
- (unsigned long long)l->block_ref_to->dev_bytenr,
+ l->block_ref_to->logical_bytenr,
+ l->block_ref_to->dev_state->name, l->block_ref_to->dev_bytenr,
l->block_ref_to->mirror_num);
}
@@ -2807,10 +2723,8 @@ static void btrfsic_dump_tree_sub(const struct btrfsic_state *state,
*/
indent_add = sprintf(buf, "%c-%llu(%s/%llu/%d)",
btrfsic_get_block_type(state, block),
- (unsigned long long)block->logical_bytenr,
- block->dev_state->name,
- (unsigned long long)block->dev_bytenr,
- block->mirror_num);
+ block->logical_bytenr, block->dev_state->name,
+ block->dev_bytenr, block->mirror_num);
if (indent_level + indent_add > BTRFSIC_TREE_DUMP_MAX_INDENT_LEVEL) {
printk("[...]\n");
return;
@@ -2943,10 +2857,8 @@ static struct btrfsic_block *btrfsic_block_lookup_or_add(
"New %s%c-block @%llu (%s/%llu/%d)\n",
additional_string,
btrfsic_get_block_type(state, block),
- (unsigned long long)block->logical_bytenr,
- dev_state->name,
- (unsigned long long)block->dev_bytenr,
- mirror_num);
+ block->logical_bytenr, dev_state->name,
+ block->dev_bytenr, mirror_num);
list_add(&block->all_blocks_node, &state->all_blocks_list);
btrfsic_block_hashtable_add(block, &state->block_hashtable);
if (NULL != was_created)
@@ -2980,7 +2892,7 @@ static void btrfsic_cmp_log_and_dev_bytenr(struct btrfsic_state *state,
printk(KERN_INFO "btrfsic:"
" btrfsic_map_block(logical @%llu,"
" mirror %d) failed!\n",
- (unsigned long long)bytenr, mirror_num);
+ bytenr, mirror_num);
continue;
}
@@ -2993,12 +2905,11 @@ static void btrfsic_cmp_log_and_dev_bytenr(struct btrfsic_state *state,
btrfsic_release_block_ctx(&block_ctx);
}
- if (!match) {
+ if (WARN_ON(!match)) {
printk(KERN_INFO "btrfs: attempt to write M-block which contains logical bytenr that doesn't map to dev+physical bytenr of submit_bio,"
" buffer->log_bytenr=%llu, submit_bio(bdev=%s,"
" phys_bytenr=%llu)!\n",
- (unsigned long long)bytenr, dev_state->name,
- (unsigned long long)dev_bytenr);
+ bytenr, dev_state->name, dev_bytenr);
for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) {
ret = btrfsic_map_block(state, bytenr,
state->metablock_size,
@@ -3008,12 +2919,9 @@ static void btrfsic_cmp_log_and_dev_bytenr(struct btrfsic_state *state,
printk(KERN_INFO "Read logical bytenr @%llu maps to"
" (%s/%llu/%d)\n",
- (unsigned long long)bytenr,
- block_ctx.dev->name,
- (unsigned long long)block_ctx.dev_bytenr,
- mirror_num);
+ bytenr, block_ctx.dev->name,
+ block_ctx.dev_bytenr, mirror_num);
}
- WARN_ON(1);
}
}
@@ -3048,12 +2956,10 @@ int btrfsic_submit_bh(int rw, struct buffer_head *bh)
if (dev_state->state->print_mask &
BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH)
printk(KERN_INFO
- "submit_bh(rw=0x%x, blocknr=%lu (bytenr %llu),"
- " size=%lu, data=%p, bdev=%p)\n",
- rw, (unsigned long)bh->b_blocknr,
- (unsigned long long)dev_bytenr,
- (unsigned long)bh->b_size, bh->b_data,
- bh->b_bdev);
+ "submit_bh(rw=0x%x, blocknr=%llu (bytenr %llu),"
+ " size=%zu, data=%p, bdev=%p)\n",
+ rw, (unsigned long long)bh->b_blocknr,
+ dev_bytenr, bh->b_size, bh->b_data, bh->b_bdev);
btrfsic_process_written_block(dev_state, dev_bytenr,
&bh->b_data, 1, NULL,
NULL, bh, rw);
@@ -3118,9 +3024,9 @@ void btrfsic_submit_bio(int rw, struct bio *bio)
BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH)
printk(KERN_INFO
"submit_bio(rw=0x%x, bi_vcnt=%u,"
- " bi_sector=%lu (bytenr %llu), bi_bdev=%p)\n",
- rw, bio->bi_vcnt, (unsigned long)bio->bi_sector,
- (unsigned long long)dev_bytenr,
+ " bi_sector=%llu (bytenr %llu), bi_bdev=%p)\n",
+ rw, bio->bi_vcnt,
+ (unsigned long long)bio->bi_sector, dev_bytenr,
bio->bi_bdev);
mapped_datav = kmalloc(sizeof(*mapped_datav) * bio->bi_vcnt,
@@ -3213,19 +3119,19 @@ int btrfsic_mount(struct btrfs_root *root,
if (root->nodesize & ((u64)PAGE_CACHE_SIZE - 1)) {
printk(KERN_INFO
"btrfsic: cannot handle nodesize %d not being a multiple of PAGE_CACHE_SIZE %ld!\n",
- root->nodesize, (unsigned long)PAGE_CACHE_SIZE);
+ root->nodesize, PAGE_CACHE_SIZE);
return -1;
}
if (root->leafsize & ((u64)PAGE_CACHE_SIZE - 1)) {
printk(KERN_INFO
"btrfsic: cannot handle leafsize %d not being a multiple of PAGE_CACHE_SIZE %ld!\n",
- root->leafsize, (unsigned long)PAGE_CACHE_SIZE);
+ root->leafsize, PAGE_CACHE_SIZE);
return -1;
}
if (root->sectorsize & ((u64)PAGE_CACHE_SIZE - 1)) {
printk(KERN_INFO
"btrfsic: cannot handle sectorsize %d not being a multiple of PAGE_CACHE_SIZE %ld!\n",
- root->sectorsize, (unsigned long)PAGE_CACHE_SIZE);
+ root->sectorsize, PAGE_CACHE_SIZE);
return -1;
}
state = kzalloc(sizeof(*state), GFP_NOFS);
@@ -3369,10 +3275,8 @@ void btrfsic_unmount(struct btrfs_root *root,
" @%llu (%s/%llu/%d) on umount which is"
" not yet iodone!\n",
btrfsic_get_block_type(state, b_all),
- (unsigned long long)b_all->logical_bytenr,
- b_all->dev_state->name,
- (unsigned long long)b_all->dev_bytenr,
- b_all->mirror_num);
+ b_all->logical_bytenr, b_all->dev_state->name,
+ b_all->dev_bytenr, b_all->mirror_num);
}
mutex_unlock(&btrfsic_mutex);
diff --git a/fs/btrfs/compat.h b/fs/btrfs/compat.h
deleted file mode 100644
index 7c4503e..0000000
--- a/fs/btrfs/compat.h
+++ /dev/null
@@ -1,7 +0,0 @@
-#ifndef _COMPAT_H_
-#define _COMPAT_H_
-
-#define btrfs_drop_nlink(inode) drop_nlink(inode)
-#define btrfs_inc_nlink(inode) inc_nlink(inode)
-
-#endif /* _COMPAT_H_ */
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index b189bd1..1499b27 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -32,7 +32,6 @@
#include <linux/writeback.h>
#include <linux/bit_spinlock.h>
#include <linux/slab.h>
-#include "compat.h"
#include "ctree.h"
#include "disk-io.h"
#include "transaction.h"
@@ -132,9 +131,8 @@ static int check_compressed_csum(struct inode *inode,
printk(KERN_INFO "btrfs csum failed ino %llu "
"extent %llu csum %u "
"wanted %u mirror %d\n",
- (unsigned long long)btrfs_ino(inode),
- (unsigned long long)disk_start,
- csum, *cb_sum, cb->mirror_num);
+ btrfs_ino(inode), disk_start, csum, *cb_sum,
+ cb->mirror_num);
ret = -EIO;
goto fail;
}
@@ -361,7 +359,7 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev;
bio = compressed_bio_alloc(bdev, first_byte, GFP_NOFS);
- if(!bio) {
+ if (!bio) {
kfree(cb);
return -ENOMEM;
}
@@ -639,7 +637,11 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
faili = nr_pages - 1;
cb->nr_pages = nr_pages;
- add_ra_bio_pages(inode, em_start + em_len, cb);
+ /* In the parent-locked case, we only locked the range we are
+ * interested in. In all other cases, we can opportunistically
+ * cache decompressed data that goes beyond the requested range. */
+ if (!(bio_flags & EXTENT_BIO_PARENT_LOCKED))
+ add_ra_bio_pages(inode, em_start + em_len, cb);
/* include any pages we added in add_ra-bio_pages */
uncompressed_len = bio->bi_vcnt * PAGE_CACHE_SIZE;
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index ed50460..316136b 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -274,8 +274,7 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans,
else
btrfs_set_header_owner(cow, new_root_objectid);
- write_extent_buffer(cow, root->fs_info->fsid,
- (unsigned long)btrfs_header_fsid(cow),
+ write_extent_buffer(cow, root->fs_info->fsid, btrfs_header_fsid(),
BTRFS_FSID_SIZE);
WARN_ON(btrfs_header_generation(buf) > trans->transid);
@@ -484,8 +483,27 @@ __tree_mod_log_insert(struct btrfs_fs_info *fs_info, struct tree_mod_elem *tm)
struct rb_node **new;
struct rb_node *parent = NULL;
struct tree_mod_elem *cur;
+ int ret = 0;
- BUG_ON(!tm || !tm->seq);
+ BUG_ON(!tm);
+
+ tree_mod_log_write_lock(fs_info);
+ if (list_empty(&fs_info->tree_mod_seq_list)) {
+ tree_mod_log_write_unlock(fs_info);
+ /*
+ * Ok we no longer care about logging modifications, free up tm
+ * and return 0. Any callers shouldn't be using tm after
+ * calling tree_mod_log_insert, but if they do we can just
+ * change this to return a special error code to let the callers
+ * do their own thing.
+ */
+ kfree(tm);
+ return 0;
+ }
+
+ spin_lock(&fs_info->tree_mod_seq_lock);
+ tm->seq = btrfs_inc_tree_mod_seq_minor(fs_info);
+ spin_unlock(&fs_info->tree_mod_seq_lock);
tm_root = &fs_info->tree_mod_log;
new = &tm_root->rb_node;
@@ -501,14 +519,17 @@ __tree_mod_log_insert(struct btrfs_fs_info *fs_info, struct tree_mod_elem *tm)
else if (cur->seq > tm->seq)
new = &((*new)->rb_right);
else {
+ ret = -EEXIST;
kfree(tm);
- return -EEXIST;
+ goto out;
}
}
rb_link_node(&tm->node, parent, new);
rb_insert_color(&tm->node, tm_root);
- return 0;
+out:
+ tree_mod_log_write_unlock(fs_info);
+ return ret;
}
/*
@@ -524,57 +545,19 @@ static inline int tree_mod_dont_log(struct btrfs_fs_info *fs_info,
return 1;
if (eb && btrfs_header_level(eb) == 0)
return 1;
-
- tree_mod_log_write_lock(fs_info);
- if (list_empty(&fs_info->tree_mod_seq_list)) {
- /*
- * someone emptied the list while we were waiting for the lock.
- * we must not add to the list when no blocker exists.
- */
- tree_mod_log_write_unlock(fs_info);
- return 1;
- }
-
return 0;
}
-/*
- * This allocates memory and gets a tree modification sequence number.
- *
- * Returns <0 on error.
- * Returns >0 (the added sequence number) on success.
- */
-static inline int tree_mod_alloc(struct btrfs_fs_info *fs_info, gfp_t flags,
- struct tree_mod_elem **tm_ret)
-{
- struct tree_mod_elem *tm;
-
- /*
- * once we switch from spin locks to something different, we should
- * honor the flags parameter here.
- */
- tm = *tm_ret = kzalloc(sizeof(*tm), GFP_ATOMIC);
- if (!tm)
- return -ENOMEM;
-
- spin_lock(&fs_info->tree_mod_seq_lock);
- tm->seq = btrfs_inc_tree_mod_seq_minor(fs_info);
- spin_unlock(&fs_info->tree_mod_seq_lock);
-
- return tm->seq;
-}
-
static inline int
__tree_mod_log_insert_key(struct btrfs_fs_info *fs_info,
struct extent_buffer *eb, int slot,
enum mod_log_op op, gfp_t flags)
{
- int ret;
struct tree_mod_elem *tm;
- ret = tree_mod_alloc(fs_info, flags, &tm);
- if (ret < 0)
- return ret;
+ tm = kzalloc(sizeof(*tm), flags);
+ if (!tm)
+ return -ENOMEM;
tm->index = eb->start >> PAGE_CACHE_SHIFT;
if (op != MOD_LOG_KEY_ADD) {
@@ -589,34 +572,14 @@ __tree_mod_log_insert_key(struct btrfs_fs_info *fs_info,
}
static noinline int
-tree_mod_log_insert_key_mask(struct btrfs_fs_info *fs_info,
- struct extent_buffer *eb, int slot,
- enum mod_log_op op, gfp_t flags)
+tree_mod_log_insert_key(struct btrfs_fs_info *fs_info,
+ struct extent_buffer *eb, int slot,
+ enum mod_log_op op, gfp_t flags)
{
- int ret;
-
if (tree_mod_dont_log(fs_info, eb))
return 0;
- ret = __tree_mod_log_insert_key(fs_info, eb, slot, op, flags);
-
- tree_mod_log_write_unlock(fs_info);
- return ret;
-}
-
-static noinline int
-tree_mod_log_insert_key(struct btrfs_fs_info *fs_info, struct extent_buffer *eb,
- int slot, enum mod_log_op op)
-{
- return tree_mod_log_insert_key_mask(fs_info, eb, slot, op, GFP_NOFS);
-}
-
-static noinline int
-tree_mod_log_insert_key_locked(struct btrfs_fs_info *fs_info,
- struct extent_buffer *eb, int slot,
- enum mod_log_op op)
-{
- return __tree_mod_log_insert_key(fs_info, eb, slot, op, GFP_NOFS);
+ return __tree_mod_log_insert_key(fs_info, eb, slot, op, flags);
}
static noinline int
@@ -637,14 +600,14 @@ tree_mod_log_insert_move(struct btrfs_fs_info *fs_info,
* buffer, i.e. dst_slot < src_slot.
*/
for (i = 0; i + dst_slot < src_slot && i < nr_items; i++) {
- ret = tree_mod_log_insert_key_locked(fs_info, eb, i + dst_slot,
- MOD_LOG_KEY_REMOVE_WHILE_MOVING);
+ ret = __tree_mod_log_insert_key(fs_info, eb, i + dst_slot,
+ MOD_LOG_KEY_REMOVE_WHILE_MOVING, GFP_NOFS);
BUG_ON(ret < 0);
}
- ret = tree_mod_alloc(fs_info, flags, &tm);
- if (ret < 0)
- goto out;
+ tm = kzalloc(sizeof(*tm), flags);
+ if (!tm)
+ return -ENOMEM;
tm->index = eb->start >> PAGE_CACHE_SHIFT;
tm->slot = src_slot;
@@ -652,10 +615,7 @@ tree_mod_log_insert_move(struct btrfs_fs_info *fs_info,
tm->move.nr_items = nr_items;
tm->op = MOD_LOG_MOVE_KEYS;
- ret = __tree_mod_log_insert(fs_info, tm);
-out:
- tree_mod_log_write_unlock(fs_info);
- return ret;
+ return __tree_mod_log_insert(fs_info, tm);
}
static inline void
@@ -670,8 +630,8 @@ __tree_mod_log_free_eb(struct btrfs_fs_info *fs_info, struct extent_buffer *eb)
nritems = btrfs_header_nritems(eb);
for (i = nritems - 1; i >= 0; i--) {
- ret = tree_mod_log_insert_key_locked(fs_info, eb, i,
- MOD_LOG_KEY_REMOVE_WHILE_FREEING);
+ ret = __tree_mod_log_insert_key(fs_info, eb, i,
+ MOD_LOG_KEY_REMOVE_WHILE_FREEING, GFP_NOFS);
BUG_ON(ret < 0);
}
}
@@ -683,7 +643,6 @@ tree_mod_log_insert_root(struct btrfs_fs_info *fs_info,
int log_removal)
{
struct tree_mod_elem *tm;
- int ret;
if (tree_mod_dont_log(fs_info, NULL))
return 0;
@@ -691,9 +650,9 @@ tree_mod_log_insert_root(struct btrfs_fs_info *fs_info,
if (log_removal)
__tree_mod_log_free_eb(fs_info, old_root);
- ret = tree_mod_alloc(fs_info, flags, &tm);
- if (ret < 0)
- goto out;
+ tm = kzalloc(sizeof(*tm), flags);
+ if (!tm)
+ return -ENOMEM;
tm->index = new_root->start >> PAGE_CACHE_SHIFT;
tm->old_root.logical = old_root->start;
@@ -701,10 +660,7 @@ tree_mod_log_insert_root(struct btrfs_fs_info *fs_info,
tm->generation = btrfs_header_generation(old_root);
tm->op = MOD_LOG_ROOT_REPLACE;
- ret = __tree_mod_log_insert(fs_info, tm);
-out:
- tree_mod_log_write_unlock(fs_info);
- return ret;
+ return __tree_mod_log_insert(fs_info, tm);
}
static struct tree_mod_elem *
@@ -784,23 +740,20 @@ tree_mod_log_eb_copy(struct btrfs_fs_info *fs_info, struct extent_buffer *dst,
if (tree_mod_dont_log(fs_info, NULL))
return;
- if (btrfs_header_level(dst) == 0 && btrfs_header_level(src) == 0) {
- tree_mod_log_write_unlock(fs_info);
+ if (btrfs_header_level(dst) == 0 && btrfs_header_level(src) == 0)
return;
- }
for (i = 0; i < nr_items; i++) {
- ret = tree_mod_log_insert_key_locked(fs_info, src,
+ ret = __tree_mod_log_insert_key(fs_info, src,
i + src_offset,
- MOD_LOG_KEY_REMOVE);
+ MOD_LOG_KEY_REMOVE, GFP_NOFS);
BUG_ON(ret < 0);
- ret = tree_mod_log_insert_key_locked(fs_info, dst,
+ ret = __tree_mod_log_insert_key(fs_info, dst,
i + dst_offset,
- MOD_LOG_KEY_ADD);
+ MOD_LOG_KEY_ADD,
+ GFP_NOFS);
BUG_ON(ret < 0);
}
-
- tree_mod_log_write_unlock(fs_info);
}
static inline void
@@ -819,9 +772,9 @@ tree_mod_log_set_node_key(struct btrfs_fs_info *fs_info,
{
int ret;
- ret = tree_mod_log_insert_key_mask(fs_info, eb, slot,
- MOD_LOG_KEY_REPLACE,
- atomic ? GFP_ATOMIC : GFP_NOFS);
+ ret = __tree_mod_log_insert_key(fs_info, eb, slot,
+ MOD_LOG_KEY_REPLACE,
+ atomic ? GFP_ATOMIC : GFP_NOFS);
BUG_ON(ret < 0);
}
@@ -830,10 +783,7 @@ tree_mod_log_free_eb(struct btrfs_fs_info *fs_info, struct extent_buffer *eb)
{
if (tree_mod_dont_log(fs_info, eb))
return;
-
__tree_mod_log_free_eb(fs_info, eb);
-
- tree_mod_log_write_unlock(fs_info);
}
static noinline void
@@ -1046,8 +996,7 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
else
btrfs_set_header_owner(cow, root->root_key.objectid);
- write_extent_buffer(cow, root->fs_info->fsid,
- (unsigned long)btrfs_header_fsid(cow),
+ write_extent_buffer(cow, root->fs_info->fsid, btrfs_header_fsid(),
BTRFS_FSID_SIZE);
ret = update_ref_for_cow(trans, root, buf, cow, &last_ref);
@@ -1056,8 +1005,11 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
return ret;
}
- if (root->ref_cows)
- btrfs_reloc_cow_block(trans, root, buf, cow);
+ if (root->ref_cows) {
+ ret = btrfs_reloc_cow_block(trans, root, buf, cow);
+ if (ret)
+ return ret;
+ }
if (buf == root->node) {
WARN_ON(parent && parent != buf);
@@ -1083,7 +1035,7 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
WARN_ON(trans->transid != btrfs_header_generation(parent));
tree_mod_log_insert_key(root->fs_info, parent, parent_slot,
- MOD_LOG_KEY_REPLACE);
+ MOD_LOG_KEY_REPLACE, GFP_NOFS);
btrfs_set_node_blockptr(parent, parent_slot,
cow->start);
btrfs_set_node_ptr_generation(parent, parent_slot,
@@ -1116,7 +1068,7 @@ __tree_mod_log_oldest_root(struct btrfs_fs_info *fs_info,
int looped = 0;
if (!time_seq)
- return 0;
+ return NULL;
/*
* the very last operation that's logged for a root is the replacement
@@ -1127,7 +1079,7 @@ __tree_mod_log_oldest_root(struct btrfs_fs_info *fs_info,
tm = tree_mod_log_search_oldest(fs_info, root_logical,
time_seq);
if (!looped && !tm)
- return 0;
+ return NULL;
/*
* if there are no tree operation for the oldest root, we simply
* return it. this should only happen if that (old) root is at
@@ -1240,8 +1192,8 @@ __tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer *eb,
* is freed (its refcount is decremented).
*/
static struct extent_buffer *
-tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer *eb,
- u64 time_seq)
+tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct btrfs_path *path,
+ struct extent_buffer *eb, u64 time_seq)
{
struct extent_buffer *eb_rewin;
struct tree_mod_elem *tm;
@@ -1256,11 +1208,18 @@ tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer *eb,
if (!tm)
return eb;
+ btrfs_set_path_blocking(path);
+ btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
+
if (tm->op == MOD_LOG_KEY_REMOVE_WHILE_FREEING) {
BUG_ON(tm->slot != 0);
eb_rewin = alloc_dummy_extent_buffer(eb->start,
fs_info->tree_root->nodesize);
- BUG_ON(!eb_rewin);
+ if (!eb_rewin) {
+ btrfs_tree_read_unlock_blocking(eb);
+ free_extent_buffer(eb);
+ return NULL;
+ }
btrfs_set_header_bytenr(eb_rewin, eb->start);
btrfs_set_header_backref_rev(eb_rewin,
btrfs_header_backref_rev(eb));
@@ -1268,10 +1227,15 @@ tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer *eb,
btrfs_set_header_level(eb_rewin, btrfs_header_level(eb));
} else {
eb_rewin = btrfs_clone_extent_buffer(eb);
- BUG_ON(!eb_rewin);
+ if (!eb_rewin) {
+ btrfs_tree_read_unlock_blocking(eb);
+ free_extent_buffer(eb);
+ return NULL;
+ }
}
- btrfs_tree_read_unlock(eb);
+ btrfs_clear_path_blocking(path, NULL, BTRFS_READ_LOCK);
+ btrfs_tree_read_unlock_blocking(eb);
free_extent_buffer(eb);
extent_buffer_get(eb_rewin);
@@ -1321,11 +1285,10 @@ get_old_root(struct btrfs_root *root, u64 time_seq)
free_extent_buffer(eb_root);
blocksize = btrfs_level_size(root, old_root->level);
old = read_tree_block(root, logical, blocksize, 0);
- if (!old || !extent_buffer_uptodate(old)) {
+ if (WARN_ON(!old || !extent_buffer_uptodate(old))) {
free_extent_buffer(old);
pr_warn("btrfs: failed to read tree block %llu from get_old_root\n",
logical);
- WARN_ON(1);
} else {
eb = btrfs_clone_extent_buffer(old);
free_extent_buffer(old);
@@ -1335,8 +1298,9 @@ get_old_root(struct btrfs_root *root, u64 time_seq)
free_extent_buffer(eb_root);
eb = alloc_dummy_extent_buffer(logical, root->nodesize);
} else {
+ btrfs_set_lock_blocking_rw(eb_root, BTRFS_READ_LOCK);
eb = btrfs_clone_extent_buffer(eb_root);
- btrfs_tree_read_unlock(eb_root);
+ btrfs_tree_read_unlock_blocking(eb_root);
free_extent_buffer(eb_root);
}
@@ -1419,14 +1383,12 @@ noinline int btrfs_cow_block(struct btrfs_trans_handle *trans,
if (trans->transaction != root->fs_info->running_transaction)
WARN(1, KERN_CRIT "trans %llu running %llu\n",
- (unsigned long long)trans->transid,
- (unsigned long long)
+ trans->transid,
root->fs_info->running_transaction->transid);
if (trans->transid != root->fs_info->generation)
WARN(1, KERN_CRIT "trans %llu running %llu\n",
- (unsigned long long)trans->transid,
- (unsigned long long)root->fs_info->generation);
+ trans->transid, root->fs_info->generation);
if (!should_cow_block(trans, root, buf)) {
*cow_ret = buf;
@@ -2466,6 +2428,40 @@ done:
return ret;
}
+static void key_search_validate(struct extent_buffer *b,
+ struct btrfs_key *key,
+ int level)
+{
+#ifdef CONFIG_BTRFS_ASSERT
+ struct btrfs_disk_key disk_key;
+
+ btrfs_cpu_key_to_disk(&disk_key, key);
+
+ if (level == 0)
+ ASSERT(!memcmp_extent_buffer(b, &disk_key,
+ offsetof(struct btrfs_leaf, items[0].key),
+ sizeof(disk_key)));
+ else
+ ASSERT(!memcmp_extent_buffer(b, &disk_key,
+ offsetof(struct btrfs_node, ptrs[0].key),
+ sizeof(disk_key)));
+#endif
+}
+
+static int key_search(struct extent_buffer *b, struct btrfs_key *key,
+ int level, int *prev_cmp, int *slot)
+{
+ if (*prev_cmp != 0) {
+ *prev_cmp = bin_search(b, key, level, slot);
+ return *prev_cmp;
+ }
+
+ key_search_validate(b, key, level);
+ *slot = 0;
+
+ return 0;
+}
+
/*
* look for key in the tree. path is filled in with nodes along the way
* if key is found, we return zero and you can find the item in the leaf
@@ -2494,6 +2490,7 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
int write_lock_level = 0;
u8 lowest_level = 0;
int min_write_lock_level;
+ int prev_cmp;
lowest_level = p->lowest_level;
WARN_ON(lowest_level && ins_len > 0);
@@ -2524,6 +2521,7 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
min_write_lock_level = write_lock_level;
again:
+ prev_cmp = -1;
/*
* we try very hard to do read locks on the root
*/
@@ -2624,7 +2622,7 @@ cow_done:
if (!cow)
btrfs_unlock_up_safe(p, level + 1);
- ret = bin_search(b, key, level, &slot);
+ ret = key_search(b, key, level, &prev_cmp, &slot);
if (level != 0) {
int dec = 0;
@@ -2759,6 +2757,7 @@ int btrfs_search_old_slot(struct btrfs_root *root, struct btrfs_key *key,
int level;
int lowest_unlock = 1;
u8 lowest_level = 0;
+ int prev_cmp = -1;
lowest_level = p->lowest_level;
WARN_ON(p->nodes[0] != NULL);
@@ -2786,7 +2785,12 @@ again:
*/
btrfs_unlock_up_safe(p, level + 1);
- ret = bin_search(b, key, level, &slot);
+ /*
+ * Since we can unwind eb's we want to do a real search every
+ * time.
+ */
+ prev_cmp = -1;
+ ret = key_search(b, key, level, &prev_cmp, &slot);
if (level != 0) {
int dec = 0;
@@ -2820,7 +2824,11 @@ again:
btrfs_clear_path_blocking(p, b,
BTRFS_READ_LOCK);
}
- b = tree_mod_log_rewind(root->fs_info, b, time_seq);
+ b = tree_mod_log_rewind(root->fs_info, p, b, time_seq);
+ if (!b) {
+ ret = -ENOMEM;
+ goto done;
+ }
p->locks[level] = BTRFS_READ_LOCK;
p->nodes[level] = b;
} else {
@@ -3143,13 +3151,11 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans,
btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
btrfs_set_header_owner(c, root->root_key.objectid);
- write_extent_buffer(c, root->fs_info->fsid,
- (unsigned long)btrfs_header_fsid(c),
+ write_extent_buffer(c, root->fs_info->fsid, btrfs_header_fsid(),
BTRFS_FSID_SIZE);
write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
- (unsigned long)btrfs_header_chunk_tree_uuid(c),
- BTRFS_UUID_SIZE);
+ btrfs_header_chunk_tree_uuid(c), BTRFS_UUID_SIZE);
btrfs_set_node_key(c, &lower_key, 0);
btrfs_set_node_blockptr(c, 0, lower->start);
@@ -3208,7 +3214,7 @@ static void insert_ptr(struct btrfs_trans_handle *trans,
}
if (level) {
ret = tree_mod_log_insert_key(root->fs_info, lower, slot,
- MOD_LOG_KEY_ADD);
+ MOD_LOG_KEY_ADD, GFP_NOFS);
BUG_ON(ret < 0);
}
btrfs_set_node_key(lower, key, slot);
@@ -3284,10 +3290,9 @@ static noinline int split_node(struct btrfs_trans_handle *trans,
btrfs_set_header_backref_rev(split, BTRFS_MIXED_BACKREF_REV);
btrfs_set_header_owner(split, root->root_key.objectid);
write_extent_buffer(split, root->fs_info->fsid,
- (unsigned long)btrfs_header_fsid(split),
- BTRFS_FSID_SIZE);
+ btrfs_header_fsid(), BTRFS_FSID_SIZE);
write_extent_buffer(split, root->fs_info->chunk_tree_uuid,
- (unsigned long)btrfs_header_chunk_tree_uuid(split),
+ btrfs_header_chunk_tree_uuid(split),
BTRFS_UUID_SIZE);
tree_mod_log_eb_copy(root->fs_info, split, c, 0, mid, c_nritems - mid);
@@ -3335,8 +3340,8 @@ static int leaf_space_used(struct extent_buffer *l, int start, int nr)
if (!nr)
return 0;
btrfs_init_map_token(&token);
- start_item = btrfs_item_nr(l, start);
- end_item = btrfs_item_nr(l, end);
+ start_item = btrfs_item_nr(start);
+ end_item = btrfs_item_nr(end);
data_len = btrfs_token_item_offset(l, start_item, &token) +
btrfs_token_item_size(l, start_item, &token);
data_len = data_len - btrfs_token_item_offset(l, end_item, &token);
@@ -3404,7 +3409,7 @@ static noinline int __push_leaf_right(struct btrfs_trans_handle *trans,
slot = path->slots[1];
i = left_nritems - 1;
while (i >= nr) {
- item = btrfs_item_nr(left, i);
+ item = btrfs_item_nr(i);
if (!empty && push_items > 0) {
if (path->slots[0] > i)
@@ -3468,7 +3473,7 @@ static noinline int __push_leaf_right(struct btrfs_trans_handle *trans,
btrfs_set_header_nritems(right, right_nritems);
push_space = BTRFS_LEAF_DATA_SIZE(root);
for (i = 0; i < right_nritems; i++) {
- item = btrfs_item_nr(right, i);
+ item = btrfs_item_nr(i);
push_space -= btrfs_token_item_size(right, item, &token);
btrfs_set_token_item_offset(right, item, push_space, &token);
}
@@ -3610,7 +3615,7 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
nr = min(right_nritems - 1, max_slot);
for (i = 0; i < nr; i++) {
- item = btrfs_item_nr(right, i);
+ item = btrfs_item_nr(i);
if (!empty && push_items > 0) {
if (path->slots[0] < i)
@@ -3637,8 +3642,7 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
ret = 1;
goto out;
}
- if (!empty && push_items == btrfs_header_nritems(right))
- WARN_ON(1);
+ WARN_ON(!empty && push_items == btrfs_header_nritems(right));
/* push data from right to left */
copy_extent_buffer(left, right,
@@ -3661,7 +3665,7 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
for (i = old_left_nritems; i < old_left_nritems + push_items; i++) {
u32 ioff;
- item = btrfs_item_nr(left, i);
+ item = btrfs_item_nr(i);
ioff = btrfs_token_item_offset(left, item, &token);
btrfs_set_token_item_offset(left, item,
@@ -3692,7 +3696,7 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
btrfs_set_header_nritems(right, right_nritems);
push_space = BTRFS_LEAF_DATA_SIZE(root);
for (i = 0; i < right_nritems; i++) {
- item = btrfs_item_nr(right, i);
+ item = btrfs_item_nr(i);
push_space = push_space - btrfs_token_item_size(right,
item, &token);
@@ -3833,7 +3837,7 @@ static noinline void copy_for_split(struct btrfs_trans_handle *trans,
btrfs_item_end_nr(l, mid);
for (i = 0; i < nritems; i++) {
- struct btrfs_item *item = btrfs_item_nr(right, i);
+ struct btrfs_item *item = btrfs_item_nr(i);
u32 ioff;
ioff = btrfs_token_item_offset(right, item, &token);
@@ -4014,7 +4018,7 @@ again:
data_size > BTRFS_LEAF_DATA_SIZE(root)) {
if (data_size && !tried_avoid_double)
goto push_for_double;
- split = 2 ;
+ split = 2;
}
}
}
@@ -4040,11 +4044,10 @@ again:
btrfs_set_header_owner(right, root->root_key.objectid);
btrfs_set_header_level(right, 0);
write_extent_buffer(right, root->fs_info->fsid,
- (unsigned long)btrfs_header_fsid(right),
- BTRFS_FSID_SIZE);
+ btrfs_header_fsid(), BTRFS_FSID_SIZE);
write_extent_buffer(right, root->fs_info->chunk_tree_uuid,
- (unsigned long)btrfs_header_chunk_tree_uuid(right),
+ btrfs_header_chunk_tree_uuid(right),
BTRFS_UUID_SIZE);
if (split == 0) {
@@ -4176,7 +4179,7 @@ static noinline int split_item(struct btrfs_trans_handle *trans,
btrfs_set_path_blocking(path);
- item = btrfs_item_nr(leaf, path->slots[0]);
+ item = btrfs_item_nr(path->slots[0]);
orig_offset = btrfs_item_offset(leaf, item);
item_size = btrfs_item_size(leaf, item);
@@ -4199,7 +4202,7 @@ static noinline int split_item(struct btrfs_trans_handle *trans,
btrfs_cpu_key_to_disk(&disk_key, new_key);
btrfs_set_item_key(leaf, &disk_key, slot);
- new_item = btrfs_item_nr(leaf, slot);
+ new_item = btrfs_item_nr(slot);
btrfs_set_item_offset(leaf, new_item, orig_offset);
btrfs_set_item_size(leaf, new_item, item_size - split_offset);
@@ -4338,7 +4341,7 @@ void btrfs_truncate_item(struct btrfs_root *root, struct btrfs_path *path,
/* first correct the data pointers */
for (i = slot; i < nritems; i++) {
u32 ioff;
- item = btrfs_item_nr(leaf, i);
+ item = btrfs_item_nr(i);
ioff = btrfs_token_item_offset(leaf, item, &token);
btrfs_set_token_item_offset(leaf, item,
@@ -4386,7 +4389,7 @@ void btrfs_truncate_item(struct btrfs_root *root, struct btrfs_path *path,
fixup_low_keys(root, path, &disk_key, 1);
}
- item = btrfs_item_nr(leaf, slot);
+ item = btrfs_item_nr(slot);
btrfs_set_item_size(leaf, item, new_size);
btrfs_mark_buffer_dirty(leaf);
@@ -4440,7 +4443,7 @@ void btrfs_extend_item(struct btrfs_root *root, struct btrfs_path *path,
/* first correct the data pointers */
for (i = slot; i < nritems; i++) {
u32 ioff;
- item = btrfs_item_nr(leaf, i);
+ item = btrfs_item_nr(i);
ioff = btrfs_token_item_offset(leaf, item, &token);
btrfs_set_token_item_offset(leaf, item,
@@ -4454,7 +4457,7 @@ void btrfs_extend_item(struct btrfs_root *root, struct btrfs_path *path,
data_end = old_data;
old_size = btrfs_item_size_nr(leaf, slot);
- item = btrfs_item_nr(leaf, slot);
+ item = btrfs_item_nr(slot);
btrfs_set_item_size(leaf, item, old_size + data_size);
btrfs_mark_buffer_dirty(leaf);
@@ -4513,7 +4516,7 @@ void setup_items_for_insert(struct btrfs_root *root, struct btrfs_path *path,
for (i = slot; i < nritems; i++) {
u32 ioff;
- item = btrfs_item_nr(leaf, i);
+ item = btrfs_item_nr( i);
ioff = btrfs_token_item_offset(leaf, item, &token);
btrfs_set_token_item_offset(leaf, item,
ioff - total_data, &token);
@@ -4534,7 +4537,7 @@ void setup_items_for_insert(struct btrfs_root *root, struct btrfs_path *path,
for (i = 0; i < nr; i++) {
btrfs_cpu_key_to_disk(&disk_key, cpu_key + i);
btrfs_set_item_key(leaf, &disk_key, slot + i);
- item = btrfs_item_nr(leaf, slot + i);
+ item = btrfs_item_nr(slot + i);
btrfs_set_token_item_offset(leaf, item,
data_end - data_size[i], &token);
data_end -= data_size[i];
@@ -4642,7 +4645,7 @@ static void del_ptr(struct btrfs_root *root, struct btrfs_path *path,
(nritems - slot - 1));
} else if (level) {
ret = tree_mod_log_insert_key(root->fs_info, parent, slot,
- MOD_LOG_KEY_REMOVE);
+ MOD_LOG_KEY_REMOVE, GFP_NOFS);
BUG_ON(ret < 0);
}
@@ -4729,7 +4732,7 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
for (i = slot + nr; i < nritems; i++) {
u32 ioff;
- item = btrfs_item_nr(leaf, i);
+ item = btrfs_item_nr(i);
ioff = btrfs_token_item_offset(leaf, item, &token);
btrfs_set_token_item_offset(leaf, item,
ioff + dsize, &token);
@@ -4814,7 +4817,7 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
* This may release the path, and so you may lose any locks held at the
* time you call it.
*/
-int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path)
+static int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path)
{
struct btrfs_key key;
struct btrfs_disk_key found_key;
@@ -4822,14 +4825,18 @@ int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path)
btrfs_item_key_to_cpu(path->nodes[0], &key, 0);
- if (key.offset > 0)
+ if (key.offset > 0) {
key.offset--;
- else if (key.type > 0)
+ } else if (key.type > 0) {
key.type--;
- else if (key.objectid > 0)
+ key.offset = (u64)-1;
+ } else if (key.objectid > 0) {
key.objectid--;
- else
+ key.type = (u8)-1;
+ key.offset = (u64)-1;
+ } else {
return 1;
+ }
btrfs_release_path(path);
ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
@@ -4865,7 +4872,6 @@ int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path)
* was nothing in the tree that matched the search criteria.
*/
int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key,
- struct btrfs_key *max_key,
struct btrfs_path *path,
u64 min_trans)
{
@@ -4910,10 +4916,8 @@ again:
* If it is too old, old, skip to the next one.
*/
while (slot < nritems) {
- u64 blockptr;
u64 gen;
- blockptr = btrfs_node_blockptr(cur, slot);
gen = btrfs_node_ptr_generation(cur, slot);
if (gen < min_trans) {
slot++;
@@ -5329,19 +5333,20 @@ int btrfs_compare_trees(struct btrfs_root *left_root,
goto out;
advance_right = ADVANCE;
} else {
+ enum btrfs_compare_tree_result cmp;
+
WARN_ON(!extent_buffer_uptodate(left_path->nodes[0]));
ret = tree_compare_item(left_root, left_path,
right_path, tmp_buf);
- if (ret) {
- WARN_ON(!extent_buffer_uptodate(left_path->nodes[0]));
- ret = changed_cb(left_root, right_root,
- left_path, right_path,
- &left_key,
- BTRFS_COMPARE_TREE_CHANGED,
- ctx);
- if (ret < 0)
- goto out;
- }
+ if (ret)
+ cmp = BTRFS_COMPARE_TREE_CHANGED;
+ else
+ cmp = BTRFS_COMPARE_TREE_SAME;
+ ret = changed_cb(left_root, right_root,
+ left_path, right_path,
+ &left_key, cmp, ctx);
+ if (ret < 0)
+ goto out;
advance_left = ADVANCE;
advance_right = ADVANCE;
}
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index e795bf1..aea4433 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -23,6 +23,7 @@
#include <linux/highmem.h>
#include <linux/fs.h>
#include <linux/rwsem.h>
+#include <linux/semaphore.h>
#include <linux/completion.h>
#include <linux/backing-dev.h>
#include <linux/wait.h>
@@ -46,6 +47,12 @@ extern struct kmem_cache *btrfs_path_cachep;
extern struct kmem_cache *btrfs_free_space_cachep;
struct btrfs_ordered_sum;
+#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
+#define STATIC noinline
+#else
+#define STATIC static noinline
+#endif
+
#define BTRFS_MAGIC 0x4D5F53665248425FULL /* ascii _BHRfS_M, no null */
#define BTRFS_MAX_MIRRORS 3
@@ -91,6 +98,9 @@ struct btrfs_ordered_sum;
/* holds quota configuration and tracking */
#define BTRFS_QUOTA_TREE_OBJECTID 8ULL
+/* for storing items that use the BTRFS_UUID_KEY* types */
+#define BTRFS_UUID_TREE_OBJECTID 9ULL
+
/* for storing balance parameters in the root tree */
#define BTRFS_BALANCE_OBJECTID -4ULL
@@ -142,7 +152,7 @@ struct btrfs_ordered_sum;
#define BTRFS_EMPTY_SUBVOL_DIR_OBJECTID 2
-#define BTRFS_DEV_REPLACE_DEVID 0
+#define BTRFS_DEV_REPLACE_DEVID 0ULL
/*
* the max metadata block size. This limit is somewhat artificial,
@@ -478,9 +488,10 @@ struct btrfs_super_block {
char label[BTRFS_LABEL_SIZE];
__le64 cache_generation;
+ __le64 uuid_tree_generation;
/* future expansion */
- __le64 reserved[31];
+ __le64 reserved[30];
u8 sys_chunk_array[BTRFS_SYSTEM_CHUNK_ARRAY_SIZE];
struct btrfs_root_backup super_roots[BTRFS_NUM_BACKUP_ROOTS];
} __attribute__ ((__packed__));
@@ -1113,15 +1124,6 @@ struct btrfs_space_info {
*/
struct percpu_counter total_bytes_pinned;
- /*
- * we bump reservation progress every time we decrement
- * bytes_reserved. This way people waiting for reservations
- * know something good has happened and they can check
- * for progress. The number here isn't to be trusted, it
- * just shows reclaim activity
- */
- unsigned long reservation_progress;
-
unsigned int full:1; /* indicates that we cannot allocate any more
chunks for this space */
unsigned int chunk_alloc:1; /* set if we are allocating a chunk */
@@ -1188,6 +1190,7 @@ enum btrfs_caching_type {
BTRFS_CACHE_STARTED = 1,
BTRFS_CACHE_FAST = 2,
BTRFS_CACHE_FINISHED = 3,
+ BTRFS_CACHE_ERROR = 4,
};
enum btrfs_disk_cache_state {
@@ -1302,6 +1305,7 @@ struct btrfs_fs_info {
struct btrfs_root *fs_root;
struct btrfs_root *csum_root;
struct btrfs_root *quota_root;
+ struct btrfs_root *uuid_root;
/* the log root tree is a directory of all the other log roots */
struct btrfs_root *log_root_tree;
@@ -1350,6 +1354,7 @@ struct btrfs_fs_info {
u64 last_trans_log_full_commit;
unsigned long mount_opt;
unsigned long compress_type:4;
+ int commit_interval;
/*
* It is a suggestive number, the read side is safe even it gets a
* wrong number because we will write out the data into a regular
@@ -1411,6 +1416,13 @@ struct btrfs_fs_info {
* before jumping into the main commit.
*/
struct mutex ordered_operations_mutex;
+
+ /*
+ * Same as ordered_operations_mutex except this is for ordered extents
+ * and not the operations.
+ */
+ struct mutex ordered_extent_flush_mutex;
+
struct rw_semaphore extent_commit_sem;
struct rw_semaphore cleanup_work_sem;
@@ -1574,7 +1586,6 @@ struct btrfs_fs_info {
atomic_t scrubs_paused;
atomic_t scrub_cancel_req;
wait_queue_head_t scrub_pause_wait;
- struct rw_semaphore scrub_super_lock;
int scrub_workers_refcnt;
struct btrfs_workers scrub_workers;
struct btrfs_workers scrub_wr_completion_workers;
@@ -1641,6 +1652,9 @@ struct btrfs_fs_info {
struct btrfs_dev_replace dev_replace;
atomic_t mutually_exclusive_operation_running;
+
+ struct semaphore uuid_tree_rescan_sem;
+ unsigned int update_uuid_tree_gen:1;
};
/*
@@ -1715,7 +1729,9 @@ struct btrfs_root {
int ref_cows;
int track_dirty;
int in_radix;
-
+#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
+ int dummy_root;
+#endif
u64 defrag_trans_start;
struct btrfs_key defrag_progress;
struct btrfs_key defrag_max;
@@ -1934,6 +1950,19 @@ struct btrfs_ioctl_defrag_range_args {
#define BTRFS_DEV_REPLACE_KEY 250
/*
+ * Stores items that allow to quickly map UUIDs to something else.
+ * These items are part of the filesystem UUID tree.
+ * The key is built like this:
+ * (UUID_upper_64_bits, BTRFS_UUID_KEY*, UUID_lower_64_bits).
+ */
+#if BTRFS_UUID_SIZE != 16
+#error "UUID items require BTRFS_UUID_SIZE == 16!"
+#endif
+#define BTRFS_UUID_KEY_SUBVOL 251 /* for UUIDs assigned to subvols */
+#define BTRFS_UUID_KEY_RECEIVED_SUBVOL 252 /* for UUIDs assigned to
+ * received subvols */
+
+/*
* string items are for debugging. They just store a short string of
* data in the FS
*/
@@ -1967,6 +1996,9 @@ struct btrfs_ioctl_defrag_range_args {
#define BTRFS_MOUNT_CHECK_INTEGRITY (1 << 20)
#define BTRFS_MOUNT_CHECK_INTEGRITY_INCLUDING_EXTENT_DATA (1 << 21)
#define BTRFS_MOUNT_PANIC_ON_FATAL_ERROR (1 << 22)
+#define BTRFS_MOUNT_RESCAN_UUID_TREE (1 << 23)
+
+#define BTRFS_DEFAULT_COMMIT_INTERVAL (30)
#define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt)
#define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt)
@@ -2130,14 +2162,14 @@ BTRFS_SETGET_STACK_FUNCS(stack_device_bandwidth, struct btrfs_dev_item,
BTRFS_SETGET_STACK_FUNCS(stack_device_generation, struct btrfs_dev_item,
generation, 64);
-static inline char *btrfs_device_uuid(struct btrfs_dev_item *d)
+static inline unsigned long btrfs_device_uuid(struct btrfs_dev_item *d)
{
- return (char *)d + offsetof(struct btrfs_dev_item, uuid);
+ return (unsigned long)d + offsetof(struct btrfs_dev_item, uuid);
}
-static inline char *btrfs_device_fsid(struct btrfs_dev_item *d)
+static inline unsigned long btrfs_device_fsid(struct btrfs_dev_item *d)
{
- return (char *)d + offsetof(struct btrfs_dev_item, fsid);
+ return (unsigned long)d + offsetof(struct btrfs_dev_item, fsid);
}
BTRFS_SETGET_FUNCS(chunk_length, struct btrfs_chunk, length, 64);
@@ -2240,6 +2272,23 @@ BTRFS_SETGET_FUNCS(inode_gid, struct btrfs_inode_item, gid, 32);
BTRFS_SETGET_FUNCS(inode_mode, struct btrfs_inode_item, mode, 32);
BTRFS_SETGET_FUNCS(inode_rdev, struct btrfs_inode_item, rdev, 64);
BTRFS_SETGET_FUNCS(inode_flags, struct btrfs_inode_item, flags, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_inode_generation, struct btrfs_inode_item,
+ generation, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_inode_sequence, struct btrfs_inode_item,
+ sequence, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_inode_transid, struct btrfs_inode_item,
+ transid, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_inode_size, struct btrfs_inode_item, size, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_inode_nbytes, struct btrfs_inode_item,
+ nbytes, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_inode_block_group, struct btrfs_inode_item,
+ block_group, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_inode_nlink, struct btrfs_inode_item, nlink, 32);
+BTRFS_SETGET_STACK_FUNCS(stack_inode_uid, struct btrfs_inode_item, uid, 32);
+BTRFS_SETGET_STACK_FUNCS(stack_inode_gid, struct btrfs_inode_item, gid, 32);
+BTRFS_SETGET_STACK_FUNCS(stack_inode_mode, struct btrfs_inode_item, mode, 32);
+BTRFS_SETGET_STACK_FUNCS(stack_inode_rdev, struct btrfs_inode_item, rdev, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_inode_flags, struct btrfs_inode_item, flags, 64);
static inline struct btrfs_timespec *
btrfs_inode_atime(struct btrfs_inode_item *inode_item)
@@ -2267,6 +2316,8 @@ btrfs_inode_ctime(struct btrfs_inode_item *inode_item)
BTRFS_SETGET_FUNCS(timespec_sec, struct btrfs_timespec, sec, 64);
BTRFS_SETGET_FUNCS(timespec_nsec, struct btrfs_timespec, nsec, 32);
+BTRFS_SETGET_STACK_FUNCS(stack_timespec_sec, struct btrfs_timespec, sec, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_timespec_nsec, struct btrfs_timespec, nsec, 32);
/* struct btrfs_dev_extent */
BTRFS_SETGET_FUNCS(dev_extent_chunk_tree, struct btrfs_dev_extent,
@@ -2277,10 +2328,10 @@ BTRFS_SETGET_FUNCS(dev_extent_chunk_offset, struct btrfs_dev_extent,
chunk_offset, 64);
BTRFS_SETGET_FUNCS(dev_extent_length, struct btrfs_dev_extent, length, 64);
-static inline u8 *btrfs_dev_extent_chunk_tree_uuid(struct btrfs_dev_extent *dev)
+static inline unsigned long btrfs_dev_extent_chunk_tree_uuid(struct btrfs_dev_extent *dev)
{
unsigned long ptr = offsetof(struct btrfs_dev_extent, chunk_tree_uuid);
- return (u8 *)((unsigned long)dev + ptr);
+ return (unsigned long)dev + ptr;
}
BTRFS_SETGET_FUNCS(extent_refs, struct btrfs_extent_item, refs, 64);
@@ -2348,6 +2399,10 @@ BTRFS_SETGET_FUNCS(ref_count_v0, struct btrfs_extent_ref_v0, count, 32);
/* struct btrfs_node */
BTRFS_SETGET_FUNCS(key_blockptr, struct btrfs_key_ptr, blockptr, 64);
BTRFS_SETGET_FUNCS(key_generation, struct btrfs_key_ptr, generation, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_key_blockptr, struct btrfs_key_ptr,
+ blockptr, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_key_generation, struct btrfs_key_ptr,
+ generation, 64);
static inline u64 btrfs_node_blockptr(struct extent_buffer *eb, int nr)
{
@@ -2404,6 +2459,8 @@ static inline void btrfs_set_node_key(struct extent_buffer *eb,
/* struct btrfs_item */
BTRFS_SETGET_FUNCS(item_offset, struct btrfs_item, offset, 32);
BTRFS_SETGET_FUNCS(item_size, struct btrfs_item, size, 32);
+BTRFS_SETGET_STACK_FUNCS(stack_item_offset, struct btrfs_item, offset, 32);
+BTRFS_SETGET_STACK_FUNCS(stack_item_size, struct btrfs_item, size, 32);
static inline unsigned long btrfs_item_nr_offset(int nr)
{
@@ -2411,8 +2468,7 @@ static inline unsigned long btrfs_item_nr_offset(int nr)
sizeof(struct btrfs_item) * nr;
}
-static inline struct btrfs_item *btrfs_item_nr(struct extent_buffer *eb,
- int nr)
+static inline struct btrfs_item *btrfs_item_nr(int nr)
{
return (struct btrfs_item *)btrfs_item_nr_offset(nr);
}
@@ -2425,30 +2481,30 @@ static inline u32 btrfs_item_end(struct extent_buffer *eb,
static inline u32 btrfs_item_end_nr(struct extent_buffer *eb, int nr)
{
- return btrfs_item_end(eb, btrfs_item_nr(eb, nr));
+ return btrfs_item_end(eb, btrfs_item_nr(nr));
}
static inline u32 btrfs_item_offset_nr(struct extent_buffer *eb, int nr)
{
- return btrfs_item_offset(eb, btrfs_item_nr(eb, nr));
+ return btrfs_item_offset(eb, btrfs_item_nr(nr));
}
static inline u32 btrfs_item_size_nr(struct extent_buffer *eb, int nr)
{
- return btrfs_item_size(eb, btrfs_item_nr(eb, nr));
+ return btrfs_item_size(eb, btrfs_item_nr(nr));
}
static inline void btrfs_item_key(struct extent_buffer *eb,
struct btrfs_disk_key *disk_key, int nr)
{
- struct btrfs_item *item = btrfs_item_nr(eb, nr);
+ struct btrfs_item *item = btrfs_item_nr(nr);
read_eb_member(eb, item, struct btrfs_item, key, disk_key);
}
static inline void btrfs_set_item_key(struct extent_buffer *eb,
struct btrfs_disk_key *disk_key, int nr)
{
- struct btrfs_item *item = btrfs_item_nr(eb, nr);
+ struct btrfs_item *item = btrfs_item_nr(nr);
write_eb_member(eb, item, struct btrfs_item, key, disk_key);
}
@@ -2466,6 +2522,13 @@ BTRFS_SETGET_FUNCS(dir_data_len, struct btrfs_dir_item, data_len, 16);
BTRFS_SETGET_FUNCS(dir_type, struct btrfs_dir_item, type, 8);
BTRFS_SETGET_FUNCS(dir_name_len, struct btrfs_dir_item, name_len, 16);
BTRFS_SETGET_FUNCS(dir_transid, struct btrfs_dir_item, transid, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_dir_type, struct btrfs_dir_item, type, 8);
+BTRFS_SETGET_STACK_FUNCS(stack_dir_data_len, struct btrfs_dir_item,
+ data_len, 16);
+BTRFS_SETGET_STACK_FUNCS(stack_dir_name_len, struct btrfs_dir_item,
+ name_len, 16);
+BTRFS_SETGET_STACK_FUNCS(stack_dir_transid, struct btrfs_dir_item,
+ transid, 64);
static inline void btrfs_dir_item_key(struct extent_buffer *eb,
struct btrfs_dir_item *item,
@@ -2568,6 +2631,12 @@ BTRFS_SETGET_HEADER_FUNCS(header_owner, struct btrfs_header, owner, 64);
BTRFS_SETGET_HEADER_FUNCS(header_nritems, struct btrfs_header, nritems, 32);
BTRFS_SETGET_HEADER_FUNCS(header_flags, struct btrfs_header, flags, 64);
BTRFS_SETGET_HEADER_FUNCS(header_level, struct btrfs_header, level, 8);
+BTRFS_SETGET_STACK_FUNCS(stack_header_generation, struct btrfs_header,
+ generation, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_header_owner, struct btrfs_header, owner, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_header_nritems, struct btrfs_header,
+ nritems, 32);
+BTRFS_SETGET_STACK_FUNCS(stack_header_bytenr, struct btrfs_header, bytenr, 64);
static inline int btrfs_header_flag(struct extent_buffer *eb, u64 flag)
{
@@ -2603,16 +2672,14 @@ static inline void btrfs_set_header_backref_rev(struct extent_buffer *eb,
btrfs_set_header_flags(eb, flags);
}
-static inline u8 *btrfs_header_fsid(struct extent_buffer *eb)
+static inline unsigned long btrfs_header_fsid(void)
{
- unsigned long ptr = offsetof(struct btrfs_header, fsid);
- return (u8 *)ptr;
+ return offsetof(struct btrfs_header, fsid);
}
-static inline u8 *btrfs_header_chunk_tree_uuid(struct extent_buffer *eb)
+static inline unsigned long btrfs_header_chunk_tree_uuid(struct extent_buffer *eb)
{
- unsigned long ptr = offsetof(struct btrfs_header, chunk_tree_uuid);
- return (u8 *)ptr;
+ return offsetof(struct btrfs_header, chunk_tree_uuid);
}
static inline int btrfs_is_leaf(struct extent_buffer *eb)
@@ -2830,6 +2897,9 @@ BTRFS_SETGET_STACK_FUNCS(super_csum_type, struct btrfs_super_block,
csum_type, 16);
BTRFS_SETGET_STACK_FUNCS(super_cache_generation, struct btrfs_super_block,
cache_generation, 64);
+BTRFS_SETGET_STACK_FUNCS(super_magic, struct btrfs_super_block, magic, 64);
+BTRFS_SETGET_STACK_FUNCS(super_uuid_tree_generation, struct btrfs_super_block,
+ uuid_tree_generation, 64);
static inline int btrfs_super_csum_size(struct btrfs_super_block *s)
{
@@ -2847,6 +2917,14 @@ static inline unsigned long btrfs_leaf_data(struct extent_buffer *l)
/* struct btrfs_file_extent_item */
BTRFS_SETGET_FUNCS(file_extent_type, struct btrfs_file_extent_item, type, 8);
+BTRFS_SETGET_STACK_FUNCS(stack_file_extent_disk_bytenr,
+ struct btrfs_file_extent_item, disk_bytenr, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_file_extent_offset,
+ struct btrfs_file_extent_item, offset, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_file_extent_generation,
+ struct btrfs_file_extent_item, generation, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_file_extent_num_bytes,
+ struct btrfs_file_extent_item, num_bytes, 64);
static inline unsigned long
btrfs_file_extent_inline_start(struct btrfs_file_extent_item *e)
@@ -3054,7 +3132,7 @@ static inline u64 btrfs_calc_trans_metadata_size(struct btrfs_root *root,
unsigned num_items)
{
return (root->leafsize + root->nodesize * (BTRFS_MAX_LEVEL - 1)) *
- 3 * num_items;
+ 2 * num_items;
}
/*
@@ -3107,11 +3185,9 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 root_objectid, u64 owner, u64 offset,
struct btrfs_key *ins);
-int btrfs_reserve_extent(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- u64 num_bytes, u64 min_alloc_size,
- u64 empty_size, u64 hint_byte,
- struct btrfs_key *ins, int is_data);
+int btrfs_reserve_extent(struct btrfs_root *root, u64 num_bytes,
+ u64 min_alloc_size, u64 empty_size, u64 hint_byte,
+ struct btrfs_key *ins, int is_data);
int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
struct extent_buffer *buf, int full_backref, int for_cow);
int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
@@ -3175,7 +3251,7 @@ void btrfs_orphan_release_metadata(struct inode *inode);
int btrfs_subvolume_reserve_metadata(struct btrfs_root *root,
struct btrfs_block_rsv *rsv,
int nitems,
- u64 *qgroup_reserved);
+ u64 *qgroup_reserved, bool use_global_rsv);
void btrfs_subvolume_release_metadata(struct btrfs_root *root,
struct btrfs_block_rsv *rsv,
u64 qgroup_reserved);
@@ -3238,13 +3314,13 @@ int btrfs_find_next_key(struct btrfs_root *root, struct btrfs_path *path,
struct btrfs_key *key, int lowest_level,
u64 min_trans);
int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key,
- struct btrfs_key *max_key,
struct btrfs_path *path,
u64 min_trans);
enum btrfs_compare_tree_result {
BTRFS_COMPARE_TREE_NEW,
BTRFS_COMPARE_TREE_DELETED,
BTRFS_COMPARE_TREE_CHANGED,
+ BTRFS_COMPARE_TREE_SAME,
};
typedef int (*btrfs_changed_cb_t)(struct btrfs_root *left_root,
struct btrfs_root *right_root,
@@ -3380,6 +3456,7 @@ static inline void free_fs_info(struct btrfs_fs_info *fs_info)
kfree(fs_info->dev_root);
kfree(fs_info->csum_root);
kfree(fs_info->quota_root);
+ kfree(fs_info->uuid_root);
kfree(fs_info->super_copy);
kfree(fs_info->super_for_commit);
kfree(fs_info);
@@ -3414,8 +3491,6 @@ int __must_check btrfs_update_root(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_key *key,
struct btrfs_root_item *item);
-void btrfs_read_root_item(struct extent_buffer *eb, int slot,
- struct btrfs_root_item *item);
int btrfs_find_root(struct btrfs_root *root, struct btrfs_key *search_key,
struct btrfs_path *path, struct btrfs_root_item *root_item,
struct btrfs_key *root_key);
@@ -3426,6 +3501,17 @@ void btrfs_check_and_init_root_item(struct btrfs_root_item *item);
void btrfs_update_root_times(struct btrfs_trans_handle *trans,
struct btrfs_root *root);
+/* uuid-tree.c */
+int btrfs_uuid_tree_add(struct btrfs_trans_handle *trans,
+ struct btrfs_root *uuid_root, u8 *uuid, u8 type,
+ u64 subid);
+int btrfs_uuid_tree_rem(struct btrfs_trans_handle *trans,
+ struct btrfs_root *uuid_root, u8 *uuid, u8 type,
+ u64 subid);
+int btrfs_uuid_tree_iterate(struct btrfs_fs_info *fs_info,
+ int (*check_func)(struct btrfs_fs_info *, u8 *, u8,
+ u64));
+
/* dir-item.c */
int btrfs_check_dir_item_collision(struct btrfs_root *root, u64 dir,
const char *name, int name_len);
@@ -3509,12 +3595,14 @@ int btrfs_find_name_in_ext_backref(struct btrfs_path *path,
struct btrfs_inode_extref **extref_ret);
/* file-item.c */
+struct btrfs_dio_private;
int btrfs_del_csums(struct btrfs_trans_handle *trans,
struct btrfs_root *root, u64 bytenr, u64 len);
int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode,
struct bio *bio, u32 *dst);
int btrfs_lookup_bio_sums_dio(struct btrfs_root *root, struct inode *inode,
- struct bio *bio, u64 logical_offset);
+ struct btrfs_dio_private *dip, struct bio *bio,
+ u64 logical_offset);
int btrfs_insert_file_extent(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 objectid, u64 pos,
@@ -3552,8 +3640,7 @@ void btrfs_wait_and_free_delalloc_work(struct btrfs_delalloc_work *work);
struct extent_map *btrfs_get_extent_fiemap(struct inode *inode, struct page *page,
size_t pg_offset, u64 start, u64 len,
int create);
-noinline int can_nocow_extent(struct btrfs_trans_handle *trans,
- struct inode *inode, u64 offset, u64 *len,
+noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len,
u64 *orig_start, u64 *orig_block_len,
u64 *ram_bytes);
@@ -3593,8 +3680,7 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
u32 min_type);
int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput);
-int btrfs_start_all_delalloc_inodes(struct btrfs_fs_info *fs_info,
- int delay_iput);
+int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, int delay_iput);
int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end,
struct extent_state **cached_state);
int btrfs_create_subvol_root(struct btrfs_trans_handle *trans,
@@ -3643,11 +3729,15 @@ extern const struct dentry_operations btrfs_dentry_operations;
long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
void btrfs_update_iflags(struct inode *inode);
void btrfs_inherit_iflags(struct inode *inode, struct inode *dir);
+int btrfs_is_empty_uuid(u8 *uuid);
int btrfs_defrag_file(struct inode *inode, struct file *file,
struct btrfs_ioctl_defrag_range_args *range,
u64 newer_than, unsigned long max_pages);
void btrfs_get_block_group_info(struct list_head *groups_list,
struct btrfs_ioctl_space_info *space);
+void update_ioctl_balance_args(struct btrfs_fs_info *fs_info, int lock,
+ struct btrfs_ioctl_balance_args *bargs);
+
/* file.c */
int btrfs_auto_defrag_init(void);
@@ -3720,6 +3810,22 @@ void btrfs_printk(const struct btrfs_fs_info *fs_info, const char *fmt, ...)
#define btrfs_debug(fs_info, fmt, args...) \
btrfs_printk(fs_info, KERN_DEBUG fmt, ##args)
+#ifdef CONFIG_BTRFS_ASSERT
+
+static inline void assfail(char *expr, char *file, int line)
+{
+ printk(KERN_ERR "BTRFS assertion failed: %s, file: %s, line: %d",
+ expr, file, line);
+ BUG();
+}
+
+#define ASSERT(expr) \
+ (likely(expr) ? (void)0 : assfail(#expr, __FILE__, __LINE__))
+#else
+#define ASSERT(expr) ((void)0)
+#endif
+
+#define btrfs_assert()
__printf(5, 6)
void __btrfs_std_error(struct btrfs_fs_info *fs_info, const char *function,
unsigned int line, int errno, const char *fmt, ...);
@@ -3828,9 +3934,9 @@ int btrfs_update_reloc_root(struct btrfs_trans_handle *trans,
struct btrfs_root *root);
int btrfs_recover_relocation(struct btrfs_root *root);
int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len);
-void btrfs_reloc_cow_block(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, struct extent_buffer *buf,
- struct extent_buffer *cow);
+int btrfs_reloc_cow_block(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root, struct extent_buffer *buf,
+ struct extent_buffer *cow);
void btrfs_reloc_pre_snapshot(struct btrfs_trans_handle *trans,
struct btrfs_pending_snapshot *pending,
u64 *bytes_to_reserve);
@@ -3842,9 +3948,7 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
u64 end, struct btrfs_scrub_progress *progress,
int readonly, int is_dev_replace);
void btrfs_scrub_pause(struct btrfs_root *root);
-void btrfs_scrub_pause_super(struct btrfs_root *root);
void btrfs_scrub_continue(struct btrfs_root *root);
-void btrfs_scrub_continue_super(struct btrfs_root *root);
int btrfs_scrub_cancel(struct btrfs_fs_info *info);
int btrfs_scrub_cancel_dev(struct btrfs_fs_info *info,
struct btrfs_device *dev);
@@ -3926,5 +4030,9 @@ static inline int btrfs_defrag_cancelled(struct btrfs_fs_info *fs_info)
return signal_pending(current);
}
+/* Sanity test specific functions */
+#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
+void btrfs_test_destroy_inode(struct inode *inode);
+#endif
#endif
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index 3755109..8d292fb 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -21,6 +21,7 @@
#include "delayed-inode.h"
#include "disk-io.h"
#include "transaction.h"
+#include "ctree.h"
#define BTRFS_DELAYED_WRITEBACK 512
#define BTRFS_DELAYED_BACKGROUND 128
@@ -107,8 +108,8 @@ static struct btrfs_delayed_node *btrfs_get_delayed_node(struct inode *inode)
return node;
}
btrfs_inode->delayed_node = node;
- atomic_inc(&node->refs); /* can be accessed */
- atomic_inc(&node->refs); /* cached in the inode */
+ /* can be accessed and cached in the inode */
+ atomic_add(2, &node->refs);
spin_unlock(&root->inode_lock);
return node;
}
@@ -137,8 +138,8 @@ again:
return ERR_PTR(-ENOMEM);
btrfs_init_delayed_node(node, root, ino);
- atomic_inc(&node->refs); /* cached in the btrfs inode */
- atomic_inc(&node->refs); /* can be accessed */
+ /* cached in the btrfs inode and can be accessed */
+ atomic_add(2, &node->refs);
ret = radix_tree_preload(GFP_NOFS & ~__GFP_HIGHMEM);
if (ret) {
@@ -648,14 +649,13 @@ static int btrfs_delayed_inode_reserve_metadata(
goto out;
ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes);
- if (!ret)
+ if (!WARN_ON(ret))
goto out;
/*
* Ok this is a problem, let's just steal from the global rsv
* since this really shouldn't happen that often.
*/
- WARN_ON(1);
ret = btrfs_block_rsv_migrate(&root->fs_info->global_block_rsv,
dst_rsv, num_bytes);
goto out;
@@ -770,13 +770,13 @@ static int btrfs_batch_insert_items(struct btrfs_root *root,
*/
btrfs_set_path_blocking(path);
- keys = kmalloc(sizeof(struct btrfs_key) * nitems, GFP_NOFS);
+ keys = kmalloc_array(nitems, sizeof(struct btrfs_key), GFP_NOFS);
if (!keys) {
ret = -ENOMEM;
goto out;
}
- data_size = kmalloc(sizeof(u32) * nitems, GFP_NOFS);
+ data_size = kmalloc_array(nitems, sizeof(u32), GFP_NOFS);
if (!data_size) {
ret = -ENOMEM;
goto error;
@@ -1173,8 +1173,10 @@ int btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,
mutex_unlock(&delayed_node->mutex);
path = btrfs_alloc_path();
- if (!path)
+ if (!path) {
+ btrfs_release_delayed_node(delayed_node);
return -ENOMEM;
+ }
path->leave_spinning = 1;
block_rsv = trans->block_rsv;
@@ -1453,10 +1455,10 @@ int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans,
dir_item = (struct btrfs_dir_item *)delayed_item->data;
dir_item->location = *disk_key;
- dir_item->transid = cpu_to_le64(trans->transid);
- dir_item->data_len = 0;
- dir_item->name_len = cpu_to_le16(name_len);
- dir_item->type = type;
+ btrfs_set_stack_dir_transid(dir_item, trans->transid);
+ btrfs_set_stack_dir_data_len(dir_item, 0);
+ btrfs_set_stack_dir_name_len(dir_item, name_len);
+ btrfs_set_stack_dir_type(dir_item, type);
memcpy((char *)(dir_item + 1), name, name_len);
ret = btrfs_delayed_item_reserve_metadata(trans, root, delayed_item);
@@ -1470,13 +1472,11 @@ int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans,
mutex_lock(&delayed_node->mutex);
ret = __btrfs_add_delayed_insertion_item(delayed_node, delayed_item);
if (unlikely(ret)) {
- printk(KERN_ERR "err add delayed dir index item(name: %s) into "
- "the insertion tree of the delayed node"
+ printk(KERN_ERR "err add delayed dir index item(name: %.*s) "
+ "into the insertion tree of the delayed node"
"(root id: %llu, inode id: %llu, errno: %d)\n",
- name,
- (unsigned long long)delayed_node->root->objectid,
- (unsigned long long)delayed_node->inode_id,
- ret);
+ name_len, name, delayed_node->root->objectid,
+ delayed_node->inode_id, ret);
BUG();
}
mutex_unlock(&delayed_node->mutex);
@@ -1547,9 +1547,7 @@ int btrfs_delete_delayed_dir_index(struct btrfs_trans_handle *trans,
printk(KERN_ERR "err add delayed dir index item(index: %llu) "
"into the deletion tree of the delayed node"
"(root id: %llu, inode id: %llu, errno: %d)\n",
- (unsigned long long)index,
- (unsigned long long)node->root->objectid,
- (unsigned long long)node->inode_id,
+ index, node->root->objectid, node->inode_id,
ret);
BUG();
}
@@ -1699,7 +1697,7 @@ int btrfs_readdir_delayed_dir_index(struct dir_context *ctx,
di = (struct btrfs_dir_item *)curr->data;
name = (char *)(di + 1);
- name_len = le16_to_cpu(di->name_len);
+ name_len = btrfs_stack_dir_name_len(di);
d_type = btrfs_filetype_table[di->type];
btrfs_disk_key_to_cpu(&location, &di->location);
@@ -1716,27 +1714,6 @@ int btrfs_readdir_delayed_dir_index(struct dir_context *ctx,
return 0;
}
-BTRFS_SETGET_STACK_FUNCS(stack_inode_generation, struct btrfs_inode_item,
- generation, 64);
-BTRFS_SETGET_STACK_FUNCS(stack_inode_sequence, struct btrfs_inode_item,
- sequence, 64);
-BTRFS_SETGET_STACK_FUNCS(stack_inode_transid, struct btrfs_inode_item,
- transid, 64);
-BTRFS_SETGET_STACK_FUNCS(stack_inode_size, struct btrfs_inode_item, size, 64);
-BTRFS_SETGET_STACK_FUNCS(stack_inode_nbytes, struct btrfs_inode_item,
- nbytes, 64);
-BTRFS_SETGET_STACK_FUNCS(stack_inode_block_group, struct btrfs_inode_item,
- block_group, 64);
-BTRFS_SETGET_STACK_FUNCS(stack_inode_nlink, struct btrfs_inode_item, nlink, 32);
-BTRFS_SETGET_STACK_FUNCS(stack_inode_uid, struct btrfs_inode_item, uid, 32);
-BTRFS_SETGET_STACK_FUNCS(stack_inode_gid, struct btrfs_inode_item, gid, 32);
-BTRFS_SETGET_STACK_FUNCS(stack_inode_mode, struct btrfs_inode_item, mode, 32);
-BTRFS_SETGET_STACK_FUNCS(stack_inode_rdev, struct btrfs_inode_item, rdev, 64);
-BTRFS_SETGET_STACK_FUNCS(stack_inode_flags, struct btrfs_inode_item, flags, 64);
-
-BTRFS_SETGET_STACK_FUNCS(stack_timespec_sec, struct btrfs_timespec, sec, 64);
-BTRFS_SETGET_STACK_FUNCS(stack_timespec_nsec, struct btrfs_timespec, nsec, 32);
-
static void fill_stack_inode_item(struct btrfs_trans_handle *trans,
struct btrfs_inode_item *inode_item,
struct inode *inode)
diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c
index c219463..e4d467b 100644
--- a/fs/btrfs/delayed-ref.c
+++ b/fs/btrfs/delayed-ref.c
@@ -241,7 +241,7 @@ int btrfs_delayed_ref_lock(struct btrfs_trans_handle *trans,
return 0;
}
-static void inline drop_delayed_ref(struct btrfs_trans_handle *trans,
+static inline void drop_delayed_ref(struct btrfs_trans_handle *trans,
struct btrfs_delayed_ref_root *delayed_refs,
struct btrfs_delayed_ref_node *ref)
{
@@ -600,7 +600,7 @@ static noinline void add_delayed_ref_head(struct btrfs_fs_info *fs_info,
INIT_LIST_HEAD(&head_ref->cluster);
mutex_init(&head_ref->mutex);
- trace_btrfs_delayed_ref_head(ref, head_ref, action);
+ trace_add_delayed_ref_head(ref, head_ref, action);
existing = tree_insert(&delayed_refs->root, &ref->rb_node);
@@ -661,7 +661,7 @@ static noinline void add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
ref->type = BTRFS_TREE_BLOCK_REF_KEY;
full_ref->level = level;
- trace_btrfs_delayed_tree_ref(ref, full_ref, action);
+ trace_add_delayed_tree_ref(ref, full_ref, action);
existing = tree_insert(&delayed_refs->root, &ref->rb_node);
@@ -722,7 +722,7 @@ static noinline void add_delayed_data_ref(struct btrfs_fs_info *fs_info,
full_ref->objectid = owner;
full_ref->offset = offset;
- trace_btrfs_delayed_data_ref(ref, full_ref, action);
+ trace_add_delayed_data_ref(ref, full_ref, action);
existing = tree_insert(&delayed_refs->root, &ref->rb_node);
diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c
index 4253ad5..342f9fd 100644
--- a/fs/btrfs/dev-replace.c
+++ b/fs/btrfs/dev-replace.c
@@ -26,7 +26,6 @@
#include <linux/kthread.h>
#include <linux/math64.h>
#include <asm/div64.h>
-#include "compat.h"
#include "ctree.h"
#include "extent_map.h"
#include "disk-io.h"
@@ -38,7 +37,6 @@
#include "rcu-string.h"
#include "dev-replace.h"
-static u64 btrfs_get_seconds_since_1970(void);
static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
int scrub_ret);
static void btrfs_dev_replace_update_device_in_mapping_tree(
@@ -148,13 +146,13 @@ no_valid_dev_replace_entry_found:
!btrfs_test_opt(dev_root, DEGRADED)) {
ret = -EIO;
pr_warn("btrfs: cannot mount because device replace operation is ongoing and\n" "srcdev (devid %llu) is missing, need to run 'btrfs dev scan'?\n",
- (unsigned long long)src_devid);
+ src_devid);
}
if (!dev_replace->tgtdev &&
!btrfs_test_opt(dev_root, DEGRADED)) {
ret = -EIO;
pr_warn("btrfs: cannot mount because device replace operation is ongoing and\n" "tgtdev (devid %llu) is missing, need to run btrfs dev scan?\n",
- (unsigned long long)BTRFS_DEV_REPLACE_DEVID);
+ BTRFS_DEV_REPLACE_DEVID);
}
if (dev_replace->tgtdev) {
if (dev_replace->srcdev) {
@@ -296,13 +294,6 @@ void btrfs_after_dev_replace_commit(struct btrfs_fs_info *fs_info)
dev_replace->cursor_left_last_write_of_item;
}
-static u64 btrfs_get_seconds_since_1970(void)
-{
- struct timespec t = CURRENT_TIME_SEC;
-
- return t.tv_sec;
-}
-
int btrfs_dev_replace_start(struct btrfs_root *root,
struct btrfs_ioctl_dev_replace_args *args)
{
@@ -390,7 +381,7 @@ int btrfs_dev_replace_start(struct btrfs_root *root,
* go to the tgtdev as well (refer to btrfs_map_block()).
*/
dev_replace->replace_state = BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED;
- dev_replace->time_started = btrfs_get_seconds_since_1970();
+ dev_replace->time_started = get_seconds();
dev_replace->cursor_left = 0;
dev_replace->committed_cursor_left = 0;
dev_replace->cursor_left_last_write_of_item = 0;
@@ -400,7 +391,7 @@ int btrfs_dev_replace_start(struct btrfs_root *root,
args->result = BTRFS_IOCTL_DEV_REPLACE_RESULT_NO_ERROR;
btrfs_dev_replace_unlock(dev_replace);
- btrfs_wait_all_ordered_extents(root->fs_info, 0);
+ btrfs_wait_ordered_roots(root->fs_info, -1);
/* force writing the updated state information to disk */
trans = btrfs_start_transaction(root, 0);
@@ -470,12 +461,12 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
* flush all outstanding I/O and inode extent mappings before the
* copy operation is declared as being finished
*/
- ret = btrfs_start_all_delalloc_inodes(root->fs_info, 0);
+ ret = btrfs_start_delalloc_roots(root->fs_info, 0);
if (ret) {
mutex_unlock(&dev_replace->lock_finishing_cancel_unmount);
return ret;
}
- btrfs_wait_all_ordered_extents(root->fs_info, 0);
+ btrfs_wait_ordered_roots(root->fs_info, -1);
trans = btrfs_start_transaction(root, 0);
if (IS_ERR(trans)) {
@@ -493,7 +484,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
: BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED;
dev_replace->tgtdev = NULL;
dev_replace->srcdev = NULL;
- dev_replace->time_stopped = btrfs_get_seconds_since_1970();
+ dev_replace->time_stopped = get_seconds();
dev_replace->item_needs_writeback = 1;
if (scrub_ret) {
@@ -535,10 +526,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
list_add(&tgt_device->dev_alloc_list, &fs_info->fs_devices->alloc_list);
btrfs_rm_dev_replace_srcdev(fs_info, src_device);
- if (src_device->bdev) {
- /* zero out the old super */
- btrfs_scratch_superblock(src_device);
- }
+
/*
* this is again a consistent state where no dev_replace procedure
* is running, the target device is part of the filesystem, the
@@ -653,6 +641,9 @@ static u64 __btrfs_dev_replace_cancel(struct btrfs_fs_info *fs_info)
u64 result;
int ret;
+ if (fs_info->sb->s_flags & MS_RDONLY)
+ return -EROFS;
+
mutex_lock(&dev_replace->lock_finishing_cancel_unmount);
btrfs_dev_replace_lock(dev_replace);
switch (dev_replace->replace_state) {
@@ -671,7 +662,7 @@ static u64 __btrfs_dev_replace_cancel(struct btrfs_fs_info *fs_info)
break;
}
dev_replace->replace_state = BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED;
- dev_replace->time_stopped = btrfs_get_seconds_since_1970();
+ dev_replace->time_stopped = get_seconds();
dev_replace->item_needs_writeback = 1;
btrfs_dev_replace_unlock(dev_replace);
btrfs_scrub_cancel(fs_info);
@@ -706,7 +697,7 @@ void btrfs_dev_replace_suspend_for_unmount(struct btrfs_fs_info *fs_info)
case BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED:
dev_replace->replace_state =
BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED;
- dev_replace->time_stopped = btrfs_get_seconds_since_1970();
+ dev_replace->time_stopped = get_seconds();
dev_replace->item_needs_writeback = 1;
pr_info("btrfs: suspending dev_replace for unmount\n");
break;
@@ -747,7 +738,7 @@ int btrfs_resume_dev_replace_async(struct btrfs_fs_info *fs_info)
WARN_ON(atomic_xchg(
&fs_info->mutually_exclusive_operation_running, 1));
task = kthread_run(btrfs_dev_replace_kthread, fs_info, "btrfs-devrepl");
- return PTR_RET(task);
+ return PTR_ERR_OR_ZERO(task);
}
static int btrfs_dev_replace_kthread(void *data)
diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c
index 79e594e..c031ea3 100644
--- a/fs/btrfs/dir-item.c
+++ b/fs/btrfs/dir-item.c
@@ -58,7 +58,7 @@ static struct btrfs_dir_item *insert_with_overflow(struct btrfs_trans_handle
return ERR_PTR(ret);
WARN_ON(ret > 0);
leaf = path->nodes[0];
- item = btrfs_item_nr(leaf, path->slots[0]);
+ item = btrfs_item_nr(path->slots[0]);
ptr = btrfs_item_ptr(leaf, path->slots[0], char);
BUG_ON(data_size > btrfs_item_size(leaf, item));
ptr += btrfs_item_size(leaf, item) - data_size;
@@ -474,8 +474,10 @@ int verify_dir_item(struct btrfs_root *root,
}
/* BTRFS_MAX_XATTR_SIZE is the same for all dir items */
- if (btrfs_dir_data_len(leaf, dir_item) > BTRFS_MAX_XATTR_SIZE(root)) {
- printk(KERN_CRIT "btrfs: invalid dir item data len: %u\n",
+ if ((btrfs_dir_data_len(leaf, dir_item) +
+ btrfs_dir_name_len(leaf, dir_item)) > BTRFS_MAX_XATTR_SIZE(root)) {
+ printk(KERN_CRIT "btrfs: invalid dir item name + data len: %u + %u\n",
+ (unsigned)btrfs_dir_name_len(leaf, dir_item),
(unsigned)btrfs_dir_data_len(leaf, dir_item));
return 1;
}
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 6b092a1..4c4ed0b 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -31,8 +31,8 @@
#include <linux/migrate.h>
#include <linux/ratelimit.h>
#include <linux/uuid.h>
+#include <linux/semaphore.h>
#include <asm/unaligned.h>
-#include "compat.h"
#include "ctree.h"
#include "disk-io.h"
#include "transaction.h"
@@ -63,7 +63,6 @@ static void btrfs_destroy_ordered_operations(struct btrfs_transaction *t,
static void btrfs_destroy_ordered_extents(struct btrfs_root *root);
static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
struct btrfs_root *root);
-static void btrfs_evict_pending_snapshots(struct btrfs_transaction *t);
static void btrfs_destroy_delalloc_inodes(struct btrfs_root *root);
static int btrfs_destroy_marked_extents(struct btrfs_root *root,
struct extent_io_tree *dirty_pages,
@@ -156,6 +155,7 @@ static struct btrfs_lockdep_keyset {
{ .id = BTRFS_TREE_LOG_OBJECTID, .name_stem = "log" },
{ .id = BTRFS_TREE_RELOC_OBJECTID, .name_stem = "treloc" },
{ .id = BTRFS_DATA_RELOC_TREE_OBJECTID, .name_stem = "dreloc" },
+ { .id = BTRFS_UUID_TREE_OBJECTID, .name_stem = "uuid" },
{ .id = 0, .name_stem = "tree" },
};
@@ -302,9 +302,8 @@ static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf,
printk_ratelimited(KERN_INFO "btrfs: %s checksum verify "
"failed on %llu wanted %X found %X "
"level %d\n",
- root->fs_info->sb->s_id,
- (unsigned long long)buf->start, val, found,
- btrfs_header_level(buf));
+ root->fs_info->sb->s_id, buf->start,
+ val, found, btrfs_header_level(buf));
if (result != (char *)&inline_result)
kfree(result);
return 1;
@@ -345,9 +344,7 @@ static int verify_parent_transid(struct extent_io_tree *io_tree,
}
printk_ratelimited("parent transid verify failed on %llu wanted %llu "
"found %llu\n",
- (unsigned long long)eb->start,
- (unsigned long long)parent_transid,
- (unsigned long long)btrfs_header_generation(eb));
+ eb->start, parent_transid, btrfs_header_generation(eb));
ret = 1;
clear_extent_buffer_uptodate(eb);
out:
@@ -478,14 +475,8 @@ static int csum_dirty_buffer(struct btrfs_root *root, struct page *page)
if (page != eb->pages[0])
return 0;
found_start = btrfs_header_bytenr(eb);
- if (found_start != start) {
- WARN_ON(1);
+ if (WARN_ON(found_start != start || !PageUptodate(page)))
return 0;
- }
- if (!PageUptodate(page)) {
- WARN_ON(1);
- return 0;
- }
csum_tree_block(root, eb, 0);
return 0;
}
@@ -497,8 +488,7 @@ static int check_tree_block_fsid(struct btrfs_root *root,
u8 fsid[BTRFS_UUID_SIZE];
int ret = 1;
- read_extent_buffer(eb, fsid, (unsigned long)btrfs_header_fsid(eb),
- BTRFS_FSID_SIZE);
+ read_extent_buffer(eb, fsid, btrfs_header_fsid(), BTRFS_FSID_SIZE);
while (fs_devices) {
if (!memcmp(fsid, fs_devices->fsid, BTRFS_FSID_SIZE)) {
ret = 0;
@@ -512,8 +502,7 @@ static int check_tree_block_fsid(struct btrfs_root *root,
#define CORRUPT(reason, eb, root, slot) \
printk(KERN_CRIT "btrfs: corrupt leaf, %s: block=%llu," \
"root=%llu, slot=%d\n", reason, \
- (unsigned long long)btrfs_header_bytenr(eb), \
- (unsigned long long)root->objectid, slot)
+ btrfs_header_bytenr(eb), root->objectid, slot)
static noinline int check_leaf(struct btrfs_root *root,
struct extent_buffer *leaf)
@@ -576,8 +565,9 @@ static noinline int check_leaf(struct btrfs_root *root,
return 0;
}
-static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end,
- struct extent_state *state, int mirror)
+static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
+ u64 phy_offset, struct page *page,
+ u64 start, u64 end, int mirror)
{
struct extent_io_tree *tree;
u64 found_start;
@@ -612,14 +602,13 @@ static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end,
if (found_start != eb->start) {
printk_ratelimited(KERN_INFO "btrfs bad tree block start "
"%llu %llu\n",
- (unsigned long long)found_start,
- (unsigned long long)eb->start);
+ found_start, eb->start);
ret = -EIO;
goto err;
}
if (check_tree_block_fsid(root, eb)) {
printk_ratelimited(KERN_INFO "btrfs bad fsid on block %llu\n",
- (unsigned long long)eb->start);
+ eb->start);
ret = -EIO;
goto err;
}
@@ -1108,8 +1097,7 @@ struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root,
{
struct inode *btree_inode = root->fs_info->btree_inode;
struct extent_buffer *eb;
- eb = find_extent_buffer(&BTRFS_I(btree_inode)->io_tree,
- bytenr, blocksize);
+ eb = find_extent_buffer(&BTRFS_I(btree_inode)->io_tree, bytenr);
return eb;
}
@@ -1148,6 +1136,10 @@ struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr,
return NULL;
ret = btree_read_extent_buffer_pages(root, buf, 0, parent_transid);
+ if (ret) {
+ free_extent_buffer(buf);
+ return NULL;
+ }
return buf;
}
@@ -1228,14 +1220,18 @@ static void __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
atomic_set(&root->refs, 1);
root->log_transid = 0;
root->last_log_commit = 0;
- extent_io_tree_init(&root->dirty_log_pages,
- fs_info->btree_inode->i_mapping);
+ if (fs_info)
+ extent_io_tree_init(&root->dirty_log_pages,
+ fs_info->btree_inode->i_mapping);
memset(&root->root_key, 0, sizeof(root->root_key));
memset(&root->root_item, 0, sizeof(root->root_item));
memset(&root->defrag_progress, 0, sizeof(root->defrag_progress));
memset(&root->root_kobj, 0, sizeof(root->root_kobj));
- root->defrag_trans_start = fs_info->generation;
+ if (fs_info)
+ root->defrag_trans_start = fs_info->generation;
+ else
+ root->defrag_trans_start = 0;
init_completion(&root->kobj_unregister);
root->defrag_running = 0;
root->root_key.objectid = objectid;
@@ -1252,6 +1248,22 @@ static struct btrfs_root *btrfs_alloc_root(struct btrfs_fs_info *fs_info)
return root;
}
+#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
+/* Should only be used by the testing infrastructure */
+struct btrfs_root *btrfs_alloc_dummy_root(void)
+{
+ struct btrfs_root *root;
+
+ root = btrfs_alloc_root(NULL);
+ if (!root)
+ return ERR_PTR(-ENOMEM);
+ __setup_root(4096, 4096, 4096, 4096, root, NULL, 1);
+ root->dummy_root = 1;
+
+ return root;
+}
+#endif
+
struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info,
u64 objectid)
@@ -1291,11 +1303,10 @@ struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans,
btrfs_set_header_owner(leaf, objectid);
root->node = leaf;
- write_extent_buffer(leaf, fs_info->fsid,
- (unsigned long)btrfs_header_fsid(leaf),
+ write_extent_buffer(leaf, fs_info->fsid, btrfs_header_fsid(),
BTRFS_FSID_SIZE);
write_extent_buffer(leaf, fs_info->chunk_tree_uuid,
- (unsigned long)btrfs_header_chunk_tree_uuid(leaf),
+ btrfs_header_chunk_tree_uuid(leaf),
BTRFS_UUID_SIZE);
btrfs_mark_buffer_dirty(leaf);
@@ -1379,8 +1390,7 @@ static struct btrfs_root *alloc_log_tree(struct btrfs_trans_handle *trans,
root->node = leaf;
write_extent_buffer(root->node, root->fs_info->fsid,
- (unsigned long)btrfs_header_fsid(root->node),
- BTRFS_FSID_SIZE);
+ btrfs_header_fsid(), BTRFS_FSID_SIZE);
btrfs_mark_buffer_dirty(root->node);
btrfs_tree_unlock(root->node);
return root;
@@ -1413,11 +1423,11 @@ int btrfs_add_log_tree(struct btrfs_trans_handle *trans,
log_root->root_key.offset = root->root_key.objectid;
inode_item = &log_root->root_item.inode;
- inode_item->generation = cpu_to_le64(1);
- inode_item->size = cpu_to_le64(3);
- inode_item->nlink = cpu_to_le32(1);
- inode_item->nbytes = cpu_to_le64(root->leafsize);
- inode_item->mode = cpu_to_le32(S_IFDIR | 0755);
+ btrfs_set_stack_inode_generation(inode_item, 1);
+ btrfs_set_stack_inode_size(inode_item, 3);
+ btrfs_set_stack_inode_nlink(inode_item, 1);
+ btrfs_set_stack_inode_nbytes(inode_item, root->leafsize);
+ btrfs_set_stack_inode_mode(inode_item, S_IFDIR | 0755);
btrfs_set_root_node(&log_root->root_item, log_root->node);
@@ -1428,8 +1438,8 @@ int btrfs_add_log_tree(struct btrfs_trans_handle *trans,
return 0;
}
-struct btrfs_root *btrfs_read_tree_root(struct btrfs_root *tree_root,
- struct btrfs_key *key)
+static struct btrfs_root *btrfs_read_tree_root(struct btrfs_root *tree_root,
+ struct btrfs_key *key)
{
struct btrfs_root *root;
struct btrfs_fs_info *fs_info = tree_root->fs_info;
@@ -1529,8 +1539,8 @@ fail:
return ret;
}
-struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info,
- u64 root_id)
+static struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info,
+ u64 root_id)
{
struct btrfs_root *root;
@@ -1562,8 +1572,9 @@ int btrfs_insert_fs_root(struct btrfs_fs_info *fs_info,
return ret;
}
-struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info,
- struct btrfs_key *location)
+struct btrfs_root *btrfs_get_fs_root(struct btrfs_fs_info *fs_info,
+ struct btrfs_key *location,
+ bool check_ref)
{
struct btrfs_root *root;
int ret;
@@ -1581,16 +1592,22 @@ struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info,
if (location->objectid == BTRFS_QUOTA_TREE_OBJECTID)
return fs_info->quota_root ? fs_info->quota_root :
ERR_PTR(-ENOENT);
+ if (location->objectid == BTRFS_UUID_TREE_OBJECTID)
+ return fs_info->uuid_root ? fs_info->uuid_root :
+ ERR_PTR(-ENOENT);
again:
root = btrfs_lookup_fs_root(fs_info, location->objectid);
- if (root)
+ if (root) {
+ if (check_ref && btrfs_root_refs(&root->root_item) == 0)
+ return ERR_PTR(-ENOENT);
return root;
+ }
root = btrfs_read_fs_root(fs_info->tree_root, location);
if (IS_ERR(root))
return root;
- if (btrfs_root_refs(&root->root_item) == 0) {
+ if (check_ref && btrfs_root_refs(&root->root_item) == 0) {
ret = -ENOENT;
goto fail;
}
@@ -1737,7 +1754,7 @@ static int transaction_kthread(void *arg)
do {
cannot_commit = false;
- delay = HZ * 30;
+ delay = HZ * root->fs_info->commit_interval;
mutex_lock(&root->fs_info->transaction_kthread_mutex);
spin_lock(&root->fs_info->trans_lock);
@@ -1749,7 +1766,8 @@ static int transaction_kthread(void *arg)
now = get_seconds();
if (cur->state < TRANS_STATE_BLOCKED &&
- (now < cur->start_time || now - cur->start_time < 30)) {
+ (now < cur->start_time ||
+ now - cur->start_time < root->fs_info->commit_interval)) {
spin_unlock(&root->fs_info->trans_lock);
delay = HZ * 5;
goto sleep;
@@ -1773,6 +1791,9 @@ sleep:
wake_up_process(root->fs_info->cleaner_kthread);
mutex_unlock(&root->fs_info->transaction_kthread_mutex);
+ if (unlikely(test_bit(BTRFS_FS_STATE_ERROR,
+ &root->fs_info->fs_state)))
+ btrfs_cleanup_transaction(root);
if (!try_to_freeze()) {
set_current_state(TASK_INTERRUPTIBLE);
if (!kthread_should_stop() &&
@@ -2006,44 +2027,28 @@ static void btrfs_stop_all_workers(struct btrfs_fs_info *fs_info)
btrfs_stop_workers(&fs_info->qgroup_rescan_workers);
}
+static void free_root_extent_buffers(struct btrfs_root *root)
+{
+ if (root) {
+ free_extent_buffer(root->node);
+ free_extent_buffer(root->commit_root);
+ root->node = NULL;
+ root->commit_root = NULL;
+ }
+}
+
/* helper to cleanup tree roots */
static void free_root_pointers(struct btrfs_fs_info *info, int chunk_root)
{
- free_extent_buffer(info->tree_root->node);
- free_extent_buffer(info->tree_root->commit_root);
- info->tree_root->node = NULL;
- info->tree_root->commit_root = NULL;
-
- if (info->dev_root) {
- free_extent_buffer(info->dev_root->node);
- free_extent_buffer(info->dev_root->commit_root);
- info->dev_root->node = NULL;
- info->dev_root->commit_root = NULL;
- }
- if (info->extent_root) {
- free_extent_buffer(info->extent_root->node);
- free_extent_buffer(info->extent_root->commit_root);
- info->extent_root->node = NULL;
- info->extent_root->commit_root = NULL;
- }
- if (info->csum_root) {
- free_extent_buffer(info->csum_root->node);
- free_extent_buffer(info->csum_root->commit_root);
- info->csum_root->node = NULL;
- info->csum_root->commit_root = NULL;
- }
- if (info->quota_root) {
- free_extent_buffer(info->quota_root->node);
- free_extent_buffer(info->quota_root->commit_root);
- info->quota_root->node = NULL;
- info->quota_root->commit_root = NULL;
- }
- if (chunk_root) {
- free_extent_buffer(info->chunk_root->node);
- free_extent_buffer(info->chunk_root->commit_root);
- info->chunk_root->node = NULL;
- info->chunk_root->commit_root = NULL;
- }
+ free_root_extent_buffers(info->tree_root);
+
+ free_root_extent_buffers(info->dev_root);
+ free_root_extent_buffers(info->extent_root);
+ free_root_extent_buffers(info->csum_root);
+ free_root_extent_buffers(info->quota_root);
+ free_root_extent_buffers(info->uuid_root);
+ if (chunk_root)
+ free_root_extent_buffers(info->chunk_root);
}
static void del_fs_roots(struct btrfs_fs_info *fs_info)
@@ -2098,11 +2103,14 @@ int open_ctree(struct super_block *sb,
struct btrfs_root *chunk_root;
struct btrfs_root *dev_root;
struct btrfs_root *quota_root;
+ struct btrfs_root *uuid_root;
struct btrfs_root *log_tree_root;
int ret;
int err = -EINVAL;
int num_backups_tried = 0;
int backup_index = 0;
+ bool create_uuid_tree;
+ bool check_uuid_tree;
tree_root = fs_info->tree_root = btrfs_alloc_root(fs_info);
chunk_root = fs_info->chunk_root = btrfs_alloc_root(fs_info);
@@ -2189,6 +2197,7 @@ int open_ctree(struct super_block *sb,
fs_info->defrag_inodes = RB_ROOT;
fs_info->free_chunk_space = 0;
fs_info->tree_mod_log = RB_ROOT;
+ fs_info->commit_interval = BTRFS_DEFAULT_COMMIT_INTERVAL;
/* readahead state */
INIT_RADIX_TREE(&fs_info->reada_tree, GFP_NOFS & ~__GFP_WAIT);
@@ -2213,7 +2222,6 @@ int open_ctree(struct super_block *sb,
atomic_set(&fs_info->scrubs_paused, 0);
atomic_set(&fs_info->scrub_cancel_req, 0);
init_waitqueue_head(&fs_info->scrub_pause_wait);
- init_rwsem(&fs_info->scrub_super_lock);
fs_info->scrub_workers_refcnt = 0;
#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
fs_info->check_integrity_print_mask = 0;
@@ -2255,7 +2263,7 @@ int open_ctree(struct super_block *sb,
sizeof(struct btrfs_key));
set_bit(BTRFS_INODE_DUMMY,
&BTRFS_I(fs_info->btree_inode)->runtime_flags);
- insert_inode_hash(fs_info->btree_inode);
+ btrfs_insert_inode_hash(fs_info->btree_inode);
spin_lock_init(&fs_info->block_group_cache_lock);
fs_info->block_group_cache_tree = RB_ROOT;
@@ -2270,6 +2278,7 @@ int open_ctree(struct super_block *sb,
mutex_init(&fs_info->ordered_operations_mutex);
+ mutex_init(&fs_info->ordered_extent_flush_mutex);
mutex_init(&fs_info->tree_log_mutex);
mutex_init(&fs_info->chunk_mutex);
mutex_init(&fs_info->transaction_kthread_mutex);
@@ -2278,6 +2287,7 @@ int open_ctree(struct super_block *sb,
init_rwsem(&fs_info->extent_commit_sem);
init_rwsem(&fs_info->cleanup_work_sem);
init_rwsem(&fs_info->subvol_sem);
+ sema_init(&fs_info->uuid_tree_rescan_sem, 1);
fs_info->dev_replace.lock_owner = 0;
atomic_set(&fs_info->dev_replace.nesting_level, 0);
mutex_init(&fs_info->dev_replace.lock_finishing_cancel_unmount);
@@ -2383,7 +2393,7 @@ int open_ctree(struct super_block *sb,
if (features) {
printk(KERN_ERR "BTRFS: couldn't mount because of "
"unsupported optional features (%Lx).\n",
- (unsigned long long)features);
+ features);
err = -EINVAL;
goto fail_alloc;
}
@@ -2453,7 +2463,7 @@ int open_ctree(struct super_block *sb,
if (!(sb->s_flags & MS_RDONLY) && features) {
printk(KERN_ERR "BTRFS: couldn't mount RDWR because of "
"unsupported option features (%Lx).\n",
- (unsigned long long)features);
+ features);
err = -EINVAL;
goto fail_alloc;
}
@@ -2466,20 +2476,17 @@ int open_ctree(struct super_block *sb,
&fs_info->generic_worker);
btrfs_init_workers(&fs_info->delalloc_workers, "delalloc",
- fs_info->thread_pool_size,
- &fs_info->generic_worker);
+ fs_info->thread_pool_size, NULL);
btrfs_init_workers(&fs_info->flush_workers, "flush_delalloc",
- fs_info->thread_pool_size,
- &fs_info->generic_worker);
+ fs_info->thread_pool_size, NULL);
btrfs_init_workers(&fs_info->submit_workers, "submit",
min_t(u64, fs_devices->num_devices,
- fs_info->thread_pool_size),
- &fs_info->generic_worker);
+ fs_info->thread_pool_size), NULL);
btrfs_init_workers(&fs_info->caching_workers, "cache",
- 2, &fs_info->generic_worker);
+ fs_info->thread_pool_size, NULL);
/* a higher idle thresh on the submit workers makes it much more
* likely that bios will be send down in a sane order to the
@@ -2575,7 +2582,7 @@ int open_ctree(struct super_block *sb,
sb->s_blocksize = sectorsize;
sb->s_blocksize_bits = blksize_bits(sectorsize);
- if (disk_super->magic != cpu_to_le64(BTRFS_MAGIC)) {
+ if (btrfs_super_magic(disk_super) != BTRFS_MAGIC) {
printk(KERN_INFO "btrfs: valid FS not found on %s\n", sb->s_id);
goto fail_sb_buffer;
}
@@ -2615,8 +2622,7 @@ int open_ctree(struct super_block *sb,
chunk_root->commit_root = btrfs_root_node(chunk_root);
read_extent_buffer(chunk_root->node, fs_info->chunk_tree_uuid,
- (unsigned long)btrfs_header_chunk_tree_uuid(chunk_root->node),
- BTRFS_UUID_SIZE);
+ btrfs_header_chunk_tree_uuid(chunk_root->node), BTRFS_UUID_SIZE);
ret = btrfs_read_chunk_tree(chunk_root);
if (ret) {
@@ -2655,6 +2661,7 @@ retry_root_backup:
btrfs_set_root_node(&tree_root->root_item, tree_root->node);
tree_root->commit_root = btrfs_root_node(tree_root);
+ btrfs_set_root_refs(&tree_root->root_item, 1);
location.objectid = BTRFS_EXTENT_TREE_OBJECTID;
location.type = BTRFS_ROOT_ITEM_KEY;
@@ -2696,6 +2703,22 @@ retry_root_backup:
fs_info->quota_root = quota_root;
}
+ location.objectid = BTRFS_UUID_TREE_OBJECTID;
+ uuid_root = btrfs_read_tree_root(tree_root, &location);
+ if (IS_ERR(uuid_root)) {
+ ret = PTR_ERR(uuid_root);
+ if (ret != -ENOENT)
+ goto recovery_tree_root;
+ create_uuid_tree = true;
+ check_uuid_tree = false;
+ } else {
+ uuid_root->track_dirty = 1;
+ fs_info->uuid_root = uuid_root;
+ create_uuid_tree = false;
+ check_uuid_tree =
+ generation != btrfs_super_uuid_tree_generation(disk_super);
+ }
+
fs_info->generation = generation;
fs_info->last_trans_committed = generation;
@@ -2882,6 +2905,29 @@ retry_root_backup:
btrfs_qgroup_rescan_resume(fs_info);
+ if (create_uuid_tree) {
+ pr_info("btrfs: creating UUID tree\n");
+ ret = btrfs_create_uuid_tree(fs_info);
+ if (ret) {
+ pr_warn("btrfs: failed to create the UUID tree %d\n",
+ ret);
+ close_ctree(tree_root);
+ return ret;
+ }
+ } else if (check_uuid_tree ||
+ btrfs_test_opt(tree_root, RESCAN_UUID_TREE)) {
+ pr_info("btrfs: checking UUID tree\n");
+ ret = btrfs_check_uuid_tree(fs_info);
+ if (ret) {
+ pr_warn("btrfs: failed to check the UUID tree %d\n",
+ ret);
+ close_ctree(tree_root);
+ return ret;
+ }
+ } else {
+ fs_info->update_uuid_tree_gen = 1;
+ }
+
return 0;
fail_qgroup:
@@ -2983,15 +3029,17 @@ struct buffer_head *btrfs_read_dev_super(struct block_device *bdev)
*/
for (i = 0; i < 1; i++) {
bytenr = btrfs_sb_offset(i);
- if (bytenr + 4096 >= i_size_read(bdev->bd_inode))
+ if (bytenr + BTRFS_SUPER_INFO_SIZE >=
+ i_size_read(bdev->bd_inode))
break;
- bh = __bread(bdev, bytenr / 4096, 4096);
+ bh = __bread(bdev, bytenr / 4096,
+ BTRFS_SUPER_INFO_SIZE);
if (!bh)
continue;
super = (struct btrfs_super_block *)bh->b_data;
if (btrfs_super_bytenr(super) != bytenr ||
- super->magic != cpu_to_le64(BTRFS_MAGIC)) {
+ btrfs_super_magic(super) != BTRFS_MAGIC) {
brelse(bh);
continue;
}
@@ -3311,7 +3359,6 @@ static int write_all_supers(struct btrfs_root *root, int max_mirrors)
int total_errors = 0;
u64 flags;
- max_errors = btrfs_super_num_devices(root->fs_info->super_copy) - 1;
do_barriers = !btrfs_test_opt(root, NOBARRIER);
backup_super_roots(root->fs_info);
@@ -3320,6 +3367,7 @@ static int write_all_supers(struct btrfs_root *root, int max_mirrors)
mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
head = &root->fs_info->fs_devices->devices;
+ max_errors = btrfs_super_num_devices(root->fs_info->super_copy) - 1;
if (do_barriers) {
ret = barrier_all_devices(root->fs_info);
@@ -3361,9 +3409,12 @@ static int write_all_supers(struct btrfs_root *root, int max_mirrors)
if (total_errors > max_errors) {
printk(KERN_ERR "btrfs: %d errors while writing supers\n",
total_errors);
+ mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
- /* This shouldn't happen. FUA is masked off if unsupported */
- BUG();
+ /* FUA is masked off if unsupported and can't be the reason */
+ btrfs_error(root->fs_info, -EIO,
+ "%d errors while writing supers", total_errors);
+ return -EIO;
}
total_errors = 0;
@@ -3389,10 +3440,7 @@ static int write_all_supers(struct btrfs_root *root, int max_mirrors)
int write_ctree_super(struct btrfs_trans_handle *trans,
struct btrfs_root *root, int max_mirrors)
{
- int ret;
-
- ret = write_all_supers(root, max_mirrors);
- return ret;
+ return write_all_supers(root, max_mirrors);
}
/* Drop a fs root from the radix tree and free it. */
@@ -3421,6 +3469,8 @@ static void free_fs_root(struct btrfs_root *root)
{
iput(root->cache_inode);
WARN_ON(!RB_EMPTY_ROOT(&root->inode_tree));
+ btrfs_free_block_rsv(root, root->orphan_block_rsv);
+ root->orphan_block_rsv = NULL;
if (root->anon_dev)
free_anon_bdev(root->anon_dev);
free_extent_buffer(root->node);
@@ -3510,6 +3560,11 @@ int close_ctree(struct btrfs_root *root)
fs_info->closing = 1;
smp_mb();
+ /* wait for the uuid_scan task to finish */
+ down(&fs_info->uuid_tree_rescan_sem);
+ /* avoid complains from lockdep et al., set sem back to initial state */
+ up(&fs_info->uuid_tree_rescan_sem);
+
/* pause restriper - we want to resume on mount */
btrfs_pause_balance(fs_info);
@@ -3548,12 +3603,12 @@ int close_ctree(struct btrfs_root *root)
percpu_counter_sum(&fs_info->delalloc_bytes));
}
+ del_fs_roots(fs_info);
+
btrfs_free_block_groups(fs_info);
btrfs_stop_all_workers(fs_info);
- del_fs_roots(fs_info);
-
free_root_pointers(fs_info, 1);
iput(fs_info->btree_inode);
@@ -3573,6 +3628,9 @@ int close_ctree(struct btrfs_root *root)
btrfs_free_stripe_hash_table(fs_info);
+ btrfs_free_block_rsv(root, root->orphan_block_rsv);
+ root->orphan_block_rsv = NULL;
+
return 0;
}
@@ -3600,17 +3658,25 @@ int btrfs_set_buffer_uptodate(struct extent_buffer *buf)
void btrfs_mark_buffer_dirty(struct extent_buffer *buf)
{
- struct btrfs_root *root = BTRFS_I(buf->pages[0]->mapping->host)->root;
+ struct btrfs_root *root;
u64 transid = btrfs_header_generation(buf);
int was_dirty;
+#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
+ /*
+ * This is a fast path so only do this check if we have sanity tests
+ * enabled. Normal people shouldn't be marking dummy buffers as dirty
+ * outside of the sanity tests.
+ */
+ if (unlikely(test_bit(EXTENT_BUFFER_DUMMY, &buf->bflags)))
+ return;
+#endif
+ root = BTRFS_I(buf->pages[0]->mapping->host)->root;
btrfs_assert_tree_locked(buf);
if (transid != root->fs_info->generation)
WARN(1, KERN_CRIT "btrfs transid mismatch buffer %llu, "
"found %llu running %llu\n",
- (unsigned long long)buf->start,
- (unsigned long long)transid,
- (unsigned long long)root->fs_info->generation);
+ buf->start, transid, root->fs_info->generation);
was_dirty = set_extent_buffer_dirty(buf);
if (!was_dirty)
__percpu_counter_add(&root->fs_info->dirty_metadata_bytes,
@@ -3735,7 +3801,8 @@ static void btrfs_destroy_all_ordered_extents(struct btrfs_fs_info *fs_info)
while (!list_empty(&splice)) {
root = list_first_entry(&splice, struct btrfs_root,
ordered_root);
- list_del_init(&root->ordered_root);
+ list_move_tail(&root->ordered_root,
+ &fs_info->ordered_roots);
btrfs_destroy_ordered_extents(root);
@@ -3744,8 +3811,8 @@ static void btrfs_destroy_all_ordered_extents(struct btrfs_fs_info *fs_info)
spin_unlock(&fs_info->ordered_root_lock);
}
-int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
- struct btrfs_root *root)
+static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
+ struct btrfs_root *root)
{
struct rb_node *node;
struct btrfs_delayed_ref_root *delayed_refs;
@@ -3813,24 +3880,6 @@ int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
return ret;
}
-static void btrfs_evict_pending_snapshots(struct btrfs_transaction *t)
-{
- struct btrfs_pending_snapshot *snapshot;
- struct list_head splice;
-
- INIT_LIST_HEAD(&splice);
-
- list_splice_init(&t->pending_snapshots, &splice);
-
- while (!list_empty(&splice)) {
- snapshot = list_entry(splice.next,
- struct btrfs_pending_snapshot,
- list);
- snapshot->error = -ECANCELED;
- list_del_init(&snapshot->list);
- }
-}
-
static void btrfs_destroy_delalloc_inodes(struct btrfs_root *root)
{
struct btrfs_inode *btrfs_inode;
@@ -3960,15 +4009,13 @@ again:
void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans,
struct btrfs_root *root)
{
+ btrfs_destroy_ordered_operations(cur_trans, root);
+
btrfs_destroy_delayed_refs(cur_trans, root);
- btrfs_block_rsv_release(root, &root->fs_info->trans_block_rsv,
- cur_trans->dirty_pages.dirty_bytes);
cur_trans->state = TRANS_STATE_COMMIT_START;
wake_up(&root->fs_info->transaction_blocked_wait);
- btrfs_evict_pending_snapshots(cur_trans);
-
cur_trans->state = TRANS_STATE_UNBLOCKED;
wake_up(&root->fs_info->transaction_wait);
@@ -3992,63 +4039,51 @@ void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans,
static int btrfs_cleanup_transaction(struct btrfs_root *root)
{
struct btrfs_transaction *t;
- LIST_HEAD(list);
mutex_lock(&root->fs_info->transaction_kthread_mutex);
spin_lock(&root->fs_info->trans_lock);
- list_splice_init(&root->fs_info->trans_list, &list);
- root->fs_info->running_transaction = NULL;
- spin_unlock(&root->fs_info->trans_lock);
-
- while (!list_empty(&list)) {
- t = list_entry(list.next, struct btrfs_transaction, list);
-
- btrfs_destroy_ordered_operations(t, root);
-
- btrfs_destroy_all_ordered_extents(root->fs_info);
-
- btrfs_destroy_delayed_refs(t, root);
-
- /*
- * FIXME: cleanup wait for commit
- * We needn't acquire the lock here, because we are during
- * the umount, there is no other task which will change it.
- */
- t->state = TRANS_STATE_COMMIT_START;
- smp_mb();
- if (waitqueue_active(&root->fs_info->transaction_blocked_wait))
- wake_up(&root->fs_info->transaction_blocked_wait);
-
- btrfs_evict_pending_snapshots(t);
-
- t->state = TRANS_STATE_UNBLOCKED;
- smp_mb();
- if (waitqueue_active(&root->fs_info->transaction_wait))
- wake_up(&root->fs_info->transaction_wait);
-
- btrfs_destroy_delayed_inodes(root);
- btrfs_assert_delayed_root_empty(root);
-
- btrfs_destroy_all_delalloc_inodes(root->fs_info);
-
- btrfs_destroy_marked_extents(root, &t->dirty_pages,
- EXTENT_DIRTY);
-
- btrfs_destroy_pinned_extent(root,
- root->fs_info->pinned_extents);
-
- t->state = TRANS_STATE_COMPLETED;
- smp_mb();
- if (waitqueue_active(&t->commit_wait))
- wake_up(&t->commit_wait);
+ while (!list_empty(&root->fs_info->trans_list)) {
+ t = list_first_entry(&root->fs_info->trans_list,
+ struct btrfs_transaction, list);
+ if (t->state >= TRANS_STATE_COMMIT_START) {
+ atomic_inc(&t->use_count);
+ spin_unlock(&root->fs_info->trans_lock);
+ btrfs_wait_for_commit(root, t->transid);
+ btrfs_put_transaction(t);
+ spin_lock(&root->fs_info->trans_lock);
+ continue;
+ }
+ if (t == root->fs_info->running_transaction) {
+ t->state = TRANS_STATE_COMMIT_DOING;
+ spin_unlock(&root->fs_info->trans_lock);
+ /*
+ * We wait for 0 num_writers since we don't hold a trans
+ * handle open currently for this transaction.
+ */
+ wait_event(t->writer_wait,
+ atomic_read(&t->num_writers) == 0);
+ } else {
+ spin_unlock(&root->fs_info->trans_lock);
+ }
+ btrfs_cleanup_one_transaction(t, root);
- atomic_set(&t->use_count, 0);
+ spin_lock(&root->fs_info->trans_lock);
+ if (t == root->fs_info->running_transaction)
+ root->fs_info->running_transaction = NULL;
list_del_init(&t->list);
- memset(t, 0, sizeof(*t));
- kmem_cache_free(btrfs_transaction_cachep, t);
- }
+ spin_unlock(&root->fs_info->trans_lock);
+ btrfs_put_transaction(t);
+ trace_btrfs_transaction_commit(root);
+ spin_lock(&root->fs_info->trans_lock);
+ }
+ spin_unlock(&root->fs_info->trans_lock);
+ btrfs_destroy_all_ordered_extents(root->fs_info);
+ btrfs_destroy_delayed_inodes(root);
+ btrfs_assert_delayed_root_empty(root);
+ btrfs_destroy_pinned_extent(root, root->fs_info->pinned_extents);
+ btrfs_destroy_all_delalloc_inodes(root->fs_info);
mutex_unlock(&root->fs_info->transaction_kthread_mutex);
return 0;
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h
index b71acd6e..53059df 100644
--- a/fs/btrfs/disk-io.h
+++ b/fs/btrfs/disk-io.h
@@ -68,8 +68,17 @@ struct btrfs_root *btrfs_read_fs_root(struct btrfs_root *tree_root,
int btrfs_init_fs_root(struct btrfs_root *root);
int btrfs_insert_fs_root(struct btrfs_fs_info *fs_info,
struct btrfs_root *root);
-struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info,
- struct btrfs_key *location);
+
+struct btrfs_root *btrfs_get_fs_root(struct btrfs_fs_info *fs_info,
+ struct btrfs_key *key,
+ bool check_ref);
+static inline struct btrfs_root *
+btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info,
+ struct btrfs_key *location)
+{
+ return btrfs_get_fs_root(fs_info, location, true);
+}
+
int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info);
void btrfs_btree_balance_dirty(struct btrfs_root *root);
void btrfs_btree_balance_dirty_nodelay(struct btrfs_root *root);
@@ -77,6 +86,10 @@ void btrfs_drop_and_free_fs_root(struct btrfs_fs_info *fs_info,
struct btrfs_root *root);
void btrfs_free_fs_root(struct btrfs_root *root);
+#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
+struct btrfs_root *btrfs_alloc_dummy_root(void);
+#endif
+
/*
* This function is used to grab the root, and avoid it is freed when we
* access it. But it doesn't ensure that the tree is not dropped.
diff --git a/fs/btrfs/export.c b/fs/btrfs/export.c
index 4b86916..41422a3 100644
--- a/fs/btrfs/export.c
+++ b/fs/btrfs/export.c
@@ -5,7 +5,6 @@
#include "btrfs_inode.h"
#include "print-tree.h"
#include "export.h"
-#include "compat.h"
#define BTRFS_FID_SIZE_NON_CONNECTABLE (offsetof(struct btrfs_fid, \
parent_objectid) / 4)
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 1204c8e..45d98d0 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -25,7 +25,6 @@
#include <linux/slab.h>
#include <linux/ratelimit.h>
#include <linux/percpu_counter.h>
-#include "compat.h"
#include "hash.h"
#include "ctree.h"
#include "disk-io.h"
@@ -113,7 +112,8 @@ static noinline int
block_group_cache_done(struct btrfs_block_group_cache *cache)
{
smp_mb();
- return cache->cached == BTRFS_CACHE_FINISHED;
+ return cache->cached == BTRFS_CACHE_FINISHED ||
+ cache->cached == BTRFS_CACHE_ERROR;
}
static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits)
@@ -389,7 +389,7 @@ static noinline void caching_thread(struct btrfs_work *work)
u64 total_found = 0;
u64 last = 0;
u32 nritems;
- int ret = 0;
+ int ret = -ENOMEM;
caching_ctl = container_of(work, struct btrfs_caching_control, work);
block_group = caching_ctl->block_group;
@@ -420,6 +420,7 @@ again:
/* need to make sure the commit_root doesn't disappear */
down_read(&fs_info->extent_commit_sem);
+next:
ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
if (ret < 0)
goto err;
@@ -459,6 +460,16 @@ again:
continue;
}
+ if (key.objectid < last) {
+ key.objectid = last;
+ key.offset = 0;
+ key.type = BTRFS_EXTENT_ITEM_KEY;
+
+ caching_ctl->progress = last;
+ btrfs_release_path(path);
+ goto next;
+ }
+
if (key.objectid < block_group->key.objectid) {
path->slots[0]++;
continue;
@@ -506,6 +517,12 @@ err:
mutex_unlock(&caching_ctl->mutex);
out:
+ if (ret) {
+ spin_lock(&block_group->lock);
+ block_group->caching_ctl = NULL;
+ block_group->cached = BTRFS_CACHE_ERROR;
+ spin_unlock(&block_group->lock);
+ }
wake_up(&caching_ctl->wait);
put_caching_control(caching_ctl);
@@ -771,10 +788,23 @@ again:
goto out_free;
if (ret > 0 && metadata && key.type == BTRFS_METADATA_ITEM_KEY) {
- key.type = BTRFS_EXTENT_ITEM_KEY;
- key.offset = root->leafsize;
- btrfs_release_path(path);
- goto again;
+ metadata = 0;
+ if (path->slots[0]) {
+ path->slots[0]--;
+ btrfs_item_key_to_cpu(path->nodes[0], &key,
+ path->slots[0]);
+ if (key.objectid == bytenr &&
+ key.type == BTRFS_EXTENT_ITEM_KEY &&
+ key.offset == root->leafsize)
+ ret = 0;
+ }
+ if (ret) {
+ key.objectid = bytenr;
+ key.type = BTRFS_EXTENT_ITEM_KEY;
+ key.offset = root->leafsize;
+ btrfs_release_path(path);
+ goto again;
+ }
}
if (ret == 0) {
@@ -1520,9 +1550,8 @@ again:
if (ret && !insert) {
err = -ENOENT;
goto out;
- } else if (ret) {
+ } else if (WARN_ON(ret)) {
err = -EIO;
- WARN_ON(1);
goto out;
}
@@ -1948,7 +1977,6 @@ static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
struct btrfs_extent_item *item;
u64 refs;
int ret;
- int err = 0;
path = btrfs_alloc_path();
if (!path)
@@ -1961,14 +1989,9 @@ static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
path, bytenr, num_bytes, parent,
root_objectid, owner, offset,
refs_to_add, extent_op);
- if (ret == 0)
+ if (ret != -EAGAIN)
goto out;
- if (ret != -EAGAIN) {
- err = ret;
- goto out;
- }
-
leaf = path->nodes[0];
item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
refs = btrfs_extent_refs(leaf, item);
@@ -1990,7 +2013,7 @@ static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
btrfs_abort_transaction(trans, root, ret);
out:
btrfs_free_path(path);
- return err;
+ return ret;
}
static int run_delayed_data_ref(struct btrfs_trans_handle *trans,
@@ -2011,6 +2034,8 @@ static int run_delayed_data_ref(struct btrfs_trans_handle *trans,
ins.type = BTRFS_EXTENT_ITEM_KEY;
ref = btrfs_delayed_node_to_data_ref(node);
+ trace_run_delayed_data_ref(node, ref, node->action);
+
if (node->type == BTRFS_SHARED_DATA_REF_KEY)
parent = ref->parent;
else
@@ -2104,15 +2129,28 @@ again:
}
if (ret > 0) {
if (metadata) {
- btrfs_release_path(path);
- metadata = 0;
+ if (path->slots[0] > 0) {
+ path->slots[0]--;
+ btrfs_item_key_to_cpu(path->nodes[0], &key,
+ path->slots[0]);
+ if (key.objectid == node->bytenr &&
+ key.type == BTRFS_EXTENT_ITEM_KEY &&
+ key.offset == node->num_bytes)
+ ret = 0;
+ }
+ if (ret > 0) {
+ btrfs_release_path(path);
+ metadata = 0;
- key.offset = node->num_bytes;
- key.type = BTRFS_EXTENT_ITEM_KEY;
- goto again;
+ key.objectid = node->bytenr;
+ key.offset = node->num_bytes;
+ key.type = BTRFS_EXTENT_ITEM_KEY;
+ goto again;
+ }
+ } else {
+ err = -EIO;
+ goto out;
}
- err = -EIO;
- goto out;
}
leaf = path->nodes[0];
@@ -2154,6 +2192,8 @@ static int run_delayed_tree_ref(struct btrfs_trans_handle *trans,
SKINNY_METADATA);
ref = btrfs_delayed_node_to_tree_ref(node);
+ trace_run_delayed_tree_ref(node, ref, node->action);
+
if (node->type == BTRFS_SHARED_BLOCK_REF_KEY)
parent = ref->parent;
else
@@ -2199,8 +2239,12 @@ static int run_one_delayed_ref(struct btrfs_trans_handle *trans,
{
int ret = 0;
- if (trans->aborted)
+ if (trans->aborted) {
+ if (insert_reserved)
+ btrfs_pin_extent(root, node->bytenr,
+ node->num_bytes, 1);
return 0;
+ }
if (btrfs_delayed_ref_is_head(node)) {
struct btrfs_delayed_ref_head *head;
@@ -2212,6 +2256,8 @@ static int run_one_delayed_ref(struct btrfs_trans_handle *trans,
*/
BUG_ON(extent_op);
head = btrfs_delayed_node_to_head(node);
+ trace_run_delayed_ref_head(node, head, node->action);
+
if (insert_reserved) {
btrfs_pin_extent(root, node->bytenr,
node->num_bytes, 1);
@@ -2374,6 +2420,14 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
btrfs_free_delayed_extent_op(extent_op);
if (ret) {
+ /*
+ * Need to reset must_insert_reserved if
+ * there was an error so the abort stuff
+ * can cleanup the reserved space
+ * properly.
+ */
+ if (must_insert_reserved)
+ locked_ref->must_insert_reserved = 1;
btrfs_debug(fs_info, "run_delayed_extent_op returned %d", ret);
spin_lock(&delayed_refs->lock);
btrfs_delayed_ref_unlock(locked_ref);
@@ -2403,6 +2457,8 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
default:
WARN_ON(1);
}
+ } else {
+ list_del_init(&locked_ref->cluster);
}
spin_unlock(&delayed_refs->lock);
@@ -2425,7 +2481,6 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
* list before we release it.
*/
if (btrfs_delayed_ref_is_head(ref)) {
- list_del_init(&locked_ref->cluster);
btrfs_delayed_ref_unlock(locked_ref);
locked_ref = NULL;
}
@@ -3159,8 +3214,7 @@ again:
if (ret)
goto out_put;
- ret = btrfs_truncate_free_space_cache(root, trans, path,
- inode);
+ ret = btrfs_truncate_free_space_cache(root, trans, inode);
if (ret)
goto out_put;
}
@@ -3280,10 +3334,9 @@ again:
last = cache->key.objectid + cache->key.offset;
err = write_one_cache_group(trans, root, path, cache);
+ btrfs_put_block_group(cache);
if (err) /* File system offline */
goto out;
-
- btrfs_put_block_group(cache);
}
while (1) {
@@ -3567,10 +3620,9 @@ int btrfs_check_data_free_space(struct inode *inode, u64 bytes)
/* make sure bytes are sectorsize aligned */
bytes = ALIGN(bytes, root->sectorsize);
- if (root == root->fs_info->tree_root ||
- BTRFS_I(inode)->location.objectid == BTRFS_FREE_INO_OBJECTID) {
- alloc_chunk = 0;
+ if (btrfs_is_free_space_inode(inode)) {
committed = 1;
+ ASSERT(current->journal_info);
}
data_sinfo = fs_info->data_sinfo;
@@ -3598,6 +3650,16 @@ again:
spin_unlock(&data_sinfo->lock);
alloc:
alloc_target = btrfs_get_alloc_profile(root, 1);
+ /*
+ * It is ugly that we don't call nolock join
+ * transaction for the free space inode case here.
+ * But it is safe because we only do the data space
+ * reservation for the free space cache in the
+ * transaction context, the common join transaction
+ * just increase the counter of the current transaction
+ * handler, doesn't try to acquire the trans_lock of
+ * the fs.
+ */
trans = btrfs_join_transaction(root);
if (IS_ERR(trans))
return PTR_ERR(trans);
@@ -3643,6 +3705,9 @@ commit_trans:
goto again;
}
+ trace_btrfs_space_reservation(root->fs_info,
+ "space_info:enospc",
+ data_sinfo->flags, bytes, 1);
return -ENOSPC;
}
data_sinfo->bytes_may_use += bytes;
@@ -3799,8 +3864,12 @@ again:
if (force < space_info->force_alloc)
force = space_info->force_alloc;
if (space_info->full) {
+ if (should_alloc_chunk(extent_root, space_info, force))
+ ret = -ENOSPC;
+ else
+ ret = 0;
spin_unlock(&space_info->lock);
- return 0;
+ return ret;
}
if (!should_alloc_chunk(extent_root, space_info, force)) {
@@ -3883,7 +3952,6 @@ static int can_overcommit(struct btrfs_root *root,
u64 space_size;
u64 avail;
u64 used;
- u64 to_add;
used = space_info->bytes_used + space_info->bytes_reserved +
space_info->bytes_pinned + space_info->bytes_readonly;
@@ -3917,25 +3985,17 @@ static int can_overcommit(struct btrfs_root *root,
BTRFS_BLOCK_GROUP_RAID10))
avail >>= 1;
- to_add = space_info->total_bytes;
-
/*
* If we aren't flushing all things, let us overcommit up to
* 1/2th of the space. If we can flush, don't let us overcommit
* too much, let it overcommit up to 1/8 of the space.
*/
if (flush == BTRFS_RESERVE_FLUSH_ALL)
- to_add >>= 3;
+ avail >>= 3;
else
- to_add >>= 1;
-
- /*
- * Limit the overcommit to the amount of free space we could possibly
- * allocate for chunks.
- */
- to_add = min(avail, to_add);
+ avail >>= 1;
- if (used + bytes < space_info->total_bytes + to_add)
+ if (used + bytes < space_info->total_bytes + avail)
return 1;
return 0;
}
@@ -3956,12 +4016,26 @@ static void btrfs_writeback_inodes_sb_nr(struct btrfs_root *root,
* the filesystem is readonly(all dirty pages are written to
* the disk).
*/
- btrfs_start_all_delalloc_inodes(root->fs_info, 0);
+ btrfs_start_delalloc_roots(root->fs_info, 0);
if (!current->journal_info)
- btrfs_wait_all_ordered_extents(root->fs_info, 0);
+ btrfs_wait_ordered_roots(root->fs_info, -1);
}
}
+static inline int calc_reclaim_items_nr(struct btrfs_root *root, u64 to_reclaim)
+{
+ u64 bytes;
+ int nr;
+
+ bytes = btrfs_calc_trans_metadata_size(root, 1);
+ nr = (int)div64_u64(to_reclaim, bytes);
+ if (!nr)
+ nr = 1;
+ return nr;
+}
+
+#define EXTENT_SIZE_PER_ITEM (256 * 1024)
+
/*
* shrink metadata reservation for delalloc
*/
@@ -3974,24 +4048,30 @@ static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig,
u64 delalloc_bytes;
u64 max_reclaim;
long time_left;
- unsigned long nr_pages = (2 * 1024 * 1024) >> PAGE_CACHE_SHIFT;
- int loops = 0;
+ unsigned long nr_pages;
+ int loops;
+ int items;
enum btrfs_reserve_flush_enum flush;
+ /* Calc the number of the pages we need flush for space reservation */
+ items = calc_reclaim_items_nr(root, to_reclaim);
+ to_reclaim = items * EXTENT_SIZE_PER_ITEM;
+
trans = (struct btrfs_trans_handle *)current->journal_info;
block_rsv = &root->fs_info->delalloc_block_rsv;
space_info = block_rsv->space_info;
- smp_mb();
delalloc_bytes = percpu_counter_sum_positive(
&root->fs_info->delalloc_bytes);
if (delalloc_bytes == 0) {
if (trans)
return;
- btrfs_wait_all_ordered_extents(root->fs_info, 0);
+ if (wait_ordered)
+ btrfs_wait_ordered_roots(root->fs_info, items);
return;
}
+ loops = 0;
while (delalloc_bytes && loops < 3) {
max_reclaim = min(delalloc_bytes, to_reclaim);
nr_pages = max_reclaim >> PAGE_CACHE_SHIFT;
@@ -4000,9 +4080,19 @@ static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig,
* We need to wait for the async pages to actually start before
* we do anything.
*/
- wait_event(root->fs_info->async_submit_wait,
- !atomic_read(&root->fs_info->async_delalloc_pages));
+ max_reclaim = atomic_read(&root->fs_info->async_delalloc_pages);
+ if (!max_reclaim)
+ goto skip_async;
+
+ if (max_reclaim <= nr_pages)
+ max_reclaim = 0;
+ else
+ max_reclaim -= nr_pages;
+ wait_event(root->fs_info->async_submit_wait,
+ atomic_read(&root->fs_info->async_delalloc_pages) <=
+ (int)max_reclaim);
+skip_async:
if (!trans)
flush = BTRFS_RESERVE_FLUSH_ALL;
else
@@ -4016,13 +4106,12 @@ static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig,
loops++;
if (wait_ordered && !trans) {
- btrfs_wait_all_ordered_extents(root->fs_info, 0);
+ btrfs_wait_ordered_roots(root->fs_info, items);
} else {
time_left = schedule_timeout_killable(1);
if (time_left)
break;
}
- smp_mb();
delalloc_bytes = percpu_counter_sum_positive(
&root->fs_info->delalloc_bytes);
}
@@ -4107,16 +4196,11 @@ static int flush_space(struct btrfs_root *root,
switch (state) {
case FLUSH_DELAYED_ITEMS_NR:
case FLUSH_DELAYED_ITEMS:
- if (state == FLUSH_DELAYED_ITEMS_NR) {
- u64 bytes = btrfs_calc_trans_metadata_size(root, 1);
-
- nr = (int)div64_u64(num_bytes, bytes);
- if (!nr)
- nr = 1;
- nr *= 2;
- } else {
+ if (state == FLUSH_DELAYED_ITEMS_NR)
+ nr = calc_reclaim_items_nr(root, num_bytes) * 2;
+ else
nr = -1;
- }
+
trans = btrfs_join_transaction(root);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
@@ -4299,6 +4383,10 @@ out:
!block_rsv_use_bytes(global_rsv, orig_bytes))
ret = 0;
}
+ if (ret == -ENOSPC)
+ trace_btrfs_space_reservation(root->fs_info,
+ "space_info:enospc",
+ space_info->flags, orig_bytes, 1);
if (flushing) {
spin_lock(&space_info->lock);
space_info->flush = 0;
@@ -4320,6 +4408,9 @@ static struct btrfs_block_rsv *get_block_rsv(
if (root == root->fs_info->csum_root && trans->adding_csums)
block_rsv = trans->block_rsv;
+ if (root == root->fs_info->uuid_root)
+ block_rsv = trans->block_rsv;
+
if (!block_rsv)
block_rsv = root->block_rsv;
@@ -4420,7 +4511,6 @@ static void block_rsv_release_bytes(struct btrfs_fs_info *fs_info,
space_info->bytes_may_use -= num_bytes;
trace_btrfs_space_reservation(fs_info, "space_info",
space_info->flags, num_bytes, 0);
- space_info->reservation_progress++;
spin_unlock(&space_info->lock);
}
}
@@ -4621,7 +4711,6 @@ static void update_global_block_rsv(struct btrfs_fs_info *fs_info)
sinfo->bytes_may_use -= num_bytes;
trace_btrfs_space_reservation(fs_info, "space_info",
sinfo->flags, num_bytes, 0);
- sinfo->reservation_progress++;
block_rsv->reserved = block_rsv->size;
block_rsv->full = 1;
}
@@ -4729,10 +4818,12 @@ void btrfs_orphan_release_metadata(struct inode *inode)
int btrfs_subvolume_reserve_metadata(struct btrfs_root *root,
struct btrfs_block_rsv *rsv,
int items,
- u64 *qgroup_reserved)
+ u64 *qgroup_reserved,
+ bool use_global_rsv)
{
u64 num_bytes;
int ret;
+ struct btrfs_block_rsv *global_rsv = &root->fs_info->global_block_rsv;
if (root->fs_info->quota_enabled) {
/* One for parent inode, two for dir entries */
@@ -4751,6 +4842,10 @@ int btrfs_subvolume_reserve_metadata(struct btrfs_root *root,
BTRFS_BLOCK_GROUP_METADATA);
ret = btrfs_block_rsv_add(root, rsv, num_bytes,
BTRFS_RESERVE_FLUSH_ALL);
+
+ if (ret == -ENOSPC && use_global_rsv)
+ ret = btrfs_block_rsv_migrate(global_rsv, rsv, num_bytes);
+
if (ret) {
if (*qgroup_reserved)
btrfs_qgroup_free(root, *qgroup_reserved);
@@ -4946,7 +5041,7 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
mutex_unlock(&BTRFS_I(inode)->delalloc_mutex);
if (to_reserve)
- trace_btrfs_space_reservation(root->fs_info,"delalloc",
+ trace_btrfs_space_reservation(root->fs_info, "delalloc",
btrfs_ino(inode), to_reserve, 1);
block_rsv_add_bytes(block_rsv, to_reserve, 1);
@@ -5224,6 +5319,8 @@ static int pin_down_extent(struct btrfs_root *root,
set_extent_dirty(root->fs_info->pinned_extents, bytenr,
bytenr + num_bytes - 1, GFP_NOFS | __GFP_NOFAIL);
+ if (reserved)
+ trace_btrfs_reserved_extent_free(root, bytenr, num_bytes);
return 0;
}
@@ -5395,7 +5492,6 @@ static int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache,
space_info->bytes_readonly += num_bytes;
cache->reserved -= num_bytes;
space_info->bytes_reserved -= num_bytes;
- space_info->reservation_progress++;
}
spin_unlock(&cache->lock);
spin_unlock(&space_info->lock);
@@ -5668,7 +5764,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
if (ret) {
btrfs_err(info, "umm, got %d back from search, was looking for %llu",
- ret, (unsigned long long)bytenr);
+ ret, bytenr);
if (ret > 0)
btrfs_print_leaf(extent_root,
path->nodes[0]);
@@ -5679,16 +5775,12 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
}
extent_slot = path->slots[0];
}
- } else if (ret == -ENOENT) {
+ } else if (WARN_ON(ret == -ENOENT)) {
btrfs_print_leaf(extent_root, path->nodes[0]);
- WARN_ON(1);
btrfs_err(info,
"unable to find ref byte nr %llu parent %llu root %llu owner %llu offset %llu",
- (unsigned long long)bytenr,
- (unsigned long long)parent,
- (unsigned long long)root_objectid,
- (unsigned long long)owner_objectid,
- (unsigned long long)owner_offset);
+ bytenr, parent, root_objectid, owner_objectid,
+ owner_offset);
} else {
btrfs_abort_transaction(trans, extent_root, ret);
goto out;
@@ -5717,7 +5809,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
-1, 1);
if (ret) {
btrfs_err(info, "umm, got %d back from search, was looking for %llu",
- ret, (unsigned long long)bytenr);
+ ret, bytenr);
btrfs_print_leaf(extent_root, path->nodes[0]);
}
if (ret < 0) {
@@ -5931,6 +6023,7 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
btrfs_add_free_space(cache, buf->start, buf->len);
btrfs_update_reserved_bytes(cache, buf->len, RESERVE_FREE);
+ trace_btrfs_reserved_extent_free(root, buf->start, buf->len);
pin = 0;
}
out:
@@ -5999,8 +6092,11 @@ static u64 stripe_align(struct btrfs_root *root,
* for our min num_bytes. Another option is to have it go ahead
* and look in the rbtree for a free extent of a given size, but this
* is a good start.
+ *
+ * Callers of this must check if cache->cached == BTRFS_CACHE_ERROR before using
+ * any of the information in this block group.
*/
-static noinline int
+static noinline void
wait_block_group_cache_progress(struct btrfs_block_group_cache *cache,
u64 num_bytes)
{
@@ -6008,28 +6104,29 @@ wait_block_group_cache_progress(struct btrfs_block_group_cache *cache,
caching_ctl = get_caching_control(cache);
if (!caching_ctl)
- return 0;
+ return;
wait_event(caching_ctl->wait, block_group_cache_done(cache) ||
(cache->free_space_ctl->free_space >= num_bytes));
put_caching_control(caching_ctl);
- return 0;
}
static noinline int
wait_block_group_cache_done(struct btrfs_block_group_cache *cache)
{
struct btrfs_caching_control *caching_ctl;
+ int ret = 0;
caching_ctl = get_caching_control(cache);
if (!caching_ctl)
- return 0;
+ return (cache->cached == BTRFS_CACHE_ERROR) ? -EIO : 0;
wait_event(caching_ctl->wait, block_group_cache_done(cache));
-
+ if (cache->cached == BTRFS_CACHE_ERROR)
+ ret = -EIO;
put_caching_control(caching_ctl);
- return 0;
+ return ret;
}
int __get_raid_index(u64 flags)
@@ -6065,13 +6162,15 @@ enum btrfs_loop_type {
/*
* walks the btree of allocated extents and find a hole of a given size.
* The key ins is changed to record the hole:
- * ins->objectid == block start
+ * ins->objectid == start position
* ins->flags = BTRFS_EXTENT_ITEM_KEY
- * ins->offset == number of blocks
+ * ins->offset == the size of the hole.
* Any available blocks before search_start are skipped.
+ *
+ * If there is no suitable free space, we will record the max size of
+ * the free space extent currently.
*/
-static noinline int find_free_extent(struct btrfs_trans_handle *trans,
- struct btrfs_root *orig_root,
+static noinline int find_free_extent(struct btrfs_root *orig_root,
u64 num_bytes, u64 empty_size,
u64 hint_byte, struct btrfs_key *ins,
u64 flags)
@@ -6082,6 +6181,7 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
struct btrfs_block_group_cache *block_group = NULL;
struct btrfs_block_group_cache *used_block_group;
u64 search_start = 0;
+ u64 max_extent_size = 0;
int empty_cluster = 2 * 1024 * 1024;
struct btrfs_space_info *space_info;
int loop = 0;
@@ -6212,6 +6312,8 @@ have_block_group:
ret = 0;
}
+ if (unlikely(block_group->cached == BTRFS_CACHE_ERROR))
+ goto loop;
if (unlikely(block_group->ro))
goto loop;
@@ -6239,7 +6341,10 @@ have_block_group:
btrfs_get_block_group(used_block_group);
offset = btrfs_alloc_from_cluster(used_block_group,
- last_ptr, num_bytes, used_block_group->key.objectid);
+ last_ptr,
+ num_bytes,
+ used_block_group->key.objectid,
+ &max_extent_size);
if (offset) {
/* we have a block, we're done */
spin_unlock(&last_ptr->refill_lock);
@@ -6292,18 +6397,20 @@ refill_cluster:
block_group->full_stripe_len);
/* allocate a cluster in this block group */
- ret = btrfs_find_space_cluster(trans, root,
- block_group, last_ptr,
- search_start, num_bytes,
- aligned_cluster);
+ ret = btrfs_find_space_cluster(root, block_group,
+ last_ptr, search_start,
+ num_bytes,
+ aligned_cluster);
if (ret == 0) {
/*
* now pull our allocation out of this
* cluster
*/
offset = btrfs_alloc_from_cluster(block_group,
- last_ptr, num_bytes,
- search_start);
+ last_ptr,
+ num_bytes,
+ search_start,
+ &max_extent_size);
if (offset) {
/* we found one, proceed */
spin_unlock(&last_ptr->refill_lock);
@@ -6338,13 +6445,18 @@ unclustered_alloc:
if (cached &&
block_group->free_space_ctl->free_space <
num_bytes + empty_cluster + empty_size) {
+ if (block_group->free_space_ctl->free_space >
+ max_extent_size)
+ max_extent_size =
+ block_group->free_space_ctl->free_space;
spin_unlock(&block_group->free_space_ctl->tree_lock);
goto loop;
}
spin_unlock(&block_group->free_space_ctl->tree_lock);
offset = btrfs_find_space_for_alloc(block_group, search_start,
- num_bytes, empty_size);
+ num_bytes, empty_size,
+ &max_extent_size);
/*
* If we didn't find a chunk, and we haven't failed on this
* block group before, and this block group is in the middle of
@@ -6426,17 +6538,28 @@ loop:
index = 0;
loop++;
if (loop == LOOP_ALLOC_CHUNK) {
+ struct btrfs_trans_handle *trans;
+
+ trans = btrfs_join_transaction(root);
+ if (IS_ERR(trans)) {
+ ret = PTR_ERR(trans);
+ goto out;
+ }
+
ret = do_chunk_alloc(trans, root, flags,
CHUNK_ALLOC_FORCE);
/*
* Do not bail out on ENOSPC since we
* can do more things.
*/
- if (ret < 0 && ret != -ENOSPC) {
+ if (ret < 0 && ret != -ENOSPC)
btrfs_abort_transaction(trans,
root, ret);
+ else
+ ret = 0;
+ btrfs_end_transaction(trans, root);
+ if (ret)
goto out;
- }
}
if (loop == LOOP_NO_EMPTY_SIZE) {
@@ -6451,7 +6574,8 @@ loop:
ret = 0;
}
out:
-
+ if (ret == -ENOSPC)
+ ins->offset = max_extent_size;
return ret;
}
@@ -6463,19 +6587,15 @@ static void dump_space_info(struct btrfs_space_info *info, u64 bytes,
spin_lock(&info->lock);
printk(KERN_INFO "space_info %llu has %llu free, is %sfull\n",
- (unsigned long long)info->flags,
- (unsigned long long)(info->total_bytes - info->bytes_used -
- info->bytes_pinned - info->bytes_reserved -
- info->bytes_readonly),
+ info->flags,
+ info->total_bytes - info->bytes_used - info->bytes_pinned -
+ info->bytes_reserved - info->bytes_readonly,
(info->full) ? "" : "not ");
printk(KERN_INFO "space_info total=%llu, used=%llu, pinned=%llu, "
"reserved=%llu, may_use=%llu, readonly=%llu\n",
- (unsigned long long)info->total_bytes,
- (unsigned long long)info->bytes_used,
- (unsigned long long)info->bytes_pinned,
- (unsigned long long)info->bytes_reserved,
- (unsigned long long)info->bytes_may_use,
- (unsigned long long)info->bytes_readonly);
+ info->total_bytes, info->bytes_used, info->bytes_pinned,
+ info->bytes_reserved, info->bytes_may_use,
+ info->bytes_readonly);
spin_unlock(&info->lock);
if (!dump_block_groups)
@@ -6486,12 +6606,9 @@ again:
list_for_each_entry(cache, &info->block_groups[index], list) {
spin_lock(&cache->lock);
printk(KERN_INFO "block group %llu has %llu bytes, %llu used %llu pinned %llu reserved %s\n",
- (unsigned long long)cache->key.objectid,
- (unsigned long long)cache->key.offset,
- (unsigned long long)btrfs_block_group_used(&cache->item),
- (unsigned long long)cache->pinned,
- (unsigned long long)cache->reserved,
- cache->ro ? "[readonly]" : "");
+ cache->key.objectid, cache->key.offset,
+ btrfs_block_group_used(&cache->item), cache->pinned,
+ cache->reserved, cache->ro ? "[readonly]" : "");
btrfs_dump_free_space(cache, bytes);
spin_unlock(&cache->lock);
}
@@ -6500,8 +6617,7 @@ again:
up_read(&info->groups_sem);
}
-int btrfs_reserve_extent(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
+int btrfs_reserve_extent(struct btrfs_root *root,
u64 num_bytes, u64 min_alloc_size,
u64 empty_size, u64 hint_byte,
struct btrfs_key *ins, int is_data)
@@ -6513,12 +6629,12 @@ int btrfs_reserve_extent(struct btrfs_trans_handle *trans,
flags = btrfs_get_alloc_profile(root, is_data);
again:
WARN_ON(num_bytes < root->sectorsize);
- ret = find_free_extent(trans, root, num_bytes, empty_size,
- hint_byte, ins, flags);
+ ret = find_free_extent(root, num_bytes, empty_size, hint_byte, ins,
+ flags);
if (ret == -ENOSPC) {
- if (!final_tried) {
- num_bytes = num_bytes >> 1;
+ if (!final_tried && ins->offset) {
+ num_bytes = min(num_bytes >> 1, ins->offset);
num_bytes = round_down(num_bytes, root->sectorsize);
num_bytes = max(num_bytes, min_alloc_size);
if (num_bytes == min_alloc_size)
@@ -6529,15 +6645,12 @@ again:
sinfo = __find_space_info(root->fs_info, flags);
btrfs_err(root->fs_info, "allocation failed flags %llu, wanted %llu",
- (unsigned long long)flags,
- (unsigned long long)num_bytes);
+ flags, num_bytes);
if (sinfo)
dump_space_info(sinfo, num_bytes, 1);
}
}
- trace_btrfs_reserved_extent_alloc(root, ins->objectid, ins->offset);
-
return ret;
}
@@ -6550,7 +6663,7 @@ static int __btrfs_free_reserved_extent(struct btrfs_root *root,
cache = btrfs_lookup_block_group(root->fs_info, start);
if (!cache) {
btrfs_err(root->fs_info, "Unable to find block group for %llu",
- (unsigned long long)start);
+ start);
return -ENOSPC;
}
@@ -6646,10 +6759,10 @@ static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
ret = update_block_group(root, ins->objectid, ins->offset, 1);
if (ret) { /* -ENOENT, logic error */
btrfs_err(fs_info, "update block group failed for %llu %llu",
- (unsigned long long)ins->objectid,
- (unsigned long long)ins->offset);
+ ins->objectid, ins->offset);
BUG();
}
+ trace_btrfs_reserved_extent_alloc(root, ins->objectid, ins->offset);
return ret;
}
@@ -6674,13 +6787,18 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
size += sizeof(*block_info);
path = btrfs_alloc_path();
- if (!path)
+ if (!path) {
+ btrfs_free_and_pin_reserved_extent(root, ins->objectid,
+ root->leafsize);
return -ENOMEM;
+ }
path->leave_spinning = 1;
ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path,
ins, size);
if (ret) {
+ btrfs_free_and_pin_reserved_extent(root, ins->objectid,
+ root->leafsize);
btrfs_free_path(path);
return ret;
}
@@ -6719,10 +6837,11 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
ret = update_block_group(root, ins->objectid, root->leafsize, 1);
if (ret) { /* -ENOENT, logic error */
btrfs_err(fs_info, "update block group failed for %llu %llu",
- (unsigned long long)ins->objectid,
- (unsigned long long)ins->offset);
+ ins->objectid, ins->offset);
BUG();
}
+
+ trace_btrfs_reserved_extent_alloc(root, ins->objectid, root->leafsize);
return ret;
}
@@ -6902,7 +7021,7 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
if (IS_ERR(block_rsv))
return ERR_CAST(block_rsv);
- ret = btrfs_reserve_extent(trans, root, blocksize, blocksize,
+ ret = btrfs_reserve_extent(root, blocksize, blocksize,
empty_size, hint, &ins, 0);
if (ret) {
unuse_block_rsv(root->fs_info, block_rsv, blocksize);
@@ -7173,6 +7292,8 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
next = btrfs_find_create_tree_block(root, bytenr, blocksize);
if (!next)
return -ENOMEM;
+ btrfs_set_buffer_lockdep_class(root->root_key.objectid, next,
+ level - 1);
reada = 1;
}
btrfs_tree_lock(next);
@@ -7658,7 +7779,7 @@ out:
* don't have it in the radix (like when we recover after a power fail
* or unmount) so we don't leak memory.
*/
- if (root_dropped == false)
+ if (!for_reloc && root_dropped == false)
btrfs_add_dead_root(root);
if (err)
btrfs_std_error(root->fs_info, err);
@@ -7925,7 +8046,7 @@ u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo)
spin_lock(&sinfo->lock);
- for(i = 0; i < BTRFS_NR_RAID_TYPES; i++)
+ for (i = 0; i < BTRFS_NR_RAID_TYPES; i++)
if (!list_empty(&sinfo->block_groups[i]))
free_bytes += __btrfs_get_ro_block_group_free_space(
&sinfo->block_groups[i]);
@@ -8192,7 +8313,8 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
* We haven't cached this block group, which means we could
* possibly have excluded extents on this block group.
*/
- if (block_group->cached == BTRFS_CACHE_NO)
+ if (block_group->cached == BTRFS_CACHE_NO ||
+ block_group->cached == BTRFS_CACHE_ERROR)
free_excluded_extents(info->extent_root, block_group);
btrfs_remove_free_space_cache(block_group);
@@ -8212,15 +8334,14 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
release_global_block_rsv(info);
- while(!list_empty(&info->space_info)) {
+ while (!list_empty(&info->space_info)) {
space_info = list_entry(info->space_info.next,
struct btrfs_space_info,
list);
if (btrfs_test_opt(info->tree_root, ENOSPC_DEBUG)) {
- if (space_info->bytes_pinned > 0 ||
+ if (WARN_ON(space_info->bytes_pinned > 0 ||
space_info->bytes_reserved > 0 ||
- space_info->bytes_may_use > 0) {
- WARN_ON(1);
+ space_info->bytes_may_use > 0)) {
dump_space_info(space_info, 0, 0);
}
}
@@ -8409,9 +8530,13 @@ int btrfs_read_block_groups(struct btrfs_root *root)
* avoid allocating from un-mirrored block group if there are
* mirrored block groups.
*/
- list_for_each_entry(cache, &space_info->block_groups[3], list)
+ list_for_each_entry(cache,
+ &space_info->block_groups[BTRFS_RAID_RAID0],
+ list)
set_block_group_ro(cache, 1);
- list_for_each_entry(cache, &space_info->block_groups[4], list)
+ list_for_each_entry(cache,
+ &space_info->block_groups[BTRFS_RAID_SINGLE],
+ list)
set_block_group_ro(cache, 1);
}
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index fe443fe..856bc2b 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -13,13 +13,13 @@
#include <linux/cleancache.h>
#include "extent_io.h"
#include "extent_map.h"
-#include "compat.h"
#include "ctree.h"
#include "btrfs_inode.h"
#include "volumes.h"
#include "check-integrity.h"
#include "locking.h"
#include "rcu-string.h"
+#include "backref.h"
static struct kmem_cache *extent_state_cache;
static struct kmem_cache *extent_buffer_cache;
@@ -61,9 +61,8 @@ void btrfs_leak_debug_check(void)
state = list_entry(states.next, struct extent_state, leak_list);
printk(KERN_ERR "btrfs state leak: start %llu end %llu "
"state %lu in tree %p refs %d\n",
- (unsigned long long)state->start,
- (unsigned long long)state->end,
- state->state, state->tree, atomic_read(&state->refs));
+ state->start, state->end, state->state, state->tree,
+ atomic_read(&state->refs));
list_del(&state->leak_list);
kmem_cache_free(extent_state_cache, state);
}
@@ -71,8 +70,8 @@ void btrfs_leak_debug_check(void)
while (!list_empty(&buffers)) {
eb = list_entry(buffers.next, struct extent_buffer, leak_list);
printk(KERN_ERR "btrfs buffer leak start %llu len %lu "
- "refs %d\n", (unsigned long long)eb->start,
- eb->len, atomic_read(&eb->refs));
+ "refs %d\n",
+ eb->start, eb->len, atomic_read(&eb->refs));
list_del(&eb->leak_list);
kmem_cache_free(extent_buffer_cache, eb);
}
@@ -88,11 +87,7 @@ static inline void __btrfs_debug_check_extent_io_range(const char *caller,
if (end >= PAGE_SIZE && (end % 2) == 0 && end != isize - 1) {
printk_ratelimited(KERN_DEBUG
"btrfs: %s: ino %llu isize %llu odd range [%llu,%llu]\n",
- caller,
- (unsigned long long)btrfs_ino(inode),
- (unsigned long long)isize,
- (unsigned long long)start,
- (unsigned long long)end);
+ caller, btrfs_ino(inode), isize, start, end);
}
}
#else
@@ -150,8 +145,16 @@ int __init extent_io_init(void)
offsetof(struct btrfs_io_bio, bio));
if (!btrfs_bioset)
goto free_buffer_cache;
+
+ if (bioset_integrity_create(btrfs_bioset, BIO_POOL_SIZE))
+ goto free_bioset;
+
return 0;
+free_bioset:
+ bioset_free(btrfs_bioset);
+ btrfs_bioset = NULL;
+
free_buffer_cache:
kmem_cache_destroy(extent_buffer_cache);
extent_buffer_cache = NULL;
@@ -388,8 +391,7 @@ static int insert_state(struct extent_io_tree *tree,
if (end < start)
WARN(1, KERN_ERR "btrfs end < start %llu %llu\n",
- (unsigned long long)end,
- (unsigned long long)start);
+ end, start);
state->start = start;
state->end = end;
@@ -400,9 +402,8 @@ static int insert_state(struct extent_io_tree *tree,
struct extent_state *found;
found = rb_entry(node, struct extent_state, rb_node);
printk(KERN_ERR "btrfs found node %llu %llu on insert of "
- "%llu %llu\n", (unsigned long long)found->start,
- (unsigned long long)found->end,
- (unsigned long long)start, (unsigned long long)end);
+ "%llu %llu\n",
+ found->start, found->end, start, end);
return -EEXIST;
}
state->tree = tree;
@@ -762,15 +763,6 @@ static void cache_state(struct extent_state *state,
}
}
-static void uncache_state(struct extent_state **cached_ptr)
-{
- if (cached_ptr && (*cached_ptr)) {
- struct extent_state *state = *cached_ptr;
- *cached_ptr = NULL;
- free_extent_state(state);
- }
-}
-
/*
* set some bits on a range in the tree. This may require allocations or
* sleeping, so the gfp mask is used to indicate what is allowed.
@@ -1497,11 +1489,11 @@ static noinline u64 find_delalloc_range(struct extent_io_tree *tree,
*end = state->end;
cur_start = state->end + 1;
node = rb_next(node);
- if (!node)
- break;
total_bytes += state->end - state->start + 1;
if (total_bytes >= max_bytes)
break;
+ if (!node)
+ break;
}
out:
spin_unlock(&tree->lock);
@@ -1605,11 +1597,10 @@ done:
*
* 1 is returned if we find something, 0 if nothing was in the tree
*/
-static noinline u64 find_lock_delalloc_range(struct inode *inode,
- struct extent_io_tree *tree,
- struct page *locked_page,
- u64 *start, u64 *end,
- u64 max_bytes)
+STATIC u64 find_lock_delalloc_range(struct inode *inode,
+ struct extent_io_tree *tree,
+ struct page *locked_page, u64 *start,
+ u64 *end, u64 max_bytes)
{
u64 delalloc_start;
u64 delalloc_end;
@@ -1628,7 +1619,7 @@ again:
*start = delalloc_start;
*end = delalloc_end;
free_extent_state(cached_state);
- return found;
+ return 0;
}
/*
@@ -1641,10 +1632,9 @@ again:
/*
* make sure to limit the number of pages we try to lock down
- * if we're looping.
*/
- if (delalloc_end + 1 - delalloc_start > max_bytes && loops)
- delalloc_end = delalloc_start + PAGE_CACHE_SIZE - 1;
+ if (delalloc_end + 1 - delalloc_start > max_bytes)
+ delalloc_end = delalloc_start + max_bytes - 1;
/* step two, lock all the pages after the page that has start */
ret = lock_delalloc_pages(inode, locked_page,
@@ -1655,8 +1645,7 @@ again:
*/
free_extent_state(cached_state);
if (!loops) {
- unsigned long offset = (*start) & (PAGE_CACHE_SIZE - 1);
- max_bytes = PAGE_CACHE_SIZE - offset;
+ max_bytes = PAGE_CACHE_SIZE;
loops = 1;
goto again;
} else {
@@ -1687,31 +1676,21 @@ out_failed:
return found;
}
-int extent_clear_unlock_delalloc(struct inode *inode,
- struct extent_io_tree *tree,
- u64 start, u64 end, struct page *locked_page,
- unsigned long op)
+int extent_clear_unlock_delalloc(struct inode *inode, u64 start, u64 end,
+ struct page *locked_page,
+ unsigned long clear_bits,
+ unsigned long page_ops)
{
+ struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
int ret;
struct page *pages[16];
unsigned long index = start >> PAGE_CACHE_SHIFT;
unsigned long end_index = end >> PAGE_CACHE_SHIFT;
unsigned long nr_pages = end_index - index + 1;
int i;
- unsigned long clear_bits = 0;
-
- if (op & EXTENT_CLEAR_UNLOCK)
- clear_bits |= EXTENT_LOCKED;
- if (op & EXTENT_CLEAR_DIRTY)
- clear_bits |= EXTENT_DIRTY;
-
- if (op & EXTENT_CLEAR_DELALLOC)
- clear_bits |= EXTENT_DELALLOC;
clear_extent_bit(tree, start, end, clear_bits, 1, 0, NULL, GFP_NOFS);
- if (!(op & (EXTENT_CLEAR_UNLOCK_PAGE | EXTENT_CLEAR_DIRTY |
- EXTENT_SET_WRITEBACK | EXTENT_END_WRITEBACK |
- EXTENT_SET_PRIVATE2)))
+ if (page_ops == 0)
return 0;
while (nr_pages > 0) {
@@ -1720,20 +1699,20 @@ int extent_clear_unlock_delalloc(struct inode *inode,
nr_pages, ARRAY_SIZE(pages)), pages);
for (i = 0; i < ret; i++) {
- if (op & EXTENT_SET_PRIVATE2)
+ if (page_ops & PAGE_SET_PRIVATE2)
SetPagePrivate2(pages[i]);
if (pages[i] == locked_page) {
page_cache_release(pages[i]);
continue;
}
- if (op & EXTENT_CLEAR_DIRTY)
+ if (page_ops & PAGE_CLEAR_DIRTY)
clear_page_dirty_for_io(pages[i]);
- if (op & EXTENT_SET_WRITEBACK)
+ if (page_ops & PAGE_SET_WRITEBACK)
set_page_writeback(pages[i]);
- if (op & EXTENT_END_WRITEBACK)
+ if (page_ops & PAGE_END_WRITEBACK)
end_page_writeback(pages[i]);
- if (op & EXTENT_CLEAR_UNLOCK_PAGE)
+ if (page_ops & PAGE_UNLOCK)
unlock_page(pages[i]);
page_cache_release(pages[i]);
}
@@ -1760,10 +1739,8 @@ u64 count_range_bits(struct extent_io_tree *tree,
u64 last = 0;
int found = 0;
- if (search_end <= cur_start) {
- WARN_ON(1);
+ if (WARN_ON(search_end <= cur_start))
return 0;
- }
spin_lock(&tree->lock);
if (cur_start == 0 && bits == EXTENT_DIRTY) {
@@ -1810,7 +1787,7 @@ out:
* set the private field for a given byte offset in the tree. If there isn't
* an extent_state there already, this does nothing.
*/
-int set_state_private(struct extent_io_tree *tree, u64 start, u64 private)
+static int set_state_private(struct extent_io_tree *tree, u64 start, u64 private)
{
struct rb_node *node;
struct extent_state *state;
@@ -1837,64 +1814,6 @@ out:
return ret;
}
-void extent_cache_csums_dio(struct extent_io_tree *tree, u64 start, u32 csums[],
- int count)
-{
- struct rb_node *node;
- struct extent_state *state;
-
- spin_lock(&tree->lock);
- /*
- * this search will find all the extents that end after
- * our range starts.
- */
- node = tree_search(tree, start);
- BUG_ON(!node);
-
- state = rb_entry(node, struct extent_state, rb_node);
- BUG_ON(state->start != start);
-
- while (count) {
- state->private = *csums++;
- count--;
- state = next_state(state);
- }
- spin_unlock(&tree->lock);
-}
-
-static inline u64 __btrfs_get_bio_offset(struct bio *bio, int bio_index)
-{
- struct bio_vec *bvec = bio->bi_io_vec + bio_index;
-
- return page_offset(bvec->bv_page) + bvec->bv_offset;
-}
-
-void extent_cache_csums(struct extent_io_tree *tree, struct bio *bio, int bio_index,
- u32 csums[], int count)
-{
- struct rb_node *node;
- struct extent_state *state = NULL;
- u64 start;
-
- spin_lock(&tree->lock);
- do {
- start = __btrfs_get_bio_offset(bio, bio_index);
- if (state == NULL || state->start != start) {
- node = tree_search(tree, start);
- BUG_ON(!node);
-
- state = rb_entry(node, struct extent_state, rb_node);
- BUG_ON(state->start != start);
- }
- state->private = *csums++;
- count--;
- bio_index++;
-
- state = next_state(state);
- } while (count);
- spin_unlock(&tree->lock);
-}
-
int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private)
{
struct rb_node *node;
@@ -2173,7 +2092,8 @@ static int clean_io_failure(u64 start, struct page *page)
EXTENT_LOCKED);
spin_unlock(&BTRFS_I(inode)->io_tree.lock);
- if (state && state->start == failrec->start) {
+ if (state && state->start <= failrec->start &&
+ state->end >= failrec->start + failrec->len - 1) {
fs_info = BTRFS_I(inode)->root->fs_info;
num_copies = btrfs_num_copies(fs_info, failrec->logical,
failrec->len);
@@ -2201,9 +2121,9 @@ out:
* needed
*/
-static int bio_readpage_error(struct bio *failed_bio, struct page *page,
- u64 start, u64 end, int failed_mirror,
- struct extent_state *state)
+static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset,
+ struct page *page, u64 start, u64 end,
+ int failed_mirror)
{
struct io_failure_record *failrec = NULL;
u64 private;
@@ -2213,6 +2133,8 @@ static int bio_readpage_error(struct bio *failed_bio, struct page *page,
struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
struct bio *bio;
+ struct btrfs_io_bio *btrfs_failed_bio;
+ struct btrfs_io_bio *btrfs_bio;
int num_copies;
int ret;
int read_mode;
@@ -2296,23 +2218,12 @@ static int bio_readpage_error(struct bio *failed_bio, struct page *page,
* all the retry and error correction code that follows. no
* matter what the error is, it is very likely to persist.
*/
- pr_debug("bio_readpage_error: cannot repair, num_copies == 1. "
- "state=%p, num_copies=%d, next_mirror %d, "
- "failed_mirror %d\n", state, num_copies,
- failrec->this_mirror, failed_mirror);
+ pr_debug("bio_readpage_error: cannot repair, num_copies=%d, next_mirror %d, failed_mirror %d\n",
+ num_copies, failrec->this_mirror, failed_mirror);
free_io_failure(inode, failrec, 0);
return -EIO;
}
- if (!state) {
- spin_lock(&tree->lock);
- state = find_first_extent_bit_state(tree, failrec->start,
- EXTENT_LOCKED);
- if (state && state->start != failrec->start)
- state = NULL;
- spin_unlock(&tree->lock);
- }
-
/*
* there are two premises:
* a) deliver good data to the caller
@@ -2349,9 +2260,8 @@ static int bio_readpage_error(struct bio *failed_bio, struct page *page,
read_mode = READ_SYNC;
}
- if (!state || failrec->this_mirror > num_copies) {
- pr_debug("bio_readpage_error: (fail) state=%p, num_copies=%d, "
- "next_mirror %d, failed_mirror %d\n", state,
+ if (failrec->this_mirror > num_copies) {
+ pr_debug("bio_readpage_error: (fail) num_copies=%d, next_mirror %d, failed_mirror %d\n",
num_copies, failrec->this_mirror, failed_mirror);
free_io_failure(inode, failrec, 0);
return -EIO;
@@ -2362,12 +2272,24 @@ static int bio_readpage_error(struct bio *failed_bio, struct page *page,
free_io_failure(inode, failrec, 0);
return -EIO;
}
- bio->bi_private = state;
bio->bi_end_io = failed_bio->bi_end_io;
bio->bi_sector = failrec->logical >> 9;
bio->bi_bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev;
bio->bi_size = 0;
+ btrfs_failed_bio = btrfs_io_bio(failed_bio);
+ if (btrfs_failed_bio->csum) {
+ struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
+ u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
+
+ btrfs_bio = btrfs_io_bio(bio);
+ btrfs_bio->csum = btrfs_bio->csum_inline;
+ phy_offset >>= inode->i_sb->s_blocksize_bits;
+ phy_offset *= csum_size;
+ memcpy(btrfs_bio->csum, btrfs_failed_bio->csum + phy_offset,
+ csum_size);
+ }
+
bio_add_page(bio, page, failrec->len, start - page_offset(page));
pr_debug("bio_readpage_error: submitting new read[%#x] to "
@@ -2450,6 +2372,18 @@ static void end_bio_extent_writepage(struct bio *bio, int err)
bio_put(bio);
}
+static void
+endio_readpage_release_extent(struct extent_io_tree *tree, u64 start, u64 len,
+ int uptodate)
+{
+ struct extent_state *cached = NULL;
+ u64 end = start + len - 1;
+
+ if (uptodate && tree->track_uptodate)
+ set_extent_uptodate(tree, start, end, &cached, GFP_ATOMIC);
+ unlock_extent_cached(tree, start, end, &cached, GFP_ATOMIC);
+}
+
/*
* after a readpage IO is done, we need to:
* clear the uptodate bits on error
@@ -2466,9 +2400,14 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
struct bio_vec *bvec_end = bio->bi_io_vec + bio->bi_vcnt - 1;
struct bio_vec *bvec = bio->bi_io_vec;
+ struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
struct extent_io_tree *tree;
+ u64 offset = 0;
u64 start;
u64 end;
+ u64 len;
+ u64 extent_start = 0;
+ u64 extent_len = 0;
int mirror;
int ret;
@@ -2477,9 +2416,6 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
do {
struct page *page = bvec->bv_page;
- struct extent_state *cached = NULL;
- struct extent_state *state;
- struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
struct inode *inode = page->mapping->host;
pr_debug("end_bio_extent_readpage: bi_sector=%llu, err=%d, "
@@ -2500,37 +2436,32 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
start = page_offset(page);
end = start + bvec->bv_offset + bvec->bv_len - 1;
+ len = bvec->bv_len;
if (++bvec <= bvec_end)
prefetchw(&bvec->bv_page->flags);
- spin_lock(&tree->lock);
- state = find_first_extent_bit_state(tree, start, EXTENT_LOCKED);
- if (state && state->start == start) {
- /*
- * take a reference on the state, unlock will drop
- * the ref
- */
- cache_state(state, &cached);
- }
- spin_unlock(&tree->lock);
-
mirror = io_bio->mirror_num;
- if (uptodate && tree->ops && tree->ops->readpage_end_io_hook) {
- ret = tree->ops->readpage_end_io_hook(page, start, end,
- state, mirror);
+ if (likely(uptodate && tree->ops &&
+ tree->ops->readpage_end_io_hook)) {
+ ret = tree->ops->readpage_end_io_hook(io_bio, offset,
+ page, start, end,
+ mirror);
if (ret)
uptodate = 0;
else
clean_io_failure(start, page);
}
- if (!uptodate && tree->ops && tree->ops->readpage_io_failed_hook) {
+ if (likely(uptodate))
+ goto readpage_ok;
+
+ if (tree->ops && tree->ops->readpage_io_failed_hook) {
ret = tree->ops->readpage_io_failed_hook(page, mirror);
if (!ret && !err &&
test_bit(BIO_UPTODATE, &bio->bi_flags))
uptodate = 1;
- } else if (!uptodate) {
+ } else {
/*
* The generic bio_readpage_error handles errors the
* following way: If possible, new read requests are
@@ -2541,24 +2472,18 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
* can't handle the error it will return -EIO and we
* remain responsible for that page.
*/
- ret = bio_readpage_error(bio, page, start, end, mirror, NULL);
+ ret = bio_readpage_error(bio, offset, page, start, end,
+ mirror);
if (ret == 0) {
uptodate =
test_bit(BIO_UPTODATE, &bio->bi_flags);
if (err)
uptodate = 0;
- uncache_state(&cached);
continue;
}
}
-
- if (uptodate && tree->track_uptodate) {
- set_extent_uptodate(tree, start, end, &cached,
- GFP_ATOMIC);
- }
- unlock_extent_cached(tree, start, end, &cached, GFP_ATOMIC);
-
- if (uptodate) {
+readpage_ok:
+ if (likely(uptodate)) {
loff_t i_size = i_size_read(inode);
pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;
unsigned offset;
@@ -2573,8 +2498,36 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
SetPageError(page);
}
unlock_page(page);
+ offset += len;
+
+ if (unlikely(!uptodate)) {
+ if (extent_len) {
+ endio_readpage_release_extent(tree,
+ extent_start,
+ extent_len, 1);
+ extent_start = 0;
+ extent_len = 0;
+ }
+ endio_readpage_release_extent(tree, start,
+ end - start + 1, 0);
+ } else if (!extent_len) {
+ extent_start = start;
+ extent_len = end + 1 - start;
+ } else if (extent_start + extent_len == start) {
+ extent_len += end + 1 - start;
+ } else {
+ endio_readpage_release_extent(tree, extent_start,
+ extent_len, uptodate);
+ extent_start = start;
+ extent_len = end + 1 - start;
+ }
} while (bvec <= bvec_end);
+ if (extent_len)
+ endio_readpage_release_extent(tree, extent_start, extent_len,
+ uptodate);
+ if (io_bio->end_io)
+ io_bio->end_io(io_bio, err);
bio_put(bio);
}
@@ -2586,6 +2539,7 @@ struct bio *
btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs,
gfp_t gfp_flags)
{
+ struct btrfs_io_bio *btrfs_bio;
struct bio *bio;
bio = bio_alloc_bioset(gfp_flags, nr_vecs, btrfs_bioset);
@@ -2601,6 +2555,10 @@ btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs,
bio->bi_size = 0;
bio->bi_bdev = bdev;
bio->bi_sector = first_sector;
+ btrfs_bio = btrfs_io_bio(bio);
+ btrfs_bio->csum = NULL;
+ btrfs_bio->csum_allocated = NULL;
+ btrfs_bio->end_io = NULL;
}
return bio;
}
@@ -2614,7 +2572,17 @@ struct bio *btrfs_bio_clone(struct bio *bio, gfp_t gfp_mask)
/* this also allocates from the btrfs_bioset */
struct bio *btrfs_io_bio_alloc(gfp_t gfp_mask, unsigned int nr_iovecs)
{
- return bio_alloc_bioset(gfp_mask, nr_iovecs, btrfs_bioset);
+ struct btrfs_io_bio *btrfs_bio;
+ struct bio *bio;
+
+ bio = bio_alloc_bioset(gfp_mask, nr_iovecs, btrfs_bioset);
+ if (bio) {
+ btrfs_bio = btrfs_io_bio(bio);
+ btrfs_bio->csum = NULL;
+ btrfs_bio->csum_allocated = NULL;
+ btrfs_bio->end_io = NULL;
+ }
+ return bio;
}
@@ -2738,17 +2706,45 @@ void set_page_extent_mapped(struct page *page)
}
}
+static struct extent_map *
+__get_extent_map(struct inode *inode, struct page *page, size_t pg_offset,
+ u64 start, u64 len, get_extent_t *get_extent,
+ struct extent_map **em_cached)
+{
+ struct extent_map *em;
+
+ if (em_cached && *em_cached) {
+ em = *em_cached;
+ if (em->in_tree && start >= em->start &&
+ start < extent_map_end(em)) {
+ atomic_inc(&em->refs);
+ return em;
+ }
+
+ free_extent_map(em);
+ *em_cached = NULL;
+ }
+
+ em = get_extent(inode, page, pg_offset, start, len, 0);
+ if (em_cached && !IS_ERR_OR_NULL(em)) {
+ BUG_ON(*em_cached);
+ atomic_inc(&em->refs);
+ *em_cached = em;
+ }
+ return em;
+}
/*
* basic readpage implementation. Locked extent state structs are inserted
* into the tree that are removed when the IO is done (by the end_io
* handlers)
* XXX JDM: This needs looking at to ensure proper page locking
*/
-static int __extent_read_full_page(struct extent_io_tree *tree,
- struct page *page,
- get_extent_t *get_extent,
- struct bio **bio, int mirror_num,
- unsigned long *bio_flags, int rw)
+static int __do_readpage(struct extent_io_tree *tree,
+ struct page *page,
+ get_extent_t *get_extent,
+ struct extent_map **em_cached,
+ struct bio **bio, int mirror_num,
+ unsigned long *bio_flags, int rw)
{
struct inode *inode = page->mapping->host;
u64 start = page_offset(page);
@@ -2762,35 +2758,26 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
sector_t sector;
struct extent_map *em;
struct block_device *bdev;
- struct btrfs_ordered_extent *ordered;
int ret;
int nr = 0;
+ int parent_locked = *bio_flags & EXTENT_BIO_PARENT_LOCKED;
size_t pg_offset = 0;
size_t iosize;
size_t disk_io_size;
size_t blocksize = inode->i_sb->s_blocksize;
- unsigned long this_bio_flag = 0;
+ unsigned long this_bio_flag = *bio_flags & EXTENT_BIO_PARENT_LOCKED;
set_page_extent_mapped(page);
+ end = page_end;
if (!PageUptodate(page)) {
if (cleancache_get_page(page) == 0) {
BUG_ON(blocksize != PAGE_SIZE);
+ unlock_extent(tree, start, end);
goto out;
}
}
- end = page_end;
- while (1) {
- lock_extent(tree, start, end);
- ordered = btrfs_lookup_ordered_extent(inode, start);
- if (!ordered)
- break;
- unlock_extent(tree, start, end);
- btrfs_start_ordered_extent(inode, ordered, 1);
- btrfs_put_ordered_extent(ordered);
- }
-
if (page->index == last_byte >> PAGE_CACHE_SHIFT) {
char *userpage;
size_t zero_offset = last_byte & (PAGE_CACHE_SIZE - 1);
@@ -2817,15 +2804,18 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
kunmap_atomic(userpage);
set_extent_uptodate(tree, cur, cur + iosize - 1,
&cached, GFP_NOFS);
- unlock_extent_cached(tree, cur, cur + iosize - 1,
- &cached, GFP_NOFS);
+ if (!parent_locked)
+ unlock_extent_cached(tree, cur,
+ cur + iosize - 1,
+ &cached, GFP_NOFS);
break;
}
- em = get_extent(inode, page, pg_offset, cur,
- end - cur + 1, 0);
+ em = __get_extent_map(inode, page, pg_offset, cur,
+ end - cur + 1, get_extent, em_cached);
if (IS_ERR_OR_NULL(em)) {
SetPageError(page);
- unlock_extent(tree, cur, end);
+ if (!parent_locked)
+ unlock_extent(tree, cur, end);
break;
}
extent_offset = cur - em->start;
@@ -2833,7 +2823,7 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
BUG_ON(end < cur);
if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) {
- this_bio_flag = EXTENT_BIO_COMPRESSED;
+ this_bio_flag |= EXTENT_BIO_COMPRESSED;
extent_set_compress_type(&this_bio_flag,
em->compress_type);
}
@@ -2877,7 +2867,8 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
if (test_range_bit(tree, cur, cur_end,
EXTENT_UPTODATE, 1, NULL)) {
check_page_uptodate(tree, page);
- unlock_extent(tree, cur, cur + iosize - 1);
+ if (!parent_locked)
+ unlock_extent(tree, cur, cur + iosize - 1);
cur = cur + iosize;
pg_offset += iosize;
continue;
@@ -2887,7 +2878,8 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
*/
if (block_start == EXTENT_MAP_INLINE) {
SetPageError(page);
- unlock_extent(tree, cur, cur + iosize - 1);
+ if (!parent_locked)
+ unlock_extent(tree, cur, cur + iosize - 1);
cur = cur + iosize;
pg_offset += iosize;
continue;
@@ -2905,7 +2897,8 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
*bio_flags = this_bio_flag;
} else {
SetPageError(page);
- unlock_extent(tree, cur, cur + iosize - 1);
+ if (!parent_locked)
+ unlock_extent(tree, cur, cur + iosize - 1);
}
cur = cur + iosize;
pg_offset += iosize;
@@ -2919,6 +2912,104 @@ out:
return 0;
}
+static inline void __do_contiguous_readpages(struct extent_io_tree *tree,
+ struct page *pages[], int nr_pages,
+ u64 start, u64 end,
+ get_extent_t *get_extent,
+ struct extent_map **em_cached,
+ struct bio **bio, int mirror_num,
+ unsigned long *bio_flags, int rw)
+{
+ struct inode *inode;
+ struct btrfs_ordered_extent *ordered;
+ int index;
+
+ inode = pages[0]->mapping->host;
+ while (1) {
+ lock_extent(tree, start, end);
+ ordered = btrfs_lookup_ordered_range(inode, start,
+ end - start + 1);
+ if (!ordered)
+ break;
+ unlock_extent(tree, start, end);
+ btrfs_start_ordered_extent(inode, ordered, 1);
+ btrfs_put_ordered_extent(ordered);
+ }
+
+ for (index = 0; index < nr_pages; index++) {
+ __do_readpage(tree, pages[index], get_extent, em_cached, bio,
+ mirror_num, bio_flags, rw);
+ page_cache_release(pages[index]);
+ }
+}
+
+static void __extent_readpages(struct extent_io_tree *tree,
+ struct page *pages[],
+ int nr_pages, get_extent_t *get_extent,
+ struct extent_map **em_cached,
+ struct bio **bio, int mirror_num,
+ unsigned long *bio_flags, int rw)
+{
+ u64 start = 0;
+ u64 end = 0;
+ u64 page_start;
+ int index;
+ int first_index = 0;
+
+ for (index = 0; index < nr_pages; index++) {
+ page_start = page_offset(pages[index]);
+ if (!end) {
+ start = page_start;
+ end = start + PAGE_CACHE_SIZE - 1;
+ first_index = index;
+ } else if (end + 1 == page_start) {
+ end += PAGE_CACHE_SIZE;
+ } else {
+ __do_contiguous_readpages(tree, &pages[first_index],
+ index - first_index, start,
+ end, get_extent, em_cached,
+ bio, mirror_num, bio_flags,
+ rw);
+ start = page_start;
+ end = start + PAGE_CACHE_SIZE - 1;
+ first_index = index;
+ }
+ }
+
+ if (end)
+ __do_contiguous_readpages(tree, &pages[first_index],
+ index - first_index, start,
+ end, get_extent, em_cached, bio,
+ mirror_num, bio_flags, rw);
+}
+
+static int __extent_read_full_page(struct extent_io_tree *tree,
+ struct page *page,
+ get_extent_t *get_extent,
+ struct bio **bio, int mirror_num,
+ unsigned long *bio_flags, int rw)
+{
+ struct inode *inode = page->mapping->host;
+ struct btrfs_ordered_extent *ordered;
+ u64 start = page_offset(page);
+ u64 end = start + PAGE_CACHE_SIZE - 1;
+ int ret;
+
+ while (1) {
+ lock_extent(tree, start, end);
+ ordered = btrfs_lookup_ordered_extent(inode, start);
+ if (!ordered)
+ break;
+ unlock_extent(tree, start, end);
+ btrfs_start_ordered_extent(inode, ordered, 1);
+ btrfs_put_ordered_extent(ordered);
+ }
+
+ ret = __do_readpage(tree, page, get_extent, NULL, bio, mirror_num,
+ bio_flags, rw);
+ return ret;
+}
+
int extent_read_full_page(struct extent_io_tree *tree, struct page *page,
get_extent_t *get_extent, int mirror_num)
{
@@ -2933,6 +3024,20 @@ int extent_read_full_page(struct extent_io_tree *tree, struct page *page,
return ret;
}
+int extent_read_full_page_nolock(struct extent_io_tree *tree, struct page *page,
+ get_extent_t *get_extent, int mirror_num)
+{
+ struct bio *bio = NULL;
+ unsigned long bio_flags = EXTENT_BIO_PARENT_LOCKED;
+ int ret;
+
+ ret = __do_readpage(tree, page, get_extent, NULL, &bio, mirror_num,
+ &bio_flags, READ);
+ if (bio)
+ ret = submit_one_bio(READ, bio, mirror_num, bio_flags);
+ return ret;
+}
+
static noinline void update_nr_written(struct page *page,
struct writeback_control *wbc,
unsigned long nr_written)
@@ -3189,8 +3294,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
if (!PageWriteback(page)) {
printk(KERN_ERR "btrfs warning page %lu not "
"writeback, cur %llu end %llu\n",
- page->index, (unsigned long long)cur,
- (unsigned long long)end);
+ page->index, cur, end);
}
ret = submit_extent_page(write_flags, tree, page,
@@ -3462,9 +3566,8 @@ retry:
* but no sense in crashing the users box for something
* we can survive anyway.
*/
- if (!eb) {
+ if (WARN_ON(!eb)) {
spin_unlock(&mapping->private_lock);
- WARN_ON(1);
continue;
}
@@ -3769,7 +3872,7 @@ int extent_readpages(struct extent_io_tree *tree,
unsigned long bio_flags = 0;
struct page *pagepool[16];
struct page *page;
- int i = 0;
+ struct extent_map *em_cached = NULL;
int nr = 0;
for (page_idx = 0; page_idx < nr_pages; page_idx++) {
@@ -3786,18 +3889,16 @@ int extent_readpages(struct extent_io_tree *tree,
pagepool[nr++] = page;
if (nr < ARRAY_SIZE(pagepool))
continue;
- for (i = 0; i < nr; i++) {
- __extent_read_full_page(tree, pagepool[i], get_extent,
- &bio, 0, &bio_flags, READ);
- page_cache_release(pagepool[i]);
- }
+ __extent_readpages(tree, pagepool, nr, get_extent, &em_cached,
+ &bio, 0, &bio_flags, READ);
nr = 0;
}
- for (i = 0; i < nr; i++) {
- __extent_read_full_page(tree, pagepool[i], get_extent,
- &bio, 0, &bio_flags, READ);
- page_cache_release(pagepool[i]);
- }
+ if (nr)
+ __extent_readpages(tree, pagepool, nr, get_extent, &em_cached,
+ &bio, 0, &bio_flags, READ);
+
+ if (em_cached)
+ free_extent_map(em_cached);
BUG_ON(!list_empty(pages));
if (bio)
@@ -3933,7 +4034,7 @@ static struct extent_map *get_extent_skip_holes(struct inode *inode,
if (offset >= last)
return NULL;
- while(1) {
+ while (1) {
len = last - offset;
if (len == 0)
break;
@@ -3957,6 +4058,19 @@ static struct extent_map *get_extent_skip_holes(struct inode *inode,
return NULL;
}
+static noinline int count_ext_ref(u64 inum, u64 offset, u64 root_id, void *ctx)
+{
+ unsigned long cnt = *((unsigned long *)ctx);
+
+ cnt++;
+ *((unsigned long *)ctx) = cnt;
+
+ /* Now we're sure that the extent is shared. */
+ if (cnt > 1)
+ return 1;
+ return 0;
+}
+
int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
__u64 start, __u64 len, get_extent_t *get_extent)
{
@@ -4023,7 +4137,7 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
last = found_key.offset;
last_for_get_extent = last + 1;
}
- btrfs_free_path(path);
+ btrfs_release_path(path);
/*
* we might have some extents allocated but more delalloc past those
@@ -4093,7 +4207,24 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
flags |= (FIEMAP_EXTENT_DELALLOC |
FIEMAP_EXTENT_UNKNOWN);
} else {
+ unsigned long ref_cnt = 0;
+
disko = em->block_start + offset_in_extent;
+
+ /*
+ * As btrfs supports shared space, this information
+ * can be exported to userspace tools via
+ * flag FIEMAP_EXTENT_SHARED.
+ */
+ ret = iterate_inodes_from_logical(
+ em->block_start,
+ BTRFS_I(inode)->root->fs_info,
+ path, count_ext_ref, &ref_cnt);
+ if (ret < 0 && ret != -ENOENT)
+ goto out_free;
+
+ if (ref_cnt > 1)
+ flags |= FIEMAP_EXTENT_SHARED;
}
if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags))
flags |= FIEMAP_EXTENT_ENCODED;
@@ -4125,6 +4256,7 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
out_free:
free_extent_map(em);
out:
+ btrfs_free_path(path);
unlock_extent_cached(&BTRFS_I(inode)->io_tree, start, start + len - 1,
&cached_state, GFP_NOFS);
return ret;
@@ -4136,6 +4268,76 @@ static void __free_extent_buffer(struct extent_buffer *eb)
kmem_cache_free(extent_buffer_cache, eb);
}
+static int extent_buffer_under_io(struct extent_buffer *eb)
+{
+ return (atomic_read(&eb->io_pages) ||
+ test_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags) ||
+ test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags));
+}
+
+/*
+ * Helper for releasing extent buffer page.
+ */
+static void btrfs_release_extent_buffer_page(struct extent_buffer *eb,
+ unsigned long start_idx)
+{
+ unsigned long index;
+ unsigned long num_pages;
+ struct page *page;
+ int mapped = !test_bit(EXTENT_BUFFER_DUMMY, &eb->bflags);
+
+ BUG_ON(extent_buffer_under_io(eb));
+
+ num_pages = num_extent_pages(eb->start, eb->len);
+ index = start_idx + num_pages;
+ if (start_idx >= index)
+ return;
+
+ do {
+ index--;
+ page = extent_buffer_page(eb, index);
+ if (page && mapped) {
+ spin_lock(&page->mapping->private_lock);
+ /*
+ * We do this since we'll remove the pages after we've
+ * removed the eb from the radix tree, so we could race
+ * and have this page now attached to the new eb. So
+ * only clear page_private if it's still connected to
+ * this eb.
+ */
+ if (PagePrivate(page) &&
+ page->private == (unsigned long)eb) {
+ BUG_ON(test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags));
+ BUG_ON(PageDirty(page));
+ BUG_ON(PageWriteback(page));
+ /*
+ * We need to make sure we haven't be attached
+ * to a new eb.
+ */
+ ClearPagePrivate(page);
+ set_page_private(page, 0);
+ /* One for the page private */
+ page_cache_release(page);
+ }
+ spin_unlock(&page->mapping->private_lock);
+
+ }
+ if (page) {
+ /* One for when we alloced the page */
+ page_cache_release(page);
+ }
+ } while (index != start_idx);
+}
+
+/*
+ * Helper for releasing the extent buffer.
+ */
+static inline void btrfs_release_extent_buffer(struct extent_buffer *eb)
+{
+ btrfs_release_extent_buffer_page(eb, 0);
+ __free_extent_buffer(eb);
+}
+
static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree,
u64 start,
unsigned long len,
@@ -4184,13 +4386,16 @@ struct extent_buffer *btrfs_clone_extent_buffer(struct extent_buffer *src)
struct extent_buffer *new;
unsigned long num_pages = num_extent_pages(src->start, src->len);
- new = __alloc_extent_buffer(NULL, src->start, src->len, GFP_ATOMIC);
+ new = __alloc_extent_buffer(NULL, src->start, src->len, GFP_NOFS);
if (new == NULL)
return NULL;
for (i = 0; i < num_pages; i++) {
- p = alloc_page(GFP_ATOMIC);
- BUG_ON(!p);
+ p = alloc_page(GFP_NOFS);
+ if (!p) {
+ btrfs_release_extent_buffer(new);
+ return NULL;
+ }
attach_extent_buffer_page(new, p);
WARN_ON(PageDirty(p));
SetPageUptodate(p);
@@ -4210,12 +4415,12 @@ struct extent_buffer *alloc_dummy_extent_buffer(u64 start, unsigned long len)
unsigned long num_pages = num_extent_pages(0, len);
unsigned long i;
- eb = __alloc_extent_buffer(NULL, start, len, GFP_ATOMIC);
+ eb = __alloc_extent_buffer(NULL, start, len, GFP_NOFS);
if (!eb)
return NULL;
for (i = 0; i < num_pages; i++) {
- eb->pages[i] = alloc_page(GFP_ATOMIC);
+ eb->pages[i] = alloc_page(GFP_NOFS);
if (!eb->pages[i])
goto err;
}
@@ -4231,76 +4436,6 @@ err:
return NULL;
}
-static int extent_buffer_under_io(struct extent_buffer *eb)
-{
- return (atomic_read(&eb->io_pages) ||
- test_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags) ||
- test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags));
-}
-
-/*
- * Helper for releasing extent buffer page.
- */
-static void btrfs_release_extent_buffer_page(struct extent_buffer *eb,
- unsigned long start_idx)
-{
- unsigned long index;
- unsigned long num_pages;
- struct page *page;
- int mapped = !test_bit(EXTENT_BUFFER_DUMMY, &eb->bflags);
-
- BUG_ON(extent_buffer_under_io(eb));
-
- num_pages = num_extent_pages(eb->start, eb->len);
- index = start_idx + num_pages;
- if (start_idx >= index)
- return;
-
- do {
- index--;
- page = extent_buffer_page(eb, index);
- if (page && mapped) {
- spin_lock(&page->mapping->private_lock);
- /*
- * We do this since we'll remove the pages after we've
- * removed the eb from the radix tree, so we could race
- * and have this page now attached to the new eb. So
- * only clear page_private if it's still connected to
- * this eb.
- */
- if (PagePrivate(page) &&
- page->private == (unsigned long)eb) {
- BUG_ON(test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags));
- BUG_ON(PageDirty(page));
- BUG_ON(PageWriteback(page));
- /*
- * We need to make sure we haven't be attached
- * to a new eb.
- */
- ClearPagePrivate(page);
- set_page_private(page, 0);
- /* One for the page private */
- page_cache_release(page);
- }
- spin_unlock(&page->mapping->private_lock);
-
- }
- if (page) {
- /* One for when we alloced the page */
- page_cache_release(page);
- }
- } while (index != start_idx);
-}
-
-/*
- * Helper for releasing the extent buffer.
- */
-static inline void btrfs_release_extent_buffer(struct extent_buffer *eb)
-{
- btrfs_release_extent_buffer_page(eb, 0);
- __free_extent_buffer(eb);
-}
-
static void check_buffer_tree_ref(struct extent_buffer *eb)
{
int refs;
@@ -4347,6 +4482,23 @@ static void mark_extent_buffer_accessed(struct extent_buffer *eb)
}
}
+struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree,
+ u64 start)
+{
+ struct extent_buffer *eb;
+
+ rcu_read_lock();
+ eb = radix_tree_lookup(&tree->buffer, start >> PAGE_CACHE_SHIFT);
+ if (eb && atomic_inc_not_zero(&eb->refs)) {
+ rcu_read_unlock();
+ mark_extent_buffer_accessed(eb);
+ return eb;
+ }
+ rcu_read_unlock();
+
+ return NULL;
+}
+
struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
u64 start, unsigned long len)
{
@@ -4360,14 +4512,10 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
int uptodate = 1;
int ret;
- rcu_read_lock();
- eb = radix_tree_lookup(&tree->buffer, start >> PAGE_CACHE_SHIFT);
- if (eb && atomic_inc_not_zero(&eb->refs)) {
- rcu_read_unlock();
- mark_extent_buffer_accessed(eb);
+
+ eb = find_extent_buffer(tree, start);
+ if (eb)
return eb;
- }
- rcu_read_unlock();
eb = __alloc_extent_buffer(tree, start, len, GFP_NOFS);
if (!eb)
@@ -4426,24 +4574,17 @@ again:
spin_lock(&tree->buffer_lock);
ret = radix_tree_insert(&tree->buffer, start >> PAGE_CACHE_SHIFT, eb);
+ spin_unlock(&tree->buffer_lock);
+ radix_tree_preload_end();
if (ret == -EEXIST) {
- exists = radix_tree_lookup(&tree->buffer,
- start >> PAGE_CACHE_SHIFT);
- if (!atomic_inc_not_zero(&exists->refs)) {
- spin_unlock(&tree->buffer_lock);
- radix_tree_preload_end();
- exists = NULL;
+ exists = find_extent_buffer(tree, start);
+ if (exists)
+ goto free_eb;
+ else
goto again;
- }
- spin_unlock(&tree->buffer_lock);
- radix_tree_preload_end();
- mark_extent_buffer_accessed(exists);
- goto free_eb;
}
/* add one reference for the tree */
check_buffer_tree_ref(eb);
- spin_unlock(&tree->buffer_lock);
- radix_tree_preload_end();
/*
* there is a race where release page may have
@@ -4474,23 +4615,6 @@ free_eb:
return exists;
}
-struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree,
- u64 start, unsigned long len)
-{
- struct extent_buffer *eb;
-
- rcu_read_lock();
- eb = radix_tree_lookup(&tree->buffer, start >> PAGE_CACHE_SHIFT);
- if (eb && atomic_inc_not_zero(&eb->refs)) {
- rcu_read_unlock();
- mark_extent_buffer_accessed(eb);
- return eb;
- }
- rcu_read_unlock();
-
- return NULL;
-}
-
static inline void btrfs_release_extent_buffer_rcu(struct rcu_head *head)
{
struct extent_buffer *eb =
@@ -4771,7 +4895,7 @@ void read_extent_buffer(struct extent_buffer *eb, void *dstv,
WARN_ON(start > eb->len);
WARN_ON(start + len > eb->start + eb->len);
- offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1);
+ offset = (start_offset + start) & (PAGE_CACHE_SIZE - 1);
while (len > 0) {
page = extent_buffer_page(eb, i);
@@ -4813,8 +4937,8 @@ int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start,
if (start + min_len > eb->len) {
WARN(1, KERN_ERR "btrfs bad mapping eb start %llu len %lu, "
- "wanted %lu %lu\n", (unsigned long long)eb->start,
- eb->len, start, min_len);
+ "wanted %lu %lu\n",
+ eb->start, eb->len, start, min_len);
return -EINVAL;
}
@@ -4841,7 +4965,7 @@ int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv,
WARN_ON(start > eb->len);
WARN_ON(start + len > eb->start + eb->len);
- offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1);
+ offset = (start_offset + start) & (PAGE_CACHE_SIZE - 1);
while (len > 0) {
page = extent_buffer_page(eb, i);
@@ -4875,7 +4999,7 @@ void write_extent_buffer(struct extent_buffer *eb, const void *srcv,
WARN_ON(start > eb->len);
WARN_ON(start + len > eb->start + eb->len);
- offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1);
+ offset = (start_offset + start) & (PAGE_CACHE_SIZE - 1);
while (len > 0) {
page = extent_buffer_page(eb, i);
@@ -4905,7 +5029,7 @@ void memset_extent_buffer(struct extent_buffer *eb, char c,
WARN_ON(start > eb->len);
WARN_ON(start + len > eb->start + eb->len);
- offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1);
+ offset = (start_offset + start) & (PAGE_CACHE_SIZE - 1);
while (len > 0) {
page = extent_buffer_page(eb, i);
@@ -4936,7 +5060,7 @@ void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src,
WARN_ON(src->len != dst_len);
offset = (start_offset + dst_offset) &
- ((unsigned long)PAGE_CACHE_SIZE - 1);
+ (PAGE_CACHE_SIZE - 1);
while (len > 0) {
page = extent_buffer_page(dst, i);
@@ -4954,23 +5078,6 @@ void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src,
}
}
-static void move_pages(struct page *dst_page, struct page *src_page,
- unsigned long dst_off, unsigned long src_off,
- unsigned long len)
-{
- char *dst_kaddr = page_address(dst_page);
- if (dst_page == src_page) {
- memmove(dst_kaddr + dst_off, dst_kaddr + src_off, len);
- } else {
- char *src_kaddr = page_address(src_page);
- char *p = dst_kaddr + dst_off + len;
- char *s = src_kaddr + src_off + len;
-
- while (len--)
- *--p = *--s;
- }
-}
-
static inline bool areas_overlap(unsigned long src, unsigned long dst, unsigned long len)
{
unsigned long distance = (src > dst) ? src - dst : dst - src;
@@ -5022,9 +5129,9 @@ void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
while (len > 0) {
dst_off_in_page = (start_offset + dst_offset) &
- ((unsigned long)PAGE_CACHE_SIZE - 1);
+ (PAGE_CACHE_SIZE - 1);
src_off_in_page = (start_offset + src_offset) &
- ((unsigned long)PAGE_CACHE_SIZE - 1);
+ (PAGE_CACHE_SIZE - 1);
dst_i = (start_offset + dst_offset) >> PAGE_CACHE_SHIFT;
src_i = (start_offset + src_offset) >> PAGE_CACHE_SHIFT;
@@ -5075,13 +5182,13 @@ void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
src_i = (start_offset + src_end) >> PAGE_CACHE_SHIFT;
dst_off_in_page = (start_offset + dst_end) &
- ((unsigned long)PAGE_CACHE_SIZE - 1);
+ (PAGE_CACHE_SIZE - 1);
src_off_in_page = (start_offset + src_end) &
- ((unsigned long)PAGE_CACHE_SIZE - 1);
+ (PAGE_CACHE_SIZE - 1);
cur = min_t(unsigned long, len, src_off_in_page + 1);
cur = min(cur, dst_off_in_page + 1);
- move_pages(extent_buffer_page(dst, dst_i),
+ copy_pages(extent_buffer_page(dst, dst_i),
extent_buffer_page(dst, src_i),
dst_off_in_page - cur + 1,
src_off_in_page - cur + 1, cur);
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 3b8c4e2..19620c5 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -29,6 +29,7 @@
*/
#define EXTENT_BIO_COMPRESSED 1
#define EXTENT_BIO_TREE_LOG 2
+#define EXTENT_BIO_PARENT_LOCKED 4
#define EXTENT_BIO_FLAG_SHIFT 16
/* these are bit numbers for test/set bit */
@@ -44,14 +45,11 @@
#define EXTENT_BUFFER_DUMMY 9
/* these are flags for extent_clear_unlock_delalloc */
-#define EXTENT_CLEAR_UNLOCK_PAGE 0x1
-#define EXTENT_CLEAR_UNLOCK 0x2
-#define EXTENT_CLEAR_DELALLOC 0x4
-#define EXTENT_CLEAR_DIRTY 0x8
-#define EXTENT_SET_WRITEBACK 0x10
-#define EXTENT_END_WRITEBACK 0x20
-#define EXTENT_SET_PRIVATE2 0x40
-#define EXTENT_CLEAR_ACCOUNTING 0x80
+#define PAGE_UNLOCK (1 << 0)
+#define PAGE_CLEAR_DIRTY (1 << 1)
+#define PAGE_SET_WRITEBACK (1 << 2)
+#define PAGE_END_WRITEBACK (1 << 3)
+#define PAGE_SET_PRIVATE2 (1 << 4)
/*
* page->private values. Every page that is controlled by the extent
@@ -62,6 +60,7 @@
struct extent_state;
struct btrfs_root;
+struct btrfs_io_bio;
typedef int (extent_submit_bio_hook_t)(struct inode *inode, int rw,
struct bio *bio, int mirror_num,
@@ -77,8 +76,9 @@ struct extent_io_ops {
size_t size, struct bio *bio,
unsigned long bio_flags);
int (*readpage_io_failed_hook)(struct page *page, int failed_mirror);
- int (*readpage_end_io_hook)(struct page *page, u64 start, u64 end,
- struct extent_state *state, int mirror);
+ int (*readpage_end_io_hook)(struct btrfs_io_bio *io_bio, u64 phy_offset,
+ struct page *page, u64 start, u64 end,
+ int mirror);
int (*writepage_end_io_hook)(struct page *page, u64 start, u64 end,
struct extent_state *state, int uptodate);
void (*set_bit_hook)(struct inode *inode, struct extent_state *state,
@@ -200,6 +200,8 @@ int unlock_extent_cached(struct extent_io_tree *tree, u64 start, u64 end,
int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end);
int extent_read_full_page(struct extent_io_tree *tree, struct page *page,
get_extent_t *get_extent, int mirror_num);
+int extent_read_full_page_nolock(struct extent_io_tree *tree, struct page *page,
+ get_extent_t *get_extent, int mirror_num);
int __init extent_io_init(void);
void extent_io_exit(void);
@@ -261,11 +263,6 @@ int extent_readpages(struct extent_io_tree *tree,
get_extent_t get_extent);
int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
__u64 start, __u64 len, get_extent_t *get_extent);
-int set_state_private(struct extent_io_tree *tree, u64 start, u64 private);
-void extent_cache_csums_dio(struct extent_io_tree *tree, u64 start, u32 csums[],
- int count);
-void extent_cache_csums(struct extent_io_tree *tree, struct bio *bio,
- int bvec_index, u32 csums[], int count);
int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private);
void set_page_extent_mapped(struct page *page);
@@ -274,7 +271,7 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
struct extent_buffer *alloc_dummy_extent_buffer(u64 start, unsigned long len);
struct extent_buffer *btrfs_clone_extent_buffer(struct extent_buffer *src);
struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree,
- u64 start, unsigned long len);
+ u64 start);
void free_extent_buffer(struct extent_buffer *eb);
void free_extent_buffer_stale(struct extent_buffer *eb);
#define WAIT_NONE 0
@@ -330,10 +327,10 @@ int map_private_extent_buffer(struct extent_buffer *eb, unsigned long offset,
unsigned long *map_len);
int extent_range_clear_dirty_for_io(struct inode *inode, u64 start, u64 end);
int extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end);
-int extent_clear_unlock_delalloc(struct inode *inode,
- struct extent_io_tree *tree,
- u64 start, u64 end, struct page *locked_page,
- unsigned long op);
+int extent_clear_unlock_delalloc(struct inode *inode, u64 start, u64 end,
+ struct page *locked_page,
+ unsigned long bits_to_clear,
+ unsigned long page_ops);
struct bio *
btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs,
gfp_t gfp_flags);
@@ -348,4 +345,10 @@ int repair_io_failure(struct btrfs_fs_info *fs_info, u64 start,
int end_extent_writepage(struct page *page, int err, u64 start, u64 end);
int repair_eb_io_failure(struct btrfs_root *root, struct extent_buffer *eb,
int mirror_num);
+#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
+noinline u64 find_lock_delalloc_range(struct inode *inode,
+ struct extent_io_tree *tree,
+ struct page *locked_page, u64 *start,
+ u64 *end, u64 max_bytes);
+#endif
#endif
diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h
index 61adc44..93fba71 100644
--- a/fs/btrfs/extent_map.h
+++ b/fs/btrfs/extent_map.h
@@ -3,10 +3,10 @@
#include <linux/rbtree.h>
-#define EXTENT_MAP_LAST_BYTE (u64)-4
-#define EXTENT_MAP_HOLE (u64)-3
-#define EXTENT_MAP_INLINE (u64)-2
-#define EXTENT_MAP_DELALLOC (u64)-1
+#define EXTENT_MAP_LAST_BYTE ((u64)-4)
+#define EXTENT_MAP_HOLE ((u64)-3)
+#define EXTENT_MAP_INLINE ((u64)-2)
+#define EXTENT_MAP_DELALLOC ((u64)-1)
/* bits for the flags field */
#define EXTENT_FLAG_PINNED 0 /* this entry not yet on disk, don't free it */
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index a7bfc95..6f38488 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -23,6 +23,7 @@
#include "ctree.h"
#include "disk-io.h"
#include "transaction.h"
+#include "volumes.h"
#include "print-tree.h"
#define __MAX_CSUM_ITEMS(r, size) ((unsigned long)(((BTRFS_LEAF_DATA_SIZE(r) - \
@@ -152,28 +153,54 @@ int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans,
return ret;
}
+static void btrfs_io_bio_endio_readpage(struct btrfs_io_bio *bio, int err)
+{
+ kfree(bio->csum_allocated);
+}
+
static int __btrfs_lookup_bio_sums(struct btrfs_root *root,
struct inode *inode, struct bio *bio,
u64 logical_offset, u32 *dst, int dio)
{
- u32 sum[16];
- int len;
struct bio_vec *bvec = bio->bi_io_vec;
- int bio_index = 0;
+ struct btrfs_io_bio *btrfs_bio = btrfs_io_bio(bio);
+ struct btrfs_csum_item *item = NULL;
+ struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
+ struct btrfs_path *path;
+ u8 *csum;
u64 offset = 0;
u64 item_start_offset = 0;
u64 item_last_offset = 0;
u64 disk_bytenr;
u32 diff;
- u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
+ int nblocks;
+ int bio_index = 0;
int count;
- struct btrfs_path *path;
- struct btrfs_csum_item *item = NULL;
- struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
+ u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
+
+ nblocks = bio->bi_size >> inode->i_sb->s_blocksize_bits;
+ if (!dst) {
+ if (nblocks * csum_size > BTRFS_BIO_INLINE_CSUM_SIZE) {
+ btrfs_bio->csum_allocated = kmalloc(nblocks * csum_size,
+ GFP_NOFS);
+ if (!btrfs_bio->csum_allocated) {
+ btrfs_free_path(path);
+ return -ENOMEM;
+ }
+ btrfs_bio->csum = btrfs_bio->csum_allocated;
+ btrfs_bio->end_io = btrfs_io_bio_endio_readpage;
+ } else {
+ btrfs_bio->csum = btrfs_bio->csum_inline;
+ }
+ csum = btrfs_bio->csum;
+ } else {
+ csum = (u8 *)dst;
+ }
+
if (bio->bi_size > PAGE_CACHE_SIZE * 8)
path->reada = 2;
@@ -194,11 +221,10 @@ static int __btrfs_lookup_bio_sums(struct btrfs_root *root,
if (dio)
offset = logical_offset;
while (bio_index < bio->bi_vcnt) {
- len = min_t(int, ARRAY_SIZE(sum), bio->bi_vcnt - bio_index);
if (!dio)
offset = page_offset(bvec->bv_page) + bvec->bv_offset;
- count = btrfs_find_ordered_sum(inode, offset, disk_bytenr, sum,
- len);
+ count = btrfs_find_ordered_sum(inode, offset, disk_bytenr,
+ (u32 *)csum, nblocks);
if (count)
goto found;
@@ -213,7 +239,7 @@ static int __btrfs_lookup_bio_sums(struct btrfs_root *root,
path, disk_bytenr, 0);
if (IS_ERR(item)) {
count = 1;
- sum[0] = 0;
+ memset(csum, 0, csum_size);
if (BTRFS_I(inode)->root->root_key.objectid ==
BTRFS_DATA_RELOC_TREE_OBJECTID) {
set_extent_bits(io_tree, offset,
@@ -222,9 +248,7 @@ static int __btrfs_lookup_bio_sums(struct btrfs_root *root,
} else {
printk(KERN_INFO "btrfs no csum found "
"for inode %llu start %llu\n",
- (unsigned long long)
- btrfs_ino(inode),
- (unsigned long long)offset);
+ btrfs_ino(inode), offset);
}
item = NULL;
btrfs_release_path(path);
@@ -249,23 +273,14 @@ static int __btrfs_lookup_bio_sums(struct btrfs_root *root,
diff = disk_bytenr - item_start_offset;
diff = diff / root->sectorsize;
diff = diff * csum_size;
- count = min_t(int, len, (item_last_offset - disk_bytenr) >>
- inode->i_sb->s_blocksize_bits);
- read_extent_buffer(path->nodes[0], sum,
+ count = min_t(int, nblocks, (item_last_offset - disk_bytenr) >>
+ inode->i_sb->s_blocksize_bits);
+ read_extent_buffer(path->nodes[0], csum,
((unsigned long)item) + diff,
csum_size * count);
found:
- if (dst) {
- memcpy(dst, sum, count * csum_size);
- dst += count;
- } else {
- if (dio)
- extent_cache_csums_dio(io_tree, offset, sum,
- count);
- else
- extent_cache_csums(io_tree, bio, bio_index, sum,
- count);
- }
+ csum += count * csum_size;
+ nblocks -= count;
while (count--) {
disk_bytenr += bvec->bv_len;
offset += bvec->bv_len;
@@ -284,9 +299,19 @@ int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode,
}
int btrfs_lookup_bio_sums_dio(struct btrfs_root *root, struct inode *inode,
- struct bio *bio, u64 offset)
+ struct btrfs_dio_private *dip, struct bio *bio,
+ u64 offset)
{
- return __btrfs_lookup_bio_sums(root, inode, bio, offset, NULL, 1);
+ int len = (bio->bi_sector << 9) - dip->disk_bytenr;
+ u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
+ int ret;
+
+ len >>= inode->i_sb->s_blocksize_bits;
+ len *= csum_size;
+
+ ret = __btrfs_lookup_bio_sums(root, inode, bio, offset,
+ (u32 *)(dip->csum + len), 1);
+ return ret;
}
int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
@@ -304,6 +329,9 @@ int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
u64 csum_end;
u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
+ ASSERT(start == ALIGN(start, root->sectorsize) &&
+ (end + 1) == ALIGN(end + 1, root->sectorsize));
+
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
@@ -821,10 +849,8 @@ insert:
path->leave_spinning = 0;
if (ret < 0)
goto fail_unlock;
- if (ret != 0) {
- WARN_ON(1);
+ if (WARN_ON(ret != 0))
goto fail_unlock;
- }
leaf = path->nodes[0];
csum:
item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_csum_item);
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 8e686a4..82d0342 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -39,7 +39,6 @@
#include "print-tree.h"
#include "tree-log.h"
#include "locking.h"
-#include "compat.h"
#include "volumes.h"
static struct kmem_cache *btrfs_inode_defrag_cachep;
@@ -370,7 +369,7 @@ int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info)
u64 root_objectid = 0;
atomic_inc(&fs_info->defrag_running);
- while(1) {
+ while (1) {
/* Pause the auto defragger. */
if (test_bit(BTRFS_FS_STATE_REMOUNTING,
&fs_info->fs_state))
@@ -1281,6 +1280,7 @@ again:
}
wait_on_page_writeback(pages[i]);
}
+ faili = num_pages - 1;
err = 0;
if (start_pos < inode->i_size) {
struct btrfs_ordered_extent *ordered;
@@ -1299,8 +1299,10 @@ again:
unlock_page(pages[i]);
page_cache_release(pages[i]);
}
- btrfs_wait_ordered_range(inode, start_pos,
- last_pos - start_pos);
+ err = btrfs_wait_ordered_range(inode, start_pos,
+ last_pos - start_pos);
+ if (err)
+ goto fail;
goto again;
}
if (ordered)
@@ -1334,7 +1336,6 @@ fail:
static noinline int check_can_nocow(struct inode *inode, loff_t pos,
size_t *write_bytes)
{
- struct btrfs_trans_handle *trans;
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_ordered_extent *ordered;
u64 lockstart, lockend;
@@ -1356,16 +1357,8 @@ static noinline int check_can_nocow(struct inode *inode, loff_t pos,
btrfs_put_ordered_extent(ordered);
}
- trans = btrfs_join_transaction(root);
- if (IS_ERR(trans)) {
- unlock_extent(&BTRFS_I(inode)->io_tree, lockstart, lockend);
- return PTR_ERR(trans);
- }
-
num_bytes = lockend - lockstart + 1;
- ret = can_nocow_extent(trans, inode, lockstart, &num_bytes, NULL, NULL,
- NULL);
- btrfs_end_transaction(trans, root);
+ ret = can_nocow_extent(inode, lockstart, &num_bytes, NULL, NULL, NULL);
if (ret <= 0) {
ret = 0;
} else {
@@ -1727,7 +1720,7 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
*/
BTRFS_I(inode)->last_trans = root->fs_info->generation + 1;
BTRFS_I(inode)->last_sub_trans = root->log_transid;
- if (num_written > 0 || num_written == -EIOCBQUEUED) {
+ if (num_written > 0) {
err = generic_write_sync(file, pos, num_written);
if (err < 0 && num_written > 0)
num_written = err;
@@ -1818,8 +1811,13 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
atomic_inc(&root->log_batch);
full_sync = test_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
&BTRFS_I(inode)->runtime_flags);
- if (full_sync)
- btrfs_wait_ordered_range(inode, start, end - start + 1);
+ if (full_sync) {
+ ret = btrfs_wait_ordered_range(inode, start, end - start + 1);
+ if (ret) {
+ mutex_unlock(&inode->i_mutex);
+ goto out;
+ }
+ }
atomic_inc(&root->log_batch);
/*
@@ -1868,8 +1866,8 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
ret = btrfs_log_dentry_safe(trans, root, dentry);
if (ret < 0) {
- mutex_unlock(&inode->i_mutex);
- goto out;
+ /* Fallthrough and commit/free transaction. */
+ ret = 1;
}
/* we've logged all the items and now have a consistent
@@ -1885,27 +1883,20 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
mutex_unlock(&inode->i_mutex);
if (ret != BTRFS_NO_LOG_SYNC) {
- if (ret > 0) {
- /*
- * If we didn't already wait for ordered extents we need
- * to do that now.
- */
- if (!full_sync)
- btrfs_wait_ordered_range(inode, start,
- end - start + 1);
- ret = btrfs_commit_transaction(trans, root);
- } else {
+ if (!ret) {
ret = btrfs_sync_log(trans, root);
- if (ret == 0) {
+ if (!ret) {
ret = btrfs_end_transaction(trans, root);
- } else {
- if (!full_sync)
- btrfs_wait_ordered_range(inode, start,
- end -
- start + 1);
- ret = btrfs_commit_transaction(trans, root);
+ goto out;
}
}
+ if (!full_sync) {
+ ret = btrfs_wait_ordered_range(inode, start,
+ end - start + 1);
+ if (ret)
+ goto out;
+ }
+ ret = btrfs_commit_transaction(trans, root);
} else {
ret = btrfs_end_transaction(trans, root);
}
@@ -2076,7 +2067,9 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
bool same_page = ((offset >> PAGE_CACHE_SHIFT) ==
((offset + len - 1) >> PAGE_CACHE_SHIFT));
- btrfs_wait_ordered_range(inode, offset, len);
+ ret = btrfs_wait_ordered_range(inode, offset, len);
+ if (ret)
+ return ret;
mutex_lock(&inode->i_mutex);
/*
@@ -2145,8 +2138,12 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
btrfs_put_ordered_extent(ordered);
unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart,
lockend, &cached_state, GFP_NOFS);
- btrfs_wait_ordered_range(inode, lockstart,
- lockend - lockstart + 1);
+ ret = btrfs_wait_ordered_range(inode, lockstart,
+ lockend - lockstart + 1);
+ if (ret) {
+ mutex_unlock(&inode->i_mutex);
+ return ret;
+ }
}
path = btrfs_alloc_path();
@@ -2317,7 +2314,10 @@ static long btrfs_fallocate(struct file *file, int mode,
* wait for ordered IO before we have any locks. We'll loop again
* below with the locks held.
*/
- btrfs_wait_ordered_range(inode, alloc_start, alloc_end - alloc_start);
+ ret = btrfs_wait_ordered_range(inode, alloc_start,
+ alloc_end - alloc_start);
+ if (ret)
+ goto out;
locked_end = alloc_end - 1;
while (1) {
@@ -2341,8 +2341,10 @@ static long btrfs_fallocate(struct file *file, int mode,
* we can't wait on the range with the transaction
* running or with the extent lock held
*/
- btrfs_wait_ordered_range(inode, alloc_start,
- alloc_end - alloc_start);
+ ret = btrfs_wait_ordered_range(inode, alloc_start,
+ alloc_end - alloc_start);
+ if (ret)
+ goto out;
} else {
if (ordered)
btrfs_put_ordered_extent(ordered);
@@ -2414,14 +2416,12 @@ out_reserve_fail:
static int find_desired_extent(struct inode *inode, loff_t *offset, int whence)
{
struct btrfs_root *root = BTRFS_I(inode)->root;
- struct extent_map *em;
+ struct extent_map *em = NULL;
struct extent_state *cached_state = NULL;
u64 lockstart = *offset;
u64 lockend = i_size_read(inode);
u64 start = *offset;
- u64 orig_start = *offset;
u64 len = i_size_read(inode);
- u64 last_end = 0;
int ret = 0;
lockend = max_t(u64, root->sectorsize, lockend);
@@ -2438,89 +2438,35 @@ static int find_desired_extent(struct inode *inode, loff_t *offset, int whence)
lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend, 0,
&cached_state);
- /*
- * Delalloc is such a pain. If we have a hole and we have pending
- * delalloc for a portion of the hole we will get back a hole that
- * exists for the entire range since it hasn't been actually written
- * yet. So to take care of this case we need to look for an extent just
- * before the position we want in case there is outstanding delalloc
- * going on here.
- */
- if (whence == SEEK_HOLE && start != 0) {
- if (start <= root->sectorsize)
- em = btrfs_get_extent_fiemap(inode, NULL, 0, 0,
- root->sectorsize, 0);
- else
- em = btrfs_get_extent_fiemap(inode, NULL, 0,
- start - root->sectorsize,
- root->sectorsize, 0);
- if (IS_ERR(em)) {
- ret = PTR_ERR(em);
- goto out;
- }
- last_end = em->start + em->len;
- if (em->block_start == EXTENT_MAP_DELALLOC)
- last_end = min_t(u64, last_end, inode->i_size);
- free_extent_map(em);
- }
-
- while (1) {
+ while (start < inode->i_size) {
em = btrfs_get_extent_fiemap(inode, NULL, 0, start, len, 0);
if (IS_ERR(em)) {
ret = PTR_ERR(em);
+ em = NULL;
break;
}
- if (em->block_start == EXTENT_MAP_HOLE) {
- if (test_bit(EXTENT_FLAG_VACANCY, &em->flags)) {
- if (last_end <= orig_start) {
- free_extent_map(em);
- ret = -ENXIO;
- break;
- }
- }
-
- if (whence == SEEK_HOLE) {
- *offset = start;
- free_extent_map(em);
- break;
- }
- } else {
- if (whence == SEEK_DATA) {
- if (em->block_start == EXTENT_MAP_DELALLOC) {
- if (start >= inode->i_size) {
- free_extent_map(em);
- ret = -ENXIO;
- break;
- }
- }
-
- if (!test_bit(EXTENT_FLAG_PREALLOC,
- &em->flags)) {
- *offset = start;
- free_extent_map(em);
- break;
- }
- }
- }
+ if (whence == SEEK_HOLE &&
+ (em->block_start == EXTENT_MAP_HOLE ||
+ test_bit(EXTENT_FLAG_PREALLOC, &em->flags)))
+ break;
+ else if (whence == SEEK_DATA &&
+ (em->block_start != EXTENT_MAP_HOLE &&
+ !test_bit(EXTENT_FLAG_PREALLOC, &em->flags)))
+ break;
start = em->start + em->len;
- last_end = em->start + em->len;
-
- if (em->block_start == EXTENT_MAP_DELALLOC)
- last_end = min_t(u64, last_end, inode->i_size);
-
- if (test_bit(EXTENT_FLAG_VACANCY, &em->flags)) {
- free_extent_map(em);
- ret = -ENXIO;
- break;
- }
free_extent_map(em);
+ em = NULL;
cond_resched();
}
- if (!ret)
- *offset = min(*offset, inode->i_size);
-out:
+ free_extent_map(em);
+ if (!ret) {
+ if (whence == SEEK_DATA && start >= inode->i_size)
+ ret = -ENXIO;
+ else
+ *offset = min_t(loff_t, start, inode->i_size);
+ }
unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend,
&cached_state, GFP_NOFS);
return ret;
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index b21a3cd..057be95 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -218,15 +218,12 @@ int btrfs_check_trunc_cache_free_space(struct btrfs_root *root,
int btrfs_truncate_free_space_cache(struct btrfs_root *root,
struct btrfs_trans_handle *trans,
- struct btrfs_path *path,
struct inode *inode)
{
- loff_t oldsize;
int ret = 0;
- oldsize = i_size_read(inode);
btrfs_i_size_write(inode, 0);
- truncate_pagecache(inode, oldsize, 0);
+ truncate_pagecache(inode, 0);
/*
* We don't need an orphan item because truncating the free space cache
@@ -308,7 +305,7 @@ static void io_ctl_unmap_page(struct io_ctl *io_ctl)
static void io_ctl_map_page(struct io_ctl *io_ctl, int clear)
{
- BUG_ON(io_ctl->index >= io_ctl->num_pages);
+ ASSERT(io_ctl->index < io_ctl->num_pages);
io_ctl->page = io_ctl->pages[io_ctl->index++];
io_ctl->cur = kmap(io_ctl->page);
io_ctl->orig = io_ctl->cur;
@@ -673,8 +670,7 @@ static int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
btrfs_err(root->fs_info,
"free space inode generation (%llu) "
"did not match free space cache generation (%llu)",
- (unsigned long long)BTRFS_I(inode)->generation,
- (unsigned long long)generation);
+ BTRFS_I(inode)->generation, generation);
return 0;
}
@@ -729,7 +725,7 @@ static int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
goto free_cache;
}
} else {
- BUG_ON(!num_bitmaps);
+ ASSERT(num_bitmaps);
num_bitmaps--;
e->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS);
if (!e->bitmap) {
@@ -1012,8 +1008,13 @@ static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
if (ret)
goto out;
-
- btrfs_wait_ordered_range(inode, 0, (u64)-1);
+ ret = btrfs_wait_ordered_range(inode, 0, (u64)-1);
+ if (ret) {
+ clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, inode->i_size - 1,
+ EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, NULL,
+ GFP_NOFS);
+ goto out;
+ }
key.objectid = BTRFS_FREE_SPACE_OBJECTID;
key.offset = offset;
@@ -1029,7 +1030,7 @@ static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
leaf = path->nodes[0];
if (ret > 0) {
struct btrfs_key found_key;
- BUG_ON(!path->slots[0]);
+ ASSERT(path->slots[0]);
path->slots[0]--;
btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
if (found_key.objectid != BTRFS_FREE_SPACE_OBJECTID ||
@@ -1117,7 +1118,7 @@ int btrfs_write_out_cache(struct btrfs_root *root,
static inline unsigned long offset_to_bit(u64 bitmap_start, u32 unit,
u64 offset)
{
- BUG_ON(offset < bitmap_start);
+ ASSERT(offset >= bitmap_start);
offset -= bitmap_start;
return (unsigned long)(div_u64(offset, unit));
}
@@ -1272,7 +1273,7 @@ tree_search_offset(struct btrfs_free_space_ctl *ctl,
if (n) {
entry = rb_entry(n, struct btrfs_free_space,
offset_index);
- BUG_ON(entry->offset > offset);
+ ASSERT(entry->offset <= offset);
} else {
if (fuzzy)
return entry;
@@ -1336,7 +1337,7 @@ static int link_free_space(struct btrfs_free_space_ctl *ctl,
{
int ret = 0;
- BUG_ON(!info->bitmap && !info->bytes);
+ ASSERT(info->bytes || info->bitmap);
ret = tree_insert_offset(&ctl->free_space_offset, info->offset,
&info->offset_index, (info->bitmap != NULL));
if (ret)
@@ -1359,7 +1360,7 @@ static void recalculate_thresholds(struct btrfs_free_space_ctl *ctl)
max_bitmaps = max(max_bitmaps, 1);
- BUG_ON(ctl->total_bitmaps > max_bitmaps);
+ ASSERT(ctl->total_bitmaps <= max_bitmaps);
/*
* The goal is to keep the total amount of memory used per 1gb of space
@@ -1403,7 +1404,7 @@ static inline void __bitmap_clear_bits(struct btrfs_free_space_ctl *ctl,
start = offset_to_bit(info->offset, ctl->unit, offset);
count = bytes_to_bits(bytes, ctl->unit);
- BUG_ON(start + count > BITS_PER_BITMAP);
+ ASSERT(start + count <= BITS_PER_BITMAP);
bitmap_clear(info->bitmap, start, count);
@@ -1426,7 +1427,7 @@ static void bitmap_set_bits(struct btrfs_free_space_ctl *ctl,
start = offset_to_bit(info->offset, ctl->unit, offset);
count = bytes_to_bits(bytes, ctl->unit);
- BUG_ON(start + count > BITS_PER_BITMAP);
+ ASSERT(start + count <= BITS_PER_BITMAP);
bitmap_set(info->bitmap, start, count);
@@ -1434,13 +1435,19 @@ static void bitmap_set_bits(struct btrfs_free_space_ctl *ctl,
ctl->free_space += bytes;
}
+/*
+ * If we can not find suitable extent, we will use bytes to record
+ * the size of the max extent.
+ */
static int search_bitmap(struct btrfs_free_space_ctl *ctl,
struct btrfs_free_space *bitmap_info, u64 *offset,
u64 *bytes)
{
unsigned long found_bits = 0;
+ unsigned long max_bits = 0;
unsigned long bits, i;
unsigned long next_zero;
+ unsigned long extent_bits;
i = offset_to_bit(bitmap_info->offset, ctl->unit,
max_t(u64, *offset, bitmap_info->offset));
@@ -1449,9 +1456,12 @@ static int search_bitmap(struct btrfs_free_space_ctl *ctl,
for_each_set_bit_from(i, bitmap_info->bitmap, BITS_PER_BITMAP) {
next_zero = find_next_zero_bit(bitmap_info->bitmap,
BITS_PER_BITMAP, i);
- if ((next_zero - i) >= bits) {
- found_bits = next_zero - i;
+ extent_bits = next_zero - i;
+ if (extent_bits >= bits) {
+ found_bits = extent_bits;
break;
+ } else if (extent_bits > max_bits) {
+ max_bits = extent_bits;
}
i = next_zero;
}
@@ -1462,38 +1472,41 @@ static int search_bitmap(struct btrfs_free_space_ctl *ctl,
return 0;
}
+ *bytes = (u64)(max_bits) * ctl->unit;
return -1;
}
+/* Cache the size of the max extent in bytes */
static struct btrfs_free_space *
find_free_space(struct btrfs_free_space_ctl *ctl, u64 *offset, u64 *bytes,
- unsigned long align)
+ unsigned long align, u64 *max_extent_size)
{
struct btrfs_free_space *entry;
struct rb_node *node;
- u64 ctl_off;
u64 tmp;
u64 align_off;
int ret;
if (!ctl->free_space_offset.rb_node)
- return NULL;
+ goto out;
entry = tree_search_offset(ctl, offset_to_bitmap(ctl, *offset), 0, 1);
if (!entry)
- return NULL;
+ goto out;
for (node = &entry->offset_index; node; node = rb_next(node)) {
entry = rb_entry(node, struct btrfs_free_space, offset_index);
- if (entry->bytes < *bytes)
+ if (entry->bytes < *bytes) {
+ if (entry->bytes > *max_extent_size)
+ *max_extent_size = entry->bytes;
continue;
+ }
/* make sure the space returned is big enough
* to match our requested alignment
*/
if (*bytes >= align) {
- ctl_off = entry->offset - ctl->start;
- tmp = ctl_off + align - 1;;
+ tmp = entry->offset - ctl->start + align - 1;
do_div(tmp, align);
tmp = tmp * align + ctl->start;
align_off = tmp - entry->offset;
@@ -1502,14 +1515,22 @@ find_free_space(struct btrfs_free_space_ctl *ctl, u64 *offset, u64 *bytes,
tmp = entry->offset;
}
- if (entry->bytes < *bytes + align_off)
+ if (entry->bytes < *bytes + align_off) {
+ if (entry->bytes > *max_extent_size)
+ *max_extent_size = entry->bytes;
continue;
+ }
if (entry->bitmap) {
- ret = search_bitmap(ctl, entry, &tmp, bytes);
+ u64 size = *bytes;
+
+ ret = search_bitmap(ctl, entry, &tmp, &size);
if (!ret) {
*offset = tmp;
+ *bytes = size;
return entry;
+ } else if (size > *max_extent_size) {
+ *max_extent_size = size;
}
continue;
}
@@ -1518,7 +1539,7 @@ find_free_space(struct btrfs_free_space_ctl *ctl, u64 *offset, u64 *bytes,
*bytes = entry->bytes - align_off;
return entry;
}
-
+out:
return NULL;
}
@@ -1742,7 +1763,7 @@ no_cluster_bitmap:
bitmap_info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset),
1, 0);
if (!bitmap_info) {
- BUG_ON(added);
+ ASSERT(added == 0);
goto new_bitmap;
}
@@ -1882,7 +1903,7 @@ out:
if (ret) {
printk(KERN_CRIT "btrfs: unable to add free space :%d\n", ret);
- BUG_ON(ret == -EEXIST);
+ ASSERT(ret != -EEXIST);
}
return ret;
@@ -1991,8 +2012,7 @@ void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group,
if (info->bytes >= bytes && !block_group->ro)
count++;
printk(KERN_CRIT "entry offset %llu, bytes %llu, bitmap %s\n",
- (unsigned long long)info->offset,
- (unsigned long long)info->bytes,
+ info->offset, info->bytes,
(info->bitmap) ? "yes" : "no");
}
printk(KERN_INFO "block group has cluster?: %s\n",
@@ -2120,7 +2140,8 @@ void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group)
}
u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group,
- u64 offset, u64 bytes, u64 empty_size)
+ u64 offset, u64 bytes, u64 empty_size,
+ u64 *max_extent_size)
{
struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
struct btrfs_free_space *entry = NULL;
@@ -2131,7 +2152,7 @@ u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group,
spin_lock(&ctl->tree_lock);
entry = find_free_space(ctl, &offset, &bytes_search,
- block_group->full_stripe_len);
+ block_group->full_stripe_len, max_extent_size);
if (!entry)
goto out;
@@ -2141,7 +2162,6 @@ u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group,
if (!entry->bytes)
free_bitmap(ctl, entry);
} else {
-
unlink_free_space(ctl, entry);
align_gap_len = offset - entry->offset;
align_gap = entry->offset;
@@ -2155,7 +2175,6 @@ u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group,
else
link_free_space(ctl, entry);
}
-
out:
spin_unlock(&ctl->tree_lock);
@@ -2210,7 +2229,8 @@ int btrfs_return_cluster_to_free_space(
static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group_cache *block_group,
struct btrfs_free_cluster *cluster,
struct btrfs_free_space *entry,
- u64 bytes, u64 min_start)
+ u64 bytes, u64 min_start,
+ u64 *max_extent_size)
{
struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
int err;
@@ -2222,8 +2242,11 @@ static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group_cache *block_group,
search_bytes = bytes;
err = search_bitmap(ctl, entry, &search_start, &search_bytes);
- if (err)
+ if (err) {
+ if (search_bytes > *max_extent_size)
+ *max_extent_size = search_bytes;
return 0;
+ }
ret = search_start;
__bitmap_clear_bits(ctl, entry, ret, bytes);
@@ -2238,7 +2261,7 @@ static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group_cache *block_group,
*/
u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group,
struct btrfs_free_cluster *cluster, u64 bytes,
- u64 min_start)
+ u64 min_start, u64 *max_extent_size)
{
struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
struct btrfs_free_space *entry = NULL;
@@ -2257,7 +2280,10 @@ u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group,
goto out;
entry = rb_entry(node, struct btrfs_free_space, offset_index);
- while(1) {
+ while (1) {
+ if (entry->bytes < bytes && entry->bytes > *max_extent_size)
+ *max_extent_size = entry->bytes;
+
if (entry->bytes < bytes ||
(!entry->bitmap && entry->offset < min_start)) {
node = rb_next(&entry->offset_index);
@@ -2271,7 +2297,8 @@ u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group,
if (entry->bitmap) {
ret = btrfs_alloc_from_bitmap(block_group,
cluster, entry, bytes,
- cluster->window_start);
+ cluster->window_start,
+ max_extent_size);
if (ret == 0) {
node = rb_next(&entry->offset_index);
if (!node)
@@ -2371,7 +2398,7 @@ again:
rb_erase(&entry->offset_index, &ctl->free_space_offset);
ret = tree_insert_offset(&cluster->root, entry->offset,
&entry->offset_index, 1);
- BUG_ON(ret); /* -EEXIST; Logic error */
+ ASSERT(!ret); /* -EEXIST; Logic error */
trace_btrfs_setup_cluster(block_group, cluster,
total_found * ctl->unit, 1);
@@ -2464,7 +2491,7 @@ setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group,
ret = tree_insert_offset(&cluster->root, entry->offset,
&entry->offset_index, 0);
total_size += entry->bytes;
- BUG_ON(ret); /* -EEXIST; Logic error */
+ ASSERT(!ret); /* -EEXIST; Logic error */
} while (node && entry != last);
cluster->max_size = max_extent;
@@ -2525,8 +2552,7 @@ setup_cluster_bitmap(struct btrfs_block_group_cache *block_group,
* returns zero and sets up cluster if things worked out, otherwise
* it returns -enospc
*/
-int btrfs_find_space_cluster(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
+int btrfs_find_space_cluster(struct btrfs_root *root,
struct btrfs_block_group_cache *block_group,
struct btrfs_free_cluster *cluster,
u64 offset, u64 bytes, u64 empty_size)
@@ -2856,7 +2882,7 @@ u64 btrfs_find_ino_for_alloc(struct btrfs_root *fs_root)
ret = search_bitmap(ctl, entry, &offset, &count);
/* Logic error; Should be empty if it can't find anything */
- BUG_ON(ret);
+ ASSERT(!ret);
ino = offset;
bitmap_clear_bits(ctl, entry, offset, 1);
@@ -2945,19 +2971,15 @@ out:
int btrfs_write_out_ino_cache(struct btrfs_root *root,
struct btrfs_trans_handle *trans,
- struct btrfs_path *path)
+ struct btrfs_path *path,
+ struct inode *inode)
{
struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
- struct inode *inode;
int ret;
if (!btrfs_test_opt(root, INODE_MAP_CACHE))
return 0;
- inode = lookup_free_ino_inode(root, path);
- if (IS_ERR(inode))
- return 0;
-
ret = __btrfs_write_out_cache(root, inode, ctl, NULL, trans, path, 0);
if (ret) {
btrfs_delalloc_release_metadata(inode, inode->i_size);
@@ -2968,38 +2990,72 @@ int btrfs_write_out_ino_cache(struct btrfs_root *root,
#endif
}
- iput(inode);
return ret;
}
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
-static struct btrfs_block_group_cache *init_test_block_group(void)
+/*
+ * Use this if you need to make a bitmap or extent entry specifically, it
+ * doesn't do any of the merging that add_free_space does, this acts a lot like
+ * how the free space cache loading stuff works, so you can get really weird
+ * configurations.
+ */
+int test_add_free_space_entry(struct btrfs_block_group_cache *cache,
+ u64 offset, u64 bytes, bool bitmap)
{
- struct btrfs_block_group_cache *cache;
+ struct btrfs_free_space_ctl *ctl = cache->free_space_ctl;
+ struct btrfs_free_space *info = NULL, *bitmap_info;
+ void *map = NULL;
+ u64 bytes_added;
+ int ret;
- cache = kzalloc(sizeof(*cache), GFP_NOFS);
- if (!cache)
- return NULL;
- cache->free_space_ctl = kzalloc(sizeof(*cache->free_space_ctl),
- GFP_NOFS);
- if (!cache->free_space_ctl) {
- kfree(cache);
- return NULL;
+again:
+ if (!info) {
+ info = kmem_cache_zalloc(btrfs_free_space_cachep, GFP_NOFS);
+ if (!info)
+ return -ENOMEM;
}
- cache->key.objectid = 0;
- cache->key.offset = 1024 * 1024 * 1024;
- cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
- cache->sectorsize = 4096;
+ if (!bitmap) {
+ spin_lock(&ctl->tree_lock);
+ info->offset = offset;
+ info->bytes = bytes;
+ ret = link_free_space(ctl, info);
+ spin_unlock(&ctl->tree_lock);
+ if (ret)
+ kmem_cache_free(btrfs_free_space_cachep, info);
+ return ret;
+ }
- spin_lock_init(&cache->lock);
- INIT_LIST_HEAD(&cache->list);
- INIT_LIST_HEAD(&cache->cluster_list);
- INIT_LIST_HEAD(&cache->new_bg_list);
+ if (!map) {
+ map = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS);
+ if (!map) {
+ kmem_cache_free(btrfs_free_space_cachep, info);
+ return -ENOMEM;
+ }
+ }
- btrfs_init_free_space_ctl(cache);
+ spin_lock(&ctl->tree_lock);
+ bitmap_info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset),
+ 1, 0);
+ if (!bitmap_info) {
+ info->bitmap = map;
+ map = NULL;
+ add_new_bitmap(ctl, info, offset);
+ bitmap_info = info;
+ }
- return cache;
+ bytes_added = add_bytes_to_bitmap(ctl, bitmap_info, offset, bytes);
+ bytes -= bytes_added;
+ offset += bytes_added;
+ spin_unlock(&ctl->tree_lock);
+
+ if (bytes)
+ goto again;
+
+ if (map)
+ kfree(map);
+ return 0;
}
/*
@@ -3007,8 +3063,8 @@ static struct btrfs_block_group_cache *init_test_block_group(void)
* just used to check the absence of space, so if there is free space in the
* range at all we will return 1.
*/
-static int check_exists(struct btrfs_block_group_cache *cache, u64 offset,
- u64 bytes)
+int test_check_exists(struct btrfs_block_group_cache *cache,
+ u64 offset, u64 bytes)
{
struct btrfs_free_space_ctl *ctl = cache->free_space_ctl;
struct btrfs_free_space *info;
@@ -3085,411 +3141,4 @@ out:
spin_unlock(&ctl->tree_lock);
return ret;
}
-
-/*
- * Use this if you need to make a bitmap or extent entry specifically, it
- * doesn't do any of the merging that add_free_space does, this acts a lot like
- * how the free space cache loading stuff works, so you can get really weird
- * configurations.
- */
-static int add_free_space_entry(struct btrfs_block_group_cache *cache,
- u64 offset, u64 bytes, bool bitmap)
-{
- struct btrfs_free_space_ctl *ctl = cache->free_space_ctl;
- struct btrfs_free_space *info = NULL, *bitmap_info;
- void *map = NULL;
- u64 bytes_added;
- int ret;
-
-again:
- if (!info) {
- info = kmem_cache_zalloc(btrfs_free_space_cachep, GFP_NOFS);
- if (!info)
- return -ENOMEM;
- }
-
- if (!bitmap) {
- spin_lock(&ctl->tree_lock);
- info->offset = offset;
- info->bytes = bytes;
- ret = link_free_space(ctl, info);
- spin_unlock(&ctl->tree_lock);
- if (ret)
- kmem_cache_free(btrfs_free_space_cachep, info);
- return ret;
- }
-
- if (!map) {
- map = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS);
- if (!map) {
- kmem_cache_free(btrfs_free_space_cachep, info);
- return -ENOMEM;
- }
- }
-
- spin_lock(&ctl->tree_lock);
- bitmap_info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset),
- 1, 0);
- if (!bitmap_info) {
- info->bitmap = map;
- map = NULL;
- add_new_bitmap(ctl, info, offset);
- bitmap_info = info;
- }
-
- bytes_added = add_bytes_to_bitmap(ctl, bitmap_info, offset, bytes);
- bytes -= bytes_added;
- offset += bytes_added;
- spin_unlock(&ctl->tree_lock);
-
- if (bytes)
- goto again;
-
- if (map)
- kfree(map);
- return 0;
-}
-
-#define test_msg(fmt, ...) printk(KERN_INFO "btrfs: selftest: " fmt, ##__VA_ARGS__)
-
-/*
- * This test just does basic sanity checking, making sure we can add an exten
- * entry and remove space from either end and the middle, and make sure we can
- * remove space that covers adjacent extent entries.
- */
-static int test_extents(struct btrfs_block_group_cache *cache)
-{
- int ret = 0;
-
- test_msg("Running extent only tests\n");
-
- /* First just make sure we can remove an entire entry */
- ret = btrfs_add_free_space(cache, 0, 4 * 1024 * 1024);
- if (ret) {
- test_msg("Error adding initial extents %d\n", ret);
- return ret;
- }
-
- ret = btrfs_remove_free_space(cache, 0, 4 * 1024 * 1024);
- if (ret) {
- test_msg("Error removing extent %d\n", ret);
- return ret;
- }
-
- if (check_exists(cache, 0, 4 * 1024 * 1024)) {
- test_msg("Full remove left some lingering space\n");
- return -1;
- }
-
- /* Ok edge and middle cases now */
- ret = btrfs_add_free_space(cache, 0, 4 * 1024 * 1024);
- if (ret) {
- test_msg("Error adding half extent %d\n", ret);
- return ret;
- }
-
- ret = btrfs_remove_free_space(cache, 3 * 1024 * 1024, 1 * 1024 * 1024);
- if (ret) {
- test_msg("Error removing tail end %d\n", ret);
- return ret;
- }
-
- ret = btrfs_remove_free_space(cache, 0, 1 * 1024 * 1024);
- if (ret) {
- test_msg("Error removing front end %d\n", ret);
- return ret;
- }
-
- ret = btrfs_remove_free_space(cache, 2 * 1024 * 1024, 4096);
- if (ret) {
- test_msg("Error removing middle piece %d\n", ret);
- return ret;
- }
-
- if (check_exists(cache, 0, 1 * 1024 * 1024)) {
- test_msg("Still have space at the front\n");
- return -1;
- }
-
- if (check_exists(cache, 2 * 1024 * 1024, 4096)) {
- test_msg("Still have space in the middle\n");
- return -1;
- }
-
- if (check_exists(cache, 3 * 1024 * 1024, 1 * 1024 * 1024)) {
- test_msg("Still have space at the end\n");
- return -1;
- }
-
- /* Cleanup */
- __btrfs_remove_free_space_cache(cache->free_space_ctl);
-
- return 0;
-}
-
-static int test_bitmaps(struct btrfs_block_group_cache *cache)
-{
- u64 next_bitmap_offset;
- int ret;
-
- test_msg("Running bitmap only tests\n");
-
- ret = add_free_space_entry(cache, 0, 4 * 1024 * 1024, 1);
- if (ret) {
- test_msg("Couldn't create a bitmap entry %d\n", ret);
- return ret;
- }
-
- ret = btrfs_remove_free_space(cache, 0, 4 * 1024 * 1024);
- if (ret) {
- test_msg("Error removing bitmap full range %d\n", ret);
- return ret;
- }
-
- if (check_exists(cache, 0, 4 * 1024 * 1024)) {
- test_msg("Left some space in bitmap\n");
- return -1;
- }
-
- ret = add_free_space_entry(cache, 0, 4 * 1024 * 1024, 1);
- if (ret) {
- test_msg("Couldn't add to our bitmap entry %d\n", ret);
- return ret;
- }
-
- ret = btrfs_remove_free_space(cache, 1 * 1024 * 1024, 2 * 1024 * 1024);
- if (ret) {
- test_msg("Couldn't remove middle chunk %d\n", ret);
- return ret;
- }
-
- /*
- * The first bitmap we have starts at offset 0 so the next one is just
- * at the end of the first bitmap.
- */
- next_bitmap_offset = (u64)(BITS_PER_BITMAP * 4096);
-
- /* Test a bit straddling two bitmaps */
- ret = add_free_space_entry(cache, next_bitmap_offset -
- (2 * 1024 * 1024), 4 * 1024 * 1024, 1);
- if (ret) {
- test_msg("Couldn't add space that straddles two bitmaps %d\n",
- ret);
- return ret;
- }
-
- ret = btrfs_remove_free_space(cache, next_bitmap_offset -
- (1 * 1024 * 1024), 2 * 1024 * 1024);
- if (ret) {
- test_msg("Couldn't remove overlapping space %d\n", ret);
- return ret;
- }
-
- if (check_exists(cache, next_bitmap_offset - (1 * 1024 * 1024),
- 2 * 1024 * 1024)) {
- test_msg("Left some space when removing overlapping\n");
- return -1;
- }
-
- __btrfs_remove_free_space_cache(cache->free_space_ctl);
-
- return 0;
-}
-
-/* This is the high grade jackassery */
-static int test_bitmaps_and_extents(struct btrfs_block_group_cache *cache)
-{
- u64 bitmap_offset = (u64)(BITS_PER_BITMAP * 4096);
- int ret;
-
- test_msg("Running bitmap and extent tests\n");
-
- /*
- * First let's do something simple, an extent at the same offset as the
- * bitmap, but the free space completely in the extent and then
- * completely in the bitmap.
- */
- ret = add_free_space_entry(cache, 4 * 1024 * 1024, 1 * 1024 * 1024, 1);
- if (ret) {
- test_msg("Couldn't create bitmap entry %d\n", ret);
- return ret;
- }
-
- ret = add_free_space_entry(cache, 0, 1 * 1024 * 1024, 0);
- if (ret) {
- test_msg("Couldn't add extent entry %d\n", ret);
- return ret;
- }
-
- ret = btrfs_remove_free_space(cache, 0, 1 * 1024 * 1024);
- if (ret) {
- test_msg("Couldn't remove extent entry %d\n", ret);
- return ret;
- }
-
- if (check_exists(cache, 0, 1 * 1024 * 1024)) {
- test_msg("Left remnants after our remove\n");
- return -1;
- }
-
- /* Now to add back the extent entry and remove from the bitmap */
- ret = add_free_space_entry(cache, 0, 1 * 1024 * 1024, 0);
- if (ret) {
- test_msg("Couldn't re-add extent entry %d\n", ret);
- return ret;
- }
-
- ret = btrfs_remove_free_space(cache, 4 * 1024 * 1024, 1 * 1024 * 1024);
- if (ret) {
- test_msg("Couldn't remove from bitmap %d\n", ret);
- return ret;
- }
-
- if (check_exists(cache, 4 * 1024 * 1024, 1 * 1024 * 1024)) {
- test_msg("Left remnants in the bitmap\n");
- return -1;
- }
-
- /*
- * Ok so a little more evil, extent entry and bitmap at the same offset,
- * removing an overlapping chunk.
- */
- ret = add_free_space_entry(cache, 1 * 1024 * 1024, 4 * 1024 * 1024, 1);
- if (ret) {
- test_msg("Couldn't add to a bitmap %d\n", ret);
- return ret;
- }
-
- ret = btrfs_remove_free_space(cache, 512 * 1024, 3 * 1024 * 1024);
- if (ret) {
- test_msg("Couldn't remove overlapping space %d\n", ret);
- return ret;
- }
-
- if (check_exists(cache, 512 * 1024, 3 * 1024 * 1024)) {
- test_msg("Left over peices after removing overlapping\n");
- return -1;
- }
-
- __btrfs_remove_free_space_cache(cache->free_space_ctl);
-
- /* Now with the extent entry offset into the bitmap */
- ret = add_free_space_entry(cache, 4 * 1024 * 1024, 4 * 1024 * 1024, 1);
- if (ret) {
- test_msg("Couldn't add space to the bitmap %d\n", ret);
- return ret;
- }
-
- ret = add_free_space_entry(cache, 2 * 1024 * 1024, 2 * 1024 * 1024, 0);
- if (ret) {
- test_msg("Couldn't add extent to the cache %d\n", ret);
- return ret;
- }
-
- ret = btrfs_remove_free_space(cache, 3 * 1024 * 1024, 4 * 1024 * 1024);
- if (ret) {
- test_msg("Problem removing overlapping space %d\n", ret);
- return ret;
- }
-
- if (check_exists(cache, 3 * 1024 * 1024, 4 * 1024 * 1024)) {
- test_msg("Left something behind when removing space");
- return -1;
- }
-
- /*
- * This has blown up in the past, the extent entry starts before the
- * bitmap entry, but we're trying to remove an offset that falls
- * completely within the bitmap range and is in both the extent entry
- * and the bitmap entry, looks like this
- *
- * [ extent ]
- * [ bitmap ]
- * [ del ]
- */
- __btrfs_remove_free_space_cache(cache->free_space_ctl);
- ret = add_free_space_entry(cache, bitmap_offset + 4 * 1024 * 1024,
- 4 * 1024 * 1024, 1);
- if (ret) {
- test_msg("Couldn't add bitmap %d\n", ret);
- return ret;
- }
-
- ret = add_free_space_entry(cache, bitmap_offset - 1 * 1024 * 1024,
- 5 * 1024 * 1024, 0);
- if (ret) {
- test_msg("Couldn't add extent entry %d\n", ret);
- return ret;
- }
-
- ret = btrfs_remove_free_space(cache, bitmap_offset + 1 * 1024 * 1024,
- 5 * 1024 * 1024);
- if (ret) {
- test_msg("Failed to free our space %d\n", ret);
- return ret;
- }
-
- if (check_exists(cache, bitmap_offset + 1 * 1024 * 1024,
- 5 * 1024 * 1024)) {
- test_msg("Left stuff over\n");
- return -1;
- }
-
- __btrfs_remove_free_space_cache(cache->free_space_ctl);
-
- /*
- * This blew up before, we have part of the free space in a bitmap and
- * then the entirety of the rest of the space in an extent. This used
- * to return -EAGAIN back from btrfs_remove_extent, make sure this
- * doesn't happen.
- */
- ret = add_free_space_entry(cache, 1 * 1024 * 1024, 2 * 1024 * 1024, 1);
- if (ret) {
- test_msg("Couldn't add bitmap entry %d\n", ret);
- return ret;
- }
-
- ret = add_free_space_entry(cache, 3 * 1024 * 1024, 1 * 1024 * 1024, 0);
- if (ret) {
- test_msg("Couldn't add extent entry %d\n", ret);
- return ret;
- }
-
- ret = btrfs_remove_free_space(cache, 1 * 1024 * 1024, 3 * 1024 * 1024);
- if (ret) {
- test_msg("Error removing bitmap and extent overlapping %d\n", ret);
- return ret;
- }
-
- __btrfs_remove_free_space_cache(cache->free_space_ctl);
- return 0;
-}
-
-void btrfs_test_free_space_cache(void)
-{
- struct btrfs_block_group_cache *cache;
-
- test_msg("Running btrfs free space cache tests\n");
-
- cache = init_test_block_group();
- if (!cache) {
- test_msg("Couldn't run the tests\n");
- return;
- }
-
- if (test_extents(cache))
- goto out;
- if (test_bitmaps(cache))
- goto out;
- if (test_bitmaps_and_extents(cache))
- goto out;
-out:
- __btrfs_remove_free_space_cache(cache->free_space_ctl);
- kfree(cache->free_space_ctl);
- kfree(cache);
- test_msg("Free space cache tests finished\n");
-}
-#undef test_msg
-#else /* !CONFIG_BTRFS_FS_RUN_SANITY_TESTS */
-void btrfs_test_free_space_cache(void) {}
-#endif /* !CONFIG_BTRFS_FS_RUN_SANITY_TESTS */
+#endif /* CONFIG_BTRFS_FS_RUN_SANITY_TESTS */
diff --git a/fs/btrfs/free-space-cache.h b/fs/btrfs/free-space-cache.h
index 894116b7..0cf4977 100644
--- a/fs/btrfs/free-space-cache.h
+++ b/fs/btrfs/free-space-cache.h
@@ -58,7 +58,6 @@ int btrfs_check_trunc_cache_free_space(struct btrfs_root *root,
struct btrfs_block_rsv *rsv);
int btrfs_truncate_free_space_cache(struct btrfs_root *root,
struct btrfs_trans_handle *trans,
- struct btrfs_path *path,
struct inode *inode);
int load_free_space_cache(struct btrfs_fs_info *fs_info,
struct btrfs_block_group_cache *block_group);
@@ -76,7 +75,8 @@ int load_free_ino_cache(struct btrfs_fs_info *fs_info,
struct btrfs_root *root);
int btrfs_write_out_ino_cache(struct btrfs_root *root,
struct btrfs_trans_handle *trans,
- struct btrfs_path *path);
+ struct btrfs_path *path,
+ struct inode *inode);
void btrfs_init_free_space_ctl(struct btrfs_block_group_cache *block_group);
int __btrfs_add_free_space(struct btrfs_free_space_ctl *ctl,
@@ -94,25 +94,31 @@ void __btrfs_remove_free_space_cache(struct btrfs_free_space_ctl *ctl);
void btrfs_remove_free_space_cache(struct btrfs_block_group_cache
*block_group);
u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group,
- u64 offset, u64 bytes, u64 empty_size);
+ u64 offset, u64 bytes, u64 empty_size,
+ u64 *max_extent_size);
u64 btrfs_find_ino_for_alloc(struct btrfs_root *fs_root);
void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group,
u64 bytes);
-int btrfs_find_space_cluster(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
+int btrfs_find_space_cluster(struct btrfs_root *root,
struct btrfs_block_group_cache *block_group,
struct btrfs_free_cluster *cluster,
u64 offset, u64 bytes, u64 empty_size);
void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster);
u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group,
struct btrfs_free_cluster *cluster, u64 bytes,
- u64 min_start);
+ u64 min_start, u64 *max_extent_size);
int btrfs_return_cluster_to_free_space(
struct btrfs_block_group_cache *block_group,
struct btrfs_free_cluster *cluster);
int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group,
u64 *trimmed, u64 start, u64 end, u64 minlen);
-void btrfs_test_free_space_cache(void);
+/* Support functions for runnint our sanity tests */
+#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
+int test_add_free_space_entry(struct btrfs_block_group_cache *cache,
+ u64 offset, u64 bytes, bool bitmap);
+int test_check_exists(struct btrfs_block_group_cache *cache,
+ u64 offset, u64 bytes);
+#endif
#endif
diff --git a/fs/btrfs/inode-item.c b/fs/btrfs/inode-item.c
index e0b7034..ec82fae 100644
--- a/fs/btrfs/inode-item.c
+++ b/fs/btrfs/inode-item.c
@@ -369,7 +369,7 @@ static int btrfs_insert_inode_extref(struct btrfs_trans_handle *trans,
goto out;
leaf = path->nodes[0];
- item = btrfs_item_nr(leaf, path->slots[0]);
+ item = btrfs_item_nr(path->slots[0]);
ptr = (unsigned long)btrfs_item_ptr(leaf, path->slots[0], char);
ptr += btrfs_item_size(leaf, item) - ins_len;
extref = (struct btrfs_inode_extref *)ptr;
diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c
index 2c66ddb..ab485e5 100644
--- a/fs/btrfs/inode-map.c
+++ b/fs/btrfs/inode-map.c
@@ -78,10 +78,8 @@ again:
btrfs_transaction_in_commit(fs_info)) {
leaf = path->nodes[0];
- if (btrfs_header_nritems(leaf) == 0) {
- WARN_ON(1);
+ if (WARN_ON(btrfs_header_nritems(leaf) == 0))
break;
- }
/*
* Save the key so we can advances forward
@@ -237,7 +235,7 @@ again:
start_caching(root);
if (objectid <= root->cache_progress ||
- objectid > root->highest_objectid)
+ objectid >= root->highest_objectid)
__btrfs_add_free_space(ctl, objectid, 1);
else
__btrfs_add_free_space(pinned, objectid, 1);
@@ -412,8 +410,7 @@ int btrfs_save_ino_cache(struct btrfs_root *root,
return 0;
/* Don't save inode cache if we are deleting this root */
- if (btrfs_root_refs(&root->root_item) == 0 &&
- root != root->fs_info->tree_root)
+ if (btrfs_root_refs(&root->root_item) == 0)
return 0;
if (!btrfs_test_opt(root, INODE_MAP_CACHE))
@@ -467,7 +464,7 @@ again:
}
if (i_size_read(inode) > 0) {
- ret = btrfs_truncate_free_space_cache(root, trans, path, inode);
+ ret = btrfs_truncate_free_space_cache(root, trans, inode);
if (ret) {
if (ret != -ENOSPC)
btrfs_abort_transaction(trans, root, ret);
@@ -504,7 +501,7 @@ again:
}
btrfs_free_reserved_data_space(inode, prealloc);
- ret = btrfs_write_out_ino_cache(root, trans, path);
+ ret = btrfs_write_out_ino_cache(root, trans, path, inode);
out_put:
iput(inode);
out_release:
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 021694c..da8d2f6 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -43,7 +43,6 @@
#include <linux/btrfs.h>
#include <linux/blkdev.h>
#include <linux/posix_acl_xattr.h>
-#include "compat.h"
#include "ctree.h"
#include "disk-io.h"
#include "transaction.h"
@@ -230,12 +229,13 @@ fail:
* does the checks required to make sure the data is small enough
* to fit as an inline extent.
*/
-static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct inode *inode, u64 start, u64 end,
- size_t compressed_size, int compress_type,
- struct page **compressed_pages)
+static noinline int cow_file_range_inline(struct btrfs_root *root,
+ struct inode *inode, u64 start,
+ u64 end, size_t compressed_size,
+ int compress_type,
+ struct page **compressed_pages)
{
+ struct btrfs_trans_handle *trans;
u64 isize = i_size_read(inode);
u64 actual_end = min(end + 1, isize);
u64 inline_len = actual_end - start;
@@ -256,9 +256,16 @@ static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans,
return 1;
}
+ trans = btrfs_join_transaction(root);
+ if (IS_ERR(trans))
+ return PTR_ERR(trans);
+ trans->block_rsv = &root->fs_info->delalloc_block_rsv;
+
ret = btrfs_drop_extents(trans, root, inode, start, aligned_end, 1);
- if (ret)
- return ret;
+ if (ret) {
+ btrfs_abort_transaction(trans, root, ret);
+ goto out;
+ }
if (isize > actual_end)
inline_len = min_t(u64, isize, actual_end);
@@ -267,15 +274,18 @@ static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans,
compress_type, compressed_pages);
if (ret && ret != -ENOSPC) {
btrfs_abort_transaction(trans, root, ret);
- return ret;
+ goto out;
} else if (ret == -ENOSPC) {
- return 1;
+ ret = 1;
+ goto out;
}
set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &BTRFS_I(inode)->runtime_flags);
btrfs_delalloc_release_metadata(inode, end + 1 - start);
btrfs_drop_extent_cache(inode, start, aligned_end - 1, 0);
- return 0;
+out:
+ btrfs_end_transaction(trans, root);
+ return ret;
}
struct async_extent {
@@ -343,7 +353,6 @@ static noinline int compress_file_range(struct inode *inode,
int *num_added)
{
struct btrfs_root *root = BTRFS_I(inode)->root;
- struct btrfs_trans_handle *trans;
u64 num_bytes;
u64 blocksize = root->sectorsize;
u64 actual_end;
@@ -461,45 +470,36 @@ again:
}
cont:
if (start == 0) {
- trans = btrfs_join_transaction(root);
- if (IS_ERR(trans)) {
- ret = PTR_ERR(trans);
- trans = NULL;
- goto cleanup_and_out;
- }
- trans->block_rsv = &root->fs_info->delalloc_block_rsv;
-
/* lets try to make an inline extent */
if (ret || total_in < (actual_end - start)) {
/* we didn't compress the entire range, try
* to make an uncompressed inline extent.
*/
- ret = cow_file_range_inline(trans, root, inode,
- start, end, 0, 0, NULL);
+ ret = cow_file_range_inline(root, inode, start, end,
+ 0, 0, NULL);
} else {
/* try making a compressed inline extent */
- ret = cow_file_range_inline(trans, root, inode,
- start, end,
+ ret = cow_file_range_inline(root, inode, start, end,
total_compressed,
compress_type, pages);
}
if (ret <= 0) {
+ unsigned long clear_flags = EXTENT_DELALLOC |
+ EXTENT_DEFRAG;
+ clear_flags |= (ret < 0) ? EXTENT_DO_ACCOUNTING : 0;
+
/*
* inline extent creation worked or returned error,
* we don't need to create any more async work items.
* Unlock and free up our temp pages.
*/
- extent_clear_unlock_delalloc(inode,
- &BTRFS_I(inode)->io_tree,
- start, end, NULL,
- EXTENT_CLEAR_UNLOCK_PAGE | EXTENT_CLEAR_DIRTY |
- EXTENT_CLEAR_DELALLOC |
- EXTENT_SET_WRITEBACK | EXTENT_END_WRITEBACK);
-
- btrfs_end_transaction(trans, root);
+ extent_clear_unlock_delalloc(inode, start, end, NULL,
+ clear_flags, PAGE_UNLOCK |
+ PAGE_CLEAR_DIRTY |
+ PAGE_SET_WRITEBACK |
+ PAGE_END_WRITEBACK);
goto free_pages_out;
}
- btrfs_end_transaction(trans, root);
}
if (will_compress) {
@@ -590,20 +590,6 @@ free_pages_out:
kfree(pages);
goto out;
-
-cleanup_and_out:
- extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree,
- start, end, NULL,
- EXTENT_CLEAR_UNLOCK_PAGE |
- EXTENT_CLEAR_DIRTY |
- EXTENT_CLEAR_DELALLOC |
- EXTENT_SET_WRITEBACK |
- EXTENT_END_WRITEBACK);
- if (!trans || IS_ERR(trans))
- btrfs_error(root->fs_info, ret, "Failed to join transaction");
- else
- btrfs_abort_transaction(trans, root, ret);
- goto free_pages_out;
}
/*
@@ -617,7 +603,6 @@ static noinline int submit_compressed_extents(struct inode *inode,
{
struct async_extent *async_extent;
u64 alloc_hint = 0;
- struct btrfs_trans_handle *trans;
struct btrfs_key ins;
struct extent_map *em;
struct btrfs_root *root = BTRFS_I(inode)->root;
@@ -678,20 +663,10 @@ retry:
lock_extent(io_tree, async_extent->start,
async_extent->start + async_extent->ram_size - 1);
- trans = btrfs_join_transaction(root);
- if (IS_ERR(trans)) {
- ret = PTR_ERR(trans);
- } else {
- trans->block_rsv = &root->fs_info->delalloc_block_rsv;
- ret = btrfs_reserve_extent(trans, root,
+ ret = btrfs_reserve_extent(root,
async_extent->compressed_size,
async_extent->compressed_size,
0, alloc_hint, &ins, 1);
- if (ret && ret != -ENOSPC)
- btrfs_abort_transaction(trans, root, ret);
- btrfs_end_transaction(trans, root);
- }
-
if (ret) {
int i;
@@ -770,16 +745,12 @@ retry:
/*
* clear dirty, set writeback and unlock the pages.
*/
- extent_clear_unlock_delalloc(inode,
- &BTRFS_I(inode)->io_tree,
- async_extent->start,
+ extent_clear_unlock_delalloc(inode, async_extent->start,
async_extent->start +
async_extent->ram_size - 1,
- NULL, EXTENT_CLEAR_UNLOCK_PAGE |
- EXTENT_CLEAR_UNLOCK |
- EXTENT_CLEAR_DELALLOC |
- EXTENT_CLEAR_DIRTY | EXTENT_SET_WRITEBACK);
-
+ NULL, EXTENT_LOCKED | EXTENT_DELALLOC,
+ PAGE_UNLOCK | PAGE_CLEAR_DIRTY |
+ PAGE_SET_WRITEBACK);
ret = btrfs_submit_compressed_write(inode,
async_extent->start,
async_extent->ram_size,
@@ -798,16 +769,13 @@ out:
out_free_reserve:
btrfs_free_reserved_extent(root, ins.objectid, ins.offset);
out_free:
- extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree,
- async_extent->start,
+ extent_clear_unlock_delalloc(inode, async_extent->start,
async_extent->start +
async_extent->ram_size - 1,
- NULL, EXTENT_CLEAR_UNLOCK_PAGE |
- EXTENT_CLEAR_UNLOCK |
- EXTENT_CLEAR_DELALLOC |
- EXTENT_CLEAR_DIRTY |
- EXTENT_SET_WRITEBACK |
- EXTENT_END_WRITEBACK);
+ NULL, EXTENT_LOCKED | EXTENT_DELALLOC |
+ EXTENT_DEFRAG | EXTENT_DO_ACCOUNTING,
+ PAGE_UNLOCK | PAGE_CLEAR_DIRTY |
+ PAGE_SET_WRITEBACK | PAGE_END_WRITEBACK);
kfree(async_extent);
goto again;
}
@@ -857,14 +825,13 @@ static u64 get_extent_allocation_hint(struct inode *inode, u64 start,
* required to start IO on it. It may be clean and already done with
* IO when we return.
*/
-static noinline int __cow_file_range(struct btrfs_trans_handle *trans,
- struct inode *inode,
- struct btrfs_root *root,
- struct page *locked_page,
- u64 start, u64 end, int *page_started,
- unsigned long *nr_written,
- int unlock)
+static noinline int cow_file_range(struct inode *inode,
+ struct page *locked_page,
+ u64 start, u64 end, int *page_started,
+ unsigned long *nr_written,
+ int unlock)
{
+ struct btrfs_root *root = BTRFS_I(inode)->root;
u64 alloc_hint = 0;
u64 num_bytes;
unsigned long ram_size;
@@ -876,7 +843,10 @@ static noinline int __cow_file_range(struct btrfs_trans_handle *trans,
struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
int ret = 0;
- BUG_ON(btrfs_is_free_space_inode(inode));
+ if (btrfs_is_free_space_inode(inode)) {
+ WARN_ON_ONCE(1);
+ return -EINVAL;
+ }
num_bytes = ALIGN(end - start + 1, blocksize);
num_bytes = max(blocksize, num_bytes);
@@ -885,29 +855,24 @@ static noinline int __cow_file_range(struct btrfs_trans_handle *trans,
/* if this is a small write inside eof, kick off defrag */
if (num_bytes < 64 * 1024 &&
(start > 0 || end + 1 < BTRFS_I(inode)->disk_i_size))
- btrfs_add_inode_defrag(trans, inode);
+ btrfs_add_inode_defrag(NULL, inode);
if (start == 0) {
/* lets try to make an inline extent */
- ret = cow_file_range_inline(trans, root, inode,
- start, end, 0, 0, NULL);
+ ret = cow_file_range_inline(root, inode, start, end, 0, 0,
+ NULL);
if (ret == 0) {
- extent_clear_unlock_delalloc(inode,
- &BTRFS_I(inode)->io_tree,
- start, end, NULL,
- EXTENT_CLEAR_UNLOCK_PAGE |
- EXTENT_CLEAR_UNLOCK |
- EXTENT_CLEAR_DELALLOC |
- EXTENT_CLEAR_DIRTY |
- EXTENT_SET_WRITEBACK |
- EXTENT_END_WRITEBACK);
+ extent_clear_unlock_delalloc(inode, start, end, NULL,
+ EXTENT_LOCKED | EXTENT_DELALLOC |
+ EXTENT_DEFRAG, PAGE_UNLOCK |
+ PAGE_CLEAR_DIRTY | PAGE_SET_WRITEBACK |
+ PAGE_END_WRITEBACK);
*nr_written = *nr_written +
(end - start + PAGE_CACHE_SIZE) / PAGE_CACHE_SIZE;
*page_started = 1;
goto out;
} else if (ret < 0) {
- btrfs_abort_transaction(trans, root, ret);
goto out_unlock;
}
}
@@ -922,13 +887,11 @@ static noinline int __cow_file_range(struct btrfs_trans_handle *trans,
unsigned long op;
cur_alloc_size = disk_num_bytes;
- ret = btrfs_reserve_extent(trans, root, cur_alloc_size,
+ ret = btrfs_reserve_extent(root, cur_alloc_size,
root->sectorsize, 0, alloc_hint,
&ins, 1);
- if (ret < 0) {
- btrfs_abort_transaction(trans, root, ret);
+ if (ret < 0)
goto out_unlock;
- }
em = alloc_extent_map();
if (!em) {
@@ -974,10 +937,8 @@ static noinline int __cow_file_range(struct btrfs_trans_handle *trans,
BTRFS_DATA_RELOC_TREE_OBJECTID) {
ret = btrfs_reloc_clone_csums(inode, start,
cur_alloc_size);
- if (ret) {
- btrfs_abort_transaction(trans, root, ret);
+ if (ret)
goto out_reserve;
- }
}
if (disk_num_bytes < cur_alloc_size)
@@ -990,13 +951,13 @@ static noinline int __cow_file_range(struct btrfs_trans_handle *trans,
* Do set the Private2 bit so we know this page was properly
* setup for writepage
*/
- op = unlock ? EXTENT_CLEAR_UNLOCK_PAGE : 0;
- op |= EXTENT_CLEAR_UNLOCK | EXTENT_CLEAR_DELALLOC |
- EXTENT_SET_PRIVATE2;
+ op = unlock ? PAGE_UNLOCK : 0;
+ op |= PAGE_SET_PRIVATE2;
- extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree,
- start, start + ram_size - 1,
- locked_page, op);
+ extent_clear_unlock_delalloc(inode, start,
+ start + ram_size - 1, locked_page,
+ EXTENT_LOCKED | EXTENT_DELALLOC,
+ op);
disk_num_bytes -= cur_alloc_size;
num_bytes -= cur_alloc_size;
alloc_hint = ins.objectid + ins.offset;
@@ -1008,52 +969,14 @@ out:
out_reserve:
btrfs_free_reserved_extent(root, ins.objectid, ins.offset);
out_unlock:
- extent_clear_unlock_delalloc(inode,
- &BTRFS_I(inode)->io_tree,
- start, end, locked_page,
- EXTENT_CLEAR_UNLOCK_PAGE |
- EXTENT_CLEAR_UNLOCK |
- EXTENT_CLEAR_DELALLOC |
- EXTENT_CLEAR_DIRTY |
- EXTENT_SET_WRITEBACK |
- EXTENT_END_WRITEBACK);
-
+ extent_clear_unlock_delalloc(inode, start, end, locked_page,
+ EXTENT_LOCKED | EXTENT_DO_ACCOUNTING |
+ EXTENT_DELALLOC | EXTENT_DEFRAG,
+ PAGE_UNLOCK | PAGE_CLEAR_DIRTY |
+ PAGE_SET_WRITEBACK | PAGE_END_WRITEBACK);
goto out;
}
-static noinline int cow_file_range(struct inode *inode,
- struct page *locked_page,
- u64 start, u64 end, int *page_started,
- unsigned long *nr_written,
- int unlock)
-{
- struct btrfs_trans_handle *trans;
- struct btrfs_root *root = BTRFS_I(inode)->root;
- int ret;
-
- trans = btrfs_join_transaction(root);
- if (IS_ERR(trans)) {
- extent_clear_unlock_delalloc(inode,
- &BTRFS_I(inode)->io_tree,
- start, end, locked_page,
- EXTENT_CLEAR_UNLOCK_PAGE |
- EXTENT_CLEAR_UNLOCK |
- EXTENT_CLEAR_DELALLOC |
- EXTENT_CLEAR_DIRTY |
- EXTENT_SET_WRITEBACK |
- EXTENT_END_WRITEBACK);
- return PTR_ERR(trans);
- }
- trans->block_rsv = &root->fs_info->delalloc_block_rsv;
-
- ret = __cow_file_range(trans, inode, root, locked_page, start, end,
- page_started, nr_written, unlock);
-
- btrfs_end_transaction(trans, root);
-
- return ret;
-}
-
/*
* work queue call back to started compression on a file and pages
*/
@@ -1221,15 +1144,13 @@ static noinline int run_delalloc_nocow(struct inode *inode,
path = btrfs_alloc_path();
if (!path) {
- extent_clear_unlock_delalloc(inode,
- &BTRFS_I(inode)->io_tree,
- start, end, locked_page,
- EXTENT_CLEAR_UNLOCK_PAGE |
- EXTENT_CLEAR_UNLOCK |
- EXTENT_CLEAR_DELALLOC |
- EXTENT_CLEAR_DIRTY |
- EXTENT_SET_WRITEBACK |
- EXTENT_END_WRITEBACK);
+ extent_clear_unlock_delalloc(inode, start, end, locked_page,
+ EXTENT_LOCKED | EXTENT_DELALLOC |
+ EXTENT_DO_ACCOUNTING |
+ EXTENT_DEFRAG, PAGE_UNLOCK |
+ PAGE_CLEAR_DIRTY |
+ PAGE_SET_WRITEBACK |
+ PAGE_END_WRITEBACK);
return -ENOMEM;
}
@@ -1241,15 +1162,13 @@ static noinline int run_delalloc_nocow(struct inode *inode,
trans = btrfs_join_transaction(root);
if (IS_ERR(trans)) {
- extent_clear_unlock_delalloc(inode,
- &BTRFS_I(inode)->io_tree,
- start, end, locked_page,
- EXTENT_CLEAR_UNLOCK_PAGE |
- EXTENT_CLEAR_UNLOCK |
- EXTENT_CLEAR_DELALLOC |
- EXTENT_CLEAR_DIRTY |
- EXTENT_SET_WRITEBACK |
- EXTENT_END_WRITEBACK);
+ extent_clear_unlock_delalloc(inode, start, end, locked_page,
+ EXTENT_LOCKED | EXTENT_DELALLOC |
+ EXTENT_DO_ACCOUNTING |
+ EXTENT_DEFRAG, PAGE_UNLOCK |
+ PAGE_CLEAR_DIRTY |
+ PAGE_SET_WRITEBACK |
+ PAGE_END_WRITEBACK);
btrfs_free_path(path);
return PTR_ERR(trans);
}
@@ -1261,10 +1180,8 @@ static noinline int run_delalloc_nocow(struct inode *inode,
while (1) {
ret = btrfs_lookup_file_extent(trans, root, path, ino,
cur_offset, 0);
- if (ret < 0) {
- btrfs_abort_transaction(trans, root, ret);
+ if (ret < 0)
goto error;
- }
if (ret > 0 && path->slots[0] > 0 && check_prev) {
leaf = path->nodes[0];
btrfs_item_key_to_cpu(leaf, &found_key,
@@ -1278,10 +1195,8 @@ next_slot:
leaf = path->nodes[0];
if (path->slots[0] >= btrfs_header_nritems(leaf)) {
ret = btrfs_next_leaf(root, path);
- if (ret < 0) {
- btrfs_abort_transaction(trans, root, ret);
+ if (ret < 0)
goto error;
- }
if (ret > 0)
break;
leaf = path->nodes[0];
@@ -1369,13 +1284,11 @@ out_check:
btrfs_release_path(path);
if (cow_start != (u64)-1) {
- ret = __cow_file_range(trans, inode, root, locked_page,
- cow_start, found_key.offset - 1,
- page_started, nr_written, 1);
- if (ret) {
- btrfs_abort_transaction(trans, root, ret);
+ ret = cow_file_range(inode, locked_page,
+ cow_start, found_key.offset - 1,
+ page_started, nr_written, 1);
+ if (ret)
goto error;
- }
cow_start = (u64)-1;
}
@@ -1422,17 +1335,15 @@ out_check:
BTRFS_DATA_RELOC_TREE_OBJECTID) {
ret = btrfs_reloc_clone_csums(inode, cur_offset,
num_bytes);
- if (ret) {
- btrfs_abort_transaction(trans, root, ret);
+ if (ret)
goto error;
- }
}
- extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree,
- cur_offset, cur_offset + num_bytes - 1,
- locked_page, EXTENT_CLEAR_UNLOCK_PAGE |
- EXTENT_CLEAR_UNLOCK | EXTENT_CLEAR_DELALLOC |
- EXTENT_SET_PRIVATE2);
+ extent_clear_unlock_delalloc(inode, cur_offset,
+ cur_offset + num_bytes - 1,
+ locked_page, EXTENT_LOCKED |
+ EXTENT_DELALLOC, PAGE_UNLOCK |
+ PAGE_SET_PRIVATE2);
cur_offset = extent_end;
if (cur_offset > end)
break;
@@ -1445,13 +1356,10 @@ out_check:
}
if (cow_start != (u64)-1) {
- ret = __cow_file_range(trans, inode, root, locked_page,
- cow_start, end,
- page_started, nr_written, 1);
- if (ret) {
- btrfs_abort_transaction(trans, root, ret);
+ ret = cow_file_range(inode, locked_page, cow_start, end,
+ page_started, nr_written, 1);
+ if (ret)
goto error;
- }
}
error:
@@ -1460,16 +1368,13 @@ error:
ret = err;
if (ret && cur_offset < end)
- extent_clear_unlock_delalloc(inode,
- &BTRFS_I(inode)->io_tree,
- cur_offset, end, locked_page,
- EXTENT_CLEAR_UNLOCK_PAGE |
- EXTENT_CLEAR_UNLOCK |
- EXTENT_CLEAR_DELALLOC |
- EXTENT_CLEAR_DIRTY |
- EXTENT_SET_WRITEBACK |
- EXTENT_END_WRITEBACK);
-
+ extent_clear_unlock_delalloc(inode, cur_offset, end,
+ locked_page, EXTENT_LOCKED |
+ EXTENT_DELALLOC | EXTENT_DEFRAG |
+ EXTENT_DO_ACCOUNTING, PAGE_UNLOCK |
+ PAGE_CLEAR_DIRTY |
+ PAGE_SET_WRITEBACK |
+ PAGE_END_WRITEBACK);
btrfs_free_path(path);
return ret;
}
@@ -1638,7 +1543,13 @@ static void btrfs_clear_bit_hook(struct inode *inode,
spin_unlock(&BTRFS_I(inode)->lock);
}
- if (*bits & EXTENT_DO_ACCOUNTING)
+ /*
+ * We don't reserve metadata space for space cache inodes so we
+ * don't need to call dellalloc_release_metadata if there is an
+ * error.
+ */
+ if (*bits & EXTENT_DO_ACCOUNTING &&
+ root != root->fs_info->tree_root)
btrfs_delalloc_release_metadata(inode, len);
if (root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID
@@ -2128,10 +2039,9 @@ static noinline int record_one_backref(u64 inum, u64 offset, u64 root_id,
key.offset = offset;
ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
- if (ret < 0) {
- WARN_ON(1);
+ if (WARN_ON(ret < 0))
return ret;
- }
+ ret = 0;
while (1) {
cond_resched();
@@ -2181,8 +2091,6 @@ static noinline int record_one_backref(u64 inum, u64 offset, u64 root_id,
old->len || extent_offset + num_bytes <=
old->extent_offset + old->offset)
continue;
-
- ret = 0;
break;
}
@@ -2238,16 +2146,18 @@ static noinline bool record_extent_backrefs(struct btrfs_path *path,
static int relink_is_mergable(struct extent_buffer *leaf,
struct btrfs_file_extent_item *fi,
- u64 disk_bytenr)
+ struct new_sa_defrag_extent *new)
{
- if (btrfs_file_extent_disk_bytenr(leaf, fi) != disk_bytenr)
+ if (btrfs_file_extent_disk_bytenr(leaf, fi) != new->bytenr)
return 0;
if (btrfs_file_extent_type(leaf, fi) != BTRFS_FILE_EXTENT_REG)
return 0;
- if (btrfs_file_extent_compression(leaf, fi) ||
- btrfs_file_extent_encryption(leaf, fi) ||
+ if (btrfs_file_extent_compression(leaf, fi) != new->compress_type)
+ return 0;
+
+ if (btrfs_file_extent_encryption(leaf, fi) ||
btrfs_file_extent_other_encoding(leaf, fi))
return 0;
@@ -2391,8 +2301,8 @@ again:
struct btrfs_file_extent_item);
extent_len = btrfs_file_extent_num_bytes(leaf, fi);
- if (relink_is_mergable(leaf, fi, new->bytenr) &&
- extent_len + found_key.offset == start) {
+ if (extent_len + found_key.offset == start &&
+ relink_is_mergable(leaf, fi, new)) {
btrfs_set_file_extent_num_bytes(leaf, fi,
extent_len + len);
btrfs_mark_buffer_dirty(leaf);
@@ -2453,10 +2363,23 @@ out_unlock:
return ret;
}
+static void free_sa_defrag_extent(struct new_sa_defrag_extent *new)
+{
+ struct old_sa_defrag_extent *old, *tmp;
+
+ if (!new)
+ return;
+
+ list_for_each_entry_safe(old, tmp, &new->head, list) {
+ list_del(&old->list);
+ kfree(old);
+ }
+ kfree(new);
+}
+
static void relink_file_extents(struct new_sa_defrag_extent *new)
{
struct btrfs_path *path;
- struct old_sa_defrag_extent *old, *tmp;
struct sa_defrag_extent_backref *backref;
struct sa_defrag_extent_backref *prev = NULL;
struct inode *inode;
@@ -2499,16 +2422,11 @@ static void relink_file_extents(struct new_sa_defrag_extent *new)
kfree(prev);
btrfs_free_path(path);
-
- list_for_each_entry_safe(old, tmp, &new->head, list) {
- list_del(&old->list);
- kfree(old);
- }
out:
+ free_sa_defrag_extent(new);
+
atomic_dec(&root->fs_info->defrag_running);
wake_up(&root->fs_info->transaction_wait);
-
- kfree(new);
}
static struct new_sa_defrag_extent *
@@ -2518,7 +2436,7 @@ record_old_file_extents(struct inode *inode,
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_path *path;
struct btrfs_key key;
- struct old_sa_defrag_extent *old, *tmp;
+ struct old_sa_defrag_extent *old;
struct new_sa_defrag_extent *new;
int ret;
@@ -2566,7 +2484,7 @@ record_old_file_extents(struct inode *inode,
if (slot >= btrfs_header_nritems(l)) {
ret = btrfs_next_leaf(root, path);
if (ret < 0)
- goto out_free_list;
+ goto out_free_path;
else if (ret > 0)
break;
continue;
@@ -2595,7 +2513,7 @@ record_old_file_extents(struct inode *inode,
old = kmalloc(sizeof(*old), GFP_NOFS);
if (!old)
- goto out_free_list;
+ goto out_free_path;
offset = max(new->file_pos, key.offset);
end = min(new->file_pos + new->len, key.offset + num_bytes);
@@ -2617,15 +2535,10 @@ next:
return new;
-out_free_list:
- list_for_each_entry_safe(old, tmp, &new->head, list) {
- list_del(&old->list);
- kfree(old);
- }
out_free_path:
btrfs_free_path(path);
out_kfree:
- kfree(new);
+ free_sa_defrag_extent(new);
return NULL;
}
@@ -2648,8 +2561,10 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
struct extent_state *cached_state = NULL;
struct new_sa_defrag_extent *new = NULL;
int compress_type = 0;
- int ret;
+ int ret = 0;
+ u64 logical_len = ordered_extent->len;
bool nolock;
+ bool truncated = false;
nolock = btrfs_is_free_space_inode(inode);
@@ -2658,6 +2573,14 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
goto out;
}
+ if (test_bit(BTRFS_ORDERED_TRUNCATED, &ordered_extent->flags)) {
+ truncated = true;
+ logical_len = ordered_extent->truncated_len;
+ /* Truncated the entire extent, don't bother adding */
+ if (!logical_len)
+ goto out;
+ }
+
if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) {
BUG_ON(!list_empty(&ordered_extent->list)); /* Logic error */
btrfs_ordered_update_i_size(inode, 0, ordered_extent);
@@ -2713,15 +2636,14 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
ret = btrfs_mark_extent_written(trans, inode,
ordered_extent->file_offset,
ordered_extent->file_offset +
- ordered_extent->len);
+ logical_len);
} else {
BUG_ON(root == root->fs_info->tree_root);
ret = insert_reserved_file_extent(trans, inode,
ordered_extent->file_offset,
ordered_extent->start,
ordered_extent->disk_len,
- ordered_extent->len,
- ordered_extent->len,
+ logical_len, logical_len,
compress_type, 0, 0,
BTRFS_FILE_EXTENT_REG);
}
@@ -2753,17 +2675,27 @@ out:
if (trans)
btrfs_end_transaction(trans, root);
- if (ret) {
- clear_extent_uptodate(io_tree, ordered_extent->file_offset,
- ordered_extent->file_offset +
- ordered_extent->len - 1, NULL, GFP_NOFS);
+ if (ret || truncated) {
+ u64 start, end;
+
+ if (truncated)
+ start = ordered_extent->file_offset + logical_len;
+ else
+ start = ordered_extent->file_offset;
+ end = ordered_extent->file_offset + ordered_extent->len - 1;
+ clear_extent_uptodate(io_tree, start, end, NULL, GFP_NOFS);
+
+ /* Drop the cache for the part of the extent we didn't write. */
+ btrfs_drop_extent_cache(inode, start, end, 0);
/*
* If the ordered extent had an IOERR or something else went
* wrong we need to return the space for this ordered extent
- * back to the allocator.
+ * back to the allocator. We only free the extent in the
+ * truncated case if we didn't write out the extent at all.
*/
- if (!test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags) &&
+ if ((ret || !logical_len) &&
+ !test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags) &&
!test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags))
btrfs_free_reserved_extent(root, ordered_extent->start,
ordered_extent->disk_len);
@@ -2777,8 +2709,14 @@ out:
btrfs_remove_ordered_extent(inode, ordered_extent);
/* for snapshot-aware defrag */
- if (new)
- relink_file_extents(new);
+ if (new) {
+ if (ret) {
+ free_sa_defrag_extent(new);
+ atomic_dec(&root->fs_info->defrag_running);
+ } else {
+ relink_file_extents(new);
+ }
+ }
/* once for us */
btrfs_put_ordered_extent(ordered_extent);
@@ -2827,16 +2765,16 @@ static int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end,
* if there's a match, we allow the bio to finish. If not, the code in
* extent_io.c will try to find good copies for us.
*/
-static int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end,
- struct extent_state *state, int mirror)
+static int btrfs_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
+ u64 phy_offset, struct page *page,
+ u64 start, u64 end, int mirror)
{
size_t offset = start - page_offset(page);
struct inode *inode = page->mapping->host;
struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
char *kaddr;
- u64 private = ~(u32)0;
- int ret;
struct btrfs_root *root = BTRFS_I(inode)->root;
+ u32 csum_expected;
u32 csum = ~(u32)0;
static DEFINE_RATELIMIT_STATE(_rs, DEFAULT_RATELIMIT_INTERVAL,
DEFAULT_RATELIMIT_BURST);
@@ -2856,19 +2794,13 @@ static int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end,
return 0;
}
- if (state && state->start == start) {
- private = state->private;
- ret = 0;
- } else {
- ret = get_state_private(io_tree, start, &private);
- }
- kaddr = kmap_atomic(page);
- if (ret)
- goto zeroit;
+ phy_offset >>= inode->i_sb->s_blocksize_bits;
+ csum_expected = *(((u32 *)io_bio->csum) + phy_offset);
+ kaddr = kmap_atomic(page);
csum = btrfs_csum_data(kaddr + offset, csum, end - start + 1);
btrfs_csum_final(csum, (char *)&csum);
- if (csum != private)
+ if (csum != csum_expected)
goto zeroit;
kunmap_atomic(kaddr);
@@ -2877,14 +2809,12 @@ good:
zeroit:
if (__ratelimit(&_rs))
- btrfs_info(root->fs_info, "csum failed ino %llu off %llu csum %u private %llu",
- (unsigned long long)btrfs_ino(page->mapping->host),
- (unsigned long long)start, csum,
- (unsigned long long)private);
+ btrfs_info(root->fs_info, "csum failed ino %llu off %llu csum %u expected csum %u",
+ btrfs_ino(page->mapping->host), start, csum, csum_expected);
memset(kaddr + offset, 1, end - start + 1);
flush_dcache_page(page);
kunmap_atomic(kaddr);
- if (private == 0)
+ if (csum_expected == 0)
return 0;
return -EIO;
}
@@ -2971,8 +2901,10 @@ void btrfs_orphan_commit_root(struct btrfs_trans_handle *trans,
btrfs_root_refs(&root->root_item) > 0) {
ret = btrfs_del_orphan_item(trans, root->fs_info->tree_root,
root->root_key.objectid);
- BUG_ON(ret);
- root->orphan_item_inserted = 0;
+ if (ret)
+ btrfs_abort_transaction(trans, root, ret);
+ else
+ root->orphan_item_inserted = 0;
}
if (block_rsv) {
@@ -3041,11 +2973,19 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode)
/* insert an orphan item to track this unlinked/truncated file */
if (insert >= 1) {
ret = btrfs_insert_orphan_item(trans, root, btrfs_ino(inode));
- if (ret && ret != -EEXIST) {
- clear_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
- &BTRFS_I(inode)->runtime_flags);
- btrfs_abort_transaction(trans, root, ret);
- return ret;
+ if (ret) {
+ atomic_dec(&root->orphan_inodes);
+ if (reserve) {
+ clear_bit(BTRFS_INODE_ORPHAN_META_RESERVED,
+ &BTRFS_I(inode)->runtime_flags);
+ btrfs_orphan_release_metadata(inode);
+ }
+ if (ret != -EEXIST) {
+ clear_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
+ &BTRFS_I(inode)->runtime_flags);
+ btrfs_abort_transaction(trans, root, ret);
+ return ret;
+ }
}
ret = 0;
}
@@ -3084,17 +3024,17 @@ static int btrfs_orphan_del(struct btrfs_trans_handle *trans,
release_rsv = 1;
spin_unlock(&root->orphan_lock);
- if (trans && delete_item) {
- ret = btrfs_del_orphan_item(trans, root, btrfs_ino(inode));
- BUG_ON(ret); /* -ENOMEM or corruption (JDM: Recheck) */
+ if (delete_item) {
+ atomic_dec(&root->orphan_inodes);
+ if (trans)
+ ret = btrfs_del_orphan_item(trans, root,
+ btrfs_ino(inode));
}
- if (release_rsv) {
+ if (release_rsv)
btrfs_orphan_release_metadata(inode);
- atomic_dec(&root->orphan_inodes);
- }
- return 0;
+ return ret;
}
/*
@@ -3174,7 +3114,7 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
found_key.type = BTRFS_INODE_ITEM_KEY;
found_key.offset = 0;
inode = btrfs_iget(root->fs_info->sb, &found_key, root, NULL);
- ret = PTR_RET(inode);
+ ret = PTR_ERR_OR_ZERO(inode);
if (ret && ret != -ESTALE)
goto out;
@@ -3224,8 +3164,9 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
found_key.objectid);
ret = btrfs_del_orphan_item(trans, root,
found_key.objectid);
- BUG_ON(ret); /* -ENOMEM or corruption (JDM: Recheck) */
btrfs_end_transaction(trans, root);
+ if (ret)
+ goto out;
continue;
}
@@ -3239,8 +3180,7 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
/* if we have links, this was a truncate, lets do that */
if (inode->i_nlink) {
- if (!S_ISREG(inode->i_mode)) {
- WARN_ON(1);
+ if (WARN_ON(!S_ISREG(inode->i_mode))) {
iput(inode);
continue;
}
@@ -3657,8 +3597,7 @@ static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans,
if (ret) {
btrfs_info(root->fs_info,
"failed to delete reference to %.*s, inode %llu parent %llu",
- name_len, name,
- (unsigned long long)ino, (unsigned long long)dir_ino);
+ name_len, name, ino, dir_ino);
btrfs_abort_transaction(trans, root, ret);
goto err;
}
@@ -3704,7 +3643,7 @@ int btrfs_unlink_inode(struct btrfs_trans_handle *trans,
int ret;
ret = __btrfs_unlink_inode(trans, root, dir, inode, name, name_len);
if (!ret) {
- btrfs_drop_nlink(inode);
+ drop_nlink(inode);
ret = btrfs_update_inode(trans, root, inode);
}
return ret;
@@ -3929,6 +3868,7 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
u64 extent_num_bytes = 0;
u64 extent_offset = 0;
u64 item_end = 0;
+ u64 last_size = (u64)-1;
u32 found_type = (u8)-1;
int found_extent;
int del_item;
@@ -4026,6 +3966,11 @@ search_again:
if (found_type != BTRFS_EXTENT_DATA_KEY)
goto delete;
+ if (del_item)
+ last_size = found_key.offset;
+ else
+ last_size = new_size;
+
if (extent_type != BTRFS_FILE_EXTENT_INLINE) {
u64 num_dec;
extent_start = btrfs_file_extent_disk_bytenr(leaf, fi);
@@ -4137,6 +4082,8 @@ out:
btrfs_abort_transaction(trans, root, ret);
}
error:
+ if (last_size != (u64)-1)
+ btrfs_ordered_update_i_size(inode, last_size, NULL);
btrfs_free_path(path);
return err;
}
@@ -4290,15 +4237,16 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
while (1) {
struct btrfs_ordered_extent *ordered;
- btrfs_wait_ordered_range(inode, hole_start,
- block_end - hole_start);
+
lock_extent_bits(io_tree, hole_start, block_end - 1, 0,
&cached_state);
- ordered = btrfs_lookup_ordered_extent(inode, hole_start);
+ ordered = btrfs_lookup_ordered_range(inode, hole_start,
+ block_end - hole_start);
if (!ordered)
break;
unlock_extent_cached(io_tree, hole_start, block_end - 1,
&cached_state, GFP_NOFS);
+ btrfs_start_ordered_extent(inode, ordered, 1);
btrfs_put_ordered_extent(ordered);
}
@@ -4409,7 +4357,7 @@ static int btrfs_setsize(struct inode *inode, struct iattr *attr)
inode->i_ctime = inode->i_mtime = current_fs_time(inode->i_sb);
if (newsize > oldsize) {
- truncate_pagecache(inode, oldsize, newsize);
+ truncate_pagecache(inode, newsize);
ret = btrfs_cont_expand(inode, oldsize, newsize);
if (ret)
return ret;
@@ -4465,8 +4413,26 @@ static int btrfs_setsize(struct inode *inode, struct iattr *attr)
btrfs_inode_resume_unlocked_dio(inode);
ret = btrfs_truncate(inode);
- if (ret && inode->i_nlink)
- btrfs_orphan_del(NULL, inode);
+ if (ret && inode->i_nlink) {
+ int err;
+
+ /*
+ * failed to truncate, disk_i_size is only adjusted down
+ * as we remove extents, so it should represent the true
+ * size of the inode, so reset the in memory size and
+ * delete our orphan entry.
+ */
+ trans = btrfs_join_transaction(root);
+ if (IS_ERR(trans)) {
+ btrfs_orphan_del(NULL, inode);
+ return ret;
+ }
+ i_size_write(inode, BTRFS_I(inode)->disk_i_size);
+ err = btrfs_orphan_del(trans, inode);
+ if (err)
+ btrfs_abort_transaction(trans, root, err);
+ btrfs_end_transaction(trans, root);
+ }
}
return ret;
@@ -4514,8 +4480,10 @@ void btrfs_evict_inode(struct inode *inode)
trace_btrfs_inode_evict(inode);
truncate_inode_pages(&inode->i_data, 0);
- if (inode->i_nlink && (btrfs_root_refs(&root->root_item) != 0 ||
- btrfs_is_free_space_inode(inode)))
+ if (inode->i_nlink &&
+ ((btrfs_root_refs(&root->root_item) != 0 &&
+ root->root_key.objectid != BTRFS_ROOT_TREE_OBJECTID) ||
+ btrfs_is_free_space_inode(inode)))
goto no_delete;
if (is_bad_inode(inode)) {
@@ -4532,7 +4500,8 @@ void btrfs_evict_inode(struct inode *inode)
}
if (inode->i_nlink > 0) {
- BUG_ON(btrfs_root_refs(&root->root_item) != 0);
+ BUG_ON(btrfs_root_refs(&root->root_item) != 0 &&
+ root->root_key.objectid != BTRFS_ROOT_TREE_OBJECTID);
goto no_delete;
}
@@ -4601,10 +4570,15 @@ void btrfs_evict_inode(struct inode *inode)
btrfs_free_block_rsv(root, rsv);
+ /*
+ * Errors here aren't a big deal, it just means we leave orphan items
+ * in the tree. They will be cleaned up on the next mount.
+ */
if (ret == 0) {
trans->block_rsv = root->orphan_block_rsv;
- ret = btrfs_orphan_del(trans, inode);
- BUG_ON(ret);
+ btrfs_orphan_del(trans, inode);
+ } else {
+ btrfs_orphan_del(NULL, inode);
}
trans->block_rsv = &root->fs_info->trans_block_rsv;
@@ -4725,11 +4699,11 @@ static void inode_tree_add(struct inode *inode)
struct btrfs_inode *entry;
struct rb_node **p;
struct rb_node *parent;
+ struct rb_node *new = &BTRFS_I(inode)->rb_node;
u64 ino = btrfs_ino(inode);
if (inode_unhashed(inode))
return;
-again:
parent = NULL;
spin_lock(&root->inode_lock);
p = &root->inode_tree.rb_node;
@@ -4744,14 +4718,14 @@ again:
else {
WARN_ON(!(entry->vfs_inode.i_state &
(I_WILL_FREE | I_FREEING)));
- rb_erase(parent, &root->inode_tree);
+ rb_replace_node(parent, new, &root->inode_tree);
RB_CLEAR_NODE(parent);
spin_unlock(&root->inode_lock);
- goto again;
+ return;
}
}
- rb_link_node(&BTRFS_I(inode)->rb_node, parent, p);
- rb_insert_color(&BTRFS_I(inode)->rb_node, &root->inode_tree);
+ rb_link_node(new, parent, p);
+ rb_insert_color(new, &root->inode_tree);
spin_unlock(&root->inode_lock);
}
@@ -4768,14 +4742,7 @@ static void inode_tree_del(struct inode *inode)
}
spin_unlock(&root->inode_lock);
- /*
- * Free space cache has inodes in the tree root, but the tree root has a
- * root_refs of 0, so this could end up dropping the tree root as a
- * snapshot, so we need the extra !root->fs_info->tree_root check to
- * make sure we don't drop it.
- */
- if (empty && btrfs_root_refs(&root->root_item) == 0 &&
- root != root->fs_info->tree_root) {
+ if (empty && btrfs_root_refs(&root->root_item) == 0) {
synchronize_srcu(&root->fs_info->subvol_srcu);
spin_lock(&root->inode_lock);
empty = RB_EMPTY_ROOT(&root->inode_tree);
@@ -4868,10 +4835,12 @@ static struct inode *btrfs_iget_locked(struct super_block *s,
{
struct inode *inode;
struct btrfs_iget_args args;
+ unsigned long hashval = btrfs_inode_hash(objectid, root);
+
args.ino = objectid;
args.root = root;
- inode = iget5_locked(s, objectid, btrfs_find_actor,
+ inode = iget5_locked(s, hashval, btrfs_find_actor,
btrfs_init_locked_inode,
(void *)&args);
return inode;
@@ -5085,7 +5054,7 @@ static int btrfs_real_readdir(struct file *file, struct dir_context *ctx)
continue;
}
- item = btrfs_item_nr(leaf, slot);
+ item = btrfs_item_nr(slot);
btrfs_item_key_to_cpu(leaf, &found_key, slot);
if (found_key.objectid != key.objectid)
@@ -5491,7 +5460,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
BTRFS_INODE_NODATASUM;
}
- insert_inode_hash(inode);
+ btrfs_insert_inode_hash(inode);
inode_tree_add(inode);
trace_btrfs_inode_new(inode);
@@ -5767,7 +5736,7 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
goto fail;
}
- btrfs_inc_nlink(inode);
+ inc_nlink(inode);
inode_inc_iversion(inode);
inode->i_ctime = CURRENT_TIME;
ihold(inode);
@@ -5897,7 +5866,7 @@ static noinline int uncompress_inline(struct btrfs_path *path,
compress_type = btrfs_file_extent_compression(leaf, item);
max_size = btrfs_file_extent_ram_bytes(leaf, item);
inline_size = btrfs_file_extent_inline_item_len(leaf,
- btrfs_item_nr(leaf, path->slots[0]));
+ btrfs_item_nr(path->slots[0]));
tmp = kmalloc(inline_size, GFP_NOFS);
if (!tmp)
return -ENOMEM;
@@ -6011,7 +5980,14 @@ again:
found_type = btrfs_key_type(&found_key);
if (found_key.objectid != objectid ||
found_type != BTRFS_EXTENT_DATA_KEY) {
- goto not_found;
+ /*
+ * If we backup past the first extent we want to move forward
+ * and see if there is an extent in front of us, otherwise we'll
+ * say there is a hole for our whole search range which can
+ * cause problems.
+ */
+ extent_end = start;
+ goto next;
}
found_type = btrfs_file_extent_type(leaf, item);
@@ -6026,7 +6002,7 @@ again:
size = btrfs_file_extent_inline_len(leaf, item);
extent_end = ALIGN(extent_start + size, root->sectorsize);
}
-
+next:
if (start >= extent_end) {
path->slots[0]++;
if (path->slots[0] >= btrfs_header_nritems(leaf)) {
@@ -6161,10 +6137,7 @@ insert:
btrfs_release_path(path);
if (em->start > start || extent_map_end(em) <= start) {
btrfs_err(root->fs_info, "bad extent! em: [%llu %llu] passed [%llu %llu]",
- (unsigned long long)em->start,
- (unsigned long long)em->len,
- (unsigned long long)start,
- (unsigned long long)len);
+ em->start, em->len, start, len);
err = -EIO;
goto out;
}
@@ -6289,7 +6262,7 @@ struct extent_map *btrfs_get_extent_fiemap(struct inode *inode, struct page *pag
/* adjust the range_start to make sure it doesn't
* go backwards from the start they passed in
*/
- range_start = max(start,range_start);
+ range_start = max(start, range_start);
found = found_end - range_start;
if (found > 0) {
@@ -6362,39 +6335,32 @@ static struct extent_map *btrfs_new_extent_direct(struct inode *inode,
u64 start, u64 len)
{
struct btrfs_root *root = BTRFS_I(inode)->root;
- struct btrfs_trans_handle *trans;
struct extent_map *em;
struct btrfs_key ins;
u64 alloc_hint;
int ret;
- trans = btrfs_join_transaction(root);
- if (IS_ERR(trans))
- return ERR_CAST(trans);
-
- trans->block_rsv = &root->fs_info->delalloc_block_rsv;
-
alloc_hint = get_extent_allocation_hint(inode, start, len);
- ret = btrfs_reserve_extent(trans, root, len, root->sectorsize, 0,
+ ret = btrfs_reserve_extent(root, len, root->sectorsize, 0,
alloc_hint, &ins, 1);
- if (ret) {
- em = ERR_PTR(ret);
- goto out;
- }
+ if (ret)
+ return ERR_PTR(ret);
em = create_pinned_em(inode, start, ins.offset, start, ins.objectid,
ins.offset, ins.offset, ins.offset, 0);
- if (IS_ERR(em))
- goto out;
+ if (IS_ERR(em)) {
+ btrfs_free_reserved_extent(root, ins.objectid, ins.offset);
+ return em;
+ }
ret = btrfs_add_ordered_extent_dio(inode, start, ins.objectid,
ins.offset, ins.offset, 0);
if (ret) {
btrfs_free_reserved_extent(root, ins.objectid, ins.offset);
- em = ERR_PTR(ret);
+ free_extent_map(em);
+ return ERR_PTR(ret);
}
-out:
- btrfs_end_transaction(trans, root);
+
return em;
}
@@ -6402,11 +6368,11 @@ out:
* returns 1 when the nocow is safe, < 1 on error, 0 if the
* block must be cow'd
*/
-noinline int can_nocow_extent(struct btrfs_trans_handle *trans,
- struct inode *inode, u64 offset, u64 *len,
+noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len,
u64 *orig_start, u64 *orig_block_len,
u64 *ram_bytes)
{
+ struct btrfs_trans_handle *trans;
struct btrfs_path *path;
int ret;
struct extent_buffer *leaf;
@@ -6424,7 +6390,7 @@ noinline int can_nocow_extent(struct btrfs_trans_handle *trans,
if (!path)
return -ENOMEM;
- ret = btrfs_lookup_file_extent(trans, root, path, btrfs_ino(inode),
+ ret = btrfs_lookup_file_extent(NULL, root, path, btrfs_ino(inode),
offset, 0);
if (ret < 0)
goto out;
@@ -6484,14 +6450,25 @@ noinline int can_nocow_extent(struct btrfs_trans_handle *trans,
if (btrfs_extent_readonly(root, disk_bytenr))
goto out;
+ btrfs_release_path(path);
/*
* look for other files referencing this extent, if we
* find any we must cow
*/
- if (btrfs_cross_ref_exist(trans, root, btrfs_ino(inode),
- key.offset - backref_offset, disk_bytenr))
+ trans = btrfs_join_transaction(root);
+ if (IS_ERR(trans)) {
+ ret = 0;
+ goto out;
+ }
+
+ ret = btrfs_cross_ref_exist(trans, root, btrfs_ino(inode),
+ key.offset - backref_offset, disk_bytenr);
+ btrfs_end_transaction(trans, root);
+ if (ret) {
+ ret = 0;
goto out;
+ }
/*
* adjust disk_bytenr and num_bytes to cover just the bytes
@@ -6633,7 +6610,6 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
u64 start = iblock << inode->i_blkbits;
u64 lockstart, lockend;
u64 len = bh_result->b_size;
- struct btrfs_trans_handle *trans;
int unlock_bits = EXTENT_LOCKED;
int ret = 0;
@@ -6715,16 +6691,7 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
len = min(len, em->len - (start - em->start));
block_start = em->block_start + (start - em->start);
- /*
- * we're not going to log anything, but we do need
- * to make sure the current transaction stays open
- * while we look for nocow cross refs
- */
- trans = btrfs_join_transaction(root);
- if (IS_ERR(trans))
- goto must_cow;
-
- if (can_nocow_extent(trans, inode, start, &len, &orig_start,
+ if (can_nocow_extent(inode, start, &len, &orig_start,
&orig_block_len, &ram_bytes) == 1) {
if (type == BTRFS_ORDERED_PREALLOC) {
free_extent_map(em);
@@ -6733,24 +6700,20 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
block_start, len,
orig_block_len,
ram_bytes, type);
- if (IS_ERR(em)) {
- btrfs_end_transaction(trans, root);
+ if (IS_ERR(em))
goto unlock_err;
- }
}
ret = btrfs_add_ordered_extent_dio(inode, start,
block_start, len, len, type);
- btrfs_end_transaction(trans, root);
if (ret) {
free_extent_map(em);
goto unlock_err;
}
goto unlock;
}
- btrfs_end_transaction(trans, root);
}
-must_cow:
+
/*
* this will cow the extent, reset the len in case we changed
* it above
@@ -6813,26 +6776,6 @@ unlock_err:
return ret;
}
-struct btrfs_dio_private {
- struct inode *inode;
- u64 logical_offset;
- u64 disk_bytenr;
- u64 bytes;
- void *private;
-
- /* number of bios pending for this dio */
- atomic_t pending_bios;
-
- /* IO errors */
- int errors;
-
- /* orig_bio is our btrfs_io_bio */
- struct bio *orig_bio;
-
- /* dio_bio came from fs/direct-io.c */
- struct bio *dio_bio;
-};
-
static void btrfs_endio_direct_read(struct bio *bio, int err)
{
struct btrfs_dio_private *dip = bio->bi_private;
@@ -6841,6 +6784,8 @@ static void btrfs_endio_direct_read(struct bio *bio, int err)
struct inode *inode = dip->inode;
struct btrfs_root *root = BTRFS_I(inode)->root;
struct bio *dio_bio;
+ u32 *csums = (u32 *)dip->csum;
+ int index = 0;
u64 start;
start = dip->logical_offset;
@@ -6849,12 +6794,8 @@ static void btrfs_endio_direct_read(struct bio *bio, int err)
struct page *page = bvec->bv_page;
char *kaddr;
u32 csum = ~(u32)0;
- u64 private = ~(u32)0;
unsigned long flags;
- if (get_state_private(&BTRFS_I(inode)->io_tree,
- start, &private))
- goto failed;
local_irq_save(flags);
kaddr = kmap_atomic(page);
csum = btrfs_csum_data(kaddr + bvec->bv_offset,
@@ -6864,18 +6805,17 @@ static void btrfs_endio_direct_read(struct bio *bio, int err)
local_irq_restore(flags);
flush_dcache_page(bvec->bv_page);
- if (csum != private) {
-failed:
- btrfs_err(root->fs_info, "csum failed ino %llu off %llu csum %u private %u",
- (unsigned long long)btrfs_ino(inode),
- (unsigned long long)start,
- csum, (unsigned)private);
+ if (csum != csums[index]) {
+ btrfs_err(root->fs_info, "csum failed ino %llu off %llu csum %u expected csum %u",
+ btrfs_ino(inode), start, csum,
+ csums[index]);
err = -EIO;
}
}
start += bvec->bv_len;
bvec++;
+ index++;
} while (bvec <= bvec_end);
unlock_extent(&BTRFS_I(inode)->io_tree, dip->logical_offset,
@@ -6956,7 +6896,7 @@ static void btrfs_end_dio_bio(struct bio *bio, int err)
if (err) {
printk(KERN_ERR "btrfs direct IO failed ino %llu rw %lu "
"sector %#Lx len %u err no %d\n",
- (unsigned long long)btrfs_ino(dip->inode), bio->bi_rw,
+ btrfs_ino(dip->inode), bio->bi_rw,
(unsigned long long)bio->bi_sector, bio->bi_size, err);
dip->errors = 1;
@@ -6992,6 +6932,7 @@ static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode,
int rw, u64 file_offset, int skip_sum,
int async_submit)
{
+ struct btrfs_dio_private *dip = bio->bi_private;
int write = rw & REQ_WRITE;
struct btrfs_root *root = BTRFS_I(inode)->root;
int ret;
@@ -7026,7 +6967,8 @@ static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode,
if (ret)
goto err;
} else if (!skip_sum) {
- ret = btrfs_lookup_bio_sums_dio(root, inode, bio, file_offset);
+ ret = btrfs_lookup_bio_sums_dio(root, inode, dip, bio,
+ file_offset);
if (ret)
goto err;
}
@@ -7061,6 +7003,7 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
bio_put(orig_bio);
return -EIO;
}
+
if (map_length >= orig_bio->bi_size) {
bio = orig_bio;
goto submit;
@@ -7123,7 +7066,7 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
}
} else {
submit_len += bvec->bv_len;
- nr_pages ++;
+ nr_pages++;
bvec++;
}
}
@@ -7156,19 +7099,28 @@ static void btrfs_submit_direct(int rw, struct bio *dio_bio,
struct btrfs_dio_private *dip;
struct bio *io_bio;
int skip_sum;
+ int sum_len;
int write = rw & REQ_WRITE;
int ret = 0;
+ u16 csum_size;
skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
io_bio = btrfs_bio_clone(dio_bio, GFP_NOFS);
-
if (!io_bio) {
ret = -ENOMEM;
goto free_ordered;
}
- dip = kmalloc(sizeof(*dip), GFP_NOFS);
+ if (!skip_sum && !write) {
+ csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
+ sum_len = dio_bio->bi_size >> inode->i_sb->s_blocksize_bits;
+ sum_len *= csum_size;
+ } else {
+ sum_len = 0;
+ }
+
+ dip = kmalloc(sizeof(*dip) + sum_len, GFP_NOFS);
if (!dip) {
ret = -ENOMEM;
goto free_io_bio;
@@ -7283,7 +7235,9 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
* outstanding dirty pages are on disk.
*/
count = iov_length(iov, nr_segs);
- btrfs_wait_ordered_range(inode, offset, count);
+ ret = btrfs_wait_ordered_range(inode, offset, count);
+ if (ret)
+ return ret;
if (rw & WRITE) {
/*
@@ -7443,10 +7397,23 @@ static void btrfs_invalidatepage(struct page *page, unsigned int offset,
* whoever cleared the private bit is responsible
* for the finish_ordered_io
*/
- if (TestClearPagePrivate2(page) &&
- btrfs_dec_test_ordered_pending(inode, &ordered, page_start,
- PAGE_CACHE_SIZE, 1)) {
- btrfs_finish_ordered_io(ordered);
+ if (TestClearPagePrivate2(page)) {
+ struct btrfs_ordered_inode_tree *tree;
+ u64 new_len;
+
+ tree = &BTRFS_I(inode)->ordered_tree;
+
+ spin_lock_irq(&tree->lock);
+ set_bit(BTRFS_ORDERED_TRUNCATED, &ordered->flags);
+ new_len = page_start - ordered->file_offset;
+ if (new_len < ordered->truncated_len)
+ ordered->truncated_len = new_len;
+ spin_unlock_irq(&tree->lock);
+
+ if (btrfs_dec_test_ordered_pending(inode, &ordered,
+ page_start,
+ PAGE_CACHE_SIZE, 1))
+ btrfs_finish_ordered_io(ordered);
}
btrfs_put_ordered_extent(ordered);
cached_state = NULL;
@@ -7611,8 +7578,10 @@ static int btrfs_truncate(struct inode *inode)
u64 mask = root->sectorsize - 1;
u64 min_size = btrfs_calc_trunc_metadata_size(root, 1);
- btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1);
- btrfs_ordered_update_i_size(inode, inode->i_size, NULL);
+ ret = btrfs_wait_ordered_range(inode, inode->i_size & (~mask),
+ (u64)-1);
+ if (ret)
+ return ret;
/*
* Yes ladies and gentelment, this is indeed ugly. The fact is we have
@@ -7836,6 +7805,14 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
return inode;
}
+#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
+void btrfs_test_destroy_inode(struct inode *inode)
+{
+ btrfs_drop_extent_cache(inode, 0, (u64)-1, 0);
+ kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode));
+}
+#endif
+
static void btrfs_i_callback(struct rcu_head *head)
{
struct inode *inode = container_of(head, struct inode, i_rcu);
@@ -7876,7 +7853,7 @@ void btrfs_destroy_inode(struct inode *inode)
if (test_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
&BTRFS_I(inode)->runtime_flags)) {
btrfs_info(root->fs_info, "inode %llu still on the orphan list",
- (unsigned long long)btrfs_ino(inode));
+ btrfs_ino(inode));
atomic_dec(&root->orphan_inodes);
}
@@ -7886,8 +7863,7 @@ void btrfs_destroy_inode(struct inode *inode)
break;
else {
btrfs_err(root->fs_info, "found ordered extent %llu %llu on inode cleanup",
- (unsigned long long)ordered->file_offset,
- (unsigned long long)ordered->len);
+ ordered->file_offset, ordered->len);
btrfs_remove_ordered_extent(inode, ordered);
btrfs_put_ordered_extent(ordered);
btrfs_put_ordered_extent(ordered);
@@ -7907,8 +7883,7 @@ int btrfs_drop_inode(struct inode *inode)
return 1;
/* the snap/subvol tree is on deleting */
- if (btrfs_root_refs(&root->root_item) == 0 &&
- root != root->fs_info->tree_root)
+ if (btrfs_root_refs(&root->root_item) == 0)
return 1;
else
return generic_drop_inode(inode);
@@ -8037,7 +8012,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
/* check for collisions, even if the name isn't there */
- ret = btrfs_check_dir_item_collision(root, new_dir->i_ino,
+ ret = btrfs_check_dir_item_collision(dest, new_dir->i_ino,
new_dentry->d_name.name,
new_dentry->d_name.len);
@@ -8045,8 +8020,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
if (ret == -EEXIST) {
/* we shouldn't get
* eexist without a new_inode */
- if (!new_inode) {
- WARN_ON(1);
+ if (WARN_ON(!new_inode)) {
return ret;
}
} else {
@@ -8161,10 +8135,8 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
new_dentry->d_name.name,
new_dentry->d_name.len);
}
- if (!ret && new_inode->i_nlink == 0) {
+ if (!ret && new_inode->i_nlink == 0)
ret = btrfs_orphan_add(trans, new_dentry->d_inode);
- BUG_ON(ret);
- }
if (ret) {
btrfs_abort_transaction(trans, root, ret);
goto out_fail;
@@ -8196,18 +8168,24 @@ out_notrans:
static void btrfs_run_delalloc_work(struct btrfs_work *work)
{
struct btrfs_delalloc_work *delalloc_work;
+ struct inode *inode;
delalloc_work = container_of(work, struct btrfs_delalloc_work,
work);
- if (delalloc_work->wait)
- btrfs_wait_ordered_range(delalloc_work->inode, 0, (u64)-1);
- else
- filemap_flush(delalloc_work->inode->i_mapping);
+ inode = delalloc_work->inode;
+ if (delalloc_work->wait) {
+ btrfs_wait_ordered_range(inode, 0, (u64)-1);
+ } else {
+ filemap_flush(inode->i_mapping);
+ if (test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT,
+ &BTRFS_I(inode)->runtime_flags))
+ filemap_flush(inode->i_mapping);
+ }
if (delalloc_work->delay_iput)
- btrfs_add_delayed_iput(delalloc_work->inode);
+ btrfs_add_delayed_iput(inode);
else
- iput(delalloc_work->inode);
+ iput(inode);
complete(&delalloc_work->completion);
}
@@ -8269,6 +8247,10 @@ static int __start_delalloc_inodes(struct btrfs_root *root, int delay_iput)
work = btrfs_alloc_delalloc_work(inode, 0, delay_iput);
if (unlikely(!work)) {
+ if (delay_iput)
+ btrfs_add_delayed_iput(inode);
+ else
+ iput(inode);
ret = -ENOMEM;
goto out;
}
@@ -8324,8 +8306,7 @@ int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput)
return ret;
}
-int btrfs_start_all_delalloc_inodes(struct btrfs_fs_info *fs_info,
- int delay_iput)
+int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, int delay_iput)
{
struct btrfs_root *root;
struct list_head splice;
@@ -8385,14 +8366,14 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
int err;
int drop_inode = 0;
u64 objectid;
- u64 index = 0 ;
+ u64 index = 0;
int name_len;
int datasize;
unsigned long ptr;
struct btrfs_file_extent_item *ei;
struct extent_buffer *leaf;
- name_len = strlen(symname) + 1;
+ name_len = strlen(symname);
if (name_len > BTRFS_MAX_INLINE_DATA_SIZE(root))
return -ENAMETOOLONG;
@@ -8480,7 +8461,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
inode->i_mapping->a_ops = &btrfs_symlink_aops;
inode->i_mapping->backing_dev_info = &root->fs_info->bdi;
inode_set_bytes(inode, name_len);
- btrfs_i_size_write(inode, name_len - 1);
+ btrfs_i_size_write(inode, name_len);
err = btrfs_update_inode(trans, root, inode);
if (err)
drop_inode = 1;
@@ -8525,8 +8506,8 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
cur_bytes = min(num_bytes, 256ULL * 1024 * 1024);
cur_bytes = max(cur_bytes, min_size);
- ret = btrfs_reserve_extent(trans, root, cur_bytes,
- min_size, 0, *alloc_hint, &ins, 1);
+ ret = btrfs_reserve_extent(root, cur_bytes, min_size, 0,
+ *alloc_hint, &ins, 1);
if (ret) {
if (own_trans)
btrfs_end_transaction(trans, root);
@@ -8539,6 +8520,8 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
ins.offset, 0, 0, 0,
BTRFS_FILE_EXTENT_PREALLOC);
if (ret) {
+ btrfs_free_reserved_extent(root, ins.objectid,
+ ins.offset);
btrfs_abort_transaction(trans, root, ret);
if (own_trans)
btrfs_end_transaction(trans, root);
@@ -8666,11 +8649,13 @@ static const struct inode_operations btrfs_dir_inode_operations = {
.removexattr = btrfs_removexattr,
.permission = btrfs_permission,
.get_acl = btrfs_get_acl,
+ .update_time = btrfs_update_time,
};
static const struct inode_operations btrfs_dir_ro_inode_operations = {
.lookup = btrfs_lookup,
.permission = btrfs_permission,
.get_acl = btrfs_get_acl,
+ .update_time = btrfs_update_time,
};
static const struct file_operations btrfs_dir_file_operations = {
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 238a055..1d04b55 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -43,7 +43,7 @@
#include <linux/blkdev.h>
#include <linux/uuid.h>
#include <linux/btrfs.h>
-#include "compat.h"
+#include <linux/uaccess.h>
#include "ctree.h"
#include "disk-io.h"
#include "transaction.h"
@@ -57,6 +57,9 @@
#include "send.h"
#include "dev-replace.h"
+static int btrfs_clone(struct inode *src, struct inode *inode,
+ u64 off, u64 olen, u64 olen_aligned, u64 destoff);
+
/* Mask out flags that are inappropriate for the given type of inode. */
static inline __u32 btrfs_mask_flags(umode_t mode, __u32 flags)
{
@@ -363,6 +366,12 @@ static noinline int btrfs_ioctl_fitrim(struct file *file, void __user *arg)
return 0;
}
+int btrfs_is_empty_uuid(u8 *uuid)
+{
+ BUILD_BUG_ON(BTRFS_UUID_SIZE > PAGE_SIZE);
+ return !memcmp(uuid, empty_zero_page, BTRFS_UUID_SIZE);
+}
+
static noinline int create_subvol(struct inode *dir,
struct dentry *dentry,
char *name, int namelen,
@@ -396,7 +405,7 @@ static noinline int create_subvol(struct inode *dir,
* of create_snapshot().
*/
ret = btrfs_subvolume_reserve_metadata(root, &block_rsv,
- 7, &qgroup_reserved);
+ 8, &qgroup_reserved, false);
if (ret)
return ret;
@@ -425,26 +434,25 @@ static noinline int create_subvol(struct inode *dir,
btrfs_set_header_backref_rev(leaf, BTRFS_MIXED_BACKREF_REV);
btrfs_set_header_owner(leaf, objectid);
- write_extent_buffer(leaf, root->fs_info->fsid,
- (unsigned long)btrfs_header_fsid(leaf),
+ write_extent_buffer(leaf, root->fs_info->fsid, btrfs_header_fsid(),
BTRFS_FSID_SIZE);
write_extent_buffer(leaf, root->fs_info->chunk_tree_uuid,
- (unsigned long)btrfs_header_chunk_tree_uuid(leaf),
+ btrfs_header_chunk_tree_uuid(leaf),
BTRFS_UUID_SIZE);
btrfs_mark_buffer_dirty(leaf);
memset(&root_item, 0, sizeof(root_item));
inode_item = &root_item.inode;
- inode_item->generation = cpu_to_le64(1);
- inode_item->size = cpu_to_le64(3);
- inode_item->nlink = cpu_to_le32(1);
- inode_item->nbytes = cpu_to_le64(root->leafsize);
- inode_item->mode = cpu_to_le32(S_IFDIR | 0755);
+ btrfs_set_stack_inode_generation(inode_item, 1);
+ btrfs_set_stack_inode_size(inode_item, 3);
+ btrfs_set_stack_inode_nlink(inode_item, 1);
+ btrfs_set_stack_inode_nbytes(inode_item, root->leafsize);
+ btrfs_set_stack_inode_mode(inode_item, S_IFDIR | 0755);
- root_item.flags = 0;
- root_item.byte_limit = 0;
- inode_item->flags = cpu_to_le64(BTRFS_INODE_ROOT_ITEM_INIT);
+ btrfs_set_root_flags(&root_item, 0);
+ btrfs_set_root_limit(&root_item, 0);
+ btrfs_set_stack_inode_flags(inode_item, BTRFS_INODE_ROOT_ITEM_INIT);
btrfs_set_root_bytenr(&root_item, leaf->start);
btrfs_set_root_generation(&root_item, trans->transid);
@@ -457,8 +465,8 @@ static noinline int create_subvol(struct inode *dir,
btrfs_root_generation(&root_item));
uuid_le_gen(&new_uuid);
memcpy(root_item.uuid, new_uuid.b, BTRFS_UUID_SIZE);
- root_item.otime.sec = cpu_to_le64(cur_time.tv_sec);
- root_item.otime.nsec = cpu_to_le32(cur_time.tv_nsec);
+ btrfs_set_stack_timespec_sec(&root_item.otime, cur_time.tv_sec);
+ btrfs_set_stack_timespec_nsec(&root_item.otime, cur_time.tv_nsec);
root_item.ctime = root_item.otime;
btrfs_set_root_ctransid(&root_item, trans->transid);
btrfs_set_root_otransid(&root_item, trans->transid);
@@ -518,9 +526,14 @@ static noinline int create_subvol(struct inode *dir,
ret = btrfs_add_root_ref(trans, root->fs_info->tree_root,
objectid, root->root_key.objectid,
btrfs_ino(dir), index, name, namelen);
-
BUG_ON(ret);
+ ret = btrfs_uuid_tree_add(trans, root->fs_info->uuid_root,
+ root_item.uuid, BTRFS_UUID_KEY_SUBVOL,
+ objectid);
+ if (ret)
+ btrfs_abort_transaction(trans, root, ret);
+
fail:
trans->block_rsv = NULL;
trans->bytes_reserved = 0;
@@ -559,7 +572,7 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,
if (ret)
return ret;
- btrfs_wait_ordered_extents(root, 0);
+ btrfs_wait_ordered_extents(root, -1);
pending_snapshot = kzalloc(sizeof(*pending_snapshot), GFP_NOFS);
if (!pending_snapshot)
@@ -573,10 +586,12 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,
* 1 - root item
* 2 - root ref/backref
* 1 - root of snapshot
+ * 1 - UUID item
*/
ret = btrfs_subvolume_reserve_metadata(BTRFS_I(dir)->root,
- &pending_snapshot->block_rsv, 7,
- &pending_snapshot->qgroup_reserved);
+ &pending_snapshot->block_rsv, 8,
+ &pending_snapshot->qgroup_reserved,
+ false);
if (ret)
goto out;
@@ -671,7 +686,7 @@ static inline int btrfs_check_sticky(struct inode *dir, struct inode *inode)
* nfs_async_unlink().
*/
-static int btrfs_may_delete(struct inode *dir,struct dentry *victim,int isdir)
+static int btrfs_may_delete(struct inode *dir, struct dentry *victim, int isdir)
{
int error;
@@ -825,7 +840,6 @@ static int find_new_extents(struct btrfs_root *root,
{
struct btrfs_path *path;
struct btrfs_key min_key;
- struct btrfs_key max_key;
struct extent_buffer *leaf;
struct btrfs_file_extent_item *extent;
int type;
@@ -840,15 +854,10 @@ static int find_new_extents(struct btrfs_root *root,
min_key.type = BTRFS_EXTENT_DATA_KEY;
min_key.offset = *off;
- max_key.objectid = ino;
- max_key.type = (u8)-1;
- max_key.offset = (u64)-1;
-
path->keep_locks = 1;
- while(1) {
- ret = btrfs_search_forward(root, &min_key, &max_key,
- path, newer_than);
+ while (1) {
+ ret = btrfs_search_forward(root, &min_key, path, newer_than);
if (ret != 0)
goto none;
if (min_key.objectid != ino)
@@ -1189,7 +1198,7 @@ int btrfs_defrag_file(struct inode *inode, struct file *file,
ra = &file->f_ra;
}
- pages = kmalloc(sizeof(struct page *) * max_cluster,
+ pages = kmalloc_array(max_cluster, sizeof(struct page *),
GFP_NOFS);
if (!pages) {
ret = -ENOMEM;
@@ -1267,9 +1276,6 @@ int btrfs_defrag_file(struct inode *inode, struct file *file,
cluster = max_cluster;
}
- if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS)
- BTRFS_I(inode)->force_compress = compress_type;
-
if (i + cluster > ra_index) {
ra_index = max(i, ra_index);
btrfs_force_ra(inode->i_mapping, ra, file, ra_index,
@@ -1278,6 +1284,8 @@ int btrfs_defrag_file(struct inode *inode, struct file *file,
}
mutex_lock(&inode->i_mutex);
+ if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS)
+ BTRFS_I(inode)->force_compress = compress_type;
ret = cluster_pages_for_defrag(inode, pages, i, cluster);
if (ret < 0) {
mutex_unlock(&inode->i_mutex);
@@ -1334,10 +1342,6 @@ int btrfs_defrag_file(struct inode *inode, struct file *file,
atomic_read(&root->fs_info->async_delalloc_pages) == 0));
}
atomic_dec(&root->fs_info->async_submit_draining);
-
- mutex_lock(&inode->i_mutex);
- BTRFS_I(inode)->force_compress = BTRFS_COMPRESS_NONE;
- mutex_unlock(&inode->i_mutex);
}
if (range->compress_type == BTRFS_COMPRESS_LZO) {
@@ -1347,6 +1351,11 @@ int btrfs_defrag_file(struct inode *inode, struct file *file,
ret = defrag_count;
out_ra:
+ if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS) {
+ mutex_lock(&inode->i_mutex);
+ BTRFS_I(inode)->force_compress = BTRFS_COMPRESS_NONE;
+ mutex_unlock(&inode->i_mutex);
+ }
if (!file)
kfree(ra);
kfree(pages);
@@ -1377,9 +1386,8 @@ static noinline int btrfs_ioctl_resize(struct file *file,
if (atomic_xchg(&root->fs_info->mutually_exclusive_operation_running,
1)) {
- pr_info("btrfs: dev add/delete/balance/replace/resize operation in progress\n");
mnt_drop_write_file(file);
- return -EINVAL;
+ return BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS;
}
mutex_lock(&root->fs_info->volume_mutex);
@@ -1403,14 +1411,13 @@ static noinline int btrfs_ioctl_resize(struct file *file,
ret = -EINVAL;
goto out_free;
}
- printk(KERN_INFO "btrfs: resizing devid %llu\n",
- (unsigned long long)devid);
+ printk(KERN_INFO "btrfs: resizing devid %llu\n", devid);
}
device = btrfs_find_device(root->fs_info, devid, NULL, NULL);
if (!device) {
printk(KERN_INFO "btrfs: resizer unable to find device %llu\n",
- (unsigned long long)devid);
+ devid);
ret = -ENODEV;
goto out_free;
}
@@ -1418,7 +1425,7 @@ static noinline int btrfs_ioctl_resize(struct file *file,
if (!device->writeable) {
printk(KERN_INFO "btrfs: resizer unable to apply on "
"readonly device %llu\n",
- (unsigned long long)devid);
+ devid);
ret = -EPERM;
goto out_free;
}
@@ -1470,8 +1477,7 @@ static noinline int btrfs_ioctl_resize(struct file *file,
new_size *= root->sectorsize;
printk_in_rcu(KERN_INFO "btrfs: new size for %s is %llu\n",
- rcu_str_deref(device->name),
- (unsigned long long)new_size);
+ rcu_str_deref(device->name), new_size);
if (new_size > old_size) {
trans = btrfs_start_transaction(root, 0);
@@ -1721,13 +1727,28 @@ out:
static noinline int may_destroy_subvol(struct btrfs_root *root)
{
struct btrfs_path *path;
+ struct btrfs_dir_item *di;
struct btrfs_key key;
+ u64 dir_id;
int ret;
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
+ /* Make sure this root isn't set as the default subvol */
+ dir_id = btrfs_super_root_dir(root->fs_info->super_copy);
+ di = btrfs_lookup_dir_item(NULL, root->fs_info->tree_root, path,
+ dir_id, "default", 7, 0);
+ if (di && !IS_ERR(di)) {
+ btrfs_dir_item_key_to_cpu(path->nodes[0], di, &key);
+ if (key.objectid == root->root_key.objectid) {
+ ret = -ENOTEMPTY;
+ goto out;
+ }
+ btrfs_release_path(path);
+ }
+
key.objectid = root->root_key.objectid;
key.type = BTRFS_ROOT_REF_KEY;
key.offset = (u64)-1;
@@ -1864,7 +1885,6 @@ static noinline int search_ioctl(struct inode *inode,
{
struct btrfs_root *root;
struct btrfs_key key;
- struct btrfs_key max_key;
struct btrfs_path *path;
struct btrfs_ioctl_search_key *sk = &args->key;
struct btrfs_fs_info *info = BTRFS_I(inode)->root->fs_info;
@@ -1896,15 +1916,10 @@ static noinline int search_ioctl(struct inode *inode,
key.type = sk->min_type;
key.offset = sk->min_offset;
- max_key.objectid = sk->max_objectid;
- max_key.type = sk->max_type;
- max_key.offset = sk->max_offset;
-
path->keep_locks = 1;
- while(1) {
- ret = btrfs_search_forward(root, &key, &max_key, path,
- sk->min_transid);
+ while (1) {
+ ret = btrfs_search_forward(root, &key, path, sk->min_transid);
if (ret != 0) {
if (ret > 0)
ret = 0;
@@ -1989,32 +2004,36 @@ static noinline int btrfs_search_path_in_tree(struct btrfs_fs_info *info,
key.type = BTRFS_INODE_REF_KEY;
key.offset = (u64)-1;
- while(1) {
+ while (1) {
ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
if (ret < 0)
goto out;
+ else if (ret > 0) {
+ ret = btrfs_previous_item(root, path, dirid,
+ BTRFS_INODE_REF_KEY);
+ if (ret < 0)
+ goto out;
+ else if (ret > 0) {
+ ret = -ENOENT;
+ goto out;
+ }
+ }
l = path->nodes[0];
slot = path->slots[0];
- if (ret > 0 && slot > 0)
- slot--;
btrfs_item_key_to_cpu(l, &key, slot);
- if (ret > 0 && (key.objectid != dirid ||
- key.type != BTRFS_INODE_REF_KEY)) {
- ret = -ENOENT;
- goto out;
- }
-
iref = btrfs_item_ptr(l, slot, struct btrfs_inode_ref);
len = btrfs_inode_ref_name_len(l, iref);
ptr -= len + 1;
total_len += len + 1;
- if (ptr < name)
+ if (ptr < name) {
+ ret = -ENAMETOOLONG;
goto out;
+ }
*(ptr + len) = '/';
- read_extent_buffer(l, ptr,(unsigned long)(iref + 1), len);
+ read_extent_buffer(l, ptr, (unsigned long)(iref + 1), len);
if (key.offset == BTRFS_FIRST_FREE_OBJECTID)
break;
@@ -2024,10 +2043,8 @@ static noinline int btrfs_search_path_in_tree(struct btrfs_fs_info *info,
key.offset = (u64)-1;
dirid = key.objectid;
}
- if (ptr < name)
- goto out;
memmove(name, ptr, total_len);
- name[total_len]='\0';
+ name[total_len] = '\0';
ret = 0;
out:
btrfs_free_path(path);
@@ -2113,7 +2130,7 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
inode = dentry->d_inode;
dest = BTRFS_I(inode)->root;
- if (!capable(CAP_SYS_ADMIN)){
+ if (!capable(CAP_SYS_ADMIN)) {
/*
* Regular user. Only allow this with a special mount
* option, when the user has write+exec access to the
@@ -2174,7 +2191,7 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
* ref/backref.
*/
err = btrfs_subvolume_reserve_metadata(root, &block_rsv,
- 5, &qgroup_reserved);
+ 5, &qgroup_reserved, true);
if (err)
goto out_up_write;
@@ -2213,6 +2230,27 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
goto out_end_trans;
}
}
+
+ ret = btrfs_uuid_tree_rem(trans, root->fs_info->uuid_root,
+ dest->root_item.uuid, BTRFS_UUID_KEY_SUBVOL,
+ dest->root_key.objectid);
+ if (ret && ret != -ENOENT) {
+ btrfs_abort_transaction(trans, root, ret);
+ err = ret;
+ goto out_end_trans;
+ }
+ if (!btrfs_is_empty_uuid(dest->root_item.received_uuid)) {
+ ret = btrfs_uuid_tree_rem(trans, root->fs_info->uuid_root,
+ dest->root_item.received_uuid,
+ BTRFS_UUID_KEY_RECEIVED_SUBVOL,
+ dest->root_key.objectid);
+ if (ret && ret != -ENOENT) {
+ btrfs_abort_transaction(trans, root, ret);
+ err = ret;
+ goto out_end_trans;
+ }
+ }
+
out_end_trans:
trans->block_rsv = NULL;
trans->bytes_reserved = 0;
@@ -2326,8 +2364,7 @@ static long btrfs_ioctl_add_dev(struct btrfs_root *root, void __user *arg)
if (atomic_xchg(&root->fs_info->mutually_exclusive_operation_running,
1)) {
- pr_info("btrfs: dev add/delete/balance/replace/resize operation in progress\n");
- return -EINVAL;
+ return BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS;
}
mutex_lock(&root->fs_info->volume_mutex);
@@ -2400,10 +2437,10 @@ static long btrfs_ioctl_fs_info(struct btrfs_root *root, void __user *arg)
if (!fi_args)
return -ENOMEM;
+ mutex_lock(&fs_devices->device_list_mutex);
fi_args->num_devices = fs_devices->num_devices;
memcpy(&fi_args->fsid, root->fs_info->fsid, sizeof(fi_args->fsid));
- mutex_lock(&fs_devices->device_list_mutex);
list_for_each_entry_safe(device, next, &fs_devices->devices, dev_list) {
if (device->devid > fi_args->max_id)
fi_args->max_id = device->devid;
@@ -2424,7 +2461,6 @@ static long btrfs_ioctl_dev_info(struct btrfs_root *root, void __user *arg)
struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices;
int ret = 0;
char *s_uuid = NULL;
- char empty_uuid[BTRFS_UUID_SIZE] = {0};
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
@@ -2433,7 +2469,7 @@ static long btrfs_ioctl_dev_info(struct btrfs_root *root, void __user *arg)
if (IS_ERR(di_args))
return PTR_ERR(di_args);
- if (memcmp(empty_uuid, di_args->uuid, BTRFS_UUID_SIZE) != 0)
+ if (!btrfs_is_empty_uuid(di_args->uuid))
s_uuid = di_args->uuid;
mutex_lock(&fs_devices->device_list_mutex);
@@ -2469,150 +2505,345 @@ out:
return ret;
}
-static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
- u64 off, u64 olen, u64 destoff)
+static struct page *extent_same_get_page(struct inode *inode, u64 off)
+{
+ struct page *page;
+ pgoff_t index;
+ struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
+
+ index = off >> PAGE_CACHE_SHIFT;
+
+ page = grab_cache_page(inode->i_mapping, index);
+ if (!page)
+ return NULL;
+
+ if (!PageUptodate(page)) {
+ if (extent_read_full_page_nolock(tree, page, btrfs_get_extent,
+ 0))
+ return NULL;
+ lock_page(page);
+ if (!PageUptodate(page)) {
+ unlock_page(page);
+ page_cache_release(page);
+ return NULL;
+ }
+ }
+ unlock_page(page);
+
+ return page;
+}
+
+static inline void lock_extent_range(struct inode *inode, u64 off, u64 len)
+{
+ /* do any pending delalloc/csum calc on src, one way or
+ another, and lock file content */
+ while (1) {
+ struct btrfs_ordered_extent *ordered;
+ lock_extent(&BTRFS_I(inode)->io_tree, off, off + len - 1);
+ ordered = btrfs_lookup_first_ordered_extent(inode,
+ off + len - 1);
+ if (!ordered &&
+ !test_range_bit(&BTRFS_I(inode)->io_tree, off,
+ off + len - 1, EXTENT_DELALLOC, 0, NULL))
+ break;
+ unlock_extent(&BTRFS_I(inode)->io_tree, off, off + len - 1);
+ if (ordered)
+ btrfs_put_ordered_extent(ordered);
+ btrfs_wait_ordered_range(inode, off, len);
+ }
+}
+
+static void btrfs_double_unlock(struct inode *inode1, u64 loff1,
+ struct inode *inode2, u64 loff2, u64 len)
+{
+ unlock_extent(&BTRFS_I(inode1)->io_tree, loff1, loff1 + len - 1);
+ unlock_extent(&BTRFS_I(inode2)->io_tree, loff2, loff2 + len - 1);
+
+ mutex_unlock(&inode1->i_mutex);
+ mutex_unlock(&inode2->i_mutex);
+}
+
+static void btrfs_double_lock(struct inode *inode1, u64 loff1,
+ struct inode *inode2, u64 loff2, u64 len)
+{
+ if (inode1 < inode2) {
+ swap(inode1, inode2);
+ swap(loff1, loff2);
+ }
+
+ mutex_lock_nested(&inode1->i_mutex, I_MUTEX_PARENT);
+ lock_extent_range(inode1, loff1, len);
+ if (inode1 != inode2) {
+ mutex_lock_nested(&inode2->i_mutex, I_MUTEX_CHILD);
+ lock_extent_range(inode2, loff2, len);
+ }
+}
+
+static int btrfs_cmp_data(struct inode *src, u64 loff, struct inode *dst,
+ u64 dst_loff, u64 len)
+{
+ int ret = 0;
+ struct page *src_page, *dst_page;
+ unsigned int cmp_len = PAGE_CACHE_SIZE;
+ void *addr, *dst_addr;
+
+ while (len) {
+ if (len < PAGE_CACHE_SIZE)
+ cmp_len = len;
+
+ src_page = extent_same_get_page(src, loff);
+ if (!src_page)
+ return -EINVAL;
+ dst_page = extent_same_get_page(dst, dst_loff);
+ if (!dst_page) {
+ page_cache_release(src_page);
+ return -EINVAL;
+ }
+ addr = kmap_atomic(src_page);
+ dst_addr = kmap_atomic(dst_page);
+
+ flush_dcache_page(src_page);
+ flush_dcache_page(dst_page);
+
+ if (memcmp(addr, dst_addr, cmp_len))
+ ret = BTRFS_SAME_DATA_DIFFERS;
+
+ kunmap_atomic(addr);
+ kunmap_atomic(dst_addr);
+ page_cache_release(src_page);
+ page_cache_release(dst_page);
+
+ if (ret)
+ break;
+
+ loff += cmp_len;
+ dst_loff += cmp_len;
+ len -= cmp_len;
+ }
+
+ return ret;
+}
+
+static int extent_same_check_offsets(struct inode *inode, u64 off, u64 len)
+{
+ u64 bs = BTRFS_I(inode)->root->fs_info->sb->s_blocksize;
+
+ if (off + len > inode->i_size || off + len < off)
+ return -EINVAL;
+ /* Check that we are block aligned - btrfs_clone() requires this */
+ if (!IS_ALIGNED(off, bs) || !IS_ALIGNED(off + len, bs))
+ return -EINVAL;
+
+ return 0;
+}
+
+static int btrfs_extent_same(struct inode *src, u64 loff, u64 len,
+ struct inode *dst, u64 dst_loff)
{
- struct inode *inode = file_inode(file);
- struct btrfs_root *root = BTRFS_I(inode)->root;
- struct fd src_file;
- struct inode *src;
- struct btrfs_trans_handle *trans;
- struct btrfs_path *path;
- struct extent_buffer *leaf;
- char *buf;
- struct btrfs_key key;
- u32 nritems;
- int slot;
int ret;
- u64 len = olen;
- u64 bs = root->fs_info->sb->s_blocksize;
- int same_inode = 0;
/*
- * TODO:
- * - split compressed inline extents. annoying: we need to
- * decompress into destination's address_space (the file offset
- * may change, so source mapping won't do), then recompress (or
- * otherwise reinsert) a subrange.
- * - allow ranges within the same file to be cloned (provided
- * they don't overlap)?
+ * btrfs_clone() can't handle extents in the same file
+ * yet. Once that works, we can drop this check and replace it
+ * with a check for the same inode, but overlapping extents.
*/
-
- /* the destination must be opened for writing */
- if (!(file->f_mode & FMODE_WRITE) || (file->f_flags & O_APPEND))
+ if (src == dst)
return -EINVAL;
- if (btrfs_root_readonly(root))
- return -EROFS;
+ btrfs_double_lock(src, loff, dst, dst_loff, len);
+
+ ret = extent_same_check_offsets(src, loff, len);
+ if (ret)
+ goto out_unlock;
+
+ ret = extent_same_check_offsets(dst, dst_loff, len);
+ if (ret)
+ goto out_unlock;
+
+ /* don't make the dst file partly checksummed */
+ if ((BTRFS_I(src)->flags & BTRFS_INODE_NODATASUM) !=
+ (BTRFS_I(dst)->flags & BTRFS_INODE_NODATASUM)) {
+ ret = -EINVAL;
+ goto out_unlock;
+ }
+
+ ret = btrfs_cmp_data(src, loff, dst, dst_loff, len);
+ if (ret == 0)
+ ret = btrfs_clone(src, dst, loff, len, len, dst_loff);
+
+out_unlock:
+ btrfs_double_unlock(src, loff, dst, dst_loff, len);
+
+ return ret;
+}
+
+#define BTRFS_MAX_DEDUPE_LEN (16 * 1024 * 1024)
+
+static long btrfs_ioctl_file_extent_same(struct file *file,
+ void __user *argp)
+{
+ struct btrfs_ioctl_same_args tmp;
+ struct btrfs_ioctl_same_args *same;
+ struct btrfs_ioctl_same_extent_info *info;
+ struct inode *src = file->f_dentry->d_inode;
+ struct file *dst_file = NULL;
+ struct inode *dst;
+ u64 off;
+ u64 len;
+ int i;
+ int ret;
+ unsigned long size;
+ u64 bs = BTRFS_I(src)->root->fs_info->sb->s_blocksize;
+ bool is_admin = capable(CAP_SYS_ADMIN);
+
+ if (!(file->f_mode & FMODE_READ))
+ return -EINVAL;
ret = mnt_want_write_file(file);
if (ret)
return ret;
- src_file = fdget(srcfd);
- if (!src_file.file) {
- ret = -EBADF;
- goto out_drop_write;
+ if (copy_from_user(&tmp,
+ (struct btrfs_ioctl_same_args __user *)argp,
+ sizeof(tmp))) {
+ ret = -EFAULT;
+ goto out;
}
- ret = -EXDEV;
- if (src_file.file->f_path.mnt != file->f_path.mnt)
- goto out_fput;
+ size = sizeof(tmp) +
+ tmp.dest_count * sizeof(struct btrfs_ioctl_same_extent_info);
- src = file_inode(src_file.file);
+ same = memdup_user((struct btrfs_ioctl_same_args __user *)argp, size);
- ret = -EINVAL;
- if (src == inode)
- same_inode = 1;
+ if (IS_ERR(same)) {
+ ret = PTR_ERR(same);
+ goto out;
+ }
- /* the src must be open for reading */
- if (!(src_file.file->f_mode & FMODE_READ))
- goto out_fput;
+ off = same->logical_offset;
+ len = same->length;
- /* don't make the dst file partly checksummed */
- if ((BTRFS_I(src)->flags & BTRFS_INODE_NODATASUM) !=
- (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM))
- goto out_fput;
+ /*
+ * Limit the total length we will dedupe for each operation.
+ * This is intended to bound the total time spent in this
+ * ioctl to something sane.
+ */
+ if (len > BTRFS_MAX_DEDUPE_LEN)
+ len = BTRFS_MAX_DEDUPE_LEN;
- ret = -EISDIR;
- if (S_ISDIR(src->i_mode) || S_ISDIR(inode->i_mode))
- goto out_fput;
+ if (WARN_ON_ONCE(bs < PAGE_CACHE_SIZE)) {
+ /*
+ * Btrfs does not support blocksize < page_size. As a
+ * result, btrfs_cmp_data() won't correctly handle
+ * this situation without an update.
+ */
+ ret = -EINVAL;
+ goto out;
+ }
- ret = -EXDEV;
- if (src->i_sb != inode->i_sb)
- goto out_fput;
+ ret = -EISDIR;
+ if (S_ISDIR(src->i_mode))
+ goto out;
- ret = -ENOMEM;
- buf = vmalloc(btrfs_level_size(root, 0));
- if (!buf)
- goto out_fput;
+ ret = -EACCES;
+ if (!S_ISREG(src->i_mode))
+ goto out;
- path = btrfs_alloc_path();
- if (!path) {
- vfree(buf);
- goto out_fput;
+ /* pre-format output fields to sane values */
+ for (i = 0; i < same->dest_count; i++) {
+ same->info[i].bytes_deduped = 0ULL;
+ same->info[i].status = 0;
}
- path->reada = 2;
- if (!same_inode) {
- if (inode < src) {
- mutex_lock_nested(&inode->i_mutex, I_MUTEX_PARENT);
- mutex_lock_nested(&src->i_mutex, I_MUTEX_CHILD);
- } else {
- mutex_lock_nested(&src->i_mutex, I_MUTEX_PARENT);
- mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD);
+ ret = 0;
+ for (i = 0; i < same->dest_count; i++) {
+ info = &same->info[i];
+
+ dst_file = fget(info->fd);
+ if (!dst_file) {
+ info->status = -EBADF;
+ goto next;
}
- } else {
- mutex_lock(&src->i_mutex);
- }
- /* determine range to clone */
- ret = -EINVAL;
- if (off + len > src->i_size || off + len < off)
- goto out_unlock;
- if (len == 0)
- olen = len = src->i_size - off;
- /* if we extend to eof, continue to block boundary */
- if (off + len == src->i_size)
- len = ALIGN(src->i_size, bs) - off;
+ if (!(is_admin || (dst_file->f_mode & FMODE_WRITE))) {
+ info->status = -EINVAL;
+ goto next;
+ }
- /* verify the end result is block aligned */
- if (!IS_ALIGNED(off, bs) || !IS_ALIGNED(off + len, bs) ||
- !IS_ALIGNED(destoff, bs))
- goto out_unlock;
+ info->status = -EXDEV;
+ if (file->f_path.mnt != dst_file->f_path.mnt)
+ goto next;
- /* verify if ranges are overlapped within the same file */
- if (same_inode) {
- if (destoff + len > off && destoff < off + len)
- goto out_unlock;
- }
+ dst = dst_file->f_dentry->d_inode;
+ if (src->i_sb != dst->i_sb)
+ goto next;
- if (destoff > inode->i_size) {
- ret = btrfs_cont_expand(inode, inode->i_size, destoff);
- if (ret)
- goto out_unlock;
+ if (S_ISDIR(dst->i_mode)) {
+ info->status = -EISDIR;
+ goto next;
+ }
+
+ if (!S_ISREG(dst->i_mode)) {
+ info->status = -EACCES;
+ goto next;
+ }
+
+ info->status = btrfs_extent_same(src, off, len, dst,
+ info->logical_offset);
+ if (info->status == 0)
+ info->bytes_deduped += len;
+
+next:
+ if (dst_file)
+ fput(dst_file);
}
- /* truncate page cache pages from target inode range */
- truncate_inode_pages_range(&inode->i_data, destoff,
- PAGE_CACHE_ALIGN(destoff + len) - 1);
+ ret = copy_to_user(argp, same, size);
+ if (ret)
+ ret = -EFAULT;
- /* do any pending delalloc/csum calc on src, one way or
- another, and lock file content */
- while (1) {
- struct btrfs_ordered_extent *ordered;
- lock_extent(&BTRFS_I(src)->io_tree, off, off + len - 1);
- ordered = btrfs_lookup_first_ordered_extent(src, off + len - 1);
- if (!ordered &&
- !test_range_bit(&BTRFS_I(src)->io_tree, off, off + len - 1,
- EXTENT_DELALLOC, 0, NULL))
- break;
- unlock_extent(&BTRFS_I(src)->io_tree, off, off + len - 1);
- if (ordered)
- btrfs_put_ordered_extent(ordered);
- btrfs_wait_ordered_range(src, off, len);
+out:
+ mnt_drop_write_file(file);
+ return ret;
+}
+
+/**
+ * btrfs_clone() - clone a range from inode file to another
+ *
+ * @src: Inode to clone from
+ * @inode: Inode to clone to
+ * @off: Offset within source to start clone from
+ * @olen: Original length, passed by user, of range to clone
+ * @olen_aligned: Block-aligned value of olen, extent_same uses
+ * identical values here
+ * @destoff: Offset within @inode to start clone
+ */
+static int btrfs_clone(struct inode *src, struct inode *inode,
+ u64 off, u64 olen, u64 olen_aligned, u64 destoff)
+{
+ struct btrfs_root *root = BTRFS_I(inode)->root;
+ struct btrfs_path *path = NULL;
+ struct extent_buffer *leaf;
+ struct btrfs_trans_handle *trans;
+ char *buf = NULL;
+ struct btrfs_key key;
+ u32 nritems;
+ int slot;
+ int ret;
+ u64 len = olen_aligned;
+
+ ret = -ENOMEM;
+ buf = vmalloc(btrfs_level_size(root, 0));
+ if (!buf)
+ return ret;
+
+ path = btrfs_alloc_path();
+ if (!path) {
+ vfree(buf);
+ return ret;
}
+ path->reada = 2;
/* clone data */
key.objectid = btrfs_ino(src);
key.type = BTRFS_EXTENT_DATA_KEY;
@@ -2858,15 +3089,132 @@ next:
key.offset++;
}
ret = 0;
+
out:
btrfs_release_path(path);
+ btrfs_free_path(path);
+ vfree(buf);
+ return ret;
+}
+
+static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
+ u64 off, u64 olen, u64 destoff)
+{
+ struct inode *inode = fdentry(file)->d_inode;
+ struct btrfs_root *root = BTRFS_I(inode)->root;
+ struct fd src_file;
+ struct inode *src;
+ int ret;
+ u64 len = olen;
+ u64 bs = root->fs_info->sb->s_blocksize;
+ int same_inode = 0;
+
+ /*
+ * TODO:
+ * - split compressed inline extents. annoying: we need to
+ * decompress into destination's address_space (the file offset
+ * may change, so source mapping won't do), then recompress (or
+ * otherwise reinsert) a subrange.
+ * - allow ranges within the same file to be cloned (provided
+ * they don't overlap)?
+ */
+
+ /* the destination must be opened for writing */
+ if (!(file->f_mode & FMODE_WRITE) || (file->f_flags & O_APPEND))
+ return -EINVAL;
+
+ if (btrfs_root_readonly(root))
+ return -EROFS;
+
+ ret = mnt_want_write_file(file);
+ if (ret)
+ return ret;
+
+ src_file = fdget(srcfd);
+ if (!src_file.file) {
+ ret = -EBADF;
+ goto out_drop_write;
+ }
+
+ ret = -EXDEV;
+ if (src_file.file->f_path.mnt != file->f_path.mnt)
+ goto out_fput;
+
+ src = file_inode(src_file.file);
+
+ ret = -EINVAL;
+ if (src == inode)
+ same_inode = 1;
+
+ /* the src must be open for reading */
+ if (!(src_file.file->f_mode & FMODE_READ))
+ goto out_fput;
+
+ /* don't make the dst file partly checksummed */
+ if ((BTRFS_I(src)->flags & BTRFS_INODE_NODATASUM) !=
+ (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM))
+ goto out_fput;
+
+ ret = -EISDIR;
+ if (S_ISDIR(src->i_mode) || S_ISDIR(inode->i_mode))
+ goto out_fput;
+
+ ret = -EXDEV;
+ if (src->i_sb != inode->i_sb)
+ goto out_fput;
+
+ if (!same_inode) {
+ if (inode < src) {
+ mutex_lock_nested(&inode->i_mutex, I_MUTEX_PARENT);
+ mutex_lock_nested(&src->i_mutex, I_MUTEX_CHILD);
+ } else {
+ mutex_lock_nested(&src->i_mutex, I_MUTEX_PARENT);
+ mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD);
+ }
+ } else {
+ mutex_lock(&src->i_mutex);
+ }
+
+ /* determine range to clone */
+ ret = -EINVAL;
+ if (off + len > src->i_size || off + len < off)
+ goto out_unlock;
+ if (len == 0)
+ olen = len = src->i_size - off;
+ /* if we extend to eof, continue to block boundary */
+ if (off + len == src->i_size)
+ len = ALIGN(src->i_size, bs) - off;
+
+ /* verify the end result is block aligned */
+ if (!IS_ALIGNED(off, bs) || !IS_ALIGNED(off + len, bs) ||
+ !IS_ALIGNED(destoff, bs))
+ goto out_unlock;
+
+ /* verify if ranges are overlapped within the same file */
+ if (same_inode) {
+ if (destoff + len > off && destoff < off + len)
+ goto out_unlock;
+ }
+
+ if (destoff > inode->i_size) {
+ ret = btrfs_cont_expand(inode, inode->i_size, destoff);
+ if (ret)
+ goto out_unlock;
+ }
+
+ /* truncate page cache pages from target inode range */
+ truncate_inode_pages_range(&inode->i_data, destoff,
+ PAGE_CACHE_ALIGN(destoff + len) - 1);
+
+ lock_extent_range(src, off, len);
+
+ ret = btrfs_clone(src, inode, off, olen, len, destoff);
+
unlock_extent(&BTRFS_I(src)->io_tree, off, off + len - 1);
out_unlock:
mutex_unlock(&src->i_mutex);
if (!same_inode)
mutex_unlock(&inode->i_mutex);
- vfree(buf);
- btrfs_free_path(path);
out_fput:
fdput(src_file);
out_drop_write:
@@ -2957,7 +3305,7 @@ static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp)
}
if (!objectid)
- objectid = root->root_key.objectid;
+ objectid = BTRFS_FS_TREE_OBJECTID;
location.objectid = objectid;
location.type = BTRFS_ROOT_ITEM_KEY;
@@ -3312,11 +3660,14 @@ static long btrfs_ioctl_dev_replace(struct btrfs_root *root, void __user *arg)
switch (p->cmd) {
case BTRFS_IOCTL_DEV_REPLACE_CMD_START:
+ if (root->fs_info->sb->s_flags & MS_RDONLY) {
+ ret = -EROFS;
+ goto out;
+ }
if (atomic_xchg(
&root->fs_info->mutually_exclusive_operation_running,
1)) {
- pr_info("btrfs: dev add/delete/balance/replace/resize operation in progress\n");
- ret = -EINPROGRESS;
+ ret = BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS;
} else {
ret = btrfs_dev_replace_start(root, p);
atomic_set(
@@ -3338,7 +3689,7 @@ static long btrfs_ioctl_dev_replace(struct btrfs_root *root, void __user *arg)
if (copy_to_user(arg, p, sizeof(*p)))
ret = -EFAULT;
-
+out:
kfree(p);
return ret;
}
@@ -3560,8 +3911,7 @@ again:
} else {
/* this is (1) */
mutex_unlock(&fs_info->balance_mutex);
- pr_info("btrfs: dev add/delete/balance/replace/resize operation in progress\n");
- ret = -EINVAL;
+ ret = BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS;
goto out;
}
@@ -3967,6 +4317,7 @@ static long btrfs_ioctl_set_received_subvol(struct file *file,
struct btrfs_trans_handle *trans;
struct timespec ct = CURRENT_TIME;
int ret = 0;
+ int received_uuid_changed;
ret = mnt_want_write_file(file);
if (ret < 0)
@@ -3996,7 +4347,11 @@ static long btrfs_ioctl_set_received_subvol(struct file *file,
goto out;
}
- trans = btrfs_start_transaction(root, 1);
+ /*
+ * 1 - root item
+ * 2 - uuid items (received uuid + subvol uuid)
+ */
+ trans = btrfs_start_transaction(root, 3);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
trans = NULL;
@@ -4007,24 +4362,42 @@ static long btrfs_ioctl_set_received_subvol(struct file *file,
sa->rtime.sec = ct.tv_sec;
sa->rtime.nsec = ct.tv_nsec;
+ received_uuid_changed = memcmp(root_item->received_uuid, sa->uuid,
+ BTRFS_UUID_SIZE);
+ if (received_uuid_changed &&
+ !btrfs_is_empty_uuid(root_item->received_uuid))
+ btrfs_uuid_tree_rem(trans, root->fs_info->uuid_root,
+ root_item->received_uuid,
+ BTRFS_UUID_KEY_RECEIVED_SUBVOL,
+ root->root_key.objectid);
memcpy(root_item->received_uuid, sa->uuid, BTRFS_UUID_SIZE);
btrfs_set_root_stransid(root_item, sa->stransid);
btrfs_set_root_rtransid(root_item, sa->rtransid);
- root_item->stime.sec = cpu_to_le64(sa->stime.sec);
- root_item->stime.nsec = cpu_to_le32(sa->stime.nsec);
- root_item->rtime.sec = cpu_to_le64(sa->rtime.sec);
- root_item->rtime.nsec = cpu_to_le32(sa->rtime.nsec);
+ btrfs_set_stack_timespec_sec(&root_item->stime, sa->stime.sec);
+ btrfs_set_stack_timespec_nsec(&root_item->stime, sa->stime.nsec);
+ btrfs_set_stack_timespec_sec(&root_item->rtime, sa->rtime.sec);
+ btrfs_set_stack_timespec_nsec(&root_item->rtime, sa->rtime.nsec);
ret = btrfs_update_root(trans, root->fs_info->tree_root,
&root->root_key, &root->root_item);
if (ret < 0) {
btrfs_end_transaction(trans, root);
- trans = NULL;
goto out;
- } else {
- ret = btrfs_commit_transaction(trans, root);
- if (ret < 0)
+ }
+ if (received_uuid_changed && !btrfs_is_empty_uuid(sa->uuid)) {
+ ret = btrfs_uuid_tree_add(trans, root->fs_info->uuid_root,
+ sa->uuid,
+ BTRFS_UUID_KEY_RECEIVED_SUBVOL,
+ root->root_key.objectid);
+ if (ret < 0 && ret != -EEXIST) {
+ btrfs_abort_transaction(trans, root, ret);
goto out;
+ }
+ }
+ ret = btrfs_commit_transaction(trans, root);
+ if (ret < 0) {
+ btrfs_abort_transaction(trans, root, ret);
+ goto out;
}
ret = copy_to_user(arg, sa, sizeof(*sa));
@@ -4041,18 +4414,22 @@ out:
static int btrfs_ioctl_get_fslabel(struct file *file, void __user *arg)
{
struct btrfs_root *root = BTRFS_I(file_inode(file))->root;
- const char *label = root->fs_info->super_copy->label;
- size_t len = strnlen(label, BTRFS_LABEL_SIZE);
+ size_t len;
int ret;
+ char label[BTRFS_LABEL_SIZE];
+
+ spin_lock(&root->fs_info->super_lock);
+ memcpy(label, root->fs_info->super_copy->label, BTRFS_LABEL_SIZE);
+ spin_unlock(&root->fs_info->super_lock);
+
+ len = strnlen(label, BTRFS_LABEL_SIZE);
if (len == BTRFS_LABEL_SIZE) {
pr_warn("btrfs: label is too long, return the first %zu bytes\n",
--len);
}
- mutex_lock(&root->fs_info->volume_mutex);
ret = copy_to_user(arg, label, len);
- mutex_unlock(&root->fs_info->volume_mutex);
return ret ? -EFAULT : 0;
}
@@ -4081,18 +4458,18 @@ static int btrfs_ioctl_set_fslabel(struct file *file, void __user *arg)
if (ret)
return ret;
- mutex_lock(&root->fs_info->volume_mutex);
trans = btrfs_start_transaction(root, 0);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
goto out_unlock;
}
+ spin_lock(&root->fs_info->super_lock);
strcpy(super_block->label, label);
+ spin_unlock(&root->fs_info->super_lock);
ret = btrfs_end_transaction(trans, root);
out_unlock:
- mutex_unlock(&root->fs_info->volume_mutex);
mnt_drop_write_file(file);
return ret;
}
@@ -4162,9 +4539,15 @@ long btrfs_ioctl(struct file *file, unsigned int
return btrfs_ioctl_logical_to_ino(root, argp);
case BTRFS_IOC_SPACE_INFO:
return btrfs_ioctl_space_info(root, argp);
- case BTRFS_IOC_SYNC:
- btrfs_sync_fs(file->f_dentry->d_sb, 1);
- return 0;
+ case BTRFS_IOC_SYNC: {
+ int ret;
+
+ ret = btrfs_start_delalloc_roots(root->fs_info, 0);
+ if (ret)
+ return ret;
+ ret = btrfs_sync_fs(file->f_dentry->d_sb, 1);
+ return ret;
+ }
case BTRFS_IOC_START_SYNC:
return btrfs_ioctl_start_sync(root, argp);
case BTRFS_IOC_WAIT_SYNC:
@@ -4207,6 +4590,8 @@ long btrfs_ioctl(struct file *file, unsigned int
return btrfs_ioctl_get_fslabel(file, argp);
case BTRFS_IOC_SET_FSLABEL:
return btrfs_ioctl_set_fslabel(file, argp);
+ case BTRFS_IOC_FILE_EXTENT_SAME:
+ return btrfs_ioctl_file_extent_same(file, argp);
}
return -ENOTTY;
diff --git a/fs/btrfs/lzo.c b/fs/btrfs/lzo.c
index f93151a..b6a6f07 100644
--- a/fs/btrfs/lzo.c
+++ b/fs/btrfs/lzo.c
@@ -207,8 +207,10 @@ static int lzo_compress_pages(struct list_head *ws,
}
/* we're making it bigger, give up */
- if (tot_in > 8192 && tot_in < tot_out)
+ if (tot_in > 8192 && tot_in < tot_out) {
+ ret = -1;
goto out;
+ }
/* we're all done */
if (tot_in >= len)
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 8136982..25a8f38 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -67,7 +67,7 @@ static void ordered_data_tree_panic(struct inode *inode, int errno,
{
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
btrfs_panic(fs_info, errno, "Inconsistency in ordered tree at offset "
- "%llu\n", (unsigned long long)offset);
+ "%llu\n", offset);
}
/*
@@ -205,6 +205,7 @@ static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
entry->bytes_left = len;
entry->inode = igrab(inode);
entry->compress_type = compress_type;
+ entry->truncated_len = (u64)-1;
if (type != BTRFS_ORDERED_IO_DONE && type != BTRFS_ORDERED_COMPLETE)
set_bit(type, &entry->flags);
@@ -336,14 +337,12 @@ int btrfs_dec_test_first_ordered_pending(struct inode *inode,
*file_offset = dec_end;
if (dec_start > dec_end) {
printk(KERN_CRIT "bad ordering dec_start %llu end %llu\n",
- (unsigned long long)dec_start,
- (unsigned long long)dec_end);
+ dec_start, dec_end);
}
to_dec = dec_end - dec_start;
if (to_dec > entry->bytes_left) {
printk(KERN_CRIT "bad ordered accounting left %llu size %llu\n",
- (unsigned long long)entry->bytes_left,
- (unsigned long long)to_dec);
+ entry->bytes_left, to_dec);
}
entry->bytes_left -= to_dec;
if (!uptodate)
@@ -403,8 +402,7 @@ have_entry:
if (io_size > entry->bytes_left) {
printk(KERN_CRIT "bad ordered accounting left %llu size %llu\n",
- (unsigned long long)entry->bytes_left,
- (unsigned long long)io_size);
+ entry->bytes_left, io_size);
}
entry->bytes_left -= io_size;
if (!uptodate)
@@ -539,7 +537,9 @@ void btrfs_remove_ordered_extent(struct inode *inode,
*/
if (RB_EMPTY_ROOT(&tree->tree) &&
!mapping_tagged(inode->i_mapping, PAGECACHE_TAG_DIRTY)) {
+ spin_lock(&root->fs_info->ordered_root_lock);
list_del_init(&BTRFS_I(inode)->ordered_operations);
+ spin_unlock(&root->fs_info->ordered_root_lock);
}
if (!root->nr_ordered_extents) {
@@ -565,11 +565,11 @@ static void btrfs_run_ordered_extent_work(struct btrfs_work *work)
* wait for all the ordered extents in a root. This is done when balancing
* space between drives.
*/
-void btrfs_wait_ordered_extents(struct btrfs_root *root, int delay_iput)
+int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr)
{
struct list_head splice, works;
struct btrfs_ordered_extent *ordered, *next;
- struct inode *inode;
+ int count = 0;
INIT_LIST_HEAD(&splice);
INIT_LIST_HEAD(&works);
@@ -577,20 +577,11 @@ void btrfs_wait_ordered_extents(struct btrfs_root *root, int delay_iput)
mutex_lock(&root->fs_info->ordered_operations_mutex);
spin_lock(&root->ordered_extent_lock);
list_splice_init(&root->ordered_extents, &splice);
- while (!list_empty(&splice)) {
+ while (!list_empty(&splice) && nr) {
ordered = list_first_entry(&splice, struct btrfs_ordered_extent,
root_extent_list);
list_move_tail(&ordered->root_extent_list,
&root->ordered_extents);
- /*
- * the inode may be getting freed (in sys_unlink path).
- */
- inode = igrab(ordered->inode);
- if (!inode) {
- cond_resched_lock(&root->ordered_extent_lock);
- continue;
- }
-
atomic_inc(&ordered->refs);
spin_unlock(&root->ordered_extent_lock);
@@ -601,36 +592,35 @@ void btrfs_wait_ordered_extents(struct btrfs_root *root, int delay_iput)
cond_resched();
spin_lock(&root->ordered_extent_lock);
+ if (nr != -1)
+ nr--;
+ count++;
}
+ list_splice_tail(&splice, &root->ordered_extents);
spin_unlock(&root->ordered_extent_lock);
list_for_each_entry_safe(ordered, next, &works, work_list) {
list_del_init(&ordered->work_list);
wait_for_completion(&ordered->completion);
-
- inode = ordered->inode;
btrfs_put_ordered_extent(ordered);
- if (delay_iput)
- btrfs_add_delayed_iput(inode);
- else
- iput(inode);
-
cond_resched();
}
mutex_unlock(&root->fs_info->ordered_operations_mutex);
+
+ return count;
}
-void btrfs_wait_all_ordered_extents(struct btrfs_fs_info *fs_info,
- int delay_iput)
+void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr)
{
struct btrfs_root *root;
struct list_head splice;
+ int done;
INIT_LIST_HEAD(&splice);
spin_lock(&fs_info->ordered_root_lock);
list_splice_init(&fs_info->ordered_roots, &splice);
- while (!list_empty(&splice)) {
+ while (!list_empty(&splice) && nr) {
root = list_first_entry(&splice, struct btrfs_root,
ordered_root);
root = btrfs_grab_fs_root(root);
@@ -639,10 +629,14 @@ void btrfs_wait_all_ordered_extents(struct btrfs_fs_info *fs_info,
&fs_info->ordered_roots);
spin_unlock(&fs_info->ordered_root_lock);
- btrfs_wait_ordered_extents(root, delay_iput);
+ done = btrfs_wait_ordered_extents(root, nr);
btrfs_put_fs_root(root);
spin_lock(&fs_info->ordered_root_lock);
+ if (nr != -1) {
+ nr -= done;
+ WARN_ON(nr < 0);
+ }
}
spin_unlock(&fs_info->ordered_root_lock);
}
@@ -671,7 +665,7 @@ int btrfs_run_ordered_operations(struct btrfs_trans_handle *trans,
INIT_LIST_HEAD(&splice);
INIT_LIST_HEAD(&works);
- mutex_lock(&root->fs_info->ordered_operations_mutex);
+ mutex_lock(&root->fs_info->ordered_extent_flush_mutex);
spin_lock(&root->fs_info->ordered_root_lock);
list_splice_init(&cur_trans->ordered_operations, &splice);
while (!list_empty(&splice)) {
@@ -718,7 +712,7 @@ out:
list_del_init(&work->list);
btrfs_wait_and_free_delalloc_work(work);
}
- mutex_unlock(&root->fs_info->ordered_operations_mutex);
+ mutex_unlock(&root->fs_info->ordered_extent_flush_mutex);
return ret;
}
@@ -754,8 +748,9 @@ void btrfs_start_ordered_extent(struct inode *inode,
/*
* Used to wait on ordered extents across a large range of bytes.
*/
-void btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len)
+int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len)
{
+ int ret = 0;
u64 end;
u64 orig_end;
struct btrfs_ordered_extent *ordered;
@@ -771,8 +766,9 @@ void btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len)
/* start IO across the range first to instantiate any delalloc
* extents
*/
- filemap_fdatawrite_range(inode->i_mapping, start, orig_end);
-
+ ret = filemap_fdatawrite_range(inode->i_mapping, start, orig_end);
+ if (ret)
+ return ret;
/*
* So with compression we will find and lock a dirty page and clear the
* first one as dirty, setup an async extent, and immediately return
@@ -788,10 +784,15 @@ void btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len)
* right and you are wrong.
*/
if (test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT,
- &BTRFS_I(inode)->runtime_flags))
- filemap_fdatawrite_range(inode->i_mapping, start, orig_end);
-
- filemap_fdatawait_range(inode->i_mapping, start, orig_end);
+ &BTRFS_I(inode)->runtime_flags)) {
+ ret = filemap_fdatawrite_range(inode->i_mapping, start,
+ orig_end);
+ if (ret)
+ return ret;
+ }
+ ret = filemap_fdatawait_range(inode->i_mapping, start, orig_end);
+ if (ret)
+ return ret;
end = orig_end;
while (1) {
@@ -808,11 +809,14 @@ void btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len)
}
btrfs_start_ordered_extent(inode, ordered, 1);
end = ordered->file_offset;
+ if (test_bit(BTRFS_ORDERED_IOERR, &ordered->flags))
+ ret = -EIO;
btrfs_put_ordered_extent(ordered);
- if (end == 0 || end == start)
+ if (ret || end == 0 || end == start)
break;
end--;
}
+ return ret;
}
/*
@@ -923,12 +927,16 @@ int btrfs_ordered_update_i_size(struct inode *inode, u64 offset,
struct btrfs_ordered_extent *test;
int ret = 1;
- if (ordered)
+ spin_lock_irq(&tree->lock);
+ if (ordered) {
offset = entry_end(ordered);
- else
+ if (test_bit(BTRFS_ORDERED_TRUNCATED, &ordered->flags))
+ offset = min(offset,
+ ordered->file_offset +
+ ordered->truncated_len);
+ } else {
offset = ALIGN(offset, BTRFS_I(inode)->root->sectorsize);
-
- spin_lock_irq(&tree->lock);
+ }
disk_i_size = BTRFS_I(inode)->disk_i_size;
/* truncate file */
@@ -1092,7 +1100,7 @@ void btrfs_add_ordered_operation(struct btrfs_trans_handle *trans,
* if this file hasn't been changed since the last transaction
* commit, we can safely return without doing anything
*/
- if (last_mod < root->fs_info->last_trans_committed)
+ if (last_mod <= root->fs_info->last_trans_committed)
return;
spin_lock(&root->fs_info->ordered_root_lock);
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h
index 68844d5..9b0450f 100644
--- a/fs/btrfs/ordered-data.h
+++ b/fs/btrfs/ordered-data.h
@@ -69,6 +69,7 @@ struct btrfs_ordered_sum {
* the isize. */
#define BTRFS_ORDERED_LOGGED_CSUM 8 /* We've logged the csums on this ordered
ordered extent */
+#define BTRFS_ORDERED_TRUNCATED 9 /* Set when we have to truncate an extent */
struct btrfs_ordered_extent {
/* logical offset in the file */
@@ -96,6 +97,12 @@ struct btrfs_ordered_extent {
*/
u64 outstanding_isize;
+ /*
+ * If we get truncated we need to adjust the file extent we enter for
+ * this ordered extent so that we do not expose stale data.
+ */
+ u64 truncated_len;
+
/* flags (described above) */
unsigned long flags;
@@ -173,7 +180,7 @@ struct btrfs_ordered_extent *btrfs_lookup_ordered_extent(struct inode *inode,
u64 file_offset);
void btrfs_start_ordered_extent(struct inode *inode,
struct btrfs_ordered_extent *entry, int wait);
-void btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len);
+int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len);
struct btrfs_ordered_extent *
btrfs_lookup_first_ordered_extent(struct inode * inode, u64 file_offset);
struct btrfs_ordered_extent *btrfs_lookup_ordered_range(struct inode *inode,
@@ -188,9 +195,8 @@ int btrfs_run_ordered_operations(struct btrfs_trans_handle *trans,
void btrfs_add_ordered_operation(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct inode *inode);
-void btrfs_wait_ordered_extents(struct btrfs_root *root, int delay_iput);
-void btrfs_wait_all_ordered_extents(struct btrfs_fs_info *fs_info,
- int delay_iput);
+int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr);
+void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr);
void btrfs_get_logged_extents(struct btrfs_root *log, struct inode *inode);
void btrfs_wait_logged_extents(struct btrfs_root *log, u64 transid);
void btrfs_free_logged_extents(struct btrfs_root *log, u64 transid);
diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c
index dc0024f..417053b 100644
--- a/fs/btrfs/print-tree.c
+++ b/fs/btrfs/print-tree.c
@@ -26,14 +26,12 @@ static void print_chunk(struct extent_buffer *eb, struct btrfs_chunk *chunk)
int i;
printk(KERN_INFO "\t\tchunk length %llu owner %llu type %llu "
"num_stripes %d\n",
- (unsigned long long)btrfs_chunk_length(eb, chunk),
- (unsigned long long)btrfs_chunk_owner(eb, chunk),
- (unsigned long long)btrfs_chunk_type(eb, chunk),
- num_stripes);
+ btrfs_chunk_length(eb, chunk), btrfs_chunk_owner(eb, chunk),
+ btrfs_chunk_type(eb, chunk), num_stripes);
for (i = 0 ; i < num_stripes ; i++) {
printk(KERN_INFO "\t\t\tstripe %d devid %llu offset %llu\n", i,
- (unsigned long long)btrfs_stripe_devid_nr(eb, chunk, i),
- (unsigned long long)btrfs_stripe_offset_nr(eb, chunk, i));
+ btrfs_stripe_devid_nr(eb, chunk, i),
+ btrfs_stripe_offset_nr(eb, chunk, i));
}
}
static void print_dev_item(struct extent_buffer *eb,
@@ -41,18 +39,18 @@ static void print_dev_item(struct extent_buffer *eb,
{
printk(KERN_INFO "\t\tdev item devid %llu "
"total_bytes %llu bytes used %llu\n",
- (unsigned long long)btrfs_device_id(eb, dev_item),
- (unsigned long long)btrfs_device_total_bytes(eb, dev_item),
- (unsigned long long)btrfs_device_bytes_used(eb, dev_item));
+ btrfs_device_id(eb, dev_item),
+ btrfs_device_total_bytes(eb, dev_item),
+ btrfs_device_bytes_used(eb, dev_item));
}
static void print_extent_data_ref(struct extent_buffer *eb,
struct btrfs_extent_data_ref *ref)
{
printk(KERN_INFO "\t\textent data backref root %llu "
"objectid %llu offset %llu count %u\n",
- (unsigned long long)btrfs_extent_data_ref_root(eb, ref),
- (unsigned long long)btrfs_extent_data_ref_objectid(eb, ref),
- (unsigned long long)btrfs_extent_data_ref_offset(eb, ref),
+ btrfs_extent_data_ref_root(eb, ref),
+ btrfs_extent_data_ref_objectid(eb, ref),
+ btrfs_extent_data_ref_offset(eb, ref),
btrfs_extent_data_ref_count(eb, ref));
}
@@ -87,19 +85,17 @@ static void print_extent_item(struct extent_buffer *eb, int slot)
flags = btrfs_extent_flags(eb, ei);
printk(KERN_INFO "\t\textent refs %llu gen %llu flags %llu\n",
- (unsigned long long)btrfs_extent_refs(eb, ei),
- (unsigned long long)btrfs_extent_generation(eb, ei),
- (unsigned long long)flags);
+ btrfs_extent_refs(eb, ei), btrfs_extent_generation(eb, ei),
+ flags);
if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
struct btrfs_tree_block_info *info;
info = (struct btrfs_tree_block_info *)(ei + 1);
btrfs_tree_block_key(eb, info, &key);
- printk(KERN_INFO "\t\ttree block key (%llu %x %llu) "
+ printk(KERN_INFO "\t\ttree block key (%llu %u %llu) "
"level %d\n",
- (unsigned long long)btrfs_disk_key_objectid(&key),
- key.type,
- (unsigned long long)btrfs_disk_key_offset(&key),
+ btrfs_disk_key_objectid(&key), key.type,
+ btrfs_disk_key_offset(&key),
btrfs_tree_block_level(eb, info));
iref = (struct btrfs_extent_inline_ref *)(info + 1);
} else {
@@ -115,11 +111,11 @@ static void print_extent_item(struct extent_buffer *eb, int slot)
switch (type) {
case BTRFS_TREE_BLOCK_REF_KEY:
printk(KERN_INFO "\t\ttree block backref "
- "root %llu\n", (unsigned long long)offset);
+ "root %llu\n", offset);
break;
case BTRFS_SHARED_BLOCK_REF_KEY:
printk(KERN_INFO "\t\tshared block backref "
- "parent %llu\n", (unsigned long long)offset);
+ "parent %llu\n", offset);
break;
case BTRFS_EXTENT_DATA_REF_KEY:
dref = (struct btrfs_extent_data_ref *)(&iref->offset);
@@ -129,8 +125,7 @@ static void print_extent_item(struct extent_buffer *eb, int slot)
sref = (struct btrfs_shared_data_ref *)(iref + 1);
printk(KERN_INFO "\t\tshared data backref "
"parent %llu count %u\n",
- (unsigned long long)offset,
- btrfs_shared_data_ref_count(eb, sref));
+ offset, btrfs_shared_data_ref_count(eb, sref));
break;
default:
BUG();
@@ -148,13 +143,32 @@ static void print_extent_ref_v0(struct extent_buffer *eb, int slot)
ref0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_ref_v0);
printk("\t\textent back ref root %llu gen %llu "
"owner %llu num_refs %lu\n",
- (unsigned long long)btrfs_ref_root_v0(eb, ref0),
- (unsigned long long)btrfs_ref_generation_v0(eb, ref0),
- (unsigned long long)btrfs_ref_objectid_v0(eb, ref0),
+ btrfs_ref_root_v0(eb, ref0),
+ btrfs_ref_generation_v0(eb, ref0),
+ btrfs_ref_objectid_v0(eb, ref0),
(unsigned long)btrfs_ref_count_v0(eb, ref0));
}
#endif
+static void print_uuid_item(struct extent_buffer *l, unsigned long offset,
+ u32 item_size)
+{
+ if (!IS_ALIGNED(item_size, sizeof(u64))) {
+ pr_warn("btrfs: uuid item with illegal size %lu!\n",
+ (unsigned long)item_size);
+ return;
+ }
+ while (item_size) {
+ __le64 subvol_id;
+
+ read_extent_buffer(l, &subvol_id, offset, sizeof(subvol_id));
+ printk(KERN_INFO "\t\tsubvol_id %llu\n",
+ (unsigned long long)le64_to_cpu(subvol_id));
+ item_size -= sizeof(u64);
+ offset += sizeof(u64);
+ }
+}
+
void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l)
{
int i;
@@ -177,39 +191,34 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l)
nr = btrfs_header_nritems(l);
btrfs_info(root->fs_info, "leaf %llu total ptrs %d free space %d",
- (unsigned long long)btrfs_header_bytenr(l), nr,
- btrfs_leaf_free_space(root, l));
+ btrfs_header_bytenr(l), nr, btrfs_leaf_free_space(root, l));
for (i = 0 ; i < nr ; i++) {
- item = btrfs_item_nr(l, i);
+ item = btrfs_item_nr(i);
btrfs_item_key_to_cpu(l, &key, i);
type = btrfs_key_type(&key);
- printk(KERN_INFO "\titem %d key (%llu %x %llu) itemoff %d "
+ printk(KERN_INFO "\titem %d key (%llu %u %llu) itemoff %d "
"itemsize %d\n",
- i,
- (unsigned long long)key.objectid, type,
- (unsigned long long)key.offset,
+ i, key.objectid, type, key.offset,
btrfs_item_offset(l, item), btrfs_item_size(l, item));
switch (type) {
case BTRFS_INODE_ITEM_KEY:
ii = btrfs_item_ptr(l, i, struct btrfs_inode_item);
printk(KERN_INFO "\t\tinode generation %llu size %llu "
"mode %o\n",
- (unsigned long long)
btrfs_inode_generation(l, ii),
- (unsigned long long)btrfs_inode_size(l, ii),
+ btrfs_inode_size(l, ii),
btrfs_inode_mode(l, ii));
break;
case BTRFS_DIR_ITEM_KEY:
di = btrfs_item_ptr(l, i, struct btrfs_dir_item);
btrfs_dir_item_key_to_cpu(l, di, &found_key);
printk(KERN_INFO "\t\tdir oid %llu type %u\n",
- (unsigned long long)found_key.objectid,
+ found_key.objectid,
btrfs_dir_type(l, di));
break;
case BTRFS_ROOT_ITEM_KEY:
ri = btrfs_item_ptr(l, i, struct btrfs_root_item);
printk(KERN_INFO "\t\troot data bytenr %llu refs %u\n",
- (unsigned long long)
btrfs_disk_root_bytenr(l, ri),
btrfs_disk_root_refs(l, ri));
break;
@@ -245,17 +254,12 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l)
}
printk(KERN_INFO "\t\textent data disk bytenr %llu "
"nr %llu\n",
- (unsigned long long)
btrfs_file_extent_disk_bytenr(l, fi),
- (unsigned long long)
btrfs_file_extent_disk_num_bytes(l, fi));
printk(KERN_INFO "\t\textent data offset %llu "
"nr %llu ram %llu\n",
- (unsigned long long)
btrfs_file_extent_offset(l, fi),
- (unsigned long long)
btrfs_file_extent_num_bytes(l, fi),
- (unsigned long long)
btrfs_file_extent_ram_bytes(l, fi));
break;
case BTRFS_EXTENT_REF_V0_KEY:
@@ -269,7 +273,6 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l)
bi = btrfs_item_ptr(l, i,
struct btrfs_block_group_item);
printk(KERN_INFO "\t\tblock group used %llu\n",
- (unsigned long long)
btrfs_disk_block_group_used(l, bi));
break;
case BTRFS_CHUNK_ITEM_KEY:
@@ -286,13 +289,9 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l)
printk(KERN_INFO "\t\tdev extent chunk_tree %llu\n"
"\t\tchunk objectid %llu chunk offset %llu "
"length %llu\n",
- (unsigned long long)
btrfs_dev_extent_chunk_tree(l, dev_extent),
- (unsigned long long)
btrfs_dev_extent_chunk_objectid(l, dev_extent),
- (unsigned long long)
btrfs_dev_extent_chunk_offset(l, dev_extent),
- (unsigned long long)
btrfs_dev_extent_length(l, dev_extent));
break;
case BTRFS_DEV_STATS_KEY:
@@ -301,6 +300,11 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l)
case BTRFS_DEV_REPLACE_KEY:
printk(KERN_INFO "\t\tdev replace\n");
break;
+ case BTRFS_UUID_KEY_SUBVOL:
+ case BTRFS_UUID_KEY_RECEIVED_SUBVOL:
+ print_uuid_item(l, btrfs_item_ptr_offset(l, i),
+ btrfs_item_size_nr(l, i));
+ break;
};
}
}
@@ -320,16 +324,13 @@ void btrfs_print_tree(struct btrfs_root *root, struct extent_buffer *c)
return;
}
btrfs_info(root->fs_info, "node %llu level %d total ptrs %d free spc %u",
- (unsigned long long)btrfs_header_bytenr(c),
- level, nr, (u32)BTRFS_NODEPTRS_PER_BLOCK(root) - nr);
+ btrfs_header_bytenr(c), level, nr,
+ (u32)BTRFS_NODEPTRS_PER_BLOCK(root) - nr);
for (i = 0; i < nr; i++) {
btrfs_node_key_to_cpu(c, &key, i);
printk(KERN_INFO "\tkey %d (%llu %u %llu) block %llu\n",
- i,
- (unsigned long long)key.objectid,
- key.type,
- (unsigned long long)key.offset,
- (unsigned long long)btrfs_node_blockptr(c, i));
+ i, key.objectid, key.type, key.offset,
+ btrfs_node_blockptr(c, i));
}
for (i = 0; i < nr; i++) {
struct extent_buffer *next = read_tree_block(root,
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 1280eff..4e6ef49 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -157,18 +157,11 @@ static struct btrfs_qgroup *add_qgroup_rb(struct btrfs_fs_info *fs_info,
return qgroup;
}
-/* must be called with qgroup_lock held */
-static int del_qgroup_rb(struct btrfs_fs_info *fs_info, u64 qgroupid)
+static void __del_qgroup_rb(struct btrfs_qgroup *qgroup)
{
- struct btrfs_qgroup *qgroup = find_qgroup_rb(fs_info, qgroupid);
struct btrfs_qgroup_list *list;
- if (!qgroup)
- return -ENOENT;
-
- rb_erase(&qgroup->node, &fs_info->qgroup_tree);
list_del(&qgroup->dirty);
-
while (!list_empty(&qgroup->groups)) {
list = list_first_entry(&qgroup->groups,
struct btrfs_qgroup_list, next_group);
@@ -185,7 +178,18 @@ static int del_qgroup_rb(struct btrfs_fs_info *fs_info, u64 qgroupid)
kfree(list);
}
kfree(qgroup);
+}
+/* must be called with qgroup_lock held */
+static int del_qgroup_rb(struct btrfs_fs_info *fs_info, u64 qgroupid)
+{
+ struct btrfs_qgroup *qgroup = find_qgroup_rb(fs_info, qgroupid);
+
+ if (!qgroup)
+ return -ENOENT;
+
+ rb_erase(&qgroup->node, &fs_info->qgroup_tree);
+ __del_qgroup_rb(qgroup);
return 0;
}
@@ -394,8 +398,7 @@ next1:
if (ret == -ENOENT) {
printk(KERN_WARNING
"btrfs: orphan qgroup relation 0x%llx->0x%llx\n",
- (unsigned long long)found_key.objectid,
- (unsigned long long)found_key.offset);
+ found_key.objectid, found_key.offset);
ret = 0; /* ignore the error */
}
if (ret)
@@ -428,39 +431,28 @@ out:
}
/*
- * This is only called from close_ctree() or open_ctree(), both in single-
- * treaded paths. Clean up the in-memory structures. No locking needed.
+ * This is called from close_ctree() or open_ctree() or btrfs_quota_disable(),
+ * first two are in single-threaded paths.And for the third one, we have set
+ * quota_root to be null with qgroup_lock held before, so it is safe to clean
+ * up the in-memory structures without qgroup_lock held.
*/
void btrfs_free_qgroup_config(struct btrfs_fs_info *fs_info)
{
struct rb_node *n;
struct btrfs_qgroup *qgroup;
- struct btrfs_qgroup_list *list;
while ((n = rb_first(&fs_info->qgroup_tree))) {
qgroup = rb_entry(n, struct btrfs_qgroup, node);
rb_erase(n, &fs_info->qgroup_tree);
-
- while (!list_empty(&qgroup->groups)) {
- list = list_first_entry(&qgroup->groups,
- struct btrfs_qgroup_list,
- next_group);
- list_del(&list->next_group);
- list_del(&list->next_member);
- kfree(list);
- }
-
- while (!list_empty(&qgroup->members)) {
- list = list_first_entry(&qgroup->members,
- struct btrfs_qgroup_list,
- next_member);
- list_del(&list->next_group);
- list_del(&list->next_member);
- kfree(list);
- }
- kfree(qgroup);
+ __del_qgroup_rb(qgroup);
}
+ /*
+ * we call btrfs_free_qgroup_config() when umounting
+ * filesystem and disabling quota, so we set qgroup_ulit
+ * to be null here to avoid double free.
+ */
ulist_free(fs_info->qgroup_ulist);
+ fs_info->qgroup_ulist = NULL;
}
static int add_qgroup_relation_item(struct btrfs_trans_handle *trans,
@@ -946,13 +938,9 @@ int btrfs_quota_disable(struct btrfs_trans_handle *trans,
fs_info->pending_quota_state = 0;
quota_root = fs_info->quota_root;
fs_info->quota_root = NULL;
- btrfs_free_qgroup_config(fs_info);
spin_unlock(&fs_info->qgroup_lock);
- if (!quota_root) {
- ret = -EINVAL;
- goto out;
- }
+ btrfs_free_qgroup_config(fs_info);
ret = btrfs_clean_quota_tree(trans, quota_root);
if (ret)
@@ -1174,7 +1162,7 @@ int btrfs_limit_qgroup(struct btrfs_trans_handle *trans,
if (ret) {
fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
printk(KERN_INFO "unable to update quota limit for %llu\n",
- (unsigned long long)qgroupid);
+ qgroupid);
}
spin_lock(&fs_info->qgroup_lock);
@@ -1884,10 +1872,9 @@ qgroup_rescan_leaf(struct btrfs_fs_info *fs_info, struct btrfs_path *path,
path, 1, 0);
pr_debug("current progress key (%llu %u %llu), search_slot ret %d\n",
- (unsigned long long)fs_info->qgroup_rescan_progress.objectid,
+ fs_info->qgroup_rescan_progress.objectid,
fs_info->qgroup_rescan_progress.type,
- (unsigned long long)fs_info->qgroup_rescan_progress.offset,
- ret);
+ fs_info->qgroup_rescan_progress.offset, ret);
if (ret) {
/*
diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c
index 0525e13..24ac218 100644
--- a/fs/btrfs/raid56.c
+++ b/fs/btrfs/raid56.c
@@ -33,7 +33,6 @@
#include <linux/raid/xor.h>
#include <linux/vmalloc.h>
#include <asm/div64.h>
-#include "compat.h"
#include "ctree.h"
#include "extent_map.h"
#include "disk-io.h"
@@ -1540,8 +1539,10 @@ static int full_stripe_write(struct btrfs_raid_bio *rbio)
int ret;
ret = alloc_rbio_parity_pages(rbio);
- if (ret)
+ if (ret) {
+ __free_raid_bio(rbio);
return ret;
+ }
ret = lock_stripe_add(rbio);
if (ret == 0)
@@ -1687,11 +1688,8 @@ int raid56_parity_write(struct btrfs_root *root, struct bio *bio,
struct blk_plug_cb *cb;
rbio = alloc_rbio(root, bbio, raid_map, stripe_len);
- if (IS_ERR(rbio)) {
- kfree(raid_map);
- kfree(bbio);
+ if (IS_ERR(rbio))
return PTR_ERR(rbio);
- }
bio_list_add(&rbio->bio_list, bio);
rbio->bio_list_bytes = bio->bi_size;
@@ -2041,9 +2039,8 @@ int raid56_parity_recover(struct btrfs_root *root, struct bio *bio,
int ret;
rbio = alloc_rbio(root, bbio, raid_map, stripe_len);
- if (IS_ERR(rbio)) {
+ if (IS_ERR(rbio))
return PTR_ERR(rbio);
- }
rbio->read_rebuild = 1;
bio_list_add(&rbio->bio_list, bio);
@@ -2052,6 +2049,8 @@ int raid56_parity_recover(struct btrfs_root *root, struct bio *bio,
rbio->faila = find_logical_bio_stripe(rbio, bio);
if (rbio->faila == -1) {
BUG();
+ kfree(raid_map);
+ kfree(bbio);
kfree(rbio);
return -EIO;
}
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 1209649..ce459a7 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -335,7 +335,7 @@ static void backref_tree_panic(struct rb_node *rb_node, int errno, u64 bytenr)
if (bnode->root)
fs_info = bnode->root->fs_info;
btrfs_panic(fs_info, errno, "Inconsistency in backref cache "
- "found at offset %llu\n", (unsigned long long)bytenr);
+ "found at offset %llu\n", bytenr);
}
/*
@@ -588,7 +588,7 @@ static struct btrfs_root *read_fs_root(struct btrfs_fs_info *fs_info,
else
key.offset = (u64)-1;
- return btrfs_read_fs_root_no_name(fs_info, &key);
+ return btrfs_get_fs_root(fs_info, &key, false);
}
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
@@ -641,6 +641,11 @@ int find_inline_backref(struct extent_buffer *leaf, int slot,
WARN_ON(item_size < sizeof(*ei) + sizeof(*bi));
return 1;
}
+ if (key.type == BTRFS_METADATA_ITEM_KEY &&
+ item_size <= sizeof(*ei)) {
+ WARN_ON(item_size < sizeof(*ei));
+ return 1;
+ }
if (key.type == BTRFS_EXTENT_ITEM_KEY) {
bi = (struct btrfs_tree_block_info *)(ei + 1);
@@ -691,6 +696,7 @@ struct backref_node *build_backref_tree(struct reloc_control *rc,
int cowonly;
int ret;
int err = 0;
+ bool need_check = true;
path1 = btrfs_alloc_path();
path2 = btrfs_alloc_path();
@@ -914,6 +920,7 @@ again:
cur->bytenr);
lower = cur;
+ need_check = true;
for (; level < BTRFS_MAX_LEVEL; level++) {
if (!path2->nodes[level]) {
BUG_ON(btrfs_root_bytenr(&root->root_item) !=
@@ -957,14 +964,12 @@ again:
/*
* add the block to pending list if we
- * need check its backrefs. only block
- * at 'cur->level + 1' is added to the
- * tail of pending list. this guarantees
- * we check backrefs from lower level
- * blocks to upper level blocks.
+ * need check its backrefs, we only do this once
+ * while walking up a tree as we will catch
+ * anything else later on.
*/
- if (!upper->checked &&
- level == cur->level + 1) {
+ if (!upper->checked && need_check) {
+ need_check = false;
list_add_tail(&edge->list[UPPER],
&list);
} else
@@ -1378,6 +1383,7 @@ int btrfs_init_reloc_root(struct btrfs_trans_handle *trans,
{
struct btrfs_root *reloc_root;
struct reloc_control *rc = root->fs_info->reloc_ctl;
+ struct btrfs_block_rsv *rsv;
int clear_rsv = 0;
int ret;
@@ -1391,13 +1397,14 @@ int btrfs_init_reloc_root(struct btrfs_trans_handle *trans,
root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
return 0;
- if (!trans->block_rsv) {
+ if (!trans->reloc_reserved) {
+ rsv = trans->block_rsv;
trans->block_rsv = rc->block_rsv;
clear_rsv = 1;
}
reloc_root = create_reloc_root(trans, root, root->root_key.objectid);
if (clear_rsv)
- trans->block_rsv = NULL;
+ trans->block_rsv = rsv;
ret = __add_reloc_root(reloc_root);
BUG_ON(ret < 0);
@@ -1543,7 +1550,7 @@ static int get_new_location(struct inode *reloc_inode, u64 *new_bytenr,
btrfs_file_extent_other_encoding(leaf, fi));
if (num_bytes != btrfs_file_extent_disk_num_bytes(leaf, fi)) {
- ret = 1;
+ ret = -EINVAL;
goto out;
}
@@ -1574,7 +1581,7 @@ int replace_file_extents(struct btrfs_trans_handle *trans,
u64 end;
u32 nritems;
u32 i;
- int ret;
+ int ret = 0;
int first = 1;
int dirty = 0;
@@ -1637,11 +1644,13 @@ int replace_file_extents(struct btrfs_trans_handle *trans,
ret = get_new_location(rc->data_inode, &new_bytenr,
bytenr, num_bytes);
- if (ret > 0) {
- WARN_ON(1);
- continue;
+ if (ret) {
+ /*
+ * Don't have to abort since we've not changed anything
+ * in the file extent yet.
+ */
+ break;
}
- BUG_ON(ret < 0);
btrfs_set_file_extent_disk_bytenr(leaf, fi, new_bytenr);
dirty = 1;
@@ -1651,18 +1660,24 @@ int replace_file_extents(struct btrfs_trans_handle *trans,
num_bytes, parent,
btrfs_header_owner(leaf),
key.objectid, key.offset, 1);
- BUG_ON(ret);
+ if (ret) {
+ btrfs_abort_transaction(trans, root, ret);
+ break;
+ }
ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
parent, btrfs_header_owner(leaf),
key.objectid, key.offset, 1);
- BUG_ON(ret);
+ if (ret) {
+ btrfs_abort_transaction(trans, root, ret);
+ break;
+ }
}
if (dirty)
btrfs_mark_buffer_dirty(leaf);
if (inode)
btrfs_add_delayed_iput(inode);
- return 0;
+ return ret;
}
static noinline_for_stack
@@ -1762,8 +1777,7 @@ again:
new_ptr_gen = 0;
}
- if (new_bytenr > 0 && new_bytenr == old_bytenr) {
- WARN_ON(1);
+ if (WARN_ON(new_bytenr > 0 && new_bytenr == old_bytenr)) {
ret = level;
break;
}
@@ -2045,7 +2059,7 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc,
LIST_HEAD(inode_list);
struct btrfs_key key;
struct btrfs_key next_key;
- struct btrfs_trans_handle *trans;
+ struct btrfs_trans_handle *trans = NULL;
struct btrfs_root *reloc_root;
struct btrfs_root_item *root_item;
struct btrfs_path *path;
@@ -2094,18 +2108,19 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc,
memset(&next_key, 0, sizeof(next_key));
while (1) {
- trans = btrfs_start_transaction(root, 0);
- BUG_ON(IS_ERR(trans));
- trans->block_rsv = rc->block_rsv;
-
ret = btrfs_block_rsv_refill(root, rc->block_rsv, min_reserved,
BTRFS_RESERVE_FLUSH_ALL);
if (ret) {
- BUG_ON(ret != -EAGAIN);
- ret = btrfs_commit_transaction(trans, root);
- BUG_ON(ret);
- continue;
+ err = ret;
+ goto out;
}
+ trans = btrfs_start_transaction(root, 0);
+ if (IS_ERR(trans)) {
+ err = PTR_ERR(trans);
+ trans = NULL;
+ goto out;
+ }
+ trans->block_rsv = rc->block_rsv;
replaced = 0;
max_level = level;
@@ -2151,6 +2166,7 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc,
root_item->drop_level = level;
btrfs_end_transaction_throttle(trans, root);
+ trans = NULL;
btrfs_btree_balance_dirty(root);
@@ -2179,7 +2195,8 @@ out:
btrfs_update_reloc_root(trans, root);
}
- btrfs_end_transaction_throttle(trans, root);
+ if (trans)
+ btrfs_end_transaction_throttle(trans, root);
btrfs_btree_balance_dirty(root);
@@ -2314,8 +2331,13 @@ again:
BUG_ON(root->reloc_root != reloc_root);
ret = merge_reloc_root(rc, root);
- if (ret)
+ if (ret) {
+ __update_reloc_root(reloc_root, 1);
+ free_extent_buffer(reloc_root->node);
+ free_extent_buffer(reloc_root->commit_root);
+ kfree(reloc_root);
goto out;
+ }
} else {
list_del_init(&reloc_root->root_list);
}
@@ -2344,9 +2366,6 @@ again:
if (IS_ERR(root))
continue;
- if (btrfs_root_refs(&root->root_item) == 0)
- continue;
-
trans = btrfs_join_transaction(root);
BUG_ON(IS_ERR(trans));
@@ -3243,7 +3262,7 @@ static int add_tree_block(struct reloc_control *rc,
struct rb_node *rb_node;
u32 item_size;
int level = -1;
- int generation;
+ u64 generation;
eb = path->nodes[0];
item_size = btrfs_item_size_nr(eb, path->slots[0]);
@@ -3392,7 +3411,6 @@ static int delete_block_group_cache(struct btrfs_fs_info *fs_info,
struct inode *inode, u64 ino)
{
struct btrfs_key key;
- struct btrfs_path *path;
struct btrfs_root *root = fs_info->tree_root;
struct btrfs_trans_handle *trans;
int ret = 0;
@@ -3417,22 +3435,14 @@ truncate:
if (ret)
goto out;
- path = btrfs_alloc_path();
- if (!path) {
- ret = -ENOMEM;
- goto out;
- }
-
trans = btrfs_join_transaction(root);
if (IS_ERR(trans)) {
- btrfs_free_path(path);
ret = PTR_ERR(trans);
goto out;
}
- ret = btrfs_truncate_free_space_cache(root, trans, path, inode);
+ ret = btrfs_truncate_free_space_cache(root, trans, inode);
- btrfs_free_path(path);
btrfs_end_transaction(trans, root);
btrfs_btree_balance_dirty(root);
out:
@@ -3534,10 +3544,8 @@ static int find_data_references(struct reloc_control *rc,
err = ret;
goto out;
}
- if (ret > 0) {
- WARN_ON(1);
+ if (WARN_ON(ret > 0))
goto out;
- }
leaf = path->nodes[0];
nritems = btrfs_header_nritems(leaf);
@@ -3557,11 +3565,9 @@ static int find_data_references(struct reloc_control *rc,
}
btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
- if (key.objectid != ref_objectid ||
- key.type != BTRFS_EXTENT_DATA_KEY) {
- WARN_ON(1);
+ if (WARN_ON(key.objectid != ref_objectid ||
+ key.type != BTRFS_EXTENT_DATA_KEY))
break;
- }
fi = btrfs_item_ptr(leaf, path->slots[0],
struct btrfs_file_extent_item);
@@ -3628,7 +3634,7 @@ int add_data_references(struct reloc_control *rc,
unsigned long ptr;
unsigned long end;
u32 blocksize = btrfs_level_size(rc->extent_root, 0);
- int ret;
+ int ret = 0;
int err = 0;
eb = path->nodes[0];
@@ -3655,6 +3661,10 @@ int add_data_references(struct reloc_control *rc,
} else {
BUG();
}
+ if (ret) {
+ err = ret;
+ goto out;
+ }
ptr += btrfs_extent_inline_ref_size(key.type);
}
WARN_ON(ptr > end);
@@ -3700,6 +3710,7 @@ int add_data_references(struct reloc_control *rc,
}
path->slots[0]++;
}
+out:
btrfs_release_path(path);
if (err)
free_block_list(blocks);
@@ -3981,16 +3992,6 @@ restart:
}
}
- ret = btrfs_block_rsv_check(rc->extent_root, rc->block_rsv, 5);
- if (ret < 0) {
- if (ret != -ENOSPC) {
- err = ret;
- WARN_ON(1);
- break;
- }
- rc->commit_transaction = 1;
- }
-
if (rc->commit_transaction) {
rc->commit_transaction = 0;
ret = btrfs_commit_transaction(trans, rc->extent_root);
@@ -4219,15 +4220,14 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start)
}
printk(KERN_INFO "btrfs: relocating block group %llu flags %llu\n",
- (unsigned long long)rc->block_group->key.objectid,
- (unsigned long long)rc->block_group->flags);
+ rc->block_group->key.objectid, rc->block_group->flags);
- ret = btrfs_start_all_delalloc_inodes(fs_info, 0);
+ ret = btrfs_start_delalloc_roots(fs_info, 0);
if (ret < 0) {
err = ret;
goto out;
}
- btrfs_wait_all_ordered_extents(fs_info, 0);
+ btrfs_wait_ordered_roots(fs_info, -1);
while (1) {
mutex_lock(&fs_info->cleaner_mutex);
@@ -4242,10 +4242,15 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start)
break;
printk(KERN_INFO "btrfs: found %llu extents\n",
- (unsigned long long)rc->extents_found);
+ rc->extents_found);
if (rc->stage == MOVE_DATA_EXTENTS && rc->found_file_extent) {
- btrfs_wait_ordered_range(rc->data_inode, 0, (u64)-1);
+ ret = btrfs_wait_ordered_range(rc->data_inode, 0,
+ (u64)-1);
+ if (ret) {
+ err = ret;
+ goto out;
+ }
invalidate_mapping_pages(rc->data_inode->i_mapping,
0, -1);
rc->stage = UPDATE_DATA_PTRS;
@@ -4462,6 +4467,7 @@ int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len)
struct btrfs_root *root = BTRFS_I(inode)->root;
int ret;
u64 disk_bytenr;
+ u64 new_bytenr;
LIST_HEAD(list);
ordered = btrfs_lookup_ordered_extent(inode, file_pos);
@@ -4473,13 +4479,24 @@ int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len)
if (ret)
goto out;
- disk_bytenr = ordered->start;
while (!list_empty(&list)) {
sums = list_entry(list.next, struct btrfs_ordered_sum, list);
list_del_init(&sums->list);
- sums->bytenr = disk_bytenr;
- disk_bytenr += sums->len;
+ /*
+ * We need to offset the new_bytenr based on where the csum is.
+ * We need to do this because we will read in entire prealloc
+ * extents but we may have written to say the middle of the
+ * prealloc extent, so we need to make sure the csum goes with
+ * the right disk offset.
+ *
+ * We can do this because the data reloc inode refers strictly
+ * to the on disk bytes, so we don't have to worry about
+ * disk_len vs real len like with real inodes since it's all
+ * disk length.
+ */
+ new_bytenr = ordered->start + (sums->bytenr - disk_bytenr);
+ sums->bytenr = new_bytenr;
btrfs_add_ordered_sum(inode, ordered, sums);
}
@@ -4488,19 +4505,19 @@ out:
return ret;
}
-void btrfs_reloc_cow_block(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, struct extent_buffer *buf,
- struct extent_buffer *cow)
+int btrfs_reloc_cow_block(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root, struct extent_buffer *buf,
+ struct extent_buffer *cow)
{
struct reloc_control *rc;
struct backref_node *node;
int first_cow = 0;
int level;
- int ret;
+ int ret = 0;
rc = root->fs_info->reloc_ctl;
if (!rc)
- return;
+ return 0;
BUG_ON(rc->stage == UPDATE_DATA_PTRS &&
root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID);
@@ -4536,10 +4553,9 @@ void btrfs_reloc_cow_block(struct btrfs_trans_handle *trans,
rc->nodes_relocated += buf->len;
}
- if (level == 0 && first_cow && rc->stage == UPDATE_DATA_PTRS) {
+ if (level == 0 && first_cow && rc->stage == UPDATE_DATA_PTRS)
ret = replace_file_extents(trans, rc, root, cow);
- BUG_ON(ret);
- }
+ return ret;
}
/*
diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c
index ffb1036..ec71ea4 100644
--- a/fs/btrfs/root-tree.c
+++ b/fs/btrfs/root-tree.c
@@ -29,8 +29,8 @@
* generation numbers as then we know the root was once mounted with an older
* kernel that was not aware of the root item structure change.
*/
-void btrfs_read_root_item(struct extent_buffer *eb, int slot,
- struct btrfs_root_item *item)
+static void btrfs_read_root_item(struct extent_buffer *eb, int slot,
+ struct btrfs_root_item *item)
{
uuid_le uuid;
int len;
@@ -155,8 +155,7 @@ int btrfs_update_root(struct btrfs_trans_handle *trans, struct btrfs_root
if (ret != 0) {
btrfs_print_leaf(root, path->nodes[0]);
printk(KERN_CRIT "unable to update root key %llu %u %llu\n",
- (unsigned long long)key->objectid, key->type,
- (unsigned long long)key->offset);
+ key->objectid, key->type, key->offset);
BUG_ON(1);
}
@@ -300,11 +299,6 @@ int btrfs_find_orphan_roots(struct btrfs_root *tree_root)
continue;
}
- if (btrfs_root_refs(&root->root_item) == 0) {
- btrfs_add_dead_root(root);
- continue;
- }
-
err = btrfs_init_fs_root(root);
if (err) {
btrfs_free_fs_root(root);
@@ -319,6 +313,9 @@ int btrfs_find_orphan_roots(struct btrfs_root *tree_root)
btrfs_free_fs_root(root);
break;
}
+
+ if (btrfs_root_refs(&root->root_item) == 0)
+ btrfs_add_dead_root(root);
}
btrfs_free_path(path);
@@ -490,13 +487,13 @@ again:
*/
void btrfs_check_and_init_root_item(struct btrfs_root_item *root_item)
{
- u64 inode_flags = le64_to_cpu(root_item->inode.flags);
+ u64 inode_flags = btrfs_stack_inode_flags(&root_item->inode);
if (!(inode_flags & BTRFS_INODE_ROOT_ITEM_INIT)) {
inode_flags |= BTRFS_INODE_ROOT_ITEM_INIT;
- root_item->inode.flags = cpu_to_le64(inode_flags);
- root_item->flags = 0;
- root_item->byte_limit = 0;
+ btrfs_set_stack_inode_flags(&root_item->inode, inode_flags);
+ btrfs_set_root_flags(root_item, 0);
+ btrfs_set_root_limit(root_item, 0);
}
}
@@ -507,8 +504,8 @@ void btrfs_update_root_times(struct btrfs_trans_handle *trans,
struct timespec ct = CURRENT_TIME;
spin_lock(&root->root_item_lock);
- item->ctransid = cpu_to_le64(trans->transid);
- item->ctime.sec = cpu_to_le64(ct.tv_sec);
- item->ctime.nsec = cpu_to_le32(ct.tv_nsec);
+ btrfs_set_root_ctransid(item, trans->transid);
+ btrfs_set_stack_timespec_sec(&item->ctime, ct.tv_sec);
+ btrfs_set_stack_timespec_nsec(&item->ctime, ct.tv_nsec);
spin_unlock(&root->root_item_lock);
}
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 64a157b..2544805 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -158,12 +158,20 @@ struct scrub_fixup_nodatasum {
int mirror_num;
};
+struct scrub_nocow_inode {
+ u64 inum;
+ u64 offset;
+ u64 root;
+ struct list_head list;
+};
+
struct scrub_copy_nocow_ctx {
struct scrub_ctx *sctx;
u64 logical;
u64 len;
int mirror_num;
u64 physical_for_dev_replace;
+ struct list_head inodes;
struct btrfs_work work;
};
@@ -245,7 +253,7 @@ static void scrub_wr_bio_end_io_worker(struct btrfs_work *work);
static int write_page_nocow(struct scrub_ctx *sctx,
u64 physical_for_dev_replace, struct page *page);
static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root,
- void *ctx);
+ struct scrub_copy_nocow_ctx *ctx);
static int copy_nocow_pages(struct scrub_ctx *sctx, u64 logical, u64 len,
int mirror_num, u64 physical_for_dev_replace);
static void copy_nocow_pages_worker(struct btrfs_work *work);
@@ -754,8 +762,7 @@ out:
num_uncorrectable_read_errors);
printk_ratelimited_in_rcu(KERN_ERR
"btrfs: unable to fixup (nodatasum) error at logical %llu on dev %s\n",
- (unsigned long long)fixup->logical,
- rcu_str_deref(fixup->dev->name));
+ fixup->logical, rcu_str_deref(fixup->dev->name));
}
btrfs_free_path(path);
@@ -1154,8 +1161,7 @@ corrected_error:
spin_unlock(&sctx->stat_lock);
printk_ratelimited_in_rcu(KERN_ERR
"btrfs: fixed up error at logical %llu on dev %s\n",
- (unsigned long long)logical,
- rcu_str_deref(dev->name));
+ logical, rcu_str_deref(dev->name));
}
} else {
did_not_correct_error:
@@ -1164,8 +1170,7 @@ did_not_correct_error:
spin_unlock(&sctx->stat_lock);
printk_ratelimited_in_rcu(KERN_ERR
"btrfs: unable to fixup (regular) error at logical %llu on dev %s\n",
- (unsigned long long)logical,
- rcu_str_deref(dev->name));
+ logical, rcu_str_deref(dev->name));
}
out:
@@ -1345,12 +1350,12 @@ static void scrub_recheck_block_checksum(struct btrfs_fs_info *fs_info,
mapped_buffer = kmap_atomic(sblock->pagev[0]->page);
h = (struct btrfs_header *)mapped_buffer;
- if (sblock->pagev[0]->logical != le64_to_cpu(h->bytenr) ||
+ if (sblock->pagev[0]->logical != btrfs_stack_header_bytenr(h) ||
memcmp(h->fsid, fs_info->fsid, BTRFS_UUID_SIZE) ||
memcmp(h->chunk_tree_uuid, fs_info->chunk_tree_uuid,
BTRFS_UUID_SIZE)) {
sblock->header_error = 1;
- } else if (generation != le64_to_cpu(h->generation)) {
+ } else if (generation != btrfs_stack_header_generation(h)) {
sblock->header_error = 1;
sblock->generation_error = 1;
}
@@ -1720,10 +1725,10 @@ static int scrub_checksum_tree_block(struct scrub_block *sblock)
* b) the page is already kmapped
*/
- if (sblock->pagev[0]->logical != le64_to_cpu(h->bytenr))
+ if (sblock->pagev[0]->logical != btrfs_stack_header_bytenr(h))
++fail;
- if (sblock->pagev[0]->generation != le64_to_cpu(h->generation))
+ if (sblock->pagev[0]->generation != btrfs_stack_header_generation(h))
++fail;
if (memcmp(h->fsid, fs_info->fsid, BTRFS_UUID_SIZE))
@@ -1786,10 +1791,10 @@ static int scrub_checksum_super(struct scrub_block *sblock)
s = (struct btrfs_super_block *)mapped_buffer;
memcpy(on_disk_csum, s->csum, sctx->csum_size);
- if (sblock->pagev[0]->logical != le64_to_cpu(s->bytenr))
+ if (sblock->pagev[0]->logical != btrfs_super_bytenr(s))
++fail_cor;
- if (sblock->pagev[0]->generation != le64_to_cpu(s->generation))
+ if (sblock->pagev[0]->generation != btrfs_super_generation(s))
++fail_gen;
if (memcmp(s->fsid, fs_info->fsid, BTRFS_UUID_SIZE))
@@ -2455,8 +2460,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
printk(KERN_ERR
"btrfs scrub: tree block %llu spanning "
"stripes, ignored. logical=%llu\n",
- (unsigned long long)key.objectid,
- (unsigned long long)logical);
+ key.objectid, logical);
goto next;
}
@@ -2713,8 +2717,6 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
mutex_unlock(&fs_info->scrub_lock);
wake_up(&fs_info->scrub_pause_wait);
- dev_replace->cursor_left = dev_replace->cursor_right;
- dev_replace->item_needs_writeback = 1;
btrfs_put_block_group(cache);
if (ret)
break;
@@ -2728,6 +2730,9 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
break;
}
+ dev_replace->cursor_left = dev_replace->cursor_right;
+ dev_replace->item_needs_writeback = 1;
+
key.offset = found_key.offset + length;
btrfs_release_path(path);
}
@@ -2779,7 +2784,6 @@ static noinline_for_stack int scrub_workers_get(struct btrfs_fs_info *fs_info,
{
int ret = 0;
- mutex_lock(&fs_info->scrub_lock);
if (fs_info->scrub_workers_refcnt == 0) {
if (is_dev_replace)
btrfs_init_workers(&fs_info->scrub_workers, "scrub", 1,
@@ -2809,21 +2813,17 @@ static noinline_for_stack int scrub_workers_get(struct btrfs_fs_info *fs_info,
}
++fs_info->scrub_workers_refcnt;
out:
- mutex_unlock(&fs_info->scrub_lock);
-
return ret;
}
static noinline_for_stack void scrub_workers_put(struct btrfs_fs_info *fs_info)
{
- mutex_lock(&fs_info->scrub_lock);
if (--fs_info->scrub_workers_refcnt == 0) {
btrfs_stop_workers(&fs_info->scrub_workers);
btrfs_stop_workers(&fs_info->scrub_wr_completion_workers);
btrfs_stop_workers(&fs_info->scrub_nocow_workers);
}
WARN_ON(fs_info->scrub_workers_refcnt < 0);
- mutex_unlock(&fs_info->scrub_lock);
}
int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
@@ -2863,9 +2863,8 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
if (fs_info->chunk_root->sectorsize != PAGE_SIZE) {
/* not supported for data w/o checksums */
printk(KERN_ERR
- "btrfs_scrub: size assumption sectorsize != PAGE_SIZE (%d != %lld) fails\n",
- fs_info->chunk_root->sectorsize,
- (unsigned long long)PAGE_SIZE);
+ "btrfs_scrub: size assumption sectorsize != PAGE_SIZE (%d != %lu) fails\n",
+ fs_info->chunk_root->sectorsize, PAGE_SIZE);
return -EINVAL;
}
@@ -2885,23 +2884,18 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
return -EINVAL;
}
- ret = scrub_workers_get(fs_info, is_dev_replace);
- if (ret)
- return ret;
mutex_lock(&fs_info->fs_devices->device_list_mutex);
dev = btrfs_find_device(fs_info, devid, NULL, NULL);
if (!dev || (dev->missing && !is_dev_replace)) {
mutex_unlock(&fs_info->fs_devices->device_list_mutex);
- scrub_workers_put(fs_info);
return -ENODEV;
}
- mutex_lock(&fs_info->scrub_lock);
+ mutex_lock(&fs_info->scrub_lock);
if (!dev->in_fs_metadata || dev->is_tgtdev_for_dev_replace) {
mutex_unlock(&fs_info->scrub_lock);
mutex_unlock(&fs_info->fs_devices->device_list_mutex);
- scrub_workers_put(fs_info);
return -EIO;
}
@@ -2912,10 +2906,17 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
btrfs_dev_replace_unlock(&fs_info->dev_replace);
mutex_unlock(&fs_info->scrub_lock);
mutex_unlock(&fs_info->fs_devices->device_list_mutex);
- scrub_workers_put(fs_info);
return -EINPROGRESS;
}
btrfs_dev_replace_unlock(&fs_info->dev_replace);
+
+ ret = scrub_workers_get(fs_info, is_dev_replace);
+ if (ret) {
+ mutex_unlock(&fs_info->scrub_lock);
+ mutex_unlock(&fs_info->fs_devices->device_list_mutex);
+ return ret;
+ }
+
sctx = scrub_setup_ctx(dev, is_dev_replace);
if (IS_ERR(sctx)) {
mutex_unlock(&fs_info->scrub_lock);
@@ -2928,13 +2929,15 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
atomic_inc(&fs_info->scrubs_running);
mutex_unlock(&fs_info->scrub_lock);
- mutex_unlock(&fs_info->fs_devices->device_list_mutex);
if (!is_dev_replace) {
- down_read(&fs_info->scrub_super_lock);
+ /*
+ * by holding device list mutex, we can
+ * kick off writing super in log tree sync.
+ */
ret = scrub_supers(sctx, dev);
- up_read(&fs_info->scrub_super_lock);
}
+ mutex_unlock(&fs_info->fs_devices->device_list_mutex);
if (!ret)
ret = scrub_enumerate_chunks(sctx, dev, start, end,
@@ -2951,10 +2954,10 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
mutex_lock(&fs_info->scrub_lock);
dev->scrub_device = NULL;
+ scrub_workers_put(fs_info);
mutex_unlock(&fs_info->scrub_lock);
scrub_free_ctx(sctx);
- scrub_workers_put(fs_info);
return ret;
}
@@ -2984,16 +2987,6 @@ void btrfs_scrub_continue(struct btrfs_root *root)
wake_up(&fs_info->scrub_pause_wait);
}
-void btrfs_scrub_pause_super(struct btrfs_root *root)
-{
- down_write(&root->fs_info->scrub_super_lock);
-}
-
-void btrfs_scrub_continue_super(struct btrfs_root *root)
-{
- up_write(&root->fs_info->scrub_super_lock);
-}
-
int btrfs_scrub_cancel(struct btrfs_fs_info *fs_info)
{
mutex_lock(&fs_info->scrub_lock);
@@ -3131,12 +3124,30 @@ static int copy_nocow_pages(struct scrub_ctx *sctx, u64 logical, u64 len,
nocow_ctx->mirror_num = mirror_num;
nocow_ctx->physical_for_dev_replace = physical_for_dev_replace;
nocow_ctx->work.func = copy_nocow_pages_worker;
+ INIT_LIST_HEAD(&nocow_ctx->inodes);
btrfs_queue_worker(&fs_info->scrub_nocow_workers,
&nocow_ctx->work);
return 0;
}
+static int record_inode_for_nocow(u64 inum, u64 offset, u64 root, void *ctx)
+{
+ struct scrub_copy_nocow_ctx *nocow_ctx = ctx;
+ struct scrub_nocow_inode *nocow_inode;
+
+ nocow_inode = kzalloc(sizeof(*nocow_inode), GFP_NOFS);
+ if (!nocow_inode)
+ return -ENOMEM;
+ nocow_inode->inum = inum;
+ nocow_inode->offset = offset;
+ nocow_inode->root = root;
+ list_add_tail(&nocow_inode->list, &nocow_ctx->inodes);
+ return 0;
+}
+
+#define COPY_COMPLETE 1
+
static void copy_nocow_pages_worker(struct btrfs_work *work)
{
struct scrub_copy_nocow_ctx *nocow_ctx =
@@ -3172,19 +3183,42 @@ static void copy_nocow_pages_worker(struct btrfs_work *work)
}
ret = iterate_inodes_from_logical(logical, fs_info, path,
- copy_nocow_pages_for_inode,
- nocow_ctx);
+ record_inode_for_nocow, nocow_ctx);
if (ret != 0 && ret != -ENOENT) {
- pr_warn("iterate_inodes_from_logical() failed: log %llu, phys %llu, len %llu, mir %llu, ret %d\n",
- (unsigned long long)logical,
- (unsigned long long)physical_for_dev_replace,
- (unsigned long long)len,
- (unsigned long long)mirror_num, ret);
+ pr_warn("iterate_inodes_from_logical() failed: log %llu, phys %llu, len %llu, mir %u, ret %d\n",
+ logical, physical_for_dev_replace, len, mirror_num,
+ ret);
not_written = 1;
goto out;
}
+ btrfs_end_transaction(trans, root);
+ trans = NULL;
+ while (!list_empty(&nocow_ctx->inodes)) {
+ struct scrub_nocow_inode *entry;
+ entry = list_first_entry(&nocow_ctx->inodes,
+ struct scrub_nocow_inode,
+ list);
+ list_del_init(&entry->list);
+ ret = copy_nocow_pages_for_inode(entry->inum, entry->offset,
+ entry->root, nocow_ctx);
+ kfree(entry);
+ if (ret == COPY_COMPLETE) {
+ ret = 0;
+ break;
+ } else if (ret) {
+ break;
+ }
+ }
out:
+ while (!list_empty(&nocow_ctx->inodes)) {
+ struct scrub_nocow_inode *entry;
+ entry = list_first_entry(&nocow_ctx->inodes,
+ struct scrub_nocow_inode,
+ list);
+ list_del_init(&entry->list);
+ kfree(entry);
+ }
if (trans && !IS_ERR(trans))
btrfs_end_transaction(trans, root);
if (not_written)
@@ -3197,20 +3231,25 @@ out:
scrub_pending_trans_workers_dec(sctx);
}
-static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root, void *ctx)
+static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root,
+ struct scrub_copy_nocow_ctx *nocow_ctx)
{
- struct scrub_copy_nocow_ctx *nocow_ctx = ctx;
struct btrfs_fs_info *fs_info = nocow_ctx->sctx->dev_root->fs_info;
struct btrfs_key key;
struct inode *inode;
struct page *page;
struct btrfs_root *local_root;
+ struct btrfs_ordered_extent *ordered;
+ struct extent_map *em;
+ struct extent_state *cached_state = NULL;
+ struct extent_io_tree *io_tree;
u64 physical_for_dev_replace;
- u64 len;
+ u64 len = nocow_ctx->len;
+ u64 lockstart = offset, lockend = offset + len - 1;
unsigned long index;
int srcu_index;
- int ret;
- int err;
+ int ret = 0;
+ int err = 0;
key.objectid = root;
key.type = BTRFS_ROOT_ITEM_KEY;
@@ -3224,11 +3263,6 @@ static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root, void *ctx)
return PTR_ERR(local_root);
}
- if (btrfs_root_refs(&local_root->root_item) == 0) {
- srcu_read_unlock(&fs_info->subvol_srcu, srcu_index);
- return -ENOENT;
- }
-
key.type = BTRFS_INODE_ITEM_KEY;
key.objectid = inum;
key.offset = 0;
@@ -3241,9 +3275,33 @@ static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root, void *ctx)
mutex_lock(&inode->i_mutex);
inode_dio_wait(inode);
- ret = 0;
physical_for_dev_replace = nocow_ctx->physical_for_dev_replace;
- len = nocow_ctx->len;
+ io_tree = &BTRFS_I(inode)->io_tree;
+
+ lock_extent_bits(io_tree, lockstart, lockend, 0, &cached_state);
+ ordered = btrfs_lookup_ordered_range(inode, lockstart, len);
+ if (ordered) {
+ btrfs_put_ordered_extent(ordered);
+ goto out_unlock;
+ }
+
+ em = btrfs_get_extent(inode, NULL, 0, lockstart, len, 0);
+ if (IS_ERR(em)) {
+ ret = PTR_ERR(em);
+ goto out_unlock;
+ }
+
+ /*
+ * This extent does not actually cover the logical extent anymore,
+ * move on to the next inode.
+ */
+ if (em->block_start > nocow_ctx->logical ||
+ em->block_start + em->block_len < nocow_ctx->logical + len) {
+ free_extent_map(em);
+ goto out_unlock;
+ }
+ free_extent_map(em);
+
while (len >= PAGE_CACHE_SIZE) {
index = offset >> PAGE_CACHE_SHIFT;
again:
@@ -3259,10 +3317,9 @@ again:
goto next_page;
} else {
ClearPageError(page);
- err = extent_read_full_page(&BTRFS_I(inode)->
- io_tree,
- page, btrfs_get_extent,
- nocow_ctx->mirror_num);
+ err = extent_read_full_page_nolock(io_tree, page,
+ btrfs_get_extent,
+ nocow_ctx->mirror_num);
if (err) {
ret = err;
goto next_page;
@@ -3276,6 +3333,7 @@ again:
* page in the page cache.
*/
if (page->mapping != inode->i_mapping) {
+ unlock_page(page);
page_cache_release(page);
goto again;
}
@@ -3299,6 +3357,10 @@ next_page:
physical_for_dev_replace += PAGE_CACHE_SIZE;
len -= PAGE_CACHE_SIZE;
}
+ ret = COPY_COMPLETE;
+out_unlock:
+ unlock_extent_cached(io_tree, lockstart, lockend, &cached_state,
+ GFP_NOFS);
out:
mutex_unlock(&inode->i_mutex);
iput(inode);
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index d3f3b43..6837fe8 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -26,6 +26,7 @@
#include <linux/radix-tree.h>
#include <linux/crc32c.h>
#include <linux/vmalloc.h>
+#include <linux/string.h>
#include "send.h"
#include "backref.h"
@@ -54,8 +55,8 @@ struct fs_path {
char *buf;
int buf_len;
- int reversed:1;
- int virtual_mem:1;
+ unsigned int reversed:1;
+ unsigned int virtual_mem:1;
char inline_buf[];
};
char pad[PAGE_SIZE];
@@ -120,7 +121,6 @@ struct send_ctx {
struct list_head name_cache_list;
int name_cache_size;
- struct file *cur_inode_filp;
char *read_buf;
};
@@ -219,7 +219,7 @@ static int fs_path_ensure_buf(struct fs_path *p, int len)
len = PAGE_ALIGN(len);
if (p->buf == p->inline_buf) {
- tmp_buf = kmalloc(len, GFP_NOFS);
+ tmp_buf = kmalloc(len, GFP_NOFS | __GFP_NOWARN);
if (!tmp_buf) {
tmp_buf = vmalloc(len);
if (!tmp_buf)
@@ -564,10 +564,8 @@ static int begin_cmd(struct send_ctx *sctx, int cmd)
{
struct btrfs_cmd_header *hdr;
- if (!sctx->send_buf) {
- WARN_ON(1);
+ if (WARN_ON(!sctx->send_buf))
return -EINVAL;
- }
BUG_ON(sctx->send_size);
@@ -790,7 +788,7 @@ static int iterate_inode_ref(struct btrfs_root *root, struct btrfs_path *path,
if (found_key->type == BTRFS_INODE_REF_KEY) {
ptr = (unsigned long)btrfs_item_ptr(eb, slot,
struct btrfs_inode_ref);
- item = btrfs_item_nr(eb, slot);
+ item = btrfs_item_nr(slot);
total = btrfs_item_size(eb, item);
elem_size = sizeof(*iref);
} else {
@@ -904,7 +902,7 @@ static int iterate_dir_item(struct btrfs_root *root, struct btrfs_path *path,
eb = path->nodes[0];
slot = path->slots[0];
- item = btrfs_item_nr(eb, slot);
+ item = btrfs_item_nr(slot);
di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
cur = 0;
len = 0;
@@ -1668,6 +1666,7 @@ static int will_overwrite_ref(struct send_ctx *sctx, u64 dir, u64 dir_gen,
u64 *who_ino, u64 *who_gen)
{
int ret = 0;
+ u64 gen;
u64 other_inode = 0;
u8 other_type = 0;
@@ -1678,6 +1677,24 @@ static int will_overwrite_ref(struct send_ctx *sctx, u64 dir, u64 dir_gen,
if (ret <= 0)
goto out;
+ /*
+ * If we have a parent root we need to verify that the parent dir was
+ * not delted and then re-created, if it was then we have no overwrite
+ * and we can just unlink this entry.
+ */
+ if (sctx->parent_root) {
+ ret = get_inode_info(sctx->parent_root, dir, NULL, &gen, NULL,
+ NULL, NULL, NULL);
+ if (ret < 0 && ret != -ENOENT)
+ goto out;
+ if (ret) {
+ ret = 0;
+ goto out;
+ }
+ if (gen != dir_gen)
+ goto out;
+ }
+
ret = lookup_dir_item_inode(sctx->parent_root, dir, name, name_len,
&other_inode, &other_type);
if (ret < 0 && ret != -ENOENT)
@@ -2100,77 +2117,6 @@ out:
}
/*
- * Called for regular files when sending extents data. Opens a struct file
- * to read from the file.
- */
-static int open_cur_inode_file(struct send_ctx *sctx)
-{
- int ret = 0;
- struct btrfs_key key;
- struct path path;
- struct inode *inode;
- struct dentry *dentry;
- struct file *filp;
- int new = 0;
-
- if (sctx->cur_inode_filp)
- goto out;
-
- key.objectid = sctx->cur_ino;
- key.type = BTRFS_INODE_ITEM_KEY;
- key.offset = 0;
-
- inode = btrfs_iget(sctx->send_root->fs_info->sb, &key, sctx->send_root,
- &new);
- if (IS_ERR(inode)) {
- ret = PTR_ERR(inode);
- goto out;
- }
-
- dentry = d_obtain_alias(inode);
- inode = NULL;
- if (IS_ERR(dentry)) {
- ret = PTR_ERR(dentry);
- goto out;
- }
-
- path.mnt = sctx->mnt;
- path.dentry = dentry;
- filp = dentry_open(&path, O_RDONLY | O_LARGEFILE, current_cred());
- dput(dentry);
- dentry = NULL;
- if (IS_ERR(filp)) {
- ret = PTR_ERR(filp);
- goto out;
- }
- sctx->cur_inode_filp = filp;
-
-out:
- /*
- * no xxxput required here as every vfs op
- * does it by itself on failure
- */
- return ret;
-}
-
-/*
- * Closes the struct file that was created in open_cur_inode_file
- */
-static int close_cur_inode_file(struct send_ctx *sctx)
-{
- int ret = 0;
-
- if (!sctx->cur_inode_filp)
- goto out;
-
- ret = filp_close(sctx->cur_inode_filp, NULL);
- sctx->cur_inode_filp = NULL;
-
-out:
- return ret;
-}
-
-/*
* Sends a BTRFS_SEND_C_SUBVOL command/item to userspace
*/
static int send_subvol_begin(struct send_ctx *sctx)
@@ -2519,7 +2465,8 @@ static int did_create_dir(struct send_ctx *sctx, u64 dir)
di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
btrfs_dir_item_key_to_cpu(eb, di, &di_key);
- if (di_key.objectid < sctx->send_progress) {
+ if (di_key.type != BTRFS_ROOT_ITEM_KEY &&
+ di_key.objectid < sctx->send_progress) {
ret = 1;
goto out;
}
@@ -2581,7 +2528,6 @@ static int record_ref(struct list_head *head, u64 dir,
u64 dir_gen, struct fs_path *path)
{
struct recorded_ref *ref;
- char *tmp;
ref = kmalloc(sizeof(*ref), GFP_NOFS);
if (!ref)
@@ -2591,25 +2537,35 @@ static int record_ref(struct list_head *head, u64 dir,
ref->dir_gen = dir_gen;
ref->full_path = path;
- tmp = strrchr(ref->full_path->start, '/');
- if (!tmp) {
- ref->name_len = ref->full_path->end - ref->full_path->start;
- ref->name = ref->full_path->start;
+ ref->name = (char *)kbasename(ref->full_path->start);
+ ref->name_len = ref->full_path->end - ref->name;
+ ref->dir_path = ref->full_path->start;
+ if (ref->name == ref->full_path->start)
ref->dir_path_len = 0;
- ref->dir_path = ref->full_path->start;
- } else {
- tmp++;
- ref->name_len = ref->full_path->end - tmp;
- ref->name = tmp;
- ref->dir_path = ref->full_path->start;
+ else
ref->dir_path_len = ref->full_path->end -
ref->full_path->start - 1 - ref->name_len;
- }
list_add_tail(&ref->list, head);
return 0;
}
+static int dup_ref(struct recorded_ref *ref, struct list_head *list)
+{
+ struct recorded_ref *new;
+
+ new = kmalloc(sizeof(*ref), GFP_NOFS);
+ if (!new)
+ return -ENOMEM;
+
+ new->dir = ref->dir;
+ new->dir_gen = ref->dir_gen;
+ new->full_path = NULL;
+ INIT_LIST_HEAD(&new->list);
+ list_add_tail(&new->list, list);
+ return 0;
+}
+
static void __free_recorded_refs(struct list_head *head)
{
struct recorded_ref *cur;
@@ -2724,9 +2680,7 @@ static int process_recorded_refs(struct send_ctx *sctx)
int ret = 0;
struct recorded_ref *cur;
struct recorded_ref *cur2;
- struct ulist *check_dirs = NULL;
- struct ulist_iterator uit;
- struct ulist_node *un;
+ struct list_head check_dirs;
struct fs_path *valid_path = NULL;
u64 ow_inode = 0;
u64 ow_gen;
@@ -2740,6 +2694,7 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino);
* which is always '..'
*/
BUG_ON(sctx->cur_ino <= BTRFS_FIRST_FREE_OBJECTID);
+ INIT_LIST_HEAD(&check_dirs);
valid_path = fs_path_alloc();
if (!valid_path) {
@@ -2747,12 +2702,6 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino);
goto out;
}
- check_dirs = ulist_alloc(GFP_NOFS);
- if (!check_dirs) {
- ret = -ENOMEM;
- goto out;
- }
-
/*
* First, check if the first ref of the current inode was overwritten
* before. If yes, we know that the current inode was already orphanized
@@ -2889,8 +2838,7 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino);
goto out;
}
}
- ret = ulist_add(check_dirs, cur->dir, cur->dir_gen,
- GFP_NOFS);
+ ret = dup_ref(cur, &check_dirs);
if (ret < 0)
goto out;
}
@@ -2918,8 +2866,7 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino);
}
list_for_each_entry(cur, &sctx->deleted_refs, list) {
- ret = ulist_add(check_dirs, cur->dir, cur->dir_gen,
- GFP_NOFS);
+ ret = dup_ref(cur, &check_dirs);
if (ret < 0)
goto out;
}
@@ -2930,8 +2877,7 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino);
*/
cur = list_entry(sctx->deleted_refs.next, struct recorded_ref,
list);
- ret = ulist_add(check_dirs, cur->dir, cur->dir_gen,
- GFP_NOFS);
+ ret = dup_ref(cur, &check_dirs);
if (ret < 0)
goto out;
} else if (!S_ISDIR(sctx->cur_inode_mode)) {
@@ -2951,12 +2897,10 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino);
if (ret < 0)
goto out;
}
- ret = ulist_add(check_dirs, cur->dir, cur->dir_gen,
- GFP_NOFS);
+ ret = dup_ref(cur, &check_dirs);
if (ret < 0)
goto out;
}
-
/*
* If the inode is still orphan, unlink the orphan. This may
* happen when a previous inode did overwrite the first ref
@@ -2978,33 +2922,32 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino);
* deletion and if it's finally possible to perform the rmdir now.
* We also update the inode stats of the parent dirs here.
*/
- ULIST_ITER_INIT(&uit);
- while ((un = ulist_next(check_dirs, &uit))) {
+ list_for_each_entry(cur, &check_dirs, list) {
/*
* In case we had refs into dirs that were not processed yet,
* we don't need to do the utime and rmdir logic for these dirs.
* The dir will be processed later.
*/
- if (un->val > sctx->cur_ino)
+ if (cur->dir > sctx->cur_ino)
continue;
- ret = get_cur_inode_state(sctx, un->val, un->aux);
+ ret = get_cur_inode_state(sctx, cur->dir, cur->dir_gen);
if (ret < 0)
goto out;
if (ret == inode_state_did_create ||
ret == inode_state_no_change) {
/* TODO delayed utimes */
- ret = send_utimes(sctx, un->val, un->aux);
+ ret = send_utimes(sctx, cur->dir, cur->dir_gen);
if (ret < 0)
goto out;
} else if (ret == inode_state_did_delete) {
- ret = can_rmdir(sctx, un->val, sctx->cur_ino);
+ ret = can_rmdir(sctx, cur->dir, sctx->cur_ino);
if (ret < 0)
goto out;
if (ret) {
- ret = get_cur_path(sctx, un->val, un->aux,
- valid_path);
+ ret = get_cur_path(sctx, cur->dir,
+ cur->dir_gen, valid_path);
if (ret < 0)
goto out;
ret = send_rmdir(sctx, valid_path);
@@ -3017,8 +2960,8 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino);
ret = 0;
out:
+ __free_recorded_refs(&check_dirs);
free_recorded_refs(sctx);
- ulist_free(check_dirs);
fs_path_free(valid_path);
return ret;
}
@@ -3119,6 +3062,8 @@ out:
struct find_ref_ctx {
u64 dir;
+ u64 dir_gen;
+ struct btrfs_root *root;
struct fs_path *name;
int found_idx;
};
@@ -3128,9 +3073,21 @@ static int __find_iref(int num, u64 dir, int index,
void *ctx_)
{
struct find_ref_ctx *ctx = ctx_;
+ u64 dir_gen;
+ int ret;
if (dir == ctx->dir && fs_path_len(name) == fs_path_len(ctx->name) &&
strncmp(name->start, ctx->name->start, fs_path_len(name)) == 0) {
+ /*
+ * To avoid doing extra lookups we'll only do this if everything
+ * else matches.
+ */
+ ret = get_inode_info(ctx->root, dir, NULL, &dir_gen, NULL,
+ NULL, NULL, NULL);
+ if (ret)
+ return ret;
+ if (dir_gen != ctx->dir_gen)
+ return 0;
ctx->found_idx = num;
return 1;
}
@@ -3140,14 +3097,16 @@ static int __find_iref(int num, u64 dir, int index,
static int find_iref(struct btrfs_root *root,
struct btrfs_path *path,
struct btrfs_key *key,
- u64 dir, struct fs_path *name)
+ u64 dir, u64 dir_gen, struct fs_path *name)
{
int ret;
struct find_ref_ctx ctx;
ctx.dir = dir;
ctx.name = name;
+ ctx.dir_gen = dir_gen;
ctx.found_idx = -1;
+ ctx.root = root;
ret = iterate_inode_ref(root, path, key, 0, __find_iref, &ctx);
if (ret < 0)
@@ -3163,11 +3122,17 @@ static int __record_changed_new_ref(int num, u64 dir, int index,
struct fs_path *name,
void *ctx)
{
+ u64 dir_gen;
int ret;
struct send_ctx *sctx = ctx;
+ ret = get_inode_info(sctx->send_root, dir, NULL, &dir_gen, NULL,
+ NULL, NULL, NULL);
+ if (ret)
+ return ret;
+
ret = find_iref(sctx->parent_root, sctx->right_path,
- sctx->cmp_key, dir, name);
+ sctx->cmp_key, dir, dir_gen, name);
if (ret == -ENOENT)
ret = __record_new_ref(num, dir, index, name, sctx);
else if (ret > 0)
@@ -3180,11 +3145,17 @@ static int __record_changed_deleted_ref(int num, u64 dir, int index,
struct fs_path *name,
void *ctx)
{
+ u64 dir_gen;
int ret;
struct send_ctx *sctx = ctx;
+ ret = get_inode_info(sctx->parent_root, dir, NULL, &dir_gen, NULL,
+ NULL, NULL, NULL);
+ if (ret)
+ return ret;
+
ret = find_iref(sctx->send_root, sctx->left_path, sctx->cmp_key,
- dir, name);
+ dir, dir_gen, name);
if (ret == -ENOENT)
ret = __record_deleted_ref(num, dir, index, name, sctx);
else if (ret > 0)
@@ -3577,6 +3548,72 @@ out:
return ret;
}
+static ssize_t fill_read_buf(struct send_ctx *sctx, u64 offset, u32 len)
+{
+ struct btrfs_root *root = sctx->send_root;
+ struct btrfs_fs_info *fs_info = root->fs_info;
+ struct inode *inode;
+ struct page *page;
+ char *addr;
+ struct btrfs_key key;
+ pgoff_t index = offset >> PAGE_CACHE_SHIFT;
+ pgoff_t last_index;
+ unsigned pg_offset = offset & ~PAGE_CACHE_MASK;
+ ssize_t ret = 0;
+
+ key.objectid = sctx->cur_ino;
+ key.type = BTRFS_INODE_ITEM_KEY;
+ key.offset = 0;
+
+ inode = btrfs_iget(fs_info->sb, &key, root, NULL);
+ if (IS_ERR(inode))
+ return PTR_ERR(inode);
+
+ if (offset + len > i_size_read(inode)) {
+ if (offset > i_size_read(inode))
+ len = 0;
+ else
+ len = offset - i_size_read(inode);
+ }
+ if (len == 0)
+ goto out;
+
+ last_index = (offset + len - 1) >> PAGE_CACHE_SHIFT;
+ while (index <= last_index) {
+ unsigned cur_len = min_t(unsigned, len,
+ PAGE_CACHE_SIZE - pg_offset);
+ page = find_or_create_page(inode->i_mapping, index, GFP_NOFS);
+ if (!page) {
+ ret = -ENOMEM;
+ break;
+ }
+
+ if (!PageUptodate(page)) {
+ btrfs_readpage(NULL, page);
+ lock_page(page);
+ if (!PageUptodate(page)) {
+ unlock_page(page);
+ page_cache_release(page);
+ ret = -EIO;
+ break;
+ }
+ }
+
+ addr = kmap(page);
+ memcpy(sctx->read_buf + ret, addr + pg_offset, cur_len);
+ kunmap(page);
+ unlock_page(page);
+ page_cache_release(page);
+ index++;
+ pg_offset = 0;
+ len -= cur_len;
+ ret += cur_len;
+ }
+out:
+ iput(inode);
+ return ret;
+}
+
/*
* Read some bytes from the current inode/file and send a write command to
* user space.
@@ -3585,35 +3622,20 @@ static int send_write(struct send_ctx *sctx, u64 offset, u32 len)
{
int ret = 0;
struct fs_path *p;
- loff_t pos = offset;
- int num_read = 0;
- mm_segment_t old_fs;
+ ssize_t num_read = 0;
p = fs_path_alloc();
if (!p)
return -ENOMEM;
- /*
- * vfs normally only accepts user space buffers for security reasons.
- * we only read from the file and also only provide the read_buf buffer
- * to vfs. As this buffer does not come from a user space call, it's
- * ok to temporary allow kernel space buffers.
- */
- old_fs = get_fs();
- set_fs(KERNEL_DS);
-
verbose_printk("btrfs: send_write offset=%llu, len=%d\n", offset, len);
- ret = open_cur_inode_file(sctx);
- if (ret < 0)
- goto out;
-
- ret = vfs_read(sctx->cur_inode_filp, sctx->read_buf, len, &pos);
- if (ret < 0)
- goto out;
- num_read = ret;
- if (!num_read)
+ num_read = fill_read_buf(sctx, offset, len);
+ if (num_read <= 0) {
+ if (num_read < 0)
+ ret = num_read;
goto out;
+ }
ret = begin_cmd(sctx, BTRFS_SEND_C_WRITE);
if (ret < 0)
@@ -3632,7 +3654,6 @@ verbose_printk("btrfs: send_write offset=%llu, len=%d\n", offset, len);
tlv_put_failure:
out:
fs_path_free(p);
- set_fs(old_fs);
if (ret < 0)
return ret;
return num_read;
@@ -3869,7 +3890,8 @@ static int is_extent_unchanged(struct send_ctx *sctx,
btrfs_item_key_to_cpu(eb, &found_key, slot);
if (found_key.objectid != key.objectid ||
found_key.type != key.type) {
- ret = 0;
+ /* If we're a hole then just pretend nothing changed */
+ ret = (left_disknr) ? 0 : 1;
goto out;
}
@@ -3880,22 +3902,23 @@ static int is_extent_unchanged(struct send_ctx *sctx,
while (key.offset < ekey->offset + left_len) {
ei = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
right_type = btrfs_file_extent_type(eb, ei);
- right_disknr = btrfs_file_extent_disk_bytenr(eb, ei);
- right_len = btrfs_file_extent_num_bytes(eb, ei);
- right_offset = btrfs_file_extent_offset(eb, ei);
- right_gen = btrfs_file_extent_generation(eb, ei);
-
if (right_type != BTRFS_FILE_EXTENT_REG) {
ret = 0;
goto out;
}
+ right_disknr = btrfs_file_extent_disk_bytenr(eb, ei);
+ right_len = btrfs_file_extent_num_bytes(eb, ei);
+ right_offset = btrfs_file_extent_offset(eb, ei);
+ right_gen = btrfs_file_extent_generation(eb, ei);
+
/*
* Are we at extent 8? If yes, we know the extent is changed.
* This may only happen on the first iteration.
*/
if (found_key.offset + right_len <= ekey->offset) {
- ret = 0;
+ /* If we're a hole just pretend nothing changed */
+ ret = (left_disknr) ? 0 : 1;
goto out;
}
@@ -3960,8 +3983,8 @@ static int process_extent(struct send_ctx *sctx,
struct btrfs_path *path,
struct btrfs_key *key)
{
- int ret = 0;
struct clone_root *found_clone = NULL;
+ int ret = 0;
if (S_ISLNK(sctx->cur_inode_mode))
return 0;
@@ -3974,6 +3997,32 @@ static int process_extent(struct send_ctx *sctx,
ret = 0;
goto out;
}
+ } else {
+ struct btrfs_file_extent_item *ei;
+ u8 type;
+
+ ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
+ struct btrfs_file_extent_item);
+ type = btrfs_file_extent_type(path->nodes[0], ei);
+ if (type == BTRFS_FILE_EXTENT_PREALLOC ||
+ type == BTRFS_FILE_EXTENT_REG) {
+ /*
+ * The send spec does not have a prealloc command yet,
+ * so just leave a hole for prealloc'ed extents until
+ * we have enough commands queued up to justify rev'ing
+ * the send spec.
+ */
+ if (type == BTRFS_FILE_EXTENT_PREALLOC) {
+ ret = 0;
+ goto out;
+ }
+
+ /* Have a hole, just skip it. */
+ if (btrfs_file_extent_disk_bytenr(path->nodes[0], ei) == 0) {
+ ret = 0;
+ goto out;
+ }
+ }
}
ret = find_extent_clone(sctx, path, key->objectid, key->offset,
@@ -4149,10 +4198,6 @@ static int changed_inode(struct send_ctx *sctx,
u64 left_gen = 0;
u64 right_gen = 0;
- ret = close_cur_inode_file(sctx);
- if (ret < 0)
- goto out;
-
sctx->cur_ino = key->objectid;
sctx->cur_inode_new_gen = 0;
@@ -4361,6 +4406,64 @@ static int changed_extent(struct send_ctx *sctx,
return ret;
}
+static int dir_changed(struct send_ctx *sctx, u64 dir)
+{
+ u64 orig_gen, new_gen;
+ int ret;
+
+ ret = get_inode_info(sctx->send_root, dir, NULL, &new_gen, NULL, NULL,
+ NULL, NULL);
+ if (ret)
+ return ret;
+
+ ret = get_inode_info(sctx->parent_root, dir, NULL, &orig_gen, NULL,
+ NULL, NULL, NULL);
+ if (ret)
+ return ret;
+
+ return (orig_gen != new_gen) ? 1 : 0;
+}
+
+static int compare_refs(struct send_ctx *sctx, struct btrfs_path *path,
+ struct btrfs_key *key)
+{
+ struct btrfs_inode_extref *extref;
+ struct extent_buffer *leaf;
+ u64 dirid = 0, last_dirid = 0;
+ unsigned long ptr;
+ u32 item_size;
+ u32 cur_offset = 0;
+ int ref_name_len;
+ int ret = 0;
+
+ /* Easy case, just check this one dirid */
+ if (key->type == BTRFS_INODE_REF_KEY) {
+ dirid = key->offset;
+
+ ret = dir_changed(sctx, dirid);
+ goto out;
+ }
+
+ leaf = path->nodes[0];
+ item_size = btrfs_item_size_nr(leaf, path->slots[0]);
+ ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
+ while (cur_offset < item_size) {
+ extref = (struct btrfs_inode_extref *)(ptr +
+ cur_offset);
+ dirid = btrfs_inode_extref_parent(leaf, extref);
+ ref_name_len = btrfs_inode_extref_name_len(leaf, extref);
+ cur_offset += ref_name_len + sizeof(*extref);
+ if (dirid == last_dirid)
+ continue;
+ ret = dir_changed(sctx, dirid);
+ if (ret)
+ break;
+ last_dirid = dirid;
+ }
+out:
+ return ret;
+}
+
/*
* Updates compare related fields in sctx and simply forwards to the actual
* changed_xxx functions.
@@ -4376,6 +4479,19 @@ static int changed_cb(struct btrfs_root *left_root,
int ret = 0;
struct send_ctx *sctx = ctx;
+ if (result == BTRFS_COMPARE_TREE_SAME) {
+ if (key->type != BTRFS_INODE_REF_KEY &&
+ key->type != BTRFS_INODE_EXTREF_KEY)
+ return 0;
+ ret = compare_refs(sctx, left_path, key);
+ if (!ret)
+ return 0;
+ if (ret < 0)
+ return ret;
+ result = BTRFS_COMPARE_TREE_CHANGED;
+ ret = 0;
+ }
+
sctx->left_path = left_path;
sctx->right_path = right_path;
sctx->cmp_key = key;
@@ -4542,11 +4658,6 @@ static int send_subvol(struct send_ctx *sctx)
}
out:
- if (!ret)
- ret = close_cur_inode_file(sctx);
- else
- close_cur_inode_file(sctx);
-
free_recorded_refs(sctx);
return ret;
}
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 8eb6191..2d8ac1b 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -42,7 +42,6 @@
#include <linux/cleancache.h>
#include <linux/ratelimit.h>
#include <linux/btrfs.h>
-#include "compat.h"
#include "delayed-inode.h"
#include "ctree.h"
#include "disk-io.h"
@@ -56,6 +55,8 @@
#include "rcu-string.h"
#include "dev-replace.h"
#include "free-space-cache.h"
+#include "backref.h"
+#include "tests/btrfs-tests.h"
#define CREATE_TRACE_POINTS
#include <trace/events/btrfs.h>
@@ -320,14 +321,15 @@ enum {
Opt_enospc_debug, Opt_subvolrootid, Opt_defrag, Opt_inode_cache,
Opt_no_space_cache, Opt_recovery, Opt_skip_balance,
Opt_check_integrity, Opt_check_integrity_including_extent_data,
- Opt_check_integrity_print_mask, Opt_fatal_errors,
+ Opt_check_integrity_print_mask, Opt_fatal_errors, Opt_rescan_uuid_tree,
+ Opt_commit_interval,
Opt_err,
};
static match_table_t tokens = {
{Opt_degraded, "degraded"},
{Opt_subvol, "subvol=%s"},
- {Opt_subvolid, "subvolid=%d"},
+ {Opt_subvolid, "subvolid=%s"},
{Opt_device, "device=%s"},
{Opt_nodatasum, "nodatasum"},
{Opt_nodatacow, "nodatacow"},
@@ -360,7 +362,9 @@ static match_table_t tokens = {
{Opt_check_integrity, "check_int"},
{Opt_check_integrity_including_extent_data, "check_int_data"},
{Opt_check_integrity_print_mask, "check_int_print_mask=%d"},
+ {Opt_rescan_uuid_tree, "rescan_uuid_tree"},
{Opt_fatal_errors, "fatal_errors=%s"},
+ {Opt_commit_interval, "commit=%d"},
{Opt_err, NULL},
};
@@ -496,10 +500,15 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
btrfs_set_opt(info->mount_opt, NOBARRIER);
break;
case Opt_thread_pool:
- intarg = 0;
- match_int(&args[0], &intarg);
- if (intarg)
+ ret = match_int(&args[0], &intarg);
+ if (ret) {
+ goto out;
+ } else if (intarg > 0) {
info->thread_pool_size = intarg;
+ } else {
+ ret = -EINVAL;
+ goto out;
+ }
break;
case Opt_max_inline:
num = match_strdup(&args[0]);
@@ -513,7 +522,10 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
root->sectorsize);
}
printk(KERN_INFO "btrfs: max_inline at %llu\n",
- (unsigned long long)info->max_inline);
+ info->max_inline);
+ } else {
+ ret = -ENOMEM;
+ goto out;
}
break;
case Opt_alloc_start:
@@ -525,7 +537,10 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
kfree(num);
printk(KERN_INFO
"btrfs: allocations start at %llu\n",
- (unsigned long long)info->alloc_start);
+ info->alloc_start);
+ } else {
+ ret = -ENOMEM;
+ goto out;
}
break;
case Opt_noacl:
@@ -540,12 +555,16 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
btrfs_set_opt(info->mount_opt, FLUSHONCOMMIT);
break;
case Opt_ratio:
- intarg = 0;
- match_int(&args[0], &intarg);
- if (intarg) {
+ ret = match_int(&args[0], &intarg);
+ if (ret) {
+ goto out;
+ } else if (intarg >= 0) {
info->metadata_ratio = intarg;
printk(KERN_INFO "btrfs: metadata ratio %d\n",
info->metadata_ratio);
+ } else {
+ ret = -EINVAL;
+ goto out;
}
break;
case Opt_discard:
@@ -554,6 +573,9 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
case Opt_space_cache:
btrfs_set_opt(info->mount_opt, SPACE_CACHE);
break;
+ case Opt_rescan_uuid_tree:
+ btrfs_set_opt(info->mount_opt, RESCAN_UUID_TREE);
+ break;
case Opt_no_space_cache:
printk(KERN_INFO "btrfs: disabling disk space caching\n");
btrfs_clear_opt(info->mount_opt, SPACE_CACHE);
@@ -596,13 +618,17 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
btrfs_set_opt(info->mount_opt, CHECK_INTEGRITY);
break;
case Opt_check_integrity_print_mask:
- intarg = 0;
- match_int(&args[0], &intarg);
- if (intarg) {
+ ret = match_int(&args[0], &intarg);
+ if (ret) {
+ goto out;
+ } else if (intarg >= 0) {
info->check_integrity_print_mask = intarg;
printk(KERN_INFO "btrfs:"
" check_integrity_print_mask 0x%x\n",
info->check_integrity_print_mask);
+ } else {
+ ret = -EINVAL;
+ goto out;
}
break;
#else
@@ -626,6 +652,29 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
goto out;
}
break;
+ case Opt_commit_interval:
+ intarg = 0;
+ ret = match_int(&args[0], &intarg);
+ if (ret < 0) {
+ printk(KERN_ERR
+ "btrfs: invalid commit interval\n");
+ ret = -EINVAL;
+ goto out;
+ }
+ if (intarg > 0) {
+ if (intarg > 300) {
+ printk(KERN_WARNING
+ "btrfs: excessive commit interval %d\n",
+ intarg);
+ }
+ info->commit_interval = intarg;
+ } else {
+ printk(KERN_INFO
+ "btrfs: using default commit interval %ds\n",
+ BTRFS_DEFAULT_COMMIT_INTERVAL);
+ info->commit_interval = BTRFS_DEFAULT_COMMIT_INTERVAL;
+ }
+ break;
case Opt_err:
printk(KERN_INFO "btrfs: unrecognized mount option "
"'%s'\n", p);
@@ -654,8 +703,8 @@ static int btrfs_parse_early_options(const char *options, fmode_t flags,
{
substring_t args[MAX_OPT_ARGS];
char *device_name, *opts, *orig, *p;
+ char *num = NULL;
int error = 0;
- int intarg;
if (!options)
return 0;
@@ -679,17 +728,23 @@ static int btrfs_parse_early_options(const char *options, fmode_t flags,
case Opt_subvol:
kfree(*subvol_name);
*subvol_name = match_strdup(&args[0]);
+ if (!*subvol_name) {
+ error = -ENOMEM;
+ goto out;
+ }
break;
case Opt_subvolid:
- intarg = 0;
- error = match_int(&args[0], &intarg);
- if (!error) {
+ num = match_strdup(&args[0]);
+ if (num) {
+ *subvol_objectid = memparse(num, NULL);
+ kfree(num);
/* we want the original fs_tree */
- if (!intarg)
+ if (!*subvol_objectid)
*subvol_objectid =
BTRFS_FS_TREE_OBJECTID;
- else
- *subvol_objectid = intarg;
+ } else {
+ error = -EINVAL;
+ goto out;
}
break;
case Opt_subvolrootid:
@@ -865,7 +920,7 @@ int btrfs_sync_fs(struct super_block *sb, int wait)
return 0;
}
- btrfs_wait_all_ordered_extents(fs_info, 1);
+ btrfs_wait_ordered_roots(fs_info, -1);
trans = btrfs_attach_transaction_barrier(root);
if (IS_ERR(trans)) {
@@ -892,11 +947,9 @@ static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry)
if (btrfs_test_opt(root, NOBARRIER))
seq_puts(seq, ",nobarrier");
if (info->max_inline != 8192 * 1024)
- seq_printf(seq, ",max_inline=%llu",
- (unsigned long long)info->max_inline);
+ seq_printf(seq, ",max_inline=%llu", info->max_inline);
if (info->alloc_start != 0)
- seq_printf(seq, ",alloc_start=%llu",
- (unsigned long long)info->alloc_start);
+ seq_printf(seq, ",alloc_start=%llu", info->alloc_start);
if (info->thread_pool_size != min_t(unsigned long,
num_online_cpus() + 2, 8))
seq_printf(seq, ",thread_pool=%d", info->thread_pool_size);
@@ -928,6 +981,8 @@ static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry)
seq_puts(seq, ",space_cache");
else
seq_puts(seq, ",nospace_cache");
+ if (btrfs_test_opt(root, RESCAN_UUID_TREE))
+ seq_puts(seq, ",rescan_uuid_tree");
if (btrfs_test_opt(root, CLEAR_CACHE))
seq_puts(seq, ",clear_cache");
if (btrfs_test_opt(root, USER_SUBVOL_RM_ALLOWED))
@@ -940,8 +995,24 @@ static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry)
seq_puts(seq, ",inode_cache");
if (btrfs_test_opt(root, SKIP_BALANCE))
seq_puts(seq, ",skip_balance");
+ if (btrfs_test_opt(root, RECOVERY))
+ seq_puts(seq, ",recovery");
+#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
+ if (btrfs_test_opt(root, CHECK_INTEGRITY_INCLUDING_EXTENT_DATA))
+ seq_puts(seq, ",check_int_data");
+ else if (btrfs_test_opt(root, CHECK_INTEGRITY))
+ seq_puts(seq, ",check_int");
+ if (info->check_integrity_print_mask)
+ seq_printf(seq, ",check_int_print_mask=%d",
+ info->check_integrity_print_mask);
+#endif
+ if (info->metadata_ratio)
+ seq_printf(seq, ",metadata_ratio=%d",
+ info->metadata_ratio);
if (btrfs_test_opt(root, PANIC_ON_FATAL_ERROR))
seq_puts(seq, ",fatal_errors=panic");
+ if (info->commit_interval != BTRFS_DEFAULT_COMMIT_INTERVAL)
+ seq_printf(seq, ",commit=%d", info->commit_interval);
return 0;
}
@@ -1258,6 +1329,12 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
* this also happens on 'umount -rf' or on shutdown, when
* the filesystem is busy.
*/
+
+ /* wait for the uuid_scan task to finish */
+ down(&fs_info->uuid_tree_rescan_sem);
+ /* avoid complains from lockdep et al. */
+ up(&fs_info->uuid_tree_rescan_sem);
+
sb->s_flags |= MS_RDONLY;
btrfs_dev_replace_suspend_for_unmount(fs_info);
@@ -1268,6 +1345,12 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
if (ret)
goto restore;
} else {
+ if (test_bit(BTRFS_FS_STATE_ERROR, &root->fs_info->fs_state)) {
+ btrfs_err(fs_info,
+ "Remounting read-write after error is not allowed\n");
+ ret = -EINVAL;
+ goto restore;
+ }
if (fs_info->fs_devices->rw_devices == 0) {
ret = -EACCES;
goto restore;
@@ -1305,6 +1388,16 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
pr_warn("btrfs: failed to resume dev_replace\n");
goto restore;
}
+
+ if (!fs_info->uuid_root) {
+ pr_info("btrfs: creating UUID tree\n");
+ ret = btrfs_create_uuid_tree(fs_info);
+ if (ret) {
+ pr_warn("btrfs: failed to create the uuid tree"
+ "%d\n", ret);
+ goto restore;
+ }
+ }
sb->s_flags &= ~MS_RDONLY;
}
out:
@@ -1377,7 +1470,7 @@ static int btrfs_calc_avail_data_space(struct btrfs_root *root, u64 *free_bytes)
nr_devices = fs_info->fs_devices->open_devices;
BUG_ON(!nr_devices);
- devices_info = kmalloc(sizeof(*devices_info) * nr_devices,
+ devices_info = kmalloc_array(nr_devices, sizeof(*devices_info),
GFP_NOFS);
if (!devices_info)
return -ENOMEM;
@@ -1690,12 +1783,38 @@ static void btrfs_print_info(void)
#ifdef CONFIG_BTRFS_DEBUG
", debug=on"
#endif
+#ifdef CONFIG_BTRFS_ASSERT
+ ", assert=on"
+#endif
#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
", integrity-checker=on"
#endif
"\n");
}
+static int btrfs_run_sanity_tests(void)
+{
+ int ret;
+
+ ret = btrfs_init_test_fs();
+ if (ret)
+ return ret;
+
+ ret = btrfs_test_free_space_cache();
+ if (ret)
+ goto out;
+ ret = btrfs_test_extent_buffer_operations();
+ if (ret)
+ goto out;
+ ret = btrfs_test_extent_io();
+ if (ret)
+ goto out;
+ ret = btrfs_test_inodes();
+out:
+ btrfs_destroy_test_fs();
+ return ret;
+}
+
static int __init init_btrfs_fs(void)
{
int err;
@@ -1734,23 +1853,32 @@ static int __init init_btrfs_fs(void)
if (err)
goto free_auto_defrag;
- err = btrfs_interface_init();
+ err = btrfs_prelim_ref_init();
if (err)
- goto free_delayed_ref;
+ goto free_prelim_ref;
- err = register_filesystem(&btrfs_fs_type);
+ err = btrfs_interface_init();
if (err)
- goto unregister_ioctl;
+ goto free_delayed_ref;
btrfs_init_lockdep();
btrfs_print_info();
- btrfs_test_free_space_cache();
+
+ err = btrfs_run_sanity_tests();
+ if (err)
+ goto unregister_ioctl;
+
+ err = register_filesystem(&btrfs_fs_type);
+ if (err)
+ goto unregister_ioctl;
return 0;
unregister_ioctl:
btrfs_interface_exit();
+free_prelim_ref:
+ btrfs_prelim_ref_exit();
free_delayed_ref:
btrfs_delayed_ref_exit();
free_auto_defrag:
@@ -1777,6 +1905,7 @@ static void __exit exit_btrfs_fs(void)
btrfs_delayed_ref_exit();
btrfs_auto_defrag_exit();
btrfs_delayed_inode_exit();
+ btrfs_prelim_ref_exit();
ordered_data_exit();
extent_map_exit();
extent_io_exit();
diff --git a/fs/btrfs/tests/btrfs-tests.c b/fs/btrfs/tests/btrfs-tests.c
new file mode 100644
index 0000000..757ef00
--- /dev/null
+++ b/fs/btrfs/tests/btrfs-tests.c
@@ -0,0 +1,74 @@
+/*
+ * Copyright (C) 2013 Fusion IO. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#include <linux/fs.h>
+#include <linux/mount.h>
+#include <linux/magic.h>
+#include "btrfs-tests.h"
+#include "../ctree.h"
+
+static struct vfsmount *test_mnt = NULL;
+
+static const struct super_operations btrfs_test_super_ops = {
+ .alloc_inode = btrfs_alloc_inode,
+ .destroy_inode = btrfs_test_destroy_inode,
+};
+
+static struct dentry *btrfs_test_mount(struct file_system_type *fs_type,
+ int flags, const char *dev_name,
+ void *data)
+{
+ return mount_pseudo(fs_type, "btrfs_test:", &btrfs_test_super_ops,
+ NULL, BTRFS_TEST_MAGIC);
+}
+
+static struct file_system_type test_type = {
+ .name = "btrfs_test_fs",
+ .mount = btrfs_test_mount,
+ .kill_sb = kill_anon_super,
+};
+
+struct inode *btrfs_new_test_inode(void)
+{
+ return new_inode(test_mnt->mnt_sb);
+}
+
+int btrfs_init_test_fs(void)
+{
+ int ret;
+
+ ret = register_filesystem(&test_type);
+ if (ret) {
+ printk(KERN_ERR "btrfs: cannot register test file system\n");
+ return ret;
+ }
+
+ test_mnt = kern_mount(&test_type);
+ if (IS_ERR(test_mnt)) {
+ printk(KERN_ERR "btrfs: cannot mount test file system\n");
+ unregister_filesystem(&test_type);
+ return ret;
+ }
+ return 0;
+}
+
+void btrfs_destroy_test_fs(void)
+{
+ kern_unmount(test_mnt);
+ unregister_filesystem(&test_type);
+}
diff --git a/fs/btrfs/tests/btrfs-tests.h b/fs/btrfs/tests/btrfs-tests.h
new file mode 100644
index 0000000..b353bc8
--- /dev/null
+++ b/fs/btrfs/tests/btrfs-tests.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright (C) 2013 Fusion IO. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#ifndef __BTRFS_TESTS
+#define __BTRFS_TESTS
+
+#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
+
+#define test_msg(fmt, ...) pr_info("btrfs: selftest: " fmt, ##__VA_ARGS__)
+
+int btrfs_test_free_space_cache(void);
+int btrfs_test_extent_buffer_operations(void);
+int btrfs_test_extent_io(void);
+int btrfs_test_inodes(void);
+int btrfs_init_test_fs(void);
+void btrfs_destroy_test_fs(void);
+struct inode *btrfs_new_test_inode(void);
+#else
+static inline int btrfs_test_free_space_cache(void)
+{
+ return 0;
+}
+static inline int btrfs_test_extent_buffer_operations(void)
+{
+ return 0;
+}
+static inline int btrfs_init_test_fs(void)
+{
+ return 0;
+}
+static inline void btrfs_destroy_test_fs(void)
+{
+}
+static inline int btrfs_test_extent_io(void)
+{
+ return 0;
+}
+static inline int btrfs_test_inodes(void)
+{
+ return 0;
+}
+#endif
+
+#endif
diff --git a/fs/btrfs/tests/extent-buffer-tests.c b/fs/btrfs/tests/extent-buffer-tests.c
new file mode 100644
index 0000000..cc286ce
--- /dev/null
+++ b/fs/btrfs/tests/extent-buffer-tests.c
@@ -0,0 +1,229 @@
+/*
+ * Copyright (C) 2013 Fusion IO. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#include <linux/slab.h>
+#include "btrfs-tests.h"
+#include "../ctree.h"
+#include "../extent_io.h"
+#include "../disk-io.h"
+
+static int test_btrfs_split_item(void)
+{
+ struct btrfs_path *path;
+ struct btrfs_root *root;
+ struct extent_buffer *eb;
+ struct btrfs_item *item;
+ char *value = "mary had a little lamb";
+ char *split1 = "mary had a little";
+ char *split2 = " lamb";
+ char *split3 = "mary";
+ char *split4 = " had a little";
+ char buf[32];
+ struct btrfs_key key;
+ u32 value_len = strlen(value);
+ int ret = 0;
+
+ test_msg("Running btrfs_split_item tests\n");
+
+ root = btrfs_alloc_dummy_root();
+ if (IS_ERR(root)) {
+ test_msg("Could not allocate root\n");
+ return PTR_ERR(root);
+ }
+
+ path = btrfs_alloc_path();
+ if (!path) {
+ test_msg("Could not allocate path\n");
+ kfree(root);
+ return -ENOMEM;
+ }
+
+ path->nodes[0] = eb = alloc_dummy_extent_buffer(0, 4096);
+ if (!eb) {
+ test_msg("Could not allocate dummy buffer\n");
+ ret = -ENOMEM;
+ goto out;
+ }
+ path->slots[0] = 0;
+
+ key.objectid = 0;
+ key.type = BTRFS_EXTENT_CSUM_KEY;
+ key.offset = 0;
+
+ setup_items_for_insert(root, path, &key, &value_len, value_len,
+ value_len + sizeof(struct btrfs_item), 1);
+ item = btrfs_item_nr(0);
+ write_extent_buffer(eb, value, btrfs_item_ptr_offset(eb, 0),
+ value_len);
+
+ key.offset = 3;
+
+ /*
+ * Passing NULL trans here should be safe because we have plenty of
+ * space in this leaf to split the item without having to split the
+ * leaf.
+ */
+ ret = btrfs_split_item(NULL, root, path, &key, 17);
+ if (ret) {
+ test_msg("Split item failed %d\n", ret);
+ goto out;
+ }
+
+ /*
+ * Read the first slot, it should have the original key and contain only
+ * 'mary had a little'
+ */
+ btrfs_item_key_to_cpu(eb, &key, 0);
+ if (key.objectid != 0 || key.type != BTRFS_EXTENT_CSUM_KEY ||
+ key.offset != 0) {
+ test_msg("Invalid key at slot 0\n");
+ ret = -EINVAL;
+ goto out;
+ }
+
+ item = btrfs_item_nr(0);
+ if (btrfs_item_size(eb, item) != strlen(split1)) {
+ test_msg("Invalid len in the first split\n");
+ ret = -EINVAL;
+ goto out;
+ }
+
+ read_extent_buffer(eb, buf, btrfs_item_ptr_offset(eb, 0),
+ strlen(split1));
+ if (memcmp(buf, split1, strlen(split1))) {
+ test_msg("Data in the buffer doesn't match what it should "
+ "in the first split have='%.*s' want '%s'\n",
+ (int)strlen(split1), buf, split1);
+ ret = -EINVAL;
+ goto out;
+ }
+
+ btrfs_item_key_to_cpu(eb, &key, 1);
+ if (key.objectid != 0 || key.type != BTRFS_EXTENT_CSUM_KEY ||
+ key.offset != 3) {
+ test_msg("Invalid key at slot 1\n");
+ ret = -EINVAL;
+ goto out;
+ }
+
+ item = btrfs_item_nr(1);
+ if (btrfs_item_size(eb, item) != strlen(split2)) {
+ test_msg("Invalid len in the second split\n");
+ ret = -EINVAL;
+ goto out;
+ }
+
+ read_extent_buffer(eb, buf, btrfs_item_ptr_offset(eb, 1),
+ strlen(split2));
+ if (memcmp(buf, split2, strlen(split2))) {
+ test_msg("Data in the buffer doesn't match what it should "
+ "in the second split\n");
+ ret = -EINVAL;
+ goto out;
+ }
+
+ key.offset = 1;
+ /* Do it again so we test memmoving the other items in the leaf */
+ ret = btrfs_split_item(NULL, root, path, &key, 4);
+ if (ret) {
+ test_msg("Second split item failed %d\n", ret);
+ goto out;
+ }
+
+ btrfs_item_key_to_cpu(eb, &key, 0);
+ if (key.objectid != 0 || key.type != BTRFS_EXTENT_CSUM_KEY ||
+ key.offset != 0) {
+ test_msg("Invalid key at slot 0\n");
+ ret = -EINVAL;
+ goto out;
+ }
+
+ item = btrfs_item_nr(0);
+ if (btrfs_item_size(eb, item) != strlen(split3)) {
+ test_msg("Invalid len in the first split\n");
+ ret = -EINVAL;
+ goto out;
+ }
+
+ read_extent_buffer(eb, buf, btrfs_item_ptr_offset(eb, 0),
+ strlen(split3));
+ if (memcmp(buf, split3, strlen(split3))) {
+ test_msg("Data in the buffer doesn't match what it should "
+ "in the third split");
+ ret = -EINVAL;
+ goto out;
+ }
+
+ btrfs_item_key_to_cpu(eb, &key, 1);
+ if (key.objectid != 0 || key.type != BTRFS_EXTENT_CSUM_KEY ||
+ key.offset != 1) {
+ test_msg("Invalid key at slot 1\n");
+ ret = -EINVAL;
+ goto out;
+ }
+
+ item = btrfs_item_nr(1);
+ if (btrfs_item_size(eb, item) != strlen(split4)) {
+ test_msg("Invalid len in the second split\n");
+ ret = -EINVAL;
+ goto out;
+ }
+
+ read_extent_buffer(eb, buf, btrfs_item_ptr_offset(eb, 1),
+ strlen(split4));
+ if (memcmp(buf, split4, strlen(split4))) {
+ test_msg("Data in the buffer doesn't match what it should "
+ "in the fourth split\n");
+ ret = -EINVAL;
+ goto out;
+ }
+
+ btrfs_item_key_to_cpu(eb, &key, 2);
+ if (key.objectid != 0 || key.type != BTRFS_EXTENT_CSUM_KEY ||
+ key.offset != 3) {
+ test_msg("Invalid key at slot 2\n");
+ ret = -EINVAL;
+ goto out;
+ }
+
+ item = btrfs_item_nr(2);
+ if (btrfs_item_size(eb, item) != strlen(split2)) {
+ test_msg("Invalid len in the second split\n");
+ ret = -EINVAL;
+ goto out;
+ }
+
+ read_extent_buffer(eb, buf, btrfs_item_ptr_offset(eb, 2),
+ strlen(split2));
+ if (memcmp(buf, split2, strlen(split2))) {
+ test_msg("Data in the buffer doesn't match what it should "
+ "in the last chunk\n");
+ ret = -EINVAL;
+ goto out;
+ }
+out:
+ btrfs_free_path(path);
+ kfree(root);
+ return ret;
+}
+
+int btrfs_test_extent_buffer_operations(void)
+{
+ test_msg("Running extent buffer operation tests");
+ return test_btrfs_split_item();
+}
diff --git a/fs/btrfs/tests/extent-io-tests.c b/fs/btrfs/tests/extent-io-tests.c
new file mode 100644
index 0000000..7e99c2f
--- /dev/null
+++ b/fs/btrfs/tests/extent-io-tests.c
@@ -0,0 +1,276 @@
+/*
+ * Copyright (C) 2013 Fusion IO. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#include <linux/pagemap.h>
+#include <linux/sched.h>
+#include "btrfs-tests.h"
+#include "../extent_io.h"
+
+#define PROCESS_UNLOCK (1 << 0)
+#define PROCESS_RELEASE (1 << 1)
+#define PROCESS_TEST_LOCKED (1 << 2)
+
+static noinline int process_page_range(struct inode *inode, u64 start, u64 end,
+ unsigned long flags)
+{
+ int ret;
+ struct page *pages[16];
+ unsigned long index = start >> PAGE_CACHE_SHIFT;
+ unsigned long end_index = end >> PAGE_CACHE_SHIFT;
+ unsigned long nr_pages = end_index - index + 1;
+ int i;
+ int count = 0;
+ int loops = 0;
+
+ while (nr_pages > 0) {
+ ret = find_get_pages_contig(inode->i_mapping, index,
+ min_t(unsigned long, nr_pages,
+ ARRAY_SIZE(pages)), pages);
+ for (i = 0; i < ret; i++) {
+ if (flags & PROCESS_TEST_LOCKED &&
+ !PageLocked(pages[i]))
+ count++;
+ if (flags & PROCESS_UNLOCK && PageLocked(pages[i]))
+ unlock_page(pages[i]);
+ page_cache_release(pages[i]);
+ if (flags & PROCESS_RELEASE)
+ page_cache_release(pages[i]);
+ }
+ nr_pages -= ret;
+ index += ret;
+ cond_resched();
+ loops++;
+ if (loops > 100000) {
+ printk(KERN_ERR "stuck in a loop, start %Lu, end %Lu, nr_pages %lu, ret %d\n", start, end, nr_pages, ret);
+ break;
+ }
+ }
+ return count;
+}
+
+static int test_find_delalloc(void)
+{
+ struct inode *inode;
+ struct extent_io_tree tmp;
+ struct page *page;
+ struct page *locked_page = NULL;
+ unsigned long index = 0;
+ u64 total_dirty = 256 * 1024 * 1024;
+ u64 max_bytes = 128 * 1024 * 1024;
+ u64 start, end, test_start;
+ u64 found;
+ int ret = -EINVAL;
+
+ inode = btrfs_new_test_inode();
+ if (!inode) {
+ test_msg("Failed to allocate test inode\n");
+ return -ENOMEM;
+ }
+
+ extent_io_tree_init(&tmp, &inode->i_data);
+
+ /*
+ * First go through and create and mark all of our pages dirty, we pin
+ * everything to make sure our pages don't get evicted and screw up our
+ * test.
+ */
+ for (index = 0; index < (total_dirty >> PAGE_CACHE_SHIFT); index++) {
+ page = find_or_create_page(inode->i_mapping, index, GFP_NOFS);
+ if (!page) {
+ test_msg("Failed to allocate test page\n");
+ ret = -ENOMEM;
+ goto out;
+ }
+ SetPageDirty(page);
+ if (index) {
+ unlock_page(page);
+ } else {
+ page_cache_get(page);
+ locked_page = page;
+ }
+ }
+
+ /* Test this scenario
+ * |--- delalloc ---|
+ * |--- search ---|
+ */
+ set_extent_delalloc(&tmp, 0, 4095, NULL, GFP_NOFS);
+ start = 0;
+ end = 0;
+ found = find_lock_delalloc_range(inode, &tmp, locked_page, &start,
+ &end, max_bytes);
+ if (!found) {
+ test_msg("Should have found at least one delalloc\n");
+ goto out_bits;
+ }
+ if (start != 0 || end != 4095) {
+ test_msg("Expected start 0 end 4095, got start %Lu end %Lu\n",
+ start, end);
+ goto out_bits;
+ }
+ unlock_extent(&tmp, start, end);
+ unlock_page(locked_page);
+ page_cache_release(locked_page);
+
+ /*
+ * Test this scenario
+ *
+ * |--- delalloc ---|
+ * |--- search ---|
+ */
+ test_start = 64 * 1024 * 1024;
+ locked_page = find_lock_page(inode->i_mapping,
+ test_start >> PAGE_CACHE_SHIFT);
+ if (!locked_page) {
+ test_msg("Couldn't find the locked page\n");
+ goto out_bits;
+ }
+ set_extent_delalloc(&tmp, 4096, max_bytes - 1, NULL, GFP_NOFS);
+ start = test_start;
+ end = 0;
+ found = find_lock_delalloc_range(inode, &tmp, locked_page, &start,
+ &end, max_bytes);
+ if (!found) {
+ test_msg("Couldn't find delalloc in our range\n");
+ goto out_bits;
+ }
+ if (start != test_start || end != max_bytes - 1) {
+ test_msg("Expected start %Lu end %Lu, got start %Lu, end "
+ "%Lu\n", test_start, max_bytes - 1, start, end);
+ goto out_bits;
+ }
+ if (process_page_range(inode, start, end,
+ PROCESS_TEST_LOCKED | PROCESS_UNLOCK)) {
+ test_msg("There were unlocked pages in the range\n");
+ goto out_bits;
+ }
+ unlock_extent(&tmp, start, end);
+ /* locked_page was unlocked above */
+ page_cache_release(locked_page);
+
+ /*
+ * Test this scenario
+ * |--- delalloc ---|
+ * |--- search ---|
+ */
+ test_start = max_bytes + 4096;
+ locked_page = find_lock_page(inode->i_mapping, test_start >>
+ PAGE_CACHE_SHIFT);
+ if (!locked_page) {
+ test_msg("Could'nt find the locked page\n");
+ goto out_bits;
+ }
+ start = test_start;
+ end = 0;
+ found = find_lock_delalloc_range(inode, &tmp, locked_page, &start,
+ &end, max_bytes);
+ if (found) {
+ test_msg("Found range when we shouldn't have\n");
+ goto out_bits;
+ }
+ if (end != (u64)-1) {
+ test_msg("Did not return the proper end offset\n");
+ goto out_bits;
+ }
+
+ /*
+ * Test this scenario
+ * [------- delalloc -------|
+ * [max_bytes]|-- search--|
+ *
+ * We are re-using our test_start from above since it works out well.
+ */
+ set_extent_delalloc(&tmp, max_bytes, total_dirty - 1, NULL, GFP_NOFS);
+ start = test_start;
+ end = 0;
+ found = find_lock_delalloc_range(inode, &tmp, locked_page, &start,
+ &end, max_bytes);
+ if (!found) {
+ test_msg("Didn't find our range\n");
+ goto out_bits;
+ }
+ if (start != test_start || end != total_dirty - 1) {
+ test_msg("Expected start %Lu end %Lu, got start %Lu end %Lu\n",
+ test_start, total_dirty - 1, start, end);
+ goto out_bits;
+ }
+ if (process_page_range(inode, start, end,
+ PROCESS_TEST_LOCKED | PROCESS_UNLOCK)) {
+ test_msg("Pages in range were not all locked\n");
+ goto out_bits;
+ }
+ unlock_extent(&tmp, start, end);
+
+ /*
+ * Now to test where we run into a page that is no longer dirty in the
+ * range we want to find.
+ */
+ page = find_get_page(inode->i_mapping, (max_bytes + (1 * 1024 * 1024))
+ >> PAGE_CACHE_SHIFT);
+ if (!page) {
+ test_msg("Couldn't find our page\n");
+ goto out_bits;
+ }
+ ClearPageDirty(page);
+ page_cache_release(page);
+
+ /* We unlocked it in the previous test */
+ lock_page(locked_page);
+ start = test_start;
+ end = 0;
+ /*
+ * Currently if we fail to find dirty pages in the delalloc range we
+ * will adjust max_bytes down to PAGE_CACHE_SIZE and then re-search. If
+ * this changes at any point in the future we will need to fix this
+ * tests expected behavior.
+ */
+ found = find_lock_delalloc_range(inode, &tmp, locked_page, &start,
+ &end, max_bytes);
+ if (!found) {
+ test_msg("Didn't find our range\n");
+ goto out_bits;
+ }
+ if (start != test_start && end != test_start + PAGE_CACHE_SIZE - 1) {
+ test_msg("Expected start %Lu end %Lu, got start %Lu end %Lu\n",
+ test_start, test_start + PAGE_CACHE_SIZE - 1, start,
+ end);
+ goto out_bits;
+ }
+ if (process_page_range(inode, start, end, PROCESS_TEST_LOCKED |
+ PROCESS_UNLOCK)) {
+ test_msg("Pages in range were not all locked\n");
+ goto out_bits;
+ }
+ ret = 0;
+out_bits:
+ clear_extent_bits(&tmp, 0, total_dirty - 1,
+ (unsigned long)-1, GFP_NOFS);
+out:
+ if (locked_page)
+ page_cache_release(locked_page);
+ process_page_range(inode, 0, total_dirty - 1,
+ PROCESS_UNLOCK | PROCESS_RELEASE);
+ iput(inode);
+ return ret;
+}
+
+int btrfs_test_extent_io(void)
+{
+ test_msg("Running find delalloc tests\n");
+ return test_find_delalloc();
+}
diff --git a/fs/btrfs/tests/free-space-tests.c b/fs/btrfs/tests/free-space-tests.c
new file mode 100644
index 0000000..6fc8201
--- /dev/null
+++ b/fs/btrfs/tests/free-space-tests.c
@@ -0,0 +1,395 @@
+/*
+ * Copyright (C) 2013 Fusion IO. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#include <linux/slab.h>
+#include "btrfs-tests.h"
+#include "../ctree.h"
+#include "../free-space-cache.h"
+
+#define BITS_PER_BITMAP (PAGE_CACHE_SIZE * 8)
+static struct btrfs_block_group_cache *init_test_block_group(void)
+{
+ struct btrfs_block_group_cache *cache;
+
+ cache = kzalloc(sizeof(*cache), GFP_NOFS);
+ if (!cache)
+ return NULL;
+ cache->free_space_ctl = kzalloc(sizeof(*cache->free_space_ctl),
+ GFP_NOFS);
+ if (!cache->free_space_ctl) {
+ kfree(cache);
+ return NULL;
+ }
+
+ cache->key.objectid = 0;
+ cache->key.offset = 1024 * 1024 * 1024;
+ cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
+ cache->sectorsize = 4096;
+
+ spin_lock_init(&cache->lock);
+ INIT_LIST_HEAD(&cache->list);
+ INIT_LIST_HEAD(&cache->cluster_list);
+ INIT_LIST_HEAD(&cache->new_bg_list);
+
+ btrfs_init_free_space_ctl(cache);
+
+ return cache;
+}
+
+/*
+ * This test just does basic sanity checking, making sure we can add an exten
+ * entry and remove space from either end and the middle, and make sure we can
+ * remove space that covers adjacent extent entries.
+ */
+static int test_extents(struct btrfs_block_group_cache *cache)
+{
+ int ret = 0;
+
+ test_msg("Running extent only tests\n");
+
+ /* First just make sure we can remove an entire entry */
+ ret = btrfs_add_free_space(cache, 0, 4 * 1024 * 1024);
+ if (ret) {
+ test_msg("Error adding initial extents %d\n", ret);
+ return ret;
+ }
+
+ ret = btrfs_remove_free_space(cache, 0, 4 * 1024 * 1024);
+ if (ret) {
+ test_msg("Error removing extent %d\n", ret);
+ return ret;
+ }
+
+ if (test_check_exists(cache, 0, 4 * 1024 * 1024)) {
+ test_msg("Full remove left some lingering space\n");
+ return -1;
+ }
+
+ /* Ok edge and middle cases now */
+ ret = btrfs_add_free_space(cache, 0, 4 * 1024 * 1024);
+ if (ret) {
+ test_msg("Error adding half extent %d\n", ret);
+ return ret;
+ }
+
+ ret = btrfs_remove_free_space(cache, 3 * 1024 * 1024, 1 * 1024 * 1024);
+ if (ret) {
+ test_msg("Error removing tail end %d\n", ret);
+ return ret;
+ }
+
+ ret = btrfs_remove_free_space(cache, 0, 1 * 1024 * 1024);
+ if (ret) {
+ test_msg("Error removing front end %d\n", ret);
+ return ret;
+ }
+
+ ret = btrfs_remove_free_space(cache, 2 * 1024 * 1024, 4096);
+ if (ret) {
+ test_msg("Error removing middle peice %d\n", ret);
+ return ret;
+ }
+
+ if (test_check_exists(cache, 0, 1 * 1024 * 1024)) {
+ test_msg("Still have space at the front\n");
+ return -1;
+ }
+
+ if (test_check_exists(cache, 2 * 1024 * 1024, 4096)) {
+ test_msg("Still have space in the middle\n");
+ return -1;
+ }
+
+ if (test_check_exists(cache, 3 * 1024 * 1024, 1 * 1024 * 1024)) {
+ test_msg("Still have space at the end\n");
+ return -1;
+ }
+
+ /* Cleanup */
+ __btrfs_remove_free_space_cache(cache->free_space_ctl);
+
+ return 0;
+}
+
+static int test_bitmaps(struct btrfs_block_group_cache *cache)
+{
+ u64 next_bitmap_offset;
+ int ret;
+
+ test_msg("Running bitmap only tests\n");
+
+ ret = test_add_free_space_entry(cache, 0, 4 * 1024 * 1024, 1);
+ if (ret) {
+ test_msg("Couldn't create a bitmap entry %d\n", ret);
+ return ret;
+ }
+
+ ret = btrfs_remove_free_space(cache, 0, 4 * 1024 * 1024);
+ if (ret) {
+ test_msg("Error removing bitmap full range %d\n", ret);
+ return ret;
+ }
+
+ if (test_check_exists(cache, 0, 4 * 1024 * 1024)) {
+ test_msg("Left some space in bitmap\n");
+ return -1;
+ }
+
+ ret = test_add_free_space_entry(cache, 0, 4 * 1024 * 1024, 1);
+ if (ret) {
+ test_msg("Couldn't add to our bitmap entry %d\n", ret);
+ return ret;
+ }
+
+ ret = btrfs_remove_free_space(cache, 1 * 1024 * 1024, 2 * 1024 * 1024);
+ if (ret) {
+ test_msg("Couldn't remove middle chunk %d\n", ret);
+ return ret;
+ }
+
+ /*
+ * The first bitmap we have starts at offset 0 so the next one is just
+ * at the end of the first bitmap.
+ */
+ next_bitmap_offset = (u64)(BITS_PER_BITMAP * 4096);
+
+ /* Test a bit straddling two bitmaps */
+ ret = test_add_free_space_entry(cache, next_bitmap_offset -
+ (2 * 1024 * 1024), 4 * 1024 * 1024, 1);
+ if (ret) {
+ test_msg("Couldn't add space that straddles two bitmaps %d\n",
+ ret);
+ return ret;
+ }
+
+ ret = btrfs_remove_free_space(cache, next_bitmap_offset -
+ (1 * 1024 * 1024), 2 * 1024 * 1024);
+ if (ret) {
+ test_msg("Couldn't remove overlapping space %d\n", ret);
+ return ret;
+ }
+
+ if (test_check_exists(cache, next_bitmap_offset - (1 * 1024 * 1024),
+ 2 * 1024 * 1024)) {
+ test_msg("Left some space when removing overlapping\n");
+ return -1;
+ }
+
+ __btrfs_remove_free_space_cache(cache->free_space_ctl);
+
+ return 0;
+}
+
+/* This is the high grade jackassery */
+static int test_bitmaps_and_extents(struct btrfs_block_group_cache *cache)
+{
+ u64 bitmap_offset = (u64)(BITS_PER_BITMAP * 4096);
+ int ret;
+
+ test_msg("Running bitmap and extent tests\n");
+
+ /*
+ * First let's do something simple, an extent at the same offset as the
+ * bitmap, but the free space completely in the extent and then
+ * completely in the bitmap.
+ */
+ ret = test_add_free_space_entry(cache, 4 * 1024 * 1024, 1 * 1024 * 1024, 1);
+ if (ret) {
+ test_msg("Couldn't create bitmap entry %d\n", ret);
+ return ret;
+ }
+
+ ret = test_add_free_space_entry(cache, 0, 1 * 1024 * 1024, 0);
+ if (ret) {
+ test_msg("Couldn't add extent entry %d\n", ret);
+ return ret;
+ }
+
+ ret = btrfs_remove_free_space(cache, 0, 1 * 1024 * 1024);
+ if (ret) {
+ test_msg("Couldn't remove extent entry %d\n", ret);
+ return ret;
+ }
+
+ if (test_check_exists(cache, 0, 1 * 1024 * 1024)) {
+ test_msg("Left remnants after our remove\n");
+ return -1;
+ }
+
+ /* Now to add back the extent entry and remove from the bitmap */
+ ret = test_add_free_space_entry(cache, 0, 1 * 1024 * 1024, 0);
+ if (ret) {
+ test_msg("Couldn't re-add extent entry %d\n", ret);
+ return ret;
+ }
+
+ ret = btrfs_remove_free_space(cache, 4 * 1024 * 1024, 1 * 1024 * 1024);
+ if (ret) {
+ test_msg("Couldn't remove from bitmap %d\n", ret);
+ return ret;
+ }
+
+ if (test_check_exists(cache, 4 * 1024 * 1024, 1 * 1024 * 1024)) {
+ test_msg("Left remnants in the bitmap\n");
+ return -1;
+ }
+
+ /*
+ * Ok so a little more evil, extent entry and bitmap at the same offset,
+ * removing an overlapping chunk.
+ */
+ ret = test_add_free_space_entry(cache, 1 * 1024 * 1024, 4 * 1024 * 1024, 1);
+ if (ret) {
+ test_msg("Couldn't add to a bitmap %d\n", ret);
+ return ret;
+ }
+
+ ret = btrfs_remove_free_space(cache, 512 * 1024, 3 * 1024 * 1024);
+ if (ret) {
+ test_msg("Couldn't remove overlapping space %d\n", ret);
+ return ret;
+ }
+
+ if (test_check_exists(cache, 512 * 1024, 3 * 1024 * 1024)) {
+ test_msg("Left over peices after removing overlapping\n");
+ return -1;
+ }
+
+ __btrfs_remove_free_space_cache(cache->free_space_ctl);
+
+ /* Now with the extent entry offset into the bitmap */
+ ret = test_add_free_space_entry(cache, 4 * 1024 * 1024, 4 * 1024 * 1024, 1);
+ if (ret) {
+ test_msg("Couldn't add space to the bitmap %d\n", ret);
+ return ret;
+ }
+
+ ret = test_add_free_space_entry(cache, 2 * 1024 * 1024, 2 * 1024 * 1024, 0);
+ if (ret) {
+ test_msg("Couldn't add extent to the cache %d\n", ret);
+ return ret;
+ }
+
+ ret = btrfs_remove_free_space(cache, 3 * 1024 * 1024, 4 * 1024 * 1024);
+ if (ret) {
+ test_msg("Problem removing overlapping space %d\n", ret);
+ return ret;
+ }
+
+ if (test_check_exists(cache, 3 * 1024 * 1024, 4 * 1024 * 1024)) {
+ test_msg("Left something behind when removing space");
+ return -1;
+ }
+
+ /*
+ * This has blown up in the past, the extent entry starts before the
+ * bitmap entry, but we're trying to remove an offset that falls
+ * completely within the bitmap range and is in both the extent entry
+ * and the bitmap entry, looks like this
+ *
+ * [ extent ]
+ * [ bitmap ]
+ * [ del ]
+ */
+ __btrfs_remove_free_space_cache(cache->free_space_ctl);
+ ret = test_add_free_space_entry(cache, bitmap_offset + 4 * 1024 * 1024,
+ 4 * 1024 * 1024, 1);
+ if (ret) {
+ test_msg("Couldn't add bitmap %d\n", ret);
+ return ret;
+ }
+
+ ret = test_add_free_space_entry(cache, bitmap_offset - 1 * 1024 * 1024,
+ 5 * 1024 * 1024, 0);
+ if (ret) {
+ test_msg("Couldn't add extent entry %d\n", ret);
+ return ret;
+ }
+
+ ret = btrfs_remove_free_space(cache, bitmap_offset + 1 * 1024 * 1024,
+ 5 * 1024 * 1024);
+ if (ret) {
+ test_msg("Failed to free our space %d\n", ret);
+ return ret;
+ }
+
+ if (test_check_exists(cache, bitmap_offset + 1 * 1024 * 1024,
+ 5 * 1024 * 1024)) {
+ test_msg("Left stuff over\n");
+ return -1;
+ }
+
+ __btrfs_remove_free_space_cache(cache->free_space_ctl);
+
+ /*
+ * This blew up before, we have part of the free space in a bitmap and
+ * then the entirety of the rest of the space in an extent. This used
+ * to return -EAGAIN back from btrfs_remove_extent, make sure this
+ * doesn't happen.
+ */
+ ret = test_add_free_space_entry(cache, 1 * 1024 * 1024, 2 * 1024 * 1024, 1);
+ if (ret) {
+ test_msg("Couldn't add bitmap entry %d\n", ret);
+ return ret;
+ }
+
+ ret = test_add_free_space_entry(cache, 3 * 1024 * 1024, 1 * 1024 * 1024, 0);
+ if (ret) {
+ test_msg("Couldn't add extent entry %d\n", ret);
+ return ret;
+ }
+
+ ret = btrfs_remove_free_space(cache, 1 * 1024 * 1024, 3 * 1024 * 1024);
+ if (ret) {
+ test_msg("Error removing bitmap and extent overlapping %d\n", ret);
+ return ret;
+ }
+
+ __btrfs_remove_free_space_cache(cache->free_space_ctl);
+ return 0;
+}
+
+int btrfs_test_free_space_cache(void)
+{
+ struct btrfs_block_group_cache *cache;
+ int ret;
+
+ test_msg("Running btrfs free space cache tests\n");
+
+ cache = init_test_block_group();
+ if (!cache) {
+ test_msg("Couldn't run the tests\n");
+ return 0;
+ }
+
+ ret = test_extents(cache);
+ if (ret)
+ goto out;
+ ret = test_bitmaps(cache);
+ if (ret)
+ goto out;
+ ret = test_bitmaps_and_extents(cache);
+ if (ret)
+ goto out;
+out:
+ __btrfs_remove_free_space_cache(cache->free_space_ctl);
+ kfree(cache->free_space_ctl);
+ kfree(cache);
+ test_msg("Free space cache tests finished\n");
+ return ret;
+}
diff --git a/fs/btrfs/tests/inode-tests.c b/fs/btrfs/tests/inode-tests.c
new file mode 100644
index 0000000..397d1f9
--- /dev/null
+++ b/fs/btrfs/tests/inode-tests.c
@@ -0,0 +1,955 @@
+/*
+ * Copyright (C) 2013 Fusion IO. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#include "btrfs-tests.h"
+#include "../ctree.h"
+#include "../btrfs_inode.h"
+#include "../disk-io.h"
+#include "../extent_io.h"
+#include "../volumes.h"
+
+static struct btrfs_fs_info *alloc_dummy_fs_info(void)
+{
+ struct btrfs_fs_info *fs_info = kzalloc(sizeof(struct btrfs_fs_info),
+ GFP_NOFS);
+ if (!fs_info)
+ return fs_info;
+ fs_info->fs_devices = kzalloc(sizeof(struct btrfs_fs_devices),
+ GFP_NOFS);
+ if (!fs_info->fs_devices) {
+ kfree(fs_info);
+ return NULL;
+ }
+ return fs_info;
+}
+static void free_dummy_root(struct btrfs_root *root)
+{
+ if (!root)
+ return;
+ if (root->fs_info) {
+ kfree(root->fs_info->fs_devices);
+ kfree(root->fs_info);
+ }
+ if (root->node)
+ free_extent_buffer(root->node);
+ kfree(root);
+}
+
+static void insert_extent(struct btrfs_root *root, u64 start, u64 len,
+ u64 ram_bytes, u64 offset, u64 disk_bytenr,
+ u64 disk_len, u32 type, u8 compression, int slot)
+{
+ struct btrfs_path path;
+ struct btrfs_file_extent_item *fi;
+ struct extent_buffer *leaf = root->node;
+ struct btrfs_key key;
+ u32 value_len = sizeof(struct btrfs_file_extent_item);
+
+ if (type == BTRFS_FILE_EXTENT_INLINE)
+ value_len += len;
+ memset(&path, 0, sizeof(path));
+
+ path.nodes[0] = leaf;
+ path.slots[0] = slot;
+
+ key.objectid = BTRFS_FIRST_FREE_OBJECTID;
+ key.type = BTRFS_EXTENT_DATA_KEY;
+ key.offset = start;
+
+ setup_items_for_insert(root, &path, &key, &value_len, value_len,
+ value_len + sizeof(struct btrfs_item), 1);
+ fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
+ btrfs_set_file_extent_generation(leaf, fi, 1);
+ btrfs_set_file_extent_type(leaf, fi, type);
+ btrfs_set_file_extent_disk_bytenr(leaf, fi, disk_bytenr);
+ btrfs_set_file_extent_disk_num_bytes(leaf, fi, disk_len);
+ btrfs_set_file_extent_offset(leaf, fi, offset);
+ btrfs_set_file_extent_num_bytes(leaf, fi, len);
+ btrfs_set_file_extent_ram_bytes(leaf, fi, ram_bytes);
+ btrfs_set_file_extent_compression(leaf, fi, compression);
+ btrfs_set_file_extent_encryption(leaf, fi, 0);
+ btrfs_set_file_extent_other_encoding(leaf, fi, 0);
+}
+
+static void insert_inode_item_key(struct btrfs_root *root)
+{
+ struct btrfs_path path;
+ struct extent_buffer *leaf = root->node;
+ struct btrfs_key key;
+ u32 value_len = 0;
+
+ memset(&path, 0, sizeof(path));
+
+ path.nodes[0] = leaf;
+ path.slots[0] = 0;
+
+ key.objectid = BTRFS_INODE_ITEM_KEY;
+ key.type = BTRFS_INODE_ITEM_KEY;
+ key.offset = 0;
+
+ setup_items_for_insert(root, &path, &key, &value_len, value_len,
+ value_len + sizeof(struct btrfs_item), 1);
+}
+
+/*
+ * Build the most complicated map of extents the earth has ever seen. We want
+ * this so we can test all of the corner cases of btrfs_get_extent. Here is a
+ * diagram of how the extents will look though this may not be possible we still
+ * want to make sure everything acts normally (the last number is not inclusive)
+ *
+ * [0 - 5][5 - 6][6 - 10][10 - 4096][ 4096 - 8192 ][8192 - 12288]
+ * [hole ][inline][ hole ][ regular ][regular1 split][ hole ]
+ *
+ * [ 12288 - 20480][20480 - 24576][ 24576 - 28672 ][28672 - 36864][36864 - 45056]
+ * [regular1 split][ prealloc1 ][prealloc1 written][ prealloc1 ][ compressed ]
+ *
+ * [45056 - 49152][49152-53248][53248-61440][61440-65536][ 65536+81920 ]
+ * [ compressed1 ][ regular ][compressed1][ regular ][ hole but no extent]
+ *
+ * [81920-86016]
+ * [ regular ]
+ */
+static void setup_file_extents(struct btrfs_root *root)
+{
+ int slot = 0;
+ u64 disk_bytenr = 1 * 1024 * 1024;
+ u64 offset = 0;
+
+ /* First we want a hole */
+ insert_extent(root, offset, 5, 5, 0, 0, 0, BTRFS_FILE_EXTENT_REG, 0,
+ slot);
+ slot++;
+ offset += 5;
+
+ /*
+ * Now we want an inline extent, I don't think this is possible but hey
+ * why not? Also keep in mind if we have an inline extent it counts as
+ * the whole first page. If we were to expand it we would have to cow
+ * and we wouldn't have an inline extent anymore.
+ */
+ insert_extent(root, offset, 1, 1, 0, 0, 0, BTRFS_FILE_EXTENT_INLINE, 0,
+ slot);
+ slot++;
+ offset = 4096;
+
+ /* Now another hole */
+ insert_extent(root, offset, 4, 4, 0, 0, 0, BTRFS_FILE_EXTENT_REG, 0,
+ slot);
+ slot++;
+ offset += 4;
+
+ /* Now for a regular extent */
+ insert_extent(root, offset, 4095, 4095, 0, disk_bytenr, 4096,
+ BTRFS_FILE_EXTENT_REG, 0, slot);
+ slot++;
+ disk_bytenr += 4096;
+ offset += 4095;
+
+ /*
+ * Now for 3 extents that were split from a hole punch so we test
+ * offsets properly.
+ */
+ insert_extent(root, offset, 4096, 16384, 0, disk_bytenr, 16384,
+ BTRFS_FILE_EXTENT_REG, 0, slot);
+ slot++;
+ offset += 4096;
+ insert_extent(root, offset, 4096, 4096, 0, 0, 0, BTRFS_FILE_EXTENT_REG,
+ 0, slot);
+ slot++;
+ offset += 4096;
+ insert_extent(root, offset, 8192, 16384, 8192, disk_bytenr, 16384,
+ BTRFS_FILE_EXTENT_REG, 0, slot);
+ slot++;
+ offset += 8192;
+ disk_bytenr += 16384;
+
+ /* Now for a unwritten prealloc extent */
+ insert_extent(root, offset, 4096, 4096, 0, disk_bytenr, 4096,
+ BTRFS_FILE_EXTENT_PREALLOC, 0, slot);
+ slot++;
+ offset += 4096;
+
+ /*
+ * We want to jack up disk_bytenr a little more so the em stuff doesn't
+ * merge our records.
+ */
+ disk_bytenr += 8192;
+
+ /*
+ * Now for a partially written prealloc extent, basically the same as
+ * the hole punch example above. Ram_bytes never changes when you mark
+ * extents written btw.
+ */
+ insert_extent(root, offset, 4096, 16384, 0, disk_bytenr, 16384,
+ BTRFS_FILE_EXTENT_PREALLOC, 0, slot);
+ slot++;
+ offset += 4096;
+ insert_extent(root, offset, 4096, 16384, 4096, disk_bytenr, 16384,
+ BTRFS_FILE_EXTENT_REG, 0, slot);
+ slot++;
+ offset += 4096;
+ insert_extent(root, offset, 8192, 16384, 8192, disk_bytenr, 16384,
+ BTRFS_FILE_EXTENT_PREALLOC, 0, slot);
+ slot++;
+ offset += 8192;
+ disk_bytenr += 16384;
+
+ /* Now a normal compressed extent */
+ insert_extent(root, offset, 8192, 8192, 0, disk_bytenr, 4096,
+ BTRFS_FILE_EXTENT_REG, BTRFS_COMPRESS_ZLIB, slot);
+ slot++;
+ offset += 8192;
+ /* No merges */
+ disk_bytenr += 8192;
+
+ /* Now a split compressed extent */
+ insert_extent(root, offset, 4096, 16384, 0, disk_bytenr, 4096,
+ BTRFS_FILE_EXTENT_REG, BTRFS_COMPRESS_ZLIB, slot);
+ slot++;
+ offset += 4096;
+ insert_extent(root, offset, 4096, 4096, 0, disk_bytenr + 4096, 4096,
+ BTRFS_FILE_EXTENT_REG, 0, slot);
+ slot++;
+ offset += 4096;
+ insert_extent(root, offset, 8192, 16384, 8192, disk_bytenr, 4096,
+ BTRFS_FILE_EXTENT_REG, BTRFS_COMPRESS_ZLIB, slot);
+ slot++;
+ offset += 8192;
+ disk_bytenr += 8192;
+
+ /* Now extents that have a hole but no hole extent */
+ insert_extent(root, offset, 4096, 4096, 0, disk_bytenr, 4096,
+ BTRFS_FILE_EXTENT_REG, 0, slot);
+ slot++;
+ offset += 16384;
+ disk_bytenr += 4096;
+ insert_extent(root, offset, 4096, 4096, 0, disk_bytenr, 4096,
+ BTRFS_FILE_EXTENT_REG, 0, slot);
+}
+
+static unsigned long prealloc_only = 0;
+static unsigned long compressed_only = 0;
+static unsigned long vacancy_only = 0;
+
+static noinline int test_btrfs_get_extent(void)
+{
+ struct inode *inode = NULL;
+ struct btrfs_root *root = NULL;
+ struct extent_map *em = NULL;
+ u64 orig_start;
+ u64 disk_bytenr;
+ u64 offset;
+ int ret = -ENOMEM;
+
+ inode = btrfs_new_test_inode();
+ if (!inode) {
+ test_msg("Couldn't allocate inode\n");
+ return ret;
+ }
+
+ BTRFS_I(inode)->location.type = BTRFS_INODE_ITEM_KEY;
+ BTRFS_I(inode)->location.objectid = BTRFS_FIRST_FREE_OBJECTID;
+ BTRFS_I(inode)->location.offset = 0;
+
+ root = btrfs_alloc_dummy_root();
+ if (IS_ERR(root)) {
+ test_msg("Couldn't allocate root\n");
+ goto out;
+ }
+
+ /*
+ * We do this since btrfs_get_extent wants to assign em->bdev to
+ * root->fs_info->fs_devices->latest_bdev.
+ */
+ root->fs_info = alloc_dummy_fs_info();
+ if (!root->fs_info) {
+ test_msg("Couldn't allocate dummy fs info\n");
+ goto out;
+ }
+
+ root->node = alloc_dummy_extent_buffer(0, 4096);
+ if (!root->node) {
+ test_msg("Couldn't allocate dummy buffer\n");
+ goto out;
+ }
+
+ /*
+ * We will just free a dummy node if it's ref count is 2 so we need an
+ * extra ref so our searches don't accidently release our page.
+ */
+ extent_buffer_get(root->node);
+ btrfs_set_header_nritems(root->node, 0);
+ btrfs_set_header_level(root->node, 0);
+ ret = -EINVAL;
+
+ /* First with no extents */
+ BTRFS_I(inode)->root = root;
+ em = btrfs_get_extent(inode, NULL, 0, 0, 4096, 0);
+ if (IS_ERR(em)) {
+ em = NULL;
+ test_msg("Got an error when we shouldn't have\n");
+ goto out;
+ }
+ if (em->block_start != EXTENT_MAP_HOLE) {
+ test_msg("Expected a hole, got %llu\n", em->block_start);
+ goto out;
+ }
+ if (!test_bit(EXTENT_FLAG_VACANCY, &em->flags)) {
+ test_msg("Vacancy flag wasn't set properly\n");
+ goto out;
+ }
+ free_extent_map(em);
+ btrfs_drop_extent_cache(inode, 0, (u64)-1, 0);
+
+ /*
+ * All of the magic numbers are based on the mapping setup in
+ * setup_file_extents, so if you change anything there you need to
+ * update the comment and update the expected values below.
+ */
+ setup_file_extents(root);
+
+ em = btrfs_get_extent(inode, NULL, 0, 0, (u64)-1, 0);
+ if (IS_ERR(em)) {
+ test_msg("Got an error when we shouldn't have\n");
+ goto out;
+ }
+ if (em->block_start != EXTENT_MAP_HOLE) {
+ test_msg("Expected a hole, got %llu\n", em->block_start);
+ goto out;
+ }
+ if (em->start != 0 || em->len != 5) {
+ test_msg("Unexpected extent wanted start 0 len 5, got start "
+ "%llu len %llu\n", em->start, em->len);
+ goto out;
+ }
+ if (em->flags != 0) {
+ test_msg("Unexpected flags set, want 0 have %lu\n", em->flags);
+ goto out;
+ }
+ offset = em->start + em->len;
+ free_extent_map(em);
+
+ em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0);
+ if (IS_ERR(em)) {
+ test_msg("Got an error when we shouldn't have\n");
+ goto out;
+ }
+ if (em->block_start != EXTENT_MAP_INLINE) {
+ test_msg("Expected an inline, got %llu\n", em->block_start);
+ goto out;
+ }
+ if (em->start != offset || em->len != 4091) {
+ test_msg("Unexpected extent wanted start %llu len 1, got start "
+ "%llu len %llu\n", offset, em->start, em->len);
+ goto out;
+ }
+ if (em->flags != 0) {
+ test_msg("Unexpected flags set, want 0 have %lu\n", em->flags);
+ goto out;
+ }
+ /*
+ * We don't test anything else for inline since it doesn't get set
+ * unless we have a page for it to write into. Maybe we should change
+ * this?
+ */
+ offset = em->start + em->len;
+ free_extent_map(em);
+
+ em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0);
+ if (IS_ERR(em)) {
+ test_msg("Got an error when we shouldn't have\n");
+ goto out;
+ }
+ if (em->block_start != EXTENT_MAP_HOLE) {
+ test_msg("Expected a hole, got %llu\n", em->block_start);
+ goto out;
+ }
+ if (em->start != offset || em->len != 4) {
+ test_msg("Unexpected extent wanted start %llu len 4, got start "
+ "%llu len %llu\n", offset, em->start, em->len);
+ goto out;
+ }
+ if (em->flags != 0) {
+ test_msg("Unexpected flags set, want 0 have %lu\n", em->flags);
+ goto out;
+ }
+ offset = em->start + em->len;
+ free_extent_map(em);
+
+ /* Regular extent */
+ em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0);
+ if (IS_ERR(em)) {
+ test_msg("Got an error when we shouldn't have\n");
+ goto out;
+ }
+ if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
+ test_msg("Expected a real extent, got %llu\n", em->block_start);
+ goto out;
+ }
+ if (em->start != offset || em->len != 4095) {
+ test_msg("Unexpected extent wanted start %llu len 4095, got "
+ "start %llu len %llu\n", offset, em->start, em->len);
+ goto out;
+ }
+ if (em->flags != 0) {
+ test_msg("Unexpected flags set, want 0 have %lu\n", em->flags);
+ goto out;
+ }
+ if (em->orig_start != em->start) {
+ test_msg("Wrong orig offset, want %llu, have %llu\n", em->start,
+ em->orig_start);
+ goto out;
+ }
+ offset = em->start + em->len;
+ free_extent_map(em);
+
+ /* The next 3 are split extents */
+ em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0);
+ if (IS_ERR(em)) {
+ test_msg("Got an error when we shouldn't have\n");
+ goto out;
+ }
+ if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
+ test_msg("Expected a real extent, got %llu\n", em->block_start);
+ goto out;
+ }
+ if (em->start != offset || em->len != 4096) {
+ test_msg("Unexpected extent wanted start %llu len 4096, got "
+ "start %llu len %llu\n", offset, em->start, em->len);
+ goto out;
+ }
+ if (em->flags != 0) {
+ test_msg("Unexpected flags set, want 0 have %lu\n", em->flags);
+ goto out;
+ }
+ if (em->orig_start != em->start) {
+ test_msg("Wrong orig offset, want %llu, have %llu\n", em->start,
+ em->orig_start);
+ goto out;
+ }
+ disk_bytenr = em->block_start;
+ orig_start = em->start;
+ offset = em->start + em->len;
+ free_extent_map(em);
+
+ em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0);
+ if (IS_ERR(em)) {
+ test_msg("Got an error when we shouldn't have\n");
+ goto out;
+ }
+ if (em->block_start != EXTENT_MAP_HOLE) {
+ test_msg("Expected a hole, got %llu\n", em->block_start);
+ goto out;
+ }
+ if (em->start != offset || em->len != 4096) {
+ test_msg("Unexpected extent wanted start %llu len 4096, got "
+ "start %llu len %llu\n", offset, em->start, em->len);
+ goto out;
+ }
+ if (em->flags != 0) {
+ test_msg("Unexpected flags set, want 0 have %lu\n", em->flags);
+ goto out;
+ }
+ offset = em->start + em->len;
+ free_extent_map(em);
+
+ em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0);
+ if (IS_ERR(em)) {
+ test_msg("Got an error when we shouldn't have\n");
+ goto out;
+ }
+ if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
+ test_msg("Expected a real extent, got %llu\n", em->block_start);
+ goto out;
+ }
+ if (em->start != offset || em->len != 8192) {
+ test_msg("Unexpected extent wanted start %llu len 8192, got "
+ "start %llu len %llu\n", offset, em->start, em->len);
+ goto out;
+ }
+ if (em->flags != 0) {
+ test_msg("Unexpected flags set, want 0 have %lu\n", em->flags);
+ goto out;
+ }
+ if (em->orig_start != orig_start) {
+ test_msg("Wrong orig offset, want %llu, have %llu\n",
+ orig_start, em->orig_start);
+ goto out;
+ }
+ disk_bytenr += (em->start - orig_start);
+ if (em->block_start != disk_bytenr) {
+ test_msg("Wrong block start, want %llu, have %llu\n",
+ disk_bytenr, em->block_start);
+ goto out;
+ }
+ offset = em->start + em->len;
+ free_extent_map(em);
+
+ /* Prealloc extent */
+ em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0);
+ if (IS_ERR(em)) {
+ test_msg("Got an error when we shouldn't have\n");
+ goto out;
+ }
+ if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
+ test_msg("Expected a real extent, got %llu\n", em->block_start);
+ goto out;
+ }
+ if (em->start != offset || em->len != 4096) {
+ test_msg("Unexpected extent wanted start %llu len 4096, got "
+ "start %llu len %llu\n", offset, em->start, em->len);
+ goto out;
+ }
+ if (em->flags != prealloc_only) {
+ test_msg("Unexpected flags set, want %lu have %lu\n",
+ prealloc_only, em->flags);
+ goto out;
+ }
+ if (em->orig_start != em->start) {
+ test_msg("Wrong orig offset, want %llu, have %llu\n", em->start,
+ em->orig_start);
+ goto out;
+ }
+ offset = em->start + em->len;
+ free_extent_map(em);
+
+ /* The next 3 are a half written prealloc extent */
+ em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0);
+ if (IS_ERR(em)) {
+ test_msg("Got an error when we shouldn't have\n");
+ goto out;
+ }
+ if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
+ test_msg("Expected a real extent, got %llu\n", em->block_start);
+ goto out;
+ }
+ if (em->start != offset || em->len != 4096) {
+ test_msg("Unexpected extent wanted start %llu len 4096, got "
+ "start %llu len %llu\n", offset, em->start, em->len);
+ goto out;
+ }
+ if (em->flags != prealloc_only) {
+ test_msg("Unexpected flags set, want %lu have %lu\n",
+ prealloc_only, em->flags);
+ goto out;
+ }
+ if (em->orig_start != em->start) {
+ test_msg("Wrong orig offset, want %llu, have %llu\n", em->start,
+ em->orig_start);
+ goto out;
+ }
+ disk_bytenr = em->block_start;
+ orig_start = em->start;
+ offset = em->start + em->len;
+ free_extent_map(em);
+
+ em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0);
+ if (IS_ERR(em)) {
+ test_msg("Got an error when we shouldn't have\n");
+ goto out;
+ }
+ if (em->block_start >= EXTENT_MAP_HOLE) {
+ test_msg("Expected a real extent, got %llu\n", em->block_start);
+ goto out;
+ }
+ if (em->start != offset || em->len != 4096) {
+ test_msg("Unexpected extent wanted start %llu len 4096, got "
+ "start %llu len %llu\n", offset, em->start, em->len);
+ goto out;
+ }
+ if (em->flags != 0) {
+ test_msg("Unexpected flags set, want 0 have %lu\n", em->flags);
+ goto out;
+ }
+ if (em->orig_start != orig_start) {
+ test_msg("Unexpected orig offset, wanted %llu, have %llu\n",
+ orig_start, em->orig_start);
+ goto out;
+ }
+ if (em->block_start != (disk_bytenr + (em->start - em->orig_start))) {
+ test_msg("Unexpected block start, wanted %llu, have %llu\n",
+ disk_bytenr + (em->start - em->orig_start),
+ em->block_start);
+ goto out;
+ }
+ offset = em->start + em->len;
+ free_extent_map(em);
+
+ em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0);
+ if (IS_ERR(em)) {
+ test_msg("Got an error when we shouldn't have\n");
+ goto out;
+ }
+ if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
+ test_msg("Expected a real extent, got %llu\n", em->block_start);
+ goto out;
+ }
+ if (em->start != offset || em->len != 8192) {
+ test_msg("Unexpected extent wanted start %llu len 8192, got "
+ "start %llu len %llu\n", offset, em->start, em->len);
+ goto out;
+ }
+ if (em->flags != prealloc_only) {
+ test_msg("Unexpected flags set, want %lu have %lu\n",
+ prealloc_only, em->flags);
+ goto out;
+ }
+ if (em->orig_start != orig_start) {
+ test_msg("Wrong orig offset, want %llu, have %llu\n", orig_start,
+ em->orig_start);
+ goto out;
+ }
+ if (em->block_start != (disk_bytenr + (em->start - em->orig_start))) {
+ test_msg("Unexpected block start, wanted %llu, have %llu\n",
+ disk_bytenr + (em->start - em->orig_start),
+ em->block_start);
+ goto out;
+ }
+ offset = em->start + em->len;
+ free_extent_map(em);
+
+ /* Now for the compressed extent */
+ em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0);
+ if (IS_ERR(em)) {
+ test_msg("Got an error when we shouldn't have\n");
+ goto out;
+ }
+ if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
+ test_msg("Expected a real extent, got %llu\n", em->block_start);
+ goto out;
+ }
+ if (em->start != offset || em->len != 8192) {
+ test_msg("Unexpected extent wanted start %llu len 8192, got "
+ "start %llu len %llu\n", offset, em->start, em->len);
+ goto out;
+ }
+ if (em->flags != compressed_only) {
+ test_msg("Unexpected flags set, want %lu have %lu\n",
+ compressed_only, em->flags);
+ goto out;
+ }
+ if (em->orig_start != em->start) {
+ test_msg("Wrong orig offset, want %llu, have %llu\n",
+ em->start, em->orig_start);
+ goto out;
+ }
+ if (em->compress_type != BTRFS_COMPRESS_ZLIB) {
+ test_msg("Unexpected compress type, wanted %d, got %d\n",
+ BTRFS_COMPRESS_ZLIB, em->compress_type);
+ goto out;
+ }
+ offset = em->start + em->len;
+ free_extent_map(em);
+
+ /* Split compressed extent */
+ em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0);
+ if (IS_ERR(em)) {
+ test_msg("Got an error when we shouldn't have\n");
+ goto out;
+ }
+ if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
+ test_msg("Expected a real extent, got %llu\n", em->block_start);
+ goto out;
+ }
+ if (em->start != offset || em->len != 4096) {
+ test_msg("Unexpected extent wanted start %llu len 4096, got "
+ "start %llu len %llu\n", offset, em->start, em->len);
+ goto out;
+ }
+ if (em->flags != compressed_only) {
+ test_msg("Unexpected flags set, want %lu have %lu\n",
+ compressed_only, em->flags);
+ goto out;
+ }
+ if (em->orig_start != em->start) {
+ test_msg("Wrong orig offset, want %llu, have %llu\n",
+ em->start, em->orig_start);
+ goto out;
+ }
+ if (em->compress_type != BTRFS_COMPRESS_ZLIB) {
+ test_msg("Unexpected compress type, wanted %d, got %d\n",
+ BTRFS_COMPRESS_ZLIB, em->compress_type);
+ goto out;
+ }
+ disk_bytenr = em->block_start;
+ orig_start = em->start;
+ offset = em->start + em->len;
+ free_extent_map(em);
+
+ em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0);
+ if (IS_ERR(em)) {
+ test_msg("Got an error when we shouldn't have\n");
+ goto out;
+ }
+ if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
+ test_msg("Expected a real extent, got %llu\n", em->block_start);
+ goto out;
+ }
+ if (em->start != offset || em->len != 4096) {
+ test_msg("Unexpected extent wanted start %llu len 4096, got "
+ "start %llu len %llu\n", offset, em->start, em->len);
+ goto out;
+ }
+ if (em->flags != 0) {
+ test_msg("Unexpected flags set, want 0 have %lu\n", em->flags);
+ goto out;
+ }
+ if (em->orig_start != em->start) {
+ test_msg("Wrong orig offset, want %llu, have %llu\n", em->start,
+ em->orig_start);
+ goto out;
+ }
+ offset = em->start + em->len;
+ free_extent_map(em);
+
+ em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0);
+ if (IS_ERR(em)) {
+ test_msg("Got an error when we shouldn't have\n");
+ goto out;
+ }
+ if (em->block_start != disk_bytenr) {
+ test_msg("Block start does not match, want %llu got %llu\n",
+ disk_bytenr, em->block_start);
+ goto out;
+ }
+ if (em->start != offset || em->len != 8192) {
+ test_msg("Unexpected extent wanted start %llu len 8192, got "
+ "start %llu len %llu\n", offset, em->start, em->len);
+ goto out;
+ }
+ if (em->flags != compressed_only) {
+ test_msg("Unexpected flags set, want %lu have %lu\n",
+ compressed_only, em->flags);
+ goto out;
+ }
+ if (em->orig_start != orig_start) {
+ test_msg("Wrong orig offset, want %llu, have %llu\n",
+ em->start, orig_start);
+ goto out;
+ }
+ if (em->compress_type != BTRFS_COMPRESS_ZLIB) {
+ test_msg("Unexpected compress type, wanted %d, got %d\n",
+ BTRFS_COMPRESS_ZLIB, em->compress_type);
+ goto out;
+ }
+ offset = em->start + em->len;
+ free_extent_map(em);
+
+ /* A hole between regular extents but no hole extent */
+ em = btrfs_get_extent(inode, NULL, 0, offset + 6, 4096, 0);
+ if (IS_ERR(em)) {
+ test_msg("Got an error when we shouldn't have\n");
+ goto out;
+ }
+ if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
+ test_msg("Expected a real extent, got %llu\n", em->block_start);
+ goto out;
+ }
+ if (em->start != offset || em->len != 4096) {
+ test_msg("Unexpected extent wanted start %llu len 4096, got "
+ "start %llu len %llu\n", offset, em->start, em->len);
+ goto out;
+ }
+ if (em->flags != 0) {
+ test_msg("Unexpected flags set, want 0 have %lu\n", em->flags);
+ goto out;
+ }
+ if (em->orig_start != em->start) {
+ test_msg("Wrong orig offset, want %llu, have %llu\n", em->start,
+ em->orig_start);
+ goto out;
+ }
+ offset = em->start + em->len;
+ free_extent_map(em);
+
+ em = btrfs_get_extent(inode, NULL, 0, offset, 4096 * 1024, 0);
+ if (IS_ERR(em)) {
+ test_msg("Got an error when we shouldn't have\n");
+ goto out;
+ }
+ if (em->block_start != EXTENT_MAP_HOLE) {
+ test_msg("Expected a hole extent, got %llu\n", em->block_start);
+ goto out;
+ }
+ /*
+ * Currently we just return a length that we requested rather than the
+ * length of the actual hole, if this changes we'll have to change this
+ * test.
+ */
+ if (em->start != offset || em->len != 12288) {
+ test_msg("Unexpected extent wanted start %llu len 12288, got "
+ "start %llu len %llu\n", offset, em->start, em->len);
+ goto out;
+ }
+ if (em->flags != vacancy_only) {
+ test_msg("Unexpected flags set, want %lu have %lu\n",
+ vacancy_only, em->flags);
+ goto out;
+ }
+ if (em->orig_start != em->start) {
+ test_msg("Wrong orig offset, want %llu, have %llu\n", em->start,
+ em->orig_start);
+ goto out;
+ }
+ offset = em->start + em->len;
+ free_extent_map(em);
+
+ em = btrfs_get_extent(inode, NULL, 0, offset, 4096, 0);
+ if (IS_ERR(em)) {
+ test_msg("Got an error when we shouldn't have\n");
+ goto out;
+ }
+ if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
+ test_msg("Expected a real extent, got %llu\n", em->block_start);
+ goto out;
+ }
+ if (em->start != offset || em->len != 4096) {
+ test_msg("Unexpected extent wanted start %llu len 4096, got "
+ "start %llu len %llu\n", offset, em->start, em->len);
+ goto out;
+ }
+ if (em->flags != 0) {
+ test_msg("Unexpected flags set, want 0 have %lu\n", em->flags);
+ goto out;
+ }
+ if (em->orig_start != em->start) {
+ test_msg("Wrong orig offset, want %llu, have %llu\n", em->start,
+ em->orig_start);
+ goto out;
+ }
+ ret = 0;
+out:
+ if (!IS_ERR(em))
+ free_extent_map(em);
+ iput(inode);
+ free_dummy_root(root);
+ return ret;
+}
+
+static int test_hole_first(void)
+{
+ struct inode *inode = NULL;
+ struct btrfs_root *root = NULL;
+ struct extent_map *em = NULL;
+ int ret = -ENOMEM;
+
+ inode = btrfs_new_test_inode();
+ if (!inode) {
+ test_msg("Couldn't allocate inode\n");
+ return ret;
+ }
+
+ BTRFS_I(inode)->location.type = BTRFS_INODE_ITEM_KEY;
+ BTRFS_I(inode)->location.objectid = BTRFS_FIRST_FREE_OBJECTID;
+ BTRFS_I(inode)->location.offset = 0;
+
+ root = btrfs_alloc_dummy_root();
+ if (IS_ERR(root)) {
+ test_msg("Couldn't allocate root\n");
+ goto out;
+ }
+
+ root->fs_info = alloc_dummy_fs_info();
+ if (!root->fs_info) {
+ test_msg("Couldn't allocate dummy fs info\n");
+ goto out;
+ }
+
+ root->node = alloc_dummy_extent_buffer(0, 4096);
+ if (!root->node) {
+ test_msg("Couldn't allocate dummy buffer\n");
+ goto out;
+ }
+
+ extent_buffer_get(root->node);
+ btrfs_set_header_nritems(root->node, 0);
+ btrfs_set_header_level(root->node, 0);
+ BTRFS_I(inode)->root = root;
+ ret = -EINVAL;
+
+ /*
+ * Need a blank inode item here just so we don't confuse
+ * btrfs_get_extent.
+ */
+ insert_inode_item_key(root);
+ insert_extent(root, 4096, 4096, 4096, 0, 4096, 4096,
+ BTRFS_FILE_EXTENT_REG, 0, 1);
+ em = btrfs_get_extent(inode, NULL, 0, 0, 8192, 0);
+ if (IS_ERR(em)) {
+ test_msg("Got an error when we shouldn't have\n");
+ goto out;
+ }
+ if (em->block_start != EXTENT_MAP_HOLE) {
+ test_msg("Expected a hole, got %llu\n", em->block_start);
+ goto out;
+ }
+ if (em->start != 0 || em->len != 4096) {
+ test_msg("Unexpected extent wanted start 0 len 4096, got start "
+ "%llu len %llu\n", em->start, em->len);
+ goto out;
+ }
+ if (em->flags != vacancy_only) {
+ test_msg("Wrong flags, wanted %lu, have %lu\n", vacancy_only,
+ em->flags);
+ goto out;
+ }
+ free_extent_map(em);
+
+ em = btrfs_get_extent(inode, NULL, 0, 4096, 8192, 0);
+ if (IS_ERR(em)) {
+ test_msg("Got an error when we shouldn't have\n");
+ goto out;
+ }
+ if (em->block_start != 4096) {
+ test_msg("Expected a real extent, got %llu\n", em->block_start);
+ goto out;
+ }
+ if (em->start != 4096 || em->len != 4096) {
+ test_msg("Unexpected extent wanted start 4096 len 4096, got "
+ "start %llu len %llu\n", em->start, em->len);
+ goto out;
+ }
+ if (em->flags != 0) {
+ test_msg("Unexpected flags set, wanted 0 got %lu\n",
+ em->flags);
+ goto out;
+ }
+ ret = 0;
+out:
+ if (!IS_ERR(em))
+ free_extent_map(em);
+ iput(inode);
+ free_dummy_root(root);
+ return ret;
+}
+
+int btrfs_test_inodes(void)
+{
+ int ret;
+
+ set_bit(EXTENT_FLAG_COMPRESSED, &compressed_only);
+ set_bit(EXTENT_FLAG_VACANCY, &vacancy_only);
+ set_bit(EXTENT_FLAG_PREALLOC, &prealloc_only);
+
+ test_msg("Running btrfs_get_extent tests\n");
+ ret = test_btrfs_get_extent();
+ if (ret)
+ return ret;
+ test_msg("Running hole first btrfs_get_extent test\n");
+ return test_hole_first();
+}
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index af1931a..57c16b4 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -57,7 +57,7 @@ static unsigned int btrfs_blocked_trans_types[TRANS_STATE_MAX] = {
__TRANS_JOIN_NOLOCK),
};
-static void put_transaction(struct btrfs_transaction *transaction)
+void btrfs_put_transaction(struct btrfs_transaction *transaction)
{
WARN_ON(atomic_read(&transaction->use_count) == 0);
if (atomic_dec_and_test(&transaction->use_count)) {
@@ -332,7 +332,7 @@ static void wait_current_trans(struct btrfs_root *root)
wait_event(root->fs_info->transaction_wait,
cur_trans->state >= TRANS_STATE_UNBLOCKED ||
cur_trans->aborted);
- put_transaction(cur_trans);
+ btrfs_put_transaction(cur_trans);
} else {
spin_unlock(&root->fs_info->trans_lock);
}
@@ -353,6 +353,17 @@ static int may_wait_transaction(struct btrfs_root *root, int type)
return 0;
}
+static inline bool need_reserve_reloc_root(struct btrfs_root *root)
+{
+ if (!root->fs_info->reloc_ctl ||
+ !root->ref_cows ||
+ root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
+ root->reloc_root)
+ return false;
+
+ return true;
+}
+
static struct btrfs_trans_handle *
start_transaction(struct btrfs_root *root, u64 num_items, unsigned int type,
enum btrfs_reserve_flush_enum flush)
@@ -360,8 +371,9 @@ start_transaction(struct btrfs_root *root, u64 num_items, unsigned int type,
struct btrfs_trans_handle *h;
struct btrfs_transaction *cur_trans;
u64 num_bytes = 0;
- int ret;
u64 qgroup_reserved = 0;
+ bool reloc_reserved = false;
+ int ret;
if (test_bit(BTRFS_FS_STATE_ERROR, &root->fs_info->fs_state))
return ERR_PTR(-EROFS);
@@ -390,6 +402,14 @@ start_transaction(struct btrfs_root *root, u64 num_items, unsigned int type,
}
num_bytes = btrfs_calc_trans_metadata_size(root, num_items);
+ /*
+ * Do the reservation for the relocation root creation
+ */
+ if (unlikely(need_reserve_reloc_root(root))) {
+ num_bytes += root->nodesize;
+ reloc_reserved = true;
+ }
+
ret = btrfs_block_rsv_add(root,
&root->fs_info->trans_block_rsv,
num_bytes, flush);
@@ -451,6 +471,7 @@ again:
h->delayed_ref_elem.seq = 0;
h->type = type;
h->allocating_chunk = false;
+ h->reloc_reserved = false;
INIT_LIST_HEAD(&h->qgroup_ref_list);
INIT_LIST_HEAD(&h->new_bgs);
@@ -466,6 +487,7 @@ again:
h->transid, num_bytes, 1);
h->block_rsv = &root->fs_info->trans_block_rsv;
h->bytes_reserved = num_bytes;
+ h->reloc_reserved = reloc_reserved;
}
h->qgroup_reserved = qgroup_reserved;
@@ -610,7 +632,7 @@ int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid)
}
wait_for_commit(root, cur_trans);
- put_transaction(cur_trans);
+ btrfs_put_transaction(cur_trans);
out:
return ret;
}
@@ -735,7 +757,7 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
smp_mb();
if (waitqueue_active(&cur_trans->writer_wait))
wake_up(&cur_trans->writer_wait);
- put_transaction(cur_trans);
+ btrfs_put_transaction(cur_trans);
if (current->journal_info == trans)
current->journal_info = NULL;
@@ -744,8 +766,10 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
btrfs_run_delayed_iputs(root);
if (trans->aborted ||
- test_bit(BTRFS_FS_STATE_ERROR, &root->fs_info->fs_state))
+ test_bit(BTRFS_FS_STATE_ERROR, &root->fs_info->fs_state)) {
+ wake_up_process(info->transaction_kthread);
err = -EIO;
+ }
assert_qgroups_uptodate(trans);
kmem_cache_free(btrfs_trans_handle_cachep, trans);
@@ -837,7 +861,7 @@ int btrfs_wait_marked_extents(struct btrfs_root *root,
* them in one of two extent_io trees. This is used to make sure all of
* those extents are on disk for transaction or log commit
*/
-int btrfs_write_and_wait_marked_extents(struct btrfs_root *root,
+static int btrfs_write_and_wait_marked_extents(struct btrfs_root *root,
struct extent_io_tree *dirty_pages, int mark)
{
int ret;
@@ -948,16 +972,19 @@ static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans,
return ret;
ret = btrfs_run_dev_stats(trans, root->fs_info);
- WARN_ON(ret);
+ if (ret)
+ return ret;
ret = btrfs_run_dev_replace(trans, root->fs_info);
- WARN_ON(ret);
-
+ if (ret)
+ return ret;
ret = btrfs_run_qgroups(trans, root->fs_info);
- BUG_ON(ret);
+ if (ret)
+ return ret;
/* run_qgroups might have added some more refs */
ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
- BUG_ON(ret);
+ if (ret)
+ return ret;
while (!list_empty(&fs_info->dirty_cowonly_roots)) {
next = fs_info->dirty_cowonly_roots.next;
@@ -1225,8 +1252,8 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
btrfs_set_root_stransid(new_root_item, 0);
btrfs_set_root_rtransid(new_root_item, 0);
}
- new_root_item->otime.sec = cpu_to_le64(cur_time.tv_sec);
- new_root_item->otime.nsec = cpu_to_le32(cur_time.tv_nsec);
+ btrfs_set_stack_timespec_sec(&new_root_item->otime, cur_time.tv_sec);
+ btrfs_set_stack_timespec_nsec(&new_root_item->otime, cur_time.tv_nsec);
btrfs_set_root_otransid(new_root_item, trans->transid);
old = btrfs_lock_root_node(root);
@@ -1311,8 +1338,26 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
dentry->d_name.len * 2);
parent_inode->i_mtime = parent_inode->i_ctime = CURRENT_TIME;
ret = btrfs_update_inode_fallback(trans, parent_root, parent_inode);
- if (ret)
+ if (ret) {
btrfs_abort_transaction(trans, root, ret);
+ goto fail;
+ }
+ ret = btrfs_uuid_tree_add(trans, fs_info->uuid_root, new_uuid.b,
+ BTRFS_UUID_KEY_SUBVOL, objectid);
+ if (ret) {
+ btrfs_abort_transaction(trans, root, ret);
+ goto fail;
+ }
+ if (!btrfs_is_empty_uuid(new_root_item->received_uuid)) {
+ ret = btrfs_uuid_tree_add(trans, fs_info->uuid_root,
+ new_root_item->received_uuid,
+ BTRFS_UUID_KEY_RECEIVED_SUBVOL,
+ objectid);
+ if (ret && ret != -EEXIST) {
+ btrfs_abort_transaction(trans, root, ret);
+ goto fail;
+ }
+ }
fail:
pending->error = ret;
dir_item_existed:
@@ -1362,6 +1407,8 @@ static void update_super_roots(struct btrfs_root *root)
super->root_level = root_item->level;
if (btrfs_test_opt(root, SPACE_CACHE))
super->cache_generation = root_item->generation;
+ if (root->fs_info->update_uuid_tree_gen)
+ super->uuid_tree_generation = root_item->generation;
}
int btrfs_transaction_in_commit(struct btrfs_fs_info *info)
@@ -1490,7 +1537,7 @@ int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans,
if (current->journal_info == trans)
current->journal_info = NULL;
- put_transaction(cur_trans);
+ btrfs_put_transaction(cur_trans);
return 0;
}
@@ -1532,8 +1579,10 @@ static void cleanup_transaction(struct btrfs_trans_handle *trans,
root->fs_info->running_transaction = NULL;
spin_unlock(&root->fs_info->trans_lock);
- put_transaction(cur_trans);
- put_transaction(cur_trans);
+ if (trans->type & __TRANS_FREEZABLE)
+ sb_end_intwrite(root->fs_info->sb);
+ btrfs_put_transaction(cur_trans);
+ btrfs_put_transaction(cur_trans);
trace_btrfs_transaction_commit(root);
@@ -1551,15 +1600,19 @@ static int btrfs_flush_all_pending_stuffs(struct btrfs_trans_handle *trans,
int ret;
ret = btrfs_run_delayed_items(trans, root);
- if (ret)
- return ret;
-
/*
* running the delayed items may have added new refs. account
* them now so that they hinder processing of more delayed refs
* as little as possible.
*/
- btrfs_delayed_refs_qgroup_accounting(trans, root->fs_info);
+ if (ret) {
+ btrfs_delayed_refs_qgroup_accounting(trans, root->fs_info);
+ return ret;
+ }
+
+ ret = btrfs_delayed_refs_qgroup_accounting(trans, root->fs_info);
+ if (ret)
+ return ret;
/*
* rename don't use btrfs_join_transaction, so, once we
@@ -1576,14 +1629,14 @@ static int btrfs_flush_all_pending_stuffs(struct btrfs_trans_handle *trans,
static inline int btrfs_start_delalloc_flush(struct btrfs_fs_info *fs_info)
{
if (btrfs_test_opt(fs_info->tree_root, FLUSHONCOMMIT))
- return btrfs_start_all_delalloc_inodes(fs_info, 1);
+ return btrfs_start_delalloc_roots(fs_info, 1);
return 0;
}
static inline void btrfs_wait_delalloc_flush(struct btrfs_fs_info *fs_info)
{
if (btrfs_test_opt(fs_info->tree_root, FLUSHONCOMMIT))
- btrfs_wait_all_ordered_extents(fs_info, 1);
+ btrfs_wait_ordered_roots(fs_info, -1);
}
int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
@@ -1649,7 +1702,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
wait_for_commit(root, cur_trans);
- put_transaction(cur_trans);
+ btrfs_put_transaction(cur_trans);
return ret;
}
@@ -1666,7 +1719,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
wait_for_commit(root, prev_trans);
- put_transaction(prev_trans);
+ btrfs_put_transaction(prev_trans);
} else {
spin_unlock(&root->fs_info->trans_lock);
}
@@ -1818,11 +1871,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
assert_qgroups_uptodate(trans);
update_super_roots(root);
- if (!root->fs_info->log_root_recovering) {
- btrfs_set_super_log_root(root->fs_info->super_copy, 0);
- btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
- }
-
+ btrfs_set_super_log_root(root->fs_info->super_copy, 0);
+ btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
memcpy(root->fs_info->super_for_commit, root->fs_info->super_copy,
sizeof(*root->fs_info->super_copy));
@@ -1868,8 +1918,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
list_del_init(&cur_trans->list);
spin_unlock(&root->fs_info->trans_lock);
- put_transaction(cur_trans);
- put_transaction(cur_trans);
+ btrfs_put_transaction(cur_trans);
+ btrfs_put_transaction(cur_trans);
if (trans->type & __TRANS_FREEZABLE)
sb_end_intwrite(root->fs_info->sb);
@@ -1928,8 +1978,7 @@ int btrfs_clean_one_deleted_snapshot(struct btrfs_root *root)
list_del_init(&root->root_list);
spin_unlock(&fs_info->trans_lock);
- pr_debug("btrfs: cleaner removing %llu\n",
- (unsigned long long)root->objectid);
+ pr_debug("btrfs: cleaner removing %llu\n", root->objectid);
btrfs_kill_all_delayed_nodes(root);
@@ -1942,6 +1991,5 @@ int btrfs_clean_one_deleted_snapshot(struct btrfs_root *root)
* If we encounter a transaction abort during snapshot cleaning, we
* don't want to crash here
*/
- BUG_ON(ret < 0 && ret != -EAGAIN && ret != -EROFS);
- return 1;
+ return (ret < 0) ? 0 : 1;
}
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index defbc42..7657d11 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -92,6 +92,7 @@ struct btrfs_trans_handle {
short aborted;
short adding_csums;
bool allocating_chunk;
+ bool reloc_reserved;
unsigned int type;
/*
* this root is only needed to validate that the root passed to
@@ -160,12 +161,11 @@ int btrfs_should_end_transaction(struct btrfs_trans_handle *trans,
void btrfs_throttle(struct btrfs_root *root);
int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans,
struct btrfs_root *root);
-int btrfs_write_and_wait_marked_extents(struct btrfs_root *root,
- struct extent_io_tree *dirty_pages, int mark);
int btrfs_write_marked_extents(struct btrfs_root *root,
struct extent_io_tree *dirty_pages, int mark);
int btrfs_wait_marked_extents(struct btrfs_root *root,
struct extent_io_tree *dirty_pages, int mark);
int btrfs_transaction_blocked(struct btrfs_fs_info *info);
int btrfs_transaction_in_commit(struct btrfs_fs_info *info);
+void btrfs_put_transaction(struct btrfs_transaction *transaction);
#endif
diff --git a/fs/btrfs/tree-defrag.c b/fs/btrfs/tree-defrag.c
index 94e05c1..76928ca 100644
--- a/fs/btrfs/tree-defrag.c
+++ b/fs/btrfs/tree-defrag.c
@@ -37,7 +37,6 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
int ret = 0;
int wret;
int level;
- int is_extent = 0;
int next_key_ret = 0;
u64 last_ret = 0;
u64 min_trans = 0;
@@ -50,7 +49,7 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
goto out;
}
- if (root->ref_cows == 0 && !is_extent)
+ if (root->ref_cows == 0)
goto out;
if (btrfs_test_opt(root, SSD))
@@ -85,7 +84,7 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
path->keep_locks = 1;
- ret = btrfs_search_forward(root, &key, NULL, path, min_trans);
+ ret = btrfs_search_forward(root, &key, path, min_trans);
if (ret < 0)
goto out;
if (ret > 0) {
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index ff60d89..744553c 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -26,7 +26,6 @@
#include "locking.h"
#include "print-tree.h"
#include "backref.h"
-#include "compat.h"
#include "tree-log.h"
#include "hash.h"
@@ -93,7 +92,8 @@
*/
#define LOG_WALK_PIN_ONLY 0
#define LOG_WALK_REPLAY_INODES 1
-#define LOG_WALK_REPLAY_ALL 2
+#define LOG_WALK_REPLAY_DIR_INDEX 2
+#define LOG_WALK_REPLAY_ALL 3
static int btrfs_log_inode(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct inode *inode,
@@ -393,6 +393,7 @@ static noinline int overwrite_item(struct btrfs_trans_handle *trans,
if (inode_item) {
struct btrfs_inode_item *item;
u64 nbytes;
+ u32 mode;
item = btrfs_item_ptr(path->nodes[0], path->slots[0],
struct btrfs_inode_item);
@@ -400,9 +401,19 @@ static noinline int overwrite_item(struct btrfs_trans_handle *trans,
item = btrfs_item_ptr(eb, slot,
struct btrfs_inode_item);
btrfs_set_inode_nbytes(eb, item, nbytes);
+
+ /*
+ * If this is a directory we need to reset the i_size to
+ * 0 so that we can set it up properly when replaying
+ * the rest of the items in this log.
+ */
+ mode = btrfs_inode_mode(eb, item);
+ if (S_ISDIR(mode))
+ btrfs_set_inode_size(eb, item, 0);
}
} else if (inode_item) {
struct btrfs_inode_item *item;
+ u32 mode;
/*
* New inode, set nbytes to 0 so that the nbytes comes out
@@ -410,6 +421,15 @@ static noinline int overwrite_item(struct btrfs_trans_handle *trans,
*/
item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
btrfs_set_inode_nbytes(eb, item, 0);
+
+ /*
+ * If this is a directory we need to reset the i_size to 0 so
+ * that we can set it up properly when replaying the rest of
+ * the items in this log.
+ */
+ mode = btrfs_inode_mode(eb, item);
+ if (S_ISDIR(mode))
+ btrfs_set_inode_size(eb, item, 0);
}
insert:
btrfs_release_path(path);
@@ -747,7 +767,8 @@ static noinline int drop_one_dir_item(struct btrfs_trans_handle *trans,
ret = btrfs_unlink_inode(trans, root, dir, inode, name, name_len);
if (ret)
goto out;
- btrfs_run_delayed_items(trans, root);
+ else
+ ret = btrfs_run_delayed_items(trans, root);
out:
kfree(name);
iput(inode);
@@ -914,7 +935,7 @@ again:
parent_objectid,
victim_name,
victim_name_len)) {
- btrfs_inc_nlink(inode);
+ inc_nlink(inode);
btrfs_release_path(path);
ret = btrfs_unlink_inode(trans, root, dir,
@@ -923,7 +944,9 @@ again:
kfree(victim_name);
if (ret)
return ret;
- btrfs_run_delayed_items(trans, root);
+ ret = btrfs_run_delayed_items(trans, root);
+ if (ret)
+ return ret;
*search_done = 1;
goto again;
}
@@ -982,7 +1005,7 @@ again:
victim_parent = read_one_inode(root,
parent_objectid);
if (victim_parent) {
- btrfs_inc_nlink(inode);
+ inc_nlink(inode);
btrfs_release_path(path);
ret = btrfs_unlink_inode(trans, root,
@@ -990,7 +1013,9 @@ again:
inode,
victim_name,
victim_name_len);
- btrfs_run_delayed_items(trans, root);
+ if (!ret)
+ ret = btrfs_run_delayed_items(
+ trans, root);
}
iput(victim_parent);
kfree(victim_name);
@@ -1087,11 +1112,11 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
struct extent_buffer *eb, int slot,
struct btrfs_key *key)
{
- struct inode *dir;
- struct inode *inode;
+ struct inode *dir = NULL;
+ struct inode *inode = NULL;
unsigned long ref_ptr;
unsigned long ref_end;
- char *name;
+ char *name = NULL;
int namelen;
int ret;
int search_done = 0;
@@ -1124,13 +1149,15 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
* care of the rest
*/
dir = read_one_inode(root, parent_objectid);
- if (!dir)
- return -ENOENT;
+ if (!dir) {
+ ret = -ENOENT;
+ goto out;
+ }
inode = read_one_inode(root, inode_objectid);
if (!inode) {
- iput(dir);
- return -EIO;
+ ret = -EIO;
+ goto out;
}
while (ref_ptr < ref_end) {
@@ -1143,14 +1170,16 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
*/
if (!dir)
dir = read_one_inode(root, parent_objectid);
- if (!dir)
- return -ENOENT;
+ if (!dir) {
+ ret = -ENOENT;
+ goto out;
+ }
} else {
ret = ref_get_fields(eb, ref_ptr, &namelen, &name,
&ref_index);
}
if (ret)
- return ret;
+ goto out;
/* if we already have a perfect match, we're done */
if (!inode_in_dir(root, path, btrfs_ino(dir), btrfs_ino(inode),
@@ -1170,12 +1199,11 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
parent_objectid,
ref_index, name, namelen,
&search_done);
- if (ret == 1) {
- ret = 0;
+ if (ret) {
+ if (ret == 1)
+ ret = 0;
goto out;
}
- if (ret)
- goto out;
}
/* insert our name */
@@ -1189,6 +1217,7 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
ref_ptr = (unsigned long)(ref_ptr + ref_struct_size) + namelen;
kfree(name);
+ name = NULL;
if (log_ref_ver) {
iput(dir);
dir = NULL;
@@ -1199,6 +1228,7 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
ret = overwrite_item(trans, root, path, eb, slot, key);
out:
btrfs_release_path(path);
+ kfree(name);
iput(dir);
iput(inode);
return ret;
@@ -1281,6 +1311,7 @@ static int count_inode_refs(struct btrfs_root *root,
break;
path->slots[0]--;
}
+process_slot:
btrfs_item_key_to_cpu(path->nodes[0], &key,
path->slots[0]);
if (key.objectid != ino ||
@@ -1301,6 +1332,10 @@ static int count_inode_refs(struct btrfs_root *root,
if (key.offset == 0)
break;
+ if (path->slots[0] > 0) {
+ path->slots[0]--;
+ goto process_slot;
+ }
key.offset--;
btrfs_release_path(path);
}
@@ -1454,7 +1489,7 @@ static noinline int link_to_fixup_dir(struct btrfs_trans_handle *trans,
if (!inode->i_nlink)
set_nlink(inode, 1);
else
- btrfs_inc_nlink(inode);
+ inc_nlink(inode);
ret = btrfs_update_inode(trans, root, inode);
} else if (ret == -EEXIST) {
ret = 0;
@@ -1491,6 +1526,7 @@ static noinline int insert_one_name(struct btrfs_trans_handle *trans,
iput(inode);
return -EIO;
}
+
ret = btrfs_add_link(trans, dir, inode, name, name_len, 1, index);
/* FIXME, put inode into FIXUP list */
@@ -1529,6 +1565,7 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans,
u8 log_type;
int exists;
int ret = 0;
+ bool update_size = (key->type == BTRFS_DIR_INDEX_KEY);
dir = read_one_inode(root, key->objectid);
if (!dir)
@@ -1536,8 +1573,10 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans,
name_len = btrfs_dir_name_len(eb, di);
name = kmalloc(name_len, GFP_NOFS);
- if (!name)
- return -ENOMEM;
+ if (!name) {
+ ret = -ENOMEM;
+ goto out;
+ }
log_type = btrfs_dir_type(eb, di);
read_extent_buffer(eb, name, (unsigned long)(di + 1),
@@ -1597,6 +1636,10 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans,
goto insert;
out:
btrfs_release_path(path);
+ if (!ret && update_size) {
+ btrfs_i_size_write(dir, dir->i_size + name_len * 2);
+ ret = btrfs_update_inode(trans, root, dir);
+ }
kfree(name);
iput(dir);
return ret;
@@ -1607,6 +1650,7 @@ insert:
name, name_len, log_type, &log_key);
if (ret && ret != -ENOENT)
goto out;
+ update_size = false;
ret = 0;
goto out;
}
@@ -1788,7 +1832,7 @@ again:
dir_key->offset,
name, name_len, 0);
}
- if (IS_ERR_OR_NULL(log_di)) {
+ if (!log_di || (IS_ERR(log_di) && PTR_ERR(log_di) == -ENOENT)) {
btrfs_dir_item_key_to_cpu(eb, di, &location);
btrfs_release_path(path);
btrfs_release_path(log_path);
@@ -1806,11 +1850,11 @@ again:
goto out;
}
- btrfs_inc_nlink(inode);
+ inc_nlink(inode);
ret = btrfs_unlink_inode(trans, root, dir, inode,
name, name_len);
if (!ret)
- btrfs_run_delayed_items(trans, root);
+ ret = btrfs_run_delayed_items(trans, root);
kfree(name);
iput(inode);
if (ret)
@@ -1825,6 +1869,9 @@ again:
goto again;
ret = 0;
goto out;
+ } else if (IS_ERR(log_di)) {
+ kfree(name);
+ return PTR_ERR(log_di);
}
btrfs_release_path(log_path);
kfree(name);
@@ -2020,6 +2067,15 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb,
if (ret)
break;
}
+
+ if (key.type == BTRFS_DIR_INDEX_KEY &&
+ wc->stage == LOG_WALK_REPLAY_DIR_INDEX) {
+ ret = replay_one_dir_item(wc->trans, root, path,
+ eb, i, &key);
+ if (ret)
+ break;
+ }
+
if (wc->stage < LOG_WALK_REPLAY_ALL)
continue;
@@ -2041,8 +2097,7 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb,
eb, i, &key);
if (ret)
break;
- } else if (key.type == BTRFS_DIR_ITEM_KEY ||
- key.type == BTRFS_DIR_INDEX_KEY) {
+ } else if (key.type == BTRFS_DIR_ITEM_KEY) {
ret = replay_one_dir_item(wc->trans, root, path,
eb, i, &key);
if (ret)
@@ -2075,8 +2130,7 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
WARN_ON(*level >= BTRFS_MAX_LEVEL);
cur = path->nodes[*level];
- if (btrfs_header_level(cur) != *level)
- WARN_ON(1);
+ WARN_ON(btrfs_header_level(cur) != *level);
if (path->slots[*level] >=
btrfs_header_nritems(cur))
@@ -2108,11 +2162,13 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
return ret;
}
- btrfs_tree_lock(next);
- btrfs_set_lock_blocking(next);
- clean_tree_block(trans, root, next);
- btrfs_wait_tree_block_writeback(next);
- btrfs_tree_unlock(next);
+ if (trans) {
+ btrfs_tree_lock(next);
+ btrfs_set_lock_blocking(next);
+ clean_tree_block(trans, root, next);
+ btrfs_wait_tree_block_writeback(next);
+ btrfs_tree_unlock(next);
+ }
WARN_ON(root_owner !=
BTRFS_TREE_LOG_OBJECTID);
@@ -2184,11 +2240,13 @@ static noinline int walk_up_log_tree(struct btrfs_trans_handle *trans,
next = path->nodes[*level];
- btrfs_tree_lock(next);
- btrfs_set_lock_blocking(next);
- clean_tree_block(trans, root, next);
- btrfs_wait_tree_block_writeback(next);
- btrfs_tree_unlock(next);
+ if (trans) {
+ btrfs_tree_lock(next);
+ btrfs_set_lock_blocking(next);
+ clean_tree_block(trans, root, next);
+ btrfs_wait_tree_block_writeback(next);
+ btrfs_tree_unlock(next);
+ }
WARN_ON(root_owner != BTRFS_TREE_LOG_OBJECTID);
ret = btrfs_free_and_pin_reserved_extent(root,
@@ -2258,11 +2316,13 @@ static int walk_log_tree(struct btrfs_trans_handle *trans,
next = path->nodes[orig_level];
- btrfs_tree_lock(next);
- btrfs_set_lock_blocking(next);
- clean_tree_block(trans, log, next);
- btrfs_wait_tree_block_writeback(next);
- btrfs_tree_unlock(next);
+ if (trans) {
+ btrfs_tree_lock(next);
+ btrfs_set_lock_blocking(next);
+ clean_tree_block(trans, log, next);
+ btrfs_wait_tree_block_writeback(next);
+ btrfs_tree_unlock(next);
+ }
WARN_ON(log->root_key.objectid !=
BTRFS_TREE_LOG_OBJECTID);
@@ -2528,9 +2588,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
* the running transaction open, so a full commit can't hop
* in and cause problems either.
*/
- btrfs_scrub_pause_super(root);
ret = write_ctree_super(trans, root->fs_info->tree_root, 1);
- btrfs_scrub_continue_super(root);
if (ret) {
btrfs_abort_transaction(trans, root, ret);
goto out_wake_log_root;
@@ -2565,13 +2623,10 @@ static void free_log_tree(struct btrfs_trans_handle *trans,
.process_func = process_one_buffer
};
- if (trans) {
- ret = walk_log_tree(trans, log, &wc);
-
- /* I don't think this can happen but just in case */
- if (ret)
- btrfs_abort_transaction(trans, log, ret);
- }
+ ret = walk_log_tree(trans, log, &wc);
+ /* I don't think this can happen but just in case */
+ if (ret)
+ btrfs_abort_transaction(trans, log, ret);
while (1) {
ret = find_first_extent_bit(&log->dirty_log_pages,
@@ -2824,7 +2879,6 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans,
u64 min_offset, u64 *last_offset_ret)
{
struct btrfs_key min_key;
- struct btrfs_key max_key;
struct btrfs_root *log = root->log_root;
struct extent_buffer *src;
int err = 0;
@@ -2836,9 +2890,6 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans,
u64 ino = btrfs_ino(inode);
log = root->log_root;
- max_key.objectid = ino;
- max_key.offset = (u64)-1;
- max_key.type = key_type;
min_key.objectid = ino;
min_key.type = key_type;
@@ -2846,8 +2897,7 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans,
path->keep_locks = 1;
- ret = btrfs_search_forward(root, &min_key, &max_key,
- path, trans->transid);
+ ret = btrfs_search_forward(root, &min_key, path, trans->transid);
/*
* we didn't find anything from this transaction, see if there
@@ -2900,10 +2950,8 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans,
/* find the first key from this transaction again */
ret = btrfs_search_slot(NULL, root, &min_key, path, 0, 0);
- if (ret != 0) {
- WARN_ON(1);
+ if (WARN_ON(ret != 0))
goto done;
- }
/*
* we have a block from this transaction, log every item in it
@@ -3129,11 +3177,10 @@ static int log_inode_item(struct btrfs_trans_handle *trans,
struct inode *inode)
{
struct btrfs_inode_item *inode_item;
- struct btrfs_key key;
int ret;
- memcpy(&key, &BTRFS_I(inode)->location, sizeof(key));
- ret = btrfs_insert_empty_item(trans, log, path, &key,
+ ret = btrfs_insert_empty_item(trans, log, path,
+ &BTRFS_I(inode)->location,
sizeof(*inode_item));
if (ret && ret != -EEXIST)
return ret;
@@ -3332,7 +3379,7 @@ static int log_one_extent(struct btrfs_trans_handle *trans,
btrfs_set_token_file_extent_type(leaf, fi,
BTRFS_FILE_EXTENT_REG,
&token);
- if (em->block_start == 0)
+ if (em->block_start == EXTENT_MAP_HOLE)
skip_csum = true;
}
@@ -3374,11 +3421,6 @@ static int log_one_extent(struct btrfs_trans_handle *trans,
if (skip_csum)
return 0;
- if (em->compress_type) {
- csum_offset = 0;
- csum_len = block_len;
- }
-
/*
* First check and see if our csums are on our outstanding ordered
* extents.
@@ -3462,8 +3504,13 @@ unlocked:
if (!mod_len || ret)
return ret;
- csum_offset = mod_start - em->start;
- csum_len = mod_len;
+ if (em->compress_type) {
+ csum_offset = 0;
+ csum_len = block_len;
+ } else {
+ csum_offset = mod_start - em->start;
+ csum_len = mod_len;
+ }
/* block start is already adjusted for the file extent offset. */
ret = btrfs_lookup_csums_range(log->fs_info->csum_root,
@@ -3676,7 +3723,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
while (1) {
ins_nr = 0;
- ret = btrfs_search_forward(root, &min_key, &max_key,
+ ret = btrfs_search_forward(root, &min_key,
path, trans->transid);
if (ret != 0)
break;
@@ -3726,14 +3773,14 @@ next_slot:
}
btrfs_release_path(path);
- if (min_key.offset < (u64)-1)
+ if (min_key.offset < (u64)-1) {
min_key.offset++;
- else if (min_key.type < (u8)-1)
+ } else if (min_key.type < max_key.type) {
min_key.type++;
- else if (min_key.objectid < (u64)-1)
- min_key.objectid++;
- else
+ min_key.offset = 0;
+ } else {
break;
+ }
}
if (ins_nr) {
ret = copy_items(trans, inode, dst_path, src, ins_start_slot,
@@ -3798,6 +3845,7 @@ static noinline int check_parent_dirs_for_sync(struct btrfs_trans_handle *trans,
int ret = 0;
struct btrfs_root *root;
struct dentry *old_parent = NULL;
+ struct inode *orig_inode = inode;
/*
* for regular files, if its inode is already on disk, we don't
@@ -3817,7 +3865,14 @@ static noinline int check_parent_dirs_for_sync(struct btrfs_trans_handle *trans,
}
while (1) {
- BTRFS_I(inode)->logged_trans = trans->transid;
+ /*
+ * If we are logging a directory then we start with our inode,
+ * not our parents inode, so we need to skipp setting the
+ * logged_trans so that further down in the log code we don't
+ * think this inode has already been logged.
+ */
+ if (inode != orig_inode)
+ BTRFS_I(inode)->logged_trans = trans->transid;
smp_mb();
if (BTRFS_I(inode)->last_unlink_trans > last_committed) {
diff --git a/fs/btrfs/uuid-tree.c b/fs/btrfs/uuid-tree.c
new file mode 100644
index 0000000..fbda900
--- /dev/null
+++ b/fs/btrfs/uuid-tree.c
@@ -0,0 +1,354 @@
+/*
+ * Copyright (C) STRATO AG 2013. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+#include <linux/uuid.h>
+#include <asm/unaligned.h>
+#include "ctree.h"
+#include "transaction.h"
+#include "disk-io.h"
+#include "print-tree.h"
+
+
+static void btrfs_uuid_to_key(u8 *uuid, u8 type, struct btrfs_key *key)
+{
+ key->type = type;
+ key->objectid = get_unaligned_le64(uuid);
+ key->offset = get_unaligned_le64(uuid + sizeof(u64));
+}
+
+/* return -ENOENT for !found, < 0 for errors, or 0 if an item was found */
+static int btrfs_uuid_tree_lookup(struct btrfs_root *uuid_root, u8 *uuid,
+ u8 type, u64 subid)
+{
+ int ret;
+ struct btrfs_path *path = NULL;
+ struct extent_buffer *eb;
+ int slot;
+ u32 item_size;
+ unsigned long offset;
+ struct btrfs_key key;
+
+ if (WARN_ON_ONCE(!uuid_root)) {
+ ret = -ENOENT;
+ goto out;
+ }
+
+ path = btrfs_alloc_path();
+ if (!path) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ btrfs_uuid_to_key(uuid, type, &key);
+ ret = btrfs_search_slot(NULL, uuid_root, &key, path, 0, 0);
+ if (ret < 0) {
+ goto out;
+ } else if (ret > 0) {
+ ret = -ENOENT;
+ goto out;
+ }
+
+ eb = path->nodes[0];
+ slot = path->slots[0];
+ item_size = btrfs_item_size_nr(eb, slot);
+ offset = btrfs_item_ptr_offset(eb, slot);
+ ret = -ENOENT;
+
+ if (!IS_ALIGNED(item_size, sizeof(u64))) {
+ pr_warn("btrfs: uuid item with illegal size %lu!\n",
+ (unsigned long)item_size);
+ goto out;
+ }
+ while (item_size) {
+ __le64 data;
+
+ read_extent_buffer(eb, &data, offset, sizeof(data));
+ if (le64_to_cpu(data) == subid) {
+ ret = 0;
+ break;
+ }
+ offset += sizeof(data);
+ item_size -= sizeof(data);
+ }
+
+out:
+ btrfs_free_path(path);
+ return ret;
+}
+
+int btrfs_uuid_tree_add(struct btrfs_trans_handle *trans,
+ struct btrfs_root *uuid_root, u8 *uuid, u8 type,
+ u64 subid_cpu)
+{
+ int ret;
+ struct btrfs_path *path = NULL;
+ struct btrfs_key key;
+ struct extent_buffer *eb;
+ int slot;
+ unsigned long offset;
+ __le64 subid_le;
+
+ ret = btrfs_uuid_tree_lookup(uuid_root, uuid, type, subid_cpu);
+ if (ret != -ENOENT)
+ return ret;
+
+ if (WARN_ON_ONCE(!uuid_root)) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ btrfs_uuid_to_key(uuid, type, &key);
+
+ path = btrfs_alloc_path();
+ if (!path) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ ret = btrfs_insert_empty_item(trans, uuid_root, path, &key,
+ sizeof(subid_le));
+ if (ret >= 0) {
+ /* Add an item for the type for the first time */
+ eb = path->nodes[0];
+ slot = path->slots[0];
+ offset = btrfs_item_ptr_offset(eb, slot);
+ } else if (ret == -EEXIST) {
+ /*
+ * An item with that type already exists.
+ * Extend the item and store the new subid at the end.
+ */
+ btrfs_extend_item(uuid_root, path, sizeof(subid_le));
+ eb = path->nodes[0];
+ slot = path->slots[0];
+ offset = btrfs_item_ptr_offset(eb, slot);
+ offset += btrfs_item_size_nr(eb, slot) - sizeof(subid_le);
+ } else if (ret < 0) {
+ pr_warn("btrfs: insert uuid item failed %d (0x%016llx, 0x%016llx) type %u!\n",
+ ret, (unsigned long long)key.objectid,
+ (unsigned long long)key.offset, type);
+ goto out;
+ }
+
+ ret = 0;
+ subid_le = cpu_to_le64(subid_cpu);
+ write_extent_buffer(eb, &subid_le, offset, sizeof(subid_le));
+ btrfs_mark_buffer_dirty(eb);
+
+out:
+ btrfs_free_path(path);
+ return ret;
+}
+
+int btrfs_uuid_tree_rem(struct btrfs_trans_handle *trans,
+ struct btrfs_root *uuid_root, u8 *uuid, u8 type,
+ u64 subid)
+{
+ int ret;
+ struct btrfs_path *path = NULL;
+ struct btrfs_key key;
+ struct extent_buffer *eb;
+ int slot;
+ unsigned long offset;
+ u32 item_size;
+ unsigned long move_dst;
+ unsigned long move_src;
+ unsigned long move_len;
+
+ if (WARN_ON_ONCE(!uuid_root)) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ btrfs_uuid_to_key(uuid, type, &key);
+
+ path = btrfs_alloc_path();
+ if (!path) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ ret = btrfs_search_slot(trans, uuid_root, &key, path, -1, 1);
+ if (ret < 0) {
+ pr_warn("btrfs: error %d while searching for uuid item!\n",
+ ret);
+ goto out;
+ }
+ if (ret > 0) {
+ ret = -ENOENT;
+ goto out;
+ }
+
+ eb = path->nodes[0];
+ slot = path->slots[0];
+ offset = btrfs_item_ptr_offset(eb, slot);
+ item_size = btrfs_item_size_nr(eb, slot);
+ if (!IS_ALIGNED(item_size, sizeof(u64))) {
+ pr_warn("btrfs: uuid item with illegal size %lu!\n",
+ (unsigned long)item_size);
+ ret = -ENOENT;
+ goto out;
+ }
+ while (item_size) {
+ __le64 read_subid;
+
+ read_extent_buffer(eb, &read_subid, offset, sizeof(read_subid));
+ if (le64_to_cpu(read_subid) == subid)
+ break;
+ offset += sizeof(read_subid);
+ item_size -= sizeof(read_subid);
+ }
+
+ if (!item_size) {
+ ret = -ENOENT;
+ goto out;
+ }
+
+ item_size = btrfs_item_size_nr(eb, slot);
+ if (item_size == sizeof(subid)) {
+ ret = btrfs_del_item(trans, uuid_root, path);
+ goto out;
+ }
+
+ move_dst = offset;
+ move_src = offset + sizeof(subid);
+ move_len = item_size - (move_src - btrfs_item_ptr_offset(eb, slot));
+ memmove_extent_buffer(eb, move_dst, move_src, move_len);
+ btrfs_truncate_item(uuid_root, path, item_size - sizeof(subid), 1);
+
+out:
+ btrfs_free_path(path);
+ return ret;
+}
+
+static int btrfs_uuid_iter_rem(struct btrfs_root *uuid_root, u8 *uuid, u8 type,
+ u64 subid)
+{
+ struct btrfs_trans_handle *trans;
+ int ret;
+
+ /* 1 - for the uuid item */
+ trans = btrfs_start_transaction(uuid_root, 1);
+ if (IS_ERR(trans)) {
+ ret = PTR_ERR(trans);
+ goto out;
+ }
+
+ ret = btrfs_uuid_tree_rem(trans, uuid_root, uuid, type, subid);
+ btrfs_end_transaction(trans, uuid_root);
+
+out:
+ return ret;
+}
+
+int btrfs_uuid_tree_iterate(struct btrfs_fs_info *fs_info,
+ int (*check_func)(struct btrfs_fs_info *, u8 *, u8,
+ u64))
+{
+ struct btrfs_root *root = fs_info->uuid_root;
+ struct btrfs_key key;
+ struct btrfs_path *path;
+ int ret = 0;
+ struct extent_buffer *leaf;
+ int slot;
+ u32 item_size;
+ unsigned long offset;
+
+ path = btrfs_alloc_path();
+ if (!path) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ key.objectid = 0;
+ key.type = 0;
+ key.offset = 0;
+
+again_search_slot:
+ path->keep_locks = 1;
+ ret = btrfs_search_forward(root, &key, path, 0);
+ if (ret) {
+ if (ret > 0)
+ ret = 0;
+ goto out;
+ }
+
+ while (1) {
+ cond_resched();
+ leaf = path->nodes[0];
+ slot = path->slots[0];
+ btrfs_item_key_to_cpu(leaf, &key, slot);
+
+ if (key.type != BTRFS_UUID_KEY_SUBVOL &&
+ key.type != BTRFS_UUID_KEY_RECEIVED_SUBVOL)
+ goto skip;
+
+ offset = btrfs_item_ptr_offset(leaf, slot);
+ item_size = btrfs_item_size_nr(leaf, slot);
+ if (!IS_ALIGNED(item_size, sizeof(u64))) {
+ pr_warn("btrfs: uuid item with illegal size %lu!\n",
+ (unsigned long)item_size);
+ goto skip;
+ }
+ while (item_size) {
+ u8 uuid[BTRFS_UUID_SIZE];
+ __le64 subid_le;
+ u64 subid_cpu;
+
+ put_unaligned_le64(key.objectid, uuid);
+ put_unaligned_le64(key.offset, uuid + sizeof(u64));
+ read_extent_buffer(leaf, &subid_le, offset,
+ sizeof(subid_le));
+ subid_cpu = le64_to_cpu(subid_le);
+ ret = check_func(fs_info, uuid, key.type, subid_cpu);
+ if (ret < 0)
+ goto out;
+ if (ret > 0) {
+ btrfs_release_path(path);
+ ret = btrfs_uuid_iter_rem(root, uuid, key.type,
+ subid_cpu);
+ if (ret == 0) {
+ /*
+ * this might look inefficient, but the
+ * justification is that it is an
+ * exception that check_func returns 1,
+ * and that in the regular case only one
+ * entry per UUID exists.
+ */
+ goto again_search_slot;
+ }
+ if (ret < 0 && ret != -ENOENT)
+ goto out;
+ }
+ item_size -= sizeof(subid_le);
+ offset += sizeof(subid_le);
+ }
+
+skip:
+ ret = btrfs_next_item(root, path);
+ if (ret == 0)
+ continue;
+ else if (ret > 0)
+ ret = 0;
+ break;
+ }
+
+out:
+ btrfs_free_path(path);
+ if (ret)
+ pr_warn("btrfs: btrfs_uuid_tree_iterate failed %d\n", ret);
+ return 0;
+}
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 78b8717..0db6370 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -26,8 +26,8 @@
#include <linux/ratelimit.h>
#include <linux/kthread.h>
#include <linux/raid/pq.h>
+#include <linux/semaphore.h>
#include <asm/div64.h>
-#include "compat.h"
#include "ctree.h"
#include "extent_map.h"
#include "disk-io.h"
@@ -62,6 +62,48 @@ static void unlock_chunks(struct btrfs_root *root)
mutex_unlock(&root->fs_info->chunk_mutex);
}
+static struct btrfs_fs_devices *__alloc_fs_devices(void)
+{
+ struct btrfs_fs_devices *fs_devs;
+
+ fs_devs = kzalloc(sizeof(*fs_devs), GFP_NOFS);
+ if (!fs_devs)
+ return ERR_PTR(-ENOMEM);
+
+ mutex_init(&fs_devs->device_list_mutex);
+
+ INIT_LIST_HEAD(&fs_devs->devices);
+ INIT_LIST_HEAD(&fs_devs->alloc_list);
+ INIT_LIST_HEAD(&fs_devs->list);
+
+ return fs_devs;
+}
+
+/**
+ * alloc_fs_devices - allocate struct btrfs_fs_devices
+ * @fsid: a pointer to UUID for this FS. If NULL a new UUID is
+ * generated.
+ *
+ * Return: a pointer to a new &struct btrfs_fs_devices on success;
+ * ERR_PTR() on error. Returned struct is not linked onto any lists and
+ * can be destroyed with kfree() right away.
+ */
+static struct btrfs_fs_devices *alloc_fs_devices(const u8 *fsid)
+{
+ struct btrfs_fs_devices *fs_devs;
+
+ fs_devs = __alloc_fs_devices();
+ if (IS_ERR(fs_devs))
+ return fs_devs;
+
+ if (fsid)
+ memcpy(fs_devs->fsid, fsid, BTRFS_FSID_SIZE);
+ else
+ generate_random_uuid(fs_devs->fsid);
+
+ return fs_devs;
+}
+
static void free_fs_devices(struct btrfs_fs_devices *fs_devices)
{
struct btrfs_device *device;
@@ -101,6 +143,27 @@ void btrfs_cleanup_fs_uuids(void)
}
}
+static struct btrfs_device *__alloc_device(void)
+{
+ struct btrfs_device *dev;
+
+ dev = kzalloc(sizeof(*dev), GFP_NOFS);
+ if (!dev)
+ return ERR_PTR(-ENOMEM);
+
+ INIT_LIST_HEAD(&dev->dev_list);
+ INIT_LIST_HEAD(&dev->dev_alloc_list);
+
+ spin_lock_init(&dev->io_lock);
+
+ spin_lock_init(&dev->reada_lock);
+ atomic_set(&dev->reada_in_flight, 0);
+ INIT_RADIX_TREE(&dev->reada_zones, GFP_NOFS & ~__GFP_WAIT);
+ INIT_RADIX_TREE(&dev->reada_extents, GFP_NOFS & ~__GFP_WAIT);
+
+ return dev;
+}
+
static noinline struct btrfs_device *__find_device(struct list_head *head,
u64 devid, u8 *uuid)
{
@@ -395,16 +458,14 @@ static noinline int device_list_add(const char *path,
fs_devices = find_fsid(disk_super->fsid);
if (!fs_devices) {
- fs_devices = kzalloc(sizeof(*fs_devices), GFP_NOFS);
- if (!fs_devices)
- return -ENOMEM;
- INIT_LIST_HEAD(&fs_devices->devices);
- INIT_LIST_HEAD(&fs_devices->alloc_list);
+ fs_devices = alloc_fs_devices(disk_super->fsid);
+ if (IS_ERR(fs_devices))
+ return PTR_ERR(fs_devices);
+
list_add(&fs_devices->list, &fs_uuids);
- memcpy(fs_devices->fsid, disk_super->fsid, BTRFS_FSID_SIZE);
fs_devices->latest_devid = devid;
fs_devices->latest_trans = found_transid;
- mutex_init(&fs_devices->device_list_mutex);
+
device = NULL;
} else {
device = __find_device(&fs_devices->devices, devid,
@@ -414,17 +475,12 @@ static noinline int device_list_add(const char *path,
if (fs_devices->opened)
return -EBUSY;
- device = kzalloc(sizeof(*device), GFP_NOFS);
- if (!device) {
+ device = btrfs_alloc_device(NULL, &devid,
+ disk_super->dev_item.uuid);
+ if (IS_ERR(device)) {
/* we can safely leave the fs_devices entry around */
- return -ENOMEM;
+ return PTR_ERR(device);
}
- device->devid = devid;
- device->dev_stats_valid = 0;
- device->work.func = pending_bios_fn;
- memcpy(device->uuid, disk_super->dev_item.uuid,
- BTRFS_UUID_SIZE);
- spin_lock_init(&device->io_lock);
name = rcu_string_strdup(path, GFP_NOFS);
if (!name) {
@@ -432,22 +488,13 @@ static noinline int device_list_add(const char *path,
return -ENOMEM;
}
rcu_assign_pointer(device->name, name);
- INIT_LIST_HEAD(&device->dev_alloc_list);
-
- /* init readahead state */
- spin_lock_init(&device->reada_lock);
- device->reada_curr_zone = NULL;
- atomic_set(&device->reada_in_flight, 0);
- device->reada_next = 0;
- INIT_RADIX_TREE(&device->reada_zones, GFP_NOFS & ~__GFP_WAIT);
- INIT_RADIX_TREE(&device->reada_extents, GFP_NOFS & ~__GFP_WAIT);
mutex_lock(&fs_devices->device_list_mutex);
list_add_rcu(&device->dev_list, &fs_devices->devices);
+ fs_devices->num_devices++;
mutex_unlock(&fs_devices->device_list_mutex);
device->fs_devices = fs_devices;
- fs_devices->num_devices++;
} else if (!device->name || strcmp(device->name->str, path)) {
name = rcu_string_strdup(path, GFP_NOFS);
if (!name)
@@ -474,25 +521,21 @@ static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig)
struct btrfs_device *device;
struct btrfs_device *orig_dev;
- fs_devices = kzalloc(sizeof(*fs_devices), GFP_NOFS);
- if (!fs_devices)
- return ERR_PTR(-ENOMEM);
+ fs_devices = alloc_fs_devices(orig->fsid);
+ if (IS_ERR(fs_devices))
+ return fs_devices;
- INIT_LIST_HEAD(&fs_devices->devices);
- INIT_LIST_HEAD(&fs_devices->alloc_list);
- INIT_LIST_HEAD(&fs_devices->list);
- mutex_init(&fs_devices->device_list_mutex);
fs_devices->latest_devid = orig->latest_devid;
fs_devices->latest_trans = orig->latest_trans;
fs_devices->total_devices = orig->total_devices;
- memcpy(fs_devices->fsid, orig->fsid, sizeof(fs_devices->fsid));
/* We have held the volume lock, it is safe to get the devices. */
list_for_each_entry(orig_dev, &orig->devices, dev_list) {
struct rcu_string *name;
- device = kzalloc(sizeof(*device), GFP_NOFS);
- if (!device)
+ device = btrfs_alloc_device(NULL, &orig_dev->devid,
+ orig_dev->uuid);
+ if (IS_ERR(device))
goto error;
/*
@@ -506,13 +549,6 @@ static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig)
}
rcu_assign_pointer(device->name, name);
- device->devid = orig_dev->devid;
- device->work.func = pending_bios_fn;
- memcpy(device->uuid, orig_dev->uuid, sizeof(device->uuid));
- spin_lock_init(&device->io_lock);
- INIT_LIST_HEAD(&device->dev_list);
- INIT_LIST_HEAD(&device->dev_alloc_list);
-
list_add(&device->dev_list, &fs_devices->devices);
device->fs_devices = fs_devices;
fs_devices->num_devices++;
@@ -629,30 +665,30 @@ static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices)
if (device->bdev)
fs_devices->open_devices--;
- if (device->writeable && !device->is_tgtdev_for_dev_replace) {
+ if (device->writeable &&
+ device->devid != BTRFS_DEV_REPLACE_DEVID) {
list_del_init(&device->dev_alloc_list);
fs_devices->rw_devices--;
}
if (device->can_discard)
fs_devices->num_can_discard--;
+ if (device->missing)
+ fs_devices->missing_devices--;
- new_device = kmalloc(sizeof(*new_device), GFP_NOFS);
- BUG_ON(!new_device); /* -ENOMEM */
- memcpy(new_device, device, sizeof(*new_device));
+ new_device = btrfs_alloc_device(NULL, &device->devid,
+ device->uuid);
+ BUG_ON(IS_ERR(new_device)); /* -ENOMEM */
/* Safe because we are under uuid_mutex */
if (device->name) {
name = rcu_string_strdup(device->name->str, GFP_NOFS);
- BUG_ON(device->name && !name); /* -ENOMEM */
+ BUG_ON(!name); /* -ENOMEM */
rcu_assign_pointer(new_device->name, name);
}
- new_device->bdev = NULL;
- new_device->writeable = 0;
- new_device->in_fs_metadata = 0;
- new_device->can_discard = 0;
- spin_lock_init(&new_device->io_lock);
+
list_replace_rcu(&device->dev_list, &new_device->dev_list);
+ new_device->fs_devices = device->fs_devices;
call_rcu(&device->rcu, free_device);
}
@@ -760,7 +796,8 @@ static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
fs_devices->rotating = 1;
fs_devices->open_devices++;
- if (device->writeable && !device->is_tgtdev_for_dev_replace) {
+ if (device->writeable &&
+ device->devid != BTRFS_DEV_REPLACE_DEVID) {
fs_devices->rw_devices++;
list_add(&device->dev_alloc_list,
&fs_devices->alloc_list);
@@ -865,7 +902,7 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder,
disk_super = p + (bytenr & ~PAGE_CACHE_MASK);
if (btrfs_super_bytenr(disk_super) != bytenr ||
- disk_super->magic != cpu_to_le64(BTRFS_MAGIC))
+ btrfs_super_magic(disk_super) != BTRFS_MAGIC)
goto error_unmap;
devid = btrfs_stack_device_id(&disk_super->dev_item);
@@ -875,13 +912,12 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder,
if (disk_super->label[0]) {
if (disk_super->label[BTRFS_LABEL_SIZE - 1])
disk_super->label[BTRFS_LABEL_SIZE - 1] = '\0';
- printk(KERN_INFO "device label %s ", disk_super->label);
+ printk(KERN_INFO "btrfs: device label %s ", disk_super->label);
} else {
- printk(KERN_INFO "device fsid %pU ", disk_super->fsid);
+ printk(KERN_INFO "btrfs: device fsid %pU ", disk_super->fsid);
}
- printk(KERN_CONT "devid %llu transid %llu %s\n",
- (unsigned long long)devid, (unsigned long long)transid, path);
+ printk(KERN_CONT "devid %llu transid %llu %s\n", devid, transid, path);
ret = device_list_add(path, disk_super, devid, fs_devices_ret);
if (!ret && fs_devices_ret)
@@ -1278,8 +1314,7 @@ static int btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans,
btrfs_set_dev_extent_chunk_offset(leaf, extent, chunk_offset);
write_extent_buffer(leaf, root->fs_info->chunk_tree_uuid,
- (unsigned long)btrfs_dev_extent_chunk_tree_uuid(extent),
- BTRFS_UUID_SIZE);
+ btrfs_dev_extent_chunk_tree_uuid(extent), BTRFS_UUID_SIZE);
btrfs_set_dev_extent_length(leaf, extent, num_bytes);
btrfs_mark_buffer_dirty(leaf);
@@ -1307,15 +1342,14 @@ static u64 find_next_chunk(struct btrfs_fs_info *fs_info)
return ret;
}
-static noinline int find_next_devid(struct btrfs_root *root, u64 *objectid)
+static noinline int find_next_devid(struct btrfs_fs_info *fs_info,
+ u64 *devid_ret)
{
int ret;
struct btrfs_key key;
struct btrfs_key found_key;
struct btrfs_path *path;
- root = root->fs_info->chunk_root;
-
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
@@ -1324,20 +1358,21 @@ static noinline int find_next_devid(struct btrfs_root *root, u64 *objectid)
key.type = BTRFS_DEV_ITEM_KEY;
key.offset = (u64)-1;
- ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+ ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, path, 0, 0);
if (ret < 0)
goto error;
BUG_ON(ret == 0); /* Corruption */
- ret = btrfs_previous_item(root, path, BTRFS_DEV_ITEMS_OBJECTID,
+ ret = btrfs_previous_item(fs_info->chunk_root, path,
+ BTRFS_DEV_ITEMS_OBJECTID,
BTRFS_DEV_ITEM_KEY);
if (ret) {
- *objectid = 1;
+ *devid_ret = 1;
} else {
btrfs_item_key_to_cpu(path->nodes[0], &found_key,
path->slots[0]);
- *objectid = found_key.offset + 1;
+ *devid_ret = found_key.offset + 1;
}
ret = 0;
error:
@@ -1391,9 +1426,9 @@ static int btrfs_add_device(struct btrfs_trans_handle *trans,
btrfs_set_device_bandwidth(leaf, dev_item, 0);
btrfs_set_device_start_offset(leaf, dev_item, 0);
- ptr = (unsigned long)btrfs_device_uuid(dev_item);
+ ptr = btrfs_device_uuid(dev_item);
write_extent_buffer(leaf, device->uuid, ptr, BTRFS_UUID_SIZE);
- ptr = (unsigned long)btrfs_device_fsid(dev_item);
+ ptr = btrfs_device_fsid(dev_item);
write_extent_buffer(leaf, root->fs_info->fsid, ptr, BTRFS_UUID_SIZE);
btrfs_mark_buffer_dirty(leaf);
@@ -1562,7 +1597,9 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
clear_super = true;
}
+ mutex_unlock(&uuid_mutex);
ret = btrfs_shrink_device(device, 0);
+ mutex_lock(&uuid_mutex);
if (ret)
goto error_undo;
@@ -1586,7 +1623,11 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
/*
* the device list mutex makes sure that we don't change
* the device list while someone else is writing out all
- * the device supers.
+ * the device supers. Whoever is writing all supers, should
+ * lock the device list mutex before getting the number of
+ * devices in the super block (super_copy). Conversely,
+ * whoever updates the number of devices in the super block
+ * (super_copy) should hold the device list mutex.
*/
cur_devices = device->fs_devices;
@@ -1610,10 +1651,10 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
device->fs_devices->open_devices--;
call_rcu(&device->rcu, free_device);
- mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
num_devices = btrfs_super_num_devices(root->fs_info->super_copy) - 1;
btrfs_set_super_num_devices(root->fs_info->super_copy, num_devices);
+ mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
if (cur_devices->open_devices == 0) {
struct btrfs_fs_devices *fs_devices;
@@ -1675,6 +1716,7 @@ void btrfs_rm_dev_replace_srcdev(struct btrfs_fs_info *fs_info,
struct btrfs_device *srcdev)
{
WARN_ON(!mutex_is_locked(&fs_info->fs_devices->device_list_mutex));
+
list_del_rcu(&srcdev->dev_list);
list_del_rcu(&srcdev->dev_alloc_list);
fs_info->fs_devices->num_devices--;
@@ -1684,9 +1726,13 @@ void btrfs_rm_dev_replace_srcdev(struct btrfs_fs_info *fs_info,
}
if (srcdev->can_discard)
fs_info->fs_devices->num_can_discard--;
- if (srcdev->bdev)
+ if (srcdev->bdev) {
fs_info->fs_devices->open_devices--;
+ /* zero out the old super */
+ btrfs_scratch_superblock(srcdev);
+ }
+
call_rcu(&srcdev->rcu, free_device);
}
@@ -1793,9 +1839,9 @@ static int btrfs_prepare_sprout(struct btrfs_root *root)
if (!fs_devices->seeding)
return -EINVAL;
- seed_devices = kzalloc(sizeof(*fs_devices), GFP_NOFS);
- if (!seed_devices)
- return -ENOMEM;
+ seed_devices = __alloc_fs_devices();
+ if (IS_ERR(seed_devices))
+ return PTR_ERR(seed_devices);
old_devices = clone_fs_devices(fs_devices);
if (IS_ERR(old_devices)) {
@@ -1814,7 +1860,6 @@ static int btrfs_prepare_sprout(struct btrfs_root *root)
mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
list_splice_init_rcu(&fs_devices->devices, &seed_devices->devices,
synchronize_rcu);
- mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
list_splice_init(&fs_devices->alloc_list, &seed_devices->alloc_list);
list_for_each_entry(device, &seed_devices->devices, dev_list) {
@@ -1830,6 +1875,8 @@ static int btrfs_prepare_sprout(struct btrfs_root *root)
generate_random_uuid(fs_devices->fsid);
memcpy(root->fs_info->fsid, fs_devices->fsid, BTRFS_FSID_SIZE);
memcpy(disk_super->fsid, fs_devices->fsid, BTRFS_FSID_SIZE);
+ mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
+
super_flags = btrfs_super_flags(disk_super) &
~BTRFS_SUPER_FLAG_SEEDING;
btrfs_set_super_flags(disk_super, super_flags);
@@ -1889,11 +1936,9 @@ next_slot:
dev_item = btrfs_item_ptr(leaf, path->slots[0],
struct btrfs_dev_item);
devid = btrfs_device_id(leaf, dev_item);
- read_extent_buffer(leaf, dev_uuid,
- (unsigned long)btrfs_device_uuid(dev_item),
+ read_extent_buffer(leaf, dev_uuid, btrfs_device_uuid(dev_item),
BTRFS_UUID_SIZE);
- read_extent_buffer(leaf, fs_uuid,
- (unsigned long)btrfs_device_fsid(dev_item),
+ read_extent_buffer(leaf, fs_uuid, btrfs_device_fsid(dev_item),
BTRFS_UUID_SIZE);
device = btrfs_find_device(root->fs_info, devid, dev_uuid,
fs_uuid);
@@ -1956,10 +2001,10 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
}
mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
- device = kzalloc(sizeof(*device), GFP_NOFS);
- if (!device) {
+ device = btrfs_alloc_device(root->fs_info, NULL, NULL);
+ if (IS_ERR(device)) {
/* we can safely leave the fs_devices entry around */
- ret = -ENOMEM;
+ ret = PTR_ERR(device);
goto error;
}
@@ -1971,13 +2016,6 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
}
rcu_assign_pointer(device->name, name);
- ret = find_next_devid(root, &device->devid);
- if (ret) {
- rcu_string_free(device->name);
- kfree(device);
- goto error;
- }
-
trans = btrfs_start_transaction(root, 0);
if (IS_ERR(trans)) {
rcu_string_free(device->name);
@@ -1992,9 +2030,6 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
if (blk_queue_discard(q))
device->can_discard = 1;
device->writeable = 1;
- device->work.func = pending_bios_fn;
- generate_random_uuid(device->uuid);
- spin_lock_init(&device->io_lock);
device->generation = trans->transid;
device->io_width = root->sectorsize;
device->io_align = root->sectorsize;
@@ -2006,6 +2041,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
device->in_fs_metadata = 1;
device->is_tgtdev_for_dev_replace = 0;
device->mode = FMODE_EXCL;
+ device->dev_stats_valid = 1;
set_blocksize(device->bdev, 4096);
if (seeding_dev) {
@@ -2121,6 +2157,7 @@ int btrfs_init_dev_replace_tgtdev(struct btrfs_root *root, char *device_path,
struct btrfs_fs_info *fs_info = root->fs_info;
struct list_head *devices;
struct rcu_string *name;
+ u64 devid = BTRFS_DEV_REPLACE_DEVID;
int ret = 0;
*device_out = NULL;
@@ -2142,9 +2179,9 @@ int btrfs_init_dev_replace_tgtdev(struct btrfs_root *root, char *device_path,
}
}
- device = kzalloc(sizeof(*device), GFP_NOFS);
- if (!device) {
- ret = -ENOMEM;
+ device = btrfs_alloc_device(NULL, &devid, NULL);
+ if (IS_ERR(device)) {
+ ret = PTR_ERR(device);
goto error;
}
@@ -2161,10 +2198,6 @@ int btrfs_init_dev_replace_tgtdev(struct btrfs_root *root, char *device_path,
device->can_discard = 1;
mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
device->writeable = 1;
- device->work.func = pending_bios_fn;
- generate_random_uuid(device->uuid);
- device->devid = BTRFS_DEV_REPLACE_DEVID;
- spin_lock_init(&device->io_lock);
device->generation = 0;
device->io_width = root->sectorsize;
device->io_align = root->sectorsize;
@@ -2176,6 +2209,7 @@ int btrfs_init_dev_replace_tgtdev(struct btrfs_root *root, char *device_path,
device->in_fs_metadata = 1;
device->is_tgtdev_for_dev_replace = 1;
device->mode = FMODE_EXCL;
+ device->dev_stats_valid = 1;
set_blocksize(device->bdev, 4096);
device->fs_devices = fs_info->fs_devices;
list_add(&device->dev_list, &fs_info->fs_devices->devices);
@@ -2518,8 +2552,7 @@ again:
failed = 0;
retried = true;
goto again;
- } else if (failed && retried) {
- WARN_ON(1);
+ } else if (WARN_ON(failed && retried)) {
ret = -ENOSPC;
}
error:
@@ -2971,10 +3004,6 @@ again:
if (found_key.objectid != key.objectid)
break;
- /* chunk zero is special */
- if (found_key.offset == 0)
- break;
-
chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
if (!counting) {
@@ -3010,6 +3039,8 @@ again:
spin_unlock(&fs_info->balance_lock);
}
loop:
+ if (found_key.offset == 0)
+ break;
key.offset = found_key.offset - 1;
}
@@ -3074,9 +3105,6 @@ static void __cancel_balance(struct btrfs_fs_info *fs_info)
atomic_set(&fs_info->mutually_exclusive_operation_running, 0);
}
-void update_ioctl_balance_args(struct btrfs_fs_info *fs_info, int lock,
- struct btrfs_ioctl_balance_args *bargs);
-
/*
* Should be called with both balance and volume mutexes held
*/
@@ -3139,7 +3167,7 @@ int btrfs_balance(struct btrfs_balance_control *bctl,
(bctl->data.target & ~allowed))) {
printk(KERN_ERR "btrfs: unable to start balance with target "
"data profile %llu\n",
- (unsigned long long)bctl->data.target);
+ bctl->data.target);
ret = -EINVAL;
goto out;
}
@@ -3148,7 +3176,7 @@ int btrfs_balance(struct btrfs_balance_control *bctl,
(bctl->meta.target & ~allowed))) {
printk(KERN_ERR "btrfs: unable to start balance with target "
"metadata profile %llu\n",
- (unsigned long long)bctl->meta.target);
+ bctl->meta.target);
ret = -EINVAL;
goto out;
}
@@ -3157,7 +3185,7 @@ int btrfs_balance(struct btrfs_balance_control *bctl,
(bctl->sys.target & ~allowed))) {
printk(KERN_ERR "btrfs: unable to start balance with target "
"system profile %llu\n",
- (unsigned long long)bctl->sys.target);
+ bctl->sys.target);
ret = -EINVAL;
goto out;
}
@@ -3302,7 +3330,7 @@ int btrfs_resume_balance_async(struct btrfs_fs_info *fs_info)
}
tsk = kthread_run(balance_kthread, fs_info, "btrfs-balance");
- return PTR_RET(tsk);
+ return PTR_ERR_OR_ZERO(tsk);
}
int btrfs_recover_balance(struct btrfs_fs_info *fs_info)
@@ -3396,6 +3424,9 @@ int btrfs_pause_balance(struct btrfs_fs_info *fs_info)
int btrfs_cancel_balance(struct btrfs_fs_info *fs_info)
{
+ if (fs_info->sb->s_flags & MS_RDONLY)
+ return -EROFS;
+
mutex_lock(&fs_info->balance_mutex);
if (!fs_info->balance_ctl) {
mutex_unlock(&fs_info->balance_mutex);
@@ -3430,6 +3461,264 @@ int btrfs_cancel_balance(struct btrfs_fs_info *fs_info)
return 0;
}
+static int btrfs_uuid_scan_kthread(void *data)
+{
+ struct btrfs_fs_info *fs_info = data;
+ struct btrfs_root *root = fs_info->tree_root;
+ struct btrfs_key key;
+ struct btrfs_key max_key;
+ struct btrfs_path *path = NULL;
+ int ret = 0;
+ struct extent_buffer *eb;
+ int slot;
+ struct btrfs_root_item root_item;
+ u32 item_size;
+ struct btrfs_trans_handle *trans = NULL;
+
+ path = btrfs_alloc_path();
+ if (!path) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ key.objectid = 0;
+ key.type = BTRFS_ROOT_ITEM_KEY;
+ key.offset = 0;
+
+ max_key.objectid = (u64)-1;
+ max_key.type = BTRFS_ROOT_ITEM_KEY;
+ max_key.offset = (u64)-1;
+
+ path->keep_locks = 1;
+
+ while (1) {
+ ret = btrfs_search_forward(root, &key, path, 0);
+ if (ret) {
+ if (ret > 0)
+ ret = 0;
+ break;
+ }
+
+ if (key.type != BTRFS_ROOT_ITEM_KEY ||
+ (key.objectid < BTRFS_FIRST_FREE_OBJECTID &&
+ key.objectid != BTRFS_FS_TREE_OBJECTID) ||
+ key.objectid > BTRFS_LAST_FREE_OBJECTID)
+ goto skip;
+
+ eb = path->nodes[0];
+ slot = path->slots[0];
+ item_size = btrfs_item_size_nr(eb, slot);
+ if (item_size < sizeof(root_item))
+ goto skip;
+
+ read_extent_buffer(eb, &root_item,
+ btrfs_item_ptr_offset(eb, slot),
+ (int)sizeof(root_item));
+ if (btrfs_root_refs(&root_item) == 0)
+ goto skip;
+
+ if (!btrfs_is_empty_uuid(root_item.uuid) ||
+ !btrfs_is_empty_uuid(root_item.received_uuid)) {
+ if (trans)
+ goto update_tree;
+
+ btrfs_release_path(path);
+ /*
+ * 1 - subvol uuid item
+ * 1 - received_subvol uuid item
+ */
+ trans = btrfs_start_transaction(fs_info->uuid_root, 2);
+ if (IS_ERR(trans)) {
+ ret = PTR_ERR(trans);
+ break;
+ }
+ continue;
+ } else {
+ goto skip;
+ }
+update_tree:
+ if (!btrfs_is_empty_uuid(root_item.uuid)) {
+ ret = btrfs_uuid_tree_add(trans, fs_info->uuid_root,
+ root_item.uuid,
+ BTRFS_UUID_KEY_SUBVOL,
+ key.objectid);
+ if (ret < 0) {
+ pr_warn("btrfs: uuid_tree_add failed %d\n",
+ ret);
+ break;
+ }
+ }
+
+ if (!btrfs_is_empty_uuid(root_item.received_uuid)) {
+ ret = btrfs_uuid_tree_add(trans, fs_info->uuid_root,
+ root_item.received_uuid,
+ BTRFS_UUID_KEY_RECEIVED_SUBVOL,
+ key.objectid);
+ if (ret < 0) {
+ pr_warn("btrfs: uuid_tree_add failed %d\n",
+ ret);
+ break;
+ }
+ }
+
+skip:
+ if (trans) {
+ ret = btrfs_end_transaction(trans, fs_info->uuid_root);
+ trans = NULL;
+ if (ret)
+ break;
+ }
+
+ btrfs_release_path(path);
+ if (key.offset < (u64)-1) {
+ key.offset++;
+ } else if (key.type < BTRFS_ROOT_ITEM_KEY) {
+ key.offset = 0;
+ key.type = BTRFS_ROOT_ITEM_KEY;
+ } else if (key.objectid < (u64)-1) {
+ key.offset = 0;
+ key.type = BTRFS_ROOT_ITEM_KEY;
+ key.objectid++;
+ } else {
+ break;
+ }
+ cond_resched();
+ }
+
+out:
+ btrfs_free_path(path);
+ if (trans && !IS_ERR(trans))
+ btrfs_end_transaction(trans, fs_info->uuid_root);
+ if (ret)
+ pr_warn("btrfs: btrfs_uuid_scan_kthread failed %d\n", ret);
+ else
+ fs_info->update_uuid_tree_gen = 1;
+ up(&fs_info->uuid_tree_rescan_sem);
+ return 0;
+}
+
+/*
+ * Callback for btrfs_uuid_tree_iterate().
+ * returns:
+ * 0 check succeeded, the entry is not outdated.
+ * < 0 if an error occured.
+ * > 0 if the check failed, which means the caller shall remove the entry.
+ */
+static int btrfs_check_uuid_tree_entry(struct btrfs_fs_info *fs_info,
+ u8 *uuid, u8 type, u64 subid)
+{
+ struct btrfs_key key;
+ int ret = 0;
+ struct btrfs_root *subvol_root;
+
+ if (type != BTRFS_UUID_KEY_SUBVOL &&
+ type != BTRFS_UUID_KEY_RECEIVED_SUBVOL)
+ goto out;
+
+ key.objectid = subid;
+ key.type = BTRFS_ROOT_ITEM_KEY;
+ key.offset = (u64)-1;
+ subvol_root = btrfs_read_fs_root_no_name(fs_info, &key);
+ if (IS_ERR(subvol_root)) {
+ ret = PTR_ERR(subvol_root);
+ if (ret == -ENOENT)
+ ret = 1;
+ goto out;
+ }
+
+ switch (type) {
+ case BTRFS_UUID_KEY_SUBVOL:
+ if (memcmp(uuid, subvol_root->root_item.uuid, BTRFS_UUID_SIZE))
+ ret = 1;
+ break;
+ case BTRFS_UUID_KEY_RECEIVED_SUBVOL:
+ if (memcmp(uuid, subvol_root->root_item.received_uuid,
+ BTRFS_UUID_SIZE))
+ ret = 1;
+ break;
+ }
+
+out:
+ return ret;
+}
+
+static int btrfs_uuid_rescan_kthread(void *data)
+{
+ struct btrfs_fs_info *fs_info = (struct btrfs_fs_info *)data;
+ int ret;
+
+ /*
+ * 1st step is to iterate through the existing UUID tree and
+ * to delete all entries that contain outdated data.
+ * 2nd step is to add all missing entries to the UUID tree.
+ */
+ ret = btrfs_uuid_tree_iterate(fs_info, btrfs_check_uuid_tree_entry);
+ if (ret < 0) {
+ pr_warn("btrfs: iterating uuid_tree failed %d\n", ret);
+ up(&fs_info->uuid_tree_rescan_sem);
+ return ret;
+ }
+ return btrfs_uuid_scan_kthread(data);
+}
+
+int btrfs_create_uuid_tree(struct btrfs_fs_info *fs_info)
+{
+ struct btrfs_trans_handle *trans;
+ struct btrfs_root *tree_root = fs_info->tree_root;
+ struct btrfs_root *uuid_root;
+ struct task_struct *task;
+ int ret;
+
+ /*
+ * 1 - root node
+ * 1 - root item
+ */
+ trans = btrfs_start_transaction(tree_root, 2);
+ if (IS_ERR(trans))
+ return PTR_ERR(trans);
+
+ uuid_root = btrfs_create_tree(trans, fs_info,
+ BTRFS_UUID_TREE_OBJECTID);
+ if (IS_ERR(uuid_root)) {
+ btrfs_abort_transaction(trans, tree_root,
+ PTR_ERR(uuid_root));
+ return PTR_ERR(uuid_root);
+ }
+
+ fs_info->uuid_root = uuid_root;
+
+ ret = btrfs_commit_transaction(trans, tree_root);
+ if (ret)
+ return ret;
+
+ down(&fs_info->uuid_tree_rescan_sem);
+ task = kthread_run(btrfs_uuid_scan_kthread, fs_info, "btrfs-uuid");
+ if (IS_ERR(task)) {
+ /* fs_info->update_uuid_tree_gen remains 0 in all error case */
+ pr_warn("btrfs: failed to start uuid_scan task\n");
+ up(&fs_info->uuid_tree_rescan_sem);
+ return PTR_ERR(task);
+ }
+
+ return 0;
+}
+
+int btrfs_check_uuid_tree(struct btrfs_fs_info *fs_info)
+{
+ struct task_struct *task;
+
+ down(&fs_info->uuid_tree_rescan_sem);
+ task = kthread_run(btrfs_uuid_rescan_kthread, fs_info, "btrfs-uuid");
+ if (IS_ERR(task)) {
+ /* fs_info->update_uuid_tree_gen remains 0 in all error case */
+ pr_warn("btrfs: failed to start uuid_rescan task\n");
+ up(&fs_info->uuid_tree_rescan_sem);
+ return PTR_ERR(task);
+ }
+
+ return 0;
+}
+
/*
* shrinking a device means finding all of the device extents past
* the new size, and then following the back refs to the chunks.
@@ -4194,15 +4483,16 @@ int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len)
* and exit, so return 1 so the callers don't try to use other copies.
*/
if (!em) {
- btrfs_emerg(fs_info, "No mapping for %Lu-%Lu\n", logical,
+ btrfs_crit(fs_info, "No mapping for %Lu-%Lu\n", logical,
logical+len);
return 1;
}
if (em->start > logical || em->start + em->len < logical) {
- btrfs_emerg(fs_info, "Invalid mapping for %Lu-%Lu, got "
+ btrfs_crit(fs_info, "Invalid mapping for %Lu-%Lu, got "
"%Lu-%Lu\n", logical, logical+len, em->start,
em->start + em->len);
+ free_extent_map(em);
return 1;
}
@@ -4375,8 +4665,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
if (!em) {
btrfs_crit(fs_info, "unable to find logical %llu len %llu",
- (unsigned long long)logical,
- (unsigned long long)*length);
+ logical, *length);
return -EINVAL;
}
@@ -4384,6 +4673,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
btrfs_crit(fs_info, "found a bad mapping, wanted %Lu, "
"found %Lu-%Lu\n", logical, em->start,
em->start + em->len);
+ free_extent_map(em);
return -EINVAL;
}
@@ -4611,7 +4901,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
num_stripes = map->num_stripes;
max_errors = nr_parity_stripes(map);
- raid_map = kmalloc(sizeof(u64) * num_stripes,
+ raid_map = kmalloc_array(num_stripes, sizeof(u64),
GFP_NOFS);
if (!raid_map) {
ret = -ENOMEM;
@@ -4671,6 +4961,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
}
bbio = kzalloc(btrfs_bio_size(num_alloc_stripes), GFP_NOFS);
if (!bbio) {
+ kfree(raid_map);
ret = -ENOMEM;
goto out;
}
@@ -5110,10 +5401,8 @@ static int bio_size_ok(struct block_device *bdev, struct bio *bio,
.bi_rw = bio->bi_rw,
};
- if (bio->bi_vcnt == 0) {
- WARN_ON(1);
+ if (WARN_ON(bio->bi_vcnt == 0))
return 1;
- }
prev = &bio->bi_io_vec[bio->bi_vcnt - 1];
if (bio_sectors(bio) > max_sectors)
@@ -5246,9 +5535,7 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,
if (map_length < length) {
btrfs_crit(root->fs_info, "mapping failed logical %llu bio len %llu len %llu",
- (unsigned long long)logical,
- (unsigned long long)length,
- (unsigned long long)map_length);
+ logical, length, map_length);
BUG();
}
@@ -5314,23 +5601,70 @@ static struct btrfs_device *add_missing_dev(struct btrfs_root *root,
struct btrfs_device *device;
struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices;
- device = kzalloc(sizeof(*device), GFP_NOFS);
- if (!device)
+ device = btrfs_alloc_device(NULL, &devid, dev_uuid);
+ if (IS_ERR(device))
return NULL;
- list_add(&device->dev_list,
- &fs_devices->devices);
- device->devid = devid;
- device->work.func = pending_bios_fn;
+
+ list_add(&device->dev_list, &fs_devices->devices);
device->fs_devices = fs_devices;
- device->missing = 1;
fs_devices->num_devices++;
+
+ device->missing = 1;
fs_devices->missing_devices++;
- spin_lock_init(&device->io_lock);
- INIT_LIST_HEAD(&device->dev_alloc_list);
- memcpy(device->uuid, dev_uuid, BTRFS_UUID_SIZE);
+
return device;
}
+/**
+ * btrfs_alloc_device - allocate struct btrfs_device
+ * @fs_info: used only for generating a new devid, can be NULL if
+ * devid is provided (i.e. @devid != NULL).
+ * @devid: a pointer to devid for this device. If NULL a new devid
+ * is generated.
+ * @uuid: a pointer to UUID for this device. If NULL a new UUID
+ * is generated.
+ *
+ * Return: a pointer to a new &struct btrfs_device on success; ERR_PTR()
+ * on error. Returned struct is not linked onto any lists and can be
+ * destroyed with kfree() right away.
+ */
+struct btrfs_device *btrfs_alloc_device(struct btrfs_fs_info *fs_info,
+ const u64 *devid,
+ const u8 *uuid)
+{
+ struct btrfs_device *dev;
+ u64 tmp;
+
+ if (WARN_ON(!devid && !fs_info))
+ return ERR_PTR(-EINVAL);
+
+ dev = __alloc_device();
+ if (IS_ERR(dev))
+ return dev;
+
+ if (devid)
+ tmp = *devid;
+ else {
+ int ret;
+
+ ret = find_next_devid(fs_info, &tmp);
+ if (ret) {
+ kfree(dev);
+ return ERR_PTR(ret);
+ }
+ }
+ dev->devid = tmp;
+
+ if (uuid)
+ memcpy(dev->uuid, uuid, BTRFS_UUID_SIZE);
+ else
+ generate_random_uuid(dev->uuid);
+
+ dev->work.func = pending_bios_fn;
+
+ return dev;
+}
+
static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key,
struct extent_buffer *leaf,
struct btrfs_chunk *chunk)
@@ -5437,7 +5771,7 @@ static void fill_device_from_item(struct extent_buffer *leaf,
WARN_ON(device->devid == BTRFS_DEV_REPLACE_DEVID);
device->is_tgtdev_for_dev_replace = 0;
- ptr = (unsigned long)btrfs_device_uuid(dev_item);
+ ptr = btrfs_device_uuid(dev_item);
read_extent_buffer(leaf, device->uuid, ptr, BTRFS_UUID_SIZE);
}
@@ -5500,11 +5834,9 @@ static int read_one_dev(struct btrfs_root *root,
u8 dev_uuid[BTRFS_UUID_SIZE];
devid = btrfs_device_id(leaf, dev_item);
- read_extent_buffer(leaf, dev_uuid,
- (unsigned long)btrfs_device_uuid(dev_item),
+ read_extent_buffer(leaf, dev_uuid, btrfs_device_uuid(dev_item),
BTRFS_UUID_SIZE);
- read_extent_buffer(leaf, fs_uuid,
- (unsigned long)btrfs_device_fsid(dev_item),
+ read_extent_buffer(leaf, fs_uuid, btrfs_device_fsid(dev_item),
BTRFS_UUID_SIZE);
if (memcmp(fs_uuid, root->fs_info->fsid, BTRFS_UUID_SIZE)) {
@@ -5519,8 +5851,7 @@ static int read_one_dev(struct btrfs_root *root,
return -EIO;
if (!device) {
- btrfs_warn(root->fs_info, "devid %llu missing",
- (unsigned long long)devid);
+ btrfs_warn(root->fs_info, "devid %llu missing", devid);
device = add_missing_dev(root, devid, dev_uuid);
if (!device)
return -ENOMEM;
@@ -5644,14 +5975,15 @@ int btrfs_read_chunk_tree(struct btrfs_root *root)
mutex_lock(&uuid_mutex);
lock_chunks(root);
- /* first we search for all of the device items, and then we
- * read in all of the chunk items. This way we can create chunk
- * mappings that reference all of the devices that are afound
+ /*
+ * Read all device items, and then all the chunk items. All
+ * device items are found before any chunk item (their object id
+ * is smaller than the lowest possible object id for a chunk
+ * item - BTRFS_FIRST_CHUNK_TREE_OBJECTID).
*/
key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
key.offset = 0;
key.type = 0;
-again:
ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
if (ret < 0)
goto error;
@@ -5667,17 +5999,13 @@ again:
break;
}
btrfs_item_key_to_cpu(leaf, &found_key, slot);
- if (key.objectid == BTRFS_DEV_ITEMS_OBJECTID) {
- if (found_key.objectid != BTRFS_DEV_ITEMS_OBJECTID)
- break;
- if (found_key.type == BTRFS_DEV_ITEM_KEY) {
- struct btrfs_dev_item *dev_item;
- dev_item = btrfs_item_ptr(leaf, slot,
+ if (found_key.type == BTRFS_DEV_ITEM_KEY) {
+ struct btrfs_dev_item *dev_item;
+ dev_item = btrfs_item_ptr(leaf, slot,
struct btrfs_dev_item);
- ret = read_one_dev(root, leaf, dev_item);
- if (ret)
- goto error;
- }
+ ret = read_one_dev(root, leaf, dev_item);
+ if (ret)
+ goto error;
} else if (found_key.type == BTRFS_CHUNK_ITEM_KEY) {
struct btrfs_chunk *chunk;
chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
@@ -5687,11 +6015,6 @@ again:
}
path->slots[0]++;
}
- if (key.objectid == BTRFS_DEV_ITEMS_OBJECTID) {
- key.objectid = 0;
- btrfs_release_path(path);
- goto again;
- }
ret = 0;
error:
unlock_chunks(root);
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 8670558..8b3cd14 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -43,9 +43,8 @@ struct btrfs_device {
/* WRITE_SYNC bios */
struct btrfs_pending_bios pending_sync_bios;
- int running_pending;
u64 generation;
-
+ int running_pending;
int writeable;
int in_fs_metadata;
int missing;
@@ -53,11 +52,11 @@ struct btrfs_device {
int is_tgtdev_for_dev_replace;
spinlock_t io_lock;
+ /* the mode sent to blkdev_get */
+ fmode_t mode;
struct block_device *bdev;
- /* the mode sent to blkdev_get */
- fmode_t mode;
struct rcu_string *name;
@@ -78,16 +77,21 @@ struct btrfs_device {
/* optimal io width for this device */
u32 io_width;
+ /* type and info about this device */
+ u64 type;
/* minimal io size for this device */
u32 sector_size;
- /* type and info about this device */
- u64 type;
/* physical drive uuid (or lvm uuid) */
u8 uuid[BTRFS_UUID_SIZE];
+ /* for sending down flush barriers */
+ int nobarriers;
+ struct bio *flush_bio;
+ struct completion flush_wait;
+
/* per-device scrub information */
struct scrub_ctx *scrub_device;
@@ -103,10 +107,6 @@ struct btrfs_device {
struct radix_tree_root reada_zones;
struct radix_tree_root reada_extents;
- /* for sending down flush barriers */
- struct bio *flush_bio;
- struct completion flush_wait;
- int nobarriers;
/* disk I/O failure stats. For detailed description refer to
* enum btrfs_dev_stat_values in ioctl.h */
@@ -132,7 +132,9 @@ struct btrfs_fs_devices {
/* all of the devices in the FS, protected by a mutex
* so we can safely walk it to write out the supers without
- * worrying about add/remove by the multi-device code
+ * worrying about add/remove by the multi-device code.
+ * Scrubbing super can kick off supers writing by holding
+ * this mutex lock.
*/
struct mutex device_list_mutex;
struct list_head devices;
@@ -152,6 +154,8 @@ struct btrfs_fs_devices {
int rotating;
};
+#define BTRFS_BIO_INLINE_CSUM_SIZE 64
+
/*
* we need the mirror number and stripe index to be passed around
* the call chain while we are processing end_io (especially errors).
@@ -161,9 +165,14 @@ struct btrfs_fs_devices {
* we allocate are actually btrfs_io_bios. We'll cram as much of
* struct btrfs_bio as we can into this over time.
*/
+typedef void (btrfs_io_bio_end_io_t) (struct btrfs_io_bio *bio, int err);
struct btrfs_io_bio {
unsigned long mirror_num;
unsigned long stripe_index;
+ u8 *csum;
+ u8 csum_inline[BTRFS_BIO_INLINE_CSUM_SIZE];
+ u8 *csum_allocated;
+ btrfs_io_bio_end_io_t *end_io;
struct bio bio;
};
@@ -298,6 +307,9 @@ void btrfs_close_extra_devices(struct btrfs_fs_info *fs_info,
int btrfs_find_device_missing_or_by_path(struct btrfs_root *root,
char *device_path,
struct btrfs_device **device);
+struct btrfs_device *btrfs_alloc_device(struct btrfs_fs_info *fs_info,
+ const u64 *devid,
+ const u8 *uuid);
int btrfs_rm_device(struct btrfs_root *root, char *device_path);
void btrfs_cleanup_fs_uuids(void);
int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len);
@@ -315,6 +327,8 @@ int btrfs_resume_balance_async(struct btrfs_fs_info *fs_info);
int btrfs_recover_balance(struct btrfs_fs_info *fs_info);
int btrfs_pause_balance(struct btrfs_fs_info *fs_info);
int btrfs_cancel_balance(struct btrfs_fs_info *fs_info);
+int btrfs_create_uuid_tree(struct btrfs_fs_info *fs_info);
+int btrfs_check_uuid_tree(struct btrfs_fs_info *fs_info);
int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset);
int find_free_dev_extent(struct btrfs_trans_handle *trans,
struct btrfs_device *device, u64 num_bytes,
OpenPOWER on IntegriCloud