From 95029d7d598babf62276d9006e575992b1333ba5 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Wed, 21 Jan 2009 10:49:16 -0500 Subject: Btrfs: change/remove typedef Change one typedef to a regular enum, and remove an unused one. Signed-off-by: Jan Engelhardt Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) (limited to 'fs/btrfs/ctree.h') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index eee060f..e1fec63 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -454,17 +454,11 @@ struct btrfs_timespec { __le32 nsec; } __attribute__ ((__packed__)); -typedef enum { +enum btrfs_compression_type { BTRFS_COMPRESS_NONE = 0, BTRFS_COMPRESS_ZLIB = 1, BTRFS_COMPRESS_LAST = 2, -} btrfs_compression_type; - -/* we don't understand any encryption methods right now */ -typedef enum { - BTRFS_ENCRYPTION_NONE = 0, - BTRFS_ENCRYPTION_LAST = 1, -} btrfs_encryption_type; +}; struct btrfs_inode_item { /* nfs style generation number */ -- cgit v1.1 From 7237f1833601dcc435a64176c2c347ec4bd959f9 Mon Sep 17 00:00:00 2001 From: Yan Zheng Date: Wed, 21 Jan 2009 12:54:03 -0500 Subject: Btrfs: fix tree logs parallel sync To improve performance, btrfs_sync_log merges tree log sync requests. But it wrongly merges sync requests for different tree logs. If multiple tree logs are synced at the same time, only one of them actually gets synced. This patch has following changes to fix the bug: Move most tree log related fields in btrfs_fs_info to btrfs_root. This allows merging sync requests separately for each tree log. Don't insert root item into the log root tree immediately after log tree is allocated. Root item for log tree is inserted when log tree get synced for the first time. This allows syncing the log root tree without first syncing all log trees. At tree-log sync, btrfs_sync_log first sync the log tree; then updates corresponding root item in the log root tree; sync the log root tree; then update the super block. Signed-off-by: Yan Zheng --- fs/btrfs/ctree.h | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'fs/btrfs/ctree.h') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index e1fec63..de103a8 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -695,9 +695,7 @@ struct btrfs_fs_info { struct btrfs_transaction *running_transaction; wait_queue_head_t transaction_throttle; wait_queue_head_t transaction_wait; - wait_queue_head_t async_submit_wait; - wait_queue_head_t tree_log_wait; struct btrfs_super_block super_copy; struct btrfs_super_block super_for_commit; @@ -724,10 +722,6 @@ struct btrfs_fs_info { atomic_t async_submit_draining; atomic_t nr_async_bios; atomic_t async_delalloc_pages; - atomic_t tree_log_writers; - atomic_t tree_log_commit; - unsigned long tree_log_batch; - u64 tree_log_transid; /* * this is used by the balancing code to wait for all the pending @@ -827,7 +821,14 @@ struct btrfs_root { struct kobject root_kobj; struct completion kobj_unregister; struct mutex objectid_mutex; + struct mutex log_mutex; + wait_queue_head_t log_writer_wait; + wait_queue_head_t log_commit_wait[2]; + atomic_t log_writers; + atomic_t log_commit[2]; + unsigned long log_transid; + unsigned long log_batch; u64 objectid; u64 last_trans; -- cgit v1.1 From c487685d7c18a8481900755aa5c56a7a74193101 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 4 Feb 2009 09:24:25 -0500 Subject: Btrfs: hash_lock is no longer needed Before metadata is written to disk, it is updated to reflect that writeout has begun. Once this update is done, the block must be cow'd before it can be modified again. This update was originally synchronized by using a per-fs spinlock. Today the buffers for the metadata blocks are locked before writeout begins, and everyone that tests the flag has the buffer locked as well. So, the per-fs spinlock (called hash_lock for no good reason) is no longer required. Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 1 - 1 file changed, 1 deletion(-) (limited to 'fs/btrfs/ctree.h') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index de103a8..f2b8d26 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -703,7 +703,6 @@ struct btrfs_fs_info { struct super_block *sb; struct inode *btree_inode; struct backing_dev_info bdi; - spinlock_t hash_lock; struct mutex trans_mutex; struct mutex tree_log_mutex; struct mutex transaction_kthread_mutex; -- cgit v1.1 From b4ce94de9b4d64e8ab3cf155d13653c666e22b9b Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 4 Feb 2009 09:25:08 -0500 Subject: Btrfs: Change btree locking to use explicit blocking points Most of the btrfs metadata operations can be protected by a spinlock, but some operations still need to schedule. So far, btrfs has been using a mutex along with a trylock loop, most of the time it is able to avoid going for the full mutex, so the trylock loop is a big performance gain. This commit is step one for getting rid of the blocking locks entirely. btrfs_tree_lock takes a spinlock, and the code explicitly switches to a blocking lock when it starts an operation that can schedule. We'll be able get rid of the blocking locks in smaller pieces over time. Tracing allows us to find the most common cause of blocking, so we can start with the hot spots first. The basic idea is: btrfs_tree_lock() returns with the spin lock held btrfs_set_lock_blocking() sets the EXTENT_BUFFER_BLOCKING bit in the extent buffer flags, and then drops the spin lock. The buffer is still considered locked by all of the btrfs code. If btrfs_tree_lock gets the spinlock but finds the blocking bit set, it drops the spin lock and waits on a wait queue for the blocking bit to go away. Much of the code that needs to set the blocking bit finishes without actually blocking a good percentage of the time. So, an adaptive spin is still used against the blocking bit to avoid very high context switch rates. btrfs_clear_lock_blocking() clears the blocking bit and returns with the spinlock held again. btrfs_tree_unlock() can be called on either blocking or spinning locks, it does the right thing based on the blocking bit. ctree.c has a helper function to set/clear all the locked buffers in a path as blocking. Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'fs/btrfs/ctree.h') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index f2b8d26..531db11 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1835,6 +1835,10 @@ void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p); struct btrfs_path *btrfs_alloc_path(void); void btrfs_free_path(struct btrfs_path *p); void btrfs_init_path(struct btrfs_path *p); +void btrfs_set_path_blocking(struct btrfs_path *p); +void btrfs_clear_path_blocking(struct btrfs_path *p); +void btrfs_unlock_up_safe(struct btrfs_path *p, int level); + int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int slot, int nr); int btrfs_del_leaf(struct btrfs_trans_handle *trans, -- cgit v1.1 From e00f7308658622fbd483cb0d9fe41165bf9050d0 Mon Sep 17 00:00:00 2001 From: Jeff Mahoney Date: Thu, 12 Feb 2009 14:11:25 -0500 Subject: Btrfs: remove btrfs_init_path btrfs_init_path was initially used when the path objects were on the stack. Now all the work is done by btrfs_alloc_path and btrfs_init_path isn't required. This patch removes it, and just uses kmem_cache_zalloc to zero out the object. Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 1 - 1 file changed, 1 deletion(-) (limited to 'fs/btrfs/ctree.h') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 531db11..3f7a805 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1834,7 +1834,6 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p); struct btrfs_path *btrfs_alloc_path(void); void btrfs_free_path(struct btrfs_path *p); -void btrfs_init_path(struct btrfs_path *p); void btrfs_set_path_blocking(struct btrfs_path *p); void btrfs_clear_path_blocking(struct btrfs_path *p); void btrfs_unlock_up_safe(struct btrfs_path *p, int level); -- cgit v1.1 From 4008c04a07c73ec3cb1be4c1391d2159a8f75d6d Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 12 Feb 2009 14:09:45 -0500 Subject: Btrfs: make a lockdep class for the extent buffer locks Btrfs is currently using spin_lock_nested with a nested value based on the tree depth of the block. But, this doesn't quite work because the max tree depth is bigger than what spin_lock_nested can deal with, and because locks are sometimes taken before the level field is filled in. The solution here is to use lockdep_set_class_and_name instead, and to set the class before unlocking the pages when the block is read from the disk and just after init of a freshly allocated tree block. btrfs_clear_path_blocking is also changed to take the locks in the proper order, and it also makes sure all the locks currently held are properly set to blocking before it tries to retake the spinlocks. Otherwise, lockdep gets upset about bad lock orderin. The lockdep magic cam from Peter Zijlstra Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) (limited to 'fs/btrfs/ctree.h') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 3f7a805..766b31a 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -43,11 +43,7 @@ struct btrfs_ordered_sum; #define BTRFS_ACL_NOT_CACHED ((void *)-1) -#ifdef CONFIG_LOCKDEP -# define BTRFS_MAX_LEVEL 7 -#else -# define BTRFS_MAX_LEVEL 8 -#endif +#define BTRFS_MAX_LEVEL 8 /* holds pointers to all of the tree roots */ #define BTRFS_ROOT_TREE_OBJECTID 1ULL @@ -1715,7 +1711,8 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, u64 empty_size); struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root, - u64 bytenr, u32 blocksize); + u64 bytenr, u32 blocksize, + int level); int btrfs_alloc_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 num_bytes, u64 parent, u64 min_bytes, @@ -1835,7 +1832,6 @@ void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p); struct btrfs_path *btrfs_alloc_path(void); void btrfs_free_path(struct btrfs_path *p); void btrfs_set_path_blocking(struct btrfs_path *p); -void btrfs_clear_path_blocking(struct btrfs_path *p); void btrfs_unlock_up_safe(struct btrfs_path *p, int level); int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root, -- cgit v1.1 From 6a63209fc02d5483371f07e4913ee8abad608051 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 20 Feb 2009 11:00:09 -0500 Subject: Btrfs: add better -ENOSPC handling This is a step in the direction of better -ENOSPC handling. Instead of checking the global bytes counter we check the space_info bytes counters to make sure we have enough space. If we don't we go ahead and try to allocate a new chunk, and then if that fails we return -ENOSPC. This patch adds two counters to btrfs_space_info, bytes_delalloc and bytes_may_use. bytes_delalloc account for extents we've actually setup for delalloc and will be allocated at some point down the line. bytes_may_use is to keep track of how many bytes we may use for delalloc at some point. When we actually set the extent_bit for the delalloc bytes we subtract the reserved bytes from the bytes_may_use counter. This keeps us from not actually being able to allocate space for any delalloc bytes. Signed-off-by: Josef Bacik --- fs/btrfs/ctree.h | 40 +++++++++++++++++++++++++++++++--------- 1 file changed, 31 insertions(+), 9 deletions(-) (limited to 'fs/btrfs/ctree.h') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 766b31a..82491ba 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -596,13 +596,27 @@ struct btrfs_block_group_item { struct btrfs_space_info { u64 flags; - u64 total_bytes; - u64 bytes_used; - u64 bytes_pinned; - u64 bytes_reserved; - u64 bytes_readonly; - int full; - int force_alloc; + + u64 total_bytes; /* total bytes in the space */ + u64 bytes_used; /* total bytes used on disk */ + u64 bytes_pinned; /* total bytes pinned, will be freed when the + transaction finishes */ + u64 bytes_reserved; /* total bytes the allocator has reserved for + current allocations */ + u64 bytes_readonly; /* total bytes that are read only */ + + /* delalloc accounting */ + u64 bytes_delalloc; /* number of bytes reserved for allocation, + this space is not necessarily reserved yet + by the allocator */ + u64 bytes_may_use; /* number of bytes that may be used for + delalloc */ + + int full; /* indicates that we cannot allocate any more + chunks for this space */ + int force_alloc; /* set if we need to force a chunk alloc for + this space */ + struct list_head list; /* for block groups in our same type */ @@ -1782,6 +1796,16 @@ int btrfs_add_dead_reloc_root(struct btrfs_root *root); int btrfs_cleanup_reloc_trees(struct btrfs_root *root); int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len); u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags); +void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *ionde); +int btrfs_check_metadata_free_space(struct btrfs_root *root); +int btrfs_check_data_free_space(struct btrfs_root *root, struct inode *inode, + u64 bytes); +void btrfs_free_reserved_data_space(struct btrfs_root *root, + struct inode *inode, u64 bytes); +void btrfs_delalloc_reserve_space(struct btrfs_root *root, struct inode *inode, + u64 bytes); +void btrfs_delalloc_free_space(struct btrfs_root *root, struct inode *inode, + u64 bytes); /* ctree.c */ int btrfs_previous_item(struct btrfs_root *root, struct btrfs_path *path, u64 min_objectid, @@ -2027,8 +2051,6 @@ int btrfs_merge_bio_hook(struct page *page, unsigned long offset, unsigned long btrfs_force_ra(struct address_space *mapping, struct file_ra_state *ra, struct file *file, pgoff_t offset, pgoff_t last_index); -int btrfs_check_free_space(struct btrfs_root *root, u64 num_required, - int for_del); int btrfs_page_mkwrite(struct vm_area_struct *vma, struct page *page); int btrfs_readpage(struct file *file, struct page *page); void btrfs_delete_inode(struct inode *inode); -- cgit v1.1 From 4184ea7f908d95f329febc3665cf66da8568b467 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 10 Mar 2009 12:39:20 -0400 Subject: Btrfs: Fix locking around adding new space_info Storage allocated to different raid levels in btrfs is tracked by a btrfs_space_info structure, and all of the current space_infos are collected into a list_head. Most filesystems have 3 or 4 of these structs total, and the list is only changed when new raid levels are added or at unmount time. This commit adds rcu locking on the list head, and properly frees things at unmount time. It also clears the space_info->full flag whenever new space is added to the FS. The locking for the space info list goes like this: reads: protected by rcu_read_lock() writes: protected by the chunk_mutex At unmount time we don't need special locking because all the readers are gone. Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'fs/btrfs/ctree.h') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 82491ba..5e1d4e3 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -784,7 +784,14 @@ struct btrfs_fs_info { struct list_head dirty_cowonly_roots; struct btrfs_fs_devices *fs_devices; + + /* + * the space_info list is almost entirely read only. It only changes + * when we add a new raid type to the FS, and that happens + * very rarely. RCU is used to protect it. + */ struct list_head space_info; + spinlock_t delalloc_lock; spinlock_t new_trans_lock; u64 delalloc_bytes; @@ -1797,6 +1804,8 @@ int btrfs_cleanup_reloc_trees(struct btrfs_root *root); int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len); u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags); void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *ionde); +void btrfs_clear_space_info_full(struct btrfs_fs_info *info); + int btrfs_check_metadata_free_space(struct btrfs_root *root); int btrfs_check_data_free_space(struct btrfs_root *root, struct inode *inode, u64 bytes); -- cgit v1.1