diff options
Diffstat (limited to 'fs')
37 files changed, 472 insertions, 341 deletions
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index 7301cdb..a383c18 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -301,10 +301,14 @@ static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info, goto out; eb = path->nodes[level]; - if (!eb) { - WARN_ON(1); - ret = 1; - goto out; + while (!eb) { + if (!level) { + WARN_ON(1); + ret = 1; + goto out; + } + level--; + eb = path->nodes[level]; } ret = add_all_parents(root, path, parents, level, &ref->key_for_search, @@ -835,6 +839,7 @@ again: } ret = __add_delayed_refs(head, delayed_ref_seq, &prefs_delayed); + mutex_unlock(&head->mutex); if (ret) { spin_unlock(&delayed_refs->lock); goto out; @@ -928,8 +933,6 @@ again: } out: - if (head) - mutex_unlock(&head->mutex); btrfs_free_path(path); while (!list_empty(&prefs)) { ref = list_first_entry(&prefs, struct __prelim_ref, list); diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 15cbc2b..8206b39 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -1024,11 +1024,18 @@ __tree_mod_log_oldest_root(struct btrfs_fs_info *fs_info, if (!looped && !tm) return 0; /* - * we must have key remove operations in the log before the - * replace operation. + * if there are no tree operation for the oldest root, we simply + * return it. this should only happen if that (old) root is at + * level 0. */ - BUG_ON(!tm); + if (!tm) + break; + /* + * if there's an operation that's not a root replacement, we + * found the oldest version of our root. normally, we'll find a + * MOD_LOG_KEY_REMOVE_WHILE_FREEING operation here. + */ if (tm->op != MOD_LOG_ROOT_REPLACE) break; @@ -1087,11 +1094,7 @@ __tree_mod_log_rewind(struct extent_buffer *eb, u64 time_seq, tm->generation); break; case MOD_LOG_KEY_ADD: - if (tm->slot != n - 1) { - o_dst = btrfs_node_key_ptr_offset(tm->slot); - o_src = btrfs_node_key_ptr_offset(tm->slot + 1); - memmove_extent_buffer(eb, o_dst, o_src, p_size); - } + /* if a move operation is needed it's in the log */ n--; break; case MOD_LOG_MOVE_KEYS: @@ -1192,16 +1195,8 @@ get_old_root(struct btrfs_root *root, u64 time_seq) } tm = tree_mod_log_search(root->fs_info, logical, time_seq); - /* - * there was an item in the log when __tree_mod_log_oldest_root - * returned. this one must not go away, because the time_seq passed to - * us must be blocking its removal. - */ - BUG_ON(!tm); - if (old_root) - eb = alloc_dummy_extent_buffer(tm->index << PAGE_CACHE_SHIFT, - root->nodesize); + eb = alloc_dummy_extent_buffer(logical, root->nodesize); else eb = btrfs_clone_extent_buffer(root->node); btrfs_tree_read_unlock(root->node); @@ -1216,7 +1211,10 @@ get_old_root(struct btrfs_root *root, u64 time_seq) btrfs_set_header_level(eb, old_root->level); btrfs_set_header_generation(eb, old_generation); } - __tree_mod_log_rewind(eb, time_seq, tm); + if (tm) + __tree_mod_log_rewind(eb, time_seq, tm); + else + WARN_ON(btrfs_header_level(eb) != 0); extent_buffer_get(eb); return eb; @@ -2995,7 +2993,7 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans, static void insert_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, struct btrfs_disk_key *key, u64 bytenr, - int slot, int level, int tree_mod_log) + int slot, int level) { struct extent_buffer *lower; int nritems; @@ -3008,7 +3006,7 @@ static void insert_ptr(struct btrfs_trans_handle *trans, BUG_ON(slot > nritems); BUG_ON(nritems == BTRFS_NODEPTRS_PER_BLOCK(root)); if (slot != nritems) { - if (tree_mod_log && level) + if (level) tree_mod_log_eb_move(root->fs_info, lower, slot + 1, slot, nritems - slot); memmove_extent_buffer(lower, @@ -3016,7 +3014,7 @@ static void insert_ptr(struct btrfs_trans_handle *trans, btrfs_node_key_ptr_offset(slot), (nritems - slot) * sizeof(struct btrfs_key_ptr)); } - if (tree_mod_log && level) { + if (level) { ret = tree_mod_log_insert_key(root->fs_info, lower, slot, MOD_LOG_KEY_ADD); BUG_ON(ret < 0); @@ -3104,7 +3102,7 @@ static noinline int split_node(struct btrfs_trans_handle *trans, btrfs_mark_buffer_dirty(split); insert_ptr(trans, root, path, &disk_key, split->start, - path->slots[level + 1] + 1, level + 1, 1); + path->slots[level + 1] + 1, level + 1); if (path->slots[level] >= mid) { path->slots[level] -= mid; @@ -3641,7 +3639,7 @@ static noinline void copy_for_split(struct btrfs_trans_handle *trans, btrfs_set_header_nritems(l, mid); btrfs_item_key(right, &disk_key, 0); insert_ptr(trans, root, path, &disk_key, right->start, - path->slots[1] + 1, 1, 0); + path->slots[1] + 1, 1); btrfs_mark_buffer_dirty(right); btrfs_mark_buffer_dirty(l); @@ -3848,7 +3846,7 @@ again: if (mid <= slot) { btrfs_set_header_nritems(right, 0); insert_ptr(trans, root, path, &disk_key, right->start, - path->slots[1] + 1, 1, 0); + path->slots[1] + 1, 1); btrfs_tree_unlock(path->nodes[0]); free_extent_buffer(path->nodes[0]); path->nodes[0] = right; @@ -3857,7 +3855,7 @@ again: } else { btrfs_set_header_nritems(right, 0); insert_ptr(trans, root, path, &disk_key, right->start, - path->slots[1], 1, 0); + path->slots[1], 1); btrfs_tree_unlock(path->nodes[0]); free_extent_buffer(path->nodes[0]); path->nodes[0] = right; @@ -5121,6 +5119,18 @@ again: if (!path->skip_locking) { ret = btrfs_try_tree_read_lock(next); + if (!ret && time_seq) { + /* + * If we don't get the lock, we may be racing + * with push_leaf_left, holding that lock while + * itself waiting for the leaf we've currently + * locked. To solve this situation, we give up + * on our lock and cycle. + */ + btrfs_release_path(path); + cond_resched(); + goto again; + } if (!ret) { btrfs_set_path_blocking(path); btrfs_tree_read_lock(next); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 7b845ff..2936ca4 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2354,12 +2354,17 @@ retry_root_backup: BTRFS_CSUM_TREE_OBJECTID, csum_root); if (ret) goto recovery_tree_root; - csum_root->track_dirty = 1; fs_info->generation = generation; fs_info->last_trans_committed = generation; + ret = btrfs_recover_balance(fs_info); + if (ret) { + printk(KERN_WARNING "btrfs: failed to recover balance\n"); + goto fail_block_groups; + } + ret = btrfs_init_dev_stats(fs_info); if (ret) { printk(KERN_ERR "btrfs: failed to init dev_stats: %d\n", @@ -2485,20 +2490,23 @@ retry_root_backup: goto fail_trans_kthread; } - if (!(sb->s_flags & MS_RDONLY)) { - down_read(&fs_info->cleanup_work_sem); - err = btrfs_orphan_cleanup(fs_info->fs_root); - if (!err) - err = btrfs_orphan_cleanup(fs_info->tree_root); - up_read(&fs_info->cleanup_work_sem); + if (sb->s_flags & MS_RDONLY) + return 0; - if (!err) - err = btrfs_recover_balance(fs_info->tree_root); + down_read(&fs_info->cleanup_work_sem); + if ((ret = btrfs_orphan_cleanup(fs_info->fs_root)) || + (ret = btrfs_orphan_cleanup(fs_info->tree_root))) { + up_read(&fs_info->cleanup_work_sem); + close_ctree(tree_root); + return ret; + } + up_read(&fs_info->cleanup_work_sem); - if (err) { - close_ctree(tree_root); - return err; - } + ret = btrfs_resume_balance_async(fs_info); + if (ret) { + printk(KERN_WARNING "btrfs: failed to resume balance\n"); + close_ctree(tree_root); + return ret; } return 0; diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 4b5a1e1..6e1d367 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2347,12 +2347,10 @@ next: return count; } - static void wait_for_more_refs(struct btrfs_delayed_ref_root *delayed_refs, - unsigned long num_refs) + unsigned long num_refs, + struct list_head *first_seq) { - struct list_head *first_seq = delayed_refs->seq_head.next; - spin_unlock(&delayed_refs->lock); pr_debug("waiting for more refs (num %ld, first %p)\n", num_refs, first_seq); @@ -2381,6 +2379,7 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, struct btrfs_delayed_ref_root *delayed_refs; struct btrfs_delayed_ref_node *ref; struct list_head cluster; + struct list_head *first_seq = NULL; int ret; u64 delayed_start; int run_all = count == (unsigned long)-1; @@ -2436,8 +2435,10 @@ again: */ consider_waiting = 1; num_refs = delayed_refs->num_entries; + first_seq = root->fs_info->tree_mod_seq_list.next; } else { - wait_for_more_refs(delayed_refs, num_refs); + wait_for_more_refs(delayed_refs, + num_refs, first_seq); /* * after waiting, things have changed. we * dropped the lock and someone else might have diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index aaa12c1..01c21b6 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -3324,6 +3324,7 @@ static int extent_write_cache_pages(struct extent_io_tree *tree, writepage_t writepage, void *data, void (*flush_fn)(void *)) { + struct inode *inode = mapping->host; int ret = 0; int done = 0; int nr_to_write_done = 0; @@ -3334,6 +3335,18 @@ static int extent_write_cache_pages(struct extent_io_tree *tree, int scanned = 0; int tag; + /* + * We have to hold onto the inode so that ordered extents can do their + * work when the IO finishes. The alternative to this is failing to add + * an ordered extent if the igrab() fails there and that is a huge pain + * to deal with, so instead just hold onto the inode throughout the + * writepages operation. If it fails here we are freeing up the inode + * anyway and we'd rather not waste our time writing out stuff that is + * going to be truncated anyway. + */ + if (!igrab(inode)) + return 0; + pagevec_init(&pvec, 0); if (wbc->range_cyclic) { index = mapping->writeback_index; /* Start from prev offset */ @@ -3428,6 +3441,7 @@ retry: index = 0; goto retry; } + btrfs_add_delayed_iput(inode); return ret; } diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 70dc8ca..9aa01ec 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1334,7 +1334,6 @@ static ssize_t __btrfs_direct_write(struct kiocb *iocb, loff_t *ppos, size_t count, size_t ocount) { struct file *file = iocb->ki_filp; - struct inode *inode = fdentry(file)->d_inode; struct iov_iter i; ssize_t written; ssize_t written_buffered; @@ -1344,18 +1343,6 @@ static ssize_t __btrfs_direct_write(struct kiocb *iocb, written = generic_file_direct_write(iocb, iov, &nr_segs, pos, ppos, count, ocount); - /* - * the generic O_DIRECT will update in-memory i_size after the - * DIOs are done. But our endio handlers that update the on - * disk i_size never update past the in memory i_size. So we - * need one more update here to catch any additions to the - * file - */ - if (inode->i_size != BTRFS_I(inode)->disk_i_size) { - btrfs_ordered_update_i_size(inode, inode->i_size, NULL); - mark_inode_dirty(inode); - } - if (written < 0 || written == count) return written; diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 81296c5..6c4e2ba 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -1543,29 +1543,26 @@ again: end = bitmap_info->offset + (u64)(BITS_PER_BITMAP * ctl->unit) - 1; /* - * XXX - this can go away after a few releases. - * - * since the only user of btrfs_remove_free_space is the tree logging - * stuff, and the only way to test that is under crash conditions, we - * want to have this debug stuff here just in case somethings not - * working. Search the bitmap for the space we are trying to use to - * make sure its actually there. If its not there then we need to stop - * because something has gone wrong. + * We need to search for bits in this bitmap. We could only cover some + * of the extent in this bitmap thanks to how we add space, so we need + * to search for as much as it as we can and clear that amount, and then + * go searching for the next bit. */ search_start = *offset; - search_bytes = *bytes; + search_bytes = ctl->unit; search_bytes = min(search_bytes, end - search_start + 1); ret = search_bitmap(ctl, bitmap_info, &search_start, &search_bytes); BUG_ON(ret < 0 || search_start != *offset); - if (*offset > bitmap_info->offset && *offset + *bytes > end) { - bitmap_clear_bits(ctl, bitmap_info, *offset, end - *offset + 1); - *bytes -= end - *offset + 1; - *offset = end + 1; - } else if (*offset >= bitmap_info->offset && *offset + *bytes <= end) { - bitmap_clear_bits(ctl, bitmap_info, *offset, *bytes); - *bytes = 0; - } + /* We may have found more bits than what we need */ + search_bytes = min(search_bytes, *bytes); + + /* Cannot clear past the end of the bitmap */ + search_bytes = min(search_bytes, end - search_start + 1); + + bitmap_clear_bits(ctl, bitmap_info, search_start, search_bytes); + *offset += search_bytes; + *bytes -= search_bytes; if (*bytes) { struct rb_node *next = rb_next(&bitmap_info->offset_index); @@ -1596,7 +1593,7 @@ again: * everything over again. */ search_start = *offset; - search_bytes = *bytes; + search_bytes = ctl->unit; ret = search_bitmap(ctl, bitmap_info, &search_start, &search_bytes); if (ret < 0 || search_start != *offset) @@ -1879,12 +1876,14 @@ int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group, { struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; struct btrfs_free_space *info; - struct btrfs_free_space *next_info = NULL; int ret = 0; spin_lock(&ctl->tree_lock); again: + if (!bytes) + goto out_lock; + info = tree_search_offset(ctl, offset, 0, 0); if (!info) { /* @@ -1905,88 +1904,48 @@ again: } } - if (info->bytes < bytes && rb_next(&info->offset_index)) { - u64 end; - next_info = rb_entry(rb_next(&info->offset_index), - struct btrfs_free_space, - offset_index); - - if (next_info->bitmap) - end = next_info->offset + - BITS_PER_BITMAP * ctl->unit - 1; - else - end = next_info->offset + next_info->bytes; - - if (next_info->bytes < bytes || - next_info->offset > offset || offset > end) { - printk(KERN_CRIT "Found free space at %llu, size %llu," - " trying to use %llu\n", - (unsigned long long)info->offset, - (unsigned long long)info->bytes, - (unsigned long long)bytes); - WARN_ON(1); - ret = -EINVAL; - goto out_lock; - } - - info = next_info; - } - - if (info->bytes == bytes) { + if (!info->bitmap) { unlink_free_space(ctl, info); - if (info->bitmap) { - kfree(info->bitmap); - ctl->total_bitmaps--; - } - kmem_cache_free(btrfs_free_space_cachep, info); - ret = 0; - goto out_lock; - } - - if (!info->bitmap && info->offset == offset) { - unlink_free_space(ctl, info); - info->offset += bytes; - info->bytes -= bytes; - ret = link_free_space(ctl, info); - WARN_ON(ret); - goto out_lock; - } + if (offset == info->offset) { + u64 to_free = min(bytes, info->bytes); + + info->bytes -= to_free; + info->offset += to_free; + if (info->bytes) { + ret = link_free_space(ctl, info); + WARN_ON(ret); + } else { + kmem_cache_free(btrfs_free_space_cachep, info); + } - if (!info->bitmap && info->offset <= offset && - info->offset + info->bytes >= offset + bytes) { - u64 old_start = info->offset; - /* - * we're freeing space in the middle of the info, - * this can happen during tree log replay - * - * first unlink the old info and then - * insert it again after the hole we're creating - */ - unlink_free_space(ctl, info); - if (offset + bytes < info->offset + info->bytes) { - u64 old_end = info->offset + info->bytes; + offset += to_free; + bytes -= to_free; + goto again; + } else { + u64 old_end = info->bytes + info->offset; - info->offset = offset + bytes; - info->bytes = old_end - info->offset; + info->bytes = offset - info->offset; ret = link_free_space(ctl, info); WARN_ON(ret); if (ret) goto out_lock; - } else { - /* the hole we're creating ends at the end - * of the info struct, just free the info - */ - kmem_cache_free(btrfs_free_space_cachep, info); - } - spin_unlock(&ctl->tree_lock); - /* step two, insert a new info struct to cover - * anything before the hole - */ - ret = btrfs_add_free_space(block_group, old_start, - offset - old_start); - WARN_ON(ret); /* -ENOMEM */ - goto out; + /* Not enough bytes in this entry to satisfy us */ + if (old_end < offset + bytes) { + bytes -= old_end - offset; + offset = old_end; + goto again; + } else if (old_end == offset + bytes) { + /* all done */ + goto out_lock; + } + spin_unlock(&ctl->tree_lock); + + ret = btrfs_add_free_space(block_group, offset + bytes, + old_end - (offset + bytes)); + WARN_ON(ret); + goto out; + } } ret = remove_from_bitmap(ctl, info, &offset, &bytes); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index d8bb0db..a7d1921 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -3754,7 +3754,7 @@ void btrfs_evict_inode(struct inode *inode) btrfs_wait_ordered_range(inode, 0, (u64)-1); if (root->fs_info->log_root_recovering) { - BUG_ON(!test_bit(BTRFS_INODE_HAS_ORPHAN_ITEM, + BUG_ON(test_bit(BTRFS_INODE_HAS_ORPHAN_ITEM, &BTRFS_I(inode)->runtime_flags)); goto no_delete; } @@ -5876,8 +5876,17 @@ map: bh_result->b_size = len; bh_result->b_bdev = em->bdev; set_buffer_mapped(bh_result); - if (create && !test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) - set_buffer_new(bh_result); + if (create) { + if (!test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) + set_buffer_new(bh_result); + + /* + * Need to update the i_size under the extent lock so buffered + * readers will get the updated i_size when we unlock. + */ + if (start + len > i_size_read(inode)) + i_size_write(inode, start + len); + } free_extent_map(em); @@ -6360,12 +6369,48 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb, */ ordered = btrfs_lookup_ordered_range(inode, lockstart, lockend - lockstart + 1); - if (!ordered) + + /* + * We need to make sure there are no buffered pages in this + * range either, we could have raced between the invalidate in + * generic_file_direct_write and locking the extent. The + * invalidate needs to happen so that reads after a write do not + * get stale data. + */ + if (!ordered && (!writing || + !test_range_bit(&BTRFS_I(inode)->io_tree, + lockstart, lockend, EXTENT_UPTODATE, 0, + cached_state))) break; + unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend, &cached_state, GFP_NOFS); - btrfs_start_ordered_extent(inode, ordered, 1); - btrfs_put_ordered_extent(ordered); + + if (ordered) { + btrfs_start_ordered_extent(inode, ordered, 1); + btrfs_put_ordered_extent(ordered); + } else { + /* Screw you mmap */ + ret = filemap_write_and_wait_range(file->f_mapping, + lockstart, + lockend); + if (ret) + goto out; + + /* + * If we found a page that couldn't be invalidated just + * fall back to buffered. + */ + ret = invalidate_inode_pages2_range(file->f_mapping, + lockstart >> PAGE_CACHE_SHIFT, + lockend >> PAGE_CACHE_SHIFT); + if (ret) { + if (ret == -EBUSY) + ret = 0; + goto out; + } + } + cond_resched(); } diff --git a/fs/btrfs/ioctl.h b/fs/btrfs/ioctl.h index 497c530..e440aa6 100644 --- a/fs/btrfs/ioctl.h +++ b/fs/btrfs/ioctl.h @@ -339,7 +339,7 @@ struct btrfs_ioctl_get_dev_stats { #define BTRFS_IOC_WAIT_SYNC _IOW(BTRFS_IOCTL_MAGIC, 22, __u64) #define BTRFS_IOC_SNAP_CREATE_V2 _IOW(BTRFS_IOCTL_MAGIC, 23, \ struct btrfs_ioctl_vol_args_v2) -#define BTRFS_IOC_SUBVOL_GETFLAGS _IOW(BTRFS_IOCTL_MAGIC, 25, __u64) +#define BTRFS_IOC_SUBVOL_GETFLAGS _IOR(BTRFS_IOCTL_MAGIC, 25, __u64) #define BTRFS_IOC_SUBVOL_SETFLAGS _IOW(BTRFS_IOCTL_MAGIC, 26, __u64) #define BTRFS_IOC_SCRUB _IOWR(BTRFS_IOCTL_MAGIC, 27, \ struct btrfs_ioctl_scrub_args) diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 0eb9a4d..e239915 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -1187,6 +1187,10 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data) if (ret) goto restore; + ret = btrfs_resume_balance_async(fs_info); + if (ret) + goto restore; + sb->s_flags &= ~MS_RDONLY; } diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 2017d0f..8abeae4 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -690,6 +690,8 @@ static noinline int drop_one_dir_item(struct btrfs_trans_handle *trans, kfree(name); iput(inode); + + btrfs_run_delayed_items(trans, root); return ret; } @@ -895,6 +897,7 @@ again: ret = btrfs_unlink_inode(trans, root, dir, inode, victim_name, victim_name_len); + btrfs_run_delayed_items(trans, root); } kfree(victim_name); ptr = (unsigned long)(victim_ref + 1) + victim_name_len; @@ -1475,6 +1478,9 @@ again: ret = btrfs_unlink_inode(trans, root, dir, inode, name, name_len); BUG_ON(ret); + + btrfs_run_delayed_items(trans, root); + kfree(name); iput(inode); diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 8a3d259..ecaad40 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -2845,31 +2845,48 @@ out: static int balance_kthread(void *data) { - struct btrfs_balance_control *bctl = - (struct btrfs_balance_control *)data; - struct btrfs_fs_info *fs_info = bctl->fs_info; + struct btrfs_fs_info *fs_info = data; int ret = 0; mutex_lock(&fs_info->volume_mutex); mutex_lock(&fs_info->balance_mutex); - set_balance_control(bctl); - - if (btrfs_test_opt(fs_info->tree_root, SKIP_BALANCE)) { - printk(KERN_INFO "btrfs: force skipping balance\n"); - } else { + if (fs_info->balance_ctl) { printk(KERN_INFO "btrfs: continuing balance\n"); - ret = btrfs_balance(bctl, NULL); + ret = btrfs_balance(fs_info->balance_ctl, NULL); } mutex_unlock(&fs_info->balance_mutex); mutex_unlock(&fs_info->volume_mutex); + return ret; } -int btrfs_recover_balance(struct btrfs_root *tree_root) +int btrfs_resume_balance_async(struct btrfs_fs_info *fs_info) { struct task_struct *tsk; + + spin_lock(&fs_info->balance_lock); + if (!fs_info->balance_ctl) { + spin_unlock(&fs_info->balance_lock); + return 0; + } + spin_unlock(&fs_info->balance_lock); + + if (btrfs_test_opt(fs_info->tree_root, SKIP_BALANCE)) { + printk(KERN_INFO "btrfs: force skipping balance\n"); + return 0; + } + + tsk = kthread_run(balance_kthread, fs_info, "btrfs-balance"); + if (IS_ERR(tsk)) + return PTR_ERR(tsk); + + return 0; +} + +int btrfs_recover_balance(struct btrfs_fs_info *fs_info) +{ struct btrfs_balance_control *bctl; struct btrfs_balance_item *item; struct btrfs_disk_balance_args disk_bargs; @@ -2882,29 +2899,30 @@ int btrfs_recover_balance(struct btrfs_root *tree_root) if (!path) return -ENOMEM; - bctl = kzalloc(sizeof(*bctl), GFP_NOFS); - if (!bctl) { - ret = -ENOMEM; - goto out; - } - key.objectid = BTRFS_BALANCE_OBJECTID; key.type = BTRFS_BALANCE_ITEM_KEY; key.offset = 0; - ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0); + ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, path, 0, 0); if (ret < 0) - goto out_bctl; + goto out; if (ret > 0) { /* ret = -ENOENT; */ ret = 0; - goto out_bctl; + goto out; + } + + bctl = kzalloc(sizeof(*bctl), GFP_NOFS); + if (!bctl) { + ret = -ENOMEM; + goto out; } leaf = path->nodes[0]; item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_balance_item); - bctl->fs_info = tree_root->fs_info; - bctl->flags = btrfs_balance_flags(leaf, item) | BTRFS_BALANCE_RESUME; + bctl->fs_info = fs_info; + bctl->flags = btrfs_balance_flags(leaf, item); + bctl->flags |= BTRFS_BALANCE_RESUME; btrfs_balance_data(leaf, item, &disk_bargs); btrfs_disk_balance_args_to_cpu(&bctl->data, &disk_bargs); @@ -2913,14 +2931,13 @@ int btrfs_recover_balance(struct btrfs_root *tree_root) btrfs_balance_sys(leaf, item, &disk_bargs); btrfs_disk_balance_args_to_cpu(&bctl->sys, &disk_bargs); - tsk = kthread_run(balance_kthread, bctl, "btrfs-balance"); - if (IS_ERR(tsk)) - ret = PTR_ERR(tsk); - else - goto out; + mutex_lock(&fs_info->volume_mutex); + mutex_lock(&fs_info->balance_mutex); -out_bctl: - kfree(bctl); + set_balance_control(bctl); + + mutex_unlock(&fs_info->balance_mutex); + mutex_unlock(&fs_info->volume_mutex); out: btrfs_free_path(path); return ret; @@ -4061,16 +4078,18 @@ static void btrfs_end_bio(struct bio *bio, int err) BUG_ON(stripe_index >= bbio->num_stripes); dev = bbio->stripes[stripe_index].dev; - if (bio->bi_rw & WRITE) - btrfs_dev_stat_inc(dev, - BTRFS_DEV_STAT_WRITE_ERRS); - else - btrfs_dev_stat_inc(dev, - BTRFS_DEV_STAT_READ_ERRS); - if ((bio->bi_rw & WRITE_FLUSH) == WRITE_FLUSH) - btrfs_dev_stat_inc(dev, - BTRFS_DEV_STAT_FLUSH_ERRS); - btrfs_dev_stat_print_on_error(dev); + if (dev->bdev) { + if (bio->bi_rw & WRITE) + btrfs_dev_stat_inc(dev, + BTRFS_DEV_STAT_WRITE_ERRS); + else + btrfs_dev_stat_inc(dev, + BTRFS_DEV_STAT_READ_ERRS); + if ((bio->bi_rw & WRITE_FLUSH) == WRITE_FLUSH) + btrfs_dev_stat_inc(dev, + BTRFS_DEV_STAT_FLUSH_ERRS); + btrfs_dev_stat_print_on_error(dev); + } } } diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 74366f2..95f6637 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -281,7 +281,8 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size); int btrfs_init_new_device(struct btrfs_root *root, char *path); int btrfs_balance(struct btrfs_balance_control *bctl, struct btrfs_ioctl_balance_args *bargs); -int btrfs_recover_balance(struct btrfs_root *tree_root); +int btrfs_resume_balance_async(struct btrfs_fs_info *fs_info); +int btrfs_recover_balance(struct btrfs_fs_info *fs_info); int btrfs_pause_balance(struct btrfs_fs_info *fs_info); int btrfs_cancel_balance(struct btrfs_fs_info *fs_info); int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset); diff --git a/fs/buffer.c b/fs/buffer.c index 838a9cf..c7062c8 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -1036,6 +1036,9 @@ grow_buffers(struct block_device *bdev, sector_t block, int size) static struct buffer_head * __getblk_slow(struct block_device *bdev, sector_t block, int size) { + int ret; + struct buffer_head *bh; + /* Size must be multiple of hard sectorsize */ if (unlikely(size & (bdev_logical_block_size(bdev)-1) || (size < 512 || size > PAGE_SIZE))) { @@ -1048,20 +1051,21 @@ __getblk_slow(struct block_device *bdev, sector_t block, int size) return NULL; } - for (;;) { - struct buffer_head * bh; - int ret; +retry: + bh = __find_get_block(bdev, block, size); + if (bh) + return bh; + ret = grow_buffers(bdev, block, size); + if (ret == 0) { + free_more_memory(); + goto retry; + } else if (ret > 0) { bh = __find_get_block(bdev, block, size); if (bh) return bh; - - ret = grow_buffers(bdev, block, size); - if (ret < 0) - return NULL; - if (ret == 0) - free_more_memory(); } + return NULL; } /* diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 5b40073..4ee522b 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -86,7 +86,31 @@ static struct { #endif /* CONFIG_CIFS_WEAK_PW_HASH */ #endif /* CIFS_POSIX */ -/* Forward declarations */ +#ifdef CONFIG_HIGHMEM +/* + * On arches that have high memory, kmap address space is limited. By + * serializing the kmap operations on those arches, we ensure that we don't + * end up with a bunch of threads in writeback with partially mapped page + * arrays, stuck waiting for kmap to come back. That situation prevents + * progress and can deadlock. + */ +static DEFINE_MUTEX(cifs_kmap_mutex); + +static inline void +cifs_kmap_lock(void) +{ + mutex_lock(&cifs_kmap_mutex); +} + +static inline void +cifs_kmap_unlock(void) +{ + mutex_unlock(&cifs_kmap_mutex); +} +#else /* !CONFIG_HIGHMEM */ +#define cifs_kmap_lock() do { ; } while(0) +#define cifs_kmap_unlock() do { ; } while(0) +#endif /* CONFIG_HIGHMEM */ /* Mark as invalid, all open files on tree connections since they were closed when session to server was lost */ @@ -1503,7 +1527,9 @@ cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid) } /* marshal up the page array */ + cifs_kmap_lock(); len = rdata->marshal_iov(rdata, data_len); + cifs_kmap_unlock(); data_len -= len; /* issue the read if we have any iovecs left to fill */ @@ -2069,7 +2095,9 @@ cifs_async_writev(struct cifs_writedata *wdata) * and set the iov_len properly for each one. It may also set * wdata->bytes too. */ + cifs_kmap_lock(); wdata->marshal_iov(iov, wdata); + cifs_kmap_unlock(); cFYI(1, "async write at %llu %u bytes", wdata->offset, wdata->bytes); diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 78db68a..94b7788 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -1653,24 +1653,26 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, * If yes, we have encountered a double deliminator * reset the NULL character to the deliminator */ - if (tmp_end < end && tmp_end[1] == delim) + if (tmp_end < end && tmp_end[1] == delim) { tmp_end[0] = delim; - /* Keep iterating until we get to a single deliminator - * OR the end - */ - while ((tmp_end = strchr(tmp_end, delim)) != NULL && - (tmp_end[1] == delim)) { - tmp_end = (char *) &tmp_end[2]; - } + /* Keep iterating until we get to a single + * deliminator OR the end + */ + while ((tmp_end = strchr(tmp_end, delim)) + != NULL && (tmp_end[1] == delim)) { + tmp_end = (char *) &tmp_end[2]; + } - /* Reset var options to point to next element */ - if (tmp_end) { - tmp_end[0] = '\0'; - options = (char *) &tmp_end[1]; - } else - /* Reached the end of the mount option string */ - options = end; + /* Reset var options to point to next element */ + if (tmp_end) { + tmp_end[0] = '\0'; + options = (char *) &tmp_end[1]; + } else + /* Reached the end of the mount option + * string */ + options = end; + } /* Now build new password string */ temp_len = strlen(value); @@ -3443,6 +3445,18 @@ void cifs_setup_cifs_sb(struct smb_vol *pvolume_info, #define CIFS_DEFAULT_NON_POSIX_RSIZE (60 * 1024) #define CIFS_DEFAULT_NON_POSIX_WSIZE (65536) +/* + * On hosts with high memory, we can't currently support wsize/rsize that are + * larger than we can kmap at once. Cap the rsize/wsize at + * LAST_PKMAP * PAGE_SIZE. We'll never be able to fill a read or write request + * larger than that anyway. + */ +#ifdef CONFIG_HIGHMEM +#define CIFS_KMAP_SIZE_LIMIT (LAST_PKMAP * PAGE_CACHE_SIZE) +#else /* CONFIG_HIGHMEM */ +#define CIFS_KMAP_SIZE_LIMIT (1<<24) +#endif /* CONFIG_HIGHMEM */ + static unsigned int cifs_negotiate_wsize(struct cifs_tcon *tcon, struct smb_vol *pvolume_info) { @@ -3473,6 +3487,9 @@ cifs_negotiate_wsize(struct cifs_tcon *tcon, struct smb_vol *pvolume_info) wsize = min_t(unsigned int, wsize, server->maxBuf - sizeof(WRITE_REQ) + 4); + /* limit to the amount that we can kmap at once */ + wsize = min_t(unsigned int, wsize, CIFS_KMAP_SIZE_LIMIT); + /* hard limit of CIFS_MAX_WSIZE */ wsize = min_t(unsigned int, wsize, CIFS_MAX_WSIZE); @@ -3493,18 +3510,15 @@ cifs_negotiate_rsize(struct cifs_tcon *tcon, struct smb_vol *pvolume_info) * MS-CIFS indicates that servers are only limited by the client's * bufsize for reads, testing against win98se shows that it throws * INVALID_PARAMETER errors if you try to request too large a read. + * OS/2 just sends back short reads. * - * If the server advertises a MaxBufferSize of less than one page, - * assume that it also can't satisfy reads larger than that either. - * - * FIXME: Is there a better heuristic for this? + * If the server doesn't advertise CAP_LARGE_READ_X, then assume that + * it can't handle a read request larger than its MaxBufferSize either. */ if (tcon->unix_ext && (unix_cap & CIFS_UNIX_LARGE_READ_CAP)) defsize = CIFS_DEFAULT_IOSIZE; else if (server->capabilities & CAP_LARGE_READ_X) defsize = CIFS_DEFAULT_NON_POSIX_RSIZE; - else if (server->maxBuf >= PAGE_CACHE_SIZE) - defsize = CIFSMaxBufSize; else defsize = server->maxBuf - sizeof(READ_RSP); @@ -3517,6 +3531,9 @@ cifs_negotiate_rsize(struct cifs_tcon *tcon, struct smb_vol *pvolume_info) if (!(server->capabilities & CAP_LARGE_READ_X)) rsize = min_t(unsigned int, CIFSMaxBufSize, rsize); + /* limit to the amount that we can kmap at once */ + rsize = min_t(unsigned int, rsize, CIFS_KMAP_SIZE_LIMIT); + /* hard limit of CIFS_MAX_RSIZE */ rsize = min_t(unsigned int, rsize, CIFS_MAX_RSIZE); diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c index 0a8224d..a4217f0 100644 --- a/fs/cifs/readdir.c +++ b/fs/cifs/readdir.c @@ -86,9 +86,12 @@ cifs_readdir_lookup(struct dentry *parent, struct qstr *name, dentry = d_lookup(parent, name); if (dentry) { - /* FIXME: check for inode number changes? */ - if (dentry->d_inode != NULL) + inode = dentry->d_inode; + /* update inode in place if i_ino didn't change */ + if (inode && CIFS_I(inode)->uniqueid == fattr->cf_uniqueid) { + cifs_fattr_to_inode(inode, fattr); return dentry; + } d_drop(dentry); dput(dentry); } diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index 3097ee5..f25d4ea 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -365,16 +365,14 @@ cifs_setup_async_request(struct TCP_Server_Info *server, struct kvec *iov, if (mid == NULL) return -ENOMEM; - /* put it on the pending_mid_q */ - spin_lock(&GlobalMid_Lock); - list_add_tail(&mid->qhead, &server->pending_mid_q); - spin_unlock(&GlobalMid_Lock); - rc = cifs_sign_smb2(iov, nvec, server, &mid->sequence_number); - if (rc) - delete_mid(mid); + if (rc) { + DeleteMidQEntry(mid); + return rc; + } + *ret_mid = mid; - return rc; + return 0; } /* @@ -407,17 +405,21 @@ cifs_call_async(struct TCP_Server_Info *server, struct kvec *iov, mid->callback_data = cbdata; mid->mid_state = MID_REQUEST_SUBMITTED; + /* put it on the pending_mid_q */ + spin_lock(&GlobalMid_Lock); + list_add_tail(&mid->qhead, &server->pending_mid_q); + spin_unlock(&GlobalMid_Lock); + + cifs_in_send_inc(server); rc = smb_sendv(server, iov, nvec); cifs_in_send_dec(server); cifs_save_when_sent(mid); mutex_unlock(&server->srv_mutex); - if (rc) - goto out_err; + if (rc == 0) + return 0; - return rc; -out_err: delete_mid(mid); add_credits(server, 1); wake_up(&server->request_q); diff --git a/fs/ecryptfs/kthread.c b/fs/ecryptfs/kthread.c index 69f994a..0dbe58a 100644 --- a/fs/ecryptfs/kthread.c +++ b/fs/ecryptfs/kthread.c @@ -149,7 +149,7 @@ int ecryptfs_privileged_open(struct file **lower_file, (*lower_file) = dentry_open(lower_dentry, lower_mnt, flags, cred); if (!IS_ERR(*lower_file)) goto out; - if (flags & O_RDONLY) { + if ((flags & O_ACCMODE) == O_RDONLY) { rc = PTR_ERR((*lower_file)); goto out; } diff --git a/fs/ecryptfs/miscdev.c b/fs/ecryptfs/miscdev.c index 3a06f40..c0038f6 100644 --- a/fs/ecryptfs/miscdev.c +++ b/fs/ecryptfs/miscdev.c @@ -49,7 +49,10 @@ ecryptfs_miscdev_poll(struct file *file, poll_table *pt) mutex_lock(&ecryptfs_daemon_hash_mux); /* TODO: Just use file->private_data? */ rc = ecryptfs_find_daemon_by_euid(&daemon, euid, current_user_ns()); - BUG_ON(rc || !daemon); + if (rc || !daemon) { + mutex_unlock(&ecryptfs_daemon_hash_mux); + return -EINVAL; + } mutex_lock(&daemon->mux); mutex_unlock(&ecryptfs_daemon_hash_mux); if (daemon->flags & ECRYPTFS_DAEMON_ZOMBIE) { @@ -122,6 +125,7 @@ ecryptfs_miscdev_open(struct inode *inode, struct file *file) goto out_unlock_daemon; } daemon->flags |= ECRYPTFS_DAEMON_MISCDEV_OPEN; + file->private_data = daemon; atomic_inc(&ecryptfs_num_miscdev_opens); out_unlock_daemon: mutex_unlock(&daemon->mux); @@ -152,9 +156,9 @@ ecryptfs_miscdev_release(struct inode *inode, struct file *file) mutex_lock(&ecryptfs_daemon_hash_mux); rc = ecryptfs_find_daemon_by_euid(&daemon, euid, current_user_ns()); - BUG_ON(rc || !daemon); + if (rc || !daemon) + daemon = file->private_data; mutex_lock(&daemon->mux); - BUG_ON(daemon->pid != task_pid(current)); BUG_ON(!(daemon->flags & ECRYPTFS_DAEMON_MISCDEV_OPEN)); daemon->flags &= ~ECRYPTFS_DAEMON_MISCDEV_OPEN; atomic_dec(&ecryptfs_num_miscdev_opens); @@ -191,31 +195,32 @@ int ecryptfs_send_miscdev(char *data, size_t data_size, struct ecryptfs_msg_ctx *msg_ctx, u8 msg_type, u16 msg_flags, struct ecryptfs_daemon *daemon) { - int rc = 0; + struct ecryptfs_message *msg; - mutex_lock(&msg_ctx->mux); - msg_ctx->msg = kmalloc((sizeof(*msg_ctx->msg) + data_size), - GFP_KERNEL); - if (!msg_ctx->msg) { - rc = -ENOMEM; + msg = kmalloc((sizeof(*msg) + data_size), GFP_KERNEL); + if (!msg) { printk(KERN_ERR "%s: Out of memory whilst attempting " "to kmalloc(%zd, GFP_KERNEL)\n", __func__, - (sizeof(*msg_ctx->msg) + data_size)); - goto out_unlock; + (sizeof(*msg) + data_size)); + return -ENOMEM; } + + mutex_lock(&msg_ctx->mux); + msg_ctx->msg = msg; msg_ctx->msg->index = msg_ctx->index; msg_ctx->msg->data_len = data_size; msg_ctx->type = msg_type; memcpy(msg_ctx->msg->data, data, data_size); msg_ctx->msg_size = (sizeof(*msg_ctx->msg) + data_size); - mutex_lock(&daemon->mux); list_add_tail(&msg_ctx->daemon_out_list, &daemon->msg_ctx_out_queue); + mutex_unlock(&msg_ctx->mux); + + mutex_lock(&daemon->mux); daemon->num_queued_msg_ctx++; wake_up_interruptible(&daemon->wait); mutex_unlock(&daemon->mux); -out_unlock: - mutex_unlock(&msg_ctx->mux); - return rc; + + return 0; } /* @@ -269,8 +274,16 @@ ecryptfs_miscdev_read(struct file *file, char __user *buf, size_t count, mutex_lock(&ecryptfs_daemon_hash_mux); /* TODO: Just use file->private_data? */ rc = ecryptfs_find_daemon_by_euid(&daemon, euid, current_user_ns()); - BUG_ON(rc || !daemon); + if (rc || !daemon) { + mutex_unlock(&ecryptfs_daemon_hash_mux); + return -EINVAL; + } mutex_lock(&daemon->mux); + if (task_pid(current) != daemon->pid) { + mutex_unlock(&daemon->mux); + mutex_unlock(&ecryptfs_daemon_hash_mux); + return -EPERM; + } if (daemon->flags & ECRYPTFS_DAEMON_ZOMBIE) { rc = 0; mutex_unlock(&ecryptfs_daemon_hash_mux); @@ -307,9 +320,6 @@ check_list: * message from the queue; try again */ goto check_list; } - BUG_ON(euid != daemon->euid); - BUG_ON(current_user_ns() != daemon->user_ns); - BUG_ON(task_pid(current) != daemon->pid); msg_ctx = list_first_entry(&daemon->msg_ctx_out_queue, struct ecryptfs_msg_ctx, daemon_out_list); BUG_ON(!msg_ctx); diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 74598f6..1c8b556 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -1710,7 +1710,7 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd, goto error_tgt_fput; /* Check if EPOLLWAKEUP is allowed */ - if ((epds.events & EPOLLWAKEUP) && !capable(CAP_EPOLLWAKEUP)) + if ((epds.events & EPOLLWAKEUP) && !capable(CAP_BLOCK_SUSPEND)) epds.events &= ~EPOLLWAKEUP; /* diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index e34deac..6ec6f9ee 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -268,7 +268,6 @@ group_extend_out: err = ext4_move_extents(filp, donor_filp, me.orig_start, me.donor_start, me.len, &me.moved_len); mnt_drop_write_file(filp); - mnt_drop_write(filp->f_path.mnt); if (copy_to_user((struct move_extent __user *)arg, &me, sizeof(me))) diff --git a/fs/fat/inode.c b/fs/fat/inode.c index a3d81eb..0038b32 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -738,22 +738,21 @@ static int fat_encode_fh(struct inode *inode, __u32 *fh, int *lenp, struct inode *parent) { int len = *lenp; - u32 ipos_h, ipos_m, ipos_l; + struct msdos_sb_info *sbi = MSDOS_SB(inode->i_sb); + loff_t i_pos; if (len < 5) { *lenp = 5; return 255; /* no room */ } - ipos_h = MSDOS_I(inode)->i_pos >> 8; - ipos_m = (MSDOS_I(inode)->i_pos & 0xf0) << 24; - ipos_l = (MSDOS_I(inode)->i_pos & 0x0f) << 28; + i_pos = fat_i_pos_read(sbi, inode); *lenp = 5; fh[0] = inode->i_ino; fh[1] = inode->i_generation; - fh[2] = ipos_h; - fh[3] = ipos_m | MSDOS_I(inode)->i_logstart; - fh[4] = ipos_l; + fh[2] = i_pos >> 8; + fh[3] = ((i_pos & 0xf0) << 24) | MSDOS_I(inode)->i_logstart; + fh[4] = (i_pos & 0x0f) << 28; if (parent) fh[4] |= MSDOS_I(parent)->i_logstart; return 3; @@ -14,7 +14,7 @@ #include <linux/sched.h> #include <linux/pipe_fs_i.h> -static void wait_for_partner(struct inode* inode, unsigned int *cnt) +static int wait_for_partner(struct inode* inode, unsigned int *cnt) { int cur = *cnt; @@ -23,6 +23,7 @@ static void wait_for_partner(struct inode* inode, unsigned int *cnt) if (signal_pending(current)) break; } + return cur == *cnt ? -ERESTARTSYS : 0; } static void wake_up_partner(struct inode* inode) @@ -67,8 +68,7 @@ static int fifo_open(struct inode *inode, struct file *filp) * seen a writer */ filp->f_version = pipe->w_counter; } else { - wait_for_partner(inode, &pipe->w_counter); - if(signal_pending(current)) + if (wait_for_partner(inode, &pipe->w_counter)) goto err_rd; } } @@ -90,8 +90,7 @@ static int fifo_open(struct inode *inode, struct file *filp) wake_up_partner(inode); if (!pipe->readers) { - wait_for_partner(inode, &pipe->r_counter); - if (signal_pending(current)) + if (wait_for_partner(inode, &pipe->r_counter)) goto err_wr; } break; @@ -1465,7 +1465,7 @@ int generic_setlease(struct file *filp, long arg, struct file_lock **flp) case F_WRLCK: return generic_add_lease(filp, arg, flp); default: - BUG(); + return -EINVAL; } } EXPORT_SYMBOL(generic_setlease); diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 9a4cbfc..4825337 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -484,6 +484,7 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq) list_for_each_entry_safe(req, tmp, &reqs, wb_list) { if (!nfs_pageio_add_request(&desc, req)) { + nfs_list_remove_request(req); nfs_list_add_request(req, &failed); spin_lock(cinfo.lock); dreq->flags = 0; @@ -494,8 +495,11 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq) } nfs_pageio_complete(&desc); - while (!list_empty(&failed)) + while (!list_empty(&failed)) { + req = nfs_list_entry(failed.next); + nfs_list_remove_request(req); nfs_unlock_and_release_request(req); + } if (put_dreq(dreq)) nfs_direct_write_complete(dreq, dreq->inode); diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 906f09c..0622819 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -2860,6 +2860,8 @@ static struct dentry *nfs4_try_mount(int flags, const char *dev_name, dfprintk(MOUNT, "--> nfs4_try_mount()\n"); + mount_info->fill_super = nfs4_fill_super; + export_path = data->nfs_server.export_path; data->nfs_server.export_path = "/"; root_mnt = nfs_do_root_mount(&nfs4_remote_fs_type, flags, mount_info, diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index 81a4cd2..4f7795f 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c @@ -456,7 +456,7 @@ static void ocfs2_update_lock_stats(struct ocfs2_lock_res *res, int level, stats->ls_gets++; stats->ls_total += ktime_to_ns(kt); /* overflow */ - if (unlikely(stats->ls_gets) == 0) { + if (unlikely(stats->ls_gets == 0)) { stats->ls_gets++; stats->ls_total = ktime_to_ns(kt); } @@ -3932,6 +3932,8 @@ unqueue: static void ocfs2_schedule_blocked_lock(struct ocfs2_super *osb, struct ocfs2_lock_res *lockres) { + unsigned long flags; + assert_spin_locked(&lockres->l_lock); if (lockres->l_flags & OCFS2_LOCK_FREEING) { @@ -3945,21 +3947,22 @@ static void ocfs2_schedule_blocked_lock(struct ocfs2_super *osb, lockres_or_flags(lockres, OCFS2_LOCK_QUEUED); - spin_lock(&osb->dc_task_lock); + spin_lock_irqsave(&osb->dc_task_lock, flags); if (list_empty(&lockres->l_blocked_list)) { list_add_tail(&lockres->l_blocked_list, &osb->blocked_lock_list); osb->blocked_lock_count++; } - spin_unlock(&osb->dc_task_lock); + spin_unlock_irqrestore(&osb->dc_task_lock, flags); } static void ocfs2_downconvert_thread_do_work(struct ocfs2_super *osb) { unsigned long processed; + unsigned long flags; struct ocfs2_lock_res *lockres; - spin_lock(&osb->dc_task_lock); + spin_lock_irqsave(&osb->dc_task_lock, flags); /* grab this early so we know to try again if a state change and * wake happens part-way through our work */ osb->dc_work_sequence = osb->dc_wake_sequence; @@ -3972,38 +3975,40 @@ static void ocfs2_downconvert_thread_do_work(struct ocfs2_super *osb) struct ocfs2_lock_res, l_blocked_list); list_del_init(&lockres->l_blocked_list); osb->blocked_lock_count--; - spin_unlock(&osb->dc_task_lock); + spin_unlock_irqrestore(&osb->dc_task_lock, flags); BUG_ON(!processed); processed--; ocfs2_process_blocked_lock(osb, lockres); - spin_lock(&osb->dc_task_lock); + spin_lock_irqsave(&osb->dc_task_lock, flags); } - spin_unlock(&osb->dc_task_lock); + spin_unlock_irqrestore(&osb->dc_task_lock, flags); } static int ocfs2_downconvert_thread_lists_empty(struct ocfs2_super *osb) { int empty = 0; + unsigned long flags; - spin_lock(&osb->dc_task_lock); + spin_lock_irqsave(&osb->dc_task_lock, flags); if (list_empty(&osb->blocked_lock_list)) empty = 1; - spin_unlock(&osb->dc_task_lock); + spin_unlock_irqrestore(&osb->dc_task_lock, flags); return empty; } static int ocfs2_downconvert_thread_should_wake(struct ocfs2_super *osb) { int should_wake = 0; + unsigned long flags; - spin_lock(&osb->dc_task_lock); + spin_lock_irqsave(&osb->dc_task_lock, flags); if (osb->dc_work_sequence != osb->dc_wake_sequence) should_wake = 1; - spin_unlock(&osb->dc_task_lock); + spin_unlock_irqrestore(&osb->dc_task_lock, flags); return should_wake; } @@ -4033,10 +4038,12 @@ static int ocfs2_downconvert_thread(void *arg) void ocfs2_wake_downconvert_thread(struct ocfs2_super *osb) { - spin_lock(&osb->dc_task_lock); + unsigned long flags; + + spin_lock_irqsave(&osb->dc_task_lock, flags); /* make sure the voting thread gets a swipe at whatever changes * the caller may have made to the voting state */ osb->dc_wake_sequence++; - spin_unlock(&osb->dc_task_lock); + spin_unlock_irqrestore(&osb->dc_task_lock, flags); wake_up(&osb->dc_event); } diff --git a/fs/ocfs2/extent_map.c b/fs/ocfs2/extent_map.c index 2f5b92e..70b5863 100644 --- a/fs/ocfs2/extent_map.c +++ b/fs/ocfs2/extent_map.c @@ -923,8 +923,6 @@ out_unlock: ocfs2_inode_unlock(inode, 0); out: - if (ret && ret != -ENXIO) - ret = -ENXIO; return ret; } diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 061591a..7602783 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -1950,7 +1950,7 @@ static int __ocfs2_change_file_space(struct file *file, struct inode *inode, if (ret < 0) mlog_errno(ret); - if (file->f_flags & O_SYNC) + if (file && (file->f_flags & O_SYNC)) handle->h_sync = 1; ocfs2_commit_trans(osb, handle); @@ -2422,8 +2422,10 @@ out_dio: unaligned_dio = 0; } - if (unaligned_dio) + if (unaligned_dio) { + ocfs2_iocb_clear_unaligned_aio(iocb); atomic_dec(&OCFS2_I(inode)->ip_unaligned_aio); + } out: if (rw_level != -1) diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c index 92fcd57..0a86e30 100644 --- a/fs/ocfs2/quota_global.c +++ b/fs/ocfs2/quota_global.c @@ -399,8 +399,6 @@ int ocfs2_global_read_info(struct super_block *sb, int type) msecs_to_jiffies(oinfo->dqi_syncms)); out_err: - if (status) - mlog_errno(status); return status; out_unlock: ocfs2_unlock_global_qf(oinfo, 0); @@ -397,10 +397,10 @@ SYSCALL_DEFINE1(fchdir, unsigned int, fd) { struct file *file; struct inode *inode; - int error; + int error, fput_needed; error = -EBADF; - file = fget(fd); + file = fget_raw_light(fd, &fput_needed); if (!file) goto out; @@ -414,7 +414,7 @@ SYSCALL_DEFINE1(fchdir, unsigned int, fd) if (!error) set_fs_pwd(current->fs, &file->f_path); out_putf: - fput(file); + fput_light(file, fput_needed); out: return error; } diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c index fbb0b47..d5378d0 100644 --- a/fs/ramfs/file-nommu.c +++ b/fs/ramfs/file-nommu.c @@ -110,6 +110,7 @@ int ramfs_nommu_expand_for_mapping(struct inode *inode, size_t newsize) /* prevent the page from being discarded on memory pressure */ SetPageDirty(page); + SetPageUptodate(page); unlock_page(page); put_page(page); diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c index 9d1aeb7..4f33c32 100644 --- a/fs/xfs/xfs_alloc.c +++ b/fs/xfs/xfs_alloc.c @@ -1074,13 +1074,13 @@ restart: * If we couldn't get anything, give up. */ if (bno_cur_lt == NULL && bno_cur_gt == NULL) { + xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); + if (!forced++) { trace_xfs_alloc_near_busy(args); xfs_log_force(args->mp, XFS_LOG_SYNC); goto restart; } - - xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); trace_xfs_alloc_size_neither(args); args->agbno = NULLAGBLOCK; return 0; @@ -2434,13 +2434,22 @@ xfs_alloc_vextent_worker( current_restore_flags_nested(&pflags, PF_FSTRANS); } - -int /* error */ +/* + * Data allocation requests often come in with little stack to work on. Push + * them off to a worker thread so there is lots of stack to use. Metadata + * requests, OTOH, are generally from low stack usage paths, so avoid the + * context switch overhead here. + */ +int xfs_alloc_vextent( - xfs_alloc_arg_t *args) /* allocation argument structure */ + struct xfs_alloc_arg *args) { DECLARE_COMPLETION_ONSTACK(done); + if (!args->userdata) + return __xfs_alloc_vextent(args); + + args->done = &done; INIT_WORK_ONSTACK(&args->work, xfs_alloc_vextent_worker); queue_work(xfs_alloc_wq, &args->work); diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index a4beb42..269b35c 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -989,27 +989,6 @@ xfs_buf_ioerror_alert( (__uint64_t)XFS_BUF_ADDR(bp), func, bp->b_error, bp->b_length); } -int -xfs_bwrite( - struct xfs_buf *bp) -{ - int error; - - ASSERT(xfs_buf_islocked(bp)); - - bp->b_flags |= XBF_WRITE; - bp->b_flags &= ~(XBF_ASYNC | XBF_READ | _XBF_DELWRI_Q); - - xfs_bdstrat_cb(bp); - - error = xfs_buf_iowait(bp); - if (error) { - xfs_force_shutdown(bp->b_target->bt_mount, - SHUTDOWN_META_IO_ERROR); - } - return error; -} - /* * Called when we want to stop a buffer from getting written or read. * We attach the EIO error, muck with its flags, and call xfs_buf_ioend @@ -1079,14 +1058,7 @@ xfs_bioerror_relse( return EIO; } - -/* - * All xfs metadata buffers except log state machine buffers - * get this attached as their b_bdstrat callback function. - * This is so that we can catch a buffer - * after prematurely unpinning it to forcibly shutdown the filesystem. - */ -int +STATIC int xfs_bdstrat_cb( struct xfs_buf *bp) { @@ -1107,6 +1079,27 @@ xfs_bdstrat_cb( return 0; } +int +xfs_bwrite( + struct xfs_buf *bp) +{ + int error; + + ASSERT(xfs_buf_islocked(bp)); + + bp->b_flags |= XBF_WRITE; + bp->b_flags &= ~(XBF_ASYNC | XBF_READ | _XBF_DELWRI_Q); + + xfs_bdstrat_cb(bp); + + error = xfs_buf_iowait(bp); + if (error) { + xfs_force_shutdown(bp->b_target->bt_mount, + SHUTDOWN_META_IO_ERROR); + } + return error; +} + /* * Wrapper around bdstrat so that we can stop data from going to disk in case * we are shutting down the filesystem. Typically user data goes thru this @@ -1243,7 +1236,7 @@ xfs_buf_iorequest( */ atomic_set(&bp->b_io_remaining, 1); _xfs_buf_ioapply(bp); - _xfs_buf_ioend(bp, 0); + _xfs_buf_ioend(bp, 1); xfs_buf_rele(bp); } diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h index 7f1d139..79344c4 100644 --- a/fs/xfs/xfs_buf.h +++ b/fs/xfs/xfs_buf.h @@ -180,7 +180,6 @@ extern void xfs_buf_unlock(xfs_buf_t *); extern int xfs_bwrite(struct xfs_buf *bp); extern void xfsbdstrat(struct xfs_mount *, struct xfs_buf *); -extern int xfs_bdstrat_cb(struct xfs_buf *); extern void xfs_buf_ioend(xfs_buf_t *, int); extern void xfs_buf_ioerror(xfs_buf_t *, int); diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index 45df2b8..d9e4511 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c @@ -954,7 +954,7 @@ xfs_buf_iodone_callbacks( if (!XFS_BUF_ISSTALE(bp)) { bp->b_flags |= XBF_WRITE | XBF_ASYNC | XBF_DONE; - xfs_bdstrat_cb(bp); + xfs_buf_iorequest(bp); } else { xfs_buf_relse(bp); } |