diff options
author | Al Viro <viro@zeniv.linux.org.uk> | 2014-12-08 20:39:29 -0500 |
---|---|---|
committer | Al Viro <viro@zeniv.linux.org.uk> | 2014-12-08 20:39:29 -0500 |
commit | ba00410b8131b23edfb0e09f8b6dd26c8eb621fb (patch) | |
tree | c08504e4d2fa51ac91cef544f336d0169806c49f /fs | |
parent | 8ce74dd6057832618957fc2cbd38fa959c3a0a6c (diff) | |
parent | aa583096d9767892983332e7c1a984bd17e3cd39 (diff) | |
download | op-kernel-dev-ba00410b8131b23edfb0e09f8b6dd26c8eb621fb.zip op-kernel-dev-ba00410b8131b23edfb0e09f8b6dd26c8eb621fb.tar.gz |
Merge branch 'iov_iter' into for-next
Diffstat (limited to 'fs')
54 files changed, 558 insertions, 504 deletions
diff --git a/fs/Makefile b/fs/Makefile index 34a1b9de..da0bbb4 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -104,7 +104,7 @@ obj-$(CONFIG_QNX6FS_FS) += qnx6/ obj-$(CONFIG_AUTOFS4_FS) += autofs4/ obj-$(CONFIG_ADFS_FS) += adfs/ obj-$(CONFIG_FUSE_FS) += fuse/ -obj-$(CONFIG_OVERLAYFS_FS) += overlayfs/ +obj-$(CONFIG_OVERLAY_FS) += overlayfs/ obj-$(CONFIG_UDF_FS) += udf/ obj-$(CONFIG_SUN_OPENPROMFS) += openpromfs/ obj-$(CONFIG_OMFS_FS) += omfs/ diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 19bc616..150822e 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -80,13 +80,6 @@ noinline void btrfs_clear_path_blocking(struct btrfs_path *p, { int i; -#ifdef CONFIG_DEBUG_LOCK_ALLOC - /* lockdep really cares that we take all of these spinlocks - * in the right order. If any of the locks in the path are not - * currently blocking, it is going to complain. So, make really - * really sure by forcing the path to blocking before we clear - * the path blocking. - */ if (held) { btrfs_set_lock_blocking_rw(held, held_rw); if (held_rw == BTRFS_WRITE_LOCK) @@ -95,7 +88,6 @@ noinline void btrfs_clear_path_blocking(struct btrfs_path *p, held_rw = BTRFS_READ_LOCK_BLOCKING; } btrfs_set_path_blocking(p); -#endif for (i = BTRFS_MAX_LEVEL - 1; i >= 0; i--) { if (p->nodes[i] && p->locks[i]) { @@ -107,10 +99,8 @@ noinline void btrfs_clear_path_blocking(struct btrfs_path *p, } } -#ifdef CONFIG_DEBUG_LOCK_ALLOC if (held) btrfs_clear_lock_blocking_rw(held, held_rw); -#endif } /* this also releases the path */ @@ -2893,7 +2883,7 @@ cow_done: } p->locks[level] = BTRFS_WRITE_LOCK; } else { - err = btrfs_try_tree_read_lock(b); + err = btrfs_tree_read_lock_atomic(b); if (!err) { btrfs_set_path_blocking(p); btrfs_tree_read_lock(b); @@ -3025,7 +3015,7 @@ again: } level = btrfs_header_level(b); - err = btrfs_try_tree_read_lock(b); + err = btrfs_tree_read_lock_atomic(b); if (!err) { btrfs_set_path_blocking(p); btrfs_tree_read_lock(b); diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index d557264e..fe69edd 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -3276,7 +3276,7 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, struct btrfs_root *root, unsigned long count); int btrfs_async_run_delayed_refs(struct btrfs_root *root, unsigned long count, int wait); -int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len); +int btrfs_lookup_data_extent(struct btrfs_root *root, u64 start, u64 len); int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 bytenr, u64 offset, int metadata, u64 *refs, u64 *flags); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 1ad0f47..1bf9f89 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -3817,19 +3817,19 @@ static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info, struct btrfs_super_block *sb = fs_info->super_copy; int ret = 0; - if (sb->root_level > BTRFS_MAX_LEVEL) { - printk(KERN_ERR "BTRFS: tree_root level too big: %d > %d\n", - sb->root_level, BTRFS_MAX_LEVEL); + if (btrfs_super_root_level(sb) >= BTRFS_MAX_LEVEL) { + printk(KERN_ERR "BTRFS: tree_root level too big: %d >= %d\n", + btrfs_super_root_level(sb), BTRFS_MAX_LEVEL); ret = -EINVAL; } - if (sb->chunk_root_level > BTRFS_MAX_LEVEL) { - printk(KERN_ERR "BTRFS: chunk_root level too big: %d > %d\n", - sb->chunk_root_level, BTRFS_MAX_LEVEL); + if (btrfs_super_chunk_root_level(sb) >= BTRFS_MAX_LEVEL) { + printk(KERN_ERR "BTRFS: chunk_root level too big: %d >= %d\n", + btrfs_super_chunk_root_level(sb), BTRFS_MAX_LEVEL); ret = -EINVAL; } - if (sb->log_root_level > BTRFS_MAX_LEVEL) { - printk(KERN_ERR "BTRFS: log_root level too big: %d > %d\n", - sb->log_root_level, BTRFS_MAX_LEVEL); + if (btrfs_super_log_root_level(sb) >= BTRFS_MAX_LEVEL) { + printk(KERN_ERR "BTRFS: log_root level too big: %d >= %d\n", + btrfs_super_log_root_level(sb), BTRFS_MAX_LEVEL); ret = -EINVAL; } @@ -3837,15 +3837,15 @@ static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info, * The common minimum, we don't know if we can trust the nodesize/sectorsize * items yet, they'll be verified later. Issue just a warning. */ - if (!IS_ALIGNED(sb->root, 4096)) + if (!IS_ALIGNED(btrfs_super_root(sb), 4096)) printk(KERN_WARNING "BTRFS: tree_root block unaligned: %llu\n", sb->root); - if (!IS_ALIGNED(sb->chunk_root, 4096)) + if (!IS_ALIGNED(btrfs_super_chunk_root(sb), 4096)) printk(KERN_WARNING "BTRFS: tree_root block unaligned: %llu\n", sb->chunk_root); - if (!IS_ALIGNED(sb->log_root, 4096)) + if (!IS_ALIGNED(btrfs_super_log_root(sb), 4096)) printk(KERN_WARNING "BTRFS: tree_root block unaligned: %llu\n", - sb->log_root); + btrfs_super_log_root(sb)); if (memcmp(fs_info->fsid, sb->dev_item.fsid, BTRFS_UUID_SIZE) != 0) { printk(KERN_ERR "BTRFS: dev_item UUID does not match fsid: %pU != %pU\n", @@ -3857,13 +3857,13 @@ static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info, * Hint to catch really bogus numbers, bitflips or so, more exact checks are * done later */ - if (sb->num_devices > (1UL << 31)) + if (btrfs_super_num_devices(sb) > (1UL << 31)) printk(KERN_WARNING "BTRFS: suspicious number of devices: %llu\n", - sb->num_devices); + btrfs_super_num_devices(sb)); - if (sb->bytenr != BTRFS_SUPER_INFO_OFFSET) { + if (btrfs_super_bytenr(sb) != BTRFS_SUPER_INFO_OFFSET) { printk(KERN_ERR "BTRFS: super offset mismatch %llu != %u\n", - sb->bytenr, BTRFS_SUPER_INFO_OFFSET); + btrfs_super_bytenr(sb), BTRFS_SUPER_INFO_OFFSET); ret = -EINVAL; } @@ -3871,14 +3871,15 @@ static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info, * The generation is a global counter, we'll trust it more than the others * but it's still possible that it's the one that's wrong. */ - if (sb->generation < sb->chunk_root_generation) + if (btrfs_super_generation(sb) < btrfs_super_chunk_root_generation(sb)) printk(KERN_WARNING "BTRFS: suspicious: generation < chunk_root_generation: %llu < %llu\n", - sb->generation, sb->chunk_root_generation); - if (sb->generation < sb->cache_generation && sb->cache_generation != (u64)-1) + btrfs_super_generation(sb), btrfs_super_chunk_root_generation(sb)); + if (btrfs_super_generation(sb) < btrfs_super_cache_generation(sb) + && btrfs_super_cache_generation(sb) != (u64)-1) printk(KERN_WARNING "BTRFS: suspicious: generation < cache_generation: %llu < %llu\n", - sb->generation, sb->cache_generation); + btrfs_super_generation(sb), btrfs_super_cache_generation(sb)); return ret; } diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index d565895..47c1ba1 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -710,8 +710,8 @@ void btrfs_clear_space_info_full(struct btrfs_fs_info *info) rcu_read_unlock(); } -/* simple helper to search for an existing extent at a given offset */ -int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len) +/* simple helper to search for an existing data extent at a given offset */ +int btrfs_lookup_data_extent(struct btrfs_root *root, u64 start, u64 len) { int ret; struct btrfs_key key; @@ -726,12 +726,6 @@ int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len) key.type = BTRFS_EXTENT_ITEM_KEY; ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path, 0, 0); - if (ret > 0) { - btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); - if (key.objectid == start && - key.type == BTRFS_METADATA_ITEM_KEY) - ret = 0; - } btrfs_free_path(path); return ret; } @@ -786,7 +780,6 @@ search_again: else key.type = BTRFS_EXTENT_ITEM_KEY; -again: ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path, 0, 0); if (ret < 0) @@ -802,13 +795,6 @@ again: key.offset == root->nodesize) ret = 0; } - if (ret) { - key.objectid = bytenr; - key.type = BTRFS_EXTENT_ITEM_KEY; - key.offset = root->nodesize; - btrfs_release_path(path); - goto again; - } } if (ret == 0) { diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index 783a943..84a2d18 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -413,7 +413,7 @@ int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end, ret = 0; fail: while (ret < 0 && !list_empty(&tmplist)) { - sums = list_entry(&tmplist, struct btrfs_ordered_sum, list); + sums = list_entry(tmplist.next, struct btrfs_ordered_sum, list); list_del(&sums->list); kfree(sums); } diff --git a/fs/btrfs/locking.c b/fs/btrfs/locking.c index 5665d21..f8229ef 100644 --- a/fs/btrfs/locking.c +++ b/fs/btrfs/locking.c @@ -128,6 +128,26 @@ again: } /* + * take a spinning read lock. + * returns 1 if we get the read lock and 0 if we don't + * this won't wait for blocking writers + */ +int btrfs_tree_read_lock_atomic(struct extent_buffer *eb) +{ + if (atomic_read(&eb->blocking_writers)) + return 0; + + read_lock(&eb->lock); + if (atomic_read(&eb->blocking_writers)) { + read_unlock(&eb->lock); + return 0; + } + atomic_inc(&eb->read_locks); + atomic_inc(&eb->spinning_readers); + return 1; +} + +/* * returns 1 if we get the read lock and 0 if we don't * this won't wait for blocking writers */ @@ -158,9 +178,7 @@ int btrfs_try_tree_write_lock(struct extent_buffer *eb) atomic_read(&eb->blocking_readers)) return 0; - if (!write_trylock(&eb->lock)) - return 0; - + write_lock(&eb->lock); if (atomic_read(&eb->blocking_writers) || atomic_read(&eb->blocking_readers)) { write_unlock(&eb->lock); diff --git a/fs/btrfs/locking.h b/fs/btrfs/locking.h index b81e0e9..c44a9d5 100644 --- a/fs/btrfs/locking.h +++ b/fs/btrfs/locking.h @@ -35,6 +35,8 @@ void btrfs_clear_lock_blocking_rw(struct extent_buffer *eb, int rw); void btrfs_assert_tree_locked(struct extent_buffer *eb); int btrfs_try_tree_read_lock(struct extent_buffer *eb); int btrfs_try_tree_write_lock(struct extent_buffer *eb); +int btrfs_tree_read_lock_atomic(struct extent_buffer *eb); + static inline void btrfs_tree_unlock_rw(struct extent_buffer *eb, int rw) { diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index a2b97ef..54bd91e 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -2151,6 +2151,7 @@ static void __exit exit_btrfs_fs(void) extent_map_exit(); extent_io_exit(); btrfs_interface_exit(); + btrfs_end_io_wq_exit(); unregister_filesystem(&btrfs_fs_type); btrfs_exit_sysfs(); btrfs_cleanup_fs_uuids(); diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 1475979..286213c 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -672,7 +672,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans, * is this extent already allocated in the extent * allocation tree? If so, just add a reference */ - ret = btrfs_lookup_extent(root, ins.objectid, + ret = btrfs_lookup_data_extent(root, ins.objectid, ins.offset); if (ret == 0) { ret = btrfs_inc_extent_ref(trans, root, diff --git a/fs/buffer.c b/fs/buffer.c index 6c48f20e..20805db 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -128,21 +128,15 @@ __clear_page_buffers(struct page *page) page_cache_release(page); } - -static int quiet_error(struct buffer_head *bh) -{ - if (!test_bit(BH_Quiet, &bh->b_state) && printk_ratelimit()) - return 0; - return 1; -} - - -static void buffer_io_error(struct buffer_head *bh) +static void buffer_io_error(struct buffer_head *bh, char *msg) { char b[BDEVNAME_SIZE]; - printk(KERN_ERR "Buffer I/O error on device %s, logical block %Lu\n", + + if (!test_bit(BH_Quiet, &bh->b_state)) + printk_ratelimited(KERN_ERR + "Buffer I/O error on dev %s, logical block %llu%s\n", bdevname(bh->b_bdev, b), - (unsigned long long)bh->b_blocknr); + (unsigned long long)bh->b_blocknr, msg); } /* @@ -177,17 +171,10 @@ EXPORT_SYMBOL(end_buffer_read_sync); void end_buffer_write_sync(struct buffer_head *bh, int uptodate) { - char b[BDEVNAME_SIZE]; - if (uptodate) { set_buffer_uptodate(bh); } else { - if (!quiet_error(bh)) { - buffer_io_error(bh); - printk(KERN_WARNING "lost page write due to " - "I/O error on %s\n", - bdevname(bh->b_bdev, b)); - } + buffer_io_error(bh, ", lost sync page write"); set_buffer_write_io_error(bh); clear_buffer_uptodate(bh); } @@ -304,8 +291,7 @@ static void end_buffer_async_read(struct buffer_head *bh, int uptodate) set_buffer_uptodate(bh); } else { clear_buffer_uptodate(bh); - if (!quiet_error(bh)) - buffer_io_error(bh); + buffer_io_error(bh, ", async page read"); SetPageError(page); } @@ -353,7 +339,6 @@ still_busy: */ void end_buffer_async_write(struct buffer_head *bh, int uptodate) { - char b[BDEVNAME_SIZE]; unsigned long flags; struct buffer_head *first; struct buffer_head *tmp; @@ -365,12 +350,7 @@ void end_buffer_async_write(struct buffer_head *bh, int uptodate) if (uptodate) { set_buffer_uptodate(bh); } else { - if (!quiet_error(bh)) { - buffer_io_error(bh); - printk(KERN_WARNING "lost page write due to " - "I/O error on %s\n", - bdevname(bh->b_bdev, b)); - } + buffer_io_error(bh, ", lost async page write"); set_bit(AS_EIO, &page->mapping->flags); set_buffer_write_io_error(bh); clear_buffer_uptodate(bh); diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 659f2ea..cefca66 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -2638,7 +2638,7 @@ static void handle_cap_flush_ack(struct inode *inode, u64 flush_tid, for (i = 0; i < CEPH_CAP_BITS; i++) if ((dirty & (1 << i)) && - flush_tid == ci->i_cap_flush_tid[i]) + (u16)flush_tid == ci->i_cap_flush_tid[i]) cleaned |= 1 << i; dout("handle_cap_flush_ack inode %p mds%d seq %d on %s cleaned %s," diff --git a/fs/dcache.c b/fs/dcache.c index a6c5d7e..e368d4f 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -777,6 +777,7 @@ restart: struct dentry *parent = lock_parent(dentry); if (likely(!dentry->d_lockref.count)) { __dentry_kill(dentry); + dput(parent); goto restart; } if (parent) diff --git a/fs/ext3/super.c b/fs/ext3/super.c index 7015db0..eb742d0 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -1354,13 +1354,6 @@ set_qf_format: "not specified."); return 0; } - } else { - if (sbi->s_jquota_fmt) { - ext3_msg(sb, KERN_ERR, "error: journaled quota format " - "specified with no journaling " - "enabled."); - return 0; - } } #endif return 1; diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 37043d0..0b16fb4 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -3603,11 +3603,10 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, } } - allocated = ext4_split_extent(handle, inode, ppath, - &split_map, split_flag, flags); - if (allocated < 0) - err = allocated; - + err = ext4_split_extent(handle, inode, ppath, &split_map, split_flag, + flags); + if (err > 0) + err = 0; out: /* If we have gotten a failure, don't zero out status tree */ if (!err) diff --git a/fs/ext4/file.c b/fs/ext4/file.c index aca7b24..8131be8 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -137,10 +137,10 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from) iov_iter_truncate(from, sbi->s_bitmap_maxbytes - pos); } + iocb->private = &overwrite; if (o_direct) { blk_start_plug(&plug); - iocb->private = &overwrite; /* check whether we do a DIO overwrite or not */ if (ext4_should_dioread_nolock(inode) && !aio_mutex && diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 8012a5d..ac644c3 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -887,6 +887,10 @@ got: struct buffer_head *block_bitmap_bh; block_bitmap_bh = ext4_read_block_bitmap(sb, group); + if (!block_bitmap_bh) { + err = -EIO; + goto out; + } BUFFER_TRACE(block_bitmap_bh, "get block bitmap access"); err = ext4_journal_get_write_access(handle, block_bitmap_bh); if (err) { diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index e9777f9..3356ab5 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -4959,7 +4959,12 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val) if (val) ext4_set_inode_flag(inode, EXT4_INODE_JOURNAL_DATA); else { - jbd2_journal_flush(journal); + err = jbd2_journal_flush(journal); + if (err < 0) { + jbd2_journal_unlock_updates(journal); + ext4_inode_resume_unlocked_dio(inode); + return err; + } ext4_clear_inode_flag(inode, EXT4_INODE_JOURNAL_DATA); } ext4_set_aops(inode); diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 123798c..4262118 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -1816,31 +1816,39 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry, hinfo.hash_version += EXT4_SB(dir->i_sb)->s_hash_unsigned; hinfo.seed = EXT4_SB(dir->i_sb)->s_hash_seed; ext4fs_dirhash(name, namelen, &hinfo); + memset(frames, 0, sizeof(frames)); frame = frames; frame->entries = entries; frame->at = entries; frame->bh = bh; bh = bh2; - ext4_handle_dirty_dx_node(handle, dir, frame->bh); - ext4_handle_dirty_dirent_node(handle, dir, bh); + retval = ext4_handle_dirty_dx_node(handle, dir, frame->bh); + if (retval) + goto out_frames; + retval = ext4_handle_dirty_dirent_node(handle, dir, bh); + if (retval) + goto out_frames; de = do_split(handle,dir, &bh, frame, &hinfo); if (IS_ERR(de)) { - /* - * Even if the block split failed, we have to properly write - * out all the changes we did so far. Otherwise we can end up - * with corrupted filesystem. - */ - ext4_mark_inode_dirty(handle, dir); - dx_release(frames); - return PTR_ERR(de); + retval = PTR_ERR(de); + goto out_frames; } dx_release(frames); retval = add_dirent_to_buf(handle, dentry, inode, de, bh); brelse(bh); return retval; +out_frames: + /* + * Even if the block split failed, we have to properly write + * out all the changes we did so far. Otherwise we can end up + * with corrupted filesystem. + */ + ext4_mark_inode_dirty(handle, dir); + dx_release(frames); + return retval; } /* diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index f298c60..ca45883 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -1081,7 +1081,7 @@ static void update_backups(struct super_block *sb, int blk_off, char *data, break; if (meta_bg == 0) - backup_block = group * bpg + blk_off; + backup_block = ((ext4_fsblk_t)group) * bpg + blk_off; else backup_block = (ext4_group_first_block_no(sb, group) + ext4_bg_has_super(sb, group)); diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 1eda6ab..2c9e686 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -3526,6 +3526,10 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) #ifdef CONFIG_EXT4_FS_POSIX_ACL set_opt(sb, POSIX_ACL); #endif + /* don't forget to enable journal_csum when metadata_csum is enabled. */ + if (ext4_has_metadata_csum(sb)) + set_opt(sb, JOURNAL_CHECKSUM); + if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_DATA) set_opt(sb, JOURNAL_DATA); else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_ORDERED) @@ -3943,7 +3947,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_MMP) && !(sb->s_flags & MS_RDONLY)) if (ext4_multi_mount_protect(sb, le64_to_cpu(es->s_mmp_block))) - goto failed_mount3; + goto failed_mount3a; /* * The first inode we look at is the journal inode. Don't try @@ -3952,7 +3956,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) if (!test_opt(sb, NOLOAD) && EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)) { if (ext4_load_journal(sb, es, journal_devnum)) - goto failed_mount3; + goto failed_mount3a; } else if (test_opt(sb, NOLOAD) && !(sb->s_flags & MS_RDONLY) && EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) { ext4_msg(sb, KERN_ERR, "required journal recovery " @@ -4240,6 +4244,7 @@ failed_mount_wq: jbd2_journal_destroy(sbi->s_journal); sbi->s_journal = NULL; } +failed_mount3a: ext4_es_unregister_shrinker(sbi); failed_mount3: del_timer_sync(&sbi->s_err_report); @@ -4841,6 +4846,14 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) goto restore_opts; } + if ((old_opts.s_mount_opt & EXT4_MOUNT_JOURNAL_CHECKSUM) ^ + test_opt(sb, JOURNAL_CHECKSUM)) { + ext4_msg(sb, KERN_ERR, "changing journal_checksum " + "during remount not supported"); + err = -EINVAL; + goto restore_opts; + } + if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) { if (test_opt2(sb, EXPLICIT_DELALLOC)) { ext4_msg(sb, KERN_ERR, "can't mount with " diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c index fe839b9..d67a16f 100644 --- a/fs/isofs/inode.c +++ b/fs/isofs/inode.c @@ -174,27 +174,6 @@ struct iso9660_options{ * Compute the hash for the isofs name corresponding to the dentry. */ static int -isofs_hash_common(struct qstr *qstr, int ms) -{ - const char *name; - int len; - - len = qstr->len; - name = qstr->name; - if (ms) { - while (len && name[len-1] == '.') - len--; - } - - qstr->hash = full_name_hash(name, len); - - return 0; -} - -/* - * Compute the hash for the isofs name corresponding to the dentry. - */ -static int isofs_hashi_common(struct qstr *qstr, int ms) { const char *name; @@ -263,6 +242,27 @@ isofs_dentry_cmpi(const struct dentry *parent, const struct dentry *dentry, } #ifdef CONFIG_JOLIET +/* + * Compute the hash for the isofs name corresponding to the dentry. + */ +static int +isofs_hash_common(struct qstr *qstr, int ms) +{ + const char *name; + int len; + + len = qstr->len; + name = qstr->name; + if (ms) { + while (len && name[len-1] == '.') + len--; + } + + qstr->hash = full_name_hash(name, len); + + return 0; +} + static int isofs_hash_ms(const struct dentry *dentry, struct qstr *qstr) { diff --git a/fs/jbd/revoke.c b/fs/jbd/revoke.c index 8898bbd..dcead63 100644 --- a/fs/jbd/revoke.c +++ b/fs/jbd/revoke.c @@ -93,6 +93,7 @@ #include <linux/bio.h> #endif #include <linux/log2.h> +#include <linux/hash.h> static struct kmem_cache *revoke_record_cache; static struct kmem_cache *revoke_table_cache; @@ -129,15 +130,11 @@ static void flush_descriptor(journal_t *, struct journal_head *, int, int); /* Utility functions to maintain the revoke table */ -/* Borrowed from buffer.c: this is a tried and tested block hash function */ static inline int hash(journal_t *journal, unsigned int block) { struct jbd_revoke_table_s *table = journal->j_revoke; - int hash_shift = table->hash_shift; - return ((block << (hash_shift - 6)) ^ - (block >> 13) ^ - (block << (hash_shift - 12))) & (table->hash_size - 1); + return hash_32(block, table->hash_shift); } static int insert_revoke_hash(journal_t *journal, unsigned int blocknr, diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c index d5e95a1..c6cbaef 100644 --- a/fs/jbd2/revoke.c +++ b/fs/jbd2/revoke.c @@ -92,6 +92,7 @@ #include <linux/init.h> #include <linux/bio.h> #include <linux/log2.h> +#include <linux/hash.h> #endif static struct kmem_cache *jbd2_revoke_record_cache; @@ -130,16 +131,9 @@ static void flush_descriptor(journal_t *, struct buffer_head *, int, int); /* Utility functions to maintain the revoke table */ -/* Borrowed from buffer.c: this is a tried and tested block hash function */ static inline int hash(journal_t *journal, unsigned long long block) { - struct jbd2_revoke_table_s *table = journal->j_revoke; - int hash_shift = table->hash_shift; - int hash = (int)block ^ (int)((block >> 31) >> 1); - - return ((hash << (hash_shift - 6)) ^ - (hash >> 13) ^ - (hash << (hash_shift - 12))) & (table->hash_size - 1); + return hash_64(block, journal->j_revoke->hash_shift); } static int insert_revoke_hash(journal_t *journal, unsigned long long blocknr, @@ -3154,7 +3154,8 @@ static int do_tmpfile(int dfd, struct filename *pathname, if (error) goto out2; audit_inode(pathname, nd->path.dentry, 0); - error = may_open(&nd->path, op->acc_mode, op->open_flag); + /* Don't check for other permissions, the inode was just created */ + error = may_open(&nd->path, MAY_OPEN, op->open_flag); if (error) goto out2; file->f_path.mnt = nd->path.mnt; diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c index 5228f20..4f46f7a 100644 --- a/fs/nfs/blocklayout/blocklayout.c +++ b/fs/nfs/blocklayout/blocklayout.c @@ -378,7 +378,7 @@ bl_write_pagelist(struct nfs_pgio_header *header, int sync) loff_t offset = header->args.offset; size_t count = header->args.count; struct page **pages = header->args.pages; - int pg_index = pg_index = header->args.pgbase >> PAGE_CACHE_SHIFT; + int pg_index = header->args.pgbase >> PAGE_CACHE_SHIFT; unsigned int pg_len; struct blk_plug plug; int i; diff --git a/fs/nfs/blocklayout/rpc_pipefs.c b/fs/nfs/blocklayout/rpc_pipefs.c index 2a15fa4..dbe5839 100644 --- a/fs/nfs/blocklayout/rpc_pipefs.c +++ b/fs/nfs/blocklayout/rpc_pipefs.c @@ -65,17 +65,18 @@ bl_resolve_deviceid(struct nfs_server *server, struct pnfs_block_volume *b, dprintk("%s CREATING PIPEFS MESSAGE\n", __func__); + mutex_lock(&nn->bl_mutex); bl_pipe_msg.bl_wq = &nn->bl_wq; b->simple.len += 4; /* single volume */ if (b->simple.len > PAGE_SIZE) - return -EIO; + goto out_unlock; memset(msg, 0, sizeof(*msg)); msg->len = sizeof(*bl_msg) + b->simple.len; msg->data = kzalloc(msg->len, gfp_mask); if (!msg->data) - goto out; + goto out_free_data; bl_msg = msg->data; bl_msg->type = BL_DEVICE_MOUNT, @@ -87,7 +88,7 @@ bl_resolve_deviceid(struct nfs_server *server, struct pnfs_block_volume *b, rc = rpc_queue_upcall(nn->bl_device_pipe, msg); if (rc < 0) { remove_wait_queue(&nn->bl_wq, &wq); - goto out; + goto out_free_data; } set_current_state(TASK_UNINTERRUPTIBLE); @@ -97,12 +98,14 @@ bl_resolve_deviceid(struct nfs_server *server, struct pnfs_block_volume *b, if (reply->status != BL_DEVICE_REQUEST_PROC) { printk(KERN_WARNING "%s failed to decode device: %d\n", __func__, reply->status); - goto out; + goto out_free_data; } dev = MKDEV(reply->major, reply->minor); -out: +out_free_data: kfree(msg->data); +out_unlock: + mutex_unlock(&nn->bl_mutex); return dev; } @@ -232,6 +235,7 @@ static int nfs4blocklayout_net_init(struct net *net) struct nfs_net *nn = net_generic(net, nfs_net_id); struct dentry *dentry; + mutex_init(&nn->bl_mutex); init_waitqueue_head(&nn->bl_wq); nn->bl_device_pipe = rpc_mkpipe_data(&bl_upcall_ops, 0); if (IS_ERR(nn->bl_device_pipe)) diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index 5853f53..7f3f606 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -125,6 +125,8 @@ again: continue; if (!test_bit(NFS_DELEGATED_STATE, &state->flags)) continue; + if (!nfs4_valid_open_stateid(state)) + continue; if (!nfs4_stateid_match(&state->stateid, stateid)) continue; get_nfs_open_context(ctx); @@ -193,7 +195,11 @@ static int nfs_do_return_delegation(struct inode *inode, struct nfs_delegation * { int res = 0; - res = nfs4_proc_delegreturn(inode, delegation->cred, &delegation->stateid, issync); + if (!test_bit(NFS_DELEGATION_REVOKED, &delegation->flags)) + res = nfs4_proc_delegreturn(inode, + delegation->cred, + &delegation->stateid, + issync); nfs_free_delegation(delegation); return res; } @@ -380,11 +386,13 @@ static int nfs_end_delegation_return(struct inode *inode, struct nfs_delegation { struct nfs_client *clp = NFS_SERVER(inode)->nfs_client; struct nfs_inode *nfsi = NFS_I(inode); - int err; + int err = 0; if (delegation == NULL) return 0; do { + if (test_bit(NFS_DELEGATION_REVOKED, &delegation->flags)) + break; err = nfs_delegation_claim_opens(inode, &delegation->stateid); if (!issync || err != -EAGAIN) break; @@ -605,10 +613,23 @@ static void nfs_client_mark_return_unused_delegation_types(struct nfs_client *cl rcu_read_unlock(); } +static void nfs_revoke_delegation(struct inode *inode) +{ + struct nfs_delegation *delegation; + rcu_read_lock(); + delegation = rcu_dereference(NFS_I(inode)->delegation); + if (delegation != NULL) { + set_bit(NFS_DELEGATION_REVOKED, &delegation->flags); + nfs_mark_return_delegation(NFS_SERVER(inode), delegation); + } + rcu_read_unlock(); +} + void nfs_remove_bad_delegation(struct inode *inode) { struct nfs_delegation *delegation; + nfs_revoke_delegation(inode); delegation = nfs_inode_detach_delegation(inode); if (delegation) { nfs_inode_find_state_and_recover(inode, &delegation->stateid); diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h index 5c1cce3..e3c20a3 100644 --- a/fs/nfs/delegation.h +++ b/fs/nfs/delegation.h @@ -31,6 +31,7 @@ enum { NFS_DELEGATION_RETURN_IF_CLOSED, NFS_DELEGATION_REFERENCED, NFS_DELEGATION_RETURNING, + NFS_DELEGATION_REVOKED, }; int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res); diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 105ccc3..9b0c55c 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1527,6 +1527,7 @@ int nfs_atomic_open(struct inode *dir, struct dentry *dentry, case -ENOENT: d_drop(dentry); d_add(dentry, NULL); + nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); break; case -EISDIR: case -ENOTDIR: diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 20cffc8..10bf072 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -266,6 +266,7 @@ static void nfs_direct_req_free(struct kref *kref) { struct nfs_direct_req *dreq = container_of(kref, struct nfs_direct_req, kref); + nfs_free_pnfs_ds_cinfo(&dreq->ds_cinfo); if (dreq->l_ctx != NULL) nfs_put_lock_context(dreq->l_ctx); if (dreq->ctx != NULL) diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c index 46fab1cb..7afb52f 100644 --- a/fs/nfs/filelayout/filelayout.c +++ b/fs/nfs/filelayout/filelayout.c @@ -145,9 +145,6 @@ static int filelayout_async_handle_error(struct rpc_task *task, case -NFS4ERR_DELEG_REVOKED: case -NFS4ERR_ADMIN_REVOKED: case -NFS4ERR_BAD_STATEID: - if (state == NULL) - break; - nfs_remove_bad_delegation(state->inode); case -NFS4ERR_OPENMODE: if (state == NULL) break; diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 6388a59..00689a8 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -626,7 +626,7 @@ int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) { struct inode *inode = dentry->d_inode; int need_atime = NFS_I(inode)->cache_validity & NFS_INO_INVALID_ATIME; - int err; + int err = 0; trace_nfs_getattr_enter(inode); /* Flush out writes to the server in order to update c/mtime. */ diff --git a/fs/nfs/netns.h b/fs/nfs/netns.h index ef221fb..f0e06e4 100644 --- a/fs/nfs/netns.h +++ b/fs/nfs/netns.h @@ -19,6 +19,7 @@ struct nfs_net { struct rpc_pipe *bl_device_pipe; struct bl_dev_msg bl_mount_reply; wait_queue_head_t bl_wq; + struct mutex bl_mutex; struct list_head nfs_client_list; struct list_head nfs_volume_list; #if IS_ENABLED(CONFIG_NFS_V4) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 405bd95..69dc20a 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -370,11 +370,6 @@ static int nfs4_handle_exception(struct nfs_server *server, int errorcode, struc case -NFS4ERR_DELEG_REVOKED: case -NFS4ERR_ADMIN_REVOKED: case -NFS4ERR_BAD_STATEID: - if (inode != NULL && nfs4_have_delegation(inode, FMODE_READ)) { - nfs_remove_bad_delegation(inode); - exception->retry = 1; - break; - } if (state == NULL) break; ret = nfs4_schedule_stateid_recovery(server, state); @@ -1654,7 +1649,7 @@ static int nfs4_handle_delegation_recall_error(struct nfs_server *server, struct nfs_inode_find_state_and_recover(state->inode, stateid); nfs4_schedule_stateid_recovery(server, state); - return 0; + return -EAGAIN; case -NFS4ERR_DELAY: case -NFS4ERR_GRACE: set_bit(NFS_DELEGATED_STATE, &state->flags); @@ -2109,46 +2104,60 @@ static int nfs4_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *sta return ret; } +static void nfs_finish_clear_delegation_stateid(struct nfs4_state *state) +{ + nfs_remove_bad_delegation(state->inode); + write_seqlock(&state->seqlock); + nfs4_stateid_copy(&state->stateid, &state->open_stateid); + write_sequnlock(&state->seqlock); + clear_bit(NFS_DELEGATED_STATE, &state->flags); +} + +static void nfs40_clear_delegation_stateid(struct nfs4_state *state) +{ + if (rcu_access_pointer(NFS_I(state->inode)->delegation) != NULL) + nfs_finish_clear_delegation_stateid(state); +} + +static int nfs40_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *state) +{ + /* NFSv4.0 doesn't allow for delegation recovery on open expire */ + nfs40_clear_delegation_stateid(state); + return nfs4_open_expired(sp, state); +} + #if defined(CONFIG_NFS_V4_1) -static void nfs41_clear_delegation_stateid(struct nfs4_state *state) +static void nfs41_check_delegation_stateid(struct nfs4_state *state) { struct nfs_server *server = NFS_SERVER(state->inode); - nfs4_stateid *stateid = &state->stateid; + nfs4_stateid stateid; struct nfs_delegation *delegation; - struct rpc_cred *cred = NULL; - int status = -NFS4ERR_BAD_STATEID; - - /* If a state reset has been done, test_stateid is unneeded */ - if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0) - return; + struct rpc_cred *cred; + int status; /* Get the delegation credential for use by test/free_stateid */ rcu_read_lock(); delegation = rcu_dereference(NFS_I(state->inode)->delegation); - if (delegation != NULL && - nfs4_stateid_match(&delegation->stateid, stateid)) { - cred = get_rpccred(delegation->cred); - rcu_read_unlock(); - status = nfs41_test_stateid(server, stateid, cred); - trace_nfs4_test_delegation_stateid(state, NULL, status); - } else + if (delegation == NULL) { rcu_read_unlock(); + return; + } + + nfs4_stateid_copy(&stateid, &delegation->stateid); + cred = get_rpccred(delegation->cred); + rcu_read_unlock(); + status = nfs41_test_stateid(server, &stateid, cred); + trace_nfs4_test_delegation_stateid(state, NULL, status); if (status != NFS_OK) { /* Free the stateid unless the server explicitly * informs us the stateid is unrecognized. */ if (status != -NFS4ERR_BAD_STATEID) - nfs41_free_stateid(server, stateid, cred); - nfs_remove_bad_delegation(state->inode); - - write_seqlock(&state->seqlock); - nfs4_stateid_copy(&state->stateid, &state->open_stateid); - write_sequnlock(&state->seqlock); - clear_bit(NFS_DELEGATED_STATE, &state->flags); + nfs41_free_stateid(server, &stateid, cred); + nfs_finish_clear_delegation_stateid(state); } - if (cred != NULL) - put_rpccred(cred); + put_rpccred(cred); } /** @@ -2192,7 +2201,7 @@ static int nfs41_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *st { int status; - nfs41_clear_delegation_stateid(state); + nfs41_check_delegation_stateid(state); status = nfs41_check_open_stateid(state); if (status != NFS_OK) status = nfs4_open_expired(sp, state); @@ -2231,19 +2240,8 @@ static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata, seq = raw_seqcount_begin(&sp->so_reclaim_seqcount); ret = _nfs4_proc_open(opendata); - if (ret != 0) { - if (ret == -ENOENT) { - dentry = opendata->dentry; - if (dentry->d_inode) - d_delete(dentry); - else if (d_unhashed(dentry)) - d_add(dentry, NULL); - - nfs_set_verifier(dentry, - nfs_save_change_attribute(opendata->dir->d_inode)); - } + if (ret != 0) goto out; - } state = nfs4_opendata_to_nfs4_state(opendata); ret = PTR_ERR(state); @@ -4841,9 +4839,6 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server, case -NFS4ERR_DELEG_REVOKED: case -NFS4ERR_ADMIN_REVOKED: case -NFS4ERR_BAD_STATEID: - if (state == NULL) - break; - nfs_remove_bad_delegation(state->inode); case -NFS4ERR_OPENMODE: if (state == NULL) break; @@ -8341,7 +8336,7 @@ static const struct nfs4_state_recovery_ops nfs41_reboot_recovery_ops = { static const struct nfs4_state_recovery_ops nfs40_nograce_recovery_ops = { .owner_flag_bit = NFS_OWNER_RECLAIM_NOGRACE, .state_flag_bit = NFS_STATE_RECLAIM_NOGRACE, - .recover_open = nfs4_open_expired, + .recover_open = nfs40_open_expired, .recover_lock = nfs4_lock_expired, .establish_clid = nfs4_init_clientid, }; @@ -8408,8 +8403,7 @@ static const struct nfs4_minor_version_ops nfs_v4_1_minor_ops = { | NFS_CAP_CHANGE_ATTR | NFS_CAP_POSIX_LOCK | NFS_CAP_STATEID_NFSV41 - | NFS_CAP_ATOMIC_OPEN_V1 - | NFS_CAP_SEEK, + | NFS_CAP_ATOMIC_OPEN_V1, .init_client = nfs41_init_client, .shutdown_client = nfs41_shutdown_client, .match_stateid = nfs41_match_stateid, @@ -8431,7 +8425,8 @@ static const struct nfs4_minor_version_ops nfs_v4_2_minor_ops = { | NFS_CAP_CHANGE_ATTR | NFS_CAP_POSIX_LOCK | NFS_CAP_STATEID_NFSV41 - | NFS_CAP_ATOMIC_OPEN_V1, + | NFS_CAP_ATOMIC_OPEN_V1 + | NFS_CAP_SEEK, .init_client = nfs41_init_client, .shutdown_client = nfs41_shutdown_client, .match_stateid = nfs41_match_stateid, diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 1249384..f83b02d 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -715,8 +715,6 @@ static void nfs_inode_remove_request(struct nfs_page *req) if (test_and_clear_bit(PG_INODE_REF, &req->wb_flags)) nfs_release_request(req); - else - WARN_ON_ONCE(1); } static void diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index cdeb3cf..0beb023 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1272,7 +1272,8 @@ static bool need_wrongsec_check(struct svc_rqst *rqstp) */ if (argp->opcnt == resp->opcnt) return false; - + if (next->opnum == OP_ILLEGAL) + return false; nextd = OPDESC(next); /* * Rest of 2.6.3.1.1: certain operations will return WRONGSEC @@ -1589,7 +1590,8 @@ static inline u32 nfsd4_rename_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op static inline u32 nfsd4_sequence_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) { - return NFS4_MAX_SESSIONID_LEN + 20; + return (op_encode_hdr_size + + XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + 5) * sizeof(__be32); } static inline u32 nfsd4_setattr_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) @@ -1893,6 +1895,7 @@ static struct nfsd4_operation nfsd4_ops[] = { .op_func = (nfsd4op_func)nfsd4_sequence, .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_AS_FIRST_OP, .op_name = "OP_SEQUENCE", + .op_rsize_bop = (nfsd4op_rsize)nfsd4_sequence_rsize, }, [OP_DESTROY_CLIENTID] = { .op_func = (nfsd4op_func)nfsd4_destroy_clientid, diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c index 7001299..41e39102 100644 --- a/fs/notify/fsnotify.c +++ b/fs/notify/fsnotify.c @@ -229,8 +229,16 @@ int fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is, &fsnotify_mark_srcu); } + /* + * We need to merge inode & vfsmount mark lists so that inode mark + * ignore masks are properly reflected for mount mark notifications. + * That's why this traversal is so complicated... + */ while (inode_node || vfsmount_node) { - inode_group = vfsmount_group = NULL; + inode_group = NULL; + inode_mark = NULL; + vfsmount_group = NULL; + vfsmount_mark = NULL; if (inode_node) { inode_mark = hlist_entry(srcu_dereference(inode_node, &fsnotify_mark_srcu), @@ -244,21 +252,19 @@ int fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is, vfsmount_group = vfsmount_mark->group; } - if (inode_group > vfsmount_group) { - /* handle inode */ - ret = send_to_group(to_tell, inode_mark, NULL, mask, - data, data_is, cookie, file_name); - /* we didn't use the vfsmount_mark */ - vfsmount_group = NULL; - } else if (vfsmount_group > inode_group) { - ret = send_to_group(to_tell, NULL, vfsmount_mark, mask, - data, data_is, cookie, file_name); - inode_group = NULL; - } else { - ret = send_to_group(to_tell, inode_mark, vfsmount_mark, - mask, data, data_is, cookie, - file_name); + if (inode_group && vfsmount_group) { + int cmp = fsnotify_compare_groups(inode_group, + vfsmount_group); + if (cmp > 0) { + inode_group = NULL; + inode_mark = NULL; + } else if (cmp < 0) { + vfsmount_group = NULL; + vfsmount_mark = NULL; + } } + ret = send_to_group(to_tell, inode_mark, vfsmount_mark, mask, + data, data_is, cookie, file_name); if (ret && (mask & ALL_FSNOTIFY_PERM_EVENTS)) goto out; diff --git a/fs/notify/fsnotify.h b/fs/notify/fsnotify.h index 9c0898c..3b68b0a 100644 --- a/fs/notify/fsnotify.h +++ b/fs/notify/fsnotify.h @@ -12,6 +12,10 @@ extern void fsnotify_flush_notify(struct fsnotify_group *group); /* protects reads of inode and vfsmount marks list */ extern struct srcu_struct fsnotify_mark_srcu; +/* compare two groups for sorting of marks lists */ +extern int fsnotify_compare_groups(struct fsnotify_group *a, + struct fsnotify_group *b); + extern void fsnotify_set_inode_mark_mask_locked(struct fsnotify_mark *fsn_mark, __u32 mask); /* add a mark to an inode */ diff --git a/fs/notify/inode_mark.c b/fs/notify/inode_mark.c index 9ce0622..dfbf544 100644 --- a/fs/notify/inode_mark.c +++ b/fs/notify/inode_mark.c @@ -194,6 +194,7 @@ int fsnotify_add_inode_mark(struct fsnotify_mark *mark, { struct fsnotify_mark *lmark, *last = NULL; int ret = 0; + int cmp; mark->flags |= FSNOTIFY_MARK_FLAG_INODE; @@ -219,11 +220,8 @@ int fsnotify_add_inode_mark(struct fsnotify_mark *mark, goto out; } - if (mark->group->priority < lmark->group->priority) - continue; - - if ((mark->group->priority == lmark->group->priority) && - (mark->group < lmark->group)) + cmp = fsnotify_compare_groups(lmark->group, mark->group); + if (cmp < 0) continue; hlist_add_before_rcu(&mark->i.i_list, &lmark->i.i_list); @@ -288,20 +286,25 @@ void fsnotify_unmount_inodes(struct list_head *list) spin_unlock(&inode->i_lock); /* In case the dropping of a reference would nuke next_i. */ - if ((&next_i->i_sb_list != list) && - atomic_read(&next_i->i_count)) { + while (&next_i->i_sb_list != list) { spin_lock(&next_i->i_lock); - if (!(next_i->i_state & (I_FREEING | I_WILL_FREE))) { + if (!(next_i->i_state & (I_FREEING | I_WILL_FREE)) && + atomic_read(&next_i->i_count)) { __iget(next_i); need_iput = next_i; + spin_unlock(&next_i->i_lock); + break; } spin_unlock(&next_i->i_lock); + next_i = list_entry(next_i->i_sb_list.next, + struct inode, i_sb_list); } /* - * We can safely drop inode_sb_list_lock here because we hold - * references on both inode and next_i. Also no new inodes - * will be added since the umount has begun. + * We can safely drop inode_sb_list_lock here because either + * we actually hold references on both inode and next_i or + * end of list. Also no new inodes will be added since the + * umount has begun. */ spin_unlock(&inode_sb_list_lock); diff --git a/fs/notify/mark.c b/fs/notify/mark.c index d90deaa..34c38fa 100644 --- a/fs/notify/mark.c +++ b/fs/notify/mark.c @@ -210,6 +210,42 @@ void fsnotify_set_mark_ignored_mask_locked(struct fsnotify_mark *mark, __u32 mas } /* + * Sorting function for lists of fsnotify marks. + * + * Fanotify supports different notification classes (reflected as priority of + * notification group). Events shall be passed to notification groups in + * decreasing priority order. To achieve this marks in notification lists for + * inodes and vfsmounts are sorted so that priorities of corresponding groups + * are descending. + * + * Furthermore correct handling of the ignore mask requires processing inode + * and vfsmount marks of each group together. Using the group address as + * further sort criterion provides a unique sorting order and thus we can + * merge inode and vfsmount lists of marks in linear time and find groups + * present in both lists. + * + * A return value of 1 signifies that b has priority over a. + * A return value of 0 signifies that the two marks have to be handled together. + * A return value of -1 signifies that a has priority over b. + */ +int fsnotify_compare_groups(struct fsnotify_group *a, struct fsnotify_group *b) +{ + if (a == b) + return 0; + if (!a) + return 1; + if (!b) + return -1; + if (a->priority < b->priority) + return 1; + if (a->priority > b->priority) + return -1; + if (a < b) + return 1; + return -1; +} + +/* * Attach an initialized mark to a given group and fs object. * These marks may be used for the fsnotify backend to determine which * event types should be delivered to which group. diff --git a/fs/notify/vfsmount_mark.c b/fs/notify/vfsmount_mark.c index ac851e8..faefa72 100644 --- a/fs/notify/vfsmount_mark.c +++ b/fs/notify/vfsmount_mark.c @@ -153,6 +153,7 @@ int fsnotify_add_vfsmount_mark(struct fsnotify_mark *mark, struct mount *m = real_mount(mnt); struct fsnotify_mark *lmark, *last = NULL; int ret = 0; + int cmp; mark->flags |= FSNOTIFY_MARK_FLAG_VFSMOUNT; @@ -178,11 +179,8 @@ int fsnotify_add_vfsmount_mark(struct fsnotify_mark *mark, goto out; } - if (mark->group->priority < lmark->group->priority) - continue; - - if ((mark->group->priority == lmark->group->priority) && - (mark->group < lmark->group)) + cmp = fsnotify_compare_groups(lmark->group, mark->group); + if (cmp < 0) continue; hlist_add_before_rcu(&mark->m.m_list, &lmark->m.m_list); diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c index 97de0fb..a960440 100644 --- a/fs/ocfs2/cluster/tcp.c +++ b/fs/ocfs2/cluster/tcp.c @@ -925,7 +925,7 @@ static int o2net_send_tcp_msg(struct socket *sock, struct kvec *vec, size_t veclen, size_t total) { int ret; - struct msghdr msg; + struct msghdr msg = {.msg_flags = 0,}; if (sock == NULL) { ret = -EINVAL; diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index 8add6f1..b931e04 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c @@ -158,7 +158,7 @@ bail_add: * NOTE: This dentry already has ->d_op set from * ocfs2_get_parent() and ocfs2_get_dentry() */ - if (ret) + if (!IS_ERR_OR_NULL(ret)) dentry = ret; status = ocfs2_dentry_attach_lock(dentry, inode, diff --git a/fs/overlayfs/Kconfig b/fs/overlayfs/Kconfig index e601259..3435581 100644 --- a/fs/overlayfs/Kconfig +++ b/fs/overlayfs/Kconfig @@ -1,4 +1,4 @@ -config OVERLAYFS_FS +config OVERLAY_FS tristate "Overlay filesystem support" help An overlay filesystem combines two filesystems - an 'upper' filesystem diff --git a/fs/overlayfs/Makefile b/fs/overlayfs/Makefile index 8f91889..900daed 100644 --- a/fs/overlayfs/Makefile +++ b/fs/overlayfs/Makefile @@ -2,6 +2,6 @@ # Makefile for the overlay filesystem. # -obj-$(CONFIG_OVERLAYFS_FS) += overlayfs.o +obj-$(CONFIG_OVERLAY_FS) += overlay.o -overlayfs-objs := super.o inode.o dir.o readdir.o copy_up.o +overlay-objs := super.o inode.o dir.o readdir.o copy_up.o diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c index 15cd91a..8ffc4b9 100644 --- a/fs/overlayfs/dir.c +++ b/fs/overlayfs/dir.c @@ -284,8 +284,7 @@ out: return ERR_PTR(err); } -static struct dentry *ovl_check_empty_and_clear(struct dentry *dentry, - enum ovl_path_type type) +static struct dentry *ovl_check_empty_and_clear(struct dentry *dentry) { int err; struct dentry *ret = NULL; @@ -294,8 +293,17 @@ static struct dentry *ovl_check_empty_and_clear(struct dentry *dentry, err = ovl_check_empty_dir(dentry, &list); if (err) ret = ERR_PTR(err); - else if (type == OVL_PATH_MERGE) - ret = ovl_clear_empty(dentry, &list); + else { + /* + * If no upperdentry then skip clearing whiteouts. + * + * Can race with copy-up, since we don't hold the upperdir + * mutex. Doesn't matter, since copy-up can't create a + * non-empty directory from an empty one. + */ + if (ovl_dentry_upper(dentry)) + ret = ovl_clear_empty(dentry, &list); + } ovl_cache_free(&list); @@ -487,8 +495,7 @@ out: return err; } -static int ovl_remove_and_whiteout(struct dentry *dentry, - enum ovl_path_type type, bool is_dir) +static int ovl_remove_and_whiteout(struct dentry *dentry, bool is_dir) { struct dentry *workdir = ovl_workdir(dentry); struct inode *wdir = workdir->d_inode; @@ -500,7 +507,7 @@ static int ovl_remove_and_whiteout(struct dentry *dentry, int err; if (is_dir) { - opaquedir = ovl_check_empty_and_clear(dentry, type); + opaquedir = ovl_check_empty_and_clear(dentry); err = PTR_ERR(opaquedir); if (IS_ERR(opaquedir)) goto out; @@ -515,9 +522,10 @@ static int ovl_remove_and_whiteout(struct dentry *dentry, if (IS_ERR(whiteout)) goto out_unlock; - if (type == OVL_PATH_LOWER) { + upper = ovl_dentry_upper(dentry); + if (!upper) { upper = lookup_one_len(dentry->d_name.name, upperdir, - dentry->d_name.len); + dentry->d_name.len); err = PTR_ERR(upper); if (IS_ERR(upper)) goto kill_whiteout; @@ -529,7 +537,6 @@ static int ovl_remove_and_whiteout(struct dentry *dentry, } else { int flags = 0; - upper = ovl_dentry_upper(dentry); if (opaquedir) upper = opaquedir; err = -ESTALE; @@ -648,7 +655,7 @@ static int ovl_do_remove(struct dentry *dentry, bool is_dir) cap_raise(override_cred->cap_effective, CAP_CHOWN); old_cred = override_creds(override_cred); - err = ovl_remove_and_whiteout(dentry, type, is_dir); + err = ovl_remove_and_whiteout(dentry, is_dir); revert_creds(old_cred); put_cred(override_cred); @@ -781,7 +788,7 @@ static int ovl_rename2(struct inode *olddir, struct dentry *old, } if (overwrite && (new_type == OVL_PATH_LOWER || new_type == OVL_PATH_MERGE) && new_is_dir) { - opaquedir = ovl_check_empty_and_clear(new, new_type); + opaquedir = ovl_check_empty_and_clear(new); err = PTR_ERR(opaquedir); if (IS_ERR(opaquedir)) { opaquedir = NULL; diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c index af2d18c..07d74b2 100644 --- a/fs/overlayfs/inode.c +++ b/fs/overlayfs/inode.c @@ -235,26 +235,36 @@ out: return err; } +static bool ovl_need_xattr_filter(struct dentry *dentry, + enum ovl_path_type type) +{ + return type == OVL_PATH_UPPER && S_ISDIR(dentry->d_inode->i_mode); +} + ssize_t ovl_getxattr(struct dentry *dentry, const char *name, void *value, size_t size) { - if (ovl_path_type(dentry->d_parent) == OVL_PATH_MERGE && - ovl_is_private_xattr(name)) + struct path realpath; + enum ovl_path_type type = ovl_path_real(dentry, &realpath); + + if (ovl_need_xattr_filter(dentry, type) && ovl_is_private_xattr(name)) return -ENODATA; - return vfs_getxattr(ovl_dentry_real(dentry), name, value, size); + return vfs_getxattr(realpath.dentry, name, value, size); } ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size) { + struct path realpath; + enum ovl_path_type type = ovl_path_real(dentry, &realpath); ssize_t res; int off; - res = vfs_listxattr(ovl_dentry_real(dentry), list, size); + res = vfs_listxattr(realpath.dentry, list, size); if (res <= 0 || size == 0) return res; - if (ovl_path_type(dentry->d_parent) != OVL_PATH_MERGE) + if (!ovl_need_xattr_filter(dentry, type)) return res; /* filter out private xattrs */ @@ -279,17 +289,16 @@ int ovl_removexattr(struct dentry *dentry, const char *name) { int err; struct path realpath; - enum ovl_path_type type; + enum ovl_path_type type = ovl_path_real(dentry, &realpath); err = ovl_want_write(dentry); if (err) goto out; - if (ovl_path_type(dentry->d_parent) == OVL_PATH_MERGE && - ovl_is_private_xattr(name)) + err = -ENODATA; + if (ovl_need_xattr_filter(dentry, type) && ovl_is_private_xattr(name)) goto out_drop_write; - type = ovl_path_real(dentry, &realpath); if (type == OVL_PATH_LOWER) { err = vfs_getxattr(realpath.dentry, name, NULL, 0); if (err < 0) diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c index 301f64a..c020599 100644 --- a/fs/overlayfs/readdir.c +++ b/fs/overlayfs/readdir.c @@ -168,7 +168,7 @@ static void ovl_cache_put(struct ovl_dir_file *od, struct dentry *dentry) { struct ovl_dir_cache *cache = od->cache; - list_del(&od->cursor.l_node); + list_del_init(&od->cursor.l_node); WARN_ON(cache->refcount <= 0); cache->refcount--; if (!cache->refcount) { @@ -276,11 +276,11 @@ static int ovl_dir_mark_whiteouts(struct dentry *dir, return 0; } -static inline int ovl_dir_read_merged(struct path *upperpath, - struct path *lowerpath, - struct list_head *list) +static int ovl_dir_read_merged(struct dentry *dentry, struct list_head *list) { int err; + struct path lowerpath; + struct path upperpath; struct ovl_readdir_data rdd = { .ctx.actor = ovl_fill_merge, .list = list, @@ -288,25 +288,28 @@ static inline int ovl_dir_read_merged(struct path *upperpath, .is_merge = false, }; - if (upperpath->dentry) { - err = ovl_dir_read(upperpath, &rdd); + ovl_path_lower(dentry, &lowerpath); + ovl_path_upper(dentry, &upperpath); + + if (upperpath.dentry) { + err = ovl_dir_read(&upperpath, &rdd); if (err) goto out; - if (lowerpath->dentry) { - err = ovl_dir_mark_whiteouts(upperpath->dentry, &rdd); + if (lowerpath.dentry) { + err = ovl_dir_mark_whiteouts(upperpath.dentry, &rdd); if (err) goto out; } } - if (lowerpath->dentry) { + if (lowerpath.dentry) { /* * Insert lowerpath entries before upperpath ones, this allows * offsets to be reasonably constant */ list_add(&rdd.middle, rdd.list); rdd.is_merge = true; - err = ovl_dir_read(lowerpath, &rdd); + err = ovl_dir_read(&lowerpath, &rdd); list_del(&rdd.middle); } out: @@ -331,8 +334,6 @@ static void ovl_seek_cursor(struct ovl_dir_file *od, loff_t pos) static struct ovl_dir_cache *ovl_cache_get(struct dentry *dentry) { int res; - struct path lowerpath; - struct path upperpath; struct ovl_dir_cache *cache; cache = ovl_dir_cache(dentry); @@ -349,10 +350,7 @@ static struct ovl_dir_cache *ovl_cache_get(struct dentry *dentry) cache->refcount = 1; INIT_LIST_HEAD(&cache->entries); - ovl_path_lower(dentry, &lowerpath); - ovl_path_upper(dentry, &upperpath); - - res = ovl_dir_read_merged(&upperpath, &lowerpath, &cache->entries); + res = ovl_dir_read_merged(dentry, &cache->entries); if (res) { ovl_cache_free(&cache->entries); kfree(cache); @@ -454,10 +452,10 @@ static int ovl_dir_fsync(struct file *file, loff_t start, loff_t end, /* * Need to check if we started out being a lower dir, but got copied up */ - if (!od->is_upper && ovl_path_type(dentry) == OVL_PATH_MERGE) { + if (!od->is_upper && ovl_path_type(dentry) != OVL_PATH_LOWER) { struct inode *inode = file_inode(file); - realfile =lockless_dereference(od->upperfile); + realfile = lockless_dereference(od->upperfile); if (!realfile) { struct path upperpath; @@ -540,14 +538,9 @@ const struct file_operations ovl_dir_operations = { int ovl_check_empty_dir(struct dentry *dentry, struct list_head *list) { int err; - struct path lowerpath; - struct path upperpath; struct ovl_cache_entry *p; - ovl_path_upper(dentry, &upperpath); - ovl_path_lower(dentry, &lowerpath); - - err = ovl_dir_read_merged(&upperpath, &lowerpath, list); + err = ovl_dir_read_merged(dentry, list); if (err) return err; diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index 08b704c..f16d318 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -24,7 +24,7 @@ MODULE_AUTHOR("Miklos Szeredi <miklos@szeredi.hu>"); MODULE_DESCRIPTION("Overlay filesystem"); MODULE_LICENSE("GPL"); -#define OVERLAYFS_SUPER_MAGIC 0x794c764f +#define OVERLAYFS_SUPER_MAGIC 0x794c7630 struct ovl_config { char *lowerdir; @@ -84,12 +84,7 @@ enum ovl_path_type ovl_path_type(struct dentry *dentry) static struct dentry *ovl_upperdentry_dereference(struct ovl_entry *oe) { - struct dentry *upperdentry = ACCESS_ONCE(oe->__upperdentry); - /* - * Make sure to order reads to upperdentry wrt ovl_dentry_update() - */ - smp_read_barrier_depends(); - return upperdentry; + return lockless_dereference(oe->__upperdentry); } void ovl_path_upper(struct dentry *dentry, struct path *path) @@ -462,11 +457,34 @@ static const match_table_t ovl_tokens = { {OPT_ERR, NULL} }; +static char *ovl_next_opt(char **s) +{ + char *sbegin = *s; + char *p; + + if (sbegin == NULL) + return NULL; + + for (p = sbegin; *p; p++) { + if (*p == '\\') { + p++; + if (!*p) + break; + } else if (*p == ',') { + *p = '\0'; + *s = p + 1; + return sbegin; + } + } + *s = NULL; + return sbegin; +} + static int ovl_parse_opt(char *opt, struct ovl_config *config) { char *p; - while ((p = strsep(&opt, ",")) != NULL) { + while ((p = ovl_next_opt(&opt)) != NULL) { int token; substring_t args[MAX_OPT_ARGS]; @@ -554,15 +572,34 @@ out_dput: goto out_unlock; } +static void ovl_unescape(char *s) +{ + char *d = s; + + for (;; s++, d++) { + if (*s == '\\') + s++; + *d = *s; + if (!*s) + break; + } +} + static int ovl_mount_dir(const char *name, struct path *path) { int err; + char *tmp = kstrdup(name, GFP_KERNEL); + + if (!tmp) + return -ENOMEM; - err = kern_path(name, LOOKUP_FOLLOW, path); + ovl_unescape(tmp); + err = kern_path(tmp, LOOKUP_FOLLOW, path); if (err) { - pr_err("overlayfs: failed to resolve '%s': %i\n", name, err); + pr_err("overlayfs: failed to resolve '%s': %i\n", tmp, err); err = -EINVAL; } + kfree(tmp); return err; } @@ -776,11 +813,11 @@ static struct dentry *ovl_mount(struct file_system_type *fs_type, int flags, static struct file_system_type ovl_fs_type = { .owner = THIS_MODULE, - .name = "overlayfs", + .name = "overlay", .mount = ovl_mount, .kill_sb = kill_anon_super, }; -MODULE_ALIAS_FS("overlayfs"); +MODULE_ALIAS_FS("overlay"); static int __init ovl_init(void) { diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index 8b663b2..6b45272 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -634,7 +634,7 @@ int dquot_writeback_dquots(struct super_block *sb, int type) dqstats_inc(DQST_LOOKUPS); err = sb->dq_op->write_dquot(dquot); if (!ret && err) - err = ret; + ret = err; dqput(dquot); spin_lock(&dq_list_lock); } diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index 92e8f99..2810026 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c @@ -1338,7 +1338,10 @@ xfs_free_file_space( goto out; } - +/* + * Preallocate and zero a range of a file. This mechanism has the allocation + * semantics of fallocate and in addition converts data in the range to zeroes. + */ int xfs_zero_file_space( struct xfs_inode *ip, @@ -1346,65 +1349,30 @@ xfs_zero_file_space( xfs_off_t len) { struct xfs_mount *mp = ip->i_mount; - uint granularity; - xfs_off_t start_boundary; - xfs_off_t end_boundary; + uint blksize; int error; trace_xfs_zero_file_space(ip); - granularity = max_t(uint, 1 << mp->m_sb.sb_blocklog, PAGE_CACHE_SIZE); + blksize = 1 << mp->m_sb.sb_blocklog; /* - * Round the range of extents we are going to convert inwards. If the - * offset is aligned, then it doesn't get changed so we zero from the - * start of the block offset points to. + * Punch a hole and prealloc the range. We use hole punch rather than + * unwritten extent conversion for two reasons: + * + * 1.) Hole punch handles partial block zeroing for us. + * + * 2.) If prealloc returns ENOSPC, the file range is still zero-valued + * by virtue of the hole punch. */ - start_boundary = round_up(offset, granularity); - end_boundary = round_down(offset + len, granularity); - - ASSERT(start_boundary >= offset); - ASSERT(end_boundary <= offset + len); - - if (start_boundary < end_boundary - 1) { - /* - * Writeback the range to ensure any inode size updates due to - * appending writes make it to disk (otherwise we could just - * punch out the delalloc blocks). - */ - error = filemap_write_and_wait_range(VFS_I(ip)->i_mapping, - start_boundary, end_boundary - 1); - if (error) - goto out; - truncate_pagecache_range(VFS_I(ip), start_boundary, - end_boundary - 1); - - /* convert the blocks */ - error = xfs_alloc_file_space(ip, start_boundary, - end_boundary - start_boundary - 1, - XFS_BMAPI_PREALLOC | XFS_BMAPI_CONVERT); - if (error) - goto out; - - /* We've handled the interior of the range, now for the edges */ - if (start_boundary != offset) { - error = xfs_iozero(ip, offset, start_boundary - offset); - if (error) - goto out; - } - - if (end_boundary != offset + len) - error = xfs_iozero(ip, end_boundary, - offset + len - end_boundary); - - } else { - /* - * It's either a sub-granularity range or the range spanned lies - * partially across two adjacent blocks. - */ - error = xfs_iozero(ip, offset, len); - } + error = xfs_free_file_space(ip, offset, len); + if (error) + goto out; + error = xfs_alloc_file_space(ip, round_down(offset, blksize), + round_up(offset + len, blksize) - + round_down(offset, blksize), + XFS_BMAPI_PREALLOC); out: return error; diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c index f1deb96..894924a 100644 --- a/fs/xfs/xfs_itable.c +++ b/fs/xfs/xfs_itable.c @@ -236,8 +236,10 @@ xfs_bulkstat_grab_ichunk( XFS_WANT_CORRUPTED_RETURN(stat == 1); /* Check if the record contains the inode in request */ - if (irec->ir_startino + XFS_INODES_PER_CHUNK <= agino) - return -EINVAL; + if (irec->ir_startino + XFS_INODES_PER_CHUNK <= agino) { + *icount = 0; + return 0; + } idx = agino - irec->ir_startino + 1; if (idx < XFS_INODES_PER_CHUNK && @@ -262,75 +264,76 @@ xfs_bulkstat_grab_ichunk( #define XFS_BULKSTAT_UBLEFT(ubleft) ((ubleft) >= statstruct_size) +struct xfs_bulkstat_agichunk { + char __user **ac_ubuffer;/* pointer into user's buffer */ + int ac_ubleft; /* bytes left in user's buffer */ + int ac_ubelem; /* spaces used in user's buffer */ +}; + /* * Process inodes in chunk with a pointer to a formatter function * that will iget the inode and fill in the appropriate structure. */ -int +static int xfs_bulkstat_ag_ichunk( struct xfs_mount *mp, xfs_agnumber_t agno, struct xfs_inobt_rec_incore *irbp, bulkstat_one_pf formatter, size_t statstruct_size, - struct xfs_bulkstat_agichunk *acp) + struct xfs_bulkstat_agichunk *acp, + xfs_agino_t *last_agino) { - xfs_ino_t lastino = acp->ac_lastino; char __user **ubufp = acp->ac_ubuffer; - int ubleft = acp->ac_ubleft; - int ubelem = acp->ac_ubelem; - int chunkidx, clustidx; + int chunkidx; int error = 0; - xfs_agino_t agino; + xfs_agino_t agino = irbp->ir_startino; - for (agino = irbp->ir_startino, chunkidx = clustidx = 0; - XFS_BULKSTAT_UBLEFT(ubleft) && - irbp->ir_freecount < XFS_INODES_PER_CHUNK; - chunkidx++, clustidx++, agino++) { - int fmterror; /* bulkstat formatter result */ + for (chunkidx = 0; chunkidx < XFS_INODES_PER_CHUNK; + chunkidx++, agino++) { + int fmterror; int ubused; - xfs_ino_t ino = XFS_AGINO_TO_INO(mp, agno, agino); - ASSERT(chunkidx < XFS_INODES_PER_CHUNK); + /* inode won't fit in buffer, we are done */ + if (acp->ac_ubleft < statstruct_size) + break; /* Skip if this inode is free */ - if (XFS_INOBT_MASK(chunkidx) & irbp->ir_free) { - lastino = ino; + if (XFS_INOBT_MASK(chunkidx) & irbp->ir_free) continue; - } - - /* - * Count used inodes as free so we can tell when the - * chunk is used up. - */ - irbp->ir_freecount++; /* Get the inode and fill in a single buffer */ ubused = statstruct_size; - error = formatter(mp, ino, *ubufp, ubleft, &ubused, &fmterror); - if (fmterror == BULKSTAT_RV_NOTHING) { - if (error && error != -ENOENT && error != -EINVAL) { - ubleft = 0; - break; - } - lastino = ino; - continue; - } - if (fmterror == BULKSTAT_RV_GIVEUP) { - ubleft = 0; + error = formatter(mp, XFS_AGINO_TO_INO(mp, agno, agino), + *ubufp, acp->ac_ubleft, &ubused, &fmterror); + + if (fmterror == BULKSTAT_RV_GIVEUP || + (error && error != -ENOENT && error != -EINVAL)) { + acp->ac_ubleft = 0; ASSERT(error); break; } - if (*ubufp) - *ubufp += ubused; - ubleft -= ubused; - ubelem++; - lastino = ino; + + /* be careful not to leak error if at end of chunk */ + if (fmterror == BULKSTAT_RV_NOTHING || error) { + error = 0; + continue; + } + + *ubufp += ubused; + acp->ac_ubleft -= ubused; + acp->ac_ubelem++; } - acp->ac_lastino = lastino; - acp->ac_ubleft = ubleft; - acp->ac_ubelem = ubelem; + /* + * Post-update *last_agino. At this point, agino will always point one + * inode past the last inode we processed successfully. Hence we + * substract that inode when setting the *last_agino cursor so that we + * return the correct cookie to userspace. On the next bulkstat call, + * the inode under the lastino cookie will be skipped as we have already + * processed it here. + */ + *last_agino = agino - 1; return error; } @@ -353,45 +356,33 @@ xfs_bulkstat( xfs_agino_t agino; /* inode # in allocation group */ xfs_agnumber_t agno; /* allocation group number */ xfs_btree_cur_t *cur; /* btree cursor for ialloc btree */ - int end_of_ag; /* set if we've seen the ag end */ - int error; /* error code */ - int fmterror;/* bulkstat formatter result */ - int i; /* loop index */ - int icount; /* count of inodes good in irbuf */ size_t irbsize; /* size of irec buffer in bytes */ - xfs_ino_t ino; /* inode number (filesystem) */ - xfs_inobt_rec_incore_t *irbp; /* current irec buffer pointer */ xfs_inobt_rec_incore_t *irbuf; /* start of irec buffer */ - xfs_inobt_rec_incore_t *irbufend; /* end of good irec buffer entries */ - xfs_ino_t lastino; /* last inode number returned */ int nirbuf; /* size of irbuf */ - int rval; /* return value error code */ - int tmp; /* result value from btree calls */ int ubcount; /* size of user's buffer */ - int ubleft; /* bytes left in user's buffer */ - char __user *ubufp; /* pointer into user's buffer */ - int ubelem; /* spaces used in user's buffer */ + struct xfs_bulkstat_agichunk ac; + int error = 0; /* * Get the last inode value, see if there's nothing to do. */ - ino = (xfs_ino_t)*lastinop; - lastino = ino; - agno = XFS_INO_TO_AGNO(mp, ino); - agino = XFS_INO_TO_AGINO(mp, ino); + agno = XFS_INO_TO_AGNO(mp, *lastinop); + agino = XFS_INO_TO_AGINO(mp, *lastinop); if (agno >= mp->m_sb.sb_agcount || - ino != XFS_AGINO_TO_INO(mp, agno, agino)) { + *lastinop != XFS_AGINO_TO_INO(mp, agno, agino)) { *done = 1; *ubcountp = 0; return 0; } ubcount = *ubcountp; /* statstruct's */ - ubleft = ubcount * statstruct_size; /* bytes */ - *ubcountp = ubelem = 0; + ac.ac_ubuffer = &ubuffer; + ac.ac_ubleft = ubcount * statstruct_size; /* bytes */; + ac.ac_ubelem = 0; + + *ubcountp = 0; *done = 0; - fmterror = 0; - ubufp = ubuffer; + irbuf = kmem_zalloc_greedy(&irbsize, PAGE_SIZE, PAGE_SIZE * 4); if (!irbuf) return -ENOMEM; @@ -402,9 +393,13 @@ xfs_bulkstat( * Loop over the allocation groups, starting from the last * inode returned; 0 means start of the allocation group. */ - rval = 0; - while (XFS_BULKSTAT_UBLEFT(ubleft) && agno < mp->m_sb.sb_agcount) { - cond_resched(); + while (agno < mp->m_sb.sb_agcount) { + struct xfs_inobt_rec_incore *irbp = irbuf; + struct xfs_inobt_rec_incore *irbufend = irbuf + nirbuf; + bool end_of_ag = false; + int icount = 0; + int stat; + error = xfs_ialloc_read_agi(mp, NULL, agno, &agbp); if (error) break; @@ -414,10 +409,6 @@ xfs_bulkstat( */ cur = xfs_inobt_init_cursor(mp, NULL, agbp, agno, XFS_BTNUM_INO); - irbp = irbuf; - irbufend = irbuf + nirbuf; - end_of_ag = 0; - icount = 0; if (agino > 0) { /* * In the middle of an allocation group, we need to get @@ -427,22 +418,23 @@ xfs_bulkstat( error = xfs_bulkstat_grab_ichunk(cur, agino, &icount, &r); if (error) - break; + goto del_cursor; if (icount) { irbp->ir_startino = r.ir_startino; irbp->ir_freecount = r.ir_freecount; irbp->ir_free = r.ir_free; irbp++; - agino = r.ir_startino + XFS_INODES_PER_CHUNK; } /* Increment to the next record */ - error = xfs_btree_increment(cur, 0, &tmp); + error = xfs_btree_increment(cur, 0, &stat); } else { /* Start of ag. Lookup the first inode chunk */ - error = xfs_inobt_lookup(cur, 0, XFS_LOOKUP_GE, &tmp); + error = xfs_inobt_lookup(cur, 0, XFS_LOOKUP_GE, &stat); + } + if (error || stat == 0) { + end_of_ag = true; + goto del_cursor; } - if (error) - break; /* * Loop through inode btree records in this ag, @@ -451,10 +443,10 @@ xfs_bulkstat( while (irbp < irbufend && icount < ubcount) { struct xfs_inobt_rec_incore r; - error = xfs_inobt_get_rec(cur, &r, &i); - if (error || i == 0) { - end_of_ag = 1; - break; + error = xfs_inobt_get_rec(cur, &r, &stat); + if (error || stat == 0) { + end_of_ag = true; + goto del_cursor; } /* @@ -469,77 +461,79 @@ xfs_bulkstat( irbp++; icount += XFS_INODES_PER_CHUNK - r.ir_freecount; } - /* - * Set agino to after this chunk and bump the cursor. - */ - agino = r.ir_startino + XFS_INODES_PER_CHUNK; - error = xfs_btree_increment(cur, 0, &tmp); + error = xfs_btree_increment(cur, 0, &stat); + if (error || stat == 0) { + end_of_ag = true; + goto del_cursor; + } cond_resched(); } + /* - * Drop the btree buffers and the agi buffer. - * We can't hold any of the locks these represent - * when calling iget. + * Drop the btree buffers and the agi buffer as we can't hold any + * of the locks these represent when calling iget. If there is a + * pending error, then we are done. */ +del_cursor: xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); xfs_buf_relse(agbp); + if (error) + break; /* - * Now format all the good inodes into the user's buffer. + * Now format all the good inodes into the user's buffer. The + * call to xfs_bulkstat_ag_ichunk() sets up the agino pointer + * for the next loop iteration. */ irbufend = irbp; for (irbp = irbuf; - irbp < irbufend && XFS_BULKSTAT_UBLEFT(ubleft); irbp++) { - struct xfs_bulkstat_agichunk ac; - - ac.ac_lastino = lastino; - ac.ac_ubuffer = &ubuffer; - ac.ac_ubleft = ubleft; - ac.ac_ubelem = ubelem; + irbp < irbufend && ac.ac_ubleft >= statstruct_size; + irbp++) { error = xfs_bulkstat_ag_ichunk(mp, agno, irbp, - formatter, statstruct_size, &ac); + formatter, statstruct_size, &ac, + &agino); if (error) - rval = error; - - lastino = ac.ac_lastino; - ubleft = ac.ac_ubleft; - ubelem = ac.ac_ubelem; + break; cond_resched(); } + /* - * Set up for the next loop iteration. + * If we've run out of space or had a formatting error, we + * are now done */ - if (XFS_BULKSTAT_UBLEFT(ubleft)) { - if (end_of_ag) { - agno++; - agino = 0; - } else - agino = XFS_INO_TO_AGINO(mp, lastino); - } else + if (ac.ac_ubleft < statstruct_size || error) break; + + if (end_of_ag) { + agno++; + agino = 0; + } } /* * Done, we're either out of filesystem or space to put the data. */ kmem_free(irbuf); - *ubcountp = ubelem; + *ubcountp = ac.ac_ubelem; + /* - * Found some inodes, return them now and return the error next time. + * We found some inodes, so clear the error status and return them. + * The lastino pointer will point directly at the inode that triggered + * any error that occurred, so on the next call the error will be + * triggered again and propagated to userspace as there will be no + * formatted inodes in the buffer. */ - if (ubelem) - rval = 0; - if (agno >= mp->m_sb.sb_agcount) { - /* - * If we ran out of filesystem, mark lastino as off - * the end of the filesystem, so the next call - * will return immediately. - */ - *lastinop = (xfs_ino_t)XFS_AGINO_TO_INO(mp, agno, 0); + if (ac.ac_ubelem) + error = 0; + + /* + * If we ran out of filesystem, lastino will point off the end of + * the filesystem so the next call will return immediately. + */ + *lastinop = XFS_AGINO_TO_INO(mp, agno, agino); + if (agno >= mp->m_sb.sb_agcount) *done = 1; - } else - *lastinop = (xfs_ino_t)lastino; - return rval; + return error; } int diff --git a/fs/xfs/xfs_itable.h b/fs/xfs/xfs_itable.h index aaed080..6ea8b39 100644 --- a/fs/xfs/xfs_itable.h +++ b/fs/xfs/xfs_itable.h @@ -30,22 +30,6 @@ typedef int (*bulkstat_one_pf)(struct xfs_mount *mp, int *ubused, int *stat); -struct xfs_bulkstat_agichunk { - xfs_ino_t ac_lastino; /* last inode returned */ - char __user **ac_ubuffer;/* pointer into user's buffer */ - int ac_ubleft; /* bytes left in user's buffer */ - int ac_ubelem; /* spaces used in user's buffer */ -}; - -int -xfs_bulkstat_ag_ichunk( - struct xfs_mount *mp, - xfs_agnumber_t agno, - struct xfs_inobt_rec_incore *irbp, - bulkstat_one_pf formatter, - size_t statstruct_size, - struct xfs_bulkstat_agichunk *acp); - /* * Values for stat return value. */ |