Diffstat (limited to 'fs')
41 files changed, 1015 insertions, 883 deletions
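[Editor's note: across the quota changes below, ext2, ext4, and jfs each stop installing the generic dquot_quotactl_ops and provide their own quotactl_ops so that quota_on can hide the quota file from userspace. A minimal sketch of the shared quota_on pattern, modeled directly on the ext2_quota_on() hunk below; the "myfs" name is a placeholder and the filesystem-specific on-disk flag update (e.g. EXT2_I(inode)->i_flags) is omitted:]

        static int myfs_quota_on(struct super_block *sb, int type, int format_id,
                                 const struct path *path)
        {
                int err;
                struct inode *inode;

                err = dquot_quota_on(sb, type, format_id, path);
                if (err)
                        return err;

                inode = d_inode(path->dentry);
                inode_lock(inode);
                /* Keep userspace from touching the quota file while in use. */
                inode_set_flags(inode, S_NOATIME | S_IMMUTABLE,
                                S_NOATIME | S_IMMUTABLE);
                inode_unlock(inode);
                mark_inode_dirty(inode);

                return 0;
        }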
diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h index 5e64de9..03f5ce1 100644 --- a/fs/ext2/ext2.h +++ b/fs/ext2/ext2.h @@ -779,7 +779,6 @@ extern void ext2_evict_inode(struct inode *); extern int ext2_get_block(struct inode *, sector_t, struct buffer_head *, int); extern int ext2_setattr (struct dentry *, struct iattr *); extern void ext2_set_inode_flags(struct inode *inode); -extern void ext2_get_inode_flags(struct ext2_inode_info *); extern int ext2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, u64 start, u64 len); @@ -796,7 +795,8 @@ void ext2_error(struct super_block *, const char *, const char *, ...); extern __printf(3, 4) void ext2_msg(struct super_block *, const char *, const char *, ...); extern void ext2_update_dynamic_rev (struct super_block *sb); -extern void ext2_write_super (struct super_block *); +extern void ext2_sync_super(struct super_block *sb, struct ext2_super_block *es, + int wait); /* * Inodes and files operations diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index 128cce5..3a38c1b 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c @@ -1384,25 +1384,6 @@ void ext2_set_inode_flags(struct inode *inode) inode->i_flags |= S_DAX; } -/* Propagate flags from i_flags to EXT2_I(inode)->i_flags */ -void ext2_get_inode_flags(struct ext2_inode_info *ei) -{ - unsigned int flags = ei->vfs_inode.i_flags; - - ei->i_flags &= ~(EXT2_SYNC_FL|EXT2_APPEND_FL| - EXT2_IMMUTABLE_FL|EXT2_NOATIME_FL|EXT2_DIRSYNC_FL); - if (flags & S_SYNC) - ei->i_flags |= EXT2_SYNC_FL; - if (flags & S_APPEND) - ei->i_flags |= EXT2_APPEND_FL; - if (flags & S_IMMUTABLE) - ei->i_flags |= EXT2_IMMUTABLE_FL; - if (flags & S_NOATIME) - ei->i_flags |= EXT2_NOATIME_FL; - if (flags & S_DIRSYNC) - ei->i_flags |= EXT2_DIRSYNC_FL; -} - struct inode *ext2_iget (struct super_block *sb, unsigned long ino) { struct ext2_inode_info *ei; @@ -1563,7 +1544,6 @@ static int __ext2_write_inode(struct inode *inode, int do_sync) if (ei->i_state & EXT2_STATE_NEW) memset(raw_inode, 0, EXT2_SB(sb)->s_inode_size); - ext2_get_inode_flags(ei); raw_inode->i_mode = cpu_to_le16(inode->i_mode); if (!(test_opt(sb, NO_UID32))) { raw_inode->i_uid_low = cpu_to_le16(low_16_bits(uid)); @@ -1615,7 +1595,7 @@ static int __ext2_write_inode(struct inode *inode, int do_sync) EXT2_SET_RO_COMPAT_FEATURE(sb, EXT2_FEATURE_RO_COMPAT_LARGE_FILE); spin_unlock(&EXT2_SB(sb)->s_lock); - ext2_write_super(sb); + ext2_sync_super(sb, EXT2_SB(sb)->s_es, 1); } } } diff --git a/fs/ext2/ioctl.c b/fs/ext2/ioctl.c index 191e02b..087f122 100644 --- a/fs/ext2/ioctl.c +++ b/fs/ext2/ioctl.c @@ -29,7 +29,6 @@ long ext2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) switch (cmd) { case EXT2_IOC_GETFLAGS: - ext2_get_inode_flags(ei); flags = ei->i_flags & EXT2_FL_USER_VISIBLE; return put_user(flags, (int __user *) arg); case EXT2_IOC_SETFLAGS: { diff --git a/fs/ext2/super.c b/fs/ext2/super.c index 9e25a71..8ac673c 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -36,8 +36,7 @@ #include "xattr.h" #include "acl.h" -static void ext2_sync_super(struct super_block *sb, - struct ext2_super_block *es, int wait); +static void ext2_write_super(struct super_block *sb); static int ext2_remount (struct super_block * sb, int * flags, char * data); static int ext2_statfs (struct dentry * dentry, struct kstatfs * buf); static int ext2_sync_fs(struct super_block *sb, int wait); @@ -123,13 +122,29 @@ void ext2_update_dynamic_rev(struct super_block *sb) */ } +#ifdef CONFIG_QUOTA +static int ext2_quota_off(struct super_block *sb, int type); + +static void 
ext2_quota_off_umount(struct super_block *sb) +{ + int type; + + for (type = 0; type < MAXQUOTAS; type++) + ext2_quota_off(sb, type); +} +#else +static inline void ext2_quota_off_umount(struct super_block *sb) +{ +} +#endif + static void ext2_put_super (struct super_block * sb) { int db_count; int i; struct ext2_sb_info *sbi = EXT2_SB(sb); - dquot_disable(sb, -1, DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED); + ext2_quota_off_umount(sb); if (sbi->s_mb_cache) { ext2_xattr_destroy_cache(sbi->s_mb_cache); @@ -314,10 +329,23 @@ static int ext2_show_options(struct seq_file *seq, struct dentry *root) #ifdef CONFIG_QUOTA static ssize_t ext2_quota_read(struct super_block *sb, int type, char *data, size_t len, loff_t off); static ssize_t ext2_quota_write(struct super_block *sb, int type, const char *data, size_t len, loff_t off); +static int ext2_quota_on(struct super_block *sb, int type, int format_id, + const struct path *path); static struct dquot **ext2_get_dquots(struct inode *inode) { return EXT2_I(inode)->i_dquot; } + +static const struct quotactl_ops ext2_quotactl_ops = { + .quota_on = ext2_quota_on, + .quota_off = ext2_quota_off, + .quota_sync = dquot_quota_sync, + .get_state = dquot_get_state, + .set_info = dquot_set_dqinfo, + .get_dqblk = dquot_get_dqblk, + .set_dqblk = dquot_set_dqblk, + .get_nextdqblk = dquot_get_next_dqblk, +}; #endif static const struct super_operations ext2_sops = { @@ -1117,7 +1145,7 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent) #ifdef CONFIG_QUOTA sb->dq_op = &dquot_operations; - sb->s_qcop = &dquot_quotactl_ops; + sb->s_qcop = &ext2_quotactl_ops; sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP; #endif @@ -1194,8 +1222,8 @@ static void ext2_clear_super_error(struct super_block *sb) } } -static void ext2_sync_super(struct super_block *sb, struct ext2_super_block *es, - int wait) +void ext2_sync_super(struct super_block *sb, struct ext2_super_block *es, + int wait) { ext2_clear_super_error(sb); spin_lock(&EXT2_SB(sb)->s_lock); @@ -1270,7 +1298,7 @@ static int ext2_unfreeze(struct super_block *sb) return 0; } -void ext2_write_super(struct super_block *sb) +static void ext2_write_super(struct super_block *sb) { if (!(sb->s_flags & MS_RDONLY)) ext2_sync_fs(sb, 1); @@ -1548,6 +1576,51 @@ out: return len - towrite; } +static int ext2_quota_on(struct super_block *sb, int type, int format_id, + const struct path *path) +{ + int err; + struct inode *inode; + + err = dquot_quota_on(sb, type, format_id, path); + if (err) + return err; + + inode = d_inode(path->dentry); + inode_lock(inode); + EXT2_I(inode)->i_flags |= EXT2_NOATIME_FL | EXT2_IMMUTABLE_FL; + inode_set_flags(inode, S_NOATIME | S_IMMUTABLE, + S_NOATIME | S_IMMUTABLE); + inode_unlock(inode); + mark_inode_dirty(inode); + + return 0; +} + +static int ext2_quota_off(struct super_block *sb, int type) +{ + struct inode *inode = sb_dqopt(sb)->files[type]; + int err; + + if (!inode || !igrab(inode)) + goto out; + + err = dquot_quota_off(sb, type); + if (err) + goto out_put; + + inode_lock(inode); + EXT2_I(inode)->i_flags &= ~(EXT2_NOATIME_FL | EXT2_IMMUTABLE_FL); + inode_set_flags(inode, 0, S_NOATIME | S_IMMUTABLE); + inode_unlock(inode); + mark_inode_dirty(inode); +out_put: + iput(inode); + return err; +out: + return dquot_quota_off(sb, type); +} + #endif static struct file_system_type ext2_fs_type = { diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index fb69ee2..f7b465b 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -2477,7 +2477,6 @@ extern int ext4_truncate(struct inode *); extern 
int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length); extern int ext4_truncate_restart_trans(handle_t *, struct inode *, int nblocks); extern void ext4_set_inode_flags(struct inode *); -extern void ext4_get_inode_flags(struct ext4_inode_info *); extern int ext4_alloc_da_blocks(struct inode *inode); extern void ext4_set_aops(struct inode *inode); extern int ext4_writepage_trans_blocks(struct inode *); diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index b9ffa9f..10b574a 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -4502,31 +4502,6 @@ void ext4_set_inode_flags(struct inode *inode) S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC|S_DAX); } -/* Propagate flags from i_flags to EXT4_I(inode)->i_flags */ -void ext4_get_inode_flags(struct ext4_inode_info *ei) -{ - unsigned int vfs_fl; - unsigned long old_fl, new_fl; - - do { - vfs_fl = ei->vfs_inode.i_flags; - old_fl = ei->i_flags; - new_fl = old_fl & ~(EXT4_SYNC_FL|EXT4_APPEND_FL| - EXT4_IMMUTABLE_FL|EXT4_NOATIME_FL| - EXT4_DIRSYNC_FL); - if (vfs_fl & S_SYNC) - new_fl |= EXT4_SYNC_FL; - if (vfs_fl & S_APPEND) - new_fl |= EXT4_APPEND_FL; - if (vfs_fl & S_IMMUTABLE) - new_fl |= EXT4_IMMUTABLE_FL; - if (vfs_fl & S_NOATIME) - new_fl |= EXT4_NOATIME_FL; - if (vfs_fl & S_DIRSYNC) - new_fl |= EXT4_DIRSYNC_FL; - } while (cmpxchg(&ei->i_flags, old_fl, new_fl) != old_fl); -} - static blkcnt_t ext4_inode_blocks(struct ext4_inode *raw_inode, struct ext4_inode_info *ei) { @@ -4963,7 +4938,6 @@ static int ext4_do_update_inode(handle_t *handle, if (ext4_test_inode_state(inode, EXT4_STATE_NEW)) memset(raw_inode, 0, EXT4_SB(inode->i_sb)->s_inode_size); - ext4_get_inode_flags(ei); raw_inode->i_mode = cpu_to_le16(inode->i_mode); i_uid = i_uid_read(inode); i_gid = i_gid_read(inode); diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index a4273dd..184e74e 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -500,7 +500,6 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) switch (cmd) { case EXT4_IOC_GETFLAGS: - ext4_get_inode_flags(ei); flags = ei->i_flags & EXT4_FL_USER_VISIBLE; return put_user(flags, (int __user *) arg); case EXT4_IOC_SETFLAGS: { @@ -888,7 +887,6 @@ resizefs_out: struct fsxattr fa; memset(&fa, 0, sizeof(struct fsxattr)); - ext4_get_inode_flags(ei); fa.fsx_xflags = ext4_iflags_to_xflags(ei->i_flags & EXT4_FL_USER_VISIBLE); if (ext4_has_feature_project(inode->i_sb)) { diff --git a/fs/ext4/super.c b/fs/ext4/super.c index a9448db..a9c72e3 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -839,6 +839,28 @@ static void dump_orphan_list(struct super_block *sb, struct ext4_sb_info *sbi) } } +#ifdef CONFIG_QUOTA +static int ext4_quota_off(struct super_block *sb, int type); + +static inline void ext4_quota_off_umount(struct super_block *sb) +{ + int type; + + if (ext4_has_feature_quota(sb)) { + dquot_disable(sb, -1, + DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED); + } else { + /* Use our quota_off function to clear inode flags etc. 
*/ + for (type = 0; type < EXT4_MAXQUOTAS; type++) + ext4_quota_off(sb, type); + } +} +#else +static inline void ext4_quota_off_umount(struct super_block *sb) +{ +} +#endif + static void ext4_put_super(struct super_block *sb) { struct ext4_sb_info *sbi = EXT4_SB(sb); @@ -847,7 +869,7 @@ static void ext4_put_super(struct super_block *sb) int i, err; ext4_unregister_li_request(sb); - dquot_disable(sb, -1, DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED); + ext4_quota_off_umount(sb); flush_workqueue(sbi->rsv_conversion_wq); destroy_workqueue(sbi->rsv_conversion_wq); @@ -1218,7 +1240,6 @@ static int ext4_mark_dquot_dirty(struct dquot *dquot); static int ext4_write_info(struct super_block *sb, int type); static int ext4_quota_on(struct super_block *sb, int type, int format_id, const struct path *path); -static int ext4_quota_off(struct super_block *sb, int type); static int ext4_quota_on_mount(struct super_block *sb, int type); static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data, size_t len, loff_t off); @@ -5344,11 +5365,33 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id, if (err) return err; } + lockdep_set_quota_inode(path->dentry->d_inode, I_DATA_SEM_QUOTA); err = dquot_quota_on(sb, type, format_id, path); - if (err) + if (err) { lockdep_set_quota_inode(path->dentry->d_inode, I_DATA_SEM_NORMAL); + } else { + struct inode *inode = d_inode(path->dentry); + handle_t *handle; + + /* + * Set inode flags to prevent userspace from messing with quota + * files. If this fails, we return success anyway since quotas + * are already enabled and this is not a hard failure. + */ + inode_lock(inode); + handle = ext4_journal_start(inode, EXT4_HT_QUOTA, 1); + if (IS_ERR(handle)) + goto unlock_inode; + EXT4_I(inode)->i_flags |= EXT4_NOATIME_FL | EXT4_IMMUTABLE_FL; + inode_set_flags(inode, S_NOATIME | S_IMMUTABLE, + S_NOATIME | S_IMMUTABLE); + ext4_mark_inode_dirty(handle, inode); + ext4_journal_stop(handle); + unlock_inode: + inode_unlock(inode); + } return err; } @@ -5422,24 +5465,39 @@ static int ext4_quota_off(struct super_block *sb, int type) { struct inode *inode = sb_dqopt(sb)->files[type]; handle_t *handle; + int err; /* Force all delayed allocation blocks to be allocated. * Caller already holds s_umount sem */ if (test_opt(sb, DELALLOC)) sync_filesystem(sb); - if (!inode) + if (!inode || !igrab(inode)) goto out; - /* Update modification times of quota files when userspace can - * start looking at them */ + err = dquot_quota_off(sb, type); + if (err) + goto out_put; + + inode_lock(inode); + /* + * Update modification times of quota files when userspace can + * start looking at them. If we fail, we return success anyway since + * this is not a hard failure and quotas are already disabled. 
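[Editor's note: the quota_off side of the same pattern pins the quota inode with igrab() so the flags can be cleared safely even during teardown; ext4 additionally wraps the flag update in a journal handle, as the hunk continuing below shows. A sketch modeled on the ext2/jfs variants ("myfs" hypothetical):]

        static int myfs_quota_off(struct super_block *sb, int type)
        {
                struct inode *inode = sb_dqopt(sb)->files[type];
                int err;

                /* No quota file, or it is already being evicted: plain disable. */
                if (!inode || !igrab(inode))
                        return dquot_quota_off(sb, type);

                err = dquot_quota_off(sb, type);
                if (err)
                        goto out_put;

                inode_lock(inode);
                /* Quotas are off; let userspace at the file again. */
                inode_set_flags(inode, 0, S_NOATIME | S_IMMUTABLE);
                inode_unlock(inode);
                mark_inode_dirty(inode);
        out_put:
                iput(inode);
                return err;
        }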
+ */ handle = ext4_journal_start(inode, EXT4_HT_QUOTA, 1); if (IS_ERR(handle)) - goto out; + goto out_unlock; + EXT4_I(inode)->i_flags &= ~(EXT4_NOATIME_FL | EXT4_IMMUTABLE_FL); + inode_set_flags(inode, 0, S_NOATIME | S_IMMUTABLE); inode->i_mtime = inode->i_ctime = current_time(inode); ext4_mark_inode_dirty(handle, inode); ext4_journal_stop(handle); - +out_unlock: + inode_unlock(inode); +out_put: + iput(inode); + return err; out: return dquot_quota_off(sb, type); } @@ -371,9 +371,6 @@ void inode_init_once(struct inode *inode) INIT_LIST_HEAD(&inode->i_lru); address_space_init_once(&inode->i_data); i_size_ordered_init(inode); -#ifdef CONFIG_FSNOTIFY - INIT_HLIST_HEAD(&inode->i_fsnotify_marks); -#endif } EXPORT_SYMBOL(inode_init_once); diff --git a/fs/jfs/ioctl.c b/fs/jfs/ioctl.c index fc89f94..5c5ac5b 100644 --- a/fs/jfs/ioctl.c +++ b/fs/jfs/ioctl.c @@ -64,7 +64,6 @@ long jfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) switch (cmd) { case JFS_IOC_GETFLAGS: - jfs_get_inode_flags(jfs_inode); flags = jfs_inode->mode2 & JFS_FL_USER_VISIBLE; flags = jfs_map_ext2(flags, 0); return put_user(flags, (int __user *) arg); @@ -98,7 +97,6 @@ long jfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) /* Lock against other parallel changes of flags */ inode_lock(inode); - jfs_get_inode_flags(jfs_inode); oldflags = jfs_inode->mode2; /* diff --git a/fs/jfs/jfs_imap.c b/fs/jfs/jfs_imap.c index 6aca224..f36ef68 100644 --- a/fs/jfs/jfs_imap.c +++ b/fs/jfs/jfs_imap.c @@ -3148,7 +3148,6 @@ static void copy_to_dinode(struct dinode * dip, struct inode *ip) else dip->di_gid = cpu_to_le32(from_kgid(&init_user_ns, jfs_ip->saved_gid)); - jfs_get_inode_flags(jfs_ip); /* * mode2 is only needed for storing the higher order bits. * Trust i_mode for the lower order ones diff --git a/fs/jfs/jfs_inode.c b/fs/jfs/jfs_inode.c index 375dd25..5e9b7bb 100644 --- a/fs/jfs/jfs_inode.c +++ b/fs/jfs/jfs_inode.c @@ -45,24 +45,6 @@ void jfs_set_inode_flags(struct inode *inode) S_DIRSYNC | S_SYNC); } -void jfs_get_inode_flags(struct jfs_inode_info *jfs_ip) -{ - unsigned int flags = jfs_ip->vfs_inode.i_flags; - - jfs_ip->mode2 &= ~(JFS_IMMUTABLE_FL | JFS_APPEND_FL | JFS_NOATIME_FL | - JFS_DIRSYNC_FL | JFS_SYNC_FL); - if (flags & S_IMMUTABLE) - jfs_ip->mode2 |= JFS_IMMUTABLE_FL; - if (flags & S_APPEND) - jfs_ip->mode2 |= JFS_APPEND_FL; - if (flags & S_NOATIME) - jfs_ip->mode2 |= JFS_NOATIME_FL; - if (flags & S_DIRSYNC) - jfs_ip->mode2 |= JFS_DIRSYNC_FL; - if (flags & S_SYNC) - jfs_ip->mode2 |= JFS_SYNC_FL; -} - /* * NAME: ialloc() * diff --git a/fs/jfs/jfs_inode.h b/fs/jfs/jfs_inode.h index 9271cfe4..7b0b3a4 100644 --- a/fs/jfs/jfs_inode.h +++ b/fs/jfs/jfs_inode.h @@ -33,7 +33,6 @@ extern void jfs_truncate(struct inode *); extern void jfs_truncate_nolock(struct inode *, loff_t); extern void jfs_free_zero_link(struct inode *); extern struct dentry *jfs_get_parent(struct dentry *dentry); -extern void jfs_get_inode_flags(struct jfs_inode_info *); extern struct dentry *jfs_fh_to_dentry(struct super_block *sb, struct fid *fid, int fh_len, int fh_type); extern struct dentry *jfs_fh_to_parent(struct super_block *sb, struct fid *fid, diff --git a/fs/jfs/super.c b/fs/jfs/super.c index c64c257..e8aad7d 100644 --- a/fs/jfs/super.c +++ b/fs/jfs/super.c @@ -45,6 +45,7 @@ #include "jfs_acl.h" #include "jfs_debug.h" #include "jfs_xattr.h" +#include "jfs_dinode.h" MODULE_DESCRIPTION("The Journaled Filesystem (JFS)"); MODULE_AUTHOR("Steve Best/Dave Kleikamp/Barry Arndt, IBM"); @@ -181,6 +182,35 @@ static int jfs_statfs(struct 
dentry *dentry, struct kstatfs *buf) return 0; } +#ifdef CONFIG_QUOTA +static int jfs_quota_off(struct super_block *sb, int type); +static int jfs_quota_on(struct super_block *sb, int type, int format_id, + const struct path *path); + +static void jfs_quota_off_umount(struct super_block *sb) +{ + int type; + + for (type = 0; type < MAXQUOTAS; type++) + jfs_quota_off(sb, type); +} + +static const struct quotactl_ops jfs_quotactl_ops = { + .quota_on = jfs_quota_on, + .quota_off = jfs_quota_off, + .quota_sync = dquot_quota_sync, + .get_state = dquot_get_state, + .set_info = dquot_set_dqinfo, + .get_dqblk = dquot_get_dqblk, + .set_dqblk = dquot_set_dqblk, + .get_nextdqblk = dquot_get_next_dqblk, +}; +#else +static inline void jfs_quota_off_umount(struct super_block *sb) +{ +} +#endif + static void jfs_put_super(struct super_block *sb) { struct jfs_sb_info *sbi = JFS_SBI(sb); @@ -188,7 +218,7 @@ static void jfs_put_super(struct super_block *sb) jfs_info("In jfs_put_super"); - dquot_disable(sb, -1, DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED); + jfs_quota_off_umount(sb); rc = jfs_umount(sb); if (rc) @@ -536,7 +566,7 @@ static int jfs_fill_super(struct super_block *sb, void *data, int silent) sb->s_xattr = jfs_xattr_handlers; #ifdef CONFIG_QUOTA sb->dq_op = &dquot_operations; - sb->s_qcop = &dquot_quotactl_ops; + sb->s_qcop = &jfs_quotactl_ops; sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP; #endif @@ -840,6 +870,51 @@ static struct dquot **jfs_get_dquots(struct inode *inode) { return JFS_IP(inode)->i_dquot; } + +static int jfs_quota_on(struct super_block *sb, int type, int format_id, + const struct path *path) +{ + int err; + struct inode *inode; + + err = dquot_quota_on(sb, type, format_id, path); + if (err) + return err; + + inode = d_inode(path->dentry); + inode_lock(inode); + JFS_IP(inode)->mode2 |= JFS_NOATIME_FL | JFS_IMMUTABLE_FL; + inode_set_flags(inode, S_NOATIME | S_IMMUTABLE, + S_NOATIME | S_IMMUTABLE); + inode_unlock(inode); + mark_inode_dirty(inode); + + return 0; +} + +static int jfs_quota_off(struct super_block *sb, int type) +{ + struct inode *inode = sb_dqopt(sb)->files[type]; + int err; + + if (!inode || !igrab(inode)) + goto out; + + err = dquot_quota_off(sb, type); + if (err) + goto out_put; + + inode_lock(inode); + JFS_IP(inode)->mode2 &= ~(JFS_NOATIME_FL | JFS_IMMUTABLE_FL); + inode_set_flags(inode, 0, S_NOATIME | S_IMMUTABLE); + inode_unlock(inode); + mark_inode_dirty(inode); +out_put: + iput(inode); + return err; +out: + return dquot_quota_off(sb, type); +} #endif static const struct super_operations jfs_super_operations = { @@ -59,7 +59,7 @@ struct mount { struct mountpoint *mnt_mp; /* where is it mounted */ struct hlist_node mnt_mp_list; /* list mounts with the same mountpoint */ #ifdef CONFIG_FSNOTIFY - struct hlist_head mnt_fsnotify_marks; + struct fsnotify_mark_connector __rcu *mnt_fsnotify_marks; __u32 mnt_fsnotify_mask; #endif int mnt_id; /* mount identifier */ diff --git a/fs/namespace.c b/fs/namespace.c index cc1375ef..b3b115b 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -236,9 +236,6 @@ static struct mount *alloc_vfsmnt(const char *name) INIT_LIST_HEAD(&mnt->mnt_slave_list); INIT_LIST_HEAD(&mnt->mnt_slave); INIT_HLIST_NODE(&mnt->mnt_mp_list); -#ifdef CONFIG_FSNOTIFY - INIT_HLIST_HEAD(&mnt->mnt_fsnotify_marks); -#endif init_fs_pin(&mnt->mnt_umount, drop_mountpoint); } return mnt; diff --git a/fs/notify/Makefile b/fs/notify/Makefile index 96d3420..3e969ae 100644 --- a/fs/notify/Makefile +++ b/fs/notify/Makefile @@ -1,5 +1,5 @@ -obj-$(CONFIG_FSNOTIFY) += 
fsnotify.o notification.o group.o inode_mark.o \ - mark.o vfsmount_mark.o fdinfo.o +obj-$(CONFIG_FSNOTIFY) += fsnotify.o notification.o group.o mark.o \ + fdinfo.o obj-y += dnotify/ obj-y += inotify/ diff --git a/fs/notify/dnotify/dnotify.c b/fs/notify/dnotify/dnotify.c index 5a4ec30..2430a04 100644 --- a/fs/notify/dnotify/dnotify.c +++ b/fs/notify/dnotify/dnotify.c @@ -52,7 +52,7 @@ struct dnotify_mark { */ static void dnotify_recalc_inode_mask(struct fsnotify_mark *fsn_mark) { - __u32 new_mask, old_mask; + __u32 new_mask = 0; struct dnotify_struct *dn; struct dnotify_mark *dn_mark = container_of(fsn_mark, struct dnotify_mark, @@ -60,17 +60,13 @@ static void dnotify_recalc_inode_mask(struct fsnotify_mark *fsn_mark) assert_spin_locked(&fsn_mark->lock); - old_mask = fsn_mark->mask; - new_mask = 0; for (dn = dn_mark->dn; dn != NULL; dn = dn->dn_next) new_mask |= (dn->dn_mask & ~FS_DN_MULTISHOT); - fsnotify_set_mark_mask_locked(fsn_mark, new_mask); - - if (old_mask == new_mask) + if (fsn_mark->mask == new_mask) return; + fsn_mark->mask = new_mask; - if (fsn_mark->inode) - fsnotify_recalc_inode_mask(fsn_mark->inode); + fsnotify_recalc_mask(fsn_mark->connector); } /* @@ -86,7 +82,8 @@ static int dnotify_handle_event(struct fsnotify_group *group, struct fsnotify_mark *inode_mark, struct fsnotify_mark *vfsmount_mark, u32 mask, const void *data, int data_type, - const unsigned char *file_name, u32 cookie) + const unsigned char *file_name, u32 cookie, + struct fsnotify_iter_info *iter_info) { struct dnotify_mark *dn_mark; struct dnotify_struct *dn; @@ -138,6 +135,7 @@ static void dnotify_free_mark(struct fsnotify_mark *fsn_mark) static struct fsnotify_ops dnotify_fsnotify_ops = { .handle_event = dnotify_handle_event, + .free_mark = dnotify_free_mark, }; /* @@ -160,7 +158,7 @@ void dnotify_flush(struct file *filp, fl_owner_t id) if (!S_ISDIR(inode->i_mode)) return; - fsn_mark = fsnotify_find_inode_mark(dnotify_group, inode); + fsn_mark = fsnotify_find_mark(&inode->i_fsnotify_marks, dnotify_group); if (!fsn_mark) return; dn_mark = container_of(fsn_mark, struct dnotify_mark, fsn_mark); @@ -308,7 +306,7 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg) /* set up the new_fsn_mark and new_dn_mark */ new_fsn_mark = &new_dn_mark->fsn_mark; - fsnotify_init_mark(new_fsn_mark, dnotify_free_mark); + fsnotify_init_mark(new_fsn_mark, dnotify_group); new_fsn_mark->mask = mask; new_dn_mark->dn = NULL; @@ -316,13 +314,12 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg) mutex_lock(&dnotify_group->mark_mutex); /* add the new_fsn_mark or find an old one. 
*/ - fsn_mark = fsnotify_find_inode_mark(dnotify_group, inode); + fsn_mark = fsnotify_find_mark(&inode->i_fsnotify_marks, dnotify_group); if (fsn_mark) { dn_mark = container_of(fsn_mark, struct dnotify_mark, fsn_mark); spin_lock(&fsn_mark->lock); } else { - fsnotify_add_mark_locked(new_fsn_mark, dnotify_group, inode, - NULL, 0); + fsnotify_add_mark_locked(new_fsn_mark, inode, NULL, 0); spin_lock(&new_fsn_mark->lock); fsn_mark = new_fsn_mark; dn_mark = new_dn_mark; diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c index e5f7e47..2fa99ae 100644 --- a/fs/notify/fanotify/fanotify.c +++ b/fs/notify/fanotify/fanotify.c @@ -57,14 +57,26 @@ static int fanotify_merge(struct list_head *list, struct fsnotify_event *event) #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS static int fanotify_get_response(struct fsnotify_group *group, - struct fanotify_perm_event_info *event) + struct fanotify_perm_event_info *event, + struct fsnotify_iter_info *iter_info) { int ret; pr_debug("%s: group=%p event=%p\n", __func__, group, event); + /* + * fsnotify_prepare_user_wait() fails if we race with mark deletion. + * Just let the operation pass in that case. + */ + if (!fsnotify_prepare_user_wait(iter_info)) { + event->response = FAN_ALLOW; + goto out; + } + wait_event(group->fanotify_data.access_waitq, event->response); + fsnotify_finish_user_wait(iter_info); +out: /* userspace responded, convert to something usable */ switch (event->response) { case FAN_ALLOW: @@ -174,7 +186,8 @@ static int fanotify_handle_event(struct fsnotify_group *group, struct fsnotify_mark *inode_mark, struct fsnotify_mark *fanotify_mark, u32 mask, const void *data, int data_type, - const unsigned char *file_name, u32 cookie) + const unsigned char *file_name, u32 cookie, + struct fsnotify_iter_info *iter_info) { int ret = 0; struct fanotify_event_info *event; @@ -215,7 +228,8 @@ static int fanotify_handle_event(struct fsnotify_group *group, #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS if (mask & FAN_ALL_PERM_EVENTS) { - ret = fanotify_get_response(group, FANOTIFY_PE(fsn_event)); + ret = fanotify_get_response(group, FANOTIFY_PE(fsn_event), + iter_info); fsnotify_destroy_event(group, fsn_event); } #endif @@ -248,8 +262,14 @@ static void fanotify_free_event(struct fsnotify_event *fsn_event) kmem_cache_free(fanotify_event_cachep, event); } +static void fanotify_free_mark(struct fsnotify_mark *fsn_mark) +{ + kmem_cache_free(fanotify_mark_cache, fsn_mark); +} + const struct fsnotify_ops fanotify_fsnotify_ops = { .handle_event = fanotify_handle_event, .free_group_priv = fanotify_free_group_priv, .free_event = fanotify_free_event, + .free_mark = fanotify_free_mark, }; diff --git a/fs/notify/fanotify/fanotify.h b/fs/notify/fanotify/fanotify.h index 4500a74..4eb6f5e 100644 --- a/fs/notify/fanotify/fanotify.h +++ b/fs/notify/fanotify/fanotify.h @@ -2,6 +2,7 @@ #include <linux/path.h> #include <linux/slab.h> +extern struct kmem_cache *fanotify_mark_cache; extern struct kmem_cache *fanotify_event_cachep; extern struct kmem_cache *fanotify_perm_event_cachep; diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index 2b37f27..907a481 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -41,7 +41,7 @@ extern const struct fsnotify_ops fanotify_fsnotify_ops; -static struct kmem_cache *fanotify_mark_cache __read_mostly; +struct kmem_cache *fanotify_mark_cache __read_mostly; struct kmem_cache *fanotify_event_cachep __read_mostly; struct kmem_cache *fanotify_perm_event_cachep 
__read_mostly; @@ -295,27 +295,37 @@ static ssize_t fanotify_read(struct file *file, char __user *buf, } ret = copy_event_to_user(group, kevent, buf); + if (unlikely(ret == -EOPENSTALE)) { + /* + * We cannot report events with stale fd so drop it. + * Setting ret to 0 will continue the event loop and + * do the right thing if there are no more events to + * read (i.e. return bytes read, -EAGAIN or wait). + */ + ret = 0; + } + /* * Permission events get queued to wait for response. Other * events can be destroyed now. */ if (!(kevent->mask & FAN_ALL_PERM_EVENTS)) { fsnotify_destroy_event(group, kevent); - if (ret < 0) - break; } else { #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS - if (ret < 0) { + if (ret <= 0) { FANOTIFY_PE(kevent)->response = FAN_DENY; wake_up(&group->fanotify_data.access_waitq); - break; + } else { + spin_lock(&group->notification_lock); + list_add_tail(&kevent->list, + &group->fanotify_data.access_list); + spin_unlock(&group->notification_lock); } - spin_lock(&group->notification_lock); - list_add_tail(&kevent->list, - &group->fanotify_data.access_list); - spin_unlock(&group->notification_lock); #endif } + if (ret < 0) + break; buf += ret; count -= ret; } @@ -445,11 +455,6 @@ static const struct file_operations fanotify_fops = { .llseek = noop_llseek, }; -static void fanotify_free_mark(struct fsnotify_mark *fsn_mark) -{ - kmem_cache_free(fanotify_mark_cache, fsn_mark); -} - static int fanotify_find_path(int dfd, const char __user *filename, struct path *path, unsigned int flags) { @@ -511,13 +516,12 @@ static __u32 fanotify_mark_remove_from_mask(struct fsnotify_mark *fsn_mark, tmask &= ~FAN_ONDIR; oldmask = fsn_mark->mask; - fsnotify_set_mark_mask_locked(fsn_mark, tmask); + fsn_mark->mask = tmask; } else { __u32 tmask = fsn_mark->ignored_mask & ~mask; if (flags & FAN_MARK_ONDIR) tmask &= ~FAN_ONDIR; - - fsnotify_set_mark_ignored_mask_locked(fsn_mark, tmask); + fsn_mark->ignored_mask = tmask; } *destroy = !(fsn_mark->mask | fsn_mark->ignored_mask); spin_unlock(&fsn_mark->lock); @@ -534,7 +538,8 @@ static int fanotify_remove_vfsmount_mark(struct fsnotify_group *group, int destroy_mark; mutex_lock(&group->mark_mutex); - fsn_mark = fsnotify_find_vfsmount_mark(group, mnt); + fsn_mark = fsnotify_find_mark(&real_mount(mnt)->mnt_fsnotify_marks, + group); if (!fsn_mark) { mutex_unlock(&group->mark_mutex); return -ENOENT; @@ -542,6 +547,8 @@ static int fanotify_remove_vfsmount_mark(struct fsnotify_group *group, removed = fanotify_mark_remove_from_mask(fsn_mark, mask, flags, &destroy_mark); + if (removed & real_mount(mnt)->mnt_fsnotify_mask) + fsnotify_recalc_mask(real_mount(mnt)->mnt_fsnotify_marks); if (destroy_mark) fsnotify_detach_mark(fsn_mark); mutex_unlock(&group->mark_mutex); @@ -549,9 +556,6 @@ static int fanotify_remove_vfsmount_mark(struct fsnotify_group *group, fsnotify_free_mark(fsn_mark); fsnotify_put_mark(fsn_mark); - if (removed & real_mount(mnt)->mnt_fsnotify_mask) - fsnotify_recalc_vfsmount_mask(mnt); - return 0; } @@ -564,7 +568,7 @@ static int fanotify_remove_inode_mark(struct fsnotify_group *group, int destroy_mark; mutex_lock(&group->mark_mutex); - fsn_mark = fsnotify_find_inode_mark(group, inode); + fsn_mark = fsnotify_find_mark(&inode->i_fsnotify_marks, group); if (!fsn_mark) { mutex_unlock(&group->mark_mutex); return -ENOENT; @@ -572,16 +576,16 @@ static int fanotify_remove_inode_mark(struct fsnotify_group *group, removed = fanotify_mark_remove_from_mask(fsn_mark, mask, flags, &destroy_mark); + if (removed & inode->i_fsnotify_mask) + 
fsnotify_recalc_mask(inode->i_fsnotify_marks); if (destroy_mark) fsnotify_detach_mark(fsn_mark); mutex_unlock(&group->mark_mutex); if (destroy_mark) fsnotify_free_mark(fsn_mark); - /* matches the fsnotify_find_inode_mark() */ + /* matches the fsnotify_find_mark() */ fsnotify_put_mark(fsn_mark); - if (removed & inode->i_fsnotify_mask) - fsnotify_recalc_inode_mask(inode); return 0; } @@ -600,13 +604,13 @@ static __u32 fanotify_mark_add_to_mask(struct fsnotify_mark *fsn_mark, tmask |= FAN_ONDIR; oldmask = fsn_mark->mask; - fsnotify_set_mark_mask_locked(fsn_mark, tmask); + fsn_mark->mask = tmask; } else { __u32 tmask = fsn_mark->ignored_mask | mask; if (flags & FAN_MARK_ONDIR) tmask |= FAN_ONDIR; - fsnotify_set_mark_ignored_mask_locked(fsn_mark, tmask); + fsn_mark->ignored_mask = tmask; if (flags & FAN_MARK_IGNORED_SURV_MODIFY) fsn_mark->flags |= FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY; } @@ -629,8 +633,8 @@ static struct fsnotify_mark *fanotify_add_new_mark(struct fsnotify_group *group, if (!mark) return ERR_PTR(-ENOMEM); - fsnotify_init_mark(mark, fanotify_free_mark); - ret = fsnotify_add_mark_locked(mark, group, inode, mnt, 0); + fsnotify_init_mark(mark, group); + ret = fsnotify_add_mark_locked(mark, inode, mnt, 0); if (ret) { fsnotify_put_mark(mark); return ERR_PTR(ret); @@ -648,7 +652,8 @@ static int fanotify_add_vfsmount_mark(struct fsnotify_group *group, __u32 added; mutex_lock(&group->mark_mutex); - fsn_mark = fsnotify_find_vfsmount_mark(group, mnt); + fsn_mark = fsnotify_find_mark(&real_mount(mnt)->mnt_fsnotify_marks, + group); if (!fsn_mark) { fsn_mark = fanotify_add_new_mark(group, NULL, mnt); if (IS_ERR(fsn_mark)) { @@ -657,10 +662,9 @@ static int fanotify_add_vfsmount_mark(struct fsnotify_group *group, } } added = fanotify_mark_add_to_mask(fsn_mark, mask, flags); - mutex_unlock(&group->mark_mutex); - if (added & ~real_mount(mnt)->mnt_fsnotify_mask) - fsnotify_recalc_vfsmount_mask(mnt); + fsnotify_recalc_mask(real_mount(mnt)->mnt_fsnotify_marks); + mutex_unlock(&group->mark_mutex); fsnotify_put_mark(fsn_mark); return 0; @@ -686,7 +690,7 @@ static int fanotify_add_inode_mark(struct fsnotify_group *group, return 0; mutex_lock(&group->mark_mutex); - fsn_mark = fsnotify_find_inode_mark(group, inode); + fsn_mark = fsnotify_find_mark(&inode->i_fsnotify_marks, group); if (!fsn_mark) { fsn_mark = fanotify_add_new_mark(group, inode, NULL); if (IS_ERR(fsn_mark)) { @@ -695,10 +699,9 @@ static int fanotify_add_inode_mark(struct fsnotify_group *group, } } added = fanotify_mark_add_to_mask(fsn_mark, mask, flags); - mutex_unlock(&group->mark_mutex); - if (added & ~inode->i_fsnotify_mask) - fsnotify_recalc_inode_mask(inode); + fsnotify_recalc_mask(inode->i_fsnotify_marks); + mutex_unlock(&group->mark_mutex); fsnotify_put_mark(fsn_mark); return 0; diff --git a/fs/notify/fdinfo.c b/fs/notify/fdinfo.c index fd98e51..dd63aa9 100644 --- a/fs/notify/fdinfo.c +++ b/fs/notify/fdinfo.c @@ -76,12 +76,11 @@ static void inotify_fdinfo(struct seq_file *m, struct fsnotify_mark *mark) struct inotify_inode_mark *inode_mark; struct inode *inode; - if (!(mark->flags & FSNOTIFY_MARK_FLAG_ALIVE) || - !(mark->flags & FSNOTIFY_MARK_FLAG_INODE)) + if (!(mark->connector->flags & FSNOTIFY_OBJ_TYPE_INODE)) return; inode_mark = container_of(mark, struct inotify_inode_mark, fsn_mark); - inode = igrab(mark->inode); + inode = igrab(mark->connector->inode); if (inode) { /* * IN_ALL_EVENTS represents all of the mask bits @@ -113,14 +112,11 @@ static void fanotify_fdinfo(struct seq_file *m, struct fsnotify_mark *mark) unsigned int 
mflags = 0; struct inode *inode; - if (!(mark->flags & FSNOTIFY_MARK_FLAG_ALIVE)) - return; - if (mark->flags & FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY) mflags |= FAN_MARK_IGNORED_SURV_MODIFY; - if (mark->flags & FSNOTIFY_MARK_FLAG_INODE) { - inode = igrab(mark->inode); + if (mark->connector->flags & FSNOTIFY_OBJ_TYPE_INODE) { + inode = igrab(mark->connector->inode); if (!inode) return; seq_printf(m, "fanotify ino:%lx sdev:%x mflags:%x mask:%x ignored_mask:%x ", @@ -129,8 +125,8 @@ static void fanotify_fdinfo(struct seq_file *m, struct fsnotify_mark *mark) show_mark_fhandle(m, inode); seq_putc(m, '\n'); iput(inode); - } else if (mark->flags & FSNOTIFY_MARK_FLAG_VFSMOUNT) { - struct mount *mnt = real_mount(mark->mnt); + } else if (mark->connector->flags & FSNOTIFY_OBJ_TYPE_VFSMOUNT) { + struct mount *mnt = real_mount(mark->connector->mnt); seq_printf(m, "fanotify mnt_id:%x mflags:%x mask:%x ignored_mask:%x\n", mnt->mnt_id, mflags, mark->mask, mark->ignored_mask); diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c index b41515d..01a9f0f 100644 --- a/fs/notify/fsnotify.c +++ b/fs/notify/fsnotify.c @@ -41,6 +41,63 @@ void __fsnotify_vfsmount_delete(struct vfsmount *mnt) fsnotify_clear_marks_by_mount(mnt); } +/** + * fsnotify_unmount_inodes - an sb is unmounting. handle any watched inodes. + * @sb: superblock being unmounted. + * + * Called during unmount with no locks held, so needs to be safe against + * concurrent modifiers. We temporarily drop sb->s_inode_list_lock and CAN block. + */ +void fsnotify_unmount_inodes(struct super_block *sb) +{ + struct inode *inode, *iput_inode = NULL; + + spin_lock(&sb->s_inode_list_lock); + list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { + /* + * We cannot __iget() an inode in state I_FREEING, + * I_WILL_FREE, or I_NEW which is fine because by that point + * the inode cannot have any associated watches. + */ + spin_lock(&inode->i_lock); + if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) { + spin_unlock(&inode->i_lock); + continue; + } + + /* + * If i_count is zero, the inode cannot have any watches and + * doing an __iget/iput with MS_ACTIVE clear would actually + * evict all inodes with zero i_count from icache which is + * unnecessarily violent and may in fact be illegal to do. + */ + if (!atomic_read(&inode->i_count)) { + spin_unlock(&inode->i_lock); + continue; + } + + __iget(inode); + spin_unlock(&inode->i_lock); + spin_unlock(&sb->s_inode_list_lock); + + if (iput_inode) + iput(iput_inode); + + /* for each watch, send FS_UNMOUNT and then remove it */ + fsnotify(inode, FS_UNMOUNT, inode, FSNOTIFY_EVENT_INODE, NULL, 0); + + fsnotify_inode_delete(inode); + + iput_inode = inode; + + spin_lock(&sb->s_inode_list_lock); + } + spin_unlock(&sb->s_inode_list_lock); + + if (iput_inode) + iput(iput_inode); +} + /* * Given an inode, first check if we care what happens to our children. Inotify * and dnotify both tell their parents about events. 
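[Editor's note: in the fdinfo hunks above, a mark's watched object is now reached through mark->connector instead of fields on the mark itself. A hypothetical helper illustrating the new dispatch (the helper name is illustrative, not part of this series):]

        static struct inode *mark_grab_inode(struct fsnotify_mark *mark)
        {
                /* The connector records which object type the mark list hangs off. */
                if (!(mark->connector->flags & FSNOTIFY_OBJ_TYPE_INODE))
                        return NULL;
                return igrab(mark->connector->inode);   /* may still return NULL */
        }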
If we care about any event @@ -127,7 +184,8 @@ static int send_to_group(struct inode *to_tell, struct fsnotify_mark *vfsmount_mark, __u32 mask, const void *data, int data_is, u32 cookie, - const unsigned char *file_name) + const unsigned char *file_name, + struct fsnotify_iter_info *iter_info) { struct fsnotify_group *group = NULL; __u32 inode_test_mask = 0; @@ -178,7 +236,7 @@ static int send_to_group(struct inode *to_tell, return group->ops->handle_event(group, to_tell, inode_mark, vfsmount_mark, mask, data, data_is, - file_name, cookie); + file_name, cookie, iter_info); } /* @@ -193,8 +251,10 @@ int fsnotify(struct inode *to_tell, __u32 mask, const void *data, int data_is, struct hlist_node *inode_node = NULL, *vfsmount_node = NULL; struct fsnotify_mark *inode_mark = NULL, *vfsmount_mark = NULL; struct fsnotify_group *inode_group, *vfsmount_group; + struct fsnotify_mark_connector *inode_conn, *vfsmount_conn; + struct fsnotify_iter_info iter_info; struct mount *mnt; - int idx, ret = 0; + int ret = 0; /* global tests shouldn't care about events on child only the specific event */ __u32 test_mask = (mask & ~FS_EVENT_ON_CHILD); @@ -210,8 +270,8 @@ int fsnotify(struct inode *to_tell, __u32 mask, const void *data, int data_is, * SRCU because we have no references to any objects and do not * need SRCU to keep them "alive". */ - if (hlist_empty(&to_tell->i_fsnotify_marks) && - (!mnt || hlist_empty(&mnt->mnt_fsnotify_marks))) + if (!to_tell->i_fsnotify_marks && + (!mnt || !mnt->mnt_fsnotify_marks)) return 0; /* * if this is a modify event we may need to clear the ignored masks @@ -223,19 +283,30 @@ int fsnotify(struct inode *to_tell, __u32 mask, const void *data, int data_is, !(mnt && test_mask & mnt->mnt_fsnotify_mask)) return 0; - idx = srcu_read_lock(&fsnotify_mark_srcu); + iter_info.srcu_idx = srcu_read_lock(&fsnotify_mark_srcu); if ((mask & FS_MODIFY) || - (test_mask & to_tell->i_fsnotify_mask)) - inode_node = srcu_dereference(to_tell->i_fsnotify_marks.first, + (test_mask & to_tell->i_fsnotify_mask)) { + inode_conn = srcu_dereference(to_tell->i_fsnotify_marks, &fsnotify_mark_srcu); + if (inode_conn) + inode_node = srcu_dereference(inode_conn->list.first, + &fsnotify_mark_srcu); + } if (mnt && ((mask & FS_MODIFY) || (test_mask & mnt->mnt_fsnotify_mask))) { - vfsmount_node = srcu_dereference(mnt->mnt_fsnotify_marks.first, - &fsnotify_mark_srcu); - inode_node = srcu_dereference(to_tell->i_fsnotify_marks.first, + inode_conn = srcu_dereference(to_tell->i_fsnotify_marks, &fsnotify_mark_srcu); + if (inode_conn) + inode_node = srcu_dereference(inode_conn->list.first, + &fsnotify_mark_srcu); + vfsmount_conn = srcu_dereference(mnt->mnt_fsnotify_marks, + &fsnotify_mark_srcu); + if (vfsmount_conn) + vfsmount_node = srcu_dereference( + vfsmount_conn->list.first, + &fsnotify_mark_srcu); } /* @@ -272,8 +343,13 @@ int fsnotify(struct inode *to_tell, __u32 mask, const void *data, int data_is, vfsmount_mark = NULL; } } + + iter_info.inode_mark = inode_mark; + iter_info.vfsmount_mark = vfsmount_mark; + ret = send_to_group(to_tell, inode_mark, vfsmount_mark, mask, - data, data_is, cookie, file_name); + data, data_is, cookie, file_name, + &iter_info); if (ret && (mask & ALL_FSNOTIFY_PERM_EVENTS)) goto out; @@ -287,12 +363,14 @@ int fsnotify(struct inode *to_tell, __u32 mask, const void *data, int data_is, } ret = 0; out: - srcu_read_unlock(&fsnotify_mark_srcu, idx); + srcu_read_unlock(&fsnotify_mark_srcu, iter_info.srcu_idx); return ret; } EXPORT_SYMBOL_GPL(fsnotify); +extern struct kmem_cache 
*fsnotify_mark_connector_cachep; + static __init int fsnotify_init(void) { int ret; @@ -303,6 +381,9 @@ static __init int fsnotify_init(void) if (ret) panic("initializing fsnotify_mark_srcu"); + fsnotify_mark_connector_cachep = KMEM_CACHE(fsnotify_mark_connector, + SLAB_PANIC); + return 0; } core_initcall(fsnotify_init); diff --git a/fs/notify/fsnotify.h b/fs/notify/fsnotify.h index 0a3bc2c..bf012e8 100644 --- a/fs/notify/fsnotify.h +++ b/fs/notify/fsnotify.h @@ -8,60 +8,36 @@ #include "../mount.h" +struct fsnotify_iter_info { + struct fsnotify_mark *inode_mark; + struct fsnotify_mark *vfsmount_mark; + int srcu_idx; +}; + /* destroy all events sitting in this groups notification queue */ extern void fsnotify_flush_notify(struct fsnotify_group *group); /* protects reads of inode and vfsmount marks list */ extern struct srcu_struct fsnotify_mark_srcu; -/* Calculate mask of events for a list of marks */ -extern u32 fsnotify_recalc_mask(struct hlist_head *head); - /* compare two groups for sorting of marks lists */ extern int fsnotify_compare_groups(struct fsnotify_group *a, struct fsnotify_group *b); -extern void fsnotify_set_inode_mark_mask_locked(struct fsnotify_mark *fsn_mark, - __u32 mask); -/* Add mark to a proper place in mark list */ -extern int fsnotify_add_mark_list(struct hlist_head *head, - struct fsnotify_mark *mark, - int allow_dups); -/* add a mark to an inode */ -extern int fsnotify_add_inode_mark(struct fsnotify_mark *mark, - struct fsnotify_group *group, struct inode *inode, - int allow_dups); -/* add a mark to a vfsmount */ -extern int fsnotify_add_vfsmount_mark(struct fsnotify_mark *mark, - struct fsnotify_group *group, struct vfsmount *mnt, - int allow_dups); - -/* vfsmount specific destruction of a mark */ -extern void fsnotify_destroy_vfsmount_mark(struct fsnotify_mark *mark); -/* inode specific destruction of a mark */ -extern void fsnotify_destroy_inode_mark(struct fsnotify_mark *mark); -/* Find mark belonging to given group in the list of marks */ -extern struct fsnotify_mark *fsnotify_find_mark(struct hlist_head *head, - struct fsnotify_group *group); -/* Destroy all marks in the given list protected by 'lock' */ -extern void fsnotify_destroy_marks(struct hlist_head *head, spinlock_t *lock); +/* Destroy all marks connected via given connector */ +extern void fsnotify_destroy_marks(struct fsnotify_mark_connector __rcu **connp); /* run the list of all marks associated with inode and destroy them */ static inline void fsnotify_clear_marks_by_inode(struct inode *inode) { - fsnotify_destroy_marks(&inode->i_fsnotify_marks, &inode->i_lock); + fsnotify_destroy_marks(&inode->i_fsnotify_marks); } /* run the list of all marks associated with vfsmount and destroy them */ static inline void fsnotify_clear_marks_by_mount(struct vfsmount *mnt) { - fsnotify_destroy_marks(&real_mount(mnt)->mnt_fsnotify_marks, - &mnt->mnt_root->d_lock); + fsnotify_destroy_marks(&real_mount(mnt)->mnt_fsnotify_marks); } -/* prepare for freeing all marks associated with given group */ -extern void fsnotify_detach_group_marks(struct fsnotify_group *group); -/* - * wait for fsnotify_mark_srcu period to end and free all marks in destroy_list - */ -extern void fsnotify_mark_destroy_list(void); +/* Wait until all marks queued for destruction are destroyed */ +extern void fsnotify_wait_marks_destroyed(void); /* * update the dentry->d_flags of all of inode's children to indicate if inode cares diff --git a/fs/notify/group.c b/fs/notify/group.c index fbe3cbe..32357534 100644 --- a/fs/notify/group.c +++ 
b/fs/notify/group.c @@ -66,14 +66,23 @@ void fsnotify_destroy_group(struct fsnotify_group *group) */ fsnotify_group_stop_queueing(group); - /* clear all inode marks for this group, attach them to destroy_list */ - fsnotify_detach_group_marks(group); + /* Clear all marks for this group and queue them for destruction */ + fsnotify_clear_marks_by_group(group, FSNOTIFY_OBJ_ALL_TYPES); /* - * Wait for fsnotify_mark_srcu period to end and free all marks in - * destroy_list + * Some marks can still be pinned when waiting for response from + * userspace. Wait for those now. fsnotify_prepare_user_wait() will + * not succeed now so this wait is race-free. */ - fsnotify_mark_destroy_list(); + wait_event(group->notification_waitq, !atomic_read(&group->user_waits)); + + /* + * Wait until all marks get really destroyed. We could actually destroy + * them ourselves instead of waiting for worker to do it, however that + * would be racy as worker can already be processing some marks before + * we even entered fsnotify_destroy_group(). + */ + fsnotify_wait_marks_destroyed(); /* * Since we have waited for fsnotify_mark_srcu in @@ -124,6 +133,7 @@ struct fsnotify_group *fsnotify_alloc_group(const struct fsnotify_ops *ops) /* set to 0 when there a no external references to this group */ atomic_set(&group->refcnt, 1); atomic_set(&group->num_marks, 0); + atomic_set(&group->user_waits, 0); spin_lock_init(&group->notification_lock); INIT_LIST_HEAD(&group->notification_list); diff --git a/fs/notify/inode_mark.c b/fs/notify/inode_mark.c deleted file mode 100644 index a364524..0000000 --- a/fs/notify/inode_mark.c +++ /dev/null @@ -1,199 +0,0 @@ -/* - * Copyright (C) 2008 Red Hat, Inc., Eric Paris <eparis@redhat.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; see the file COPYING. If not, write to - * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -#include <linux/fs.h> -#include <linux/init.h> -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/mutex.h> -#include <linux/spinlock.h> - -#include <linux/atomic.h> - -#include <linux/fsnotify_backend.h> -#include "fsnotify.h" - -#include "../internal.h" - -/* - * Recalculate the inode->i_fsnotify_mask, or the mask of all FS_* event types - * any notifier is interested in hearing for this inode. 
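[Editor's note: with inode_mark.c deleted (deletion continues below), mask recalculation is unified: callers no longer use per-object helpers but hand fsnotify_recalc_mask() the connector, under group->mark_mutex. Usage as it appears in the fanotify/inotify hunks of this series:]

        /* Old per-object API, removed by this series: */
        fsnotify_recalc_inode_mask(inode);
        fsnotify_recalc_vfsmount_mask(mnt);

        /* New connector-based call (made while holding group->mark_mutex): */
        fsnotify_recalc_mask(inode->i_fsnotify_marks);
        fsnotify_recalc_mask(real_mount(mnt)->mnt_fsnotify_marks);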
- */ -void fsnotify_recalc_inode_mask(struct inode *inode) -{ - spin_lock(&inode->i_lock); - inode->i_fsnotify_mask = fsnotify_recalc_mask(&inode->i_fsnotify_marks); - spin_unlock(&inode->i_lock); - - __fsnotify_update_child_dentry_flags(inode); -} - -void fsnotify_destroy_inode_mark(struct fsnotify_mark *mark) -{ - struct inode *inode = mark->inode; - - BUG_ON(!mutex_is_locked(&mark->group->mark_mutex)); - assert_spin_locked(&mark->lock); - - spin_lock(&inode->i_lock); - - hlist_del_init_rcu(&mark->obj_list); - mark->inode = NULL; - - /* - * this mark is now off the inode->i_fsnotify_marks list and we - * hold the inode->i_lock, so this is the perfect time to update the - * inode->i_fsnotify_mask - */ - inode->i_fsnotify_mask = fsnotify_recalc_mask(&inode->i_fsnotify_marks); - spin_unlock(&inode->i_lock); -} - -/* - * Given a group clear all of the inode marks associated with that group. - */ -void fsnotify_clear_inode_marks_by_group(struct fsnotify_group *group) -{ - fsnotify_clear_marks_by_group_flags(group, FSNOTIFY_MARK_FLAG_INODE); -} - -/* - * given a group and inode, find the mark associated with that combination. - * if found take a reference to that mark and return it, else return NULL - */ -struct fsnotify_mark *fsnotify_find_inode_mark(struct fsnotify_group *group, - struct inode *inode) -{ - struct fsnotify_mark *mark; - - spin_lock(&inode->i_lock); - mark = fsnotify_find_mark(&inode->i_fsnotify_marks, group); - spin_unlock(&inode->i_lock); - - return mark; -} - -/* - * If we are setting a mark mask on an inode mark we should pin the inode - * in memory. - */ -void fsnotify_set_inode_mark_mask_locked(struct fsnotify_mark *mark, - __u32 mask) -{ - struct inode *inode; - - assert_spin_locked(&mark->lock); - - if (mask && - mark->inode && - !(mark->flags & FSNOTIFY_MARK_FLAG_OBJECT_PINNED)) { - mark->flags |= FSNOTIFY_MARK_FLAG_OBJECT_PINNED; - inode = igrab(mark->inode); - /* - * we shouldn't be able to get here if the inode wasn't - * already safely held in memory. But bug in case it - * ever is wrong. - */ - BUG_ON(!inode); - } -} - -/* - * Attach an initialized mark to a given inode. - * These marks may be used for the fsnotify backend to determine which - * event types should be delivered to which group and for which inodes. These - * marks are ordered according to priority, highest number first, and then by - * the group's location in memory. - */ -int fsnotify_add_inode_mark(struct fsnotify_mark *mark, - struct fsnotify_group *group, struct inode *inode, - int allow_dups) -{ - int ret; - - mark->flags |= FSNOTIFY_MARK_FLAG_INODE; - - BUG_ON(!mutex_is_locked(&group->mark_mutex)); - assert_spin_locked(&mark->lock); - - spin_lock(&inode->i_lock); - mark->inode = inode; - ret = fsnotify_add_mark_list(&inode->i_fsnotify_marks, mark, - allow_dups); - inode->i_fsnotify_mask = fsnotify_recalc_mask(&inode->i_fsnotify_marks); - spin_unlock(&inode->i_lock); - - return ret; -} - -/** - * fsnotify_unmount_inodes - an sb is unmounting. handle any watched inodes. - * @sb: superblock being unmounted. - * - * Called during unmount with no locks held, so needs to be safe against - * concurrent modifiers. We temporarily drop sb->s_inode_list_lock and CAN block. 
- */ -void fsnotify_unmount_inodes(struct super_block *sb) -{ - struct inode *inode, *iput_inode = NULL; - - spin_lock(&sb->s_inode_list_lock); - list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { - /* - * We cannot __iget() an inode in state I_FREEING, - * I_WILL_FREE, or I_NEW which is fine because by that point - * the inode cannot have any associated watches. - */ - spin_lock(&inode->i_lock); - if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) { - spin_unlock(&inode->i_lock); - continue; - } - - /* - * If i_count is zero, the inode cannot have any watches and - * doing an __iget/iput with MS_ACTIVE clear would actually - * evict all inodes with zero i_count from icache which is - * unnecessarily violent and may in fact be illegal to do. - */ - if (!atomic_read(&inode->i_count)) { - spin_unlock(&inode->i_lock); - continue; - } - - __iget(inode); - spin_unlock(&inode->i_lock); - spin_unlock(&sb->s_inode_list_lock); - - if (iput_inode) - iput(iput_inode); - - /* for each watch, send FS_UNMOUNT and then remove it */ - fsnotify(inode, FS_UNMOUNT, inode, FSNOTIFY_EVENT_INODE, NULL, 0); - - fsnotify_inode_delete(inode); - - iput_inode = inode; - - spin_lock(&sb->s_inode_list_lock); - } - spin_unlock(&sb->s_inode_list_lock); - - if (iput_inode) - iput(iput_inode); -} diff --git a/fs/notify/inotify/inotify.h b/fs/notify/inotify/inotify.h index 7c461fd..9ff67b6 100644 --- a/fs/notify/inotify/inotify.h +++ b/fs/notify/inotify/inotify.h @@ -27,9 +27,11 @@ extern int inotify_handle_event(struct fsnotify_group *group, struct fsnotify_mark *inode_mark, struct fsnotify_mark *vfsmount_mark, u32 mask, const void *data, int data_type, - const unsigned char *file_name, u32 cookie); + const unsigned char *file_name, u32 cookie, + struct fsnotify_iter_info *iter_info); extern const struct fsnotify_ops inotify_fsnotify_ops; +extern struct kmem_cache *inotify_inode_mark_cachep; #ifdef CONFIG_INOTIFY_USER static inline void dec_inotify_instances(struct ucounts *ucounts) diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c index 1aeb837..8b73332 100644 --- a/fs/notify/inotify/inotify_fsnotify.c +++ b/fs/notify/inotify/inotify_fsnotify.c @@ -68,7 +68,8 @@ int inotify_handle_event(struct fsnotify_group *group, struct fsnotify_mark *inode_mark, struct fsnotify_mark *vfsmount_mark, u32 mask, const void *data, int data_type, - const unsigned char *file_name, u32 cookie) + const unsigned char *file_name, u32 cookie, + struct fsnotify_iter_info *iter_info) { struct inotify_inode_mark *i_mark; struct inotify_event_info *event; @@ -156,8 +157,8 @@ static int idr_callback(int id, void *p, void *data) * BUG() that was here. 
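[Editor's note: a recurring change in these hunks is that freeing a mark moves out of a per-mark callback passed to fsnotify_init_mark() and into the group's fsnotify_ops, and handle_event gains an iter_info argument. A minimal sketch of the new shape, modeled on the dnotify/inotify/fanotify hunks ("myfs" names hypothetical):]

        extern struct kmem_cache *myfs_mark_cache;

        static int myfs_handle_event(struct fsnotify_group *group,
                                     struct inode *inode,
                                     struct fsnotify_mark *inode_mark,
                                     struct fsnotify_mark *vfsmount_mark,
                                     u32 mask, const void *data, int data_type,
                                     const unsigned char *file_name, u32 cookie,
                                     struct fsnotify_iter_info *iter_info);

        static void myfs_free_mark(struct fsnotify_mark *mark)
        {
                kmem_cache_free(myfs_mark_cache, mark);
        }

        static const struct fsnotify_ops myfs_fsnotify_ops = {
                .handle_event = myfs_handle_event,
                .free_mark    = myfs_free_mark,
        };

[Marks are then initialized with fsnotify_init_mark(mark, group) rather than fsnotify_init_mark(mark, free_fn).]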
*/ if (fsn_mark) - printk(KERN_WARNING "fsn_mark->group=%p inode=%p wd=%d\n", - fsn_mark->group, fsn_mark->inode, i_mark->wd); + printk(KERN_WARNING "fsn_mark->group=%p wd=%d\n", + fsn_mark->group, i_mark->wd); return 0; } @@ -175,9 +176,20 @@ static void inotify_free_event(struct fsnotify_event *fsn_event) kfree(INOTIFY_E(fsn_event)); } +/* ding dong the mark is dead */ +static void inotify_free_mark(struct fsnotify_mark *fsn_mark) +{ + struct inotify_inode_mark *i_mark; + + i_mark = container_of(fsn_mark, struct inotify_inode_mark, fsn_mark); + + kmem_cache_free(inotify_inode_mark_cachep, i_mark); +} + const struct fsnotify_ops inotify_fsnotify_ops = { .handle_event = inotify_handle_event, .free_group_priv = inotify_free_group_priv, .free_event = inotify_free_event, .freeing_mark = inotify_freeing_mark, + .free_mark = inotify_free_mark, }; diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c index 498d609..7cc7d3f 100644 --- a/fs/notify/inotify/inotify_user.c +++ b/fs/notify/inotify/inotify_user.c @@ -47,7 +47,7 @@ /* configurable via /proc/sys/fs/inotify/ */ static int inotify_max_queued_events __read_mostly; -static struct kmem_cache *inotify_inode_mark_cachep __read_mostly; +struct kmem_cache *inotify_inode_mark_cachep __read_mostly; #ifdef CONFIG_SYSCTL @@ -395,21 +395,6 @@ static struct inotify_inode_mark *inotify_idr_find(struct fsnotify_group *group, return i_mark; } -static void do_inotify_remove_from_idr(struct fsnotify_group *group, - struct inotify_inode_mark *i_mark) -{ - struct idr *idr = &group->inotify_data.idr; - spinlock_t *idr_lock = &group->inotify_data.idr_lock; - int wd = i_mark->wd; - - assert_spin_locked(idr_lock); - - idr_remove(idr, wd); - - /* removed from the idr, drop that ref */ - fsnotify_put_mark(&i_mark->fsn_mark); -} - /* * Remove the mark from the idr (if present) and drop the reference * on the mark because it was in the idr. @@ -417,6 +402,7 @@ static void do_inotify_remove_from_idr(struct fsnotify_group *group, static void inotify_remove_from_idr(struct fsnotify_group *group, struct inotify_inode_mark *i_mark) { + struct idr *idr = &group->inotify_data.idr; spinlock_t *idr_lock = &group->inotify_data.idr_lock; struct inotify_inode_mark *found_i_mark = NULL; int wd; @@ -429,18 +415,16 @@ static void inotify_remove_from_idr(struct fsnotify_group *group, * if it wasn't.... 
*/ if (wd == -1) { - WARN_ONCE(1, "%s: i_mark=%p i_mark->wd=%d i_mark->group=%p" - " i_mark->inode=%p\n", __func__, i_mark, i_mark->wd, - i_mark->fsn_mark.group, i_mark->fsn_mark.inode); + WARN_ONCE(1, "%s: i_mark=%p i_mark->wd=%d i_mark->group=%p\n", + __func__, i_mark, i_mark->wd, i_mark->fsn_mark.group); goto out; } /* Lets look in the idr to see if we find it */ found_i_mark = inotify_idr_find_locked(group, wd); if (unlikely(!found_i_mark)) { - WARN_ONCE(1, "%s: i_mark=%p i_mark->wd=%d i_mark->group=%p" - " i_mark->inode=%p\n", __func__, i_mark, i_mark->wd, - i_mark->fsn_mark.group, i_mark->fsn_mark.inode); + WARN_ONCE(1, "%s: i_mark=%p i_mark->wd=%d i_mark->group=%p\n", + __func__, i_mark, i_mark->wd, i_mark->fsn_mark.group); goto out; } @@ -451,35 +435,33 @@ static void inotify_remove_from_idr(struct fsnotify_group *group, */ if (unlikely(found_i_mark != i_mark)) { WARN_ONCE(1, "%s: i_mark=%p i_mark->wd=%d i_mark->group=%p " - "mark->inode=%p found_i_mark=%p found_i_mark->wd=%d " - "found_i_mark->group=%p found_i_mark->inode=%p\n", - __func__, i_mark, i_mark->wd, i_mark->fsn_mark.group, - i_mark->fsn_mark.inode, found_i_mark, found_i_mark->wd, - found_i_mark->fsn_mark.group, - found_i_mark->fsn_mark.inode); + "found_i_mark=%p found_i_mark->wd=%d " + "found_i_mark->group=%p\n", __func__, i_mark, + i_mark->wd, i_mark->fsn_mark.group, found_i_mark, + found_i_mark->wd, found_i_mark->fsn_mark.group); goto out; } /* * One ref for being in the idr - * one ref held by the caller trying to kill us * one ref grabbed by inotify_idr_find */ - if (unlikely(atomic_read(&i_mark->fsn_mark.refcnt) < 3)) { - printk(KERN_ERR "%s: i_mark=%p i_mark->wd=%d i_mark->group=%p" - " i_mark->inode=%p\n", __func__, i_mark, i_mark->wd, - i_mark->fsn_mark.group, i_mark->fsn_mark.inode); + if (unlikely(atomic_read(&i_mark->fsn_mark.refcnt) < 2)) { + printk(KERN_ERR "%s: i_mark=%p i_mark->wd=%d i_mark->group=%p\n", + __func__, i_mark, i_mark->wd, i_mark->fsn_mark.group); /* we can't really recover with bad ref cnting.. */ BUG(); } - do_inotify_remove_from_idr(group, i_mark); + idr_remove(idr, wd); + /* Removed from the idr, drop that ref. 
*/ + fsnotify_put_mark(&i_mark->fsn_mark); out: + i_mark->wd = -1; + spin_unlock(idr_lock); /* match the ref taken by inotify_idr_find_locked() */ if (found_i_mark) fsnotify_put_mark(&found_i_mark->fsn_mark); - i_mark->wd = -1; - spin_unlock(idr_lock); } /* @@ -492,7 +474,7 @@ void inotify_ignored_and_remove_idr(struct fsnotify_mark *fsn_mark, /* Queue ignore event for the watch */ inotify_handle_event(group, NULL, fsn_mark, NULL, FS_IN_IGNORED, - NULL, FSNOTIFY_EVENT_NONE, NULL, 0); + NULL, FSNOTIFY_EVENT_NONE, NULL, 0, NULL); i_mark = container_of(fsn_mark, struct inotify_inode_mark, fsn_mark); /* remove this mark from the idr */ @@ -501,16 +483,6 @@ void inotify_ignored_and_remove_idr(struct fsnotify_mark *fsn_mark, dec_inotify_watches(group->inotify_data.ucounts); } -/* ding dong the mark is dead */ -static void inotify_free_mark(struct fsnotify_mark *fsn_mark) -{ - struct inotify_inode_mark *i_mark; - - i_mark = container_of(fsn_mark, struct inotify_inode_mark, fsn_mark); - - kmem_cache_free(inotify_inode_mark_cachep, i_mark); -} - static int inotify_update_existing_watch(struct fsnotify_group *group, struct inode *inode, u32 arg) @@ -524,21 +496,19 @@ static int inotify_update_existing_watch(struct fsnotify_group *group, mask = inotify_arg_to_mask(arg); - fsn_mark = fsnotify_find_inode_mark(group, inode); + fsn_mark = fsnotify_find_mark(&inode->i_fsnotify_marks, group); if (!fsn_mark) return -ENOENT; i_mark = container_of(fsn_mark, struct inotify_inode_mark, fsn_mark); spin_lock(&fsn_mark->lock); - old_mask = fsn_mark->mask; if (add) - fsnotify_set_mark_mask_locked(fsn_mark, (fsn_mark->mask | mask)); + fsn_mark->mask |= mask; else - fsnotify_set_mark_mask_locked(fsn_mark, mask); + fsn_mark->mask = mask; new_mask = fsn_mark->mask; - spin_unlock(&fsn_mark->lock); if (old_mask != new_mask) { @@ -549,7 +519,7 @@ static int inotify_update_existing_watch(struct fsnotify_group *group, /* update the inode with this new fsn_mark */ if (dropped || do_inode) - fsnotify_recalc_inode_mask(inode); + fsnotify_recalc_mask(inode->i_fsnotify_marks); } @@ -578,7 +548,7 @@ static int inotify_new_watch(struct fsnotify_group *group, if (unlikely(!tmp_i_mark)) return -ENOMEM; - fsnotify_init_mark(&tmp_i_mark->fsn_mark, inotify_free_mark); + fsnotify_init_mark(&tmp_i_mark->fsn_mark, group); tmp_i_mark->fsn_mark.mask = mask; tmp_i_mark->wd = -1; @@ -594,8 +564,7 @@ static int inotify_new_watch(struct fsnotify_group *group, } /* we are on the idr, now get on the inode */ - ret = fsnotify_add_mark_locked(&tmp_i_mark->fsn_mark, group, inode, - NULL, 0); + ret = fsnotify_add_mark_locked(&tmp_i_mark->fsn_mark, inode, NULL, 0); if (ret) { /* we failed to get on the inode, get off the idr */ inotify_remove_from_idr(group, tmp_i_mark); diff --git a/fs/notify/mark.c b/fs/notify/mark.c index 6043306..9991f88 100644 --- a/fs/notify/mark.c +++ b/fs/notify/mark.c @@ -33,7 +33,7 @@ * * group->mark_mutex * mark->lock - * inode->i_lock + * mark->connector->lock * * group->mark_mutex protects the marks_list anchored inside a given group and * each mark is hooked via the g_list. It also protects the groups private @@ -44,14 +44,22 @@ * is assigned to as well as the access to a reference of the inode/vfsmount * that is being watched by the mark. * - * inode->i_lock protects the i_fsnotify_marks list anchored inside a - * given inode and each mark is hooked via the i_list. 
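[Editor's note: the updated locking comment in mark.c fixes the order as group->mark_mutex -> mark->lock -> mark->connector->lock. A hypothetical illustration of a mask update honoring that order, mirroring how the fanotify add/remove paths above were restructured:]

        static void example_set_mask(struct fsnotify_mark *mark, __u32 mask)
        {
                mutex_lock(&mark->group->mark_mutex);
                spin_lock(&mark->lock);
                mark->mask = mask;
                spin_unlock(&mark->lock);
                /* Takes mark->connector->lock internally. */
                fsnotify_recalc_mask(mark->connector);
                mutex_unlock(&mark->group->mark_mutex);
        }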
(and sorta the - * free_i_list) + * mark->connector->lock protects the list of marks anchored inside an + * inode / vfsmount and each mark is hooked via the i_list. * + * A list of notification marks relating to inode / mnt is contained in + * fsnotify_mark_connector. That structure is alive as long as there are any + * marks in the list and is also protected by fsnotify_mark_srcu. A mark gets + * detached from fsnotify_mark_connector when last reference to the mark is + * dropped. Thus having mark reference is enough to protect mark->connector + * pointer and to make sure fsnotify_mark_connector cannot disappear. Also + * because we remove mark from g_list before dropping mark reference associated + * with that, any mark found through g_list is guaranteed to have + * mark->connector set until we drop group->mark_mutex. * * LIFETIME: * Inode marks survive between when they are added to an inode and when their - * refcnt==0. + * refcnt==0. Marks are also protected by fsnotify_mark_srcu. * * The inode mark can be cleared for a number of different reasons including: * - The inode is unlinked for the last time. (fsnotify_inode_remove) @@ -61,17 +69,6 @@ * - The fsnotify_group associated with the mark is going away and all such marks * need to be cleaned up. (fsnotify_clear_marks_by_group) * - * Worst case we are given an inode and need to clean up all the marks on that - * inode. We take i_lock and walk the i_fsnotify_marks safely. For each - * mark on the list we take a reference (so the mark can't disappear under us). - * We remove that mark form the inode's list of marks and we add this mark to a - * private list anchored on the stack using i_free_list; we walk i_free_list - * and before we destroy the mark we make sure that we dont race with a - * concurrent destroy_group by getting a ref to the marks group and taking the - * groups mutex. - - * Very similarly for freeing by group, except we use free_g_list. - * * This has the very interesting property of being able to run concurrently with * any (or all) other directions. */ @@ -94,94 +91,281 @@ #define FSNOTIFY_REAPER_DELAY (1) /* 1 jiffy */ struct srcu_struct fsnotify_mark_srcu; +struct kmem_cache *fsnotify_mark_connector_cachep; + static DEFINE_SPINLOCK(destroy_lock); static LIST_HEAD(destroy_list); +static struct fsnotify_mark_connector *connector_destroy_list; static void fsnotify_mark_destroy_workfn(struct work_struct *work); static DECLARE_DELAYED_WORK(reaper_work, fsnotify_mark_destroy_workfn); +static void fsnotify_connector_destroy_workfn(struct work_struct *work); +static DECLARE_WORK(connector_reaper_work, fsnotify_connector_destroy_workfn); + void fsnotify_get_mark(struct fsnotify_mark *mark) { + WARN_ON_ONCE(!atomic_read(&mark->refcnt)); atomic_inc(&mark->refcnt); } -void fsnotify_put_mark(struct fsnotify_mark *mark) +/* + * Get mark reference when we found the mark via lockless traversal of object + * list. Mark can be already removed from the list by now and on its way to be + * destroyed once SRCU period ends. 
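fsnotify_get_mark_safe() relies on atomic_inc_not_zero(): during a lockless SRCU traversal the mark may already be on its way to destruction, so a reference may only be taken if the count has not yet reached zero. A userspace sketch of the same pattern, written as a C11 compare-and-swap loop:

#include <stdatomic.h>
#include <stdbool.h>

/* Userspace sketch of the atomic_inc_not_zero() pattern behind
 * fsnotify_get_mark_safe(): take a reference only if the object has
 * not already dropped to zero and begun dying. */
static bool get_ref_if_live(atomic_int *refcnt)
{
        int old = atomic_load(refcnt);

        while (old != 0) {
                if (atomic_compare_exchange_weak(refcnt, &old, old + 1))
                        return true;    /* reference taken */
        }
        return false;   /* too late; only the SRCU period keeps it readable */
}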
+ */ +static bool fsnotify_get_mark_safe(struct fsnotify_mark *mark) { - if (atomic_dec_and_test(&mark->refcnt)) { - if (mark->group) - fsnotify_put_group(mark->group); - mark->free_mark(mark); - } + return atomic_inc_not_zero(&mark->refcnt); } -/* Calculate mask of events for a list of marks */ -u32 fsnotify_recalc_mask(struct hlist_head *head) +static void __fsnotify_recalc_mask(struct fsnotify_mark_connector *conn) { u32 new_mask = 0; struct fsnotify_mark *mark; - hlist_for_each_entry(mark, head, obj_list) - new_mask |= mark->mask; - return new_mask; + assert_spin_locked(&conn->lock); + hlist_for_each_entry(mark, &conn->list, obj_list) { + if (mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED) + new_mask |= mark->mask; + } + if (conn->flags & FSNOTIFY_OBJ_TYPE_INODE) + conn->inode->i_fsnotify_mask = new_mask; + else if (conn->flags & FSNOTIFY_OBJ_TYPE_VFSMOUNT) + real_mount(conn->mnt)->mnt_fsnotify_mask = new_mask; } /* - * Remove mark from inode / vfsmount list, group list, drop inode reference - * if we got one. - * - * Must be called with group->mark_mutex held. + * Calculate mask of events for a list of marks. The caller must make sure + * connector and connector->inode cannot disappear under us. Callers achieve + * this by holding a mark->lock or mark->group->mark_mutex for a mark on this + * list. */ -void fsnotify_detach_mark(struct fsnotify_mark *mark) +void fsnotify_recalc_mask(struct fsnotify_mark_connector *conn) +{ + if (!conn) + return; + + spin_lock(&conn->lock); + __fsnotify_recalc_mask(conn); + spin_unlock(&conn->lock); + if (conn->flags & FSNOTIFY_OBJ_TYPE_INODE) + __fsnotify_update_child_dentry_flags(conn->inode); +} + +/* Free all connectors queued for freeing once SRCU period ends */ +static void fsnotify_connector_destroy_workfn(struct work_struct *work) +{ + struct fsnotify_mark_connector *conn, *free; + + spin_lock(&destroy_lock); + conn = connector_destroy_list; + connector_destroy_list = NULL; + spin_unlock(&destroy_lock); + + synchronize_srcu(&fsnotify_mark_srcu); + while (conn) { + free = conn; + conn = conn->destroy_next; + kmem_cache_free(fsnotify_mark_connector_cachep, free); + } +} + +static struct inode *fsnotify_detach_connector_from_object( + struct fsnotify_mark_connector *conn) { struct inode *inode = NULL; + + if (conn->flags & FSNOTIFY_OBJ_TYPE_INODE) { + inode = conn->inode; + rcu_assign_pointer(inode->i_fsnotify_marks, NULL); + inode->i_fsnotify_mask = 0; + conn->inode = NULL; + conn->flags &= ~FSNOTIFY_OBJ_TYPE_INODE; + } else if (conn->flags & FSNOTIFY_OBJ_TYPE_VFSMOUNT) { + rcu_assign_pointer(real_mount(conn->mnt)->mnt_fsnotify_marks, + NULL); + real_mount(conn->mnt)->mnt_fsnotify_mask = 0; + conn->mnt = NULL; + conn->flags &= ~FSNOTIFY_OBJ_TYPE_VFSMOUNT; + } + + return inode; +} + +static void fsnotify_final_mark_destroy(struct fsnotify_mark *mark) +{ struct fsnotify_group *group = mark->group; - BUG_ON(!mutex_is_locked(&group->mark_mutex)); + if (WARN_ON_ONCE(!group)) + return; + group->ops->free_mark(mark); + fsnotify_put_group(group); +} - spin_lock(&mark->lock); +void fsnotify_put_mark(struct fsnotify_mark *mark) +{ + struct fsnotify_mark_connector *conn; + struct inode *inode = NULL; + bool free_conn = false; - /* something else already called this function on this mark */ - if (!(mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED)) { - spin_unlock(&mark->lock); + /* Catch marks that were actually never attached to object */ + if (!mark->connector) { + if (atomic_dec_and_test(&mark->refcnt)) + fsnotify_final_mark_destroy(mark); return; } - mark->flags &= 
~FSNOTIFY_MARK_FLAG_ATTACHED; + /* + * We have to be careful so that traversals of obj_list under lock can + * safely grab mark reference. + */ + if (!atomic_dec_and_lock(&mark->refcnt, &mark->connector->lock)) + return; - if (mark->flags & FSNOTIFY_MARK_FLAG_INODE) { - inode = mark->inode; - fsnotify_destroy_inode_mark(mark); - } else if (mark->flags & FSNOTIFY_MARK_FLAG_VFSMOUNT) - fsnotify_destroy_vfsmount_mark(mark); - else - BUG(); + conn = mark->connector; + hlist_del_init_rcu(&mark->obj_list); + if (hlist_empty(&conn->list)) { + inode = fsnotify_detach_connector_from_object(conn); + free_conn = true; + } else { + __fsnotify_recalc_mask(conn); + } + mark->connector = NULL; + spin_unlock(&conn->lock); + + iput(inode); + + if (free_conn) { + spin_lock(&destroy_lock); + conn->destroy_next = connector_destroy_list; + connector_destroy_list = conn; + spin_unlock(&destroy_lock); + queue_work(system_unbound_wq, &connector_reaper_work); + } /* * Note that we didn't update flags telling whether inode cares about * what's happening with children. We update these flags from * __fsnotify_parent() lazily when next event happens on one of our * children. */ + spin_lock(&destroy_lock); + list_add(&mark->g_list, &destroy_list); + spin_unlock(&destroy_lock); + queue_delayed_work(system_unbound_wq, &reaper_work, + FSNOTIFY_REAPER_DELAY); +} - list_del_init(&mark->g_list); +bool fsnotify_prepare_user_wait(struct fsnotify_iter_info *iter_info) +{ + struct fsnotify_group *group; - spin_unlock(&mark->lock); + if (WARN_ON_ONCE(!iter_info->inode_mark && !iter_info->vfsmount_mark)) + return false; + + if (iter_info->inode_mark) + group = iter_info->inode_mark->group; + else + group = iter_info->vfsmount_mark->group; + + /* + * Since acquisition of mark reference is an atomic op as well, we can + * be sure this inc is seen before any effect of refcount increment. + */ + atomic_inc(&group->user_waits); + + if (iter_info->inode_mark) { + /* This can fail if mark is being removed */ + if (!fsnotify_get_mark_safe(iter_info->inode_mark)) + goto out_wait; + } + if (iter_info->vfsmount_mark) { + if (!fsnotify_get_mark_safe(iter_info->vfsmount_mark)) + goto out_inode; + } - if (inode && (mark->flags & FSNOTIFY_MARK_FLAG_OBJECT_PINNED)) - iput(inode); + /* + * Now that both marks are pinned by refcount in the inode / vfsmount + * lists, we can drop SRCU lock, and safely resume the list iteration + * once userspace returns. + */ + srcu_read_unlock(&fsnotify_mark_srcu, iter_info->srcu_idx); + + return true; +out_inode: + if (iter_info->inode_mark) + fsnotify_put_mark(iter_info->inode_mark); +out_wait: + if (atomic_dec_and_test(&group->user_waits) && group->shutdown) + wake_up(&group->notification_waitq); + return false; +} + +void fsnotify_finish_user_wait(struct fsnotify_iter_info *iter_info) +{ + struct fsnotify_group *group = NULL; + + iter_info->srcu_idx = srcu_read_lock(&fsnotify_mark_srcu); + if (iter_info->inode_mark) { + group = iter_info->inode_mark->group; + fsnotify_put_mark(iter_info->inode_mark); + } + if (iter_info->vfsmount_mark) { + group = iter_info->vfsmount_mark->group; + fsnotify_put_mark(iter_info->vfsmount_mark); + } + /* + * We abuse notification_waitq on group shutdown for waiting for all + * marks pinned when waiting for userspace. + */ + if (atomic_dec_and_test(&group->user_waits) && group->shutdown) + wake_up(&group->notification_waitq); +} + +/* + * Mark mark as detached, remove it from group list. Mark still stays in object + * list until its last reference is dropped. 
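fsnotify_put_mark() above pairs the final reference drop with taking connector->lock via atomic_dec_and_lock(), so the last holder unlinks the mark from the object list before any other thread can find it there. A simplified userspace sketch of that primitive, with a pthread mutex standing in for the spinlock:

#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>

/* Sketch of atomic_dec_and_lock(): drop a reference, and if it was the
 * last one, return true with 'lock' held so the caller can unlink the
 * object from shared lists before anyone else finds it. */
static bool dec_and_lock(atomic_int *refcnt, pthread_mutex_t *lock)
{
        int old = atomic_load(refcnt);

        /* Fast path: not the last reference, no lock needed. */
        while (old > 1) {
                if (atomic_compare_exchange_weak(refcnt, &old, old - 1))
                        return false;
        }
        pthread_mutex_lock(lock);
        if (atomic_fetch_sub(refcnt, 1) == 1)
                return true;            /* last ref; lock stays held */
        pthread_mutex_unlock(lock);
        return false;
}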
Note that we rely on mark being + * removed from group list before corresponding reference to it is dropped. In + * particular we rely on mark->connector being valid while we hold + * group->mark_mutex if we found the mark through g_list. + * + * Must be called with group->mark_mutex held. The caller must either hold + * reference to the mark or be protected by fsnotify_mark_srcu. + */ +void fsnotify_detach_mark(struct fsnotify_mark *mark) +{ + struct fsnotify_group *group = mark->group; + + WARN_ON_ONCE(!mutex_is_locked(&group->mark_mutex)); + WARN_ON_ONCE(!srcu_read_lock_held(&fsnotify_mark_srcu) && + atomic_read(&mark->refcnt) < 1 + + !!(mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED)); + + spin_lock(&mark->lock); + /* something else already called this function on this mark */ + if (!(mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED)) { + spin_unlock(&mark->lock); + return; + } + mark->flags &= ~FSNOTIFY_MARK_FLAG_ATTACHED; + list_del_init(&mark->g_list); + spin_unlock(&mark->lock); atomic_dec(&group->num_marks); + + /* Drop mark reference acquired in fsnotify_add_mark_locked() */ + fsnotify_put_mark(mark); } /* - * Prepare mark for freeing and add it to the list of marks prepared for - * freeing. The actual freeing must happen after SRCU period ends and the - * caller is responsible for this. + * Free fsnotify mark. The mark is actually only marked as being freed. The + * freeing is actually happening only once last reference to the mark is + * dropped from a workqueue which first waits for srcu period end. * - * The function returns true if the mark was added to the list of marks for - * freeing. The function returns false if someone else has already called - * __fsnotify_free_mark() for the mark. + * Caller must have a reference to the mark or be protected by + * fsnotify_mark_srcu. */ -static bool __fsnotify_free_mark(struct fsnotify_mark *mark) +void fsnotify_free_mark(struct fsnotify_mark *mark) { struct fsnotify_group *group = mark->group; @@ -189,7 +373,7 @@ static bool __fsnotify_free_mark(struct fsnotify_mark *mark) /* something else already called this function on this mark */ if (!(mark->flags & FSNOTIFY_MARK_FLAG_ALIVE)) { spin_unlock(&mark->lock); - return false; + return; } mark->flags &= ~FSNOTIFY_MARK_FLAG_ALIVE; spin_unlock(&mark->lock); @@ -201,25 +385,6 @@ static bool __fsnotify_free_mark(struct fsnotify_mark *mark) */ if (group->ops->freeing_mark) group->ops->freeing_mark(mark, group); - - spin_lock(&destroy_lock); - list_add(&mark->g_list, &destroy_list); - spin_unlock(&destroy_lock); - - return true; -} - -/* - * Free fsnotify mark. The freeing is actually happening from a workqueue which - * first waits for srcu period end. Caller must have a reference to the mark - * or be protected by fsnotify_mark_srcu. - */ -void fsnotify_free_mark(struct fsnotify_mark *mark) -{ - if (__fsnotify_free_mark(mark)) { - queue_delayed_work(system_unbound_wq, &reaper_work, - FSNOTIFY_REAPER_DELAY); - } } void fsnotify_destroy_mark(struct fsnotify_mark *mark, @@ -231,54 +396,6 @@ void fsnotify_destroy_mark(struct fsnotify_mark *mark, fsnotify_free_mark(mark); } -void fsnotify_destroy_marks(struct hlist_head *head, spinlock_t *lock) -{ - struct fsnotify_mark *mark; - - while (1) { - /* - * We have to be careful since we can race with e.g. - * fsnotify_clear_marks_by_group() and once we drop 'lock', - * mark can get removed from the obj_list and destroyed. But - * we are holding mark reference so mark cannot be freed and - * calling fsnotify_destroy_mark() more than once is fine. 
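The loop being removed in the hunk below illustrates a common teardown pattern: pin the entry with a reference, drop the list lock to do work that may sleep, then relock and continue; the reference keeps the entry alive even if a racing path also tears it down. A compact userspace sketch, assuming release() drops the list's own reference exactly once:

#include <pthread.h>
#include <stdatomic.h>
#include <stdlib.h>

struct node {
        struct node *next;
        atomic_int refcnt;      /* starts at 1: the list's reference */
};

static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;
static struct node *list_head;

static void destroy_all(void (*release)(struct node *))
{
        pthread_mutex_lock(&list_lock);
        while (list_head) {
                struct node *n = list_head;

                list_head = n->next;             /* unlink under the lock */
                atomic_fetch_add(&n->refcnt, 1); /* pin before unlocking */
                pthread_mutex_unlock(&list_lock);

                release(n);     /* may sleep; drops the list's reference */
                if (atomic_fetch_sub(&n->refcnt, 1) == 1)
                        free(n);                 /* we held the last ref */
                pthread_mutex_lock(&list_lock);
        }
        pthread_mutex_unlock(&list_lock);
}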
- */ - spin_lock(lock); - if (hlist_empty(head)) { - spin_unlock(lock); - break; - } - mark = hlist_entry(head->first, struct fsnotify_mark, obj_list); - /* - * We don't update i_fsnotify_mask / mnt_fsnotify_mask here - * since inode / mount is going away anyway. So just remove - * mark from the list. - */ - hlist_del_init_rcu(&mark->obj_list); - fsnotify_get_mark(mark); - spin_unlock(lock); - fsnotify_destroy_mark(mark, mark->group); - fsnotify_put_mark(mark); - } -} - -void fsnotify_set_mark_mask_locked(struct fsnotify_mark *mark, __u32 mask) -{ - assert_spin_locked(&mark->lock); - - mark->mask = mask; - - if (mark->flags & FSNOTIFY_MARK_FLAG_INODE) - fsnotify_set_inode_mark_mask_locked(mark, mask); -} - -void fsnotify_set_mark_ignored_mask_locked(struct fsnotify_mark *mark, __u32 mask) -{ - assert_spin_locked(&mark->lock); - - mark->ignored_mask = mask; -} - /* * Sorting function for lists of fsnotify marks. * @@ -315,37 +432,133 @@ int fsnotify_compare_groups(struct fsnotify_group *a, struct fsnotify_group *b) return -1; } -/* Add mark into proper place in given list of marks */ -int fsnotify_add_mark_list(struct hlist_head *head, struct fsnotify_mark *mark, - int allow_dups) +static int fsnotify_attach_connector_to_object( + struct fsnotify_mark_connector __rcu **connp, + struct inode *inode, + struct vfsmount *mnt) +{ + struct fsnotify_mark_connector *conn; + + conn = kmem_cache_alloc(fsnotify_mark_connector_cachep, GFP_KERNEL); + if (!conn) + return -ENOMEM; + spin_lock_init(&conn->lock); + INIT_HLIST_HEAD(&conn->list); + if (inode) { + conn->flags = FSNOTIFY_OBJ_TYPE_INODE; + conn->inode = igrab(inode); + } else { + conn->flags = FSNOTIFY_OBJ_TYPE_VFSMOUNT; + conn->mnt = mnt; + } + /* + * cmpxchg() provides the barrier so that readers of *connp can see + * only initialized structure + */ + if (cmpxchg(connp, NULL, conn)) { + /* Someone else created list structure for us */ + if (inode) + iput(inode); + kmem_cache_free(fsnotify_mark_connector_cachep, conn); + } + + return 0; +} + +/* + * Get mark connector, make sure it is alive and return with its lock held. + * This is for users that get connector pointer from inode or mount. Users that + * hold reference to a mark on the list may directly lock connector->lock as + * they are sure list cannot go away under them. + */ +static struct fsnotify_mark_connector *fsnotify_grab_connector( + struct fsnotify_mark_connector __rcu **connp) +{ + struct fsnotify_mark_connector *conn; + int idx; + + idx = srcu_read_lock(&fsnotify_mark_srcu); + conn = srcu_dereference(*connp, &fsnotify_mark_srcu); + if (!conn) + goto out; + spin_lock(&conn->lock); + if (!(conn->flags & (FSNOTIFY_OBJ_TYPE_INODE | + FSNOTIFY_OBJ_TYPE_VFSMOUNT))) { + spin_unlock(&conn->lock); + srcu_read_unlock(&fsnotify_mark_srcu, idx); + return NULL; + } +out: + srcu_read_unlock(&fsnotify_mark_srcu, idx); + return conn; +} + +/* + * Add mark into proper place in given list of marks. These marks may be used + * for the fsnotify backend to determine which event types should be delivered + * to which group and for which inodes. These marks are ordered according to + * priority, highest number first, and then by the group's location in memory. 
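Earlier in this hunk, fsnotify_attach_connector_to_object() uses cmpxchg() as a publish-once pattern: initialise the structure fully, then swing the shared pointer with a single atomic compare-and-swap so readers never observe a half-built connector; the loser of the race frees its copy and adopts the winner's. A userspace sketch with C11 atomics:

#include <stdatomic.h>
#include <stdlib.h>

struct connector {
        int flags;      /* ...object state, fully set before publishing... */
};

static struct connector *attach_connector(_Atomic(struct connector *) *connp)
{
        struct connector *conn = calloc(1, sizeof(*conn));
        struct connector *expected = NULL;

        if (!conn)
                return NULL;
        conn->flags = 1;                        /* initialise before publish */
        if (!atomic_compare_exchange_strong(connp, &expected, conn)) {
                free(conn);                     /* somebody beat us to it */
                conn = expected;                /* adopt the published winner */
        }
        return conn;
}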
+ */ +static int fsnotify_add_mark_list(struct fsnotify_mark *mark, + struct inode *inode, struct vfsmount *mnt, + int allow_dups) { struct fsnotify_mark *lmark, *last = NULL; + struct fsnotify_mark_connector *conn; + struct fsnotify_mark_connector __rcu **connp; int cmp; + int err = 0; + + if (WARN_ON(!inode && !mnt)) + return -EINVAL; + if (inode) + connp = &inode->i_fsnotify_marks; + else + connp = &real_mount(mnt)->mnt_fsnotify_marks; +restart: + spin_lock(&mark->lock); + conn = fsnotify_grab_connector(connp); + if (!conn) { + spin_unlock(&mark->lock); + err = fsnotify_attach_connector_to_object(connp, inode, mnt); + if (err) + return err; + goto restart; + } /* is mark the first mark? */ - if (hlist_empty(head)) { - hlist_add_head_rcu(&mark->obj_list, head); - return 0; + if (hlist_empty(&conn->list)) { + hlist_add_head_rcu(&mark->obj_list, &conn->list); + goto added; } /* should mark be in the middle of the current list? */ - hlist_for_each_entry(lmark, head, obj_list) { + hlist_for_each_entry(lmark, &conn->list, obj_list) { last = lmark; - if ((lmark->group == mark->group) && !allow_dups) - return -EEXIST; + if ((lmark->group == mark->group) && + (lmark->flags & FSNOTIFY_MARK_FLAG_ATTACHED) && + !allow_dups) { + err = -EEXIST; + goto out_err; + } cmp = fsnotify_compare_groups(lmark->group, mark->group); if (cmp >= 0) { hlist_add_before_rcu(&mark->obj_list, &lmark->obj_list); - return 0; + goto added; } } BUG_ON(last == NULL); /* mark should be the last entry. last is the current last entry */ hlist_add_behind_rcu(&mark->obj_list, &last->obj_list); - return 0; +added: + mark->connector = conn; +out_err: + spin_unlock(&conn->lock); + spin_unlock(&mark->lock); + return err; } /* @@ -353,10 +566,10 @@ int fsnotify_add_mark_list(struct hlist_head *head, struct fsnotify_mark *mark, * These marks may be used for the fsnotify backend to determine which * event types should be delivered to which group. */ -int fsnotify_add_mark_locked(struct fsnotify_mark *mark, - struct fsnotify_group *group, struct inode *inode, +int fsnotify_add_mark_locked(struct fsnotify_mark *mark, struct inode *inode, struct vfsmount *mnt, int allow_dups) { + struct fsnotify_group *group = mark->group; int ret = 0; BUG_ON(inode && mnt); @@ -367,61 +580,42 @@ int fsnotify_add_mark_locked(struct fsnotify_mark *mark, * LOCKING ORDER!!!! 
* group->mark_mutex * mark->lock - * inode->i_lock + * mark->connector->lock */ spin_lock(&mark->lock); mark->flags |= FSNOTIFY_MARK_FLAG_ALIVE | FSNOTIFY_MARK_FLAG_ATTACHED; - fsnotify_get_group(group); - mark->group = group; list_add(&mark->g_list, &group->marks_list); atomic_inc(&group->num_marks); - fsnotify_get_mark(mark); /* for i_list and g_list */ - - if (inode) { - ret = fsnotify_add_inode_mark(mark, group, inode, allow_dups); - if (ret) - goto err; - } else if (mnt) { - ret = fsnotify_add_vfsmount_mark(mark, group, mnt, allow_dups); - if (ret) - goto err; - } else { - BUG(); - } - - /* this will pin the object if appropriate */ - fsnotify_set_mark_mask_locked(mark, mark->mask); + fsnotify_get_mark(mark); /* for g_list */ spin_unlock(&mark->lock); - if (inode) - __fsnotify_update_child_dentry_flags(inode); + ret = fsnotify_add_mark_list(mark, inode, mnt, allow_dups); + if (ret) + goto err; + + if (mark->mask) + fsnotify_recalc_mask(mark->connector); return ret; err: - mark->flags &= ~FSNOTIFY_MARK_FLAG_ALIVE; + mark->flags &= ~(FSNOTIFY_MARK_FLAG_ALIVE | + FSNOTIFY_MARK_FLAG_ATTACHED); list_del_init(&mark->g_list); - fsnotify_put_group(group); - mark->group = NULL; atomic_dec(&group->num_marks); - spin_unlock(&mark->lock); - - spin_lock(&destroy_lock); - list_add(&mark->g_list, &destroy_list); - spin_unlock(&destroy_lock); - queue_delayed_work(system_unbound_wq, &reaper_work, - FSNOTIFY_REAPER_DELAY); - + fsnotify_put_mark(mark); return ret; } -int fsnotify_add_mark(struct fsnotify_mark *mark, struct fsnotify_group *group, - struct inode *inode, struct vfsmount *mnt, int allow_dups) +int fsnotify_add_mark(struct fsnotify_mark *mark, struct inode *inode, + struct vfsmount *mnt, int allow_dups) { int ret; + struct fsnotify_group *group = mark->group; + mutex_lock(&group->mark_mutex); - ret = fsnotify_add_mark_locked(mark, group, inode, mnt, allow_dups); + ret = fsnotify_add_mark_locked(mark, inode, mnt, allow_dups); mutex_unlock(&group->mark_mutex); return ret; } @@ -430,29 +624,42 @@ int fsnotify_add_mark(struct fsnotify_mark *mark, struct fsnotify_group *group, * Given a list of marks, find the mark associated with given group. If found * take a reference to that mark and return it, else return NULL. */ -struct fsnotify_mark *fsnotify_find_mark(struct hlist_head *head, - struct fsnotify_group *group) +struct fsnotify_mark *fsnotify_find_mark( + struct fsnotify_mark_connector __rcu **connp, + struct fsnotify_group *group) { + struct fsnotify_mark_connector *conn; struct fsnotify_mark *mark; - hlist_for_each_entry(mark, head, obj_list) { - if (mark->group == group) { + conn = fsnotify_grab_connector(connp); + if (!conn) + return NULL; + + hlist_for_each_entry(mark, &conn->list, obj_list) { + if (mark->group == group && + (mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED)) { fsnotify_get_mark(mark); + spin_unlock(&conn->lock); return mark; } } + spin_unlock(&conn->lock); return NULL; } -/* - * clear any marks in a group in which mark->flags & flags is true - */ -void fsnotify_clear_marks_by_group_flags(struct fsnotify_group *group, - unsigned int flags) +/* Clear any marks in a group with given type */ +void fsnotify_clear_marks_by_group(struct fsnotify_group *group, + unsigned int type) { struct fsnotify_mark *lmark, *mark; LIST_HEAD(to_free); + struct list_head *head = &to_free; + /* Skip selection step if we want to clear all marks. */ + if (type == FSNOTIFY_OBJ_ALL_TYPES) { + head = &group->marks_list; + goto clear; + } /* * We have to be really careful here. 
Anytime we drop mark_mutex, e.g. * fsnotify_clear_marks_by_inode() can come and free marks. Even in our @@ -464,18 +671,19 @@ void fsnotify_clear_marks_by_group_flags(struct fsnotify_group *group, */ mutex_lock_nested(&group->mark_mutex, SINGLE_DEPTH_NESTING); list_for_each_entry_safe(mark, lmark, &group->marks_list, g_list) { - if (mark->flags & flags) + if (mark->connector->flags & type) list_move(&mark->g_list, &to_free); } mutex_unlock(&group->mark_mutex); +clear: while (1) { mutex_lock_nested(&group->mark_mutex, SINGLE_DEPTH_NESTING); - if (list_empty(&to_free)) { + if (list_empty(head)) { mutex_unlock(&group->mark_mutex); break; } - mark = list_first_entry(&to_free, struct fsnotify_mark, g_list); + mark = list_first_entry(head, struct fsnotify_mark, g_list); fsnotify_get_mark(mark); fsnotify_detach_mark(mark); mutex_unlock(&group->mark_mutex); @@ -484,49 +692,62 @@ void fsnotify_clear_marks_by_group_flags(struct fsnotify_group *group, } } -/* - * Given a group, prepare for freeing all the marks associated with that group. - * The marks are attached to the list of marks prepared for destruction, the - * caller is responsible for freeing marks in that list after SRCU period has - * ended. - */ -void fsnotify_detach_group_marks(struct fsnotify_group *group) +/* Destroy all marks attached to inode / vfsmount */ +void fsnotify_destroy_marks(struct fsnotify_mark_connector __rcu **connp) { - struct fsnotify_mark *mark; + struct fsnotify_mark_connector *conn; + struct fsnotify_mark *mark, *old_mark = NULL; + struct inode *inode; - while (1) { - mutex_lock_nested(&group->mark_mutex, SINGLE_DEPTH_NESTING); - if (list_empty(&group->marks_list)) { - mutex_unlock(&group->mark_mutex); - break; - } - mark = list_first_entry(&group->marks_list, - struct fsnotify_mark, g_list); + conn = fsnotify_grab_connector(connp); + if (!conn) + return; + /* + * We have to be careful since we can race with e.g. + * fsnotify_clear_marks_by_group() and once we drop the conn->lock, the + * list can get modified. However we are holding mark reference and + * thus our mark cannot be removed from obj_list so we can continue + * iteration after regaining conn->lock. + */ + hlist_for_each_entry(mark, &conn->list, obj_list) { fsnotify_get_mark(mark); - fsnotify_detach_mark(mark); - mutex_unlock(&group->mark_mutex); - __fsnotify_free_mark(mark); - fsnotify_put_mark(mark); + spin_unlock(&conn->lock); + if (old_mark) + fsnotify_put_mark(old_mark); + old_mark = mark; + fsnotify_destroy_mark(mark, mark->group); + spin_lock(&conn->lock); } + /* + * Detach list from object now so that we don't pin inode until all + * mark references get dropped. It would lead to strange results such + * as delaying inode deletion or blocking unmount. + */ + inode = fsnotify_detach_connector_from_object(conn); + spin_unlock(&conn->lock); + if (old_mark) + fsnotify_put_mark(old_mark); + iput(inode); } /* * Nothing fancy, just initialize lists and locks and counters. */ void fsnotify_init_mark(struct fsnotify_mark *mark, - void (*free_mark)(struct fsnotify_mark *mark)) + struct fsnotify_group *group) { memset(mark, 0, sizeof(*mark)); spin_lock_init(&mark->lock); atomic_set(&mark->refcnt, 1); - mark->free_mark = free_mark; + fsnotify_get_group(group); + mark->group = group; } /* * Destroy all marks in destroy_list, waits for SRCU period to finish before * actually freeing marks. 
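The reaper described above follows a steal-the-list pattern: detach the whole pending list under the spinlock, wait out the grace period, then free everything with no lock held, so the expensive synchronisation never blocks producers adding more entries. A userspace sketch with the SRCU wait elided:

#include <pthread.h>
#include <stdlib.h>

struct dnode {
        struct dnode *next;
};

static pthread_mutex_t destroy_lock = PTHREAD_MUTEX_INITIALIZER;
static struct dnode *destroy_list;

static void destroy_workfn(void)
{
        struct dnode *list, *next;

        pthread_mutex_lock(&destroy_lock);
        list = destroy_list;            /* steal the whole list */
        destroy_list = NULL;
        pthread_mutex_unlock(&destroy_lock);

        /* synchronize_srcu(&fsnotify_mark_srcu) would go here */

        for (; list; list = next) {     /* free with no lock held */
                next = list->next;
                free(list);
        }
}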
*/ -void fsnotify_mark_destroy_list(void) +static void fsnotify_mark_destroy_workfn(struct work_struct *work) { struct fsnotify_mark *mark, *next; struct list_head private_destroy_list; @@ -540,11 +761,12 @@ void fsnotify_mark_destroy_list(void) list_for_each_entry_safe(mark, next, &private_destroy_list, g_list) { list_del_init(&mark->g_list); - fsnotify_put_mark(mark); + fsnotify_final_mark_destroy(mark); } } -static void fsnotify_mark_destroy_workfn(struct work_struct *work) +/* Wait for all marks queued for destruction to be actually destroyed */ +void fsnotify_wait_marks_destroyed(void) { - fsnotify_mark_destroy_list(); + flush_delayed_work(&reaper_work); } diff --git a/fs/notify/vfsmount_mark.c b/fs/notify/vfsmount_mark.c deleted file mode 100644 index a8fcab6..0000000 --- a/fs/notify/vfsmount_mark.c +++ /dev/null @@ -1,108 +0,0 @@ -/* - * Copyright (C) 2008 Red Hat, Inc., Eric Paris <eparis@redhat.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; see the file COPYING. If not, write to - * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -#include <linux/fs.h> -#include <linux/init.h> -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/mount.h> -#include <linux/mutex.h> -#include <linux/spinlock.h> - -#include <linux/atomic.h> - -#include <linux/fsnotify_backend.h> -#include "fsnotify.h" - -void fsnotify_clear_vfsmount_marks_by_group(struct fsnotify_group *group) -{ - fsnotify_clear_marks_by_group_flags(group, FSNOTIFY_MARK_FLAG_VFSMOUNT); -} - -/* - * Recalculate the mnt->mnt_fsnotify_mask, or the mask of all FS_* event types - * any notifier is interested in hearing for this mount point - */ -void fsnotify_recalc_vfsmount_mask(struct vfsmount *mnt) -{ - struct mount *m = real_mount(mnt); - - spin_lock(&mnt->mnt_root->d_lock); - m->mnt_fsnotify_mask = fsnotify_recalc_mask(&m->mnt_fsnotify_marks); - spin_unlock(&mnt->mnt_root->d_lock); -} - -void fsnotify_destroy_vfsmount_mark(struct fsnotify_mark *mark) -{ - struct vfsmount *mnt = mark->mnt; - struct mount *m = real_mount(mnt); - - BUG_ON(!mutex_is_locked(&mark->group->mark_mutex)); - assert_spin_locked(&mark->lock); - - spin_lock(&mnt->mnt_root->d_lock); - - hlist_del_init_rcu(&mark->obj_list); - mark->mnt = NULL; - - m->mnt_fsnotify_mask = fsnotify_recalc_mask(&m->mnt_fsnotify_marks); - spin_unlock(&mnt->mnt_root->d_lock); -} - -/* - * given a group and vfsmount, find the mark associated with that combination. - * if found take a reference to that mark and return it, else return NULL - */ -struct fsnotify_mark *fsnotify_find_vfsmount_mark(struct fsnotify_group *group, - struct vfsmount *mnt) -{ - struct mount *m = real_mount(mnt); - struct fsnotify_mark *mark; - - spin_lock(&mnt->mnt_root->d_lock); - mark = fsnotify_find_mark(&m->mnt_fsnotify_marks, group); - spin_unlock(&mnt->mnt_root->d_lock); - - return mark; -} - -/* - * Attach an initialized mark to a given group and vfsmount. 
- * These marks may be used for the fsnotify backend to determine which - * event types should be delivered to which groups. - */ -int fsnotify_add_vfsmount_mark(struct fsnotify_mark *mark, - struct fsnotify_group *group, struct vfsmount *mnt, - int allow_dups) -{ - struct mount *m = real_mount(mnt); - int ret; - - mark->flags |= FSNOTIFY_MARK_FLAG_VFSMOUNT; - - BUG_ON(!mutex_is_locked(&group->mark_mutex)); - assert_spin_locked(&mark->lock); - - spin_lock(&mnt->mnt_root->d_lock); - mark->mnt = mnt; - ret = fsnotify_add_mark_list(&m->mnt_fsnotify_marks, mark, allow_dups); - m->mnt_fsnotify_mask = fsnotify_recalc_mask(&m->mnt_fsnotify_marks); - spin_unlock(&mnt->mnt_root->d_lock); - - return ret; -} diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index 74b489e..ebf80c7 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -2188,8 +2188,7 @@ int dquot_disable(struct super_block *sb, int type, unsigned int flags) /* This can happen when suspending quotas on remount-ro... */ if (toputinode[cnt] && !sb_has_quota_loaded(sb, cnt)) { inode_lock(toputinode[cnt]); - toputinode[cnt]->i_flags &= ~(S_IMMUTABLE | - S_NOATIME | S_NOQUOTA); + toputinode[cnt]->i_flags &= ~S_NOQUOTA; truncate_inode_pages(&toputinode[cnt]->i_data, 0); inode_unlock(toputinode[cnt]); mark_inode_dirty_sync(toputinode[cnt]); @@ -2237,7 +2236,6 @@ static int vfs_load_quota_inode(struct inode *inode, int type, int format_id, struct super_block *sb = inode->i_sb; struct quota_info *dqopt = sb_dqopt(sb); int error; - int oldflags = -1; if (!fmt) return -ESRCH; @@ -2285,9 +2283,7 @@ static int vfs_load_quota_inode(struct inode *inode, int type, int format_id, * possible) Also nobody should write to the file - we use * special IO operations which ignore the immutable bit. */ inode_lock(inode); - oldflags = inode->i_flags & (S_NOATIME | S_IMMUTABLE | - S_NOQUOTA); - inode->i_flags |= S_NOQUOTA | S_NOATIME | S_IMMUTABLE; + inode->i_flags |= S_NOQUOTA; inode_unlock(inode); /* * When S_NOQUOTA is set, remove dquot references as no more @@ -2329,14 +2325,9 @@ out_file_init: dqopt->files[type] = NULL; iput(inode); out_file_flags: - if (oldflags != -1) { - inode_lock(inode); - /* Set the flags back (in the case of accidental quotaon() - * on a wrong file we don't want to mess up the flags) */ - inode->i_flags &= ~(S_NOATIME | S_NOQUOTA | S_IMMUTABLE); - inode->i_flags |= oldflags; - inode_unlock(inode); - } + inode_lock(inode); + inode->i_flags &= ~S_NOQUOTA; + inode_unlock(inode); out_fmt: put_quota_format(fmt); @@ -2780,18 +2771,6 @@ int dquot_set_dqinfo(struct super_block *sb, int type, struct qc_info *ii) } EXPORT_SYMBOL(dquot_set_dqinfo); -const struct quotactl_ops dquot_quotactl_ops = { - .quota_on = dquot_quota_on, - .quota_off = dquot_quota_off, - .quota_sync = dquot_quota_sync, - .get_state = dquot_get_state, - .set_info = dquot_set_dqinfo, - .get_dqblk = dquot_get_dqblk, - .get_nextdqblk = dquot_get_next_dqblk, - .set_dqblk = dquot_set_dqblk -}; -EXPORT_SYMBOL(dquot_quotactl_ops); - const struct quotactl_ops dquot_quotactl_sysfile_ops = { .quota_enable = dquot_quota_enable, .quota_disable = dquot_quota_disable, diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index a6ab9d6..873fc04 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c @@ -1375,7 +1375,6 @@ static void init_inode(struct inode *inode, struct treepath *path) static void inode2sd(void *sd, struct inode *inode, loff_t size) { struct stat_data *sd_v2 = (struct stat_data *)sd; - __u16 flags; set_sd_v2_mode(sd_v2, inode->i_mode); set_sd_v2_nlink(sd_v2, 
inode->i_nlink); @@ -1390,9 +1389,7 @@ static void inode2sd(void *sd, struct inode *inode, loff_t size) set_sd_v2_rdev(sd_v2, new_encode_dev(inode->i_rdev)); else set_sd_v2_generation(sd_v2, inode->i_generation); - flags = REISERFS_I(inode)->i_attrs; - i_attrs_to_sd_attrs(inode, &flags); - set_sd_v2_attrs(sd_v2, flags); + set_sd_v2_attrs(sd_v2, REISERFS_I(inode)->i_attrs); } /* used to copy inode's fields to old stat data */ @@ -2002,10 +1999,6 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th, /* uid and gid must already be set by the caller for quota init */ - /* symlink cannot be immutable or append only, right? */ - if (S_ISLNK(inode->i_mode)) - inode->i_flags &= ~(S_IMMUTABLE | S_APPEND); - inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode); inode->i_size = i_size; inode->i_blocks = 0; @@ -3095,28 +3088,6 @@ void sd_attrs_to_i_attrs(__u16 sd_attrs, struct inode *inode) } } -void i_attrs_to_sd_attrs(struct inode *inode, __u16 * sd_attrs) -{ - if (reiserfs_attrs(inode->i_sb)) { - if (inode->i_flags & S_IMMUTABLE) - *sd_attrs |= REISERFS_IMMUTABLE_FL; - else - *sd_attrs &= ~REISERFS_IMMUTABLE_FL; - if (inode->i_flags & S_SYNC) - *sd_attrs |= REISERFS_SYNC_FL; - else - *sd_attrs &= ~REISERFS_SYNC_FL; - if (inode->i_flags & S_NOATIME) - *sd_attrs |= REISERFS_NOATIME_FL; - else - *sd_attrs &= ~REISERFS_NOATIME_FL; - if (REISERFS_I(inode)->i_flags & i_nopack_mask) - *sd_attrs |= REISERFS_NOTAIL_FL; - else - *sd_attrs &= ~REISERFS_NOTAIL_FL; - } -} - /* * decide if this buffer needs to stay around for data logging or ordered * write purposes diff --git a/fs/reiserfs/ioctl.c b/fs/reiserfs/ioctl.c index 1f4692a..acbbaf7 100644 --- a/fs/reiserfs/ioctl.c +++ b/fs/reiserfs/ioctl.c @@ -47,7 +47,6 @@ long reiserfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) } flags = REISERFS_I(inode)->i_attrs; - i_attrs_to_sd_attrs(inode, (__u16 *) & flags); err = put_user(flags, (int __user *)arg); break; case REISERFS_IOC_SETFLAGS:{ diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c index aa40c24..da01f49 100644 --- a/fs/reiserfs/journal.c +++ b/fs/reiserfs/journal.c @@ -1961,7 +1961,7 @@ static int do_journal_release(struct reiserfs_transaction_handle *th, * will be requeued because superblock is being shutdown and doesn't * have MS_ACTIVE set. 
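With i_attrs_to_sd_attrs() removed a few hunks up, reiserfs translates attribute bits in only one direction, on-disk to in-core at read time, because paths that change flags now update both representations together. A hedged sketch of what such a one-way mapping looks like; the flag names and values here are illustrative, not reiserfs's real ones:

#include <stdint.h>

#define SD_IMMUTABLE    0x0010  /* illustrative on-disk flag values */
#define SD_NOATIME      0x0080
#define VFS_IMMUTABLE   0x0001  /* illustrative in-core flag values */
#define VFS_NOATIME     0x0002

/* Derive in-core inode flags from the on-disk attributes when the
 * inode is read. The reverse helper is gone because SETFLAGS-style
 * paths keep both copies in sync at the moment flags change. */
static uint32_t sd_attrs_to_vfs_flags(uint16_t sd_attrs)
{
        uint32_t flags = 0;

        if (sd_attrs & SD_IMMUTABLE)
                flags |= VFS_IMMUTABLE;
        if (sd_attrs & SD_NOATIME)
                flags |= VFS_NOATIME;
        return flags;
}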
*/ - cancel_delayed_work_sync(&REISERFS_SB(sb)->old_work); + reiserfs_cancel_old_flush(sb); /* wait for all commits to finish */ cancel_delayed_work_sync(&SB_JOURNAL(sb)->j_work); diff --git a/fs/reiserfs/lbalance.c b/fs/reiserfs/lbalance.c index 249594a..f5cebd7 100644 --- a/fs/reiserfs/lbalance.c +++ b/fs/reiserfs/lbalance.c @@ -475,7 +475,7 @@ static void leaf_item_bottle(struct buffer_info *dest_bi, * 'cpy_bytes'; create new item header; * n_ih = new item_header; */ - memcpy(&n_ih, ih, SHORT_KEY_SIZE); + memcpy(&n_ih.ih_key, &ih->ih_key, KEY_SIZE); /* Endian safe, both le */ n_ih.ih_version = ih->ih_version; diff --git a/fs/reiserfs/reiserfs.h b/fs/reiserfs/reiserfs.h index 2adcde1..1d34377 100644 --- a/fs/reiserfs/reiserfs.h +++ b/fs/reiserfs/reiserfs.h @@ -1326,7 +1326,6 @@ struct cpu_key { #define KEY_NOT_FOUND 0 #define KEY_SIZE (sizeof(struct reiserfs_key)) -#define SHORT_KEY_SIZE (sizeof (__u32) + sizeof (__u32)) /* return values for search_by_key and clones */ #define ITEM_FOUND 1 @@ -2949,6 +2948,7 @@ int reiserfs_allocate_list_bitmaps(struct super_block *s, struct reiserfs_list_bitmap *, unsigned int); void reiserfs_schedule_old_flush(struct super_block *s); +void reiserfs_cancel_old_flush(struct super_block *s); void add_save_link(struct reiserfs_transaction_handle *th, struct inode *inode, int truncate); int remove_save_link(struct inode *inode, int truncate); @@ -3099,7 +3099,6 @@ static inline void reiserfs_update_sd(struct reiserfs_transaction_handle *th, } void sd_attrs_to_i_attrs(__u16 sd_attrs, struct inode *inode); -void i_attrs_to_sd_attrs(struct inode *inode, __u16 * sd_attrs); int reiserfs_setattr(struct dentry *dentry, struct iattr *attr); int __reiserfs_write_begin(struct page *page, unsigned from, unsigned len); diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index feabcde..685f1e0 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -89,11 +89,27 @@ static void flush_old_commits(struct work_struct *work) sbi = container_of(work, struct reiserfs_sb_info, old_work.work); s = sbi->s_journal->j_work_sb; + /* + * We need s_umount for protecting quota writeback. We have to use + * trylock as reiserfs_cancel_old_flush() may be waiting for this work + * to complete with s_umount held. + */ + if (!down_read_trylock(&s->s_umount)) { + /* Requeue work if we are not cancelling it */ + spin_lock(&sbi->old_work_lock); + if (sbi->work_queued == 1) + queue_delayed_work(system_long_wq, &sbi->old_work, HZ); + spin_unlock(&sbi->old_work_lock); + return; + } spin_lock(&sbi->old_work_lock); - sbi->work_queued = 0; + /* Avoid clobbering the cancel state... 
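The work_queued field is effectively a tri-state here: 0 (idle), 1 (queued) and 2 (cancelled). Because the worker only ever moves 1 to 0, a cancel that stores 2 before flushing the work cannot be clobbered back to a requeueable state by a worker that was already running, and nothing requeues once 2 is set. A minimal userspace sketch of that state machine (names illustrative):

#include <pthread.h>
#include <stdbool.h>

enum { FLUSH_IDLE = 0, FLUSH_QUEUED = 1, FLUSH_CANCELLED = 2 };

static pthread_mutex_t work_lock = PTHREAD_MUTEX_INITIALIZER;
static int work_state;

static bool try_queue(void)
{
        bool queued = false;

        pthread_mutex_lock(&work_lock);
        if (work_state == FLUSH_IDLE) {         /* never requeues after cancel */
                work_state = FLUSH_QUEUED;
                queued = true;
        }
        pthread_mutex_unlock(&work_lock);
        return queued;
}

static void worker_done(void)
{
        pthread_mutex_lock(&work_lock);
        if (work_state == FLUSH_QUEUED)         /* don't clobber CANCELLED */
                work_state = FLUSH_IDLE;
        pthread_mutex_unlock(&work_lock);
}

static void cancel_flush(void)
{
        pthread_mutex_lock(&work_lock);
        work_state = FLUSH_CANCELLED;           /* block new requeues first */
        pthread_mutex_unlock(&work_lock);
        /* the cancel_delayed_work_sync() equivalent would go here */
}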
*/ + if (sbi->work_queued == 1) + sbi->work_queued = 0; spin_unlock(&sbi->old_work_lock); reiserfs_sync_fs(s, 1); + up_read(&s->s_umount); } void reiserfs_schedule_old_flush(struct super_block *s) @@ -117,21 +133,22 @@ void reiserfs_schedule_old_flush(struct super_block *s) spin_unlock(&sbi->old_work_lock); } -static void cancel_old_flush(struct super_block *s) +void reiserfs_cancel_old_flush(struct super_block *s) { struct reiserfs_sb_info *sbi = REISERFS_SB(s); - cancel_delayed_work_sync(&REISERFS_SB(s)->old_work); spin_lock(&sbi->old_work_lock); - sbi->work_queued = 0; + /* Make sure no new flushes will be queued */ + sbi->work_queued = 2; spin_unlock(&sbi->old_work_lock); + cancel_delayed_work_sync(&REISERFS_SB(s)->old_work); } static int reiserfs_freeze(struct super_block *s) { struct reiserfs_transaction_handle th; - cancel_old_flush(s); + reiserfs_cancel_old_flush(s); reiserfs_write_lock(s); if (!(s->s_flags & MS_RDONLY)) { @@ -152,7 +169,13 @@ static int reiserfs_freeze(struct super_block *s) static int reiserfs_unfreeze(struct super_block *s) { + struct reiserfs_sb_info *sbi = REISERFS_SB(s); + reiserfs_allow_writes(s); + spin_lock(&sbi->old_work_lock); + /* Allow old_work to run again */ + sbi->work_queued = 0; + spin_unlock(&sbi->old_work_lock); return 0; } @@ -547,12 +570,28 @@ static void reiserfs_kill_sb(struct super_block *s) kill_block_super(s); } +#ifdef CONFIG_QUOTA +static int reiserfs_quota_off(struct super_block *sb, int type); + +static void reiserfs_quota_off_umount(struct super_block *s) +{ + int type; + + for (type = 0; type < REISERFS_MAXQUOTAS; type++) + reiserfs_quota_off(s, type); +} +#else +static inline void reiserfs_quota_off_umount(struct super_block *s) +{ +} +#endif + static void reiserfs_put_super(struct super_block *s) { struct reiserfs_transaction_handle th; th.t_trans_id = 0; - dquot_disable(s, -1, DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED); + reiserfs_quota_off_umount(s); reiserfs_write_lock(s); @@ -817,7 +856,7 @@ static const struct dquot_operations reiserfs_quota_operations = { static const struct quotactl_ops reiserfs_qctl_operations = { .quota_on = reiserfs_quota_on, - .quota_off = dquot_quota_off, + .quota_off = reiserfs_quota_off, .quota_sync = dquot_quota_sync, .get_state = dquot_get_state, .set_info = dquot_set_dqinfo, @@ -2194,7 +2233,7 @@ error_unlocked: if (sbi->commit_wq) destroy_workqueue(sbi->commit_wq); - cancel_delayed_work_sync(&REISERFS_SB(s)->old_work); + reiserfs_cancel_old_flush(s); reiserfs_free_bitmap_cache(s); if (SB_BUFFER_WITH_SB(s)) @@ -2405,12 +2444,47 @@ static int reiserfs_quota_on(struct super_block *sb, int type, int format_id, goto out; } reiserfs_write_unlock(sb); - return dquot_quota_on(sb, type, format_id, path); + err = dquot_quota_on(sb, type, format_id, path); + if (!err) { + inode_lock(inode); + REISERFS_I(inode)->i_attrs |= REISERFS_IMMUTABLE_FL | + REISERFS_NOATIME_FL; + inode_set_flags(inode, S_IMMUTABLE | S_NOATIME, + S_IMMUTABLE | S_NOATIME); + inode_unlock(inode); + mark_inode_dirty(inode); + } + return err; out: reiserfs_write_unlock(sb); return err; } +static int reiserfs_quota_off(struct super_block *sb, int type) +{ + int err; + struct inode *inode = sb_dqopt(sb)->files[type]; + + if (!inode || !igrab(inode)) + goto out; + + err = dquot_quota_off(sb, type); + if (err) + goto out_put; + + inode_lock(inode); + REISERFS_I(inode)->i_attrs &= ~(REISERFS_IMMUTABLE_FL | + REISERFS_NOATIME_FL); + inode_set_flags(inode, 0, S_IMMUTABLE | S_NOATIME); + inode_unlock(inode); + mark_inode_dirty(inode); +out_put: 
+ iput(inode); + return err; +out: + return dquot_quota_off(sb, type); +} + /* * Read data from quotafile - avoid pagecache and such because we cannot afford * acquiring the locks... As quota files are never truncated and quota code diff --git a/fs/udf/file.c b/fs/udf/file.c index e04cc0c..f5eb2d5 100644 --- a/fs/udf/file.c +++ b/fs/udf/file.c @@ -44,12 +44,12 @@ static void __udf_adinicb_readpage(struct page *page) char *kaddr; struct udf_inode_info *iinfo = UDF_I(inode); - kaddr = kmap(page); + kaddr = kmap_atomic(page); memcpy(kaddr, iinfo->i_ext.i_data + iinfo->i_lenEAttr, inode->i_size); memset(kaddr + inode->i_size, 0, PAGE_SIZE - inode->i_size); flush_dcache_page(page); SetPageUptodate(page); - kunmap(page); + kunmap_atomic(kaddr); } static int udf_adinicb_readpage(struct file *file, struct page *page) @@ -70,11 +70,11 @@ static int udf_adinicb_writepage(struct page *page, BUG_ON(!PageLocked(page)); - kaddr = kmap(page); + kaddr = kmap_atomic(page); memcpy(iinfo->i_ext.i_data + iinfo->i_lenEAttr, kaddr, inode->i_size); - mark_inode_dirty(inode); SetPageUptodate(page); - kunmap(page); + kunmap_atomic(kaddr); + mark_inode_dirty(inode); unlock_page(page); return 0; diff --git a/fs/udf/inode.c b/fs/udf/inode.c index a8d8f71..98c510e 100644 --- a/fs/udf/inode.c +++ b/fs/udf/inode.c @@ -276,14 +276,14 @@ int udf_expand_file_adinicb(struct inode *inode) return -ENOMEM; if (!PageUptodate(page)) { - kaddr = kmap(page); + kaddr = kmap_atomic(page); memset(kaddr + iinfo->i_lenAlloc, 0x00, PAGE_SIZE - iinfo->i_lenAlloc); memcpy(kaddr, iinfo->i_ext.i_data + iinfo->i_lenEAttr, iinfo->i_lenAlloc); flush_dcache_page(page); SetPageUptodate(page); - kunmap(page); + kunmap_atomic(kaddr); } down_write(&iinfo->i_data_sem); memset(iinfo->i_ext.i_data + iinfo->i_lenEAttr, 0x00, @@ -300,11 +300,11 @@ int udf_expand_file_adinicb(struct inode *inode) if (err) { /* Restore everything back so that we don't lose data... */ lock_page(page); - kaddr = kmap(page); down_write(&iinfo->i_data_sem); + kaddr = kmap_atomic(page); memcpy(iinfo->i_ext.i_data + iinfo->i_lenEAttr, kaddr, inode->i_size); - kunmap(page); + kunmap_atomic(kaddr); unlock_page(page); iinfo->i_alloc_type = ICBTAG_FLAG_AD_IN_ICB; inode->i_data.a_ops = &udf_adinicb_aops; @@ -1535,7 +1535,7 @@ reread: inode->i_data.a_ops = &udf_symlink_aops; inode->i_op = &udf_symlink_inode_operations; inode_nohighmem(inode); - inode->i_mode = S_IFLNK | S_IRWXUGO; + inode->i_mode = S_IFLNK | 0777; break; case ICBTAG_FILE_TYPE_MAIN: udf_debug("METADATA FILE-----\n"); @@ -1591,9 +1591,9 @@ static umode_t udf_convert_permissions(struct fileEntry *fe) permissions = le32_to_cpu(fe->permissions); flags = le16_to_cpu(fe->icbTag.flags); - mode = ((permissions) & S_IRWXO) | - ((permissions >> 2) & S_IRWXG) | - ((permissions >> 4) & S_IRWXU) | + mode = ((permissions) & 0007) | + ((permissions >> 2) & 0070) | + ((permissions >> 4) & 0700) | ((flags & ICBTAG_FLAG_SETUID) ? S_ISUID : 0) | ((flags & ICBTAG_FLAG_SETGID) ? S_ISGID : 0) | ((flags & ICBTAG_FLAG_STICKY) ? 
S_ISVTX : 0); @@ -1669,9 +1669,9 @@ static int udf_update_inode(struct inode *inode, int do_sync) else fe->gid = cpu_to_le32(i_gid_read(inode)); - udfperms = ((inode->i_mode & S_IRWXO)) | - ((inode->i_mode & S_IRWXG) << 2) | - ((inode->i_mode & S_IRWXU) << 4); + udfperms = ((inode->i_mode & 0007)) | + ((inode->i_mode & 0070) << 2) | + ((inode->i_mode & 0700) << 4); udfperms |= (le32_to_cpu(fe->permissions) & (FE_PERM_O_DELETE | FE_PERM_O_CHATTR | diff --git a/fs/udf/namei.c b/fs/udf/namei.c index babf48d..385ee89 100644 --- a/fs/udf/namei.c +++ b/fs/udf/namei.c @@ -906,7 +906,7 @@ out: static int udf_symlink(struct inode *dir, struct dentry *dentry, const char *symname) { - struct inode *inode = udf_new_inode(dir, S_IFLNK | S_IRWXUGO); + struct inode *inode = udf_new_inode(dir, S_IFLNK | 0777); struct pathComponent *pc; const char *compstart; struct extent_position epos = {};
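For reference, the permission conversions in udf_convert_permissions() and udf_update_inode() above are inverses of each other: UDF stores five permission bits per class on disk, so the POSIX rwx triplets are packed by shifting the group bits left by 2 and the owner bits left by 4. A small self-contained illustration:

#include <stdint.h>
#include <stdio.h>

static uint32_t mode_to_udfperms(uint16_t mode)
{
        return ((mode & 0007)) |
               ((mode & 0070) << 2) |
               ((mode & 0700) << 4);
}

static uint16_t udfperms_to_mode(uint32_t perms)
{
        return (perms & 0007) |
               ((perms >> 2) & 0070) |
               ((perms >> 4) & 0700);
}

int main(void)
{
        uint32_t perms = mode_to_udfperms(0644);

        /* round-trips: prints "0644 -> 0x.. -> 0644" */
        printf("0644 -> 0x%x -> 0%o\n", perms, udfperms_to_mode(perms));
        return 0;
}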