summary refs log tree commit diff stats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r-- fs/ext2/ext2.h 4
-rw-r--r-- fs/ext2/inode.c 22
-rw-r--r-- fs/ext2/ioctl.c 1
-rw-r--r-- fs/ext2/super.c 87
-rw-r--r-- fs/ext4/ext4.h 1
-rw-r--r-- fs/ext4/inode.c 26
-rw-r--r-- fs/ext4/ioctl.c 2
-rw-r--r-- fs/ext4/super.c 74
-rw-r--r-- fs/inode.c 3
-rw-r--r-- fs/jfs/ioctl.c 2
-rw-r--r-- fs/jfs/jfs_imap.c 1
-rw-r--r-- fs/jfs/jfs_inode.c 18
-rw-r--r-- fs/jfs/jfs_inode.h 1
-rw-r--r-- fs/jfs/super.c 79
-rw-r--r-- fs/mount.h 2
-rw-r--r-- fs/namespace.c 3
-rw-r--r-- fs/notify/Makefile 4
-rw-r--r-- fs/notify/dnotify/dnotify.c 25
-rw-r--r-- fs/notify/fanotify/fanotify.c 26
-rw-r--r-- fs/notify/fanotify/fanotify.h 1
-rw-r--r-- fs/notify/fanotify/fanotify_user.c 77
-rw-r--r-- fs/notify/fdinfo.c 16
-rw-r--r-- fs/notify/fsnotify.c 107
-rw-r--r-- fs/notify/fsnotify.h 48
-rw-r--r-- fs/notify/group.c 20
-rw-r--r-- fs/notify/inode_mark.c 199
-rw-r--r-- fs/notify/inotify/inotify.h 4
-rw-r--r-- fs/notify/inotify/inotify_fsnotify.c 18
-rw-r--r-- fs/notify/inotify/inotify_user.c 81
-rw-r--r-- fs/notify/mark.c 642
-rw-r--r-- fs/notify/vfsmount_mark.c 108
-rw-r--r-- fs/quota/dquot.c 31
-rw-r--r-- fs/reiserfs/inode.c 31
-rw-r--r-- fs/reiserfs/ioctl.c 1
-rw-r--r-- fs/reiserfs/journal.c 2
-rw-r--r-- fs/reiserfs/lbalance.c 2
-rw-r--r-- fs/reiserfs/reiserfs.h 3
-rw-r--r-- fs/reiserfs/super.c 92
-rw-r--r-- fs/udf/file.c 10
-rw-r--r-- fs/udf/inode.c 22
-rw-r--r-- fs/udf/namei.c 2
41 files changed, 1015 insertions, 883 deletions
diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h
index 5e64de9..03f5ce1 100644
--- a/fs/ext2/ext2.h
+++ b/fs/ext2/ext2.h
@@ -779,7 +779,6 @@ extern void ext2_evict_inode(struct inode *);
extern int ext2_get_block(struct inode *, sector_t, struct buffer_head *, int);
extern int ext2_setattr (struct dentry *, struct iattr *);
extern void ext2_set_inode_flags(struct inode *inode);
-extern void ext2_get_inode_flags(struct ext2_inode_info *);
extern int ext2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
u64 start, u64 len);
@@ -796,7 +795,8 @@ void ext2_error(struct super_block *, const char *, const char *, ...);
extern __printf(3, 4)
void ext2_msg(struct super_block *, const char *, const char *, ...);
extern void ext2_update_dynamic_rev (struct super_block *sb);
-extern void ext2_write_super (struct super_block *);
+extern void ext2_sync_super(struct super_block *sb, struct ext2_super_block *es,
+ int wait);
/*
* Inodes and files operations
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index 128cce5..3a38c1b 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -1384,25 +1384,6 @@ void ext2_set_inode_flags(struct inode *inode)
inode->i_flags |= S_DAX;
}
-/* Propagate flags from i_flags to EXT2_I(inode)->i_flags */
-void ext2_get_inode_flags(struct ext2_inode_info *ei)
-{
- unsigned int flags = ei->vfs_inode.i_flags;
-
- ei->i_flags &= ~(EXT2_SYNC_FL|EXT2_APPEND_FL|
- EXT2_IMMUTABLE_FL|EXT2_NOATIME_FL|EXT2_DIRSYNC_FL);
- if (flags & S_SYNC)
- ei->i_flags |= EXT2_SYNC_FL;
- if (flags & S_APPEND)
- ei->i_flags |= EXT2_APPEND_FL;
- if (flags & S_IMMUTABLE)
- ei->i_flags |= EXT2_IMMUTABLE_FL;
- if (flags & S_NOATIME)
- ei->i_flags |= EXT2_NOATIME_FL;
- if (flags & S_DIRSYNC)
- ei->i_flags |= EXT2_DIRSYNC_FL;
-}
-
struct inode *ext2_iget (struct super_block *sb, unsigned long ino)
{
struct ext2_inode_info *ei;
@@ -1563,7 +1544,6 @@ static int __ext2_write_inode(struct inode *inode, int do_sync)
if (ei->i_state & EXT2_STATE_NEW)
memset(raw_inode, 0, EXT2_SB(sb)->s_inode_size);
- ext2_get_inode_flags(ei);
raw_inode->i_mode = cpu_to_le16(inode->i_mode);
if (!(test_opt(sb, NO_UID32))) {
raw_inode->i_uid_low = cpu_to_le16(low_16_bits(uid));
@@ -1615,7 +1595,7 @@ static int __ext2_write_inode(struct inode *inode, int do_sync)
EXT2_SET_RO_COMPAT_FEATURE(sb,
EXT2_FEATURE_RO_COMPAT_LARGE_FILE);
spin_unlock(&EXT2_SB(sb)->s_lock);
- ext2_write_super(sb);
+ ext2_sync_super(sb, EXT2_SB(sb)->s_es, 1);
}
}
}
diff --git a/fs/ext2/ioctl.c b/fs/ext2/ioctl.c
index 191e02b..087f122 100644
--- a/fs/ext2/ioctl.c
+++ b/fs/ext2/ioctl.c
@@ -29,7 +29,6 @@ long ext2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
switch (cmd) {
case EXT2_IOC_GETFLAGS:
- ext2_get_inode_flags(ei);
flags = ei->i_flags & EXT2_FL_USER_VISIBLE;
return put_user(flags, (int __user *) arg);
case EXT2_IOC_SETFLAGS: {
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 9e25a71..8ac673c 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -36,8 +36,7 @@
#include "xattr.h"
#include "acl.h"
-static void ext2_sync_super(struct super_block *sb,
- struct ext2_super_block *es, int wait);
+static void ext2_write_super(struct super_block *sb);
static int ext2_remount (struct super_block * sb, int * flags, char * data);
static int ext2_statfs (struct dentry * dentry, struct kstatfs * buf);
static int ext2_sync_fs(struct super_block *sb, int wait);
@@ -123,13 +122,29 @@ void ext2_update_dynamic_rev(struct super_block *sb)
*/
}
+#ifdef CONFIG_QUOTA
+static int ext2_quota_off(struct super_block *sb, int type);
+
+static void ext2_quota_off_umount(struct super_block *sb)
+{
+ int type;
+
+ for (type = 0; type < MAXQUOTAS; type++)
+ ext2_quota_off(sb, type);
+}
+#else
+static inline void ext2_quota_off_umount(struct super_block *sb)
+{
+}
+#endif
+
static void ext2_put_super (struct super_block * sb)
{
int db_count;
int i;
struct ext2_sb_info *sbi = EXT2_SB(sb);
- dquot_disable(sb, -1, DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED);
+ ext2_quota_off_umount(sb);
if (sbi->s_mb_cache) {
ext2_xattr_destroy_cache(sbi->s_mb_cache);
@@ -314,10 +329,23 @@ static int ext2_show_options(struct seq_file *seq, struct dentry *root)
#ifdef CONFIG_QUOTA
static ssize_t ext2_quota_read(struct super_block *sb, int type, char *data, size_t len, loff_t off);
static ssize_t ext2_quota_write(struct super_block *sb, int type, const char *data, size_t len, loff_t off);
+static int ext2_quota_on(struct super_block *sb, int type, int format_id,
+ const struct path *path);
static struct dquot **ext2_get_dquots(struct inode *inode)
{
return EXT2_I(inode)->i_dquot;
}
+
+static const struct quotactl_ops ext2_quotactl_ops = {
+ .quota_on = ext2_quota_on,
+ .quota_off = ext2_quota_off,
+ .quota_sync = dquot_quota_sync,
+ .get_state = dquot_get_state,
+ .set_info = dquot_set_dqinfo,
+ .get_dqblk = dquot_get_dqblk,
+ .set_dqblk = dquot_set_dqblk,
+ .get_nextdqblk = dquot_get_next_dqblk,
+};
#endif
static const struct super_operations ext2_sops = {
@@ -1117,7 +1145,7 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
#ifdef CONFIG_QUOTA
sb->dq_op = &dquot_operations;
- sb->s_qcop = &dquot_quotactl_ops;
+ sb->s_qcop = &ext2_quotactl_ops;
sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP;
#endif
@@ -1194,8 +1222,8 @@ static void ext2_clear_super_error(struct super_block *sb)
}
}
-static void ext2_sync_super(struct super_block *sb, struct ext2_super_block *es,
- int wait)
+void ext2_sync_super(struct super_block *sb, struct ext2_super_block *es,
+ int wait)
{
ext2_clear_super_error(sb);
spin_lock(&EXT2_SB(sb)->s_lock);
@@ -1270,7 +1298,7 @@ static int ext2_unfreeze(struct super_block *sb)
return 0;
}
-void ext2_write_super(struct super_block *sb)
+static void ext2_write_super(struct super_block *sb)
{
if (!(sb->s_flags & MS_RDONLY))
ext2_sync_fs(sb, 1);
@@ -1548,6 +1576,51 @@ out:
return len - towrite;
}
+static int ext2_quota_on(struct super_block *sb, int type, int format_id,
+ const struct path *path)
+{
+ int err;
+ struct inode *inode;
+
+ err = dquot_quota_on(sb, type, format_id, path);
+ if (err)
+ return err;
+
+ inode = d_inode(path->dentry);
+ inode_lock(inode);
+ EXT2_I(inode)->i_flags |= EXT2_NOATIME_FL | EXT2_IMMUTABLE_FL;
+ inode_set_flags(inode, S_NOATIME | S_IMMUTABLE,
+ S_NOATIME | S_IMMUTABLE);
+ inode_unlock(inode);
+ mark_inode_dirty(inode);
+
+ return 0;
+}
+
+static int ext2_quota_off(struct super_block *sb, int type)
+{
+ struct inode *inode = sb_dqopt(sb)->files[type];
+ int err;
+
+ if (!inode || !igrab(inode))
+ goto out;
+
+ err = dquot_quota_off(sb, type);
+ if (err)
+ goto out_put;
+
+ inode_lock(inode);
+ EXT2_I(inode)->i_flags &= ~(EXT2_NOATIME_FL | EXT2_IMMUTABLE_FL);
+ inode_set_flags(inode, 0, S_NOATIME | S_IMMUTABLE);
+ inode_unlock(inode);
+ mark_inode_dirty(inode);
+out_put:
+ iput(inode);
+ return err;
+out:
+ return dquot_quota_off(sb, type);
+}
+
#endif
static struct file_system_type ext2_fs_type = {
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index fb69ee2..f7b465b 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -2477,7 +2477,6 @@ extern int ext4_truncate(struct inode *);
extern int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length);
extern int ext4_truncate_restart_trans(handle_t *, struct inode *, int nblocks);
extern void ext4_set_inode_flags(struct inode *);
-extern void ext4_get_inode_flags(struct ext4_inode_info *);
extern int ext4_alloc_da_blocks(struct inode *inode);
extern void ext4_set_aops(struct inode *inode);
extern int ext4_writepage_trans_blocks(struct inode *);
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index b9ffa9f..10b574a 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -4502,31 +4502,6 @@ void ext4_set_inode_flags(struct inode *inode)
S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC|S_DAX);
}
-/* Propagate flags from i_flags to EXT4_I(inode)->i_flags */
-void ext4_get_inode_flags(struct ext4_inode_info *ei)
-{
- unsigned int vfs_fl;
- unsigned long old_fl, new_fl;
-
- do {
- vfs_fl = ei->vfs_inode.i_flags;
- old_fl = ei->i_flags;
- new_fl = old_fl & ~(EXT4_SYNC_FL|EXT4_APPEND_FL|
- EXT4_IMMUTABLE_FL|EXT4_NOATIME_FL|
- EXT4_DIRSYNC_FL);
- if (vfs_fl & S_SYNC)
- new_fl |= EXT4_SYNC_FL;
- if (vfs_fl & S_APPEND)
- new_fl |= EXT4_APPEND_FL;
- if (vfs_fl & S_IMMUTABLE)
- new_fl |= EXT4_IMMUTABLE_FL;
- if (vfs_fl & S_NOATIME)
- new_fl |= EXT4_NOATIME_FL;
- if (vfs_fl & S_DIRSYNC)
- new_fl |= EXT4_DIRSYNC_FL;
- } while (cmpxchg(&ei->i_flags, old_fl, new_fl) != old_fl);
-}
-
static blkcnt_t ext4_inode_blocks(struct ext4_inode *raw_inode,
struct ext4_inode_info *ei)
{
@@ -4963,7 +4938,6 @@ static int ext4_do_update_inode(handle_t *handle,
if (ext4_test_inode_state(inode, EXT4_STATE_NEW))
memset(raw_inode, 0, EXT4_SB(inode->i_sb)->s_inode_size);
- ext4_get_inode_flags(ei);
raw_inode->i_mode = cpu_to_le16(inode->i_mode);
i_uid = i_uid_read(inode);
i_gid = i_gid_read(inode);
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index a4273dd..184e74e 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -500,7 +500,6 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
switch (cmd) {
case EXT4_IOC_GETFLAGS:
- ext4_get_inode_flags(ei);
flags = ei->i_flags & EXT4_FL_USER_VISIBLE;
return put_user(flags, (int __user *) arg);
case EXT4_IOC_SETFLAGS: {
@@ -888,7 +887,6 @@ resizefs_out:
struct fsxattr fa;
memset(&fa, 0, sizeof(struct fsxattr));
- ext4_get_inode_flags(ei);
fa.fsx_xflags = ext4_iflags_to_xflags(ei->i_flags & EXT4_FL_USER_VISIBLE);
if (ext4_has_feature_project(inode->i_sb)) {
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index a9448db..a9c72e3 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -839,6 +839,28 @@ static void dump_orphan_list(struct super_block *sb, struct ext4_sb_info *sbi)
}
}
+#ifdef CONFIG_QUOTA
+static int ext4_quota_off(struct super_block *sb, int type);
+
+static inline void ext4_quota_off_umount(struct super_block *sb)
+{
+ int type;
+
+ if (ext4_has_feature_quota(sb)) {
+ dquot_disable(sb, -1,
+ DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED);
+ } else {
+ /* Use our quota_off function to clear inode flags etc. */
+ for (type = 0; type < EXT4_MAXQUOTAS; type++)
+ ext4_quota_off(sb, type);
+ }
+}
+#else
+static inline void ext4_quota_off_umount(struct super_block *sb)
+{
+}
+#endif
+
static void ext4_put_super(struct super_block *sb)
{
struct ext4_sb_info *sbi = EXT4_SB(sb);
@@ -847,7 +869,7 @@ static void ext4_put_super(struct super_block *sb)
int i, err;
ext4_unregister_li_request(sb);
- dquot_disable(sb, -1, DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED);
+ ext4_quota_off_umount(sb);
flush_workqueue(sbi->rsv_conversion_wq);
destroy_workqueue(sbi->rsv_conversion_wq);
@@ -1218,7 +1240,6 @@ static int ext4_mark_dquot_dirty(struct dquot *dquot);
static int ext4_write_info(struct super_block *sb, int type);
static int ext4_quota_on(struct super_block *sb, int type, int format_id,
const struct path *path);
-static int ext4_quota_off(struct super_block *sb, int type);
static int ext4_quota_on_mount(struct super_block *sb, int type);
static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data,
size_t len, loff_t off);
@@ -5344,11 +5365,33 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id,
if (err)
return err;
}
+
lockdep_set_quota_inode(path->dentry->d_inode, I_DATA_SEM_QUOTA);
err = dquot_quota_on(sb, type, format_id, path);
- if (err)
+ if (err) {
lockdep_set_quota_inode(path->dentry->d_inode,
I_DATA_SEM_NORMAL);
+ } else {
+ struct inode *inode = d_inode(path->dentry);
+ handle_t *handle;
+
+ /*
+ * Set inode flags to prevent userspace from messing with quota
+ * files. If this fails, we return success anyway since quotas
+ * are already enabled and this is not a hard failure.
+ */
+ inode_lock(inode);
+ handle = ext4_journal_start(inode, EXT4_HT_QUOTA, 1);
+ if (IS_ERR(handle))
+ goto unlock_inode;
+ EXT4_I(inode)->i_flags |= EXT4_NOATIME_FL | EXT4_IMMUTABLE_FL;
+ inode_set_flags(inode, S_NOATIME | S_IMMUTABLE,
+ S_NOATIME | S_IMMUTABLE);
+ ext4_mark_inode_dirty(handle, inode);
+ ext4_journal_stop(handle);
+ unlock_inode:
+ inode_unlock(inode);
+ }
return err;
}
@@ -5422,24 +5465,39 @@ static int ext4_quota_off(struct super_block *sb, int type)
{
struct inode *inode = sb_dqopt(sb)->files[type];
handle_t *handle;
+ int err;
/* Force all delayed allocation blocks to be allocated.
* Caller already holds s_umount sem */
if (test_opt(sb, DELALLOC))
sync_filesystem(sb);
- if (!inode)
+ if (!inode || !igrab(inode))
goto out;
- /* Update modification times of quota files when userspace can
- * start looking at them */
+ err = dquot_quota_off(sb, type);
+ if (err)
+ goto out_put;
+
+ inode_lock(inode);
+ /*
+ * Update modification times of quota files when userspace can
+ * start looking at them. If we fail, we return success anyway since
+ * this is not a hard failure and quotas are already disabled.
+ */
handle = ext4_journal_start(inode, EXT4_HT_QUOTA, 1);
if (IS_ERR(handle))
- goto out;
+ goto out_unlock;
+ EXT4_I(inode)->i_flags &= ~(EXT4_NOATIME_FL | EXT4_IMMUTABLE_FL);
+ inode_set_flags(inode, 0, S_NOATIME | S_IMMUTABLE);
inode->i_mtime = inode->i_ctime = current_time(inode);
ext4_mark_inode_dirty(handle, inode);
ext4_journal_stop(handle);
-
+out_unlock:
+ inode_unlock(inode);
+out_put:
+ iput(inode);
+ return err;
out:
return dquot_quota_off(sb, type);
}
diff --git a/fs/inode.c b/fs/inode.c
index 88110fd..131b2bc 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -371,9 +371,6 @@ void inode_init_once(struct inode *inode)
INIT_LIST_HEAD(&inode->i_lru);
address_space_init_once(&inode->i_data);
i_size_ordered_init(inode);
-#ifdef CONFIG_FSNOTIFY
- INIT_HLIST_HEAD(&inode->i_fsnotify_marks);
-#endif
}
EXPORT_SYMBOL(inode_init_once);
diff --git a/fs/jfs/ioctl.c b/fs/jfs/ioctl.c
index fc89f94..5c5ac5b 100644
--- a/fs/jfs/ioctl.c
+++ b/fs/jfs/ioctl.c
@@ -64,7 +64,6 @@ long jfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
switch (cmd) {
case JFS_IOC_GETFLAGS:
- jfs_get_inode_flags(jfs_inode);
flags = jfs_inode->mode2 & JFS_FL_USER_VISIBLE;
flags = jfs_map_ext2(flags, 0);
return put_user(flags, (int __user *) arg);
@@ -98,7 +97,6 @@ long jfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
/* Lock against other parallel changes of flags */
inode_lock(inode);
- jfs_get_inode_flags(jfs_inode);
oldflags = jfs_inode->mode2;
/*
diff --git a/fs/jfs/jfs_imap.c b/fs/jfs/jfs_imap.c
index 6aca224..f36ef68 100644
--- a/fs/jfs/jfs_imap.c
+++ b/fs/jfs/jfs_imap.c
@@ -3148,7 +3148,6 @@ static void copy_to_dinode(struct dinode * dip, struct inode *ip)
else
dip->di_gid = cpu_to_le32(from_kgid(&init_user_ns,
jfs_ip->saved_gid));
- jfs_get_inode_flags(jfs_ip);
/*
* mode2 is only needed for storing the higher order bits.
* Trust i_mode for the lower order ones
diff --git a/fs/jfs/jfs_inode.c b/fs/jfs/jfs_inode.c
index 375dd25..5e9b7bb 100644
--- a/fs/jfs/jfs_inode.c
+++ b/fs/jfs/jfs_inode.c
@@ -45,24 +45,6 @@ void jfs_set_inode_flags(struct inode *inode)
S_DIRSYNC | S_SYNC);
}
-void jfs_get_inode_flags(struct jfs_inode_info *jfs_ip)
-{
- unsigned int flags = jfs_ip->vfs_inode.i_flags;
-
- jfs_ip->mode2 &= ~(JFS_IMMUTABLE_FL | JFS_APPEND_FL | JFS_NOATIME_FL |
- JFS_DIRSYNC_FL | JFS_SYNC_FL);
- if (flags & S_IMMUTABLE)
- jfs_ip->mode2 |= JFS_IMMUTABLE_FL;
- if (flags & S_APPEND)
- jfs_ip->mode2 |= JFS_APPEND_FL;
- if (flags & S_NOATIME)
- jfs_ip->mode2 |= JFS_NOATIME_FL;
- if (flags & S_DIRSYNC)
- jfs_ip->mode2 |= JFS_DIRSYNC_FL;
- if (flags & S_SYNC)
- jfs_ip->mode2 |= JFS_SYNC_FL;
-}
-
/*
* NAME: ialloc()
*
diff --git a/fs/jfs/jfs_inode.h b/fs/jfs/jfs_inode.h
index 9271cfe4..7b0b3a4 100644
--- a/fs/jfs/jfs_inode.h
+++ b/fs/jfs/jfs_inode.h
@@ -33,7 +33,6 @@ extern void jfs_truncate(struct inode *);
extern void jfs_truncate_nolock(struct inode *, loff_t);
extern void jfs_free_zero_link(struct inode *);
extern struct dentry *jfs_get_parent(struct dentry *dentry);
-extern void jfs_get_inode_flags(struct jfs_inode_info *);
extern struct dentry *jfs_fh_to_dentry(struct super_block *sb, struct fid *fid,
int fh_len, int fh_type);
extern struct dentry *jfs_fh_to_parent(struct super_block *sb, struct fid *fid,
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index c64c257..e8aad7d 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -45,6 +45,7 @@
#include "jfs_acl.h"
#include "jfs_debug.h"
#include "jfs_xattr.h"
+#include "jfs_dinode.h"
MODULE_DESCRIPTION("The Journaled Filesystem (JFS)");
MODULE_AUTHOR("Steve Best/Dave Kleikamp/Barry Arndt, IBM");
@@ -181,6 +182,35 @@ static int jfs_statfs(struct dentry *dentry, struct kstatfs *buf)
return 0;
}
+#ifdef CONFIG_QUOTA
+static int jfs_quota_off(struct super_block *sb, int type);
+static int jfs_quota_on(struct super_block *sb, int type, int format_id,
+ const struct path *path);
+
+static void jfs_quota_off_umount(struct super_block *sb)
+{
+ int type;
+
+ for (type = 0; type < MAXQUOTAS; type++)
+ jfs_quota_off(sb, type);
+}
+
+static const struct quotactl_ops jfs_quotactl_ops = {
+ .quota_on = jfs_quota_on,
+ .quota_off = jfs_quota_off,
+ .quota_sync = dquot_quota_sync,
+ .get_state = dquot_get_state,
+ .set_info = dquot_set_dqinfo,
+ .get_dqblk = dquot_get_dqblk,
+ .set_dqblk = dquot_set_dqblk,
+ .get_nextdqblk = dquot_get_next_dqblk,
+};
+#else
+static inline void jfs_quota_off_umount(struct super_block *sb)
+{
+}
+#endif
+
static void jfs_put_super(struct super_block *sb)
{
struct jfs_sb_info *sbi = JFS_SBI(sb);
@@ -188,7 +218,7 @@ static void jfs_put_super(struct super_block *sb)
jfs_info("In jfs_put_super");
- dquot_disable(sb, -1, DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED);
+ jfs_quota_off_umount(sb);
rc = jfs_umount(sb);
if (rc)
@@ -536,7 +566,7 @@ static int jfs_fill_super(struct super_block *sb, void *data, int silent)
sb->s_xattr = jfs_xattr_handlers;
#ifdef CONFIG_QUOTA
sb->dq_op = &dquot_operations;
- sb->s_qcop = &dquot_quotactl_ops;
+ sb->s_qcop = &jfs_quotactl_ops;
sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP;
#endif
@@ -840,6 +870,51 @@ static struct dquot **jfs_get_dquots(struct inode *inode)
{
return JFS_IP(inode)->i_dquot;
}
+
+static int jfs_quota_on(struct super_block *sb, int type, int format_id,
+ const struct path *path)
+{
+ int err;
+ struct inode *inode;
+
+ err = dquot_quota_on(sb, type, format_id, path);
+ if (err)
+ return err;
+
+ inode = d_inode(path->dentry);
+ inode_lock(inode);
+ JFS_IP(inode)->mode2 |= JFS_NOATIME_FL | JFS_IMMUTABLE_FL;
+ inode_set_flags(inode, S_NOATIME | S_IMMUTABLE,
+ S_NOATIME | S_IMMUTABLE);
+ inode_unlock(inode);
+ mark_inode_dirty(inode);
+
+ return 0;
+}
+
+static int jfs_quota_off(struct super_block *sb, int type)
+{
+ struct inode *inode = sb_dqopt(sb)->files[type];
+ int err;
+
+ if (!inode || !igrab(inode))
+ goto out;
+
+ err = dquot_quota_off(sb, type);
+ if (err)
+ goto out_put;
+
+ inode_lock(inode);
+ JFS_IP(inode)->mode2 &= ~(JFS_NOATIME_FL | JFS_IMMUTABLE_FL);
+ inode_set_flags(inode, 0, S_NOATIME | S_IMMUTABLE);
+ inode_unlock(inode);
+ mark_inode_dirty(inode);
+out_put:
+ iput(inode);
+ return err;
+out:
+ return dquot_quota_off(sb, type);
+}
#endif
static const struct super_operations jfs_super_operations = {
diff --git a/fs/mount.h b/fs/mount.h
index 2826543..bf1fda6 100644
--- a/fs/mount.h
+++ b/fs/mount.h
@@ -59,7 +59,7 @@ struct mount {
struct mountpoint *mnt_mp; /* where is it mounted */
struct hlist_node mnt_mp_list; /* list mounts with the same mountpoint */
#ifdef CONFIG_FSNOTIFY
- struct hlist_head mnt_fsnotify_marks;
+ struct fsnotify_mark_connector __rcu *mnt_fsnotify_marks;
__u32 mnt_fsnotify_mask;
#endif
int mnt_id; /* mount identifier */
diff --git a/fs/namespace.c b/fs/namespace.c
index cc1375ef..b3b115b 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -236,9 +236,6 @@ static struct mount *alloc_vfsmnt(const char *name)
INIT_LIST_HEAD(&mnt->mnt_slave_list);
INIT_LIST_HEAD(&mnt->mnt_slave);
INIT_HLIST_NODE(&mnt->mnt_mp_list);
-#ifdef CONFIG_FSNOTIFY
- INIT_HLIST_HEAD(&mnt->mnt_fsnotify_marks);
-#endif
init_fs_pin(&mnt->mnt_umount, drop_mountpoint);
}
return mnt;
diff --git a/fs/notify/Makefile b/fs/notify/Makefile
index 96d3420..3e969ae 100644
--- a/fs/notify/Makefile
+++ b/fs/notify/Makefile
@@ -1,5 +1,5 @@
-obj-$(CONFIG_FSNOTIFY) += fsnotify.o notification.o group.o inode_mark.o \
- mark.o vfsmount_mark.o fdinfo.o
+obj-$(CONFIG_FSNOTIFY) += fsnotify.o notification.o group.o mark.o \
+ fdinfo.o
obj-y += dnotify/
obj-y += inotify/
diff --git a/fs/notify/dnotify/dnotify.c b/fs/notify/dnotify/dnotify.c
index 5a4ec30..2430a04 100644
--- a/fs/notify/dnotify/dnotify.c
+++ b/fs/notify/dnotify/dnotify.c
@@ -52,7 +52,7 @@ struct dnotify_mark {
*/
static void dnotify_recalc_inode_mask(struct fsnotify_mark *fsn_mark)
{
- __u32 new_mask, old_mask;
+ __u32 new_mask = 0;
struct dnotify_struct *dn;
struct dnotify_mark *dn_mark = container_of(fsn_mark,
struct dnotify_mark,
@@ -60,17 +60,13 @@ static void dnotify_recalc_inode_mask(struct fsnotify_mark *fsn_mark)
assert_spin_locked(&fsn_mark->lock);
- old_mask = fsn_mark->mask;
- new_mask = 0;
for (dn = dn_mark->dn; dn != NULL; dn = dn->dn_next)
new_mask |= (dn->dn_mask & ~FS_DN_MULTISHOT);
- fsnotify_set_mark_mask_locked(fsn_mark, new_mask);
-
- if (old_mask == new_mask)
+ if (fsn_mark->mask == new_mask)
return;
+ fsn_mark->mask = new_mask;
- if (fsn_mark->inode)
- fsnotify_recalc_inode_mask(fsn_mark->inode);
+ fsnotify_recalc_mask(fsn_mark->connector);
}
/*
@@ -86,7 +82,8 @@ static int dnotify_handle_event(struct fsnotify_group *group,
struct fsnotify_mark *inode_mark,
struct fsnotify_mark *vfsmount_mark,
u32 mask, const void *data, int data_type,
- const unsigned char *file_name, u32 cookie)
+ const unsigned char *file_name, u32 cookie,
+ struct fsnotify_iter_info *iter_info)
{
struct dnotify_mark *dn_mark;
struct dnotify_struct *dn;
@@ -138,6 +135,7 @@ static void dnotify_free_mark(struct fsnotify_mark *fsn_mark)
static struct fsnotify_ops dnotify_fsnotify_ops = {
.handle_event = dnotify_handle_event,
+ .free_mark = dnotify_free_mark,
};
/*
@@ -160,7 +158,7 @@ void dnotify_flush(struct file *filp, fl_owner_t id)
if (!S_ISDIR(inode->i_mode))
return;
- fsn_mark = fsnotify_find_inode_mark(dnotify_group, inode);
+ fsn_mark = fsnotify_find_mark(&inode->i_fsnotify_marks, dnotify_group);
if (!fsn_mark)
return;
dn_mark = container_of(fsn_mark, struct dnotify_mark, fsn_mark);
@@ -308,7 +306,7 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg)
/* set up the new_fsn_mark and new_dn_mark */
new_fsn_mark = &new_dn_mark->fsn_mark;
- fsnotify_init_mark(new_fsn_mark, dnotify_free_mark);
+ fsnotify_init_mark(new_fsn_mark, dnotify_group);
new_fsn_mark->mask = mask;
new_dn_mark->dn = NULL;
@@ -316,13 +314,12 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg)
mutex_lock(&dnotify_group->mark_mutex);
/* add the new_fsn_mark or find an old one. */
- fsn_mark = fsnotify_find_inode_mark(dnotify_group, inode);
+ fsn_mark = fsnotify_find_mark(&inode->i_fsnotify_marks, dnotify_group);
if (fsn_mark) {
dn_mark = container_of(fsn_mark, struct dnotify_mark, fsn_mark);
spin_lock(&fsn_mark->lock);
} else {
- fsnotify_add_mark_locked(new_fsn_mark, dnotify_group, inode,
- NULL, 0);
+ fsnotify_add_mark_locked(new_fsn_mark, inode, NULL, 0);
spin_lock(&new_fsn_mark->lock);
fsn_mark = new_fsn_mark;
dn_mark = new_dn_mark;
diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
index e5f7e47..2fa99ae 100644
--- a/fs/notify/fanotify/fanotify.c
+++ b/fs/notify/fanotify/fanotify.c
@@ -57,14 +57,26 @@ static int fanotify_merge(struct list_head *list, struct fsnotify_event *event)
#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
static int fanotify_get_response(struct fsnotify_group *group,
- struct fanotify_perm_event_info *event)
+ struct fanotify_perm_event_info *event,
+ struct fsnotify_iter_info *iter_info)
{
int ret;
pr_debug("%s: group=%p event=%p\n", __func__, group, event);
+ /*
+ * fsnotify_prepare_user_wait() fails if we race with mark deletion.
+ * Just let the operation pass in that case.
+ */
+ if (!fsnotify_prepare_user_wait(iter_info)) {
+ event->response = FAN_ALLOW;
+ goto out;
+ }
+
wait_event(group->fanotify_data.access_waitq, event->response);
+ fsnotify_finish_user_wait(iter_info);
+out:
/* userspace responded, convert to something usable */
switch (event->response) {
case FAN_ALLOW:
@@ -174,7 +186,8 @@ static int fanotify_handle_event(struct fsnotify_group *group,
struct fsnotify_mark *inode_mark,
struct fsnotify_mark *fanotify_mark,
u32 mask, const void *data, int data_type,
- const unsigned char *file_name, u32 cookie)
+ const unsigned char *file_name, u32 cookie,
+ struct fsnotify_iter_info *iter_info)
{
int ret = 0;
struct fanotify_event_info *event;
@@ -215,7 +228,8 @@ static int fanotify_handle_event(struct fsnotify_group *group,
#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
if (mask & FAN_ALL_PERM_EVENTS) {
- ret = fanotify_get_response(group, FANOTIFY_PE(fsn_event));
+ ret = fanotify_get_response(group, FANOTIFY_PE(fsn_event),
+ iter_info);
fsnotify_destroy_event(group, fsn_event);
}
#endif
@@ -248,8 +262,14 @@ static void fanotify_free_event(struct fsnotify_event *fsn_event)
kmem_cache_free(fanotify_event_cachep, event);
}
+static void fanotify_free_mark(struct fsnotify_mark *fsn_mark)
+{
+ kmem_cache_free(fanotify_mark_cache, fsn_mark);
+}
+
const struct fsnotify_ops fanotify_fsnotify_ops = {
.handle_event = fanotify_handle_event,
.free_group_priv = fanotify_free_group_priv,
.free_event = fanotify_free_event,
+ .free_mark = fanotify_free_mark,
};
diff --git a/fs/notify/fanotify/fanotify.h b/fs/notify/fanotify/fanotify.h
index 4500a74..4eb6f5e 100644
--- a/fs/notify/fanotify/fanotify.h
+++ b/fs/notify/fanotify/fanotify.h
@@ -2,6 +2,7 @@
#include <linux/path.h>
#include <linux/slab.h>
+extern struct kmem_cache *fanotify_mark_cache;
extern struct kmem_cache *fanotify_event_cachep;
extern struct kmem_cache *fanotify_perm_event_cachep;
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index 2b37f27..907a481 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -41,7 +41,7 @@
extern const struct fsnotify_ops fanotify_fsnotify_ops;
-static struct kmem_cache *fanotify_mark_cache __read_mostly;
+struct kmem_cache *fanotify_mark_cache __read_mostly;
struct kmem_cache *fanotify_event_cachep __read_mostly;
struct kmem_cache *fanotify_perm_event_cachep __read_mostly;
@@ -295,27 +295,37 @@ static ssize_t fanotify_read(struct file *file, char __user *buf,
}
ret = copy_event_to_user(group, kevent, buf);
+ if (unlikely(ret == -EOPENSTALE)) {
+ /*
+ * We cannot report events with stale fd so drop it.
+ * Setting ret to 0 will continue the event loop and
+ * do the right thing if there are no more events to
+ * read (i.e. return bytes read, -EAGAIN or wait).
+ */
+ ret = 0;
+ }
+
/*
* Permission events get queued to wait for response. Other
* events can be destroyed now.
*/
if (!(kevent->mask & FAN_ALL_PERM_EVENTS)) {
fsnotify_destroy_event(group, kevent);
- if (ret < 0)
- break;
} else {
#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
- if (ret < 0) {
+ if (ret <= 0) {
FANOTIFY_PE(kevent)->response = FAN_DENY;
wake_up(&group->fanotify_data.access_waitq);
- break;
+ } else {
+ spin_lock(&group->notification_lock);
+ list_add_tail(&kevent->list,
+ &group->fanotify_data.access_list);
+ spin_unlock(&group->notification_lock);
}
- spin_lock(&group->notification_lock);
- list_add_tail(&kevent->list,
- &group->fanotify_data.access_list);
- spin_unlock(&group->notification_lock);
#endif
}
+ if (ret < 0)
+ break;
buf += ret;
count -= ret;
}
@@ -445,11 +455,6 @@ static const struct file_operations fanotify_fops = {
.llseek = noop_llseek,
};
-static void fanotify_free_mark(struct fsnotify_mark *fsn_mark)
-{
- kmem_cache_free(fanotify_mark_cache, fsn_mark);
-}
-
static int fanotify_find_path(int dfd, const char __user *filename,
struct path *path, unsigned int flags)
{
@@ -511,13 +516,12 @@ static __u32 fanotify_mark_remove_from_mask(struct fsnotify_mark *fsn_mark,
tmask &= ~FAN_ONDIR;
oldmask = fsn_mark->mask;
- fsnotify_set_mark_mask_locked(fsn_mark, tmask);
+ fsn_mark->mask = tmask;
} else {
__u32 tmask = fsn_mark->ignored_mask & ~mask;
if (flags & FAN_MARK_ONDIR)
tmask &= ~FAN_ONDIR;
-
- fsnotify_set_mark_ignored_mask_locked(fsn_mark, tmask);
+ fsn_mark->ignored_mask = tmask;
}
*destroy = !(fsn_mark->mask | fsn_mark->ignored_mask);
spin_unlock(&fsn_mark->lock);
@@ -534,7 +538,8 @@ static int fanotify_remove_vfsmount_mark(struct fsnotify_group *group,
int destroy_mark;
mutex_lock(&group->mark_mutex);
- fsn_mark = fsnotify_find_vfsmount_mark(group, mnt);
+ fsn_mark = fsnotify_find_mark(&real_mount(mnt)->mnt_fsnotify_marks,
+ group);
if (!fsn_mark) {
mutex_unlock(&group->mark_mutex);
return -ENOENT;
@@ -542,6 +547,8 @@ static int fanotify_remove_vfsmount_mark(struct fsnotify_group *group,
removed = fanotify_mark_remove_from_mask(fsn_mark, mask, flags,
&destroy_mark);
+ if (removed & real_mount(mnt)->mnt_fsnotify_mask)
+ fsnotify_recalc_mask(real_mount(mnt)->mnt_fsnotify_marks);
if (destroy_mark)
fsnotify_detach_mark(fsn_mark);
mutex_unlock(&group->mark_mutex);
@@ -549,9 +556,6 @@ static int fanotify_remove_vfsmount_mark(struct fsnotify_group *group,
fsnotify_free_mark(fsn_mark);
fsnotify_put_mark(fsn_mark);
- if (removed & real_mount(mnt)->mnt_fsnotify_mask)
- fsnotify_recalc_vfsmount_mask(mnt);
-
return 0;
}
@@ -564,7 +568,7 @@ static int fanotify_remove_inode_mark(struct fsnotify_group *group,
int destroy_mark;
mutex_lock(&group->mark_mutex);
- fsn_mark = fsnotify_find_inode_mark(group, inode);
+ fsn_mark = fsnotify_find_mark(&inode->i_fsnotify_marks, group);
if (!fsn_mark) {
mutex_unlock(&group->mark_mutex);
return -ENOENT;
@@ -572,16 +576,16 @@ static int fanotify_remove_inode_mark(struct fsnotify_group *group,
removed = fanotify_mark_remove_from_mask(fsn_mark, mask, flags,
&destroy_mark);
+ if (removed & inode->i_fsnotify_mask)
+ fsnotify_recalc_mask(inode->i_fsnotify_marks);
if (destroy_mark)
fsnotify_detach_mark(fsn_mark);
mutex_unlock(&group->mark_mutex);
if (destroy_mark)
fsnotify_free_mark(fsn_mark);
- /* matches the fsnotify_find_inode_mark() */
+ /* matches the fsnotify_find_mark() */
fsnotify_put_mark(fsn_mark);
- if (removed & inode->i_fsnotify_mask)
- fsnotify_recalc_inode_mask(inode);
return 0;
}
@@ -600,13 +604,13 @@ static __u32 fanotify_mark_add_to_mask(struct fsnotify_mark *fsn_mark,
tmask |= FAN_ONDIR;
oldmask = fsn_mark->mask;
- fsnotify_set_mark_mask_locked(fsn_mark, tmask);
+ fsn_mark->mask = tmask;
} else {
__u32 tmask = fsn_mark->ignored_mask | mask;
if (flags & FAN_MARK_ONDIR)
tmask |= FAN_ONDIR;
- fsnotify_set_mark_ignored_mask_locked(fsn_mark, tmask);
+ fsn_mark->ignored_mask = tmask;
if (flags & FAN_MARK_IGNORED_SURV_MODIFY)
fsn_mark->flags |= FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY;
}
@@ -629,8 +633,8 @@ static struct fsnotify_mark *fanotify_add_new_mark(struct fsnotify_group *group,
if (!mark)
return ERR_PTR(-ENOMEM);
- fsnotify_init_mark(mark, fanotify_free_mark);
- ret = fsnotify_add_mark_locked(mark, group, inode, mnt, 0);
+ fsnotify_init_mark(mark, group);
+ ret = fsnotify_add_mark_locked(mark, inode, mnt, 0);
if (ret) {
fsnotify_put_mark(mark);
return ERR_PTR(ret);
@@ -648,7 +652,8 @@ static int fanotify_add_vfsmount_mark(struct fsnotify_group *group,
__u32 added;
mutex_lock(&group->mark_mutex);
- fsn_mark = fsnotify_find_vfsmount_mark(group, mnt);
+ fsn_mark = fsnotify_find_mark(&real_mount(mnt)->mnt_fsnotify_marks,
+ group);
if (!fsn_mark) {
fsn_mark = fanotify_add_new_mark(group, NULL, mnt);
if (IS_ERR(fsn_mark)) {
@@ -657,10 +662,9 @@ static int fanotify_add_vfsmount_mark(struct fsnotify_group *group,
}
}
added = fanotify_mark_add_to_mask(fsn_mark, mask, flags);
- mutex_unlock(&group->mark_mutex);
-
if (added & ~real_mount(mnt)->mnt_fsnotify_mask)
- fsnotify_recalc_vfsmount_mask(mnt);
+ fsnotify_recalc_mask(real_mount(mnt)->mnt_fsnotify_marks);
+ mutex_unlock(&group->mark_mutex);
fsnotify_put_mark(fsn_mark);
return 0;
@@ -686,7 +690,7 @@ static int fanotify_add_inode_mark(struct fsnotify_group *group,
return 0;
mutex_lock(&group->mark_mutex);
- fsn_mark = fsnotify_find_inode_mark(group, inode);
+ fsn_mark = fsnotify_find_mark(&inode->i_fsnotify_marks, group);
if (!fsn_mark) {
fsn_mark = fanotify_add_new_mark(group, inode, NULL);
if (IS_ERR(fsn_mark)) {
@@ -695,10 +699,9 @@ static int fanotify_add_inode_mark(struct fsnotify_group *group,
}
}
added = fanotify_mark_add_to_mask(fsn_mark, mask, flags);
- mutex_unlock(&group->mark_mutex);
-
if (added & ~inode->i_fsnotify_mask)
- fsnotify_recalc_inode_mask(inode);
+ fsnotify_recalc_mask(inode->i_fsnotify_marks);
+ mutex_unlock(&group->mark_mutex);
fsnotify_put_mark(fsn_mark);
return 0;
diff --git a/fs/notify/fdinfo.c b/fs/notify/fdinfo.c
index fd98e51..dd63aa9 100644
--- a/fs/notify/fdinfo.c
+++ b/fs/notify/fdinfo.c
@@ -76,12 +76,11 @@ static void inotify_fdinfo(struct seq_file *m, struct fsnotify_mark *mark)
struct inotify_inode_mark *inode_mark;
struct inode *inode;
- if (!(mark->flags & FSNOTIFY_MARK_FLAG_ALIVE) ||
- !(mark->flags & FSNOTIFY_MARK_FLAG_INODE))
+ if (!(mark->connector->flags & FSNOTIFY_OBJ_TYPE_INODE))
return;
inode_mark = container_of(mark, struct inotify_inode_mark, fsn_mark);
- inode = igrab(mark->inode);
+ inode = igrab(mark->connector->inode);
if (inode) {
/*
* IN_ALL_EVENTS represents all of the mask bits
@@ -113,14 +112,11 @@ static void fanotify_fdinfo(struct seq_file *m, struct fsnotify_mark *mark)
unsigned int mflags = 0;
struct inode *inode;
- if (!(mark->flags & FSNOTIFY_MARK_FLAG_ALIVE))
- return;
-
if (mark->flags & FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY)
mflags |= FAN_MARK_IGNORED_SURV_MODIFY;
- if (mark->flags & FSNOTIFY_MARK_FLAG_INODE) {
- inode = igrab(mark->inode);
+ if (mark->connector->flags & FSNOTIFY_OBJ_TYPE_INODE) {
+ inode = igrab(mark->connector->inode);
if (!inode)
return;
seq_printf(m, "fanotify ino:%lx sdev:%x mflags:%x mask:%x ignored_mask:%x ",
@@ -129,8 +125,8 @@ static void fanotify_fdinfo(struct seq_file *m, struct fsnotify_mark *mark)
show_mark_fhandle(m, inode);
seq_putc(m, '\n');
iput(inode);
- } else if (mark->flags & FSNOTIFY_MARK_FLAG_VFSMOUNT) {
- struct mount *mnt = real_mount(mark->mnt);
+ } else if (mark->connector->flags & FSNOTIFY_OBJ_TYPE_VFSMOUNT) {
+ struct mount *mnt = real_mount(mark->connector->mnt);
seq_printf(m, "fanotify mnt_id:%x mflags:%x mask:%x ignored_mask:%x\n",
mnt->mnt_id, mflags, mark->mask, mark->ignored_mask);
diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
index b41515d..01a9f0f 100644
--- a/fs/notify/fsnotify.c
+++ b/fs/notify/fsnotify.c
@@ -41,6 +41,63 @@ void __fsnotify_vfsmount_delete(struct vfsmount *mnt)
fsnotify_clear_marks_by_mount(mnt);
}
+/**
+ * fsnotify_unmount_inodes - an sb is unmounting. handle any watched inodes.
+ * @sb: superblock being unmounted.
+ *
+ * Called during unmount with no locks held, so needs to be safe against
+ * concurrent modifiers. We temporarily drop sb->s_inode_list_lock and CAN block.
+ */
+void fsnotify_unmount_inodes(struct super_block *sb)
+{
+ struct inode *inode, *iput_inode = NULL;
+
+ spin_lock(&sb->s_inode_list_lock);
+ list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
+ /*
+ * We cannot __iget() an inode in state I_FREEING,
+ * I_WILL_FREE, or I_NEW which is fine because by that point
+ * the inode cannot have any associated watches.
+ */
+ spin_lock(&inode->i_lock);
+ if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) {
+ spin_unlock(&inode->i_lock);
+ continue;
+ }
+
+ /*
+ * If i_count is zero, the inode cannot have any watches and
+ * doing an __iget/iput with MS_ACTIVE clear would actually
+ * evict all inodes with zero i_count from icache which is
+ * unnecessarily violent and may in fact be illegal to do.
+ */
+ if (!atomic_read(&inode->i_count)) {
+ spin_unlock(&inode->i_lock);
+ continue;
+ }
+
+ __iget(inode);
+ spin_unlock(&inode->i_lock);
+ spin_unlock(&sb->s_inode_list_lock);
+
+ if (iput_inode)
+ iput(iput_inode);
+
+ /* for each watch, send FS_UNMOUNT and then remove it */
+ fsnotify(inode, FS_UNMOUNT, inode, FSNOTIFY_EVENT_INODE, NULL, 0);
+
+ fsnotify_inode_delete(inode);
+
+ iput_inode = inode;
+
+ spin_lock(&sb->s_inode_list_lock);
+ }
+ spin_unlock(&sb->s_inode_list_lock);
+
+ if (iput_inode)
+ iput(iput_inode);
+}
+
/*
* Given an inode, first check if we care what happens to our children. Inotify
* and dnotify both tell their parents about events. If we care about any event
@@ -127,7 +184,8 @@ static int send_to_group(struct inode *to_tell,
struct fsnotify_mark *vfsmount_mark,
__u32 mask, const void *data,
int data_is, u32 cookie,
- const unsigned char *file_name)
+ const unsigned char *file_name,
+ struct fsnotify_iter_info *iter_info)
{
struct fsnotify_group *group = NULL;
__u32 inode_test_mask = 0;
@@ -178,7 +236,7 @@ static int send_to_group(struct inode *to_tell,
return group->ops->handle_event(group, to_tell, inode_mark,
vfsmount_mark, mask, data, data_is,
- file_name, cookie);
+ file_name, cookie, iter_info);
}
/*
@@ -193,8 +251,10 @@ int fsnotify(struct inode *to_tell, __u32 mask, const void *data, int data_is,
struct hlist_node *inode_node = NULL, *vfsmount_node = NULL;
struct fsnotify_mark *inode_mark = NULL, *vfsmount_mark = NULL;
struct fsnotify_group *inode_group, *vfsmount_group;
+ struct fsnotify_mark_connector *inode_conn, *vfsmount_conn;
+ struct fsnotify_iter_info iter_info;
struct mount *mnt;
- int idx, ret = 0;
+ int ret = 0;
/* global tests shouldn't care about events on child only the specific event */
__u32 test_mask = (mask & ~FS_EVENT_ON_CHILD);
@@ -210,8 +270,8 @@ int fsnotify(struct inode *to_tell, __u32 mask, const void *data, int data_is,
* SRCU because we have no references to any objects and do not
* need SRCU to keep them "alive".
*/
- if (hlist_empty(&to_tell->i_fsnotify_marks) &&
- (!mnt || hlist_empty(&mnt->mnt_fsnotify_marks)))
+ if (!to_tell->i_fsnotify_marks &&
+ (!mnt || !mnt->mnt_fsnotify_marks))
return 0;
/*
* if this is a modify event we may need to clear the ignored masks
@@ -223,19 +283,30 @@ int fsnotify(struct inode *to_tell, __u32 mask, const void *data, int data_is,
!(mnt && test_mask & mnt->mnt_fsnotify_mask))
return 0;
- idx = srcu_read_lock(&fsnotify_mark_srcu);
+ iter_info.srcu_idx = srcu_read_lock(&fsnotify_mark_srcu);
if ((mask & FS_MODIFY) ||
- (test_mask & to_tell->i_fsnotify_mask))
- inode_node = srcu_dereference(to_tell->i_fsnotify_marks.first,
+ (test_mask & to_tell->i_fsnotify_mask)) {
+ inode_conn = srcu_dereference(to_tell->i_fsnotify_marks,
&fsnotify_mark_srcu);
+ if (inode_conn)
+ inode_node = srcu_dereference(inode_conn->list.first,
+ &fsnotify_mark_srcu);
+ }
if (mnt && ((mask & FS_MODIFY) ||
(test_mask & mnt->mnt_fsnotify_mask))) {
- vfsmount_node = srcu_dereference(mnt->mnt_fsnotify_marks.first,
- &fsnotify_mark_srcu);
- inode_node = srcu_dereference(to_tell->i_fsnotify_marks.first,
+ inode_conn = srcu_dereference(to_tell->i_fsnotify_marks,
&fsnotify_mark_srcu);
+ if (inode_conn)
+ inode_node = srcu_dereference(inode_conn->list.first,
+ &fsnotify_mark_srcu);
+ vfsmount_conn = srcu_dereference(mnt->mnt_fsnotify_marks,
+ &fsnotify_mark_srcu);
+ if (vfsmount_conn)
+ vfsmount_node = srcu_dereference(
+ vfsmount_conn->list.first,
+ &fsnotify_mark_srcu);
}
/*
@@ -272,8 +343,13 @@ int fsnotify(struct inode *to_tell, __u32 mask, const void *data, int data_is,
vfsmount_mark = NULL;
}
}
+
+ iter_info.inode_mark = inode_mark;
+ iter_info.vfsmount_mark = vfsmount_mark;
+
ret = send_to_group(to_tell, inode_mark, vfsmount_mark, mask,
- data, data_is, cookie, file_name);
+ data, data_is, cookie, file_name,
+ &iter_info);
if (ret && (mask & ALL_FSNOTIFY_PERM_EVENTS))
goto out;
@@ -287,12 +363,14 @@ int fsnotify(struct inode *to_tell, __u32 mask, const void *data, int data_is,
}
ret = 0;
out:
- srcu_read_unlock(&fsnotify_mark_srcu, idx);
+ srcu_read_unlock(&fsnotify_mark_srcu, iter_info.srcu_idx);
return ret;
}
EXPORT_SYMBOL_GPL(fsnotify);
+extern struct kmem_cache *fsnotify_mark_connector_cachep;
+
static __init int fsnotify_init(void)
{
int ret;
@@ -303,6 +381,9 @@ static __init int fsnotify_init(void)
if (ret)
panic("initializing fsnotify_mark_srcu");
+ fsnotify_mark_connector_cachep = KMEM_CACHE(fsnotify_mark_connector,
+ SLAB_PANIC);
+
return 0;
}
core_initcall(fsnotify_init);
diff --git a/fs/notify/fsnotify.h b/fs/notify/fsnotify.h
index 0a3bc2c..bf012e8 100644
--- a/fs/notify/fsnotify.h
+++ b/fs/notify/fsnotify.h
@@ -8,60 +8,36 @@
#include "../mount.h"
+struct fsnotify_iter_info {
+ struct fsnotify_mark *inode_mark;
+ struct fsnotify_mark *vfsmount_mark;
+ int srcu_idx;
+};
+
/* destroy all events sitting in this groups notification queue */
extern void fsnotify_flush_notify(struct fsnotify_group *group);
/* protects reads of inode and vfsmount marks list */
extern struct srcu_struct fsnotify_mark_srcu;
-/* Calculate mask of events for a list of marks */
-extern u32 fsnotify_recalc_mask(struct hlist_head *head);
-
/* compare two groups for sorting of marks lists */
extern int fsnotify_compare_groups(struct fsnotify_group *a,
struct fsnotify_group *b);
-extern void fsnotify_set_inode_mark_mask_locked(struct fsnotify_mark *fsn_mark,
- __u32 mask);
-/* Add mark to a proper place in mark list */
-extern int fsnotify_add_mark_list(struct hlist_head *head,
- struct fsnotify_mark *mark,
- int allow_dups);
-/* add a mark to an inode */
-extern int fsnotify_add_inode_mark(struct fsnotify_mark *mark,
- struct fsnotify_group *group, struct inode *inode,
- int allow_dups);
-/* add a mark to a vfsmount */
-extern int fsnotify_add_vfsmount_mark(struct fsnotify_mark *mark,
- struct fsnotify_group *group, struct vfsmount *mnt,
- int allow_dups);
-
-/* vfsmount specific destruction of a mark */
-extern void fsnotify_destroy_vfsmount_mark(struct fsnotify_mark *mark);
-/* inode specific destruction of a mark */
-extern void fsnotify_destroy_inode_mark(struct fsnotify_mark *mark);
-/* Find mark belonging to given group in the list of marks */
-extern struct fsnotify_mark *fsnotify_find_mark(struct hlist_head *head,
- struct fsnotify_group *group);
-/* Destroy all marks in the given list protected by 'lock' */
-extern void fsnotify_destroy_marks(struct hlist_head *head, spinlock_t *lock);
+/* Destroy all marks connected via given connector */
+extern void fsnotify_destroy_marks(struct fsnotify_mark_connector __rcu **connp);
/* run the list of all marks associated with inode and destroy them */
static inline void fsnotify_clear_marks_by_inode(struct inode *inode)
{
- fsnotify_destroy_marks(&inode->i_fsnotify_marks, &inode->i_lock);
+ fsnotify_destroy_marks(&inode->i_fsnotify_marks);
}
/* run the list of all marks associated with vfsmount and destroy them */
static inline void fsnotify_clear_marks_by_mount(struct vfsmount *mnt)
{
- fsnotify_destroy_marks(&real_mount(mnt)->mnt_fsnotify_marks,
- &mnt->mnt_root->d_lock);
+ fsnotify_destroy_marks(&real_mount(mnt)->mnt_fsnotify_marks);
}
-/* prepare for freeing all marks associated with given group */
-extern void fsnotify_detach_group_marks(struct fsnotify_group *group);
-/*
- * wait for fsnotify_mark_srcu period to end and free all marks in destroy_list
- */
-extern void fsnotify_mark_destroy_list(void);
+/* Wait until all marks queued for destruction are destroyed */
+extern void fsnotify_wait_marks_destroyed(void);
/*
* update the dentry->d_flags of all of inode's children to indicate if inode cares
diff --git a/fs/notify/group.c b/fs/notify/group.c
index fbe3cbe..32357534 100644
--- a/fs/notify/group.c
+++ b/fs/notify/group.c
@@ -66,14 +66,23 @@ void fsnotify_destroy_group(struct fsnotify_group *group)
*/
fsnotify_group_stop_queueing(group);
- /* clear all inode marks for this group, attach them to destroy_list */
- fsnotify_detach_group_marks(group);
+ /* Clear all marks for this group and queue them for destruction */
+ fsnotify_clear_marks_by_group(group, FSNOTIFY_OBJ_ALL_TYPES);
/*
- * Wait for fsnotify_mark_srcu period to end and free all marks in
- * destroy_list
+ * Some marks can still be pinned when waiting for response from
+ * userspace. Wait for those now. fsnotify_prepare_user_wait() will
+ * not succeed now so this wait is race-free.
*/
- fsnotify_mark_destroy_list();
+ wait_event(group->notification_waitq, !atomic_read(&group->user_waits));
+
+ /*
+ * Wait until all marks get really destroyed. We could actually destroy
+ * them ourselves instead of waiting for worker to do it, however that
+ * would be racy as worker can already be processing some marks before
+ * we even entered fsnotify_destroy_group().
+ */
+ fsnotify_wait_marks_destroyed();
/*
* Since we have waited for fsnotify_mark_srcu in
@@ -124,6 +133,7 @@ struct fsnotify_group *fsnotify_alloc_group(const struct fsnotify_ops *ops)
/* set to 0 when there a no external references to this group */
atomic_set(&group->refcnt, 1);
atomic_set(&group->num_marks, 0);
+ atomic_set(&group->user_waits, 0);
spin_lock_init(&group->notification_lock);
INIT_LIST_HEAD(&group->notification_list);
diff --git a/fs/notify/inode_mark.c b/fs/notify/inode_mark.c
deleted file mode 100644
index a364524..0000000
--- a/fs/notify/inode_mark.c
+++ /dev/null
@@ -1,199 +0,0 @@
-/*
- * Copyright (C) 2008 Red Hat, Inc., Eric Paris <eparis@redhat.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2, or (at your option)
- * any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; see the file COPYING. If not, write to
- * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#include <linux/fs.h>
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/mutex.h>
-#include <linux/spinlock.h>
-
-#include <linux/atomic.h>
-
-#include <linux/fsnotify_backend.h>
-#include "fsnotify.h"
-
-#include "../internal.h"
-
-/*
- * Recalculate the inode->i_fsnotify_mask, or the mask of all FS_* event types
- * any notifier is interested in hearing for this inode.
- */
-void fsnotify_recalc_inode_mask(struct inode *inode)
-{
- spin_lock(&inode->i_lock);
- inode->i_fsnotify_mask = fsnotify_recalc_mask(&inode->i_fsnotify_marks);
- spin_unlock(&inode->i_lock);
-
- __fsnotify_update_child_dentry_flags(inode);
-}
-
-void fsnotify_destroy_inode_mark(struct fsnotify_mark *mark)
-{
- struct inode *inode = mark->inode;
-
- BUG_ON(!mutex_is_locked(&mark->group->mark_mutex));
- assert_spin_locked(&mark->lock);
-
- spin_lock(&inode->i_lock);
-
- hlist_del_init_rcu(&mark->obj_list);
- mark->inode = NULL;
-
- /*
- * this mark is now off the inode->i_fsnotify_marks list and we
- * hold the inode->i_lock, so this is the perfect time to update the
- * inode->i_fsnotify_mask
- */
- inode->i_fsnotify_mask = fsnotify_recalc_mask(&inode->i_fsnotify_marks);
- spin_unlock(&inode->i_lock);
-}
-
-/*
- * Given a group clear all of the inode marks associated with that group.
- */
-void fsnotify_clear_inode_marks_by_group(struct fsnotify_group *group)
-{
- fsnotify_clear_marks_by_group_flags(group, FSNOTIFY_MARK_FLAG_INODE);
-}
-
-/*
- * given a group and inode, find the mark associated with that combination.
- * if found take a reference to that mark and return it, else return NULL
- */
-struct fsnotify_mark *fsnotify_find_inode_mark(struct fsnotify_group *group,
- struct inode *inode)
-{
- struct fsnotify_mark *mark;
-
- spin_lock(&inode->i_lock);
- mark = fsnotify_find_mark(&inode->i_fsnotify_marks, group);
- spin_unlock(&inode->i_lock);
-
- return mark;
-}
-
-/*
- * If we are setting a mark mask on an inode mark we should pin the inode
- * in memory.
- */
-void fsnotify_set_inode_mark_mask_locked(struct fsnotify_mark *mark,
- __u32 mask)
-{
- struct inode *inode;
-
- assert_spin_locked(&mark->lock);
-
- if (mask &&
- mark->inode &&
- !(mark->flags & FSNOTIFY_MARK_FLAG_OBJECT_PINNED)) {
- mark->flags |= FSNOTIFY_MARK_FLAG_OBJECT_PINNED;
- inode = igrab(mark->inode);
- /*
- * we shouldn't be able to get here if the inode wasn't
- * already safely held in memory. But bug in case it
- * ever is wrong.
- */
- BUG_ON(!inode);
- }
-}
-
-/*
- * Attach an initialized mark to a given inode.
- * These marks may be used for the fsnotify backend to determine which
- * event types should be delivered to which group and for which inodes. These
- * marks are ordered according to priority, highest number first, and then by
- * the group's location in memory.
- */
-int fsnotify_add_inode_mark(struct fsnotify_mark *mark,
- struct fsnotify_group *group, struct inode *inode,
- int allow_dups)
-{
- int ret;
-
- mark->flags |= FSNOTIFY_MARK_FLAG_INODE;
-
- BUG_ON(!mutex_is_locked(&group->mark_mutex));
- assert_spin_locked(&mark->lock);
-
- spin_lock(&inode->i_lock);
- mark->inode = inode;
- ret = fsnotify_add_mark_list(&inode->i_fsnotify_marks, mark,
- allow_dups);
- inode->i_fsnotify_mask = fsnotify_recalc_mask(&inode->i_fsnotify_marks);
- spin_unlock(&inode->i_lock);
-
- return ret;
-}
-
-/**
- * fsnotify_unmount_inodes - an sb is unmounting. handle any watched inodes.
- * @sb: superblock being unmounted.
- *
- * Called during unmount with no locks held, so needs to be safe against
- * concurrent modifiers. We temporarily drop sb->s_inode_list_lock and CAN block.
- */
-void fsnotify_unmount_inodes(struct super_block *sb)
-{
- struct inode *inode, *iput_inode = NULL;
-
- spin_lock(&sb->s_inode_list_lock);
- list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
- /*
- * We cannot __iget() an inode in state I_FREEING,
- * I_WILL_FREE, or I_NEW which is fine because by that point
- * the inode cannot have any associated watches.
- */
- spin_lock(&inode->i_lock);
- if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) {
- spin_unlock(&inode->i_lock);
- continue;
- }
-
- /*
- * If i_count is zero, the inode cannot have any watches and
- * doing an __iget/iput with MS_ACTIVE clear would actually
- * evict all inodes with zero i_count from icache which is
- * unnecessarily violent and may in fact be illegal to do.
- */
- if (!atomic_read(&inode->i_count)) {
- spin_unlock(&inode->i_lock);
- continue;
- }
-
- __iget(inode);
- spin_unlock(&inode->i_lock);
- spin_unlock(&sb->s_inode_list_lock);
-
- if (iput_inode)
- iput(iput_inode);
-
- /* for each watch, send FS_UNMOUNT and then remove it */
- fsnotify(inode, FS_UNMOUNT, inode, FSNOTIFY_EVENT_INODE, NULL, 0);
-
- fsnotify_inode_delete(inode);
-
- iput_inode = inode;
-
- spin_lock(&sb->s_inode_list_lock);
- }
- spin_unlock(&sb->s_inode_list_lock);
-
- if (iput_inode)
- iput(iput_inode);
-}
diff --git a/fs/notify/inotify/inotify.h b/fs/notify/inotify/inotify.h
index 7c461fd..9ff67b6 100644
--- a/fs/notify/inotify/inotify.h
+++ b/fs/notify/inotify/inotify.h
@@ -27,9 +27,11 @@ extern int inotify_handle_event(struct fsnotify_group *group,
struct fsnotify_mark *inode_mark,
struct fsnotify_mark *vfsmount_mark,
u32 mask, const void *data, int data_type,
- const unsigned char *file_name, u32 cookie);
+ const unsigned char *file_name, u32 cookie,
+ struct fsnotify_iter_info *iter_info);
extern const struct fsnotify_ops inotify_fsnotify_ops;
+extern struct kmem_cache *inotify_inode_mark_cachep;
#ifdef CONFIG_INOTIFY_USER
static inline void dec_inotify_instances(struct ucounts *ucounts)
diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c
index 1aeb837..8b73332 100644
--- a/fs/notify/inotify/inotify_fsnotify.c
+++ b/fs/notify/inotify/inotify_fsnotify.c
@@ -68,7 +68,8 @@ int inotify_handle_event(struct fsnotify_group *group,
struct fsnotify_mark *inode_mark,
struct fsnotify_mark *vfsmount_mark,
u32 mask, const void *data, int data_type,
- const unsigned char *file_name, u32 cookie)
+ const unsigned char *file_name, u32 cookie,
+ struct fsnotify_iter_info *iter_info)
{
struct inotify_inode_mark *i_mark;
struct inotify_event_info *event;
@@ -156,8 +157,8 @@ static int idr_callback(int id, void *p, void *data)
* BUG() that was here.
*/
if (fsn_mark)
- printk(KERN_WARNING "fsn_mark->group=%p inode=%p wd=%d\n",
- fsn_mark->group, fsn_mark->inode, i_mark->wd);
+ printk(KERN_WARNING "fsn_mark->group=%p wd=%d\n",
+ fsn_mark->group, i_mark->wd);
return 0;
}
@@ -175,9 +176,20 @@ static void inotify_free_event(struct fsnotify_event *fsn_event)
kfree(INOTIFY_E(fsn_event));
}
+/* ding dong the mark is dead */
+static void inotify_free_mark(struct fsnotify_mark *fsn_mark)
+{
+ struct inotify_inode_mark *i_mark;
+
+ i_mark = container_of(fsn_mark, struct inotify_inode_mark, fsn_mark);
+
+ kmem_cache_free(inotify_inode_mark_cachep, i_mark);
+}
+
const struct fsnotify_ops inotify_fsnotify_ops = {
.handle_event = inotify_handle_event,
.free_group_priv = inotify_free_group_priv,
.free_event = inotify_free_event,
.freeing_mark = inotify_freeing_mark,
+ .free_mark = inotify_free_mark,
};
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index 498d609..7cc7d3f 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -47,7 +47,7 @@
/* configurable via /proc/sys/fs/inotify/ */
static int inotify_max_queued_events __read_mostly;
-static struct kmem_cache *inotify_inode_mark_cachep __read_mostly;
+struct kmem_cache *inotify_inode_mark_cachep __read_mostly;
#ifdef CONFIG_SYSCTL
@@ -395,21 +395,6 @@ static struct inotify_inode_mark *inotify_idr_find(struct fsnotify_group *group,
return i_mark;
}
-static void do_inotify_remove_from_idr(struct fsnotify_group *group,
- struct inotify_inode_mark *i_mark)
-{
- struct idr *idr = &group->inotify_data.idr;
- spinlock_t *idr_lock = &group->inotify_data.idr_lock;
- int wd = i_mark->wd;
-
- assert_spin_locked(idr_lock);
-
- idr_remove(idr, wd);
-
- /* removed from the idr, drop that ref */
- fsnotify_put_mark(&i_mark->fsn_mark);
-}
-
/*
* Remove the mark from the idr (if present) and drop the reference
* on the mark because it was in the idr.
@@ -417,6 +402,7 @@ static void do_inotify_remove_from_idr(struct fsnotify_group *group,
static void inotify_remove_from_idr(struct fsnotify_group *group,
struct inotify_inode_mark *i_mark)
{
+ struct idr *idr = &group->inotify_data.idr;
spinlock_t *idr_lock = &group->inotify_data.idr_lock;
struct inotify_inode_mark *found_i_mark = NULL;
int wd;
@@ -429,18 +415,16 @@ static void inotify_remove_from_idr(struct fsnotify_group *group,
* if it wasn't....
*/
if (wd == -1) {
- WARN_ONCE(1, "%s: i_mark=%p i_mark->wd=%d i_mark->group=%p"
- " i_mark->inode=%p\n", __func__, i_mark, i_mark->wd,
- i_mark->fsn_mark.group, i_mark->fsn_mark.inode);
+ WARN_ONCE(1, "%s: i_mark=%p i_mark->wd=%d i_mark->group=%p\n",
+ __func__, i_mark, i_mark->wd, i_mark->fsn_mark.group);
goto out;
}
/* Lets look in the idr to see if we find it */
found_i_mark = inotify_idr_find_locked(group, wd);
if (unlikely(!found_i_mark)) {
- WARN_ONCE(1, "%s: i_mark=%p i_mark->wd=%d i_mark->group=%p"
- " i_mark->inode=%p\n", __func__, i_mark, i_mark->wd,
- i_mark->fsn_mark.group, i_mark->fsn_mark.inode);
+ WARN_ONCE(1, "%s: i_mark=%p i_mark->wd=%d i_mark->group=%p\n",
+ __func__, i_mark, i_mark->wd, i_mark->fsn_mark.group);
goto out;
}
@@ -451,35 +435,33 @@ static void inotify_remove_from_idr(struct fsnotify_group *group,
*/
if (unlikely(found_i_mark != i_mark)) {
WARN_ONCE(1, "%s: i_mark=%p i_mark->wd=%d i_mark->group=%p "
- "mark->inode=%p found_i_mark=%p found_i_mark->wd=%d "
- "found_i_mark->group=%p found_i_mark->inode=%p\n",
- __func__, i_mark, i_mark->wd, i_mark->fsn_mark.group,
- i_mark->fsn_mark.inode, found_i_mark, found_i_mark->wd,
- found_i_mark->fsn_mark.group,
- found_i_mark->fsn_mark.inode);
+ "found_i_mark=%p found_i_mark->wd=%d "
+ "found_i_mark->group=%p\n", __func__, i_mark,
+ i_mark->wd, i_mark->fsn_mark.group, found_i_mark,
+ found_i_mark->wd, found_i_mark->fsn_mark.group);
goto out;
}
/*
* One ref for being in the idr
- * one ref held by the caller trying to kill us
* one ref grabbed by inotify_idr_find
*/
- if (unlikely(atomic_read(&i_mark->fsn_mark.refcnt) < 3)) {
- printk(KERN_ERR "%s: i_mark=%p i_mark->wd=%d i_mark->group=%p"
- " i_mark->inode=%p\n", __func__, i_mark, i_mark->wd,
- i_mark->fsn_mark.group, i_mark->fsn_mark.inode);
+ if (unlikely(atomic_read(&i_mark->fsn_mark.refcnt) < 2)) {
+ printk(KERN_ERR "%s: i_mark=%p i_mark->wd=%d i_mark->group=%p\n",
+ __func__, i_mark, i_mark->wd, i_mark->fsn_mark.group);
/* we can't really recover with bad ref cnting.. */
BUG();
}
- do_inotify_remove_from_idr(group, i_mark);
+ idr_remove(idr, wd);
+ /* Removed from the idr, drop that ref. */
+ fsnotify_put_mark(&i_mark->fsn_mark);
out:
+ i_mark->wd = -1;
+ spin_unlock(idr_lock);
/* match the ref taken by inotify_idr_find_locked() */
if (found_i_mark)
fsnotify_put_mark(&found_i_mark->fsn_mark);
- i_mark->wd = -1;
- spin_unlock(idr_lock);
}
/*
@@ -492,7 +474,7 @@ void inotify_ignored_and_remove_idr(struct fsnotify_mark *fsn_mark,
/* Queue ignore event for the watch */
inotify_handle_event(group, NULL, fsn_mark, NULL, FS_IN_IGNORED,
- NULL, FSNOTIFY_EVENT_NONE, NULL, 0);
+ NULL, FSNOTIFY_EVENT_NONE, NULL, 0, NULL);
i_mark = container_of(fsn_mark, struct inotify_inode_mark, fsn_mark);
/* remove this mark from the idr */
@@ -501,16 +483,6 @@ void inotify_ignored_and_remove_idr(struct fsnotify_mark *fsn_mark,
dec_inotify_watches(group->inotify_data.ucounts);
}
-/* ding dong the mark is dead */
-static void inotify_free_mark(struct fsnotify_mark *fsn_mark)
-{
- struct inotify_inode_mark *i_mark;
-
- i_mark = container_of(fsn_mark, struct inotify_inode_mark, fsn_mark);
-
- kmem_cache_free(inotify_inode_mark_cachep, i_mark);
-}
-
static int inotify_update_existing_watch(struct fsnotify_group *group,
struct inode *inode,
u32 arg)
@@ -524,21 +496,19 @@ static int inotify_update_existing_watch(struct fsnotify_group *group,
mask = inotify_arg_to_mask(arg);
- fsn_mark = fsnotify_find_inode_mark(group, inode);
+ fsn_mark = fsnotify_find_mark(&inode->i_fsnotify_marks, group);
if (!fsn_mark)
return -ENOENT;
i_mark = container_of(fsn_mark, struct inotify_inode_mark, fsn_mark);
spin_lock(&fsn_mark->lock);
-
old_mask = fsn_mark->mask;
if (add)
- fsnotify_set_mark_mask_locked(fsn_mark, (fsn_mark->mask | mask));
+ fsn_mark->mask |= mask;
else
- fsnotify_set_mark_mask_locked(fsn_mark, mask);
+ fsn_mark->mask = mask;
new_mask = fsn_mark->mask;
-
spin_unlock(&fsn_mark->lock);
if (old_mask != new_mask) {
@@ -549,7 +519,7 @@ static int inotify_update_existing_watch(struct fsnotify_group *group,
/* update the inode with this new fsn_mark */
if (dropped || do_inode)
- fsnotify_recalc_inode_mask(inode);
+ fsnotify_recalc_mask(inode->i_fsnotify_marks);
}
@@ -578,7 +548,7 @@ static int inotify_new_watch(struct fsnotify_group *group,
if (unlikely(!tmp_i_mark))
return -ENOMEM;
- fsnotify_init_mark(&tmp_i_mark->fsn_mark, inotify_free_mark);
+ fsnotify_init_mark(&tmp_i_mark->fsn_mark, group);
tmp_i_mark->fsn_mark.mask = mask;
tmp_i_mark->wd = -1;
@@ -594,8 +564,7 @@ static int inotify_new_watch(struct fsnotify_group *group,
}
/* we are on the idr, now get on the inode */
- ret = fsnotify_add_mark_locked(&tmp_i_mark->fsn_mark, group, inode,
- NULL, 0);
+ ret = fsnotify_add_mark_locked(&tmp_i_mark->fsn_mark, inode, NULL, 0);
if (ret) {
/* we failed to get on the inode, get off the idr */
inotify_remove_from_idr(group, tmp_i_mark);
diff --git a/fs/notify/mark.c b/fs/notify/mark.c
index 6043306..9991f88 100644
--- a/fs/notify/mark.c
+++ b/fs/notify/mark.c
@@ -33,7 +33,7 @@
*
* group->mark_mutex
* mark->lock
- * inode->i_lock
+ * mark->connector->lock
*
* group->mark_mutex protects the marks_list anchored inside a given group and
* each mark is hooked via the g_list. It also protects the groups private
@@ -44,14 +44,22 @@
* is assigned to as well as the access to a reference of the inode/vfsmount
* that is being watched by the mark.
*
- * inode->i_lock protects the i_fsnotify_marks list anchored inside a
- * given inode and each mark is hooked via the i_list. (and sorta the
- * free_i_list)
+ * mark->connector->lock protects the list of marks anchored inside an
+ * inode / vfsmount and each mark is hooked via the obj_list.
*
+ * A list of notification marks relating to inode / mnt is contained in
+ * fsnotify_mark_connector. That structure is alive as long as there are any
+ * marks in the list and is also protected by fsnotify_mark_srcu. A mark gets
+ * detached from fsnotify_mark_connector when last reference to the mark is
+ * dropped. Thus having mark reference is enough to protect mark->connector
+ * pointer and to make sure fsnotify_mark_connector cannot disappear. Also
+ * because we remove mark from g_list before dropping mark reference associated
+ * with that, any mark found through g_list is guaranteed to have
+ * mark->connector set until we drop group->mark_mutex.
*
* LIFETIME:
* Inode marks survive between when they are added to an inode and when their
- * refcnt==0.
+ * refcnt==0. Marks are also protected by fsnotify_mark_srcu.
*
* The inode mark can be cleared for a number of different reasons including:
* - The inode is unlinked for the last time. (fsnotify_inode_remove)
@@ -61,17 +69,6 @@
* - The fsnotify_group associated with the mark is going away and all such marks
* need to be cleaned up. (fsnotify_clear_marks_by_group)
*
- * Worst case we are given an inode and need to clean up all the marks on that
- * inode. We take i_lock and walk the i_fsnotify_marks safely. For each
- * mark on the list we take a reference (so the mark can't disappear under us).
- * We remove that mark form the inode's list of marks and we add this mark to a
- * private list anchored on the stack using i_free_list; we walk i_free_list
- * and before we destroy the mark we make sure that we dont race with a
- * concurrent destroy_group by getting a ref to the marks group and taking the
- * groups mutex.
-
- * Very similarly for freeing by group, except we use free_g_list.
- *
* This has the very interesting property of being able to run concurrently with
* any (or all) other directions.
*/
@@ -94,94 +91,281 @@
#define FSNOTIFY_REAPER_DELAY (1) /* 1 jiffy */
struct srcu_struct fsnotify_mark_srcu;
+struct kmem_cache *fsnotify_mark_connector_cachep;
+
static DEFINE_SPINLOCK(destroy_lock);
static LIST_HEAD(destroy_list);
+static struct fsnotify_mark_connector *connector_destroy_list;
static void fsnotify_mark_destroy_workfn(struct work_struct *work);
static DECLARE_DELAYED_WORK(reaper_work, fsnotify_mark_destroy_workfn);
+static void fsnotify_connector_destroy_workfn(struct work_struct *work);
+static DECLARE_WORK(connector_reaper_work, fsnotify_connector_destroy_workfn);
+
void fsnotify_get_mark(struct fsnotify_mark *mark)
{
+ WARN_ON_ONCE(!atomic_read(&mark->refcnt));
atomic_inc(&mark->refcnt);
}
-void fsnotify_put_mark(struct fsnotify_mark *mark)
+/*
+ * Get a mark reference when the mark was found via lockless traversal of the
+ * object list. The mark may already be removed from the list and on its way
+ * to being destroyed once the SRCU period ends, in which case we return false.
+ */
+static bool fsnotify_get_mark_safe(struct fsnotify_mark *mark)
{
- if (atomic_dec_and_test(&mark->refcnt)) {
- if (mark->group)
- fsnotify_put_group(mark->group);
- mark->free_mark(mark);
- }
+ return atomic_inc_not_zero(&mark->refcnt);
}
-/* Calculate mask of events for a list of marks */
-u32 fsnotify_recalc_mask(struct hlist_head *head)
+static void __fsnotify_recalc_mask(struct fsnotify_mark_connector *conn)
{
u32 new_mask = 0;
struct fsnotify_mark *mark;
- hlist_for_each_entry(mark, head, obj_list)
- new_mask |= mark->mask;
- return new_mask;
+ assert_spin_locked(&conn->lock);
+ hlist_for_each_entry(mark, &conn->list, obj_list) {
+ if (mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED)
+ new_mask |= mark->mask;
+ }
+ if (conn->flags & FSNOTIFY_OBJ_TYPE_INODE)
+ conn->inode->i_fsnotify_mask = new_mask;
+ else if (conn->flags & FSNOTIFY_OBJ_TYPE_VFSMOUNT)
+ real_mount(conn->mnt)->mnt_fsnotify_mask = new_mask;
}
/*
- * Remove mark from inode / vfsmount list, group list, drop inode reference
- * if we got one.
- *
- * Must be called with group->mark_mutex held.
+ * Calculate mask of events for a list of marks. The caller must make sure
+ * connector and connector->inode cannot disappear under us. Callers achieve
+ * this by holding a mark->lock or mark->group->mark_mutex for a mark on this
+ * list.
*/
-void fsnotify_detach_mark(struct fsnotify_mark *mark)
+void fsnotify_recalc_mask(struct fsnotify_mark_connector *conn)
+{
+ if (!conn)
+ return;
+
+ spin_lock(&conn->lock);
+ __fsnotify_recalc_mask(conn);
+ spin_unlock(&conn->lock);
+ if (conn->flags & FSNOTIFY_OBJ_TYPE_INODE)
+ __fsnotify_update_child_dentry_flags(conn->inode);
+}
+
+/* Free all connectors queued for freeing once SRCU period ends */
+static void fsnotify_connector_destroy_workfn(struct work_struct *work)
+{
+ struct fsnotify_mark_connector *conn, *free;
+
+ spin_lock(&destroy_lock);
+ conn = connector_destroy_list;
+ connector_destroy_list = NULL;
+ spin_unlock(&destroy_lock);
+
+ synchronize_srcu(&fsnotify_mark_srcu);
+ while (conn) {
+ free = conn;
+ conn = conn->destroy_next;
+ kmem_cache_free(fsnotify_mark_connector_cachep, free);
+ }
+}
+
+static struct inode *fsnotify_detach_connector_from_object(
+ struct fsnotify_mark_connector *conn)
{
struct inode *inode = NULL;
+
+ if (conn->flags & FSNOTIFY_OBJ_TYPE_INODE) {
+ inode = conn->inode;
+ rcu_assign_pointer(inode->i_fsnotify_marks, NULL);
+ inode->i_fsnotify_mask = 0;
+ conn->inode = NULL;
+ conn->flags &= ~FSNOTIFY_OBJ_TYPE_INODE;
+ } else if (conn->flags & FSNOTIFY_OBJ_TYPE_VFSMOUNT) {
+ rcu_assign_pointer(real_mount(conn->mnt)->mnt_fsnotify_marks,
+ NULL);
+ real_mount(conn->mnt)->mnt_fsnotify_mask = 0;
+ conn->mnt = NULL;
+ conn->flags &= ~FSNOTIFY_OBJ_TYPE_VFSMOUNT;
+ }
+
+ return inode;
+}
+
+static void fsnotify_final_mark_destroy(struct fsnotify_mark *mark)
+{
struct fsnotify_group *group = mark->group;
- BUG_ON(!mutex_is_locked(&group->mark_mutex));
+ if (WARN_ON_ONCE(!group))
+ return;
+ group->ops->free_mark(mark);
+ fsnotify_put_group(group);
+}
- spin_lock(&mark->lock);
+void fsnotify_put_mark(struct fsnotify_mark *mark)
+{
+ struct fsnotify_mark_connector *conn;
+ struct inode *inode = NULL;
+ bool free_conn = false;
- /* something else already called this function on this mark */
- if (!(mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED)) {
- spin_unlock(&mark->lock);
+ /* Catch marks that were actually never attached to object */
+ if (!mark->connector) {
+ if (atomic_dec_and_test(&mark->refcnt))
+ fsnotify_final_mark_destroy(mark);
return;
}
- mark->flags &= ~FSNOTIFY_MARK_FLAG_ATTACHED;
+ /*
+ * We have to be careful so that traversals of obj_list under lock can
+ * safely grab mark reference.
+ */
+ if (!atomic_dec_and_lock(&mark->refcnt, &mark->connector->lock))
+ return;
- if (mark->flags & FSNOTIFY_MARK_FLAG_INODE) {
- inode = mark->inode;
- fsnotify_destroy_inode_mark(mark);
- } else if (mark->flags & FSNOTIFY_MARK_FLAG_VFSMOUNT)
- fsnotify_destroy_vfsmount_mark(mark);
- else
- BUG();
+ conn = mark->connector;
+ hlist_del_init_rcu(&mark->obj_list);
+ if (hlist_empty(&conn->list)) {
+ inode = fsnotify_detach_connector_from_object(conn);
+ free_conn = true;
+ } else {
+ __fsnotify_recalc_mask(conn);
+ }
+ mark->connector = NULL;
+ spin_unlock(&conn->lock);
+
+ iput(inode);
+
+ if (free_conn) {
+ spin_lock(&destroy_lock);
+ conn->destroy_next = connector_destroy_list;
+ connector_destroy_list = conn;
+ spin_unlock(&destroy_lock);
+ queue_work(system_unbound_wq, &connector_reaper_work);
+ }
/*
* Note that we didn't update flags telling whether inode cares about
* what's happening with children. We update these flags from
* __fsnotify_parent() lazily when next event happens on one of our
* children.
*/
+ spin_lock(&destroy_lock);
+ list_add(&mark->g_list, &destroy_list);
+ spin_unlock(&destroy_lock);
+ queue_delayed_work(system_unbound_wq, &reaper_work,
+ FSNOTIFY_REAPER_DELAY);
+}
- list_del_init(&mark->g_list);
+bool fsnotify_prepare_user_wait(struct fsnotify_iter_info *iter_info)
+{
+ struct fsnotify_group *group;
- spin_unlock(&mark->lock);
+ if (WARN_ON_ONCE(!iter_info->inode_mark && !iter_info->vfsmount_mark))
+ return false;
+
+ if (iter_info->inode_mark)
+ group = iter_info->inode_mark->group;
+ else
+ group = iter_info->vfsmount_mark->group;
+
+ /*
+ * Since acquisition of mark reference is an atomic op as well, we can
+ * be sure this inc is seen before any effect of refcount increment.
+ */
+ atomic_inc(&group->user_waits);
+
+ if (iter_info->inode_mark) {
+ /* This can fail if mark is being removed */
+ if (!fsnotify_get_mark_safe(iter_info->inode_mark))
+ goto out_wait;
+ }
+ if (iter_info->vfsmount_mark) {
+ if (!fsnotify_get_mark_safe(iter_info->vfsmount_mark))
+ goto out_inode;
+ }
- if (inode && (mark->flags & FSNOTIFY_MARK_FLAG_OBJECT_PINNED))
- iput(inode);
+ /*
+ * Now that both marks are pinned by refcount in the inode / vfsmount
+ * lists, we can drop SRCU lock, and safely resume the list iteration
+ * once userspace returns.
+ */
+ srcu_read_unlock(&fsnotify_mark_srcu, iter_info->srcu_idx);
+
+ return true;
+out_inode:
+ if (iter_info->inode_mark)
+ fsnotify_put_mark(iter_info->inode_mark);
+out_wait:
+ if (atomic_dec_and_test(&group->user_waits) && group->shutdown)
+ wake_up(&group->notification_waitq);
+ return false;
+}
+
+void fsnotify_finish_user_wait(struct fsnotify_iter_info *iter_info)
+{
+ struct fsnotify_group *group = NULL;
+
+ iter_info->srcu_idx = srcu_read_lock(&fsnotify_mark_srcu);
+ if (iter_info->inode_mark) {
+ group = iter_info->inode_mark->group;
+ fsnotify_put_mark(iter_info->inode_mark);
+ }
+ if (iter_info->vfsmount_mark) {
+ group = iter_info->vfsmount_mark->group;
+ fsnotify_put_mark(iter_info->vfsmount_mark);
+ }
+ /*
+ * We abuse notification_waitq on group shutdown for waiting for all
+ * marks pinned when waiting for userspace.
+ */
+ if (atomic_dec_and_test(&group->user_waits) && group->shutdown)
+ wake_up(&group->notification_waitq);
+}
+
+/*
+ * Flag mark as detached, remove it from group list. Mark still stays in object
+ * list until its last reference is dropped. Note that we rely on mark being
+ * removed from group list before corresponding reference to it is dropped. In
+ * particular we rely on mark->connector being valid while we hold
+ * group->mark_mutex if we found the mark through g_list.
+ *
+ * Must be called with group->mark_mutex held. The caller must either hold
+ * reference to the mark or be protected by fsnotify_mark_srcu.
+ */
+void fsnotify_detach_mark(struct fsnotify_mark *mark)
+{
+ struct fsnotify_group *group = mark->group;
+
+ WARN_ON_ONCE(!mutex_is_locked(&group->mark_mutex));
+ WARN_ON_ONCE(!srcu_read_lock_held(&fsnotify_mark_srcu) &&
+ atomic_read(&mark->refcnt) < 1 +
+ !!(mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED));
+
+ spin_lock(&mark->lock);
+ /* something else already called this function on this mark */
+ if (!(mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED)) {
+ spin_unlock(&mark->lock);
+ return;
+ }
+ mark->flags &= ~FSNOTIFY_MARK_FLAG_ATTACHED;
+ list_del_init(&mark->g_list);
+ spin_unlock(&mark->lock);
atomic_dec(&group->num_marks);
+
+ /* Drop mark reference acquired in fsnotify_add_mark_locked() */
+ fsnotify_put_mark(mark);
}
/*
- * Prepare mark for freeing and add it to the list of marks prepared for
- * freeing. The actual freeing must happen after SRCU period ends and the
- * caller is responsible for this.
+ * Free fsnotify mark. The mark is only flagged as no longer alive here. The
+ * actual freeing happens once the last reference to the mark is dropped, from
+ * a workqueue which first waits for the SRCU period to end.
*
- * The function returns true if the mark was added to the list of marks for
- * freeing. The function returns false if someone else has already called
- * __fsnotify_free_mark() for the mark.
+ * Caller must have a reference to the mark or be protected by
+ * fsnotify_mark_srcu.
*/
-static bool __fsnotify_free_mark(struct fsnotify_mark *mark)
+void fsnotify_free_mark(struct fsnotify_mark *mark)
{
struct fsnotify_group *group = mark->group;
@@ -189,7 +373,7 @@ static bool __fsnotify_free_mark(struct fsnotify_mark *mark)
/* something else already called this function on this mark */
if (!(mark->flags & FSNOTIFY_MARK_FLAG_ALIVE)) {
spin_unlock(&mark->lock);
- return false;
+ return;
}
mark->flags &= ~FSNOTIFY_MARK_FLAG_ALIVE;
spin_unlock(&mark->lock);
@@ -201,25 +385,6 @@ static bool __fsnotify_free_mark(struct fsnotify_mark *mark)
*/
if (group->ops->freeing_mark)
group->ops->freeing_mark(mark, group);
-
- spin_lock(&destroy_lock);
- list_add(&mark->g_list, &destroy_list);
- spin_unlock(&destroy_lock);
-
- return true;
-}
-
-/*
- * Free fsnotify mark. The freeing is actually happening from a workqueue which
- * first waits for srcu period end. Caller must have a reference to the mark
- * or be protected by fsnotify_mark_srcu.
- */
-void fsnotify_free_mark(struct fsnotify_mark *mark)
-{
- if (__fsnotify_free_mark(mark)) {
- queue_delayed_work(system_unbound_wq, &reaper_work,
- FSNOTIFY_REAPER_DELAY);
- }
}
void fsnotify_destroy_mark(struct fsnotify_mark *mark,
@@ -231,54 +396,6 @@ void fsnotify_destroy_mark(struct fsnotify_mark *mark,
fsnotify_free_mark(mark);
}
-void fsnotify_destroy_marks(struct hlist_head *head, spinlock_t *lock)
-{
- struct fsnotify_mark *mark;
-
- while (1) {
- /*
- * We have to be careful since we can race with e.g.
- * fsnotify_clear_marks_by_group() and once we drop 'lock',
- * mark can get removed from the obj_list and destroyed. But
- * we are holding mark reference so mark cannot be freed and
- * calling fsnotify_destroy_mark() more than once is fine.
- */
- spin_lock(lock);
- if (hlist_empty(head)) {
- spin_unlock(lock);
- break;
- }
- mark = hlist_entry(head->first, struct fsnotify_mark, obj_list);
- /*
- * We don't update i_fsnotify_mask / mnt_fsnotify_mask here
- * since inode / mount is going away anyway. So just remove
- * mark from the list.
- */
- hlist_del_init_rcu(&mark->obj_list);
- fsnotify_get_mark(mark);
- spin_unlock(lock);
- fsnotify_destroy_mark(mark, mark->group);
- fsnotify_put_mark(mark);
- }
-}
-
-void fsnotify_set_mark_mask_locked(struct fsnotify_mark *mark, __u32 mask)
-{
- assert_spin_locked(&mark->lock);
-
- mark->mask = mask;
-
- if (mark->flags & FSNOTIFY_MARK_FLAG_INODE)
- fsnotify_set_inode_mark_mask_locked(mark, mask);
-}
-
-void fsnotify_set_mark_ignored_mask_locked(struct fsnotify_mark *mark, __u32 mask)
-{
- assert_spin_locked(&mark->lock);
-
- mark->ignored_mask = mask;
-}
-
/*
* Sorting function for lists of fsnotify marks.
*
@@ -315,37 +432,133 @@ int fsnotify_compare_groups(struct fsnotify_group *a, struct fsnotify_group *b)
return -1;
}
-/* Add mark into proper place in given list of marks */
-int fsnotify_add_mark_list(struct hlist_head *head, struct fsnotify_mark *mark,
- int allow_dups)
+static int fsnotify_attach_connector_to_object(
+ struct fsnotify_mark_connector __rcu **connp,
+ struct inode *inode,
+ struct vfsmount *mnt)
+{
+ struct fsnotify_mark_connector *conn;
+
+ conn = kmem_cache_alloc(fsnotify_mark_connector_cachep, GFP_KERNEL);
+ if (!conn)
+ return -ENOMEM;
+ spin_lock_init(&conn->lock);
+ INIT_HLIST_HEAD(&conn->list);
+ if (inode) {
+ conn->flags = FSNOTIFY_OBJ_TYPE_INODE;
+ conn->inode = igrab(inode);
+ } else {
+ conn->flags = FSNOTIFY_OBJ_TYPE_VFSMOUNT;
+ conn->mnt = mnt;
+ }
+ /*
+ * cmpxchg() provides the barrier so that readers of *connp can see
+ * only initialized structure
+ */
+ if (cmpxchg(connp, NULL, conn)) {
+ /* Someone else created list structure for us */
+ if (inode)
+ iput(inode);
+ kmem_cache_free(fsnotify_mark_connector_cachep, conn);
+ }
+
+ return 0;
+}
+
+/*
+ * Get mark connector, make sure it is alive and return with its lock held.
+ * This is for users that get connector pointer from inode or mount. Users that
+ * hold reference to a mark on the list may directly lock connector->lock as
+ * they are sure list cannot go away under them.
+ */
+static struct fsnotify_mark_connector *fsnotify_grab_connector(
+ struct fsnotify_mark_connector __rcu **connp)
+{
+ struct fsnotify_mark_connector *conn;
+ int idx;
+
+ idx = srcu_read_lock(&fsnotify_mark_srcu);
+ conn = srcu_dereference(*connp, &fsnotify_mark_srcu);
+ if (!conn)
+ goto out;
+ spin_lock(&conn->lock);
+ if (!(conn->flags & (FSNOTIFY_OBJ_TYPE_INODE |
+ FSNOTIFY_OBJ_TYPE_VFSMOUNT))) {
+ spin_unlock(&conn->lock);
+ srcu_read_unlock(&fsnotify_mark_srcu, idx);
+ return NULL;
+ }
+out:
+ srcu_read_unlock(&fsnotify_mark_srcu, idx);
+ return conn;
+}
+
+/*
+ * Add mark into proper place in given list of marks. These marks may be used
+ * for the fsnotify backend to determine which event types should be delivered
+ * to which group and for which inodes. These marks are ordered according to
+ * priority, highest number first, and then by the group's location in memory.
+ */
+static int fsnotify_add_mark_list(struct fsnotify_mark *mark,
+ struct inode *inode, struct vfsmount *mnt,
+ int allow_dups)
{
struct fsnotify_mark *lmark, *last = NULL;
+ struct fsnotify_mark_connector *conn;
+ struct fsnotify_mark_connector __rcu **connp;
int cmp;
+ int err = 0;
+
+ if (WARN_ON(!inode && !mnt))
+ return -EINVAL;
+ if (inode)
+ connp = &inode->i_fsnotify_marks;
+ else
+ connp = &real_mount(mnt)->mnt_fsnotify_marks;
+restart:
+ spin_lock(&mark->lock);
+ conn = fsnotify_grab_connector(connp);
+ if (!conn) {
+ spin_unlock(&mark->lock);
+ err = fsnotify_attach_connector_to_object(connp, inode, mnt);
+ if (err)
+ return err;
+ goto restart;
+ }
/* is mark the first mark? */
- if (hlist_empty(head)) {
- hlist_add_head_rcu(&mark->obj_list, head);
- return 0;
+ if (hlist_empty(&conn->list)) {
+ hlist_add_head_rcu(&mark->obj_list, &conn->list);
+ goto added;
}
/* should mark be in the middle of the current list? */
- hlist_for_each_entry(lmark, head, obj_list) {
+ hlist_for_each_entry(lmark, &conn->list, obj_list) {
last = lmark;
- if ((lmark->group == mark->group) && !allow_dups)
- return -EEXIST;
+ if ((lmark->group == mark->group) &&
+ (lmark->flags & FSNOTIFY_MARK_FLAG_ATTACHED) &&
+ !allow_dups) {
+ err = -EEXIST;
+ goto out_err;
+ }
cmp = fsnotify_compare_groups(lmark->group, mark->group);
if (cmp >= 0) {
hlist_add_before_rcu(&mark->obj_list, &lmark->obj_list);
- return 0;
+ goto added;
}
}
BUG_ON(last == NULL);
/* mark should be the last entry. last is the current last entry */
hlist_add_behind_rcu(&mark->obj_list, &last->obj_list);
- return 0;
+added:
+ mark->connector = conn;
+out_err:
+ spin_unlock(&conn->lock);
+ spin_unlock(&mark->lock);
+ return err;
}
/*
@@ -353,10 +566,10 @@ int fsnotify_add_mark_list(struct hlist_head *head, struct fsnotify_mark *mark,
* These marks may be used for the fsnotify backend to determine which
* event types should be delivered to which group.
*/
-int fsnotify_add_mark_locked(struct fsnotify_mark *mark,
- struct fsnotify_group *group, struct inode *inode,
+int fsnotify_add_mark_locked(struct fsnotify_mark *mark, struct inode *inode,
struct vfsmount *mnt, int allow_dups)
{
+ struct fsnotify_group *group = mark->group;
int ret = 0;
BUG_ON(inode && mnt);
@@ -367,61 +580,42 @@ int fsnotify_add_mark_locked(struct fsnotify_mark *mark,
* LOCKING ORDER!!!!
* group->mark_mutex
* mark->lock
- * inode->i_lock
+ * mark->connector->lock
*/
spin_lock(&mark->lock);
mark->flags |= FSNOTIFY_MARK_FLAG_ALIVE | FSNOTIFY_MARK_FLAG_ATTACHED;
- fsnotify_get_group(group);
- mark->group = group;
list_add(&mark->g_list, &group->marks_list);
atomic_inc(&group->num_marks);
- fsnotify_get_mark(mark); /* for i_list and g_list */
-
- if (inode) {
- ret = fsnotify_add_inode_mark(mark, group, inode, allow_dups);
- if (ret)
- goto err;
- } else if (mnt) {
- ret = fsnotify_add_vfsmount_mark(mark, group, mnt, allow_dups);
- if (ret)
- goto err;
- } else {
- BUG();
- }
-
- /* this will pin the object if appropriate */
- fsnotify_set_mark_mask_locked(mark, mark->mask);
+ fsnotify_get_mark(mark); /* for g_list */
spin_unlock(&mark->lock);
- if (inode)
- __fsnotify_update_child_dentry_flags(inode);
+ ret = fsnotify_add_mark_list(mark, inode, mnt, allow_dups);
+ if (ret)
+ goto err;
+
+ if (mark->mask)
+ fsnotify_recalc_mask(mark->connector);
return ret;
err:
- mark->flags &= ~FSNOTIFY_MARK_FLAG_ALIVE;
+ mark->flags &= ~(FSNOTIFY_MARK_FLAG_ALIVE |
+ FSNOTIFY_MARK_FLAG_ATTACHED);
list_del_init(&mark->g_list);
- fsnotify_put_group(group);
- mark->group = NULL;
atomic_dec(&group->num_marks);
- spin_unlock(&mark->lock);
-
- spin_lock(&destroy_lock);
- list_add(&mark->g_list, &destroy_list);
- spin_unlock(&destroy_lock);
- queue_delayed_work(system_unbound_wq, &reaper_work,
- FSNOTIFY_REAPER_DELAY);
-
+ fsnotify_put_mark(mark);
return ret;
}
-int fsnotify_add_mark(struct fsnotify_mark *mark, struct fsnotify_group *group,
- struct inode *inode, struct vfsmount *mnt, int allow_dups)
+int fsnotify_add_mark(struct fsnotify_mark *mark, struct inode *inode,
+ struct vfsmount *mnt, int allow_dups)
{
int ret;
+ struct fsnotify_group *group = mark->group;
+
mutex_lock(&group->mark_mutex);
- ret = fsnotify_add_mark_locked(mark, group, inode, mnt, allow_dups);
+ ret = fsnotify_add_mark_locked(mark, inode, mnt, allow_dups);
mutex_unlock(&group->mark_mutex);
return ret;
}
@@ -430,29 +624,42 @@ int fsnotify_add_mark(struct fsnotify_mark *mark, struct fsnotify_group *group,
* Given a list of marks, find the mark associated with given group. If found
* take a reference to that mark and return it, else return NULL.
*/
-struct fsnotify_mark *fsnotify_find_mark(struct hlist_head *head,
- struct fsnotify_group *group)
+struct fsnotify_mark *fsnotify_find_mark(
+ struct fsnotify_mark_connector __rcu **connp,
+ struct fsnotify_group *group)
{
+ struct fsnotify_mark_connector *conn;
struct fsnotify_mark *mark;
- hlist_for_each_entry(mark, head, obj_list) {
- if (mark->group == group) {
+ conn = fsnotify_grab_connector(connp);
+ if (!conn)
+ return NULL;
+
+ hlist_for_each_entry(mark, &conn->list, obj_list) {
+ if (mark->group == group &&
+ (mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED)) {
fsnotify_get_mark(mark);
+ spin_unlock(&conn->lock);
return mark;
}
}
+ spin_unlock(&conn->lock);
return NULL;
}
-/*
- * clear any marks in a group in which mark->flags & flags is true
- */
-void fsnotify_clear_marks_by_group_flags(struct fsnotify_group *group,
- unsigned int flags)
+/* Clear any marks in a group with given type */
+void fsnotify_clear_marks_by_group(struct fsnotify_group *group,
+ unsigned int type)
{
struct fsnotify_mark *lmark, *mark;
LIST_HEAD(to_free);
+ struct list_head *head = &to_free;
+ /* Skip selection step if we want to clear all marks. */
+ if (type == FSNOTIFY_OBJ_ALL_TYPES) {
+ head = &group->marks_list;
+ goto clear;
+ }
/*
* We have to be really careful here. Anytime we drop mark_mutex, e.g.
* fsnotify_clear_marks_by_inode() can come and free marks. Even in our
@@ -464,18 +671,19 @@ void fsnotify_clear_marks_by_group_flags(struct fsnotify_group *group,
*/
mutex_lock_nested(&group->mark_mutex, SINGLE_DEPTH_NESTING);
list_for_each_entry_safe(mark, lmark, &group->marks_list, g_list) {
- if (mark->flags & flags)
+ if (mark->connector->flags & type)
list_move(&mark->g_list, &to_free);
}
mutex_unlock(&group->mark_mutex);
+clear:
while (1) {
mutex_lock_nested(&group->mark_mutex, SINGLE_DEPTH_NESTING);
- if (list_empty(&to_free)) {
+ if (list_empty(head)) {
mutex_unlock(&group->mark_mutex);
break;
}
- mark = list_first_entry(&to_free, struct fsnotify_mark, g_list);
+ mark = list_first_entry(head, struct fsnotify_mark, g_list);
fsnotify_get_mark(mark);
fsnotify_detach_mark(mark);
mutex_unlock(&group->mark_mutex);
@@ -484,49 +692,62 @@ void fsnotify_clear_marks_by_group_flags(struct fsnotify_group *group,
}
}
-/*
- * Given a group, prepare for freeing all the marks associated with that group.
- * The marks are attached to the list of marks prepared for destruction, the
- * caller is responsible for freeing marks in that list after SRCU period has
- * ended.
- */
-void fsnotify_detach_group_marks(struct fsnotify_group *group)
+/* Destroy all marks attached to inode / vfsmount */
+void fsnotify_destroy_marks(struct fsnotify_mark_connector __rcu **connp)
{
- struct fsnotify_mark *mark;
+ struct fsnotify_mark_connector *conn;
+ struct fsnotify_mark *mark, *old_mark = NULL;
+ struct inode *inode;
- while (1) {
- mutex_lock_nested(&group->mark_mutex, SINGLE_DEPTH_NESTING);
- if (list_empty(&group->marks_list)) {
- mutex_unlock(&group->mark_mutex);
- break;
- }
- mark = list_first_entry(&group->marks_list,
- struct fsnotify_mark, g_list);
+ conn = fsnotify_grab_connector(connp);
+ if (!conn)
+ return;
+ /*
+ * We have to be careful since we can race with e.g.
+ * fsnotify_clear_marks_by_group() and once we drop the conn->lock, the
+ * list can get modified. However we are holding mark reference and
+ * thus our mark cannot be removed from obj_list so we can continue
+ * iteration after regaining conn->lock.
+ */
+ hlist_for_each_entry(mark, &conn->list, obj_list) {
fsnotify_get_mark(mark);
- fsnotify_detach_mark(mark);
- mutex_unlock(&group->mark_mutex);
- __fsnotify_free_mark(mark);
- fsnotify_put_mark(mark);
+ spin_unlock(&conn->lock);
+ if (old_mark)
+ fsnotify_put_mark(old_mark);
+ old_mark = mark;
+ fsnotify_destroy_mark(mark, mark->group);
+ spin_lock(&conn->lock);
}
+ /*
+ * Detach list from object now so that we don't pin inode until all
+ * mark references get dropped. It would lead to strange results such
+ * as delaying inode deletion or blocking unmount.
+ */
+ inode = fsnotify_detach_connector_from_object(conn);
+ spin_unlock(&conn->lock);
+ if (old_mark)
+ fsnotify_put_mark(old_mark);
+ iput(inode);
}
/*
* Nothing fancy, just initialize lists and locks and counters.
*/
void fsnotify_init_mark(struct fsnotify_mark *mark,
- void (*free_mark)(struct fsnotify_mark *mark))
+ struct fsnotify_group *group)
{
memset(mark, 0, sizeof(*mark));
spin_lock_init(&mark->lock);
atomic_set(&mark->refcnt, 1);
- mark->free_mark = free_mark;
+ fsnotify_get_group(group);
+ mark->group = group;
}
/*
* Destroy all marks in destroy_list, waits for SRCU period to finish before
* actually freeing marks.
*/
-void fsnotify_mark_destroy_list(void)
+static void fsnotify_mark_destroy_workfn(struct work_struct *work)
{
struct fsnotify_mark *mark, *next;
struct list_head private_destroy_list;
@@ -540,11 +761,12 @@ void fsnotify_mark_destroy_list(void)
list_for_each_entry_safe(mark, next, &private_destroy_list, g_list) {
list_del_init(&mark->g_list);
- fsnotify_put_mark(mark);
+ fsnotify_final_mark_destroy(mark);
}
}
-static void fsnotify_mark_destroy_workfn(struct work_struct *work)
+/* Wait for all marks queued for destruction to be actually destroyed */
+void fsnotify_wait_marks_destroyed(void)
{
- fsnotify_mark_destroy_list();
+ flush_delayed_work(&reaper_work);
}
diff --git a/fs/notify/vfsmount_mark.c b/fs/notify/vfsmount_mark.c
deleted file mode 100644
index a8fcab6..0000000
--- a/fs/notify/vfsmount_mark.c
+++ /dev/null
@@ -1,108 +0,0 @@
-/*
- * Copyright (C) 2008 Red Hat, Inc., Eric Paris <eparis@redhat.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2, or (at your option)
- * any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; see the file COPYING. If not, write to
- * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#include <linux/fs.h>
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/mount.h>
-#include <linux/mutex.h>
-#include <linux/spinlock.h>
-
-#include <linux/atomic.h>
-
-#include <linux/fsnotify_backend.h>
-#include "fsnotify.h"
-
-void fsnotify_clear_vfsmount_marks_by_group(struct fsnotify_group *group)
-{
- fsnotify_clear_marks_by_group_flags(group, FSNOTIFY_MARK_FLAG_VFSMOUNT);
-}
-
-/*
- * Recalculate the mnt->mnt_fsnotify_mask, or the mask of all FS_* event types
- * any notifier is interested in hearing for this mount point
- */
-void fsnotify_recalc_vfsmount_mask(struct vfsmount *mnt)
-{
- struct mount *m = real_mount(mnt);
-
- spin_lock(&mnt->mnt_root->d_lock);
- m->mnt_fsnotify_mask = fsnotify_recalc_mask(&m->mnt_fsnotify_marks);
- spin_unlock(&mnt->mnt_root->d_lock);
-}
-
-void fsnotify_destroy_vfsmount_mark(struct fsnotify_mark *mark)
-{
- struct vfsmount *mnt = mark->mnt;
- struct mount *m = real_mount(mnt);
-
- BUG_ON(!mutex_is_locked(&mark->group->mark_mutex));
- assert_spin_locked(&mark->lock);
-
- spin_lock(&mnt->mnt_root->d_lock);
-
- hlist_del_init_rcu(&mark->obj_list);
- mark->mnt = NULL;
-
- m->mnt_fsnotify_mask = fsnotify_recalc_mask(&m->mnt_fsnotify_marks);
- spin_unlock(&mnt->mnt_root->d_lock);
-}
-
-/*
- * given a group and vfsmount, find the mark associated with that combination.
- * if found take a reference to that mark and return it, else return NULL
- */
-struct fsnotify_mark *fsnotify_find_vfsmount_mark(struct fsnotify_group *group,
- struct vfsmount *mnt)
-{
- struct mount *m = real_mount(mnt);
- struct fsnotify_mark *mark;
-
- spin_lock(&mnt->mnt_root->d_lock);
- mark = fsnotify_find_mark(&m->mnt_fsnotify_marks, group);
- spin_unlock(&mnt->mnt_root->d_lock);
-
- return mark;
-}
-
-/*
- * Attach an initialized mark to a given group and vfsmount.
- * These marks may be used for the fsnotify backend to determine which
- * event types should be delivered to which groups.
- */
-int fsnotify_add_vfsmount_mark(struct fsnotify_mark *mark,
- struct fsnotify_group *group, struct vfsmount *mnt,
- int allow_dups)
-{
- struct mount *m = real_mount(mnt);
- int ret;
-
- mark->flags |= FSNOTIFY_MARK_FLAG_VFSMOUNT;
-
- BUG_ON(!mutex_is_locked(&group->mark_mutex));
- assert_spin_locked(&mark->lock);
-
- spin_lock(&mnt->mnt_root->d_lock);
- mark->mnt = mnt;
- ret = fsnotify_add_mark_list(&m->mnt_fsnotify_marks, mark, allow_dups);
- m->mnt_fsnotify_mask = fsnotify_recalc_mask(&m->mnt_fsnotify_marks);
- spin_unlock(&mnt->mnt_root->d_lock);
-
- return ret;
-}
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index 74b489e..ebf80c7 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -2188,8 +2188,7 @@ int dquot_disable(struct super_block *sb, int type, unsigned int flags)
/* This can happen when suspending quotas on remount-ro... */
if (toputinode[cnt] && !sb_has_quota_loaded(sb, cnt)) {
inode_lock(toputinode[cnt]);
- toputinode[cnt]->i_flags &= ~(S_IMMUTABLE |
- S_NOATIME | S_NOQUOTA);
+ toputinode[cnt]->i_flags &= ~S_NOQUOTA;
truncate_inode_pages(&toputinode[cnt]->i_data, 0);
inode_unlock(toputinode[cnt]);
mark_inode_dirty_sync(toputinode[cnt]);
@@ -2237,7 +2236,6 @@ static int vfs_load_quota_inode(struct inode *inode, int type, int format_id,
struct super_block *sb = inode->i_sb;
struct quota_info *dqopt = sb_dqopt(sb);
int error;
- int oldflags = -1;
if (!fmt)
return -ESRCH;
@@ -2285,9 +2283,7 @@ static int vfs_load_quota_inode(struct inode *inode, int type, int format_id,
* possible) Also nobody should write to the file - we use
* special IO operations which ignore the immutable bit. */
inode_lock(inode);
- oldflags = inode->i_flags & (S_NOATIME | S_IMMUTABLE |
- S_NOQUOTA);
- inode->i_flags |= S_NOQUOTA | S_NOATIME | S_IMMUTABLE;
+ inode->i_flags |= S_NOQUOTA;
inode_unlock(inode);
/*
* When S_NOQUOTA is set, remove dquot references as no more
@@ -2329,14 +2325,9 @@ out_file_init:
dqopt->files[type] = NULL;
iput(inode);
out_file_flags:
- if (oldflags != -1) {
- inode_lock(inode);
- /* Set the flags back (in the case of accidental quotaon()
- * on a wrong file we don't want to mess up the flags) */
- inode->i_flags &= ~(S_NOATIME | S_NOQUOTA | S_IMMUTABLE);
- inode->i_flags |= oldflags;
- inode_unlock(inode);
- }
+ inode_lock(inode);
+ inode->i_flags &= ~S_NOQUOTA;
+ inode_unlock(inode);
out_fmt:
put_quota_format(fmt);
@@ -2780,18 +2771,6 @@ int dquot_set_dqinfo(struct super_block *sb, int type, struct qc_info *ii)
}
EXPORT_SYMBOL(dquot_set_dqinfo);
-const struct quotactl_ops dquot_quotactl_ops = {
- .quota_on = dquot_quota_on,
- .quota_off = dquot_quota_off,
- .quota_sync = dquot_quota_sync,
- .get_state = dquot_get_state,
- .set_info = dquot_set_dqinfo,
- .get_dqblk = dquot_get_dqblk,
- .get_nextdqblk = dquot_get_next_dqblk,
- .set_dqblk = dquot_set_dqblk
-};
-EXPORT_SYMBOL(dquot_quotactl_ops);
-
const struct quotactl_ops dquot_quotactl_sysfile_ops = {
.quota_enable = dquot_quota_enable,
.quota_disable = dquot_quota_disable,
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index a6ab9d6..873fc04 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -1375,7 +1375,6 @@ static void init_inode(struct inode *inode, struct treepath *path)
static void inode2sd(void *sd, struct inode *inode, loff_t size)
{
struct stat_data *sd_v2 = (struct stat_data *)sd;
- __u16 flags;
set_sd_v2_mode(sd_v2, inode->i_mode);
set_sd_v2_nlink(sd_v2, inode->i_nlink);
@@ -1390,9 +1389,7 @@ static void inode2sd(void *sd, struct inode *inode, loff_t size)
set_sd_v2_rdev(sd_v2, new_encode_dev(inode->i_rdev));
else
set_sd_v2_generation(sd_v2, inode->i_generation);
- flags = REISERFS_I(inode)->i_attrs;
- i_attrs_to_sd_attrs(inode, &flags);
- set_sd_v2_attrs(sd_v2, flags);
+ set_sd_v2_attrs(sd_v2, REISERFS_I(inode)->i_attrs);
}
/* used to copy inode's fields to old stat data */
@@ -2002,10 +1999,6 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th,
/* uid and gid must already be set by the caller for quota init */
- /* symlink cannot be immutable or append only, right? */
- if (S_ISLNK(inode->i_mode))
- inode->i_flags &= ~(S_IMMUTABLE | S_APPEND);
-
inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
inode->i_size = i_size;
inode->i_blocks = 0;
@@ -3095,28 +3088,6 @@ void sd_attrs_to_i_attrs(__u16 sd_attrs, struct inode *inode)
}
}
-void i_attrs_to_sd_attrs(struct inode *inode, __u16 * sd_attrs)
-{
- if (reiserfs_attrs(inode->i_sb)) {
- if (inode->i_flags & S_IMMUTABLE)
- *sd_attrs |= REISERFS_IMMUTABLE_FL;
- else
- *sd_attrs &= ~REISERFS_IMMUTABLE_FL;
- if (inode->i_flags & S_SYNC)
- *sd_attrs |= REISERFS_SYNC_FL;
- else
- *sd_attrs &= ~REISERFS_SYNC_FL;
- if (inode->i_flags & S_NOATIME)
- *sd_attrs |= REISERFS_NOATIME_FL;
- else
- *sd_attrs &= ~REISERFS_NOATIME_FL;
- if (REISERFS_I(inode)->i_flags & i_nopack_mask)
- *sd_attrs |= REISERFS_NOTAIL_FL;
- else
- *sd_attrs &= ~REISERFS_NOTAIL_FL;
- }
-}
-
/*
* decide if this buffer needs to stay around for data logging or ordered
* write purposes
diff --git a/fs/reiserfs/ioctl.c b/fs/reiserfs/ioctl.c
index 1f4692a..acbbaf7 100644
--- a/fs/reiserfs/ioctl.c
+++ b/fs/reiserfs/ioctl.c
@@ -47,7 +47,6 @@ long reiserfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
}
flags = REISERFS_I(inode)->i_attrs;
- i_attrs_to_sd_attrs(inode, (__u16 *) & flags);
err = put_user(flags, (int __user *)arg);
break;
case REISERFS_IOC_SETFLAGS:{
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index aa40c24..da01f49 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -1961,7 +1961,7 @@ static int do_journal_release(struct reiserfs_transaction_handle *th,
* will be requeued because superblock is being shutdown and doesn't
* have MS_ACTIVE set.
*/
- cancel_delayed_work_sync(&REISERFS_SB(sb)->old_work);
+ reiserfs_cancel_old_flush(sb);
/* wait for all commits to finish */
cancel_delayed_work_sync(&SB_JOURNAL(sb)->j_work);
diff --git a/fs/reiserfs/lbalance.c b/fs/reiserfs/lbalance.c
index 249594a..f5cebd7 100644
--- a/fs/reiserfs/lbalance.c
+++ b/fs/reiserfs/lbalance.c
@@ -475,7 +475,7 @@ static void leaf_item_bottle(struct buffer_info *dest_bi,
* 'cpy_bytes'; create new item header;
* n_ih = new item_header;
*/
- memcpy(&n_ih, ih, SHORT_KEY_SIZE);
+ memcpy(&n_ih.ih_key, &ih->ih_key, KEY_SIZE);
/* Endian safe, both le */
n_ih.ih_version = ih->ih_version;
diff --git a/fs/reiserfs/reiserfs.h b/fs/reiserfs/reiserfs.h
index 2adcde1..1d34377 100644
--- a/fs/reiserfs/reiserfs.h
+++ b/fs/reiserfs/reiserfs.h
@@ -1326,7 +1326,6 @@ struct cpu_key {
#define KEY_NOT_FOUND 0
#define KEY_SIZE (sizeof(struct reiserfs_key))
-#define SHORT_KEY_SIZE (sizeof (__u32) + sizeof (__u32))
/* return values for search_by_key and clones */
#define ITEM_FOUND 1
@@ -2949,6 +2948,7 @@ int reiserfs_allocate_list_bitmaps(struct super_block *s,
struct reiserfs_list_bitmap *, unsigned int);
void reiserfs_schedule_old_flush(struct super_block *s);
+void reiserfs_cancel_old_flush(struct super_block *s);
void add_save_link(struct reiserfs_transaction_handle *th,
struct inode *inode, int truncate);
int remove_save_link(struct inode *inode, int truncate);
@@ -3099,7 +3099,6 @@ static inline void reiserfs_update_sd(struct reiserfs_transaction_handle *th,
}
void sd_attrs_to_i_attrs(__u16 sd_attrs, struct inode *inode);
-void i_attrs_to_sd_attrs(struct inode *inode, __u16 * sd_attrs);
int reiserfs_setattr(struct dentry *dentry, struct iattr *attr);
int __reiserfs_write_begin(struct page *page, unsigned from, unsigned len);
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index feabcde..685f1e0 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -89,11 +89,27 @@ static void flush_old_commits(struct work_struct *work)
sbi = container_of(work, struct reiserfs_sb_info, old_work.work);
s = sbi->s_journal->j_work_sb;
+ /*
+ * We need s_umount for protecting quota writeback. We have to use
+ * trylock as reiserfs_cancel_old_flush() may be waiting for this work
+ * to complete with s_umount held.
+ */
+ if (!down_read_trylock(&s->s_umount)) {
+ /* Requeue work if we are not cancelling it */
+ spin_lock(&sbi->old_work_lock);
+ if (sbi->work_queued == 1)
+ queue_delayed_work(system_long_wq, &sbi->old_work, HZ);
+ spin_unlock(&sbi->old_work_lock);
+ return;
+ }
spin_lock(&sbi->old_work_lock);
- sbi->work_queued = 0;
+ /* Avoid clobbering the cancel state... */
+ if (sbi->work_queued == 1)
+ sbi->work_queued = 0;
spin_unlock(&sbi->old_work_lock);
reiserfs_sync_fs(s, 1);
+ up_read(&s->s_umount);
}
void reiserfs_schedule_old_flush(struct super_block *s)
@@ -117,21 +133,22 @@ void reiserfs_schedule_old_flush(struct super_block *s)
spin_unlock(&sbi->old_work_lock);
}
-static void cancel_old_flush(struct super_block *s)
+void reiserfs_cancel_old_flush(struct super_block *s)
{
struct reiserfs_sb_info *sbi = REISERFS_SB(s);
- cancel_delayed_work_sync(&REISERFS_SB(s)->old_work);
spin_lock(&sbi->old_work_lock);
- sbi->work_queued = 0;
+ /* Make sure no new flushes will be queued */
+ sbi->work_queued = 2;
spin_unlock(&sbi->old_work_lock);
+ cancel_delayed_work_sync(&REISERFS_SB(s)->old_work);
}
static int reiserfs_freeze(struct super_block *s)
{
struct reiserfs_transaction_handle th;
- cancel_old_flush(s);
+ reiserfs_cancel_old_flush(s);
reiserfs_write_lock(s);
if (!(s->s_flags & MS_RDONLY)) {
@@ -152,7 +169,13 @@ static int reiserfs_freeze(struct super_block *s)
static int reiserfs_unfreeze(struct super_block *s)
{
+ struct reiserfs_sb_info *sbi = REISERFS_SB(s);
+
reiserfs_allow_writes(s);
+ spin_lock(&sbi->old_work_lock);
+ /* Allow old_work to run again */
+ sbi->work_queued = 0;
+ spin_unlock(&sbi->old_work_lock);
return 0;
}
@@ -547,12 +570,28 @@ static void reiserfs_kill_sb(struct super_block *s)
kill_block_super(s);
}
+#ifdef CONFIG_QUOTA
+static int reiserfs_quota_off(struct super_block *sb, int type);
+
+static void reiserfs_quota_off_umount(struct super_block *s)
+{
+ int type;
+
+ for (type = 0; type < REISERFS_MAXQUOTAS; type++)
+ reiserfs_quota_off(s, type);
+}
+#else
+static inline void reiserfs_quota_off_umount(struct super_block *s)
+{
+}
+#endif
+
static void reiserfs_put_super(struct super_block *s)
{
struct reiserfs_transaction_handle th;
th.t_trans_id = 0;
- dquot_disable(s, -1, DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED);
+ reiserfs_quota_off_umount(s);
reiserfs_write_lock(s);
@@ -817,7 +856,7 @@ static const struct dquot_operations reiserfs_quota_operations = {
static const struct quotactl_ops reiserfs_qctl_operations = {
.quota_on = reiserfs_quota_on,
- .quota_off = dquot_quota_off,
+ .quota_off = reiserfs_quota_off,
.quota_sync = dquot_quota_sync,
.get_state = dquot_get_state,
.set_info = dquot_set_dqinfo,
@@ -2194,7 +2233,7 @@ error_unlocked:
if (sbi->commit_wq)
destroy_workqueue(sbi->commit_wq);
- cancel_delayed_work_sync(&REISERFS_SB(s)->old_work);
+ reiserfs_cancel_old_flush(s);
reiserfs_free_bitmap_cache(s);
if (SB_BUFFER_WITH_SB(s))
@@ -2405,12 +2444,47 @@ static int reiserfs_quota_on(struct super_block *sb, int type, int format_id,
goto out;
}
reiserfs_write_unlock(sb);
- return dquot_quota_on(sb, type, format_id, path);
+ err = dquot_quota_on(sb, type, format_id, path);
+ if (!err) {
+ inode_lock(inode);
+ REISERFS_I(inode)->i_attrs |= REISERFS_IMMUTABLE_FL |
+ REISERFS_NOATIME_FL;
+ inode_set_flags(inode, S_IMMUTABLE | S_NOATIME,
+ S_IMMUTABLE | S_NOATIME);
+ inode_unlock(inode);
+ mark_inode_dirty(inode);
+ }
+ return err;
out:
reiserfs_write_unlock(sb);
return err;
}
+static int reiserfs_quota_off(struct super_block *sb, int type)
+{
+ int err;
+ struct inode *inode = sb_dqopt(sb)->files[type];
+
+ if (!inode || !igrab(inode))
+ goto out;
+
+ err = dquot_quota_off(sb, type);
+ if (err)
+ goto out_put;
+
+ inode_lock(inode);
+ REISERFS_I(inode)->i_attrs &= ~(REISERFS_IMMUTABLE_FL |
+ REISERFS_NOATIME_FL);
+ inode_set_flags(inode, 0, S_IMMUTABLE | S_NOATIME);
+ inode_unlock(inode);
+ mark_inode_dirty(inode);
+out_put:
+ iput(inode);
+ return err;
+out:
+ return dquot_quota_off(sb, type);
+}
+
/*
* Read data from quotafile - avoid pagecache and such because we cannot afford
* acquiring the locks... As quota files are never truncated and quota code
diff --git a/fs/udf/file.c b/fs/udf/file.c
index e04cc0c..f5eb2d5 100644
--- a/fs/udf/file.c
+++ b/fs/udf/file.c
@@ -44,12 +44,12 @@ static void __udf_adinicb_readpage(struct page *page)
char *kaddr;
struct udf_inode_info *iinfo = UDF_I(inode);
- kaddr = kmap(page);
+ kaddr = kmap_atomic(page);
memcpy(kaddr, iinfo->i_ext.i_data + iinfo->i_lenEAttr, inode->i_size);
memset(kaddr + inode->i_size, 0, PAGE_SIZE - inode->i_size);
flush_dcache_page(page);
SetPageUptodate(page);
- kunmap(page);
+ kunmap_atomic(kaddr);
}
static int udf_adinicb_readpage(struct file *file, struct page *page)
@@ -70,11 +70,11 @@ static int udf_adinicb_writepage(struct page *page,
BUG_ON(!PageLocked(page));
- kaddr = kmap(page);
+ kaddr = kmap_atomic(page);
memcpy(iinfo->i_ext.i_data + iinfo->i_lenEAttr, kaddr, inode->i_size);
- mark_inode_dirty(inode);
SetPageUptodate(page);
- kunmap(page);
+ kunmap_atomic(kaddr);
+ mark_inode_dirty(inode);
unlock_page(page);
return 0;
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index a8d8f71..98c510e 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -276,14 +276,14 @@ int udf_expand_file_adinicb(struct inode *inode)
return -ENOMEM;
if (!PageUptodate(page)) {
- kaddr = kmap(page);
+ kaddr = kmap_atomic(page);
memset(kaddr + iinfo->i_lenAlloc, 0x00,
PAGE_SIZE - iinfo->i_lenAlloc);
memcpy(kaddr, iinfo->i_ext.i_data + iinfo->i_lenEAttr,
iinfo->i_lenAlloc);
flush_dcache_page(page);
SetPageUptodate(page);
- kunmap(page);
+ kunmap_atomic(kaddr);
}
down_write(&iinfo->i_data_sem);
memset(iinfo->i_ext.i_data + iinfo->i_lenEAttr, 0x00,
@@ -300,11 +300,11 @@ int udf_expand_file_adinicb(struct inode *inode)
if (err) {
/* Restore everything back so that we don't lose data... */
lock_page(page);
- kaddr = kmap(page);
down_write(&iinfo->i_data_sem);
+ kaddr = kmap_atomic(page);
memcpy(iinfo->i_ext.i_data + iinfo->i_lenEAttr, kaddr,
inode->i_size);
- kunmap(page);
+ kunmap_atomic(kaddr);
unlock_page(page);
iinfo->i_alloc_type = ICBTAG_FLAG_AD_IN_ICB;
inode->i_data.a_ops = &udf_adinicb_aops;
@@ -1535,7 +1535,7 @@ reread:
inode->i_data.a_ops = &udf_symlink_aops;
inode->i_op = &udf_symlink_inode_operations;
inode_nohighmem(inode);
- inode->i_mode = S_IFLNK | S_IRWXUGO;
+ inode->i_mode = S_IFLNK | 0777;
break;
case ICBTAG_FILE_TYPE_MAIN:
udf_debug("METADATA FILE-----\n");
@@ -1591,9 +1591,9 @@ static umode_t udf_convert_permissions(struct fileEntry *fe)
permissions = le32_to_cpu(fe->permissions);
flags = le16_to_cpu(fe->icbTag.flags);
- mode = ((permissions) & S_IRWXO) |
- ((permissions >> 2) & S_IRWXG) |
- ((permissions >> 4) & S_IRWXU) |
+ mode = ((permissions) & 0007) |
+ ((permissions >> 2) & 0070) |
+ ((permissions >> 4) & 0700) |
((flags & ICBTAG_FLAG_SETUID) ? S_ISUID : 0) |
((flags & ICBTAG_FLAG_SETGID) ? S_ISGID : 0) |
((flags & ICBTAG_FLAG_STICKY) ? S_ISVTX : 0);
@@ -1669,9 +1669,9 @@ static int udf_update_inode(struct inode *inode, int do_sync)
else
fe->gid = cpu_to_le32(i_gid_read(inode));
- udfperms = ((inode->i_mode & S_IRWXO)) |
- ((inode->i_mode & S_IRWXG) << 2) |
- ((inode->i_mode & S_IRWXU) << 4);
+ udfperms = ((inode->i_mode & 0007)) |
+ ((inode->i_mode & 0070) << 2) |
+ ((inode->i_mode & 0700) << 4);
udfperms |= (le32_to_cpu(fe->permissions) &
(FE_PERM_O_DELETE | FE_PERM_O_CHATTR |
diff --git a/fs/udf/namei.c b/fs/udf/namei.c
index babf48d..385ee89 100644
--- a/fs/udf/namei.c
+++ b/fs/udf/namei.c
@@ -906,7 +906,7 @@ out:
static int udf_symlink(struct inode *dir, struct dentry *dentry,
const char *symname)
{
- struct inode *inode = udf_new_inode(dir, S_IFLNK | S_IRWXUGO);
+ struct inode *inode = udf_new_inode(dir, S_IFLNK | 0777);
struct pathComponent *pc;
const char *compstart;
struct extent_position epos = {};
OpenPOWER on IntegriCloud