Diffstat (limited to 'fs')
41 files changed, 1015 insertions, 883 deletions
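[Editor's note: across the quota changes below, ext2, ext4, and jfs each stop installing the generic dquot_quotactl_ops and provide their own quotactl_ops so that quota_on can hide the quota file from userspace. A minimal sketch of the shared quota_on pattern, modeled directly on the ext2_quota_on() hunk below; the "myfs" name is a placeholder and the filesystem-specific on-disk flag update (e.g. EXT2_I(inode)->i_flags) is omitted:]

        static int myfs_quota_on(struct super_block *sb, int type, int format_id,
                                 const struct path *path)
        {
                int err;
                struct inode *inode;

                err = dquot_quota_on(sb, type, format_id, path);
                if (err)
                        return err;

                inode = d_inode(path->dentry);
                inode_lock(inode);
                /* Keep userspace from touching the quota file while in use. */
                inode_set_flags(inode, S_NOATIME | S_IMMUTABLE,
                                S_NOATIME | S_IMMUTABLE);
                inode_unlock(inode);
                mark_inode_dirty(inode);

                return 0;
        }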
diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h index 5e64de9..03f5ce1 100644 --- a/fs/ext2/ext2.h +++ b/fs/ext2/ext2.h @@ -779,7 +779,6 @@ extern void ext2_evict_inode(struct inode *); extern int ext2_get_block(struct inode *, sector_t, struct buffer_head *, int); extern int ext2_setattr (struct dentry *, struct iattr *); extern void ext2_set_inode_flags(struct inode *inode); -extern void ext2_get_inode_flags(struct ext2_inode_info *); extern int ext2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, u64 start, u64 len); @@ -796,7 +795,8 @@ void ext2_error(struct super_block *, const char *, const char *, ...); extern __printf(3, 4) void ext2_msg(struct super_block *, const char *, const char *, ...); extern void ext2_update_dynamic_rev (struct super_block *sb); -extern void ext2_write_super (struct super_block *); +extern void ext2_sync_super(struct super_block *sb, struct ext2_super_block *es, + int wait); /* * Inodes and files operations diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index 128cce5..3a38c1b 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c @@ -1384,25 +1384,6 @@ void ext2_set_inode_flags(struct inode *inode) inode->i_flags |= S_DAX; } -/* Propagate flags from i_flags to EXT2_I(inode)->i_flags */ -void ext2_get_inode_flags(struct ext2_inode_info *ei) -{ - unsigned int flags = ei->vfs_inode.i_flags; - - ei->i_flags &= ~(EXT2_SYNC_FL|EXT2_APPEND_FL| - EXT2_IMMUTABLE_FL|EXT2_NOATIME_FL|EXT2_DIRSYNC_FL); - if (flags & S_SYNC) - ei->i_flags |= EXT2_SYNC_FL; - if (flags & S_APPEND) - ei->i_flags |= EXT2_APPEND_FL; - if (flags & S_IMMUTABLE) - ei->i_flags |= EXT2_IMMUTABLE_FL; - if (flags & S_NOATIME) - ei->i_flags |= EXT2_NOATIME_FL; - if (flags & S_DIRSYNC) - ei->i_flags |= EXT2_DIRSYNC_FL; -} - struct inode *ext2_iget (struct super_block *sb, unsigned long ino) { struct ext2_inode_info *ei; @@ -1563,7 +1544,6 @@ static int __ext2_write_inode(struct inode *inode, int do_sync) if (ei->i_state & EXT2_STATE_NEW) memset(raw_inode, 0, EXT2_SB(sb)->s_inode_size); - ext2_get_inode_flags(ei); raw_inode->i_mode = cpu_to_le16(inode->i_mode); if (!(test_opt(sb, NO_UID32))) { raw_inode->i_uid_low = cpu_to_le16(low_16_bits(uid)); @@ -1615,7 +1595,7 @@ static int __ext2_write_inode(struct inode *inode, int do_sync) EXT2_SET_RO_COMPAT_FEATURE(sb, EXT2_FEATURE_RO_COMPAT_LARGE_FILE); spin_unlock(&EXT2_SB(sb)->s_lock); - ext2_write_super(sb); + ext2_sync_super(sb, EXT2_SB(sb)->s_es, 1); } } } diff --git a/fs/ext2/ioctl.c b/fs/ext2/ioctl.c index 191e02b..087f122 100644 --- a/fs/ext2/ioctl.c +++ b/fs/ext2/ioctl.c @@ -29,7 +29,6 @@ long ext2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) switch (cmd) { case EXT2_IOC_GETFLAGS: - ext2_get_inode_flags(ei); flags = ei->i_flags & EXT2_FL_USER_VISIBLE; return put_user(flags, (int __user *) arg); case EXT2_IOC_SETFLAGS: { diff --git a/fs/ext2/super.c b/fs/ext2/super.c index 9e25a71..8ac673c 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -36,8 +36,7 @@ #include "xattr.h" #include "acl.h" -static void ext2_sync_super(struct super_block *sb, - struct ext2_super_block *es, int wait); +static void ext2_write_super(struct super_block *sb); static int ext2_remount (struct super_block * sb, int * flags, char * data); static int ext2_statfs (struct dentry * dentry, struct kstatfs * buf); static int ext2_sync_fs(struct super_block *sb, int wait); @@ -123,13 +122,29 @@ void ext2_update_dynamic_rev(struct super_block *sb) */ } +#ifdef CONFIG_QUOTA +static int ext2_quota_off(struct super_block *sb, int type); + +static void 
ext2_quota_off_umount(struct super_block *sb) +{ + int type; + + for (type = 0; type < MAXQUOTAS; type++) + ext2_quota_off(sb, type); +} +#else +static inline void ext2_quota_off_umount(struct super_block *sb) +{ +} +#endif + static void ext2_put_super (struct super_block * sb) { int db_count; int i; struct ext2_sb_info *sbi = EXT2_SB(sb); - dquot_disable(sb, -1, DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED); + ext2_quota_off_umount(sb); if (sbi->s_mb_cache) { ext2_xattr_destroy_cache(sbi->s_mb_cache); @@ -314,10 +329,23 @@ static int ext2_show_options(struct seq_file *seq, struct dentry *root) #ifdef CONFIG_QUOTA static ssize_t ext2_quota_read(struct super_block *sb, int type, char *data, size_t len, loff_t off); static ssize_t ext2_quota_write(struct super_block *sb, int type, const char *data, size_t len, loff_t off); +static int ext2_quota_on(struct super_block *sb, int type, int format_id, + const struct path *path); static struct dquot **ext2_get_dquots(struct inode *inode) { return EXT2_I(inode)->i_dquot; } + +static const struct quotactl_ops ext2_quotactl_ops = { + .quota_on = ext2_quota_on, + .quota_off = ext2_quota_off, + .quota_sync = dquot_quota_sync, + .get_state = dquot_get_state, + .set_info = dquot_set_dqinfo, + .get_dqblk = dquot_get_dqblk, + .set_dqblk = dquot_set_dqblk, + .get_nextdqblk = dquot_get_next_dqblk, +}; #endif static const struct super_operations ext2_sops = { @@ -1117,7 +1145,7 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent) #ifdef CONFIG_QUOTA sb->dq_op = &dquot_operations; - sb->s_qcop = &dquot_quotactl_ops; + sb->s_qcop = &ext2_quotactl_ops; sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP; #endif @@ -1194,8 +1222,8 @@ static void ext2_clear_super_error(struct super_block *sb) } } -static void ext2_sync_super(struct super_block *sb, struct ext2_super_block *es, - int wait) +void ext2_sync_super(struct super_block *sb, struct ext2_super_block *es, + int wait) { ext2_clear_super_error(sb); spin_lock(&EXT2_SB(sb)->s_lock); @@ -1270,7 +1298,7 @@ static int ext2_unfreeze(struct super_block *sb) return 0; } -void ext2_write_super(struct super_block *sb) +static void ext2_write_super(struct super_block *sb) { if (!(sb->s_flags & MS_RDONLY)) ext2_sync_fs(sb, 1); @@ -1548,6 +1576,51 @@ out: return len - towrite; } +static int ext2_quota_on(struct super_block *sb, int type, int format_id, + const struct path *path) +{ + int err; + struct inode *inode; + + err = dquot_quota_on(sb, type, format_id, path); + if (err) + return err; + + inode = d_inode(path->dentry); + inode_lock(inode); + EXT2_I(inode)->i_flags |= EXT2_NOATIME_FL | EXT2_IMMUTABLE_FL; + inode_set_flags(inode, S_NOATIME | S_IMMUTABLE, + S_NOATIME | S_IMMUTABLE); + inode_unlock(inode); + mark_inode_dirty(inode); + + return 0; +} + +static int ext2_quota_off(struct super_block *sb, int type) +{ + struct inode *inode = sb_dqopt(sb)->files[type]; + int err; + + if (!inode || !igrab(inode)) + goto out; + + err = dquot_quota_off(sb, type); + if (err) + goto out_put; + + inode_lock(inode); + EXT2_I(inode)->i_flags &= ~(EXT2_NOATIME_FL | EXT2_IMMUTABLE_FL); + inode_set_flags(inode, 0, S_NOATIME | S_IMMUTABLE); + inode_unlock(inode); + mark_inode_dirty(inode); +out_put: + iput(inode); + return err; +out: + return dquot_quota_off(sb, type); +} + #endif static struct file_system_type ext2_fs_type = { diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index fb69ee2..f7b465b 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -2477,7 +2477,6 @@ extern int ext4_truncate(struct inode *); extern 
int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length); extern int ext4_truncate_restart_trans(handle_t *, struct inode *, int nblocks); extern void ext4_set_inode_flags(struct inode *); -extern void ext4_get_inode_flags(struct ext4_inode_info *); extern int ext4_alloc_da_blocks(struct inode *inode); extern void ext4_set_aops(struct inode *inode); extern int ext4_writepage_trans_blocks(struct inode *); diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index b9ffa9f..10b574a 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -4502,31 +4502,6 @@ void ext4_set_inode_flags(struct inode *inode) S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC|S_DAX); } -/* Propagate flags from i_flags to EXT4_I(inode)->i_flags */ -void ext4_get_inode_flags(struct ext4_inode_info *ei) -{ - unsigned int vfs_fl; - unsigned long old_fl, new_fl; - - do { - vfs_fl = ei->vfs_inode.i_flags; - old_fl = ei->i_flags; - new_fl = old_fl & ~(EXT4_SYNC_FL|EXT4_APPEND_FL| - EXT4_IMMUTABLE_FL|EXT4_NOATIME_FL| - EXT4_DIRSYNC_FL); - if (vfs_fl & S_SYNC) - new_fl |= EXT4_SYNC_FL; - if (vfs_fl & S_APPEND) - new_fl |= EXT4_APPEND_FL; - if (vfs_fl & S_IMMUTABLE) - new_fl |= EXT4_IMMUTABLE_FL; - if (vfs_fl & S_NOATIME) - new_fl |= EXT4_NOATIME_FL; - if (vfs_fl & S_DIRSYNC) - new_fl |= EXT4_DIRSYNC_FL; - } while (cmpxchg(&ei->i_flags, old_fl, new_fl) != old_fl); -} - static blkcnt_t ext4_inode_blocks(struct ext4_inode *raw_inode, struct ext4_inode_info *ei) { @@ -4963,7 +4938,6 @@ static int ext4_do_update_inode(handle_t *handle, if (ext4_test_inode_state(inode, EXT4_STATE_NEW)) memset(raw_inode, 0, EXT4_SB(inode->i_sb)->s_inode_size); - ext4_get_inode_flags(ei); raw_inode->i_mode = cpu_to_le16(inode->i_mode); i_uid = i_uid_read(inode); i_gid = i_gid_read(inode); diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index a4273dd..184e74e 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -500,7 +500,6 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) switch (cmd) { case EXT4_IOC_GETFLAGS: - ext4_get_inode_flags(ei); flags = ei->i_flags & EXT4_FL_USER_VISIBLE; return put_user(flags, (int __user *) arg); case EXT4_IOC_SETFLAGS: { @@ -888,7 +887,6 @@ resizefs_out: struct fsxattr fa; memset(&fa, 0, sizeof(struct fsxattr)); - ext4_get_inode_flags(ei); fa.fsx_xflags = ext4_iflags_to_xflags(ei->i_flags & EXT4_FL_USER_VISIBLE); if (ext4_has_feature_project(inode->i_sb)) { diff --git a/fs/ext4/super.c b/fs/ext4/super.c index a9448db..a9c72e3 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -839,6 +839,28 @@ static void dump_orphan_list(struct super_block *sb, struct ext4_sb_info *sbi) } } +#ifdef CONFIG_QUOTA +static int ext4_quota_off(struct super_block *sb, int type); + +static inline void ext4_quota_off_umount(struct super_block *sb) +{ + int type; + + if (ext4_has_feature_quota(sb)) { + dquot_disable(sb, -1, + DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED); + } else { + /* Use our quota_off function to clear inode flags etc. 
*/ + for (type = 0; type < EXT4_MAXQUOTAS; type++) + ext4_quota_off(sb, type); + } +} +#else +static inline void ext4_quota_off_umount(struct super_block *sb) +{ +} +#endif + static void ext4_put_super(struct super_block *sb) { struct ext4_sb_info *sbi = EXT4_SB(sb); @@ -847,7 +869,7 @@ static void ext4_put_super(struct super_block *sb) int i, err; ext4_unregister_li_request(sb); - dquot_disable(sb, -1, DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED); + ext4_quota_off_umount(sb); flush_workqueue(sbi->rsv_conversion_wq); destroy_workqueue(sbi->rsv_conversion_wq); @@ -1218,7 +1240,6 @@ static int ext4_mark_dquot_dirty(struct dquot *dquot); static int ext4_write_info(struct super_block *sb, int type); static int ext4_quota_on(struct super_block *sb, int type, int format_id, const struct path *path); -static int ext4_quota_off(struct super_block *sb, int type); static int ext4_quota_on_mount(struct super_block *sb, int type); static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data, size_t len, loff_t off); @@ -5344,11 +5365,33 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id, if (err) return err; } + lockdep_set_quota_inode(path->dentry->d_inode, I_DATA_SEM_QUOTA); err = dquot_quota_on(sb, type, format_id, path); - if (err) + if (err) { lockdep_set_quota_inode(path->dentry->d_inode, I_DATA_SEM_NORMAL); + } else { + struct inode *inode = d_inode(path->dentry); + handle_t *handle; + + /* + * Set inode flags to prevent userspace from messing with quota + * files. If this fails, we return success anyway since quotas + * are already enabled and this is not a hard failure. + */ + inode_lock(inode); + handle = ext4_journal_start(inode, EXT4_HT_QUOTA, 1); + if (IS_ERR(handle)) + goto unlock_inode; + EXT4_I(inode)->i_flags |= EXT4_NOATIME_FL | EXT4_IMMUTABLE_FL; + inode_set_flags(inode, S_NOATIME | S_IMMUTABLE, + S_NOATIME | S_IMMUTABLE); + ext4_mark_inode_dirty(handle, inode); + ext4_journal_stop(handle); + unlock_inode: + inode_unlock(inode); + } return err; } @@ -5422,24 +5465,39 @@ static int ext4_quota_off(struct super_block *sb, int type) { struct inode *inode = sb_dqopt(sb)->files[type]; handle_t *handle; + int err; /* Force all delayed allocation blocks to be allocated. * Caller already holds s_umount sem */ if (test_opt(sb, DELALLOC)) sync_filesystem(sb); - if (!inode) + if (!inode || !igrab(inode)) goto out; - /* Update modification times of quota files when userspace can - * start looking at them */ + err = dquot_quota_off(sb, type); + if (err) + goto out_put; + + inode_lock(inode); + /* + * Update modification times of quota files when userspace can + * start looking at them. If we fail, we return success anyway since + * this is not a hard failure and quotas are already disabled. 
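[Editor's note: the quota_off side of the same pattern pins the quota inode with igrab() so the flags can be cleared safely even during teardown; ext4 additionally wraps the flag update in a journal handle, as the hunk continuing below shows. A sketch modeled on the ext2/jfs variants ("myfs" hypothetical):]

        static int myfs_quota_off(struct super_block *sb, int type)
        {
                struct inode *inode = sb_dqopt(sb)->files[type];
                int err;

                /* No quota file, or it is already being evicted: plain disable. */
                if (!inode || !igrab(inode))
                        return dquot_quota_off(sb, type);

                err = dquot_quota_off(sb, type);
                if (err)
                        goto out_put;

                inode_lock(inode);
                /* Quotas are off; let userspace at the file again. */
                inode_set_flags(inode, 0, S_NOATIME | S_IMMUTABLE);
                inode_unlock(inode);
                mark_inode_dirty(inode);
        out_put:
                iput(inode);
                return err;
        }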
+ */ handle = ext4_journal_start(inode, EXT4_HT_QUOTA, 1); if (IS_ERR(handle)) - goto out; + goto out_unlock; + EXT4_I(inode)->i_flags &= ~(EXT4_NOATIME_FL | EXT4_IMMUTABLE_FL); + inode_set_flags(inode, 0, S_NOATIME | S_IMMUTABLE); inode->i_mtime = inode->i_ctime = current_time(inode); ext4_mark_inode_dirty(handle, inode); ext4_journal_stop(handle); - +out_unlock: + inode_unlock(inode); +out_put: + iput(inode); + return err; out: return dquot_quota_off(sb, type); } @@ -371,9 +371,6 @@ void inode_init_once(struct inode *inode) INIT_LIST_HEAD(&inode->i_lru); address_space_init_once(&inode->i_data); i_size_ordered_init(inode); -#ifdef CONFIG_FSNOTIFY - INIT_HLIST_HEAD(&inode->i_fsnotify_marks); -#endif } EXPORT_SYMBOL(inode_init_once); diff --git a/fs/jfs/ioctl.c b/fs/jfs/ioctl.c index fc89f94..5c5ac5b 100644 --- a/fs/jfs/ioctl.c +++ b/fs/jfs/ioctl.c @@ -64,7 +64,6 @@ long jfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) switch (cmd) { case JFS_IOC_GETFLAGS: - jfs_get_inode_flags(jfs_inode); flags = jfs_inode->mode2 & JFS_FL_USER_VISIBLE; flags = jfs_map_ext2(flags, 0); return put_user(flags, (int __user *) arg); @@ -98,7 +97,6 @@ long jfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) /* Lock against other parallel changes of flags */ inode_lock(inode); - jfs_get_inode_flags(jfs_inode); oldflags = jfs_inode->mode2; /* diff --git a/fs/jfs/jfs_imap.c b/fs/jfs/jfs_imap.c index 6aca224..f36ef68 100644 --- a/fs/jfs/jfs_imap.c +++ b/fs/jfs/jfs_imap.c @@ -3148,7 +3148,6 @@ static void copy_to_dinode(struct dinode * dip, struct inode *ip) else dip->di_gid = cpu_to_le32(from_kgid(&init_user_ns, jfs_ip->saved_gid)); - jfs_get_inode_flags(jfs_ip); /* * mode2 is only needed for storing the higher order bits. * Trust i_mode for the lower order ones diff --git a/fs/jfs/jfs_inode.c b/fs/jfs/jfs_inode.c index 375dd25..5e9b7bb 100644 --- a/fs/jfs/jfs_inode.c +++ b/fs/jfs/jfs_inode.c @@ -45,24 +45,6 @@ void jfs_set_inode_flags(struct inode *inode) S_DIRSYNC | S_SYNC); } -void jfs_get_inode_flags(struct jfs_inode_info *jfs_ip) -{ - unsigned int flags = jfs_ip->vfs_inode.i_flags; - - jfs_ip->mode2 &= ~(JFS_IMMUTABLE_FL | JFS_APPEND_FL | JFS_NOATIME_FL | - JFS_DIRSYNC_FL | JFS_SYNC_FL); - if (flags & S_IMMUTABLE) - jfs_ip->mode2 |= JFS_IMMUTABLE_FL; - if (flags & S_APPEND) - jfs_ip->mode2 |= JFS_APPEND_FL; - if (flags & S_NOATIME) - jfs_ip->mode2 |= JFS_NOATIME_FL; - if (flags & S_DIRSYNC) - jfs_ip->mode2 |= JFS_DIRSYNC_FL; - if (flags & S_SYNC) - jfs_ip->mode2 |= JFS_SYNC_FL; -} - /* * NAME: ialloc() * diff --git a/fs/jfs/jfs_inode.h b/fs/jfs/jfs_inode.h index 9271cfe4..7b0b3a4 100644 --- a/fs/jfs/jfs_inode.h +++ b/fs/jfs/jfs_inode.h @@ -33,7 +33,6 @@ extern void jfs_truncate(struct inode *); extern void jfs_truncate_nolock(struct inode *, loff_t); extern void jfs_free_zero_link(struct inode *); extern struct dentry *jfs_get_parent(struct dentry *dentry); -extern void jfs_get_inode_flags(struct jfs_inode_info *); extern struct dentry *jfs_fh_to_dentry(struct super_block *sb, struct fid *fid, int fh_len, int fh_type); extern struct dentry *jfs_fh_to_parent(struct super_block *sb, struct fid *fid, diff --git a/fs/jfs/super.c b/fs/jfs/super.c index c64c257..e8aad7d 100644 --- a/fs/jfs/super.c +++ b/fs/jfs/super.c @@ -45,6 +45,7 @@ #include "jfs_acl.h" #include "jfs_debug.h" #include "jfs_xattr.h" +#include "jfs_dinode.h" MODULE_DESCRIPTION("The Journaled Filesystem (JFS)"); MODULE_AUTHOR("Steve Best/Dave Kleikamp/Barry Arndt, IBM"); @@ -181,6 +182,35 @@ static int jfs_statfs(struct 
dentry *dentry, struct kstatfs *buf) return 0; } +#ifdef CONFIG_QUOTA +static int jfs_quota_off(struct super_block *sb, int type); +static int jfs_quota_on(struct super_block *sb, int type, int format_id, + const struct path *path); + +static void jfs_quota_off_umount(struct super_block *sb) +{ + int type; + + for (type = 0; type < MAXQUOTAS; type++) + jfs_quota_off(sb, type); +} + +static const struct quotactl_ops jfs_quotactl_ops = { + .quota_on = jfs_quota_on, + .quota_off = jfs_quota_off, + .quota_sync = dquot_quota_sync, + .get_state = dquot_get_state, + .set_info = dquot_set_dqinfo, + .get_dqblk = dquot_get_dqblk, + .set_dqblk = dquot_set_dqblk, + .get_nextdqblk = dquot_get_next_dqblk, +}; +#else +static inline void jfs_quota_off_umount(struct super_block *sb) +{ +} +#endif + static void jfs_put_super(struct super_block *sb) { struct jfs_sb_info *sbi = JFS_SBI(sb); @@ -188,7 +218,7 @@ static void jfs_put_super(struct super_block *sb) jfs_info("In jfs_put_super"); - dquot_disable(sb, -1, DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED); + jfs_quota_off_umount(sb); rc = jfs_umount(sb); if (rc) @@ -536,7 +566,7 @@ static int jfs_fill_super(struct super_block *sb, void *data, int silent) sb->s_xattr = jfs_xattr_handlers; #ifdef CONFIG_QUOTA sb->dq_op = &dquot_operations; - sb->s_qcop = &dquot_quotactl_ops; + sb->s_qcop = &jfs_quotactl_ops; sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP; #endif @@ -840,6 +870,51 @@ static struct dquot **jfs_get_dquots(struct inode *inode) { return JFS_IP(inode)->i_dquot; } + +static int jfs_quota_on(struct super_block *sb, int type, int format_id, + const struct path *path) +{ + int err; + struct inode *inode; + + err = dquot_quota_on(sb, type, format_id, path); + if (err) + return err; + + inode = d_inode(path->dentry); + inode_lock(inode); + JFS_IP(inode)->mode2 |= JFS_NOATIME_FL | JFS_IMMUTABLE_FL; + inode_set_flags(inode, S_NOATIME | S_IMMUTABLE, + S_NOATIME | S_IMMUTABLE); + inode_unlock(inode); + mark_inode_dirty(inode); + + return 0; +} + +static int jfs_quota_off(struct super_block *sb, int type) +{ + struct inode *inode = sb_dqopt(sb)->files[type]; + int err; + + if (!inode || !igrab(inode)) + goto out; + + err = dquot_quota_off(sb, type); + if (err) + goto out_put; + + inode_lock(inode); + JFS_IP(inode)->mode2 &= ~(JFS_NOATIME_FL | JFS_IMMUTABLE_FL); + inode_set_flags(inode, 0, S_NOATIME | S_IMMUTABLE); + inode_unlock(inode); + mark_inode_dirty(inode); +out_put: + iput(inode); + return err; +out: + return dquot_quota_off(sb, type); +} #endif static const struct super_operations jfs_super_operations = { @@ -59,7 +59,7 @@ struct mount { struct mountpoint *mnt_mp; /* where is it mounted */ struct hlist_node mnt_mp_list; /* list mounts with the same mountpoint */ #ifdef CONFIG_FSNOTIFY - struct hlist_head mnt_fsnotify_marks; + struct fsnotify_mark_connector __rcu *mnt_fsnotify_marks; __u32 mnt_fsnotify_mask; #endif int mnt_id; /* mount identifier */ diff --git a/fs/namespace.c b/fs/namespace.c index cc1375ef..b3b115b 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -236,9 +236,6 @@ static struct mount *alloc_vfsmnt(const char *name) INIT_LIST_HEAD(&mnt->mnt_slave_list); INIT_LIST_HEAD(&mnt->mnt_slave); INIT_HLIST_NODE(&mnt->mnt_mp_list); -#ifdef CONFIG_FSNOTIFY - INIT_HLIST_HEAD(&mnt->mnt_fsnotify_marks); -#endif init_fs_pin(&mnt->mnt_umount, drop_mountpoint); } return mnt; diff --git a/fs/notify/Makefile b/fs/notify/Makefile index 96d3420..3e969ae 100644 --- a/fs/notify/Makefile +++ b/fs/notify/Makefile @@ -1,5 +1,5 @@ -obj-$(CONFIG_FSNOTIFY) += 
fsnotify.o notification.o group.o inode_mark.o \ - mark.o vfsmount_mark.o fdinfo.o +obj-$(CONFIG_FSNOTIFY) += fsnotify.o notification.o group.o mark.o \ + fdinfo.o obj-y += dnotify/ obj-y += inotify/ diff --git a/fs/notify/dnotify/dnotify.c b/fs/notify/dnotify/dnotify.c index 5a4ec30..2430a04 100644 --- a/fs/notify/dnotify/dnotify.c +++ b/fs/notify/dnotify/dnotify.c @@ -52,7 +52,7 @@ struct dnotify_mark { */ static void dnotify_recalc_inode_mask(struct fsnotify_mark *fsn_mark) { - __u32 new_mask, old_mask; + __u32 new_mask = 0; struct dnotify_struct *dn; struct dnotify_mark *dn_mark = container_of(fsn_mark, struct dnotify_mark, @@ -60,17 +60,13 @@ static void dnotify_recalc_inode_mask(struct fsnotify_mark *fsn_mark) assert_spin_locked(&fsn_mark->lock); - old_mask = fsn_mark->mask; - new_mask = 0; for (dn = dn_mark->dn; dn != NULL; dn = dn->dn_next) new_mask |= (dn->dn_mask & ~FS_DN_MULTISHOT); - fsnotify_set_mark_mask_locked(fsn_mark, new_mask); - - if (old_mask == new_mask) + if (fsn_mark->mask == new_mask) return; + fsn_mark->mask = new_mask; - if (fsn_mark->inode) - fsnotify_recalc_inode_mask(fsn_mark->inode); + fsnotify_recalc_mask(fsn_mark->connector); } /* @@ -86,7 +82,8 @@ static int dnotify_handle_event(struct fsnotify_group *group, struct fsnotify_mark *inode_mark, struct fsnotify_mark *vfsmount_mark, u32 mask, const void *data, int data_type, - const unsigned char *file_name, u32 cookie) + const unsigned char *file_name, u32 cookie, + struct fsnotify_iter_info *iter_info) { struct dnotify_mark *dn_mark; struct dnotify_struct *dn; @@ -138,6 +135,7 @@ static void dnotify_free_mark(struct fsnotify_mark *fsn_mark) static struct fsnotify_ops dnotify_fsnotify_ops = { .handle_event = dnotify_handle_event, + .free_mark = dnotify_free_mark, }; /* @@ -160,7 +158,7 @@ void dnotify_flush(struct file *filp, fl_owner_t id) if (!S_ISDIR(inode->i_mode)) return; - fsn_mark = fsnotify_find_inode_mark(dnotify_group, inode); + fsn_mark = fsnotify_find_mark(&inode->i_fsnotify_marks, dnotify_group); if (!fsn_mark) return; dn_mark = container_of(fsn_mark, struct dnotify_mark, fsn_mark); @@ -308,7 +306,7 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg) /* set up the new_fsn_mark and new_dn_mark */ new_fsn_mark = &new_dn_mark->fsn_mark; - fsnotify_init_mark(new_fsn_mark, dnotify_free_mark); + fsnotify_init_mark(new_fsn_mark, dnotify_group); new_fsn_mark->mask = mask; new_dn_mark->dn = NULL; @@ -316,13 +314,12 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg) mutex_lock(&dnotify_group->mark_mutex); /* add the new_fsn_mark or find an old one. 
*/ - fsn_mark = fsnotify_find_inode_mark(dnotify_group, inode); + fsn_mark = fsnotify_find_mark(&inode->i_fsnotify_marks, dnotify_group); if (fsn_mark) { dn_mark = container_of(fsn_mark, struct dnotify_mark, fsn_mark); spin_lock(&fsn_mark->lock); } else { - fsnotify_add_mark_locked(new_fsn_mark, dnotify_group, inode, - NULL, 0); + fsnotify_add_mark_locked(new_fsn_mark, inode, NULL, 0); spin_lock(&new_fsn_mark->lock); fsn_mark = new_fsn_mark; dn_mark = new_dn_mark; diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c index e5f7e47..2fa99ae 100644 --- a/fs/notify/fanotify/fanotify.c +++ b/fs/notify/fanotify/fanotify.c @@ -57,14 +57,26 @@ static int fanotify_merge(struct list_head *list, struct fsnotify_event *event) #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS static int fanotify_get_response(struct fsnotify_group *group, - struct fanotify_perm_event_info *event) + struct fanotify_perm_event_info *event, + struct fsnotify_iter_info *iter_info) { int ret; pr_debug("%s: group=%p event=%p\n", __func__, group, event); + /* + * fsnotify_prepare_user_wait() fails if we race with mark deletion. + * Just let the operation pass in that case. + */ + if (!fsnotify_prepare_user_wait(iter_info)) { + event->response = FAN_ALLOW; + goto out; + } + wait_event(group->fanotify_data.access_waitq, event->response); + fsnotify_finish_user_wait(iter_info); +out: /* userspace responded, convert to something usable */ switch (event->response) { case FAN_ALLOW: @@ -174,7 +186,8 @@ static int fanotify_handle_event(struct fsnotify_group *group, struct fsnotify_mark *inode_mark, struct fsnotify_mark *fanotify_mark, u32 mask, const void *data, int data_type, - const unsigned char *file_name, u32 cookie) + const unsigned char *file_name, u32 cookie, + struct fsnotify_iter_info *iter_info) { int ret = 0; struct fanotify_event_info *event; @@ -215,7 +228,8 @@ static int fanotify_handle_event(struct fsnotify_group *group, #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS if (mask & FAN_ALL_PERM_EVENTS) { - ret = fanotify_get_response(group, FANOTIFY_PE(fsn_event)); + ret = fanotify_get_response(group, FANOTIFY_PE(fsn_event), + iter_info); fsnotify_destroy_event(group, fsn_event); } #endif @@ -248,8 +262,14 @@ static void fanotify_free_event(struct fsnotify_event *fsn_event) kmem_cache_free(fanotify_event_cachep, event); } +static void fanotify_free_mark(struct fsnotify_mark *fsn_mark) +{ + kmem_cache_free(fanotify_mark_cache, fsn_mark); +} + const struct fsnotify_ops fanotify_fsnotify_ops = { .handle_event = fanotify_handle_event, .free_group_priv = fanotify_free_group_priv, .free_event = fanotify_free_event, + .free_mark = fanotify_free_mark, }; diff --git a/fs/notify/fanotify/fanotify.h b/fs/notify/fanotify/fanotify.h index 4500a74..4eb6f5e 100644 --- a/fs/notify/fanotify/fanotify.h +++ b/fs/notify/fanotify/fanotify.h @@ -2,6 +2,7 @@ #include <linux/path.h> #include <linux/slab.h> +extern struct kmem_cache *fanotify_mark_cache; extern struct kmem_cache *fanotify_event_cachep; extern struct kmem_cache *fanotify_perm_event_cachep; diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index 2b37f27..907a481 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -41,7 +41,7 @@ extern const struct fsnotify_ops fanotify_fsnotify_ops; -static struct kmem_cache *fanotify_mark_cache __read_mostly; +struct kmem_cache *fanotify_mark_cache __read_mostly; struct kmem_cache *fanotify_event_cachep __read_mostly; struct kmem_cache *fanotify_perm_event_cachep 
__read_mostly; @@ -295,27 +295,37 @@ static ssize_t fanotify_read(struct file *file, char __user *buf, } ret = copy_event_to_user(group, kevent, buf); + if (unlikely(ret == -EOPENSTALE)) { + /* + * We cannot report events with stale fd so drop it. + * Setting ret to 0 will continue the event loop and + * do the right thing if there are no more events to + * read (i.e. return bytes read, -EAGAIN or wait). + */ + ret = 0; + } + /* * Permission events get queued to wait for response. Other * events can be destroyed now. */ if (!(kevent->mask & FAN_ALL_PERM_EVENTS)) { fsnotify_destroy_event(group, kevent); - if (ret < 0) - break; } else { #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS - if (ret < 0) { + if (ret <= 0) { FANOTIFY_PE(kevent)->response = FAN_DENY; wake_up(&group->fanotify_data.access_waitq); - break; + } else { + spin_lock(&group->notification_lock); + list_add_tail(&kevent->list, + &group->fanotify_data.access_list); + spin_unlock(&group->notification_lock); } - spin_lock(&group->notification_lock); - list_add_tail(&kevent->list, - &group->fanotify_data.access_list); - spin_unlock(&group->notification_lock); #endif } + if (ret < 0) + break; buf += ret; count -= ret; } @@ -445,11 +455,6 @@ static const struct file_operations fanotify_fops = { .llseek = noop_llseek, }; -static void fanotify_free_mark(struct fsnotify_mark *fsn_mark) -{ - kmem_cache_free(fanotify_mark_cache, fsn_mark); -} - static int fanotify_find_path(int dfd, const char __user *filename, struct path *path, unsigned int flags) { @@ -511,13 +516,12 @@ static __u32 fanotify_mark_remove_from_mask(struct fsnotify_mark *fsn_mark, tmask &= ~FAN_ONDIR; oldmask = fsn_mark->mask; - fsnotify_set_mark_mask_locked(fsn_mark, tmask); + fsn_mark->mask = tmask; } else { __u32 tmask = fsn_mark->ignored_mask & ~mask; if (flags & FAN_MARK_ONDIR) tmask &= ~FAN_ONDIR; - - fsnotify_set_mark_ignored_mask_locked(fsn_mark, tmask); + fsn_mark->ignored_mask = tmask; } *destroy = !(fsn_mark->mask | fsn_mark->ignored_mask); spin_unlock(&fsn_mark->lock); @@ -534,7 +538,8 @@ static int fanotify_remove_vfsmount_mark(struct fsnotify_group *group, int destroy_mark; mutex_lock(&group->mark_mutex); - fsn_mark = fsnotify_find_vfsmount_mark(group, mnt); + fsn_mark = fsnotify_find_mark(&real_mount(mnt)->mnt_fsnotify_marks, + group); if (!fsn_mark) { mutex_unlock(&group->mark_mutex); return -ENOENT; @@ -542,6 +547,8 @@ static int fanotify_remove_vfsmount_mark(struct fsnotify_group *group, removed = fanotify_mark_remove_from_mask(fsn_mark, mask, flags, &destroy_mark); + if (removed & real_mount(mnt)->mnt_fsnotify_mask) + fsnotify_recalc_mask(real_mount(mnt)->mnt_fsnotify_marks); if (destroy_mark) fsnotify_detach_mark(fsn_mark); mutex_unlock(&group->mark_mutex); @@ -549,9 +556,6 @@ static int fanotify_remove_vfsmount_mark(struct fsnotify_group *group, fsnotify_free_mark(fsn_mark); fsnotify_put_mark(fsn_mark); - if (removed & real_mount(mnt)->mnt_fsnotify_mask) - fsnotify_recalc_vfsmount_mask(mnt); - return 0; } @@ -564,7 +568,7 @@ static int fanotify_remove_inode_mark(struct fsnotify_group *group, int destroy_mark; mutex_lock(&group->mark_mutex); - fsn_mark = fsnotify_find_inode_mark(group, inode); + fsn_mark = fsnotify_find_mark(&inode->i_fsnotify_marks, group); if (!fsn_mark) { mutex_unlock(&group->mark_mutex); return -ENOENT; @@ -572,16 +576,16 @@ static int fanotify_remove_inode_mark(struct fsnotify_group *group, removed = fanotify_mark_remove_from_mask(fsn_mark, mask, flags, &destroy_mark); + if (removed & inode->i_fsnotify_mask) + 
fsnotify_recalc_mask(inode->i_fsnotify_marks); if (destroy_mark) fsnotify_detach_mark(fsn_mark); mutex_unlock(&group->mark_mutex); if (destroy_mark) fsnotify_free_mark(fsn_mark); - /* matches the fsnotify_find_inode_mark() */ + /* matches the fsnotify_find_mark() */ fsnotify_put_mark(fsn_mark); - if (removed & inode->i_fsnotify_mask) - fsnotify_recalc_inode_mask(inode); return 0; } @@ -600,13 +604,13 @@ static __u32 fanotify_mark_add_to_mask(struct fsnotify_mark *fsn_mark, tmask |= FAN_ONDIR; oldmask = fsn_mark->mask; - fsnotify_set_mark_mask_locked(fsn_mark, tmask); + fsn_mark->mask = tmask; } else { __u32 tmask = fsn_mark->ignored_mask | mask; if (flags & FAN_MARK_ONDIR) tmask |= FAN_ONDIR; - fsnotify_set_mark_ignored_mask_locked(fsn_mark, tmask); + fsn_mark->ignored_mask = tmask; if (flags & FAN_MARK_IGNORED_SURV_MODIFY) fsn_mark->flags |= FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY; } @@ -629,8 +633,8 @@ static struct fsnotify_mark *fanotify_add_new_mark(struct fsnotify_group *group, if (!mark) return ERR_PTR(-ENOMEM); - fsnotify_init_mark(mark, fanotify_free_mark); - ret = fsnotify_add_mark_locked(mark, group, inode, mnt, 0); + fsnotify_init_mark(mark, group); + ret = fsnotify_add_mark_locked(mark, inode, mnt, 0); if (ret) { fsnotify_put_mark(mark); return ERR_PTR(ret); @@ -648,7 +652,8 @@ static int fanotify_add_vfsmount_mark(struct fsnotify_group *group, __u32 added; mutex_lock(&group->mark_mutex); - fsn_mark = fsnotify_find_vfsmount_mark(group, mnt); + fsn_mark = fsnotify_find_mark(&real_mount(mnt)->mnt_fsnotify_marks, + group); if (!fsn_mark) { fsn_mark = fanotify_add_new_mark(group, NULL, mnt); if (IS_ERR(fsn_mark)) { @@ -657,10 +662,9 @@ static int fanotify_add_vfsmount_mark(struct fsnotify_group *group, } } added = fanotify_mark_add_to_mask(fsn_mark, mask, flags); - mutex_unlock(&group->mark_mutex); - if (added & ~real_mount(mnt)->mnt_fsnotify_mask) - fsnotify_recalc_vfsmount_mask(mnt); + fsnotify_recalc_mask(real_mount(mnt)->mnt_fsnotify_marks); + mutex_unlock(&group->mark_mutex); fsnotify_put_mark(fsn_mark); return 0; @@ -686,7 +690,7 @@ static int fanotify_add_inode_mark(struct fsnotify_group *group, return 0; mutex_lock(&group->mark_mutex); - fsn_mark = fsnotify_find_inode_mark(group, inode); + fsn_mark = fsnotify_find_mark(&inode->i_fsnotify_marks, group); if (!fsn_mark) { fsn_mark = fanotify_add_new_mark(group, inode, NULL); if (IS_ERR(fsn_mark)) { @@ -695,10 +699,9 @@ static int fanotify_add_inode_mark(struct fsnotify_group *group, } } added = fanotify_mark_add_to_mask(fsn_mark, mask, flags); - mutex_unlock(&group->mark_mutex); - if (added & ~inode->i_fsnotify_mask) - fsnotify_recalc_inode_mask(inode); + fsnotify_recalc_mask(inode->i_fsnotify_marks); + mutex_unlock(&group->mark_mutex); fsnotify_put_mark(fsn_mark); return 0; diff --git a/fs/notify/fdinfo.c b/fs/notify/fdinfo.c index fd98e51..dd63aa9 100644 --- a/fs/notify/fdinfo.c +++ b/fs/notify/fdinfo.c @@ -76,12 +76,11 @@ static void inotify_fdinfo(struct seq_file *m, struct fsnotify_mark *mark) struct inotify_inode_mark *inode_mark; struct inode *inode; - if (!(mark->flags & FSNOTIFY_MARK_FLAG_ALIVE) || - !(mark->flags & FSNOTIFY_MARK_FLAG_INODE)) + if (!(mark->connector->flags & FSNOTIFY_OBJ_TYPE_INODE)) return; inode_mark = container_of(mark, struct inotify_inode_mark, fsn_mark); - inode = igrab(mark->inode); + inode = igrab(mark->connector->inode); if (inode) { /* * IN_ALL_EVENTS represents all of the mask bits @@ -113,14 +112,11 @@ static void fanotify_fdinfo(struct seq_file *m, struct fsnotify_mark *mark) unsigned int 
mflags = 0; struct inode *inode; - if (!(mark->flags & FSNOTIFY_MARK_FLAG_ALIVE)) - return; - if (mark->flags & FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY) mflags |= FAN_MARK_IGNORED_SURV_MODIFY; - if (mark->flags & FSNOTIFY_MARK_FLAG_INODE) { - inode = igrab(mark->inode); + if (mark->connector->flags & FSNOTIFY_OBJ_TYPE_INODE) { + inode = igrab(mark->connector->inode); if (!inode) return; seq_printf(m, "fanotify ino:%lx sdev:%x mflags:%x mask:%x ignored_mask:%x ", @@ -129,8 +125,8 @@ static void fanotify_fdinfo(struct seq_file *m, struct fsnotify_mark *mark) show_mark_fhandle(m, inode); seq_putc(m, '\n'); iput(inode); - } else if (mark->flags & FSNOTIFY_MARK_FLAG_VFSMOUNT) { - struct mount *mnt = real_mount(mark->mnt); + } else if (mark->connector->flags & FSNOTIFY_OBJ_TYPE_VFSMOUNT) { + struct mount *mnt = real_mount(mark->connector->mnt); seq_printf(m, "fanotify mnt_id:%x mflags:%x mask:%x ignored_mask:%x\n", mnt->mnt_id, mflags, mark->mask, mark->ignored_mask); diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c index b41515d..01a9f0f 100644 --- a/fs/notify/fsnotify.c +++ b/fs/notify/fsnotify.c @@ -41,6 +41,63 @@ void __fsnotify_vfsmount_delete(struct vfsmount *mnt) fsnotify_clear_marks_by_mount(mnt); } +/** + * fsnotify_unmount_inodes - an sb is unmounting. handle any watched inodes. + * @sb: superblock being unmounted. + * + * Called during unmount with no locks held, so needs to be safe against + * concurrent modifiers. We temporarily drop sb->s_inode_list_lock and CAN block. + */ +void fsnotify_unmount_inodes(struct super_block *sb) +{ + struct inode *inode, *iput_inode = NULL; + + spin_lock(&sb->s_inode_list_lock); + list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { + /* + * We cannot __iget() an inode in state I_FREEING, + * I_WILL_FREE, or I_NEW which is fine because by that point + * the inode cannot have any associated watches. + */ + spin_lock(&inode->i_lock); + if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) { + spin_unlock(&inode->i_lock); + continue; + } + + /* + * If i_count is zero, the inode cannot have any watches and + * doing an __iget/iput with MS_ACTIVE clear would actually + * evict all inodes with zero i_count from icache which is + * unnecessarily violent and may in fact be illegal to do. + */ + if (!atomic_read(&inode->i_count)) { + spin_unlock(&inode->i_lock); + continue; + } + + __iget(inode); + spin_unlock(&inode->i_lock); + spin_unlock(&sb->s_inode_list_lock); + + if (iput_inode) + iput(iput_inode); + + /* for each watch, send FS_UNMOUNT and then remove it */ + fsnotify(inode, FS_UNMOUNT, inode, FSNOTIFY_EVENT_INODE, NULL, 0); + + fsnotify_inode_delete(inode); + + iput_inode = inode; + + spin_lock(&sb->s_inode_list_lock); + } + spin_unlock(&sb->s_inode_list_lock); + + if (iput_inode) + iput(iput_inode); +} + /* * Given an inode, first check if we care what happens to our children. Inotify * and dnotify both tell their parents about events. 
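[Editor's note: in the fdinfo hunks above, a mark's watched object is now reached through mark->connector instead of fields on the mark itself. A hypothetical helper illustrating the new dispatch (the helper name is illustrative, not part of this series):]

        static struct inode *mark_grab_inode(struct fsnotify_mark *mark)
        {
                /* The connector records which object type the mark list hangs off. */
                if (!(mark->connector->flags & FSNOTIFY_OBJ_TYPE_INODE))
                        return NULL;
                return igrab(mark->connector->inode);   /* may still return NULL */
        }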
If we care about any event @@ -127,7 +184,8 @@ static int send_to_group(struct inode *to_tell, struct fsnotify_mark *vfsmount_mark, __u32 mask, const void *data, int data_is, u32 cookie, - const unsigned char *file_name) + const unsigned char *file_name, + struct fsnotify_iter_info *iter_info) { struct fsnotify_group *group = NULL; __u32 inode_test_mask = 0; @@ -178,7 +236,7 @@ static int send_to_group(struct inode *to_tell, return group->ops->handle_event(group, to_tell, inode_mark, vfsmount_mark, mask, data, data_is, - file_name, cookie); + file_name, cookie, iter_info); } /* @@ -193,8 +251,10 @@ int fsnotify(struct inode *to_tell, __u32 mask, const void *data, int data_is, struct hlist_node *inode_node = NULL, *vfsmount_node = NULL; struct fsnotify_mark *inode_mark = NULL, *vfsmount_mark = NULL; struct fsnotify_group *inode_group, *vfsmount_group; + struct fsnotify_mark_connector *inode_conn, *vfsmount_conn; + struct fsnotify_iter_info iter_info; struct mount *mnt; - int idx, ret = 0; + int ret = 0; /* global tests shouldn't care about events on child only the specific event */ __u32 test_mask = (mask & ~FS_EVENT_ON_CHILD); @@ -210,8 +270,8 @@ int fsnotify(struct inode *to_tell, __u32 mask, const void *data, int data_is, * SRCU because we have no references to any objects and do not * need SRCU to keep them "alive". */ - if (hlist_empty(&to_tell->i_fsnotify_marks) && - (!mnt || hlist_empty(&mnt->mnt_fsnotify_marks))) + if (!to_tell->i_fsnotify_marks && + (!mnt || !mnt->mnt_fsnotify_marks)) return 0; /* * if this is a modify event we may need to clear the ignored masks @@ -223,19 +283,30 @@ int fsnotify(struct inode *to_tell, __u32 mask, const void *data, int data_is, !(mnt && test_mask & mnt->mnt_fsnotify_mask)) return 0; - idx = srcu_read_lock(&fsnotify_mark_srcu); + iter_info.srcu_idx = srcu_read_lock(&fsnotify_mark_srcu); if ((mask & FS_MODIFY) || - (test_mask & to_tell->i_fsnotify_mask)) - inode_node = srcu_dereference(to_tell->i_fsnotify_marks.first, + (test_mask & to_tell->i_fsnotify_mask)) { + inode_conn = srcu_dereference(to_tell->i_fsnotify_marks, &fsnotify_mark_srcu); + if (inode_conn) + inode_node = srcu_dereference(inode_conn->list.first, + &fsnotify_mark_srcu); + } if (mnt && ((mask & FS_MODIFY) || (test_mask & mnt->mnt_fsnotify_mask))) { - vfsmount_node = srcu_dereference(mnt->mnt_fsnotify_marks.first, - &fsnotify_mark_srcu); - inode_node = srcu_dereference(to_tell->i_fsnotify_marks.first, + inode_conn = srcu_dereference(to_tell->i_fsnotify_marks, &fsnotify_mark_srcu); + if (inode_conn) + inode_node = srcu_dereference(inode_conn->list.first, + &fsnotify_mark_srcu); + vfsmount_conn = srcu_dereference(mnt->mnt_fsnotify_marks, + &fsnotify_mark_srcu); + if (vfsmount_conn) + vfsmount_node = srcu_dereference( + vfsmount_conn->list.first, + &fsnotify_mark_srcu); } /* @@ -272,8 +343,13 @@ int fsnotify(struct inode *to_tell, __u32 mask, const void *data, int data_is, vfsmount_mark = NULL; } } + + iter_info.inode_mark = inode_mark; + iter_info.vfsmount_mark = vfsmount_mark; + ret = send_to_group(to_tell, inode_mark, vfsmount_mark, mask, - data, data_is, cookie, file_name); + data, data_is, cookie, file_name, + &iter_info); if (ret && (mask & ALL_FSNOTIFY_PERM_EVENTS)) goto out; @@ -287,12 +363,14 @@ int fsnotify(struct inode *to_tell, __u32 mask, const void *data, int data_is, } ret = 0; out: - srcu_read_unlock(&fsnotify_mark_srcu, idx); + srcu_read_unlock(&fsnotify_mark_srcu, iter_info.srcu_idx); return ret; } EXPORT_SYMBOL_GPL(fsnotify); +extern struct kmem_cache 
*fsnotify_mark_connector_cachep; + static __init int fsnotify_init(void) { int ret; @@ -303,6 +381,9 @@ static __init int fsnotify_init(void) if (ret) panic("initializing fsnotify_mark_srcu"); + fsnotify_mark_connector_cachep = KMEM_CACHE(fsnotify_mark_connector, + SLAB_PANIC); + return 0; } core_initcall(fsnotify_init); diff --git a/fs/notify/fsnotify.h b/fs/notify/fsnotify.h index 0a3bc2c..bf012e8 100644 --- a/fs/notify/fsnotify.h +++ b/fs/notify/fsnotify.h @@ -8,60 +8,36 @@ #include "../mount.h" +struct fsnotify_iter_info { + struct fsnotify_mark *inode_mark; + struct fsnotify_mark *vfsmount_mark; + int srcu_idx; +}; + /* destroy all events sitting in this groups notification queue */ extern void fsnotify_flush_notify(struct fsnotify_group *group); /* protects reads of inode and vfsmount marks list */ extern struct srcu_struct fsnotify_mark_srcu; -/* Calculate mask of events for a list of marks */ -extern u32 fsnotify_recalc_mask(struct hlist_head *head); - /* compare two groups for sorting of marks lists */ extern int fsnotify_compare_groups(struct fsnotify_group *a, struct fsnotify_group *b); -extern void fsnotify_set_inode_mark_mask_locked(struct fsnotify_mark *fsn_mark, - __u32 mask); -/* Add mark to a proper place in mark list */ -extern int fsnotify_add_mark_list(struct hlist_head *head, - struct fsnotify_mark *mark, - int allow_dups); -/* add a mark to an inode */ -extern int fsnotify_add_inode_mark(struct fsnotify_mark *mark, - struct fsnotify_group *group, struct inode *inode, - int allow_dups); -/* add a mark to a vfsmount */ -extern int fsnotify_add_vfsmount_mark(struct fsnotify_mark *mark, - struct fsnotify_group *group, struct vfsmount *mnt, - int allow_dups); - -/* vfsmount specific destruction of a mark */ -extern void fsnotify_destroy_vfsmount_mark(struct fsnotify_mark *mark); -/* inode specific destruction of a mark */ -extern void fsnotify_destroy_inode_mark(struct fsnotify_mark *mark); -/* Find mark belonging to given group in the list of marks */ -extern struct fsnotify_mark *fsnotify_find_mark(struct hlist_head *head, - struct fsnotify_group *group); -/* Destroy all marks in the given list protected by 'lock' */ -extern void fsnotify_destroy_marks(struct hlist_head *head, spinlock_t *lock); +/* Destroy all marks connected via given connector */ +extern void fsnotify_destroy_marks(struct fsnotify_mark_connector __rcu **connp); /* run the list of all marks associated with inode and destroy them */ static inline void fsnotify_clear_marks_by_inode(struct inode *inode) { - fsnotify_destroy_marks(&inode->i_fsnotify_marks, &inode->i_lock); + fsnotify_destroy_marks(&inode->i_fsnotify_marks); } /* run the list of all marks associated with vfsmount and destroy them */ static inline void fsnotify_clear_marks_by_mount(struct vfsmount *mnt) { - fsnotify_destroy_marks(&real_mount(mnt)->mnt_fsnotify_marks, - &mnt->mnt_root->d_lock); + fsnotify_destroy_marks(&real_mount(mnt)->mnt_fsnotify_marks); } -/* prepare for freeing all marks associated with given group */ -extern void fsnotify_detach_group_marks(struct fsnotify_group *group); -/* - * wait for fsnotify_mark_srcu period to end and free all marks in destroy_list - */ -extern void fsnotify_mark_destroy_list(void); +/* Wait until all marks queued for destruction are destroyed */ +extern void fsnotify_wait_marks_destroyed(void); /* * update the dentry->d_flags of all of inode's children to indicate if inode cares diff --git a/fs/notify/group.c b/fs/notify/group.c index fbe3cbe..32357534 100644 --- a/fs/notify/group.c +++ 
b/fs/notify/group.c @@ -66,14 +66,23 @@ void fsnotify_destroy_group(struct fsnotify_group *group) */ fsnotify_group_stop_queueing(group); - /* clear all inode marks for this group, attach them to destroy_list */ - fsnotify_detach_group_marks(group); + /* Clear all marks for this group and queue them for destruction */ + fsnotify_clear_marks_by_group(group, FSNOTIFY_OBJ_ALL_TYPES); /* - * Wait for fsnotify_mark_srcu period to end and free all marks in - * destroy_list + * Some marks can still be pinned when waiting for response from + * userspace. Wait for those now. fsnotify_prepare_user_wait() will + * not succeed now so this wait is race-free. */ - fsnotify_mark_destroy_list(); + wait_event(group->notification_waitq, !atomic_read(&group->user_waits)); + + /* + * Wait until all marks get really destroyed. We could actually destroy + * them ourselves instead of waiting for worker to do it, however that + * would be racy as worker can already be processing some marks before + * we even entered fsnotify_destroy_group(). + */ + fsnotify_wait_marks_destroyed(); /* * Since we have waited for fsnotify_mark_srcu in @@ -124,6 +133,7 @@ struct fsnotify_group *fsnotify_alloc_group(const struct fsnotify_ops *ops) /* set to 0 when there a no external references to this group */ atomic_set(&group->refcnt, 1); atomic_set(&group->num_marks, 0); + atomic_set(&group->user_waits, 0); spin_lock_init(&group->notification_lock); INIT_LIST_HEAD(&group->notification_list); diff --git a/fs/notify/inode_mark.c b/fs/notify/inode_mark.c deleted file mode 100644 index a364524..0000000 --- a/fs/notify/inode_mark.c +++ /dev/null @@ -1,199 +0,0 @@ -/* - * Copyright (C) 2008 Red Hat, Inc., Eric Paris <eparis@redhat.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; see the file COPYING. If not, write to - * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -#include <linux/fs.h> -#include <linux/init.h> -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/mutex.h> -#include <linux/spinlock.h> - -#include <linux/atomic.h> - -#include <linux/fsnotify_backend.h> -#include "fsnotify.h" - -#include "../internal.h" - -/* - * Recalculate the inode->i_fsnotify_mask, or the mask of all FS_* event types - * any notifier is interested in hearing for this inode. 
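[Editor's note: with inode_mark.c deleted (deletion continues below), mask recalculation is unified: callers no longer use per-object helpers but hand fsnotify_recalc_mask() the connector, under group->mark_mutex. Usage as it appears in the fanotify/inotify hunks of this series:]

        /* Old per-object API, removed by this series: */
        fsnotify_recalc_inode_mask(inode);
        fsnotify_recalc_vfsmount_mask(mnt);

        /* New connector-based call (made while holding group->mark_mutex): */
        fsnotify_recalc_mask(inode->i_fsnotify_marks);
        fsnotify_recalc_mask(real_mount(mnt)->mnt_fsnotify_marks);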
- */ -void fsnotify_recalc_inode_mask(struct inode *inode) -{ - spin_lock(&inode->i_lock); - inode->i_fsnotify_mask = fsnotify_recalc_mask(&inode->i_fsnotify_marks); - spin_unlock(&inode->i_lock); - - __fsnotify_update_child_dentry_flags(inode); -} - -void fsnotify_destroy_inode_mark(struct fsnotify_mark *mark) -{ - struct inode *inode = mark->inode; - - BUG_ON(!mutex_is_locked(&mark->group->mark_mutex)); - assert_spin_locked(&mark->lock); - - spin_lock(&inode->i_lock); - - hlist_del_init_rcu(&mark->obj_list); - mark->inode = NULL; - - /* - * this mark is now off the inode->i_fsnotify_marks list and we - * hold the inode->i_lock, so this is the perfect time to update the - * inode->i_fsnotify_mask - */ - inode->i_fsnotify_mask = fsnotify_recalc_mask(&inode->i_fsnotify_marks); - spin_unlock(&inode->i_lock); -} - -/* - * Given a group clear all of the inode marks associated with that group. - */ -void fsnotify_clear_inode_marks_by_group(struct fsnotify_group *group) -{ - fsnotify_clear_marks_by_group_flags(group, FSNOTIFY_MARK_FLAG_INODE); -} - -/* - * given a group and inode, find the mark associated with that combination. - * if found take a reference to that mark and return it, else return NULL - */ -struct fsnotify_mark *fsnotify_find_inode_mark(struct fsnotify_group *group, - struct inode *inode) -{ - struct fsnotify_mark *mark; - - spin_lock(&inode->i_lock); - mark = fsnotify_find_mark(&inode->i_fsnotify_marks, group); - spin_unlock(&inode->i_lock); - - return mark; -} - -/* - * If we are setting a mark mask on an inode mark we should pin the inode - * in memory. - */ -void fsnotify_set_inode_mark_mask_locked(struct fsnotify_mark *mark, - __u32 mask) -{ - struct inode *inode; - - assert_spin_locked(&mark->lock); - - if (mask && - mark->inode && - !(mark->flags & FSNOTIFY_MARK_FLAG_OBJECT_PINNED)) { - mark->flags |= FSNOTIFY_MARK_FLAG_OBJECT_PINNED; - inode = igrab(mark->inode); - /* - * we shouldn't be able to get here if the inode wasn't - * already safely held in memory. But bug in case it - * ever is wrong. - */ - BUG_ON(!inode); - } -} - -/* - * Attach an initialized mark to a given inode. - * These marks may be used for the fsnotify backend to determine which - * event types should be delivered to which group and for which inodes. These - * marks are ordered according to priority, highest number first, and then by - * the group's location in memory. - */ -int fsnotify_add_inode_mark(struct fsnotify_mark *mark, - struct fsnotify_group *group, struct inode *inode, - int allow_dups) -{ - int ret; - - mark->flags |= FSNOTIFY_MARK_FLAG_INODE; - - BUG_ON(!mutex_is_locked(&group->mark_mutex)); - assert_spin_locked(&mark->lock); - - spin_lock(&inode->i_lock); - mark->inode = inode; - ret = fsnotify_add_mark_list(&inode->i_fsnotify_marks, mark, - allow_dups); - inode->i_fsnotify_mask = fsnotify_recalc_mask(&inode->i_fsnotify_marks); - spin_unlock(&inode->i_lock); - - return ret; -} - -/** - * fsnotify_unmount_inodes - an sb is unmounting. handle any watched inodes. - * @sb: superblock being unmounted. - * - * Called during unmount with no locks held, so needs to be safe against - * concurrent modifiers. We temporarily drop sb->s_inode_list_lock and CAN block. 
- */ -void fsnotify_unmount_inodes(struct super_block *sb) -{ - struct inode *inode, *iput_inode = NULL; - - spin_lock(&sb->s_inode_list_lock); - list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { - /* - * We cannot __iget() an inode in state I_FREEING, - * I_WILL_FREE, or I_NEW which is fine because by that point - * the inode cannot have any associated watches. - */ - spin_lock(&inode->i_lock); - if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) { - spin_unlock(&inode->i_lock); - continue; - } - - /* - * If i_count is zero, the inode cannot have any watches and - * doing an __iget/iput with MS_ACTIVE clear would actually - * evict all inodes with zero i_count from icache which is - * unnecessarily violent and may in fact be illegal to do. - */ - if (!atomic_read(&inode->i_count)) { - spin_unlock(&inode->i_lock); - continue; - } - - __iget(inode); - spin_unlock(&inode->i_lock); - spin_unlock(&sb->s_inode_list_lock); - - if (iput_inode) - iput(iput_inode); - - /* for each watch, send FS_UNMOUNT and then remove it */ - fsnotify(inode, FS_UNMOUNT, inode, FSNOTIFY_EVENT_INODE, NULL, 0); - - fsnotify_inode_delete(inode); - - iput_inode = inode; - - spin_lock(&sb->s_inode_list_lock); - } - spin_unlock(&sb->s_inode_list_lock); - - if (iput_inode) - iput(iput_inode); -} diff --git a/fs/notify/inotify/inotify.h b/fs/notify/inotify/inotify.h index 7c461fd..9ff67b6 100644 --- a/fs/notify/inotify/inotify.h +++ b/fs/notify/inotify/inotify.h @@ -27,9 +27,11 @@ extern int inotify_handle_event(struct fsnotify_group *group, struct fsnotify_mark *inode_mark, struct fsnotify_mark *vfsmount_mark, u32 mask, const void *data, int data_type, - const unsigned char *file_name, u32 cookie); + const unsigned char *file_name, u32 cookie, + struct fsnotify_iter_info *iter_info); extern const struct fsnotify_ops inotify_fsnotify_ops; +extern struct kmem_cache *inotify_inode_mark_cachep; #ifdef CONFIG_INOTIFY_USER static inline void dec_inotify_instances(struct ucounts *ucounts) diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c index 1aeb837..8b73332 100644 --- a/fs/notify/inotify/inotify_fsnotify.c +++ b/fs/notify/inotify/inotify_fsnotify.c @@ -68,7 +68,8 @@ int inotify_handle_event(struct fsnotify_group *group, struct fsnotify_mark *inode_mark, struct fsnotify_mark *vfsmount_mark, u32 mask, const void *data, int data_type, - const unsigned char *file_name, u32 cookie) + const unsigned char *file_name, u32 cookie, + struct fsnotify_iter_info *iter_info) { struct inotify_inode_mark *i_mark; struct inotify_event_info *event; @@ -156,8 +157,8 @@ static int idr_callback(int id, void *p, void *data) * BUG() that was here. 
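[Editor's note: a recurring change in these hunks is that freeing a mark moves out of a per-mark callback passed to fsnotify_init_mark() and into the group's fsnotify_ops, and handle_event gains an iter_info argument. A minimal sketch of the new shape, modeled on the dnotify/inotify/fanotify hunks ("myfs" names hypothetical):]

        extern struct kmem_cache *myfs_mark_cache;

        static int myfs_handle_event(struct fsnotify_group *group,
                                     struct inode *inode,
                                     struct fsnotify_mark *inode_mark,
                                     struct fsnotify_mark *vfsmount_mark,
                                     u32 mask, const void *data, int data_type,
                                     const unsigned char *file_name, u32 cookie,
                                     struct fsnotify_iter_info *iter_info);

        static void myfs_free_mark(struct fsnotify_mark *mark)
        {
                kmem_cache_free(myfs_mark_cache, mark);
        }

        static const struct fsnotify_ops myfs_fsnotify_ops = {
                .handle_event = myfs_handle_event,
                .free_mark    = myfs_free_mark,
        };

[Marks are then initialized with fsnotify_init_mark(mark, group) rather than fsnotify_init_mark(mark, free_fn).]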
*/ if (fsn_mark) - printk(KERN_WARNING "fsn_mark->group=%p inode=%p wd=%d\n", - fsn_mark->group, fsn_mark->inode, i_mark->wd); + printk(KERN_WARNING "fsn_mark->group=%p wd=%d\n", + fsn_mark->group, i_mark->wd); return 0; } @@ -175,9 +176,20 @@ static void inotify_free_event(struct fsnotify_event *fsn_event) kfree(INOTIFY_E(fsn_event)); } +/* ding dong the mark is dead */ +static void inotify_free_mark(struct fsnotify_mark *fsn_mark) +{ + struct inotify_inode_mark *i_mark; + + i_mark = container_of(fsn_mark, struct inotify_inode_mark, fsn_mark); + + kmem_cache_free(inotify_inode_mark_cachep, i_mark); +} + const struct fsnotify_ops inotify_fsnotify_ops = { .handle_event = inotify_handle_event, .free_group_priv = inotify_free_group_priv, .free_event = inotify_free_event, .freeing_mark = inotify_freeing_mark, + .free_mark = inotify_free_mark, }; diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c index 498d609..7cc7d3f 100644 --- a/fs/notify/inotify/inotify_user.c +++ b/fs/notify/inotify/inotify_user.c @@ -47,7 +47,7 @@ /* configurable via /proc/sys/fs/inotify/ */ static int inotify_max_queued_events __read_mostly; -static struct kmem_cache *inotify_inode_mark_cachep __read_mostly; +struct kmem_cache *inotify_inode_mark_cachep __read_mostly; #ifdef CONFIG_SYSCTL @@ -395,21 +395,6 @@ static struct inotify_inode_mark *inotify_idr_find(struct fsnotify_group *group, return i_mark; } -static void do_inotify_remove_from_idr(struct fsnotify_group *group, - struct inotify_inode_mark *i_mark) -{ - struct idr *idr = &group->inotify_data.idr; - spinlock_t *idr_lock = &group->inotify_data.idr_lock; - int wd = i_mark->wd; - - assert_spin_locked(idr_lock); - - idr_remove(idr, wd); - - /* removed from the idr, drop that ref */ - fsnotify_put_mark(&i_mark->fsn_mark); -} - /* * Remove the mark from the idr (if present) and drop the reference * on the mark because it was in the idr. @@ -417,6 +402,7 @@ static void do_inotify_remove_from_idr(struct fsnotify_group *group, static void inotify_remove_from_idr(struct fsnotify_group *group, struct inotify_inode_mark *i_mark) { + struct idr *idr = &group->inotify_data.idr; spinlock_t *idr_lock = &group->inotify_data.idr_lock; struct inotify_inode_mark *found_i_mark = NULL; int wd; @@ -429,18 +415,16 @@ static void inotify_remove_from_idr(struct fsnotify_group *group, * if it wasn't.... 
*/ if (wd == -1) { - WARN_ONCE(1, "%s: i_mark=%p i_mark->wd=%d i_mark->group=%p" - " i_mark->inode=%p\n", __func__, i_mark, i_mark->wd, - i_mark->fsn_mark.group, i_mark->fsn_mark.inode); + WARN_ONCE(1, "%s: i_mark=%p i_mark->wd=%d i_mark->group=%p\n", + __func__, i_mark, i_mark->wd, i_mark->fsn_mark.group); goto out; } /* Lets look in the idr to see if we find it */ found_i_mark = inotify_idr_find_locked(group, wd); if (unlikely(!found_i_mark)) { - WARN_ONCE(1, "%s: i_mark=%p i_mark->wd=%d i_mark->group=%p" - " i_mark->inode=%p\n", __func__, i_mark, i_mark->wd, - i_mark->fsn_mark.group, i_mark->fsn_mark.inode); + WARN_ONCE(1, "%s: i_mark=%p i_mark->wd=%d i_mark->group=%p\n", + __func__, i_mark, i_mark->wd, i_mark->fsn_mark.group); goto out; } @@ -451,35 +435,33 @@ static void inotify_remove_from_idr(struct fsnotify_group *group, */ if (unlikely(found_i_mark != i_mark)) { WARN_ONCE(1, "%s: i_mark=%p i_mark->wd=%d i_mark->group=%p " - "mark->inode=%p found_i_mark=%p found_i_mark->wd=%d " - "found_i_mark->group=%p found_i_mark->inode=%p\n", - __func__, i_mark, i_mark->wd, i_mark->fsn_mark.group, - i_mark->fsn_mark.inode, found_i_mark, found_i_mark->wd, - found_i_mark->fsn_mark.group, - found_i_mark->fsn_mark.inode); + "found_i_mark=%p found_i_mark->wd=%d " + "found_i_mark->group=%p\n", __func__, i_mark, + i_mark->wd, i_mark->fsn_mark.group, found_i_mark, + found_i_mark->wd, found_i_mark->fsn_mark.group); goto out; } /* * One ref for being in the idr - * one ref held by the caller trying to kill us * one ref grabbed by inotify_idr_find */ - if (unlikely(atomic_read(&i_mark->fsn_mark.refcnt) < 3)) { - printk(KERN_ERR "%s: i_mark=%p i_mark->wd=%d i_mark->group=%p" - " i_mark->inode=%p\n", __func__, i_mark, i_mark->wd, - i_mark->fsn_mark.group, i_mark->fsn_mark.inode); + if (unlikely(atomic_read(&i_mark->fsn_mark.refcnt) < 2)) { + printk(KERN_ERR "%s: i_mark=%p i_mark->wd=%d i_mark->group=%p\n", + __func__, i_mark, i_mark->wd, i_mark->fsn_mark.group); /* we can't really recover with bad ref cnting.. */ BUG(); } - do_inotify_remove_from_idr(group, i_mark); + idr_remove(idr, wd); + /* Removed from the idr, drop that ref. 
*/ + fsnotify_put_mark(&i_mark->fsn_mark); out: + i_mark->wd = -1; + spin_unlock(idr_lock); /* match the ref taken by inotify_idr_find_locked() */ if (found_i_mark) fsnotify_put_mark(&found_i_mark->fsn_mark); - i_mark->wd = -1; - spin_unlock(idr_lock); } /* @@ -492,7 +474,7 @@ void inotify_ignored_and_remove_idr(struct fsnotify_mark *fsn_mark, /* Queue ignore event for the watch */ inotify_handle_event(group, NULL, fsn_mark, NULL, FS_IN_IGNORED, - NULL, FSNOTIFY_EVENT_NONE, NULL, 0); + NULL, FSNOTIFY_EVENT_NONE, NULL, 0, NULL); i_mark = container_of(fsn_mark, struct inotify_inode_mark, fsn_mark); /* remove this mark from the idr */ @@ -501,16 +483,6 @@ void inotify_ignored_and_remove_idr(struct fsnotify_mark *fsn_mark, dec_inotify_watches(group->inotify_data.ucounts); } -/* ding dong the mark is dead */ -static void inotify_free_mark(struct fsnotify_mark *fsn_mark) -{ - struct inotify_inode_mark *i_mark; - - i_mark = container_of(fsn_mark, struct inotify_inode_mark, fsn_mark); - - kmem_cache_free(inotify_inode_mark_cachep, i_mark); -} - static int inotify_update_existing_watch(struct fsnotify_group *group, struct inode *inode, u32 arg) @@ -524,21 +496,19 @@ static int inotify_update_existing_watch(struct fsnotify_group *group, mask = inotify_arg_to_mask(arg); - fsn_mark = fsnotify_find_inode_mark(group, inode); + fsn_mark = fsnotify_find_mark(&inode->i_fsnotify_marks, group); if (!fsn_mark) return -ENOENT; i_mark = container_of(fsn_mark, struct inotify_inode_mark, fsn_mark); spin_lock(&fsn_mark->lock); - old_mask = fsn_mark->mask; if (add) - fsnotify_set_mark_mask_locked(fsn_mark, (fsn_mark->mask | mask)); + fsn_mark->mask |= mask; else - fsnotify_set_mark_mask_locked(fsn_mark, mask); + fsn_mark->mask = mask; new_mask = fsn_mark->mask; - spin_unlock(&fsn_mark->lock); if (old_mask != new_mask) { @@ -549,7 +519,7 @@ static int inotify_update_existing_watch(struct fsnotify_group *group, /* update the inode with this new fsn_mark */ if (dropped || do_inode) - fsnotify_recalc_inode_mask(inode); + fsnotify_recalc_mask(inode->i_fsnotify_marks); } @@ -578,7 +548,7 @@ static int inotify_new_watch(struct fsnotify_group *group, if (unlikely(!tmp_i_mark)) return -ENOMEM; - fsnotify_init_mark(&tmp_i_mark->fsn_mark, inotify_free_mark); + fsnotify_init_mark(&tmp_i_mark->fsn_mark, group); tmp_i_mark->fsn_mark.mask = mask; tmp_i_mark->wd = -1; @@ -594,8 +564,7 @@ static int inotify_new_watch(struct fsnotify_group *group, } /* we are on the idr, now get on the inode */ - ret = fsnotify_add_mark_locked(&tmp_i_mark->fsn_mark, group, inode, - NULL, 0); + ret = fsnotify_add_mark_locked(&tmp_i_mark->fsn_mark, inode, NULL, 0); if (ret) { /* we failed to get on the inode, get off the idr */ inotify_remove_from_idr(group, tmp_i_mark); diff --git a/fs/notify/mark.c b/fs/notify/mark.c index 6043306..9991f88 100644 --- a/fs/notify/mark.c +++ b/fs/notify/mark.c @@ -33,7 +33,7 @@ * * group->mark_mutex * mark->lock - * inode->i_lock + * mark->connector->lock * * group->mark_mutex protects the marks_list anchored inside a given group and * each mark is hooked via the g_list. It also protects the groups private @@ -44,14 +44,22 @@ * is assigned to as well as the access to a reference of the inode/vfsmount * that is being watched by the mark. * - * inode->i_lock protects the i_fsnotify_marks list anchored inside a - * given inode and each mark is hooked via the i_list. 
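[Editor's note: the updated locking comment in mark.c fixes the order as group->mark_mutex -> mark->lock -> mark->connector->lock. A hypothetical illustration of a mask update honoring that order, mirroring how the fanotify add/remove paths above were restructured:]

        static void example_set_mask(struct fsnotify_mark *mark, __u32 mask)
        {
                mutex_lock(&mark->group->mark_mutex);
                spin_lock(&mark->lock);
                mark->mask = mask;
                spin_unlock(&mark->lock);
                /* Takes mark->connector->lock internally. */
                fsnotify_recalc_mask(mark->connector);
                mutex_unlock(&mark->group->mark_mutex);
        }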
(and sorta the - * free_i_list) + * mark->connector->lock protects the list of marks anchored inside an + * inode / vfsmount and each mark is hooked via the i_list. * + * A list of notification marks relating to inode / mnt is contained in + * fsnotify_mark_connector. That structure is alive as long as there are any + * marks in the list and is also protected by fsnotify_mark_srcu. A mark gets + * detached from fsnotify_mark_connector when last reference to the mark is + * dropped. Thus having mark reference is enough to protect mark->connector + * pointer and to make sure fsnotify_mark_connector cannot disappear. Also + * because we remove mark from g_list before dropping mark reference associated + * with that, any mark found through g_list is guaranteed to have + * mark->connector set until we drop group->mark_mutex. * * LIFETIME: * Inode marks survive between when they are added to an inode and when their - * refcnt==0. + * refcnt==0. Marks are also protected by fsnotify_mark_srcu. * * The inode mark can be cleared for a number of different reasons including: * - The inode is unlinked for the last time. (fsnotify_inode_remove) @@ -61,17 +69,6 @@ * - The fsnotify_group associated with the mark is going away and all such marks * need to be cleaned up. (fsnotify_clear_marks_by_group) * - * Worst case we are given an inode and need to clean up all the marks on that - * inode. We take i_lock and walk the i_fsnotify_marks safely. For each - * mark on the list we take a reference (so the mark can't disappear under us). - * We remove that mark form the inode's list of marks and we add this mark to a - * private list anchored on the stack using i_free_list; we walk i_free_list - * and before we destroy the mark we make sure that we dont race with a - * concurrent destroy_group by getting a ref to the marks group and taking the - * groups mutex. - - * Very similarly for freeing by group, except we use free_g_list. - * * This has the very interesting property of being able to run concurrently with * any (or all) other directions. */ @@ -94,94 +91,281 @@ #define FSNOTIFY_REAPER_DELAY (1) /* 1 jiffy */ struct srcu_struct fsnotify_mark_srcu; +struct kmem_cache *fsnotify_mark_connector_cachep; + static DEFINE_SPINLOCK(destroy_lock); static LIST_HEAD(destroy_list); +static struct fsnotify_mark_connector *connector_destroy_list; static void fsnotify_mark_destroy_workfn(struct work_struct *work); static DECLARE_DELAYED_WORK(reaper_work, fsnotify_mark_destroy_workfn); +static void fsnotify_connector_destroy_workfn(struct work_struct *work); +static DECLARE_WORK(connector_reaper_work, fsnotify_connector_destroy_workfn); + void fsnotify_get_mark(struct fsnotify_mark *mark) { + WARN_ON_ONCE(!atomic_read(&mark->refcnt)); atomic_inc(&mark->refcnt); } -void fsnotify_put_mark(struct fsnotify_mark *mark) +/* + * Get mark reference when we found the mark via lockless traversal of object + * list. Mark can be already removed from the list by now and on its way to be + * destroyed once SRCU period ends. 
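fsnotify_get_mark_safe() relies on atomic_inc_not_zero(): during a lockless SRCU traversal the mark may already be on its way to destruction, so a reference may only be taken if the count has not yet reached zero. A userspace sketch of the same pattern, written as a C11 compare-and-swap loop:

#include <stdatomic.h>
#include <stdbool.h>

/* Userspace sketch of the atomic_inc_not_zero() pattern behind
 * fsnotify_get_mark_safe(): take a reference only if the object has
 * not already dropped to zero and begun dying. */
static bool get_ref_if_live(atomic_int *refcnt)
{
        int old = atomic_load(refcnt);

        while (old != 0) {
                if (atomic_compare_exchange_weak(refcnt, &old, old + 1))
                        return true;    /* reference taken */
        }
        return false;   /* too late; only the SRCU period keeps it readable */
}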
+ */ +static bool fsnotify_get_mark_safe(struct fsnotify_mark *mark) { - if (atomic_dec_and_test(&mark->refcnt)) { - if (mark->group) - fsnotify_put_group(mark->group); - mark->free_mark(mark); - } + return atomic_inc_not_zero(&mark->refcnt); } -/* Calculate mask of events for a list of marks */ -u32 fsnotify_recalc_mask(struct hlist_head *head) +static void __fsnotify_recalc_mask(struct fsnotify_mark_connector *conn) { u32 new_mask = 0; struct fsnotify_mark *mark; - hlist_for_each_entry(mark, head, obj_list) - new_mask |= mark->mask; - return new_mask; + assert_spin_locked(&conn->lock); + hlist_for_each_entry(mark, &conn->list, obj_list) { + if (mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED) + new_mask |= mark->mask; + } + if (conn->flags & FSNOTIFY_OBJ_TYPE_INODE) + conn->inode->i_fsnotify_mask = new_mask; + else if (conn->flags & FSNOTIFY_OBJ_TYPE_VFSMOUNT) + real_mount(conn->mnt)->mnt_fsnotify_mask = new_mask; } /* - * Remove mark from inode / vfsmount list, group list, drop inode reference - * if we got one. - * - * Must be called with group->mark_mutex held. + * Calculate mask of events for a list of marks. The caller must make sure + * connector and connector->inode cannot disappear under us. Callers achieve + * this by holding a mark->lock or mark->group->mark_mutex for a mark on this + * list. */ -void fsnotify_detach_mark(struct fsnotify_mark *mark) +void fsnotify_recalc_mask(struct fsnotify_mark_connector *conn) +{ + if (!conn) + return; + + spin_lock(&conn->lock); + __fsnotify_recalc_mask(conn); + spin_unlock(&conn->lock); + if (conn->flags & FSNOTIFY_OBJ_TYPE_INODE) + __fsnotify_update_child_dentry_flags(conn->inode); +} + +/* Free all connectors queued for freeing once SRCU period ends */ +static void fsnotify_connector_destroy_workfn(struct work_struct *work) +{ + struct fsnotify_mark_connector *conn, *free; + + spin_lock(&destroy_lock); + conn = connector_destroy_list; + connector_destroy_list = NULL; + spin_unlock(&destroy_lock); + + synchronize_srcu(&fsnotify_mark_srcu); + while (conn) { + free = conn; + conn = conn->destroy_next; + kmem_cache_free(fsnotify_mark_connector_cachep, free); + } +} + +static struct inode *fsnotify_detach_connector_from_object( + struct fsnotify_mark_connector *conn) { struct inode *inode = NULL; + + if (conn->flags & FSNOTIFY_OBJ_TYPE_INODE) { + inode = conn->inode; + rcu_assign_pointer(inode->i_fsnotify_marks, NULL); + inode->i_fsnotify_mask = 0; + conn->inode = NULL; + conn->flags &= ~FSNOTIFY_OBJ_TYPE_INODE; + } else if (conn->flags & FSNOTIFY_OBJ_TYPE_VFSMOUNT) { + rcu_assign_pointer(real_mount(conn->mnt)->mnt_fsnotify_marks, + NULL); + real_mount(conn->mnt)->mnt_fsnotify_mask = 0; + conn->mnt = NULL; + conn->flags &= ~FSNOTIFY_OBJ_TYPE_VFSMOUNT; + } + + return inode; +} + +static void fsnotify_final_mark_destroy(struct fsnotify_mark *mark) +{ struct fsnotify_group *group = mark->group; - BUG_ON(!mutex_is_locked(&group->mark_mutex)); + if (WARN_ON_ONCE(!group)) + return; + group->ops->free_mark(mark); + fsnotify_put_group(group); +} - spin_lock(&mark->lock); +void fsnotify_put_mark(struct fsnotify_mark *mark) +{ + struct fsnotify_mark_connector *conn; + struct inode *inode = NULL; + bool free_conn = false; - /* something else already called this function on this mark */ - if (!(mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED)) { - spin_unlock(&mark->lock); + /* Catch marks that were actually never attached to object */ + if (!mark->connector) { + if (atomic_dec_and_test(&mark->refcnt)) + fsnotify_final_mark_destroy(mark); return; } - mark->flags &= 
~FSNOTIFY_MARK_FLAG_ATTACHED; + /* + * We have to be careful so that traversals of obj_list under lock can + * safely grab mark reference. + */ + if (!atomic_dec_and_lock(&mark->refcnt, &mark->connector->lock)) + return; - if (mark->flags & FSNOTIFY_MARK_FLAG_INODE) { - inode = mark->inode; - fsnotify_destroy_inode_mark(mark); - } else if (mark->flags & FSNOTIFY_MARK_FLAG_VFSMOUNT) - fsnotify_destroy_vfsmount_mark(mark); - else - BUG(); + conn = mark->connector; + hlist_del_init_rcu(&mark->obj_list); + if (hlist_empty(&conn->list)) { + inode = fsnotify_detach_connector_from_object(conn); + free_conn = true; + } else { + __fsnotify_recalc_mask(conn); + } + mark->connector = NULL; + spin_unlock(&conn->lock); + + iput(inode); + + if (free_conn) { + spin_lock(&destroy_lock); + conn->destroy_next = connector_destroy_list; + connector_destroy_list = conn; + spin_unlock(&destroy_lock); + queue_work(system_unbound_wq, &connector_reaper_work); + } /* * Note that we didn't update flags telling whether inode cares about * what's happening with children. We update these flags from * __fsnotify_parent() lazily when next event happens on one of our * children. */ + spin_lock(&destroy_lock); + list_add(&mark->g_list, &destroy_list); + spin_unlock(&destroy_lock); + queue_delayed_work(system_unbound_wq, &reaper_work, + FSNOTIFY_REAPER_DELAY); +} - list_del_init(&mark->g_list); +bool fsnotify_prepare_user_wait(struct fsnotify_iter_info *iter_info) +{ + struct fsnotify_group *group; - spin_unlock(&mark->lock); + if (WARN_ON_ONCE(!iter_info->inode_mark && !iter_info->vfsmount_mark)) + return false; + + if (iter_info->inode_mark) + group = iter_info->inode_mark->group; + else + group = iter_info->vfsmount_mark->group; + + /* + * Since acquisition of mark reference is an atomic op as well, we can + * be sure this inc is seen before any effect of refcount increment. + */ + atomic_inc(&group->user_waits); + + if (iter_info->inode_mark) { + /* This can fail if mark is being removed */ + if (!fsnotify_get_mark_safe(iter_info->inode_mark)) + goto out_wait; + } + if (iter_info->vfsmount_mark) { + if (!fsnotify_get_mark_safe(iter_info->vfsmount_mark)) + goto out_inode; + } - if (inode && (mark->flags & FSNOTIFY_MARK_FLAG_OBJECT_PINNED)) - iput(inode); + /* + * Now that both marks are pinned by refcount in the inode / vfsmount + * lists, we can drop SRCU lock, and safely resume the list iteration + * once userspace returns. + */ + srcu_read_unlock(&fsnotify_mark_srcu, iter_info->srcu_idx); + + return true; +out_inode: + if (iter_info->inode_mark) + fsnotify_put_mark(iter_info->inode_mark); +out_wait: + if (atomic_dec_and_test(&group->user_waits) && group->shutdown) + wake_up(&group->notification_waitq); + return false; +} + +void fsnotify_finish_user_wait(struct fsnotify_iter_info *iter_info) +{ + struct fsnotify_group *group = NULL; + + iter_info->srcu_idx = srcu_read_lock(&fsnotify_mark_srcu); + if (iter_info->inode_mark) { + group = iter_info->inode_mark->group; + fsnotify_put_mark(iter_info->inode_mark); + } + if (iter_info->vfsmount_mark) { + group = iter_info->vfsmount_mark->group; + fsnotify_put_mark(iter_info->vfsmount_mark); + } + /* + * We abuse notification_waitq on group shutdown for waiting for all + * marks pinned when waiting for userspace. + */ + if (atomic_dec_and_test(&group->user_waits) && group->shutdown) + wake_up(&group->notification_waitq); +} + +/* + * Mark mark as detached, remove it from group list. Mark still stays in object + * list until its last reference is dropped. 
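fsnotify_put_mark() above pairs the final reference drop with taking connector->lock via atomic_dec_and_lock(), so the last holder unlinks the mark from the object list before any other thread can find it there. A simplified userspace sketch of that primitive, with a pthread mutex standing in for the spinlock:

#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>

/* Sketch of atomic_dec_and_lock(): drop a reference, and if it was the
 * last one, return true with 'lock' held so the caller can unlink the
 * object from shared lists before anyone else finds it. */
static bool dec_and_lock(atomic_int *refcnt, pthread_mutex_t *lock)
{
        int old = atomic_load(refcnt);

        /* Fast path: not the last reference, no lock needed. */
        while (old > 1) {
                if (atomic_compare_exchange_weak(refcnt, &old, old - 1))
                        return false;
        }
        pthread_mutex_lock(lock);
        if (atomic_fetch_sub(refcnt, 1) == 1)
                return true;            /* last ref; lock stays held */
        pthread_mutex_unlock(lock);
        return false;
}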
Note that we rely on mark being + * removed from group list before corresponding reference to it is dropped. In + * particular we rely on mark->connector being valid while we hold + * group->mark_mutex if we found the mark through g_list. + * + * Must be called with group->mark_mutex held. The caller must either hold + * reference to the mark or be protected by fsnotify_mark_srcu. + */ +void fsnotify_detach_mark(struct fsnotify_mark *mark) +{ + struct fsnotify_group *group = mark->group; + + WARN_ON_ONCE(!mutex_is_locked(&group->mark_mutex)); + WARN_ON_ONCE(!srcu_read_lock_held(&fsnotify_mark_srcu) && + atomic_read(&mark->refcnt) < 1 + + !!(mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED)); + + spin_lock(&mark->lock); + /* something else already called this function on this mark */ + if (!(mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED)) { + spin_unlock(&mark->lock); + return; + } + mark->flags &= ~FSNOTIFY_MARK_FLAG_ATTACHED; + list_del_init(&mark->g_list); + spin_unlock(&mark->lock); atomic_dec(&group->num_marks); + + /* Drop mark reference acquired in fsnotify_add_mark_locked() */ + fsnotify_put_mark(mark); } /* - * Prepare mark for freeing and add it to the list of marks prepared for - * freeing. The actual freeing must happen after SRCU period ends and the - * caller is responsible for this. + * Free fsnotify mark. The mark is actually only marked as being freed. The + * freeing is actually happening only once last reference to the mark is + * dropped from a workqueue which first waits for srcu period end. * - * The function returns true if the mark was added to the list of marks for - * freeing. The function returns false if someone else has already called - * __fsnotify_free_mark() for the mark. + * Caller must have a reference to the mark or be protected by + * fsnotify_mark_srcu. */ -static bool __fsnotify_free_mark(struct fsnotify_mark *mark) +void fsnotify_free_mark(struct fsnotify_mark *mark) { struct fsnotify_group *group = mark->group; @@ -189,7 +373,7 @@ static bool __fsnotify_free_mark(struct fsnotify_mark *mark) /* something else already called this function on this mark */ if (!(mark->flags & FSNOTIFY_MARK_FLAG_ALIVE)) { spin_unlock(&mark->lock); - return false; + return; } mark->flags &= ~FSNOTIFY_MARK_FLAG_ALIVE; spin_unlock(&mark->lock); @@ -201,25 +385,6 @@ static bool __fsnotify_free_mark(struct fsnotify_mark *mark) */ if (group->ops->freeing_mark) group->ops->freeing_mark(mark, group); - - spin_lock(&destroy_lock); - list_add(&mark->g_list, &destroy_list); - spin_unlock(&destroy_lock); - - return true; -} - -/* - * Free fsnotify mark. The freeing is actually happening from a workqueue which - * first waits for srcu period end. Caller must have a reference to the mark - * or be protected by fsnotify_mark_srcu. - */ -void fsnotify_free_mark(struct fsnotify_mark *mark) -{ - if (__fsnotify_free_mark(mark)) { - queue_delayed_work(system_unbound_wq, &reaper_work, - FSNOTIFY_REAPER_DELAY); - } } void fsnotify_destroy_mark(struct fsnotify_mark *mark, @@ -231,54 +396,6 @@ void fsnotify_destroy_mark(struct fsnotify_mark *mark, fsnotify_free_mark(mark); } -void fsnotify_destroy_marks(struct hlist_head *head, spinlock_t *lock) -{ - struct fsnotify_mark *mark; - - while (1) { - /* - * We have to be careful since we can race with e.g. - * fsnotify_clear_marks_by_group() and once we drop 'lock', - * mark can get removed from the obj_list and destroyed. But - * we are holding mark reference so mark cannot be freed and - * calling fsnotify_destroy_mark() more than once is fine. 
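The loop being removed in the hunk below illustrates a common teardown pattern: pin the entry with a reference, drop the list lock to do work that may sleep, then relock and continue; the reference keeps the entry alive even if a racing path also tears it down. A compact userspace sketch, assuming release() drops the list's own reference exactly once:

#include <pthread.h>
#include <stdatomic.h>
#include <stdlib.h>

struct node {
        struct node *next;
        atomic_int refcnt;      /* starts at 1: the list's reference */
};

static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;
static struct node *list_head;

static void destroy_all(void (*release)(struct node *))
{
        pthread_mutex_lock(&list_lock);
        while (list_head) {
                struct node *n = list_head;

                list_head = n->next;             /* unlink under the lock */
                atomic_fetch_add(&n->refcnt, 1); /* pin before unlocking */
                pthread_mutex_unlock(&list_lock);

                release(n);     /* may sleep; drops the list's reference */
                if (atomic_fetch_sub(&n->refcnt, 1) == 1)
                        free(n);                 /* we held the last ref */
                pthread_mutex_lock(&list_lock);
        }
        pthread_mutex_unlock(&list_lock);
}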
- */ - spin_lock(lock); - if (hlist_empty(head)) { - spin_unlock(lock); - break; - } - mark = hlist_entry(head->first, struct fsnotify_mark, obj_list); - /* - * We don't update i_fsnotify_mask / mnt_fsnotify_mask here - * since inode / mount is going away anyway. So just remove - * mark from the list. - */ - hlist_del_init_rcu(&mark->obj_list); - fsnotify_get_mark(mark); - spin_unlock(lock); - fsnotify_destroy_mark(mark, mark->group); - fsnotify_put_mark(mark); - } -} - -void fsnotify_set_mark_mask_locked(struct fsnotify_mark *mark, __u32 mask) -{ - assert_spin_locked(&mark->lock); - - mark->mask = mask; - - if (mark->flags & FSNOTIFY_MARK_FLAG_INODE) - fsnotify_set_inode_mark_mask_locked(mark, mask); -} - -void fsnotify_set_mark_ignored_mask_locked(struct fsnotify_mark *mark, __u32 mask) -{ - assert_spin_locked(&mark->lock); - - mark->ignored_mask = mask; -} - /* * Sorting function for lists of fsnotify marks. * @@ -315,37 +432,133 @@ int fsnotify_compare_groups(struct fsnotify_group *a, struct fsnotify_group *b) return -1; } -/* Add mark into proper place in given list of marks */ -int fsnotify_add_mark_list(struct hlist_head *head, struct fsnotify_mark *mark, - int allow_dups) +static int fsnotify_attach_connector_to_object( + struct fsnotify_mark_connector __rcu **connp, + struct inode *inode, + struct vfsmount *mnt) +{ + struct fsnotify_mark_connector *conn; + + conn = kmem_cache_alloc(fsnotify_mark_connector_cachep, GFP_KERNEL); + if (!conn) + return -ENOMEM; + spin_lock_init(&conn->lock); + INIT_HLIST_HEAD(&conn->list); + if (inode) { + conn->flags = FSNOTIFY_OBJ_TYPE_INODE; + conn->inode = igrab(inode); + } else { + conn->flags = FSNOTIFY_OBJ_TYPE_VFSMOUNT; + conn->mnt = mnt; + } + /* + * cmpxchg() provides the barrier so that readers of *connp can see + * only initialized structure + */ + if (cmpxchg(connp, NULL, conn)) { + /* Someone else created list structure for us */ + if (inode) + iput(inode); + kmem_cache_free(fsnotify_mark_connector_cachep, conn); + } + + return 0; +} + +/* + * Get mark connector, make sure it is alive and return with its lock held. + * This is for users that get connector pointer from inode or mount. Users that + * hold reference to a mark on the list may directly lock connector->lock as + * they are sure list cannot go away under them. + */ +static struct fsnotify_mark_connector *fsnotify_grab_connector( + struct fsnotify_mark_connector __rcu **connp) +{ + struct fsnotify_mark_connector *conn; + int idx; + + idx = srcu_read_lock(&fsnotify_mark_srcu); + conn = srcu_dereference(*connp, &fsnotify_mark_srcu); + if (!conn) + goto out; + spin_lock(&conn->lock); + if (!(conn->flags & (FSNOTIFY_OBJ_TYPE_INODE | + FSNOTIFY_OBJ_TYPE_VFSMOUNT))) { + spin_unlock(&conn->lock); + srcu_read_unlock(&fsnotify_mark_srcu, idx); + return NULL; + } +out: + srcu_read_unlock(&fsnotify_mark_srcu, idx); + return conn; +} + +/* + * Add mark into proper place in given list of marks. These marks may be used + * for the fsnotify backend to determine which event types should be delivered + * to which group and for which inodes. These marks are ordered according to + * priority, highest number first, and then by the group's location in memory. 
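Earlier in this hunk, fsnotify_attach_connector_to_object() uses cmpxchg() as a publish-once pattern: initialise the structure fully, then swing the shared pointer with a single atomic compare-and-swap so readers never observe a half-built connector; the loser of the race frees its copy and adopts the winner's. A userspace sketch with C11 atomics:

#include <stdatomic.h>
#include <stdlib.h>

struct connector {
        int flags;      /* ...object state, fully set before publishing... */
};

static struct connector *attach_connector(_Atomic(struct connector *) *connp)
{
        struct connector *conn = calloc(1, sizeof(*conn));
        struct connector *expected = NULL;

        if (!conn)
                return NULL;
        conn->flags = 1;                        /* initialise before publish */
        if (!atomic_compare_exchange_strong(connp, &expected, conn)) {
                free(conn);                     /* somebody beat us to it */
                conn = expected;                /* adopt the published winner */
        }
        return conn;
}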
+ */ +static int fsnotify_add_mark_list(struct fsnotify_mark *mark, + struct inode *inode, struct vfsmount *mnt, + int allow_dups) { struct fsnotify_mark *lmark, *last = NULL; + struct fsnotify_mark_connector *conn; + struct fsnotify_mark_connector __rcu **connp; int cmp; + int err = 0; + + if (WARN_ON(!inode && !mnt)) + return -EINVAL; + if (inode) + connp = &inode->i_fsnotify_marks; + else + connp = &real_mount(mnt)->mnt_fsnotify_marks; +restart: + spin_lock(&mark->lock); + conn = fsnotify_grab_connector(connp); + if (!conn) { + spin_unlock(&mark->lock); + err = fsnotify_attach_connector_to_object(connp, inode, mnt); + if (err) + return err; + goto restart; + } /* is mark the first mark? */ - if (hlist_empty(head)) { - hlist_add_head_rcu(&mark->obj_list, head); - return 0; + if (hlist_empty(&conn->list)) { + hlist_add_head_rcu(&mark->obj_list, &conn->list); + goto added; } /* should mark be in the middle of the current list? */ - hlist_for_each_entry(lmark, head, obj_list) { + hlist_for_each_entry(lmark, &conn->list, obj_list) { last = lmark; - if ((lmark->group == mark->group) && !allow_dups) - return -EEXIST; + if ((lmark->group == mark->group) && + (lmark->flags & FSNOTIFY_MARK_FLAG_ATTACHED) && + !allow_dups) { + err = -EEXIST; + goto out_err; + } cmp = fsnotify_compare_groups(lmark->group, mark->group); if (cmp >= 0) { hlist_add_before_rcu(&mark->obj_list, &lmark->obj_list); - return 0; + goto added; } } BUG_ON(last == NULL); /* mark should be the last entry. last is the current last entry */ hlist_add_behind_rcu(&mark->obj_list, &last->obj_list); - return 0; +added: + mark->connector = conn; +out_err: + spin_unlock(&conn->lock); + spin_unlock(&mark->lock); + return err; } /* @@ -353,10 +566,10 @@ int fsnotify_add_mark_list(struct hlist_head *head, struct fsnotify_mark *mark, * These marks may be used for the fsnotify backend to determine which * event types should be delivered to which group. */ -int fsnotify_add_mark_locked(struct fsnotify_mark *mark, - struct fsnotify_group *group, struct inode *inode, +int fsnotify_add_mark_locked(struct fsnotify_mark *mark, struct inode *inode, struct vfsmount *mnt, int allow_dups) { + struct fsnotify_group *group = mark->group; int ret = 0; BUG_ON(inode && mnt); @@ -367,61 +580,42 @@ int fsnotify_add_mark_locked(struct fsnotify_mark *mark, * LOCKING ORDER!!!! 
* group->mark_mutex * mark->lock - * inode->i_lock + * mark->connector->lock */ spin_lock(&mark->lock); mark->flags |= FSNOTIFY_MARK_FLAG_ALIVE | FSNOTIFY_MARK_FLAG_ATTACHED; - fsnotify_get_group(group); - mark->group = group; list_add(&mark->g_list, &group->marks_list); atomic_inc(&group->num_marks); - fsnotify_get_mark(mark); /* for i_list and g_list */ - - if (inode) { - ret = fsnotify_add_inode_mark(mark, group, inode, allow_dups); - if (ret) - goto err; - } else if (mnt) { - ret = fsnotify_add_vfsmount_mark(mark, group, mnt, allow_dups); - if (ret) - goto err; - } else { - BUG(); - } - - /* this will pin the object if appropriate */ - fsnotify_set_mark_mask_locked(mark, mark->mask); + fsnotify_get_mark(mark); /* for g_list */ spin_unlock(&mark->lock); - if (inode) - __fsnotify_update_child_dentry_flags(inode); + ret = fsnotify_add_mark_list(mark, inode, mnt, allow_dups); + if (ret) + goto err; + + if (mark->mask) + fsnotify_recalc_mask(mark->connector); return ret; err: - mark->flags &= ~FSNOTIFY_MARK_FLAG_ALIVE; + mark->flags &= ~(FSNOTIFY_MARK_FLAG_ALIVE | + FSNOTIFY_MARK_FLAG_ATTACHED); list_del_init(&mark->g_list); - fsnotify_put_group(group); - mark->group = NULL; atomic_dec(&group->num_marks); - spin_unlock(&mark->lock); - - spin_lock(&destroy_lock); - list_add(&mark->g_list, &destroy_list); - spin_unlock(&destroy_lock); - queue_delayed_work(system_unbound_wq, &reaper_work, - FSNOTIFY_REAPER_DELAY); - + fsnotify_put_mark(mark); return ret; } -int fsnotify_add_mark(struct fsnotify_mark *mark, struct fsnotify_group *group, - struct inode *inode, struct vfsmount *mnt, int allow_dups) +int fsnotify_add_mark(struct fsnotify_mark *mark, struct inode *inode, + struct vfsmount *mnt, int allow_dups) { int ret; + struct fsnotify_group *group = mark->group; + mutex_lock(&group->mark_mutex); - ret = fsnotify_add_mark_locked(mark, group, inode, mnt, allow_dups); + ret = fsnotify_add_mark_locked(mark, inode, mnt, allow_dups); mutex_unlock(&group->mark_mutex); return ret; } @@ -430,29 +624,42 @@ int fsnotify_add_mark(struct fsnotify_mark *mark, struct fsnotify_group *group, * Given a list of marks, find the mark associated with given group. If found * take a reference to that mark and return it, else return NULL. */ -struct fsnotify_mark *fsnotify_find_mark(struct hlist_head *head, - struct fsnotify_group *group) +struct fsnotify_mark *fsnotify_find_mark( + struct fsnotify_mark_connector __rcu **connp, + struct fsnotify_group *group) { + struct fsnotify_mark_connector *conn; struct fsnotify_mark *mark; - hlist_for_each_entry(mark, head, obj_list) { - if (mark->group == group) { + conn = fsnotify_grab_connector(connp); + if (!conn) + return NULL; + + hlist_for_each_entry(mark, &conn->list, obj_list) { + if (mark->group == group && + (mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED)) { fsnotify_get_mark(mark); + spin_unlock(&conn->lock); return mark; } } + spin_unlock(&conn->lock); return NULL; } -/* - * clear any marks in a group in which mark->flags & flags is true - */ -void fsnotify_clear_marks_by_group_flags(struct fsnotify_group *group, - unsigned int flags) +/* Clear any marks in a group with given type */ +void fsnotify_clear_marks_by_group(struct fsnotify_group *group, + unsigned int type) { struct fsnotify_mark *lmark, *mark; LIST_HEAD(to_free); + struct list_head *head = &to_free; + /* Skip selection step if we want to clear all marks. */ + if (type == FSNOTIFY_OBJ_ALL_TYPES) { + head = &group->marks_list; + goto clear; + } /* * We have to be really careful here. 
Anytime we drop mark_mutex, e.g. * fsnotify_clear_marks_by_inode() can come and free marks. Even in our @@ -464,18 +671,19 @@ void fsnotify_clear_marks_by_group_flags(struct fsnotify_group *group, */ mutex_lock_nested(&group->mark_mutex, SINGLE_DEPTH_NESTING); list_for_each_entry_safe(mark, lmark, &group->marks_list, g_list) { - if (mark->flags & flags) + if (mark->connector->flags & type) list_move(&mark->g_list, &to_free); } mutex_unlock(&group->mark_mutex); +clear: while (1) { mutex_lock_nested(&group->mark_mutex, SINGLE_DEPTH_NESTING); - if (list_empty(&to_free)) { + if (list_empty(head)) { mutex_unlock(&group->mark_mutex); break; } - mark = list_first_entry(&to_free, struct fsnotify_mark, g_list); + mark = list_first_entry(head, struct fsnotify_mark, g_list); fsnotify_get_mark(mark); fsnotify_detach_mark(mark); mutex_unlock(&group->mark_mutex); @@ -484,49 +692,62 @@ void fsnotify_clear_marks_by_group_flags(struct fsnotify_group *group, } } -/* - * Given a group, prepare for freeing all the marks associated with that group. - * The marks are attached to the list of marks prepared for destruction, the - * caller is responsible for freeing marks in that list after SRCU period has - * ended. - */ -void fsnotify_detach_group_marks(struct fsnotify_group *group) +/* Destroy all marks attached to inode / vfsmount */ +void fsnotify_destroy_marks(struct fsnotify_mark_connector __rcu **connp) { - struct fsnotify_mark *mark; + struct fsnotify_mark_connector *conn; + struct fsnotify_mark *mark, *old_mark = NULL; + struct inode *inode; - while (1) { - mutex_lock_nested(&group->mark_mutex, SINGLE_DEPTH_NESTING); - if (list_empty(&group->marks_list)) { - mutex_unlock(&group->mark_mutex); - break; - } - mark = list_first_entry(&group->marks_list, - struct fsnotify_mark, g_list); + conn = fsnotify_grab_connector(connp); + if (!conn) + return; + /* + * We have to be careful since we can race with e.g. + * fsnotify_clear_marks_by_group() and once we drop the conn->lock, the + * list can get modified. However we are holding mark reference and + * thus our mark cannot be removed from obj_list so we can continue + * iteration after regaining conn->lock. + */ + hlist_for_each_entry(mark, &conn->list, obj_list) { fsnotify_get_mark(mark); - fsnotify_detach_mark(mark); - mutex_unlock(&group->mark_mutex); - __fsnotify_free_mark(mark); - fsnotify_put_mark(mark); + spin_unlock(&conn->lock); + if (old_mark) + fsnotify_put_mark(old_mark); + old_mark = mark; + fsnotify_destroy_mark(mark, mark->group); + spin_lock(&conn->lock); } + /* + * Detach list from object now so that we don't pin inode until all + * mark references get dropped. It would lead to strange results such + * as delaying inode deletion or blocking unmount. + */ + inode = fsnotify_detach_connector_from_object(conn); + spin_unlock(&conn->lock); + if (old_mark) + fsnotify_put_mark(old_mark); + iput(inode); } /* * Nothing fancy, just initialize lists and locks and counters. */ void fsnotify_init_mark(struct fsnotify_mark *mark, - void (*free_mark)(struct fsnotify_mark *mark)) + struct fsnotify_group *group) { memset(mark, 0, sizeof(*mark)); spin_lock_init(&mark->lock); atomic_set(&mark->refcnt, 1); - mark->free_mark = free_mark; + fsnotify_get_group(group); + mark->group = group; } /* * Destroy all marks in destroy_list, waits for SRCU period to finish before * actually freeing marks. 
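The reaper described above follows a steal-the-list pattern: detach the whole pending list under the spinlock, wait out the grace period, then free everything with no lock held, so the expensive synchronisation never blocks producers adding more entries. A userspace sketch with the SRCU wait elided:

#include <pthread.h>
#include <stdlib.h>

struct dnode {
        struct dnode *next;
};

static pthread_mutex_t destroy_lock = PTHREAD_MUTEX_INITIALIZER;
static struct dnode *destroy_list;

static void destroy_workfn(void)
{
        struct dnode *list, *next;

        pthread_mutex_lock(&destroy_lock);
        list = destroy_list;            /* steal the whole list */
        destroy_list = NULL;
        pthread_mutex_unlock(&destroy_lock);

        /* synchronize_srcu(&fsnotify_mark_srcu) would go here */

        for (; list; list = next) {     /* free with no lock held */
                next = list->next;
                free(list);
        }
}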
*/ -void fsnotify_mark_destroy_list(void) +static void fsnotify_mark_destroy_workfn(struct work_struct *work) { struct fsnotify_mark *mark, *next; struct list_head private_destroy_list; @@ -540,11 +761,12 @@ void fsnotify_mark_destroy_list(void) list_for_each_entry_safe(mark, next, &private_destroy_list, g_list) { list_del_init(&mark->g_list); - fsnotify_put_mark(mark); + fsnotify_final_mark_destroy(mark); } } -static void fsnotify_mark_destroy_workfn(struct work_struct *work) +/* Wait for all marks queued for destruction to be actually destroyed */ +void fsnotify_wait_marks_destroyed(void) { - fsnotify_mark_destroy_list(); + flush_delayed_work(&reaper_work); } diff --git a/fs/notify/vfsmount_mark.c b/fs/notify/vfsmount_mark.c deleted file mode 100644 index a8fcab6..0000000 --- a/fs/notify/vfsmount_mark.c +++ /dev/null @@ -1,108 +0,0 @@ -/* - * Copyright (C) 2008 Red Hat, Inc., Eric Paris <eparis@redhat.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; see the file COPYING. If not, write to - * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -#include <linux/fs.h> -#include <linux/init.h> -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/mount.h> -#include <linux/mutex.h> -#include <linux/spinlock.h> - -#include <linux/atomic.h> - -#include <linux/fsnotify_backend.h> -#include "fsnotify.h" - -void fsnotify_clear_vfsmount_marks_by_group(struct fsnotify_group *group) -{ - fsnotify_clear_marks_by_group_flags(group, FSNOTIFY_MARK_FLAG_VFSMOUNT); -} - -/* - * Recalculate the mnt->mnt_fsnotify_mask, or the mask of all FS_* event types - * any notifier is interested in hearing for this mount point - */ -void fsnotify_recalc_vfsmount_mask(struct vfsmount *mnt) -{ - struct mount *m = real_mount(mnt); - - spin_lock(&mnt->mnt_root->d_lock); - m->mnt_fsnotify_mask = fsnotify_recalc_mask(&m->mnt_fsnotify_marks); - spin_unlock(&mnt->mnt_root->d_lock); -} - -void fsnotify_destroy_vfsmount_mark(struct fsnotify_mark *mark) -{ - struct vfsmount *mnt = mark->mnt; - struct mount *m = real_mount(mnt); - - BUG_ON(!mutex_is_locked(&mark->group->mark_mutex)); - assert_spin_locked(&mark->lock); - - spin_lock(&mnt->mnt_root->d_lock); - - hlist_del_init_rcu(&mark->obj_list); - mark->mnt = NULL; - - m->mnt_fsnotify_mask = fsnotify_recalc_mask(&m->mnt_fsnotify_marks); - spin_unlock(&mnt->mnt_root->d_lock); -} - -/* - * given a group and vfsmount, find the mark associated with that combination. - * if found take a reference to that mark and return it, else return NULL - */ -struct fsnotify_mark *fsnotify_find_vfsmount_mark(struct fsnotify_group *group, - struct vfsmount *mnt) -{ - struct mount *m = real_mount(mnt); - struct fsnotify_mark *mark; - - spin_lock(&mnt->mnt_root->d_lock); - mark = fsnotify_find_mark(&m->mnt_fsnotify_marks, group); - spin_unlock(&mnt->mnt_root->d_lock); - - return mark; -} - -/* - * Attach an initialized mark to a given group and vfsmount. 
- * These marks may be used for the fsnotify backend to determine which - * event types should be delivered to which groups. - */ -int fsnotify_add_vfsmount_mark(struct fsnotify_mark *mark, - struct fsnotify_group *group, struct vfsmount *mnt, - int allow_dups) -{ - struct mount *m = real_mount(mnt); - int ret; - - mark->flags |= FSNOTIFY_MARK_FLAG_VFSMOUNT; - - BUG_ON(!mutex_is_locked(&group->mark_mutex)); - assert_spin_locked(&mark->lock); - - spin_lock(&mnt->mnt_root->d_lock); - mark->mnt = mnt; - ret = fsnotify_add_mark_list(&m->mnt_fsnotify_marks, mark, allow_dups); - m->mnt_fsnotify_mask = fsnotify_recalc_mask(&m->mnt_fsnotify_marks); - spin_unlock(&mnt->mnt_root->d_lock); - - return ret; -} diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index 74b489e..ebf80c7 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -2188,8 +2188,7 @@ int dquot_disable(struct super_block *sb, int type, unsigned int flags) /* This can happen when suspending quotas on remount-ro... */ if (toputinode[cnt] && !sb_has_quota_loaded(sb, cnt)) { inode_lock(toputinode[cnt]); - toputinode[cnt]->i_flags &= ~(S_IMMUTABLE | - S_NOATIME | S_NOQUOTA); + toputinode[cnt]->i_flags &= ~S_NOQUOTA; truncate_inode_pages(&toputinode[cnt]->i_data, 0); inode_unlock(toputinode[cnt]); mark_inode_dirty_sync(toputinode[cnt]); @@ -2237,7 +2236,6 @@ static int vfs_load_quota_inode(struct inode *inode, int type, int format_id, struct super_block *sb = inode->i_sb; struct quota_info *dqopt = sb_dqopt(sb); int error; - int oldflags = -1; if (!fmt) return -ESRCH; @@ -2285,9 +2283,7 @@ static int vfs_load_quota_inode(struct inode *inode, int type, int format_id, * possible) Also nobody should write to the file - we use * special IO operations which ignore the immutable bit. */ inode_lock(inode); - oldflags = inode->i_flags & (S_NOATIME | S_IMMUTABLE | - S_NOQUOTA); - inode->i_flags |= S_NOQUOTA | S_NOATIME | S_IMMUTABLE; + inode->i_flags |= S_NOQUOTA; inode_unlock(inode); /* * When S_NOQUOTA is set, remove dquot references as no more @@ -2329,14 +2325,9 @@ out_file_init: dqopt->files[type] = NULL; iput(inode); out_file_flags: - if (oldflags != -1) { - inode_lock(inode); - /* Set the flags back (in the case of accidental quotaon() - * on a wrong file we don't want to mess up the flags) */ - inode->i_flags &= ~(S_NOATIME | S_NOQUOTA | S_IMMUTABLE); - inode->i_flags |= oldflags; - inode_unlock(inode); - } + inode_lock(inode); + inode->i_flags &= ~S_NOQUOTA; + inode_unlock(inode); out_fmt: put_quota_format(fmt); @@ -2780,18 +2771,6 @@ int dquot_set_dqinfo(struct super_block *sb, int type, struct qc_info *ii) } EXPORT_SYMBOL(dquot_set_dqinfo); -const struct quotactl_ops dquot_quotactl_ops = { - .quota_on = dquot_quota_on, - .quota_off = dquot_quota_off, - .quota_sync = dquot_quota_sync, - .get_state = dquot_get_state, - .set_info = dquot_set_dqinfo, - .get_dqblk = dquot_get_dqblk, - .get_nextdqblk = dquot_get_next_dqblk, - .set_dqblk = dquot_set_dqblk -}; -EXPORT_SYMBOL(dquot_quotactl_ops); - const struct quotactl_ops dquot_quotactl_sysfile_ops = { .quota_enable = dquot_quota_enable, .quota_disable = dquot_quota_disable, diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index a6ab9d6..873fc04 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c @@ -1375,7 +1375,6 @@ static void init_inode(struct inode *inode, struct treepath *path) static void inode2sd(void *sd, struct inode *inode, loff_t size) { struct stat_data *sd_v2 = (struct stat_data *)sd; - __u16 flags; set_sd_v2_mode(sd_v2, inode->i_mode); set_sd_v2_nlink(sd_v2, 
inode->i_nlink); @@ -1390,9 +1389,7 @@ static void inode2sd(void *sd, struct inode *inode, loff_t size) set_sd_v2_rdev(sd_v2, new_encode_dev(inode->i_rdev)); else set_sd_v2_generation(sd_v2, inode->i_generation); - flags = REISERFS_I(inode)->i_attrs; - i_attrs_to_sd_attrs(inode, &flags); - set_sd_v2_attrs(sd_v2, flags); + set_sd_v2_attrs(sd_v2, REISERFS_I(inode)->i_attrs); } /* used to copy inode's fields to old stat data */ @@ -2002,10 +1999,6 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th, /* uid and gid must already be set by the caller for quota init */ - /* symlink cannot be immutable or append only, right? */ - if (S_ISLNK(inode->i_mode)) - inode->i_flags &= ~(S_IMMUTABLE | S_APPEND); - inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode); inode->i_size = i_size; inode->i_blocks = 0; @@ -3095,28 +3088,6 @@ void sd_attrs_to_i_attrs(__u16 sd_attrs, struct inode *inode) } } -void i_attrs_to_sd_attrs(struct inode *inode, __u16 * sd_attrs) -{ - if (reiserfs_attrs(inode->i_sb)) { - if (inode->i_flags & S_IMMUTABLE) - *sd_attrs |= REISERFS_IMMUTABLE_FL; - else - *sd_attrs &= ~REISERFS_IMMUTABLE_FL; - if (inode->i_flags & S_SYNC) - *sd_attrs |= REISERFS_SYNC_FL; - else - *sd_attrs &= ~REISERFS_SYNC_FL; - if (inode->i_flags & S_NOATIME) - *sd_attrs |= REISERFS_NOATIME_FL; - else - *sd_attrs &= ~REISERFS_NOATIME_FL; - if (REISERFS_I(inode)->i_flags & i_nopack_mask) - *sd_attrs |= REISERFS_NOTAIL_FL; - else - *sd_attrs &= ~REISERFS_NOTAIL_FL; - } -} - /* * decide if this buffer needs to stay around for data logging or ordered * write purposes diff --git a/fs/reiserfs/ioctl.c b/fs/reiserfs/ioctl.c index 1f4692a..acbbaf7 100644 --- a/fs/reiserfs/ioctl.c +++ b/fs/reiserfs/ioctl.c @@ -47,7 +47,6 @@ long reiserfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) } flags = REISERFS_I(inode)->i_attrs; - i_attrs_to_sd_attrs(inode, (__u16 *) & flags); err = put_user(flags, (int __user *)arg); break; case REISERFS_IOC_SETFLAGS:{ diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c index aa40c24..da01f49 100644 --- a/fs/reiserfs/journal.c +++ b/fs/reiserfs/journal.c @@ -1961,7 +1961,7 @@ static int do_journal_release(struct reiserfs_transaction_handle *th, * will be requeued because superblock is being shutdown and doesn't * have MS_ACTIVE set. 
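With i_attrs_to_sd_attrs() removed a few hunks up, reiserfs translates attribute bits in only one direction, on-disk to in-core at read time, because paths that change flags now update both representations together. A hedged sketch of what such a one-way mapping looks like; the flag names and values here are illustrative, not reiserfs's real ones:

#include <stdint.h>

#define SD_IMMUTABLE    0x0010  /* illustrative on-disk flag values */
#define SD_NOATIME      0x0080
#define VFS_IMMUTABLE   0x0001  /* illustrative in-core flag values */
#define VFS_NOATIME     0x0002

/* Derive in-core inode flags from the on-disk attributes when the
 * inode is read. The reverse helper is gone because SETFLAGS-style
 * paths keep both copies in sync at the moment flags change. */
static uint32_t sd_attrs_to_vfs_flags(uint16_t sd_attrs)
{
        uint32_t flags = 0;

        if (sd_attrs & SD_IMMUTABLE)
                flags |= VFS_IMMUTABLE;
        if (sd_attrs & SD_NOATIME)
                flags |= VFS_NOATIME;
        return flags;
}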
*/ - cancel_delayed_work_sync(&REISERFS_SB(sb)->old_work); + reiserfs_cancel_old_flush(sb); /* wait for all commits to finish */ cancel_delayed_work_sync(&SB_JOURNAL(sb)->j_work); diff --git a/fs/reiserfs/lbalance.c b/fs/reiserfs/lbalance.c index 249594a..f5cebd7 100644 --- a/fs/reiserfs/lbalance.c +++ b/fs/reiserfs/lbalance.c @@ -475,7 +475,7 @@ static void leaf_item_bottle(struct buffer_info *dest_bi, * 'cpy_bytes'; create new item header; * n_ih = new item_header; */ - memcpy(&n_ih, ih, SHORT_KEY_SIZE); + memcpy(&n_ih.ih_key, &ih->ih_key, KEY_SIZE); /* Endian safe, both le */ n_ih.ih_version = ih->ih_version; diff --git a/fs/reiserfs/reiserfs.h b/fs/reiserfs/reiserfs.h index 2adcde1..1d34377 100644 --- a/fs/reiserfs/reiserfs.h +++ b/fs/reiserfs/reiserfs.h @@ -1326,7 +1326,6 @@ struct cpu_key { #define KEY_NOT_FOUND 0 #define KEY_SIZE (sizeof(struct reiserfs_key)) -#define SHORT_KEY_SIZE (sizeof (__u32) + sizeof (__u32)) /* return values for search_by_key and clones */ #define ITEM_FOUND 1 @@ -2949,6 +2948,7 @@ int reiserfs_allocate_list_bitmaps(struct super_block *s, struct reiserfs_list_bitmap *, unsigned int); void reiserfs_schedule_old_flush(struct super_block *s); +void reiserfs_cancel_old_flush(struct super_block *s); void add_save_link(struct reiserfs_transaction_handle *th, struct inode *inode, int truncate); int remove_save_link(struct inode *inode, int truncate); @@ -3099,7 +3099,6 @@ static inline void reiserfs_update_sd(struct reiserfs_transaction_handle *th, } void sd_attrs_to_i_attrs(__u16 sd_attrs, struct inode *inode); -void i_attrs_to_sd_attrs(struct inode *inode, __u16 * sd_attrs); int reiserfs_setattr(struct dentry *dentry, struct iattr *attr); int __reiserfs_write_begin(struct page *page, unsigned from, unsigned len); diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index feabcde..685f1e0 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -89,11 +89,27 @@ static void flush_old_commits(struct work_struct *work) sbi = container_of(work, struct reiserfs_sb_info, old_work.work); s = sbi->s_journal->j_work_sb; + /* + * We need s_umount for protecting quota writeback. We have to use + * trylock as reiserfs_cancel_old_flush() may be waiting for this work + * to complete with s_umount held. + */ + if (!down_read_trylock(&s->s_umount)) { + /* Requeue work if we are not cancelling it */ + spin_lock(&sbi->old_work_lock); + if (sbi->work_queued == 1) + queue_delayed_work(system_long_wq, &sbi->old_work, HZ); + spin_unlock(&sbi->old_work_lock); + return; + } spin_lock(&sbi->old_work_lock); - sbi->work_queued = 0; + /* Avoid clobbering the cancel state... 
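The work_queued field is effectively a tri-state here: 0 (idle), 1 (queued) and 2 (cancelled). Because the worker only ever moves 1 to 0, a cancel that stores 2 before flushing the work cannot be clobbered back to a requeueable state by a worker that was already running, and nothing requeues once 2 is set. A minimal userspace sketch of that state machine (names illustrative):

#include <pthread.h>
#include <stdbool.h>

enum { FLUSH_IDLE = 0, FLUSH_QUEUED = 1, FLUSH_CANCELLED = 2 };

static pthread_mutex_t work_lock = PTHREAD_MUTEX_INITIALIZER;
static int work_state;

static bool try_queue(void)
{
        bool queued = false;

        pthread_mutex_lock(&work_lock);
        if (work_state == FLUSH_IDLE) {         /* never requeues after cancel */
                work_state = FLUSH_QUEUED;
                queued = true;
        }
        pthread_mutex_unlock(&work_lock);
        return queued;
}

static void worker_done(void)
{
        pthread_mutex_lock(&work_lock);
        if (work_state == FLUSH_QUEUED)         /* don't clobber CANCELLED */
                work_state = FLUSH_IDLE;
        pthread_mutex_unlock(&work_lock);
}

static void cancel_flush(void)
{
        pthread_mutex_lock(&work_lock);
        work_state = FLUSH_CANCELLED;           /* block new requeues first */
        pthread_mutex_unlock(&work_lock);
        /* the cancel_delayed_work_sync() equivalent would go here */
}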
*/ + if (sbi->work_queued == 1) + sbi->work_queued = 0; spin_unlock(&sbi->old_work_lock); reiserfs_sync_fs(s, 1); + up_read(&s->s_umount); } void reiserfs_schedule_old_flush(struct super_block *s) @@ -117,21 +133,22 @@ void reiserfs_schedule_old_flush(struct super_block *s) spin_unlock(&sbi->old_work_lock); } -static void cancel_old_flush(struct super_block *s) +void reiserfs_cancel_old_flush(struct super_block *s) { struct reiserfs_sb_info *sbi = REISERFS_SB(s); - cancel_delayed_work_sync(&REISERFS_SB(s)->old_work); spin_lock(&sbi->old_work_lock); - sbi->work_queued = 0; + /* Make sure no new flushes will be queued */ + sbi->work_queued = 2; spin_unlock(&sbi->old_work_lock); + cancel_delayed_work_sync(&REISERFS_SB(s)->old_work); } static int reiserfs_freeze(struct super_block *s) { struct reiserfs_transaction_handle th; - cancel_old_flush(s); + reiserfs_cancel_old_flush(s); reiserfs_write_lock(s); if (!(s->s_flags & MS_RDONLY)) { @@ -152,7 +169,13 @@ static int reiserfs_freeze(struct super_block *s) static int reiserfs_unfreeze(struct super_block *s) { + struct reiserfs_sb_info *sbi = REISERFS_SB(s); + reiserfs_allow_writes(s); + spin_lock(&sbi->old_work_lock); + /* Allow old_work to run again */ + sbi->work_queued = 0; + spin_unlock(&sbi->old_work_lock); return 0; } @@ -547,12 +570,28 @@ static void reiserfs_kill_sb(struct super_block *s) kill_block_super(s); } +#ifdef CONFIG_QUOTA +static int reiserfs_quota_off(struct super_block *sb, int type); + +static void reiserfs_quota_off_umount(struct super_block *s) +{ + int type; + + for (type = 0; type < REISERFS_MAXQUOTAS; type++) + reiserfs_quota_off(s, type); +} +#else +static inline void reiserfs_quota_off_umount(struct super_block *s) +{ +} +#endif + static void reiserfs_put_super(struct super_block *s) { struct reiserfs_transaction_handle th; th.t_trans_id = 0; - dquot_disable(s, -1, DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED); + reiserfs_quota_off_umount(s); reiserfs_write_lock(s); @@ -817,7 +856,7 @@ static const struct dquot_operations reiserfs_quota_operations = { static const struct quotactl_ops reiserfs_qctl_operations = { .quota_on = reiserfs_quota_on, - .quota_off = dquot_quota_off, + .quota_off = reiserfs_quota_off, .quota_sync = dquot_quota_sync, .get_state = dquot_get_state, .set_info = dquot_set_dqinfo, @@ -2194,7 +2233,7 @@ error_unlocked: if (sbi->commit_wq) destroy_workqueue(sbi->commit_wq); - cancel_delayed_work_sync(&REISERFS_SB(s)->old_work); + reiserfs_cancel_old_flush(s); reiserfs_free_bitmap_cache(s); if (SB_BUFFER_WITH_SB(s)) @@ -2405,12 +2444,47 @@ static int reiserfs_quota_on(struct super_block *sb, int type, int format_id, goto out; } reiserfs_write_unlock(sb); - return dquot_quota_on(sb, type, format_id, path); + err = dquot_quota_on(sb, type, format_id, path); + if (!err) { + inode_lock(inode); + REISERFS_I(inode)->i_attrs |= REISERFS_IMMUTABLE_FL | + REISERFS_NOATIME_FL; + inode_set_flags(inode, S_IMMUTABLE | S_NOATIME, + S_IMMUTABLE | S_NOATIME); + inode_unlock(inode); + mark_inode_dirty(inode); + } + return err; out: reiserfs_write_unlock(sb); return err; } +static int reiserfs_quota_off(struct super_block *sb, int type) +{ + int err; + struct inode *inode = sb_dqopt(sb)->files[type]; + + if (!inode || !igrab(inode)) + goto out; + + err = dquot_quota_off(sb, type); + if (err) + goto out_put; + + inode_lock(inode); + REISERFS_I(inode)->i_attrs &= ~(REISERFS_IMMUTABLE_FL | + REISERFS_NOATIME_FL); + inode_set_flags(inode, 0, S_IMMUTABLE | S_NOATIME); + inode_unlock(inode); + mark_inode_dirty(inode); +out_put: 
+ iput(inode); + return err; +out: + return dquot_quota_off(sb, type); +} + /* * Read data from quotafile - avoid pagecache and such because we cannot afford * acquiring the locks... As quota files are never truncated and quota code diff --git a/fs/udf/file.c b/fs/udf/file.c index e04cc0c..f5eb2d5 100644 --- a/fs/udf/file.c +++ b/fs/udf/file.c @@ -44,12 +44,12 @@ static void __udf_adinicb_readpage(struct page *page) char *kaddr; struct udf_inode_info *iinfo = UDF_I(inode); - kaddr = kmap(page); + kaddr = kmap_atomic(page); memcpy(kaddr, iinfo->i_ext.i_data + iinfo->i_lenEAttr, inode->i_size); memset(kaddr + inode->i_size, 0, PAGE_SIZE - inode->i_size); flush_dcache_page(page); SetPageUptodate(page); - kunmap(page); + kunmap_atomic(kaddr); } static int udf_adinicb_readpage(struct file *file, struct page *page) @@ -70,11 +70,11 @@ static int udf_adinicb_writepage(struct page *page, BUG_ON(!PageLocked(page)); - kaddr = kmap(page); + kaddr = kmap_atomic(page); memcpy(iinfo->i_ext.i_data + iinfo->i_lenEAttr, kaddr, inode->i_size); - mark_inode_dirty(inode); SetPageUptodate(page); - kunmap(page); + kunmap_atomic(kaddr); + mark_inode_dirty(inode); unlock_page(page); return 0; diff --git a/fs/udf/inode.c b/fs/udf/inode.c index a8d8f71..98c510e 100644 --- a/fs/udf/inode.c +++ b/fs/udf/inode.c @@ -276,14 +276,14 @@ int udf_expand_file_adinicb(struct inode *inode) return -ENOMEM; if (!PageUptodate(page)) { - kaddr = kmap(page); + kaddr = kmap_atomic(page); memset(kaddr + iinfo->i_lenAlloc, 0x00, PAGE_SIZE - iinfo->i_lenAlloc); memcpy(kaddr, iinfo->i_ext.i_data + iinfo->i_lenEAttr, iinfo->i_lenAlloc); flush_dcache_page(page); SetPageUptodate(page); - kunmap(page); + kunmap_atomic(kaddr); } down_write(&iinfo->i_data_sem); memset(iinfo->i_ext.i_data + iinfo->i_lenEAttr, 0x00, @@ -300,11 +300,11 @@ int udf_expand_file_adinicb(struct inode *inode) if (err) { /* Restore everything back so that we don't lose data... */ lock_page(page); - kaddr = kmap(page); down_write(&iinfo->i_data_sem); + kaddr = kmap_atomic(page); memcpy(iinfo->i_ext.i_data + iinfo->i_lenEAttr, kaddr, inode->i_size); - kunmap(page); + kunmap_atomic(kaddr); unlock_page(page); iinfo->i_alloc_type = ICBTAG_FLAG_AD_IN_ICB; inode->i_data.a_ops = &udf_adinicb_aops; @@ -1535,7 +1535,7 @@ reread: inode->i_data.a_ops = &udf_symlink_aops; inode->i_op = &udf_symlink_inode_operations; inode_nohighmem(inode); - inode->i_mode = S_IFLNK | S_IRWXUGO; + inode->i_mode = S_IFLNK | 0777; break; case ICBTAG_FILE_TYPE_MAIN: udf_debug("METADATA FILE-----\n"); @@ -1591,9 +1591,9 @@ static umode_t udf_convert_permissions(struct fileEntry *fe) permissions = le32_to_cpu(fe->permissions); flags = le16_to_cpu(fe->icbTag.flags); - mode = ((permissions) & S_IRWXO) | - ((permissions >> 2) & S_IRWXG) | - ((permissions >> 4) & S_IRWXU) | + mode = ((permissions) & 0007) | + ((permissions >> 2) & 0070) | + ((permissions >> 4) & 0700) | ((flags & ICBTAG_FLAG_SETUID) ? S_ISUID : 0) | ((flags & ICBTAG_FLAG_SETGID) ? S_ISGID : 0) | ((flags & ICBTAG_FLAG_STICKY) ? 
S_ISVTX : 0); @@ -1669,9 +1669,9 @@ static int udf_update_inode(struct inode *inode, int do_sync) else fe->gid = cpu_to_le32(i_gid_read(inode)); - udfperms = ((inode->i_mode & S_IRWXO)) | - ((inode->i_mode & S_IRWXG) << 2) | - ((inode->i_mode & S_IRWXU) << 4); + udfperms = ((inode->i_mode & 0007)) | + ((inode->i_mode & 0070) << 2) | + ((inode->i_mode & 0700) << 4); udfperms |= (le32_to_cpu(fe->permissions) & (FE_PERM_O_DELETE | FE_PERM_O_CHATTR | diff --git a/fs/udf/namei.c b/fs/udf/namei.c index babf48d..385ee89 100644 --- a/fs/udf/namei.c +++ b/fs/udf/namei.c @@ -906,7 +906,7 @@ out: static int udf_symlink(struct inode *dir, struct dentry *dentry, const char *symname) { - struct inode *inode = udf_new_inode(dir, S_IFLNK | S_IRWXUGO); + struct inode *inode = udf_new_inode(dir, S_IFLNK | 0777); struct pathComponent *pc; const char *compstart; struct extent_position epos = {};
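For reference, the permission conversions in udf_convert_permissions() and udf_update_inode() above are inverses of each other: UDF stores five permission bits per class on disk, so the POSIX rwx triplets are packed by shifting the group bits left by 2 and the owner bits left by 4. A small self-contained illustration:

#include <stdint.h>
#include <stdio.h>

static uint32_t mode_to_udfperms(uint16_t mode)
{
        return ((mode & 0007)) |
               ((mode & 0070) << 2) |
               ((mode & 0700) << 4);
}

static uint16_t udfperms_to_mode(uint32_t perms)
{
        return (perms & 0007) |
               ((perms >> 2) & 0070) |
               ((perms >> 4) & 0700);
}

int main(void)
{
        uint32_t perms = mode_to_udfperms(0644);

        /* round-trips: prints "0644 -> 0x.. -> 0644" */
        printf("0644 -> 0x%x -> 0%o\n", perms, udfperms_to_mode(perms));
        return 0;
}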