From e3559997ac5dbd45a27805ed7951c7252b2b8ad1 Mon Sep 17 00:00:00 2001
From: Huang Weiyi <weiyi.huang@gmail.com>
Date: Tue, 25 May 2010 22:07:53 +0800
Subject: ncpfs: Remove duplicated #include

Remove duplicated #include('s) in
  fs/ncpfs/ioctl.c

Signed-off-by: Huang Weiyi <weiyi.huang@gmail.com>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
---
 fs/ncpfs/ioctl.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'fs')

diff --git a/fs/ncpfs/ioctl.c b/fs/ncpfs/ioctl.c
index 023c03d..84a8cfc 100644
--- a/fs/ncpfs/ioctl.c
+++ b/fs/ncpfs/ioctl.c
@@ -20,7 +20,6 @@
 #include <linux/smp_lock.h>
 #include <linux/vmalloc.h>
 #include <linux/sched.h>
-#include <linux/smp_lock.h>
 
 #include <linux/ncp_fs.h>
 
-- 
cgit v1.1


From 70d9e384aa7df681cfffd65947af72b22e86690e Mon Sep 17 00:00:00 2001
From: Davidlohr Bueso <dave.bueso@gmail.com>
Date: Tue, 6 Jul 2010 00:50:58 -0400
Subject: omfs: fix memory leak

In the error path of omfs_fill_super(), the FS super block info
(sbi) is not being freed.  Correct this.

Signed-off-by: Davidlohr Bueso <dave@gnu.org>
Signed-off-by: Bob Copeland <me@bobcopeland.com>
---
 fs/omfs/inode.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'fs')

diff --git a/fs/omfs/inode.c b/fs/omfs/inode.c
index 089839a..b5d6380 100644
--- a/fs/omfs/inode.c
+++ b/fs/omfs/inode.c
@@ -529,6 +529,8 @@ out_brelse_bh2:
 out_brelse_bh:
 	brelse(bh);
 end:
+	if (ret)
+		kfree(sbi);
 	return ret;
 }
 
-- 
cgit v1.1


From f068272cb2f134a194b93e94a8e0672bfce48cd8 Mon Sep 17 00:00:00 2001
From: Bob Copeland <me@bobcopeland.com>
Date: Sat, 6 Sep 2008 17:51:53 -0400
Subject: omfs: check bounds on block numbers before passing to sb_bread

In case of filesystem corruption, passing unchecked block numbers into
sb_bread can result in an infinite loop in __getblk().  Introduce a wrapper
function omfs_sbread() to check the block numbers and to also perform the
clus_to_blk() scaling.

Signed-off-by: Bob Copeland <me@bobcopeland.com>
---
 fs/omfs/dir.c   | 22 ++++++++--------------
 fs/omfs/file.c  |  8 ++++----
 fs/omfs/inode.c | 27 ++++++++++++++++-----------
 fs/omfs/omfs.h  |  1 +
 4 files changed, 29 insertions(+), 29 deletions(-)

(limited to 'fs')

diff --git a/fs/omfs/dir.c b/fs/omfs/dir.c
index b42d624..393f3f6 100644
--- a/fs/omfs/dir.c
+++ b/fs/omfs/dir.c
@@ -25,11 +25,10 @@ static struct buffer_head *omfs_get_bucket(struct inode *dir,
 		const char *name, int namelen, int *ofs)
 {
 	int nbuckets = (dir->i_size - OMFS_DIR_START)/8;
-	int block = clus_to_blk(OMFS_SB(dir->i_sb), dir->i_ino);
 	int bucket = omfs_hash(name, namelen, nbuckets);
 
 	*ofs = OMFS_DIR_START + bucket * 8;
-	return sb_bread(dir->i_sb, block);
+	return omfs_bread(dir->i_sb, dir->i_ino);
 }
 
 static struct buffer_head *omfs_scan_list(struct inode *dir, u64 block,
@@ -42,8 +41,7 @@ static struct buffer_head *omfs_scan_list(struct inode *dir, u64 block,
 	*prev_block = ~0;
 
 	while (block != ~0) {
-		bh = sb_bread(dir->i_sb,
-			clus_to_blk(OMFS_SB(dir->i_sb), block));
+		bh = omfs_bread(dir->i_sb, block);
 		if (!bh) {
 			err = -EIO;
 			goto err;
@@ -86,11 +84,10 @@ static struct buffer_head *omfs_find_entry(struct inode *dir,
 int omfs_make_empty(struct inode *inode, struct super_block *sb)
 {
 	struct omfs_sb_info *sbi = OMFS_SB(sb);
-	int block = clus_to_blk(sbi, inode->i_ino);
 	struct buffer_head *bh;
 	struct omfs_inode *oi;
 
-	bh = sb_bread(sb, block);
+	bh = omfs_bread(sb, inode->i_ino);
 	if (!bh)
 		return -ENOMEM;
 
@@ -134,7 +131,7 @@ static int omfs_add_link(struct dentry *dentry, struct inode *inode)
 	brelse(bh);
 
 	/* now set the sibling and parent pointers on the new inode */
-	bh = sb_bread(dir->i_sb, clus_to_blk(OMFS_SB(dir->i_sb), inode->i_ino));
+	bh = omfs_bread(dir->i_sb, inode->i_ino);
 	if (!bh)
 		goto out;
 
@@ -190,8 +187,7 @@ static int omfs_delete_entry(struct dentry *dentry)
 	if (prev != ~0) {
 		/* found in middle of list, get list ptr */
 		brelse(bh);
-		bh = sb_bread(dir->i_sb,
-			clus_to_blk(OMFS_SB(dir->i_sb), prev));
+		bh = omfs_bread(dir->i_sb, prev);
 		if (!bh)
 			goto out;
 
@@ -224,8 +220,7 @@ static int omfs_dir_is_empty(struct inode *inode)
 	u64 *ptr;
 	int i;
 
-	bh = sb_bread(inode->i_sb, clus_to_blk(OMFS_SB(inode->i_sb),
-			inode->i_ino));
+	bh = omfs_bread(inode->i_sb, inode->i_ino);
 
 	if (!bh)
 		return 0;
@@ -353,8 +348,7 @@ static int omfs_fill_chain(struct file *filp, void *dirent, filldir_t filldir,
 
 	/* follow chain in this bucket */
 	while (fsblock != ~0) {
-		bh = sb_bread(dir->i_sb, clus_to_blk(OMFS_SB(dir->i_sb),
-				fsblock));
+		bh = omfs_bread(dir->i_sb, fsblock);
 		if (!bh)
 			goto out;
 
@@ -466,7 +460,7 @@ static int omfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
 	hchain = (filp->f_pos >> 20) - 1;
 	hindex = filp->f_pos & 0xfffff;
 
-	bh = sb_bread(dir->i_sb, clus_to_blk(OMFS_SB(dir->i_sb), dir->i_ino));
+	bh = omfs_bread(dir->i_sb, dir->i_ino);
 	if (!bh)
 		goto out;
 
diff --git a/fs/omfs/file.c b/fs/omfs/file.c
index 6e7a329..76bc21b 100644
--- a/fs/omfs/file.c
+++ b/fs/omfs/file.c
@@ -50,7 +50,7 @@ int omfs_shrink_inode(struct inode *inode)
 	if (inode->i_size != 0)
 		goto out;
 
-	bh = sb_bread(inode->i_sb, clus_to_blk(sbi, next));
+	bh = omfs_bread(inode->i_sb, next);
 	if (!bh)
 		goto out;
 
@@ -90,7 +90,7 @@ int omfs_shrink_inode(struct inode *inode)
 		if (next == ~0)
 			break;
 
-		bh = sb_bread(inode->i_sb, clus_to_blk(sbi, next));
+		bh = omfs_bread(inode->i_sb, next);
 		if (!bh)
 			goto out;
 		oe = (struct omfs_extent *) (&bh->b_data[OMFS_EXTENT_CONT]);
@@ -232,7 +232,7 @@ static int omfs_get_block(struct inode *inode, sector_t block,
 	int remain;
 
 	ret = -EIO;
-	bh = sb_bread(inode->i_sb, clus_to_blk(sbi, inode->i_ino));
+	bh = omfs_bread(inode->i_sb, inode->i_ino);
 	if (!bh)
 		goto out;
 
@@ -265,7 +265,7 @@ static int omfs_get_block(struct inode *inode, sector_t block,
 			break;
 
 		brelse(bh);
-		bh = sb_bread(inode->i_sb, clus_to_blk(sbi, next));
+		bh = omfs_bread(inode->i_sb, next);
 		if (!bh)
 			goto out;
 		oe = (struct omfs_extent *) (&bh->b_data[OMFS_EXTENT_CONT]);
diff --git a/fs/omfs/inode.c b/fs/omfs/inode.c
index b5d6380..bd4bf75 100644
--- a/fs/omfs/inode.c
+++ b/fs/omfs/inode.c
@@ -19,6 +19,15 @@ MODULE_AUTHOR("Bob Copeland <me@bobcopeland.com>");
 MODULE_DESCRIPTION("OMFS (ReplayTV/Karma) Filesystem for Linux");
 MODULE_LICENSE("GPL");
 
+struct buffer_head *omfs_bread(struct super_block *sb, sector_t block)
+{
+	struct omfs_sb_info *sbi = OMFS_SB(sb);
+	if (block >= sbi->s_num_blocks)
+		return NULL;
+
+	return sb_bread(sb, clus_to_blk(sbi, block));
+}
+
 struct inode *omfs_new_inode(struct inode *dir, int mode)
 {
 	struct inode *inode;
@@ -93,15 +102,13 @@ static int __omfs_write_inode(struct inode *inode, int wait)
 	struct omfs_inode *oi;
 	struct omfs_sb_info *sbi = OMFS_SB(inode->i_sb);
 	struct buffer_head *bh, *bh2;
-	unsigned int block;
 	u64 ctime;
 	int i;
 	int ret = -EIO;
 	int sync_failed = 0;
 
 	/* get current inode since we may have written sibling ptrs etc. */
-	block = clus_to_blk(sbi, inode->i_ino);
-	bh = sb_bread(inode->i_sb, block);
+	bh = omfs_bread(inode->i_sb, inode->i_ino);
 	if (!bh)
 		goto out;
 
@@ -140,8 +147,7 @@ static int __omfs_write_inode(struct inode *inode, int wait)
 
 	/* if mirroring writes, copy to next fsblock */
 	for (i = 1; i < sbi->s_mirrors; i++) {
-		bh2 = sb_bread(inode->i_sb, block + i *
-			(sbi->s_blocksize / sbi->s_sys_blocksize));
+		bh2 = omfs_bread(inode->i_sb, inode->i_ino + i);
 		if (!bh2)
 			goto out_brelse;
 
@@ -193,7 +199,6 @@ struct inode *omfs_iget(struct super_block *sb, ino_t ino)
 	struct omfs_sb_info *sbi = OMFS_SB(sb);
 	struct omfs_inode *oi;
 	struct buffer_head *bh;
-	unsigned int block;
 	u64 ctime;
 	unsigned long nsecs;
 	struct inode *inode;
@@ -204,8 +209,7 @@ struct inode *omfs_iget(struct super_block *sb, ino_t ino)
 	if (!(inode->i_state & I_NEW))
 		return inode;
 
-	block = clus_to_blk(sbi, ino);
-	bh = sb_bread(inode->i_sb, block);
+	bh = omfs_bread(inode->i_sb, ino);
 	if (!bh)
 		goto iget_failed;
 
@@ -319,6 +323,9 @@ static int omfs_get_imap(struct super_block *sb)
 		goto nomem;
 
 	block = clus_to_blk(sbi, sbi->s_bitmap_ino);
+	if (block >= sbi->s_num_blocks)
+		goto nomem;
+
 	ptr = sbi->s_imap;
 	for (count = bitmap_size; count > 0; count -= sb->s_blocksize) {
 		bh = sb_bread(sb, block++);
@@ -417,7 +424,6 @@ static int omfs_fill_super(struct super_block *sb, void *data, int silent)
 	struct omfs_root_block *omfs_rb;
 	struct omfs_sb_info *sbi;
 	struct inode *root;
-	sector_t start;
 	int ret = -EINVAL;
 
 	save_mount_options(sb, (char *) data);
@@ -486,8 +492,7 @@ static int omfs_fill_super(struct super_block *sb, void *data, int silent)
 	sbi->s_block_shift = get_bitmask_order(sbi->s_blocksize) -
 		get_bitmask_order(sbi->s_sys_blocksize);
 
-	start = clus_to_blk(sbi, be64_to_cpu(omfs_sb->s_root_block));
-	bh2 = sb_bread(sb, start);
+	bh2 = omfs_bread(sb, be64_to_cpu(omfs_sb->s_root_block));
 	if (!bh2)
 		goto out_brelse_bh;
 
diff --git a/fs/omfs/omfs.h b/fs/omfs/omfs.h
index ebe2fdb..7d414fe 100644
--- a/fs/omfs/omfs.h
+++ b/fs/omfs/omfs.h
@@ -58,6 +58,7 @@ extern void omfs_make_empty_table(struct buffer_head *bh, int offset);
 extern int omfs_shrink_inode(struct inode *inode);
 
 /* inode.c */
+extern struct buffer_head *omfs_bread(struct super_block *sb, sector_t block);
 extern struct inode *omfs_iget(struct super_block *sb, ino_t inode);
 extern struct inode *omfs_new_inode(struct inode *dir, int mode);
 extern int omfs_reserve_block(struct super_block *sb, sector_t block);
-- 
cgit v1.1


From 9442e54f433eff9b6fbd0836611df4c1919df370 Mon Sep 17 00:00:00 2001
From: Bob Copeland <me@bobcopeland.com>
Date: Thu, 14 Aug 2008 18:43:59 -0400
Subject: omfs: refuse to mount if bitmap pointer is obviously wrong

If the free space bitmap pointer is corrupted such that it lies outside
of the number of blocks in the filesystem, print a message and fail the
mount so the user can fix it offline.

Signed-off-by: Bob Copeland <me@bobcopeland.com>
---
 fs/omfs/inode.c | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'fs')

diff --git a/fs/omfs/inode.c b/fs/omfs/inode.c
index bd4bf75..0af5d0a 100644
--- a/fs/omfs/inode.c
+++ b/fs/omfs/inode.c
@@ -509,6 +509,15 @@ static int omfs_fill_super(struct super_block *sb, void *data, int silent)
 		goto out_brelse_bh2;
 	}
 
+	if (sbi->s_bitmap_ino != ~0ULL &&
+	    sbi->s_bitmap_ino > sbi->s_num_blocks) {
+		printk(KERN_ERR "omfs: free space bitmap location is corrupt "
+			"(%llx, total blocks %llx)\n",
+			(unsigned long long) sbi->s_bitmap_ino,
+			(unsigned long long) sbi->s_num_blocks);
+		goto out_brelse_bh2;
+	}
+
 	ret = omfs_get_imap(sb);
 	if (ret)
 		goto out_brelse_bh2;
-- 
cgit v1.1


From 8800a044c71a128633cf3febaf4780531a991334 Mon Sep 17 00:00:00 2001
From: Bob Copeland <me@bobcopeland.com>
Date: Tue, 6 Jul 2010 11:16:46 -0400
Subject: omfs: sanity check cluster size

A corrupt filesystem could have a bad cluster size; this could result in
the filesystem allocating too much space for files if too large, or
getting stuck in omfs_allocate_block if too small.  The proper range is
1-8 blocks.

Reported-by: Eric Sesterhenn <snakebyte@gmx.de>
Signed-off-by: Bob Copeland <me@bobcopeland.com>
---
 fs/omfs/inode.c   | 6 ++++++
 fs/omfs/omfs_fs.h | 1 +
 2 files changed, 7 insertions(+)

(limited to 'fs')

diff --git a/fs/omfs/inode.c b/fs/omfs/inode.c
index 0af5d0a..579d33f 100644
--- a/fs/omfs/inode.c
+++ b/fs/omfs/inode.c
@@ -517,6 +517,12 @@ static int omfs_fill_super(struct super_block *sb, void *data, int silent)
 			(unsigned long long) sbi->s_num_blocks);
 		goto out_brelse_bh2;
 	}
+	if (sbi->s_clustersize < 1 ||
+	    sbi->s_clustersize > OMFS_MAX_CLUSTER_SIZE) {
+		printk(KERN_ERR "omfs: cluster size out of range (%d)",
+			sbi->s_clustersize);
+		goto out_brelse_bh2;
+	}
 
 	ret = omfs_get_imap(sb);
 	if (ret)
diff --git a/fs/omfs/omfs_fs.h b/fs/omfs/omfs_fs.h
index 12cca24..ee5e432 100644
--- a/fs/omfs/omfs_fs.h
+++ b/fs/omfs/omfs_fs.h
@@ -17,6 +17,7 @@
 #define OMFS_EXTENT_CONT 0x40
 #define OMFS_XOR_COUNT 19
 #define OMFS_MAX_BLOCK_SIZE 8192
+#define OMFS_MAX_CLUSTER_SIZE 8
 
 struct omfs_super_block {
 	char s_fill1[256];
-- 
cgit v1.1


From b7ba83715317007962ee318587de92f14e9c3aaa Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Thu, 17 Dec 2009 20:12:04 -0500
Subject: inotify: simplify the inotify idr handling

This patch moves all of the idr editing operations into their own idr
functions.  It makes it easier to prove locking correctness and to to
understand the code flow.

Signed-off-by: Eric Paris <eparis@redhat.com>
---
 fs/notify/inotify/inotify_user.c | 194 ++++++++++++++++++++++++++++-----------
 1 file changed, 139 insertions(+), 55 deletions(-)

(limited to 'fs')

diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index e46ca68..653c507 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -357,6 +357,77 @@ static int inotify_find_inode(const char __user *dirname, struct path *path, uns
 	return error;
 }
 
+static int inotify_add_to_idr(struct idr *idr, spinlock_t *idr_lock,
+			      int last_wd,
+			      struct inotify_inode_mark_entry *ientry)
+{
+	int ret;
+
+	do {
+		if (unlikely(!idr_pre_get(idr, GFP_KERNEL)))
+			return -ENOMEM;
+
+		spin_lock(idr_lock);
+		ret = idr_get_new_above(idr, ientry, last_wd + 1,
+					&ientry->wd);
+		/* we added the mark to the idr, take a reference */
+		if (!ret)
+			fsnotify_get_mark(&ientry->fsn_entry);
+		spin_unlock(idr_lock);
+	} while (ret == -EAGAIN);
+
+	return ret;
+}
+
+static struct inotify_inode_mark_entry *inotify_idr_find_locked(struct fsnotify_group *group,
+								int wd)
+{
+	struct idr *idr = &group->inotify_data.idr;
+	spinlock_t *idr_lock = &group->inotify_data.idr_lock;
+	struct inotify_inode_mark_entry *ientry;
+
+	assert_spin_locked(idr_lock);
+
+	ientry = idr_find(idr, wd);
+	if (ientry) {
+		struct fsnotify_mark_entry *fsn_entry = &ientry->fsn_entry;
+
+		fsnotify_get_mark(fsn_entry);
+		/* One ref for being in the idr, one ref we just took */
+		BUG_ON(atomic_read(&fsn_entry->refcnt) < 2);
+	}
+
+	return ientry;
+}
+
+static struct inotify_inode_mark_entry *inotify_idr_find(struct fsnotify_group *group,
+							 int wd)
+{
+	struct inotify_inode_mark_entry *ientry;
+	spinlock_t *idr_lock = &group->inotify_data.idr_lock;
+
+	spin_lock(idr_lock);
+	ientry = inotify_idr_find_locked(group, wd);
+	spin_unlock(idr_lock);
+
+	return ientry;
+}
+
+static void do_inotify_remove_from_idr(struct fsnotify_group *group,
+				       struct inotify_inode_mark_entry *ientry)
+{
+	struct idr *idr = &group->inotify_data.idr;
+	spinlock_t *idr_lock = &group->inotify_data.idr_lock;
+	int wd = ientry->wd;
+
+	assert_spin_locked(idr_lock);
+
+	idr_remove(idr, wd);
+
+	/* removed from the idr, drop that ref */
+	fsnotify_put_mark(&ientry->fsn_entry);
+}
+
 /*
  * Remove the mark from the idr (if present) and drop the reference
  * on the mark because it was in the idr.
@@ -364,42 +435,72 @@ static int inotify_find_inode(const char __user *dirname, struct path *path, uns
 static void inotify_remove_from_idr(struct fsnotify_group *group,
 				    struct inotify_inode_mark_entry *ientry)
 {
-	struct idr *idr;
-	struct fsnotify_mark_entry *entry;
-	struct inotify_inode_mark_entry *found_ientry;
+	spinlock_t *idr_lock = &group->inotify_data.idr_lock;
+	struct inotify_inode_mark_entry *found_ientry = NULL;
 	int wd;
 
-	spin_lock(&group->inotify_data.idr_lock);
-	idr = &group->inotify_data.idr;
+	spin_lock(idr_lock);
 	wd = ientry->wd;
 
-	if (wd == -1)
+	/*
+	 * does this ientry think it is in the idr?  we shouldn't get called
+	 * if it wasn't....
+	 */
+	if (wd == -1) {
+		printk(KERN_WARNING "%s: ientry=%p ientry->wd=%d ientry->group=%p"
+			" ientry->inode=%p\n", __func__, ientry, ientry->wd,
+			ientry->fsn_entry.group, ientry->fsn_entry.inode);
+		WARN_ON(1);
 		goto out;
+	}
 
-	entry = idr_find(&group->inotify_data.idr, wd);
-	if (unlikely(!entry))
+	/* Lets look in the idr to see if we find it */
+	found_ientry = inotify_idr_find_locked(group, wd);
+	if (unlikely(!found_ientry)) {
+		printk(KERN_WARNING "%s: ientry=%p ientry->wd=%d ientry->group=%p"
+			" ientry->inode=%p\n", __func__, ientry, ientry->wd,
+			ientry->fsn_entry.group, ientry->fsn_entry.inode);
+		WARN_ON(1);
 		goto out;
+	}
 
-	found_ientry = container_of(entry, struct inotify_inode_mark_entry, fsn_entry);
+	/*
+	 * We found an entry in the idr at the right wd, but it's
+	 * not the entry we were told to remove.  eparis seriously
+	 * fucked up somewhere.
+	 */
 	if (unlikely(found_ientry != ientry)) {
-		/* We found an entry in the idr with the right wd, but it's
-		 * not the entry we were told to remove.  eparis seriously
-		 * fucked up somewhere. */
 		WARN_ON(1);
-		ientry->wd = -1;
+		printk(KERN_WARNING "%s: ientry=%p ientry->wd=%d ientry->group=%p "
+			"entry->inode=%p found_ientry=%p found_ientry->wd=%d "
+			"found_ientry->group=%p found_ientry->inode=%p\n",
+			__func__, ientry, ientry->wd, ientry->fsn_entry.group,
+			ientry->fsn_entry.inode, found_ientry, found_ientry->wd,
+			found_ientry->fsn_entry.group,
+			found_ientry->fsn_entry.inode);
 		goto out;
 	}
 
-	/* One ref for being in the idr, one ref held by the caller */
-	BUG_ON(atomic_read(&entry->refcnt) < 2);
-
-	idr_remove(idr, wd);
-	ientry->wd = -1;
+	/*
+	 * One ref for being in the idr
+	 * one ref held by the caller trying to kill us
+	 * one ref grabbed by inotify_idr_find
+	 */
+	if (unlikely(atomic_read(&ientry->fsn_entry.refcnt) < 3)) {
+		printk(KERN_WARNING "%s: ientry=%p ientry->wd=%d ientry->group=%p"
+			" ientry->inode=%p\n", __func__, ientry, ientry->wd,
+			ientry->fsn_entry.group, ientry->fsn_entry.inode);
+		/* we can't really recover with bad ref cnting.. */
+		BUG();
+	}
 
-	/* removed from the idr, drop that ref */
-	fsnotify_put_mark(entry);
+	do_inotify_remove_from_idr(group, ientry);
 out:
-	spin_unlock(&group->inotify_data.idr_lock);
+	/* match the ref taken by inotify_idr_find_locked() */
+	if (found_ientry)
+		fsnotify_put_mark(&found_ientry->fsn_entry);
+	ientry->wd = -1;
+	spin_unlock(idr_lock);
 }
 
 /*
@@ -524,6 +625,8 @@ static int inotify_new_watch(struct fsnotify_group *group,
 	struct inotify_inode_mark_entry *tmp_ientry;
 	__u32 mask;
 	int ret;
+	struct idr *idr = &group->inotify_data.idr;
+	spinlock_t *idr_lock = &group->inotify_data.idr_lock;
 
 	/* don't allow invalid bits: we don't want flags set */
 	mask = inotify_arg_to_mask(arg);
@@ -541,28 +644,11 @@ static int inotify_new_watch(struct fsnotify_group *group,
 	ret = -ENOSPC;
 	if (atomic_read(&group->inotify_data.user->inotify_watches) >= inotify_max_user_watches)
 		goto out_err;
-retry:
-	ret = -ENOMEM;
-	if (unlikely(!idr_pre_get(&group->inotify_data.idr, GFP_KERNEL)))
-		goto out_err;
-
-	/* we are putting the mark on the idr, take a reference */
-	fsnotify_get_mark(&tmp_ientry->fsn_entry);
-
-	spin_lock(&group->inotify_data.idr_lock);
-	ret = idr_get_new_above(&group->inotify_data.idr, &tmp_ientry->fsn_entry,
-				group->inotify_data.last_wd+1,
-				&tmp_ientry->wd);
-	spin_unlock(&group->inotify_data.idr_lock);
-	if (ret) {
-		/* we didn't get on the idr, drop the idr reference */
-		fsnotify_put_mark(&tmp_ientry->fsn_entry);
 
-		/* idr was out of memory allocate and try again */
-		if (ret == -EAGAIN)
-			goto retry;
+	ret = inotify_add_to_idr(idr, idr_lock, group->inotify_data.last_wd,
+				 tmp_ientry);
+	if (ret)
 		goto out_err;
-	}
 
 	/* we are on the idr, now get on the inode */
 	ret = fsnotify_add_mark(&tmp_ientry->fsn_entry, group, inode);
@@ -726,7 +812,7 @@ fput_and_out:
 SYSCALL_DEFINE2(inotify_rm_watch, int, fd, __s32, wd)
 {
 	struct fsnotify_group *group;
-	struct fsnotify_mark_entry *entry;
+	struct inotify_inode_mark_entry *ientry;
 	struct file *filp;
 	int ret = 0, fput_needed;
 
@@ -735,25 +821,23 @@ SYSCALL_DEFINE2(inotify_rm_watch, int, fd, __s32, wd)
 		return -EBADF;
 
 	/* verify that this is indeed an inotify instance */
-	if (unlikely(filp->f_op != &inotify_fops)) {
-		ret = -EINVAL;
+	ret = -EINVAL;
+	if (unlikely(filp->f_op != &inotify_fops))
 		goto out;
-	}
 
 	group = filp->private_data;
 
-	spin_lock(&group->inotify_data.idr_lock);
-	entry = idr_find(&group->inotify_data.idr, wd);
-	if (unlikely(!entry)) {
-		spin_unlock(&group->inotify_data.idr_lock);
-		ret = -EINVAL;
+	ret = -EINVAL;
+	ientry = inotify_idr_find(group, wd);
+	if (unlikely(!ientry))
 		goto out;
-	}
-	fsnotify_get_mark(entry);
-	spin_unlock(&group->inotify_data.idr_lock);
 
-	fsnotify_destroy_mark_by_entry(entry);
-	fsnotify_put_mark(entry);
+	ret = 0;
+
+	fsnotify_destroy_mark_by_entry(&ientry->fsn_entry);
+
+	/* match ref taken by inotify_idr_find */
+	fsnotify_put_mark(&ientry->fsn_entry);
 
 out:
 	fput_light(filp, fput_needed);
-- 
cgit v1.1


From 9e1c74321d87a8b079f04d89e750b39a43365e1f Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Thu, 17 Dec 2009 20:12:05 -0500
Subject: fsnotify: duplicate fsnotify_mark_entry data between 2 marks

Simple copy fsnotify information from one mark to another in preparation
for the second mark to replace the first.

Signed-off-by: Eric Paris <eparis@redhat.com>
---
 fs/notify/inode_mark.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/notify/inode_mark.c b/fs/notify/inode_mark.c
index 0399bcb..a13cf9e 100644
--- a/fs/notify/inode_mark.c
+++ b/fs/notify/inode_mark.c
@@ -282,12 +282,20 @@ struct fsnotify_mark_entry *fsnotify_find_mark_entry(struct fsnotify_group *grou
 	return NULL;
 }
 
+void fsnotify_duplicate_mark(struct fsnotify_mark_entry *new, struct fsnotify_mark_entry *old)
+{
+	assert_spin_locked(&old->lock);
+	new->inode = old->inode;
+	new->group = old->group;
+	new->mask = old->mask;
+	new->free_mark = old->free_mark;
+}
+
 /*
  * Nothing fancy, just initialize lists and locks and counters.
  */
 void fsnotify_init_mark(struct fsnotify_mark_entry *entry,
 			void (*free_mark)(struct fsnotify_mark_entry *entry))
-
 {
 	spin_lock_init(&entry->lock);
 	atomic_set(&entry->refcnt, 1);
-- 
cgit v1.1


From 40554c3dae83bd892b7fbfaa2ea9de739cbcf065 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Thu, 17 Dec 2009 20:12:05 -0500
Subject: fsnotify: allow addition of duplicate fsnotify marks

This patch allows a task to add a second fsnotify mark to an inode for the
same group.  This mark will be added to the end of the inode's list and
this will never be found by the stand fsnotify_find_mark() function.   This
is useful if a user wants to add a new mark before removing the old one.

Signed-off-by: Eric Paris <eparis@redhat.com>
---
 fs/notify/dnotify/dnotify.c      | 2 +-
 fs/notify/inode_mark.c           | 8 +++++---
 fs/notify/inotify/inotify_user.c | 2 +-
 3 files changed, 7 insertions(+), 5 deletions(-)

(limited to 'fs')

diff --git a/fs/notify/dnotify/dnotify.c b/fs/notify/dnotify/dnotify.c
index 7e54e52..85b97fc 100644
--- a/fs/notify/dnotify/dnotify.c
+++ b/fs/notify/dnotify/dnotify.c
@@ -362,7 +362,7 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg)
 		dnentry = container_of(entry, struct dnotify_mark_entry, fsn_entry);
 		spin_lock(&entry->lock);
 	} else {
-		fsnotify_add_mark(new_entry, dnotify_group, inode);
+		fsnotify_add_mark(new_entry, dnotify_group, inode, 0);
 		spin_lock(&new_entry->lock);
 		entry = new_entry;
 		dnentry = new_dnentry;
diff --git a/fs/notify/inode_mark.c b/fs/notify/inode_mark.c
index a13cf9e..7d2962e 100644
--- a/fs/notify/inode_mark.c
+++ b/fs/notify/inode_mark.c
@@ -312,9 +312,10 @@ void fsnotify_init_mark(struct fsnotify_mark_entry *entry,
  * event types should be delivered to which group and for which inodes.
  */
 int fsnotify_add_mark(struct fsnotify_mark_entry *entry,
-		      struct fsnotify_group *group, struct inode *inode)
+		      struct fsnotify_group *group, struct inode *inode,
+		      int allow_dups)
 {
-	struct fsnotify_mark_entry *lentry;
+	struct fsnotify_mark_entry *lentry = NULL;
 	int ret = 0;
 
 	inode = igrab(inode);
@@ -331,7 +332,8 @@ int fsnotify_add_mark(struct fsnotify_mark_entry *entry,
 	spin_lock(&group->mark_lock);
 	spin_lock(&inode->i_lock);
 
-	lentry = fsnotify_find_mark_entry(group, inode);
+	if (!allow_dups)
+		lentry = fsnotify_find_mark_entry(group, inode);
 	if (!lentry) {
 		entry->group = group;
 		entry->inode = inode;
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index 653c507..f22a040 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -651,7 +651,7 @@ static int inotify_new_watch(struct fsnotify_group *group,
 		goto out_err;
 
 	/* we are on the idr, now get on the inode */
-	ret = fsnotify_add_mark(&tmp_ientry->fsn_entry, group, inode);
+	ret = fsnotify_add_mark(&tmp_ientry->fsn_entry, group, inode, 0);
 	if (ret) {
 		/* we failed to get on the inode, get off the idr */
 		inotify_remove_from_idr(group, tmp_ientry);
-- 
cgit v1.1


From b4277d3dd5a7400c1ea7fd4e7d64bda8899f84f5 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Thu, 17 Dec 2009 20:12:06 -0500
Subject: fsnotify: use fsnotify_create_event to allocate the q_overflow event

Currently fsnotify defines a static fsnotify event which is sent when a
group overflows its allotted queue length.  This patch just allocates that
event from the event cache rather than defining it statically.  There is no
known reason that the current implementation is wrong, but this makes sure the
event is initialized and created like any other.

Signed-off-by: Eric Paris <eparis@redhat.com>
---
 fs/notify/notification.c | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/notify/notification.c b/fs/notify/notification.c
index b8bf53b..8481253 100644
--- a/fs/notify/notification.c
+++ b/fs/notify/notification.c
@@ -56,7 +56,7 @@ static struct kmem_cache *fsnotify_event_holder_cachep;
  * it is needed.  It's refcnt is set 1 at kernel init time and will never
  * get set to 0 so it will never get 'freed'
  */
-static struct fsnotify_event q_overflow_event;
+static struct fsnotify_event *q_overflow_event;
 static atomic_t fsnotify_sync_cookie = ATOMIC_INIT(0);
 
 /**
@@ -195,7 +195,7 @@ alloc_holder:
 	mutex_lock(&group->notification_mutex);
 
 	if (group->q_len >= group->max_events) {
-		event = &q_overflow_event;
+		event = q_overflow_event;
 		ret = -EOVERFLOW;
 		/* sorry, no private data on the overflow event */
 		priv = NULL;
@@ -412,8 +412,11 @@ __init int fsnotify_notification_init(void)
 	fsnotify_event_cachep = KMEM_CACHE(fsnotify_event, SLAB_PANIC);
 	fsnotify_event_holder_cachep = KMEM_CACHE(fsnotify_event_holder, SLAB_PANIC);
 
-	initialize_event(&q_overflow_event);
-	q_overflow_event.mask = FS_Q_OVERFLOW;
+	q_overflow_event = fsnotify_create_event(NULL, FS_Q_OVERFLOW, NULL,
+						 FSNOTIFY_EVENT_NONE, NULL, 0,
+						 GFP_KERNEL);
+	if (!q_overflow_event)
+		panic("unable to allocate fsnotify q_overflow_event\n");
 
 	return 0;
 }
-- 
cgit v1.1


From 31ddd3268dcb6c1d70e9930a83be43bf86e4bf17 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Thu, 17 Dec 2009 20:12:06 -0500
Subject: inotify: use container_of instead of casting

inotify_free_mark casts directly from an fsnotify_mark_entry to an
inotify_inode_mark_entry.  This works, but should use container_of instead
for future proofing.

Signed-off-by: Eric Paris <eparis@redhat.com>
---
 fs/notify/inotify/inotify_user.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index f22a040..a0e40f7 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -550,7 +550,9 @@ skip_send_ignore:
 /* ding dong the mark is dead */
 static void inotify_free_mark(struct fsnotify_mark_entry *entry)
 {
-	struct inotify_inode_mark_entry *ientry = (struct inotify_inode_mark_entry *)entry;
+	struct inotify_inode_mark_entry *ientry;
+
+	ientry = container_of(entry, struct inotify_inode_mark_entry, fsn_entry);
 
 	kmem_cache_free(inotify_inode_mark_cachep, ientry);
 }
-- 
cgit v1.1


From f0553af054d31f48a75fddd3ef8beb5c39715019 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Thu, 17 Dec 2009 20:12:06 -0500
Subject: fsnotify: kzalloc fsnotify groups

Use kzalloc for fsnotify_groups so that none of the fields can leak any
information accidentally.

Signed-off-by: Eric Paris <eparis@redhat.com>
---
 fs/notify/group.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/notify/group.c b/fs/notify/group.c
index 0e16771..777ca82 100644
--- a/fs/notify/group.c
+++ b/fs/notify/group.c
@@ -207,7 +207,7 @@ struct fsnotify_group *fsnotify_obtain_group(unsigned int group_num, __u32 mask,
 	struct fsnotify_group *group, *tgroup;
 
 	/* very low use, simpler locking if we just always alloc */
-	group = kmalloc(sizeof(struct fsnotify_group), GFP_KERNEL);
+	group = kzalloc(sizeof(struct fsnotify_group), GFP_KERNEL);
 	if (!group)
 		return ERR_PTR(-ENOMEM);
 
-- 
cgit v1.1


From 6f3a539e3bd8ed2eafa532443723d56896153a00 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Thu, 17 Dec 2009 20:12:07 -0500
Subject: fsnotify: use kmem_cache_zalloc to simplify event initialization

fsnotify event initialization is done entry by entry with almost everything
set to either 0 or NULL.  Use kmem_cache_zalloc and only initialize things
that need non-zero initialization.  Also means we don't have to change
initialization entries based on the config options.

Signed-off-by: Eric Paris <eparis@redhat.com>
---
 fs/notify/notification.c | 13 +------------
 1 file changed, 1 insertion(+), 12 deletions(-)

(limited to 'fs')

diff --git a/fs/notify/notification.c b/fs/notify/notification.c
index 8481253..b34ce7a 100644
--- a/fs/notify/notification.c
+++ b/fs/notify/notification.c
@@ -314,25 +314,14 @@ void fsnotify_flush_notify(struct fsnotify_group *group)
 
 static void initialize_event(struct fsnotify_event *event)
 {
-	event->holder.event = NULL;
 	INIT_LIST_HEAD(&event->holder.event_list);
 	atomic_set(&event->refcnt, 1);
 
 	spin_lock_init(&event->lock);
 
-	event->path.dentry = NULL;
-	event->path.mnt = NULL;
-	event->inode = NULL;
 	event->data_type = FSNOTIFY_EVENT_NONE;
 
 	INIT_LIST_HEAD(&event->private_data_list);
-
-	event->to_tell = NULL;
-
-	event->file_name = NULL;
-	event->name_len = 0;
-
-	event->sync_cookie = 0;
 }
 
 /*
@@ -353,7 +342,7 @@ struct fsnotify_event *fsnotify_create_event(struct inode *to_tell, __u32 mask,
 {
 	struct fsnotify_event *event;
 
-	event = kmem_cache_alloc(fsnotify_event_cachep, gfp);
+	event = kmem_cache_zalloc(fsnotify_event_cachep, gfp);
 	if (!event)
 		return NULL;
 
-- 
cgit v1.1


From 7050c48826d5adb2210bddfb6a67aa13bbe984ed Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Thu, 17 Dec 2009 20:27:10 -0500
Subject: inotify: do not reuse watch descriptors

Prior to 2.6.31 inotify would not reuse watch descriptors until all of
them had been used at least once.  After the rewrite inotify would reuse
watch descriptors.  The selinux utility 'restorecond' was found to have
problems when watch descriptors were reused.  This patch reverts to the
pre inotify rewrite behavior to not reuse watch descriptors.

Signed-off-by: Eric Paris <eparis@redhat.com>
---
 fs/notify/inotify/inotify_user.c | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

(limited to 'fs')

diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index a0e40f7..ce21eba 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -358,7 +358,7 @@ static int inotify_find_inode(const char __user *dirname, struct path *path, uns
 }
 
 static int inotify_add_to_idr(struct idr *idr, spinlock_t *idr_lock,
-			      int last_wd,
+			      int *last_wd,
 			      struct inotify_inode_mark_entry *ientry)
 {
 	int ret;
@@ -368,11 +368,13 @@ static int inotify_add_to_idr(struct idr *idr, spinlock_t *idr_lock,
 			return -ENOMEM;
 
 		spin_lock(idr_lock);
-		ret = idr_get_new_above(idr, ientry, last_wd + 1,
+		ret = idr_get_new_above(idr, ientry, *last_wd + 1,
 					&ientry->wd);
 		/* we added the mark to the idr, take a reference */
-		if (!ret)
+		if (!ret) {
 			fsnotify_get_mark(&ientry->fsn_entry);
+			*last_wd = ientry->wd;
+		}
 		spin_unlock(idr_lock);
 	} while (ret == -EAGAIN);
 
@@ -647,7 +649,7 @@ static int inotify_new_watch(struct fsnotify_group *group,
 	if (atomic_read(&group->inotify_data.user->inotify_watches) >= inotify_max_user_watches)
 		goto out_err;
 
-	ret = inotify_add_to_idr(idr, idr_lock, group->inotify_data.last_wd,
+	ret = inotify_add_to_idr(idr, idr_lock, &group->inotify_data.last_wd,
 				 tmp_ientry);
 	if (ret)
 		goto out_err;
@@ -660,9 +662,6 @@ static int inotify_new_watch(struct fsnotify_group *group,
 		goto out_err;
 	}
 
-	/* update the idr hint, who cares about races, it's just a hint */
-	group->inotify_data.last_wd = tmp_ientry->wd;
-
 	/* increment the number of watches the user has */
 	atomic_inc(&group->inotify_data.user->inotify_watches);
 
-- 
cgit v1.1


From 2dfc1cae4c42b93b831b2417540df2b895ab7108 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Thu, 17 Dec 2009 20:30:52 -0500
Subject: inotify: remove inotify in kernel interface

nothing uses inotify in the kernel, drop it!

Signed-off-by: Eric Paris <eparis@redhat.com>
---
 fs/inode.c                  |   6 -
 fs/notify/inotify/Kconfig   |  15 -
 fs/notify/inotify/Makefile  |   1 -
 fs/notify/inotify/inotify.c | 873 --------------------------------------------
 fs/open.c                   |   1 +
 5 files changed, 1 insertion(+), 895 deletions(-)
 delete mode 100644 fs/notify/inotify/inotify.c

(limited to 'fs')

diff --git a/fs/inode.c b/fs/inode.c
index 722860b..8e1bee9 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -20,7 +20,6 @@
 #include <linux/pagemap.h>
 #include <linux/cdev.h>
 #include <linux/bootmem.h>
-#include <linux/inotify.h>
 #include <linux/fsnotify.h>
 #include <linux/mount.h>
 #include <linux/async.h>
@@ -264,10 +263,6 @@ void inode_init_once(struct inode *inode)
 	INIT_RAW_PRIO_TREE_ROOT(&inode->i_data.i_mmap);
 	INIT_LIST_HEAD(&inode->i_data.i_mmap_nonlinear);
 	i_size_ordered_init(inode);
-#ifdef CONFIG_INOTIFY
-	INIT_LIST_HEAD(&inode->inotify_watches);
-	mutex_init(&inode->inotify_mutex);
-#endif
 #ifdef CONFIG_FSNOTIFY
 	INIT_HLIST_HEAD(&inode->i_fsnotify_mark_entries);
 #endif
@@ -413,7 +408,6 @@ int invalidate_inodes(struct super_block *sb)
 
 	down_write(&iprune_sem);
 	spin_lock(&inode_lock);
-	inotify_unmount_inodes(&sb->s_inodes);
 	fsnotify_unmount_inodes(&sb->s_inodes);
 	busy = invalidate_list(&sb->s_inodes, &throw_away);
 	spin_unlock(&inode_lock);
diff --git a/fs/notify/inotify/Kconfig b/fs/notify/inotify/Kconfig
index b3a159b..b981fc0 100644
--- a/fs/notify/inotify/Kconfig
+++ b/fs/notify/inotify/Kconfig
@@ -1,18 +1,3 @@
-config INOTIFY
-	bool "Inotify file change notification support"
-	default n
-	---help---
-	  Say Y here to enable legacy in kernel inotify support.  Inotify is a
-	  file change notification system.  It is a replacement for dnotify.
-	  This option only provides the legacy inotify in kernel API.  There
-	  are no in tree kernel users of this interface since it is deprecated.
-	  You only need this if you are loading an out of tree kernel module
-	  that uses inotify.
-
-	  For more information, see <file:Documentation/filesystems/inotify.txt>
-
-	  If unsure, say N.
-
 config INOTIFY_USER
 	bool "Inotify support for userspace"
 	select ANON_INODES
diff --git a/fs/notify/inotify/Makefile b/fs/notify/inotify/Makefile
index 9438281..a380dab 100644
--- a/fs/notify/inotify/Makefile
+++ b/fs/notify/inotify/Makefile
@@ -1,2 +1 @@
-obj-$(CONFIG_INOTIFY)		+= inotify.o
 obj-$(CONFIG_INOTIFY_USER)	+= inotify_fsnotify.o inotify_user.o
diff --git a/fs/notify/inotify/inotify.c b/fs/notify/inotify/inotify.c
deleted file mode 100644
index 27b75eb..0000000
--- a/fs/notify/inotify/inotify.c
+++ /dev/null
@@ -1,873 +0,0 @@
-/*
- * fs/inotify.c - inode-based file event notifications
- *
- * Authors:
- *	John McCutchan	<ttb@tentacle.dhs.org>
- *	Robert Love	<rml@novell.com>
- *
- * Kernel API added by: Amy Griffis <amy.griffis@hp.com>
- *
- * Copyright (C) 2005 John McCutchan
- * Copyright 2006 Hewlett-Packard Development Company, L.P.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2, or (at your option) any
- * later version.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- */
-
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/spinlock.h>
-#include <linux/idr.h>
-#include <linux/slab.h>
-#include <linux/fs.h>
-#include <linux/sched.h>
-#include <linux/init.h>
-#include <linux/list.h>
-#include <linux/writeback.h>
-#include <linux/inotify.h>
-#include <linux/fsnotify_backend.h>
-
-static atomic_t inotify_cookie;
-
-/*
- * Lock ordering:
- *
- * dentry->d_lock (used to keep d_move() away from dentry->d_parent)
- * iprune_mutex (synchronize shrink_icache_memory())
- * 	inode_lock (protects the super_block->s_inodes list)
- * 	inode->inotify_mutex (protects inode->inotify_watches and watches->i_list)
- * 		inotify_handle->mutex (protects inotify_handle and watches->h_list)
- *
- * The inode->inotify_mutex and inotify_handle->mutex and held during execution
- * of a caller's event handler.  Thus, the caller must not hold any locks
- * taken in their event handler while calling any of the published inotify
- * interfaces.
- */
-
-/*
- * Lifetimes of the three main data structures--inotify_handle, inode, and
- * inotify_watch--are managed by reference count.
- *
- * inotify_handle: Lifetime is from inotify_init() to inotify_destroy().
- * Additional references can bump the count via get_inotify_handle() and drop
- * the count via put_inotify_handle().
- *
- * inotify_watch: for inotify's purposes, lifetime is from inotify_add_watch()
- * to remove_watch_no_event().  Additional references can bump the count via
- * get_inotify_watch() and drop the count via put_inotify_watch().  The caller
- * is reponsible for the final put after receiving IN_IGNORED, or when using
- * IN_ONESHOT after receiving the first event.  Inotify does the final put if
- * inotify_destroy() is called.
- *
- * inode: Pinned so long as the inode is associated with a watch, from
- * inotify_add_watch() to the final put_inotify_watch().
- */
-
-/*
- * struct inotify_handle - represents an inotify instance
- *
- * This structure is protected by the mutex 'mutex'.
- */
-struct inotify_handle {
-	struct idr		idr;		/* idr mapping wd -> watch */
-	struct mutex		mutex;		/* protects this bad boy */
-	struct list_head	watches;	/* list of watches */
-	atomic_t		count;		/* reference count */
-	u32			last_wd;	/* the last wd allocated */
-	const struct inotify_operations *in_ops; /* inotify caller operations */
-};
-
-static inline void get_inotify_handle(struct inotify_handle *ih)
-{
-	atomic_inc(&ih->count);
-}
-
-static inline void put_inotify_handle(struct inotify_handle *ih)
-{
-	if (atomic_dec_and_test(&ih->count)) {
-		idr_destroy(&ih->idr);
-		kfree(ih);
-	}
-}
-
-/**
- * get_inotify_watch - grab a reference to an inotify_watch
- * @watch: watch to grab
- */
-void get_inotify_watch(struct inotify_watch *watch)
-{
-	atomic_inc(&watch->count);
-}
-EXPORT_SYMBOL_GPL(get_inotify_watch);
-
-int pin_inotify_watch(struct inotify_watch *watch)
-{
-	struct super_block *sb = watch->inode->i_sb;
-	if (atomic_inc_not_zero(&sb->s_active)) {
-		atomic_inc(&watch->count);
-		return 1;
-	}
-	return 0;
-}
-
-/**
- * put_inotify_watch - decrements the ref count on a given watch.  cleans up
- * watch references if the count reaches zero.  inotify_watch is freed by
- * inotify callers via the destroy_watch() op.
- * @watch: watch to release
- */
-void put_inotify_watch(struct inotify_watch *watch)
-{
-	if (atomic_dec_and_test(&watch->count)) {
-		struct inotify_handle *ih = watch->ih;
-
-		iput(watch->inode);
-		ih->in_ops->destroy_watch(watch);
-		put_inotify_handle(ih);
-	}
-}
-EXPORT_SYMBOL_GPL(put_inotify_watch);
-
-void unpin_inotify_watch(struct inotify_watch *watch)
-{
-	struct super_block *sb = watch->inode->i_sb;
-	put_inotify_watch(watch);
-	deactivate_super(sb);
-}
-
-/*
- * inotify_handle_get_wd - returns the next WD for use by the given handle
- *
- * Callers must hold ih->mutex.  This function can sleep.
- */
-static int inotify_handle_get_wd(struct inotify_handle *ih,
-				 struct inotify_watch *watch)
-{
-	int ret;
-
-	do {
-		if (unlikely(!idr_pre_get(&ih->idr, GFP_NOFS)))
-			return -ENOSPC;
-		ret = idr_get_new_above(&ih->idr, watch, ih->last_wd+1, &watch->wd);
-	} while (ret == -EAGAIN);
-
-	if (likely(!ret))
-		ih->last_wd = watch->wd;
-
-	return ret;
-}
-
-/*
- * inotify_inode_watched - returns nonzero if there are watches on this inode
- * and zero otherwise.  We call this lockless, we do not care if we race.
- */
-static inline int inotify_inode_watched(struct inode *inode)
-{
-	return !list_empty(&inode->inotify_watches);
-}
-
-/*
- * Get child dentry flag into synch with parent inode.
- * Flag should always be clear for negative dentrys.
- */
-static void set_dentry_child_flags(struct inode *inode, int watched)
-{
-	struct dentry *alias;
-
-	spin_lock(&dcache_lock);
-	list_for_each_entry(alias, &inode->i_dentry, d_alias) {
-		struct dentry *child;
-
-		list_for_each_entry(child, &alias->d_subdirs, d_u.d_child) {
-			if (!child->d_inode)
-				continue;
-
-			spin_lock(&child->d_lock);
-			if (watched)
-				child->d_flags |= DCACHE_INOTIFY_PARENT_WATCHED;
-			else
-				child->d_flags &=~DCACHE_INOTIFY_PARENT_WATCHED;
-			spin_unlock(&child->d_lock);
-		}
-	}
-	spin_unlock(&dcache_lock);
-}
-
-/*
- * inotify_find_handle - find the watch associated with the given inode and
- * handle
- *
- * Callers must hold inode->inotify_mutex.
- */
-static struct inotify_watch *inode_find_handle(struct inode *inode,
-					       struct inotify_handle *ih)
-{
-	struct inotify_watch *watch;
-
-	list_for_each_entry(watch, &inode->inotify_watches, i_list) {
-		if (watch->ih == ih)
-			return watch;
-	}
-
-	return NULL;
-}
-
-/*
- * remove_watch_no_event - remove watch without the IN_IGNORED event.
- *
- * Callers must hold both inode->inotify_mutex and ih->mutex.
- */
-static void remove_watch_no_event(struct inotify_watch *watch,
-				  struct inotify_handle *ih)
-{
-	list_del(&watch->i_list);
-	list_del(&watch->h_list);
-
-	if (!inotify_inode_watched(watch->inode))
-		set_dentry_child_flags(watch->inode, 0);
-
-	idr_remove(&ih->idr, watch->wd);
-}
-
-/**
- * inotify_remove_watch_locked - Remove a watch from both the handle and the
- * inode.  Sends the IN_IGNORED event signifying that the inode is no longer
- * watched.  May be invoked from a caller's event handler.
- * @ih: inotify handle associated with watch
- * @watch: watch to remove
- *
- * Callers must hold both inode->inotify_mutex and ih->mutex.
- */
-void inotify_remove_watch_locked(struct inotify_handle *ih,
-				 struct inotify_watch *watch)
-{
-	remove_watch_no_event(watch, ih);
-	ih->in_ops->handle_event(watch, watch->wd, IN_IGNORED, 0, NULL, NULL);
-}
-EXPORT_SYMBOL_GPL(inotify_remove_watch_locked);
-
-/* Kernel API for producing events */
-
-/*
- * inotify_d_instantiate - instantiate dcache entry for inode
- */
-void inotify_d_instantiate(struct dentry *entry, struct inode *inode)
-{
-	struct dentry *parent;
-
-	if (!inode)
-		return;
-
-	spin_lock(&entry->d_lock);
-	parent = entry->d_parent;
-	if (parent->d_inode && inotify_inode_watched(parent->d_inode))
-		entry->d_flags |= DCACHE_INOTIFY_PARENT_WATCHED;
-	spin_unlock(&entry->d_lock);
-}
-
-/*
- * inotify_d_move - dcache entry has been moved
- */
-void inotify_d_move(struct dentry *entry)
-{
-	struct dentry *parent;
-
-	parent = entry->d_parent;
-	if (inotify_inode_watched(parent->d_inode))
-		entry->d_flags |= DCACHE_INOTIFY_PARENT_WATCHED;
-	else
-		entry->d_flags &= ~DCACHE_INOTIFY_PARENT_WATCHED;
-}
-
-/**
- * inotify_inode_queue_event - queue an event to all watches on this inode
- * @inode: inode event is originating from
- * @mask: event mask describing this event
- * @cookie: cookie for synchronization, or zero
- * @name: filename, if any
- * @n_inode: inode associated with name
- */
-void inotify_inode_queue_event(struct inode *inode, u32 mask, u32 cookie,
-			       const char *name, struct inode *n_inode)
-{
-	struct inotify_watch *watch, *next;
-
-	if (!inotify_inode_watched(inode))
-		return;
-
-	mutex_lock(&inode->inotify_mutex);
-	list_for_each_entry_safe(watch, next, &inode->inotify_watches, i_list) {
-		u32 watch_mask = watch->mask;
-		if (watch_mask & mask) {
-			struct inotify_handle *ih= watch->ih;
-			mutex_lock(&ih->mutex);
-			if (watch_mask & IN_ONESHOT)
-				remove_watch_no_event(watch, ih);
-			ih->in_ops->handle_event(watch, watch->wd, mask, cookie,
-						 name, n_inode);
-			mutex_unlock(&ih->mutex);
-		}
-	}
-	mutex_unlock(&inode->inotify_mutex);
-}
-EXPORT_SYMBOL_GPL(inotify_inode_queue_event);
-
-/**
- * inotify_dentry_parent_queue_event - queue an event to a dentry's parent
- * @dentry: the dentry in question, we queue against this dentry's parent
- * @mask: event mask describing this event
- * @cookie: cookie for synchronization, or zero
- * @name: filename, if any
- */
-void inotify_dentry_parent_queue_event(struct dentry *dentry, u32 mask,
-				       u32 cookie, const char *name)
-{
-	struct dentry *parent;
-	struct inode *inode;
-
-	if (!(dentry->d_flags & DCACHE_INOTIFY_PARENT_WATCHED))
-		return;
-
-	spin_lock(&dentry->d_lock);
-	parent = dentry->d_parent;
-	inode = parent->d_inode;
-
-	if (inotify_inode_watched(inode)) {
-		dget(parent);
-		spin_unlock(&dentry->d_lock);
-		inotify_inode_queue_event(inode, mask, cookie, name,
-					  dentry->d_inode);
-		dput(parent);
-	} else
-		spin_unlock(&dentry->d_lock);
-}
-EXPORT_SYMBOL_GPL(inotify_dentry_parent_queue_event);
-
-/**
- * inotify_get_cookie - return a unique cookie for use in synchronizing events.
- */
-u32 inotify_get_cookie(void)
-{
-	return atomic_inc_return(&inotify_cookie);
-}
-EXPORT_SYMBOL_GPL(inotify_get_cookie);
-
-/**
- * inotify_unmount_inodes - an sb is unmounting.  handle any watched inodes.
- * @list: list of inodes being unmounted (sb->s_inodes)
- *
- * Called with inode_lock held, protecting the unmounting super block's list
- * of inodes, and with iprune_mutex held, keeping shrink_icache_memory() at bay.
- * We temporarily drop inode_lock, however, and CAN block.
- */
-void inotify_unmount_inodes(struct list_head *list)
-{
-	struct inode *inode, *next_i, *need_iput = NULL;
-
-	list_for_each_entry_safe(inode, next_i, list, i_sb_list) {
-		struct inotify_watch *watch, *next_w;
-		struct inode *need_iput_tmp;
-		struct list_head *watches;
-
-		/*
-		 * We cannot __iget() an inode in state I_CLEAR, I_FREEING,
-		 * I_WILL_FREE, or I_NEW which is fine because by that point
-		 * the inode cannot have any associated watches.
-		 */
-		if (inode->i_state & (I_CLEAR|I_FREEING|I_WILL_FREE|I_NEW))
-			continue;
-
-		/*
-		 * If i_count is zero, the inode cannot have any watches and
-		 * doing an __iget/iput with MS_ACTIVE clear would actually
-		 * evict all inodes with zero i_count from icache which is
-		 * unnecessarily violent and may in fact be illegal to do.
-		 */
-		if (!atomic_read(&inode->i_count))
-			continue;
-
-		need_iput_tmp = need_iput;
-		need_iput = NULL;
-		/* In case inotify_remove_watch_locked() drops a reference. */
-		if (inode != need_iput_tmp)
-			__iget(inode);
-		else
-			need_iput_tmp = NULL;
-		/* In case the dropping of a reference would nuke next_i. */
-		if ((&next_i->i_sb_list != list) &&
-				atomic_read(&next_i->i_count) &&
-				!(next_i->i_state & (I_CLEAR | I_FREEING |
-					I_WILL_FREE))) {
-			__iget(next_i);
-			need_iput = next_i;
-		}
-
-		/*
-		 * We can safely drop inode_lock here because we hold
-		 * references on both inode and next_i.  Also no new inodes
-		 * will be added since the umount has begun.  Finally,
-		 * iprune_mutex keeps shrink_icache_memory() away.
-		 */
-		spin_unlock(&inode_lock);
-
-		if (need_iput_tmp)
-			iput(need_iput_tmp);
-
-		/* for each watch, send IN_UNMOUNT and then remove it */
-		mutex_lock(&inode->inotify_mutex);
-		watches = &inode->inotify_watches;
-		list_for_each_entry_safe(watch, next_w, watches, i_list) {
-			struct inotify_handle *ih= watch->ih;
-			get_inotify_watch(watch);
-			mutex_lock(&ih->mutex);
-			ih->in_ops->handle_event(watch, watch->wd, IN_UNMOUNT, 0,
-						 NULL, NULL);
-			inotify_remove_watch_locked(ih, watch);
-			mutex_unlock(&ih->mutex);
-			put_inotify_watch(watch);
-		}
-		mutex_unlock(&inode->inotify_mutex);
-		iput(inode);		
-
-		spin_lock(&inode_lock);
-	}
-}
-EXPORT_SYMBOL_GPL(inotify_unmount_inodes);
-
-/**
- * inotify_inode_is_dead - an inode has been deleted, cleanup any watches
- * @inode: inode that is about to be removed
- */
-void inotify_inode_is_dead(struct inode *inode)
-{
-	struct inotify_watch *watch, *next;
-
-	mutex_lock(&inode->inotify_mutex);
-	list_for_each_entry_safe(watch, next, &inode->inotify_watches, i_list) {
-		struct inotify_handle *ih = watch->ih;
-		mutex_lock(&ih->mutex);
-		inotify_remove_watch_locked(ih, watch);
-		mutex_unlock(&ih->mutex);
-	}
-	mutex_unlock(&inode->inotify_mutex);
-}
-EXPORT_SYMBOL_GPL(inotify_inode_is_dead);
-
-/* Kernel Consumer API */
-
-/**
- * inotify_init - allocate and initialize an inotify instance
- * @ops: caller's inotify operations
- */
-struct inotify_handle *inotify_init(const struct inotify_operations *ops)
-{
-	struct inotify_handle *ih;
-
-	ih = kmalloc(sizeof(struct inotify_handle), GFP_KERNEL);
-	if (unlikely(!ih))
-		return ERR_PTR(-ENOMEM);
-
-	idr_init(&ih->idr);
-	INIT_LIST_HEAD(&ih->watches);
-	mutex_init(&ih->mutex);
-	ih->last_wd = 0;
-	ih->in_ops = ops;
-	atomic_set(&ih->count, 0);
-	get_inotify_handle(ih);
-
-	return ih;
-}
-EXPORT_SYMBOL_GPL(inotify_init);
-
-/**
- * inotify_init_watch - initialize an inotify watch
- * @watch: watch to initialize
- */
-void inotify_init_watch(struct inotify_watch *watch)
-{
-	INIT_LIST_HEAD(&watch->h_list);
-	INIT_LIST_HEAD(&watch->i_list);
-	atomic_set(&watch->count, 0);
-	get_inotify_watch(watch); /* initial get */
-}
-EXPORT_SYMBOL_GPL(inotify_init_watch);
-
-/*
- * Watch removals suck violently.  To kick the watch out we need (in this
- * order) inode->inotify_mutex and ih->mutex.  That's fine if we have
- * a hold on inode; however, for all other cases we need to make damn sure
- * we don't race with umount.  We can *NOT* just grab a reference to a
- * watch - inotify_unmount_inodes() will happily sail past it and we'll end
- * with reference to inode potentially outliving its superblock.  Ideally
- * we just want to grab an active reference to superblock if we can; that
- * will make sure we won't go into inotify_umount_inodes() until we are
- * done.  Cleanup is just deactivate_super().  However, that leaves a messy
- * case - what if we *are* racing with umount() and active references to
- * superblock can't be acquired anymore?  We can bump ->s_count, grab
- * ->s_umount, which will wait until the superblock is shut down and the
- * watch in question is pining for fjords.
- *
- * And yes, this is far beyond mere "not very pretty"; so's the entire
- * concept of inotify to start with.
- */
-
-/**
- * pin_to_kill - pin the watch down for removal
- * @ih: inotify handle
- * @watch: watch to kill
- *
- * Called with ih->mutex held, drops it.  Possible return values:
- * 0 - nothing to do, it has died
- * 1 - remove it, drop the reference and deactivate_super()
- */
-static int pin_to_kill(struct inotify_handle *ih, struct inotify_watch *watch)
-{
-	struct super_block *sb = watch->inode->i_sb;
-
-	if (atomic_inc_not_zero(&sb->s_active)) {
-		get_inotify_watch(watch);
-		mutex_unlock(&ih->mutex);
-		return 1;	/* the best outcome */
-	}
-	spin_lock(&sb_lock);
-	sb->s_count++;
-	spin_unlock(&sb_lock);
-	mutex_unlock(&ih->mutex); /* can't grab ->s_umount under it */
-	down_read(&sb->s_umount);
-	/* fs is already shut down; the watch is dead */
-	drop_super(sb);
-	return 0;
-}
-
-static void unpin_and_kill(struct inotify_watch *watch)
-{
-	struct super_block *sb = watch->inode->i_sb;
-	put_inotify_watch(watch);
-	deactivate_super(sb);
-}
-
-/**
- * inotify_destroy - clean up and destroy an inotify instance
- * @ih: inotify handle
- */
-void inotify_destroy(struct inotify_handle *ih)
-{
-	/*
-	 * Destroy all of the watches for this handle. Unfortunately, not very
-	 * pretty.  We cannot do a simple iteration over the list, because we
-	 * do not know the inode until we iterate to the watch.  But we need to
-	 * hold inode->inotify_mutex before ih->mutex.  The following works.
-	 *
-	 * AV: it had to become even uglier to start working ;-/
-	 */
-	while (1) {
-		struct inotify_watch *watch;
-		struct list_head *watches;
-		struct super_block *sb;
-		struct inode *inode;
-
-		mutex_lock(&ih->mutex);
-		watches = &ih->watches;
-		if (list_empty(watches)) {
-			mutex_unlock(&ih->mutex);
-			break;
-		}
-		watch = list_first_entry(watches, struct inotify_watch, h_list);
-		sb = watch->inode->i_sb;
-		if (!pin_to_kill(ih, watch))
-			continue;
-
-		inode = watch->inode;
-		mutex_lock(&inode->inotify_mutex);
-		mutex_lock(&ih->mutex);
-
-		/* make sure we didn't race with another list removal */
-		if (likely(idr_find(&ih->idr, watch->wd))) {
-			remove_watch_no_event(watch, ih);
-			put_inotify_watch(watch);
-		}
-
-		mutex_unlock(&ih->mutex);
-		mutex_unlock(&inode->inotify_mutex);
-		unpin_and_kill(watch);
-	}
-
-	/* free this handle: the put matching the get in inotify_init() */
-	put_inotify_handle(ih);
-}
-EXPORT_SYMBOL_GPL(inotify_destroy);
-
-/**
- * inotify_find_watch - find an existing watch for an (ih,inode) pair
- * @ih: inotify handle
- * @inode: inode to watch
- * @watchp: pointer to existing inotify_watch
- *
- * Caller must pin given inode (via nameidata).
- */
-s32 inotify_find_watch(struct inotify_handle *ih, struct inode *inode,
-		       struct inotify_watch **watchp)
-{
-	struct inotify_watch *old;
-	int ret = -ENOENT;
-
-	mutex_lock(&inode->inotify_mutex);
-	mutex_lock(&ih->mutex);
-
-	old = inode_find_handle(inode, ih);
-	if (unlikely(old)) {
-		get_inotify_watch(old); /* caller must put watch */
-		*watchp = old;
-		ret = old->wd;
-	}
-
-	mutex_unlock(&ih->mutex);
-	mutex_unlock(&inode->inotify_mutex);
-
-	return ret;
-}
-EXPORT_SYMBOL_GPL(inotify_find_watch);
-
-/**
- * inotify_find_update_watch - find and update the mask of an existing watch
- * @ih: inotify handle
- * @inode: inode's watch to update
- * @mask: mask of events to watch
- *
- * Caller must pin given inode (via nameidata).
- */
-s32 inotify_find_update_watch(struct inotify_handle *ih, struct inode *inode,
-			      u32 mask)
-{
-	struct inotify_watch *old;
-	int mask_add = 0;
-	int ret;
-
-	if (mask & IN_MASK_ADD)
-		mask_add = 1;
-
-	/* don't allow invalid bits: we don't want flags set */
-	mask &= IN_ALL_EVENTS | IN_ONESHOT;
-	if (unlikely(!mask))
-		return -EINVAL;
-
-	mutex_lock(&inode->inotify_mutex);
-	mutex_lock(&ih->mutex);
-
-	/*
-	 * Handle the case of re-adding a watch on an (inode,ih) pair that we
-	 * are already watching.  We just update the mask and return its wd.
-	 */
-	old = inode_find_handle(inode, ih);
-	if (unlikely(!old)) {
-		ret = -ENOENT;
-		goto out;
-	}
-
-	if (mask_add)
-		old->mask |= mask;
-	else
-		old->mask = mask;
-	ret = old->wd;
-out:
-	mutex_unlock(&ih->mutex);
-	mutex_unlock(&inode->inotify_mutex);
-	return ret;
-}
-EXPORT_SYMBOL_GPL(inotify_find_update_watch);
-
-/**
- * inotify_add_watch - add a watch to an inotify instance
- * @ih: inotify handle
- * @watch: caller allocated watch structure
- * @inode: inode to watch
- * @mask: mask of events to watch
- *
- * Caller must pin given inode (via nameidata).
- * Caller must ensure it only calls inotify_add_watch() once per watch.
- * Calls inotify_handle_get_wd() so may sleep.
- */
-s32 inotify_add_watch(struct inotify_handle *ih, struct inotify_watch *watch,
-		      struct inode *inode, u32 mask)
-{
-	int ret = 0;
-	int newly_watched;
-
-	/* don't allow invalid bits: we don't want flags set */
-	mask &= IN_ALL_EVENTS | IN_ONESHOT;
-	if (unlikely(!mask))
-		return -EINVAL;
-	watch->mask = mask;
-
-	mutex_lock(&inode->inotify_mutex);
-	mutex_lock(&ih->mutex);
-
-	/* Initialize a new watch */
-	ret = inotify_handle_get_wd(ih, watch);
-	if (unlikely(ret))
-		goto out;
-	ret = watch->wd;
-
-	/* save a reference to handle and bump the count to make it official */
-	get_inotify_handle(ih);
-	watch->ih = ih;
-
-	/*
-	 * Save a reference to the inode and bump the ref count to make it
-	 * official.  We hold a reference to nameidata, which makes this safe.
-	 */
-	watch->inode = igrab(inode);
-
-	/* Add the watch to the handle's and the inode's list */
-	newly_watched = !inotify_inode_watched(inode);
-	list_add(&watch->h_list, &ih->watches);
-	list_add(&watch->i_list, &inode->inotify_watches);
-	/*
-	 * Set child flags _after_ adding the watch, so there is no race
-	 * windows where newly instantiated children could miss their parent's
-	 * watched flag.
-	 */
-	if (newly_watched)
-		set_dentry_child_flags(inode, 1);
-
-out:
-	mutex_unlock(&ih->mutex);
-	mutex_unlock(&inode->inotify_mutex);
-	return ret;
-}
-EXPORT_SYMBOL_GPL(inotify_add_watch);
-
-/**
- * inotify_clone_watch - put the watch next to existing one
- * @old: already installed watch
- * @new: new watch
- *
- * Caller must hold the inotify_mutex of inode we are dealing with;
- * it is expected to remove the old watch before unlocking the inode.
- */
-s32 inotify_clone_watch(struct inotify_watch *old, struct inotify_watch *new)
-{
-	struct inotify_handle *ih = old->ih;
-	int ret = 0;
-
-	new->mask = old->mask;
-	new->ih = ih;
-
-	mutex_lock(&ih->mutex);
-
-	/* Initialize a new watch */
-	ret = inotify_handle_get_wd(ih, new);
-	if (unlikely(ret))
-		goto out;
-	ret = new->wd;
-
-	get_inotify_handle(ih);
-
-	new->inode = igrab(old->inode);
-
-	list_add(&new->h_list, &ih->watches);
-	list_add(&new->i_list, &old->inode->inotify_watches);
-out:
-	mutex_unlock(&ih->mutex);
-	return ret;
-}
-
-void inotify_evict_watch(struct inotify_watch *watch)
-{
-	get_inotify_watch(watch);
-	mutex_lock(&watch->ih->mutex);
-	inotify_remove_watch_locked(watch->ih, watch);
-	mutex_unlock(&watch->ih->mutex);
-}
-
-/**
- * inotify_rm_wd - remove a watch from an inotify instance
- * @ih: inotify handle
- * @wd: watch descriptor to remove
- *
- * Can sleep.
- */
-int inotify_rm_wd(struct inotify_handle *ih, u32 wd)
-{
-	struct inotify_watch *watch;
-	struct super_block *sb;
-	struct inode *inode;
-
-	mutex_lock(&ih->mutex);
-	watch = idr_find(&ih->idr, wd);
-	if (unlikely(!watch)) {
-		mutex_unlock(&ih->mutex);
-		return -EINVAL;
-	}
-	sb = watch->inode->i_sb;
-	if (!pin_to_kill(ih, watch))
-		return 0;
-
-	inode = watch->inode;
-
-	mutex_lock(&inode->inotify_mutex);
-	mutex_lock(&ih->mutex);
-
-	/* make sure that we did not race */
-	if (likely(idr_find(&ih->idr, wd) == watch))
-		inotify_remove_watch_locked(ih, watch);
-
-	mutex_unlock(&ih->mutex);
-	mutex_unlock(&inode->inotify_mutex);
-	unpin_and_kill(watch);
-
-	return 0;
-}
-EXPORT_SYMBOL_GPL(inotify_rm_wd);
-
-/**
- * inotify_rm_watch - remove a watch from an inotify instance
- * @ih: inotify handle
- * @watch: watch to remove
- *
- * Can sleep.
- */
-int inotify_rm_watch(struct inotify_handle *ih,
-		     struct inotify_watch *watch)
-{
-	return inotify_rm_wd(ih, watch->wd);
-}
-EXPORT_SYMBOL_GPL(inotify_rm_watch);
-
-/*
- * inotify_setup - core initialization function
- */
-static int __init inotify_setup(void)
-{
-	BUILD_BUG_ON(IN_ACCESS != FS_ACCESS);
-	BUILD_BUG_ON(IN_MODIFY != FS_MODIFY);
-	BUILD_BUG_ON(IN_ATTRIB != FS_ATTRIB);
-	BUILD_BUG_ON(IN_CLOSE_WRITE != FS_CLOSE_WRITE);
-	BUILD_BUG_ON(IN_CLOSE_NOWRITE != FS_CLOSE_NOWRITE);
-	BUILD_BUG_ON(IN_OPEN != FS_OPEN);
-	BUILD_BUG_ON(IN_MOVED_FROM != FS_MOVED_FROM);
-	BUILD_BUG_ON(IN_MOVED_TO != FS_MOVED_TO);
-	BUILD_BUG_ON(IN_CREATE != FS_CREATE);
-	BUILD_BUG_ON(IN_DELETE != FS_DELETE);
-	BUILD_BUG_ON(IN_DELETE_SELF != FS_DELETE_SELF);
-	BUILD_BUG_ON(IN_MOVE_SELF != FS_MOVE_SELF);
-	BUILD_BUG_ON(IN_Q_OVERFLOW != FS_Q_OVERFLOW);
-
-	BUILD_BUG_ON(IN_UNMOUNT != FS_UNMOUNT);
-	BUILD_BUG_ON(IN_ISDIR != FS_IN_ISDIR);
-	BUILD_BUG_ON(IN_IGNORED != FS_IN_IGNORED);
-	BUILD_BUG_ON(IN_ONESHOT != FS_IN_ONESHOT);
-
-	atomic_set(&inotify_cookie, 0);
-
-	return 0;
-}
-
-module_init(inotify_setup);
diff --git a/fs/open.c b/fs/open.c
index 5463266..94d54d3 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -29,6 +29,7 @@
 #include <linux/falloc.h>
 #include <linux/fs_struct.h>
 #include <linux/ima.h>
+#include <linux/dnotify.h>
 
 #include "internal.h"
 
-- 
cgit v1.1


From d7f0ce4e436b6109527c51b0efe0deff53d215f7 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Tue, 22 Dec 2009 23:16:33 -0500
Subject: inotify: do not spam console without limit

inotify was supposed to have a dmesg printk ratelimitor which would cause
inotify to only emit one message per boot.  The static bool was never set
so it kept firing messages.  This patch correctly limits warnings in multiple
places.

Signed-off-by: Eric Paris <eparis@redhat.com>
---
 fs/notify/inotify/inotify_user.c | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

(limited to 'fs')

diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index ce21eba..f2b5424 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -449,20 +449,18 @@ static void inotify_remove_from_idr(struct fsnotify_group *group,
 	 * if it wasn't....
 	 */
 	if (wd == -1) {
-		printk(KERN_WARNING "%s: ientry=%p ientry->wd=%d ientry->group=%p"
+		WARN_ONCE(1, "%s: ientry=%p ientry->wd=%d ientry->group=%p"
 			" ientry->inode=%p\n", __func__, ientry, ientry->wd,
 			ientry->fsn_entry.group, ientry->fsn_entry.inode);
-		WARN_ON(1);
 		goto out;
 	}
 
 	/* Lets look in the idr to see if we find it */
 	found_ientry = inotify_idr_find_locked(group, wd);
 	if (unlikely(!found_ientry)) {
-		printk(KERN_WARNING "%s: ientry=%p ientry->wd=%d ientry->group=%p"
+		WARN_ONCE(1, "%s: ientry=%p ientry->wd=%d ientry->group=%p"
 			" ientry->inode=%p\n", __func__, ientry, ientry->wd,
 			ientry->fsn_entry.group, ientry->fsn_entry.inode);
-		WARN_ON(1);
 		goto out;
 	}
 
@@ -472,8 +470,7 @@ static void inotify_remove_from_idr(struct fsnotify_group *group,
 	 * fucked up somewhere.
 	 */
 	if (unlikely(found_ientry != ientry)) {
-		WARN_ON(1);
-		printk(KERN_WARNING "%s: ientry=%p ientry->wd=%d ientry->group=%p "
+		WARN_ONCE(1, "%s: ientry=%p ientry->wd=%d ientry->group=%p "
 			"entry->inode=%p found_ientry=%p found_ientry->wd=%d "
 			"found_ientry->group=%p found_ientry->inode=%p\n",
 			__func__, ientry, ientry->wd, ientry->fsn_entry.group,
@@ -489,7 +486,7 @@ static void inotify_remove_from_idr(struct fsnotify_group *group,
 	 * one ref grabbed by inotify_idr_find
 	 */
 	if (unlikely(atomic_read(&ientry->fsn_entry.refcnt) < 3)) {
-		printk(KERN_WARNING "%s: ientry=%p ientry->wd=%d ientry->group=%p"
+		printk(KERN_ERR "%s: ientry=%p ientry->wd=%d ientry->group=%p"
 			" ientry->inode=%p\n", __func__, ientry, ientry->wd,
 			ientry->fsn_entry.group, ientry->fsn_entry.inode);
 		/* we can't really recover with bad ref cnting.. */
-- 
cgit v1.1


From 7b0a04fbfb35650941af87728d4891515b4fc179 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Thu, 17 Dec 2009 21:24:21 -0500
Subject: fsnotify: provide the data type to should_send_event

fanotify is only interested in event types which contain enough information
to open the original file in the context of the fanotify listener.  Since
fanotify may not want to send events if that data isn't present we pass
the data type to the should_send_event function call so fanotify can express
its lack of interest.

Signed-off-by: Eric Paris <eparis@redhat.com>
---
 fs/notify/dnotify/dnotify.c          | 3 ++-
 fs/notify/fsnotify.c                 | 2 +-
 fs/notify/inotify/inotify_fsnotify.c | 3 ++-
 3 files changed, 5 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/notify/dnotify/dnotify.c b/fs/notify/dnotify/dnotify.c
index 85b97fc..6f30f49 100644
--- a/fs/notify/dnotify/dnotify.c
+++ b/fs/notify/dnotify/dnotify.c
@@ -133,7 +133,8 @@ static int dnotify_handle_event(struct fsnotify_group *group,
  * userspace notification for that pair.
  */
 static bool dnotify_should_send_event(struct fsnotify_group *group,
-				      struct inode *inode, __u32 mask)
+				      struct inode *inode, __u32 mask,
+				      int data_type)
 {
 	struct fsnotify_mark_entry *entry;
 	bool send;
diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
index fcc2f06..fc06e47 100644
--- a/fs/notify/fsnotify.c
+++ b/fs/notify/fsnotify.c
@@ -157,7 +157,7 @@ void fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is, const
 	idx = srcu_read_lock(&fsnotify_grp_srcu);
 	list_for_each_entry_rcu(group, &fsnotify_groups, group_list) {
 		if (test_mask & group->mask) {
-			if (!group->ops->should_send_event(group, to_tell, mask))
+			if (!group->ops->should_send_event(group, to_tell, mask, data_is))
 				continue;
 			if (!event) {
 				event = fsnotify_create_event(to_tell, mask, data,
diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c
index e27960c..fc7c495 100644
--- a/fs/notify/inotify/inotify_fsnotify.c
+++ b/fs/notify/inotify/inotify_fsnotify.c
@@ -86,7 +86,8 @@ static void inotify_freeing_mark(struct fsnotify_mark_entry *entry, struct fsnot
 	inotify_ignored_and_remove_idr(entry, group);
 }
 
-static bool inotify_should_send_event(struct fsnotify_group *group, struct inode *inode, __u32 mask)
+static bool inotify_should_send_event(struct fsnotify_group *group, struct inode *inode,
+				      __u32 mask, int data_type)
 {
 	struct fsnotify_mark_entry *entry;
 	bool send;
-- 
cgit v1.1


From 8112e2d6a7356e8c3ff1f7f3c86f375ed0305705 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Thu, 17 Dec 2009 21:24:21 -0500
Subject: fsnotify: include data in should_send calls

fanotify is going to need to look at file->private_data to know if an event
should be sent or not.  This passes the data (which might be a file,
dentry, inode, or none) to the should_send function calls so fanotify can
get that information when available

Signed-off-by: Eric Paris <eparis@redhat.com>
---
 fs/notify/dnotify/dnotify.c          | 2 +-
 fs/notify/fsnotify.c                 | 3 ++-
 fs/notify/inotify/inotify_fsnotify.c | 2 +-
 3 files changed, 4 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/notify/dnotify/dnotify.c b/fs/notify/dnotify/dnotify.c
index 6f30f49..a213b83 100644
--- a/fs/notify/dnotify/dnotify.c
+++ b/fs/notify/dnotify/dnotify.c
@@ -134,7 +134,7 @@ static int dnotify_handle_event(struct fsnotify_group *group,
  */
 static bool dnotify_should_send_event(struct fsnotify_group *group,
 				      struct inode *inode, __u32 mask,
-				      int data_type)
+				      void *data, int data_type)
 {
 	struct fsnotify_mark_entry *entry;
 	bool send;
diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
index fc06e47..523337b 100644
--- a/fs/notify/fsnotify.c
+++ b/fs/notify/fsnotify.c
@@ -157,7 +157,8 @@ void fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is, const
 	idx = srcu_read_lock(&fsnotify_grp_srcu);
 	list_for_each_entry_rcu(group, &fsnotify_groups, group_list) {
 		if (test_mask & group->mask) {
-			if (!group->ops->should_send_event(group, to_tell, mask, data_is))
+			if (!group->ops->should_send_event(group, to_tell, mask,
+							   data, data_is))
 				continue;
 			if (!event) {
 				event = fsnotify_create_event(to_tell, mask, data,
diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c
index fc7c495..1f33234 100644
--- a/fs/notify/inotify/inotify_fsnotify.c
+++ b/fs/notify/inotify/inotify_fsnotify.c
@@ -87,7 +87,7 @@ static void inotify_freeing_mark(struct fsnotify_mark_entry *entry, struct fsnot
 }
 
 static bool inotify_should_send_event(struct fsnotify_group *group, struct inode *inode,
-				      __u32 mask, int data_type)
+				      __u32 mask, void *data, int data_type)
 {
 	struct fsnotify_mark_entry *entry;
 	bool send;
-- 
cgit v1.1


From 2a12a9d7814631e918dec93abad856e692d5286d Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Thu, 17 Dec 2009 21:24:21 -0500
Subject: fsnotify: pass a file instead of an inode to open, read, and write

fanotify, the upcoming notification system actually needs a struct path so it can
do opens in the context of listeners, and it needs a file so it can get f_flags
from the original process.  Close was the only operation that already was passing
a struct file to the notification hook.  This patch passes a file for access,
modify, and open as well as they are easily available to these hooks.

Signed-off-by: Eric Paris <eparis@redhat.com>
---
 fs/compat.c     | 5 ++---
 fs/exec.c       | 4 ++--
 fs/nfsd/vfs.c   | 4 ++--
 fs/open.c       | 2 +-
 fs/read_write.c | 8 ++++----
 5 files changed, 11 insertions(+), 12 deletions(-)

(limited to 'fs')

diff --git a/fs/compat.c b/fs/compat.c
index 6490d21..ce02278b 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -1193,11 +1193,10 @@ out:
 	if (iov != iovstack)
 		kfree(iov);
 	if ((ret + (type == READ)) > 0) {
-		struct dentry *dentry = file->f_path.dentry;
 		if (type == READ)
-			fsnotify_access(dentry);
+			fsnotify_access(file);
 		else
-			fsnotify_modify(dentry);
+			fsnotify_modify(file);
 	}
 	return ret;
 }
diff --git a/fs/exec.c b/fs/exec.c
index e19de6a..f2de04a 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -129,7 +129,7 @@ SYSCALL_DEFINE1(uselib, const char __user *, library)
 	if (file->f_path.mnt->mnt_flags & MNT_NOEXEC)
 		goto exit;
 
-	fsnotify_open(file->f_path.dentry);
+	fsnotify_open(file);
 
 	error = -ENOEXEC;
 	if(file->f_op) {
@@ -683,7 +683,7 @@ struct file *open_exec(const char *name)
 	if (file->f_path.mnt->mnt_flags & MNT_NOEXEC)
 		goto exit;
 
-	fsnotify_open(file->f_path.dentry);
+	fsnotify_open(file);
 
 	err = deny_write_access(file);
 	if (err)
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 3c11112..16114a8 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -951,7 +951,7 @@ nfsd_vfs_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
 		nfsdstats.io_read += host_err;
 		*count = host_err;
 		err = 0;
-		fsnotify_access(file->f_path.dentry);
+		fsnotify_access(file);
 	} else 
 		err = nfserrno(host_err);
 out:
@@ -1062,7 +1062,7 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
 		goto out_nfserr;
 	*cnt = host_err;
 	nfsdstats.io_write += host_err;
-	fsnotify_modify(file->f_path.dentry);
+	fsnotify_modify(file);
 
 	/* clear setuid/setgid flag after write */
 	if (inode->i_mode & (S_ISUID | S_ISGID))
diff --git a/fs/open.c b/fs/open.c
index 94d54d3..bf08263 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -889,7 +889,7 @@ long do_sys_open(int dfd, const char __user *filename, int flags, int mode)
 				put_unused_fd(fd);
 				fd = PTR_ERR(f);
 			} else {
-				fsnotify_open(f->f_path.dentry);
+				fsnotify_open(f);
 				fd_install(fd, f);
 			}
 		}
diff --git a/fs/read_write.c b/fs/read_write.c
index 9c04852..74e3658 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -311,7 +311,7 @@ ssize_t vfs_read(struct file *file, char __user *buf, size_t count, loff_t *pos)
 		else
 			ret = do_sync_read(file, buf, count, pos);
 		if (ret > 0) {
-			fsnotify_access(file->f_path.dentry);
+			fsnotify_access(file);
 			add_rchar(current, ret);
 		}
 		inc_syscr(current);
@@ -367,7 +367,7 @@ ssize_t vfs_write(struct file *file, const char __user *buf, size_t count, loff_
 		else
 			ret = do_sync_write(file, buf, count, pos);
 		if (ret > 0) {
-			fsnotify_modify(file->f_path.dentry);
+			fsnotify_modify(file);
 			add_wchar(current, ret);
 		}
 		inc_syscw(current);
@@ -675,9 +675,9 @@ out:
 		kfree(iov);
 	if ((ret + (type == READ)) > 0) {
 		if (type == READ)
-			fsnotify_access(file->f_path.dentry);
+			fsnotify_access(file);
 		else
-			fsnotify_modify(file->f_path.dentry);
+			fsnotify_modify(file);
 	}
 	return ret;
 }
-- 
cgit v1.1


From 28c60e37f874dcbb93c4afc839ba5e4911c4f4bc Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Thu, 17 Dec 2009 21:24:21 -0500
Subject: fsnotify: send struct file when sending events to parents when
 possible

fanotify needs a path in order to open an fd to the object which changed.
Currently notifications to inode's parents are done using only the inode.
For some parental notification we have the entire file, send that so
fanotify can use it.

Signed-off-by: Eric Paris <eparis@redhat.com>
---
 fs/notify/fsnotify.c | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
index 523337b..806beed 100644
--- a/fs/notify/fsnotify.c
+++ b/fs/notify/fsnotify.c
@@ -78,13 +78,16 @@ void __fsnotify_update_child_dentry_flags(struct inode *inode)
 }
 
 /* Notify this dentry's parent about a child's events. */
-void __fsnotify_parent(struct dentry *dentry, __u32 mask)
+void __fsnotify_parent(struct file *file, struct dentry *dentry, __u32 mask)
 {
 	struct dentry *parent;
 	struct inode *p_inode;
 	bool send = false;
 	bool should_update_children = false;
 
+	if (file)
+		dentry = file->f_path.dentry;
+
 	if (!(dentry->d_flags & DCACHE_FSNOTIFY_PARENT_WATCHED))
 		return;
 
@@ -115,8 +118,12 @@ void __fsnotify_parent(struct dentry *dentry, __u32 mask)
 		 * specifies these are events which came from a child. */
 		mask |= FS_EVENT_ON_CHILD;
 
-		fsnotify(p_inode, mask, dentry->d_inode, FSNOTIFY_EVENT_INODE,
-			 dentry->d_name.name, 0);
+		if (file)
+			fsnotify(p_inode, mask, file, FSNOTIFY_EVENT_FILE,
+				 dentry->d_name.name, 0);
+		else
+			fsnotify(p_inode, mask, dentry->d_inode, FSNOTIFY_EVENT_INODE,
+				 dentry->d_name.name, 0);
 		dput(parent);
 	}
 
-- 
cgit v1.1


From 74766bbfa99adf8cb8119df6121851edba21c9d9 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Thu, 17 Dec 2009 21:24:21 -0500
Subject: fsnotify: per group notification queue merge types

inotify only wishes to merge a new event with the last event on the
notification fifo.  fanotify is willing to merge any events including by
means of bitwise OR masks of multiple events together.  This patch moves
the inotify event merging logic out of the generic fsnotify notification.c
and into the inotify code.  This allows each use of fsnotify to provide
their own merge functionality.

Signed-off-by: Eric Paris <eparis@redhat.com>
---
 fs/notify/inotify/inotify_fsnotify.c | 56 ++++++++++++++++++++++++++-
 fs/notify/inotify/inotify_user.c     |  2 +-
 fs/notify/notification.c             | 73 ++++++++++--------------------------
 3 files changed, 75 insertions(+), 56 deletions(-)

(limited to 'fs')

diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c
index 1f33234..0a0f5d0 100644
--- a/fs/notify/inotify/inotify_fsnotify.c
+++ b/fs/notify/inotify/inotify_fsnotify.c
@@ -32,6 +32,60 @@
 
 #include "inotify.h"
 
+/*
+ * Check if 2 events contain the same information.  We do not compare private data
+ * but at this moment that isn't a problem for any know fsnotify listeners.
+ */
+static bool event_compare(struct fsnotify_event *old, struct fsnotify_event *new)
+{
+	if ((old->mask == new->mask) &&
+	    (old->to_tell == new->to_tell) &&
+	    (old->data_type == new->data_type) &&
+	    (old->name_len == new->name_len)) {
+		switch (old->data_type) {
+		case (FSNOTIFY_EVENT_INODE):
+			/* remember, after old was put on the wait_q we aren't
+			 * allowed to look at the inode any more, only thing
+			 * left to check was if the file_name is the same */
+			if (!old->name_len ||
+			    !strcmp(old->file_name, new->file_name))
+				return true;
+			break;
+		case (FSNOTIFY_EVENT_PATH):
+			if ((old->path.mnt == new->path.mnt) &&
+			    (old->path.dentry == new->path.dentry))
+				return true;
+			break;
+		case (FSNOTIFY_EVENT_NONE):
+			if (old->mask & FS_Q_OVERFLOW)
+				return true;
+			else if (old->mask & FS_IN_IGNORED)
+				return false;
+			return true;
+		};
+	}
+	return false;
+}
+
+static int inotify_merge(struct list_head *list, struct fsnotify_event *event)
+{
+	struct fsnotify_event_holder *last_holder;
+	struct fsnotify_event *last_event;
+	int ret = 0;
+
+	/* and the list better be locked by something too */
+	spin_lock(&event->lock);
+
+	last_holder = list_entry(list->prev, struct fsnotify_event_holder, event_list);
+	last_event = last_holder->event;
+	if (event_compare(last_event, event))
+		ret = -EEXIST;
+
+	spin_unlock(&event->lock);
+
+	return ret;
+}
+
 static int inotify_handle_event(struct fsnotify_group *group, struct fsnotify_event *event)
 {
 	struct fsnotify_mark_entry *entry;
@@ -62,7 +116,7 @@ static int inotify_handle_event(struct fsnotify_group *group, struct fsnotify_ev
 	fsn_event_priv->group = group;
 	event_priv->wd = wd;
 
-	ret = fsnotify_add_notify_event(group, event, fsn_event_priv);
+	ret = fsnotify_add_notify_event(group, event, fsn_event_priv, inotify_merge);
 	if (ret) {
 		inotify_free_event_priv(fsn_event_priv);
 		/* EEXIST says we tail matched, EOVERFLOW isn't something
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index f2b5424..cbe16df 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -531,7 +531,7 @@ void inotify_ignored_and_remove_idr(struct fsnotify_mark_entry *entry,
 	fsn_event_priv->group = group;
 	event_priv->wd = ientry->wd;
 
-	ret = fsnotify_add_notify_event(group, ignored_event, fsn_event_priv);
+	ret = fsnotify_add_notify_event(group, ignored_event, fsn_event_priv, NULL);
 	if (ret)
 		inotify_free_event_priv(fsn_event_priv);
 
diff --git a/fs/notify/notification.c b/fs/notify/notification.c
index b34ce7a..6dc96b3 100644
--- a/fs/notify/notification.c
+++ b/fs/notify/notification.c
@@ -104,7 +104,8 @@ struct fsnotify_event_holder *fsnotify_alloc_event_holder(void)
 
 void fsnotify_destroy_event_holder(struct fsnotify_event_holder *holder)
 {
-	kmem_cache_free(fsnotify_event_holder_cachep, holder);
+	if (holder)
+		kmem_cache_free(fsnotify_event_holder_cachep, holder);
 }
 
 /*
@@ -129,53 +130,17 @@ struct fsnotify_event_private_data *fsnotify_remove_priv_from_event(struct fsnot
 }
 
 /*
- * Check if 2 events contain the same information.  We do not compare private data
- * but at this moment that isn't a problem for any know fsnotify listeners.
- */
-static bool event_compare(struct fsnotify_event *old, struct fsnotify_event *new)
-{
-	if ((old->mask == new->mask) &&
-	    (old->to_tell == new->to_tell) &&
-	    (old->data_type == new->data_type) &&
-	    (old->name_len == new->name_len)) {
-		switch (old->data_type) {
-		case (FSNOTIFY_EVENT_INODE):
-			/* remember, after old was put on the wait_q we aren't
-			 * allowed to look at the inode any more, only thing
-			 * left to check was if the file_name is the same */
-			if (!old->name_len ||
-			    !strcmp(old->file_name, new->file_name))
-				return true;
-			break;
-		case (FSNOTIFY_EVENT_PATH):
-			if ((old->path.mnt == new->path.mnt) &&
-			    (old->path.dentry == new->path.dentry))
-				return true;
-			break;
-		case (FSNOTIFY_EVENT_NONE):
-			if (old->mask & FS_Q_OVERFLOW)
-				return true;
-			else if (old->mask & FS_IN_IGNORED)
-				return false;
-			return false;
-		};
-	}
-	return false;
-}
-
-/*
  * Add an event to the group notification queue.  The group can later pull this
  * event off the queue to deal with.  If the event is successfully added to the
  * group's notification queue, a reference is taken on event.
  */
 int fsnotify_add_notify_event(struct fsnotify_group *group, struct fsnotify_event *event,
-			      struct fsnotify_event_private_data *priv)
+			      struct fsnotify_event_private_data *priv,
+			      int (*merge)(struct list_head *, struct fsnotify_event *))
 {
 	struct fsnotify_event_holder *holder = NULL;
 	struct list_head *list = &group->notification_list;
-	struct fsnotify_event_holder *last_holder;
-	struct fsnotify_event *last_event;
-	int ret = 0;
+	int rc = 0;
 
 	/*
 	 * There is one fsnotify_event_holder embedded inside each fsnotify_event.
@@ -196,11 +161,23 @@ alloc_holder:
 
 	if (group->q_len >= group->max_events) {
 		event = q_overflow_event;
-		ret = -EOVERFLOW;
+		rc = -EOVERFLOW;
 		/* sorry, no private data on the overflow event */
 		priv = NULL;
 	}
 
+	if (!list_empty(list) && merge) {
+		int ret;
+
+		ret = merge(list, event);
+		if (ret) {
+			mutex_unlock(&group->notification_mutex);
+			if (holder != &event->holder)
+				fsnotify_destroy_event_holder(holder);
+			return ret;
+		}
+	}
+
 	spin_lock(&event->lock);
 
 	if (list_empty(&event->holder.event_list)) {
@@ -215,18 +192,6 @@ alloc_holder:
 		goto alloc_holder;
 	}
 
-	if (!list_empty(list)) {
-		last_holder = list_entry(list->prev, struct fsnotify_event_holder, event_list);
-		last_event = last_holder->event;
-		if (event_compare(last_event, event)) {
-			spin_unlock(&event->lock);
-			mutex_unlock(&group->notification_mutex);
-			if (holder != &event->holder)
-				fsnotify_destroy_event_holder(holder);
-			return -EEXIST;
-		}
-	}
-
 	group->q_len++;
 	holder->event = event;
 
@@ -238,7 +203,7 @@ alloc_holder:
 	mutex_unlock(&group->notification_mutex);
 
 	wake_up(&group->notification_waitq);
-	return ret;
+	return rc;
 }
 
 /*
-- 
cgit v1.1


From b4e4e1407312ae5a267ed7d716e6d4e7120a8430 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Thu, 17 Dec 2009 21:24:21 -0500
Subject: fsnotify: clone existing events

fsnotify_clone_event will take an event, clone it, and return the cloned
event to the caller.  Since events may be in use by multiple fsnotify
groups simultaneously certain event entries (such as the mask) cannot be
changed after the event was created.  Since fanotify would like to merge
events happening on the same file it needs a new clean event to work with
so it can change any fields it wishes.

Signed-off-by: Eric Paris <eparis@redhat.com>
---
 fs/notify/notification.c | 29 +++++++++++++++++++++++++----
 1 file changed, 25 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/notify/notification.c b/fs/notify/notification.c
index 6dc96b3..bc9470c 100644
--- a/fs/notify/notification.c
+++ b/fs/notify/notification.c
@@ -284,11 +284,33 @@ static void initialize_event(struct fsnotify_event *event)
 
 	spin_lock_init(&event->lock);
 
-	event->data_type = FSNOTIFY_EVENT_NONE;
-
 	INIT_LIST_HEAD(&event->private_data_list);
 }
 
+struct fsnotify_event *fsnotify_clone_event(struct fsnotify_event *old_event)
+{
+	struct fsnotify_event *event;
+
+	event = kmem_cache_alloc(fsnotify_event_cachep, GFP_KERNEL);
+	if (!event)
+		return NULL;
+
+	memcpy(event, old_event, sizeof(*event));
+	initialize_event(event);
+
+	if (event->name_len) {
+		event->file_name = kstrdup(old_event->file_name, GFP_KERNEL);
+		if (!event->file_name) {
+			kmem_cache_free(fsnotify_event_cachep, event);
+			return NULL;
+		}
+	}
+	if (event->data_type == FSNOTIFY_EVENT_PATH)
+		path_get(&event->path);
+
+	return event;
+}
+
 /*
  * fsnotify_create_event - Allocate a new event which will be sent to each
  * group's handle_event function if the group was interested in this
@@ -324,6 +346,7 @@ struct fsnotify_event *fsnotify_create_event(struct inode *to_tell, __u32 mask,
 
 	event->sync_cookie = cookie;
 	event->to_tell = to_tell;
+	event->data_type = data_type;
 
 	switch (data_type) {
 	case FSNOTIFY_EVENT_FILE: {
@@ -340,12 +363,10 @@ struct fsnotify_event *fsnotify_create_event(struct inode *to_tell, __u32 mask,
 		event->path.dentry = path->dentry;
 		event->path.mnt = path->mnt;
 		path_get(&event->path);
-		event->data_type = FSNOTIFY_EVENT_PATH;
 		break;
 	}
 	case FSNOTIFY_EVENT_INODE:
 		event->inode = data;
-		event->data_type = FSNOTIFY_EVENT_INODE;
 		break;
 	case FSNOTIFY_EVENT_NONE:
 		event->inode = NULL;
-- 
cgit v1.1


From 1201a5361b9bd6512ae01e6f2b7aa79d458cafb1 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Thu, 17 Dec 2009 21:24:22 -0500
Subject: fsnotify: replace an event on a list

fanotify would like to clone events already on its notification list, make
changes to the new event, and then replace the old event on the list with
the new event.  This patch implements the replace functionality of that
process.

Signed-off-by: Eric Paris <eparis@redhat.com>
---
 fs/notify/notification.c | 56 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 56 insertions(+)

(limited to 'fs')

diff --git a/fs/notify/notification.c b/fs/notify/notification.c
index bc9470c..b493c37 100644
--- a/fs/notify/notification.c
+++ b/fs/notify/notification.c
@@ -287,6 +287,62 @@ static void initialize_event(struct fsnotify_event *event)
 	INIT_LIST_HEAD(&event->private_data_list);
 }
 
+/*
+ * Caller damn well better be holding whatever mutex is protecting the
+ * old_holder->event_list.
+ */
+int fsnotify_replace_event(struct fsnotify_event_holder *old_holder,
+			   struct fsnotify_event *new_event)
+{
+	struct fsnotify_event *old_event = old_holder->event;
+	struct fsnotify_event_holder *new_holder = NULL;
+
+	/*
+	 * There is one fsnotify_event_holder embedded inside each fsnotify_event.
+	 * Check if we expect to be able to use that holder.  If not alloc a new
+	 * holder.
+	 * For the overflow event it's possible that something will use the in
+	 * event holder before we get the lock so we may need to jump back and
+	 * alloc a new holder, this can't happen for most events...
+	 */
+	if (!list_empty(&new_event->holder.event_list)) {
+alloc_holder:
+		new_holder = fsnotify_alloc_event_holder();
+		if (!new_holder)
+			return -ENOMEM;
+	}
+
+	spin_lock(&old_event->lock);
+	spin_lock(&new_event->lock);
+
+	if (list_empty(&new_event->holder.event_list)) {
+		if (unlikely(new_holder))
+			fsnotify_destroy_event_holder(new_holder);
+		new_holder = &new_event->holder;
+	} else if (unlikely(!new_holder)) {
+		/* between the time we checked above and got the lock the in
+		 * event holder was used, go back and get a new one */
+		spin_unlock(&new_event->lock);
+		spin_unlock(&old_event->lock);
+		goto alloc_holder;
+	}
+
+	new_holder->event = new_event;
+	list_replace_init(&old_holder->event_list, &new_holder->event_list);
+
+	spin_unlock(&new_event->lock);
+	spin_unlock(&old_event->lock);
+
+	/* event == holder means we are referenced through the in event holder */
+	if (old_holder != &old_event->holder)
+		fsnotify_destroy_event_holder(old_holder);
+
+	fsnotify_get_event(new_event); /* on the list take reference */
+	fsnotify_put_event(old_event); /* off the list, drop reference */
+
+	return 0;
+}
+
 struct fsnotify_event *fsnotify_clone_event(struct fsnotify_event *old_event)
 {
 	struct fsnotify_event *event;
-- 
cgit v1.1


From cac69dad32899c6f4c66bb4f9baf69b0d3c7d3d1 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Thu, 17 Dec 2009 21:24:22 -0500
Subject: fsnotify: lock annotation for event replacement

fsnotify_replace_event need to lock both the old and the new event.  This
causes lockdep to get all pissed off since it dosn't know this is safe.
It's safe in this case since the new event is impossible to be reached from
other places in the kernel.

Signed-off-by: Eric Paris <eparis@redhat.com>
---
 fs/notify/notification.c | 41 +++++++++++++----------------------------
 1 file changed, 13 insertions(+), 28 deletions(-)

(limited to 'fs')

diff --git a/fs/notify/notification.c b/fs/notify/notification.c
index b493c37..dafd0b7 100644
--- a/fs/notify/notification.c
+++ b/fs/notify/notification.c
@@ -289,43 +289,28 @@ static void initialize_event(struct fsnotify_event *event)
 
 /*
  * Caller damn well better be holding whatever mutex is protecting the
- * old_holder->event_list.
+ * old_holder->event_list and the new_event must be a clean event which
+ * cannot be found anywhere else in the kernel.
  */
 int fsnotify_replace_event(struct fsnotify_event_holder *old_holder,
 			   struct fsnotify_event *new_event)
 {
 	struct fsnotify_event *old_event = old_holder->event;
-	struct fsnotify_event_holder *new_holder = NULL;
+	struct fsnotify_event_holder *new_holder = &new_event->holder;
+
+	enum event_spinlock_class {
+		SPINLOCK_OLD,
+		SPINLOCK_NEW,
+	};
 
 	/*
-	 * There is one fsnotify_event_holder embedded inside each fsnotify_event.
-	 * Check if we expect to be able to use that holder.  If not alloc a new
-	 * holder.
-	 * For the overflow event it's possible that something will use the in
-	 * event holder before we get the lock so we may need to jump back and
-	 * alloc a new holder, this can't happen for most events...
+	 * if the new_event's embedded holder is in use someone
+	 * screwed up and didn't give us a clean new event.
 	 */
-	if (!list_empty(&new_event->holder.event_list)) {
-alloc_holder:
-		new_holder = fsnotify_alloc_event_holder();
-		if (!new_holder)
-			return -ENOMEM;
-	}
+	BUG_ON(!list_empty(&new_holder->event_list));
 
-	spin_lock(&old_event->lock);
-	spin_lock(&new_event->lock);
-
-	if (list_empty(&new_event->holder.event_list)) {
-		if (unlikely(new_holder))
-			fsnotify_destroy_event_holder(new_holder);
-		new_holder = &new_event->holder;
-	} else if (unlikely(!new_holder)) {
-		/* between the time we checked above and got the lock the in
-		 * event holder was used, go back and get a new one */
-		spin_unlock(&new_event->lock);
-		spin_unlock(&old_event->lock);
-		goto alloc_holder;
-	}
+	spin_lock_nested(&old_event->lock, SPINLOCK_OLD);
+	spin_lock_nested(&new_event->lock, SPINLOCK_NEW);
 
 	new_holder->event = new_event;
 	list_replace_init(&old_holder->event_list, &new_holder->event_list);
-- 
cgit v1.1


From 74be0cc82835aecad332a29896b0f212ba893403 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Thu, 17 Dec 2009 21:24:22 -0500
Subject: fsnotify: remove group_num altogether

The original fsnotify interface has a group-num which was intended to be
able to find a group after it was added.  I no longer think this is a
necessary thing to do and so we remove the group_num.

Signed-off-by: Eric Paris <eparis@redhat.com>
---
 fs/notify/dnotify/dnotify.c      |  3 +--
 fs/notify/group.c                | 48 ++--------------------------------------
 fs/notify/inotify/inotify_user.c | 11 +--------
 3 files changed, 4 insertions(+), 58 deletions(-)

(limited to 'fs')

diff --git a/fs/notify/dnotify/dnotify.c b/fs/notify/dnotify/dnotify.c
index a213b83..1f46aea 100644
--- a/fs/notify/dnotify/dnotify.c
+++ b/fs/notify/dnotify/dnotify.c
@@ -433,8 +433,7 @@ static int __init dnotify_init(void)
 	dnotify_struct_cache = KMEM_CACHE(dnotify_struct, SLAB_PANIC);
 	dnotify_mark_entry_cache = KMEM_CACHE(dnotify_mark_entry, SLAB_PANIC);
 
-	dnotify_group = fsnotify_obtain_group(DNOTIFY_GROUP_NUM,
-					      0, &dnotify_fsnotify_ops);
+	dnotify_group = fsnotify_obtain_group(0, &dnotify_fsnotify_ops);
 	if (IS_ERR(dnotify_group))
 		panic("unable to allocate fsnotify group for dnotify\n");
 	return 0;
diff --git a/fs/notify/group.c b/fs/notify/group.c
index 777ca82..62fb896 100644
--- a/fs/notify/group.c
+++ b/fs/notify/group.c
@@ -78,15 +78,6 @@ void fsnotify_recalc_group_mask(struct fsnotify_group *group)
 }
 
 /*
- * Take a reference to a group so things found under the fsnotify_grp_mutex
- * can't get freed under us
- */
-static void fsnotify_get_group(struct fsnotify_group *group)
-{
-	atomic_inc(&group->refcnt);
-}
-
-/*
  * Final freeing of a group
  */
 void fsnotify_final_destroy_group(struct fsnotify_group *group)
@@ -171,40 +162,14 @@ void fsnotify_put_group(struct fsnotify_group *group)
 }
 
 /*
- * Simply run the fsnotify_groups list and find a group which matches
- * the given parameters.  If a group is found we take a reference to that
- * group.
- */
-static struct fsnotify_group *fsnotify_find_group(unsigned int group_num, __u32 mask,
-						  const struct fsnotify_ops *ops)
-{
-	struct fsnotify_group *group_iter;
-	struct fsnotify_group *group = NULL;
-
-	BUG_ON(!mutex_is_locked(&fsnotify_grp_mutex));
-
-	list_for_each_entry_rcu(group_iter, &fsnotify_groups, group_list) {
-		if (group_iter->group_num == group_num) {
-			if ((group_iter->mask == mask) &&
-			    (group_iter->ops == ops)) {
-				fsnotify_get_group(group_iter);
-				group = group_iter;
-			} else
-				group = ERR_PTR(-EEXIST);
-		}
-	}
-	return group;
-}
-
-/*
  * Either finds an existing group which matches the group_num, mask, and ops or
  * creates a new group and adds it to the global group list.  In either case we
  * take a reference for the group returned.
  */
-struct fsnotify_group *fsnotify_obtain_group(unsigned int group_num, __u32 mask,
+struct fsnotify_group *fsnotify_obtain_group(__u32 mask,
 					     const struct fsnotify_ops *ops)
 {
-	struct fsnotify_group *group, *tgroup;
+	struct fsnotify_group *group;
 
 	/* very low use, simpler locking if we just always alloc */
 	group = kzalloc(sizeof(struct fsnotify_group), GFP_KERNEL);
@@ -214,7 +179,6 @@ struct fsnotify_group *fsnotify_obtain_group(unsigned int group_num, __u32 mask,
 	atomic_set(&group->refcnt, 1);
 
 	group->on_group_list = 0;
-	group->group_num = group_num;
 	group->mask = mask;
 
 	mutex_init(&group->notification_mutex);
@@ -230,14 +194,6 @@ struct fsnotify_group *fsnotify_obtain_group(unsigned int group_num, __u32 mask,
 	group->ops = ops;
 
 	mutex_lock(&fsnotify_grp_mutex);
-	tgroup = fsnotify_find_group(group_num, mask, ops);
-	if (tgroup) {
-		/* group already exists */
-		mutex_unlock(&fsnotify_grp_mutex);
-		/* destroy the new one we made */
-		fsnotify_put_group(group);
-		return tgroup;
-	}
 
 	/* group not found, add a new one */
 	list_add_rcu(&group->group_list, &fsnotify_groups);
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index cbe16df..cae317f 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -51,12 +51,6 @@ int inotify_max_user_watches __read_mostly;
 static struct kmem_cache *inotify_inode_mark_cachep __read_mostly;
 struct kmem_cache *event_priv_cachep __read_mostly;
 
-/*
- * When inotify registers a new group it increments this and uses that
- * value as an offset to set the fsnotify group "name" and priority.
- */
-static atomic_t inotify_grp_num;
-
 #ifdef CONFIG_SYSCTL
 
 #include <linux/sysctl.h>
@@ -700,11 +694,8 @@ retry:
 static struct fsnotify_group *inotify_new_group(struct user_struct *user, unsigned int max_events)
 {
 	struct fsnotify_group *group;
-	unsigned int grp_num;
 
-	/* fsnotify_obtain_group took a reference to group, we put this when we kill the file in the end */
-	grp_num = (INOTIFY_GROUP_NUM - atomic_inc_return(&inotify_grp_num));
-	group = fsnotify_obtain_group(grp_num, 0, &inotify_fsnotify_ops);
+	group = fsnotify_obtain_group(0, &inotify_fsnotify_ops);
 	if (IS_ERR(group))
 		return group;
 
-- 
cgit v1.1


From cd7752ce7cac5184ca35aecebffafae9662570bc Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Thu, 17 Dec 2009 21:24:22 -0500
Subject: fsnotify: fsnotify_obtain_group kzalloc cleanup

fsnotify_obtain_group uses kzalloc but then proceedes to set things to 0.
This patch just deletes those useless lines.

Signed-off-by: Eric Paris <eparis@redhat.com>
---
 fs/notify/group.c | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'fs')

diff --git a/fs/notify/group.c b/fs/notify/group.c
index 62fb896..934860e 100644
--- a/fs/notify/group.c
+++ b/fs/notify/group.c
@@ -178,17 +178,14 @@ struct fsnotify_group *fsnotify_obtain_group(__u32 mask,
 
 	atomic_set(&group->refcnt, 1);
 
-	group->on_group_list = 0;
 	group->mask = mask;
 
 	mutex_init(&group->notification_mutex);
 	INIT_LIST_HEAD(&group->notification_list);
 	init_waitqueue_head(&group->notification_waitq);
-	group->q_len = 0;
 	group->max_events = UINT_MAX;
 
 	spin_lock_init(&group->mark_lock);
-	atomic_set(&group->num_marks, 0);
 	INIT_LIST_HEAD(&group->mark_entries);
 
 	group->ops = ops;
-- 
cgit v1.1


From ffab83402f01555a5fa32efb48a4dd0ce8d12ef5 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Thu, 17 Dec 2009 21:24:22 -0500
Subject: fsnotify: fsnotify_obtain_group should be fsnotify_alloc_group

fsnotify_obtain_group was intended to be able to find an already existing
group.  Nothing uses that functionality.  This just renames it to
fsnotify_alloc_group so it is clear what it is doing.

Signed-off-by: Eric Paris <eparis@redhat.com>
---
 fs/notify/dnotify/dnotify.c      |  2 +-
 fs/notify/group.c                | 10 +++-------
 fs/notify/inotify/inotify_user.c |  2 +-
 3 files changed, 5 insertions(+), 9 deletions(-)

(limited to 'fs')

diff --git a/fs/notify/dnotify/dnotify.c b/fs/notify/dnotify/dnotify.c
index 1f46aea..51e4fe3 100644
--- a/fs/notify/dnotify/dnotify.c
+++ b/fs/notify/dnotify/dnotify.c
@@ -433,7 +433,7 @@ static int __init dnotify_init(void)
 	dnotify_struct_cache = KMEM_CACHE(dnotify_struct, SLAB_PANIC);
 	dnotify_mark_entry_cache = KMEM_CACHE(dnotify_mark_entry, SLAB_PANIC);
 
-	dnotify_group = fsnotify_obtain_group(0, &dnotify_fsnotify_ops);
+	dnotify_group = fsnotify_alloc_group(0, &dnotify_fsnotify_ops);
 	if (IS_ERR(dnotify_group))
 		panic("unable to allocate fsnotify group for dnotify\n");
 	return 0;
diff --git a/fs/notify/group.c b/fs/notify/group.c
index 934860e..1d20d26 100644
--- a/fs/notify/group.c
+++ b/fs/notify/group.c
@@ -162,16 +162,13 @@ void fsnotify_put_group(struct fsnotify_group *group)
 }
 
 /*
- * Either finds an existing group which matches the group_num, mask, and ops or
- * creates a new group and adds it to the global group list.  In either case we
- * take a reference for the group returned.
+ * Create a new fsnotify_group and hold a reference for the group returned.
  */
-struct fsnotify_group *fsnotify_obtain_group(__u32 mask,
-					     const struct fsnotify_ops *ops)
+struct fsnotify_group *fsnotify_alloc_group(__u32 mask,
+					    const struct fsnotify_ops *ops)
 {
 	struct fsnotify_group *group;
 
-	/* very low use, simpler locking if we just always alloc */
 	group = kzalloc(sizeof(struct fsnotify_group), GFP_KERNEL);
 	if (!group)
 		return ERR_PTR(-ENOMEM);
@@ -192,7 +189,6 @@ struct fsnotify_group *fsnotify_obtain_group(__u32 mask,
 
 	mutex_lock(&fsnotify_grp_mutex);
 
-	/* group not found, add a new one */
 	list_add_rcu(&group->group_list, &fsnotify_groups);
 	group->on_group_list = 1;
 	/* being on the fsnotify_groups list holds one num_marks */
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index cae317f..25a2854 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -695,7 +695,7 @@ static struct fsnotify_group *inotify_new_group(struct user_struct *user, unsign
 {
 	struct fsnotify_group *group;
 
-	group = fsnotify_obtain_group(0, &inotify_fsnotify_ops);
+	group = fsnotify_alloc_group(0, &inotify_fsnotify_ops);
 	if (IS_ERR(group))
 		return group;
 
-- 
cgit v1.1


From 0d2e2a1d00d7d23e5bd9bb0935cde7c3d5835c56 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Thu, 17 Dec 2009 21:24:22 -0500
Subject: fsnotify: drop mask argument from fsnotify_alloc_group

Nothing uses the mask argument to fsnotify_alloc_group.  This patch drops
that argument.

Signed-off-by: Eric Paris <eparis@redhat.com>
---
 fs/notify/dnotify/dnotify.c      | 2 +-
 fs/notify/group.c                | 8 +-------
 fs/notify/inotify/inotify_user.c | 2 +-
 3 files changed, 3 insertions(+), 9 deletions(-)

(limited to 'fs')

diff --git a/fs/notify/dnotify/dnotify.c b/fs/notify/dnotify/dnotify.c
index 51e4fe3..e0a847b 100644
--- a/fs/notify/dnotify/dnotify.c
+++ b/fs/notify/dnotify/dnotify.c
@@ -433,7 +433,7 @@ static int __init dnotify_init(void)
 	dnotify_struct_cache = KMEM_CACHE(dnotify_struct, SLAB_PANIC);
 	dnotify_mark_entry_cache = KMEM_CACHE(dnotify_mark_entry, SLAB_PANIC);
 
-	dnotify_group = fsnotify_alloc_group(0, &dnotify_fsnotify_ops);
+	dnotify_group = fsnotify_alloc_group(&dnotify_fsnotify_ops);
 	if (IS_ERR(dnotify_group))
 		panic("unable to allocate fsnotify group for dnotify\n");
 	return 0;
diff --git a/fs/notify/group.c b/fs/notify/group.c
index 1d20d26..1657349 100644
--- a/fs/notify/group.c
+++ b/fs/notify/group.c
@@ -164,8 +164,7 @@ void fsnotify_put_group(struct fsnotify_group *group)
 /*
  * Create a new fsnotify_group and hold a reference for the group returned.
  */
-struct fsnotify_group *fsnotify_alloc_group(__u32 mask,
-					    const struct fsnotify_ops *ops)
+struct fsnotify_group *fsnotify_alloc_group(const struct fsnotify_ops *ops)
 {
 	struct fsnotify_group *group;
 
@@ -175,8 +174,6 @@ struct fsnotify_group *fsnotify_alloc_group(__u32 mask,
 
 	atomic_set(&group->refcnt, 1);
 
-	group->mask = mask;
-
 	mutex_init(&group->notification_mutex);
 	INIT_LIST_HEAD(&group->notification_list);
 	init_waitqueue_head(&group->notification_waitq);
@@ -196,8 +193,5 @@ struct fsnotify_group *fsnotify_alloc_group(__u32 mask,
 
 	mutex_unlock(&fsnotify_grp_mutex);
 
-	if (mask)
-		fsnotify_recalc_global_mask();
-
 	return group;
 }
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index 25a2854..a48d68a 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -695,7 +695,7 @@ static struct fsnotify_group *inotify_new_group(struct user_struct *user, unsign
 {
 	struct fsnotify_group *group;
 
-	group = fsnotify_alloc_group(0, &inotify_fsnotify_ops);
+	group = fsnotify_alloc_group(&inotify_fsnotify_ops);
 	if (IS_ERR(group))
 		return group;
 
-- 
cgit v1.1


From 19c2a0e1a2f60112c158342ba5f568f72b741c2c Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Thu, 17 Dec 2009 21:24:23 -0500
Subject: fsnotify: rename fsnotify_groups to fsnotify_inode_groups

Simple renaming patch.  fsnotify is about to support mount point listeners
so I am renaming fsnotify_groups and fsnotify_mask to indicate these are lists
used only for groups which have watches on inodes.

Signed-off-by: Eric Paris <eparis@redhat.com>
---
 fs/notify/fsnotify.c |  6 +++---
 fs/notify/fsnotify.h |  8 ++++----
 fs/notify/group.c    | 30 +++++++++++++++++++-----------
 3 files changed, 26 insertions(+), 18 deletions(-)

(limited to 'fs')

diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
index 806beed..23b5cfb 100644
--- a/fs/notify/fsnotify.c
+++ b/fs/notify/fsnotify.c
@@ -148,10 +148,10 @@ void fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is, const
 	/* global tests shouldn't care about events on child only the specific event */
 	__u32 test_mask = (mask & ~FS_EVENT_ON_CHILD);
 
-	if (list_empty(&fsnotify_groups))
+	if (list_empty(&fsnotify_inode_groups))
 		return;
 
-	if (!(test_mask & fsnotify_mask))
+	if (!(test_mask & fsnotify_inode_mask))
 		return;
 
 	if (!(test_mask & to_tell->i_fsnotify_mask))
@@ -162,7 +162,7 @@ void fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is, const
 	 * anything other than walk the list so it's crazy to pre-allocate.
 	 */
 	idx = srcu_read_lock(&fsnotify_grp_srcu);
-	list_for_each_entry_rcu(group, &fsnotify_groups, group_list) {
+	list_for_each_entry_rcu(group, &fsnotify_inode_groups, inode_group_list) {
 		if (test_mask & group->mask) {
 			if (!group->ops->should_send_event(group, to_tell, mask,
 							   data, data_is))
diff --git a/fs/notify/fsnotify.h b/fs/notify/fsnotify.h
index 4dc2408..ec5aee4 100644
--- a/fs/notify/fsnotify.h
+++ b/fs/notify/fsnotify.h
@@ -8,10 +8,10 @@
 
 /* protects reads of fsnotify_groups */
 extern struct srcu_struct fsnotify_grp_srcu;
-/* all groups which receive fsnotify events */
-extern struct list_head fsnotify_groups;
-/* all bitwise OR of all event types (FS_*) for all fsnotify_groups */
-extern __u32 fsnotify_mask;
+/* all groups which receive inode fsnotify events */
+extern struct list_head fsnotify_inode_groups;
+/* all bitwise OR of all event types (FS_*) for all fsnotify_inode_groups */
+extern __u32 fsnotify_inode_mask;
 
 /* destroy all events sitting in this groups notification queue */
 extern void fsnotify_flush_notify(struct fsnotify_group *group);
diff --git a/fs/notify/group.c b/fs/notify/group.c
index 1657349..c808097 100644
--- a/fs/notify/group.c
+++ b/fs/notify/group.c
@@ -33,9 +33,9 @@ static DEFINE_MUTEX(fsnotify_grp_mutex);
 /* protects reads while running the fsnotify_groups list */
 struct srcu_struct fsnotify_grp_srcu;
 /* all groups registered to receive filesystem notifications */
-LIST_HEAD(fsnotify_groups);
+LIST_HEAD(fsnotify_inode_groups);
 /* bitwise OR of all events (FS_*) interesting to some group on this system */
-__u32 fsnotify_mask;
+__u32 fsnotify_inode_mask;
 
 /*
  * When a new group registers or changes it's set of interesting events
@@ -48,10 +48,10 @@ void fsnotify_recalc_global_mask(void)
 	int idx;
 
 	idx = srcu_read_lock(&fsnotify_grp_srcu);
-	list_for_each_entry_rcu(group, &fsnotify_groups, group_list)
+	list_for_each_entry_rcu(group, &fsnotify_inode_groups, inode_group_list)
 		mask |= group->mask;
 	srcu_read_unlock(&fsnotify_grp_srcu, idx);
-	fsnotify_mask = mask;
+	fsnotify_inode_mask = mask;
 }
 
 /*
@@ -77,6 +77,17 @@ void fsnotify_recalc_group_mask(struct fsnotify_group *group)
 		fsnotify_recalc_global_mask();
 }
 
+static void fsnotify_add_group(struct fsnotify_group *group)
+{
+	BUG_ON(!mutex_is_locked(&fsnotify_grp_mutex));
+
+	group->on_inode_group_list = 1;
+	/* being on the fsnotify_groups list holds one num_marks */
+	atomic_inc(&group->num_marks);
+
+	list_add_tail_rcu(&group->inode_group_list, &fsnotify_inode_groups);
+}
+
 /*
  * Final freeing of a group
  */
@@ -118,9 +129,9 @@ static void __fsnotify_evict_group(struct fsnotify_group *group)
 {
 	BUG_ON(!mutex_is_locked(&fsnotify_grp_mutex));
 
-	if (group->on_group_list)
-		list_del_rcu(&group->group_list);
-	group->on_group_list = 0;
+	if (group->on_inode_group_list)
+		list_del_rcu(&group->inode_group_list);
+	group->on_inode_group_list = 0;
 }
 
 /*
@@ -186,10 +197,7 @@ struct fsnotify_group *fsnotify_alloc_group(const struct fsnotify_ops *ops)
 
 	mutex_lock(&fsnotify_grp_mutex);
 
-	list_add_rcu(&group->group_list, &fsnotify_groups);
-	group->on_group_list = 1;
-	/* being on the fsnotify_groups list holds one num_marks */
-	atomic_inc(&group->num_marks);
+	fsnotify_add_group(group);
 
 	mutex_unlock(&fsnotify_grp_mutex);
 
-- 
cgit v1.1


From 36fddebaa8a9186d4f5817ab798a83400b2fb2e7 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Thu, 17 Dec 2009 21:24:23 -0500
Subject: fsnotify: initialize the group->num_marks in a better place

Currently the comments say that group->num_marks is held because the group
is on the fsnotify_group list.  This isn't strictly the case, we really
just hold the num_marks for the life of the group (any time group->refcnt
is != 0)  This patch moves the initialization stuff and makes it clear when
it is really being held.

Signed-off-by: Eric Paris <eparis@redhat.com>
---
 fs/notify/group.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/notify/group.c b/fs/notify/group.c
index c808097..656c534 100644
--- a/fs/notify/group.c
+++ b/fs/notify/group.c
@@ -82,9 +82,6 @@ static void fsnotify_add_group(struct fsnotify_group *group)
 	BUG_ON(!mutex_is_locked(&fsnotify_grp_mutex));
 
 	group->on_inode_group_list = 1;
-	/* being on the fsnotify_groups list holds one num_marks */
-	atomic_inc(&group->num_marks);
-
 	list_add_tail_rcu(&group->inode_group_list, &fsnotify_inode_groups);
 }
 
@@ -183,7 +180,14 @@ struct fsnotify_group *fsnotify_alloc_group(const struct fsnotify_ops *ops)
 	if (!group)
 		return ERR_PTR(-ENOMEM);
 
+	/* set to 0 when there a no external references to this group */
 	atomic_set(&group->refcnt, 1);
+	/*
+	 * hits 0 when there are no external references AND no marks for
+	 * this group
+	 */
+	atomic_set(&group->num_marks, 1);
+
 
 	mutex_init(&group->notification_mutex);
 	INIT_LIST_HEAD(&group->notification_list);
-- 
cgit v1.1


From 4ca763523e040dc61191d4866a82981a5d30a4e9 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Thu, 17 Dec 2009 21:24:23 -0500
Subject: fsnotify: add groups to fsnotify_inode_groups when registering inode
 watch

Currently all fsnotify groups are added immediately to the
fsnotify_inode_groups list upon creation.  This means, even groups with no
watches (common for audit) will be on the global tracking list and will
get checked for every event.  This patch adds groups to the global list on
when the first inode mark is added to the group.

Signed-of-by: Eric Paris <eparis@redhat.com>
---
 fs/notify/fsnotify.h   |  2 ++
 fs/notify/group.c      | 18 ++++++++----------
 fs/notify/inode_mark.c |  7 +++++++
 3 files changed, 17 insertions(+), 10 deletions(-)

(limited to 'fs')

diff --git a/fs/notify/fsnotify.h b/fs/notify/fsnotify.h
index ec5aee4..5bd2241 100644
--- a/fs/notify/fsnotify.h
+++ b/fs/notify/fsnotify.h
@@ -16,6 +16,8 @@ extern __u32 fsnotify_inode_mask;
 /* destroy all events sitting in this groups notification queue */
 extern void fsnotify_flush_notify(struct fsnotify_group *group);
 
+/* add a group to the inode group list */
+extern void fsnotify_add_inode_group(struct fsnotify_group *group);
 /* final kfree of a group */
 extern void fsnotify_final_destroy_group(struct fsnotify_group *group);
 
diff --git a/fs/notify/group.c b/fs/notify/group.c
index 656c534..34fccbd 100644
--- a/fs/notify/group.c
+++ b/fs/notify/group.c
@@ -77,12 +77,15 @@ void fsnotify_recalc_group_mask(struct fsnotify_group *group)
 		fsnotify_recalc_global_mask();
 }
 
-static void fsnotify_add_group(struct fsnotify_group *group)
+void fsnotify_add_inode_group(struct fsnotify_group *group)
 {
-	BUG_ON(!mutex_is_locked(&fsnotify_grp_mutex));
+	mutex_lock(&fsnotify_grp_mutex);
 
+	if (!group->on_inode_group_list)
+		list_add_tail_rcu(&group->inode_group_list, &fsnotify_inode_groups);
 	group->on_inode_group_list = 1;
-	list_add_tail_rcu(&group->inode_group_list, &fsnotify_inode_groups);
+
+	mutex_unlock(&fsnotify_grp_mutex);
 }
 
 /*
@@ -188,22 +191,17 @@ struct fsnotify_group *fsnotify_alloc_group(const struct fsnotify_ops *ops)
 	 */
 	atomic_set(&group->num_marks, 1);
 
-
 	mutex_init(&group->notification_mutex);
 	INIT_LIST_HEAD(&group->notification_list);
 	init_waitqueue_head(&group->notification_waitq);
 	group->max_events = UINT_MAX;
 
+	INIT_LIST_HEAD(&group->inode_group_list);
+
 	spin_lock_init(&group->mark_lock);
 	INIT_LIST_HEAD(&group->mark_entries);
 
 	group->ops = ops;
 
-	mutex_lock(&fsnotify_grp_mutex);
-
-	fsnotify_add_group(group);
-
-	mutex_unlock(&fsnotify_grp_mutex);
-
 	return group;
 }
diff --git a/fs/notify/inode_mark.c b/fs/notify/inode_mark.c
index 7d2962e..a3230c4 100644
--- a/fs/notify/inode_mark.c
+++ b/fs/notify/inode_mark.c
@@ -323,6 +323,13 @@ int fsnotify_add_mark(struct fsnotify_mark_entry *entry,
 		return -EINVAL;
 
 	/*
+	 * if this group isn't being testing for inode type events we need
+	 * to start testing
+	 */
+	if (unlikely(list_empty(&group->inode_group_list)))
+		fsnotify_add_inode_group(group);
+
+	/*
 	 * LOCKING ORDER!!!!
 	 * entry->lock
 	 * group->mark_lock
-- 
cgit v1.1


From 7131485a93679ff9a543b74df280cfd119eb03ca Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Thu, 17 Dec 2009 21:24:23 -0500
Subject: fsnotify: mount point listeners list and global mask

currently all of the notification systems implemented select which inodes
they care about and receive messages only about those inodes (or the
children of those inodes.)  This patch begins to flesh out fsnotify support
for the concept of listeners that want to hear notification for an inode
accessed below a given monut point.  This patch implements a second list
of fsnotify groups to hold these types of groups and a second global mask
to hold the events of interest for this type of group.

The reason we want a second group list and mask is because the inode based
notification should_send_event support which makes each group look for a mark
on the given inode.  With one nfsmount listener that means that every group would
have to take the inode->i_lock, look for their mark, not find one, and return
for every operation.   By seperating vfsmount from inode listeners only when
there is a inode listener will the inode groups have to look for their
mark and take the inode lock.  vfsmount listeners will have to grab the lock and
look for a mark but there should be fewer of them, and one vfsmount listener
won't cause the i_lock to be grabbed and released for every fsnotify group
on every io operation.

Signed-off-by: Eric Paris <eparis@redhat.com>
---
 fs/notify/fsnotify.c   | 92 ++++++++++++++++++++++++++++++++++++++------------
 fs/notify/fsnotify.h   |  6 ++++
 fs/notify/group.c      | 33 +++++++++++++++---
 fs/notify/inode_mark.c |  7 ++++
 4 files changed, 112 insertions(+), 26 deletions(-)

(limited to 'fs')

diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
index 23b5cfb..a61aaa7 100644
--- a/fs/notify/fsnotify.c
+++ b/fs/notify/fsnotify.c
@@ -21,6 +21,7 @@
 #include <linux/gfp.h>
 #include <linux/init.h>
 #include <linux/module.h>
+#include <linux/mount.h>
 #include <linux/srcu.h>
 
 #include <linux/fsnotify_backend.h>
@@ -134,6 +135,45 @@ void __fsnotify_parent(struct file *file, struct dentry *dentry, __u32 mask)
 }
 EXPORT_SYMBOL_GPL(__fsnotify_parent);
 
+static void send_to_group(__u32 mask,
+			  struct fsnotify_group *group,
+			  void *data, int data_is, const char *file_name,
+			  u32 cookie, struct fsnotify_event **event,
+			  struct inode *to_tell)
+{
+	if (!group->ops->should_send_event(group, to_tell, mask,
+					   data, data_is))
+		return;
+	if (!*event) {
+		*event = fsnotify_create_event(to_tell, mask, data,
+						data_is, file_name,
+						cookie, GFP_KERNEL);
+		/*
+		 * shit, we OOM'd and now we can't tell, maybe
+		 * someday someone else will want to do something
+		 * here
+		 */
+		if (!*event)
+			return;
+	}
+	group->ops->handle_event(group, *event);
+}
+
+static bool needed_by_vfsmount(__u32 test_mask, void *data, int data_is)
+{
+	struct path *path;
+
+	if (data_is == FSNOTIFY_EVENT_PATH)
+		path = (struct path *)data;
+	else if (data_is == FSNOTIFY_EVENT_FILE)
+		path = &((struct file *)data)->f_path;
+	else
+		return false;
+
+	/* hook in this when mnt->mnt_fsnotify_mask is defined */
+	/* return (test_mask & path->mnt->mnt_fsnotify_mask); */
+	return false;
+}
 /*
  * This is the main call to fsnotify.  The VFS calls into hook specific functions
  * in linux/fsnotify.h.  Those functions then in turn call here.  Here will call
@@ -148,38 +188,46 @@ void fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is, const
 	/* global tests shouldn't care about events on child only the specific event */
 	__u32 test_mask = (mask & ~FS_EVENT_ON_CHILD);
 
-	if (list_empty(&fsnotify_inode_groups))
-		return;
+	/* if no fsnotify listeners, nothing to do */
+	if (list_empty(&fsnotify_inode_groups) &&
+	    list_empty(&fsnotify_vfsmount_groups))
+                return;
+ 
+	/* if none of the directed listeners or vfsmount listeners care */
+	if (!(test_mask & fsnotify_inode_mask) &&
+	    !(test_mask & fsnotify_vfsmount_mask))
+                return;
+ 
+	/* if this inode's directed listeners don't care and nothing on the vfsmount
+	 * listeners list cares, nothing to do */
+	if (!(test_mask & to_tell->i_fsnotify_mask) &&
+	    !needed_by_vfsmount(test_mask, data, data_is))
+                return;
 
-	if (!(test_mask & fsnotify_inode_mask))
-		return;
-
-	if (!(test_mask & to_tell->i_fsnotify_mask))
-		return;
 	/*
 	 * SRCU!!  the groups list is very very much read only and the path is
 	 * very hot.  The VAST majority of events are not going to need to do
 	 * anything other than walk the list so it's crazy to pre-allocate.
 	 */
 	idx = srcu_read_lock(&fsnotify_grp_srcu);
-	list_for_each_entry_rcu(group, &fsnotify_inode_groups, inode_group_list) {
-		if (test_mask & group->mask) {
-			if (!group->ops->should_send_event(group, to_tell, mask,
-							   data, data_is))
-				continue;
-			if (!event) {
-				event = fsnotify_create_event(to_tell, mask, data,
-							      data_is, file_name, cookie,
-							      GFP_KERNEL);
-				/* shit, we OOM'd and now we can't tell, maybe
-				 * someday someone else will want to do something
-				 * here */
-				if (!event)
-					break;
+
+	if (test_mask & to_tell->i_fsnotify_mask) {
+		list_for_each_entry_rcu(group, &fsnotify_inode_groups, inode_group_list) {
+			if (test_mask & group->mask) {
+				send_to_group(mask, group, data, data_is,
+					      file_name, cookie, &event, to_tell);
 			}
-			group->ops->handle_event(group, event);
 		}
 	}
+	if (needed_by_vfsmount(test_mask, data, data_is)) {
+		list_for_each_entry_rcu(group, &fsnotify_vfsmount_groups, vfsmount_group_list) {
+			if (test_mask & group->mask) {
+				send_to_group(mask, group, data, data_is,
+					      file_name, cookie, &event, to_tell);
+			}
+		}
+	}
+
 	srcu_read_unlock(&fsnotify_grp_srcu, idx);
 	/*
 	 * fsnotify_create_event() took a reference so the event can't be cleaned
diff --git a/fs/notify/fsnotify.h b/fs/notify/fsnotify.h
index 5bd2241..2ba5915 100644
--- a/fs/notify/fsnotify.h
+++ b/fs/notify/fsnotify.h
@@ -10,14 +10,20 @@
 extern struct srcu_struct fsnotify_grp_srcu;
 /* all groups which receive inode fsnotify events */
 extern struct list_head fsnotify_inode_groups;
+/* all groups which receive vfsmount fsnotify events */
+extern struct list_head fsnotify_vfsmount_groups;
 /* all bitwise OR of all event types (FS_*) for all fsnotify_inode_groups */
 extern __u32 fsnotify_inode_mask;
+/* all bitwise OR of all event types (FS_*) for all fsnotify_vfsmount_groups */
+extern __u32 fsnotify_vfsmount_mask;
 
 /* destroy all events sitting in this groups notification queue */
 extern void fsnotify_flush_notify(struct fsnotify_group *group);
 
 /* add a group to the inode group list */
 extern void fsnotify_add_inode_group(struct fsnotify_group *group);
+/* add a group to the vfsmount group list */
+extern void fsnotify_add_vfsmount_group(struct fsnotify_group *group);
 /* final kfree of a group */
 extern void fsnotify_final_destroy_group(struct fsnotify_group *group);
 
diff --git a/fs/notify/group.c b/fs/notify/group.c
index 34fccbd..aa4654f 100644
--- a/fs/notify/group.c
+++ b/fs/notify/group.c
@@ -32,10 +32,14 @@
 static DEFINE_MUTEX(fsnotify_grp_mutex);
 /* protects reads while running the fsnotify_groups list */
 struct srcu_struct fsnotify_grp_srcu;
-/* all groups registered to receive filesystem notifications */
+/* all groups registered to receive inode filesystem notifications */
 LIST_HEAD(fsnotify_inode_groups);
+/* all groups registered to receive mount point filesystem notifications */
+LIST_HEAD(fsnotify_vfsmount_groups);
 /* bitwise OR of all events (FS_*) interesting to some group on this system */
 __u32 fsnotify_inode_mask;
+/* bitwise OR of all events (FS_*) interesting to some group on this system */
+__u32 fsnotify_vfsmount_mask;
 
 /*
  * When a new group registers or changes it's set of interesting events
@@ -44,14 +48,20 @@ __u32 fsnotify_inode_mask;
 void fsnotify_recalc_global_mask(void)
 {
 	struct fsnotify_group *group;
-	__u32 mask = 0;
+	__u32 inode_mask = 0;
+	__u32 vfsmount_mask = 0;
 	int idx;
 
 	idx = srcu_read_lock(&fsnotify_grp_srcu);
 	list_for_each_entry_rcu(group, &fsnotify_inode_groups, inode_group_list)
-		mask |= group->mask;
+		inode_mask |= group->mask;
+	list_for_each_entry_rcu(group, &fsnotify_vfsmount_groups, vfsmount_group_list)
+		vfsmount_mask |= group->mask;
+		
 	srcu_read_unlock(&fsnotify_grp_srcu, idx);
-	fsnotify_inode_mask = mask;
+
+	fsnotify_inode_mask = inode_mask;
+	fsnotify_vfsmount_mask = vfsmount_mask;
 }
 
 /*
@@ -77,6 +87,17 @@ void fsnotify_recalc_group_mask(struct fsnotify_group *group)
 		fsnotify_recalc_global_mask();
 }
 
+void fsnotify_add_vfsmount_group(struct fsnotify_group *group)
+{
+	mutex_lock(&fsnotify_grp_mutex);
+
+	if (!group->on_vfsmount_group_list)
+		list_add_tail_rcu(&group->vfsmount_group_list, &fsnotify_vfsmount_groups);
+	group->on_vfsmount_group_list = 1;
+
+	mutex_unlock(&fsnotify_grp_mutex);
+}
+
 void fsnotify_add_inode_group(struct fsnotify_group *group)
 {
 	mutex_lock(&fsnotify_grp_mutex);
@@ -132,6 +153,9 @@ static void __fsnotify_evict_group(struct fsnotify_group *group)
 	if (group->on_inode_group_list)
 		list_del_rcu(&group->inode_group_list);
 	group->on_inode_group_list = 0;
+	if (group->on_vfsmount_group_list)
+		list_del_rcu(&group->vfsmount_group_list);
+	group->on_vfsmount_group_list = 0;
 }
 
 /*
@@ -197,6 +221,7 @@ struct fsnotify_group *fsnotify_alloc_group(const struct fsnotify_ops *ops)
 	group->max_events = UINT_MAX;
 
 	INIT_LIST_HEAD(&group->inode_group_list);
+	INIT_LIST_HEAD(&group->vfsmount_group_list);
 
 	spin_lock_init(&group->mark_lock);
 	INIT_LIST_HEAD(&group->mark_entries);
diff --git a/fs/notify/inode_mark.c b/fs/notify/inode_mark.c
index a3230c4..beffebb 100644
--- a/fs/notify/inode_mark.c
+++ b/fs/notify/inode_mark.c
@@ -328,6 +328,13 @@ int fsnotify_add_mark(struct fsnotify_mark_entry *entry,
 	 */
 	if (unlikely(list_empty(&group->inode_group_list)))
 		fsnotify_add_inode_group(group);
+	/*
+	 * XXX This is where we could also do the fsnotify_add_vfsmount_group
+	 * if we are setting and vfsmount mark....
+
+	if (unlikely(list_empty(&group->vfsmount_group_list)))
+		fsnotify_add_vfsmount_group(group);
+	 */
 
 	/*
 	 * LOCKING ORDER!!!!
-- 
cgit v1.1


From 3a9fb89f4cd04c23e16397befba92efb5d989b74 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Thu, 17 Dec 2009 21:24:23 -0500
Subject: fsnotify: include vfsmount in should_send_event when appropriate

To ensure that a group will not duplicate events when it receives it based
on the vfsmount and the inode should_send_event test we should distinguish
those two cases.  We pass a vfsmount to this function so groups can make
their own determinations.

Signed-off-by: Eric Paris <eparis@redhat.com>
---
 fs/notify/dnotify/dnotify.c          |  4 ++--
 fs/notify/fsnotify.c                 | 39 ++++++++++++++++++------------------
 fs/notify/inotify/inotify_fsnotify.c |  3 ++-
 3 files changed, 23 insertions(+), 23 deletions(-)

(limited to 'fs')

diff --git a/fs/notify/dnotify/dnotify.c b/fs/notify/dnotify/dnotify.c
index e0a847b..9eddafa 100644
--- a/fs/notify/dnotify/dnotify.c
+++ b/fs/notify/dnotify/dnotify.c
@@ -133,8 +133,8 @@ static int dnotify_handle_event(struct fsnotify_group *group,
  * userspace notification for that pair.
  */
 static bool dnotify_should_send_event(struct fsnotify_group *group,
-				      struct inode *inode, __u32 mask,
-				      void *data, int data_type)
+				      struct inode *inode, struct vfsmount *mnt,
+				      __u32 mask, void *data, int data_type)
 {
 	struct fsnotify_mark_entry *entry;
 	bool send;
diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
index a61aaa7..78c440c 100644
--- a/fs/notify/fsnotify.c
+++ b/fs/notify/fsnotify.c
@@ -135,13 +135,12 @@ void __fsnotify_parent(struct file *file, struct dentry *dentry, __u32 mask)
 }
 EXPORT_SYMBOL_GPL(__fsnotify_parent);
 
-static void send_to_group(__u32 mask,
-			  struct fsnotify_group *group,
-			  void *data, int data_is, const char *file_name,
-			  u32 cookie, struct fsnotify_event **event,
-			  struct inode *to_tell)
+static void send_to_group(struct fsnotify_group *group, struct inode *to_tell,
+			  struct vfsmount *mnt, __u32 mask, void *data,
+			  int data_is, u32 cookie, const char *file_name,
+			  struct fsnotify_event **event)
 {
-	if (!group->ops->should_send_event(group, to_tell, mask,
+	if (!group->ops->should_send_event(group, to_tell, mnt, mask,
 					   data, data_is))
 		return;
 	if (!*event) {
@@ -159,15 +158,9 @@ static void send_to_group(__u32 mask,
 	group->ops->handle_event(group, *event);
 }
 
-static bool needed_by_vfsmount(__u32 test_mask, void *data, int data_is)
+static bool needed_by_vfsmount(__u32 test_mask, struct vfsmount *mnt)
 {
-	struct path *path;
-
-	if (data_is == FSNOTIFY_EVENT_PATH)
-		path = (struct path *)data;
-	else if (data_is == FSNOTIFY_EVENT_FILE)
-		path = &((struct file *)data)->f_path;
-	else
+	if (!mnt)
 		return false;
 
 	/* hook in this when mnt->mnt_fsnotify_mask is defined */
@@ -184,6 +177,7 @@ void fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is, const
 {
 	struct fsnotify_group *group;
 	struct fsnotify_event *event = NULL;
+	struct vfsmount *mnt = NULL;
 	int idx;
 	/* global tests shouldn't care about events on child only the specific event */
 	__u32 test_mask = (mask & ~FS_EVENT_ON_CHILD);
@@ -198,10 +192,15 @@ void fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is, const
 	    !(test_mask & fsnotify_vfsmount_mask))
                 return;
  
+	if (data_is == FSNOTIFY_EVENT_PATH)
+		mnt = ((struct path *)data)->mnt;
+	else if (data_is == FSNOTIFY_EVENT_FILE)
+		mnt = ((struct file *)data)->f_path.mnt;
+
 	/* if this inode's directed listeners don't care and nothing on the vfsmount
 	 * listeners list cares, nothing to do */
 	if (!(test_mask & to_tell->i_fsnotify_mask) &&
-	    !needed_by_vfsmount(test_mask, data, data_is))
+	    !needed_by_vfsmount(test_mask, mnt))
                 return;
 
 	/*
@@ -214,16 +213,16 @@ void fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is, const
 	if (test_mask & to_tell->i_fsnotify_mask) {
 		list_for_each_entry_rcu(group, &fsnotify_inode_groups, inode_group_list) {
 			if (test_mask & group->mask) {
-				send_to_group(mask, group, data, data_is,
-					      file_name, cookie, &event, to_tell);
+				send_to_group(group, to_tell, NULL, mask, data, data_is,
+					      cookie, file_name, &event);
 			}
 		}
 	}
-	if (needed_by_vfsmount(test_mask, data, data_is)) {
+	if (needed_by_vfsmount(test_mask, mnt)) {
 		list_for_each_entry_rcu(group, &fsnotify_vfsmount_groups, vfsmount_group_list) {
 			if (test_mask & group->mask) {
-				send_to_group(mask, group, data, data_is,
-					      file_name, cookie, &event, to_tell);
+				send_to_group(group, to_tell, mnt, mask, data, data_is,
+					      cookie, file_name, &event);
 			}
 		}
 	}
diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c
index 0a0f5d0..8075ae7 100644
--- a/fs/notify/inotify/inotify_fsnotify.c
+++ b/fs/notify/inotify/inotify_fsnotify.c
@@ -141,7 +141,8 @@ static void inotify_freeing_mark(struct fsnotify_mark_entry *entry, struct fsnot
 }
 
 static bool inotify_should_send_event(struct fsnotify_group *group, struct inode *inode,
-				      __u32 mask, void *data, int data_type)
+				      struct vfsmount *mnt, __u32 mask, void *data,
+				      int data_type)
 {
 	struct fsnotify_mark_entry *entry;
 	bool send;
-- 
cgit v1.1


From 2823e04de4f1a49087b58ff2bb8f61361ffd9321 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Thu, 17 Dec 2009 21:24:23 -0500
Subject: fsnotify: put inode specific fields in an fsnotify_mark in a union

The addition of marks on vfs mounts will be simplified if the inode
specific parts of a mark and the vfsmnt specific parts of a mark are
actually in a union so naming can be easy.  This patch just implements the
inode struct and the union.

Signed-off-by: Eric Paris <eparis@redhat.com>
---
 fs/notify/dnotify/dnotify.c          |  4 ++--
 fs/notify/inode_mark.c               | 28 ++++++++++++++--------------
 fs/notify/inotify/inotify_fsnotify.c |  2 +-
 fs/notify/inotify/inotify_user.c     | 10 +++++-----
 4 files changed, 22 insertions(+), 22 deletions(-)

(limited to 'fs')

diff --git a/fs/notify/dnotify/dnotify.c b/fs/notify/dnotify/dnotify.c
index 9eddafa..fc3a9dc 100644
--- a/fs/notify/dnotify/dnotify.c
+++ b/fs/notify/dnotify/dnotify.c
@@ -70,8 +70,8 @@ static void dnotify_recalc_inode_mask(struct fsnotify_mark_entry *entry)
 	if (old_mask == new_mask)
 		return;
 
-	if (entry->inode)
-		fsnotify_recalc_inode_mask(entry->inode);
+	if (entry->i.inode)
+		fsnotify_recalc_inode_mask(entry->i.inode);
 }
 
 /*
diff --git a/fs/notify/inode_mark.c b/fs/notify/inode_mark.c
index beffebb..6731408 100644
--- a/fs/notify/inode_mark.c
+++ b/fs/notify/inode_mark.c
@@ -117,7 +117,7 @@ static void fsnotify_recalc_inode_mask_locked(struct inode *inode)
 
 	assert_spin_locked(&inode->i_lock);
 
-	hlist_for_each_entry(entry, pos, &inode->i_fsnotify_mark_entries, i_list)
+	hlist_for_each_entry(entry, pos, &inode->i_fsnotify_mark_entries, i.i_list)
 		new_mask |= entry->mask;
 	inode->i_fsnotify_mask = new_mask;
 }
@@ -148,7 +148,7 @@ void fsnotify_destroy_mark_by_entry(struct fsnotify_mark_entry *entry)
 	spin_lock(&entry->lock);
 
 	group = entry->group;
-	inode = entry->inode;
+	inode = entry->i.inode;
 
 	BUG_ON(group && !inode);
 	BUG_ON(!group && inode);
@@ -165,8 +165,8 @@ void fsnotify_destroy_mark_by_entry(struct fsnotify_mark_entry *entry)
 	spin_lock(&group->mark_lock);
 	spin_lock(&inode->i_lock);
 
-	hlist_del_init(&entry->i_list);
-	entry->inode = NULL;
+	hlist_del_init(&entry->i.i_list);
+	entry->i.inode = NULL;
 
 	list_del_init(&entry->g_list);
 	entry->group = NULL;
@@ -248,14 +248,14 @@ void fsnotify_clear_marks_by_inode(struct inode *inode)
 	LIST_HEAD(free_list);
 
 	spin_lock(&inode->i_lock);
-	hlist_for_each_entry_safe(entry, pos, n, &inode->i_fsnotify_mark_entries, i_list) {
-		list_add(&entry->free_i_list, &free_list);
-		hlist_del_init(&entry->i_list);
+	hlist_for_each_entry_safe(entry, pos, n, &inode->i_fsnotify_mark_entries, i.i_list) {
+		list_add(&entry->i.free_i_list, &free_list);
+		hlist_del_init(&entry->i.i_list);
 		fsnotify_get_mark(entry);
 	}
 	spin_unlock(&inode->i_lock);
 
-	list_for_each_entry_safe(entry, lentry, &free_list, free_i_list) {
+	list_for_each_entry_safe(entry, lentry, &free_list, i.free_i_list) {
 		fsnotify_destroy_mark_by_entry(entry);
 		fsnotify_put_mark(entry);
 	}
@@ -273,7 +273,7 @@ struct fsnotify_mark_entry *fsnotify_find_mark_entry(struct fsnotify_group *grou
 
 	assert_spin_locked(&inode->i_lock);
 
-	hlist_for_each_entry(entry, pos, &inode->i_fsnotify_mark_entries, i_list) {
+	hlist_for_each_entry(entry, pos, &inode->i_fsnotify_mark_entries, i.i_list) {
 		if (entry->group == group) {
 			fsnotify_get_mark(entry);
 			return entry;
@@ -285,7 +285,7 @@ struct fsnotify_mark_entry *fsnotify_find_mark_entry(struct fsnotify_group *grou
 void fsnotify_duplicate_mark(struct fsnotify_mark_entry *new, struct fsnotify_mark_entry *old)
 {
 	assert_spin_locked(&old->lock);
-	new->inode = old->inode;
+	new->i.inode = old->i.inode;
 	new->group = old->group;
 	new->mask = old->mask;
 	new->free_mark = old->free_mark;
@@ -299,10 +299,10 @@ void fsnotify_init_mark(struct fsnotify_mark_entry *entry,
 {
 	spin_lock_init(&entry->lock);
 	atomic_set(&entry->refcnt, 1);
-	INIT_HLIST_NODE(&entry->i_list);
+	INIT_HLIST_NODE(&entry->i.i_list);
 	entry->group = NULL;
 	entry->mask = 0;
-	entry->inode = NULL;
+	entry->i.inode = NULL;
 	entry->free_mark = free_mark;
 }
 
@@ -350,9 +350,9 @@ int fsnotify_add_mark(struct fsnotify_mark_entry *entry,
 		lentry = fsnotify_find_mark_entry(group, inode);
 	if (!lentry) {
 		entry->group = group;
-		entry->inode = inode;
+		entry->i.inode = inode;
 
-		hlist_add_head(&entry->i_list, &inode->i_fsnotify_mark_entries);
+		hlist_add_head(&entry->i.i_list, &inode->i_fsnotify_mark_entries);
 		list_add(&entry->g_list, &group->mark_entries);
 
 		fsnotify_get_mark(entry); /* for i_list and g_list */
diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c
index 8075ae7..3edb51c 100644
--- a/fs/notify/inotify/inotify_fsnotify.c
+++ b/fs/notify/inotify/inotify_fsnotify.c
@@ -193,7 +193,7 @@ static int idr_callback(int id, void *p, void *data)
 	 */
 	if (entry)
 		printk(KERN_WARNING "entry->group=%p inode=%p wd=%d\n",
-			entry->group, entry->inode, ientry->wd);
+			entry->group, entry->i.inode, ientry->wd);
 	return 0;
 }
 
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index a48d68a..4b1587f 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -445,7 +445,7 @@ static void inotify_remove_from_idr(struct fsnotify_group *group,
 	if (wd == -1) {
 		WARN_ONCE(1, "%s: ientry=%p ientry->wd=%d ientry->group=%p"
 			" ientry->inode=%p\n", __func__, ientry, ientry->wd,
-			ientry->fsn_entry.group, ientry->fsn_entry.inode);
+			ientry->fsn_entry.group, ientry->fsn_entry.i.inode);
 		goto out;
 	}
 
@@ -454,7 +454,7 @@ static void inotify_remove_from_idr(struct fsnotify_group *group,
 	if (unlikely(!found_ientry)) {
 		WARN_ONCE(1, "%s: ientry=%p ientry->wd=%d ientry->group=%p"
 			" ientry->inode=%p\n", __func__, ientry, ientry->wd,
-			ientry->fsn_entry.group, ientry->fsn_entry.inode);
+			ientry->fsn_entry.group, ientry->fsn_entry.i.inode);
 		goto out;
 	}
 
@@ -468,9 +468,9 @@ static void inotify_remove_from_idr(struct fsnotify_group *group,
 			"entry->inode=%p found_ientry=%p found_ientry->wd=%d "
 			"found_ientry->group=%p found_ientry->inode=%p\n",
 			__func__, ientry, ientry->wd, ientry->fsn_entry.group,
-			ientry->fsn_entry.inode, found_ientry, found_ientry->wd,
+			ientry->fsn_entry.i.inode, found_ientry, found_ientry->wd,
 			found_ientry->fsn_entry.group,
-			found_ientry->fsn_entry.inode);
+			found_ientry->fsn_entry.i.inode);
 		goto out;
 	}
 
@@ -482,7 +482,7 @@ static void inotify_remove_from_idr(struct fsnotify_group *group,
 	if (unlikely(atomic_read(&ientry->fsn_entry.refcnt) < 3)) {
 		printk(KERN_ERR "%s: ientry=%p ientry->wd=%d ientry->group=%p"
 			" ientry->inode=%p\n", __func__, ientry, ientry->wd,
-			ientry->fsn_entry.group, ientry->fsn_entry.inode);
+			ientry->fsn_entry.group, ientry->fsn_entry.i.inode);
 		/* we can't really recover with bad ref cnting.. */
 		BUG();
 	}
-- 
cgit v1.1


From 098cf2fc77ee190c92bf9d08d69a13305f2487ec Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Thu, 17 Dec 2009 21:24:24 -0500
Subject: fsnotify: add flags to fsnotify_mark_entries

To differentiate between inode and vfsmount (or other future) types of
marks we add a flags field and set the inode bit on inode marks (the only
currently supported type of mark)

Signed-off-by: Eric Paris <eparis@redhat.com>
---
 fs/notify/inode_mark.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'fs')

diff --git a/fs/notify/inode_mark.c b/fs/notify/inode_mark.c
index 6731408..b000658 100644
--- a/fs/notify/inode_mark.c
+++ b/fs/notify/inode_mark.c
@@ -322,6 +322,8 @@ int fsnotify_add_mark(struct fsnotify_mark_entry *entry,
 	if (unlikely(!inode))
 		return -EINVAL;
 
+	entry->flags = FSNOTIFY_MARK_FLAG_INODE;
+
 	/*
 	 * if this group isn't being testing for inode type events we need
 	 * to start testing
-- 
cgit v1.1


From 72acc854427948efed7a83da27f7dc3239ac9afc Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruen@suse.de>
Date: Thu, 17 Dec 2009 21:24:24 -0500
Subject: fsnotify: kill FSNOTIFY_EVENT_FILE

Some fsnotify operations send a struct file.  This is more information than
we technically need.  We instead send a struct path in all cases instead of
sometimes a path and sometimes a file.

Signed-off-by: Andreas Gruenbacher <agruen@suse.de>
Signed-off-by: Eric Paris <eparis@redhat.com>
---
 fs/notify/fsnotify.c     | 12 +++++-------
 fs/notify/notification.c |  9 ---------
 2 files changed, 5 insertions(+), 16 deletions(-)

(limited to 'fs')

diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
index 78c440c..60e84fd 100644
--- a/fs/notify/fsnotify.c
+++ b/fs/notify/fsnotify.c
@@ -79,15 +79,15 @@ void __fsnotify_update_child_dentry_flags(struct inode *inode)
 }
 
 /* Notify this dentry's parent about a child's events. */
-void __fsnotify_parent(struct file *file, struct dentry *dentry, __u32 mask)
+void __fsnotify_parent(struct path *path, struct dentry *dentry, __u32 mask)
 {
 	struct dentry *parent;
 	struct inode *p_inode;
 	bool send = false;
 	bool should_update_children = false;
 
-	if (file)
-		dentry = file->f_path.dentry;
+	if (!dentry)
+		dentry = path->dentry;
 
 	if (!(dentry->d_flags & DCACHE_FSNOTIFY_PARENT_WATCHED))
 		return;
@@ -119,8 +119,8 @@ void __fsnotify_parent(struct file *file, struct dentry *dentry, __u32 mask)
 		 * specifies these are events which came from a child. */
 		mask |= FS_EVENT_ON_CHILD;
 
-		if (file)
-			fsnotify(p_inode, mask, file, FSNOTIFY_EVENT_FILE,
+		if (path)
+			fsnotify(p_inode, mask, path, FSNOTIFY_EVENT_PATH,
 				 dentry->d_name.name, 0);
 		else
 			fsnotify(p_inode, mask, dentry->d_inode, FSNOTIFY_EVENT_INODE,
@@ -194,8 +194,6 @@ void fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is, const
  
 	if (data_is == FSNOTIFY_EVENT_PATH)
 		mnt = ((struct path *)data)->mnt;
-	else if (data_is == FSNOTIFY_EVENT_FILE)
-		mnt = ((struct file *)data)->f_path.mnt;
 
 	/* if this inode's directed listeners don't care and nothing on the vfsmount
 	 * listeners list cares, nothing to do */
diff --git a/fs/notify/notification.c b/fs/notify/notification.c
index dafd0b7..066f1f9 100644
--- a/fs/notify/notification.c
+++ b/fs/notify/notification.c
@@ -390,15 +390,6 @@ struct fsnotify_event *fsnotify_create_event(struct inode *to_tell, __u32 mask,
 	event->data_type = data_type;
 
 	switch (data_type) {
-	case FSNOTIFY_EVENT_FILE: {
-		struct file *file = data;
-		struct path *path = &file->f_path;
-		event->path.dentry = path->dentry;
-		event->path.mnt = path->mnt;
-		path_get(&event->path);
-		event->data_type = FSNOTIFY_EVENT_PATH;
-		break;
-	}
 	case FSNOTIFY_EVENT_PATH: {
 		struct path *path = data;
 		event->path.dentry = path->dentry;
-- 
cgit v1.1


From e61ce86737b4d60521e4e71f9892fe4bdcfb688b Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Thu, 17 Dec 2009 21:24:24 -0500
Subject: fsnotify: rename fsnotify_mark_entry to just fsnotify_mark

The name is long and it serves no real purpose.  So rename
fsnotify_mark_entry to just fsnotify_mark.

Signed-off-by: Eric Paris <eparis@redhat.com>
---
 fs/inode.c                           |  2 +-
 fs/notify/dnotify/dnotify.c          | 24 +++++++++---------
 fs/notify/group.c                    |  8 +++---
 fs/notify/inode_mark.c               | 48 ++++++++++++++++++------------------
 fs/notify/inotify/inotify.h          |  6 ++---
 fs/notify/inotify/inotify_fsnotify.c |  8 +++---
 fs/notify/inotify/inotify_user.c     |  8 +++---
 7 files changed, 52 insertions(+), 52 deletions(-)

(limited to 'fs')

diff --git a/fs/inode.c b/fs/inode.c
index 8e1bee9..a2da778 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -264,7 +264,7 @@ void inode_init_once(struct inode *inode)
 	INIT_LIST_HEAD(&inode->i_data.i_mmap_nonlinear);
 	i_size_ordered_init(inode);
 #ifdef CONFIG_FSNOTIFY
-	INIT_HLIST_HEAD(&inode->i_fsnotify_mark_entries);
+	INIT_HLIST_HEAD(&inode->i_fsnotify_marks);
 #endif
 }
 EXPORT_SYMBOL(inode_init_once);
diff --git a/fs/notify/dnotify/dnotify.c b/fs/notify/dnotify/dnotify.c
index fc3a9dc..e6edae6 100644
--- a/fs/notify/dnotify/dnotify.c
+++ b/fs/notify/dnotify/dnotify.c
@@ -34,12 +34,12 @@ static struct fsnotify_group *dnotify_group __read_mostly;
 static DEFINE_MUTEX(dnotify_mark_mutex);
 
 /*
- * dnotify will attach one of these to each inode (i_fsnotify_mark_entries) which
+ * dnotify will attach one of these to each inode (i_fsnotify_marks) which
  * is being watched by dnotify.  If multiple userspace applications are watching
  * the same directory with dnotify their information is chained in dn
  */
 struct dnotify_mark_entry {
-	struct fsnotify_mark_entry fsn_entry;
+	struct fsnotify_mark fsn_entry;
 	struct dnotify_struct *dn;
 };
 
@@ -51,7 +51,7 @@ struct dnotify_mark_entry {
  * it calls the fsnotify function so it can update the set of all events relevant
  * to this inode.
  */
-static void dnotify_recalc_inode_mask(struct fsnotify_mark_entry *entry)
+static void dnotify_recalc_inode_mask(struct fsnotify_mark *entry)
 {
 	__u32 new_mask, old_mask;
 	struct dnotify_struct *dn;
@@ -85,7 +85,7 @@ static void dnotify_recalc_inode_mask(struct fsnotify_mark_entry *entry)
 static int dnotify_handle_event(struct fsnotify_group *group,
 				struct fsnotify_event *event)
 {
-	struct fsnotify_mark_entry *entry = NULL;
+	struct fsnotify_mark *entry = NULL;
 	struct dnotify_mark_entry *dnentry;
 	struct inode *to_tell;
 	struct dnotify_struct *dn;
@@ -136,7 +136,7 @@ static bool dnotify_should_send_event(struct fsnotify_group *group,
 				      struct inode *inode, struct vfsmount *mnt,
 				      __u32 mask, void *data, int data_type)
 {
-	struct fsnotify_mark_entry *entry;
+	struct fsnotify_mark *entry;
 	bool send;
 
 	/* !dir_notify_enable should never get here, don't waste time checking
@@ -163,7 +163,7 @@ static bool dnotify_should_send_event(struct fsnotify_group *group,
 	return send;
 }
 
-static void dnotify_free_mark(struct fsnotify_mark_entry *entry)
+static void dnotify_free_mark(struct fsnotify_mark *entry)
 {
 	struct dnotify_mark_entry *dnentry = container_of(entry,
 							  struct dnotify_mark_entry,
@@ -184,14 +184,14 @@ static struct fsnotify_ops dnotify_fsnotify_ops = {
 
 /*
  * Called every time a file is closed.  Looks first for a dnotify mark on the
- * inode.  If one is found run all of the ->dn entries attached to that
+ * inode.  If one is found run all of the ->dn structures attached to that
  * mark for one relevant to this process closing the file and remove that
  * dnotify_struct.  If that was the last dnotify_struct also remove the
- * fsnotify_mark_entry.
+ * fsnotify_mark.
  */
 void dnotify_flush(struct file *filp, fl_owner_t id)
 {
-	struct fsnotify_mark_entry *entry;
+	struct fsnotify_mark *entry;
 	struct dnotify_mark_entry *dnentry;
 	struct dnotify_struct *dn;
 	struct dnotify_struct **prev;
@@ -260,7 +260,7 @@ static __u32 convert_arg(unsigned long arg)
 
 /*
  * If multiple processes watch the same inode with dnotify there is only one
- * dnotify mark in inode->i_fsnotify_mark_entries but we chain a dnotify_struct
+ * dnotify mark in inode->i_fsnotify_marks but we chain a dnotify_struct
  * onto that mark.  This function either attaches the new dnotify_struct onto
  * that list, or it |= the mask onto an existing dnofiy_struct.
  */
@@ -298,7 +298,7 @@ static int attach_dn(struct dnotify_struct *dn, struct dnotify_mark_entry *dnent
 int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg)
 {
 	struct dnotify_mark_entry *new_dnentry, *dnentry;
-	struct fsnotify_mark_entry *new_entry, *entry;
+	struct fsnotify_mark *new_entry, *entry;
 	struct dnotify_struct *dn;
 	struct inode *inode;
 	fl_owner_t id = current->files;
@@ -378,7 +378,7 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg)
 	/* if (f != filp) means that we lost a race and another task/thread
 	 * actually closed the fd we are still playing with before we grabbed
 	 * the dnotify_mark_mutex and entry->lock.  Since closing the fd is the
-	 * only time we clean up the mark entries we need to get our mark off
+	 * only time we clean up the marks we need to get our mark off
 	 * the list. */
 	if (f != filp) {
 		/* if we added ourselves, shoot ourselves, it's possible that
diff --git a/fs/notify/group.c b/fs/notify/group.c
index aa4654f..b70e7d2 100644
--- a/fs/notify/group.c
+++ b/fs/notify/group.c
@@ -74,10 +74,10 @@ void fsnotify_recalc_group_mask(struct fsnotify_group *group)
 {
 	__u32 mask = 0;
 	__u32 old_mask = group->mask;
-	struct fsnotify_mark_entry *entry;
+	struct fsnotify_mark *entry;
 
 	spin_lock(&group->mark_lock);
-	list_for_each_entry(entry, &group->mark_entries, g_list)
+	list_for_each_entry(entry, &group->marks_list, g_list)
 		mask |= entry->mask;
 	spin_unlock(&group->mark_lock);
 
@@ -133,7 +133,7 @@ void fsnotify_final_destroy_group(struct fsnotify_group *group)
  */
 static void fsnotify_destroy_group(struct fsnotify_group *group)
 {
-	/* clear all inode mark entries for this group */
+	/* clear all inode marks for this group */
 	fsnotify_clear_marks_by_group(group);
 
 	/* past the point of no return, matches the initial value of 1 */
@@ -224,7 +224,7 @@ struct fsnotify_group *fsnotify_alloc_group(const struct fsnotify_ops *ops)
 	INIT_LIST_HEAD(&group->vfsmount_group_list);
 
 	spin_lock_init(&group->mark_lock);
-	INIT_LIST_HEAD(&group->mark_entries);
+	INIT_LIST_HEAD(&group->marks_list);
 
 	group->ops = ops;
 
diff --git a/fs/notify/inode_mark.c b/fs/notify/inode_mark.c
index b000658..7e69f6b 100644
--- a/fs/notify/inode_mark.c
+++ b/fs/notify/inode_mark.c
@@ -38,12 +38,12 @@
  * that lock to dereference either of these things (they could be NULL even with
  * the lock)
  *
- * group->mark_lock protects the mark_entries list anchored inside a given group
+ * group->mark_lock protects the marks_list anchored inside a given group
  * and each entry is hooked via the g_list.  It also sorta protects the
  * free_g_list, which when used is anchored by a private list on the stack of the
  * task which held the group->mark_lock.
  *
- * inode->i_lock protects the i_fsnotify_mark_entries list anchored inside a
+ * inode->i_lock protects the i_fsnotify_marks list anchored inside a
  * given inode and each entry is hooked via the i_list. (and sorta the
  * free_i_list)
  *
@@ -61,7 +61,7 @@
  *   need to be cleaned up. (fsnotify_clear_marks_by_group)
  *
  * Worst case we are given an inode and need to clean up all the marks on that
- * inode.  We take i_lock and walk the i_fsnotify_mark_entries safely.  For each
+ * inode.  We take i_lock and walk the i_fsnotify_marks safely.  For each
  * mark on the list we take a reference (so the mark can't disappear under us).
  * We remove that mark form the inode's list of marks and we add this mark to a
  * private list anchored on the stack using i_free_list;  At this point we no
@@ -95,12 +95,12 @@
 #include <linux/fsnotify_backend.h>
 #include "fsnotify.h"
 
-void fsnotify_get_mark(struct fsnotify_mark_entry *entry)
+void fsnotify_get_mark(struct fsnotify_mark *entry)
 {
 	atomic_inc(&entry->refcnt);
 }
 
-void fsnotify_put_mark(struct fsnotify_mark_entry *entry)
+void fsnotify_put_mark(struct fsnotify_mark *entry)
 {
 	if (atomic_dec_and_test(&entry->refcnt))
 		entry->free_mark(entry);
@@ -111,13 +111,13 @@ void fsnotify_put_mark(struct fsnotify_mark_entry *entry)
  */
 static void fsnotify_recalc_inode_mask_locked(struct inode *inode)
 {
-	struct fsnotify_mark_entry *entry;
+	struct fsnotify_mark *entry;
 	struct hlist_node *pos;
 	__u32 new_mask = 0;
 
 	assert_spin_locked(&inode->i_lock);
 
-	hlist_for_each_entry(entry, pos, &inode->i_fsnotify_mark_entries, i.i_list)
+	hlist_for_each_entry(entry, pos, &inode->i_fsnotify_marks, i.i_list)
 		new_mask |= entry->mask;
 	inode->i_fsnotify_mask = new_mask;
 }
@@ -140,7 +140,7 @@ void fsnotify_recalc_inode_mask(struct inode *inode)
  * The caller had better be holding a reference to this mark so we don't actually
  * do the final put under the entry->lock
  */
-void fsnotify_destroy_mark_by_entry(struct fsnotify_mark_entry *entry)
+void fsnotify_destroy_mark_by_entry(struct fsnotify_mark *entry)
 {
 	struct fsnotify_group *group;
 	struct inode *inode;
@@ -174,7 +174,7 @@ void fsnotify_destroy_mark_by_entry(struct fsnotify_mark_entry *entry)
 	fsnotify_put_mark(entry); /* for i_list and g_list */
 
 	/*
-	 * this mark is now off the inode->i_fsnotify_mark_entries list and we
+	 * this mark is now off the inode->i_fsnotify_marks list and we
 	 * hold the inode->i_lock, so this is the perfect time to update the
 	 * inode->i_fsnotify_mask
 	 */
@@ -221,11 +221,11 @@ void fsnotify_destroy_mark_by_entry(struct fsnotify_mark_entry *entry)
  */
 void fsnotify_clear_marks_by_group(struct fsnotify_group *group)
 {
-	struct fsnotify_mark_entry *lentry, *entry;
+	struct fsnotify_mark *lentry, *entry;
 	LIST_HEAD(free_list);
 
 	spin_lock(&group->mark_lock);
-	list_for_each_entry_safe(entry, lentry, &group->mark_entries, g_list) {
+	list_for_each_entry_safe(entry, lentry, &group->marks_list, g_list) {
 		list_add(&entry->free_g_list, &free_list);
 		list_del_init(&entry->g_list);
 		fsnotify_get_mark(entry);
@@ -243,12 +243,12 @@ void fsnotify_clear_marks_by_group(struct fsnotify_group *group)
  */
 void fsnotify_clear_marks_by_inode(struct inode *inode)
 {
-	struct fsnotify_mark_entry *entry, *lentry;
+	struct fsnotify_mark *entry, *lentry;
 	struct hlist_node *pos, *n;
 	LIST_HEAD(free_list);
 
 	spin_lock(&inode->i_lock);
-	hlist_for_each_entry_safe(entry, pos, n, &inode->i_fsnotify_mark_entries, i.i_list) {
+	hlist_for_each_entry_safe(entry, pos, n, &inode->i_fsnotify_marks, i.i_list) {
 		list_add(&entry->i.free_i_list, &free_list);
 		hlist_del_init(&entry->i.i_list);
 		fsnotify_get_mark(entry);
@@ -265,15 +265,15 @@ void fsnotify_clear_marks_by_inode(struct inode *inode)
  * given a group and inode, find the mark associated with that combination.
  * if found take a reference to that mark and return it, else return NULL
  */
-struct fsnotify_mark_entry *fsnotify_find_mark_entry(struct fsnotify_group *group,
-						     struct inode *inode)
+struct fsnotify_mark *fsnotify_find_mark_entry(struct fsnotify_group *group,
+					       struct inode *inode)
 {
-	struct fsnotify_mark_entry *entry;
+	struct fsnotify_mark *entry;
 	struct hlist_node *pos;
 
 	assert_spin_locked(&inode->i_lock);
 
-	hlist_for_each_entry(entry, pos, &inode->i_fsnotify_mark_entries, i.i_list) {
+	hlist_for_each_entry(entry, pos, &inode->i_fsnotify_marks, i.i_list) {
 		if (entry->group == group) {
 			fsnotify_get_mark(entry);
 			return entry;
@@ -282,7 +282,7 @@ struct fsnotify_mark_entry *fsnotify_find_mark_entry(struct fsnotify_group *grou
 	return NULL;
 }
 
-void fsnotify_duplicate_mark(struct fsnotify_mark_entry *new, struct fsnotify_mark_entry *old)
+void fsnotify_duplicate_mark(struct fsnotify_mark *new, struct fsnotify_mark *old)
 {
 	assert_spin_locked(&old->lock);
 	new->i.inode = old->i.inode;
@@ -294,8 +294,8 @@ void fsnotify_duplicate_mark(struct fsnotify_mark_entry *new, struct fsnotify_ma
 /*
  * Nothing fancy, just initialize lists and locks and counters.
  */
-void fsnotify_init_mark(struct fsnotify_mark_entry *entry,
-			void (*free_mark)(struct fsnotify_mark_entry *entry))
+void fsnotify_init_mark(struct fsnotify_mark *entry,
+			void (*free_mark)(struct fsnotify_mark *entry))
 {
 	spin_lock_init(&entry->lock);
 	atomic_set(&entry->refcnt, 1);
@@ -311,11 +311,11 @@ void fsnotify_init_mark(struct fsnotify_mark_entry *entry,
  * These marks may be used for the fsnotify backend to determine which
  * event types should be delivered to which group and for which inodes.
  */
-int fsnotify_add_mark(struct fsnotify_mark_entry *entry,
+int fsnotify_add_mark(struct fsnotify_mark *entry,
 		      struct fsnotify_group *group, struct inode *inode,
 		      int allow_dups)
 {
-	struct fsnotify_mark_entry *lentry = NULL;
+	struct fsnotify_mark *lentry = NULL;
 	int ret = 0;
 
 	inode = igrab(inode);
@@ -354,8 +354,8 @@ int fsnotify_add_mark(struct fsnotify_mark_entry *entry,
 		entry->group = group;
 		entry->i.inode = inode;
 
-		hlist_add_head(&entry->i.i_list, &inode->i_fsnotify_mark_entries);
-		list_add(&entry->g_list, &group->mark_entries);
+		hlist_add_head(&entry->i.i_list, &inode->i_fsnotify_marks);
+		list_add(&entry->g_list, &group->marks_list);
 
 		fsnotify_get_mark(entry); /* for i_list and g_list */
 
diff --git a/fs/notify/inotify/inotify.h b/fs/notify/inotify/inotify.h
index f234f3a..07be6df 100644
--- a/fs/notify/inotify/inotify.h
+++ b/fs/notify/inotify/inotify.h
@@ -10,12 +10,12 @@ struct inotify_event_private_data {
 };
 
 struct inotify_inode_mark_entry {
-	/* fsnotify_mark_entry MUST be the first thing */
-	struct fsnotify_mark_entry fsn_entry;
+	/* fsnotify_mark MUST be the first thing */
+	struct fsnotify_mark fsn_entry;
 	int wd;
 };
 
-extern void inotify_ignored_and_remove_idr(struct fsnotify_mark_entry *entry,
+extern void inotify_ignored_and_remove_idr(struct fsnotify_mark *entry,
 					   struct fsnotify_group *group);
 extern void inotify_free_event_priv(struct fsnotify_event_private_data *event_priv);
 
diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c
index 3edb51c..f33a9bd 100644
--- a/fs/notify/inotify/inotify_fsnotify.c
+++ b/fs/notify/inotify/inotify_fsnotify.c
@@ -88,7 +88,7 @@ static int inotify_merge(struct list_head *list, struct fsnotify_event *event)
 
 static int inotify_handle_event(struct fsnotify_group *group, struct fsnotify_event *event)
 {
-	struct fsnotify_mark_entry *entry;
+	struct fsnotify_mark *entry;
 	struct inotify_inode_mark_entry *ientry;
 	struct inode *to_tell;
 	struct inotify_event_private_data *event_priv;
@@ -135,7 +135,7 @@ static int inotify_handle_event(struct fsnotify_group *group, struct fsnotify_ev
 	return ret;
 }
 
-static void inotify_freeing_mark(struct fsnotify_mark_entry *entry, struct fsnotify_group *group)
+static void inotify_freeing_mark(struct fsnotify_mark *entry, struct fsnotify_group *group)
 {
 	inotify_ignored_and_remove_idr(entry, group);
 }
@@ -144,7 +144,7 @@ static bool inotify_should_send_event(struct fsnotify_group *group, struct inode
 				      struct vfsmount *mnt, __u32 mask, void *data,
 				      int data_type)
 {
-	struct fsnotify_mark_entry *entry;
+	struct fsnotify_mark *entry;
 	bool send;
 
 	spin_lock(&inode->i_lock);
@@ -171,7 +171,7 @@ static bool inotify_should_send_event(struct fsnotify_group *group, struct inode
  */
 static int idr_callback(int id, void *p, void *data)
 {
-	struct fsnotify_mark_entry *entry;
+	struct fsnotify_mark *entry;
 	struct inotify_inode_mark_entry *ientry;
 	static bool warned = false;
 
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index 4b1587f..7be5dcf 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -386,7 +386,7 @@ static struct inotify_inode_mark_entry *inotify_idr_find_locked(struct fsnotify_
 
 	ientry = idr_find(idr, wd);
 	if (ientry) {
-		struct fsnotify_mark_entry *fsn_entry = &ientry->fsn_entry;
+		struct fsnotify_mark *fsn_entry = &ientry->fsn_entry;
 
 		fsnotify_get_mark(fsn_entry);
 		/* One ref for being in the idr, one ref we just took */
@@ -499,7 +499,7 @@ out:
 /*
  * Send IN_IGNORED for this wd, remove this wd from the idr.
  */
-void inotify_ignored_and_remove_idr(struct fsnotify_mark_entry *entry,
+void inotify_ignored_and_remove_idr(struct fsnotify_mark *entry,
 				    struct fsnotify_group *group)
 {
 	struct inotify_inode_mark_entry *ientry;
@@ -541,7 +541,7 @@ skip_send_ignore:
 }
 
 /* ding dong the mark is dead */
-static void inotify_free_mark(struct fsnotify_mark_entry *entry)
+static void inotify_free_mark(struct fsnotify_mark *entry)
 {
 	struct inotify_inode_mark_entry *ientry;
 
@@ -554,7 +554,7 @@ static int inotify_update_existing_watch(struct fsnotify_group *group,
 					 struct inode *inode,
 					 u32 arg)
 {
-	struct fsnotify_mark_entry *entry;
+	struct fsnotify_mark *entry;
 	struct inotify_inode_mark_entry *ientry;
 	__u32 old_mask, new_mask;
 	__u32 mask;
-- 
cgit v1.1


From d07754412f9cdc2f4a99318d5ee81ace6715ea99 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Thu, 17 Dec 2009 21:24:24 -0500
Subject: fsnotify: rename fsnotify_find_mark_entry to fsnotify_find_mark

the _entry portion of fsnotify functions is useless.  Drop it.

Signed-off-by: Eric Paris <eparis@redhat.com>
---
 fs/notify/dnotify/dnotify.c          | 14 +++++++-------
 fs/notify/inode_mark.c               | 14 +++++++-------
 fs/notify/inotify/inotify_fsnotify.c |  4 ++--
 fs/notify/inotify/inotify_user.c     |  6 +++---
 4 files changed, 19 insertions(+), 19 deletions(-)

(limited to 'fs')

diff --git a/fs/notify/dnotify/dnotify.c b/fs/notify/dnotify/dnotify.c
index e6edae6..b202bc5 100644
--- a/fs/notify/dnotify/dnotify.c
+++ b/fs/notify/dnotify/dnotify.c
@@ -96,7 +96,7 @@ static int dnotify_handle_event(struct fsnotify_group *group,
 	to_tell = event->to_tell;
 
 	spin_lock(&to_tell->i_lock);
-	entry = fsnotify_find_mark_entry(group, to_tell);
+	entry = fsnotify_find_mark(group, to_tell);
 	spin_unlock(&to_tell->i_lock);
 
 	/* unlikely since we alreay passed dnotify_should_send_event() */
@@ -148,7 +148,7 @@ static bool dnotify_should_send_event(struct fsnotify_group *group,
 		return false;
 
 	spin_lock(&inode->i_lock);
-	entry = fsnotify_find_mark_entry(group, inode);
+	entry = fsnotify_find_mark(group, inode);
 	spin_unlock(&inode->i_lock);
 
 	/* no mark means no dnotify watch */
@@ -158,7 +158,7 @@ static bool dnotify_should_send_event(struct fsnotify_group *group,
 	mask = (mask & ~FS_EVENT_ON_CHILD);
 	send = (mask & entry->mask);
 
-	fsnotify_put_mark(entry); /* matches fsnotify_find_mark_entry */
+	fsnotify_put_mark(entry); /* matches fsnotify_find_mark */
 
 	return send;
 }
@@ -202,7 +202,7 @@ void dnotify_flush(struct file *filp, fl_owner_t id)
 		return;
 
 	spin_lock(&inode->i_lock);
-	entry = fsnotify_find_mark_entry(dnotify_group, inode);
+	entry = fsnotify_find_mark(dnotify_group, inode);
 	spin_unlock(&inode->i_lock);
 	if (!entry)
 		return;
@@ -226,7 +226,7 @@ void dnotify_flush(struct file *filp, fl_owner_t id)
 
 	/* nothing else could have found us thanks to the dnotify_mark_mutex */
 	if (dnentry->dn == NULL)
-		fsnotify_destroy_mark_by_entry(entry);
+		fsnotify_destroy_mark(entry);
 
 	fsnotify_recalc_group_mask(dnotify_group);
 
@@ -357,7 +357,7 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg)
 
 	/* add the new_entry or find an old one. */
 	spin_lock(&inode->i_lock);
-	entry = fsnotify_find_mark_entry(dnotify_group, inode);
+	entry = fsnotify_find_mark(dnotify_group, inode);
 	spin_unlock(&inode->i_lock);
 	if (entry) {
 		dnentry = container_of(entry, struct dnotify_mark_entry, fsn_entry);
@@ -414,7 +414,7 @@ out:
 	spin_unlock(&entry->lock);
 
 	if (destroy)
-		fsnotify_destroy_mark_by_entry(entry);
+		fsnotify_destroy_mark(entry);
 
 	fsnotify_recalc_group_mask(dnotify_group);
 
diff --git a/fs/notify/inode_mark.c b/fs/notify/inode_mark.c
index 7e69f6b..01c4263 100644
--- a/fs/notify/inode_mark.c
+++ b/fs/notify/inode_mark.c
@@ -56,7 +56,7 @@
  * - The inode is unlinked for the last time.  (fsnotify_inode_remove)
  * - The inode is being evicted from cache. (fsnotify_inode_delete)
  * - The fs the inode is on is unmounted.  (fsnotify_inode_delete/fsnotify_unmount_inodes)
- * - Something explicitly requests that it be removed.  (fsnotify_destroy_mark_by_entry)
+ * - Something explicitly requests that it be removed.  (fsnotify_destroy_mark)
  * - The fsnotify_group associated with the mark is going away and all such marks
  *   need to be cleaned up. (fsnotify_clear_marks_by_group)
  *
@@ -140,7 +140,7 @@ void fsnotify_recalc_inode_mask(struct inode *inode)
  * The caller had better be holding a reference to this mark so we don't actually
  * do the final put under the entry->lock
  */
-void fsnotify_destroy_mark_by_entry(struct fsnotify_mark *entry)
+void fsnotify_destroy_mark(struct fsnotify_mark *entry)
 {
 	struct fsnotify_group *group;
 	struct inode *inode;
@@ -233,7 +233,7 @@ void fsnotify_clear_marks_by_group(struct fsnotify_group *group)
 	spin_unlock(&group->mark_lock);
 
 	list_for_each_entry_safe(entry, lentry, &free_list, free_g_list) {
-		fsnotify_destroy_mark_by_entry(entry);
+		fsnotify_destroy_mark(entry);
 		fsnotify_put_mark(entry);
 	}
 }
@@ -256,7 +256,7 @@ void fsnotify_clear_marks_by_inode(struct inode *inode)
 	spin_unlock(&inode->i_lock);
 
 	list_for_each_entry_safe(entry, lentry, &free_list, i.free_i_list) {
-		fsnotify_destroy_mark_by_entry(entry);
+		fsnotify_destroy_mark(entry);
 		fsnotify_put_mark(entry);
 	}
 }
@@ -265,8 +265,8 @@ void fsnotify_clear_marks_by_inode(struct inode *inode)
  * given a group and inode, find the mark associated with that combination.
  * if found take a reference to that mark and return it, else return NULL
  */
-struct fsnotify_mark *fsnotify_find_mark_entry(struct fsnotify_group *group,
-					       struct inode *inode)
+struct fsnotify_mark *fsnotify_find_mark(struct fsnotify_group *group,
+					 struct inode *inode)
 {
 	struct fsnotify_mark *entry;
 	struct hlist_node *pos;
@@ -349,7 +349,7 @@ int fsnotify_add_mark(struct fsnotify_mark *entry,
 	spin_lock(&inode->i_lock);
 
 	if (!allow_dups)
-		lentry = fsnotify_find_mark_entry(group, inode);
+		lentry = fsnotify_find_mark(group, inode);
 	if (!lentry) {
 		entry->group = group;
 		entry->i.inode = inode;
diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c
index f33a9bd..f8a2a6e 100644
--- a/fs/notify/inotify/inotify_fsnotify.c
+++ b/fs/notify/inotify/inotify_fsnotify.c
@@ -98,7 +98,7 @@ static int inotify_handle_event(struct fsnotify_group *group, struct fsnotify_ev
 	to_tell = event->to_tell;
 
 	spin_lock(&to_tell->i_lock);
-	entry = fsnotify_find_mark_entry(group, to_tell);
+	entry = fsnotify_find_mark(group, to_tell);
 	spin_unlock(&to_tell->i_lock);
 	/* race with watch removal?  We already passes should_send */
 	if (unlikely(!entry))
@@ -148,7 +148,7 @@ static bool inotify_should_send_event(struct fsnotify_group *group, struct inode
 	bool send;
 
 	spin_lock(&inode->i_lock);
-	entry = fsnotify_find_mark_entry(group, inode);
+	entry = fsnotify_find_mark(group, inode);
 	spin_unlock(&inode->i_lock);
 	if (!entry)
 		return false;
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index 7be5dcf..118085c 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -567,7 +567,7 @@ static int inotify_update_existing_watch(struct fsnotify_group *group,
 		return -EINVAL;
 
 	spin_lock(&inode->i_lock);
-	entry = fsnotify_find_mark_entry(group, inode);
+	entry = fsnotify_find_mark(group, inode);
 	spin_unlock(&inode->i_lock);
 	if (!entry)
 		return -ENOENT;
@@ -607,7 +607,7 @@ static int inotify_update_existing_watch(struct fsnotify_group *group,
 	/* return the wd */
 	ret = ientry->wd;
 
-	/* match the get from fsnotify_find_mark_entry() */
+	/* match the get from fsnotify_find_mark() */
 	fsnotify_put_mark(entry);
 
 	return ret;
@@ -823,7 +823,7 @@ SYSCALL_DEFINE2(inotify_rm_watch, int, fd, __s32, wd)
 
 	ret = 0;
 
-	fsnotify_destroy_mark_by_entry(&ientry->fsn_entry);
+	fsnotify_destroy_mark(&ientry->fsn_entry);
 
 	/* match ref taken by inotify_idr_find */
 	fsnotify_put_mark(&ientry->fsn_entry);
-- 
cgit v1.1


From 841bdc10f573aa010dd5818d35a5690b7d9f73ce Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Thu, 17 Dec 2009 21:24:24 -0500
Subject: fsnotify: rename mark_entry to just mark

previously I used mark_entry when talking about marks on inodes.  The
_entry is pretty useless.  Just use "mark" instead.

Signed-off-by: Eric Paris <eparis@redhat.com>
---
 fs/notify/group.c      |   6 +-
 fs/notify/inode_mark.c | 148 ++++++++++++++++++++++++-------------------------
 2 files changed, 77 insertions(+), 77 deletions(-)

(limited to 'fs')

diff --git a/fs/notify/group.c b/fs/notify/group.c
index b70e7d2..9e9eb40 100644
--- a/fs/notify/group.c
+++ b/fs/notify/group.c
@@ -74,11 +74,11 @@ void fsnotify_recalc_group_mask(struct fsnotify_group *group)
 {
 	__u32 mask = 0;
 	__u32 old_mask = group->mask;
-	struct fsnotify_mark *entry;
+	struct fsnotify_mark *mark;
 
 	spin_lock(&group->mark_lock);
-	list_for_each_entry(entry, &group->marks_list, g_list)
-		mask |= entry->mask;
+	list_for_each_entry(mark, &group->marks_list, g_list)
+		mask |= mark->mask;
 	spin_unlock(&group->mark_lock);
 
 	group->mask = mask;
diff --git a/fs/notify/inode_mark.c b/fs/notify/inode_mark.c
index 01c4263..27c1b43 100644
--- a/fs/notify/inode_mark.c
+++ b/fs/notify/inode_mark.c
@@ -30,21 +30,21 @@
  * There are 3 spinlocks involved with fsnotify inode marks and they MUST
  * be taken in order as follows:
  *
- * entry->lock
+ * mark->lock
  * group->mark_lock
  * inode->i_lock
  *
- * entry->lock protects 2 things, entry->group and entry->inode.  You must hold
+ * mark->lock protects 2 things, mark->group and mark->inode.  You must hold
  * that lock to dereference either of these things (they could be NULL even with
  * the lock)
  *
  * group->mark_lock protects the marks_list anchored inside a given group
- * and each entry is hooked via the g_list.  It also sorta protects the
+ * and each mark is hooked via the g_list.  It also sorta protects the
  * free_g_list, which when used is anchored by a private list on the stack of the
  * task which held the group->mark_lock.
  *
  * inode->i_lock protects the i_fsnotify_marks list anchored inside a
- * given inode and each entry is hooked via the i_list. (and sorta the
+ * given inode and each mark is hooked via the i_list. (and sorta the
  * free_i_list)
  *
  *
@@ -95,15 +95,15 @@
 #include <linux/fsnotify_backend.h>
 #include "fsnotify.h"
 
-void fsnotify_get_mark(struct fsnotify_mark *entry)
+void fsnotify_get_mark(struct fsnotify_mark *mark)
 {
-	atomic_inc(&entry->refcnt);
+	atomic_inc(&mark->refcnt);
 }
 
-void fsnotify_put_mark(struct fsnotify_mark *entry)
+void fsnotify_put_mark(struct fsnotify_mark *mark)
 {
-	if (atomic_dec_and_test(&entry->refcnt))
-		entry->free_mark(entry);
+	if (atomic_dec_and_test(&mark->refcnt))
+		mark->free_mark(mark);
 }
 
 /*
@@ -111,14 +111,14 @@ void fsnotify_put_mark(struct fsnotify_mark *entry)
  */
 static void fsnotify_recalc_inode_mask_locked(struct inode *inode)
 {
-	struct fsnotify_mark *entry;
+	struct fsnotify_mark *mark;
 	struct hlist_node *pos;
 	__u32 new_mask = 0;
 
 	assert_spin_locked(&inode->i_lock);
 
-	hlist_for_each_entry(entry, pos, &inode->i_fsnotify_marks, i.i_list)
-		new_mask |= entry->mask;
+	hlist_for_each_entry(mark, pos, &inode->i_fsnotify_marks, i.i_list)
+		new_mask |= mark->mask;
 	inode->i_fsnotify_mask = new_mask;
 }
 
@@ -138,40 +138,40 @@ void fsnotify_recalc_inode_mask(struct inode *inode)
 /*
  * Any time a mark is getting freed we end up here.
  * The caller had better be holding a reference to this mark so we don't actually
- * do the final put under the entry->lock
+ * do the final put under the mark->lock
  */
-void fsnotify_destroy_mark(struct fsnotify_mark *entry)
+void fsnotify_destroy_mark(struct fsnotify_mark *mark)
 {
 	struct fsnotify_group *group;
 	struct inode *inode;
 
-	spin_lock(&entry->lock);
+	spin_lock(&mark->lock);
 
-	group = entry->group;
-	inode = entry->i.inode;
+	group = mark->group;
+	inode = mark->i.inode;
 
 	BUG_ON(group && !inode);
 	BUG_ON(!group && inode);
 
 	/* if !group something else already marked this to die */
 	if (!group) {
-		spin_unlock(&entry->lock);
+		spin_unlock(&mark->lock);
 		return;
 	}
 
 	/* 1 from caller and 1 for being on i_list/g_list */
-	BUG_ON(atomic_read(&entry->refcnt) < 2);
+	BUG_ON(atomic_read(&mark->refcnt) < 2);
 
 	spin_lock(&group->mark_lock);
 	spin_lock(&inode->i_lock);
 
-	hlist_del_init(&entry->i.i_list);
-	entry->i.inode = NULL;
+	hlist_del_init(&mark->i.i_list);
+	mark->i.inode = NULL;
 
-	list_del_init(&entry->g_list);
-	entry->group = NULL;
+	list_del_init(&mark->g_list);
+	mark->group = NULL;
 
-	fsnotify_put_mark(entry); /* for i_list and g_list */
+	fsnotify_put_mark(mark); /* for i_list and g_list */
 
 	/*
 	 * this mark is now off the inode->i_fsnotify_marks list and we
@@ -182,21 +182,21 @@ void fsnotify_destroy_mark(struct fsnotify_mark *entry)
 
 	spin_unlock(&inode->i_lock);
 	spin_unlock(&group->mark_lock);
-	spin_unlock(&entry->lock);
+	spin_unlock(&mark->lock);
 
 	/*
 	 * Some groups like to know that marks are being freed.  This is a
-	 * callback to the group function to let it know that this entry
+	 * callback to the group function to let it know that this mark
 	 * is being freed.
 	 */
 	if (group->ops->freeing_mark)
-		group->ops->freeing_mark(entry, group);
+		group->ops->freeing_mark(mark, group);
 
 	/*
 	 * __fsnotify_update_child_dentry_flags(inode);
 	 *
 	 * I really want to call that, but we can't, we have no idea if the inode
-	 * still exists the second we drop the entry->lock.
+	 * still exists the second we drop the mark->lock.
 	 *
 	 * The next time an event arrive to this inode from one of it's children
 	 * __fsnotify_parent will see that the inode doesn't care about it's
@@ -221,20 +221,20 @@ void fsnotify_destroy_mark(struct fsnotify_mark *entry)
  */
 void fsnotify_clear_marks_by_group(struct fsnotify_group *group)
 {
-	struct fsnotify_mark *lentry, *entry;
+	struct fsnotify_mark *lmark, *mark;
 	LIST_HEAD(free_list);
 
 	spin_lock(&group->mark_lock);
-	list_for_each_entry_safe(entry, lentry, &group->marks_list, g_list) {
-		list_add(&entry->free_g_list, &free_list);
-		list_del_init(&entry->g_list);
-		fsnotify_get_mark(entry);
+	list_for_each_entry_safe(mark, lmark, &group->marks_list, g_list) {
+		list_add(&mark->free_g_list, &free_list);
+		list_del_init(&mark->g_list);
+		fsnotify_get_mark(mark);
 	}
 	spin_unlock(&group->mark_lock);
 
-	list_for_each_entry_safe(entry, lentry, &free_list, free_g_list) {
-		fsnotify_destroy_mark(entry);
-		fsnotify_put_mark(entry);
+	list_for_each_entry_safe(mark, lmark, &free_list, free_g_list) {
+		fsnotify_destroy_mark(mark);
+		fsnotify_put_mark(mark);
 	}
 }
 
@@ -243,21 +243,21 @@ void fsnotify_clear_marks_by_group(struct fsnotify_group *group)
  */
 void fsnotify_clear_marks_by_inode(struct inode *inode)
 {
-	struct fsnotify_mark *entry, *lentry;
+	struct fsnotify_mark *mark, *lmark;
 	struct hlist_node *pos, *n;
 	LIST_HEAD(free_list);
 
 	spin_lock(&inode->i_lock);
-	hlist_for_each_entry_safe(entry, pos, n, &inode->i_fsnotify_marks, i.i_list) {
-		list_add(&entry->i.free_i_list, &free_list);
-		hlist_del_init(&entry->i.i_list);
-		fsnotify_get_mark(entry);
+	hlist_for_each_entry_safe(mark, pos, n, &inode->i_fsnotify_marks, i.i_list) {
+		list_add(&mark->i.free_i_list, &free_list);
+		hlist_del_init(&mark->i.i_list);
+		fsnotify_get_mark(mark);
 	}
 	spin_unlock(&inode->i_lock);
 
-	list_for_each_entry_safe(entry, lentry, &free_list, i.free_i_list) {
-		fsnotify_destroy_mark(entry);
-		fsnotify_put_mark(entry);
+	list_for_each_entry_safe(mark, lmark, &free_list, i.free_i_list) {
+		fsnotify_destroy_mark(mark);
+		fsnotify_put_mark(mark);
 	}
 }
 
@@ -268,15 +268,15 @@ void fsnotify_clear_marks_by_inode(struct inode *inode)
 struct fsnotify_mark *fsnotify_find_mark(struct fsnotify_group *group,
 					 struct inode *inode)
 {
-	struct fsnotify_mark *entry;
+	struct fsnotify_mark *mark;
 	struct hlist_node *pos;
 
 	assert_spin_locked(&inode->i_lock);
 
-	hlist_for_each_entry(entry, pos, &inode->i_fsnotify_marks, i.i_list) {
-		if (entry->group == group) {
-			fsnotify_get_mark(entry);
-			return entry;
+	hlist_for_each_entry(mark, pos, &inode->i_fsnotify_marks, i.i_list) {
+		if (mark->group == group) {
+			fsnotify_get_mark(mark);
+			return mark;
 		}
 	}
 	return NULL;
@@ -294,35 +294,35 @@ void fsnotify_duplicate_mark(struct fsnotify_mark *new, struct fsnotify_mark *ol
 /*
  * Nothing fancy, just initialize lists and locks and counters.
  */
-void fsnotify_init_mark(struct fsnotify_mark *entry,
-			void (*free_mark)(struct fsnotify_mark *entry))
+void fsnotify_init_mark(struct fsnotify_mark *mark,
+			void (*free_mark)(struct fsnotify_mark *mark))
 {
-	spin_lock_init(&entry->lock);
-	atomic_set(&entry->refcnt, 1);
-	INIT_HLIST_NODE(&entry->i.i_list);
-	entry->group = NULL;
-	entry->mask = 0;
-	entry->i.inode = NULL;
-	entry->free_mark = free_mark;
+	spin_lock_init(&mark->lock);
+	atomic_set(&mark->refcnt, 1);
+	INIT_HLIST_NODE(&mark->i.i_list);
+	mark->group = NULL;
+	mark->mask = 0;
+	mark->i.inode = NULL;
+	mark->free_mark = free_mark;
 }
 
 /*
- * Attach an initialized mark entry to a given group and inode.
+ * Attach an initialized mark mark to a given group and inode.
  * These marks may be used for the fsnotify backend to determine which
  * event types should be delivered to which group and for which inodes.
  */
-int fsnotify_add_mark(struct fsnotify_mark *entry,
+int fsnotify_add_mark(struct fsnotify_mark *mark,
 		      struct fsnotify_group *group, struct inode *inode,
 		      int allow_dups)
 {
-	struct fsnotify_mark *lentry = NULL;
+	struct fsnotify_mark *lmark = NULL;
 	int ret = 0;
 
 	inode = igrab(inode);
 	if (unlikely(!inode))
 		return -EINVAL;
 
-	entry->flags = FSNOTIFY_MARK_FLAG_INODE;
+	mark->flags = FSNOTIFY_MARK_FLAG_INODE;
 
 	/*
 	 * if this group isn't being testing for inode type events we need
@@ -340,24 +340,24 @@ int fsnotify_add_mark(struct fsnotify_mark *entry,
 
 	/*
 	 * LOCKING ORDER!!!!
-	 * entry->lock
+	 * mark->lock
 	 * group->mark_lock
 	 * inode->i_lock
 	 */
-	spin_lock(&entry->lock);
+	spin_lock(&mark->lock);
 	spin_lock(&group->mark_lock);
 	spin_lock(&inode->i_lock);
 
 	if (!allow_dups)
-		lentry = fsnotify_find_mark(group, inode);
-	if (!lentry) {
-		entry->group = group;
-		entry->i.inode = inode;
+		lmark = fsnotify_find_mark(group, inode);
+	if (!lmark) {
+		mark->group = group;
+		mark->i.inode = inode;
 
-		hlist_add_head(&entry->i.i_list, &inode->i_fsnotify_marks);
-		list_add(&entry->g_list, &group->marks_list);
+		hlist_add_head(&mark->i.i_list, &inode->i_fsnotify_marks);
+		list_add(&mark->g_list, &group->marks_list);
 
-		fsnotify_get_mark(entry); /* for i_list and g_list */
+		fsnotify_get_mark(mark); /* for i_list and g_list */
 
 		atomic_inc(&group->num_marks);
 
@@ -366,12 +366,12 @@ int fsnotify_add_mark(struct fsnotify_mark *entry,
 
 	spin_unlock(&inode->i_lock);
 	spin_unlock(&group->mark_lock);
-	spin_unlock(&entry->lock);
+	spin_unlock(&mark->lock);
 
-	if (lentry) {
+	if (lmark) {
 		ret = -EEXIST;
 		iput(inode);
-		fsnotify_put_mark(lentry);
+		fsnotify_put_mark(lmark);
 	} else {
 		__fsnotify_update_child_dentry_flags(inode);
 	}
-- 
cgit v1.1


From 000285deb99a5e0636fdd3c6a2483a5d039ee2c2 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Thu, 17 Dec 2009 21:24:24 -0500
Subject: inotify: rename mark_entry to just mark

rename anything in inotify that deals with mark_entry to just be mark.  It
makes a lot more sense.

Signed-off-by: Eric Paris <eparis@redhat.com>
---
 fs/notify/inotify/inotify.h          |   7 +-
 fs/notify/inotify/inotify_fsnotify.c |  48 ++++-----
 fs/notify/inotify/inotify_user.c     | 192 +++++++++++++++++------------------
 3 files changed, 123 insertions(+), 124 deletions(-)

(limited to 'fs')

diff --git a/fs/notify/inotify/inotify.h b/fs/notify/inotify/inotify.h
index 07be6df..b6642e4 100644
--- a/fs/notify/inotify/inotify.h
+++ b/fs/notify/inotify/inotify.h
@@ -9,13 +9,12 @@ struct inotify_event_private_data {
 	int wd;
 };
 
-struct inotify_inode_mark_entry {
-	/* fsnotify_mark MUST be the first thing */
-	struct fsnotify_mark fsn_entry;
+struct inotify_inode_mark {
+	struct fsnotify_mark fsn_mark;
 	int wd;
 };
 
-extern void inotify_ignored_and_remove_idr(struct fsnotify_mark *entry,
+extern void inotify_ignored_and_remove_idr(struct fsnotify_mark *fsn_mark,
 					   struct fsnotify_group *group);
 extern void inotify_free_event_priv(struct fsnotify_event_private_data *event_priv);
 
diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c
index f8a2a6e..12dc72b 100644
--- a/fs/notify/inotify/inotify_fsnotify.c
+++ b/fs/notify/inotify/inotify_fsnotify.c
@@ -88,8 +88,8 @@ static int inotify_merge(struct list_head *list, struct fsnotify_event *event)
 
 static int inotify_handle_event(struct fsnotify_group *group, struct fsnotify_event *event)
 {
-	struct fsnotify_mark *entry;
-	struct inotify_inode_mark_entry *ientry;
+	struct fsnotify_mark *fsn_mark;
+	struct inotify_inode_mark *i_mark;
 	struct inode *to_tell;
 	struct inotify_event_private_data *event_priv;
 	struct fsnotify_event_private_data *fsn_event_priv;
@@ -98,14 +98,14 @@ static int inotify_handle_event(struct fsnotify_group *group, struct fsnotify_ev
 	to_tell = event->to_tell;
 
 	spin_lock(&to_tell->i_lock);
-	entry = fsnotify_find_mark(group, to_tell);
+	fsn_mark = fsnotify_find_mark(group, to_tell);
 	spin_unlock(&to_tell->i_lock);
 	/* race with watch removal?  We already passes should_send */
-	if (unlikely(!entry))
+	if (unlikely(!fsn_mark))
 		return 0;
-	ientry = container_of(entry, struct inotify_inode_mark_entry,
-			      fsn_entry);
-	wd = ientry->wd;
+	i_mark = container_of(fsn_mark, struct inotify_inode_mark,
+			      fsn_mark);
+	wd = i_mark->wd;
 
 	event_priv = kmem_cache_alloc(event_priv_cachep, GFP_KERNEL);
 	if (unlikely(!event_priv))
@@ -127,37 +127,37 @@ static int inotify_handle_event(struct fsnotify_group *group, struct fsnotify_ev
 	}
 
 	/*
-	 * If we hold the entry until after the event is on the queue
+	 * If we hold the fsn_mark until after the event is on the queue
 	 * IN_IGNORED won't be able to pass this event in the queue
 	 */
-	fsnotify_put_mark(entry);
+	fsnotify_put_mark(fsn_mark);
 
 	return ret;
 }
 
-static void inotify_freeing_mark(struct fsnotify_mark *entry, struct fsnotify_group *group)
+static void inotify_freeing_mark(struct fsnotify_mark *fsn_mark, struct fsnotify_group *group)
 {
-	inotify_ignored_and_remove_idr(entry, group);
+	inotify_ignored_and_remove_idr(fsn_mark, group);
 }
 
 static bool inotify_should_send_event(struct fsnotify_group *group, struct inode *inode,
 				      struct vfsmount *mnt, __u32 mask, void *data,
 				      int data_type)
 {
-	struct fsnotify_mark *entry;
+	struct fsnotify_mark *fsn_mark;
 	bool send;
 
 	spin_lock(&inode->i_lock);
-	entry = fsnotify_find_mark(group, inode);
+	fsn_mark = fsnotify_find_mark(group, inode);
 	spin_unlock(&inode->i_lock);
-	if (!entry)
+	if (!fsn_mark)
 		return false;
 
 	mask = (mask & ~FS_EVENT_ON_CHILD);
-	send = (entry->mask & mask);
+	send = (fsn_mark->mask & mask);
 
 	/* find took a reference */
-	fsnotify_put_mark(entry);
+	fsnotify_put_mark(fsn_mark);
 
 	return send;
 }
@@ -171,18 +171,18 @@ static bool inotify_should_send_event(struct fsnotify_group *group, struct inode
  */
 static int idr_callback(int id, void *p, void *data)
 {
-	struct fsnotify_mark *entry;
-	struct inotify_inode_mark_entry *ientry;
+	struct fsnotify_mark *fsn_mark;
+	struct inotify_inode_mark *i_mark;
 	static bool warned = false;
 
 	if (warned)
 		return 0;
 
 	warned = true;
-	entry = p;
-	ientry = container_of(entry, struct inotify_inode_mark_entry, fsn_entry);
+	fsn_mark = p;
+	i_mark = container_of(fsn_mark, struct inotify_inode_mark, fsn_mark);
 
-	WARN(1, "inotify closing but id=%d for entry=%p in group=%p still in "
+	WARN(1, "inotify closing but id=%d for fsn_mark=%p in group=%p still in "
 		"idr.  Probably leaking memory\n", id, p, data);
 
 	/*
@@ -191,9 +191,9 @@ static int idr_callback(int id, void *p, void *data)
 	 * out why we got here and the panic is no worse than the original
 	 * BUG() that was here.
 	 */
-	if (entry)
-		printk(KERN_WARNING "entry->group=%p inode=%p wd=%d\n",
-			entry->group, entry->i.inode, ientry->wd);
+	if (fsn_mark)
+		printk(KERN_WARNING "fsn_mark->group=%p inode=%p wd=%d\n",
+			fsn_mark->group, fsn_mark->i.inode, i_mark->wd);
 	return 0;
 }
 
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index 118085c..80d102a 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -353,7 +353,7 @@ static int inotify_find_inode(const char __user *dirname, struct path *path, uns
 
 static int inotify_add_to_idr(struct idr *idr, spinlock_t *idr_lock,
 			      int *last_wd,
-			      struct inotify_inode_mark_entry *ientry)
+			      struct inotify_inode_mark *i_mark)
 {
 	int ret;
 
@@ -362,12 +362,12 @@ static int inotify_add_to_idr(struct idr *idr, spinlock_t *idr_lock,
 			return -ENOMEM;
 
 		spin_lock(idr_lock);
-		ret = idr_get_new_above(idr, ientry, *last_wd + 1,
-					&ientry->wd);
+		ret = idr_get_new_above(idr, i_mark, *last_wd + 1,
+					&i_mark->wd);
 		/* we added the mark to the idr, take a reference */
 		if (!ret) {
-			fsnotify_get_mark(&ientry->fsn_entry);
-			*last_wd = ientry->wd;
+			*last_wd = i_mark->wd;
+			fsnotify_get_mark(&i_mark->fsn_mark);
 		}
 		spin_unlock(idr_lock);
 	} while (ret == -EAGAIN);
@@ -375,53 +375,53 @@ static int inotify_add_to_idr(struct idr *idr, spinlock_t *idr_lock,
 	return ret;
 }
 
-static struct inotify_inode_mark_entry *inotify_idr_find_locked(struct fsnotify_group *group,
+static struct inotify_inode_mark *inotify_idr_find_locked(struct fsnotify_group *group,
 								int wd)
 {
 	struct idr *idr = &group->inotify_data.idr;
 	spinlock_t *idr_lock = &group->inotify_data.idr_lock;
-	struct inotify_inode_mark_entry *ientry;
+	struct inotify_inode_mark *i_mark;
 
 	assert_spin_locked(idr_lock);
 
-	ientry = idr_find(idr, wd);
-	if (ientry) {
-		struct fsnotify_mark *fsn_entry = &ientry->fsn_entry;
+	i_mark = idr_find(idr, wd);
+	if (i_mark) {
+		struct fsnotify_mark *fsn_mark = &i_mark->fsn_mark;
 
-		fsnotify_get_mark(fsn_entry);
+		fsnotify_get_mark(fsn_mark);
 		/* One ref for being in the idr, one ref we just took */
-		BUG_ON(atomic_read(&fsn_entry->refcnt) < 2);
+		BUG_ON(atomic_read(&fsn_mark->refcnt) < 2);
 	}
 
-	return ientry;
+	return i_mark;
 }
 
-static struct inotify_inode_mark_entry *inotify_idr_find(struct fsnotify_group *group,
+static struct inotify_inode_mark *inotify_idr_find(struct fsnotify_group *group,
 							 int wd)
 {
-	struct inotify_inode_mark_entry *ientry;
+	struct inotify_inode_mark *i_mark;
 	spinlock_t *idr_lock = &group->inotify_data.idr_lock;
 
 	spin_lock(idr_lock);
-	ientry = inotify_idr_find_locked(group, wd);
+	i_mark = inotify_idr_find_locked(group, wd);
 	spin_unlock(idr_lock);
 
-	return ientry;
+	return i_mark;
 }
 
 static void do_inotify_remove_from_idr(struct fsnotify_group *group,
-				       struct inotify_inode_mark_entry *ientry)
+				       struct inotify_inode_mark *i_mark)
 {
 	struct idr *idr = &group->inotify_data.idr;
 	spinlock_t *idr_lock = &group->inotify_data.idr_lock;
-	int wd = ientry->wd;
+	int wd = i_mark->wd;
 
 	assert_spin_locked(idr_lock);
 
 	idr_remove(idr, wd);
 
 	/* removed from the idr, drop that ref */
-	fsnotify_put_mark(&ientry->fsn_entry);
+	fsnotify_put_mark(&i_mark->fsn_mark);
 }
 
 /*
@@ -429,48 +429,48 @@ static void do_inotify_remove_from_idr(struct fsnotify_group *group,
  * on the mark because it was in the idr.
  */
 static void inotify_remove_from_idr(struct fsnotify_group *group,
-				    struct inotify_inode_mark_entry *ientry)
+				    struct inotify_inode_mark *i_mark)
 {
 	spinlock_t *idr_lock = &group->inotify_data.idr_lock;
-	struct inotify_inode_mark_entry *found_ientry = NULL;
+	struct inotify_inode_mark *found_i_mark = NULL;
 	int wd;
 
 	spin_lock(idr_lock);
-	wd = ientry->wd;
+	wd = i_mark->wd;
 
 	/*
-	 * does this ientry think it is in the idr?  we shouldn't get called
+	 * does this i_mark think it is in the idr?  we shouldn't get called
 	 * if it wasn't....
 	 */
 	if (wd == -1) {
-		WARN_ONCE(1, "%s: ientry=%p ientry->wd=%d ientry->group=%p"
-			" ientry->inode=%p\n", __func__, ientry, ientry->wd,
-			ientry->fsn_entry.group, ientry->fsn_entry.i.inode);
+		WARN_ONCE(1, "%s: i_mark=%p i_mark->wd=%d i_mark->group=%p"
+			" i_mark->inode=%p\n", __func__, i_mark, i_mark->wd,
+			i_mark->fsn_mark.group, i_mark->fsn_mark.i.inode);
 		goto out;
 	}
 
 	/* Lets look in the idr to see if we find it */
-	found_ientry = inotify_idr_find_locked(group, wd);
-	if (unlikely(!found_ientry)) {
-		WARN_ONCE(1, "%s: ientry=%p ientry->wd=%d ientry->group=%p"
-			" ientry->inode=%p\n", __func__, ientry, ientry->wd,
-			ientry->fsn_entry.group, ientry->fsn_entry.i.inode);
+	found_i_mark = inotify_idr_find_locked(group, wd);
+	if (unlikely(!found_i_mark)) {
+		WARN_ONCE(1, "%s: i_mark=%p i_mark->wd=%d i_mark->group=%p"
+			" i_mark->inode=%p\n", __func__, i_mark, i_mark->wd,
+			i_mark->fsn_mark.group, i_mark->fsn_mark.i.inode);
 		goto out;
 	}
 
 	/*
-	 * We found an entry in the idr at the right wd, but it's
-	 * not the entry we were told to remove.  eparis seriously
+	 * We found an mark in the idr at the right wd, but it's
+	 * not the mark we were told to remove.  eparis seriously
 	 * fucked up somewhere.
 	 */
-	if (unlikely(found_ientry != ientry)) {
-		WARN_ONCE(1, "%s: ientry=%p ientry->wd=%d ientry->group=%p "
-			"entry->inode=%p found_ientry=%p found_ientry->wd=%d "
-			"found_ientry->group=%p found_ientry->inode=%p\n",
-			__func__, ientry, ientry->wd, ientry->fsn_entry.group,
-			ientry->fsn_entry.i.inode, found_ientry, found_ientry->wd,
-			found_ientry->fsn_entry.group,
-			found_ientry->fsn_entry.i.inode);
+	if (unlikely(found_i_mark != i_mark)) {
+		WARN_ONCE(1, "%s: i_mark=%p i_mark->wd=%d i_mark->group=%p "
+			"mark->inode=%p found_i_mark=%p found_i_mark->wd=%d "
+			"found_i_mark->group=%p found_i_mark->inode=%p\n",
+			__func__, i_mark, i_mark->wd, i_mark->fsn_mark.group,
+			i_mark->fsn_mark.i.inode, found_i_mark, found_i_mark->wd,
+			found_i_mark->fsn_mark.group,
+			found_i_mark->fsn_mark.i.inode);
 		goto out;
 	}
 
@@ -479,30 +479,30 @@ static void inotify_remove_from_idr(struct fsnotify_group *group,
 	 * one ref held by the caller trying to kill us
 	 * one ref grabbed by inotify_idr_find
 	 */
-	if (unlikely(atomic_read(&ientry->fsn_entry.refcnt) < 3)) {
-		printk(KERN_ERR "%s: ientry=%p ientry->wd=%d ientry->group=%p"
-			" ientry->inode=%p\n", __func__, ientry, ientry->wd,
-			ientry->fsn_entry.group, ientry->fsn_entry.i.inode);
+	if (unlikely(atomic_read(&i_mark->fsn_mark.refcnt) < 3)) {
+		printk(KERN_ERR "%s: i_mark=%p i_mark->wd=%d i_mark->group=%p"
+			" i_mark->inode=%p\n", __func__, i_mark, i_mark->wd,
+			i_mark->fsn_mark.group, i_mark->fsn_mark.i.inode);
 		/* we can't really recover with bad ref cnting.. */
 		BUG();
 	}
 
-	do_inotify_remove_from_idr(group, ientry);
+	do_inotify_remove_from_idr(group, i_mark);
 out:
 	/* match the ref taken by inotify_idr_find_locked() */
-	if (found_ientry)
-		fsnotify_put_mark(&found_ientry->fsn_entry);
-	ientry->wd = -1;
+	if (found_i_mark)
+		fsnotify_put_mark(&found_i_mark->fsn_mark);
+	i_mark->wd = -1;
 	spin_unlock(idr_lock);
 }
 
 /*
  * Send IN_IGNORED for this wd, remove this wd from the idr.
  */
-void inotify_ignored_and_remove_idr(struct fsnotify_mark *entry,
+void inotify_ignored_and_remove_idr(struct fsnotify_mark *fsn_mark,
 				    struct fsnotify_group *group)
 {
-	struct inotify_inode_mark_entry *ientry;
+	struct inotify_inode_mark *i_mark;
 	struct fsnotify_event *ignored_event;
 	struct inotify_event_private_data *event_priv;
 	struct fsnotify_event_private_data *fsn_event_priv;
@@ -514,7 +514,7 @@ void inotify_ignored_and_remove_idr(struct fsnotify_mark *entry,
 	if (!ignored_event)
 		return;
 
-	ientry = container_of(entry, struct inotify_inode_mark_entry, fsn_entry);
+	i_mark = container_of(fsn_mark, struct inotify_inode_mark, fsn_mark);
 
 	event_priv = kmem_cache_alloc(event_priv_cachep, GFP_NOFS);
 	if (unlikely(!event_priv))
@@ -523,7 +523,7 @@ void inotify_ignored_and_remove_idr(struct fsnotify_mark *entry,
 	fsn_event_priv = &event_priv->fsnotify_event_priv_data;
 
 	fsn_event_priv->group = group;
-	event_priv->wd = ientry->wd;
+	event_priv->wd = i_mark->wd;
 
 	ret = fsnotify_add_notify_event(group, ignored_event, fsn_event_priv, NULL);
 	if (ret)
@@ -534,28 +534,28 @@ skip_send_ignore:
 	/* matches the reference taken when the event was created */
 	fsnotify_put_event(ignored_event);
 
-	/* remove this entry from the idr */
-	inotify_remove_from_idr(group, ientry);
+	/* remove this mark from the idr */
+	inotify_remove_from_idr(group, i_mark);
 
 	atomic_dec(&group->inotify_data.user->inotify_watches);
 }
 
 /* ding dong the mark is dead */
-static void inotify_free_mark(struct fsnotify_mark *entry)
+static void inotify_free_mark(struct fsnotify_mark *fsn_mark)
 {
-	struct inotify_inode_mark_entry *ientry;
+	struct inotify_inode_mark *i_mark;
 
-	ientry = container_of(entry, struct inotify_inode_mark_entry, fsn_entry);
+	i_mark = container_of(fsn_mark, struct inotify_inode_mark, fsn_mark);
 
-	kmem_cache_free(inotify_inode_mark_cachep, ientry);
+	kmem_cache_free(inotify_inode_mark_cachep, i_mark);
 }
 
 static int inotify_update_existing_watch(struct fsnotify_group *group,
 					 struct inode *inode,
 					 u32 arg)
 {
-	struct fsnotify_mark *entry;
-	struct inotify_inode_mark_entry *ientry;
+	struct fsnotify_mark *fsn_mark;
+	struct inotify_inode_mark *i_mark;
 	__u32 old_mask, new_mask;
 	__u32 mask;
 	int add = (arg & IN_MASK_ADD);
@@ -567,35 +567,35 @@ static int inotify_update_existing_watch(struct fsnotify_group *group,
 		return -EINVAL;
 
 	spin_lock(&inode->i_lock);
-	entry = fsnotify_find_mark(group, inode);
+	fsn_mark = fsnotify_find_mark(group, inode);
 	spin_unlock(&inode->i_lock);
-	if (!entry)
+	if (!fsn_mark)
 		return -ENOENT;
 
-	ientry = container_of(entry, struct inotify_inode_mark_entry, fsn_entry);
+	i_mark = container_of(fsn_mark, struct inotify_inode_mark, fsn_mark);
 
-	spin_lock(&entry->lock);
+	spin_lock(&fsn_mark->lock);
 
-	old_mask = entry->mask;
+	old_mask = fsn_mark->mask;
 	if (add) {
-		entry->mask |= mask;
-		new_mask = entry->mask;
+		fsn_mark->mask |= mask;
+		new_mask = fsn_mark->mask;
 	} else {
-		entry->mask = mask;
-		new_mask = entry->mask;
+		fsn_mark->mask = mask;
+		new_mask = fsn_mark->mask;
 	}
 
-	spin_unlock(&entry->lock);
+	spin_unlock(&fsn_mark->lock);
 
 	if (old_mask != new_mask) {
 		/* more bits in old than in new? */
 		int dropped = (old_mask & ~new_mask);
-		/* more bits in this entry than the inode's mask? */
+		/* more bits in this fsn_mark than the inode's mask? */
 		int do_inode = (new_mask & ~inode->i_fsnotify_mask);
-		/* more bits in this entry than the group? */
+		/* more bits in this fsn_mark than the group? */
 		int do_group = (new_mask & ~group->mask);
 
-		/* update the inode with this new entry */
+		/* update the inode with this new fsn_mark */
 		if (dropped || do_inode)
 			fsnotify_recalc_inode_mask(inode);
 
@@ -605,10 +605,10 @@ static int inotify_update_existing_watch(struct fsnotify_group *group,
 	}
 
 	/* return the wd */
-	ret = ientry->wd;
+	ret = i_mark->wd;
 
 	/* match the get from fsnotify_find_mark() */
-	fsnotify_put_mark(entry);
+	fsnotify_put_mark(fsn_mark);
 
 	return ret;
 }
@@ -617,7 +617,7 @@ static int inotify_new_watch(struct fsnotify_group *group,
 			     struct inode *inode,
 			     u32 arg)
 {
-	struct inotify_inode_mark_entry *tmp_ientry;
+	struct inotify_inode_mark *tmp_i_mark;
 	__u32 mask;
 	int ret;
 	struct idr *idr = &group->inotify_data.idr;
@@ -628,44 +628,44 @@ static int inotify_new_watch(struct fsnotify_group *group,
 	if (unlikely(!mask))
 		return -EINVAL;
 
-	tmp_ientry = kmem_cache_alloc(inotify_inode_mark_cachep, GFP_KERNEL);
-	if (unlikely(!tmp_ientry))
+	tmp_i_mark = kmem_cache_alloc(inotify_inode_mark_cachep, GFP_KERNEL);
+	if (unlikely(!tmp_i_mark))
 		return -ENOMEM;
 
-	fsnotify_init_mark(&tmp_ientry->fsn_entry, inotify_free_mark);
-	tmp_ientry->fsn_entry.mask = mask;
-	tmp_ientry->wd = -1;
+	fsnotify_init_mark(&tmp_i_mark->fsn_mark, inotify_free_mark);
+	tmp_i_mark->fsn_mark.mask = mask;
+	tmp_i_mark->wd = -1;
 
 	ret = -ENOSPC;
 	if (atomic_read(&group->inotify_data.user->inotify_watches) >= inotify_max_user_watches)
 		goto out_err;
 
 	ret = inotify_add_to_idr(idr, idr_lock, &group->inotify_data.last_wd,
-				 tmp_ientry);
+				 tmp_i_mark);
 	if (ret)
 		goto out_err;
 
 	/* we are on the idr, now get on the inode */
-	ret = fsnotify_add_mark(&tmp_ientry->fsn_entry, group, inode, 0);
+	ret = fsnotify_add_mark(&tmp_i_mark->fsn_mark, group, inode, 0);
 	if (ret) {
 		/* we failed to get on the inode, get off the idr */
-		inotify_remove_from_idr(group, tmp_ientry);
+		inotify_remove_from_idr(group, tmp_i_mark);
 		goto out_err;
 	}
 
 	/* increment the number of watches the user has */
 	atomic_inc(&group->inotify_data.user->inotify_watches);
 
-	/* return the watch descriptor for this new entry */
-	ret = tmp_ientry->wd;
+	/* return the watch descriptor for this new mark */
+	ret = tmp_i_mark->wd;
 
 	/* if this mark added a new event update the group mask */
 	if (mask & ~group->mask)
 		fsnotify_recalc_group_mask(group);
 
 out_err:
-	/* match the ref from fsnotify_init_markentry() */
-	fsnotify_put_mark(&tmp_ientry->fsn_entry);
+	/* match the ref from fsnotify_init_mark() */
+	fsnotify_put_mark(&tmp_i_mark->fsn_mark);
 
 	return ret;
 }
@@ -801,7 +801,7 @@ fput_and_out:
 SYSCALL_DEFINE2(inotify_rm_watch, int, fd, __s32, wd)
 {
 	struct fsnotify_group *group;
-	struct inotify_inode_mark_entry *ientry;
+	struct inotify_inode_mark *i_mark;
 	struct file *filp;
 	int ret = 0, fput_needed;
 
@@ -817,16 +817,16 @@ SYSCALL_DEFINE2(inotify_rm_watch, int, fd, __s32, wd)
 	group = filp->private_data;
 
 	ret = -EINVAL;
-	ientry = inotify_idr_find(group, wd);
-	if (unlikely(!ientry))
+	i_mark = inotify_idr_find(group, wd);
+	if (unlikely(!i_mark))
 		goto out;
 
 	ret = 0;
 
-	fsnotify_destroy_mark(&ientry->fsn_entry);
+	fsnotify_destroy_mark(&i_mark->fsn_mark);
 
 	/* match ref taken by inotify_idr_find */
-	fsnotify_put_mark(&ientry->fsn_entry);
+	fsnotify_put_mark(&i_mark->fsn_mark);
 
 out:
 	fput_light(filp, fput_needed);
@@ -840,7 +840,7 @@ out:
  */
 static int __init inotify_user_setup(void)
 {
-	inotify_inode_mark_cachep = KMEM_CACHE(inotify_inode_mark_entry, SLAB_PANIC);
+	inotify_inode_mark_cachep = KMEM_CACHE(inotify_inode_mark, SLAB_PANIC);
 	event_priv_cachep = KMEM_CACHE(inotify_event_private_data, SLAB_PANIC);
 
 	inotify_max_queued_events = 16384;
-- 
cgit v1.1


From ef5e2b785fb3216269e6d0656d38ec286b98dbe5 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Thu, 17 Dec 2009 21:24:24 -0500
Subject: dnotify: rename mark_entry to mark

nomenclature change.  Used to call things 'entries' but now we just call
them 'marks.'  Do those changes for dnotify.

Signed-off-by: Eric Paris <eparis@redhat.com>
---
 fs/notify/dnotify/dnotify.c | 170 ++++++++++++++++++++++----------------------
 1 file changed, 85 insertions(+), 85 deletions(-)

(limited to 'fs')

diff --git a/fs/notify/dnotify/dnotify.c b/fs/notify/dnotify/dnotify.c
index b202bc5..3efb8b9 100644
--- a/fs/notify/dnotify/dnotify.c
+++ b/fs/notify/dnotify/dnotify.c
@@ -29,7 +29,7 @@
 int dir_notify_enable __read_mostly = 1;
 
 static struct kmem_cache *dnotify_struct_cache __read_mostly;
-static struct kmem_cache *dnotify_mark_entry_cache __read_mostly;
+static struct kmem_cache *dnotify_mark_cache __read_mostly;
 static struct fsnotify_group *dnotify_group __read_mostly;
 static DEFINE_MUTEX(dnotify_mark_mutex);
 
@@ -38,8 +38,8 @@ static DEFINE_MUTEX(dnotify_mark_mutex);
  * is being watched by dnotify.  If multiple userspace applications are watching
  * the same directory with dnotify their information is chained in dn
  */
-struct dnotify_mark_entry {
-	struct fsnotify_mark fsn_entry;
+struct dnotify_mark {
+	struct fsnotify_mark fsn_mark;
 	struct dnotify_struct *dn;
 };
 
@@ -51,27 +51,27 @@ struct dnotify_mark_entry {
  * it calls the fsnotify function so it can update the set of all events relevant
  * to this inode.
  */
-static void dnotify_recalc_inode_mask(struct fsnotify_mark *entry)
+static void dnotify_recalc_inode_mask(struct fsnotify_mark *fsn_mark)
 {
 	__u32 new_mask, old_mask;
 	struct dnotify_struct *dn;
-	struct dnotify_mark_entry *dnentry  = container_of(entry,
-							   struct dnotify_mark_entry,
-							   fsn_entry);
+	struct dnotify_mark *dn_mark  = container_of(fsn_mark,
+						     struct dnotify_mark,
+						     fsn_mark);
 
-	assert_spin_locked(&entry->lock);
+	assert_spin_locked(&fsn_mark->lock);
 
-	old_mask = entry->mask;
+	old_mask = fsn_mark->mask;
 	new_mask = 0;
-	for (dn = dnentry->dn; dn != NULL; dn = dn->dn_next)
+	for (dn = dn_mark->dn; dn != NULL; dn = dn->dn_next)
 		new_mask |= (dn->dn_mask & ~FS_DN_MULTISHOT);
-	entry->mask = new_mask;
+	fsn_mark->mask = new_mask;
 
 	if (old_mask == new_mask)
 		return;
 
-	if (entry->i.inode)
-		fsnotify_recalc_inode_mask(entry->i.inode);
+	if (fsn_mark->i.inode)
+		fsnotify_recalc_inode_mask(fsn_mark->i.inode);
 }
 
 /*
@@ -85,8 +85,8 @@ static void dnotify_recalc_inode_mask(struct fsnotify_mark *entry)
 static int dnotify_handle_event(struct fsnotify_group *group,
 				struct fsnotify_event *event)
 {
-	struct fsnotify_mark *entry = NULL;
-	struct dnotify_mark_entry *dnentry;
+	struct fsnotify_mark *fsn_mark = NULL;
+	struct dnotify_mark *dn_mark;
 	struct inode *to_tell;
 	struct dnotify_struct *dn;
 	struct dnotify_struct **prev;
@@ -96,16 +96,16 @@ static int dnotify_handle_event(struct fsnotify_group *group,
 	to_tell = event->to_tell;
 
 	spin_lock(&to_tell->i_lock);
-	entry = fsnotify_find_mark(group, to_tell);
+	fsn_mark = fsnotify_find_mark(group, to_tell);
 	spin_unlock(&to_tell->i_lock);
 
 	/* unlikely since we alreay passed dnotify_should_send_event() */
-	if (unlikely(!entry))
+	if (unlikely(!fsn_mark))
 		return 0;
-	dnentry = container_of(entry, struct dnotify_mark_entry, fsn_entry);
+	dn_mark = container_of(fsn_mark, struct dnotify_mark, fsn_mark);
 
-	spin_lock(&entry->lock);
-	prev = &dnentry->dn;
+	spin_lock(&fsn_mark->lock);
+	prev = &dn_mark->dn;
 	while ((dn = *prev) != NULL) {
 		if ((dn->dn_mask & test_mask) == 0) {
 			prev = &dn->dn_next;
@@ -118,12 +118,12 @@ static int dnotify_handle_event(struct fsnotify_group *group,
 		else {
 			*prev = dn->dn_next;
 			kmem_cache_free(dnotify_struct_cache, dn);
-			dnotify_recalc_inode_mask(entry);
+			dnotify_recalc_inode_mask(fsn_mark);
 		}
 	}
 
-	spin_unlock(&entry->lock);
-	fsnotify_put_mark(entry);
+	spin_unlock(&fsn_mark->lock);
+	fsnotify_put_mark(fsn_mark);
 
 	return 0;
 }
@@ -136,7 +136,7 @@ static bool dnotify_should_send_event(struct fsnotify_group *group,
 				      struct inode *inode, struct vfsmount *mnt,
 				      __u32 mask, void *data, int data_type)
 {
-	struct fsnotify_mark *entry;
+	struct fsnotify_mark *fsn_mark;
 	bool send;
 
 	/* !dir_notify_enable should never get here, don't waste time checking
@@ -148,30 +148,30 @@ static bool dnotify_should_send_event(struct fsnotify_group *group,
 		return false;
 
 	spin_lock(&inode->i_lock);
-	entry = fsnotify_find_mark(group, inode);
+	fsn_mark = fsnotify_find_mark(group, inode);
 	spin_unlock(&inode->i_lock);
 
 	/* no mark means no dnotify watch */
-	if (!entry)
+	if (!fsn_mark)
 		return false;
 
 	mask = (mask & ~FS_EVENT_ON_CHILD);
-	send = (mask & entry->mask);
+	send = (mask & fsn_mark->mask);
 
-	fsnotify_put_mark(entry); /* matches fsnotify_find_mark */
+	fsnotify_put_mark(fsn_mark); /* matches fsnotify_find_mark */
 
 	return send;
 }
 
-static void dnotify_free_mark(struct fsnotify_mark *entry)
+static void dnotify_free_mark(struct fsnotify_mark *fsn_mark)
 {
-	struct dnotify_mark_entry *dnentry = container_of(entry,
-							  struct dnotify_mark_entry,
-							  fsn_entry);
+	struct dnotify_mark *dn_mark = container_of(fsn_mark,
+						    struct dnotify_mark,
+						    fsn_mark);
 
-	BUG_ON(dnentry->dn);
+	BUG_ON(dn_mark->dn);
 
-	kmem_cache_free(dnotify_mark_entry_cache, dnentry);
+	kmem_cache_free(dnotify_mark_cache, dn_mark);
 }
 
 static struct fsnotify_ops dnotify_fsnotify_ops = {
@@ -191,8 +191,8 @@ static struct fsnotify_ops dnotify_fsnotify_ops = {
  */
 void dnotify_flush(struct file *filp, fl_owner_t id)
 {
-	struct fsnotify_mark *entry;
-	struct dnotify_mark_entry *dnentry;
+	struct fsnotify_mark *fsn_mark;
+	struct dnotify_mark *dn_mark;
 	struct dnotify_struct *dn;
 	struct dnotify_struct **prev;
 	struct inode *inode;
@@ -202,37 +202,37 @@ void dnotify_flush(struct file *filp, fl_owner_t id)
 		return;
 
 	spin_lock(&inode->i_lock);
-	entry = fsnotify_find_mark(dnotify_group, inode);
+	fsn_mark = fsnotify_find_mark(dnotify_group, inode);
 	spin_unlock(&inode->i_lock);
-	if (!entry)
+	if (!fsn_mark)
 		return;
-	dnentry = container_of(entry, struct dnotify_mark_entry, fsn_entry);
+	dn_mark = container_of(fsn_mark, struct dnotify_mark, fsn_mark);
 
 	mutex_lock(&dnotify_mark_mutex);
 
-	spin_lock(&entry->lock);
-	prev = &dnentry->dn;
+	spin_lock(&fsn_mark->lock);
+	prev = &dn_mark->dn;
 	while ((dn = *prev) != NULL) {
 		if ((dn->dn_owner == id) && (dn->dn_filp == filp)) {
 			*prev = dn->dn_next;
 			kmem_cache_free(dnotify_struct_cache, dn);
-			dnotify_recalc_inode_mask(entry);
+			dnotify_recalc_inode_mask(fsn_mark);
 			break;
 		}
 		prev = &dn->dn_next;
 	}
 
-	spin_unlock(&entry->lock);
+	spin_unlock(&fsn_mark->lock);
 
 	/* nothing else could have found us thanks to the dnotify_mark_mutex */
-	if (dnentry->dn == NULL)
-		fsnotify_destroy_mark(entry);
+	if (dn_mark->dn == NULL)
+		fsnotify_destroy_mark(fsn_mark);
 
 	fsnotify_recalc_group_mask(dnotify_group);
 
 	mutex_unlock(&dnotify_mark_mutex);
 
-	fsnotify_put_mark(entry);
+	fsnotify_put_mark(fsn_mark);
 }
 
 /* this conversion is done only at watch creation */
@@ -264,12 +264,12 @@ static __u32 convert_arg(unsigned long arg)
  * onto that mark.  This function either attaches the new dnotify_struct onto
  * that list, or it |= the mask onto an existing dnofiy_struct.
  */
-static int attach_dn(struct dnotify_struct *dn, struct dnotify_mark_entry *dnentry,
+static int attach_dn(struct dnotify_struct *dn, struct dnotify_mark *dn_mark,
 		     fl_owner_t id, int fd, struct file *filp, __u32 mask)
 {
 	struct dnotify_struct *odn;
 
-	odn = dnentry->dn;
+	odn = dn_mark->dn;
 	while (odn != NULL) {
 		/* adding more events to existing dnofiy_struct? */
 		if ((odn->dn_owner == id) && (odn->dn_filp == filp)) {
@@ -284,8 +284,8 @@ static int attach_dn(struct dnotify_struct *dn, struct dnotify_mark_entry *dnent
 	dn->dn_fd = fd;
 	dn->dn_filp = filp;
 	dn->dn_owner = id;
-	dn->dn_next = dnentry->dn;
-	dnentry->dn = dn;
+	dn->dn_next = dn_mark->dn;
+	dn_mark->dn = dn;
 
 	return 0;
 }
@@ -297,8 +297,8 @@ static int attach_dn(struct dnotify_struct *dn, struct dnotify_mark_entry *dnent
  */
 int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg)
 {
-	struct dnotify_mark_entry *new_dnentry, *dnentry;
-	struct fsnotify_mark *new_entry, *entry;
+	struct dnotify_mark *new_dn_mark, *dn_mark;
+	struct fsnotify_mark *new_fsn_mark, *fsn_mark;
 	struct dnotify_struct *dn;
 	struct inode *inode;
 	fl_owner_t id = current->files;
@@ -307,7 +307,7 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg)
 	__u32 mask;
 
 	/* we use these to tell if we need to kfree */
-	new_entry = NULL;
+	new_fsn_mark = NULL;
 	dn = NULL;
 
 	if (!dir_notify_enable) {
@@ -337,8 +337,8 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg)
 	}
 
 	/* new fsnotify mark, we expect most fcntl calls to add a new mark */
-	new_dnentry = kmem_cache_alloc(dnotify_mark_entry_cache, GFP_KERNEL);
-	if (!new_dnentry) {
+	new_dn_mark = kmem_cache_alloc(dnotify_mark_cache, GFP_KERNEL);
+	if (!new_dn_mark) {
 		error = -ENOMEM;
 		goto out_err;
 	}
@@ -346,29 +346,29 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg)
 	/* convert the userspace DN_* "arg" to the internal FS_* defines in fsnotify */
 	mask = convert_arg(arg);
 
-	/* set up the new_entry and new_dnentry */
-	new_entry = &new_dnentry->fsn_entry;
-	fsnotify_init_mark(new_entry, dnotify_free_mark);
-	new_entry->mask = mask;
-	new_dnentry->dn = NULL;
+	/* set up the new_fsn_mark and new_dn_mark */
+	new_fsn_mark = &new_dn_mark->fsn_mark;
+	fsnotify_init_mark(new_fsn_mark, dnotify_free_mark);
+	new_fsn_mark->mask = mask;
+	new_dn_mark->dn = NULL;
 
 	/* this is needed to prevent the fcntl/close race described below */
 	mutex_lock(&dnotify_mark_mutex);
 
-	/* add the new_entry or find an old one. */
+	/* add the new_fsn_mark or find an old one. */
 	spin_lock(&inode->i_lock);
-	entry = fsnotify_find_mark(dnotify_group, inode);
+	fsn_mark = fsnotify_find_mark(dnotify_group, inode);
 	spin_unlock(&inode->i_lock);
-	if (entry) {
-		dnentry = container_of(entry, struct dnotify_mark_entry, fsn_entry);
-		spin_lock(&entry->lock);
+	if (fsn_mark) {
+		dn_mark = container_of(fsn_mark, struct dnotify_mark, fsn_mark);
+		spin_lock(&fsn_mark->lock);
 	} else {
-		fsnotify_add_mark(new_entry, dnotify_group, inode, 0);
-		spin_lock(&new_entry->lock);
-		entry = new_entry;
-		dnentry = new_dnentry;
-		/* we used new_entry, so don't free it */
-		new_entry = NULL;
+		fsnotify_add_mark(new_fsn_mark, dnotify_group, inode, 0);
+		spin_lock(&new_fsn_mark->lock);
+		fsn_mark = new_fsn_mark;
+		dn_mark = new_dn_mark;
+		/* we used new_fsn_mark, so don't free it */
+		new_fsn_mark = NULL;
 	}
 
 	rcu_read_lock();
@@ -377,17 +377,17 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg)
 
 	/* if (f != filp) means that we lost a race and another task/thread
 	 * actually closed the fd we are still playing with before we grabbed
-	 * the dnotify_mark_mutex and entry->lock.  Since closing the fd is the
+	 * the dnotify_mark_mutex and fsn_mark->lock.  Since closing the fd is the
 	 * only time we clean up the marks we need to get our mark off
 	 * the list. */
 	if (f != filp) {
 		/* if we added ourselves, shoot ourselves, it's possible that
-		 * the flush actually did shoot this entry.  That's fine too
+		 * the flush actually did shoot this fsn_mark.  That's fine too
 		 * since multiple calls to destroy_mark is perfectly safe, if
-		 * we found a dnentry already attached to the inode, just sod
+		 * we found a dn_mark already attached to the inode, just sod
 		 * off silently as the flush at close time dealt with it.
 		 */
-		if (dnentry == new_dnentry)
+		if (dn_mark == new_dn_mark)
 			destroy = 1;
 		goto out;
 	}
@@ -395,13 +395,13 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg)
 	error = __f_setown(filp, task_pid(current), PIDTYPE_PID, 0);
 	if (error) {
 		/* if we added, we must shoot */
-		if (dnentry == new_dnentry)
+		if (dn_mark == new_dn_mark)
 			destroy = 1;
 		goto out;
 	}
 
-	error = attach_dn(dn, dnentry, id, fd, filp, mask);
-	/* !error means that we attached the dn to the dnentry, so don't free it */
+	error = attach_dn(dn, dn_mark, id, fd, filp, mask);
+	/* !error means that we attached the dn to the dn_mark, so don't free it */
 	if (!error)
 		dn = NULL;
 	/* -EEXIST means that we didn't add this new dn and used an old one.
@@ -409,20 +409,20 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg)
 	else if (error == -EEXIST)
 		error = 0;
 
-	dnotify_recalc_inode_mask(entry);
+	dnotify_recalc_inode_mask(fsn_mark);
 out:
-	spin_unlock(&entry->lock);
+	spin_unlock(&fsn_mark->lock);
 
 	if (destroy)
-		fsnotify_destroy_mark(entry);
+		fsnotify_destroy_mark(fsn_mark);
 
 	fsnotify_recalc_group_mask(dnotify_group);
 
 	mutex_unlock(&dnotify_mark_mutex);
-	fsnotify_put_mark(entry);
+	fsnotify_put_mark(fsn_mark);
 out_err:
-	if (new_entry)
-		fsnotify_put_mark(new_entry);
+	if (new_fsn_mark)
+		fsnotify_put_mark(new_fsn_mark);
 	if (dn)
 		kmem_cache_free(dnotify_struct_cache, dn);
 	return error;
@@ -431,7 +431,7 @@ out_err:
 static int __init dnotify_init(void)
 {
 	dnotify_struct_cache = KMEM_CACHE(dnotify_struct, SLAB_PANIC);
-	dnotify_mark_entry_cache = KMEM_CACHE(dnotify_mark_entry, SLAB_PANIC);
+	dnotify_mark_cache = KMEM_CACHE(dnotify_mark, SLAB_PANIC);
 
 	dnotify_group = fsnotify_alloc_group(&dnotify_fsnotify_ops);
 	if (IS_ERR(dnotify_group))
-- 
cgit v1.1


From 35566087099c3ff8901d65ee98af56347ee66e5a Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruen@suse.de>
Date: Thu, 17 Dec 2009 21:24:25 -0500
Subject: fsnotify: take inode->i_lock inside fsnotify_find_mark_entry()

All callers to fsnotify_find_mark_entry() except one take and
release inode->i_lock around the call.  Take the lock inside
fsnotify_find_mark_entry() instead.

Signed-off-by: Andreas Gruenbacher <agruen@suse.de>
Signed-off-by: Eric Paris <eparis@redhat.com>
---
 fs/notify/dnotify/dnotify.c          | 12 ------------
 fs/notify/inode_mark.c               | 26 +++++++++++++++++++-------
 fs/notify/inotify/inotify_fsnotify.c |  4 ----
 fs/notify/inotify/inotify_user.c     |  2 --
 4 files changed, 19 insertions(+), 25 deletions(-)

(limited to 'fs')

diff --git a/fs/notify/dnotify/dnotify.c b/fs/notify/dnotify/dnotify.c
index 3efb8b9..cac2eb8 100644
--- a/fs/notify/dnotify/dnotify.c
+++ b/fs/notify/dnotify/dnotify.c
@@ -95,11 +95,7 @@ static int dnotify_handle_event(struct fsnotify_group *group,
 
 	to_tell = event->to_tell;
 
-	spin_lock(&to_tell->i_lock);
 	fsn_mark = fsnotify_find_mark(group, to_tell);
-	spin_unlock(&to_tell->i_lock);
-
-	/* unlikely since we alreay passed dnotify_should_send_event() */
 	if (unlikely(!fsn_mark))
 		return 0;
 	dn_mark = container_of(fsn_mark, struct dnotify_mark, fsn_mark);
@@ -147,11 +143,7 @@ static bool dnotify_should_send_event(struct fsnotify_group *group,
 	if (!S_ISDIR(inode->i_mode))
 		return false;
 
-	spin_lock(&inode->i_lock);
 	fsn_mark = fsnotify_find_mark(group, inode);
-	spin_unlock(&inode->i_lock);
-
-	/* no mark means no dnotify watch */
 	if (!fsn_mark)
 		return false;
 
@@ -201,9 +193,7 @@ void dnotify_flush(struct file *filp, fl_owner_t id)
 	if (!S_ISDIR(inode->i_mode))
 		return;
 
-	spin_lock(&inode->i_lock);
 	fsn_mark = fsnotify_find_mark(dnotify_group, inode);
-	spin_unlock(&inode->i_lock);
 	if (!fsn_mark)
 		return;
 	dn_mark = container_of(fsn_mark, struct dnotify_mark, fsn_mark);
@@ -356,9 +346,7 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg)
 	mutex_lock(&dnotify_mark_mutex);
 
 	/* add the new_fsn_mark or find an old one. */
-	spin_lock(&inode->i_lock);
 	fsn_mark = fsnotify_find_mark(dnotify_group, inode);
-	spin_unlock(&inode->i_lock);
 	if (fsn_mark) {
 		dn_mark = container_of(fsn_mark, struct dnotify_mark, fsn_mark);
 		spin_lock(&fsn_mark->lock);
diff --git a/fs/notify/inode_mark.c b/fs/notify/inode_mark.c
index 27c1b43..ba6f983 100644
--- a/fs/notify/inode_mark.c
+++ b/fs/notify/inode_mark.c
@@ -261,12 +261,8 @@ void fsnotify_clear_marks_by_inode(struct inode *inode)
 	}
 }
 
-/*
- * given a group and inode, find the mark associated with that combination.
- * if found take a reference to that mark and return it, else return NULL
- */
-struct fsnotify_mark *fsnotify_find_mark(struct fsnotify_group *group,
-					 struct inode *inode)
+static struct fsnotify_mark *fsnotify_find_mark_locked(struct fsnotify_group *group,
+						       struct inode *inode)
 {
 	struct fsnotify_mark *mark;
 	struct hlist_node *pos;
@@ -282,6 +278,22 @@ struct fsnotify_mark *fsnotify_find_mark(struct fsnotify_group *group,
 	return NULL;
 }
 
+/*
+ * given a group and inode, find the mark associated with that combination.
+ * if found take a reference to that mark and return it, else return NULL
+ */
+struct fsnotify_mark *fsnotify_find_mark(struct fsnotify_group *group,
+					 struct inode *inode)
+{
+	struct fsnotify_mark *mark;
+
+	spin_lock(&inode->i_lock);
+	mark = fsnotify_find_mark_locked(group, inode);
+	spin_unlock(&inode->i_lock);
+
+	return mark;
+}
+
 void fsnotify_duplicate_mark(struct fsnotify_mark *new, struct fsnotify_mark *old)
 {
 	assert_spin_locked(&old->lock);
@@ -349,7 +361,7 @@ int fsnotify_add_mark(struct fsnotify_mark *mark,
 	spin_lock(&inode->i_lock);
 
 	if (!allow_dups)
-		lmark = fsnotify_find_mark(group, inode);
+		lmark = fsnotify_find_mark_locked(group, inode);
 	if (!lmark) {
 		mark->group = group;
 		mark->i.inode = inode;
diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c
index 12dc72b..cc8f6bc 100644
--- a/fs/notify/inotify/inotify_fsnotify.c
+++ b/fs/notify/inotify/inotify_fsnotify.c
@@ -97,9 +97,7 @@ static int inotify_handle_event(struct fsnotify_group *group, struct fsnotify_ev
 
 	to_tell = event->to_tell;
 
-	spin_lock(&to_tell->i_lock);
 	fsn_mark = fsnotify_find_mark(group, to_tell);
-	spin_unlock(&to_tell->i_lock);
 	/* race with watch removal?  We already passes should_send */
 	if (unlikely(!fsn_mark))
 		return 0;
@@ -147,9 +145,7 @@ static bool inotify_should_send_event(struct fsnotify_group *group, struct inode
 	struct fsnotify_mark *fsn_mark;
 	bool send;
 
-	spin_lock(&inode->i_lock);
 	fsn_mark = fsnotify_find_mark(group, inode);
-	spin_unlock(&inode->i_lock);
 	if (!fsn_mark)
 		return false;
 
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index 80d102a..ad5a1ea 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -566,9 +566,7 @@ static int inotify_update_existing_watch(struct fsnotify_group *group,
 	if (unlikely(!mask))
 		return -EINVAL;
 
-	spin_lock(&inode->i_lock);
 	fsn_mark = fsnotify_find_mark(group, inode);
-	spin_unlock(&inode->i_lock);
 	if (!fsn_mark)
 		return -ENOENT;
 
-- 
cgit v1.1


From ff0b16a9850e8a240ad59e10b0a1291a8fcf7cbc Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Thu, 17 Dec 2009 21:24:25 -0500
Subject: fanotify: fscking all notification system

fanotify is a novel file notification system which bases notification on
giving userspace both an event type (open, close, read, write) and an open
file descriptor to the object in question.  This should address a number of
races and problems with other notification systems like inotify and dnotify
and should allow the future implementation of blocking or access controlled
notification.  These are useful for on access scanners or hierachical storage
management schemes.

This patch just implements the basics of the fsnotify functions.

Signed-off-by: Eric Paris <eparis@redhat.com>
---
 fs/notify/Kconfig             |  1 +
 fs/notify/Makefile            |  1 +
 fs/notify/fanotify/Kconfig    | 11 ++++++
 fs/notify/fanotify/Makefile   |  1 +
 fs/notify/fanotify/fanotify.c | 78 +++++++++++++++++++++++++++++++++++++++++++
 fs/notify/fanotify/fanotify.h | 12 +++++++
 6 files changed, 104 insertions(+)
 create mode 100644 fs/notify/fanotify/Kconfig
 create mode 100644 fs/notify/fanotify/Makefile
 create mode 100644 fs/notify/fanotify/fanotify.c
 create mode 100644 fs/notify/fanotify/fanotify.h

(limited to 'fs')

diff --git a/fs/notify/Kconfig b/fs/notify/Kconfig
index dffbb09..22c629e 100644
--- a/fs/notify/Kconfig
+++ b/fs/notify/Kconfig
@@ -3,3 +3,4 @@ config FSNOTIFY
 
 source "fs/notify/dnotify/Kconfig"
 source "fs/notify/inotify/Kconfig"
+source "fs/notify/fanotify/Kconfig"
diff --git a/fs/notify/Makefile b/fs/notify/Makefile
index 0922cc8..396a387 100644
--- a/fs/notify/Makefile
+++ b/fs/notify/Makefile
@@ -2,3 +2,4 @@ obj-$(CONFIG_FSNOTIFY)		+= fsnotify.o notification.o group.o inode_mark.o
 
 obj-y			+= dnotify/
 obj-y			+= inotify/
+obj-y			+= fanotify/
diff --git a/fs/notify/fanotify/Kconfig b/fs/notify/fanotify/Kconfig
new file mode 100644
index 0000000..f9d7ae0
--- /dev/null
+++ b/fs/notify/fanotify/Kconfig
@@ -0,0 +1,11 @@
+config FANOTIFY
+	bool "Filesystem wide access notification"
+	select FSNOTIFY
+	default y
+	---help---
+	   Say Y here to enable fanotify suport.  fanotify is a file access
+	   notification system which differs from inotify in that it sends
+	   and open file descriptor to the userspace listener along with
+	   the event.
+
+	   If unsure, say Y.
diff --git a/fs/notify/fanotify/Makefile b/fs/notify/fanotify/Makefile
new file mode 100644
index 0000000..e7d39c0
--- /dev/null
+++ b/fs/notify/fanotify/Makefile
@@ -0,0 +1 @@
+obj-$(CONFIG_FANOTIFY)		+= fanotify.o
diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
new file mode 100644
index 0000000..3ffb9db
--- /dev/null
+++ b/fs/notify/fanotify/fanotify.c
@@ -0,0 +1,78 @@
+#include <linux/fdtable.h>
+#include <linux/fsnotify_backend.h>
+#include <linux/init.h>
+#include <linux/kernel.h> /* UINT_MAX */
+#include <linux/types.h>
+
+#include "fanotify.h"
+
+static int fanotify_handle_event(struct fsnotify_group *group, struct fsnotify_event *event)
+{
+	int ret;
+
+
+	BUILD_BUG_ON(FAN_ACCESS != FS_ACCESS);
+	BUILD_BUG_ON(FAN_MODIFY != FS_MODIFY);
+	BUILD_BUG_ON(FAN_CLOSE_NOWRITE != FS_CLOSE_NOWRITE);
+	BUILD_BUG_ON(FAN_CLOSE_WRITE != FS_CLOSE_WRITE);
+	BUILD_BUG_ON(FAN_OPEN != FS_OPEN);
+	BUILD_BUG_ON(FAN_EVENT_ON_CHILD != FS_EVENT_ON_CHILD);
+	BUILD_BUG_ON(FAN_Q_OVERFLOW != FS_Q_OVERFLOW);
+
+	pr_debug("%s: group=%p event=%p\n", __func__, group, event);
+
+	ret = fsnotify_add_notify_event(group, event, NULL, NULL);
+
+	return ret;
+}
+
+static bool fanotify_should_send_event(struct fsnotify_group *group, struct inode *inode,
+				       struct vfsmount *mnt, __u32 mask, void *data,
+				       int data_type)
+{
+	struct fsnotify_mark *fsn_mark;
+	bool send;
+
+	pr_debug("%s: group=%p inode=%p mask=%x data=%p data_type=%d\n",
+		 __func__, group, inode, mask, data, data_type);
+
+	/* sorry, fanotify only gives a damn about files and dirs */
+	if (!S_ISREG(inode->i_mode) &&
+	    !S_ISDIR(inode->i_mode))
+		return false;
+
+	/* if we don't have enough info to send an event to userspace say no */
+	if (data_type != FSNOTIFY_EVENT_PATH)
+		return false;
+
+	fsn_mark = fsnotify_find_mark(group, inode);
+	if (!fsn_mark)
+		return false;
+
+	/* if the event is for a child and this inode doesn't care about
+	 * events on the child, don't send it! */
+	if ((mask & FS_EVENT_ON_CHILD) &&
+	    !(fsn_mark->mask & FS_EVENT_ON_CHILD)) {
+		send = false;
+	} else {
+		/*
+		 * We care about children, but do we care about this particular
+		 * type of event?
+		 */
+		mask = (mask & ~FS_EVENT_ON_CHILD);
+		send = (fsn_mark->mask & mask);
+	}
+
+	/* find took a reference */
+	fsnotify_put_mark(fsn_mark);
+
+	return send;
+}
+
+const struct fsnotify_ops fanotify_fsnotify_ops = {
+	.handle_event = fanotify_handle_event,
+	.should_send_event = fanotify_should_send_event,
+	.free_group_priv = NULL,
+	.free_event_priv = NULL,
+	.freeing_mark = NULL,
+};
diff --git a/fs/notify/fanotify/fanotify.h b/fs/notify/fanotify/fanotify.h
new file mode 100644
index 0000000..50765eb
--- /dev/null
+++ b/fs/notify/fanotify/fanotify.h
@@ -0,0 +1,12 @@
+#include <linux/fanotify.h>
+#include <linux/fsnotify_backend.h>
+#include <linux/net.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+
+static inline bool fanotify_mask_valid(__u32 mask)
+{
+	if (mask & ~((__u32)FAN_ALL_INCOMING_EVENTS))
+		return false;
+	return true;
+}
-- 
cgit v1.1


From 767cd46c332d1360cdbe46ef18d80c3ade06fdc1 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Thu, 17 Dec 2009 21:24:25 -0500
Subject: fanotify:drop notification if they exist in the outgoing queue

fanotify listeners get an open file descriptor to the object in question so
the ordering of operations is not as important as in other notification
systems.  inotify will drop events if the last event in the event FIFO is
the same as the current event.  This patch will drop fanotify events if
they are the same as another event anywhere in the event FIFO.

Signed-off-by: Eric Paris <eparis@redhat.com>
---
 fs/notify/fanotify/fanotify.c | 45 +++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 43 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
index 3ffb9db..c35c117 100644
--- a/fs/notify/fanotify/fanotify.c
+++ b/fs/notify/fanotify/fanotify.c
@@ -6,6 +6,45 @@
 
 #include "fanotify.h"
 
+static bool should_merge(struct fsnotify_event *old, struct fsnotify_event *new)
+{
+	pr_debug("%s: old=%p new=%p\n", __func__, old, new);
+
+	if ((old->mask == new->mask) &&
+	    (old->to_tell == new->to_tell) &&
+	    (old->data_type == new->data_type)) {
+		switch (old->data_type) {
+		case (FSNOTIFY_EVENT_PATH):
+			if ((old->path.mnt == new->path.mnt) &&
+			    (old->path.dentry == new->path.dentry))
+				return true;
+		case (FSNOTIFY_EVENT_NONE):
+			return true;
+		default:
+			BUG();
+		};
+	}
+	return false;
+}
+
+static int fanotify_merge(struct list_head *list, struct fsnotify_event *event)
+{
+	struct fsnotify_event_holder *holder;
+	struct fsnotify_event *test_event;
+
+	pr_debug("%s: list=%p event=%p\n", __func__, list, event);
+
+	/* and the list better be locked by something too! */
+
+	list_for_each_entry_reverse(holder, list, event_list) {
+		test_event = holder->event;
+		if (should_merge(test_event, event))
+			return -EEXIST;
+	}
+
+	return 0;
+}
+
 static int fanotify_handle_event(struct fsnotify_group *group, struct fsnotify_event *event)
 {
 	int ret;
@@ -21,8 +60,10 @@ static int fanotify_handle_event(struct fsnotify_group *group, struct fsnotify_e
 
 	pr_debug("%s: group=%p event=%p\n", __func__, group, event);
 
-	ret = fsnotify_add_notify_event(group, event, NULL, NULL);
-
+	ret = fsnotify_add_notify_event(group, event, NULL, fanotify_merge);
+	/* -EEXIST means this event was merged with another, not that it was an error */
+	if (ret == -EEXIST)
+		ret = 0;
 	return ret;
 }
 
-- 
cgit v1.1


From a12a7dd3284f5644326af1ea53b35030f205dd29 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Thu, 17 Dec 2009 21:24:25 -0500
Subject: fanotify: merge notification events with different masks

Instead of just merging fanotify events if they are exactly the same, merge
notification events with different masks.  To do this we have to clone the
old event, update the mask in the new event with the new merged mask, and
put the new event in place of the old event.

Signed-off-by: Eric Paris <eparis@redhat.com>
---
 fs/notify/fanotify/fanotify.c | 39 ++++++++++++++++++++++++++++++---------
 1 file changed, 30 insertions(+), 9 deletions(-)

(limited to 'fs')

diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
index c35c117..8e574d6 100644
--- a/fs/notify/fanotify/fanotify.c
+++ b/fs/notify/fanotify/fanotify.c
@@ -10,8 +10,7 @@ static bool should_merge(struct fsnotify_event *old, struct fsnotify_event *new)
 {
 	pr_debug("%s: old=%p new=%p\n", __func__, old, new);
 
-	if ((old->mask == new->mask) &&
-	    (old->to_tell == new->to_tell) &&
+	if ((old->to_tell == new->to_tell) &&
 	    (old->data_type == new->data_type)) {
 		switch (old->data_type) {
 		case (FSNOTIFY_EVENT_PATH):
@@ -29,20 +28,42 @@ static bool should_merge(struct fsnotify_event *old, struct fsnotify_event *new)
 
 static int fanotify_merge(struct list_head *list, struct fsnotify_event *event)
 {
-	struct fsnotify_event_holder *holder;
+	struct fsnotify_event_holder *test_holder;
 	struct fsnotify_event *test_event;
+	struct fsnotify_event *new_event;
+	int ret = 0;
 
 	pr_debug("%s: list=%p event=%p\n", __func__, list, event);
 
 	/* and the list better be locked by something too! */
 
-	list_for_each_entry_reverse(holder, list, event_list) {
-		test_event = holder->event;
-		if (should_merge(test_event, event))
-			return -EEXIST;
+	list_for_each_entry_reverse(test_holder, list, event_list) {
+		test_event = test_holder->event;
+		if (should_merge(test_event, event)) {
+			ret = -EEXIST;
+
+			/* if they are exactly the same we are done */
+			if (test_event->mask == event->mask)
+				goto out;
+
+			/* can't allocate memory, merge was no possible */
+			new_event = fsnotify_clone_event(test_event);
+			if (unlikely(!new_event)) {
+				ret = 0;
+				goto out;
+			}
+
+			/* build new event and replace it on the list */
+			new_event->mask = (test_event->mask | event->mask);
+			fsnotify_replace_event(test_holder, new_event);
+			/* match ref from fsnotify_clone_event() */
+			fsnotify_put_event(new_event);
+
+			break;
+		}
 	}
-
-	return 0;
+out:
+	return ret;
 }
 
 static int fanotify_handle_event(struct fsnotify_group *group, struct fsnotify_event *event)
-- 
cgit v1.1


From 9dced01a0939f3e952eca8c21427ceec1f473dcf Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Thu, 17 Dec 2009 21:24:25 -0500
Subject: fanotify: do not clone on merge unless needed

Currently if 2 events are going to be merged on the notication queue with
different masks the second event will be cloned and will replace the first
event.  However if this notification queue is the only place referencing
the event in question there is no reason not to just update the event in
place.  We can tell this if the event->refcnt == 1.  Since we hold a
reference for each queue this event is on we know that when refcnt == 1
this is the only queue.  The other concern is that it might be about to be
added to a new queue, but this can't be the case since fsnotify holds a
reference on the event until it is finished adding it to queues.

Signed-off-by: Eric Paris <eparis@redhat.com>
---
 fs/notify/fanotify/fanotify.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'fs')

diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
index 8e574d6..5b0b6b4 100644
--- a/fs/notify/fanotify/fanotify.c
+++ b/fs/notify/fanotify/fanotify.c
@@ -46,6 +46,16 @@ static int fanotify_merge(struct list_head *list, struct fsnotify_event *event)
 			if (test_event->mask == event->mask)
 				goto out;
 
+			/*
+			 * if the refcnt == 1 this is the only queue
+			 * for this event and so we can update the mask
+			 * in place.
+			 */
+			if (atomic_read(&test_event->refcnt) == 1) {
+				test_event->mask |= event->mask;
+				goto out;
+			}
+
 			/* can't allocate memory, merge was no possible */
 			new_event = fsnotify_clone_event(test_event);
 			if (unlikely(!new_event)) {
-- 
cgit v1.1


From 11637e4b7dc098e9a863f0a619d55ebc60f5949e Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Thu, 17 Dec 2009 21:24:25 -0500
Subject: fanotify: fanotify_init syscall declaration

This patch defines a new syscall fanotify_init() of the form:

int sys_fanotify_init(unsigned int flags, unsigned int event_f_flags,
		      unsigned int priority)

This syscall is used to create and fanotify group.  This is very similar to
the inotify_init() syscall.

Signed-off-by: Eric Paris <eparis@redhat.com>
---
 fs/notify/fanotify/Makefile        |  2 +-
 fs/notify/fanotify/fanotify_user.c | 13 +++++++++++++
 2 files changed, 14 insertions(+), 1 deletion(-)
 create mode 100644 fs/notify/fanotify/fanotify_user.c

(limited to 'fs')

diff --git a/fs/notify/fanotify/Makefile b/fs/notify/fanotify/Makefile
index e7d39c0..0999213 100644
--- a/fs/notify/fanotify/Makefile
+++ b/fs/notify/fanotify/Makefile
@@ -1 +1 @@
-obj-$(CONFIG_FANOTIFY)		+= fanotify.o
+obj-$(CONFIG_FANOTIFY)		+= fanotify.o fanotify_user.o
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
new file mode 100644
index 0000000..cf176fc
--- /dev/null
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -0,0 +1,13 @@
+#include <linux/fcntl.h>
+#include <linux/fs.h>
+#include <linux/fsnotify_backend.h>
+#include <linux/security.h>
+#include <linux/syscalls.h>
+
+#include "fanotify.h"
+
+SYSCALL_DEFINE3(fanotify_init, unsigned int, flags, unsigned int, event_f_flags,
+		unsigned int, priority)
+{
+	return -ENOSYS;
+}
-- 
cgit v1.1


From 52c923dd079df49f58016a9e56df184b132611d6 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Thu, 17 Dec 2009 21:24:26 -0500
Subject: fanotify: fanotify_init syscall implementation

NAME
	fanotify_init - initialize an fanotify group

SYNOPSIS
	int fanotify_init(unsigned int flags, unsigned int event_f_flags, int priority);

DESCRIPTION
	fanotify_init() initializes a new fanotify instance and returns a file
	descriptor associated with the new fanotify event queue.

	The following values can be OR'd into the flags field:

	FAN_NONBLOCK Set the O_NONBLOCK file status flag on the new open file description.
		Using this flag saves extra calls to fcntl(2) to achieve the same
		result.

	FAN_CLOEXEC Set the close-on-exec (FD_CLOEXEC) flag on the new file descriptor.
		See the description of the O_CLOEXEC flag in open(2) for reasons why
		this may be useful.

	The event_f_flags argument is unused and must be set to 0

	The priority argument is unused and must be set to 0

RETURN VALUE
	On success, this system call return a new file descriptor. On error, -1 is
	returned, and errno is set to indicate the error.

ERRORS
	EINVAL An invalid value was specified in flags.

	EINVAL A non-zero valid was passed in event_f_flags or in priority

	ENFILE The system limit on the total number of file descriptors has been reached.

	ENOMEM Insufficient kernel memory is available.

CONFORMING TO
	These system calls are Linux-specific.

Signed-off-by: Eric Paris <eparis@redhat.com>
---
 fs/notify/fanotify/fanotify.h      |  2 ++
 fs/notify/fanotify/fanotify_user.c | 61 +++++++++++++++++++++++++++++++++++++-
 2 files changed, 62 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/notify/fanotify/fanotify.h b/fs/notify/fanotify/fanotify.h
index 50765eb..dd656cf 100644
--- a/fs/notify/fanotify/fanotify.h
+++ b/fs/notify/fanotify/fanotify.h
@@ -4,6 +4,8 @@
 #include <linux/kernel.h>
 #include <linux/types.h>
 
+extern const struct fsnotify_ops fanotify_fsnotify_ops;
+
 static inline bool fanotify_mask_valid(__u32 mask)
 {
 	if (mask & ~((__u32)FAN_ALL_INCOMING_EVENTS))
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index cf176fc..67c0b5e 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -1,13 +1,72 @@
 #include <linux/fcntl.h>
 #include <linux/fs.h>
+#include <linux/anon_inodes.h>
 #include <linux/fsnotify_backend.h>
 #include <linux/security.h>
 #include <linux/syscalls.h>
 
 #include "fanotify.h"
 
+static int fanotify_release(struct inode *ignored, struct file *file)
+{
+	struct fsnotify_group *group = file->private_data;
+
+	pr_debug("%s: file=%p group=%p\n", __func__, file, group);
+
+	/* matches the fanotify_init->fsnotify_alloc_group */
+	fsnotify_put_group(group);
+
+	return 0;
+}
+
+static const struct file_operations fanotify_fops = {
+	.poll		= NULL,
+	.read		= NULL,
+	.fasync		= NULL,
+	.release	= fanotify_release,
+	.unlocked_ioctl	= NULL,
+	.compat_ioctl	= NULL,
+};
+
+/* fanotify syscalls */
 SYSCALL_DEFINE3(fanotify_init, unsigned int, flags, unsigned int, event_f_flags,
 		unsigned int, priority)
 {
-	return -ENOSYS;
+	struct fsnotify_group *group;
+	int f_flags, fd;
+
+	pr_debug("%s: flags=%d event_f_flags=%d priority=%d\n",
+		__func__, flags, event_f_flags, priority);
+
+	if (event_f_flags)
+		return -EINVAL;
+	if (priority)
+		return -EINVAL;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EACCES;
+
+	if (flags & ~FAN_ALL_INIT_FLAGS)
+		return -EINVAL;
+
+	f_flags = (O_RDONLY | FMODE_NONOTIFY);
+	if (flags & FAN_CLOEXEC)
+		f_flags |= O_CLOEXEC;
+	if (flags & FAN_NONBLOCK)
+		f_flags |= O_NONBLOCK;
+
+	/* fsnotify_alloc_group takes a ref.  Dropped in fanotify_release */
+	group = fsnotify_alloc_group(&fanotify_fsnotify_ops);
+	if (IS_ERR(group))
+		return PTR_ERR(group);
+
+	fd = anon_inode_getfd("[fanotify]", &fanotify_fops, group, f_flags);
+	if (fd < 0)
+		goto out_put_group;
+
+	return fd;
+
+out_put_group:
+	fsnotify_put_group(group);
+	return fd;
 }
-- 
cgit v1.1


From bbaa4168b2d2d8cc674e6d35806e8426aef464b8 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Thu, 17 Dec 2009 21:24:26 -0500
Subject: fanotify: sys_fanotify_mark declartion

This patch simply declares the new sys_fanotify_mark syscall

int fanotify_mark(int fanotify_fd, unsigned int flags, u64_mask,
		  int dfd const char *pathname)

Signed-off-by: Eric Paris <eparis@redhat.com>
---
 fs/notify/fanotify/fanotify_user.c | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'fs')

diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index 67c0b5e..55d6e37 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -70,3 +70,9 @@ out_put_group:
 	fsnotify_put_group(group);
 	return fd;
 }
+
+SYSCALL_DEFINE5(fanotify_mark, int, fanotify_fd, unsigned int, flags,
+		__u64, mask, int, dfd, const char  __user *, pathname)
+{
+	return -ENOSYS;
+}
-- 
cgit v1.1


From 2a3edf86040a7e15684525a2aadc29f532c51325 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Thu, 17 Dec 2009 21:24:26 -0500
Subject: fanotify: fanotify_mark syscall implementation

NAME
	fanotify_mark - add, remove, or modify an fanotify mark on a
filesystem object

SYNOPSIS
	int fanotify_mark(int fanotify_fd, unsigned int flags, u64 mask,
			  int dfd, const char *pathname)

DESCRIPTION
	fanotify_mark() is used to add remove or modify a mark on a filesystem
	object.  Marks are used to indicate that the fanotify group is
	interested in events which occur on that object.  At this point in
	time marks may only be added to files and directories.

	fanotify_fd must be a file descriptor returned by fanotify_init()

	The flags field must contain exactly one of the following:

	FAN_MARK_ADD - or the bits in mask and ignored mask into the mark
	FAN_MARK_REMOVE - bitwise remove the bits in mask and ignored mark
		from the mark

	The following values can be OR'd into the flags field:

	FAN_MARK_DONT_FOLLOW - same meaning as O_NOFOLLOW as described in open(2)
	FAN_MARK_ONLYDIR - same meaning as O_DIRECTORY as described in open(2)

	dfd may be any of the following:
	AT_FDCWD: the object will be lookup up based on pathname similar
		to open(2)

	file descriptor of a directory: if pathname is not NULL the
		object to modify will be lookup up similar to openat(2)

	file descriptor of the final object: if pathname is NULL the
		object to modify will be the object referenced by dfd

	The mask is the bitwise OR of the set of events of interest such as:
	FAN_ACCESS		- object was accessed (read)
	FAN_MODIFY		- object was modified (write)
	FAN_CLOSE_WRITE		- object was writable and was closed
	FAN_CLOSE_NOWRITE	- object was read only and was closed
	FAN_OPEN		- object was opened
	FAN_EVENT_ON_CHILD	- interested in objected that happen to
				  children.  Only relavent when the object
				  is a directory
	FAN_Q_OVERFLOW		- event queue overflowed (not implemented)

RETURN VALUE
	On success, this system call returns 0. On error, -1 is
	returned, and errno is set to indicate the error.

ERRORS
	EINVAL An invalid value was specified in flags.

	EINVAL An invalid value was specified in mask.

	EINVAL An invalid value was specified in ignored_mask.

	EINVAL fanotify_fd is not a file descriptor as returned by
	fanotify_init()

	EBADF fanotify_fd is not a valid file descriptor

	EBADF dfd is not a valid file descriptor and path is NULL.

	ENOTDIR dfd is not a directory and path is not NULL

	EACCESS no search permissions on some part of the path

	ENENT file not found

	ENOMEM Insufficient kernel memory is available.

CONFORMING TO
	These system calls are Linux-specific.

Signed-off-by: Eric Paris <eparis@redhat.com>
---
 fs/notify/fanotify/fanotify.h      |  18 +++
 fs/notify/fanotify/fanotify_user.c | 239 ++++++++++++++++++++++++++++++++++++-
 2 files changed, 256 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/notify/fanotify/fanotify.h b/fs/notify/fanotify/fanotify.h
index dd656cf..59c3331 100644
--- a/fs/notify/fanotify/fanotify.h
+++ b/fs/notify/fanotify/fanotify.h
@@ -6,6 +6,24 @@
 
 extern const struct fsnotify_ops fanotify_fsnotify_ops;
 
+static inline bool fanotify_mark_flags_valid(unsigned int flags)
+{
+	/* must be either and add or a remove */
+	if (!(flags & (FAN_MARK_ADD | FAN_MARK_REMOVE)))
+		return false;
+
+	/* cannot be both add and remove */
+	if ((flags & FAN_MARK_ADD) &&
+	    (flags & FAN_MARK_REMOVE))
+		return false;
+
+	/* cannot have more flags than we know about */
+	if (flags & ~FAN_ALL_MARK_FLAGS)
+		return false;
+
+	return true;
+}
+
 static inline bool fanotify_mask_valid(__u32 mask)
 {
 	if (mask & ~((__u32)FAN_ALL_INCOMING_EVENTS))
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index 55d6e37..bc4fa48 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -1,12 +1,18 @@
 #include <linux/fcntl.h>
+#include <linux/file.h>
 #include <linux/fs.h>
 #include <linux/anon_inodes.h>
 #include <linux/fsnotify_backend.h>
+#include <linux/init.h>
+#include <linux/namei.h>
 #include <linux/security.h>
 #include <linux/syscalls.h>
+#include <linux/types.h>
 
 #include "fanotify.h"
 
+static struct kmem_cache *fanotify_mark_cache __read_mostly;
+
 static int fanotify_release(struct inode *ignored, struct file *file)
 {
 	struct fsnotify_group *group = file->private_data;
@@ -28,6 +34,185 @@ static const struct file_operations fanotify_fops = {
 	.compat_ioctl	= NULL,
 };
 
+static void fanotify_free_mark(struct fsnotify_mark *fsn_mark)
+{
+	kmem_cache_free(fanotify_mark_cache, fsn_mark);
+}
+
+static int fanotify_find_path(int dfd, const char __user *filename,
+			      struct path *path, unsigned int flags)
+{
+	int ret;
+
+	pr_debug("%s: dfd=%d filename=%p flags=%x\n", __func__,
+		 dfd, filename, flags);
+
+	if (filename == NULL) {
+		struct file *file;
+		int fput_needed;
+
+		ret = -EBADF;
+		file = fget_light(dfd, &fput_needed);
+		if (!file)
+			goto out;
+
+		ret = -ENOTDIR;
+		if ((flags & FAN_MARK_ONLYDIR) &&
+		    !(S_ISDIR(file->f_path.dentry->d_inode->i_mode))) {
+			fput_light(file, fput_needed);
+			goto out;
+		}
+
+		*path = file->f_path;
+		path_get(path);
+		fput_light(file, fput_needed);
+	} else {
+		unsigned int lookup_flags = 0;
+
+		if (!(flags & FAN_MARK_DONT_FOLLOW))
+			lookup_flags |= LOOKUP_FOLLOW;
+		if (flags & FAN_MARK_ONLYDIR)
+			lookup_flags |= LOOKUP_DIRECTORY;
+
+		ret = user_path_at(dfd, filename, lookup_flags, path);
+		if (ret)
+			goto out;
+	}
+
+	/* you can only watch an inode if you have read permissions on it */
+	ret = inode_permission(path->dentry->d_inode, MAY_READ);
+	if (ret)
+		path_put(path);
+out:
+	return ret;
+}
+
+static int fanotify_remove_mark(struct fsnotify_group *group,
+				struct inode *inode,
+				__u32 mask)
+{
+	struct fsnotify_mark *fsn_mark;
+	__u32 new_mask;
+
+	pr_debug("%s: group=%p inode=%p mask=%x\n", __func__,
+		 group, inode, mask);
+
+	fsn_mark = fsnotify_find_mark(group, inode);
+	if (!fsn_mark)
+		return -ENOENT;
+
+	spin_lock(&fsn_mark->lock);
+	fsn_mark->mask &= ~mask;
+	new_mask = fsn_mark->mask;
+	spin_unlock(&fsn_mark->lock);
+
+	if (!new_mask)
+		fsnotify_destroy_mark(fsn_mark);
+	else
+		fsnotify_recalc_inode_mask(inode);
+
+	fsnotify_recalc_group_mask(group);
+
+	/* matches the fsnotify_find_mark() */
+	fsnotify_put_mark(fsn_mark);
+
+	return 0;
+}
+
+static int fanotify_add_mark(struct fsnotify_group *group,
+			     struct inode *inode,
+			     __u32 mask)
+{
+	struct fsnotify_mark *fsn_mark;
+	__u32 old_mask, new_mask;
+	int ret;
+
+	pr_debug("%s: group=%p inode=%p mask=%x\n", __func__,
+		 group, inode, mask);
+
+	fsn_mark = fsnotify_find_mark(group, inode);
+	if (!fsn_mark) {
+		struct fsnotify_mark *new_fsn_mark;
+
+		ret = -ENOMEM;
+		new_fsn_mark = kmem_cache_alloc(fanotify_mark_cache, GFP_KERNEL);
+		if (!new_fsn_mark)
+			goto out;
+
+		fsnotify_init_mark(new_fsn_mark, fanotify_free_mark);
+		ret = fsnotify_add_mark(new_fsn_mark, group, inode, 0);
+		if (ret) {
+			fanotify_free_mark(new_fsn_mark);
+			goto out;
+		}
+
+		fsn_mark = new_fsn_mark;
+	}
+
+	ret = 0;
+
+	spin_lock(&fsn_mark->lock);
+	old_mask = fsn_mark->mask;
+	fsn_mark->mask |= mask;
+	new_mask = fsn_mark->mask;
+	spin_unlock(&fsn_mark->lock);
+
+	/* we made changes to a mask, update the group mask and the inode mask
+	 * so things happen quickly. */
+	if (old_mask != new_mask) {
+		/* more bits in old than in new? */
+		int dropped = (old_mask & ~new_mask);
+		/* more bits in this mark than the inode's mask? */
+		int do_inode = (new_mask & ~inode->i_fsnotify_mask);
+		/* more bits in this mark than the group? */
+		int do_group = (new_mask & ~group->mask);
+
+		/* update the inode with this new mark */
+		if (dropped || do_inode)
+			fsnotify_recalc_inode_mask(inode);
+
+		/* update the group mask with the new mask */
+		if (dropped || do_group)
+			fsnotify_recalc_group_mask(group);
+	}
+
+	/* match the init or the find.... */
+	fsnotify_put_mark(fsn_mark);
+out:
+	return ret;
+}
+
+static int fanotify_update_mark(struct fsnotify_group *group,
+				struct inode *inode, int flags,
+				__u32 mask)
+{
+	pr_debug("%s: group=%p inode=%p flags=%x mask=%x\n", __func__,
+		 group, inode, flags, mask);
+
+	if (flags & FAN_MARK_ADD)
+		fanotify_add_mark(group, inode, mask);
+	else if (flags & FAN_MARK_REMOVE)
+		fanotify_remove_mark(group, inode, mask);
+	else
+		BUG();
+
+	return 0;
+}
+
+static bool fanotify_mark_validate_input(int flags,
+					 __u32 mask)
+{
+	pr_debug("%s: flags=%x mask=%x\n", __func__, flags, mask);
+
+	/* are flags valid of this operation? */
+	if (!fanotify_mark_flags_valid(flags))
+		return false;
+	/* is the mask valid? */
+	if (!fanotify_mask_valid(mask))
+		return false;
+	return true;
+}
+
 /* fanotify syscalls */
 SYSCALL_DEFINE3(fanotify_init, unsigned int, flags, unsigned int, event_f_flags,
 		unsigned int, priority)
@@ -74,5 +259,57 @@ out_put_group:
 SYSCALL_DEFINE5(fanotify_mark, int, fanotify_fd, unsigned int, flags,
 		__u64, mask, int, dfd, const char  __user *, pathname)
 {
-	return -ENOSYS;
+	struct inode *inode;
+	struct fsnotify_group *group;
+	struct file *filp;
+	struct path path;
+	int ret, fput_needed;
+
+	pr_debug("%s: fanotify_fd=%d flags=%x dfd=%d pathname=%p mask=%llx\n",
+		 __func__, fanotify_fd, flags, dfd, pathname, mask);
+
+	/* we only use the lower 32 bits as of right now. */
+	if (mask & ((__u64)0xffffffff << 32))
+		return -EINVAL;
+
+	if (!fanotify_mark_validate_input(flags, mask))
+		return -EINVAL;
+
+	filp = fget_light(fanotify_fd, &fput_needed);
+	if (unlikely(!filp))
+		return -EBADF;
+
+	/* verify that this is indeed an fanotify instance */
+	ret = -EINVAL;
+	if (unlikely(filp->f_op != &fanotify_fops))
+		goto fput_and_out;
+
+	ret = fanotify_find_path(dfd, pathname, &path, flags);
+	if (ret)
+		goto fput_and_out;
+
+	/* inode held in place by reference to path; group by fget on fd */
+	inode = path.dentry->d_inode;
+	group = filp->private_data;
+
+	/* create/update an inode mark */
+	ret = fanotify_update_mark(group, inode, flags, mask);
+
+	path_put(&path);
+fput_and_out:
+	fput_light(filp, fput_needed);
+	return ret;
+}
+
+/*
+ * fanotify_user_setup - Our initialization function.  Note that we cannnot return
+ * error because we have compiled-in VFS hooks.  So an (unlikely) failure here
+ * must result in panic().
+ */
+static int __init fanotify_user_setup(void)
+{
+	fanotify_mark_cache = KMEM_CACHE(fsnotify_mark, SLAB_PANIC);
+
+	return 0;
 }
+device_initcall(fanotify_user_setup);
-- 
cgit v1.1


From a1014f102322398e67524b68b3300acf384e6c1f Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Thu, 17 Dec 2009 21:24:26 -0500
Subject: fanotify: send events using read

Send events to userspace by reading the file descriptor from fanotify_init().
One will get blocks of data which look like:

struct fanotify_event_metadata {
	__u32 event_len;
	__u32 vers;
	__s32 fd;
	__u64 mask;
	__s64 pid;
	__u64 cookie;
} __attribute__ ((packed));

Simple code to retrieve and deal with events is below

	while ((len = read(fan_fd, buf, sizeof(buf))) > 0) {
		struct fanotify_event_metadata *metadata;

		metadata = (void *)buf;
		while(FAN_EVENT_OK(metadata, len)) {
			[PROCESS HERE!!]
			if (metadata->fd >= 0 && close(metadata->fd) != 0)
				goto fail;
			metadata = FAN_EVENT_NEXT(metadata, len);
		}
	}

Signed-off-by: Eric Paris <eparis@redhat.com>
---
 fs/notify/fanotify/fanotify.h      |   5 +
 fs/notify/fanotify/fanotify_user.c | 220 ++++++++++++++++++++++++++++++++++++-
 2 files changed, 221 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/notify/fanotify/fanotify.h b/fs/notify/fanotify/fanotify.h
index 59c3331..5608783 100644
--- a/fs/notify/fanotify/fanotify.h
+++ b/fs/notify/fanotify/fanotify.h
@@ -30,3 +30,8 @@ static inline bool fanotify_mask_valid(__u32 mask)
 		return false;
 	return true;
 }
+
+static inline __u32 fanotify_outgoing_mask(__u32 mask)
+{
+	return mask & FAN_ALL_OUTGOING_EVENTS;
+}
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index bc4fa48..a99550f 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -4,15 +4,202 @@
 #include <linux/anon_inodes.h>
 #include <linux/fsnotify_backend.h>
 #include <linux/init.h>
+#include <linux/mount.h>
 #include <linux/namei.h>
+#include <linux/poll.h>
 #include <linux/security.h>
 #include <linux/syscalls.h>
 #include <linux/types.h>
+#include <linux/uaccess.h>
+
+#include <asm/ioctls.h>
 
 #include "fanotify.h"
 
 static struct kmem_cache *fanotify_mark_cache __read_mostly;
 
+/*
+ * Get an fsnotify notification event if one exists and is small
+ * enough to fit in "count". Return an error pointer if the count
+ * is not large enough.
+ *
+ * Called with the group->notification_mutex held.
+ */
+static struct fsnotify_event *get_one_event(struct fsnotify_group *group,
+					    size_t count)
+{
+	BUG_ON(!mutex_is_locked(&group->notification_mutex));
+
+	pr_debug("%s: group=%p count=%zd\n", __func__, group, count);
+
+	if (fsnotify_notify_queue_is_empty(group))
+		return NULL;
+
+	if (FAN_EVENT_METADATA_LEN > count)
+		return ERR_PTR(-EINVAL);
+
+	/* held the notification_mutex the whole time, so this is the
+	 * same event we peeked above */
+	return fsnotify_remove_notify_event(group);
+}
+
+static int create_and_fill_fd(struct fsnotify_group *group,
+			      struct fanotify_event_metadata *metadata,
+			      struct fsnotify_event *event)
+{
+	int client_fd;
+	struct dentry *dentry;
+	struct vfsmount *mnt;
+	struct file *new_file;
+
+	pr_debug("%s: group=%p metadata=%p event=%p\n", __func__, group,
+		 metadata, event);
+
+	client_fd = get_unused_fd();
+	if (client_fd < 0)
+		return client_fd;
+
+	if (event->data_type != FSNOTIFY_EVENT_PATH) {
+		WARN_ON(1);
+		put_unused_fd(client_fd);
+		return -EINVAL;
+	}
+
+	/*
+	 * we need a new file handle for the userspace program so it can read even if it was
+	 * originally opened O_WRONLY.
+	 */
+	dentry = dget(event->path.dentry);
+	mnt = mntget(event->path.mnt);
+	/* it's possible this event was an overflow event.  in that case dentry and mnt
+	 * are NULL;  That's fine, just don't call dentry open */
+	if (dentry && mnt)
+		new_file = dentry_open(dentry, mnt,
+				       O_RDONLY | O_LARGEFILE | FMODE_NONOTIFY,
+				       current_cred());
+	else
+		new_file = ERR_PTR(-EOVERFLOW);
+	if (IS_ERR(new_file)) {
+		/*
+		 * we still send an event even if we can't open the file.  this
+		 * can happen when say tasks are gone and we try to open their
+		 * /proc files or we try to open a WRONLY file like in sysfs
+		 * we just send the errno to userspace since there isn't much
+		 * else we can do.
+		 */
+		put_unused_fd(client_fd);
+		client_fd = PTR_ERR(new_file);
+	} else {
+		fd_install(client_fd, new_file);
+	}
+
+	metadata->fd = client_fd;
+
+	return 0;
+}
+
+static ssize_t fill_event_metadata(struct fsnotify_group *group,
+				   struct fanotify_event_metadata *metadata,
+				   struct fsnotify_event *event)
+{
+	pr_debug("%s: group=%p metadata=%p event=%p\n", __func__,
+		 group, metadata, event);
+
+	metadata->event_len = FAN_EVENT_METADATA_LEN;
+	metadata->vers = FANOTIFY_METADATA_VERSION;
+	metadata->mask = fanotify_outgoing_mask(event->mask);
+
+	return create_and_fill_fd(group, metadata, event);
+
+}
+
+static ssize_t copy_event_to_user(struct fsnotify_group *group,
+				  struct fsnotify_event *event,
+				  char __user *buf)
+{
+	struct fanotify_event_metadata fanotify_event_metadata;
+	int ret;
+
+	pr_debug("%s: group=%p event=%p\n", __func__, group, event);
+
+	ret = fill_event_metadata(group, &fanotify_event_metadata, event);
+	if (ret)
+		return ret;
+
+	if (copy_to_user(buf, &fanotify_event_metadata, FAN_EVENT_METADATA_LEN))
+		return -EFAULT;
+
+	return FAN_EVENT_METADATA_LEN;
+}
+
+/* intofiy userspace file descriptor functions */
+static unsigned int fanotify_poll(struct file *file, poll_table *wait)
+{
+	struct fsnotify_group *group = file->private_data;
+	int ret = 0;
+
+	poll_wait(file, &group->notification_waitq, wait);
+	mutex_lock(&group->notification_mutex);
+	if (!fsnotify_notify_queue_is_empty(group))
+		ret = POLLIN | POLLRDNORM;
+	mutex_unlock(&group->notification_mutex);
+
+	return ret;
+}
+
+static ssize_t fanotify_read(struct file *file, char __user *buf,
+			     size_t count, loff_t *pos)
+{
+	struct fsnotify_group *group;
+	struct fsnotify_event *kevent;
+	char __user *start;
+	int ret;
+	DEFINE_WAIT(wait);
+
+	start = buf;
+	group = file->private_data;
+
+	pr_debug("%s: group=%p\n", __func__, group);
+
+	while (1) {
+		prepare_to_wait(&group->notification_waitq, &wait, TASK_INTERRUPTIBLE);
+
+		mutex_lock(&group->notification_mutex);
+		kevent = get_one_event(group, count);
+		mutex_unlock(&group->notification_mutex);
+
+		if (kevent) {
+			ret = PTR_ERR(kevent);
+			if (IS_ERR(kevent))
+				break;
+			ret = copy_event_to_user(group, kevent, buf);
+			fsnotify_put_event(kevent);
+			if (ret < 0)
+				break;
+			buf += ret;
+			count -= ret;
+			continue;
+		}
+
+		ret = -EAGAIN;
+		if (file->f_flags & O_NONBLOCK)
+			break;
+		ret = -EINTR;
+		if (signal_pending(current))
+			break;
+
+		if (start != buf)
+			break;
+
+		schedule();
+	}
+
+	finish_wait(&group->notification_waitq, &wait);
+	if (start != buf && ret != -EFAULT)
+		ret = buf - start;
+	return ret;
+}
+
 static int fanotify_release(struct inode *ignored, struct file *file)
 {
 	struct fsnotify_group *group = file->private_data;
@@ -25,13 +212,38 @@ static int fanotify_release(struct inode *ignored, struct file *file)
 	return 0;
 }
 
+static long fanotify_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+	struct fsnotify_group *group;
+	struct fsnotify_event_holder *holder;
+	void __user *p;
+	int ret = -ENOTTY;
+	size_t send_len = 0;
+
+	group = file->private_data;
+
+	p = (void __user *) arg;
+
+	switch (cmd) {
+	case FIONREAD:
+		mutex_lock(&group->notification_mutex);
+		list_for_each_entry(holder, &group->notification_list, event_list)
+			send_len += FAN_EVENT_METADATA_LEN;
+		mutex_unlock(&group->notification_mutex);
+		ret = put_user(send_len, (int __user *) p);
+		break;
+	}
+
+	return ret;
+}
+
 static const struct file_operations fanotify_fops = {
-	.poll		= NULL,
-	.read		= NULL,
+	.poll		= fanotify_poll,
+	.read		= fanotify_read,
 	.fasync		= NULL,
 	.release	= fanotify_release,
-	.unlocked_ioctl	= NULL,
-	.compat_ioctl	= NULL,
+	.unlocked_ioctl	= fanotify_ioctl,
+	.compat_ioctl	= fanotify_ioctl,
 };
 
 static void fanotify_free_mark(struct fsnotify_mark *fsn_mark)
-- 
cgit v1.1


From ef601a9cfd21fe9ce57e0ee3f4a31552ffb96366 Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Thu, 17 Dec 2009 21:24:26 -0500
Subject: fanotify: select ANON_INODES.

fanotify references anon_inode_getfd(), which is only available with
ANON_INODES enabled. Presently this bails out with the following:

  LD      vmlinux
fs/built-in.o: In function `sys_fanotify_init':
(.text+0x26d1c): undefined reference to `anon_inode_getfd'
make: *** [vmlinux] Error 1

which is trivially corrected by adding an ANON_INODES select.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>
Signed-off-by: Eric Paris <eparis@redhat.com>
---
 fs/notify/fanotify/Kconfig | 1 +
 1 file changed, 1 insertion(+)

(limited to 'fs')

diff --git a/fs/notify/fanotify/Kconfig b/fs/notify/fanotify/Kconfig
index f9d7ae0..668e5df 100644
--- a/fs/notify/fanotify/Kconfig
+++ b/fs/notify/fanotify/Kconfig
@@ -1,6 +1,7 @@
 config FANOTIFY
 	bool "Filesystem wide access notification"
 	select FSNOTIFY
+	select ANON_INODES
 	default y
 	---help---
 	   Say Y here to enable fanotify suport.  fanotify is a file access
-- 
cgit v1.1


From 9bbfc964b89009d0cadcec7027afc92ee742e95e Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Thu, 17 Dec 2009 21:24:26 -0500
Subject: fanotify: CONFIG_HAVE_SYSCALL_WRAPPERS for sys_fanotify_mark

Please note that you need the patch below in addition, otherwise the
syscall wrapper stuff won't work on those 32 bit architectures which enable
the wrappers.

When enabled the syscall wrapper defines always take long parameters and then
cast them to whatever is needed. This approach doesn't work for the 32 bit
case where the original syscall takes a long long parameter, since we would
lose the upper 32 bits.
So syscalls with 64 bit arguments are special cases wrt to syscall wrappers
and enp up in the ugliness below (see also sys_fallocate). In addition these
special cased syscall wrappers have the drawback that ftrace syscall tracing
doesn't work on them, since they don't get defined by using the usual macros.

Signed-off-by: Eric Paris <eparis@redhat.com>
---
 fs/notify/fanotify/fanotify_user.c | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index a99550f..a9ced3f 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -468,8 +468,9 @@ out_put_group:
 	return fd;
 }
 
-SYSCALL_DEFINE5(fanotify_mark, int, fanotify_fd, unsigned int, flags,
-		__u64, mask, int, dfd, const char  __user *, pathname)
+SYSCALL_DEFINE(fanotify_mark)(int fanotify_fd, unsigned int flags,
+			      __u64 mask, int dfd,
+			      const char  __user * pathname)
 {
 	struct inode *inode;
 	struct fsnotify_group *group;
@@ -513,6 +514,17 @@ fput_and_out:
 	return ret;
 }
 
+#ifdef CONFIG_HAVE_SYSCALL_WRAPPERS
+asmlinkage long SyS_fanotify_mark(long fanotify_fd, long flags, __u64 mask,
+				  long dfd, long pathname)
+{
+	return SYSC_fanotify_mark((int) fanotify_fd, (unsigned int) flags,
+				  mask, (int) dfd,
+				  (const char  __user *) pathname);
+}
+SYSCALL_ALIAS(sys_fanotify_mark, SyS_fanotify_mark);
+#endif
+
 /*
  * fanotify_user_setup - Our initialization function.  Note that we cannnot return
  * error because we have compiled-in VFS hooks.  So an (unlikely) failure here
-- 
cgit v1.1


From 22aa425dec9e47051624714ae283eb2b6a473013 Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruen@suse.de>
Date: Thu, 17 Dec 2009 21:24:26 -0500
Subject: fanotify: create_fd cleanup

Code cleanup which does the fd creation work seperately from the userspace
metadata creation.  It fits better with the other code.

Signed-off-by: Andreas Gruenbacher <agruen@suse.de>
Signed-off-by: Eric Paris <eparis@redhat.com>
---
 fs/notify/fanotify/fanotify_user.c | 17 ++++++-----------
 1 file changed, 6 insertions(+), 11 deletions(-)

(limited to 'fs')

diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index a9ced3f..cf9c300 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -43,17 +43,14 @@ static struct fsnotify_event *get_one_event(struct fsnotify_group *group,
 	return fsnotify_remove_notify_event(group);
 }
 
-static int create_and_fill_fd(struct fsnotify_group *group,
-			      struct fanotify_event_metadata *metadata,
-			      struct fsnotify_event *event)
+static int create_fd(struct fsnotify_group *group, struct fsnotify_event *event)
 {
 	int client_fd;
 	struct dentry *dentry;
 	struct vfsmount *mnt;
 	struct file *new_file;
 
-	pr_debug("%s: group=%p metadata=%p event=%p\n", __func__, group,
-		 metadata, event);
+	pr_debug("%s: group=%p event=%p\n", __func__, group, event);
 
 	client_fd = get_unused_fd();
 	if (client_fd < 0)
@@ -93,9 +90,7 @@ static int create_and_fill_fd(struct fsnotify_group *group,
 		fd_install(client_fd, new_file);
 	}
 
-	metadata->fd = client_fd;
-
-	return 0;
+	return client_fd;
 }
 
 static ssize_t fill_event_metadata(struct fsnotify_group *group,
@@ -108,9 +103,9 @@ static ssize_t fill_event_metadata(struct fsnotify_group *group,
 	metadata->event_len = FAN_EVENT_METADATA_LEN;
 	metadata->vers = FANOTIFY_METADATA_VERSION;
 	metadata->mask = fanotify_outgoing_mask(event->mask);
+	metadata->fd = create_fd(group, event);
 
-	return create_and_fill_fd(group, metadata, event);
-
+	return metadata->fd;
 }
 
 static ssize_t copy_event_to_user(struct fsnotify_group *group,
@@ -123,7 +118,7 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group,
 	pr_debug("%s: group=%p event=%p\n", __func__, group, event);
 
 	ret = fill_event_metadata(group, &fanotify_event_metadata, event);
-	if (ret)
+	if (ret < 0)
 		return ret;
 
 	if (copy_to_user(buf, &fanotify_event_metadata, FAN_EVENT_METADATA_LEN))
-- 
cgit v1.1


From 32c3263221bd63316815286dccacdc7abfd7f3c4 Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruen@suse.de>
Date: Thu, 17 Dec 2009 21:24:27 -0500
Subject: fanotify: Add pids to events

Pass the process identifiers of the triggering processes to fanotify
listeners: this information is useful for event filtering and logging.

Signed-off-by: Andreas Gruenbacher <agruen@suse.de>
Signed-off-by: Eric Paris <eparis@redhat.com>
---
 fs/notify/fanotify/fanotify.c      | 5 +++--
 fs/notify/fanotify/fanotify_user.c | 1 +
 fs/notify/notification.c           | 3 +++
 3 files changed, 7 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
index 5b0b6b4..881067d 100644
--- a/fs/notify/fanotify/fanotify.c
+++ b/fs/notify/fanotify/fanotify.c
@@ -10,8 +10,9 @@ static bool should_merge(struct fsnotify_event *old, struct fsnotify_event *new)
 {
 	pr_debug("%s: old=%p new=%p\n", __func__, old, new);
 
-	if ((old->to_tell == new->to_tell) &&
-	    (old->data_type == new->data_type)) {
+	if (old->to_tell == new->to_tell &&
+	    old->data_type == new->data_type &&
+	    old->tgid == new->tgid) {
 		switch (old->data_type) {
 		case (FSNOTIFY_EVENT_PATH):
 			if ((old->path.mnt == new->path.mnt) &&
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index cf9c300..66e38fc 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -103,6 +103,7 @@ static ssize_t fill_event_metadata(struct fsnotify_group *group,
 	metadata->event_len = FAN_EVENT_METADATA_LEN;
 	metadata->vers = FANOTIFY_METADATA_VERSION;
 	metadata->mask = fanotify_outgoing_mask(event->mask);
+	metadata->pid = pid_vnr(event->tgid);
 	metadata->fd = create_fd(group, event);
 
 	return metadata->fd;
diff --git a/fs/notify/notification.c b/fs/notify/notification.c
index 066f1f9..7fc8d00 100644
--- a/fs/notify/notification.c
+++ b/fs/notify/notification.c
@@ -93,6 +93,7 @@ void fsnotify_put_event(struct fsnotify_event *event)
 		BUG_ON(!list_empty(&event->private_data_list));
 
 		kfree(event->file_name);
+		put_pid(event->tgid);
 		kmem_cache_free(fsnotify_event_cachep, event);
 	}
 }
@@ -346,6 +347,7 @@ struct fsnotify_event *fsnotify_clone_event(struct fsnotify_event *old_event)
 			return NULL;
 		}
 	}
+	event->tgid = get_pid(old_event->tgid);
 	if (event->data_type == FSNOTIFY_EVENT_PATH)
 		path_get(&event->path);
 
@@ -385,6 +387,7 @@ struct fsnotify_event *fsnotify_create_event(struct inode *to_tell, __u32 mask,
 		event->name_len = strlen(event->file_name);
 	}
 
+	event->tgid = get_pid(task_tgid(current));
 	event->sync_cookie = cookie;
 	event->to_tell = to_tell;
 	event->data_type = data_type;
-- 
cgit v1.1


From 5444e2981c31d0ed7465475e451b8437084337e5 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Thu, 17 Dec 2009 21:24:27 -0500
Subject: fsnotify: split generic and inode specific mark code

currently all marking is done by functions in inode-mark.c.  Some of this
is pretty generic and should be instead done in a generic function and we
should only put the inode specific code in inode-mark.c

Signed-off-by: Eric Paris <eparis@redhat.com>
---
 fs/notify/Makefile                   |   3 +-
 fs/notify/dnotify/dnotify.c          |  12 +-
 fs/notify/fanotify/fanotify.c        |   2 +-
 fs/notify/fanotify/fanotify_user.c   |   8 +-
 fs/notify/fsnotify.h                 |   7 +
 fs/notify/inode_mark.c               | 246 +++--------------------------
 fs/notify/inotify/inotify_fsnotify.c |   4 +-
 fs/notify/inotify/inotify_user.c     |   4 +-
 fs/notify/mark.c                     | 294 +++++++++++++++++++++++++++++++++++
 9 files changed, 337 insertions(+), 243 deletions(-)
 create mode 100644 fs/notify/mark.c

(limited to 'fs')

diff --git a/fs/notify/Makefile b/fs/notify/Makefile
index 396a387..8f7f3b0 100644
--- a/fs/notify/Makefile
+++ b/fs/notify/Makefile
@@ -1,4 +1,5 @@
-obj-$(CONFIG_FSNOTIFY)		+= fsnotify.o notification.o group.o inode_mark.o
+obj-$(CONFIG_FSNOTIFY)		+= fsnotify.o notification.o group.o inode_mark.o \
+				   mark.o
 
 obj-y			+= dnotify/
 obj-y			+= inotify/
diff --git a/fs/notify/dnotify/dnotify.c b/fs/notify/dnotify/dnotify.c
index cac2eb8..69f42df 100644
--- a/fs/notify/dnotify/dnotify.c
+++ b/fs/notify/dnotify/dnotify.c
@@ -95,7 +95,7 @@ static int dnotify_handle_event(struct fsnotify_group *group,
 
 	to_tell = event->to_tell;
 
-	fsn_mark = fsnotify_find_mark(group, to_tell);
+	fsn_mark = fsnotify_find_inode_mark(group, to_tell);
 	if (unlikely(!fsn_mark))
 		return 0;
 	dn_mark = container_of(fsn_mark, struct dnotify_mark, fsn_mark);
@@ -143,14 +143,14 @@ static bool dnotify_should_send_event(struct fsnotify_group *group,
 	if (!S_ISDIR(inode->i_mode))
 		return false;
 
-	fsn_mark = fsnotify_find_mark(group, inode);
+	fsn_mark = fsnotify_find_inode_mark(group, inode);
 	if (!fsn_mark)
 		return false;
 
 	mask = (mask & ~FS_EVENT_ON_CHILD);
 	send = (mask & fsn_mark->mask);
 
-	fsnotify_put_mark(fsn_mark); /* matches fsnotify_find_mark */
+	fsnotify_put_mark(fsn_mark); /* matches fsnotify_find_inode_mark */
 
 	return send;
 }
@@ -193,7 +193,7 @@ void dnotify_flush(struct file *filp, fl_owner_t id)
 	if (!S_ISDIR(inode->i_mode))
 		return;
 
-	fsn_mark = fsnotify_find_mark(dnotify_group, inode);
+	fsn_mark = fsnotify_find_inode_mark(dnotify_group, inode);
 	if (!fsn_mark)
 		return;
 	dn_mark = container_of(fsn_mark, struct dnotify_mark, fsn_mark);
@@ -346,12 +346,12 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg)
 	mutex_lock(&dnotify_mark_mutex);
 
 	/* add the new_fsn_mark or find an old one. */
-	fsn_mark = fsnotify_find_mark(dnotify_group, inode);
+	fsn_mark = fsnotify_find_inode_mark(dnotify_group, inode);
 	if (fsn_mark) {
 		dn_mark = container_of(fsn_mark, struct dnotify_mark, fsn_mark);
 		spin_lock(&fsn_mark->lock);
 	} else {
-		fsnotify_add_mark(new_fsn_mark, dnotify_group, inode, 0);
+		fsnotify_add_mark(new_fsn_mark, dnotify_group, inode, NULL, 0);
 		spin_lock(&new_fsn_mark->lock);
 		fsn_mark = new_fsn_mark;
 		dn_mark = new_dn_mark;
diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
index 881067d..aa5e926 100644
--- a/fs/notify/fanotify/fanotify.c
+++ b/fs/notify/fanotify/fanotify.c
@@ -118,7 +118,7 @@ static bool fanotify_should_send_event(struct fsnotify_group *group, struct inod
 	if (data_type != FSNOTIFY_EVENT_PATH)
 		return false;
 
-	fsn_mark = fsnotify_find_mark(group, inode);
+	fsn_mark = fsnotify_find_inode_mark(group, inode);
 	if (!fsn_mark)
 		return false;
 
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index 66e38fc..0535193 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -305,7 +305,7 @@ static int fanotify_remove_mark(struct fsnotify_group *group,
 	pr_debug("%s: group=%p inode=%p mask=%x\n", __func__,
 		 group, inode, mask);
 
-	fsn_mark = fsnotify_find_mark(group, inode);
+	fsn_mark = fsnotify_find_inode_mark(group, inode);
 	if (!fsn_mark)
 		return -ENOENT;
 
@@ -321,7 +321,7 @@ static int fanotify_remove_mark(struct fsnotify_group *group,
 
 	fsnotify_recalc_group_mask(group);
 
-	/* matches the fsnotify_find_mark() */
+	/* matches the fsnotify_find_inode_mark() */
 	fsnotify_put_mark(fsn_mark);
 
 	return 0;
@@ -338,7 +338,7 @@ static int fanotify_add_mark(struct fsnotify_group *group,
 	pr_debug("%s: group=%p inode=%p mask=%x\n", __func__,
 		 group, inode, mask);
 
-	fsn_mark = fsnotify_find_mark(group, inode);
+	fsn_mark = fsnotify_find_inode_mark(group, inode);
 	if (!fsn_mark) {
 		struct fsnotify_mark *new_fsn_mark;
 
@@ -348,7 +348,7 @@ static int fanotify_add_mark(struct fsnotify_group *group,
 			goto out;
 
 		fsnotify_init_mark(new_fsn_mark, fanotify_free_mark);
-		ret = fsnotify_add_mark(new_fsn_mark, group, inode, 0);
+		ret = fsnotify_add_mark(new_fsn_mark, group, inode, NULL, 0);
 		if (ret) {
 			fanotify_free_mark(new_fsn_mark);
 			goto out;
diff --git a/fs/notify/fsnotify.h b/fs/notify/fsnotify.h
index 2ba5915..7c7a904 100644
--- a/fs/notify/fsnotify.h
+++ b/fs/notify/fsnotify.h
@@ -20,6 +20,11 @@ extern __u32 fsnotify_vfsmount_mask;
 /* destroy all events sitting in this groups notification queue */
 extern void fsnotify_flush_notify(struct fsnotify_group *group);
 
+/* add a mark to an inode */
+extern int fsnotify_add_inode_mark(struct fsnotify_mark *mark,
+				   struct fsnotify_group *group, struct inode *inode,
+				   int allow_dups);
+
 /* add a group to the inode group list */
 extern void fsnotify_add_inode_group(struct fsnotify_group *group);
 /* add a group to the vfsmount group list */
@@ -27,6 +32,8 @@ extern void fsnotify_add_vfsmount_group(struct fsnotify_group *group);
 /* final kfree of a group */
 extern void fsnotify_final_destroy_group(struct fsnotify_group *group);
 
+/* inode specific destruction of a mark */
+extern void fsnotify_destroy_inode_mark(struct fsnotify_mark *mark);
 /* run the list of all marks associated with inode and flag them to be freed */
 extern void fsnotify_clear_marks_by_inode(struct inode *inode);
 /*
diff --git a/fs/notify/inode_mark.c b/fs/notify/inode_mark.c
index ba6f983..c925579 100644
--- a/fs/notify/inode_mark.c
+++ b/fs/notify/inode_mark.c
@@ -16,72 +16,6 @@
  *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
  */
 
-/*
- * fsnotify inode mark locking/lifetime/and refcnting
- *
- * REFCNT:
- * The mark->refcnt tells how many "things" in the kernel currently are
- * referencing this object.  The object typically will live inside the kernel
- * with a refcnt of 2, one for each list it is on (i_list, g_list).  Any task
- * which can find this object holding the appropriete locks, can take a reference
- * and the object itself is guarenteed to survive until the reference is dropped.
- *
- * LOCKING:
- * There are 3 spinlocks involved with fsnotify inode marks and they MUST
- * be taken in order as follows:
- *
- * mark->lock
- * group->mark_lock
- * inode->i_lock
- *
- * mark->lock protects 2 things, mark->group and mark->inode.  You must hold
- * that lock to dereference either of these things (they could be NULL even with
- * the lock)
- *
- * group->mark_lock protects the marks_list anchored inside a given group
- * and each mark is hooked via the g_list.  It also sorta protects the
- * free_g_list, which when used is anchored by a private list on the stack of the
- * task which held the group->mark_lock.
- *
- * inode->i_lock protects the i_fsnotify_marks list anchored inside a
- * given inode and each mark is hooked via the i_list. (and sorta the
- * free_i_list)
- *
- *
- * LIFETIME:
- * Inode marks survive between when they are added to an inode and when their
- * refcnt==0.
- *
- * The inode mark can be cleared for a number of different reasons including:
- * - The inode is unlinked for the last time.  (fsnotify_inode_remove)
- * - The inode is being evicted from cache. (fsnotify_inode_delete)
- * - The fs the inode is on is unmounted.  (fsnotify_inode_delete/fsnotify_unmount_inodes)
- * - Something explicitly requests that it be removed.  (fsnotify_destroy_mark)
- * - The fsnotify_group associated with the mark is going away and all such marks
- *   need to be cleaned up. (fsnotify_clear_marks_by_group)
- *
- * Worst case we are given an inode and need to clean up all the marks on that
- * inode.  We take i_lock and walk the i_fsnotify_marks safely.  For each
- * mark on the list we take a reference (so the mark can't disappear under us).
- * We remove that mark form the inode's list of marks and we add this mark to a
- * private list anchored on the stack using i_free_list;  At this point we no
- * longer fear anything finding the mark using the inode's list of marks.
- *
- * We can safely and locklessly run the private list on the stack of everything
- * we just unattached from the original inode.  For each mark on the private list
- * we grab the mark-> and can thus dereference mark->group and mark->inode.  If
- * we see the group and inode are not NULL we take those locks.  Now holding all
- * 3 locks we can completely remove the mark from other tasks finding it in the
- * future.  Remember, 10 things might already be referencing this mark, but they
- * better be holding a ref.  We drop our reference we took before we unhooked it
- * from the inode.  When the ref hits 0 we can free the mark.
- *
- * Very similarly for freeing by group, except we use free_g_list.
- *
- * This has the very interesting property of being able to run concurrently with
- * any (or all) other directions.
- */
-
 #include <linux/fs.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
@@ -95,17 +29,6 @@
 #include <linux/fsnotify_backend.h>
 #include "fsnotify.h"
 
-void fsnotify_get_mark(struct fsnotify_mark *mark)
-{
-	atomic_inc(&mark->refcnt);
-}
-
-void fsnotify_put_mark(struct fsnotify_mark *mark)
-{
-	if (atomic_dec_and_test(&mark->refcnt))
-		mark->free_mark(mark);
-}
-
 /*
  * Recalculate the mask of events relevant to a given inode locked.
  */
@@ -135,44 +58,18 @@ void fsnotify_recalc_inode_mask(struct inode *inode)
 	__fsnotify_update_child_dentry_flags(inode);
 }
 
-/*
- * Any time a mark is getting freed we end up here.
- * The caller had better be holding a reference to this mark so we don't actually
- * do the final put under the mark->lock
- */
-void fsnotify_destroy_mark(struct fsnotify_mark *mark)
+void fsnotify_destroy_inode_mark(struct fsnotify_mark *mark)
 {
-	struct fsnotify_group *group;
-	struct inode *inode;
-
-	spin_lock(&mark->lock);
+	struct inode *inode = mark->i.inode;
 
-	group = mark->group;
-	inode = mark->i.inode;
+	assert_spin_locked(&mark->lock);
+	assert_spin_locked(&mark->group->mark_lock);
 
-	BUG_ON(group && !inode);
-	BUG_ON(!group && inode);
-
-	/* if !group something else already marked this to die */
-	if (!group) {
-		spin_unlock(&mark->lock);
-		return;
-	}
-
-	/* 1 from caller and 1 for being on i_list/g_list */
-	BUG_ON(atomic_read(&mark->refcnt) < 2);
-
-	spin_lock(&group->mark_lock);
 	spin_lock(&inode->i_lock);
 
 	hlist_del_init(&mark->i.i_list);
 	mark->i.inode = NULL;
 
-	list_del_init(&mark->g_list);
-	mark->group = NULL;
-
-	fsnotify_put_mark(mark); /* for i_list and g_list */
-
 	/*
 	 * this mark is now off the inode->i_fsnotify_marks list and we
 	 * hold the inode->i_lock, so this is the perfect time to update the
@@ -181,61 +78,6 @@ void fsnotify_destroy_mark(struct fsnotify_mark *mark)
 	fsnotify_recalc_inode_mask_locked(inode);
 
 	spin_unlock(&inode->i_lock);
-	spin_unlock(&group->mark_lock);
-	spin_unlock(&mark->lock);
-
-	/*
-	 * Some groups like to know that marks are being freed.  This is a
-	 * callback to the group function to let it know that this mark
-	 * is being freed.
-	 */
-	if (group->ops->freeing_mark)
-		group->ops->freeing_mark(mark, group);
-
-	/*
-	 * __fsnotify_update_child_dentry_flags(inode);
-	 *
-	 * I really want to call that, but we can't, we have no idea if the inode
-	 * still exists the second we drop the mark->lock.
-	 *
-	 * The next time an event arrive to this inode from one of it's children
-	 * __fsnotify_parent will see that the inode doesn't care about it's
-	 * children and will update all of these flags then.  So really this
-	 * is just a lazy update (and could be a perf win...)
-	 */
-
-
-	iput(inode);
-
-	/*
-	 * it's possible that this group tried to destroy itself, but this
-	 * this mark was simultaneously being freed by inode.  If that's the
-	 * case, we finish freeing the group here.
-	 */
-	if (unlikely(atomic_dec_and_test(&group->num_marks)))
-		fsnotify_final_destroy_group(group);
-}
-
-/*
- * Given a group, destroy all of the marks associated with that group.
- */
-void fsnotify_clear_marks_by_group(struct fsnotify_group *group)
-{
-	struct fsnotify_mark *lmark, *mark;
-	LIST_HEAD(free_list);
-
-	spin_lock(&group->mark_lock);
-	list_for_each_entry_safe(mark, lmark, &group->marks_list, g_list) {
-		list_add(&mark->free_g_list, &free_list);
-		list_del_init(&mark->g_list);
-		fsnotify_get_mark(mark);
-	}
-	spin_unlock(&group->mark_lock);
-
-	list_for_each_entry_safe(mark, lmark, &free_list, free_g_list) {
-		fsnotify_destroy_mark(mark);
-		fsnotify_put_mark(mark);
-	}
 }
 
 /*
@@ -261,8 +103,12 @@ void fsnotify_clear_marks_by_inode(struct inode *inode)
 	}
 }
 
-static struct fsnotify_mark *fsnotify_find_mark_locked(struct fsnotify_group *group,
-						       struct inode *inode)
+/*
+ * given a group and inode, find the mark associated with that combination.
+ * if found take a reference to that mark and return it, else return NULL
+ */
+struct fsnotify_mark *fsnotify_find_inode_mark_locked(struct fsnotify_group *group,
+						      struct inode *inode)
 {
 	struct fsnotify_mark *mark;
 	struct hlist_node *pos;
@@ -282,50 +128,26 @@ static struct fsnotify_mark *fsnotify_find_mark_locked(struct fsnotify_group *gr
  * given a group and inode, find the mark associated with that combination.
  * if found take a reference to that mark and return it, else return NULL
  */
-struct fsnotify_mark *fsnotify_find_mark(struct fsnotify_group *group,
-					 struct inode *inode)
+struct fsnotify_mark *fsnotify_find_inode_mark(struct fsnotify_group *group,
+					       struct inode *inode)
 {
 	struct fsnotify_mark *mark;
 
 	spin_lock(&inode->i_lock);
-	mark = fsnotify_find_mark_locked(group, inode);
+	mark = fsnotify_find_inode_mark_locked(group, inode);
 	spin_unlock(&inode->i_lock);
 
 	return mark;
 }
 
-void fsnotify_duplicate_mark(struct fsnotify_mark *new, struct fsnotify_mark *old)
-{
-	assert_spin_locked(&old->lock);
-	new->i.inode = old->i.inode;
-	new->group = old->group;
-	new->mask = old->mask;
-	new->free_mark = old->free_mark;
-}
-
-/*
- * Nothing fancy, just initialize lists and locks and counters.
- */
-void fsnotify_init_mark(struct fsnotify_mark *mark,
-			void (*free_mark)(struct fsnotify_mark *mark))
-{
-	spin_lock_init(&mark->lock);
-	atomic_set(&mark->refcnt, 1);
-	INIT_HLIST_NODE(&mark->i.i_list);
-	mark->group = NULL;
-	mark->mask = 0;
-	mark->i.inode = NULL;
-	mark->free_mark = free_mark;
-}
-
 /*
  * Attach an initialized mark mark to a given group and inode.
  * These marks may be used for the fsnotify backend to determine which
  * event types should be delivered to which group and for which inodes.
  */
-int fsnotify_add_mark(struct fsnotify_mark *mark,
-		      struct fsnotify_group *group, struct inode *inode,
-		      int allow_dups)
+int fsnotify_add_inode_mark(struct fsnotify_mark *mark,
+			    struct fsnotify_group *group, struct inode *inode,
+			    int allow_dups)
 {
 	struct fsnotify_mark *lmark = NULL;
 	int ret = 0;
@@ -336,56 +158,26 @@ int fsnotify_add_mark(struct fsnotify_mark *mark,
 
 	mark->flags = FSNOTIFY_MARK_FLAG_INODE;
 
-	/*
-	 * if this group isn't being testing for inode type events we need
-	 * to start testing
-	 */
-	if (unlikely(list_empty(&group->inode_group_list)))
-		fsnotify_add_inode_group(group);
-	/*
-	 * XXX This is where we could also do the fsnotify_add_vfsmount_group
-	 * if we are setting and vfsmount mark....
-
-	if (unlikely(list_empty(&group->vfsmount_group_list)))
-		fsnotify_add_vfsmount_group(group);
-	 */
+	assert_spin_locked(&mark->lock);
+	assert_spin_locked(&group->mark_lock);
 
-	/*
-	 * LOCKING ORDER!!!!
-	 * mark->lock
-	 * group->mark_lock
-	 * inode->i_lock
-	 */
-	spin_lock(&mark->lock);
-	spin_lock(&group->mark_lock);
 	spin_lock(&inode->i_lock);
 
 	if (!allow_dups)
-		lmark = fsnotify_find_mark_locked(group, inode);
+		lmark = fsnotify_find_inode_mark_locked(group, inode);
 	if (!lmark) {
-		mark->group = group;
 		mark->i.inode = inode;
 
 		hlist_add_head(&mark->i.i_list, &inode->i_fsnotify_marks);
-		list_add(&mark->g_list, &group->marks_list);
-
-		fsnotify_get_mark(mark); /* for i_list and g_list */
-
-		atomic_inc(&group->num_marks);
 
 		fsnotify_recalc_inode_mask_locked(inode);
 	}
 
 	spin_unlock(&inode->i_lock);
-	spin_unlock(&group->mark_lock);
-	spin_unlock(&mark->lock);
 
 	if (lmark) {
 		ret = -EEXIST;
 		iput(inode);
-		fsnotify_put_mark(lmark);
-	} else {
-		__fsnotify_update_child_dentry_flags(inode);
 	}
 
 	return ret;
diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c
index cc8f6bc..1d237e1 100644
--- a/fs/notify/inotify/inotify_fsnotify.c
+++ b/fs/notify/inotify/inotify_fsnotify.c
@@ -97,7 +97,7 @@ static int inotify_handle_event(struct fsnotify_group *group, struct fsnotify_ev
 
 	to_tell = event->to_tell;
 
-	fsn_mark = fsnotify_find_mark(group, to_tell);
+	fsn_mark = fsnotify_find_inode_mark(group, to_tell);
 	/* race with watch removal?  We already passes should_send */
 	if (unlikely(!fsn_mark))
 		return 0;
@@ -145,7 +145,7 @@ static bool inotify_should_send_event(struct fsnotify_group *group, struct inode
 	struct fsnotify_mark *fsn_mark;
 	bool send;
 
-	fsn_mark = fsnotify_find_mark(group, inode);
+	fsn_mark = fsnotify_find_inode_mark(group, inode);
 	if (!fsn_mark)
 		return false;
 
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index ad5a1ea..a12315a75 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -566,7 +566,7 @@ static int inotify_update_existing_watch(struct fsnotify_group *group,
 	if (unlikely(!mask))
 		return -EINVAL;
 
-	fsn_mark = fsnotify_find_mark(group, inode);
+	fsn_mark = fsnotify_find_inode_mark(group, inode);
 	if (!fsn_mark)
 		return -ENOENT;
 
@@ -644,7 +644,7 @@ static int inotify_new_watch(struct fsnotify_group *group,
 		goto out_err;
 
 	/* we are on the idr, now get on the inode */
-	ret = fsnotify_add_mark(&tmp_i_mark->fsn_mark, group, inode, 0);
+	ret = fsnotify_add_mark(&tmp_i_mark->fsn_mark, group, inode, NULL, 0);
 	if (ret) {
 		/* we failed to get on the inode, get off the idr */
 		inotify_remove_from_idr(group, tmp_i_mark);
diff --git a/fs/notify/mark.c b/fs/notify/mark.c
new file mode 100644
index 0000000..e56e8768
--- /dev/null
+++ b/fs/notify/mark.c
@@ -0,0 +1,294 @@
+/*
+ *  Copyright (C) 2008 Red Hat, Inc., Eric Paris <eparis@redhat.com>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2, or (at your option)
+ *  any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; see the file COPYING.  If not, write to
+ *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+/*
+ * fsnotify inode mark locking/lifetime/and refcnting
+ *
+ * REFCNT:
+ * The mark->refcnt tells how many "things" in the kernel currently are
+ * referencing this object.  The object typically will live inside the kernel
+ * with a refcnt of 2, one for each list it is on (i_list, g_list).  Any task
+ * which can find this object holding the appropriete locks, can take a reference
+ * and the object itself is guarenteed to survive until the reference is dropped.
+ *
+ * LOCKING:
+ * There are 3 spinlocks involved with fsnotify inode marks and they MUST
+ * be taken in order as follows:
+ *
+ * mark->lock
+ * group->mark_lock
+ * inode->i_lock
+ *
+ * mark->lock protects 2 things, mark->group and mark->inode.  You must hold
+ * that lock to dereference either of these things (they could be NULL even with
+ * the lock)
+ *
+ * group->mark_lock protects the marks_list anchored inside a given group
+ * and each mark is hooked via the g_list.  It also sorta protects the
+ * free_g_list, which when used is anchored by a private list on the stack of the
+ * task which held the group->mark_lock.
+ *
+ * inode->i_lock protects the i_fsnotify_marks list anchored inside a
+ * given inode and each mark is hooked via the i_list. (and sorta the
+ * free_i_list)
+ *
+ *
+ * LIFETIME:
+ * Inode marks survive between when they are added to an inode and when their
+ * refcnt==0.
+ *
+ * The inode mark can be cleared for a number of different reasons including:
+ * - The inode is unlinked for the last time.  (fsnotify_inode_remove)
+ * - The inode is being evicted from cache. (fsnotify_inode_delete)
+ * - The fs the inode is on is unmounted.  (fsnotify_inode_delete/fsnotify_unmount_inodes)
+ * - Something explicitly requests that it be removed.  (fsnotify_destroy_mark)
+ * - The fsnotify_group associated with the mark is going away and all such marks
+ *   need to be cleaned up. (fsnotify_clear_marks_by_group)
+ *
+ * Worst case we are given an inode and need to clean up all the marks on that
+ * inode.  We take i_lock and walk the i_fsnotify_marks safely.  For each
+ * mark on the list we take a reference (so the mark can't disappear under us).
+ * We remove that mark form the inode's list of marks and we add this mark to a
+ * private list anchored on the stack using i_free_list;  At this point we no
+ * longer fear anything finding the mark using the inode's list of marks.
+ *
+ * We can safely and locklessly run the private list on the stack of everything
+ * we just unattached from the original inode.  For each mark on the private list
+ * we grab the mark-> and can thus dereference mark->group and mark->inode.  If
+ * we see the group and inode are not NULL we take those locks.  Now holding all
+ * 3 locks we can completely remove the mark from other tasks finding it in the
+ * future.  Remember, 10 things might already be referencing this mark, but they
+ * better be holding a ref.  We drop our reference we took before we unhooked it
+ * from the inode.  When the ref hits 0 we can free the mark.
+ *
+ * Very similarly for freeing by group, except we use free_g_list.
+ *
+ * This has the very interesting property of being able to run concurrently with
+ * any (or all) other directions.
+ */
+
+#include <linux/fs.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/writeback.h> /* for inode_lock */
+
+#include <asm/atomic.h>
+
+#include <linux/fsnotify_backend.h>
+#include "fsnotify.h"
+
+void fsnotify_get_mark(struct fsnotify_mark *mark)
+{
+	atomic_inc(&mark->refcnt);
+}
+
+void fsnotify_put_mark(struct fsnotify_mark *mark)
+{
+	if (atomic_dec_and_test(&mark->refcnt))
+		mark->free_mark(mark);
+}
+
+/*
+ * Any time a mark is getting freed we end up here.
+ * The caller had better be holding a reference to this mark so we don't actually
+ * do the final put under the mark->lock
+ */
+void fsnotify_destroy_mark(struct fsnotify_mark *mark)
+{
+	struct fsnotify_group *group;
+	struct inode *inode;
+
+	spin_lock(&mark->lock);
+
+	group = mark->group;
+	inode = mark->i.inode;
+
+	BUG_ON(group && !inode);
+	BUG_ON(!group && inode);
+
+	/* if !group something else already marked this to die */
+	if (!group) {
+		spin_unlock(&mark->lock);
+		return;
+	}
+
+	/* 1 from caller and 1 for being on i_list/g_list */
+	BUG_ON(atomic_read(&mark->refcnt) < 2);
+
+	spin_lock(&group->mark_lock);
+
+	if (mark->flags & FSNOTIFY_MARK_FLAG_INODE)
+		fsnotify_destroy_inode_mark(mark);
+	else
+		BUG();
+
+	list_del_init(&mark->g_list);
+	mark->group = NULL;
+
+	fsnotify_put_mark(mark); /* for i_list and g_list */
+
+	spin_unlock(&group->mark_lock);
+	spin_unlock(&mark->lock);
+
+	/*
+	 * Some groups like to know that marks are being freed.  This is a
+	 * callback to the group function to let it know that this mark
+	 * is being freed.
+	 */
+	if (group->ops->freeing_mark)
+		group->ops->freeing_mark(mark, group);
+
+	/*
+	 * __fsnotify_update_child_dentry_flags(inode);
+	 *
+	 * I really want to call that, but we can't, we have no idea if the inode
+	 * still exists the second we drop the mark->lock.
+	 *
+	 * The next time an event arrive to this inode from one of it's children
+	 * __fsnotify_parent will see that the inode doesn't care about it's
+	 * children and will update all of these flags then.  So really this
+	 * is just a lazy update (and could be a perf win...)
+	 */
+
+
+	iput(inode);
+
+	/*
+	 * it's possible that this group tried to destroy itself, but this
+	 * this mark was simultaneously being freed by inode.  If that's the
+	 * case, we finish freeing the group here.
+	 */
+	if (unlikely(atomic_dec_and_test(&group->num_marks)))
+		fsnotify_final_destroy_group(group);
+}
+
+/*
+ * Attach an initialized mark to a given group and fs object.
+ * These marks may be used for the fsnotify backend to determine which
+ * event types should be delivered to which group.
+ */
+int fsnotify_add_mark(struct fsnotify_mark *mark,
+		      struct fsnotify_group *group, struct inode *inode,
+		      struct vfsmount *mnt, int allow_dups)
+{
+	int ret = 0;
+
+	BUG_ON(mnt);
+	BUG_ON(inode && mnt);
+	BUG_ON(!inode && !mnt);
+
+	/*
+	 * if this group isn't being testing for inode type events we need
+	 * to start testing
+	 */
+	if (inode && unlikely(list_empty(&group->inode_group_list)))
+		fsnotify_add_inode_group(group);
+	else if (mnt && unlikely(list_empty(&group->vfsmount_group_list)))
+		fsnotify_add_vfsmount_group(group);
+
+	/*
+	 * LOCKING ORDER!!!!
+	 * mark->lock
+	 * group->mark_lock
+	 * inode->i_lock
+	 */
+	spin_lock(&mark->lock);
+	spin_lock(&group->mark_lock);
+
+	mark->group = group;
+	list_add(&mark->g_list, &group->marks_list);
+	atomic_inc(&group->num_marks);
+	fsnotify_get_mark(mark); /* for i_list and g_list */
+
+	if (inode) {
+		ret = fsnotify_add_inode_mark(mark, group, inode, allow_dups);
+		if (ret)
+			goto err;
+	} else {
+		BUG();
+	}
+
+	spin_unlock(&group->mark_lock);
+	spin_unlock(&mark->lock);
+
+	if (inode)
+		__fsnotify_update_child_dentry_flags(inode);
+
+	return ret;
+err:
+	mark->group = NULL;
+	list_del_init(&mark->g_list);
+	atomic_dec(&group->num_marks);
+	fsnotify_put_mark(mark);
+
+	spin_unlock(&group->mark_lock);
+	spin_unlock(&mark->lock);
+
+	return ret;
+}
+
+/*
+ * Given a group, destroy all of the marks associated with that group.
+ */
+void fsnotify_clear_marks_by_group(struct fsnotify_group *group)
+{
+	struct fsnotify_mark *lmark, *mark;
+	LIST_HEAD(free_list);
+
+	spin_lock(&group->mark_lock);
+	list_for_each_entry_safe(mark, lmark, &group->marks_list, g_list) {
+		list_add(&mark->free_g_list, &free_list);
+		list_del_init(&mark->g_list);
+		fsnotify_get_mark(mark);
+	}
+	spin_unlock(&group->mark_lock);
+
+	list_for_each_entry_safe(mark, lmark, &free_list, free_g_list) {
+		fsnotify_destroy_mark(mark);
+		fsnotify_put_mark(mark);
+	}
+}
+
+void fsnotify_duplicate_mark(struct fsnotify_mark *new, struct fsnotify_mark *old)
+{
+	assert_spin_locked(&old->lock);
+	new->i.inode = old->i.inode;
+	new->m.mnt = old->m.mnt;
+	new->group = old->group;
+	new->mask = old->mask;
+	new->free_mark = old->free_mark;
+}
+
+/*
+ * Nothing fancy, just initialize lists and locks and counters.
+ */
+void fsnotify_init_mark(struct fsnotify_mark *mark,
+			void (*free_mark)(struct fsnotify_mark *mark))
+{
+	spin_lock_init(&mark->lock);
+	atomic_set(&mark->refcnt, 1);
+	INIT_HLIST_NODE(&mark->i.i_list);
+	mark->group = NULL;
+	mark->mask = 0;
+	mark->i.inode = NULL;
+	mark->free_mark = free_mark;
+}
-- 
cgit v1.1


From ba643f04cdda170215c8820acd3e201936fc512d Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Thu, 17 Dec 2009 21:24:27 -0500
Subject: fsnotify: clear marks to 0 in fsnotify_init_mark

Currently fsnotify_init_mark sets some fields to 0/NULL.  Some users
already used some sorts of zalloc, some didn't.  This patch uses memset to
explicitly zero everything in the fsnotify_mark when it is initialized so we
don't have to be careful if fields are later added to marks.

Signed-off-by: Eric Paris <eparis@redhat.com>
---
 fs/notify/mark.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/notify/mark.c b/fs/notify/mark.c
index e56e8768..57bb1d7 100644
--- a/fs/notify/mark.c
+++ b/fs/notify/mark.c
@@ -284,11 +284,8 @@ void fsnotify_duplicate_mark(struct fsnotify_mark *new, struct fsnotify_mark *ol
 void fsnotify_init_mark(struct fsnotify_mark *mark,
 			void (*free_mark)(struct fsnotify_mark *mark))
 {
+	memset(mark, 0, sizeof(*mark));
 	spin_lock_init(&mark->lock);
 	atomic_set(&mark->refcnt, 1);
-	INIT_HLIST_NODE(&mark->i.i_list);
-	mark->group = NULL;
-	mark->mask = 0;
-	mark->i.inode = NULL;
 	mark->free_mark = free_mark;
 }
-- 
cgit v1.1


From 2504c5d63b811b71bbaa8d5d5af163e698f4df1f Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruen@suse.de>
Date: Thu, 17 Dec 2009 21:24:27 -0500
Subject: fsnotify/vfsmount: add fsnotify fields to struct vfsmount

This patch adds the list and mask fields needed to support vfsmount marks.
These are the same fields fsnotify needs on an inode.  They are not used,
just declared and we note where the cleanup hook should be (the function is
not yet defined)

Signed-off-by: Andreas Gruenbacher <agruen@suse.de>
Signed-off-by: Eric Paris <eparis@redhat.com>
---
 fs/namespace.c       | 4 ++++
 fs/notify/fsnotify.c | 4 +---
 2 files changed, 5 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/namespace.c b/fs/namespace.c
index 88058de..a2d681a 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -29,6 +29,7 @@
 #include <linux/log2.h>
 #include <linux/idr.h>
 #include <linux/fs_struct.h>
+#include <linux/fsnotify.h>
 #include <asm/uaccess.h>
 #include <asm/unistd.h>
 #include "pnode.h"
@@ -150,6 +151,9 @@ struct vfsmount *alloc_vfsmnt(const char *name)
 		INIT_LIST_HEAD(&mnt->mnt_share);
 		INIT_LIST_HEAD(&mnt->mnt_slave_list);
 		INIT_LIST_HEAD(&mnt->mnt_slave);
+#ifdef CONFIG_FSNOTIFY
+		INIT_HLIST_HEAD(&mnt->mnt_fsnotify_marks);
+#endif
 #ifdef CONFIG_SMP
 		mnt->mnt_writers = alloc_percpu(int);
 		if (!mnt->mnt_writers)
diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
index 60e84fd..e0bf869 100644
--- a/fs/notify/fsnotify.c
+++ b/fs/notify/fsnotify.c
@@ -163,9 +163,7 @@ static bool needed_by_vfsmount(__u32 test_mask, struct vfsmount *mnt)
 	if (!mnt)
 		return false;
 
-	/* hook in this when mnt->mnt_fsnotify_mask is defined */
-	/* return (test_mask & path->mnt->mnt_fsnotify_mask); */
-	return false;
+	return (test_mask & mnt->mnt_fsnotify_mask);
 }
 /*
  * This is the main call to fsnotify.  The VFS calls into hook specific functions
-- 
cgit v1.1


From 0d48b7f01f442bc88a69aa98f3b6b015f2817608 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Thu, 17 Dec 2009 21:24:27 -0500
Subject: fsnotify: vfsmount marks generic functions

Much like inode-mark.c has all of the code dealing with marks on inodes
this patch adds a vfsmount-mark.c which has similar code but is intended
for marks on vfsmounts.

Signed-off-by: Eric Paris <eparis@redhat.com>
---
 fs/notify/Makefile        |   2 +-
 fs/notify/fsnotify.h      |   6 ++
 fs/notify/mark.c          |  20 +++---
 fs/notify/vfsmount_mark.c | 171 ++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 189 insertions(+), 10 deletions(-)
 create mode 100644 fs/notify/vfsmount_mark.c

(limited to 'fs')

diff --git a/fs/notify/Makefile b/fs/notify/Makefile
index 8f7f3b0..ae5f33a 100644
--- a/fs/notify/Makefile
+++ b/fs/notify/Makefile
@@ -1,5 +1,5 @@
 obj-$(CONFIG_FSNOTIFY)		+= fsnotify.o notification.o group.o inode_mark.o \
-				   mark.o
+				   mark.o vfsmount_mark.o
 
 obj-y			+= dnotify/
 obj-y			+= inotify/
diff --git a/fs/notify/fsnotify.h b/fs/notify/fsnotify.h
index 7c7a904..38f3fb5 100644
--- a/fs/notify/fsnotify.h
+++ b/fs/notify/fsnotify.h
@@ -24,6 +24,10 @@ extern void fsnotify_flush_notify(struct fsnotify_group *group);
 extern int fsnotify_add_inode_mark(struct fsnotify_mark *mark,
 				   struct fsnotify_group *group, struct inode *inode,
 				   int allow_dups);
+/* add a mark to a vfsmount */
+extern int fsnotify_add_vfsmount_mark(struct fsnotify_mark *mark,
+				      struct fsnotify_group *group, struct vfsmount *mnt,
+				      int allow_dups);
 
 /* add a group to the inode group list */
 extern void fsnotify_add_inode_group(struct fsnotify_group *group);
@@ -32,6 +36,8 @@ extern void fsnotify_add_vfsmount_group(struct fsnotify_group *group);
 /* final kfree of a group */
 extern void fsnotify_final_destroy_group(struct fsnotify_group *group);
 
+/* vfsmount specific destruction of a mark */
+extern void fsnotify_destroy_vfsmount_mark(struct fsnotify_mark *mark);
 /* inode specific destruction of a mark */
 extern void fsnotify_destroy_inode_mark(struct fsnotify_mark *mark);
 /* run the list of all marks associated with inode and flag them to be freed */
diff --git a/fs/notify/mark.c b/fs/notify/mark.c
index 57bb1d7..d296ec9 100644
--- a/fs/notify/mark.c
+++ b/fs/notify/mark.c
@@ -115,15 +115,11 @@ void fsnotify_put_mark(struct fsnotify_mark *mark)
 void fsnotify_destroy_mark(struct fsnotify_mark *mark)
 {
 	struct fsnotify_group *group;
-	struct inode *inode;
+	struct inode *inode = NULL;
 
 	spin_lock(&mark->lock);
 
 	group = mark->group;
-	inode = mark->i.inode;
-
-	BUG_ON(group && !inode);
-	BUG_ON(!group && inode);
 
 	/* if !group something else already marked this to die */
 	if (!group) {
@@ -136,8 +132,11 @@ void fsnotify_destroy_mark(struct fsnotify_mark *mark)
 
 	spin_lock(&group->mark_lock);
 
-	if (mark->flags & FSNOTIFY_MARK_FLAG_INODE)
+	if (mark->flags & FSNOTIFY_MARK_FLAG_INODE) {
 		fsnotify_destroy_inode_mark(mark);
+		inode = mark->i.inode;
+	} else if (mark->flags & FSNOTIFY_MARK_FLAG_VFSMOUNT)
+		fsnotify_destroy_vfsmount_mark(mark);
 	else
 		BUG();
 
@@ -169,8 +168,8 @@ void fsnotify_destroy_mark(struct fsnotify_mark *mark)
 	 * is just a lazy update (and could be a perf win...)
 	 */
 
-
-	iput(inode);
+	if (inode)
+		iput(inode);
 
 	/*
 	 * it's possible that this group tried to destroy itself, but this
@@ -192,7 +191,6 @@ int fsnotify_add_mark(struct fsnotify_mark *mark,
 {
 	int ret = 0;
 
-	BUG_ON(mnt);
 	BUG_ON(inode && mnt);
 	BUG_ON(!inode && !mnt);
 
@@ -223,6 +221,10 @@ int fsnotify_add_mark(struct fsnotify_mark *mark,
 		ret = fsnotify_add_inode_mark(mark, group, inode, allow_dups);
 		if (ret)
 			goto err;
+	} else if (mnt) {
+		ret = fsnotify_add_vfsmount_mark(mark, group, mnt, allow_dups);
+		if (ret)
+			goto err;
 	} else {
 		BUG();
 	}
diff --git a/fs/notify/vfsmount_mark.c b/fs/notify/vfsmount_mark.c
new file mode 100644
index 0000000..1b61d0a
--- /dev/null
+++ b/fs/notify/vfsmount_mark.c
@@ -0,0 +1,171 @@
+/*
+ *  Copyright (C) 2008 Red Hat, Inc., Eric Paris <eparis@redhat.com>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2, or (at your option)
+ *  any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; see the file COPYING.  If not, write to
+ *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/fs.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/mount.h>
+#include <linux/mutex.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/writeback.h> /* for inode_lock */
+
+#include <asm/atomic.h>
+
+#include <linux/fsnotify_backend.h>
+#include "fsnotify.h"
+
+void fsnotify_clear_marks_by_mount(struct vfsmount *mnt)
+{
+	struct fsnotify_mark *mark, *lmark;
+	struct hlist_node *pos, *n;
+	LIST_HEAD(free_list);
+
+	spin_lock(&mnt->mnt_root->d_lock);
+	hlist_for_each_entry_safe(mark, pos, n, &mnt->mnt_fsnotify_marks, m.m_list) {
+		list_add(&mark->m.free_m_list, &free_list);
+		hlist_del_init(&mark->m.m_list);
+		fsnotify_get_mark(mark);
+	}
+	spin_unlock(&mnt->mnt_root->d_lock);
+
+	list_for_each_entry_safe(mark, lmark, &free_list, m.free_m_list) {
+		fsnotify_destroy_mark(mark);
+		fsnotify_put_mark(mark);
+	}
+}
+
+/*
+ * Recalculate the mask of events relevant to a given vfsmount locked.
+ */
+static void fsnotify_recalc_vfsmount_mask_locked(struct vfsmount *mnt)
+{
+	struct fsnotify_mark *mark;
+	struct hlist_node *pos;
+	__u32 new_mask = 0;
+
+	assert_spin_locked(&mnt->mnt_root->d_lock);
+
+	hlist_for_each_entry(mark, pos, &mnt->mnt_fsnotify_marks, m.m_list)
+		new_mask |= mark->mask;
+	mnt->mnt_fsnotify_mask = new_mask;
+}
+
+/*
+ * Recalculate the mnt->mnt_fsnotify_mask, or the mask of all FS_* event types
+ * any notifier is interested in hearing for this mount point
+ */
+void fsnotify_recalc_vfsmount_mask(struct vfsmount *mnt)
+{
+	spin_lock(&mnt->mnt_root->d_lock);
+	fsnotify_recalc_vfsmount_mask_locked(mnt);
+	spin_unlock(&mnt->mnt_root->d_lock);
+}
+
+void fsnotify_destroy_vfsmount_mark(struct fsnotify_mark *mark)
+{
+	struct vfsmount *mnt = mark->m.mnt;
+
+	assert_spin_locked(&mark->lock);
+	assert_spin_locked(&mark->group->mark_lock);
+
+	spin_lock(&mnt->mnt_root->d_lock);
+
+	hlist_del_init(&mark->m.m_list);
+	mark->m.mnt = NULL;
+
+	fsnotify_recalc_vfsmount_mask_locked(mnt);
+
+	spin_unlock(&mnt->mnt_root->d_lock);
+}
+
+static struct fsnotify_mark *fsnotify_find_vfsmount_mark_locked(struct fsnotify_group *group,
+								struct vfsmount *mnt)
+{
+	struct fsnotify_mark *mark;
+	struct hlist_node *pos;
+
+	assert_spin_locked(&mnt->mnt_root->d_lock);
+
+	hlist_for_each_entry(mark, pos, &mnt->mnt_fsnotify_marks, m.m_list) {
+		if (mark->group == group) {
+			fsnotify_get_mark(mark);
+			return mark;
+		}
+	}
+	return NULL;
+}
+
+/*
+ * given a group and vfsmount, find the mark associated with that combination.
+ * if found take a reference to that mark and return it, else return NULL
+ */
+struct fsnotify_mark *fsnotify_find_vfsmount_mark(struct fsnotify_group *group,
+						  struct vfsmount *mnt)
+{
+	struct fsnotify_mark *mark;
+
+	spin_lock(&mnt->mnt_root->d_lock);
+	mark = fsnotify_find_vfsmount_mark_locked(group, mnt);
+	spin_unlock(&mnt->mnt_root->d_lock);
+
+	return mark;
+}
+
+/*
+ * Attach an initialized mark to a given group and vfsmount.
+ * These marks may be used for the fsnotify backend to determine which
+ * event types should be delivered to which groups.
+ */
+int fsnotify_add_vfsmount_mark(struct fsnotify_mark *mark,
+			       struct fsnotify_group *group, struct vfsmount *mnt,
+			       int allow_dups)
+{
+	struct fsnotify_mark *lmark = NULL;
+	int ret = 0;
+
+	mark->flags = FSNOTIFY_MARK_FLAG_VFSMOUNT;
+
+	/*
+	 * LOCKING ORDER!!!!
+	 * mark->lock
+	 * group->mark_lock
+	 * mnt->mnt_root->d_lock
+	 */
+	assert_spin_locked(&mark->lock);
+	assert_spin_locked(&group->mark_lock);
+
+	spin_lock(&mnt->mnt_root->d_lock);
+
+	if (!allow_dups)
+		lmark = fsnotify_find_vfsmount_mark_locked(group, mnt);
+	if (!lmark) {
+		mark->m.mnt = mnt;
+
+		hlist_add_head(&mark->m.m_list, &mnt->mnt_fsnotify_marks);
+
+		fsnotify_recalc_vfsmount_mask_locked(mnt);
+	} else {
+		ret = -EEXIST;
+	}
+
+	spin_unlock(&mnt->mnt_root->d_lock);
+
+	return ret;
+}
-- 
cgit v1.1


From ca9c726eea013394d1e846331b117effb21ead83 Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruen@suse.de>
Date: Thu, 17 Dec 2009 21:24:27 -0500
Subject: fsnotify: Infrastructure for per-mount watches

Per-mount watches allow groups to listen to fsnotify events on an entire
mount.  This patch simply adds and initializes the fields needed in the
vfsmount struct to make this happen.

Signed-off-by: Andreas Gruenbacher <agruen@suse.de>
Signed-off-by: Eric Paris <eparis@redhat.com>
---
 fs/namespace.c       | 1 +
 fs/notify/fsnotify.c | 5 +++++
 fs/notify/fsnotify.h | 2 ++
 3 files changed, 8 insertions(+)

(limited to 'fs')

diff --git a/fs/namespace.c b/fs/namespace.c
index a2d681a..1969d6b 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -614,6 +614,7 @@ static inline void __mntput(struct vfsmount *mnt)
 	 * provides barriers, so count_mnt_writers() below is safe.  AV
 	 */
 	WARN_ON(count_mnt_writers(mnt));
+	fsnotify_vfsmount_delete(mnt);
 	dput(mnt->mnt_root);
 	free_vfsmnt(mnt);
 	deactivate_super(sb);
diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
index e0bf869..7f14ddc 100644
--- a/fs/notify/fsnotify.c
+++ b/fs/notify/fsnotify.c
@@ -36,6 +36,11 @@ void __fsnotify_inode_delete(struct inode *inode)
 }
 EXPORT_SYMBOL_GPL(__fsnotify_inode_delete);
 
+void __fsnotify_vfsmount_delete(struct vfsmount *mnt)
+{
+	fsnotify_clear_marks_by_mount(mnt);
+}
+
 /*
  * Given an inode, first check if we care what happens to our children.  Inotify
  * and dnotify both tell their parents about events.  If we care about any event
diff --git a/fs/notify/fsnotify.h b/fs/notify/fsnotify.h
index 38f3fb5..204353c 100644
--- a/fs/notify/fsnotify.h
+++ b/fs/notify/fsnotify.h
@@ -42,6 +42,8 @@ extern void fsnotify_destroy_vfsmount_mark(struct fsnotify_mark *mark);
 extern void fsnotify_destroy_inode_mark(struct fsnotify_mark *mark);
 /* run the list of all marks associated with inode and flag them to be freed */
 extern void fsnotify_clear_marks_by_inode(struct inode *inode);
+/* run the list of all marks associated with vfsmount and flag them to be freed */
+extern void fsnotify_clear_marks_by_mount(struct vfsmount *mnt);
 /*
  * update the dentry->d_flags of all of inode's children to indicate if inode cares
  * about events that happen to its children.
-- 
cgit v1.1


From 1c529063a3e4c15eaae28db31326a7aaab7091b5 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Thu, 17 Dec 2009 21:24:28 -0500
Subject: fanotify: should_send_event needs to handle vfsmounts

currently should_send_event in fanotify only cares about marks on inodes.
This patch extends that interface to indicate that it cares about events
that happened on vfsmounts.

Signed-off-by: Eric Paris <eparis@redhat.com>
---
 fs/notify/fanotify/fanotify.c | 56 ++++++++++++++++++++++++++++++++++---------
 1 file changed, 45 insertions(+), 11 deletions(-)

(limited to 'fs')

diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
index aa5e926..202be8a 100644
--- a/fs/notify/fanotify/fanotify.c
+++ b/fs/notify/fanotify/fanotify.c
@@ -2,6 +2,7 @@
 #include <linux/fsnotify_backend.h>
 #include <linux/init.h>
 #include <linux/kernel.h> /* UINT_MAX */
+#include <linux/mount.h>
 #include <linux/types.h>
 
 #include "fanotify.h"
@@ -99,24 +100,35 @@ static int fanotify_handle_event(struct fsnotify_group *group, struct fsnotify_e
 	return ret;
 }
 
-static bool fanotify_should_send_event(struct fsnotify_group *group, struct inode *inode,
-				       struct vfsmount *mnt, __u32 mask, void *data,
-				       int data_type)
+static bool should_send_vfsmount_event(struct fsnotify_group *group, struct vfsmount *mnt,
+				       __u32 mask)
 {
 	struct fsnotify_mark *fsn_mark;
 	bool send;
 
-	pr_debug("%s: group=%p inode=%p mask=%x data=%p data_type=%d\n",
-		 __func__, group, inode, mask, data, data_type);
+	pr_debug("%s: group=%p vfsmount=%p mask=%x\n",
+		 __func__, group, mnt, mask);
 
-	/* sorry, fanotify only gives a damn about files and dirs */
-	if (!S_ISREG(inode->i_mode) &&
-	    !S_ISDIR(inode->i_mode))
+	fsn_mark = fsnotify_find_vfsmount_mark(group, mnt);
+	if (!fsn_mark)
 		return false;
 
-	/* if we don't have enough info to send an event to userspace say no */
-	if (data_type != FSNOTIFY_EVENT_PATH)
-		return false;
+	send = (mask & fsn_mark->mask);
+
+	/* find took a reference */
+	fsnotify_put_mark(fsn_mark);
+
+	return send;
+}
+
+static bool should_send_inode_event(struct fsnotify_group *group, struct inode *inode,
+				    __u32 mask)
+{
+	struct fsnotify_mark *fsn_mark;
+	bool send;
+
+	pr_debug("%s: group=%p inode=%p mask=%x\n",
+		 __func__, group, inode, mask);
 
 	fsn_mark = fsnotify_find_inode_mark(group, inode);
 	if (!fsn_mark)
@@ -142,6 +154,28 @@ static bool fanotify_should_send_event(struct fsnotify_group *group, struct inod
 	return send;
 }
 
+static bool fanotify_should_send_event(struct fsnotify_group *group, struct inode *to_tell,
+				       struct vfsmount *mnt, __u32 mask, void *data,
+				       int data_type)
+{
+	pr_debug("%s: group=%p to_tell=%p mnt=%p mask=%x data=%p data_type=%d\n",
+		 __func__, group, to_tell, mnt, mask, data, data_type);
+
+	/* sorry, fanotify only gives a damn about files and dirs */
+	if (!S_ISREG(to_tell->i_mode) &&
+	    !S_ISDIR(to_tell->i_mode))
+		return false;
+
+	/* if we don't have enough info to send an event to userspace say no */
+	if (data_type != FSNOTIFY_EVENT_PATH)
+		return false;
+
+	if (mnt)
+		return should_send_vfsmount_event(group, mnt, mask);
+	else
+		return should_send_inode_event(group, to_tell, mask);
+}
+
 const struct fsnotify_ops fanotify_fsnotify_ops = {
 	.handle_event = fanotify_handle_event,
 	.should_send_event = fanotify_should_send_event,
-- 
cgit v1.1


From 88826276dcaf4cef9cc7c2695ff15c6d20d4a74d Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Thu, 17 Dec 2009 21:24:28 -0500
Subject: fanotify: infrastructure to add an remove marks on vfsmounts

infrastructure work to add and remove marks on vfsmounts.  This should get
every set up except wiring the functions to the syscalls.

Signed-off-by: Eric Paris <eparis@redhat.com>
---
 fs/notify/fanotify/fanotify_user.c | 185 ++++++++++++++++++++++++++-----------
 1 file changed, 133 insertions(+), 52 deletions(-)

(limited to 'fs')

diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index 0535193..cb7a0c5 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -295,31 +295,83 @@ out:
 	return ret;
 }
 
-static int fanotify_remove_mark(struct fsnotify_group *group,
-				struct inode *inode,
-				__u32 mask)
+static void fanotify_update_object_mask(struct fsnotify_group *group,
+					struct inode *inode,
+					struct vfsmount *mnt,
+					struct fsnotify_mark *fsn_mark,
+					unsigned int flags,
+					__u32 mask)
 {
-	struct fsnotify_mark *fsn_mark;
-	__u32 new_mask;
-
-	pr_debug("%s: group=%p inode=%p mask=%x\n", __func__,
-		 group, inode, mask);
+	__u32 old_mask, new_mask;
 
-	fsn_mark = fsnotify_find_inode_mark(group, inode);
-	if (!fsn_mark)
-		return -ENOENT;
+	pr_debug("%s: group=%p inode=%p mnt=%p fsn_mark=%p flags=%x mask=%x\n",
+		 __func__, group, inode, mnt, fsn_mark, flags, mask);
 
 	spin_lock(&fsn_mark->lock);
-	fsn_mark->mask &= ~mask;
+	old_mask = fsn_mark->mask;
+	if (flags & FAN_MARK_ADD)
+		fsn_mark->mask |= mask;
+	else if (flags & FAN_MARK_REMOVE)
+		fsn_mark->mask &= ~mask;
+	else
+		BUG();
 	new_mask = fsn_mark->mask;
 	spin_unlock(&fsn_mark->lock);
 
 	if (!new_mask)
 		fsnotify_destroy_mark(fsn_mark);
+
+	/* we made changes to a mask, update the group mask and the object mask
+	 * so things happen quickly. */
+	if (old_mask != new_mask) {
+		__u32 dropped, do_object, do_group;
+
+		/* more bits in old than in new? */
+		dropped = (old_mask & ~new_mask);
+		/* more bits in this fsn_mark than the group? */
+		do_group = (new_mask & ~group->mask);
+
+		if (inode) {
+			/* more bits in this fsn_mark than the object's mask? */
+			do_object = (new_mask & ~inode->i_fsnotify_mask);
+			/* update the object with this new fsn_mark */
+			if (dropped || do_object)
+				fsnotify_recalc_inode_mask(inode);
+		} else if (mnt) {
+			/* more bits in this fsn_mark than the object's mask? */
+			do_object = (new_mask & ~mnt->mnt_fsnotify_mask);
+			/* update the object with this new fsn_mark */
+			if (dropped || do_object)
+				fsnotify_recalc_vfsmount_mask(mnt);
+		} else {
+			BUG();
+		}
+
+		/* update the group mask with the new mask */
+		if (dropped || do_group)
+			fsnotify_recalc_group_mask(group);
+	}
+}
+
+static int fanotify_remove_mark(struct fsnotify_group *group, struct inode *inode,
+				struct vfsmount *mnt, unsigned int flags, __u32 mask)
+{
+	struct fsnotify_mark *fsn_mark = NULL;
+
+	BUG_ON(inode && mnt);
+	BUG_ON(!inode && !mnt);
+
+	if (inode)
+		fsn_mark = fsnotify_find_inode_mark(group, inode);
+	else if (mnt)
+		fsn_mark = fsnotify_find_vfsmount_mark(group, mnt);
 	else
-		fsnotify_recalc_inode_mask(inode);
+		BUG();
 
-	fsnotify_recalc_group_mask(group);
+	if (!fsn_mark)
+		return -ENOENT;
+
+	fanotify_update_object_mask(group, inode, mnt, fsn_mark, flags, mask);
 
 	/* matches the fsnotify_find_inode_mark() */
 	fsnotify_put_mark(fsn_mark);
@@ -327,22 +379,48 @@ static int fanotify_remove_mark(struct fsnotify_group *group,
 	return 0;
 }
 
-static int fanotify_add_mark(struct fsnotify_group *group,
-			     struct inode *inode,
-			     __u32 mask)
+static struct fsnotify_mark *fanotify_add_vfsmount_mark(struct fsnotify_group *group,
+							struct vfsmount *mnt)
 {
 	struct fsnotify_mark *fsn_mark;
-	__u32 old_mask, new_mask;
-	int ret;
 
-	pr_debug("%s: group=%p inode=%p mask=%x\n", __func__,
-		 group, inode, mask);
+	fsn_mark = fsnotify_find_vfsmount_mark(group, mnt);
+	if (!fsn_mark) {
+		struct fsnotify_mark *new_fsn_mark;
+		int ret;
+
+		fsn_mark = ERR_PTR(-ENOMEM);
+		new_fsn_mark = kmem_cache_alloc(fanotify_mark_cache, GFP_KERNEL);
+		if (!new_fsn_mark)
+			goto out;
+
+		fsnotify_init_mark(new_fsn_mark, fanotify_free_mark);
+		ret = fsnotify_add_mark(new_fsn_mark, group, NULL, mnt, 0);
+		if (ret) {
+			fsn_mark = ERR_PTR(ret);
+			fanotify_free_mark(new_fsn_mark);
+			goto out;
+		}
+
+		fsn_mark = new_fsn_mark;
+	}
+out:
+	return fsn_mark;
+}
+
+static struct fsnotify_mark *fanotify_add_inode_mark(struct fsnotify_group *group,
+						     struct inode *inode)
+{
+	struct fsnotify_mark *fsn_mark;
+
+	pr_debug("%s: group=%p inode=%p\n", __func__, group, inode);
 
 	fsn_mark = fsnotify_find_inode_mark(group, inode);
 	if (!fsn_mark) {
 		struct fsnotify_mark *new_fsn_mark;
+		int ret;
 
-		ret = -ENOMEM;
+		fsn_mark = ERR_PTR(-ENOMEM);
 		new_fsn_mark = kmem_cache_alloc(fanotify_mark_cache, GFP_KERNEL);
 		if (!new_fsn_mark)
 			goto out;
@@ -350,57 +428,60 @@ static int fanotify_add_mark(struct fsnotify_group *group,
 		fsnotify_init_mark(new_fsn_mark, fanotify_free_mark);
 		ret = fsnotify_add_mark(new_fsn_mark, group, inode, NULL, 0);
 		if (ret) {
+			fsn_mark = ERR_PTR(ret);
 			fanotify_free_mark(new_fsn_mark);
 			goto out;
 		}
 
 		fsn_mark = new_fsn_mark;
 	}
+out:
+	return fsn_mark;
+}
 
-	ret = 0;
+static int fanotify_add_mark(struct fsnotify_group *group, struct inode *inode,
+			     struct vfsmount *mnt, unsigned int flags, __u32 mask)
+{
+	struct fsnotify_mark *fsn_mark;
 
-	spin_lock(&fsn_mark->lock);
-	old_mask = fsn_mark->mask;
-	fsn_mark->mask |= mask;
-	new_mask = fsn_mark->mask;
-	spin_unlock(&fsn_mark->lock);
+	pr_debug("%s: group=%p inode=%p mnt=%p flags=%x mask=%x\n",
+		 __func__, group, inode, mnt, flags, mask);
 
-	/* we made changes to a mask, update the group mask and the inode mask
-	 * so things happen quickly. */
-	if (old_mask != new_mask) {
-		/* more bits in old than in new? */
-		int dropped = (old_mask & ~new_mask);
-		/* more bits in this mark than the inode's mask? */
-		int do_inode = (new_mask & ~inode->i_fsnotify_mask);
-		/* more bits in this mark than the group? */
-		int do_group = (new_mask & ~group->mask);
+	BUG_ON(inode && mnt);
+	BUG_ON(!inode && !mnt);
 
-		/* update the inode with this new mark */
-		if (dropped || do_inode)
-			fsnotify_recalc_inode_mask(inode);
+	if (inode)
+		fsn_mark = fanotify_add_inode_mark(group, inode);
+	else if (mnt)
+		fsn_mark = fanotify_add_vfsmount_mark(group, mnt);
+	else
+		BUG();
 
-		/* update the group mask with the new mask */
-		if (dropped || do_group)
-			fsnotify_recalc_group_mask(group);
-	}
+	if (IS_ERR(fsn_mark))
+		goto out;
+
+	fanotify_update_object_mask(group, inode, mnt, fsn_mark, flags, mask);
 
 	/* match the init or the find.... */
 	fsnotify_put_mark(fsn_mark);
 out:
-	return ret;
+	return PTR_ERR(fsn_mark);
 }
 
 static int fanotify_update_mark(struct fsnotify_group *group,
-				struct inode *inode, int flags,
-				__u32 mask)
+				struct inode *inode, struct vfsmount *mnt,
+				int flags, __u32 mask)
 {
-	pr_debug("%s: group=%p inode=%p flags=%x mask=%x\n", __func__,
-		 group, inode, flags, mask);
+	pr_debug("%s: group=%p inode=%p mnt=%p flags=%x mask=%x\n",
+		 __func__, group, inode, mnt, flags, mask);
+
+	BUG_ON(inode && mnt);
+	BUG_ON(!inode && !mnt);
 
 	if (flags & FAN_MARK_ADD)
-		fanotify_add_mark(group, inode, mask);
+		fanotify_add_mark(group, inode, mnt, flags, mask);
 	else if (flags & FAN_MARK_REMOVE)
-		fanotify_remove_mark(group, inode, mask);
+		fanotify_remove_mark(group, inode, mnt, flags, mask);
 	else
 		BUG();
 
@@ -502,7 +583,7 @@ SYSCALL_DEFINE(fanotify_mark)(int fanotify_fd, unsigned int flags,
 	group = filp->private_data;
 
 	/* create/update an inode mark */
-	ret = fanotify_update_mark(group, inode, flags, mask);
+	ret = fanotify_update_mark(group, inode, NULL, flags, mask);
 
 	path_put(&path);
 fput_and_out:
-- 
cgit v1.1


From c6223f464927cab9f4b10169b78c51d84228faf8 Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruen@suse.de>
Date: Thu, 17 Dec 2009 21:24:28 -0500
Subject: fanotify: remove fanotify_update_mark

fanotify_update_mark() doesn't do much useful;  remove it.

Signed-off-by: Andreas Gruenbacher <agruen@suse.de>
Signed-off-by: Eric Paris <eparis@redhat.com>
---
 fs/notify/fanotify/fanotify_user.c | 31 ++++++++++---------------------
 1 file changed, 10 insertions(+), 21 deletions(-)

(limited to 'fs')

diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index cb7a0c5..0f25fc2 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -468,26 +468,6 @@ out:
 	return PTR_ERR(fsn_mark);
 }
 
-static int fanotify_update_mark(struct fsnotify_group *group,
-				struct inode *inode, struct vfsmount *mnt,
-				int flags, __u32 mask)
-{
-	pr_debug("%s: group=%p inode=%p mnt=%p flags=%x mask=%x\n",
-		 __func__, group, inode, mnt, flags, mask);
-
-	BUG_ON(inode && mnt);
-	BUG_ON(!inode && !mnt);
-
-	if (flags & FAN_MARK_ADD)
-		fanotify_add_mark(group, inode, mnt, flags, mask);
-	else if (flags & FAN_MARK_REMOVE)
-		fanotify_remove_mark(group, inode, mnt, flags, mask);
-	else
-		BUG();
-
-	return 0;
-}
-
 static bool fanotify_mark_validate_input(int flags,
 					 __u32 mask)
 {
@@ -583,7 +563,16 @@ SYSCALL_DEFINE(fanotify_mark)(int fanotify_fd, unsigned int flags,
 	group = filp->private_data;
 
 	/* create/update an inode mark */
-	ret = fanotify_update_mark(group, inode, NULL, flags, mask);
+	switch (flags & (FAN_MARK_ADD | FAN_MARK_REMOVE)) {
+	case FAN_MARK_ADD:
+		ret = fanotify_add_mark(group, inode, NULL, flags, mask);
+		break;
+	case FAN_MARK_REMOVE:
+		ret = fanotify_remove_mark(group, inode, NULL, flags, mask);
+		break;
+	default:
+		ret = -EINVAL;
+	}
 
 	path_put(&path);
 fput_and_out:
-- 
cgit v1.1


From 088b09b0ac7a866a35962eeaea5d5607bd1840b7 Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruen@suse.de>
Date: Thu, 17 Dec 2009 21:24:28 -0500
Subject: fanotify: do not call fanotify_update_object_mask in
 fanotify_remove_mark

Recalculate masks in fanotify_remove_mark, don't use
fanotify_update_object_mask.  This gets us one step closers to readable
code.

Signed-off-by: Andreas Gruenbacher <agruen@suse.de>
Signed-off-by: Eric Paris <eparis@redhat.com>
---
 fs/notify/fanotify/fanotify_user.c | 33 +++++++++++++++++++++++++++++----
 1 file changed, 29 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index 0f25fc2..96d4ffd 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -353,10 +353,26 @@ static void fanotify_update_object_mask(struct fsnotify_group *group,
 	}
 }
 
+static __u32 fanotify_mark_remove_from_mask(struct fsnotify_mark *fsn_mark, __u32 mask)
+{
+	__u32 oldmask;
+
+	spin_lock(&fsn_mark->lock);
+	oldmask = fsn_mark->mask;
+	fsn_mark->mask = oldmask & ~mask;
+	spin_unlock(&fsn_mark->lock);
+
+	if (!(oldmask & ~mask))
+		fsnotify_destroy_mark(fsn_mark);
+
+	return mask & oldmask;
+}
+
 static int fanotify_remove_mark(struct fsnotify_group *group, struct inode *inode,
-				struct vfsmount *mnt, unsigned int flags, __u32 mask)
+				struct vfsmount *mnt, __u32 mask)
 {
 	struct fsnotify_mark *fsn_mark = NULL;
+	__u32 removed;
 
 	BUG_ON(inode && mnt);
 	BUG_ON(!inode && !mnt);
@@ -371,11 +387,20 @@ static int fanotify_remove_mark(struct fsnotify_group *group, struct inode *inod
 	if (!fsn_mark)
 		return -ENOENT;
 
-	fanotify_update_object_mask(group, inode, mnt, fsn_mark, flags, mask);
-
+	removed = fanotify_mark_remove_from_mask(fsn_mark, mask);
 	/* matches the fsnotify_find_inode_mark() */
 	fsnotify_put_mark(fsn_mark);
 
+	if (removed & group->mask)
+		fsnotify_recalc_group_mask(group);
+	if (inode) {
+		if (removed & inode->i_fsnotify_mask)
+			fsnotify_recalc_inode_mask(inode);
+	} else if (mnt) {
+		if (removed & mnt->mnt_fsnotify_mask)
+			fsnotify_recalc_vfsmount_mask(mnt);
+	}
+
 	return 0;
 }
 
@@ -568,7 +593,7 @@ SYSCALL_DEFINE(fanotify_mark)(int fanotify_fd, unsigned int flags,
 		ret = fanotify_add_mark(group, inode, NULL, flags, mask);
 		break;
 	case FAN_MARK_REMOVE:
-		ret = fanotify_remove_mark(group, inode, NULL, flags, mask);
+		ret = fanotify_remove_mark(group, inode, NULL, mask);
 		break;
 	default:
 		ret = -EINVAL;
-- 
cgit v1.1


From 912ee3946c5e57de0d05baf3b60b65ce6bf3ff96 Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruen@suse.de>
Date: Thu, 17 Dec 2009 21:24:28 -0500
Subject: fanotify: do not call fanotify_update_object_mask in
 fanotify_add_mark

Recalculate masks in fanotify_add_mark, don't use
fanotify_update_object_mask.  This gets us one step closers to readable
code.

Signed-off-by: Andreas Gruenbacher <agruen@suse.de>
Signed-off-by: Eric Paris <eparis@redhat.com>
---
 fs/notify/fanotify/fanotify_user.c | 144 +++++++++++++------------------------
 1 file changed, 50 insertions(+), 94 deletions(-)

(limited to 'fs')

diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index 96d4ffd..db74677 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -295,64 +295,6 @@ out:
 	return ret;
 }
 
-static void fanotify_update_object_mask(struct fsnotify_group *group,
-					struct inode *inode,
-					struct vfsmount *mnt,
-					struct fsnotify_mark *fsn_mark,
-					unsigned int flags,
-					__u32 mask)
-{
-	__u32 old_mask, new_mask;
-
-	pr_debug("%s: group=%p inode=%p mnt=%p fsn_mark=%p flags=%x mask=%x\n",
-		 __func__, group, inode, mnt, fsn_mark, flags, mask);
-
-	spin_lock(&fsn_mark->lock);
-	old_mask = fsn_mark->mask;
-	if (flags & FAN_MARK_ADD)
-		fsn_mark->mask |= mask;
-	else if (flags & FAN_MARK_REMOVE)
-		fsn_mark->mask &= ~mask;
-	else
-		BUG();
-	new_mask = fsn_mark->mask;
-	spin_unlock(&fsn_mark->lock);
-
-	if (!new_mask)
-		fsnotify_destroy_mark(fsn_mark);
-
-	/* we made changes to a mask, update the group mask and the object mask
-	 * so things happen quickly. */
-	if (old_mask != new_mask) {
-		__u32 dropped, do_object, do_group;
-
-		/* more bits in old than in new? */
-		dropped = (old_mask & ~new_mask);
-		/* more bits in this fsn_mark than the group? */
-		do_group = (new_mask & ~group->mask);
-
-		if (inode) {
-			/* more bits in this fsn_mark than the object's mask? */
-			do_object = (new_mask & ~inode->i_fsnotify_mask);
-			/* update the object with this new fsn_mark */
-			if (dropped || do_object)
-				fsnotify_recalc_inode_mask(inode);
-		} else if (mnt) {
-			/* more bits in this fsn_mark than the object's mask? */
-			do_object = (new_mask & ~mnt->mnt_fsnotify_mask);
-			/* update the object with this new fsn_mark */
-			if (dropped || do_object)
-				fsnotify_recalc_vfsmount_mask(mnt);
-		} else {
-			BUG();
-		}
-
-		/* update the group mask with the new mask */
-		if (dropped || do_group)
-			fsnotify_recalc_group_mask(group);
-	}
-}
-
 static __u32 fanotify_mark_remove_from_mask(struct fsnotify_mark *fsn_mark, __u32 mask)
 {
 	__u32 oldmask;
@@ -404,89 +346,103 @@ static int fanotify_remove_mark(struct fsnotify_group *group, struct inode *inod
 	return 0;
 }
 
+static __u32 fanotify_mark_add_to_mask(struct fsnotify_mark *fsn_mark, __u32 mask)
+{
+	__u32 oldmask;
+
+	spin_lock(&fsn_mark->lock);
+	oldmask = fsn_mark->mask;
+	fsn_mark->mask = oldmask | mask;
+	spin_unlock(&fsn_mark->lock);
+
+	return mask & ~oldmask;
+}
+
 static struct fsnotify_mark *fanotify_add_vfsmount_mark(struct fsnotify_group *group,
-							struct vfsmount *mnt)
+							struct vfsmount *mnt, __u32 mask)
 {
 	struct fsnotify_mark *fsn_mark;
+	__u32 added;
 
 	fsn_mark = fsnotify_find_vfsmount_mark(group, mnt);
 	if (!fsn_mark) {
-		struct fsnotify_mark *new_fsn_mark;
 		int ret;
 
-		fsn_mark = ERR_PTR(-ENOMEM);
-		new_fsn_mark = kmem_cache_alloc(fanotify_mark_cache, GFP_KERNEL);
-		if (!new_fsn_mark)
-			goto out;
+		fsn_mark = kmem_cache_alloc(fanotify_mark_cache, GFP_KERNEL);
+		if (!fsn_mark)
+			return ERR_PTR(-ENOMEM);
 
-		fsnotify_init_mark(new_fsn_mark, fanotify_free_mark);
-		ret = fsnotify_add_mark(new_fsn_mark, group, NULL, mnt, 0);
+		fsnotify_init_mark(fsn_mark, fanotify_free_mark);
+		ret = fsnotify_add_mark(fsn_mark, group, NULL, mnt, 0);
 		if (ret) {
-			fsn_mark = ERR_PTR(ret);
-			fanotify_free_mark(new_fsn_mark);
-			goto out;
+			fanotify_free_mark(fsn_mark);
+			return ERR_PTR(ret);
 		}
-
-		fsn_mark = new_fsn_mark;
 	}
-out:
+	added = fanotify_mark_add_to_mask(fsn_mark, mask);
+	if (added) {
+		if (added & ~group->mask)
+			fsnotify_recalc_group_mask(group);
+		if (added & ~mnt->mnt_fsnotify_mask)
+			fsnotify_recalc_vfsmount_mask(mnt);
+	}
 	return fsn_mark;
 }
 
 static struct fsnotify_mark *fanotify_add_inode_mark(struct fsnotify_group *group,
-						     struct inode *inode)
+						     struct inode *inode, __u32 mask)
 {
 	struct fsnotify_mark *fsn_mark;
+	__u32 added;
 
 	pr_debug("%s: group=%p inode=%p\n", __func__, group, inode);
 
 	fsn_mark = fsnotify_find_inode_mark(group, inode);
 	if (!fsn_mark) {
-		struct fsnotify_mark *new_fsn_mark;
 		int ret;
 
-		fsn_mark = ERR_PTR(-ENOMEM);
-		new_fsn_mark = kmem_cache_alloc(fanotify_mark_cache, GFP_KERNEL);
-		if (!new_fsn_mark)
-			goto out;
+		fsn_mark = kmem_cache_alloc(fanotify_mark_cache, GFP_KERNEL);
+		if (!fsn_mark)
+			return ERR_PTR(-ENOMEM);
 
-		fsnotify_init_mark(new_fsn_mark, fanotify_free_mark);
-		ret = fsnotify_add_mark(new_fsn_mark, group, inode, NULL, 0);
+		fsnotify_init_mark(fsn_mark, fanotify_free_mark);
+		ret = fsnotify_add_mark(fsn_mark, group, inode, NULL, 0);
 		if (ret) {
-			fsn_mark = ERR_PTR(ret);
-			fanotify_free_mark(new_fsn_mark);
-			goto out;
+			fanotify_free_mark(fsn_mark);
+			return ERR_PTR(ret);
 		}
-
-		fsn_mark = new_fsn_mark;
 	}
-out:
+	added = fanotify_mark_add_to_mask(fsn_mark, mask);
+	if (added) {
+		if (added & ~group->mask)
+			fsnotify_recalc_group_mask(group);
+		if (added & ~inode->i_fsnotify_mask)
+			fsnotify_recalc_inode_mask(inode);
+	}
 	return fsn_mark;
 }
 
 static int fanotify_add_mark(struct fsnotify_group *group, struct inode *inode,
-			     struct vfsmount *mnt, unsigned int flags, __u32 mask)
+			     struct vfsmount *mnt, __u32 mask)
 {
 	struct fsnotify_mark *fsn_mark;
 
-	pr_debug("%s: group=%p inode=%p mnt=%p flags=%x mask=%x\n",
-		 __func__, group, inode, mnt, flags, mask);
+	pr_debug("%s: group=%p inode=%p mnt=%p mask=%x\n",
+		 __func__, group, inode, mnt, mask);
 
 	BUG_ON(inode && mnt);
 	BUG_ON(!inode && !mnt);
 
 	if (inode)
-		fsn_mark = fanotify_add_inode_mark(group, inode);
+		fsn_mark = fanotify_add_inode_mark(group, inode, mask);
 	else if (mnt)
-		fsn_mark = fanotify_add_vfsmount_mark(group, mnt);
+		fsn_mark = fanotify_add_vfsmount_mark(group, mnt, mask);
 	else
 		BUG();
 
 	if (IS_ERR(fsn_mark))
 		goto out;
 
-	fanotify_update_object_mask(group, inode, mnt, fsn_mark, flags, mask);
-
 	/* match the init or the find.... */
 	fsnotify_put_mark(fsn_mark);
 out:
@@ -590,7 +546,7 @@ SYSCALL_DEFINE(fanotify_mark)(int fanotify_fd, unsigned int flags,
 	/* create/update an inode mark */
 	switch (flags & (FAN_MARK_ADD | FAN_MARK_REMOVE)) {
 	case FAN_MARK_ADD:
-		ret = fanotify_add_mark(group, inode, NULL, flags, mask);
+		ret = fanotify_add_mark(group, inode, NULL, mask);
 		break;
 	case FAN_MARK_REMOVE:
 		ret = fanotify_remove_mark(group, inode, NULL, mask);
-- 
cgit v1.1


From 52202dfbd9107787dc68a2019cc7be4e79f52e5c Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruen@suse.de>
Date: Thu, 17 Dec 2009 21:24:28 -0500
Subject: fanotify: do not return pointer from fanotify_add_*_mark

No need to return the mark from fanotify_add_*_mark to fanotify_add_mark

Signed-off-by: Andreas Gruenbacher <agruen@suse.de>
Signed-off-by: Eric Paris <eparis@redhat.com>
---
 fs/notify/fanotify/fanotify_user.c | 36 ++++++++++++++++--------------------
 1 file changed, 16 insertions(+), 20 deletions(-)

(limited to 'fs')

diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index db74677..7d7c138 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -358,8 +358,8 @@ static __u32 fanotify_mark_add_to_mask(struct fsnotify_mark *fsn_mark, __u32 mas
 	return mask & ~oldmask;
 }
 
-static struct fsnotify_mark *fanotify_add_vfsmount_mark(struct fsnotify_group *group,
-							struct vfsmount *mnt, __u32 mask)
+static int fanotify_add_vfsmount_mark(struct fsnotify_group *group,
+				      struct vfsmount *mnt, __u32 mask)
 {
 	struct fsnotify_mark *fsn_mark;
 	__u32 added;
@@ -370,27 +370,28 @@ static struct fsnotify_mark *fanotify_add_vfsmount_mark(struct fsnotify_group *g
 
 		fsn_mark = kmem_cache_alloc(fanotify_mark_cache, GFP_KERNEL);
 		if (!fsn_mark)
-			return ERR_PTR(-ENOMEM);
+			return -ENOMEM;
 
 		fsnotify_init_mark(fsn_mark, fanotify_free_mark);
 		ret = fsnotify_add_mark(fsn_mark, group, NULL, mnt, 0);
 		if (ret) {
 			fanotify_free_mark(fsn_mark);
-			return ERR_PTR(ret);
+			return ret;
 		}
 	}
 	added = fanotify_mark_add_to_mask(fsn_mark, mask);
+	fsnotify_put_mark(fsn_mark);
 	if (added) {
 		if (added & ~group->mask)
 			fsnotify_recalc_group_mask(group);
 		if (added & ~mnt->mnt_fsnotify_mask)
 			fsnotify_recalc_vfsmount_mask(mnt);
 	}
-	return fsn_mark;
+	return 0;
 }
 
-static struct fsnotify_mark *fanotify_add_inode_mark(struct fsnotify_group *group,
-						     struct inode *inode, __u32 mask)
+static int fanotify_add_inode_mark(struct fsnotify_group *group,
+				   struct inode *inode, __u32 mask)
 {
 	struct fsnotify_mark *fsn_mark;
 	__u32 added;
@@ -403,29 +404,30 @@ static struct fsnotify_mark *fanotify_add_inode_mark(struct fsnotify_group *grou
 
 		fsn_mark = kmem_cache_alloc(fanotify_mark_cache, GFP_KERNEL);
 		if (!fsn_mark)
-			return ERR_PTR(-ENOMEM);
+			return -ENOMEM;
 
 		fsnotify_init_mark(fsn_mark, fanotify_free_mark);
 		ret = fsnotify_add_mark(fsn_mark, group, inode, NULL, 0);
 		if (ret) {
 			fanotify_free_mark(fsn_mark);
-			return ERR_PTR(ret);
+			return ret;
 		}
 	}
 	added = fanotify_mark_add_to_mask(fsn_mark, mask);
+	fsnotify_put_mark(fsn_mark);
 	if (added) {
 		if (added & ~group->mask)
 			fsnotify_recalc_group_mask(group);
 		if (added & ~inode->i_fsnotify_mask)
 			fsnotify_recalc_inode_mask(inode);
 	}
-	return fsn_mark;
+	return 0;
 }
 
 static int fanotify_add_mark(struct fsnotify_group *group, struct inode *inode,
 			     struct vfsmount *mnt, __u32 mask)
 {
-	struct fsnotify_mark *fsn_mark;
+	int ret;
 
 	pr_debug("%s: group=%p inode=%p mnt=%p mask=%x\n",
 		 __func__, group, inode, mnt, mask);
@@ -434,19 +436,13 @@ static int fanotify_add_mark(struct fsnotify_group *group, struct inode *inode,
 	BUG_ON(!inode && !mnt);
 
 	if (inode)
-		fsn_mark = fanotify_add_inode_mark(group, inode, mask);
+		ret = fanotify_add_inode_mark(group, inode, mask);
 	else if (mnt)
-		fsn_mark = fanotify_add_vfsmount_mark(group, mnt, mask);
+		ret = fanotify_add_vfsmount_mark(group, mnt, mask);
 	else
 		BUG();
 
-	if (IS_ERR(fsn_mark))
-		goto out;
-
-	/* match the init or the find.... */
-	fsnotify_put_mark(fsn_mark);
-out:
-	return PTR_ERR(fsn_mark);
+	return ret;
 }
 
 static bool fanotify_mark_validate_input(int flags,
-- 
cgit v1.1


From 90dd201d1ab064512078a77762a793e0bf5f3040 Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruen@suse.de>
Date: Thu, 17 Dec 2009 21:24:28 -0500
Subject: fanotify: remove fanotify_add_mark

fanotify_add_mark now does nothing useful anymore, drop it.

Signed-off-by: Andreas Gruenbacher <agruen@suse.de>
Signed-off-by: Eric Paris <eparis@redhat.com>
---
 fs/notify/fanotify/fanotify_user.c | 23 +----------------------
 1 file changed, 1 insertion(+), 22 deletions(-)

(limited to 'fs')

diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index 7d7c138..db80a0d 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -424,27 +424,6 @@ static int fanotify_add_inode_mark(struct fsnotify_group *group,
 	return 0;
 }
 
-static int fanotify_add_mark(struct fsnotify_group *group, struct inode *inode,
-			     struct vfsmount *mnt, __u32 mask)
-{
-	int ret;
-
-	pr_debug("%s: group=%p inode=%p mnt=%p mask=%x\n",
-		 __func__, group, inode, mnt, mask);
-
-	BUG_ON(inode && mnt);
-	BUG_ON(!inode && !mnt);
-
-	if (inode)
-		ret = fanotify_add_inode_mark(group, inode, mask);
-	else if (mnt)
-		ret = fanotify_add_vfsmount_mark(group, mnt, mask);
-	else
-		BUG();
-
-	return ret;
-}
-
 static bool fanotify_mark_validate_input(int flags,
 					 __u32 mask)
 {
@@ -542,7 +521,7 @@ SYSCALL_DEFINE(fanotify_mark)(int fanotify_fd, unsigned int flags,
 	/* create/update an inode mark */
 	switch (flags & (FAN_MARK_ADD | FAN_MARK_REMOVE)) {
 	case FAN_MARK_ADD:
-		ret = fanotify_add_mark(group, inode, NULL, mask);
+		ret = fanotify_add_inode_mark(group, inode, mask);
 		break;
 	case FAN_MARK_REMOVE:
 		ret = fanotify_remove_mark(group, inode, NULL, mask);
-- 
cgit v1.1


From 0ff21db9fcc39042b814dad8a4b7508710a75235 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Thu, 17 Dec 2009 21:24:29 -0500
Subject: fanotify: hooks the fanotify_mark syscall to the vfsmount code

Create a new fanotify_mark flag which indicates we should attach the mark
to the vfsmount holding the object referenced by dfd and pathname rather
than the inode itself.

Signed-off-by: Eric Paris <eparis@redhat.com>
---
 fs/notify/fanotify/fanotify_user.c | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index db80a0d..8126726 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -485,7 +485,8 @@ SYSCALL_DEFINE(fanotify_mark)(int fanotify_fd, unsigned int flags,
 			      __u64 mask, int dfd,
 			      const char  __user * pathname)
 {
-	struct inode *inode;
+	struct inode *inode = NULL;
+	struct vfsmount *mnt = NULL;
 	struct fsnotify_group *group;
 	struct file *filp;
 	struct path path;
@@ -515,16 +516,22 @@ SYSCALL_DEFINE(fanotify_mark)(int fanotify_fd, unsigned int flags,
 		goto fput_and_out;
 
 	/* inode held in place by reference to path; group by fget on fd */
-	inode = path.dentry->d_inode;
+	if (!(flags & FAN_MARK_ON_VFSMOUNT))
+		inode = path.dentry->d_inode;
+	else
+		mnt = path.mnt;
 	group = filp->private_data;
 
 	/* create/update an inode mark */
 	switch (flags & (FAN_MARK_ADD | FAN_MARK_REMOVE)) {
 	case FAN_MARK_ADD:
-		ret = fanotify_add_inode_mark(group, inode, mask);
+		if (flags & FAN_MARK_ON_VFSMOUNT)
+			ret = fanotify_add_vfsmount_mark(group, mnt, mask);
+		else
+			ret = fanotify_add_inode_mark(group, inode, mask);
 		break;
 	case FAN_MARK_REMOVE:
-		ret = fanotify_remove_mark(group, inode, NULL, mask);
+		ret = fanotify_remove_mark(group, inode, mnt, mask);
 		break;
 	default:
 		ret = -EINVAL;
-- 
cgit v1.1


From eac8e9e80ccbd30801b7b76a2ee4c6c5a681e53c Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruen@suse.de>
Date: Thu, 17 Dec 2009 21:24:29 -0500
Subject: fanotify: rename FAN_MARK_ON_VFSMOUNT to FAN_MARK_MOUNT

the term 'vfsmount' isn't sensicle to userspace.  instead call is 'mount.

Signed-off-by: Andreas Gruenbacher <agruen@suse.de>
Signed-off-by: Eric Paris <eparis@redhat.com>
---
 fs/notify/fanotify/fanotify_user.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index 8126726..091371e 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -516,7 +516,7 @@ SYSCALL_DEFINE(fanotify_mark)(int fanotify_fd, unsigned int flags,
 		goto fput_and_out;
 
 	/* inode held in place by reference to path; group by fget on fd */
-	if (!(flags & FAN_MARK_ON_VFSMOUNT))
+	if (!(flags & FAN_MARK_MOUNT))
 		inode = path.dentry->d_inode;
 	else
 		mnt = path.mnt;
@@ -525,7 +525,7 @@ SYSCALL_DEFINE(fanotify_mark)(int fanotify_fd, unsigned int flags,
 	/* create/update an inode mark */
 	switch (flags & (FAN_MARK_ADD | FAN_MARK_REMOVE)) {
 	case FAN_MARK_ADD:
-		if (flags & FAN_MARK_ON_VFSMOUNT)
+		if (flags & FAN_MARK_MOUNT)
 			ret = fanotify_add_vfsmount_mark(group, mnt, mask);
 		else
 			ret = fanotify_add_inode_mark(group, inode, mask);
-- 
cgit v1.1


From f3640192c0a177506ec08ab07ed3178b912574da Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruen@suse.de>
Date: Thu, 17 Dec 2009 21:24:29 -0500
Subject: fanotify: split fanotify_remove_mark

split fanotify_remove_mark into fanotify_remove_inode_mark and
fanotify_remove_vfsmount_mark.

Signed-off-by: Andreas Gruenbacher <agruen@suse.de>
Signed-off-by: Eric Paris <eparis@redhat.com>
---
 fs/notify/fanotify/fanotify_user.c | 45 +++++++++++++++++++++++---------------
 1 file changed, 27 insertions(+), 18 deletions(-)

(limited to 'fs')

diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index 091371e..00628d3 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -310,22 +310,33 @@ static __u32 fanotify_mark_remove_from_mask(struct fsnotify_mark *fsn_mark, __u3
 	return mask & oldmask;
 }
 
-static int fanotify_remove_mark(struct fsnotify_group *group, struct inode *inode,
-				struct vfsmount *mnt, __u32 mask)
+static int fanotify_remove_vfsmount_mark(struct fsnotify_group *group,
+					 struct vfsmount *mnt, __u32 mask)
 {
 	struct fsnotify_mark *fsn_mark = NULL;
 	__u32 removed;
 
-	BUG_ON(inode && mnt);
-	BUG_ON(!inode && !mnt);
+	fsn_mark = fsnotify_find_vfsmount_mark(group, mnt);
+	if (!fsn_mark)
+		return -ENOENT;
 
-	if (inode)
-		fsn_mark = fsnotify_find_inode_mark(group, inode);
-	else if (mnt)
-		fsn_mark = fsnotify_find_vfsmount_mark(group, mnt);
-	else
-		BUG();
+	removed = fanotify_mark_remove_from_mask(fsn_mark, mask);
+	fsnotify_put_mark(fsn_mark);
+	if (removed & group->mask)
+		fsnotify_recalc_group_mask(group);
+	if (removed & mnt->mnt_fsnotify_mask)
+		fsnotify_recalc_vfsmount_mask(mnt);
+
+	return 0;
+}
 
+static int fanotify_remove_inode_mark(struct fsnotify_group *group,
+				      struct inode *inode, __u32 mask)
+{
+	struct fsnotify_mark *fsn_mark = NULL;
+	__u32 removed;
+
+	fsn_mark = fsnotify_find_inode_mark(group, inode);
 	if (!fsn_mark)
 		return -ENOENT;
 
@@ -335,13 +346,8 @@ static int fanotify_remove_mark(struct fsnotify_group *group, struct inode *inod
 
 	if (removed & group->mask)
 		fsnotify_recalc_group_mask(group);
-	if (inode) {
-		if (removed & inode->i_fsnotify_mask)
-			fsnotify_recalc_inode_mask(inode);
-	} else if (mnt) {
-		if (removed & mnt->mnt_fsnotify_mask)
-			fsnotify_recalc_vfsmount_mask(mnt);
-	}
+	if (removed & inode->i_fsnotify_mask)
+		fsnotify_recalc_inode_mask(inode);
 
 	return 0;
 }
@@ -531,7 +537,10 @@ SYSCALL_DEFINE(fanotify_mark)(int fanotify_fd, unsigned int flags,
 			ret = fanotify_add_inode_mark(group, inode, mask);
 		break;
 	case FAN_MARK_REMOVE:
-		ret = fanotify_remove_mark(group, inode, mnt, mask);
+		if (flags & FAN_MARK_MOUNT)
+			ret = fanotify_remove_vfsmount_mark(group, mnt, mask);
+		else
+			ret = fanotify_remove_inode_mark(group, inode, mask);
 		break;
 	default:
 		ret = -EINVAL;
-- 
cgit v1.1


From 88380fe66e0ac22529f5426ab27d67da00ed2628 Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruen@suse.de>
Date: Thu, 17 Dec 2009 21:24:29 -0500
Subject: fanotify: remove fanotify.h declarations

fanotify_mark_validate functions are all needlessly declared in headers as
static inlines.  Instead just do the checks where they are needed for code
readability.

Signed-off-by: Andreas Gruenbacher <agruen@suse.de>
Signed-off-by: Eric Paris <eparis@redhat.com>
---
 fs/notify/fanotify/fanotify.h      | 25 -------------------------
 fs/notify/fanotify/fanotify_user.c | 25 ++++++++++---------------
 2 files changed, 10 insertions(+), 40 deletions(-)

(limited to 'fs')

diff --git a/fs/notify/fanotify/fanotify.h b/fs/notify/fanotify/fanotify.h
index 5608783..4d5723a 100644
--- a/fs/notify/fanotify/fanotify.h
+++ b/fs/notify/fanotify/fanotify.h
@@ -6,31 +6,6 @@
 
 extern const struct fsnotify_ops fanotify_fsnotify_ops;
 
-static inline bool fanotify_mark_flags_valid(unsigned int flags)
-{
-	/* must be either and add or a remove */
-	if (!(flags & (FAN_MARK_ADD | FAN_MARK_REMOVE)))
-		return false;
-
-	/* cannot be both add and remove */
-	if ((flags & FAN_MARK_ADD) &&
-	    (flags & FAN_MARK_REMOVE))
-		return false;
-
-	/* cannot have more flags than we know about */
-	if (flags & ~FAN_ALL_MARK_FLAGS)
-		return false;
-
-	return true;
-}
-
-static inline bool fanotify_mask_valid(__u32 mask)
-{
-	if (mask & ~((__u32)FAN_ALL_INCOMING_EVENTS))
-		return false;
-	return true;
-}
-
 static inline __u32 fanotify_outgoing_mask(__u32 mask)
 {
 	return mask & FAN_ALL_OUTGOING_EVENTS;
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index 00628d3..618867e 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -430,20 +430,6 @@ static int fanotify_add_inode_mark(struct fsnotify_group *group,
 	return 0;
 }
 
-static bool fanotify_mark_validate_input(int flags,
-					 __u32 mask)
-{
-	pr_debug("%s: flags=%x mask=%x\n", __func__, flags, mask);
-
-	/* are flags valid of this operation? */
-	if (!fanotify_mark_flags_valid(flags))
-		return false;
-	/* is the mask valid? */
-	if (!fanotify_mask_valid(mask))
-		return false;
-	return true;
-}
-
 /* fanotify syscalls */
 SYSCALL_DEFINE3(fanotify_init, unsigned int, flags, unsigned int, event_f_flags,
 		unsigned int, priority)
@@ -505,7 +491,16 @@ SYSCALL_DEFINE(fanotify_mark)(int fanotify_fd, unsigned int flags,
 	if (mask & ((__u64)0xffffffff << 32))
 		return -EINVAL;
 
-	if (!fanotify_mark_validate_input(flags, mask))
+	if (flags & ~FAN_ALL_MARK_FLAGS)
+		return -EINVAL;
+	switch (flags & (FAN_MARK_ADD | FAN_MARK_REMOVE)) {
+	case FAN_MARK_ADD:
+	case FAN_MARK_REMOVE:
+		break;
+	default:
+		return -EINVAL;
+	}
+	if (mask & ~(FAN_ALL_EVENTS | FAN_EVENT_ON_CHILD))
 		return -EINVAL;
 
 	filp = fget_light(fanotify_fd, &fput_needed);
-- 
cgit v1.1


From 33d3dfff451a2ab6fe2f6aaabed9b24e91aad109 Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruen@suse.de>
Date: Thu, 17 Dec 2009 21:24:29 -0500
Subject: fanotify: remove outgoing function checks in fanotify.h

A number of validity checks on outgoing data are done in static inlines but
are only used in one place.  Instead just do them where they are used for
readability.

Signed-off-by: Andreas Gruenbacher <agruen@suse.de>
Signed-off-by: Eric Paris <eparis@redhat.com>
---
 fs/notify/fanotify/fanotify.c      |  3 +--
 fs/notify/fanotify/fanotify.h      | 12 ------------
 fs/notify/fanotify/fanotify_user.c |  5 +++--
 3 files changed, 4 insertions(+), 16 deletions(-)
 delete mode 100644 fs/notify/fanotify/fanotify.h

(limited to 'fs')

diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
index 202be8a..f690002 100644
--- a/fs/notify/fanotify/fanotify.c
+++ b/fs/notify/fanotify/fanotify.c
@@ -1,3 +1,4 @@
+#include <linux/fanotify.h>
 #include <linux/fdtable.h>
 #include <linux/fsnotify_backend.h>
 #include <linux/init.h>
@@ -5,8 +6,6 @@
 #include <linux/mount.h>
 #include <linux/types.h>
 
-#include "fanotify.h"
-
 static bool should_merge(struct fsnotify_event *old, struct fsnotify_event *new)
 {
 	pr_debug("%s: old=%p new=%p\n", __func__, old, new);
diff --git a/fs/notify/fanotify/fanotify.h b/fs/notify/fanotify/fanotify.h
deleted file mode 100644
index 4d5723a..0000000
--- a/fs/notify/fanotify/fanotify.h
+++ /dev/null
@@ -1,12 +0,0 @@
-#include <linux/fanotify.h>
-#include <linux/fsnotify_backend.h>
-#include <linux/net.h>
-#include <linux/kernel.h>
-#include <linux/types.h>
-
-extern const struct fsnotify_ops fanotify_fsnotify_ops;
-
-static inline __u32 fanotify_outgoing_mask(__u32 mask)
-{
-	return mask & FAN_ALL_OUTGOING_EVENTS;
-}
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index 618867e..8415584 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -1,3 +1,4 @@
+#include <linux/fanotify.h>
 #include <linux/fcntl.h>
 #include <linux/file.h>
 #include <linux/fs.h>
@@ -14,7 +15,7 @@
 
 #include <asm/ioctls.h>
 
-#include "fanotify.h"
+extern const struct fsnotify_ops fanotify_fsnotify_ops;
 
 static struct kmem_cache *fanotify_mark_cache __read_mostly;
 
@@ -102,7 +103,7 @@ static ssize_t fill_event_metadata(struct fsnotify_group *group,
 
 	metadata->event_len = FAN_EVENT_METADATA_LEN;
 	metadata->vers = FANOTIFY_METADATA_VERSION;
-	metadata->mask = fanotify_outgoing_mask(event->mask);
+	metadata->mask = event->mask & FAN_ALL_OUTGOING_EVENTS;
 	metadata->pid = pid_vnr(event->tgid);
 	metadata->fd = create_fd(group, event);
 
-- 
cgit v1.1


From 90b1e7a57880fb66437ab7db39e1e65ca0372822 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Thu, 17 Dec 2009 21:24:33 -0500
Subject: fsnotify: allow marks to not pin inodes in core

inotify marks must pin inodes in core.  dnotify doesn't technically need to
since they are closed when the directory is closed.  fanotify also need to
pin inodes in core as it works today.  But the next step is to introduce
the concept of 'ignored masks' which is actually a mask of events for an
inode of no interest.  I claim that these should be liberally sent to the
kernel and should not pin the inode in core.  If the inode is brought back
in the listener will get an event it may have thought excluded, but this is
not a serious situation and one any listener should deal with.

This patch lays the ground work for non-pinning inode marks by using lazy
inode pinning.  We do not pin a mark until it has a non-zero mask entry.  If a
listener new sets a mask we never pin the inode.

Signed-off-by: Eric Paris <eparis@redhat.com>
---
 fs/notify/dnotify/dnotify.c        |  2 +-
 fs/notify/fanotify/fanotify_user.c |  4 ++--
 fs/notify/fsnotify.h               |  2 ++
 fs/notify/inode_mark.c             | 35 +++++++++++++++++++++++++++--------
 fs/notify/inotify/inotify_user.c   | 12 +++++-------
 fs/notify/mark.c                   | 17 ++++++++++++++++-
 6 files changed, 53 insertions(+), 19 deletions(-)

(limited to 'fs')

diff --git a/fs/notify/dnotify/dnotify.c b/fs/notify/dnotify/dnotify.c
index 69f42df..6624c2e 100644
--- a/fs/notify/dnotify/dnotify.c
+++ b/fs/notify/dnotify/dnotify.c
@@ -65,7 +65,7 @@ static void dnotify_recalc_inode_mask(struct fsnotify_mark *fsn_mark)
 	new_mask = 0;
 	for (dn = dn_mark->dn; dn != NULL; dn = dn->dn_next)
 		new_mask |= (dn->dn_mask & ~FS_DN_MULTISHOT);
-	fsn_mark->mask = new_mask;
+	fsnotify_set_mark_mask_locked(fsn_mark, new_mask);
 
 	if (old_mask == new_mask)
 		return;
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index 8415584..3320f0c 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -302,7 +302,7 @@ static __u32 fanotify_mark_remove_from_mask(struct fsnotify_mark *fsn_mark, __u3
 
 	spin_lock(&fsn_mark->lock);
 	oldmask = fsn_mark->mask;
-	fsn_mark->mask = oldmask & ~mask;
+	fsnotify_set_mark_mask_locked(fsn_mark, (oldmask & ~mask));
 	spin_unlock(&fsn_mark->lock);
 
 	if (!(oldmask & ~mask))
@@ -359,7 +359,7 @@ static __u32 fanotify_mark_add_to_mask(struct fsnotify_mark *fsn_mark, __u32 mas
 
 	spin_lock(&fsn_mark->lock);
 	oldmask = fsn_mark->mask;
-	fsn_mark->mask = oldmask | mask;
+	fsnotify_set_mark_mask_locked(fsn_mark, (oldmask | mask));
 	spin_unlock(&fsn_mark->lock);
 
 	return mask & ~oldmask;
diff --git a/fs/notify/fsnotify.h b/fs/notify/fsnotify.h
index 204353c..1be54f6 100644
--- a/fs/notify/fsnotify.h
+++ b/fs/notify/fsnotify.h
@@ -20,6 +20,8 @@ extern __u32 fsnotify_vfsmount_mask;
 /* destroy all events sitting in this groups notification queue */
 extern void fsnotify_flush_notify(struct fsnotify_group *group);
 
+extern void fsnotify_set_inode_mark_mask_locked(struct fsnotify_mark *fsn_mark,
+						__u32 mask);
 /* add a mark to an inode */
 extern int fsnotify_add_inode_mark(struct fsnotify_mark *mark,
 				   struct fsnotify_group *group, struct inode *inode,
diff --git a/fs/notify/inode_mark.c b/fs/notify/inode_mark.c
index c925579..4292f9e 100644
--- a/fs/notify/inode_mark.c
+++ b/fs/notify/inode_mark.c
@@ -141,7 +141,32 @@ struct fsnotify_mark *fsnotify_find_inode_mark(struct fsnotify_group *group,
 }
 
 /*
- * Attach an initialized mark mark to a given group and inode.
+ * If we are setting a mark mask on an inode mark we should pin the inode
+ * in memory.
+ */
+void fsnotify_set_inode_mark_mask_locked(struct fsnotify_mark *mark,
+					 __u32 mask)
+{
+	struct inode *inode;
+
+	assert_spin_locked(&mark->lock);
+
+	if (mask &&
+	    mark->i.inode &&
+	    !(mark->flags & FSNOTIFY_MARK_FLAG_OBJECT_PINNED)) {
+		mark->flags |= FSNOTIFY_MARK_FLAG_OBJECT_PINNED;
+		inode = igrab(mark->i.inode);
+		/*
+		 * we shouldn't be able to get here if the inode wasn't
+		 * already safely held in memory.  But bug in case it
+		 * ever is wrong.
+		 */
+		BUG_ON(!inode);
+	}
+}
+
+/*
+ * Attach an initialized mark to a given group and inode.
  * These marks may be used for the fsnotify backend to determine which
  * event types should be delivered to which group and for which inodes.
  */
@@ -152,10 +177,6 @@ int fsnotify_add_inode_mark(struct fsnotify_mark *mark,
 	struct fsnotify_mark *lmark = NULL;
 	int ret = 0;
 
-	inode = igrab(inode);
-	if (unlikely(!inode))
-		return -EINVAL;
-
 	mark->flags = FSNOTIFY_MARK_FLAG_INODE;
 
 	assert_spin_locked(&mark->lock);
@@ -175,10 +196,8 @@ int fsnotify_add_inode_mark(struct fsnotify_mark *mark,
 
 	spin_unlock(&inode->i_lock);
 
-	if (lmark) {
+	if (lmark)
 		ret = -EEXIST;
-		iput(inode);
-	}
 
 	return ret;
 }
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index a12315a75..19d2740 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -575,13 +575,11 @@ static int inotify_update_existing_watch(struct fsnotify_group *group,
 	spin_lock(&fsn_mark->lock);
 
 	old_mask = fsn_mark->mask;
-	if (add) {
-		fsn_mark->mask |= mask;
-		new_mask = fsn_mark->mask;
-	} else {
-		fsn_mark->mask = mask;
-		new_mask = fsn_mark->mask;
-	}
+	if (add)
+		fsnotify_set_mark_mask_locked(fsn_mark, (fsn_mark->mask | mask));
+	else
+		fsnotify_set_mark_mask_locked(fsn_mark, mask);
+	new_mask = fsn_mark->mask;
 
 	spin_unlock(&fsn_mark->lock);
 
diff --git a/fs/notify/mark.c b/fs/notify/mark.c
index d296ec9..0ebc3fd 100644
--- a/fs/notify/mark.c
+++ b/fs/notify/mark.c
@@ -168,7 +168,7 @@ void fsnotify_destroy_mark(struct fsnotify_mark *mark)
 	 * is just a lazy update (and could be a perf win...)
 	 */
 
-	if (inode)
+	if (inode && (mark->flags & FSNOTIFY_MARK_FLAG_OBJECT_PINNED))
 		iput(inode);
 
 	/*
@@ -180,6 +180,17 @@ void fsnotify_destroy_mark(struct fsnotify_mark *mark)
 		fsnotify_final_destroy_group(group);
 }
 
+void fsnotify_set_mark_mask_locked(struct fsnotify_mark *mark, __u32 mask)
+{
+	assert_spin_locked(&mark->lock);
+
+	mark->mask = mask;
+
+	if (mark->flags & FSNOTIFY_MARK_FLAG_INODE)
+		fsnotify_set_inode_mark_mask_locked(mark, mask);
+}
+
+
 /*
  * Attach an initialized mark to a given group and fs object.
  * These marks may be used for the fsnotify backend to determine which
@@ -230,6 +241,10 @@ int fsnotify_add_mark(struct fsnotify_mark *mark,
 	}
 
 	spin_unlock(&group->mark_lock);
+
+	/* this will pin the object if appropriate */
+	fsnotify_set_mark_mask_locked(mark, mark->mask);
+
 	spin_unlock(&mark->lock);
 
 	if (inode)
-- 
cgit v1.1


From 33af5e32e0bb73c704b5e156f4411cdb53e6cc59 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Thu, 17 Dec 2009 21:24:33 -0500
Subject: fsnotify: ignored_mask - excluding notification

The ignored_mask is a new mask which is part of fsnotify marks.  A group's
should_send_event() function can use the ignored mask to determine that
certain events are not of interest.  In particular if a group registers a
mask including FS_OPEN on a vfsmount they could add FS_OPEN to the
ignored_mask for individual inodes and not send open events for those
inodes.

Signed-off-by: Eric Paris <eparis@redhat.com>
---
 fs/notify/mark.c | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'fs')

diff --git a/fs/notify/mark.c b/fs/notify/mark.c
index 0ebc3fd..cb1d822 100644
--- a/fs/notify/mark.c
+++ b/fs/notify/mark.c
@@ -190,6 +190,12 @@ void fsnotify_set_mark_mask_locked(struct fsnotify_mark *mark, __u32 mask)
 		fsnotify_set_inode_mark_mask_locked(mark, mask);
 }
 
+void fsnotify_set_mark_ignored_mask_locked(struct fsnotify_mark *mark, __u32 mask)
+{
+	assert_spin_locked(&mark->lock);
+
+	mark->ignored_mask = mask;
+}
 
 /*
  * Attach an initialized mark to a given group and fs object.
-- 
cgit v1.1


From 32a4df13b88afef2a7d869bb7586a7beba90961f Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Thu, 17 Dec 2009 21:24:33 -0500
Subject: fanotify: ignored_mask to ignore events

When fanotify receives an event it will check event->mask & ~ignored_mask.
If no bits are left the event will not be sent.

Signed-off-by: Eric Paris <eparis@redhat.com>
---
 fs/notify/fanotify/fanotify.c | 37 +++++++++++++++++++++++--------------
 1 file changed, 23 insertions(+), 14 deletions(-)

(limited to 'fs')

diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
index f690002..060b177 100644
--- a/fs/notify/fanotify/fanotify.c
+++ b/fs/notify/fanotify/fanotify.c
@@ -100,31 +100,39 @@ static int fanotify_handle_event(struct fsnotify_group *group, struct fsnotify_e
 }
 
 static bool should_send_vfsmount_event(struct fsnotify_group *group, struct vfsmount *mnt,
-				       __u32 mask)
+				       struct inode *inode, __u32 mask)
 {
-	struct fsnotify_mark *fsn_mark;
-	bool send;
+	struct fsnotify_mark *mnt_mark;
+	struct fsnotify_mark *inode_mark;
 
 	pr_debug("%s: group=%p vfsmount=%p mask=%x\n",
 		 __func__, group, mnt, mask);
 
-	fsn_mark = fsnotify_find_vfsmount_mark(group, mnt);
-	if (!fsn_mark)
+	mnt_mark = fsnotify_find_vfsmount_mark(group, mnt);
+	if (!mnt_mark)
 		return false;
 
-	send = (mask & fsn_mark->mask);
+	mask &= mnt_mark->mask;
+	mask &= ~mnt_mark->ignored_mask;
+
+	if (mask) {
+		inode_mark = fsnotify_find_inode_mark(group, inode);
+		if (inode_mark) {
+			mask &= ~inode_mark->ignored_mask;
+			fsnotify_put_mark(inode_mark);
+		}
+	}
 
 	/* find took a reference */
-	fsnotify_put_mark(fsn_mark);
+	fsnotify_put_mark(mnt_mark);
 
-	return send;
+	return mask;
 }
 
 static bool should_send_inode_event(struct fsnotify_group *group, struct inode *inode,
 				    __u32 mask)
 {
 	struct fsnotify_mark *fsn_mark;
-	bool send;
 
 	pr_debug("%s: group=%p inode=%p mask=%x\n",
 		 __func__, group, inode, mask);
@@ -137,20 +145,21 @@ static bool should_send_inode_event(struct fsnotify_group *group, struct inode *
 	 * events on the child, don't send it! */
 	if ((mask & FS_EVENT_ON_CHILD) &&
 	    !(fsn_mark->mask & FS_EVENT_ON_CHILD)) {
-		send = false;
+		mask = 0;
 	} else {
 		/*
 		 * We care about children, but do we care about this particular
 		 * type of event?
 		 */
-		mask = (mask & ~FS_EVENT_ON_CHILD);
-		send = (fsn_mark->mask & mask);
+		mask &= ~FS_EVENT_ON_CHILD;
+		mask &= fsn_mark->mask;
+		mask &= ~fsn_mark->ignored_mask;
 	}
 
 	/* find took a reference */
 	fsnotify_put_mark(fsn_mark);
 
-	return send;
+	return mask;
 }
 
 static bool fanotify_should_send_event(struct fsnotify_group *group, struct inode *to_tell,
@@ -170,7 +179,7 @@ static bool fanotify_should_send_event(struct fsnotify_group *group, struct inod
 		return false;
 
 	if (mnt)
-		return should_send_vfsmount_event(group, mnt, mask);
+		return should_send_vfsmount_event(group, mnt, to_tell, mask);
 	else
 		return should_send_inode_event(group, to_tell, mask);
 }
-- 
cgit v1.1


From b9e4e3bd0495fea9e8f8e712889c9cd8ffa43c94 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Thu, 17 Dec 2009 21:24:33 -0500
Subject: fanotify: allow users to set an ignored_mask

Change the sys_fanotify_mark() system call so users can set ignored_masks
on inodes.  Remember, if a user new sets a real mask, and only sets ignored
masks, the ignore will never be pinned in memory.  Thus ignored_masks can
be lost under memory pressure and the user may again get events they
previously thought were ignored.

Signed-off-by: Eric Paris <eparis@redhat.com>
---
 fs/notify/fanotify/fanotify_user.c | 54 +++++++++++++++++++++++++-------------
 1 file changed, 36 insertions(+), 18 deletions(-)

(limited to 'fs')

diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index 3320f0c..ad02d47 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -296,13 +296,20 @@ out:
 	return ret;
 }
 
-static __u32 fanotify_mark_remove_from_mask(struct fsnotify_mark *fsn_mark, __u32 mask)
+static __u32 fanotify_mark_remove_from_mask(struct fsnotify_mark *fsn_mark,
+					    __u32 mask,
+					    unsigned int flags)
 {
 	__u32 oldmask;
 
 	spin_lock(&fsn_mark->lock);
-	oldmask = fsn_mark->mask;
-	fsnotify_set_mark_mask_locked(fsn_mark, (oldmask & ~mask));
+	if (!(flags & FAN_MARK_IGNORED_MASK)) {
+		oldmask = fsn_mark->mask;
+		fsnotify_set_mark_mask_locked(fsn_mark, (oldmask & ~mask));
+	} else {
+		oldmask = fsn_mark->ignored_mask;
+		fsnotify_set_mark_ignored_mask_locked(fsn_mark, (oldmask & ~mask));
+	}
 	spin_unlock(&fsn_mark->lock);
 
 	if (!(oldmask & ~mask))
@@ -312,7 +319,8 @@ static __u32 fanotify_mark_remove_from_mask(struct fsnotify_mark *fsn_mark, __u3
 }
 
 static int fanotify_remove_vfsmount_mark(struct fsnotify_group *group,
-					 struct vfsmount *mnt, __u32 mask)
+					 struct vfsmount *mnt, __u32 mask,
+					 unsigned int flags)
 {
 	struct fsnotify_mark *fsn_mark = NULL;
 	__u32 removed;
@@ -321,7 +329,7 @@ static int fanotify_remove_vfsmount_mark(struct fsnotify_group *group,
 	if (!fsn_mark)
 		return -ENOENT;
 
-	removed = fanotify_mark_remove_from_mask(fsn_mark, mask);
+	removed = fanotify_mark_remove_from_mask(fsn_mark, mask, flags);
 	fsnotify_put_mark(fsn_mark);
 	if (removed & group->mask)
 		fsnotify_recalc_group_mask(group);
@@ -332,7 +340,8 @@ static int fanotify_remove_vfsmount_mark(struct fsnotify_group *group,
 }
 
 static int fanotify_remove_inode_mark(struct fsnotify_group *group,
-				      struct inode *inode, __u32 mask)
+				      struct inode *inode, __u32 mask,
+				      unsigned int flags)
 {
 	struct fsnotify_mark *fsn_mark = NULL;
 	__u32 removed;
@@ -341,7 +350,7 @@ static int fanotify_remove_inode_mark(struct fsnotify_group *group,
 	if (!fsn_mark)
 		return -ENOENT;
 
-	removed = fanotify_mark_remove_from_mask(fsn_mark, mask);
+	removed = fanotify_mark_remove_from_mask(fsn_mark, mask, flags);
 	/* matches the fsnotify_find_inode_mark() */
 	fsnotify_put_mark(fsn_mark);
 
@@ -353,20 +362,28 @@ static int fanotify_remove_inode_mark(struct fsnotify_group *group,
 	return 0;
 }
 
-static __u32 fanotify_mark_add_to_mask(struct fsnotify_mark *fsn_mark, __u32 mask)
+static __u32 fanotify_mark_add_to_mask(struct fsnotify_mark *fsn_mark,
+				       __u32 mask,
+				       unsigned int flags)
 {
 	__u32 oldmask;
 
 	spin_lock(&fsn_mark->lock);
-	oldmask = fsn_mark->mask;
-	fsnotify_set_mark_mask_locked(fsn_mark, (oldmask | mask));
+	if (!(flags & FAN_MARK_IGNORED_MASK)) {
+		oldmask = fsn_mark->mask;
+		fsnotify_set_mark_mask_locked(fsn_mark, (oldmask | mask));
+	} else {
+		oldmask = fsn_mark->ignored_mask;
+		fsnotify_set_mark_ignored_mask_locked(fsn_mark, (oldmask | mask));
+	}
 	spin_unlock(&fsn_mark->lock);
 
 	return mask & ~oldmask;
 }
 
 static int fanotify_add_vfsmount_mark(struct fsnotify_group *group,
-				      struct vfsmount *mnt, __u32 mask)
+				      struct vfsmount *mnt, __u32 mask,
+				      unsigned int flags)
 {
 	struct fsnotify_mark *fsn_mark;
 	__u32 added;
@@ -386,7 +403,7 @@ static int fanotify_add_vfsmount_mark(struct fsnotify_group *group,
 			return ret;
 		}
 	}
-	added = fanotify_mark_add_to_mask(fsn_mark, mask);
+	added = fanotify_mark_add_to_mask(fsn_mark, mask, flags);
 	fsnotify_put_mark(fsn_mark);
 	if (added) {
 		if (added & ~group->mask)
@@ -398,7 +415,8 @@ static int fanotify_add_vfsmount_mark(struct fsnotify_group *group,
 }
 
 static int fanotify_add_inode_mark(struct fsnotify_group *group,
-				   struct inode *inode, __u32 mask)
+				   struct inode *inode, __u32 mask,
+				   unsigned int flags)
 {
 	struct fsnotify_mark *fsn_mark;
 	__u32 added;
@@ -420,7 +438,7 @@ static int fanotify_add_inode_mark(struct fsnotify_group *group,
 			return ret;
 		}
 	}
-	added = fanotify_mark_add_to_mask(fsn_mark, mask);
+	added = fanotify_mark_add_to_mask(fsn_mark, mask, flags);
 	fsnotify_put_mark(fsn_mark);
 	if (added) {
 		if (added & ~group->mask)
@@ -528,15 +546,15 @@ SYSCALL_DEFINE(fanotify_mark)(int fanotify_fd, unsigned int flags,
 	switch (flags & (FAN_MARK_ADD | FAN_MARK_REMOVE)) {
 	case FAN_MARK_ADD:
 		if (flags & FAN_MARK_MOUNT)
-			ret = fanotify_add_vfsmount_mark(group, mnt, mask);
+			ret = fanotify_add_vfsmount_mark(group, mnt, mask, flags);
 		else
-			ret = fanotify_add_inode_mark(group, inode, mask);
+			ret = fanotify_add_inode_mark(group, inode, mask, flags);
 		break;
 	case FAN_MARK_REMOVE:
 		if (flags & FAN_MARK_MOUNT)
-			ret = fanotify_remove_vfsmount_mark(group, mnt, mask);
+			ret = fanotify_remove_vfsmount_mark(group, mnt, mask, flags);
 		else
-			ret = fanotify_remove_inode_mark(group, inode, mask);
+			ret = fanotify_remove_inode_mark(group, inode, mask, flags);
 		break;
 	default:
 		ret = -EINVAL;
-- 
cgit v1.1


From e898386146deb49a0b45ff1887d9da149c003209 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Thu, 17 Dec 2009 21:24:33 -0500
Subject: fsnotify: clear ignored mask on modify

On inode modification we clear the ignored mask for all of the marks on the
inode.  This allows userspace to ignore accesses to inodes until there is
something different.

Signed-off-by: Eric Paris <eparis@redhat.com>
---
 fs/notify/fsnotify.c | 31 +++++++++++++++++++++++++++++++
 1 file changed, 31 insertions(+)

(limited to 'fs')

diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
index 7f14ddc..3ad940d 100644
--- a/fs/notify/fsnotify.c
+++ b/fs/notify/fsnotify.c
@@ -140,6 +140,33 @@ void __fsnotify_parent(struct path *path, struct dentry *dentry, __u32 mask)
 }
 EXPORT_SYMBOL_GPL(__fsnotify_parent);
 
+void __fsnotify_flush_ignored_mask(struct inode *inode, void *data, int data_is)
+{
+	struct fsnotify_mark *mark;
+	struct hlist_node *node;
+
+	if (!hlist_empty(&inode->i_fsnotify_marks)) {
+		spin_lock(&inode->i_lock);
+		hlist_for_each_entry(mark, node, &inode->i_fsnotify_marks, i.i_list) {
+			mark->ignored_mask = 0;
+		}
+		spin_unlock(&inode->i_lock);
+	}
+
+	if (data_is == FSNOTIFY_EVENT_PATH) {
+		struct vfsmount *mnt;
+
+		mnt = ((struct path *)data)->mnt;
+		if (mnt && !hlist_empty(&mnt->mnt_fsnotify_marks)) {
+			spin_lock(&mnt->mnt_root->d_lock);
+			hlist_for_each_entry(mark, node, &mnt->mnt_fsnotify_marks, m.m_list) {
+				mark->ignored_mask = 0;
+			}
+			spin_unlock(&mnt->mnt_root->d_lock);
+		}
+	}
+}
+
 static void send_to_group(struct fsnotify_group *group, struct inode *to_tell,
 			  struct vfsmount *mnt, __u32 mask, void *data,
 			  int data_is, u32 cookie, const char *file_name,
@@ -170,6 +197,7 @@ static bool needed_by_vfsmount(__u32 test_mask, struct vfsmount *mnt)
 
 	return (test_mask & mnt->mnt_fsnotify_mask);
 }
+
 /*
  * This is the main call to fsnotify.  The VFS calls into hook specific functions
  * in linux/fsnotify.h.  Those functions then in turn call here.  Here will call
@@ -190,6 +218,9 @@ void fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is, const
 	    list_empty(&fsnotify_vfsmount_groups))
                 return;
  
+	if (mask & FS_MODIFY)
+		__fsnotify_flush_ignored_mask(to_tell, data, data_is);
+
 	/* if none of the directed listeners or vfsmount listeners care */
 	if (!(test_mask & fsnotify_inode_mask) &&
 	    !(test_mask & fsnotify_vfsmount_mask))
-- 
cgit v1.1


From c908370fc1ac27fd7e1fc0f34c693047b26564ce Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Thu, 17 Dec 2009 21:24:33 -0500
Subject: fsnotify: allow ignored_mask to survive modification

Some inodes a group may want to never hear about a set of events even if
the inode is modified.  We add a new mark flag which indicates that these
marks should not have their ignored_mask cleared on modification.

Signed-off-by: Eric Paris <eparis@redhat.com>
---
 fs/notify/fsnotify.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
index 3ad940d..54d58d5 100644
--- a/fs/notify/fsnotify.c
+++ b/fs/notify/fsnotify.c
@@ -148,7 +148,8 @@ void __fsnotify_flush_ignored_mask(struct inode *inode, void *data, int data_is)
 	if (!hlist_empty(&inode->i_fsnotify_marks)) {
 		spin_lock(&inode->i_lock);
 		hlist_for_each_entry(mark, node, &inode->i_fsnotify_marks, i.i_list) {
-			mark->ignored_mask = 0;
+			if (!(mark->flags & FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY))
+				mark->ignored_mask = 0;
 		}
 		spin_unlock(&inode->i_lock);
 	}
@@ -160,7 +161,8 @@ void __fsnotify_flush_ignored_mask(struct inode *inode, void *data, int data_is)
 		if (mnt && !hlist_empty(&mnt->mnt_fsnotify_marks)) {
 			spin_lock(&mnt->mnt_root->d_lock);
 			hlist_for_each_entry(mark, node, &mnt->mnt_fsnotify_marks, m.m_list) {
-				mark->ignored_mask = 0;
+				if (!(mark->flags & FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY))
+					mark->ignored_mask = 0;
 			}
 			spin_unlock(&mnt->mnt_root->d_lock);
 		}
-- 
cgit v1.1


From c9778a98e7440fb73e0d27b8155a688663a0d493 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Thu, 17 Dec 2009 21:24:33 -0500
Subject: fanotify: allow ignored_masks to survive modify

Some users may want to truely ignore an inode even if it has been modified.
Say you are wanting a mount which contains a log file and you really don't
want any notification about that file.  This patch allows the listener to
do that.

Signed-off-by: Eric Paris <eparis@redhat.com>
---
 fs/notify/fanotify/fanotify_user.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'fs')

diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index ad02d47..3e275f1 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -375,6 +375,8 @@ static __u32 fanotify_mark_add_to_mask(struct fsnotify_mark *fsn_mark,
 	} else {
 		oldmask = fsn_mark->ignored_mask;
 		fsnotify_set_mark_ignored_mask_locked(fsn_mark, (oldmask | mask));
+		if (flags & FAN_MARK_IGNORED_SURV_MODIFY)
+			fsn_mark->flags |= FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY;
 	}
 	spin_unlock(&fsn_mark->lock);
 
-- 
cgit v1.1


From 4d92604cc90aa18bbbe0f6e23b7a9fdb612836d3 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Thu, 17 Dec 2009 21:24:34 -0500
Subject: fanotify: clear all fanotify marks

fanotify listeners may want to clear all marks.  They may want to do this
to destroy all of their inode marks which have nothing but ignores.
Realistically this is useful for av vendors who update policy and want to
clear all of their cached allows.

Signed-off-by: Eric Paris <eparis@redhat.com>
---
 fs/notify/fanotify/fanotify_user.c | 12 ++++++++++--
 fs/notify/inode_mark.c             |  8 ++++++++
 fs/notify/mark.c                   | 21 ++++++++++++++++-----
 fs/notify/vfsmount_mark.c          |  5 +++++
 4 files changed, 39 insertions(+), 7 deletions(-)

(limited to 'fs')

diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index 3e275f1..9fe760b 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -514,9 +514,10 @@ SYSCALL_DEFINE(fanotify_mark)(int fanotify_fd, unsigned int flags,
 
 	if (flags & ~FAN_ALL_MARK_FLAGS)
 		return -EINVAL;
-	switch (flags & (FAN_MARK_ADD | FAN_MARK_REMOVE)) {
+	switch (flags & (FAN_MARK_ADD | FAN_MARK_REMOVE | FAN_MARK_FLUSH)) {
 	case FAN_MARK_ADD:
 	case FAN_MARK_REMOVE:
+	case FAN_MARK_FLUSH:
 		break;
 	default:
 		return -EINVAL;
@@ -545,7 +546,7 @@ SYSCALL_DEFINE(fanotify_mark)(int fanotify_fd, unsigned int flags,
 	group = filp->private_data;
 
 	/* create/update an inode mark */
-	switch (flags & (FAN_MARK_ADD | FAN_MARK_REMOVE)) {
+	switch (flags & (FAN_MARK_ADD | FAN_MARK_REMOVE | FAN_MARK_FLUSH)) {
 	case FAN_MARK_ADD:
 		if (flags & FAN_MARK_MOUNT)
 			ret = fanotify_add_vfsmount_mark(group, mnt, mask, flags);
@@ -558,6 +559,13 @@ SYSCALL_DEFINE(fanotify_mark)(int fanotify_fd, unsigned int flags,
 		else
 			ret = fanotify_remove_inode_mark(group, inode, mask, flags);
 		break;
+	case FAN_MARK_FLUSH:
+		if (flags & FAN_MARK_MOUNT)
+			fsnotify_clear_vfsmount_marks_by_group(group);
+		else
+			fsnotify_clear_inode_marks_by_group(group);
+		fsnotify_recalc_group_mask(group);
+		break;
 	default:
 		ret = -EINVAL;
 	}
diff --git a/fs/notify/inode_mark.c b/fs/notify/inode_mark.c
index 4292f9e..0c0a48b 100644
--- a/fs/notify/inode_mark.c
+++ b/fs/notify/inode_mark.c
@@ -104,6 +104,14 @@ void fsnotify_clear_marks_by_inode(struct inode *inode)
 }
 
 /*
+ * Given a group clear all of the inode marks associated with that group.
+ */
+void fsnotify_clear_inode_marks_by_group(struct fsnotify_group *group)
+{
+	fsnotify_clear_marks_by_group_flags(group, FSNOTIFY_MARK_FLAG_INODE);
+}
+
+/*
  * given a group and inode, find the mark associated with that combination.
  * if found take a reference to that mark and return it, else return NULL
  */
diff --git a/fs/notify/mark.c b/fs/notify/mark.c
index cb1d822..1e824e6 100644
--- a/fs/notify/mark.c
+++ b/fs/notify/mark.c
@@ -270,18 +270,21 @@ err:
 }
 
 /*
- * Given a group, destroy all of the marks associated with that group.
+ * clear any marks in a group in which mark->flags & flags is true
  */
-void fsnotify_clear_marks_by_group(struct fsnotify_group *group)
+void fsnotify_clear_marks_by_group_flags(struct fsnotify_group *group,
+					 unsigned int flags)
 {
 	struct fsnotify_mark *lmark, *mark;
 	LIST_HEAD(free_list);
 
 	spin_lock(&group->mark_lock);
 	list_for_each_entry_safe(mark, lmark, &group->marks_list, g_list) {
-		list_add(&mark->free_g_list, &free_list);
-		list_del_init(&mark->g_list);
-		fsnotify_get_mark(mark);
+		if (mark->flags & flags) {
+			list_add(&mark->free_g_list, &free_list);
+			list_del_init(&mark->g_list);
+			fsnotify_get_mark(mark);
+		}
 	}
 	spin_unlock(&group->mark_lock);
 
@@ -291,6 +294,14 @@ void fsnotify_clear_marks_by_group(struct fsnotify_group *group)
 	}
 }
 
+/*
+ * Given a group, destroy all of the marks associated with that group.
+ */
+void fsnotify_clear_marks_by_group(struct fsnotify_group *group)
+{
+	fsnotify_clear_marks_by_group_flags(group, (unsigned int)-1);
+}
+
 void fsnotify_duplicate_mark(struct fsnotify_mark *new, struct fsnotify_mark *old)
 {
 	assert_spin_locked(&old->lock);
diff --git a/fs/notify/vfsmount_mark.c b/fs/notify/vfsmount_mark.c
index 1b61d0a..8f1aa02 100644
--- a/fs/notify/vfsmount_mark.c
+++ b/fs/notify/vfsmount_mark.c
@@ -51,6 +51,11 @@ void fsnotify_clear_marks_by_mount(struct vfsmount *mnt)
 	}
 }
 
+void fsnotify_clear_vfsmount_marks_by_group(struct fsnotify_group *group)
+{
+	fsnotify_clear_marks_by_group_flags(group, FSNOTIFY_MARK_FLAG_VFSMOUNT);
+}
+
 /*
  * Recalculate the mask of events relevant to a given vfsmount locked.
  */
-- 
cgit v1.1


From cb2d429faf2cae62d3c51e28099a181d5fe8c244 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Thu, 17 Dec 2009 21:24:34 -0500
Subject: fsnotify: add group priorities

This introduces an ordering to fsnotify groups.  With purely asynchronous
notification based "things" implementing fsnotify (inotify, dnotify) ordering
isn't particularly important.  But if people want to use fsnotify for the
basis of sycronous notification or blocking notification ordering becomes
important.

eg. A Hierarchical Storage Management listener would need to get its event
before an AV scanner could get its event (since the HSM would need to
bring the data in for the AV scanner to scan.)  Typically asynchronous notification
would want to run after the AV scanner made any relevant access decisions
so as to not send notification about an event that was denied.

Signed-off-by: Eric Paris <eparis@redhat.com>
---
 fs/notify/fanotify/fanotify_user.c |  4 ++--
 fs/notify/group.c                  | 40 ++++++++++++++++++++++++++++++++++++--
 2 files changed, 40 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index 9fe760b..84d3e20 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -463,8 +463,6 @@ SYSCALL_DEFINE3(fanotify_init, unsigned int, flags, unsigned int, event_f_flags,
 
 	if (event_f_flags)
 		return -EINVAL;
-	if (priority)
-		return -EINVAL;
 
 	if (!capable(CAP_SYS_ADMIN))
 		return -EACCES;
@@ -483,6 +481,8 @@ SYSCALL_DEFINE3(fanotify_init, unsigned int, flags, unsigned int, event_f_flags,
 	if (IS_ERR(group))
 		return PTR_ERR(group);
 
+	group->priority = priority;
+
 	fd = anon_inode_getfd("[fanotify]", &fanotify_fops, group, f_flags);
 	if (fd < 0)
 		goto out_put_group;
diff --git a/fs/notify/group.c b/fs/notify/group.c
index 9e9eb40..ada913f 100644
--- a/fs/notify/group.c
+++ b/fs/notify/group.c
@@ -89,10 +89,27 @@ void fsnotify_recalc_group_mask(struct fsnotify_group *group)
 
 void fsnotify_add_vfsmount_group(struct fsnotify_group *group)
 {
+	struct fsnotify_group *group_iter;
+	unsigned int priority = group->priority;
+
 	mutex_lock(&fsnotify_grp_mutex);
 
-	if (!group->on_vfsmount_group_list)
+	if (!group->on_vfsmount_group_list) {
+		list_for_each_entry(group_iter, &fsnotify_vfsmount_groups,
+				    vfsmount_group_list) {
+			/* insert in front of this one? */
+			if (priority < group_iter->priority) {
+				/* list_add_tail() insert in front of group_iter */
+				list_add_tail_rcu(&group->inode_group_list,
+						  &group_iter->inode_group_list);
+				goto out;
+			}
+		}
+
+		/* apparently we need to be the last entry */
 		list_add_tail_rcu(&group->vfsmount_group_list, &fsnotify_vfsmount_groups);
+	}
+out:
 	group->on_vfsmount_group_list = 1;
 
 	mutex_unlock(&fsnotify_grp_mutex);
@@ -100,10 +117,27 @@ void fsnotify_add_vfsmount_group(struct fsnotify_group *group)
 
 void fsnotify_add_inode_group(struct fsnotify_group *group)
 {
+	struct fsnotify_group *group_iter;
+	unsigned int priority = group->priority;
+
 	mutex_lock(&fsnotify_grp_mutex);
 
-	if (!group->on_inode_group_list)
+	/* add to global group list, priority 0 first, UINT_MAX last */
+	if (!group->on_inode_group_list) {
+		list_for_each_entry(group_iter, &fsnotify_inode_groups,
+				    inode_group_list) {
+			if (priority < group_iter->priority) {
+				/* list_add_tail() insert in front of group_iter */
+				list_add_tail_rcu(&group->inode_group_list,
+						  &group_iter->inode_group_list);
+				goto out;
+			}
+		}
+
+		/* apparently we need to be the last entry */
 		list_add_tail_rcu(&group->inode_group_list, &fsnotify_inode_groups);
+	}
+out:
 	group->on_inode_group_list = 1;
 
 	mutex_unlock(&fsnotify_grp_mutex);
@@ -226,6 +260,8 @@ struct fsnotify_group *fsnotify_alloc_group(const struct fsnotify_ops *ops)
 	spin_lock_init(&group->mark_lock);
 	INIT_LIST_HEAD(&group->marks_list);
 
+	group->priority = UINT_MAX;
+
 	group->ops = ops;
 
 	return group;
-- 
cgit v1.1


From 6e5f77b32e9097a8a68a8d453799676cacf70cad Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Thu, 17 Dec 2009 21:24:34 -0500
Subject: fsnotify: intoduce a notification merge argument

Each group can define their own notification (and secondary_q) merge
function.  Inotify does tail drop, fanotify does matching and drop which
can actually allocate a completely new event.  But for fanotify to properly
deal with permissions events it needs to know the new event which was
ultimately added to the notification queue.  This patch just implements a
void ** argument which is passed to the merge function.  fanotify can use
this field to pass the new event back to higher layers.

Signed-off-by: Eric Paris <eparis@redhat.com>
for fanotify to properly deal with permissions events
---
 fs/notify/fanotify/fanotify.c        | 6 ++++--
 fs/notify/inotify/inotify_fsnotify.c | 6 ++++--
 fs/notify/inotify/inotify_user.c     | 2 +-
 fs/notify/notification.c             | 7 +++++--
 4 files changed, 14 insertions(+), 7 deletions(-)

(limited to 'fs')

diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
index 060b177..95a330d 100644
--- a/fs/notify/fanotify/fanotify.c
+++ b/fs/notify/fanotify/fanotify.c
@@ -27,7 +27,9 @@ static bool should_merge(struct fsnotify_event *old, struct fsnotify_event *new)
 	return false;
 }
 
-static int fanotify_merge(struct list_head *list, struct fsnotify_event *event)
+static int fanotify_merge(struct list_head *list,
+			  struct fsnotify_event *event,
+			  void **arg)
 {
 	struct fsnotify_event_holder *test_holder;
 	struct fsnotify_event *test_event;
@@ -92,7 +94,7 @@ static int fanotify_handle_event(struct fsnotify_group *group, struct fsnotify_e
 
 	pr_debug("%s: group=%p event=%p\n", __func__, group, event);
 
-	ret = fsnotify_add_notify_event(group, event, NULL, fanotify_merge);
+	ret = fsnotify_add_notify_event(group, event, NULL, fanotify_merge, NULL);
 	/* -EEXIST means this event was merged with another, not that it was an error */
 	if (ret == -EEXIST)
 		ret = 0;
diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c
index 1d237e1..daa666a 100644
--- a/fs/notify/inotify/inotify_fsnotify.c
+++ b/fs/notify/inotify/inotify_fsnotify.c
@@ -67,7 +67,9 @@ static bool event_compare(struct fsnotify_event *old, struct fsnotify_event *new
 	return false;
 }
 
-static int inotify_merge(struct list_head *list, struct fsnotify_event *event)
+static int inotify_merge(struct list_head *list,
+			 struct fsnotify_event *event,
+			 void **arg)
 {
 	struct fsnotify_event_holder *last_holder;
 	struct fsnotify_event *last_event;
@@ -114,7 +116,7 @@ static int inotify_handle_event(struct fsnotify_group *group, struct fsnotify_ev
 	fsn_event_priv->group = group;
 	event_priv->wd = wd;
 
-	ret = fsnotify_add_notify_event(group, event, fsn_event_priv, inotify_merge);
+	ret = fsnotify_add_notify_event(group, event, fsn_event_priv, inotify_merge, NULL);
 	if (ret) {
 		inotify_free_event_priv(fsn_event_priv);
 		/* EEXIST says we tail matched, EOVERFLOW isn't something
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index 19d2740..1ce71f5 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -525,7 +525,7 @@ void inotify_ignored_and_remove_idr(struct fsnotify_mark *fsn_mark,
 	fsn_event_priv->group = group;
 	event_priv->wd = i_mark->wd;
 
-	ret = fsnotify_add_notify_event(group, ignored_event, fsn_event_priv, NULL);
+	ret = fsnotify_add_notify_event(group, ignored_event, fsn_event_priv, NULL, NULL);
 	if (ret)
 		inotify_free_event_priv(fsn_event_priv);
 
diff --git a/fs/notify/notification.c b/fs/notify/notification.c
index 7fc8d00..2d50a40 100644
--- a/fs/notify/notification.c
+++ b/fs/notify/notification.c
@@ -137,7 +137,10 @@ struct fsnotify_event_private_data *fsnotify_remove_priv_from_event(struct fsnot
  */
 int fsnotify_add_notify_event(struct fsnotify_group *group, struct fsnotify_event *event,
 			      struct fsnotify_event_private_data *priv,
-			      int (*merge)(struct list_head *, struct fsnotify_event *))
+			      int (*merge)(struct list_head *,
+					   struct fsnotify_event *,
+					   void **arg),
+			      void **arg)
 {
 	struct fsnotify_event_holder *holder = NULL;
 	struct list_head *list = &group->notification_list;
@@ -170,7 +173,7 @@ alloc_holder:
 	if (!list_empty(list) && merge) {
 		int ret;
 
-		ret = merge(list, event);
+		ret = merge(list, event, arg);
 		if (ret) {
 			mutex_unlock(&group->notification_mutex);
 			if (holder != &event->holder)
-- 
cgit v1.1


From 43ed7e16a8b47059d7f6ff67ba76f383a2421de3 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Thu, 17 Dec 2009 21:24:34 -0500
Subject: fanotify: use merge argument to determine actual event added to queue

fanotify needs to know the actual event added to queues so it can be
correctly checked for return values from userspace.  To do this we need to
pass that information from the merger code back to the main even handling
routine.  Currently that information is unused, but it will be.

Signed-off-by: Eric Paris <eparis@redhat.com>
---
 fs/notify/fanotify/fanotify.c | 21 ++++++++++++++++-----
 1 file changed, 16 insertions(+), 5 deletions(-)

(limited to 'fs')

diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
index 95a330d..4feed86 100644
--- a/fs/notify/fanotify/fanotify.c
+++ b/fs/notify/fanotify/fanotify.c
@@ -27,6 +27,7 @@ static bool should_merge(struct fsnotify_event *old, struct fsnotify_event *new)
 	return false;
 }
 
+/* Note, if we return an event in *arg that a reference is being held... */
 static int fanotify_merge(struct list_head *list,
 			  struct fsnotify_event *event,
 			  void **arg)
@@ -34,17 +35,22 @@ static int fanotify_merge(struct list_head *list,
 	struct fsnotify_event_holder *test_holder;
 	struct fsnotify_event *test_event;
 	struct fsnotify_event *new_event;
+	struct fsnotify_event **return_event = (struct fsnotify_event **)arg;
 	int ret = 0;
 
 	pr_debug("%s: list=%p event=%p\n", __func__, list, event);
 
+	*return_event = NULL;
+
 	/* and the list better be locked by something too! */
 
 	list_for_each_entry_reverse(test_holder, list, event_list) {
 		test_event = test_holder->event;
 		if (should_merge(test_event, event)) {
-			ret = -EEXIST;
+			fsnotify_get_event(test_event);
+			*return_event = test_event;
 
+			ret = -EEXIST;
 			/* if they are exactly the same we are done */
 			if (test_event->mask == event->mask)
 				goto out;
@@ -66,11 +72,14 @@ static int fanotify_merge(struct list_head *list,
 				goto out;
 			}
 
+			/* we didn't return the test_event, so drop that ref */
+			fsnotify_put_event(test_event);
+			/* the reference we return on new_event is from clone */
+			*return_event = new_event;
+
 			/* build new event and replace it on the list */
 			new_event->mask = (test_event->mask | event->mask);
 			fsnotify_replace_event(test_holder, new_event);
-			/* match ref from fsnotify_clone_event() */
-			fsnotify_put_event(new_event);
 
 			break;
 		}
@@ -82,7 +91,7 @@ out:
 static int fanotify_handle_event(struct fsnotify_group *group, struct fsnotify_event *event)
 {
 	int ret;
-
+	struct fsnotify_event *used_event;
 
 	BUILD_BUG_ON(FAN_ACCESS != FS_ACCESS);
 	BUILD_BUG_ON(FAN_MODIFY != FS_MODIFY);
@@ -94,10 +103,12 @@ static int fanotify_handle_event(struct fsnotify_group *group, struct fsnotify_e
 
 	pr_debug("%s: group=%p event=%p\n", __func__, group, event);
 
-	ret = fsnotify_add_notify_event(group, event, NULL, fanotify_merge, NULL);
+	ret = fsnotify_add_notify_event(group, event, NULL, fanotify_merge, (void **)&used_event);
 	/* -EEXIST means this event was merged with another, not that it was an error */
 	if (ret == -EEXIST)
 		ret = 0;
+	if (used_event)
+		fsnotify_put_event(used_event);
 	return ret;
 }
 
-- 
cgit v1.1


From 59b0df211bd9699d7e0d01fcf9345a149f75b033 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Mon, 8 Feb 2010 12:53:52 -0500
Subject: fsnotify: use unsigned char * for dentry->d_name.name

fsnotify was using char * when it passed around the d_name.name string
internally but it is actually an unsigned char *.  This patch switches
fsnotify to use unsigned and should silence some pointer signess warnings
which have popped out of xfs.  I do not add -Wpointer-sign to the fsnotify
code as there are still issues with kstrdup and strlen which would pop
out needless warnings.

Signed-off-by: Eric Paris <eparis@redhat.com>
---
 fs/namei.c               | 2 +-
 fs/notify/fsnotify.c     | 5 +++--
 fs/notify/notification.c | 4 ++--
 3 files changed, 6 insertions(+), 5 deletions(-)

(limited to 'fs')

diff --git a/fs/namei.c b/fs/namei.c
index 868d0cb..3479b17 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -2635,7 +2635,7 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 {
 	int error;
 	int is_dir = S_ISDIR(old_dentry->d_inode->i_mode);
-	const char *old_name;
+	const unsigned char *old_name;
 
 	if (old_dentry->d_inode == new_dentry->d_inode)
  		return 0;
diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
index 54d58d5..c5adf83 100644
--- a/fs/notify/fsnotify.c
+++ b/fs/notify/fsnotify.c
@@ -171,7 +171,7 @@ void __fsnotify_flush_ignored_mask(struct inode *inode, void *data, int data_is)
 
 static void send_to_group(struct fsnotify_group *group, struct inode *to_tell,
 			  struct vfsmount *mnt, __u32 mask, void *data,
-			  int data_is, u32 cookie, const char *file_name,
+			  int data_is, u32 cookie, const unsigned char *file_name,
 			  struct fsnotify_event **event)
 {
 	if (!group->ops->should_send_event(group, to_tell, mnt, mask,
@@ -206,7 +206,8 @@ static bool needed_by_vfsmount(__u32 test_mask, struct vfsmount *mnt)
  * out to all of the registered fsnotify_group.  Those groups can then use the
  * notification event in whatever means they feel necessary.
  */
-void fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is, const char *file_name, u32 cookie)
+void fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is,
+	      const unsigned char *file_name, u32 cookie)
 {
 	struct fsnotify_group *group;
 	struct fsnotify_event *event = NULL;
diff --git a/fs/notify/notification.c b/fs/notify/notification.c
index 2d50a40..b35faaf 100644
--- a/fs/notify/notification.c
+++ b/fs/notify/notification.c
@@ -370,8 +370,8 @@ struct fsnotify_event *fsnotify_clone_event(struct fsnotify_event *old_event)
  * @name the filename, if available
  */
 struct fsnotify_event *fsnotify_create_event(struct inode *to_tell, __u32 mask, void *data,
-					     int data_type, const char *name, u32 cookie,
-					     gfp_t gfp)
+					     int data_type, const unsigned char *name,
+					     u32 cookie, gfp_t gfp)
 {
 	struct fsnotify_event *event;
 
-- 
cgit v1.1


From c4ec54b40d33f8016fea970a383cc584dd0e6019 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Thu, 17 Dec 2009 21:24:34 -0500
Subject: fsnotify: new fsnotify hooks and events types for access decisions

introduce a new fsnotify hook, fsnotify_perm(), which is called from the
security code.  This hook is used to allow fsnotify groups to make access
control decisions about events on the system.  We also must change the
generic fsnotify function to return an error code if we intend these hooks
to be in any way useful.

Signed-off-by: Eric Paris <eparis@redhat.com>
---
 fs/notify/fsnotify.c | 47 ++++++++++++++++++++++++-----------------------
 1 file changed, 24 insertions(+), 23 deletions(-)

(limited to 'fs')

diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
index c5adf83..6682686 100644
--- a/fs/notify/fsnotify.c
+++ b/fs/notify/fsnotify.c
@@ -169,27 +169,22 @@ void __fsnotify_flush_ignored_mask(struct inode *inode, void *data, int data_is)
 	}
 }
 
-static void send_to_group(struct fsnotify_group *group, struct inode *to_tell,
-			  struct vfsmount *mnt, __u32 mask, void *data,
-			  int data_is, u32 cookie, const unsigned char *file_name,
-			  struct fsnotify_event **event)
+static int send_to_group(struct fsnotify_group *group, struct inode *to_tell,
+			 struct vfsmount *mnt, __u32 mask, void *data,
+			 int data_is, u32 cookie, const unsigned char *file_name,
+			 struct fsnotify_event **event)
 {
 	if (!group->ops->should_send_event(group, to_tell, mnt, mask,
 					   data, data_is))
-		return;
+		return 0;
 	if (!*event) {
 		*event = fsnotify_create_event(to_tell, mask, data,
 						data_is, file_name,
 						cookie, GFP_KERNEL);
-		/*
-		 * shit, we OOM'd and now we can't tell, maybe
-		 * someday someone else will want to do something
-		 * here
-		 */
 		if (!*event)
-			return;
+			return -ENOMEM;
 	}
-	group->ops->handle_event(group, *event);
+	return group->ops->handle_event(group, *event);
 }
 
 static bool needed_by_vfsmount(__u32 test_mask, struct vfsmount *mnt)
@@ -206,20 +201,20 @@ static bool needed_by_vfsmount(__u32 test_mask, struct vfsmount *mnt)
  * out to all of the registered fsnotify_group.  Those groups can then use the
  * notification event in whatever means they feel necessary.
  */
-void fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is,
-	      const unsigned char *file_name, u32 cookie)
+int fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is,
+	     const unsigned char *file_name, u32 cookie)
 {
 	struct fsnotify_group *group;
 	struct fsnotify_event *event = NULL;
 	struct vfsmount *mnt = NULL;
-	int idx;
+	int idx, ret = 0;
 	/* global tests shouldn't care about events on child only the specific event */
 	__u32 test_mask = (mask & ~FS_EVENT_ON_CHILD);
 
 	/* if no fsnotify listeners, nothing to do */
 	if (list_empty(&fsnotify_inode_groups) &&
 	    list_empty(&fsnotify_vfsmount_groups))
-                return;
+		return 0;
  
 	if (mask & FS_MODIFY)
 		__fsnotify_flush_ignored_mask(to_tell, data, data_is);
@@ -227,7 +222,7 @@ void fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is,
 	/* if none of the directed listeners or vfsmount listeners care */
 	if (!(test_mask & fsnotify_inode_mask) &&
 	    !(test_mask & fsnotify_vfsmount_mask))
-                return;
+		return 0;
  
 	if (data_is == FSNOTIFY_EVENT_PATH)
 		mnt = ((struct path *)data)->mnt;
@@ -236,7 +231,7 @@ void fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is,
 	 * listeners list cares, nothing to do */
 	if (!(test_mask & to_tell->i_fsnotify_mask) &&
 	    !needed_by_vfsmount(test_mask, mnt))
-                return;
+		return 0;
 
 	/*
 	 * SRCU!!  the groups list is very very much read only and the path is
@@ -248,20 +243,24 @@ void fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is,
 	if (test_mask & to_tell->i_fsnotify_mask) {
 		list_for_each_entry_rcu(group, &fsnotify_inode_groups, inode_group_list) {
 			if (test_mask & group->mask) {
-				send_to_group(group, to_tell, NULL, mask, data, data_is,
-					      cookie, file_name, &event);
+				ret = send_to_group(group, to_tell, NULL, mask, data, data_is,
+						    cookie, file_name, &event);
+				if (ret)
+					goto out;
 			}
 		}
 	}
 	if (needed_by_vfsmount(test_mask, mnt)) {
 		list_for_each_entry_rcu(group, &fsnotify_vfsmount_groups, vfsmount_group_list) {
 			if (test_mask & group->mask) {
-				send_to_group(group, to_tell, mnt, mask, data, data_is,
-					      cookie, file_name, &event);
+				ret = send_to_group(group, to_tell, mnt, mask, data, data_is,
+						    cookie, file_name, &event);
+				if (ret)
+					goto out;
 			}
 		}
 	}
-
+out:
 	srcu_read_unlock(&fsnotify_grp_srcu, idx);
 	/*
 	 * fsnotify_create_event() took a reference so the event can't be cleaned
@@ -269,6 +268,8 @@ void fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is,
 	 */
 	if (event)
 		fsnotify_put_event(event);
+
+	return 0;
 }
 EXPORT_SYMBOL_GPL(fsnotify);
 
-- 
cgit v1.1


From 9e66e4233db9c7e31e9ee706be2c9ddd54cf99b3 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Thu, 17 Dec 2009 21:24:34 -0500
Subject: fanotify: permissions and blocking

This is the backend work needed for fanotify to support the new
FS_OPEN_PERM and FS_ACCESS_PERM fsnotify events.  This is done using the
new fsnotify secondary queue.  No userspace interface is provided actually
respond to or request these events.

Signed-off-by: Eric Paris <eparis@redhat.com>
---
 fs/notify/fanotify/Kconfig         | 14 ++++++++++
 fs/notify/fanotify/fanotify.c      | 54 +++++++++++++++++++++++++++++++++++---
 fs/notify/fanotify/fanotify_user.c |  5 ++++
 3 files changed, 69 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/notify/fanotify/Kconfig b/fs/notify/fanotify/Kconfig
index 668e5df..566de30 100644
--- a/fs/notify/fanotify/Kconfig
+++ b/fs/notify/fanotify/Kconfig
@@ -10,3 +10,17 @@ config FANOTIFY
 	   the event.
 
 	   If unsure, say Y.
+
+config FANOTIFY_ACCESS_PERMISSIONS
+	bool "fanotify permissions checking"
+	depends on FANOTIFY
+	depends on SECURITY
+	default n
+	---help---
+	   Say Y here is you want fanotify listeners to be able to make permissions
+	   decisions concerning filesystem events.  This is used by some fanotify
+	   listeners which need to scan files before allowing the system access to
+	   use those files.  This is used by some anti-malware vendors and by some
+	   hierarchical storage managent systems.
+
+	   If unsure, say N.
diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
index 4feed86..52d0a55 100644
--- a/fs/notify/fanotify/fanotify.c
+++ b/fs/notify/fanotify/fanotify.c
@@ -2,9 +2,12 @@
 #include <linux/fdtable.h>
 #include <linux/fsnotify_backend.h>
 #include <linux/init.h>
+#include <linux/jiffies.h>
 #include <linux/kernel.h> /* UINT_MAX */
 #include <linux/mount.h>
+#include <linux/sched.h>
 #include <linux/types.h>
+#include <linux/wait.h>
 
 static bool should_merge(struct fsnotify_event *old, struct fsnotify_event *new)
 {
@@ -88,10 +91,37 @@ out:
 	return ret;
 }
 
+#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
+static int fanotify_get_response_from_access(struct fsnotify_group *group,
+					     struct fsnotify_event *event)
+{
+	int ret;
+
+	pr_debug("%s: group=%p event=%p\n", __func__, group, event);
+
+	wait_event(group->fanotify_data.access_waitq, event->response);
+
+	/* userspace responded, convert to something usable */
+	spin_lock(&event->lock);
+	switch (event->response) {
+	case FAN_ALLOW:
+		ret = 0;
+		break;
+	case FAN_DENY:
+	default:
+		ret = -EPERM;
+	}
+	event->response = 0;
+	spin_unlock(&event->lock);
+
+	return ret;
+}
+#endif
+
 static int fanotify_handle_event(struct fsnotify_group *group, struct fsnotify_event *event)
 {
 	int ret;
-	struct fsnotify_event *used_event;
+	struct fsnotify_event *notify_event = NULL;
 
 	BUILD_BUG_ON(FAN_ACCESS != FS_ACCESS);
 	BUILD_BUG_ON(FAN_MODIFY != FS_MODIFY);
@@ -100,15 +130,31 @@ static int fanotify_handle_event(struct fsnotify_group *group, struct fsnotify_e
 	BUILD_BUG_ON(FAN_OPEN != FS_OPEN);
 	BUILD_BUG_ON(FAN_EVENT_ON_CHILD != FS_EVENT_ON_CHILD);
 	BUILD_BUG_ON(FAN_Q_OVERFLOW != FS_Q_OVERFLOW);
+	BUILD_BUG_ON(FAN_OPEN_PERM != FS_OPEN_PERM);
+	BUILD_BUG_ON(FAN_ACCESS_PERM != FS_ACCESS_PERM);
 
 	pr_debug("%s: group=%p event=%p\n", __func__, group, event);
 
-	ret = fsnotify_add_notify_event(group, event, NULL, fanotify_merge, (void **)&used_event);
+	ret = fsnotify_add_notify_event(group, event, NULL, fanotify_merge,
+					(void **)&notify_event);
 	/* -EEXIST means this event was merged with another, not that it was an error */
 	if (ret == -EEXIST)
 		ret = 0;
-	if (used_event)
-		fsnotify_put_event(used_event);
+	if (ret)
+		goto out;
+
+#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
+	if (event->mask & FAN_ALL_PERM_EVENTS) {
+		/* if we merged we need to wait on the new event */
+		if (notify_event)
+			event = notify_event;
+		ret = fanotify_get_response_from_access(group, event);
+	}
+#endif
+
+out:
+	if (notify_event)
+		fsnotify_put_event(notify_event);
 	return ret;
 }
 
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index 84d3e20..09d9bdb 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -482,6 +482,11 @@ SYSCALL_DEFINE3(fanotify_init, unsigned int, flags, unsigned int, event_f_flags,
 		return PTR_ERR(group);
 
 	group->priority = priority;
+#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
+	mutex_init(&group->fanotify_data.access_mutex);
+	init_waitqueue_head(&group->fanotify_data.access_waitq);
+	INIT_LIST_HEAD(&group->fanotify_data.access_list);
+#endif
 
 	fd = anon_inode_getfd("[fanotify]", &fanotify_fops, group, f_flags);
 	if (fd < 0)
-- 
cgit v1.1


From b2d879096ac799722e6017ee82c0586f0d101c9c Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Thu, 17 Dec 2009 21:24:34 -0500
Subject: fanotify: userspace interface for permission responses

fanotify groups need to respond to events which include permissions types.
To do so groups will send a response using write() on the fanotify_fd they
have open.

Signed-off-by: Eric Paris <eparis@redhat.com>
---
 fs/notify/fanotify/fanotify.c      |   3 +
 fs/notify/fanotify/fanotify_user.c | 182 +++++++++++++++++++++++++++++++++++--
 2 files changed, 179 insertions(+), 6 deletions(-)

(limited to 'fs')

diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
index 52d0a55..bbcfccd 100644
--- a/fs/notify/fanotify/fanotify.c
+++ b/fs/notify/fanotify/fanotify.c
@@ -114,6 +114,9 @@ static int fanotify_get_response_from_access(struct fsnotify_group *group,
 	event->response = 0;
 	spin_unlock(&event->lock);
 
+	pr_debug("%s: group=%p event=%p about to return ret=%d\n", __func__,
+		 group, event, ret);
+	
 	return ret;
 }
 #endif
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index 09d9bdb..87f0be8 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -18,6 +18,13 @@
 extern const struct fsnotify_ops fanotify_fsnotify_ops;
 
 static struct kmem_cache *fanotify_mark_cache __read_mostly;
+static struct kmem_cache *fanotify_response_event_cache __read_mostly;
+
+struct fanotify_response_event {
+	struct list_head list;
+	__s32 fd;
+	struct fsnotify_event *event;
+};
 
 /*
  * Get an fsnotify notification event if one exists and is small
@@ -110,23 +117,152 @@ static ssize_t fill_event_metadata(struct fsnotify_group *group,
 	return metadata->fd;
 }
 
+#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
+static struct fanotify_response_event *dequeue_re(struct fsnotify_group *group,
+						  __s32 fd)
+{
+	struct fanotify_response_event *re, *return_re = NULL;
+
+	mutex_lock(&group->fanotify_data.access_mutex);
+	list_for_each_entry(re, &group->fanotify_data.access_list, list) {
+		if (re->fd != fd)
+			continue;
+
+		list_del_init(&re->list);
+		return_re = re;
+		break;
+	}
+	mutex_unlock(&group->fanotify_data.access_mutex);
+
+	pr_debug("%s: found return_re=%p\n", __func__, return_re);
+
+	return return_re;
+}
+
+static int process_access_response(struct fsnotify_group *group,
+				   struct fanotify_response *response_struct)
+{
+	struct fanotify_response_event *re;
+	__s32 fd = response_struct->fd;
+	__u32 response = response_struct->response;
+
+	pr_debug("%s: group=%p fd=%d response=%d\n", __func__, group,
+		 fd, response);
+	/*
+	 * make sure the response is valid, if invalid we do nothing and either
+	 * userspace can send a valid responce or we will clean it up after the
+	 * timeout
+	 */
+	switch (response) {
+	case FAN_ALLOW:
+	case FAN_DENY:
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	if (fd < 0)
+		return -EINVAL;
+
+	re = dequeue_re(group, fd);
+	if (!re)
+		return -ENOENT;
+
+	re->event->response = response;
+
+	wake_up(&group->fanotify_data.access_waitq);
+
+	kmem_cache_free(fanotify_response_event_cache, re);
+
+	return 0;
+}
+
+static int prepare_for_access_response(struct fsnotify_group *group,
+				       struct fsnotify_event *event,
+				       __s32 fd)
+{
+	struct fanotify_response_event *re;
+
+	if (!(event->mask & FAN_ALL_PERM_EVENTS))
+		return 0;
+
+	re = kmem_cache_alloc(fanotify_response_event_cache, GFP_KERNEL);
+	if (!re)
+		return -ENOMEM;
+
+	re->event = event;
+	re->fd = fd;
+
+	mutex_lock(&group->fanotify_data.access_mutex);
+	list_add_tail(&re->list, &group->fanotify_data.access_list);
+	mutex_unlock(&group->fanotify_data.access_mutex);
+
+	return 0;
+}
+
+static void remove_access_response(struct fsnotify_group *group,
+				   struct fsnotify_event *event,
+				   __s32 fd)
+{
+	struct fanotify_response_event *re;
+
+	if (!(event->mask & FAN_ALL_PERM_EVENTS))
+		return;
+
+	re = dequeue_re(group, fd);
+	if (!re)
+		return;
+
+	BUG_ON(re->event != event);
+
+	kmem_cache_free(fanotify_response_event_cache, re);
+
+	return;
+}
+#else
+static int prepare_for_access_response(struct fsnotify_group *group,
+				       struct fsnotify_event *event,
+				       __s32 fd)
+{
+	return 0;
+}
+
+static void remove_access_response(struct fsnotify_group *group,
+				   struct fsnotify_event *event,
+				   __s32 fd)
+{
+	return 0;
+}
+#endif
+
 static ssize_t copy_event_to_user(struct fsnotify_group *group,
 				  struct fsnotify_event *event,
 				  char __user *buf)
 {
 	struct fanotify_event_metadata fanotify_event_metadata;
-	int ret;
+	int fd, ret;
 
 	pr_debug("%s: group=%p event=%p\n", __func__, group, event);
 
-	ret = fill_event_metadata(group, &fanotify_event_metadata, event);
-	if (ret < 0)
-		return ret;
+	fd = fill_event_metadata(group, &fanotify_event_metadata, event);
+	if (fd < 0)
+		return fd;
+
+	ret = prepare_for_access_response(group, event, fd);
+	if (ret)
+		goto out_close_fd;
 
+	ret = -EFAULT;
 	if (copy_to_user(buf, &fanotify_event_metadata, FAN_EVENT_METADATA_LEN))
-		return -EFAULT;
+		goto out_kill_access_response;
 
 	return FAN_EVENT_METADATA_LEN;
+
+out_kill_access_response:
+	remove_access_response(group, event, fd);
+out_close_fd:
+	sys_close(fd);
+	return ret;
 }
 
 /* intofiy userspace file descriptor functions */
@@ -197,6 +333,33 @@ static ssize_t fanotify_read(struct file *file, char __user *buf,
 	return ret;
 }
 
+static ssize_t fanotify_write(struct file *file, const char __user *buf, size_t count, loff_t *pos)
+{
+#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
+	struct fanotify_response response = { .fd = -1, .response = -1 };
+	struct fsnotify_group *group;
+	int ret;
+
+	group = file->private_data;
+
+	if (count > sizeof(response))
+		count = sizeof(response);
+
+	pr_debug("%s: group=%p count=%zu\n", __func__, group, count);
+
+	if (copy_from_user(&response, buf, count))
+		return -EFAULT;
+
+	ret = process_access_response(group, &response);
+	if (ret < 0)
+		count = ret;
+
+	return count;
+#else
+	return -EINVAL;
+#endif
+}
+
 static int fanotify_release(struct inode *ignored, struct file *file)
 {
 	struct fsnotify_group *group = file->private_data;
@@ -237,6 +400,7 @@ static long fanotify_ioctl(struct file *file, unsigned int cmd, unsigned long ar
 static const struct file_operations fanotify_fops = {
 	.poll		= fanotify_poll,
 	.read		= fanotify_read,
+	.write		= fanotify_write,
 	.fasync		= NULL,
 	.release	= fanotify_release,
 	.unlocked_ioctl	= fanotify_ioctl,
@@ -470,7 +634,7 @@ SYSCALL_DEFINE3(fanotify_init, unsigned int, flags, unsigned int, event_f_flags,
 	if (flags & ~FAN_ALL_INIT_FLAGS)
 		return -EINVAL;
 
-	f_flags = (O_RDONLY | FMODE_NONOTIFY);
+	f_flags = O_RDWR | FMODE_NONOTIFY;
 	if (flags & FAN_CLOEXEC)
 		f_flags |= O_CLOEXEC;
 	if (flags & FAN_NONBLOCK)
@@ -527,7 +691,11 @@ SYSCALL_DEFINE(fanotify_mark)(int fanotify_fd, unsigned int flags,
 	default:
 		return -EINVAL;
 	}
+#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
+	if (mask & ~(FAN_ALL_EVENTS | FAN_ALL_PERM_EVENTS | FAN_EVENT_ON_CHILD))
+#else
 	if (mask & ~(FAN_ALL_EVENTS | FAN_EVENT_ON_CHILD))
+#endif
 		return -EINVAL;
 
 	filp = fget_light(fanotify_fd, &fput_needed);
@@ -600,6 +768,8 @@ SYSCALL_ALIAS(sys_fanotify_mark, SyS_fanotify_mark);
 static int __init fanotify_user_setup(void)
 {
 	fanotify_mark_cache = KMEM_CACHE(fsnotify_mark, SLAB_PANIC);
+	fanotify_response_event_cache = KMEM_CACHE(fanotify_response_event,
+						   SLAB_PANIC);
 
 	return 0;
 }
-- 
cgit v1.1


From 8860f060e473dce1a0873d92105d536f72b05908 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Wed, 23 Dec 2009 00:10:25 -0500
Subject: fanotify: do not return 0 in a void function

remove_access_response() is supposed to have a void return, but was
returning 0;

Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Eric Paris <eparis@redhat.com>
---
 fs/notify/fanotify/fanotify_user.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index 87f0be8..7c869fa 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -231,7 +231,7 @@ static void remove_access_response(struct fsnotify_group *group,
 				   struct fsnotify_event *event,
 				   __s32 fd)
 {
-	return 0;
+	return;
 }
 #endif
 
-- 
cgit v1.1


From 98b5c10d320adfa250c1c18f3ccaec2f78e5e11d Mon Sep 17 00:00:00 2001
From: Jean-Christophe Dubois <jcd@tribudubois.net>
Date: Tue, 23 Mar 2010 08:08:09 +0100
Subject: fanotify: do not always return 0 in fsnotify

It seems to me you are always returning 0 in fsnotify, when you should return
the error (EPERM) returned by fanotify.

Signed-off-by: Jean-Christophe DUBOIS <jcd@tribudubois.net>
Signed-off-by: Eric Paris <eparis@redhat.com>
---
 fs/notify/fsnotify.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
index 6682686..9810bab 100644
--- a/fs/notify/fsnotify.c
+++ b/fs/notify/fsnotify.c
@@ -269,7 +269,7 @@ out:
 	if (event)
 		fsnotify_put_event(event);
 
-	return 0;
+	return ret;
 }
 EXPORT_SYMBOL_GPL(fsnotify);
 
-- 
cgit v1.1


From b31d397e430a90cbe9d3656929a7d5f96e986666 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Wed, 21 Apr 2010 16:49:38 -0400
Subject: fsnotify: call iput on inodes when no longer marked

fsnotify takes an igrab on an inode when it adds a mark.  The code was
supposed to drop the reference when the mark was removed but didn't.
This caused problems when an fs was unmounted because those inodes would
clearly not be gone.  Thus resulting in the most devistating of messages:

VFS: Busy inodes after unmount of loop0. Self-destruct in 5 seconds.
>>> Have a nice day...

Jiri Slaby bisected the problem to a patch in the fsnotify tree.  The
code snippets below show my stupidity quite clearly.

void fsnotify_destroy_inode_mark(struct fsnotify_mark *mark)
{
	...
	mark->inode = NULL;
	...
}

void fsnotify_destroy_mark(struct fsnotify_mark *mark)
{
	struct inode *inode = NULL;
	...
	if (mark->flags & FSNOTIFY_MARK_FLAG_INODE) {
		fsnotify_destroy_inode_mark(mark);
		inode = mark->i.inode;
	}
	...
	if (inode)
		iput(inode);
	...
}

Obviously the intent was to capture the inode before it was set to NULL in
fsnotify_destory_inode_mark() so we wouldn't be leaking inodes forever.
Instead we leaked them (and exploded on umount)

Reported-by: Jiri Slaby <jirislaby@gmail.com>
Signed-off-by: Eric Paris <eparis@redhat.com>
---
 fs/notify/mark.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/notify/mark.c b/fs/notify/mark.c
index 1e824e6..8f3b0e7 100644
--- a/fs/notify/mark.c
+++ b/fs/notify/mark.c
@@ -133,8 +133,8 @@ void fsnotify_destroy_mark(struct fsnotify_mark *mark)
 	spin_lock(&group->mark_lock);
 
 	if (mark->flags & FSNOTIFY_MARK_FLAG_INODE) {
-		fsnotify_destroy_inode_mark(mark);
 		inode = mark->i.inode;
+		fsnotify_destroy_inode_mark(mark);
 	} else if (mark->flags & FSNOTIFY_MARK_FLAG_VFSMOUNT)
 		fsnotify_destroy_vfsmount_mark(mark);
 	else
-- 
cgit v1.1


From 0a24887afacefbe2c44e0eee4150b43959a60665 Mon Sep 17 00:00:00 2001
From: H Hartley Sweeten <hartleys@visionengravers.com>
Date: Fri, 14 May 2010 15:35:21 -0500
Subject: inotify_user.c: make local symbol static

The symbol inotify_max_user_watches is not used outside this
file and should be static.

Signed-off-by: H Hartley Sweeten <hsweeten@visionengravers.com>
Cc: John McCutchan <john@johnmccutchan.com>
Cc: Robert Love <rlove@rlove.org>
Cc: Eric Paris <eparis@parisplace.org>
Signed-off-by: Eric Paris <eparis@redhat.com>
---
 fs/notify/inotify/inotify_user.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index 1ce71f5..44aeb0f 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -46,7 +46,7 @@
 /* these are configurable via /proc/sys/fs/inotify/ */
 static int inotify_max_user_instances __read_mostly;
 static int inotify_max_queued_events __read_mostly;
-int inotify_max_user_watches __read_mostly;
+static int inotify_max_user_watches __read_mostly;
 
 static struct kmem_cache *inotify_inode_mark_cachep __read_mostly;
 struct kmem_cache *event_priv_cachep __read_mostly;
-- 
cgit v1.1


From 269ed32a9ce00132b9372e9c00014532e054d6b2 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Thu, 27 May 2010 09:29:37 -0400
Subject: fanotify: default Kconfig to n

fanotify has default to y in linux-next since it's inception but default to
n in the final push to Linus.

Signed-off-by: Eric Paris <eparis@redhat.com>
---
 fs/notify/fanotify/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/notify/fanotify/Kconfig b/fs/notify/fanotify/Kconfig
index 566de30..3ac36b7b 100644
--- a/fs/notify/fanotify/Kconfig
+++ b/fs/notify/fanotify/Kconfig
@@ -2,7 +2,7 @@ config FANOTIFY
 	bool "Filesystem wide access notification"
 	select FSNOTIFY
 	select ANON_INODES
-	default y
+	default n
 	---help---
 	   Say Y here to enable fanotify suport.  fanotify is a file access
 	   notification system which differs from inotify in that it sends
-- 
cgit v1.1


From 08ae89380a8210a9965d04083e1de78cb8bca4b1 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Thu, 27 May 2010 09:41:40 -0400
Subject: fanotify: drop the useless priority argument

The priority argument in fanotify is useless.  Kill it.

Signed-off-by: Eric Paris <eparis@redhat.com>
---
 fs/notify/fanotify/fanotify_user.c |  8 +++-----
 fs/notify/group.c                  | 10 +++-------
 2 files changed, 6 insertions(+), 12 deletions(-)

(limited to 'fs')

diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index 7c869fa..6641020 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -616,14 +616,13 @@ static int fanotify_add_inode_mark(struct fsnotify_group *group,
 }
 
 /* fanotify syscalls */
-SYSCALL_DEFINE3(fanotify_init, unsigned int, flags, unsigned int, event_f_flags,
-		unsigned int, priority)
+SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
 {
 	struct fsnotify_group *group;
 	int f_flags, fd;
 
-	pr_debug("%s: flags=%d event_f_flags=%d priority=%d\n",
-		__func__, flags, event_f_flags, priority);
+	pr_debug("%s: flags=%d event_f_flags=%d\n",
+		__func__, flags, event_f_flags);
 
 	if (event_f_flags)
 		return -EINVAL;
@@ -645,7 +644,6 @@ SYSCALL_DEFINE3(fanotify_init, unsigned int, flags, unsigned int, event_f_flags,
 	if (IS_ERR(group))
 		return PTR_ERR(group);
 
-	group->priority = priority;
 #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
 	mutex_init(&group->fanotify_data.access_mutex);
 	init_waitqueue_head(&group->fanotify_data.access_waitq);
diff --git a/fs/notify/group.c b/fs/notify/group.c
index ada913f..7ac65ed 100644
--- a/fs/notify/group.c
+++ b/fs/notify/group.c
@@ -90,7 +90,6 @@ void fsnotify_recalc_group_mask(struct fsnotify_group *group)
 void fsnotify_add_vfsmount_group(struct fsnotify_group *group)
 {
 	struct fsnotify_group *group_iter;
-	unsigned int priority = group->priority;
 
 	mutex_lock(&fsnotify_grp_mutex);
 
@@ -98,7 +97,7 @@ void fsnotify_add_vfsmount_group(struct fsnotify_group *group)
 		list_for_each_entry(group_iter, &fsnotify_vfsmount_groups,
 				    vfsmount_group_list) {
 			/* insert in front of this one? */
-			if (priority < group_iter->priority) {
+			if (group < group_iter) {
 				/* list_add_tail() insert in front of group_iter */
 				list_add_tail_rcu(&group->inode_group_list,
 						  &group_iter->inode_group_list);
@@ -118,15 +117,14 @@ out:
 void fsnotify_add_inode_group(struct fsnotify_group *group)
 {
 	struct fsnotify_group *group_iter;
-	unsigned int priority = group->priority;
 
 	mutex_lock(&fsnotify_grp_mutex);
 
-	/* add to global group list, priority 0 first, UINT_MAX last */
+	/* add to global group list */
 	if (!group->on_inode_group_list) {
 		list_for_each_entry(group_iter, &fsnotify_inode_groups,
 				    inode_group_list) {
-			if (priority < group_iter->priority) {
+			if (group < group_iter) {
 				/* list_add_tail() insert in front of group_iter */
 				list_add_tail_rcu(&group->inode_group_list,
 						  &group_iter->inode_group_list);
@@ -260,8 +258,6 @@ struct fsnotify_group *fsnotify_alloc_group(const struct fsnotify_ops *ops)
 	spin_lock_init(&group->mark_lock);
 	INIT_LIST_HEAD(&group->marks_list);
 
-	group->priority = UINT_MAX;
-
 	group->ops = ops;
 
 	return group;
-- 
cgit v1.1


From e4e047a22058f48544b16728e0f15a3fc12bb0cf Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Thu, 20 May 2010 01:36:28 +1000
Subject: fsnotify: update gfp/slab.h includes

Implicit slab.h inclusion via percpu.h is about to go away.  Make sure
gfp.h or slab.h is included as necessary.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Stephen Rothwell <sfr@canb.auug.org.au>
Cc: Eric Paris <eparis@redhat.com>
Signed-off-by: Eric Paris <eparis@redhat.com>
---
 fs/notify/fanotify/fanotify_user.c | 1 +
 fs/notify/vfsmount_mark.c          | 1 -
 2 files changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index 6641020..da01091 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -10,6 +10,7 @@
 #include <linux/poll.h>
 #include <linux/security.h>
 #include <linux/syscalls.h>
+#include <linux/slab.h>
 #include <linux/types.h>
 #include <linux/uaccess.h>
 
diff --git a/fs/notify/vfsmount_mark.c b/fs/notify/vfsmount_mark.c
index 8f1aa02..ec580a2 100644
--- a/fs/notify/vfsmount_mark.c
+++ b/fs/notify/vfsmount_mark.c
@@ -22,7 +22,6 @@
 #include <linux/module.h>
 #include <linux/mount.h>
 #include <linux/mutex.h>
-#include <linux/slab.h>
 #include <linux/spinlock.h>
 #include <linux/writeback.h> /* for inode_lock */
 
-- 
cgit v1.1


From ff311008ab8d2f2cfdbbefd407d1b05acc8164b2 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Wed, 28 Jul 2010 10:18:37 -0400
Subject: inotify: fix inotify oneshot support

During the large inotify rewrite to fsnotify I completely dropped support
for IN_ONESHOT.  Reimplement that support.

Signed-off-by: Eric Paris <eparis@redhat.com>
---
 fs/notify/inotify/inotify_fsnotify.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'fs')

diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c
index daa666a..388a150 100644
--- a/fs/notify/inotify/inotify_fsnotify.c
+++ b/fs/notify/inotify/inotify_fsnotify.c
@@ -126,6 +126,9 @@ static int inotify_handle_event(struct fsnotify_group *group, struct fsnotify_ev
 			ret = 0;
 	}
 
+	if (fsn_mark->mask & IN_ONESHOT)
+		fsnotify_destroy_mark(fsn_mark);
+
 	/*
 	 * If we hold the fsn_mark until after the event is on the queue
 	 * IN_IGNORED won't be able to pass this event in the queue
-- 
cgit v1.1


From 611da04f7a31b2208e838be55a42c7a1310ae321 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Wed, 28 Jul 2010 10:18:37 -0400
Subject: inotify: send IN_UNMOUNT events

Since the .31 or so notify rewrite inotify has not sent events about
inodes which are unmounted.  This patch restores those events.

Signed-off-by: Eric Paris <eparis@redhat.com>
---
 fs/notify/inotify/inotify_user.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index 44aeb0f..f381daf 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -90,8 +90,11 @@ static inline __u32 inotify_arg_to_mask(u32 arg)
 {
 	__u32 mask;
 
-	/* everything should accept their own ignored and cares about children */
-	mask = (FS_IN_IGNORED | FS_EVENT_ON_CHILD);
+	/*
+	 * everything should accept their own ignored, cares about children,
+	 * and should receive events when the inode is unmounted
+	 */
+	mask = (FS_IN_IGNORED | FS_EVENT_ON_CHILD | FS_UNMOUNT);
 
 	/* mask off the flags used to open the fd */
 	mask |= (arg & (IN_ALL_EVENTS | IN_ONESHOT));
-- 
cgit v1.1


From 8c1934c8d70b22ca8333b216aec6c7d09fdbd6a6 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Wed, 28 Jul 2010 10:18:37 -0400
Subject: inotify: allow users to request not to recieve events on unlinked
 children

An inotify watch on a directory will send events for children even if those
children have been unlinked.  This patch add a new inotify flag IN_EXCL_UNLINK
which allows a watch to specificy they don't care about unlinked children.
This should fix performance problems seen by tasks which add a watch to
/tmp and then are overrun with events when other processes are reading and
writing to unlinked files they created in /tmp.

https://bugzilla.kernel.org/show_bug.cgi?id=16296

Requested-by: Matthias Clasen <mclasen@redhat.com>
Signed-off-by: Eric Paris <eparis@redhat.com>
---
 fs/notify/inotify/inotify_fsnotify.c | 9 +++++++++
 fs/notify/inotify/inotify_user.c     | 2 +-
 2 files changed, 10 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c
index 388a150..9d332e7 100644
--- a/fs/notify/inotify/inotify_fsnotify.c
+++ b/fs/notify/inotify/inotify_fsnotify.c
@@ -22,6 +22,7 @@
  * General Public License for more details.
  */
 
+#include <linux/dcache.h> /* d_unlinked */
 #include <linux/fs.h> /* struct inode */
 #include <linux/fsnotify_backend.h>
 #include <linux/inotify.h>
@@ -157,6 +158,14 @@ static bool inotify_should_send_event(struct fsnotify_group *group, struct inode
 	mask = (mask & ~FS_EVENT_ON_CHILD);
 	send = (fsn_mark->mask & mask);
 
+	if (send && (fsn_mark->mask & FS_EXCL_UNLINK) &&
+	    (data_type == FSNOTIFY_EVENT_PATH)) {
+		struct path *path  = data;
+
+		if (d_unlinked(path->dentry))
+			send = false;
+	}
+
 	/* find took a reference */
 	fsnotify_put_mark(fsn_mark);
 
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index f381daf..dfc80f7 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -97,7 +97,7 @@ static inline __u32 inotify_arg_to_mask(u32 arg)
 	mask = (FS_IN_IGNORED | FS_EVENT_ON_CHILD | FS_UNMOUNT);
 
 	/* mask off the flags used to open the fd */
-	mask |= (arg & (IN_ALL_EVENTS | IN_ONESHOT));
+	mask |= (arg & (IN_ALL_EVENTS | IN_ONESHOT | IN_EXCL_UNLINK));
 
 	return mask;
 }
-- 
cgit v1.1


From f874e1ac21d7708464dc656a10312542c54719f1 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Wed, 28 Jul 2010 10:18:37 -0400
Subject: inotify: force inotify and fsnotify use same bits

inotify uses bits called IN_* and fsnotify uses bits called FS_*.  These
need to line up.  This patch adds build time checks to make sure noone can
change these bits so they are not the same.

Signed-off-by: Eric Paris <eparis@redhat.com>
---
 fs/notify/inotify/inotify_user.c | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

(limited to 'fs')

diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index dfc80f7..c8203ce 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -839,6 +839,27 @@ out:
  */
 static int __init inotify_user_setup(void)
 {
+	BUILD_BUG_ON(IN_ACCESS != FS_ACCESS);
+	BUILD_BUG_ON(IN_MODIFY != FS_MODIFY);
+	BUILD_BUG_ON(IN_ATTRIB != FS_ATTRIB);
+	BUILD_BUG_ON(IN_CLOSE_WRITE != FS_CLOSE_WRITE);
+	BUILD_BUG_ON(IN_CLOSE_NOWRITE != FS_CLOSE_NOWRITE);
+	BUILD_BUG_ON(IN_OPEN != FS_OPEN);
+	BUILD_BUG_ON(IN_MOVED_FROM != FS_MOVED_FROM);
+	BUILD_BUG_ON(IN_MOVED_TO != FS_MOVED_TO);
+	BUILD_BUG_ON(IN_CREATE != FS_CREATE);
+	BUILD_BUG_ON(IN_DELETE != FS_DELETE);
+	BUILD_BUG_ON(IN_DELETE_SELF != FS_DELETE_SELF);
+	BUILD_BUG_ON(IN_MOVE_SELF != FS_MOVE_SELF);
+	BUILD_BUG_ON(IN_UNMOUNT != FS_UNMOUNT);
+	BUILD_BUG_ON(IN_Q_OVERFLOW != FS_Q_OVERFLOW);
+	BUILD_BUG_ON(IN_IGNORED != FS_IN_IGNORED);
+	BUILD_BUG_ON(IN_EXCL_UNLINK != FS_EXCL_UNLINK);
+	BUILD_BUG_ON(IN_ISDIR != FS_IN_ISDIR);
+	BUILD_BUG_ON(IN_ONESHOT != FS_IN_ONESHOT);
+
+	BUG_ON(hweight32(ALL_INOTIFY_BITS) != 21);
+
 	inotify_inode_mark_cachep = KMEM_CACHE(inotify_inode_mark, SLAB_PANIC);
 	event_priv_cachep = KMEM_CACHE(inotify_event_private_data, SLAB_PANIC);
 
-- 
cgit v1.1


From 44b350fc23e36e95c8e042b7ded66217ea2b9d72 Mon Sep 17 00:00:00 2001
From: Jerome Marchand <jmarchan@redhat.com>
Date: Wed, 28 Jul 2010 10:18:37 -0400
Subject: inotify: Fix mask checks

The mask checks in inotify_update_existing_watch() and
inotify_new_watch() are useless because inotify_arg_to_mask() sets
FS_IN_IGNORED and FS_EVENT_ON_CHILD bits anyway.

Signed-off-by: Eric Paris <eparis@redhat.com>
---
 fs/notify/inotify/inotify_user.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index c8203ce..7dc940c 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -566,7 +566,7 @@ static int inotify_update_existing_watch(struct fsnotify_group *group,
 
 	/* don't allow invalid bits: we don't want flags set */
 	mask = inotify_arg_to_mask(arg);
-	if (unlikely(!mask))
+	if (unlikely(!(mask & IN_ALL_EVENTS)))
 		return -EINVAL;
 
 	fsn_mark = fsnotify_find_inode_mark(group, inode);
@@ -624,7 +624,7 @@ static int inotify_new_watch(struct fsnotify_group *group,
 
 	/* don't allow invalid bits: we don't want flags set */
 	mask = inotify_arg_to_mask(arg);
-	if (unlikely(!mask))
+	if (unlikely(!(mask & IN_ALL_EVENTS)))
 		return -EINVAL;
 
 	tmp_i_mark = kmem_cache_alloc(inotify_inode_mark_cachep, GFP_KERNEL);
-- 
cgit v1.1


From 20dee624ca40db227aa70cb3f44d2d6cb4fdbab4 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Wed, 28 Jul 2010 10:18:37 -0400
Subject: fsnotify: check to make sure all fsnotify bits are unique

This patch adds a check to make sure that all fsnotify bits are unique and we
cannot accidentally use the same bit for 2 different fsnotify event types.

Signed-off-by: Eric Paris <eparis@redhat.com>
---
 fs/notify/fsnotify.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'fs')

diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
index 9810bab..076c10e 100644
--- a/fs/notify/fsnotify.c
+++ b/fs/notify/fsnotify.c
@@ -275,6 +275,8 @@ EXPORT_SYMBOL_GPL(fsnotify);
 
 static __init int fsnotify_init(void)
 {
+	BUG_ON(hweight32(ALL_FSNOTIFY_EVENTS) != 23);
+
 	return init_srcu_struct(&fsnotify_grp_srcu);
 }
 subsys_initcall(fsnotify_init);
-- 
cgit v1.1


From 80af2588676483ac4e998b5092e9d008dab3ab62 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Wed, 28 Jul 2010 10:18:37 -0400
Subject: fanotify: groups can specify their f_flags for new fd

Currently fanotify fds opened for thier listeners are done with f_flags
equal to O_RDONLY | O_LARGEFILE.  This patch instead takes f_flags from the
fanotify_init syscall and uses those when opening files in the context of
the listener.

Signed-off-by: Eric Paris <eparis@redhat.com>
---
 fs/notify/fanotify/fanotify_user.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index da01091..7182c83 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -81,7 +81,7 @@ static int create_fd(struct fsnotify_group *group, struct fsnotify_event *event)
 	 * are NULL;  That's fine, just don't call dentry open */
 	if (dentry && mnt)
 		new_file = dentry_open(dentry, mnt,
-				       O_RDONLY | O_LARGEFILE | FMODE_NONOTIFY,
+				       group->fanotify_data.f_flags | FMODE_NONOTIFY,
 				       current_cred());
 	else
 		new_file = ERR_PTR(-EOVERFLOW);
@@ -625,9 +625,6 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
 	pr_debug("%s: flags=%d event_f_flags=%d\n",
 		__func__, flags, event_f_flags);
 
-	if (event_f_flags)
-		return -EINVAL;
-
 	if (!capable(CAP_SYS_ADMIN))
 		return -EACCES;
 
@@ -645,6 +642,7 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
 	if (IS_ERR(group))
 		return PTR_ERR(group);
 
+	group->fanotify_data.f_flags = event_f_flags;
 #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
 	mutex_init(&group->fanotify_data.access_mutex);
 	init_waitqueue_head(&group->fanotify_data.access_waitq);
-- 
cgit v1.1


From 5ba08e2eeb06355f66ed62ae97bb87d145973a9a Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Wed, 28 Jul 2010 10:18:37 -0400
Subject: fsnotify: add pr_debug throughout

It can be hard to debug fsnotify since there are so few printks.  Use
pr_debug to allow for dynamic debugging.

Signed-off-by: Eric Paris <eparis@redhat.com>
---
 fs/notify/fsnotify.c                 |  4 ++++
 fs/notify/inotify/inotify_fsnotify.c |  6 ++++++
 fs/notify/inotify/inotify_user.c     | 10 ++++++++++
 fs/notify/notification.c             | 13 +++++++++++++
 4 files changed, 33 insertions(+)

(limited to 'fs')

diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
index 076c10e..72aae40 100644
--- a/fs/notify/fsnotify.c
+++ b/fs/notify/fsnotify.c
@@ -174,6 +174,10 @@ static int send_to_group(struct fsnotify_group *group, struct inode *to_tell,
 			 int data_is, u32 cookie, const unsigned char *file_name,
 			 struct fsnotify_event **event)
 {
+	pr_debug("%s: group=%p to_tell=%p mnt=%p mask=%x data=%p data_is=%d"
+		 " cookie=%d event=%p\n", __func__, group, to_tell, mnt,
+		 mask, data, data_is, cookie, *event);
+
 	if (!group->ops->should_send_event(group, to_tell, mnt, mask,
 					   data, data_is))
 		return 0;
diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c
index 9d332e7..906b727 100644
--- a/fs/notify/inotify/inotify_fsnotify.c
+++ b/fs/notify/inotify/inotify_fsnotify.c
@@ -98,6 +98,9 @@ static int inotify_handle_event(struct fsnotify_group *group, struct fsnotify_ev
 	struct fsnotify_event_private_data *fsn_event_priv;
 	int wd, ret;
 
+	pr_debug("%s: group=%p event=%p to_tell=%p mask=%x\n", __func__, group,
+		 event, event->to_tell, event->mask);
+
 	to_tell = event->to_tell;
 
 	fsn_mark = fsnotify_find_inode_mark(group, to_tell);
@@ -151,6 +154,9 @@ static bool inotify_should_send_event(struct fsnotify_group *group, struct inode
 	struct fsnotify_mark *fsn_mark;
 	bool send;
 
+	pr_debug("%s: group=%p inode=%p mask=%x data=%p data_type=%d\n",
+		 __func__, group, inode, mask, data, data_type);
+
 	fsn_mark = fsnotify_find_inode_mark(group, inode);
 	if (!fsn_mark)
 		return false;
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index 7dc940c..1068e1c 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -141,6 +141,8 @@ static struct fsnotify_event *get_one_event(struct fsnotify_group *group,
 
 	event = fsnotify_peek_notify_event(group);
 
+	pr_debug("%s: group=%p event=%p\n", __func__, group, event);
+
 	if (event->name_len)
 		event_size += roundup(event->name_len + 1, event_size);
 
@@ -170,6 +172,8 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group,
 	size_t event_size = sizeof(struct inotify_event);
 	size_t name_len = 0;
 
+	pr_debug("%s: group=%p event=%p\n", __func__, group, event);
+
 	/* we get the inotify watch descriptor from the event private data */
 	spin_lock(&event->lock);
 	fsn_priv = fsnotify_remove_priv_from_event(group, event);
@@ -242,6 +246,8 @@ static ssize_t inotify_read(struct file *file, char __user *buf,
 		kevent = get_one_event(group, count);
 		mutex_unlock(&group->notification_mutex);
 
+		pr_debug("%s: group=%p kevent=%p\n", __func__, group, kevent);
+
 		if (kevent) {
 			ret = PTR_ERR(kevent);
 			if (IS_ERR(kevent))
@@ -286,6 +292,8 @@ static int inotify_release(struct inode *ignored, struct file *file)
 	struct fsnotify_group *group = file->private_data;
 	struct user_struct *user = group->inotify_data.user;
 
+	pr_debug("%s: group=%p\n", __func__, group);
+
 	fsnotify_clear_marks_by_group(group);
 
 	/* free this group, matching get was inotify_init->fsnotify_obtain_group */
@@ -309,6 +317,8 @@ static long inotify_ioctl(struct file *file, unsigned int cmd,
 	group = file->private_data;
 	p = (void __user *) arg;
 
+	pr_debug("%s: group=%p cmd=%u\n", __func__, group, cmd);
+
 	switch (cmd) {
 	case FIONREAD:
 		mutex_lock(&group->notification_mutex);
diff --git a/fs/notify/notification.c b/fs/notify/notification.c
index b35faaf..e6dde25 100644
--- a/fs/notify/notification.c
+++ b/fs/notify/notification.c
@@ -87,6 +87,8 @@ void fsnotify_put_event(struct fsnotify_event *event)
 		return;
 
 	if (atomic_dec_and_test(&event->refcnt)) {
+		pr_debug("%s: event=%p\n", __func__, event);
+
 		if (event->data_type == FSNOTIFY_EVENT_PATH)
 			path_put(&event->path);
 
@@ -146,6 +148,8 @@ int fsnotify_add_notify_event(struct fsnotify_group *group, struct fsnotify_even
 	struct list_head *list = &group->notification_list;
 	int rc = 0;
 
+	pr_debug("%s: group=%p event=%p priv=%p\n", __func__, group, event, priv);
+
 	/*
 	 * There is one fsnotify_event_holder embedded inside each fsnotify_event.
 	 * Check if we expect to be able to use that holder.  If not alloc a new
@@ -222,6 +226,8 @@ struct fsnotify_event *fsnotify_remove_notify_event(struct fsnotify_group *group
 
 	BUG_ON(!mutex_is_locked(&group->notification_mutex));
 
+	pr_debug("%s: group=%p\n", __func__, group);
+
 	holder = list_first_entry(&group->notification_list, struct fsnotify_event_holder, event_list);
 
 	event = holder->event;
@@ -307,6 +313,8 @@ int fsnotify_replace_event(struct fsnotify_event_holder *old_holder,
 		SPINLOCK_NEW,
 	};
 
+	pr_debug("%s: old_event=%p new_event=%p\n", __func__, old_event, new_event);
+
 	/*
 	 * if the new_event's embedded holder is in use someone
 	 * screwed up and didn't give us a clean new event.
@@ -340,6 +348,8 @@ struct fsnotify_event *fsnotify_clone_event(struct fsnotify_event *old_event)
 	if (!event)
 		return NULL;
 
+	pr_debug("%s: old_event=%p new_event=%p\n", __func__, old_event, event);
+
 	memcpy(event, old_event, sizeof(*event));
 	initialize_event(event);
 
@@ -379,6 +389,9 @@ struct fsnotify_event *fsnotify_create_event(struct inode *to_tell, __u32 mask,
 	if (!event)
 		return NULL;
 
+	pr_debug("%s: event=%p to_tell=%p mask=%x data=%p data_type=%d\n",
+		 __func__, event, to_tell, mask, data, data_type);
+
 	initialize_event(event);
 
 	if (name) {
-- 
cgit v1.1


From f70ab54cc6c3907b0727ba332b3976f80f3846d0 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Wed, 28 Jul 2010 10:18:37 -0400
Subject: fsnotify: fsnotify_add_notify_event should return an event

Rather than the horrific void ** argument and such just to pass the
fanotify_merge event back to the caller of fsnotify_add_notify_event() have
those things return an event if it was different than the event suggusted to
be added.

Signed-off-by: Eric Paris <eparis@redhat.com>
---
 fs/notify/fanotify/fanotify.c        | 103 ++++++++++++++++-------------------
 fs/notify/inotify/inotify_fsnotify.c |  28 +++++-----
 fs/notify/inotify/inotify_user.c     |  11 +++-
 fs/notify/notification.c             |  42 +++++++++-----
 4 files changed, 96 insertions(+), 88 deletions(-)

(limited to 'fs')

diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
index bbcfccd..f3c40c0 100644
--- a/fs/notify/fanotify/fanotify.c
+++ b/fs/notify/fanotify/fanotify.c
@@ -30,65 +30,58 @@ static bool should_merge(struct fsnotify_event *old, struct fsnotify_event *new)
 	return false;
 }
 
-/* Note, if we return an event in *arg that a reference is being held... */
-static int fanotify_merge(struct list_head *list,
-			  struct fsnotify_event *event,
-			  void **arg)
+/* and the list better be locked by something too! */
+static struct fsnotify_event *fanotify_merge(struct list_head *list,
+					     struct fsnotify_event *event)
 {
 	struct fsnotify_event_holder *test_holder;
-	struct fsnotify_event *test_event;
+	struct fsnotify_event *test_event = NULL;
 	struct fsnotify_event *new_event;
-	struct fsnotify_event **return_event = (struct fsnotify_event **)arg;
-	int ret = 0;
 
 	pr_debug("%s: list=%p event=%p\n", __func__, list, event);
 
-	*return_event = NULL;
-
-	/* and the list better be locked by something too! */
 
 	list_for_each_entry_reverse(test_holder, list, event_list) {
-		test_event = test_holder->event;
-		if (should_merge(test_event, event)) {
-			fsnotify_get_event(test_event);
-			*return_event = test_event;
-
-			ret = -EEXIST;
-			/* if they are exactly the same we are done */
-			if (test_event->mask == event->mask)
-				goto out;
-
-			/*
-			 * if the refcnt == 1 this is the only queue
-			 * for this event and so we can update the mask
-			 * in place.
-			 */
-			if (atomic_read(&test_event->refcnt) == 1) {
-				test_event->mask |= event->mask;
-				goto out;
-			}
-
-			/* can't allocate memory, merge was no possible */
-			new_event = fsnotify_clone_event(test_event);
-			if (unlikely(!new_event)) {
-				ret = 0;
-				goto out;
-			}
-
-			/* we didn't return the test_event, so drop that ref */
-			fsnotify_put_event(test_event);
-			/* the reference we return on new_event is from clone */
-			*return_event = new_event;
-
-			/* build new event and replace it on the list */
-			new_event->mask = (test_event->mask | event->mask);
-			fsnotify_replace_event(test_holder, new_event);
-
+		if (should_merge(test_holder->event, event)) {
+			test_event = test_holder->event;
 			break;
 		}
 	}
-out:
-	return ret;
+
+	if (!test_event)
+		return NULL;
+
+	fsnotify_get_event(test_event);
+
+	/* if they are exactly the same we are done */
+	if (test_event->mask == event->mask)
+		return test_event;
+
+	/*
+	 * if the refcnt == 2 this is the only queue
+	 * for this event and so we can update the mask
+	 * in place.
+	 */
+	if (atomic_read(&test_event->refcnt) == 2) {
+		test_event->mask |= event->mask;
+		return test_event;
+	}
+
+	new_event = fsnotify_clone_event(test_event);
+
+	/* done with test_event */
+	fsnotify_put_event(test_event);
+
+	/* couldn't allocate memory, merge was not possible */
+	if (unlikely(!new_event))
+		return ERR_PTR(-ENOMEM);
+
+	/* build new event and replace it on the list */
+	new_event->mask = (test_event->mask | event->mask);
+	fsnotify_replace_event(test_holder, new_event);
+
+	/* we hold a reference on new_event from clone_event */
+	return new_event;
 }
 
 #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
@@ -123,7 +116,7 @@ static int fanotify_get_response_from_access(struct fsnotify_group *group,
 
 static int fanotify_handle_event(struct fsnotify_group *group, struct fsnotify_event *event)
 {
-	int ret;
+	int ret = 0;
 	struct fsnotify_event *notify_event = NULL;
 
 	BUILD_BUG_ON(FAN_ACCESS != FS_ACCESS);
@@ -138,13 +131,9 @@ static int fanotify_handle_event(struct fsnotify_group *group, struct fsnotify_e
 
 	pr_debug("%s: group=%p event=%p\n", __func__, group, event);
 
-	ret = fsnotify_add_notify_event(group, event, NULL, fanotify_merge,
-					(void **)&notify_event);
-	/* -EEXIST means this event was merged with another, not that it was an error */
-	if (ret == -EEXIST)
-		ret = 0;
-	if (ret)
-		goto out;
+	notify_event = fsnotify_add_notify_event(group, event, NULL, fanotify_merge);
+	if (IS_ERR(notify_event))
+		return PTR_ERR(notify_event);
 
 #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
 	if (event->mask & FAN_ALL_PERM_EVENTS) {
@@ -155,9 +144,9 @@ static int fanotify_handle_event(struct fsnotify_group *group, struct fsnotify_e
 	}
 #endif
 
-out:
 	if (notify_event)
 		fsnotify_put_event(notify_event);
+
 	return ret;
 }
 
diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c
index 906b727..73a1106b 100644
--- a/fs/notify/inotify/inotify_fsnotify.c
+++ b/fs/notify/inotify/inotify_fsnotify.c
@@ -68,13 +68,11 @@ static bool event_compare(struct fsnotify_event *old, struct fsnotify_event *new
 	return false;
 }
 
-static int inotify_merge(struct list_head *list,
-			 struct fsnotify_event *event,
-			 void **arg)
+static struct fsnotify_event *inotify_merge(struct list_head *list,
+					    struct fsnotify_event *event)
 {
 	struct fsnotify_event_holder *last_holder;
 	struct fsnotify_event *last_event;
-	int ret = 0;
 
 	/* and the list better be locked by something too */
 	spin_lock(&event->lock);
@@ -82,11 +80,13 @@ static int inotify_merge(struct list_head *list,
 	last_holder = list_entry(list->prev, struct fsnotify_event_holder, event_list);
 	last_event = last_holder->event;
 	if (event_compare(last_event, event))
-		ret = -EEXIST;
+		fsnotify_get_event(last_event);
+	else
+		last_event = NULL;
 
 	spin_unlock(&event->lock);
 
-	return ret;
+	return last_event;
 }
 
 static int inotify_handle_event(struct fsnotify_group *group, struct fsnotify_event *event)
@@ -96,7 +96,8 @@ static int inotify_handle_event(struct fsnotify_group *group, struct fsnotify_ev
 	struct inode *to_tell;
 	struct inotify_event_private_data *event_priv;
 	struct fsnotify_event_private_data *fsn_event_priv;
-	int wd, ret;
+	struct fsnotify_event *added_event;
+	int wd, ret = 0;
 
 	pr_debug("%s: group=%p event=%p to_tell=%p mask=%x\n", __func__, group,
 		 event, event->to_tell, event->mask);
@@ -120,14 +121,13 @@ static int inotify_handle_event(struct fsnotify_group *group, struct fsnotify_ev
 	fsn_event_priv->group = group;
 	event_priv->wd = wd;
 
-	ret = fsnotify_add_notify_event(group, event, fsn_event_priv, inotify_merge, NULL);
-	if (ret) {
+	added_event = fsnotify_add_notify_event(group, event, fsn_event_priv, inotify_merge);
+	if (added_event) {
 		inotify_free_event_priv(fsn_event_priv);
-		/* EEXIST says we tail matched, EOVERFLOW isn't something
-		 * to report up the stack. */
-		if ((ret == -EEXIST) ||
-		    (ret == -EOVERFLOW))
-			ret = 0;
+		if (!IS_ERR(added_event))
+			fsnotify_put_event(added_event);
+		else
+			ret = PTR_ERR(added_event);
 	}
 
 	if (fsn_mark->mask & IN_ONESHOT)
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index 1068e1c..a4cd227 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -516,7 +516,7 @@ void inotify_ignored_and_remove_idr(struct fsnotify_mark *fsn_mark,
 				    struct fsnotify_group *group)
 {
 	struct inotify_inode_mark *i_mark;
-	struct fsnotify_event *ignored_event;
+	struct fsnotify_event *ignored_event, *notify_event;
 	struct inotify_event_private_data *event_priv;
 	struct fsnotify_event_private_data *fsn_event_priv;
 	int ret;
@@ -538,9 +538,14 @@ void inotify_ignored_and_remove_idr(struct fsnotify_mark *fsn_mark,
 	fsn_event_priv->group = group;
 	event_priv->wd = i_mark->wd;
 
-	ret = fsnotify_add_notify_event(group, ignored_event, fsn_event_priv, NULL, NULL);
-	if (ret)
+	notify_event = fsnotify_add_notify_event(group, ignored_event, fsn_event_priv, NULL);
+	if (notify_event) {
+		if (IS_ERR(notify_event))
+			ret = PTR_ERR(notify_event);
+		else
+			fsnotify_put_event(notify_event);
 		inotify_free_event_priv(fsn_event_priv);
+	}
 
 skip_send_ignore:
 
diff --git a/fs/notify/notification.c b/fs/notify/notification.c
index e6dde25..f39260f 100644
--- a/fs/notify/notification.c
+++ b/fs/notify/notification.c
@@ -137,16 +137,14 @@ struct fsnotify_event_private_data *fsnotify_remove_priv_from_event(struct fsnot
  * event off the queue to deal with.  If the event is successfully added to the
  * group's notification queue, a reference is taken on event.
  */
-int fsnotify_add_notify_event(struct fsnotify_group *group, struct fsnotify_event *event,
-			      struct fsnotify_event_private_data *priv,
-			      int (*merge)(struct list_head *,
-					   struct fsnotify_event *,
-					   void **arg),
-			      void **arg)
+struct fsnotify_event *fsnotify_add_notify_event(struct fsnotify_group *group, struct fsnotify_event *event,
+						 struct fsnotify_event_private_data *priv,
+						 struct fsnotify_event *(*merge)(struct list_head *,
+										 struct fsnotify_event *))
 {
+	struct fsnotify_event *return_event = NULL;
 	struct fsnotify_event_holder *holder = NULL;
 	struct list_head *list = &group->notification_list;
-	int rc = 0;
 
 	pr_debug("%s: group=%p event=%p priv=%p\n", __func__, group, event, priv);
 
@@ -162,27 +160,37 @@ int fsnotify_add_notify_event(struct fsnotify_group *group, struct fsnotify_even
 alloc_holder:
 		holder = fsnotify_alloc_event_holder();
 		if (!holder)
-			return -ENOMEM;
+			return ERR_PTR(-ENOMEM);
 	}
 
 	mutex_lock(&group->notification_mutex);
 
 	if (group->q_len >= group->max_events) {
 		event = q_overflow_event;
-		rc = -EOVERFLOW;
+
+		/*
+		 * we need to return the overflow event
+		 * which means we need a ref
+		 */
+		fsnotify_get_event(event);
+		return_event = event;
+
 		/* sorry, no private data on the overflow event */
 		priv = NULL;
 	}
 
 	if (!list_empty(list) && merge) {
-		int ret;
+		struct fsnotify_event *tmp;
 
-		ret = merge(list, event, arg);
-		if (ret) {
+		tmp = merge(list, event);
+		if (tmp) {
 			mutex_unlock(&group->notification_mutex);
+
+			if (return_event)
+				fsnotify_put_event(return_event);
 			if (holder != &event->holder)
 				fsnotify_destroy_event_holder(holder);
-			return ret;
+			return tmp;
 		}
 	}
 
@@ -197,6 +205,12 @@ alloc_holder:
 		 * event holder was used, go back and get a new one */
 		spin_unlock(&event->lock);
 		mutex_unlock(&group->notification_mutex);
+
+		if (return_event) {
+			fsnotify_put_event(return_event);
+			return_event = NULL;
+		}
+
 		goto alloc_holder;
 	}
 
@@ -211,7 +225,7 @@ alloc_holder:
 	mutex_unlock(&group->notification_mutex);
 
 	wake_up(&group->notification_waitq);
-	return rc;
+	return return_event;
 }
 
 /*
-- 
cgit v1.1


From 3bcf3860a4ff9bbc522820b4b765e65e4deceb3e Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Wed, 28 Jul 2010 10:18:37 -0400
Subject: fsnotify: store struct file not struct path

Al explains that calling dentry_open() with a mnt/dentry pair is only
garunteed to be safe if they are already used in an open struct file.  To
make sure this is the case don't store and use a struct path in fsnotify,
always use a struct file.

Signed-off-by: Eric Paris <eparis@redhat.com>
---
 fs/notify/fanotify/fanotify.c        |  8 ++++----
 fs/notify/fanotify/fanotify_user.c   |  6 +++---
 fs/notify/fsnotify.c                 | 16 ++++++++--------
 fs/notify/inotify/inotify_fsnotify.c | 12 ++++++------
 fs/notify/notification.c             | 20 +++++++++-----------
 5 files changed, 30 insertions(+), 32 deletions(-)

(limited to 'fs')

diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
index f3c40c0..c2a3029 100644
--- a/fs/notify/fanotify/fanotify.c
+++ b/fs/notify/fanotify/fanotify.c
@@ -17,9 +17,9 @@ static bool should_merge(struct fsnotify_event *old, struct fsnotify_event *new)
 	    old->data_type == new->data_type &&
 	    old->tgid == new->tgid) {
 		switch (old->data_type) {
-		case (FSNOTIFY_EVENT_PATH):
-			if ((old->path.mnt == new->path.mnt) &&
-			    (old->path.dentry == new->path.dentry))
+		case (FSNOTIFY_EVENT_FILE):
+			if ((old->file->f_path.mnt == new->file->f_path.mnt) &&
+			    (old->file->f_path.dentry == new->file->f_path.dentry))
 				return true;
 		case (FSNOTIFY_EVENT_NONE):
 			return true;
@@ -226,7 +226,7 @@ static bool fanotify_should_send_event(struct fsnotify_group *group, struct inod
 		return false;
 
 	/* if we don't have enough info to send an event to userspace say no */
-	if (data_type != FSNOTIFY_EVENT_PATH)
+	if (data_type != FSNOTIFY_EVENT_FILE)
 		return false;
 
 	if (mnt)
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index 7182c83..50cea74 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -65,7 +65,7 @@ static int create_fd(struct fsnotify_group *group, struct fsnotify_event *event)
 	if (client_fd < 0)
 		return client_fd;
 
-	if (event->data_type != FSNOTIFY_EVENT_PATH) {
+	if (event->data_type != FSNOTIFY_EVENT_FILE) {
 		WARN_ON(1);
 		put_unused_fd(client_fd);
 		return -EINVAL;
@@ -75,8 +75,8 @@ static int create_fd(struct fsnotify_group *group, struct fsnotify_event *event)
 	 * we need a new file handle for the userspace program so it can read even if it was
 	 * originally opened O_WRONLY.
 	 */
-	dentry = dget(event->path.dentry);
-	mnt = mntget(event->path.mnt);
+	dentry = dget(event->file->f_path.dentry);
+	mnt = mntget(event->file->f_path.mnt);
 	/* it's possible this event was an overflow event.  in that case dentry and mnt
 	 * are NULL;  That's fine, just don't call dentry open */
 	if (dentry && mnt)
diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
index 72aae40..4788c86 100644
--- a/fs/notify/fsnotify.c
+++ b/fs/notify/fsnotify.c
@@ -84,7 +84,7 @@ void __fsnotify_update_child_dentry_flags(struct inode *inode)
 }
 
 /* Notify this dentry's parent about a child's events. */
-void __fsnotify_parent(struct path *path, struct dentry *dentry, __u32 mask)
+void __fsnotify_parent(struct file *file, struct dentry *dentry, __u32 mask)
 {
 	struct dentry *parent;
 	struct inode *p_inode;
@@ -92,7 +92,7 @@ void __fsnotify_parent(struct path *path, struct dentry *dentry, __u32 mask)
 	bool should_update_children = false;
 
 	if (!dentry)
-		dentry = path->dentry;
+		dentry = file->f_path.dentry;
 
 	if (!(dentry->d_flags & DCACHE_FSNOTIFY_PARENT_WATCHED))
 		return;
@@ -124,8 +124,8 @@ void __fsnotify_parent(struct path *path, struct dentry *dentry, __u32 mask)
 		 * specifies these are events which came from a child. */
 		mask |= FS_EVENT_ON_CHILD;
 
-		if (path)
-			fsnotify(p_inode, mask, path, FSNOTIFY_EVENT_PATH,
+		if (file)
+			fsnotify(p_inode, mask, file, FSNOTIFY_EVENT_FILE,
 				 dentry->d_name.name, 0);
 		else
 			fsnotify(p_inode, mask, dentry->d_inode, FSNOTIFY_EVENT_INODE,
@@ -154,10 +154,10 @@ void __fsnotify_flush_ignored_mask(struct inode *inode, void *data, int data_is)
 		spin_unlock(&inode->i_lock);
 	}
 
-	if (data_is == FSNOTIFY_EVENT_PATH) {
+	if (data_is == FSNOTIFY_EVENT_FILE) {
 		struct vfsmount *mnt;
 
-		mnt = ((struct path *)data)->mnt;
+		mnt = ((struct file *)data)->f_path.mnt;
 		if (mnt && !hlist_empty(&mnt->mnt_fsnotify_marks)) {
 			spin_lock(&mnt->mnt_root->d_lock);
 			hlist_for_each_entry(mark, node, &mnt->mnt_fsnotify_marks, m.m_list) {
@@ -228,8 +228,8 @@ int fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is,
 	    !(test_mask & fsnotify_vfsmount_mask))
 		return 0;
  
-	if (data_is == FSNOTIFY_EVENT_PATH)
-		mnt = ((struct path *)data)->mnt;
+	if (data_is == FSNOTIFY_EVENT_FILE)
+		mnt = ((struct file *)data)->f_path.mnt;
 
 	/* if this inode's directed listeners don't care and nothing on the vfsmount
 	 * listeners list cares, nothing to do */
diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c
index 73a1106b..3c506e0 100644
--- a/fs/notify/inotify/inotify_fsnotify.c
+++ b/fs/notify/inotify/inotify_fsnotify.c
@@ -52,9 +52,9 @@ static bool event_compare(struct fsnotify_event *old, struct fsnotify_event *new
 			    !strcmp(old->file_name, new->file_name))
 				return true;
 			break;
-		case (FSNOTIFY_EVENT_PATH):
-			if ((old->path.mnt == new->path.mnt) &&
-			    (old->path.dentry == new->path.dentry))
+		case (FSNOTIFY_EVENT_FILE):
+			if ((old->file->f_path.mnt == new->file->f_path.mnt) &&
+			    (old->file->f_path.dentry == new->file->f_path.dentry))
 				return true;
 			break;
 		case (FSNOTIFY_EVENT_NONE):
@@ -165,10 +165,10 @@ static bool inotify_should_send_event(struct fsnotify_group *group, struct inode
 	send = (fsn_mark->mask & mask);
 
 	if (send && (fsn_mark->mask & FS_EXCL_UNLINK) &&
-	    (data_type == FSNOTIFY_EVENT_PATH)) {
-		struct path *path  = data;
+	    (data_type == FSNOTIFY_EVENT_FILE)) {
+		struct file *file  = data;
 
-		if (d_unlinked(path->dentry))
+		if (d_unlinked(file->f_path.dentry))
 			send = false;
 	}
 
diff --git a/fs/notify/notification.c b/fs/notify/notification.c
index f39260f..c106cdd 100644
--- a/fs/notify/notification.c
+++ b/fs/notify/notification.c
@@ -31,6 +31,7 @@
  * allocated and used.
  */
 
+#include <linux/file.h>
 #include <linux/fs.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
@@ -89,8 +90,8 @@ void fsnotify_put_event(struct fsnotify_event *event)
 	if (atomic_dec_and_test(&event->refcnt)) {
 		pr_debug("%s: event=%p\n", __func__, event);
 
-		if (event->data_type == FSNOTIFY_EVENT_PATH)
-			path_put(&event->path);
+		if (event->data_type == FSNOTIFY_EVENT_FILE)
+			fput(event->file);
 
 		BUG_ON(!list_empty(&event->private_data_list));
 
@@ -375,8 +376,8 @@ struct fsnotify_event *fsnotify_clone_event(struct fsnotify_event *old_event)
 		}
 	}
 	event->tgid = get_pid(old_event->tgid);
-	if (event->data_type == FSNOTIFY_EVENT_PATH)
-		path_get(&event->path);
+	if (event->data_type == FSNOTIFY_EVENT_FILE)
+		get_file(event->file);
 
 	return event;
 }
@@ -423,11 +424,9 @@ struct fsnotify_event *fsnotify_create_event(struct inode *to_tell, __u32 mask,
 	event->data_type = data_type;
 
 	switch (data_type) {
-	case FSNOTIFY_EVENT_PATH: {
-		struct path *path = data;
-		event->path.dentry = path->dentry;
-		event->path.mnt = path->mnt;
-		path_get(&event->path);
+	case FSNOTIFY_EVENT_FILE: {
+		event->file = data;
+		get_file(event->file);
 		break;
 	}
 	case FSNOTIFY_EVENT_INODE:
@@ -435,8 +434,7 @@ struct fsnotify_event *fsnotify_create_event(struct inode *to_tell, __u32 mask,
 		break;
 	case FSNOTIFY_EVENT_NONE:
 		event->inode = NULL;
-		event->path.dentry = NULL;
-		event->path.mnt = NULL;
+		event->file = NULL;
 		break;
 	default:
 		BUG();
-- 
cgit v1.1


From c1e5c954020e123d30b4abf4038ce501861bcf9f Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Wed, 28 Jul 2010 10:18:38 -0400
Subject: vfs/fsnotify: fsnotify_close can delay the final work in fput

fanotify almost works like so:

user context calls fsnotify_* function with a struct file.
   fsnotify takes a reference on the struct path
user context goes about it's buissiness

at some later point in time the fsnotify listener gets the struct path
   fanotify listener calls dentry_open() to create a file which userspace can deal with
      listener drops the reference on the struct path
at some later point the listener calls close() on it's new file

With the switch from struct path to struct file this presents a problem for
fput() and fsnotify_close().  fsnotify_close() is called when the filp has
already reached 0 and __fput() wants to do it's cleanup.

The solution presented here is a bit odd.  If an event is created from a
struct file we take a reference on the file.  We check however if the f_count
was already 0 and if so we take an EXTRA reference EVEN THOUGH IT WAS ZERO.
In __fput() (where we know the f_count hit 0 once) we check if the f_count is
non-zero and if so we drop that 'extra' ref and return without destroying the
file.

Signed-off-by: Eric Paris <eparis@redhat.com>
---
 fs/file_table.c          |  9 +++++++++
 fs/notify/notification.c | 13 +++++++++++++
 2 files changed, 22 insertions(+)

(limited to 'fs')

diff --git a/fs/file_table.c b/fs/file_table.c
index 5c7d10e..b8a0bb6 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -230,6 +230,15 @@ static void __fput(struct file *file)
 	might_sleep();
 
 	fsnotify_close(file);
+
+	/*
+	 * fsnotify_create_event may have taken one or more references on this
+	 * file.  If it did so it left one reference for us to drop to make sure
+	 * its calls to fput could not prematurely destroy the file.
+	 */
+	if (atomic_long_read(&file->f_count))
+		return fput(file);
+
 	/*
 	 * The function eventpoll_release() should be the first called
 	 * in the file cleanup chain.
diff --git a/fs/notify/notification.c b/fs/notify/notification.c
index c106cdd..d6c435a 100644
--- a/fs/notify/notification.c
+++ b/fs/notify/notification.c
@@ -426,6 +426,19 @@ struct fsnotify_event *fsnotify_create_event(struct inode *to_tell, __u32 mask,
 	switch (data_type) {
 	case FSNOTIFY_EVENT_FILE: {
 		event->file = data;
+		/*
+		 * if this file is about to disappear hold an extra reference
+		 * until we return to __fput so we don't have to worry about
+		 * future get/put destroying the file under us or generating
+		 * additional events.  Notice that we change f_mode without
+		 * holding f_lock.  This is safe since this is the only possible
+		 * reference to this object in the kernel (it was about to be
+		 * freed, remember?)
+		 */
+		if (!atomic_long_read(&event->file->f_count)) {
+			event->file->f_mode |= FMODE_NONOTIFY;
+			get_file(event->file);
+		}
 		get_file(event->file);
 		break;
 	}
-- 
cgit v1.1


From 0c6532e4e3b0c8bd18dd0a5cc1894a1944997cc6 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Wed, 28 Jul 2010 10:18:38 -0400
Subject: fsnotify: place marks on object in order of group memory address

fsnotify_marks currently are placed on objects (inodes or vfsmounts) in
arbitrary order.  This patch places them in order of the group memory address.

Signed-off-by: Eric Paris <eparis@redhat.com>
---
 fs/notify/inode_mark.c    | 40 +++++++++++++++++++++++++++++-----------
 fs/notify/vfsmount_mark.c | 40 ++++++++++++++++++++++++++--------------
 2 files changed, 55 insertions(+), 25 deletions(-)

(limited to 'fs')

diff --git a/fs/notify/inode_mark.c b/fs/notify/inode_mark.c
index 0c0a48b..83ce6db 100644
--- a/fs/notify/inode_mark.c
+++ b/fs/notify/inode_mark.c
@@ -174,15 +174,17 @@ void fsnotify_set_inode_mark_mask_locked(struct fsnotify_mark *mark,
 }
 
 /*
- * Attach an initialized mark to a given group and inode.
+ * Attach an initialized mark to a given inode.
  * These marks may be used for the fsnotify backend to determine which
- * event types should be delivered to which group and for which inodes.
+ * event types should be delivered to which group and for which inodes.  These
+ * marks are ordered according to the group's location in memory.
  */
 int fsnotify_add_inode_mark(struct fsnotify_mark *mark,
 			    struct fsnotify_group *group, struct inode *inode,
 			    int allow_dups)
 {
-	struct fsnotify_mark *lmark = NULL;
+	struct fsnotify_mark *lmark;
+	struct hlist_node *node, *last = NULL;
 	int ret = 0;
 
 	mark->flags = FSNOTIFY_MARK_FLAG_INODE;
@@ -192,21 +194,37 @@ int fsnotify_add_inode_mark(struct fsnotify_mark *mark,
 
 	spin_lock(&inode->i_lock);
 
-	if (!allow_dups)
-		lmark = fsnotify_find_inode_mark_locked(group, inode);
-	if (!lmark) {
-		mark->i.inode = inode;
+	mark->i.inode = inode;
 
+	/* is mark the first mark? */
+	if (hlist_empty(&inode->i_fsnotify_marks)) {
 		hlist_add_head(&mark->i.i_list, &inode->i_fsnotify_marks);
+		goto out;
+	}
+
+	/* should mark be in the middle of the current list? */
+	hlist_for_each_entry(lmark, node, &inode->i_fsnotify_marks, i.i_list) {
+		last = node;
+
+		if ((lmark->group == group) && !allow_dups) {
+			ret = -EEXIST;
+			goto out;
+		}
 
-		fsnotify_recalc_inode_mask_locked(inode);
+		if (mark->group < lmark->group)
+			continue;
+
+		hlist_add_before(&mark->i.i_list, &lmark->i.i_list);
+		goto out;
 	}
 
+	BUG_ON(last == NULL);
+	/* mark should be the last entry.  last is the current last entry */
+	hlist_add_after(last, &mark->i.i_list);
+out:
+	fsnotify_recalc_inode_mask_locked(inode);
 	spin_unlock(&inode->i_lock);
 
-	if (lmark)
-		ret = -EEXIST;
-
 	return ret;
 }
 
diff --git a/fs/notify/vfsmount_mark.c b/fs/notify/vfsmount_mark.c
index ec580a2..c4b3f14 100644
--- a/fs/notify/vfsmount_mark.c
+++ b/fs/notify/vfsmount_mark.c
@@ -141,34 +141,46 @@ int fsnotify_add_vfsmount_mark(struct fsnotify_mark *mark,
 			       struct fsnotify_group *group, struct vfsmount *mnt,
 			       int allow_dups)
 {
-	struct fsnotify_mark *lmark = NULL;
+	struct fsnotify_mark *lmark;
+	struct hlist_node *node, *last = NULL;
 	int ret = 0;
 
 	mark->flags = FSNOTIFY_MARK_FLAG_VFSMOUNT;
 
-	/*
-	 * LOCKING ORDER!!!!
-	 * mark->lock
-	 * group->mark_lock
-	 * mnt->mnt_root->d_lock
-	 */
 	assert_spin_locked(&mark->lock);
 	assert_spin_locked(&group->mark_lock);
 
 	spin_lock(&mnt->mnt_root->d_lock);
 
-	if (!allow_dups)
-		lmark = fsnotify_find_vfsmount_mark_locked(group, mnt);
-	if (!lmark) {
-		mark->m.mnt = mnt;
+	mark->m.mnt = mnt;
 
+	/* is mark the first mark? */
+	if (hlist_empty(&mnt->mnt_fsnotify_marks)) {
 		hlist_add_head(&mark->m.m_list, &mnt->mnt_fsnotify_marks);
+		goto out;
+	}
+
+	/* should mark be in the middle of the current list? */
+	hlist_for_each_entry(lmark, node, &mnt->mnt_fsnotify_marks, m.m_list) {
+		last = node;
+
+		if ((lmark->group == group) && !allow_dups) {
+			ret = -EEXIST;
+			goto out;
+		}
+
+		if (mark->group < lmark->group)
+			continue;
 
-		fsnotify_recalc_vfsmount_mask_locked(mnt);
-	} else {
-		ret = -EEXIST;
+		hlist_add_before(&mark->m.m_list, &lmark->m.m_list);
+		goto out;
 	}
 
+	BUG_ON(last == NULL);
+	/* mark should be the last entry.  last is the current last entry */
+	hlist_add_after(last, &mark->m.m_list);
+out:
+	fsnotify_recalc_vfsmount_mask_locked(mnt);
 	spin_unlock(&mnt->mnt_root->d_lock);
 
 	return ret;
-- 
cgit v1.1


From a4c6e9961fcb9da54648d98978d33c6fdcb7bb45 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Wed, 28 Jul 2010 10:18:38 -0400
Subject: fsnotify: use _rcu functions for mark list traversal

In preparation for srcu locking use all _rcu appropiete functions for mark
list addition, removal, and traversal.  The operations are still done under a
spinlock at the end of this patch.

Signed-off-by: Eric Paris <eparis@redhat.com>
---
 fs/notify/inode_mark.c    | 10 +++++-----
 fs/notify/vfsmount_mark.c | 10 +++++-----
 2 files changed, 10 insertions(+), 10 deletions(-)

(limited to 'fs')

diff --git a/fs/notify/inode_mark.c b/fs/notify/inode_mark.c
index 83ce6db..455cb41 100644
--- a/fs/notify/inode_mark.c
+++ b/fs/notify/inode_mark.c
@@ -67,7 +67,7 @@ void fsnotify_destroy_inode_mark(struct fsnotify_mark *mark)
 
 	spin_lock(&inode->i_lock);
 
-	hlist_del_init(&mark->i.i_list);
+	hlist_del_init_rcu(&mark->i.i_list);
 	mark->i.inode = NULL;
 
 	/*
@@ -92,7 +92,7 @@ void fsnotify_clear_marks_by_inode(struct inode *inode)
 	spin_lock(&inode->i_lock);
 	hlist_for_each_entry_safe(mark, pos, n, &inode->i_fsnotify_marks, i.i_list) {
 		list_add(&mark->i.free_i_list, &free_list);
-		hlist_del_init(&mark->i.i_list);
+		hlist_del_init_rcu(&mark->i.i_list);
 		fsnotify_get_mark(mark);
 	}
 	spin_unlock(&inode->i_lock);
@@ -198,7 +198,7 @@ int fsnotify_add_inode_mark(struct fsnotify_mark *mark,
 
 	/* is mark the first mark? */
 	if (hlist_empty(&inode->i_fsnotify_marks)) {
-		hlist_add_head(&mark->i.i_list, &inode->i_fsnotify_marks);
+		hlist_add_head_rcu(&mark->i.i_list, &inode->i_fsnotify_marks);
 		goto out;
 	}
 
@@ -214,13 +214,13 @@ int fsnotify_add_inode_mark(struct fsnotify_mark *mark,
 		if (mark->group < lmark->group)
 			continue;
 
-		hlist_add_before(&mark->i.i_list, &lmark->i.i_list);
+		hlist_add_before_rcu(&mark->i.i_list, &lmark->i.i_list);
 		goto out;
 	}
 
 	BUG_ON(last == NULL);
 	/* mark should be the last entry.  last is the current last entry */
-	hlist_add_after(last, &mark->i.i_list);
+	hlist_add_after_rcu(last, &mark->i.i_list);
 out:
 	fsnotify_recalc_inode_mask_locked(inode);
 	spin_unlock(&inode->i_lock);
diff --git a/fs/notify/vfsmount_mark.c b/fs/notify/vfsmount_mark.c
index c4b3f14..b7ae640 100644
--- a/fs/notify/vfsmount_mark.c
+++ b/fs/notify/vfsmount_mark.c
@@ -39,7 +39,7 @@ void fsnotify_clear_marks_by_mount(struct vfsmount *mnt)
 	spin_lock(&mnt->mnt_root->d_lock);
 	hlist_for_each_entry_safe(mark, pos, n, &mnt->mnt_fsnotify_marks, m.m_list) {
 		list_add(&mark->m.free_m_list, &free_list);
-		hlist_del_init(&mark->m.m_list);
+		hlist_del_init_rcu(&mark->m.m_list);
 		fsnotify_get_mark(mark);
 	}
 	spin_unlock(&mnt->mnt_root->d_lock);
@@ -91,7 +91,7 @@ void fsnotify_destroy_vfsmount_mark(struct fsnotify_mark *mark)
 
 	spin_lock(&mnt->mnt_root->d_lock);
 
-	hlist_del_init(&mark->m.m_list);
+	hlist_del_init_rcu(&mark->m.m_list);
 	mark->m.mnt = NULL;
 
 	fsnotify_recalc_vfsmount_mask_locked(mnt);
@@ -156,7 +156,7 @@ int fsnotify_add_vfsmount_mark(struct fsnotify_mark *mark,
 
 	/* is mark the first mark? */
 	if (hlist_empty(&mnt->mnt_fsnotify_marks)) {
-		hlist_add_head(&mark->m.m_list, &mnt->mnt_fsnotify_marks);
+		hlist_add_head_rcu(&mark->m.m_list, &mnt->mnt_fsnotify_marks);
 		goto out;
 	}
 
@@ -172,13 +172,13 @@ int fsnotify_add_vfsmount_mark(struct fsnotify_mark *mark,
 		if (mark->group < lmark->group)
 			continue;
 
-		hlist_add_before(&mark->m.m_list, &lmark->m.m_list);
+		hlist_add_before_rcu(&mark->m.m_list, &lmark->m.m_list);
 		goto out;
 	}
 
 	BUG_ON(last == NULL);
 	/* mark should be the last entry.  last is the current last entry */
-	hlist_add_after(last, &mark->m.m_list);
+	hlist_add_after_rcu(last, &mark->m.m_list);
 out:
 	fsnotify_recalc_vfsmount_mask_locked(mnt);
 	spin_unlock(&mnt->mnt_root->d_lock);
-- 
cgit v1.1


From 700307a29ad61090dcf1d45f8f4a135f5e9211ae Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Wed, 28 Jul 2010 10:18:38 -0400
Subject: fsnotify: use an explicit flag to indicate fsnotify_destroy_mark has
 been called

Currently fsnotify check is mark->group is NULL to decide if
fsnotify_destroy_mark() has already been called or not.  With the upcoming
rcu work it is a heck of a lot easier to use an explicit flag than worry
about group being set to NULL.

Signed-off-by: Eric Paris <eparis@redhat.com>
---
 fs/notify/inode_mark.c    |  2 +-
 fs/notify/mark.c          | 11 +++++++----
 fs/notify/vfsmount_mark.c |  2 +-
 3 files changed, 9 insertions(+), 6 deletions(-)

(limited to 'fs')

diff --git a/fs/notify/inode_mark.c b/fs/notify/inode_mark.c
index 455cb41..37b460f 100644
--- a/fs/notify/inode_mark.c
+++ b/fs/notify/inode_mark.c
@@ -187,7 +187,7 @@ int fsnotify_add_inode_mark(struct fsnotify_mark *mark,
 	struct hlist_node *node, *last = NULL;
 	int ret = 0;
 
-	mark->flags = FSNOTIFY_MARK_FLAG_INODE;
+	mark->flags |= FSNOTIFY_MARK_FLAG_INODE;
 
 	assert_spin_locked(&mark->lock);
 	assert_spin_locked(&group->mark_lock);
diff --git a/fs/notify/mark.c b/fs/notify/mark.c
index 8f3b0e7..69c5a16 100644
--- a/fs/notify/mark.c
+++ b/fs/notify/mark.c
@@ -121,12 +121,14 @@ void fsnotify_destroy_mark(struct fsnotify_mark *mark)
 
 	group = mark->group;
 
-	/* if !group something else already marked this to die */
-	if (!group) {
+	/* something else already called this function on this mark */
+	if (!(mark->flags & FSNOTIFY_MARK_FLAG_ALIVE)) {
 		spin_unlock(&mark->lock);
 		return;
 	}
 
+	mark->flags &= ~FSNOTIFY_MARK_FLAG_ALIVE;
+
 	/* 1 from caller and 1 for being on i_list/g_list */
 	BUG_ON(atomic_read(&mark->refcnt) < 2);
 
@@ -141,7 +143,6 @@ void fsnotify_destroy_mark(struct fsnotify_mark *mark)
 		BUG();
 
 	list_del_init(&mark->g_list);
-	mark->group = NULL;
 
 	fsnotify_put_mark(mark); /* for i_list and g_list */
 
@@ -229,6 +230,8 @@ int fsnotify_add_mark(struct fsnotify_mark *mark,
 	spin_lock(&mark->lock);
 	spin_lock(&group->mark_lock);
 
+	mark->flags |= FSNOTIFY_MARK_FLAG_ALIVE;
+
 	mark->group = group;
 	list_add(&mark->g_list, &group->marks_list);
 	atomic_inc(&group->num_marks);
@@ -258,7 +261,7 @@ int fsnotify_add_mark(struct fsnotify_mark *mark,
 
 	return ret;
 err:
-	mark->group = NULL;
+	mark->flags &= ~FSNOTIFY_MARK_FLAG_ALIVE;
 	list_del_init(&mark->g_list);
 	atomic_dec(&group->num_marks);
 	fsnotify_put_mark(mark);
diff --git a/fs/notify/vfsmount_mark.c b/fs/notify/vfsmount_mark.c
index b7ae640..56772b5 100644
--- a/fs/notify/vfsmount_mark.c
+++ b/fs/notify/vfsmount_mark.c
@@ -145,7 +145,7 @@ int fsnotify_add_vfsmount_mark(struct fsnotify_mark *mark,
 	struct hlist_node *node, *last = NULL;
 	int ret = 0;
 
-	mark->flags = FSNOTIFY_MARK_FLAG_VFSMOUNT;
+	mark->flags |= FSNOTIFY_MARK_FLAG_VFSMOUNT;
 
 	assert_spin_locked(&mark->lock);
 	assert_spin_locked(&group->mark_lock);
-- 
cgit v1.1


From 75c1be487a690db43da2c1234fcacd84c982803c Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Wed, 28 Jul 2010 10:18:38 -0400
Subject: fsnotify: srcu to protect read side of inode and vfsmount locks

Currently reading the inode->i_fsnotify_marks or
vfsmount->mnt_fsnotify_marks lists are protected by a spinlock on both the
read and the write side.  This patch protects the read side of those lists
with a new single srcu.

Signed-off-by: Eric Paris <eparis@redhat.com>
---
 fs/notify/fsnotify.c | 69 ++++++++++++++++++++++++++++++++++------------------
 fs/notify/fsnotify.h |  5 ++--
 fs/notify/group.c    | 16 ++++--------
 fs/notify/mark.c     | 60 ++++++++++++++++++++++++++++++++++++++++++---
 4 files changed, 110 insertions(+), 40 deletions(-)

(limited to 'fs')

diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
index 4788c86..4678b41 100644
--- a/fs/notify/fsnotify.c
+++ b/fs/notify/fsnotify.c
@@ -144,14 +144,15 @@ void __fsnotify_flush_ignored_mask(struct inode *inode, void *data, int data_is)
 {
 	struct fsnotify_mark *mark;
 	struct hlist_node *node;
+	int idx;
+
+	idx = srcu_read_lock(&fsnotify_mark_srcu);
 
 	if (!hlist_empty(&inode->i_fsnotify_marks)) {
-		spin_lock(&inode->i_lock);
-		hlist_for_each_entry(mark, node, &inode->i_fsnotify_marks, i.i_list) {
+		hlist_for_each_entry_rcu(mark, node, &inode->i_fsnotify_marks, i.i_list) {
 			if (!(mark->flags & FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY))
 				mark->ignored_mask = 0;
 		}
-		spin_unlock(&inode->i_lock);
 	}
 
 	if (data_is == FSNOTIFY_EVENT_FILE) {
@@ -159,14 +160,14 @@ void __fsnotify_flush_ignored_mask(struct inode *inode, void *data, int data_is)
 
 		mnt = ((struct file *)data)->f_path.mnt;
 		if (mnt && !hlist_empty(&mnt->mnt_fsnotify_marks)) {
-			spin_lock(&mnt->mnt_root->d_lock);
-			hlist_for_each_entry(mark, node, &mnt->mnt_fsnotify_marks, m.m_list) {
+			hlist_for_each_entry_rcu(mark, node, &mnt->mnt_fsnotify_marks, m.m_list) {
 				if (!(mark->flags & FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY))
 					mark->ignored_mask = 0;
 			}
-			spin_unlock(&mnt->mnt_root->d_lock);
 		}
 	}
+
+	srcu_read_unlock(&fsnotify_mark_srcu, idx);
 }
 
 static int send_to_group(struct fsnotify_group *group, struct inode *to_tell,
@@ -208,8 +209,10 @@ static bool needed_by_vfsmount(__u32 test_mask, struct vfsmount *mnt)
 int fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is,
 	     const unsigned char *file_name, u32 cookie)
 {
+	struct fsnotify_mark *mark;
 	struct fsnotify_group *group;
 	struct fsnotify_event *event = NULL;
+	struct hlist_node *node;
 	struct vfsmount *mnt = NULL;
 	int idx, ret = 0;
 	/* global tests shouldn't care about events on child only the specific event */
@@ -237,35 +240,47 @@ int fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is,
 	    !needed_by_vfsmount(test_mask, mnt))
 		return 0;
 
-	/*
-	 * SRCU!!  the groups list is very very much read only and the path is
-	 * very hot.  The VAST majority of events are not going to need to do
-	 * anything other than walk the list so it's crazy to pre-allocate.
-	 */
-	idx = srcu_read_lock(&fsnotify_grp_srcu);
+	idx = srcu_read_lock(&fsnotify_mark_srcu);
 
 	if (test_mask & to_tell->i_fsnotify_mask) {
-		list_for_each_entry_rcu(group, &fsnotify_inode_groups, inode_group_list) {
-			if (test_mask & group->mask) {
-				ret = send_to_group(group, to_tell, NULL, mask, data, data_is,
-						    cookie, file_name, &event);
+		hlist_for_each_entry_rcu(mark, node, &to_tell->i_fsnotify_marks, i.i_list) {
+
+			pr_debug("%s: inode_loop: mark=%p mark->mask=%x mark->ignored_mask=%x\n",
+				 __func__, mark, mark->mask, mark->ignored_mask);
+
+			if (test_mask & mark->mask & ~mark->ignored_mask) {
+				group = mark->group;
+				if (!group)
+					continue;
+				ret = send_to_group(group, to_tell, NULL, mask,
+						    data, data_is, cookie, file_name,
+						    &event);
 				if (ret)
 					goto out;
 			}
 		}
 	}
-	if (needed_by_vfsmount(test_mask, mnt)) {
-		list_for_each_entry_rcu(group, &fsnotify_vfsmount_groups, vfsmount_group_list) {
-			if (test_mask & group->mask) {
-				ret = send_to_group(group, to_tell, mnt, mask, data, data_is,
-						    cookie, file_name, &event);
+
+	if (mnt && (test_mask & mnt->mnt_fsnotify_mask)) {
+		hlist_for_each_entry_rcu(mark, node, &mnt->mnt_fsnotify_marks, m.m_list) {
+
+			pr_debug("%s: mnt_loop: mark=%p mark->mask=%x mark->ignored_mask=%x\n",
+				 __func__, mark, mark->mask, mark->ignored_mask);
+
+			if (test_mask & mark->mask & ~mark->ignored_mask)  {
+				group = mark->group;
+				if (!group)
+					continue;
+				ret = send_to_group(group, to_tell, mnt, mask,
+						    data, data_is, cookie, file_name,
+						    &event);
 				if (ret)
 					goto out;
 			}
 		}
 	}
 out:
-	srcu_read_unlock(&fsnotify_grp_srcu, idx);
+	srcu_read_unlock(&fsnotify_mark_srcu, idx);
 	/*
 	 * fsnotify_create_event() took a reference so the event can't be cleaned
 	 * up while we are still trying to add it to lists, drop that one.
@@ -279,8 +294,14 @@ EXPORT_SYMBOL_GPL(fsnotify);
 
 static __init int fsnotify_init(void)
 {
+	int ret;
+
 	BUG_ON(hweight32(ALL_FSNOTIFY_EVENTS) != 23);
 
-	return init_srcu_struct(&fsnotify_grp_srcu);
+	ret = init_srcu_struct(&fsnotify_mark_srcu);
+	if (ret)
+		panic("initializing fsnotify_mark_srcu");
+
+	return 0;
 }
-subsys_initcall(fsnotify_init);
+core_initcall(fsnotify_init);
diff --git a/fs/notify/fsnotify.h b/fs/notify/fsnotify.h
index 1be54f6..7eed86f 100644
--- a/fs/notify/fsnotify.h
+++ b/fs/notify/fsnotify.h
@@ -6,8 +6,6 @@
 #include <linux/srcu.h>
 #include <linux/types.h>
 
-/* protects reads of fsnotify_groups */
-extern struct srcu_struct fsnotify_grp_srcu;
 /* all groups which receive inode fsnotify events */
 extern struct list_head fsnotify_inode_groups;
 /* all groups which receive vfsmount fsnotify events */
@@ -20,6 +18,9 @@ extern __u32 fsnotify_vfsmount_mask;
 /* destroy all events sitting in this groups notification queue */
 extern void fsnotify_flush_notify(struct fsnotify_group *group);
 
+/* protects reads of inode and vfsmount marks list */
+extern struct srcu_struct fsnotify_mark_srcu;
+
 extern void fsnotify_set_inode_mark_mask_locked(struct fsnotify_mark *fsn_mark,
 						__u32 mask);
 /* add a mark to an inode */
diff --git a/fs/notify/group.c b/fs/notify/group.c
index 7ac65ed..48d3a6d 100644
--- a/fs/notify/group.c
+++ b/fs/notify/group.c
@@ -30,8 +30,6 @@
 
 /* protects writes to fsnotify_groups and fsnotify_mask */
 static DEFINE_MUTEX(fsnotify_grp_mutex);
-/* protects reads while running the fsnotify_groups list */
-struct srcu_struct fsnotify_grp_srcu;
 /* all groups registered to receive inode filesystem notifications */
 LIST_HEAD(fsnotify_inode_groups);
 /* all groups registered to receive mount point filesystem notifications */
@@ -50,18 +48,17 @@ void fsnotify_recalc_global_mask(void)
 	struct fsnotify_group *group;
 	__u32 inode_mask = 0;
 	__u32 vfsmount_mask = 0;
-	int idx;
 
-	idx = srcu_read_lock(&fsnotify_grp_srcu);
+	mutex_lock(&fsnotify_grp_mutex);
 	list_for_each_entry_rcu(group, &fsnotify_inode_groups, inode_group_list)
 		inode_mask |= group->mask;
 	list_for_each_entry_rcu(group, &fsnotify_vfsmount_groups, vfsmount_group_list)
 		vfsmount_mask |= group->mask;
-		
-	srcu_read_unlock(&fsnotify_grp_srcu, idx);
 
 	fsnotify_inode_mask = inode_mask;
 	fsnotify_vfsmount_mask = vfsmount_mask;
+
+	mutex_unlock(&fsnotify_grp_mutex);
 }
 
 /*
@@ -168,6 +165,8 @@ static void fsnotify_destroy_group(struct fsnotify_group *group)
 	/* clear all inode marks for this group */
 	fsnotify_clear_marks_by_group(group);
 
+	synchronize_srcu(&fsnotify_mark_srcu);
+
 	/* past the point of no return, matches the initial value of 1 */
 	if (atomic_dec_and_test(&group->num_marks))
 		fsnotify_final_destroy_group(group);
@@ -216,12 +215,7 @@ void fsnotify_put_group(struct fsnotify_group *group)
 	 */
 	__fsnotify_evict_group(group);
 
-	/*
-	 * now it's off the list, so the only thing we might care about is
-	 * srcu access....
-	 */
 	mutex_unlock(&fsnotify_grp_mutex);
-	synchronize_srcu(&fsnotify_grp_srcu);
 
 	/* and now it is really dead. _Nothing_ could be seeing it */
 	fsnotify_recalc_global_mask();
diff --git a/fs/notify/mark.c b/fs/notify/mark.c
index 69c5a16..41f3990 100644
--- a/fs/notify/mark.c
+++ b/fs/notify/mark.c
@@ -85,10 +85,12 @@
 #include <linux/fs.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
+#include <linux/kthread.h>
 #include <linux/module.h>
 #include <linux/mutex.h>
 #include <linux/slab.h>
 #include <linux/spinlock.h>
+#include <linux/srcu.h>
 #include <linux/writeback.h> /* for inode_lock */
 
 #include <asm/atomic.h>
@@ -96,6 +98,11 @@
 #include <linux/fsnotify_backend.h>
 #include "fsnotify.h"
 
+struct srcu_struct fsnotify_mark_srcu;
+static DEFINE_SPINLOCK(destroy_lock);
+static LIST_HEAD(destroy_list);
+static DECLARE_WAIT_QUEUE_HEAD(destroy_waitq);
+
 void fsnotify_get_mark(struct fsnotify_mark *mark)
 {
 	atomic_inc(&mark->refcnt);
@@ -144,11 +151,14 @@ void fsnotify_destroy_mark(struct fsnotify_mark *mark)
 
 	list_del_init(&mark->g_list);
 
-	fsnotify_put_mark(mark); /* for i_list and g_list */
-
 	spin_unlock(&group->mark_lock);
 	spin_unlock(&mark->lock);
 
+	spin_lock(&destroy_lock);
+	list_add(&mark->destroy_list, &destroy_list);
+	spin_unlock(&destroy_lock);
+	wake_up(&destroy_waitq);
+
 	/*
 	 * Some groups like to know that marks are being freed.  This is a
 	 * callback to the group function to let it know that this mark
@@ -263,12 +273,17 @@ int fsnotify_add_mark(struct fsnotify_mark *mark,
 err:
 	mark->flags &= ~FSNOTIFY_MARK_FLAG_ALIVE;
 	list_del_init(&mark->g_list);
+	mark->group = NULL;
 	atomic_dec(&group->num_marks);
-	fsnotify_put_mark(mark);
 
 	spin_unlock(&group->mark_lock);
 	spin_unlock(&mark->lock);
 
+	spin_lock(&destroy_lock);
+	list_add(&mark->destroy_list, &destroy_list);
+	spin_unlock(&destroy_lock);
+	wake_up(&destroy_waitq);
+
 	return ret;
 }
 
@@ -326,3 +341,42 @@ void fsnotify_init_mark(struct fsnotify_mark *mark,
 	atomic_set(&mark->refcnt, 1);
 	mark->free_mark = free_mark;
 }
+
+static int fsnotify_mark_destroy(void *ignored)
+{
+	struct fsnotify_mark *mark, *next;
+	LIST_HEAD(private_destroy_list);
+
+	for (;;) {
+		spin_lock(&destroy_lock);
+		list_for_each_entry_safe(mark, next, &destroy_list, destroy_list) {
+			list_del(&mark->destroy_list);
+			list_add(&mark->destroy_list, &private_destroy_list);
+		}
+		spin_unlock(&destroy_lock);
+
+		synchronize_srcu(&fsnotify_mark_srcu);
+
+		list_for_each_entry_safe(mark, next, &private_destroy_list, destroy_list) {
+			list_del_init(&mark->destroy_list);
+			fsnotify_put_mark(mark);
+		}
+
+		wait_event_interruptible(destroy_waitq, !list_empty(&destroy_list));
+	}
+
+	return 0;
+}
+
+static int __init fsnotify_mark_init(void)
+{
+	struct task_struct *thread;
+
+	thread = kthread_run(fsnotify_mark_destroy, NULL,
+			     "fsnotify_mark");
+	if (IS_ERR(thread))
+		panic("unable to start fsnotify mark destruction thread.");
+
+	return 0;
+}
+device_initcall(fsnotify_mark_init);
-- 
cgit v1.1


From 8778abb9a88fc4a74d8776ffaadf7214cf33c61e Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruen@suse.de>
Date: Wed, 28 Jul 2010 10:18:38 -0400
Subject: fsnotify: Exchange list heads instead of moving elements

Instead of moving list elements from destroy_list to &private_destroy_list,
exchange the list heads.

Signed-off-by: Andreas Gruenbacher <agruen@suse.de>
Signed-off-by: Eric Paris <eparis@redhat.com>
---
 fs/notify/mark.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/notify/mark.c b/fs/notify/mark.c
index 41f3990..236f29b 100644
--- a/fs/notify/mark.c
+++ b/fs/notify/mark.c
@@ -349,10 +349,8 @@ static int fsnotify_mark_destroy(void *ignored)
 
 	for (;;) {
 		spin_lock(&destroy_lock);
-		list_for_each_entry_safe(mark, next, &destroy_list, destroy_list) {
-			list_del(&mark->destroy_list);
-			list_add(&mark->destroy_list, &private_destroy_list);
-		}
+		/* exchange the list head */
+		list_replace_init(&destroy_list, &private_destroy_list);
 		spin_unlock(&destroy_lock);
 
 		synchronize_srcu(&fsnotify_mark_srcu);
-- 
cgit v1.1


From 3a9b16b407f10b2a771bcae13fb5791e527d6bcf Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Wed, 28 Jul 2010 10:18:38 -0400
Subject: fsnotify: send fsnotify_mark to groups in event handling functions

With the change of fsnotify to use srcu walking the marks list instead of
walking the global groups list we now know the mark in question.  The code can
send the mark to the group's handling functions and the groups won't have to
find those marks themselves.

Signed-off-by: Eric Paris <eparis@redhat.com>
---
 fs/notify/dnotify/dnotify.c          |  4 +++-
 fs/notify/fanotify/fanotify.c        |  8 +++++---
 fs/notify/fsnotify.c                 | 19 ++++++++++---------
 fs/notify/inotify/inotify_fsnotify.c |  8 +++++---
 4 files changed, 23 insertions(+), 16 deletions(-)

(limited to 'fs')

diff --git a/fs/notify/dnotify/dnotify.c b/fs/notify/dnotify/dnotify.c
index 6624c2e..2cae9be 100644
--- a/fs/notify/dnotify/dnotify.c
+++ b/fs/notify/dnotify/dnotify.c
@@ -83,6 +83,7 @@ static void dnotify_recalc_inode_mask(struct fsnotify_mark *fsn_mark)
  * events.
  */
 static int dnotify_handle_event(struct fsnotify_group *group,
+				struct fsnotify_mark *mark,
 				struct fsnotify_event *event)
 {
 	struct fsnotify_mark *fsn_mark = NULL;
@@ -130,7 +131,8 @@ static int dnotify_handle_event(struct fsnotify_group *group,
  */
 static bool dnotify_should_send_event(struct fsnotify_group *group,
 				      struct inode *inode, struct vfsmount *mnt,
-				      __u32 mask, void *data, int data_type)
+				      struct fsnotify_mark *mark, __u32 mask,
+				      void *data, int data_type)
 {
 	struct fsnotify_mark *fsn_mark;
 	bool send;
diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
index c2a3029..abfba45 100644
--- a/fs/notify/fanotify/fanotify.c
+++ b/fs/notify/fanotify/fanotify.c
@@ -114,7 +114,9 @@ static int fanotify_get_response_from_access(struct fsnotify_group *group,
 }
 #endif
 
-static int fanotify_handle_event(struct fsnotify_group *group, struct fsnotify_event *event)
+static int fanotify_handle_event(struct fsnotify_group *group,
+				 struct fsnotify_mark *mark,
+				 struct fsnotify_event *event)
 {
 	int ret = 0;
 	struct fsnotify_event *notify_event = NULL;
@@ -214,8 +216,8 @@ static bool should_send_inode_event(struct fsnotify_group *group, struct inode *
 }
 
 static bool fanotify_should_send_event(struct fsnotify_group *group, struct inode *to_tell,
-				       struct vfsmount *mnt, __u32 mask, void *data,
-				       int data_type)
+				       struct vfsmount *mnt, struct fsnotify_mark *mark,
+				       __u32 mask, void *data, int data_type)
 {
 	pr_debug("%s: group=%p to_tell=%p mnt=%p mask=%x data=%p data_type=%d\n",
 		 __func__, group, to_tell, mnt, mask, data, data_type);
diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
index 4678b41..59d6399 100644
--- a/fs/notify/fsnotify.c
+++ b/fs/notify/fsnotify.c
@@ -171,15 +171,16 @@ void __fsnotify_flush_ignored_mask(struct inode *inode, void *data, int data_is)
 }
 
 static int send_to_group(struct fsnotify_group *group, struct inode *to_tell,
-			 struct vfsmount *mnt, __u32 mask, void *data,
-			 int data_is, u32 cookie, const unsigned char *file_name,
+			 struct vfsmount *mnt, struct fsnotify_mark *mark,
+			 __u32 mask, void *data, int data_is, u32 cookie,
+			 const unsigned char *file_name,
 			 struct fsnotify_event **event)
 {
-	pr_debug("%s: group=%p to_tell=%p mnt=%p mask=%x data=%p data_is=%d"
-		 " cookie=%d event=%p\n", __func__, group, to_tell, mnt,
-		 mask, data, data_is, cookie, *event);
+	pr_debug("%s: group=%p to_tell=%p mnt=%p mark=%p mask=%x data=%p"
+		 " data_is=%d cookie=%d event=%p\n", __func__, group, to_tell,
+		 mnt, mark, mask, data, data_is, cookie, *event);
 
-	if (!group->ops->should_send_event(group, to_tell, mnt, mask,
+	if (!group->ops->should_send_event(group, to_tell, mnt, mark, mask,
 					   data, data_is))
 		return 0;
 	if (!*event) {
@@ -189,7 +190,7 @@ static int send_to_group(struct fsnotify_group *group, struct inode *to_tell,
 		if (!*event)
 			return -ENOMEM;
 	}
-	return group->ops->handle_event(group, *event);
+	return group->ops->handle_event(group, mark, *event);
 }
 
 static bool needed_by_vfsmount(__u32 test_mask, struct vfsmount *mnt)
@@ -252,7 +253,7 @@ int fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is,
 				group = mark->group;
 				if (!group)
 					continue;
-				ret = send_to_group(group, to_tell, NULL, mask,
+				ret = send_to_group(group, to_tell, NULL, mark, mask,
 						    data, data_is, cookie, file_name,
 						    &event);
 				if (ret)
@@ -271,7 +272,7 @@ int fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is,
 				group = mark->group;
 				if (!group)
 					continue;
-				ret = send_to_group(group, to_tell, mnt, mask,
+				ret = send_to_group(group, to_tell, mnt, mark, mask,
 						    data, data_is, cookie, file_name,
 						    &event);
 				if (ret)
diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c
index 3c506e0..dbd76bb 100644
--- a/fs/notify/inotify/inotify_fsnotify.c
+++ b/fs/notify/inotify/inotify_fsnotify.c
@@ -89,7 +89,9 @@ static struct fsnotify_event *inotify_merge(struct list_head *list,
 	return last_event;
 }
 
-static int inotify_handle_event(struct fsnotify_group *group, struct fsnotify_event *event)
+static int inotify_handle_event(struct fsnotify_group *group,
+				struct fsnotify_mark *mark,
+				struct fsnotify_event *event)
 {
 	struct fsnotify_mark *fsn_mark;
 	struct inotify_inode_mark *i_mark;
@@ -148,8 +150,8 @@ static void inotify_freeing_mark(struct fsnotify_mark *fsn_mark, struct fsnotify
 }
 
 static bool inotify_should_send_event(struct fsnotify_group *group, struct inode *inode,
-				      struct vfsmount *mnt, __u32 mask, void *data,
-				      int data_type)
+				      struct vfsmount *mnt, struct fsnotify_mark *mark,
+				      __u32 mask, void *data, int data_type)
 {
 	struct fsnotify_mark *fsn_mark;
 	bool send;
-- 
cgit v1.1


From 7f6b6117e1803777fcf48fe31bd236a7fbf740db Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Wed, 28 Jul 2010 10:18:38 -0400
Subject: inotify: use the mark in handler functions

inotify now gets a mark in the should_send_event and handle_event
functions.  Rather than look up the mark themselves inotify should just use
the mark it was handed.

Signed-off-by: Eric Paris <eparis@redhat.com>
---
 fs/notify/inotify/inotify_fsnotify.c | 29 +++++------------------------
 1 file changed, 5 insertions(+), 24 deletions(-)

(limited to 'fs')

diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c
index dbd76bb..aa3f93c 100644
--- a/fs/notify/inotify/inotify_fsnotify.c
+++ b/fs/notify/inotify/inotify_fsnotify.c
@@ -93,7 +93,6 @@ static int inotify_handle_event(struct fsnotify_group *group,
 				struct fsnotify_mark *mark,
 				struct fsnotify_event *event)
 {
-	struct fsnotify_mark *fsn_mark;
 	struct inotify_inode_mark *i_mark;
 	struct inode *to_tell;
 	struct inotify_event_private_data *event_priv;
@@ -106,11 +105,7 @@ static int inotify_handle_event(struct fsnotify_group *group,
 
 	to_tell = event->to_tell;
 
-	fsn_mark = fsnotify_find_inode_mark(group, to_tell);
-	/* race with watch removal?  We already passes should_send */
-	if (unlikely(!fsn_mark))
-		return 0;
-	i_mark = container_of(fsn_mark, struct inotify_inode_mark,
+	i_mark = container_of(mark, struct inotify_inode_mark,
 			      fsn_mark);
 	wd = i_mark->wd;
 
@@ -132,14 +127,8 @@ static int inotify_handle_event(struct fsnotify_group *group,
 			ret = PTR_ERR(added_event);
 	}
 
-	if (fsn_mark->mask & IN_ONESHOT)
-		fsnotify_destroy_mark(fsn_mark);
-
-	/*
-	 * If we hold the fsn_mark until after the event is on the queue
-	 * IN_IGNORED won't be able to pass this event in the queue
-	 */
-	fsnotify_put_mark(fsn_mark);
+	if (mark->mask & IN_ONESHOT)
+		fsnotify_destroy_mark(mark);
 
 	return ret;
 }
@@ -153,20 +142,15 @@ static bool inotify_should_send_event(struct fsnotify_group *group, struct inode
 				      struct vfsmount *mnt, struct fsnotify_mark *mark,
 				      __u32 mask, void *data, int data_type)
 {
-	struct fsnotify_mark *fsn_mark;
 	bool send;
 
 	pr_debug("%s: group=%p inode=%p mask=%x data=%p data_type=%d\n",
 		 __func__, group, inode, mask, data, data_type);
 
-	fsn_mark = fsnotify_find_inode_mark(group, inode);
-	if (!fsn_mark)
-		return false;
-
 	mask = (mask & ~FS_EVENT_ON_CHILD);
-	send = (fsn_mark->mask & mask);
+	send = (mark->mask & mask);
 
-	if (send && (fsn_mark->mask & FS_EXCL_UNLINK) &&
+	if (send && (mark->mask & FS_EXCL_UNLINK) &&
 	    (data_type == FSNOTIFY_EVENT_FILE)) {
 		struct file *file  = data;
 
@@ -174,9 +158,6 @@ static bool inotify_should_send_event(struct fsnotify_group *group, struct inode
 			send = false;
 	}
 
-	/* find took a reference */
-	fsnotify_put_mark(fsn_mark);
-
 	return send;
 }
 
-- 
cgit v1.1


From c496313fcc35a41e176e3f19cdda2544ea3a32a6 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Wed, 28 Jul 2010 10:18:38 -0400
Subject: dnotify: use the mark in handler functions

dnotify now gets a mark in the should_send_event and handle_event
functions.  Rather than look up the mark themselves dnotify should just use
the mark it was handed.

Signed-off-by: Eric Paris <eparis@redhat.com>
---
 fs/notify/dnotify/dnotify.c | 22 +++++-----------------
 1 file changed, 5 insertions(+), 17 deletions(-)

(limited to 'fs')

diff --git a/fs/notify/dnotify/dnotify.c b/fs/notify/dnotify/dnotify.c
index 2cae9be..e3e855f 100644
--- a/fs/notify/dnotify/dnotify.c
+++ b/fs/notify/dnotify/dnotify.c
@@ -86,7 +86,6 @@ static int dnotify_handle_event(struct fsnotify_group *group,
 				struct fsnotify_mark *mark,
 				struct fsnotify_event *event)
 {
-	struct fsnotify_mark *fsn_mark = NULL;
 	struct dnotify_mark *dn_mark;
 	struct inode *to_tell;
 	struct dnotify_struct *dn;
@@ -96,12 +95,9 @@ static int dnotify_handle_event(struct fsnotify_group *group,
 
 	to_tell = event->to_tell;
 
-	fsn_mark = fsnotify_find_inode_mark(group, to_tell);
-	if (unlikely(!fsn_mark))
-		return 0;
-	dn_mark = container_of(fsn_mark, struct dnotify_mark, fsn_mark);
+	dn_mark = container_of(mark, struct dnotify_mark, fsn_mark);
 
-	spin_lock(&fsn_mark->lock);
+	spin_lock(&mark->lock);
 	prev = &dn_mark->dn;
 	while ((dn = *prev) != NULL) {
 		if ((dn->dn_mask & test_mask) == 0) {
@@ -115,12 +111,11 @@ static int dnotify_handle_event(struct fsnotify_group *group,
 		else {
 			*prev = dn->dn_next;
 			kmem_cache_free(dnotify_struct_cache, dn);
-			dnotify_recalc_inode_mask(fsn_mark);
+			dnotify_recalc_inode_mask(mark);
 		}
 	}
 
-	spin_unlock(&fsn_mark->lock);
-	fsnotify_put_mark(fsn_mark);
+	spin_unlock(&mark->lock);
 
 	return 0;
 }
@@ -134,7 +129,6 @@ static bool dnotify_should_send_event(struct fsnotify_group *group,
 				      struct fsnotify_mark *mark, __u32 mask,
 				      void *data, int data_type)
 {
-	struct fsnotify_mark *fsn_mark;
 	bool send;
 
 	/* !dir_notify_enable should never get here, don't waste time checking
@@ -145,14 +139,8 @@ static bool dnotify_should_send_event(struct fsnotify_group *group,
 	if (!S_ISDIR(inode->i_mode))
 		return false;
 
-	fsn_mark = fsnotify_find_inode_mark(group, inode);
-	if (!fsn_mark)
-		return false;
-
 	mask = (mask & ~FS_EVENT_ON_CHILD);
-	send = (mask & fsn_mark->mask);
-
-	fsnotify_put_mark(fsn_mark); /* matches fsnotify_find_inode_mark */
+	send = (mask & mark->mask);
 
 	return send;
 }
-- 
cgit v1.1


From 0215054f377ce5ac4ffc27b26b13b3f10e6410e6 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Wed, 28 Jul 2010 10:18:38 -0400
Subject: fanotify: use the mark in handler functions

fanotify now gets a mark in the should_send_event and handle_event
functions.  Rather than look up the mark themselves fanotify should just use
the mark it was handed.

Signed-off-by: Eric Paris <eparis@redhat.com>
---
 fs/notify/fanotify/fanotify.c | 46 ++++++++++++++++---------------------------
 1 file changed, 17 insertions(+), 29 deletions(-)

(limited to 'fs')

diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
index abfba45..666ccb7 100644
--- a/fs/notify/fanotify/fanotify.c
+++ b/fs/notify/fanotify/fanotify.c
@@ -152,18 +152,16 @@ static int fanotify_handle_event(struct fsnotify_group *group,
 	return ret;
 }
 
-static bool should_send_vfsmount_event(struct fsnotify_group *group, struct vfsmount *mnt,
-				       struct inode *inode, __u32 mask)
+static bool should_send_vfsmount_event(struct fsnotify_group *group,
+				       struct vfsmount *mnt,
+				       struct inode *inode,
+				       struct fsnotify_mark *mnt_mark,
+				       __u32 mask)
 {
-	struct fsnotify_mark *mnt_mark;
 	struct fsnotify_mark *inode_mark;
 
-	pr_debug("%s: group=%p vfsmount=%p mask=%x\n",
-		 __func__, group, mnt, mask);
-
-	mnt_mark = fsnotify_find_vfsmount_mark(group, mnt);
-	if (!mnt_mark)
-		return false;
+	pr_debug("%s: group=%p vfsmount=%p mark=%p mask=%x\n",
+		 __func__, group, mnt, mnt_mark, mask);
 
 	mask &= mnt_mark->mask;
 	mask &= ~mnt_mark->ignored_mask;
@@ -176,28 +174,21 @@ static bool should_send_vfsmount_event(struct fsnotify_group *group, struct vfsm
 		}
 	}
 
-	/* find took a reference */
-	fsnotify_put_mark(mnt_mark);
-
 	return mask;
 }
 
-static bool should_send_inode_event(struct fsnotify_group *group, struct inode *inode,
+static bool should_send_inode_event(struct fsnotify_group *group,
+				    struct inode *inode,
+				    struct fsnotify_mark *mark,
 				    __u32 mask)
 {
-	struct fsnotify_mark *fsn_mark;
-
-	pr_debug("%s: group=%p inode=%p mask=%x\n",
-		 __func__, group, inode, mask);
-
-	fsn_mark = fsnotify_find_inode_mark(group, inode);
-	if (!fsn_mark)
-		return false;
+	pr_debug("%s: group=%p inode=%p mark=%p mask=%x\n",
+		 __func__, group, inode, mark, mask);
 
 	/* if the event is for a child and this inode doesn't care about
 	 * events on the child, don't send it! */
 	if ((mask & FS_EVENT_ON_CHILD) &&
-	    !(fsn_mark->mask & FS_EVENT_ON_CHILD)) {
+	    !(mark->mask & FS_EVENT_ON_CHILD)) {
 		mask = 0;
 	} else {
 		/*
@@ -205,13 +196,10 @@ static bool should_send_inode_event(struct fsnotify_group *group, struct inode *
 		 * type of event?
 		 */
 		mask &= ~FS_EVENT_ON_CHILD;
-		mask &= fsn_mark->mask;
-		mask &= ~fsn_mark->ignored_mask;
+		mask &= mark->mask;
+		mask &= ~mark->ignored_mask;
 	}
 
-	/* find took a reference */
-	fsnotify_put_mark(fsn_mark);
-
 	return mask;
 }
 
@@ -232,9 +220,9 @@ static bool fanotify_should_send_event(struct fsnotify_group *group, struct inod
 		return false;
 
 	if (mnt)
-		return should_send_vfsmount_event(group, mnt, to_tell, mask);
+		return should_send_vfsmount_event(group, mnt, to_tell, mark, mask);
 	else
-		return should_send_inode_event(group, to_tell, mask);
+		return should_send_inode_event(group, to_tell, mark, mask);
 }
 
 const struct fsnotify_ops fanotify_fsnotify_ops = {
-- 
cgit v1.1


From 2612abb51b11ffd2d75c472b11178115f5808909 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Wed, 28 Jul 2010 10:18:39 -0400
Subject: fsnotify: cleanup should_send_event

The change to use srcu and walk the object list rather than the global
fsnotify_group list means that should_send_event is no longer needed for a
number of groups and can be simplified for others.  Do that.

Signed-off-by: Eric Paris <eparis@redhat.com>
---
 fs/notify/dnotify/dnotify.c          | 11 +----------
 fs/notify/fanotify/fanotify.c        | 23 ++++++++---------------
 fs/notify/fsnotify.c                 |  4 ++--
 fs/notify/inotify/inotify_fsnotify.c | 14 +++-----------
 4 files changed, 14 insertions(+), 38 deletions(-)

(limited to 'fs')

diff --git a/fs/notify/dnotify/dnotify.c b/fs/notify/dnotify/dnotify.c
index e3e855f..c3dc158 100644
--- a/fs/notify/dnotify/dnotify.c
+++ b/fs/notify/dnotify/dnotify.c
@@ -129,20 +129,11 @@ static bool dnotify_should_send_event(struct fsnotify_group *group,
 				      struct fsnotify_mark *mark, __u32 mask,
 				      void *data, int data_type)
 {
-	bool send;
-
-	/* !dir_notify_enable should never get here, don't waste time checking
-	if (!dir_notify_enable)
-		return 0; */
-
 	/* not a dir, dnotify doesn't care */
 	if (!S_ISDIR(inode->i_mode))
 		return false;
 
-	mask = (mask & ~FS_EVENT_ON_CHILD);
-	send = (mask & mark->mask);
-
-	return send;
+	return true;
 }
 
 static void dnotify_free_mark(struct fsnotify_mark *fsn_mark)
diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
index 666ccb7..fbd7f35 100644
--- a/fs/notify/fanotify/fanotify.c
+++ b/fs/notify/fanotify/fanotify.c
@@ -185,22 +185,15 @@ static bool should_send_inode_event(struct fsnotify_group *group,
 	pr_debug("%s: group=%p inode=%p mark=%p mask=%x\n",
 		 __func__, group, inode, mark, mask);
 
-	/* if the event is for a child and this inode doesn't care about
-	 * events on the child, don't send it! */
+	/*
+	 * if the event is for a child and this inode doesn't care about
+	 * events on the child, don't send it!
+	 */
 	if ((mask & FS_EVENT_ON_CHILD) &&
-	    !(mark->mask & FS_EVENT_ON_CHILD)) {
-		mask = 0;
-	} else {
-		/*
-		 * We care about children, but do we care about this particular
-		 * type of event?
-		 */
-		mask &= ~FS_EVENT_ON_CHILD;
-		mask &= mark->mask;
-		mask &= ~mark->ignored_mask;
-	}
-
-	return mask;
+	    !(mark->mask & FS_EVENT_ON_CHILD))
+		return false;
+	else
+		return true;
 }
 
 static bool fanotify_should_send_event(struct fsnotify_group *group, struct inode *to_tell,
diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
index 59d6399..53b31f4 100644
--- a/fs/notify/fsnotify.c
+++ b/fs/notify/fsnotify.c
@@ -180,8 +180,8 @@ static int send_to_group(struct fsnotify_group *group, struct inode *to_tell,
 		 " data_is=%d cookie=%d event=%p\n", __func__, group, to_tell,
 		 mnt, mark, mask, data, data_is, cookie, *event);
 
-	if (!group->ops->should_send_event(group, to_tell, mnt, mark, mask,
-					   data, data_is))
+	if (group->ops->should_send_event(group, to_tell, mnt, mark, mask,
+					  data, data_is) == false)
 		return 0;
 	if (!*event) {
 		*event = fsnotify_create_event(to_tell, mask, data,
diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c
index aa3f93c..7cf518b 100644
--- a/fs/notify/inotify/inotify_fsnotify.c
+++ b/fs/notify/inotify/inotify_fsnotify.c
@@ -142,23 +142,15 @@ static bool inotify_should_send_event(struct fsnotify_group *group, struct inode
 				      struct vfsmount *mnt, struct fsnotify_mark *mark,
 				      __u32 mask, void *data, int data_type)
 {
-	bool send;
-
-	pr_debug("%s: group=%p inode=%p mask=%x data=%p data_type=%d\n",
-		 __func__, group, inode, mask, data, data_type);
-
-	mask = (mask & ~FS_EVENT_ON_CHILD);
-	send = (mark->mask & mask);
-
-	if (send && (mark->mask & FS_EXCL_UNLINK) &&
+	if ((mark->mask & FS_EXCL_UNLINK) &&
 	    (data_type == FSNOTIFY_EVENT_FILE)) {
 		struct file *file  = data;
 
 		if (d_unlinked(file->f_path.dentry))
-			send = false;
+			return false;
 	}
 
-	return send;
+	return true;
 }
 
 /*
-- 
cgit v1.1


From 03930979afa63e079e9aefd4d3dd429240711027 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Wed, 28 Jul 2010 10:18:39 -0400
Subject: fsnotify: remove the global masks

Because we walk the object->fsnotify_marks list instead of the global
fsnotify groups list we don't need the fsnotify_inode_mask and
fsnotify_vfsmount_mask as these were simply shortcuts in fsnotify() for
performance.  They are now extra checks, rip them out.

Signed-off-by: Eric Paris <eparis@redhat.com>
---
 fs/notify/fsnotify.c |  5 -----
 fs/notify/fsnotify.h |  4 ----
 fs/notify/group.c    | 39 ++-------------------------------------
 3 files changed, 2 insertions(+), 46 deletions(-)

(limited to 'fs')

diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
index 53b31f4..9ba29ee 100644
--- a/fs/notify/fsnotify.c
+++ b/fs/notify/fsnotify.c
@@ -227,11 +227,6 @@ int fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is,
 	if (mask & FS_MODIFY)
 		__fsnotify_flush_ignored_mask(to_tell, data, data_is);
 
-	/* if none of the directed listeners or vfsmount listeners care */
-	if (!(test_mask & fsnotify_inode_mask) &&
-	    !(test_mask & fsnotify_vfsmount_mask))
-		return 0;
- 
 	if (data_is == FSNOTIFY_EVENT_FILE)
 		mnt = ((struct file *)data)->f_path.mnt;
 
diff --git a/fs/notify/fsnotify.h b/fs/notify/fsnotify.h
index 7eed86f..b41dbf5 100644
--- a/fs/notify/fsnotify.h
+++ b/fs/notify/fsnotify.h
@@ -10,10 +10,6 @@
 extern struct list_head fsnotify_inode_groups;
 /* all groups which receive vfsmount fsnotify events */
 extern struct list_head fsnotify_vfsmount_groups;
-/* all bitwise OR of all event types (FS_*) for all fsnotify_inode_groups */
-extern __u32 fsnotify_inode_mask;
-/* all bitwise OR of all event types (FS_*) for all fsnotify_vfsmount_groups */
-extern __u32 fsnotify_vfsmount_mask;
 
 /* destroy all events sitting in this groups notification queue */
 extern void fsnotify_flush_notify(struct fsnotify_group *group);
diff --git a/fs/notify/group.c b/fs/notify/group.c
index 48d3a6d..8da532d 100644
--- a/fs/notify/group.c
+++ b/fs/notify/group.c
@@ -34,54 +34,21 @@ static DEFINE_MUTEX(fsnotify_grp_mutex);
 LIST_HEAD(fsnotify_inode_groups);
 /* all groups registered to receive mount point filesystem notifications */
 LIST_HEAD(fsnotify_vfsmount_groups);
-/* bitwise OR of all events (FS_*) interesting to some group on this system */
-__u32 fsnotify_inode_mask;
-/* bitwise OR of all events (FS_*) interesting to some group on this system */
-__u32 fsnotify_vfsmount_mask;
-
-/*
- * When a new group registers or changes it's set of interesting events
- * this function updates the fsnotify_mask to contain all interesting events
- */
-void fsnotify_recalc_global_mask(void)
-{
-	struct fsnotify_group *group;
-	__u32 inode_mask = 0;
-	__u32 vfsmount_mask = 0;
-
-	mutex_lock(&fsnotify_grp_mutex);
-	list_for_each_entry_rcu(group, &fsnotify_inode_groups, inode_group_list)
-		inode_mask |= group->mask;
-	list_for_each_entry_rcu(group, &fsnotify_vfsmount_groups, vfsmount_group_list)
-		vfsmount_mask |= group->mask;
-
-	fsnotify_inode_mask = inode_mask;
-	fsnotify_vfsmount_mask = vfsmount_mask;
-
-	mutex_unlock(&fsnotify_grp_mutex);
-}
 
 /*
  * Update the group->mask by running all of the marks associated with this
- * group and finding the bitwise | of all of the mark->mask.  If we change
- * the group->mask we need to update the global mask of events interesting
- * to the system.
+ * group and finding the bitwise | of all of the mark->mask.
  */
 void fsnotify_recalc_group_mask(struct fsnotify_group *group)
 {
 	__u32 mask = 0;
-	__u32 old_mask = group->mask;
 	struct fsnotify_mark *mark;
 
 	spin_lock(&group->mark_lock);
 	list_for_each_entry(mark, &group->marks_list, g_list)
 		mask |= mark->mask;
-	spin_unlock(&group->mark_lock);
-
 	group->mask = mask;
-
-	if (old_mask != mask)
-		fsnotify_recalc_global_mask();
+	spin_unlock(&group->mark_lock);
 }
 
 void fsnotify_add_vfsmount_group(struct fsnotify_group *group)
@@ -217,8 +184,6 @@ void fsnotify_put_group(struct fsnotify_group *group)
 
 	mutex_unlock(&fsnotify_grp_mutex);
 
-	/* and now it is really dead. _Nothing_ could be seeing it */
-	fsnotify_recalc_global_mask();
 	fsnotify_destroy_group(group);
 }
 
-- 
cgit v1.1


From 43709a288ed03aa0e2979ab63dd089b3889645c4 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Wed, 28 Jul 2010 10:18:39 -0400
Subject: fsnotify: remove group->mask

group->mask is now useless.  It was originally a shortcut for fsnotify to
save on performance.  These checks are now redundant, so we remove them.

Signed-off-by: Eric Paris <eparis@redhat.com>
---
 fs/notify/dnotify/dnotify.c        |  4 ----
 fs/notify/fanotify/fanotify_user.c | 23 +++++------------------
 fs/notify/group.c                  | 16 ----------------
 fs/notify/inotify/inotify_user.c   |  9 ---------
 4 files changed, 5 insertions(+), 47 deletions(-)

(limited to 'fs')

diff --git a/fs/notify/dnotify/dnotify.c b/fs/notify/dnotify/dnotify.c
index c3dc158..e92b2c8 100644
--- a/fs/notify/dnotify/dnotify.c
+++ b/fs/notify/dnotify/dnotify.c
@@ -199,8 +199,6 @@ void dnotify_flush(struct file *filp, fl_owner_t id)
 	if (dn_mark->dn == NULL)
 		fsnotify_destroy_mark(fsn_mark);
 
-	fsnotify_recalc_group_mask(dnotify_group);
-
 	mutex_unlock(&dnotify_mark_mutex);
 
 	fsnotify_put_mark(fsn_mark);
@@ -385,8 +383,6 @@ out:
 	if (destroy)
 		fsnotify_destroy_mark(fsn_mark);
 
-	fsnotify_recalc_group_mask(dnotify_group);
-
 	mutex_unlock(&dnotify_mark_mutex);
 	fsnotify_put_mark(fsn_mark);
 out_err:
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index 50cea74..25a3b4d 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -496,8 +496,6 @@ static int fanotify_remove_vfsmount_mark(struct fsnotify_group *group,
 
 	removed = fanotify_mark_remove_from_mask(fsn_mark, mask, flags);
 	fsnotify_put_mark(fsn_mark);
-	if (removed & group->mask)
-		fsnotify_recalc_group_mask(group);
 	if (removed & mnt->mnt_fsnotify_mask)
 		fsnotify_recalc_vfsmount_mask(mnt);
 
@@ -518,9 +516,6 @@ static int fanotify_remove_inode_mark(struct fsnotify_group *group,
 	removed = fanotify_mark_remove_from_mask(fsn_mark, mask, flags);
 	/* matches the fsnotify_find_inode_mark() */
 	fsnotify_put_mark(fsn_mark);
-
-	if (removed & group->mask)
-		fsnotify_recalc_group_mask(group);
 	if (removed & inode->i_fsnotify_mask)
 		fsnotify_recalc_inode_mask(inode);
 
@@ -572,12 +567,9 @@ static int fanotify_add_vfsmount_mark(struct fsnotify_group *group,
 	}
 	added = fanotify_mark_add_to_mask(fsn_mark, mask, flags);
 	fsnotify_put_mark(fsn_mark);
-	if (added) {
-		if (added & ~group->mask)
-			fsnotify_recalc_group_mask(group);
-		if (added & ~mnt->mnt_fsnotify_mask)
-			fsnotify_recalc_vfsmount_mask(mnt);
-	}
+	if (added & ~mnt->mnt_fsnotify_mask)
+		fsnotify_recalc_vfsmount_mask(mnt);
+
 	return 0;
 }
 
@@ -607,12 +599,8 @@ static int fanotify_add_inode_mark(struct fsnotify_group *group,
 	}
 	added = fanotify_mark_add_to_mask(fsn_mark, mask, flags);
 	fsnotify_put_mark(fsn_mark);
-	if (added) {
-		if (added & ~group->mask)
-			fsnotify_recalc_group_mask(group);
-		if (added & ~inode->i_fsnotify_mask)
-			fsnotify_recalc_inode_mask(inode);
-	}
+	if (added & ~inode->i_fsnotify_mask)
+		fsnotify_recalc_inode_mask(inode);
 	return 0;
 }
 
@@ -734,7 +722,6 @@ SYSCALL_DEFINE(fanotify_mark)(int fanotify_fd, unsigned int flags,
 			fsnotify_clear_vfsmount_marks_by_group(group);
 		else
 			fsnotify_clear_inode_marks_by_group(group);
-		fsnotify_recalc_group_mask(group);
 		break;
 	default:
 		ret = -EINVAL;
diff --git a/fs/notify/group.c b/fs/notify/group.c
index 8da532d..fc0d966 100644
--- a/fs/notify/group.c
+++ b/fs/notify/group.c
@@ -35,22 +35,6 @@ LIST_HEAD(fsnotify_inode_groups);
 /* all groups registered to receive mount point filesystem notifications */
 LIST_HEAD(fsnotify_vfsmount_groups);
 
-/*
- * Update the group->mask by running all of the marks associated with this
- * group and finding the bitwise | of all of the mark->mask.
- */
-void fsnotify_recalc_group_mask(struct fsnotify_group *group)
-{
-	__u32 mask = 0;
-	struct fsnotify_mark *mark;
-
-	spin_lock(&group->mark_lock);
-	list_for_each_entry(mark, &group->marks_list, g_list)
-		mask |= mark->mask;
-	group->mask = mask;
-	spin_unlock(&group->mark_lock);
-}
-
 void fsnotify_add_vfsmount_group(struct fsnotify_group *group)
 {
 	struct fsnotify_group *group_iter;
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index a4cd227..bf7f6d7 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -606,16 +606,11 @@ static int inotify_update_existing_watch(struct fsnotify_group *group,
 		int dropped = (old_mask & ~new_mask);
 		/* more bits in this fsn_mark than the inode's mask? */
 		int do_inode = (new_mask & ~inode->i_fsnotify_mask);
-		/* more bits in this fsn_mark than the group? */
-		int do_group = (new_mask & ~group->mask);
 
 		/* update the inode with this new fsn_mark */
 		if (dropped || do_inode)
 			fsnotify_recalc_inode_mask(inode);
 
-		/* update the group mask with the new mask */
-		if (dropped || do_group)
-			fsnotify_recalc_group_mask(group);
 	}
 
 	/* return the wd */
@@ -673,10 +668,6 @@ static int inotify_new_watch(struct fsnotify_group *group,
 	/* return the watch descriptor for this new mark */
 	ret = tmp_i_mark->wd;
 
-	/* if this mark added a new event update the group mask */
-	if (mask & ~group->mask)
-		fsnotify_recalc_group_mask(group);
-
 out_err:
 	/* match the ref from fsnotify_init_mark() */
 	fsnotify_put_mark(&tmp_i_mark->fsn_mark);
-- 
cgit v1.1


From 02436668d98385f5b5d9ffb695a37dadf98ed8a8 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Wed, 28 Jul 2010 10:18:39 -0400
Subject: fsnotify: remove global fsnotify groups lists

The global fsnotify groups lists were invented as a way to increase the
performance of fsnotify by shortcutting events which were not interesting.
With the changes to walk the object lists rather than global groups lists
these shortcuts are not useful.

Signed-off-by: Eric Paris <eparis@redhat.com>
---
 fs/notify/fsnotify.c |   5 ---
 fs/notify/fsnotify.h |   9 -----
 fs/notify/group.c    | 107 +--------------------------------------------------
 fs/notify/mark.c     |   9 -----
 4 files changed, 2 insertions(+), 128 deletions(-)

(limited to 'fs')

diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
index 9ba29ee..1dd1fde 100644
--- a/fs/notify/fsnotify.c
+++ b/fs/notify/fsnotify.c
@@ -219,11 +219,6 @@ int fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is,
 	/* global tests shouldn't care about events on child only the specific event */
 	__u32 test_mask = (mask & ~FS_EVENT_ON_CHILD);
 
-	/* if no fsnotify listeners, nothing to do */
-	if (list_empty(&fsnotify_inode_groups) &&
-	    list_empty(&fsnotify_vfsmount_groups))
-		return 0;
- 
 	if (mask & FS_MODIFY)
 		__fsnotify_flush_ignored_mask(to_tell, data, data_is);
 
diff --git a/fs/notify/fsnotify.h b/fs/notify/fsnotify.h
index b41dbf5..85e7d2b 100644
--- a/fs/notify/fsnotify.h
+++ b/fs/notify/fsnotify.h
@@ -6,11 +6,6 @@
 #include <linux/srcu.h>
 #include <linux/types.h>
 
-/* all groups which receive inode fsnotify events */
-extern struct list_head fsnotify_inode_groups;
-/* all groups which receive vfsmount fsnotify events */
-extern struct list_head fsnotify_vfsmount_groups;
-
 /* destroy all events sitting in this groups notification queue */
 extern void fsnotify_flush_notify(struct fsnotify_group *group);
 
@@ -28,10 +23,6 @@ extern int fsnotify_add_vfsmount_mark(struct fsnotify_mark *mark,
 				      struct fsnotify_group *group, struct vfsmount *mnt,
 				      int allow_dups);
 
-/* add a group to the inode group list */
-extern void fsnotify_add_inode_group(struct fsnotify_group *group);
-/* add a group to the vfsmount group list */
-extern void fsnotify_add_vfsmount_group(struct fsnotify_group *group);
 /* final kfree of a group */
 extern void fsnotify_final_destroy_group(struct fsnotify_group *group);
 
diff --git a/fs/notify/group.c b/fs/notify/group.c
index fc0d966..d309f38 100644
--- a/fs/notify/group.c
+++ b/fs/notify/group.c
@@ -28,67 +28,6 @@
 
 #include <asm/atomic.h>
 
-/* protects writes to fsnotify_groups and fsnotify_mask */
-static DEFINE_MUTEX(fsnotify_grp_mutex);
-/* all groups registered to receive inode filesystem notifications */
-LIST_HEAD(fsnotify_inode_groups);
-/* all groups registered to receive mount point filesystem notifications */
-LIST_HEAD(fsnotify_vfsmount_groups);
-
-void fsnotify_add_vfsmount_group(struct fsnotify_group *group)
-{
-	struct fsnotify_group *group_iter;
-
-	mutex_lock(&fsnotify_grp_mutex);
-
-	if (!group->on_vfsmount_group_list) {
-		list_for_each_entry(group_iter, &fsnotify_vfsmount_groups,
-				    vfsmount_group_list) {
-			/* insert in front of this one? */
-			if (group < group_iter) {
-				/* list_add_tail() insert in front of group_iter */
-				list_add_tail_rcu(&group->inode_group_list,
-						  &group_iter->inode_group_list);
-				goto out;
-			}
-		}
-
-		/* apparently we need to be the last entry */
-		list_add_tail_rcu(&group->vfsmount_group_list, &fsnotify_vfsmount_groups);
-	}
-out:
-	group->on_vfsmount_group_list = 1;
-
-	mutex_unlock(&fsnotify_grp_mutex);
-}
-
-void fsnotify_add_inode_group(struct fsnotify_group *group)
-{
-	struct fsnotify_group *group_iter;
-
-	mutex_lock(&fsnotify_grp_mutex);
-
-	/* add to global group list */
-	if (!group->on_inode_group_list) {
-		list_for_each_entry(group_iter, &fsnotify_inode_groups,
-				    inode_group_list) {
-			if (group < group_iter) {
-				/* list_add_tail() insert in front of group_iter */
-				list_add_tail_rcu(&group->inode_group_list,
-						  &group_iter->inode_group_list);
-				goto out;
-			}
-		}
-
-		/* apparently we need to be the last entry */
-		list_add_tail_rcu(&group->inode_group_list, &fsnotify_inode_groups);
-	}
-out:
-	group->on_inode_group_list = 1;
-
-	mutex_unlock(&fsnotify_grp_mutex);
-}
-
 /*
  * Final freeing of a group
  */
@@ -124,51 +63,12 @@ static void fsnotify_destroy_group(struct fsnotify_group *group)
 }
 
 /*
- * Remove this group from the global list of groups that will get events
- * this can be done even if there are still references and things still using
- * this group.  This just stops the group from getting new events.
- */
-static void __fsnotify_evict_group(struct fsnotify_group *group)
-{
-	BUG_ON(!mutex_is_locked(&fsnotify_grp_mutex));
-
-	if (group->on_inode_group_list)
-		list_del_rcu(&group->inode_group_list);
-	group->on_inode_group_list = 0;
-	if (group->on_vfsmount_group_list)
-		list_del_rcu(&group->vfsmount_group_list);
-	group->on_vfsmount_group_list = 0;
-}
-
-/*
- * Called when a group is no longer interested in getting events.  This can be
- * used if a group is misbehaving or if for some reason a group should no longer
- * get any filesystem events.
- */
-void fsnotify_evict_group(struct fsnotify_group *group)
-{
-	mutex_lock(&fsnotify_grp_mutex);
-	__fsnotify_evict_group(group);
-	mutex_unlock(&fsnotify_grp_mutex);
-}
-
-/*
  * Drop a reference to a group.  Free it if it's through.
  */
 void fsnotify_put_group(struct fsnotify_group *group)
 {
-	if (!atomic_dec_and_mutex_lock(&group->refcnt, &fsnotify_grp_mutex))
-		return;
-
-	/*
-	 * OK, now we know that there's no other users *and* we hold mutex,
-	 * so no new references will appear
-	 */
-	__fsnotify_evict_group(group);
-
-	mutex_unlock(&fsnotify_grp_mutex);
-
-	fsnotify_destroy_group(group);
+	if (atomic_dec_and_test(&group->refcnt))
+		fsnotify_destroy_group(group);
 }
 
 /*
@@ -195,9 +95,6 @@ struct fsnotify_group *fsnotify_alloc_group(const struct fsnotify_ops *ops)
 	init_waitqueue_head(&group->notification_waitq);
 	group->max_events = UINT_MAX;
 
-	INIT_LIST_HEAD(&group->inode_group_list);
-	INIT_LIST_HEAD(&group->vfsmount_group_list);
-
 	spin_lock_init(&group->mark_lock);
 	INIT_LIST_HEAD(&group->marks_list);
 
diff --git a/fs/notify/mark.c b/fs/notify/mark.c
index 236f29b..325185e 100644
--- a/fs/notify/mark.c
+++ b/fs/notify/mark.c
@@ -223,15 +223,6 @@ int fsnotify_add_mark(struct fsnotify_mark *mark,
 	BUG_ON(!inode && !mnt);
 
 	/*
-	 * if this group isn't being testing for inode type events we need
-	 * to start testing
-	 */
-	if (inode && unlikely(list_empty(&group->inode_group_list)))
-		fsnotify_add_inode_group(group);
-	else if (mnt && unlikely(list_empty(&group->vfsmount_group_list)))
-		fsnotify_add_vfsmount_group(group);
-
-	/*
 	 * LOCKING ORDER!!!!
 	 * mark->lock
 	 * group->mark_lock
-- 
cgit v1.1


From 84a5b68e8da1490906c11129756490a556ae2c19 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Wed, 28 Jul 2010 10:18:39 -0400
Subject: fsnotify: rework ignored mark flushing

currently ignored_mark clearing is done in a seperate list traversal
before the actual list traversal to send events.  There is no need for
this.  Do them at the same time.

Signed-off-by: Eric Paris <eparis@redhat.com>
---
 fs/notify/fsnotify.c | 60 ++++++++++------------------------------------------
 1 file changed, 11 insertions(+), 49 deletions(-)

(limited to 'fs')

diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
index 1dd1fde..0bb4aeb 100644
--- a/fs/notify/fsnotify.c
+++ b/fs/notify/fsnotify.c
@@ -140,36 +140,6 @@ void __fsnotify_parent(struct file *file, struct dentry *dentry, __u32 mask)
 }
 EXPORT_SYMBOL_GPL(__fsnotify_parent);
 
-void __fsnotify_flush_ignored_mask(struct inode *inode, void *data, int data_is)
-{
-	struct fsnotify_mark *mark;
-	struct hlist_node *node;
-	int idx;
-
-	idx = srcu_read_lock(&fsnotify_mark_srcu);
-
-	if (!hlist_empty(&inode->i_fsnotify_marks)) {
-		hlist_for_each_entry_rcu(mark, node, &inode->i_fsnotify_marks, i.i_list) {
-			if (!(mark->flags & FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY))
-				mark->ignored_mask = 0;
-		}
-	}
-
-	if (data_is == FSNOTIFY_EVENT_FILE) {
-		struct vfsmount *mnt;
-
-		mnt = ((struct file *)data)->f_path.mnt;
-		if (mnt && !hlist_empty(&mnt->mnt_fsnotify_marks)) {
-			hlist_for_each_entry_rcu(mark, node, &mnt->mnt_fsnotify_marks, m.m_list) {
-				if (!(mark->flags & FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY))
-					mark->ignored_mask = 0;
-			}
-		}
-	}
-
-	srcu_read_unlock(&fsnotify_mark_srcu, idx);
-}
-
 static int send_to_group(struct fsnotify_group *group, struct inode *to_tell,
 			 struct vfsmount *mnt, struct fsnotify_mark *mark,
 			 __u32 mask, void *data, int data_is, u32 cookie,
@@ -193,14 +163,6 @@ static int send_to_group(struct fsnotify_group *group, struct inode *to_tell,
 	return group->ops->handle_event(group, mark, *event);
 }
 
-static bool needed_by_vfsmount(__u32 test_mask, struct vfsmount *mnt)
-{
-	if (!mnt)
-		return false;
-
-	return (test_mask & mnt->mnt_fsnotify_mask);
-}
-
 /*
  * This is the main call to fsnotify.  The VFS calls into hook specific functions
  * in linux/fsnotify.h.  Those functions then in turn call here.  Here will call
@@ -219,26 +181,21 @@ int fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is,
 	/* global tests shouldn't care about events on child only the specific event */
 	__u32 test_mask = (mask & ~FS_EVENT_ON_CHILD);
 
-	if (mask & FS_MODIFY)
-		__fsnotify_flush_ignored_mask(to_tell, data, data_is);
-
 	if (data_is == FSNOTIFY_EVENT_FILE)
 		mnt = ((struct file *)data)->f_path.mnt;
 
-	/* if this inode's directed listeners don't care and nothing on the vfsmount
-	 * listeners list cares, nothing to do */
-	if (!(test_mask & to_tell->i_fsnotify_mask) &&
-	    !needed_by_vfsmount(test_mask, mnt))
-		return 0;
-
 	idx = srcu_read_lock(&fsnotify_mark_srcu);
 
-	if (test_mask & to_tell->i_fsnotify_mask) {
+	if ((test_mask & to_tell->i_fsnotify_mask) || (mask & FS_MODIFY)) {
 		hlist_for_each_entry_rcu(mark, node, &to_tell->i_fsnotify_marks, i.i_list) {
 
 			pr_debug("%s: inode_loop: mark=%p mark->mask=%x mark->ignored_mask=%x\n",
 				 __func__, mark, mark->mask, mark->ignored_mask);
 
+			if ((mask & FS_MODIFY) &&
+			    !(mark->flags & FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY))
+				mark->ignored_mask = 0;
+
 			if (test_mask & mark->mask & ~mark->ignored_mask) {
 				group = mark->group;
 				if (!group)
@@ -252,12 +209,17 @@ int fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is,
 		}
 	}
 
-	if (mnt && (test_mask & mnt->mnt_fsnotify_mask)) {
+	if (mnt && ((test_mask & mnt->mnt_fsnotify_mask) ||
+		    (mask & FS_MODIFY))) {
 		hlist_for_each_entry_rcu(mark, node, &mnt->mnt_fsnotify_marks, m.m_list) {
 
 			pr_debug("%s: mnt_loop: mark=%p mark->mask=%x mark->ignored_mask=%x\n",
 				 __func__, mark, mark->mask, mark->ignored_mask);
 
+			if ((mask & FS_MODIFY) &&
+			    !(mark->flags & FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY))
+				mark->ignored_mask = 0;
+
 			if (test_mask & mark->mask & ~mark->ignored_mask)  {
 				group = mark->group;
 				if (!group)
-- 
cgit v1.1


From 613a807fe7c793ceb7d6f059773527a5a6c84a96 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Wed, 28 Jul 2010 10:18:39 -0400
Subject: fsnotify: walk the inode and vfsmount lists simultaneously

We currently walk the list of marks on an inode followed by the list of
marks on the vfsmount.  These are in order (by the memory address of the
group) so lets walk them both together.  Eventually we can pass both the
inode mark and the vfsmount mark to helpers simultaneously.

Signed-off-by: Eric Paris <eparis@redhat.com>
---
 fs/notify/fsnotify.c | 134 ++++++++++++++++++++++++++++++++-------------------
 1 file changed, 84 insertions(+), 50 deletions(-)

(limited to 'fs')

diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
index 0bb4aeb..cdaa51c 100644
--- a/fs/notify/fsnotify.c
+++ b/fs/notify/fsnotify.c
@@ -140,19 +140,31 @@ void __fsnotify_parent(struct file *file, struct dentry *dentry, __u32 mask)
 }
 EXPORT_SYMBOL_GPL(__fsnotify_parent);
 
-static int send_to_group(struct fsnotify_group *group, struct inode *to_tell,
-			 struct vfsmount *mnt, struct fsnotify_mark *mark,
-			 __u32 mask, void *data, int data_is, u32 cookie,
+static int send_to_group(struct inode *to_tell, struct vfsmount *mnt,
+			 struct fsnotify_mark *mark,
+			__u32 mask, void *data,
+			 int data_is, u32 cookie,
 			 const unsigned char *file_name,
 			 struct fsnotify_event **event)
 {
+	struct fsnotify_group *group = mark->group;
+	__u32 test_mask = (mask & ~FS_EVENT_ON_CHILD);
+
 	pr_debug("%s: group=%p to_tell=%p mnt=%p mark=%p mask=%x data=%p"
 		 " data_is=%d cookie=%d event=%p\n", __func__, group, to_tell,
 		 mnt, mark, mask, data, data_is, cookie, *event);
 
+	if ((mask & FS_MODIFY) &&
+	    !(mark->flags & FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY))
+		mark->ignored_mask = 0;
+
+	if (!(test_mask & mark->mask & ~mark->ignored_mask))
+		return 0;
+
 	if (group->ops->should_send_event(group, to_tell, mnt, mark, mask,
 					  data, data_is) == false)
 		return 0;
+
 	if (!*event) {
 		*event = fsnotify_create_event(to_tell, mask, data,
 						data_is, file_name,
@@ -172,67 +184,89 @@ static int send_to_group(struct fsnotify_group *group, struct inode *to_tell,
 int fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is,
 	     const unsigned char *file_name, u32 cookie)
 {
-	struct fsnotify_mark *mark;
-	struct fsnotify_group *group;
+	struct hlist_node *inode_node, *vfsmount_node;
+	struct fsnotify_mark *inode_mark = NULL, *vfsmount_mark = NULL;
+	struct fsnotify_group *inode_group, *vfsmount_group;
 	struct fsnotify_event *event = NULL;
-	struct hlist_node *node;
-	struct vfsmount *mnt = NULL;
+	struct vfsmount *mnt;
 	int idx, ret = 0;
+	bool used_inode = false, used_vfsmount = false;
 	/* global tests shouldn't care about events on child only the specific event */
 	__u32 test_mask = (mask & ~FS_EVENT_ON_CHILD);
 
 	if (data_is == FSNOTIFY_EVENT_FILE)
 		mnt = ((struct file *)data)->f_path.mnt;
+	else
+		mnt = NULL;
+
+	/*
+	 * if this is a modify event we may need to clear the ignored masks
+	 * otherwise return if neither the inode nor the vfsmount care about
+	 * this type of event.
+	 */
+	if (!(mask & FS_MODIFY) &&
+	    !(test_mask & to_tell->i_fsnotify_mask) &&
+	    !(mnt && test_mask & mnt->mnt_fsnotify_mask))
+		return 0;
 
 	idx = srcu_read_lock(&fsnotify_mark_srcu);
 
-	if ((test_mask & to_tell->i_fsnotify_mask) || (mask & FS_MODIFY)) {
-		hlist_for_each_entry_rcu(mark, node, &to_tell->i_fsnotify_marks, i.i_list) {
-
-			pr_debug("%s: inode_loop: mark=%p mark->mask=%x mark->ignored_mask=%x\n",
-				 __func__, mark, mark->mask, mark->ignored_mask);
-
-			if ((mask & FS_MODIFY) &&
-			    !(mark->flags & FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY))
-				mark->ignored_mask = 0;
-
-			if (test_mask & mark->mask & ~mark->ignored_mask) {
-				group = mark->group;
-				if (!group)
-					continue;
-				ret = send_to_group(group, to_tell, NULL, mark, mask,
-						    data, data_is, cookie, file_name,
-						    &event);
-				if (ret)
-					goto out;
-			}
-		}
+	if ((mask & FS_MODIFY) ||
+	    (test_mask & to_tell->i_fsnotify_mask))
+		inode_node = to_tell->i_fsnotify_marks.first;
+	else
+		inode_node = NULL;
+
+	if (mnt) {
+		if ((mask & FS_MODIFY) ||
+		    (test_mask & mnt->mnt_fsnotify_mask))
+			vfsmount_node = mnt->mnt_fsnotify_marks.first;
+		else
+			vfsmount_node = NULL;
+	} else {
+		mnt = NULL;
+		vfsmount_node = NULL;
 	}
 
-	if (mnt && ((test_mask & mnt->mnt_fsnotify_mask) ||
-		    (mask & FS_MODIFY))) {
-		hlist_for_each_entry_rcu(mark, node, &mnt->mnt_fsnotify_marks, m.m_list) {
-
-			pr_debug("%s: mnt_loop: mark=%p mark->mask=%x mark->ignored_mask=%x\n",
-				 __func__, mark, mark->mask, mark->ignored_mask);
-
-			if ((mask & FS_MODIFY) &&
-			    !(mark->flags & FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY))
-				mark->ignored_mask = 0;
-
-			if (test_mask & mark->mask & ~mark->ignored_mask)  {
-				group = mark->group;
-				if (!group)
-					continue;
-				ret = send_to_group(group, to_tell, mnt, mark, mask,
-						    data, data_is, cookie, file_name,
-						    &event);
-				if (ret)
-					goto out;
-			}
+	while (inode_node || vfsmount_node) {
+		if (inode_node) {
+			inode_mark = hlist_entry(srcu_dereference(inode_node, &fsnotify_mark_srcu),
+						 struct fsnotify_mark, i.i_list);
+			inode_group = inode_mark->group;
+		} else
+			inode_group = (void *)-1;
+
+		if (vfsmount_node) {
+			vfsmount_mark = hlist_entry(srcu_dereference(vfsmount_node, &fsnotify_mark_srcu),
+							struct fsnotify_mark, m.m_list);
+			vfsmount_group = vfsmount_mark->group;
+		} else
+			vfsmount_group = (void *)-1;
+
+		if (inode_group < vfsmount_group) {
+			/* handle inode */
+			send_to_group(to_tell, NULL, inode_mark, mask, data,
+				      data_is, cookie, file_name, &event);
+			used_inode = true;
+		} else if (vfsmount_group < inode_group) {
+			send_to_group(to_tell, mnt, vfsmount_mark, mask, data,
+				      data_is, cookie, file_name, &event);
+			used_vfsmount = true;
+		} else {
+			send_to_group(to_tell, mnt, vfsmount_mark, mask, data,
+				      data_is, cookie, file_name, &event);
+			used_vfsmount = true;
+			send_to_group(to_tell, NULL, inode_mark, mask, data,
+				      data_is, cookie, file_name, &event);
+			used_inode = true;
 		}
+
+		if (used_inode)
+			inode_node = inode_node->next;
+		if (used_vfsmount)
+			vfsmount_node = vfsmount_node->next;
 	}
-out:
+
 	srcu_read_unlock(&fsnotify_mark_srcu, idx);
 	/*
 	 * fsnotify_create_event() took a reference so the event can't be cleaned
-- 
cgit v1.1


From ce8f76fb7320297ccbe7c950fd9a2d727dd6a5a0 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Wed, 28 Jul 2010 10:18:39 -0400
Subject: fsnotify: pass both the vfsmount mark and inode mark

should_send_event() and handle_event() will both need to look up the inode
event if they get a vfsmount event.  Lets just pass both at the same time
since we have them both after walking the lists in lockstep.

Signed-off-by: Eric Paris <eparis@redhat.com>
---
 fs/notify/dnotify/dnotify.c          | 18 ++++++----
 fs/notify/fanotify/fanotify.c        | 15 +++++---
 fs/notify/fsnotify.c                 | 70 ++++++++++++++++++++++++------------
 fs/notify/inotify/inotify_fsnotify.c | 12 ++++---
 4 files changed, 77 insertions(+), 38 deletions(-)

(limited to 'fs')

diff --git a/fs/notify/dnotify/dnotify.c b/fs/notify/dnotify/dnotify.c
index e92b2c8..bda588b 100644
--- a/fs/notify/dnotify/dnotify.c
+++ b/fs/notify/dnotify/dnotify.c
@@ -83,7 +83,8 @@ static void dnotify_recalc_inode_mask(struct fsnotify_mark *fsn_mark)
  * events.
  */
 static int dnotify_handle_event(struct fsnotify_group *group,
-				struct fsnotify_mark *mark,
+				struct fsnotify_mark *inode_mark,
+				struct fsnotify_mark *vfsmount_mark,
 				struct fsnotify_event *event)
 {
 	struct dnotify_mark *dn_mark;
@@ -93,11 +94,13 @@ static int dnotify_handle_event(struct fsnotify_group *group,
 	struct fown_struct *fown;
 	__u32 test_mask = event->mask & ~FS_EVENT_ON_CHILD;
 
+	BUG_ON(vfsmount_mark);
+
 	to_tell = event->to_tell;
 
-	dn_mark = container_of(mark, struct dnotify_mark, fsn_mark);
+	dn_mark = container_of(inode_mark, struct dnotify_mark, fsn_mark);
 
-	spin_lock(&mark->lock);
+	spin_lock(&inode_mark->lock);
 	prev = &dn_mark->dn;
 	while ((dn = *prev) != NULL) {
 		if ((dn->dn_mask & test_mask) == 0) {
@@ -111,11 +114,11 @@ static int dnotify_handle_event(struct fsnotify_group *group,
 		else {
 			*prev = dn->dn_next;
 			kmem_cache_free(dnotify_struct_cache, dn);
-			dnotify_recalc_inode_mask(mark);
+			dnotify_recalc_inode_mask(inode_mark);
 		}
 	}
 
-	spin_unlock(&mark->lock);
+	spin_unlock(&inode_mark->lock);
 
 	return 0;
 }
@@ -126,8 +129,9 @@ static int dnotify_handle_event(struct fsnotify_group *group,
  */
 static bool dnotify_should_send_event(struct fsnotify_group *group,
 				      struct inode *inode, struct vfsmount *mnt,
-				      struct fsnotify_mark *mark, __u32 mask,
-				      void *data, int data_type)
+				      struct fsnotify_mark *inode_mark,
+				      struct fsnotify_mark *vfsmount_mark,
+				      __u32 mask, void *data, int data_type)
 {
 	/* not a dir, dnotify doesn't care */
 	if (!S_ISDIR(inode->i_mode))
diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
index fbd7f35..ef4fa4a 100644
--- a/fs/notify/fanotify/fanotify.c
+++ b/fs/notify/fanotify/fanotify.c
@@ -115,7 +115,8 @@ static int fanotify_get_response_from_access(struct fsnotify_group *group,
 #endif
 
 static int fanotify_handle_event(struct fsnotify_group *group,
-				 struct fsnotify_mark *mark,
+				 struct fsnotify_mark *inode_mark,
+				 struct fsnotify_mark *fanotify_mark,
 				 struct fsnotify_event *event)
 {
 	int ret = 0;
@@ -196,8 +197,11 @@ static bool should_send_inode_event(struct fsnotify_group *group,
 		return true;
 }
 
-static bool fanotify_should_send_event(struct fsnotify_group *group, struct inode *to_tell,
-				       struct vfsmount *mnt, struct fsnotify_mark *mark,
+static bool fanotify_should_send_event(struct fsnotify_group *group,
+				       struct inode *to_tell,
+				       struct vfsmount *mnt,
+				       struct fsnotify_mark *inode_mark,
+				       struct fsnotify_mark *vfsmount_mark,
 				       __u32 mask, void *data, int data_type)
 {
 	pr_debug("%s: group=%p to_tell=%p mnt=%p mask=%x data=%p data_type=%d\n",
@@ -213,9 +217,10 @@ static bool fanotify_should_send_event(struct fsnotify_group *group, struct inod
 		return false;
 
 	if (mnt)
-		return should_send_vfsmount_event(group, mnt, to_tell, mark, mask);
+		return should_send_vfsmount_event(group, mnt, to_tell,
+						  vfsmount_mark, mask);
 	else
-		return should_send_inode_event(group, to_tell, mark, mask);
+		return should_send_inode_event(group, to_tell, inode_mark, mask);
 }
 
 const struct fsnotify_ops fanotify_fsnotify_ops = {
diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
index cdaa51c..090b64c 100644
--- a/fs/notify/fsnotify.c
+++ b/fs/notify/fsnotify.c
@@ -141,28 +141,51 @@ void __fsnotify_parent(struct file *file, struct dentry *dentry, __u32 mask)
 EXPORT_SYMBOL_GPL(__fsnotify_parent);
 
 static int send_to_group(struct inode *to_tell, struct vfsmount *mnt,
-			 struct fsnotify_mark *mark,
-			__u32 mask, void *data,
+			 struct fsnotify_mark *inode_mark,
+			 struct fsnotify_mark *vfsmount_mark,
+			 __u32 mask, void *data,
 			 int data_is, u32 cookie,
 			 const unsigned char *file_name,
 			 struct fsnotify_event **event)
 {
-	struct fsnotify_group *group = mark->group;
-	__u32 test_mask = (mask & ~FS_EVENT_ON_CHILD);
+	struct fsnotify_group *group = inode_mark->group;
+	__u32 inode_test_mask = (mask & ~FS_EVENT_ON_CHILD);
+	__u32 vfsmount_test_mask = (mask & ~FS_EVENT_ON_CHILD);
 
 	pr_debug("%s: group=%p to_tell=%p mnt=%p mark=%p mask=%x data=%p"
 		 " data_is=%d cookie=%d event=%p\n", __func__, group, to_tell,
-		 mnt, mark, mask, data, data_is, cookie, *event);
+		 mnt, inode_mark, mask, data, data_is, cookie, *event);
+
+	/* clear ignored on inode modification */
+	if (mask & FS_MODIFY) {
+		if (inode_mark &&
+		    !(inode_mark->flags & FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY))
+			inode_mark->ignored_mask = 0;
+		if (vfsmount_mark &&
+		    !(vfsmount_mark->flags & FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY))
+			vfsmount_mark->ignored_mask = 0;
+	}
 
-	if ((mask & FS_MODIFY) &&
-	    !(mark->flags & FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY))
-		mark->ignored_mask = 0;
+	/* does the inode mark tell us to do something? */
+	if (inode_mark) {
+		inode_test_mask &= inode_mark->mask;
+		inode_test_mask &= ~inode_mark->ignored_mask;
+	}
 
-	if (!(test_mask & mark->mask & ~mark->ignored_mask))
+	/* does the vfsmount_mark tell us to do something? */
+	if (vfsmount_mark) {
+		vfsmount_test_mask &= vfsmount_mark->mask;
+		vfsmount_test_mask &= ~vfsmount_mark->ignored_mask;
+		if (inode_mark)
+			vfsmount_test_mask &= ~inode_mark->ignored_mask;
+	}
+
+	if (!inode_test_mask && !vfsmount_test_mask)
 		return 0;
 
-	if (group->ops->should_send_event(group, to_tell, mnt, mark, mask,
-					  data, data_is) == false)
+	if (group->ops->should_send_event(group, to_tell, mnt, inode_mark,
+					  vfsmount_mark, mask, data,
+					  data_is) == false)
 		return 0;
 
 	if (!*event) {
@@ -172,7 +195,7 @@ static int send_to_group(struct inode *to_tell, struct vfsmount *mnt,
 		if (!*event)
 			return -ENOMEM;
 	}
-	return group->ops->handle_event(group, mark, *event);
+	return group->ops->handle_event(group, inode_mark, vfsmount_mark, *event);
 }
 
 /*
@@ -213,14 +236,16 @@ int fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is,
 
 	if ((mask & FS_MODIFY) ||
 	    (test_mask & to_tell->i_fsnotify_mask))
-		inode_node = to_tell->i_fsnotify_marks.first;
+		inode_node = srcu_dereference(to_tell->i_fsnotify_marks.first,
+					      &fsnotify_mark_srcu);
 	else
 		inode_node = NULL;
 
 	if (mnt) {
 		if ((mask & FS_MODIFY) ||
 		    (test_mask & mnt->mnt_fsnotify_mask))
-			vfsmount_node = mnt->mnt_fsnotify_marks.first;
+			vfsmount_node = srcu_dereference(mnt->mnt_fsnotify_marks.first,
+							 &fsnotify_mark_srcu);
 		else
 			vfsmount_node = NULL;
 	} else {
@@ -245,26 +270,27 @@ int fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is,
 
 		if (inode_group < vfsmount_group) {
 			/* handle inode */
-			send_to_group(to_tell, NULL, inode_mark, mask, data,
+			send_to_group(to_tell, NULL, inode_mark, NULL, mask, data,
 				      data_is, cookie, file_name, &event);
 			used_inode = true;
 		} else if (vfsmount_group < inode_group) {
-			send_to_group(to_tell, mnt, vfsmount_mark, mask, data,
+			send_to_group(to_tell, mnt, NULL, vfsmount_mark, mask, data,
 				      data_is, cookie, file_name, &event);
 			used_vfsmount = true;
 		} else {
-			send_to_group(to_tell, mnt, vfsmount_mark, mask, data,
-				      data_is, cookie, file_name, &event);
+			send_to_group(to_tell, mnt, inode_mark, vfsmount_mark,
+				      mask, data, data_is, cookie, file_name,
+				      &event);
 			used_vfsmount = true;
-			send_to_group(to_tell, NULL, inode_mark, mask, data,
-				      data_is, cookie, file_name, &event);
 			used_inode = true;
 		}
 
 		if (used_inode)
-			inode_node = inode_node->next;
+			inode_node = srcu_dereference(inode_node->next,
+						      &fsnotify_mark_srcu);
 		if (used_vfsmount)
-			vfsmount_node = vfsmount_node->next;
+			vfsmount_node = srcu_dereference(vfsmount_node->next,
+							 &fsnotify_mark_srcu);
 	}
 
 	srcu_read_unlock(&fsnotify_mark_srcu, idx);
diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c
index 7cf518b..e53f497 100644
--- a/fs/notify/inotify/inotify_fsnotify.c
+++ b/fs/notify/inotify/inotify_fsnotify.c
@@ -90,7 +90,8 @@ static struct fsnotify_event *inotify_merge(struct list_head *list,
 }
 
 static int inotify_handle_event(struct fsnotify_group *group,
-				struct fsnotify_mark *mark,
+				struct fsnotify_mark *inode_mark,
+				struct fsnotify_mark *vfsmount_mark,
 				struct fsnotify_event *event)
 {
 	struct inotify_inode_mark *i_mark;
@@ -100,12 +101,14 @@ static int inotify_handle_event(struct fsnotify_group *group,
 	struct fsnotify_event *added_event;
 	int wd, ret = 0;
 
+	BUG_ON(vfsmount_mark);
+
 	pr_debug("%s: group=%p event=%p to_tell=%p mask=%x\n", __func__, group,
 		 event, event->to_tell, event->mask);
 
 	to_tell = event->to_tell;
 
-	i_mark = container_of(mark, struct inotify_inode_mark,
+	i_mark = container_of(inode_mark, struct inotify_inode_mark,
 			      fsn_mark);
 	wd = i_mark->wd;
 
@@ -127,8 +130,8 @@ static int inotify_handle_event(struct fsnotify_group *group,
 			ret = PTR_ERR(added_event);
 	}
 
-	if (mark->mask & IN_ONESHOT)
-		fsnotify_destroy_mark(mark);
+	if (inode_mark->mask & IN_ONESHOT)
+		fsnotify_destroy_mark(inode_mark);
 
 	return ret;
 }
@@ -140,6 +143,7 @@ static void inotify_freeing_mark(struct fsnotify_mark *fsn_mark, struct fsnotify
 
 static bool inotify_should_send_event(struct fsnotify_group *group, struct inode *inode,
 				      struct vfsmount *mnt, struct fsnotify_mark *mark,
+				      struct fsnotify_mark *vfsmount_mark,
 				      __u32 mask, void *data, int data_type)
 {
 	if ((mark->mask & FS_EXCL_UNLINK) &&
-- 
cgit v1.1


From 1968f5eed54ce47bde488fd9a450912e4a2d7138 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Wed, 28 Jul 2010 10:18:39 -0400
Subject: fanotify: use both marks when possible

fanotify currently, when given a vfsmount_mark will look up (if it exists)
the corresponding inode mark.  This patch drops that lookup and uses the
mark provided.

Signed-off-by: Eric Paris <eparis@redhat.com>
---
 fs/notify/dnotify/dnotify.c          |  2 +-
 fs/notify/fanotify/fanotify.c        | 88 ++++++++++++++----------------------
 fs/notify/fsnotify.c                 |  2 +-
 fs/notify/inotify/inotify_fsnotify.c |  4 +-
 4 files changed, 38 insertions(+), 58 deletions(-)

(limited to 'fs')

diff --git a/fs/notify/dnotify/dnotify.c b/fs/notify/dnotify/dnotify.c
index bda588b..3344bdd 100644
--- a/fs/notify/dnotify/dnotify.c
+++ b/fs/notify/dnotify/dnotify.c
@@ -128,7 +128,7 @@ static int dnotify_handle_event(struct fsnotify_group *group,
  * userspace notification for that pair.
  */
 static bool dnotify_should_send_event(struct fsnotify_group *group,
-				      struct inode *inode, struct vfsmount *mnt,
+				      struct inode *inode,
 				      struct fsnotify_mark *inode_mark,
 				      struct fsnotify_mark *vfsmount_mark,
 				      __u32 mask, void *data, int data_type)
diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
index ef4fa4a..eb8f73c 100644
--- a/fs/notify/fanotify/fanotify.c
+++ b/fs/notify/fanotify/fanotify.c
@@ -153,59 +153,20 @@ static int fanotify_handle_event(struct fsnotify_group *group,
 	return ret;
 }
 
-static bool should_send_vfsmount_event(struct fsnotify_group *group,
-				       struct vfsmount *mnt,
-				       struct inode *inode,
-				       struct fsnotify_mark *mnt_mark,
-				       __u32 mask)
-{
-	struct fsnotify_mark *inode_mark;
-
-	pr_debug("%s: group=%p vfsmount=%p mark=%p mask=%x\n",
-		 __func__, group, mnt, mnt_mark, mask);
-
-	mask &= mnt_mark->mask;
-	mask &= ~mnt_mark->ignored_mask;
-
-	if (mask) {
-		inode_mark = fsnotify_find_inode_mark(group, inode);
-		if (inode_mark) {
-			mask &= ~inode_mark->ignored_mask;
-			fsnotify_put_mark(inode_mark);
-		}
-	}
-
-	return mask;
-}
-
-static bool should_send_inode_event(struct fsnotify_group *group,
-				    struct inode *inode,
-				    struct fsnotify_mark *mark,
-				    __u32 mask)
-{
-	pr_debug("%s: group=%p inode=%p mark=%p mask=%x\n",
-		 __func__, group, inode, mark, mask);
-
-	/*
-	 * if the event is for a child and this inode doesn't care about
-	 * events on the child, don't send it!
-	 */
-	if ((mask & FS_EVENT_ON_CHILD) &&
-	    !(mark->mask & FS_EVENT_ON_CHILD))
-		return false;
-	else
-		return true;
-}
-
 static bool fanotify_should_send_event(struct fsnotify_group *group,
 				       struct inode *to_tell,
-				       struct vfsmount *mnt,
 				       struct fsnotify_mark *inode_mark,
-				       struct fsnotify_mark *vfsmount_mark,
-				       __u32 mask, void *data, int data_type)
+				       struct fsnotify_mark *vfsmnt_mark,
+				       __u32 event_mask, void *data, int data_type)
 {
-	pr_debug("%s: group=%p to_tell=%p mnt=%p mask=%x data=%p data_type=%d\n",
-		 __func__, group, to_tell, mnt, mask, data, data_type);
+	__u32 marks_mask, marks_ignored_mask;
+
+	pr_debug("%s: group=%p to_tell=%p inode_mark=%p vfsmnt_mark=%p "
+		 "mask=%x data=%p data_type=%d\n", __func__, group, to_tell,
+		 inode_mark, vfsmnt_mark, event_mask, data, data_type);
+
+	pr_debug("%s: group=%p vfsmount_mark=%p inode_mark=%p mask=%x\n",
+		 __func__, group, vfsmnt_mark, inode_mark, event_mask);
 
 	/* sorry, fanotify only gives a damn about files and dirs */
 	if (!S_ISREG(to_tell->i_mode) &&
@@ -216,11 +177,30 @@ static bool fanotify_should_send_event(struct fsnotify_group *group,
 	if (data_type != FSNOTIFY_EVENT_FILE)
 		return false;
 
-	if (mnt)
-		return should_send_vfsmount_event(group, mnt, to_tell,
-						  vfsmount_mark, mask);
-	else
-		return should_send_inode_event(group, to_tell, inode_mark, mask);
+	if (inode_mark && vfsmnt_mark) {
+		marks_mask = (vfsmnt_mark->mask | inode_mark->mask);
+		marks_ignored_mask = (vfsmnt_mark->ignored_mask | inode_mark->ignored_mask);
+	} else if (inode_mark) {
+		/*
+		 * if the event is for a child and this inode doesn't care about
+		 * events on the child, don't send it!
+		 */
+		if ((event_mask & FS_EVENT_ON_CHILD) &&
+		    !(inode_mark->mask & FS_EVENT_ON_CHILD))
+			return false;
+		marks_mask = inode_mark->mask;
+		marks_ignored_mask = inode_mark->ignored_mask;
+	} else if (vfsmnt_mark) {
+		marks_mask = vfsmnt_mark->mask;
+		marks_ignored_mask = vfsmnt_mark->ignored_mask;
+	} else {
+		BUG();
+	}
+
+	if (event_mask & marks_mask & ~marks_ignored_mask)
+		return true;
+
+	return false;
 }
 
 const struct fsnotify_ops fanotify_fsnotify_ops = {
diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
index 090b64c..4d2a82c 100644
--- a/fs/notify/fsnotify.c
+++ b/fs/notify/fsnotify.c
@@ -183,7 +183,7 @@ static int send_to_group(struct inode *to_tell, struct vfsmount *mnt,
 	if (!inode_test_mask && !vfsmount_test_mask)
 		return 0;
 
-	if (group->ops->should_send_event(group, to_tell, mnt, inode_mark,
+	if (group->ops->should_send_event(group, to_tell, inode_mark,
 					  vfsmount_mark, mask, data,
 					  data_is) == false)
 		return 0;
diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c
index e53f497..5e73eeb 100644
--- a/fs/notify/inotify/inotify_fsnotify.c
+++ b/fs/notify/inotify/inotify_fsnotify.c
@@ -142,11 +142,11 @@ static void inotify_freeing_mark(struct fsnotify_mark *fsn_mark, struct fsnotify
 }
 
 static bool inotify_should_send_event(struct fsnotify_group *group, struct inode *inode,
-				      struct vfsmount *mnt, struct fsnotify_mark *mark,
+				      struct fsnotify_mark *inode_mark,
 				      struct fsnotify_mark *vfsmount_mark,
 				      __u32 mask, void *data, int data_type)
 {
-	if ((mark->mask & FS_EXCL_UNLINK) &&
+	if ((inode_mark->mask & FS_EXCL_UNLINK) &&
 	    (data_type == FSNOTIFY_EVENT_FILE)) {
 		struct file *file  = data;
 
-- 
cgit v1.1


From 41f2df62894bfcd3bf868af916b32b90aa7168dc Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 17 Jun 2010 08:54:16 +0200
Subject: block: BARRIER request should imply SYNC

A barrier request should by defintion have priority in get_request
and let the queue be unplugged immediately as it's blocking all forward
progress due to the queue draining.

Most filesystems already get this implicitly by the way how submit_bh
treats the buffer_ordered flag, and gfs2 sets it explicitly.  But btrfs
and XFS are still forgetting to set the flag, as is blkdev_issue_flush
and some places in DM/MD.

For XFS on metadata heavy workloads this gives a consistent speedup
in the 2-3% range.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <jaxboe@fusionio.com>
---
 fs/gfs2/log.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index 6a857e24..efc3539 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -595,7 +595,7 @@ static void log_write_header(struct gfs2_sbd *sdp, u32 flags, int pull)
 	if (test_bit(SDF_NOBARRIERS, &sdp->sd_flags))
 		goto skip_barrier;
 	get_bh(bh);
-	submit_bh(WRITE_SYNC | (1 << BIO_RW_BARRIER) | (1 << BIO_RW_META), bh);
+	submit_bh(WRITE_BARRIER | (1 << BIO_RW_META), bh);
 	wait_on_buffer(bh);
 	if (buffer_eopnotsupp(bh)) {
 		clear_buffer_eopnotsupp(bh);
-- 
cgit v1.1


From 7b6d91daee5cac6402186ff224c3af39d79f4a0e Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Sat, 7 Aug 2010 18:20:39 +0200
Subject: block: unify flags for struct bio and struct request

Remove the current bio flags and reuse the request flags for the bio, too.
This allows to more easily trace the type of I/O from the filesystem
down to the block driver.  There were two flags in the bio that were
missing in the requests:  BIO_RW_UNPLUG and BIO_RW_AHEAD.  Also I've
renamed two request flags that had a superflous RW in them.

Note that the flags are in bio.h despite having the REQ_ name - as
blkdev.h includes bio.h that is the only way to go for now.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <jaxboe@fusionio.com>
---
 fs/bio.c             |  5 +++--
 fs/btrfs/disk-io.c   |  8 ++++----
 fs/btrfs/inode.c     |  6 +++---
 fs/btrfs/volumes.c   | 18 +++++++++---------
 fs/exofs/ios.c       |  2 +-
 fs/gfs2/log.c        |  4 ++--
 fs/gfs2/meta_io.c    |  8 ++++----
 fs/gfs2/ops_fstype.c |  2 +-
 fs/nilfs2/segbuf.c   |  2 +-
 9 files changed, 28 insertions(+), 27 deletions(-)

(limited to 'fs')

diff --git a/fs/bio.c b/fs/bio.c
index e7bf6ca..8abb2df 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -843,7 +843,8 @@ struct bio *bio_copy_user_iov(struct request_queue *q,
 	if (!bio)
 		goto out_bmd;
 
-	bio->bi_rw |= (!write_to_vm << BIO_RW);
+	if (!write_to_vm)
+		bio->bi_rw |= REQ_WRITE;
 
 	ret = 0;
 
@@ -1024,7 +1025,7 @@ static struct bio *__bio_map_user_iov(struct request_queue *q,
 	 * set data direction, and check if mapped pages need bouncing
 	 */
 	if (!write_to_vm)
-		bio->bi_rw |= (1 << BIO_RW);
+		bio->bi_rw |= REQ_WRITE;
 
 	bio->bi_bdev = bdev;
 	bio->bi_flags |= (1 << BIO_USER_MAPPED);
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 34f7c37..64f1008 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -480,7 +480,7 @@ static void end_workqueue_bio(struct bio *bio, int err)
 	end_io_wq->work.func = end_workqueue_fn;
 	end_io_wq->work.flags = 0;
 
-	if (bio->bi_rw & (1 << BIO_RW)) {
+	if (bio->bi_rw & REQ_WRITE) {
 		if (end_io_wq->metadata)
 			btrfs_queue_worker(&fs_info->endio_meta_write_workers,
 					   &end_io_wq->work);
@@ -604,7 +604,7 @@ int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode,
 
 	atomic_inc(&fs_info->nr_async_submits);
 
-	if (rw & (1 << BIO_RW_SYNCIO))
+	if (rw & REQ_SYNC)
 		btrfs_set_work_high_prio(&async->work);
 
 	btrfs_queue_worker(&fs_info->workers, &async->work);
@@ -668,7 +668,7 @@ static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
 					  bio, 1);
 	BUG_ON(ret);
 
-	if (!(rw & (1 << BIO_RW))) {
+	if (!(rw & REQ_WRITE)) {
 		/*
 		 * called for a read, do the setup so that checksum validation
 		 * can happen in the async kernel threads
@@ -1427,7 +1427,7 @@ static void end_workqueue_fn(struct btrfs_work *work)
 	 * ram and up to date before trying to verify things.  For
 	 * blocksize <= pagesize, it is basically a noop
 	 */
-	if (!(bio->bi_rw & (1 << BIO_RW)) && end_io_wq->metadata &&
+	if (!(bio->bi_rw & REQ_WRITE) && end_io_wq->metadata &&
 	    !bio_ready_for_csum(bio)) {
 		btrfs_queue_worker(&fs_info->endio_meta_workers,
 				   &end_io_wq->work);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 1bff92a..e975d71 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -1429,7 +1429,7 @@ static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
 	ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0);
 	BUG_ON(ret);
 
-	if (!(rw & (1 << BIO_RW))) {
+	if (!(rw & REQ_WRITE)) {
 		if (bio_flags & EXTENT_BIO_COMPRESSED) {
 			return btrfs_submit_compressed_read(inode, bio,
 						    mirror_num, bio_flags);
@@ -1841,7 +1841,7 @@ static int btrfs_io_failed_hook(struct bio *failed_bio,
 	bio->bi_size = 0;
 
 	bio_add_page(bio, page, failrec->len, start - page_offset(page));
-	if (failed_bio->bi_rw & (1 << BIO_RW))
+	if (failed_bio->bi_rw & REQ_WRITE)
 		rw = WRITE;
 	else
 		rw = READ;
@@ -5642,7 +5642,7 @@ static void btrfs_submit_direct(int rw, struct bio *bio, struct inode *inode,
 	struct bio_vec *bvec = bio->bi_io_vec;
 	u64 start;
 	int skip_sum;
-	int write = rw & (1 << BIO_RW);
+	int write = rw & REQ_WRITE;
 	int ret = 0;
 
 	skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index d6e3af8..dd318ff 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -258,7 +258,7 @@ loop_lock:
 
 		BUG_ON(atomic_read(&cur->bi_cnt) == 0);
 
-		if (bio_rw_flagged(cur, BIO_RW_SYNCIO))
+		if (cur->bi_rw & REQ_SYNC)
 			num_sync_run++;
 
 		submit_bio(cur->bi_rw, cur);
@@ -2651,7 +2651,7 @@ static int __btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw,
 	int max_errors = 0;
 	struct btrfs_multi_bio *multi = NULL;
 
-	if (multi_ret && !(rw & (1 << BIO_RW)))
+	if (multi_ret && !(rw & REQ_WRITE))
 		stripes_allocated = 1;
 again:
 	if (multi_ret) {
@@ -2687,7 +2687,7 @@ again:
 		mirror_num = 0;
 
 	/* if our multi bio struct is too small, back off and try again */
-	if (rw & (1 << BIO_RW)) {
+	if (rw & REQ_WRITE) {
 		if (map->type & (BTRFS_BLOCK_GROUP_RAID1 |
 				 BTRFS_BLOCK_GROUP_DUP)) {
 			stripes_required = map->num_stripes;
@@ -2697,7 +2697,7 @@ again:
 			max_errors = 1;
 		}
 	}
-	if (multi_ret && (rw & (1 << BIO_RW)) &&
+	if (multi_ret && (rw & REQ_WRITE) &&
 	    stripes_allocated < stripes_required) {
 		stripes_allocated = map->num_stripes;
 		free_extent_map(em);
@@ -2733,7 +2733,7 @@ again:
 	num_stripes = 1;
 	stripe_index = 0;
 	if (map->type & BTRFS_BLOCK_GROUP_RAID1) {
-		if (unplug_page || (rw & (1 << BIO_RW)))
+		if (unplug_page || (rw & REQ_WRITE))
 			num_stripes = map->num_stripes;
 		else if (mirror_num)
 			stripe_index = mirror_num - 1;
@@ -2744,7 +2744,7 @@ again:
 		}
 
 	} else if (map->type & BTRFS_BLOCK_GROUP_DUP) {
-		if (rw & (1 << BIO_RW))
+		if (rw & REQ_WRITE)
 			num_stripes = map->num_stripes;
 		else if (mirror_num)
 			stripe_index = mirror_num - 1;
@@ -2755,7 +2755,7 @@ again:
 		stripe_index = do_div(stripe_nr, factor);
 		stripe_index *= map->sub_stripes;
 
-		if (unplug_page || (rw & (1 << BIO_RW)))
+		if (unplug_page || (rw & REQ_WRITE))
 			num_stripes = map->sub_stripes;
 		else if (mirror_num)
 			stripe_index += mirror_num - 1;
@@ -2945,7 +2945,7 @@ static noinline int schedule_bio(struct btrfs_root *root,
 	struct btrfs_pending_bios *pending_bios;
 
 	/* don't bother with additional async steps for reads, right now */
-	if (!(rw & (1 << BIO_RW))) {
+	if (!(rw & REQ_WRITE)) {
 		bio_get(bio);
 		submit_bio(rw, bio);
 		bio_put(bio);
@@ -2964,7 +2964,7 @@ static noinline int schedule_bio(struct btrfs_root *root,
 	bio->bi_rw |= rw;
 
 	spin_lock(&device->io_lock);
-	if (bio_rw_flagged(bio, BIO_RW_SYNCIO))
+	if (bio->bi_rw & REQ_SYNC)
 		pending_bios = &device->pending_sync_bios;
 	else
 		pending_bios = &device->pending_bios;
diff --git a/fs/exofs/ios.c b/fs/exofs/ios.c
index 4337cad..e273220 100644
--- a/fs/exofs/ios.c
+++ b/fs/exofs/ios.c
@@ -599,7 +599,7 @@ static int _sbi_write_mirror(struct exofs_io_state *ios, int cur_comp)
 			} else {
 				bio = master_dev->bio;
 				/* FIXME: bio_set_dir() */
-				bio->bi_rw |= (1 << BIO_RW);
+				bio->bi_rw |= REQ_WRITE;
 			}
 
 			osd_req_write(or, &ios->obj, per_dev->offset, bio,
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index efc3539..cde1248 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -595,7 +595,7 @@ static void log_write_header(struct gfs2_sbd *sdp, u32 flags, int pull)
 	if (test_bit(SDF_NOBARRIERS, &sdp->sd_flags))
 		goto skip_barrier;
 	get_bh(bh);
-	submit_bh(WRITE_BARRIER | (1 << BIO_RW_META), bh);
+	submit_bh(WRITE_BARRIER | REQ_META, bh);
 	wait_on_buffer(bh);
 	if (buffer_eopnotsupp(bh)) {
 		clear_buffer_eopnotsupp(bh);
@@ -605,7 +605,7 @@ static void log_write_header(struct gfs2_sbd *sdp, u32 flags, int pull)
 		lock_buffer(bh);
 skip_barrier:
 		get_bh(bh);
-		submit_bh(WRITE_SYNC | (1 << BIO_RW_META), bh);
+		submit_bh(WRITE_SYNC | REQ_META, bh);
 		wait_on_buffer(bh);
 	}
 	if (!buffer_uptodate(bh))
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c
index 18176d0..f3b071f 100644
--- a/fs/gfs2/meta_io.c
+++ b/fs/gfs2/meta_io.c
@@ -36,8 +36,8 @@ static int gfs2_aspace_writepage(struct page *page, struct writeback_control *wb
 {
 	struct buffer_head *bh, *head;
 	int nr_underway = 0;
-	int write_op = (1 << BIO_RW_META) | ((wbc->sync_mode == WB_SYNC_ALL ?
-			WRITE_SYNC_PLUG : WRITE));
+	int write_op = REQ_META |
+		(wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC_PLUG : WRITE);
 
 	BUG_ON(!PageLocked(page));
 	BUG_ON(!page_has_buffers(page));
@@ -225,7 +225,7 @@ int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno, int flags,
 	}
 	bh->b_end_io = end_buffer_read_sync;
 	get_bh(bh);
-	submit_bh(READ_SYNC | (1 << BIO_RW_META), bh);
+	submit_bh(READ_SYNC | REQ_META, bh);
 	if (!(flags & DIO_WAIT))
 		return 0;
 
@@ -432,7 +432,7 @@ struct buffer_head *gfs2_meta_ra(struct gfs2_glock *gl, u64 dblock, u32 extlen)
 	if (buffer_uptodate(first_bh))
 		goto out;
 	if (!buffer_locked(first_bh))
-		ll_rw_block(READ_SYNC | (1 << BIO_RW_META), 1, &first_bh);
+		ll_rw_block(READ_SYNC | REQ_META, 1, &first_bh);
 
 	dblock++;
 	extlen--;
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index 3593b3a..fd4f894 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -275,7 +275,7 @@ static int gfs2_read_super(struct gfs2_sbd *sdp, sector_t sector)
 
 	bio->bi_end_io = end_bio_io_page;
 	bio->bi_private = page;
-	submit_bio(READ_SYNC | (1 << BIO_RW_META), bio);
+	submit_bio(READ_SYNC | REQ_META, bio);
 	wait_on_page_locked(page);
 	bio_put(bio);
 	if (!PageUptodate(page)) {
diff --git a/fs/nilfs2/segbuf.c b/fs/nilfs2/segbuf.c
index 2e6a272..4588fb9 100644
--- a/fs/nilfs2/segbuf.c
+++ b/fs/nilfs2/segbuf.c
@@ -508,7 +508,7 @@ static int nilfs_segbuf_write(struct nilfs_segment_buffer *segbuf,
 		 * Last BIO is always sent through the following
 		 * submission.
 		 */
-		rw |= (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_UNPLUG);
+		rw |= REQ_SYNC | REQ_UNPLUG;
 		res = nilfs_segbuf_submit_bio(segbuf, &wi, rw);
 	}
 
-- 
cgit v1.1


From c1955ce32fdb0877b7a1b22feb2669358f65be76 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Sat, 19 Jun 2010 23:08:06 +0200
Subject: writeback: remove wb_list

The wb_list member of struct backing_device_info always has exactly one
element.  Just use the direct bdi->wb pointer instead and simplify some
code.

Also remove bdi_task_init which is now trivial to prepare for the next
patch.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <jaxboe@fusionio.com>
---
 fs/fs-writeback.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index d5be169..d67989b 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -73,9 +73,9 @@ static void bdi_queue_work(struct backing_dev_info *bdi,
 	 * If the default thread isn't there, make sure we add it. When
 	 * it gets created and wakes up, we'll run this work.
 	 */
-	if (unlikely(list_empty_careful(&bdi->wb_list)))
+	if (unlikely(!bdi->wb.task)) {
 		wake_up_process(default_backing_dev_info.wb.task);
-	else {
+	} else {
 		struct bdi_writeback *wb = &bdi->wb;
 
 		if (wb->task)
-- 
cgit v1.1


From 082439004b31adc146e96e5f1c574dd2b57dcd93 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Sat, 19 Jun 2010 23:08:22 +0200
Subject: writeback: merge bdi_writeback_task and bdi_start_fn

Move all code for the writeback thread into fs/fs-writeback.c instead of
splitting it over two functions in two files.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <jaxboe@fusionio.com>
---
 fs/fs-writeback.c | 35 ++++++++++++++++++++++++++++++++++-
 1 file changed, 34 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index d67989b..c8471b3 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -775,12 +775,36 @@ long wb_do_writeback(struct bdi_writeback *wb, int force_wait)
  * Handle writeback of dirty data for the device backed by this bdi. Also
  * wakes up periodically and does kupdated style flushing.
  */
-int bdi_writeback_task(struct bdi_writeback *wb)
+int bdi_writeback_thread(void *data)
 {
+	struct bdi_writeback *wb = data;
+	struct backing_dev_info *bdi = wb->bdi;
 	unsigned long last_active = jiffies;
 	unsigned long wait_jiffies = -1UL;
 	long pages_written;
 
+	/*
+	 * Add us to the active bdi_list
+	 */
+	spin_lock_bh(&bdi_lock);
+	list_add_rcu(&bdi->bdi_list, &bdi_list);
+	spin_unlock_bh(&bdi_lock);
+
+	current->flags |= PF_FLUSHER | PF_SWAPWRITE;
+	set_freezable();
+
+	/*
+	 * Our parent may run at a different priority, just set us to normal
+	 */
+	set_user_nice(current, 0);
+
+	/*
+	 * Clear pending bit and wakeup anybody waiting to tear us down
+	 */
+	clear_bit(BDI_pending, &bdi->state);
+	smp_mb__after_clear_bit();
+	wake_up_bit(&bdi->state, BDI_pending);
+
 	while (!kthread_should_stop()) {
 		pages_written = wb_do_writeback(wb, 0);
 
@@ -813,9 +837,18 @@ int bdi_writeback_task(struct bdi_writeback *wb)
 		try_to_freeze();
 	}
 
+	wb->task = NULL;
+
+	/*
+	 * Flush any work that raced with us exiting. No new work
+	 * will be added, since this bdi isn't discoverable anymore.
+	 */
+	if (!list_empty(&bdi->work_list))
+		wb_do_writeback(wb, 1);
 	return 0;
 }
 
+
 /*
  * Start writeback of `nr_pages' pages.  If `nr_pages' is zero, write back
  * the whole world.
-- 
cgit v1.1


From 1676effca4cd2a6b32e6e8e0ecaa91522dfda6fa Mon Sep 17 00:00:00 2001
From: Andi Kleen <andi@firstfloor.org>
Date: Mon, 21 Jun 2010 11:02:48 +0200
Subject: gcc-4.6: fs: fix unused but set warnings

No real bugs I believe, just some dead code, and some
shut up code.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Cc: Eric Paris <eparis@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Jens Axboe <jaxboe@fusionio.com>
---
 fs/splice.c | 2 --
 1 file changed, 2 deletions(-)

(limited to 'fs')

diff --git a/fs/splice.c b/fs/splice.c
index efdbfec..ec11c52 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -597,7 +597,6 @@ ssize_t default_file_splice_read(struct file *in, loff_t *ppos,
 	struct page *pages[PIPE_DEF_BUFFERS];
 	struct partial_page partial[PIPE_DEF_BUFFERS];
 	struct iovec *vec, __vec[PIPE_DEF_BUFFERS];
-	pgoff_t index;
 	ssize_t res;
 	size_t this_len;
 	int error;
@@ -621,7 +620,6 @@ ssize_t default_file_splice_read(struct file *in, loff_t *ppos,
 			goto shrink_ret;
 	}
 
-	index = *ppos >> PAGE_CACHE_SHIFT;
 	offset = *ppos & ~PAGE_CACHE_MASK;
 	nr_pages = (len + offset + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
 
-- 
cgit v1.1


From 455b2864686d3591b3b2f39eb46290c95f76471f Mon Sep 17 00:00:00 2001
From: Dave Chinner <david@fromorbit.com>
Date: Wed, 7 Jul 2010 13:24:06 +1000
Subject: writeback: Initial tracing support

Trace queue/sched/exec parts of the writeback loop. This provides
insight into when and why flusher threads are scheduled to run. e.g
a sync invocation leaves traces like:

     sync-[...]: writeback_queue: bdi 8:0: sb_dev 8:1 nr_pages=7712 sync_mode=0 kupdate=0 range_cyclic=0 background=0
flush-8:0-[...]: writeback_exec: bdi 8:0: sb_dev 8:1 nr_pages=7712 sync_mode=0 kupdate=0 range_cyclic=0 background=0

This also lays the foundation for adding more writeback tracing to
provide deeper insight into the whole writeback path.

The original tracing code is from Jens Axboe, though this version is
a rewrite as a result of the code being traced changing
significantly.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Jens Axboe <jaxboe@fusionio.com>
---
 fs/fs-writeback.c | 38 ++++++++++++++++++++++++++++++--------
 1 file changed, 30 insertions(+), 8 deletions(-)

(limited to 'fs')

diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index c8471b3..73acab4 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -26,15 +26,9 @@
 #include <linux/blkdev.h>
 #include <linux/backing-dev.h>
 #include <linux/buffer_head.h>
+#include <linux/tracepoint.h>
 #include "internal.h"
 
-#define inode_to_bdi(inode)	((inode)->i_mapping->backing_dev_info)
-
-/*
- * We don't actually have pdflush, but this one is exported though /proc...
- */
-int nr_pdflush_threads;
-
 /*
  * Passed into wb_writeback(), essentially a subset of writeback_control
  */
@@ -50,6 +44,21 @@ struct wb_writeback_work {
 	struct completion *done;	/* set if the caller waits */
 };
 
+/*
+ * Include the creation of the trace points after defining the
+ * wb_writeback_work structure so that the definition remains local to this
+ * file.
+ */
+#define CREATE_TRACE_POINTS
+#include <trace/events/writeback.h>
+
+#define inode_to_bdi(inode)	((inode)->i_mapping->backing_dev_info)
+
+/*
+ * We don't actually have pdflush, but this one is exported though /proc...
+ */
+int nr_pdflush_threads;
+
 /**
  * writeback_in_progress - determine whether there is writeback in progress
  * @bdi: the device's backing_dev_info structure.
@@ -65,6 +74,8 @@ int writeback_in_progress(struct backing_dev_info *bdi)
 static void bdi_queue_work(struct backing_dev_info *bdi,
 		struct wb_writeback_work *work)
 {
+	trace_writeback_queue(bdi, work);
+
 	spin_lock(&bdi->wb_lock);
 	list_add_tail(&work->list, &bdi->work_list);
 	spin_unlock(&bdi->wb_lock);
@@ -74,6 +85,7 @@ static void bdi_queue_work(struct backing_dev_info *bdi,
 	 * it gets created and wakes up, we'll run this work.
 	 */
 	if (unlikely(!bdi->wb.task)) {
+		trace_writeback_nothread(bdi, work);
 		wake_up_process(default_backing_dev_info.wb.task);
 	} else {
 		struct bdi_writeback *wb = &bdi->wb;
@@ -95,8 +107,10 @@ __bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages,
 	 */
 	work = kzalloc(sizeof(*work), GFP_ATOMIC);
 	if (!work) {
-		if (bdi->wb.task)
+		if (bdi->wb.task) {
+			trace_writeback_nowork(bdi);
 			wake_up_process(bdi->wb.task);
+		}
 		return;
 	}
 
@@ -751,6 +765,8 @@ long wb_do_writeback(struct bdi_writeback *wb, int force_wait)
 		if (force_wait)
 			work->sync_mode = WB_SYNC_ALL;
 
+		trace_writeback_exec(bdi, work);
+
 		wrote += wb_writeback(wb, work);
 
 		/*
@@ -805,9 +821,13 @@ int bdi_writeback_thread(void *data)
 	smp_mb__after_clear_bit();
 	wake_up_bit(&bdi->state, BDI_pending);
 
+	trace_writeback_thread_start(bdi);
+
 	while (!kthread_should_stop()) {
 		pages_written = wb_do_writeback(wb, 0);
 
+		trace_writeback_pages_written(pages_written);
+
 		if (pages_written)
 			last_active = jiffies;
 		else if (wait_jiffies != -1UL) {
@@ -845,6 +865,8 @@ int bdi_writeback_thread(void *data)
 	 */
 	if (!list_empty(&bdi->work_list))
 		wb_do_writeback(wb, 1);
+
+	trace_writeback_thread_stop(bdi);
 	return 0;
 }
 
-- 
cgit v1.1


From 028c2dd184c097809986684f2f0627eea5529fea Mon Sep 17 00:00:00 2001
From: Dave Chinner <dchinner@redhat.com>
Date: Wed, 7 Jul 2010 13:24:07 +1000
Subject: writeback: Add tracing to balance_dirty_pages

Tracing high level background writeback events is good, but it doesn't
give the entire picture. Add visibility into write throttling to catch IO
dispatched by foreground throttling of processing dirtying lots of pages.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Jens Axboe <jaxboe@fusionio.com>
---
 fs/fs-writeback.c | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'fs')

diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 73acab4..bf10cbf 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -656,10 +656,14 @@ static long wb_writeback(struct bdi_writeback *wb,
 		wbc.more_io = 0;
 		wbc.nr_to_write = MAX_WRITEBACK_PAGES;
 		wbc.pages_skipped = 0;
+
+		trace_wbc_writeback_start(&wbc, wb->bdi);
 		if (work->sb)
 			__writeback_inodes_sb(work->sb, wb, &wbc);
 		else
 			writeback_inodes_wb(wb, &wbc);
+		trace_wbc_writeback_written(&wbc, wb->bdi);
+
 		work->nr_pages -= MAX_WRITEBACK_PAGES - wbc.nr_to_write;
 		wrote += MAX_WRITEBACK_PAGES - wbc.nr_to_write;
 
@@ -687,6 +691,7 @@ static long wb_writeback(struct bdi_writeback *wb,
 		if (!list_empty(&wb->b_more_io))  {
 			inode = list_entry(wb->b_more_io.prev,
 						struct inode, i_list);
+			trace_wbc_writeback_wait(&wbc, wb->bdi);
 			inode_wait_for_writeback(inode);
 		}
 		spin_unlock(&inode_lock);
-- 
cgit v1.1


From 6e9624b8caec290d28b4c6d9ec75749df6372b87 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Sat, 7 Aug 2010 18:25:34 +0200
Subject: block: push down BKL into .open and .release

The open and release block_device_operations are currently
called with the BKL held. In order to change that, we must
first make sure that all drivers that currently rely
on this have no regressions.

This blindly pushes the BKL into all .open and .release
operations for all block drivers to prepare for the
next step. The drivers can subsequently replace the BKL
with their own locks or remove it completely when it can
be shown that it is not needed.

The functions blkdev_get and blkdev_put are the only
remaining users of the big kernel lock in the block
layer, besides a few uses in the ioctl code, none
of which need to serialize with blkdev_{get,put}.

Most of these two functions is also under the protection
of bdev->bd_mutex, including the actual calls to
->open and ->release, and the common code does not
access any global data structures that need the BKL.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Christoph Hellwig <hch@infradead.org>
Signed-off-by: Jens Axboe <jaxboe@fusionio.com>
---
 fs/block_dev.c | 10 ++--------
 1 file changed, 2 insertions(+), 8 deletions(-)

(limited to 'fs')

diff --git a/fs/block_dev.c b/fs/block_dev.c
index 99d6af8..693c2bf 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1345,13 +1345,12 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
 		return ret;
 	}
 
-	lock_kernel();
  restart:
 
 	ret = -ENXIO;
 	disk = get_gendisk(bdev->bd_dev, &partno);
 	if (!disk)
-		goto out_unlock_kernel;
+		goto out;
 
 	mutex_lock_nested(&bdev->bd_mutex, for_part);
 	if (!bdev->bd_openers) {
@@ -1431,7 +1430,6 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
 	if (for_part)
 		bdev->bd_part_count++;
 	mutex_unlock(&bdev->bd_mutex);
-	unlock_kernel();
 	return 0;
 
  out_clear:
@@ -1444,9 +1442,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
 	bdev->bd_contains = NULL;
  out_unlock_bdev:
 	mutex_unlock(&bdev->bd_mutex);
- out_unlock_kernel:
-	unlock_kernel();
-
+ out:
 	if (disk)
 		module_put(disk->fops->owner);
 	put_disk(disk);
@@ -1515,7 +1511,6 @@ static int __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part)
 	struct block_device *victim = NULL;
 
 	mutex_lock_nested(&bdev->bd_mutex, for_part);
-	lock_kernel();
 	if (for_part)
 		bdev->bd_part_count--;
 
@@ -1540,7 +1535,6 @@ static int __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part)
 			victim = bdev->bd_contains;
 		bdev->bd_contains = NULL;
 	}
-	unlock_kernel();
 	mutex_unlock(&bdev->bd_mutex);
 	bdput(bdev);
 	if (victim)
-- 
cgit v1.1


From 6965031d331a642e31278fa1b5bd47f372ffdd5d Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <miklos@szeredi.hu>
Date: Tue, 3 Aug 2010 12:48:50 +0200
Subject: splice: fix misuse of SPLICE_F_NONBLOCK

SPLICE_F_NONBLOCK is clearly documented to only affect blocking on the
pipe.  In __generic_file_splice_read(), however, it causes an EAGAIN
if the page is currently being read.

This makes it impossible to write an application that only wants
failure if the pipe is full.  For example if the same process is
handling both ends of a pipe and isn't otherwise able to determine
whether a splice to the pipe will fill it or not.

We could make the read non-blocking on O_NONBLOCK or some other splice
flag, but for now this is the simplest fix.

Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
CC: stable@kernel.org
Signed-off-by: Jens Axboe <jaxboe@fusionio.com>
---
 fs/splice.c | 12 +-----------
 1 file changed, 1 insertion(+), 11 deletions(-)

(limited to 'fs')

diff --git a/fs/splice.c b/fs/splice.c
index ec11c52..8f1dfae 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -399,17 +399,7 @@ __generic_file_splice_read(struct file *in, loff_t *ppos,
 		 * If the page isn't uptodate, we may need to start io on it
 		 */
 		if (!PageUptodate(page)) {
-			/*
-			 * If in nonblock mode then dont block on waiting
-			 * for an in-flight io page
-			 */
-			if (flags & SPLICE_F_NONBLOCK) {
-				if (!trylock_page(page)) {
-					error = -EAGAIN;
-					break;
-				}
-			} else
-				lock_page(page);
+			lock_page(page);
 
 			/*
 			 * Page was truncated, or invalidated by the
-- 
cgit v1.1


From 08852b6d6c40f387f2b75e199e2ca1df68970f4c Mon Sep 17 00:00:00 2001
From: Minchan Kim <minchan.kim@gmail.com>
Date: Tue, 3 Aug 2010 12:51:16 +0200
Subject: writeback: remove wb in get_next_work_item

83ba7b07 cleans up the writeback.
So we don't use wb any more in get_next_work_item.
Let's remove unnecessary argument.

CC: Christoph Hellwig <hch@lst.de>
Signed-off-by: Minchan Kim <minchan.kim@gmail.com>
Signed-off-by: Jens Axboe <jaxboe@fusionio.com>
---
 fs/fs-writeback.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index bf10cbf..261570d 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -704,7 +704,7 @@ static long wb_writeback(struct bdi_writeback *wb,
  * Return the next wb_writeback_work struct that hasn't been processed yet.
  */
 static struct wb_writeback_work *
-get_next_work_item(struct backing_dev_info *bdi, struct bdi_writeback *wb)
+get_next_work_item(struct backing_dev_info *bdi)
 {
 	struct wb_writeback_work *work = NULL;
 
@@ -762,7 +762,7 @@ long wb_do_writeback(struct bdi_writeback *wb, int force_wait)
 	struct wb_writeback_work *work;
 	long wrote = 0;
 
-	while ((work = get_next_work_item(bdi, wb)) != NULL) {
+	while ((work = get_next_work_item(bdi)) != NULL) {
 		/*
 		 * Override sync mode, in case we must wait for completion
 		 * because this thread is exiting now.
-- 
cgit v1.1


From 4aeefdc69f7b6f3f287e6fd8d4b213953b9e92d8 Mon Sep 17 00:00:00 2001
From: Jens Axboe <jaxboe@fusionio.com>
Date: Tue, 3 Aug 2010 13:22:51 +0200
Subject: coda: fixup clash with block layer REQ_* defines

CODA should not be using defines in the global name space of
that nature, prefix them with CODA_.

Signed-off-by: Jens Axboe <jaxboe@fusionio.com>
---
 fs/coda/psdev.c  | 12 ++++++------
 fs/coda/upcall.c | 12 ++++++------
 2 files changed, 12 insertions(+), 12 deletions(-)

(limited to 'fs')

diff --git a/fs/coda/psdev.c b/fs/coda/psdev.c
index 66b9cf7..de89645 100644
--- a/fs/coda/psdev.c
+++ b/fs/coda/psdev.c
@@ -177,7 +177,7 @@ static ssize_t coda_psdev_write(struct file *file, const char __user *buf,
 		nbytes = req->uc_outSize; /* don't have more space! */
 	}
         if (copy_from_user(req->uc_data, buf, nbytes)) {
-		req->uc_flags |= REQ_ABORT;
+		req->uc_flags |= CODA_REQ_ABORT;
 		wake_up(&req->uc_sleep);
 		retval = -EFAULT;
 		goto out;
@@ -254,8 +254,8 @@ static ssize_t coda_psdev_read(struct file * file, char __user * buf,
 	        retval = -EFAULT;
         
 	/* If request was not a signal, enqueue and don't free */
-	if (!(req->uc_flags & REQ_ASYNC)) {
-		req->uc_flags |= REQ_READ;
+	if (!(req->uc_flags & CODA_REQ_ASYNC)) {
+		req->uc_flags |= CODA_REQ_READ;
 		list_add_tail(&(req->uc_chain), &vcp->vc_processing);
 		goto out;
 	}
@@ -315,19 +315,19 @@ static int coda_psdev_release(struct inode * inode, struct file * file)
 		list_del(&req->uc_chain);
 
 		/* Async requests need to be freed here */
-		if (req->uc_flags & REQ_ASYNC) {
+		if (req->uc_flags & CODA_REQ_ASYNC) {
 			CODA_FREE(req->uc_data, sizeof(struct coda_in_hdr));
 			kfree(req);
 			continue;
 		}
-		req->uc_flags |= REQ_ABORT;
+		req->uc_flags |= CODA_REQ_ABORT;
 		wake_up(&req->uc_sleep);
 	}
 
 	list_for_each_entry_safe(req, tmp, &vcp->vc_processing, uc_chain) {
 		list_del(&req->uc_chain);
 
-		req->uc_flags |= REQ_ABORT;
+		req->uc_flags |= CODA_REQ_ABORT;
 		wake_up(&req->uc_sleep);
 	}
 
diff --git a/fs/coda/upcall.c b/fs/coda/upcall.c
index f09c5ed..b8893ab 100644
--- a/fs/coda/upcall.c
+++ b/fs/coda/upcall.c
@@ -604,7 +604,7 @@ static void coda_unblock_signals(sigset_t *old)
 			       (((r)->uc_opcode != CODA_CLOSE && \
 				 (r)->uc_opcode != CODA_STORE && \
 				 (r)->uc_opcode != CODA_RELEASE) || \
-				(r)->uc_flags & REQ_READ))
+				(r)->uc_flags & CODA_REQ_READ))
 
 static inline void coda_waitfor_upcall(struct upc_req *req)
 {
@@ -624,7 +624,7 @@ static inline void coda_waitfor_upcall(struct upc_req *req)
 			set_current_state(TASK_UNINTERRUPTIBLE);
 
 		/* got a reply */
-		if (req->uc_flags & (REQ_WRITE | REQ_ABORT))
+		if (req->uc_flags & (CODA_REQ_WRITE | CODA_REQ_ABORT))
 			break;
 
 		if (blocked && time_after(jiffies, timeout) &&
@@ -708,7 +708,7 @@ static int coda_upcall(struct venus_comm *vcp,
 	coda_waitfor_upcall(req);
 
 	/* Op went through, interrupt or not... */
-	if (req->uc_flags & REQ_WRITE) {
+	if (req->uc_flags & CODA_REQ_WRITE) {
 		out = (union outputArgs *)req->uc_data;
 		/* here we map positive Venus errors to kernel errors */
 		error = -out->oh.result;
@@ -717,13 +717,13 @@ static int coda_upcall(struct venus_comm *vcp,
 	}
 
 	error = -EINTR;
-	if ((req->uc_flags & REQ_ABORT) || !signal_pending(current)) {
+	if ((req->uc_flags & CODA_REQ_ABORT) || !signal_pending(current)) {
 		printk(KERN_WARNING "coda: Unexpected interruption.\n");
 		goto exit;
 	}
 
 	/* Interrupted before venus read it. */
-	if (!(req->uc_flags & REQ_READ))
+	if (!(req->uc_flags & CODA_REQ_READ))
 		goto exit;
 
 	/* Venus saw the upcall, make sure we can send interrupt signal */
@@ -747,7 +747,7 @@ static int coda_upcall(struct venus_comm *vcp,
 	sig_inputArgs->ih.opcode = CODA_SIGNAL;
 	sig_inputArgs->ih.unique = req->uc_unique;
 
-	sig_req->uc_flags = REQ_ASYNC;
+	sig_req->uc_flags = CODA_REQ_ASYNC;
 	sig_req->uc_opcode = sig_inputArgs->ih.opcode;
 	sig_req->uc_unique = sig_inputArgs->ih.unique;
 	sig_req->uc_inSize = sizeof(struct coda_in_hdr);
-- 
cgit v1.1


From 6f904ff0e39ea88f81eb77e8dfb4e1238492f0a8 Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Date: Sun, 25 Jul 2010 14:29:11 +0300
Subject: writeback: harmonize writeback threads naming

The write-back code mixes words "thread" and "task" for the same things. This
is not a big deal, but still an inconsistency.

hch: a convention I tend to use and I've seen in various places
is to always use _task for the storage of the task_struct pointer,
and thread everywhere else.  This especially helps with having
foo_thread for the actual thread and foo_task for a global
variable keeping the task_struct pointer

This patch renames:
* 'bdi_add_default_flusher_task()' -> 'bdi_add_default_flusher_thread()'
* 'bdi_forker_task()'              -> 'bdi_forker_thread()'

because bdi threads are 'bdi_writeback_thread()', so these names are more
consistent.

This patch also amends commentaries and makes them refer the forker and bdi
threads as "thread", not "task".

Also, while on it, make 'bdi_add_default_flusher_thread()' declaration use
'static void' instead of 'void static' and make checkpatch.pl happy.

Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <jaxboe@fusionio.com>
---
 fs/fs-writeback.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 261570d..002be0f 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -840,7 +840,7 @@ int bdi_writeback_thread(void *data)
 
 			/*
 			 * Longest period of inactivity that we tolerate. If we
-			 * see dirty data again later, the task will get
+			 * see dirty data again later, the thread will get
 			 * recreated automatically.
 			 */
 			max_idle = max(5UL * 60 * HZ, wait_jiffies);
-- 
cgit v1.1


From 297252c81de8043ca6c36e5984c24fdb5aab9013 Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Date: Sun, 25 Jul 2010 14:29:15 +0300
Subject: writeback: do not lose wake-ups in bdi threads

Currently, bdi threads ('bdi_writeback_thread()') can lose wake-ups. For
example, if 'bdi_queue_work()' is executed after the bdi thread have had
finished 'wb_do_writeback()' but before it called
'schedule_timeout_interruptible()'.

To fix this issue, we have to check whether we have works to process after we
have changed the task state to 'TASK_INTERRUPTIBLE'.

This patch also clean-ups handling of the cases when 'dirty_writeback_interval'
is zero or non-zero.

Additionally, this patch also removes unneeded 'list_empty_careful()' call.

Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <jaxboe@fusionio.com>
---
 fs/fs-writeback.c | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

(limited to 'fs')

diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 002be0f..05444ea 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -848,17 +848,18 @@ int bdi_writeback_thread(void *data)
 				break;
 		}
 
-		if (dirty_writeback_interval) {
-			wait_jiffies = msecs_to_jiffies(dirty_writeback_interval * 10);
-			schedule_timeout_interruptible(wait_jiffies);
-		} else {
-			set_current_state(TASK_INTERRUPTIBLE);
-			if (list_empty_careful(&wb->bdi->work_list) &&
-			    !kthread_should_stop())
-				schedule();
+		set_current_state(TASK_INTERRUPTIBLE);
+		if (!list_empty(&bdi->work_list)) {
 			__set_current_state(TASK_RUNNING);
+			continue;
 		}
 
+		if (dirty_writeback_interval) {
+			wait_jiffies = msecs_to_jiffies(dirty_writeback_interval * 10);
+			schedule_timeout(wait_jiffies);
+		} else
+			schedule();
+
 		try_to_freeze();
 	}
 
-- 
cgit v1.1


From 78c40cb6581a74adc48821f3de6b864a54d4c34d Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Date: Sun, 25 Jul 2010 14:29:17 +0300
Subject: writeback: do not remove bdi from bdi_list

The forker thread removes bdis from 'bdi_list' before forking the bdi thread.
But this is wrong for at least 2 reasons.

Reason #1: if we temporary remove a bdi from the list, we may miss works which
           would otherwise be given to us.

Reason #2: this is racy; indeed, 'bdi_wb_shutdown()' expects that bdis are
           always in the 'bdi_list' (see 'bdi_remove_from_list()'), and when
           it races with the forker thread, it can shut down the bdi thread
           at the same time as the forker creates it.

This patch makes sure the forker thread never removes bdis from 'bdi_list'
(which was suggested by Christoph Hellwig).

In order to make sure that we do not race with 'bdi_wb_shutdown()', we have to
hold the 'bdi_lock' while walking the 'bdi_list' and setting the 'BDI_pending'
flag.

NOTE! The error path is interesting. Currently, when we fail to create a bdi
thread, we move the bdi to the tail of 'bdi_list'. But if we never remove the
bdi from the list, we cannot move it to the tail either, because then we can
mess up the RCU readers which walk the list. And also, we'll have the race
described above in "Reason #2".

But I not think that adding to the tail is any important so I just do not do
that.

Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <jaxboe@fusionio.com>
---
 fs/fs-writeback.c | 7 -------
 1 file changed, 7 deletions(-)

(limited to 'fs')

diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 05444ea..57fbfd0 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -804,13 +804,6 @@ int bdi_writeback_thread(void *data)
 	unsigned long wait_jiffies = -1UL;
 	long pages_written;
 
-	/*
-	 * Add us to the active bdi_list
-	 */
-	spin_lock_bh(&bdi_lock);
-	list_add_rcu(&bdi->bdi_list, &bdi_list);
-	spin_unlock_bh(&bdi_lock);
-
 	current->flags |= PF_FLUSHER | PF_SWAPWRITE;
 	set_freezable();
 
-- 
cgit v1.1


From ecd584030da67ede1bf17955746a6ce834d9fc6b Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Date: Sun, 25 Jul 2010 14:29:18 +0300
Subject: writeback: move last_active to bdi

Currently bdi threads use local variable 'last_active' which stores last time
when the bdi thread did some useful work. Move this local variable to 'struct
bdi_writeback'. This is just a preparation for the further patches which will
make the forker thread decide when bdi threads should be killed.

Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <jaxboe@fusionio.com>
---
 fs/fs-writeback.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 57fbfd0..9f5cab7 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -800,12 +800,12 @@ int bdi_writeback_thread(void *data)
 {
 	struct bdi_writeback *wb = data;
 	struct backing_dev_info *bdi = wb->bdi;
-	unsigned long last_active = jiffies;
 	unsigned long wait_jiffies = -1UL;
 	long pages_written;
 
 	current->flags |= PF_FLUSHER | PF_SWAPWRITE;
 	set_freezable();
+	wb->last_active = jiffies;
 
 	/*
 	 * Our parent may run at a different priority, just set us to normal
@@ -827,7 +827,7 @@ int bdi_writeback_thread(void *data)
 		trace_writeback_pages_written(pages_written);
 
 		if (pages_written)
-			last_active = jiffies;
+			wb->last_active = jiffies;
 		else if (wait_jiffies != -1UL) {
 			unsigned long max_idle;
 
@@ -837,7 +837,7 @@ int bdi_writeback_thread(void *data)
 			 * recreated automatically.
 			 */
 			max_idle = max(5UL * 60 * HZ, wait_jiffies);
-			if (time_after(jiffies, max_idle + last_active))
+			if (time_after(jiffies, max_idle + wb->last_active))
 				break;
 		}
 
-- 
cgit v1.1


From fff5b85aa4225a7be157f208277a055822039a9e Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Date: Sun, 25 Jul 2010 14:29:20 +0300
Subject: writeback: move bdi threads exiting logic to the forker thread

Currently, bdi threads can decide to exit if there were no useful activities
for 5 minutes. However, this causes nasty races: we can easily oops in the
'bdi_queue_work()' if the bdi thread decides to exit while we are waking it up.

And even if we do not oops, but the bdi tread exits immediately after we wake
it up, we'd lose the wake-up event and have an unnecessary delay (up to 5 secs)
in the bdi work processing.

This patch makes the forker thread to be the central place which not only
creates bdi threads, but also kills them if they were inactive long enough.
This better design-wise.

Another reason why this change was done is to prepare for the further changes
which will prevent the bdi threads from waking up every 5 sec and wasting
power. Indeed, when the task does not wake up periodically anymore, it won't be
able to exit either.

This patch also moves the the 'wake_up_bit()' call from the bdi thread to the
forker thread as well. So now the forker thread sets the BDI_pending bit, then
forks the task or kills it, then clears the bit and wakes up the waiting
process.

The only process which may wain on the bit is 'bdi_wb_shutdown()'. This
function was changed as well - now it first removes the bdi from the
'bdi_list', then waits on the 'BDI_pending' bit. Once it wakes up, it is
guaranteed that the forker thread won't race with it, because the bdi is not
visible. Note, the forker thread sets the 'BDI_pending' bit under the
'bdi->wb_lock' which is essential for proper serialization.

And additionally, when we change 'bdi->wb.task', we now take the
'bdi->work_lock', to make sure that we do not lose wake-ups which we otherwise
would when raced with, say, 'bdi_queue_work()'.

Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <jaxboe@fusionio.com>
---
 fs/fs-writeback.c | 54 ++++++++++++------------------------------------------
 1 file changed, 12 insertions(+), 42 deletions(-)

(limited to 'fs')

diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 9f5cab7..905f3ea 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -78,21 +78,17 @@ static void bdi_queue_work(struct backing_dev_info *bdi,
 
 	spin_lock(&bdi->wb_lock);
 	list_add_tail(&work->list, &bdi->work_list);
-	spin_unlock(&bdi->wb_lock);
-
-	/*
-	 * If the default thread isn't there, make sure we add it. When
-	 * it gets created and wakes up, we'll run this work.
-	 */
-	if (unlikely(!bdi->wb.task)) {
+	if (bdi->wb.task) {
+		wake_up_process(bdi->wb.task);
+	} else {
+		/*
+		 * The bdi thread isn't there, wake up the forker thread which
+		 * will create and run it.
+		 */
 		trace_writeback_nothread(bdi, work);
 		wake_up_process(default_backing_dev_info.wb.task);
-	} else {
-		struct bdi_writeback *wb = &bdi->wb;
-
-		if (wb->task)
-			wake_up_process(wb->task);
 	}
+	spin_unlock(&bdi->wb_lock);
 }
 
 static void
@@ -800,7 +796,6 @@ int bdi_writeback_thread(void *data)
 {
 	struct bdi_writeback *wb = data;
 	struct backing_dev_info *bdi = wb->bdi;
-	unsigned long wait_jiffies = -1UL;
 	long pages_written;
 
 	current->flags |= PF_FLUSHER | PF_SWAPWRITE;
@@ -812,13 +807,6 @@ int bdi_writeback_thread(void *data)
 	 */
 	set_user_nice(current, 0);
 
-	/*
-	 * Clear pending bit and wakeup anybody waiting to tear us down
-	 */
-	clear_bit(BDI_pending, &bdi->state);
-	smp_mb__after_clear_bit();
-	wake_up_bit(&bdi->state, BDI_pending);
-
 	trace_writeback_thread_start(bdi);
 
 	while (!kthread_should_stop()) {
@@ -828,18 +816,6 @@ int bdi_writeback_thread(void *data)
 
 		if (pages_written)
 			wb->last_active = jiffies;
-		else if (wait_jiffies != -1UL) {
-			unsigned long max_idle;
-
-			/*
-			 * Longest period of inactivity that we tolerate. If we
-			 * see dirty data again later, the thread will get
-			 * recreated automatically.
-			 */
-			max_idle = max(5UL * 60 * HZ, wait_jiffies);
-			if (time_after(jiffies, max_idle + wb->last_active))
-				break;
-		}
 
 		set_current_state(TASK_INTERRUPTIBLE);
 		if (!list_empty(&bdi->work_list)) {
@@ -847,21 +823,15 @@ int bdi_writeback_thread(void *data)
 			continue;
 		}
 
-		if (dirty_writeback_interval) {
-			wait_jiffies = msecs_to_jiffies(dirty_writeback_interval * 10);
-			schedule_timeout(wait_jiffies);
-		} else
+		if (dirty_writeback_interval)
+			schedule_timeout(msecs_to_jiffies(dirty_writeback_interval * 10));
+		else
 			schedule();
 
 		try_to_freeze();
 	}
 
-	wb->task = NULL;
-
-	/*
-	 * Flush any work that raced with us exiting. No new work
-	 * will be added, since this bdi isn't discoverable anymore.
-	 */
+	/* Flush any work that raced with us exiting */
 	if (!list_empty(&bdi->work_list))
 		wb_do_writeback(wb, 1);
 
-- 
cgit v1.1


From 253c34e9b10c30d3064be654b5b78fbc1a8b1896 Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Date: Sun, 25 Jul 2010 14:29:21 +0300
Subject: writeback: prevent unnecessary bdi threads wakeups

Finally, we can get rid of unnecessary wake-ups in bdi threads, which are very
bad for battery-driven devices.

There are two types of activities bdi threads do:
1. process bdi works from the 'bdi->work_list'
2. periodic write-back

So there are 2 sources of wake-up events for bdi threads:

1. 'bdi_queue_work()' - submits bdi works
2. '__mark_inode_dirty()' - adds dirty I/O to bdi's

The former already has bdi wake-up code. The latter does not, and this patch
adds it.

'__mark_inode_dirty()' is hot-path function, but this patch adds another
'spin_lock(&bdi->wb_lock)' there. However, it is taken only in rare cases when
the bdi has no dirty inodes. So adding this spinlock should be fine and should
not affect performance.

This patch makes sure bdi threads and the forker thread do not wake-up if there
is nothing to do. The forker thread will nevertheless wake up at least every
5 min. to check whether it has to kill a bdi thread. This can also be optimized,
but is not worth it.

This patch also tidies up the warning about unregistered bid, and turns it from
an ugly crocodile to a simple 'WARN()' statement.

Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <jaxboe@fusionio.com>
---
 fs/fs-writeback.c | 59 ++++++++++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 48 insertions(+), 11 deletions(-)

(limited to 'fs')

diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 905f3ea..55f6e46 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -823,10 +823,16 @@ int bdi_writeback_thread(void *data)
 			continue;
 		}
 
-		if (dirty_writeback_interval)
+		if (wb_has_dirty_io(wb) && dirty_writeback_interval)
 			schedule_timeout(msecs_to_jiffies(dirty_writeback_interval * 10));
-		else
+		else {
+			/*
+			 * We have nothing to do, so can go sleep without any
+			 * timeout and save power. When a work is queued or
+			 * something is made dirty - we will be woken up.
+			 */
 			schedule();
+		}
 
 		try_to_freeze();
 	}
@@ -862,6 +868,26 @@ void wakeup_flusher_threads(long nr_pages)
 	rcu_read_unlock();
 }
 
+/*
+ * This function is used when the first inode for this bdi is marked dirty. It
+ * wakes-up the corresponding bdi thread which should then take care of the
+ * periodic background write-out of dirty inodes.
+ */
+static void wakeup_bdi_thread(struct backing_dev_info *bdi)
+{
+	spin_lock(&bdi->wb_lock);
+	if (bdi->wb.task)
+		wake_up_process(bdi->wb.task);
+	else
+		/*
+		 * When bdi tasks are inactive for long time, they are killed.
+		 * In this case we have to wake-up the forker thread which
+		 * should create and run the bdi thread.
+		 */
+		wake_up_process(default_backing_dev_info.wb.task);
+	spin_unlock(&bdi->wb_lock);
+}
+
 static noinline void block_dump___mark_inode_dirty(struct inode *inode)
 {
 	if (inode->i_ino || strcmp(inode->i_sb->s_id, "bdev")) {
@@ -914,6 +940,8 @@ static noinline void block_dump___mark_inode_dirty(struct inode *inode)
 void __mark_inode_dirty(struct inode *inode, int flags)
 {
 	struct super_block *sb = inode->i_sb;
+	struct backing_dev_info *bdi = NULL;
+	bool wakeup_bdi = false;
 
 	/*
 	 * Don't do this for I_DIRTY_PAGES - that doesn't actually
@@ -967,22 +995,31 @@ void __mark_inode_dirty(struct inode *inode, int flags)
 		 * reposition it (that would break b_dirty time-ordering).
 		 */
 		if (!was_dirty) {
-			struct bdi_writeback *wb = &inode_to_bdi(inode)->wb;
-			struct backing_dev_info *bdi = wb->bdi;
-
-			if (bdi_cap_writeback_dirty(bdi) &&
-			    !test_bit(BDI_registered, &bdi->state)) {
-				WARN_ON(1);
-				printk(KERN_ERR "bdi-%s not registered\n",
-								bdi->name);
+			bdi = inode_to_bdi(inode);
+
+			if (bdi_cap_writeback_dirty(bdi)) {
+				WARN(!test_bit(BDI_registered, &bdi->state),
+				     "bdi-%s not registered\n", bdi->name);
+
+				/*
+				 * If this is the first dirty inode for this
+				 * bdi, we have to wake-up the corresponding
+				 * bdi thread to make sure background
+				 * write-back happens later.
+				 */
+				if (!wb_has_dirty_io(&bdi->wb))
+					wakeup_bdi = true;
 			}
 
 			inode->dirtied_when = jiffies;
-			list_move(&inode->i_list, &wb->b_dirty);
+			list_move(&inode->i_list, &bdi->wb.b_dirty);
 		}
 	}
 out:
 	spin_unlock(&inode_lock);
+
+	if (wakeup_bdi)
+		wakeup_bdi_thread(bdi);
 }
 EXPORT_SYMBOL(__mark_inode_dirty);
 
-- 
cgit v1.1


From 6467716a37673e8d47b4984eb19839bdad0a8353 Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Date: Sun, 25 Jul 2010 14:29:22 +0300
Subject: writeback: optimize periodic bdi thread wakeups

Whe the first inode for a bdi is marked dirty, we wake up the bdi thread which
should take care of the periodic background write-out. However, the write-out
will actually start only 'dirty_writeback_interval' centisecs later, so we can
delay the wake-up.

This change was requested by Nick Piggin who pointed out that if we delay the
wake-up, we weed out 2 unnecessary contex switches, which matters because
'__mark_inode_dirty()' is a hot-path function.

This patch introduces a new function - 'bdi_wakeup_thread_delayed()', which
sets up a timer to wake-up the bdi thread and returns. So the wake-up is
delayed.

We also delete the timer in bdi threads just before writing-back. And
synchronously delete it when unregistering bdi. At the unregister point the bdi
does not have any users, so no one can arm it again.

Since now we take 'bdi->wb_lock' in the timer, which can execute in softirq
context, we have to use 'spin_lock_bh()' for 'bdi->wb_lock'. This patch makes
this change as well.

This patch also moves the 'bdi_wb_init()' function down in the file to avoid
forward-declaration of 'bdi_wakeup_thread_delayed()'.

Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Signed-off-by: Jens Axboe <jaxboe@fusionio.com>
---
 fs/fs-writeback.c | 36 +++++++++++-------------------------
 1 file changed, 11 insertions(+), 25 deletions(-)

(limited to 'fs')

diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 55f6e46..bfa2df2 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -76,7 +76,7 @@ static void bdi_queue_work(struct backing_dev_info *bdi,
 {
 	trace_writeback_queue(bdi, work);
 
-	spin_lock(&bdi->wb_lock);
+	spin_lock_bh(&bdi->wb_lock);
 	list_add_tail(&work->list, &bdi->work_list);
 	if (bdi->wb.task) {
 		wake_up_process(bdi->wb.task);
@@ -88,7 +88,7 @@ static void bdi_queue_work(struct backing_dev_info *bdi,
 		trace_writeback_nothread(bdi, work);
 		wake_up_process(default_backing_dev_info.wb.task);
 	}
-	spin_unlock(&bdi->wb_lock);
+	spin_unlock_bh(&bdi->wb_lock);
 }
 
 static void
@@ -704,13 +704,13 @@ get_next_work_item(struct backing_dev_info *bdi)
 {
 	struct wb_writeback_work *work = NULL;
 
-	spin_lock(&bdi->wb_lock);
+	spin_lock_bh(&bdi->wb_lock);
 	if (!list_empty(&bdi->work_list)) {
 		work = list_entry(bdi->work_list.next,
 				  struct wb_writeback_work, list);
 		list_del_init(&work->list);
 	}
-	spin_unlock(&bdi->wb_lock);
+	spin_unlock_bh(&bdi->wb_lock);
 	return work;
 }
 
@@ -810,6 +810,12 @@ int bdi_writeback_thread(void *data)
 	trace_writeback_thread_start(bdi);
 
 	while (!kthread_should_stop()) {
+		/*
+		 * Remove own delayed wake-up timer, since we are already awake
+		 * and we'll take care of the preriodic write-back.
+		 */
+		del_timer(&wb->wakeup_timer);
+
 		pages_written = wb_do_writeback(wb, 0);
 
 		trace_writeback_pages_written(pages_written);
@@ -868,26 +874,6 @@ void wakeup_flusher_threads(long nr_pages)
 	rcu_read_unlock();
 }
 
-/*
- * This function is used when the first inode for this bdi is marked dirty. It
- * wakes-up the corresponding bdi thread which should then take care of the
- * periodic background write-out of dirty inodes.
- */
-static void wakeup_bdi_thread(struct backing_dev_info *bdi)
-{
-	spin_lock(&bdi->wb_lock);
-	if (bdi->wb.task)
-		wake_up_process(bdi->wb.task);
-	else
-		/*
-		 * When bdi tasks are inactive for long time, they are killed.
-		 * In this case we have to wake-up the forker thread which
-		 * should create and run the bdi thread.
-		 */
-		wake_up_process(default_backing_dev_info.wb.task);
-	spin_unlock(&bdi->wb_lock);
-}
-
 static noinline void block_dump___mark_inode_dirty(struct inode *inode)
 {
 	if (inode->i_ino || strcmp(inode->i_sb->s_id, "bdev")) {
@@ -1019,7 +1005,7 @@ out:
 	spin_unlock(&inode_lock);
 
 	if (wakeup_bdi)
-		wakeup_bdi_thread(bdi);
+		bdi_wakeup_thread_delayed(bdi);
 }
 EXPORT_SYMBOL(__mark_inode_dirty);
 
-- 
cgit v1.1


From 6088c0587706b2cf21ce50c11576718bff5fae0c Mon Sep 17 00:00:00 2001
From: David Woodhouse <David.Woodhouse@intel.com>
Date: Sun, 8 Aug 2010 14:15:22 +0100
Subject: jffs2: Update copyright notices

Signed-off-by: David Woodhouse <dwmw2@infradead.org>
---
 fs/jffs2/background.c  | 1 +
 fs/jffs2/build.c       | 1 +
 fs/jffs2/compr.c       | 5 +++--
 fs/jffs2/compr.h       | 1 +
 fs/jffs2/compr_lzo.c   | 1 +
 fs/jffs2/compr_rtime.c | 1 +
 fs/jffs2/compr_rubin.c | 1 +
 fs/jffs2/compr_zlib.c  | 1 +
 fs/jffs2/debug.c       | 1 +
 fs/jffs2/debug.h       | 1 +
 fs/jffs2/dir.c         | 1 +
 fs/jffs2/erase.c       | 1 +
 fs/jffs2/file.c        | 1 +
 fs/jffs2/fs.c          | 1 +
 fs/jffs2/gc.c          | 1 +
 fs/jffs2/ioctl.c       | 1 +
 fs/jffs2/jffs2_fs_i.h  | 1 +
 fs/jffs2/jffs2_fs_sb.h | 1 +
 18 files changed, 20 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/jffs2/background.c b/fs/jffs2/background.c
index 55f1dde..404111b 100644
--- a/fs/jffs2/background.c
+++ b/fs/jffs2/background.c
@@ -2,6 +2,7 @@
  * JFFS2 -- Journalling Flash File System, Version 2.
  *
  * Copyright © 2001-2007 Red Hat, Inc.
+ * Copyright © 2004-2010 David Woodhouse <dwmw2@infradead.org>
  *
  * Created by David Woodhouse <dwmw2@infradead.org>
  *
diff --git a/fs/jffs2/build.c b/fs/jffs2/build.c
index c5e1450..a906f53 100644
--- a/fs/jffs2/build.c
+++ b/fs/jffs2/build.c
@@ -2,6 +2,7 @@
  * JFFS2 -- Journalling Flash File System, Version 2.
  *
  * Copyright © 2001-2007 Red Hat, Inc.
+ * Copyright © 2004-2010 David Woodhouse <dwmw2@infradead.org>
  *
  * Created by David Woodhouse <dwmw2@infradead.org>
  *
diff --git a/fs/jffs2/compr.c b/fs/jffs2/compr.c
index f029441..617a1e5 100644
--- a/fs/jffs2/compr.c
+++ b/fs/jffs2/compr.c
@@ -2,11 +2,12 @@
  * JFFS2 -- Journalling Flash File System, Version 2.
  *
  * Copyright © 2001-2007 Red Hat, Inc.
- * Created by Arjan van de Ven <arjanv@redhat.com>
- *
+ * Copyright © 2004-2010 David Woodhouse <dwmw2@infradead.org>
  * Copyright © 2004 Ferenc Havasi <havasi@inf.u-szeged.hu>,
  *		    University of Szeged, Hungary
  *
+ * Created by Arjan van de Ven <arjan@infradead.org>
+ *
  * For licensing information, see the file 'LICENCE' in this directory.
  *
  */
diff --git a/fs/jffs2/compr.h b/fs/jffs2/compr.h
index 7d1d72f..e471a91 100644
--- a/fs/jffs2/compr.h
+++ b/fs/jffs2/compr.h
@@ -3,6 +3,7 @@
  *
  * Copyright © 2004   Ferenc Havasi <havasi@inf.u-szeged.hu>,
  *		      University of Szeged, Hungary
+ * Copyright © 2004-2010 David Woodhouse <dwmw2@infradead.org>
  *
  * For licensing information, see the file 'LICENCE' in this directory.
  *
diff --git a/fs/jffs2/compr_lzo.c b/fs/jffs2/compr_lzo.c
index cd02aca..ed25ae7c 100644
--- a/fs/jffs2/compr_lzo.c
+++ b/fs/jffs2/compr_lzo.c
@@ -2,6 +2,7 @@
  * JFFS2 -- Journalling Flash File System, Version 2.
  *
  * Copyright © 2007 Nokia Corporation. All rights reserved.
+ * Copyright © 2004-2010 David Woodhouse <dwmw2@infradead.org>
  *
  * Created by Richard Purdie <rpurdie@openedhand.com>
  *
diff --git a/fs/jffs2/compr_rtime.c b/fs/jffs2/compr_rtime.c
index 546d153..9696ad9 100644
--- a/fs/jffs2/compr_rtime.c
+++ b/fs/jffs2/compr_rtime.c
@@ -2,6 +2,7 @@
  * JFFS2 -- Journalling Flash File System, Version 2.
  *
  * Copyright © 2001-2007 Red Hat, Inc.
+ * Copyright © 2004-2010 David Woodhouse <dwmw2@infradead.org>
  *
  * Created by Arjan van de Ven <arjanv@redhat.com>
  *
diff --git a/fs/jffs2/compr_rubin.c b/fs/jffs2/compr_rubin.c
index 170d289..a12b4f7 100644
--- a/fs/jffs2/compr_rubin.c
+++ b/fs/jffs2/compr_rubin.c
@@ -2,6 +2,7 @@
  * JFFS2 -- Journalling Flash File System, Version 2.
  *
  * Copyright © 2001-2007 Red Hat, Inc.
+ * Copyright © 2004-2010 David Woodhouse <dwmw2@infradead.org>
  *
  * Created by Arjan van de Ven <arjanv@redhat.com>
  *
diff --git a/fs/jffs2/compr_zlib.c b/fs/jffs2/compr_zlib.c
index b46661a4..97fc45d 100644
--- a/fs/jffs2/compr_zlib.c
+++ b/fs/jffs2/compr_zlib.c
@@ -2,6 +2,7 @@
  * JFFS2 -- Journalling Flash File System, Version 2.
  *
  * Copyright © 2001-2007 Red Hat, Inc.
+ * Copyright © 2004-2010 David Woodhouse <dwmw2@infradead.org>
  *
  * Created by David Woodhouse <dwmw2@infradead.org>
  *
diff --git a/fs/jffs2/debug.c b/fs/jffs2/debug.c
index ec35384..e0b76c8 100644
--- a/fs/jffs2/debug.c
+++ b/fs/jffs2/debug.c
@@ -2,6 +2,7 @@
  * JFFS2 -- Journalling Flash File System, Version 2.
  *
  * Copyright © 2001-2007 Red Hat, Inc.
+ * Copyright © 2004-2010 David Woodhouse <dwmw2@infradead.org>
  *
  * Created by David Woodhouse <dwmw2@infradead.org>
  *
diff --git a/fs/jffs2/debug.h b/fs/jffs2/debug.h
index a113ecc..c4f8eef 100644
--- a/fs/jffs2/debug.h
+++ b/fs/jffs2/debug.h
@@ -2,6 +2,7 @@
  * JFFS2 -- Journalling Flash File System, Version 2.
  *
  * Copyright © 2001-2007 Red Hat, Inc.
+ * Copyright © 2004-2010 David Woodhouse <dwmw2@infradead.org>
  *
  * Created by David Woodhouse <dwmw2@infradead.org>
  *
diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c
index 166062a..1aaf98d 100644
--- a/fs/jffs2/dir.c
+++ b/fs/jffs2/dir.c
@@ -2,6 +2,7 @@
  * JFFS2 -- Journalling Flash File System, Version 2.
  *
  * Copyright © 2001-2007 Red Hat, Inc.
+ * Copyright © 2004-2010 David Woodhouse <dwmw2@infradead.org>
  *
  * Created by David Woodhouse <dwmw2@infradead.org>
  *
diff --git a/fs/jffs2/erase.c b/fs/jffs2/erase.c
index 6286ad9..abac961 100644
--- a/fs/jffs2/erase.c
+++ b/fs/jffs2/erase.c
@@ -2,6 +2,7 @@
  * JFFS2 -- Journalling Flash File System, Version 2.
  *
  * Copyright © 2001-2007 Red Hat, Inc.
+ * Copyright © 2004-2010 David Woodhouse <dwmw2@infradead.org>
  *
  * Created by David Woodhouse <dwmw2@infradead.org>
  *
diff --git a/fs/jffs2/file.c b/fs/jffs2/file.c
index e7291c1..5274912 100644
--- a/fs/jffs2/file.c
+++ b/fs/jffs2/file.c
@@ -2,6 +2,7 @@
  * JFFS2 -- Journalling Flash File System, Version 2.
  *
  * Copyright © 2001-2007 Red Hat, Inc.
+ * Copyright © 2004-2010 David Woodhouse <dwmw2@infradead.org>
  *
  * Created by David Woodhouse <dwmw2@infradead.org>
  *
diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c
index 26037e2..29d0970 100644
--- a/fs/jffs2/fs.c
+++ b/fs/jffs2/fs.c
@@ -2,6 +2,7 @@
  * JFFS2 -- Journalling Flash File System, Version 2.
  *
  * Copyright © 2001-2007 Red Hat, Inc.
+ * Copyright © 2004-2010 David Woodhouse <dwmw2@infradead.org>
  *
  * Created by David Woodhouse <dwmw2@infradead.org>
  *
diff --git a/fs/jffs2/gc.c b/fs/jffs2/gc.c
index f5e96bd..846a794 100644
--- a/fs/jffs2/gc.c
+++ b/fs/jffs2/gc.c
@@ -2,6 +2,7 @@
  * JFFS2 -- Journalling Flash File System, Version 2.
  *
  * Copyright © 2001-2007 Red Hat, Inc.
+ * Copyright © 2004-2010 David Woodhouse <dwmw2@infradead.org>
  *
  * Created by David Woodhouse <dwmw2@infradead.org>
  *
diff --git a/fs/jffs2/ioctl.c b/fs/jffs2/ioctl.c
index 9d41f43..859a598 100644
--- a/fs/jffs2/ioctl.c
+++ b/fs/jffs2/ioctl.c
@@ -2,6 +2,7 @@
  * JFFS2 -- Journalling Flash File System, Version 2.
  *
  * Copyright © 2001-2007 Red Hat, Inc.
+ * Copyright © 2004-2010 David Woodhouse <dwmw2@infradead.org>
  *
  * Created by David Woodhouse <dwmw2@infradead.org>
  *
diff --git a/fs/jffs2/jffs2_fs_i.h b/fs/jffs2/jffs2_fs_i.h
index c6923da..2e4a867 100644
--- a/fs/jffs2/jffs2_fs_i.h
+++ b/fs/jffs2/jffs2_fs_i.h
@@ -2,6 +2,7 @@
  * JFFS2 -- Journalling Flash File System, Version 2.
  *
  * Copyright © 2001-2007 Red Hat, Inc.
+ * Copyright © 2004-2010 David Woodhouse <dwmw2@infradead.org>
  *
  * Created by David Woodhouse <dwmw2@infradead.org>
  *
diff --git a/fs/jffs2/jffs2_fs_sb.h b/fs/jffs2/jffs2_fs_sb.h
index 85ef6db..6784bc8 100644
--- a/fs/jffs2/jffs2_fs_sb.h
+++ b/fs/jffs2/jffs2_fs_sb.h
@@ -2,6 +2,7 @@
  * JFFS2 -- Journalling Flash File System, Version 2.
  *
  * Copyright © 2001-2007 Red Hat, Inc.
+ * Copyright © 2004-2010 David Woodhouse <dwmw2@infradead.org>
  *
  * Created by David Woodhouse <dwmw2@infradead.org>
  *
-- 
cgit v1.1


From ffc18879903e55487bc5ac3c774b99a07de06029 Mon Sep 17 00:00:00 2001
From: Bill Pemberton <wfp5p@virginia.edu>
Date: Tue, 3 Aug 2010 15:19:30 -0400
Subject: omfs: fix uninitialized variable warning

quiet the warning:
fs/omfs/file.c: In function 'omfs_get_block':
fs/omfs/file.c:225: warning: 'new_block' may be used uninitialized in
this function

new_block is used properly by the call to omfs_grow_extent()

Signed-off-by: Bill Pemberton <wfp5p@virginia.edu>
Signed-off-by: Bob Copeland <me@bobcopeland.com>
---
 fs/omfs/file.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/omfs/file.c b/fs/omfs/file.c
index 76bc21b..d821d46 100644
--- a/fs/omfs/file.c
+++ b/fs/omfs/file.c
@@ -222,7 +222,7 @@ static int omfs_get_block(struct inode *inode, sector_t block,
 	struct buffer_head *bh;
 	sector_t next, offset;
 	int ret;
-	u64 new_block;
+	u64 uninitialized_var(new_block);
 	u32 max_extents;
 	int extent_count;
 	struct omfs_extent *oe;
-- 
cgit v1.1


From 6ae0185fe201eae0548dace2a84acb5050fc8606 Mon Sep 17 00:00:00 2001
From: David Woodhouse <David.Woodhouse@intel.com>
Date: Sun, 8 Aug 2010 21:19:42 +0100
Subject: mtd: Remove obsolete <mtd/compatmac.h> include

Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 fs/jffs2/nodelist.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'fs')

diff --git a/fs/jffs2/nodelist.h b/fs/jffs2/nodelist.h
index a881a42..523a916 100644
--- a/fs/jffs2/nodelist.h
+++ b/fs/jffs2/nodelist.h
@@ -24,7 +24,6 @@
 #ifdef __ECOS
 #include "os-ecos.h"
 #else
-#include <linux/mtd/compatmac.h> /* For compatibility with older kernels */
 #include "os-linux.h"
 #endif
 
-- 
cgit v1.1


From c9243f5bdd6637b2bb7dc254b54d9edf957ef17e Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Sun, 4 Jul 2010 00:15:07 +0200
Subject: autofs/autofs4: Move compat_ioctl handling into fs

Handling of autofs ioctl numbers does not need to be generic
and can easily be done directly in autofs itself.

This also pushes the BKL into autofs and autofs4 ioctl
methods.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: H. Peter Anvin <hpa@zytor.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Ian Kent <raven@themaw.net>
Cc: Autofs <autofs@linux.kernel.org>
Cc: John Kacur <jkacur@redhat.com>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
---
 fs/autofs/root.c  | 67 ++++++++++++++++++++++++++++++++++++++++++++++++++++---
 fs/autofs4/root.c | 49 ++++++++++++++++++++++++++++++++++++++++
 fs/compat_ioctl.c | 36 ------------------------------
 3 files changed, 113 insertions(+), 39 deletions(-)

(limited to 'fs')

diff --git a/fs/autofs/root.c b/fs/autofs/root.c
index 9a0520b..11b1ea7 100644
--- a/fs/autofs/root.c
+++ b/fs/autofs/root.c
@@ -16,6 +16,7 @@
 #include <linux/slab.h>
 #include <linux/param.h>
 #include <linux/time.h>
+#include <linux/compat.h>
 #include <linux/smp_lock.h>
 #include "autofs_i.h"
 
@@ -25,13 +26,17 @@ static int autofs_root_symlink(struct inode *,struct dentry *,const char *);
 static int autofs_root_unlink(struct inode *,struct dentry *);
 static int autofs_root_rmdir(struct inode *,struct dentry *);
 static int autofs_root_mkdir(struct inode *,struct dentry *,int);
-static int autofs_root_ioctl(struct inode *, struct file *,unsigned int,unsigned long);
+static long autofs_root_ioctl(struct file *,unsigned int,unsigned long);
+static long autofs_root_compat_ioctl(struct file *,unsigned int,unsigned long);
 
 const struct file_operations autofs_root_operations = {
 	.llseek		= generic_file_llseek,
 	.read		= generic_read_dir,
 	.readdir	= autofs_root_readdir,
-	.ioctl		= autofs_root_ioctl,
+	.unlocked_ioctl	= autofs_root_ioctl,
+#ifdef CONFIG_COMPAT
+	.compat_ioctl	= autofs_root_compat_ioctl,
+#endif
 };
 
 const struct inode_operations autofs_root_inode_operations = {
@@ -492,6 +497,25 @@ static int autofs_root_mkdir(struct inode *dir, struct dentry *dentry, int mode)
 }
 
 /* Get/set timeout ioctl() operation */
+#ifdef CONFIG_COMPAT
+static inline int autofs_compat_get_set_timeout(struct autofs_sb_info *sbi,
+					 unsigned int __user *p)
+{
+	unsigned long ntimeout;
+
+	if (get_user(ntimeout, p) ||
+	    put_user(sbi->exp_timeout / HZ, p))
+		return -EFAULT;
+
+	if (ntimeout > UINT_MAX/HZ)
+		sbi->exp_timeout = 0;
+	else
+		sbi->exp_timeout = ntimeout * HZ;
+
+	return 0;
+}
+#endif
+
 static inline int autofs_get_set_timeout(struct autofs_sb_info *sbi,
 					 unsigned long __user *p)
 {
@@ -546,7 +570,7 @@ static inline int autofs_expire_run(struct super_block *sb,
  * ioctl()'s on the root directory is the chief method for the daemon to
  * generate kernel reactions
  */
-static int autofs_root_ioctl(struct inode *inode, struct file *filp,
+static int autofs_do_root_ioctl(struct inode *inode, struct file *filp,
 			     unsigned int cmd, unsigned long arg)
 {
 	struct autofs_sb_info *sbi = autofs_sbi(inode->i_sb);
@@ -571,6 +595,10 @@ static int autofs_root_ioctl(struct inode *inode, struct file *filp,
 		return 0;
 	case AUTOFS_IOC_PROTOVER: /* Get protocol version */
 		return autofs_get_protover(argp);
+#ifdef CONFIG_COMPAT
+	case AUTOFS_IOC_SETTIMEOUT32:
+		return autofs_compat_get_set_timeout(sbi, argp);
+#endif
 	case AUTOFS_IOC_SETTIMEOUT:
 		return autofs_get_set_timeout(sbi, argp);
 	case AUTOFS_IOC_EXPIRE:
@@ -579,4 +607,37 @@ static int autofs_root_ioctl(struct inode *inode, struct file *filp,
 	default:
 		return -ENOSYS;
 	}
+
+}
+
+static long autofs_root_ioctl(struct file *filp,
+			     unsigned int cmd, unsigned long arg)
+{
+	int ret;
+
+	lock_kernel();
+	ret = autofs_do_root_ioctl(filp->f_path.dentry->d_inode,
+				   filp, cmd, arg);
+	unlock_kernel();
+
+	return ret;
+}
+
+#ifdef CONFIG_COMPAT
+static long autofs_root_compat_ioctl(struct file *filp,
+			     unsigned int cmd, unsigned long arg)
+{
+	struct inode *inode = filp->f_path.dentry->d_inode;
+	int ret;
+
+	lock_kernel();
+	if (cmd == AUTOFS_IOC_READY || cmd == AUTOFS_IOC_FAIL)
+		ret = autofs_do_root_ioctl(inode, filp, cmd, arg);
+	else
+		ret = autofs_do_root_ioctl(inode, filp, cmd,
+			(unsigned long)compat_ptr(arg));
+	unlock_kernel();
+
+	return ret;
 }
+#endif
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index db4117e..48e056e 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -18,7 +18,9 @@
 #include <linux/slab.h>
 #include <linux/param.h>
 #include <linux/time.h>
+#include <linux/compat.h>
 #include <linux/smp_lock.h>
+
 #include "autofs_i.h"
 
 static int autofs4_dir_symlink(struct inode *,struct dentry *,const char *);
@@ -26,6 +28,7 @@ static int autofs4_dir_unlink(struct inode *,struct dentry *);
 static int autofs4_dir_rmdir(struct inode *,struct dentry *);
 static int autofs4_dir_mkdir(struct inode *,struct dentry *,int);
 static long autofs4_root_ioctl(struct file *,unsigned int,unsigned long);
+static long autofs4_root_compat_ioctl(struct file *,unsigned int,unsigned long);
 static int autofs4_dir_open(struct inode *inode, struct file *file);
 static struct dentry *autofs4_lookup(struct inode *,struct dentry *, struct nameidata *);
 static void *autofs4_follow_link(struct dentry *, struct nameidata *);
@@ -40,6 +43,9 @@ const struct file_operations autofs4_root_operations = {
 	.readdir	= dcache_readdir,
 	.llseek		= dcache_dir_lseek,
 	.unlocked_ioctl	= autofs4_root_ioctl,
+#ifdef CONFIG_COMPAT
+	.compat_ioctl	= autofs4_root_compat_ioctl,
+#endif
 };
 
 const struct file_operations autofs4_dir_operations = {
@@ -840,6 +846,26 @@ static int autofs4_dir_mkdir(struct inode *dir, struct dentry *dentry, int mode)
 }
 
 /* Get/set timeout ioctl() operation */
+#ifdef CONFIG_COMPAT
+static inline int autofs4_compat_get_set_timeout(struct autofs_sb_info *sbi,
+					 compat_ulong_t __user *p)
+{
+	int rv;
+	unsigned long ntimeout;
+
+	if ((rv = get_user(ntimeout, p)) ||
+	     (rv = put_user(sbi->exp_timeout/HZ, p)))
+		return rv;
+
+	if (ntimeout > UINT_MAX/HZ)
+		sbi->exp_timeout = 0;
+	else
+		sbi->exp_timeout = ntimeout * HZ;
+
+	return 0;
+}
+#endif
+
 static inline int autofs4_get_set_timeout(struct autofs_sb_info *sbi,
 					 unsigned long __user *p)
 {
@@ -933,6 +959,10 @@ static int autofs4_root_ioctl_unlocked(struct inode *inode, struct file *filp,
 		return autofs4_get_protosubver(sbi, p);
 	case AUTOFS_IOC_SETTIMEOUT:
 		return autofs4_get_set_timeout(sbi, p);
+#ifdef CONFIG_COMPAT
+	case AUTOFS_IOC_SETTIMEOUT32:
+		return autofs4_compat_get_set_timeout(sbi, p);
+#endif
 
 	case AUTOFS_IOC_ASKUMOUNT:
 		return autofs4_ask_umount(filp->f_path.mnt, p);
@@ -961,3 +991,22 @@ static long autofs4_root_ioctl(struct file *filp,
 
 	return ret;
 }
+
+#ifdef CONFIG_COMPAT
+static long autofs4_root_compat_ioctl(struct file *filp,
+			     unsigned int cmd, unsigned long arg)
+{
+	struct inode *inode = filp->f_path.dentry->d_inode;
+	int ret;
+
+	lock_kernel();
+	if (cmd == AUTOFS_IOC_READY || cmd == AUTOFS_IOC_FAIL)
+		ret = autofs4_root_ioctl_unlocked(inode, filp, cmd, arg);
+	else
+		ret = autofs4_root_ioctl_unlocked(inode, filp, cmd,
+			(unsigned long)compat_ptr(arg));
+	unlock_kernel();
+
+	return ret;
+}
+#endif
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index 641640d..618f381 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -131,23 +131,6 @@ static int w_long(unsigned int fd, unsigned int cmd,
 	return err;
 }
 
-static int rw_long(unsigned int fd, unsigned int cmd,
-		compat_ulong_t __user *argp)
-{
-	mm_segment_t old_fs = get_fs();
-	int err;
-	unsigned long val;
-
-	if(get_user(val, argp))
-		return -EFAULT;
-	set_fs (KERNEL_DS);
-	err = sys_ioctl(fd, cmd, (unsigned long)&val);
-	set_fs (old_fs);
-	if (!err && put_user(val, argp))
-		return -EFAULT;
-	return err;
-}
-
 struct compat_video_event {
 	int32_t		type;
 	compat_time_t	timestamp;
@@ -594,12 +577,6 @@ static int do_smb_getmountuid(unsigned int fd, unsigned int cmd,
 	return err;
 }
 
-static int ioc_settimeout(unsigned int fd, unsigned int cmd,
-		compat_ulong_t __user *argp)
-{
-	return rw_long(fd, AUTOFS_IOC_SETTIMEOUT, argp);
-}
-
 /* Bluetooth ioctls */
 #define HCIUARTSETPROTO	_IOW('U', 200, int)
 #define HCIUARTGETPROTO	_IOR('U', 201, int)
@@ -1281,13 +1258,6 @@ COMPATIBLE_IOCTL(SOUND_MIXER_PRIVATE5)
 COMPATIBLE_IOCTL(SOUND_MIXER_GETLEVELS)
 COMPATIBLE_IOCTL(SOUND_MIXER_SETLEVELS)
 COMPATIBLE_IOCTL(OSS_GETVERSION)
-/* AUTOFS */
-COMPATIBLE_IOCTL(AUTOFS_IOC_CATATONIC)
-COMPATIBLE_IOCTL(AUTOFS_IOC_PROTOVER)
-COMPATIBLE_IOCTL(AUTOFS_IOC_EXPIRE)
-COMPATIBLE_IOCTL(AUTOFS_IOC_EXPIRE_MULTI)
-COMPATIBLE_IOCTL(AUTOFS_IOC_PROTOSUBVER)
-COMPATIBLE_IOCTL(AUTOFS_IOC_ASKUMOUNT)
 /* Raw devices */
 COMPATIBLE_IOCTL(RAW_SETBIND)
 COMPATIBLE_IOCTL(RAW_GETBIND)
@@ -1552,9 +1522,6 @@ static long do_ioctl_trans(int fd, unsigned int cmd,
 	case RAW_GETBIND:
 		return raw_ioctl(fd, cmd, argp);
 #endif
-#define AUTOFS_IOC_SETTIMEOUT32 _IOWR(0x93,0x64,unsigned int)
-	case AUTOFS_IOC_SETTIMEOUT32:
-		return ioc_settimeout(fd, cmd, argp);
 	/* One SMB ioctl needs translations. */
 #define SMB_IOC_GETMOUNTUID_32 _IOR('u', 1, compat_uid_t)
 	case SMB_IOC_GETMOUNTUID_32:
@@ -1609,9 +1576,6 @@ static long do_ioctl_trans(int fd, unsigned int cmd,
 	case KDSKBMETA:
 	case KDSKBLED:
 	case KDSETLED:
-	/* AUTOFS */
-	case AUTOFS_IOC_READY:
-	case AUTOFS_IOC_FAIL:
 	/* NBD */
 	case NBD_SET_SOCK:
 	case NBD_SET_BLKSIZE:
-- 
cgit v1.1


From a1275c3b21e433888994f7b979cd1129d384ec9c Mon Sep 17 00:00:00 2001
From: Prarit Bhargava <prarit@redhat.com>
Date: Thu, 27 May 2010 11:19:30 -0400
Subject: ecryptfs: Fix warning in ecryptfs_process_response()

Fix warning seen with "make -j24 CONFIG_DEBUG_SECTION_MISMATCH=y V=1":

fs/ecryptfs/messaging.c: In function 'ecryptfs_process_response':
fs/ecryptfs/messaging.c:276: warning: 'daemon' may be used uninitialized in this function

Signed-off-by: Prarit Bhargava <prarit@redhat.com>
Signed-off-by: Tyler Hicks <tyhicks@linux.vnet.ibm.com>
---
 fs/ecryptfs/messaging.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/ecryptfs/messaging.c b/fs/ecryptfs/messaging.c
index 46c4dd8..bcb68c0 100644
--- a/fs/ecryptfs/messaging.c
+++ b/fs/ecryptfs/messaging.c
@@ -274,7 +274,7 @@ int ecryptfs_process_response(struct ecryptfs_message *msg, uid_t euid,
 			      struct user_namespace *user_ns, struct pid *pid,
 			      u32 seq)
 {
-	struct ecryptfs_daemon *daemon;
+	struct ecryptfs_daemon *uninitialized_var(daemon);
 	struct ecryptfs_msg_ctx *msg_ctx;
 	size_t msg_size;
 	struct nsproxy *nsproxy;
-- 
cgit v1.1


From c43f7b8fb03be8bcc579bfc4e6ab70eac887ab55 Mon Sep 17 00:00:00 2001
From: Tyler Hicks <tyhicks@linux.vnet.ibm.com>
Date: Tue, 3 Nov 2009 11:45:11 -0600
Subject: eCryptfs: Handle ioctl calls with unlocked and compat functions

Lower filesystems that only implemented unlocked_ioctl weren't being
passed ioctl calls because eCryptfs only checked for
lower_file->f_op->ioctl and returned -ENOTTY if it was NULL.

eCryptfs shouldn't implement ioctl(), since it doesn't require the BKL.
This patch introduces ecryptfs_unlocked_ioctl() and
ecryptfs_compat_ioctl(), which passes the calls on to the lower file
system.

https://bugs.launchpad.net/ecryptfs/+bug/469664

Reported-by: James Dupin <james.dupin@gmail.com>
Cc: stable@kernel.org
Signed-off-by: Tyler Hicks <tyhicks@linux.vnet.ibm.com>
---
 fs/ecryptfs/file.c | 56 ++++++++++++++++++++++++++++++++++--------------------
 1 file changed, 35 insertions(+), 21 deletions(-)

(limited to 'fs')

diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c
index e8fcf4e..6e4f84c 100644
--- a/fs/ecryptfs/file.c
+++ b/fs/ecryptfs/file.c
@@ -292,12 +292,40 @@ static int ecryptfs_fasync(int fd, struct file *file, int flag)
 	return rc;
 }
 
-static int ecryptfs_ioctl(struct inode *inode, struct file *file,
-			  unsigned int cmd, unsigned long arg);
+static long
+ecryptfs_unlocked_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+	struct file *lower_file = NULL;
+	long rc = -ENOTTY;
+
+	if (ecryptfs_file_to_private(file))
+		lower_file = ecryptfs_file_to_lower(file);
+	if (lower_file && lower_file->f_op && lower_file->f_op->unlocked_ioctl)
+		rc = lower_file->f_op->unlocked_ioctl(lower_file, cmd, arg);
+	return rc;
+}
+
+#ifdef CONFIG_COMPAT
+static long
+ecryptfs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+	struct file *lower_file = NULL;
+	long rc = -ENOIOCTLCMD;
+
+	if (ecryptfs_file_to_private(file))
+		lower_file = ecryptfs_file_to_lower(file);
+	if (lower_file && lower_file->f_op && lower_file->f_op->compat_ioctl)
+		rc = lower_file->f_op->compat_ioctl(lower_file, cmd, arg);
+	return rc;
+}
+#endif
 
 const struct file_operations ecryptfs_dir_fops = {
 	.readdir = ecryptfs_readdir,
-	.ioctl = ecryptfs_ioctl,
+	.unlocked_ioctl = ecryptfs_unlocked_ioctl,
+#ifdef CONFIG_COMPAT
+	.compat_ioctl = ecryptfs_compat_ioctl,
+#endif
 	.open = ecryptfs_open,
 	.flush = ecryptfs_flush,
 	.release = ecryptfs_release,
@@ -313,7 +341,10 @@ const struct file_operations ecryptfs_main_fops = {
 	.write = do_sync_write,
 	.aio_write = generic_file_aio_write,
 	.readdir = ecryptfs_readdir,
-	.ioctl = ecryptfs_ioctl,
+	.unlocked_ioctl = ecryptfs_unlocked_ioctl,
+#ifdef CONFIG_COMPAT
+	.compat_ioctl = ecryptfs_compat_ioctl,
+#endif
 	.mmap = generic_file_mmap,
 	.open = ecryptfs_open,
 	.flush = ecryptfs_flush,
@@ -322,20 +353,3 @@ const struct file_operations ecryptfs_main_fops = {
 	.fasync = ecryptfs_fasync,
 	.splice_read = generic_file_splice_read,
 };
-
-static int
-ecryptfs_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
-	       unsigned long arg)
-{
-	int rc = 0;
-	struct file *lower_file = NULL;
-
-	if (ecryptfs_file_to_private(file))
-		lower_file = ecryptfs_file_to_lower(file);
-	if (lower_file && lower_file->f_op && lower_file->f_op->ioctl)
-		rc = lower_file->f_op->ioctl(ecryptfs_inode_to_lower(inode),
-					     lower_file, cmd, arg);
-	else
-		rc = -ENOTTY;
-	return rc;
-}
-- 
cgit v1.1


From 31f73bee3e170b7cabb35db9e2f4bf7919b9d036 Mon Sep 17 00:00:00 2001
From: Lino Sanfilippo <LinoSanfilippo@gmx.de>
Date: Thu, 29 Jul 2010 13:01:36 +0200
Subject: ecryptfs: release reference to lower mount if interpose fails

In ecryptfs_lookup_and_interpose_lower() the lower mount is not decremented
if allocation of a dentry info struct failed. As a result the lower filesystem
cant be unmounted any more (since it is considered busy). This patch corrects
the reference counting.

Signed-off-by: Lino Sanfilippo <LinoSanfilippo@gmx.de>
Cc: stable@kernel.org
Signed-off-by: Tyler Hicks <tyhicks@linux.vnet.ibm.com>
---
 fs/ecryptfs/inode.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index 31ef525..8cd617b 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -264,7 +264,7 @@ int ecryptfs_lookup_and_interpose_lower(struct dentry *ecryptfs_dentry,
 		printk(KERN_ERR "%s: Out of memory whilst attempting "
 		       "to allocate ecryptfs_dentry_info struct\n",
 			__func__);
-		goto out_dput;
+		goto out_put;
 	}
 	ecryptfs_set_dentry_lower(ecryptfs_dentry, lower_dentry);
 	ecryptfs_set_dentry_lower_mnt(ecryptfs_dentry, lower_mnt);
@@ -339,8 +339,9 @@ int ecryptfs_lookup_and_interpose_lower(struct dentry *ecryptfs_dentry,
 out_free_kmem:
 	kmem_cache_free(ecryptfs_header_cache_2, page_virt);
 	goto out;
-out_dput:
+out_put:
 	dput(lower_dentry);
+	mntput(lower_mnt);
 	d_drop(ecryptfs_dentry);
 out:
 	return rc;
-- 
cgit v1.1


From ceeab92971e8af05c1e81a4ff2c271124b55bb9b Mon Sep 17 00:00:00 2001
From: Julia Lawall <julia@diku.dk>
Date: Fri, 6 Aug 2010 22:58:49 +0200
Subject: fs/ecryptfs/file.c: introduce missing free

The comments in the code indicate that file_info should be released if the
function fails.  This releasing is done at the label out_free, not out.

The semantic match that finds this problem is as follows:
(http://www.emn.fr/x-info/coccinelle/)

// <smpl>
@r exists@
local idexpression x;
statement S;
expression E;
identifier f,f1,l;
position p1,p2;
expression *ptr != NULL;
@@

x@p1 = kmem_cache_zalloc(...);
...
if (x == NULL) S
<... when != x
     when != if (...) { <+...x...+> }
(
x->f1 = E
|
 (x->f1 == NULL || ...)
|
 f(...,x->f1,...)
)
...>
(
 return <+...x...+>;
|
 return@p2 ...;
)

@script:python@
p1 << r.p1;
p2 << r.p2;
@@

print "* file: %s kmem_cache_zalloc %s" % (p1[0].file,p1[0].line)
// </smpl>

Signed-off-by: Julia Lawall <julia@diku.dk>
Cc: stable@kernel.org
Signed-off-by: Tyler Hicks <tyhicks@linux.vnet.ibm.com>
---
 fs/ecryptfs/file.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c
index 6e4f84c..622c9514 100644
--- a/fs/ecryptfs/file.c
+++ b/fs/ecryptfs/file.c
@@ -199,7 +199,7 @@ static int ecryptfs_open(struct inode *inode, struct file *file)
 			       "the persistent file for the dentry with name "
 			       "[%s]; rc = [%d]\n", __func__,
 			       ecryptfs_dentry->d_name.name, rc);
-			goto out;
+			goto out_free;
 		}
 	}
 	if ((ecryptfs_inode_to_private(inode)->lower_file->f_flags & O_RDONLY)
@@ -207,7 +207,7 @@ static int ecryptfs_open(struct inode *inode, struct file *file)
 		rc = -EPERM;
 		printk(KERN_WARNING "%s: Lower persistent file is RO; eCryptfs "
 		       "file must hence be opened RO\n", __func__);
-		goto out;
+		goto out_free;
 	}
 	ecryptfs_set_file_lower(
 		file, ecryptfs_inode_to_private(inode)->lower_file);
-- 
cgit v1.1


From 21edad32205e97dc7ccb81a85234c77e760364c8 Mon Sep 17 00:00:00 2001
From: Lino Sanfilippo <linosanfilippo@gmx.de>
Date: Thu, 15 Jul 2010 15:11:55 +0200
Subject: ecryptfs: dont call lookup_one_len to avoid NULL nameidata

I have encountered the same problem that Eric Sandeen described in
this post

 http://lkml.org/lkml/fancy/2010/4/23/467

while experimenting with stackable filesystems.

The reason seems to be that ecryptfs calls lookup_one_len() to get the
lower dentry, which in turn calls the lower parent dirs d_revalidate()
with a NULL nameidata object.
If ecryptfs is the underlaying filesystem, the NULL pointer dereference
occurs, since ecryptfs is not prepared to handle a NULL nameidata.

I know that this cant happen any more, since it is no longer allowed to
mount ecryptfs upon itself.

But maybe this patch it useful nevertheless, since the problem would still
apply for an underlaying filesystem that implements d_revalidate() and is
not prepared to handle a NULL nameidata (I dont know if there actually
is such a fs).

With this patch (against 2.6.35-rc5) ecryptfs uses the vfs_lookup_path()
function instead of lookup_one_len() which ensures that the nameidata
passed to the lower filesystems d_revalidate().

Signed-off-by: Lino Sanfilippo <LinoSanfilippo@gmx.de>
Signed-off-by: Tyler Hicks <tyhicks@linux.vnet.ibm.com>
---
 fs/ecryptfs/inode.c | 89 +++++++++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 77 insertions(+), 12 deletions(-)

(limited to 'fs')

diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index 8cd617b..a8acfc5 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -348,6 +348,76 @@ out:
 }
 
 /**
+ * ecryptfs_new_lower_dentry
+ * @ename: The name of the new dentry.
+ * @lower_dir_dentry: Parent directory of the new dentry.
+ * @nd: nameidata from last lookup.
+ *
+ * Create a new dentry or get it from lower parent dir.
+ */
+static struct dentry *
+ecryptfs_new_lower_dentry(struct qstr *name, struct dentry *lower_dir_dentry,
+			  struct nameidata *nd)
+{
+	struct dentry *new_dentry;
+	struct dentry *tmp;
+	struct inode *lower_dir_inode;
+
+	lower_dir_inode = lower_dir_dentry->d_inode;
+
+	tmp = d_alloc(lower_dir_dentry, name);
+	if (!tmp)
+		return ERR_PTR(-ENOMEM);
+
+	mutex_lock(&lower_dir_inode->i_mutex);
+	new_dentry = lower_dir_inode->i_op->lookup(lower_dir_inode, tmp, nd);
+	mutex_unlock(&lower_dir_inode->i_mutex);
+
+	if (!new_dentry)
+		new_dentry = tmp;
+	else
+		dput(tmp);
+
+	return new_dentry;
+}
+
+
+/**
+ * ecryptfs_lookup_one_lower
+ * @ecryptfs_dentry: The eCryptfs dentry that we are looking up
+ * @lower_dir_dentry: lower parent directory
+ *
+ * Get the lower dentry from vfs. If lower dentry does not exist yet,
+ * create it.
+ */
+static struct dentry *
+ecryptfs_lookup_one_lower(struct dentry *ecryptfs_dentry,
+			  struct dentry *lower_dir_dentry)
+{
+	struct nameidata nd;
+	struct vfsmount *lower_mnt;
+	struct qstr *name;
+	int err;
+
+	name = &ecryptfs_dentry->d_name;
+	lower_mnt = mntget(ecryptfs_dentry_to_lower_mnt(
+				    ecryptfs_dentry->d_parent));
+	err = vfs_path_lookup(lower_dir_dentry, lower_mnt, name->name , 0, &nd);
+	mntput(lower_mnt);
+
+	if (!err) {
+		/* we dont need the mount */
+		mntput(nd.path.mnt);
+		return nd.path.dentry;
+	}
+	if (err != -ENOENT)
+		return ERR_PTR(err);
+
+	/* create a new lower dentry */
+	return ecryptfs_new_lower_dentry(name, lower_dir_dentry, &nd);
+}
+
+/**
  * ecryptfs_lookup
  * @ecryptfs_dir_inode: The eCryptfs directory inode
  * @ecryptfs_dentry: The eCryptfs dentry that we are looking up
@@ -374,14 +444,12 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode,
 		goto out_d_drop;
 	}
 	lower_dir_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry->d_parent);
-	mutex_lock(&lower_dir_dentry->d_inode->i_mutex);
-	lower_dentry = lookup_one_len(ecryptfs_dentry->d_name.name,
-				      lower_dir_dentry,
-				      ecryptfs_dentry->d_name.len);
-	mutex_unlock(&lower_dir_dentry->d_inode->i_mutex);
+
+	lower_dentry = ecryptfs_lookup_one_lower(ecryptfs_dentry,
+						 lower_dir_dentry);
 	if (IS_ERR(lower_dentry)) {
 		rc = PTR_ERR(lower_dentry);
-		ecryptfs_printk(KERN_DEBUG, "%s: lookup_one_len() returned "
+		ecryptfs_printk(KERN_DEBUG, "%s: lookup_one_lower() returned "
 				"[%d] on lower_dentry = [%s]\n", __func__, rc,
 				encrypted_and_encoded_name);
 		goto out_d_drop;
@@ -403,14 +471,11 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode,
 		       "filename; rc = [%d]\n", __func__, rc);
 		goto out_d_drop;
 	}
-	mutex_lock(&lower_dir_dentry->d_inode->i_mutex);
-	lower_dentry = lookup_one_len(encrypted_and_encoded_name,
-				      lower_dir_dentry,
-				      encrypted_and_encoded_name_size - 1);
-	mutex_unlock(&lower_dir_dentry->d_inode->i_mutex);
+	lower_dentry = ecryptfs_lookup_one_lower(ecryptfs_dentry,
+						 lower_dir_dentry);
 	if (IS_ERR(lower_dentry)) {
 		rc = PTR_ERR(lower_dentry);
-		ecryptfs_printk(KERN_DEBUG, "%s: lookup_one_len() returned "
+		ecryptfs_printk(KERN_DEBUG, "%s: lookup_one_lower() returned "
 				"[%d] on lower_dentry = [%s]\n", __func__, rc,
 				encrypted_and_encoded_name);
 		goto out_d_drop;
-- 
cgit v1.1


From 005a59ec745d23f60222f7712adde48f64d7d3c8 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Tue, 21 Apr 2009 01:27:08 -0400
Subject: Deal with missing exports for hostfs

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/hostfs/hostfs_user.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/hostfs/hostfs_user.c b/fs/hostfs/hostfs_user.c
index b79424f..4b8c666 100644
--- a/fs/hostfs/hostfs_user.c
+++ b/fs/hostfs/hostfs_user.c
@@ -76,9 +76,9 @@ int file_type(const char *path, int *maj, int *min)
 	 * about its definition.
 	 */
 	if (maj != NULL)
-		*maj = major(buf.st_rdev);
+		*maj = os_major(buf.st_rdev);
 	if (min != NULL)
-		*min = minor(buf.st_rdev);
+		*min = os_minor(buf.st_rdev);
 
 	if (S_ISDIR(buf.st_mode))
 		return OS_TYPE_DIR;
@@ -361,7 +361,7 @@ int do_mknod(const char *file, int mode, unsigned int major, unsigned int minor)
 {
 	int err;
 
-	err = mknod(file, mode, makedev(major, minor));
+	err = mknod(file, mode, os_makedev(major, minor));
 	if (err)
 		return -errno;
 	return 0;
-- 
cgit v1.1


From 918377b696bff7384923a1ef4bf0af7626cb9b68 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 6 Jun 2010 23:56:02 -0400
Subject: missing include in hppfs

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/hppfs/hppfs.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'fs')

diff --git a/fs/hppfs/hppfs.c b/fs/hppfs/hppfs.c
index 826c3f9..943ce75 100644
--- a/fs/hppfs/hppfs.c
+++ b/fs/hppfs/hppfs.c
@@ -15,6 +15,7 @@
 #include <linux/slab.h>
 #include <linux/statfs.h>
 #include <linux/types.h>
+#include <linux/pid_namespace.h>
 #include <asm/uaccess.h>
 #include "os.h"
 
-- 
cgit v1.1


From 0e4f6a791b1e8cfde75a74e2f885642ecb3fe9d8 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 4 Jul 2010 12:18:57 +0400
Subject: Fix reiserfs_file_release()

a) count file openers correctly; i_count use was completely wrong
b) use new mutex for exclusion between final close/open/truncate,
to protect tailpacking logics.  i_mutex use was wrong and resulted
in deadlocks.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/reiserfs/file.c  | 50 ++++++++++++++++++++++++++++----------------------
 fs/reiserfs/inode.c |  2 --
 fs/reiserfs/super.c |  2 ++
 3 files changed, 30 insertions(+), 24 deletions(-)

(limited to 'fs')

diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c
index b82cdd8..6846371 100644
--- a/fs/reiserfs/file.c
+++ b/fs/reiserfs/file.c
@@ -38,20 +38,24 @@ static int reiserfs_file_release(struct inode *inode, struct file *filp)
 
 	BUG_ON(!S_ISREG(inode->i_mode));
 
+        if (atomic_add_unless(&REISERFS_I(inode)->openers, -1, 1))
+		return 0;
+
+	mutex_lock(&(REISERFS_I(inode)->tailpack));
+
+        if (!atomic_dec_and_test(&REISERFS_I(inode)->openers)) {
+		mutex_unlock(&(REISERFS_I(inode)->tailpack));
+		return 0;
+	}
+
 	/* fast out for when nothing needs to be done */
-	if ((atomic_read(&inode->i_count) > 1 ||
-	     !(REISERFS_I(inode)->i_flags & i_pack_on_close_mask) ||
+	if ((!(REISERFS_I(inode)->i_flags & i_pack_on_close_mask) ||
 	     !tail_has_to_be_packed(inode)) &&
 	    REISERFS_I(inode)->i_prealloc_count <= 0) {
+		mutex_unlock(&(REISERFS_I(inode)->tailpack));
 		return 0;
 	}
 
-	mutex_lock(&inode->i_mutex);
-
-	mutex_lock(&(REISERFS_I(inode)->i_mmap));
-	if (REISERFS_I(inode)->i_flags & i_ever_mapped)
-		REISERFS_I(inode)->i_flags &= ~i_pack_on_close_mask;
-
 	reiserfs_write_lock(inode->i_sb);
 	/* freeing preallocation only involves relogging blocks that
 	 * are already in the current transaction.  preallocation gets
@@ -94,9 +98,10 @@ static int reiserfs_file_release(struct inode *inode, struct file *filp)
 	if (!err)
 		err = jbegin_failure;
 
-	if (!err && atomic_read(&inode->i_count) <= 1 &&
+	if (!err &&
 	    (REISERFS_I(inode)->i_flags & i_pack_on_close_mask) &&
 	    tail_has_to_be_packed(inode)) {
+
 		/* if regular file is released by last holder and it has been
 		   appended (we append by unformatted node only) or its direct
 		   item(s) had to be converted, then it may have to be
@@ -104,27 +109,28 @@ static int reiserfs_file_release(struct inode *inode, struct file *filp)
 		err = reiserfs_truncate_file(inode, 0);
 	}
       out:
-	mutex_unlock(&(REISERFS_I(inode)->i_mmap));
-	mutex_unlock(&inode->i_mutex);
 	reiserfs_write_unlock(inode->i_sb);
+	mutex_unlock(&(REISERFS_I(inode)->tailpack));
 	return err;
 }
 
-static int reiserfs_file_mmap(struct file *file, struct vm_area_struct *vma)
+static int reiserfs_file_open(struct inode *inode, struct file *file)
 {
-	struct inode *inode;
-
-	inode = file->f_path.dentry->d_inode;
-	mutex_lock(&(REISERFS_I(inode)->i_mmap));
-	REISERFS_I(inode)->i_flags |= i_ever_mapped;
-	mutex_unlock(&(REISERFS_I(inode)->i_mmap));
-
-	return generic_file_mmap(file, vma);
+	int err = dquot_file_open(inode, file);
+        if (!atomic_inc_not_zero(&REISERFS_I(inode)->openers)) {
+		/* somebody might be tailpacking on final close; wait for it */
+		mutex_lock(&(REISERFS_I(inode)->tailpack));
+		atomic_inc(&REISERFS_I(inode)->openers);
+		mutex_unlock(&(REISERFS_I(inode)->tailpack));
+	}
+	return err;
 }
 
 static void reiserfs_vfs_truncate_file(struct inode *inode)
 {
+	mutex_lock(&(REISERFS_I(inode)->tailpack));
 	reiserfs_truncate_file(inode, 1);
+	mutex_unlock(&(REISERFS_I(inode)->tailpack));
 }
 
 /* Sync a reiserfs file. */
@@ -288,8 +294,8 @@ const struct file_operations reiserfs_file_operations = {
 #ifdef CONFIG_COMPAT
 	.compat_ioctl = reiserfs_compat_ioctl,
 #endif
-	.mmap = reiserfs_file_mmap,
-	.open = dquot_file_open,
+	.mmap = generic_file_mmap,
+	.open = reiserfs_file_open,
 	.release = reiserfs_file_release,
 	.fsync = reiserfs_sync_file,
 	.aio_read = generic_file_aio_read,
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 0f22fda..6edac85 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -1138,7 +1138,6 @@ static void init_inode(struct inode *inode, struct treepath *path)
 	REISERFS_I(inode)->i_prealloc_count = 0;
 	REISERFS_I(inode)->i_trans_id = 0;
 	REISERFS_I(inode)->i_jl = NULL;
-	mutex_init(&(REISERFS_I(inode)->i_mmap));
 	reiserfs_init_xattr_rwsem(inode);
 
 	if (stat_data_v1(ih)) {
@@ -1841,7 +1840,6 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th,
 	REISERFS_I(inode)->i_attrs =
 	    REISERFS_I(dir)->i_attrs & REISERFS_INHERIT_MASK;
 	sd_attrs_to_i_attrs(REISERFS_I(inode)->i_attrs, inode);
-	mutex_init(&(REISERFS_I(inode)->i_mmap));
 	reiserfs_init_xattr_rwsem(inode);
 
 	/* key to search for correct place for new stat data */
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 9822fa1..1e1ee90 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -525,6 +525,8 @@ static struct inode *reiserfs_alloc_inode(struct super_block *sb)
 	    kmem_cache_alloc(reiserfs_inode_cachep, GFP_KERNEL);
 	if (!ei)
 		return NULL;
+	atomic_set(&ei->openers, 0);
+	mutex_init(&ei->tailpack);
 	return &ei->vfs_inode;
 }
 
-- 
cgit v1.1


From 256249584bda1a9357e2d29987a37f5b2df035f6 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 4 Jul 2010 12:23:11 +0400
Subject: fix leak in __logfs_create()

if kmalloc fails, we still need to drop the inode, as we do
on other failure exits.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/logfs/dir.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/logfs/dir.c b/fs/logfs/dir.c
index 72d1893..675cc49 100644
--- a/fs/logfs/dir.c
+++ b/fs/logfs/dir.c
@@ -434,8 +434,11 @@ static int __logfs_create(struct inode *dir, struct dentry *dentry,
 	int ret;
 
 	ta = kzalloc(sizeof(*ta), GFP_KERNEL);
-	if (!ta)
+	if (!ta) {
+		inode->i_nlink--;
+		iput(inode);
 		return -ENOMEM;
+	}
 
 	ta->state = CREATE_1;
 	ta->ino = inode->i_ino;
-- 
cgit v1.1


From eafdc7d190a944c755a9fe68573c193e6e0217e7 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 4 Jun 2010 11:29:53 +0200
Subject: sort out blockdev_direct_IO variants

Move the call to vmtruncate to get rid of accessive blocks to the callers
in prepearation of the new truncate calling sequence.  This was only done
for DIO_LOCKING filesystems, so the __blockdev_direct_IO_newtrunc variant
was not needed anyway.  Get rid of blockdev_direct_IO_no_locking and
its _newtrunc variant while at it as just opencoding the two additional
paramters is shorted than the name suffix.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/block_dev.c              |  5 ++-
 fs/direct-io.c              | 74 ++++++++++++---------------------------------
 fs/ext2/inode.c             |  2 +-
 fs/ext3/inode.c             | 11 +++++++
 fs/ext4/inode.c             | 15 +++++++--
 fs/fat/inode.c              |  4 +--
 fs/gfs2/aops.c              |  6 ++--
 fs/hfs/inode.c              | 17 ++++++++++-
 fs/hfsplus/inode.c          | 17 ++++++++++-
 fs/jfs/inode.c              | 17 ++++++++++-
 fs/nilfs2/inode.c           | 13 ++++++++
 fs/ocfs2/aops.c             |  9 +++---
 fs/reiserfs/inode.c         | 17 ++++++++++-
 fs/xfs/linux-2.6/xfs_aops.c | 16 +++++-----
 14 files changed, 140 insertions(+), 83 deletions(-)

(limited to 'fs')

diff --git a/fs/block_dev.c b/fs/block_dev.c
index 99d6af8..65a0c26 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -172,9 +172,8 @@ blkdev_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
 	struct file *file = iocb->ki_filp;
 	struct inode *inode = file->f_mapping->host;
 
-	return blockdev_direct_IO_no_locking_newtrunc(rw, iocb, inode,
-				I_BDEV(inode), iov, offset, nr_segs,
-				blkdev_get_blocks, NULL);
+	return __blockdev_direct_IO(rw, iocb, inode, I_BDEV(inode), iov, offset,
+				    nr_segs, blkdev_get_blocks, NULL, NULL, 0);
 }
 
 int __sync_blockdev(struct block_device *bdev, int wait)
diff --git a/fs/direct-io.c b/fs/direct-io.c
index a10cb91..51f270b 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -1136,8 +1136,27 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
 	return ret;
 }
 
+/*
+ * This is a library function for use by filesystem drivers.
+ *
+ * The locking rules are governed by the flags parameter:
+ *  - if the flags value contains DIO_LOCKING we use a fancy locking
+ *    scheme for dumb filesystems.
+ *    For writes this function is called under i_mutex and returns with
+ *    i_mutex held, for reads, i_mutex is not held on entry, but it is
+ *    taken and dropped again before returning.
+ *    For reads and writes i_alloc_sem is taken in shared mode and released
+ *    on I/O completion (which may happen asynchronously after returning to
+ *    the caller).
+ *
+ *  - if the flags value does NOT contain DIO_LOCKING we don't use any
+ *    internal locking but rather rely on the filesystem to synchronize
+ *    direct I/O reads/writes versus each other and truncate.
+ *    For reads and writes both i_mutex and i_alloc_sem are not held on
+ *    entry and are never taken.
+ */
 ssize_t
-__blockdev_direct_IO_newtrunc(int rw, struct kiocb *iocb, struct inode *inode,
+__blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
 	struct block_device *bdev, const struct iovec *iov, loff_t offset, 
 	unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io,
 	dio_submit_t submit_io,	int flags)
@@ -1233,57 +1252,4 @@ __blockdev_direct_IO_newtrunc(int rw, struct kiocb *iocb, struct inode *inode,
 out:
 	return retval;
 }
-EXPORT_SYMBOL(__blockdev_direct_IO_newtrunc);
-
-/*
- * This is a library function for use by filesystem drivers.
- *
- * The locking rules are governed by the flags parameter:
- *  - if the flags value contains DIO_LOCKING we use a fancy locking
- *    scheme for dumb filesystems.
- *    For writes this function is called under i_mutex and returns with
- *    i_mutex held, for reads, i_mutex is not held on entry, but it is
- *    taken and dropped again before returning.
- *    For reads and writes i_alloc_sem is taken in shared mode and released
- *    on I/O completion (which may happen asynchronously after returning to
- *    the caller).
- *
- *  - if the flags value does NOT contain DIO_LOCKING we don't use any
- *    internal locking but rather rely on the filesystem to synchronize
- *    direct I/O reads/writes versus each other and truncate.
- *    For reads and writes both i_mutex and i_alloc_sem are not held on
- *    entry and are never taken.
- */
-ssize_t
-__blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
-	struct block_device *bdev, const struct iovec *iov, loff_t offset,
-	unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io,
-	dio_submit_t submit_io,	int flags)
-{
-	ssize_t retval;
-
-	retval = __blockdev_direct_IO_newtrunc(rw, iocb, inode, bdev, iov,
-			offset, nr_segs, get_block, end_io, submit_io, flags);
-	/*
-	 * In case of error extending write may have instantiated a few
-	 * blocks outside i_size. Trim these off again for DIO_LOCKING.
-	 * NOTE: DIO_NO_LOCK/DIO_OWN_LOCK callers have to handle this in
-	 * their own manner. This is a further example of where the old
-	 * truncate sequence is inadequate.
-	 *
-	 * NOTE: filesystems with their own locking have to handle this
-	 * on their own.
-	 */
-	if (flags & DIO_LOCKING) {
-		if (unlikely((rw & WRITE) && retval < 0)) {
-			loff_t isize = i_size_read(inode);
-			loff_t end = offset + iov_length(iov, nr_segs);
-
-			if (end > isize)
-				vmtruncate(inode, isize);
-		}
-	}
-
-	return retval;
-}
 EXPORT_SYMBOL(__blockdev_direct_IO);
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index 3675088..f36e967 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -838,7 +838,7 @@ ext2_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
 	struct inode *inode = mapping->host;
 	ssize_t ret;
 
-	ret = blockdev_direct_IO_newtrunc(rw, iocb, inode, inode->i_sb->s_bdev,
+	ret = blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev,
 				iov, offset, nr_segs, ext2_get_block, NULL);
 	if (ret < 0 && (rw & WRITE))
 		ext2_write_failed(mapping, offset + iov_length(iov, nr_segs));
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 735f019..a66f3fe 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -1785,6 +1785,17 @@ retry:
 	ret = blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov,
 				 offset, nr_segs,
 				 ext3_get_block, NULL);
+	/*
+	 * In case of error extending write may have instantiated a few
+	 * blocks outside i_size. Trim these off again.
+	 */
+	if (unlikely((rw & WRITE) && ret < 0)) {
+		loff_t isize = i_size_read(inode);
+		loff_t end = offset + iov_length(iov, nr_segs);
+
+		if (end > isize)
+			vmtruncate(inode, isize);
+	}
 	if (ret == -ENOSPC && ext3_should_retry_alloc(inode->i_sb, &retries))
 		goto retry;
 
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 0afc8c1..d6a7701 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -3545,15 +3545,24 @@ static ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb,
 
 retry:
 	if (rw == READ && ext4_should_dioread_nolock(inode))
-		ret = blockdev_direct_IO_no_locking(rw, iocb, inode,
+		ret = __blockdev_direct_IO(rw, iocb, inode,
 				 inode->i_sb->s_bdev, iov,
 				 offset, nr_segs,
-				 ext4_get_block, NULL);
-	else
+				 ext4_get_block, NULL, NULL, 0);
+	else {
 		ret = blockdev_direct_IO(rw, iocb, inode,
 				 inode->i_sb->s_bdev, iov,
 				 offset, nr_segs,
 				 ext4_get_block, NULL);
+
+		if (unlikely((rw & WRITE) && ret < 0)) {
+			loff_t isize = i_size_read(inode);
+			loff_t end = offset + iov_length(iov, nr_segs);
+
+			if (end > isize)
+				vmtruncate(inode, isize);
+		}
+	}
 	if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
 		goto retry;
 
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 7bf45ae..ffe7c6f 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -212,8 +212,8 @@ static ssize_t fat_direct_IO(int rw, struct kiocb *iocb,
 	 * FAT need to use the DIO_LOCKING for avoiding the race
 	 * condition of fat_get_block() and ->truncate().
 	 */
-	ret = blockdev_direct_IO_newtrunc(rw, iocb, inode, inode->i_sb->s_bdev,
-				iov, offset, nr_segs, fat_get_block, NULL);
+	ret = blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev,
+				 iov, offset, nr_segs, fat_get_block, NULL);
 	if (ret < 0 && (rw & WRITE))
 		fat_write_failed(mapping, offset + iov_length(iov, nr_segs));
 
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index 9f8b525..703000d 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -1047,9 +1047,9 @@ static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb,
 	if (rv != 1)
 		goto out; /* dio not valid, fall back to buffered i/o */
 
-	rv = blockdev_direct_IO_no_locking(rw, iocb, inode, inode->i_sb->s_bdev,
-					   iov, offset, nr_segs,
-					   gfs2_get_block_direct, NULL);
+	rv = __blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov,
+				  offset, nr_segs, gfs2_get_block_direct,
+				  NULL, NULL, 0);
 out:
 	gfs2_glock_dq_m(1, &gh);
 	gfs2_holder_uninit(&gh);
diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c
index 14f5cb1..07b2464 100644
--- a/fs/hfs/inode.c
+++ b/fs/hfs/inode.c
@@ -112,9 +112,24 @@ static ssize_t hfs_direct_IO(int rw, struct kiocb *iocb,
 {
 	struct file *file = iocb->ki_filp;
 	struct inode *inode = file->f_path.dentry->d_inode->i_mapping->host;
+	ssize_t ret;
 
-	return blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov,
+	ret = blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov,
 				  offset, nr_segs, hfs_get_block, NULL);
+
+	/*
+	 * In case of error extending write may have instantiated a few
+	 * blocks outside i_size. Trim these off again.
+	 */
+	if (unlikely((rw & WRITE) && ret < 0)) {
+		loff_t isize = i_size_read(inode);
+		loff_t end = offset + iov_length(iov, nr_segs);
+
+		if (end > isize)
+			vmtruncate(inode, isize);
+	}
+
+	return ret;
 }
 
 static int hfs_writepages(struct address_space *mapping,
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c
index 9bbb829..4860217 100644
--- a/fs/hfsplus/inode.c
+++ b/fs/hfsplus/inode.c
@@ -105,9 +105,24 @@ static ssize_t hfsplus_direct_IO(int rw, struct kiocb *iocb,
 {
 	struct file *file = iocb->ki_filp;
 	struct inode *inode = file->f_path.dentry->d_inode->i_mapping->host;
+	ssize_t ret;
 
-	return blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov,
+	ret = blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov,
 				  offset, nr_segs, hfsplus_get_block, NULL);
+
+	/*
+	 * In case of error extending write may have instantiated a few
+	 * blocks outside i_size. Trim these off again.
+	 */
+	if (unlikely((rw & WRITE) && ret < 0)) {
+		loff_t isize = i_size_read(inode);
+		loff_t end = offset + iov_length(iov, nr_segs);
+
+		if (end > isize)
+			vmtruncate(inode, isize);
+	}
+
+	return ret;
 }
 
 static int hfsplus_writepages(struct address_space *mapping,
diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c
index ed9ba6f..79e6cda 100644
--- a/fs/jfs/inode.c
+++ b/fs/jfs/inode.c
@@ -317,9 +317,24 @@ static ssize_t jfs_direct_IO(int rw, struct kiocb *iocb,
 {
 	struct file *file = iocb->ki_filp;
 	struct inode *inode = file->f_mapping->host;
+	ssize_t ret;
 
-	return blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov,
+	ret = blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov,
 				offset, nr_segs, jfs_get_block, NULL);
+
+	/*
+	 * In case of error extending write may have instantiated a few
+	 * blocks outside i_size. Trim these off again.
+	 */
+	if (unlikely((rw & WRITE) && ret < 0)) {
+		loff_t isize = i_size_read(inode);
+		loff_t end = offset + iov_length(iov, nr_segs);
+
+		if (end > isize)
+			vmtruncate(inode, isize);
+	}
+
+	return ret;
 }
 
 const struct address_space_operations jfs_aops = {
diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c
index 39e038a..1dd9e6a 100644
--- a/fs/nilfs2/inode.c
+++ b/fs/nilfs2/inode.c
@@ -237,6 +237,19 @@ nilfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
 	/* Needs synchronization with the cleaner */
 	size = blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov,
 				  offset, nr_segs, nilfs_get_block, NULL);
+
+	/*
+	 * In case of error extending write may have instantiated a few
+	 * blocks outside i_size. Trim these off again.
+	 */
+	if (unlikely((rw & WRITE) && size < 0)) {
+		loff_t isize = i_size_read(inode);
+		loff_t end = offset + iov_length(iov, nr_segs);
+
+		if (end > isize)
+			vmtruncate(inode, isize);
+	}
+
 	return size;
 }
 
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 96337a4..0de69c9 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -643,11 +643,10 @@ static ssize_t ocfs2_direct_IO(int rw,
 	if (i_size_read(inode) <= offset)
 		return 0;
 
-	ret = blockdev_direct_IO_no_locking(rw, iocb, inode,
-					    inode->i_sb->s_bdev, iov, offset,
-					    nr_segs,
-					    ocfs2_direct_IO_get_blocks,
-					    ocfs2_dio_end_io);
+	ret = __blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev,
+				   iov, offset, nr_segs,
+				   ocfs2_direct_IO_get_blocks,
+				   ocfs2_dio_end_io, NULL, 0);
 
 	mlog_exit(ret);
 	return ret;
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 6edac85..4c1fb54 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -3057,10 +3057,25 @@ static ssize_t reiserfs_direct_IO(int rw, struct kiocb *iocb,
 {
 	struct file *file = iocb->ki_filp;
 	struct inode *inode = file->f_mapping->host;
+	ssize_t ret;
 
-	return blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov,
+	ret = blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov,
 				  offset, nr_segs,
 				  reiserfs_get_blocks_direct_io, NULL);
+
+	/*
+	 * In case of error extending write may have instantiated a few
+	 * blocks outside i_size. Trim these off again.
+	 */
+	if (unlikely((rw & WRITE) && ret < 0)) {
+		loff_t isize = i_size_read(inode);
+		loff_t end = offset + iov_length(iov, nr_segs);
+
+		if (end > isize)
+			vmtruncate(inode, isize);
+	}
+
+	return ret;
 }
 
 int reiserfs_setattr(struct dentry *dentry, struct iattr *attr)
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index d24e78f..7968d41 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -1478,17 +1478,17 @@ xfs_vm_direct_IO(
 	if (rw & WRITE) {
 		iocb->private = xfs_alloc_ioend(inode, IO_NEW);
 
-		ret = blockdev_direct_IO_no_locking(rw, iocb, inode, bdev, iov,
-						    offset, nr_segs,
-						    xfs_get_blocks_direct,
-						    xfs_end_io_direct_write);
+		ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iov,
+					    offset, nr_segs,
+					    xfs_get_blocks_direct,
+					    xfs_end_io_direct_write, NULL, 0);
 		if (ret != -EIOCBQUEUED && iocb->private)
 			xfs_destroy_ioend(iocb->private);
 	} else {
-		ret = blockdev_direct_IO_no_locking(rw, iocb, inode, bdev, iov,
-						    offset, nr_segs,
-						    xfs_get_blocks_direct,
-						    NULL);
+		ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iov,
+					    offset, nr_segs,
+					    xfs_get_blocks_direct,
+					    NULL, NULL, 0);
 	}
 
 	return ret;
-- 
cgit v1.1


From ea0f04e59543bafb3d2cbe37a0d375acb0bb2c34 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 4 Jun 2010 11:29:54 +0200
Subject: get rid of nobh_write_begin_newtrunc

Move the call to vmtruncate to get rid of accessive blocks to the only
remaining caller and rename the non-truncating version to nobh_write_begin.

Get rid of the superflous file argument to it while we're at it.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/buffer.c     | 37 ++++---------------------------------
 fs/ext2/inode.c |  9 ++-------
 fs/jfs/inode.c  | 11 ++++++++++-
 3 files changed, 16 insertions(+), 41 deletions(-)

(limited to 'fs')

diff --git a/fs/buffer.c b/fs/buffer.c
index d54812b..559daf7 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -2510,11 +2510,11 @@ static void attach_nobh_buffers(struct page *page, struct buffer_head *head)
 }
 
 /*
- * Filesystems implementing the new truncate sequence should use the
- * _newtrunc postfix variant which won't incorrectly call vmtruncate.
+ * On entry, the page is fully not uptodate.
+ * On exit the page is fully uptodate in the areas outside (from,to)
  * The filesystem needs to handle block truncation upon failure.
  */
-int nobh_write_begin_newtrunc(struct file *file, struct address_space *mapping,
+int nobh_write_begin(struct address_space *mapping,
 			loff_t pos, unsigned len, unsigned flags,
 			struct page **pagep, void **fsdata,
 			get_block_t *get_block)
@@ -2547,7 +2547,7 @@ int nobh_write_begin_newtrunc(struct file *file, struct address_space *mapping,
 		unlock_page(page);
 		page_cache_release(page);
 		*pagep = NULL;
-		return block_write_begin_newtrunc(file, mapping, pos, len,
+		return block_write_begin_newtrunc(NULL, mapping, pos, len,
 					flags, pagep, fsdata, get_block);
 	}
 
@@ -2654,35 +2654,6 @@ out_release:
 
 	return ret;
 }
-EXPORT_SYMBOL(nobh_write_begin_newtrunc);
-
-/*
- * On entry, the page is fully not uptodate.
- * On exit the page is fully uptodate in the areas outside (from,to)
- */
-int nobh_write_begin(struct file *file, struct address_space *mapping,
-			loff_t pos, unsigned len, unsigned flags,
-			struct page **pagep, void **fsdata,
-			get_block_t *get_block)
-{
-	int ret;
-
-	ret = nobh_write_begin_newtrunc(file, mapping, pos, len, flags,
-					pagep, fsdata, get_block);
-
-	/*
-	 * prepare_write() may have instantiated a few blocks
-	 * outside i_size.  Trim these off again. Don't need
-	 * i_size_read because we hold i_mutex.
-	 */
-	if (unlikely(ret)) {
-		loff_t isize = mapping->host->i_size;
-		if (pos + len > isize)
-			vmtruncate(mapping->host, isize);
-	}
-
-	return ret;
-}
 EXPORT_SYMBOL(nobh_write_begin);
 
 int nobh_write_end(struct file *file, struct address_space *mapping,
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index f36e967..348805c 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -806,13 +806,8 @@ ext2_nobh_write_begin(struct file *file, struct address_space *mapping,
 {
 	int ret;
 
-	/*
-	 * Dir-in-pagecache still uses ext2_write_begin. Would have to rework
-	 * directory handling code to pass around offsets rather than struct
-	 * pages in order to make this work easily.
-	 */
-	ret = nobh_write_begin_newtrunc(file, mapping, pos, len, flags, pagep,
-						fsdata, ext2_get_block);
+	ret = nobh_write_begin(mapping, pos, len, flags, pagep, fsdata,
+			       ext2_get_block);
 	if (ret < 0)
 		ext2_write_failed(mapping, pos + len);
 	return ret;
diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c
index 79e6cda..c38dc18 100644
--- a/fs/jfs/inode.c
+++ b/fs/jfs/inode.c
@@ -303,8 +303,17 @@ static int jfs_write_begin(struct file *file, struct address_space *mapping,
 				loff_t pos, unsigned len, unsigned flags,
 				struct page **pagep, void **fsdata)
 {
-	return nobh_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
+	int ret;
+
+	ret = nobh_write_begin(mapping, pos, len, flags, pagep, fsdata,
 				jfs_get_block);
+	if (unlikely(ret)) {
+		loff_t isize = mapping->host->i_size;
+		if (pos + len > isize)
+			vmtruncate(mapping->host, isize);
+	}
+
+	return ret;
 }
 
 static sector_t jfs_bmap(struct address_space *mapping, sector_t block)
-- 
cgit v1.1


From 282dc178849882289d30e58b54be6b2799b351aa Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 4 Jun 2010 11:29:55 +0200
Subject: get rid of cont_write_begin_newtrunc

Move the call to vmtruncate to get rid of accessive blocks to the callers
in preparation of the new truncate sequence and rename the non-truncating
version to cont_write_begin.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/adfs/inode.c    | 11 ++++++++++-
 fs/affs/file.c     | 11 ++++++++++-
 fs/buffer.c        | 21 +--------------------
 fs/fat/inode.c     |  2 +-
 fs/hfs/inode.c     | 11 ++++++++++-
 fs/hfsplus/inode.c | 11 ++++++++++-
 fs/hpfs/file.c     | 11 ++++++++++-
 fs/qnx4/inode.c    | 11 ++++++++++-
 8 files changed, 62 insertions(+), 27 deletions(-)

(limited to 'fs')

diff --git a/fs/adfs/inode.c b/fs/adfs/inode.c
index 6f850b0..b3dec19 100644
--- a/fs/adfs/inode.c
+++ b/fs/adfs/inode.c
@@ -50,10 +50,19 @@ static int adfs_write_begin(struct file *file, struct address_space *mapping,
 			loff_t pos, unsigned len, unsigned flags,
 			struct page **pagep, void **fsdata)
 {
+	int ret;
+
 	*pagep = NULL;
-	return cont_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
+	ret = cont_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
 				adfs_get_block,
 				&ADFS_I(mapping->host)->mmu_private);
+	if (unlikely(ret)) {
+		loff_t isize = mapping->host->i_size;
+		if (pos + len > isize)
+			vmtruncate(mapping->host, isize);
+	}
+
+	return ret;
 }
 
 static sector_t _adfs_bmap(struct address_space *mapping, sector_t block)
diff --git a/fs/affs/file.c b/fs/affs/file.c
index 322710c..c4a9875 100644
--- a/fs/affs/file.c
+++ b/fs/affs/file.c
@@ -406,10 +406,19 @@ static int affs_write_begin(struct file *file, struct address_space *mapping,
 			loff_t pos, unsigned len, unsigned flags,
 			struct page **pagep, void **fsdata)
 {
+	int ret;
+
 	*pagep = NULL;
-	return cont_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
+	ret = cont_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
 				affs_get_block,
 				&AFFS_I(mapping->host)->mmu_private);
+	if (unlikely(ret)) {
+		loff_t isize = mapping->host->i_size;
+		if (pos + len > isize)
+			vmtruncate(mapping->host, isize);
+	}
+
+	return ret;
 }
 
 static sector_t _affs_bmap(struct address_space *mapping, sector_t block)
diff --git a/fs/buffer.c b/fs/buffer.c
index 559daf7..14529ec 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -2351,7 +2351,7 @@ out:
  * For moronic filesystems that do not allow holes in file.
  * We may have to extend the file.
  */
-int cont_write_begin_newtrunc(struct file *file, struct address_space *mapping,
+int cont_write_begin(struct file *file, struct address_space *mapping,
 			loff_t pos, unsigned len, unsigned flags,
 			struct page **pagep, void **fsdata,
 			get_block_t *get_block, loff_t *bytes)
@@ -2377,25 +2377,6 @@ int cont_write_begin_newtrunc(struct file *file, struct address_space *mapping,
 out:
 	return err;
 }
-EXPORT_SYMBOL(cont_write_begin_newtrunc);
-
-int cont_write_begin(struct file *file, struct address_space *mapping,
-			loff_t pos, unsigned len, unsigned flags,
-			struct page **pagep, void **fsdata,
-			get_block_t *get_block, loff_t *bytes)
-{
-	int ret;
-
-	ret = cont_write_begin_newtrunc(file, mapping, pos, len, flags,
-					pagep, fsdata, get_block, bytes);
-	if (unlikely(ret)) {
-		loff_t isize = mapping->host->i_size;
-		if (pos + len > isize)
-			vmtruncate(mapping->host, isize);
-	}
-
-	return ret;
-}
 EXPORT_SYMBOL(cont_write_begin);
 
 int block_prepare_write(struct page *page, unsigned from, unsigned to,
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index ffe7c6f..ec6a699 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -159,7 +159,7 @@ static int fat_write_begin(struct file *file, struct address_space *mapping,
 	int err;
 
 	*pagep = NULL;
-	err = cont_write_begin_newtrunc(file, mapping, pos, len, flags,
+	err = cont_write_begin(file, mapping, pos, len, flags,
 				pagep, fsdata, fat_get_block,
 				&MSDOS_I(mapping->host)->mmu_private);
 	if (err < 0)
diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c
index 07b2464..8df18e6 100644
--- a/fs/hfs/inode.c
+++ b/fs/hfs/inode.c
@@ -39,10 +39,19 @@ static int hfs_write_begin(struct file *file, struct address_space *mapping,
 			loff_t pos, unsigned len, unsigned flags,
 			struct page **pagep, void **fsdata)
 {
+	int ret;
+
 	*pagep = NULL;
-	return cont_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
+	ret = cont_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
 				hfs_get_block,
 				&HFS_I(mapping->host)->phys_size);
+	if (unlikely(ret)) {
+		loff_t isize = mapping->host->i_size;
+		if (pos + len > isize)
+			vmtruncate(mapping->host, isize);
+	}
+
+	return ret;
 }
 
 static sector_t hfs_bmap(struct address_space *mapping, sector_t block)
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c
index 4860217..88bf1b5 100644
--- a/fs/hfsplus/inode.c
+++ b/fs/hfsplus/inode.c
@@ -31,10 +31,19 @@ static int hfsplus_write_begin(struct file *file, struct address_space *mapping,
 			loff_t pos, unsigned len, unsigned flags,
 			struct page **pagep, void **fsdata)
 {
+	int ret;
+
 	*pagep = NULL;
-	return cont_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
+	ret = cont_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
 				hfsplus_get_block,
 				&HFSPLUS_I(mapping->host).phys_size);
+	if (unlikely(ret)) {
+		loff_t isize = mapping->host->i_size;
+		if (pos + len > isize)
+			vmtruncate(mapping->host, isize);
+	}
+
+	return ret;
 }
 
 static sector_t hfsplus_bmap(struct address_space *mapping, sector_t block)
diff --git a/fs/hpfs/file.c b/fs/hpfs/file.c
index a9ae9bf..c034088 100644
--- a/fs/hpfs/file.c
+++ b/fs/hpfs/file.c
@@ -97,10 +97,19 @@ static int hpfs_write_begin(struct file *file, struct address_space *mapping,
 			loff_t pos, unsigned len, unsigned flags,
 			struct page **pagep, void **fsdata)
 {
+	int ret;
+
 	*pagep = NULL;
-	return cont_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
+	ret = cont_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
 				hpfs_get_block,
 				&hpfs_i(mapping->host)->mmu_private);
+	if (unlikely(ret)) {
+		loff_t isize = mapping->host->i_size;
+		if (pos + len > isize)
+			vmtruncate(mapping->host, isize);
+	}
+
+	return ret;
 }
 
 static sector_t _hpfs_bmap(struct address_space *mapping, sector_t block)
diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c
index 277575d..1682972 100644
--- a/fs/qnx4/inode.c
+++ b/fs/qnx4/inode.c
@@ -320,10 +320,19 @@ static int qnx4_write_begin(struct file *file, struct address_space *mapping,
 			struct page **pagep, void **fsdata)
 {
 	struct qnx4_inode_info *qnx4_inode = qnx4_i(mapping->host);
+	int ret;
+
 	*pagep = NULL;
-	return cont_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
+	ret = cont_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
 				qnx4_get_block,
 				&qnx4_inode->mmu_private);
+	if (unlikely(ret)) {
+		loff_t isize = mapping->host->i_size;
+		if (pos + len > isize)
+			vmtruncate(mapping->host, isize);
+	}
+
+	return ret;
 }
 static sector_t qnx4_bmap(struct address_space *mapping, sector_t block)
 {
-- 
cgit v1.1


From f4e420dc423148fba637af1ab618fa8896dfb2d6 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 4 Jun 2010 11:29:56 +0200
Subject: clean up write_begin usage for directories in pagecache

For filesystem that implement directories in pagecache we call
block_write_begin with an already allocated page for this code, while the
normal regular file write path uses the default block_write_begin behaviour.

Get rid of the __foofs_write_begin helper and opencode the normal write_begin
call in foofs_write_begin, while adding a new foofs_prepare_chunk helper for
the directory code.  The added benefit is that foofs_prepare_chunk has
a much saner calling convention.

Note that the interruptible flag passed into block_write_begin is always
ignored if we already pass in a page (see next patch for details), and
we never were doing truncations of exessive blocks for this case either so we
can switch directly to block_write_begin_newtrunc.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/ext2/dir.c    | 24 ++++++++++++------------
 fs/ext2/ext2.h   |  3 ---
 fs/ext2/inode.c  | 11 ++---------
 fs/minix/dir.c   | 21 +++++++--------------
 fs/minix/inode.c | 11 +++++------
 fs/minix/minix.h |  4 +---
 fs/nilfs2/dir.c  | 26 +++++++-------------------
 fs/sysv/dir.c    | 21 +++++++--------------
 fs/sysv/itree.c  | 11 +++++------
 fs/sysv/sysv.h   |  4 +---
 fs/ufs/dir.c     | 13 ++++---------
 fs/ufs/inode.c   | 11 +++++------
 fs/ufs/util.h    |  4 +---
 13 files changed, 57 insertions(+), 107 deletions(-)

(limited to 'fs')

diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c
index 7516957..6b946ba 100644
--- a/fs/ext2/dir.c
+++ b/fs/ext2/dir.c
@@ -448,6 +448,12 @@ ino_t ext2_inode_by_name(struct inode *dir, struct qstr *child)
 	return res;
 }
 
+static int ext2_prepare_chunk(struct page *page, loff_t pos, unsigned len)
+{
+	return block_write_begin_newtrunc(NULL, page->mapping, pos, len, 0,
+					  &page, NULL, ext2_get_block);
+}
+
 /* Releases the page */
 void ext2_set_link(struct inode *dir, struct ext2_dir_entry_2 *de,
 		   struct page *page, struct inode *inode, int update_times)
@@ -458,8 +464,7 @@ void ext2_set_link(struct inode *dir, struct ext2_dir_entry_2 *de,
 	int err;
 
 	lock_page(page);
-	err = __ext2_write_begin(NULL, page->mapping, pos, len,
-				AOP_FLAG_UNINTERRUPTIBLE, &page, NULL);
+	err = ext2_prepare_chunk(page, pos, len);
 	BUG_ON(err);
 	de->inode = cpu_to_le32(inode->i_ino);
 	ext2_set_de_type(de, inode);
@@ -542,8 +547,7 @@ int ext2_add_link (struct dentry *dentry, struct inode *inode)
 got_it:
 	pos = page_offset(page) +
 		(char*)de - (char*)page_address(page);
-	err = __ext2_write_begin(NULL, page->mapping, pos, rec_len, 0,
-							&page, NULL);
+	err = ext2_prepare_chunk(page, pos, rec_len);
 	if (err)
 		goto out_unlock;
 	if (de->inode) {
@@ -576,8 +580,7 @@ out_unlock:
  */
 int ext2_delete_entry (struct ext2_dir_entry_2 * dir, struct page * page )
 {
-	struct address_space *mapping = page->mapping;
-	struct inode *inode = mapping->host;
+	struct inode *inode = page->mapping->host;
 	char *kaddr = page_address(page);
 	unsigned from = ((char*)dir - kaddr) & ~(ext2_chunk_size(inode)-1);
 	unsigned to = ((char *)dir - kaddr) +
@@ -601,8 +604,7 @@ int ext2_delete_entry (struct ext2_dir_entry_2 * dir, struct page * page )
 		from = (char*)pde - (char*)page_address(page);
 	pos = page_offset(page) + from;
 	lock_page(page);
-	err = __ext2_write_begin(NULL, page->mapping, pos, to - from, 0,
-							&page, NULL);
+	err = ext2_prepare_chunk(page, pos, to - from);
 	BUG_ON(err);
 	if (pde)
 		pde->rec_len = ext2_rec_len_to_disk(to - from);
@@ -621,8 +623,7 @@ out:
  */
 int ext2_make_empty(struct inode *inode, struct inode *parent)
 {
-	struct address_space *mapping = inode->i_mapping;
-	struct page *page = grab_cache_page(mapping, 0);
+	struct page *page = grab_cache_page(inode->i_mapping, 0);
 	unsigned chunk_size = ext2_chunk_size(inode);
 	struct ext2_dir_entry_2 * de;
 	int err;
@@ -631,8 +632,7 @@ int ext2_make_empty(struct inode *inode, struct inode *parent)
 	if (!page)
 		return -ENOMEM;
 
-	err = __ext2_write_begin(NULL, page->mapping, 0, chunk_size, 0,
-							&page, NULL);
+	err = ext2_prepare_chunk(page, 0, chunk_size);
 	if (err) {
 		unlock_page(page);
 		goto fail;
diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h
index 52b34f1..8f53d11 100644
--- a/fs/ext2/ext2.h
+++ b/fs/ext2/ext2.h
@@ -127,9 +127,6 @@ extern void ext2_set_inode_flags(struct inode *inode);
 extern void ext2_get_inode_flags(struct ext2_inode_info *);
 extern int ext2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
 		       u64 start, u64 len);
-int __ext2_write_begin(struct file *file, struct address_space *mapping,
-		loff_t pos, unsigned len, unsigned flags,
-		struct page **pagep, void **fsdata);
 
 /* ioctl.c */
 extern long ext2_ioctl(struct file *, unsigned int, unsigned long);
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index 348805c..2f4dfbc 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -765,14 +765,6 @@ ext2_readpages(struct file *file, struct address_space *mapping,
 	return mpage_readpages(mapping, pages, nr_pages, ext2_get_block);
 }
 
-int __ext2_write_begin(struct file *file, struct address_space *mapping,
-		loff_t pos, unsigned len, unsigned flags,
-		struct page **pagep, void **fsdata)
-{
-	return block_write_begin_newtrunc(file, mapping, pos, len, flags,
-					pagep, fsdata, ext2_get_block);
-}
-
 static int
 ext2_write_begin(struct file *file, struct address_space *mapping,
 		loff_t pos, unsigned len, unsigned flags,
@@ -781,7 +773,8 @@ ext2_write_begin(struct file *file, struct address_space *mapping,
 	int ret;
 
 	*pagep = NULL;
-	ret = __ext2_write_begin(file, mapping, pos, len, flags, pagep, fsdata);
+	ret = block_write_begin_newtrunc(file, mapping, pos, len, flags,
+					 pagep, fsdata, ext2_get_block);
 	if (ret < 0)
 		ext2_write_failed(mapping, pos + len);
 	return ret;
diff --git a/fs/minix/dir.c b/fs/minix/dir.c
index 1dbf921..085a926 100644
--- a/fs/minix/dir.c
+++ b/fs/minix/dir.c
@@ -271,8 +271,7 @@ int minix_add_link(struct dentry *dentry, struct inode *inode)
 
 got_it:
 	pos = page_offset(page) + p - (char *)page_address(page);
-	err = __minix_write_begin(NULL, page->mapping, pos, sbi->s_dirsize,
-					AOP_FLAG_UNINTERRUPTIBLE, &page, NULL);
+	err = minix_prepare_chunk(page, pos, sbi->s_dirsize);
 	if (err)
 		goto out_unlock;
 	memcpy (namx, name, namelen);
@@ -297,8 +296,7 @@ out_unlock:
 
 int minix_delete_entry(struct minix_dir_entry *de, struct page *page)
 {
-	struct address_space *mapping = page->mapping;
-	struct inode *inode = (struct inode*)mapping->host;
+	struct inode *inode = page->mapping->host;
 	char *kaddr = page_address(page);
 	loff_t pos = page_offset(page) + (char*)de - kaddr;
 	struct minix_sb_info *sbi = minix_sb(inode->i_sb);
@@ -306,8 +304,7 @@ int minix_delete_entry(struct minix_dir_entry *de, struct page *page)
 	int err;
 
 	lock_page(page);
-	err = __minix_write_begin(NULL, mapping, pos, len,
-					AOP_FLAG_UNINTERRUPTIBLE, &page, NULL);
+	err = minix_prepare_chunk(page, pos, len);
 	if (err == 0) {
 		if (sbi->s_version == MINIX_V3)
 			((minix3_dirent *) de)->inode = 0;
@@ -325,16 +322,14 @@ int minix_delete_entry(struct minix_dir_entry *de, struct page *page)
 
 int minix_make_empty(struct inode *inode, struct inode *dir)
 {
-	struct address_space *mapping = inode->i_mapping;
-	struct page *page = grab_cache_page(mapping, 0);
+	struct page *page = grab_cache_page(inode->i_mapping, 0);
 	struct minix_sb_info *sbi = minix_sb(inode->i_sb);
 	char *kaddr;
 	int err;
 
 	if (!page)
 		return -ENOMEM;
-	err = __minix_write_begin(NULL, mapping, 0, 2 * sbi->s_dirsize,
-					AOP_FLAG_UNINTERRUPTIBLE, &page, NULL);
+	err = minix_prepare_chunk(page, 0, 2 * sbi->s_dirsize);
 	if (err) {
 		unlock_page(page);
 		goto fail;
@@ -425,8 +420,7 @@ not_empty:
 void minix_set_link(struct minix_dir_entry *de, struct page *page,
 	struct inode *inode)
 {
-	struct address_space *mapping = page->mapping;
-	struct inode *dir = mapping->host;
+	struct inode *dir = page->mapping->host;
 	struct minix_sb_info *sbi = minix_sb(dir->i_sb);
 	loff_t pos = page_offset(page) +
 			(char *)de-(char*)page_address(page);
@@ -434,8 +428,7 @@ void minix_set_link(struct minix_dir_entry *de, struct page *page,
 
 	lock_page(page);
 
-	err = __minix_write_begin(NULL, mapping, pos, sbi->s_dirsize,
-					AOP_FLAG_UNINTERRUPTIBLE, &page, NULL);
+	err = minix_prepare_chunk(page, pos, sbi->s_dirsize);
 	if (err == 0) {
 		if (sbi->s_version == MINIX_V3)
 			((minix3_dirent *) de)->inode = inode->i_ino;
diff --git a/fs/minix/inode.c b/fs/minix/inode.c
index 756f8c9..f4abe45 100644
--- a/fs/minix/inode.c
+++ b/fs/minix/inode.c
@@ -357,12 +357,10 @@ static int minix_readpage(struct file *file, struct page *page)
 	return block_read_full_page(page,minix_get_block);
 }
 
-int __minix_write_begin(struct file *file, struct address_space *mapping,
-			loff_t pos, unsigned len, unsigned flags,
-			struct page **pagep, void **fsdata)
+int minix_prepare_chunk(struct page *page, loff_t pos, unsigned len)
 {
-	return block_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
-				minix_get_block);
+	return block_write_begin_newtrunc(NULL, page->mapping, pos, len, 0,
+					  &page, NULL, minix_get_block);
 }
 
 static int minix_write_begin(struct file *file, struct address_space *mapping,
@@ -370,7 +368,8 @@ static int minix_write_begin(struct file *file, struct address_space *mapping,
 			struct page **pagep, void **fsdata)
 {
 	*pagep = NULL;
-	return __minix_write_begin(file, mapping, pos, len, flags, pagep, fsdata);
+	return block_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
+				minix_get_block);
 }
 
 static sector_t minix_bmap(struct address_space *mapping, sector_t block)
diff --git a/fs/minix/minix.h b/fs/minix/minix.h
index 111f34e..407b1c8 100644
--- a/fs/minix/minix.h
+++ b/fs/minix/minix.h
@@ -53,9 +53,7 @@ extern int minix_new_block(struct inode * inode);
 extern void minix_free_block(struct inode *inode, unsigned long block);
 extern unsigned long minix_count_free_blocks(struct minix_sb_info *sbi);
 extern int minix_getattr(struct vfsmount *, struct dentry *, struct kstat *);
-extern int __minix_write_begin(struct file *file, struct address_space *mapping,
-			loff_t pos, unsigned len, unsigned flags,
-			struct page **pagep, void **fsdata);
+extern int minix_prepare_chunk(struct page *page, loff_t pos, unsigned len);
 
 extern void V1_minix_truncate(struct inode *);
 extern void V2_minix_truncate(struct inode *);
diff --git a/fs/nilfs2/dir.c b/fs/nilfs2/dir.c
index 85c89df..fc2bcfa 100644
--- a/fs/nilfs2/dir.c
+++ b/fs/nilfs2/dir.c
@@ -80,23 +80,11 @@ static unsigned nilfs_last_byte(struct inode *inode, unsigned long page_nr)
 	return last_byte;
 }
 
-static int nilfs_prepare_chunk_uninterruptible(struct page *page,
-					       struct address_space *mapping,
-					       unsigned from, unsigned to)
+static int nilfs_prepare_chunk(struct page *page, unsigned from, unsigned to)
 {
 	loff_t pos = page_offset(page) + from;
-	return block_write_begin(NULL, mapping, pos, to - from,
-				 AOP_FLAG_UNINTERRUPTIBLE, &page,
-				 NULL, nilfs_get_block);
-}
-
-static int nilfs_prepare_chunk(struct page *page,
-			       struct address_space *mapping,
-			       unsigned from, unsigned to)
-{
-	loff_t pos = page_offset(page) + from;
-	return block_write_begin(NULL, mapping, pos, to - from, 0, &page,
-				 NULL, nilfs_get_block);
+	return block_write_begin_newtrunc(NULL, page->mapping, pos, to - from,
+					  0, &page, NULL, nilfs_get_block);
 }
 
 static void nilfs_commit_chunk(struct page *page,
@@ -449,7 +437,7 @@ void nilfs_set_link(struct inode *dir, struct nilfs_dir_entry *de,
 	int err;
 
 	lock_page(page);
-	err = nilfs_prepare_chunk_uninterruptible(page, mapping, from, to);
+	err = nilfs_prepare_chunk(page, from, to);
 	BUG_ON(err);
 	de->inode = cpu_to_le64(inode->i_ino);
 	nilfs_set_de_type(de, inode);
@@ -530,7 +518,7 @@ int nilfs_add_link(struct dentry *dentry, struct inode *inode)
 got_it:
 	from = (char *)de - (char *)page_address(page);
 	to = from + rec_len;
-	err = nilfs_prepare_chunk(page, page->mapping, from, to);
+	err = nilfs_prepare_chunk(page, from, to);
 	if (err)
 		goto out_unlock;
 	if (de->inode) {
@@ -587,7 +575,7 @@ int nilfs_delete_entry(struct nilfs_dir_entry *dir, struct page *page)
 	if (pde)
 		from = (char *)pde - (char *)page_address(page);
 	lock_page(page);
-	err = nilfs_prepare_chunk(page, mapping, from, to);
+	err = nilfs_prepare_chunk(page, from, to);
 	BUG_ON(err);
 	if (pde)
 		pde->rec_len = cpu_to_le16(to - from);
@@ -615,7 +603,7 @@ int nilfs_make_empty(struct inode *inode, struct inode *parent)
 	if (!page)
 		return -ENOMEM;
 
-	err = nilfs_prepare_chunk(page, mapping, 0, chunk_size);
+	err = nilfs_prepare_chunk(page, 0, chunk_size);
 	if (unlikely(err)) {
 		unlock_page(page);
 		goto fail;
diff --git a/fs/sysv/dir.c b/fs/sysv/dir.c
index 79941e4..a77c421 100644
--- a/fs/sysv/dir.c
+++ b/fs/sysv/dir.c
@@ -218,8 +218,7 @@ got_it:
 	pos = page_offset(page) +
 			(char*)de - (char*)page_address(page);
 	lock_page(page);
-	err = __sysv_write_begin(NULL, page->mapping, pos, SYSV_DIRSIZE,
-				AOP_FLAG_UNINTERRUPTIBLE, &page, NULL);
+	err = sysv_prepare_chunk(page, pos, SYSV_DIRSIZE);
 	if (err)
 		goto out_unlock;
 	memcpy (de->name, name, namelen);
@@ -239,15 +238,13 @@ out_unlock:
 
 int sysv_delete_entry(struct sysv_dir_entry *de, struct page *page)
 {
-	struct address_space *mapping = page->mapping;
-	struct inode *inode = (struct inode*)mapping->host;
+	struct inode *inode = page->mapping->host;
 	char *kaddr = (char*)page_address(page);
 	loff_t pos = page_offset(page) + (char *)de - kaddr;
 	int err;
 
 	lock_page(page);
-	err = __sysv_write_begin(NULL, mapping, pos, SYSV_DIRSIZE,
-				AOP_FLAG_UNINTERRUPTIBLE, &page, NULL);
+	err = sysv_prepare_chunk(page, pos, SYSV_DIRSIZE);
 	BUG_ON(err);
 	de->inode = 0;
 	err = dir_commit_chunk(page, pos, SYSV_DIRSIZE);
@@ -259,16 +256,14 @@ int sysv_delete_entry(struct sysv_dir_entry *de, struct page *page)
 
 int sysv_make_empty(struct inode *inode, struct inode *dir)
 {
-	struct address_space *mapping = inode->i_mapping;
-	struct page *page = grab_cache_page(mapping, 0);
+	struct page *page = grab_cache_page(inode->i_mapping, 0);
 	struct sysv_dir_entry * de;
 	char *base;
 	int err;
 
 	if (!page)
 		return -ENOMEM;
-	err = __sysv_write_begin(NULL, mapping, 0, 2 * SYSV_DIRSIZE,
-				AOP_FLAG_UNINTERRUPTIBLE, &page, NULL);
+	err = sysv_prepare_chunk(page, 0, 2 * SYSV_DIRSIZE);
 	if (err) {
 		unlock_page(page);
 		goto fail;
@@ -341,15 +336,13 @@ not_empty:
 void sysv_set_link(struct sysv_dir_entry *de, struct page *page,
 	struct inode *inode)
 {
-	struct address_space *mapping = page->mapping;
-	struct inode *dir = mapping->host;
+	struct inode *dir = page->mapping->host;
 	loff_t pos = page_offset(page) +
 			(char *)de-(char*)page_address(page);
 	int err;
 
 	lock_page(page);
-	err = __sysv_write_begin(NULL, mapping, pos, SYSV_DIRSIZE,
-				AOP_FLAG_UNINTERRUPTIBLE, &page, NULL);
+	err = sysv_prepare_chunk(page, pos, SYSV_DIRSIZE);
 	BUG_ON(err);
 	de->inode = cpu_to_fs16(SYSV_SB(inode->i_sb), inode->i_ino);
 	err = dir_commit_chunk(page, pos, SYSV_DIRSIZE);
diff --git a/fs/sysv/itree.c b/fs/sysv/itree.c
index f042eec..4068f48 100644
--- a/fs/sysv/itree.c
+++ b/fs/sysv/itree.c
@@ -459,12 +459,10 @@ static int sysv_readpage(struct file *file, struct page *page)
 	return block_read_full_page(page,get_block);
 }
 
-int __sysv_write_begin(struct file *file, struct address_space *mapping,
-			loff_t pos, unsigned len, unsigned flags,
-			struct page **pagep, void **fsdata)
+int sysv_prepare_chunk(struct page *page, loff_t pos, unsigned len)
 {
-	return block_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
-				get_block);
+	return block_write_begin_newtrunc(NULL, page->mapping, pos, len, 0,
+					  &page, NULL, get_block);
 }
 
 static int sysv_write_begin(struct file *file, struct address_space *mapping,
@@ -472,7 +470,8 @@ static int sysv_write_begin(struct file *file, struct address_space *mapping,
 			struct page **pagep, void **fsdata)
 {
 	*pagep = NULL;
-	return __sysv_write_begin(file, mapping, pos, len, flags, pagep, fsdata);
+	return block_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
+				get_block);
 }
 
 static sector_t sysv_bmap(struct address_space *mapping, sector_t block)
diff --git a/fs/sysv/sysv.h b/fs/sysv/sysv.h
index 94cb9b4..bb55cdb 100644
--- a/fs/sysv/sysv.h
+++ b/fs/sysv/sysv.h
@@ -136,9 +136,7 @@ extern unsigned long sysv_count_free_blocks(struct super_block *);
 
 /* itree.c */
 extern void sysv_truncate(struct inode *);
-extern int __sysv_write_begin(struct file *file, struct address_space *mapping,
-			loff_t pos, unsigned len, unsigned flags,
-			struct page **pagep, void **fsdata);
+extern int sysv_prepare_chunk(struct page *page, loff_t pos, unsigned len);
 
 /* inode.c */
 extern struct inode *sysv_iget(struct super_block *, unsigned int);
diff --git a/fs/ufs/dir.c b/fs/ufs/dir.c
index ec78475..dbc9099 100644
--- a/fs/ufs/dir.c
+++ b/fs/ufs/dir.c
@@ -95,8 +95,7 @@ void ufs_set_link(struct inode *dir, struct ufs_dir_entry *de,
 	int err;
 
 	lock_page(page);
-	err = __ufs_write_begin(NULL, page->mapping, pos, len,
-				AOP_FLAG_UNINTERRUPTIBLE, &page, NULL);
+	err = ufs_prepare_chunk(page, pos, len);
 	BUG_ON(err);
 
 	de->d_ino = cpu_to_fs32(dir->i_sb, inode->i_ino);
@@ -381,8 +380,7 @@ int ufs_add_link(struct dentry *dentry, struct inode *inode)
 got_it:
 	pos = page_offset(page) +
 			(char*)de - (char*)page_address(page);
-	err = __ufs_write_begin(NULL, page->mapping, pos, rec_len,
-				AOP_FLAG_UNINTERRUPTIBLE, &page, NULL);
+	err = ufs_prepare_chunk(page, pos, rec_len);
 	if (err)
 		goto out_unlock;
 	if (de->d_ino) {
@@ -518,7 +516,6 @@ int ufs_delete_entry(struct inode *inode, struct ufs_dir_entry *dir,
 		     struct page * page)
 {
 	struct super_block *sb = inode->i_sb;
-	struct address_space *mapping = page->mapping;
 	char *kaddr = page_address(page);
 	unsigned from = ((char*)dir - kaddr) & ~(UFS_SB(sb)->s_uspi->s_dirblksize - 1);
 	unsigned to = ((char*)dir - kaddr) + fs16_to_cpu(sb, dir->d_reclen);
@@ -549,8 +546,7 @@ int ufs_delete_entry(struct inode *inode, struct ufs_dir_entry *dir,
 
 	pos = page_offset(page) + from;
 	lock_page(page);
-	err = __ufs_write_begin(NULL, mapping, pos, to - from,
-				AOP_FLAG_UNINTERRUPTIBLE, &page, NULL);
+	err = ufs_prepare_chunk(page, pos, to - from);
 	BUG_ON(err);
 	if (pde)
 		pde->d_reclen = cpu_to_fs16(sb, to - from);
@@ -577,8 +573,7 @@ int ufs_make_empty(struct inode * inode, struct inode *dir)
 	if (!page)
 		return -ENOMEM;
 
-	err = __ufs_write_begin(NULL, mapping, 0, chunk_size,
-				AOP_FLAG_UNINTERRUPTIBLE, &page, NULL);
+	err = ufs_prepare_chunk(page, 0, chunk_size);
 	if (err) {
 		unlock_page(page);
 		goto fail;
diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c
index 73fe773..a9555b1 100644
--- a/fs/ufs/inode.c
+++ b/fs/ufs/inode.c
@@ -558,12 +558,10 @@ static int ufs_readpage(struct file *file, struct page *page)
 	return block_read_full_page(page,ufs_getfrag_block);
 }
 
-int __ufs_write_begin(struct file *file, struct address_space *mapping,
-			loff_t pos, unsigned len, unsigned flags,
-			struct page **pagep, void **fsdata)
+int ufs_prepare_chunk(struct page *page, loff_t pos, unsigned len)
 {
-	return block_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
-				ufs_getfrag_block);
+	return block_write_begin_newtrunc(NULL, page->mapping, pos, len, 0,
+					  &page, NULL, ufs_getfrag_block);
 }
 
 static int ufs_write_begin(struct file *file, struct address_space *mapping,
@@ -571,7 +569,8 @@ static int ufs_write_begin(struct file *file, struct address_space *mapping,
 			struct page **pagep, void **fsdata)
 {
 	*pagep = NULL;
-	return __ufs_write_begin(file, mapping, pos, len, flags, pagep, fsdata);
+	return block_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
+				ufs_getfrag_block);
 }
 
 static sector_t ufs_bmap(struct address_space *mapping, sector_t block)
diff --git a/fs/ufs/util.h b/fs/ufs/util.h
index 23ceed8..0466036 100644
--- a/fs/ufs/util.h
+++ b/fs/ufs/util.h
@@ -257,9 +257,7 @@ ufs_set_inode_gid(struct super_block *sb, struct ufs_inode *inode, u32 value)
 
 extern dev_t ufs_get_inode_dev(struct super_block *, struct ufs_inode_info *);
 extern void ufs_set_inode_dev(struct super_block *, struct ufs_inode_info *, dev_t);
-extern int __ufs_write_begin(struct file *file, struct address_space *mapping,
-		loff_t pos, unsigned len, unsigned flags,
-		struct page **pagep, void **fsdata);
+extern int ufs_prepare_chunk(struct page *page, loff_t pos, unsigned len);
 
 /*
  * These functions manipulate ufs buffers
-- 
cgit v1.1


From 6e1db88d536adcbbfe562b2d4b7d6425784fff12 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 4 Jun 2010 11:29:57 +0200
Subject: introduce __block_write_begin

Split up the block_write_begin implementation - __block_write_begin is a new
trivial wrapper for block_prepare_write that always takes an already
allocated page and can be either called from block_write_begin or filesystem
code that already has a page allocated.  Remove the handling of already
allocated pages from block_write_begin after switching all callers that
do it to __block_write_begin.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/buffer.c         | 69 ++++++++++++++++++++---------------------------------
 fs/ext2/dir.c       |  3 +--
 fs/ext3/inode.c     |  3 +--
 fs/ext4/inode.c     | 11 ++++-----
 fs/minix/inode.c    |  3 +--
 fs/nilfs2/dir.c     |  3 +--
 fs/reiserfs/inode.c |  3 +--
 fs/sysv/itree.c     |  3 +--
 fs/ufs/inode.c      |  3 +--
 9 files changed, 37 insertions(+), 64 deletions(-)

(limited to 'fs')

diff --git a/fs/buffer.c b/fs/buffer.c
index 14529ec..c319c49 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -1833,9 +1833,10 @@ void page_zero_new_buffers(struct page *page, unsigned from, unsigned to)
 }
 EXPORT_SYMBOL(page_zero_new_buffers);
 
-static int __block_prepare_write(struct inode *inode, struct page *page,
-		unsigned from, unsigned to, get_block_t *get_block)
+int block_prepare_write(struct page *page, unsigned from, unsigned to,
+		get_block_t *get_block)
 {
+	struct inode *inode = page->mapping->host;
 	unsigned block_start, block_end;
 	sector_t block;
 	int err = 0;
@@ -1908,10 +1909,13 @@ static int __block_prepare_write(struct inode *inode, struct page *page,
 		if (!buffer_uptodate(*wait_bh))
 			err = -EIO;
 	}
-	if (unlikely(err))
+	if (unlikely(err)) {
 		page_zero_new_buffers(page, from, to);
+		ClearPageUptodate(page);
+	}
 	return err;
 }
+EXPORT_SYMBOL(block_prepare_write);
 
 static int __block_commit_write(struct inode *inode, struct page *page,
 		unsigned from, unsigned to)
@@ -1948,6 +1952,15 @@ static int __block_commit_write(struct inode *inode, struct page *page,
 	return 0;
 }
 
+int __block_write_begin(struct page *page, loff_t pos, unsigned len,
+		get_block_t *get_block)
+{
+	unsigned start = pos & (PAGE_CACHE_SIZE - 1);
+
+	return block_prepare_write(page, start, start + len, get_block);
+}
+EXPORT_SYMBOL(__block_write_begin);
+
 /*
  * Filesystems implementing the new truncate sequence should use the
  * _newtrunc postfix variant which won't incorrectly call vmtruncate.
@@ -1958,41 +1971,22 @@ int block_write_begin_newtrunc(struct file *file, struct address_space *mapping,
 			struct page **pagep, void **fsdata,
 			get_block_t *get_block)
 {
-	struct inode *inode = mapping->host;
-	int status = 0;
+	pgoff_t index = pos >> PAGE_CACHE_SHIFT;
 	struct page *page;
-	pgoff_t index;
-	unsigned start, end;
-	int ownpage = 0;
+	int status;
 
-	index = pos >> PAGE_CACHE_SHIFT;
-	start = pos & (PAGE_CACHE_SIZE - 1);
-	end = start + len;
-
-	page = *pagep;
-	if (page == NULL) {
-		ownpage = 1;
-		page = grab_cache_page_write_begin(mapping, index, flags);
-		if (!page) {
-			status = -ENOMEM;
-			goto out;
-		}
-		*pagep = page;
-	} else
-		BUG_ON(!PageLocked(page));
+	page = grab_cache_page_write_begin(mapping, index, flags);
+	if (!page)
+		return -ENOMEM;
 
-	status = __block_prepare_write(inode, page, start, end, get_block);
+	status = __block_write_begin(page, pos, len, get_block);
 	if (unlikely(status)) {
-		ClearPageUptodate(page);
-
-		if (ownpage) {
-			unlock_page(page);
-			page_cache_release(page);
-			*pagep = NULL;
-		}
+		unlock_page(page);
+		page_cache_release(page);
+		page = NULL;
 	}
 
-out:
+	*pagep = page;
 	return status;
 }
 EXPORT_SYMBOL(block_write_begin_newtrunc);
@@ -2379,17 +2373,6 @@ out:
 }
 EXPORT_SYMBOL(cont_write_begin);
 
-int block_prepare_write(struct page *page, unsigned from, unsigned to,
-			get_block_t *get_block)
-{
-	struct inode *inode = page->mapping->host;
-	int err = __block_prepare_write(inode, page, from, to, get_block);
-	if (err)
-		ClearPageUptodate(page);
-	return err;
-}
-EXPORT_SYMBOL(block_prepare_write);
-
 int block_commit_write(struct page *page, unsigned from, unsigned to)
 {
 	struct inode *inode = page->mapping->host;
diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c
index 6b946ba..7641098 100644
--- a/fs/ext2/dir.c
+++ b/fs/ext2/dir.c
@@ -450,8 +450,7 @@ ino_t ext2_inode_by_name(struct inode *dir, struct qstr *child)
 
 static int ext2_prepare_chunk(struct page *page, loff_t pos, unsigned len)
 {
-	return block_write_begin_newtrunc(NULL, page->mapping, pos, len, 0,
-					  &page, NULL, ext2_get_block);
+	return __block_write_begin(page, pos, len, ext2_get_block);
 }
 
 /* Releases the page */
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index a66f3fe..5c6f07e 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -1196,8 +1196,7 @@ retry:
 		ret = PTR_ERR(handle);
 		goto out;
 	}
-	ret = block_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
-							ext3_get_block);
+	ret = __block_write_begin(page, pos, len, ext3_get_block);
 	if (ret)
 		goto write_begin_failed;
 
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index d6a7701..3da3c96 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1578,11 +1578,9 @@ retry:
 	*pagep = page;
 
 	if (ext4_should_dioread_nolock(inode))
-		ret = block_write_begin(file, mapping, pos, len, flags, pagep,
-				fsdata, ext4_get_block_write);
+		ret = __block_write_begin(page, pos, len, ext4_get_block_write);
 	else
-		ret = block_write_begin(file, mapping, pos, len, flags, pagep,
-				fsdata, ext4_get_block);
+		ret = __block_write_begin(page, pos, len, ext4_get_block);
 
 	if (!ret && ext4_should_journal_data(inode)) {
 		ret = walk_page_buffers(handle, page_buffers(page),
@@ -1593,7 +1591,7 @@ retry:
 		unlock_page(page);
 		page_cache_release(page);
 		/*
-		 * block_write_begin may have instantiated a few blocks
+		 * __block_write_begin may have instantiated a few blocks
 		 * outside i_size.  Trim these off again. Don't need
 		 * i_size_read because we hold i_mutex.
 		 *
@@ -3185,8 +3183,7 @@ retry:
 	}
 	*pagep = page;
 
-	ret = block_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
-				ext4_da_get_block_prep);
+	ret = __block_write_begin(page, pos, len, ext4_da_get_block_prep);
 	if (ret < 0) {
 		unlock_page(page);
 		ext4_journal_stop(handle);
diff --git a/fs/minix/inode.c b/fs/minix/inode.c
index f4abe45..6b29e73 100644
--- a/fs/minix/inode.c
+++ b/fs/minix/inode.c
@@ -359,8 +359,7 @@ static int minix_readpage(struct file *file, struct page *page)
 
 int minix_prepare_chunk(struct page *page, loff_t pos, unsigned len)
 {
-	return block_write_begin_newtrunc(NULL, page->mapping, pos, len, 0,
-					  &page, NULL, minix_get_block);
+	return __block_write_begin(page, pos, len, minix_get_block);
 }
 
 static int minix_write_begin(struct file *file, struct address_space *mapping,
diff --git a/fs/nilfs2/dir.c b/fs/nilfs2/dir.c
index fc2bcfa..d14e3b9 100644
--- a/fs/nilfs2/dir.c
+++ b/fs/nilfs2/dir.c
@@ -83,8 +83,7 @@ static unsigned nilfs_last_byte(struct inode *inode, unsigned long page_nr)
 static int nilfs_prepare_chunk(struct page *page, unsigned from, unsigned to)
 {
 	loff_t pos = page_offset(page) + from;
-	return block_write_begin_newtrunc(NULL, page->mapping, pos, to - from,
-					  0, &page, NULL, nilfs_get_block);
+	return __block_write_begin(page, pos, to - from, nilfs_get_block);
 }
 
 static void nilfs_commit_chunk(struct page *page,
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 4c1fb54..045729f 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -2585,8 +2585,7 @@ static int reiserfs_write_begin(struct file *file,
 		old_ref = th->t_refcount;
 		th->t_refcount++;
 	}
-	ret = block_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
-				reiserfs_get_block);
+	ret = __block_write_begin(page, pos, len, reiserfs_get_block);
 	if (ret && reiserfs_transaction_running(inode->i_sb)) {
 		struct reiserfs_transaction_handle *th = current->journal_info;
 		/* this gets a little ugly.  If reiserfs_get_block returned an
diff --git a/fs/sysv/itree.c b/fs/sysv/itree.c
index 4068f48..82a005c 100644
--- a/fs/sysv/itree.c
+++ b/fs/sysv/itree.c
@@ -461,8 +461,7 @@ static int sysv_readpage(struct file *file, struct page *page)
 
 int sysv_prepare_chunk(struct page *page, loff_t pos, unsigned len)
 {
-	return block_write_begin_newtrunc(NULL, page->mapping, pos, len, 0,
-					  &page, NULL, get_block);
+	return __block_write_begin(page, pos, len, get_block);
 }
 
 static int sysv_write_begin(struct file *file, struct address_space *mapping,
diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c
index a9555b1..45ce323 100644
--- a/fs/ufs/inode.c
+++ b/fs/ufs/inode.c
@@ -560,8 +560,7 @@ static int ufs_readpage(struct file *file, struct page *page)
 
 int ufs_prepare_chunk(struct page *page, loff_t pos, unsigned len)
 {
-	return block_write_begin_newtrunc(NULL, page->mapping, pos, len, 0,
-					  &page, NULL, ufs_getfrag_block);
+	return __block_write_begin(page, pos, len, ufs_getfrag_block);
 }
 
 static int ufs_write_begin(struct file *file, struct address_space *mapping,
-- 
cgit v1.1


From 155130a4f7848b1aac439cab6bda1a175507c71c Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 4 Jun 2010 11:29:58 +0200
Subject: get rid of block_write_begin_newtrunc

Move the call to vmtruncate to get rid of accessive blocks to the callers
in preparation of the new truncate sequence and rename the non-truncating
version to block_write_begin.

While we're at it also remove several unused arguments to block_write_begin.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/bfs/file.c               | 14 ++++++++---
 fs/block_dev.c              |  5 ++--
 fs/buffer.c                 | 61 +++++++--------------------------------------
 fs/ext2/inode.c             |  5 ++--
 fs/minix/inode.c            | 12 +++++++--
 fs/nilfs2/inode.c           | 12 ++++++---
 fs/nilfs2/recovery.c        | 11 +++++---
 fs/omfs/file.c              | 14 ++++++++---
 fs/sysv/itree.c             | 13 +++++++---
 fs/udf/inode.c              | 13 +++++++---
 fs/ufs/inode.c              | 12 +++++++--
 fs/xfs/linux-2.6/xfs_aops.c | 14 ++++++++---
 12 files changed, 101 insertions(+), 85 deletions(-)

(limited to 'fs')

diff --git a/fs/bfs/file.c b/fs/bfs/file.c
index 88b9a3f..8fc2e9c 100644
--- a/fs/bfs/file.c
+++ b/fs/bfs/file.c
@@ -168,9 +168,17 @@ static int bfs_write_begin(struct file *file, struct address_space *mapping,
 			loff_t pos, unsigned len, unsigned flags,
 			struct page **pagep, void **fsdata)
 {
-	*pagep = NULL;
-	return block_write_begin(file, mapping, pos, len, flags,
-					pagep, fsdata, bfs_get_block);
+	int ret;
+
+	ret = block_write_begin(mapping, pos, len, flags, pagep,
+				bfs_get_block);
+	if (unlikely(ret)) {
+		loff_t isize = mapping->host->i_size;
+		if (pos + len > isize)
+			vmtruncate(mapping->host, isize);
+	}
+
+	return ret;
 }
 
 static sector_t bfs_bmap(struct address_space *mapping, sector_t block)
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 65a0c26..63c9d60 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -308,9 +308,8 @@ static int blkdev_write_begin(struct file *file, struct address_space *mapping,
 			loff_t pos, unsigned len, unsigned flags,
 			struct page **pagep, void **fsdata)
 {
-	*pagep = NULL;
-	return block_write_begin_newtrunc(file, mapping, pos, len, flags,
-				pagep, fsdata, blkdev_get_block);
+	return block_write_begin(mapping, pos, len, flags, pagep,
+				 blkdev_get_block);
 }
 
 static int blkdev_write_end(struct file *file, struct address_space *mapping,
diff --git a/fs/buffer.c b/fs/buffer.c
index c319c49..50efa33 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -1962,14 +1962,13 @@ int __block_write_begin(struct page *page, loff_t pos, unsigned len,
 EXPORT_SYMBOL(__block_write_begin);
 
 /*
- * Filesystems implementing the new truncate sequence should use the
- * _newtrunc postfix variant which won't incorrectly call vmtruncate.
+ * block_write_begin takes care of the basic task of block allocation and
+ * bringing partial write blocks uptodate first.
+ *
  * The filesystem needs to handle block truncation upon failure.
  */
-int block_write_begin_newtrunc(struct file *file, struct address_space *mapping,
-			loff_t pos, unsigned len, unsigned flags,
-			struct page **pagep, void **fsdata,
-			get_block_t *get_block)
+int block_write_begin(struct address_space *mapping, loff_t pos, unsigned len,
+		unsigned flags, struct page **pagep, get_block_t *get_block)
 {
 	pgoff_t index = pos >> PAGE_CACHE_SHIFT;
 	struct page *page;
@@ -1989,44 +1988,6 @@ int block_write_begin_newtrunc(struct file *file, struct address_space *mapping,
 	*pagep = page;
 	return status;
 }
-EXPORT_SYMBOL(block_write_begin_newtrunc);
-
-/*
- * block_write_begin takes care of the basic task of block allocation and
- * bringing partial write blocks uptodate first.
- *
- * If *pagep is not NULL, then block_write_begin uses the locked page
- * at *pagep rather than allocating its own. In this case, the page will
- * not be unlocked or deallocated on failure.
- */
-int block_write_begin(struct file *file, struct address_space *mapping,
-			loff_t pos, unsigned len, unsigned flags,
-			struct page **pagep, void **fsdata,
-			get_block_t *get_block)
-{
-	int ret;
-
-	ret = block_write_begin_newtrunc(file, mapping, pos, len, flags,
-					pagep, fsdata, get_block);
-
-	/*
-	 * prepare_write() may have instantiated a few blocks
-	 * outside i_size.  Trim these off again. Don't need
-	 * i_size_read because we hold i_mutex.
-	 *
-	 * Filesystems which pass down their own page also cannot
-	 * call into vmtruncate here because it would lead to lock
-	 * inversion problems (*pagep is locked). This is a further
-	 * example of where the old truncate sequence is inadequate.
-	 */
-	if (unlikely(ret) && *pagep == NULL) {
-		loff_t isize = mapping->host->i_size;
-		if (pos + len > isize)
-			vmtruncate(mapping->host, isize);
-	}
-
-	return ret;
-}
 EXPORT_SYMBOL(block_write_begin);
 
 int block_write_end(struct file *file, struct address_space *mapping,
@@ -2357,7 +2318,7 @@ int cont_write_begin(struct file *file, struct address_space *mapping,
 
 	err = cont_expand_zero(file, mapping, pos, bytes);
 	if (err)
-		goto out;
+		return err;
 
 	zerofrom = *bytes & ~PAGE_CACHE_MASK;
 	if (pos+len > *bytes && zerofrom & (blocksize-1)) {
@@ -2365,11 +2326,7 @@ int cont_write_begin(struct file *file, struct address_space *mapping,
 		(*bytes)++;
 	}
 
-	*pagep = NULL;
-	err = block_write_begin_newtrunc(file, mapping, pos, len,
-				flags, pagep, fsdata, get_block);
-out:
-	return err;
+	return block_write_begin(mapping, pos, len, flags, pagep, get_block);
 }
 EXPORT_SYMBOL(cont_write_begin);
 
@@ -2511,8 +2468,8 @@ int nobh_write_begin(struct address_space *mapping,
 		unlock_page(page);
 		page_cache_release(page);
 		*pagep = NULL;
-		return block_write_begin_newtrunc(NULL, mapping, pos, len,
-					flags, pagep, fsdata, get_block);
+		return block_write_begin(mapping, pos, len, flags, pagep,
+					 get_block);
 	}
 
 	if (PageMappedToDisk(page))
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index 2f4dfbc..74dfe5f 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -772,9 +772,8 @@ ext2_write_begin(struct file *file, struct address_space *mapping,
 {
 	int ret;
 
-	*pagep = NULL;
-	ret = block_write_begin_newtrunc(file, mapping, pos, len, flags,
-					 pagep, fsdata, ext2_get_block);
+	ret = block_write_begin(mapping, pos, len, flags, pagep,
+				ext2_get_block);
 	if (ret < 0)
 		ext2_write_failed(mapping, pos + len);
 	return ret;
diff --git a/fs/minix/inode.c b/fs/minix/inode.c
index 6b29e73..125062f 100644
--- a/fs/minix/inode.c
+++ b/fs/minix/inode.c
@@ -366,9 +366,17 @@ static int minix_write_begin(struct file *file, struct address_space *mapping,
 			loff_t pos, unsigned len, unsigned flags,
 			struct page **pagep, void **fsdata)
 {
-	*pagep = NULL;
-	return block_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
+	int ret;
+
+	ret = block_write_begin(mapping, pos, len, flags, pagep,
 				minix_get_block);
+	if (unlikely(ret)) {
+		loff_t isize = mapping->host->i_size;
+		if (pos + len > isize)
+			vmtruncate(mapping->host, isize);
+	}
+
+	return ret;
 }
 
 static sector_t minix_bmap(struct address_space *mapping, sector_t block)
diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c
index 1dd9e6a..5c694ec 100644
--- a/fs/nilfs2/inode.c
+++ b/fs/nilfs2/inode.c
@@ -197,11 +197,15 @@ static int nilfs_write_begin(struct file *file, struct address_space *mapping,
 	if (unlikely(err))
 		return err;
 
-	*pagep = NULL;
-	err = block_write_begin(file, mapping, pos, len, flags, pagep,
-				fsdata, nilfs_get_block);
-	if (unlikely(err))
+	err = block_write_begin(mapping, pos, len, flags, pagep,
+				nilfs_get_block);
+	if (unlikely(err)) {
+		loff_t isize = mapping->host->i_size;
+		if (pos + len > isize)
+			vmtruncate(mapping->host, isize);
+
 		nilfs_transaction_abort(inode->i_sb);
+	}
 	return err;
 }
 
diff --git a/fs/nilfs2/recovery.c b/fs/nilfs2/recovery.c
index bae2a51..2f11f08 100644
--- a/fs/nilfs2/recovery.c
+++ b/fs/nilfs2/recovery.c
@@ -505,11 +505,14 @@ static int recover_dsync_blocks(struct nilfs_sb_info *sbi,
 		}
 
 		pos = rb->blkoff << inode->i_blkbits;
-		page = NULL;
-		err = block_write_begin(NULL, inode->i_mapping, pos, blocksize,
-					0, &page, NULL, nilfs_get_block);
-		if (unlikely(err))
+		err = block_write_begin(inode->i_mapping, pos, blocksize,
+					0, &page, nilfs_get_block);
+		if (unlikely(err)) {
+			loff_t isize = inode->i_size;
+			if (pos + blocksize > isize)
+				vmtruncate(inode, isize);
 			goto failed_inode;
+		}
 
 		err = nilfs_recovery_copy_block(sbi, rb, page);
 		if (unlikely(err))
diff --git a/fs/omfs/file.c b/fs/omfs/file.c
index 6e7a329..810cff3 100644
--- a/fs/omfs/file.c
+++ b/fs/omfs/file.c
@@ -312,9 +312,17 @@ static int omfs_write_begin(struct file *file, struct address_space *mapping,
 			loff_t pos, unsigned len, unsigned flags,
 			struct page **pagep, void **fsdata)
 {
-	*pagep = NULL;
-	return block_write_begin(file, mapping, pos, len, flags,
-				pagep, fsdata, omfs_get_block);
+	int ret;
+
+	ret = block_write_begin(mapping, pos, len, flags, pagep,
+				omfs_get_block);
+	if (unlikely(ret)) {
+		loff_t isize = mapping->host->i_size;
+		if (pos + len > isize)
+			vmtruncate(mapping->host, isize);
+	}
+
+	return ret;
 }
 
 static sector_t omfs_bmap(struct address_space *mapping, sector_t block)
diff --git a/fs/sysv/itree.c b/fs/sysv/itree.c
index 82a005c..9ca6627 100644
--- a/fs/sysv/itree.c
+++ b/fs/sysv/itree.c
@@ -468,9 +468,16 @@ static int sysv_write_begin(struct file *file, struct address_space *mapping,
 			loff_t pos, unsigned len, unsigned flags,
 			struct page **pagep, void **fsdata)
 {
-	*pagep = NULL;
-	return block_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
-				get_block);
+	int ret;
+
+	ret = block_write_begin(mapping, pos, len, flags, pagep, get_block);
+	if (unlikely(ret)) {
+		loff_t isize = mapping->host->i_size;
+		if (pos + len > isize)
+			vmtruncate(mapping->host, isize);
+	}
+
+	return ret;
 }
 
 static sector_t sysv_bmap(struct address_space *mapping, sector_t block)
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index 124852b..ecddcc2 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -127,9 +127,16 @@ static int udf_write_begin(struct file *file, struct address_space *mapping,
 			loff_t pos, unsigned len, unsigned flags,
 			struct page **pagep, void **fsdata)
 {
-	*pagep = NULL;
-	return block_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
-				udf_get_block);
+	int ret;
+
+	ret = block_write_begin(mapping, pos, len, flags, pagep, udf_get_block);
+	if (unlikely(ret)) {
+		loff_t isize = mapping->host->i_size;
+		if (pos + len > isize)
+			vmtruncate(mapping->host, isize);
+	}
+
+	return ret;
 }
 
 static sector_t udf_bmap(struct address_space *mapping, sector_t block)
diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c
index 45ce323..45cafa9 100644
--- a/fs/ufs/inode.c
+++ b/fs/ufs/inode.c
@@ -567,9 +567,17 @@ static int ufs_write_begin(struct file *file, struct address_space *mapping,
 			loff_t pos, unsigned len, unsigned flags,
 			struct page **pagep, void **fsdata)
 {
-	*pagep = NULL;
-	return block_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
+	int ret;
+
+	ret = block_write_begin(mapping, pos, len, flags, pagep,
 				ufs_getfrag_block);
+	if (unlikely(ret)) {
+		loff_t isize = mapping->host->i_size;
+		if (pos + len > isize)
+			vmtruncate(mapping->host, isize);
+	}
+
+	return ret;
 }
 
 static sector_t ufs_bmap(struct address_space *mapping, sector_t block)
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index 7968d41..bf7aad0 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -1504,9 +1504,17 @@ xfs_vm_write_begin(
 	struct page		**pagep,
 	void			**fsdata)
 {
-	*pagep = NULL;
-	return block_write_begin(file, mapping, pos, len, flags | AOP_FLAG_NOFS,
-				 pagep, fsdata, xfs_get_blocks);
+	int			ret;
+
+	ret = block_write_begin(mapping, pos, len, flags | AOP_FLAG_NOFS,
+				pagep, xfs_get_blocks);
+	if (unlikely(ret)) {
+		loff_t isize = mapping->host->i_size;
+		if (pos + len > isize)
+			vmtruncate(mapping->host, isize);
+	}
+
+	return ret;
 }
 
 STATIC sector_t
-- 
cgit v1.1


From d39aae9ec447dda84d9a2850743a78a535a71c90 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 4 Jun 2010 11:29:59 +0200
Subject: add missing setattr methods

For the new truncate sequence every filesystem that wants to truncate on-disk
state needs a seattr method.  Convert the remaining filesystems that implement
the truncate inode operation to have its own setattr method.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/hfsplus/inode.c | 12 ++++++++++++
 fs/minix/file.c    | 12 ++++++++++++
 fs/omfs/file.c     | 12 ++++++++++++
 fs/sysv/file.c     | 12 ++++++++++++
 fs/udf/file.c      | 12 ++++++++++++
 5 files changed, 60 insertions(+)

(limited to 'fs')

diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c
index 88bf1b5..d6ebe53 100644
--- a/fs/hfsplus/inode.c
+++ b/fs/hfsplus/inode.c
@@ -290,9 +290,21 @@ static int hfsplus_file_release(struct inode *inode, struct file *file)
 	return 0;
 }
 
+static int hfsplus_setattr(struct dentry *dentry, struct iattr *attr)
+{
+	struct inode *inode = dentry->d_inode;
+	int error;
+
+	error = inode_change_ok(inode, attr);
+	if (error)
+		return error;
+	return inode_setattr(inode, attr);
+}
+
 static const struct inode_operations hfsplus_file_inode_operations = {
 	.lookup		= hfsplus_file_lookup,
 	.truncate	= hfsplus_file_truncate,
+	.setattr	= hfsplus_setattr,
 	.setxattr	= hfsplus_setxattr,
 	.getxattr	= hfsplus_getxattr,
 	.listxattr	= hfsplus_listxattr,
diff --git a/fs/minix/file.c b/fs/minix/file.c
index d5320ff..7a45dd1 100644
--- a/fs/minix/file.c
+++ b/fs/minix/file.c
@@ -23,7 +23,19 @@ const struct file_operations minix_file_operations = {
 	.splice_read	= generic_file_splice_read,
 };
 
+static int minix_setattr(struct dentry *dentry, struct iattr *attr)
+{
+	struct inode *inode = dentry->d_inode;
+	int error;
+
+	error = inode_change_ok(inode, attr);
+	if (error)
+		return error;
+	return inode_setattr(inode, attr);
+}
+
 const struct inode_operations minix_file_inode_operations = {
 	.truncate	= minix_truncate,
+	.setattr	= minix_setattr,
 	.getattr	= minix_getattr,
 };
diff --git a/fs/omfs/file.c b/fs/omfs/file.c
index 810cff3..78c9f0c 100644
--- a/fs/omfs/file.c
+++ b/fs/omfs/file.c
@@ -341,7 +341,19 @@ const struct file_operations omfs_file_operations = {
 	.splice_read = generic_file_splice_read,
 };
 
+static int omfs_setattr(struct dentry *dentry, struct iattr *attr)
+{
+	struct inode *inode = dentry->d_inode;
+	int error;
+
+	error = inode_change_ok(inode, attr);
+	if (error)
+		return error;
+	return inode_setattr(inode, attr);
+}
+
 const struct inode_operations omfs_file_inops = {
+	.setattr = omfs_setattr,
 	.truncate = omfs_truncate
 };
 
diff --git a/fs/sysv/file.c b/fs/sysv/file.c
index 750cc22..94f6319 100644
--- a/fs/sysv/file.c
+++ b/fs/sysv/file.c
@@ -30,7 +30,19 @@ const struct file_operations sysv_file_operations = {
 	.splice_read	= generic_file_splice_read,
 };
 
+static int sysv_setattr(struct dentry *dentry, struct iattr *attr)
+{
+	struct inode *inode = dentry->d_inode;
+	int error;
+
+	error = inode_change_ok(inode, attr);
+	if (error)
+		return error;
+	return inode_setattr(inode, attr);
+}
+
 const struct inode_operations sysv_file_inode_operations = {
 	.truncate	= sysv_truncate,
+	.setattr	= sysv_setattr,
 	.getattr	= sysv_getattr,
 };
diff --git a/fs/udf/file.c b/fs/udf/file.c
index 94e06d6..7376032c 100644
--- a/fs/udf/file.c
+++ b/fs/udf/file.c
@@ -228,6 +228,18 @@ const struct file_operations udf_file_operations = {
 	.llseek			= generic_file_llseek,
 };
 
+static int udf_setattr(struct dentry *dentry, struct iattr *attr)
+{
+	struct inode *inode = dentry->d_inode;
+	int error;
+
+	error = inode_change_ok(inode, attr);
+	if (error)
+		return error;
+	return inode_setattr(inode, attr);
+}
+
 const struct inode_operations udf_file_inode_operations = {
+	.setattr		= udf_setattr,
 	.truncate		= udf_truncate,
 };
-- 
cgit v1.1


From 6a1a90ad1b0edb556a7550a6ef8a8756f0304dd5 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 4 Jun 2010 11:30:00 +0200
Subject: rename generic_setattr

Despite its name it's now a generic implementation of ->setattr, but
rather a helper to copy attributes from a struct iattr to the inode.
Rename it to setattr_copy to reflect this fact.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/attr.c             | 14 +++++++-------
 fs/ext2/inode.c       |  2 +-
 fs/fat/file.c         |  2 +-
 fs/libfs.c            |  3 +--
 fs/ramfs/file-nommu.c |  2 +-
 fs/sysfs/inode.c      |  2 +-
 6 files changed, 12 insertions(+), 13 deletions(-)

(limited to 'fs')

diff --git a/fs/attr.c b/fs/attr.c
index b4fa3b0..1f6a895 100644
--- a/fs/attr.c
+++ b/fs/attr.c
@@ -105,13 +105,13 @@ out_big:
 EXPORT_SYMBOL(inode_newsize_ok);
 
 /**
- * generic_setattr - copy simple metadata updates into the generic inode
+ * setattr_copy - copy simple metadata updates into the generic inode
  * @inode:	the inode to be updated
  * @attr:	the new attributes
  *
- * generic_setattr must be called with i_mutex held.
+ * setattr_copy must be called with i_mutex held.
  *
- * generic_setattr updates the inode's metadata with that specified
+ * setattr_copy updates the inode's metadata with that specified
  * in attr. Noticably missing is inode size update, which is more complex
  * as it requires pagecache updates. See simple_setsize.
  *
@@ -119,7 +119,7 @@ EXPORT_SYMBOL(inode_newsize_ok);
  * that for "simple" filesystems, the struct inode is the inode storage.
  * The caller is free to mark the inode dirty afterwards if needed.
  */
-void generic_setattr(struct inode *inode, const struct iattr *attr)
+void setattr_copy(struct inode *inode, const struct iattr *attr)
 {
 	unsigned int ia_valid = attr->ia_valid;
 
@@ -144,11 +144,11 @@ void generic_setattr(struct inode *inode, const struct iattr *attr)
 		inode->i_mode = mode;
 	}
 }
-EXPORT_SYMBOL(generic_setattr);
+EXPORT_SYMBOL(setattr_copy);
 
 /*
  * note this function is deprecated, the new truncate sequence should be
- * used instead -- see eg. simple_setsize, generic_setattr.
+ * used instead -- see eg. simple_setsize, setattr_copy.
  */
 int inode_setattr(struct inode *inode, const struct iattr *attr)
 {
@@ -163,7 +163,7 @@ int inode_setattr(struct inode *inode, const struct iattr *attr)
 			return error;
 	}
 
-	generic_setattr(inode, attr);
+	setattr_copy(inode, attr);
 
 	mark_inode_dirty(inode);
 
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index 74dfe5f..7dee7b3 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -1544,7 +1544,7 @@ int ext2_setattr(struct dentry *dentry, struct iattr *iattr)
 		if (error)
 			return error;
 	}
-	generic_setattr(inode, iattr);
+	setattr_copy(inode, iattr);
 	if (iattr->ia_valid & ATTR_MODE)
 		error = ext2_acl_chmod(inode);
 	mark_inode_dirty(inode);
diff --git a/fs/fat/file.c b/fs/fat/file.c
index 990dfae..20813d2 100644
--- a/fs/fat/file.c
+++ b/fs/fat/file.c
@@ -446,7 +446,7 @@ int fat_setattr(struct dentry *dentry, struct iattr *attr)
 			goto out;
 	}
 
-	generic_setattr(inode, attr);
+	setattr_copy(inode, attr);
 	mark_inode_dirty(inode);
 out:
 	return error;
diff --git a/fs/libfs.c b/fs/libfs.c
index dcaf972..861a887 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -395,8 +395,7 @@ int simple_setattr(struct dentry *dentry, struct iattr *iattr)
 			return error;
 	}
 
-	generic_setattr(inode, iattr);
-
+	setattr_copy(inode, iattr);
 	return error;
 }
 EXPORT_SYMBOL(simple_setattr);
diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c
index d532c20..8d44f03 100644
--- a/fs/ramfs/file-nommu.c
+++ b/fs/ramfs/file-nommu.c
@@ -183,7 +183,7 @@ static int ramfs_nommu_setattr(struct dentry *dentry, struct iattr *ia)
 		}
 	}
 
-	generic_setattr(inode, ia);
+	setattr_copy(inode, ia);
  out:
 	ia->ia_valid = old_ia_valid;
 	return ret;
diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c
index 0835a3b..7e187fb 100644
--- a/fs/sysfs/inode.c
+++ b/fs/sysfs/inode.c
@@ -122,7 +122,7 @@ int sysfs_setattr(struct dentry *dentry, struct iattr *iattr)
 		goto out;
 
 	/* this ignores size changes */
-	generic_setattr(inode, iattr);
+	setattr_copy(inode, iattr);
 
 out:
 	mutex_unlock(&sysfs_mutex);
-- 
cgit v1.1


From eef2380c187890816b73b1a4cb89a09203759469 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 4 Jun 2010 11:30:01 +0200
Subject: default to simple_setattr

With the new truncate sequence every filesystem that wants to support file
size changes on disk needs to implement its own ->setattr.  So instead
of calling inode_setattr which supports size changes call into a simple
method that doesn't support this.  simple_setattr is almost what we
want except that it does not mark the inode dirty after changes.  Given
that marking the inode dirty is a no-op for the simple in-memory filesystems
that use simple_setattr currently just add the mark_inode_dirty call.

Also add a WARN_ON for the presence of a truncate method to simple_setattr
to catch new instances of it during the transition period.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/attr.c  |  9 +++------
 fs/libfs.c | 16 +++++++++++-----
 2 files changed, 14 insertions(+), 11 deletions(-)

(limited to 'fs')

diff --git a/fs/attr.c b/fs/attr.c
index 1f6a895..aeac826 100644
--- a/fs/attr.c
+++ b/fs/attr.c
@@ -237,13 +237,10 @@ int notify_change(struct dentry * dentry, struct iattr * attr)
 	if (ia_valid & ATTR_SIZE)
 		down_write(&dentry->d_inode->i_alloc_sem);
 
-	if (inode->i_op && inode->i_op->setattr) {
+	if (inode->i_op->setattr)
 		error = inode->i_op->setattr(dentry, attr);
-	} else {
-		error = inode_change_ok(inode, attr);
-		if (!error)
-			error = inode_setattr(inode, attr);
-	}
+	else
+		error = simple_setattr(dentry, attr);
 
 	if (ia_valid & ATTR_SIZE)
 		up_write(&dentry->d_inode->i_alloc_sem);
diff --git a/fs/libfs.c b/fs/libfs.c
index 861a887..4056222 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -370,21 +370,26 @@ int simple_setsize(struct inode *inode, loff_t newsize)
 EXPORT_SYMBOL(simple_setsize);
 
 /**
- * simple_setattr - setattr for simple in-memory filesystem
+ * simple_setattr - setattr for simple filesystem
  * @dentry: dentry
  * @iattr: iattr structure
  *
  * Returns 0 on success, -error on failure.
  *
- * simple_setattr implements setattr for an in-memory filesystem which
- * does not store its own file data or metadata (eg. uses the page cache
- * and inode cache as its data store).
+ * simple_setattr is a simple ->setattr implementation without a proper
+ * implementation of size changes.
+ *
+ * It can either be used for in-memory filesystems or special files
+ * on simple regular filesystems.  Anything that needs to change on-disk
+ * or wire state on size changes needs its own setattr method.
  */
 int simple_setattr(struct dentry *dentry, struct iattr *iattr)
 {
 	struct inode *inode = dentry->d_inode;
 	int error;
 
+	WARN_ON_ONCE(inode->i_op->truncate);
+
 	error = inode_change_ok(inode, iattr);
 	if (error)
 		return error;
@@ -396,7 +401,8 @@ int simple_setattr(struct dentry *dentry, struct iattr *iattr)
 	}
 
 	setattr_copy(inode, iattr);
-	return error;
+	mark_inode_dirty(inode);
+	return 0;
 }
 EXPORT_SYMBOL(simple_setattr);
 
-- 
cgit v1.1


From 1025774ce411f2bd4b059ad7b53f0003569b74fa Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 4 Jun 2010 11:30:02 +0200
Subject: remove inode_setattr

Replace inode_setattr with opencoded variants of it in all callers.  This
moves the remaining call to vmtruncate into the filesystem methods where it
can be replaced with the proper truncate sequence.

In a few cases it was obvious that we would never end up calling vmtruncate
so it was left out in the opencoded variant:

 spufs: explicitly checks for ATTR_SIZE earlier
 btrfs,hugetlbfs,logfs,dlmfs: explicitly clears ATTR_SIZE earlier
 ufs: contains an opencoded simple_seattr + truncate that sets the filesize just above

In addition to that ncpfs called inode_setattr with handcrafted iattrs,
which allowed to trim down the opencoded variant.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/9p/vfs_inode.c       | 15 ++++++--
 fs/affs/inode.c         | 13 ++++++-
 fs/attr.c               | 25 -------------
 fs/btrfs/inode.c        | 12 +++---
 fs/cifs/inode.c         | 45 +++++++++++++++++------
 fs/exofs/inode.c        | 14 ++++++-
 fs/ext3/inode.c         | 12 +++++-
 fs/ext4/inode.c         | 16 ++++++--
 fs/gfs2/inode.c         | 25 +++++++++----
 fs/gfs2/ops_inode.c     | 12 +++++-
 fs/gfs2/xattr.c         | 24 +++++++++---
 fs/hfs/inode.c          | 12 ++++--
 fs/hfsplus/inode.c      | 12 +++++-
 fs/hostfs/hostfs_kern.c | 18 +++++++--
 fs/hpfs/inode.c         | 12 ++++--
 fs/hugetlbfs/inode.c    | 17 +++++----
 fs/jfs/file.c           | 14 +++++--
 fs/logfs/file.c         | 18 +++++----
 fs/minix/file.c         | 12 +++++-
 fs/ncpfs/inode.c        | 24 ++++++------
 fs/nilfs2/inode.c       | 25 ++++++++++---
 fs/ntfs/inode.c         |  3 --
 fs/ocfs2/dlmfs/dlmfs.c  |  8 ++--
 fs/ocfs2/file.c         | 16 ++++++--
 fs/omfs/file.c          | 12 +++++-
 fs/proc/base.c          | 16 ++++++--
 fs/proc/generic.c       | 18 ++++++---
 fs/proc/proc_sysctl.c   | 15 ++++++--
 fs/reiserfs/inode.c     | 97 ++++++++++++++++++++++++++-----------------------
 fs/sysv/file.c          | 12 +++++-
 fs/udf/file.c           | 12 +++++-
 fs/ufs/truncate.c       |  5 ++-
 32 files changed, 403 insertions(+), 188 deletions(-)

(limited to 'fs')

diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 4331b3b..4b3ad6a 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -896,10 +896,19 @@ static int v9fs_vfs_setattr(struct dentry *dentry, struct iattr *iattr)
 	}
 
 	retval = p9_client_wstat(fid, &wstat);
-	if (retval >= 0)
-		retval = inode_setattr(dentry->d_inode, iattr);
+	if (retval < 0)
+		return retval;
+
+	if ((iattr->ia_valid & ATTR_SIZE) &&
+	    iattr->ia_size != i_size_read(dentry->d_inode)) {
+		retval = vmtruncate(dentry->d_inode, iattr->ia_size);
+		if (retval)
+			return retval;
+	}
 
-	return retval;
+	setattr_copy(dentry->d_inode, iattr);
+	mark_inode_dirty(dentry->d_inode);
+	return 0;
 }
 
 /**
diff --git a/fs/affs/inode.c b/fs/affs/inode.c
index f4b2a4e..6883d5f 100644
--- a/fs/affs/inode.c
+++ b/fs/affs/inode.c
@@ -235,8 +235,17 @@ affs_notify_change(struct dentry *dentry, struct iattr *attr)
 		goto out;
 	}
 
-	error = inode_setattr(inode, attr);
-	if (!error && (attr->ia_valid & ATTR_MODE))
+	if ((attr->ia_valid & ATTR_SIZE) &&
+	    attr->ia_size != i_size_read(inode)) {
+		error = vmtruncate(inode, attr->ia_size);
+		if (error)
+			return error;
+	}
+
+	setattr_copy(inode, attr);
+	mark_inode_dirty(inode);
+
+	if (attr->ia_valid & ATTR_MODE)
 		mode_to_prot(inode);
 out:
 	return error;
diff --git a/fs/attr.c b/fs/attr.c
index aeac826..ed44d8a 100644
--- a/fs/attr.c
+++ b/fs/attr.c
@@ -146,31 +146,6 @@ void setattr_copy(struct inode *inode, const struct iattr *attr)
 }
 EXPORT_SYMBOL(setattr_copy);
 
-/*
- * note this function is deprecated, the new truncate sequence should be
- * used instead -- see eg. simple_setsize, setattr_copy.
- */
-int inode_setattr(struct inode *inode, const struct iattr *attr)
-{
-	unsigned int ia_valid = attr->ia_valid;
-
-	if (ia_valid & ATTR_SIZE &&
-	    attr->ia_size != i_size_read(inode)) {
-		int error;
-
-		error = vmtruncate(inode, attr->ia_size);
-		if (error)
-			return error;
-	}
-
-	setattr_copy(inode, attr);
-
-	mark_inode_dirty(inode);
-
-	return 0;
-}
-EXPORT_SYMBOL(inode_setattr);
-
 int notify_change(struct dentry * dentry, struct iattr * attr)
 {
 	struct inode *inode = dentry->d_inode;
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 1bff92a..7f9e053 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -3656,13 +3656,15 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr)
 		if (err)
 			return err;
 	}
-	attr->ia_valid &= ~ATTR_SIZE;
 
-	if (attr->ia_valid)
-		err = inode_setattr(inode, attr);
+	if (attr->ia_valid) {
+		setattr_copy(inode, attr);
+		mark_inode_dirty(inode);
+
+		if (attr->ia_valid & ATTR_MODE)
+			err = btrfs_acl_chmod(inode);
+	}
 
-	if (!err && ((attr->ia_valid & ATTR_MODE)))
-		err = btrfs_acl_chmod(inode);
 	return err;
 }
 
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index a15b3a9..9c6a40f 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -1889,18 +1889,27 @@ cifs_setattr_unix(struct dentry *direntry, struct iattr *attrs)
 					CIFS_MOUNT_MAP_SPECIAL_CHR);
 	}
 
-	if (!rc) {
-		rc = inode_setattr(inode, attrs);
+	if (rc)
+		goto out;
 
-		/* force revalidate when any of these times are set since some
-		   of the fs types (eg ext3, fat) do not have fine enough
-		   time granularity to match protocol, and we do not have a
-		   a way (yet) to query the server fs's time granularity (and
-		   whether it rounds times down).
-		*/
-		if (!rc && (attrs->ia_valid & (ATTR_MTIME | ATTR_CTIME)))
-			cifsInode->time = 0;
+	if ((attrs->ia_valid & ATTR_SIZE) &&
+	    attrs->ia_size != i_size_read(inode)) {
+		rc = vmtruncate(inode, attrs->ia_size);
+		if (rc)
+			goto out;
 	}
+
+	setattr_copy(inode, attrs);
+	mark_inode_dirty(inode);
+
+	/* force revalidate when any of these times are set since some
+	   of the fs types (eg ext3, fat) do not have fine enough
+	   time granularity to match protocol, and we do not have a
+	   a way (yet) to query the server fs's time granularity (and
+	   whether it rounds times down).
+	*/
+	if (attrs->ia_valid & (ATTR_MTIME | ATTR_CTIME))
+		cifsInode->time = 0;
 out:
 	kfree(args);
 	kfree(full_path);
@@ -2040,8 +2049,20 @@ cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs)
 
 	/* do not need local check to inode_check_ok since the server does
 	   that */
-	if (!rc)
-		rc = inode_setattr(inode, attrs);
+	if (rc)
+		goto cifs_setattr_exit;
+
+	if ((attrs->ia_valid & ATTR_SIZE) &&
+	    attrs->ia_size != i_size_read(inode)) {
+		rc = vmtruncate(inode, attrs->ia_size);
+		if (rc)
+			goto cifs_setattr_exit;
+	}
+
+	setattr_copy(inode, attrs);
+	mark_inode_dirty(inode);
+	return 0;
+
 cifs_setattr_exit:
 	kfree(full_path);
 	FreeXid(xid);
diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c
index 4bb6ef8..4bfc1f4 100644
--- a/fs/exofs/inode.c
+++ b/fs/exofs/inode.c
@@ -887,8 +887,18 @@ int exofs_setattr(struct dentry *dentry, struct iattr *iattr)
 	if (error)
 		return error;
 
-	error = inode_setattr(inode, iattr);
-	return error;
+	if ((iattr->ia_valid & ATTR_SIZE) &&
+	    iattr->ia_size != i_size_read(inode)) {
+		int error;
+
+		error = vmtruncate(inode, iattr->ia_size);
+		if (error)
+			return error;
+	}
+
+	setattr_copy(inode, iattr);
+	mark_inode_dirty(inode);
+	return 0;
 }
 
 static const struct osd_attr g_attr_inode_file_layout = ATTR_DEF(
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 5c6f07e..b04d119 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -3208,9 +3208,17 @@ int ext3_setattr(struct dentry *dentry, struct iattr *attr)
 		ext3_journal_stop(handle);
 	}
 
-	rc = inode_setattr(inode, attr);
+	if ((attr->ia_valid & ATTR_SIZE) &&
+	    attr->ia_size != i_size_read(inode)) {
+		rc = vmtruncate(inode, attr->ia_size);
+		if (rc)
+			goto err_out;
+	}
+
+	setattr_copy(inode, attr);
+	mark_inode_dirty(inode);
 
-	if (!rc && (ia_valid & ATTR_MODE))
+	if (ia_valid & ATTR_MODE)
 		rc = ext3_acl_chmod(inode);
 
 err_out:
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 3da3c96..1fb3903 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -5539,11 +5539,19 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
 			ext4_truncate(inode);
 	}
 
-	rc = inode_setattr(inode, attr);
+	if ((attr->ia_valid & ATTR_SIZE) &&
+	    attr->ia_size != i_size_read(inode))
+		rc = vmtruncate(inode, attr->ia_size);
 
-	/* If inode_setattr's call to ext4_truncate failed to get a
-	 * transaction handle at all, we need to clean up the in-core
-	 * orphan list manually. */
+	if (!rc) {
+		setattr_copy(inode, attr);
+		mark_inode_dirty(inode);
+	}
+
+	/*
+	 * If the call to ext4_truncate failed to get a transaction handle at
+	 * all, we need to clean up the in-core orphan list manually.
+	 */
 	if (inode->i_nlink)
 		ext4_orphan_del(NULL, inode);
 
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index f03afd9..6c023a3 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -991,18 +991,29 @@ fail:
 
 static int __gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr)
 {
+	struct inode *inode = &ip->i_inode;
 	struct buffer_head *dibh;
 	int error;
 
 	error = gfs2_meta_inode_buffer(ip, &dibh);
-	if (!error) {
-		error = inode_setattr(&ip->i_inode, attr);
-		gfs2_assert_warn(GFS2_SB(&ip->i_inode), !error);
-		gfs2_trans_add_bh(ip->i_gl, dibh, 1);
-		gfs2_dinode_out(ip, dibh->b_data);
-		brelse(dibh);
+	if (error)
+		return error;
+
+	if ((attr->ia_valid & ATTR_SIZE) &&
+	    attr->ia_size != i_size_read(inode)) {
+		error = vmtruncate(inode, attr->ia_size);
+		if (error)
+			return error;
 	}
-	return error;
+
+	setattr_copy(inode, attr);
+	mark_inode_dirty(inode);
+
+	gfs2_assert_warn(GFS2_SB(inode), !error);
+	gfs2_trans_add_bh(ip->i_gl, dibh, 1);
+	gfs2_dinode_out(ip, dibh->b_data);
+	brelse(dibh);
+	return 0;
 }
 
 /**
diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c
index 98cdd05..d7d410a 100644
--- a/fs/gfs2/ops_inode.c
+++ b/fs/gfs2/ops_inode.c
@@ -1136,8 +1136,16 @@ static int setattr_chown(struct inode *inode, struct iattr *attr)
 	if (error)
 		goto out_end_trans;
 
-	error = inode_setattr(inode, attr);
-	gfs2_assert_warn(sdp, !error);
+	if ((attr->ia_valid & ATTR_SIZE) &&
+	    attr->ia_size != i_size_read(inode)) {
+		int error;
+
+		error = vmtruncate(inode, attr->ia_size);
+		gfs2_assert_warn(sdp, !error);
+	}
+
+	setattr_copy(inode, attr);
+	mark_inode_dirty(inode);
 
 	gfs2_trans_add_bh(ip->i_gl, dibh, 1);
 	gfs2_dinode_out(ip, dibh->b_data);
diff --git a/fs/gfs2/xattr.c b/fs/gfs2/xattr.c
index 82f93da..776af6e 100644
--- a/fs/gfs2/xattr.c
+++ b/fs/gfs2/xattr.c
@@ -1296,6 +1296,7 @@ fail:
 
 int gfs2_xattr_acl_chmod(struct gfs2_inode *ip, struct iattr *attr, char *data)
 {
+	struct inode *inode = &ip->i_inode;
 	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
 	struct gfs2_ea_location el;
 	struct buffer_head *dibh;
@@ -1321,14 +1322,25 @@ int gfs2_xattr_acl_chmod(struct gfs2_inode *ip, struct iattr *attr, char *data)
 		return error;
 
 	error = gfs2_meta_inode_buffer(ip, &dibh);
-	if (!error) {
-		error = inode_setattr(&ip->i_inode, attr);
-		gfs2_assert_warn(GFS2_SB(&ip->i_inode), !error);
-		gfs2_trans_add_bh(ip->i_gl, dibh, 1);
-		gfs2_dinode_out(ip, dibh->b_data);
-		brelse(dibh);
+	if (error)
+		goto out_trans_end;
+
+	if ((attr->ia_valid & ATTR_SIZE) &&
+	    attr->ia_size != i_size_read(inode)) {
+		int error;
+
+		error = vmtruncate(inode, attr->ia_size);
+		gfs2_assert_warn(GFS2_SB(inode), !error);
 	}
 
+	setattr_copy(inode, attr);
+	mark_inode_dirty(inode);
+
+	gfs2_trans_add_bh(ip->i_gl, dibh, 1);
+	gfs2_dinode_out(ip, dibh->b_data);
+	brelse(dibh);
+
+out_trans_end:
 	gfs2_trans_end(sdp);
 	return error;
 }
diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c
index 8df18e6..87de671 100644
--- a/fs/hfs/inode.c
+++ b/fs/hfs/inode.c
@@ -612,10 +612,16 @@ int hfs_inode_setattr(struct dentry *dentry, struct iattr * attr)
 			attr->ia_mode = inode->i_mode & ~S_IWUGO;
 		attr->ia_mode &= S_ISDIR(inode->i_mode) ? ~hsb->s_dir_umask: ~hsb->s_file_umask;
 	}
-	error = inode_setattr(inode, attr);
-	if (error)
-		return error;
 
+	if ((attr->ia_valid & ATTR_SIZE) &&
+	    attr->ia_size != i_size_read(inode)) {
+		error = vmtruncate(inode, attr->ia_size);
+		if (error)
+			return error;
+	}
+
+	setattr_copy(inode, attr);
+	mark_inode_dirty(inode);
 	return 0;
 }
 
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c
index d6ebe53..654c5a8 100644
--- a/fs/hfsplus/inode.c
+++ b/fs/hfsplus/inode.c
@@ -298,7 +298,17 @@ static int hfsplus_setattr(struct dentry *dentry, struct iattr *attr)
 	error = inode_change_ok(inode, attr);
 	if (error)
 		return error;
-	return inode_setattr(inode, attr);
+
+	if ((attr->ia_valid & ATTR_SIZE) &&
+	    attr->ia_size != i_size_read(inode)) {
+		error = vmtruncate(inode, attr->ia_size);
+		if (error)
+			return error;
+	}
+
+	setattr_copy(inode, attr);
+	mark_inode_dirty(inode);
+	return 0;
 }
 
 static const struct inode_operations hfsplus_file_inode_operations = {
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index 87ac189..7943ff1 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -849,13 +849,14 @@ int hostfs_permission(struct inode *ino, int desired)
 
 int hostfs_setattr(struct dentry *dentry, struct iattr *attr)
 {
+	struct inode *inode = dentry->d_inode;
 	struct hostfs_iattr attrs;
 	char *name;
 	int err;
 
-	int fd = HOSTFS_I(dentry->d_inode)->fd;
+	int fd = HOSTFS_I(inode)->fd;
 
-	err = inode_change_ok(dentry->d_inode, attr);
+	err = inode_change_ok(inode, attr);
 	if (err)
 		return err;
 
@@ -905,7 +906,18 @@ int hostfs_setattr(struct dentry *dentry, struct iattr *attr)
 	if (err)
 		return err;
 
-	return inode_setattr(dentry->d_inode, attr);
+	if ((attr->ia_valid & ATTR_SIZE) &&
+	    attr->ia_size != i_size_read(inode)) {
+		int error;
+
+		error = vmtruncate(inode, attr->ia_size);
+		if (err)
+			return err;
+	}
+
+	setattr_copy(inode, attr);
+	mark_inode_dirty(inode);
+	return 0;
 }
 
 static const struct inode_operations hostfs_iops = {
diff --git a/fs/hpfs/inode.c b/fs/hpfs/inode.c
index 1042a9b..3f3b397 100644
--- a/fs/hpfs/inode.c
+++ b/fs/hpfs/inode.c
@@ -277,9 +277,15 @@ int hpfs_setattr(struct dentry *dentry, struct iattr *attr)
 	if (error)
 		goto out_unlock;
 
-	error = inode_setattr(inode, attr);
-	if (error)
-		goto out_unlock;
+	if ((attr->ia_valid & ATTR_SIZE) &&
+	    attr->ia_size != i_size_read(inode)) {
+		error = vmtruncate(inode, attr->ia_size);
+		if (error)
+			return error;
+	}
+
+	setattr_copy(inode, attr);
+	mark_inode_dirty(inode);
 
 	hpfs_write_inode(inode);
 
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index a4e9a7e..d5f019d 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -448,19 +448,20 @@ static int hugetlbfs_setattr(struct dentry *dentry, struct iattr *attr)
 
 	error = inode_change_ok(inode, attr);
 	if (error)
-		goto out;
+		return error;
 
 	if (ia_valid & ATTR_SIZE) {
 		error = -EINVAL;
-		if (!(attr->ia_size & ~huge_page_mask(h)))
-			error = hugetlb_vmtruncate(inode, attr->ia_size);
+		if (attr->ia_size & ~huge_page_mask(h))
+			return -EINVAL;
+		error = hugetlb_vmtruncate(inode, attr->ia_size);
 		if (error)
-			goto out;
-		attr->ia_valid &= ~ATTR_SIZE;
+			return error;
 	}
-	error = inode_setattr(inode, attr);
-out:
-	return error;
+
+	setattr_copy(inode, attr);
+	mark_inode_dirty(inode);
+	return 0;
 }
 
 static struct inode *hugetlbfs_get_inode(struct super_block *sb, uid_t uid, 
diff --git a/fs/jfs/file.c b/fs/jfs/file.c
index 127263c..c5ce6c1 100644
--- a/fs/jfs/file.c
+++ b/fs/jfs/file.c
@@ -17,6 +17,7 @@
  *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  */
 
+#include <linux/mm.h>
 #include <linux/fs.h>
 #include <linux/quotaops.h>
 #include "jfs_incore.h"
@@ -107,11 +108,18 @@ int jfs_setattr(struct dentry *dentry, struct iattr *iattr)
 			return rc;
 	}
 
-	rc = inode_setattr(inode, iattr);
+	if ((iattr->ia_valid & ATTR_SIZE) &&
+	    iattr->ia_size != i_size_read(inode)) {
+		rc = vmtruncate(inode, iattr->ia_size);
+		if (rc)
+			return rc;
+	}
 
-	if (!rc && (iattr->ia_valid & ATTR_MODE))
-		rc = jfs_acl_chmod(inode);
+	setattr_copy(inode, iattr);
+	mark_inode_dirty(inode);
 
+	if (iattr->ia_valid & ATTR_MODE)
+		rc = jfs_acl_chmod(inode);
 	return rc;
 }
 
diff --git a/fs/logfs/file.c b/fs/logfs/file.c
index abe1caf..23b4d03 100644
--- a/fs/logfs/file.c
+++ b/fs/logfs/file.c
@@ -232,15 +232,19 @@ static int logfs_setattr(struct dentry *dentry, struct iattr *attr)
 	struct inode *inode = dentry->d_inode;
 	int err = 0;
 
-	if (attr->ia_valid & ATTR_SIZE)
+	if (attr->ia_valid & ATTR_SIZE) {
 		err = logfs_truncate(inode, attr->ia_size);
-	attr->ia_valid &= ~ATTR_SIZE;
+		if (err)
+			return err;
+	}
 
-	if (!err)
-		err = inode_change_ok(inode, attr);
-	if (!err)
-		err = inode_setattr(inode, attr);
-	return err;
+	err = inode_change_ok(inode, attr);
+	if (err)
+		return err;
+
+	setattr_copy(inode, attr);
+	mark_inode_dirty(inode);
+	return 0;
 }
 
 const struct inode_operations logfs_reg_iops = {
diff --git a/fs/minix/file.c b/fs/minix/file.c
index 7a45dd1..4493ce6 100644
--- a/fs/minix/file.c
+++ b/fs/minix/file.c
@@ -31,7 +31,17 @@ static int minix_setattr(struct dentry *dentry, struct iattr *attr)
 	error = inode_change_ok(inode, attr);
 	if (error)
 		return error;
-	return inode_setattr(inode, attr);
+
+	if ((attr->ia_valid & ATTR_SIZE) &&
+	    attr->ia_size != i_size_read(inode)) {
+		error = vmtruncate(inode, attr->ia_size);
+		if (error)
+			return error;
+	}
+
+	setattr_copy(inode, attr);
+	mark_inode_dirty(inode);
+	return 0;
 }
 
 const struct inode_operations minix_file_inode_operations = {
diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c
index fa33851..b4e8aaa 100644
--- a/fs/ncpfs/inode.c
+++ b/fs/ncpfs/inode.c
@@ -924,9 +924,8 @@ int ncp_notify_change(struct dentry *dentry, struct iattr *attr)
 				tmpattr.ia_valid = ATTR_MODE;
 				tmpattr.ia_mode = attr->ia_mode;
 
-				result = inode_setattr(inode, &tmpattr);
-				if (result)
-					goto out;
+				setattr_copy(inode, &tmpattr);
+				mark_inode_dirty(inode);
 			}
 		}
 #endif
@@ -954,15 +953,12 @@ int ncp_notify_change(struct dentry *dentry, struct iattr *attr)
 		result = ncp_make_closed(inode);
 		if (result)
 			goto out;
-		{
-			struct iattr tmpattr;
-			
-			tmpattr.ia_valid = ATTR_SIZE;
-			tmpattr.ia_size = attr->ia_size;
-			
-			result = inode_setattr(inode, &tmpattr);
+
+		if (attr->ia_size != i_size_read(inode)) {
+			result = vmtruncate(inode, attr->ia_size);
 			if (result)
 				goto out;
+			mark_inode_dirty(inode);
 		}
 	}
 	if ((attr->ia_valid & ATTR_CTIME) != 0) {
@@ -1002,8 +998,12 @@ int ncp_notify_change(struct dentry *dentry, struct iattr *attr)
 			NCP_FINFO(inode)->nwattr = info.attributes;
 #endif
 	}
-	if (!result)
-		result = inode_setattr(inode, attr);
+	if (result)
+		goto out;
+
+	setattr_copy(inode, attr);
+	mark_inode_dirty(inode);
+
 out:
 	unlock_kernel();
 	return result;
diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c
index 5c694ec..051d279 100644
--- a/fs/nilfs2/inode.c
+++ b/fs/nilfs2/inode.c
@@ -656,14 +656,27 @@ int nilfs_setattr(struct dentry *dentry, struct iattr *iattr)
 	err = nilfs_transaction_begin(sb, &ti, 0);
 	if (unlikely(err))
 		return err;
-	err = inode_setattr(inode, iattr);
-	if (!err && (iattr->ia_valid & ATTR_MODE))
+
+	if ((iattr->ia_valid & ATTR_SIZE) &&
+	    iattr->ia_size != i_size_read(inode)) {
+		err = vmtruncate(inode, iattr->ia_size);
+		if (unlikely(err))
+			goto out_err;
+	}
+
+	setattr_copy(inode, iattr);
+	mark_inode_dirty(inode);
+
+	if (iattr->ia_valid & ATTR_MODE) {
 		err = nilfs_acl_chmod(inode);
-	if (likely(!err))
-		err = nilfs_transaction_commit(sb);
-	else
-		nilfs_transaction_abort(sb);
+		if (unlikely(err))
+			goto out_err;
+	}
+
+	return nilfs_transaction_commit(sb);
 
+out_err:
+	nilfs_transaction_abort(sb);
 	return err;
 }
 
diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c
index 4b57fb1..fdef8f7 100644
--- a/fs/ntfs/inode.c
+++ b/fs/ntfs/inode.c
@@ -2879,9 +2879,6 @@ void ntfs_truncate_vfs(struct inode *vi) {
  *
  * Called with ->i_mutex held.  For the ATTR_SIZE (i.e. ->truncate) case, also
  * called with ->i_alloc_sem held for writing.
- *
- * Basically this is a copy of generic notify_change() and inode_setattr()
- * functionality, except we intercept and abort changes in i_size.
  */
 int ntfs_setattr(struct dentry *dentry, struct iattr *attr)
 {
diff --git a/fs/ocfs2/dlmfs/dlmfs.c b/fs/ocfs2/dlmfs/dlmfs.c
index b83d610..85e4cca 100644
--- a/fs/ocfs2/dlmfs/dlmfs.c
+++ b/fs/ocfs2/dlmfs/dlmfs.c
@@ -214,10 +214,12 @@ static int dlmfs_file_setattr(struct dentry *dentry, struct iattr *attr)
 
 	attr->ia_valid &= ~ATTR_SIZE;
 	error = inode_change_ok(inode, attr);
-	if (!error)
-		error = inode_setattr(inode, attr);
+	if (error)
+		return error;
 
-	return error;
+	setattr_copy(inode, attr);
+	mark_inode_dirty(inode);
+	return 0;
 }
 
 static unsigned int dlmfs_file_poll(struct file *file, poll_table *wait)
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 2b10b36..584cf8a 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -1238,13 +1238,21 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr)
 	 * Otherwise, we could get into problems with truncate as
 	 * ip_alloc_sem is used there to protect against i_size
 	 * changes.
+	 *
+	 * XXX: this means the conditional below can probably be removed.
 	 */
-	status = inode_setattr(inode, attr);
-	if (status < 0) {
-		mlog_errno(status);
-		goto bail_commit;
+	if ((attr->ia_valid & ATTR_SIZE) &&
+	    attr->ia_size != i_size_read(inode)) {
+		status = vmtruncate(inode, attr->ia_size);
+		if (status) {
+			mlog_errno(status);
+			goto bail_commit;
+		}
 	}
 
+	setattr_copy(inode, attr);
+	mark_inode_dirty(inode);
+
 	status = ocfs2_mark_inode_dirty(handle, inode, bh);
 	if (status < 0)
 		mlog_errno(status);
diff --git a/fs/omfs/file.c b/fs/omfs/file.c
index 78c9f0c..5542c28 100644
--- a/fs/omfs/file.c
+++ b/fs/omfs/file.c
@@ -349,7 +349,17 @@ static int omfs_setattr(struct dentry *dentry, struct iattr *attr)
 	error = inode_change_ok(inode, attr);
 	if (error)
 		return error;
-	return inode_setattr(inode, attr);
+
+	if ((attr->ia_valid & ATTR_SIZE) &&
+	    attr->ia_size != i_size_read(inode)) {
+		error = vmtruncate(inode, attr->ia_size);
+		if (error)
+			return error;
+	}
+
+	setattr_copy(inode, attr);
+	mark_inode_dirty(inode);
+	return 0;
 }
 
 const struct inode_operations omfs_file_inops = {
diff --git a/fs/proc/base.c b/fs/proc/base.c
index acb7ef8..a49d9dd 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -561,9 +561,19 @@ static int proc_setattr(struct dentry *dentry, struct iattr *attr)
 		return -EPERM;
 
 	error = inode_change_ok(inode, attr);
-	if (!error)
-		error = inode_setattr(inode, attr);
-	return error;
+	if (error)
+		return error;
+
+	if ((attr->ia_valid & ATTR_SIZE) &&
+	    attr->ia_size != i_size_read(inode)) {
+		error = vmtruncate(inode, attr->ia_size);
+		if (error)
+			return error;
+	}
+
+	setattr_copy(inode, attr);
+	mark_inode_dirty(inode);
+	return 0;
 }
 
 static const struct inode_operations proc_def_inode_operations = {
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index 2791907..dd29f03 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -12,6 +12,7 @@
 #include <linux/time.h>
 #include <linux/proc_fs.h>
 #include <linux/stat.h>
+#include <linux/mm.h>
 #include <linux/module.h>
 #include <linux/slab.h>
 #include <linux/mount.h>
@@ -258,17 +259,22 @@ static int proc_notify_change(struct dentry *dentry, struct iattr *iattr)
 
 	error = inode_change_ok(inode, iattr);
 	if (error)
-		goto out;
+		return error;
 
-	error = inode_setattr(inode, iattr);
-	if (error)
-		goto out;
+	if ((iattr->ia_valid & ATTR_SIZE) &&
+	    iattr->ia_size != i_size_read(inode)) {
+		error = vmtruncate(inode, iattr->ia_size);
+		if (error)
+			return error;
+	}
+
+	setattr_copy(inode, iattr);
+	mark_inode_dirty(inode);
 	
 	de->uid = inode->i_uid;
 	de->gid = inode->i_gid;
 	de->mode = inode->i_mode;
-out:
-	return error;
+	return 0;
 }
 
 static int proc_getattr(struct vfsmount *mnt, struct dentry *dentry,
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index 6ff9981..5be436e 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -329,10 +329,19 @@ static int proc_sys_setattr(struct dentry *dentry, struct iattr *attr)
 		return -EPERM;
 
 	error = inode_change_ok(inode, attr);
-	if (!error)
-		error = inode_setattr(inode, attr);
+	if (error)
+		return error;
+
+	if ((attr->ia_valid & ATTR_SIZE) &&
+	    attr->ia_size != i_size_read(inode)) {
+		error = vmtruncate(inode, attr->ia_size);
+		if (error)
+			return error;
+	}
 
-	return error;
+	setattr_copy(inode, attr);
+	mark_inode_dirty(inode);
+	return 0;
 }
 
 static int proc_sys_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 045729f..2b8dc5c 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -3134,55 +3134,62 @@ int reiserfs_setattr(struct dentry *dentry, struct iattr *attr)
 	}
 
 	error = inode_change_ok(inode, attr);
-	if (!error) {
-		if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) ||
-		    (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) {
-			error = reiserfs_chown_xattrs(inode, attr);
+	if (error)
+		goto out;
 
-			if (!error) {
-				struct reiserfs_transaction_handle th;
-				int jbegin_count =
-				    2 *
-				    (REISERFS_QUOTA_INIT_BLOCKS(inode->i_sb) +
-				     REISERFS_QUOTA_DEL_BLOCKS(inode->i_sb)) +
-				    2;
-
-				/* (user+group)*(old+new) structure - we count quota info and , inode write (sb, inode) */
-				error =
-				    journal_begin(&th, inode->i_sb,
-						  jbegin_count);
-				if (error)
-					goto out;
-				error = dquot_transfer(inode, attr);
-				if (error) {
-					journal_end(&th, inode->i_sb,
-						    jbegin_count);
-					goto out;
-				}
-				/* Update corresponding info in inode so that everything is in
-				 * one transaction */
-				if (attr->ia_valid & ATTR_UID)
-					inode->i_uid = attr->ia_uid;
-				if (attr->ia_valid & ATTR_GID)
-					inode->i_gid = attr->ia_gid;
-				mark_inode_dirty(inode);
-				error =
-				    journal_end(&th, inode->i_sb, jbegin_count);
-			}
-		}
-		if (!error) {
-			/*
-			 * Relax the lock here, as it might truncate the
-			 * inode pages and wait for inode pages locks.
-			 * To release such page lock, the owner needs the
-			 * reiserfs lock
-			 */
-			reiserfs_write_unlock_once(inode->i_sb, depth);
-			error = inode_setattr(inode, attr);
-			depth = reiserfs_write_lock_once(inode->i_sb);
+	if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) ||
+	    (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) {
+		struct reiserfs_transaction_handle th;
+		int jbegin_count =
+		    2 *
+		    (REISERFS_QUOTA_INIT_BLOCKS(inode->i_sb) +
+		     REISERFS_QUOTA_DEL_BLOCKS(inode->i_sb)) +
+		    2;
+
+		error = reiserfs_chown_xattrs(inode, attr);
+
+		if (error)
+			return error;
+
+		/* (user+group)*(old+new) structure - we count quota info and , inode write (sb, inode) */
+		error = journal_begin(&th, inode->i_sb, jbegin_count);
+		if (error)
+			goto out;
+		error = dquot_transfer(inode, attr);
+		if (error) {
+			journal_end(&th, inode->i_sb, jbegin_count);
+			goto out;
 		}
+
+		/* Update corresponding info in inode so that everything is in
+		 * one transaction */
+		if (attr->ia_valid & ATTR_UID)
+			inode->i_uid = attr->ia_uid;
+		if (attr->ia_valid & ATTR_GID)
+			inode->i_gid = attr->ia_gid;
+		mark_inode_dirty(inode);
+		error = journal_end(&th, inode->i_sb, jbegin_count);
+		if (error)
+			goto out;
 	}
 
+	/*
+	 * Relax the lock here, as it might truncate the
+	 * inode pages and wait for inode pages locks.
+	 * To release such page lock, the owner needs the
+	 * reiserfs lock
+	 */
+	reiserfs_write_unlock_once(inode->i_sb, depth);
+	if ((attr->ia_valid & ATTR_SIZE) &&
+	    attr->ia_size != i_size_read(inode))
+		error = vmtruncate(inode, attr->ia_size);
+
+	if (!error) {
+		setattr_copy(inode, attr);
+		mark_inode_dirty(inode);
+	}
+	depth = reiserfs_write_lock_once(inode->i_sb);
+
 	if (!error && reiserfs_posixacl(inode->i_sb)) {
 		if (attr->ia_valid & ATTR_MODE)
 			error = reiserfs_acl_chmod(inode);
diff --git a/fs/sysv/file.c b/fs/sysv/file.c
index 94f6319..0a65939 100644
--- a/fs/sysv/file.c
+++ b/fs/sysv/file.c
@@ -38,7 +38,17 @@ static int sysv_setattr(struct dentry *dentry, struct iattr *attr)
 	error = inode_change_ok(inode, attr);
 	if (error)
 		return error;
-	return inode_setattr(inode, attr);
+
+	if ((attr->ia_valid & ATTR_SIZE) &&
+	    attr->ia_size != i_size_read(inode)) {
+		error = vmtruncate(inode, attr->ia_size);
+		if (error)
+			return error;
+	}
+
+	setattr_copy(inode, attr);
+	mark_inode_dirty(inode);
+	return 0;
 }
 
 const struct inode_operations sysv_file_inode_operations = {
diff --git a/fs/udf/file.c b/fs/udf/file.c
index 7376032c..04bb5bf 100644
--- a/fs/udf/file.c
+++ b/fs/udf/file.c
@@ -236,7 +236,17 @@ static int udf_setattr(struct dentry *dentry, struct iattr *attr)
 	error = inode_change_ok(inode, attr);
 	if (error)
 		return error;
-	return inode_setattr(inode, attr);
+
+	if ((attr->ia_valid & ATTR_SIZE) &&
+	    attr->ia_size != i_size_read(inode)) {
+		error = vmtruncate(inode, attr->ia_size);
+		if (error)
+			return error;
+	}
+
+	setattr_copy(inode, attr);
+	mark_inode_dirty(inode);
+	return 0;
 }
 
 const struct inode_operations udf_file_inode_operations = {
diff --git a/fs/ufs/truncate.c b/fs/ufs/truncate.c
index 589e01a..085e116 100644
--- a/fs/ufs/truncate.c
+++ b/fs/ufs/truncate.c
@@ -525,7 +525,10 @@ int ufs_setattr(struct dentry *dentry, struct iattr *attr)
 		if (error)
 			return error;
 	}
-	return inode_setattr(inode, attr);
+
+	setattr_copy(inode, attr);
+	mark_inode_dirty(inode);
+	return 0;
 }
 
 const struct inode_operations ufs_file_inode_operations = {
-- 
cgit v1.1


From db78b877f7744bec4a9d9f9e7d10da3931d7cd39 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 4 Jun 2010 11:30:03 +0200
Subject: always call inode_change_ok early in ->setattr

Make sure we call inode_change_ok before doing any changes in ->setattr,
and make sure to call it even if our fs wants to ignore normal UNIX
permissions, but use the ATTR_FORCE to skip those.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/cifs/inode.c     | 29 +++++++++++++----------------
 fs/fat/file.c       | 30 +++++++++++++++---------------
 fs/fuse/dir.c       | 11 ++++++-----
 fs/logfs/file.c     |  8 ++++----
 fs/reiserfs/inode.c |  8 ++++----
 5 files changed, 42 insertions(+), 44 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 9c6a40f..b95f4a5 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -1796,14 +1796,12 @@ cifs_setattr_unix(struct dentry *direntry, struct iattr *attrs)
 
 	xid = GetXid();
 
-	if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_PERM) == 0) {
-		/* check if we have permission to change attrs */
-		rc = inode_change_ok(inode, attrs);
-		if (rc < 0)
-			goto out;
-		else
-			rc = 0;
-	}
+	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_PERM)
+		attrs->ia_valid |= ATTR_FORCE;
+
+	rc = inode_change_ok(inode, attrs);
+	if (rc < 0)
+		goto out;
 
 	full_path = build_path_from_dentry(direntry);
 	if (full_path == NULL) {
@@ -1934,14 +1932,13 @@ cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs)
 	cFYI(1, "setattr on file %s attrs->iavalid 0x%x",
 		 direntry->d_name.name, attrs->ia_valid);
 
-	if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_PERM) == 0) {
-		/* check if we have permission to change attrs */
-		rc = inode_change_ok(inode, attrs);
-		if (rc < 0) {
-			FreeXid(xid);
-			return rc;
-		} else
-			rc = 0;
+	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_PERM)
+		attrs->ia_valid |= ATTR_FORCE;
+
+	rc = inode_change_ok(inode, attrs);
+	if (rc < 0) {
+		FreeXid(xid);
+		return rc;
 	}
 
 	full_path = build_path_from_dentry(direntry);
diff --git a/fs/fat/file.c b/fs/fat/file.c
index 20813d2..b2eedce 100644
--- a/fs/fat/file.c
+++ b/fs/fat/file.c
@@ -387,21 +387,6 @@ int fat_setattr(struct dentry *dentry, struct iattr *attr)
 	unsigned int ia_valid;
 	int error;
 
-	/*
-	 * Expand the file. Since inode_setattr() updates ->i_size
-	 * before calling the ->truncate(), but FAT needs to fill the
-	 * hole before it. XXX: this is no longer true with new truncate
-	 * sequence.
-	 */
-	if (attr->ia_valid & ATTR_SIZE) {
-		if (attr->ia_size > inode->i_size) {
-			error = fat_cont_expand(inode, attr->ia_size);
-			if (error || attr->ia_valid == ATTR_SIZE)
-				goto out;
-			attr->ia_valid &= ~ATTR_SIZE;
-		}
-	}
-
 	/* Check for setting the inode time. */
 	ia_valid = attr->ia_valid;
 	if (ia_valid & TIMES_SET_FLAGS) {
@@ -417,6 +402,21 @@ int fat_setattr(struct dentry *dentry, struct iattr *attr)
 		goto out;
 	}
 
+	/*
+	 * Expand the file. Since inode_setattr() updates ->i_size
+	 * before calling the ->truncate(), but FAT needs to fill the
+	 * hole before it. XXX: this is no longer true with new truncate
+	 * sequence.
+	 */
+	if (attr->ia_valid & ATTR_SIZE) {
+		if (attr->ia_size > inode->i_size) {
+			error = fat_cont_expand(inode, attr->ia_size);
+			if (error || attr->ia_valid == ATTR_SIZE)
+				goto out;
+			attr->ia_valid &= ~ATTR_SIZE;
+		}
+	}
+
 	if (((attr->ia_valid & ATTR_UID) &&
 	     (attr->ia_uid != sbi->options.fs_uid)) ||
 	    ((attr->ia_valid & ATTR_GID) &&
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 3cdc5f7..43a9b37 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -1270,11 +1270,12 @@ static int fuse_do_setattr(struct dentry *entry, struct iattr *attr,
 	if (!fuse_allow_task(fc, current))
 		return -EACCES;
 
-	if (fc->flags & FUSE_DEFAULT_PERMISSIONS) {
-		err = inode_change_ok(inode, attr);
-		if (err)
-			return err;
-	}
+	if (!(fc->flags & FUSE_DEFAULT_PERMISSIONS))
+		attr->ia_valid |= ATTR_FORCE;
+
+	err = inode_change_ok(inode, attr);
+	if (err)
+		return err;
 
 	if ((attr->ia_valid & ATTR_OPEN) && fc->atomic_o_trunc)
 		return 0;
diff --git a/fs/logfs/file.c b/fs/logfs/file.c
index 23b4d03..4dd0f7c 100644
--- a/fs/logfs/file.c
+++ b/fs/logfs/file.c
@@ -232,16 +232,16 @@ static int logfs_setattr(struct dentry *dentry, struct iattr *attr)
 	struct inode *inode = dentry->d_inode;
 	int err = 0;
 
+	err = inode_change_ok(inode, attr);
+	if (err)
+		return err;
+
 	if (attr->ia_valid & ATTR_SIZE) {
 		err = logfs_truncate(inode, attr->ia_size);
 		if (err)
 			return err;
 	}
 
-	err = inode_change_ok(inode, attr);
-	if (err)
-		return err;
-
 	setattr_copy(inode, attr);
 	mark_inode_dirty(inode);
 	return 0;
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 2b8dc5c..46ba1cf 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -3084,6 +3084,10 @@ int reiserfs_setattr(struct dentry *dentry, struct iattr *attr)
 	int depth;
 	int error;
 
+	error = inode_change_ok(inode, attr);
+	if (error)
+		return error;
+
 	/* must be turned off for recursive notify_change calls */
 	ia_valid = attr->ia_valid &= ~(ATTR_KILL_SUID|ATTR_KILL_SGID);
 
@@ -3133,10 +3137,6 @@ int reiserfs_setattr(struct dentry *dentry, struct iattr *attr)
 		goto out;
 	}
 
-	error = inode_change_ok(inode, attr);
-	if (error)
-		goto out;
-
 	if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) ||
 	    (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) {
 		struct reiserfs_transaction_handle th;
-- 
cgit v1.1


From 2c27c65ed0696f0b5df2dad2cf6462d72164d547 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 4 Jun 2010 11:30:04 +0200
Subject: check ATTR_SIZE contraints in inode_change_ok

Make sure we check the truncate constraints early on in ->setattr by adding
those checks to inode_change_ok.  Also clean up and document inode_change_ok
to make this obvious.

As a fallout we don't have to call inode_newsize_ok from simple_setsize and
simplify it down to a truncate_setsize which doesn't return an error.  This
simplifies a lot of setattr implementations and means we use truncate_setsize
almost everywhere.  Get rid of fat_setsize now that it's trivial and mark
ext2_setsize static to make the calling convention obvious.

Keep the inode_newsize_ok in vmtruncate for now as all callers need an
audit for its removal anyway.

Note: setattr code in ecryptfs doesn't call inode_change_ok at all and
needs a deeper audit, but that is left for later.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/adfs/inode.c       |  5 +----
 fs/attr.c             | 44 ++++++++++++++++++++++++++++++--------------
 fs/ecryptfs/inode.c   | 18 ++++++++++++++----
 fs/ext2/inode.c       | 12 ++----------
 fs/fat/fat.h          |  1 -
 fs/fat/file.c         | 17 ++---------------
 fs/fuse/dir.c         |  6 +-----
 fs/gfs2/aops.c        |  4 ++--
 fs/gfs2/ops_inode.c   |  6 ++----
 fs/jffs2/fs.c         |  4 ++--
 fs/libfs.c            | 51 ++-------------------------------------------------
 fs/ocfs2/file.c       |  6 +++---
 fs/ramfs/file-nommu.c |  5 ++---
 fs/smbfs/inode.c      |  4 +---
 fs/ubifs/file.c       | 23 ++++++++---------------
 fs/ubifs/ubifs.h      |  2 +-
 fs/ufs/truncate.c     | 11 +++--------
 17 files changed, 76 insertions(+), 143 deletions(-)

(limited to 'fs')

diff --git a/fs/adfs/inode.c b/fs/adfs/inode.c
index b3dec19..65794b8 100644
--- a/fs/adfs/inode.c
+++ b/fs/adfs/inode.c
@@ -333,10 +333,7 @@ adfs_notify_change(struct dentry *dentry, struct iattr *attr)
 
 	/* XXX: this is missing some actual on-disk truncation.. */
 	if (ia_valid & ATTR_SIZE)
-		error = simple_setsize(inode, attr->ia_size);
-
-	if (error)
-		goto out;
+		truncate_setsize(inode, attr->ia_size);
 
 	if (ia_valid & ATTR_MTIME) {
 		inode->i_mtime = attr->ia_mtime;
diff --git a/fs/attr.c b/fs/attr.c
index ed44d8a..7ca4181 100644
--- a/fs/attr.c
+++ b/fs/attr.c
@@ -14,35 +14,53 @@
 #include <linux/fcntl.h>
 #include <linux/security.h>
 
-/* Taken over from the old code... */
-
-/* POSIX UID/GID verification for setting inode attributes. */
+/**
+ * inode_change_ok - check if attribute changes to an inode are allowed
+ * @inode:	inode to check
+ * @attr:	attributes to change
+ *
+ * Check if we are allowed to change the attributes contained in @attr
+ * in the given inode.  This includes the normal unix access permission
+ * checks, as well as checks for rlimits and others.
+ *
+ * Should be called as the first thing in ->setattr implementations,
+ * possibly after taking additional locks.
+ */
 int inode_change_ok(const struct inode *inode, struct iattr *attr)
 {
-	int retval = -EPERM;
 	unsigned int ia_valid = attr->ia_valid;
 
+	/*
+	 * First check size constraints.  These can't be overriden using
+	 * ATTR_FORCE.
+	 */
+	if (ia_valid & ATTR_SIZE) {
+		int error = inode_newsize_ok(inode, attr->ia_size);
+		if (error)
+			return error;
+	}
+
 	/* If force is set do it anyway. */
 	if (ia_valid & ATTR_FORCE)
-		goto fine;
+		return 0;
 
 	/* Make sure a caller can chown. */
 	if ((ia_valid & ATTR_UID) &&
 	    (current_fsuid() != inode->i_uid ||
 	     attr->ia_uid != inode->i_uid) && !capable(CAP_CHOWN))
-		goto error;
+		return -EPERM;
 
 	/* Make sure caller can chgrp. */
 	if ((ia_valid & ATTR_GID) &&
 	    (current_fsuid() != inode->i_uid ||
 	    (!in_group_p(attr->ia_gid) && attr->ia_gid != inode->i_gid)) &&
 	    !capable(CAP_CHOWN))
-		goto error;
+		return -EPERM;
 
 	/* Make sure a caller can chmod. */
 	if (ia_valid & ATTR_MODE) {
 		if (!is_owner_or_cap(inode))
-			goto error;
+			return -EPERM;
 		/* Also check the setgid bit! */
 		if (!in_group_p((ia_valid & ATTR_GID) ? attr->ia_gid :
 				inode->i_gid) && !capable(CAP_FSETID))
@@ -52,12 +70,10 @@ int inode_change_ok(const struct inode *inode, struct iattr *attr)
 	/* Check for setting the inode time. */
 	if (ia_valid & (ATTR_MTIME_SET | ATTR_ATIME_SET | ATTR_TIMES_SET)) {
 		if (!is_owner_or_cap(inode))
-			goto error;
+			return -EPERM;
 	}
-fine:
-	retval = 0;
-error:
-	return retval;
+
+	return 0;
 }
 EXPORT_SYMBOL(inode_change_ok);
 
@@ -113,7 +129,7 @@ EXPORT_SYMBOL(inode_newsize_ok);
  *
  * setattr_copy updates the inode's metadata with that specified
  * in attr. Noticably missing is inode size update, which is more complex
- * as it requires pagecache updates. See simple_setsize.
+ * as it requires pagecache updates.
  *
  * The inode is not marked as dirty after this operation. The rationale is
  * that for "simple" filesystems, the struct inode is the inode storage.
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index 31ef525..82900b0 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -804,10 +804,20 @@ static int truncate_upper(struct dentry *dentry, struct iattr *ia,
 		size_t num_zeros = (PAGE_CACHE_SIZE
 				    - (ia->ia_size & ~PAGE_CACHE_MASK));
 
+
+		/*
+		 * XXX(truncate) this should really happen at the begginning
+		 * of ->setattr.  But the code is too messy to that as part
+		 * of a larger patch.  ecryptfs is also totally missing out
+		 * on the inode_change_ok check at the beginning of
+		 * ->setattr while would include this.
+		 */
+		rc = inode_newsize_ok(inode, ia->ia_size);
+		if (rc)
+			goto out;
+
 		if (!(crypt_stat->flags & ECRYPTFS_ENCRYPTED)) {
-			rc = simple_setsize(inode, ia->ia_size);
-			if (rc)
-				goto out;
+			truncate_setsize(inode, ia->ia_size);
 			lower_ia->ia_size = ia->ia_size;
 			lower_ia->ia_valid |= ATTR_SIZE;
 			goto out;
@@ -830,7 +840,7 @@ static int truncate_upper(struct dentry *dentry, struct iattr *ia,
 				goto out;
 			}
 		}
-		simple_setsize(inode, ia->ia_size);
+		truncate_setsize(inode, ia->ia_size);
 		rc = ecryptfs_write_inode_size_to_metadata(inode);
 		if (rc) {
 			printk(KERN_ERR	"Problem with "
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index 7dee7b3..069620b 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -1156,15 +1156,10 @@ static void ext2_truncate_blocks(struct inode *inode, loff_t offset)
 	__ext2_truncate_blocks(inode, offset);
 }
 
-int ext2_setsize(struct inode *inode, loff_t newsize)
+static int ext2_setsize(struct inode *inode, loff_t newsize)
 {
-	loff_t oldsize;
 	int error;
 
-	error = inode_newsize_ok(inode, newsize);
-	if (error)
-		return error;
-
 	if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
 	    S_ISLNK(inode->i_mode)))
 		return -EINVAL;
@@ -1184,10 +1179,7 @@ int ext2_setsize(struct inode *inode, loff_t newsize)
 	if (error)
 		return error;
 
-	oldsize = inode->i_size;
-	i_size_write(inode, newsize);
-	truncate_pagecache(inode, oldsize, newsize);
-
+	truncate_setsize(inode, newsize);
 	__ext2_truncate_blocks(inode, newsize);
 
 	inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC;
diff --git a/fs/fat/fat.h b/fs/fat/fat.h
index 27ac257..d75a77f 100644
--- a/fs/fat/fat.h
+++ b/fs/fat/fat.h
@@ -306,7 +306,6 @@ extern long fat_generic_ioctl(struct file *filp, unsigned int cmd,
 extern const struct file_operations fat_file_operations;
 extern const struct inode_operations fat_file_inode_operations;
 extern int fat_setattr(struct dentry * dentry, struct iattr * attr);
-extern int fat_setsize(struct inode *inode, loff_t offset);
 extern void fat_truncate_blocks(struct inode *inode, loff_t offset);
 extern int fat_getattr(struct vfsmount *mnt, struct dentry *dentry,
 		       struct kstat *stat);
diff --git a/fs/fat/file.c b/fs/fat/file.c
index b2eedce..7257752 100644
--- a/fs/fat/file.c
+++ b/fs/fat/file.c
@@ -364,18 +364,6 @@ static int fat_allow_set_time(struct msdos_sb_info *sbi, struct inode *inode)
 	return 0;
 }
 
-int fat_setsize(struct inode *inode, loff_t offset)
-{
-	int error;
-
-	error = simple_setsize(inode, offset);
-	if (error)
-		return error;
-	fat_truncate_blocks(inode, offset);
-
-	return error;
-}
-
 #define TIMES_SET_FLAGS	(ATTR_MTIME_SET | ATTR_ATIME_SET | ATTR_TIMES_SET)
 /* valid file mode bits */
 #define FAT_VALID_MODE	(S_IFREG | S_IFDIR | S_IRWXUGO)
@@ -441,9 +429,8 @@ int fat_setattr(struct dentry *dentry, struct iattr *attr)
 	}
 
 	if (attr->ia_valid & ATTR_SIZE) {
-		error = fat_setsize(inode, attr->ia_size);
-		if (error)
-			goto out;
+		truncate_setsize(inode, attr->ia_size);
+		fat_truncate_blocks(inode, attr->ia_size);
 	}
 
 	setattr_copy(inode, attr);
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 43a9b37..3978a42 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -1280,12 +1280,8 @@ static int fuse_do_setattr(struct dentry *entry, struct iattr *attr,
 	if ((attr->ia_valid & ATTR_OPEN) && fc->atomic_o_trunc)
 		return 0;
 
-	if (attr->ia_valid & ATTR_SIZE) {
-		err = inode_newsize_ok(inode, attr->ia_size);
-		if (err)
-			return err;
+	if (attr->ia_valid & ATTR_SIZE)
 		is_truncate = true;
-	}
 
 	req = fuse_get_req(fc);
 	if (IS_ERR(req))
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index 703000d..54fe087 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -702,12 +702,12 @@ out:
 	page_cache_release(page);
 
 	/*
-	 * XXX(hch): the call below should probably be replaced with
+	 * XXX(truncate): the call below should probably be replaced with
 	 * a call to the gfs2-specific truncate blocks helper to actually
 	 * release disk blocks..
 	 */
 	if (pos + len > ip->i_inode.i_size)
-		simple_setsize(&ip->i_inode, ip->i_inode.i_size);
+		truncate_setsize(&ip->i_inode, ip->i_inode.i_size);
 out_endtrans:
 	gfs2_trans_end(sdp);
 out_trans_fail:
diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c
index d7d410a..1009be2 100644
--- a/fs/gfs2/ops_inode.c
+++ b/fs/gfs2/ops_inode.c
@@ -1072,7 +1072,7 @@ int gfs2_permission(struct inode *inode, int mask)
 }
 
 /*
- * XXX: should be changed to have proper ordering by opencoding simple_setsize
+ * XXX(truncate): the truncate_setsize calls should be moved to the end.
  */
 static int setattr_size(struct inode *inode, struct iattr *attr)
 {
@@ -1084,10 +1084,8 @@ static int setattr_size(struct inode *inode, struct iattr *attr)
 		error = gfs2_trans_begin(sdp, 0, sdp->sd_jdesc->jd_blocks);
 		if (error)
 			return error;
-		error = simple_setsize(inode, attr->ia_size);
+		truncate_setsize(inode, attr->ia_size);
 		gfs2_trans_end(sdp);
-		if (error) 
-			return error;
 	}
 
 	error = gfs2_truncatei(ip, attr->ia_size);
diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c
index 459d39d..1b24266 100644
--- a/fs/jffs2/fs.c
+++ b/fs/jffs2/fs.c
@@ -169,13 +169,13 @@ int jffs2_do_setattr (struct inode *inode, struct iattr *iattr)
 	mutex_unlock(&f->sem);
 	jffs2_complete_reservation(c);
 
-	/* We have to do the simple_setsize() without f->sem held, since
+	/* We have to do the truncate_setsize() without f->sem held, since
 	   some pages may be locked and waiting for it in readpage().
 	   We are protected from a simultaneous write() extending i_size
 	   back past iattr->ia_size, because do_truncate() holds the
 	   generic inode semaphore. */
 	if (ivalid & ATTR_SIZE && inode->i_size > iattr->ia_size) {
-		simple_setsize(inode, iattr->ia_size);
+		truncate_setsize(inode, iattr->ia_size);
 		inode->i_blocks = (inode->i_size + 511) >> 9;
 	}	
 
diff --git a/fs/libfs.c b/fs/libfs.c
index 4056222..0a9da95 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -327,49 +327,6 @@ int simple_rename(struct inode *old_dir, struct dentry *old_dentry,
 }
 
 /**
- * simple_setsize - handle core mm and vfs requirements for file size change
- * @inode: inode
- * @newsize: new file size
- *
- * Returns 0 on success, -error on failure.
- *
- * simple_setsize must be called with inode_mutex held.
- *
- * simple_setsize will check that the requested new size is OK (see
- * inode_newsize_ok), and then will perform the necessary i_size update
- * and pagecache truncation (if necessary). It will be typically be called
- * from the filesystem's setattr function when ATTR_SIZE is passed in.
- *
- * The inode itself must have correct permissions and attributes to allow
- * i_size to be changed, this function then just checks that the new size
- * requested is valid.
- *
- * In the case of simple in-memory filesystems with inodes stored solely
- * in the inode cache, and file data in the pagecache, nothing more needs
- * to be done to satisfy a truncate request. Filesystems with on-disk
- * blocks for example will need to free them in the case of truncate, in
- * that case it may be easier not to use simple_setsize (but each of its
- * components will likely be required at some point to update pagecache
- * and inode etc).
- */
-int simple_setsize(struct inode *inode, loff_t newsize)
-{
-	loff_t oldsize;
-	int error;
-
-	error = inode_newsize_ok(inode, newsize);
-	if (error)
-		return error;
-
-	oldsize = inode->i_size;
-	i_size_write(inode, newsize);
-	truncate_pagecache(inode, oldsize, newsize);
-
-	return error;
-}
-EXPORT_SYMBOL(simple_setsize);
-
-/**
  * simple_setattr - setattr for simple filesystem
  * @dentry: dentry
  * @iattr: iattr structure
@@ -394,12 +351,8 @@ int simple_setattr(struct dentry *dentry, struct iattr *iattr)
 	if (error)
 		return error;
 
-	if (iattr->ia_valid & ATTR_SIZE) {
-		error = simple_setsize(inode, iattr->ia_size);
-		if (error)
-			return error;
-	}
-
+	if (iattr->ia_valid & ATTR_SIZE)
+		truncate_setsize(inode, iattr->ia_size);
 	setattr_copy(inode, iattr);
 	mark_inode_dirty(inode);
 	return 0;
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 584cf8a..81296b4 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -1233,7 +1233,7 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr)
 	}
 
 	/*
-	 * This will intentionally not wind up calling simple_setsize(),
+	 * This will intentionally not wind up calling truncate_setsize(),
 	 * since all the work for a size change has been done above.
 	 * Otherwise, we could get into problems with truncate as
 	 * ip_alloc_sem is used there to protect against i_size
@@ -2308,12 +2308,12 @@ relock:
 			 * blocks outside i_size. Trim these off again.
 			 * Don't need i_size_read because we hold i_mutex.
 			 *
-			 * XXX(hch): this looks buggy because ocfs2 did not
+			 * XXX(truncate): this looks buggy because ocfs2 did not
 			 * actually implement ->truncate.  Take a look at
 			 * the new truncate sequence and update this accordingly
 			 */
 			if (*ppos + count > inode->i_size)
-				simple_setsize(inode, inode->i_size);
+				truncate_setsize(inode, inode->i_size);
 			ret = written;
 			goto out_dio;
 		}
diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c
index 8d44f03..9eead2c 100644
--- a/fs/ramfs/file-nommu.c
+++ b/fs/ramfs/file-nommu.c
@@ -146,9 +146,8 @@ static int ramfs_nommu_resize(struct inode *inode, loff_t newsize, loff_t size)
 			return ret;
 	}
 
-	ret = simple_setsize(inode, newsize);
-
-	return ret;
+	truncate_setsize(inode, newsize);
+	return 0;
 }
 
 /*****************************************************************************/
diff --git a/fs/smbfs/inode.c b/fs/smbfs/inode.c
index 9551cb6..e338f0a 100644
--- a/fs/smbfs/inode.c
+++ b/fs/smbfs/inode.c
@@ -714,9 +714,7 @@ smb_notify_change(struct dentry *dentry, struct iattr *attr)
 		error = server->ops->truncate(inode, attr->ia_size);
 		if (error)
 			goto out;
-		error = simple_setsize(inode, attr->ia_size);
-		if (error)
-			goto out;
+		truncate_setsize(inode, attr->ia_size);
 		refresh = 1;
 	}
 
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index 12f445c..03ae894 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -967,14 +967,15 @@ static int do_writepage(struct page *page, int len)
  * the page locked, and it locks @ui_mutex. However, write-back does take inode
  * @i_mutex, which means other VFS operations may be run on this inode at the
  * same time. And the problematic one is truncation to smaller size, from where
- * we have to call 'simple_setsize()', which first changes @inode->i_size, then
+ * we have to call 'truncate_setsize()', which first changes @inode->i_size, then
  * drops the truncated pages. And while dropping the pages, it takes the page
- * lock. This means that 'do_truncation()' cannot call 'simple_setsize()' with
+ * lock. This means that 'do_truncation()' cannot call 'truncate_setsize()' with
  * @ui_mutex locked, because it would deadlock with 'ubifs_writepage()'. This
  * means that @inode->i_size is changed while @ui_mutex is unlocked.
  *
- * XXX: with the new truncate the above is not true anymore, the simple_setsize
- * calls can be replaced with the individual components.
+ * XXX(truncate): with the new truncate sequence this is not true anymore,
+ * and the calls to truncate_setsize can be move around freely.  They should
+ * be moved to the very end of the truncate sequence.
  *
  * But in 'ubifs_writepage()' we have to guarantee that we do not write beyond
  * inode size. How do we do this if @inode->i_size may became smaller while we
@@ -1128,9 +1129,7 @@ static int do_truncation(struct ubifs_info *c, struct inode *inode,
 		budgeted = 0;
 	}
 
-	err = simple_setsize(inode, new_size);
-	if (err)
-		goto out_budg;
+	truncate_setsize(inode, new_size);
 
 	if (offset) {
 		pgoff_t index = new_size >> PAGE_CACHE_SHIFT;
@@ -1217,16 +1216,14 @@ static int do_setattr(struct ubifs_info *c, struct inode *inode,
 
 	if (attr->ia_valid & ATTR_SIZE) {
 		dbg_gen("size %lld -> %lld", inode->i_size, new_size);
-		err = simple_setsize(inode, new_size);
-		if (err)
-			goto out;
+		truncate_setsize(inode, new_size);
 	}
 
 	mutex_lock(&ui->ui_mutex);
 	if (attr->ia_valid & ATTR_SIZE) {
 		/* Truncation changes inode [mc]time */
 		inode->i_mtime = inode->i_ctime = ubifs_current_time(inode);
-		/* 'simple_setsize()' changed @i_size, update @ui_size */
+		/* 'truncate_setsize()' changed @i_size, update @ui_size */
 		ui->ui_size = inode->i_size;
 	}
 
@@ -1248,10 +1245,6 @@ static int do_setattr(struct ubifs_info *c, struct inode *inode,
 	if (IS_SYNC(inode))
 		err = inode->i_sb->s_op->write_inode(inode, NULL);
 	return err;
-
-out:
-	ubifs_release_budget(c, &req);
-	return err;
 }
 
 int ubifs_setattr(struct dentry *dentry, struct iattr *attr)
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h
index 0431087..0c9876b 100644
--- a/fs/ubifs/ubifs.h
+++ b/fs/ubifs/ubifs.h
@@ -379,7 +379,7 @@ struct ubifs_gced_idx_leb {
  * The @ui_size is a "shadow" variable for @inode->i_size and UBIFS uses
  * @ui_size instead of @inode->i_size. The reason for this is that UBIFS cannot
  * make sure @inode->i_size is always changed under @ui_mutex, because it
- * cannot call 'simple_setsize()' with @ui_mutex locked, because it would deadlock
+ * cannot call 'truncate_setsize()' with @ui_mutex locked, because it would deadlock
  * with 'ubifs_writepage()' (see file.c). All the other inode fields are
  * changed under @ui_mutex, so they do not need "shadow" fields. Note, one
  * could consider to rework locking and base it on "shadow" fields.
diff --git a/fs/ufs/truncate.c b/fs/ufs/truncate.c
index 085e116..34d5cb1 100644
--- a/fs/ufs/truncate.c
+++ b/fs/ufs/truncate.c
@@ -500,11 +500,6 @@ out:
 	return err;
 }
 
-/*
- * TODO:
- *	- truncate case should use proper ordering instead of using
- *	  simple_setsize
- */
 int ufs_setattr(struct dentry *dentry, struct iattr *attr)
 {
 	struct inode *inode = dentry->d_inode;
@@ -518,9 +513,9 @@ int ufs_setattr(struct dentry *dentry, struct iattr *attr)
 	if (ia_valid & ATTR_SIZE && attr->ia_size != inode->i_size) {
 		loff_t old_i_size = inode->i_size;
 
-		error = simple_setsize(inode, attr->ia_size);
-		if (error)
-			return error;
+		/* XXX(truncate): truncate_setsize should be called last */
+		truncate_setsize(inode, attr->ia_size);
+
 		error = ufs_truncate(inode, old_i_size);
 		if (error)
 			return error;
-- 
cgit v1.1


From 41cce647f8dbe26941bed2158fad0839aab7a294 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Tue, 8 Jun 2010 13:24:56 -0400
Subject: jffs2: don't open-code iget_failed()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/jffs2/dir.c | 16 ++++------------
 1 file changed, 4 insertions(+), 12 deletions(-)

(limited to 'fs')

diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c
index 166062a..5fd3b5c 100644
--- a/fs/jffs2/dir.c
+++ b/fs/jffs2/dir.c
@@ -232,9 +232,7 @@ static int jffs2_create(struct inode *dir_i, struct dentry *dentry, int mode,
 	return 0;
 
  fail:
-	make_bad_inode(inode);
-	unlock_new_inode(inode);
-	iput(inode);
+	iget_failed(inode);
 	jffs2_free_raw_inode(ri);
 	return ret;
 }
@@ -454,9 +452,7 @@ static int jffs2_symlink (struct inode *dir_i, struct dentry *dentry, const char
 	return 0;
 
  fail:
-	make_bad_inode(inode);
-	unlock_new_inode(inode);
-	iput(inode);
+	iget_failed(inode);
 	return ret;
 }
 
@@ -601,9 +597,7 @@ static int jffs2_mkdir (struct inode *dir_i, struct dentry *dentry, int mode)
 	return 0;
 
  fail:
-	make_bad_inode(inode);
-	unlock_new_inode(inode);
-	iput(inode);
+	iget_failed(inode);
 	return ret;
 }
 
@@ -778,9 +772,7 @@ static int jffs2_mknod (struct inode *dir_i, struct dentry *dentry, int mode, de
 	return 0;
 
  fail:
-	make_bad_inode(inode);
-	unlock_new_inode(inode);
-	iput(inode);
+	iget_failed(inode);
 	return ret;
 }
 
-- 
cgit v1.1


From 2f246fd0f126f3b3c23a4e6b7109350e83356bd6 Mon Sep 17 00:00:00 2001
From: Boaz Harrosh <bharrosh@panasas.com>
Date: Wed, 9 Jun 2010 18:23:18 +0300
Subject: exofs: New truncate sequence

These changes are crafted based on the similar
conversion done to ext2 by Nick Piggin.

* Remove the deprecated ->truncate vector. Let exofs_setattr
  take care of on-disk size updates.
* Call truncate_pagecache on the unused pages if
  write_begin/end fails.
* Cleanup exofs_delete_inode that did stupid inode
  writes and updates on an inode that will be
  removed.
* And finally get rid of exofs_get_block. We never
  had any blocks it was all for calling nobh_truncate_page.
  nobh_truncate_page is not actually needed in exofs since
  the last page is complete and gone, just like all the other
  pages. There is no partial blocks in exofs.

I've tested with this patch, and there are no apparent
failures, so far.

CC: Nick Piggin <npiggin@suse.de>
CC: Christoph Hellwig <hch@lst.de>
Signed-off-by: Boaz Harrosh <bharrosh@panasas.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/exofs/exofs.h |   1 -
 fs/exofs/file.c  |   1 -
 fs/exofs/inode.c | 115 ++++++++++++++++++++-----------------------------------
 3 files changed, 42 insertions(+), 75 deletions(-)

(limited to 'fs')

diff --git a/fs/exofs/exofs.h b/fs/exofs/exofs.h
index 22721b2..0706ce9 100644
--- a/fs/exofs/exofs.h
+++ b/fs/exofs/exofs.h
@@ -256,7 +256,6 @@ static inline int exofs_oi_read(struct exofs_i_info *oi,
 }
 
 /* inode.c               */
-void exofs_truncate(struct inode *inode);
 int exofs_setattr(struct dentry *, struct iattr *);
 int exofs_write_begin(struct file *file, struct address_space *mapping,
 		loff_t pos, unsigned len, unsigned flags,
diff --git a/fs/exofs/file.c b/fs/exofs/file.c
index fef6899..f9bfe2b 100644
--- a/fs/exofs/file.c
+++ b/fs/exofs/file.c
@@ -86,6 +86,5 @@ const struct file_operations exofs_file_operations = {
 };
 
 const struct inode_operations exofs_file_inode_operations = {
-	.truncate	= exofs_truncate,
 	.setattr	= exofs_setattr,
 };
diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c
index 4bfc1f4..ccd0ce3 100644
--- a/fs/exofs/inode.c
+++ b/fs/exofs/inode.c
@@ -697,6 +697,13 @@ static int exofs_writepage(struct page *page, struct writeback_control *wbc)
 	return write_exec(&pcol);
 }
 
+/* i_mutex held using inode->i_size directly */
+static void _write_failed(struct inode *inode, loff_t to)
+{
+	if (to > inode->i_size)
+		truncate_pagecache(inode, to, inode->i_size);
+}
+
 int exofs_write_begin(struct file *file, struct address_space *mapping,
 		loff_t pos, unsigned len, unsigned flags,
 		struct page **pagep, void **fsdata)
@@ -710,7 +717,7 @@ int exofs_write_begin(struct file *file, struct address_space *mapping,
 					 fsdata);
 		if (ret) {
 			EXOFS_DBGMSG("simple_write_begin faild\n");
-			return ret;
+			goto out;
 		}
 
 		page = *pagep;
@@ -725,6 +732,9 @@ int exofs_write_begin(struct file *file, struct address_space *mapping,
 			EXOFS_DBGMSG("__readpage_filler faild\n");
 		}
 	}
+out:
+	if (unlikely(ret))
+		_write_failed(mapping->host, pos + len);
 
 	return ret;
 }
@@ -750,6 +760,10 @@ static int exofs_write_end(struct file *file, struct address_space *mapping,
 	int ret;
 
 	ret = simple_write_end(file, mapping,pos, len, copied, page, fsdata);
+	if (unlikely(ret))
+		_write_failed(inode, pos + len);
+
+	/* TODO: once simple_write_end marks inode dirty remove */
 	if (i_size != inode->i_size)
 		mark_inode_dirty(inode);
 	return ret;
@@ -808,91 +822,49 @@ static inline int exofs_inode_is_fast_symlink(struct inode *inode)
 	return S_ISLNK(inode->i_mode) && (oi->i_data[0] != 0);
 }
 
-/*
- * get_block_t - Fill in a buffer_head
- * An OSD takes care of block allocation so we just fake an allocation by
- * putting in the inode's sector_t in the buffer_head.
- * TODO: What about the case of create==0 and @iblock does not exist in the
- * object?
- */
-static int exofs_get_block(struct inode *inode, sector_t iblock,
-		    struct buffer_head *bh_result, int create)
-{
-	map_bh(bh_result, inode->i_sb, iblock);
-	return 0;
-}
-
 const struct osd_attr g_attr_logical_length = ATTR_DEF(
 	OSD_APAGE_OBJECT_INFORMATION, OSD_ATTR_OI_LOGICAL_LENGTH, 8);
 
-static int _do_truncate(struct inode *inode)
+static int _do_truncate(struct inode *inode, loff_t newsize)
 {
 	struct exofs_i_info *oi = exofs_i(inode);
-	loff_t isize = i_size_read(inode);
 	int ret;
 
 	inode->i_mtime = inode->i_ctime = CURRENT_TIME;
 
-	nobh_truncate_page(inode->i_mapping, isize, exofs_get_block);
+	ret = exofs_oi_truncate(oi, (u64)newsize);
+	if (likely(!ret))
+		truncate_setsize(inode, newsize);
 
-	ret = exofs_oi_truncate(oi, (u64)isize);
-	EXOFS_DBGMSG("(0x%lx) size=0x%llx\n", inode->i_ino, isize);
+	EXOFS_DBGMSG("(0x%lx) size=0x%llx ret=>%d\n",
+		     inode->i_ino, newsize, ret);
 	return ret;
 }
 
 /*
- * Truncate a file to the specified size - all we have to do is set the size
- * attribute.  We make sure the object exists first.
- */
-void exofs_truncate(struct inode *inode)
-{
-	struct exofs_i_info *oi = exofs_i(inode);
-	int ret;
-
-	if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode)
-	     || S_ISLNK(inode->i_mode)))
-		return;
-	if (exofs_inode_is_fast_symlink(inode))
-		return;
-	if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
-		return;
-
-	/* if we are about to truncate an object, and it hasn't been
-	 * created yet, wait
-	 */
-	if (unlikely(wait_obj_created(oi)))
-		goto fail;
-
-	ret = _do_truncate(inode);
-	if (ret)
-		goto fail;
-
-out:
-	mark_inode_dirty(inode);
-	return;
-fail:
-	make_bad_inode(inode);
-	goto out;
-}
-
-/*
- * Set inode attributes - just call generic functions.
+ * Set inode attributes - update size attribute on OSD if needed,
+ *                        otherwise just call generic functions.
  */
 int exofs_setattr(struct dentry *dentry, struct iattr *iattr)
 {
 	struct inode *inode = dentry->d_inode;
 	int error;
 
+	/* if we are about to modify an object, and it hasn't been
+	 * created yet, wait
+	 */
+	error = wait_obj_created(exofs_i(inode));
+	if (unlikely(error))
+		return error;
+
 	error = inode_change_ok(inode, iattr);
-	if (error)
+	if (unlikely(error))
 		return error;
 
 	if ((iattr->ia_valid & ATTR_SIZE) &&
 	    iattr->ia_size != i_size_read(inode)) {
-		int error;
-
-		error = vmtruncate(inode, iattr->ia_size);
-		if (error)
+		error = _do_truncate(inode, iattr->ia_size);
+		if (unlikely(error))
 			return error;
 	}
 
@@ -1345,28 +1317,25 @@ void exofs_delete_inode(struct inode *inode)
 
 	truncate_inode_pages(&inode->i_data, 0);
 
+	/* TODO: should do better here */
 	if (is_bad_inode(inode))
 		goto no_delete;
 
-	mark_inode_dirty(inode);
-	exofs_update_inode(inode, inode_needs_sync(inode));
-
 	inode->i_size = 0;
-	if (inode->i_blocks)
-		exofs_truncate(inode);
-
 	clear_inode(inode);
 
-	ret = exofs_get_io_state(&sbi->layout, &ios);
-	if (unlikely(ret)) {
-		EXOFS_ERR("%s: exofs_get_io_state failed\n", __func__);
-		return;
-	}
-
 	/* if we are deleting an obj that hasn't been created yet, wait */
 	if (!obj_created(oi)) {
 		BUG_ON(!obj_2bcreated(oi));
 		wait_event(oi->i_wq, obj_created(oi));
+		/* ignore the error attempt a remove anyway */
+	}
+
+	/* Now Remove the OSD objects */
+	ret = exofs_get_io_state(&sbi->layout, &ios);
+	if (unlikely(ret)) {
+		EXOFS_ERR("%s: exofs_get_io_state failed\n", __func__);
+		return;
 	}
 
 	ios->obj.id = exofs_oi_objno(oi);
-- 
cgit v1.1


From fa9b227e9019ebaeeb06224ba531a490f91144b3 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@infradead.org>
Date: Mon, 14 Jun 2010 05:17:31 -0400
Subject: xfs: new truncate sequence

Convert XFS to the new truncate sequence.  We still can have errors after
updating the file size in xfs_setattr, but these are real I/O errors and lead
to a transaction abort and filesystem shutdown, so they are not an issue.

Errors from ->write_begin and write_end can now be handled correctly because
we can actually get rid of the delalloc extents while previous the buffer
state was stipped in block_invalidatepage.

There is still no error handling for ->direct_IO, because doing so will need
some major restructuring given that we only have the iolock shared and do not
hold i_mutex at all.  Fortunately leaving the normally allocated blocks behind
there is not a major issue and this will get cleaned up by xfs_free_eofblock
later.

Note: the patch is against Al's vfs.git tree as that contains the nessecary
preparations.  I'd prefer to get it applied there so that we can get some
testing in linux-next.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/xfs/linux-2.6/xfs_aops.c  | 42 ++++++++++++++++++++++++++++++++++++------
 fs/xfs/linux-2.6/xfs_iops.c  | 16 ----------------
 fs/xfs/linux-2.6/xfs_linux.h |  2 --
 fs/xfs/xfs_vnodeops.c        | 38 ++++++++++++++++++++------------------
 4 files changed, 56 insertions(+), 42 deletions(-)

(limited to 'fs')

diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index bf7aad0..15412fe 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -1494,6 +1494,22 @@ xfs_vm_direct_IO(
 	return ret;
 }
 
+STATIC void
+xfs_vm_write_failed(
+	struct address_space	*mapping,
+	loff_t			to)
+{
+	struct inode		*inode = mapping->host;
+
+	if (to > inode->i_size) {
+		struct iattr	ia = {
+			.ia_valid	= ATTR_SIZE | ATTR_FORCE,
+			.ia_size	= inode->i_size,
+		};
+		xfs_setattr(XFS_I(inode), &ia, XFS_ATTR_NOLOCK);
+	}
+}
+
 STATIC int
 xfs_vm_write_begin(
 	struct file		*file,
@@ -1508,12 +1524,26 @@ xfs_vm_write_begin(
 
 	ret = block_write_begin(mapping, pos, len, flags | AOP_FLAG_NOFS,
 				pagep, xfs_get_blocks);
-	if (unlikely(ret)) {
-		loff_t isize = mapping->host->i_size;
-		if (pos + len > isize)
-			vmtruncate(mapping->host, isize);
-	}
+	if (unlikely(ret))
+		xfs_vm_write_failed(mapping, pos + len);
+	return ret;
+}
+
+STATIC int
+xfs_vm_write_end(
+	struct file		*file,
+	struct address_space	*mapping,
+	loff_t			pos,
+	unsigned		len,
+	unsigned		copied,
+	struct page		*page,
+	void			*fsdata)
+{
+	int			ret;
 
+	ret = generic_write_end(file, mapping, pos, len, copied, page, fsdata);
+	if (unlikely(ret < len))
+		xfs_vm_write_failed(mapping, pos + len);
 	return ret;
 }
 
@@ -1559,7 +1589,7 @@ const struct address_space_operations xfs_address_space_operations = {
 	.releasepage		= xfs_vm_releasepage,
 	.invalidatepage		= xfs_vm_invalidatepage,
 	.write_begin		= xfs_vm_write_begin,
-	.write_end		= generic_write_end,
+	.write_end		= xfs_vm_write_end,
 	.bmap			= xfs_vm_bmap,
 	.direct_IO		= xfs_vm_direct_IO,
 	.migratepage		= buffer_migrate_page,
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index 536b81e..62dd349 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -540,21 +540,6 @@ xfs_vn_setattr(
 	return -xfs_setattr(XFS_I(dentry->d_inode), iattr, 0);
 }
 
-/*
- * block_truncate_page can return an error, but we can't propagate it
- * at all here. Leave a complaint + stack trace in the syslog because
- * this could be bad. If it is bad, we need to propagate the error further.
- */
-STATIC void
-xfs_vn_truncate(
-	struct inode	*inode)
-{
-	int	error;
-	error = block_truncate_page(inode->i_mapping, inode->i_size,
-							xfs_get_blocks);
-	WARN_ON(error);
-}
-
 STATIC long
 xfs_vn_fallocate(
 	struct inode	*inode,
@@ -694,7 +679,6 @@ xfs_vn_fiemap(
 
 static const struct inode_operations xfs_inode_operations = {
 	.check_acl		= xfs_check_acl,
-	.truncate		= xfs_vn_truncate,
 	.getattr		= xfs_vn_getattr,
 	.setattr		= xfs_vn_setattr,
 	.setxattr		= generic_setxattr,
diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h
index 998a9d7..2fa0bd9 100644
--- a/fs/xfs/linux-2.6/xfs_linux.h
+++ b/fs/xfs/linux-2.6/xfs_linux.h
@@ -156,8 +156,6 @@
  */
 #define xfs_sort(a,n,s,fn)	sort(a,n,s,fn,NULL)
 #define xfs_stack_trace()	dump_stack()
-#define xfs_itruncate_data(ip, off)	\
-	(-vmtruncate(VFS_I(ip), (off)))
 
 
 /* Move the kernel do_div definition off to one side */
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index 3ac137d..66d585c 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -221,8 +221,11 @@ xfs_setattr(
 			 * transaction to modify the i_size.
 			 */
 			code = xfs_zero_eof(ip, iattr->ia_size, ip->i_size);
+			if (code)
+				goto error_return;
 		}
 		xfs_iunlock(ip, XFS_ILOCK_EXCL);
+		lock_flags &= ~XFS_ILOCK_EXCL;
 
 		/*
 		 * We are going to log the inode size change in this
@@ -236,36 +239,35 @@ xfs_setattr(
 		 * really care about here and prevents waiting for other data
 		 * not within the range we care about here.
 		 */
-		if (!code &&
-		    ip->i_size != ip->i_d.di_size &&
+		if (ip->i_size != ip->i_d.di_size &&
 		    iattr->ia_size > ip->i_d.di_size) {
 			code = xfs_flush_pages(ip,
 					ip->i_d.di_size, iattr->ia_size,
 					XBF_ASYNC, FI_NONE);
+			if (code)
+				goto error_return;
 		}
 
 		/* wait for all I/O to complete */
 		xfs_ioend_wait(ip);
 
-		if (!code)
-			code = xfs_itruncate_data(ip, iattr->ia_size);
-		if (code) {
-			ASSERT(tp == NULL);
-			lock_flags &= ~XFS_ILOCK_EXCL;
-			ASSERT(lock_flags == XFS_IOLOCK_EXCL || !need_iolock);
+		code = -block_truncate_page(inode->i_mapping, iattr->ia_size,
+					    xfs_get_blocks);
+		if (code)
 			goto error_return;
-		}
+
 		tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_SIZE);
-		if ((code = xfs_trans_reserve(tp, 0,
-					     XFS_ITRUNCATE_LOG_RES(mp), 0,
-					     XFS_TRANS_PERM_LOG_RES,
-					     XFS_ITRUNCATE_LOG_COUNT))) {
-			xfs_trans_cancel(tp, 0);
-			if (need_iolock)
-				xfs_iunlock(ip, XFS_IOLOCK_EXCL);
-			return code;
-		}
+		code = xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0,
+					 XFS_TRANS_PERM_LOG_RES,
+					 XFS_ITRUNCATE_LOG_COUNT);
+		if (code)
+			goto error_return;
+
+		truncate_setsize(inode, iattr->ia_size);
+
 		commit_flags = XFS_TRANS_RELEASE_LOG_RES;
+		lock_flags |= XFS_ILOCK_EXCL;
+
 		xfs_ilock(ip, XFS_ILOCK_EXCL);
 
 		xfs_trans_ijoin(tp, ip);
-- 
cgit v1.1


From b5fc510c48f631882ccec3c0f02a25d5b67de09f Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 4 Jul 2010 12:24:09 +0400
Subject: get rid of file_fsync()

Copy and simplify in the only two users remaining.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/hfs/inode.c          | 26 +++++++++++++++++++++++++-
 fs/hfsplus/hfsplus_fs.h |  1 +
 fs/hfsplus/inode.c      | 27 ++++++++++++++++++++++++++-
 fs/hfsplus/super.c      |  2 +-
 fs/sync.c               | 25 -------------------------
 5 files changed, 53 insertions(+), 28 deletions(-)

(limited to 'fs')

diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c
index 87de671..93ceec8 100644
--- a/fs/hfs/inode.c
+++ b/fs/hfs/inode.c
@@ -625,6 +625,30 @@ int hfs_inode_setattr(struct dentry *dentry, struct iattr * attr)
 	return 0;
 }
 
+static int hfs_file_fsync(struct file *filp, int datasync)
+{
+	struct inode *inode = filp->f_mapping->host;
+	struct super_block * sb;
+	int ret, err;
+
+	/* sync the inode to buffers */
+	ret = write_inode_now(inode, 0);
+
+	/* sync the superblock to buffers */
+	sb = inode->i_sb;
+	if (sb->s_dirt) {
+		lock_super(sb);
+		sb->s_dirt = 0;
+		if (!(sb->s_flags & MS_RDONLY))
+			hfs_mdb_commit(sb);
+		unlock_super(sb);
+	}
+	/* .. finally sync the buffers to disk */
+	err = sync_blockdev(sb->s_bdev);
+	if (!ret)
+		ret = err;
+	return ret;
+}
 
 static const struct file_operations hfs_file_operations = {
 	.llseek		= generic_file_llseek,
@@ -634,7 +658,7 @@ static const struct file_operations hfs_file_operations = {
 	.aio_write	= generic_file_aio_write,
 	.mmap		= generic_file_mmap,
 	.splice_read	= generic_file_splice_read,
-	.fsync		= file_fsync,
+	.fsync		= hfs_file_fsync,
 	.open		= hfs_file_open,
 	.release	= hfs_file_release,
 };
diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h
index 6505c30..dc856be 100644
--- a/fs/hfsplus/hfsplus_fs.h
+++ b/fs/hfsplus/hfsplus_fs.h
@@ -351,6 +351,7 @@ int hfsplus_show_options(struct seq_file *, struct vfsmount *);
 
 /* super.c */
 struct inode *hfsplus_iget(struct super_block *, unsigned long);
+int hfsplus_sync_fs(struct super_block *sb, int wait);
 
 /* tables.c */
 extern u16 hfsplus_case_fold_table[];
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c
index 654c5a8..c5a979d 100644
--- a/fs/hfsplus/inode.c
+++ b/fs/hfsplus/inode.c
@@ -311,6 +311,31 @@ static int hfsplus_setattr(struct dentry *dentry, struct iattr *attr)
 	return 0;
 }
 
+static int hfsplus_file_fsync(struct file *filp, int datasync)
+{
+	struct inode *inode = filp->f_mapping->host;
+	struct super_block * sb;
+	int ret, err;
+
+	/* sync the inode to buffers */
+	ret = write_inode_now(inode, 0);
+
+	/* sync the superblock to buffers */
+	sb = inode->i_sb;
+	if (sb->s_dirt) {
+		if (!(sb->s_flags & MS_RDONLY))
+			hfsplus_sync_fs(sb, 1);
+		else
+			sb->s_dirt = 0;
+	}
+
+	/* .. finally sync the buffers to disk */
+	err = sync_blockdev(sb->s_bdev);
+	if (!ret)
+		ret = err;
+	return ret;
+}
+
 static const struct inode_operations hfsplus_file_inode_operations = {
 	.lookup		= hfsplus_file_lookup,
 	.truncate	= hfsplus_file_truncate,
@@ -328,7 +353,7 @@ static const struct file_operations hfsplus_file_operations = {
 	.aio_write	= generic_file_aio_write,
 	.mmap		= generic_file_mmap,
 	.splice_read	= generic_file_splice_read,
-	.fsync		= file_fsync,
+	.fsync		= hfsplus_file_fsync,
 	.open		= hfsplus_file_open,
 	.release	= hfsplus_file_release,
 	.unlocked_ioctl = hfsplus_ioctl,
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c
index 74b473a..a32c241 100644
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c
@@ -154,7 +154,7 @@ static void hfsplus_clear_inode(struct inode *inode)
 	}
 }
 
-static int hfsplus_sync_fs(struct super_block *sb, int wait)
+int hfsplus_sync_fs(struct super_block *sb, int wait)
 {
 	struct hfsplus_vh *vhdr = HFSPLUS_SB(sb).s_vhdr;
 
diff --git a/fs/sync.c b/fs/sync.c
index 15aa6f0..ba76b96 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -128,31 +128,6 @@ void emergency_sync(void)
 	}
 }
 
-/*
- * Generic function to fsync a file.
- */
-int file_fsync(struct file *filp, int datasync)
-{
-	struct inode *inode = filp->f_mapping->host;
-	struct super_block * sb;
-	int ret, err;
-
-	/* sync the inode to buffers */
-	ret = write_inode_now(inode, 0);
-
-	/* sync the superblock to buffers */
-	sb = inode->i_sb;
-	if (sb->s_dirt && sb->s_op->write_super)
-		sb->s_op->write_super(sb);
-
-	/* .. finally sync the buffers to disk */
-	err = sync_blockdev(sb->s_bdev);
-	if (!ret)
-		ret = err;
-	return ret;
-}
-EXPORT_SYMBOL(file_fsync);
-
 /**
  * vfs_fsync_range - helper to sync a range of data & metadata to disk
  * @file:		file to sync
-- 
cgit v1.1


From a4ffdde6e56fdf8c34ddadc2674d6eb978083369 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 2 Jun 2010 17:38:30 -0400
Subject: simplify checks for I_CLEAR/I_FREEING

add I_CLEAR instead of replacing I_FREEING with it.  I_CLEAR is
equivalent to I_FREEING for almost all code looking at either;
it's there to keep track of having called clear_inode() exactly
once per inode lifetime, at some point after having set I_FREEING.
I_CLEAR and I_FREEING never get set at the same time with the
current code, so we can switch to setting i_flags to I_FREEING | I_CLEAR
instead of I_CLEAR without loss of information.  As the result of
such change, checks become simpler and the amount of code that needs
to know about I_CLEAR shrinks a lot.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/btrfs/inode.c            |  2 +-
 fs/drop_caches.c            |  2 +-
 fs/fs-writeback.c           |  8 ++++----
 fs/gfs2/inode.c             |  2 +-
 fs/inode.c                  | 16 ++++++++--------
 fs/nilfs2/gcdat.c           |  2 +-
 fs/notify/inode_mark.c      |  6 +++---
 fs/notify/inotify/inotify.c |  7 +++----
 fs/quota/dquot.c            |  2 +-
 fs/xfs/linux-2.6/xfs_iops.c |  4 ++--
 10 files changed, 25 insertions(+), 26 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 7f9e053..95eac01 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -3860,7 +3860,7 @@ again:
 			p = &parent->rb_right;
 		else {
 			WARN_ON(!(entry->vfs_inode.i_state &
-				  (I_WILL_FREE | I_FREEING | I_CLEAR)));
+				  (I_WILL_FREE | I_FREEING)));
 			rb_erase(parent, &root->inode_tree);
 			RB_CLEAR_NODE(parent);
 			spin_unlock(&root->inode_lock);
diff --git a/fs/drop_caches.c b/fs/drop_caches.c
index 83c4f60..2195c21 100644
--- a/fs/drop_caches.c
+++ b/fs/drop_caches.c
@@ -18,7 +18,7 @@ static void drop_pagecache_sb(struct super_block *sb, void *unused)
 
 	spin_lock(&inode_lock);
 	list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
-		if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE|I_NEW))
+		if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW))
 			continue;
 		if (inode->i_mapping->nrpages == 0)
 			continue;
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index d5be169..7608880 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -352,7 +352,7 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
 
 	spin_lock(&inode_lock);
 	inode->i_state &= ~I_SYNC;
-	if (!(inode->i_state & (I_FREEING | I_CLEAR))) {
+	if (!(inode->i_state & I_FREEING)) {
 		if ((inode->i_state & I_DIRTY_PAGES) && wbc->for_kupdate) {
 			/*
 			 * More pages get dirtied by a fast dirtier.
@@ -499,7 +499,7 @@ static int writeback_sb_inodes(struct super_block *sb, struct bdi_writeback *wb,
 		if (inode_dirtied_after(inode, wbc->wb_start))
 			return 1;
 
-		BUG_ON(inode->i_state & (I_FREEING | I_CLEAR));
+		BUG_ON(inode->i_state & I_FREEING);
 		__iget(inode);
 		pages_skipped = wbc->pages_skipped;
 		writeback_single_inode(inode, wbc);
@@ -935,7 +935,7 @@ void __mark_inode_dirty(struct inode *inode, int flags)
 			if (hlist_unhashed(&inode->i_hash))
 				goto out;
 		}
-		if (inode->i_state & (I_FREEING|I_CLEAR))
+		if (inode->i_state & I_FREEING)
 			goto out;
 
 		/*
@@ -1001,7 +1001,7 @@ static void wait_sb_inodes(struct super_block *sb)
 	list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
 		struct address_space *mapping;
 
-		if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE|I_NEW))
+		if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW))
 			continue;
 		mapping = inode->i_mapping;
 		if (mapping->nrpages == 0)
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 6c023a3..08140f1 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -84,7 +84,7 @@ static int iget_skip_test(struct inode *inode, void *opaque)
 	struct gfs2_skip_data *data = opaque;
 
 	if (ip->i_no_addr == data->no_addr) {
-		if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE)){
+		if (inode->i_state & (I_FREEING|I_WILL_FREE)){
 			data->skipped = 1;
 			return 0;
 		}
diff --git a/fs/inode.c b/fs/inode.c
index 722860b..71fe079 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -317,7 +317,7 @@ void clear_inode(struct inode *inode)
 		bd_forget(inode);
 	if (S_ISCHR(inode->i_mode) && inode->i_cdev)
 		cd_forget(inode);
-	inode->i_state = I_CLEAR;
+	inode->i_state = I_FREEING | I_CLEAR;
 }
 EXPORT_SYMBOL(clear_inode);
 
@@ -553,7 +553,7 @@ repeat:
 			continue;
 		if (!test(inode, data))
 			continue;
-		if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE)) {
+		if (inode->i_state & (I_FREEING|I_WILL_FREE)) {
 			__wait_on_freeing_inode(inode);
 			goto repeat;
 		}
@@ -578,7 +578,7 @@ repeat:
 			continue;
 		if (inode->i_sb != sb)
 			continue;
-		if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE)) {
+		if (inode->i_state & (I_FREEING|I_WILL_FREE)) {
 			__wait_on_freeing_inode(inode);
 			goto repeat;
 		}
@@ -840,7 +840,7 @@ EXPORT_SYMBOL(iunique);
 struct inode *igrab(struct inode *inode)
 {
 	spin_lock(&inode_lock);
-	if (!(inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE)))
+	if (!(inode->i_state & (I_FREEING|I_WILL_FREE)))
 		__iget(inode);
 	else
 		/*
@@ -1089,7 +1089,7 @@ int insert_inode_locked(struct inode *inode)
 				continue;
 			if (old->i_sb != sb)
 				continue;
-			if (old->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE))
+			if (old->i_state & (I_FREEING|I_WILL_FREE))
 				continue;
 			break;
 		}
@@ -1128,7 +1128,7 @@ int insert_inode_locked4(struct inode *inode, unsigned long hashval,
 				continue;
 			if (!test(old, data))
 				continue;
-			if (old->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE))
+			if (old->i_state & (I_FREEING|I_WILL_FREE))
 				continue;
 			break;
 		}
@@ -1218,7 +1218,7 @@ void generic_delete_inode(struct inode *inode)
 	hlist_del_init(&inode->i_hash);
 	spin_unlock(&inode_lock);
 	wake_up_inode(inode);
-	BUG_ON(inode->i_state != I_CLEAR);
+	BUG_ON(inode->i_state != (I_FREEING | I_CLEAR));
 	destroy_inode(inode);
 }
 EXPORT_SYMBOL(generic_delete_inode);
@@ -1322,7 +1322,7 @@ static inline void iput_final(struct inode *inode)
 void iput(struct inode *inode)
 {
 	if (inode) {
-		BUG_ON(inode->i_state == I_CLEAR);
+		BUG_ON(inode->i_state & I_CLEAR);
 
 		if (atomic_dec_and_lock(&inode->i_count, &inode_lock))
 			iput_final(inode);
diff --git a/fs/nilfs2/gcdat.c b/fs/nilfs2/gcdat.c
index dd5f7e0..84a45d1 100644
--- a/fs/nilfs2/gcdat.c
+++ b/fs/nilfs2/gcdat.c
@@ -78,7 +78,7 @@ void nilfs_clear_gcdat_inode(struct the_nilfs *nilfs)
 	struct inode *gcdat = nilfs->ns_gc_dat;
 	struct nilfs_inode_info *gii = NILFS_I(gcdat);
 
-	gcdat->i_state = I_CLEAR;
+	gcdat->i_state = I_FREEING | I_CLEAR;
 	gii->i_flags = 0;
 
 	nilfs_palloc_clear_cache(gcdat);
diff --git a/fs/notify/inode_mark.c b/fs/notify/inode_mark.c
index 0399bcb..152b83e 100644
--- a/fs/notify/inode_mark.c
+++ b/fs/notify/inode_mark.c
@@ -369,11 +369,11 @@ void fsnotify_unmount_inodes(struct list_head *list)
 		struct inode *need_iput_tmp;
 
 		/*
-		 * We cannot __iget() an inode in state I_CLEAR, I_FREEING,
+		 * We cannot __iget() an inode in state I_FREEING,
 		 * I_WILL_FREE, or I_NEW which is fine because by that point
 		 * the inode cannot have any associated watches.
 		 */
-		if (inode->i_state & (I_CLEAR|I_FREEING|I_WILL_FREE|I_NEW))
+		if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW))
 			continue;
 
 		/*
@@ -397,7 +397,7 @@ void fsnotify_unmount_inodes(struct list_head *list)
 		/* In case the dropping of a reference would nuke next_i. */
 		if ((&next_i->i_sb_list != list) &&
 		    atomic_read(&next_i->i_count) &&
-		    !(next_i->i_state & (I_CLEAR | I_FREEING | I_WILL_FREE))) {
+		    !(next_i->i_state & (I_FREEING | I_WILL_FREE))) {
 			__iget(next_i);
 			need_iput = next_i;
 		}
diff --git a/fs/notify/inotify/inotify.c b/fs/notify/inotify/inotify.c
index 27b75eb..cf6b042 100644
--- a/fs/notify/inotify/inotify.c
+++ b/fs/notify/inotify/inotify.c
@@ -377,11 +377,11 @@ void inotify_unmount_inodes(struct list_head *list)
 		struct list_head *watches;
 
 		/*
-		 * We cannot __iget() an inode in state I_CLEAR, I_FREEING,
+		 * We cannot __iget() an inode in state I_FREEING,
 		 * I_WILL_FREE, or I_NEW which is fine because by that point
 		 * the inode cannot have any associated watches.
 		 */
-		if (inode->i_state & (I_CLEAR|I_FREEING|I_WILL_FREE|I_NEW))
+		if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW))
 			continue;
 
 		/*
@@ -403,8 +403,7 @@ void inotify_unmount_inodes(struct list_head *list)
 		/* In case the dropping of a reference would nuke next_i. */
 		if ((&next_i->i_sb_list != list) &&
 				atomic_read(&next_i->i_count) &&
-				!(next_i->i_state & (I_CLEAR | I_FREEING |
-					I_WILL_FREE))) {
+				!(next_i->i_state & (I_FREEING|I_WILL_FREE))) {
 			__iget(next_i);
 			need_iput = next_i;
 		}
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index 437d2ca..5cec3e2 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -885,7 +885,7 @@ static void add_dquot_ref(struct super_block *sb, int type)
 
 	spin_lock(&inode_lock);
 	list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
-		if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE|I_NEW))
+		if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW))
 			continue;
 #ifdef CONFIG_QUOTA_DEBUG
 		if (unlikely(inode_get_rsv_space(inode) > 0))
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index 62dd349..68be25d 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -80,7 +80,7 @@ xfs_mark_inode_dirty_sync(
 {
 	struct inode	*inode = VFS_I(ip);
 
-	if (!(inode->i_state & (I_WILL_FREE|I_FREEING|I_CLEAR)))
+	if (!(inode->i_state & (I_WILL_FREE|I_FREEING)))
 		mark_inode_dirty_sync(inode);
 }
 
@@ -90,7 +90,7 @@ xfs_mark_inode_dirty(
 {
 	struct inode	*inode = VFS_I(ip);
 
-	if (!(inode->i_state & (I_WILL_FREE|I_FREEING|I_CLEAR)))
+	if (!(inode->i_state & (I_WILL_FREE|I_FREEING)))
 		mark_inode_dirty(inode);
 }
 
-- 
cgit v1.1


From b4272d4c810532e1a4dea111433a0af56d3bd2b7 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 4 Jun 2010 19:33:20 -0400
Subject: unify fs/inode.c callers of clear_inode()

For now, just a straightforward merge

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/inode.c | 36 +++++++++++++++++-------------------
 1 file changed, 17 insertions(+), 19 deletions(-)

(limited to 'fs')

diff --git a/fs/inode.c b/fs/inode.c
index 71fe079..60cb259 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -321,6 +321,19 @@ void clear_inode(struct inode *inode)
 }
 EXPORT_SYMBOL(clear_inode);
 
+static void evict(struct inode *inode, int delete)
+{
+	const struct super_operations *op = inode->i_sb->s_op;
+
+	if (delete && op->delete_inode) {
+		op->delete_inode(inode);
+	} else {
+		if (inode->i_data.nrpages)
+			truncate_inode_pages(&inode->i_data, 0);
+		clear_inode(inode);
+	}
+}
+
 /*
  * dispose_list - dispose of the contents of a local list
  * @head: the head of the list to free
@@ -338,9 +351,7 @@ static void dispose_list(struct list_head *head)
 		inode = list_first_entry(head, struct inode, i_list);
 		list_del(&inode->i_list);
 
-		if (inode->i_data.nrpages)
-			truncate_inode_pages(&inode->i_data, 0);
-		clear_inode(inode);
+		evict(inode, 0);
 
 		spin_lock(&inode_lock);
 		hlist_del_init(&inode->i_hash);
@@ -1194,8 +1205,6 @@ EXPORT_SYMBOL(remove_inode_hash);
  */
 void generic_delete_inode(struct inode *inode)
 {
-	const struct super_operations *op = inode->i_sb->s_op;
-
 	list_del_init(&inode->i_list);
 	list_del_init(&inode->i_sb_list);
 	WARN_ON(inode->i_state & I_NEW);
@@ -1203,17 +1212,8 @@ void generic_delete_inode(struct inode *inode)
 	inodes_stat.nr_inodes--;
 	spin_unlock(&inode_lock);
 
-	if (op->delete_inode) {
-		void (*delete)(struct inode *) = op->delete_inode;
-		/* Filesystems implementing their own
-		 * s_op->delete_inode are required to call
-		 * truncate_inode_pages and clear_inode()
-		 * internally */
-		delete(inode);
-	} else {
-		truncate_inode_pages(&inode->i_data, 0);
-		clear_inode(inode);
-	}
+	evict(inode, 1);
+
 	spin_lock(&inode_lock);
 	hlist_del_init(&inode->i_hash);
 	spin_unlock(&inode_lock);
@@ -1268,9 +1268,7 @@ static void generic_forget_inode(struct inode *inode)
 {
 	if (!generic_detach_inode(inode))
 		return;
-	if (inode->i_data.nrpages)
-		truncate_inode_pages(&inode->i_data, 0);
-	clear_inode(inode);
+	evict(inode, 0);
 	wake_up_inode(inode);
 	destroy_inode(inode);
 }
-- 
cgit v1.1


From be7ce4161f9e6bf2497f90337d1214aa6ee06e15 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 4 Jun 2010 19:40:39 -0400
Subject: New method - evict_inode()

Hybrid of ->clear_inode() and ->delete_inode(); if present, does
all fs work to be done when in-core inode is about to be gone,
for whatever reason.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/inode.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/inode.c b/fs/inode.c
index 60cb259..474a72f 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -325,7 +325,9 @@ static void evict(struct inode *inode, int delete)
 {
 	const struct super_operations *op = inode->i_sb->s_op;
 
-	if (delete && op->delete_inode) {
+	if (op->evict_inode) {
+		op->evict_inode(inode);
+	} else if (delete && op->delete_inode) {
 		op->delete_inode(inode);
 	} else {
 		if (inode->i_data.nrpages)
-- 
cgit v1.1


From 2bbbda308f5ca027d4fd721f914c0cab88d49aec Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 4 Jun 2010 19:52:12 -0400
Subject: switch hugetlbfs to ->evict_inode()

The first spoils - hugetlb can use default ->drop_inode() now.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/hugetlbfs/inode.c | 22 ++--------------------
 1 file changed, 2 insertions(+), 20 deletions(-)

(limited to 'fs')

diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index d5f019d..bf1a2f4 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -371,29 +371,12 @@ static void truncate_hugepages(struct inode *inode, loff_t lstart)
 	hugetlb_unreserve_pages(inode, start, freed);
 }
 
-static void hugetlbfs_delete_inode(struct inode *inode)
+static void hugetlbfs_evict_inode(struct inode *inode)
 {
 	truncate_hugepages(inode, 0);
 	clear_inode(inode);
 }
 
-static void hugetlbfs_forget_inode(struct inode *inode) __releases(inode_lock)
-{
-	if (generic_detach_inode(inode)) {
-		truncate_hugepages(inode, 0);
-		clear_inode(inode);
-		destroy_inode(inode);
-	}
-}
-
-static void hugetlbfs_drop_inode(struct inode *inode)
-{
-	if (!inode->i_nlink)
-		generic_delete_inode(inode);
-	else
-		hugetlbfs_forget_inode(inode);
-}
-
 static inline void
 hugetlb_vmtruncate_list(struct prio_tree_root *root, pgoff_t pgoff)
 {
@@ -713,9 +696,8 @@ static const struct inode_operations hugetlbfs_inode_operations = {
 static const struct super_operations hugetlbfs_ops = {
 	.alloc_inode    = hugetlbfs_alloc_inode,
 	.destroy_inode  = hugetlbfs_destroy_inode,
+	.evict_inode	= hugetlbfs_evict_inode,
 	.statfs		= hugetlbfs_statfs,
-	.delete_inode	= hugetlbfs_delete_inode,
-	.drop_inode	= hugetlbfs_drop_inode,
 	.put_super	= hugetlbfs_put_super,
 	.show_options	= generic_show_options,
 };
-- 
cgit v1.1


From c6287315cb958e740466555ca5e9d007f25b39bd Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 4 Jun 2010 19:56:17 -0400
Subject: generic_detach_inode() can be static now

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/inode.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/inode.c b/fs/inode.c
index 474a72f..256e620 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -1234,7 +1234,7 @@ EXPORT_SYMBOL(generic_delete_inode);
  *
  *	Returns 1 if inode should be completely destroyed.
  */
-int generic_detach_inode(struct inode *inode)
+static int generic_detach_inode(struct inode *inode)
 {
 	struct super_block *sb = inode->i_sb;
 
@@ -1264,7 +1264,6 @@ int generic_detach_inode(struct inode *inode)
 	spin_unlock(&inode_lock);
 	return 1;
 }
-EXPORT_SYMBOL_GPL(generic_detach_inode);
 
 static void generic_forget_inode(struct inode *inode)
 {
-- 
cgit v1.1


From 661074e91b1da1ee262dfde6dd836deacccb9def Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 4 Jun 2010 20:19:55 -0400
Subject: Take ->i_bdev/->i_cdev handling out of clear_inode()

All call chains to clear_inode() pass through evict_inode() and
clear_inode() should be called by evict_inode() exactly once.
So we can pull i_bdev/i_cdev detaching up to evict_inode() itself.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/inode.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/inode.c b/fs/inode.c
index 256e620..9aff7de 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -313,10 +313,6 @@ void clear_inode(struct inode *inode)
 	inode_sync_wait(inode);
 	if (inode->i_sb->s_op->clear_inode)
 		inode->i_sb->s_op->clear_inode(inode);
-	if (S_ISBLK(inode->i_mode) && inode->i_bdev)
-		bd_forget(inode);
-	if (S_ISCHR(inode->i_mode) && inode->i_cdev)
-		cd_forget(inode);
 	inode->i_state = I_FREEING | I_CLEAR;
 }
 EXPORT_SYMBOL(clear_inode);
@@ -334,6 +330,10 @@ static void evict(struct inode *inode, int delete)
 			truncate_inode_pages(&inode->i_data, 0);
 		clear_inode(inode);
 	}
+	if (S_ISBLK(inode->i_mode) && inode->i_bdev)
+		bd_forget(inode);
+	if (S_ISCHR(inode->i_mode) && inode->i_cdev)
+		cd_forget(inode);
 }
 
 /*
-- 
cgit v1.1


From b0683aa638b3326c6fc22e5290dfa75e08bd83f5 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 4 Jun 2010 20:55:25 -0400
Subject: new helper: end_writeback()

Essentially, the minimal variant of ->evict_inode().  It's
a trimmed-down clear_inode(), sans any fs callbacks.  Once
it returns we know that no async writeback will be happening;
every ->evict_inode() instance should do that once and do that
before doing anything ->write_inode() could interfere with
(e.g. freeing the on-disk inode).

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/hugetlbfs/inode.c |  2 +-
 fs/inode.c           | 12 ++++++++++++
 2 files changed, 13 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index bf1a2f4..6e5bd42 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -374,7 +374,7 @@ static void truncate_hugepages(struct inode *inode, loff_t lstart)
 static void hugetlbfs_evict_inode(struct inode *inode)
 {
 	truncate_hugepages(inode, 0);
-	clear_inode(inode);
+	end_writeback(inode);
 }
 
 static inline void
diff --git a/fs/inode.c b/fs/inode.c
index 9aff7de..93e7a5e 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -294,6 +294,18 @@ void __iget(struct inode *inode)
 	inodes_stat.nr_unused--;
 }
 
+void end_writeback(struct inode *inode)
+{
+	might_sleep();
+	BUG_ON(inode->i_data.nrpages);
+	BUG_ON(!list_empty(&inode->i_data.private_list));
+	BUG_ON(!(inode->i_state & I_FREEING));
+	BUG_ON(inode->i_state & I_CLEAR);
+	inode_sync_wait(inode);
+	inode->i_state = I_FREEING | I_CLEAR;
+}
+EXPORT_SYMBOL(end_writeback);
+
 /**
  * clear_inode - clear an inode
  * @inode: inode to clear
-- 
cgit v1.1


From 77b8a75f5bb461951148a7211ef30eecac5cb662 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 4 Jun 2010 21:19:01 -0400
Subject: simplify get_cramfs_inode()

simply don't hash the inodes that don't have real inumber instead of
skipping them during iget5_locked(); as the result, simple iget_locked()
would do and we can get rid of cramfs ->drop_inode() as well.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/cramfs/inode.c | 88 ++++++++++++++++++++++++-------------------------------
 1 file changed, 38 insertions(+), 50 deletions(-)

(limited to 'fs')

diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c
index dd3634e..a53b130 100644
--- a/fs/cramfs/inode.c
+++ b/fs/cramfs/inode.c
@@ -39,66 +39,55 @@ static DEFINE_MUTEX(read_mutex);
 #define CRAMINO(x)	(((x)->offset && (x)->size)?(x)->offset<<2:1)
 #define OFFSET(x)	((x)->i_ino)
 
-
-static int cramfs_iget5_test(struct inode *inode, void *opaque)
-{
-	struct cramfs_inode *cramfs_inode = opaque;
-	return inode->i_ino == CRAMINO(cramfs_inode) && inode->i_ino != 1;
-}
-
-static int cramfs_iget5_set(struct inode *inode, void *opaque)
+static void setup_inode(struct inode *inode, struct cramfs_inode * cramfs_inode)
 {
-	struct cramfs_inode *cramfs_inode = opaque;
-	inode->i_ino = CRAMINO(cramfs_inode);
-	return 0;
+	static struct timespec zerotime;
+	inode->i_mode = cramfs_inode->mode;
+	inode->i_uid = cramfs_inode->uid;
+	inode->i_size = cramfs_inode->size;
+	inode->i_blocks = (cramfs_inode->size - 1) / 512 + 1;
+	inode->i_gid = cramfs_inode->gid;
+	/* Struct copy intentional */
+	inode->i_mtime = inode->i_atime = inode->i_ctime = zerotime;
+	/* inode->i_nlink is left 1 - arguably wrong for directories,
+	   but it's the best we can do without reading the directory
+	   contents.  1 yields the right result in GNU find, even
+	   without -noleaf option. */
+	if (S_ISREG(inode->i_mode)) {
+		inode->i_fop = &generic_ro_fops;
+		inode->i_data.a_ops = &cramfs_aops;
+	} else if (S_ISDIR(inode->i_mode)) {
+		inode->i_op = &cramfs_dir_inode_operations;
+		inode->i_fop = &cramfs_directory_operations;
+	} else if (S_ISLNK(inode->i_mode)) {
+		inode->i_op = &page_symlink_inode_operations;
+		inode->i_data.a_ops = &cramfs_aops;
+	} else {
+		init_special_inode(inode, inode->i_mode,
+			old_decode_dev(cramfs_inode->size));
+	}
 }
 
 static struct inode *get_cramfs_inode(struct super_block *sb,
 				struct cramfs_inode * cramfs_inode)
 {
-	struct inode *inode = iget5_locked(sb, CRAMINO(cramfs_inode),
-					    cramfs_iget5_test, cramfs_iget5_set,
-					    cramfs_inode);
-	static struct timespec zerotime;
-
-	if (inode && (inode->i_state & I_NEW)) {
-		inode->i_mode = cramfs_inode->mode;
-		inode->i_uid = cramfs_inode->uid;
-		inode->i_size = cramfs_inode->size;
-		inode->i_blocks = (cramfs_inode->size - 1) / 512 + 1;
-		inode->i_gid = cramfs_inode->gid;
-		/* Struct copy intentional */
-		inode->i_mtime = inode->i_atime = inode->i_ctime = zerotime;
-		/* inode->i_nlink is left 1 - arguably wrong for directories,
-		   but it's the best we can do without reading the directory
-		   contents.  1 yields the right result in GNU find, even
-		   without -noleaf option. */
-		if (S_ISREG(inode->i_mode)) {
-			inode->i_fop = &generic_ro_fops;
-			inode->i_data.a_ops = &cramfs_aops;
-		} else if (S_ISDIR(inode->i_mode)) {
-			inode->i_op = &cramfs_dir_inode_operations;
-			inode->i_fop = &cramfs_directory_operations;
-		} else if (S_ISLNK(inode->i_mode)) {
-			inode->i_op = &page_symlink_inode_operations;
-			inode->i_data.a_ops = &cramfs_aops;
-		} else {
-			init_special_inode(inode, inode->i_mode,
-				old_decode_dev(cramfs_inode->size));
+	struct inode *inode;
+	if (CRAMINO(cramfs_inode) == 1) {
+		inode = new_inode(sb);
+		if (inode) {
+			inode->i_ino = 1;
+			setup_inode(inode, cramfs_inode);
+		}
+	} else {
+		inode = iget_locked(sb, CRAMINO(cramfs_inode));
+		if (inode) {
+			setup_inode(inode, cramfs_inode);
+			unlock_new_inode(inode);
 		}
-		unlock_new_inode(inode);
 	}
 	return inode;
 }
 
-static void cramfs_drop_inode(struct inode *inode)
-{
-	if (inode->i_ino == 1)
-		generic_delete_inode(inode);
-	else
-		generic_drop_inode(inode);
-}
-
 /*
  * We have our own block cache: don't fill up the buffer cache
  * with the rom-image, because the way the filesystem is set
@@ -542,7 +531,6 @@ static const struct super_operations cramfs_ops = {
 	.put_super	= cramfs_put_super,
 	.remount_fs	= cramfs_remount,
 	.statfs		= cramfs_statfs,
-	.drop_inode	= cramfs_drop_inode,
 };
 
 static int cramfs_get_sb(struct file_system_type *fs_type,
-- 
cgit v1.1


From 8267952b362b67a5cb5371d6894a772a13e6874c Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 4 Jun 2010 22:17:56 -0400
Subject: switch procfs to ->evict_inode()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/proc/inode.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index aea8502..23561cd 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -25,11 +25,12 @@
 
 #include "internal.h"
 
-static void proc_delete_inode(struct inode *inode)
+static void proc_evict_inode(struct inode *inode)
 {
 	struct proc_dir_entry *de;
 
 	truncate_inode_pages(&inode->i_data, 0);
+	end_writeback(inode);
 
 	/* Stop tracking associated processes */
 	put_pid(PROC_I(inode)->pid);
@@ -40,7 +41,6 @@ static void proc_delete_inode(struct inode *inode)
 		pde_put(de);
 	if (PROC_I(inode)->sysctl)
 		sysctl_head_put(PROC_I(inode)->sysctl);
-	clear_inode(inode);
 }
 
 struct vfsmount *proc_mnt;
@@ -91,7 +91,7 @@ static const struct super_operations proc_sops = {
 	.alloc_inode	= proc_alloc_inode,
 	.destroy_inode	= proc_destroy_inode,
 	.drop_inode	= generic_delete_inode,
-	.delete_inode	= proc_delete_inode,
+	.evict_inode	= proc_evict_inode,
 	.statfs		= simple_statfs,
 };
 
-- 
cgit v1.1


From 01cd9fef6eb3caae06415861de5b53224b722549 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 4 Jun 2010 22:21:54 -0400
Subject: switch sysfs to ->evict_inode()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/sysfs/inode.c | 6 +++---
 fs/sysfs/mount.c | 2 +-
 fs/sysfs/sysfs.h | 2 +-
 3 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'fs')

diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c
index 7e187fb..cffb1fd 100644
--- a/fs/sysfs/inode.c
+++ b/fs/sysfs/inode.c
@@ -312,15 +312,15 @@ struct inode * sysfs_get_inode(struct super_block *sb, struct sysfs_dirent *sd)
  * The sysfs_dirent serves as both an inode and a directory entry for sysfs.
  * To prevent the sysfs inode numbers from being freed prematurely we take a
  * reference to sysfs_dirent from the sysfs inode.  A
- * super_operations.delete_inode() implementation is needed to drop that
+ * super_operations.evict_inode() implementation is needed to drop that
  * reference upon inode destruction.
  */
-void sysfs_delete_inode(struct inode *inode)
+void sysfs_evict_inode(struct inode *inode)
 {
 	struct sysfs_dirent *sd  = inode->i_private;
 
 	truncate_inode_pages(&inode->i_data, 0);
-	clear_inode(inode);
+	end_writeback(inode);
 	sysfs_put(sd);
 }
 
diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c
index 281c0c9..f2af225 100644
--- a/fs/sysfs/mount.c
+++ b/fs/sysfs/mount.c
@@ -29,7 +29,7 @@ struct kmem_cache *sysfs_dir_cachep;
 static const struct super_operations sysfs_ops = {
 	.statfs		= simple_statfs,
 	.drop_inode	= generic_delete_inode,
-	.delete_inode	= sysfs_delete_inode,
+	.evict_inode	= sysfs_evict_inode,
 };
 
 struct sysfs_dirent sysfs_root = {
diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h
index 6a13105..d9be60a 100644
--- a/fs/sysfs/sysfs.h
+++ b/fs/sysfs/sysfs.h
@@ -198,7 +198,7 @@ static inline void __sysfs_put(struct sysfs_dirent *sd)
  * inode.c
  */
 struct inode *sysfs_get_inode(struct super_block *sb, struct sysfs_dirent *sd);
-void sysfs_delete_inode(struct inode *inode);
+void sysfs_evict_inode(struct inode *inode);
 int sysfs_sd_setattr(struct sysfs_dirent *sd, struct iattr *iattr);
 int sysfs_permission(struct inode *inode, int mask);
 int sysfs_setattr(struct dentry *dentry, struct iattr *iattr);
-- 
cgit v1.1


From 5ccb4a78d8c0e27985afec32cc4894d48e7b876e Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 4 Jun 2010 22:27:38 -0400
Subject: switch minix to ->evict_inode(), fix write_inode/delete_inode race

We need to wait for completion of possible writeback in progress
before we clear on-disk inode during deletion.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/minix/bitmap.c |  6 ++----
 fs/minix/inode.c  | 15 ++++++++++-----
 2 files changed, 12 insertions(+), 9 deletions(-)

(limited to 'fs')

diff --git a/fs/minix/bitmap.c b/fs/minix/bitmap.c
index 482779f..3f32bcb 100644
--- a/fs/minix/bitmap.c
+++ b/fs/minix/bitmap.c
@@ -200,13 +200,13 @@ void minix_free_inode(struct inode * inode)
 	ino = inode->i_ino;
 	if (ino < 1 || ino > sbi->s_ninodes) {
 		printk("minix_free_inode: inode 0 or nonexistent inode\n");
-		goto out;
+		return;
 	}
 	bit = ino & ((1<<k) - 1);
 	ino >>= k;
 	if (ino >= sbi->s_imap_blocks) {
 		printk("minix_free_inode: nonexistent imap in superblock\n");
-		goto out;
+		return;
 	}
 
 	minix_clear_inode(inode);	/* clear on-disk copy */
@@ -217,8 +217,6 @@ void minix_free_inode(struct inode * inode)
 		printk("minix_free_inode: bit %lu already cleared\n", bit);
 	spin_unlock(&bitmap_lock);
 	mark_buffer_dirty(bh);
- out:
-	clear_inode(inode);		/* clear in-memory copy */
 }
 
 struct inode *minix_new_inode(const struct inode *dir, int mode, int *error)
diff --git a/fs/minix/inode.c b/fs/minix/inode.c
index 125062f..e39d6bf 100644
--- a/fs/minix/inode.c
+++ b/fs/minix/inode.c
@@ -24,12 +24,17 @@ static int minix_write_inode(struct inode *inode,
 static int minix_statfs(struct dentry *dentry, struct kstatfs *buf);
 static int minix_remount (struct super_block * sb, int * flags, char * data);
 
-static void minix_delete_inode(struct inode *inode)
+static void minix_evict_inode(struct inode *inode)
 {
 	truncate_inode_pages(&inode->i_data, 0);
-	inode->i_size = 0;
-	minix_truncate(inode);
-	minix_free_inode(inode);
+	if (!inode->i_nlink) {
+		inode->i_size = 0;
+		minix_truncate(inode);
+	}
+	invalidate_inode_buffers(inode);
+	end_writeback(inode);
+	if (!inode->i_nlink)
+		minix_free_inode(inode);
 }
 
 static void minix_put_super(struct super_block *sb)
@@ -96,7 +101,7 @@ static const struct super_operations minix_sops = {
 	.alloc_inode	= minix_alloc_inode,
 	.destroy_inode	= minix_destroy_inode,
 	.write_inode	= minix_write_inode,
-	.delete_inode	= minix_delete_inode,
+	.evict_inode	= minix_evict_inode,
 	.put_super	= minix_put_super,
 	.statfs		= minix_statfs,
 	.remount_fs	= minix_remount,
-- 
cgit v1.1


From 3889717d2851bf38015c0b291026c07c02264623 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Thu, 22 Jul 2010 01:13:36 +0400
Subject: ext2: switch to dquot_free_block_nodirty()

brute-force conversion

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/ext2/balloc.c | 12 ++++++++----
 fs/ext2/xattr.c  | 12 ++++++++----
 2 files changed, 16 insertions(+), 8 deletions(-)

(limited to 'fs')

diff --git a/fs/ext2/balloc.c b/fs/ext2/balloc.c
index e8766a3..db69c12 100644
--- a/fs/ext2/balloc.c
+++ b/fs/ext2/balloc.c
@@ -571,7 +571,8 @@ do_more:
 error_return:
 	brelse(bitmap_bh);
 	release_blocks(sb, freed);
-	dquot_free_block(inode, freed);
+	dquot_free_block_nodirty(inode, freed);
+	mark_inode_dirty(inode);
 }
 
 /**
@@ -1418,7 +1419,8 @@ allocated:
 
 	*errp = 0;
 	brelse(bitmap_bh);
-	dquot_free_block(inode, *count-num);
+	dquot_free_block_nodirty(inode, *count-num);
+	mark_inode_dirty(inode);
 	*count = num;
 	return ret_block;
 
@@ -1428,8 +1430,10 @@ out:
 	/*
 	 * Undo the block allocation
 	 */
-	if (!performed_allocation)
-		dquot_free_block(inode, *count);
+	if (!performed_allocation) {
+		dquot_free_block_nodirty(inode, *count);
+		mark_inode_dirty(inode);
+	}
 	brelse(bitmap_bh);
 	return 0;
 }
diff --git a/fs/ext2/xattr.c b/fs/ext2/xattr.c
index 7c39157..0fa24e8 100644
--- a/fs/ext2/xattr.c
+++ b/fs/ext2/xattr.c
@@ -703,8 +703,10 @@ ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh,
 		 * written (only some dirty data were not) so we just proceed
 		 * as if nothing happened and cleanup the unused block */
 		if (error && error != -ENOSPC) {
-			if (new_bh && new_bh != old_bh)
-				dquot_free_block(inode, 1);
+			if (new_bh && new_bh != old_bh) {
+				dquot_free_block_nodirty(inode, 1);
+				mark_inode_dirty(inode);
+			}
 			goto cleanup;
 		}
 	} else
@@ -736,7 +738,8 @@ ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh,
 			le32_add_cpu(&HDR(old_bh)->h_refcount, -1);
 			if (ce)
 				mb_cache_entry_release(ce);
-			dquot_free_block(inode, 1);
+			dquot_free_block_nodirty(inode, 1);
+			mark_inode_dirty(inode);
 			mark_buffer_dirty(old_bh);
 			ea_bdebug(old_bh, "refcount now=%d",
 				le32_to_cpu(HDR(old_bh)->h_refcount));
@@ -799,7 +802,8 @@ ext2_xattr_delete_inode(struct inode *inode)
 		mark_buffer_dirty(bh);
 		if (IS_SYNC(inode))
 			sync_dirty_buffer(bh);
-		dquot_free_block(inode, 1);
+		dquot_free_block_nodirty(inode, 1);
+		mark_inode_dirty(inode);
 	}
 	EXT2_I(inode)->i_file_acl = 0;
 
-- 
cgit v1.1


From addacc7d6f0f0bcce12adf9fe9e6455e1dfd74da Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Thu, 22 Jul 2010 01:19:42 +0400
Subject: Take dirtying the inode to callers of ext2_free_blocks()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/ext2/balloc.c | 1 -
 fs/ext2/inode.c  | 6 ++++--
 fs/ext2/xattr.c  | 3 +++
 3 files changed, 7 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/ext2/balloc.c b/fs/ext2/balloc.c
index db69c12..c6c684b 100644
--- a/fs/ext2/balloc.c
+++ b/fs/ext2/balloc.c
@@ -572,7 +572,6 @@ error_return:
 	brelse(bitmap_bh);
 	release_blocks(sb, freed);
 	dquot_free_block_nodirty(inode, freed);
-	mark_inode_dirty(inode);
 }
 
 /**
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index 069620b..e8af26d 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -423,6 +423,8 @@ static int ext2_alloc_blocks(struct inode *inode,
 failed_out:
 	for (i = 0; i <index; i++)
 		ext2_free_blocks(inode, new_blocks[i], 1);
+	if (index)
+		mark_inode_dirty(inode);
 	return ret;
 }
 
@@ -993,8 +995,8 @@ static inline void ext2_free_data(struct inode *inode, __le32 *p, __le32 *q)
 			else if (block_to_free == nr - count)
 				count++;
 			else {
-				mark_inode_dirty(inode);
 				ext2_free_blocks (inode, block_to_free, count);
+				mark_inode_dirty(inode);
 			free_this:
 				block_to_free = nr;
 				count = 1;
@@ -1002,8 +1004,8 @@ static inline void ext2_free_data(struct inode *inode, __le32 *p, __le32 *q)
 		}
 	}
 	if (count > 0) {
-		mark_inode_dirty(inode);
 		ext2_free_blocks (inode, block_to_free, count);
+		mark_inode_dirty(inode);
 	}
 }
 
diff --git a/fs/ext2/xattr.c b/fs/ext2/xattr.c
index 0fa24e8..25ff041 100644
--- a/fs/ext2/xattr.c
+++ b/fs/ext2/xattr.c
@@ -674,6 +674,7 @@ ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh,
 			new_bh = sb_getblk(sb, block);
 			if (!new_bh) {
 				ext2_free_blocks(inode, block, 1);
+				mark_inode_dirty(inode);
 				error = -EIO;
 				goto cleanup;
 			}
@@ -729,6 +730,7 @@ ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh,
 				mb_cache_entry_free(ce);
 			ea_bdebug(old_bh, "freeing");
 			ext2_free_blocks(inode, old_bh->b_blocknr, 1);
+			mark_inode_dirty(inode);
 			/* We let our caller release old_bh, so we
 			 * need to duplicate the buffer before. */
 			get_bh(old_bh);
@@ -789,6 +791,7 @@ ext2_xattr_delete_inode(struct inode *inode)
 		if (ce)
 			mb_cache_entry_free(ce);
 		ext2_free_blocks(inode, EXT2_I(inode)->i_file_acl, 1);
+		mark_inode_dirty(inode);
 		get_bh(bh);
 		bforget(bh);
 		unlock_buffer(bh);
-- 
cgit v1.1


From 3937871d91e4f43e4aaf0b214c68a7857c0e6e80 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Thu, 22 Jul 2010 01:22:47 +0400
Subject: Don't dirty the victim in ext2_xattr_delete_inode()

... it's beyond fs-writeback reach already - writeback won't
be started at that point.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/ext2/xattr.c | 2 --
 1 file changed, 2 deletions(-)

(limited to 'fs')

diff --git a/fs/ext2/xattr.c b/fs/ext2/xattr.c
index 25ff041..5ab87e6 100644
--- a/fs/ext2/xattr.c
+++ b/fs/ext2/xattr.c
@@ -791,7 +791,6 @@ ext2_xattr_delete_inode(struct inode *inode)
 		if (ce)
 			mb_cache_entry_free(ce);
 		ext2_free_blocks(inode, EXT2_I(inode)->i_file_acl, 1);
-		mark_inode_dirty(inode);
 		get_bh(bh);
 		bforget(bh);
 		unlock_buffer(bh);
@@ -806,7 +805,6 @@ ext2_xattr_delete_inode(struct inode *inode)
 		if (IS_SYNC(inode))
 			sync_dirty_buffer(bh);
 		dquot_free_block_nodirty(inode, 1);
-		mark_inode_dirty(inode);
 	}
 	EXT2_I(inode)->i_file_acl = 0;
 
-- 
cgit v1.1


From 72edc4d0873ba5165c0759264298bf5f55351c7a Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 4 Jun 2010 23:32:28 -0400
Subject: merge ext2 delete_inode and clear_inode, switch to ->evict_inode()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/ext2/ext2.h   |  2 +-
 fs/ext2/ialloc.c | 13 ++++---------
 fs/ext2/inode.c  | 44 ++++++++++++++++++++++++++++++--------------
 fs/ext2/super.c  | 14 +-------------
 4 files changed, 36 insertions(+), 37 deletions(-)

(limited to 'fs')

diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h
index 8f53d11..416daa6 100644
--- a/fs/ext2/ext2.h
+++ b/fs/ext2/ext2.h
@@ -119,7 +119,7 @@ extern unsigned long ext2_count_free (struct buffer_head *, unsigned);
 /* inode.c */
 extern struct inode *ext2_iget (struct super_block *, unsigned long);
 extern int ext2_write_inode (struct inode *, struct writeback_control *);
-extern void ext2_delete_inode (struct inode *);
+extern void ext2_evict_inode(struct inode *);
 extern int ext2_sync_inode (struct inode *);
 extern int ext2_get_block(struct inode *, sector_t, struct buffer_head *, int);
 extern int ext2_setattr (struct dentry *, struct iattr *);
diff --git a/fs/ext2/ialloc.c b/fs/ext2/ialloc.c
index 938dbc7..ad70479 100644
--- a/fs/ext2/ialloc.c
+++ b/fs/ext2/ialloc.c
@@ -118,19 +118,14 @@ void ext2_free_inode (struct inode * inode)
 	 * Note: we must free any quota before locking the superblock,
 	 * as writing the quota to disk may need the lock as well.
 	 */
-	if (!is_bad_inode(inode)) {
-		/* Quota is already initialized in iput() */
-		ext2_xattr_delete_inode(inode);
-		dquot_free_inode(inode);
-		dquot_drop(inode);
-	}
+	/* Quota is already initialized in iput() */
+	ext2_xattr_delete_inode(inode);
+	dquot_free_inode(inode);
+	dquot_drop(inode);
 
 	es = EXT2_SB(sb)->s_es;
 	is_directory = S_ISDIR(inode->i_mode);
 
-	/* Do this BEFORE marking the inode not in use or returning an error */
-	clear_inode (inode);
-
 	if (ino < EXT2_FIRST_INO(sb) ||
 	    ino > le32_to_cpu(es->s_inodes_count)) {
 		ext2_error (sb, "ext2_free_inode",
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index e8af26d..940c961 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -69,26 +69,42 @@ static void ext2_write_failed(struct address_space *mapping, loff_t to)
 /*
  * Called at the last iput() if i_nlink is zero.
  */
-void ext2_delete_inode (struct inode * inode)
+void ext2_evict_inode(struct inode * inode)
 {
-	if (!is_bad_inode(inode))
+	struct ext2_block_alloc_info *rsv;
+	int want_delete = 0;
+
+	if (!inode->i_nlink && !is_bad_inode(inode)) {
+		want_delete = 1;
 		dquot_initialize(inode);
+	} else {
+		dquot_drop(inode);
+	}
+
 	truncate_inode_pages(&inode->i_data, 0);
 
-	if (is_bad_inode(inode))
-		goto no_delete;
-	EXT2_I(inode)->i_dtime	= get_seconds();
-	mark_inode_dirty(inode);
-	__ext2_write_inode(inode, inode_needs_sync(inode));
+	if (want_delete) {
+		/* set dtime */
+		EXT2_I(inode)->i_dtime	= get_seconds();
+		mark_inode_dirty(inode);
+		__ext2_write_inode(inode, inode_needs_sync(inode));
+		/* truncate to 0 */
+		inode->i_size = 0;
+		if (inode->i_blocks)
+			ext2_truncate_blocks(inode, 0);
+	}
 
-	inode->i_size = 0;
-	if (inode->i_blocks)
-		ext2_truncate_blocks(inode, 0);
-	ext2_free_inode (inode);
+	invalidate_inode_buffers(inode);
+	end_writeback(inode);
+
+	ext2_discard_reservation(inode);
+	rsv = EXT2_I(inode)->i_block_alloc_info;
+	EXT2_I(inode)->i_block_alloc_info = NULL;
+	if (unlikely(rsv))
+		kfree(rsv);
 
-	return;
-no_delete:
-	clear_inode(inode);	/* We must guarantee clearing of inode... */
+	if (want_delete)
+		ext2_free_inode(inode);
 }
 
 typedef struct {
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 7ff43f4..1ec6026 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -195,17 +195,6 @@ static void destroy_inodecache(void)
 	kmem_cache_destroy(ext2_inode_cachep);
 }
 
-static void ext2_clear_inode(struct inode *inode)
-{
-	struct ext2_block_alloc_info *rsv = EXT2_I(inode)->i_block_alloc_info;
-
-	dquot_drop(inode);
-	ext2_discard_reservation(inode);
-	EXT2_I(inode)->i_block_alloc_info = NULL;
-	if (unlikely(rsv))
-		kfree(rsv);
-}
-
 static int ext2_show_options(struct seq_file *seq, struct vfsmount *vfs)
 {
 	struct super_block *sb = vfs->mnt_sb;
@@ -299,13 +288,12 @@ static const struct super_operations ext2_sops = {
 	.alloc_inode	= ext2_alloc_inode,
 	.destroy_inode	= ext2_destroy_inode,
 	.write_inode	= ext2_write_inode,
-	.delete_inode	= ext2_delete_inode,
+	.evict_inode	= ext2_evict_inode,
 	.put_super	= ext2_put_super,
 	.write_super	= ext2_write_super,
 	.sync_fs	= ext2_sync_fs,
 	.statfs		= ext2_statfs,
 	.remount_fs	= ext2_remount,
-	.clear_inode	= ext2_clear_inode,
 	.show_options	= ext2_show_options,
 #ifdef CONFIG_QUOTA
 	.quota_read	= ext2_quota_read,
-- 
cgit v1.1


From d299eadc098743ea0cfbf9502fb04abf1d39ce36 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 5 Jun 2010 19:16:20 -0400
Subject: switch sysv to ->evict_inode()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/sysv/ialloc.c |  1 -
 fs/sysv/inode.c  | 15 ++++++++++-----
 2 files changed, 10 insertions(+), 6 deletions(-)

(limited to 'fs')

diff --git a/fs/sysv/ialloc.c b/fs/sysv/ialloc.c
index fcc498e..0c96c98 100644
--- a/fs/sysv/ialloc.c
+++ b/fs/sysv/ialloc.c
@@ -113,7 +113,6 @@ void sysv_free_inode(struct inode * inode)
 		return;
 	}
 	raw_inode = sysv_raw_inode(sb, ino, &bh);
-	clear_inode(inode);
 	if (!raw_inode) {
 		printk("sysv_free_inode: unable to read inode block on device "
 		       "%s\n", inode->i_sb->s_id);
diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c
index d4a5380..613a505 100644
--- a/fs/sysv/inode.c
+++ b/fs/sysv/inode.c
@@ -308,12 +308,17 @@ int sysv_sync_inode(struct inode *inode)
 	return __sysv_write_inode(inode, 1);
 }
 
-static void sysv_delete_inode(struct inode *inode)
+static void sysv_evict_inode(struct inode *inode)
 {
 	truncate_inode_pages(&inode->i_data, 0);
-	inode->i_size = 0;
-	sysv_truncate(inode);
-	sysv_free_inode(inode);
+	if (!inode->i_nlink) {
+		inode->i_size = 0;
+		sysv_truncate(inode);
+	}
+	invalidate_inode_buffers(inode);
+	end_writeback(inode);
+	if (!inode->i_nlink)
+		sysv_free_inode(inode);
 }
 
 static struct kmem_cache *sysv_inode_cachep;
@@ -344,7 +349,7 @@ const struct super_operations sysv_sops = {
 	.alloc_inode	= sysv_alloc_inode,
 	.destroy_inode	= sysv_destroy_inode,
 	.write_inode	= sysv_write_inode,
-	.delete_inode	= sysv_delete_inode,
+	.evict_inode	= sysv_evict_inode,
 	.put_super	= sysv_put_super,
 	.write_super	= sysv_write_super,
 	.sync_fs	= sysv_sync_fs,
-- 
cgit v1.1


From d3b4f9ae184b0a3982dbe000ddf88952f090dc28 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 5 Jun 2010 19:22:50 -0400
Subject: switch smbfs to evict_inode()

NB: treatment of inode hash is completely braindead there

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/smbfs/inode.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/smbfs/inode.c b/fs/smbfs/inode.c
index e338f0a..450c919 100644
--- a/fs/smbfs/inode.c
+++ b/fs/smbfs/inode.c
@@ -46,7 +46,7 @@
 
 #define SMB_TTL_DEFAULT 1000
 
-static void smb_delete_inode(struct inode *);
+static void smb_evict_inode(struct inode *);
 static void smb_put_super(struct super_block *);
 static int  smb_statfs(struct dentry *, struct kstatfs *);
 static int  smb_show_options(struct seq_file *, struct vfsmount *);
@@ -102,7 +102,7 @@ static const struct super_operations smb_sops =
 	.alloc_inode	= smb_alloc_inode,
 	.destroy_inode	= smb_destroy_inode,
 	.drop_inode	= generic_delete_inode,
-	.delete_inode	= smb_delete_inode,
+	.evict_inode	= smb_evict_inode,
 	.put_super	= smb_put_super,
 	.statfs		= smb_statfs,
 	.show_options	= smb_show_options,
@@ -324,15 +324,15 @@ out:
  * All blocking cleanup operations need to go here to avoid races.
  */
 static void
-smb_delete_inode(struct inode *ino)
+smb_evict_inode(struct inode *ino)
 {
 	DEBUG1("ino=%ld\n", ino->i_ino);
 	truncate_inode_pages(&ino->i_data, 0);
+	end_writeback(ino);
 	lock_kernel();
 	if (smb_close(ino))
 		PARANOIA("could not close inode %ld\n", ino->i_ino);
 	unlock_kernel();
-	clear_inode(ino);
 }
 
 static struct option opts[] = {
-- 
cgit v1.1


From deee3ce466a3e2cfb54c93b8fd22bbccd19ea7d6 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 5 Jun 2010 19:28:32 -0400
Subject: covert fatfs to ->evict_inode()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/fat/inode.c | 20 +++++++++-----------
 1 file changed, 9 insertions(+), 11 deletions(-)

(limited to 'fs')

diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index ec6a699..8300580 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -263,7 +263,7 @@ static const struct address_space_operations fat_aops = {
  *			check if the location is still valid and retry if it
  *			isn't. Otherwise we do changes.
  *		5. Spinlock is used to protect hash/unhash/location check/lookup
- *		6. fat_clear_inode() unhashes the F-d-c entry.
+ *		6. fat_evict_inode() unhashes the F-d-c entry.
  *		7. lookup() and readdir() do igrab() if they find a F-d-c entry
  *			and consider negative result as cache miss.
  */
@@ -448,16 +448,15 @@ out:
 
 EXPORT_SYMBOL_GPL(fat_build_inode);
 
-static void fat_delete_inode(struct inode *inode)
+static void fat_evict_inode(struct inode *inode)
 {
 	truncate_inode_pages(&inode->i_data, 0);
-	inode->i_size = 0;
-	fat_truncate_blocks(inode, 0);
-	clear_inode(inode);
-}
-
-static void fat_clear_inode(struct inode *inode)
-{
+	if (!inode->i_nlink) {
+		inode->i_size = 0;
+		fat_truncate_blocks(inode, 0);
+	}
+	invalidate_inode_buffers(inode);
+	end_writeback(inode);
 	fat_cache_inval_inode(inode);
 	fat_detach(inode);
 }
@@ -674,12 +673,11 @@ static const struct super_operations fat_sops = {
 	.alloc_inode	= fat_alloc_inode,
 	.destroy_inode	= fat_destroy_inode,
 	.write_inode	= fat_write_inode,
-	.delete_inode	= fat_delete_inode,
+	.evict_inode	= fat_evict_inode,
 	.put_super	= fat_put_super,
 	.write_super	= fat_write_super,
 	.sync_fs	= fat_sync_fs,
 	.statfs		= fat_statfs,
-	.clear_inode	= fat_clear_inode,
 	.remount_fs	= fat_remount,
 
 	.show_options	= fat_show_options,
-- 
cgit v1.1


From 58e8268c7bae538ccb8b7eccc817c1c28bcd4da2 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 5 Jun 2010 19:40:56 -0400
Subject: switch ufs to ->evict_inode()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/ufs/ialloc.c |  2 --
 fs/ufs/inode.c  | 43 ++++++++++++++++++++++++++-----------------
 fs/ufs/super.c  |  2 +-
 fs/ufs/ufs.h    |  2 +-
 4 files changed, 28 insertions(+), 21 deletions(-)

(limited to 'fs')

diff --git a/fs/ufs/ialloc.c b/fs/ufs/ialloc.c
index 594480e..428017e 100644
--- a/fs/ufs/ialloc.c
+++ b/fs/ufs/ialloc.c
@@ -94,8 +94,6 @@ void ufs_free_inode (struct inode * inode)
 
 	is_directory = S_ISDIR(inode->i_mode);
 
-	clear_inode (inode);
-
 	if (ubh_isclr (UCPI_UBH(ucpi), ucpi->c_iusedoff, bit))
 		ufs_error(sb, "ufs_free_inode", "bit already cleared for inode %u", ino);
 	else {
diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c
index 45cafa9..2b251f2 100644
--- a/fs/ufs/inode.c
+++ b/fs/ufs/inode.c
@@ -911,24 +911,33 @@ int ufs_sync_inode (struct inode *inode)
 	return ufs_update_inode (inode, 1);
 }
 
-void ufs_delete_inode (struct inode * inode)
+void ufs_evict_inode(struct inode * inode)
 {
-	loff_t old_i_size;
+	int want_delete = 0;
+
+	if (!inode->i_nlink && !is_bad_inode(inode))
+		want_delete = 1;
 
 	truncate_inode_pages(&inode->i_data, 0);
-	if (is_bad_inode(inode))
-		goto no_delete;
-	/*UFS_I(inode)->i_dtime = CURRENT_TIME;*/
-	lock_kernel();
-	mark_inode_dirty(inode);
-	ufs_update_inode(inode, IS_SYNC(inode));
-	old_i_size = inode->i_size;
-	inode->i_size = 0;
-	if (inode->i_blocks && ufs_truncate(inode, old_i_size))
-		ufs_warning(inode->i_sb, __func__, "ufs_truncate failed\n");
-	ufs_free_inode (inode);
-	unlock_kernel();
-	return;
-no_delete:
-	clear_inode(inode);	/* We must guarantee clearing of inode... */
+	if (want_delete) {
+		loff_t old_i_size;
+		/*UFS_I(inode)->i_dtime = CURRENT_TIME;*/
+		lock_kernel();
+		mark_inode_dirty(inode);
+		ufs_update_inode(inode, IS_SYNC(inode));
+		old_i_size = inode->i_size;
+		inode->i_size = 0;
+		if (inode->i_blocks && ufs_truncate(inode, old_i_size))
+			ufs_warning(inode->i_sb, __func__, "ufs_truncate failed\n");
+		unlock_kernel();
+	}
+
+	invalidate_inode_buffers(inode);
+	end_writeback(inode);
+
+	if (want_delete) {
+		lock_kernel();
+		ufs_free_inode (inode);
+		unlock_kernel();
+	}
 }
diff --git a/fs/ufs/super.c b/fs/ufs/super.c
index 3ec5a9e..d510c1b 100644
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -1440,7 +1440,7 @@ static const struct super_operations ufs_super_ops = {
 	.alloc_inode	= ufs_alloc_inode,
 	.destroy_inode	= ufs_destroy_inode,
 	.write_inode	= ufs_write_inode,
-	.delete_inode	= ufs_delete_inode,
+	.evict_inode	= ufs_evict_inode,
 	.put_super	= ufs_put_super,
 	.write_super	= ufs_write_super,
 	.sync_fs	= ufs_sync_fs,
diff --git a/fs/ufs/ufs.h b/fs/ufs/ufs.h
index 179ae6b..c08782e 100644
--- a/fs/ufs/ufs.h
+++ b/fs/ufs/ufs.h
@@ -108,7 +108,7 @@ extern struct inode * ufs_new_inode (struct inode *, int);
 extern struct inode *ufs_iget(struct super_block *, unsigned long);
 extern int ufs_write_inode (struct inode *, struct writeback_control *);
 extern int ufs_sync_inode (struct inode *);
-extern void ufs_delete_inode (struct inode *);
+extern void ufs_evict_inode (struct inode *);
 extern struct buffer_head * ufs_bread (struct inode *, unsigned, int, int *);
 extern int ufs_getfrag_block (struct inode *inode, sector_t fragment, struct buffer_head *bh_result, int create);
 
-- 
cgit v1.1


From ac14a95b5239d37b6082c3791b88d7ab4e8e444c Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 6 Jun 2010 07:08:19 -0400
Subject: convert ext3 to ->evict_inode()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/ext3/ialloc.c | 12 ------------
 fs/ext3/inode.c  | 37 +++++++++++++++++++++++++++----------
 fs/ext3/super.c  | 14 +-------------
 3 files changed, 28 insertions(+), 35 deletions(-)

(limited to 'fs')

diff --git a/fs/ext3/ialloc.c b/fs/ext3/ialloc.c
index 498021e..4ab72db 100644
--- a/fs/ext3/ialloc.c
+++ b/fs/ext3/ialloc.c
@@ -119,20 +119,8 @@ void ext3_free_inode (handle_t *handle, struct inode * inode)
 	ino = inode->i_ino;
 	ext3_debug ("freeing inode %lu\n", ino);
 
-	/*
-	 * Note: we must free any quota before locking the superblock,
-	 * as writing the quota to disk may need the lock as well.
-	 */
-	dquot_initialize(inode);
-	ext3_xattr_delete_inode(handle, inode);
-	dquot_free_inode(inode);
-	dquot_drop(inode);
-
 	is_directory = S_ISDIR(inode->i_mode);
 
-	/* Do this BEFORE marking the inode not in use or returning an error */
-	clear_inode (inode);
-
 	es = EXT3_SB(sb)->s_es;
 	if (ino < EXT3_FIRST_INO(sb) || ino > le32_to_cpu(es->s_inodes_count)) {
 		ext3_error (sb, "ext3_free_inode",
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index b04d119..cc55cec 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -190,18 +190,28 @@ static int truncate_restart_transaction(handle_t *handle, struct inode *inode)
 }
 
 /*
- * Called at the last iput() if i_nlink is zero.
+ * Called at inode eviction from icache
  */
-void ext3_delete_inode (struct inode * inode)
+void ext3_evict_inode (struct inode *inode)
 {
+	struct ext3_block_alloc_info *rsv;
 	handle_t *handle;
+	int want_delete = 0;
 
-	if (!is_bad_inode(inode))
+	if (!inode->i_nlink && !is_bad_inode(inode)) {
 		dquot_initialize(inode);
+		want_delete = 1;
+	}
 
 	truncate_inode_pages(&inode->i_data, 0);
 
-	if (is_bad_inode(inode))
+	ext3_discard_reservation(inode);
+	rsv = EXT3_I(inode)->i_block_alloc_info;
+	EXT3_I(inode)->i_block_alloc_info = NULL;
+	if (unlikely(rsv))
+		kfree(rsv);
+
+	if (!want_delete)
 		goto no_delete;
 
 	handle = start_transaction(inode);
@@ -238,15 +248,22 @@ void ext3_delete_inode (struct inode * inode)
 	 * having errors), but we can't free the inode if the mark_dirty
 	 * fails.
 	 */
-	if (ext3_mark_inode_dirty(handle, inode))
-		/* If that failed, just do the required in-core inode clear. */
-		clear_inode(inode);
-	else
+	if (ext3_mark_inode_dirty(handle, inode)) {
+		/* If that failed, just dquot_drop() and be done with that */
+		dquot_drop(inode);
+		end_writeback(inode);
+	} else {
+		ext3_xattr_delete_inode(handle, inode);
+		dquot_free_inode(inode);
+		dquot_drop(inode);
+		end_writeback(inode);
 		ext3_free_inode(handle, inode);
+	}
 	ext3_journal_stop(handle);
 	return;
 no_delete:
-	clear_inode(inode);	/* We must guarantee clearing of inode... */
+	end_writeback(inode);
+	dquot_drop(inode);
 }
 
 typedef struct {
@@ -2564,7 +2581,7 @@ out_stop:
 	 * If this was a simple ftruncate(), and the file will remain alive
 	 * then we need to clear up the orphan record which we created above.
 	 * However, if this was a real unlink then we were called by
-	 * ext3_delete_inode(), and we allow that function to clean up the
+	 * ext3_evict_inode(), and we allow that function to clean up the
 	 * orphan info for us.
 	 */
 	if (inode->i_nlink)
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 6c953bb..a951fd5 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -527,17 +527,6 @@ static void destroy_inodecache(void)
 	kmem_cache_destroy(ext3_inode_cachep);
 }
 
-static void ext3_clear_inode(struct inode *inode)
-{
-	struct ext3_block_alloc_info *rsv = EXT3_I(inode)->i_block_alloc_info;
-
-	dquot_drop(inode);
-	ext3_discard_reservation(inode);
-	EXT3_I(inode)->i_block_alloc_info = NULL;
-	if (unlikely(rsv))
-		kfree(rsv);
-}
-
 static inline void ext3_show_quota_options(struct seq_file *seq, struct super_block *sb)
 {
 #if defined(CONFIG_QUOTA)
@@ -783,14 +772,13 @@ static const struct super_operations ext3_sops = {
 	.destroy_inode	= ext3_destroy_inode,
 	.write_inode	= ext3_write_inode,
 	.dirty_inode	= ext3_dirty_inode,
-	.delete_inode	= ext3_delete_inode,
+	.evict_inode	= ext3_evict_inode,
 	.put_super	= ext3_put_super,
 	.sync_fs	= ext3_sync_fs,
 	.freeze_fs	= ext3_freeze,
 	.unfreeze_fs	= ext3_unfreeze,
 	.statfs		= ext3_statfs,
 	.remount_fs	= ext3_remount,
-	.clear_inode	= ext3_clear_inode,
 	.show_options	= ext3_show_options,
 #ifdef CONFIG_QUOTA
 	.quota_read	= ext3_quota_read,
-- 
cgit v1.1


From 9df2f85128def59185f8a1c584f8af41df17405a Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 6 Jun 2010 09:50:39 -0400
Subject: switch bfs to ->evict_inode(), clean up

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/bfs/inode.c | 70 +++++++++++++++++++++++++++-------------------------------
 1 file changed, 32 insertions(+), 38 deletions(-)

(limited to 'fs')

diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c
index f22a7d3..0499822 100644
--- a/fs/bfs/inode.c
+++ b/fs/bfs/inode.c
@@ -99,6 +99,24 @@ error:
 	return ERR_PTR(-EIO);
 }
 
+static struct bfs_inode *find_inode(struct super_block *sb, u16 ino, struct buffer_head **p)
+{
+	if ((ino < BFS_ROOT_INO) || (ino > BFS_SB(sb)->si_lasti)) {
+		printf("Bad inode number %s:%08x\n", sb->s_id, ino);
+		return ERR_PTR(-EIO);
+	}
+
+	ino -= BFS_ROOT_INO;
+
+	*p = sb_bread(sb, 1 + ino / BFS_INODES_PER_BLOCK);
+	if (!*p) {
+		printf("Unable to read inode %s:%08x\n", sb->s_id, ino);
+		return ERR_PTR(-EIO);
+	}
+
+	return (struct bfs_inode *)(*p)->b_data +  ino % BFS_INODES_PER_BLOCK;
+}
+
 static int bfs_write_inode(struct inode *inode, struct writeback_control *wbc)
 {
 	struct bfs_sb_info *info = BFS_SB(inode->i_sb);
@@ -106,28 +124,15 @@ static int bfs_write_inode(struct inode *inode, struct writeback_control *wbc)
         unsigned long i_sblock;
 	struct bfs_inode *di;
 	struct buffer_head *bh;
-	int block, off;
 	int err = 0;
 
         dprintf("ino=%08x\n", ino);
 
-	if ((ino < BFS_ROOT_INO) || (ino > BFS_SB(inode->i_sb)->si_lasti)) {
-		printf("Bad inode number %s:%08x\n", inode->i_sb->s_id, ino);
-		return -EIO;
-	}
+	di = find_inode(inode->i_sb, ino, &bh);
+	if (IS_ERR(di))
+		return PTR_ERR(di);
 
 	mutex_lock(&info->bfs_lock);
-	block = (ino - BFS_ROOT_INO) / BFS_INODES_PER_BLOCK + 1;
-	bh = sb_bread(inode->i_sb, block);
-	if (!bh) {
-		printf("Unable to read inode %s:%08x\n",
-				inode->i_sb->s_id, ino);
-		mutex_unlock(&info->bfs_lock);
-		return -EIO;
-	}
-
-	off = (ino - BFS_ROOT_INO) % BFS_INODES_PER_BLOCK;
-	di = (struct bfs_inode *)bh->b_data + off;
 
 	if (ino == BFS_ROOT_INO)
 		di->i_vtype = cpu_to_le32(BFS_VDIR);
@@ -158,12 +163,11 @@ static int bfs_write_inode(struct inode *inode, struct writeback_control *wbc)
 	return err;
 }
 
-static void bfs_delete_inode(struct inode *inode)
+static void bfs_evict_inode(struct inode *inode)
 {
 	unsigned long ino = inode->i_ino;
 	struct bfs_inode *di;
 	struct buffer_head *bh;
-	int block, off;
 	struct super_block *s = inode->i_sb;
 	struct bfs_sb_info *info = BFS_SB(s);
 	struct bfs_inode_info *bi = BFS_I(inode);
@@ -171,28 +175,19 @@ static void bfs_delete_inode(struct inode *inode)
 	dprintf("ino=%08lx\n", ino);
 
 	truncate_inode_pages(&inode->i_data, 0);
+	invalidate_inode_buffers(inode);
+	end_writeback(inode);
 
-	if ((ino < BFS_ROOT_INO) || (ino > info->si_lasti)) {
-		printf("invalid ino=%08lx\n", ino);
+	if (inode->i_nlink)
 		return;
-	}
-	
-	inode->i_size = 0;
-	inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC;
-	mutex_lock(&info->bfs_lock);
-	mark_inode_dirty(inode);
 
-	block = (ino - BFS_ROOT_INO) / BFS_INODES_PER_BLOCK + 1;
-	bh = sb_bread(s, block);
-	if (!bh) {
-		printf("Unable to read inode %s:%08lx\n",
-					inode->i_sb->s_id, ino);
-		mutex_unlock(&info->bfs_lock);
+	di = find_inode(s, inode->i_ino, &bh);
+	if (IS_ERR(di))
 		return;
-	}
-	off = (ino - BFS_ROOT_INO) % BFS_INODES_PER_BLOCK;
-	di = (struct bfs_inode *)bh->b_data + off;
-	memset((void *)di, 0, sizeof(struct bfs_inode));
+
+	mutex_lock(&info->bfs_lock);
+	/* clear on-disk inode */
+	memset(di, 0, sizeof(struct bfs_inode));
 	mark_buffer_dirty(bh);
 	brelse(bh);
 
@@ -214,7 +209,6 @@ static void bfs_delete_inode(struct inode *inode)
 		mark_buffer_dirty(info->si_sbh);
 	}
 	mutex_unlock(&info->bfs_lock);
-	clear_inode(inode);
 }
 
 static int bfs_sync_fs(struct super_block *sb, int wait)
@@ -319,7 +313,7 @@ static const struct super_operations bfs_sops = {
 	.alloc_inode	= bfs_alloc_inode,
 	.destroy_inode	= bfs_destroy_inode,
 	.write_inode	= bfs_write_inode,
-	.delete_inode	= bfs_delete_inode,
+	.evict_inode	= bfs_evict_inode,
 	.put_super	= bfs_put_super,
 	.write_super	= bfs_write_super,
 	.sync_fs	= bfs_sync_fs,
-- 
cgit v1.1


From 69c9e750176b409559b2361fbb28fa7bbf3c5461 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 6 Jun 2010 10:12:01 -0400
Subject: switch omfs to ->evict_inode()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/omfs/inode.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/omfs/inode.c b/fs/omfs/inode.c
index 089839a..56121de 100644
--- a/fs/omfs/inode.c
+++ b/fs/omfs/inode.c
@@ -175,9 +175,13 @@ int omfs_sync_inode(struct inode *inode)
  * called when an entry is deleted, need to clear the bits in the
  * bitmaps.
  */
-static void omfs_delete_inode(struct inode *inode)
+static void omfs_evict_inode(struct inode *inode)
 {
 	truncate_inode_pages(&inode->i_data, 0);
+	end_writeback(inode);
+
+	if (inode->i_nlink)
+		return;
 
 	if (S_ISREG(inode->i_mode)) {
 		inode->i_size = 0;
@@ -185,7 +189,6 @@ static void omfs_delete_inode(struct inode *inode)
 	}
 
 	omfs_clear_range(inode->i_sb, inode->i_ino, 2);
-	clear_inode(inode);
 }
 
 struct inode *omfs_iget(struct super_block *sb, ino_t ino)
@@ -284,7 +287,7 @@ static int omfs_statfs(struct dentry *dentry, struct kstatfs *buf)
 
 static const struct super_operations omfs_sops = {
 	.write_inode	= omfs_write_inode,
-	.delete_inode	= omfs_delete_inode,
+	.evict_inode	= omfs_evict_inode,
 	.put_super	= omfs_put_super,
 	.statfs		= omfs_statfs,
 	.show_options	= generic_show_options,
-- 
cgit v1.1


From f053ddde7575090e09e2f5c4233d8a19f0925b93 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 6 Jun 2010 10:16:41 -0400
Subject: switch affs to ->evict_inode()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/affs/affs.h  |  3 +--
 fs/affs/inode.c | 25 ++++++++++++-------------
 fs/affs/super.c |  3 +--
 3 files changed, 14 insertions(+), 17 deletions(-)

(limited to 'fs')

diff --git a/fs/affs/affs.h b/fs/affs/affs.h
index f05b615..a8cbdeb 100644
--- a/fs/affs/affs.h
+++ b/fs/affs/affs.h
@@ -171,8 +171,7 @@ extern int	affs_rename(struct inode *old_dir, struct dentry *old_dentry,
 extern unsigned long		 affs_parent_ino(struct inode *dir);
 extern struct inode		*affs_new_inode(struct inode *dir);
 extern int			 affs_notify_change(struct dentry *dentry, struct iattr *attr);
-extern void			 affs_delete_inode(struct inode *inode);
-extern void			 affs_clear_inode(struct inode *inode);
+extern void			 affs_evict_inode(struct inode *inode);
 extern struct inode		*affs_iget(struct super_block *sb,
 					unsigned long ino);
 extern int			 affs_write_inode(struct inode *inode,
diff --git a/fs/affs/inode.c b/fs/affs/inode.c
index 6883d5f..3a0fdec 100644
--- a/fs/affs/inode.c
+++ b/fs/affs/inode.c
@@ -252,23 +252,19 @@ out:
 }
 
 void
-affs_delete_inode(struct inode *inode)
-{
-	pr_debug("AFFS: delete_inode(ino=%lu, nlink=%u)\n", inode->i_ino, inode->i_nlink);
-	truncate_inode_pages(&inode->i_data, 0);
-	inode->i_size = 0;
-	affs_truncate(inode);
-	clear_inode(inode);
-	affs_free_block(inode->i_sb, inode->i_ino);
-}
-
-void
-affs_clear_inode(struct inode *inode)
+affs_evict_inode(struct inode *inode)
 {
 	unsigned long cache_page;
+	pr_debug("AFFS: evict_inode(ino=%lu, nlink=%u)\n", inode->i_ino, inode->i_nlink);
+	truncate_inode_pages(&inode->i_data, 0);
 
-	pr_debug("AFFS: clear_inode(ino=%lu, nlink=%u)\n", inode->i_ino, inode->i_nlink);
+	if (!inode->i_nlink) {
+		inode->i_size = 0;
+		affs_truncate(inode);
+	}
 
+	invalidate_inode_buffers(inode);
+	end_writeback(inode);
 	affs_free_prealloc(inode);
 	cache_page = (unsigned long)AFFS_I(inode)->i_lc;
 	if (cache_page) {
@@ -280,6 +276,9 @@ affs_clear_inode(struct inode *inode)
 	affs_brelse(AFFS_I(inode)->i_ext_bh);
 	AFFS_I(inode)->i_ext_last = ~1;
 	AFFS_I(inode)->i_ext_bh = NULL;
+
+	if (!inode->i_nlink)
+		affs_free_block(inode->i_sb, inode->i_ino);
 }
 
 struct inode *
diff --git a/fs/affs/super.c b/fs/affs/super.c
index 16a3e47..2c804a8 100644
--- a/fs/affs/super.c
+++ b/fs/affs/super.c
@@ -140,8 +140,7 @@ static const struct super_operations affs_sops = {
 	.alloc_inode	= affs_alloc_inode,
 	.destroy_inode	= affs_destroy_inode,
 	.write_inode	= affs_write_inode,
-	.delete_inode	= affs_delete_inode,
-	.clear_inode	= affs_clear_inode,
+	.evict_inode	= affs_evict_inode,
 	.put_super	= affs_put_super,
 	.write_super	= affs_write_super,
 	.sync_fs	= affs_sync_fs,
-- 
cgit v1.1


From e971a6d7b9daebfe2c11c590377d3933410ab929 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 6 Jun 2010 15:16:17 -0400
Subject: stop icache pollution in hostfs, switch to ->evict_inode()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/hostfs/hostfs_kern.c | 30 ++++++++----------------------
 1 file changed, 8 insertions(+), 22 deletions(-)

(limited to 'fs')

diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index 7943ff1..fab5f5a 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -241,16 +241,13 @@ static struct inode *hostfs_iget(struct super_block *sb)
 	struct inode *inode;
 	long ret;
 
-	inode = iget_locked(sb, 0);
+	inode = new_inode(sb);
 	if (!inode)
 		return ERR_PTR(-ENOMEM);
-	if (inode->i_state & I_NEW) {
-		ret = hostfs_read_inode(inode);
-		if (ret < 0) {
-			iget_failed(inode);
-			return ERR_PTR(ret);
-		}
-		unlock_new_inode(inode);
+	ret = hostfs_read_inode(inode);
+	if (ret < 0) {
+		iput(inode);
+		return ERR_PTR(ret);
 	}
 	return inode;
 }
@@ -299,29 +296,19 @@ static struct inode *hostfs_alloc_inode(struct super_block *sb)
 	return &hi->vfs_inode;
 }
 
-static void hostfs_delete_inode(struct inode *inode)
+static void hostfs_evict_inode(struct inode *inode)
 {
 	truncate_inode_pages(&inode->i_data, 0);
+	end_writeback(inode);
 	if (HOSTFS_I(inode)->fd != -1) {
 		close_file(&HOSTFS_I(inode)->fd);
 		HOSTFS_I(inode)->fd = -1;
 	}
-	clear_inode(inode);
 }
 
 static void hostfs_destroy_inode(struct inode *inode)
 {
 	kfree(HOSTFS_I(inode)->host_filename);
-
-	/*
-	 * XXX: This should not happen, probably. The check is here for
-	 * additional safety.
-	 */
-	if (HOSTFS_I(inode)->fd != -1) {
-		close_file(&HOSTFS_I(inode)->fd);
-		printk(KERN_DEBUG "Closing host fd in .destroy_inode\n");
-	}
-
 	kfree(HOSTFS_I(inode));
 }
 
@@ -339,9 +326,8 @@ static int hostfs_show_options(struct seq_file *seq, struct vfsmount *vfs)
 
 static const struct super_operations hostfs_sbops = {
 	.alloc_inode	= hostfs_alloc_inode,
-	.drop_inode	= generic_delete_inode,
-	.delete_inode   = hostfs_delete_inode,
 	.destroy_inode	= hostfs_destroy_inode,
+	.evict_inode	= hostfs_evict_inode,
 	.statfs		= hostfs_statfs,
 	.show_options	= hostfs_show_options,
 };
-- 
cgit v1.1


From 601d2c38b93130d387091c28d13abea40924e518 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 6 Jun 2010 17:53:01 -0400
Subject: hostfs: don't keep a field in each inode when we are using it only in
 root

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/hostfs/hostfs_kern.c | 39 +++++++++++++++------------------------
 1 file changed, 15 insertions(+), 24 deletions(-)

(limited to 'fs')

diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index fab5f5a..7e67504 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -19,7 +19,6 @@
 #include "kern.h"
 
 struct hostfs_inode_info {
-	char *host_filename;
 	int fd;
 	fmode_t mode;
 	struct inode vfs_inode;
@@ -103,7 +102,7 @@ static char *dentry_name(struct dentry *dentry, int extra)
 		parent = parent->d_parent;
 	}
 
-	root = HOSTFS_I(parent->d_inode)->host_filename;
+	root = parent->d_sb->s_fs_info;
 	len += strlen(root);
 	name = kmalloc(len + extra + 1, GFP_KERNEL);
 	if (name == NULL)
@@ -266,7 +265,7 @@ int hostfs_statfs(struct dentry *dentry, struct kstatfs *sf)
 	long long f_files;
 	long long f_ffree;
 
-	err = do_statfs(HOSTFS_I(dentry->d_sb->s_root->d_inode)->host_filename,
+	err = do_statfs(dentry->d_sb->s_fs_info,
 			&sf->f_bsize, &f_blocks, &f_bfree, &f_bavail, &f_files,
 			&f_ffree, &sf->f_fsid, sizeof(sf->f_fsid),
 			&sf->f_namelen, sf->f_spare);
@@ -285,13 +284,10 @@ static struct inode *hostfs_alloc_inode(struct super_block *sb)
 {
 	struct hostfs_inode_info *hi;
 
-	hi = kmalloc(sizeof(*hi), GFP_KERNEL);
+	hi = kzalloc(sizeof(*hi), GFP_KERNEL);
 	if (hi == NULL)
 		return NULL;
-
-	*hi = ((struct hostfs_inode_info) { .host_filename	= NULL,
-					    .fd			= -1,
-					    .mode		= 0 });
+	hi->fd = -1;
 	inode_init_once(&hi->vfs_inode);
 	return &hi->vfs_inode;
 }
@@ -308,14 +304,12 @@ static void hostfs_evict_inode(struct inode *inode)
 
 static void hostfs_destroy_inode(struct inode *inode)
 {
-	kfree(HOSTFS_I(inode)->host_filename);
 	kfree(HOSTFS_I(inode));
 }
 
 static int hostfs_show_options(struct seq_file *seq, struct vfsmount *vfs)
 {
-	struct inode *root = vfs->mnt_sb->s_root->d_inode;
-	const char *root_path = HOSTFS_I(root)->host_filename;
+	const char *root_path = vfs->mnt_sb->s_fs_info;
 	size_t offset = strlen(root_ino) + 1;
 
 	if (strlen(root_path) > offset)
@@ -978,8 +972,8 @@ static int hostfs_fill_sb_common(struct super_block *sb, void *d, int silent)
 		req_root = "";
 
 	err = -ENOMEM;
-	host_root_path = kmalloc(strlen(root_ino) + 1
-				 + strlen(req_root) + 1, GFP_KERNEL);
+	sb->s_fs_info = host_root_path =
+		kmalloc(strlen(root_ino) + strlen(req_root) + 2, GFP_KERNEL);
 	if (host_root_path == NULL)
 		goto out;
 
@@ -988,20 +982,13 @@ static int hostfs_fill_sb_common(struct super_block *sb, void *d, int silent)
 	root_inode = hostfs_iget(sb);
 	if (IS_ERR(root_inode)) {
 		err = PTR_ERR(root_inode);
-		goto out_free;
+		goto out;
 	}
 
 	err = init_inode(root_inode, NULL);
 	if (err)
 		goto out_put;
 
-	HOSTFS_I(root_inode)->host_filename = host_root_path;
-	/*
-	 * Avoid that in the error path, iput(root_inode) frees again
-	 * host_root_path through hostfs_destroy_inode!
-	 */
-	host_root_path = NULL;
-
 	err = -ENOMEM;
 	sb->s_root = d_alloc_root(root_inode);
 	if (sb->s_root == NULL)
@@ -1019,8 +1006,6 @@ static int hostfs_fill_sb_common(struct super_block *sb, void *d, int silent)
 
 out_put:
 	iput(root_inode);
-out_free:
-	kfree(host_root_path);
 out:
 	return err;
 }
@@ -1032,11 +1017,17 @@ static int hostfs_read_sb(struct file_system_type *type,
 	return get_sb_nodev(type, flags, data, hostfs_fill_sb_common, mnt);
 }
 
+static void hostfs_kill_sb(struct super_block *s)
+{
+	kill_anon_super(s);
+	kfree(s->s_fs_info);
+}
+
 static struct file_system_type hostfs_type = {
 	.owner 		= THIS_MODULE,
 	.name 		= "hostfs",
 	.get_sb 	= hostfs_read_sb,
-	.kill_sb	= kill_anon_super,
+	.kill_sb	= hostfs_kill_sb,
 	.fs_flags 	= 0,
 };
 
-- 
cgit v1.1


From 52b209f7b848a28987ed133dc2b48f304b1dc6b8 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 6 Jun 2010 18:43:19 -0400
Subject: get rid of hostfs_read_inode()

There are only two call sites; in one (hostfs_iget()) it's actually
a no-op and in another (fill_super()) it's easier to expand the
damn thing and use what we know about its arguments to simplify
it.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/hostfs/hostfs_kern.c | 69 ++++++++++++-------------------------------------
 1 file changed, 16 insertions(+), 53 deletions(-)

(limited to 'fs')

diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index 7e67504..25d7929 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -204,50 +204,11 @@ static char *follow_link(char *link)
 	return ERR_PTR(n);
 }
 
-static int hostfs_read_inode(struct inode *ino)
-{
-	char *name;
-	int err = 0;
-
-	/*
-	 * Unfortunately, we are called from iget() when we don't have a dentry
-	 * allocated yet.
-	 */
-	if (list_empty(&ino->i_dentry))
-		goto out;
-
-	err = -ENOMEM;
-	name = inode_name(ino, 0);
-	if (name == NULL)
-		goto out;
-
-	if (file_type(name, NULL, NULL) == OS_TYPE_SYMLINK) {
-		name = follow_link(name);
-		if (IS_ERR(name)) {
-			err = PTR_ERR(name);
-			goto out;
-		}
-	}
-
-	err = read_name(ino, name);
-	kfree(name);
- out:
-	return err;
-}
-
 static struct inode *hostfs_iget(struct super_block *sb)
 {
-	struct inode *inode;
-	long ret;
-
-	inode = new_inode(sb);
+	struct inode *inode = new_inode(sb);
 	if (!inode)
 		return ERR_PTR(-ENOMEM);
-	ret = hostfs_read_inode(inode);
-	if (ret < 0) {
-		iput(inode);
-		return ERR_PTR(ret);
-	}
 	return inode;
 }
 
@@ -979,13 +940,23 @@ static int hostfs_fill_sb_common(struct super_block *sb, void *d, int silent)
 
 	sprintf(host_root_path, "%s/%s", root_ino, req_root);
 
-	root_inode = hostfs_iget(sb);
-	if (IS_ERR(root_inode)) {
-		err = PTR_ERR(root_inode);
+	root_inode = new_inode(sb);
+	if (!root_inode)
 		goto out;
-	}
 
-	err = init_inode(root_inode, NULL);
+	root_inode->i_op = &hostfs_dir_iops;
+	root_inode->i_fop = &hostfs_dir_fops;
+
+	if (file_type(host_root_path, NULL, NULL) == OS_TYPE_SYMLINK) {
+		char *name = follow_link(host_root_path);
+		if (IS_ERR(name))
+			err = PTR_ERR(name);
+		else
+			err = read_name(root_inode, name);
+		kfree(name);
+	} else {
+		err = read_name(root_inode, host_root_path);
+	}
 	if (err)
 		goto out_put;
 
@@ -994,14 +965,6 @@ static int hostfs_fill_sb_common(struct super_block *sb, void *d, int silent)
 	if (sb->s_root == NULL)
 		goto out_put;
 
-	err = hostfs_read_inode(root_inode);
-	if (err) {
-		/* No iput in this case because the dput does that for us */
-		dput(sb->s_root);
-		sb->s_root = NULL;
-		goto out;
-	}
-
 	return 0;
 
 out_put:
-- 
cgit v1.1


From 5e2df28cc62fdc3f4900de23f4ec69e6312f78a4 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 6 Jun 2010 19:38:18 -0400
Subject: hostfs: pass pathname to init_inode()

We will calculate it in all callers anyway, so there's no
need to duplicate that inside.  Moreover, that way we lose
all failure exits in init_inode(), so it doesn't need to
return anything.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/hostfs/hostfs_kern.c | 45 +++++++++++++++------------------------------
 1 file changed, 15 insertions(+), 30 deletions(-)

(limited to 'fs')

diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index 25d7929..5a77ed3 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -485,25 +485,16 @@ static const struct address_space_operations hostfs_aops = {
 	.write_end	= hostfs_write_end,
 };
 
-static int init_inode(struct inode *inode, struct dentry *dentry)
+static void init_inode(struct inode *inode, char *path)
 {
-	char *name;
-	int type, err = -ENOMEM;
+	int type;
 	int maj, min;
 	dev_t rdev = 0;
 
-	if (dentry) {
-		name = dentry_name(dentry, 0);
-		if (name == NULL)
-			goto out;
-		type = file_type(name, &maj, &min);
-		/* Reencode maj and min with the kernel encoding.*/
-		rdev = MKDEV(maj, min);
-		kfree(name);
-	}
-	else type = OS_TYPE_DIR;
+	type = file_type(path, &maj, &min);
+	/* Reencode maj and min with the kernel encoding.*/
+	rdev = MKDEV(maj, min);
 
-	err = 0;
 	if (type == OS_TYPE_SYMLINK)
 		inode->i_op = &page_symlink_inode_operations;
 	else if (type == OS_TYPE_DIR)
@@ -531,8 +522,6 @@ static int init_inode(struct inode *inode, struct dentry *dentry)
 		init_special_inode(inode, S_IFSOCK, 0);
 		break;
 	}
- out:
-	return err;
 }
 
 int hostfs_create(struct inode *dir, struct dentry *dentry, int mode,
@@ -548,10 +537,6 @@ int hostfs_create(struct inode *dir, struct dentry *dentry, int mode,
 		goto out;
 	}
 
-	error = init_inode(inode, dentry);
-	if (error)
-		goto out_put;
-
 	error = -ENOMEM;
 	name = dentry_name(dentry, 0);
 	if (name == NULL)
@@ -561,9 +546,12 @@ int hostfs_create(struct inode *dir, struct dentry *dentry, int mode,
 			 mode & S_IRUSR, mode & S_IWUSR, mode & S_IXUSR,
 			 mode & S_IRGRP, mode & S_IWGRP, mode & S_IXGRP,
 			 mode & S_IROTH, mode & S_IWOTH, mode & S_IXOTH);
-	if (fd < 0)
+	if (fd < 0) {
 		error = fd;
-	else error = read_name(inode, name);
+	} else {
+		error = read_name(inode, name);
+		init_inode(inode, name);
+	}
 
 	kfree(name);
 	if (error)
@@ -593,16 +581,14 @@ struct dentry *hostfs_lookup(struct inode *ino, struct dentry *dentry,
 		goto out;
 	}
 
-	err = init_inode(inode, dentry);
-	if (err)
-		goto out_put;
-
 	err = -ENOMEM;
 	name = dentry_name(dentry, 0);
 	if (name == NULL)
 		goto out_put;
 
 	err = read_name(inode, name);
+	init_inode(inode, name);
+
 	kfree(name);
 	if (err == -ENOENT) {
 		iput(inode);
@@ -717,10 +703,6 @@ int hostfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
 		goto out;
 	}
 
-	err = init_inode(inode, dentry);
-	if (err)
-		goto out_put;
-
 	err = -ENOMEM;
 	name = dentry_name(dentry, 0);
 	if (name == NULL)
@@ -732,6 +714,9 @@ int hostfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
 		goto out_free;
 
 	err = read_name(inode, name);
+	init_inode(inode, name);
+	if (err)
+		goto out_put;
 	kfree(name);
 	if (err)
 		goto out_put;
-- 
cgit v1.1


From 39b743c6199a317ffac67fcae1dd05be3142633a Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 6 Jun 2010 20:08:56 -0400
Subject: switch stat_file() to passing a single struct rather than fsckloads
 of pointers

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/hostfs/hostfs.h      | 20 +++++++++----
 fs/hostfs/hostfs_kern.c | 31 ++++++++------------
 fs/hostfs/hostfs_user.c | 75 ++++++++++++++++++++-----------------------------
 3 files changed, 58 insertions(+), 68 deletions(-)

(limited to 'fs')

diff --git a/fs/hostfs/hostfs.h b/fs/hostfs/hostfs.h
index 2f34f8f..3a52ede 100644
--- a/fs/hostfs/hostfs.h
+++ b/fs/hostfs/hostfs.h
@@ -53,11 +53,21 @@ struct hostfs_iattr {
 	struct timespec	ia_ctime;
 };
 
-extern int stat_file(const char *path, unsigned long long *inode_out,
-		     int *mode_out, int *nlink_out, int *uid_out, int *gid_out,
-		     unsigned long long *size_out, struct timespec *atime_out,
-		     struct timespec *mtime_out, struct timespec *ctime_out,
-		     int *blksize_out, unsigned long long *blocks_out, int fd);
+struct hostfs_stat {
+	unsigned long long ino;
+	unsigned int mode;
+	unsigned int nlink;
+	unsigned int uid;
+	unsigned int gid;
+	unsigned long long size;
+	struct timespec atime, mtime, ctime;
+	unsigned int blksize;
+	unsigned long long blocks;
+	unsigned int maj;
+	unsigned int min;
+};
+
+extern int stat_file(const char *path, struct hostfs_stat *p, int fd);
 extern int access_file(char *path, int r, int w, int x);
 extern int open_file(char *path, int r, int w, int append);
 extern int file_type(const char *path, int *maj, int *min);
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index 5a77ed3..420a826 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -131,28 +131,21 @@ static char *inode_name(struct inode *ino, int extra)
 
 static int read_name(struct inode *ino, char *name)
 {
-	/*
-	 * The non-int inode fields are copied into ints by stat_file and
-	 * then copied into the inode because passing the actual pointers
-	 * in and having them treated as int * breaks on big-endian machines
-	 */
-	int err;
-	int i_mode, i_nlink, i_blksize;
-	unsigned long long i_size;
-	unsigned long long i_ino;
-	unsigned long long i_blocks;
-
-	err = stat_file(name, &i_ino, &i_mode, &i_nlink, &ino->i_uid,
-			&ino->i_gid, &i_size, &ino->i_atime, &ino->i_mtime,
-			&ino->i_ctime, &i_blksize, &i_blocks, -1);
+	struct hostfs_stat st;
+	int err = stat_file(name, &st, -1);
 	if (err)
 		return err;
 
-	ino->i_ino = i_ino;
-	ino->i_mode = i_mode;
-	ino->i_nlink = i_nlink;
-	ino->i_size = i_size;
-	ino->i_blocks = i_blocks;
+	ino->i_ino = st.ino;
+	ino->i_mode = st.mode;
+	ino->i_nlink = st.nlink;
+	ino->i_uid = st.uid;
+	ino->i_gid = st.gid;
+	ino->i_atime = st.atime;
+	ino->i_mtime = st.mtime;
+	ino->i_ctime = st.ctime;
+	ino->i_size = st.size;
+	ino->i_blocks = st.blocks;
 	return 0;
 }
 
diff --git a/fs/hostfs/hostfs_user.c b/fs/hostfs/hostfs_user.c
index 4b8c666..701d454 100644
--- a/fs/hostfs/hostfs_user.c
+++ b/fs/hostfs/hostfs_user.c
@@ -19,11 +19,27 @@
 #include "user.h"
 #include <utime.h>
 
-int stat_file(const char *path, unsigned long long *inode_out, int *mode_out,
-	      int *nlink_out, int *uid_out, int *gid_out,
-	      unsigned long long *size_out, struct timespec *atime_out,
-	      struct timespec *mtime_out, struct timespec *ctime_out,
-	      int *blksize_out, unsigned long long *blocks_out, int fd)
+static void stat64_to_hostfs(const struct stat64 *buf, struct hostfs_stat *p)
+{
+	p->ino = buf->st_ino;
+	p->mode = buf->st_mode;
+	p->nlink = buf->st_nlink;
+	p->uid = buf->st_uid;
+	p->gid = buf->st_gid;
+	p->size = buf->st_size;
+	p->atime.tv_sec = buf->st_atime;
+	p->atime.tv_nsec = 0;
+	p->ctime.tv_sec = buf->st_ctime;
+	p->ctime.tv_nsec = 0;
+	p->mtime.tv_sec = buf->st_mtime;
+	p->mtime.tv_nsec = 0;
+	p->blksize = buf->st_blksize;
+	p->blocks = buf->st_blocks;
+	p->maj = os_major(buf->st_rdev);
+	p->min = os_minor(buf->st_rdev);
+}
+
+int stat_file(const char *path, struct hostfs_stat *p, int fd)
 {
 	struct stat64 buf;
 
@@ -33,35 +49,7 @@ int stat_file(const char *path, unsigned long long *inode_out, int *mode_out,
 	} else if (lstat64(path, &buf) < 0) {
 		return -errno;
 	}
-
-	if (inode_out != NULL)
-		*inode_out = buf.st_ino;
-	if (mode_out != NULL)
-		*mode_out = buf.st_mode;
-	if (nlink_out != NULL)
-		*nlink_out = buf.st_nlink;
-	if (uid_out != NULL)
-		*uid_out = buf.st_uid;
-	if (gid_out != NULL)
-		*gid_out = buf.st_gid;
-	if (size_out != NULL)
-		*size_out = buf.st_size;
-	if (atime_out != NULL) {
-		atime_out->tv_sec = buf.st_atime;
-		atime_out->tv_nsec = 0;
-	}
-	if (mtime_out != NULL) {
-		mtime_out->tv_sec = buf.st_mtime;
-		mtime_out->tv_nsec = 0;
-	}
-	if (ctime_out != NULL) {
-		ctime_out->tv_sec = buf.st_ctime;
-		ctime_out->tv_nsec = 0;
-	}
-	if (blksize_out != NULL)
-		*blksize_out = buf.st_blksize;
-	if (blocks_out != NULL)
-		*blocks_out = buf.st_blocks;
+	stat64_to_hostfs(&buf, p);
 	return 0;
 }
 
@@ -235,8 +223,8 @@ int file_create(char *name, int ur, int uw, int ux, int gr,
 
 int set_attr(const char *file, struct hostfs_iattr *attrs, int fd)
 {
+	struct hostfs_stat st;
 	struct timeval times[2];
-	struct timespec atime_ts, mtime_ts;
 	int err, ma;
 
 	if (attrs->ia_valid & HOSTFS_ATTR_MODE) {
@@ -279,15 +267,14 @@ int set_attr(const char *file, struct hostfs_iattr *attrs, int fd)
 	 */
 	ma = (HOSTFS_ATTR_ATIME_SET | HOSTFS_ATTR_MTIME_SET);
 	if (attrs->ia_valid & ma) {
-		err = stat_file(file, NULL, NULL, NULL, NULL, NULL, NULL,
-				&atime_ts, &mtime_ts, NULL, NULL, NULL, fd);
+		err = stat_file(file, &st, fd);
 		if (err != 0)
 			return err;
 
-		times[0].tv_sec = atime_ts.tv_sec;
-		times[0].tv_usec = atime_ts.tv_nsec / 1000;
-		times[1].tv_sec = mtime_ts.tv_sec;
-		times[1].tv_usec = mtime_ts.tv_nsec / 1000;
+		times[0].tv_sec = st.atime.tv_sec;
+		times[0].tv_usec = st.atime.tv_nsec / 1000;
+		times[1].tv_sec = st.mtime.tv_sec;
+		times[1].tv_usec = st.mtime.tv_nsec / 1000;
 
 		if (attrs->ia_valid & HOSTFS_ATTR_ATIME_SET) {
 			times[0].tv_sec = attrs->ia_atime.tv_sec;
@@ -308,9 +295,9 @@ int set_attr(const char *file, struct hostfs_iattr *attrs, int fd)
 
 	/* Note: ctime is not handled */
 	if (attrs->ia_valid & (HOSTFS_ATTR_ATIME | HOSTFS_ATTR_MTIME)) {
-		err = stat_file(file, NULL, NULL, NULL, NULL, NULL, NULL,
-				&attrs->ia_atime, &attrs->ia_mtime, NULL,
-				NULL, NULL, fd);
+		err = stat_file(file, &st, fd);
+		attrs->ia_atime = st.atime;
+		attrs->ia_mtime = st.mtime;
 		if (err != 0)
 			return err;
 	}
-- 
cgit v1.1


From 4754b825571a6f2f7655245e420e8e486c4458f6 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 6 Jun 2010 20:33:12 -0400
Subject: hostfs: get rid of file_type(), fold init_inode()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/hostfs/hostfs.h      |   1 -
 fs/hostfs/hostfs_kern.c | 107 ++++++++++++++++++++----------------------------
 fs/hostfs/hostfs_user.c |  30 --------------
 3 files changed, 45 insertions(+), 93 deletions(-)

(limited to 'fs')

diff --git a/fs/hostfs/hostfs.h b/fs/hostfs/hostfs.h
index 3a52ede..ea87e22 100644
--- a/fs/hostfs/hostfs.h
+++ b/fs/hostfs/hostfs.h
@@ -70,7 +70,6 @@ struct hostfs_stat {
 extern int stat_file(const char *path, struct hostfs_stat *p, int fd);
 extern int access_file(char *path, int r, int w, int x);
 extern int open_file(char *path, int r, int w, int append);
-extern int file_type(const char *path, int *maj, int *min);
 extern void *open_dir(char *path, int *err_out);
 extern char *read_dir(void *stream, unsigned long long *pos,
 		      unsigned long long *ino_out, int *len_out);
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index 420a826..b29a2b8 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -129,26 +129,6 @@ static char *inode_name(struct inode *ino, int extra)
 	return dentry_name(dentry, extra);
 }
 
-static int read_name(struct inode *ino, char *name)
-{
-	struct hostfs_stat st;
-	int err = stat_file(name, &st, -1);
-	if (err)
-		return err;
-
-	ino->i_ino = st.ino;
-	ino->i_mode = st.mode;
-	ino->i_nlink = st.nlink;
-	ino->i_uid = st.uid;
-	ino->i_gid = st.gid;
-	ino->i_atime = st.atime;
-	ino->i_mtime = st.mtime;
-	ino->i_ctime = st.ctime;
-	ino->i_size = st.size;
-	ino->i_blocks = st.blocks;
-	return 0;
-}
-
 static char *follow_link(char *link)
 {
 	int len, n;
@@ -478,43 +458,51 @@ static const struct address_space_operations hostfs_aops = {
 	.write_end	= hostfs_write_end,
 };
 
-static void init_inode(struct inode *inode, char *path)
+static int read_name(struct inode *ino, char *name)
 {
-	int type;
-	int maj, min;
-	dev_t rdev = 0;
+	dev_t rdev;
+	struct hostfs_stat st;
+	int err = stat_file(name, &st, -1);
+	if (err)
+		return err;
 
-	type = file_type(path, &maj, &min);
 	/* Reencode maj and min with the kernel encoding.*/
-	rdev = MKDEV(maj, min);
+	rdev = MKDEV(st.maj, st.min);
 
-	if (type == OS_TYPE_SYMLINK)
-		inode->i_op = &page_symlink_inode_operations;
-	else if (type == OS_TYPE_DIR)
-		inode->i_op = &hostfs_dir_iops;
-	else inode->i_op = &hostfs_iops;
-
-	if (type == OS_TYPE_DIR) inode->i_fop = &hostfs_dir_fops;
-	else inode->i_fop = &hostfs_file_fops;
-
-	if (type == OS_TYPE_SYMLINK)
-		inode->i_mapping->a_ops = &hostfs_link_aops;
-	else inode->i_mapping->a_ops = &hostfs_aops;
-
-	switch (type) {
-	case OS_TYPE_CHARDEV:
-		init_special_inode(inode, S_IFCHR, rdev);
-		break;
-	case OS_TYPE_BLOCKDEV:
-		init_special_inode(inode, S_IFBLK, rdev);
+	switch (st.mode & S_IFMT) {
+	case S_IFLNK:
+		ino->i_op = &page_symlink_inode_operations;
+		ino->i_mapping->a_ops = &hostfs_link_aops;
 		break;
-	case OS_TYPE_FIFO:
-		init_special_inode(inode, S_IFIFO, 0);
+	case S_IFDIR:
+		ino->i_op = &hostfs_dir_iops;
+		ino->i_fop = &hostfs_dir_fops;
 		break;
-	case OS_TYPE_SOCK:
-		init_special_inode(inode, S_IFSOCK, 0);
+	case S_IFCHR:
+	case S_IFBLK:
+	case S_IFIFO:
+	case S_IFSOCK:
+		init_special_inode(ino, st.mode & S_IFMT, rdev);
+		ino->i_op = &hostfs_iops;
 		break;
+
+	default:
+		ino->i_op = &hostfs_iops;
+		ino->i_fop = &hostfs_file_fops;
+		ino->i_mapping->a_ops = &hostfs_aops;
 	}
+
+	ino->i_ino = st.ino;
+	ino->i_mode = st.mode;
+	ino->i_nlink = st.nlink;
+	ino->i_uid = st.uid;
+	ino->i_gid = st.gid;
+	ino->i_atime = st.atime;
+	ino->i_mtime = st.mtime;
+	ino->i_ctime = st.ctime;
+	ino->i_size = st.size;
+	ino->i_blocks = st.blocks;
+	return 0;
 }
 
 int hostfs_create(struct inode *dir, struct dentry *dentry, int mode,
@@ -539,12 +527,10 @@ int hostfs_create(struct inode *dir, struct dentry *dentry, int mode,
 			 mode & S_IRUSR, mode & S_IWUSR, mode & S_IXUSR,
 			 mode & S_IRGRP, mode & S_IWGRP, mode & S_IXGRP,
 			 mode & S_IROTH, mode & S_IWOTH, mode & S_IXOTH);
-	if (fd < 0) {
+	if (fd < 0)
 		error = fd;
-	} else {
+	else
 		error = read_name(inode, name);
-		init_inode(inode, name);
-	}
 
 	kfree(name);
 	if (error)
@@ -580,7 +566,6 @@ struct dentry *hostfs_lookup(struct inode *ino, struct dentry *dentry,
 		goto out_put;
 
 	err = read_name(inode, name);
-	init_inode(inode, name);
 
 	kfree(name);
 	if (err == -ENOENT) {
@@ -707,7 +692,6 @@ int hostfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
 		goto out_free;
 
 	err = read_name(inode, name);
-	init_inode(inode, name);
 	if (err)
 		goto out_put;
 	kfree(name);
@@ -922,21 +906,20 @@ static int hostfs_fill_sb_common(struct super_block *sb, void *d, int silent)
 	if (!root_inode)
 		goto out;
 
-	root_inode->i_op = &hostfs_dir_iops;
-	root_inode->i_fop = &hostfs_dir_fops;
+	err = read_name(root_inode, host_root_path);
+	if (err)
+		goto out_put;
 
-	if (file_type(host_root_path, NULL, NULL) == OS_TYPE_SYMLINK) {
+	if (S_ISLNK(root_inode->i_mode)) {
 		char *name = follow_link(host_root_path);
 		if (IS_ERR(name))
 			err = PTR_ERR(name);
 		else
 			err = read_name(root_inode, name);
 		kfree(name);
-	} else {
-		err = read_name(root_inode, host_root_path);
+		if (err)
+			goto out_put;
 	}
-	if (err)
-		goto out_put;
 
 	err = -ENOMEM;
 	sb->s_root = d_alloc_root(root_inode);
diff --git a/fs/hostfs/hostfs_user.c b/fs/hostfs/hostfs_user.c
index 701d454..91ebfce 100644
--- a/fs/hostfs/hostfs_user.c
+++ b/fs/hostfs/hostfs_user.c
@@ -53,36 +53,6 @@ int stat_file(const char *path, struct hostfs_stat *p, int fd)
 	return 0;
 }
 
-int file_type(const char *path, int *maj, int *min)
-{
- 	struct stat64 buf;
-
-	if (lstat64(path, &buf) < 0)
-		return -errno;
-	/*
-	 * We cannot pass rdev as is because glibc and the kernel disagree
-	 * about its definition.
-	 */
-	if (maj != NULL)
-		*maj = os_major(buf.st_rdev);
-	if (min != NULL)
-		*min = os_minor(buf.st_rdev);
-
-	if (S_ISDIR(buf.st_mode))
-		return OS_TYPE_DIR;
-	else if (S_ISLNK(buf.st_mode))
-		return OS_TYPE_SYMLINK;
-	else if (S_ISCHR(buf.st_mode))
-		return OS_TYPE_CHARDEV;
-	else if (S_ISBLK(buf.st_mode))
-		return OS_TYPE_BLOCKDEV;
-	else if (S_ISFIFO(buf.st_mode))
-		return OS_TYPE_FIFO;
-	else if (S_ISSOCK(buf.st_mode))
-		return OS_TYPE_SOCK;
-	else return OS_TYPE_FILE;
-}
-
 int access_file(char *path, int r, int w, int x)
 {
 	int mode = 0;
-- 
cgit v1.1


From c5322220eb91b9e56ac7b69eb690d9d20fac5725 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 6 Jun 2010 20:42:10 -0400
Subject: hostfs: get rid of inode_dentry_name()

it's equivalent to dentry_name() anyway

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/hostfs/hostfs_kern.c | 55 ++++++++++++++++++-------------------------------
 1 file changed, 20 insertions(+), 35 deletions(-)

(limited to 'fs')

diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index b29a2b8..3841fb1 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -89,7 +89,7 @@ __uml_setup("hostfs=", hostfs_args,
 );
 #endif
 
-static char *dentry_name(struct dentry *dentry, int extra)
+static char *dentry_name(struct dentry *dentry)
 {
 	struct dentry *parent;
 	char *root, *name;
@@ -104,7 +104,7 @@ static char *dentry_name(struct dentry *dentry, int extra)
 
 	root = parent->d_sb->s_fs_info;
 	len += strlen(root);
-	name = kmalloc(len + extra + 1, GFP_KERNEL);
+	name = kmalloc(len + 1, GFP_KERNEL);
 	if (name == NULL)
 		return NULL;
 
@@ -121,12 +121,12 @@ static char *dentry_name(struct dentry *dentry, int extra)
 	return name;
 }
 
-static char *inode_name(struct inode *ino, int extra)
+static char *inode_name(struct inode *ino)
 {
 	struct dentry *dentry;
 
 	dentry = list_entry(ino->i_dentry.next, struct dentry, d_alias);
-	return dentry_name(dentry, extra);
+	return dentry_name(dentry);
 }
 
 static char *follow_link(char *link)
@@ -267,7 +267,7 @@ int hostfs_readdir(struct file *file, void *ent, filldir_t filldir)
 	unsigned long long next, ino;
 	int error, len;
 
-	name = dentry_name(file->f_path.dentry, 0);
+	name = dentry_name(file->f_path.dentry);
 	if (name == NULL)
 		return -ENOMEM;
 	dir = open_dir(name, &error);
@@ -312,7 +312,7 @@ int hostfs_file_open(struct inode *ino, struct file *file)
 	if (w)
 		r = 1;
 
-	name = dentry_name(file->f_path.dentry, 0);
+	name = dentry_name(file->f_path.dentry);
 	if (name == NULL)
 		return -ENOMEM;
 
@@ -519,7 +519,7 @@ int hostfs_create(struct inode *dir, struct dentry *dentry, int mode,
 	}
 
 	error = -ENOMEM;
-	name = dentry_name(dentry, 0);
+	name = dentry_name(dentry);
 	if (name == NULL)
 		goto out_put;
 
@@ -561,7 +561,7 @@ struct dentry *hostfs_lookup(struct inode *ino, struct dentry *dentry,
 	}
 
 	err = -ENOMEM;
-	name = dentry_name(dentry, 0);
+	name = dentry_name(dentry);
 	if (name == NULL)
 		goto out_put;
 
@@ -585,29 +585,14 @@ struct dentry *hostfs_lookup(struct inode *ino, struct dentry *dentry,
 	return ERR_PTR(err);
 }
 
-static char *inode_dentry_name(struct inode *ino, struct dentry *dentry)
-{
-	char *file;
-	int len;
-
-	file = inode_name(ino, dentry->d_name.len + 1);
-	if (file == NULL)
-		return NULL;
-	strcat(file, "/");
-	len = strlen(file);
-	strncat(file, dentry->d_name.name, dentry->d_name.len);
-	file[len + dentry->d_name.len] = '\0';
-	return file;
-}
-
 int hostfs_link(struct dentry *to, struct inode *ino, struct dentry *from)
 {
 	char *from_name, *to_name;
 	int err;
 
-	if ((from_name = inode_dentry_name(ino, from)) == NULL)
+	if ((from_name = dentry_name(from)) == NULL)
 		return -ENOMEM;
-	to_name = dentry_name(to, 0);
+	to_name = dentry_name(to);
 	if (to_name == NULL) {
 		kfree(from_name);
 		return -ENOMEM;
@@ -623,7 +608,7 @@ int hostfs_unlink(struct inode *ino, struct dentry *dentry)
 	char *file;
 	int err;
 
-	if ((file = inode_dentry_name(ino, dentry)) == NULL)
+	if ((file = dentry_name(dentry)) == NULL)
 		return -ENOMEM;
 	if (append)
 		return -EPERM;
@@ -638,7 +623,7 @@ int hostfs_symlink(struct inode *ino, struct dentry *dentry, const char *to)
 	char *file;
 	int err;
 
-	if ((file = inode_dentry_name(ino, dentry)) == NULL)
+	if ((file = dentry_name(dentry)) == NULL)
 		return -ENOMEM;
 	err = make_symlink(file, to);
 	kfree(file);
@@ -650,7 +635,7 @@ int hostfs_mkdir(struct inode *ino, struct dentry *dentry, int mode)
 	char *file;
 	int err;
 
-	if ((file = inode_dentry_name(ino, dentry)) == NULL)
+	if ((file = dentry_name(dentry)) == NULL)
 		return -ENOMEM;
 	err = do_mkdir(file, mode);
 	kfree(file);
@@ -662,7 +647,7 @@ int hostfs_rmdir(struct inode *ino, struct dentry *dentry)
 	char *file;
 	int err;
 
-	if ((file = inode_dentry_name(ino, dentry)) == NULL)
+	if ((file = dentry_name(dentry)) == NULL)
 		return -ENOMEM;
 	err = do_rmdir(file);
 	kfree(file);
@@ -682,7 +667,7 @@ int hostfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
 	}
 
 	err = -ENOMEM;
-	name = dentry_name(dentry, 0);
+	name = dentry_name(dentry);
 	if (name == NULL)
 		goto out_put;
 
@@ -715,9 +700,9 @@ int hostfs_rename(struct inode *from_ino, struct dentry *from,
 	char *from_name, *to_name;
 	int err;
 
-	if ((from_name = inode_dentry_name(from_ino, from)) == NULL)
+	if ((from_name = dentry_name(from)) == NULL)
 		return -ENOMEM;
-	if ((to_name = inode_dentry_name(to_ino, to)) == NULL) {
+	if ((to_name = dentry_name(to)) == NULL) {
 		kfree(from_name);
 		return -ENOMEM;
 	}
@@ -735,7 +720,7 @@ int hostfs_permission(struct inode *ino, int desired)
 	if (desired & MAY_READ) r = 1;
 	if (desired & MAY_WRITE) w = 1;
 	if (desired & MAY_EXEC) x = 1;
-	name = inode_name(ino, 0);
+	name = inode_name(ino);
 	if (name == NULL)
 		return -ENOMEM;
 
@@ -801,7 +786,7 @@ int hostfs_setattr(struct dentry *dentry, struct iattr *attr)
 	if (attr->ia_valid & ATTR_MTIME_SET) {
 		attrs.ia_valid |= HOSTFS_ATTR_MTIME_SET;
 	}
-	name = dentry_name(dentry, 0);
+	name = dentry_name(dentry);
 	if (name == NULL)
 		return -ENOMEM;
 	err = set_attr(name, &attrs, fd);
@@ -856,7 +841,7 @@ int hostfs_link_readpage(struct file *file, struct page *page)
 	int err;
 
 	buffer = kmap(page);
-	name = inode_name(page->mapping->host, 0);
+	name = inode_name(page->mapping->host);
 	if (name == NULL)
 		return -ENOMEM;
 	err = hostfs_do_readlink(name, buffer, PAGE_CACHE_SIZE);
-- 
cgit v1.1


From d0352d3ed722b134dacc21836c1763e7e3523662 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 6 Jun 2010 21:51:16 -0400
Subject: hostfs: sanitize symlinks

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/hostfs/hostfs_kern.c | 61 ++++++++++++++++++++++++++++---------------------
 1 file changed, 35 insertions(+), 26 deletions(-)

(limited to 'fs')

diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index 3841fb1..10bb71b 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -14,6 +14,7 @@
 #include <linux/slab.h>
 #include <linux/seq_file.h>
 #include <linux/mount.h>
+#include <linux/namei.h>
 #include "hostfs.h"
 #include "init.h"
 #include "kern.h"
@@ -48,7 +49,7 @@ static int append = 0;
 
 static const struct inode_operations hostfs_iops;
 static const struct inode_operations hostfs_dir_iops;
-static const struct address_space_operations hostfs_link_aops;
+static const struct inode_operations hostfs_link_iops;
 
 #ifndef MODULE
 static int __init hostfs_args(char *options, int *add)
@@ -471,8 +472,7 @@ static int read_name(struct inode *ino, char *name)
 
 	switch (st.mode & S_IFMT) {
 	case S_IFLNK:
-		ino->i_op = &page_symlink_inode_operations;
-		ino->i_mapping->a_ops = &hostfs_link_aops;
+		ino->i_op = &hostfs_link_iops;
 		break;
 	case S_IFDIR:
 		ino->i_op = &hostfs_dir_iops;
@@ -835,32 +835,41 @@ static const struct inode_operations hostfs_dir_iops = {
 	.setattr	= hostfs_setattr,
 };
 
-int hostfs_link_readpage(struct file *file, struct page *page)
-{
-	char *buffer, *name;
-	int err;
-
-	buffer = kmap(page);
-	name = inode_name(page->mapping->host);
-	if (name == NULL)
-		return -ENOMEM;
-	err = hostfs_do_readlink(name, buffer, PAGE_CACHE_SIZE);
-	kfree(name);
-	if (err == PAGE_CACHE_SIZE)
-		err = -E2BIG;
-	else if (err > 0) {
-		flush_dcache_page(page);
-		SetPageUptodate(page);
-		if (PageError(page)) ClearPageError(page);
-		err = 0;
+static void *hostfs_follow_link(struct dentry *dentry, struct nameidata *nd)
+{
+	char *link = __getname();
+	if (link) {
+		char *path = dentry_name(dentry);
+		int err = -ENOMEM;
+		if (path) {
+			int err = hostfs_do_readlink(path, link, PATH_MAX);
+			if (err == PATH_MAX)
+				err = -E2BIG;
+			kfree(path);
+		}
+		if (err < 0) {
+			__putname(link);
+			link = ERR_PTR(err);
+		}
+	} else {
+		link = ERR_PTR(-ENOMEM);
 	}
-	kunmap(page);
-	unlock_page(page);
-	return err;
+
+	nd_set_link(nd, link);
+	return NULL;
+}
+
+static void hostfs_put_link(struct dentry *dentry, struct nameidata *nd, void *cookie)
+{
+	char *s = nd_get_link(nd);
+	if (!IS_ERR(s))
+		__putname(s);
 }
 
-static const struct address_space_operations hostfs_link_aops = {
-	.readpage	= hostfs_link_readpage,
+static const struct inode_operations hostfs_link_iops = {
+	.readlink	= generic_readlink,
+	.follow_link	= hostfs_follow_link,
+	.put_link	= hostfs_put_link,
 };
 
 static int hostfs_fill_sb_common(struct super_block *sb, void *d, int silent)
-- 
cgit v1.1


From c103135c14e03fc9a9e5f0adc01df9ad272cf2a1 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 6 Jun 2010 22:31:14 -0400
Subject: new helper: __dentry_path()

builds path relative to fs root, called under dcache_lock,
doesn't append any nonsense to unlinked ones.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/dcache.c | 27 ++++++++++++++++++++++-----
 1 file changed, 22 insertions(+), 5 deletions(-)

(limited to 'fs')

diff --git a/fs/dcache.c b/fs/dcache.c
index 86d4db1..caf0857 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -2049,16 +2049,12 @@ char *dynamic_dname(struct dentry *dentry, char *buffer, int buflen,
 /*
  * Write full pathname from the root of the filesystem into the buffer.
  */
-char *dentry_path(struct dentry *dentry, char *buf, int buflen)
+char *__dentry_path(struct dentry *dentry, char *buf, int buflen)
 {
 	char *end = buf + buflen;
 	char *retval;
 
-	spin_lock(&dcache_lock);
 	prepend(&end, &buflen, "\0", 1);
-	if (d_unlinked(dentry) &&
-		(prepend(&end, &buflen, "//deleted", 9) != 0))
-			goto Elong;
 	if (buflen < 1)
 		goto Elong;
 	/* Get '/' right */
@@ -2076,7 +2072,28 @@ char *dentry_path(struct dentry *dentry, char *buf, int buflen)
 		retval = end;
 		dentry = parent;
 	}
+	return retval;
+Elong:
+	return ERR_PTR(-ENAMETOOLONG);
+}
+EXPORT_SYMBOL(__dentry_path);
+
+char *dentry_path(struct dentry *dentry, char *buf, int buflen)
+{
+	char *p = NULL;
+	char *retval;
+
+	spin_lock(&dcache_lock);
+	if (d_unlinked(dentry)) {
+		p = buf + buflen;
+		if (prepend(&p, &buflen, "//deleted", 10) != 0)
+			goto Elong;
+		buflen++;
+	}
+	retval = __dentry_path(dentry, buf, buflen);
 	spin_unlock(&dcache_lock);
+	if (!IS_ERR(retval) && p)
+		*p = '/';	/* restore '/' overriden with '\0' */
 	return retval;
 Elong:
 	spin_unlock(&dcache_lock);
-- 
cgit v1.1


From e9193059b1b3733695d5b80e667778311695aa73 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 6 Jun 2010 23:16:34 -0400
Subject: hostfs: fix races in dentry_name() and inode_name()

calculating size, then doing allocation, then filling the
path is a Bad Idea(tm), since the ancestors can be renamed,
leading to buffer overrun.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/hostfs/hostfs_kern.c | 106 +++++++++++++++++++++++++++---------------------
 1 file changed, 60 insertions(+), 46 deletions(-)

(limited to 'fs')

diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index 10bb71b..79783a0 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -90,44 +90,58 @@ __uml_setup("hostfs=", hostfs_args,
 );
 #endif
 
-static char *dentry_name(struct dentry *dentry)
+static char *__dentry_name(struct dentry *dentry, char *name)
 {
-	struct dentry *parent;
-	char *root, *name;
-	int len;
+	char *p = __dentry_path(dentry, name, PATH_MAX);
+	char *root;
+	size_t len;
 
-	len = 0;
-	parent = dentry;
-	while (parent->d_parent != parent) {
-		len += parent->d_name.len + 1;
-		parent = parent->d_parent;
-	}
+	spin_unlock(&dcache_lock);
 
-	root = parent->d_sb->s_fs_info;
-	len += strlen(root);
-	name = kmalloc(len + 1, GFP_KERNEL);
-	if (name == NULL)
+	root = dentry->d_sb->s_fs_info;
+	len = strlen(root);
+	if (IS_ERR(p)) {
+		__putname(name);
 		return NULL;
-
-	name[len] = '\0';
-	parent = dentry;
-	while (parent->d_parent != parent) {
-		len -= parent->d_name.len + 1;
-		name[len] = '/';
-		strncpy(&name[len + 1], parent->d_name.name,
-			parent->d_name.len);
-		parent = parent->d_parent;
 	}
-	strncpy(name, root, strlen(root));
+	strncpy(name, root, PATH_MAX);
+	if (len > p - name) {
+		__putname(name);
+		return NULL;
+	}
+	if (p > name + len) {
+		char *s = name + len;
+		while ((*s++ = *p++) != '\0')
+			;
+	}
 	return name;
 }
 
+static char *dentry_name(struct dentry *dentry)
+{
+	char *name = __getname();
+	if (!name)
+		return NULL;
+
+	spin_lock(&dcache_lock);
+	return __dentry_name(dentry, name); /* will unlock */
+}
+
 static char *inode_name(struct inode *ino)
 {
 	struct dentry *dentry;
+	char *name = __getname();
+	if (!name)
+		return NULL;
 
-	dentry = list_entry(ino->i_dentry.next, struct dentry, d_alias);
-	return dentry_name(dentry);
+	spin_lock(&dcache_lock);
+	if (list_empty(&ino->i_dentry)) {
+		spin_unlock(&dcache_lock);
+		__putname(name);
+		return NULL;
+	}
+	dentry = list_first_entry(&ino->i_dentry, struct dentry, d_alias);
+	return __dentry_name(dentry, name); /* will unlock */
 }
 
 static char *follow_link(char *link)
@@ -272,7 +286,7 @@ int hostfs_readdir(struct file *file, void *ent, filldir_t filldir)
 	if (name == NULL)
 		return -ENOMEM;
 	dir = open_dir(name, &error);
-	kfree(name);
+	__putname(name);
 	if (dir == NULL)
 		return -error;
 	next = file->f_pos;
@@ -318,7 +332,7 @@ int hostfs_file_open(struct inode *ino, struct file *file)
 		return -ENOMEM;
 
 	fd = open_file(name, r, w, append);
-	kfree(name);
+	__putname(name);
 	if (fd < 0)
 		return fd;
 	FILE_HOSTFS_I(file)->fd = fd;
@@ -532,7 +546,7 @@ int hostfs_create(struct inode *dir, struct dentry *dentry, int mode,
 	else
 		error = read_name(inode, name);
 
-	kfree(name);
+	__putname(name);
 	if (error)
 		goto out_put;
 
@@ -567,7 +581,7 @@ struct dentry *hostfs_lookup(struct inode *ino, struct dentry *dentry,
 
 	err = read_name(inode, name);
 
-	kfree(name);
+	__putname(name);
 	if (err == -ENOENT) {
 		iput(inode);
 		inode = NULL;
@@ -594,12 +608,12 @@ int hostfs_link(struct dentry *to, struct inode *ino, struct dentry *from)
 		return -ENOMEM;
 	to_name = dentry_name(to);
 	if (to_name == NULL) {
-		kfree(from_name);
+		__putname(from_name);
 		return -ENOMEM;
 	}
 	err = link_file(to_name, from_name);
-	kfree(from_name);
-	kfree(to_name);
+	__putname(from_name);
+	__putname(to_name);
 	return err;
 }
 
@@ -614,7 +628,7 @@ int hostfs_unlink(struct inode *ino, struct dentry *dentry)
 		return -EPERM;
 
 	err = unlink_file(file);
-	kfree(file);
+	__putname(file);
 	return err;
 }
 
@@ -626,7 +640,7 @@ int hostfs_symlink(struct inode *ino, struct dentry *dentry, const char *to)
 	if ((file = dentry_name(dentry)) == NULL)
 		return -ENOMEM;
 	err = make_symlink(file, to);
-	kfree(file);
+	__putname(file);
 	return err;
 }
 
@@ -638,7 +652,7 @@ int hostfs_mkdir(struct inode *ino, struct dentry *dentry, int mode)
 	if ((file = dentry_name(dentry)) == NULL)
 		return -ENOMEM;
 	err = do_mkdir(file, mode);
-	kfree(file);
+	__putname(file);
 	return err;
 }
 
@@ -650,7 +664,7 @@ int hostfs_rmdir(struct inode *ino, struct dentry *dentry)
 	if ((file = dentry_name(dentry)) == NULL)
 		return -ENOMEM;
 	err = do_rmdir(file);
-	kfree(file);
+	__putname(file);
 	return err;
 }
 
@@ -673,13 +687,13 @@ int hostfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
 
 	init_special_inode(inode, mode, dev);
 	err = do_mknod(name, mode, MAJOR(dev), MINOR(dev));
-	if (err)
+	if (!err)
 		goto out_free;
 
 	err = read_name(inode, name);
+	__putname(name);
 	if (err)
 		goto out_put;
-	kfree(name);
 	if (err)
 		goto out_put;
 
@@ -687,7 +701,7 @@ int hostfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
 	return 0;
 
  out_free:
-	kfree(name);
+	__putname(name);
  out_put:
 	iput(inode);
  out:
@@ -703,12 +717,12 @@ int hostfs_rename(struct inode *from_ino, struct dentry *from,
 	if ((from_name = dentry_name(from)) == NULL)
 		return -ENOMEM;
 	if ((to_name = dentry_name(to)) == NULL) {
-		kfree(from_name);
+		__putname(from_name);
 		return -ENOMEM;
 	}
 	err = rename_file(from_name, to_name);
-	kfree(from_name);
-	kfree(to_name);
+	__putname(from_name);
+	__putname(to_name);
 	return err;
 }
 
@@ -729,7 +743,7 @@ int hostfs_permission(struct inode *ino, int desired)
 		err = 0;
 	else
 		err = access_file(name, r, w, x);
-	kfree(name);
+	__putname(name);
 	if (!err)
 		err = generic_permission(ino, desired, NULL);
 	return err;
@@ -790,7 +804,7 @@ int hostfs_setattr(struct dentry *dentry, struct iattr *attr)
 	if (name == NULL)
 		return -ENOMEM;
 	err = set_attr(name, &attrs, fd);
-	kfree(name);
+	__putname(name);
 	if (err)
 		return err;
 
@@ -845,7 +859,7 @@ static void *hostfs_follow_link(struct dentry *dentry, struct nameidata *nd)
 			int err = hostfs_do_readlink(path, link, PATH_MAX);
 			if (err == PATH_MAX)
 				err = -E2BIG;
-			kfree(path);
+			__putname(path);
 		}
 		if (err < 0) {
 			__putname(link);
-- 
cgit v1.1


From f8d7e1877e5121841bc9a4d284a04dbc13f45bea Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 6 Jun 2010 23:19:04 -0400
Subject: leak in hostfs_unlink()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/hostfs/hostfs_kern.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index 79783a0..8130ce9 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -622,11 +622,12 @@ int hostfs_unlink(struct inode *ino, struct dentry *dentry)
 	char *file;
 	int err;
 
-	if ((file = dentry_name(dentry)) == NULL)
-		return -ENOMEM;
 	if (append)
 		return -EPERM;
 
+	if ((file = dentry_name(dentry)) == NULL)
+		return -ENOMEM;
+
 	err = unlink_file(file);
 	__putname(file);
 	return err;
-- 
cgit v1.1


From f8ad850f11e11d10e7de1a16ca53cb193afc9313 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 6 Jun 2010 23:49:18 -0400
Subject: try to get rid of races in hostfs open()

In case of mode mismatch, do *not* blindly close the descriptor
another openers might be using right now.  Open the underlying
file with currently sufficient mode, then
	* if current mode has grown so that it's sufficient for
us now, just close our new fd
	* if current mode has grown and our fd is *not* enough
to cover it, close and repeat.
	* otherwise, install our fd if the file hadn't been
opened at all or dup2() our fd over the current one (and close
our fd).
Critical section is protected by mutex; yes, system-wide.  All
we do under it is a bunch of comparison and maybe an overwriting
dup2() on host.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/hostfs/hostfs.h      |  1 +
 fs/hostfs/hostfs_kern.c | 43 +++++++++++++++++++++++++++++++------------
 fs/hostfs/hostfs_user.c |  5 +++++
 3 files changed, 37 insertions(+), 12 deletions(-)

(limited to 'fs')

diff --git a/fs/hostfs/hostfs.h b/fs/hostfs/hostfs.h
index ea87e22..6bbd75c 100644
--- a/fs/hostfs/hostfs.h
+++ b/fs/hostfs/hostfs.h
@@ -74,6 +74,7 @@ extern void *open_dir(char *path, int *err_out);
 extern char *read_dir(void *stream, unsigned long long *pos,
 		      unsigned long long *ino_out, int *len_out);
 extern void close_file(void *stream);
+extern int replace_file(int oldfd, int fd);
 extern void close_dir(void *stream);
 extern int read_file(int fd, unsigned long long *offset, char *buf, int len);
 extern int write_file(int fd, unsigned long long *offset, const char *buf,
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index 8130ce9..dd1e555 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -302,27 +302,22 @@ int hostfs_readdir(struct file *file, void *ent, filldir_t filldir)
 
 int hostfs_file_open(struct inode *ino, struct file *file)
 {
+	static DEFINE_MUTEX(open_mutex);
 	char *name;
 	fmode_t mode = 0;
+	int err;
 	int r = 0, w = 0, fd;
 
 	mode = file->f_mode & (FMODE_READ | FMODE_WRITE);
 	if ((mode & HOSTFS_I(ino)->mode) == mode)
 		return 0;
 
-	/*
-	 * The file may already have been opened, but with the wrong access,
-	 * so this resets things and reopens the file with the new access.
-	 */
-	if (HOSTFS_I(ino)->fd != -1) {
-		close_file(&HOSTFS_I(ino)->fd);
-		HOSTFS_I(ino)->fd = -1;
-	}
+	mode |= HOSTFS_I(ino)->mode;
 
-	HOSTFS_I(ino)->mode |= mode;
-	if (HOSTFS_I(ino)->mode & FMODE_READ)
+retry:
+	if (mode & FMODE_READ)
 		r = 1;
-	if (HOSTFS_I(ino)->mode & FMODE_WRITE)
+	if (mode & FMODE_WRITE)
 		w = 1;
 	if (w)
 		r = 1;
@@ -335,7 +330,31 @@ int hostfs_file_open(struct inode *ino, struct file *file)
 	__putname(name);
 	if (fd < 0)
 		return fd;
-	FILE_HOSTFS_I(file)->fd = fd;
+
+	mutex_lock(&open_mutex);
+	/* somebody else had handled it first? */
+	if ((mode & HOSTFS_I(ino)->mode) == mode) {
+		mutex_unlock(&open_mutex);
+		return 0;
+	}
+	if ((mode | HOSTFS_I(ino)->mode) != mode) {
+		mode |= HOSTFS_I(ino)->mode;
+		mutex_unlock(&open_mutex);
+		close_file(&fd);
+		goto retry;
+	}
+	if (HOSTFS_I(ino)->fd == -1) {
+		HOSTFS_I(ino)->fd = fd;
+	} else {
+		err = replace_file(fd, HOSTFS_I(ino)->fd);
+		close_file(&fd);
+		if (err < 0) {
+			mutex_unlock(&open_mutex);
+			return err;
+		}
+	}
+	HOSTFS_I(ino)->mode = mode;
+	mutex_unlock(&open_mutex);
 
 	return 0;
 }
diff --git a/fs/hostfs/hostfs_user.c b/fs/hostfs/hostfs_user.c
index 91ebfce..6777aa0 100644
--- a/fs/hostfs/hostfs_user.c
+++ b/fs/hostfs/hostfs_user.c
@@ -160,6 +160,11 @@ int fsync_file(int fd, int datasync)
 	return 0;
 }
 
+int replace_file(int oldfd, int fd)
+{
+	return dup2(oldfd, fd);
+}
+
 void close_file(void *stream)
 {
 	close(*((int *) stream));
-- 
cgit v1.1


From 33b0daaa5557e9dadf4c27407fae7d316bab5686 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Mon, 7 Jun 2010 00:12:50 -0400
Subject: switch hppfs to ->evict_inode()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/hppfs/hppfs.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/hppfs/hppfs.c b/fs/hppfs/hppfs.c
index 943ce75..7b02772 100644
--- a/fs/hppfs/hppfs.c
+++ b/fs/hppfs/hppfs.c
@@ -624,12 +624,11 @@ static struct inode *hppfs_alloc_inode(struct super_block *sb)
 	return &hi->vfs_inode;
 }
 
-void hppfs_delete_inode(struct inode *ino)
+void hppfs_evict_inode(struct inode *ino)
 {
+	end_writeback(ino);
 	dput(HPPFS_I(ino)->proc_dentry);
 	mntput(ino->i_sb->s_fs_info);
-
-	clear_inode(ino);
 }
 
 static void hppfs_destroy_inode(struct inode *inode)
@@ -640,7 +639,7 @@ static void hppfs_destroy_inode(struct inode *inode)
 static const struct super_operations hppfs_sbops = {
 	.alloc_inode	= hppfs_alloc_inode,
 	.destroy_inode	= hppfs_destroy_inode,
-	.delete_inode	= hppfs_delete_inode,
+	.evict_inode	= hppfs_evict_inode,
 	.statfs		= hppfs_statfs,
 };
 
-- 
cgit v1.1


From ea544009206baa03d606161656618900260b48e5 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Mon, 7 Jun 2010 00:18:40 -0400
Subject: switch hpfs to ->evict_inode()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/hpfs/hpfs_fn.h |  2 +-
 fs/hpfs/inode.c   | 12 +++++++-----
 fs/hpfs/super.c   |  2 +-
 3 files changed, 9 insertions(+), 7 deletions(-)

(limited to 'fs')

diff --git a/fs/hpfs/hpfs_fn.h b/fs/hpfs/hpfs_fn.h
index 75f9d43..b59eac0 100644
--- a/fs/hpfs/hpfs_fn.h
+++ b/fs/hpfs/hpfs_fn.h
@@ -281,7 +281,7 @@ void hpfs_write_inode(struct inode *);
 void hpfs_write_inode_nolock(struct inode *);
 int hpfs_setattr(struct dentry *, struct iattr *);
 void hpfs_write_if_changed(struct inode *);
-void hpfs_delete_inode(struct inode *);
+void hpfs_evict_inode(struct inode *);
 
 /* map.c */
 
diff --git a/fs/hpfs/inode.c b/fs/hpfs/inode.c
index 3f3b397..56f0da1 100644
--- a/fs/hpfs/inode.c
+++ b/fs/hpfs/inode.c
@@ -302,11 +302,13 @@ void hpfs_write_if_changed(struct inode *inode)
 		hpfs_write_inode(inode);
 }
 
-void hpfs_delete_inode(struct inode *inode)
+void hpfs_evict_inode(struct inode *inode)
 {
 	truncate_inode_pages(&inode->i_data, 0);
-	lock_kernel();
-	hpfs_remove_fnode(inode->i_sb, inode->i_ino);
-	unlock_kernel();
-	clear_inode(inode);
+	end_writeback(inode);
+	if (!inode->i_nlink) {
+		lock_kernel();
+		hpfs_remove_fnode(inode->i_sb, inode->i_ino);
+		unlock_kernel();
+	}
 }
diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c
index aa53842..2607010 100644
--- a/fs/hpfs/super.c
+++ b/fs/hpfs/super.c
@@ -450,7 +450,7 @@ static const struct super_operations hpfs_sops =
 {
 	.alloc_inode	= hpfs_alloc_inode,
 	.destroy_inode	= hpfs_destroy_inode,
-	.delete_inode	= hpfs_delete_inode,
+	.evict_inode	= hpfs_evict_inode,
 	.put_super	= hpfs_put_super,
 	.statfs		= hpfs_statfs,
 	.remount_fs	= hpfs_remount_fs,
-- 
cgit v1.1


From 62aff86fdf18657d9eca7878654415f94f16d027 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Mon, 7 Jun 2010 00:28:54 -0400
Subject: switch jfs to ->evict_inode()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/jfs/inode.c     | 35 ++++++++++++++++++-----------------
 fs/jfs/jfs_inode.h |  2 +-
 fs/jfs/super.c     |  8 +-------
 3 files changed, 20 insertions(+), 25 deletions(-)

(limited to 'fs')

diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c
index c38dc18..9978803 100644
--- a/fs/jfs/inode.c
+++ b/fs/jfs/inode.c
@@ -145,31 +145,32 @@ int jfs_write_inode(struct inode *inode, struct writeback_control *wbc)
 		return 0;
 }
 
-void jfs_delete_inode(struct inode *inode)
+void jfs_evict_inode(struct inode *inode)
 {
-	jfs_info("In jfs_delete_inode, inode = 0x%p", inode);
+	jfs_info("In jfs_evict_inode, inode = 0x%p", inode);
 
-	if (!is_bad_inode(inode))
+	if (!inode->i_nlink && !is_bad_inode(inode)) {
 		dquot_initialize(inode);
 
-	if (!is_bad_inode(inode) &&
-	    (JFS_IP(inode)->fileset == FILESYSTEM_I)) {
-		truncate_inode_pages(&inode->i_data, 0);
+		if (JFS_IP(inode)->fileset == FILESYSTEM_I) {
+			truncate_inode_pages(&inode->i_data, 0);
 
-		if (test_cflag(COMMIT_Freewmap, inode))
-			jfs_free_zero_link(inode);
+			if (test_cflag(COMMIT_Freewmap, inode))
+				jfs_free_zero_link(inode);
 
-		diFree(inode);
+			diFree(inode);
 
-		/*
-		 * Free the inode from the quota allocation.
-		 */
-		dquot_initialize(inode);
-		dquot_free_inode(inode);
-		dquot_drop(inode);
+			/*
+			 * Free the inode from the quota allocation.
+			 */
+			dquot_initialize(inode);
+			dquot_free_inode(inode);
+		}
+	} else {
+		truncate_inode_pages(&inode->i_data, 0);
 	}
-
-	clear_inode(inode);
+	end_writeback(inode);
+	dquot_drop(inode);
 }
 
 void jfs_dirty_inode(struct inode *inode)
diff --git a/fs/jfs/jfs_inode.h b/fs/jfs/jfs_inode.h
index 11042b1..155e91e 100644
--- a/fs/jfs/jfs_inode.h
+++ b/fs/jfs/jfs_inode.h
@@ -27,7 +27,7 @@ extern long jfs_compat_ioctl(struct file *, unsigned int, unsigned long);
 extern struct inode *jfs_iget(struct super_block *, unsigned long);
 extern int jfs_commit_inode(struct inode *, int);
 extern int jfs_write_inode(struct inode *, struct writeback_control *);
-extern void jfs_delete_inode(struct inode *);
+extern void jfs_evict_inode(struct inode *);
 extern void jfs_dirty_inode(struct inode *);
 extern void jfs_truncate(struct inode *);
 extern void jfs_truncate_nolock(struct inode *, loff_t);
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index b38f96b..ec8c3e4 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -132,11 +132,6 @@ static void jfs_destroy_inode(struct inode *inode)
 	kmem_cache_free(jfs_inode_cachep, ji);
 }
 
-static void jfs_clear_inode(struct inode *inode)
-{
-	dquot_drop(inode);
-}
-
 static int jfs_statfs(struct dentry *dentry, struct kstatfs *buf)
 {
 	struct jfs_sb_info *sbi = JFS_SBI(dentry->d_sb);
@@ -765,8 +760,7 @@ static const struct super_operations jfs_super_operations = {
 	.destroy_inode	= jfs_destroy_inode,
 	.dirty_inode	= jfs_dirty_inode,
 	.write_inode	= jfs_write_inode,
-	.delete_inode	= jfs_delete_inode,
-	.clear_inode	= jfs_clear_inode,
+	.evict_inode	= jfs_evict_inode,
 	.put_super	= jfs_put_super,
 	.sync_fs	= jfs_sync_fs,
 	.freeze_fs	= jfs_freeze,
-- 
cgit v1.1


From d640e1b50885b5beb61ccacdebf9f3f05ee2119c Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Mon, 7 Jun 2010 00:34:05 -0400
Subject: switch ubifs to ->evict_inode()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/ubifs/super.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index 4d2f215..899066d 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -327,7 +327,7 @@ static int ubifs_write_inode(struct inode *inode, struct writeback_control *wbc)
 	return err;
 }
 
-static void ubifs_delete_inode(struct inode *inode)
+static void ubifs_evict_inode(struct inode *inode)
 {
 	int err;
 	struct ubifs_info *c = inode->i_sb->s_fs_info;
@@ -343,9 +343,12 @@ static void ubifs_delete_inode(struct inode *inode)
 
 	dbg_gen("inode %lu, mode %#x", inode->i_ino, (int)inode->i_mode);
 	ubifs_assert(!atomic_read(&inode->i_count));
-	ubifs_assert(inode->i_nlink == 0);
 
 	truncate_inode_pages(&inode->i_data, 0);
+
+	if (inode->i_nlink)
+		goto done;
+
 	if (is_bad_inode(inode))
 		goto out;
 
@@ -367,7 +370,8 @@ out:
 		c->nospace = c->nospace_rp = 0;
 		smp_wmb();
 	}
-	clear_inode(inode);
+done:
+	end_writeback(inode);
 }
 
 static void ubifs_dirty_inode(struct inode *inode)
@@ -1824,7 +1828,7 @@ const struct super_operations ubifs_super_operations = {
 	.destroy_inode = ubifs_destroy_inode,
 	.put_super     = ubifs_put_super,
 	.write_inode   = ubifs_write_inode,
-	.delete_inode  = ubifs_delete_inode,
+	.evict_inode   = ubifs_evict_inode,
 	.statfs        = ubifs_statfs,
 	.dirty_inode   = ubifs_dirty_inode,
 	.remount_fs    = ubifs_remount_fs,
-- 
cgit v1.1


From 3aac2b62e0f345c8a637cf94dc62e9000de9d8b6 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Mon, 7 Jun 2010 00:43:39 -0400
Subject: switch udf to ->evict_inode()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/udf/ialloc.c  |  2 --
 fs/udf/inode.c   | 48 +++++++++++++++++++-----------------------------
 fs/udf/super.c   |  3 +--
 fs/udf/udfdecl.h |  3 +--
 4 files changed, 21 insertions(+), 35 deletions(-)

(limited to 'fs')

diff --git a/fs/udf/ialloc.c b/fs/udf/ialloc.c
index 18cd711..75d9304 100644
--- a/fs/udf/ialloc.c
+++ b/fs/udf/ialloc.c
@@ -31,8 +31,6 @@ void udf_free_inode(struct inode *inode)
 	struct super_block *sb = inode->i_sb;
 	struct udf_sb_info *sbi = UDF_SB(sb);
 
-	clear_inode(inode);
-
 	mutex_lock(&sbi->s_alloc_mutex);
 	if (sbi->s_lvid_bh) {
 		struct logicalVolIntegrityDescImpUse *lvidiu =
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index ecddcc2..fc48f37 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -68,37 +68,23 @@ static void udf_update_extents(struct inode *,
 static int udf_get_block(struct inode *, sector_t, struct buffer_head *, int);
 
 
-void udf_delete_inode(struct inode *inode)
-{
-	truncate_inode_pages(&inode->i_data, 0);
-
-	if (is_bad_inode(inode))
-		goto no_delete;
-
-	inode->i_size = 0;
-	udf_truncate(inode);
-	lock_kernel();
-
-	udf_update_inode(inode, IS_SYNC(inode));
-	udf_free_inode(inode);
-
-	unlock_kernel();
-	return;
-
-no_delete:
-	clear_inode(inode);
-}
-
-/*
- * If we are going to release inode from memory, we truncate last inode extent
- * to proper length. We could use drop_inode() but it's called under inode_lock
- * and thus we cannot mark inode dirty there.  We use clear_inode() but we have
- * to make sure to write inode as it's not written automatically.
- */
-void udf_clear_inode(struct inode *inode)
+void udf_evict_inode(struct inode *inode)
 {
 	struct udf_inode_info *iinfo = UDF_I(inode);
+	int want_delete = 0;
 
+	truncate_inode_pages(&inode->i_data, 0);
+
+	if (!inode->i_nlink && !is_bad_inode(inode)) {
+		want_delete = 1;
+		inode->i_size = 0;
+		udf_truncate(inode);
+		lock_kernel();
+		udf_update_inode(inode, IS_SYNC(inode));
+		unlock_kernel();
+	}
+	invalidate_inode_buffers(inode);
+	end_writeback(inode);
 	if (iinfo->i_alloc_type != ICBTAG_FLAG_AD_IN_ICB &&
 	    inode->i_size != iinfo->i_lenExtents) {
 		printk(KERN_WARNING "UDF-fs (%s): Inode %lu (mode %o) has "
@@ -108,9 +94,13 @@ void udf_clear_inode(struct inode *inode)
 			(unsigned long long)inode->i_size,
 			(unsigned long long)iinfo->i_lenExtents);
 	}
-
 	kfree(iinfo->i_ext.i_data);
 	iinfo->i_ext.i_data = NULL;
+	if (want_delete) {
+		lock_kernel();
+		udf_free_inode(inode);
+		unlock_kernel();
+	}
 }
 
 static int udf_writepage(struct page *page, struct writeback_control *wbc)
diff --git a/fs/udf/super.c b/fs/udf/super.c
index 612d1e2..f9f4a9a 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -175,8 +175,7 @@ static const struct super_operations udf_sb_ops = {
 	.alloc_inode	= udf_alloc_inode,
 	.destroy_inode	= udf_destroy_inode,
 	.write_inode	= udf_write_inode,
-	.delete_inode	= udf_delete_inode,
-	.clear_inode	= udf_clear_inode,
+	.evict_inode	= udf_evict_inode,
 	.put_super	= udf_put_super,
 	.sync_fs	= udf_sync_fs,
 	.statfs		= udf_statfs,
diff --git a/fs/udf/udfdecl.h b/fs/udf/udfdecl.h
index 2bac035..6995ab1 100644
--- a/fs/udf/udfdecl.h
+++ b/fs/udf/udfdecl.h
@@ -139,8 +139,7 @@ extern struct buffer_head *udf_expand_dir_adinicb(struct inode *, int *, int *);
 extern struct buffer_head *udf_bread(struct inode *, int, int, int *);
 extern void udf_truncate(struct inode *);
 extern void udf_read_inode(struct inode *);
-extern void udf_delete_inode(struct inode *);
-extern void udf_clear_inode(struct inode *);
+extern void udf_evict_inode(struct inode *);
 extern int udf_write_inode(struct inode *, struct writeback_control *wbc);
 extern long udf_block_map(struct inode *, sector_t);
 extern int udf_extend_file(struct inode *, struct extent_position *,
-- 
cgit v1.1


From 94ee8494ac84606f06d522a2c016d40aabffb378 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Mon, 7 Jun 2010 00:45:56 -0400
Subject: switch ncpfs to ->evict_inode()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/ncpfs/inode.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'fs')

diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c
index b4e8aaa..c043431 100644
--- a/fs/ncpfs/inode.c
+++ b/fs/ncpfs/inode.c
@@ -43,7 +43,7 @@
 #define NCP_DEFAULT_TIME_OUT 10
 #define NCP_DEFAULT_RETRY_COUNT 20
 
-static void ncp_delete_inode(struct inode *);
+static void ncp_evict_inode(struct inode *);
 static void ncp_put_super(struct super_block *);
 static int  ncp_statfs(struct dentry *, struct kstatfs *);
 static int  ncp_show_options(struct seq_file *, struct vfsmount *);
@@ -100,7 +100,7 @@ static const struct super_operations ncp_sops =
 	.alloc_inode	= ncp_alloc_inode,
 	.destroy_inode	= ncp_destroy_inode,
 	.drop_inode	= generic_delete_inode,
-	.delete_inode	= ncp_delete_inode,
+	.evict_inode	= ncp_evict_inode,
 	.put_super	= ncp_put_super,
 	.statfs		= ncp_statfs,
 	.remount_fs	= ncp_remount,
@@ -282,19 +282,19 @@ ncp_iget(struct super_block *sb, struct ncp_entry_info *info)
 }
 
 static void
-ncp_delete_inode(struct inode *inode)
+ncp_evict_inode(struct inode *inode)
 {
 	truncate_inode_pages(&inode->i_data, 0);
+	end_writeback(inode);
 
 	if (S_ISDIR(inode->i_mode)) {
-		DDPRINTK("ncp_delete_inode: put directory %ld\n", inode->i_ino);
+		DDPRINTK("ncp_evict_inode: put directory %ld\n", inode->i_ino);
 	}
 
 	if (ncp_make_closed(inode) != 0) {
 		/* We can't do anything but complain. */
-		printk(KERN_ERR "ncp_delete_inode: could not close\n");
+		printk(KERN_ERR "ncp_evict_inode: could not close\n");
 	}
-	clear_inode(inode);
 }
 
 static void ncp_stop_tasks(struct ncp_server *server) {
-- 
cgit v1.1


From 066d92dcbfa5842d98f6c4c671220cef50a9720f Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Tue, 8 Jun 2010 21:28:10 -0400
Subject: convert ocfs2 to ->evict_inode()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/ocfs2/inode.c | 21 ++++++++++++++-------
 fs/ocfs2/inode.h |  3 +--
 fs/ocfs2/super.c |  3 +--
 3 files changed, 16 insertions(+), 11 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index abb0a95..eb7fd07 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -969,7 +969,7 @@ static void ocfs2_cleanup_delete_inode(struct inode *inode,
 	truncate_inode_pages(&inode->i_data, 0);
 }
 
-void ocfs2_delete_inode(struct inode *inode)
+static void ocfs2_delete_inode(struct inode *inode)
 {
 	int wipe, status;
 	sigset_t oldset;
@@ -1075,20 +1075,17 @@ bail_unlock_nfs_sync:
 bail_unblock:
 	ocfs2_unblock_signals(&oldset);
 bail:
-	clear_inode(inode);
 	mlog_exit_void();
 }
 
-void ocfs2_clear_inode(struct inode *inode)
+static void ocfs2_clear_inode(struct inode *inode)
 {
 	int status;
 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
 
 	mlog_entry_void();
 
-	if (!inode)
-		goto bail;
-
+	end_writeback(inode);
 	mlog(0, "Clearing inode: %llu, nlink = %u\n",
 	     (unsigned long long)OCFS2_I(inode)->ip_blkno, inode->i_nlink);
 
@@ -1180,10 +1177,20 @@ void ocfs2_clear_inode(struct inode *inode)
 	jbd2_journal_release_jbd_inode(OCFS2_SB(inode->i_sb)->journal->j_journal,
 				       &oi->ip_jinode);
 
-bail:
 	mlog_exit_void();
 }
 
+void ocfs2_evict_inode(struct inode *inode)
+{
+	if (!inode->i_nlink ||
+	    (OCFS2_I(inode)->ip_flags & OCFS2_INODE_MAYBE_ORPHANED)) {
+		ocfs2_delete_inode(inode);
+	} else {
+		truncate_inode_pages(&inode->i_data, 0);
+	}
+	ocfs2_clear_inode(inode);
+}
+
 /* Called under inode_lock, with no more references on the
  * struct inode, so it's safe here to check the flags field
  * and to manipulate i_nlink without any other locks. */
diff --git a/fs/ocfs2/inode.h b/fs/ocfs2/inode.h
index 9f5f5fc..975eedd 100644
--- a/fs/ocfs2/inode.h
+++ b/fs/ocfs2/inode.h
@@ -123,8 +123,7 @@ static inline struct ocfs2_caching_info *INODE_CACHE(struct inode *inode)
 	return &OCFS2_I(inode)->ip_metadata_cache;
 }
 
-void ocfs2_clear_inode(struct inode *inode);
-void ocfs2_delete_inode(struct inode *inode);
+void ocfs2_evict_inode(struct inode *inode);
 void ocfs2_drop_inode(struct inode *inode);
 
 /* Flags for ocfs2_iget() */
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 0eaa929..ae1a443 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -145,8 +145,7 @@ static const struct super_operations ocfs2_sops = {
 	.alloc_inode	= ocfs2_alloc_inode,
 	.destroy_inode	= ocfs2_destroy_inode,
 	.drop_inode	= ocfs2_drop_inode,
-	.clear_inode	= ocfs2_clear_inode,
-	.delete_inode	= ocfs2_delete_inode,
+	.evict_inode	= ocfs2_evict_inode,
 	.sync_fs	= ocfs2_sync_fs,
 	.put_super	= ocfs2_put_super,
 	.remount_fs	= ocfs2_remount,
-- 
cgit v1.1


From d5c1515cf374951f07e5bf97b6ff3718d3401b6f Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Mon, 7 Jun 2010 11:05:19 -0400
Subject: switch gfs2 to ->evict_inode()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/gfs2/super.c | 39 +++++++++++++++------------------------
 1 file changed, 15 insertions(+), 24 deletions(-)

(limited to 'fs')

diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 4d1aad3..555f5a4 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -1203,25 +1203,6 @@ static void gfs2_drop_inode(struct inode *inode)
 	generic_drop_inode(inode);
 }
 
-/**
- * gfs2_clear_inode - Deallocate an inode when VFS is done with it
- * @inode: The VFS inode
- *
- */
-
-static void gfs2_clear_inode(struct inode *inode)
-{
-	struct gfs2_inode *ip = GFS2_I(inode);
-
-	ip->i_gl->gl_object = NULL;
-	gfs2_glock_put(ip->i_gl);
-	ip->i_gl = NULL;
-	if (ip->i_iopen_gh.gh_gl) {
-		ip->i_iopen_gh.gh_gl->gl_object = NULL;
-		gfs2_glock_dq_uninit(&ip->i_iopen_gh);
-	}
-}
-
 static int is_ancestor(const struct dentry *d1, const struct dentry *d2)
 {
 	do {
@@ -1347,13 +1328,16 @@ static int gfs2_show_options(struct seq_file *s, struct vfsmount *mnt)
  * is safe, just less efficient.
  */
 
-static void gfs2_delete_inode(struct inode *inode)
+static void gfs2_evict_inode(struct inode *inode)
 {
 	struct gfs2_sbd *sdp = inode->i_sb->s_fs_info;
 	struct gfs2_inode *ip = GFS2_I(inode);
 	struct gfs2_holder gh;
 	int error;
 
+	if (inode->i_nlink)
+		goto out;
+
 	error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
 	if (unlikely(error)) {
 		gfs2_glock_dq_uninit(&ip->i_iopen_gh);
@@ -1407,10 +1391,18 @@ out_unlock:
 	gfs2_holder_uninit(&ip->i_iopen_gh);
 	gfs2_glock_dq_uninit(&gh);
 	if (error && error != GLR_TRYFAILED && error != -EROFS)
-		fs_warn(sdp, "gfs2_delete_inode: %d\n", error);
+		fs_warn(sdp, "gfs2_evict_inode: %d\n", error);
 out:
 	truncate_inode_pages(&inode->i_data, 0);
-	clear_inode(inode);
+	end_writeback(inode);
+
+	ip->i_gl->gl_object = NULL;
+	gfs2_glock_put(ip->i_gl);
+	ip->i_gl = NULL;
+	if (ip->i_iopen_gh.gh_gl) {
+		ip->i_iopen_gh.gh_gl->gl_object = NULL;
+		gfs2_glock_dq_uninit(&ip->i_iopen_gh);
+	}
 }
 
 static struct inode *gfs2_alloc_inode(struct super_block *sb)
@@ -1434,14 +1426,13 @@ const struct super_operations gfs2_super_ops = {
 	.alloc_inode		= gfs2_alloc_inode,
 	.destroy_inode		= gfs2_destroy_inode,
 	.write_inode		= gfs2_write_inode,
-	.delete_inode		= gfs2_delete_inode,
+	.evict_inode		= gfs2_evict_inode,
 	.put_super		= gfs2_put_super,
 	.sync_fs		= gfs2_sync_fs,
 	.freeze_fs 		= gfs2_freeze,
 	.unfreeze_fs		= gfs2_unfreeze,
 	.statfs			= gfs2_statfs,
 	.remount_fs		= gfs2_remount_fs,
-	.clear_inode		= gfs2_clear_inode,
 	.drop_inode		= gfs2_drop_inode,
 	.show_options		= gfs2_show_options,
 };
-- 
cgit v1.1


From bd55597520a2eaa0d71dd7683513a14bfd1bdf5c Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Mon, 7 Jun 2010 11:35:40 -0400
Subject: convert btrfs to ->evict_inode()

NB: do we want btrfs_wait_ordered_range() on eviction of
inodes with positive i_nlink on subvolume with zero root_refs?
If not, btrfs_evict_inode() can be simplified by unconditionally
bailing out in case of i_nlink > 0 in the very beginning...

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/btrfs/ctree.h | 2 +-
 fs/btrfs/inode.c | 8 ++++++--
 fs/btrfs/super.c | 2 +-
 3 files changed, 8 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 29c2009..394d542 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -2389,7 +2389,7 @@ unsigned long btrfs_force_ra(struct address_space *mapping,
 			      pgoff_t offset, pgoff_t last_index);
 int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);
 int btrfs_readpage(struct file *file, struct page *page);
-void btrfs_delete_inode(struct inode *inode);
+void btrfs_evict_inode(struct inode *inode);
 void btrfs_put_inode(struct inode *inode);
 int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc);
 void btrfs_dirty_inode(struct inode *inode);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 95eac01..ce02199 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -3668,7 +3668,7 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr)
 	return err;
 }
 
-void btrfs_delete_inode(struct inode *inode)
+void btrfs_evict_inode(struct inode *inode)
 {
 	struct btrfs_trans_handle *trans;
 	struct btrfs_root *root = BTRFS_I(inode)->root;
@@ -3676,10 +3676,14 @@ void btrfs_delete_inode(struct inode *inode)
 	int ret;
 
 	truncate_inode_pages(&inode->i_data, 0);
+	if (inode->i_nlink && btrfs_root_refs(&root->root_item) != 0)
+		goto no_delete;
+
 	if (is_bad_inode(inode)) {
 		btrfs_orphan_del(NULL, inode);
 		goto no_delete;
 	}
+	/* do we really want it for ->i_nlink > 0 and zero btrfs_root_refs? */
 	btrfs_wait_ordered_range(inode, 0, (u64)-1);
 
 	if (root->fs_info->log_root_recovering) {
@@ -3729,7 +3733,7 @@ void btrfs_delete_inode(struct inode *inode)
 	btrfs_end_transaction(trans, root);
 	btrfs_btree_balance_dirty(root, nr);
 no_delete:
-	clear_inode(inode);
+	end_writeback(inode);
 	return;
 }
 
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index f2393b3..1776dbd 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -797,7 +797,7 @@ static int btrfs_unfreeze(struct super_block *sb)
 
 static const struct super_operations btrfs_super_ops = {
 	.drop_inode	= btrfs_drop_inode,
-	.delete_inode	= btrfs_delete_inode,
+	.evict_inode	= btrfs_evict_inode,
 	.put_super	= btrfs_put_super,
 	.sync_fs	= btrfs_sync_fs,
 	.show_options	= btrfs_show_options,
-- 
cgit v1.1


From 845a2cc0507055278e0fa722ed0f8c791b7401dd Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Mon, 7 Jun 2010 11:37:37 -0400
Subject: convert reiserfs to ->evict_inode()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/reiserfs/inode.c | 13 ++++++++++---
 fs/reiserfs/super.c |  8 +-------
 2 files changed, 11 insertions(+), 10 deletions(-)

(limited to 'fs')

diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 46ba1cf..a94e08b 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -25,7 +25,7 @@ int reiserfs_commit_write(struct file *f, struct page *page,
 int reiserfs_prepare_write(struct file *f, struct page *page,
 			   unsigned from, unsigned to);
 
-void reiserfs_delete_inode(struct inode *inode)
+void reiserfs_evict_inode(struct inode *inode)
 {
 	/* We need blocks for transaction + (user+group) quota update (possibly delete) */
 	int jbegin_count =
@@ -35,10 +35,12 @@ void reiserfs_delete_inode(struct inode *inode)
 	int depth;
 	int err;
 
-	if (!is_bad_inode(inode))
+	if (!inode->i_nlink && !is_bad_inode(inode))
 		dquot_initialize(inode);
 
 	truncate_inode_pages(&inode->i_data, 0);
+	if (inode->i_nlink)
+		goto no_delete;
 
 	depth = reiserfs_write_lock_once(inode->i_sb);
 
@@ -77,9 +79,14 @@ void reiserfs_delete_inode(struct inode *inode)
 		;
 	}
       out:
-	clear_inode(inode);	/* note this must go after the journal_end to prevent deadlock */
+	end_writeback(inode);	/* note this must go after the journal_end to prevent deadlock */
+	dquot_drop(inode);
 	inode->i_blocks = 0;
 	reiserfs_write_unlock_once(inode->i_sb, depth);
+
+no_delete:
+	end_writeback(inode);
+	dquot_drop(inode);
 }
 
 static void _make_cpu_key(struct cpu_key *key, int version, __u32 dirid,
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 1e1ee90..e15ff612 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -591,11 +591,6 @@ out:
 	reiserfs_write_unlock_once(inode->i_sb, lock_depth);
 }
 
-static void reiserfs_clear_inode(struct inode *inode)
-{
-	dquot_drop(inode);
-}
-
 #ifdef CONFIG_QUOTA
 static ssize_t reiserfs_quota_write(struct super_block *, int, const char *,
 				    size_t, loff_t);
@@ -608,8 +603,7 @@ static const struct super_operations reiserfs_sops = {
 	.destroy_inode = reiserfs_destroy_inode,
 	.write_inode = reiserfs_write_inode,
 	.dirty_inode = reiserfs_dirty_inode,
-	.clear_inode = reiserfs_clear_inode,
-	.delete_inode = reiserfs_delete_inode,
+	.evict_inode = reiserfs_evict_inode,
 	.put_super = reiserfs_put_super,
 	.write_super = reiserfs_write_super,
 	.sync_fs = reiserfs_sync_fs,
-- 
cgit v1.1


From 4ec70c9b46b032e7f1b41b543c607d6a33b78a1a Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Mon, 7 Jun 2010 11:42:26 -0400
Subject: convert exofs to ->evict_inode()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/exofs/exofs.h | 2 +-
 fs/exofs/inode.c | 8 ++++----
 fs/exofs/super.c | 2 +-
 3 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'fs')

diff --git a/fs/exofs/exofs.h b/fs/exofs/exofs.h
index 0706ce9..2dc925f 100644
--- a/fs/exofs/exofs.h
+++ b/fs/exofs/exofs.h
@@ -263,7 +263,7 @@ int exofs_write_begin(struct file *file, struct address_space *mapping,
 extern struct inode *exofs_iget(struct super_block *, unsigned long);
 struct inode *exofs_new_inode(struct inode *, int);
 extern int exofs_write_inode(struct inode *, struct writeback_control *wbc);
-extern void exofs_delete_inode(struct inode *);
+extern void exofs_evict_inode(struct inode *);
 
 /* dir.c:                */
 int exofs_add_link(struct dentry *, struct inode *);
diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c
index ccd0ce3..088cb47 100644
--- a/fs/exofs/inode.c
+++ b/fs/exofs/inode.c
@@ -1307,7 +1307,7 @@ static void delete_done(struct exofs_io_state *ios, void *p)
  * from the OSD here.  We make sure the object was created before we try and
  * delete it.
  */
-void exofs_delete_inode(struct inode *inode)
+void exofs_evict_inode(struct inode *inode)
 {
 	struct exofs_i_info *oi = exofs_i(inode);
 	struct super_block *sb = inode->i_sb;
@@ -1318,11 +1318,11 @@ void exofs_delete_inode(struct inode *inode)
 	truncate_inode_pages(&inode->i_data, 0);
 
 	/* TODO: should do better here */
-	if (is_bad_inode(inode))
+	if (inode->i_nlink || is_bad_inode(inode))
 		goto no_delete;
 
 	inode->i_size = 0;
-	clear_inode(inode);
+	end_writeback(inode);
 
 	/* if we are deleting an obj that hasn't been created yet, wait */
 	if (!obj_created(oi)) {
@@ -1353,5 +1353,5 @@ void exofs_delete_inode(struct inode *inode)
 	return;
 
 no_delete:
-	clear_inode(inode);
+	end_writeback(inode);
 }
diff --git a/fs/exofs/super.c b/fs/exofs/super.c
index 03149b9a..32cfd61 100644
--- a/fs/exofs/super.c
+++ b/fs/exofs/super.c
@@ -743,7 +743,7 @@ static const struct super_operations exofs_sops = {
 	.alloc_inode    = exofs_alloc_inode,
 	.destroy_inode  = exofs_destroy_inode,
 	.write_inode    = exofs_write_inode,
-	.delete_inode   = exofs_delete_inode,
+	.evict_inode    = exofs_evict_inode,
 	.put_super      = exofs_put_super,
 	.write_super    = exofs_write_super,
 	.sync_fs	= exofs_sync_fs,
-- 
cgit v1.1


From 6fd1e5c994c392ebdbe45600051b2a32ec4860f1 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Mon, 7 Jun 2010 11:55:00 -0400
Subject: convert nilfs2 to ->evict_inode()

[folded build fix from sfr]

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/nilfs2/inode.c | 28 ++++++++++++++++++++++++----
 fs/nilfs2/nilfs.h |  2 +-
 fs/nilfs2/super.c | 20 +-------------------
 3 files changed, 26 insertions(+), 24 deletions(-)

(limited to 'fs')

diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c
index 051d279..eccb2f2 100644
--- a/fs/nilfs2/inode.c
+++ b/fs/nilfs2/inode.c
@@ -27,6 +27,7 @@
 #include <linux/writeback.h>
 #include <linux/uio.h>
 #include "nilfs.h"
+#include "btnode.h"
 #include "segment.h"
 #include "page.h"
 #include "mdt.h"
@@ -354,7 +355,6 @@ void nilfs_free_inode(struct inode *inode)
 	struct super_block *sb = inode->i_sb;
 	struct nilfs_sb_info *sbi = NILFS_SB(sb);
 
-	clear_inode(inode);
 	/* XXX: check error code? Is there any thing I can do? */
 	(void) nilfs_ifile_delete_inode(sbi->s_ifile, inode->i_ino);
 	atomic_dec(&sbi->s_inodes_count);
@@ -614,16 +614,34 @@ void nilfs_truncate(struct inode *inode)
 	   But truncate has no return value. */
 }
 
-void nilfs_delete_inode(struct inode *inode)
+static void nilfs_clear_inode(struct inode *inode)
+{
+	struct nilfs_inode_info *ii = NILFS_I(inode);
+
+	/*
+	 * Free resources allocated in nilfs_read_inode(), here.
+	 */
+	BUG_ON(!list_empty(&ii->i_dirty));
+	brelse(ii->i_bh);
+	ii->i_bh = NULL;
+
+	if (test_bit(NILFS_I_BMAP, &ii->i_state))
+		nilfs_bmap_clear(ii->i_bmap);
+
+	nilfs_btnode_cache_clear(&ii->i_btnode_cache);
+}
+
+void nilfs_evict_inode(struct inode *inode)
 {
 	struct nilfs_transaction_info ti;
 	struct super_block *sb = inode->i_sb;
 	struct nilfs_inode_info *ii = NILFS_I(inode);
 
-	if (unlikely(is_bad_inode(inode))) {
+	if (inode->i_nlink || unlikely(is_bad_inode(inode))) {
 		if (inode->i_data.nrpages)
 			truncate_inode_pages(&inode->i_data, 0);
-		clear_inode(inode);
+		end_writeback(inode);
+		nilfs_clear_inode(inode);
 		return;
 	}
 	nilfs_transaction_begin(sb, &ti, 0); /* never fails */
@@ -633,6 +651,8 @@ void nilfs_delete_inode(struct inode *inode)
 
 	nilfs_truncate_bmap(ii, 0);
 	nilfs_mark_inode_dirty(inode);
+	end_writeback(inode);
+	nilfs_clear_inode(inode);
 	nilfs_free_inode(inode);
 	/* nilfs_free_inode() marks inode buffer dirty */
 	if (IS_SYNC(inode))
diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h
index 47d6d79..f032797 100644
--- a/fs/nilfs2/nilfs.h
+++ b/fs/nilfs2/nilfs.h
@@ -245,7 +245,7 @@ extern void nilfs_write_inode_common(struct inode *, struct nilfs_inode *, int);
 extern struct inode *nilfs_iget(struct super_block *, unsigned long);
 extern void nilfs_update_inode(struct inode *, struct buffer_head *);
 extern void nilfs_truncate(struct inode *);
-extern void nilfs_delete_inode(struct inode *);
+extern void nilfs_evict_inode(struct inode *);
 extern int nilfs_setattr(struct dentry *, struct iattr *);
 extern int nilfs_load_inode_block(struct nilfs_sb_info *, struct inode *,
 				  struct buffer_head **);
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index 414ef68..7c7572a 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -159,23 +159,6 @@ void nilfs_destroy_inode(struct inode *inode)
 	kmem_cache_free(nilfs_inode_cachep, NILFS_I(inode));
 }
 
-static void nilfs_clear_inode(struct inode *inode)
-{
-	struct nilfs_inode_info *ii = NILFS_I(inode);
-
-	/*
-	 * Free resources allocated in nilfs_read_inode(), here.
-	 */
-	BUG_ON(!list_empty(&ii->i_dirty));
-	brelse(ii->i_bh);
-	ii->i_bh = NULL;
-
-	if (test_bit(NILFS_I_BMAP, &ii->i_state))
-		nilfs_bmap_clear(ii->i_bmap);
-
-	nilfs_btnode_cache_clear(&ii->i_btnode_cache);
-}
-
 static int nilfs_sync_super(struct nilfs_sb_info *sbi, int dupsb)
 {
 	struct the_nilfs *nilfs = sbi->s_nilfs;
@@ -467,7 +450,7 @@ static const struct super_operations nilfs_sops = {
 	/* .write_inode    = nilfs_write_inode, */
 	/* .put_inode      = nilfs_put_inode, */
 	/* .drop_inode	  = nilfs_drop_inode, */
-	.delete_inode   = nilfs_delete_inode,
+	.evict_inode    = nilfs_evict_inode,
 	.put_super      = nilfs_put_super,
 	/* .write_super    = nilfs_write_super, */
 	.sync_fs        = nilfs_sync_fs,
@@ -475,7 +458,6 @@ static const struct super_operations nilfs_sops = {
 	/* .unlockfs */
 	.statfs         = nilfs_statfs,
 	.remount_fs     = nilfs_remount,
-	.clear_inode    = nilfs_clear_inode,
 	/* .umount_begin */
 	.show_options = nilfs_show_options
 };
-- 
cgit v1.1


From 8e22c1a4e429e9facf309c7e7a03ba9cdfd7b106 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Mon, 7 Jun 2010 12:22:31 -0400
Subject: logfs: get rid of magical inodes

ordering problems at ->kill_sb() time are solved by doing iput()
of these suckers in ->put_super()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/logfs/inode.c     | 44 ++++++++++++++++----------------------------
 fs/logfs/journal.c   |  2 --
 fs/logfs/logfs.h     |  1 -
 fs/logfs/readwrite.c |  1 -
 fs/logfs/segment.c   |  1 -
 fs/logfs/super.c     | 23 +++++++++++++++--------
 6 files changed, 31 insertions(+), 41 deletions(-)

(limited to 'fs')

diff --git a/fs/logfs/inode.c b/fs/logfs/inode.c
index f602e23..7811a2a 100644
--- a/fs/logfs/inode.c
+++ b/fs/logfs/inode.c
@@ -235,33 +235,21 @@ static struct inode *logfs_alloc_inode(struct super_block *sb)
  * purpose is to create a new inode that will not trigger the warning if such
  * an inode is still in use.  An ugly hack, no doubt.  Suggections for
  * improvement are welcome.
+ *
+ * AV: that's what ->put_super() is for...
  */
 struct inode *logfs_new_meta_inode(struct super_block *sb, u64 ino)
 {
 	struct inode *inode;
 
-	inode = logfs_alloc_inode(sb);
+	inode = new_inode(sb);
 	if (!inode)
 		return ERR_PTR(-ENOMEM);
 
 	inode->i_mode = S_IFREG;
 	inode->i_ino = ino;
-	inode->i_sb = sb;
-
-	/* This is a blatant copy of alloc_inode code.  We'd need alloc_inode
-	 * to be nonstatic, alas. */
-	{
-		struct address_space * const mapping = &inode->i_data;
-
-		mapping->a_ops = &logfs_reg_aops;
-		mapping->host = inode;
-		mapping->flags = 0;
-		mapping_set_gfp_mask(mapping, GFP_NOFS);
-		mapping->assoc_mapping = NULL;
-		mapping->backing_dev_info = &default_backing_dev_info;
-		inode->i_mapping = mapping;
-		inode->i_nlink = 1;
-	}
+	inode->i_data.a_ops = &logfs_reg_aops;
+	mapping_set_gfp_mask(&inode->i_data, GFP_NOFS);
 
 	return inode;
 }
@@ -277,7 +265,7 @@ struct inode *logfs_read_meta_inode(struct super_block *sb, u64 ino)
 
 	err = logfs_read_inode(inode);
 	if (err) {
-		destroy_meta_inode(inode);
+		iput(inode);
 		return ERR_PTR(err);
 	}
 	logfs_inode_setops(inode);
@@ -298,16 +286,6 @@ static int logfs_write_inode(struct inode *inode, struct writeback_control *wbc)
 	return ret;
 }
 
-void destroy_meta_inode(struct inode *inode)
-{
-	if (inode) {
-		if (inode->i_data.nrpages)
-			truncate_inode_pages(&inode->i_data, 0);
-		logfs_clear_inode(inode);
-		kmem_cache_free(logfs_inode_cache, logfs_inode(inode));
-	}
-}
-
 /* called with inode_lock held */
 static void logfs_drop_inode(struct inode *inode)
 {
@@ -384,12 +362,22 @@ static int logfs_sync_fs(struct super_block *sb, int wait)
 	return 0;
 }
 
+static void logfs_put_super(struct super_block *sb)
+{
+	struct logfs_super *super = logfs_super(sb);
+	/* kill the meta-inodes */
+	iput(super->s_master_inode);
+	iput(super->s_segfile_inode);
+	iput(super->s_mapping_inode);
+}
+
 const struct super_operations logfs_super_operations = {
 	.alloc_inode	= logfs_alloc_inode,
 	.clear_inode	= logfs_clear_inode,
 	.delete_inode	= logfs_delete_inode,
 	.destroy_inode	= logfs_destroy_inode,
 	.drop_inode	= logfs_drop_inode,
+	.put_super	= logfs_put_super,
 	.write_inode	= logfs_write_inode,
 	.statfs		= logfs_statfs,
 	.sync_fs	= logfs_sync_fs,
diff --git a/fs/logfs/journal.c b/fs/logfs/journal.c
index 4b0e061..f46ee8b 100644
--- a/fs/logfs/journal.c
+++ b/fs/logfs/journal.c
@@ -889,8 +889,6 @@ void logfs_cleanup_journal(struct super_block *sb)
 	struct logfs_super *super = logfs_super(sb);
 
 	btree_grim_visitor32(&super->s_reserved_segments, 0, NULL);
-	destroy_meta_inode(super->s_master_inode);
-	super->s_master_inode = NULL;
 
 	kfree(super->s_compressed_je);
 	kfree(super->s_je);
diff --git a/fs/logfs/logfs.h b/fs/logfs/logfs.h
index c838c4d..5e65171 100644
--- a/fs/logfs/logfs.h
+++ b/fs/logfs/logfs.h
@@ -525,7 +525,6 @@ struct inode *logfs_new_meta_inode(struct super_block *sb, u64 ino);
 struct inode *logfs_read_meta_inode(struct super_block *sb, u64 ino);
 int logfs_init_inode_cache(void);
 void logfs_destroy_inode_cache(void);
-void destroy_meta_inode(struct inode *inode);
 void logfs_set_blocks(struct inode *inode, u64 no);
 /* these logically belong into inode.c but actually reside in readwrite.c */
 int logfs_read_inode(struct inode *inode);
diff --git a/fs/logfs/readwrite.c b/fs/logfs/readwrite.c
index 0718d11..580d126 100644
--- a/fs/logfs/readwrite.c
+++ b/fs/logfs/readwrite.c
@@ -2272,7 +2272,6 @@ void logfs_cleanup_rw(struct super_block *sb)
 {
 	struct logfs_super *super = logfs_super(sb);
 
-	destroy_meta_inode(super->s_segfile_inode);
 	logfs_mempool_destroy(super->s_block_pool);
 	logfs_mempool_destroy(super->s_shadow_pool);
 }
diff --git a/fs/logfs/segment.c b/fs/logfs/segment.c
index a9657af..9d51873 100644
--- a/fs/logfs/segment.c
+++ b/fs/logfs/segment.c
@@ -929,5 +929,4 @@ void logfs_cleanup_areas(struct super_block *sb)
 	for_each_area(i)
 		free_area(super->s_area[i]);
 	free_area(super->s_journal_area);
-	destroy_meta_inode(super->s_mapping_inode);
 }
diff --git a/fs/logfs/super.c b/fs/logfs/super.c
index d651e10..5336155 100644
--- a/fs/logfs/super.c
+++ b/fs/logfs/super.c
@@ -342,24 +342,27 @@ static int logfs_get_sb_final(struct super_block *sb, struct vfsmount *mnt)
 		goto fail;
 	}
 
+	/* at that point we know that ->put_super() will be called */
 	super->s_erase_page = alloc_pages(GFP_KERNEL, 0);
 	if (!super->s_erase_page)
-		goto fail;
+		return -ENOMEM;
 	memset(page_address(super->s_erase_page), 0xFF, PAGE_SIZE);
 
 	/* FIXME: check for read-only mounts */
 	err = logfs_make_writeable(sb);
-	if (err)
-		goto fail1;
+	if (err) {
+		__free_page(super->s_erase_page);
+		return err;
+	}
 
 	log_super("LogFS: Finished mounting\n");
 	simple_set_mnt(mnt, sb);
 	return 0;
 
-fail1:
-	__free_page(super->s_erase_page);
 fail:
-	iput(logfs_super(sb)->s_master_inode);
+	iput(super->s_master_inode);
+	iput(super->s_segfile_inode);
+	iput(super->s_mapping_inode);
 	return -EIO;
 }
 
@@ -580,10 +583,14 @@ int logfs_get_sb_device(struct file_system_type *type, int flags,
 	sb->s_flags |= MS_ACTIVE;
 	err = logfs_get_sb_final(sb, mnt);
 	if (err)
-		goto err1;
-	return 0;
+		deactivate_locked_super(sb);
+	return err;
 
 err1:
+	/* no ->s_root, no ->put_super() */
+	iput(super->s_master_inode);
+	iput(super->s_segfile_inode);
+	iput(super->s_mapping_inode);
 	deactivate_locked_super(sb);
 	return err;
 err0:
-- 
cgit v1.1


From 7da08fd17a6e42d80f0f3897a5cbd682e77bcdb4 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Mon, 7 Jun 2010 13:11:34 -0400
Subject: convert logfs to ->evict_inode()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/logfs/inode.c     |  3 +--
 fs/logfs/logfs.h     |  3 +--
 fs/logfs/readwrite.c | 61 +++++++++++++++++++++++++---------------------------
 3 files changed, 31 insertions(+), 36 deletions(-)

(limited to 'fs')

diff --git a/fs/logfs/inode.c b/fs/logfs/inode.c
index 7811a2a..78be674 100644
--- a/fs/logfs/inode.c
+++ b/fs/logfs/inode.c
@@ -373,9 +373,8 @@ static void logfs_put_super(struct super_block *sb)
 
 const struct super_operations logfs_super_operations = {
 	.alloc_inode	= logfs_alloc_inode,
-	.clear_inode	= logfs_clear_inode,
-	.delete_inode	= logfs_delete_inode,
 	.destroy_inode	= logfs_destroy_inode,
+	.evict_inode	= logfs_evict_inode,
 	.drop_inode	= logfs_drop_inode,
 	.put_super	= logfs_put_super,
 	.write_inode	= logfs_write_inode,
diff --git a/fs/logfs/logfs.h b/fs/logfs/logfs.h
index 5e65171..5e3b720 100644
--- a/fs/logfs/logfs.h
+++ b/fs/logfs/logfs.h
@@ -529,8 +529,7 @@ void logfs_set_blocks(struct inode *inode, u64 no);
 /* these logically belong into inode.c but actually reside in readwrite.c */
 int logfs_read_inode(struct inode *inode);
 int __logfs_write_inode(struct inode *inode, long flags);
-void logfs_delete_inode(struct inode *inode);
-void logfs_clear_inode(struct inode *inode);
+void logfs_evict_inode(struct inode *inode);
 
 /* journal.c */
 void logfs_write_anchor(struct super_block *sb);
diff --git a/fs/logfs/readwrite.c b/fs/logfs/readwrite.c
index 580d126..6127baf 100644
--- a/fs/logfs/readwrite.c
+++ b/fs/logfs/readwrite.c
@@ -1972,31 +1972,6 @@ static struct page *inode_to_page(struct inode *inode)
 	return page;
 }
 
-/* Cheaper version of write_inode.  All changes are concealed in
- * aliases, which are moved back.  No write to the medium happens.
- */
-void logfs_clear_inode(struct inode *inode)
-{
-	struct super_block *sb = inode->i_sb;
-	struct logfs_inode *li = logfs_inode(inode);
-	struct logfs_block *block = li->li_block;
-	struct page *page;
-
-	/* Only deleted files may be dirty at this point */
-	BUG_ON(inode->i_state & I_DIRTY && inode->i_nlink);
-	if (!block)
-		return;
-	if ((logfs_super(sb)->s_flags & LOGFS_SB_FLAG_SHUTDOWN)) {
-		block->ops->free_block(inode->i_sb, block);
-		return;
-	}
-
-	BUG_ON(inode->i_ino < LOGFS_RESERVED_INOS);
-	page = inode_to_page(inode);
-	BUG_ON(!page); /* FIXME: Use emergency page */
-	logfs_put_write_page(page);
-}
-
 static int do_write_inode(struct inode *inode)
 {
 	struct super_block *sb = inode->i_sb;
@@ -2164,18 +2139,40 @@ static int do_delete_inode(struct inode *inode)
  * ZOMBIE inodes have already been deleted before and should remain dead,
  * if it weren't for valid checking.  No need to kill them again here.
  */
-void logfs_delete_inode(struct inode *inode)
+void logfs_evict_inode(struct inode *inode)
 {
+	struct super_block *sb = inode->i_sb;
 	struct logfs_inode *li = logfs_inode(inode);
+	struct logfs_block *block = li->li_block;
+	struct page *page;
 
-	if (!(li->li_flags & LOGFS_IF_ZOMBIE)) {
-		li->li_flags |= LOGFS_IF_ZOMBIE;
-		if (i_size_read(inode) > 0)
-			logfs_truncate(inode, 0);
-		do_delete_inode(inode);
+	if (!inode->i_nlink) {
+		if (!(li->li_flags & LOGFS_IF_ZOMBIE)) {
+			li->li_flags |= LOGFS_IF_ZOMBIE;
+			if (i_size_read(inode) > 0)
+				logfs_truncate(inode, 0);
+			do_delete_inode(inode);
+		}
 	}
 	truncate_inode_pages(&inode->i_data, 0);
-	clear_inode(inode);
+	end_writeback(inode);
+
+	/* Cheaper version of write_inode.  All changes are concealed in
+	 * aliases, which are moved back.  No write to the medium happens.
+	 */
+	/* Only deleted files may be dirty at this point */
+	BUG_ON(inode->i_state & I_DIRTY && inode->i_nlink);
+	if (!block)
+		return;
+	if ((logfs_super(sb)->s_flags & LOGFS_SB_FLAG_SHUTDOWN)) {
+		block->ops->free_block(inode->i_sb, block);
+		return;
+	}
+
+	BUG_ON(inode->i_ino < LOGFS_RESERVED_INOS);
+	page = inode_to_page(inode);
+	BUG_ON(!page); /* FIXME: Use emergency page */
+	logfs_put_write_page(page);
 }
 
 void btree_write_block(struct logfs_block *block)
-- 
cgit v1.1


From 0930fcc1ee2f0a810b938bc283a3a262d7adccbb Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Mon, 7 Jun 2010 13:16:22 -0400
Subject: convert ext4 to ->evict_inode()

pretty much brute-force...

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/ext4/ext4.h   |  3 ++-
 fs/ext4/ialloc.c |  2 +-
 fs/ext4/inode.c  | 11 ++++++++---
 fs/ext4/super.c  | 10 +++++-----
 4 files changed, 16 insertions(+), 10 deletions(-)

(limited to 'fs')

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 19a4de5..6a0d52c 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1571,7 +1571,8 @@ extern int  ext4_write_inode(struct inode *, struct writeback_control *);
 extern int  ext4_setattr(struct dentry *, struct iattr *);
 extern int  ext4_getattr(struct vfsmount *mnt, struct dentry *dentry,
 				struct kstat *stat);
-extern void ext4_delete_inode(struct inode *);
+extern void ext4_evict_inode(struct inode *);
+extern void ext4_clear_inode(struct inode *);
 extern int  ext4_sync_inode(handle_t *, struct inode *);
 extern void ext4_dirty_inode(struct inode *);
 extern int ext4_change_inode_journal_flag(struct inode *, int);
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 25c4b31..07e791a 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -222,7 +222,7 @@ void ext4_free_inode(handle_t *handle, struct inode *inode)
 	is_directory = S_ISDIR(inode->i_mode);
 
 	/* Do this BEFORE marking the inode not in use or returning an error */
-	clear_inode(inode);
+	ext4_clear_inode(inode);
 
 	es = EXT4_SB(sb)->s_es;
 	if (ino < EXT4_FIRST_INO(sb) || ino > le32_to_cpu(es->s_inodes_count)) {
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 1fb3903..c6d365f 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -167,11 +167,16 @@ int ext4_truncate_restart_trans(handle_t *handle, struct inode *inode,
 /*
  * Called at the last iput() if i_nlink is zero.
  */
-void ext4_delete_inode(struct inode *inode)
+void ext4_evict_inode(struct inode *inode)
 {
 	handle_t *handle;
 	int err;
 
+	if (inode->i_nlink) {
+		truncate_inode_pages(&inode->i_data, 0);
+		goto no_delete;
+	}
+
 	if (!is_bad_inode(inode))
 		dquot_initialize(inode);
 
@@ -245,13 +250,13 @@ void ext4_delete_inode(struct inode *inode)
 	 */
 	if (ext4_mark_inode_dirty(handle, inode))
 		/* If that failed, just do the required in-core inode clear. */
-		clear_inode(inode);
+		ext4_clear_inode(inode);
 	else
 		ext4_free_inode(handle, inode);
 	ext4_journal_stop(handle);
 	return;
 no_delete:
-	clear_inode(inode);	/* We must guarantee clearing of inode... */
+	ext4_clear_inode(inode);	/* We must guarantee clearing of inode... */
 }
 
 typedef struct {
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 4e8983a..f627a6a 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -813,8 +813,10 @@ static void destroy_inodecache(void)
 	kmem_cache_destroy(ext4_inode_cachep);
 }
 
-static void ext4_clear_inode(struct inode *inode)
+void ext4_clear_inode(struct inode *inode)
 {
+	invalidate_inode_buffers(inode);
+	end_writeback(inode);
 	dquot_drop(inode);
 	ext4_discard_preallocations(inode);
 	if (EXT4_JOURNAL(inode))
@@ -1100,14 +1102,13 @@ static const struct super_operations ext4_sops = {
 	.destroy_inode	= ext4_destroy_inode,
 	.write_inode	= ext4_write_inode,
 	.dirty_inode	= ext4_dirty_inode,
-	.delete_inode	= ext4_delete_inode,
+	.evict_inode	= ext4_evict_inode,
 	.put_super	= ext4_put_super,
 	.sync_fs	= ext4_sync_fs,
 	.freeze_fs	= ext4_freeze,
 	.unfreeze_fs	= ext4_unfreeze,
 	.statfs		= ext4_statfs,
 	.remount_fs	= ext4_remount,
-	.clear_inode	= ext4_clear_inode,
 	.show_options	= ext4_show_options,
 #ifdef CONFIG_QUOTA
 	.quota_read	= ext4_quota_read,
@@ -1121,12 +1122,11 @@ static const struct super_operations ext4_nojournal_sops = {
 	.destroy_inode	= ext4_destroy_inode,
 	.write_inode	= ext4_write_inode,
 	.dirty_inode	= ext4_dirty_inode,
-	.delete_inode	= ext4_delete_inode,
+	.evict_inode	= ext4_evict_inode,
 	.write_super	= ext4_write_super,
 	.put_super	= ext4_put_super,
 	.statfs		= ext4_statfs,
 	.remount_fs	= ext4_remount,
-	.clear_inode	= ext4_clear_inode,
 	.show_options	= ext4_show_options,
 #ifdef CONFIG_QUOTA
 	.quota_read	= ext4_quota_read,
-- 
cgit v1.1


From 07958f9f5b9e8422c15368a1733a52ea99009896 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Mon, 7 Jun 2010 13:20:09 -0400
Subject: ->delete_inode() is gone

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/inode.c | 2 --
 1 file changed, 2 deletions(-)

(limited to 'fs')

diff --git a/fs/inode.c b/fs/inode.c
index 93e7a5e..7a1bea9 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -335,8 +335,6 @@ static void evict(struct inode *inode, int delete)
 
 	if (op->evict_inode) {
 		op->evict_inode(inode);
-	} else if (delete && op->delete_inode) {
-		op->delete_inode(inode);
 	} else {
 		if (inode->i_data.nrpages)
 			truncate_inode_pages(&inode->i_data, 0);
-- 
cgit v1.1


From 644da5960ded137c339bc69bc2aeac54f73aad59 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Mon, 7 Jun 2010 13:21:05 -0400
Subject: fs/inode.c:evict() doesn't care about delete vs. non-delete paths now

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/inode.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/inode.c b/fs/inode.c
index 7a1bea9..8320bef 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -329,7 +329,7 @@ void clear_inode(struct inode *inode)
 }
 EXPORT_SYMBOL(clear_inode);
 
-static void evict(struct inode *inode, int delete)
+static void evict(struct inode *inode)
 {
 	const struct super_operations *op = inode->i_sb->s_op;
 
@@ -363,7 +363,7 @@ static void dispose_list(struct list_head *head)
 		inode = list_first_entry(head, struct inode, i_list);
 		list_del(&inode->i_list);
 
-		evict(inode, 0);
+		evict(inode);
 
 		spin_lock(&inode_lock);
 		hlist_del_init(&inode->i_hash);
@@ -1224,7 +1224,7 @@ void generic_delete_inode(struct inode *inode)
 	inodes_stat.nr_inodes--;
 	spin_unlock(&inode_lock);
 
-	evict(inode, 1);
+	evict(inode);
 
 	spin_lock(&inode_lock);
 	hlist_del_init(&inode->i_hash);
@@ -1279,7 +1279,7 @@ static void generic_forget_inode(struct inode *inode)
 {
 	if (!generic_detach_inode(inode))
 		return;
-	evict(inode, 0);
+	evict(inode);
 	wake_up_inode(inode);
 	destroy_inode(inode);
 }
-- 
cgit v1.1


From 30140837f256558c943636245ab90897a9455a70 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Mon, 7 Jun 2010 13:23:20 -0400
Subject: fs/inode.c:clear_inode() is gone

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/inode.c | 28 ++++------------------------
 1 file changed, 4 insertions(+), 24 deletions(-)

(limited to 'fs')

diff --git a/fs/inode.c b/fs/inode.c
index 8320bef..82ca356 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -306,29 +306,6 @@ void end_writeback(struct inode *inode)
 }
 EXPORT_SYMBOL(end_writeback);
 
-/**
- * clear_inode - clear an inode
- * @inode: inode to clear
- *
- * This is called by the filesystem to tell us
- * that the inode is no longer useful. We just
- * terminate it with extreme prejudice.
- */
-void clear_inode(struct inode *inode)
-{
-	might_sleep();
-	invalidate_inode_buffers(inode);
-
-	BUG_ON(inode->i_data.nrpages);
-	BUG_ON(!(inode->i_state & I_FREEING));
-	BUG_ON(inode->i_state & I_CLEAR);
-	inode_sync_wait(inode);
-	if (inode->i_sb->s_op->clear_inode)
-		inode->i_sb->s_op->clear_inode(inode);
-	inode->i_state = I_FREEING | I_CLEAR;
-}
-EXPORT_SYMBOL(clear_inode);
-
 static void evict(struct inode *inode)
 {
 	const struct super_operations *op = inode->i_sb->s_op;
@@ -338,7 +315,10 @@ static void evict(struct inode *inode)
 	} else {
 		if (inode->i_data.nrpages)
 			truncate_inode_pages(&inode->i_data, 0);
-		clear_inode(inode);
+		invalidate_inode_buffers(inode);
+		end_writeback(inode);
+		if (op->clear_inode)
+			op->clear_inode(inode);
 	}
 	if (S_ISBLK(inode->i_mode) && inode->i_bdev)
 		bd_forget(inode);
-- 
cgit v1.1


From 45321ac54316eaeeebde0b5f728a1791e500974c Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Mon, 7 Jun 2010 13:43:19 -0400
Subject: Make ->drop_inode() just return whether inode needs to be dropped

... and let iput_final() do the actual eviction or retention

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/btrfs/ctree.h |   2 +-
 fs/btrfs/inode.c |  11 +++---
 fs/cifs/cifsfs.c |   9 ++---
 fs/gfs2/super.c  |   4 +-
 fs/inode.c       | 113 +++++++++++++++++--------------------------------------
 fs/logfs/inode.c |   4 +-
 fs/ocfs2/inode.c |   8 ++--
 fs/ocfs2/inode.h |   2 +-
 8 files changed, 55 insertions(+), 98 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 394d542..eaf286a 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -2395,7 +2395,7 @@ int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc);
 void btrfs_dirty_inode(struct inode *inode);
 struct inode *btrfs_alloc_inode(struct super_block *sb);
 void btrfs_destroy_inode(struct inode *inode);
-void btrfs_drop_inode(struct inode *inode);
+int btrfs_drop_inode(struct inode *inode);
 int btrfs_init_cachep(void);
 void btrfs_destroy_cachep(void);
 long btrfs_ioctl_trans_end(struct file *file);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index ce02199..2c54f04 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -3943,7 +3943,7 @@ again:
 			if (atomic_read(&inode->i_count) > 1)
 				d_prune_aliases(inode);
 			/*
-			 * btrfs_drop_inode will remove it from
+			 * btrfs_drop_inode will have it removed from
 			 * the inode cache when its usage count
 			 * hits zero.
 			 */
@@ -6337,13 +6337,14 @@ free:
 	kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode));
 }
 
-void btrfs_drop_inode(struct inode *inode)
+int btrfs_drop_inode(struct inode *inode)
 {
 	struct btrfs_root *root = BTRFS_I(inode)->root;
-	if (inode->i_nlink > 0 && btrfs_root_refs(&root->root_item) == 0)
-		generic_delete_inode(inode);
+
+	if (btrfs_root_refs(&root->root_item) == 0)
+		return 1;
 	else
-		generic_drop_inode(inode);
+		return generic_drop_inode(inode);
 }
 
 static void init_once(void *foo)
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 8a2cf12..20914f5 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -480,14 +480,13 @@ static int cifs_remount(struct super_block *sb, int *flags, char *data)
 	return 0;
 }
 
-void cifs_drop_inode(struct inode *inode)
+static int cifs_drop_inode(struct inode *inode)
 {
 	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
 
-	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM)
-		return generic_drop_inode(inode);
-
-	return generic_delete_inode(inode);
+	/* no serverino => unconditional eviction */
+	return !(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) ||
+		generic_drop_inode(inode);
 }
 
 static const struct super_operations cifs_super_ops = {
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 555f5a4..fa865ab 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -1191,7 +1191,7 @@ static int gfs2_remount_fs(struct super_block *sb, int *flags, char *data)
  * node for later deallocation.
  */
 
-static void gfs2_drop_inode(struct inode *inode)
+static int gfs2_drop_inode(struct inode *inode)
 {
 	struct gfs2_inode *ip = GFS2_I(inode);
 
@@ -1200,7 +1200,7 @@ static void gfs2_drop_inode(struct inode *inode)
 		if (gl && test_bit(GLF_DEMOTE, &gl->gl_flags))
 			clear_nlink(inode);
 	}
-	generic_drop_inode(inode);
+	return generic_drop_inode(inode);
 }
 
 static int is_ancestor(const struct dentry *d1, const struct dentry *d2)
diff --git a/fs/inode.c b/fs/inode.c
index 82ca356..0e07761 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -1183,58 +1183,51 @@ void remove_inode_hash(struct inode *inode)
 }
 EXPORT_SYMBOL(remove_inode_hash);
 
+int generic_delete_inode(struct inode *inode)
+{
+	return 1;
+}
+EXPORT_SYMBOL(generic_delete_inode);
+
 /*
- * Tell the filesystem that this inode is no longer of any interest and should
- * be completely destroyed.
- *
- * We leave the inode in the inode hash table until *after* the filesystem's
- * ->delete_inode completes.  This ensures that an iget (such as nfsd might
- * instigate) will always find up-to-date information either in the hash or on
- * disk.
- *
- * I_FREEING is set so that no-one will take a new reference to the inode while
- * it is being deleted.
+ * Normal UNIX filesystem behaviour: delete the
+ * inode when the usage count drops to zero, and
+ * i_nlink is zero.
  */
-void generic_delete_inode(struct inode *inode)
+int generic_drop_inode(struct inode *inode)
 {
-	list_del_init(&inode->i_list);
-	list_del_init(&inode->i_sb_list);
-	WARN_ON(inode->i_state & I_NEW);
-	inode->i_state |= I_FREEING;
-	inodes_stat.nr_inodes--;
-	spin_unlock(&inode_lock);
-
-	evict(inode);
-
-	spin_lock(&inode_lock);
-	hlist_del_init(&inode->i_hash);
-	spin_unlock(&inode_lock);
-	wake_up_inode(inode);
-	BUG_ON(inode->i_state != (I_FREEING | I_CLEAR));
-	destroy_inode(inode);
+	return !inode->i_nlink || hlist_unhashed(&inode->i_hash);
 }
-EXPORT_SYMBOL(generic_delete_inode);
+EXPORT_SYMBOL_GPL(generic_drop_inode);
 
-/**
- *	generic_detach_inode - remove inode from inode lists
- *	@inode: inode to remove
- *
- *	Remove inode from inode lists, write it if it's dirty. This is just an
- *	internal VFS helper exported for hugetlbfs. Do not use!
+/*
+ * Called when we're dropping the last reference
+ * to an inode.
  *
- *	Returns 1 if inode should be completely destroyed.
+ * Call the FS "drop_inode()" function, defaulting to
+ * the legacy UNIX filesystem behaviour.  If it tells
+ * us to evict inode, do so.  Otherwise, retain inode
+ * in cache if fs is alive, sync and evict if fs is
+ * shutting down.
  */
-static int generic_detach_inode(struct inode *inode)
+static void iput_final(struct inode *inode)
 {
 	struct super_block *sb = inode->i_sb;
+	const struct super_operations *op = inode->i_sb->s_op;
+	int drop;
+
+	if (op && op->drop_inode)
+		drop = op->drop_inode(inode);
+	else
+		drop = generic_drop_inode(inode);
 
-	if (!hlist_unhashed(&inode->i_hash)) {
+	if (!drop) {
 		if (!(inode->i_state & (I_DIRTY|I_SYNC)))
 			list_move(&inode->i_list, &inode_unused);
 		inodes_stat.nr_unused++;
 		if (sb->s_flags & MS_ACTIVE) {
 			spin_unlock(&inode_lock);
-			return 0;
+			return;
 		}
 		WARN_ON(inode->i_state & I_NEW);
 		inode->i_state |= I_WILL_FREE;
@@ -1252,53 +1245,15 @@ static int generic_detach_inode(struct inode *inode)
 	inode->i_state |= I_FREEING;
 	inodes_stat.nr_inodes--;
 	spin_unlock(&inode_lock);
-	return 1;
-}
-
-static void generic_forget_inode(struct inode *inode)
-{
-	if (!generic_detach_inode(inode))
-		return;
 	evict(inode);
+	spin_lock(&inode_lock);
+	hlist_del_init(&inode->i_hash);
+	spin_unlock(&inode_lock);
 	wake_up_inode(inode);
+	BUG_ON(inode->i_state != (I_FREEING | I_CLEAR));
 	destroy_inode(inode);
 }
 
-/*
- * Normal UNIX filesystem behaviour: delete the
- * inode when the usage count drops to zero, and
- * i_nlink is zero.
- */
-void generic_drop_inode(struct inode *inode)
-{
-	if (!inode->i_nlink)
-		generic_delete_inode(inode);
-	else
-		generic_forget_inode(inode);
-}
-EXPORT_SYMBOL_GPL(generic_drop_inode);
-
-/*
- * Called when we're dropping the last reference
- * to an inode.
- *
- * Call the FS "drop()" function, defaulting to
- * the legacy UNIX filesystem behaviour..
- *
- * NOTE! NOTE! NOTE! We're called with the inode lock
- * held, and the drop function is supposed to release
- * the lock!
- */
-static inline void iput_final(struct inode *inode)
-{
-	const struct super_operations *op = inode->i_sb->s_op;
-	void (*drop)(struct inode *) = generic_drop_inode;
-
-	if (op && op->drop_inode)
-		drop = op->drop_inode;
-	drop(inode);
-}
-
 /**
  *	iput	- put an inode
  *	@inode: inode to put
diff --git a/fs/logfs/inode.c b/fs/logfs/inode.c
index 78be674..d8c71ec 100644
--- a/fs/logfs/inode.c
+++ b/fs/logfs/inode.c
@@ -287,7 +287,7 @@ static int logfs_write_inode(struct inode *inode, struct writeback_control *wbc)
 }
 
 /* called with inode_lock held */
-static void logfs_drop_inode(struct inode *inode)
+static int logfs_drop_inode(struct inode *inode)
 {
 	struct logfs_super *super = logfs_super(inode->i_sb);
 	struct logfs_inode *li = logfs_inode(inode);
@@ -295,7 +295,7 @@ static void logfs_drop_inode(struct inode *inode)
 	spin_lock(&logfs_inode_lock);
 	list_move(&li->li_freeing_list, &super->s_freeing_list);
 	spin_unlock(&logfs_inode_lock);
-	generic_drop_inode(inode);
+	return generic_drop_inode(inode);
 }
 
 static void logfs_set_ino_generation(struct super_block *sb,
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index eb7fd07..0492464 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -1194,9 +1194,10 @@ void ocfs2_evict_inode(struct inode *inode)
 /* Called under inode_lock, with no more references on the
  * struct inode, so it's safe here to check the flags field
  * and to manipulate i_nlink without any other locks. */
-void ocfs2_drop_inode(struct inode *inode)
+int ocfs2_drop_inode(struct inode *inode)
 {
 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
+	int res;
 
 	mlog_entry_void();
 
@@ -1204,11 +1205,12 @@ void ocfs2_drop_inode(struct inode *inode)
 	     (unsigned long long)oi->ip_blkno, inode->i_nlink, oi->ip_flags);
 
 	if (oi->ip_flags & OCFS2_INODE_MAYBE_ORPHANED)
-		generic_delete_inode(inode);
+		res = 1;
 	else
-		generic_drop_inode(inode);
+		res = generic_drop_inode(inode);
 
 	mlog_exit_void();
+	return res;
 }
 
 /*
diff --git a/fs/ocfs2/inode.h b/fs/ocfs2/inode.h
index 975eedd..6de5a86 100644
--- a/fs/ocfs2/inode.h
+++ b/fs/ocfs2/inode.h
@@ -124,7 +124,7 @@ static inline struct ocfs2_caching_info *INODE_CACHE(struct inode *inode)
 }
 
 void ocfs2_evict_inode(struct inode *inode);
-void ocfs2_drop_inode(struct inode *inode);
+int ocfs2_drop_inode(struct inode *inode);
 
 /* Flags for ocfs2_iget() */
 #define OCFS2_FI_FLAG_SYSFILE		0x1
-- 
cgit v1.1


From b57922d97fd6f79b6dbe6db0c4fd30d219fa08c1 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Mon, 7 Jun 2010 14:34:48 -0400
Subject: convert remaining ->clear_inode() to ->evict_inode()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/9p/v9fs_vfs.h             |  2 +-
 fs/9p/vfs_inode.c            |  4 +++-
 fs/9p/vfs_super.c            |  4 ++--
 fs/afs/inode.c               |  5 ++++-
 fs/afs/internal.h            |  2 +-
 fs/afs/super.c               |  2 +-
 fs/binfmt_misc.c             |  5 +++--
 fs/block_dev.c               |  7 +++++--
 fs/cifs/cifsfs.c             |  6 ++++--
 fs/coda/inode.c              |  8 +++++---
 fs/ecryptfs/super.c          |  8 +++++---
 fs/freevxfs/vxfs_extern.h    |  2 +-
 fs/freevxfs/vxfs_inode.c     |  8 +++++---
 fs/freevxfs/vxfs_super.c     |  2 +-
 fs/fuse/inode.c              |  6 ++++--
 fs/hfs/hfs_fs.h              |  2 +-
 fs/hfs/inode.c               |  4 +++-
 fs/hfs/super.c               |  2 +-
 fs/hfsplus/super.c           |  8 +++++---
 fs/inode.c                   |  2 --
 fs/jffs2/fs.c                |  6 ++++--
 fs/jffs2/os-linux.h          |  2 +-
 fs/jffs2/super.c             |  2 +-
 fs/jffs2/xattr.c             |  2 +-
 fs/nfs/inode.c               | 13 +++++++++++--
 fs/nfs/internal.h            |  4 ++--
 fs/nfs/super.c               |  4 ++--
 fs/ntfs/inode.c              |  7 +++++--
 fs/ntfs/inode.h              |  2 +-
 fs/ntfs/super.c              |  2 +-
 fs/ocfs2/dlmfs/dlmfs.c       |  7 +++----
 fs/xfs/linux-2.6/xfs_super.c |  8 +++++---
 fs/xfs/linux-2.6/xfs_trace.h |  2 +-
 33 files changed, 93 insertions(+), 57 deletions(-)

(limited to 'fs')

diff --git a/fs/9p/v9fs_vfs.h b/fs/9p/v9fs_vfs.h
index 32ef400..3d056fe 100644
--- a/fs/9p/v9fs_vfs.h
+++ b/fs/9p/v9fs_vfs.h
@@ -52,7 +52,7 @@ void v9fs_destroy_inode(struct inode *inode);
 #endif
 
 struct inode *v9fs_get_inode(struct super_block *sb, int mode);
-void v9fs_clear_inode(struct inode *inode);
+void v9fs_evict_inode(struct inode *inode);
 ino_t v9fs_qid2ino(struct p9_qid *qid);
 void v9fs_stat2inode(struct p9_wstat *, struct inode *, struct super_block *);
 int v9fs_dir_release(struct inode *inode, struct file *filp);
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 4b3ad6a..b81ce20 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -387,8 +387,10 @@ error:
  * @inode: inode to release
  *
  */
-void v9fs_clear_inode(struct inode *inode)
+void v9fs_evict_inode(struct inode *inode)
 {
+	truncate_inode_pages(inode->i_mapping, 0);
+	end_writeback(inode);
 	filemap_fdatawrite(inode->i_mapping);
 
 #ifdef CONFIG_9P_FSCACHE
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c
index be74d02..c6122bf 100644
--- a/fs/9p/vfs_super.c
+++ b/fs/9p/vfs_super.c
@@ -257,7 +257,7 @@ static const struct super_operations v9fs_super_ops = {
 	.destroy_inode = v9fs_destroy_inode,
 #endif
 	.statfs = simple_statfs,
-	.clear_inode = v9fs_clear_inode,
+	.evict_inode = v9fs_evict_inode,
 	.show_options = generic_show_options,
 	.umount_begin = v9fs_umount_begin,
 };
@@ -268,7 +268,7 @@ static const struct super_operations v9fs_super_ops_dotl = {
 	.destroy_inode = v9fs_destroy_inode,
 #endif
 	.statfs = v9fs_statfs,
-	.clear_inode = v9fs_clear_inode,
+	.evict_inode = v9fs_evict_inode,
 	.show_options = generic_show_options,
 	.umount_begin = v9fs_umount_begin,
 };
diff --git a/fs/afs/inode.c b/fs/afs/inode.c
index d00b312..320ffef 100644
--- a/fs/afs/inode.c
+++ b/fs/afs/inode.c
@@ -316,7 +316,7 @@ int afs_getattr(struct vfsmount *mnt, struct dentry *dentry,
 /*
  * clear an AFS inode
  */
-void afs_clear_inode(struct inode *inode)
+void afs_evict_inode(struct inode *inode)
 {
 	struct afs_permits *permits;
 	struct afs_vnode *vnode;
@@ -335,6 +335,9 @@ void afs_clear_inode(struct inode *inode)
 
 	ASSERTCMP(inode->i_ino, ==, vnode->fid.vnode);
 
+	truncate_inode_pages(&inode->i_data, 0);
+	end_writeback(inode);
+
 	afs_give_up_callback(vnode);
 
 	if (vnode->server) {
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 5f679b77..8679089 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -565,7 +565,7 @@ extern void afs_zap_data(struct afs_vnode *);
 extern int afs_validate(struct afs_vnode *, struct key *);
 extern int afs_getattr(struct vfsmount *, struct dentry *, struct kstat *);
 extern int afs_setattr(struct dentry *, struct iattr *);
-extern void afs_clear_inode(struct inode *);
+extern void afs_evict_inode(struct inode *);
 
 /*
  * main.c
diff --git a/fs/afs/super.c b/fs/afs/super.c
index e932e5a..9cf80f0 100644
--- a/fs/afs/super.c
+++ b/fs/afs/super.c
@@ -49,7 +49,7 @@ static const struct super_operations afs_super_ops = {
 	.statfs		= afs_statfs,
 	.alloc_inode	= afs_alloc_inode,
 	.destroy_inode	= afs_destroy_inode,
-	.clear_inode	= afs_clear_inode,
+	.evict_inode	= afs_evict_inode,
 	.put_super	= afs_put_super,
 	.show_options	= generic_show_options,
 };
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index c4e8353..9e60fd2 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -502,8 +502,9 @@ static struct inode *bm_get_inode(struct super_block *sb, int mode)
 	return inode;
 }
 
-static void bm_clear_inode(struct inode *inode)
+static void bm_evict_inode(struct inode *inode)
 {
+	end_writeback(inode);
 	kfree(inode->i_private);
 }
 
@@ -685,7 +686,7 @@ static const struct file_operations bm_status_operations = {
 
 static const struct super_operations s_ops = {
 	.statfs		= simple_statfs,
-	.clear_inode	= bm_clear_inode,
+	.evict_inode	= bm_evict_inode,
 };
 
 static int bm_fill_super(struct super_block * sb, void * data, int silent)
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 63c9d60..de7b4d0 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -426,10 +426,13 @@ static inline void __bd_forget(struct inode *inode)
 	inode->i_mapping = &inode->i_data;
 }
 
-static void bdev_clear_inode(struct inode *inode)
+static void bdev_evict_inode(struct inode *inode)
 {
 	struct block_device *bdev = &BDEV_I(inode)->bdev;
 	struct list_head *p;
+	truncate_inode_pages(&inode->i_data, 0);
+	invalidate_inode_buffers(inode); /* is it needed here? */
+	end_writeback(inode);
 	spin_lock(&bdev_lock);
 	while ( (p = bdev->bd_inodes.next) != &bdev->bd_inodes ) {
 		__bd_forget(list_entry(p, struct inode, i_devices));
@@ -443,7 +446,7 @@ static const struct super_operations bdev_sops = {
 	.alloc_inode = bdev_alloc_inode,
 	.destroy_inode = bdev_destroy_inode,
 	.drop_inode = generic_delete_inode,
-	.clear_inode = bdev_clear_inode,
+	.evict_inode = bdev_evict_inode,
 };
 
 static int bd_get_sb(struct file_system_type *fs_type,
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 20914f5..5574a42 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -330,8 +330,10 @@ cifs_destroy_inode(struct inode *inode)
 }
 
 static void
-cifs_clear_inode(struct inode *inode)
+cifs_evict_inode(struct inode *inode)
 {
+	truncate_inode_pages(&inode->i_data, 0);
+	end_writeback(inode);
 	cifs_fscache_release_inode_cookie(inode);
 }
 
@@ -495,7 +497,7 @@ static const struct super_operations cifs_super_ops = {
 	.alloc_inode = cifs_alloc_inode,
 	.destroy_inode = cifs_destroy_inode,
 	.drop_inode	= cifs_drop_inode,
-	.clear_inode	= cifs_clear_inode,
+	.evict_inode	= cifs_evict_inode,
 /*	.delete_inode	= cifs_delete_inode,  */  /* Do not need above
 	function unless later we add lazy close of inodes or unless the
 	kernel forgets to call us with the same number of releases (closes)
diff --git a/fs/coda/inode.c b/fs/coda/inode.c
index d97f993..6526e6f 100644
--- a/fs/coda/inode.c
+++ b/fs/coda/inode.c
@@ -35,7 +35,7 @@
 #include "coda_int.h"
 
 /* VFS super_block ops */
-static void coda_clear_inode(struct inode *);
+static void coda_evict_inode(struct inode *);
 static void coda_put_super(struct super_block *);
 static int coda_statfs(struct dentry *dentry, struct kstatfs *buf);
 
@@ -93,7 +93,7 @@ static const struct super_operations coda_super_operations =
 {
 	.alloc_inode	= coda_alloc_inode,
 	.destroy_inode	= coda_destroy_inode,
-	.clear_inode	= coda_clear_inode,
+	.evict_inode	= coda_evict_inode,
 	.put_super	= coda_put_super,
 	.statfs		= coda_statfs,
 	.remount_fs	= coda_remount,
@@ -224,8 +224,10 @@ static void coda_put_super(struct super_block *sb)
 	printk("Coda: Bye bye.\n");
 }
 
-static void coda_clear_inode(struct inode *inode)
+static void coda_evict_inode(struct inode *inode)
 {
+	truncate_inode_pages(&inode->i_data, 0);
+	end_writeback(inode);
 	coda_cache_clear_inode(inode);
 }
 
diff --git a/fs/ecryptfs/super.c b/fs/ecryptfs/super.c
index 0435886..4b5de6c 100644
--- a/fs/ecryptfs/super.c
+++ b/fs/ecryptfs/super.c
@@ -122,7 +122,7 @@ static int ecryptfs_statfs(struct dentry *dentry, struct kstatfs *buf)
 }
 
 /**
- * ecryptfs_clear_inode
+ * ecryptfs_evict_inode
  * @inode - The ecryptfs inode
  *
  * Called by iput() when the inode reference count reached zero
@@ -131,8 +131,10 @@ static int ecryptfs_statfs(struct dentry *dentry, struct kstatfs *buf)
  * on the inode free list. We use this to drop out reference to the
  * lower inode.
  */
-static void ecryptfs_clear_inode(struct inode *inode)
+static void ecryptfs_evict_inode(struct inode *inode)
 {
+	truncate_inode_pages(&inode->i_data, 0);
+	end_writeback(inode);
 	iput(ecryptfs_inode_to_lower(inode));
 }
 
@@ -184,6 +186,6 @@ const struct super_operations ecryptfs_sops = {
 	.drop_inode = generic_delete_inode,
 	.statfs = ecryptfs_statfs,
 	.remount_fs = NULL,
-	.clear_inode = ecryptfs_clear_inode,
+	.evict_inode = ecryptfs_evict_inode,
 	.show_options = ecryptfs_show_options
 };
diff --git a/fs/freevxfs/vxfs_extern.h b/fs/freevxfs/vxfs_extern.h
index 50ab5ee..881aa3d 100644
--- a/fs/freevxfs/vxfs_extern.h
+++ b/fs/freevxfs/vxfs_extern.h
@@ -63,7 +63,7 @@ extern void			vxfs_put_fake_inode(struct inode *);
 extern struct vxfs_inode_info *	vxfs_blkiget(struct super_block *, u_long, ino_t);
 extern struct vxfs_inode_info *	vxfs_stiget(struct super_block *, ino_t);
 extern struct inode *		vxfs_iget(struct super_block *, ino_t);
-extern void			vxfs_clear_inode(struct inode *);
+extern void			vxfs_evict_inode(struct inode *);
 
 /* vxfs_lookup.c */
 extern const struct inode_operations	vxfs_dir_inode_ops;
diff --git a/fs/freevxfs/vxfs_inode.c b/fs/freevxfs/vxfs_inode.c
index 03a6ea5..79d1b4e 100644
--- a/fs/freevxfs/vxfs_inode.c
+++ b/fs/freevxfs/vxfs_inode.c
@@ -337,15 +337,17 @@ vxfs_iget(struct super_block *sbp, ino_t ino)
 }
 
 /**
- * vxfs_clear_inode - remove inode from main memory
+ * vxfs_evict_inode - remove inode from main memory
  * @ip:		inode to discard.
  *
  * Description:
- *  vxfs_clear_inode() is called on the final iput and frees the private
+ *  vxfs_evict_inode() is called on the final iput and frees the private
  *  inode area.
  */
 void
-vxfs_clear_inode(struct inode *ip)
+vxfs_evict_inode(struct inode *ip)
 {
+	truncate_inode_pages(&ip->i_data, 0);
+	end_writeback(ip);
 	kmem_cache_free(vxfs_inode_cachep, ip->i_private);
 }
diff --git a/fs/freevxfs/vxfs_super.c b/fs/freevxfs/vxfs_super.c
index 1e8af93..1f3ffd9 100644
--- a/fs/freevxfs/vxfs_super.c
+++ b/fs/freevxfs/vxfs_super.c
@@ -61,7 +61,7 @@ static int		vxfs_statfs(struct dentry *, struct kstatfs *);
 static int		vxfs_remount(struct super_block *, int *, char *);
 
 static const struct super_operations vxfs_super_ops = {
-	.clear_inode =		vxfs_clear_inode,
+	.evict_inode =		vxfs_evict_inode,
 	.put_super =		vxfs_put_super,
 	.statfs =		vxfs_statfs,
 	.remount_fs =		vxfs_remount,
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index ec14d19..da9e6e1 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -122,8 +122,10 @@ void fuse_send_forget(struct fuse_conn *fc, struct fuse_req *req,
 	fuse_request_send_noreply(fc, req);
 }
 
-static void fuse_clear_inode(struct inode *inode)
+static void fuse_evict_inode(struct inode *inode)
 {
+	truncate_inode_pages(&inode->i_data, 0);
+	end_writeback(inode);
 	if (inode->i_sb->s_flags & MS_ACTIVE) {
 		struct fuse_conn *fc = get_fuse_conn(inode);
 		struct fuse_inode *fi = get_fuse_inode(inode);
@@ -736,7 +738,7 @@ static const struct export_operations fuse_export_operations = {
 static const struct super_operations fuse_super_operations = {
 	.alloc_inode    = fuse_alloc_inode,
 	.destroy_inode  = fuse_destroy_inode,
-	.clear_inode	= fuse_clear_inode,
+	.evict_inode	= fuse_evict_inode,
 	.drop_inode	= generic_delete_inode,
 	.remount_fs	= fuse_remount_fs,
 	.put_super	= fuse_put_super,
diff --git a/fs/hfs/hfs_fs.h b/fs/hfs/hfs_fs.h
index fe35e3b..4f55651 100644
--- a/fs/hfs/hfs_fs.h
+++ b/fs/hfs/hfs_fs.h
@@ -193,7 +193,7 @@ extern int hfs_inode_setattr(struct dentry *, struct iattr *);
 extern void hfs_inode_read_fork(struct inode *inode, struct hfs_extent *ext,
 			__be32 log_size, __be32 phys_size, u32 clump_size);
 extern struct inode *hfs_iget(struct super_block *, struct hfs_cat_key *, hfs_cat_rec *);
-extern void hfs_clear_inode(struct inode *);
+extern void hfs_evict_inode(struct inode *);
 extern void hfs_delete_inode(struct inode *);
 
 /* attr.c */
diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c
index 93ceec8..397b7ad 100644
--- a/fs/hfs/inode.c
+++ b/fs/hfs/inode.c
@@ -531,8 +531,10 @@ out:
 	return NULL;
 }
 
-void hfs_clear_inode(struct inode *inode)
+void hfs_evict_inode(struct inode *inode)
 {
+	truncate_inode_pages(&inode->i_data, 0);
+	end_writeback(inode);
 	if (HFS_IS_RSRC(inode) && HFS_I(inode)->rsrc_inode) {
 		HFS_I(HFS_I(inode)->rsrc_inode)->rsrc_inode = NULL;
 		iput(HFS_I(inode)->rsrc_inode);
diff --git a/fs/hfs/super.c b/fs/hfs/super.c
index 0a81eb7..34235d4 100644
--- a/fs/hfs/super.c
+++ b/fs/hfs/super.c
@@ -181,7 +181,7 @@ static const struct super_operations hfs_super_operations = {
 	.alloc_inode	= hfs_alloc_inode,
 	.destroy_inode	= hfs_destroy_inode,
 	.write_inode	= hfs_write_inode,
-	.clear_inode	= hfs_clear_inode,
+	.evict_inode	= hfs_evict_inode,
 	.put_super	= hfs_put_super,
 	.write_super	= hfs_write_super,
 	.sync_fs	= hfs_sync_fs,
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c
index a32c241..3b55c05 100644
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c
@@ -145,9 +145,11 @@ static int hfsplus_write_inode(struct inode *inode,
 	return ret;
 }
 
-static void hfsplus_clear_inode(struct inode *inode)
+static void hfsplus_evict_inode(struct inode *inode)
 {
-	dprint(DBG_INODE, "hfsplus_clear_inode: %lu\n", inode->i_ino);
+	dprint(DBG_INODE, "hfsplus_evict_inode: %lu\n", inode->i_ino);
+	truncate_inode_pages(&inode->i_data, 0);
+	end_writeback(inode);
 	if (HFSPLUS_IS_RSRC(inode)) {
 		HFSPLUS_I(HFSPLUS_I(inode).rsrc_inode).rsrc_inode = NULL;
 		iput(HFSPLUS_I(inode).rsrc_inode);
@@ -293,7 +295,7 @@ static const struct super_operations hfsplus_sops = {
 	.alloc_inode	= hfsplus_alloc_inode,
 	.destroy_inode	= hfsplus_destroy_inode,
 	.write_inode	= hfsplus_write_inode,
-	.clear_inode	= hfsplus_clear_inode,
+	.evict_inode	= hfsplus_evict_inode,
 	.put_super	= hfsplus_put_super,
 	.write_super	= hfsplus_write_super,
 	.sync_fs	= hfsplus_sync_fs,
diff --git a/fs/inode.c b/fs/inode.c
index 0e07761..5daeb0b 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -317,8 +317,6 @@ static void evict(struct inode *inode)
 			truncate_inode_pages(&inode->i_data, 0);
 		invalidate_inode_buffers(inode);
 		end_writeback(inode);
-		if (op->clear_inode)
-			op->clear_inode(inode);
 	}
 	if (S_ISBLK(inode->i_mode) && inode->i_bdev)
 		bd_forget(inode);
diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c
index 1b24266..ac0638f 100644
--- a/fs/jffs2/fs.c
+++ b/fs/jffs2/fs.c
@@ -225,7 +225,7 @@ int jffs2_statfs(struct dentry *dentry, struct kstatfs *buf)
 }
 
 
-void jffs2_clear_inode (struct inode *inode)
+void jffs2_evict_inode (struct inode *inode)
 {
 	/* We can forget about this inode for now - drop all
 	 *  the nodelists associated with it, etc.
@@ -233,7 +233,9 @@ void jffs2_clear_inode (struct inode *inode)
 	struct jffs2_sb_info *c = JFFS2_SB_INFO(inode->i_sb);
 	struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode);
 
-	D1(printk(KERN_DEBUG "jffs2_clear_inode(): ino #%lu mode %o\n", inode->i_ino, inode->i_mode));
+	D1(printk(KERN_DEBUG "jffs2_evict_inode(): ino #%lu mode %o\n", inode->i_ino, inode->i_mode));
+	truncate_inode_pages(&inode->i_data, 0);
+	end_writeback(inode);
 	jffs2_do_clear_inode(c, f);
 }
 
diff --git a/fs/jffs2/os-linux.h b/fs/jffs2/os-linux.h
index 4791aac..00bae7c 100644
--- a/fs/jffs2/os-linux.h
+++ b/fs/jffs2/os-linux.h
@@ -171,7 +171,7 @@ extern const struct inode_operations jffs2_symlink_inode_operations;
 int jffs2_setattr (struct dentry *, struct iattr *);
 int jffs2_do_setattr (struct inode *, struct iattr *);
 struct inode *jffs2_iget(struct super_block *, unsigned long);
-void jffs2_clear_inode (struct inode *);
+void jffs2_evict_inode (struct inode *);
 void jffs2_dirty_inode(struct inode *inode);
 struct inode *jffs2_new_inode (struct inode *dir_i, int mode,
 			       struct jffs2_raw_inode *ri);
diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c
index 511e2d6..662bba0 100644
--- a/fs/jffs2/super.c
+++ b/fs/jffs2/super.c
@@ -135,7 +135,7 @@ static const struct super_operations jffs2_super_operations =
 	.write_super =	jffs2_write_super,
 	.statfs =	jffs2_statfs,
 	.remount_fs =	jffs2_remount_fs,
-	.clear_inode =	jffs2_clear_inode,
+	.evict_inode =	jffs2_evict_inode,
 	.dirty_inode =	jffs2_dirty_inode,
 	.sync_fs =	jffs2_sync_fs,
 };
diff --git a/fs/jffs2/xattr.c b/fs/jffs2/xattr.c
index d258e26..9b572ca 100644
--- a/fs/jffs2/xattr.c
+++ b/fs/jffs2/xattr.c
@@ -588,7 +588,7 @@ static void delete_xattr_ref(struct jffs2_sb_info *c, struct jffs2_xattr_ref *re
 
 void jffs2_xattr_delete_inode(struct jffs2_sb_info *c, struct jffs2_inode_cache *ic)
 {
-	/* It's called from jffs2_clear_inode() on inode removing.
+	/* It's called from jffs2_evict_inode() on inode removing.
 	   When an inode with XATTR is removed, those XATTRs must be removed. */
 	struct jffs2_xattr_ref *ref, *_ref;
 
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 099b351..c211b81 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -98,7 +98,7 @@ u64 nfs_compat_user_ino64(u64 fileid)
 	return ino;
 }
 
-void nfs_clear_inode(struct inode *inode)
+static void nfs_clear_inode(struct inode *inode)
 {
 	/*
 	 * The following should never happen...
@@ -110,6 +110,13 @@ void nfs_clear_inode(struct inode *inode)
 	nfs_fscache_release_inode_cookie(inode);
 }
 
+void nfs_evict_inode(struct inode *inode)
+{
+	truncate_inode_pages(&inode->i_data, 0);
+	end_writeback(inode);
+	nfs_clear_inode(inode);
+}
+
 /**
  * nfs_sync_mapping - helper to flush all mmapped dirty data to disk
  */
@@ -1338,8 +1345,10 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
  * to open() calls that passed nfs_atomic_lookup, but failed to call
  * nfs_open().
  */
-void nfs4_clear_inode(struct inode *inode)
+void nfs4_evict_inode(struct inode *inode)
 {
+	truncate_inode_pages(&inode->i_data, 0);
+	end_writeback(inode);
 	/* If we are holding a delegation, return it! */
 	nfs_inode_return_delegation_noreclaim(inode);
 	/* First call standard NFS clear_inode() code */
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index e70f44b..f168ebd 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -213,9 +213,9 @@ extern struct workqueue_struct *nfsiod_workqueue;
 extern struct inode *nfs_alloc_inode(struct super_block *sb);
 extern void nfs_destroy_inode(struct inode *);
 extern int nfs_write_inode(struct inode *, struct writeback_control *);
-extern void nfs_clear_inode(struct inode *);
+extern void nfs_evict_inode(struct inode *);
 #ifdef CONFIG_NFS_V4
-extern void nfs4_clear_inode(struct inode *);
+extern void nfs4_evict_inode(struct inode *);
 #endif
 void nfs_zap_acl_cache(struct inode *inode);
 extern int nfs_wait_bit_killable(void *word);
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index f9df16d..ef2b7e4 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -270,7 +270,7 @@ static const struct super_operations nfs_sops = {
 	.write_inode	= nfs_write_inode,
 	.put_super	= nfs_put_super,
 	.statfs		= nfs_statfs,
-	.clear_inode	= nfs_clear_inode,
+	.evict_inode	= nfs_evict_inode,
 	.umount_begin	= nfs_umount_begin,
 	.show_options	= nfs_show_options,
 	.show_stats	= nfs_show_stats,
@@ -340,7 +340,7 @@ static const struct super_operations nfs4_sops = {
 	.write_inode	= nfs_write_inode,
 	.put_super	= nfs_put_super,
 	.statfs		= nfs_statfs,
-	.clear_inode	= nfs4_clear_inode,
+	.evict_inode	= nfs4_evict_inode,
 	.umount_begin	= nfs_umount_begin,
 	.show_options	= nfs_show_options,
 	.show_stats	= nfs_show_stats,
diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c
index fdef8f7..93622b1 100644
--- a/fs/ntfs/inode.c
+++ b/fs/ntfs/inode.c
@@ -2238,7 +2238,7 @@ void ntfs_clear_extent_inode(ntfs_inode *ni)
 }
 
 /**
- * ntfs_clear_big_inode - clean up the ntfs specific part of an inode
+ * ntfs_evict_big_inode - clean up the ntfs specific part of an inode
  * @vi:		vfs inode pending annihilation
  *
  * When the VFS is going to remove an inode from memory, ntfs_clear_big_inode()
@@ -2247,10 +2247,13 @@ void ntfs_clear_extent_inode(ntfs_inode *ni)
  *
  * If the MFT record is dirty, we commit it before doing anything else.
  */
-void ntfs_clear_big_inode(struct inode *vi)
+void ntfs_evict_big_inode(struct inode *vi)
 {
 	ntfs_inode *ni = NTFS_I(vi);
 
+	truncate_inode_pages(&vi->i_data, 0);
+	end_writeback(vi);
+
 #ifdef NTFS_RW
 	if (NInoDirty(ni)) {
 		bool was_bad = (is_bad_inode(vi));
diff --git a/fs/ntfs/inode.h b/fs/ntfs/inode.h
index 9a11354..2dabf81 100644
--- a/fs/ntfs/inode.h
+++ b/fs/ntfs/inode.h
@@ -279,7 +279,7 @@ extern struct inode *ntfs_index_iget(struct inode *base_vi, ntfschar *name,
 
 extern struct inode *ntfs_alloc_big_inode(struct super_block *sb);
 extern void ntfs_destroy_big_inode(struct inode *inode);
-extern void ntfs_clear_big_inode(struct inode *vi);
+extern void ntfs_evict_big_inode(struct inode *vi);
 
 extern void __ntfs_init_inode(struct super_block *sb, ntfs_inode *ni);
 
diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c
index 0de1db6..5128061 100644
--- a/fs/ntfs/super.c
+++ b/fs/ntfs/super.c
@@ -2700,7 +2700,7 @@ static const struct super_operations ntfs_sops = {
 	.put_super	= ntfs_put_super,	/* Syscall: umount. */
 	.statfs		= ntfs_statfs,		/* Syscall: statfs */
 	.remount_fs	= ntfs_remount,		/* Syscall: mount -o remount. */
-	.clear_inode	= ntfs_clear_big_inode,	/* VFS: Called when an inode is
+	.evict_inode	= ntfs_evict_big_inode,	/* VFS: Called when an inode is
 						   removed from memory. */
 	//.umount_begin	= NULL,			/* Forced umount. */
 	.show_options	= ntfs_show_options,	/* Show mount options in
diff --git a/fs/ocfs2/dlmfs/dlmfs.c b/fs/ocfs2/dlmfs/dlmfs.c
index 85e4cca..a43ebb1 100644
--- a/fs/ocfs2/dlmfs/dlmfs.c
+++ b/fs/ocfs2/dlmfs/dlmfs.c
@@ -357,13 +357,12 @@ static void dlmfs_destroy_inode(struct inode *inode)
 	kmem_cache_free(dlmfs_inode_cache, DLMFS_I(inode));
 }
 
-static void dlmfs_clear_inode(struct inode *inode)
+static void dlmfs_evict_inode(struct inode *inode)
 {
 	int status;
 	struct dlmfs_inode_private *ip;
 
-	if (!inode)
-		return;
+	end_writeback(inode);
 
 	mlog(0, "inode %lu\n", inode->i_ino);
 
@@ -633,7 +632,7 @@ static const struct super_operations dlmfs_ops = {
 	.statfs		= simple_statfs,
 	.alloc_inode	= dlmfs_alloc_inode,
 	.destroy_inode	= dlmfs_destroy_inode,
-	.clear_inode	= dlmfs_clear_inode,
+	.evict_inode	= dlmfs_evict_inode,
 	.drop_inode	= generic_delete_inode,
 };
 
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 758df946..15c35b6 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -1100,13 +1100,15 @@ xfs_fs_write_inode(
 }
 
 STATIC void
-xfs_fs_clear_inode(
+xfs_fs_evict_inode(
 	struct inode		*inode)
 {
 	xfs_inode_t		*ip = XFS_I(inode);
 
-	trace_xfs_clear_inode(ip);
+	trace_xfs_evict_inode(ip);
 
+	truncate_inode_pages(&inode->i_data, 0);
+	end_writeback(inode);
 	XFS_STATS_INC(vn_rele);
 	XFS_STATS_INC(vn_remove);
 	XFS_STATS_DEC(vn_active);
@@ -1622,7 +1624,7 @@ static const struct super_operations xfs_super_operations = {
 	.destroy_inode		= xfs_fs_destroy_inode,
 	.dirty_inode		= xfs_fs_dirty_inode,
 	.write_inode		= xfs_fs_write_inode,
-	.clear_inode		= xfs_fs_clear_inode,
+	.evict_inode		= xfs_fs_evict_inode,
 	.put_super		= xfs_fs_put_super,
 	.sync_fs		= xfs_fs_sync_fs,
 	.freeze_fs		= xfs_fs_freeze,
diff --git a/fs/xfs/linux-2.6/xfs_trace.h b/fs/xfs/linux-2.6/xfs_trace.h
index c657cdc..be5dffd 100644
--- a/fs/xfs/linux-2.6/xfs_trace.h
+++ b/fs/xfs/linux-2.6/xfs_trace.h
@@ -581,7 +581,7 @@ DEFINE_INODE_EVENT(xfs_ioctl_setattr);
 DEFINE_INODE_EVENT(xfs_file_fsync);
 DEFINE_INODE_EVENT(xfs_destroy_inode);
 DEFINE_INODE_EVENT(xfs_write_inode);
-DEFINE_INODE_EVENT(xfs_clear_inode);
+DEFINE_INODE_EVENT(xfs_evict_inode);
 
 DEFINE_INODE_EVENT(xfs_dquot_dqalloc);
 DEFINE_INODE_EVENT(xfs_dquot_dqdetach);
-- 
cgit v1.1


From b70a3e0702dee2ed9435e06a8bde7d9fa2228895 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Mon, 7 Jun 2010 14:35:46 -0400
Subject: All filesystems that need invalidate_inode_buffers() are doing that
 explicitly

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/inode.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'fs')

diff --git a/fs/inode.c b/fs/inode.c
index 5daeb0b..2575244 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -315,7 +315,6 @@ static void evict(struct inode *inode)
 	} else {
 		if (inode->i_data.nrpages)
 			truncate_inode_pages(&inode->i_data, 0);
-		invalidate_inode_buffers(inode);
 		end_writeback(inode);
 	}
 	if (S_ISBLK(inode->i_mode) && inode->i_bdev)
-- 
cgit v1.1


From ebabe9a9001af0af56c0c2780ca1576246e7a74b Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 7 Jul 2010 18:53:11 +0200
Subject: pass a struct path to vfs_statfs

We'll need the path to implement the flags field for statvfs support.
We do have it available in all callers except:

 - ecryptfs_statfs.  This one doesn't actually need vfs_statfs but just
   needs to do a caller to the lower filesystem statfs method.
 - sys_ustat.  Add a non-exported statfs_by_dentry helper for it which
   doesn't won't be able to fill out the flags field later on.

In addition rename the helpers for statfs vs fstatfs to do_*statfs instead
of the misleading vfs prefix.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/cachefiles/bind.c   |  2 +-
 fs/cachefiles/daemon.c |  6 +++++-
 fs/compat.c            | 10 +++++-----
 fs/ecryptfs/super.c    |  6 +++++-
 fs/nfsd/nfs4xdr.c      |  6 +++++-
 fs/nfsd/vfs.c          | 10 ++++++++--
 fs/statfs.c            | 50 ++++++++++++++++++++++++++------------------------
 7 files changed, 55 insertions(+), 35 deletions(-)

(limited to 'fs')

diff --git a/fs/cachefiles/bind.c b/fs/cachefiles/bind.c
index 2906077..a2603e7 100644
--- a/fs/cachefiles/bind.c
+++ b/fs/cachefiles/bind.c
@@ -146,7 +146,7 @@ static int cachefiles_daemon_add_cache(struct cachefiles_cache *cache)
 		goto error_unsupported;
 
 	/* get the cache size and blocksize */
-	ret = vfs_statfs(root, &stats);
+	ret = vfs_statfs(&path, &stats);
 	if (ret < 0)
 		goto error_unsupported;
 
diff --git a/fs/cachefiles/daemon.c b/fs/cachefiles/daemon.c
index c241356..24eb0d3 100644
--- a/fs/cachefiles/daemon.c
+++ b/fs/cachefiles/daemon.c
@@ -683,6 +683,10 @@ int cachefiles_has_space(struct cachefiles_cache *cache,
 			 unsigned fnr, unsigned bnr)
 {
 	struct kstatfs stats;
+	struct path path = {
+		.mnt	= cache->mnt,
+		.dentry	= cache->mnt->mnt_root,
+	};
 	int ret;
 
 	//_enter("{%llu,%llu,%llu,%llu,%llu,%llu},%u,%u",
@@ -697,7 +701,7 @@ int cachefiles_has_space(struct cachefiles_cache *cache,
 	/* find out how many pages of blockdev are available */
 	memset(&stats, 0, sizeof(stats));
 
-	ret = vfs_statfs(cache->mnt->mnt_root, &stats);
+	ret = vfs_statfs(&path, &stats);
 	if (ret < 0) {
 		if (ret == -EIO)
 			cachefiles_io_error(cache, "statfs failed");
diff --git a/fs/compat.c b/fs/compat.c
index 6490d21..fc6c2ad 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -266,7 +266,7 @@ asmlinkage long compat_sys_statfs(const char __user *pathname, struct compat_sta
 	error = user_path(pathname, &path);
 	if (!error) {
 		struct kstatfs tmp;
-		error = vfs_statfs(path.dentry, &tmp);
+		error = vfs_statfs(&path, &tmp);
 		if (!error)
 			error = put_compat_statfs(buf, &tmp);
 		path_put(&path);
@@ -284,7 +284,7 @@ asmlinkage long compat_sys_fstatfs(unsigned int fd, struct compat_statfs __user
 	file = fget(fd);
 	if (!file)
 		goto out;
-	error = vfs_statfs(file->f_path.dentry, &tmp);
+	error = vfs_statfs(&file->f_path, &tmp);
 	if (!error)
 		error = put_compat_statfs(buf, &tmp);
 	fput(file);
@@ -334,7 +334,7 @@ asmlinkage long compat_sys_statfs64(const char __user *pathname, compat_size_t s
 	error = user_path(pathname, &path);
 	if (!error) {
 		struct kstatfs tmp;
-		error = vfs_statfs(path.dentry, &tmp);
+		error = vfs_statfs(&path, &tmp);
 		if (!error)
 			error = put_compat_statfs64(buf, &tmp);
 		path_put(&path);
@@ -355,7 +355,7 @@ asmlinkage long compat_sys_fstatfs64(unsigned int fd, compat_size_t sz, struct c
 	file = fget(fd);
 	if (!file)
 		goto out;
-	error = vfs_statfs(file->f_path.dentry, &tmp);
+	error = vfs_statfs(&file->f_path, &tmp);
 	if (!error)
 		error = put_compat_statfs64(buf, &tmp);
 	fput(file);
@@ -378,7 +378,7 @@ asmlinkage long compat_sys_ustat(unsigned dev, struct compat_ustat __user *u)
 	sb = user_get_super(new_decode_dev(dev));
 	if (!sb)
 		return -EINVAL;
-	err = vfs_statfs(sb->s_root, &sbuf);
+	err = statfs_by_dentry(sb->s_root, &sbuf);
 	drop_super(sb);
 	if (err)
 		return err;
diff --git a/fs/ecryptfs/super.c b/fs/ecryptfs/super.c
index 4b5de6c..f7fc286 100644
--- a/fs/ecryptfs/super.c
+++ b/fs/ecryptfs/super.c
@@ -118,7 +118,11 @@ void ecryptfs_init_inode(struct inode *inode, struct inode *lower_inode)
  */
 static int ecryptfs_statfs(struct dentry *dentry, struct kstatfs *buf)
 {
-	return vfs_statfs(ecryptfs_dentry_to_lower(dentry), buf);
+	struct dentry *lower_dentry = ecryptfs_dentry_to_lower(dentry);
+
+	if (!lower_dentry->d_sb->s_op->statfs)
+		return -ENOSYS;
+	return lower_dentry->d_sb->s_op->statfs(lower_dentry, buf);
 }
 
 /**
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index ac17a70..4d6154f 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -1756,6 +1756,10 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp,
 	struct nfs4_acl *acl = NULL;
 	struct nfsd4_compoundres *resp = rqstp->rq_resp;
 	u32 minorversion = resp->cstate.minorversion;
+	struct path path = {
+		.mnt	= exp->ex_path.mnt,
+		.dentry	= dentry,
+	};
 
 	BUG_ON(bmval1 & NFSD_WRITEONLY_ATTRS_WORD1);
 	BUG_ON(bmval0 & ~nfsd_suppattrs0(minorversion));
@@ -1776,7 +1780,7 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp,
 			FATTR4_WORD0_MAXNAME)) ||
 	    (bmval1 & (FATTR4_WORD1_SPACE_AVAIL | FATTR4_WORD1_SPACE_FREE |
 		       FATTR4_WORD1_SPACE_TOTAL))) {
-		err = vfs_statfs(dentry, &statfs);
+		err = vfs_statfs(&path, &statfs);
 		if (err)
 			goto out_nfserr;
 	}
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 3c11112..f6f1a71 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -2019,8 +2019,14 @@ out:
 __be32
 nfsd_statfs(struct svc_rqst *rqstp, struct svc_fh *fhp, struct kstatfs *stat, int access)
 {
-	__be32 err = fh_verify(rqstp, fhp, 0, NFSD_MAY_NOP | access);
-	if (!err && vfs_statfs(fhp->fh_dentry,stat))
+	struct path path = {
+		.mnt	= fhp->fh_export->ex_path.mnt,
+		.dentry	= fhp->fh_dentry,
+	};
+	__be32 err;
+
+	err = fh_verify(rqstp, fhp, 0, NFSD_MAY_NOP | access);
+	if (!err && vfs_statfs(&path, stat))
 		err = nfserr_io;
 	return err;
 }
diff --git a/fs/statfs.c b/fs/statfs.c
index 4ef021f..6a30570 100644
--- a/fs/statfs.c
+++ b/fs/statfs.c
@@ -7,33 +7,35 @@
 #include <linux/security.h>
 #include <linux/uaccess.h>
 
-int vfs_statfs(struct dentry *dentry, struct kstatfs *buf)
+int statfs_by_dentry(struct dentry *dentry, struct kstatfs *buf)
 {
-	int retval = -ENODEV;
-
-	if (dentry) {
-		retval = -ENOSYS;
-		if (dentry->d_sb->s_op->statfs) {
-			memset(buf, 0, sizeof(*buf));
-			retval = security_sb_statfs(dentry);
-			if (retval)
-				return retval;
-			retval = dentry->d_sb->s_op->statfs(dentry, buf);
-			if (retval == 0 && buf->f_frsize == 0)
-				buf->f_frsize = buf->f_bsize;
-		}
-	}
+	int retval;
+
+	if (!dentry->d_sb->s_op->statfs)
+		return -ENOSYS;
+
+	memset(buf, 0, sizeof(*buf));
+	retval = security_sb_statfs(dentry);
+	if (retval)
+		return retval;
+	retval = dentry->d_sb->s_op->statfs(dentry, buf);
+	if (retval == 0 && buf->f_frsize == 0)
+		buf->f_frsize = buf->f_bsize;
 	return retval;
 }
 
+int vfs_statfs(struct path *path, struct kstatfs *buf)
+{
+	return statfs_by_dentry(path->dentry, buf);
+}
 EXPORT_SYMBOL(vfs_statfs);
 
-static int vfs_statfs_native(struct dentry *dentry, struct statfs *buf)
+static int do_statfs_native(struct path *path, struct statfs *buf)
 {
 	struct kstatfs st;
 	int retval;
 
-	retval = vfs_statfs(dentry, &st);
+	retval = vfs_statfs(path, &st);
 	if (retval)
 		return retval;
 
@@ -72,12 +74,12 @@ static int vfs_statfs_native(struct dentry *dentry, struct statfs *buf)
 	return 0;
 }
 
-static int vfs_statfs64(struct dentry *dentry, struct statfs64 *buf)
+static int do_statfs64(struct path *path, struct statfs64 *buf)
 {
 	struct kstatfs st;
 	int retval;
 
-	retval = vfs_statfs(dentry, &st);
+	retval = vfs_statfs(path, &st);
 	if (retval)
 		return retval;
 
@@ -107,7 +109,7 @@ SYSCALL_DEFINE2(statfs, const char __user *, pathname, struct statfs __user *, b
 	error = user_path(pathname, &path);
 	if (!error) {
 		struct statfs tmp;
-		error = vfs_statfs_native(path.dentry, &tmp);
+		error = do_statfs_native(&path, &tmp);
 		if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
 			error = -EFAULT;
 		path_put(&path);
@@ -125,7 +127,7 @@ SYSCALL_DEFINE3(statfs64, const char __user *, pathname, size_t, sz, struct stat
 	error = user_path(pathname, &path);
 	if (!error) {
 		struct statfs64 tmp;
-		error = vfs_statfs64(path.dentry, &tmp);
+		error = do_statfs64(&path, &tmp);
 		if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
 			error = -EFAULT;
 		path_put(&path);
@@ -143,7 +145,7 @@ SYSCALL_DEFINE2(fstatfs, unsigned int, fd, struct statfs __user *, buf)
 	file = fget(fd);
 	if (!file)
 		goto out;
-	error = vfs_statfs_native(file->f_path.dentry, &tmp);
+	error = do_statfs_native(&file->f_path, &tmp);
 	if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
 		error = -EFAULT;
 	fput(file);
@@ -164,7 +166,7 @@ SYSCALL_DEFINE3(fstatfs64, unsigned int, fd, size_t, sz, struct statfs64 __user
 	file = fget(fd);
 	if (!file)
 		goto out;
-	error = vfs_statfs64(file->f_path.dentry, &tmp);
+	error = do_statfs64(&file->f_path, &tmp);
 	if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
 		error = -EFAULT;
 	fput(file);
@@ -183,7 +185,7 @@ SYSCALL_DEFINE2(ustat, unsigned, dev, struct ustat __user *, ubuf)
 	if (!s)
 		return -EINVAL;
 
-	err = vfs_statfs(s->s_root, &sbuf);
+	err = statfs_by_dentry(s->s_root, &sbuf);
 	drop_super(s);
 	if (err)
 		return err;
-- 
cgit v1.1


From 365b18189789bfa1acd9939e6312b8a4b4577b28 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 7 Jul 2010 18:53:25 +0200
Subject: add f_flags to struct statfs(64)

Add a flags field to help glibc implementing statvfs(3) efficiently.

We copy the flag values from glibc, and add a new ST_VALID flag to
denote that f_flags is implemented.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/statfs.c | 47 ++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 46 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/statfs.c b/fs/statfs.c
index 6a30570..30ea8c8 100644
--- a/fs/statfs.c
+++ b/fs/statfs.c
@@ -2,11 +2,49 @@
 #include <linux/module.h>
 #include <linux/fs.h>
 #include <linux/file.h>
+#include <linux/mount.h>
 #include <linux/namei.h>
 #include <linux/statfs.h>
 #include <linux/security.h>
 #include <linux/uaccess.h>
 
+static int flags_by_mnt(int mnt_flags)
+{
+	int flags = 0;
+
+	if (mnt_flags & MNT_READONLY)
+		flags |= ST_RDONLY;
+	if (mnt_flags & MNT_NOSUID)
+		flags |= ST_NOSUID;
+	if (mnt_flags & MNT_NODEV)
+		flags |= ST_NODEV;
+	if (mnt_flags & MNT_NOEXEC)
+		flags |= ST_NOEXEC;
+	if (mnt_flags & MNT_NOATIME)
+		flags |= ST_NOATIME;
+	if (mnt_flags & MNT_NODIRATIME)
+		flags |= ST_NODIRATIME;
+	if (mnt_flags & MNT_RELATIME)
+		flags |= ST_RELATIME;
+	return flags;
+}
+
+static int flags_by_sb(int s_flags)
+{
+	int flags = 0;
+	if (s_flags & MS_SYNCHRONOUS)
+		flags |= ST_SYNCHRONOUS;
+	if (s_flags & MS_MANDLOCK)
+		flags |= ST_MANDLOCK;
+	return flags;
+}
+
+static int calculate_f_flags(struct vfsmount *mnt)
+{
+	return ST_VALID | flags_by_mnt(mnt->mnt_flags) |
+		flags_by_sb(mnt->mnt_sb->s_flags);
+}
+
 int statfs_by_dentry(struct dentry *dentry, struct kstatfs *buf)
 {
 	int retval;
@@ -26,7 +64,12 @@ int statfs_by_dentry(struct dentry *dentry, struct kstatfs *buf)
 
 int vfs_statfs(struct path *path, struct kstatfs *buf)
 {
-	return statfs_by_dentry(path->dentry, buf);
+	int error;
+
+	error = statfs_by_dentry(path->dentry, buf);
+	if (!error)
+		buf->f_flags = calculate_f_flags(path->mnt);
+	return error;
 }
 EXPORT_SYMBOL(vfs_statfs);
 
@@ -69,6 +112,7 @@ static int do_statfs_native(struct path *path, struct statfs *buf)
 		buf->f_fsid = st.f_fsid;
 		buf->f_namelen = st.f_namelen;
 		buf->f_frsize = st.f_frsize;
+		buf->f_flags = st.f_flags;
 		memset(buf->f_spare, 0, sizeof(buf->f_spare));
 	}
 	return 0;
@@ -96,6 +140,7 @@ static int do_statfs64(struct path *path, struct statfs64 *buf)
 		buf->f_fsid = st.f_fsid;
 		buf->f_namelen = st.f_namelen;
 		buf->f_frsize = st.f_frsize;
+		buf->f_flags = st.f_flags;
 		memset(buf->f_spare, 0, sizeof(buf->f_spare));
 	}
 	return 0;
-- 
cgit v1.1


From 2aec7c523291621ebb68ba8e0bd9b52a26bb76ee Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruen@suse.de>
Date: Mon, 19 Jul 2010 18:19:41 +0200
Subject: mbcache: Remove unused features

The mbcache code was written to support a variable number of indexes,
but all the existing users use exactly one index.  Simplify to code to
support only that case.

There are also no users of the cache entry free operation, and none of
the users keep extra data in cache entries.  Remove those features as
well.

Signed-off-by: Andreas Gruenbacher <agruen@suse.de>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/ext2/xattr.c |  12 ++---
 fs/ext3/xattr.c |  12 ++---
 fs/ext4/xattr.c |  12 ++---
 fs/mbcache.c    | 141 ++++++++++++++++----------------------------------------
 4 files changed, 55 insertions(+), 122 deletions(-)

(limited to 'fs')

diff --git a/fs/ext2/xattr.c b/fs/ext2/xattr.c
index 5ab87e6..8c29ae1 100644
--- a/fs/ext2/xattr.c
+++ b/fs/ext2/xattr.c
@@ -843,7 +843,7 @@ ext2_xattr_cache_insert(struct buffer_head *bh)
 	ce = mb_cache_entry_alloc(ext2_xattr_cache, GFP_NOFS);
 	if (!ce)
 		return -ENOMEM;
-	error = mb_cache_entry_insert(ce, bh->b_bdev, bh->b_blocknr, &hash);
+	error = mb_cache_entry_insert(ce, bh->b_bdev, bh->b_blocknr, hash);
 	if (error) {
 		mb_cache_entry_free(ce);
 		if (error == -EBUSY) {
@@ -917,8 +917,8 @@ ext2_xattr_cache_find(struct inode *inode, struct ext2_xattr_header *header)
 		return NULL;  /* never share */
 	ea_idebug(inode, "looking for cached blocks [%x]", (int)hash);
 again:
-	ce = mb_cache_entry_find_first(ext2_xattr_cache, 0,
-				       inode->i_sb->s_bdev, hash);
+	ce = mb_cache_entry_find_first(ext2_xattr_cache, inode->i_sb->s_bdev,
+				       hash);
 	while (ce) {
 		struct buffer_head *bh;
 
@@ -950,7 +950,7 @@ again:
 			unlock_buffer(bh);
 			brelse(bh);
 		}
-		ce = mb_cache_entry_find_next(ce, 0, inode->i_sb->s_bdev, hash);
+		ce = mb_cache_entry_find_next(ce, inode->i_sb->s_bdev, hash);
 	}
 	return NULL;
 }
@@ -1026,9 +1026,7 @@ static void ext2_xattr_rehash(struct ext2_xattr_header *header,
 int __init
 init_ext2_xattr(void)
 {
-	ext2_xattr_cache = mb_cache_create("ext2_xattr", NULL,
-		sizeof(struct mb_cache_entry) +
-		sizeof(((struct mb_cache_entry *) 0)->e_indexes[0]), 1, 6);
+	ext2_xattr_cache = mb_cache_create("ext2_xattr", 6);
 	if (!ext2_xattr_cache)
 		return -ENOMEM;
 	return 0;
diff --git a/fs/ext3/xattr.c b/fs/ext3/xattr.c
index 71fb8d6..e69dc6d 100644
--- a/fs/ext3/xattr.c
+++ b/fs/ext3/xattr.c
@@ -1139,7 +1139,7 @@ ext3_xattr_cache_insert(struct buffer_head *bh)
 		ea_bdebug(bh, "out of memory");
 		return;
 	}
-	error = mb_cache_entry_insert(ce, bh->b_bdev, bh->b_blocknr, &hash);
+	error = mb_cache_entry_insert(ce, bh->b_bdev, bh->b_blocknr, hash);
 	if (error) {
 		mb_cache_entry_free(ce);
 		if (error == -EBUSY) {
@@ -1211,8 +1211,8 @@ ext3_xattr_cache_find(struct inode *inode, struct ext3_xattr_header *header,
 		return NULL;  /* never share */
 	ea_idebug(inode, "looking for cached blocks [%x]", (int)hash);
 again:
-	ce = mb_cache_entry_find_first(ext3_xattr_cache, 0,
-				       inode->i_sb->s_bdev, hash);
+	ce = mb_cache_entry_find_first(ext3_xattr_cache, inode->i_sb->s_bdev,
+				       hash);
 	while (ce) {
 		struct buffer_head *bh;
 
@@ -1237,7 +1237,7 @@ again:
 			return bh;
 		}
 		brelse(bh);
-		ce = mb_cache_entry_find_next(ce, 0, inode->i_sb->s_bdev, hash);
+		ce = mb_cache_entry_find_next(ce, inode->i_sb->s_bdev, hash);
 	}
 	return NULL;
 }
@@ -1313,9 +1313,7 @@ static void ext3_xattr_rehash(struct ext3_xattr_header *header,
 int __init
 init_ext3_xattr(void)
 {
-	ext3_xattr_cache = mb_cache_create("ext3_xattr", NULL,
-		sizeof(struct mb_cache_entry) +
-		sizeof(((struct mb_cache_entry *) 0)->e_indexes[0]), 1, 6);
+	ext3_xattr_cache = mb_cache_create("ext3_xattr", 6);
 	if (!ext3_xattr_cache)
 		return -ENOMEM;
 	return 0;
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 0433800..1c93198 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -1418,7 +1418,7 @@ ext4_xattr_cache_insert(struct buffer_head *bh)
 		ea_bdebug(bh, "out of memory");
 		return;
 	}
-	error = mb_cache_entry_insert(ce, bh->b_bdev, bh->b_blocknr, &hash);
+	error = mb_cache_entry_insert(ce, bh->b_bdev, bh->b_blocknr, hash);
 	if (error) {
 		mb_cache_entry_free(ce);
 		if (error == -EBUSY) {
@@ -1490,8 +1490,8 @@ ext4_xattr_cache_find(struct inode *inode, struct ext4_xattr_header *header,
 		return NULL;  /* never share */
 	ea_idebug(inode, "looking for cached blocks [%x]", (int)hash);
 again:
-	ce = mb_cache_entry_find_first(ext4_xattr_cache, 0,
-				       inode->i_sb->s_bdev, hash);
+	ce = mb_cache_entry_find_first(ext4_xattr_cache, inode->i_sb->s_bdev,
+				       hash);
 	while (ce) {
 		struct buffer_head *bh;
 
@@ -1515,7 +1515,7 @@ again:
 			return bh;
 		}
 		brelse(bh);
-		ce = mb_cache_entry_find_next(ce, 0, inode->i_sb->s_bdev, hash);
+		ce = mb_cache_entry_find_next(ce, inode->i_sb->s_bdev, hash);
 	}
 	return NULL;
 }
@@ -1591,9 +1591,7 @@ static void ext4_xattr_rehash(struct ext4_xattr_header *header,
 int __init
 init_ext4_xattr(void)
 {
-	ext4_xattr_cache = mb_cache_create("ext4_xattr", NULL,
-		sizeof(struct mb_cache_entry) +
-		sizeof(((struct mb_cache_entry *) 0)->e_indexes[0]), 1, 6);
+	ext4_xattr_cache = mb_cache_create("ext4_xattr", 6);
 	if (!ext4_xattr_cache)
 		return -ENOMEM;
 	return 0;
diff --git a/fs/mbcache.c b/fs/mbcache.c
index e28f21b..8a2cbd8 100644
--- a/fs/mbcache.c
+++ b/fs/mbcache.c
@@ -79,15 +79,11 @@ EXPORT_SYMBOL(mb_cache_entry_find_next);
 struct mb_cache {
 	struct list_head		c_cache_list;
 	const char			*c_name;
-	struct mb_cache_op		c_op;
 	atomic_t			c_entry_count;
 	int				c_bucket_bits;
-#ifndef MB_CACHE_INDEXES_COUNT
-	int				c_indexes_count;
-#endif
-	struct kmem_cache			*c_entry_cache;
+	struct kmem_cache		*c_entry_cache;
 	struct list_head		*c_block_hash;
-	struct list_head		*c_indexes_hash[0];
+	struct list_head		*c_index_hash;
 };
 
 
@@ -101,16 +97,6 @@ static LIST_HEAD(mb_cache_list);
 static LIST_HEAD(mb_cache_lru_list);
 static DEFINE_SPINLOCK(mb_cache_spinlock);
 
-static inline int
-mb_cache_indexes(struct mb_cache *cache)
-{
-#ifdef MB_CACHE_INDEXES_COUNT
-	return MB_CACHE_INDEXES_COUNT;
-#else
-	return cache->c_indexes_count;
-#endif
-}
-
 /*
  * What the mbcache registers as to get shrunk dynamically.
  */
@@ -132,12 +118,9 @@ __mb_cache_entry_is_hashed(struct mb_cache_entry *ce)
 static void
 __mb_cache_entry_unhash(struct mb_cache_entry *ce)
 {
-	int n;
-
 	if (__mb_cache_entry_is_hashed(ce)) {
 		list_del_init(&ce->e_block_list);
-		for (n=0; n<mb_cache_indexes(ce->e_cache); n++)
-			list_del(&ce->e_indexes[n].o_list);
+		list_del(&ce->e_index.o_list);
 	}
 }
 
@@ -148,16 +131,8 @@ __mb_cache_entry_forget(struct mb_cache_entry *ce, gfp_t gfp_mask)
 	struct mb_cache *cache = ce->e_cache;
 
 	mb_assert(!(ce->e_used || ce->e_queued));
-	if (cache->c_op.free && cache->c_op.free(ce, gfp_mask)) {
-		/* free failed -- put back on the lru list
-		   for freeing later. */
-		spin_lock(&mb_cache_spinlock);
-		list_add(&ce->e_lru_list, &mb_cache_lru_list);
-		spin_unlock(&mb_cache_spinlock);
-	} else {
-		kmem_cache_free(cache->c_entry_cache, ce);
-		atomic_dec(&cache->c_entry_count);
-	}
+	kmem_cache_free(cache->c_entry_cache, ce);
+	atomic_dec(&cache->c_entry_count);
 }
 
 
@@ -243,72 +218,49 @@ out:
  * memory was available.
  *
  * @name: name of the cache (informal)
- * @cache_op: contains the callback called when freeing a cache entry
- * @entry_size: The size of a cache entry, including
- *              struct mb_cache_entry
- * @indexes_count: number of additional indexes in the cache. Must equal
- *                 MB_CACHE_INDEXES_COUNT if the number of indexes is
- *                 hardwired.
  * @bucket_bits: log2(number of hash buckets)
  */
 struct mb_cache *
-mb_cache_create(const char *name, struct mb_cache_op *cache_op,
-		size_t entry_size, int indexes_count, int bucket_bits)
+mb_cache_create(const char *name, int bucket_bits)
 {
-	int m=0, n, bucket_count = 1 << bucket_bits;
+	int n, bucket_count = 1 << bucket_bits;
 	struct mb_cache *cache = NULL;
 
-	if(entry_size < sizeof(struct mb_cache_entry) +
-	   indexes_count * sizeof(((struct mb_cache_entry *) 0)->e_indexes[0]))
-		return NULL;
-
-	cache = kmalloc(sizeof(struct mb_cache) +
-	                indexes_count * sizeof(struct list_head), GFP_KERNEL);
+	cache = kmalloc(sizeof(struct mb_cache), GFP_KERNEL);
 	if (!cache)
-		goto fail;
+		return NULL;
 	cache->c_name = name;
-	cache->c_op.free = NULL;
-	if (cache_op)
-		cache->c_op.free = cache_op->free;
 	atomic_set(&cache->c_entry_count, 0);
 	cache->c_bucket_bits = bucket_bits;
-#ifdef MB_CACHE_INDEXES_COUNT
-	mb_assert(indexes_count == MB_CACHE_INDEXES_COUNT);
-#else
-	cache->c_indexes_count = indexes_count;
-#endif
 	cache->c_block_hash = kmalloc(bucket_count * sizeof(struct list_head),
 	                              GFP_KERNEL);
 	if (!cache->c_block_hash)
 		goto fail;
 	for (n=0; n<bucket_count; n++)
 		INIT_LIST_HEAD(&cache->c_block_hash[n]);
-	for (m=0; m<indexes_count; m++) {
-		cache->c_indexes_hash[m] = kmalloc(bucket_count *
-		                                 sizeof(struct list_head),
-		                                 GFP_KERNEL);
-		if (!cache->c_indexes_hash[m])
-			goto fail;
-		for (n=0; n<bucket_count; n++)
-			INIT_LIST_HEAD(&cache->c_indexes_hash[m][n]);
-	}
-	cache->c_entry_cache = kmem_cache_create(name, entry_size, 0,
+	cache->c_index_hash = kmalloc(bucket_count * sizeof(struct list_head),
+				      GFP_KERNEL);
+	if (!cache->c_index_hash)
+		goto fail;
+	for (n=0; n<bucket_count; n++)
+		INIT_LIST_HEAD(&cache->c_index_hash[n]);
+	cache->c_entry_cache = kmem_cache_create(name,
+		sizeof(struct mb_cache_entry), 0,
 		SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, NULL);
 	if (!cache->c_entry_cache)
-		goto fail;
+		goto fail2;
 
 	spin_lock(&mb_cache_spinlock);
 	list_add(&cache->c_cache_list, &mb_cache_list);
 	spin_unlock(&mb_cache_spinlock);
 	return cache;
 
+fail2:
+	kfree(cache->c_index_hash);
+
 fail:
-	if (cache) {
-		while (--m >= 0)
-			kfree(cache->c_indexes_hash[m]);
-		kfree(cache->c_block_hash);
-		kfree(cache);
-	}
+	kfree(cache->c_block_hash);
+	kfree(cache);
 	return NULL;
 }
 
@@ -357,7 +309,6 @@ mb_cache_destroy(struct mb_cache *cache)
 {
 	LIST_HEAD(free_list);
 	struct list_head *l, *ltmp;
-	int n;
 
 	spin_lock(&mb_cache_spinlock);
 	list_for_each_safe(l, ltmp, &mb_cache_lru_list) {
@@ -384,8 +335,7 @@ mb_cache_destroy(struct mb_cache *cache)
 
 	kmem_cache_destroy(cache->c_entry_cache);
 
-	for (n=0; n < mb_cache_indexes(cache); n++)
-		kfree(cache->c_indexes_hash[n]);
+	kfree(cache->c_index_hash);
 	kfree(cache->c_block_hash);
 	kfree(cache);
 }
@@ -429,17 +379,16 @@ mb_cache_entry_alloc(struct mb_cache *cache, gfp_t gfp_flags)
  *
  * @bdev: device the cache entry belongs to
  * @block: block number
- * @keys: array of additional keys. There must be indexes_count entries
- *        in the array (as specified when creating the cache).
+ * @key: lookup key
  */
 int
 mb_cache_entry_insert(struct mb_cache_entry *ce, struct block_device *bdev,
-		      sector_t block, unsigned int keys[])
+		      sector_t block, unsigned int key)
 {
 	struct mb_cache *cache = ce->e_cache;
 	unsigned int bucket;
 	struct list_head *l;
-	int error = -EBUSY, n;
+	int error = -EBUSY;
 
 	bucket = hash_long((unsigned long)bdev + (block & 0xffffffff), 
 			   cache->c_bucket_bits);
@@ -454,12 +403,9 @@ mb_cache_entry_insert(struct mb_cache_entry *ce, struct block_device *bdev,
 	ce->e_bdev = bdev;
 	ce->e_block = block;
 	list_add(&ce->e_block_list, &cache->c_block_hash[bucket]);
-	for (n=0; n<mb_cache_indexes(cache); n++) {
-		ce->e_indexes[n].o_key = keys[n];
-		bucket = hash_long(keys[n], cache->c_bucket_bits);
-		list_add(&ce->e_indexes[n].o_list,
-			 &cache->c_indexes_hash[n][bucket]);
-	}
+	ce->e_index.o_key = key;
+	bucket = hash_long(key, cache->c_bucket_bits);
+	list_add(&ce->e_index.o_list, &cache->c_index_hash[bucket]);
 	error = 0;
 out:
 	spin_unlock(&mb_cache_spinlock);
@@ -555,13 +501,12 @@ cleanup:
 
 static struct mb_cache_entry *
 __mb_cache_entry_find(struct list_head *l, struct list_head *head,
-		      int index, struct block_device *bdev, unsigned int key)
+		      struct block_device *bdev, unsigned int key)
 {
 	while (l != head) {
 		struct mb_cache_entry *ce =
-			list_entry(l, struct mb_cache_entry,
-			           e_indexes[index].o_list);
-		if (ce->e_bdev == bdev && ce->e_indexes[index].o_key == key) {
+			list_entry(l, struct mb_cache_entry, e_index.o_list);
+		if (ce->e_bdev == bdev && ce->e_index.o_key == key) {
 			DEFINE_WAIT(wait);
 
 			if (!list_empty(&ce->e_lru_list))
@@ -603,23 +548,20 @@ __mb_cache_entry_find(struct list_head *l, struct list_head *head,
  * returned cache entry is locked for shared access ("multiple readers").
  *
  * @cache: the cache to search
- * @index: the number of the additonal index to search (0<=index<indexes_count)
  * @bdev: the device the cache entry should belong to
  * @key: the key in the index
  */
 struct mb_cache_entry *
-mb_cache_entry_find_first(struct mb_cache *cache, int index,
-			  struct block_device *bdev, unsigned int key)
+mb_cache_entry_find_first(struct mb_cache *cache, struct block_device *bdev,
+			  unsigned int key)
 {
 	unsigned int bucket = hash_long(key, cache->c_bucket_bits);
 	struct list_head *l;
 	struct mb_cache_entry *ce;
 
-	mb_assert(index < mb_cache_indexes(cache));
 	spin_lock(&mb_cache_spinlock);
-	l = cache->c_indexes_hash[index][bucket].next;
-	ce = __mb_cache_entry_find(l, &cache->c_indexes_hash[index][bucket],
-	                           index, bdev, key);
+	l = cache->c_index_hash[bucket].next;
+	ce = __mb_cache_entry_find(l, &cache->c_index_hash[bucket], bdev, key);
 	spin_unlock(&mb_cache_spinlock);
 	return ce;
 }
@@ -640,12 +582,11 @@ mb_cache_entry_find_first(struct mb_cache *cache, int index,
  * }
  *
  * @prev: The previous match
- * @index: the number of the additonal index to search (0<=index<indexes_count)
  * @bdev: the device the cache entry should belong to
  * @key: the key in the index
  */
 struct mb_cache_entry *
-mb_cache_entry_find_next(struct mb_cache_entry *prev, int index,
+mb_cache_entry_find_next(struct mb_cache_entry *prev,
 			 struct block_device *bdev, unsigned int key)
 {
 	struct mb_cache *cache = prev->e_cache;
@@ -653,11 +594,9 @@ mb_cache_entry_find_next(struct mb_cache_entry *prev, int index,
 	struct list_head *l;
 	struct mb_cache_entry *ce;
 
-	mb_assert(index < mb_cache_indexes(cache));
 	spin_lock(&mb_cache_spinlock);
-	l = prev->e_indexes[index].o_list.next;
-	ce = __mb_cache_entry_find(l, &cache->c_indexes_hash[index][bucket],
-	                           index, bdev, key);
+	l = prev->e_index.o_list.next;
+	ce = __mb_cache_entry_find(l, &cache->c_index_hash[bucket], bdev, key);
 	__mb_cache_entry_release_unlock(prev);
 	return ce;
 }
-- 
cgit v1.1


From e566d48c9bd56f57e25e855a21e06ca2c2525795 Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruen@suse.de>
Date: Wed, 21 Jul 2010 19:44:45 +0200
Subject: mbcache: fix shrinker function return value

The shrinker function is supposed to return the number of cache
entries after shrinking, not before shrinking.  Fix that.

Based on a patch from Wang Sheng-Hui <crosslonelyover@gmail.com>.

Signed-off-by: Andreas Gruenbacher <agruen@suse.de>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/mbcache.c | 27 ++++++++++-----------------
 1 file changed, 10 insertions(+), 17 deletions(-)

(limited to 'fs')

diff --git a/fs/mbcache.c b/fs/mbcache.c
index 8a2cbd8..cf4e6cd 100644
--- a/fs/mbcache.c
+++ b/fs/mbcache.c
@@ -176,22 +176,12 @@ static int
 mb_cache_shrink_fn(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask)
 {
 	LIST_HEAD(free_list);
-	struct list_head *l, *ltmp;
+	struct mb_cache *cache;
+	struct mb_cache_entry *entry, *tmp;
 	int count = 0;
 
-	spin_lock(&mb_cache_spinlock);
-	list_for_each(l, &mb_cache_list) {
-		struct mb_cache *cache =
-			list_entry(l, struct mb_cache, c_cache_list);
-		mb_debug("cache %s (%d)", cache->c_name,
-			  atomic_read(&cache->c_entry_count));
-		count += atomic_read(&cache->c_entry_count);
-	}
 	mb_debug("trying to free %d entries", nr_to_scan);
-	if (nr_to_scan == 0) {
-		spin_unlock(&mb_cache_spinlock);
-		goto out;
-	}
+	spin_lock(&mb_cache_spinlock);
 	while (nr_to_scan-- && !list_empty(&mb_cache_lru_list)) {
 		struct mb_cache_entry *ce =
 			list_entry(mb_cache_lru_list.next,
@@ -199,12 +189,15 @@ mb_cache_shrink_fn(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask)
 		list_move_tail(&ce->e_lru_list, &free_list);
 		__mb_cache_entry_unhash(ce);
 	}
+	list_for_each_entry(cache, &mb_cache_list, c_cache_list) {
+		mb_debug("cache %s (%d)", cache->c_name,
+			  atomic_read(&cache->c_entry_count));
+		count += atomic_read(&cache->c_entry_count);
+	}
 	spin_unlock(&mb_cache_spinlock);
-	list_for_each_safe(l, ltmp, &free_list) {
-		__mb_cache_entry_forget(list_entry(l, struct mb_cache_entry,
-						   e_lru_list), gfp_mask);
+	list_for_each_entry_safe(entry, tmp, &free_list, e_lru_list) {
+		__mb_cache_entry_forget(entry, gfp_mask);
 	}
-out:
 	return (count / 100) * sysctl_vfs_cache_pressure;
 }
 
-- 
cgit v1.1


From 1b9474635e21eef0f3e69fd1c7b1b9598ffdddd3 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@infradead.org>
Date: Sun, 18 Jul 2010 17:51:21 -0400
Subject: cifs: truncate fallout

Remove the calls to inode_newsize_ok given that we already did it as
part of inode_change_ok in the beginning of cifs_setattr_(no)unix.

No need to call ->truncate if cifs doesn't have one, so remove the
explicit call in cifs_vmtruncate, and replace the calls to vmtruncate
with truncate_setsize which is vmtruncate minus inode_newsize_ok
and the call to ->truncate.

Rename cifs_vmtruncate to cifs_setsize to match the new calling conventions.

Question 1:  why does cifs do the pagecache munging and i_size update twice
	for each setattr call, once opencoded in cifs_vmtruncate, and once
	using the VFS helpers?
Question 2: what is supposed to be protected by i_lock in cifs_vmtruncate?
	Do we need it around the call to inode_change_ok?

[AV: fixed build breakage]

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/cifs/inode.c | 30 +++++++-----------------------
 1 file changed, 7 insertions(+), 23 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index b95f4a5..ddbe8a8 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -1679,26 +1679,16 @@ static int cifs_truncate_page(struct address_space *mapping, loff_t from)
 	return rc;
 }
 
-static int cifs_vmtruncate(struct inode *inode, loff_t offset)
+static void cifs_setsize(struct inode *inode, loff_t offset)
 {
 	loff_t oldsize;
-	int err;
 
 	spin_lock(&inode->i_lock);
-	err = inode_newsize_ok(inode, offset);
-	if (err) {
-		spin_unlock(&inode->i_lock);
-		goto out;
-	}
-
 	oldsize = inode->i_size;
 	i_size_write(inode, offset);
 	spin_unlock(&inode->i_lock);
+
 	truncate_pagecache(inode, oldsize, offset);
-	if (inode->i_op->truncate)
-		inode->i_op->truncate(inode);
-out:
-	return err;
 }
 
 static int
@@ -1771,7 +1761,7 @@ cifs_set_file_size(struct inode *inode, struct iattr *attrs,
 
 	if (rc == 0) {
 		cifsInode->server_eof = attrs->ia_size;
-		rc = cifs_vmtruncate(inode, attrs->ia_size);
+		cifs_setsize(inode, attrs->ia_size);
 		cifs_truncate_page(inode->i_mapping, inode->i_size);
 	}
 
@@ -1891,11 +1881,8 @@ cifs_setattr_unix(struct dentry *direntry, struct iattr *attrs)
 		goto out;
 
 	if ((attrs->ia_valid & ATTR_SIZE) &&
-	    attrs->ia_size != i_size_read(inode)) {
-		rc = vmtruncate(inode, attrs->ia_size);
-		if (rc)
-			goto out;
-	}
+	    attrs->ia_size != i_size_read(inode))
+		truncate_setsize(inode, attrs->ia_size);
 
 	setattr_copy(inode, attrs);
 	mark_inode_dirty(inode);
@@ -2050,11 +2037,8 @@ cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs)
 		goto cifs_setattr_exit;
 
 	if ((attrs->ia_valid & ATTR_SIZE) &&
-	    attrs->ia_size != i_size_read(inode)) {
-		rc = vmtruncate(inode, attrs->ia_size);
-		if (rc)
-			goto cifs_setattr_exit;
-	}
+	    attrs->ia_size != i_size_read(inode))
+		truncate_setsize(inode, attrs->ia_size);
 
 	setattr_copy(inode, attrs);
 	mark_inode_dirty(inode);
-- 
cgit v1.1


From 669d5f1f608f7de29bb467bb239517a414e43777 Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Date: Mon, 5 Jul 2010 15:14:59 +0300
Subject: AFFS: clean up dirty flag usage

In 'affs_write_super()': remove ancient and wrong commented code,
remove unneeded 'clean' variable, so the function becomes a bit
cleaner and simpler.

In 'affs_remount(): remove unnecessary SB dirty flag changes.

Tested-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/affs/super.c | 19 +++++--------------
 1 file changed, 5 insertions(+), 14 deletions(-)

(limited to 'fs')

diff --git a/fs/affs/super.c b/fs/affs/super.c
index 2c804a8..fde3b9a 100644
--- a/fs/affs/super.c
+++ b/fs/affs/super.c
@@ -61,20 +61,13 @@ affs_put_super(struct super_block *sb)
 static void
 affs_write_super(struct super_block *sb)
 {
-	int clean = 2;
-
 	lock_super(sb);
-	if (!(sb->s_flags & MS_RDONLY)) {
-		//	if (sbi->s_bitmap[i].bm_bh) {
-		//		if (buffer_dirty(sbi->s_bitmap[i].bm_bh)) {
-		//			clean = 0;
-		affs_commit_super(sb, clean);
-		sb->s_dirt = !clean;	/* redo until bitmap synced */
-	} else
-		sb->s_dirt = 0;
+	if (!(sb->s_flags & MS_RDONLY))
+		affs_commit_super(sb, 2);
+	sb->s_dirt = 0;
 	unlock_super(sb);
 
-	pr_debug("AFFS: write_super() at %lu, clean=%d\n", get_seconds(), clean);
+	pr_debug("AFFS: write_super() at %lu, clean=2\n", get_seconds());
 }
 
 static int
@@ -553,9 +546,7 @@ affs_remount(struct super_block *sb, int *flags, char *data)
 		return 0;
 	}
 	if (*flags & MS_RDONLY) {
-		sb->s_dirt = 1;
-		while (sb->s_dirt)
-			affs_write_super(sb);
+		affs_write_super(sb);
 		affs_free_bitmap(sb);
 	} else
 		res = affs_init_bitmap(sb, flags);
-- 
cgit v1.1


From 7435d50611b04c1155a939a9f373154a53606592 Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Date: Mon, 5 Jul 2010 15:15:00 +0300
Subject: AFFS: wait for sb synchronization when needed

AFFS does not ever wait for superblock synchronization in
->put_super(), ->write_super, and ->sync_fs().

However, it should wait for synchronization in ->put_super() because
it is about to be unmounted, in ->write_super() because this is
periodic SB synchronization performed from a separate kernel thread,
and in ->sync_fs() it should respect the 'wait' flag. This patch fixes
the situation.

Also, in ->put_super(), do not write the SB if it is not dirty.

Tested-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/affs/super.c | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

(limited to 'fs')

diff --git a/fs/affs/super.c b/fs/affs/super.c
index fde3b9a..33c4e7e 100644
--- a/fs/affs/super.c
+++ b/fs/affs/super.c
@@ -26,7 +26,7 @@ static int affs_statfs(struct dentry *dentry, struct kstatfs *buf);
 static int affs_remount (struct super_block *sb, int *flags, char *data);
 
 static void
-affs_commit_super(struct super_block *sb, int clean)
+affs_commit_super(struct super_block *sb, int wait, int clean)
 {
 	struct affs_sb_info *sbi = AFFS_SB(sb);
 	struct buffer_head *bh = sbi->s_root_bh;
@@ -36,6 +36,8 @@ affs_commit_super(struct super_block *sb, int clean)
 	secs_to_datestamp(get_seconds(), &tail->disk_change);
 	affs_fix_checksum(sb, bh);
 	mark_buffer_dirty(bh);
+	if (wait)
+		sync_dirty_buffer(bh);
 }
 
 static void
@@ -46,8 +48,8 @@ affs_put_super(struct super_block *sb)
 
 	lock_kernel();
 
-	if (!(sb->s_flags & MS_RDONLY))
-		affs_commit_super(sb, 1);
+	if (!(sb->s_flags & MS_RDONLY) && sb->s_dirt)
+		affs_commit_super(sb, 1, 1);
 
 	kfree(sbi->s_prefix);
 	affs_free_bitmap(sb);
@@ -63,7 +65,7 @@ affs_write_super(struct super_block *sb)
 {
 	lock_super(sb);
 	if (!(sb->s_flags & MS_RDONLY))
-		affs_commit_super(sb, 2);
+		affs_commit_super(sb, 1, 2);
 	sb->s_dirt = 0;
 	unlock_super(sb);
 
@@ -74,7 +76,7 @@ static int
 affs_sync_fs(struct super_block *sb, int wait)
 {
 	lock_super(sb);
-	affs_commit_super(sb, 2);
+	affs_commit_super(sb, wait, 2);
 	sb->s_dirt = 0;
 	unlock_super(sb);
 	return 0;
-- 
cgit v1.1


From 4e29d50a28c267bd1d1731a9fb8f773663d93e23 Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Date: Mon, 5 Jul 2010 15:15:01 +0300
Subject: BFS: clean up the superblock usage

BFS is a very simple FS and its superblocks contains only static
information and is never changed. However, the BFS code for some
misterious reasons marked its buffer head as dirty from time to
time, but nothing in that buffer was ever changed.

This patch removes all the BFS superblock manipulation, simply
because it is not needed. It removes:

1. The si_sbh filed from 'struct bfs_sb_info' because it is not
   needed. We only need to read the SB once on mount to get the
   start of data blocks and the FS size. After this, we can forget
   about the SB.
2. All instances of 'mark_buffer_dirty(sbh)' for BFS SB because
   it is never changed.
3. The '->sync_fs()' method because there is nothing to sync
   (inodes are synched by VFS).
4. The '->write_super()' method, again, because the SB is never
   changed.

Tested-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/bfs/bfs.h   |  1 -
 fs/bfs/file.c  |  3 ---
 fs/bfs/inode.c | 46 +++++++---------------------------------------
 3 files changed, 7 insertions(+), 43 deletions(-)

(limited to 'fs')

diff --git a/fs/bfs/bfs.h b/fs/bfs/bfs.h
index 7109e45..f7f87e2 100644
--- a/fs/bfs/bfs.h
+++ b/fs/bfs/bfs.h
@@ -17,7 +17,6 @@ struct bfs_sb_info {
 	unsigned long si_lf_eblk;
 	unsigned long si_lasti;
 	unsigned long *si_imap;
-	struct buffer_head *si_sbh;		/* buffer header w/superblock */
 	struct mutex bfs_lock;
 };
 
diff --git a/fs/bfs/file.c b/fs/bfs/file.c
index 8fc2e9c..eb67edd 100644
--- a/fs/bfs/file.c
+++ b/fs/bfs/file.c
@@ -70,7 +70,6 @@ static int bfs_get_block(struct inode *inode, sector_t block,
 	struct super_block *sb = inode->i_sb;
 	struct bfs_sb_info *info = BFS_SB(sb);
 	struct bfs_inode_info *bi = BFS_I(inode);
-	struct buffer_head *sbh = info->si_sbh;
 
 	phys = bi->i_sblock + block;
 	if (!create) {
@@ -112,7 +111,6 @@ static int bfs_get_block(struct inode *inode, sector_t block,
 		info->si_freeb -= phys - bi->i_eblock;
 		info->si_lf_eblk = bi->i_eblock = phys;
 		mark_inode_dirty(inode);
-		mark_buffer_dirty(sbh);
 		err = 0;
 		goto out;
 	}
@@ -147,7 +145,6 @@ static int bfs_get_block(struct inode *inode, sector_t block,
 	 */
 	info->si_freeb -= bi->i_eblock - bi->i_sblock + 1 - inode->i_blocks;
 	mark_inode_dirty(inode);
-	mark_buffer_dirty(sbh);
 	map_bh(bh_result, sb, phys);
 out:
 	mutex_unlock(&info->bfs_lock);
diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c
index 0499822..c4daf0f 100644
--- a/fs/bfs/inode.c
+++ b/fs/bfs/inode.c
@@ -31,7 +31,6 @@ MODULE_LICENSE("GPL");
 #define dprintf(x...)
 #endif
 
-static void bfs_write_super(struct super_block *s);
 void dump_imap(const char *prefix, struct super_block *s);
 
 struct inode *bfs_iget(struct super_block *sb, unsigned long ino)
@@ -204,33 +203,11 @@ static void bfs_evict_inode(struct inode *inode)
 	 * "last block of the last file" even if there is no
 	 * real file there, saves us 1 gap.
 	 */
-	if (info->si_lf_eblk == bi->i_eblock) {
+	if (info->si_lf_eblk == bi->i_eblock)
 		info->si_lf_eblk = bi->i_sblock - 1;
-		mark_buffer_dirty(info->si_sbh);
-	}
 	mutex_unlock(&info->bfs_lock);
 }
 
-static int bfs_sync_fs(struct super_block *sb, int wait)
-{
-	struct bfs_sb_info *info = BFS_SB(sb);
-
-	mutex_lock(&info->bfs_lock);
-	mark_buffer_dirty(info->si_sbh);
-	sb->s_dirt = 0;
-	mutex_unlock(&info->bfs_lock);
-
-	return 0;
-}
-
-static void bfs_write_super(struct super_block *sb)
-{
-	if (!(sb->s_flags & MS_RDONLY))
-		bfs_sync_fs(sb, 1);
-	else
-		sb->s_dirt = 0;
-}
-
 static void bfs_put_super(struct super_block *s)
 {
 	struct bfs_sb_info *info = BFS_SB(s);
@@ -240,10 +217,6 @@ static void bfs_put_super(struct super_block *s)
 
 	lock_kernel();
 
-	if (s->s_dirt)
-		bfs_write_super(s);
-
-	brelse(info->si_sbh);
 	mutex_destroy(&info->bfs_lock);
 	kfree(info->si_imap);
 	kfree(info);
@@ -315,8 +288,6 @@ static const struct super_operations bfs_sops = {
 	.write_inode	= bfs_write_inode,
 	.evict_inode	= bfs_evict_inode,
 	.put_super	= bfs_put_super,
-	.write_super	= bfs_write_super,
-	.sync_fs	= bfs_sync_fs,
 	.statfs		= bfs_statfs,
 };
 
@@ -343,7 +314,7 @@ void dump_imap(const char *prefix, struct super_block *s)
 
 static int bfs_fill_super(struct super_block *s, void *data, int silent)
 {
-	struct buffer_head *bh;
+	struct buffer_head *bh, *sbh;
 	struct bfs_super_block *bfs_sb;
 	struct inode *inode;
 	unsigned i, imap_len;
@@ -359,10 +330,10 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent)
 
 	sb_set_blocksize(s, BFS_BSIZE);
 
-	info->si_sbh = sb_bread(s, 0);
-	if (!info->si_sbh)
+	sbh = sb_bread(s, 0);
+	if (!sbh)
 		goto out;
-	bfs_sb = (struct bfs_super_block *)info->si_sbh->b_data;
+	bfs_sb = (struct bfs_super_block *)sbh->b_data;
 	if (le32_to_cpu(bfs_sb->s_magic) != BFS_MAGIC) {
 		if (!silent)
 			printf("No BFS filesystem on %s (magic=%08x)\n", 
@@ -466,10 +437,7 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent)
 			info->si_lf_eblk = eblock;
 	}
 	brelse(bh);
-	if (!(s->s_flags & MS_RDONLY)) {
-		mark_buffer_dirty(info->si_sbh);
-		s->s_dirt = 1;
-	} 
+	brelse(sbh);
 	dump_imap("read_super", s);
 	return 0;
 
@@ -479,7 +447,7 @@ out3:
 out2:
 	kfree(info->si_imap);
 out1:
-	brelse(info->si_sbh);
+	brelse(sbh);
 out:
 	mutex_destroy(&info->bfs_lock);
 	kfree(info);
-- 
cgit v1.1


From 696ac96c2757963cd6751c26215e3c6d328705aa Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Date: Mon, 5 Jul 2010 15:15:02 +0300
Subject: btrfs: remove junk sb_dirt change

BTRFS does not define a '->write_super()' method, so it should
not mark its superblock as dirty. This looks like some left-over.

Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Acked-by: Chris Mason <chris.mason@oracle.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/btrfs/inode.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'fs')

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 2c54f04..8976c33 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -2938,7 +2938,6 @@ int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
 	dir->i_mtime = dir->i_ctime = CURRENT_TIME;
 	ret = btrfs_update_inode(trans, root, dir);
 	BUG_ON(ret);
-	dir->i_sb->s_dirt = 1;
 
 	btrfs_free_path(path);
 	return 0;
-- 
cgit v1.1


From 315671f3b8091bc8e15035ffeba5f7bff7b8edec Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Date: Mon, 5 Jul 2010 15:15:03 +0300
Subject: sysv: do not mark superblock dirty on mount

I did not find any docs about this file-system, and I have no possibility
to test my changes. Thus, this is untested.

Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/sysv/super.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'fs')

diff --git a/fs/sysv/super.c b/fs/sysv/super.c
index 5a903da..0e44a62 100644
--- a/fs/sysv/super.c
+++ b/fs/sysv/super.c
@@ -347,7 +347,6 @@ static int complete_read_super(struct super_block *sb, int silent, int size)
 		sb->s_flags |= MS_RDONLY;
 	if (sbi->s_truncate)
 		sb->s_root->d_op = &sysv_dentry_operations;
-	sb->s_dirt = 1;
 	return 1;
 }
 
-- 
cgit v1.1


From 719f2c879f4dda7d7f303bd387d37cd96db29d31 Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Date: Mon, 5 Jul 2010 15:15:04 +0300
Subject: sysv: do not mark superblock dirty on remount

No need to mark the superblock as dirty in sysv_remount, synchronize
it instead (only if mounting R/O).

I did not find any docs about this file-system, and I have no possibility
to test my changes. Thus, this is untested. I see other issues in sysv,
e.g., why sysv_sync_fs writes only in the FSTYPE_SYSV4 case? However,
it marks its SB bh's dirty for all types, and does not wait for them
ever. With zero docs I'm unable to fix this.

Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/sysv/inode.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c
index 613a505..de44d06 100644
--- a/fs/sysv/inode.c
+++ b/fs/sysv/inode.c
@@ -71,8 +71,8 @@ static int sysv_remount(struct super_block *sb, int *flags, char *data)
 	lock_super(sb);
 	if (sbi->s_forced_ro)
 		*flags |= MS_RDONLY;
-	if (!(*flags & MS_RDONLY))
-		sb->s_dirt = 1;
+	if (*flags & MS_RDONLY)
+		sysv_write_super(sb);
 	unlock_super(sb);
 	return 0;
 }
-- 
cgit v1.1


From 4f331f01b9c43bf001d3ffee578a97a1e0633eac Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 20 Jul 2010 15:18:07 -0700
Subject: vfs: don't hold s_umount over close_bdev_exclusive() call

Fix an obscure AB-BA deadlock in get_sb_bdev().

When a superblock is mounted more than once get_sb_bdev() calls
close_bdev_exclusive() to drop the extra bdev reference while holding
s_umount.  However, sb->s_umount nests inside bd_mutex during
__invalidate_device() and close_bdev_exclusive() acquires bd_mutex during
blkdev_put(); thus creating an AB-BA deadlock.

This condition doesn't trigger frequently.  For this condition to be
visible to lockdep, the filesystem must occupy the whole device (as
__invalidate_device() only grabs bd_mutex for the whole device), the FS
must be mounted more than once and partition rescan should be issued while
the FS is still mounted.

Fix it by dropping s_umount over close_bdev_exclusive().

Signed-off-by: Tejun Heo <tj@kernel.org>
Reported-by: Ciprian Docan <docan@eden.rutgers.edu>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Acked-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/super.c | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'fs')

diff --git a/fs/super.c b/fs/super.c
index 938119a..3479ca6 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -773,7 +773,16 @@ int get_sb_bdev(struct file_system_type *fs_type,
 			goto error_bdev;
 		}
 
+		/*
+		 * s_umount nests inside bd_mutex during
+		 * __invalidate_device().  close_bdev_exclusive()
+		 * acquires bd_mutex and can't be called under
+		 * s_umount.  Drop s_umount temporarily.  This is safe
+		 * as we're holding an active reference.
+		 */
+		up_write(&s->s_umount);
 		close_bdev_exclusive(bdev, mode);
+		down_write(&s->s_umount);
 	} else {
 		char b[BDEVNAME_SIZE];
 
-- 
cgit v1.1


From 7a4dec53897ecd3367efb1e12fe8a4edc47dc0e9 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Mon, 9 Aug 2010 12:05:43 -0400
Subject: Fix sget() race with failing mount

If sget() finds a matching superblock being set up, it'll
grab an active reference to it and grab s_umount.  That's
fine - we'll wait for completion of foofs_get_sb() that way.
However, if said foofs_get_sb() fails we'll end up holding
the halfway-created superblock.  deactivate_locked_super()
called by foofs_get_sb() will just unlock the sucker since
we are holding another active reference to it.

What we need is a way to tell if superblock has been successfully
set up.  Unfortunately, neither ->s_root nor the check for
MS_ACTIVE quite fit.  Cheap and easy way, suitable for backport:
new flag set by the (only) caller of ->get_sb().  If that flag
isn't present by the time sget() grabbed s_umount on preexisting
superblock it has found, it's seeing a stillborn and should
just bury it with deactivate_locked_super() (and repeat the search).

Longer term we want to set that flag in ->get_sb() instances (and
check for it to distinguish between "sget() found us a live sb"
and "sget() has allocated an sb, we need to set it up" in there,
instead of checking ->s_root as we do now).

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Cc: stable@kernel.org
---
 fs/namespace.c | 2 +-
 fs/super.c     | 6 ++++++
 2 files changed, 7 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/namespace.c b/fs/namespace.c
index 88058de..32dcd24 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1984,7 +1984,7 @@ long do_mount(char *dev_name, char *dir_name, char *type_page,
 	if (flags & MS_RDONLY)
 		mnt_flags |= MNT_READONLY;
 
-	flags &= ~(MS_NOSUID | MS_NOEXEC | MS_NODEV | MS_ACTIVE |
+	flags &= ~(MS_NOSUID | MS_NOEXEC | MS_NODEV | MS_ACTIVE | MS_BORN |
 		   MS_NOATIME | MS_NODIRATIME | MS_RELATIME| MS_KERNMOUNT |
 		   MS_STRICTATIME);
 
diff --git a/fs/super.c b/fs/super.c
index 3479ca6..bd9eea4 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -305,8 +305,13 @@ retry:
 			if (s) {
 				up_write(&s->s_umount);
 				destroy_super(s);
+				s = NULL;
 			}
 			down_write(&old->s_umount);
+			if (unlikely(!(old->s_flags & MS_BORN))) {
+				deactivate_locked_super(old);
+				goto retry;
+			}
 			return old;
 		}
 	}
@@ -918,6 +923,7 @@ vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void
 		goto out_free_secdata;
 	BUG_ON(!mnt->mnt_sb);
 	WARN_ON(!mnt->mnt_sb->s_bdi);
+	mnt->mnt_sb->s_flags |= MS_BORN;
 
 	error = security_sb_kern_mount(mnt->mnt_sb, flags, secdata);
 	if (error)
-- 
cgit v1.1


From dca332528bc69e05f67161e1ed59929633d5e63d Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 25 Jul 2010 02:31:46 +0400
Subject: no need for list_for_each_entry_safe()/resetting with superblock list

just delay __put_super() a bit

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/dcache.c | 12 +++++++-----
 fs/super.c  | 36 +++++++++++++++++++++---------------
 2 files changed, 28 insertions(+), 20 deletions(-)

(limited to 'fs')

diff --git a/fs/dcache.c b/fs/dcache.c
index caf0857..9f2c134 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -536,7 +536,7 @@ restart:
  */
 static void prune_dcache(int count)
 {
-	struct super_block *sb, *n;
+	struct super_block *sb, *p = NULL;
 	int w_count;
 	int unused = dentry_stat.nr_unused;
 	int prune_ratio;
@@ -550,7 +550,7 @@ static void prune_dcache(int count)
 	else
 		prune_ratio = unused / count;
 	spin_lock(&sb_lock);
-	list_for_each_entry_safe(sb, n, &super_blocks, s_list) {
+	list_for_each_entry(sb, &super_blocks, s_list) {
 		if (list_empty(&sb->s_instances))
 			continue;
 		if (sb->s_nr_dentry_unused == 0)
@@ -590,14 +590,16 @@ static void prune_dcache(int count)
 			up_read(&sb->s_umount);
 		}
 		spin_lock(&sb_lock);
-		/* lock was dropped, must reset next */
-		list_safe_reset_next(sb, n, s_list);
+		if (p)
+			__put_super(p);
 		count -= pruned;
-		__put_super(sb);
+		p = sb;
 		/* more work left to do? */
 		if (count <= 0)
 			break;
 	}
+	if (p)
+		__put_super(p);
 	spin_unlock(&sb_lock);
 	spin_unlock(&dcache_lock);
 }
diff --git a/fs/super.c b/fs/super.c
index bd9eea4..9674ab2 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -363,10 +363,10 @@ EXPORT_SYMBOL(drop_super);
  */
 void sync_supers(void)
 {
-	struct super_block *sb, *n;
+	struct super_block *sb, *p = NULL;
 
 	spin_lock(&sb_lock);
-	list_for_each_entry_safe(sb, n, &super_blocks, s_list) {
+	list_for_each_entry(sb, &super_blocks, s_list) {
 		if (list_empty(&sb->s_instances))
 			continue;
 		if (sb->s_op->write_super && sb->s_dirt) {
@@ -379,11 +379,13 @@ void sync_supers(void)
 			up_read(&sb->s_umount);
 
 			spin_lock(&sb_lock);
-			/* lock was dropped, must reset next */
-			list_safe_reset_next(sb, n, s_list);
-			__put_super(sb);
+			if (p)
+				__put_super(p);
+			p = sb;
 		}
 	}
+	if (p)
+		__put_super(p);
 	spin_unlock(&sb_lock);
 }
 
@@ -397,10 +399,10 @@ void sync_supers(void)
  */
 void iterate_supers(void (*f)(struct super_block *, void *), void *arg)
 {
-	struct super_block *sb, *n;
+	struct super_block *sb, *p = NULL;
 
 	spin_lock(&sb_lock);
-	list_for_each_entry_safe(sb, n, &super_blocks, s_list) {
+	list_for_each_entry(sb, &super_blocks, s_list) {
 		if (list_empty(&sb->s_instances))
 			continue;
 		sb->s_count++;
@@ -412,10 +414,12 @@ void iterate_supers(void (*f)(struct super_block *, void *), void *arg)
 		up_read(&sb->s_umount);
 
 		spin_lock(&sb_lock);
-		/* lock was dropped, must reset next */
-		list_safe_reset_next(sb, n, s_list);
-		__put_super(sb);
+		if (p)
+			__put_super(p);
+		p = sb;
 	}
+	if (p)
+		__put_super(p);
 	spin_unlock(&sb_lock);
 }
 
@@ -577,10 +581,10 @@ int do_remount_sb(struct super_block *sb, int flags, void *data, int force)
 
 static void do_emergency_remount(struct work_struct *work)
 {
-	struct super_block *sb, *n;
+	struct super_block *sb, *p = NULL;
 
 	spin_lock(&sb_lock);
-	list_for_each_entry_safe(sb, n, &super_blocks, s_list) {
+	list_for_each_entry(sb, &super_blocks, s_list) {
 		if (list_empty(&sb->s_instances))
 			continue;
 		sb->s_count++;
@@ -594,10 +598,12 @@ static void do_emergency_remount(struct work_struct *work)
 		}
 		up_write(&sb->s_umount);
 		spin_lock(&sb_lock);
-		/* lock was dropped, must reset next */
-		list_safe_reset_next(sb, n, s_list);
-		__put_super(sb);
+		if (p)
+			__put_super(p);
+		p = sb;
 	}
+	if (p)
+		__put_super(p);
 	spin_unlock(&sb_lock);
 	kfree(work);
 	printk("Emergency Remount complete\n");
-- 
cgit v1.1


From 26df6d13406d1a53b0bda08bd712f1924affd7cd Mon Sep 17 00:00:00 2001
From: "hyc@symas.com" <hyc@symas.com>
Date: Tue, 22 Jun 2010 10:14:49 -0700
Subject: tty: Add EXTPROC support for LINEMODE

This patch is against the 2.6.34 source.

Paraphrased from the 1989 BSD patch by David Borman @ cray.com:

     These are the changes needed for the kernel to support
     LINEMODE in the server.

     There is a new bit in the termios local flag word, EXTPROC.
     When this bit is set, several aspects of the terminal driver
     are disabled.  Input line editing, character echo, and mapping
     of signals are all disabled.  This allows the telnetd to turn
     off these functions when in linemode, but still keep track of
     what state the user wants the terminal to be in.

     New ioctl:
         TIOCSIG         Generate a signal to processes in the
                         current process group of the pty.

     There is a new mode for packet driver, the TIOCPKT_IOCTL bit.
     When packet mode is turned on in the pty, and the EXTPROC bit
     is set, then whenever the state of the pty is changed, the
     next read on the master side of the pty will have the TIOCPKT_IOCTL
     bit set.  This allows the process on the server side of the pty
     to know when the state of the terminal has changed; it can then
     issue the appropriate ioctl to retrieve the new state.

Since the original BSD patches accompanied the source code for telnet
I've left that reference here, but obviously the feature is useful for
any remote terminal protocol, including ssh.

The corresponding feature has existed in the BSD tty driver since 1989.
For historical reference, a good copy of the relevant files can be found
here:

http://anonsvn.mit.edu/viewvc/krb5/trunk/src/appl/telnet/?pathrev=17741

Signed-off-by: Howard Chu <hyc@symas.com>
Cc: Alan Cox <alan@lxorguk.ukuu.org.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 fs/compat_ioctl.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'fs')

diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index 63ae858..fa4bc48 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -969,6 +969,7 @@ COMPATIBLE_IOCTL(TIOCGPGRP)
 COMPATIBLE_IOCTL(TIOCGPTN)
 COMPATIBLE_IOCTL(TIOCSPTLCK)
 COMPATIBLE_IOCTL(TIOCSERGETLSR)
+COMPATIBLE_IOCTL(TIOCSIG)
 #ifdef TCGETS2
 COMPATIBLE_IOCTL(TCGETS2)
 COMPATIBLE_IOCTL(TCSETS2)
-- 
cgit v1.1


From aca0fa34bdaba39bfddddba8ca70dba4782e8fe6 Mon Sep 17 00:00:00 2001
From: Dave Kleikamp <shaggy@linux.vnet.ibm.com>
Date: Mon, 9 Aug 2010 15:57:38 -0500
Subject: jfs: don't allow os2 xattr namespace overlap with others

It's currently possible to bypass xattr namespace access rules by
prefixing valid xattr names with "os2.", since the os2 namespace stores
extended attributes in a legacy format with no prefix.

This patch adds checking to deny access to any valid namespace prefix
following "os2.".

Signed-off-by: Dave Kleikamp <shaggy@linux.vnet.ibm.com>
Reported-by: Sergey Vlasov <vsu@altlinux.ru>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/jfs/xattr.c | 87 +++++++++++++++++++++++++---------------------------------
 1 file changed, 38 insertions(+), 49 deletions(-)

(limited to 'fs')

diff --git a/fs/jfs/xattr.c b/fs/jfs/xattr.c
index fa96bbb..2d7f165 100644
--- a/fs/jfs/xattr.c
+++ b/fs/jfs/xattr.c
@@ -86,46 +86,25 @@ struct ea_buffer {
 #define EA_MALLOC	0x0008
 
 
+static int is_known_namespace(const char *name)
+{
+	if (strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN) &&
+	    strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN) &&
+	    strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN) &&
+	    strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN))
+		return false;
+
+	return true;
+}
+
 /*
  * These three routines are used to recognize on-disk extended attributes
  * that are in a recognized namespace.  If the attribute is not recognized,
  * "os2." is prepended to the name
  */
-static inline int is_os2_xattr(struct jfs_ea *ea)
+static int is_os2_xattr(struct jfs_ea *ea)
 {
-	/*
-	 * Check for "system."
-	 */
-	if ((ea->namelen >= XATTR_SYSTEM_PREFIX_LEN) &&
-	    !strncmp(ea->name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN))
-		return false;
-	/*
-	 * Check for "user."
-	 */
-	if ((ea->namelen >= XATTR_USER_PREFIX_LEN) &&
-	    !strncmp(ea->name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN))
-		return false;
-	/*
-	 * Check for "security."
-	 */
-	if ((ea->namelen >= XATTR_SECURITY_PREFIX_LEN) &&
-	    !strncmp(ea->name, XATTR_SECURITY_PREFIX,
-		     XATTR_SECURITY_PREFIX_LEN))
-		return false;
-	/*
-	 * Check for "trusted."
-	 */
-	if ((ea->namelen >= XATTR_TRUSTED_PREFIX_LEN) &&
-	    !strncmp(ea->name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN))
-		return false;
-	/*
-	 * Add any other valid namespace prefixes here
-	 */
-
-	/*
-	 * We assume it's OS/2's flat namespace
-	 */
-	return true;
+	return !is_known_namespace(ea->name);
 }
 
 static inline int name_size(struct jfs_ea *ea)
@@ -764,13 +743,23 @@ static int can_set_xattr(struct inode *inode, const char *name,
 	if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN))
 		return can_set_system_xattr(inode, name, value, value_len);
 
+	if (!strncmp(name, XATTR_OS2_PREFIX, XATTR_OS2_PREFIX_LEN)) {
+		/*
+		 * This makes sure that we aren't trying to set an
+		 * attribute in a different namespace by prefixing it
+		 * with "os2."
+		 */
+		if (is_known_namespace(name + XATTR_OS2_PREFIX_LEN))
+				return -EOPNOTSUPP;
+		return 0;
+	}
+
 	/*
 	 * Don't allow setting an attribute in an unknown namespace.
 	 */
 	if (strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) &&
 	    strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN) &&
-	    strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN) &&
-	    strncmp(name, XATTR_OS2_PREFIX, XATTR_OS2_PREFIX_LEN))
+	    strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN))
 		return -EOPNOTSUPP;
 
 	return 0;
@@ -952,19 +941,8 @@ ssize_t __jfs_getxattr(struct inode *inode, const char *name, void *data,
 	int xattr_size;
 	ssize_t size;
 	int namelen = strlen(name);
-	char *os2name = NULL;
 	char *value;
 
-	if (strncmp(name, XATTR_OS2_PREFIX, XATTR_OS2_PREFIX_LEN) == 0) {
-		os2name = kmalloc(namelen - XATTR_OS2_PREFIX_LEN + 1,
-				  GFP_KERNEL);
-		if (!os2name)
-			return -ENOMEM;
-		strcpy(os2name, name + XATTR_OS2_PREFIX_LEN);
-		name = os2name;
-		namelen -= XATTR_OS2_PREFIX_LEN;
-	}
-
 	down_read(&JFS_IP(inode)->xattr_sem);
 
 	xattr_size = ea_get(inode, &ea_buf, 0);
@@ -1002,8 +980,6 @@ ssize_t __jfs_getxattr(struct inode *inode, const char *name, void *data,
       out:
 	up_read(&JFS_IP(inode)->xattr_sem);
 
-	kfree(os2name);
-
 	return size;
 }
 
@@ -1012,6 +988,19 @@ ssize_t jfs_getxattr(struct dentry *dentry, const char *name, void *data,
 {
 	int err;
 
+	if (strncmp(name, XATTR_OS2_PREFIX, XATTR_OS2_PREFIX_LEN) == 0) {
+		/*
+		 * skip past "os2." prefix
+		 */
+		name += XATTR_OS2_PREFIX_LEN;
+		/*
+		 * Don't allow retrieving properly prefixed attributes
+		 * by prepending them with "os2."
+		 */
+		if (is_known_namespace(name))
+			return -EOPNOTSUPP;
+	}
+
 	err = __jfs_getxattr(dentry->d_inode, name, data, buf_size);
 
 	return err;
-- 
cgit v1.1