From ac45d61357e86b9a0cf14e45e8e09dfb626970ef Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@suse.cz>
Date: Mon, 5 Mar 2012 15:48:11 +0100
Subject: fuse: fix nlink after unlink

Anand Avati reports that the following sequence of system calls fail on a fuse
filesystem:


 	create("filename") => 0
 	link("filename", "linkname") => 0
 	unlink("filename") => 0
 	link("linkname", "filename") => -ENOENT ### BUG ###

vfs_link() fails with ENOENT if i_nlink is zero, this is done to prevent
resurrecting already deleted files.

Fuse clears i_nlink on unlink even if there are other links pointing to the
file.

Reported-by: Anand Avati <avati@redhat.com>
Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
---
 fs/fuse/dir.c | 22 +++++++++++++++-------
 1 file changed, 15 insertions(+), 7 deletions(-)

(limited to 'fs')

diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 2066328..da37907 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -644,13 +644,12 @@ static int fuse_unlink(struct inode *dir, struct dentry *entry)
 	fuse_put_request(fc, req);
 	if (!err) {
 		struct inode *inode = entry->d_inode;
+		struct fuse_inode *fi = get_fuse_inode(inode);
 
-		/*
-		 * Set nlink to zero so the inode can be cleared, if the inode
-		 * does have more links this will be discovered at the next
-		 * lookup/getattr.
-		 */
-		clear_nlink(inode);
+		spin_lock(&fc->lock);
+		fi->attr_version = ++fc->attr_version;
+		drop_nlink(inode);
+		spin_unlock(&fc->lock);
 		fuse_invalidate_attr(inode);
 		fuse_invalidate_attr(dir);
 		fuse_invalidate_entry_cache(entry);
@@ -762,8 +761,17 @@ static int fuse_link(struct dentry *entry, struct inode *newdir,
 	   will reflect changes in the backing inode (link count,
 	   etc.)
 	*/
-	if (!err || err == -EINTR)
+	if (!err) {
+		struct fuse_inode *fi = get_fuse_inode(inode);
+
+		spin_lock(&fc->lock);
+		fi->attr_version = ++fc->attr_version;
+		inc_nlink(inode);
+		spin_unlock(&fc->lock);
 		fuse_invalidate_attr(inode);
+	} else if (err == -EINTR) {
+		fuse_invalidate_attr(inode);
+	}
 	return err;
 }
 
-- 
cgit v1.1


From 4273b793ec68753cc3fcf5be7cbfd88c2be2058d Mon Sep 17 00:00:00 2001
From: Anand Avati <avati@redhat.com>
Date: Fri, 17 Feb 2012 12:46:25 -0500
Subject: fuse: O_DIRECT support for files

Implement ->direct_IO() method in aops. The ->direct_IO() method combines
the existing fuse_direct_read/fuse_direct_write methods to implement
O_DIRECT functionality.

Reaching ->direct_IO() in the read path via generic_file_aio_read ensures
proper synchronization with page cache with its existing framework.

Reaching ->direct_IO() in the write path via fuse_file_aio_write is made
to come via generic_file_direct_write() which makes it play nice with
the page cache w.r.t other mmap pages etc.

On files marked 'direct_io' by the filesystem server, IO always follows
the fuse_direct_read/write path. There is no effect of fcntl(O_DIRECT)
and it always succeeds.

On files not marked with 'direct_io' by the filesystem server, the IO
path depends on O_DIRECT flag by the application. This can be passed
at the time of open() as well as via fcntl().

Note that asynchronous O_DIRECT iocb jobs are completed synchronously
always (this has been the case with FUSE even before this patch)

Signed-off-by: Anand Avati <avati@redhat.com>
Reviewed-by: Jeff Moyer <jmoyer@redhat.com>
Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
---
 fs/fuse/dir.c  |   3 --
 fs/fuse/file.c | 129 +++++++++++++++++++++++++++++++++++++++++++++++++--------
 2 files changed, 112 insertions(+), 20 deletions(-)

(limited to 'fs')

diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index da37907..df5ac04 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -387,9 +387,6 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry,
 	if (fc->no_create)
 		return -ENOSYS;
 
-	if (flags & O_DIRECT)
-		return -EINVAL;
-
 	forget = fuse_alloc_forget();
 	if (!forget)
 		return -ENOMEM;
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 4a199fd..8ca007d 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -194,10 +194,6 @@ int fuse_open_common(struct inode *inode, struct file *file, bool isdir)
 	struct fuse_conn *fc = get_fuse_conn(inode);
 	int err;
 
-	/* VFS checks this, but only _after_ ->open() */
-	if (file->f_flags & O_DIRECT)
-		return -EINVAL;
-
 	err = generic_file_open(inode, file);
 	if (err)
 		return err;
@@ -932,17 +928,23 @@ static ssize_t fuse_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
 	struct file *file = iocb->ki_filp;
 	struct address_space *mapping = file->f_mapping;
 	size_t count = 0;
+	size_t ocount = 0;
 	ssize_t written = 0;
+	ssize_t written_buffered = 0;
 	struct inode *inode = mapping->host;
 	ssize_t err;
 	struct iov_iter i;
+	loff_t endbyte = 0;
 
 	WARN_ON(iocb->ki_pos != pos);
 
-	err = generic_segment_checks(iov, &nr_segs, &count, VERIFY_READ);
+	ocount = 0;
+	err = generic_segment_checks(iov, &nr_segs, &ocount, VERIFY_READ);
 	if (err)
 		return err;
 
+	count = ocount;
+
 	mutex_lock(&inode->i_mutex);
 	vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
 
@@ -962,11 +964,41 @@ static ssize_t fuse_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
 
 	file_update_time(file);
 
-	iov_iter_init(&i, iov, nr_segs, count, 0);
-	written = fuse_perform_write(file, mapping, &i, pos);
-	if (written >= 0)
-		iocb->ki_pos = pos + written;
+	if (file->f_flags & O_DIRECT) {
+		written = generic_file_direct_write(iocb, iov, &nr_segs,
+						    pos, &iocb->ki_pos,
+						    count, ocount);
+		if (written < 0 || written == count)
+			goto out;
+
+		pos += written;
+		count -= written;
 
+		iov_iter_init(&i, iov, nr_segs, count, written);
+		written_buffered = fuse_perform_write(file, mapping, &i, pos);
+		if (written_buffered < 0) {
+			err = written_buffered;
+			goto out;
+		}
+		endbyte = pos + written_buffered - 1;
+
+		err = filemap_write_and_wait_range(file->f_mapping, pos,
+						   endbyte);
+		if (err)
+			goto out;
+
+		invalidate_mapping_pages(file->f_mapping,
+					 pos >> PAGE_CACHE_SHIFT,
+					 endbyte >> PAGE_CACHE_SHIFT);
+
+		written += written_buffered;
+		iocb->ki_pos = pos + written_buffered;
+	} else {
+		iov_iter_init(&i, iov, nr_segs, count, 0);
+		written = fuse_perform_write(file, mapping, &i, pos);
+		if (written >= 0)
+			iocb->ki_pos = pos + written;
+	}
 out:
 	current->backing_dev_info = NULL;
 	mutex_unlock(&inode->i_mutex);
@@ -1101,30 +1133,41 @@ static ssize_t fuse_direct_read(struct file *file, char __user *buf,
 	return res;
 }
 
-static ssize_t fuse_direct_write(struct file *file, const char __user *buf,
-				 size_t count, loff_t *ppos)
+static ssize_t __fuse_direct_write(struct file *file, const char __user *buf,
+				   size_t count, loff_t *ppos)
 {
 	struct inode *inode = file->f_path.dentry->d_inode;
 	ssize_t res;
 
-	if (is_bad_inode(inode))
-		return -EIO;
-
-	/* Don't allow parallel writes to the same file */
-	mutex_lock(&inode->i_mutex);
 	res = generic_write_checks(file, ppos, &count, 0);
 	if (!res) {
 		res = fuse_direct_io(file, buf, count, ppos, 1);
 		if (res > 0)
 			fuse_write_update_size(inode, *ppos);
 	}
-	mutex_unlock(&inode->i_mutex);
 
 	fuse_invalidate_attr(inode);
 
 	return res;
 }
 
+static ssize_t fuse_direct_write(struct file *file, const char __user *buf,
+				 size_t count, loff_t *ppos)
+{
+	struct inode *inode = file->f_path.dentry->d_inode;
+	ssize_t res;
+
+	if (is_bad_inode(inode))
+		return -EIO;
+
+	/* Don't allow parallel writes to the same file */
+	mutex_lock(&inode->i_mutex);
+	res = __fuse_direct_write(file, buf, count, ppos);
+	mutex_unlock(&inode->i_mutex);
+
+	return res;
+}
+
 static void fuse_writepage_free(struct fuse_conn *fc, struct fuse_req *req)
 {
 	__free_page(req->pages[0]);
@@ -2077,6 +2120,57 @@ int fuse_notify_poll_wakeup(struct fuse_conn *fc,
 	return 0;
 }
 
+static ssize_t fuse_loop_dio(struct file *filp, const struct iovec *iov,
+			     unsigned long nr_segs, loff_t *ppos, int rw)
+{
+	const struct iovec *vector = iov;
+	ssize_t ret = 0;
+
+	while (nr_segs > 0) {
+		void __user *base;
+		size_t len;
+		ssize_t nr;
+
+		base = vector->iov_base;
+		len = vector->iov_len;
+		vector++;
+		nr_segs--;
+
+		if (rw == WRITE)
+			nr = __fuse_direct_write(filp, base, len, ppos);
+		else
+			nr = fuse_direct_read(filp, base, len, ppos);
+
+		if (nr < 0) {
+			if (!ret)
+				ret = nr;
+			break;
+		}
+		ret += nr;
+		if (nr != len)
+			break;
+	}
+
+	return ret;
+}
+
+
+static ssize_t
+fuse_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
+			loff_t offset, unsigned long nr_segs)
+{
+	ssize_t ret = 0;
+	struct file *file = NULL;
+	loff_t pos = 0;
+
+	file = iocb->ki_filp;
+	pos = offset;
+
+	ret = fuse_loop_dio(file, iov, nr_segs, &pos, rw);
+
+	return ret;
+}
+
 static const struct file_operations fuse_file_operations = {
 	.llseek		= fuse_file_llseek,
 	.read		= do_sync_read,
@@ -2120,6 +2214,7 @@ static const struct address_space_operations fuse_file_aops  = {
 	.readpages	= fuse_readpages,
 	.set_page_dirty	= __set_page_dirty_nobuffers,
 	.bmap		= fuse_bmap,
+	.direct_IO	= fuse_direct_IO,
 };
 
 void fuse_init_file_inode(struct inode *inode)
-- 
cgit v1.1


From cb85a6ed67e979c59a29b7b4e8217e755b951cf4 Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky <schwidefsky@de.ibm.com>
Date: Fri, 30 Mar 2012 12:23:08 +0200
Subject: proc: stats: Use arch_idle_time for idle and iowait times if
 available

Git commit a25cac5198d4ff28 "proc: Consider NO_HZ when printing idle and
iowait times" changes the code for /proc/stat to use get_cpu_idle_time_us
and get_cpu_iowait_time_us if the system is running with nohz enabled.
For architectures which define arch_idle_time (currently s390 only)
this is a change for the worse. The result of arch_idle_time is supposed
to be the exact sleep time of the target cpu and should be used instead
of the value kept by the scheduler.

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Reviewed-by: Michal Hocko <mhocko@suse.cz>
Reviewed-by: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>
Link: http://lkml.kernel.org/r/20120330122308.18720283@de.ibm.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 fs/proc/stat.c | 34 ++++++++++++++++++++++++++++------
 1 file changed, 28 insertions(+), 6 deletions(-)

(limited to 'fs')

diff --git a/fs/proc/stat.c b/fs/proc/stat.c
index 6a0c62d..64c3b31 100644
--- a/fs/proc/stat.c
+++ b/fs/proc/stat.c
@@ -18,19 +18,39 @@
 #ifndef arch_irq_stat
 #define arch_irq_stat() 0
 #endif
-#ifndef arch_idle_time
-#define arch_idle_time(cpu) 0
-#endif
+
+#ifdef arch_idle_time
+
+static cputime64_t get_idle_time(int cpu)
+{
+	cputime64_t idle;
+
+	idle = kcpustat_cpu(cpu).cpustat[CPUTIME_IDLE];
+	if (cpu_online(cpu) && !nr_iowait_cpu(cpu))
+		idle += arch_idle_time(cpu);
+	return idle;
+}
+
+static cputime64_t get_iowait_time(int cpu)
+{
+	cputime64_t iowait;
+
+	iowait = kcpustat_cpu(cpu).cpustat[CPUTIME_IOWAIT];
+	if (cpu_online(cpu) && nr_iowait_cpu(cpu))
+		iowait += arch_idle_time(cpu);
+	return iowait;
+}
+
+#else
 
 static u64 get_idle_time(int cpu)
 {
 	u64 idle, idle_time = get_cpu_idle_time_us(cpu, NULL);
 
-	if (idle_time == -1ULL) {
+	if (idle_time == -1ULL)
 		/* !NO_HZ so we can rely on cpustat.idle */
 		idle = kcpustat_cpu(cpu).cpustat[CPUTIME_IDLE];
-		idle += arch_idle_time(cpu);
-	} else
+	else
 		idle = usecs_to_cputime64(idle_time);
 
 	return idle;
@@ -49,6 +69,8 @@ static u64 get_iowait_time(int cpu)
 	return iowait;
 }
 
+#endif
+
 static int show_stat(struct seq_file *p, void *v)
 {
 	int i, j;
-- 
cgit v1.1


From 70fa4a62e913dde2d100e0be2711562742f58bee Mon Sep 17 00:00:00 2001
From: Tom Goff <thomas.goff@boeing.com>
Date: Wed, 4 Apr 2012 12:06:20 -0700
Subject: sysfs: Update the name hash for an entry after changing the namespace

This is needed to allow renaming network devices that have been moved
to another network namespace.

Signed-off-by: Tom Goff <thomas.goff@boeing.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/sysfs/dir.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index 2a7a3f5..8ddc102 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -878,7 +878,6 @@ int sysfs_rename(struct sysfs_dirent *sd,
 
 		dup_name = sd->s_name;
 		sd->s_name = new_name;
-		sd->s_hash = sysfs_name_hash(sd->s_ns, sd->s_name);
 	}
 
 	/* Move to the appropriate place in the appropriate directories rbtree. */
@@ -886,6 +885,7 @@ int sysfs_rename(struct sysfs_dirent *sd,
 	sysfs_get(new_parent_sd);
 	sysfs_put(sd->s_parent);
 	sd->s_ns = new_ns;
+	sd->s_hash = sysfs_name_hash(sd->s_ns, sd->s_name);
 	sd->s_parent = new_parent_sd;
 	sysfs_link_sibling(sd);
 
-- 
cgit v1.1


From 5631f2c18f4b2845b3e97df1c659c5094a17605f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bruno=20Pr=C3=A9mont?= <bonbons@linux-vserver.org>
Date: Tue, 3 Apr 2012 09:59:48 +0200
Subject: sysfs: Prevent crash on unset sysfs group attributes
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Do not let the kernel crash when a device is registered with
sysfs while group attributes are not set (aka NULL).

Warn about the offender with some information about the offending
device.

This would warn instead of trying NULL pointer deref like:
 BUG: unable to handle kernel NULL pointer dereference at (null)
 IP: [<ffffffff81152673>] internal_create_group+0x83/0x1a0
 PGD 0
 Oops: 0000 [#1] SMP
 CPU 0
 Modules linked in:

 Pid: 1, comm: swapper/0 Not tainted 3.4.0-rc1-x86_64 #3 HP ProLiant DL360 G4
 RIP: 0010:[<ffffffff81152673>]  [<ffffffff81152673>] internal_create_group+0x83/0x1a0
 RSP: 0018:ffff88019485fd70  EFLAGS: 00010202
 RAX: 0000000000000001 RBX: 0000000000000000 RCX: 0000000000000001
 RDX: ffff880192e99908 RSI: ffff880192e99630 RDI: ffffffff81a26c60
 RBP: ffff88019485fdc0 R08: 0000000000000000 R09: 0000000000000000
 R10: ffff880192e99908 R11: 0000000000000000 R12: ffffffff81a16a00
 R13: ffff880192e99908 R14: ffffffff81a16900 R15: 0000000000000000
 FS:  0000000000000000(0000) GS:ffff88019bc00000(0000) knlGS:0000000000000000
 CS:  0010 DS: 0000 ES: 0000 CR0: 000000008005003b
 CR2: 0000000000000000 CR3: 0000000001a0c000 CR4: 00000000000007f0
 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
 Process swapper/0 (pid: 1, threadinfo ffff88019485e000, task ffff880194878000)
 Stack:
  ffff88019485fdd0 ffff880192da9d60 0000000000000000 ffff880192e99908
  ffff880192e995d8 0000000000000001 ffffffff81a16a00 ffff880192da9d60
  0000000000000000 0000000000000000 ffff88019485fdd0 ffffffff811527be
 Call Trace:
  [<ffffffff811527be>] sysfs_create_group+0xe/0x10
  [<ffffffff81376ca6>] device_add_groups+0x46/0x80
  [<ffffffff81377d3d>] device_add+0x46d/0x6a0
  ...

Signed-off-by: Bruno Prémont <bonbons@linux-vserver.org>
Acked-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/sysfs/group.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/sysfs/group.c b/fs/sysfs/group.c
index dd1701c..2df555c 100644
--- a/fs/sysfs/group.c
+++ b/fs/sysfs/group.c
@@ -67,7 +67,11 @@ static int internal_create_group(struct kobject *kobj, int update,
 	/* Updates may happen before the object has been instantiated */
 	if (unlikely(update && !kobj->sd))
 		return -EINVAL;
-
+	if (!grp->attrs) {
+		WARN(1, "sysfs: attrs not set by subsystem for group: %s/%s\n",
+			kobj->name, grp->name ? "" : grp->name);
+		return -EINVAL;
+	}
 	if (grp->name) {
 		error = sysfs_create_subdir(kobj, grp->name, &sd);
 		if (error)
-- 
cgit v1.1


From 3a198886ab5f228fcbebb9ace803d8b99721d49a Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Fri, 6 Apr 2012 13:41:06 -0700
Subject: sysfs: handle 'parent deleted before child added'

In scsi at least two cases of the parent device being deleted before the
child is added have been observed.

1/ scsi is performing async scans and the device is removed prior to the
   async can thread running (can happen with an in-opportune / unlikely
   unplug during initial scan).

2/ libsas discovery event running after the parent port has been torn
   down (this is a bug in libsas).

Result in crash signatures like:
 BUG: unable to handle kernel NULL pointer dereference at 0000000000000098
 IP: [<ffffffff8115e100>] sysfs_create_dir+0x32/0xb6
 ...
 Process scsi_scan_8 (pid: 5417, threadinfo ffff88080bd16000, task ffff880801b8a0b0)
 Stack:
  00000000fffffffe ffff880813470628 ffff88080bd17cd0 ffff88080614b7e8
  ffff88080b45c108 00000000fffffffe ffff88080bd17d20 ffffffff8125e4a8
  ffff88080bd17cf0 ffffffff81075149 ffff88080bd17d30 ffff88080614b7e8
 Call Trace:
  [<ffffffff8125e4a8>] kobject_add_internal+0x120/0x1e3
  [<ffffffff81075149>] ? trace_hardirqs_on+0xd/0xf
  [<ffffffff8125e641>] kobject_add_varg+0x41/0x50
  [<ffffffff8125e70b>] kobject_add+0x64/0x66
  [<ffffffff8131122b>] device_add+0x12d/0x63a

In this scenario the parent is still valid (because we have a
reference), but it has been device_del()'d which means its kobj->sd
pointer is NULL'd via:

 device_del()->kobject_del()->sysfs_remove_dir()

...and then sysfs_create_dir() (without this fix) goes ahead and
de-references parent_sd via sysfs_ns_type():

 return (sd->s_flags & SYSFS_NS_TYPE_MASK) >> SYSFS_NS_TYPE_SHIFT;

This scenario is being fixed in scsi/libsas, but if other subsystems
present the same ordering the system need not immediately crash.

Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: James Bottomley <JBottomley@parallels.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/sysfs/dir.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'fs')

diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index 8ddc102..35a36d3 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -729,6 +729,9 @@ int sysfs_create_dir(struct kobject * kobj)
 	else
 		parent_sd = &sysfs_root;
 
+	if (!parent_sd)
+		return -ENOENT;
+
 	if (sysfs_ns_type(parent_sd))
 		ns = kobj->ktype->namespace(kobj);
 	type = sysfs_read_ns_type(kobj);
-- 
cgit v1.1


From 0a2da9b2ef2ef76c09397597f260245b020e6522 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@suse.cz>
Date: Wed, 11 Apr 2012 11:45:06 +0200
Subject: fuse: allow nanosecond granularity

Derrik Pates reports that an utimensat with a NULL argument results in the
current time being sent from the kernel with 1 second granularity.

Reported-by: Derrik Pates <demon@now.ai>
Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
---
 fs/fuse/inode.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'fs')

diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 64cf8d0..c9a0c97 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -947,6 +947,7 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
 	sb->s_magic = FUSE_SUPER_MAGIC;
 	sb->s_op = &fuse_super_operations;
 	sb->s_maxbytes = MAX_LFS_FILESIZE;
+	sb->s_time_gran = 1;
 	sb->s_export_op = &fuse_export_operations;
 
 	file = fget(d.fd);
-- 
cgit v1.1


From 9dc4e6c4d1182d34604ea40fef641775f5b15456 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Mon, 9 Apr 2012 18:06:49 -0400
Subject: nfsd: don't fail unchecked creates of non-special files

Allow a v3 unchecked open of a non-regular file succeed as if it were a
lookup; typically a client in such a case will want to fall back on a
local open, so succeeding and giving it the filehandle is more useful
than failing with nfserr_exist, which makes it appear that nothing at
all exists by that name.

Similarly for v4, on an open-create, return the same errors we would on
an attempt to open a non-regular file, instead of returning
nfserr_exist.

This fixes a problem found doing a v4 open of a symlink with
O_RDONLY|O_CREAT, which resulted in the current client returning EEXIST.

Thanks also to Trond for analysis.

Cc: stable@kernel.org
Reported-by: Orion Poplawski <orion@cora.nwra.com>
Tested-by: Orion Poplawski <orion@cora.nwra.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfsd/nfs4proc.c | 8 ++++----
 fs/nfsd/vfs.c      | 2 +-
 2 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'fs')

diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 2ed14df..8256efd 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -235,17 +235,17 @@ do_open_lookup(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_o
 		 */
 		if (open->op_createmode == NFS4_CREATE_EXCLUSIVE && status == 0)
 			open->op_bmval[1] = (FATTR4_WORD1_TIME_ACCESS |
-						FATTR4_WORD1_TIME_MODIFY);
+							FATTR4_WORD1_TIME_MODIFY);
 	} else {
 		status = nfsd_lookup(rqstp, current_fh,
 				     open->op_fname.data, open->op_fname.len, resfh);
 		fh_unlock(current_fh);
-		if (status)
-			goto out;
-		status = nfsd_check_obj_isreg(resfh);
 	}
 	if (status)
 		goto out;
+	status = nfsd_check_obj_isreg(resfh);
+	if (status)
+		goto out;
 
 	if (is_create_with_attrs(open) && open->op_acl != NULL)
 		do_set_nfs4_acl(rqstp, resfh, open->op_acl, open->op_bmval);
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 296d671..5686661 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -1458,7 +1458,7 @@ do_nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
 		switch (createmode) {
 		case NFS3_CREATE_UNCHECKED:
 			if (! S_ISREG(dchild->d_inode->i_mode))
-				err = nfserr_exist;
+				goto out;
 			else if (truncp) {
 				/* in nfsv4, we need to treat this case a little
 				 * differently.  we don't want to truncate the
-- 
cgit v1.1


From 4fe9e9639d95cd11de63afa353f2de320f26033a Mon Sep 17 00:00:00 2001
From: Sachin Prabhu <sprabhu@redhat.com>
Date: Tue, 10 Apr 2012 18:12:27 +0100
Subject: Cleanup handling of NULL value passed for a mount option

Allow blank user= and ip= mount option. Also clean up redundant
checks for NULL values since the token parser will not actually
match mount options with NULL values unless explicitly specified.

Signed-off-by: Sachin Prabhu <sprabhu@redhat.com>
Reported-by: Chris Clayton <chris2553@googlemail.com>
Acked-by: Jeff Layton <jlayton@samba.org>
Tested-by: Chris Clayton <chris2553@googlemail.com>
Signed-off-by: Steve French <sfrench@us.ibm.com>
---
 fs/cifs/connect.c | 80 +++++++++++++------------------------------------------
 1 file changed, 19 insertions(+), 61 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index d81e933..6a86f3d 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -109,6 +109,8 @@ enum {
 
 	/* Options which could be blank */
 	Opt_blank_pass,
+	Opt_blank_user,
+	Opt_blank_ip,
 
 	Opt_err
 };
@@ -183,11 +185,15 @@ static const match_table_t cifs_mount_option_tokens = {
 	{ Opt_wsize, "wsize=%s" },
 	{ Opt_actimeo, "actimeo=%s" },
 
+	{ Opt_blank_user, "user=" },
+	{ Opt_blank_user, "username=" },
 	{ Opt_user, "user=%s" },
 	{ Opt_user, "username=%s" },
 	{ Opt_blank_pass, "pass=" },
 	{ Opt_pass, "pass=%s" },
 	{ Opt_pass, "password=%s" },
+	{ Opt_blank_ip, "ip=" },
+	{ Opt_blank_ip, "addr=" },
 	{ Opt_ip, "ip=%s" },
 	{ Opt_ip, "addr=%s" },
 	{ Opt_unc, "unc=%s" },
@@ -1534,15 +1540,17 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
 
 		/* String Arguments */
 
+		case Opt_blank_user:
+			/* null user, ie. anonymous authentication */
+			vol->nullauth = 1;
+			vol->username = NULL;
+			break;
 		case Opt_user:
 			string = match_strdup(args);
 			if (string == NULL)
 				goto out_nomem;
 
-			if (!*string) {
-				/* null user, ie. anonymous authentication */
-				vol->nullauth = 1;
-			} else if (strnlen(string, MAX_USERNAME_SIZE) >
+			if (strnlen(string, MAX_USERNAME_SIZE) >
 							MAX_USERNAME_SIZE) {
 				printk(KERN_WARNING "CIFS: username too long\n");
 				goto cifs_parse_mount_err;
@@ -1611,14 +1619,15 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
 			}
 			vol->password[j] = '\0';
 			break;
+		case Opt_blank_ip:
+			vol->UNCip = NULL;
+			break;
 		case Opt_ip:
 			string = match_strdup(args);
 			if (string == NULL)
 				goto out_nomem;
 
-			if (!*string) {
-				vol->UNCip = NULL;
-			} else if (strnlen(string, INET6_ADDRSTRLEN) >
+			if (strnlen(string, INET6_ADDRSTRLEN) >
 						INET6_ADDRSTRLEN) {
 				printk(KERN_WARNING "CIFS: ip address "
 						    "too long\n");
@@ -1636,12 +1645,6 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
 			if (string == NULL)
 				goto out_nomem;
 
-			if (!*string) {
-				printk(KERN_WARNING "CIFS: invalid path to "
-						    "network resource\n");
-				goto cifs_parse_mount_err;
-			}
-
 			temp_len = strnlen(string, 300);
 			if (temp_len  == 300) {
 				printk(KERN_WARNING "CIFS: UNC name too long\n");
@@ -1670,11 +1673,7 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
 			if (string == NULL)
 				goto out_nomem;
 
-			if (!*string) {
-				printk(KERN_WARNING "CIFS: invalid domain"
-						    " name\n");
-				goto cifs_parse_mount_err;
-			} else if (strnlen(string, 256) == 256) {
+			if (strnlen(string, 256) == 256) {
 				printk(KERN_WARNING "CIFS: domain name too"
 						    " long\n");
 				goto cifs_parse_mount_err;
@@ -1693,11 +1692,7 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
 			if (string == NULL)
 				goto out_nomem;
 
-			if (!*string) {
-				printk(KERN_WARNING "CIFS: srcaddr value not"
-						    " specified\n");
-				goto cifs_parse_mount_err;
-			} else if (!cifs_convert_address(
+			if (!cifs_convert_address(
 					(struct sockaddr *)&vol->srcaddr,
 					string, strlen(string))) {
 				printk(KERN_WARNING "CIFS:  Could not parse"
@@ -1710,11 +1705,6 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
 			if (string == NULL)
 				goto out_nomem;
 
-			if (!*string) {
-				printk(KERN_WARNING "CIFS: Invalid path"
-						    " prefix\n");
-				goto cifs_parse_mount_err;
-			}
 			temp_len = strnlen(string, 1024);
 			if (string[0] != '/')
 				temp_len++; /* missing leading slash */
@@ -1742,11 +1732,7 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
 			if (string == NULL)
 				goto out_nomem;
 
-			if (!*string) {
-				printk(KERN_WARNING "CIFS: Invalid iocharset"
-						    " specified\n");
-				goto cifs_parse_mount_err;
-			} else if (strnlen(string, 1024) >= 65) {
+			if (strnlen(string, 1024) >= 65) {
 				printk(KERN_WARNING "CIFS: iocharset name "
 						    "too long.\n");
 				goto cifs_parse_mount_err;
@@ -1771,11 +1757,6 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
 			if (string == NULL)
 				goto out_nomem;
 
-			if (!*string) {
-				printk(KERN_WARNING "CIFS: No socket option"
-						    " specified\n");
-				goto cifs_parse_mount_err;
-			}
 			if (strnicmp(string, "TCP_NODELAY", 11) == 0)
 				vol->sockopt_tcp_nodelay = 1;
 			break;
@@ -1784,12 +1765,6 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
 			if (string == NULL)
 				goto out_nomem;
 
-			if (!*string) {
-				printk(KERN_WARNING "CIFS: Invalid (empty)"
-						    " netbiosname\n");
-				break;
-			}
-
 			memset(vol->source_rfc1001_name, 0x20,
 				RFC1001_NAME_LEN);
 			/*
@@ -1817,11 +1792,6 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
 			if (string == NULL)
 				goto out_nomem;
 
-			if (!*string) {
-				printk(KERN_WARNING "CIFS: Empty server"
-					" netbiosname specified\n");
-				break;
-			}
 			/* last byte, type, is 0x20 for servr type */
 			memset(vol->target_rfc1001_name, 0x20,
 				RFC1001_NAME_LEN_WITH_NULL);
@@ -1848,12 +1818,6 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
 			if (string == NULL)
 				goto out_nomem;
 
-			if (!*string) {
-				cERROR(1, "no protocol version specified"
-					  " after vers= mount option");
-				goto cifs_parse_mount_err;
-			}
-
 			if (strnicmp(string, "cifs", 4) == 0 ||
 			    strnicmp(string, "1", 1) == 0) {
 				/* This is the default */
@@ -1868,12 +1832,6 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
 			if (string == NULL)
 				goto out_nomem;
 
-			if (!*string) {
-				printk(KERN_WARNING "CIFS: no security flavor"
-						    " specified\n");
-				break;
-			}
-
 			if (cifs_parse_security_flavors(string, vol) != 0)
 				goto cifs_parse_mount_err;
 			break;
-- 
cgit v1.1


From 8e62c2de6e23e5c1fee04f59de51b54cc2868ca5 Mon Sep 17 00:00:00 2001
From: Chris Mason <chris.mason@oracle.com>
Date: Thu, 12 Apr 2012 13:46:48 -0400
Subject: Revert "Btrfs: increase the global block reserve estimates"

This reverts commit 5500cdbe14d7435e04f66ff3cfb8ecd8b8e44ebf.

We've had a number of complaints of early enospc that bisect down
to this patch.  We'll hae to fix the reservations differently.

CC: stable@kernel.org
Signed-off-by: Chris Mason <chris.mason@oracle.com>
---
 fs/btrfs/extent-tree.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index a844204..ace5e8c 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -4205,7 +4205,7 @@ static u64 calc_global_metadata_size(struct btrfs_fs_info *fs_info)
 	num_bytes += div64_u64(data_used + meta_used, 50);
 
 	if (num_bytes * 3 > meta_used)
-		num_bytes = div64_u64(meta_used, 3) * 2;
+		num_bytes = div64_u64(meta_used, 3);
 
 	return ALIGN(num_bytes, fs_info->extent_root->leafsize << 10);
 }
-- 
cgit v1.1


From d95603b262edb53d6016a8df0c150371d4d61e67 Mon Sep 17 00:00:00 2001
From: Chris Mason <chris.mason@oracle.com>
Date: Thu, 12 Apr 2012 15:55:15 -0400
Subject: Btrfs: fix uninit variable in repair_eb_io_failure

We'd have to be passing bogus extent buffers for this uninit variable to
actually be used, but set it to zero just in case.

Signed-off-by: Chris Mason <chris.mason@oracle.com>
---
 fs/btrfs/extent_io.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 0c3ec00..59ec105 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -1937,7 +1937,7 @@ int repair_eb_io_failure(struct btrfs_root *root, struct extent_buffer *eb,
 	struct btrfs_mapping_tree *map_tree = &root->fs_info->mapping_tree;
 	u64 start = eb->start;
 	unsigned long i, num_pages = num_extent_pages(eb->start, eb->len);
-	int ret;
+	int ret = 0;
 
 	for (i = 0; i < num_pages; i++) {
 		struct page *p = extent_buffer_page(eb, i);
-- 
cgit v1.1


From b89203f74bdfcb15407d54d3f257b16a2ea19e62 Mon Sep 17 00:00:00 2001
From: Liu Bo <liubo2009@cn.fujitsu.com>
Date: Thu, 12 Apr 2012 16:03:56 -0400
Subject: Btrfs: fix eof while discarding extents

We miscalculate the length of extents we're discarding, and it leads to
an eof of device.

Reported-by: Daniel Blueman <daniel@quora.org>
Signed-off-by: Liu Bo <liubo2009@cn.fujitsu.com>
Signed-off-by: Chris Mason <chris.mason@oracle.com>
---
 fs/btrfs/volumes.c | 20 ++++++++++++++++++--
 1 file changed, 18 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index a872b48..759d024 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -3833,6 +3833,7 @@ static int __btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw,
 		int sub_stripes = 0;
 		u64 stripes_per_dev = 0;
 		u32 remaining_stripes = 0;
+		u32 last_stripe = 0;
 
 		if (map->type &
 		    (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID10)) {
@@ -3846,6 +3847,8 @@ static int __btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw,
 						      stripe_nr_orig,
 						      factor,
 						      &remaining_stripes);
+			div_u64_rem(stripe_nr_end - 1, factor, &last_stripe);
+			last_stripe *= sub_stripes;
 		}
 
 		for (i = 0; i < num_stripes; i++) {
@@ -3858,16 +3861,29 @@ static int __btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw,
 					 BTRFS_BLOCK_GROUP_RAID10)) {
 				bbio->stripes[i].length = stripes_per_dev *
 							  map->stripe_len;
+
 				if (i / sub_stripes < remaining_stripes)
 					bbio->stripes[i].length +=
 						map->stripe_len;
+
+				/*
+				 * Special for the first stripe and
+				 * the last stripe:
+				 *
+				 * |-------|...|-------|
+				 *     |----------|
+				 *    off     end_off
+				 */
 				if (i < sub_stripes)
 					bbio->stripes[i].length -=
 						stripe_offset;
-				if ((i / sub_stripes + 1) %
-				    sub_stripes == remaining_stripes)
+
+				if (stripe_index >= last_stripe &&
+				    stripe_index <= (last_stripe +
+						     sub_stripes - 1))
 					bbio->stripes[i].length -=
 						stripe_end_offset;
+
 				if (i == sub_stripes - 1)
 					stripe_offset = 0;
 			} else
-- 
cgit v1.1


From c6664b42c4e567792abdb17c958fb01c5bcfcb3a Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov@gmail.com>
Date: Thu, 12 Apr 2012 16:03:56 -0400
Subject: Btrfs: remove lock assert from get_restripe_target()

This fixes a regression introduced by fc67c450.  spin_is_locked() always
returns 0 on UP kernels, which caused assert in get_restripe_target() to
be fired on every call from btrfs_reduce_alloc_profile() on UP systems.
Remove it completely for now, it's not clear if it's going to be needed
in future.

Reported-by: Bobby Powers <bobbypowers@gmail.com>
Reported-by: Mitch Harder <mitch.harder@sabayonlinux.org>
Tested-by: Mitch Harder <mitch.harder@sabayonlinux.org>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
Signed-off-by: Chris Mason <chris.mason@oracle.com>
---
 fs/btrfs/extent-tree.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index ace5e8c..a2134d8 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -3152,15 +3152,14 @@ static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
 /*
  * returns target flags in extended format or 0 if restripe for this
  * chunk_type is not in progress
+ *
+ * should be called with either volume_mutex or balance_lock held
  */
 static u64 get_restripe_target(struct btrfs_fs_info *fs_info, u64 flags)
 {
 	struct btrfs_balance_control *bctl = fs_info->balance_ctl;
 	u64 target = 0;
 
-	BUG_ON(!mutex_is_locked(&fs_info->volume_mutex) &&
-	       !spin_is_locked(&fs_info->balance_lock));
-
 	if (!bctl)
 		return 0;
 
-- 
cgit v1.1


From e627ee7bcd42b4e3a03ca01a8e46dcb4033c5ae0 Mon Sep 17 00:00:00 2001
From: Tsutomu Itoh <t-itoh@jp.fujitsu.com>
Date: Thu, 12 Apr 2012 16:03:56 -0400
Subject: Btrfs: check return value of bio_alloc() properly

bio_alloc() has the possibility of returning NULL.
So, it is necessary to check the return value.

Signed-off-by: Tsutomu Itoh <t-itoh@jp.fujitsu.com>
Signed-off-by: Chris Mason <chris.mason@oracle.com>
---
 fs/btrfs/compression.c | 2 ++
 fs/btrfs/extent_io.c   | 4 ++++
 fs/btrfs/scrub.c       | 4 ++++
 3 files changed, 10 insertions(+)

(limited to 'fs')

diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index d11afa67..646f5e6 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -405,6 +405,7 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
 			bio_put(bio);
 
 			bio = compressed_bio_alloc(bdev, first_byte, GFP_NOFS);
+			BUG_ON(!bio);
 			bio->bi_private = cb;
 			bio->bi_end_io = end_compressed_bio_write;
 			bio_add_page(bio, page, PAGE_CACHE_SIZE, 0);
@@ -687,6 +688,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
 
 			comp_bio = compressed_bio_alloc(bdev, cur_disk_byte,
 							GFP_NOFS);
+			BUG_ON(!comp_bio);
 			comp_bio->bi_private = cb;
 			comp_bio->bi_end_io = end_compressed_bio_read;
 
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 59ec105..4789770 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -2180,6 +2180,10 @@ static int bio_readpage_error(struct bio *failed_bio, struct page *page,
 	}
 
 	bio = bio_alloc(GFP_NOFS, 1);
+	if (!bio) {
+		free_io_failure(inode, failrec, 0);
+		return -EIO;
+	}
 	bio->bi_private = state;
 	bio->bi_end_io = failed_bio->bi_end_io;
 	bio->bi_sector = failrec->logical >> 9;
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index c9a2c1a..60f0e28 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -1044,6 +1044,8 @@ static int scrub_recheck_block(struct btrfs_fs_info *fs_info,
 
 		BUG_ON(!page->page);
 		bio = bio_alloc(GFP_NOFS, 1);
+		if (!bio)
+			return -EIO;
 		bio->bi_bdev = page->bdev;
 		bio->bi_sector = page->physical >> 9;
 		bio->bi_end_io = scrub_complete_bio_end_io;
@@ -1172,6 +1174,8 @@ static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad,
 		DECLARE_COMPLETION_ONSTACK(complete);
 
 		bio = bio_alloc(GFP_NOFS, 1);
+		if (!bio)
+			return -EIO;
 		bio->bi_bdev = page_bad->bdev;
 		bio->bi_sector = page_bad->physical >> 9;
 		bio->bi_end_io = scrub_complete_bio_end_io;
-- 
cgit v1.1


From 4edc2ca388d62abffe38149f6ac00e749ea721c5 Mon Sep 17 00:00:00 2001
From: Dave Jones <davej@redhat.com>
Date: Thu, 12 Apr 2012 16:03:56 -0400
Subject: Btrfs: fix use-after-free in __btrfs_end_transaction

49b25e0540904be0bf558b84475c69d72e4de66e introduced a use-after-free bug
that caused spurious -EIO's to be returned.

Do the check before we free the transaction.

Cc: David Sterba <dsterba@suse.cz>
Cc: Jeff Mahoney <jeffm@suse.com>
Signed-off-by: Dave Jones <davej@redhat.com>
Signed-off-by: Chris Mason <chris.mason@oracle.com>
---
 fs/btrfs/transaction.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 8da29e8..11b77a5 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -480,6 +480,7 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
 	struct btrfs_transaction *cur_trans = trans->transaction;
 	struct btrfs_fs_info *info = root->fs_info;
 	int count = 0;
+	int err = 0;
 
 	if (--trans->use_count) {
 		trans->block_rsv = trans->orig_rsv;
@@ -532,18 +533,18 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
 
 	if (current->journal_info == trans)
 		current->journal_info = NULL;
-	memset(trans, 0, sizeof(*trans));
-	kmem_cache_free(btrfs_trans_handle_cachep, trans);
 
 	if (throttle)
 		btrfs_run_delayed_iputs(root);
 
 	if (trans->aborted ||
 	    root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) {
-		return -EIO;
+		err = -EIO;
 	}
 
-	return 0;
+	memset(trans, 0, sizeof(*trans));
+	kmem_cache_free(btrfs_trans_handle_cachep, trans);
+	return err;
 }
 
 int btrfs_end_transaction(struct btrfs_trans_handle *trans,
-- 
cgit v1.1


From d53ba47484ed6245e640ee4bfe9d21e9bfc15765 Mon Sep 17 00:00:00 2001
From: Josef Bacik <josef@redhat.com>
Date: Thu, 12 Apr 2012 16:03:57 -0400
Subject: Btrfs: use commit root when loading free space cache

A user reported that booting his box up with btrfs root on 3.4 was way
slower than on 3.3 because I removed the ideal caching code.  It turns out
that we don't load the free space cache if we're in a commit for deadlock
reasons, but since we're reading the cache and it hasn't changed yet we are
safe reading the inode and free space item from the commit root, so do that
and remove all of the deadlock checks so we don't unnecessarily skip loading
the free space cache.  The user reported this fixed the slowness.  Thanks,

Tested-by: Calvin Walton <calvin.walton@kepstin.ca>
Signed-off-by: Josef Bacik <josef@redhat.com>
Signed-off-by: Chris Mason <chris.mason@oracle.com>
---
 fs/btrfs/extent-tree.c      | 4 +---
 fs/btrfs/free-space-cache.c | 9 ++-------
 2 files changed, 3 insertions(+), 10 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index a2134d8..2b35f8d 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -529,9 +529,7 @@ static int cache_block_group(struct btrfs_block_group_cache *cache,
 	 * allocate blocks for the tree root we can't do the fast caching since
 	 * we likely hold important locks.
 	 */
-	if (trans && (!trans->transaction->in_commit) &&
-	    (root && root != root->fs_info->tree_root) &&
-	    btrfs_test_opt(root, SPACE_CACHE)) {
+	if (fs_info->mount_opt & BTRFS_MOUNT_SPACE_CACHE) {
 		ret = load_free_space_cache(fs_info, cache);
 
 		spin_lock(&cache->lock);
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 054707e..baaa518 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -748,13 +748,6 @@ int load_free_space_cache(struct btrfs_fs_info *fs_info,
 	u64 used = btrfs_block_group_used(&block_group->item);
 
 	/*
-	 * If we're unmounting then just return, since this does a search on the
-	 * normal root and not the commit root and we could deadlock.
-	 */
-	if (btrfs_fs_closing(fs_info))
-		return 0;
-
-	/*
 	 * If this block group has been marked to be cleared for one reason or
 	 * another then we can't trust the on disk cache, so just return.
 	 */
@@ -768,6 +761,8 @@ int load_free_space_cache(struct btrfs_fs_info *fs_info,
 	path = btrfs_alloc_path();
 	if (!path)
 		return 0;
+	path->search_commit_root = 1;
+	path->skip_locking = 1;
 
 	inode = lookup_free_space_inode(root, block_group, path);
 	if (IS_ERR(inode)) {
-- 
cgit v1.1


From 96f6f98501196d46ce52c2697dd758d9300c63f5 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Thu, 12 Apr 2012 23:47:00 -0400
Subject: nfsd: fix b0rken error value for setattr on read-only mount

..._want_write() returns -EROFS on failure, _not_ an NFS error value.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/nfsd/nfs4proc.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 2ed14df..694d526 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -841,6 +841,7 @@ nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	      struct nfsd4_setattr *setattr)
 {
 	__be32 status = nfs_ok;
+	int err;
 
 	if (setattr->sa_iattr.ia_valid & ATTR_SIZE) {
 		nfs4_lock_state();
@@ -852,9 +853,9 @@ nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 			return status;
 		}
 	}
-	status = fh_want_write(&cstate->current_fh);
-	if (status)
-		return status;
+	err = fh_want_write(&cstate->current_fh);
+	if (err)
+		return nfserrno(err);
 	status = nfs_ok;
 
 	status = check_attr_support(rqstp, cstate, setattr->sa_bmval,
-- 
cgit v1.1


From 04da6e9d63427b2d0fd04766712200c250b3278f Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 13 Apr 2012 00:00:04 -0400
Subject: nfsd: fix error values returned by nfsd4_lockt() when nfsd_open()
 fails

nfsd_open() already returns an NFS error value; only vfs_test_lock()
result needs to be fed through nfserrno().  Broken by commit 55ef12
(nfsd: Ensure nfsv4 calls the underlying filesystem on LOCKT)
three years ago...

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/nfsd/nfs4state.c | 23 +++++++++--------------
 1 file changed, 9 insertions(+), 14 deletions(-)

(limited to 'fs')

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 1841f8b..7f71c69 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -4211,16 +4211,14 @@ out:
  * vfs_test_lock.  (Arguably perhaps test_lock should be done with an
  * inode operation.)
  */
-static int nfsd_test_lock(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file_lock *lock)
+static __be32 nfsd_test_lock(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file_lock *lock)
 {
 	struct file *file;
-	int err;
-
-	err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_READ, &file);
-	if (err)
-		return err;
-	err = vfs_test_lock(file, lock);
-	nfsd_close(file);
+	__be32 err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_READ, &file);
+	if (!err) {
+		err = nfserrno(vfs_test_lock(file, lock));
+		nfsd_close(file);
+	}
 	return err;
 }
 
@@ -4234,7 +4232,6 @@ nfsd4_lockt(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	struct inode *inode;
 	struct file_lock file_lock;
 	struct nfs4_lockowner *lo;
-	int error;
 	__be32 status;
 
 	if (locks_in_grace())
@@ -4280,12 +4277,10 @@ nfsd4_lockt(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 
 	nfs4_transform_lock_offset(&file_lock);
 
-	status = nfs_ok;
-	error = nfsd_test_lock(rqstp, &cstate->current_fh, &file_lock);
-	if (error) {
-		status = nfserrno(error);
+	status = nfsd_test_lock(rqstp, &cstate->current_fh, &file_lock);
+	if (status)
 		goto out;
-	}
+
 	if (file_lock.fl_type != F_UNLCK) {
 		status = nfserr_denied;
 		nfs4_set_lock_denied(&file_lock, &lockt->lt_denied);
-- 
cgit v1.1


From 02f5fde5df0ea930e70f93763dd48beff182b208 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 13 Apr 2012 00:10:34 -0400
Subject: nfsd: fix endianness breakage in TEST_STATEID handling

->ts_id_status gets nfs errno, i.e. it's already big-endian; no need
to apply htonl() to it.  Broken by commit 174568 (NFSD: Added TEST_STATEID
operation) last year...

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/nfsd/nfs4xdr.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index bcd8904..07a99b6 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -3410,7 +3410,7 @@ nfsd4_encode_test_stateid(struct nfsd4_compoundres *resp, int nfserr,
 	*p++ = htonl(test_stateid->ts_num_ids);
 
 	list_for_each_entry_safe(stateid, next, &test_stateid->ts_stateid_list, ts_id_list) {
-		*p++ = htonl(stateid->ts_id_status);
+		*p++ = stateid->ts_id_status;
 	}
 
 	ADJUST_ARGS();
-- 
cgit v1.1


From afcf6792afd66209161495f691e19d4fc5460a93 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 13 Apr 2012 00:15:37 -0400
Subject: nfsd: fix error value on allocation failure in
 nfsd4_decode_test_stateid()

PTR_ERR(NULL) is going to be 0...

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/nfsd/nfs4xdr.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 07a99b6..74c00bc 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -1392,7 +1392,7 @@ nfsd4_decode_test_stateid(struct nfsd4_compoundargs *argp, struct nfsd4_test_sta
 	for (i = 0; i < test_stateid->ts_num_ids; i++) {
 		stateid = kmalloc(sizeof(struct nfsd4_test_stateid_id), GFP_KERNEL);
 		if (!stateid) {
-			status = PTR_ERR(stateid);
+			status = nfserrno(-ENOMEM);
 			goto out;
 		}
 
-- 
cgit v1.1


From efe39651f08813180f37dc508d950fc7d92b29a8 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 13 Apr 2012 00:32:14 -0400
Subject: nfsd: fix compose_entry_fh() failure exits

Restore the original logics ("fail on mountpoints, negatives and in
case of fh_compose() failures").  Since commit 8177e (nfsd: clean up
readdirplus encoding) that got broken -
	rv = fh_compose(fhp, exp, dchild, &cd->fh);
	if (rv)
	       goto out;
	if (!dchild->d_inode)
		goto out;
	rv = 0;
out:
is equivalent to
	rv = fh_compose(fhp, exp, dchild, &cd->fh);
out:
and the second check has no effect whatsoever...

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/nfsd/nfs3xdr.c | 22 ++++++++--------------
 1 file changed, 8 insertions(+), 14 deletions(-)

(limited to 'fs')

diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
index 08c6e36..43f46cd 100644
--- a/fs/nfsd/nfs3xdr.c
+++ b/fs/nfsd/nfs3xdr.c
@@ -803,13 +803,13 @@ encode_entry_baggage(struct nfsd3_readdirres *cd, __be32 *p, const char *name,
 	return p;
 }
 
-static int
+static __be32
 compose_entry_fh(struct nfsd3_readdirres *cd, struct svc_fh *fhp,
 		const char *name, int namlen)
 {
 	struct svc_export	*exp;
 	struct dentry		*dparent, *dchild;
-	int rv = 0;
+	__be32 rv = nfserr_noent;
 
 	dparent = cd->fh.fh_dentry;
 	exp  = cd->fh.fh_export;
@@ -817,26 +817,20 @@ compose_entry_fh(struct nfsd3_readdirres *cd, struct svc_fh *fhp,
 	if (isdotent(name, namlen)) {
 		if (namlen == 2) {
 			dchild = dget_parent(dparent);
-			if (dchild == dparent) {
-				/* filesystem root - cannot return filehandle for ".." */
-				dput(dchild);
-				return -ENOENT;
-			}
+			/* filesystem root - cannot return filehandle for ".." */
+			if (dchild == dparent)
+				goto out;
 		} else
 			dchild = dget(dparent);
 	} else
 		dchild = lookup_one_len(name, dparent, namlen);
 	if (IS_ERR(dchild))
-		return -ENOENT;
-	rv = -ENOENT;
+		return rv;
 	if (d_mountpoint(dchild))
 		goto out;
-	rv = fh_compose(fhp, exp, dchild, &cd->fh);
-	if (rv)
-		goto out;
 	if (!dchild->d_inode)
 		goto out;
-	rv = 0;
+	rv = fh_compose(fhp, exp, dchild, &cd->fh);
 out:
 	dput(dchild);
 	return rv;
@@ -845,7 +839,7 @@ out:
 static __be32 *encode_entryplus_baggage(struct nfsd3_readdirres *cd, __be32 *p, const char *name, int namlen)
 {
 	struct svc_fh	fh;
-	int err;
+	__be32 err;
 
 	fh_init(&fh, NFS3_FHSIZE);
 	err = compose_entry_fh(cd, &fh, name, namlen);
-- 
cgit v1.1


From af1584f570b19b0285e4402a0b54731495d31784 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Thu, 12 Apr 2012 20:32:25 -0400
Subject: ext4: fix endianness breakage in ext4_split_extent_at()

->ee_len is __le16, so assigning cpu_to_le32() to it is going to do
Bad Things(tm) on big-endian hosts...

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/ext4/extents.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 1421938..1d41838 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -2882,7 +2882,7 @@ static int ext4_split_extent_at(handle_t *handle,
 		if (err)
 			goto fix_extent_len;
 		/* update the extent length and mark as initialized */
-		ex->ee_len = cpu_to_le32(ee_len);
+		ex->ee_len = cpu_to_le16(ee_len);
 		ext4_ext_try_to_merge(inode, path, ex);
 		err = ext4_ext_dirty(handle, inode, path + depth);
 		goto out;
-- 
cgit v1.1


From bfa890a3cdeed29eef53d54cd7f80cec0fd46b11 Mon Sep 17 00:00:00 2001
From: Sachin Prabhu <sprabhu@redhat.com>
Date: Fri, 13 Apr 2012 14:04:32 +0100
Subject: Fix number parsing in cifs_parse_mount_options

The function kstrtoul() used to parse number strings in the mount
option parser is set to expect a base 10 number . This treats the octal
numbers passed for mount options such as file_mode as base10 numbers
leading to incorrect behavior.

Change the 'base' argument passed to kstrtoul from 10 to 0 to
allow it to auto-detect the base of the number passed.

Signed-off-by: Sachin Prabhu <sprabhu@redhat.com>
Acked-by: Jeff Layton <jlayton@samba.org>
Reported-by: Chris Clayton <chris2553@googlemail.com>
Signed-off-by: Steve French <sfrench@us.ibm.com>
---
 fs/cifs/connect.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 6a86f3d..f31dc9a 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -1123,7 +1123,7 @@ static int get_option_ul(substring_t args[], unsigned long *option)
 	string = match_strdup(args);
 	if (string == NULL)
 		return -ENOMEM;
-	rc = kstrtoul(string, 10, option);
+	rc = kstrtoul(string, 0, option);
 	kfree(string);
 
 	return rc;
-- 
cgit v1.1


From 6ed3cf2cdfce4c9f1d73171bd3f27d9cb77b734e Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 13 Apr 2012 11:49:04 -0400
Subject: btrfs: btrfs_root_readonly() broken on big-endian

->root_flags is __le64 and all accesses to it go through the helpers
that do proper conversions.  Except for btrfs_root_readonly(), which
checks bit 0 as in host-endian...

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/btrfs/ctree.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 5b8ef8e..3f65a81 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -2166,7 +2166,7 @@ BTRFS_SETGET_STACK_FUNCS(root_last_snapshot, struct btrfs_root_item,
 
 static inline bool btrfs_root_readonly(struct btrfs_root *root)
 {
-	return root->root_item.flags & BTRFS_ROOT_SUBVOL_RDONLY;
+	return (root->root_item.flags & cpu_to_le64(BTRFS_ROOT_SUBVOL_RDONLY)) != 0;
 }
 
 /* struct btrfs_root_backup */
-- 
cgit v1.1


From 3a251f04fe97c3d335b745c98e4b377e3c3899f2 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 13 Apr 2012 12:22:00 -0400
Subject: ocfs2: ->l_next_free_req breakage on big-endian

It's le16, not le32...

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/ocfs2/alloc.c        | 2 +-
 fs/ocfs2/refcounttree.c | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 3165aeb..31b9463 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -1134,7 +1134,7 @@ static int ocfs2_adjust_rightmost_branch(handle_t *handle,
 	}
 
 	el = path_leaf_el(path);
-	rec = &el->l_recs[le32_to_cpu(el->l_next_free_rec) - 1];
+	rec = &el->l_recs[le16_to_cpu(el->l_next_free_rec) - 1];
 
 	ocfs2_adjust_rightmost_records(handle, et, path, rec);
 
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index cf78233..a7b7217 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -1036,14 +1036,14 @@ static int ocfs2_get_refcount_cpos_end(struct ocfs2_caching_info *ci,
 
 	tmp_el = left_path->p_node[subtree_root].el;
 	blkno = left_path->p_node[subtree_root+1].bh->b_blocknr;
-	for (i = 0; i < le32_to_cpu(tmp_el->l_next_free_rec); i++) {
+	for (i = 0; i < le16_to_cpu(tmp_el->l_next_free_rec); i++) {
 		if (le64_to_cpu(tmp_el->l_recs[i].e_blkno) == blkno) {
 			*cpos_end = le32_to_cpu(tmp_el->l_recs[i+1].e_cpos);
 			break;
 		}
 	}
 
-	BUG_ON(i == le32_to_cpu(tmp_el->l_next_free_rec));
+	BUG_ON(i == le16_to_cpu(tmp_el->l_next_free_rec));
 
 out:
 	ocfs2_free_path(left_path);
-- 
cgit v1.1


From e1bf4cc620fd143766ddfcee3b004a1d1bb34fd0 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 13 Apr 2012 12:27:11 -0400
Subject: ocfs: ->rl_used breakage on big-endian

it's le16, not le32 or le64...

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/ocfs2/refcounttree.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index a7b7217..bc90ebc 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -1468,7 +1468,7 @@ static int ocfs2_divide_leaf_refcount_block(struct buffer_head *ref_leaf_bh,
 
 	trace_ocfs2_divide_leaf_refcount_block(
 		(unsigned long long)ref_leaf_bh->b_blocknr,
-		le32_to_cpu(rl->rl_count), le32_to_cpu(rl->rl_used));
+		le32_to_cpu(rl->rl_count), le16_to_cpu(rl->rl_used));
 
 	/*
 	 * XXX: Improvement later.
@@ -2411,7 +2411,7 @@ static int ocfs2_calc_refcount_meta_credits(struct super_block *sb,
 				rb = (struct ocfs2_refcount_block *)
 							prev_bh->b_data;
 
-				if (le64_to_cpu(rb->rf_records.rl_used) +
+				if (le16_to_cpu(rb->rf_records.rl_used) +
 				    recs_add >
 				    le16_to_cpu(rb->rf_records.rl_count))
 					ref_blocks++;
@@ -2476,7 +2476,7 @@ static int ocfs2_calc_refcount_meta_credits(struct super_block *sb,
 	if (prev_bh) {
 		rb = (struct ocfs2_refcount_block *)prev_bh->b_data;
 
-		if (le64_to_cpu(rb->rf_records.rl_used) + recs_add >
+		if (le16_to_cpu(rb->rf_records.rl_used) + recs_add >
 		    le16_to_cpu(rb->rf_records.rl_count))
 			ref_blocks++;
 
@@ -3629,7 +3629,7 @@ int ocfs2_refcounted_xattr_delete_need(struct inode *inode,
 			 * one will split a refcount rec, so totally we need
 			 * clusters * 2 new refcount rec.
 			 */
-			if (le64_to_cpu(rb->rf_records.rl_used) + clusters * 2 >
+			if (le16_to_cpu(rb->rf_records.rl_used) + clusters * 2 >
 			    le16_to_cpu(rb->rf_records.rl_count))
 				ref_blocks++;
 
-- 
cgit v1.1


From 28748b325dc2d730ccc312830a91c4ae0c0d9379 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 13 Apr 2012 12:28:21 -0400
Subject: ocfs2: ->rl_count endianness breakage

le16, not le32...

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/ocfs2/refcounttree.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index bc90ebc..9f32d7c 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -1468,7 +1468,7 @@ static int ocfs2_divide_leaf_refcount_block(struct buffer_head *ref_leaf_bh,
 
 	trace_ocfs2_divide_leaf_refcount_block(
 		(unsigned long long)ref_leaf_bh->b_blocknr,
-		le32_to_cpu(rl->rl_count), le16_to_cpu(rl->rl_used));
+		le16_to_cpu(rl->rl_count), le16_to_cpu(rl->rl_used));
 
 	/*
 	 * XXX: Improvement later.
-- 
cgit v1.1


From 72094e43e3af5020510f920321d71f1798fa896d Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 13 Apr 2012 12:30:02 -0400
Subject: ocfs2: ->e_leaf_clusters endianness breakage

le16, not le32...

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/ocfs2/suballoc.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c
index ba5d97e..f169da4 100644
--- a/fs/ocfs2/suballoc.c
+++ b/fs/ocfs2/suballoc.c
@@ -600,7 +600,7 @@ static void ocfs2_bg_alloc_cleanup(handle_t *handle,
 		ret = ocfs2_free_clusters(handle, cluster_ac->ac_inode,
 					  cluster_ac->ac_bh,
 					  le64_to_cpu(rec->e_blkno),
-					  le32_to_cpu(rec->e_leaf_clusters));
+					  le16_to_cpu(rec->e_leaf_clusters));
 		if (ret)
 			mlog_errno(ret);
 		/* Try all the clusters to free */
@@ -1628,7 +1628,7 @@ static int ocfs2_bg_discontig_fix_by_rec(struct ocfs2_suballoc_result *res,
 {
 	unsigned int bpc = le16_to_cpu(cl->cl_bpc);
 	unsigned int bitoff = le32_to_cpu(rec->e_cpos) * bpc;
-	unsigned int bitcount = le32_to_cpu(rec->e_leaf_clusters) * bpc;
+	unsigned int bitcount = le16_to_cpu(rec->e_leaf_clusters) * bpc;
 
 	if (res->sr_bit_offset < bitoff)
 		return 0;
-- 
cgit v1.1


From e847469bf77a1d339274074ed068d461f0c872bc Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 13 Apr 2012 13:49:47 -0400
Subject: lockd: fix the endianness bug

comparing be32 values for < is not doing the right thing...

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/lockd/clnt4xdr.c | 2 +-
 fs/lockd/clntxdr.c  | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/lockd/clnt4xdr.c b/fs/lockd/clnt4xdr.c
index 3ddcbb1..13ad153 100644
--- a/fs/lockd/clnt4xdr.c
+++ b/fs/lockd/clnt4xdr.c
@@ -241,7 +241,7 @@ static int decode_nlm4_stat(struct xdr_stream *xdr, __be32 *stat)
 	p = xdr_inline_decode(xdr, 4);
 	if (unlikely(p == NULL))
 		goto out_overflow;
-	if (unlikely(*p > nlm4_failed))
+	if (unlikely(ntohl(*p) > ntohl(nlm4_failed)))
 		goto out_bad_xdr;
 	*stat = *p;
 	return 0;
diff --git a/fs/lockd/clntxdr.c b/fs/lockd/clntxdr.c
index 3d35e3e..d269ada 100644
--- a/fs/lockd/clntxdr.c
+++ b/fs/lockd/clntxdr.c
@@ -236,7 +236,7 @@ static int decode_nlm_stat(struct xdr_stream *xdr,
 	p = xdr_inline_decode(xdr, 4);
 	if (unlikely(p == NULL))
 		goto out_overflow;
-	if (unlikely(*p > nlm_lck_denied_grace_period))
+	if (unlikely(ntohl(*p) > ntohl(nlm_lck_denied_grace_period)))
 		goto out_enum;
 	*stat = *p;
 	return 0;
-- 
cgit v1.1


From 9cd70b347e9761ea2d2ac3d758c529a48a8193e6 Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Mon, 16 Apr 2012 12:16:20 -0400
Subject: ext4: address scalability issue by removing extent cache statistics

Andi Kleen and Tim Chen have reported that under certain circumstances
the extent cache statistics are causing scalability problems due to
cache line bounces.

Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Cc: stable@vger.kernel.org
---
 fs/ext4/ext4.h    |  3 ---
 fs/ext4/extents.c |  4 ----
 fs/ext4/super.c   | 16 ----------------
 3 files changed, 23 deletions(-)

(limited to 'fs')

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index ab2594a..0e01e90 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1203,9 +1203,6 @@ struct ext4_sb_info {
 	unsigned long s_ext_blocks;
 	unsigned long s_ext_extents;
 #endif
-	/* ext4 extent cache stats */
-	unsigned long extent_cache_hits;
-	unsigned long extent_cache_misses;
 
 	/* for buddy allocator */
 	struct ext4_group_info ***s_group_info;
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 1421938..8a12cd2 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -2066,10 +2066,6 @@ static int ext4_ext_check_cache(struct inode *inode, ext4_lblk_t block,
 		ret = 1;
 	}
 errout:
-	if (!ret)
-		sbi->extent_cache_misses++;
-	else
-		sbi->extent_cache_hits++;
 	trace_ext4_ext_in_cache(inode, block, ret);
 	spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
 	return ret;
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index ceebaf8..ef7db50 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -2366,18 +2366,6 @@ static ssize_t lifetime_write_kbytes_show(struct ext4_attr *a,
 			  EXT4_SB(sb)->s_sectors_written_start) >> 1)));
 }
 
-static ssize_t extent_cache_hits_show(struct ext4_attr *a,
-				      struct ext4_sb_info *sbi, char *buf)
-{
-	return snprintf(buf, PAGE_SIZE, "%lu\n", sbi->extent_cache_hits);
-}
-
-static ssize_t extent_cache_misses_show(struct ext4_attr *a,
-					struct ext4_sb_info *sbi, char *buf)
-{
-	return snprintf(buf, PAGE_SIZE, "%lu\n", sbi->extent_cache_misses);
-}
-
 static ssize_t inode_readahead_blks_store(struct ext4_attr *a,
 					  struct ext4_sb_info *sbi,
 					  const char *buf, size_t count)
@@ -2435,8 +2423,6 @@ static struct ext4_attr ext4_attr_##name = __ATTR(name, mode, show, store)
 EXT4_RO_ATTR(delayed_allocation_blocks);
 EXT4_RO_ATTR(session_write_kbytes);
 EXT4_RO_ATTR(lifetime_write_kbytes);
-EXT4_RO_ATTR(extent_cache_hits);
-EXT4_RO_ATTR(extent_cache_misses);
 EXT4_ATTR_OFFSET(inode_readahead_blks, 0644, sbi_ui_show,
 		 inode_readahead_blks_store, s_inode_readahead_blks);
 EXT4_RW_ATTR_SBI_UI(inode_goal, s_inode_goal);
@@ -2452,8 +2438,6 @@ static struct attribute *ext4_attrs[] = {
 	ATTR_LIST(delayed_allocation_blocks),
 	ATTR_LIST(session_write_kbytes),
 	ATTR_LIST(lifetime_write_kbytes),
-	ATTR_LIST(extent_cache_hits),
-	ATTR_LIST(extent_cache_misses),
 	ATTR_LIST(inode_readahead_blks),
 	ATTR_LIST(inode_goal),
 	ATTR_LIST(mb_stats),
-- 
cgit v1.1


From 57f73c2c89a5d3b2ed87201c8100d1fa989a1a65 Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Mon, 16 Apr 2012 18:55:26 -0400
Subject: ext4: fix handling of journalled quota options

Commit 26092bf5 broke handling of journalled quota mount options by
trying to parse argument of every mount option as a number.  Fix this
by dealing with the quota options before we call match_int().

Thanks to Jan Kara for discovering this regression.

Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Reviewed-by: Jan Kara <jack@suse.cz>
---
 fs/ext4/super.c | 32 +++++++++++++++-----------------
 1 file changed, 15 insertions(+), 17 deletions(-)

(limited to 'fs')

diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index ef7db50..6da1935 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1305,20 +1305,20 @@ static int set_qf_name(struct super_block *sb, int qtype, substring_t *args)
 		ext4_msg(sb, KERN_ERR,
 			"Cannot change journaled "
 			"quota options when quota turned on");
-		return 0;
+		return -1;
 	}
 	qname = match_strdup(args);
 	if (!qname) {
 		ext4_msg(sb, KERN_ERR,
 			"Not enough memory for storing quotafile name");
-		return 0;
+		return -1;
 	}
 	if (sbi->s_qf_names[qtype] &&
 		strcmp(sbi->s_qf_names[qtype], qname)) {
 		ext4_msg(sb, KERN_ERR,
 			"%s quota file already specified", QTYPE2NAME(qtype));
 		kfree(qname);
-		return 0;
+		return -1;
 	}
 	sbi->s_qf_names[qtype] = qname;
 	if (strchr(sbi->s_qf_names[qtype], '/')) {
@@ -1326,7 +1326,7 @@ static int set_qf_name(struct super_block *sb, int qtype, substring_t *args)
 			"quotafile must be on filesystem root");
 		kfree(sbi->s_qf_names[qtype]);
 		sbi->s_qf_names[qtype] = NULL;
-		return 0;
+		return -1;
 	}
 	set_opt(sb, QUOTA);
 	return 1;
@@ -1341,7 +1341,7 @@ static int clear_qf_name(struct super_block *sb, int qtype)
 		sbi->s_qf_names[qtype]) {
 		ext4_msg(sb, KERN_ERR, "Cannot change journaled quota options"
 			" when quota turned on");
-		return 0;
+		return -1;
 	}
 	/*
 	 * The space will be released later when all options are confirmed
@@ -1450,6 +1450,16 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token,
 	const struct mount_opts *m;
 	int arg = 0;
 
+#ifdef CONFIG_QUOTA
+	if (token == Opt_usrjquota)
+		return set_qf_name(sb, USRQUOTA, &args[0]);
+	else if (token == Opt_grpjquota)
+		return set_qf_name(sb, GRPQUOTA, &args[0]);
+	else if (token == Opt_offusrjquota)
+		return clear_qf_name(sb, USRQUOTA);
+	else if (token == Opt_offgrpjquota)
+		return clear_qf_name(sb, GRPQUOTA);
+#endif
 	if (args->from && match_int(args, &arg))
 		return -1;
 	switch (token) {
@@ -1549,18 +1559,6 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token,
 				sbi->s_mount_opt |= m->mount_opt;
 			}
 #ifdef CONFIG_QUOTA
-		} else if (token == Opt_usrjquota) {
-			if (!set_qf_name(sb, USRQUOTA, &args[0]))
-				return -1;
-		} else if (token == Opt_grpjquota) {
-			if (!set_qf_name(sb, GRPQUOTA, &args[0]))
-				return -1;
-		} else if (token == Opt_offusrjquota) {
-			if (!clear_qf_name(sb, USRQUOTA))
-				return -1;
-		} else if (token == Opt_offgrpjquota) {
-			if (!clear_qf_name(sb, GRPQUOTA))
-				return -1;
 		} else if (m->flags & MOPT_QFMT) {
 			if (sb_any_quota_loaded(sb) &&
 			    sbi->s_jquota_fmt != m->mount_opt) {
-- 
cgit v1.1


From e4eb1ff61b323d6141614e5458a1f53c7046ff8e Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Fri, 20 Apr 2012 15:35:40 -0700
Subject: VM: add "vm_brk()" helper function

It does the same thing as "do_brk()", except it handles the VM locking
too.

It turns out that all external callers want that anyway, so we can make
do_brk() static to just mm/mmap.c while at it.

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/binfmt_aout.c | 20 +++++---------------
 fs/binfmt_elf.c  | 15 ++++-----------
 2 files changed, 9 insertions(+), 26 deletions(-)

(limited to 'fs')

diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c
index 2eb12f1..8852749 100644
--- a/fs/binfmt_aout.c
+++ b/fs/binfmt_aout.c
@@ -50,9 +50,7 @@ static int set_brk(unsigned long start, unsigned long end)
 	end = PAGE_ALIGN(end);
 	if (end > start) {
 		unsigned long addr;
-		down_write(&current->mm->mmap_sem);
-		addr = do_brk(start, end - start);
-		up_write(&current->mm->mmap_sem);
+		addr = vm_brk(start, end - start);
 		if (BAD_ADDR(addr))
 			return addr;
 	}
@@ -280,9 +278,7 @@ static int load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs)
 		pos = 32;
 		map_size = ex.a_text+ex.a_data;
 #endif
-		down_write(&current->mm->mmap_sem);
-		error = do_brk(text_addr & PAGE_MASK, map_size);
-		up_write(&current->mm->mmap_sem);
+		error = vm_brk(text_addr & PAGE_MASK, map_size);
 		if (error != (text_addr & PAGE_MASK)) {
 			send_sig(SIGKILL, current, 0);
 			return error;
@@ -313,9 +309,7 @@ static int load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs)
 
 		if (!bprm->file->f_op->mmap||((fd_offset & ~PAGE_MASK) != 0)) {
 			loff_t pos = fd_offset;
-			down_write(&current->mm->mmap_sem);
-			do_brk(N_TXTADDR(ex), ex.a_text+ex.a_data);
-			up_write(&current->mm->mmap_sem);
+			vm_brk(N_TXTADDR(ex), ex.a_text+ex.a_data);
 			bprm->file->f_op->read(bprm->file,
 					(char __user *)N_TXTADDR(ex),
 					ex.a_text+ex.a_data, &pos);
@@ -412,9 +406,7 @@ static int load_aout_library(struct file *file)
 			       "N_TXTOFF is not page aligned. Please convert library: %s\n",
 			       file->f_path.dentry->d_name.name);
 		}
-		down_write(&current->mm->mmap_sem);
-		do_brk(start_addr, ex.a_text + ex.a_data + ex.a_bss);
-		up_write(&current->mm->mmap_sem);
+		vm_brk(start_addr, ex.a_text + ex.a_data + ex.a_bss);
 		
 		file->f_op->read(file, (char __user *)start_addr,
 			ex.a_text + ex.a_data, &pos);
@@ -438,9 +430,7 @@ static int load_aout_library(struct file *file)
 	len = PAGE_ALIGN(ex.a_text + ex.a_data);
 	bss = ex.a_text + ex.a_data + ex.a_bss;
 	if (bss > len) {
-		down_write(&current->mm->mmap_sem);
-		error = do_brk(start_addr + len, bss - len);
-		up_write(&current->mm->mmap_sem);
+		error = vm_brk(start_addr + len, bss - len);
 		retval = error;
 		if (error != start_addr + len)
 			goto out;
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 48ffb3d..0708a0b 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -82,9 +82,7 @@ static int set_brk(unsigned long start, unsigned long end)
 	end = ELF_PAGEALIGN(end);
 	if (end > start) {
 		unsigned long addr;
-		down_write(&current->mm->mmap_sem);
-		addr = do_brk(start, end - start);
-		up_write(&current->mm->mmap_sem);
+		addr = vm_brk(start, end - start);
 		if (BAD_ADDR(addr))
 			return addr;
 	}
@@ -514,9 +512,7 @@ static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
 		elf_bss = ELF_PAGESTART(elf_bss + ELF_MIN_ALIGN - 1);
 
 		/* Map the last of the bss segment */
-		down_write(&current->mm->mmap_sem);
-		error = do_brk(elf_bss, last_bss - elf_bss);
-		up_write(&current->mm->mmap_sem);
+		error = vm_brk(elf_bss, last_bss - elf_bss);
 		if (BAD_ADDR(error))
 			goto out_close;
 	}
@@ -1072,11 +1068,8 @@ static int load_elf_library(struct file *file)
 	len = ELF_PAGESTART(eppnt->p_filesz + eppnt->p_vaddr +
 			    ELF_MIN_ALIGN - 1);
 	bss = eppnt->p_memsz + eppnt->p_vaddr;
-	if (bss > len) {
-		down_write(&current->mm->mmap_sem);
-		do_brk(len, bss - len);
-		up_write(&current->mm->mmap_sem);
-	}
+	if (bss > len)
+		vm_brk(len, bss - len);
 	error = 0;
 
 out_free_ph:
-- 
cgit v1.1


From a46ef99d80817a167477ed1c8b4d90ee0c2e726f Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Fri, 20 Apr 2012 16:20:01 -0700
Subject: VM: add "vm_munmap()" helper function

Like the vm_brk() function, this is the same as "do_munmap()", except it
does the VM locking for the caller.

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/aio.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

(limited to 'fs')

diff --git a/fs/aio.c b/fs/aio.c
index da88760..99bd790 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -92,11 +92,8 @@ static void aio_free_ring(struct kioctx *ctx)
 	for (i=0; i<info->nr_pages; i++)
 		put_page(info->ring_pages[i]);
 
-	if (info->mmap_size) {
-		down_write(&ctx->mm->mmap_sem);
-		do_munmap(ctx->mm, info->mmap_base, info->mmap_size);
-		up_write(&ctx->mm->mmap_sem);
-	}
+	if (info->mmap_size)
+		vm_munmap(ctx->mm, info->mmap_base, info->mmap_size);
 
 	if (info->ring_pages && info->ring_pages != info->internal_pages)
 		kfree(info->ring_pages);
-- 
cgit v1.1


From 6be5ceb02e98eaf6cfc4f8b12a896d04023f340d Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Fri, 20 Apr 2012 17:13:58 -0700
Subject: VM: add "vm_mmap()" helper function

This continues the theme started with vm_brk() and vm_munmap():
vm_mmap() does the same thing as do_mmap(), but additionally does the
required VM locking.

This uninlines (and rewrites it to be clearer) do_mmap(), which sadly
duplicates it in mm/mmap.c and mm/nommu.c.  But that way we don't have
to export our internal do_mmap_pgoff() function.

Some day we hopefully don't have to export do_mmap() either, if all
modular users can become the simpler vm_mmap() instead.  We're actually
very close to that already, with the notable exception of the (broken)
use in i810, and a couple of stragglers in binfmt_elf.

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/binfmt_aout.c      | 12 +++---------
 fs/binfmt_elf.c       |  8 ++------
 fs/binfmt_elf_fdpic.c | 18 ++++--------------
 fs/binfmt_flat.c      | 12 +++---------
 fs/binfmt_som.c       | 12 +++---------
 5 files changed, 15 insertions(+), 47 deletions(-)

(limited to 'fs')

diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c
index 8852749..d146e18 100644
--- a/fs/binfmt_aout.c
+++ b/fs/binfmt_aout.c
@@ -319,24 +319,20 @@ static int load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs)
 			goto beyond_if;
 		}
 
-		down_write(&current->mm->mmap_sem);
-		error = do_mmap(bprm->file, N_TXTADDR(ex), ex.a_text,
+		error = vm_mmap(bprm->file, N_TXTADDR(ex), ex.a_text,
 			PROT_READ | PROT_EXEC,
 			MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE,
 			fd_offset);
-		up_write(&current->mm->mmap_sem);
 
 		if (error != N_TXTADDR(ex)) {
 			send_sig(SIGKILL, current, 0);
 			return error;
 		}
 
-		down_write(&current->mm->mmap_sem);
- 		error = do_mmap(bprm->file, N_DATADDR(ex), ex.a_data,
+		error = vm_mmap(bprm->file, N_DATADDR(ex), ex.a_data,
 				PROT_READ | PROT_WRITE | PROT_EXEC,
 				MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE,
 				fd_offset + ex.a_text);
-		up_write(&current->mm->mmap_sem);
 		if (error != N_DATADDR(ex)) {
 			send_sig(SIGKILL, current, 0);
 			return error;
@@ -417,12 +413,10 @@ static int load_aout_library(struct file *file)
 		goto out;
 	}
 	/* Now use mmap to map the library into memory. */
-	down_write(&current->mm->mmap_sem);
-	error = do_mmap(file, start_addr, ex.a_text + ex.a_data,
+	error = vm_mmap(file, start_addr, ex.a_text + ex.a_data,
 			PROT_READ | PROT_WRITE | PROT_EXEC,
 			MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
 			N_TXTOFF(ex));
-	up_write(&current->mm->mmap_sem);
 	retval = error;
 	if (error != start_addr)
 		goto out;
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 0708a0b..16f7354 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -958,10 +958,8 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
 		   and some applications "depend" upon this behavior.
 		   Since we do not have the power to recompile these, we
 		   emulate the SVr4 behavior. Sigh. */
-		down_write(&current->mm->mmap_sem);
-		error = do_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
+		error = vm_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
 				MAP_FIXED | MAP_PRIVATE, 0);
-		up_write(&current->mm->mmap_sem);
 	}
 
 #ifdef ELF_PLAT_INIT
@@ -1046,8 +1044,7 @@ static int load_elf_library(struct file *file)
 		eppnt++;
 
 	/* Now use mmap to map the library into memory. */
-	down_write(&current->mm->mmap_sem);
-	error = do_mmap(file,
+	error = vm_mmap(file,
 			ELF_PAGESTART(eppnt->p_vaddr),
 			(eppnt->p_filesz +
 			 ELF_PAGEOFFSET(eppnt->p_vaddr)),
@@ -1055,7 +1052,6 @@ static int load_elf_library(struct file *file)
 			MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
 			(eppnt->p_offset -
 			 ELF_PAGEOFFSET(eppnt->p_vaddr)));
-	up_write(&current->mm->mmap_sem);
 	if (error != ELF_PAGESTART(eppnt->p_vaddr))
 		goto out_free_ph;
 
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index 9bd5612..d390a0f 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -390,21 +390,17 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm,
 	    (executable_stack == EXSTACK_DEFAULT && VM_STACK_FLAGS & VM_EXEC))
 		stack_prot |= PROT_EXEC;
 
-	down_write(&current->mm->mmap_sem);
-	current->mm->start_brk = do_mmap(NULL, 0, stack_size, stack_prot,
+	current->mm->start_brk = vm_mmap(NULL, 0, stack_size, stack_prot,
 					 MAP_PRIVATE | MAP_ANONYMOUS |
 					 MAP_UNINITIALIZED | MAP_GROWSDOWN,
 					 0);
 
 	if (IS_ERR_VALUE(current->mm->start_brk)) {
-		up_write(&current->mm->mmap_sem);
 		retval = current->mm->start_brk;
 		current->mm->start_brk = 0;
 		goto error_kill;
 	}
 
-	up_write(&current->mm->mmap_sem);
-
 	current->mm->brk = current->mm->start_brk;
 	current->mm->context.end_brk = current->mm->start_brk;
 	current->mm->context.end_brk +=
@@ -955,10 +951,8 @@ static int elf_fdpic_map_file_constdisp_on_uclinux(
 	if (params->flags & ELF_FDPIC_FLAG_EXECUTABLE)
 		mflags |= MAP_EXECUTABLE;
 
-	down_write(&mm->mmap_sem);
-	maddr = do_mmap(NULL, load_addr, top - base,
+	maddr = vm_mmap(NULL, load_addr, top - base,
 			PROT_READ | PROT_WRITE | PROT_EXEC, mflags, 0);
-	up_write(&mm->mmap_sem);
 	if (IS_ERR_VALUE(maddr))
 		return (int) maddr;
 
@@ -1096,10 +1090,8 @@ static int elf_fdpic_map_file_by_direct_mmap(struct elf_fdpic_params *params,
 
 		/* create the mapping */
 		disp = phdr->p_vaddr & ~PAGE_MASK;
-		down_write(&mm->mmap_sem);
-		maddr = do_mmap(file, maddr, phdr->p_memsz + disp, prot, flags,
+		maddr = vm_mmap(file, maddr, phdr->p_memsz + disp, prot, flags,
 				phdr->p_offset - disp);
-		up_write(&mm->mmap_sem);
 
 		kdebug("mmap[%d] <file> sz=%lx pr=%x fl=%x of=%lx --> %08lx",
 		       loop, phdr->p_memsz + disp, prot, flags,
@@ -1143,10 +1135,8 @@ static int elf_fdpic_map_file_by_direct_mmap(struct elf_fdpic_params *params,
 			unsigned long xmaddr;
 
 			flags |= MAP_FIXED | MAP_ANONYMOUS;
-			down_write(&mm->mmap_sem);
-			xmaddr = do_mmap(NULL, xaddr, excess - excess1,
+			xmaddr = vm_mmap(NULL, xaddr, excess - excess1,
 					 prot, flags, 0);
-			up_write(&mm->mmap_sem);
 
 			kdebug("mmap[%d] <anon>"
 			       " ad=%lx sz=%lx pr=%x fl=%x of=0 --> %08lx",
diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c
index 024d20e..6b2daf9 100644
--- a/fs/binfmt_flat.c
+++ b/fs/binfmt_flat.c
@@ -542,10 +542,8 @@ static int load_flat_file(struct linux_binprm * bprm,
 		 */
 		DBG_FLT("BINFMT_FLAT: ROM mapping of file (we hope)\n");
 
-		down_write(&current->mm->mmap_sem);
-		textpos = do_mmap(bprm->file, 0, text_len, PROT_READ|PROT_EXEC,
+		textpos = vm_mmap(bprm->file, 0, text_len, PROT_READ|PROT_EXEC,
 				  MAP_PRIVATE|MAP_EXECUTABLE, 0);
-		up_write(&current->mm->mmap_sem);
 		if (!textpos || IS_ERR_VALUE(textpos)) {
 			if (!textpos)
 				textpos = (unsigned long) -ENOMEM;
@@ -556,10 +554,8 @@ static int load_flat_file(struct linux_binprm * bprm,
 
 		len = data_len + extra + MAX_SHARED_LIBS * sizeof(unsigned long);
 		len = PAGE_ALIGN(len);
-		down_write(&current->mm->mmap_sem);
-		realdatastart = do_mmap(0, 0, len,
+		realdatastart = vm_mmap(0, 0, len,
 			PROT_READ|PROT_WRITE|PROT_EXEC, MAP_PRIVATE, 0);
-		up_write(&current->mm->mmap_sem);
 
 		if (realdatastart == 0 || IS_ERR_VALUE(realdatastart)) {
 			if (!realdatastart)
@@ -603,10 +599,8 @@ static int load_flat_file(struct linux_binprm * bprm,
 
 		len = text_len + data_len + extra + MAX_SHARED_LIBS * sizeof(unsigned long);
 		len = PAGE_ALIGN(len);
-		down_write(&current->mm->mmap_sem);
-		textpos = do_mmap(0, 0, len,
+		textpos = vm_mmap(0, 0, len,
 			PROT_READ | PROT_EXEC | PROT_WRITE, MAP_PRIVATE, 0);
-		up_write(&current->mm->mmap_sem);
 
 		if (!textpos || IS_ERR_VALUE(textpos)) {
 			if (!textpos)
diff --git a/fs/binfmt_som.c b/fs/binfmt_som.c
index e4fc746..4517aaf 100644
--- a/fs/binfmt_som.c
+++ b/fs/binfmt_som.c
@@ -147,10 +147,8 @@ static int map_som_binary(struct file *file,
 	code_size = SOM_PAGEALIGN(hpuxhdr->exec_tsize);
 	current->mm->start_code = code_start;
 	current->mm->end_code = code_start + code_size;
-	down_write(&current->mm->mmap_sem);
-	retval = do_mmap(file, code_start, code_size, prot,
+	retval = vm_mmap(file, code_start, code_size, prot,
 			flags, SOM_PAGESTART(hpuxhdr->exec_tfile));
-	up_write(&current->mm->mmap_sem);
 	if (retval < 0 && retval > -1024)
 		goto out;
 
@@ -158,20 +156,16 @@ static int map_som_binary(struct file *file,
 	data_size = SOM_PAGEALIGN(hpuxhdr->exec_dsize);
 	current->mm->start_data = data_start;
 	current->mm->end_data = bss_start = data_start + data_size;
-	down_write(&current->mm->mmap_sem);
-	retval = do_mmap(file, data_start, data_size,
+	retval = vm_mmap(file, data_start, data_size,
 			prot | PROT_WRITE, flags,
 			SOM_PAGESTART(hpuxhdr->exec_dfile));
-	up_write(&current->mm->mmap_sem);
 	if (retval < 0 && retval > -1024)
 		goto out;
 
 	som_brk = bss_start + SOM_PAGEALIGN(hpuxhdr->exec_bsize);
 	current->mm->start_brk = current->mm->brk = som_brk;
-	down_write(&current->mm->mmap_sem);
-	retval = do_mmap(NULL, bss_start, som_brk - bss_start,
+	retval = vm_mmap(NULL, bss_start, som_brk - bss_start,
 			prot | PROT_WRITE, MAP_FIXED | MAP_PRIVATE, 0);
-	up_write(&current->mm->mmap_sem);
 	if (retval > 0 || retval < -1024)
 		retval = 0;
 out:
-- 
cgit v1.1


From 936af1576e4c24b466380fc2b8d93352161d13b0 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 20 Apr 2012 21:49:41 -0400
Subject: aio: don't bother with unmapping when aio_free_ring() is coming from
 exit_aio()

... since exit_mmap() is coming and it will munmap() everything anyway.
In all other cases aio_free_ring() has ctx->mm == current->mm; moreover,
all other callers of vm_munmap() have mm == current->mm, so this will
allow us to get rid of mm argument of vm_munmap().

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/aio.c | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/aio.c b/fs/aio.c
index 99bd790..976e33d 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -92,8 +92,10 @@ static void aio_free_ring(struct kioctx *ctx)
 	for (i=0; i<info->nr_pages; i++)
 		put_page(info->ring_pages[i]);
 
-	if (info->mmap_size)
+	if (info->mmap_size) {
+		BUG_ON(ctx->mm != current->mm);
 		vm_munmap(ctx->mm, info->mmap_base, info->mmap_size);
+	}
 
 	if (info->ring_pages && info->ring_pages != info->internal_pages)
 		kfree(info->ring_pages);
@@ -386,6 +388,17 @@ void exit_aio(struct mm_struct *mm)
 				"exit_aio:ioctx still alive: %d %d %d\n",
 				atomic_read(&ctx->users), ctx->dead,
 				ctx->reqs_active);
+		/*
+		 * We don't need to bother with munmap() here -
+		 * exit_mmap(mm) is coming and it'll unmap everything.
+		 * Since aio_free_ring() uses non-zero ->mmap_size
+		 * as indicator that it needs to unmap the area,
+		 * just set it to 0; aio_free_ring() is the only
+		 * place that uses ->mmap_size, so it's safe.
+		 * That way we get all munmap done to current->mm -
+		 * all other callers have ctx->mm == current->mm.
+		 */
+		ctx->ring_info.mmap_size = 0;
 		put_ioctx(ctx);
 	}
 }
-- 
cgit v1.1


From bfce281c287a427d0841fadf5d59242757b4e620 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 20 Apr 2012 21:57:04 -0400
Subject: kill mm argument of vm_munmap()

it's always current->mm

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/aio.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/aio.c b/fs/aio.c
index 976e33d..67a6db3 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -94,7 +94,7 @@ static void aio_free_ring(struct kioctx *ctx)
 
 	if (info->mmap_size) {
 		BUG_ON(ctx->mm != current->mm);
-		vm_munmap(ctx->mm, info->mmap_base, info->mmap_size);
+		vm_munmap(info->mmap_base, info->mmap_size);
 	}
 
 	if (info->ring_pages && info->ring_pages != info->internal_pages)
-- 
cgit v1.1


From 53ad1c980d4fb450722a575ca17c188808939340 Mon Sep 17 00:00:00 2001
From: David Teigland <teigland@redhat.com>
Date: Wed, 4 Apr 2012 09:49:15 -0500
Subject: dlm: fix QUECVT when convert queue is empty

The QUECVT flag should not prevent conversions from
being granted immediately when the convert queue is
empty.

Signed-off-by: David Teigland <teigland@redhat.com>
---
 fs/dlm/lock.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'fs')

diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c
index fa5c07d..4c58d4a 100644
--- a/fs/dlm/lock.c
+++ b/fs/dlm/lock.c
@@ -1737,6 +1737,18 @@ static int _can_be_granted(struct dlm_rsb *r, struct dlm_lkb *lkb, int now)
 		return 1;
 
 	/*
+	 * Even if the convert is compat with all granted locks,
+	 * QUECVT forces it behind other locks on the convert queue.
+	 */
+
+	if (now && conv && (lkb->lkb_exflags & DLM_LKF_QUECVT)) {
+		if (list_empty(&r->res_convertqueue))
+			return 1;
+		else
+			goto out;
+	}
+
+	/*
 	 * The NOORDER flag is set to avoid the standard vms rules on grant
 	 * order.
 	 */
-- 
cgit v1.1