From c89681ed7d0e4a61d35bdc12c06c6733b718b2cb Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <miklos@szeredi.hu>
Date: Thu, 22 Jun 2006 14:47:22 -0700
Subject: [PATCH] remove steal_locks()

This patch removes the steal_locks() function.

steal_locks() doesn't work correctly with any filesystem that does it's own
lock management, including NFS, CIFS, etc.

In addition it has weird semantics on local filesystems in case tasks
sharing file-descriptor tables are doing POSIX locking operations in
parallel to execve().

The steal_locks() function has an effect on applications doing:

clone(CLONE_FILES)
  /* in child */
  lock
  execve
  lock

POSIX locks acquired before execve (by "child", "parent" or any further
task sharing files_struct) will after the execve be owned exclusively by
"child".

According to Chris Wright some LSB/LTP kind of suite triggers without the
stealing behavior, but there's no known real-world application that would
also fail.

Apps using NPTL are not affected, since all other threads are killed before
execve.

Apps using LinuxThreads are only affected if they

  - have multiple threads during exec (LinuxThreads doesn't kill other
    threads, the app may do it with pthread_kill_other_threads_np())
  - rely on POSIX locks being inherited across exec

Both conditions are documented, but not their interaction.

Apps using clone() natively are affected if they

  - use clone(CLONE_FILES)
  - rely on POSIX locks being inherited across exec

The above scenarios are unlikely, but possible.

If the patch is vetoed, there's a plan B, that involves mostly keeping the
weird stealing semantics, but changing the way lock ownership is handled so
that network and local filesystems work consistently.

That would add more complexity though, so this solution seems to be
preferred by most people.

Signed-off-by: Miklos Szeredi <miklos@szeredi.hu>
Cc: Trond Myklebust <trond.myklebust@fys.uio.no>
Cc: Matthew Wilcox <willy@debian.org>
Cc: Chris Wright <chrisw@sous-sol.org>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Steven French <sfrench@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/locks.c | 57 ---------------------------------------------------------
 1 file changed, 57 deletions(-)

(limited to 'fs/locks.c')

diff --git a/fs/locks.c b/fs/locks.c
index ab61a8b..69435c6 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -2206,63 +2206,6 @@ int lock_may_write(struct inode *inode, loff_t start, unsigned long len)
 
 EXPORT_SYMBOL(lock_may_write);
 
-static inline void __steal_locks(struct file *file, fl_owner_t from)
-{
-	struct inode *inode = file->f_dentry->d_inode;
-	struct file_lock *fl = inode->i_flock;
-
-	while (fl) {
-		if (fl->fl_file == file && fl->fl_owner == from)
-			fl->fl_owner = current->files;
-		fl = fl->fl_next;
-	}
-}
-
-/* When getting ready for executing a binary, we make sure that current
- * has a files_struct on its own. Before dropping the old files_struct,
- * we take over ownership of all locks for all file descriptors we own.
- * Note that we may accidentally steal a lock for a file that a sibling
- * has created since the unshare_files() call.
- */
-void steal_locks(fl_owner_t from)
-{
-	struct files_struct *files = current->files;
-	int i, j;
-	struct fdtable *fdt;
-
-	if (from == files)
-		return;
-
-	lock_kernel();
-	j = 0;
-
-	/*
-	 * We are not taking a ref to the file structures, so
-	 * we need to acquire ->file_lock.
-	 */
-	spin_lock(&files->file_lock);
-	fdt = files_fdtable(files);
-	for (;;) {
-		unsigned long set;
-		i = j * __NFDBITS;
-		if (i >= fdt->max_fdset || i >= fdt->max_fds)
-			break;
-		set = fdt->open_fds->fds_bits[j++];
-		while (set) {
-			if (set & 1) {
-				struct file *file = fdt->fd[i];
-				if (file)
-					__steal_locks(file, from);
-			}
-			i++;
-			set >>= 1;
-		}
-	}
-	spin_unlock(&files->file_lock);
-	unlock_kernel();
-}
-EXPORT_SYMBOL(steal_locks);
-
 static int __init filelock_init(void)
 {
 	filelock_cache = kmem_cache_create("file_lock_cache",
-- 
cgit v1.1


From 0d9a490abe1f69fda220f7866f6f23af41daa128 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <miklos@szeredi.hu>
Date: Fri, 23 Jun 2006 02:05:09 -0700
Subject: [PATCH] locks: don't unnecessarily fail posix lock operations

posix_lock_file() was too cautious, failing operations on OOM, even if they
didn't actually require an allocation.

This has the disadvantage, that a failing unlock on process exit could lead to
a memory leak.  There are two possibilites for this:

- filesystem implements .lock() and calls back to posix_lock_file().  On
cleanup of files_struct locks_remove_posix() is called which should remove all
locks belonging to files_struct.  However if filesystem calls
posix_lock_file() which fails, then those locks will never be freed.

- if a file is closed while a lock is blocked, then after acquiring
fcntl_setlk() will undo the lock.  But this unlock itself might fail on OOM,
again possibly leaking the lock.

The solution is to move the checking of the allocations until after it is sure
that they will be needed.  This will solve the above problem since unlock will
always succeed unless it splits an existing region.

Signed-off-by: Miklos Szeredi <miklos@szeredi.hu>
Cc: Trond Myklebust <trond.myklebust@fys.uio.no>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/locks.c | 22 +++++++++++++++-------
 1 file changed, 15 insertions(+), 7 deletions(-)

(limited to 'fs/locks.c')

diff --git a/fs/locks.c b/fs/locks.c
index 69435c6..c5ac6b4 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -834,14 +834,7 @@ static int __posix_lock_file_conf(struct inode *inode, struct file_lock *request
 	if (request->fl_flags & FL_ACCESS)
 		goto out;
 
-	error = -ENOLCK; /* "no luck" */
-	if (!(new_fl && new_fl2))
-		goto out;
-
 	/*
-	 * We've allocated the new locks in advance, so there are no
-	 * errors possible (and no blocking operations) from here on.
-	 * 
 	 * Find the first old lock with the same owner as the new lock.
 	 */
 	
@@ -938,10 +931,25 @@ static int __posix_lock_file_conf(struct inode *inode, struct file_lock *request
 		before = &fl->fl_next;
 	}
 
+	/*
+	 * The above code only modifies existing locks in case of
+	 * merging or replacing.  If new lock(s) need to be inserted
+	 * all modifications are done bellow this, so it's safe yet to
+	 * bail out.
+	 */
+	error = -ENOLCK; /* "no luck" */
+	if (right && left == right && !new_fl2)
+		goto out;
+
 	error = 0;
 	if (!added) {
 		if (request->fl_type == F_UNLCK)
 			goto out;
+
+		if (!new_fl) {
+			error = -ENOLCK;
+			goto out;
+		}
 		locks_copy_lock(new_fl, request);
 		locks_insert_lock(before, new_fl);
 		new_fl = NULL;
-- 
cgit v1.1


From 39005d022ad221b76dc2de0ac62ef475a796433b Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <miklos@szeredi.hu>
Date: Fri, 23 Jun 2006 02:05:10 -0700
Subject: [PATCH] locks: don't do unnecessary allocations

posix_lock_file() always allocates new locks in advance, even if it's easy to
determine that no allocations will be needed.

Optimize these cases:

 - FL_ACCESS flag is set

 - Unlocking the whole range

Signed-off-by: Miklos Szeredi <miklos@szeredi.hu>
Cc: Trond Myklebust <trond.myklebust@fys.uio.no>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/locks.c | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

(limited to 'fs/locks.c')

diff --git a/fs/locks.c b/fs/locks.c
index c5ac6b4..2344f24 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -794,7 +794,8 @@ out:
 static int __posix_lock_file_conf(struct inode *inode, struct file_lock *request, struct file_lock *conflock)
 {
 	struct file_lock *fl;
-	struct file_lock *new_fl, *new_fl2;
+	struct file_lock *new_fl = NULL;
+	struct file_lock *new_fl2 = NULL;
 	struct file_lock *left = NULL;
 	struct file_lock *right = NULL;
 	struct file_lock **before;
@@ -803,9 +804,15 @@ static int __posix_lock_file_conf(struct inode *inode, struct file_lock *request
 	/*
 	 * We may need two file_lock structures for this operation,
 	 * so we get them in advance to avoid races.
+	 *
+	 * In some cases we can be sure, that no new locks will be needed
 	 */
-	new_fl = locks_alloc_lock();
-	new_fl2 = locks_alloc_lock();
+	if (!(request->fl_flags & FL_ACCESS) &&
+	    (request->fl_type != F_UNLCK ||
+	     request->fl_start != 0 || request->fl_end != OFFSET_MAX)) {
+		new_fl = locks_alloc_lock();
+		new_fl2 = locks_alloc_lock();
+	}
 
 	lock_kernel();
 	if (request->fl_type != F_UNLCK) {
-- 
cgit v1.1


From ff7b86b82083f24b8637dff1528c7101c18c7f39 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <miklos@szeredi.hu>
Date: Fri, 23 Jun 2006 02:05:11 -0700
Subject: [PATCH] locks: clean up locks_remove_posix()

locks_remove_posix() can use posix_lock_file() instead of doing the lock
removal by hand.  posix_lock_file() now does exacly the same.

The comment about pids no longer applies, posix_lock_file() takes only the
owner into account.

Signed-off-by: Miklos Szeredi <miklos@szeredi.hu>
Cc: Trond Myklebust <trond.myklebust@fys.uio.no>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/locks.c | 25 +++++--------------------
 1 file changed, 5 insertions(+), 20 deletions(-)

(limited to 'fs/locks.c')

diff --git a/fs/locks.c b/fs/locks.c
index 2344f24..e588e1c 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -1896,15 +1896,14 @@ out:
  */
 void locks_remove_posix(struct file *filp, fl_owner_t owner)
 {
-	struct file_lock lock, **before;
+	struct file_lock lock;
 
 	/*
 	 * If there are no locks held on this file, we don't need to call
 	 * posix_lock_file().  Another process could be setting a lock on this
 	 * file at the same time, but we wouldn't remove that lock anyway.
 	 */
-	before = &filp->f_dentry->d_inode->i_flock;
-	if (*before == NULL)
+	if (!filp->f_dentry->d_inode->i_flock)
 		return;
 
 	lock.fl_type = F_UNLCK;
@@ -1917,25 +1916,11 @@ void locks_remove_posix(struct file *filp, fl_owner_t owner)
 	lock.fl_ops = NULL;
 	lock.fl_lmops = NULL;
 
-	if (filp->f_op && filp->f_op->lock != NULL) {
+	if (filp->f_op && filp->f_op->lock != NULL)
 		filp->f_op->lock(filp, F_SETLK, &lock);
-		goto out;
-	}
+	else
+		posix_lock_file(filp, &lock);
 
-	/* Can't use posix_lock_file here; we need to remove it no matter
-	 * which pid we have.
-	 */
-	lock_kernel();
-	while (*before != NULL) {
-		struct file_lock *fl = *before;
-		if (IS_POSIX(fl) && posix_same_owner(fl, &lock)) {
-			locks_delete_lock(before);
-			continue;
-		}
-		before = &fl->fl_next;
-	}
-	unlock_kernel();
-out:
 	if (lock.fl_ops && lock.fl_ops->fl_release_private)
 		lock.fl_ops->fl_release_private(&lock);
 }
-- 
cgit v1.1


From 75e1fcc0b18df0a65ab113198e9dc0e98999a08c Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <miklos@szeredi.hu>
Date: Fri, 23 Jun 2006 02:05:12 -0700
Subject: [PATCH] vfs: add lock owner argument to flush operation

Pass the POSIX lock owner ID to the flush operation.

This is useful for filesystems which don't want to store any locking state
in inode->i_flock but want to handle locking/unlocking POSIX locks
internally.  FUSE is one such filesystem but I think it possible that some
network filesystems would need this also.

Also add a flag to indicate that a POSIX locking request was generated by
close(), so filesystems using the above feature won't send an extra locking
request in this case.

Signed-off-by: Miklos Szeredi <miklos@szeredi.hu>
Cc: Trond Myklebust <trond.myklebust@fys.uio.no>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/locks.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs/locks.c')

diff --git a/fs/locks.c b/fs/locks.c
index e588e1c..f8a634a 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -1907,7 +1907,7 @@ void locks_remove_posix(struct file *filp, fl_owner_t owner)
 		return;
 
 	lock.fl_type = F_UNLCK;
-	lock.fl_flags = FL_POSIX;
+	lock.fl_flags = FL_POSIX | FL_CLOSE;
 	lock.fl_start = 0;
 	lock.fl_end = OFFSET_MAX;
 	lock.fl_owner = owner;
-- 
cgit v1.1


From b0904e147f7cbe4be3b4dae49ddccd627bb66f16 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@stusta.de>
Date: Fri, 23 Jun 2006 02:05:13 -0700
Subject: [PATCH] fs/locks.c: make posix_locks_deadlock() static

We can now make posix_locks_deadlock() static.

Signed-off-by: Adrian Bunk <bunk@stusta.de>
Cc: Trond Myklebust <trond.myklebust@fys.uio.no>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/locks.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'fs/locks.c')

diff --git a/fs/locks.c b/fs/locks.c
index f8a634a..1ad29c9 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -703,7 +703,7 @@ EXPORT_SYMBOL(posix_test_lock);
  * from a broken NFS client. But broken NFS clients have a lot more to
  * worry about than proper deadlock detection anyway... --okir
  */
-int posix_locks_deadlock(struct file_lock *caller_fl,
+static int posix_locks_deadlock(struct file_lock *caller_fl,
 				struct file_lock *block_fl)
 {
 	struct list_head *tmp;
@@ -722,8 +722,6 @@ next_task:
 	return 0;
 }
 
-EXPORT_SYMBOL(posix_locks_deadlock);
-
 /* Try to create a FLOCK lock on filp. We always insert new FLOCK locks
  * at the head of the list, but that's secret knowledge known only to
  * flock_lock_file and posix_lock_file.
-- 
cgit v1.1