From 1b83d707032a1be40a60ed0a9bd841662cc04a5d Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Wed, 11 Jun 2008 15:44:04 -0400
Subject: NFS: Protect inode->i_nlink updates using inode->i_lock

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/dir.c | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index b194066..d6ec1c8 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -870,6 +870,14 @@ static int nfs_dentry_delete(struct dentry *dentry)
 
 }
 
+static void nfs_drop_nlink(struct inode *inode)
+{
+	spin_lock(&inode->i_lock);
+	if (inode->i_nlink > 0)
+		drop_nlink(inode);
+	spin_unlock(&inode->i_lock);
+}
+
 /*
  * Called when the dentry loses inode.
  * We use it to clean up silly-renamed files.
@@ -1420,7 +1428,7 @@ static int nfs_safe_remove(struct dentry *dentry)
 		error = NFS_PROTO(dir)->remove(dir, &dentry->d_name);
 		/* The VFS may want to delete this inode */
 		if (error == 0)
-			drop_nlink(inode);
+			nfs_drop_nlink(inode);
 		nfs_mark_for_revalidate(inode);
 	} else
 		error = NFS_PROTO(dir)->remove(dir, &dentry->d_name);
@@ -1647,7 +1655,7 @@ static int nfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 			/* dentry still busy? */
 			goto out;
 	} else
-		drop_nlink(new_inode);
+		nfs_drop_nlink(new_inode);
 
 go_ahead:
 	/*
-- 
cgit v1.1


From a3d01454bc58b5a211ef64a7670572a40b71e682 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Wed, 11 Jun 2008 12:21:19 -0400
Subject: NFS: Remove BKL requirement from attribute updates

The main problem is dealing with inode->i_size: we need to set the
inode->i_lock on all attribute updates, and so vmtruncate won't cut it.
Make an NFS-private version of vmtruncate that has the necessary locking
semantics.

The result should be that the following inode attribute updates are
protected by inode->i_lock
	nfsi->cache_validity
	nfsi->read_cache_jiffies
	nfsi->attrtimeo
	nfsi->attrtimeo_timestamp
	nfsi->change_attr
	nfsi->last_updated
	nfsi->cache_change_attribute
	nfsi->access_cache
	nfsi->access_cache_entry_lru
	nfsi->access_cache_inode_lru
	nfsi->acl_access
	nfsi->acl_default
	nfsi->nfs_page_tree
	nfsi->ncommit
	nfsi->npages
	nfsi->open_files
	nfsi->silly_list
	nfsi->acl
	nfsi->open_states
	inode->i_size
	inode->i_atime
	inode->i_mtime
	inode->i_ctime
	inode->i_nlink
	inode->i_uid
	inode->i_gid

The following is protected by dir->i_mutex
	nfsi->cookieverf

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/inode.c | 67 ++++++++++++++++++++++++++++++++++++++++++++++++++++------
 fs/nfs/write.c | 15 ++++++++-----
 2 files changed, 71 insertions(+), 11 deletions(-)

diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 2c23d06..3adabd1 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -389,6 +389,62 @@ nfs_setattr(struct dentry *dentry, struct iattr *attr)
 }
 
 /**
+ * nfs_vmtruncate - unmap mappings "freed" by truncate() syscall
+ * @inode: inode of the file used
+ * @offset: file offset to start truncating
+ *
+ * This is a copy of the common vmtruncate, but with the locking
+ * corrected to take into account the fact that NFS requires
+ * inode->i_size to be updated under the inode->i_lock.
+ */
+static int nfs_vmtruncate(struct inode * inode, loff_t offset)
+{
+	if (i_size_read(inode) < offset) {
+		unsigned long limit;
+
+		limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
+		if (limit != RLIM_INFINITY && offset > limit)
+			goto out_sig;
+		if (offset > inode->i_sb->s_maxbytes)
+			goto out_big;
+		spin_lock(&inode->i_lock);
+		i_size_write(inode, offset);
+		spin_unlock(&inode->i_lock);
+	} else {
+		struct address_space *mapping = inode->i_mapping;
+
+		/*
+		 * truncation of in-use swapfiles is disallowed - it would
+		 * cause subsequent swapout to scribble on the now-freed
+		 * blocks.
+		 */
+		if (IS_SWAPFILE(inode))
+			return -ETXTBSY;
+		spin_lock(&inode->i_lock);
+		i_size_write(inode, offset);
+		spin_unlock(&inode->i_lock);
+
+		/*
+		 * unmap_mapping_range is called twice, first simply for
+		 * efficiency so that truncate_inode_pages does fewer
+		 * single-page unmaps.  However after this first call, and
+		 * before truncate_inode_pages finishes, it is possible for
+		 * private pages to be COWed, which remain after
+		 * truncate_inode_pages finishes, hence the second
+		 * unmap_mapping_range call must be made for correctness.
+		 */
+		unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
+		truncate_inode_pages(mapping, offset);
+		unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
+	}
+	return 0;
+out_sig:
+	send_sig(SIGXFSZ, current, 0);
+out_big:
+	return -EFBIG;
+}
+
+/**
  * nfs_setattr_update_inode - Update inode metadata after a setattr call.
  * @inode: pointer to struct inode
  * @attr: pointer to struct iattr
@@ -414,8 +470,7 @@ void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr)
 	}
 	if ((attr->ia_valid & ATTR_SIZE) != 0) {
 		nfs_inc_stats(inode, NFSIOS_SETATTRTRUNC);
-		inode->i_size = attr->ia_size;
-		vmtruncate(inode, attr->ia_size);
+		nfs_vmtruncate(inode, attr->ia_size);
 	}
 }
 
@@ -829,9 +884,9 @@ static void nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr *fattr)
 			if (S_ISDIR(inode->i_mode))
 				nfsi->cache_validity |= NFS_INO_INVALID_DATA;
 		}
-		if (inode->i_size == nfs_size_to_loff_t(fattr->pre_size) &&
+		if (i_size_read(inode) == nfs_size_to_loff_t(fattr->pre_size) &&
 		    nfsi->npages == 0)
-			inode->i_size = nfs_size_to_loff_t(fattr->size);
+			i_size_write(inode, nfs_size_to_loff_t(fattr->size));
 	}
 }
 
@@ -972,7 +1027,7 @@ int nfs_post_op_update_inode_force_wcc(struct inode *inode, struct nfs_fattr *fa
 			(fattr->valid & NFS_ATTR_WCC) == 0) {
 		memcpy(&fattr->pre_ctime, &inode->i_ctime, sizeof(fattr->pre_ctime));
 		memcpy(&fattr->pre_mtime, &inode->i_mtime, sizeof(fattr->pre_mtime));
-		fattr->pre_size = inode->i_size;
+		fattr->pre_size = i_size_read(inode);
 		fattr->valid |= NFS_ATTR_WCC;
 	}
 	return nfs_post_op_update_inode(inode, fattr);
@@ -1057,7 +1112,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
 		/* Do we perhaps have any outstanding writes, or has
 		 * the file grown beyond our last write? */
 		if (nfsi->npages == 0 || new_isize > cur_isize) {
-			inode->i_size = new_isize;
+			i_size_write(inode, new_isize);
 			invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA;
 		}
 		dprintk("NFS: isize change on server for file %s/%ld\n",
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index feca8c6..3229e21 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -133,16 +133,21 @@ static struct nfs_page *nfs_page_find_request(struct page *page)
 static void nfs_grow_file(struct page *page, unsigned int offset, unsigned int count)
 {
 	struct inode *inode = page->mapping->host;
-	loff_t end, i_size = i_size_read(inode);
-	pgoff_t end_index = (i_size - 1) >> PAGE_CACHE_SHIFT;
+	loff_t end, i_size;
+	pgoff_t end_index;
 
+	spin_lock(&inode->i_lock);
+	i_size = i_size_read(inode);
+	end_index = (i_size - 1) >> PAGE_CACHE_SHIFT;
 	if (i_size > 0 && page->index < end_index)
-		return;
+		goto out;
 	end = ((loff_t)page->index << PAGE_CACHE_SHIFT) + ((loff_t)offset+count);
 	if (i_size >= end)
-		return;
-	nfs_inc_stats(inode, NFSIOS_EXTENDWRITE);
+		goto out;
 	i_size_write(inode, end);
+	nfs_inc_stats(inode, NFSIOS_EXTENDWRITE);
+out:
+	spin_unlock(&inode->i_lock);
 }
 
 /* A writeback failed: mark the page as bad, and invalidate the page cache */
-- 
cgit v1.1


From fa6dc9dc59c3a76fd209a97c8cf37395980fb903 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Wed, 11 Jun 2008 13:26:14 -0400
Subject: NFS: Remove attribute update related BKL references

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/inode.c | 4 ----
 fs/nfs/super.c | 4 ----
 2 files changed, 8 deletions(-)

diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 3adabd1..df23f98 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -370,7 +370,6 @@ nfs_setattr(struct dentry *dentry, struct iattr *attr)
 	if ((attr->ia_valid & ~ATTR_FILE) == 0)
 		return 0;
 
-	lock_kernel();
 	/* Write all dirty data */
 	if (S_ISREG(inode->i_mode)) {
 		filemap_write_and_wait(inode->i_mapping);
@@ -384,7 +383,6 @@ nfs_setattr(struct dentry *dentry, struct iattr *attr)
 	error = NFS_PROTO(inode)->setattr(dentry, &fattr, attr);
 	if (error == 0)
 		nfs_refresh_inode(inode, &fattr);
-	unlock_kernel();
 	return error;
 }
 
@@ -700,7 +698,6 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode)
 		inode->i_sb->s_id, (long long)NFS_FILEID(inode));
 
 	nfs_inc_stats(inode, NFSIOS_INODEREVALIDATE);
-	lock_kernel();
 	if (is_bad_inode(inode))
  		goto out_nowait;
 	if (NFS_STALE(inode))
@@ -749,7 +746,6 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode)
 	nfs_wake_up_inode(inode);
 
  out_nowait:
-	unlock_kernel();
 	return status;
 }
 
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 47cf83e..1b94e36 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -374,8 +374,6 @@ static int nfs_statfs(struct dentry *dentry, struct kstatfs *buf)
 	};
 	int error;
 
-	lock_kernel();
-
 	error = server->nfs_client->rpc_ops->statfs(server, fh, &res);
 	if (error < 0)
 		goto out_err;
@@ -407,12 +405,10 @@ static int nfs_statfs(struct dentry *dentry, struct kstatfs *buf)
 
 	buf->f_namelen = server->namelen;
 
-	unlock_kernel();
 	return 0;
 
  out_err:
 	dprintk("%s: statfs error = %d\n", __func__, -error);
-	unlock_kernel();
 	return error;
 }
 
-- 
cgit v1.1


From 4d80f2ecd506d9732ad94a6da104580bb47680d6 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Wed, 11 Jun 2008 15:44:18 -0400
Subject: NFS: Remove the BKL from the permission checking code

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/dir.c | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index d6ec1c8..73e0f97 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1982,8 +1982,6 @@ int nfs_permission(struct inode *inode, int mask, struct nameidata *nd)
 	}
 
 force_lookup:
-	lock_kernel();
-
 	if (!NFS_PROTO(inode)->access)
 		goto out_notsup;
 
@@ -1993,7 +1991,6 @@ force_lookup:
 		put_rpccred(cred);
 	} else
 		res = PTR_ERR(cred);
-	unlock_kernel();
 out:
 	dfprintk(VFS, "NFS: permission(%s/%ld), mask=0x%x, res=%d\n",
 		inode->i_sb->s_id, inode->i_ino, mask, res);
@@ -2002,7 +1999,6 @@ out_notsup:
 	res = nfs_revalidate_inode(NFS_SERVER(inode), inode);
 	if (res == 0)
 		res = generic_permission(inode, mask, NULL);
-	unlock_kernel();
 	goto out;
 }
 
-- 
cgit v1.1


From b6a2e569e2157509951c9f3f58dfa18b44ce91b3 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Wed, 11 Jun 2008 13:26:16 -0400
Subject: NFS: Remove BKL usage from the write path

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/file.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 509dcb5..f919d9a 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -393,9 +393,7 @@ static int nfs_write_end(struct file *file, struct address_space *mapping,
 			zero_user_segment(page, pglen, PAGE_CACHE_SIZE);
 	}
 
-	lock_kernel();
 	status = nfs_updatepage(file, page, offset, copied);
-	unlock_kernel();
 
 	unlock_page(page);
 	page_cache_release(page);
-- 
cgit v1.1


From bba67e0e3f4caba2b2b90b48ed798fb0461bcb86 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Wed, 11 Jun 2008 13:26:23 -0400
Subject: NFS: Remove BKL usage from open()

All the NFSv4 stateful operations are already protected by other locks (in
particular by the rpc_sequence locks.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/dir.c      | 6 ------
 fs/nfs/file.c     | 2 --
 fs/nfs/nfs4proc.c | 2 --
 3 files changed, 10 deletions(-)

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 73e0f97..c68ec44 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -139,10 +139,8 @@ nfs_opendir(struct inode *inode, struct file *filp)
 
 	nfs_inc_stats(inode, NFSIOS_VFSOPEN);
 
-	lock_kernel();
 	/* Call generic open code in order to cache credentials */
 	res = nfs_open(inode, filp);
-	unlock_kernel();
 	return res;
 }
 
@@ -1019,9 +1017,7 @@ static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry
 	}
 
 	/* Open the file on the server */
-	lock_kernel();
 	res = nfs4_atomic_open(dir, dentry, nd);
-	unlock_kernel();
 	if (IS_ERR(res)) {
 		error = PTR_ERR(res);
 		switch (error) {
@@ -1083,9 +1079,7 @@ static int nfs_open_revalidate(struct dentry *dentry, struct nameidata *nd)
 	 * operations that change the directory. We therefore save the
 	 * change attribute *before* we do the RPC call.
 	 */
-	lock_kernel();
 	ret = nfs4_open_revalidate(dir, dentry, openflags, nd);
-	unlock_kernel();
 out:
 	dput(parent);
 	if (!ret)
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index f919d9a..9f1bed9 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -128,9 +128,7 @@ nfs_file_open(struct inode *inode, struct file *filp)
 		return res;
 
 	nfs_inc_stats(inode, NFSIOS_VFSOPEN);
-	lock_kernel();
 	res = nfs_open(inode, filp);
-	unlock_kernel();
 	return res;
 }
 
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 4451287..c910413 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -451,9 +451,7 @@ static struct nfs4_state *nfs4_try_open_cached(struct nfs4_opendata *opendata)
 		/* Save the delegation */
 		memcpy(stateid.data, delegation->stateid.data, sizeof(stateid.data));
 		rcu_read_unlock();
-		lock_kernel();
 		ret = nfs_may_open(state->inode, state->owner->so_cred, open_mode);
-		unlock_kernel();
 		if (ret != 0)
 			goto out;
 		ret = -EAGAIN;
-- 
cgit v1.1


From f1e2eda23513b68003202bddf1f84158baad8844 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Wed, 11 Jun 2008 15:50:50 -0400
Subject: NFS: Remove the BKL from the inode creation operations

nfs_instantiate() does not require the BKL, neither do the attribute
updates or the RPC code.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/dir.c | 9 ---------
 1 file changed, 9 deletions(-)

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index c68ec44..d40e91e5 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1232,14 +1232,11 @@ static int nfs_create(struct inode *dir, struct dentry *dentry, int mode,
 	if ((nd->flags & LOOKUP_CREATE) != 0)
 		open_flags = nd->intent.open.flags;
 
-	lock_kernel();
 	error = NFS_PROTO(dir)->create(dir, dentry, &attr, open_flags, nd);
 	if (error != 0)
 		goto out_err;
-	unlock_kernel();
 	return 0;
 out_err:
-	unlock_kernel();
 	d_drop(dentry);
 	return error;
 }
@@ -1262,14 +1259,11 @@ nfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t rdev)
 	attr.ia_mode = mode;
 	attr.ia_valid = ATTR_MODE;
 
-	lock_kernel();
 	status = NFS_PROTO(dir)->mknod(dir, dentry, &attr, rdev);
 	if (status != 0)
 		goto out_err;
-	unlock_kernel();
 	return 0;
 out_err:
-	unlock_kernel();
 	d_drop(dentry);
 	return status;
 }
@@ -1288,15 +1282,12 @@ static int nfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
 	attr.ia_valid = ATTR_MODE;
 	attr.ia_mode = mode | S_IFDIR;
 
-	lock_kernel();
 	error = NFS_PROTO(dir)->mkdir(dir, dentry, &attr);
 	if (error != 0)
 		goto out_err;
-	unlock_kernel();
 	return 0;
 out_err:
 	d_drop(dentry);
-	unlock_kernel();
 	return error;
 }
 
-- 
cgit v1.1


From fc81af535e462764e17f638d542973fbef13b026 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Wed, 11 Jun 2008 15:52:40 -0400
Subject: NFS: Remove the BKL from nfs_link()

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/dir.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index d40e91e5..5ae8ee6 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1549,14 +1549,12 @@ nfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry)
 		old_dentry->d_parent->d_name.name, old_dentry->d_name.name,
 		dentry->d_parent->d_name.name, dentry->d_name.name);
 
-	lock_kernel();
 	d_drop(dentry);
 	error = NFS_PROTO(dir)->link(inode, dir, &dentry->d_name);
 	if (error == 0) {
 		atomic_inc(&inode->i_count);
 		d_add(dentry, inode);
 	}
-	unlock_kernel();
 	return error;
 }
 
-- 
cgit v1.1


From fc0f684c21b5d4b41dc2ec76f7c0897ac98f5b6e Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Wed, 11 Jun 2008 15:44:20 -0400
Subject: NFS: Remove BKL from NFS lookup code

All dentry-related operations are already BKL-safe, since they are
protected by the VFS locking. No extra locks should be needed in the NFS
code.

In the case of nfs_revalidate_inode(), we're only doing an attribute
update (protected by the inode->i_lock).
In the case of nfs_lookup(), we're instantiating a new dentry, so there
should be no contention possible until after we call d_materialise_unique.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/dir.c | 9 +--------
 1 file changed, 1 insertion(+), 8 deletions(-)

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 5ae8ee6..60da755 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -777,7 +777,6 @@ static int nfs_lookup_revalidate(struct dentry * dentry, struct nameidata *nd)
 	struct nfs_fattr fattr;
 
 	parent = dget_parent(dentry);
-	lock_kernel();
 	dir = parent->d_inode;
 	nfs_inc_stats(dir, NFSIOS_DENTRYREVALIDATE);
 	inode = dentry->d_inode;
@@ -815,7 +814,6 @@ static int nfs_lookup_revalidate(struct dentry * dentry, struct nameidata *nd)
 
 	nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
  out_valid:
-	unlock_kernel();
 	dput(parent);
 	dfprintk(LOOKUPCACHE, "NFS: %s(%s/%s) is valid\n",
 			__func__, dentry->d_parent->d_name.name,
@@ -834,7 +832,6 @@ out_zap_parent:
 		shrink_dcache_parent(dentry);
 	}
 	d_drop(dentry);
-	unlock_kernel();
 	dput(parent);
 	dfprintk(LOOKUPCACHE, "NFS: %s(%s/%s) is invalid\n",
 			__func__, dentry->d_parent->d_name.name,
@@ -921,8 +918,6 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru
 	res = ERR_PTR(-ENOMEM);
 	dentry->d_op = NFS_PROTO(dir)->dentry_ops;
 
-	lock_kernel();
-
 	/*
 	 * If we're doing an exclusive create, optimize away the lookup
 	 * but don't hash the dentry.
@@ -930,7 +925,7 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru
 	if (nfs_is_exclusive_create(dir, nd)) {
 		d_instantiate(dentry, NULL);
 		res = NULL;
-		goto out_unlock;
+		goto out;
 	}
 
 	parent = dentry->d_parent;
@@ -958,8 +953,6 @@ no_entry:
 	nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
 out_unblock_sillyrename:
 	nfs_unblock_sillyrename(parent);
-out_unlock:
-	unlock_kernel();
 out:
 	return res;
 }
-- 
cgit v1.1


From bd9bb454b76fb6ca2d00f17313f9f9df5f5c404a Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Wed, 11 Jun 2008 16:09:59 -0400
Subject: NFS: Remove the BKL from the rename, rmdir and unlink operations

Attribute updates are safe, and dentry operations are protected using VFS
level locks. Defer removing the BKL from sillyrename until a separate
patch.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/dir.c | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 60da755..68e0688 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1297,14 +1297,12 @@ static int nfs_rmdir(struct inode *dir, struct dentry *dentry)
 	dfprintk(VFS, "NFS: rmdir(%s/%ld), %s\n",
 			dir->i_sb->s_id, dir->i_ino, dentry->d_name.name);
 
-	lock_kernel();
 	error = NFS_PROTO(dir)->rmdir(dir, &dentry->d_name);
 	/* Ensure the VFS deletes this inode */
 	if (error == 0 && dentry->d_inode != NULL)
 		clear_nlink(dentry->d_inode);
 	else if (error == -ENOENT)
 		nfs_dentry_handle_enoent(dentry);
-	unlock_kernel();
 
 	return error;
 }
@@ -1429,7 +1427,6 @@ static int nfs_unlink(struct inode *dir, struct dentry *dentry)
 	dfprintk(VFS, "NFS: unlink(%s/%ld, %s)\n", dir->i_sb->s_id,
 		dir->i_ino, dentry->d_name.name);
 
-	lock_kernel();
 	spin_lock(&dcache_lock);
 	spin_lock(&dentry->d_lock);
 	if (atomic_read(&dentry->d_count) > 1) {
@@ -1437,6 +1434,7 @@ static int nfs_unlink(struct inode *dir, struct dentry *dentry)
 		spin_unlock(&dcache_lock);
 		/* Start asynchronous writeout of the inode */
 		write_inode_now(dentry->d_inode, 0);
+		lock_kernel();
 		error = nfs_sillyrename(dir, dentry);
 		unlock_kernel();
 		return error;
@@ -1452,7 +1450,6 @@ static int nfs_unlink(struct inode *dir, struct dentry *dentry)
 		nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
 	} else if (need_rehash)
 		d_rehash(dentry);
-	unlock_kernel();
 	return error;
 }
 
@@ -1587,7 +1584,6 @@ static int nfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 	 * To prevent any new references to the target during the rename,
 	 * we unhash the dentry and free the inode in advance.
 	 */
-	lock_kernel();
 	if (!d_unhashed(new_dentry)) {
 		d_drop(new_dentry);
 		rehash = new_dentry;
@@ -1621,7 +1617,9 @@ static int nfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 			goto out;
 
 		/* silly-rename the existing target ... */
+		lock_kernel();
 		err = nfs_sillyrename(new_dir, new_dentry);
+		unlock_kernel();
 		if (!err) {
 			new_dentry = rehash = dentry;
 			new_inode = NULL;
@@ -1665,7 +1663,6 @@ out:
 	/* new dentry created? */
 	if (dentry)
 		dput(dentry);
-	unlock_kernel();
 	return error;
 }
 
-- 
cgit v1.1


From 52e2e8d37e01edf38ccdccc983fb13ec1456d63d Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Wed, 11 Jun 2008 15:44:21 -0400
Subject: NFS: Remove BKL from the sillydelete operations

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/dir.c | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 68e0688..1bdc36b 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -884,10 +884,8 @@ static void nfs_dentry_iput(struct dentry *dentry, struct inode *inode)
 		NFS_I(inode)->cache_validity |= NFS_INO_INVALID_DATA;
 
 	if (dentry->d_flags & DCACHE_NFSFS_RENAMED) {
-		lock_kernel();
 		drop_nlink(inode);
 		nfs_complete_unlink(dentry, inode);
-		unlock_kernel();
 	}
 	iput(inode);
 }
@@ -1434,9 +1432,7 @@ static int nfs_unlink(struct inode *dir, struct dentry *dentry)
 		spin_unlock(&dcache_lock);
 		/* Start asynchronous writeout of the inode */
 		write_inode_now(dentry->d_inode, 0);
-		lock_kernel();
 		error = nfs_sillyrename(dir, dentry);
-		unlock_kernel();
 		return error;
 	}
 	if (!d_unhashed(dentry)) {
@@ -1617,9 +1613,7 @@ static int nfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 			goto out;
 
 		/* silly-rename the existing target ... */
-		lock_kernel();
 		err = nfs_sillyrename(new_dir, new_dentry);
-		unlock_kernel();
 		if (!err) {
 			new_dentry = rehash = dentry;
 			new_inode = NULL;
-- 
cgit v1.1


From 76566991f94c206d9c5881edcaf99ba72c9e9d61 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Wed, 11 Jun 2008 15:44:22 -0400
Subject: NFS: Remove BKL from the symlink code

Page cache accesses are serialised using page locks, whereas attribute
updates are serialised using inode->i_lock.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/dir.c | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 1bdc36b..e5f9502 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1482,13 +1482,9 @@ static int nfs_symlink(struct inode *dir, struct dentry *dentry, const char *sym
 	attr.ia_mode = S_IFLNK | S_IRWXUGO;
 	attr.ia_valid = ATTR_MODE;
 
-	lock_kernel();
-
 	page = alloc_page(GFP_HIGHUSER);
-	if (!page) {
-		unlock_kernel();
+	if (!page)
 		return -ENOMEM;
-	}
 
 	kaddr = kmap_atomic(page, KM_USER0);
 	memcpy(kaddr, symname, pathlen);
@@ -1503,7 +1499,6 @@ static int nfs_symlink(struct inode *dir, struct dentry *dentry, const char *sym
 			dentry->d_name.name, symname, error);
 		d_drop(dentry);
 		__free_page(page);
-		unlock_kernel();
 		return error;
 	}
 
@@ -1521,7 +1516,6 @@ static int nfs_symlink(struct inode *dir, struct dentry *dentry, const char *sym
 	} else
 		__free_page(page);
 
-	unlock_kernel();
 	return 0;
 }
 
-- 
cgit v1.1


From c3cc8c019ca09767d7c9b5457d5cf8ac65085f44 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Wed, 11 Jun 2008 15:44:23 -0400
Subject: NFS: Remove BKL from the readdir code

Page accesses are serialised using the page locks, whereas all attribute
updates are serialised using the inode->i_lock.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/dir.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index e5f9502..28a238d 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -534,8 +534,6 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
 			(long long)filp->f_pos);
 	nfs_inc_stats(inode, NFSIOS_VFSGETDENTS);
 
-	lock_kernel();
-
 	/*
 	 * filp->f_pos points to the dirent entry number.
 	 * *desc->dir_cookie has the cookie for the next entry. We have
@@ -593,7 +591,6 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
 	}
 out:
 	nfs_unblock_sillyrename(dentry);
-	unlock_kernel();
 	if (res > 0)
 		res = 0;
 	dfprintk(FILE, "NFS: readdir(%s/%s) returns %ld\n",
-- 
cgit v1.1


From a86dc496b764ebb1431677b38eab45310e5a2ad4 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Wed, 11 Jun 2008 13:37:09 -0400
Subject: SUNRPC: Remove the BKL from the callback functions

Push it into those callback functions that actually need it.

Note that all the NFS operations use their own locking, so don't need the
BKL. Ditto for the rpcbind client.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/lockd/clntproc.c | 6 ++++++
 fs/lockd/svc4proc.c | 2 ++
 fs/lockd/svclock.c  | 7 ++++++-
 fs/lockd/svcproc.c  | 2 ++
 net/sunrpc/sched.c  | 9 +--------
 5 files changed, 17 insertions(+), 9 deletions(-)

diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c
index fd7d4669..1f6dc51 100644
--- a/fs/lockd/clntproc.c
+++ b/fs/lockd/clntproc.c
@@ -224,7 +224,9 @@ void nlm_release_call(struct nlm_rqst *call)
 
 static void nlmclnt_rpc_release(void *data)
 {
+	lock_kernel();
 	nlm_release_call(data);
+	unlock_kernel();
 }
 
 static int nlm_wait_on_grace(wait_queue_head_t *queue)
@@ -710,7 +712,9 @@ static void nlmclnt_unlock_callback(struct rpc_task *task, void *data)
 die:
 	return;
  retry_rebind:
+	lock_kernel();
 	nlm_rebind_host(req->a_host);
+	unlock_kernel();
  retry_unlock:
 	rpc_restart_call(task);
 }
@@ -788,7 +792,9 @@ retry_cancel:
 	/* Don't ever retry more than 3 times */
 	if (req->a_retries++ >= NLMCLNT_MAX_RETRIES)
 		goto die;
+	lock_kernel();
 	nlm_rebind_host(req->a_host);
+	unlock_kernel();
 	rpc_restart_call(task);
 	rpc_delay(task, 30 * HZ);
 }
diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c
index 385437e..2e27176 100644
--- a/fs/lockd/svc4proc.c
+++ b/fs/lockd/svc4proc.c
@@ -248,7 +248,9 @@ static void nlm4svc_callback_exit(struct rpc_task *task, void *data)
 
 static void nlm4svc_callback_release(void *data)
 {
+	lock_kernel();
 	nlm_release_call(data);
+	unlock_kernel();
 }
 
 static const struct rpc_call_ops nlm4svc_callback_ops = {
diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
index 81aca85..56a08ab 100644
--- a/fs/lockd/svclock.c
+++ b/fs/lockd/svclock.c
@@ -795,6 +795,7 @@ static void nlmsvc_grant_callback(struct rpc_task *task, void *data)
 
 	dprintk("lockd: GRANT_MSG RPC callback\n");
 
+	lock_kernel();
 	/* if the block is not on a list at this point then it has
 	 * been invalidated. Don't try to requeue it.
 	 *
@@ -804,7 +805,7 @@ static void nlmsvc_grant_callback(struct rpc_task *task, void *data)
 	 * for nlm_blocked?
 	 */
 	if (list_empty(&block->b_list))
-		return;
+		goto out;
 
 	/* Technically, we should down the file semaphore here. Since we
 	 * move the block towards the head of the queue only, no harm
@@ -818,13 +819,17 @@ static void nlmsvc_grant_callback(struct rpc_task *task, void *data)
 	}
 	nlmsvc_insert_block(block, timeout);
 	svc_wake_up(block->b_daemon);
+out:
+	unlock_kernel();
 }
 
 static void nlmsvc_grant_release(void *data)
 {
 	struct nlm_rqst		*call = data;
 
+	lock_kernel();
 	nlmsvc_release_block(call->a_block);
+	unlock_kernel();
 }
 
 static const struct rpc_call_ops nlmsvc_grant_ops = {
diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c
index 88379cc..ce6952b 100644
--- a/fs/lockd/svcproc.c
+++ b/fs/lockd/svcproc.c
@@ -278,7 +278,9 @@ static void nlmsvc_callback_exit(struct rpc_task *task, void *data)
 
 static void nlmsvc_callback_release(void *data)
 {
+	lock_kernel();
 	nlm_release_call(data);
+	unlock_kernel();
 }
 
 static const struct rpc_call_ops nlmsvc_callback_ops = {
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index 6288af0..385f427 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -576,9 +576,7 @@ EXPORT_SYMBOL_GPL(rpc_delay);
  */
 static void rpc_prepare_task(struct rpc_task *task)
 {
-	lock_kernel();
 	task->tk_ops->rpc_call_prepare(task, task->tk_calldata);
-	unlock_kernel();
 }
 
 /*
@@ -588,9 +586,7 @@ void rpc_exit_task(struct rpc_task *task)
 {
 	task->tk_action = NULL;
 	if (task->tk_ops->rpc_call_done != NULL) {
-		lock_kernel();
 		task->tk_ops->rpc_call_done(task, task->tk_calldata);
-		unlock_kernel();
 		if (task->tk_action != NULL) {
 			WARN_ON(RPC_ASSASSINATED(task));
 			/* Always release the RPC slot and buffer memory */
@@ -602,11 +598,8 @@ EXPORT_SYMBOL_GPL(rpc_exit_task);
 
 void rpc_release_calldata(const struct rpc_call_ops *ops, void *calldata)
 {
-	if (ops->rpc_release != NULL) {
-		lock_kernel();
+	if (ops->rpc_release != NULL)
 		ops->rpc_release(calldata);
-		unlock_kernel();
-	}
 }
 
 /*
-- 
cgit v1.1


From f839c4c1991cc9b580ae38f98f54554938a7f49c Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Wed, 11 Jun 2008 15:44:26 -0400
Subject: NFSv4: Remove BKL from the nfsv4 state recovery

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/nfs4state.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 856a893..401ef8b 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -940,7 +940,6 @@ static int reclaimer(void *ptr)
 	allow_signal(SIGKILL);
 
 	/* Ensure exclusive access to NFSv4 state */
-	lock_kernel();
 	down_write(&clp->cl_sem);
 	/* Are there any NFS mounts out there? */
 	if (list_empty(&clp->cl_superblocks))
@@ -1000,7 +999,6 @@ restart_loop:
 	nfs_delegation_reap_unclaimed(clp);
 out:
 	up_write(&clp->cl_sem);
-	unlock_kernel();
 	if (status == -NFS4ERR_CB_PATH_DOWN)
 		nfs_handle_cb_pathdown(clp);
 	nfs4_clear_recover_bit(clp);
-- 
cgit v1.1