From b4c98f625fffee3a6f633082e9e4be3e952ca2ab Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Wed, 13 Sep 2006 11:01:19 -0700
Subject: configfs: Prevent duplicate subsystem names.

For all child objects, creation comes through mkdir(2), so duplicate names
are prevented.

Subsystems, though, are registered by client drivers at init_module()/__init
time.  This patch prevents duplicate subsystem names.

Signed-off-by: Joel Becker <joel.becker@oracle.com>
Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
---
 fs/configfs/dir.c | 32 ++++++++++++++++++++++++++++++--
 1 file changed, 30 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index df02545..816e8ef 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -86,6 +86,32 @@ static struct configfs_dirent *configfs_new_dirent(struct configfs_dirent * pare
 	return sd;
 }
 
+/*
+ *
+ * Return -EEXIST if there is already a configfs element with the same
+ * name for the same parent.
+ *
+ * called with parent inode's i_mutex held
+ */
+int configfs_dirent_exists(struct configfs_dirent *parent_sd,
+			   const unsigned char *new)
+{
+	struct configfs_dirent * sd;
+
+	list_for_each_entry(sd, &parent_sd->s_children, s_sibling) {
+		if (sd->s_element) {
+			const unsigned char *existing = configfs_get_name(sd);
+			if (strcmp(existing, new))
+				continue;
+			else
+				return -EEXIST;
+		}
+	}
+
+	return 0;
+}
+
+
 int configfs_make_dirent(struct configfs_dirent * parent_sd,
 			 struct dentry * dentry, void * element,
 			 umode_t mode, int type)
@@ -136,8 +162,10 @@ static int create_dir(struct config_item * k, struct dentry * p,
 	int error;
 	umode_t mode = S_IFDIR| S_IRWXU | S_IRUGO | S_IXUGO;
 
-	error = configfs_make_dirent(p->d_fsdata, d, k, mode,
-				     CONFIGFS_DIR);
+	error = configfs_dirent_exists(p->d_fsdata, d->d_name.name);
+	if (!error)
+		error = configfs_make_dirent(p->d_fsdata, d, k, mode,
+					     CONFIGFS_DIR);
 	if (!error) {
 		error = configfs_create(d, mode, init_dir);
 		if (!error) {
-- 
cgit v1.1


From ca4d147e62df370c334898464023aa7f9126abe1 Mon Sep 17 00:00:00 2001
From: Herbert Poetzl <herbert@13thfloor.at>
Date: Mon, 3 Jul 2006 17:27:12 -0700
Subject: ocfs2: add ext2 attributes

Support immutable, and other attributes.

Some renaming and other minor fixes done by myself.

Signed-off-by: Herbert Poetzl <herbert@13thfloor.at>
Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
---
 fs/ocfs2/Makefile   |   1 +
 fs/ocfs2/dlmglue.c  |   9 +++-
 fs/ocfs2/dlmglue.h  |   5 +-
 fs/ocfs2/file.c     |   3 ++
 fs/ocfs2/inode.c    |  28 ++++++++++-
 fs/ocfs2/inode.h    |   3 ++
 fs/ocfs2/ioctl.c    | 134 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 fs/ocfs2/ioctl.h    |  16 +++++++
 fs/ocfs2/ocfs2_fs.h |  24 +++++++++-
 9 files changed, 217 insertions(+), 6 deletions(-)
 create mode 100644 fs/ocfs2/ioctl.c
 create mode 100644 fs/ocfs2/ioctl.h

(limited to 'fs')

diff --git a/fs/ocfs2/Makefile b/fs/ocfs2/Makefile
index 7d3be84..9fb8132f 100644
--- a/fs/ocfs2/Makefile
+++ b/fs/ocfs2/Makefile
@@ -16,6 +16,7 @@ ocfs2-objs := \
 	file.o 			\
 	heartbeat.o 		\
 	inode.o 		\
+	ioctl.o 		\
 	journal.o 		\
 	localalloc.o 		\
 	mmap.o 			\
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index 762eb1f..151b417 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -1330,6 +1330,7 @@ static void __ocfs2_stuff_meta_lvb(struct inode *inode)
 		cpu_to_be64(ocfs2_pack_timespec(&inode->i_ctime));
 	lvb->lvb_imtime_packed =
 		cpu_to_be64(ocfs2_pack_timespec(&inode->i_mtime));
+	lvb->lvb_iattr    = cpu_to_be32(oi->ip_attr);
 
 	mlog_meta_lvb(0, lockres);
 
@@ -1360,6 +1361,9 @@ static void ocfs2_refresh_inode_from_lvb(struct inode *inode)
 	oi->ip_clusters = be32_to_cpu(lvb->lvb_iclusters);
 	i_size_write(inode, be64_to_cpu(lvb->lvb_isize));
 
+	oi->ip_attr = be32_to_cpu(lvb->lvb_iattr);
+	ocfs2_set_inode_flags(inode);
+
 	/* fast-symlinks are a special case */
 	if (S_ISLNK(inode->i_mode) && !oi->ip_clusters)
 		inode->i_blocks = 0;
@@ -2899,8 +2903,9 @@ void ocfs2_dump_meta_lvb_info(u64 level,
 	     be32_to_cpu(lvb->lvb_iuid), be32_to_cpu(lvb->lvb_igid),
 	     be16_to_cpu(lvb->lvb_imode));
 	mlog(level, "nlink %u, atime_packed 0x%llx, ctime_packed 0x%llx, "
-	     "mtime_packed 0x%llx\n", be16_to_cpu(lvb->lvb_inlink),
+	     "mtime_packed 0x%llx iattr 0x%x\n", be16_to_cpu(lvb->lvb_inlink),
 	     (long long)be64_to_cpu(lvb->lvb_iatime_packed),
 	     (long long)be64_to_cpu(lvb->lvb_ictime_packed),
-	     (long long)be64_to_cpu(lvb->lvb_imtime_packed));
+	     (long long)be64_to_cpu(lvb->lvb_imtime_packed),
+	     be32_to_cpu(lvb->lvb_iattr));
 }
diff --git a/fs/ocfs2/dlmglue.h b/fs/ocfs2/dlmglue.h
index 8f2d1db..243ae86 100644
--- a/fs/ocfs2/dlmglue.h
+++ b/fs/ocfs2/dlmglue.h
@@ -27,7 +27,7 @@
 #ifndef DLMGLUE_H
 #define DLMGLUE_H
 
-#define OCFS2_LVB_VERSION 2
+#define OCFS2_LVB_VERSION 3
 
 struct ocfs2_meta_lvb {
 	__be32       lvb_version;
@@ -40,7 +40,8 @@ struct ocfs2_meta_lvb {
 	__be64       lvb_isize;
 	__be16       lvb_imode;
 	__be16       lvb_inlink;
-	__be32       lvb_reserved[3];
+	__be32       lvb_iattr;
+	__be32       lvb_reserved[2];
 };
 
 /* ocfs2_meta_lock_full() and ocfs2_data_lock_full() 'arg_flags' flags */
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index a9559c8..2bbfa17 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -44,6 +44,7 @@
 #include "file.h"
 #include "sysfile.h"
 #include "inode.h"
+#include "ioctl.h"
 #include "journal.h"
 #include "mmap.h"
 #include "suballoc.h"
@@ -1227,10 +1228,12 @@ const struct file_operations ocfs2_fops = {
 	.open		= ocfs2_file_open,
 	.aio_read	= ocfs2_file_aio_read,
 	.aio_write	= ocfs2_file_aio_write,
+	.ioctl		= ocfs2_ioctl,
 };
 
 const struct file_operations ocfs2_dops = {
 	.read		= generic_read_dir,
 	.readdir	= ocfs2_readdir,
 	.fsync		= ocfs2_sync_file,
+	.ioctl		= ocfs2_ioctl,
 };
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index 327a5b7..3f496c4 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -71,6 +71,26 @@ static int ocfs2_truncate_for_delete(struct ocfs2_super *osb,
 				    struct inode *inode,
 				    struct buffer_head *fe_bh);
 
+void ocfs2_set_inode_flags(struct inode *inode)
+{
+	unsigned int flags = OCFS2_I(inode)->ip_attr;
+
+	inode->i_flags &= ~(S_IMMUTABLE |
+		S_SYNC | S_APPEND | S_NOATIME | S_DIRSYNC);
+
+	if (flags & OCFS2_IMMUTABLE_FL)
+		inode->i_flags |= S_IMMUTABLE;
+
+	if (flags & OCFS2_SYNC_FL)
+		inode->i_flags |= S_SYNC;
+	if (flags & OCFS2_APPEND_FL)
+		inode->i_flags |= S_APPEND;
+	if (flags & OCFS2_NOATIME_FL)
+		inode->i_flags |= S_NOATIME;
+	if (flags & OCFS2_DIRSYNC_FL)
+		inode->i_flags |= S_DIRSYNC;
+}
+
 struct inode *ocfs2_ilookup_for_vote(struct ocfs2_super *osb,
 				     u64 blkno,
 				     int delete_vote)
@@ -260,7 +280,6 @@ int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe,
 		inode->i_blocks =
 			ocfs2_align_bytes_to_sectors(le64_to_cpu(fe->i_size));
 	inode->i_mapping->a_ops = &ocfs2_aops;
-	inode->i_flags |= S_NOATIME;
 	inode->i_atime.tv_sec = le64_to_cpu(fe->i_atime);
 	inode->i_atime.tv_nsec = le32_to_cpu(fe->i_atime_nsec);
 	inode->i_mtime.tv_sec = le64_to_cpu(fe->i_mtime);
@@ -276,6 +295,7 @@ int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe,
 
 	OCFS2_I(inode)->ip_clusters = le32_to_cpu(fe->i_clusters);
 	OCFS2_I(inode)->ip_orphaned_slot = OCFS2_INVALID_SLOT;
+	OCFS2_I(inode)->ip_attr = le32_to_cpu(fe->i_attr);
 
 	if (create_ino)
 		inode->i_ino = ino_from_blkno(inode->i_sb,
@@ -330,6 +350,9 @@ int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe,
 	ocfs2_inode_lock_res_init(&OCFS2_I(inode)->ip_data_lockres,
 				  OCFS2_LOCK_TYPE_DATA, inode);
 
+	ocfs2_set_inode_flags(inode);
+	inode->i_flags |= S_NOATIME;
+
 	status = 0;
 bail:
 	mlog_exit(status);
@@ -1131,6 +1154,7 @@ int ocfs2_mark_inode_dirty(struct ocfs2_journal_handle *handle,
 
 	spin_lock(&OCFS2_I(inode)->ip_lock);
 	fe->i_clusters = cpu_to_le32(OCFS2_I(inode)->ip_clusters);
+	fe->i_attr = cpu_to_le32(OCFS2_I(inode)->ip_attr);
 	spin_unlock(&OCFS2_I(inode)->ip_lock);
 
 	fe->i_size = cpu_to_le64(i_size_read(inode));
@@ -1169,6 +1193,8 @@ void ocfs2_refresh_inode(struct inode *inode,
 	spin_lock(&OCFS2_I(inode)->ip_lock);
 
 	OCFS2_I(inode)->ip_clusters = le32_to_cpu(fe->i_clusters);
+	OCFS2_I(inode)->ip_attr = le32_to_cpu(fe->i_attr);
+	ocfs2_set_inode_flags(inode);
 	i_size_write(inode, le64_to_cpu(fe->i_size));
 	inode->i_nlink = le16_to_cpu(fe->i_links_count);
 	inode->i_uid = le32_to_cpu(fe->i_uid);
diff --git a/fs/ocfs2/inode.h b/fs/ocfs2/inode.h
index 35140f6..4d1e539 100644
--- a/fs/ocfs2/inode.h
+++ b/fs/ocfs2/inode.h
@@ -56,6 +56,7 @@ struct ocfs2_inode_info
 	struct ocfs2_journal_handle	*ip_handle;
 
 	u32				ip_flags; /* see below */
+	u32				ip_attr; /* inode attributes */
 
 	/* protected by recovery_lock. */
 	struct inode			*ip_next_orphan;
@@ -142,4 +143,6 @@ int ocfs2_mark_inode_dirty(struct ocfs2_journal_handle *handle,
 int ocfs2_aio_read(struct file *file, struct kiocb *req, struct iocb *iocb);
 int ocfs2_aio_write(struct file *file, struct kiocb *req, struct iocb *iocb);
 
+void ocfs2_set_inode_flags(struct inode *inode);
+
 #endif /* OCFS2_INODE_H */
diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c
new file mode 100644
index 0000000..68f4806
--- /dev/null
+++ b/fs/ocfs2/ioctl.c
@@ -0,0 +1,134 @@
+/*
+ * linux/fs/ocfs2/ioctl.c
+ *
+ * Copyright (C) 2006 Herbert Poetzl
+ * adapted from Remy Card's ext2/ioctl.c
+ */
+
+#include <linux/fs.h>
+#include <linux/mount.h>
+
+#define MLOG_MASK_PREFIX ML_INODE
+#include <cluster/masklog.h>
+
+#include "ocfs2.h"
+#include "alloc.h"
+#include "dlmglue.h"
+#include "inode.h"
+#include "journal.h"
+
+#include "ocfs2_fs.h"
+#include <linux/ext2_fs.h>
+
+static int ocfs2_get_inode_attr(struct inode *inode, unsigned *flags)
+{
+	int status;
+
+	status = ocfs2_meta_lock(inode, NULL, NULL, 0);
+	if (status < 0) {
+		mlog_errno(status);
+		return status;
+	}
+	*flags = OCFS2_I(inode)->ip_attr;
+	ocfs2_meta_unlock(inode, 0);
+
+	mlog_exit(status);
+	return status;
+}
+
+static int ocfs2_set_inode_attr(struct inode *inode, unsigned flags,
+				unsigned mask)
+{
+	struct ocfs2_inode_info *ocfs2_inode = OCFS2_I(inode);
+	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+	struct ocfs2_journal_handle *handle = NULL;
+	struct buffer_head *bh = NULL;
+	unsigned oldflags;
+	int status;
+
+	mutex_lock(&inode->i_mutex);
+
+	status = ocfs2_meta_lock(inode, NULL, &bh, 1);
+	if (status < 0) {
+		mlog_errno(status);
+		goto bail;
+	}
+
+	status = -EROFS;
+	if (IS_RDONLY(inode))
+		goto bail_unlock;
+
+	status = -EACCES;
+	if ((current->fsuid != inode->i_uid) && !capable(CAP_FOWNER))
+		goto bail_unlock;
+
+	if (!S_ISDIR(inode->i_mode))
+		flags &= ~OCFS2_DIRSYNC_FL;
+
+	handle = ocfs2_start_trans(osb, NULL, OCFS2_INODE_UPDATE_CREDITS);
+	if (IS_ERR(handle)) {
+		status = PTR_ERR(handle);
+		mlog_errno(status);
+		goto bail_unlock;
+	}
+
+	oldflags = ocfs2_inode->ip_attr;
+	flags = flags & mask;
+	flags |= oldflags & ~mask;
+
+	/*
+	 * The IMMUTABLE and APPEND_ONLY flags can only be changed by
+	 * the relevant capability.
+	 */
+	status = -EPERM;
+	if ((oldflags & OCFS2_IMMUTABLE_FL) || ((flags ^ oldflags) &
+		(OCFS2_APPEND_FL | OCFS2_IMMUTABLE_FL))) {
+		if (!capable(CAP_LINUX_IMMUTABLE))
+			goto bail_unlock;
+	}
+
+	ocfs2_inode->ip_attr = flags;
+	ocfs2_set_inode_flags(inode);
+
+	status = ocfs2_mark_inode_dirty(handle, inode, bh);
+	if (status < 0)
+		mlog_errno(status);
+
+	ocfs2_commit_trans(handle);
+bail_unlock:
+	ocfs2_meta_unlock(inode, 1);
+bail:
+	mutex_unlock(&inode->i_mutex);
+
+	if (bh)
+		brelse(bh);
+
+	mlog_exit(status);
+	return status;
+}
+
+int ocfs2_ioctl(struct inode * inode, struct file * filp,
+	unsigned int cmd, unsigned long arg)
+{
+	unsigned int flags;
+	int status;
+
+	switch (cmd) {
+	case OCFS2_IOC_GETFLAGS:
+		status = ocfs2_get_inode_attr(inode, &flags);
+		if (status < 0)
+			return status;
+
+		flags &= OCFS2_FL_VISIBLE;
+		return put_user(flags, (int __user *) arg);
+	case OCFS2_IOC_SETFLAGS:
+		if (get_user(flags, (int __user *) arg))
+			return -EFAULT;
+
+		return ocfs2_set_inode_attr(inode, flags,
+			OCFS2_FL_MODIFIABLE);
+	default:
+		return -ENOTTY;
+	}
+}
+
diff --git a/fs/ocfs2/ioctl.h b/fs/ocfs2/ioctl.h
new file mode 100644
index 0000000..4a7c829
--- /dev/null
+++ b/fs/ocfs2/ioctl.h
@@ -0,0 +1,16 @@
+/*
+ * ioctl.h
+ *
+ * Function prototypes
+ *
+ * Copyright (C) 2006 Herbert Poetzl
+ *
+ */
+
+#ifndef OCFS2_IOCTL_H
+#define OCFS2_IOCTL_H
+
+int ocfs2_ioctl(struct inode * inode, struct file * filp,
+	unsigned int cmd, unsigned long arg);
+
+#endif /* OCFS2_IOCTL_H */
diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h
index c5b1ac5..3330a5d 100644
--- a/fs/ocfs2/ocfs2_fs.h
+++ b/fs/ocfs2/ocfs2_fs.h
@@ -114,6 +114,26 @@
 #define OCFS2_CHAIN_FL		(0x00000400)	/* Chain allocator */
 #define OCFS2_DEALLOC_FL	(0x00000800)	/* Truncate log */
 
+/* Inode attributes, keep in sync with EXT2 */
+#define OCFS2_SECRM_FL		(0x00000001)	/* Secure deletion */
+#define OCFS2_UNRM_FL		(0x00000002)	/* Undelete */
+#define OCFS2_COMPR_FL		(0x00000004)	/* Compress file */
+#define OCFS2_SYNC_FL		(0x00000008)	/* Synchronous updates */
+#define OCFS2_IMMUTABLE_FL	(0x00000010)	/* Immutable file */
+#define OCFS2_APPEND_FL		(0x00000020)	/* writes to file may only append */
+#define OCFS2_NODUMP_FL		(0x00000040)	/* do not dump file */
+#define OCFS2_NOATIME_FL	(0x00000080)	/* do not update atime */
+#define OCFS2_DIRSYNC_FL	(0x00010000)	/* dirsync behaviour (directories only) */
+
+#define OCFS2_FL_VISIBLE	(0x000100FF)	/* User visible flags */
+#define OCFS2_FL_MODIFIABLE	(0x000100FF)	/* User modifiable flags */
+
+/*
+ * ioctl commands
+ */
+#define OCFS2_IOC_GETFLAGS	_IOR('f', 1, long)
+#define OCFS2_IOC_SETFLAGS	_IOW('f', 2, long)
+
 /*
  * Journal Flags (ocfs2_dinode.id1.journal1.i_flags)
  */
@@ -399,7 +419,9 @@ struct ocfs2_dinode {
 	__le32 i_atime_nsec;
 	__le32 i_ctime_nsec;
 	__le32 i_mtime_nsec;
-/*70*/	__le64 i_reserved1[9];
+	__le32 i_attr;
+	__le32 i_reserved1;
+/*70*/	__le64 i_reserved2[8];
 /*B8*/	union {
 		__le64 i_pad1;		/* Generic way to refer to this
 					   64bit union */
-- 
cgit v1.1


From 2d5625181fac18f572cbbd18878d28f5eebf4733 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@stusta.de>
Date: Mon, 10 Jul 2006 01:32:51 +0200
Subject: [PATCH] fs/ocfs2/ioctl.c should #include "ioctl.h"

Every file should #include the headers containing the prototypes for its
global functions.

Signed-off-by: Adrian Bunk <bunk@stusta.de>
Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
---
 fs/ocfs2/ioctl.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'fs')

diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c
index 68f4806..3663cef 100644
--- a/fs/ocfs2/ioctl.c
+++ b/fs/ocfs2/ioctl.c
@@ -18,6 +18,8 @@
 #include "journal.h"
 
 #include "ocfs2_fs.h"
+#include "ioctl.h"
+
 #include <linux/ext2_fs.h>
 
 static int ocfs2_get_inode_attr(struct inode *inode, unsigned *flags)
-- 
cgit v1.1


From 471e3f57286da7ce8820ad42c77d5f5f49d56a41 Mon Sep 17 00:00:00 2001
From: Mathieu Avila <mathieu.avila@seanodes.com>
Date: Wed, 13 Sep 2006 11:11:27 -0700
Subject: ocfs2: Fix heartbeat sector calculation

This fixes things for devices which set max_sectors to 8.

Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
---
 fs/ocfs2/cluster/heartbeat.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index 504595d..305cba3 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -320,8 +320,12 @@ static int compute_max_sectors(struct block_device *bdev)
 		max_pages = q->max_hw_segments;
 	max_pages--; /* Handle I/Os that straddle a page */
 
-	max_sectors = max_pages << (PAGE_SHIFT - 9);
-
+	if (max_pages) {
+		max_sectors = max_pages << (PAGE_SHIFT - 9);
+	} else {
+		/* If BIO contains 1 or less than 1 page. */
+		max_sectors = q->max_sectors;
+	}
 	/* Why is fls() 1-based???? */
 	pow_two_sectors = 1 << (fls(max_sectors) - 1);
 
-- 
cgit v1.1


From a663e30513d7ecc77dd71d474e7646bf78c0ba62 Mon Sep 17 00:00:00 2001
From: Mark Fasheh <mark.fasheh@oracle.com>
Date: Wed, 9 Aug 2006 11:45:07 -0700
Subject: ocfs2: move nlink check in ocfs2_mknod()

The dir nlink check in ocfs2_mknod() was being done outside of the cluster
lock, which means we could have been checking against a stale version of the
inode. Fix this by doing the check after the cluster lock instead.

Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
---
 fs/ocfs2/namei.c | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index 0673862..d8161a7 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -310,13 +310,6 @@ static int ocfs2_mknod(struct inode *dir,
 	/* get our super block */
 	osb = OCFS2_SB(dir->i_sb);
 
-	if (S_ISDIR(mode) && (dir->i_nlink >= OCFS2_LINK_MAX)) {
-		mlog(ML_ERROR, "inode %llu has i_nlink of %u\n",
-		     (unsigned long long)OCFS2_I(dir)->ip_blkno, dir->i_nlink);
-		status = -EMLINK;
-		goto leave;
-	}
-
 	handle = ocfs2_alloc_handle(osb);
 	if (handle == NULL) {
 		status = -ENOMEM;
@@ -331,6 +324,11 @@ static int ocfs2_mknod(struct inode *dir,
 		goto leave;
 	}
 
+	if (S_ISDIR(mode) && (dir->i_nlink >= OCFS2_LINK_MAX)) {
+		status = -EMLINK;
+		goto leave;
+	}
+
 	dirfe = (struct ocfs2_dinode *) parent_fe_bh->b_data;
 	if (!dirfe->i_links_count) {
 		/* can't make a file in a deleted directory. */
-- 
cgit v1.1


From 0f62de2c9ca60a35f63122e7ea992cee8aae4bef Mon Sep 17 00:00:00 2001
From: Tiger Yang <tiger.yang@oracle.com>
Date: Thu, 31 Aug 2006 20:39:47 -0700
Subject: ocfs2: Fix directory link count checks in ocfs2_link()

Remove the redundant "i_nlink >= OCFS2_LINK_MAX" check and adds an unlinked
directory check in ocfs2_link().

Signed-off-by: Tiger Yang <tiger.yang@oracle.com>
Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
---
 fs/ocfs2/namei.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index d8161a7..2412647 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -641,11 +641,6 @@ static int ocfs2_link(struct dentry *old_dentry,
 		goto bail;
 	}
 
-	if (inode->i_nlink >= OCFS2_LINK_MAX) {
-		err = -EMLINK;
-		goto bail;
-	}
-
 	handle = ocfs2_alloc_handle(osb);
 	if (handle == NULL) {
 		err = -ENOMEM;
@@ -659,6 +654,11 @@ static int ocfs2_link(struct dentry *old_dentry,
 		goto bail;
 	}
 
+	if (!dir->i_nlink) {
+		err = -ENOENT;
+		goto bail;
+	}
+
 	err = ocfs2_check_dir_for_entry(dir, dentry->d_name.name,
 					dentry->d_name.len);
 	if (err)
-- 
cgit v1.1


From e0b4096d34fbd6b30838c417100c9d0ef73c71f2 Mon Sep 17 00:00:00 2001
From: Mark Fasheh <mark.fasheh@oracle.com>
Date: Tue, 11 Jul 2006 14:38:54 -0700
Subject: ocfs2: properly update i_mtime on buffered write

We weren't always updating i_mtime on writes, so fix ocfs2_commit_write() to
handle this.

Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
Acked-by: Zach Brown <zach.brown@oracle.com>
---
 fs/ocfs2/aops.c | 83 +++++++++++++++++++++++----------------------------------
 1 file changed, 34 insertions(+), 49 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index f1d1c34..3d7c082 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -391,31 +391,28 @@ out:
 static int ocfs2_commit_write(struct file *file, struct page *page,
 			      unsigned from, unsigned to)
 {
-	int ret, extending = 0, locklevel = 0;
-	loff_t new_i_size;
+	int ret;
 	struct buffer_head *di_bh = NULL;
 	struct inode *inode = page->mapping->host;
 	struct ocfs2_journal_handle *handle = NULL;
+	struct ocfs2_dinode *di;
 
 	mlog_entry("(0x%p, 0x%p, %u, %u)\n", file, page, from, to);
 
 	/* NOTE: ocfs2_file_aio_write has ensured that it's safe for
-	 * us to sample inode->i_size here without the metadata lock:
+	 * us to continue here without rechecking the I/O against
+	 * changed inode values.
 	 *
 	 * 1) We're currently holding the inode alloc lock, so no
 	 *    nodes can change it underneath us.
 	 *
 	 * 2) We've had to take the metadata lock at least once
-	 *    already to check for extending writes, hence insuring
-	 *    that our current copy is also up to date.
+	 *    already to check for extending writes, suid removal, etc.
+	 *    The meta data update code then ensures that we don't get a
+	 *    stale inode allocation image (i_size, i_clusters, etc).
 	 */
-	new_i_size = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
-	if (new_i_size > i_size_read(inode)) {
-		extending = 1;
-		locklevel = 1;
-	}
 
-	ret = ocfs2_meta_lock_with_page(inode, NULL, &di_bh, locklevel, page);
+	ret = ocfs2_meta_lock_with_page(inode, NULL, &di_bh, 1, page);
 	if (ret != 0) {
 		mlog_errno(ret);
 		goto out;
@@ -427,23 +424,20 @@ static int ocfs2_commit_write(struct file *file, struct page *page,
 		goto out_unlock_meta;
 	}
 
-	if (extending) {
-		handle = ocfs2_start_walk_page_trans(inode, page, from, to);
-		if (IS_ERR(handle)) {
-			ret = PTR_ERR(handle);
-			handle = NULL;
-			goto out_unlock_data;
-		}
+	handle = ocfs2_start_walk_page_trans(inode, page, from, to);
+	if (IS_ERR(handle)) {
+		ret = PTR_ERR(handle);
+		goto out_unlock_data;
+	}
 
-		/* Mark our buffer early. We'd rather catch this error up here
-		 * as opposed to after a successful commit_write which would
-		 * require us to set back inode->i_size. */
-		ret = ocfs2_journal_access(handle, inode, di_bh,
-					   OCFS2_JOURNAL_ACCESS_WRITE);
-		if (ret < 0) {
-			mlog_errno(ret);
-			goto out_commit;
-		}
+	/* Mark our buffer early. We'd rather catch this error up here
+	 * as opposed to after a successful commit_write which would
+	 * require us to set back inode->i_size. */
+	ret = ocfs2_journal_access(handle, inode, di_bh,
+				   OCFS2_JOURNAL_ACCESS_WRITE);
+	if (ret < 0) {
+		mlog_errno(ret);
+		goto out_commit;
 	}
 
 	/* might update i_size */
@@ -453,37 +447,28 @@ static int ocfs2_commit_write(struct file *file, struct page *page,
 		goto out_commit;
 	}
 
-	if (extending) {
-		loff_t size = (u64) i_size_read(inode);
-		struct ocfs2_dinode *di =
-			(struct ocfs2_dinode *)di_bh->b_data;
+	di = (struct ocfs2_dinode *)di_bh->b_data;
 
-		/* ocfs2_mark_inode_dirty is too heavy to use here. */
-		inode->i_blocks = ocfs2_align_bytes_to_sectors(size);
-		inode->i_ctime = inode->i_mtime = CURRENT_TIME;
+	/* ocfs2_mark_inode_dirty() is too heavy to use here. */
+	inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+	di->i_mtime = di->i_ctime = cpu_to_le64(inode->i_mtime.tv_sec);
+	di->i_mtime_nsec = di->i_ctime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec);
 
-		di->i_size = cpu_to_le64(size);
-		di->i_ctime = di->i_mtime = 
-				cpu_to_le64(inode->i_mtime.tv_sec);
-		di->i_ctime_nsec = di->i_mtime_nsec = 
-				cpu_to_le32(inode->i_mtime.tv_nsec);
+	inode->i_blocks = ocfs2_align_bytes_to_sectors((u64)(i_size_read(inode)));
+	di->i_size = cpu_to_le64((u64)i_size_read(inode));
 
-		ret = ocfs2_journal_dirty(handle, di_bh);
-		if (ret < 0) {
-			mlog_errno(ret);
-			goto out_commit;
-		}
+	ret = ocfs2_journal_dirty(handle, di_bh);
+	if (ret < 0) {
+		mlog_errno(ret);
+		goto out_commit;
 	}
 
-	BUG_ON(extending && (i_size_read(inode) != new_i_size));
-
 out_commit:
-	if (handle)
-		ocfs2_commit_trans(handle);
+	ocfs2_commit_trans(handle);
 out_unlock_data:
 	ocfs2_data_unlock(inode, 1);
 out_unlock_meta:
-	ocfs2_meta_unlock(inode, locklevel);
+	ocfs2_meta_unlock(inode, 1);
 out:
 	if (di_bh)
 		brelse(di_bh);
-- 
cgit v1.1


From aa9588741db907785e4d92c8b768dd6c9077e6f0 Mon Sep 17 00:00:00 2001
From: Mark Fasheh <mark.fasheh@oracle.com>
Date: Fri, 21 Apr 2006 13:49:02 -0700
Subject: ocfs2: implement directory read-ahead

Uptodate.c now knows about read-ahead buffers. Use some more aggressive
logic in ocfs2_readdir().

The two functions which currently use directory read-ahead are
ocfs2_find_entry() and ocfs2_readdir().

Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
---
 fs/ocfs2/buffer_head_io.c | 95 +++++++++++++++++++++++++++++++++++------------
 fs/ocfs2/buffer_head_io.h |  2 +-
 fs/ocfs2/dir.c            | 28 ++++++++------
 fs/ocfs2/inode.c          |  4 --
 fs/ocfs2/namei.c          | 10 ++---
 fs/ocfs2/uptodate.c       | 21 ++++++++++-
 fs/ocfs2/uptodate.h       |  2 +
 7 files changed, 115 insertions(+), 47 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/buffer_head_io.c b/fs/ocfs2/buffer_head_io.c
index 9a24adf..c903741 100644
--- a/fs/ocfs2/buffer_head_io.c
+++ b/fs/ocfs2/buffer_head_io.c
@@ -100,6 +100,9 @@ int ocfs2_read_blocks(struct ocfs2_super *osb, u64 block, int nr,
 	mlog_entry("(block=(%llu), nr=(%d), flags=%d, inode=%p)\n",
 		   (unsigned long long)block, nr, flags, inode);
 
+	BUG_ON((flags & OCFS2_BH_READAHEAD) &&
+	       (!inode || !(flags & OCFS2_BH_CACHED)));
+
 	if (osb == NULL || osb->sb == NULL || bhs == NULL) {
 		status = -EINVAL;
 		mlog_errno(status);
@@ -140,6 +143,30 @@ int ocfs2_read_blocks(struct ocfs2_super *osb, u64 block, int nr,
 		bh = bhs[i];
 		ignore_cache = 0;
 
+		/* There are three read-ahead cases here which we need to
+		 * be concerned with. All three assume a buffer has
+		 * previously been submitted with OCFS2_BH_READAHEAD
+		 * and it hasn't yet completed I/O.
+		 *
+		 * 1) The current request is sync to disk. This rarely
+		 *    happens these days, and never when performance
+		 *    matters - the code can just wait on the buffer
+		 *    lock and re-submit.
+		 *
+		 * 2) The current request is cached, but not
+		 *    readahead. ocfs2_buffer_uptodate() will return
+		 *    false anyway, so we'll wind up waiting on the
+		 *    buffer lock to do I/O. We re-check the request
+		 *    with after getting the lock to avoid a re-submit.
+		 *
+		 * 3) The current request is readahead (and so must
+		 *    also be a caching one). We short circuit if the
+		 *    buffer is locked (under I/O) and if it's in the
+		 *    uptodate cache. The re-check from #2 catches the
+		 *    case that the previous read-ahead completes just
+		 *    before our is-it-in-flight check.
+		 */
+
 		if (flags & OCFS2_BH_CACHED &&
 		    !ocfs2_buffer_uptodate(inode, bh)) {
 			mlog(ML_UPTODATE,
@@ -169,6 +196,14 @@ int ocfs2_read_blocks(struct ocfs2_super *osb, u64 block, int nr,
 				continue;
 			}
 
+			/* A read-ahead request was made - if the
+			 * buffer is already under read-ahead from a
+			 * previously submitted request than we are
+			 * done here. */
+			if ((flags & OCFS2_BH_READAHEAD)
+			    && ocfs2_buffer_read_ahead(inode, bh))
+				continue;
+
 			lock_buffer(bh);
 			if (buffer_jbd(bh)) {
 #ifdef CATCH_BH_JBD_RACES
@@ -181,13 +216,22 @@ int ocfs2_read_blocks(struct ocfs2_super *osb, u64 block, int nr,
 				continue;
 #endif
 			}
+
+			/* Re-check ocfs2_buffer_uptodate() as a
+			 * previously read-ahead buffer may have
+			 * completed I/O while we were waiting for the
+			 * buffer lock. */
+			if ((flags & OCFS2_BH_CACHED)
+			    && !(flags & OCFS2_BH_READAHEAD)
+			    && ocfs2_buffer_uptodate(inode, bh)) {
+				unlock_buffer(bh);
+				continue;
+			}
+
 			clear_buffer_uptodate(bh);
 			get_bh(bh); /* for end_buffer_read_sync() */
 			bh->b_end_io = end_buffer_read_sync;
-			if (flags & OCFS2_BH_READAHEAD)
-				submit_bh(READA, bh);
-			else
-				submit_bh(READ, bh);
+			submit_bh(READ, bh);
 			continue;
 		}
 	}
@@ -197,34 +241,39 @@ int ocfs2_read_blocks(struct ocfs2_super *osb, u64 block, int nr,
 	for (i = (nr - 1); i >= 0; i--) {
 		bh = bhs[i];
 
-		/* We know this can't have changed as we hold the
-		 * inode sem. Avoid doing any work on the bh if the
-		 * journal has it. */
-		if (!buffer_jbd(bh))
-			wait_on_buffer(bh);
-
-		if (!buffer_uptodate(bh)) {
-			/* Status won't be cleared from here on out,
-			 * so we can safely record this and loop back
-			 * to cleanup the other buffers. Don't need to
-			 * remove the clustered uptodate information
-			 * for this bh as it's not marked locally
-			 * uptodate. */
-			status = -EIO;
-			brelse(bh);
-			bhs[i] = NULL;
-			continue;
+		if (!(flags & OCFS2_BH_READAHEAD)) {
+			/* We know this can't have changed as we hold the
+			 * inode sem. Avoid doing any work on the bh if the
+			 * journal has it. */
+			if (!buffer_jbd(bh))
+				wait_on_buffer(bh);
+
+			if (!buffer_uptodate(bh)) {
+				/* Status won't be cleared from here on out,
+				 * so we can safely record this and loop back
+				 * to cleanup the other buffers. Don't need to
+				 * remove the clustered uptodate information
+				 * for this bh as it's not marked locally
+				 * uptodate. */
+				status = -EIO;
+				brelse(bh);
+				bhs[i] = NULL;
+				continue;
+			}
 		}
 
+		/* Always set the buffer in the cache, even if it was
+		 * a forced read, or read-ahead which hasn't yet
+		 * completed. */
 		if (inode)
 			ocfs2_set_buffer_uptodate(inode, bh);
 	}
 	if (inode)
 		mutex_unlock(&OCFS2_I(inode)->ip_io_mutex);
 
-	mlog(ML_BH_IO, "block=(%llu), nr=(%d), cached=%s\n", 
+	mlog(ML_BH_IO, "block=(%llu), nr=(%d), cached=%s, flags=0x%x\n", 
 	     (unsigned long long)block, nr,
-	     (!(flags & OCFS2_BH_CACHED) || ignore_cache) ? "no" : "yes");
+	     (!(flags & OCFS2_BH_CACHED) || ignore_cache) ? "no" : "yes", flags);
 
 bail:
 
diff --git a/fs/ocfs2/buffer_head_io.h b/fs/ocfs2/buffer_head_io.h
index 6ecb909..6cc2093 100644
--- a/fs/ocfs2/buffer_head_io.h
+++ b/fs/ocfs2/buffer_head_io.h
@@ -49,7 +49,7 @@ int ocfs2_read_blocks(struct ocfs2_super          *osb,
 
 
 #define OCFS2_BH_CACHED            1
-#define OCFS2_BH_READAHEAD         8	/* use this to pass READA down to submit_bh */
+#define OCFS2_BH_READAHEAD         8
 
 static inline int ocfs2_read_block(struct ocfs2_super * osb, u64 off,
 				   struct buffer_head **bh, int flags,
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
index 3d494d1..04e0191 100644
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -74,14 +74,14 @@ static int ocfs2_extend_dir(struct ocfs2_super *osb,
 int ocfs2_readdir(struct file * filp, void * dirent, filldir_t filldir)
 {
 	int error = 0;
-	unsigned long offset, blk;
-	int i, num, stored;
+	unsigned long offset, blk, last_ra_blk = 0;
+	int i, stored;
 	struct buffer_head * bh, * tmp;
 	struct ocfs2_dir_entry * de;
 	int err;
 	struct inode *inode = filp->f_dentry->d_inode;
 	struct super_block * sb = inode->i_sb;
-	int have_disk_lock = 0;
+	unsigned int ra_sectors = 16;
 
 	mlog_entry("dirino=%llu\n",
 		   (unsigned long long)OCFS2_I(inode)->ip_blkno);
@@ -95,9 +95,8 @@ int ocfs2_readdir(struct file * filp, void * dirent, filldir_t filldir)
 			mlog_errno(error);
 		/* we haven't got any yet, so propagate the error. */
 		stored = error;
-		goto bail;
+		goto bail_nolock;
 	}
-	have_disk_lock = 1;
 
 	offset = filp->f_pos & (sb->s_blocksize - 1);
 
@@ -113,16 +112,21 @@ int ocfs2_readdir(struct file * filp, void * dirent, filldir_t filldir)
 			continue;
 		}
 
-		/*
-		 * Do the readahead (8k)
-		 */
-		if (!offset) {
-			for (i = 16 >> (sb->s_blocksize_bits - 9), num = 0;
+		/* The idea here is to begin with 8k read-ahead and to stay
+		 * 4k ahead of our current position.
+		 *
+		 * TODO: Use the pagecache for this. We just need to
+		 * make sure it's cluster-safe... */
+		if (!last_ra_blk
+		    || (((last_ra_blk - blk) << 9) <= (ra_sectors / 2))) {
+			for (i = ra_sectors >> (sb->s_blocksize_bits - 9);
 			     i > 0; i--) {
 				tmp = ocfs2_bread(inode, ++blk, &err, 1);
 				if (tmp)
 					brelse(tmp);
 			}
+			last_ra_blk = blk;
+			ra_sectors = 8;
 		}
 
 revalidate:
@@ -194,9 +198,9 @@ revalidate:
 
 	stored = 0;
 bail:
-	if (have_disk_lock)
-		ocfs2_meta_unlock(inode, 0);
+	ocfs2_meta_unlock(inode, 0);
 
+bail_nolock:
 	mlog_exit(stored);
 
 	return stored;
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index 3f496c4..7bcf691 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -1050,12 +1050,8 @@ struct buffer_head *ocfs2_bread(struct inode *inode,
 	u64 p_blkno;
 	int readflags = OCFS2_BH_CACHED;
 
-#if 0
-	/* only turn this on if we know we can deal with read_block
-	 * returning nothing */
 	if (reada)
 		readflags |= OCFS2_BH_READAHEAD;
-#endif
 
 	if (((u64)block << inode->i_sb->s_blocksize_bits) >=
 	    i_size_read(inode)) {
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index 2412647..0d3e939 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -56,6 +56,7 @@
 #include "journal.h"
 #include "namei.h"
 #include "suballoc.h"
+#include "super.h"
 #include "symlink.h"
 #include "sysfile.h"
 #include "uptodate.h"
@@ -1962,13 +1963,8 @@ restart:
 				}
 				num++;
 
-				/* XXX: questionable readahead stuff here */
 				bh = ocfs2_bread(dir, b++, &err, 1);
 				bh_use[ra_max] = bh;
-#if 0		// ???
-				if (bh)
-					ll_rw_block(READ, 1, &bh);
-#endif
 			}
 		}
 		if ((bh = bh_use[ra_ptr++]) == NULL)
@@ -1976,6 +1972,10 @@ restart:
 		wait_on_buffer(bh);
 		if (!buffer_uptodate(bh)) {
 			/* read error, skip block & hope for the best */
+			ocfs2_error(dir->i_sb, "reading directory %llu, "
+				    "offset %lu\n",
+				    (unsigned long long)OCFS2_I(dir)->ip_blkno,
+				    block);
 			brelse(bh);
 			goto next;
 		}
diff --git a/fs/ocfs2/uptodate.c b/fs/ocfs2/uptodate.c
index b8a00a7..9707ed7 100644
--- a/fs/ocfs2/uptodate.c
+++ b/fs/ocfs2/uptodate.c
@@ -206,7 +206,10 @@ static int ocfs2_buffer_cached(struct ocfs2_inode_info *oi,
 }
 
 /* Warning: even if it returns true, this does *not* guarantee that
- * the block is stored in our inode metadata cache. */
+ * the block is stored in our inode metadata cache. 
+ * 
+ * This can be called under lock_buffer()
+ */
 int ocfs2_buffer_uptodate(struct inode *inode,
 			  struct buffer_head *bh)
 {
@@ -226,6 +229,16 @@ int ocfs2_buffer_uptodate(struct inode *inode,
 	return ocfs2_buffer_cached(OCFS2_I(inode), bh);
 }
 
+/* 
+ * Determine whether a buffer is currently out on a read-ahead request.
+ * ip_io_sem should be held to serialize submitters with the logic here.
+ */
+int ocfs2_buffer_read_ahead(struct inode *inode,
+			    struct buffer_head *bh)
+{
+	return buffer_locked(bh) && ocfs2_buffer_cached(OCFS2_I(inode), bh);
+}
+
 /* Requires ip_lock */
 static void ocfs2_append_cache_array(struct ocfs2_caching_info *ci,
 				     sector_t block)
@@ -403,7 +416,11 @@ out_free:
  *
  * Note that this function may actually fail to insert the block if
  * memory cannot be allocated. This is not fatal however (but may
- * result in a performance penalty) */
+ * result in a performance penalty)
+ *
+ * Readahead buffers can be passed in here before the I/O request is
+ * completed.
+ */
 void ocfs2_set_buffer_uptodate(struct inode *inode,
 			       struct buffer_head *bh)
 {
diff --git a/fs/ocfs2/uptodate.h b/fs/ocfs2/uptodate.h
index 01cd32d..2e73206 100644
--- a/fs/ocfs2/uptodate.h
+++ b/fs/ocfs2/uptodate.h
@@ -40,5 +40,7 @@ void ocfs2_set_new_buffer_uptodate(struct inode *inode,
 				   struct buffer_head *bh);
 void ocfs2_remove_from_cache(struct inode *inode,
 			     struct buffer_head *bh);
+int ocfs2_buffer_read_ahead(struct inode *inode,
+			    struct buffer_head *bh);
 
 #endif /* OCFS2_UPTODATE_H */
-- 
cgit v1.1


From 02ed8416fe5b7e33b5bbf2d73f9af1d316806822 Mon Sep 17 00:00:00 2001
From: Mark Fasheh <mark.fasheh@oracle.com>
Date: Thu, 14 Sep 2006 10:28:06 -0700
Subject: ocfs2: Remove EXPERIMENTAL dependency

Things have been working pretty well for a while now.

We should've probably done this at least one kernel
revision ago, but it doesn't hurt to be paranoid.

Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
---
 fs/Kconfig | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/Kconfig b/fs/Kconfig
index 3f00a9f..5305816 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -325,8 +325,8 @@ config FS_POSIX_ACL
 source "fs/xfs/Kconfig"
 
 config OCFS2_FS
-	tristate "OCFS2 file system support (EXPERIMENTAL)"
-	depends on NET && SYSFS && EXPERIMENTAL
+	tristate "OCFS2 file system support"
+	depends on NET && SYSFS
 	select CONFIGFS_FS
 	select JBD
 	select CRC32
-- 
cgit v1.1


From f12033d206ea48928d8124cdd5d35d8008c18935 Mon Sep 17 00:00:00 2001
From: Mark Fasheh <mark.fasheh@oracle.com>
Date: Wed, 13 Sep 2006 18:57:57 -0700
Subject: ocfs2: Don't print on unknown remote blocking call

Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
---
 fs/ocfs2/dlm/dlmast.c | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/dlm/dlmast.c b/fs/ocfs2/dlm/dlmast.c
index 42775e2..f13a4ba 100644
--- a/fs/ocfs2/dlm/dlmast.c
+++ b/fs/ocfs2/dlm/dlmast.c
@@ -367,12 +367,10 @@ int dlm_proxy_ast_handler(struct o2net_msg *msg, u32 len, void *data)
 			goto do_ast;
 	}
 
-	mlog(ML_ERROR, "got %sast for unknown lock!  cookie=%u:%llu, "
-		       "name=%.*s, namelen=%u\n", 
-		       past->type == DLM_AST ? "" : "b", 
-		       dlm_get_lock_cookie_node(cookie),
-		       dlm_get_lock_cookie_seq(cookie),
-		       locklen, name, locklen);
+	mlog(0, "got %sast for unknown lock!  cookie=%u:%llu, "
+	     "name=%.*s, namelen=%u\n", past->type == DLM_AST ? "" : "b", 
+	     dlm_get_lock_cookie_node(cookie), dlm_get_lock_cookie_seq(cookie),
+	     locklen, name, locklen);
 
 	ret = DLM_NORMAL;
 unlock_out:
-- 
cgit v1.1


From eb35746ca5e2211569b91ebb44d55b88ec91f3b0 Mon Sep 17 00:00:00 2001
From: Mark Fasheh <mark.fasheh@oracle.com>
Date: Wed, 9 Aug 2006 13:23:08 -0700
Subject: ocfs2: Remove overzealous BUG_ON()

The truncate code was never supposed to BUG() on an allocator it doesn't
know about, but rather to ignore it. Right now, this does nothing, but when
we change our allocation paths to use all suballocator files, this will
allow current versions of the fs module to work fine.

Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
---
 fs/ocfs2/alloc.c | 28 ++++++++++++++++++++--------
 1 file changed, 20 insertions(+), 8 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index edaab05..f43bc5f 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -1717,17 +1717,29 @@ static int ocfs2_do_truncate(struct ocfs2_super *osb,
 
 			ocfs2_remove_from_cache(inode, eb_bh);
 
-			BUG_ON(eb->h_suballoc_slot);
 			BUG_ON(el->l_recs[0].e_clusters);
 			BUG_ON(el->l_recs[0].e_cpos);
 			BUG_ON(el->l_recs[0].e_blkno);
-			status = ocfs2_free_extent_block(handle,
-							 tc->tc_ext_alloc_inode,
-							 tc->tc_ext_alloc_bh,
-							 eb);
-			if (status < 0) {
-				mlog_errno(status);
-				goto bail;
+			if (eb->h_suballoc_slot == 0) {
+				/*
+				 * This code only understands how to
+				 * lock the suballocator in slot 0,
+				 * which is fine because allocation is
+				 * only ever done out of that
+				 * suballocator too. A future version
+				 * might change that however, so avoid
+				 * a free if we don't know how to
+				 * handle it. This way an fs incompat
+				 * bit will not be necessary.
+				 */
+				status = ocfs2_free_extent_block(handle,
+								 tc->tc_ext_alloc_inode,
+								 tc->tc_ext_alloc_bh,
+								 eb);
+				if (status < 0) {
+					mlog_errno(status);
+					goto bail;
+				}
 			}
 		}
 		brelse(eb_bh);
-- 
cgit v1.1


From 35058687912aa2f0b4554383cc10be4e0683b9a4 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Thu, 24 Aug 2006 19:10:20 +1000
Subject: [CRYPTO] users: Use crypto_hash interface instead of crypto_digest

This patch converts all remaining crypto_digest users to use the new
crypto_hash interface.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 fs/nfsd/nfs4recover.c | 21 +++++++++++----------
 1 file changed, 11 insertions(+), 10 deletions(-)

(limited to 'fs')

diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index 06da750..e35d7e5 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -33,7 +33,7 @@
 *
 */
 
-
+#include <linux/err.h>
 #include <linux/sunrpc/svc.h>
 #include <linux/nfsd/nfsd.h>
 #include <linux/nfs4.h>
@@ -87,34 +87,35 @@ int
 nfs4_make_rec_clidname(char *dname, struct xdr_netobj *clname)
 {
 	struct xdr_netobj cksum;
-	struct crypto_tfm *tfm;
+	struct hash_desc desc;
 	struct scatterlist sg[1];
 	int status = nfserr_resource;
 
 	dprintk("NFSD: nfs4_make_rec_clidname for %.*s\n",
 			clname->len, clname->data);
-	tfm = crypto_alloc_tfm("md5", CRYPTO_TFM_REQ_MAY_SLEEP);
-	if (tfm == NULL)
-		goto out;
-	cksum.len = crypto_tfm_alg_digestsize(tfm);
+	desc.flags = CRYPTO_TFM_REQ_MAY_SLEEP;
+	desc.tfm = crypto_alloc_hash("md5", 0, CRYPTO_ALG_ASYNC);
+	if (IS_ERR(desc.tfm))
+		goto out_no_tfm;
+	cksum.len = crypto_hash_digestsize(desc.tfm);
 	cksum.data = kmalloc(cksum.len, GFP_KERNEL);
 	if (cksum.data == NULL)
  		goto out;
-	crypto_digest_init(tfm);
 
 	sg[0].page = virt_to_page(clname->data);
 	sg[0].offset = offset_in_page(clname->data);
 	sg[0].length = clname->len;
 
-	crypto_digest_update(tfm, sg, 1);
-	crypto_digest_final(tfm, cksum.data);
+	if (crypto_hash_digest(&desc, sg, sg->length, cksum.data))
+		goto out;
 
 	md5_to_hex(dname, cksum.data);
 
 	kfree(cksum.data);
 	status = nfs_ok;
 out:
-	crypto_free_tfm(tfm);
+	crypto_free_hash(desc.tfm);
+out_no_tfm:
 	return status;
 }
 
-- 
cgit v1.1


From 2fe87f02a04ad6e7075023a87fe38eb458a4bb9d Mon Sep 17 00:00:00 2001
From: Steve French <sfrench@us.ibm.com>
Date: Thu, 21 Sep 2006 07:02:52 +0000
Subject: [CIFS] Support deep tree mounts (e.g. mounts to //server/share/path)

Samba bugzilla #4040

Signed-off-by: Steve French <sfrench@us.ibm.com>
---
 fs/cifs/CHANGES      |  7 ++++++-
 fs/cifs/cifs_fs_sb.h |  2 ++
 fs/cifs/cifsfs.h     |  2 +-
 fs/cifs/cifspdu.h    |  2 ++
 fs/cifs/connect.c    | 47 +++++++++++++++++++++++++++++++++++++++++++++++
 fs/cifs/dir.c        | 23 +++++++++++++++--------
 fs/cifs/xattr.c      |  2 +-
 7 files changed, 74 insertions(+), 11 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/CHANGES b/fs/cifs/CHANGES
index 0feb3bd..1eb9a2e 100644
--- a/fs/cifs/CHANGES
+++ b/fs/cifs/CHANGES
@@ -1,3 +1,7 @@
+Version 1.46
+------------
+Support deep tree mounts.  Better support OS/2, Win9x (DOS) time stamps.
+
 Version 1.45
 ------------
 Do not time out lockw calls when using posix extensions. Do not
@@ -6,7 +10,8 @@ on requests on other threads.  Improve POSIX locking emulation,
 (lock cancel now works, and unlock of merged range works even
 to Windows servers now).  Fix oops on mount to lanman servers
 (win9x, os/2 etc.) when null password.  Do not send listxattr
-(SMB to query all EAs) if nouser_xattr specified.
+(SMB to query all EAs) if nouser_xattr specified.  Fix SE Linux
+problem (instantiate inodes/dentries in right order for readdir).
 
 Version 1.44
 ------------
diff --git a/fs/cifs/cifs_fs_sb.h b/fs/cifs/cifs_fs_sb.h
index ad58eb0..fd1e52e 100644
--- a/fs/cifs/cifs_fs_sb.h
+++ b/fs/cifs/cifs_fs_sb.h
@@ -40,5 +40,7 @@ struct cifs_sb_info {
 	mode_t	mnt_file_mode;
 	mode_t	mnt_dir_mode;
 	int     mnt_cifs_flags;
+	int	prepathlen;
+	char *  prepath;
 };
 #endif				/* _CIFS_FS_SB_H */
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index 39ee8ef..bea875d 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -100,5 +100,5 @@ extern ssize_t	cifs_getxattr(struct dentry *, const char *, void *, size_t);
 extern ssize_t	cifs_listxattr(struct dentry *, char *, size_t);
 extern int cifs_ioctl (struct inode * inode, struct file * filep,
 		       unsigned int command, unsigned long arg);
-#define CIFS_VERSION   "1.45"
+#define CIFS_VERSION   "1.46"
 #endif				/* _CIFSFS_H */
diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h
index 8623902..81df2bf 100644
--- a/fs/cifs/cifspdu.h
+++ b/fs/cifs/cifspdu.h
@@ -1344,6 +1344,7 @@ struct smb_t2_rsp {
 #define SMB_QUERY_ATTR_FLAGS            0x206  /* append,immutable etc. */
 #define SMB_QUERY_POSIX_PERMISSION      0x207
 #define SMB_QUERY_POSIX_LOCK            0x208
+/* #define SMB_POSIX_OPEN  		0x209 */
 #define SMB_QUERY_FILE_INTERNAL_INFO    0x3ee
 #define SMB_QUERY_FILE_ACCESS_INFO      0x3f0
 #define SMB_QUERY_FILE_NAME_INFO2       0x3f1 /* 0x30 bytes */
@@ -1363,6 +1364,7 @@ struct smb_t2_rsp {
 #define SMB_SET_XATTR                   0x205
 #define SMB_SET_ATTR_FLAGS              0x206  /* append, immutable etc. */
 #define SMB_SET_POSIX_LOCK              0x208
+#define SMB_POSIX_OPEN                  0x209
 #define SMB_SET_FILE_BASIC_INFO2        0x3ec
 #define SMB_SET_FILE_RENAME_INFORMATION 0x3f2 /* BB check if qpathinfo level too */
 #define SMB_FILE_ALL_INFO2              0x3fa
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 5d394c7..0e9ba0b 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -89,6 +89,7 @@ struct smb_vol {
 	unsigned int wsize;
 	unsigned int sockopt;
 	unsigned short int port;
+	char * prepath;
 };
 
 static int ipv4_connect(struct sockaddr_in *psin_server, 
@@ -993,6 +994,28 @@ cifs_parse_mount_options(char *options, const char *devname,struct smb_vol *vol)
 				printk(KERN_WARNING "CIFS: domain name too long\n");
 				return 1;
 			}
+                } else if (strnicmp(data, "prefixpath", 10) == 0) {
+                        if (!value || !*value) {
+                                printk(KERN_WARNING
+                                       "CIFS: invalid path prefix\n");
+                                return 1;       /* needs_arg; */
+                        }
+                        if ((temp_len = strnlen(value, 1024)) < 1024) {
+				if(value[0] != '/')
+					temp_len++;  /* missing leading slash */
+                                vol->prepath = kmalloc(temp_len+1,GFP_KERNEL);
+                                if(vol->prepath == NULL)
+                                        return 1;
+				if(value[0] != '/') {
+					vol->prepath[0] = '/';
+	                                strcpy(vol->prepath+1,value);
+				} else
+					strcpy(vol->prepath,value);
+				cFYI(1,("prefix path %s",vol->prepath));
+                        } else {
+                                printk(KERN_WARNING "CIFS: prefix too long\n");
+                                return 1;
+                        }
 		} else if (strnicmp(data, "iocharset", 9) == 0) {
 			if (!value || !*value) {
 				printk(KERN_WARNING "CIFS: invalid iocharset specified\n");
@@ -1605,6 +1628,7 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
 	if (cifs_parse_mount_options(mount_data, devname, &volume_info)) {
 		kfree(volume_info.UNC);
 		kfree(volume_info.password);
+		kfree(volume_info.prepath);
 		FreeXid(xid);
 		return -EINVAL;
 	}
@@ -1619,6 +1643,7 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
            locations such as env variables and files on disk */
 		kfree(volume_info.UNC);
 		kfree(volume_info.password);
+		kfree(volume_info.prepath);
 		FreeXid(xid);
 		return -EINVAL;
 	}
@@ -1639,6 +1664,7 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
 			/* we failed translating address */
 			kfree(volume_info.UNC);
 			kfree(volume_info.password);
+			kfree(volume_info.prepath);
 			FreeXid(xid);
 			return -EINVAL;
 		}
@@ -1651,6 +1677,7 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
 		cERROR(1,("Connecting to DFS root not implemented yet"));
 		kfree(volume_info.UNC);
 		kfree(volume_info.password);
+		kfree(volume_info.prepath);
 		FreeXid(xid);
 		return -EINVAL;
 	} else /* which servers DFS root would we conect to */ {
@@ -1658,6 +1685,7 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
 		       ("CIFS mount error: No UNC path (e.g. -o unc=//192.168.1.100/public) specified"));
 		kfree(volume_info.UNC);
 		kfree(volume_info.password);
+		kfree(volume_info.prepath);
 		FreeXid(xid);
 		return -EINVAL;
 	}
@@ -1672,6 +1700,7 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
 			cERROR(1,("CIFS mount error: iocharset %s not found",volume_info.iocharset));
 			kfree(volume_info.UNC);
 			kfree(volume_info.password);
+			kfree(volume_info.prepath);
 			FreeXid(xid);
 			return -ELIBACC;
 		}
@@ -1688,6 +1717,7 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
 	else {
 		kfree(volume_info.UNC);
 		kfree(volume_info.password);
+		kfree(volume_info.prepath);
 		FreeXid(xid);
 		return -EINVAL;
 	}
@@ -1710,6 +1740,7 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
 				sock_release(csocket);
 			kfree(volume_info.UNC);
 			kfree(volume_info.password);
+			kfree(volume_info.prepath);
 			FreeXid(xid);
 			return rc;
 		}
@@ -1720,6 +1751,7 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
 			sock_release(csocket);
 			kfree(volume_info.UNC);
 			kfree(volume_info.password);
+			kfree(volume_info.prepath);
 			FreeXid(xid);
 			return rc;
 		} else {
@@ -1744,6 +1776,7 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
 				sock_release(csocket);
 				kfree(volume_info.UNC);
 				kfree(volume_info.password);
+				kfree(volume_info.prepath);
 				FreeXid(xid);
 				return rc;
 			}
@@ -1831,6 +1864,14 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
 			/* Windows ME may prefer this */
 			cFYI(1,("readsize set to minimum 2048"));
 		}
+		/* calculate prepath */
+		cifs_sb->prepath = volume_info.prepath;
+		if(cifs_sb->prepath) {
+			cifs_sb->prepathlen = strlen(cifs_sb->prepath);
+			cifs_sb->prepath[0] = CIFS_DIR_SEP(cifs_sb);
+			volume_info.prepath = NULL;
+		} else 
+			cifs_sb->prepathlen = 0;
 		cifs_sb->mnt_uid = volume_info.linux_uid;
 		cifs_sb->mnt_gid = volume_info.linux_gid;
 		cifs_sb->mnt_file_mode = volume_info.file_mode;
@@ -2008,6 +2049,7 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
 	the password ptr is put in the new session structure (in which case the
 	password will be freed at unmount time) */
 	kfree(volume_info.UNC);
+	kfree(volume_info.prepath);
 	FreeXid(xid);
 	return rc;
 }
@@ -3195,6 +3237,7 @@ cifs_umount(struct super_block *sb, struct cifs_sb_info *cifs_sb)
 	int xid;
 	struct cifsSesInfo *ses = NULL;
 	struct task_struct *cifsd_task;
+	char * tmp;
 
 	xid = GetXid();
 
@@ -3228,6 +3271,10 @@ cifs_umount(struct super_block *sb, struct cifs_sb_info *cifs_sb)
 	}
 	
 	cifs_sb->tcon = NULL;
+	tmp = cifs_sb->prepath;
+	cifs_sb->prepathlen = 0;
+	cifs_sb->prepath = NULL;
+	kfree(tmp);
 	if (ses)
 		schedule_timeout_interruptible(msecs_to_jiffies(500));
 	if (ses)
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index 914239d5..66b825a 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -46,7 +46,8 @@ char *
 build_path_from_dentry(struct dentry *direntry)
 {
 	struct dentry *temp;
-	int namelen = 0;
+	int namelen;
+	int pplen;
 	char *full_path;
 	char dirsep;
 
@@ -56,7 +57,9 @@ build_path_from_dentry(struct dentry *direntry)
 		when the server crashed */
 
 	dirsep = CIFS_DIR_SEP(CIFS_SB(direntry->d_sb));
+	pplen = CIFS_SB(direntry->d_sb)->prepathlen;
 cifs_bp_rename_retry:
+	namelen = pplen; 
 	for (temp = direntry; !IS_ROOT(temp);) {
 		namelen += (1 + temp->d_name.len);
 		temp = temp->d_parent;
@@ -70,7 +73,6 @@ cifs_bp_rename_retry:
 	if(full_path == NULL)
 		return full_path;
 	full_path[namelen] = 0;	/* trailing null */
-
 	for (temp = direntry; !IS_ROOT(temp);) {
 		namelen -= 1 + temp->d_name.len;
 		if (namelen < 0) {
@@ -79,7 +81,7 @@ cifs_bp_rename_retry:
 			full_path[namelen] = dirsep;
 			strncpy(full_path + namelen + 1, temp->d_name.name,
 				temp->d_name.len);
-			cFYI(0, (" name: %s ", full_path + namelen));
+			cFYI(0, ("name: %s", full_path + namelen));
 		}
 		temp = temp->d_parent;
 		if(temp == NULL) {
@@ -88,18 +90,23 @@ cifs_bp_rename_retry:
 			return NULL;
 		}
 	}
-	if (namelen != 0) {
+	if (namelen != pplen) {
 		cERROR(1,
-		       ("We did not end path lookup where we expected namelen is %d",
+		       ("did not end path lookup where expected namelen is %d",
 			namelen));
-		/* presumably this is only possible if we were racing with a rename 
+		/* presumably this is only possible if racing with a rename 
 		of one of the parent directories  (we can not lock the dentries
 		above us to prevent this, but retrying should be harmless) */
 		kfree(full_path);
-		namelen = 0;
 		goto cifs_bp_rename_retry;
 	}
-
+	/* DIR_SEP already set for byte  0 / vs \ but not for
+	   subsequent slashes in prepath which currently must
+	   be entered the right way - not sure if there is an alternative
+	   since the '\' is a valid posix character so we can not switch
+	   those safely to '/' if any are found in the middle of the prepath */
+	/* BB test paths to Windows with '/' in the midst of prepath */
+	strncpy(full_path,CIFS_SB(direntry->d_sb)->prepath,pplen);
 	return full_path;
 }
 
diff --git a/fs/cifs/xattr.c b/fs/cifs/xattr.c
index 067648b..18fcec1 100644
--- a/fs/cifs/xattr.c
+++ b/fs/cifs/xattr.c
@@ -269,7 +269,7 @@ ssize_t cifs_getxattr(struct dentry * direntry, const char * ea_name,
 				rc = CIFSSMBGetCIFSACL(xid, pTcon, fid,
 					ea_value, buf_size,
 					ACL_TYPE_ACCESS);
-				CIFSSMBClose(xid, pTcon, fid)
+				CIFSSMBClose(xid, pTcon, fid);
 			}
 		} */  /* BB enable after fixing up return data */
                   		
-- 
cgit v1.1


From 6b70c9559bcf381a6521e38b0dd5d3d4d905868a Mon Sep 17 00:00:00 2001
From: Steve French <sfrench@us.ibm.com>
Date: Thu, 21 Sep 2006 07:35:29 +0000
Subject: [CIFS] New POSIX locking code not setting rc properly to zero on
 successful unlock in case where server does not support POSIX locks and nobrl
 is not specified.

Signed-off-by: Steve French <sfrench@us.ibm.com>
---
 fs/cifs/file.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index e9c5ba9..ddb012a 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -752,6 +752,7 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock)
 			int stored_rc = 0;
 			struct cifsLockInfo *li, *tmp;
 
+			rc = 0;
 			down(&fid->lock_sem);
 			list_for_each_entry_safe(li, tmp, &fid->llist, llist) {
 				if (pfLock->fl_start <= li->offset &&
@@ -766,7 +767,7 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock)
 					kfree(li);
 				}
 			}
-		up(&fid->lock_sem);
+			up(&fid->lock_sem);
 		}
 	}
 
-- 
cgit v1.1


From b8c06a2ab68661bf841e21003f4447f8d422aed3 Mon Sep 17 00:00:00 2001
From: Steve French <sfrench@us.ibm.com>
Date: Fri, 22 Sep 2006 01:14:52 +0000
Subject: [CIFS] statfs for cifs unix extensions no longer experimental

Signed-off-by: Steve French <sfrench@us.ibm.com>
---
 fs/cifs/cifsfs.c | 2 --
 1 file changed, 2 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 3cd7500..c3ef1c0 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -189,7 +189,6 @@ cifs_statfs(struct dentry *dentry, struct kstatfs *buf)
 	buf->f_files = 0;	/* undefined */
 	buf->f_ffree = 0;	/* unlimited */
 
-#ifdef CONFIG_CIFS_EXPERIMENTAL
 /* BB we could add a second check for a QFS Unix capability bit */
 /* BB FIXME check CIFS_POSIX_EXTENSIONS Unix cap first FIXME BB */
     if ((pTcon->ses->capabilities & CAP_UNIX) && (CIFS_POSIX_EXTENSIONS &
@@ -199,7 +198,6 @@ cifs_statfs(struct dentry *dentry, struct kstatfs *buf)
     /* Only need to call the old QFSInfo if failed
     on newer one */
     if(rc)
-#endif /* CIFS_EXPERIMENTAL */
 	rc = CIFSSMBQFSInfo(xid, pTcon, buf);
 
 	/* Old Windows servers do not support level 103, retry with level 
-- 
cgit v1.1


From f2dd117fa51dd8fc071b1352254c0d14d2399b0a Mon Sep 17 00:00:00 2001
From: Michal Piotrowski <michal.k.k.piotrowski@gmail.com>
Date: Fri, 22 Sep 2006 10:13:46 +0100
Subject: [JFFS2] Remove unneeded ifdefs from jffs2_fs_i.h

We certainly don't need the check for Linux version > 2.5.2, and in fact
we can also live without the __ECOS check, since we can just add it back
in the eCos git tree which is automatically derived from the Linux fs/jffs2
subdirectory in the upstream git tree.

Signed-off-by: Michal Piotrowski <michal.k.k.piotrowski@gmail.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: David Woodhouse <dwmw2@infradead.org>
---
 fs/jffs2/jffs2_fs_i.h | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'fs')

diff --git a/fs/jffs2/jffs2_fs_i.h b/fs/jffs2/jffs2_fs_i.h
index 2e0cc8e..3a56607 100644
--- a/fs/jffs2/jffs2_fs_i.h
+++ b/fs/jffs2/jffs2_fs_i.h
@@ -41,11 +41,7 @@ struct jffs2_inode_info {
 
 	uint16_t flags;
 	uint8_t usercompr;
-#if !defined (__ECOS)
-#if LINUX_VERSION_CODE > KERNEL_VERSION(2,5,2)
 	struct inode vfs_inode;
-#endif
-#endif
 #ifdef CONFIG_JFFS2_FS_POSIX_ACL
 	struct posix_acl *i_acl_access;
 	struct posix_acl *i_acl_default;
-- 
cgit v1.1


From 1c3c07e9f6cc50dab2aeb8051325e317d4f6c70e Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Tue, 25 Jul 2006 11:28:18 -0400
Subject: NFS: Add a new ACCESS rpc call cache to the linux nfs client

The current access cache only allows one entry at a time to be cached for each
inode. Add a per-inode red-black tree in order to allow more than one to
be cached at a time.

Should significantly cut down the time spent in path traversal for shared
directories such as ${PATH}, /usr/share, etc.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/dir.c   | 133 +++++++++++++++++++++++++++++++++++++++++++++++++--------
 fs/nfs/inode.c |  13 ++----
 2 files changed, 120 insertions(+), 26 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index e7ffb4d..094afde 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1638,35 +1638,134 @@ out:
 	return error;
 }
 
-int nfs_access_get_cached(struct inode *inode, struct rpc_cred *cred, struct nfs_access_entry *res)
+static void nfs_access_free_entry(struct nfs_access_entry *entry)
+{
+	put_rpccred(entry->cred);
+	kfree(entry);
+}
+
+static void __nfs_access_zap_cache(struct inode *inode)
 {
 	struct nfs_inode *nfsi = NFS_I(inode);
-	struct nfs_access_entry *cache = &nfsi->cache_access;
+	struct rb_root *root_node = &nfsi->access_cache;
+	struct rb_node *n, *dispose = NULL;
+	struct nfs_access_entry *entry;
+
+	/* Unhook entries from the cache */
+	while ((n = rb_first(root_node)) != NULL) {
+		entry = rb_entry(n, struct nfs_access_entry, rb_node);
+		rb_erase(n, root_node);
+		n->rb_left = dispose;
+		dispose = n;
+	}
+	nfsi->cache_validity &= ~NFS_INO_INVALID_ACCESS;
+	spin_unlock(&inode->i_lock);
 
-	if (cache->cred != cred
-			|| time_after(jiffies, cache->jiffies + NFS_ATTRTIMEO(inode))
-			|| (nfsi->cache_validity & NFS_INO_INVALID_ACCESS))
-		return -ENOENT;
-	memcpy(res, cache, sizeof(*res));
-	return 0;
+	/* Now kill them all! */
+	while (dispose != NULL) {
+		n = dispose;
+		dispose = n->rb_left;
+		nfs_access_free_entry(rb_entry(n, struct nfs_access_entry, rb_node));
+	}
 }
 
-void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *set)
+void nfs_access_zap_cache(struct inode *inode)
 {
-	struct nfs_inode *nfsi = NFS_I(inode);
-	struct nfs_access_entry *cache = &nfsi->cache_access;
+	spin_lock(&inode->i_lock);
+	/* This will release the spinlock */
+	__nfs_access_zap_cache(inode);
+}
 
-	if (cache->cred != set->cred) {
-		if (cache->cred)
-			put_rpccred(cache->cred);
-		cache->cred = get_rpccred(set->cred);
+static struct nfs_access_entry *nfs_access_search_rbtree(struct inode *inode, struct rpc_cred *cred)
+{
+	struct rb_node *n = NFS_I(inode)->access_cache.rb_node;
+	struct nfs_access_entry *entry;
+
+	while (n != NULL) {
+		entry = rb_entry(n, struct nfs_access_entry, rb_node);
+
+		if (cred < entry->cred)
+			n = n->rb_left;
+		else if (cred > entry->cred)
+			n = n->rb_right;
+		else
+			return entry;
 	}
-	/* FIXME: replace current access_cache BKL reliance with inode->i_lock */
+	return NULL;
+}
+
+int nfs_access_get_cached(struct inode *inode, struct rpc_cred *cred, struct nfs_access_entry *res)
+{
+	struct nfs_inode *nfsi = NFS_I(inode);
+	struct nfs_access_entry *cache;
+	int err = -ENOENT;
+
 	spin_lock(&inode->i_lock);
-	nfsi->cache_validity &= ~NFS_INO_INVALID_ACCESS;
+	if (nfsi->cache_validity & NFS_INO_INVALID_ACCESS)
+		goto out_zap;
+	cache = nfs_access_search_rbtree(inode, cred);
+	if (cache == NULL)
+		goto out;
+	if (time_after(jiffies, cache->jiffies + NFS_ATTRTIMEO(inode)))
+		goto out_stale;
+	res->jiffies = cache->jiffies;
+	res->cred = cache->cred;
+	res->mask = cache->mask;
+	err = 0;
+out:
+	spin_unlock(&inode->i_lock);
+	return err;
+out_stale:
+	rb_erase(&cache->rb_node, &nfsi->access_cache);
+	spin_unlock(&inode->i_lock);
+	nfs_access_free_entry(cache);
+	return -ENOENT;
+out_zap:
+	/* This will release the spinlock */
+	__nfs_access_zap_cache(inode);
+	return -ENOENT;
+}
+
+static void nfs_access_add_rbtree(struct inode *inode, struct nfs_access_entry *set)
+{
+	struct rb_root *root_node = &NFS_I(inode)->access_cache;
+	struct rb_node **p = &root_node->rb_node;
+	struct rb_node *parent = NULL;
+	struct nfs_access_entry *entry;
+
+	spin_lock(&inode->i_lock);
+	while (*p != NULL) {
+		parent = *p;
+		entry = rb_entry(parent, struct nfs_access_entry, rb_node);
+
+		if (set->cred < entry->cred)
+			p = &parent->rb_left;
+		else if (set->cred > entry->cred)
+			p = &parent->rb_right;
+		else
+			goto found;
+	}
+	rb_link_node(&set->rb_node, parent, p);
+	rb_insert_color(&set->rb_node, root_node);
 	spin_unlock(&inode->i_lock);
+	return;
+found:
+	rb_replace_node(parent, &set->rb_node, root_node);
+	spin_unlock(&inode->i_lock);
+	nfs_access_free_entry(entry);
+}
+
+void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *set)
+{
+	struct nfs_access_entry *cache = kmalloc(sizeof(*cache), GFP_KERNEL);
+	if (cache == NULL)
+		return;
+	RB_CLEAR_NODE(&cache->rb_node);
 	cache->jiffies = set->jiffies;
+	cache->cred = get_rpccred(set->cred);
 	cache->mask = set->mask;
+
+	nfs_access_add_rbtree(inode, cache);
 }
 
 static int nfs_do_access(struct inode *inode, struct rpc_cred *cred, int mask)
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index d349fb2..b94ab06 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -76,19 +76,14 @@ int nfs_write_inode(struct inode *inode, int sync)
 
 void nfs_clear_inode(struct inode *inode)
 {
-	struct nfs_inode *nfsi = NFS_I(inode);
-	struct rpc_cred *cred;
-
 	/*
 	 * The following should never happen...
 	 */
 	BUG_ON(nfs_have_writebacks(inode));
-	BUG_ON (!list_empty(&nfsi->open_files));
+	BUG_ON(!list_empty(&NFS_I(inode)->open_files));
+	BUG_ON(atomic_read(&NFS_I(inode)->data_updates) != 0);
 	nfs_zap_acl_cache(inode);
-	cred = nfsi->cache_access.cred;
-	if (cred)
-		put_rpccred(cred);
-	BUG_ON(atomic_read(&nfsi->data_updates) != 0);
+	nfs_access_zap_cache(inode);
 }
 
 /**
@@ -290,7 +285,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
 		nfsi->attrtimeo = NFS_MINATTRTIMEO(inode);
 		nfsi->attrtimeo_timestamp = jiffies;
 		memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf));
-		nfsi->cache_access.cred = NULL;
+		nfsi->access_cache = RB_ROOT;
 
 		unlock_new_inode(inode);
 	} else
-- 
cgit v1.1


From cfcea3e8c66c2dcde98d5c2693d4bff50b5cac97 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Tue, 25 Jul 2006 11:28:18 -0400
Subject: NFS: Add a global LRU list for the ACCESS cache

...in order to allow the addition of a memory shrinker.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/dir.c   | 35 ++++++++++++++++++++++++++++++++++-
 fs/nfs/inode.c |  2 ++
 2 files changed, 36 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 094afde..bf4f5ff 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1638,10 +1638,17 @@ out:
 	return error;
 }
 
+static DEFINE_SPINLOCK(nfs_access_lru_lock);
+static LIST_HEAD(nfs_access_lru_list);
+static atomic_long_t nfs_access_nr_entries;
+
 static void nfs_access_free_entry(struct nfs_access_entry *entry)
 {
 	put_rpccred(entry->cred);
 	kfree(entry);
+	smp_mb__before_atomic_dec();
+	atomic_long_dec(&nfs_access_nr_entries);
+	smp_mb__after_atomic_dec();
 }
 
 static void __nfs_access_zap_cache(struct inode *inode)
@@ -1655,6 +1662,7 @@ static void __nfs_access_zap_cache(struct inode *inode)
 	while ((n = rb_first(root_node)) != NULL) {
 		entry = rb_entry(n, struct nfs_access_entry, rb_node);
 		rb_erase(n, root_node);
+		list_del(&entry->lru);
 		n->rb_left = dispose;
 		dispose = n;
 	}
@@ -1671,6 +1679,13 @@ static void __nfs_access_zap_cache(struct inode *inode)
 
 void nfs_access_zap_cache(struct inode *inode)
 {
+	/* Remove from global LRU init */
+	if (test_and_clear_bit(NFS_INO_ACL_LRU_SET, &NFS_FLAGS(inode))) {
+		spin_lock(&nfs_access_lru_lock);
+		list_del_init(&NFS_I(inode)->access_cache_inode_lru);
+		spin_unlock(&nfs_access_lru_lock);
+	}
+
 	spin_lock(&inode->i_lock);
 	/* This will release the spinlock */
 	__nfs_access_zap_cache(inode);
@@ -1711,12 +1726,14 @@ int nfs_access_get_cached(struct inode *inode, struct rpc_cred *cred, struct nfs
 	res->jiffies = cache->jiffies;
 	res->cred = cache->cred;
 	res->mask = cache->mask;
+	list_move_tail(&cache->lru, &nfsi->access_cache_entry_lru);
 	err = 0;
 out:
 	spin_unlock(&inode->i_lock);
 	return err;
 out_stale:
 	rb_erase(&cache->rb_node, &nfsi->access_cache);
+	list_del(&cache->lru);
 	spin_unlock(&inode->i_lock);
 	nfs_access_free_entry(cache);
 	return -ENOENT;
@@ -1728,7 +1745,8 @@ out_zap:
 
 static void nfs_access_add_rbtree(struct inode *inode, struct nfs_access_entry *set)
 {
-	struct rb_root *root_node = &NFS_I(inode)->access_cache;
+	struct nfs_inode *nfsi = NFS_I(inode);
+	struct rb_root *root_node = &nfsi->access_cache;
 	struct rb_node **p = &root_node->rb_node;
 	struct rb_node *parent = NULL;
 	struct nfs_access_entry *entry;
@@ -1747,10 +1765,13 @@ static void nfs_access_add_rbtree(struct inode *inode, struct nfs_access_entry *
 	}
 	rb_link_node(&set->rb_node, parent, p);
 	rb_insert_color(&set->rb_node, root_node);
+	list_add_tail(&set->lru, &nfsi->access_cache_entry_lru);
 	spin_unlock(&inode->i_lock);
 	return;
 found:
 	rb_replace_node(parent, &set->rb_node, root_node);
+	list_add_tail(&set->lru, &nfsi->access_cache_entry_lru);
+	list_del(&entry->lru);
 	spin_unlock(&inode->i_lock);
 	nfs_access_free_entry(entry);
 }
@@ -1766,6 +1787,18 @@ void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *set)
 	cache->mask = set->mask;
 
 	nfs_access_add_rbtree(inode, cache);
+
+	/* Update accounting */
+	smp_mb__before_atomic_inc();
+	atomic_long_inc(&nfs_access_nr_entries);
+	smp_mb__after_atomic_inc();
+
+	/* Add inode to global LRU list */
+	if (!test_and_set_bit(NFS_INO_ACL_LRU_SET, &NFS_FLAGS(inode))) {
+		spin_lock(&nfs_access_lru_lock);
+		list_add_tail(&NFS_I(inode)->access_cache_inode_lru, &nfs_access_lru_list);
+		spin_unlock(&nfs_access_lru_lock);
+	}
 }
 
 static int nfs_do_access(struct inode *inode, struct rpc_cred *cred, int mask)
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index b94ab06..6ed018c 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -1104,6 +1104,8 @@ static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
 		INIT_LIST_HEAD(&nfsi->dirty);
 		INIT_LIST_HEAD(&nfsi->commit);
 		INIT_LIST_HEAD(&nfsi->open_files);
+		INIT_LIST_HEAD(&nfsi->access_cache_entry_lru);
+		INIT_LIST_HEAD(&nfsi->access_cache_inode_lru);
 		INIT_RADIX_TREE(&nfsi->nfs_page_tree, GFP_ATOMIC);
 		atomic_set(&nfsi->data_updates, 0);
 		nfsi->ndirty = 0;
-- 
cgit v1.1


From 979df72e6f963b42ee484f2eca049c3344da0ba7 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Tue, 25 Jul 2006 11:28:19 -0400
Subject: NFS: Add an ACCESS cache memory shrinker

A pinned inode may in theory end up filling memory with cached ACCESS
calls. This patch ensures that the VM may shrink away the cache in these
particular cases.
The shrinker works by iterating through the list of inodes on the global
nfs_access_lru_list, and removing the least recently used access
cache entry until it is done (or until the entire cache is empty).

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/dir.c      | 44 ++++++++++++++++++++++++++++++++++++++++++++
 fs/nfs/internal.h |  3 +++
 fs/nfs/super.c    |  5 +++++
 3 files changed, 52 insertions(+)

(limited to 'fs')

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index bf4f5ff..067d144 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1651,6 +1651,50 @@ static void nfs_access_free_entry(struct nfs_access_entry *entry)
 	smp_mb__after_atomic_dec();
 }
 
+int nfs_access_cache_shrinker(int nr_to_scan, gfp_t gfp_mask)
+{
+	LIST_HEAD(head);
+	struct nfs_inode *nfsi;
+	struct nfs_access_entry *cache;
+
+	spin_lock(&nfs_access_lru_lock);
+restart:
+	list_for_each_entry(nfsi, &nfs_access_lru_list, access_cache_inode_lru) {
+		struct inode *inode;
+
+		if (nr_to_scan-- == 0)
+			break;
+		inode = igrab(&nfsi->vfs_inode);
+		if (inode == NULL)
+			continue;
+		spin_lock(&inode->i_lock);
+		if (list_empty(&nfsi->access_cache_entry_lru))
+			goto remove_lru_entry;
+		cache = list_entry(nfsi->access_cache_entry_lru.next,
+				struct nfs_access_entry, lru);
+		list_move(&cache->lru, &head);
+		rb_erase(&cache->rb_node, &nfsi->access_cache);
+		if (!list_empty(&nfsi->access_cache_entry_lru))
+			list_move_tail(&nfsi->access_cache_inode_lru,
+					&nfs_access_lru_list);
+		else {
+remove_lru_entry:
+			list_del_init(&nfsi->access_cache_inode_lru);
+			clear_bit(NFS_INO_ACL_LRU_SET, &nfsi->flags);
+		}
+		spin_unlock(&inode->i_lock);
+		iput(inode);
+		goto restart;
+	}
+	spin_unlock(&nfs_access_lru_lock);
+	while (!list_empty(&head)) {
+		cache = list_entry(head.next, struct nfs_access_entry, lru);
+		list_del(&cache->lru);
+		nfs_access_free_entry(cache);
+	}
+	return (atomic_long_read(&nfs_access_nr_entries) / 100) * sysctl_vfs_cache_pressure;
+}
+
 static void __nfs_access_zap_cache(struct inode *inode)
 {
 	struct nfs_inode *nfsi = NFS_I(inode);
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index e4f4e5d..660e9ff 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -66,6 +66,9 @@ extern int nfs4_proc_fs_locations(struct inode *dir, struct dentry *dentry,
 				  struct page *page);
 #endif
 
+/* dir.c */
+extern int nfs_access_cache_shrinker(int nr_to_scan, gfp_t gfp_mask);
+
 /* inode.c */
 extern struct inode *nfs_alloc_inode(struct super_block *sb);
 extern void nfs_destroy_inode(struct inode *);
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index e8a9bee..06c321be 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -221,6 +221,8 @@ module_param_call(idmap_cache_timeout, param_set_idmap_timeout, param_get_int,
 		 &nfs_idmap_cache_timeout, 0644);
 #endif
 
+static struct shrinker *acl_shrinker;
+
 /*
  * Register the NFS filesystems
  */
@@ -240,6 +242,7 @@ int __init register_nfs_fs(void)
 	if (ret < 0)
 		goto error_2;
 #endif
+	acl_shrinker = set_shrinker(DEFAULT_SEEKS, nfs_access_cache_shrinker);
 	return 0;
 
 #ifdef CONFIG_NFS_V4
@@ -257,6 +260,8 @@ error_0:
  */
 void __exit unregister_nfs_fs(void)
 {
+	if (acl_shrinker != NULL)
+		remove_shrinker(acl_shrinker);
 #ifdef CONFIG_NFS_V4
 	unregister_filesystem(&nfs4_fs_type);
 	nfs_unregister_sysctl();
-- 
cgit v1.1


From 770bfad846ab6628444428467b11fa6773ae9ea1 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 22 Aug 2006 20:06:07 -0400
Subject: NFS: Add dentry materialisation op

The attached patch adds a new directory cache management function that prepares
a disconnected anonymous function to be connected into the dentry tree. The
anonymous dentry is transferred the name and parentage from another dentry.

The following changes were made in [try #2]:

 (*) d_materialise_dentry() now switches the parentage of the two nodes around
     correctly when one or other of them is self-referential.

The following changes were made in [try #7]:

 (*) d_instantiate_unique() has had the interior part split out as function
     __d_instantiate_unique(). Callers of this latter function must be holding
     the appropriate locks.

 (*) _d_rehash() has been added as a wrapper around __d_rehash() to call it
     with the most obvious hash list (the one from the name). d_rehash() now
     calls _d_rehash().

 (*) d_materialise_dentry() is now __d_materialise_dentry() and is static.

 (*) d_materialise_unique() added to perform the combination of d_find_alias(),
     d_materialise_dentry() and d_add_unique() that the NFS client was doing
     twice, all within a single dcache_lock critical section. This reduces the
     number of times two different spinlocks were being accessed.

The following further changes were made:

 (*) Add the dentries onto their parents d_subdirs lists.

Signed-Off-By: David Howells <dhowells@redhat.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/dcache.c | 164 ++++++++++++++++++++++++++++++++++++++++++++++++++++++------
 1 file changed, 150 insertions(+), 14 deletions(-)

(limited to 'fs')

diff --git a/fs/dcache.c b/fs/dcache.c
index 1b4a3a3..17b392a 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -828,17 +828,19 @@ void d_instantiate(struct dentry *entry, struct inode * inode)
  * (or otherwise set) by the caller to indicate that it is now
  * in use by the dcache.
  */
-struct dentry *d_instantiate_unique(struct dentry *entry, struct inode *inode)
+static struct dentry *__d_instantiate_unique(struct dentry *entry,
+					     struct inode *inode)
 {
 	struct dentry *alias;
 	int len = entry->d_name.len;
 	const char *name = entry->d_name.name;
 	unsigned int hash = entry->d_name.hash;
 
-	BUG_ON(!list_empty(&entry->d_alias));
-	spin_lock(&dcache_lock);
-	if (!inode)
-		goto do_negative;
+	if (!inode) {
+		entry->d_inode = NULL;
+		return NULL;
+	}
+
 	list_for_each_entry(alias, &inode->i_dentry, d_alias) {
 		struct qstr *qstr = &alias->d_name;
 
@@ -851,19 +853,35 @@ struct dentry *d_instantiate_unique(struct dentry *entry, struct inode *inode)
 		if (memcmp(qstr->name, name, len))
 			continue;
 		dget_locked(alias);
-		spin_unlock(&dcache_lock);
-		BUG_ON(!d_unhashed(alias));
-		iput(inode);
 		return alias;
 	}
+
 	list_add(&entry->d_alias, &inode->i_dentry);
-do_negative:
 	entry->d_inode = inode;
 	fsnotify_d_instantiate(entry, inode);
-	spin_unlock(&dcache_lock);
-	security_d_instantiate(entry, inode);
 	return NULL;
 }
+
+struct dentry *d_instantiate_unique(struct dentry *entry, struct inode *inode)
+{
+	struct dentry *result;
+
+	BUG_ON(!list_empty(&entry->d_alias));
+
+	spin_lock(&dcache_lock);
+	result = __d_instantiate_unique(entry, inode);
+	spin_unlock(&dcache_lock);
+
+	if (!result) {
+		security_d_instantiate(entry, inode);
+		return NULL;
+	}
+
+	BUG_ON(!d_unhashed(result));
+	iput(inode);
+	return result;
+}
+
 EXPORT_SYMBOL(d_instantiate_unique);
 
 /**
@@ -1235,6 +1253,11 @@ static void __d_rehash(struct dentry * entry, struct hlist_head *list)
  	hlist_add_head_rcu(&entry->d_hash, list);
 }
 
+static void _d_rehash(struct dentry * entry)
+{
+	__d_rehash(entry, d_hash(entry->d_parent, entry->d_name.hash));
+}
+
 /**
  * d_rehash	- add an entry back to the hash
  * @entry: dentry to add to the hash
@@ -1244,11 +1267,9 @@ static void __d_rehash(struct dentry * entry, struct hlist_head *list)
  
 void d_rehash(struct dentry * entry)
 {
-	struct hlist_head *list = d_hash(entry->d_parent, entry->d_name.hash);
-
 	spin_lock(&dcache_lock);
 	spin_lock(&entry->d_lock);
-	__d_rehash(entry, list);
+	_d_rehash(entry);
 	spin_unlock(&entry->d_lock);
 	spin_unlock(&dcache_lock);
 }
@@ -1386,6 +1407,120 @@ already_unhashed:
 	spin_unlock(&dcache_lock);
 }
 
+/*
+ * Prepare an anonymous dentry for life in the superblock's dentry tree as a
+ * named dentry in place of the dentry to be replaced.
+ */
+static void __d_materialise_dentry(struct dentry *dentry, struct dentry *anon)
+{
+	struct dentry *dparent, *aparent;
+
+	switch_names(dentry, anon);
+	do_switch(dentry->d_name.len, anon->d_name.len);
+	do_switch(dentry->d_name.hash, anon->d_name.hash);
+
+	dparent = dentry->d_parent;
+	aparent = anon->d_parent;
+
+	dentry->d_parent = (aparent == anon) ? dentry : aparent;
+	list_del(&dentry->d_u.d_child);
+	if (!IS_ROOT(dentry))
+		list_add(&dentry->d_u.d_child, &dentry->d_parent->d_subdirs);
+	else
+		INIT_LIST_HEAD(&dentry->d_u.d_child);
+
+	anon->d_parent = (dparent == dentry) ? anon : dparent;
+	list_del(&anon->d_u.d_child);
+	if (!IS_ROOT(anon))
+		list_add(&anon->d_u.d_child, &anon->d_parent->d_subdirs);
+	else
+		INIT_LIST_HEAD(&anon->d_u.d_child);
+
+	anon->d_flags &= ~DCACHE_DISCONNECTED;
+}
+
+/**
+ * d_materialise_unique - introduce an inode into the tree
+ * @dentry: candidate dentry
+ * @inode: inode to bind to the dentry, to which aliases may be attached
+ *
+ * Introduces an dentry into the tree, substituting an extant disconnected
+ * root directory alias in its place if there is one
+ */
+struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode)
+{
+	struct dentry *alias, *actual;
+
+	BUG_ON(!d_unhashed(dentry));
+
+	spin_lock(&dcache_lock);
+
+	if (!inode) {
+		actual = dentry;
+		dentry->d_inode = NULL;
+		goto found_lock;
+	}
+
+	/* See if a disconnected directory already exists as an anonymous root
+	 * that we should splice into the tree instead */
+	if (S_ISDIR(inode->i_mode) && (alias = __d_find_alias(inode, 1))) {
+		spin_lock(&alias->d_lock);
+
+		/* Is this a mountpoint that we could splice into our tree? */
+		if (IS_ROOT(alias))
+			goto connect_mountpoint;
+
+		if (alias->d_name.len == dentry->d_name.len &&
+		    alias->d_parent == dentry->d_parent &&
+		    memcmp(alias->d_name.name,
+			   dentry->d_name.name,
+			   dentry->d_name.len) == 0)
+			goto replace_with_alias;
+
+		spin_unlock(&alias->d_lock);
+
+		/* Doh! Seem to be aliasing directories for some reason... */
+		dput(alias);
+	}
+
+	/* Add a unique reference */
+	actual = __d_instantiate_unique(dentry, inode);
+	if (!actual)
+		actual = dentry;
+	else if (unlikely(!d_unhashed(actual)))
+		goto shouldnt_be_hashed;
+
+found_lock:
+	spin_lock(&actual->d_lock);
+found:
+	_d_rehash(actual);
+	spin_unlock(&actual->d_lock);
+	spin_unlock(&dcache_lock);
+
+	if (actual == dentry) {
+		security_d_instantiate(dentry, inode);
+		return NULL;
+	}
+
+	iput(inode);
+	return actual;
+
+	/* Convert the anonymous/root alias into an ordinary dentry */
+connect_mountpoint:
+	__d_materialise_dentry(dentry, alias);
+
+	/* Replace the candidate dentry with the alias in the tree */
+replace_with_alias:
+	__d_drop(alias);
+	actual = alias;
+	goto found;
+
+shouldnt_be_hashed:
+	spin_unlock(&dcache_lock);
+	BUG();
+	goto shouldnt_be_hashed;
+}
+
 /**
  * d_path - return the path of a dentry
  * @dentry: dentry to report
@@ -1784,6 +1919,7 @@ EXPORT_SYMBOL(d_instantiate);
 EXPORT_SYMBOL(d_invalidate);
 EXPORT_SYMBOL(d_lookup);
 EXPORT_SYMBOL(d_move);
+EXPORT_SYMBOL_GPL(d_materialise_unique);
 EXPORT_SYMBOL(d_path);
 EXPORT_SYMBOL(d_prune_aliases);
 EXPORT_SYMBOL(d_rehash);
-- 
cgit v1.1


From 7d4e2747a0412583526a162fbbd6edeeafcceb08 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 22 Aug 2006 20:06:07 -0400
Subject: NFS: Fix up split of fs/nfs/inode.c

Fix ups for the splitting of the superblock stuff out of fs/nfs/inode.c,
including:

 (*) Move the callback tcpport module param into callback.c.

 (*) Move the idmap cache timeout module param into idmap.c.

 (*) Changes to internal.h:

     (*) namespace-nfs4.c was renamed to nfs4namespace.c.

     (*) nfs_stat_to_errno() is in nfs2xdr.c, not nfs4xdr.c.

     (*) nfs4xdr.c is contingent on CONFIG_NFS_V4.

     (*) nfs4_path() is only uses if CONFIG_NFS_V4 is set.

Plus also:

 (*) The sec_flavours[] table should really be const.

Signed-Off-By: David Howells <dhowells@redhat.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/callback.c | 15 +++++++++++++++
 fs/nfs/idmap.c    | 14 ++++++++++++++
 fs/nfs/internal.h | 12 ++++++------
 fs/nfs/super.c    | 40 ++++------------------------------------
 4 files changed, 39 insertions(+), 42 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index fe0a6b8..d6c4bae 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -36,6 +36,21 @@ static struct svc_program nfs4_callback_program;
 
 unsigned int nfs_callback_set_tcpport;
 unsigned short nfs_callback_tcpport;
+static const int nfs_set_port_min = 0;
+static const int nfs_set_port_max = 65535;
+
+static int param_set_port(const char *val, struct kernel_param *kp)
+{
+	char *endp;
+	int num = simple_strtol(val, &endp, 0);
+	if (endp == val || *endp || num < nfs_set_port_min || num > nfs_set_port_max)
+		return -EINVAL;
+	*((int *)kp->arg) = num;
+	return 0;
+}
+
+module_param_call(callback_tcpport, param_set_port, param_get_int,
+		 &nfs_callback_set_tcpport, 0644);
 
 /*
  * This is the callback kernel thread.
diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c
index 07a5dd5..873deb9 100644
--- a/fs/nfs/idmap.c
+++ b/fs/nfs/idmap.c
@@ -57,6 +57,20 @@
 /* Default cache timeout is 10 minutes */
 unsigned int nfs_idmap_cache_timeout = 600 * HZ;
 
+static int param_set_idmap_timeout(const char *val, struct kernel_param *kp)
+{
+	char *endp;
+	int num = simple_strtol(val, &endp, 0);
+	int jif = num * HZ;
+	if (endp == val || *endp || num < 0 || jif < num)
+		return -EINVAL;
+	*((int *)kp->arg) = jif;
+	return 0;
+}
+
+module_param_call(idmap_cache_timeout, param_set_idmap_timeout, param_get_int,
+		 &nfs_idmap_cache_timeout, 0644);
+
 struct idmap_hashent {
 	unsigned long ih_expires;
 	__u32 ih_id;
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 660e9ff..4802157 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -15,7 +15,7 @@ struct nfs_clone_mount {
 	rpc_authflavor_t authflavor;
 };
 
-/* namespace-nfs4.c */
+/* nfs4namespace.c */
 #ifdef CONFIG_NFS_V4
 extern struct vfsmount *nfs_do_refmount(const struct vfsmount *mnt_parent, struct dentry *dentry);
 #else
@@ -46,6 +46,7 @@ extern void nfs_destroy_directcache(void);
 #endif
 
 /* nfs2xdr.c */
+extern int nfs_stat_to_errno(int);
 extern struct rpc_procinfo nfs_procedures[];
 extern u32 * nfs_decode_dirent(u32 *, struct nfs_entry *, int);
 
@@ -54,8 +55,9 @@ extern struct rpc_procinfo nfs3_procedures[];
 extern u32 *nfs3_decode_dirent(u32 *, struct nfs_entry *, int);
 
 /* nfs4xdr.c */
-extern int nfs_stat_to_errno(int);
+#ifdef CONFIG_NFS_V4
 extern u32 *nfs4_decode_dirent(u32 *p, struct nfs_entry *entry, int plus);
+#endif
 
 /* nfs4proc.c */
 #ifdef CONFIG_NFS_V4
@@ -97,15 +99,13 @@ extern char *nfs_path(const char *base, const struct dentry *dentry,
 /*
  * Determine the mount path as a string
  */
+#ifdef CONFIG_NFS_V4
 static inline char *
 nfs4_path(const struct dentry *dentry, char *buffer, ssize_t buflen)
 {
-#ifdef CONFIG_NFS_V4
 	return nfs_path(NFS_SB(dentry->d_sb)->mnt_path, dentry, buffer, buflen);
-#else
-	return NULL;
-#endif
 }
+#endif
 
 /*
  * Determine the device name as a string
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 06c321be..6349734 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -187,40 +187,6 @@ static struct super_operations nfs4_sops = {
 };
 #endif
 
-#ifdef CONFIG_NFS_V4
-static const int nfs_set_port_min = 0;
-static const int nfs_set_port_max = 65535;
-
-static int param_set_port(const char *val, struct kernel_param *kp)
-{
-	char *endp;
-	int num = simple_strtol(val, &endp, 0);
-	if (endp == val || *endp || num < nfs_set_port_min || num > nfs_set_port_max)
-		return -EINVAL;
-	*((int *)kp->arg) = num;
-	return 0;
-}
-
-module_param_call(callback_tcpport, param_set_port, param_get_int,
-		 &nfs_callback_set_tcpport, 0644);
-#endif
-
-#ifdef CONFIG_NFS_V4
-static int param_set_idmap_timeout(const char *val, struct kernel_param *kp)
-{
-	char *endp;
-	int num = simple_strtol(val, &endp, 0);
-	int jif = num * HZ;
-	if (endp == val || *endp || num < 0 || jif < num)
-		return -EINVAL;
-	*((int *)kp->arg) = jif;
-	return 0;
-}
-
-module_param_call(idmap_cache_timeout, param_set_idmap_timeout, param_get_int,
-		 &nfs_idmap_cache_timeout, 0644);
-#endif
-
 static struct shrinker *acl_shrinker;
 
 /*
@@ -328,9 +294,12 @@ static int nfs_statfs(struct dentry *dentry, struct kstatfs *buf)
 
 }
 
+/*
+ * Map the security flavour number to a name
+ */
 static const char *nfs_pseudoflavour_to_name(rpc_authflavor_t flavour)
 {
-	static struct {
+	static const struct {
 		rpc_authflavor_t flavour;
 		const char *str;
 	} sec_flavours[] = {
@@ -1368,7 +1337,6 @@ static int nfs4_get_sb(struct file_system_type *fs_type,
 	}
 
 	s = sget(fs_type, nfs4_compare_super, nfs_set_super, server);
-
 	if (IS_ERR(s)) {
 		error = PTR_ERR(s);
 		goto out_free;
-- 
cgit v1.1


From 0a8ea4372b2868842986118ca90912f3382e6c5a Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 22 Aug 2006 20:06:08 -0400
Subject: NFS: Disambiguate nfs_stat_to_errno()

Rename the NFS4 version of nfs_stat_to_errno() so that it doesn't conflict with
the common one used by NFS2 and NFS3.

Signed-Off-By: David Howells <dhowells@redhat.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/nfs4xdr.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 730ec8f..1dee6ef 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -58,7 +58,7 @@
 /* Mapping from NFS error code to "errno" error code. */
 #define errno_NFSERR_IO		EIO
 
-static int nfs_stat_to_errno(int);
+static int nfs4_stat_to_errno(int);
 
 /* NFSv4 COMPOUND tags are only wanted for debugging purposes */
 #ifdef DEBUG
@@ -2127,7 +2127,7 @@ static int decode_op_hdr(struct xdr_stream *xdr, enum nfs_opnum4 expected)
 	}
 	READ32(nfserr);
 	if (nfserr != NFS_OK)
-		return -nfs_stat_to_errno(nfserr);
+		return -nfs4_stat_to_errno(nfserr);
 	return 0;
 }
 
@@ -3598,7 +3598,7 @@ static int decode_setclientid(struct xdr_stream *xdr, struct nfs4_client *clp)
 		READ_BUF(len);
 		return -NFSERR_CLID_INUSE;
 	} else
-		return -nfs_stat_to_errno(nfserr);
+		return -nfs4_stat_to_errno(nfserr);
 
 	return 0;
 }
@@ -4256,7 +4256,7 @@ static int nfs4_xdr_dec_fsinfo(struct rpc_rqst *req, uint32_t *p, struct nfs_fsi
 	if (!status)
 		status = decode_fsinfo(&xdr, fsinfo);
 	if (!status)
-		status = -nfs_stat_to_errno(hdr.status);
+		status = -nfs4_stat_to_errno(hdr.status);
 	return status;
 }
 
@@ -4346,7 +4346,7 @@ static int nfs4_xdr_dec_setclientid(struct rpc_rqst *req, uint32_t *p,
 	if (!status)
 		status = decode_setclientid(&xdr, clp);
 	if (!status)
-		status = -nfs_stat_to_errno(hdr.status);
+		status = -nfs4_stat_to_errno(hdr.status);
 	return status;
 }
 
@@ -4368,7 +4368,7 @@ static int nfs4_xdr_dec_setclientid_confirm(struct rpc_rqst *req, uint32_t *p, s
 	if (!status)
 		status = decode_fsinfo(&xdr, fsinfo);
 	if (!status)
-		status = -nfs_stat_to_errno(hdr.status);
+		status = -nfs4_stat_to_errno(hdr.status);
 	return status;
 }
 
@@ -4521,7 +4521,7 @@ static struct {
  * This one is used jointly by NFSv2 and NFSv3.
  */
 static int
-nfs_stat_to_errno(int stat)
+nfs4_stat_to_errno(int stat)
 {
 	int i;
 	for (i = 0; nfs_errtbl[i].stat != -1; i++) {
-- 
cgit v1.1


From 5ae1fbce142b67bf59e15fb1af96e88a96abde7b Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 22 Aug 2006 20:06:08 -0400
Subject: NFS: Fix NFS4 callback up/down prototypes

Make the nfs_callback_up()/down() prototypes just do nothing if NFS4 is not
enabled.  Also make the down function void type since we can't really do
anything if it fails.

Signed-Off-By: David Howells <dhowells@redhat.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/callback.c | 5 +----
 fs/nfs/callback.h | 7 ++++++-
 2 files changed, 7 insertions(+), 5 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index d6c4bae..b1f7dc4 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -149,10 +149,8 @@ out_err:
 /*
  * Kill the server process if it is not already up.
  */
-int nfs_callback_down(void)
+void nfs_callback_down(void)
 {
-	int ret = 0;
-
 	lock_kernel();
 	mutex_lock(&nfs_callback_mutex);
 	nfs_callback_info.users--;
@@ -164,7 +162,6 @@ int nfs_callback_down(void)
 	} while (wait_for_completion_timeout(&nfs_callback_info.stopped, 5*HZ) == 0);
 	mutex_unlock(&nfs_callback_mutex);
 	unlock_kernel();
-	return ret;
 }
 
 static int nfs_callback_authenticate(struct svc_rqst *rqstp)
diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h
index b252e7f..5676163d 100644
--- a/fs/nfs/callback.h
+++ b/fs/nfs/callback.h
@@ -62,8 +62,13 @@ struct cb_recallargs {
 extern unsigned nfs4_callback_getattr(struct cb_getattrargs *args, struct cb_getattrres *res);
 extern unsigned nfs4_callback_recall(struct cb_recallargs *args, void *dummy);
 
+#ifdef CONFIG_NFS_V4
 extern int nfs_callback_up(void);
-extern int nfs_callback_down(void);
+extern void nfs_callback_down(void);
+#else
+#define nfs_callback_up()	(0)
+#define nfs_callback_down()	do {} while(0)
+#endif
 
 extern unsigned int nfs_callback_set_tcpport;
 extern unsigned short nfs_callback_tcpport;
-- 
cgit v1.1


From adfa6f980bd46974e6b32b22dd0c45e3f52063f4 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 22 Aug 2006 20:06:08 -0400
Subject: NFS: Rename struct nfs4_client to struct nfs_client

Rename struct nfs4_client to struct nfs_client so that it can become the basis
for a general client record for NFS2 and NFS3 in addition to NFS4.

Signed-Off-By: David Howells <dhowells@redhat.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/callback.c      |  2 +-
 fs/nfs/callback_proc.c |  4 ++--
 fs/nfs/delegation.c    | 24 ++++++++++++------------
 fs/nfs/delegation.h    | 10 +++++-----
 fs/nfs/idmap.c         | 12 ++++++------
 fs/nfs/nfs4_fs.h       | 30 +++++++++++++++---------------
 fs/nfs/nfs4proc.c      | 32 ++++++++++++++++----------------
 fs/nfs/nfs4renewd.c    |  8 ++++----
 fs/nfs/nfs4state.c     | 50 +++++++++++++++++++++++++-------------------------
 fs/nfs/nfs4xdr.c       | 18 +++++++++---------
 fs/nfs/super.c         |  4 ++--
 11 files changed, 97 insertions(+), 97 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index b1f7dc4..1b596b6 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -167,7 +167,7 @@ void nfs_callback_down(void)
 static int nfs_callback_authenticate(struct svc_rqst *rqstp)
 {
 	struct in_addr *addr = &rqstp->rq_addr.sin_addr;
-	struct nfs4_client *clp;
+	struct nfs_client *clp;
 
 	/* Don't talk to strangers */
 	clp = nfs4_find_client(addr);
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index 7719483..55d6e2e 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -15,7 +15,7 @@
  
 unsigned nfs4_callback_getattr(struct cb_getattrargs *args, struct cb_getattrres *res)
 {
-	struct nfs4_client *clp;
+	struct nfs_client *clp;
 	struct nfs_delegation *delegation;
 	struct nfs_inode *nfsi;
 	struct inode *inode;
@@ -56,7 +56,7 @@ out:
 
 unsigned nfs4_callback_recall(struct cb_recallargs *args, void *dummy)
 {
-	struct nfs4_client *clp;
+	struct nfs_client *clp;
 	struct inode *inode;
 	unsigned res;
 	
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index 9540a31..5a1105c 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -114,7 +114,7 @@ void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, st
  */
 int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res)
 {
-	struct nfs4_client *clp = NFS_SERVER(inode)->nfs4_state;
+	struct nfs_client *clp = NFS_SERVER(inode)->nfs4_state;
 	struct nfs_inode *nfsi = NFS_I(inode);
 	struct nfs_delegation *delegation;
 	int status = 0;
@@ -176,7 +176,7 @@ static void nfs_msync_inode(struct inode *inode)
  */
 int __nfs_inode_return_delegation(struct inode *inode)
 {
-	struct nfs4_client *clp = NFS_SERVER(inode)->nfs4_state;
+	struct nfs_client *clp = NFS_SERVER(inode)->nfs4_state;
 	struct nfs_inode *nfsi = NFS_I(inode);
 	struct nfs_delegation *delegation;
 	int res = 0;
@@ -208,7 +208,7 @@ int __nfs_inode_return_delegation(struct inode *inode)
  */
 void nfs_return_all_delegations(struct super_block *sb)
 {
-	struct nfs4_client *clp = NFS_SB(sb)->nfs4_state;
+	struct nfs_client *clp = NFS_SB(sb)->nfs4_state;
 	struct nfs_delegation *delegation;
 	struct inode *inode;
 
@@ -232,7 +232,7 @@ restart:
 
 int nfs_do_expire_all_delegations(void *ptr)
 {
-	struct nfs4_client *clp = ptr;
+	struct nfs_client *clp = ptr;
 	struct nfs_delegation *delegation;
 	struct inode *inode;
 
@@ -258,7 +258,7 @@ out:
 	module_put_and_exit(0);
 }
 
-void nfs_expire_all_delegations(struct nfs4_client *clp)
+void nfs_expire_all_delegations(struct nfs_client *clp)
 {
 	struct task_struct *task;
 
@@ -276,7 +276,7 @@ void nfs_expire_all_delegations(struct nfs4_client *clp)
 /*
  * Return all delegations following an NFS4ERR_CB_PATH_DOWN error.
  */
-void nfs_handle_cb_pathdown(struct nfs4_client *clp)
+void nfs_handle_cb_pathdown(struct nfs_client *clp)
 {
 	struct nfs_delegation *delegation;
 	struct inode *inode;
@@ -299,7 +299,7 @@ restart:
 
 struct recall_threadargs {
 	struct inode *inode;
-	struct nfs4_client *clp;
+	struct nfs_client *clp;
 	const nfs4_stateid *stateid;
 
 	struct completion started;
@@ -310,7 +310,7 @@ static int recall_thread(void *data)
 {
 	struct recall_threadargs *args = (struct recall_threadargs *)data;
 	struct inode *inode = igrab(args->inode);
-	struct nfs4_client *clp = NFS_SERVER(inode)->nfs4_state;
+	struct nfs_client *clp = NFS_SERVER(inode)->nfs4_state;
 	struct nfs_inode *nfsi = NFS_I(inode);
 	struct nfs_delegation *delegation;
 
@@ -371,7 +371,7 @@ out_module_put:
 /*
  * Retrieve the inode associated with a delegation
  */
-struct inode *nfs_delegation_find_inode(struct nfs4_client *clp, const struct nfs_fh *fhandle)
+struct inode *nfs_delegation_find_inode(struct nfs_client *clp, const struct nfs_fh *fhandle)
 {
 	struct nfs_delegation *delegation;
 	struct inode *res = NULL;
@@ -389,7 +389,7 @@ struct inode *nfs_delegation_find_inode(struct nfs4_client *clp, const struct nf
 /*
  * Mark all delegations as needing to be reclaimed
  */
-void nfs_delegation_mark_reclaim(struct nfs4_client *clp)
+void nfs_delegation_mark_reclaim(struct nfs_client *clp)
 {
 	struct nfs_delegation *delegation;
 	spin_lock(&clp->cl_lock);
@@ -401,7 +401,7 @@ void nfs_delegation_mark_reclaim(struct nfs4_client *clp)
 /*
  * Reap all unclaimed delegations after reboot recovery is done
  */
-void nfs_delegation_reap_unclaimed(struct nfs4_client *clp)
+void nfs_delegation_reap_unclaimed(struct nfs_client *clp)
 {
 	struct nfs_delegation *delegation, *n;
 	LIST_HEAD(head);
@@ -423,7 +423,7 @@ void nfs_delegation_reap_unclaimed(struct nfs4_client *clp)
 
 int nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode)
 {
-	struct nfs4_client *clp = NFS_SERVER(inode)->nfs4_state;
+	struct nfs_client *clp = NFS_SERVER(inode)->nfs4_state;
 	struct nfs_inode *nfsi = NFS_I(inode);
 	struct nfs_delegation *delegation;
 	int res = 0;
diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h
index 3858694..2cfd4b2 100644
--- a/fs/nfs/delegation.h
+++ b/fs/nfs/delegation.h
@@ -29,13 +29,13 @@ void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, st
 int __nfs_inode_return_delegation(struct inode *inode);
 int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *stateid);
 
-struct inode *nfs_delegation_find_inode(struct nfs4_client *clp, const struct nfs_fh *fhandle);
+struct inode *nfs_delegation_find_inode(struct nfs_client *clp, const struct nfs_fh *fhandle);
 void nfs_return_all_delegations(struct super_block *sb);
-void nfs_expire_all_delegations(struct nfs4_client *clp);
-void nfs_handle_cb_pathdown(struct nfs4_client *clp);
+void nfs_expire_all_delegations(struct nfs_client *clp);
+void nfs_handle_cb_pathdown(struct nfs_client *clp);
 
-void nfs_delegation_mark_reclaim(struct nfs4_client *clp);
-void nfs_delegation_reap_unclaimed(struct nfs4_client *clp);
+void nfs_delegation_mark_reclaim(struct nfs_client *clp);
+void nfs_delegation_reap_unclaimed(struct nfs_client *clp);
 
 /* NFSv4 delegation-related procedures */
 int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4_stateid *stateid);
diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c
index 873deb9..d05148e 100644
--- a/fs/nfs/idmap.c
+++ b/fs/nfs/idmap.c
@@ -109,7 +109,7 @@ static struct rpc_pipe_ops idmap_upcall_ops = {
 };
 
 void
-nfs_idmap_new(struct nfs4_client *clp)
+nfs_idmap_new(struct nfs_client *clp)
 {
 	struct idmap *idmap;
 
@@ -138,7 +138,7 @@ nfs_idmap_new(struct nfs4_client *clp)
 }
 
 void
-nfs_idmap_delete(struct nfs4_client *clp)
+nfs_idmap_delete(struct nfs_client *clp)
 {
 	struct idmap *idmap = clp->cl_idmap;
 
@@ -491,27 +491,27 @@ static unsigned int fnvhash32(const void *buf, size_t buflen)
 	return (hash);
 }
 
-int nfs_map_name_to_uid(struct nfs4_client *clp, const char *name, size_t namelen, __u32 *uid)
+int nfs_map_name_to_uid(struct nfs_client *clp, const char *name, size_t namelen, __u32 *uid)
 {
 	struct idmap *idmap = clp->cl_idmap;
 
 	return nfs_idmap_id(idmap, &idmap->idmap_user_hash, name, namelen, uid);
 }
 
-int nfs_map_group_to_gid(struct nfs4_client *clp, const char *name, size_t namelen, __u32 *uid)
+int nfs_map_group_to_gid(struct nfs_client *clp, const char *name, size_t namelen, __u32 *uid)
 {
 	struct idmap *idmap = clp->cl_idmap;
 
 	return nfs_idmap_id(idmap, &idmap->idmap_group_hash, name, namelen, uid);
 }
 
-int nfs_map_uid_to_name(struct nfs4_client *clp, __u32 uid, char *buf)
+int nfs_map_uid_to_name(struct nfs_client *clp, __u32 uid, char *buf)
 {
 	struct idmap *idmap = clp->cl_idmap;
 
 	return nfs_idmap_name(idmap, &idmap->idmap_user_hash, uid, buf);
 }
-int nfs_map_gid_to_group(struct nfs4_client *clp, __u32 uid, char *buf)
+int nfs_map_gid_to_group(struct nfs_client *clp, __u32 uid, char *buf)
 {
 	struct idmap *idmap = clp->cl_idmap;
 
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index 9a10286..4e334cb 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -43,9 +43,9 @@ enum nfs4_client_state {
 };
 
 /*
- * The nfs4_client identifies our client state to the server.
+ * The nfs_client identifies our client state to the server.
  */
-struct nfs4_client {
+struct nfs_client {
 	struct list_head	cl_servers;	/* Global list of servers */
 	struct in_addr		cl_addr;	/* Server identifier */
 	u64			cl_clientid;	/* constant */
@@ -127,7 +127,7 @@ static inline void nfs_confirm_seqid(struct nfs_seqid_counter *seqid, int status
 struct nfs4_state_owner {
 	spinlock_t	     so_lock;
 	struct list_head     so_list;	 /* per-clientid list of state_owners */
-	struct nfs4_client   *so_client;
+	struct nfs_client    *so_client;
 	u32                  so_id;      /* 32-bit identifier, unique */
 	atomic_t	     so_count;
 
@@ -210,10 +210,10 @@ extern ssize_t nfs4_listxattr(struct dentry *, char *, size_t);
 
 /* nfs4proc.c */
 extern int nfs4_map_errors(int err);
-extern int nfs4_proc_setclientid(struct nfs4_client *, u32, unsigned short, struct rpc_cred *);
-extern int nfs4_proc_setclientid_confirm(struct nfs4_client *, struct rpc_cred *);
-extern int nfs4_proc_async_renew(struct nfs4_client *, struct rpc_cred *);
-extern int nfs4_proc_renew(struct nfs4_client *, struct rpc_cred *);
+extern int nfs4_proc_setclientid(struct nfs_client *, u32, unsigned short, struct rpc_cred *);
+extern int nfs4_proc_setclientid_confirm(struct nfs_client *, struct rpc_cred *);
+extern int nfs4_proc_async_renew(struct nfs_client *, struct rpc_cred *);
+extern int nfs4_proc_renew(struct nfs_client *, struct rpc_cred *);
 extern int nfs4_do_close(struct inode *inode, struct nfs4_state *state);
 extern struct dentry *nfs4_atomic_open(struct inode *, struct dentry *, struct nameidata *);
 extern int nfs4_open_revalidate(struct inode *, struct dentry *, int, struct nameidata *);
@@ -231,19 +231,19 @@ extern const u32 nfs4_fsinfo_bitmap[2];
 extern const u32 nfs4_fs_locations_bitmap[2];
 
 /* nfs4renewd.c */
-extern void nfs4_schedule_state_renewal(struct nfs4_client *);
+extern void nfs4_schedule_state_renewal(struct nfs_client *);
 extern void nfs4_renewd_prepare_shutdown(struct nfs_server *);
-extern void nfs4_kill_renewd(struct nfs4_client *);
+extern void nfs4_kill_renewd(struct nfs_client *);
 extern void nfs4_renew_state(void *);
 
 /* nfs4state.c */
 extern void init_nfsv4_state(struct nfs_server *);
 extern void destroy_nfsv4_state(struct nfs_server *);
-extern struct nfs4_client *nfs4_get_client(struct in_addr *);
-extern void nfs4_put_client(struct nfs4_client *clp);
-extern struct nfs4_client *nfs4_find_client(struct in_addr *);
-struct rpc_cred *nfs4_get_renew_cred(struct nfs4_client *clp);
-extern u32 nfs4_alloc_lockowner_id(struct nfs4_client *);
+extern struct nfs_client *nfs4_get_client(struct in_addr *);
+extern void nfs4_put_client(struct nfs_client *clp);
+extern struct nfs_client *nfs4_find_client(struct in_addr *);
+struct rpc_cred *nfs4_get_renew_cred(struct nfs_client *clp);
+extern u32 nfs4_alloc_lockowner_id(struct nfs_client *);
 
 extern struct nfs4_state_owner * nfs4_get_state_owner(struct nfs_server *, struct rpc_cred *);
 extern void nfs4_put_state_owner(struct nfs4_state_owner *);
@@ -252,7 +252,7 @@ extern struct nfs4_state * nfs4_get_open_state(struct inode *, struct nfs4_state
 extern void nfs4_put_open_state(struct nfs4_state *);
 extern void nfs4_close_state(struct nfs4_state *, mode_t);
 extern void nfs4_state_set_mode_locked(struct nfs4_state *, mode_t);
-extern void nfs4_schedule_state_recovery(struct nfs4_client *);
+extern void nfs4_schedule_state_recovery(struct nfs_client *);
 extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp);
 extern int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl);
 extern void nfs4_copy_stateid(nfs4_stateid *, struct nfs4_state *, fl_owner_t);
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index b14145b..168f3ff 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -64,7 +64,7 @@ static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinf
 static int nfs4_async_handle_error(struct rpc_task *, const struct nfs_server *);
 static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry);
 static int nfs4_handle_exception(const struct nfs_server *server, int errorcode, struct nfs4_exception *exception);
-static int nfs4_wait_clnt_recover(struct rpc_clnt *clnt, struct nfs4_client *clp);
+static int nfs4_wait_clnt_recover(struct rpc_clnt *clnt, struct nfs_client *clp);
 
 /* Prevent leaks of NFSv4 errors into userland */
 int nfs4_map_errors(int err)
@@ -195,7 +195,7 @@ static void nfs4_setup_readdir(u64 cookie, u32 *verifier, struct dentry *dentry,
 
 static void renew_lease(const struct nfs_server *server, unsigned long timestamp)
 {
-	struct nfs4_client *clp = server->nfs4_state;
+	struct nfs_client *clp = server->nfs4_state;
 	spin_lock(&clp->cl_lock);
 	if (time_before(clp->cl_last_renewal,timestamp))
 		clp->cl_last_renewal = timestamp;
@@ -792,7 +792,7 @@ out:
 
 int nfs4_recover_expired_lease(struct nfs_server *server)
 {
-	struct nfs4_client *clp = server->nfs4_state;
+	struct nfs_client *clp = server->nfs4_state;
 
 	if (test_and_clear_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state))
 		nfs4_schedule_state_recovery(clp);
@@ -867,7 +867,7 @@ static int _nfs4_open_delegated(struct inode *inode, int flags, struct rpc_cred
 {
 	struct nfs_delegation *delegation;
 	struct nfs_server *server = NFS_SERVER(inode);
-	struct nfs4_client *clp = server->nfs4_state;
+	struct nfs_client *clp = server->nfs4_state;
 	struct nfs_inode *nfsi = NFS_I(inode);
 	struct nfs4_state_owner *sp = NULL;
 	struct nfs4_state *state = NULL;
@@ -953,7 +953,7 @@ static int _nfs4_do_open(struct inode *dir, struct dentry *dentry, int flags, st
 	struct nfs4_state_owner  *sp;
 	struct nfs4_state     *state = NULL;
 	struct nfs_server       *server = NFS_SERVER(dir);
-	struct nfs4_client *clp = server->nfs4_state;
+	struct nfs_client *clp = server->nfs4_state;
 	struct nfs4_opendata *opendata;
 	int                     status;
 
@@ -2521,7 +2521,7 @@ static void nfs4_proc_commit_setup(struct nfs_write_data *data, int how)
  */
 static void nfs4_renew_done(struct rpc_task *task, void *data)
 {
-	struct nfs4_client *clp = (struct nfs4_client *)task->tk_msg.rpc_argp;
+	struct nfs_client *clp = (struct nfs_client *)task->tk_msg.rpc_argp;
 	unsigned long timestamp = (unsigned long)data;
 
 	if (task->tk_status < 0) {
@@ -2543,7 +2543,7 @@ static const struct rpc_call_ops nfs4_renew_ops = {
 	.rpc_call_done = nfs4_renew_done,
 };
 
-int nfs4_proc_async_renew(struct nfs4_client *clp, struct rpc_cred *cred)
+int nfs4_proc_async_renew(struct nfs_client *clp, struct rpc_cred *cred)
 {
 	struct rpc_message msg = {
 		.rpc_proc	= &nfs4_procedures[NFSPROC4_CLNT_RENEW],
@@ -2555,7 +2555,7 @@ int nfs4_proc_async_renew(struct nfs4_client *clp, struct rpc_cred *cred)
 			&nfs4_renew_ops, (void *)jiffies);
 }
 
-int nfs4_proc_renew(struct nfs4_client *clp, struct rpc_cred *cred)
+int nfs4_proc_renew(struct nfs_client *clp, struct rpc_cred *cred)
 {
 	struct rpc_message msg = {
 		.rpc_proc	= &nfs4_procedures[NFSPROC4_CLNT_RENEW],
@@ -2791,7 +2791,7 @@ static int nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t buflen
 static int
 nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server)
 {
-	struct nfs4_client *clp = server->nfs4_state;
+	struct nfs_client *clp = server->nfs4_state;
 
 	if (!clp || task->tk_status >= 0)
 		return 0;
@@ -2828,7 +2828,7 @@ static int nfs4_wait_bit_interruptible(void *word)
 	return 0;
 }
 
-static int nfs4_wait_clnt_recover(struct rpc_clnt *clnt, struct nfs4_client *clp)
+static int nfs4_wait_clnt_recover(struct rpc_clnt *clnt, struct nfs_client *clp)
 {
 	sigset_t oldset;
 	int res;
@@ -2871,7 +2871,7 @@ static int nfs4_delay(struct rpc_clnt *clnt, long *timeout)
  */
 int nfs4_handle_exception(const struct nfs_server *server, int errorcode, struct nfs4_exception *exception)
 {
-	struct nfs4_client *clp = server->nfs4_state;
+	struct nfs_client *clp = server->nfs4_state;
 	int ret = errorcode;
 
 	exception->retry = 0;
@@ -2898,7 +2898,7 @@ int nfs4_handle_exception(const struct nfs_server *server, int errorcode, struct
 	return nfs4_map_errors(ret);
 }
 
-int nfs4_proc_setclientid(struct nfs4_client *clp, u32 program, unsigned short port, struct rpc_cred *cred)
+int nfs4_proc_setclientid(struct nfs_client *clp, u32 program, unsigned short port, struct rpc_cred *cred)
 {
 	nfs4_verifier sc_verifier;
 	struct nfs4_setclientid setclientid = {
@@ -2945,7 +2945,7 @@ int nfs4_proc_setclientid(struct nfs4_client *clp, u32 program, unsigned short p
 	return status;
 }
 
-static int _nfs4_proc_setclientid_confirm(struct nfs4_client *clp, struct rpc_cred *cred)
+static int _nfs4_proc_setclientid_confirm(struct nfs_client *clp, struct rpc_cred *cred)
 {
 	struct nfs_fsinfo fsinfo;
 	struct rpc_message msg = {
@@ -2969,7 +2969,7 @@ static int _nfs4_proc_setclientid_confirm(struct nfs4_client *clp, struct rpc_cr
 	return status;
 }
 
-int nfs4_proc_setclientid_confirm(struct nfs4_client *clp, struct rpc_cred *cred)
+int nfs4_proc_setclientid_confirm(struct nfs_client *clp, struct rpc_cred *cred)
 {
 	long timeout;
 	int err;
@@ -3106,7 +3106,7 @@ static int _nfs4_proc_getlk(struct nfs4_state *state, int cmd, struct file_lock
 {
 	struct inode *inode = state->inode;
 	struct nfs_server *server = NFS_SERVER(inode);
-	struct nfs4_client *clp = server->nfs4_state;
+	struct nfs_client *clp = server->nfs4_state;
 	struct nfs_lockt_args arg = {
 		.fh = NFS_FH(inode),
 		.fl = request,
@@ -3513,7 +3513,7 @@ static int nfs4_lock_expired(struct nfs4_state *state, struct file_lock *request
 
 static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock *request)
 {
-	struct nfs4_client *clp = state->owner->so_client;
+	struct nfs_client *clp = state->owner->so_client;
 	unsigned char fl_flags = request->fl_flags;
 	int status;
 
diff --git a/fs/nfs/nfs4renewd.c b/fs/nfs/nfs4renewd.c
index 5d764d8..2087640 100644
--- a/fs/nfs/nfs4renewd.c
+++ b/fs/nfs/nfs4renewd.c
@@ -61,7 +61,7 @@
 void
 nfs4_renew_state(void *data)
 {
-	struct nfs4_client *clp = (struct nfs4_client *)data;
+	struct nfs_client *clp = (struct nfs_client *)data;
 	struct rpc_cred *cred;
 	long lease, timeout;
 	unsigned long last, now;
@@ -108,7 +108,7 @@ out:
 
 /* Must be called with clp->cl_sem locked for writes */
 void
-nfs4_schedule_state_renewal(struct nfs4_client *clp)
+nfs4_schedule_state_renewal(struct nfs_client *clp)
 {
 	long timeout;
 
@@ -127,7 +127,7 @@ nfs4_schedule_state_renewal(struct nfs4_client *clp)
 void
 nfs4_renewd_prepare_shutdown(struct nfs_server *server)
 {
-	struct nfs4_client *clp = server->nfs4_state;
+	struct nfs_client *clp = server->nfs4_state;
 
 	if (!clp)
 		return;
@@ -140,7 +140,7 @@ nfs4_renewd_prepare_shutdown(struct nfs_server *server)
 
 /* Must be called with clp->cl_sem locked for writes */
 void
-nfs4_kill_renewd(struct nfs4_client *clp)
+nfs4_kill_renewd(struct nfs_client *clp)
 {
 	down_read(&clp->cl_sem);
 	if (!list_empty(&clp->cl_superblocks)) {
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 090a36b..c0b6439 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -83,10 +83,10 @@ destroy_nfsv4_state(struct nfs_server *server)
  * Since these are allocated/deallocated very rarely, we don't
  * bother putting them in a slab cache...
  */
-static struct nfs4_client *
+static struct nfs_client *
 nfs4_alloc_client(struct in_addr *addr)
 {
-	struct nfs4_client *clp;
+	struct nfs_client *clp;
 
 	if (nfs_callback_up() < 0)
 		return NULL;
@@ -111,7 +111,7 @@ nfs4_alloc_client(struct in_addr *addr)
 }
 
 static void
-nfs4_free_client(struct nfs4_client *clp)
+nfs4_free_client(struct nfs_client *clp)
 {
 	struct nfs4_state_owner *sp;
 
@@ -130,9 +130,9 @@ nfs4_free_client(struct nfs4_client *clp)
 	nfs_callback_down();
 }
 
-static struct nfs4_client *__nfs4_find_client(struct in_addr *addr)
+static struct nfs_client *__nfs4_find_client(struct in_addr *addr)
 {
-	struct nfs4_client *clp;
+	struct nfs_client *clp;
 	list_for_each_entry(clp, &nfs4_clientid_list, cl_servers) {
 		if (memcmp(&clp->cl_addr, addr, sizeof(clp->cl_addr)) == 0) {
 			atomic_inc(&clp->cl_count);
@@ -142,19 +142,19 @@ static struct nfs4_client *__nfs4_find_client(struct in_addr *addr)
 	return NULL;
 }
 
-struct nfs4_client *nfs4_find_client(struct in_addr *addr)
+struct nfs_client *nfs4_find_client(struct in_addr *addr)
 {
-	struct nfs4_client *clp;
+	struct nfs_client *clp;
 	spin_lock(&state_spinlock);
 	clp = __nfs4_find_client(addr);
 	spin_unlock(&state_spinlock);
 	return clp;
 }
 
-struct nfs4_client *
+struct nfs_client *
 nfs4_get_client(struct in_addr *addr)
 {
-	struct nfs4_client *clp, *new = NULL;
+	struct nfs_client *clp, *new = NULL;
 
 	spin_lock(&state_spinlock);
 	for (;;) {
@@ -180,7 +180,7 @@ nfs4_get_client(struct in_addr *addr)
 }
 
 void
-nfs4_put_client(struct nfs4_client *clp)
+nfs4_put_client(struct nfs_client *clp)
 {
 	if (!atomic_dec_and_lock(&clp->cl_count, &state_spinlock))
 		return;
@@ -192,7 +192,7 @@ nfs4_put_client(struct nfs4_client *clp)
 	nfs4_free_client(clp);
 }
 
-static int nfs4_init_client(struct nfs4_client *clp, struct rpc_cred *cred)
+static int nfs4_init_client(struct nfs_client *clp, struct rpc_cred *cred)
 {
 	int status = nfs4_proc_setclientid(clp, NFS4_CALLBACK,
 			nfs_callback_tcpport, cred);
@@ -204,13 +204,13 @@ static int nfs4_init_client(struct nfs4_client *clp, struct rpc_cred *cred)
 }
 
 u32
-nfs4_alloc_lockowner_id(struct nfs4_client *clp)
+nfs4_alloc_lockowner_id(struct nfs_client *clp)
 {
 	return clp->cl_lockowner_id ++;
 }
 
 static struct nfs4_state_owner *
-nfs4_client_grab_unused(struct nfs4_client *clp, struct rpc_cred *cred)
+nfs4_client_grab_unused(struct nfs_client *clp, struct rpc_cred *cred)
 {
 	struct nfs4_state_owner *sp = NULL;
 
@@ -224,7 +224,7 @@ nfs4_client_grab_unused(struct nfs4_client *clp, struct rpc_cred *cred)
 	return sp;
 }
 
-struct rpc_cred *nfs4_get_renew_cred(struct nfs4_client *clp)
+struct rpc_cred *nfs4_get_renew_cred(struct nfs_client *clp)
 {
 	struct nfs4_state_owner *sp;
 	struct rpc_cred *cred = NULL;
@@ -238,7 +238,7 @@ struct rpc_cred *nfs4_get_renew_cred(struct nfs4_client *clp)
 	return cred;
 }
 
-struct rpc_cred *nfs4_get_setclientid_cred(struct nfs4_client *clp)
+struct rpc_cred *nfs4_get_setclientid_cred(struct nfs_client *clp)
 {
 	struct nfs4_state_owner *sp;
 
@@ -251,7 +251,7 @@ struct rpc_cred *nfs4_get_setclientid_cred(struct nfs4_client *clp)
 }
 
 static struct nfs4_state_owner *
-nfs4_find_state_owner(struct nfs4_client *clp, struct rpc_cred *cred)
+nfs4_find_state_owner(struct nfs_client *clp, struct rpc_cred *cred)
 {
 	struct nfs4_state_owner *sp, *res = NULL;
 
@@ -294,7 +294,7 @@ nfs4_alloc_state_owner(void)
 void
 nfs4_drop_state_owner(struct nfs4_state_owner *sp)
 {
-	struct nfs4_client *clp = sp->so_client;
+	struct nfs_client *clp = sp->so_client;
 	spin_lock(&clp->cl_lock);
 	list_del_init(&sp->so_list);
 	spin_unlock(&clp->cl_lock);
@@ -306,7 +306,7 @@ nfs4_drop_state_owner(struct nfs4_state_owner *sp)
  */
 struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *server, struct rpc_cred *cred)
 {
-	struct nfs4_client *clp = server->nfs4_state;
+	struct nfs_client *clp = server->nfs4_state;
 	struct nfs4_state_owner *sp, *new;
 
 	get_rpccred(cred);
@@ -337,7 +337,7 @@ struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *server, struct
  */
 void nfs4_put_state_owner(struct nfs4_state_owner *sp)
 {
-	struct nfs4_client *clp = sp->so_client;
+	struct nfs_client *clp = sp->so_client;
 	struct rpc_cred *cred = sp->so_cred;
 
 	if (!atomic_dec_and_lock(&sp->so_count, &clp->cl_lock))
@@ -540,7 +540,7 @@ __nfs4_find_lock_state(struct nfs4_state *state, fl_owner_t fl_owner)
 static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, fl_owner_t fl_owner)
 {
 	struct nfs4_lock_state *lsp;
-	struct nfs4_client *clp = state->owner->so_client;
+	struct nfs_client *clp = state->owner->so_client;
 
 	lsp = kzalloc(sizeof(*lsp), GFP_KERNEL);
 	if (lsp == NULL)
@@ -752,7 +752,7 @@ out:
 
 static int reclaimer(void *);
 
-static inline void nfs4_clear_recover_bit(struct nfs4_client *clp)
+static inline void nfs4_clear_recover_bit(struct nfs_client *clp)
 {
 	smp_mb__before_clear_bit();
 	clear_bit(NFS4CLNT_STATE_RECOVER, &clp->cl_state);
@@ -764,7 +764,7 @@ static inline void nfs4_clear_recover_bit(struct nfs4_client *clp)
 /*
  * State recovery routine
  */
-static void nfs4_recover_state(struct nfs4_client *clp)
+static void nfs4_recover_state(struct nfs_client *clp)
 {
 	struct task_struct *task;
 
@@ -782,7 +782,7 @@ static void nfs4_recover_state(struct nfs4_client *clp)
 /*
  * Schedule a state recovery attempt
  */
-void nfs4_schedule_state_recovery(struct nfs4_client *clp)
+void nfs4_schedule_state_recovery(struct nfs_client *clp)
 {
 	if (!clp)
 		return;
@@ -879,7 +879,7 @@ out_err:
 	return status;
 }
 
-static void nfs4_state_mark_reclaim(struct nfs4_client *clp)
+static void nfs4_state_mark_reclaim(struct nfs_client *clp)
 {
 	struct nfs4_state_owner *sp;
 	struct nfs4_state *state;
@@ -903,7 +903,7 @@ static void nfs4_state_mark_reclaim(struct nfs4_client *clp)
 
 static int reclaimer(void *ptr)
 {
-	struct nfs4_client *clp = ptr;
+	struct nfs_client *clp = ptr;
 	struct nfs4_state_owner *sp;
 	struct nfs4_state_recovery_ops *ops;
 	struct rpc_cred *cred;
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 1dee6ef..04748ab9 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -1160,7 +1160,7 @@ static int encode_rename(struct xdr_stream *xdr, const struct qstr *oldname, con
 	return 0;
 }
 
-static int encode_renew(struct xdr_stream *xdr, const struct nfs4_client *client_stateid)
+static int encode_renew(struct xdr_stream *xdr, const struct nfs_client *client_stateid)
 {
 	uint32_t *p;
 
@@ -1246,7 +1246,7 @@ static int encode_setclientid(struct xdr_stream *xdr, const struct nfs4_setclien
 	return 0;
 }
 
-static int encode_setclientid_confirm(struct xdr_stream *xdr, const struct nfs4_client *client_state)
+static int encode_setclientid_confirm(struct xdr_stream *xdr, const struct nfs_client *client_state)
 {
         uint32_t *p;
 
@@ -1945,7 +1945,7 @@ static int nfs4_xdr_enc_server_caps(struct rpc_rqst *req, uint32_t *p, const str
 /*
  * a RENEW request
  */
-static int nfs4_xdr_enc_renew(struct rpc_rqst *req, uint32_t *p, struct nfs4_client *clp)
+static int nfs4_xdr_enc_renew(struct rpc_rqst *req, uint32_t *p, struct nfs_client *clp)
 {
 	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
@@ -1975,7 +1975,7 @@ static int nfs4_xdr_enc_setclientid(struct rpc_rqst *req, uint32_t *p, struct nf
 /*
  * a SETCLIENTID_CONFIRM request
  */
-static int nfs4_xdr_enc_setclientid_confirm(struct rpc_rqst *req, uint32_t *p, struct nfs4_client *clp)
+static int nfs4_xdr_enc_setclientid_confirm(struct rpc_rqst *req, uint32_t *p, struct nfs_client *clp)
 {
 	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
@@ -2132,7 +2132,7 @@ static int decode_op_hdr(struct xdr_stream *xdr, enum nfs_opnum4 expected)
 }
 
 /* Dummy routine */
-static int decode_ace(struct xdr_stream *xdr, void *ace, struct nfs4_client *clp)
+static int decode_ace(struct xdr_stream *xdr, void *ace, struct nfs_client *clp)
 {
 	uint32_t *p;
 	unsigned int strlen;
@@ -2636,7 +2636,7 @@ static int decode_attr_nlink(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t
 	return 0;
 }
 
-static int decode_attr_owner(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs4_client *clp, int32_t *uid)
+static int decode_attr_owner(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs_client *clp, int32_t *uid)
 {
 	uint32_t len, *p;
 
@@ -2660,7 +2660,7 @@ static int decode_attr_owner(struct xdr_stream *xdr, uint32_t *bitmap, struct nf
 	return 0;
 }
 
-static int decode_attr_group(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs4_client *clp, int32_t *gid)
+static int decode_attr_group(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs_client *clp, int32_t *gid)
 {
 	uint32_t len, *p;
 
@@ -3565,7 +3565,7 @@ static int decode_setattr(struct xdr_stream *xdr, struct nfs_setattrres *res)
 	return 0;
 }
 
-static int decode_setclientid(struct xdr_stream *xdr, struct nfs4_client *clp)
+static int decode_setclientid(struct xdr_stream *xdr, struct nfs_client *clp)
 {
 	uint32_t *p;
 	uint32_t opnum;
@@ -4335,7 +4335,7 @@ static int nfs4_xdr_dec_renew(struct rpc_rqst *rqstp, uint32_t *p, void *dummy)
  * a SETCLIENTID request
  */
 static int nfs4_xdr_dec_setclientid(struct rpc_rqst *req, uint32_t *p,
-		struct nfs4_client *clp)
+		struct nfs_client *clp)
 {
 	struct xdr_stream xdr;
 	struct compound_hdr hdr;
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 6349734..d03ede5 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -1099,7 +1099,7 @@ static int nfs_clone_nfs_sb(struct file_system_type *fs_type,
 static struct rpc_clnt *nfs4_create_client(struct nfs_server *server,
 	struct rpc_timeout *timeparms, int proto, rpc_authflavor_t flavor)
 {
-	struct nfs4_client *clp;
+	struct nfs_client *clp;
 	struct rpc_xprt *xprt = NULL;
 	struct rpc_clnt *clnt = NULL;
 	int err = -EIO;
@@ -1416,7 +1416,7 @@ static inline char *nfs4_dup_path(const struct dentry *dentry)
 static struct super_block *nfs4_clone_sb(struct nfs_server *server, struct nfs_clone_mount *data)
 {
 	const struct dentry *dentry = data->dentry;
-	struct nfs4_client *clp = server->nfs4_state;
+	struct nfs_client *clp = server->nfs4_state;
 	struct super_block *sb;
 
 	server->fsid = data->fattr->fsid;
-- 
cgit v1.1


From 7539bbab8062aadc1db95a22b377146843cfa88f Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 22 Aug 2006 20:06:09 -0400
Subject: NFS: Rename nfs_server::nfs4_state

Rename nfs_server::nfs4_state to nfs_client as it will be used to represent the
client state for NFS2 and NFS3 also.

Signed-Off-By: David Howells <dhowells@redhat.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/delegation.c | 12 ++++++------
 fs/nfs/nfs4proc.c   | 26 +++++++++++++-------------
 fs/nfs/nfs4renewd.c |  2 +-
 fs/nfs/nfs4state.c  | 10 +++++-----
 fs/nfs/nfs4xdr.c    | 10 +++++-----
 fs/nfs/super.c      |  6 +++---
 6 files changed, 33 insertions(+), 33 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index 5a1105c..cfe2397 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -52,7 +52,7 @@ static int nfs_delegation_claim_locks(struct nfs_open_context *ctx, struct nfs4_
 			case -NFS4ERR_EXPIRED:
 				/* kill_proc(fl->fl_pid, SIGLOST, 1); */
 			case -NFS4ERR_STALE_CLIENTID:
-				nfs4_schedule_state_recovery(NFS_SERVER(inode)->nfs4_state);
+				nfs4_schedule_state_recovery(NFS_SERVER(inode)->nfs_client);
 				goto out_err;
 		}
 	}
@@ -114,7 +114,7 @@ void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, st
  */
 int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res)
 {
-	struct nfs_client *clp = NFS_SERVER(inode)->nfs4_state;
+	struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
 	struct nfs_inode *nfsi = NFS_I(inode);
 	struct nfs_delegation *delegation;
 	int status = 0;
@@ -176,7 +176,7 @@ static void nfs_msync_inode(struct inode *inode)
  */
 int __nfs_inode_return_delegation(struct inode *inode)
 {
-	struct nfs_client *clp = NFS_SERVER(inode)->nfs4_state;
+	struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
 	struct nfs_inode *nfsi = NFS_I(inode);
 	struct nfs_delegation *delegation;
 	int res = 0;
@@ -208,7 +208,7 @@ int __nfs_inode_return_delegation(struct inode *inode)
  */
 void nfs_return_all_delegations(struct super_block *sb)
 {
-	struct nfs_client *clp = NFS_SB(sb)->nfs4_state;
+	struct nfs_client *clp = NFS_SB(sb)->nfs_client;
 	struct nfs_delegation *delegation;
 	struct inode *inode;
 
@@ -310,7 +310,7 @@ static int recall_thread(void *data)
 {
 	struct recall_threadargs *args = (struct recall_threadargs *)data;
 	struct inode *inode = igrab(args->inode);
-	struct nfs_client *clp = NFS_SERVER(inode)->nfs4_state;
+	struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
 	struct nfs_inode *nfsi = NFS_I(inode);
 	struct nfs_delegation *delegation;
 
@@ -423,7 +423,7 @@ void nfs_delegation_reap_unclaimed(struct nfs_client *clp)
 
 int nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode)
 {
-	struct nfs_client *clp = NFS_SERVER(inode)->nfs4_state;
+	struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
 	struct nfs_inode *nfsi = NFS_I(inode);
 	struct nfs_delegation *delegation;
 	int res = 0;
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 168f3ff..b46597f 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -195,7 +195,7 @@ static void nfs4_setup_readdir(u64 cookie, u32 *verifier, struct dentry *dentry,
 
 static void renew_lease(const struct nfs_server *server, unsigned long timestamp)
 {
-	struct nfs_client *clp = server->nfs4_state;
+	struct nfs_client *clp = server->nfs_client;
 	spin_lock(&clp->cl_lock);
 	if (time_before(clp->cl_last_renewal,timestamp))
 		clp->cl_last_renewal = timestamp;
@@ -252,7 +252,7 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry,
 	atomic_inc(&sp->so_count);
 	p->o_arg.fh = NFS_FH(dir);
 	p->o_arg.open_flags = flags,
-	p->o_arg.clientid = server->nfs4_state->cl_clientid;
+	p->o_arg.clientid = server->nfs_client->cl_clientid;
 	p->o_arg.id = sp->so_id;
 	p->o_arg.name = &dentry->d_name;
 	p->o_arg.server = server;
@@ -550,7 +550,7 @@ int nfs4_open_delegation_recall(struct dentry *dentry, struct nfs4_state *state)
 			case -NFS4ERR_STALE_STATEID:
 			case -NFS4ERR_EXPIRED:
 				/* Don't recall a delegation if it was lost */
-				nfs4_schedule_state_recovery(server->nfs4_state);
+				nfs4_schedule_state_recovery(server->nfs_client);
 				return err;
 		}
 		err = nfs4_handle_exception(server, err, &exception);
@@ -792,7 +792,7 @@ out:
 
 int nfs4_recover_expired_lease(struct nfs_server *server)
 {
-	struct nfs_client *clp = server->nfs4_state;
+	struct nfs_client *clp = server->nfs_client;
 
 	if (test_and_clear_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state))
 		nfs4_schedule_state_recovery(clp);
@@ -867,7 +867,7 @@ static int _nfs4_open_delegated(struct inode *inode, int flags, struct rpc_cred
 {
 	struct nfs_delegation *delegation;
 	struct nfs_server *server = NFS_SERVER(inode);
-	struct nfs_client *clp = server->nfs4_state;
+	struct nfs_client *clp = server->nfs_client;
 	struct nfs_inode *nfsi = NFS_I(inode);
 	struct nfs4_state_owner *sp = NULL;
 	struct nfs4_state *state = NULL;
@@ -953,7 +953,7 @@ static int _nfs4_do_open(struct inode *dir, struct dentry *dentry, int flags, st
 	struct nfs4_state_owner  *sp;
 	struct nfs4_state     *state = NULL;
 	struct nfs_server       *server = NFS_SERVER(dir);
-	struct nfs_client *clp = server->nfs4_state;
+	struct nfs_client *clp = server->nfs_client;
 	struct nfs4_opendata *opendata;
 	int                     status;
 
@@ -1133,7 +1133,7 @@ static void nfs4_close_done(struct rpc_task *task, void *data)
 			break;
 		case -NFS4ERR_STALE_STATEID:
 		case -NFS4ERR_EXPIRED:
-			nfs4_schedule_state_recovery(server->nfs4_state);
+			nfs4_schedule_state_recovery(server->nfs_client);
 			break;
 		default:
 			if (nfs4_async_handle_error(task, server) == -EAGAIN) {
@@ -2791,7 +2791,7 @@ static int nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t buflen
 static int
 nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server)
 {
-	struct nfs_client *clp = server->nfs4_state;
+	struct nfs_client *clp = server->nfs_client;
 
 	if (!clp || task->tk_status >= 0)
 		return 0;
@@ -2871,7 +2871,7 @@ static int nfs4_delay(struct rpc_clnt *clnt, long *timeout)
  */
 int nfs4_handle_exception(const struct nfs_server *server, int errorcode, struct nfs4_exception *exception)
 {
-	struct nfs_client *clp = server->nfs4_state;
+	struct nfs_client *clp = server->nfs_client;
 	int ret = errorcode;
 
 	exception->retry = 0;
@@ -3077,7 +3077,7 @@ int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4
 		switch (err) {
 			case -NFS4ERR_STALE_STATEID:
 			case -NFS4ERR_EXPIRED:
-				nfs4_schedule_state_recovery(server->nfs4_state);
+				nfs4_schedule_state_recovery(server->nfs_client);
 			case 0:
 				return 0;
 		}
@@ -3106,7 +3106,7 @@ static int _nfs4_proc_getlk(struct nfs4_state *state, int cmd, struct file_lock
 {
 	struct inode *inode = state->inode;
 	struct nfs_server *server = NFS_SERVER(inode);
-	struct nfs_client *clp = server->nfs4_state;
+	struct nfs_client *clp = server->nfs_client;
 	struct nfs_lockt_args arg = {
 		.fh = NFS_FH(inode),
 		.fl = request,
@@ -3231,7 +3231,7 @@ static void nfs4_locku_done(struct rpc_task *task, void *data)
 			break;
 		case -NFS4ERR_STALE_STATEID:
 		case -NFS4ERR_EXPIRED:
-			nfs4_schedule_state_recovery(calldata->server->nfs4_state);
+			nfs4_schedule_state_recovery(calldata->server->nfs_client);
 			break;
 		default:
 			if (nfs4_async_handle_error(task, calldata->server) == -EAGAIN) {
@@ -3343,7 +3343,7 @@ static struct nfs4_lockdata *nfs4_alloc_lockdata(struct file_lock *fl,
 	if (p->arg.lock_seqid == NULL)
 		goto out_free;
 	p->arg.lock_stateid = &lsp->ls_stateid;
-	p->arg.lock_owner.clientid = server->nfs4_state->cl_clientid;
+	p->arg.lock_owner.clientid = server->nfs_client->cl_clientid;
 	p->arg.lock_owner.id = lsp->ls_id;
 	p->lsp = lsp;
 	atomic_inc(&lsp->ls_count);
diff --git a/fs/nfs/nfs4renewd.c b/fs/nfs/nfs4renewd.c
index 2087640..ff947ec 100644
--- a/fs/nfs/nfs4renewd.c
+++ b/fs/nfs/nfs4renewd.c
@@ -127,7 +127,7 @@ nfs4_schedule_state_renewal(struct nfs_client *clp)
 void
 nfs4_renewd_prepare_shutdown(struct nfs_server *server)
 {
-	struct nfs_client *clp = server->nfs4_state;
+	struct nfs_client *clp = server->nfs_client;
 
 	if (!clp)
 		return;
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index c0b6439..fa51a7d 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -61,7 +61,7 @@ static LIST_HEAD(nfs4_clientid_list);
 void
 init_nfsv4_state(struct nfs_server *server)
 {
-	server->nfs4_state = NULL;
+	server->nfs_client = NULL;
 	INIT_LIST_HEAD(&server->nfs4_siblings);
 }
 
@@ -70,9 +70,9 @@ destroy_nfsv4_state(struct nfs_server *server)
 {
 	kfree(server->mnt_path);
 	server->mnt_path = NULL;
-	if (server->nfs4_state) {
-		nfs4_put_client(server->nfs4_state);
-		server->nfs4_state = NULL;
+	if (server->nfs_client) {
+		nfs4_put_client(server->nfs_client);
+		server->nfs_client = NULL;
 	}
 }
 
@@ -306,7 +306,7 @@ nfs4_drop_state_owner(struct nfs4_state_owner *sp)
  */
 struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *server, struct rpc_cred *cred)
 {
-	struct nfs_client *clp = server->nfs4_state;
+	struct nfs_client *clp = server->nfs_client;
 	struct nfs4_state_owner *sp, *new;
 
 	get_rpccred(cred);
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 04748ab9..9992606 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -529,7 +529,7 @@ static int encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const s
 	if (iap->ia_valid & ATTR_MODE)
 		len += 4;
 	if (iap->ia_valid & ATTR_UID) {
-		owner_namelen = nfs_map_uid_to_name(server->nfs4_state, iap->ia_uid, owner_name);
+		owner_namelen = nfs_map_uid_to_name(server->nfs_client, iap->ia_uid, owner_name);
 		if (owner_namelen < 0) {
 			printk(KERN_WARNING "nfs: couldn't resolve uid %d to string\n",
 			       iap->ia_uid);
@@ -541,7 +541,7 @@ static int encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const s
 		len += 4 + (XDR_QUADLEN(owner_namelen) << 2);
 	}
 	if (iap->ia_valid & ATTR_GID) {
-		owner_grouplen = nfs_map_gid_to_group(server->nfs4_state, iap->ia_gid, owner_group);
+		owner_grouplen = nfs_map_gid_to_group(server->nfs_client, iap->ia_gid, owner_group);
 		if (owner_grouplen < 0) {
 			printk(KERN_WARNING "nfs4: couldn't resolve gid %d to string\n",
 			       iap->ia_gid);
@@ -3051,9 +3051,9 @@ static int decode_getfattr(struct xdr_stream *xdr, struct nfs_fattr *fattr, cons
 	fattr->mode |= fmode;
 	if ((status = decode_attr_nlink(xdr, bitmap, &fattr->nlink)) != 0)
 		goto xdr_error;
-	if ((status = decode_attr_owner(xdr, bitmap, server->nfs4_state, &fattr->uid)) != 0)
+	if ((status = decode_attr_owner(xdr, bitmap, server->nfs_client, &fattr->uid)) != 0)
 		goto xdr_error;
-	if ((status = decode_attr_group(xdr, bitmap, server->nfs4_state, &fattr->gid)) != 0)
+	if ((status = decode_attr_group(xdr, bitmap, server->nfs_client, &fattr->gid)) != 0)
 		goto xdr_error;
 	if ((status = decode_attr_rdev(xdr, bitmap, &fattr->rdev)) != 0)
 		goto xdr_error;
@@ -3254,7 +3254,7 @@ static int decode_delegation(struct xdr_stream *xdr, struct nfs_openres *res)
 			if (decode_space_limit(xdr, &res->maxsize) < 0)
 				return -EIO;
 	}
-	return decode_ace(xdr, NULL, res->server->nfs4_state);
+	return decode_ace(xdr, NULL, res->server->nfs_client);
 }
 
 static int decode_open(struct xdr_stream *xdr, struct nfs_openres *res)
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index d03ede5..ab4c78e 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -1141,7 +1141,7 @@ static struct rpc_clnt *nfs4_create_client(struct nfs_server *server,
 	list_add_tail(&server->nfs4_siblings, &clp->cl_superblocks);
 	clnt = rpc_clone_client(clp->cl_rpcclient);
 	if (!IS_ERR(clnt))
-		server->nfs4_state = clp;
+		server->nfs_client = clp;
 	up_write(&clp->cl_sem);
 	clp = NULL;
 
@@ -1151,7 +1151,7 @@ static struct rpc_clnt *nfs4_create_client(struct nfs_server *server,
 		return clnt;
 	}
 
-	if (server->nfs4_state->cl_idmap == NULL) {
+	if (server->nfs_client->cl_idmap == NULL) {
 		dprintk("%s: failed to create idmapper.\n", __FUNCTION__);
 		return ERR_PTR(-ENOMEM);
 	}
@@ -1416,7 +1416,7 @@ static inline char *nfs4_dup_path(const struct dentry *dentry)
 static struct super_block *nfs4_clone_sb(struct nfs_server *server, struct nfs_clone_mount *data)
 {
 	const struct dentry *dentry = data->dentry;
-	struct nfs_client *clp = server->nfs4_state;
+	struct nfs_client *clp = server->nfs_client;
 	struct super_block *sb;
 
 	server->fsid = data->fattr->fsid;
-- 
cgit v1.1


From b7162792b5c0e0f6e91b8997f8e6bbc76ec5420a Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 22 Aug 2006 20:06:09 -0400
Subject: NFS: Return an error when starting the idmapping pipe

Return an error when starting the idmapping pipe so that we can detect it
failing.

Signed-Off-By: David Howells <dhowells@redhat.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/idmap.c | 12 ++++++++----
 fs/nfs/super.c |  3 ++-
 2 files changed, 10 insertions(+), 5 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c
index d05148e..231c20f 100644
--- a/fs/nfs/idmap.c
+++ b/fs/nfs/idmap.c
@@ -108,15 +108,17 @@ static struct rpc_pipe_ops idmap_upcall_ops = {
         .destroy_msg    = idmap_pipe_destroy_msg,
 };
 
-void
+int
 nfs_idmap_new(struct nfs_client *clp)
 {
 	struct idmap *idmap;
+	int error;
 
 	if (clp->cl_idmap != NULL)
-		return;
+		return 0;
+
         if ((idmap = kzalloc(sizeof(*idmap), GFP_KERNEL)) == NULL)
-                return;
+                return -ENOMEM;
 
 	snprintf(idmap->idmap_path, sizeof(idmap->idmap_path),
 	    "%s/idmap", clp->cl_rpcclient->cl_pathname);
@@ -124,8 +126,9 @@ nfs_idmap_new(struct nfs_client *clp)
         idmap->idmap_dentry = rpc_mkpipe(idmap->idmap_path,
 	    idmap, &idmap_upcall_ops, 0);
         if (IS_ERR(idmap->idmap_dentry)) {
+		error = PTR_ERR(idmap->idmap_dentry);
 		kfree(idmap);
-		return;
+		return error;
 	}
 
         mutex_init(&idmap->idmap_lock);
@@ -135,6 +138,7 @@ nfs_idmap_new(struct nfs_client *clp)
 	idmap->idmap_group_hash.h_type = IDMAP_TYPE_GROUP;
 
 	clp->cl_idmap = idmap;
+	return 0;
 }
 
 void
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index ab4c78e..3ee85c4 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -1136,7 +1136,8 @@ static struct rpc_clnt *nfs4_create_client(struct nfs_server *server,
 		clnt->cl_softrtry = 1;
 		clp->cl_rpcclient = clnt;
 		memcpy(clp->cl_ipaddr, server->ip_addr, sizeof(clp->cl_ipaddr));
-		nfs_idmap_new(clp);
+		if (nfs_idmap_new(clp) < 0)
+			goto out_fail;
 	}
 	list_add_tail(&server->nfs4_siblings, &clp->cl_superblocks);
 	clnt = rpc_clone_client(clp->cl_rpcclient);
-- 
cgit v1.1


From 2b3de4411b3ccaeb00018c99d1bbe7203554cf7f Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 22 Aug 2006 20:06:09 -0400
Subject: NFS: Add a lookupfh NFS RPC op

Add a lookup filehandle NFS RPC op so that a file handle can be looked up
without requiring dentries and inodes and other VFS stuff when doing an NFS4
pathwalk during mounting.

Signed-Off-By: David Howells <dhowells@redhat.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/nfs4proc.c | 47 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 47 insertions(+)

(limited to 'fs')

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index b46597f..de2006f 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -1583,6 +1583,52 @@ nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
 	return status;
 }
 
+static int _nfs4_proc_lookupfh(struct nfs_server *server, struct nfs_fh *dirfh,
+		struct qstr *name, struct nfs_fh *fhandle,
+		struct nfs_fattr *fattr)
+{
+	int		       status;
+	struct nfs4_lookup_arg args = {
+		.bitmask = server->attr_bitmask,
+		.dir_fh = dirfh,
+		.name = name,
+	};
+	struct nfs4_lookup_res res = {
+		.server = server,
+		.fattr = fattr,
+		.fh = fhandle,
+	};
+	struct rpc_message msg = {
+		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LOOKUP],
+		.rpc_argp = &args,
+		.rpc_resp = &res,
+	};
+
+	nfs_fattr_init(fattr);
+
+	dprintk("NFS call  lookupfh %s\n", name->name);
+	status = rpc_call_sync(server->client, &msg, 0);
+	dprintk("NFS reply lookupfh: %d\n", status);
+	if (status == -NFS4ERR_MOVED)
+		status = -EREMOTE;
+	return status;
+}
+
+static int nfs4_proc_lookupfh(struct nfs_server *server, struct nfs_fh *dirfh,
+			      struct qstr *name, struct nfs_fh *fhandle,
+			      struct nfs_fattr *fattr)
+{
+	struct nfs4_exception exception = { };
+	int err;
+	do {
+		err = nfs4_handle_exception(server,
+				_nfs4_proc_lookupfh(server, dirfh, name,
+						    fhandle, fattr),
+				&exception);
+	} while (exception.retry);
+	return err;
+}
+
 static int _nfs4_proc_lookup(struct inode *dir, struct qstr *name,
 		struct nfs_fh *fhandle, struct nfs_fattr *fattr)
 {
@@ -3723,6 +3769,7 @@ struct nfs_rpc_ops	nfs_v4_clientops = {
 	.getroot	= nfs4_proc_get_root,
 	.getattr	= nfs4_proc_getattr,
 	.setattr	= nfs4_proc_setattr,
+	.lookupfh	= nfs4_proc_lookupfh,
 	.lookup		= nfs4_proc_lookup,
 	.access		= nfs4_proc_access,
 	.readlink	= nfs4_proc_readlink,
-- 
cgit v1.1


From e9326dcab413848e70ab746c7c5363da13e5f801 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 22 Aug 2006 20:06:10 -0400
Subject: NFS: Add a server capabilities NFS RPC op

Add a set_capabilities NFS RPC op so that the server capabilities can be set.

Signed-Off-By: David Howells <dhowells@redhat.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/nfs4proc.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'fs')

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index de2006f..850f085 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -3790,6 +3790,7 @@ struct nfs_rpc_ops	nfs_v4_clientops = {
 	.statfs		= nfs4_proc_statfs,
 	.fsinfo		= nfs4_proc_fsinfo,
 	.pathconf	= nfs4_proc_pathconf,
+	.set_capabilities = nfs4_server_capabilities,
 	.decode_dirent	= nfs4_decode_dirent,
 	.read_setup	= nfs4_proc_read_setup,
 	.read_done	= nfs4_read_done,
-- 
cgit v1.1


From 24c8dbbb5f777187d660393599641ab3307b4b97 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 22 Aug 2006 20:06:10 -0400
Subject: NFS: Generalise the nfs_client structure

Generalise the nfs_client structure by:

 (1) Moving nfs_client to a more general place (nfs_fs_sb.h).

 (2) Renaming its maintenance routines to be non-NFS4 specific.

 (3) Move those maintenance routines to a new non-NFS4 specific file (client.c)
     and move the declarations to internal.h.

 (4) Make nfs_find/get_client() take a full sockaddr_in to include the port
     number (will be required for NFS2/3).

 (5) Make nfs_find/get_client() take the NFS protocol version (again will be
     required to differentiate NFS2, 3 & 4 client records).

Also:

 (6) Make nfs_client construction proceed akin to inodes, marking them as under
     construction and providing a function to indicate completion.

 (7) Make nfs_get_client() wait interruptibly if it finds a client that it can
     share, but that client is currently being constructed.

 (8) Make nfs4_create_client() use (6) and (7) instead of locking cl_sem.

Signed-Off-By: David Howells <dhowells@redhat.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/Makefile        |   6 +-
 fs/nfs/callback.c      |   9 +-
 fs/nfs/callback_proc.c |   9 +-
 fs/nfs/client.c        | 312 +++++++++++++++++++++++++++++++++++++++++++++++++
 fs/nfs/delegation.c    |   9 +-
 fs/nfs/internal.h      |   6 +
 fs/nfs/nfs4_fs.h       |  52 ---------
 fs/nfs/nfs4proc.c      |   2 +-
 fs/nfs/nfs4state.c     | 128 +-------------------
 fs/nfs/super.c         |  53 ++++-----
 10 files changed, 364 insertions(+), 222 deletions(-)
 create mode 100644 fs/nfs/client.c

(limited to 'fs')

diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile
index 0b572a0..3b993a6 100644
--- a/fs/nfs/Makefile
+++ b/fs/nfs/Makefile
@@ -4,9 +4,9 @@
 
 obj-$(CONFIG_NFS_FS) += nfs.o
 
-nfs-y 			:= dir.o file.o inode.o super.o nfs2xdr.o pagelist.o \
-			   proc.o read.o symlink.o unlink.o write.o \
-			   namespace.o
+nfs-y 			:= client.o dir.o file.o inode.o super.o nfs2xdr.o \
+			   pagelist.o proc.o read.o symlink.o unlink.o \
+			   write.o namespace.o
 nfs-$(CONFIG_ROOT_NFS)	+= nfsroot.o mount_clnt.o      
 nfs-$(CONFIG_NFS_V3)	+= nfs3proc.o nfs3xdr.o
 nfs-$(CONFIG_NFS_V3_ACL)	+= nfs3acl.o
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index 1b596b6..a3ee113 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -19,6 +19,7 @@
 
 #include "nfs4_fs.h"
 #include "callback.h"
+#include "internal.h"
 
 #define NFSDBG_FACILITY NFSDBG_CALLBACK
 
@@ -166,15 +167,15 @@ void nfs_callback_down(void)
 
 static int nfs_callback_authenticate(struct svc_rqst *rqstp)
 {
-	struct in_addr *addr = &rqstp->rq_addr.sin_addr;
+	struct sockaddr_in *addr = &rqstp->rq_addr;
 	struct nfs_client *clp;
 
 	/* Don't talk to strangers */
-	clp = nfs4_find_client(addr);
+	clp = nfs_find_client(addr, 4);
 	if (clp == NULL)
 		return SVC_DROP;
-	dprintk("%s: %u.%u.%u.%u NFSv4 callback!\n", __FUNCTION__, NIPQUAD(addr));
-	nfs4_put_client(clp);
+	dprintk("%s: %u.%u.%u.%u NFSv4 callback!\n", __FUNCTION__, NIPQUAD(addr->sin_addr));
+	nfs_put_client(clp);
 	switch (rqstp->rq_authop->flavour) {
 		case RPC_AUTH_NULL:
 			if (rqstp->rq_proc != CB_NULL)
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index 55d6e2e..97cf8f7 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -10,6 +10,7 @@
 #include "nfs4_fs.h"
 #include "callback.h"
 #include "delegation.h"
+#include "internal.h"
 
 #define NFSDBG_FACILITY NFSDBG_CALLBACK
  
@@ -22,7 +23,7 @@ unsigned nfs4_callback_getattr(struct cb_getattrargs *args, struct cb_getattrres
 	
 	res->bitmap[0] = res->bitmap[1] = 0;
 	res->status = htonl(NFS4ERR_BADHANDLE);
-	clp = nfs4_find_client(&args->addr->sin_addr);
+	clp = nfs_find_client(args->addr, 4);
 	if (clp == NULL)
 		goto out;
 	inode = nfs_delegation_find_inode(clp, &args->fh);
@@ -48,7 +49,7 @@ out_iput:
 	up_read(&nfsi->rwsem);
 	iput(inode);
 out_putclient:
-	nfs4_put_client(clp);
+	nfs_put_client(clp);
 out:
 	dprintk("%s: exit with status = %d\n", __FUNCTION__, ntohl(res->status));
 	return res->status;
@@ -61,7 +62,7 @@ unsigned nfs4_callback_recall(struct cb_recallargs *args, void *dummy)
 	unsigned res;
 	
 	res = htonl(NFS4ERR_BADHANDLE);
-	clp = nfs4_find_client(&args->addr->sin_addr);
+	clp = nfs_find_client(args->addr, 4);
 	if (clp == NULL)
 		goto out;
 	inode = nfs_delegation_find_inode(clp, &args->fh);
@@ -80,7 +81,7 @@ unsigned nfs4_callback_recall(struct cb_recallargs *args, void *dummy)
 	}
 	iput(inode);
 out_putclient:
-	nfs4_put_client(clp);
+	nfs_put_client(clp);
 out:
 	dprintk("%s: exit with status = %d\n", __FUNCTION__, ntohl(res));
 	return res;
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
new file mode 100644
index 0000000..cb5e924
--- /dev/null
+++ b/fs/nfs/client.c
@@ -0,0 +1,312 @@
+/* client.c: NFS client sharing and management code
+ *
+ * Copyright (C) 2006 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/init.h>
+
+#include <linux/time.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/string.h>
+#include <linux/stat.h>
+#include <linux/errno.h>
+#include <linux/unistd.h>
+#include <linux/sunrpc/clnt.h>
+#include <linux/sunrpc/stats.h>
+#include <linux/sunrpc/metrics.h>
+#include <linux/nfs_fs.h>
+#include <linux/nfs_mount.h>
+#include <linux/nfs4_mount.h>
+#include <linux/lockd/bind.h>
+#include <linux/smp_lock.h>
+#include <linux/seq_file.h>
+#include <linux/mount.h>
+#include <linux/nfs_idmap.h>
+#include <linux/vfs.h>
+#include <linux/inet.h>
+#include <linux/nfs_xdr.h>
+
+#include <asm/system.h>
+
+#include "nfs4_fs.h"
+#include "callback.h"
+#include "delegation.h"
+#include "iostat.h"
+#include "internal.h"
+
+#define NFSDBG_FACILITY		NFSDBG_CLIENT
+
+static DEFINE_SPINLOCK(nfs_client_lock);
+static LIST_HEAD(nfs_client_list);
+static DECLARE_WAIT_QUEUE_HEAD(nfs_client_active_wq);
+
+/*
+ * Allocate a shared client record
+ *
+ * Since these are allocated/deallocated very rarely, we don't
+ * bother putting them in a slab cache...
+ */
+static struct nfs_client *nfs_alloc_client(const char *hostname,
+					   const struct sockaddr_in *addr,
+					   int nfsversion)
+{
+	struct nfs_client *clp;
+	int error;
+
+	if ((clp = kzalloc(sizeof(*clp), GFP_KERNEL)) == NULL)
+		goto error_0;
+
+	error = rpciod_up();
+	if (error < 0) {
+		dprintk("%s: couldn't start rpciod! Error = %d\n",
+				__FUNCTION__, error);
+		__set_bit(NFS_CS_RPCIOD, &clp->cl_res_state);
+		goto error_1;
+	}
+
+	if (nfsversion == 4) {
+		if (nfs_callback_up() < 0)
+			goto error_2;
+		__set_bit(NFS_CS_CALLBACK, &clp->cl_res_state);
+	}
+
+	atomic_set(&clp->cl_count, 1);
+	clp->cl_cons_state = NFS_CS_INITING;
+
+	clp->cl_nfsversion = nfsversion;
+	memcpy(&clp->cl_addr, addr, sizeof(clp->cl_addr));
+
+	if (hostname) {
+		clp->cl_hostname = kstrdup(hostname, GFP_KERNEL);
+		if (!clp->cl_hostname)
+			goto error_3;
+	}
+
+	INIT_LIST_HEAD(&clp->cl_superblocks);
+	clp->cl_rpcclient = ERR_PTR(-EINVAL);
+
+#ifdef CONFIG_NFS_V4
+	init_rwsem(&clp->cl_sem);
+	INIT_LIST_HEAD(&clp->cl_delegations);
+	INIT_LIST_HEAD(&clp->cl_state_owners);
+	INIT_LIST_HEAD(&clp->cl_unused);
+	spin_lock_init(&clp->cl_lock);
+	INIT_WORK(&clp->cl_renewd, nfs4_renew_state, clp);
+	rpc_init_wait_queue(&clp->cl_rpcwaitq, "NFS client");
+	clp->cl_boot_time = CURRENT_TIME;
+	clp->cl_state = 1 << NFS4CLNT_LEASE_EXPIRED;
+#endif
+
+	return clp;
+
+error_3:
+	nfs_callback_down();
+	__clear_bit(NFS_CS_CALLBACK, &clp->cl_res_state);
+error_2:
+	rpciod_down();
+	__clear_bit(NFS_CS_RPCIOD, &clp->cl_res_state);
+error_1:
+	kfree(clp);
+error_0:
+	return NULL;
+}
+
+/*
+ * Destroy a shared client record
+ */
+static void nfs_free_client(struct nfs_client *clp)
+{
+	dprintk("--> nfs_free_client(%d)\n", clp->cl_nfsversion);
+
+#ifdef CONFIG_NFS_V4
+	if (__test_and_clear_bit(NFS_CS_IDMAP, &clp->cl_res_state)) {
+		while (!list_empty(&clp->cl_unused)) {
+			struct nfs4_state_owner *sp;
+
+			sp = list_entry(clp->cl_unused.next,
+					struct nfs4_state_owner,
+					so_list);
+			list_del(&sp->so_list);
+			kfree(sp);
+		}
+		BUG_ON(!list_empty(&clp->cl_state_owners));
+		nfs_idmap_delete(clp);
+	}
+#endif
+
+	/* -EIO all pending I/O */
+	if (!IS_ERR(clp->cl_rpcclient))
+		rpc_shutdown_client(clp->cl_rpcclient);
+
+	if (__test_and_clear_bit(NFS_CS_CALLBACK, &clp->cl_res_state))
+		nfs_callback_down();
+
+	if (__test_and_clear_bit(NFS_CS_RPCIOD, &clp->cl_res_state))
+	rpciod_down();
+
+	kfree(clp->cl_hostname);
+	kfree(clp);
+
+	dprintk("<-- nfs_free_client()\n");
+}
+
+/*
+ * Release a reference to a shared client record
+ */
+void nfs_put_client(struct nfs_client *clp)
+{
+	dprintk("--> nfs_put_client({%d})\n", atomic_read(&clp->cl_count));
+
+	if (atomic_dec_and_lock(&clp->cl_count, &nfs_client_lock)) {
+		list_del(&clp->cl_share_link);
+		spin_unlock(&nfs_client_lock);
+
+		BUG_ON(!list_empty(&clp->cl_superblocks));
+
+		nfs_free_client(clp);
+	}
+}
+
+/*
+ * Find a client by address
+ * - caller must hold nfs_client_lock
+ */
+static struct nfs_client *__nfs_find_client(const struct sockaddr_in *addr, int nfsversion)
+{
+	struct nfs_client *clp;
+
+	list_for_each_entry(clp, &nfs_client_list, cl_share_link) {
+		/* Different NFS versions cannot share the same nfs_client */
+		if (clp->cl_nfsversion != nfsversion)
+			continue;
+
+		if (memcmp(&clp->cl_addr.sin_addr, &addr->sin_addr,
+			   sizeof(clp->cl_addr.sin_addr)) != 0)
+			continue;
+
+		if (clp->cl_addr.sin_port == addr->sin_port)
+			goto found;
+	}
+
+	return NULL;
+
+found:
+	atomic_inc(&clp->cl_count);
+	return clp;
+}
+
+/*
+ * Find a client by IP address and protocol version
+ * - returns NULL if no such client
+ */
+struct nfs_client *nfs_find_client(const struct sockaddr_in *addr, int nfsversion)
+{
+	struct nfs_client *clp;
+
+	spin_lock(&nfs_client_lock);
+	clp = __nfs_find_client(addr, nfsversion);
+	spin_unlock(&nfs_client_lock);
+
+	BUG_ON(clp->cl_cons_state == 0);
+
+	return clp;
+}
+
+/*
+ * Look up a client by IP address and protocol version
+ * - creates a new record if one doesn't yet exist
+ */
+struct nfs_client *nfs_get_client(const char *hostname,
+				  const struct sockaddr_in *addr,
+				  int nfsversion)
+{
+	struct nfs_client *clp, *new = NULL;
+	int error;
+
+	dprintk("--> nfs_get_client(%s,"NIPQUAD_FMT":%d,%d)\n",
+		hostname ?: "", NIPQUAD(addr->sin_addr),
+		addr->sin_port, nfsversion);
+
+	/* see if the client already exists */
+	do {
+		spin_lock(&nfs_client_lock);
+
+		clp = __nfs_find_client(addr, nfsversion);
+		if (clp)
+			goto found_client;
+		if (new)
+			goto install_client;
+
+		spin_unlock(&nfs_client_lock);
+
+		new = nfs_alloc_client(hostname, addr, nfsversion);
+	} while (new);
+
+	return ERR_PTR(-ENOMEM);
+
+	/* install a new client and return with it unready */
+install_client:
+	clp = new;
+	list_add(&clp->cl_share_link, &nfs_client_list);
+	spin_unlock(&nfs_client_lock);
+	dprintk("--> nfs_get_client() = %p [new]\n", clp);
+	return clp;
+
+	/* found an existing client
+	 * - make sure it's ready before returning
+	 */
+found_client:
+	spin_unlock(&nfs_client_lock);
+
+	if (new)
+		nfs_free_client(new);
+
+	if (clp->cl_cons_state == NFS_CS_INITING) {
+		DECLARE_WAITQUEUE(myself, current);
+
+		add_wait_queue(&nfs_client_active_wq, &myself);
+
+		for (;;) {
+			set_current_state(TASK_INTERRUPTIBLE);
+			if (signal_pending(current) ||
+			    clp->cl_cons_state > NFS_CS_READY)
+				break;
+			schedule();
+		}
+
+		remove_wait_queue(&nfs_client_active_wq, &myself);
+
+		if (signal_pending(current)) {
+			nfs_put_client(clp);
+			return ERR_PTR(-ERESTARTSYS);
+		}
+	}
+
+	if (clp->cl_cons_state < NFS_CS_READY) {
+		error = clp->cl_cons_state;
+		nfs_put_client(clp);
+		return ERR_PTR(error);
+	}
+
+	dprintk("--> nfs_get_client() = %p [share]\n", clp);
+	return clp;
+}
+
+/*
+ * Mark a server as ready or failed
+ */
+void nfs_mark_client_ready(struct nfs_client *clp, int state)
+{
+	clp->cl_cons_state = state;
+	wake_up_all(&nfs_client_active_wq);
+}
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index cfe2397..5713367 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -18,6 +18,7 @@
 
 #include "nfs4_fs.h"
 #include "delegation.h"
+#include "internal.h"
 
 static struct nfs_delegation *nfs_alloc_delegation(void)
 {
@@ -145,7 +146,7 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct
 					sizeof(delegation->stateid)) != 0 ||
 				delegation->type != nfsi->delegation->type) {
 			printk("%s: server %u.%u.%u.%u, handed out a duplicate delegation!\n",
-					__FUNCTION__, NIPQUAD(clp->cl_addr));
+					__FUNCTION__, NIPQUAD(clp->cl_addr.sin_addr));
 			status = -EIO;
 		}
 	}
@@ -254,7 +255,7 @@ restart:
 	}
 out:
 	spin_unlock(&clp->cl_lock);
-	nfs4_put_client(clp);
+	nfs_put_client(clp);
 	module_put_and_exit(0);
 }
 
@@ -266,10 +267,10 @@ void nfs_expire_all_delegations(struct nfs_client *clp)
 	atomic_inc(&clp->cl_count);
 	task = kthread_run(nfs_do_expire_all_delegations, clp,
 			"%u.%u.%u.%u-delegreturn",
-			NIPQUAD(clp->cl_addr));
+			NIPQUAD(clp->cl_addr.sin_addr));
 	if (!IS_ERR(task))
 		return;
-	nfs4_put_client(clp);
+	nfs_put_client(clp);
 	module_put(THIS_MODULE);
 }
 
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 4802157..ac370d5 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -15,6 +15,12 @@ struct nfs_clone_mount {
 	rpc_authflavor_t authflavor;
 };
 
+/* client.c */
+extern void nfs_put_client(struct nfs_client *);
+extern struct nfs_client *nfs_find_client(const struct sockaddr_in *, int);
+extern struct nfs_client *nfs_get_client(const char *, const struct sockaddr_in *, int);
+extern void nfs_mark_client_ready(struct nfs_client *, int);
+
 /* nfs4namespace.c */
 #ifdef CONFIG_NFS_V4
 extern struct vfsmount *nfs_do_refmount(const struct vfsmount *mnt_parent, struct dentry *dentry);
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index 4e334cb..e787924 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -43,55 +43,6 @@ enum nfs4_client_state {
 };
 
 /*
- * The nfs_client identifies our client state to the server.
- */
-struct nfs_client {
-	struct list_head	cl_servers;	/* Global list of servers */
-	struct in_addr		cl_addr;	/* Server identifier */
-	u64			cl_clientid;	/* constant */
-	nfs4_verifier		cl_confirm;
-	unsigned long		cl_state;
-
-	u32			cl_lockowner_id;
-
-	/*
-	 * The following rwsem ensures exclusive access to the server
-	 * while we recover the state following a lease expiration.
-	 */
-	struct rw_semaphore	cl_sem;
-
-	struct list_head	cl_delegations;
-	struct list_head	cl_state_owners;
-	struct list_head	cl_unused;
-	int			cl_nunused;
-	spinlock_t		cl_lock;
-	atomic_t		cl_count;
-
-	struct rpc_clnt *	cl_rpcclient;
-
-	struct list_head	cl_superblocks;	/* List of nfs_server structs */
-
-	unsigned long		cl_lease_time;
-	unsigned long		cl_last_renewal;
-	struct work_struct	cl_renewd;
-	struct work_struct	cl_recoverd;
-
-	struct rpc_wait_queue	cl_rpcwaitq;
-
-	/* used for the setclientid verifier */
-	struct timespec		cl_boot_time;
-
-	/* idmapper */
-	struct idmap *		cl_idmap;
-
-	/* Our own IP address, as a null-terminated string.
-	 * This is used to generate the clientid, and the callback address.
-	 */
-	char			cl_ipaddr[16];
-	unsigned char		cl_id_uniquifier;
-};
-
-/*
  * struct rpc_sequence ensures that RPC calls are sent in the exact
  * order that they appear on the list.
  */
@@ -239,9 +190,6 @@ extern void nfs4_renew_state(void *);
 /* nfs4state.c */
 extern void init_nfsv4_state(struct nfs_server *);
 extern void destroy_nfsv4_state(struct nfs_server *);
-extern struct nfs_client *nfs4_get_client(struct in_addr *);
-extern void nfs4_put_client(struct nfs_client *clp);
-extern struct nfs_client *nfs4_find_client(struct in_addr *);
 struct rpc_cred *nfs4_get_renew_cred(struct nfs_client *clp);
 extern u32 nfs4_alloc_lockowner_id(struct nfs_client *);
 
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 850f085..803c31b 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -2968,7 +2968,7 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program, unsigned short po
 	for(;;) {
 		setclientid.sc_name_len = scnprintf(setclientid.sc_name,
 				sizeof(setclientid.sc_name), "%s/%u.%u.%u.%u %s %u",
-				clp->cl_ipaddr, NIPQUAD(clp->cl_addr.s_addr),
+				clp->cl_ipaddr, NIPQUAD(clp->cl_addr.sin_addr),
 				cred->cr_ops->cr_name,
 				clp->cl_id_uniquifier);
 		setclientid.sc_netid_len = scnprintf(setclientid.sc_netid,
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index fa51a7d..058811e 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -50,12 +50,12 @@
 #include "nfs4_fs.h"
 #include "callback.h"
 #include "delegation.h"
+#include "internal.h"
 
 #define OPENOWNER_POOL_SIZE	8
 
 const nfs4_stateid zero_stateid;
 
-static DEFINE_SPINLOCK(state_spinlock);
 static LIST_HEAD(nfs4_clientid_list);
 
 void
@@ -71,127 +71,11 @@ destroy_nfsv4_state(struct nfs_server *server)
 	kfree(server->mnt_path);
 	server->mnt_path = NULL;
 	if (server->nfs_client) {
-		nfs4_put_client(server->nfs_client);
+		nfs_put_client(server->nfs_client);
 		server->nfs_client = NULL;
 	}
 }
 
-/*
- * nfs4_get_client(): returns an empty client structure
- * nfs4_put_client(): drops reference to client structure
- *
- * Since these are allocated/deallocated very rarely, we don't
- * bother putting them in a slab cache...
- */
-static struct nfs_client *
-nfs4_alloc_client(struct in_addr *addr)
-{
-	struct nfs_client *clp;
-
-	if (nfs_callback_up() < 0)
-		return NULL;
-	if ((clp = kzalloc(sizeof(*clp), GFP_KERNEL)) == NULL) {
-		nfs_callback_down();
-		return NULL;
-	}
-	memcpy(&clp->cl_addr, addr, sizeof(clp->cl_addr));
-	init_rwsem(&clp->cl_sem);
-	INIT_LIST_HEAD(&clp->cl_delegations);
-	INIT_LIST_HEAD(&clp->cl_state_owners);
-	INIT_LIST_HEAD(&clp->cl_unused);
-	spin_lock_init(&clp->cl_lock);
-	atomic_set(&clp->cl_count, 1);
-	INIT_WORK(&clp->cl_renewd, nfs4_renew_state, clp);
-	INIT_LIST_HEAD(&clp->cl_superblocks);
-	rpc_init_wait_queue(&clp->cl_rpcwaitq, "NFS4 client");
-	clp->cl_rpcclient = ERR_PTR(-EINVAL);
-	clp->cl_boot_time = CURRENT_TIME;
-	clp->cl_state = 1 << NFS4CLNT_LEASE_EXPIRED;
-	return clp;
-}
-
-static void
-nfs4_free_client(struct nfs_client *clp)
-{
-	struct nfs4_state_owner *sp;
-
-	while (!list_empty(&clp->cl_unused)) {
-		sp = list_entry(clp->cl_unused.next,
-				struct nfs4_state_owner,
-				so_list);
-		list_del(&sp->so_list);
-		kfree(sp);
-	}
-	BUG_ON(!list_empty(&clp->cl_state_owners));
-	nfs_idmap_delete(clp);
-	if (!IS_ERR(clp->cl_rpcclient))
-		rpc_shutdown_client(clp->cl_rpcclient);
-	kfree(clp);
-	nfs_callback_down();
-}
-
-static struct nfs_client *__nfs4_find_client(struct in_addr *addr)
-{
-	struct nfs_client *clp;
-	list_for_each_entry(clp, &nfs4_clientid_list, cl_servers) {
-		if (memcmp(&clp->cl_addr, addr, sizeof(clp->cl_addr)) == 0) {
-			atomic_inc(&clp->cl_count);
-			return clp;
-		}
-	}
-	return NULL;
-}
-
-struct nfs_client *nfs4_find_client(struct in_addr *addr)
-{
-	struct nfs_client *clp;
-	spin_lock(&state_spinlock);
-	clp = __nfs4_find_client(addr);
-	spin_unlock(&state_spinlock);
-	return clp;
-}
-
-struct nfs_client *
-nfs4_get_client(struct in_addr *addr)
-{
-	struct nfs_client *clp, *new = NULL;
-
-	spin_lock(&state_spinlock);
-	for (;;) {
-		clp = __nfs4_find_client(addr);
-		if (clp != NULL)
-			break;
-		clp = new;
-		if (clp != NULL) {
-			list_add(&clp->cl_servers, &nfs4_clientid_list);
-			new = NULL;
-			break;
-		}
-		spin_unlock(&state_spinlock);
-		new = nfs4_alloc_client(addr);
-		spin_lock(&state_spinlock);
-		if (new == NULL)
-			break;
-	}
-	spin_unlock(&state_spinlock);
-	if (new)
-		nfs4_free_client(new);
-	return clp;
-}
-
-void
-nfs4_put_client(struct nfs_client *clp)
-{
-	if (!atomic_dec_and_lock(&clp->cl_count, &state_spinlock))
-		return;
-	list_del(&clp->cl_servers);
-	spin_unlock(&state_spinlock);
-	BUG_ON(!list_empty(&clp->cl_superblocks));
-	rpc_wake_up(&clp->cl_rpcwaitq);
-	nfs4_kill_renewd(clp);
-	nfs4_free_client(clp);
-}
-
 static int nfs4_init_client(struct nfs_client *clp, struct rpc_cred *cred)
 {
 	int status = nfs4_proc_setclientid(clp, NFS4_CALLBACK,
@@ -771,11 +655,11 @@ static void nfs4_recover_state(struct nfs_client *clp)
 	__module_get(THIS_MODULE);
 	atomic_inc(&clp->cl_count);
 	task = kthread_run(reclaimer, clp, "%u.%u.%u.%u-reclaim",
-			NIPQUAD(clp->cl_addr));
+			NIPQUAD(clp->cl_addr.sin_addr));
 	if (!IS_ERR(task))
 		return;
 	nfs4_clear_recover_bit(clp);
-	nfs4_put_client(clp);
+	nfs_put_client(clp);
 	module_put(THIS_MODULE);
 }
 
@@ -970,12 +854,12 @@ out:
 	if (status == -NFS4ERR_CB_PATH_DOWN)
 		nfs_handle_cb_pathdown(clp);
 	nfs4_clear_recover_bit(clp);
-	nfs4_put_client(clp);
+	nfs_put_client(clp);
 	module_put_and_exit(0);
 	return 0;
 out_error:
 	printk(KERN_WARNING "Error: state recovery failed on NFSv4 server %u.%u.%u.%u with error %d\n",
-				NIPQUAD(clp->cl_addr.s_addr), -status);
+				NIPQUAD(clp->cl_addr.sin_addr), -status);
 	set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state);
 	goto out;
 }
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 3ee85c4..f97d7d9 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -1104,47 +1104,46 @@ static struct rpc_clnt *nfs4_create_client(struct nfs_server *server,
 	struct rpc_clnt *clnt = NULL;
 	int err = -EIO;
 
-	clp = nfs4_get_client(&server->addr.sin_addr);
+	clp = nfs_get_client(server->hostname, &server->addr, 4);
 	if (!clp) {
 		dprintk("%s: failed to create NFS4 client.\n", __FUNCTION__);
 		return ERR_PTR(err);
 	}
 
 	/* Now create transport and client */
-	down_write(&clp->cl_sem);
-	if (IS_ERR(clp->cl_rpcclient)) {
+	if (clp->cl_cons_state == NFS_CS_INITING) {
 		xprt = xprt_create_proto(proto, &server->addr, timeparms);
 		if (IS_ERR(xprt)) {
-			up_write(&clp->cl_sem);
 			err = PTR_ERR(xprt);
 			dprintk("%s: cannot create RPC transport. Error = %d\n",
 					__FUNCTION__, err);
-			goto out_fail;
+			goto client_init_error;
 		}
 		/* Bind to a reserved port! */
 		xprt->resvport = 1;
 		clnt = rpc_create_client(xprt, server->hostname, &nfs_program,
 				server->rpc_ops->version, flavor);
 		if (IS_ERR(clnt)) {
-			up_write(&clp->cl_sem);
 			err = PTR_ERR(clnt);
 			dprintk("%s: cannot create RPC client. Error = %d\n",
 					__FUNCTION__, err);
-			goto out_fail;
+			goto client_init_error;
 		}
 		clnt->cl_intr     = 1;
 		clnt->cl_softrtry = 1;
 		clp->cl_rpcclient = clnt;
 		memcpy(clp->cl_ipaddr, server->ip_addr, sizeof(clp->cl_ipaddr));
-		if (nfs_idmap_new(clp) < 0)
-			goto out_fail;
+		err = nfs_idmap_new(clp);
+		if (err < 0) {
+			dprintk("%s: failed to create idmapper.\n",
+				__FUNCTION__);
+			goto client_init_error;
+		}
+		__set_bit(NFS_CS_IDMAP, &clp->cl_res_state);
+		nfs_mark_client_ready(clp, 0);
 	}
-	list_add_tail(&server->nfs4_siblings, &clp->cl_superblocks);
+
 	clnt = rpc_clone_client(clp->cl_rpcclient);
-	if (!IS_ERR(clnt))
-		server->nfs_client = clp;
-	up_write(&clp->cl_sem);
-	clp = NULL;
 
 	if (IS_ERR(clnt)) {
 		dprintk("%s: cannot create RPC client. Error = %d\n",
@@ -1152,11 +1151,6 @@ static struct rpc_clnt *nfs4_create_client(struct nfs_server *server,
 		return clnt;
 	}
 
-	if (server->nfs_client->cl_idmap == NULL) {
-		dprintk("%s: failed to create idmapper.\n", __FUNCTION__);
-		return ERR_PTR(-ENOMEM);
-	}
-
 	if (clnt->cl_auth->au_flavor != flavor) {
 		struct rpc_auth *auth;
 
@@ -1166,11 +1160,16 @@ static struct rpc_clnt *nfs4_create_client(struct nfs_server *server,
 			return (struct rpc_clnt *)auth;
 		}
 	}
+
+	server->nfs_client = clp;
+	down_write(&clp->cl_sem);
+	list_add_tail(&server->nfs4_siblings, &clp->cl_superblocks);
+	up_write(&clp->cl_sem);
 	return clnt;
 
- out_fail:
-	if (clp)
-		nfs4_put_client(clp);
+client_init_error:
+	nfs_mark_client_ready(clp, err);
+	nfs_put_client(clp);
 	return ERR_PTR(err);
 }
 
@@ -1329,14 +1328,6 @@ static int nfs4_get_sb(struct file_system_type *fs_type,
 		goto out_free;
 	}
 
-	/* Fire up rpciod if not yet running */
-	error = rpciod_up();
-	if (error < 0) {
-		dprintk("%s: couldn't start rpciod! Error = %d\n",
-				__FUNCTION__, error);
-		goto out_free;
-	}
-
 	s = sget(fs_type, nfs4_compare_super, nfs_set_super, server);
 	if (IS_ERR(s)) {
 		error = PTR_ERR(s);
@@ -1383,8 +1374,6 @@ static void nfs4_kill_super(struct super_block *sb)
 
 	destroy_nfsv4_state(server);
 
-	rpciod_down();
-
 	nfs_free_iostats(server->io_stats);
 	kfree(server->hostname);
 	kfree(server);
-- 
cgit v1.1


From 0c7d90cfed91a283228017ba6faf37ee0bcd32b1 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 22 Aug 2006 20:06:10 -0400
Subject: NFS: Use the dentry superblock directly in nfs_statfs()

Use the nominated dentry's superblock directly in the NFS statfs() op to get a
file handle, rather than using s_root (which will become a dummy dentry in a
future patch).

Signed-Off-By: David Howells <dhowells@redhat.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/super.c | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index f97d7d9..a41d516 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -240,11 +240,10 @@ void __exit unregister_nfs_fs(void)
  */
 static int nfs_statfs(struct dentry *dentry, struct kstatfs *buf)
 {
-	struct super_block *sb = dentry->d_sb;
-	struct nfs_server *server = NFS_SB(sb);
+	struct nfs_server *server = NFS_SB(dentry->d_sb);
 	unsigned char blockbits;
 	unsigned long blockres;
-	struct nfs_fh *rootfh = NFS_FH(sb->s_root->d_inode);
+	struct nfs_fh *fh = NFS_FH(dentry->d_inode);
 	struct nfs_fattr fattr;
 	struct nfs_fsstat res = {
 			.fattr = &fattr,
@@ -253,7 +252,7 @@ static int nfs_statfs(struct dentry *dentry, struct kstatfs *buf)
 
 	lock_kernel();
 
-	error = server->rpc_ops->statfs(server, rootfh, &res);
+	error = server->rpc_ops->statfs(server, fh, &res);
 	buf->f_type = NFS_SUPER_MAGIC;
 	if (error < 0)
 		goto out_err;
@@ -263,7 +262,7 @@ static int nfs_statfs(struct dentry *dentry, struct kstatfs *buf)
 	 * case where f_frsize != f_bsize.  Eventually we want to
 	 * report the value of wtmult in this field.
 	 */
-	buf->f_frsize = sb->s_blocksize;
+	buf->f_frsize = dentry->d_sb->s_blocksize;
 
 	/*
 	 * On most *nix systems, f_blocks, f_bfree, and f_bavail
@@ -272,8 +271,8 @@ static int nfs_statfs(struct dentry *dentry, struct kstatfs *buf)
 	 * thus historically Linux's sys_statfs reports these
 	 * fields in units of f_bsize.
 	 */
-	buf->f_bsize = sb->s_blocksize;
-	blockbits = sb->s_blocksize_bits;
+	buf->f_bsize = dentry->d_sb->s_blocksize;
+	blockbits = dentry->d_sb->s_blocksize_bits;
 	blockres = (1 << blockbits) - 1;
 	buf->f_blocks = (res.tbytes + blockres) >> blockbits;
 	buf->f_bfree = (res.fbytes + blockres) >> blockbits;
-- 
cgit v1.1


From 509de8111656a7d89b4a1a5f430f4460ce510f0f Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 22 Aug 2006 20:06:11 -0400
Subject: NFS: Add extra const qualifiers

Add some extra const qualifiers into NFS.

Signed-Off-By: David Howells <dhowells@redhat.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/namespace.c     |  3 ++-
 fs/nfs/nfs3proc.c      |  2 +-
 fs/nfs/nfs4namespace.c |  8 ++++----
 fs/nfs/nfs4proc.c      |  2 +-
 fs/nfs/proc.c          |  2 +-
 fs/nfs/super.c         | 10 +++++-----
 6 files changed, 14 insertions(+), 13 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c
index 86b3169..85d9ed1 100644
--- a/fs/nfs/namespace.c
+++ b/fs/nfs/namespace.c
@@ -172,7 +172,8 @@ void nfs_release_automount_timer(void)
 /*
  * Clone a mountpoint of the appropriate type
  */
-static struct vfsmount *nfs_do_clone_mount(struct nfs_server *server, char *devname,
+static struct vfsmount *nfs_do_clone_mount(struct nfs_server *server,
+					   const char *devname,
 					   struct nfs_clone_mount *mountdata)
 {
 #ifdef CONFIG_NFS_V4
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index 7143b1f..3e53712 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -886,7 +886,7 @@ nfs3_proc_lock(struct file *filp, int cmd, struct file_lock *fl)
 	return nlmclnt_proc(filp->f_dentry->d_inode, cmd, fl);
 }
 
-struct nfs_rpc_ops	nfs_v3_clientops = {
+const struct nfs_rpc_ops nfs_v3_clientops = {
 	.version	= 3,			/* protocol version */
 	.dentry_ops	= &nfs_dentry_operations,
 	.dir_inode_ops	= &nfs3_dir_inode_operations,
diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c
index ea38d27..faed9bc 100644
--- a/fs/nfs/nfs4namespace.c
+++ b/fs/nfs/nfs4namespace.c
@@ -23,7 +23,7 @@
 /*
  * Check if fs_root is valid
  */
-static inline char *nfs4_pathname_string(struct nfs4_pathname *pathname,
+static inline char *nfs4_pathname_string(const struct nfs4_pathname *pathname,
 					 char *buffer, ssize_t buflen)
 {
 	char *end = buffer + buflen;
@@ -34,7 +34,7 @@ static inline char *nfs4_pathname_string(struct nfs4_pathname *pathname,
 
 	n = pathname->ncomponents;
 	while (--n >= 0) {
-		struct nfs4_string *component = &pathname->components[n];
+		const struct nfs4_string *component = &pathname->components[n];
 		buflen -= component->len + 1;
 		if (buflen < 0)
 			goto Elong;
@@ -60,7 +60,7 @@ Elong:
  */
 static struct vfsmount *nfs_follow_referral(const struct vfsmount *mnt_parent,
 					    const struct dentry *dentry,
-					    struct nfs4_fs_locations *locations)
+					    const struct nfs4_fs_locations *locations)
 {
 	struct vfsmount *mnt = ERR_PTR(-ENOENT);
 	struct nfs_clone_mount mountdata = {
@@ -108,7 +108,7 @@ static struct vfsmount *nfs_follow_referral(const struct vfsmount *mnt_parent,
 
 	loc = 0;
 	while (loc < locations->nlocations && IS_ERR(mnt)) {
-		struct nfs4_fs_location *location = &locations->locations[loc];
+		const struct nfs4_fs_location *location = &locations->locations[loc];
 		char *mnt_path;
 
 		if (location == NULL || location->nservers <= 0 ||
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 803c31b..061be71 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -3761,7 +3761,7 @@ static struct inode_operations nfs4_file_inode_operations = {
 	.listxattr	= nfs4_listxattr,
 };
 
-struct nfs_rpc_ops	nfs_v4_clientops = {
+const struct nfs_rpc_ops nfs_v4_clientops = {
 	.version	= 4,			/* protocol version */
 	.dentry_ops	= &nfs4_dentry_operations,
 	.dir_inode_ops	= &nfs4_dir_inode_operations,
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index b3899ea..7767690 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -671,7 +671,7 @@ nfs_proc_lock(struct file *filp, int cmd, struct file_lock *fl)
 }
 
 
-struct nfs_rpc_ops	nfs_v2_clientops = {
+const struct nfs_rpc_ops nfs_v2_clientops = {
 	.version	= 2,		       /* protocol version */
 	.dentry_ops	= &nfs_dentry_operations,
 	.dir_inode_ops	= &nfs_dir_inode_operations,
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index a41d516..c97f309 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -329,10 +329,10 @@ static const char *nfs_pseudoflavour_to_name(rpc_authflavor_t flavour)
  */
 static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss, int showdefaults)
 {
-	static struct proc_nfs_info {
+	static const struct proc_nfs_info {
 		int flag;
-		char *str;
-		char *nostr;
+		const char *str;
+		const char *nostr;
 	} nfs_info[] = {
 		{ NFS_MOUNT_SOFT, ",soft", ",hard" },
 		{ NFS_MOUNT_INTR, ",intr", "" },
@@ -342,9 +342,9 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss,
 		{ NFS_MOUNT_NOACL, ",noacl", "" },
 		{ 0, NULL, NULL }
 	};
-	struct proc_nfs_info *nfs_infop;
+	const struct proc_nfs_info *nfs_infop;
 	char buf[12];
-	char *proto;
+	const char *proto;
 
 	seq_printf(m, ",vers=%d", nfss->rpc_ops->version);
 	seq_printf(m, ",rsize=%d", nfss->rsize);
-- 
cgit v1.1


From 27951bd26031f6c27d38df9e94623bbe208a2464 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 22 Aug 2006 20:06:11 -0400
Subject: NFS: Maintain a common server record for NFS2/3 as well as for NFS4

Maintain a common server record for NFS2/3 as well as for NFS4 so that common
stuff can be moved there from struct nfs_server.

Signed-Off-By: David Howells <dhowells@redhat.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/super.c | 21 ++++++++++++++++++++-
 1 file changed, 20 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index c97f309..d1b4a5b 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -658,11 +658,19 @@ static void nfs_init_timeout_values(struct rpc_timeout *to, int proto, unsigned
 static struct rpc_clnt *
 nfs_create_client(struct nfs_server *server, const struct nfs_mount_data *data)
 {
+	struct nfs_client	*clp;
 	struct rpc_timeout	timeparms;
 	struct rpc_xprt		*xprt = NULL;
 	struct rpc_clnt		*clnt = NULL;
 	int			proto = (data->flags & NFS_MOUNT_TCP) ? IPPROTO_TCP : IPPROTO_UDP;
 
+	clp = nfs_get_client(server->hostname, &server->addr,
+			     server->rpc_ops->version);
+	if (!clp) {
+		dprintk("%s: failed to create NFS4 client.\n", __FUNCTION__);
+		return ERR_PTR(PTR_ERR(clp));
+	}
+
 	nfs_init_timeout_values(&timeparms, proto, data->timeo, data->retrans);
 
 	server->retrans_timeo = timeparms.to_initval;
@@ -673,6 +681,8 @@ nfs_create_client(struct nfs_server *server, const struct nfs_mount_data *data)
 	if (IS_ERR(xprt)) {
 		dprintk("%s: cannot create RPC transport. Error = %ld\n",
 				__FUNCTION__, PTR_ERR(xprt));
+		nfs_mark_client_ready(clp, PTR_ERR(xprt));
+		nfs_put_client(clp);
 		return (struct rpc_clnt *)xprt;
 	}
 	clnt = rpc_create_client(xprt, server->hostname, &nfs_program,
@@ -686,9 +696,13 @@ nfs_create_client(struct nfs_server *server, const struct nfs_mount_data *data)
 	clnt->cl_intr     = 1;
 	clnt->cl_softrtry = 1;
 
+	nfs_mark_client_ready(clp, 0);
+	server->nfs_client = clp;
 	return clnt;
 
 out_fail:
+	nfs_mark_client_ready(clp, PTR_ERR(xprt));
+	nfs_put_client(clp);
 	return clnt;
 }
 
@@ -764,6 +778,7 @@ static int nfs_clone_generic_sb(struct nfs_clone_mount *data,
 	if (server == NULL)
 		goto out_err;
 	memcpy(server, parent, sizeof(*server));
+	atomic_inc(&server->nfs_client->cl_count);
 	hostname = (data->hostname != NULL) ? data->hostname : parent->hostname;
 	len = strlen(hostname) + 1;
 	server->hostname = kmalloc(len, GFP_KERNEL);
@@ -796,6 +811,7 @@ out_deactivate:
 out_rpciod_down:
 	rpciod_down();
 	kfree(server->hostname);
+	nfs_put_client(server->nfs_client);
 	kfree(server);
 	return simple_set_mnt(mnt, sb);
 kill_rpciod:
@@ -803,6 +819,7 @@ kill_rpciod:
 free_hostname:
 	kfree(server->hostname);
 free_server:
+	nfs_put_client(server->nfs_client);
 	kfree(server);
 out_err:
 	return error;
@@ -1071,6 +1088,7 @@ static void nfs_kill_super(struct super_block *s)
 
 	nfs_free_iostats(server->io_stats);
 	kfree(server->hostname);
+	nfs_put_client(server->nfs_client);
 	kfree(server);
 	nfs_release_automount_timer();
 }
@@ -1421,7 +1439,6 @@ static struct super_block *nfs4_clone_sb(struct nfs_server *server, struct nfs_c
 	nfs4_server_capabilities(server, &server->fh);
 
 	down_write(&clp->cl_sem);
-	atomic_inc(&clp->cl_count);
 	list_add_tail(&server->nfs4_siblings, &clp->cl_superblocks);
 	up_write(&clp->cl_sem);
 	return sb;
@@ -1476,6 +1493,8 @@ static struct nfs_server *nfs4_referral_server(struct super_block *sb, struct nf
 	retrans = 1;
 	nfs_init_timeout_values(&timeparms, proto, timeo, retrans);
 
+	nfs_put_client(server->nfs_client);
+	server->nfs_client = NULL;
 	server->client = nfs4_create_client(server, &timeparms, proto, data->authflavor);
 	if (IS_ERR((err = server->client)))
 		goto out_err;
-- 
cgit v1.1


From 1f163415dc05983830bcc47b33c155b2528b1574 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 22 Aug 2006 20:06:11 -0400
Subject: NFS: Make better use of inode* dereferencing macros

Make better use of inode* dereferencing macros to hide dereferencing chains
(including NFS_PROTO and NFS_CLIENT).

Signed-Off-By: David Howells <dhowells@redhat.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/file.c     |  2 +-
 fs/nfs/nfs4proc.c | 10 +++++-----
 2 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 48e8928..a146ed3 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -111,7 +111,7 @@ nfs_file_open(struct inode *inode, struct file *filp)
 
 	nfs_inc_stats(inode, NFSIOS_VFSOPEN);
 	lock_kernel();
-	res = NFS_SERVER(inode)->rpc_ops->file_open(inode, filp);
+	res = NFS_PROTO(inode)->file_open(inode, filp);
 	unlock_kernel();
 	return res;
 }
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 061be71..b731b19 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -1268,7 +1268,7 @@ nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
 		BUG_ON(nd->intent.open.flags & O_CREAT);
 	}
 
-	cred = rpcauth_lookupcred(NFS_SERVER(dir)->client->cl_auth, 0);
+	cred = rpcauth_lookupcred(NFS_CLIENT(dir)->cl_auth, 0);
 	if (IS_ERR(cred))
 		return (struct dentry *)cred;
 	state = nfs4_do_open(dir, dentry, nd->intent.open.flags, &attr, cred);
@@ -1291,7 +1291,7 @@ nfs4_open_revalidate(struct inode *dir, struct dentry *dentry, int openflags, st
 	struct rpc_cred *cred;
 	struct nfs4_state *state;
 
-	cred = rpcauth_lookupcred(NFS_SERVER(dir)->client->cl_auth, 0);
+	cred = rpcauth_lookupcred(NFS_CLIENT(dir)->cl_auth, 0);
 	if (IS_ERR(cred))
 		return PTR_ERR(cred);
 	state = nfs4_open_delegated(dentry->d_inode, openflags, cred);
@@ -1565,7 +1565,7 @@ nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
 
 	nfs_fattr_init(fattr);
 	
-	cred = rpcauth_lookupcred(NFS_SERVER(inode)->client->cl_auth, 0);
+	cred = rpcauth_lookupcred(NFS_CLIENT(inode)->cl_auth, 0);
 	if (IS_ERR(cred))
 		return PTR_ERR(cred);
 
@@ -1927,7 +1927,7 @@ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
 	struct rpc_cred *cred;
 	int status = 0;
 
-	cred = rpcauth_lookupcred(NFS_SERVER(dir)->client->cl_auth, 0);
+	cred = rpcauth_lookupcred(NFS_CLIENT(dir)->cl_auth, 0);
 	if (IS_ERR(cred)) {
 		status = PTR_ERR(cred);
 		goto out;
@@ -2816,7 +2816,7 @@ static int __nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t bufl
 		return -EOPNOTSUPP;
 	nfs_inode_return_delegation(inode);
 	buf_to_pages(buf, buflen, arg.acl_pages, &arg.acl_pgbase);
-	ret = rpc_call_sync(NFS_SERVER(inode)->client, &msg, 0);
+	ret = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
 	if (ret == 0)
 		nfs4_write_cached_acl(inode, buf, buflen);
 	return ret;
-- 
cgit v1.1


From 8fa5c000d7f986ef9cdc6d95f9f7fcee20e0a7d6 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 22 Aug 2006 20:06:12 -0400
Subject: NFS: Move rpc_ops from nfs_server to nfs_client

Move the rpc_ops from the nfs_server struct to the nfs_client struct as they're
common to all server records of a particular NFS protocol version.

Signed-Off-By: David Howells <dhowells@redhat.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/dir.c       |  2 +-
 fs/nfs/inode.c     |  4 ++--
 fs/nfs/namespace.c |  6 ++++--
 fs/nfs/nfs4proc.c  |  2 +-
 fs/nfs/super.c     | 59 ++++++++++++++++++++++++++++++------------------------
 5 files changed, 41 insertions(+), 32 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 067d144..1936271 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1147,7 +1147,7 @@ int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fhandle,
 	}
 	if (!(fattr->valid & NFS_ATTR_FATTR)) {
 		struct nfs_server *server = NFS_SB(dentry->d_sb);
-		error = server->rpc_ops->getattr(server, fhandle, fattr);
+		error = server->nfs_client->rpc_ops->getattr(server, fhandle, fattr);
 		if (error < 0)
 			goto out_err;
 	}
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 6ed018c..771c3b8 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -237,13 +237,13 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
 		/* Why so? Because we want revalidate for devices/FIFOs, and
 		 * that's precisely what we have in nfs_file_inode_operations.
 		 */
-		inode->i_op = NFS_SB(sb)->rpc_ops->file_inode_ops;
+		inode->i_op = NFS_SB(sb)->nfs_client->rpc_ops->file_inode_ops;
 		if (S_ISREG(inode->i_mode)) {
 			inode->i_fop = &nfs_file_operations;
 			inode->i_data.a_ops = &nfs_file_aops;
 			inode->i_data.backing_dev_info = &NFS_SB(sb)->backing_dev_info;
 		} else if (S_ISDIR(inode->i_mode)) {
-			inode->i_op = NFS_SB(sb)->rpc_ops->dir_inode_ops;
+			inode->i_op = NFS_SB(sb)->nfs_client->rpc_ops->dir_inode_ops;
 			inode->i_fop = &nfs_dir_operations;
 			if (nfs_server_capable(inode, NFS_CAP_READDIRPLUS)
 			    && fattr->size <= NFS_LIMIT_READDIRPLUS)
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c
index 85d9ed1..d8b8d56 100644
--- a/fs/nfs/namespace.c
+++ b/fs/nfs/namespace.c
@@ -104,7 +104,9 @@ static void * nfs_follow_mountpoint(struct dentry *dentry, struct nameidata *nd)
 		goto out_follow;
 	/* Look it up again */
 	parent = dget_parent(nd->dentry);
-	err = server->rpc_ops->lookup(parent->d_inode, &nd->dentry->d_name, &fh, &fattr);
+	err = server->nfs_client->rpc_ops->lookup(parent->d_inode,
+						  &nd->dentry->d_name,
+						  &fh, &fattr);
 	dput(parent);
 	if (err != 0)
 		goto out_err;
@@ -178,7 +180,7 @@ static struct vfsmount *nfs_do_clone_mount(struct nfs_server *server,
 {
 #ifdef CONFIG_NFS_V4
 	struct vfsmount *mnt = NULL;
-	switch (server->rpc_ops->version) {
+	switch (server->nfs_client->cl_nfsversion) {
 		case 2:
 		case 3:
 			mnt = vfs_kern_mount(&clone_nfs_fs_type, 0, devname, mountdata);
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index b731b19..1573eeb 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -758,7 +758,7 @@ static int _nfs4_proc_open(struct nfs4_opendata *data)
 	}
 	nfs_confirm_seqid(&data->owner->so_seqid, 0);
 	if (!(o_res->f_attr->valid & NFS_ATTR_FATTR))
-		return server->rpc_ops->getattr(server, &o_res->fh, o_res->f_attr);
+		return server->nfs_client->rpc_ops->getattr(server, &o_res->fh, o_res->f_attr);
 	return 0;
 }
 
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index d1b4a5b..e1e5eab 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -252,7 +252,7 @@ static int nfs_statfs(struct dentry *dentry, struct kstatfs *buf)
 
 	lock_kernel();
 
-	error = server->rpc_ops->statfs(server, fh, &res);
+	error = server->nfs_client->rpc_ops->statfs(server, fh, &res);
 	buf->f_type = NFS_SUPER_MAGIC;
 	if (error < 0)
 		goto out_err;
@@ -343,10 +343,11 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss,
 		{ 0, NULL, NULL }
 	};
 	const struct proc_nfs_info *nfs_infop;
+	struct nfs_client *clp = nfss->nfs_client;
 	char buf[12];
 	const char *proto;
 
-	seq_printf(m, ",vers=%d", nfss->rpc_ops->version);
+	seq_printf(m, ",vers=%d", clp->rpc_ops->version);
 	seq_printf(m, ",rsize=%d", nfss->rsize);
 	seq_printf(m, ",wsize=%d", nfss->wsize);
 	if (nfss->acregmin != 3*HZ || showdefaults)
@@ -427,7 +428,7 @@ static int nfs_show_stats(struct seq_file *m, struct vfsmount *mnt)
 	seq_printf(m, ",namelen=%d", nfss->namelen);
 
 #ifdef CONFIG_NFS_V4
-	if (nfss->rpc_ops->version == 4) {
+	if (nfss->nfs_client->cl_nfsversion == 4) {
 		seq_printf(m, "\n\tnfsv4:\t");
 		seq_printf(m, "bm0=0x%x", nfss->attr_bitmask[0]);
 		seq_printf(m, ",bm1=0x%x", nfss->attr_bitmask[1]);
@@ -503,7 +504,7 @@ nfs_get_root(struct super_block *sb, struct nfs_fh *rootfh, struct nfs_fsinfo *f
 	struct nfs_server	*server = NFS_SB(sb);
 	int			error;
 
-	error = server->rpc_ops->getroot(server, rootfh, fsinfo);
+	error = server->nfs_client->rpc_ops->getroot(server, rootfh, fsinfo);
 	if (error < 0) {
 		dprintk("nfs_get_root: getattr error = %d\n", -error);
 		return ERR_PTR(error);
@@ -553,14 +554,14 @@ nfs_sb_init(struct super_block *sb, rpc_authflavor_t authflavor)
 		no_root_error = -ENOMEM;
 		goto out_no_root;
 	}
-	sb->s_root->d_op = server->rpc_ops->dentry_ops;
+	sb->s_root->d_op = server->nfs_client->rpc_ops->dentry_ops;
 
 	/* mount time stamp, in seconds */
 	server->mount_time = jiffies;
 
 	/* Get some general file system info */
 	if (server->namelen == 0 &&
-	    server->rpc_ops->pathconf(server, &server->fh, &pathinfo) >= 0)
+	    server->nfs_client->rpc_ops->pathconf(server, &server->fh, &pathinfo) >= 0)
 		server->namelen = pathinfo.max_namelen;
 	/* Work out a lot of parameters */
 	if (server->rsize == 0)
@@ -663,9 +664,14 @@ nfs_create_client(struct nfs_server *server, const struct nfs_mount_data *data)
 	struct rpc_xprt		*xprt = NULL;
 	struct rpc_clnt		*clnt = NULL;
 	int			proto = (data->flags & NFS_MOUNT_TCP) ? IPPROTO_TCP : IPPROTO_UDP;
+	int			nfsversion = 2;
 
-	clp = nfs_get_client(server->hostname, &server->addr,
-			     server->rpc_ops->version);
+#ifdef CONFIG_NFS_V3
+	if (server->flags & NFS_MOUNT_VER3)
+		nfsversion = 3;
+#endif
+
+	clp = nfs_get_client(server->hostname, &server->addr, nfsversion);
 	if (!clp) {
 		dprintk("%s: failed to create NFS4 client.\n", __FUNCTION__);
 		return ERR_PTR(PTR_ERR(clp));
@@ -676,6 +682,19 @@ nfs_create_client(struct nfs_server *server, const struct nfs_mount_data *data)
 	server->retrans_timeo = timeparms.to_initval;
 	server->retrans_count = timeparms.to_retries;
 
+	/* Check NFS protocol revision and initialize RPC op vector
+	 * and file handle pool. */
+#ifdef CONFIG_NFS_V3
+	if (nfsversion == 3) {
+		clp->rpc_ops = &nfs_v3_clientops;
+		server->caps |= NFS_CAP_READDIRPLUS;
+	} else {
+		clp->rpc_ops = &nfs_v2_clientops;
+	}
+#else
+	clp->rpc_ops = &nfs_v2_clientops;
+#endif
+
 	/* create transport and client */
 	xprt = xprt_create_proto(proto, &server->addr, &timeparms);
 	if (IS_ERR(xprt)) {
@@ -686,7 +705,7 @@ nfs_create_client(struct nfs_server *server, const struct nfs_mount_data *data)
 		return (struct rpc_clnt *)xprt;
 	}
 	clnt = rpc_create_client(xprt, server->hostname, &nfs_program,
-				 server->rpc_ops->version, data->pseudoflavor);
+				 clp->cl_nfsversion, data->pseudoflavor);
 	if (IS_ERR(clnt)) {
 		dprintk("%s: cannot create RPC client. Error = %ld\n",
 				__FUNCTION__, PTR_ERR(xprt));
@@ -750,7 +769,7 @@ static struct nfs_server *nfs_clone_server(struct super_block *sb, struct nfs_cl
 	fsinfo.fattr = data->fattr;
 	if (NFS_PROTO(root_inode)->fsinfo(server, data->fh, &fsinfo) == 0)
 		nfs_super_set_maxbytes(sb, fsinfo.maxfilesize);
-	sb->s_root->d_op = server->rpc_ops->dentry_ops;
+	sb->s_root->d_op = server->nfs_client->rpc_ops->dentry_ops;
 	sb->s_flags |= MS_ACTIVE;
 	return server;
 out_put_root:
@@ -865,19 +884,6 @@ nfs_fill_super(struct super_block *sb, struct nfs_mount_data *data, int silent)
 		return -ENOMEM;
 	strcpy(server->hostname, data->hostname);
 
-	/* Check NFS protocol revision and initialize RPC op vector
-	 * and file handle pool. */
-#ifdef CONFIG_NFS_V3
-	if (server->flags & NFS_MOUNT_VER3) {
-		server->rpc_ops = &nfs_v3_clientops;
-		server->caps |= NFS_CAP_READDIRPLUS;
-	} else {
-		server->rpc_ops = &nfs_v2_clientops;
-	}
-#else
-	server->rpc_ops = &nfs_v2_clientops;
-#endif
-
 	/* Fill in pseudoflavor for mount version < 5 */
 	if (!(data->flags & NFS_MOUNT_SECFLAVOUR))
 		data->pseudoflavor = RPC_AUTH_UNIX;
@@ -888,6 +894,7 @@ nfs_fill_super(struct super_block *sb, struct nfs_mount_data *data, int silent)
 	server->client = nfs_create_client(server, data);
 	if (IS_ERR(server->client))
 		return PTR_ERR(server->client);
+
 	/* RFC 2623, sec 2.3.2 */
 	if (authflavor != RPC_AUTH_UNIX) {
 		struct rpc_auth *auth;
@@ -1129,6 +1136,8 @@ static struct rpc_clnt *nfs4_create_client(struct nfs_server *server,
 
 	/* Now create transport and client */
 	if (clp->cl_cons_state == NFS_CS_INITING) {
+		clp->rpc_ops = &nfs_v4_clientops;
+
 		xprt = xprt_create_proto(proto, &server->addr, timeparms);
 		if (IS_ERR(xprt)) {
 			err = PTR_ERR(xprt);
@@ -1139,7 +1148,7 @@ static struct rpc_clnt *nfs4_create_client(struct nfs_server *server,
 		/* Bind to a reserved port! */
 		xprt->resvport = 1;
 		clnt = rpc_create_client(xprt, server->hostname, &nfs_program,
-				server->rpc_ops->version, flavor);
+				clp->cl_nfsversion, flavor);
 		if (IS_ERR(clnt)) {
 			err = PTR_ERR(clnt);
 			dprintk("%s: cannot create RPC client. Error = %d\n",
@@ -1215,8 +1224,6 @@ static int nfs4_fill_super(struct super_block *sb, struct nfs4_mount_data *data,
 	server->acdirmin = data->acdirmin*HZ;
 	server->acdirmax = data->acdirmax*HZ;
 
-	server->rpc_ops = &nfs_v4_clientops;
-
 	nfs_init_timeout_values(&timeparms, data->proto, data->timeo, data->retrans);
 
 	server->retrans_timeo = timeparms.to_initval;
-- 
cgit v1.1


From 5006a76cca8f86c6975c16fcf67e83b8b0eee2b6 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 22 Aug 2006 20:06:12 -0400
Subject: NFS: Eliminate client_sys in favour of cl_rpcclient

Eliminate nfs_server::client_sys in favour of nfs_client::cl_rpcclient as we
only really need one per server that we're talking to since it doesn't have any
security on it.

The retransmission management variables are also moved to the common struct as
they're required to set up the cl_rpcclient connection.

The NFS2/3 client and client_acl connections are thenceforth derived by cloning
the cl_rpcclient connection and post-applying the authorisation flavour.

The code for setting up the initial common connection has been moved to
client.c as nfs_create_rpc_client().  All the NFS program definition tables are
also moved there as that's where they're now required rather than super.c.

Signed-Off-By: David Howells <dhowells@redhat.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/client.c   | 119 +++++++++++++++++++++++++++++
 fs/nfs/internal.h |   2 +
 fs/nfs/nfs3proc.c |   6 +-
 fs/nfs/proc.c     |   4 +-
 fs/nfs/super.c    | 222 +++++++++++++-----------------------------------------
 5 files changed, 177 insertions(+), 176 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index cb5e924..c08cab9 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -51,6 +51,48 @@ static LIST_HEAD(nfs_client_list);
 static DECLARE_WAIT_QUEUE_HEAD(nfs_client_active_wq);
 
 /*
+ * RPC cruft for NFS
+ */
+static struct rpc_version *nfs_version[5] = {
+	[2]			= &nfs_version2,
+#ifdef CONFIG_NFS_V3
+	[3]			= &nfs_version3,
+#endif
+#ifdef CONFIG_NFS_V4
+	[4]			= &nfs_version4,
+#endif
+};
+
+struct rpc_program nfs_program = {
+	.name			= "nfs",
+	.number			= NFS_PROGRAM,
+	.nrvers			= ARRAY_SIZE(nfs_version),
+	.version		= nfs_version,
+	.stats			= &nfs_rpcstat,
+	.pipe_dir_name		= "/nfs",
+};
+
+struct rpc_stat nfs_rpcstat = {
+	.program		= &nfs_program
+};
+
+
+#ifdef CONFIG_NFS_V3_ACL
+static struct rpc_stat		nfsacl_rpcstat = { &nfsacl_program };
+static struct rpc_version *	nfsacl_version[] = {
+	[3]			= &nfsacl_version3,
+};
+
+struct rpc_program		nfsacl_program = {
+	.name			= "nfsacl",
+	.number			= NFS_ACL_PROGRAM,
+	.nrvers			= ARRAY_SIZE(nfsacl_version),
+	.version		= nfsacl_version,
+	.stats			= &nfsacl_rpcstat,
+};
+#endif  /* CONFIG_NFS_V3_ACL */
+
+/*
  * Allocate a shared client record
  *
  * Since these are allocated/deallocated very rarely, we don't
@@ -310,3 +352,80 @@ void nfs_mark_client_ready(struct nfs_client *clp, int state)
 	clp->cl_cons_state = state;
 	wake_up_all(&nfs_client_active_wq);
 }
+
+/*
+ * Initialise the timeout values for a connection
+ */
+static void nfs_init_timeout_values(struct rpc_timeout *to, int proto,
+				    unsigned int timeo, unsigned int retrans)
+{
+	to->to_initval = timeo * HZ / 10;
+	to->to_retries = retrans;
+	if (!to->to_retries)
+		to->to_retries = 2;
+
+	switch (proto) {
+	case IPPROTO_TCP:
+		if (!to->to_initval)
+			to->to_initval = 60 * HZ;
+		if (to->to_initval > NFS_MAX_TCP_TIMEOUT)
+			to->to_initval = NFS_MAX_TCP_TIMEOUT;
+		to->to_increment = to->to_initval;
+		to->to_maxval = to->to_initval + (to->to_increment * to->to_retries);
+		to->to_exponential = 0;
+		break;
+	case IPPROTO_UDP:
+	default:
+		if (!to->to_initval)
+			to->to_initval = 11 * HZ / 10;
+		if (to->to_initval > NFS_MAX_UDP_TIMEOUT)
+			to->to_initval = NFS_MAX_UDP_TIMEOUT;
+		to->to_maxval = NFS_MAX_UDP_TIMEOUT;
+		to->to_exponential = 1;
+		break;
+	}
+}
+
+/*
+ * Create an RPC client handle
+ */
+int nfs_create_rpc_client(struct nfs_client *clp, int proto,
+			  unsigned int timeo,
+			  unsigned int retrans,
+			  rpc_authflavor_t flavor)
+{
+	struct rpc_timeout	timeparms;
+	struct rpc_xprt		*xprt = NULL;
+	struct rpc_clnt		*clnt = NULL;
+
+	if (!IS_ERR(clp->cl_rpcclient))
+		return 0;
+
+	nfs_init_timeout_values(&timeparms, proto, timeo, retrans);
+	clp->retrans_timeo = timeparms.to_initval;
+	clp->retrans_count = timeparms.to_retries;
+
+	/* create transport and client */
+	xprt = xprt_create_proto(proto, &clp->cl_addr, &timeparms);
+	if (IS_ERR(xprt)) {
+		dprintk("%s: cannot create RPC transport. Error = %ld\n",
+				__FUNCTION__, PTR_ERR(xprt));
+		return PTR_ERR(xprt);
+	}
+
+	/* Bind to a reserved port! */
+	xprt->resvport = 1;
+	/* Create the client RPC handle */
+	clnt = rpc_create_client(xprt, clp->cl_hostname, &nfs_program,
+				 clp->rpc_ops->version, RPC_AUTH_UNIX);
+	if (IS_ERR(clnt)) {
+		dprintk("%s: cannot create RPC client. Error = %ld\n",
+				__FUNCTION__, PTR_ERR(clnt));
+		return PTR_ERR(clnt);
+	}
+
+	clnt->cl_intr     = 1;
+	clnt->cl_softrtry = 1;
+	clp->cl_rpcclient = clnt;
+	return 0;
+}
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index ac370d5..2f3aa52 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -20,6 +20,8 @@ extern void nfs_put_client(struct nfs_client *);
 extern struct nfs_client *nfs_find_client(const struct sockaddr_in *, int);
 extern struct nfs_client *nfs_get_client(const char *, const struct sockaddr_in *, int);
 extern void nfs_mark_client_ready(struct nfs_client *, int);
+extern int nfs_create_rpc_client(struct nfs_client *, int, unsigned int,
+				 unsigned int, rpc_authflavor_t);
 
 /* nfs4namespace.c */
 #ifdef CONFIG_NFS_V4
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index 3e53712..0622af0 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -90,8 +90,8 @@ nfs3_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle,
 	int	status;
 
 	status = do_proc_get_root(server->client, fhandle, info);
-	if (status && server->client_sys != server->client)
-		status = do_proc_get_root(server->client_sys, fhandle, info);
+	if (status && server->nfs_client->cl_rpcclient != server->client)
+		status = do_proc_get_root(server->nfs_client->cl_rpcclient, fhandle, info);
 	return status;
 }
 
@@ -785,7 +785,7 @@ nfs3_proc_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle,
 
 	dprintk("NFS call  fsinfo\n");
 	nfs_fattr_init(info->fattr);
-	status = rpc_call_sync(server->client_sys, &msg, 0);
+	status = rpc_call_sync(server->nfs_client->cl_rpcclient, &msg, 0);
 	dprintk("NFS reply fsinfo: %d\n", status);
 	return status;
 }
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index 7767690..5a8b940 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -66,14 +66,14 @@ nfs_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle,
 
 	dprintk("%s: call getattr\n", __FUNCTION__);
 	nfs_fattr_init(fattr);
-	status = rpc_call_sync(server->client_sys, &msg, 0);
+	status = rpc_call_sync(server->nfs_client->cl_rpcclient, &msg, 0);
 	dprintk("%s: reply getattr: %d\n", __FUNCTION__, status);
 	if (status)
 		return status;
 	dprintk("%s: call statfs\n", __FUNCTION__);
 	msg.rpc_proc = &nfs_procedures[NFSPROC_STATFS];
 	msg.rpc_resp = &fsinfo;
-	status = rpc_call_sync(server->client_sys, &msg, 0);
+	status = rpc_call_sync(server->nfs_client->cl_rpcclient, &msg, 0);
 	dprintk("%s: reply statfs: %d\n", __FUNCTION__, status);
 	if (status)
 		return status;
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index e1e5eab..8558341 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -60,52 +60,6 @@
  */
 #define NFS_MAX_READAHEAD	(RPC_DEF_SLOT_TABLE - 1)
 
-/*
- * RPC cruft for NFS
- */
-static struct rpc_version * nfs_version[] = {
-	NULL,
-	NULL,
-	&nfs_version2,
-#if defined(CONFIG_NFS_V3)
-	&nfs_version3,
-#elif defined(CONFIG_NFS_V4)
-	NULL,
-#endif
-#if defined(CONFIG_NFS_V4)
-	&nfs_version4,
-#endif
-};
-
-static struct rpc_program nfs_program = {
-	.name			= "nfs",
-	.number			= NFS_PROGRAM,
-	.nrvers			= ARRAY_SIZE(nfs_version),
-	.version		= nfs_version,
-	.stats			= &nfs_rpcstat,
-	.pipe_dir_name		= "/nfs",
-};
-
-struct rpc_stat nfs_rpcstat = {
-	.program		= &nfs_program
-};
-
-
-#ifdef CONFIG_NFS_V3_ACL
-static struct rpc_stat		nfsacl_rpcstat = { &nfsacl_program };
-static struct rpc_version *	nfsacl_version[] = {
-	[3]			= &nfsacl_version3,
-};
-
-struct rpc_program		nfsacl_program = {
-	.name =			"nfsacl",
-	.number =		NFS_ACL_PROGRAM,
-	.nrvers =		ARRAY_SIZE(nfsacl_version),
-	.version =		nfsacl_version,
-	.stats =		&nfsacl_rpcstat,
-};
-#endif  /* CONFIG_NFS_V3_ACL */
-
 static void nfs_umount_begin(struct vfsmount *, int);
 static int  nfs_statfs(struct dentry *, struct kstatfs *);
 static int  nfs_show_options(struct seq_file *, struct vfsmount *);
@@ -376,8 +330,8 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss,
 			proto = buf;
 	}
 	seq_printf(m, ",proto=%s", proto);
-	seq_printf(m, ",timeo=%lu", 10U * nfss->retrans_timeo / HZ);
-	seq_printf(m, ",retrans=%u", nfss->retrans_count);
+	seq_printf(m, ",timeo=%lu", 10U * clp->retrans_timeo / HZ);
+	seq_printf(m, ",retrans=%u", clp->retrans_count);
 	seq_printf(m, ",sec=%s", nfs_pseudoflavour_to_name(nfss->client->cl_auth->au_flavor));
 }
 
@@ -622,49 +576,16 @@ out_no_root:
 }
 
 /*
- * Initialise the timeout values for a connection
- */
-static void nfs_init_timeout_values(struct rpc_timeout *to, int proto, unsigned int timeo, unsigned int retrans)
-{
-	to->to_initval = timeo * HZ / 10;
-	to->to_retries = retrans;
-	if (!to->to_retries)
-		to->to_retries = 2;
-
-	switch (proto) {
-	case IPPROTO_TCP:
-		if (!to->to_initval)
-			to->to_initval = 60 * HZ;
-		if (to->to_initval > NFS_MAX_TCP_TIMEOUT)
-			to->to_initval = NFS_MAX_TCP_TIMEOUT;
-		to->to_increment = to->to_initval;
-		to->to_maxval = to->to_initval + (to->to_increment * to->to_retries);
-		to->to_exponential = 0;
-		break;
-	case IPPROTO_UDP:
-	default:
-		if (!to->to_initval)
-			to->to_initval = 11 * HZ / 10;
-		if (to->to_initval > NFS_MAX_UDP_TIMEOUT)
-			to->to_initval = NFS_MAX_UDP_TIMEOUT;
-		to->to_maxval = NFS_MAX_UDP_TIMEOUT;
-		to->to_exponential = 1;
-		break;
-	}
-}
-
-/*
  * Create an RPC client handle.
  */
 static struct rpc_clnt *
 nfs_create_client(struct nfs_server *server, const struct nfs_mount_data *data)
 {
 	struct nfs_client	*clp;
-	struct rpc_timeout	timeparms;
-	struct rpc_xprt		*xprt = NULL;
-	struct rpc_clnt		*clnt = NULL;
+	struct rpc_clnt		*clnt;
 	int			proto = (data->flags & NFS_MOUNT_TCP) ? IPPROTO_TCP : IPPROTO_UDP;
 	int			nfsversion = 2;
+	int			err;
 
 #ifdef CONFIG_NFS_V3
 	if (server->flags & NFS_MOUNT_VER3)
@@ -677,52 +598,54 @@ nfs_create_client(struct nfs_server *server, const struct nfs_mount_data *data)
 		return ERR_PTR(PTR_ERR(clp));
 	}
 
-	nfs_init_timeout_values(&timeparms, proto, data->timeo, data->retrans);
-
-	server->retrans_timeo = timeparms.to_initval;
-	server->retrans_count = timeparms.to_retries;
-
-	/* Check NFS protocol revision and initialize RPC op vector
-	 * and file handle pool. */
+	if (clp->cl_cons_state == NFS_CS_INITING) {
+		/* Check NFS protocol revision and initialize RPC op
+		 * vector and file handle pool. */
 #ifdef CONFIG_NFS_V3
-	if (nfsversion == 3) {
-		clp->rpc_ops = &nfs_v3_clientops;
-		server->caps |= NFS_CAP_READDIRPLUS;
-	} else {
-		clp->rpc_ops = &nfs_v2_clientops;
-	}
+		if (nfsversion == 3) {
+			clp->rpc_ops = &nfs_v3_clientops;
+			server->caps |= NFS_CAP_READDIRPLUS;
+		} else {
+			clp->rpc_ops = &nfs_v2_clientops;
+		}
 #else
-	clp->rpc_ops = &nfs_v2_clientops;
+		clp->rpc_ops = &nfs_v2_clientops;
 #endif
 
-	/* create transport and client */
-	xprt = xprt_create_proto(proto, &server->addr, &timeparms);
-	if (IS_ERR(xprt)) {
-		dprintk("%s: cannot create RPC transport. Error = %ld\n",
-				__FUNCTION__, PTR_ERR(xprt));
-		nfs_mark_client_ready(clp, PTR_ERR(xprt));
-		nfs_put_client(clp);
-		return (struct rpc_clnt *)xprt;
+		/* create transport and client */
+		err = nfs_create_rpc_client(clp, proto, data->timeo,
+					    data->retrans, RPC_AUTH_UNIX);
+		if (err < 0)
+			goto client_init_error;
+
+		nfs_mark_client_ready(clp, 0);
 	}
-	clnt = rpc_create_client(xprt, server->hostname, &nfs_program,
-				 clp->cl_nfsversion, data->pseudoflavor);
+
+	/* create an nfs_server-specific client */
+	clnt = rpc_clone_client(clp->cl_rpcclient);
 	if (IS_ERR(clnt)) {
-		dprintk("%s: cannot create RPC client. Error = %ld\n",
-				__FUNCTION__, PTR_ERR(xprt));
-		goto out_fail;
+		dprintk("%s: couldn't create rpc_client!\n", __FUNCTION__);
+		nfs_put_client(clp);
+		return ERR_PTR(PTR_ERR(clnt));
 	}
 
-	clnt->cl_intr     = 1;
-	clnt->cl_softrtry = 1;
+	if (data->pseudoflavor != clp->cl_rpcclient->cl_auth->au_flavor) {
+		struct rpc_auth *auth;
+
+		auth = rpcauth_create(data->pseudoflavor, server->client);
+		if (IS_ERR(auth)) {
+			dprintk("%s: couldn't create credcache!\n", __FUNCTION__);
+			return ERR_PTR(PTR_ERR(auth));
+		}
+	}
 
-	nfs_mark_client_ready(clp, 0);
 	server->nfs_client = clp;
 	return clnt;
 
-out_fail:
-	nfs_mark_client_ready(clp, PTR_ERR(xprt));
+client_init_error:
+	nfs_mark_client_ready(clp, err);
 	nfs_put_client(clp);
-	return clnt;
+	return ERR_PTR(err);
 }
 
 /*
@@ -741,7 +664,7 @@ static struct nfs_server *nfs_clone_server(struct super_block *sb, struct nfs_cl
 	sb->s_blocksize_bits = data->sb->s_blocksize_bits;
 	sb->s_maxbytes = data->sb->s_maxbytes;
 
-	server->client_sys = server->client_acl = ERR_PTR(-EINVAL);
+	server->client_acl = ERR_PTR(-EINVAL);
 	server->io_stats = nfs_alloc_iostats();
 	if (server->io_stats == NULL)
 		goto out;
@@ -750,11 +673,6 @@ static struct nfs_server *nfs_clone_server(struct super_block *sb, struct nfs_cl
 	if (IS_ERR((err = server->client)))
 		goto out;
 
-	if (!IS_ERR(parent->client_sys)) {
-		server->client_sys = rpc_clone_client(parent->client_sys);
-		if (IS_ERR((err = server->client_sys)))
-			goto out;
-	}
 	if (!IS_ERR(parent->client_acl)) {
 		server->client_acl = rpc_clone_client(parent->client_acl);
 		if (IS_ERR((err = server->client_acl)))
@@ -813,7 +731,7 @@ static int nfs_clone_generic_sb(struct nfs_clone_mount *data,
 		error = PTR_ERR(sb);
 		goto kill_rpciod;
 	}
-		
+
 	if (sb->s_root)
 		goto out_rpciod_down;
 
@@ -896,19 +814,6 @@ nfs_fill_super(struct super_block *sb, struct nfs_mount_data *data, int silent)
 		return PTR_ERR(server->client);
 
 	/* RFC 2623, sec 2.3.2 */
-	if (authflavor != RPC_AUTH_UNIX) {
-		struct rpc_auth *auth;
-
-		server->client_sys = rpc_clone_client(server->client);
-		if (IS_ERR(server->client_sys))
-			return PTR_ERR(server->client_sys);
-		auth = rpcauth_create(RPC_AUTH_UNIX, server->client_sys);
-		if (IS_ERR(auth))
-			return PTR_ERR(auth);
-	} else {
-		atomic_inc(&server->client->cl_count);
-		server->client_sys = server->client;
-	}
 	if (server->flags & NFS_MOUNT_VER3) {
 #ifdef CONFIG_NFS_V3_ACL
 		if (!(server->flags & NFS_MOUNT_NOACL)) {
@@ -1012,7 +917,7 @@ static int nfs_get_sb(struct file_system_type *fs_type,
 		goto out_err_noserver;
 	/* Zero out the NFS state stuff */
 	init_nfsv4_state(server);
-	server->client = server->client_sys = server->client_acl = ERR_PTR(-EINVAL);
+	server->client = server->client_acl = ERR_PTR(-EINVAL);
 
 	root = &server->fh;
 	if (data->flags & NFS_MOUNT_VER3)
@@ -1083,8 +988,6 @@ static void nfs_kill_super(struct super_block *s)
 
 	if (!IS_ERR(server->client))
 		rpc_shutdown_client(server->client);
-	if (!IS_ERR(server->client_sys))
-		rpc_shutdown_client(server->client_sys);
 	if (!IS_ERR(server->client_acl))
 		rpc_shutdown_client(server->client_acl);
 
@@ -1121,10 +1024,9 @@ static int nfs_clone_nfs_sb(struct file_system_type *fs_type,
 
 #ifdef CONFIG_NFS_V4
 static struct rpc_clnt *nfs4_create_client(struct nfs_server *server,
-	struct rpc_timeout *timeparms, int proto, rpc_authflavor_t flavor)
+	int timeo, int retrans, int proto, rpc_authflavor_t flavor)
 {
 	struct nfs_client *clp;
-	struct rpc_xprt *xprt = NULL;
 	struct rpc_clnt *clnt = NULL;
 	int err = -EIO;
 
@@ -1138,26 +1040,10 @@ static struct rpc_clnt *nfs4_create_client(struct nfs_server *server,
 	if (clp->cl_cons_state == NFS_CS_INITING) {
 		clp->rpc_ops = &nfs_v4_clientops;
 
-		xprt = xprt_create_proto(proto, &server->addr, timeparms);
-		if (IS_ERR(xprt)) {
-			err = PTR_ERR(xprt);
-			dprintk("%s: cannot create RPC transport. Error = %d\n",
-					__FUNCTION__, err);
+		err = nfs_create_rpc_client(clp, proto, timeo, retrans, flavor);
+		if (err < 0)
 			goto client_init_error;
-		}
-		/* Bind to a reserved port! */
-		xprt->resvport = 1;
-		clnt = rpc_create_client(xprt, server->hostname, &nfs_program,
-				clp->cl_nfsversion, flavor);
-		if (IS_ERR(clnt)) {
-			err = PTR_ERR(clnt);
-			dprintk("%s: cannot create RPC client. Error = %d\n",
-					__FUNCTION__, err);
-			goto client_init_error;
-		}
-		clnt->cl_intr     = 1;
-		clnt->cl_softrtry = 1;
-		clp->cl_rpcclient = clnt;
+
 		memcpy(clp->cl_ipaddr, server->ip_addr, sizeof(clp->cl_ipaddr));
 		err = nfs_idmap_new(clp);
 		if (err < 0) {
@@ -1205,7 +1091,6 @@ client_init_error:
 static int nfs4_fill_super(struct super_block *sb, struct nfs4_mount_data *data, int silent)
 {
 	struct nfs_server *server;
-	struct rpc_timeout timeparms;
 	rpc_authflavor_t authflavour;
 	int err = -EIO;
 
@@ -1224,11 +1109,6 @@ static int nfs4_fill_super(struct super_block *sb, struct nfs4_mount_data *data,
 	server->acdirmin = data->acdirmin*HZ;
 	server->acdirmax = data->acdirmax*HZ;
 
-	nfs_init_timeout_values(&timeparms, data->proto, data->timeo, data->retrans);
-
-	server->retrans_timeo = timeparms.to_initval;
-	server->retrans_count = timeparms.to_retries;
-
 	/* Now create transport and client */
 	authflavour = RPC_AUTH_UNIX;
 	if (data->auth_flavourlen != 0) {
@@ -1244,7 +1124,8 @@ static int nfs4_fill_super(struct super_block *sb, struct nfs4_mount_data *data,
 		}
 	}
 
-	server->client = nfs4_create_client(server, &timeparms, data->proto, authflavour);
+	server->client = nfs4_create_client(server, data->timeo, data->retrans,
+					    data->proto, authflavour);
 	if (IS_ERR(server->client)) {
 		err = PTR_ERR(server->client);
 			dprintk("%s: cannot create RPC client. Error = %d\n",
@@ -1318,7 +1199,7 @@ static int nfs4_get_sb(struct file_system_type *fs_type,
 		return -ENOMEM;
 	/* Zero out the NFS state stuff */
 	init_nfsv4_state(server);
-	server->client = server->client_sys = server->client_acl = ERR_PTR(-EINVAL);
+	server->client = server->client_acl = ERR_PTR(-EINVAL);
 
 	p = nfs_copy_user_string(NULL, &data->hostname, 256);
 	if (IS_ERR(p))
@@ -1489,7 +1370,6 @@ err:
 static struct nfs_server *nfs4_referral_server(struct super_block *sb, struct nfs_clone_mount *data)
 {
 	struct nfs_server *server = NFS_SB(sb);
-	struct rpc_timeout timeparms;
 	int proto, timeo, retrans;
 	void *err;
 
@@ -1498,11 +1378,11 @@ static struct nfs_server *nfs4_referral_server(struct super_block *sb, struct nf
 	   set the timeouts and retries to low values */
 	timeo = 2;
 	retrans = 1;
-	nfs_init_timeout_values(&timeparms, proto, timeo, retrans);
 
 	nfs_put_client(server->nfs_client);
 	server->nfs_client = NULL;
-	server->client = nfs4_create_client(server, &timeparms, proto, data->authflavor);
+	server->client = nfs4_create_client(server, timeo, retrans, proto,
+					    data->authflavor);
 	if (IS_ERR((err = server->client)))
 		goto out_err;
 
-- 
cgit v1.1


From cf6d7b5de8535a9f0088c5cc28ee2dae87371b4a Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 22 Aug 2006 20:06:12 -0400
Subject: NFS: Start rpciod in server common management

Start rpciod in the server common (nfs_client struct) management code rather
than in the superblock management code.  This means we only need to "start" it
once per server instead of once per superblock.

Signed-Off-By: David Howells <dhowells@redhat.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/super.c | 31 ++++++-------------------------
 1 file changed, 6 insertions(+), 25 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 8558341..5842d51 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -722,18 +722,15 @@ static int nfs_clone_generic_sb(struct nfs_clone_mount *data,
 	if (server->hostname == NULL)
 		goto free_server;
 	memcpy(server->hostname, hostname, len);
-	error = rpciod_up();
-	if (error != 0)
-		goto free_hostname;
 
 	sb = fill_sb(server, data);
 	if (IS_ERR(sb)) {
 		error = PTR_ERR(sb);
-		goto kill_rpciod;
+		goto free_hostname;
 	}
 
 	if (sb->s_root)
-		goto out_rpciod_down;
+		goto out_share;
 
 	server = fill_server(sb, data);
 	if (IS_ERR(server)) {
@@ -745,14 +742,11 @@ out_deactivate:
 	up_write(&sb->s_umount);
 	deactivate_super(sb);
 	return error;
-out_rpciod_down:
-	rpciod_down();
+out_share:
 	kfree(server->hostname);
 	nfs_put_client(server->nfs_client);
 	kfree(server);
 	return simple_set_mnt(mnt, sb);
-kill_rpciod:
-	rpciod_down();
 free_hostname:
 	kfree(server->hostname);
 free_server:
@@ -939,22 +933,14 @@ static int nfs_get_sb(struct file_system_type *fs_type,
 		goto out_err;
 	}
 
-	/* Fire up rpciod if not yet running */
-	error = rpciod_up();
-	if (error < 0) {
-		dprintk("%s: couldn't start rpciod! Error = %d\n",
-				__FUNCTION__, error);
-		goto out_err;
-	}
-
 	s = sget(fs_type, nfs_compare_super, nfs_set_super, server);
 	if (IS_ERR(s)) {
 		error = PTR_ERR(s);
-		goto out_err_rpciod;
+		goto out_err;
 	}
 
 	if (s->s_root)
-		goto out_rpciod_down;
+		goto out_share;
 
 	s->s_flags = flags;
 
@@ -967,13 +953,10 @@ static int nfs_get_sb(struct file_system_type *fs_type,
 	s->s_flags |= MS_ACTIVE;
 	return simple_set_mnt(mnt, s);
 
-out_rpciod_down:
-	rpciod_down();
+out_share:
 	kfree(server);
 	return simple_set_mnt(mnt, s);
 
-out_err_rpciod:
-	rpciod_down();
 out_err:
 	kfree(server);
 out_err_noserver:
@@ -994,8 +977,6 @@ static void nfs_kill_super(struct super_block *s)
 	if (!(server->flags & NFS_MOUNT_NONLM))
 		lockd_down();	/* release rpc.lockd */
 
-	rpciod_down();		/* release rpciod */
-
 	nfs_free_iostats(server->io_stats);
 	kfree(server->hostname);
 	nfs_put_client(server->nfs_client);
-- 
cgit v1.1


From 54ceac4515986030c2502960be620198dd8fe25b Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 22 Aug 2006 20:06:13 -0400
Subject: NFS: Share NFS superblocks per-protocol per-server per-FSID

The attached patch makes NFS share superblocks between mounts from the same
server and FSID over the same protocol.

It does this by creating each superblock with a false root and returning the
real root dentry in the vfsmount presented by get_sb(). The root dentry set
starts off as an anonymous dentry if we don't already have the dentry for its
inode, otherwise it simply returns the dentry we already have.

We may thus end up with several trees of dentries in the superblock, and if at
some later point one of anonymous tree roots is discovered by normal filesystem
activity to be located in another tree within the superblock, the anonymous
root is named and materialises attached to the second tree at the appropriate
point.

Why do it this way? Why not pass an extra argument to the mount() syscall to
indicate the subpath and then pathwalk from the server root to the desired
directory? You can't guarantee this will work for two reasons:

 (1) The root and intervening nodes may not be accessible to the client.

     With NFS2 and NFS3, for instance, mountd is called on the server to get
     the filehandle for the tip of a path. mountd won't give us handles for
     anything we don't have permission to access, and so we can't set up NFS
     inodes for such nodes, and so can't easily set up dentries (we'd have to
     have ghost inodes or something).

     With this patch we don't actually create dentries until we get handles
     from the server that we can use to set up their inodes, and we don't
     actually bind them into the tree until we know for sure where they go.

 (2) Inaccessible symbolic links.

     If we're asked to mount two exports from the server, eg:

	mount warthog:/warthog/aaa/xxx /mmm
	mount warthog:/warthog/bbb/yyy /nnn

     We may not be able to access anything nearer the root than xxx and yyy,
     but we may find out later that /mmm/www/yyy, say, is actually the same
     directory as the one mounted on /nnn. What we might then find out, for
     example, is that /warthog/bbb was actually a symbolic link to
     /warthog/aaa/xxx/www, but we can't actually determine that by talking to
     the server until /warthog is made available by NFS.

     This would lead to having constructed an errneous dentry tree which we
     can't easily fix. We can end up with a dentry marked as a directory when
     it should actually be a symlink, or we could end up with an apparently
     hardlinked directory.

     With this patch we need not make assumptions about the type of a dentry
     for which we can't retrieve information, nor need we assume we know its
     place in the grand scheme of things until we actually see that place.

This patch reduces the possibility of aliasing in the inode and page caches for
inodes that may be accessed by more than one NFS export. It also reduces the
number of superblocks required for NFS where there are many NFS exports being
used from a server (home directory server + autofs for example).

This in turn makes it simpler to do local caching of network filesystems, as it
can then be guaranteed that there won't be links from multiple inodes in
separate superblocks to the same cache file.

Obviously, cache aliasing between different levels of NFS protocol could still
be a problem, but at least that gives us another key to use when indexing the
cache.

This patch makes the following changes:

 (1) The server record construction/destruction has been abstracted out into
     its own set of functions to make things easier to get right.  These have
     been moved into fs/nfs/client.c.

     All the code in fs/nfs/client.c has to do with the management of
     connections to servers, and doesn't touch superblocks in any way; the
     remaining code in fs/nfs/super.c has to do with VFS superblock management.

 (2) The sequence of events undertaken by NFS mount is now reordered:

     (a) A volume representation (struct nfs_server) is allocated.

     (b) A server representation (struct nfs_client) is acquired.  This may be
     	 allocated or shared, and is keyed on server address, port and NFS
     	 version.

     (c) If allocated, the client representation is initialised.  The state
     	 member variable of nfs_client is used to prevent a race during
     	 initialisation from two mounts.

     (d) For NFS4 a simple pathwalk is performed, walking from FH to FH to find
     	 the root filehandle for the mount (fs/nfs/getroot.c).  For NFS2/3 we
     	 are given the root FH in advance.

     (e) The volume FSID is probed for on the root FH.

     (f) The volume representation is initialised from the FSINFO record
     	 retrieved on the root FH.

     (g) sget() is called to acquire a superblock.  This may be allocated or
     	 shared, keyed on client pointer and FSID.

     (h) If allocated, the superblock is initialised.

     (i) If the superblock is shared, then the new nfs_server record is
     	 discarded.

     (j) The root dentry for this mount is looked up from the root FH.

     (k) The root dentry for this mount is assigned to the vfsmount.

 (3) nfs_readdir_lookup() creates dentries for each of the entries readdir()
     returns; this function now attaches disconnected trees from alternate
     roots that happen to be discovered attached to a directory being read (in
     the same way nfs_lookup() is made to do for lookup ops).

     The new d_materialise_unique() function is now used to do this, thus
     permitting the whole thing to be done under one set of locks, and thus
     avoiding any race between mount and lookup operations on the same
     directory.

 (4) The client management code uses a new debug facility: NFSDBG_CLIENT which
     is set by echoing 1024 to /proc/net/sunrpc/nfs_debug.

 (5) Clone mounts are now called xdev mounts.

 (6) Use the dentry passed to the statfs() op as the handle for retrieving fs
     statistics rather than the root dentry of the superblock (which is now a
     dummy).

Signed-Off-By: David Howells <dhowells@redhat.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/Makefile        |    2 +-
 fs/nfs/client.c        |  735 ++++++++++++++++++++++++++++-
 fs/nfs/dir.c           |   16 +-
 fs/nfs/getroot.c       |  306 ++++++++++++
 fs/nfs/idmap.c         |    3 +-
 fs/nfs/inode.c         |    2 +-
 fs/nfs/internal.h      |   82 ++--
 fs/nfs/namespace.c     |   25 +-
 fs/nfs/nfs3proc.c      |    2 +-
 fs/nfs/nfs4_fs.h       |    6 -
 fs/nfs/nfs4namespace.c |  110 ++++-
 fs/nfs/nfs4proc.c      |   59 +--
 fs/nfs/nfs4renewd.c    |   13 -
 fs/nfs/nfs4state.c     |   18 -
 fs/nfs/read.c          |    2 +-
 fs/nfs/super.c         | 1207 ++++++++++++++++++------------------------------
 fs/nfs/write.c         |    2 +-
 17 files changed, 1646 insertions(+), 944 deletions(-)
 create mode 100644 fs/nfs/getroot.c

(limited to 'fs')

diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile
index 3b993a6..f4580b4 100644
--- a/fs/nfs/Makefile
+++ b/fs/nfs/Makefile
@@ -4,7 +4,7 @@
 
 obj-$(CONFIG_NFS_FS) += nfs.o
 
-nfs-y 			:= client.o dir.o file.o inode.o super.o nfs2xdr.o \
+nfs-y 			:= client.o dir.o file.o getroot.o inode.o super.o nfs2xdr.o \
 			   pagelist.o proc.o read.o symlink.o unlink.o \
 			   write.o namespace.o
 nfs-$(CONFIG_ROOT_NFS)	+= nfsroot.o mount_clnt.o      
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index c08cab9..dafba60 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -48,6 +48,7 @@
 
 static DEFINE_SPINLOCK(nfs_client_lock);
 static LIST_HEAD(nfs_client_list);
+static LIST_HEAD(nfs_volume_list);
 static DECLARE_WAIT_QUEUE_HEAD(nfs_client_active_wq);
 
 /*
@@ -268,9 +269,9 @@ struct nfs_client *nfs_find_client(const struct sockaddr_in *addr, int nfsversio
  * Look up a client by IP address and protocol version
  * - creates a new record if one doesn't yet exist
  */
-struct nfs_client *nfs_get_client(const char *hostname,
-				  const struct sockaddr_in *addr,
-				  int nfsversion)
+static struct nfs_client *nfs_get_client(const char *hostname,
+					 const struct sockaddr_in *addr,
+					 int nfsversion)
 {
 	struct nfs_client *clp, *new = NULL;
 	int error;
@@ -340,6 +341,8 @@ found_client:
 		return ERR_PTR(error);
 	}
 
+	BUG_ON(clp->cl_cons_state != NFS_CS_READY);
+
 	dprintk("--> nfs_get_client() = %p [share]\n", clp);
 	return clp;
 }
@@ -347,7 +350,7 @@ found_client:
 /*
  * Mark a server as ready or failed
  */
-void nfs_mark_client_ready(struct nfs_client *clp, int state)
+static void nfs_mark_client_ready(struct nfs_client *clp, int state)
 {
 	clp->cl_cons_state = state;
 	wake_up_all(&nfs_client_active_wq);
@@ -389,10 +392,10 @@ static void nfs_init_timeout_values(struct rpc_timeout *to, int proto,
 /*
  * Create an RPC client handle
  */
-int nfs_create_rpc_client(struct nfs_client *clp, int proto,
-			  unsigned int timeo,
-			  unsigned int retrans,
-			  rpc_authflavor_t flavor)
+static int nfs_create_rpc_client(struct nfs_client *clp, int proto,
+						unsigned int timeo,
+						unsigned int retrans,
+						rpc_authflavor_t flavor)
 {
 	struct rpc_timeout	timeparms;
 	struct rpc_xprt		*xprt = NULL;
@@ -429,3 +432,719 @@ int nfs_create_rpc_client(struct nfs_client *clp, int proto,
 	clp->cl_rpcclient = clnt;
 	return 0;
 }
+
+/*
+ * Version 2 or 3 client destruction
+ */
+static void nfs_destroy_server(struct nfs_server *server)
+{
+	if (!IS_ERR(server->client_acl))
+		rpc_shutdown_client(server->client_acl);
+
+	if (!(server->flags & NFS_MOUNT_NONLM))
+		lockd_down();	/* release rpc.lockd */
+}
+
+/*
+ * Version 2 or 3 lockd setup
+ */
+static int nfs_start_lockd(struct nfs_server *server)
+{
+	int error = 0;
+
+	if (server->nfs_client->cl_nfsversion > 3)
+		goto out;
+	if (server->flags & NFS_MOUNT_NONLM)
+		goto out;
+	error = lockd_up();
+	if (error < 0)
+		server->flags |= NFS_MOUNT_NONLM;
+	else
+		server->destroy = nfs_destroy_server;
+out:
+	return error;
+}
+
+/*
+ * Initialise an NFSv3 ACL client connection
+ */
+#ifdef CONFIG_NFS_V3_ACL
+static void nfs_init_server_aclclient(struct nfs_server *server)
+{
+	if (server->nfs_client->cl_nfsversion != 3)
+		goto out_noacl;
+	if (server->flags & NFS_MOUNT_NOACL)
+		goto out_noacl;
+
+	server->client_acl = rpc_bind_new_program(server->client, &nfsacl_program, 3);
+	if (IS_ERR(server->client_acl))
+		goto out_noacl;
+
+	/* No errors! Assume that Sun nfsacls are supported */
+	server->caps |= NFS_CAP_ACLS;
+	return;
+
+out_noacl:
+	server->caps &= ~NFS_CAP_ACLS;
+}
+#else
+static inline void nfs_init_server_aclclient(struct nfs_server *server)
+{
+	server->flags &= ~NFS_MOUNT_NOACL;
+	server->caps &= ~NFS_CAP_ACLS;
+}
+#endif
+
+/*
+ * Create a general RPC client
+ */
+static int nfs_init_server_rpcclient(struct nfs_server *server, rpc_authflavor_t pseudoflavour)
+{
+	struct nfs_client *clp = server->nfs_client;
+
+	server->client = rpc_clone_client(clp->cl_rpcclient);
+	if (IS_ERR(server->client)) {
+		dprintk("%s: couldn't create rpc_client!\n", __FUNCTION__);
+		return PTR_ERR(server->client);
+	}
+
+	if (pseudoflavour != clp->cl_rpcclient->cl_auth->au_flavor) {
+		struct rpc_auth *auth;
+
+		auth = rpcauth_create(pseudoflavour, server->client);
+		if (IS_ERR(auth)) {
+			dprintk("%s: couldn't create credcache!\n", __FUNCTION__);
+			return PTR_ERR(auth);
+		}
+	}
+	server->client->cl_softrtry = 0;
+	if (server->flags & NFS_MOUNT_SOFT)
+		server->client->cl_softrtry = 1;
+
+	server->client->cl_intr = 0;
+	if (server->flags & NFS4_MOUNT_INTR)
+		server->client->cl_intr = 1;
+
+	return 0;
+}
+
+/*
+ * Initialise an NFS2 or NFS3 client
+ */
+static int nfs_init_client(struct nfs_client *clp, const struct nfs_mount_data *data)
+{
+	int proto = (data->flags & NFS_MOUNT_TCP) ? IPPROTO_TCP : IPPROTO_UDP;
+	int error;
+
+	if (clp->cl_cons_state == NFS_CS_READY) {
+		/* the client is already initialised */
+		dprintk("<-- nfs_init_client() = 0 [already %p]\n", clp);
+		return 0;
+	}
+
+	/* Check NFS protocol revision and initialize RPC op vector */
+	clp->rpc_ops = &nfs_v2_clientops;
+#ifdef CONFIG_NFS_V3
+	if (clp->cl_nfsversion == 3)
+		clp->rpc_ops = &nfs_v3_clientops;
+#endif
+	/*
+	 * Create a client RPC handle for doing FSSTAT with UNIX auth only
+	 * - RFC 2623, sec 2.3.2
+	 */
+	error = nfs_create_rpc_client(clp, proto, data->timeo, data->retrans,
+			RPC_AUTH_UNIX);
+	if (error < 0)
+		goto error;
+	nfs_mark_client_ready(clp, NFS_CS_READY);
+	return 0;
+
+error:
+	nfs_mark_client_ready(clp, error);
+	dprintk("<-- nfs_init_client() = xerror %d\n", error);
+	return error;
+}
+
+/*
+ * Create a version 2 or 3 client
+ */
+static int nfs_init_server(struct nfs_server *server, const struct nfs_mount_data *data)
+{
+	struct nfs_client *clp;
+	int error, nfsvers = 2;
+
+	dprintk("--> nfs_init_server()\n");
+
+#ifdef CONFIG_NFS_V3
+	if (data->flags & NFS_MOUNT_VER3)
+		nfsvers = 3;
+#endif
+
+	/* Allocate or find a client reference we can use */
+	clp = nfs_get_client(data->hostname, &data->addr, nfsvers);
+	if (IS_ERR(clp)) {
+		dprintk("<-- nfs_init_server() = error %ld\n", PTR_ERR(clp));
+		return PTR_ERR(clp);
+	}
+
+	error = nfs_init_client(clp, data);
+	if (error < 0)
+		goto error;
+
+	server->nfs_client = clp;
+
+	/* Initialise the client representation from the mount data */
+	server->flags = data->flags & NFS_MOUNT_FLAGMASK;
+
+	if (data->rsize)
+		server->rsize = nfs_block_size(data->rsize, NULL);
+	if (data->wsize)
+		server->wsize = nfs_block_size(data->wsize, NULL);
+
+	server->acregmin = data->acregmin * HZ;
+	server->acregmax = data->acregmax * HZ;
+	server->acdirmin = data->acdirmin * HZ;
+	server->acdirmax = data->acdirmax * HZ;
+
+	/* Start lockd here, before we might error out */
+	error = nfs_start_lockd(server);
+	if (error < 0)
+		goto error;
+
+	error = nfs_init_server_rpcclient(server, data->pseudoflavor);
+	if (error < 0)
+		goto error;
+
+	server->namelen  = data->namlen;
+	/* Create a client RPC handle for the NFSv3 ACL management interface */
+	nfs_init_server_aclclient(server);
+	if (clp->cl_nfsversion == 3) {
+		if (server->namelen == 0 || server->namelen > NFS3_MAXNAMLEN)
+			server->namelen = NFS3_MAXNAMLEN;
+		server->caps |= NFS_CAP_READDIRPLUS;
+	} else {
+		if (server->namelen == 0 || server->namelen > NFS2_MAXNAMLEN)
+			server->namelen = NFS2_MAXNAMLEN;
+	}
+
+	dprintk("<-- nfs_init_server() = 0 [new %p]\n", clp);
+	return 0;
+
+error:
+	server->nfs_client = NULL;
+	nfs_put_client(clp);
+	dprintk("<-- nfs_init_server() = xerror %d\n", error);
+	return error;
+}
+
+/*
+ * Load up the server record from information gained in an fsinfo record
+ */
+static void nfs_server_set_fsinfo(struct nfs_server *server, struct nfs_fsinfo *fsinfo)
+{
+	unsigned long max_rpc_payload;
+
+	/* Work out a lot of parameters */
+	if (server->rsize == 0)
+		server->rsize = nfs_block_size(fsinfo->rtpref, NULL);
+	if (server->wsize == 0)
+		server->wsize = nfs_block_size(fsinfo->wtpref, NULL);
+
+	if (fsinfo->rtmax >= 512 && server->rsize > fsinfo->rtmax)
+		server->rsize = nfs_block_size(fsinfo->rtmax, NULL);
+	if (fsinfo->wtmax >= 512 && server->wsize > fsinfo->wtmax)
+		server->wsize = nfs_block_size(fsinfo->wtmax, NULL);
+
+	max_rpc_payload = nfs_block_size(rpc_max_payload(server->client), NULL);
+	if (server->rsize > max_rpc_payload)
+		server->rsize = max_rpc_payload;
+	if (server->rsize > NFS_MAX_FILE_IO_SIZE)
+		server->rsize = NFS_MAX_FILE_IO_SIZE;
+	server->rpages = (server->rsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+	server->backing_dev_info.ra_pages = server->rpages * NFS_MAX_READAHEAD;
+
+	if (server->wsize > max_rpc_payload)
+		server->wsize = max_rpc_payload;
+	if (server->wsize > NFS_MAX_FILE_IO_SIZE)
+		server->wsize = NFS_MAX_FILE_IO_SIZE;
+	server->wpages = (server->wsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+	server->wtmult = nfs_block_bits(fsinfo->wtmult, NULL);
+
+	server->dtsize = nfs_block_size(fsinfo->dtpref, NULL);
+	if (server->dtsize > PAGE_CACHE_SIZE)
+		server->dtsize = PAGE_CACHE_SIZE;
+	if (server->dtsize > server->rsize)
+		server->dtsize = server->rsize;
+
+	if (server->flags & NFS_MOUNT_NOAC) {
+		server->acregmin = server->acregmax = 0;
+		server->acdirmin = server->acdirmax = 0;
+	}
+
+	server->maxfilesize = fsinfo->maxfilesize;
+
+	/* We're airborne Set socket buffersize */
+	rpc_setbufsize(server->client, server->wsize + 100, server->rsize + 100);
+}
+
+/*
+ * Probe filesystem information, including the FSID on v2/v3
+ */
+static int nfs_probe_fsinfo(struct nfs_server *server, struct nfs_fh *mntfh, struct nfs_fattr *fattr)
+{
+	struct nfs_fsinfo fsinfo;
+	struct nfs_client *clp = server->nfs_client;
+	int error;
+
+	dprintk("--> nfs_probe_fsinfo()\n");
+
+	if (clp->rpc_ops->set_capabilities != NULL) {
+		error = clp->rpc_ops->set_capabilities(server, mntfh);
+		if (error < 0)
+			goto out_error;
+	}
+
+	fsinfo.fattr = fattr;
+	nfs_fattr_init(fattr);
+	error = clp->rpc_ops->fsinfo(server, mntfh, &fsinfo);
+	if (error < 0)
+		goto out_error;
+
+	nfs_server_set_fsinfo(server, &fsinfo);
+
+	/* Get some general file system info */
+	if (server->namelen == 0) {
+		struct nfs_pathconf pathinfo;
+
+		pathinfo.fattr = fattr;
+		nfs_fattr_init(fattr);
+
+		if (clp->rpc_ops->pathconf(server, mntfh, &pathinfo) >= 0)
+			server->namelen = pathinfo.max_namelen;
+	}
+
+	dprintk("<-- nfs_probe_fsinfo() = 0\n");
+	return 0;
+
+out_error:
+	dprintk("nfs_probe_fsinfo: error = %d\n", -error);
+	return error;
+}
+
+/*
+ * Copy useful information when duplicating a server record
+ */
+static void nfs_server_copy_userdata(struct nfs_server *target, struct nfs_server *source)
+{
+	target->flags = source->flags;
+	target->acregmin = source->acregmin;
+	target->acregmax = source->acregmax;
+	target->acdirmin = source->acdirmin;
+	target->acdirmax = source->acdirmax;
+	target->caps = source->caps;
+}
+
+/*
+ * Allocate and initialise a server record
+ */
+static struct nfs_server *nfs_alloc_server(void)
+{
+	struct nfs_server *server;
+
+	server = kzalloc(sizeof(struct nfs_server), GFP_KERNEL);
+	if (!server)
+		return NULL;
+
+	server->client = server->client_acl = ERR_PTR(-EINVAL);
+
+	/* Zero out the NFS state stuff */
+	INIT_LIST_HEAD(&server->client_link);
+	INIT_LIST_HEAD(&server->master_link);
+
+	server->io_stats = nfs_alloc_iostats();
+	if (!server->io_stats) {
+		kfree(server);
+		return NULL;
+	}
+
+	return server;
+}
+
+/*
+ * Free up a server record
+ */
+void nfs_free_server(struct nfs_server *server)
+{
+	dprintk("--> nfs_free_server()\n");
+
+	spin_lock(&nfs_client_lock);
+	list_del(&server->client_link);
+	list_del(&server->master_link);
+	spin_unlock(&nfs_client_lock);
+
+	if (server->destroy != NULL)
+		server->destroy(server);
+	if (!IS_ERR(server->client))
+		rpc_shutdown_client(server->client);
+
+	nfs_put_client(server->nfs_client);
+
+	nfs_free_iostats(server->io_stats);
+	kfree(server);
+	nfs_release_automount_timer();
+	dprintk("<-- nfs_free_server()\n");
+}
+
+/*
+ * Create a version 2 or 3 volume record
+ * - keyed on server and FSID
+ */
+struct nfs_server *nfs_create_server(const struct nfs_mount_data *data,
+				     struct nfs_fh *mntfh)
+{
+	struct nfs_server *server;
+	struct nfs_fattr fattr;
+	int error;
+
+	server = nfs_alloc_server();
+	if (!server)
+		return ERR_PTR(-ENOMEM);
+
+	/* Get a client representation */
+	error = nfs_init_server(server, data);
+	if (error < 0)
+		goto error;
+
+	BUG_ON(!server->nfs_client);
+	BUG_ON(!server->nfs_client->rpc_ops);
+	BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops);
+
+	/* Probe the root fh to retrieve its FSID */
+	error = nfs_probe_fsinfo(server, mntfh, &fattr);
+	if (error < 0)
+		goto error;
+	if (!(fattr.valid & NFS_ATTR_FATTR)) {
+		error = server->nfs_client->rpc_ops->getattr(server, mntfh, &fattr);
+		if (error < 0) {
+			dprintk("nfs_create_server: getattr error = %d\n", -error);
+			goto error;
+		}
+	}
+	memcpy(&server->fsid, &fattr.fsid, sizeof(server->fsid));
+
+	dprintk("Server FSID: %llx:%llx\n", server->fsid.major, server->fsid.minor);
+
+	BUG_ON(!server->nfs_client);
+	BUG_ON(!server->nfs_client->rpc_ops);
+	BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops);
+
+	spin_lock(&nfs_client_lock);
+	list_add_tail(&server->client_link, &server->nfs_client->cl_superblocks);
+	list_add_tail(&server->master_link, &nfs_volume_list);
+	spin_unlock(&nfs_client_lock);
+
+	server->mount_time = jiffies;
+	return server;
+
+error:
+	nfs_free_server(server);
+	return ERR_PTR(error);
+}
+
+#ifdef CONFIG_NFS_V4
+/*
+ * Initialise an NFS4 client record
+ */
+static int nfs4_init_client(struct nfs_client *clp,
+		int proto, int timeo, int retrans,
+		rpc_authflavor_t authflavour)
+{
+	int error;
+
+	if (clp->cl_cons_state == NFS_CS_READY) {
+		/* the client is initialised already */
+		dprintk("<-- nfs4_init_client() = 0 [already %p]\n", clp);
+		return 0;
+	}
+
+	/* Check NFS protocol revision and initialize RPC op vector */
+	clp->rpc_ops = &nfs_v4_clientops;
+
+	error = nfs_create_rpc_client(clp, proto, timeo, retrans, authflavour);
+	if (error < 0)
+		goto error;
+
+	error = nfs_idmap_new(clp);
+	if (error < 0) {
+		dprintk("%s: failed to create idmapper. Error = %d\n",
+			__FUNCTION__, error);
+		__set_bit(NFS_CS_IDMAP, &clp->cl_res_state);
+		goto error;
+	}
+
+	nfs_mark_client_ready(clp, NFS_CS_READY);
+	return 0;
+
+error:
+	nfs_mark_client_ready(clp, error);
+	dprintk("<-- nfs4_init_client() = xerror %d\n", error);
+	return error;
+}
+
+/*
+ * Set up an NFS4 client
+ */
+static int nfs4_set_client(struct nfs_server *server,
+		const char *hostname, const struct sockaddr_in *addr,
+		rpc_authflavor_t authflavour,
+		int proto, int timeo, int retrans)
+{
+	struct nfs_client *clp;
+	int error;
+
+	dprintk("--> nfs4_set_client()\n");
+
+	/* Allocate or find a client reference we can use */
+	clp = nfs_get_client(hostname, addr, 4);
+	if (IS_ERR(clp)) {
+		error = PTR_ERR(clp);
+		goto error;
+	}
+	error = nfs4_init_client(clp, proto, timeo, retrans, authflavour);
+	if (error < 0)
+		goto error_put;
+
+	server->nfs_client = clp;
+	dprintk("<-- nfs4_set_client() = 0 [new %p]\n", clp);
+	return 0;
+
+error_put:
+	nfs_put_client(clp);
+error:
+	dprintk("<-- nfs4_set_client() = xerror %d\n", error);
+	return error;
+}
+
+/*
+ * Create a version 4 volume record
+ */
+static int nfs4_init_server(struct nfs_server *server,
+		const struct nfs4_mount_data *data, rpc_authflavor_t authflavour)
+{
+	int error;
+
+	dprintk("--> nfs4_init_server()\n");
+
+	/* Initialise the client representation from the mount data */
+	server->flags = data->flags & NFS_MOUNT_FLAGMASK;
+	server->caps |= NFS_CAP_ATOMIC_OPEN;
+
+	if (data->rsize)
+		server->rsize = nfs_block_size(data->rsize, NULL);
+	if (data->wsize)
+		server->wsize = nfs_block_size(data->wsize, NULL);
+
+	server->acregmin = data->acregmin * HZ;
+	server->acregmax = data->acregmax * HZ;
+	server->acdirmin = data->acdirmin * HZ;
+	server->acdirmax = data->acdirmax * HZ;
+
+	error = nfs_init_server_rpcclient(server, authflavour);
+
+	/* Done */
+	dprintk("<-- nfs4_init_server() = %d\n", error);
+	return error;
+}
+
+/*
+ * Create a version 4 volume record
+ * - keyed on server and FSID
+ */
+struct nfs_server *nfs4_create_server(const struct nfs4_mount_data *data,
+				      const char *hostname,
+				      const struct sockaddr_in *addr,
+				      const char *mntpath,
+				      const char *ip_addr,
+				      rpc_authflavor_t authflavour,
+				      struct nfs_fh *mntfh)
+{
+	struct nfs_fattr fattr;
+	struct nfs_server *server;
+	int error;
+
+	dprintk("--> nfs4_create_server()\n");
+
+	server = nfs_alloc_server();
+	if (!server)
+		return ERR_PTR(-ENOMEM);
+
+	/* Get a client record */
+	error = nfs4_set_client(server, hostname, addr, authflavour,
+			data->proto, data->timeo, data->retrans);
+	if (error < 0)
+		goto error;
+
+	/* set up the general RPC client */
+	error = nfs4_init_server(server, data, authflavour);
+	if (error < 0)
+		goto error;
+
+	BUG_ON(!server->nfs_client);
+	BUG_ON(!server->nfs_client->rpc_ops);
+	BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops);
+
+	/* Probe the root fh to retrieve its FSID */
+	error = nfs4_path_walk(server, mntfh, mntpath);
+	if (error < 0)
+		goto error;
+
+	dprintk("Server FSID: %llx:%llx\n", server->fsid.major, server->fsid.minor);
+	dprintk("Mount FH: %d\n", mntfh->size);
+
+	error = nfs_probe_fsinfo(server, mntfh, &fattr);
+	if (error < 0)
+		goto error;
+
+	BUG_ON(!server->nfs_client);
+	BUG_ON(!server->nfs_client->rpc_ops);
+	BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops);
+
+	spin_lock(&nfs_client_lock);
+	list_add_tail(&server->client_link, &server->nfs_client->cl_superblocks);
+	list_add_tail(&server->master_link, &nfs_volume_list);
+	spin_unlock(&nfs_client_lock);
+
+	server->mount_time = jiffies;
+	dprintk("<-- nfs4_create_server() = %p\n", server);
+	return server;
+
+error:
+	nfs_free_server(server);
+	dprintk("<-- nfs4_create_server() = error %d\n", error);
+	return ERR_PTR(error);
+}
+
+/*
+ * Create an NFS4 referral server record
+ */
+struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data,
+					       struct nfs_fh *fh)
+{
+	struct nfs_client *parent_client;
+	struct nfs_server *server, *parent_server;
+	struct nfs_fattr fattr;
+	int error;
+
+	dprintk("--> nfs4_create_referral_server()\n");
+
+	server = nfs_alloc_server();
+	if (!server)
+		return ERR_PTR(-ENOMEM);
+
+	parent_server = NFS_SB(data->sb);
+	parent_client = parent_server->nfs_client;
+
+	/* Get a client representation.
+	 * Note: NFSv4 always uses TCP, */
+	error = nfs4_set_client(server, data->hostname, data->addr,
+			data->authflavor,
+			parent_server->client->cl_xprt->prot,
+			parent_client->retrans_timeo,
+			parent_client->retrans_count);
+
+	/* Initialise the client representation from the parent server */
+	nfs_server_copy_userdata(server, parent_server);
+	server->caps |= NFS_CAP_ATOMIC_OPEN;
+
+	error = nfs_init_server_rpcclient(server, data->authflavor);
+	if (error < 0)
+		goto error;
+
+	BUG_ON(!server->nfs_client);
+	BUG_ON(!server->nfs_client->rpc_ops);
+	BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops);
+
+	/* probe the filesystem info for this server filesystem */
+	error = nfs_probe_fsinfo(server, fh, &fattr);
+	if (error < 0)
+		goto error;
+
+	dprintk("Referral FSID: %llx:%llx\n",
+		server->fsid.major, server->fsid.minor);
+
+	spin_lock(&nfs_client_lock);
+	list_add_tail(&server->client_link, &server->nfs_client->cl_superblocks);
+	list_add_tail(&server->master_link, &nfs_volume_list);
+	spin_unlock(&nfs_client_lock);
+
+	server->mount_time = jiffies;
+
+	dprintk("<-- nfs_create_referral_server() = %p\n", server);
+	return server;
+
+error:
+	nfs_free_server(server);
+	dprintk("<-- nfs4_create_referral_server() = error %d\n", error);
+	return ERR_PTR(error);
+}
+
+#endif /* CONFIG_NFS_V4 */
+
+/*
+ * Clone an NFS2, NFS3 or NFS4 server record
+ */
+struct nfs_server *nfs_clone_server(struct nfs_server *source,
+				    struct nfs_fh *fh,
+				    struct nfs_fattr *fattr)
+{
+	struct nfs_server *server;
+	struct nfs_fattr fattr_fsinfo;
+	int error;
+
+	dprintk("--> nfs_clone_server(,%llx:%llx,)\n",
+		fattr->fsid.major, fattr->fsid.minor);
+
+	server = nfs_alloc_server();
+	if (!server)
+		return ERR_PTR(-ENOMEM);
+
+	/* Copy data from the source */
+	server->nfs_client = source->nfs_client;
+	atomic_inc(&server->nfs_client->cl_count);
+	nfs_server_copy_userdata(server, source);
+
+	server->fsid = fattr->fsid;
+
+	error = nfs_init_server_rpcclient(server, source->client->cl_auth->au_flavor);
+	if (error < 0)
+		goto out_free_server;
+	if (!IS_ERR(source->client_acl))
+		nfs_init_server_aclclient(server);
+
+	/* probe the filesystem info for this server filesystem */
+	error = nfs_probe_fsinfo(server, fh, &fattr_fsinfo);
+	if (error < 0)
+		goto out_free_server;
+
+	dprintk("Cloned FSID: %llx:%llx\n",
+		server->fsid.major, server->fsid.minor);
+
+	error = nfs_start_lockd(server);
+	if (error < 0)
+		goto out_free_server;
+
+	spin_lock(&nfs_client_lock);
+	list_add_tail(&server->client_link, &server->nfs_client->cl_superblocks);
+	list_add_tail(&server->master_link, &nfs_volume_list);
+	spin_unlock(&nfs_client_lock);
+
+	server->mount_time = jiffies;
+
+	dprintk("<-- nfs_clone_server() = %p\n", server);
+	return server;
+
+out_free_server:
+	nfs_free_server(server);
+	dprintk("<-- nfs_clone_server() = error %d\n", error);
+	return ERR_PTR(error);
+}
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 1936271..9b496ef 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -31,6 +31,7 @@
 #include <linux/pagemap.h>
 #include <linux/smp_lock.h>
 #include <linux/namei.h>
+#include <linux/mount.h>
 
 #include "nfs4_fs.h"
 #include "delegation.h"
@@ -870,14 +871,14 @@ int nfs_is_exclusive_create(struct inode *dir, struct nameidata *nd)
 	return (nd->intent.open.flags & O_EXCL) != 0;
 }
 
-static inline int nfs_reval_fsid(struct inode *dir,
-		struct nfs_fh *fh, struct nfs_fattr *fattr)
+static inline int nfs_reval_fsid(struct vfsmount *mnt, struct inode *dir,
+				 struct nfs_fh *fh, struct nfs_fattr *fattr)
 {
 	struct nfs_server *server = NFS_SERVER(dir);
 
 	if (!nfs_fsid_equal(&server->fsid, &fattr->fsid))
 		/* Revalidate fsid on root dir */
-		return __nfs_revalidate_inode(server, dir->i_sb->s_root->d_inode);
+		return __nfs_revalidate_inode(server, mnt->mnt_root->d_inode);
 	return 0;
 }
 
@@ -913,7 +914,7 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru
 		res = ERR_PTR(error);
 		goto out_unlock;
 	}
-	error = nfs_reval_fsid(dir, &fhandle, &fattr);
+	error = nfs_reval_fsid(nd->mnt, dir, &fhandle, &fattr);
 	if (error < 0) {
 		res = ERR_PTR(error);
 		goto out_unlock;
@@ -922,8 +923,9 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru
 	res = (struct dentry *)inode;
 	if (IS_ERR(res))
 		goto out_unlock;
+
 no_entry:
-	res = d_add_unique(dentry, inode);
+	res = d_materialise_unique(dentry, inode);
 	if (res != NULL)
 		dentry = res;
 	nfs_renew_times(dentry);
@@ -1117,11 +1119,13 @@ static struct dentry *nfs_readdir_lookup(nfs_readdir_descriptor_t *desc)
 		dput(dentry);
 		return NULL;
 	}
-	alias = d_add_unique(dentry, inode);
+
+	alias = d_materialise_unique(dentry, inode);
 	if (alias != NULL) {
 		dput(dentry);
 		dentry = alias;
 	}
+
 	nfs_renew_times(dentry);
 	nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
 	return dentry;
diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c
new file mode 100644
index 0000000..977e590
--- /dev/null
+++ b/fs/nfs/getroot.c
@@ -0,0 +1,306 @@
+/* getroot.c: get the root dentry for an NFS mount
+ *
+ * Copyright (C) 2006 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/init.h>
+
+#include <linux/time.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/string.h>
+#include <linux/stat.h>
+#include <linux/errno.h>
+#include <linux/unistd.h>
+#include <linux/sunrpc/clnt.h>
+#include <linux/sunrpc/stats.h>
+#include <linux/nfs_fs.h>
+#include <linux/nfs_mount.h>
+#include <linux/nfs4_mount.h>
+#include <linux/lockd/bind.h>
+#include <linux/smp_lock.h>
+#include <linux/seq_file.h>
+#include <linux/mount.h>
+#include <linux/nfs_idmap.h>
+#include <linux/vfs.h>
+#include <linux/namei.h>
+#include <linux/namespace.h>
+
+#include <asm/system.h>
+#include <asm/uaccess.h>
+
+#include "nfs4_fs.h"
+#include "delegation.h"
+#include "internal.h"
+
+#define NFSDBG_FACILITY		NFSDBG_CLIENT
+#define NFS_PARANOIA 1
+
+/*
+ * get an NFS2/NFS3 root dentry from the root filehandle
+ */
+struct dentry *nfs_get_root(struct super_block *sb, struct nfs_fh *mntfh)
+{
+	struct nfs_server *server = NFS_SB(sb);
+	struct nfs_fsinfo fsinfo;
+	struct nfs_fattr fattr;
+	struct dentry *mntroot;
+	struct inode *inode;
+	int error;
+
+	/* create a dummy root dentry with dummy inode for this superblock */
+	if (!sb->s_root) {
+		struct nfs_fh dummyfh;
+		struct dentry *root;
+		struct inode *iroot;
+
+		memset(&dummyfh, 0, sizeof(dummyfh));
+		memset(&fattr, 0, sizeof(fattr));
+		nfs_fattr_init(&fattr);
+		fattr.valid = NFS_ATTR_FATTR;
+		fattr.type = NFDIR;
+		fattr.mode = S_IFDIR | S_IRUSR | S_IWUSR;
+		fattr.nlink = 2;
+
+		iroot = nfs_fhget(sb, &dummyfh, &fattr);
+		if (IS_ERR(iroot))
+			return ERR_PTR(PTR_ERR(iroot));
+
+		root = d_alloc_root(iroot);
+		if (!root) {
+			iput(iroot);
+			return ERR_PTR(-ENOMEM);
+		}
+
+		sb->s_root = root;
+	}
+
+	/* get the actual root for this mount */
+	fsinfo.fattr = &fattr;
+
+	error = server->nfs_client->rpc_ops->getroot(server, mntfh, &fsinfo);
+	if (error < 0) {
+		dprintk("nfs_get_root: getattr error = %d\n", -error);
+		return ERR_PTR(error);
+	}
+
+	inode = nfs_fhget(sb, mntfh, fsinfo.fattr);
+	if (IS_ERR(inode)) {
+		dprintk("nfs_get_root: get root inode failed\n");
+		return ERR_PTR(PTR_ERR(inode));
+	}
+
+	/* root dentries normally start off anonymous and get spliced in later
+	 * if the dentry tree reaches them; however if the dentry already
+	 * exists, we'll pick it up at this point and use it as the root
+	 */
+	mntroot = d_alloc_anon(inode);
+	if (!mntroot) {
+		iput(inode);
+		dprintk("nfs_get_root: get root dentry failed\n");
+		return ERR_PTR(-ENOMEM);
+	}
+
+	if (!mntroot->d_op)
+		mntroot->d_op = server->nfs_client->rpc_ops->dentry_ops;
+
+	return mntroot;
+}
+
+#ifdef CONFIG_NFS_V4
+
+/*
+ * Do a simple pathwalk from the root FH of the server to the nominated target
+ * of the mountpoint
+ * - give error on symlinks
+ * - give error on ".." occurring in the path
+ * - follow traversals
+ */
+int nfs4_path_walk(struct nfs_server *server,
+		   struct nfs_fh *mntfh,
+		   const char *path)
+{
+	struct nfs_fsinfo fsinfo;
+	struct nfs_fattr fattr;
+	struct nfs_fh lastfh;
+	struct qstr name;
+	int ret;
+	//int referral_count = 0;
+
+	dprintk("--> nfs4_path_walk(,,%s)\n", path);
+
+	fsinfo.fattr = &fattr;
+	nfs_fattr_init(&fattr);
+
+	if (*path++ != '/') {
+		dprintk("nfs4_get_root: Path does not begin with a slash\n");
+		return -EINVAL;
+	}
+
+	/* Start by getting the root filehandle from the server */
+	ret = server->nfs_client->rpc_ops->getroot(server, mntfh, &fsinfo);
+	if (ret < 0) {
+		dprintk("nfs4_get_root: getroot error = %d\n", -ret);
+		return ret;
+	}
+
+	if (fattr.type != NFDIR) {
+		printk(KERN_ERR "nfs4_get_root:"
+		       " getroot encountered non-directory\n");
+		return -ENOTDIR;
+	}
+
+	if (fattr.valid & NFS_ATTR_FATTR_V4_REFERRAL) {
+		printk(KERN_ERR "nfs4_get_root:"
+		       " getroot obtained referral\n");
+		return -EREMOTE;
+	}
+
+next_component:
+	dprintk("Next: %s\n", path);
+
+	/* extract the next bit of the path */
+	if (!*path)
+		goto path_walk_complete;
+
+	name.name = path;
+	while (*path && *path != '/')
+		path++;
+	name.len = path - (const char *) name.name;
+
+eat_dot_dir:
+	while (*path == '/')
+		path++;
+
+	if (path[0] == '.' && (path[1] == '/' || !path[1])) {
+		path += 2;
+		goto eat_dot_dir;
+	}
+
+	if (path[0] == '.' && path[1] == '.' && (path[2] == '/' || !path[2])
+	    ) {
+		printk(KERN_ERR "nfs4_get_root:"
+		       " Mount path contains reference to \"..\"\n");
+		return -EINVAL;
+	}
+
+	/* lookup the next FH in the sequence */
+	memcpy(&lastfh, mntfh, sizeof(lastfh));
+
+	dprintk("LookupFH: %*.*s [%s]\n", name.len, name.len, name.name, path);
+
+	ret = server->nfs_client->rpc_ops->lookupfh(server, &lastfh, &name,
+						    mntfh, &fattr);
+	if (ret < 0) {
+		dprintk("nfs4_get_root: getroot error = %d\n", -ret);
+		return ret;
+	}
+
+	if (fattr.type != NFDIR) {
+		printk(KERN_ERR "nfs4_get_root:"
+		       " lookupfh encountered non-directory\n");
+		return -ENOTDIR;
+	}
+
+	if (fattr.valid & NFS_ATTR_FATTR_V4_REFERRAL) {
+		printk(KERN_ERR "nfs4_get_root:"
+		       " lookupfh obtained referral\n");
+		return -EREMOTE;
+	}
+
+	goto next_component;
+
+path_walk_complete:
+	memcpy(&server->fsid, &fattr.fsid, sizeof(server->fsid));
+	dprintk("<-- nfs4_path_walk() = 0\n");
+	return 0;
+}
+
+/*
+ * get an NFS4 root dentry from the root filehandle
+ */
+struct dentry *nfs4_get_root(struct super_block *sb, struct nfs_fh *mntfh)
+{
+	struct nfs_server *server = NFS_SB(sb);
+	struct nfs_fattr fattr;
+	struct dentry *mntroot;
+	struct inode *inode;
+	int error;
+
+	dprintk("--> nfs4_get_root()\n");
+
+	/* create a dummy root dentry with dummy inode for this superblock */
+	if (!sb->s_root) {
+		struct nfs_fh dummyfh;
+		struct dentry *root;
+		struct inode *iroot;
+
+		memset(&dummyfh, 0, sizeof(dummyfh));
+		memset(&fattr, 0, sizeof(fattr));
+		nfs_fattr_init(&fattr);
+		fattr.valid = NFS_ATTR_FATTR;
+		fattr.type = NFDIR;
+		fattr.mode = S_IFDIR | S_IRUSR | S_IWUSR;
+		fattr.nlink = 2;
+
+		iroot = nfs_fhget(sb, &dummyfh, &fattr);
+		if (IS_ERR(iroot))
+			return ERR_PTR(PTR_ERR(iroot));
+
+		root = d_alloc_root(iroot);
+		if (!root) {
+			iput(iroot);
+			return ERR_PTR(-ENOMEM);
+		}
+
+		sb->s_root = root;
+	}
+
+	/* get the info about the server and filesystem */
+	error = nfs4_server_capabilities(server, mntfh);
+	if (error < 0) {
+		dprintk("nfs_get_root: getcaps error = %d\n",
+			-error);
+		return ERR_PTR(error);
+	}
+
+	/* get the actual root for this mount */
+	error = server->nfs_client->rpc_ops->getattr(server, mntfh, &fattr);
+	if (error < 0) {
+		dprintk("nfs_get_root: getattr error = %d\n", -error);
+		return ERR_PTR(error);
+	}
+
+	inode = nfs_fhget(sb, mntfh, &fattr);
+	if (IS_ERR(inode)) {
+		dprintk("nfs_get_root: get root inode failed\n");
+		return ERR_PTR(PTR_ERR(inode));
+	}
+
+	/* root dentries normally start off anonymous and get spliced in later
+	 * if the dentry tree reaches them; however if the dentry already
+	 * exists, we'll pick it up at this point and use it as the root
+	 */
+	mntroot = d_alloc_anon(inode);
+	if (!mntroot) {
+		iput(inode);
+		dprintk("nfs_get_root: get root dentry failed\n");
+		return ERR_PTR(-ENOMEM);
+	}
+
+	if (!mntroot->d_op)
+		mntroot->d_op = server->nfs_client->rpc_ops->dentry_ops;
+
+	dprintk("<-- nfs4_get_root()\n");
+	return mntroot;
+}
+
+#endif /* CONFIG_NFS_V4 */
diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c
index 231c20f..f96dfac 100644
--- a/fs/nfs/idmap.c
+++ b/fs/nfs/idmap.c
@@ -114,8 +114,7 @@ nfs_idmap_new(struct nfs_client *clp)
 	struct idmap *idmap;
 	int error;
 
-	if (clp->cl_idmap != NULL)
-		return 0;
+	BUG_ON(clp->cl_idmap != NULL);
 
         if ((idmap = kzalloc(sizeof(*idmap), GFP_KERNEL)) == NULL)
                 return -ENOMEM;
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 771c3b8..a547c58 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -1020,7 +1020,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
  out_fileid:
 	printk(KERN_ERR "NFS: server %s error: fileid changed\n"
 		"fsid %s: expected fileid 0x%Lx, got 0x%Lx\n",
-		NFS_SERVER(inode)->hostname, inode->i_sb->s_id,
+		NFS_SERVER(inode)->nfs_client->cl_hostname, inode->i_sb->s_id,
 		(long long)nfsi->fileid, (long long)fattr->fileid);
 	goto out_err;
 }
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 2f3aa52..e73ba4f 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -4,6 +4,18 @@
 
 #include <linux/mount.h>
 
+struct nfs_string;
+struct nfs_mount_data;
+struct nfs4_mount_data;
+
+/* Maximum number of readahead requests
+ * FIXME: this should really be a sysctl so that users may tune it to suit
+ *        their needs. People that do NFS over a slow network, might for
+ *        instance want to reduce it to something closer to 1 for improved
+ *        interactive response.
+ */
+#define NFS_MAX_READAHEAD	(RPC_DEF_SLOT_TABLE - 1)
+
 struct nfs_clone_mount {
 	const struct super_block *sb;
 	const struct dentry *dentry;
@@ -16,12 +28,25 @@ struct nfs_clone_mount {
 };
 
 /* client.c */
+extern struct rpc_program nfs_program;
+
 extern void nfs_put_client(struct nfs_client *);
 extern struct nfs_client *nfs_find_client(const struct sockaddr_in *, int);
-extern struct nfs_client *nfs_get_client(const char *, const struct sockaddr_in *, int);
-extern void nfs_mark_client_ready(struct nfs_client *, int);
-extern int nfs_create_rpc_client(struct nfs_client *, int, unsigned int,
-				 unsigned int, rpc_authflavor_t);
+extern struct nfs_server *nfs_create_server(const struct nfs_mount_data *,
+					    struct nfs_fh *);
+extern struct nfs_server *nfs4_create_server(const struct nfs4_mount_data *,
+					     const char *,
+					     const struct sockaddr_in *,
+					     const char *,
+					     const char *,
+					     rpc_authflavor_t,
+					     struct nfs_fh *);
+extern struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *,
+						      struct nfs_fh *);
+extern void nfs_free_server(struct nfs_server *server);
+extern struct nfs_server *nfs_clone_server(struct nfs_server *,
+					   struct nfs_fh *,
+					   struct nfs_fattr *);
 
 /* nfs4namespace.c */
 #ifdef CONFIG_NFS_V4
@@ -89,10 +114,10 @@ extern void nfs4_clear_inode(struct inode *);
 #endif
 
 /* super.c */
-extern struct file_system_type nfs_referral_nfs4_fs_type;
-extern struct file_system_type clone_nfs_fs_type;
+extern struct file_system_type nfs_xdev_fs_type;
 #ifdef CONFIG_NFS_V4
-extern struct file_system_type clone_nfs4_fs_type;
+extern struct file_system_type nfs4_xdev_fs_type;
+extern struct file_system_type nfs4_referral_fs_type;
 #endif
 
 extern struct rpc_stat nfs_rpcstat;
@@ -101,28 +126,30 @@ extern int __init register_nfs_fs(void);
 extern void __exit unregister_nfs_fs(void);
 
 /* namespace.c */
-extern char *nfs_path(const char *base, const struct dentry *dentry,
+extern char *nfs_path(const char *base,
+		      const struct dentry *droot,
+		      const struct dentry *dentry,
 		      char *buffer, ssize_t buflen);
 
-/*
- * Determine the mount path as a string
- */
+/* getroot.c */
+extern struct dentry *nfs_get_root(struct super_block *, struct nfs_fh *);
 #ifdef CONFIG_NFS_V4
-static inline char *
-nfs4_path(const struct dentry *dentry, char *buffer, ssize_t buflen)
-{
-	return nfs_path(NFS_SB(dentry->d_sb)->mnt_path, dentry, buffer, buflen);
-}
+extern struct dentry *nfs4_get_root(struct super_block *, struct nfs_fh *);
+
+extern int nfs4_path_walk(struct nfs_server *server,
+			  struct nfs_fh *mntfh,
+			  const char *path);
 #endif
 
 /*
  * Determine the device name as a string
  */
 static inline char *nfs_devname(const struct vfsmount *mnt_parent,
-			 const struct dentry *dentry,
-			 char *buffer, ssize_t buflen)
+				const struct dentry *dentry,
+				char *buffer, ssize_t buflen)
 {
-	return nfs_path(mnt_parent->mnt_devname, dentry, buffer, buflen);
+	return nfs_path(mnt_parent->mnt_devname, mnt_parent->mnt_root,
+			dentry, buffer, buflen);
 }
 
 /*
@@ -178,20 +205,3 @@ void nfs_super_set_maxbytes(struct super_block *sb, __u64 maxfilesize)
 	if (sb->s_maxbytes > MAX_LFS_FILESIZE || sb->s_maxbytes <= 0)
 		sb->s_maxbytes = MAX_LFS_FILESIZE;
 }
-
-/*
- * Check if the string represents a "valid" IPv4 address
- */
-static inline int valid_ipaddr4(const char *buf)
-{
-	int rc, count, in[4];
-
-	rc = sscanf(buf, "%d.%d.%d.%d", &in[0], &in[1], &in[2], &in[3]);
-	if (rc != 4)
-		return -EINVAL;
-	for (count = 0; count < 4; count++) {
-		if (in[count] > 255)
-			return -EINVAL;
-	}
-	return 0;
-}
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c
index d8b8d56..77b0068 100644
--- a/fs/nfs/namespace.c
+++ b/fs/nfs/namespace.c
@@ -2,6 +2,7 @@
  * linux/fs/nfs/namespace.c
  *
  * Copyright (C) 2005 Trond Myklebust <Trond.Myklebust@netapp.com>
+ * - Modified by David Howells <dhowells@redhat.com>
  *
  * NFS namespace
  */
@@ -28,6 +29,7 @@ int nfs_mountpoint_expiry_timeout = 500 * HZ;
 /*
  * nfs_path - reconstruct the path given an arbitrary dentry
  * @base - arbitrary string to prepend to the path
+ * @droot - pointer to root dentry for mountpoint
  * @dentry - pointer to dentry
  * @buffer - result buffer
  * @buflen - length of buffer
@@ -38,7 +40,9 @@ int nfs_mountpoint_expiry_timeout = 500 * HZ;
  * This is mainly for use in figuring out the path on the
  * server side when automounting on top of an existing partition.
  */
-char *nfs_path(const char *base, const struct dentry *dentry,
+char *nfs_path(const char *base,
+	       const struct dentry *droot,
+	       const struct dentry *dentry,
 	       char *buffer, ssize_t buflen)
 {
 	char *end = buffer+buflen;
@@ -47,7 +51,7 @@ char *nfs_path(const char *base, const struct dentry *dentry,
 	*--end = '\0';
 	buflen--;
 	spin_lock(&dcache_lock);
-	while (!IS_ROOT(dentry)) {
+	while (!IS_ROOT(dentry) && dentry != droot) {
 		namelen = dentry->d_name.len;
 		buflen -= namelen + 1;
 		if (buflen < 0)
@@ -96,12 +100,13 @@ static void * nfs_follow_mountpoint(struct dentry *dentry, struct nameidata *nd)
 	struct nfs_fattr fattr;
 	int err;
 
+	dprintk("--> nfs_follow_mountpoint()\n");
+
 	BUG_ON(IS_ROOT(dentry));
 	dprintk("%s: enter\n", __FUNCTION__);
 	dput(nd->dentry);
 	nd->dentry = dget(dentry);
-	if (d_mountpoint(nd->dentry))
-		goto out_follow;
+
 	/* Look it up again */
 	parent = dget_parent(nd->dentry);
 	err = server->nfs_client->rpc_ops->lookup(parent->d_inode,
@@ -134,6 +139,8 @@ static void * nfs_follow_mountpoint(struct dentry *dentry, struct nameidata *nd)
 	schedule_delayed_work(&nfs_automount_task, nfs_mountpoint_expiry_timeout);
 out:
 	dprintk("%s: done, returned %d\n", __FUNCTION__, err);
+
+	dprintk("<-- nfs_follow_mountpoint() = %d\n", err);
 	return ERR_PTR(err);
 out_err:
 	path_release(nd);
@@ -183,14 +190,14 @@ static struct vfsmount *nfs_do_clone_mount(struct nfs_server *server,
 	switch (server->nfs_client->cl_nfsversion) {
 		case 2:
 		case 3:
-			mnt = vfs_kern_mount(&clone_nfs_fs_type, 0, devname, mountdata);
+			mnt = vfs_kern_mount(&nfs_xdev_fs_type, 0, devname, mountdata);
 			break;
 		case 4:
-			mnt = vfs_kern_mount(&clone_nfs4_fs_type, 0, devname, mountdata);
+			mnt = vfs_kern_mount(&nfs4_xdev_fs_type, 0, devname, mountdata);
 	}
 	return mnt;
 #else
-	return vfs_kern_mount(&clone_nfs_fs_type, 0, devname, mountdata);
+	return vfs_kern_mount(&nfs_xdev_fs_type, 0, devname, mountdata);
 #endif
 }
 
@@ -216,6 +223,8 @@ struct vfsmount *nfs_do_submount(const struct vfsmount *mnt_parent,
 	char *page = (char *) __get_free_page(GFP_USER);
 	char *devname;
 
+	dprintk("--> nfs_do_submount()\n");
+
 	dprintk("%s: submounting on %s/%s\n", __FUNCTION__,
 			dentry->d_parent->d_name.name,
 			dentry->d_name.name);
@@ -230,5 +239,7 @@ free_page:
 	free_page((unsigned long)page);
 out:
 	dprintk("%s: done\n", __FUNCTION__);
+
+	dprintk("<-- nfs_do_submount() = %p\n", mnt);
 	return mnt;
 }
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index 0622af0..9e8258e 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -81,7 +81,7 @@ do_proc_get_root(struct rpc_clnt *client, struct nfs_fh *fhandle,
 }
 
 /*
- * Bare-bones access to getattr: this is for nfs_read_super.
+ * Bare-bones access to getattr: this is for nfs_get_root/nfs_get_sb
  */
 static int
 nfs3_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle,
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index e787924..61095fe 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -188,8 +188,6 @@ extern void nfs4_kill_renewd(struct nfs_client *);
 extern void nfs4_renew_state(void *);
 
 /* nfs4state.c */
-extern void init_nfsv4_state(struct nfs_server *);
-extern void destroy_nfsv4_state(struct nfs_server *);
 struct rpc_cred *nfs4_get_renew_cred(struct nfs_client *clp);
 extern u32 nfs4_alloc_lockowner_id(struct nfs_client *);
 
@@ -224,10 +222,6 @@ extern struct svc_version nfs4_callback_version1;
 
 #else
 
-#define init_nfsv4_state(server)  do { } while (0)
-#define destroy_nfsv4_state(server)       do { } while (0)
-#define nfs4_put_state_owner(inode, owner) do { } while (0)
-#define nfs4_put_open_state(state) do { } while (0)
 #define nfs4_close_state(a, b) do { } while (0)
 
 #endif /* CONFIG_NFS_V4 */
diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c
index faed9bc..24e47f3 100644
--- a/fs/nfs/nfs4namespace.c
+++ b/fs/nfs/nfs4namespace.c
@@ -2,6 +2,7 @@
  * linux/fs/nfs/nfs4namespace.c
  *
  * Copyright (C) 2005 Trond Myklebust <Trond.Myklebust@netapp.com>
+ * - Modified by David Howells <dhowells@redhat.com>
  *
  * NFSv4 namespace
  */
@@ -47,6 +48,68 @@ Elong:
 	return ERR_PTR(-ENAMETOOLONG);
 }
 
+/*
+ * Determine the mount path as a string
+ */
+static char *nfs4_path(const struct vfsmount *mnt_parent,
+		       const struct dentry *dentry,
+		       char *buffer, ssize_t buflen)
+{
+	const char *srvpath;
+
+	srvpath = strchr(mnt_parent->mnt_devname, ':');
+	if (srvpath)
+		srvpath++;
+	else
+		srvpath = mnt_parent->mnt_devname;
+
+	return nfs_path(srvpath, mnt_parent->mnt_root, dentry, buffer, buflen);
+}
+
+/*
+ * Check that fs_locations::fs_root [RFC3530 6.3] is a prefix for what we
+ * believe to be the server path to this dentry
+ */
+static int nfs4_validate_fspath(const struct vfsmount *mnt_parent,
+				const struct dentry *dentry,
+				const struct nfs4_fs_locations *locations,
+				char *page, char *page2)
+{
+	const char *path, *fs_path;
+
+	path = nfs4_path(mnt_parent, dentry, page, PAGE_SIZE);
+	if (IS_ERR(path))
+		return PTR_ERR(path);
+
+	fs_path = nfs4_pathname_string(&locations->fs_path, page2, PAGE_SIZE);
+	if (IS_ERR(fs_path))
+		return PTR_ERR(fs_path);
+
+	if (strncmp(path, fs_path, strlen(fs_path)) != 0) {
+		dprintk("%s: path %s does not begin with fsroot %s\n",
+			__FUNCTION__, path, fs_path);
+		return -ENOENT;
+	}
+
+	return 0;
+}
+
+/*
+ * Check if the string represents a "valid" IPv4 address
+ */
+static inline int valid_ipaddr4(const char *buf)
+{
+	int rc, count, in[4];
+
+	rc = sscanf(buf, "%d.%d.%d.%d", &in[0], &in[1], &in[2], &in[3]);
+	if (rc != 4)
+		return -EINVAL;
+	for (count = 0; count < 4; count++) {
+		if (in[count] > 255)
+			return -EINVAL;
+	}
+	return 0;
+}
 
 /**
  * nfs_follow_referral - set up mountpoint when hitting a referral on moved error
@@ -68,10 +131,9 @@ static struct vfsmount *nfs_follow_referral(const struct vfsmount *mnt_parent,
 		.dentry = dentry,
 		.authflavor = NFS_SB(mnt_parent->mnt_sb)->client->cl_auth->au_flavor,
 	};
-	char *page, *page2;
-	char *path, *fs_path;
+	char *page = NULL, *page2 = NULL;
 	char *devname;
-	int loc, s;
+	int loc, s, error;
 
 	if (locations == NULL || locations->nlocations <= 0)
 		goto out;
@@ -79,31 +141,25 @@ static struct vfsmount *nfs_follow_referral(const struct vfsmount *mnt_parent,
 	dprintk("%s: referral at %s/%s\n", __FUNCTION__,
 		dentry->d_parent->d_name.name, dentry->d_name.name);
 
-	/* Ensure fs path is a prefix of current dentry path */
 	page = (char *) __get_free_page(GFP_USER);
-	if (page == NULL)
+	if (!page)
 		goto out;
+
 	page2 = (char *) __get_free_page(GFP_USER);
-	if (page2 == NULL)
+	if (!page2)
 		goto out;
 
-	path = nfs4_path(dentry, page, PAGE_SIZE);
-	if (IS_ERR(path))
-		goto out_free;
-
-	fs_path = nfs4_pathname_string(&locations->fs_path, page2, PAGE_SIZE);
-	if (IS_ERR(fs_path))
-		goto out_free;
-
-	if (strncmp(path, fs_path, strlen(fs_path)) != 0) {
-		dprintk("%s: path %s does not begin with fsroot %s\n", __FUNCTION__, path, fs_path);
-		goto out_free;
+	/* Ensure fs path is a prefix of current dentry path */
+	error = nfs4_validate_fspath(mnt_parent, dentry, locations, page, page2);
+	if (error < 0) {
+		mnt = ERR_PTR(error);
+		goto out;
 	}
 
 	devname = nfs_devname(mnt_parent, dentry, page, PAGE_SIZE);
 	if (IS_ERR(devname)) {
 		mnt = (struct vfsmount *)devname;
-		goto out_free;
+		goto out;
 	}
 
 	loc = 0;
@@ -140,7 +196,7 @@ static struct vfsmount *nfs_follow_referral(const struct vfsmount *mnt_parent,
 			addr.sin_port = htons(NFS_PORT);
 			mountdata.addr = &addr;
 
-			mnt = vfs_kern_mount(&nfs_referral_nfs4_fs_type, 0, devname, &mountdata);
+			mnt = vfs_kern_mount(&nfs4_referral_fs_type, 0, devname, &mountdata);
 			if (!IS_ERR(mnt)) {
 				break;
 			}
@@ -149,10 +205,9 @@ static struct vfsmount *nfs_follow_referral(const struct vfsmount *mnt_parent,
 		loc++;
 	}
 
-out_free:
-	free_page((unsigned long)page);
-	free_page((unsigned long)page2);
 out:
+	free_page((unsigned long) page);
+	free_page((unsigned long) page2);
 	dprintk("%s: done\n", __FUNCTION__);
 	return mnt;
 }
@@ -165,7 +220,7 @@ out:
  */
 struct vfsmount *nfs_do_refmount(const struct vfsmount *mnt_parent, struct dentry *dentry)
 {
-	struct vfsmount *mnt = ERR_PTR(-ENOENT);
+	struct vfsmount *mnt = ERR_PTR(-ENOMEM);
 	struct dentry *parent;
 	struct nfs4_fs_locations *fs_locations = NULL;
 	struct page *page;
@@ -183,11 +238,16 @@ struct vfsmount *nfs_do_refmount(const struct vfsmount *mnt_parent, struct dentr
 		goto out_free;
 
 	/* Get locations */
+	mnt = ERR_PTR(-ENOENT);
+
 	parent = dget_parent(dentry);
-	dprintk("%s: getting locations for %s/%s\n", __FUNCTION__, parent->d_name.name, dentry->d_name.name);
+	dprintk("%s: getting locations for %s/%s\n",
+		__FUNCTION__, parent->d_name.name, dentry->d_name.name);
+
 	err = nfs4_proc_fs_locations(parent->d_inode, dentry, fs_locations, page);
 	dput(parent);
-	if (err != 0 || fs_locations->nlocations <= 0 ||
+	if (err != 0 ||
+	    fs_locations->nlocations <= 0 ||
 	    fs_locations->fs_path.ncomponents <= 0)
 		goto out_free;
 
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 1573eeb..a825547 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -1393,70 +1393,19 @@ static int nfs4_lookup_root(struct nfs_server *server, struct nfs_fh *fhandle,
 	return err;
 }
 
+/*
+ * get the file handle for the "/" directory on the server
+ */
 static int nfs4_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle,
-		struct nfs_fsinfo *info)
+			      struct nfs_fsinfo *info)
 {
-	struct nfs_fattr *	fattr = info->fattr;
-	unsigned char *		p;
-	struct qstr		q;
-	struct nfs4_lookup_arg args = {
-		.dir_fh = fhandle,
-		.name = &q,
-		.bitmask = nfs4_fattr_bitmap,
-	};
-	struct nfs4_lookup_res res = {
-		.server = server,
-		.fattr = fattr,
-		.fh = fhandle,
-	};
-	struct rpc_message msg = {
-		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LOOKUP],
-		.rpc_argp = &args,
-		.rpc_resp = &res,
-	};
 	int status;
 
-	/*
-	 * Now we do a separate LOOKUP for each component of the mount path.
-	 * The LOOKUPs are done separately so that we can conveniently
-	 * catch an ERR_WRONGSEC if it occurs along the way...
-	 */
 	status = nfs4_lookup_root(server, fhandle, info);
-	if (status)
-		goto out;
-
-	p = server->mnt_path;
-	for (;;) {
-		struct nfs4_exception exception = { };
-
-		while (*p == '/')
-			p++;
-		if (!*p)
-			break;
-		q.name = p;
-		while (*p && (*p != '/'))
-			p++;
-		q.len = p - q.name;
-
-		do {
-			nfs_fattr_init(fattr);
-			status = nfs4_handle_exception(server,
-					rpc_call_sync(server->client, &msg, 0),
-					&exception);
-		} while (exception.retry);
-		if (status == 0)
-			continue;
-		if (status == -ENOENT) {
-			printk(KERN_NOTICE "NFS: mount path %s does not exist!\n", server->mnt_path);
-			printk(KERN_NOTICE "NFS: suggestion: try mounting '/' instead.\n");
-		}
-		break;
-	}
 	if (status == 0)
 		status = nfs4_server_capabilities(server, fhandle);
 	if (status == 0)
 		status = nfs4_do_fsinfo(server, fhandle, info);
-out:
 	return nfs4_map_errors(status);
 }
 
diff --git a/fs/nfs/nfs4renewd.c b/fs/nfs/nfs4renewd.c
index ff947ec..f2c8936 100644
--- a/fs/nfs/nfs4renewd.c
+++ b/fs/nfs/nfs4renewd.c
@@ -127,26 +127,13 @@ nfs4_schedule_state_renewal(struct nfs_client *clp)
 void
 nfs4_renewd_prepare_shutdown(struct nfs_server *server)
 {
-	struct nfs_client *clp = server->nfs_client;
-
-	if (!clp)
-		return;
 	flush_scheduled_work();
-	down_write(&clp->cl_sem);
-	if (!list_empty(&server->nfs4_siblings))
-		list_del_init(&server->nfs4_siblings);
-	up_write(&clp->cl_sem);
 }
 
-/* Must be called with clp->cl_sem locked for writes */
 void
 nfs4_kill_renewd(struct nfs_client *clp)
 {
 	down_read(&clp->cl_sem);
-	if (!list_empty(&clp->cl_superblocks)) {
-		up_read(&clp->cl_sem);
-		return;
-	}
 	cancel_delayed_work(&clp->cl_renewd);
 	up_read(&clp->cl_sem);
 	flush_scheduled_work();
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 058811e..5fffbdf 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -58,24 +58,6 @@ const nfs4_stateid zero_stateid;
 
 static LIST_HEAD(nfs4_clientid_list);
 
-void
-init_nfsv4_state(struct nfs_server *server)
-{
-	server->nfs_client = NULL;
-	INIT_LIST_HEAD(&server->nfs4_siblings);
-}
-
-void
-destroy_nfsv4_state(struct nfs_server *server)
-{
-	kfree(server->mnt_path);
-	server->mnt_path = NULL;
-	if (server->nfs_client) {
-		nfs_put_client(server->nfs_client);
-		server->nfs_client = NULL;
-	}
-}
-
 static int nfs4_init_client(struct nfs_client *clp, struct rpc_cred *cred)
 {
 	int status = nfs4_proc_setclientid(clp, NFS4_CALLBACK,
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index f0aff82..dae33c1 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -171,7 +171,7 @@ static int nfs_readpage_sync(struct nfs_open_context *ctx, struct inode *inode,
 		rdata->args.offset = page_offset(page) + rdata->args.pgbase;
 
 		dprintk("NFS: nfs_proc_read(%s, (%s/%Ld), %Lu, %u)\n",
-			NFS_SERVER(inode)->hostname,
+			NFS_SERVER(inode)->nfs_client->cl_hostname,
 			inode->i_sb->s_id,
 			(long long)NFS_FILEID(inode),
 			(unsigned long long)rdata->args.pgbase,
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 5842d51..867b5dc 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -13,6 +13,11 @@
  *
  *  Split from inode.c by David Howells <dhowells@redhat.com>
  *
+ * - superblocks are indexed on server only - all inodes, dentries, etc. associated with a
+ *   particular server are held in the same superblock
+ * - NFS superblocks can have several effective roots to the dentry tree
+ * - directory type roots are spliced into the tree when a path from one root reaches the root
+ *   of another (see nfs_lookup())
  */
 
 #include <linux/config.h>
@@ -52,20 +57,12 @@
 
 #define NFSDBG_FACILITY		NFSDBG_VFS
 
-/* Maximum number of readahead requests
- * FIXME: this should really be a sysctl so that users may tune it to suit
- *        their needs. People that do NFS over a slow network, might for
- *        instance want to reduce it to something closer to 1 for improved
- *        interactive response.
- */
-#define NFS_MAX_READAHEAD	(RPC_DEF_SLOT_TABLE - 1)
-
 static void nfs_umount_begin(struct vfsmount *, int);
 static int  nfs_statfs(struct dentry *, struct kstatfs *);
 static int  nfs_show_options(struct seq_file *, struct vfsmount *);
 static int  nfs_show_stats(struct seq_file *, struct vfsmount *);
 static int nfs_get_sb(struct file_system_type *, int, const char *, void *, struct vfsmount *);
-static int nfs_clone_nfs_sb(struct file_system_type *fs_type,
+static int nfs_xdev_get_sb(struct file_system_type *fs_type,
 		int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt);
 static void nfs_kill_super(struct super_block *);
 
@@ -77,10 +74,10 @@ static struct file_system_type nfs_fs_type = {
 	.fs_flags	= FS_ODD_RENAME|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
 };
 
-struct file_system_type clone_nfs_fs_type = {
+struct file_system_type nfs_xdev_fs_type = {
 	.owner		= THIS_MODULE,
 	.name		= "nfs",
-	.get_sb		= nfs_clone_nfs_sb,
+	.get_sb		= nfs_xdev_get_sb,
 	.kill_sb	= nfs_kill_super,
 	.fs_flags	= FS_ODD_RENAME|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
 };
@@ -99,10 +96,10 @@ static struct super_operations nfs_sops = {
 #ifdef CONFIG_NFS_V4
 static int nfs4_get_sb(struct file_system_type *fs_type,
 	int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt);
-static int nfs_clone_nfs4_sb(struct file_system_type *fs_type,
-		int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt);
-static int nfs_referral_nfs4_sb(struct file_system_type *fs_type,
-		int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt);
+static int nfs4_xdev_get_sb(struct file_system_type *fs_type,
+	int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt);
+static int nfs4_referral_get_sb(struct file_system_type *fs_type,
+	int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt);
 static void nfs4_kill_super(struct super_block *sb);
 
 static struct file_system_type nfs4_fs_type = {
@@ -113,18 +110,18 @@ static struct file_system_type nfs4_fs_type = {
 	.fs_flags	= FS_ODD_RENAME|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
 };
 
-struct file_system_type clone_nfs4_fs_type = {
+struct file_system_type nfs4_xdev_fs_type = {
 	.owner		= THIS_MODULE,
 	.name		= "nfs4",
-	.get_sb		= nfs_clone_nfs4_sb,
+	.get_sb		= nfs4_xdev_get_sb,
 	.kill_sb	= nfs4_kill_super,
 	.fs_flags	= FS_ODD_RENAME|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
 };
 
-struct file_system_type nfs_referral_nfs4_fs_type = {
+struct file_system_type nfs4_referral_fs_type = {
 	.owner		= THIS_MODULE,
 	.name		= "nfs4",
-	.get_sb		= nfs_referral_nfs4_sb,
+	.get_sb		= nfs4_referral_get_sb,
 	.kill_sb	= nfs4_kill_super,
 	.fs_flags	= FS_ODD_RENAME|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
 };
@@ -345,7 +342,7 @@ static int nfs_show_options(struct seq_file *m, struct vfsmount *mnt)
 	nfs_show_mount_options(m, nfss, 0);
 
 	seq_puts(m, ",addr=");
-	seq_escape(m, nfss->hostname, " \t\n\\");
+	seq_escape(m, nfss->nfs_client->cl_hostname, " \t\n\\");
 
 	return 0;
 }
@@ -429,714 +426,351 @@ static int nfs_show_stats(struct seq_file *m, struct vfsmount *mnt)
 
 /*
  * Begin unmount by attempting to remove all automounted mountpoints we added
- * in response to traversals
+ * in response to xdev traversals and referrals
  */
 static void nfs_umount_begin(struct vfsmount *vfsmnt, int flags)
 {
-	struct nfs_server *server;
-	struct rpc_clnt	*rpc;
-
 	shrink_submounts(vfsmnt, &nfs_automount_list);
-	if (!(flags & MNT_FORCE))
-		return;
-	/* -EIO all pending I/O */
-	server = NFS_SB(vfsmnt->mnt_sb);
-	rpc = server->client;
-	if (!IS_ERR(rpc))
-		rpc_killall_tasks(rpc);
-	rpc = server->client_acl;
-	if (!IS_ERR(rpc))
-		rpc_killall_tasks(rpc);
 }
 
 /*
- * Obtain the root inode of the file system.
+ * Validate the NFS2/NFS3 mount data
+ * - fills in the mount root filehandle
  */
-static struct inode *
-nfs_get_root(struct super_block *sb, struct nfs_fh *rootfh, struct nfs_fsinfo *fsinfo)
+static int nfs_validate_mount_data(struct nfs_mount_data *data,
+				   struct nfs_fh *mntfh)
 {
-	struct nfs_server	*server = NFS_SB(sb);
-	int			error;
-
-	error = server->nfs_client->rpc_ops->getroot(server, rootfh, fsinfo);
-	if (error < 0) {
-		dprintk("nfs_get_root: getattr error = %d\n", -error);
-		return ERR_PTR(error);
+	if (data == NULL) {
+		dprintk("%s: missing data argument\n", __FUNCTION__);
+		return -EINVAL;
 	}
 
-	server->fsid = fsinfo->fattr->fsid;
-	return nfs_fhget(sb, rootfh, fsinfo->fattr);
-}
-
-/*
- * Do NFS version-independent mount processing, and sanity checking
- */
-static int
-nfs_sb_init(struct super_block *sb, rpc_authflavor_t authflavor)
-{
-	struct nfs_server	*server;
-	struct inode		*root_inode;
-	struct nfs_fattr	fattr;
-	struct nfs_fsinfo	fsinfo = {
-					.fattr = &fattr,
-				};
-	struct nfs_pathconf pathinfo = {
-			.fattr = &fattr,
-	};
-	int no_root_error = 0;
-	unsigned long max_rpc_payload;
-
-	/* We probably want something more informative here */
-	snprintf(sb->s_id, sizeof(sb->s_id), "%x:%x", MAJOR(sb->s_dev), MINOR(sb->s_dev));
-
-	server = NFS_SB(sb);
-
-	sb->s_magic      = NFS_SUPER_MAGIC;
-
-	server->io_stats = nfs_alloc_iostats();
-	if (server->io_stats == NULL)
-		return -ENOMEM;
+	if (data->version <= 0 || data->version > NFS_MOUNT_VERSION) {
+		dprintk("%s: bad mount version\n", __FUNCTION__);
+		return -EINVAL;
+	}
 
-	root_inode = nfs_get_root(sb, &server->fh, &fsinfo);
-	/* Did getting the root inode fail? */
-	if (IS_ERR(root_inode)) {
-		no_root_error = PTR_ERR(root_inode);
-		goto out_no_root;
+	switch (data->version) {
+		case 1:
+			data->namlen = 0;
+		case 2:
+			data->bsize  = 0;
+		case 3:
+			if (data->flags & NFS_MOUNT_VER3) {
+				dprintk("%s: mount structure version %d does not support NFSv3\n",
+						__FUNCTION__,
+						data->version);
+				return -EINVAL;
+			}
+			data->root.size = NFS2_FHSIZE;
+			memcpy(data->root.data, data->old_root.data, NFS2_FHSIZE);
+		case 4:
+			if (data->flags & NFS_MOUNT_SECFLAVOUR) {
+				dprintk("%s: mount structure version %d does not support strong security\n",
+						__FUNCTION__,
+						data->version);
+				return -EINVAL;
+			}
+			/* Fill in pseudoflavor for mount version < 5 */
+			data->pseudoflavor = RPC_AUTH_UNIX;
+		case 5:
+			memset(data->context, 0, sizeof(data->context));
 	}
-	sb->s_root = d_alloc_root(root_inode);
-	if (!sb->s_root) {
-		no_root_error = -ENOMEM;
-		goto out_no_root;
+
+#ifndef CONFIG_NFS_V3
+	/* If NFSv3 is not compiled in, return -EPROTONOSUPPORT */
+	if (data->flags & NFS_MOUNT_VER3) {
+		dprintk("%s: NFSv3 not compiled into kernel\n", __FUNCTION__);
+		return -EPROTONOSUPPORT;
 	}
-	sb->s_root->d_op = server->nfs_client->rpc_ops->dentry_ops;
-
-	/* mount time stamp, in seconds */
-	server->mount_time = jiffies;
-
-	/* Get some general file system info */
-	if (server->namelen == 0 &&
-	    server->nfs_client->rpc_ops->pathconf(server, &server->fh, &pathinfo) >= 0)
-		server->namelen = pathinfo.max_namelen;
-	/* Work out a lot of parameters */
-	if (server->rsize == 0)
-		server->rsize = nfs_block_size(fsinfo.rtpref, NULL);
-	if (server->wsize == 0)
-		server->wsize = nfs_block_size(fsinfo.wtpref, NULL);
-
-	if (fsinfo.rtmax >= 512 && server->rsize > fsinfo.rtmax)
-		server->rsize = nfs_block_size(fsinfo.rtmax, NULL);
-	if (fsinfo.wtmax >= 512 && server->wsize > fsinfo.wtmax)
-		server->wsize = nfs_block_size(fsinfo.wtmax, NULL);
-
-	max_rpc_payload = nfs_block_size(rpc_max_payload(server->client), NULL);
-	if (server->rsize > max_rpc_payload)
-		server->rsize = max_rpc_payload;
-	if (server->rsize > NFS_MAX_FILE_IO_SIZE)
-		server->rsize = NFS_MAX_FILE_IO_SIZE;
-	server->rpages = (server->rsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
-
-	if (server->wsize > max_rpc_payload)
-		server->wsize = max_rpc_payload;
-	if (server->wsize > NFS_MAX_FILE_IO_SIZE)
-		server->wsize = NFS_MAX_FILE_IO_SIZE;
-	server->wpages = (server->wsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+#endif /* CONFIG_NFS_V3 */
 
-	if (sb->s_blocksize == 0)
-		sb->s_blocksize = nfs_block_bits(server->wsize,
-							 &sb->s_blocksize_bits);
-	server->wtmult = nfs_block_bits(fsinfo.wtmult, NULL);
-
-	server->dtsize = nfs_block_size(fsinfo.dtpref, NULL);
-	if (server->dtsize > PAGE_CACHE_SIZE)
-		server->dtsize = PAGE_CACHE_SIZE;
-	if (server->dtsize > server->rsize)
-		server->dtsize = server->rsize;
-
-	if (server->flags & NFS_MOUNT_NOAC) {
-		server->acregmin = server->acregmax = 0;
-		server->acdirmin = server->acdirmax = 0;
-		sb->s_flags |= MS_SYNCHRONOUS;
+	/* We now require that the mount process passes the remote address */
+	if (data->addr.sin_addr.s_addr == INADDR_ANY) {
+		dprintk("%s: mount program didn't pass remote address!\n",
+			__FUNCTION__);
+		return -EINVAL;
 	}
-	server->backing_dev_info.ra_pages = server->rpages * NFS_MAX_READAHEAD;
 
-	nfs_super_set_maxbytes(sb, fsinfo.maxfilesize);
+	/* Prepare the root filehandle */
+	if (data->flags & NFS_MOUNT_VER3)
+		mntfh->size = data->root.size;
+	else
+		mntfh->size = NFS2_FHSIZE;
 
-	server->client->cl_intr = (server->flags & NFS_MOUNT_INTR) ? 1 : 0;
-	server->client->cl_softrtry = (server->flags & NFS_MOUNT_SOFT) ? 1 : 0;
+	if (mntfh->size > sizeof(mntfh->data)) {
+		dprintk("%s: invalid root filehandle\n", __FUNCTION__);
+		return -EINVAL;
+	}
+
+	memcpy(mntfh->data, data->root.data, mntfh->size);
+	if (mntfh->size < sizeof(mntfh->data))
+		memset(mntfh->data + mntfh->size, 0,
+		       sizeof(mntfh->data) - mntfh->size);
 
-	/* We're airborne Set socket buffersize */
-	rpc_setbufsize(server->client, server->wsize + 100, server->rsize + 100);
 	return 0;
-	/* Yargs. It didn't work out. */
-out_no_root:
-	dprintk("nfs_sb_init: get root inode failed: errno %d\n", -no_root_error);
-	if (!IS_ERR(root_inode))
-		iput(root_inode);
-	return no_root_error;
 }
 
 /*
- * Create an RPC client handle.
+ * Initialise the common bits of the superblock
  */
-static struct rpc_clnt *
-nfs_create_client(struct nfs_server *server, const struct nfs_mount_data *data)
+static inline void nfs_initialise_sb(struct super_block *sb)
 {
-	struct nfs_client	*clp;
-	struct rpc_clnt		*clnt;
-	int			proto = (data->flags & NFS_MOUNT_TCP) ? IPPROTO_TCP : IPPROTO_UDP;
-	int			nfsversion = 2;
-	int			err;
-
-#ifdef CONFIG_NFS_V3
-	if (server->flags & NFS_MOUNT_VER3)
-		nfsversion = 3;
-#endif
-
-	clp = nfs_get_client(server->hostname, &server->addr, nfsversion);
-	if (!clp) {
-		dprintk("%s: failed to create NFS4 client.\n", __FUNCTION__);
-		return ERR_PTR(PTR_ERR(clp));
-	}
-
-	if (clp->cl_cons_state == NFS_CS_INITING) {
-		/* Check NFS protocol revision and initialize RPC op
-		 * vector and file handle pool. */
-#ifdef CONFIG_NFS_V3
-		if (nfsversion == 3) {
-			clp->rpc_ops = &nfs_v3_clientops;
-			server->caps |= NFS_CAP_READDIRPLUS;
-		} else {
-			clp->rpc_ops = &nfs_v2_clientops;
-		}
-#else
-		clp->rpc_ops = &nfs_v2_clientops;
-#endif
-
-		/* create transport and client */
-		err = nfs_create_rpc_client(clp, proto, data->timeo,
-					    data->retrans, RPC_AUTH_UNIX);
-		if (err < 0)
-			goto client_init_error;
-
-		nfs_mark_client_ready(clp, 0);
-	}
+	struct nfs_server *server = NFS_SB(sb);
 
-	/* create an nfs_server-specific client */
-	clnt = rpc_clone_client(clp->cl_rpcclient);
-	if (IS_ERR(clnt)) {
-		dprintk("%s: couldn't create rpc_client!\n", __FUNCTION__);
-		nfs_put_client(clp);
-		return ERR_PTR(PTR_ERR(clnt));
-	}
+	sb->s_magic = NFS_SUPER_MAGIC;
 
-	if (data->pseudoflavor != clp->cl_rpcclient->cl_auth->au_flavor) {
-		struct rpc_auth *auth;
+	/* We probably want something more informative here */
+	snprintf(sb->s_id, sizeof(sb->s_id),
+		 "%x:%x", MAJOR(sb->s_dev), MINOR(sb->s_dev));
 
-		auth = rpcauth_create(data->pseudoflavor, server->client);
-		if (IS_ERR(auth)) {
-			dprintk("%s: couldn't create credcache!\n", __FUNCTION__);
-			return ERR_PTR(PTR_ERR(auth));
-		}
-	}
+	if (sb->s_blocksize == 0)
+		sb->s_blocksize = nfs_block_bits(server->wsize,
+						 &sb->s_blocksize_bits);
 
-	server->nfs_client = clp;
-	return clnt;
+	if (server->flags & NFS_MOUNT_NOAC)
+		sb->s_flags |= MS_SYNCHRONOUS;
 
-client_init_error:
-	nfs_mark_client_ready(clp, err);
-	nfs_put_client(clp);
-	return ERR_PTR(err);
+	nfs_super_set_maxbytes(sb, server->maxfilesize);
 }
 
 /*
- * Clone a server record
+ * Finish setting up an NFS2/3 superblock
  */
-static struct nfs_server *nfs_clone_server(struct super_block *sb, struct nfs_clone_mount *data)
+static void nfs_fill_super(struct super_block *sb, struct nfs_mount_data *data)
 {
 	struct nfs_server *server = NFS_SB(sb);
-	struct nfs_server *parent = NFS_SB(data->sb);
-	struct inode *root_inode;
-	struct nfs_fsinfo fsinfo;
-	void *err = ERR_PTR(-ENOMEM);
-
-	sb->s_op = data->sb->s_op;
-	sb->s_blocksize = data->sb->s_blocksize;
-	sb->s_blocksize_bits = data->sb->s_blocksize_bits;
-	sb->s_maxbytes = data->sb->s_maxbytes;
-
-	server->client_acl = ERR_PTR(-EINVAL);
-	server->io_stats = nfs_alloc_iostats();
-	if (server->io_stats == NULL)
-		goto out;
-
-	server->client = rpc_clone_client(parent->client);
-	if (IS_ERR((err = server->client)))
-		goto out;
-
-	if (!IS_ERR(parent->client_acl)) {
-		server->client_acl = rpc_clone_client(parent->client_acl);
-		if (IS_ERR((err = server->client_acl)))
-			goto out;
-	}
-	root_inode = nfs_fhget(sb, data->fh, data->fattr);
-	if (!root_inode)
-		goto out;
-	sb->s_root = d_alloc_root(root_inode);
-	if (!sb->s_root)
-		goto out_put_root;
-	fsinfo.fattr = data->fattr;
-	if (NFS_PROTO(root_inode)->fsinfo(server, data->fh, &fsinfo) == 0)
-		nfs_super_set_maxbytes(sb, fsinfo.maxfilesize);
-	sb->s_root->d_op = server->nfs_client->rpc_ops->dentry_ops;
-	sb->s_flags |= MS_ACTIVE;
-	return server;
-out_put_root:
-	iput(root_inode);
-out:
-	return err;
-}
-
-/*
- * Copy an existing superblock and attach revised data
- */
-static int nfs_clone_generic_sb(struct nfs_clone_mount *data,
-		struct super_block *(*fill_sb)(struct nfs_server *, struct nfs_clone_mount *),
-		struct nfs_server *(*fill_server)(struct super_block *, struct nfs_clone_mount *),
-		struct vfsmount *mnt)
-{
-	struct nfs_server *server;
-	struct nfs_server *parent = NFS_SB(data->sb);
-	struct super_block *sb = ERR_PTR(-EINVAL);
-	char *hostname;
-	int error = -ENOMEM;
-	int len;
-
-	server = kmalloc(sizeof(struct nfs_server), GFP_KERNEL);
-	if (server == NULL)
-		goto out_err;
-	memcpy(server, parent, sizeof(*server));
-	atomic_inc(&server->nfs_client->cl_count);
-	hostname = (data->hostname != NULL) ? data->hostname : parent->hostname;
-	len = strlen(hostname) + 1;
-	server->hostname = kmalloc(len, GFP_KERNEL);
-	if (server->hostname == NULL)
-		goto free_server;
-	memcpy(server->hostname, hostname, len);
-
-	sb = fill_sb(server, data);
-	if (IS_ERR(sb)) {
-		error = PTR_ERR(sb);
-		goto free_hostname;
-	}
 
-	if (sb->s_root)
-		goto out_share;
+	sb->s_blocksize_bits = 0;
+	sb->s_blocksize = 0;
+	if (data->bsize)
+		sb->s_blocksize = nfs_block_size(data->bsize, &sb->s_blocksize_bits);
 
-	server = fill_server(sb, data);
-	if (IS_ERR(server)) {
-		error = PTR_ERR(server);
-		goto out_deactivate;
+	if (server->flags & NFS_MOUNT_VER3) {
+		/* The VFS shouldn't apply the umask to mode bits. We will do
+		 * so ourselves when necessary.
+		 */
+		sb->s_flags |= MS_POSIXACL;
+		sb->s_time_gran = 1;
 	}
-	return simple_set_mnt(mnt, sb);
-out_deactivate:
-	up_write(&sb->s_umount);
-	deactivate_super(sb);
-	return error;
-out_share:
-	kfree(server->hostname);
-	nfs_put_client(server->nfs_client);
-	kfree(server);
-	return simple_set_mnt(mnt, sb);
-free_hostname:
-	kfree(server->hostname);
-free_server:
-	nfs_put_client(server->nfs_client);
-	kfree(server);
-out_err:
-	return error;
+
+	sb->s_op = &nfs_sops;
+ 	nfs_initialise_sb(sb);
 }
 
 /*
- * Set up an NFS2/3 superblock
- *
- * The way this works is that the mount process passes a structure
- * in the data argument which contains the server's IP address
- * and the root file handle obtained from the server's mount
- * daemon. We stash these away in the private superblock fields.
+ * Finish setting up a cloned NFS2/3 superblock
  */
-static int
-nfs_fill_super(struct super_block *sb, struct nfs_mount_data *data, int silent)
+static void nfs_clone_super(struct super_block *sb,
+			    const struct super_block *old_sb)
 {
-	struct nfs_server	*server;
-	rpc_authflavor_t	authflavor;
+	struct nfs_server *server = NFS_SB(sb);
+
+	sb->s_blocksize_bits = old_sb->s_blocksize_bits;
+	sb->s_blocksize = old_sb->s_blocksize;
+	sb->s_maxbytes = old_sb->s_maxbytes;
 
-	server           = NFS_SB(sb);
-	sb->s_blocksize_bits = 0;
-	sb->s_blocksize = 0;
-	if (data->bsize)
-		sb->s_blocksize = nfs_block_size(data->bsize, &sb->s_blocksize_bits);
-	if (data->rsize)
-		server->rsize = nfs_block_size(data->rsize, NULL);
-	if (data->wsize)
-		server->wsize = nfs_block_size(data->wsize, NULL);
-	server->flags    = data->flags & NFS_MOUNT_FLAGMASK;
-
-	server->acregmin = data->acregmin*HZ;
-	server->acregmax = data->acregmax*HZ;
-	server->acdirmin = data->acdirmin*HZ;
-	server->acdirmax = data->acdirmax*HZ;
-
-	/* Start lockd here, before we might error out */
-	if (!(server->flags & NFS_MOUNT_NONLM))
-		lockd_up();
-
-	server->namelen  = data->namlen;
-	server->hostname = kmalloc(strlen(data->hostname) + 1, GFP_KERNEL);
-	if (!server->hostname)
-		return -ENOMEM;
-	strcpy(server->hostname, data->hostname);
-
-	/* Fill in pseudoflavor for mount version < 5 */
-	if (!(data->flags & NFS_MOUNT_SECFLAVOUR))
-		data->pseudoflavor = RPC_AUTH_UNIX;
-	authflavor = data->pseudoflavor;	/* save for sb_init() */
-	/* XXX maybe we want to add a server->pseudoflavor field */
-
-	/* Create RPC client handles */
-	server->client = nfs_create_client(server, data);
-	if (IS_ERR(server->client))
-		return PTR_ERR(server->client);
-
-	/* RFC 2623, sec 2.3.2 */
 	if (server->flags & NFS_MOUNT_VER3) {
-#ifdef CONFIG_NFS_V3_ACL
-		if (!(server->flags & NFS_MOUNT_NOACL)) {
-			server->client_acl = rpc_bind_new_program(server->client, &nfsacl_program, 3);
-			/* No errors! Assume that Sun nfsacls are supported */
-			if (!IS_ERR(server->client_acl))
-				server->caps |= NFS_CAP_ACLS;
-		}
-#else
-		server->flags &= ~NFS_MOUNT_NOACL;
-#endif /* CONFIG_NFS_V3_ACL */
-		/*
-		 * The VFS shouldn't apply the umask to mode bits. We will
-		 * do so ourselves when necessary.
+		/* The VFS shouldn't apply the umask to mode bits. We will do
+		 * so ourselves when necessary.
 		 */
 		sb->s_flags |= MS_POSIXACL;
-		if (server->namelen == 0 || server->namelen > NFS3_MAXNAMLEN)
-			server->namelen = NFS3_MAXNAMLEN;
 		sb->s_time_gran = 1;
-	} else {
-		if (server->namelen == 0 || server->namelen > NFS2_MAXNAMLEN)
-			server->namelen = NFS2_MAXNAMLEN;
 	}
 
-	sb->s_op = &nfs_sops;
-	return nfs_sb_init(sb, authflavor);
+	sb->s_op = old_sb->s_op;
+ 	nfs_initialise_sb(sb);
 }
 
-static int nfs_set_super(struct super_block *s, void *data)
+static int nfs_set_super(struct super_block *s, void *_server)
 {
-	s->s_fs_info = data;
-	return set_anon_super(s, data);
+	struct nfs_server *server = _server;
+	int ret;
+
+	s->s_fs_info = server;
+	ret = set_anon_super(s, server);
+	if (ret == 0)
+		server->s_dev = s->s_dev;
+	return ret;
 }
 
 static int nfs_compare_super(struct super_block *sb, void *data)
 {
-	struct nfs_server *server = data;
-	struct nfs_server *old = NFS_SB(sb);
+	struct nfs_server *server = data, *old = NFS_SB(sb);
 
-	if (old->addr.sin_addr.s_addr != server->addr.sin_addr.s_addr)
+	if (old->nfs_client != server->nfs_client)
 		return 0;
-	if (old->addr.sin_port != server->addr.sin_port)
+	if (memcmp(&old->fsid, &server->fsid, sizeof(old->fsid)) != 0)
 		return 0;
-	return !nfs_compare_fh(&old->fh, &server->fh);
+	return 1;
 }
 
 static int nfs_get_sb(struct file_system_type *fs_type,
 	int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt)
 {
-	int error;
 	struct nfs_server *server = NULL;
 	struct super_block *s;
-	struct nfs_fh *root;
+	struct nfs_fh mntfh;
 	struct nfs_mount_data *data = raw_data;
+	struct dentry *mntroot;
+	int error;
 
-	error = -EINVAL;
-	if (data == NULL) {
-		dprintk("%s: missing data argument\n", __FUNCTION__);
-		goto out_err_noserver;
-	}
-	if (data->version <= 0 || data->version > NFS_MOUNT_VERSION) {
-		dprintk("%s: bad mount version\n", __FUNCTION__);
-		goto out_err_noserver;
-	}
-	switch (data->version) {
-		case 1:
-			data->namlen = 0;
-		case 2:
-			data->bsize  = 0;
-		case 3:
-			if (data->flags & NFS_MOUNT_VER3) {
-				dprintk("%s: mount structure version %d does not support NFSv3\n",
-						__FUNCTION__,
-						data->version);
-				goto out_err_noserver;
-			}
-			data->root.size = NFS2_FHSIZE;
-			memcpy(data->root.data, data->old_root.data, NFS2_FHSIZE);
-		case 4:
-			if (data->flags & NFS_MOUNT_SECFLAVOUR) {
-				dprintk("%s: mount structure version %d does not support strong security\n",
-						__FUNCTION__,
-						data->version);
-				goto out_err_noserver;
-			}
-		case 5:
-			memset(data->context, 0, sizeof(data->context));
-	}
-#ifndef CONFIG_NFS_V3
-	/* If NFSv3 is not compiled in, return -EPROTONOSUPPORT */
-	error = -EPROTONOSUPPORT;
-	if (data->flags & NFS_MOUNT_VER3) {
-		dprintk("%s: NFSv3 not compiled into kernel\n", __FUNCTION__);
-		goto out_err_noserver;
-	}
-#endif /* CONFIG_NFS_V3 */
+	/* Validate the mount data */
+	error = nfs_validate_mount_data(data, &mntfh);
+	if (error < 0)
+		return error;
 
-	error = -ENOMEM;
-	server = kzalloc(sizeof(struct nfs_server), GFP_KERNEL);
-	if (!server)
+	/* Get a volume representation */
+	server = nfs_create_server(data, &mntfh);
+	if (IS_ERR(server)) {
+		error = PTR_ERR(server);
 		goto out_err_noserver;
-	/* Zero out the NFS state stuff */
-	init_nfsv4_state(server);
-	server->client = server->client_acl = ERR_PTR(-EINVAL);
-
-	root = &server->fh;
-	if (data->flags & NFS_MOUNT_VER3)
-		root->size = data->root.size;
-	else
-		root->size = NFS2_FHSIZE;
-	error = -EINVAL;
-	if (root->size > sizeof(root->data)) {
-		dprintk("%s: invalid root filehandle\n", __FUNCTION__);
-		goto out_err;
-	}
-	memcpy(root->data, data->root.data, root->size);
-
-	/* We now require that the mount process passes the remote address */
-	memcpy(&server->addr, &data->addr, sizeof(server->addr));
-	if (server->addr.sin_addr.s_addr == INADDR_ANY) {
-		dprintk("%s: mount program didn't pass remote address!\n",
-				__FUNCTION__);
-		goto out_err;
 	}
 
+	/* Get a superblock - note that we may end up sharing one that already exists */
 	s = sget(fs_type, nfs_compare_super, nfs_set_super, server);
 	if (IS_ERR(s)) {
 		error = PTR_ERR(s);
-		goto out_err;
+		goto out_err_nosb;
 	}
 
-	if (s->s_root)
-		goto out_share;
+	if (s->s_fs_info != server) {
+		nfs_free_server(server);
+		server = NULL;
+	}
 
-	s->s_flags = flags;
+	if (!s->s_root) {
+		/* initial superblock/root creation */
+		s->s_flags = flags;
+		nfs_fill_super(s, data);
+	}
 
-	error = nfs_fill_super(s, data, flags & MS_SILENT ? 1 : 0);
-	if (error) {
-		up_write(&s->s_umount);
-		deactivate_super(s);
-		return error;
+	mntroot = nfs_get_root(s, &mntfh);
+	if (IS_ERR(mntroot)) {
+		error = PTR_ERR(mntroot);
+		goto error_splat_super;
 	}
-	s->s_flags |= MS_ACTIVE;
-	return simple_set_mnt(mnt, s);
 
-out_share:
-	kfree(server);
-	return simple_set_mnt(mnt, s);
+	s->s_flags |= MS_ACTIVE;
+	mnt->mnt_sb = s;
+	mnt->mnt_root = mntroot;
+	return 0;
 
-out_err:
-	kfree(server);
+out_err_nosb:
+	nfs_free_server(server);
 out_err_noserver:
 	return error;
+
+error_splat_super:
+	up_write(&s->s_umount);
+	deactivate_super(s);
+	return error;
 }
 
+/*
+ * Destroy an NFS2/3 superblock
+ */
 static void nfs_kill_super(struct super_block *s)
 {
 	struct nfs_server *server = NFS_SB(s);
 
 	kill_anon_super(s);
-
-	if (!IS_ERR(server->client))
-		rpc_shutdown_client(server->client);
-	if (!IS_ERR(server->client_acl))
-		rpc_shutdown_client(server->client_acl);
-
-	if (!(server->flags & NFS_MOUNT_NONLM))
-		lockd_down();	/* release rpc.lockd */
-
-	nfs_free_iostats(server->io_stats);
-	kfree(server->hostname);
-	nfs_put_client(server->nfs_client);
-	kfree(server);
-	nfs_release_automount_timer();
+	nfs_free_server(server);
 }
 
-static struct super_block *nfs_clone_sb(struct nfs_server *server, struct nfs_clone_mount *data)
-{
-	struct super_block *sb;
-
-	server->fsid = data->fattr->fsid;
-	nfs_copy_fh(&server->fh, data->fh);
-	sb = sget(&nfs_fs_type, nfs_compare_super, nfs_set_super, server);
-	if (!IS_ERR(sb) && sb->s_root == NULL && !(server->flags & NFS_MOUNT_NONLM))
-		lockd_up();
-	return sb;
-}
-
-static int nfs_clone_nfs_sb(struct file_system_type *fs_type,
-		int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt)
+/*
+ * Clone an NFS2/3 server record on xdev traversal (FSID-change)
+ */
+static int nfs_xdev_get_sb(struct file_system_type *fs_type, int flags,
+			   const char *dev_name, void *raw_data,
+			   struct vfsmount *mnt)
 {
 	struct nfs_clone_mount *data = raw_data;
-	return nfs_clone_generic_sb(data, nfs_clone_sb, nfs_clone_server, mnt);
-}
+	struct super_block *s;
+	struct nfs_server *server;
+	struct dentry *mntroot;
+	int error;
 
-#ifdef CONFIG_NFS_V4
-static struct rpc_clnt *nfs4_create_client(struct nfs_server *server,
-	int timeo, int retrans, int proto, rpc_authflavor_t flavor)
-{
-	struct nfs_client *clp;
-	struct rpc_clnt *clnt = NULL;
-	int err = -EIO;
-
-	clp = nfs_get_client(server->hostname, &server->addr, 4);
-	if (!clp) {
-		dprintk("%s: failed to create NFS4 client.\n", __FUNCTION__);
-		return ERR_PTR(err);
-	}
+	dprintk("--> nfs_xdev_get_sb()\n");
 
-	/* Now create transport and client */
-	if (clp->cl_cons_state == NFS_CS_INITING) {
-		clp->rpc_ops = &nfs_v4_clientops;
+	/* create a new volume representation */
+	server = nfs_clone_server(NFS_SB(data->sb), data->fh, data->fattr);
+	if (IS_ERR(server)) {
+		error = PTR_ERR(server);
+		goto out_err_noserver;
+	}
 
-		err = nfs_create_rpc_client(clp, proto, timeo, retrans, flavor);
-		if (err < 0)
-			goto client_init_error;
+	/* Get a superblock - note that we may end up sharing one that already exists */
+	s = sget(&nfs_fs_type, nfs_compare_super, nfs_set_super, server);
+	if (IS_ERR(s)) {
+		error = PTR_ERR(s);
+		goto out_err_nosb;
+	}
 
-		memcpy(clp->cl_ipaddr, server->ip_addr, sizeof(clp->cl_ipaddr));
-		err = nfs_idmap_new(clp);
-		if (err < 0) {
-			dprintk("%s: failed to create idmapper.\n",
-				__FUNCTION__);
-			goto client_init_error;
-		}
-		__set_bit(NFS_CS_IDMAP, &clp->cl_res_state);
-		nfs_mark_client_ready(clp, 0);
+	if (s->s_fs_info != server) {
+		nfs_free_server(server);
+		server = NULL;
 	}
 
-	clnt = rpc_clone_client(clp->cl_rpcclient);
+	if (!s->s_root) {
+		/* initial superblock/root creation */
+		s->s_flags = flags;
+		nfs_clone_super(s, data->sb);
+	}
 
-	if (IS_ERR(clnt)) {
-		dprintk("%s: cannot create RPC client. Error = %d\n",
-				__FUNCTION__, err);
-		return clnt;
+	mntroot = nfs_get_root(s, data->fh);
+	if (IS_ERR(mntroot)) {
+		error = PTR_ERR(mntroot);
+		goto error_splat_super;
 	}
 
-	if (clnt->cl_auth->au_flavor != flavor) {
-		struct rpc_auth *auth;
+	s->s_flags |= MS_ACTIVE;
+	mnt->mnt_sb = s;
+	mnt->mnt_root = mntroot;
 
-		auth = rpcauth_create(flavor, clnt);
-		if (IS_ERR(auth)) {
-			dprintk("%s: couldn't create credcache!\n", __FUNCTION__);
-			return (struct rpc_clnt *)auth;
-		}
-	}
+	dprintk("<-- nfs_xdev_get_sb() = 0\n");
+	return 0;
 
-	server->nfs_client = clp;
-	down_write(&clp->cl_sem);
-	list_add_tail(&server->nfs4_siblings, &clp->cl_superblocks);
-	up_write(&clp->cl_sem);
-	return clnt;
+out_err_nosb:
+	nfs_free_server(server);
+out_err_noserver:
+	dprintk("<-- nfs_xdev_get_sb() = %d [error]\n", error);
+	return error;
 
-client_init_error:
-	nfs_mark_client_ready(clp, err);
-	nfs_put_client(clp);
-	return ERR_PTR(err);
+error_splat_super:
+	up_write(&s->s_umount);
+	deactivate_super(s);
+	dprintk("<-- nfs_xdev_get_sb() = %d [splat]\n", error);
+	return error;
 }
 
+#ifdef CONFIG_NFS_V4
+
 /*
- * Set up an NFS4 superblock
+ * Finish setting up a cloned NFS4 superblock
  */
-static int nfs4_fill_super(struct super_block *sb, struct nfs4_mount_data *data, int silent)
+static void nfs4_clone_super(struct super_block *sb,
+			    const struct super_block *old_sb)
 {
-	struct nfs_server *server;
-	rpc_authflavor_t authflavour;
-	int err = -EIO;
-
-	sb->s_blocksize_bits = 0;
-	sb->s_blocksize = 0;
-	server = NFS_SB(sb);
-	if (data->rsize != 0)
-		server->rsize = nfs_block_size(data->rsize, NULL);
-	if (data->wsize != 0)
-		server->wsize = nfs_block_size(data->wsize, NULL);
-	server->flags = data->flags & NFS_MOUNT_FLAGMASK;
-	server->caps = NFS_CAP_ATOMIC_OPEN;
-
-	server->acregmin = data->acregmin*HZ;
-	server->acregmax = data->acregmax*HZ;
-	server->acdirmin = data->acdirmin*HZ;
-	server->acdirmax = data->acdirmax*HZ;
-
-	/* Now create transport and client */
-	authflavour = RPC_AUTH_UNIX;
-	if (data->auth_flavourlen != 0) {
-		if (data->auth_flavourlen != 1) {
-			dprintk("%s: Invalid number of RPC auth flavours %d.\n",
-					__FUNCTION__, data->auth_flavourlen);
-			err = -EINVAL;
-			goto out_fail;
-		}
-		if (copy_from_user(&authflavour, data->auth_flavours, sizeof(authflavour))) {
-			err = -EFAULT;
-			goto out_fail;
-		}
-	}
-
-	server->client = nfs4_create_client(server, data->timeo, data->retrans,
-					    data->proto, authflavour);
-	if (IS_ERR(server->client)) {
-		err = PTR_ERR(server->client);
-			dprintk("%s: cannot create RPC client. Error = %d\n",
-					__FUNCTION__, err);
-			goto out_fail;
-	}
-
+	sb->s_blocksize_bits = old_sb->s_blocksize_bits;
+	sb->s_blocksize = old_sb->s_blocksize;
+	sb->s_maxbytes = old_sb->s_maxbytes;
 	sb->s_time_gran = 1;
-
-	sb->s_op = &nfs4_sops;
-	err = nfs_sb_init(sb, authflavour);
-
- out_fail:
-	return err;
+	sb->s_op = old_sb->s_op;
+ 	nfs_initialise_sb(sb);
 }
 
-static int nfs4_compare_super(struct super_block *sb, void *data)
+/*
+ * Set up an NFS4 superblock
+ */
+static void nfs4_fill_super(struct super_block *sb)
 {
-	struct nfs_server *server = data;
-	struct nfs_server *old = NFS_SB(sb);
-
-	if (strcmp(server->hostname, old->hostname) != 0)
-		return 0;
-	if (strcmp(server->mnt_path, old->mnt_path) != 0)
-		return 0;
-	return 1;
+	sb->s_time_gran = 1;
+	sb->s_op = &nfs4_sops;
+	nfs_initialise_sb(sb);
 }
 
-static void *
-nfs_copy_user_string(char *dst, struct nfs_string *src, int maxlen)
+static void *nfs_copy_user_string(char *dst, struct nfs_string *src, int maxlen)
 {
 	void *p = NULL;
 
@@ -1157,14 +791,22 @@ nfs_copy_user_string(char *dst, struct nfs_string *src, int maxlen)
 	return dst;
 }
 
+/*
+ * Get the superblock for an NFS4 mountpoint
+ */
 static int nfs4_get_sb(struct file_system_type *fs_type,
 	int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt)
 {
-	int error;
-	struct nfs_server *server;
-	struct super_block *s;
 	struct nfs4_mount_data *data = raw_data;
+	struct super_block *s;
+	struct nfs_server *server;
+	struct sockaddr_in addr;
+	rpc_authflavor_t authflavour;
+	struct nfs_fh mntfh;
+	struct dentry *mntroot;
+	char *mntpath = NULL, *hostname = NULL, ip_addr[16];
 	void *p;
+	int error;
 
 	if (data == NULL) {
 		dprintk("%s: missing data argument\n", __FUNCTION__);
@@ -1175,75 +817,107 @@ static int nfs4_get_sb(struct file_system_type *fs_type,
 		return -EINVAL;
 	}
 
-	server = kzalloc(sizeof(struct nfs_server), GFP_KERNEL);
-	if (!server)
-		return -ENOMEM;
-	/* Zero out the NFS state stuff */
-	init_nfsv4_state(server);
-	server->client = server->client_acl = ERR_PTR(-EINVAL);
+	/* We now require that the mount process passes the remote address */
+	if (data->host_addrlen != sizeof(addr))
+		return -EINVAL;
+
+	if (copy_from_user(&addr, data->host_addr, sizeof(addr)))
+		return -EFAULT;
+
+	if (addr.sin_family != AF_INET ||
+	    addr.sin_addr.s_addr == INADDR_ANY
+	    ) {
+		dprintk("%s: mount program didn't pass remote IP address!\n",
+				__FUNCTION__);
+		return -EINVAL;
+	}
+
+	/* Grab the authentication type */
+	authflavour = RPC_AUTH_UNIX;
+	if (data->auth_flavourlen != 0) {
+		if (data->auth_flavourlen != 1) {
+			dprintk("%s: Invalid number of RPC auth flavours %d.\n",
+					__FUNCTION__, data->auth_flavourlen);
+			error = -EINVAL;
+			goto out_err_noserver;
+		}
+
+		if (copy_from_user(&authflavour, data->auth_flavours,
+				   sizeof(authflavour))) {
+			error = -EFAULT;
+			goto out_err_noserver;
+		}
+	}
 
 	p = nfs_copy_user_string(NULL, &data->hostname, 256);
 	if (IS_ERR(p))
 		goto out_err;
-	server->hostname = p;
+	hostname = p;
 
 	p = nfs_copy_user_string(NULL, &data->mnt_path, 1024);
 	if (IS_ERR(p))
 		goto out_err;
-	server->mnt_path = p;
+	mntpath = p;
 
-	p = nfs_copy_user_string(server->ip_addr, &data->client_addr,
-			sizeof(server->ip_addr) - 1);
+	dprintk("MNTPATH: %s\n", mntpath);
+
+	p = nfs_copy_user_string(ip_addr, &data->client_addr,
+				 sizeof(ip_addr) - 1);
 	if (IS_ERR(p))
 		goto out_err;
 
-	/* We now require that the mount process passes the remote address */
-	if (data->host_addrlen != sizeof(server->addr)) {
-		error = -EINVAL;
-		goto out_free;
-	}
-	if (copy_from_user(&server->addr, data->host_addr, sizeof(server->addr))) {
-		error = -EFAULT;
-		goto out_free;
-	}
-	if (server->addr.sin_family != AF_INET ||
-	    server->addr.sin_addr.s_addr == INADDR_ANY) {
-		dprintk("%s: mount program didn't pass remote IP address!\n",
-				__FUNCTION__);
-		error = -EINVAL;
-		goto out_free;
+	/* Get a volume representation */
+	server = nfs4_create_server(data, hostname, &addr, mntpath, ip_addr,
+				    authflavour, &mntfh);
+	if (IS_ERR(server)) {
+		error = PTR_ERR(server);
+		goto out_err_noserver;
 	}
 
-	s = sget(fs_type, nfs4_compare_super, nfs_set_super, server);
+	/* Get a superblock - note that we may end up sharing one that already exists */
+	s = sget(fs_type, nfs_compare_super, nfs_set_super, server);
 	if (IS_ERR(s)) {
 		error = PTR_ERR(s);
 		goto out_free;
 	}
 
-	if (s->s_root) {
-		kfree(server->mnt_path);
-		kfree(server->hostname);
-		kfree(server);
-		return simple_set_mnt(mnt, s);
-	}
+	if (!s->s_root) {
+		/* initial superblock/root creation */
+		s->s_flags = flags;
 
-	s->s_flags = flags;
+		nfs4_fill_super(s);
+	} else {
+		nfs_free_server(server);
+	}
 
-	error = nfs4_fill_super(s, data, flags & MS_SILENT ? 1 : 0);
-	if (error) {
-		up_write(&s->s_umount);
-		deactivate_super(s);
-		return error;
+	mntroot = nfs4_get_root(s, &mntfh);
+	if (IS_ERR(mntroot)) {
+		error = PTR_ERR(mntroot);
+		goto error_splat_super;
 	}
+
 	s->s_flags |= MS_ACTIVE;
-	return simple_set_mnt(mnt, s);
+	mnt->mnt_sb = s;
+	mnt->mnt_root = mntroot;
+	kfree(mntpath);
+	kfree(hostname);
+	return 0;
+
 out_err:
 	error = PTR_ERR(p);
+	goto out_err_noserver;
+
 out_free:
-	kfree(server->mnt_path);
-	kfree(server->hostname);
-	kfree(server);
+	nfs_free_server(server);
+out_err_noserver:
+	kfree(mntpath);
+	kfree(hostname);
 	return error;
+
+error_splat_super:
+	up_write(&s->s_umount);
+	deactivate_super(s);
+	goto out_err_noserver;
 }
 
 static void nfs4_kill_super(struct super_block *sb)
@@ -1254,133 +928,140 @@ static void nfs4_kill_super(struct super_block *sb)
 	kill_anon_super(sb);
 
 	nfs4_renewd_prepare_shutdown(server);
-
-	if (server->client != NULL && !IS_ERR(server->client))
-		rpc_shutdown_client(server->client);
-
-	destroy_nfsv4_state(server);
-
-	nfs_free_iostats(server->io_stats);
-	kfree(server->hostname);
-	kfree(server);
-	nfs_release_automount_timer();
+	nfs_free_server(server);
 }
 
 /*
- * Constructs the SERVER-side path
+ * Clone an NFS4 server record on xdev traversal (FSID-change)
  */
-static inline char *nfs4_dup_path(const struct dentry *dentry)
+static int nfs4_xdev_get_sb(struct file_system_type *fs_type, int flags,
+			    const char *dev_name, void *raw_data,
+			    struct vfsmount *mnt)
 {
-	char *page = (char *) __get_free_page(GFP_USER);
-	char *path;
+	struct nfs_clone_mount *data = raw_data;
+	struct super_block *s;
+	struct nfs_server *server;
+	struct dentry *mntroot;
+	int error;
 
-	path = nfs4_path(dentry, page, PAGE_SIZE);
-	if (!IS_ERR(path)) {
-		int len = PAGE_SIZE + page - path;
-		char *tmp = path;
+	dprintk("--> nfs4_xdev_get_sb()\n");
 
-		path = kmalloc(len, GFP_KERNEL);
-		if (path)
-			memcpy(path, tmp, len);
-		else
-			path = ERR_PTR(-ENOMEM);
+	/* create a new volume representation */
+	server = nfs_clone_server(NFS_SB(data->sb), data->fh, data->fattr);
+	if (IS_ERR(server)) {
+		error = PTR_ERR(server);
+		goto out_err_noserver;
 	}
-	free_page((unsigned long)page);
-	return path;
-}
 
-static struct super_block *nfs4_clone_sb(struct nfs_server *server, struct nfs_clone_mount *data)
-{
-	const struct dentry *dentry = data->dentry;
-	struct nfs_client *clp = server->nfs_client;
-	struct super_block *sb;
-
-	server->fsid = data->fattr->fsid;
-	nfs_copy_fh(&server->fh, data->fh);
-	server->mnt_path = nfs4_dup_path(dentry);
-	if (IS_ERR(server->mnt_path)) {
-		sb = (struct super_block *)server->mnt_path;
-		goto err;
+	/* Get a superblock - note that we may end up sharing one that already exists */
+	s = sget(&nfs_fs_type, nfs_compare_super, nfs_set_super, server);
+	if (IS_ERR(s)) {
+		error = PTR_ERR(s);
+		goto out_err_nosb;
 	}
-	sb = sget(&nfs4_fs_type, nfs4_compare_super, nfs_set_super, server);
-	if (IS_ERR(sb) || sb->s_root)
-		goto free_path;
-	nfs4_server_capabilities(server, &server->fh);
-
-	down_write(&clp->cl_sem);
-	list_add_tail(&server->nfs4_siblings, &clp->cl_superblocks);
-	up_write(&clp->cl_sem);
-	return sb;
-free_path:
-	kfree(server->mnt_path);
-err:
-	server->mnt_path = NULL;
-	return sb;
-}
 
-static int nfs_clone_nfs4_sb(struct file_system_type *fs_type,
-		int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt)
-{
-	struct nfs_clone_mount *data = raw_data;
-	return nfs_clone_generic_sb(data, nfs4_clone_sb, nfs_clone_server, mnt);
-}
+	if (s->s_fs_info != server) {
+		nfs_free_server(server);
+		server = NULL;
+	}
 
-static struct super_block *nfs4_referral_sb(struct nfs_server *server, struct nfs_clone_mount *data)
-{
-	struct super_block *sb = ERR_PTR(-ENOMEM);
-	int len;
-
-	len = strlen(data->mnt_path) + 1;
-	server->mnt_path = kmalloc(len, GFP_KERNEL);
-	if (server->mnt_path == NULL)
-		goto err;
-	memcpy(server->mnt_path, data->mnt_path, len);
-	memcpy(&server->addr, data->addr, sizeof(struct sockaddr_in));
-
-	sb = sget(&nfs4_fs_type, nfs4_compare_super, nfs_set_super, server);
-	if (IS_ERR(sb) || sb->s_root)
-		goto free_path;
-	return sb;
-free_path:
-	kfree(server->mnt_path);
-err:
-	server->mnt_path = NULL;
-	return sb;
-}
+	if (!s->s_root) {
+		/* initial superblock/root creation */
+		s->s_flags = flags;
+		nfs4_clone_super(s, data->sb);
+	}
 
-static struct nfs_server *nfs4_referral_server(struct super_block *sb, struct nfs_clone_mount *data)
-{
-	struct nfs_server *server = NFS_SB(sb);
-	int proto, timeo, retrans;
-	void *err;
-
-	proto = IPPROTO_TCP;
-	/* Since we are following a referral and there may be alternatives,
-	   set the timeouts and retries to low values */
-	timeo = 2;
-	retrans = 1;
-
-	nfs_put_client(server->nfs_client);
-	server->nfs_client = NULL;
-	server->client = nfs4_create_client(server, timeo, retrans, proto,
-					    data->authflavor);
-	if (IS_ERR((err = server->client)))
-		goto out_err;
+	mntroot = nfs4_get_root(s, data->fh);
+	if (IS_ERR(mntroot)) {
+		error = PTR_ERR(mntroot);
+		goto error_splat_super;
+	}
 
-	sb->s_time_gran = 1;
-	sb->s_op = &nfs4_sops;
-	err = ERR_PTR(nfs_sb_init(sb, data->authflavor));
-	if (!IS_ERR(err))
-		return server;
-out_err:
-	return (struct nfs_server *)err;
+	s->s_flags |= MS_ACTIVE;
+	mnt->mnt_sb = s;
+	mnt->mnt_root = mntroot;
+
+	dprintk("<-- nfs4_xdev_get_sb() = 0\n");
+	return 0;
+
+out_err_nosb:
+	nfs_free_server(server);
+out_err_noserver:
+	dprintk("<-- nfs4_xdev_get_sb() = %d [error]\n", error);
+	return error;
+
+error_splat_super:
+	up_write(&s->s_umount);
+	deactivate_super(s);
+	dprintk("<-- nfs4_xdev_get_sb() = %d [splat]\n", error);
+	return error;
 }
 
-static int nfs_referral_nfs4_sb(struct file_system_type *fs_type,
-		int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt)
+/*
+ * Create an NFS4 server record on referral traversal
+ */
+static int nfs4_referral_get_sb(struct file_system_type *fs_type, int flags,
+				const char *dev_name, void *raw_data,
+				struct vfsmount *mnt)
 {
 	struct nfs_clone_mount *data = raw_data;
-	return nfs_clone_generic_sb(data, nfs4_referral_sb, nfs4_referral_server, mnt);
+	struct super_block *s;
+	struct nfs_server *server;
+	struct dentry *mntroot;
+	struct nfs_fh mntfh;
+	int error;
+
+	dprintk("--> nfs4_referral_get_sb()\n");
+
+	/* create a new volume representation */
+	server = nfs4_create_referral_server(data, &mntfh);
+	if (IS_ERR(server)) {
+		error = PTR_ERR(server);
+		goto out_err_noserver;
+	}
+
+	/* Get a superblock - note that we may end up sharing one that already exists */
+	s = sget(&nfs_fs_type, nfs_compare_super, nfs_set_super, server);
+	if (IS_ERR(s)) {
+		error = PTR_ERR(s);
+		goto out_err_nosb;
+	}
+
+	if (s->s_fs_info != server) {
+		nfs_free_server(server);
+		server = NULL;
+	}
+
+	if (!s->s_root) {
+		/* initial superblock/root creation */
+		s->s_flags = flags;
+		nfs4_fill_super(s);
+	}
+
+	mntroot = nfs4_get_root(s, data->fh);
+	if (IS_ERR(mntroot)) {
+		error = PTR_ERR(mntroot);
+		goto error_splat_super;
+	}
+
+	s->s_flags |= MS_ACTIVE;
+	mnt->mnt_sb = s;
+	mnt->mnt_root = mntroot;
+
+	dprintk("<-- nfs4_referral_get_sb() = 0\n");
+	return 0;
+
+out_err_nosb:
+	nfs_free_server(server);
+out_err_noserver:
+	dprintk("<-- nfs4_referral_get_sb() = %d [error]\n", error);
+	return error;
+
+error_splat_super:
+	up_write(&s->s_umount);
+	deactivate_super(s);
+	dprintk("<-- nfs4_referral_get_sb() = %d [splat]\n", error);
+	return error;
 }
 
-#endif
+#endif /* CONFIG_NFS_V4 */
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 7084ac9..453d446 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1273,7 +1273,7 @@ int nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
 		if (time_before(complain, jiffies)) {
 			dprintk("NFS: faulty NFS server %s:"
 				" (committed = %d) != (stable = %d)\n",
-				NFS_SERVER(data->inode)->hostname,
+				NFS_SERVER(data->inode)->nfs_client->cl_hostname,
 				resp->verf->committed, argp->stable);
 			complain = jiffies + 300 * HZ;
 		}
-- 
cgit v1.1


From 6aaca566503296a73f956908ec98173946134fe2 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 22 Aug 2006 20:06:13 -0400
Subject: NFS: Add server and volume lists to /proc

Make two new proc files available:

	/proc/fs/nfsfs/servers
	/proc/fs/nfsfs/volumes

The first lists the servers with which we are currently dealing (struct
nfs_client), and the second lists the volumes we have on those servers (struct
nfs_server).

Signed-Off-By: David Howells <dhowells@redhat.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/client.c   | 284 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 fs/nfs/inode.c    |   7 ++
 fs/nfs/internal.h |  12 +++
 3 files changed, 303 insertions(+)

(limited to 'fs')

diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index dafba60..27f6478 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -1148,3 +1148,287 @@ out_free_server:
 	dprintk("<-- nfs_clone_server() = error %d\n", error);
 	return ERR_PTR(error);
 }
+
+#ifdef CONFIG_PROC_FS
+static struct proc_dir_entry *proc_fs_nfs;
+
+static int nfs_server_list_open(struct inode *inode, struct file *file);
+static void *nfs_server_list_start(struct seq_file *p, loff_t *pos);
+static void *nfs_server_list_next(struct seq_file *p, void *v, loff_t *pos);
+static void nfs_server_list_stop(struct seq_file *p, void *v);
+static int nfs_server_list_show(struct seq_file *m, void *v);
+
+static struct seq_operations nfs_server_list_ops = {
+	.start	= nfs_server_list_start,
+	.next	= nfs_server_list_next,
+	.stop	= nfs_server_list_stop,
+	.show	= nfs_server_list_show,
+};
+
+static struct file_operations nfs_server_list_fops = {
+	.open		= nfs_server_list_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= seq_release,
+};
+
+static int nfs_volume_list_open(struct inode *inode, struct file *file);
+static void *nfs_volume_list_start(struct seq_file *p, loff_t *pos);
+static void *nfs_volume_list_next(struct seq_file *p, void *v, loff_t *pos);
+static void nfs_volume_list_stop(struct seq_file *p, void *v);
+static int nfs_volume_list_show(struct seq_file *m, void *v);
+
+static struct seq_operations nfs_volume_list_ops = {
+	.start	= nfs_volume_list_start,
+	.next	= nfs_volume_list_next,
+	.stop	= nfs_volume_list_stop,
+	.show	= nfs_volume_list_show,
+};
+
+static struct file_operations nfs_volume_list_fops = {
+	.open		= nfs_volume_list_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= seq_release,
+};
+
+/*
+ * open "/proc/fs/nfsfs/servers" which provides a summary of servers with which
+ * we're dealing
+ */
+static int nfs_server_list_open(struct inode *inode, struct file *file)
+{
+	struct seq_file *m;
+	int ret;
+
+	ret = seq_open(file, &nfs_server_list_ops);
+	if (ret < 0)
+		return ret;
+
+	m = file->private_data;
+	m->private = PDE(inode)->data;
+
+	return 0;
+}
+
+/*
+ * set up the iterator to start reading from the server list and return the first item
+ */
+static void *nfs_server_list_start(struct seq_file *m, loff_t *_pos)
+{
+	struct list_head *_p;
+	loff_t pos = *_pos;
+
+	/* lock the list against modification */
+	spin_lock(&nfs_client_lock);
+
+	/* allow for the header line */
+	if (!pos)
+		return SEQ_START_TOKEN;
+	pos--;
+
+	/* find the n'th element in the list */
+	list_for_each(_p, &nfs_client_list)
+		if (!pos--)
+			break;
+
+	return _p != &nfs_client_list ? _p : NULL;
+}
+
+/*
+ * move to next server
+ */
+static void *nfs_server_list_next(struct seq_file *p, void *v, loff_t *pos)
+{
+	struct list_head *_p;
+
+	(*pos)++;
+
+	_p = v;
+	_p = (v == SEQ_START_TOKEN) ? nfs_client_list.next : _p->next;
+
+	return _p != &nfs_client_list ? _p : NULL;
+}
+
+/*
+ * clean up after reading from the transports list
+ */
+static void nfs_server_list_stop(struct seq_file *p, void *v)
+{
+	spin_unlock(&nfs_client_lock);
+}
+
+/*
+ * display a header line followed by a load of call lines
+ */
+static int nfs_server_list_show(struct seq_file *m, void *v)
+{
+	struct nfs_client *clp;
+
+	/* display header on line 1 */
+	if (v == SEQ_START_TOKEN) {
+		seq_puts(m, "NV SERVER   PORT USE HOSTNAME\n");
+		return 0;
+	}
+
+	/* display one transport per line on subsequent lines */
+	clp = list_entry(v, struct nfs_client, cl_share_link);
+
+	seq_printf(m, "v%d %02x%02x%02x%02x %4hx %3d %s\n",
+		   clp->cl_nfsversion,
+		   NIPQUAD(clp->cl_addr.sin_addr),
+		   ntohs(clp->cl_addr.sin_port),
+		   atomic_read(&clp->cl_count),
+		   clp->cl_hostname);
+
+	return 0;
+}
+
+/*
+ * open "/proc/fs/nfsfs/volumes" which provides a summary of extant volumes
+ */
+static int nfs_volume_list_open(struct inode *inode, struct file *file)
+{
+	struct seq_file *m;
+	int ret;
+
+	ret = seq_open(file, &nfs_volume_list_ops);
+	if (ret < 0)
+		return ret;
+
+	m = file->private_data;
+	m->private = PDE(inode)->data;
+
+	return 0;
+}
+
+/*
+ * set up the iterator to start reading from the volume list and return the first item
+ */
+static void *nfs_volume_list_start(struct seq_file *m, loff_t *_pos)
+{
+	struct list_head *_p;
+	loff_t pos = *_pos;
+
+	/* lock the list against modification */
+	spin_lock(&nfs_client_lock);
+
+	/* allow for the header line */
+	if (!pos)
+		return SEQ_START_TOKEN;
+	pos--;
+
+	/* find the n'th element in the list */
+	list_for_each(_p, &nfs_volume_list)
+		if (!pos--)
+			break;
+
+	return _p != &nfs_volume_list ? _p : NULL;
+}
+
+/*
+ * move to next volume
+ */
+static void *nfs_volume_list_next(struct seq_file *p, void *v, loff_t *pos)
+{
+	struct list_head *_p;
+
+	(*pos)++;
+
+	_p = v;
+	_p = (v == SEQ_START_TOKEN) ? nfs_volume_list.next : _p->next;
+
+	return _p != &nfs_volume_list ? _p : NULL;
+}
+
+/*
+ * clean up after reading from the transports list
+ */
+static void nfs_volume_list_stop(struct seq_file *p, void *v)
+{
+	spin_unlock(&nfs_client_lock);
+}
+
+/*
+ * display a header line followed by a load of call lines
+ */
+static int nfs_volume_list_show(struct seq_file *m, void *v)
+{
+	struct nfs_server *server;
+	struct nfs_client *clp;
+	char dev[8], fsid[17];
+
+	/* display header on line 1 */
+	if (v == SEQ_START_TOKEN) {
+		seq_puts(m, "NV SERVER   PORT DEV     FSID\n");
+		return 0;
+	}
+	/* display one transport per line on subsequent lines */
+	server = list_entry(v, struct nfs_server, master_link);
+	clp = server->nfs_client;
+
+	snprintf(dev, 8, "%u:%u",
+		 MAJOR(server->s_dev), MINOR(server->s_dev));
+
+	snprintf(fsid, 17, "%llx:%llx",
+		 server->fsid.major, server->fsid.minor);
+
+	seq_printf(m, "v%d %02x%02x%02x%02x %4hx %-7s %-17s\n",
+		   clp->cl_nfsversion,
+		   NIPQUAD(clp->cl_addr.sin_addr),
+		   ntohs(clp->cl_addr.sin_port),
+		   dev,
+		   fsid);
+
+	return 0;
+}
+
+/*
+ * initialise the /proc/fs/nfsfs/ directory
+ */
+int __init nfs_fs_proc_init(void)
+{
+	struct proc_dir_entry *p;
+
+	proc_fs_nfs = proc_mkdir("nfsfs", proc_root_fs);
+	if (!proc_fs_nfs)
+		goto error_0;
+
+	proc_fs_nfs->owner = THIS_MODULE;
+
+	/* a file of servers with which we're dealing */
+	p = create_proc_entry("servers", S_IFREG|S_IRUGO, proc_fs_nfs);
+	if (!p)
+		goto error_1;
+
+	p->proc_fops = &nfs_server_list_fops;
+	p->owner = THIS_MODULE;
+
+	/* a file of volumes that we have mounted */
+	p = create_proc_entry("volumes", S_IFREG|S_IRUGO, proc_fs_nfs);
+	if (!p)
+		goto error_2;
+
+	p->proc_fops = &nfs_volume_list_fops;
+	p->owner = THIS_MODULE;
+	return 0;
+
+error_2:
+	remove_proc_entry("servers", proc_fs_nfs);
+error_1:
+	remove_proc_entry("nfsfs", proc_root_fs);
+error_0:
+	return -ENOMEM;
+}
+
+/*
+ * clean up the /proc/fs/nfsfs/ directory
+ */
+void nfs_fs_proc_exit(void)
+{
+	remove_proc_entry("volumes", proc_fs_nfs);
+	remove_proc_entry("servers", proc_fs_nfs);
+	remove_proc_entry("nfsfs", proc_root_fs);
+}
+
+#endif /* CONFIG_PROC_FS */
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index a547c58..cb5c65f 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -1141,6 +1141,10 @@ static int __init init_nfs_fs(void)
 {
 	int err;
 
+	err = nfs_fs_proc_init();
+	if (err)
+		goto out5;
+
 	err = nfs_init_nfspagecache();
 	if (err)
 		goto out4;
@@ -1181,6 +1185,8 @@ out2:
 out3:
 	nfs_destroy_nfspagecache();
 out4:
+	nfs_fs_proc_exit();
+out5:
 	return err;
 }
 
@@ -1195,6 +1201,7 @@ static void __exit exit_nfs_fs(void)
 	rpc_proc_unregister("nfs");
 #endif
 	unregister_nfs_fs();
+	nfs_fs_proc_exit();
 }
 
 /* Not quite true; I just maintain it */
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index e73ba4f..bea0b01 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -47,6 +47,18 @@ extern void nfs_free_server(struct nfs_server *server);
 extern struct nfs_server *nfs_clone_server(struct nfs_server *,
 					   struct nfs_fh *,
 					   struct nfs_fattr *);
+#ifdef CONFIG_PROC_FS
+extern int __init nfs_fs_proc_init(void);
+extern void nfs_fs_proc_exit(void);
+#else
+static inline int nfs_fs_proc_init(void)
+{
+	return 0;
+}
+static inline void nfs_fs_proc_exit(void)
+{
+}
+#endif
 
 /* nfs4namespace.c */
 #ifdef CONFIG_NFS_V4
-- 
cgit v1.1


From 27ba851244f627a302d0fc6469d1ad413fc34fcb Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Sun, 30 Jul 2006 14:40:56 -0400
Subject: NFS: Fix error handling

Fix an error handling problem: nfs_put_client() can be given a NULL pointer if
nfs_free_server() is asked to destroy a partially initialised record.

Signed-Off-By: David Howells <dhowells@redhat.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/client.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'fs')

diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 27f6478..700bd58 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -208,6 +208,9 @@ static void nfs_free_client(struct nfs_client *clp)
  */
 void nfs_put_client(struct nfs_client *clp)
 {
+	if (!clp)
+		return;
+
 	dprintk("--> nfs_put_client({%d})\n", atomic_read(&clp->cl_count));
 
 	if (atomic_dec_and_lock(&clp->cl_count, &nfs_client_lock)) {
-- 
cgit v1.1


From 738a35195941ecf604d3070e2a053e1df3de350b Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Sun, 30 Jul 2006 14:58:27 -0400
Subject: NFS: Secure the roots of the NFS subtrees in a shared superblock

Invoke security_d_instantiate() on root dentries after allocating them with
dentry_alloc_anon().  Normally dentry_alloc_root() would do that, but we don't
call that as we don't want to assign a name to the root dentry at this point
(we may discover the real name later).

Signed-Off-By: David Howells <dhowells@redhat.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/getroot.c | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'fs')

diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c
index 977e590..76b08ae 100644
--- a/fs/nfs/getroot.c
+++ b/fs/nfs/getroot.c
@@ -33,6 +33,7 @@
 #include <linux/vfs.h>
 #include <linux/namei.h>
 #include <linux/namespace.h>
+#include <linux/security.h>
 
 #include <asm/system.h>
 #include <asm/uaccess.h>
@@ -109,6 +110,8 @@ struct dentry *nfs_get_root(struct super_block *sb, struct nfs_fh *mntfh)
 		return ERR_PTR(-ENOMEM);
 	}
 
+	security_d_instantiate(mntroot, inode);
+
 	if (!mntroot->d_op)
 		mntroot->d_op = server->nfs_client->rpc_ops->dentry_ops;
 
@@ -296,6 +299,8 @@ struct dentry *nfs4_get_root(struct super_block *sb, struct nfs_fh *mntfh)
 		return ERR_PTR(-ENOMEM);
 	}
 
+	security_d_instantiate(mntroot, inode);
+
 	if (!mntroot->d_op)
 		mntroot->d_op = server->nfs_client->rpc_ops->dentry_ops;
 
-- 
cgit v1.1


From 36b15c54cd0d6f707a3ac03e4a2a60bb530a95b9 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Tue, 22 Aug 2006 20:06:14 -0400
Subject: NFS: Ensure NFSv2/v3 mounts respect the NFS_MOUNT_SECFLAVOUR flag

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/super.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 867b5dc..97cfb14 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -471,12 +471,14 @@ static int nfs_validate_mount_data(struct nfs_mount_data *data,
 						data->version);
 				return -EINVAL;
 			}
-			/* Fill in pseudoflavor for mount version < 5 */
-			data->pseudoflavor = RPC_AUTH_UNIX;
 		case 5:
 			memset(data->context, 0, sizeof(data->context));
 	}
 
+	/* Set the pseudoflavor */
+	if (!(data->flags & NFS_MOUNT_SECFLAVOUR))
+		data->pseudoflavor = RPC_AUTH_UNIX;
+
 #ifndef CONFIG_NFS_V3
 	/* If NFSv3 is not compiled in, return -EPROTONOSUPPORT */
 	if (data->flags & NFS_MOUNT_VER3) {
-- 
cgit v1.1


From 9c5bf38d85a31b946664bcc21078ef5bb10672f7 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Tue, 22 Aug 2006 20:06:14 -0400
Subject: NFS: Fix nfs_alloc_client()

The scheme to indicate which services have been started up appears to be
seriously broken.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/client.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 700bd58..471d975 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -113,9 +113,9 @@ static struct nfs_client *nfs_alloc_client(const char *hostname,
 	if (error < 0) {
 		dprintk("%s: couldn't start rpciod! Error = %d\n",
 				__FUNCTION__, error);
-		__set_bit(NFS_CS_RPCIOD, &clp->cl_res_state);
 		goto error_1;
 	}
+	__set_bit(NFS_CS_RPCIOD, &clp->cl_res_state);
 
 	if (nfsversion == 4) {
 		if (nfs_callback_up() < 0)
@@ -153,8 +153,8 @@ static struct nfs_client *nfs_alloc_client(const char *hostname,
 	return clp;
 
 error_3:
-	nfs_callback_down();
-	__clear_bit(NFS_CS_CALLBACK, &clp->cl_res_state);
+	if (__test_and_clear_bit(NFS_CS_CALLBACK, &clp->cl_res_state))
+		nfs_callback_down();
 error_2:
 	rpciod_down();
 	__clear_bit(NFS_CS_RPCIOD, &clp->cl_res_state);
@@ -195,7 +195,7 @@ static void nfs_free_client(struct nfs_client *clp)
 		nfs_callback_down();
 
 	if (__test_and_clear_bit(NFS_CS_RPCIOD, &clp->cl_res_state))
-	rpciod_down();
+		rpciod_down();
 
 	kfree(clp->cl_hostname);
 	kfree(clp);
@@ -881,9 +881,9 @@ static int nfs4_init_client(struct nfs_client *clp,
 	if (error < 0) {
 		dprintk("%s: failed to create idmapper. Error = %d\n",
 			__FUNCTION__, error);
-		__set_bit(NFS_CS_IDMAP, &clp->cl_res_state);
 		goto error;
 	}
+	__set_bit(NFS_CS_IDMAP, &clp->cl_res_state);
 
 	nfs_mark_client_ready(clp, NFS_CS_READY);
 	return 0;
-- 
cgit v1.1


From 44c31be261540acf66ddd730631ead8009cc361d Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Tue, 22 Aug 2006 20:06:17 -0400
Subject: LOCKD: Teach lockd to use the new rpc_peeraddr() API

Hide the details of how the RPC client stores remote peer addresses from
the Network Lock Manager.

Test plan:
Destructive testing (unplugging the network temporarily).  Connectathon
with UDP and TCP.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/lockd/clntproc.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'fs')

diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c
index 89ba0df..50dbb67 100644
--- a/fs/lockd/clntproc.c
+++ b/fs/lockd/clntproc.c
@@ -151,11 +151,13 @@ static void nlmclnt_release_lockargs(struct nlm_rqst *req)
 int
 nlmclnt_proc(struct inode *inode, int cmd, struct file_lock *fl)
 {
+	struct rpc_clnt		*client = NFS_CLIENT(inode);
+	struct sockaddr_in	addr;
 	struct nlm_host		*host;
 	struct nlm_rqst		*call;
 	sigset_t		oldset;
 	unsigned long		flags;
-	int			status, proto, vers;
+	int			status, vers;
 
 	vers = (NFS_PROTO(inode)->version == 3) ? 4 : 1;
 	if (NFS_PROTO(inode)->version > 3) {
@@ -163,10 +165,8 @@ nlmclnt_proc(struct inode *inode, int cmd, struct file_lock *fl)
 		return -ENOLCK;
 	}
 
-	/* Retrieve transport protocol from NFS client */
-	proto = NFS_CLIENT(inode)->cl_xprt->prot;
-
-	host = nlmclnt_lookup_host(NFS_ADDR(inode), proto, vers);
+	rpc_peeraddr(client, (struct sockaddr *) &addr, sizeof(addr));
+	host = nlmclnt_lookup_host(&addr, client->cl_xprt->prot, vers);
 	if (host == NULL)
 		return -ENOLCK;
 
-- 
cgit v1.1


From 39d7bbcb5ba5e9d8d658b70903dd7939400e57db Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Tue, 22 Aug 2006 20:06:18 -0400
Subject: SUNRPC: remove extraneous header inclusions

include/linux/sunrpc/clnt.h already includes include/linux/sunrpc/xprt.h.
We can remove xprt.h from source files that already include clnt.h.
Likewise include/linux/sunrpc/timer.h.

Test plan:
Compile kernel with CONFIG_NFS enabled.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/mount_clnt.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'fs')

diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c
index 445abb4..41274874 100644
--- a/fs/nfs/mount_clnt.c
+++ b/fs/nfs/mount_clnt.c
@@ -14,7 +14,6 @@
 #include <linux/net.h>
 #include <linux/in.h>
 #include <linux/sunrpc/clnt.h>
-#include <linux/sunrpc/xprt.h>
 #include <linux/sunrpc/sched.h>
 #include <linux/nfs_fs.h>
 
-- 
cgit v1.1


From 6ca948238724c945bd353f51d54ae7d285f3889f Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Tue, 22 Aug 2006 20:06:19 -0400
Subject: SUNRPC: Clean-up after previous patches.

Remove some unused macros related to accessing an RPC peer address

Test plan:
Compile kernel with CONFIG_NFS option enabled.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/lockd/host.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'fs')

diff --git a/fs/lockd/host.c b/fs/lockd/host.c
index 38b0e8a..a516a01 100644
--- a/fs/lockd/host.c
+++ b/fs/lockd/host.c
@@ -26,7 +26,6 @@
 #define NLM_HOST_REBIND		(60 * HZ)
 #define NLM_HOST_EXPIRE		((nrhosts > NLM_HOST_MAX)? 300 * HZ : 120 * HZ)
 #define NLM_HOST_COLLECT	((nrhosts > NLM_HOST_MAX)? 120 * HZ :  60 * HZ)
-#define NLM_HOST_ADDR(sv)	(&(sv)->s_nlmclnt->cl_xprt->addr)
 
 static struct nlm_host *	nlm_hosts[NLM_HOST_NRHASH];
 static unsigned long		next_gc;
-- 
cgit v1.1


From e1ec78928b4d5a31b7a847e65c6009f4229f7c0f Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Tue, 22 Aug 2006 20:06:20 -0400
Subject: LOCKD: Convert to use new rpc_create() API

Replace xprt_create_proto/rpc_create_client with new rpc_create()
interface in the Network Lock Manager.

Note that the semantics of NLM transports is now "hard" instead of "soft"
to provide a better guarantee that lock requests will get to the server.

Test plan:
Repeated runs of Connectathon locking suite.  Check network trace to ensure
NLM requests are working correctly.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/lockd/host.c | 50 +++++++++++++++++++++++++++-----------------------
 fs/lockd/mon.c  | 41 +++++++++++++++++------------------------
 2 files changed, 44 insertions(+), 47 deletions(-)

(limited to 'fs')

diff --git a/fs/lockd/host.c b/fs/lockd/host.c
index a516a01..703fb03 100644
--- a/fs/lockd/host.c
+++ b/fs/lockd/host.c
@@ -166,7 +166,6 @@ struct rpc_clnt *
 nlm_bind_host(struct nlm_host *host)
 {
 	struct rpc_clnt	*clnt;
-	struct rpc_xprt	*xprt;
 
 	dprintk("lockd: nlm_bind_host(%08x)\n",
 			(unsigned)ntohl(host->h_addr.sin_addr.s_addr));
@@ -178,7 +177,6 @@ nlm_bind_host(struct nlm_host *host)
 	 * RPC rebind is required
 	 */
 	if ((clnt = host->h_rpcclnt) != NULL) {
-		xprt = clnt->cl_xprt;
 		if (time_after_eq(jiffies, host->h_nextrebind)) {
 			rpc_force_rebind(clnt);
 			host->h_nextrebind = jiffies + NLM_HOST_REBIND;
@@ -186,31 +184,37 @@ nlm_bind_host(struct nlm_host *host)
 					host->h_nextrebind - jiffies);
 		}
 	} else {
-		xprt = xprt_create_proto(host->h_proto, &host->h_addr, NULL);
-		if (IS_ERR(xprt))
-			goto forgetit;
-
-		xprt_set_timeout(&xprt->timeout, 5, nlmsvc_timeout);
-		xprt->resvport = 1;	/* NLM requires a reserved port */
-
-		/* Existing NLM servers accept AUTH_UNIX only */
-		clnt = rpc_new_client(xprt, host->h_name, &nlm_program,
-					host->h_version, RPC_AUTH_UNIX);
-		if (IS_ERR(clnt))
-			goto forgetit;
-		clnt->cl_autobind = 1;	/* turn on pmap queries */
-		clnt->cl_softrtry = 1; /* All queries are soft */
-
-		host->h_rpcclnt = clnt;
+		unsigned long increment = nlmsvc_timeout * HZ;
+		struct rpc_timeout timeparms = {
+			.to_initval	= increment,
+			.to_increment	= increment,
+			.to_maxval	= increment * 6UL,
+			.to_retries	= 5U,
+		};
+		struct rpc_create_args args = {
+			.protocol	= host->h_proto,
+			.address	= (struct sockaddr *)&host->h_addr,
+			.addrsize	= sizeof(host->h_addr),
+			.timeout	= &timeparms,
+			.servername	= host->h_name,
+			.program	= &nlm_program,
+			.version	= host->h_version,
+			.authflavor	= RPC_AUTH_UNIX,
+			.flags		= (RPC_CLNT_CREATE_HARDRTRY |
+					   RPC_CLNT_CREATE_AUTOBIND),
+		};
+
+		clnt = rpc_create(&args);
+		if (!IS_ERR(clnt))
+			host->h_rpcclnt = clnt;
+		else {
+			printk("lockd: couldn't create RPC handle for %s\n", host->h_name);
+			clnt = NULL;
+		}
 	}
 
 	mutex_unlock(&host->h_mutex);
 	return clnt;
-
-forgetit:
-	printk("lockd: couldn't create RPC handle for %s\n", host->h_name);
-	mutex_unlock(&host->h_mutex);
-	return NULL;
 }
 
 /*
diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c
index 3fc683f..5954dcb 100644
--- a/fs/lockd/mon.c
+++ b/fs/lockd/mon.c
@@ -109,30 +109,23 @@ nsm_unmonitor(struct nlm_host *host)
 static struct rpc_clnt *
 nsm_create(void)
 {
-	struct rpc_xprt		*xprt;
-	struct rpc_clnt		*clnt;
-	struct sockaddr_in	sin;
-
-	sin.sin_family = AF_INET;
-	sin.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
-	sin.sin_port = 0;
-
-	xprt = xprt_create_proto(IPPROTO_UDP, &sin, NULL);
-	if (IS_ERR(xprt))
-		return (struct rpc_clnt *)xprt;
-	xprt->resvport = 1;	/* NSM requires a reserved port */
-
-	clnt = rpc_create_client(xprt, "localhost",
-				&nsm_program, SM_VERSION,
-				RPC_AUTH_NULL);
-	if (IS_ERR(clnt))
-		goto out_err;
-	clnt->cl_softrtry = 1;
-	clnt->cl_oneshot  = 1;
-	return clnt;
-
-out_err:
-	return clnt;
+	struct sockaddr_in	sin = {
+		.sin_family	= AF_INET,
+		.sin_addr.s_addr = htonl(INADDR_LOOPBACK),
+		.sin_port	= 0,
+	};
+	struct rpc_create_args args = {
+		.protocol	= IPPROTO_UDP,
+		.address	= (struct sockaddr *)&sin,
+		.addrsize	= sizeof(sin),
+		.servername	= "localhost",
+		.program	= &nsm_program,
+		.version	= SM_VERSION,
+		.authflavor	= RPC_AUTH_NULL,
+		.flags		= (RPC_CLNT_CREATE_ONESHOT),
+	};
+
+	return rpc_create(&args);
 }
 
 /*
-- 
cgit v1.1


From 41877d207c46f050b709f452703ade20c3b4a096 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Tue, 22 Aug 2006 20:06:20 -0400
Subject: NFS: Convert NFS client to use new rpc_create() API

Convert NFS client mount logic to use rpc_create() instead of the old
xprt_create_proto/rpc_create_client API.

Test plan:
Mount stress tests.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/client.c | 27 +++++++++++----------------
 1 file changed, 11 insertions(+), 16 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 471d975..12941a8 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -401,8 +401,17 @@ static int nfs_create_rpc_client(struct nfs_client *clp, int proto,
 						rpc_authflavor_t flavor)
 {
 	struct rpc_timeout	timeparms;
-	struct rpc_xprt		*xprt = NULL;
 	struct rpc_clnt		*clnt = NULL;
+	struct rpc_create_args args = {
+		.protocol	= proto,
+		.address	= (struct sockaddr *)&clp->cl_addr,
+		.addrsize	= sizeof(clp->cl_addr),
+		.timeout	= &timeparms,
+		.servername	= clp->cl_hostname,
+		.program	= &nfs_program,
+		.version	= clp->rpc_ops->version,
+		.authflavor	= flavor,
+	};
 
 	if (!IS_ERR(clp->cl_rpcclient))
 		return 0;
@@ -411,27 +420,13 @@ static int nfs_create_rpc_client(struct nfs_client *clp, int proto,
 	clp->retrans_timeo = timeparms.to_initval;
 	clp->retrans_count = timeparms.to_retries;
 
-	/* create transport and client */
-	xprt = xprt_create_proto(proto, &clp->cl_addr, &timeparms);
-	if (IS_ERR(xprt)) {
-		dprintk("%s: cannot create RPC transport. Error = %ld\n",
-				__FUNCTION__, PTR_ERR(xprt));
-		return PTR_ERR(xprt);
-	}
-
-	/* Bind to a reserved port! */
-	xprt->resvport = 1;
-	/* Create the client RPC handle */
-	clnt = rpc_create_client(xprt, clp->cl_hostname, &nfs_program,
-				 clp->rpc_ops->version, RPC_AUTH_UNIX);
+	clnt = rpc_create(&args);
 	if (IS_ERR(clnt)) {
 		dprintk("%s: cannot create RPC client. Error = %ld\n",
 				__FUNCTION__, PTR_ERR(clnt));
 		return PTR_ERR(clnt);
 	}
 
-	clnt->cl_intr     = 1;
-	clnt->cl_softrtry = 1;
 	clp->cl_rpcclient = clnt;
 	return 0;
 }
-- 
cgit v1.1


From ae5c79476f36512d1100e162606bb5691f2cce5a Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Tue, 22 Aug 2006 20:06:21 -0400
Subject: NFSD: Convert NFS server callback logic to use new rpc_create API

Replace xprt_create_proto/rpc_create_client call in NFS server callback
functions to use new rpc_create() API.

Test plan:
NFSv4 delegation functionality tests.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfsd/nfs4callback.c | 66 +++++++++++++++++++++-----------------------------
 1 file changed, 27 insertions(+), 39 deletions(-)

(limited to 'fs')

diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 54b37b1..8583d99 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -375,16 +375,28 @@ nfsd4_probe_callback(struct nfs4_client *clp)
 {
 	struct sockaddr_in	addr;
 	struct nfs4_callback    *cb = &clp->cl_callback;
-	struct rpc_timeout	timeparms;
-	struct rpc_xprt *	xprt;
+	struct rpc_timeout	timeparms = {
+		.to_initval	= (NFSD_LEASE_TIME/4) * HZ,
+		.to_retries	= 5,
+		.to_maxval	= (NFSD_LEASE_TIME/2) * HZ,
+		.to_exponential	= 1,
+	};
 	struct rpc_program *	program = &cb->cb_program;
-	struct rpc_stat *	stat = &cb->cb_stat;
-	struct rpc_clnt *	clnt;
+	struct rpc_create_args args = {
+		.protocol	= IPPROTO_TCP,
+		.address	= (struct sockaddr *)&addr,
+		.addrsize	= sizeof(addr),
+		.timeout	= &timeparms,
+		.servername	= clp->cl_name.data,
+		.program	= program,
+		.version	= nfs_cb_version[1]->number,
+		.authflavor	= RPC_AUTH_UNIX,	/* XXX: need AUTH_GSS... */
+		.flags		= (RPC_CLNT_CREATE_NOPING),
+	};
 	struct rpc_message msg = {
 		.rpc_proc       = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_NULL],
 		.rpc_argp       = clp,
 	};
-	char                    hostname[32];
 	int status;
 
 	if (atomic_read(&cb->cb_set))
@@ -396,51 +408,27 @@ nfsd4_probe_callback(struct nfs4_client *clp)
 	addr.sin_port = htons(cb->cb_port);
 	addr.sin_addr.s_addr = htonl(cb->cb_addr);
 
-	/* Initialize timeout */
-	timeparms.to_initval = (NFSD_LEASE_TIME/4) * HZ;
-	timeparms.to_retries = 0;
-	timeparms.to_maxval = (NFSD_LEASE_TIME/2) * HZ;
-	timeparms.to_exponential = 1;
-
-	/* Create RPC transport */
-	xprt = xprt_create_proto(IPPROTO_TCP, &addr, &timeparms);
-	if (IS_ERR(xprt)) {
-		dprintk("NFSD: couldn't create callback transport!\n");
-		goto out_err;
-	}
-
 	/* Initialize rpc_program */
 	program->name = "nfs4_cb";
 	program->number = cb->cb_prog;
 	program->nrvers = ARRAY_SIZE(nfs_cb_version);
 	program->version = nfs_cb_version;
-	program->stats = stat;
+	program->stats = &cb->cb_stat;
 
 	/* Initialize rpc_stat */
-	memset(stat, 0, sizeof(struct rpc_stat));
-	stat->program = program;
-
-	/* Create RPC client
- 	 *
-	 * XXX AUTH_UNIX only - need AUTH_GSS....
-	 */
-	sprintf(hostname, "%u.%u.%u.%u", NIPQUAD(addr.sin_addr.s_addr));
-	clnt = rpc_new_client(xprt, hostname, program, 1, RPC_AUTH_UNIX);
-	if (IS_ERR(clnt)) {
+	memset(program->stats, 0, sizeof(cb->cb_stat));
+	program->stats->program = program;
+
+	/* Create RPC client */
+	cb->cb_client = rpc_create(&args);
+	if (!cb->cb_client) {
 		dprintk("NFSD: couldn't create callback client\n");
 		goto out_err;
 	}
-	clnt->cl_intr = 0;
-	clnt->cl_softrtry = 1;
 
 	/* Kick rpciod, put the call on the wire. */
-
-	if (rpciod_up() != 0) {
-		dprintk("nfsd: couldn't start rpciod for callbacks!\n");
+	if (rpciod_up() != 0)
 		goto out_clnt;
-	}
-
-	cb->cb_client = clnt;
 
 	/* the task holds a reference to the nfs4_client struct */
 	atomic_inc(&clp->cl_count);
@@ -448,7 +436,7 @@ nfsd4_probe_callback(struct nfs4_client *clp)
 	msg.rpc_cred = nfsd4_lookupcred(clp,0);
 	if (IS_ERR(msg.rpc_cred))
 		goto out_rpciod;
-	status = rpc_call_async(clnt, &msg, RPC_TASK_ASYNC, &nfs4_cb_null_ops, NULL);
+	status = rpc_call_async(cb->cb_client, &msg, RPC_TASK_ASYNC, &nfs4_cb_null_ops, NULL);
 	put_rpccred(msg.rpc_cred);
 
 	if (status != 0) {
@@ -462,7 +450,7 @@ out_rpciod:
 	rpciod_down();
 	cb->cb_client = NULL;
 out_clnt:
-	rpc_shutdown_client(clnt);
+	rpc_shutdown_client(cb->cb_client);
 out_err:
 	dprintk("NFSD: warning: no callback path to client %.*s\n",
 		(int)clp->cl_name.len, clp->cl_name.data);
-- 
cgit v1.1


From d3db90e270791b21cd00d3c094884bffa907cc9e Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Tue, 22 Aug 2006 20:06:22 -0400
Subject: NFS: remove a no-longer-needed error check in nfs_symlink()

In the early days of NFS, there was no duplicate reply cache on the server.
Thus retransmitted non-idempotent requests often found that the request had
already completed on the server.  To avoid passing an unanticipated return
code to unsuspecting applications, NFS clients would often shunt error
codes that implied the request had been retried but already completed.

Thanks to NFS over TCP, duplicate reply caches on the server, and network
performance and reliability improvements, it is safe to remove such checks.

Test plan:
None.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/dir.c | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 9b496ef..084e8cb 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1476,14 +1476,10 @@ dentry->d_parent->d_name.name, dentry->d_name.name);
 	error = NFS_PROTO(dir)->symlink(dir, &dentry->d_name, &qsymname,
 					  &attr, &sym_fh, &sym_attr);
 	nfs_end_data_update(dir);
-	if (!error) {
+	if (!error)
 		error = nfs_instantiate(dentry, &sym_fh, &sym_attr);
-	} else {
-		if (error == -EEXIST)
-			printk("nfs_proc_symlink: %s/%s already exists??\n",
-			       dentry->d_parent->d_name.name, dentry->d_name.name);
+	else
 		d_drop(dentry);
-	}
 	unlock_kernel();
 	return error;
 }
-- 
cgit v1.1


From 4f390c152bc87165da4b1f5b7d870b46fb106d4e Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Tue, 22 Aug 2006 20:06:22 -0400
Subject: NFS: Fix double d_drop in nfs_instantiate() error path

If the LOOKUP or GETATTR in nfs_instantiate fail, nfs_instantiate will do a
d_drop before returning.  But some callers already do a d_drop in the case
of an error return.  Make certain we do only one d_drop in all error paths.

This issue was introduced because over time, the symlink proc API diverged
slightly from the create/mkdir/mknod proc API.  To prevent other coding
mistakes of this type, change the symlink proc API to be more like
create/mkdir/mknod and move the nfs_instantiate call into the symlink proc
routines so it is used in exactly the same way for create, mkdir, mknod,
and symlink.

Test plan:
Connectathon, all versions of NFS.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/dir.c      | 16 ++++------------
 fs/nfs/nfs3proc.c | 26 ++++++++++++++++----------
 fs/nfs/nfs4proc.c | 31 ++++++++++++++++---------------
 fs/nfs/proc.c     | 29 +++++++++++++++++++++--------
 4 files changed, 57 insertions(+), 45 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 084e8cb..affd3ae 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1147,23 +1147,20 @@ int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fhandle,
 		struct inode *dir = dentry->d_parent->d_inode;
 		error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr);
 		if (error)
-			goto out_err;
+			return error;
 	}
 	if (!(fattr->valid & NFS_ATTR_FATTR)) {
 		struct nfs_server *server = NFS_SB(dentry->d_sb);
 		error = server->nfs_client->rpc_ops->getattr(server, fhandle, fattr);
 		if (error < 0)
-			goto out_err;
+			return error;
 	}
 	inode = nfs_fhget(dentry->d_sb, fhandle, fattr);
 	error = PTR_ERR(inode);
 	if (IS_ERR(inode))
-		goto out_err;
+		return error;
 	d_instantiate(dentry, inode);
 	return 0;
-out_err:
-	d_drop(dentry);
-	return error;
 }
 
 /*
@@ -1448,8 +1445,6 @@ static int
 nfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
 {
 	struct iattr attr;
-	struct nfs_fattr sym_attr;
-	struct nfs_fh sym_fh;
 	struct qstr qsymname;
 	int error;
 
@@ -1473,12 +1468,9 @@ dentry->d_parent->d_name.name, dentry->d_name.name);
 
 	lock_kernel();
 	nfs_begin_data_update(dir);
-	error = NFS_PROTO(dir)->symlink(dir, &dentry->d_name, &qsymname,
-					  &attr, &sym_fh, &sym_attr);
+	error = NFS_PROTO(dir)->symlink(dir, dentry, &qsymname, &attr);
 	nfs_end_data_update(dir);
 	if (!error)
-		error = nfs_instantiate(dentry, &sym_fh, &sym_attr);
-	else
 		d_drop(dentry);
 	unlock_kernel();
 	return error;
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index 9e8258e..d85ac42 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -544,23 +544,23 @@ nfs3_proc_link(struct inode *inode, struct inode *dir, struct qstr *name)
 }
 
 static int
-nfs3_proc_symlink(struct inode *dir, struct qstr *name, struct qstr *path,
-		  struct iattr *sattr, struct nfs_fh *fhandle,
-		  struct nfs_fattr *fattr)
+nfs3_proc_symlink(struct inode *dir, struct dentry *dentry, struct qstr *path,
+		  struct iattr *sattr)
 {
-	struct nfs_fattr	dir_attr;
+	struct nfs_fh fhandle;
+	struct nfs_fattr fattr, dir_attr;
 	struct nfs3_symlinkargs	arg = {
 		.fromfh		= NFS_FH(dir),
-		.fromname	= name->name,
-		.fromlen	= name->len,
+		.fromname	= dentry->d_name.name,
+		.fromlen	= dentry->d_name.len,
 		.topath		= path->name,
 		.tolen		= path->len,
 		.sattr		= sattr
 	};
 	struct nfs3_diropres	res = {
 		.dir_attr	= &dir_attr,
-		.fh		= fhandle,
-		.fattr		= fattr
+		.fh		= &fhandle,
+		.fattr		= &fattr
 	};
 	struct rpc_message msg = {
 		.rpc_proc	= &nfs3_procedures[NFS3PROC_SYMLINK],
@@ -571,11 +571,17 @@ nfs3_proc_symlink(struct inode *dir, struct qstr *name, struct qstr *path,
 
 	if (path->len > NFS3_MAXPATHLEN)
 		return -ENAMETOOLONG;
-	dprintk("NFS call  symlink %s -> %s\n", name->name, path->name);
+
+	dprintk("NFS call  symlink %s -> %s\n", dentry->d_name.name,
+			path->name);
 	nfs_fattr_init(&dir_attr);
-	nfs_fattr_init(fattr);
+	nfs_fattr_init(&fattr);
 	status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
 	nfs_post_op_update_inode(dir, &dir_attr);
+	if (status != 0)
+		goto out;
+	status = nfs_instantiate(dentry, &fhandle, &fattr);
+out:
 	dprintk("NFS reply symlink: %d\n", status);
 	return status;
 }
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index a825547..2d18eac 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -2084,24 +2084,24 @@ static int nfs4_proc_link(struct inode *inode, struct inode *dir, struct qstr *n
 	return err;
 }
 
-static int _nfs4_proc_symlink(struct inode *dir, struct qstr *name,
-		struct qstr *path, struct iattr *sattr, struct nfs_fh *fhandle,
-		struct nfs_fattr *fattr)
+static int _nfs4_proc_symlink(struct inode *dir, struct dentry *dentry,
+		struct qstr *path, struct iattr *sattr)
 {
 	struct nfs_server *server = NFS_SERVER(dir);
-	struct nfs_fattr dir_fattr;
+	struct nfs_fh fhandle;
+	struct nfs_fattr fattr, dir_fattr;
 	struct nfs4_create_arg arg = {
 		.dir_fh = NFS_FH(dir),
 		.server = server,
-		.name = name,
+		.name = &dentry->d_name,
 		.attrs = sattr,
 		.ftype = NF4LNK,
 		.bitmask = server->attr_bitmask,
 	};
 	struct nfs4_create_res res = {
 		.server = server,
-		.fh = fhandle,
-		.fattr = fattr,
+		.fh = &fhandle,
+		.fattr = &fattr,
 		.dir_fattr = &dir_fattr,
 	};
 	struct rpc_message msg = {
@@ -2113,27 +2113,28 @@ static int _nfs4_proc_symlink(struct inode *dir, struct qstr *name,
 
 	if (path->len > NFS4_MAXPATHLEN)
 		return -ENAMETOOLONG;
+
 	arg.u.symlink = path;
-	nfs_fattr_init(fattr);
+	nfs_fattr_init(&fattr);
 	nfs_fattr_init(&dir_fattr);
 	
 	status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
-	if (!status)
+	if (!status) {
 		update_changeattr(dir, &res.dir_cinfo);
-	nfs_post_op_update_inode(dir, res.dir_fattr);
+		nfs_post_op_update_inode(dir, res.dir_fattr);
+		status = nfs_instantiate(dentry, &fhandle, &fattr);
+	}
 	return status;
 }
 
-static int nfs4_proc_symlink(struct inode *dir, struct qstr *name,
-		struct qstr *path, struct iattr *sattr, struct nfs_fh *fhandle,
-		struct nfs_fattr *fattr)
+static int nfs4_proc_symlink(struct inode *dir, struct dentry *dentry,
+		struct qstr *path, struct iattr *sattr)
 {
 	struct nfs4_exception exception = { };
 	int err;
 	do {
 		err = nfs4_handle_exception(NFS_SERVER(dir),
-				_nfs4_proc_symlink(dir, name, path, sattr,
-					fhandle, fattr),
+				_nfs4_proc_symlink(dir, dentry, path, sattr),
 				&exception);
 	} while (exception.retry);
 	return err;
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index 5a8b940..0b507bf 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -425,14 +425,15 @@ nfs_proc_link(struct inode *inode, struct inode *dir, struct qstr *name)
 }
 
 static int
-nfs_proc_symlink(struct inode *dir, struct qstr *name, struct qstr *path,
-		 struct iattr *sattr, struct nfs_fh *fhandle,
-		 struct nfs_fattr *fattr)
+nfs_proc_symlink(struct inode *dir, struct dentry *dentry, struct qstr *path,
+		 struct iattr *sattr)
 {
+	struct nfs_fh fhandle;
+	struct nfs_fattr fattr;
 	struct nfs_symlinkargs	arg = {
 		.fromfh		= NFS_FH(dir),
-		.fromname	= name->name,
-		.fromlen	= name->len,
+		.fromname	= dentry->d_name.name,
+		.fromlen	= dentry->d_name.len,
 		.topath		= path->name,
 		.tolen		= path->len,
 		.sattr		= sattr
@@ -445,11 +446,23 @@ nfs_proc_symlink(struct inode *dir, struct qstr *name, struct qstr *path,
 
 	if (path->len > NFS2_MAXPATHLEN)
 		return -ENAMETOOLONG;
-	dprintk("NFS call  symlink %s -> %s\n", name->name, path->name);
-	nfs_fattr_init(fattr);
-	fhandle->size = 0;
+
+	dprintk("NFS call  symlink %s -> %s\n", dentry->d_name.name,
+			path->name);
 	status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
 	nfs_mark_for_revalidate(dir);
+
+	/*
+	 * V2 SYMLINK requests don't return any attributes.  Setting the
+	 * filehandle size to zero indicates to nfs_instantiate that it
+	 * should fill in the data with a LOOKUP call on the wire.
+	 */
+	if (status == 0) {
+		nfs_fattr_init(&fattr);
+		fhandle.size = 0;
+		status = nfs_instantiate(dentry, &fhandle, &fattr);
+	}
+
 	dprintk("NFS reply symlink: %d\n", status);
 	return status;
 }
-- 
cgit v1.1


From 873101b33776780d32610fc4c90c7358a5e98f51 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Tue, 22 Aug 2006 20:06:23 -0400
Subject: NFS: copy symlinks into page cache before sending NFS SYMLINK request

Currently the NFS client does not cache symlinks it creates.  They get
cached only when the NFS client reads them back from the server.

Copy the symlink into the page cache before sending it.

Test plan:
Connectathon, all NFS versions.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/dir.c | 86 +++++++++++++++++++++++++++++++++++++++++++++++-------------
 1 file changed, 68 insertions(+), 18 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index affd3ae..b483e5d 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -30,6 +30,7 @@
 #include <linux/nfs_mount.h>
 #include <linux/pagemap.h>
 #include <linux/smp_lock.h>
+#include <linux/pagevec.h>
 #include <linux/namei.h>
 #include <linux/mount.h>
 
@@ -1441,39 +1442,88 @@ static int nfs_unlink(struct inode *dir, struct dentry *dentry)
 	return error;
 }
 
-static int
-nfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
+/*
+ * To create a symbolic link, most file systems instantiate a new inode,
+ * add a page to it containing the path, then write it out to the disk
+ * using prepare_write/commit_write.
+ *
+ * Unfortunately the NFS client can't create the in-core inode first
+ * because it needs a file handle to create an in-core inode (see
+ * fs/nfs/inode.c:nfs_fhget).  We only have a file handle *after* the
+ * symlink request has completed on the server.
+ *
+ * So instead we allocate a raw page, copy the symname into it, then do
+ * the SYMLINK request with the page as the buffer.  If it succeeds, we
+ * now have a new file handle and can instantiate an in-core NFS inode
+ * and move the raw page into its mapping.
+ */
+static int nfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
 {
+	struct pagevec lru_pvec;
+	struct page *page;
+	char *kaddr;
 	struct iattr attr;
-	struct qstr qsymname;
+	unsigned int pathlen = strlen(symname);
+	struct qstr qsymname = {
+		.name	= symname,
+		.len	= pathlen,
+	};
 	int error;
 
 	dfprintk(VFS, "NFS: symlink(%s/%ld, %s, %s)\n", dir->i_sb->s_id,
 		dir->i_ino, dentry->d_name.name, symname);
 
-#ifdef NFS_PARANOIA
-if (dentry->d_inode)
-printk("nfs_proc_symlink: %s/%s not negative!\n",
-dentry->d_parent->d_name.name, dentry->d_name.name);
-#endif
-	/*
-	 * Fill in the sattr for the call.
- 	 * Note: SunOS 4.1.2 crashes if the mode isn't initialized!
-	 */
-	attr.ia_valid = ATTR_MODE;
-	attr.ia_mode = S_IFLNK | S_IRWXUGO;
+	if (pathlen > PAGE_SIZE)
+		return -ENAMETOOLONG;
 
-	qsymname.name = symname;
-	qsymname.len  = strlen(symname);
+	attr.ia_mode = S_IFLNK | S_IRWXUGO;
+	attr.ia_valid = ATTR_MODE;
 
 	lock_kernel();
+
+	page = alloc_page(GFP_KERNEL);
+	if (!page) {
+		unlock_kernel();
+		return -ENOMEM;
+	}
+
+	kaddr = kmap_atomic(page, KM_USER0);
+	memcpy(kaddr, symname, pathlen);
+	if (pathlen < PAGE_SIZE)
+		memset(kaddr + pathlen, 0, PAGE_SIZE - pathlen);
+	kunmap_atomic(kaddr, KM_USER0);
+
+	/* XXX: eventually this will pass in {page, pathlen},
+	 *	instead of qsymname; need XDR changes for that */
 	nfs_begin_data_update(dir);
 	error = NFS_PROTO(dir)->symlink(dir, dentry, &qsymname, &attr);
 	nfs_end_data_update(dir);
-	if (!error)
+	if (error != 0) {
+		dfprintk(VFS, "NFS: symlink(%s/%ld, %s, %s) error %d\n",
+			dir->i_sb->s_id, dir->i_ino,
+			dentry->d_name.name, symname, error);
 		d_drop(dentry);
+		__free_page(page);
+		unlock_kernel();
+		return error;
+	}
+
+	/*
+	 * No big deal if we can't add this page to the page cache here.
+	 * READLINK will get the missing page from the server if needed.
+	 */
+	pagevec_init(&lru_pvec, 0);
+	if (!add_to_page_cache(page, dentry->d_inode->i_mapping, 0,
+							GFP_KERNEL)) {
+		if (!pagevec_add(&lru_pvec, page))
+			__pagevec_lru_add(&lru_pvec);
+		SetPageUptodate(page);
+		unlock_page(page);
+	} else
+		__free_page(page);
+
 	unlock_kernel();
-	return error;
+	return 0;
 }
 
 static int 
-- 
cgit v1.1


From 94a6d75320b3681e6e728b70e18bd186cb55e682 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Tue, 22 Aug 2006 20:06:23 -0400
Subject: NFS: Use cached page as buffer for NFS symlink requests

Now that we have a copy of the symlink path in the page cache, we can pass
a struct page down to the XDR routines instead of a string buffer.

Test plan:
Connectathon, all NFS versions.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/dir.c      |  8 +-------
 fs/nfs/nfs2xdr.c  | 21 ++++++++++++++++++---
 fs/nfs/nfs3proc.c | 14 +++++++-------
 fs/nfs/nfs3xdr.c  |  7 +++++--
 fs/nfs/nfs4proc.c | 12 +++++++-----
 fs/nfs/nfs4xdr.c  |  8 ++++----
 fs/nfs/proc.c     | 14 +++++++-------
 7 files changed, 49 insertions(+), 35 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index b483e5d..51328ae 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1464,10 +1464,6 @@ static int nfs_symlink(struct inode *dir, struct dentry *dentry, const char *sym
 	char *kaddr;
 	struct iattr attr;
 	unsigned int pathlen = strlen(symname);
-	struct qstr qsymname = {
-		.name	= symname,
-		.len	= pathlen,
-	};
 	int error;
 
 	dfprintk(VFS, "NFS: symlink(%s/%ld, %s, %s)\n", dir->i_sb->s_id,
@@ -1493,10 +1489,8 @@ static int nfs_symlink(struct inode *dir, struct dentry *dentry, const char *sym
 		memset(kaddr + pathlen, 0, PAGE_SIZE - pathlen);
 	kunmap_atomic(kaddr, KM_USER0);
 
-	/* XXX: eventually this will pass in {page, pathlen},
-	 *	instead of qsymname; need XDR changes for that */
 	nfs_begin_data_update(dir);
-	error = NFS_PROTO(dir)->symlink(dir, dentry, &qsymname, &attr);
+	error = NFS_PROTO(dir)->symlink(dir, dentry, page, pathlen, &attr);
 	nfs_end_data_update(dir);
 	if (error != 0) {
 		dfprintk(VFS, "NFS: symlink(%s/%ld, %s, %s) error %d\n",
diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c
index 67391ee..b49501f 100644
--- a/fs/nfs/nfs2xdr.c
+++ b/fs/nfs/nfs2xdr.c
@@ -51,7 +51,7 @@
 #define NFS_createargs_sz	(NFS_diropargs_sz+NFS_sattr_sz)
 #define NFS_renameargs_sz	(NFS_diropargs_sz+NFS_diropargs_sz)
 #define NFS_linkargs_sz		(NFS_fhandle_sz+NFS_diropargs_sz)
-#define NFS_symlinkargs_sz	(NFS_diropargs_sz+NFS_path_sz+NFS_sattr_sz)
+#define NFS_symlinkargs_sz	(NFS_diropargs_sz+1+NFS_sattr_sz)
 #define NFS_readdirargs_sz	(NFS_fhandle_sz+2)
 
 #define NFS_attrstat_sz		(1+NFS_fattr_sz)
@@ -351,11 +351,26 @@ nfs_xdr_linkargs(struct rpc_rqst *req, u32 *p, struct nfs_linkargs *args)
 static int
 nfs_xdr_symlinkargs(struct rpc_rqst *req, u32 *p, struct nfs_symlinkargs *args)
 {
+	struct xdr_buf *sndbuf = &req->rq_snd_buf;
+	size_t pad;
+
 	p = xdr_encode_fhandle(p, args->fromfh);
 	p = xdr_encode_array(p, args->fromname, args->fromlen);
-	p = xdr_encode_array(p, args->topath, args->tolen);
+	*p++ = htonl(args->pathlen);
+	sndbuf->len = xdr_adjust_iovec(sndbuf->head, p);
+
+	xdr_encode_pages(sndbuf, args->pages, 0, args->pathlen);
+
+	/*
+	 * xdr_encode_pages may have added a few bytes to ensure the
+	 * pathname ends on a 4-byte boundary.  Start encoding the
+	 * attributes after the pad bytes.
+	 */
+	pad = sndbuf->tail->iov_len;
+	if (pad > 0)
+		p++;
 	p = xdr_encode_sattr(p, args->sattr);
-	req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
+	sndbuf->len += xdr_adjust_iovec(sndbuf->tail, p) - pad;
 	return 0;
 }
 
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index d85ac42..f8688ea 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -544,8 +544,8 @@ nfs3_proc_link(struct inode *inode, struct inode *dir, struct qstr *name)
 }
 
 static int
-nfs3_proc_symlink(struct inode *dir, struct dentry *dentry, struct qstr *path,
-		  struct iattr *sattr)
+nfs3_proc_symlink(struct inode *dir, struct dentry *dentry, struct page *page,
+		  unsigned int len, struct iattr *sattr)
 {
 	struct nfs_fh fhandle;
 	struct nfs_fattr fattr, dir_attr;
@@ -553,8 +553,8 @@ nfs3_proc_symlink(struct inode *dir, struct dentry *dentry, struct qstr *path,
 		.fromfh		= NFS_FH(dir),
 		.fromname	= dentry->d_name.name,
 		.fromlen	= dentry->d_name.len,
-		.topath		= path->name,
-		.tolen		= path->len,
+		.pages		= &page,
+		.pathlen	= len,
 		.sattr		= sattr
 	};
 	struct nfs3_diropres	res = {
@@ -569,11 +569,11 @@ nfs3_proc_symlink(struct inode *dir, struct dentry *dentry, struct qstr *path,
 	};
 	int			status;
 
-	if (path->len > NFS3_MAXPATHLEN)
+	if (len > NFS3_MAXPATHLEN)
 		return -ENAMETOOLONG;
 
-	dprintk("NFS call  symlink %s -> %s\n", dentry->d_name.name,
-			path->name);
+	dprintk("NFS call  symlink %s\n", dentry->d_name.name);
+
 	nfs_fattr_init(&dir_attr);
 	nfs_fattr_init(&fattr);
 	status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index 0250269..16556fa 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -56,7 +56,7 @@
 #define NFS3_writeargs_sz	(NFS3_fh_sz+5)
 #define NFS3_createargs_sz	(NFS3_diropargs_sz+NFS3_sattr_sz)
 #define NFS3_mkdirargs_sz	(NFS3_diropargs_sz+NFS3_sattr_sz)
-#define NFS3_symlinkargs_sz	(NFS3_diropargs_sz+NFS3_path_sz+NFS3_sattr_sz)
+#define NFS3_symlinkargs_sz	(NFS3_diropargs_sz+1+NFS3_sattr_sz)
 #define NFS3_mknodargs_sz	(NFS3_diropargs_sz+2+NFS3_sattr_sz)
 #define NFS3_renameargs_sz	(NFS3_diropargs_sz+NFS3_diropargs_sz)
 #define NFS3_linkargs_sz		(NFS3_fh_sz+NFS3_diropargs_sz)
@@ -398,8 +398,11 @@ nfs3_xdr_symlinkargs(struct rpc_rqst *req, u32 *p, struct nfs3_symlinkargs *args
 	p = xdr_encode_fhandle(p, args->fromfh);
 	p = xdr_encode_array(p, args->fromname, args->fromlen);
 	p = xdr_encode_sattr(p, args->sattr);
-	p = xdr_encode_array(p, args->topath, args->tolen);
+	*p++ = htonl(args->pathlen);
 	req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
+
+	/* Copy the page */
+	xdr_encode_pages(&req->rq_snd_buf, args->pages, 0, args->pathlen);
 	return 0;
 }
 
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 2d18eac..7f60beb 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -2085,7 +2085,7 @@ static int nfs4_proc_link(struct inode *inode, struct inode *dir, struct qstr *n
 }
 
 static int _nfs4_proc_symlink(struct inode *dir, struct dentry *dentry,
-		struct qstr *path, struct iattr *sattr)
+		struct page *page, unsigned int len, struct iattr *sattr)
 {
 	struct nfs_server *server = NFS_SERVER(dir);
 	struct nfs_fh fhandle;
@@ -2111,10 +2111,11 @@ static int _nfs4_proc_symlink(struct inode *dir, struct dentry *dentry,
 	};
 	int			status;
 
-	if (path->len > NFS4_MAXPATHLEN)
+	if (len > NFS4_MAXPATHLEN)
 		return -ENAMETOOLONG;
 
-	arg.u.symlink = path;
+	arg.u.symlink.pages = &page;
+	arg.u.symlink.len = len;
 	nfs_fattr_init(&fattr);
 	nfs_fattr_init(&dir_fattr);
 	
@@ -2128,13 +2129,14 @@ static int _nfs4_proc_symlink(struct inode *dir, struct dentry *dentry,
 }
 
 static int nfs4_proc_symlink(struct inode *dir, struct dentry *dentry,
-		struct qstr *path, struct iattr *sattr)
+		struct page *page, unsigned int len, struct iattr *sattr)
 {
 	struct nfs4_exception exception = { };
 	int err;
 	do {
 		err = nfs4_handle_exception(NFS_SERVER(dir),
-				_nfs4_proc_symlink(dir, dentry, path, sattr),
+				_nfs4_proc_symlink(dir, dentry, page,
+							len, sattr),
 				&exception);
 	} while (exception.retry);
 	return err;
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 9992606..3dd413f 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -128,7 +128,7 @@ static int nfs4_stat_to_errno(int);
 #define decode_link_maxsz	(op_decode_hdr_maxsz + 5)
 #define encode_symlink_maxsz	(op_encode_hdr_maxsz + \
 				1 + nfs4_name_maxsz + \
-				nfs4_path_maxsz + \
+				1 + \
 				nfs4_fattr_maxsz)
 #define decode_symlink_maxsz	(op_decode_hdr_maxsz + 8)
 #define encode_create_maxsz	(op_encode_hdr_maxsz + \
@@ -673,9 +673,9 @@ static int encode_create(struct xdr_stream *xdr, const struct nfs4_create_arg *c
 
 	switch (create->ftype) {
 	case NF4LNK:
-		RESERVE_SPACE(4 + create->u.symlink->len);
-		WRITE32(create->u.symlink->len);
-		WRITEMEM(create->u.symlink->name, create->u.symlink->len);
+		RESERVE_SPACE(4);
+		WRITE32(create->u.symlink.len);
+		xdr_write_pages(xdr, create->u.symlink.pages, 0, create->u.symlink.len);
 		break;
 
 	case NF4BLK: case NF4CHR:
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index 0b507bf..630e506 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -425,8 +425,8 @@ nfs_proc_link(struct inode *inode, struct inode *dir, struct qstr *name)
 }
 
 static int
-nfs_proc_symlink(struct inode *dir, struct dentry *dentry, struct qstr *path,
-		 struct iattr *sattr)
+nfs_proc_symlink(struct inode *dir, struct dentry *dentry, struct page *page,
+		 unsigned int len, struct iattr *sattr)
 {
 	struct nfs_fh fhandle;
 	struct nfs_fattr fattr;
@@ -434,8 +434,8 @@ nfs_proc_symlink(struct inode *dir, struct dentry *dentry, struct qstr *path,
 		.fromfh		= NFS_FH(dir),
 		.fromname	= dentry->d_name.name,
 		.fromlen	= dentry->d_name.len,
-		.topath		= path->name,
-		.tolen		= path->len,
+		.pages		= &page,
+		.pathlen	= len,
 		.sattr		= sattr
 	};
 	struct rpc_message msg = {
@@ -444,11 +444,11 @@ nfs_proc_symlink(struct inode *dir, struct dentry *dentry, struct qstr *path,
 	};
 	int			status;
 
-	if (path->len > NFS2_MAXPATHLEN)
+	if (len > NFS2_MAXPATHLEN)
 		return -ENAMETOOLONG;
 
-	dprintk("NFS call  symlink %s -> %s\n", dentry->d_name.name,
-			path->name);
+	dprintk("NFS call  symlink %s\n", dentry->d_name.name);
+
 	status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
 	nfs_mark_for_revalidate(dir);
 
-- 
cgit v1.1


From 275a082fe9308e710324e26ccb5363c53d8fd45f Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Tue, 22 Aug 2006 20:06:24 -0400
Subject: Add a real API for dealing with blk_congestion_wait()

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/write.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'fs')

diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 453d446..38ba5c0 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -396,6 +396,7 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc)
 out:
 	clear_bit(BDI_write_congested, &bdi->state);
 	wake_up_all(&nfs_write_congestion);
+	writeback_congestion_end();
 	return err;
 }
 
-- 
cgit v1.1


From 5dd3177ae5012c1e2ad7a9ffdbd0e0d0de2f60e4 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Thu, 24 Aug 2006 01:03:05 -0400
Subject: NFSv4: Fix a use-after-free issue with the nfs server.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/client.c     | 36 +++++++++++++++++++++---------------
 fs/nfs/nfs4renewd.c |  1 +
 fs/nfs/super.c      |  8 +++++---
 3 files changed, 27 insertions(+), 18 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 12941a8..f1ff2ae 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -164,6 +164,26 @@ error_0:
 	return NULL;
 }
 
+static void nfs4_shutdown_client(struct nfs_client *clp)
+{
+#ifdef CONFIG_NFS_V4
+	if (__test_and_clear_bit(NFS_CS_RENEWD, &clp->cl_res_state))
+		nfs4_kill_renewd(clp);
+	while (!list_empty(&clp->cl_unused)) {
+		struct nfs4_state_owner *sp;
+
+		sp = list_entry(clp->cl_unused.next,
+				struct nfs4_state_owner,
+				so_list);
+		list_del(&sp->so_list);
+		kfree(sp);
+	}
+	BUG_ON(!list_empty(&clp->cl_state_owners));
+	if (__test_and_clear_bit(NFS_CS_IDMAP, &clp->cl_res_state))
+		nfs_idmap_delete(clp);
+#endif
+}
+
 /*
  * Destroy a shared client record
  */
@@ -171,21 +191,7 @@ static void nfs_free_client(struct nfs_client *clp)
 {
 	dprintk("--> nfs_free_client(%d)\n", clp->cl_nfsversion);
 
-#ifdef CONFIG_NFS_V4
-	if (__test_and_clear_bit(NFS_CS_IDMAP, &clp->cl_res_state)) {
-		while (!list_empty(&clp->cl_unused)) {
-			struct nfs4_state_owner *sp;
-
-			sp = list_entry(clp->cl_unused.next,
-					struct nfs4_state_owner,
-					so_list);
-			list_del(&sp->so_list);
-			kfree(sp);
-		}
-		BUG_ON(!list_empty(&clp->cl_state_owners));
-		nfs_idmap_delete(clp);
-	}
-#endif
+	nfs4_shutdown_client(clp);
 
 	/* -EIO all pending I/O */
 	if (!IS_ERR(clp->cl_rpcclient))
diff --git a/fs/nfs/nfs4renewd.c b/fs/nfs/nfs4renewd.c
index f2c8936..7b6df18 100644
--- a/fs/nfs/nfs4renewd.c
+++ b/fs/nfs/nfs4renewd.c
@@ -121,6 +121,7 @@ nfs4_schedule_state_renewal(struct nfs_client *clp)
 			__FUNCTION__, (timeout + HZ - 1) / HZ);
 	cancel_delayed_work(&clp->cl_renewd);
 	schedule_delayed_work(&clp->cl_renewd, timeout);
+	set_bit(NFS_CS_RENEWD, &clp->cl_res_state);
 	spin_unlock(&clp->cl_lock);
 }
 
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 97cfb14..665949d 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -883,13 +883,15 @@ static int nfs4_get_sb(struct file_system_type *fs_type,
 		goto out_free;
 	}
 
+	if (s->s_fs_info != server) {
+		nfs_free_server(server);
+		server = NULL;
+	}
+
 	if (!s->s_root) {
 		/* initial superblock/root creation */
 		s->s_flags = flags;
-
 		nfs4_fill_super(s);
-	} else {
-		nfs_free_server(server);
 	}
 
 	mntroot = nfs4_get_root(s, &mntfh);
-- 
cgit v1.1


From 158998b6fe36f6acef087f574c96d44713499cc9 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Thu, 24 Aug 2006 01:03:17 -0400
Subject: SUNRPC: Make rpc_mkpipe() take the parent dentry as an argument

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/idmap.c | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c
index f96dfac..82ad711 100644
--- a/fs/nfs/idmap.c
+++ b/fs/nfs/idmap.c
@@ -84,7 +84,6 @@ struct idmap_hashtable {
 };
 
 struct idmap {
-	char                  idmap_path[48];
 	struct dentry        *idmap_dentry;
 	wait_queue_head_t     idmap_wq;
 	struct idmap_msg      idmap_im;
@@ -119,10 +118,7 @@ nfs_idmap_new(struct nfs_client *clp)
         if ((idmap = kzalloc(sizeof(*idmap), GFP_KERNEL)) == NULL)
                 return -ENOMEM;
 
-	snprintf(idmap->idmap_path, sizeof(idmap->idmap_path),
-	    "%s/idmap", clp->cl_rpcclient->cl_pathname);
-
-        idmap->idmap_dentry = rpc_mkpipe(idmap->idmap_path,
+        idmap->idmap_dentry = rpc_mkpipe(clp->cl_rpcclient->cl_dentry, "idmap",
 	    idmap, &idmap_upcall_ops, 0);
         if (IS_ERR(idmap->idmap_dentry)) {
 		error = PTR_ERR(idmap->idmap_dentry);
-- 
cgit v1.1


From 6daabf1b04c89f1fbd8eab5450261360943c8e20 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 24 Aug 2006 15:44:16 -0400
Subject: NFS: Fix up compiler warnings on 64-bit platforms in client.c

Fix up warnings from compiling on ppc64.

Signed-Off-By: David Howells <dhowells@redhat.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/client.c | 20 ++++++++++++++------
 1 file changed, 14 insertions(+), 6 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index f1ff2ae..a4aa479 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -836,7 +836,9 @@ struct nfs_server *nfs_create_server(const struct nfs_mount_data *data,
 	}
 	memcpy(&server->fsid, &fattr.fsid, sizeof(server->fsid));
 
-	dprintk("Server FSID: %llx:%llx\n", server->fsid.major, server->fsid.minor);
+	dprintk("Server FSID: %llx:%llx\n",
+		(unsigned long long) server->fsid.major,
+		(unsigned long long) server->fsid.minor);
 
 	BUG_ON(!server->nfs_client);
 	BUG_ON(!server->nfs_client->rpc_ops);
@@ -1002,7 +1004,9 @@ struct nfs_server *nfs4_create_server(const struct nfs4_mount_data *data,
 	if (error < 0)
 		goto error;
 
-	dprintk("Server FSID: %llx:%llx\n", server->fsid.major, server->fsid.minor);
+	dprintk("Server FSID: %llx:%llx\n",
+		(unsigned long long) server->fsid.major,
+		(unsigned long long) server->fsid.minor);
 	dprintk("Mount FH: %d\n", mntfh->size);
 
 	error = nfs_probe_fsinfo(server, mntfh, &fattr);
@@ -1074,7 +1078,8 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data,
 		goto error;
 
 	dprintk("Referral FSID: %llx:%llx\n",
-		server->fsid.major, server->fsid.minor);
+		(unsigned long long) server->fsid.major,
+		(unsigned long long) server->fsid.minor);
 
 	spin_lock(&nfs_client_lock);
 	list_add_tail(&server->client_link, &server->nfs_client->cl_superblocks);
@@ -1106,7 +1111,8 @@ struct nfs_server *nfs_clone_server(struct nfs_server *source,
 	int error;
 
 	dprintk("--> nfs_clone_server(,%llx:%llx,)\n",
-		fattr->fsid.major, fattr->fsid.minor);
+		(unsigned long long) fattr->fsid.major,
+		(unsigned long long) fattr->fsid.minor);
 
 	server = nfs_alloc_server();
 	if (!server)
@@ -1131,7 +1137,8 @@ struct nfs_server *nfs_clone_server(struct nfs_server *source,
 		goto out_free_server;
 
 	dprintk("Cloned FSID: %llx:%llx\n",
-		server->fsid.major, server->fsid.minor);
+		(unsigned long long) server->fsid.major,
+		(unsigned long long) server->fsid.minor);
 
 	error = nfs_start_lockd(server);
 	if (error < 0)
@@ -1375,7 +1382,8 @@ static int nfs_volume_list_show(struct seq_file *m, void *v)
 		 MAJOR(server->s_dev), MINOR(server->s_dev));
 
 	snprintf(fsid, 17, "%llx:%llx",
-		 server->fsid.major, server->fsid.minor);
+		 (unsigned long long) server->fsid.major,
+		 (unsigned long long) server->fsid.minor);
 
 	seq_printf(m, "v%d %02x%02x%02x%02x %4hx %-7s %-17s\n",
 		   clp->cl_nfsversion,
-- 
cgit v1.1


From 058ad9cbf14b3c7480d01b20280cb4d5858f7a50 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Sun, 27 Aug 2006 17:23:53 -0400
Subject: NFS: NFS_ROOT should use the new rpc_create API

Teach NFS_ROOT to use the new rpc_create API instead of the old two-call
API for creating an RPC transport.

Test plan:
Compile the kernel with the NFS client build-in, and set CONFIG_NFS_ROOT.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/mount_clnt.c | 29 +++++++++++++----------------
 1 file changed, 13 insertions(+), 16 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c
index 41274874..d507b02 100644
--- a/fs/nfs/mount_clnt.c
+++ b/fs/nfs/mount_clnt.c
@@ -76,22 +76,19 @@ static struct rpc_clnt *
 mnt_create(char *hostname, struct sockaddr_in *srvaddr, int version,
 		int protocol)
 {
-	struct rpc_xprt	*xprt;
-	struct rpc_clnt	*clnt;
-
-	xprt = xprt_create_proto(protocol, srvaddr, NULL);
-	if (IS_ERR(xprt))
-		return (struct rpc_clnt *)xprt;
-
-	clnt = rpc_create_client(xprt, hostname,
-				&mnt_program, version,
-				RPC_AUTH_UNIX);
-	if (!IS_ERR(clnt)) {
-		clnt->cl_softrtry = 1;
-		clnt->cl_oneshot  = 1;
-		clnt->cl_intr = 1;
-	}
-	return clnt;
+	struct rpc_create_args args = {
+		.protocol	= protocol,
+		.address	= (struct sockaddr *)srvaddr,
+		.addrsize	= sizeof(*srvaddr),
+		.servername	= hostname,
+		.program	= &mnt_program,
+		.version	= version,
+		.authflavor	= RPC_AUTH_UNIX,
+		.flags		= (RPC_CLNT_CREATE_ONESHOT |
+				   RPC_CLNT_CREATE_INTR),
+	};
+
+	return rpc_create(&args);
 }
 
 /*
-- 
cgit v1.1


From 297de4f65698ee1e1c75e27d57933b5fa8227e72 Mon Sep 17 00:00:00 2001
From: "andros@citi.umich.edu" <andros@citi.umich.edu>
Date: Tue, 29 Aug 2006 12:19:41 -0400
Subject: Fix a referral error Oops

Fix an oops when the referral server is not responding.
Check the error return from nfs4_set_client() in nfs4_create_referral_server.

Signed-off-by: Andy Adamson <andros@citi.umich.edu>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/client.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'fs')

diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index a4aa479..110f80e 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -1059,6 +1059,8 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data,
 			parent_server->client->cl_xprt->prot,
 			parent_client->retrans_timeo,
 			parent_client->retrans_count);
+	if (error < 0)
+		goto error;
 
 	/* Initialise the client representation from the parent server */
 	nfs_server_copy_userdata(server, parent_server);
-- 
cgit v1.1


From fd6840714d9cf6e93f1d42b904860a94df316b85 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Tue, 5 Sep 2006 12:27:44 -0400
Subject: NFS: nfs_lookup - don't hash dentry when optimising away the lookup

If the open intents tell us that a given lookup is going to result in a,
exclusive create, we currently optimize away the lookup call itself. The
reason is that the lookup would not be atomic with the create RPC call, so
why do it in the first place?

A problem occurs, however, if the VFS aborts the exclusive create operation
after the lookup, but before the call to create the file/directory: in this
case we will end up with a hashed negative dentry in the dcache that has
never been looked up.
Fix this by only actually hashing the dentry once the create operation has
been successfully completed.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/dir.c | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 51328ae..3419c2d 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -904,9 +904,15 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru
 
 	lock_kernel();
 
-	/* If we're doing an exclusive create, optimize away the lookup */
-	if (nfs_is_exclusive_create(dir, nd))
-		goto no_entry;
+	/*
+	 * If we're doing an exclusive create, optimize away the lookup
+	 * but don't hash the dentry.
+	 */
+	if (nfs_is_exclusive_create(dir, nd)) {
+		d_instantiate(dentry, NULL);
+		res = NULL;
+		goto out_unlock;
+	}
 
 	error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, &fhandle, &fattr);
 	if (error == -ENOENT)
@@ -1161,6 +1167,8 @@ int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fhandle,
 	if (IS_ERR(inode))
 		return error;
 	d_instantiate(dentry, inode);
+	if (d_unhashed(dentry))
+		d_rehash(dentry);
 	return 0;
 }
 
-- 
cgit v1.1


From 2dec51466a08ac1c67da41bfd0518d43d983a2eb Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@citi.umich.edu>
Date: Tue, 12 Sep 2006 11:53:23 -0400
Subject: NFSv4: It's perfectly legal for clp to be NULL here....

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/client.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 110f80e..ec1938d 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -269,7 +269,7 @@ struct nfs_client *nfs_find_client(const struct sockaddr_in *addr, int nfsversio
 	clp = __nfs_find_client(addr, nfsversion);
 	spin_unlock(&nfs_client_lock);
 
-	BUG_ON(clp->cl_cons_state == 0);
+	BUG_ON(clp && clp->cl_cons_state == 0);
 
 	return clp;
 }
-- 
cgit v1.1


From 5f004cf2aa8494708fd8d78e78142b7b2748e765 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Thu, 14 Sep 2006 14:03:14 -0400
Subject: NFS: Make read() return an ESTALE if the file has been deleted

Currently, a read() request will return EIO even if the file has been
deleted on the server, simply because that is what the VM will return
if the call to readpage() fails to update the page.

Ensure that readpage() marks the inode as stale if it receives an ESTALE.
Then return that error to userland.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/read.c | 19 ++++++++++++++++---
 1 file changed, 16 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index dae33c1..69f1549 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -568,8 +568,13 @@ int nfs_readpage_result(struct rpc_task *task, struct nfs_read_data *data)
 
 	nfs_add_stats(data->inode, NFSIOS_SERVERREADBYTES, resp->count);
 
-	/* Is this a short read? */
-	if (task->tk_status >= 0 && resp->count < argp->count && !resp->eof) {
+	if (task->tk_status < 0) {
+		if (task->tk_status == -ESTALE) {
+			set_bit(NFS_INO_STALE, &NFS_FLAGS(data->inode));
+			nfs_mark_for_revalidate(data->inode);
+		}
+	} else if (resp->count < argp->count && !resp->eof) {
+		/* This is a short read! */
 		nfs_inc_stats(data->inode, NFSIOS_SHORTREAD);
 		/* Has the server at least made some progress? */
 		if (resp->count != 0) {
@@ -616,6 +621,10 @@ int nfs_readpage(struct file *file, struct page *page)
 	if (error)
 		goto out_error;
 
+	error = -ESTALE;
+	if (NFS_STALE(inode))
+		goto out_error;
+
 	if (file == NULL) {
 		ctx = nfs_find_open_context(inode, NULL, FMODE_READ);
 		if (ctx == NULL)
@@ -678,7 +687,7 @@ int nfs_readpages(struct file *filp, struct address_space *mapping,
 	};
 	struct inode *inode = mapping->host;
 	struct nfs_server *server = NFS_SERVER(inode);
-	int ret;
+	int ret = -ESTALE;
 
 	dprintk("NFS: nfs_readpages (%s/%Ld %d)\n",
 			inode->i_sb->s_id,
@@ -686,6 +695,9 @@ int nfs_readpages(struct file *filp, struct address_space *mapping,
 			nr_pages);
 	nfs_inc_stats(inode, NFSIOS_VFSREADPAGES);
 
+	if (NFS_STALE(inode))
+		goto out;
+
 	if (filp == NULL) {
 		desc.ctx = nfs_find_open_context(inode, NULL, FMODE_READ);
 		if (desc.ctx == NULL)
@@ -701,6 +713,7 @@ int nfs_readpages(struct file *filp, struct address_space *mapping,
 			ret = err;
 	}
 	put_nfs_open_context(desc.ctx);
+out:
 	return ret;
 }
 
-- 
cgit v1.1


From 97db8f41792839a6912fd21be8b61dd6c50db58f Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Thu, 14 Sep 2006 14:03:14 -0400
Subject: NFS: Don't invalidate the symlink we just stuffed into the cache

And slight optimisation of nfs_end_data_update(): directories never have
delegations anyway.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/inode.c | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index cb5c65f..a56add0 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -717,13 +717,11 @@ void nfs_end_data_update(struct inode *inode)
 {
 	struct nfs_inode *nfsi = NFS_I(inode);
 
-	if (!nfs_have_delegation(inode, FMODE_READ)) {
-		/* Directories and symlinks: invalidate page cache */
-		if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) {
-			spin_lock(&inode->i_lock);
-			nfsi->cache_validity |= NFS_INO_INVALID_DATA;
-			spin_unlock(&inode->i_lock);
-		}
+	/* Directories: invalidate page cache */
+	if (S_ISDIR(inode->i_mode)) {
+		spin_lock(&inode->i_lock);
+		nfsi->cache_validity |= NFS_INO_INVALID_DATA;
+		spin_unlock(&inode->i_lock);
 	}
 	nfsi->cache_change_attribute = jiffies;
 	atomic_dec(&nfsi->data_updates);
-- 
cgit v1.1


From 6b30954ebb569fa1b2abdb21f2f4290eec76bf80 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Thu, 14 Sep 2006 14:03:14 -0400
Subject: NFSv4: Retry lease recovery if it failed during a synchronous
 operation.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/nfs4proc.c | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 7f60beb..c218cc4 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -793,10 +793,17 @@ out:
 int nfs4_recover_expired_lease(struct nfs_server *server)
 {
 	struct nfs_client *clp = server->nfs_client;
+	int ret;
 
-	if (test_and_clear_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state))
+	for (;;) {
+		ret = nfs4_wait_clnt_recover(server->client, clp);
+		if (ret != 0)
+			return ret;
+		if (!test_and_clear_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state))
+			break;
 		nfs4_schedule_state_recovery(clp);
-	return nfs4_wait_clnt_recover(server->client, clp);
+	}
+	return 0;
 }
 
 /*
-- 
cgit v1.1


From c514983d8d2260020543a81589a2b8c7d4bdab4e Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Fri, 15 Sep 2006 08:25:04 -0400
Subject: NFSv4: Handle the condition NFS4ERR_FILE_OPEN

Retry a few times before we give up: the error is usually due to ordering
issues with asynchronous RPC calls.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/nfs4proc.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'fs')

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index c218cc4..c49ac3e 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -2891,6 +2891,7 @@ int nfs4_handle_exception(const struct nfs_server *server, int errorcode, struct
 			if (ret == 0)
 				exception->retry = 1;
 			break;
+		case -NFS4ERR_FILE_OPEN:
 		case -NFS4ERR_GRACE:
 		case -NFS4ERR_DELAY:
 			ret = nfs4_delay(server->client, &exception->timeout);
-- 
cgit v1.1


From 2066fe89b459c3c787c811b3369df191cddd93d8 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Fri, 15 Sep 2006 08:30:46 -0400
Subject: NFSv4: Poll more aggressively when handling NFS4ERR_DELAY

Change the initial retry delay from 1s to 0.1s (and then back off
exponentially).

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/nfs4proc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index c49ac3e..47c7e6e 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -55,7 +55,7 @@
 
 #define NFSDBG_FACILITY		NFSDBG_PROC
 
-#define NFS4_POLL_RETRY_MIN	(1*HZ)
+#define NFS4_POLL_RETRY_MIN	(HZ/10)
 #define NFS4_POLL_RETRY_MAX	(15*HZ)
 
 struct nfs4_opendata;
-- 
cgit v1.1


From 51b6ded4d9a94a61035deba1d8f51a54e3a3dd86 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Fri, 15 Sep 2006 16:31:56 -0400
Subject: NFSv4: When mounting with a port=0 argument, substitute port=2049

RFC3530 states that the registered port 2049 for the NFS protocol should be
the default configuration in order to allow clients not to use the RPC
binding protocols.
If the mount program sends us a port=0, we therefore substitute port=2049.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/super.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'fs')

diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 665949d..b99113b 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -833,6 +833,9 @@ static int nfs4_get_sb(struct file_system_type *fs_type,
 				__FUNCTION__);
 		return -EINVAL;
 	}
+	/* RFC3530: The default port for NFS is 2049 */
+	if (addr.sin_port == 0)
+		addr.sin_port = NFS_PORT;
 
 	/* Grab the authentication type */
 	authflavour = RPC_AUTH_UNIX;
-- 
cgit v1.1


From aec5e175288c711cbe44750276f61efa3fa3d370 Mon Sep 17 00:00:00 2001
From: Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
Date: Sat, 16 Sep 2006 21:09:32 -0400
Subject: NFS: Use SEEK_END instead of hardcoded value

Signed-off-by: Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/file.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index a146ed3..be997d6 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -157,7 +157,7 @@ force_reval:
 static loff_t nfs_file_llseek(struct file *filp, loff_t offset, int origin)
 {
 	/* origin == SEEK_END => we must revalidate the cached file length */
-	if (origin == 2) {
+	if (origin == SEEK_END) {
 		struct inode *inode = filp->f_mapping->host;
 		int retval = nfs_revalidate_file_size(inode, filp);
 		if (retval < 0)
-- 
cgit v1.1


From f551e44ff11d3e2ec8f37907bb88ec2433cc8b74 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Wed, 20 Sep 2006 14:33:04 -0400
Subject: NFS: add comments clarifying the use of nfs_post_op_update()

Comments-only change to clarify a detail of the NFS protocol and how it is
implemented in Linux.

Test plan:
None.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/inode.c | 6 ++++++
 fs/nfs/write.c | 8 +++++++-
 2 files changed, 13 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index a56add0..e8c143d 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -840,6 +840,12 @@ int nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr)
  *
  * After an operation that has changed the inode metadata, mark the
  * attribute cache as being invalid, then try to update it.
+ *
+ * NB: if the server didn't return any post op attributes, this
+ * function will force the retrieval of attributes before the next
+ * NFS request.  Thus it should be used only for operations that
+ * are expected to change one or more attributes, to avoid
+ * unnecessary NFS requests and trips through nfs_update_inode().
  */
 int nfs_post_op_update_inode(struct inode *inode, struct nfs_fattr *fattr)
 {
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 38ba5c0..c12effb 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1253,7 +1253,13 @@ int nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
 	dprintk("NFS: %4d nfs_writeback_done (status %d)\n",
 		task->tk_pid, task->tk_status);
 
-	/* Call the NFS version-specific code */
+	/*
+	 * ->write_done will attempt to use post-op attributes to detect
+	 * conflicting writes by other clients.  A strict interpretation
+	 * of close-to-open would allow us to continue caching even if
+	 * another writer had changed the file, but some applications
+	 * depend on tighter cache coherency when writing.
+	 */
 	status = NFS_PROTO(data->inode)->write_done(task, data);
 	if (status != 0)
 		return status;
-- 
cgit v1.1


From 026ed5c9185dcc4b2df92e98c3d61a01cea19cbf Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Wed, 20 Sep 2006 14:33:07 -0400
Subject: NFS: unmark NFS direct I/O as experimental

Remove the EXPERIMENTAL flag from the NFS_DIRECTIO option.

Test plan:
Unset the EXPERIMENTAL kernel build option and check to see that the NFS
direct I/O option is still available.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/Kconfig | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/Kconfig b/fs/Kconfig
index 5305816..a270026 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -1471,8 +1471,8 @@ config NFS_V4
 	  If unsure, say N.
 
 config NFS_DIRECTIO
-	bool "Allow direct I/O on NFS files (EXPERIMENTAL)"
-	depends on NFS_FS && EXPERIMENTAL
+	bool "Allow direct I/O on NFS files"
+	depends on NFS_FS
 	help
 	  This option enables applications to perform uncached I/O on files
 	  in NFS file systems using the O_DIRECT open() flag.  When O_DIRECT
-- 
cgit v1.1


From e18fa700c9a31360bc8f193aa543b7ef7b39a06b Mon Sep 17 00:00:00 2001
From: Jeff Garzik <jeff@garzik.org>
Date: Sun, 24 Sep 2006 11:13:19 -0400
Subject: Move several *_SUPER_MAGIC symbols to include/linux/magic.h.

Signed-off-by: Jeff Garzik <jeff@garzik.org>
---
 fs/affs/affs.h        | 1 -
 fs/affs/super.c       | 1 +
 fs/autofs/autofs_i.h  | 2 --
 fs/autofs/inode.c     | 1 +
 fs/autofs4/autofs_i.h | 2 --
 fs/autofs4/inode.c    | 1 +
 fs/hpfs/hpfs_fn.h     | 1 -
 fs/hpfs/super.c       | 1 +
 fs/openpromfs/inode.c | 2 +-
 9 files changed, 5 insertions(+), 7 deletions(-)

(limited to 'fs')

diff --git a/fs/affs/affs.h b/fs/affs/affs.h
index 0ddd4cc..1dc8438 100644
--- a/fs/affs/affs.h
+++ b/fs/affs/affs.h
@@ -1,7 +1,6 @@
 #include <linux/types.h>
 #include <linux/fs.h>
 #include <linux/buffer_head.h>
-#include <linux/affs_fs.h>
 #include <linux/amigaffs.h>
 
 /* AmigaOS allows file names with up to 30 characters length.
diff --git a/fs/affs/super.c b/fs/affs/super.c
index 5200f49..1735201 100644
--- a/fs/affs/super.c
+++ b/fs/affs/super.c
@@ -14,6 +14,7 @@
 #include <linux/init.h>
 #include <linux/statfs.h>
 #include <linux/parser.h>
+#include <linux/magic.h>
 #include "affs.h"
 
 extern struct timezone sys_tz;
diff --git a/fs/autofs/autofs_i.h b/fs/autofs/autofs_i.h
index a62327f..c7700d9 100644
--- a/fs/autofs/autofs_i.h
+++ b/fs/autofs/autofs_i.h
@@ -37,8 +37,6 @@
 #define DPRINTK(D) ((void)0)
 #endif
 
-#define AUTOFS_SUPER_MAGIC 0x0187
-
 /*
  * If the daemon returns a negative response (AUTOFS_IOC_FAIL) then the
  * kernel will keep the negative response cached for up to the time given
diff --git a/fs/autofs/inode.c b/fs/autofs/inode.c
index 65e5ed4..af2efbb 100644
--- a/fs/autofs/inode.c
+++ b/fs/autofs/inode.c
@@ -16,6 +16,7 @@
 #include <linux/file.h>
 #include <linux/parser.h>
 #include <linux/bitops.h>
+#include <linux/magic.h>
 #include "autofs_i.h"
 #include <linux/module.h>
 
diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index d6603d0..480ab17 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -40,8 +40,6 @@
 #define DPRINTK(fmt,args...) do {} while(0)
 #endif
 
-#define AUTOFS_SUPER_MAGIC 0x0187
-
 /* Unified info structure.  This is pointed to by both the dentry and
    inode structures.  Each file in the filesystem has an instance of this
    structure.  It holds a reference to the dentry, so dentries are never
diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c
index fde78b1..11a6a9a 100644
--- a/fs/autofs4/inode.c
+++ b/fs/autofs4/inode.c
@@ -19,6 +19,7 @@
 #include <linux/parser.h>
 #include <linux/bitops.h>
 #include <linux/smp_lock.h>
+#include <linux/magic.h>
 #include "autofs_i.h"
 #include <linux/module.h>
 
diff --git a/fs/hpfs/hpfs_fn.h b/fs/hpfs/hpfs_fn.h
index f687d54..32ab51e 100644
--- a/fs/hpfs/hpfs_fn.h
+++ b/fs/hpfs/hpfs_fn.h
@@ -12,7 +12,6 @@
 #include <linux/mutex.h>
 #include <linux/pagemap.h>
 #include <linux/buffer_head.h>
-#include <linux/hpfs_fs.h>
 #include <linux/slab.h>
 #include <linux/smp_lock.h>
 
diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c
index f798480..8fe51c3 100644
--- a/fs/hpfs/super.c
+++ b/fs/hpfs/super.c
@@ -11,6 +11,7 @@
 #include <linux/parser.h>
 #include <linux/init.h>
 #include <linux/statfs.h>
+#include <linux/magic.h>
 
 /* Mark the filesystem dirty, so that chkdsk checks it when os/2 booted */
 
diff --git a/fs/openpromfs/inode.c b/fs/openpromfs/inode.c
index 93a56bd..592a640 100644
--- a/fs/openpromfs/inode.c
+++ b/fs/openpromfs/inode.c
@@ -8,10 +8,10 @@
 #include <linux/types.h>
 #include <linux/string.h>
 #include <linux/fs.h>
-#include <linux/openprom_fs.h>
 #include <linux/init.h>
 #include <linux/slab.h>
 #include <linux/seq_file.h>
+#include <linux/magic.h>
 
 #include <asm/openprom.h>
 #include <asm/oplib.h>
-- 
cgit v1.1


From e2c73698af3dac89328eef2b55f6746e0507d2bc Mon Sep 17 00:00:00 2001
From: Mark Fasheh <mark.fasheh@oracle.com>
Date: Fri, 8 Sep 2006 11:37:32 -0700
Subject: ocfs2: Silence dlm error print

An AST can be delivered via the network after a lock has been removed, so no
need to print an error when we see that.

Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
---
 fs/ocfs2/dlm/dlmast.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/dlm/dlmast.c b/fs/ocfs2/dlm/dlmast.c
index f13a4ba..681046d 100644
--- a/fs/ocfs2/dlm/dlmast.c
+++ b/fs/ocfs2/dlm/dlmast.c
@@ -320,8 +320,8 @@ int dlm_proxy_ast_handler(struct o2net_msg *msg, u32 len, void *data)
 
 	res = dlm_lookup_lockres(dlm, name, locklen);
 	if (!res) {
-		mlog(ML_ERROR, "got %sast for unknown lockres! "
-			       "cookie=%u:%llu, name=%.*s, namelen=%u\n",
+		mlog(0, "got %sast for unknown lockres! "
+		     "cookie=%u:%llu, name=%.*s, namelen=%u\n",
 		     past->type == DLM_AST ? "" : "b",
 		     dlm_get_lock_cookie_node(cookie),
 		     dlm_get_lock_cookie_seq(cookie),
@@ -462,7 +462,7 @@ int dlm_send_proxy_ast_msg(struct dlm_ctxt *dlm, struct dlm_lock_resource *res,
 			mlog(ML_ERROR, "sent AST to node %u, it returned "
 			     "DLM_MIGRATING!\n", lock->ml.node);
 			BUG();
-		} else if (status != DLM_NORMAL) {
+		} else if (status != DLM_NORMAL && status != DLM_IVLOCKID) {
 			mlog(ML_ERROR, "AST to node %u returned %d!\n",
 			     lock->ml.node, status);
 			/* ignore it */
-- 
cgit v1.1


From 3384f3df5ed939a25135e1b2734fb7cdee1720a8 Mon Sep 17 00:00:00 2001
From: Mark Fasheh <mark.fasheh@oracle.com>
Date: Fri, 8 Sep 2006 11:38:29 -0700
Subject: ocfs2: Allow binary names in the DLM

The OCFS2 DLM uses strlen() to determine lock name length, which excludes
the possibility of putting binary values in the name string. Fix this by
requiring that string length be passed in as a parameter.

Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
---
 fs/ocfs2/dlm/dlmapi.h      |  1 +
 fs/ocfs2/dlm/dlmcommon.h   |  1 +
 fs/ocfs2/dlm/dlmlock.c     | 10 +++++-----
 fs/ocfs2/dlm/dlmmaster.c   |  4 ++--
 fs/ocfs2/dlm/dlmrecovery.c |  3 ++-
 5 files changed, 11 insertions(+), 8 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/dlm/dlmapi.h b/fs/ocfs2/dlm/dlmapi.h
index 53652f5..cfd5cb6 100644
--- a/fs/ocfs2/dlm/dlmapi.h
+++ b/fs/ocfs2/dlm/dlmapi.h
@@ -182,6 +182,7 @@ enum dlm_status dlmlock(struct dlm_ctxt *dlm,
 			struct dlm_lockstatus *lksb,
 			int flags,
 			const char *name,
+			int namelen,
 			dlm_astlockfunc_t *ast,
 			void *data,
 			dlm_bastlockfunc_t *bast);
diff --git a/fs/ocfs2/dlm/dlmcommon.h b/fs/ocfs2/dlm/dlmcommon.h
index 14530ee..fa96818 100644
--- a/fs/ocfs2/dlm/dlmcommon.h
+++ b/fs/ocfs2/dlm/dlmcommon.h
@@ -747,6 +747,7 @@ void dlm_change_lockres_owner(struct dlm_ctxt *dlm,
 			      u8 owner);
 struct dlm_lock_resource * dlm_get_lock_resource(struct dlm_ctxt *dlm,
 						 const char *lockid,
+						 int namelen,
 						 int flags);
 struct dlm_lock_resource *dlm_new_lockres(struct dlm_ctxt *dlm,
 					  const char *name,
diff --git a/fs/ocfs2/dlm/dlmlock.c b/fs/ocfs2/dlm/dlmlock.c
index 5ca57ec..42a1b91 100644
--- a/fs/ocfs2/dlm/dlmlock.c
+++ b/fs/ocfs2/dlm/dlmlock.c
@@ -540,8 +540,8 @@ static inline void dlm_get_next_cookie(u8 node_num, u64 *cookie)
 
 enum dlm_status dlmlock(struct dlm_ctxt *dlm, int mode,
 			struct dlm_lockstatus *lksb, int flags,
-			const char *name, dlm_astlockfunc_t *ast, void *data,
-			dlm_bastlockfunc_t *bast)
+			const char *name, int namelen, dlm_astlockfunc_t *ast,
+			void *data, dlm_bastlockfunc_t *bast)
 {
 	enum dlm_status status;
 	struct dlm_lock_resource *res = NULL;
@@ -571,7 +571,7 @@ enum dlm_status dlmlock(struct dlm_ctxt *dlm, int mode,
 	recovery = (flags & LKM_RECOVERY);
 
 	if (recovery &&
-	    (!dlm_is_recovery_lock(name, strlen(name)) || convert) ) {
+	    (!dlm_is_recovery_lock(name, namelen) || convert) ) {
 		dlm_error(status);
 		goto error;
 	}
@@ -643,7 +643,7 @@ retry_convert:
 		}
 
 		status = DLM_IVBUFLEN;
-		if (strlen(name) > DLM_LOCKID_NAME_MAX || strlen(name) < 1) {
+		if (namelen > DLM_LOCKID_NAME_MAX || namelen < 1) {
 			dlm_error(status);
 			goto error;
 		}
@@ -659,7 +659,7 @@ retry_convert:
 			dlm_wait_for_recovery(dlm);
 
 		/* find or create the lock resource */
-		res = dlm_get_lock_resource(dlm, name, flags);
+		res = dlm_get_lock_resource(dlm, name, namelen, flags);
 		if (!res) {
 			status = DLM_IVLOCKID;
 			dlm_error(status);
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index 9503240..f784177 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -740,6 +740,7 @@ struct dlm_lock_resource *dlm_new_lockres(struct dlm_ctxt *dlm,
  */
 struct dlm_lock_resource * dlm_get_lock_resource(struct dlm_ctxt *dlm,
 					  const char *lockid,
+					  int namelen,
 					  int flags)
 {
 	struct dlm_lock_resource *tmpres=NULL, *res=NULL;
@@ -748,13 +749,12 @@ struct dlm_lock_resource * dlm_get_lock_resource(struct dlm_ctxt *dlm,
 	int blocked = 0;
 	int ret, nodenum;
 	struct dlm_node_iter iter;
-	unsigned int namelen, hash;
+	unsigned int hash;
 	int tries = 0;
 	int bit, wait_on_recovery = 0;
 
 	BUG_ON(!lockid);
 
-	namelen = strlen(lockid);
 	hash = dlm_lockid_hash(lockid, namelen);
 
 	mlog(0, "get lockres %s (len %d)\n", lockid, namelen);
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c
index 594745f..9d950d7 100644
--- a/fs/ocfs2/dlm/dlmrecovery.c
+++ b/fs/ocfs2/dlm/dlmrecovery.c
@@ -2285,7 +2285,8 @@ again:
 	memset(&lksb, 0, sizeof(lksb));
 
 	ret = dlmlock(dlm, LKM_EXMODE, &lksb, LKM_NOQUEUE|LKM_RECOVERY,
-		      DLM_RECOVERY_LOCK_NAME, dlm_reco_ast, dlm, dlm_reco_bast);
+		      DLM_RECOVERY_LOCK_NAME, DLM_RECOVERY_LOCK_NAME_LEN,
+		      dlm_reco_ast, dlm, dlm_reco_bast);
 
 	mlog(0, "%s: dlmlock($RECOVERY) returned %d, lksb=%d\n",
 	     dlm->name, ret, lksb.status);
-- 
cgit v1.1


From ea5b3a187e2724fa9d08b2fbd3898c149ed95c6b Mon Sep 17 00:00:00 2001
From: Mark Fasheh <mark.fasheh@oracle.com>
Date: Fri, 8 Sep 2006 11:39:27 -0700
Subject: ocfs2: Update dlmfs for new dlmlock() API

We just need to add a namelen field to the user_lock_res structure, and
update a few debug prints. Instead of updating all debug prints, I took the
opportunity to remove a few that are likely unnecessary these days.

Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
---
 fs/ocfs2/dlm/userdlm.c | 81 +++++++++++++++++++-------------------------------
 fs/ocfs2/dlm/userdlm.h |  1 +
 2 files changed, 31 insertions(+), 51 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/dlm/userdlm.c b/fs/ocfs2/dlm/userdlm.c
index e641b08..eead48b 100644
--- a/fs/ocfs2/dlm/userdlm.c
+++ b/fs/ocfs2/dlm/userdlm.c
@@ -102,10 +102,10 @@ static inline void user_recover_from_dlm_error(struct user_lock_res *lockres)
 	spin_unlock(&lockres->l_lock);
 }
 
-#define user_log_dlm_error(_func, _stat, _lockres) do {		\
-	mlog(ML_ERROR, "Dlm error \"%s\" while calling %s on "	\
-		"resource %s: %s\n", dlm_errname(_stat), _func,	\
-		_lockres->l_name, dlm_errmsg(_stat));		\
+#define user_log_dlm_error(_func, _stat, _lockres) do {			\
+	mlog(ML_ERROR, "Dlm error \"%s\" while calling %s on "		\
+		"resource %.*s: %s\n", dlm_errname(_stat), _func,	\
+		_lockres->l_namelen, _lockres->l_name, dlm_errmsg(_stat)); \
 } while (0)
 
 /* WARNING: This function lives in a world where the only three lock
@@ -127,21 +127,22 @@ static void user_ast(void *opaque)
 	struct user_lock_res *lockres = opaque;
 	struct dlm_lockstatus *lksb;
 
-	mlog(0, "AST fired for lockres %s\n", lockres->l_name);
+	mlog(0, "AST fired for lockres %.*s\n", lockres->l_namelen,
+	     lockres->l_name);
 
 	spin_lock(&lockres->l_lock);
 
 	lksb = &(lockres->l_lksb);
 	if (lksb->status != DLM_NORMAL) {
-		mlog(ML_ERROR, "lksb status value of %u on lockres %s\n",
-		     lksb->status, lockres->l_name);
+		mlog(ML_ERROR, "lksb status value of %u on lockres %.*s\n",
+		     lksb->status, lockres->l_namelen, lockres->l_name);
 		spin_unlock(&lockres->l_lock);
 		return;
 	}
 
 	mlog_bug_on_msg(lockres->l_requested == LKM_IVMODE,
-			"Lockres %s, requested ivmode. flags 0x%x\n",
-			lockres->l_name, lockres->l_flags);
+			"Lockres %.*s, requested ivmode. flags 0x%x\n",
+			lockres->l_namelen, lockres->l_name, lockres->l_flags);
 
 	/* we're downconverting. */
 	if (lockres->l_requested < lockres->l_level) {
@@ -213,8 +214,8 @@ static void user_bast(void *opaque, int level)
 {
 	struct user_lock_res *lockres = opaque;
 
-	mlog(0, "Blocking AST fired for lockres %s. Blocking level %d\n",
-		lockres->l_name, level);
+	mlog(0, "Blocking AST fired for lockres %.*s. Blocking level %d\n",
+	     lockres->l_namelen, lockres->l_name, level);
 
 	spin_lock(&lockres->l_lock);
 	lockres->l_flags |= USER_LOCK_BLOCKED;
@@ -231,7 +232,8 @@ static void user_unlock_ast(void *opaque, enum dlm_status status)
 {
 	struct user_lock_res *lockres = opaque;
 
-	mlog(0, "UNLOCK AST called on lock %s\n", lockres->l_name);
+	mlog(0, "UNLOCK AST called on lock %.*s\n", lockres->l_namelen,
+	     lockres->l_name);
 
 	if (status != DLM_NORMAL && status != DLM_CANCELGRANT)
 		mlog(ML_ERROR, "Dlm returns status %d\n", status);
@@ -244,8 +246,6 @@ static void user_unlock_ast(void *opaque, enum dlm_status status)
 	    && !(lockres->l_flags & USER_LOCK_IN_CANCEL)) {
 		lockres->l_level = LKM_IVMODE;
 	} else if (status == DLM_CANCELGRANT) {
-		mlog(0, "Lock %s, cancel fails, flags 0x%x\n",
-		     lockres->l_name, lockres->l_flags);
 		/* We tried to cancel a convert request, but it was
 		 * already granted. Don't clear the busy flag - the
 		 * ast should've done this already. */
@@ -255,8 +255,6 @@ static void user_unlock_ast(void *opaque, enum dlm_status status)
 	} else {
 		BUG_ON(!(lockres->l_flags & USER_LOCK_IN_CANCEL));
 		/* Cancel succeeded, we want to re-queue */
-		mlog(0, "Lock %s, cancel succeeds, flags 0x%x\n",
-		     lockres->l_name, lockres->l_flags);
 		lockres->l_requested = LKM_IVMODE; /* cancel an
 						    * upconvert
 						    * request. */
@@ -287,13 +285,14 @@ static void user_dlm_unblock_lock(void *opaque)
 	struct user_lock_res *lockres = (struct user_lock_res *) opaque;
 	struct dlm_ctxt *dlm = dlm_ctxt_from_user_lockres(lockres);
 
-	mlog(0, "processing lockres %s\n", lockres->l_name);
+	mlog(0, "processing lockres %.*s\n", lockres->l_namelen,
+	     lockres->l_name);
 
 	spin_lock(&lockres->l_lock);
 
 	mlog_bug_on_msg(!(lockres->l_flags & USER_LOCK_QUEUED),
-			"Lockres %s, flags 0x%x\n",
-			lockres->l_name, lockres->l_flags);
+			"Lockres %.*s, flags 0x%x\n",
+			lockres->l_namelen, lockres->l_name, lockres->l_flags);
 
 	/* notice that we don't clear USER_LOCK_BLOCKED here. If it's
 	 * set, we want user_ast clear it. */
@@ -305,22 +304,16 @@ static void user_dlm_unblock_lock(void *opaque)
 	 * flag, and finally we might get another bast which re-queues
 	 * us before our ast for the downconvert is called. */
 	if (!(lockres->l_flags & USER_LOCK_BLOCKED)) {
-		mlog(0, "Lockres %s, flags 0x%x: queued but not blocking\n",
-			lockres->l_name, lockres->l_flags);
 		spin_unlock(&lockres->l_lock);
 		goto drop_ref;
 	}
 
 	if (lockres->l_flags & USER_LOCK_IN_TEARDOWN) {
-		mlog(0, "lock is in teardown so we do nothing\n");
 		spin_unlock(&lockres->l_lock);
 		goto drop_ref;
 	}
 
 	if (lockres->l_flags & USER_LOCK_BUSY) {
-		mlog(0, "Cancel lock %s, flags 0x%x\n",
-		     lockres->l_name, lockres->l_flags);
-
 		if (lockres->l_flags & USER_LOCK_IN_CANCEL) {
 			spin_unlock(&lockres->l_lock);
 			goto drop_ref;
@@ -372,6 +365,7 @@ static void user_dlm_unblock_lock(void *opaque)
 			 &lockres->l_lksb,
 			 LKM_CONVERT|LKM_VALBLK,
 			 lockres->l_name,
+			 lockres->l_namelen,
 			 user_ast,
 			 lockres,
 			 user_bast);
@@ -420,16 +414,16 @@ int user_dlm_cluster_lock(struct user_lock_res *lockres,
 
 	if (level != LKM_EXMODE &&
 	    level != LKM_PRMODE) {
-		mlog(ML_ERROR, "lockres %s: invalid request!\n",
-		     lockres->l_name);
+		mlog(ML_ERROR, "lockres %.*s: invalid request!\n",
+		     lockres->l_namelen, lockres->l_name);
 		status = -EINVAL;
 		goto bail;
 	}
 
-	mlog(0, "lockres %s: asking for %s lock, passed flags = 0x%x\n",
-		lockres->l_name,
-		(level == LKM_EXMODE) ? "LKM_EXMODE" : "LKM_PRMODE",
-		lkm_flags);
+	mlog(0, "lockres %.*s: asking for %s lock, passed flags = 0x%x\n",
+	     lockres->l_namelen, lockres->l_name,
+	     (level == LKM_EXMODE) ? "LKM_EXMODE" : "LKM_PRMODE",
+	     lkm_flags);
 
 again:
 	if (signal_pending(current)) {
@@ -474,15 +468,13 @@ again:
 		BUG_ON(level == LKM_IVMODE);
 		BUG_ON(level == LKM_NLMODE);
 
-		mlog(0, "lock %s, get lock from %d to level = %d\n",
-			lockres->l_name, lockres->l_level, level);
-
 		/* call dlm_lock to upgrade lock now */
 		status = dlmlock(dlm,
 				 level,
 				 &lockres->l_lksb,
 				 local_flags,
 				 lockres->l_name,
+				 lockres->l_namelen,
 				 user_ast,
 				 lockres,
 				 user_bast);
@@ -498,9 +490,6 @@ again:
 			goto bail;
 		}
 
-		mlog(0, "lock %s, successfull return from dlmlock\n",
-			lockres->l_name);
-
 		user_wait_on_busy_lock(lockres);
 		goto again;
 	}
@@ -508,9 +497,6 @@ again:
 	user_dlm_inc_holders(lockres, level);
 	spin_unlock(&lockres->l_lock);
 
-	mlog(0, "lockres %s: Got %s lock!\n", lockres->l_name,
-		(level == LKM_EXMODE) ? "LKM_EXMODE" : "LKM_PRMODE");
-
 	status = 0;
 bail:
 	return status;
@@ -538,13 +524,11 @@ void user_dlm_cluster_unlock(struct user_lock_res *lockres,
 {
 	if (level != LKM_EXMODE &&
 	    level != LKM_PRMODE) {
-		mlog(ML_ERROR, "lockres %s: invalid request!\n", lockres->l_name);
+		mlog(ML_ERROR, "lockres %.*s: invalid request!\n",
+		     lockres->l_namelen, lockres->l_name);
 		return;
 	}
 
-	mlog(0, "lockres %s: dropping %s lock\n", lockres->l_name,
-		(level == LKM_EXMODE) ? "LKM_EXMODE" : "LKM_PRMODE");
-
 	spin_lock(&lockres->l_lock);
 	user_dlm_dec_holders(lockres, level);
 	__user_dlm_cond_queue_lockres(lockres);
@@ -602,6 +586,7 @@ void user_dlm_lock_res_init(struct user_lock_res *lockres,
 	memcpy(lockres->l_name,
 	       dentry->d_name.name,
 	       dentry->d_name.len);
+	lockres->l_namelen = dentry->d_name.len;
 }
 
 int user_dlm_destroy_lock(struct user_lock_res *lockres)
@@ -609,11 +594,10 @@ int user_dlm_destroy_lock(struct user_lock_res *lockres)
 	int status = -EBUSY;
 	struct dlm_ctxt *dlm = dlm_ctxt_from_user_lockres(lockres);
 
-	mlog(0, "asked to destroy %s\n", lockres->l_name);
+	mlog(0, "asked to destroy %.*s\n", lockres->l_namelen, lockres->l_name);
 
 	spin_lock(&lockres->l_lock);
 	if (lockres->l_flags & USER_LOCK_IN_TEARDOWN) {
-		mlog(0, "Lock is already torn down\n");
 		spin_unlock(&lockres->l_lock);
 		return 0;
 	}
@@ -623,8 +607,6 @@ int user_dlm_destroy_lock(struct user_lock_res *lockres)
 	while (lockres->l_flags & USER_LOCK_BUSY) {
 		spin_unlock(&lockres->l_lock);
 
-		mlog(0, "lock %s is busy\n", lockres->l_name);
-
 		user_wait_on_busy_lock(lockres);
 
 		spin_lock(&lockres->l_lock);
@@ -632,14 +614,12 @@ int user_dlm_destroy_lock(struct user_lock_res *lockres)
 
 	if (lockres->l_ro_holders || lockres->l_ex_holders) {
 		spin_unlock(&lockres->l_lock);
-		mlog(0, "lock %s has holders\n", lockres->l_name);
 		goto bail;
 	}
 
 	status = 0;
 	if (!(lockres->l_flags & USER_LOCK_ATTACHED)) {
 		spin_unlock(&lockres->l_lock);
-		mlog(0, "lock %s is not attached\n", lockres->l_name);
 		goto bail;
 	}
 
@@ -647,7 +627,6 @@ int user_dlm_destroy_lock(struct user_lock_res *lockres)
 	lockres->l_flags |= USER_LOCK_BUSY;
 	spin_unlock(&lockres->l_lock);
 
-	mlog(0, "unlocking lockres %s\n", lockres->l_name);
 	status = dlmunlock(dlm,
 			   &lockres->l_lksb,
 			   LKM_VALBLK,
diff --git a/fs/ocfs2/dlm/userdlm.h b/fs/ocfs2/dlm/userdlm.h
index 04178bc..c400e93 100644
--- a/fs/ocfs2/dlm/userdlm.h
+++ b/fs/ocfs2/dlm/userdlm.h
@@ -53,6 +53,7 @@ struct user_lock_res {
 
 #define USER_DLM_LOCK_ID_MAX_LEN  32
 	char                     l_name[USER_DLM_LOCK_ID_MAX_LEN];
+	int                      l_namelen;
 	int                      l_level;
 	unsigned int             l_ro_holders;
 	unsigned int             l_ex_holders;
-- 
cgit v1.1


From f0681062b8e369d9fb6f3ce10f4e3fc8cea5f910 Mon Sep 17 00:00:00 2001
From: Mark Fasheh <mark.fasheh@oracle.com>
Date: Fri, 8 Sep 2006 11:40:10 -0700
Subject: ocfs2: Update dlmglue for new dlmlock() API

File system lock names are very regular right now, so we really only need to
pass an extra parameter to dlmlock().

Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
---
 fs/ocfs2/dlmglue.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'fs')

diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index 151b417..20c6ca8 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -810,6 +810,7 @@ static int ocfs2_lock_create(struct ocfs2_super *osb,
 			 &lockres->l_lksb,
 			 dlm_flags,
 			 lockres->l_name,
+			 OCFS2_LOCK_ID_MAX_LEN - 1,
 			 lockres->l_ops->ast,
 			 lockres,
 			 lockres->l_ops->bast);
@@ -999,6 +1000,7 @@ again:
 				 &lockres->l_lksb,
 				 lkm_flags|LKM_CONVERT|LKM_VALBLK,
 				 lockres->l_name,
+				 OCFS2_LOCK_ID_MAX_LEN - 1,
 				 lockres->l_ops->ast,
 				 lockres,
 				 lockres->l_ops->bast);
@@ -2419,6 +2421,7 @@ static int ocfs2_downconvert_lock(struct ocfs2_super *osb,
 			 &lockres->l_lksb,
 			 dlm_flags,
 			 lockres->l_name,
+			 OCFS2_LOCK_ID_MAX_LEN - 1,
 			 lockres->l_ops->ast,
 			 lockres,
 			 lockres->l_ops->bast);
-- 
cgit v1.1


From d680efe9d8fe0eb99d9dd063a4def6b362cdb40d Mon Sep 17 00:00:00 2001
From: Mark Fasheh <mark.fasheh@oracle.com>
Date: Fri, 8 Sep 2006 14:14:34 -0700
Subject: ocfs2: Add new cluster lock type

Replace the dentry vote mechanism with a cluster lock which covers a set
of dentries. This allows us to force d_delete() only on nodes which actually
care about an unlink.

Every node that does a ->lookup() gets a read only lock on the dentry, until
an unlink during which the unlinking node, will request an exclusive lock,
forcing the other nodes who care about that dentry to d_delete() it. The
effect is that we retain a very lightweight ->d_revalidate(), and at the
same time get to make large improvements to the average case performance of
the ocfs2 unlink and rename operations.

This patch adds the cluster lock type which OCFS2 can attach to
dentries.  A small number of fs/ocfs2/dcache.c functions are stubbed
out so that this change can compile.

Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
---
 fs/ocfs2/dcache.c       |   2 +
 fs/ocfs2/dcache.h       |  27 +++
 fs/ocfs2/dlmglue.c      | 475 +++++++++++++++++++++++++++++++++++++-----------
 fs/ocfs2/dlmglue.h      |  11 ++
 fs/ocfs2/ocfs2_lockid.h |  25 +++
 5 files changed, 436 insertions(+), 104 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/dcache.c b/fs/ocfs2/dcache.c
index 1a01380..aea4577 100644
--- a/fs/ocfs2/dcache.c
+++ b/fs/ocfs2/dcache.c
@@ -87,6 +87,8 @@ bail:
 	return ret;
 }
 
+DEFINE_SPINLOCK(dentry_attach_lock);
+
 struct dentry_operations ocfs2_dentry_ops = {
 	.d_revalidate		= ocfs2_dentry_revalidate,
 };
diff --git a/fs/ocfs2/dcache.h b/fs/ocfs2/dcache.h
index 9007277..f1423c2 100644
--- a/fs/ocfs2/dcache.h
+++ b/fs/ocfs2/dcache.h
@@ -28,4 +28,31 @@
 
 extern struct dentry_operations ocfs2_dentry_ops;
 
+struct ocfs2_dentry_lock {
+	unsigned int		dl_count;
+	u64			dl_parent_blkno;
+
+	/*
+	 * The ocfs2_dentry_lock keeps an inode reference until
+	 * dl_lockres has been destroyed. This is usually done in
+	 * ->d_iput() anyway, so there should be minimal impact.
+	 */
+	struct inode		*dl_inode;
+	struct ocfs2_lock_res	dl_lockres;
+};
+
+static inline void ocfs2_dentry_lock_put(struct ocfs2_super *osb,
+					 struct ocfs2_dentry_lock *dl)
+{
+}
+
+static inline struct dentry *ocfs2_find_local_alias(struct inode *inode,
+						    u64 parent_blkno,
+						    int skip_unhashed)
+{
+	return NULL;
+}
+
+extern spinlock_t dentry_attach_lock;
+
 #endif /* OCFS2_DCACHE_H */
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index 20c6ca8..764d15d 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -46,6 +46,7 @@
 #include "ocfs2.h"
 
 #include "alloc.h"
+#include "dcache.h"
 #include "dlmglue.h"
 #include "extent_map.h"
 #include "heartbeat.h"
@@ -69,6 +70,9 @@ struct ocfs2_mask_waiter {
 static void ocfs2_inode_ast_func(void *opaque);
 static void ocfs2_inode_bast_func(void *opaque,
 				  int level);
+static void ocfs2_dentry_ast_func(void *opaque);
+static void ocfs2_dentry_bast_func(void *opaque,
+				  int level);
 static void ocfs2_super_ast_func(void *opaque);
 static void ocfs2_super_bast_func(void *opaque,
 				  int level);
@@ -76,32 +80,57 @@ static void ocfs2_rename_ast_func(void *opaque);
 static void ocfs2_rename_bast_func(void *opaque,
 				   int level);
 
+/*
+ * Return value from ocfs2_convert_worker_t functions.
+ *
+ * These control the precise actions of ocfs2_generic_unblock_lock()
+ * and ocfs2_process_blocked_lock()
+ *
+ */
+enum ocfs2_unblock_action {
+	UNBLOCK_CONTINUE	= 0, /* Continue downconvert */
+	UNBLOCK_CONTINUE_POST	= 1, /* Continue downconvert, fire
+				      * ->post_unlock callback */
+	UNBLOCK_STOP_POST	= 2, /* Do not downconvert, fire
+				      * ->post_unlock() callback. */
+};
+
+struct ocfs2_unblock_ctl {
+	int requeue;
+	enum ocfs2_unblock_action unblock_action;
+};
+
 /* so far, all locks have gotten along with the same unlock ast */
 static void ocfs2_unlock_ast_func(void *opaque,
 				  enum dlm_status status);
-static int ocfs2_do_unblock_meta(struct inode *inode,
-				 int *requeue);
 static int ocfs2_unblock_meta(struct ocfs2_lock_res *lockres,
-			      int *requeue);
+			      struct ocfs2_unblock_ctl *ctl);
 static int ocfs2_unblock_data(struct ocfs2_lock_res *lockres,
-			      int *requeue);
+			      struct ocfs2_unblock_ctl *ctl);
 static int ocfs2_unblock_inode_lock(struct ocfs2_lock_res *lockres,
-			      int *requeue);
+				    struct ocfs2_unblock_ctl *ctl);
+static int ocfs2_unblock_dentry_lock(struct ocfs2_lock_res *lockres,
+				     struct ocfs2_unblock_ctl *ctl);
 static int ocfs2_unblock_osb_lock(struct ocfs2_lock_res *lockres,
-				  int *requeue);
-typedef void (ocfs2_convert_worker_t)(struct ocfs2_lock_res *, int);
-static int ocfs2_generic_unblock_lock(struct ocfs2_super *osb,
-				      struct ocfs2_lock_res *lockres,
-				      int *requeue,
-				      ocfs2_convert_worker_t *worker);
+				  struct ocfs2_unblock_ctl *ctl);
+
+static void ocfs2_dentry_post_unlock(struct ocfs2_super *osb,
+				     struct ocfs2_lock_res *lockres);
 
 struct ocfs2_lock_res_ops {
 	void (*ast)(void *);
 	void (*bast)(void *, int);
 	void (*unlock_ast)(void *, enum dlm_status);
-	int  (*unblock)(struct ocfs2_lock_res *, int *);
+	int  (*unblock)(struct ocfs2_lock_res *, struct ocfs2_unblock_ctl *);
+	void (*post_unlock)(struct ocfs2_super *, struct ocfs2_lock_res *);
 };
 
+typedef int (ocfs2_convert_worker_t)(struct ocfs2_lock_res *, int);
+static int ocfs2_generic_unblock_lock(struct ocfs2_super *osb,
+				      struct ocfs2_lock_res *lockres,
+				      struct ocfs2_unblock_ctl *ctl,
+				      ocfs2_convert_worker_t *worker);
+
 static struct ocfs2_lock_res_ops ocfs2_inode_rw_lops = {
 	.ast		= ocfs2_inode_ast_func,
 	.bast		= ocfs2_inode_bast_func,
@@ -116,9 +145,6 @@ static struct ocfs2_lock_res_ops ocfs2_inode_meta_lops = {
 	.unblock	= ocfs2_unblock_meta,
 };
 
-static void ocfs2_data_convert_worker(struct ocfs2_lock_res *lockres,
-				      int blocking);
-
 static struct ocfs2_lock_res_ops ocfs2_inode_data_lops = {
 	.ast		= ocfs2_inode_ast_func,
 	.bast		= ocfs2_inode_bast_func,
@@ -140,6 +166,14 @@ static struct ocfs2_lock_res_ops ocfs2_rename_lops = {
 	.unblock	= ocfs2_unblock_osb_lock,
 };
 
+static struct ocfs2_lock_res_ops ocfs2_dentry_lops = {
+	.ast		= ocfs2_dentry_ast_func,
+	.bast		= ocfs2_dentry_bast_func,
+	.unlock_ast	= ocfs2_unlock_ast_func,
+	.unblock	= ocfs2_unblock_dentry_lock,
+	.post_unlock	= ocfs2_dentry_post_unlock,
+};
+
 static inline int ocfs2_is_inode_lock(struct ocfs2_lock_res *lockres)
 {
 	return lockres->l_type == OCFS2_LOCK_TYPE_META ||
@@ -172,6 +206,13 @@ static inline struct inode *ocfs2_lock_res_inode(struct ocfs2_lock_res *lockres)
 	return (struct inode *) lockres->l_priv;
 }
 
+static inline struct ocfs2_dentry_lock *ocfs2_lock_res_dl(struct ocfs2_lock_res *lockres)
+{
+	BUG_ON(lockres->l_type != OCFS2_LOCK_TYPE_DENTRY);
+
+	return (struct ocfs2_dentry_lock *)lockres->l_priv;
+}
+
 static int ocfs2_lock_create(struct ocfs2_super *osb,
 			     struct ocfs2_lock_res *lockres,
 			     int level,
@@ -204,22 +245,6 @@ static inline int ocfs2_can_downconvert_meta_lock(struct inode *inode,
 						  struct ocfs2_lock_res *lockres,
 						  int new_level);
 
-static char *ocfs2_lock_type_strings[] = {
-	[OCFS2_LOCK_TYPE_META] = "Meta",
-	[OCFS2_LOCK_TYPE_DATA] = "Data",
-	[OCFS2_LOCK_TYPE_SUPER] = "Super",
-	[OCFS2_LOCK_TYPE_RENAME] = "Rename",
-	/* Need to differntiate from [R]ename.. serializing writes is the
-	 * important job it does, anyway. */
-	[OCFS2_LOCK_TYPE_RW] = "Write/Read",
-};
-
-static char *ocfs2_lock_type_string(enum ocfs2_lock_type type)
-{
-	mlog_bug_on_msg(type >= OCFS2_NUM_LOCK_TYPES, "%d\n", type);
-	return ocfs2_lock_type_strings[type];
-}
-
 static void ocfs2_build_lock_name(enum ocfs2_lock_type type,
 				  u64 blkno,
 				  u32 generation,
@@ -265,13 +290,9 @@ static void ocfs2_remove_lockres_tracking(struct ocfs2_lock_res *res)
 static void ocfs2_lock_res_init_common(struct ocfs2_super *osb,
 				       struct ocfs2_lock_res *res,
 				       enum ocfs2_lock_type type,
-				       u64 blkno,
-				       u32 generation,
 				       struct ocfs2_lock_res_ops *ops,
 				       void *priv)
 {
-	ocfs2_build_lock_name(type, blkno, generation, res->l_name);
-
 	res->l_type          = type;
 	res->l_ops           = ops;
 	res->l_priv          = priv;
@@ -319,9 +340,59 @@ void ocfs2_inode_lock_res_init(struct ocfs2_lock_res *res,
 			break;
 	};
 
-	ocfs2_lock_res_init_common(OCFS2_SB(inode->i_sb), res, type,
-				   OCFS2_I(inode)->ip_blkno,
-				   inode->i_generation, ops, inode);
+	ocfs2_build_lock_name(type, OCFS2_I(inode)->ip_blkno,
+			      inode->i_generation, res->l_name);
+	ocfs2_lock_res_init_common(OCFS2_SB(inode->i_sb), res, type, ops, inode);
+}
+
+static __u64 ocfs2_get_dentry_lock_ino(struct ocfs2_lock_res *lockres)
+{
+	__be64 inode_blkno_be;
+
+	memcpy(&inode_blkno_be, &lockres->l_name[OCFS2_DENTRY_LOCK_INO_START],
+	       sizeof(__be64));
+
+	return be64_to_cpu(inode_blkno_be);
+}
+
+void ocfs2_dentry_lock_res_init(struct ocfs2_dentry_lock *dl,
+				u64 parent, struct inode *inode)
+{
+	int len;
+	u64 inode_blkno = OCFS2_I(inode)->ip_blkno;
+	__be64 inode_blkno_be = cpu_to_be64(inode_blkno);
+	struct ocfs2_lock_res *lockres = &dl->dl_lockres;
+
+	ocfs2_lock_res_init_once(lockres);
+
+	/*
+	 * Unfortunately, the standard lock naming scheme won't work
+	 * here because we have two 16 byte values to use. Instead,
+	 * we'll stuff the inode number as a binary value. We still
+	 * want error prints to show something without garbling the
+	 * display, so drop a null byte in there before the inode
+	 * number. A future version of OCFS2 will likely use all
+	 * binary lock names. The stringified names have been a
+	 * tremendous aid in debugging, but now that the debugfs
+	 * interface exists, we can mangle things there if need be.
+	 *
+	 * NOTE: We also drop the standard "pad" value (the total lock
+	 * name size stays the same though - the last part is all
+	 * zeros due to the memset in ocfs2_lock_res_init_once()
+	 */
+	len = snprintf(lockres->l_name, OCFS2_DENTRY_LOCK_INO_START,
+		       "%c%016llx",
+		       ocfs2_lock_type_char(OCFS2_LOCK_TYPE_DENTRY),
+		       (long long)parent);
+
+	BUG_ON(len != (OCFS2_DENTRY_LOCK_INO_START - 1));
+
+	memcpy(&lockres->l_name[OCFS2_DENTRY_LOCK_INO_START], &inode_blkno_be,
+	       sizeof(__be64));
+
+	ocfs2_lock_res_init_common(OCFS2_SB(inode->i_sb), lockres,
+				   OCFS2_LOCK_TYPE_DENTRY, &ocfs2_dentry_lops,
+				   dl);
 }
 
 static void ocfs2_super_lock_res_init(struct ocfs2_lock_res *res,
@@ -330,8 +401,9 @@ static void ocfs2_super_lock_res_init(struct ocfs2_lock_res *res,
 	/* Superblock lockres doesn't come from a slab so we call init
 	 * once on it manually.  */
 	ocfs2_lock_res_init_once(res);
+	ocfs2_build_lock_name(OCFS2_LOCK_TYPE_SUPER, OCFS2_SUPER_BLOCK_BLKNO,
+			      0, res->l_name);
 	ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_SUPER,
-				   OCFS2_SUPER_BLOCK_BLKNO, 0,
 				   &ocfs2_super_lops, osb);
 }
 
@@ -341,7 +413,8 @@ static void ocfs2_rename_lock_res_init(struct ocfs2_lock_res *res,
 	/* Rename lockres doesn't come from a slab so we call init
 	 * once on it manually.  */
 	ocfs2_lock_res_init_once(res);
-	ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_RENAME, 0, 0,
+	ocfs2_build_lock_name(OCFS2_LOCK_TYPE_RENAME, 0, 0, res->l_name);
+	ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_RENAME,
 				   &ocfs2_rename_lops, osb);
 }
 
@@ -627,9 +700,10 @@ static void ocfs2_generic_bast_func(struct ocfs2_super *osb,
 		ocfs2_schedule_blocked_lock(osb, lockres);
 	spin_unlock_irqrestore(&lockres->l_lock, flags);
 
+	wake_up(&lockres->l_event);
+
 	ocfs2_kick_vote_thread(osb);
 
-	wake_up(&lockres->l_event);
 	mlog_exit_void();
 }
 
@@ -690,9 +764,9 @@ static void ocfs2_generic_ast_func(struct ocfs2_lock_res *lockres,
 	/* set it to something invalid so if we get called again we
 	 * can catch it. */
 	lockres->l_action = OCFS2_AST_INVALID;
-	spin_unlock_irqrestore(&lockres->l_lock, flags);
 
 	wake_up(&lockres->l_event);
+	spin_unlock_irqrestore(&lockres->l_lock, flags);
 }
 
 static void ocfs2_super_ast_func(void *opaque)
@@ -757,6 +831,27 @@ static void ocfs2_rename_bast_func(void *opaque,
 	mlog_exit_void();
 }
 
+static void ocfs2_dentry_ast_func(void *opaque)
+{
+	struct ocfs2_lock_res *lockres = opaque;
+
+	BUG_ON(!lockres);
+
+	ocfs2_generic_ast_func(lockres, 1);
+}
+
+static void ocfs2_dentry_bast_func(void *opaque, int level)
+{
+	struct ocfs2_lock_res *lockres = opaque;
+	struct ocfs2_dentry_lock *dl = lockres->l_priv;
+	struct ocfs2_super *osb = OCFS2_SB(dl->dl_inode->i_sb);
+
+	mlog(0, "Dentry bast: level: %d, name: %s\n", level,
+	     lockres->l_name);
+
+	ocfs2_generic_bast_func(osb, lockres, level);
+}
+
 static inline void ocfs2_recover_from_dlm_error(struct ocfs2_lock_res *lockres,
 						int convert)
 {
@@ -1076,10 +1171,11 @@ static void ocfs2_cluster_unlock(struct ocfs2_super *osb,
 	mlog_exit_void();
 }
 
-static int ocfs2_create_new_inode_lock(struct inode *inode,
-				       struct ocfs2_lock_res *lockres)
+int ocfs2_create_new_lock(struct ocfs2_super *osb,
+			  struct ocfs2_lock_res *lockres,
+			  int ex)
 {
-	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+	int level =  ex ? LKM_EXMODE : LKM_PRMODE;
 	unsigned long flags;
 
 	spin_lock_irqsave(&lockres->l_lock, flags);
@@ -1087,7 +1183,7 @@ static int ocfs2_create_new_inode_lock(struct inode *inode,
 	lockres_or_flags(lockres, OCFS2_LOCK_LOCAL);
 	spin_unlock_irqrestore(&lockres->l_lock, flags);
 
-	return ocfs2_lock_create(osb, lockres, LKM_EXMODE, LKM_LOCAL);
+	return ocfs2_lock_create(osb, lockres, level, LKM_LOCAL);
 }
 
 /* Grants us an EX lock on the data and metadata resources, skipping
@@ -1099,6 +1195,7 @@ static int ocfs2_create_new_inode_lock(struct inode *inode,
 int ocfs2_create_new_inode_locks(struct inode *inode)
 {
 	int ret;
+	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
 
 	BUG_ON(!inode);
 	BUG_ON(!ocfs2_inode_is_new(inode));
@@ -1115,22 +1212,19 @@ int ocfs2_create_new_inode_locks(struct inode *inode)
 	 * on a resource which has an invalid one -- we'll set it
 	 * valid when we release the EX. */
 
-	ret = ocfs2_create_new_inode_lock(inode,
-					  &OCFS2_I(inode)->ip_rw_lockres);
+	ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_rw_lockres, 1);
 	if (ret) {
 		mlog_errno(ret);
 		goto bail;
 	}
 
-	ret = ocfs2_create_new_inode_lock(inode,
-					  &OCFS2_I(inode)->ip_meta_lockres);
+	ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_meta_lockres, 1);
 	if (ret) {
 		mlog_errno(ret);
 		goto bail;
 	}
 
-	ret = ocfs2_create_new_inode_lock(inode,
-					  &OCFS2_I(inode)->ip_data_lockres);
+	ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_data_lockres, 1);
 	if (ret) {
 		mlog_errno(ret);
 		goto bail;
@@ -1809,6 +1903,34 @@ void ocfs2_rename_unlock(struct ocfs2_super *osb)
 	ocfs2_cluster_unlock(osb, lockres, LKM_EXMODE);
 }
 
+int ocfs2_dentry_lock(struct dentry *dentry, int ex)
+{
+	int ret;
+	int level = ex ? LKM_EXMODE : LKM_PRMODE;
+	struct ocfs2_dentry_lock *dl = dentry->d_fsdata;
+	struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb);
+
+	BUG_ON(!dl);
+
+	if (ocfs2_is_hard_readonly(osb))
+		return -EROFS;
+
+	ret = ocfs2_cluster_lock(osb, &dl->dl_lockres, level, 0, 0);
+	if (ret < 0)
+		mlog_errno(ret);
+
+	return ret;
+}
+
+void ocfs2_dentry_unlock(struct dentry *dentry, int ex)
+{
+	int level = ex ? LKM_EXMODE : LKM_PRMODE;
+	struct ocfs2_dentry_lock *dl = dentry->d_fsdata;
+	struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb);
+
+	ocfs2_cluster_unlock(osb, &dl->dl_lockres, level);
+}
+
 /* Reference counting of the dlm debug structure. We want this because
  * open references on the debug inodes can live on after a mount, so
  * we can't rely on the ocfs2_super to always exist. */
@@ -1939,9 +2061,16 @@ static int ocfs2_dlm_seq_show(struct seq_file *m, void *v)
 	if (!lockres)
 		return -EINVAL;
 
-	seq_printf(m, "0x%x\t"
-		   "%.*s\t"
-		   "%d\t"
+	seq_printf(m, "0x%x\t", OCFS2_DLM_DEBUG_STR_VERSION);
+
+	if (lockres->l_type == OCFS2_LOCK_TYPE_DENTRY)
+		seq_printf(m, "%.*s%08x\t", OCFS2_DENTRY_LOCK_INO_START - 1,
+			   lockres->l_name,
+			   (unsigned int)ocfs2_get_dentry_lock_ino(lockres));
+	else
+		seq_printf(m, "%.*s\t", OCFS2_LOCK_ID_MAX_LEN, lockres->l_name);
+
+	seq_printf(m, "%d\t"
 		   "0x%lx\t"
 		   "0x%x\t"
 		   "0x%x\t"
@@ -1949,8 +2078,6 @@ static int ocfs2_dlm_seq_show(struct seq_file *m, void *v)
 		   "%u\t"
 		   "%d\t"
 		   "%d\t",
-		   OCFS2_DLM_DEBUG_STR_VERSION,
-		   OCFS2_LOCK_ID_MAX_LEN, lockres->l_name,
 		   lockres->l_level,
 		   lockres->l_flags,
 		   lockres->l_action,
@@ -2311,25 +2438,21 @@ void ocfs2_mark_lockres_freeing(struct ocfs2_lock_res *lockres)
 	spin_unlock_irqrestore(&lockres->l_lock, flags);
 }
 
-static void ocfs2_drop_osb_locks(struct ocfs2_super *osb)
+void ocfs2_simple_drop_lockres(struct ocfs2_super *osb,
+			       struct ocfs2_lock_res *lockres)
 {
-	int status;
-
-	mlog_entry_void();
-
-	ocfs2_mark_lockres_freeing(&osb->osb_super_lockres);
-
-	status = ocfs2_drop_lock(osb, &osb->osb_super_lockres, NULL);
-	if (status < 0)
-		mlog_errno(status);
-
-	ocfs2_mark_lockres_freeing(&osb->osb_rename_lockres);
+	int ret;
 
-	status = ocfs2_drop_lock(osb, &osb->osb_rename_lockres, NULL);
-	if (status < 0)
-		mlog_errno(status);
+	ocfs2_mark_lockres_freeing(lockres);
+	ret = ocfs2_drop_lock(osb, lockres, NULL);
+	if (ret)
+		mlog_errno(ret);
+}
 
-	mlog_exit(status);
+static void ocfs2_drop_osb_locks(struct ocfs2_super *osb)
+{
+	ocfs2_simple_drop_lockres(osb, &osb->osb_super_lockres);
+	ocfs2_simple_drop_lockres(osb, &osb->osb_rename_lockres);
 }
 
 static void ocfs2_meta_pre_drop(struct ocfs2_lock_res *lockres, void *data)
@@ -2599,7 +2722,7 @@ leave:
 
 static int ocfs2_generic_unblock_lock(struct ocfs2_super *osb,
 				      struct ocfs2_lock_res *lockres,
-				      int *requeue,
+				      struct ocfs2_unblock_ctl *ctl,
 				      ocfs2_convert_worker_t *worker)
 {
 	unsigned long flags;
@@ -2615,7 +2738,7 @@ static int ocfs2_generic_unblock_lock(struct ocfs2_super *osb,
 
 recheck:
 	if (lockres->l_flags & OCFS2_LOCK_BUSY) {
-		*requeue = 1;
+		ctl->requeue = 1;
 		ret = ocfs2_prepare_cancel_convert(osb, lockres);
 		spin_unlock_irqrestore(&lockres->l_lock, flags);
 		if (ret) {
@@ -2631,7 +2754,7 @@ recheck:
 	if ((lockres->l_blocking == LKM_EXMODE)
 	    && (lockres->l_ex_holders || lockres->l_ro_holders)) {
 		spin_unlock_irqrestore(&lockres->l_lock, flags);
-		*requeue = 1;
+		ctl->requeue = 1;
 		ret = 0;
 		goto leave;
 	}
@@ -2641,7 +2764,7 @@ recheck:
 	if (lockres->l_blocking == LKM_PRMODE &&
 	    lockres->l_ex_holders) {
 		spin_unlock_irqrestore(&lockres->l_lock, flags);
-		*requeue = 1;
+		ctl->requeue = 1;
 		ret = 0;
 		goto leave;
 	}
@@ -2659,7 +2782,10 @@ recheck:
 	blocking = lockres->l_blocking;
 	spin_unlock_irqrestore(&lockres->l_lock, flags);
 
-	worker(lockres, blocking);
+	ctl->unblock_action = worker(lockres, blocking);
+
+	if (ctl->unblock_action == UNBLOCK_STOP_POST)
+		goto leave;
 
 	spin_lock_irqsave(&lockres->l_lock, flags);
 	if (blocking != lockres->l_blocking) {
@@ -2669,7 +2795,7 @@ recheck:
 	}
 
 downconvert:
-	*requeue = 0;
+	ctl->requeue = 0;
 	new_level = ocfs2_highest_compat_lock_level(lockres->l_blocking);
 
 	ocfs2_prepare_downconvert(lockres, new_level);
@@ -2680,14 +2806,12 @@ leave:
 	return ret;
 }
 
-static void ocfs2_data_convert_worker(struct ocfs2_lock_res *lockres,
-				      int blocking)
+static int ocfs2_data_convert_worker(struct ocfs2_lock_res *lockres,
+				     int blocking)
 {
 	struct inode *inode;
 	struct address_space *mapping;
 
-	mlog_entry_void();
-
        	inode = ocfs2_lock_res_inode(lockres);
 	mapping = inode->i_mapping;
 
@@ -2708,11 +2832,11 @@ static void ocfs2_data_convert_worker(struct ocfs2_lock_res *lockres,
 		filemap_fdatawait(mapping);
 	}
 
-	mlog_exit_void();
+	return UNBLOCK_CONTINUE;
 }
 
 int ocfs2_unblock_data(struct ocfs2_lock_res *lockres,
-		       int *requeue)
+		       struct ocfs2_unblock_ctl *ctl)
 {
 	int status;
 	struct inode *inode;
@@ -2726,22 +2850,20 @@ int ocfs2_unblock_data(struct ocfs2_lock_res *lockres,
 	mlog(0, "unblock inode %llu\n",
 	     (unsigned long long)OCFS2_I(inode)->ip_blkno);
 
-	status = ocfs2_generic_unblock_lock(osb,
-					    lockres,
-					    requeue,
+	status = ocfs2_generic_unblock_lock(osb, lockres, ctl,
 					    ocfs2_data_convert_worker);
 	if (status < 0)
 		mlog_errno(status);
 
 	mlog(0, "inode %llu, requeue = %d\n",
-	     (unsigned long long)OCFS2_I(inode)->ip_blkno, *requeue);
+	     (unsigned long long)OCFS2_I(inode)->ip_blkno, ctl->requeue);
 
 	mlog_exit(status);
 	return status;
 }
 
 static int ocfs2_unblock_inode_lock(struct ocfs2_lock_res *lockres,
-				    int *requeue)
+				    struct ocfs2_unblock_ctl *ctl)
 {
 	int status;
 	struct inode *inode;
@@ -2753,9 +2875,7 @@ static int ocfs2_unblock_inode_lock(struct ocfs2_lock_res *lockres,
 	inode  = ocfs2_lock_res_inode(lockres);
 
 	status = ocfs2_generic_unblock_lock(OCFS2_SB(inode->i_sb),
-					    lockres,
-					    requeue,
-					    NULL);
+					    lockres, ctl, NULL);
 	if (status < 0)
 		mlog_errno(status);
 
@@ -2763,9 +2883,8 @@ static int ocfs2_unblock_inode_lock(struct ocfs2_lock_res *lockres,
 	return status;
 }
 
-
-int ocfs2_unblock_meta(struct ocfs2_lock_res *lockres,
-		       int *requeue)
+static int ocfs2_unblock_meta(struct ocfs2_lock_res *lockres,
+			      struct ocfs2_unblock_ctl *ctl)
 {
 	int status;
 	struct inode *inode;
@@ -2777,21 +2896,165 @@ int ocfs2_unblock_meta(struct ocfs2_lock_res *lockres,
 	mlog(0, "unblock inode %llu\n",
 	     (unsigned long long)OCFS2_I(inode)->ip_blkno);
 
-	status = ocfs2_do_unblock_meta(inode, requeue);
+	status = ocfs2_do_unblock_meta(inode, &ctl->requeue);
 	if (status < 0)
 		mlog_errno(status);
 
 	mlog(0, "inode %llu, requeue = %d\n",
-	     (unsigned long long)OCFS2_I(inode)->ip_blkno, *requeue);
+	     (unsigned long long)OCFS2_I(inode)->ip_blkno, ctl->requeue);
 
 	mlog_exit(status);
 	return status;
 }
 
+/*
+ * Does the final reference drop on our dentry lock. Right now this
+ * happens in the vote thread, but we could choose to simplify the
+ * dlmglue API and push these off to the ocfs2_wq in the future.
+ */
+static void ocfs2_dentry_post_unlock(struct ocfs2_super *osb,
+				     struct ocfs2_lock_res *lockres)
+{
+	struct ocfs2_dentry_lock *dl = ocfs2_lock_res_dl(lockres);
+	ocfs2_dentry_lock_put(osb, dl);
+}
+
+/*
+ * d_delete() matching dentries before the lock downconvert.
+ *
+ * At this point, any process waiting to destroy the
+ * dentry_lock due to last ref count is stopped by the
+ * OCFS2_LOCK_QUEUED flag.
+ *
+ * We have two potential problems
+ *
+ * 1) If we do the last reference drop on our dentry_lock (via dput)
+ *    we'll wind up in ocfs2_release_dentry_lock(), waiting on
+ *    the downconvert to finish. Instead we take an elevated
+ *    reference and push the drop until after we've completed our
+ *    unblock processing.
+ *
+ * 2) There might be another process with a final reference,
+ *    waiting on us to finish processing. If this is the case, we
+ *    detect it and exit out - there's no more dentries anyway.
+ */
+static int ocfs2_dentry_convert_worker(struct ocfs2_lock_res *lockres,
+				       int blocking)
+{
+	struct ocfs2_dentry_lock *dl = ocfs2_lock_res_dl(lockres);
+	struct ocfs2_inode_info *oi = OCFS2_I(dl->dl_inode);
+	struct dentry *dentry;
+	unsigned long flags;
+	int extra_ref = 0;
+
+	/*
+	 * This node is blocking another node from getting a read
+	 * lock. This happens when we've renamed within a
+	 * directory. We've forced the other nodes to d_delete(), but
+	 * we never actually dropped our lock because it's still
+	 * valid. The downconvert code will retain a PR for this node,
+	 * so there's no further work to do.
+	 */
+	if (blocking == LKM_PRMODE)
+		return UNBLOCK_CONTINUE;
+
+	/*
+	 * Mark this inode as potentially orphaned. The code in
+	 * ocfs2_delete_inode() will figure out whether it actually
+	 * needs to be freed or not.
+	 */
+	spin_lock(&oi->ip_lock);
+	oi->ip_flags |= OCFS2_INODE_MAYBE_ORPHANED;
+	spin_unlock(&oi->ip_lock);
+
+	/*
+	 * Yuck. We need to make sure however that the check of
+	 * OCFS2_LOCK_FREEING and the extra reference are atomic with
+	 * respect to a reference decrement or the setting of that
+	 * flag.
+	 */
+	spin_lock_irqsave(&lockres->l_lock, flags);
+	spin_lock(&dentry_attach_lock);
+	if (!(lockres->l_flags & OCFS2_LOCK_FREEING)
+	    && dl->dl_count) {
+		dl->dl_count++;
+		extra_ref = 1;
+	}
+	spin_unlock(&dentry_attach_lock);
+	spin_unlock_irqrestore(&lockres->l_lock, flags);
+
+	mlog(0, "extra_ref = %d\n", extra_ref);
+
+	/*
+	 * We have a process waiting on us in ocfs2_dentry_iput(),
+	 * which means we can't have any more outstanding
+	 * aliases. There's no need to do any more work.
+	 */
+	if (!extra_ref)
+		return UNBLOCK_CONTINUE;
+
+	spin_lock(&dentry_attach_lock);
+	while (1) {
+		dentry = ocfs2_find_local_alias(dl->dl_inode,
+						dl->dl_parent_blkno, 1);
+		if (!dentry)
+			break;
+		spin_unlock(&dentry_attach_lock);
+
+		mlog(0, "d_delete(%.*s);\n", dentry->d_name.len,
+		     dentry->d_name.name);
+
+		/*
+		 * The following dcache calls may do an
+		 * iput(). Normally we don't want that from the
+		 * downconverting thread, but in this case it's ok
+		 * because the requesting node already has an
+		 * exclusive lock on the inode, so it can't be queued
+		 * for a downconvert.
+		 */
+		d_delete(dentry);
+		dput(dentry);
+
+		spin_lock(&dentry_attach_lock);
+	}
+	spin_unlock(&dentry_attach_lock);
+
+	/*
+	 * If we are the last holder of this dentry lock, there is no
+	 * reason to downconvert so skip straight to the unlock.
+	 */
+	if (dl->dl_count == 1)
+		return UNBLOCK_STOP_POST;
+
+	return UNBLOCK_CONTINUE_POST;
+}
+
+static int ocfs2_unblock_dentry_lock(struct ocfs2_lock_res *lockres,
+				     struct ocfs2_unblock_ctl *ctl)
+{
+	int ret;
+	struct ocfs2_dentry_lock *dl = ocfs2_lock_res_dl(lockres);
+	struct ocfs2_super *osb = OCFS2_SB(dl->dl_inode->i_sb);
+
+	mlog(0, "unblock dentry lock: %llu\n",
+	     (unsigned long long)OCFS2_I(dl->dl_inode)->ip_blkno);
+
+	ret = ocfs2_generic_unblock_lock(osb,
+					 lockres,
+					 ctl,
+					 ocfs2_dentry_convert_worker);
+	if (ret < 0)
+		mlog_errno(ret);
+
+	mlog(0, "requeue = %d, post = %d\n", ctl->requeue, ctl->unblock_action);
+
+	return ret;
+}
+
 /* Generic unblock function for any lockres whose private data is an
  * ocfs2_super pointer. */
 static int ocfs2_unblock_osb_lock(struct ocfs2_lock_res *lockres,
-				  int *requeue)
+				  struct ocfs2_unblock_ctl *ctl)
 {
 	int status;
 	struct ocfs2_super *osb;
@@ -2804,7 +3067,7 @@ static int ocfs2_unblock_osb_lock(struct ocfs2_lock_res *lockres,
 
 	status = ocfs2_generic_unblock_lock(osb,
 					    lockres,
-					    requeue,
+					    ctl,
 					    NULL);
 	if (status < 0)
 		mlog_errno(status);
@@ -2817,7 +3080,7 @@ void ocfs2_process_blocked_lock(struct ocfs2_super *osb,
 				struct ocfs2_lock_res *lockres)
 {
 	int status;
-	int requeue = 0;
+	struct ocfs2_unblock_ctl ctl = {0, 0,};
 	unsigned long flags;
 
 	/* Our reference to the lockres in this function can be
@@ -2842,21 +3105,25 @@ void ocfs2_process_blocked_lock(struct ocfs2_super *osb,
 		goto unqueue;
 	spin_unlock_irqrestore(&lockres->l_lock, flags);
 
-	status = lockres->l_ops->unblock(lockres, &requeue);
+	status = lockres->l_ops->unblock(lockres, &ctl);
 	if (status < 0)
 		mlog_errno(status);
 
 	spin_lock_irqsave(&lockres->l_lock, flags);
 unqueue:
-	if (lockres->l_flags & OCFS2_LOCK_FREEING || !requeue) {
+	if (lockres->l_flags & OCFS2_LOCK_FREEING || !ctl.requeue) {
 		lockres_clear_flags(lockres, OCFS2_LOCK_QUEUED);
 	} else
 		ocfs2_schedule_blocked_lock(osb, lockres);
 
 	mlog(0, "lockres %s, requeue = %s.\n", lockres->l_name,
-	     requeue ? "yes" : "no");
+	     ctl.requeue ? "yes" : "no");
 	spin_unlock_irqrestore(&lockres->l_lock, flags);
 
+	if (ctl.unblock_action != UNBLOCK_CONTINUE
+	    && lockres->l_ops->post_unlock)
+		lockres->l_ops->post_unlock(osb, lockres);
+
 	mlog_exit_void();
 }
 
diff --git a/fs/ocfs2/dlmglue.h b/fs/ocfs2/dlmglue.h
index 243ae86..3402515 100644
--- a/fs/ocfs2/dlmglue.h
+++ b/fs/ocfs2/dlmglue.h
@@ -27,6 +27,8 @@
 #ifndef DLMGLUE_H
 #define DLMGLUE_H
 
+#include "dcache.h"
+
 #define OCFS2_LVB_VERSION 3
 
 struct ocfs2_meta_lvb {
@@ -58,8 +60,12 @@ void ocfs2_lock_res_init_once(struct ocfs2_lock_res *res);
 void ocfs2_inode_lock_res_init(struct ocfs2_lock_res *res,
 			       enum ocfs2_lock_type type,
 			       struct inode *inode);
+void ocfs2_dentry_lock_res_init(struct ocfs2_dentry_lock *dl,
+				u64 parent, struct inode *inode);
 void ocfs2_lock_res_free(struct ocfs2_lock_res *res);
 int ocfs2_create_new_inode_locks(struct inode *inode);
+int ocfs2_create_new_lock(struct ocfs2_super *osb,
+			  struct ocfs2_lock_res *lockres, int ex);
 int ocfs2_drop_inode_locks(struct inode *inode);
 int ocfs2_data_lock_full(struct inode *inode,
 			 int write,
@@ -93,7 +99,12 @@ void ocfs2_super_unlock(struct ocfs2_super *osb,
 			int ex);
 int ocfs2_rename_lock(struct ocfs2_super *osb);
 void ocfs2_rename_unlock(struct ocfs2_super *osb);
+int ocfs2_dentry_lock(struct dentry *dentry, int ex);
+void ocfs2_dentry_unlock(struct dentry *dentry, int ex);
+
 void ocfs2_mark_lockres_freeing(struct ocfs2_lock_res *lockres);
+void ocfs2_simple_drop_lockres(struct ocfs2_super *osb,
+			       struct ocfs2_lock_res *lockres);
 
 /* for the vote thread */
 void ocfs2_process_blocked_lock(struct ocfs2_super *osb,
diff --git a/fs/ocfs2/ocfs2_lockid.h b/fs/ocfs2/ocfs2_lockid.h
index 7dd9e1e..4d5d565 100644
--- a/fs/ocfs2/ocfs2_lockid.h
+++ b/fs/ocfs2/ocfs2_lockid.h
@@ -35,12 +35,15 @@
 #define OCFS2_LOCK_ID_MAX_LEN  32
 #define OCFS2_LOCK_ID_PAD "000000"
 
+#define OCFS2_DENTRY_LOCK_INO_START 18
+
 enum ocfs2_lock_type {
 	OCFS2_LOCK_TYPE_META = 0,
 	OCFS2_LOCK_TYPE_DATA,
 	OCFS2_LOCK_TYPE_SUPER,
 	OCFS2_LOCK_TYPE_RENAME,
 	OCFS2_LOCK_TYPE_RW,
+	OCFS2_LOCK_TYPE_DENTRY,
 	OCFS2_NUM_LOCK_TYPES
 };
 
@@ -63,6 +66,9 @@ static inline char ocfs2_lock_type_char(enum ocfs2_lock_type type)
 		case OCFS2_LOCK_TYPE_RW:
 			c = 'W';
 			break;
+		case OCFS2_LOCK_TYPE_DENTRY:
+			c = 'N';
+			break;
 		default:
 			c = '\0';
 	}
@@ -70,4 +76,23 @@ static inline char ocfs2_lock_type_char(enum ocfs2_lock_type type)
 	return c;
 }
 
+static char *ocfs2_lock_type_strings[] = {
+	[OCFS2_LOCK_TYPE_META] = "Meta",
+	[OCFS2_LOCK_TYPE_DATA] = "Data",
+	[OCFS2_LOCK_TYPE_SUPER] = "Super",
+	[OCFS2_LOCK_TYPE_RENAME] = "Rename",
+	/* Need to differntiate from [R]ename.. serializing writes is the
+	 * important job it does, anyway. */
+	[OCFS2_LOCK_TYPE_RW] = "Write/Read",
+	[OCFS2_LOCK_TYPE_DENTRY] = "Dentry",
+};
+
+static inline const char *ocfs2_lock_type_string(enum ocfs2_lock_type type)
+{
+#ifdef __KERNEL__
+	mlog_bug_on_msg(type >= OCFS2_NUM_LOCK_TYPES, "%d\n", type);
+#endif
+	return ocfs2_lock_type_strings[type];
+}
+
 #endif  /* OCFS2_LOCKID_H */
-- 
cgit v1.1


From 80c05846f604bab6d61e9732c262420ee9f5f358 Mon Sep 17 00:00:00 2001
From: Mark Fasheh <mark.fasheh@oracle.com>
Date: Fri, 8 Sep 2006 14:43:18 -0700
Subject: ocfs2: Add dentry tracking API

Replace the dentry vote mechanism with a cluster lock which covers a set
of dentries. This allows us to force d_delete() only on nodes which actually
care about an unlink.

Every node that does a ->lookup() gets a read only lock on the dentry, until
an unlink during which the unlinking node, will request an exclusive lock,
forcing the other nodes who care about that dentry to d_delete() it. The
effect is that we retain a very lightweight ->d_revalidate(), and at the
same time get to make large improvements to the average case performance of
the ocfs2 unlink and rename operations.

This patch adds the higher level API and the dentry manipulation code.

Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
---
 fs/ocfs2/dcache.c  | 375 ++++++++++++++++++++++++++++++++++++++++++++++++++---
 fs/ocfs2/dcache.h  |  22 ++--
 fs/ocfs2/sysfile.c |   4 +-
 3 files changed, 369 insertions(+), 32 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/dcache.c b/fs/ocfs2/dcache.c
index aea4577..09efe24 100644
--- a/fs/ocfs2/dcache.c
+++ b/fs/ocfs2/dcache.c
@@ -35,15 +35,17 @@
 
 #include "alloc.h"
 #include "dcache.h"
+#include "dlmglue.h"
 #include "file.h"
 #include "inode.h"
 
+
 static int ocfs2_dentry_revalidate(struct dentry *dentry,
 				   struct nameidata *nd)
 {
 	struct inode *inode = dentry->d_inode;
 	int ret = 0;    /* if all else fails, just return false */
-	struct ocfs2_super *osb;
+	struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb);
 
 	mlog_entry("(0x%p, '%.*s')\n", dentry,
 		   dentry->d_name.len, dentry->d_name.name);
@@ -55,28 +57,31 @@ static int ocfs2_dentry_revalidate(struct dentry *dentry,
 		goto bail;
 	}
 
-	osb = OCFS2_SB(inode->i_sb);
-
 	BUG_ON(!osb);
 
-	if (inode != osb->root_inode) {
-		spin_lock(&OCFS2_I(inode)->ip_lock);
-		/* did we or someone else delete this inode? */
-		if (OCFS2_I(inode)->ip_flags & OCFS2_INODE_DELETED) {
-			spin_unlock(&OCFS2_I(inode)->ip_lock);
-			mlog(0, "inode (%llu) deleted, returning false\n",
-			     (unsigned long long)OCFS2_I(inode)->ip_blkno);
-			goto bail;
-		}
+	if (inode == osb->root_inode || is_bad_inode(inode))
+		goto bail;
+
+	spin_lock(&OCFS2_I(inode)->ip_lock);
+	/* did we or someone else delete this inode? */
+	if (OCFS2_I(inode)->ip_flags & OCFS2_INODE_DELETED) {
 		spin_unlock(&OCFS2_I(inode)->ip_lock);
+		mlog(0, "inode (%llu) deleted, returning false\n",
+		     (unsigned long long)OCFS2_I(inode)->ip_blkno);
+		goto bail;
+	}
+	spin_unlock(&OCFS2_I(inode)->ip_lock);
 
-		if (!inode->i_nlink) {
-			mlog(0, "Inode %llu orphaned, returning false "
-			     "dir = %d\n",
-			     (unsigned long long)OCFS2_I(inode)->ip_blkno,
-			     S_ISDIR(inode->i_mode));
-			goto bail;
-		}
+	/*
+	 * We don't need a cluster lock to test this because once an
+	 * inode nlink hits zero, it never goes back.
+	 */
+	if (inode->i_nlink == 0) {
+		mlog(0, "Inode %llu orphaned, returning false "
+		     "dir = %d\n",
+		     (unsigned long long)OCFS2_I(inode)->ip_blkno,
+		     S_ISDIR(inode->i_mode));
+		goto bail;
 	}
 
 	ret = 1;
@@ -87,8 +92,340 @@ bail:
 	return ret;
 }
 
+static int ocfs2_match_dentry(struct dentry *dentry,
+			      u64 parent_blkno,
+			      int skip_unhashed)
+{
+	struct inode *parent;
+
+	/*
+	 * ocfs2_lookup() does a d_splice_alias() _before_ attaching
+	 * to the lock data, so we skip those here, otherwise
+	 * ocfs2_dentry_attach_lock() will get its original dentry
+	 * back.
+	 */
+	if (!dentry->d_fsdata)
+		return 0;
+
+	if (!dentry->d_parent)
+		return 0;
+
+	if (skip_unhashed && d_unhashed(dentry))
+		return 0;
+
+	parent = dentry->d_parent->d_inode;
+	/* Negative parent dentry? */
+	if (!parent)
+		return 0;
+
+	/* Name is in a different directory. */
+	if (OCFS2_I(parent)->ip_blkno != parent_blkno)
+		return 0;
+
+	return 1;
+}
+
+/*
+ * Walk the inode alias list, and find a dentry which has a given
+ * parent. ocfs2_dentry_attach_lock() wants to find _any_ alias as it
+ * is looking for a dentry_lock reference. The vote thread is looking
+ * to unhash aliases, so we allow it to skip any that already have
+ * that property.
+ */
+struct dentry *ocfs2_find_local_alias(struct inode *inode,
+				      u64 parent_blkno,
+				      int skip_unhashed)
+{
+	struct list_head *p;
+	struct dentry *dentry = NULL;
+
+	spin_lock(&dcache_lock);
+
+	list_for_each(p, &inode->i_dentry) {
+		dentry = list_entry(p, struct dentry, d_alias);
+
+		if (ocfs2_match_dentry(dentry, parent_blkno, skip_unhashed)) {
+			mlog(0, "dentry found: %.*s\n",
+			     dentry->d_name.len, dentry->d_name.name);
+
+			dget_locked(dentry);
+			break;
+		}
+
+		dentry = NULL;
+	}
+
+	spin_unlock(&dcache_lock);
+
+	return dentry;
+}
+
 DEFINE_SPINLOCK(dentry_attach_lock);
 
+/*
+ * Attach this dentry to a cluster lock.
+ *
+ * Dentry locks cover all links in a given directory to a particular
+ * inode. We do this so that ocfs2 can build a lock name which all
+ * nodes in the cluster can agree on at all times. Shoving full names
+ * in the cluster lock won't work due to size restrictions. Covering
+ * links inside of a directory is a good compromise because it still
+ * allows us to use the parent directory lock to synchronize
+ * operations.
+ *
+ * Call this function with the parent dir semaphore and the parent dir
+ * cluster lock held.
+ *
+ * The dir semaphore will protect us from having to worry about
+ * concurrent processes on our node trying to attach a lock at the
+ * same time.
+ *
+ * The dir cluster lock (held at either PR or EX mode) protects us
+ * from unlink and rename on other nodes.
+ *
+ * The 'create' flag tells us whether we're doing this as a result of
+ * a file creation.
+ *
+ * A dput() can happen asynchronously due to pruning, so we cover
+ * attaching and detaching the dentry lock with a
+ * dentry_attach_lock.
+ *
+ * A node which has done lookup on a name retains a protected read
+ * lock until final dput. If the user requests and unlink or rename,
+ * the protected read is upgraded to an exclusive lock. Other nodes
+ * who have seen the dentry will then be informed that they need to
+ * downgrade their lock, which will involve d_delete on the
+ * dentry. This happens in ocfs2_dentry_convert_worker().
+ */
+int ocfs2_dentry_attach_lock(struct dentry *dentry,
+			     struct inode *inode,
+			     u64 parent_blkno,
+			     int create)
+{
+	int ret;
+	struct dentry *alias;
+	struct ocfs2_dentry_lock *dl = dentry->d_fsdata;
+
+	mlog(0, "Attach \"%.*s\", parent %llu, create %d, fsdata: %p\n",
+	     dentry->d_name.len, dentry->d_name.name,
+	     (unsigned long long)parent_blkno, create, dl);
+
+	/*
+	 * Negative dentry. We ignore these for now.
+	 *
+	 * XXX: Could we can improve ocfs2_dentry_revalidate() by
+	 * tracking these?
+	 */
+	if (!inode)
+		return 0;
+
+	if (dl) {
+		mlog_bug_on_msg(dl->dl_parent_blkno != parent_blkno,
+				" \"%.*s\": old parent: %llu, new: %llu\n",
+				dentry->d_name.len, dentry->d_name.name,
+				(unsigned long long)parent_blkno,
+				(unsigned long long)dl->dl_parent_blkno);
+		return 0;
+	}
+
+	alias = ocfs2_find_local_alias(inode, parent_blkno, 0);
+	if (alias) {
+		/*
+		 * Great, an alias exists, which means we must have a
+		 * dentry lock already. We can just grab the lock off
+		 * the alias and add it to the list.
+		 *
+		 * We're depending here on the fact that this dentry
+		 * was found and exists in the dcache and so must have
+		 * a reference to the dentry_lock because we can't
+		 * race creates. Final dput() cannot happen on it
+		 * since we have it pinned, so our reference is safe.
+		 */
+		dl = alias->d_fsdata;
+		mlog_bug_on_msg(!dl, "parent %llu, ino %llu, create %d\n",
+				(unsigned long long)parent_blkno,
+				(unsigned long long)OCFS2_I(inode)->ip_blkno,
+				create);
+
+		mlog_bug_on_msg(dl->dl_parent_blkno != parent_blkno,
+				" \"%.*s\": old parent: %llu, new: %llu\n",
+				dentry->d_name.len, dentry->d_name.name,
+				(unsigned long long)parent_blkno,
+				(unsigned long long)dl->dl_parent_blkno);
+
+		mlog(0, "Found: %s\n", dl->dl_lockres.l_name);
+
+		goto out_attach;
+	}
+
+	/*
+	 * There are no other aliases
+	 */
+	dl = kmalloc(sizeof(*dl), GFP_NOFS);
+	if (!dl) {
+		ret = -ENOMEM;
+		mlog_errno(ret);
+		return ret;
+	}
+
+	dl->dl_count = 0;
+	/*
+	 * Does this have to happen below, for all attaches, in case
+	 * the struct inode gets blown away by votes?
+	 */
+	dl->dl_inode = igrab(inode);
+	dl->dl_parent_blkno = parent_blkno;
+	ocfs2_dentry_lock_res_init(dl, parent_blkno, inode);
+
+out_attach:
+	spin_lock(&dentry_attach_lock);
+	dentry->d_fsdata = dl;
+	dl->dl_count++;
+	spin_unlock(&dentry_attach_lock);
+
+	/*
+	 * Creation of a new file means that nobody can possibly have
+	 * this name in the system, which means that acquiry of those
+	 * locks can easily be optimized.
+	 */
+	if (create) {
+		ret = ocfs2_create_new_lock(OCFS2_SB(inode->i_sb),
+					    &dl->dl_lockres, 0);
+		if (ret)
+			mlog_errno(ret);
+		goto out;
+	}
+
+	/*
+	 * This actually gets us our PRMODE level lock. From now on,
+	 * we'll have a notification if one of these names is
+	 * destroyed on another node.
+	 */
+	ret = ocfs2_dentry_lock(dentry, 0);
+	if (ret) {
+		mlog_errno(ret);
+		goto out;
+	}
+	ocfs2_dentry_unlock(dentry, 0);
+
+out:
+	dput(alias);
+
+	return ret;
+}
+
+/*
+ * ocfs2_dentry_iput() and friends.
+ *
+ * At this point, our particular dentry is detached from the inodes
+ * alias list, so there's no way that the locking code can find it.
+ *
+ * The interesting stuff happens when we determine that our lock needs
+ * to go away because this is the last subdir alias in the
+ * system. This function needs to handle a couple things:
+ *
+ * 1) Synchronizing lock shutdown with the downconvert threads. This
+ *    is already handled for us via the lockres release drop function
+ *    called in ocfs2_release_dentry_lock()
+ *
+ * 2) A race may occur when we're doing our lock shutdown and
+ *    another process wants to create a new dentry lock. Right now we
+ *    let them race, which means that for a very short while, this
+ *    node might have two locks on a lock resource. This should be a
+ *    problem though because one of them is in the process of being
+ *    thrown out.
+ */
+static void ocfs2_drop_dentry_lock(struct ocfs2_super *osb,
+				   struct ocfs2_dentry_lock *dl)
+{
+	ocfs2_simple_drop_lockres(osb, &dl->dl_lockres);
+	ocfs2_lock_res_free(&dl->dl_lockres);
+	iput(dl->dl_inode);
+	kfree(dl);
+}
+
+void ocfs2_dentry_lock_put(struct ocfs2_super *osb,
+			   struct ocfs2_dentry_lock *dl)
+{
+	int unlock = 0;
+
+	BUG_ON(dl->dl_count == 0);
+
+	spin_lock(&dentry_attach_lock);
+	dl->dl_count--;
+	unlock = !dl->dl_count;
+	spin_unlock(&dentry_attach_lock);
+
+	if (unlock)
+		ocfs2_drop_dentry_lock(osb, dl);
+}
+
+static void ocfs2_dentry_iput(struct dentry *dentry, struct inode *inode)
+{
+	struct ocfs2_dentry_lock *dl = dentry->d_fsdata;
+
+	mlog_bug_on_msg(!dl && !(dentry->d_flags & DCACHE_DISCONNECTED),
+			"dentry: %.*s\n", dentry->d_name.len,
+			dentry->d_name.name);
+
+	if (!dl)
+		goto out;
+
+	mlog_bug_on_msg(dl->dl_count == 0, "dentry: %.*s, count: %u\n",
+			dentry->d_name.len, dentry->d_name.name,
+			dl->dl_count);
+
+	ocfs2_dentry_lock_put(OCFS2_SB(dentry->d_sb), dl);
+
+out:
+	iput(inode);
+}
+
+/*
+ * d_move(), but keep the locks in sync.
+ *
+ * When we are done, "dentry" will have the parent dir and name of
+ * "target", which will be thrown away.
+ *
+ * We manually update the lock of "dentry" if need be.
+ *
+ * "target" doesn't have it's dentry lock touched - we allow the later
+ * dput() to handle this for us.
+ *
+ * This is called during ocfs2_rename(), while holding parent
+ * directory locks. The dentries have already been deleted on other
+ * nodes via ocfs2_remote_dentry_delete().
+ *
+ * Normally, the VFS handles the d_move() for the file sytem, after
+ * the ->rename() callback. OCFS2 wants to handle this internally, so
+ * the new lock can be created atomically with respect to the cluster.
+ */
+void ocfs2_dentry_move(struct dentry *dentry, struct dentry *target,
+		       struct inode *old_dir, struct inode *new_dir)
+{
+	int ret;
+	struct ocfs2_super *osb = OCFS2_SB(old_dir->i_sb);
+	struct inode *inode = dentry->d_inode;
+
+	/*
+	 * Move within the same directory, so the actual lock info won't
+	 * change.
+	 *
+	 * XXX: Is there any advantage to dropping the lock here?
+	 */
+	if (old_dir == new_dir)
+		return;
+
+	ocfs2_dentry_lock_put(osb, dentry->d_fsdata);
+
+	dentry->d_fsdata = NULL;
+	ret = ocfs2_dentry_attach_lock(dentry, inode,
+				       OCFS2_I(new_dir)->ip_blkno, 0);
+	if (ret)
+		mlog_errno(ret);
+}
+
 struct dentry_operations ocfs2_dentry_ops = {
 	.d_revalidate		= ocfs2_dentry_revalidate,
+	.d_iput			= ocfs2_dentry_iput,
 };
diff --git a/fs/ocfs2/dcache.h b/fs/ocfs2/dcache.h
index f1423c2..e53abe7 100644
--- a/fs/ocfs2/dcache.h
+++ b/fs/ocfs2/dcache.h
@@ -41,17 +41,17 @@ struct ocfs2_dentry_lock {
 	struct ocfs2_lock_res	dl_lockres;
 };
 
-static inline void ocfs2_dentry_lock_put(struct ocfs2_super *osb,
-					 struct ocfs2_dentry_lock *dl)
-{
-}
-
-static inline struct dentry *ocfs2_find_local_alias(struct inode *inode,
-						    u64 parent_blkno,
-						    int skip_unhashed)
-{
-	return NULL;
-}
+int ocfs2_dentry_attach_lock(struct dentry *dentry, struct inode *inode,
+			     u64 parent_blkno, int create);
+
+void ocfs2_dentry_lock_put(struct ocfs2_super *osb,
+			   struct ocfs2_dentry_lock *dl);
+
+struct dentry *ocfs2_find_local_alias(struct inode *inode, u64 parent_blkno,
+				      int skip_unhashed);
+
+void ocfs2_dentry_move(struct dentry *dentry, struct dentry *target,
+		       struct inode *old_dir, struct inode *new_dir);
 
 extern spinlock_t dentry_attach_lock;
 
diff --git a/fs/ocfs2/sysfile.c b/fs/ocfs2/sysfile.c
index fc29cb7..9843500 100644
--- a/fs/ocfs2/sysfile.c
+++ b/fs/ocfs2/sysfile.c
@@ -28,11 +28,11 @@
 #include <linux/slab.h>
 #include <linux/highmem.h>
 
-#include "ocfs2.h"
-
 #define MLOG_MASK_PREFIX ML_INODE
 #include <cluster/masklog.h>
 
+#include "ocfs2.h"
+
 #include "alloc.h"
 #include "dir.h"
 #include "inode.h"
-- 
cgit v1.1


From 379dfe9d0db99ed33fb089fcb9c07f5f92566e9e Mon Sep 17 00:00:00 2001
From: Mark Fasheh <mark.fasheh@oracle.com>
Date: Fri, 8 Sep 2006 14:21:03 -0700
Subject: ocfs2: Hook rest of the file system into dentry locking API

Actually replace the vote calls with the new dentry operations. Make any
necessary adjustments to get the scheme to work.

Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
---
 fs/ocfs2/cluster/tcp_internal.h |   5 +-
 fs/ocfs2/export.c               |   4 ++
 fs/ocfs2/inode.c                |  10 ++--
 fs/ocfs2/namei.c                | 116 ++++++++++++++++++++++++++++------------
 4 files changed, 94 insertions(+), 41 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/cluster/tcp_internal.h b/fs/ocfs2/cluster/tcp_internal.h
index ff9e2e2..da42b51 100644
--- a/fs/ocfs2/cluster/tcp_internal.h
+++ b/fs/ocfs2/cluster/tcp_internal.h
@@ -44,11 +44,14 @@
  * locking semantics of the file system using the protocol.  It should 
  * be somewhere else, I'm sure, but right now it isn't.
  *
+ * New in version 3:
+ * 	- Replace dentry votes with a cluster lock
+ *
  * New in version 2:
  * 	- full 64 bit i_size in the metadata lock lvbs
  * 	- introduction of "rw" lock and pushing meta/data locking down
  */
-#define O2NET_PROTOCOL_VERSION 2ULL
+#define O2NET_PROTOCOL_VERSION 3ULL
 struct o2net_handshake {
 	__be64	protocol_version;
 	__be64	connector_id;
diff --git a/fs/ocfs2/export.c b/fs/ocfs2/export.c
index ec55ab3..ffcd797 100644
--- a/fs/ocfs2/export.c
+++ b/fs/ocfs2/export.c
@@ -33,6 +33,7 @@
 
 #include "dir.h"
 #include "dlmglue.h"
+#include "dcache.h"
 #include "export.h"
 #include "inode.h"
 
@@ -77,6 +78,7 @@ static struct dentry *ocfs2_get_dentry(struct super_block *sb, void *vobjp)
 		mlog_errno(-ENOMEM);
 		return ERR_PTR(-ENOMEM);
 	}
+	result->d_op = &ocfs2_dentry_ops;
 
 	mlog_exit_ptr(result);
 	return result;
@@ -127,6 +129,8 @@ static struct dentry *ocfs2_get_parent(struct dentry *child)
 		parent = ERR_PTR(-ENOMEM);
 	}
 
+	parent->d_op = &ocfs2_dentry_ops;
+
 bail_unlock:
 	ocfs2_meta_unlock(dir, 0);
 
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index 7bcf691..66ca7a8 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -1025,12 +1025,10 @@ void ocfs2_drop_inode(struct inode *inode)
 	/* Testing ip_orphaned_slot here wouldn't work because we may
 	 * not have gotten a delete_inode vote from any other nodes
 	 * yet. */
-	if (oi->ip_flags & OCFS2_INODE_MAYBE_ORPHANED) {
-		mlog(0, "Inode was orphaned on another node, clearing nlink.\n");
-		inode->i_nlink = 0;
-	}
-
-	generic_drop_inode(inode);
+	if (oi->ip_flags & OCFS2_INODE_MAYBE_ORPHANED)
+		generic_delete_inode(inode);
+	else
+		generic_drop_inode(inode);
 
 	mlog_exit_void();
 }
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index 0d3e939..5a942e0 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -199,10 +199,32 @@ static struct dentry *ocfs2_lookup(struct inode *dir, struct dentry *dentry,
 	spin_unlock(&oi->ip_lock);
 
 bail_add:
-
 	dentry->d_op = &ocfs2_dentry_ops;
 	ret = d_splice_alias(inode, dentry);
 
+	if (inode) {
+		/*
+		 * If d_splice_alias() finds a DCACHE_DISCONNECTED
+		 * dentry, it will d_move() it on top of ourse. The
+		 * return value will indicate this however, so in
+		 * those cases, we switch them around for the locking
+		 * code.
+		 *
+		 * NOTE: This dentry already has ->d_op set from
+		 * ocfs2_get_parent() and ocfs2_get_dentry()
+		 */
+		if (ret)
+			dentry = ret;
+
+		status = ocfs2_dentry_attach_lock(dentry, inode,
+						  OCFS2_I(dir)->ip_blkno, 0);
+		if (status) {
+			mlog_errno(status);
+			ret = ERR_PTR(status);
+			goto bail_unlock;
+		}
+	}
+
 bail_unlock:
 	/* Don't drop the cluster lock until *after* the d_add --
 	 * unlink on another node will message us to remove that
@@ -418,6 +440,13 @@ static int ocfs2_mknod(struct inode *dir,
 		goto leave;
 	}
 
+	status = ocfs2_dentry_attach_lock(dentry, inode,
+					  OCFS2_I(dir)->ip_blkno, 1);
+	if (status) {
+		mlog_errno(status);
+		goto leave;
+	}
+
 	insert_inode_hash(inode);
 	dentry->d_op = &ocfs2_dentry_ops;
 	d_instantiate(dentry, inode);
@@ -725,6 +754,13 @@ static int ocfs2_link(struct dentry *old_dentry,
 		goto bail;
 	}
 
+	err = ocfs2_dentry_attach_lock(dentry, inode, OCFS2_I(dir)->ip_blkno,
+				       0);
+	if (err) {
+		mlog_errno(err);
+		goto bail;
+	}
+
 	atomic_inc(&inode->i_count);
 	dentry->d_op = &ocfs2_dentry_ops;
 	d_instantiate(dentry, inode);
@@ -743,6 +779,23 @@ bail:
 	return err;
 }
 
+/*
+ * Takes and drops an exclusive lock on the given dentry. This will
+ * force other nodes to drop it.
+ */
+static int ocfs2_remote_dentry_delete(struct dentry *dentry)
+{
+	int ret;
+
+	ret = ocfs2_dentry_lock(dentry, 1);
+	if (ret)
+		mlog_errno(ret);
+	else
+		ocfs2_dentry_unlock(dentry, 1);
+
+	return ret;
+}
+
 static int ocfs2_unlink(struct inode *dir,
 			struct dentry *dentry)
 {
@@ -832,8 +885,7 @@ static int ocfs2_unlink(struct inode *dir,
 	else
 		inode->i_nlink--;
 
-	status = ocfs2_request_unlink_vote(inode, dentry,
-					   (unsigned int) inode->i_nlink);
+	status = ocfs2_remote_dentry_delete(dentry);
 	if (status < 0) {
 		/* This vote should succeed under all normal
 		 * circumstances. */
@@ -1019,7 +1071,6 @@ static int ocfs2_rename(struct inode *old_dir,
 	struct buffer_head *old_inode_de_bh = NULL; // if old_dentry is a dir,
 						    // this is the 1st dirent bh
 	nlink_t old_dir_nlink = old_dir->i_nlink, new_dir_nlink = new_dir->i_nlink;
-	unsigned int links_count;
 
 	/* At some point it might be nice to break this function up a
 	 * bit. */
@@ -1093,23 +1144,26 @@ static int ocfs2_rename(struct inode *old_dir,
 		}
 	}
 
-	if (S_ISDIR(old_inode->i_mode)) {
-		/* Directories actually require metadata updates to
-		 * the directory info so we can't get away with not
-		 * doing node locking on it. */
-		status = ocfs2_meta_lock(old_inode, handle, NULL, 1);
-		if (status < 0) {
-			if (status != -ENOENT)
-				mlog_errno(status);
-			goto bail;
-		}
-
-		status = ocfs2_request_rename_vote(old_inode, old_dentry);
-		if (status < 0) {
+	/*
+	 * Though we don't require an inode meta data update if
+	 * old_inode is not a directory, we lock anyway here to ensure
+	 * the vote thread on other nodes won't have to concurrently
+	 * downconvert the inode and the dentry locks.
+	 */
+	status = ocfs2_meta_lock(old_inode, handle, NULL, 1);
+	if (status < 0) {
+		if (status != -ENOENT)
 			mlog_errno(status);
-			goto bail;
-		}
+		goto bail;
+	}
+
+	status = ocfs2_remote_dentry_delete(old_dentry);
+	if (status < 0) {
+		mlog_errno(status);
+		goto bail;
+	}
 
+	if (S_ISDIR(old_inode->i_mode)) {
 		status = -EIO;
 		old_inode_de_bh = ocfs2_bread(old_inode, 0, &status, 0);
 		if (!old_inode_de_bh)
@@ -1123,14 +1177,6 @@ static int ocfs2_rename(struct inode *old_dir,
 		if (!new_inode && new_dir!=old_dir &&
 		    new_dir->i_nlink >= OCFS2_LINK_MAX)
 			goto bail;
-	} else {
-		/* Ah, the simple case - we're a file so just send a
-		 * message. */
-		status = ocfs2_request_rename_vote(old_inode, old_dentry);
-		if (status < 0) {
-			mlog_errno(status);
-			goto bail;
-		}
 	}
 
 	status = -ENOENT;
@@ -1202,13 +1248,7 @@ static int ocfs2_rename(struct inode *old_dir,
 			goto bail;
 		}
 
-		if (S_ISDIR(new_inode->i_mode))
-			links_count = 0;
-		else
-			links_count = (unsigned int) (new_inode->i_nlink - 1);
-
-		status = ocfs2_request_unlink_vote(new_inode, new_dentry,
-						   links_count);
+		status = ocfs2_remote_dentry_delete(new_dentry);
 		if (status < 0) {
 			mlog_errno(status);
 			goto bail;
@@ -1387,6 +1427,7 @@ static int ocfs2_rename(struct inode *old_dir,
 		}
 	}
 
+	ocfs2_dentry_move(old_dentry, new_dentry, old_dir, new_dir);
 	status = 0;
 bail:
 	if (rename_lock)
@@ -1675,6 +1716,13 @@ static int ocfs2_symlink(struct inode *dir,
 		goto bail;
 	}
 
+	status = ocfs2_dentry_attach_lock(dentry, inode,
+					  OCFS2_I(dir)->ip_blkno, 1);
+	if (status) {
+		mlog_errno(status);
+		goto bail;
+	}
+
 	insert_inode_hash(inode);
 	dentry->d_op = &ocfs2_dentry_ops;
 	d_instantiate(dentry, inode);
-- 
cgit v1.1


From 1390334b4c697b7588d5661fcf6acaeec409cf4c Mon Sep 17 00:00:00 2001
From: Mark Fasheh <mark.fasheh@oracle.com>
Date: Fri, 8 Sep 2006 14:21:43 -0700
Subject: ocfs2: Remove the dentry vote

This is unused now.

Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
---
 fs/ocfs2/vote.c | 180 +-------------------------------------------------------
 fs/ocfs2/vote.h |   5 --
 2 files changed, 2 insertions(+), 183 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/vote.c b/fs/ocfs2/vote.c
index cf70fe2..5b4dca7 100644
--- a/fs/ocfs2/vote.c
+++ b/fs/ocfs2/vote.c
@@ -74,9 +74,6 @@ struct ocfs2_vote_msg
 		__be32 v_orphaned_slot;	/* Used during delete votes */
 		__be32 v_nlink;		/* Used during unlink votes */
 	} md1;				/* Message type dependant 1 */
-	__be32 v_unlink_namelen;
-	__be64 v_unlink_parent;
-	u8  v_unlink_dirent[OCFS2_VOTE_FILENAME_LEN];
 };
 
 /* Responses are given these values to maintain backwards
@@ -100,8 +97,6 @@ struct ocfs2_vote_work {
 enum ocfs2_vote_request {
 	OCFS2_VOTE_REQ_INVALID = 0,
 	OCFS2_VOTE_REQ_DELETE,
-	OCFS2_VOTE_REQ_UNLINK,
-	OCFS2_VOTE_REQ_RENAME,
 	OCFS2_VOTE_REQ_MOUNT,
 	OCFS2_VOTE_REQ_UMOUNT,
 	OCFS2_VOTE_REQ_LAST
@@ -261,103 +256,13 @@ done:
 	return response;
 }
 
-static int ocfs2_match_dentry(struct dentry *dentry,
-			      u64 parent_blkno,
-			      unsigned int namelen,
-			      const char *name)
-{
-	struct inode *parent;
-
-	if (!dentry->d_parent) {
-		mlog(0, "Detached from parent.\n");
-		return 0;
-	}
-
-	parent = dentry->d_parent->d_inode;
-	/* Negative parent dentry? */
-	if (!parent)
-		return 0;
-
-	/* Name is in a different directory. */
-	if (OCFS2_I(parent)->ip_blkno != parent_blkno)
-		return 0;
-
-	if (dentry->d_name.len != namelen)
-		return 0;
-
-	/* comparison above guarantees this is safe. */
-	if (memcmp(dentry->d_name.name, name, namelen))
-		return 0;
-
-	return 1;
-}
-
-static void ocfs2_process_dentry_request(struct inode *inode,
-					 int rename,
-					 unsigned int new_nlink,
-					 u64 parent_blkno,
-					 unsigned int namelen,
-					 const char *name)
-{
-	struct dentry *dentry = NULL;
-	struct list_head *p;
-	struct ocfs2_inode_info *oi = OCFS2_I(inode);
-
-	mlog(0, "parent %llu, namelen = %u, name = %.*s\n",
-	     (unsigned long long)parent_blkno, namelen, namelen, name);
-
-	spin_lock(&dcache_lock);
-
-	/* Another node is removing this name from the system. It is
-	 * up to us to find the corresponding dentry and if it exists,
-	 * unhash it from the dcache. */
-	list_for_each(p, &inode->i_dentry) {
-		dentry = list_entry(p, struct dentry, d_alias);
-
-		if (ocfs2_match_dentry(dentry, parent_blkno, namelen, name)) {
-			mlog(0, "dentry found: %.*s\n",
-			     dentry->d_name.len, dentry->d_name.name);
-
-			dget_locked(dentry);
-			break;
-		}
-
-		dentry = NULL;
-	}
-
-	spin_unlock(&dcache_lock);
-
-	if (dentry) {
-		d_delete(dentry);
-		dput(dentry);
-	}
-
-	/* rename votes don't send link counts */
-	if (!rename) {
-		mlog(0, "new_nlink = %u\n", new_nlink);
-
-		/* We don't have the proper locks here to directly
-		 * change i_nlink and besides, the vote is sent
-		 * *before* the operation so it may have failed on the
-		 * other node. This passes a hint to ocfs2_drop_inode
-		 * to force ocfs2_delete_inode, who will take the
-		 * proper cluster locks to sort things out. */
-		if (new_nlink == 0) {
-			spin_lock(&oi->ip_lock);
-			oi->ip_flags |= OCFS2_INODE_MAYBE_ORPHANED;
-			spin_unlock(&OCFS2_I(inode)->ip_lock);
-		}
-	}
-}
-
 static void ocfs2_process_vote(struct ocfs2_super *osb,
 			       struct ocfs2_vote_msg *msg)
 {
 	int net_status, vote_response;
 	int orphaned_slot = 0;
-	int rename = 0;
-	unsigned int node_num, generation, new_nlink, namelen;
-	u64 blkno, parent_blkno;
+	unsigned int node_num, generation;
+	u64 blkno;
 	enum ocfs2_vote_request request;
 	struct inode *inode = NULL;
 	struct ocfs2_msg_hdr *hdr = &msg->v_hdr;
@@ -437,18 +342,6 @@ static void ocfs2_process_vote(struct ocfs2_super *osb,
 		vote_response = ocfs2_process_delete_request(inode,
 							     &orphaned_slot);
 		break;
-	case OCFS2_VOTE_REQ_RENAME:
-		rename = 1;
-		/* fall through */
-	case OCFS2_VOTE_REQ_UNLINK:
-		parent_blkno = be64_to_cpu(msg->v_unlink_parent);
-		namelen = be32_to_cpu(msg->v_unlink_namelen);
-		/* new_nlink will be ignored in case of a rename vote */
-		new_nlink = be32_to_cpu(msg->md1.v_nlink);
-		ocfs2_process_dentry_request(inode, rename, new_nlink,
-					     parent_blkno, namelen,
-					     msg->v_unlink_dirent);
-		break;
 	default:
 		mlog(ML_ERROR, "node %u, invalid request: %u\n",
 		     node_num, request);
@@ -889,75 +782,6 @@ int ocfs2_request_delete_vote(struct inode *inode)
 	return status;
 }
 
-static void ocfs2_setup_unlink_vote(struct ocfs2_vote_msg *request,
-				    struct dentry *dentry)
-{
-	struct inode *parent = dentry->d_parent->d_inode;
-
-	/* We need some values which will uniquely identify a dentry
-	 * on the other nodes so that they can find it and run
-	 * d_delete against it. Parent directory block and full name
-	 * should suffice. */
-
-	mlog(0, "unlink/rename request: parent: %llu name: %.*s\n",
-	     (unsigned long long)OCFS2_I(parent)->ip_blkno, dentry->d_name.len,
-	     dentry->d_name.name);
-
-	request->v_unlink_parent = cpu_to_be64(OCFS2_I(parent)->ip_blkno);
-	request->v_unlink_namelen = cpu_to_be32(dentry->d_name.len);
-	memcpy(request->v_unlink_dirent, dentry->d_name.name,
-	       dentry->d_name.len);
-}
-
-int ocfs2_request_unlink_vote(struct inode *inode,
-			      struct dentry *dentry,
-			      unsigned int nlink)
-{
-	int status;
-	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
-	struct ocfs2_vote_msg *request;
-
-	if (dentry->d_name.len > OCFS2_VOTE_FILENAME_LEN)
-		return -ENAMETOOLONG;
-
-	status = -ENOMEM;
-	request = ocfs2_new_vote_request(osb, OCFS2_I(inode)->ip_blkno,
-					 inode->i_generation,
-					 OCFS2_VOTE_REQ_UNLINK, nlink);
-	if (request) {
-		ocfs2_setup_unlink_vote(request, dentry);
-
-		status = ocfs2_request_vote(inode, request, NULL);
-
-		kfree(request);
-	}
-	return status;
-}
-
-int ocfs2_request_rename_vote(struct inode *inode,
-			      struct dentry *dentry)
-{
-	int status;
-	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
-	struct ocfs2_vote_msg *request;
-
-	if (dentry->d_name.len > OCFS2_VOTE_FILENAME_LEN)
-		return -ENAMETOOLONG;
-
-	status = -ENOMEM;
-	request = ocfs2_new_vote_request(osb, OCFS2_I(inode)->ip_blkno,
-					 inode->i_generation,
-					 OCFS2_VOTE_REQ_RENAME, 0);
-	if (request) {
-		ocfs2_setup_unlink_vote(request, dentry);
-
-		status = ocfs2_request_vote(inode, request, NULL);
-
-		kfree(request);
-	}
-	return status;
-}
-
 int ocfs2_request_mount_vote(struct ocfs2_super *osb)
 {
 	int status;
diff --git a/fs/ocfs2/vote.h b/fs/ocfs2/vote.h
index 9cce607..53ebc1c6 100644
--- a/fs/ocfs2/vote.h
+++ b/fs/ocfs2/vote.h
@@ -39,11 +39,6 @@ static inline void ocfs2_kick_vote_thread(struct ocfs2_super *osb)
 }
 
 int ocfs2_request_delete_vote(struct inode *inode);
-int ocfs2_request_unlink_vote(struct inode *inode,
-			      struct dentry *dentry,
-			      unsigned int nlink);
-int ocfs2_request_rename_vote(struct inode *inode,
-			      struct dentry *dentry);
 int ocfs2_request_mount_vote(struct ocfs2_super *osb);
 int ocfs2_request_umount_vote(struct ocfs2_super *osb);
 int ocfs2_register_net_handlers(struct ocfs2_super *osb);
-- 
cgit v1.1


From 349457ccf2592c14bdf13b6706170ae2e94931b1 Mon Sep 17 00:00:00 2001
From: Mark Fasheh <mark.fasheh@oracle.com>
Date: Fri, 8 Sep 2006 14:22:21 -0700
Subject: [PATCH] Allow file systems to manually d_move() inside of ->rename()

Some file systems want to manually d_move() the dentries involved in a
rename.  We can do this by making use of the FS_ODD_RENAME flag if we just
have nfs_rename() unconditionally do the d_move().  While there, we rename
the flag to be more descriptive.

OCFS2 uses this to protect that part of the rename operation with a cluster
lock.

Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
Cc: Trond Myklebust <trond.myklebust@fys.uio.no>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Christoph Hellwig <hch@lst.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
---
 fs/namei.c     |  6 +++---
 fs/nfs/dir.c   |  3 +--
 fs/nfs/super.c | 10 +++++-----
 3 files changed, 9 insertions(+), 10 deletions(-)

(limited to 'fs')

diff --git a/fs/namei.c b/fs/namei.c
index 432d6bc..6b591c0 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -2370,7 +2370,8 @@ static int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry,
 		dput(new_dentry);
 	}
 	if (!error)
-		d_move(old_dentry,new_dentry);
+		if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE))
+			d_move(old_dentry,new_dentry);
 	return error;
 }
 
@@ -2393,8 +2394,7 @@ static int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry,
 	else
 		error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry);
 	if (!error) {
-		/* The following d_move() should become unconditional */
-		if (!(old_dir->i_sb->s_type->fs_flags & FS_ODD_RENAME))
+		if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE))
 			d_move(old_dentry, new_dentry);
 	}
 	if (target)
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 3419c2d..7432f1a 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1669,8 +1669,7 @@ out:
 	if (rehash)
 		d_rehash(rehash);
 	if (!error) {
-		if (!S_ISDIR(old_inode->i_mode))
-			d_move(old_dentry, new_dentry);
+		d_move(old_dentry, new_dentry);
 		nfs_renew_times(new_dentry);
 		nfs_set_verifier(new_dentry, nfs_save_change_attribute(new_dir));
 	}
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index b99113b..e8d4003 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -71,7 +71,7 @@ static struct file_system_type nfs_fs_type = {
 	.name		= "nfs",
 	.get_sb		= nfs_get_sb,
 	.kill_sb	= nfs_kill_super,
-	.fs_flags	= FS_ODD_RENAME|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
+	.fs_flags	= FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
 };
 
 struct file_system_type nfs_xdev_fs_type = {
@@ -79,7 +79,7 @@ struct file_system_type nfs_xdev_fs_type = {
 	.name		= "nfs",
 	.get_sb		= nfs_xdev_get_sb,
 	.kill_sb	= nfs_kill_super,
-	.fs_flags	= FS_ODD_RENAME|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
+	.fs_flags	= FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
 };
 
 static struct super_operations nfs_sops = {
@@ -107,7 +107,7 @@ static struct file_system_type nfs4_fs_type = {
 	.name		= "nfs4",
 	.get_sb		= nfs4_get_sb,
 	.kill_sb	= nfs4_kill_super,
-	.fs_flags	= FS_ODD_RENAME|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
+	.fs_flags	= FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
 };
 
 struct file_system_type nfs4_xdev_fs_type = {
@@ -115,7 +115,7 @@ struct file_system_type nfs4_xdev_fs_type = {
 	.name		= "nfs4",
 	.get_sb		= nfs4_xdev_get_sb,
 	.kill_sb	= nfs4_kill_super,
-	.fs_flags	= FS_ODD_RENAME|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
+	.fs_flags	= FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
 };
 
 struct file_system_type nfs4_referral_fs_type = {
@@ -123,7 +123,7 @@ struct file_system_type nfs4_referral_fs_type = {
 	.name		= "nfs4",
 	.get_sb		= nfs4_referral_get_sb,
 	.kill_sb	= nfs4_kill_super,
-	.fs_flags	= FS_ODD_RENAME|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
+	.fs_flags	= FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
 };
 
 static struct super_operations nfs4_sops = {
-- 
cgit v1.1


From 1ba9da2ffa54b56a6346746248bfa38124d499a6 Mon Sep 17 00:00:00 2001
From: Mark Fasheh <mark.fasheh@oracle.com>
Date: Fri, 8 Sep 2006 14:22:54 -0700
Subject: ocfs2: manually d_move() during ocfs2_rename()

Make use of FS_RENAME_DOES_D_MOVE to avoid a race condition that can occur
during ->rename() if we d_move() outside of the parent directory cluster
locks, and another node discovers the new name (created during the rename)
and unlinks it. d_move() will unconditionally rehash a dentry - which will
leave stale data in the system.

Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
---
 fs/ocfs2/dcache.c | 5 ++++-
 fs/ocfs2/super.c  | 2 +-
 2 files changed, 5 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/dcache.c b/fs/ocfs2/dcache.c
index 09efe24..18a3190 100644
--- a/fs/ocfs2/dcache.c
+++ b/fs/ocfs2/dcache.c
@@ -414,7 +414,7 @@ void ocfs2_dentry_move(struct dentry *dentry, struct dentry *target,
 	 * XXX: Is there any advantage to dropping the lock here?
 	 */
 	if (old_dir == new_dir)
-		return;
+		goto out_move;
 
 	ocfs2_dentry_lock_put(osb, dentry->d_fsdata);
 
@@ -423,6 +423,9 @@ void ocfs2_dentry_move(struct dentry *dentry, struct dentry *target,
 				       OCFS2_I(new_dir)->ip_blkno, 0);
 	if (ret)
 		mlog_errno(ret);
+
+out_move:
+	d_move(dentry, target);
 }
 
 struct dentry_operations ocfs2_dentry_ops = {
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index d17e33e..33a6de6 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -682,7 +682,7 @@ static struct file_system_type ocfs2_fs_type = {
 	.kill_sb        = kill_block_super, /* set to the generic one
 					     * right now, but do we
 					     * need to change that? */
-	.fs_flags       = FS_REQUIRES_DEV,
+	.fs_flags       = FS_REQUIRES_DEV|FS_RENAME_DOES_D_MOVE,
 	.next           = NULL
 };
 
-- 
cgit v1.1


From 0027dd5bc213bc639e09dd002a4ab56bd18317c3 Mon Sep 17 00:00:00 2001
From: Mark Fasheh <mark.fasheh@oracle.com>
Date: Thu, 21 Sep 2006 16:51:28 -0700
Subject: ocfs2: Remove special casing for inode creation in
 ocfs2_dentry_attach_lock()

We can't use LKM_LOCAL for new dentry locks because an unlink and subsequent
re-create of a name/inode pair may result in the lock still being mastered
somewhere in the cluster.

Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
---
 fs/ocfs2/dcache.c | 39 +++++++++------------------------------
 fs/ocfs2/dcache.h |  2 +-
 fs/ocfs2/namei.c  | 10 ++++------
 3 files changed, 14 insertions(+), 37 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/dcache.c b/fs/ocfs2/dcache.c
index 18a3190..014e739 100644
--- a/fs/ocfs2/dcache.c
+++ b/fs/ocfs2/dcache.c
@@ -183,9 +183,6 @@ DEFINE_SPINLOCK(dentry_attach_lock);
  * The dir cluster lock (held at either PR or EX mode) protects us
  * from unlink and rename on other nodes.
  *
- * The 'create' flag tells us whether we're doing this as a result of
- * a file creation.
- *
  * A dput() can happen asynchronously due to pruning, so we cover
  * attaching and detaching the dentry lock with a
  * dentry_attach_lock.
@@ -199,16 +196,15 @@ DEFINE_SPINLOCK(dentry_attach_lock);
  */
 int ocfs2_dentry_attach_lock(struct dentry *dentry,
 			     struct inode *inode,
-			     u64 parent_blkno,
-			     int create)
+			     u64 parent_blkno)
 {
 	int ret;
 	struct dentry *alias;
 	struct ocfs2_dentry_lock *dl = dentry->d_fsdata;
 
-	mlog(0, "Attach \"%.*s\", parent %llu, create %d, fsdata: %p\n",
+	mlog(0, "Attach \"%.*s\", parent %llu, fsdata: %p\n",
 	     dentry->d_name.len, dentry->d_name.name,
-	     (unsigned long long)parent_blkno, create, dl);
+	     (unsigned long long)parent_blkno, dl);
 
 	/*
 	 * Negative dentry. We ignore these for now.
@@ -242,10 +238,9 @@ int ocfs2_dentry_attach_lock(struct dentry *dentry,
 		 * since we have it pinned, so our reference is safe.
 		 */
 		dl = alias->d_fsdata;
-		mlog_bug_on_msg(!dl, "parent %llu, ino %llu, create %d\n",
+		mlog_bug_on_msg(!dl, "parent %llu, ino %llu\n",
 				(unsigned long long)parent_blkno,
-				(unsigned long long)OCFS2_I(inode)->ip_blkno,
-				create);
+				(unsigned long long)OCFS2_I(inode)->ip_blkno);
 
 		mlog_bug_on_msg(dl->dl_parent_blkno != parent_blkno,
 				" \"%.*s\": old parent: %llu, new: %llu\n",
@@ -284,31 +279,16 @@ out_attach:
 	spin_unlock(&dentry_attach_lock);
 
 	/*
-	 * Creation of a new file means that nobody can possibly have
-	 * this name in the system, which means that acquiry of those
-	 * locks can easily be optimized.
-	 */
-	if (create) {
-		ret = ocfs2_create_new_lock(OCFS2_SB(inode->i_sb),
-					    &dl->dl_lockres, 0);
-		if (ret)
-			mlog_errno(ret);
-		goto out;
-	}
-
-	/*
 	 * This actually gets us our PRMODE level lock. From now on,
 	 * we'll have a notification if one of these names is
 	 * destroyed on another node.
 	 */
 	ret = ocfs2_dentry_lock(dentry, 0);
-	if (ret) {
+	if (!ret)
+		ocfs2_dentry_unlock(dentry, 0);
+	else
 		mlog_errno(ret);
-		goto out;
-	}
-	ocfs2_dentry_unlock(dentry, 0);
 
-out:
 	dput(alias);
 
 	return ret;
@@ -419,8 +399,7 @@ void ocfs2_dentry_move(struct dentry *dentry, struct dentry *target,
 	ocfs2_dentry_lock_put(osb, dentry->d_fsdata);
 
 	dentry->d_fsdata = NULL;
-	ret = ocfs2_dentry_attach_lock(dentry, inode,
-				       OCFS2_I(new_dir)->ip_blkno, 0);
+	ret = ocfs2_dentry_attach_lock(dentry, inode, OCFS2_I(new_dir)->ip_blkno);
 	if (ret)
 		mlog_errno(ret);
 
diff --git a/fs/ocfs2/dcache.h b/fs/ocfs2/dcache.h
index e53abe7..c091c34 100644
--- a/fs/ocfs2/dcache.h
+++ b/fs/ocfs2/dcache.h
@@ -42,7 +42,7 @@ struct ocfs2_dentry_lock {
 };
 
 int ocfs2_dentry_attach_lock(struct dentry *dentry, struct inode *inode,
-			     u64 parent_blkno, int create);
+			     u64 parent_blkno);
 
 void ocfs2_dentry_lock_put(struct ocfs2_super *osb,
 			   struct ocfs2_dentry_lock *dl);
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index 5a942e0..6fa9788 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -217,7 +217,7 @@ bail_add:
 			dentry = ret;
 
 		status = ocfs2_dentry_attach_lock(dentry, inode,
-						  OCFS2_I(dir)->ip_blkno, 0);
+						  OCFS2_I(dir)->ip_blkno);
 		if (status) {
 			mlog_errno(status);
 			ret = ERR_PTR(status);
@@ -441,7 +441,7 @@ static int ocfs2_mknod(struct inode *dir,
 	}
 
 	status = ocfs2_dentry_attach_lock(dentry, inode,
-					  OCFS2_I(dir)->ip_blkno, 1);
+					  OCFS2_I(dir)->ip_blkno);
 	if (status) {
 		mlog_errno(status);
 		goto leave;
@@ -754,8 +754,7 @@ static int ocfs2_link(struct dentry *old_dentry,
 		goto bail;
 	}
 
-	err = ocfs2_dentry_attach_lock(dentry, inode, OCFS2_I(dir)->ip_blkno,
-				       0);
+	err = ocfs2_dentry_attach_lock(dentry, inode, OCFS2_I(dir)->ip_blkno);
 	if (err) {
 		mlog_errno(err);
 		goto bail;
@@ -1716,8 +1715,7 @@ static int ocfs2_symlink(struct inode *dir,
 		goto bail;
 	}
 
-	status = ocfs2_dentry_attach_lock(dentry, inode,
-					  OCFS2_I(dir)->ip_blkno, 1);
+	status = ocfs2_dentry_attach_lock(dentry, inode, OCFS2_I(dir)->ip_blkno);
 	if (status) {
 		mlog_errno(status);
 		goto bail;
-- 
cgit v1.1


From 4d3b83f7364269b66cdda271f680bd99e77afd96 Mon Sep 17 00:00:00 2001
From: Mark Fasheh <mark.fasheh@oracle.com>
Date: Tue, 12 Sep 2006 15:22:18 -0700
Subject: ocfs2: Free up some space in the lvb

lvb_version doesn't need to be a whole 32 bits. Make it an 8 bit field to
free up some space. This should be backwards compatible until we use one of
the fields, in which case we'd bump the lvb version anyway.

Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
---
 fs/ocfs2/dlmglue.c | 6 +++---
 fs/ocfs2/dlmglue.h | 4 +++-
 2 files changed, 6 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index 764d15d..f80fb14 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -1413,7 +1413,7 @@ static void __ocfs2_stuff_meta_lvb(struct inode *inode)
 
 	lvb = (struct ocfs2_meta_lvb *) lockres->l_lksb.lvb;
 
-	lvb->lvb_version   = cpu_to_be32(OCFS2_LVB_VERSION);
+	lvb->lvb_version   = OCFS2_LVB_VERSION;
 	lvb->lvb_isize	   = cpu_to_be64(i_size_read(inode));
 	lvb->lvb_iclusters = cpu_to_be32(oi->ip_clusters);
 	lvb->lvb_iuid      = cpu_to_be32(inode->i_uid);
@@ -1486,7 +1486,7 @@ static inline int ocfs2_meta_lvb_is_trustable(struct ocfs2_lock_res *lockres)
 {
 	struct ocfs2_meta_lvb *lvb = (struct ocfs2_meta_lvb *) lockres->l_lksb.lvb;
 
-	if (be32_to_cpu(lvb->lvb_version) == OCFS2_LVB_VERSION)
+	if (lvb->lvb_version == OCFS2_LVB_VERSION)
 		return 1;
 	return 0;
 }
@@ -3167,7 +3167,7 @@ void ocfs2_dump_meta_lvb_info(u64 level,
 	mlog(level, "LVB information for %s (called from %s:%u):\n",
 	     lockres->l_name, function, line);
 	mlog(level, "version: %u, clusters: %u\n",
-	     be32_to_cpu(lvb->lvb_version), be32_to_cpu(lvb->lvb_iclusters));
+	     lvb->lvb_version, be32_to_cpu(lvb->lvb_iclusters));
 	mlog(level, "size: %llu, uid %u, gid %u, mode 0x%x\n",
 	     (unsigned long long)be64_to_cpu(lvb->lvb_isize),
 	     be32_to_cpu(lvb->lvb_iuid), be32_to_cpu(lvb->lvb_igid),
diff --git a/fs/ocfs2/dlmglue.h b/fs/ocfs2/dlmglue.h
index 3402515..3476a16 100644
--- a/fs/ocfs2/dlmglue.h
+++ b/fs/ocfs2/dlmglue.h
@@ -32,7 +32,9 @@
 #define OCFS2_LVB_VERSION 3
 
 struct ocfs2_meta_lvb {
-	__be32       lvb_version;
+	__be16       lvb_reserved0;
+	__u8         lvb_reserved1;
+	__u8         lvb_version;
 	__be32       lvb_iclusters;
 	__be32       lvb_iuid;
 	__be32       lvb_igid;
-- 
cgit v1.1


From f9e2d82e6395cfa0802446b54b63cc412089d82c Mon Sep 17 00:00:00 2001
From: Mark Fasheh <mark.fasheh@oracle.com>
Date: Tue, 12 Sep 2006 15:35:49 -0700
Subject: ocfs2: Encode i_generation in the meta data lvb

When i_generation is removed from the lockname, this will help us determine
whether a meta data lvb has information that is in sync with the local
struct inode.

Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
---
 fs/ocfs2/dlmglue.c | 14 +++++++++-----
 fs/ocfs2/dlmglue.h |  5 +++--
 2 files changed, 12 insertions(+), 7 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index f80fb14..6cd84df 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -1427,6 +1427,7 @@ static void __ocfs2_stuff_meta_lvb(struct inode *inode)
 	lvb->lvb_imtime_packed =
 		cpu_to_be64(ocfs2_pack_timespec(&inode->i_mtime));
 	lvb->lvb_iattr    = cpu_to_be32(oi->ip_attr);
+	lvb->lvb_igeneration = cpu_to_be32(inode->i_generation);
 
 	mlog_meta_lvb(0, lockres);
 
@@ -1482,11 +1483,13 @@ static void ocfs2_refresh_inode_from_lvb(struct inode *inode)
 	mlog_exit_void();
 }
 
-static inline int ocfs2_meta_lvb_is_trustable(struct ocfs2_lock_res *lockres)
+static inline int ocfs2_meta_lvb_is_trustable(struct inode *inode,
+					      struct ocfs2_lock_res *lockres)
 {
 	struct ocfs2_meta_lvb *lvb = (struct ocfs2_meta_lvb *) lockres->l_lksb.lvb;
 
-	if (lvb->lvb_version == OCFS2_LVB_VERSION)
+	if (lvb->lvb_version == OCFS2_LVB_VERSION
+	    && be32_to_cpu(lvb->lvb_igeneration) == inode->i_generation)
 		return 1;
 	return 0;
 }
@@ -1583,7 +1586,7 @@ static int ocfs2_meta_lock_update(struct inode *inode,
 	 * map (directories, bitmap files, etc) */
 	ocfs2_extent_map_trunc(inode, 0);
 
-	if (ocfs2_meta_lvb_is_trustable(lockres)) {
+	if (ocfs2_meta_lvb_is_trustable(inode, lockres)) {
 		mlog(0, "Trusting LVB on inode %llu\n",
 		     (unsigned long long)oi->ip_blkno);
 		ocfs2_refresh_inode_from_lvb(inode);
@@ -3166,8 +3169,9 @@ void ocfs2_dump_meta_lvb_info(u64 level,
 
 	mlog(level, "LVB information for %s (called from %s:%u):\n",
 	     lockres->l_name, function, line);
-	mlog(level, "version: %u, clusters: %u\n",
-	     lvb->lvb_version, be32_to_cpu(lvb->lvb_iclusters));
+	mlog(level, "version: %u, clusters: %u, generation: 0x%x\n",
+	     lvb->lvb_version, be32_to_cpu(lvb->lvb_iclusters),
+	     be32_to_cpu(lvb->lvb_igeneration));
 	mlog(level, "size: %llu, uid %u, gid %u, mode 0x%x\n",
 	     (unsigned long long)be64_to_cpu(lvb->lvb_isize),
 	     be32_to_cpu(lvb->lvb_iuid), be32_to_cpu(lvb->lvb_igid),
diff --git a/fs/ocfs2/dlmglue.h b/fs/ocfs2/dlmglue.h
index 3476a16..45a74f4 100644
--- a/fs/ocfs2/dlmglue.h
+++ b/fs/ocfs2/dlmglue.h
@@ -29,7 +29,7 @@
 
 #include "dcache.h"
 
-#define OCFS2_LVB_VERSION 3
+#define OCFS2_LVB_VERSION 4
 
 struct ocfs2_meta_lvb {
 	__be16       lvb_reserved0;
@@ -45,7 +45,8 @@ struct ocfs2_meta_lvb {
 	__be16       lvb_imode;
 	__be16       lvb_inlink;
 	__be32       lvb_iattr;
-	__be32       lvb_reserved[2];
+	__be32       lvb_igeneration;
+	__be32       lvb_reserved2;
 };
 
 /* ocfs2_meta_lock_full() and ocfs2_data_lock_full() 'arg_flags' flags */
-- 
cgit v1.1


From 24c19ef40474c3930597f31ae233dc06319bd881 Mon Sep 17 00:00:00 2001
From: Mark Fasheh <mark.fasheh@oracle.com>
Date: Fri, 22 Sep 2006 17:28:19 -0700
Subject: ocfs2: Remove i_generation from inode lock names

OCFS2 puts inode meta data in the "lock value block" provided by the DLM.
Typically, i_generation is encoded in the lock name so that a deleted inode
on and a new one in the same block don't share the same lvb.

Unfortunately, that scheme means that the read in ocfs2_read_locked_inode()
is potentially thrown away as soon as the meta data lock is taken - we
cannot encode the lock name without first knowing i_generation, which
requires a disk read.

This patch encodes i_generation in the inode meta data lvb, and removes the
value from the inode meta data lock name. This way, the read can be covered
by a lock, and at the same time we can distinguish between an up to date and
a stale LVB.

This will help cold-cache stat(2) performance in particular.

Since this patch changes the protocol version, we take the opportunity to do
a minor re-organization of two of the LVB fields.

Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
---
 fs/ocfs2/cluster/tcp_internal.h |   5 +-
 fs/ocfs2/dlmglue.c              |  42 ++++++++++--
 fs/ocfs2/dlmglue.h              |   7 +-
 fs/ocfs2/export.c               |   4 +-
 fs/ocfs2/inode.c                | 146 ++++++++++++++++++++++++++++++----------
 fs/ocfs2/inode.h                |   8 ++-
 fs/ocfs2/journal.c              |   3 +-
 fs/ocfs2/namei.c                |   2 +-
 fs/ocfs2/super.c                |   4 +-
 fs/ocfs2/sysfile.c              |   2 +-
 10 files changed, 170 insertions(+), 53 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/cluster/tcp_internal.h b/fs/ocfs2/cluster/tcp_internal.h
index da42b51..4b46aac 100644
--- a/fs/ocfs2/cluster/tcp_internal.h
+++ b/fs/ocfs2/cluster/tcp_internal.h
@@ -44,6 +44,9 @@
  * locking semantics of the file system using the protocol.  It should 
  * be somewhere else, I'm sure, but right now it isn't.
  *
+ * New in version 4:
+ * 	- Remove i_generation from lock names for better stat performance.
+ *
  * New in version 3:
  * 	- Replace dentry votes with a cluster lock
  *
@@ -51,7 +54,7 @@
  * 	- full 64 bit i_size in the metadata lock lvbs
  * 	- introduction of "rw" lock and pushing meta/data locking down
  */
-#define O2NET_PROTOCOL_VERSION 3ULL
+#define O2NET_PROTOCOL_VERSION 4ULL
 struct o2net_handshake {
 	__be64	protocol_version;
 	__be64	connector_id;
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index 6cd84df..ecb3cba 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -320,6 +320,7 @@ void ocfs2_lock_res_init_once(struct ocfs2_lock_res *res)
 
 void ocfs2_inode_lock_res_init(struct ocfs2_lock_res *res,
 			       enum ocfs2_lock_type type,
+			       unsigned int generation,
 			       struct inode *inode)
 {
 	struct ocfs2_lock_res_ops *ops;
@@ -341,7 +342,7 @@ void ocfs2_inode_lock_res_init(struct ocfs2_lock_res *res,
 	};
 
 	ocfs2_build_lock_name(type, OCFS2_I(inode)->ip_blkno,
-			      inode->i_generation, res->l_name);
+			      generation, res->l_name);
 	ocfs2_lock_res_init_common(OCFS2_SB(inode->i_sb), res, type, ops, inode);
 }
 
@@ -1173,17 +1174,19 @@ static void ocfs2_cluster_unlock(struct ocfs2_super *osb,
 
 int ocfs2_create_new_lock(struct ocfs2_super *osb,
 			  struct ocfs2_lock_res *lockres,
-			  int ex)
+			  int ex,
+			  int local)
 {
 	int level =  ex ? LKM_EXMODE : LKM_PRMODE;
 	unsigned long flags;
+	int lkm_flags = local ? LKM_LOCAL : 0;
 
 	spin_lock_irqsave(&lockres->l_lock, flags);
 	BUG_ON(lockres->l_flags & OCFS2_LOCK_ATTACHED);
 	lockres_or_flags(lockres, OCFS2_LOCK_LOCAL);
 	spin_unlock_irqrestore(&lockres->l_lock, flags);
 
-	return ocfs2_lock_create(osb, lockres, level, LKM_LOCAL);
+	return ocfs2_lock_create(osb, lockres, level, lkm_flags);
 }
 
 /* Grants us an EX lock on the data and metadata resources, skipping
@@ -1212,19 +1215,23 @@ int ocfs2_create_new_inode_locks(struct inode *inode)
 	 * on a resource which has an invalid one -- we'll set it
 	 * valid when we release the EX. */
 
-	ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_rw_lockres, 1);
+	ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_rw_lockres, 1, 1);
 	if (ret) {
 		mlog_errno(ret);
 		goto bail;
 	}
 
-	ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_meta_lockres, 1);
+	/*
+	 * We don't want to use LKM_LOCAL on a meta data lock as they
+	 * don't use a generation in their lock names.
+	 */
+	ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_meta_lockres, 1, 0);
 	if (ret) {
 		mlog_errno(ret);
 		goto bail;
 	}
 
-	ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_data_lockres, 1);
+	ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_data_lockres, 1, 1);
 	if (ret) {
 		mlog_errno(ret);
 		goto bail;
@@ -1413,6 +1420,16 @@ static void __ocfs2_stuff_meta_lvb(struct inode *inode)
 
 	lvb = (struct ocfs2_meta_lvb *) lockres->l_lksb.lvb;
 
+	/*
+	 * Invalidate the LVB of a deleted inode - this way other
+	 * nodes are forced to go to disk and discover the new inode
+	 * status.
+	 */
+	if (oi->ip_flags & OCFS2_INODE_DELETED) {
+		lvb->lvb_version = 0;
+		goto out;
+	}
+
 	lvb->lvb_version   = OCFS2_LVB_VERSION;
 	lvb->lvb_isize	   = cpu_to_be64(i_size_read(inode));
 	lvb->lvb_iclusters = cpu_to_be32(oi->ip_clusters);
@@ -1429,6 +1446,7 @@ static void __ocfs2_stuff_meta_lvb(struct inode *inode)
 	lvb->lvb_iattr    = cpu_to_be32(oi->ip_attr);
 	lvb->lvb_igeneration = cpu_to_be32(inode->i_generation);
 
+out:
 	mlog_meta_lvb(0, lockres);
 
 	mlog_exit_void();
@@ -1727,6 +1745,18 @@ int ocfs2_meta_lock_full(struct inode *inode,
 		wait_event(osb->recovery_event,
 			   ocfs2_node_map_is_empty(osb, &osb->recovery_map));
 
+	/*
+	 * We only see this flag if we're being called from
+	 * ocfs2_read_locked_inode(). It means we're locking an inode
+	 * which hasn't been populated yet, so clear the refresh flag
+	 * and let the caller handle it.
+	 */
+	if (inode->i_state & I_NEW) {
+		status = 0;
+		ocfs2_complete_lock_res_refresh(lockres, 0);
+		goto bail;
+	}
+
 	/* This is fun. The caller may want a bh back, or it may
 	 * not. ocfs2_meta_lock_update definitely wants one in, but
 	 * may or may not read one, depending on what's in the
diff --git a/fs/ocfs2/dlmglue.h b/fs/ocfs2/dlmglue.h
index 45a74f4..4a27693 100644
--- a/fs/ocfs2/dlmglue.h
+++ b/fs/ocfs2/dlmglue.h
@@ -32,9 +32,9 @@
 #define OCFS2_LVB_VERSION 4
 
 struct ocfs2_meta_lvb {
-	__be16       lvb_reserved0;
-	__u8         lvb_reserved1;
 	__u8         lvb_version;
+	__u8         lvb_reserved0;
+	__be16       lvb_reserved1;
 	__be32       lvb_iclusters;
 	__be32       lvb_iuid;
 	__be32       lvb_igid;
@@ -62,13 +62,14 @@ void ocfs2_dlm_shutdown(struct ocfs2_super *osb);
 void ocfs2_lock_res_init_once(struct ocfs2_lock_res *res);
 void ocfs2_inode_lock_res_init(struct ocfs2_lock_res *res,
 			       enum ocfs2_lock_type type,
+			       unsigned int generation,
 			       struct inode *inode);
 void ocfs2_dentry_lock_res_init(struct ocfs2_dentry_lock *dl,
 				u64 parent, struct inode *inode);
 void ocfs2_lock_res_free(struct ocfs2_lock_res *res);
 int ocfs2_create_new_inode_locks(struct inode *inode);
 int ocfs2_create_new_lock(struct ocfs2_super *osb,
-			  struct ocfs2_lock_res *lockres, int ex);
+			  struct ocfs2_lock_res *lockres, int ex, int local);
 int ocfs2_drop_inode_locks(struct inode *inode);
 int ocfs2_data_lock_full(struct inode *inode,
 			 int write,
diff --git a/fs/ocfs2/export.c b/fs/ocfs2/export.c
index ffcd797..fb91089 100644
--- a/fs/ocfs2/export.c
+++ b/fs/ocfs2/export.c
@@ -58,7 +58,7 @@ static struct dentry *ocfs2_get_dentry(struct super_block *sb, void *vobjp)
 		return ERR_PTR(-ESTALE);
 	}
 
-	inode = ocfs2_iget(OCFS2_SB(sb), handle->ih_blkno);
+	inode = ocfs2_iget(OCFS2_SB(sb), handle->ih_blkno, 0);
 
 	if (IS_ERR(inode)) {
 		mlog_errno(PTR_ERR(inode));
@@ -115,7 +115,7 @@ static struct dentry *ocfs2_get_parent(struct dentry *child)
 		goto bail_unlock;
 	}
 
-	inode = ocfs2_iget(OCFS2_SB(dir->i_sb), blkno);
+	inode = ocfs2_iget(OCFS2_SB(dir->i_sb), blkno, 0);
 	if (IS_ERR(inode)) {
 		mlog(ML_ERROR, "Unable to create inode %llu\n",
 		     (unsigned long long)blkno);
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index 66ca7a8..69d3db5 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -54,8 +54,6 @@
 
 #include "buffer_head_io.h"
 
-#define OCFS2_FI_FLAG_NOWAIT	0x1
-#define OCFS2_FI_FLAG_DELETE	0x2
 struct ocfs2_find_inode_args
 {
 	u64		fi_blkno;
@@ -109,7 +107,7 @@ struct inode *ocfs2_ilookup_for_vote(struct ocfs2_super *osb,
 	return ilookup5(osb->sb, args.fi_ino, ocfs2_find_actor, &args);
 }
 
-struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 blkno)
+struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 blkno, int flags)
 {
 	struct inode *inode = NULL;
 	struct super_block *sb = osb->sb;
@@ -127,7 +125,7 @@ struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 blkno)
 	}
 
 	args.fi_blkno = blkno;
-	args.fi_flags = 0;
+	args.fi_flags = flags;
 	args.fi_ino = ino_from_blkno(sb, blkno);
 
 	inode = iget5_locked(sb, args.fi_ino, ocfs2_find_actor,
@@ -297,15 +295,11 @@ int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe,
 	OCFS2_I(inode)->ip_orphaned_slot = OCFS2_INVALID_SLOT;
 	OCFS2_I(inode)->ip_attr = le32_to_cpu(fe->i_attr);
 
-	if (create_ino)
-		inode->i_ino = ino_from_blkno(inode->i_sb,
-			       le64_to_cpu(fe->i_blkno));
-
-	mlog(0, "blkno = %llu, ino = %lu, create_ino = %s\n",
-	     (unsigned long long)fe->i_blkno, inode->i_ino, create_ino ? "true" : "false");
-
 	inode->i_nlink = le16_to_cpu(fe->i_links_count);
 
+	if (fe->i_flags & cpu_to_le32(OCFS2_SYSTEM_FL))
+		OCFS2_I(inode)->ip_flags |= OCFS2_INODE_SYSTEM_FILE;
+
 	if (fe->i_flags & cpu_to_le32(OCFS2_LOCAL_ALLOC_FL)) {
 		OCFS2_I(inode)->ip_flags |= OCFS2_INODE_BITMAP;
 		mlog(0, "local alloc inode: i_ino=%lu\n", inode->i_ino);
@@ -343,12 +337,28 @@ int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe,
 		    break;
 	}
 
+	if (create_ino) {
+		inode->i_ino = ino_from_blkno(inode->i_sb,
+			       le64_to_cpu(fe->i_blkno));
+
+		/*
+		 * If we ever want to create system files from kernel,
+		 * the generation argument to
+		 * ocfs2_inode_lock_res_init() will have to change.
+		 */
+		BUG_ON(fe->i_flags & cpu_to_le32(OCFS2_SYSTEM_FL));
+
+		ocfs2_inode_lock_res_init(&OCFS2_I(inode)->ip_meta_lockres,
+					  OCFS2_LOCK_TYPE_META, 0, inode);
+	}
+
 	ocfs2_inode_lock_res_init(&OCFS2_I(inode)->ip_rw_lockres,
-				  OCFS2_LOCK_TYPE_RW, inode);
-	ocfs2_inode_lock_res_init(&OCFS2_I(inode)->ip_meta_lockres,
-				  OCFS2_LOCK_TYPE_META, inode);
+				  OCFS2_LOCK_TYPE_RW, inode->i_generation,
+				  inode);
+
 	ocfs2_inode_lock_res_init(&OCFS2_I(inode)->ip_data_lockres,
-				  OCFS2_LOCK_TYPE_DATA, inode);
+				  OCFS2_LOCK_TYPE_DATA, inode->i_generation,
+				  inode);
 
 	ocfs2_set_inode_flags(inode);
 	inode->i_flags |= S_NOATIME;
@@ -366,15 +376,15 @@ static int ocfs2_read_locked_inode(struct inode *inode,
 	struct ocfs2_super *osb;
 	struct ocfs2_dinode *fe;
 	struct buffer_head *bh = NULL;
-	int status;
-	int sysfile = 0;
+	int status, can_lock;
+	u32 generation = 0;
 
 	mlog_entry("(0x%p, 0x%p)\n", inode, args);
 
 	status = -EINVAL;
 	if (inode == NULL || inode->i_sb == NULL) {
 		mlog(ML_ERROR, "bad inode\n");
-		goto bail;
+		return status;
 	}
 	sb = inode->i_sb;
 	osb = OCFS2_SB(sb);
@@ -382,50 +392,110 @@ static int ocfs2_read_locked_inode(struct inode *inode,
 	if (!args) {
 		mlog(ML_ERROR, "bad inode args\n");
 		make_bad_inode(inode);
-		goto bail;
+		return status;
 	}
 
-	/* Read the FE off disk. This is safe because the kernel only
-	 * does one read_inode2 for a new inode, and if it doesn't
-	 * exist yet then nobody can be working on it! */
-	status = ocfs2_read_block(osb, args->fi_blkno, &bh, 0, NULL);
+	/*
+	 * To improve performance of cold-cache inode stats, we take
+	 * the cluster lock here if possible.
+	 *
+	 * Generally, OCFS2 never trusts the contents of an inode
+	 * unless it's holding a cluster lock, so taking it here isn't
+	 * a correctness issue as much as it is a performance
+	 * improvement.
+	 *
+	 * There are three times when taking the lock is not a good idea:
+	 *
+	 * 1) During startup, before we have initialized the DLM.
+	 *
+	 * 2) If we are reading certain system files which never get
+	 *    cluster locks (local alloc, truncate log).
+	 *
+	 * 3) If the process doing the iget() is responsible for
+	 *    orphan dir recovery. We're holding the orphan dir lock and
+	 *    can get into a deadlock with another process on another
+	 *    node in ->delete_inode().
+	 *
+	 * #1 and #2 can be simply solved by never taking the lock
+	 * here for system files (which are the only type we read
+	 * during mount). It's a heavier approach, but our main
+	 * concern is user-accesible files anyway.
+	 *
+	 * #3 works itself out because we'll eventually take the
+	 * cluster lock before trusting anything anyway.
+	 */
+	can_lock = !(args->fi_flags & OCFS2_FI_FLAG_SYSFILE)
+		&& !(args->fi_flags & OCFS2_FI_FLAG_NOLOCK);
+
+	/*
+	 * To maintain backwards compatibility with older versions of
+	 * ocfs2-tools, we still store the generation value for system
+	 * files. The only ones that actually matter to userspace are
+	 * the journals, but it's easier and inexpensive to just flag
+	 * all system files similarly.
+	 */
+	if (args->fi_flags & OCFS2_FI_FLAG_SYSFILE)
+		generation = osb->fs_generation;
+
+	ocfs2_inode_lock_res_init(&OCFS2_I(inode)->ip_meta_lockres,
+				  OCFS2_LOCK_TYPE_META,
+				  generation, inode);
+
+	if (can_lock) {
+		status = ocfs2_meta_lock(inode, NULL, NULL, 0);
+		if (status) {
+			make_bad_inode(inode);
+			mlog_errno(status);
+			return status;
+		}
+	}
+
+	status = ocfs2_read_block(osb, args->fi_blkno, &bh, 0,
+				  can_lock ? inode : NULL);
 	if (status < 0) {
 		mlog_errno(status);
-		make_bad_inode(inode);
 		goto bail;
 	}
 
+	status = -EINVAL;
 	fe = (struct ocfs2_dinode *) bh->b_data;
 	if (!OCFS2_IS_VALID_DINODE(fe)) {
 		mlog(ML_ERROR, "Invalid dinode #%llu: signature = %.*s\n",
 		     (unsigned long long)fe->i_blkno, 7, fe->i_signature);
-		make_bad_inode(inode);
 		goto bail;
 	}
 
-	if (fe->i_flags & cpu_to_le32(OCFS2_SYSTEM_FL))
-		sysfile = 1;
+	/*
+	 * This is a code bug. Right now the caller needs to
+	 * understand whether it is asking for a system file inode or
+	 * not so the proper lock names can be built.
+	 */
+	mlog_bug_on_msg(!!(fe->i_flags & cpu_to_le32(OCFS2_SYSTEM_FL)) !=
+			!!(args->fi_flags & OCFS2_FI_FLAG_SYSFILE),
+			"Inode %llu: system file state is ambigous\n",
+			(unsigned long long)args->fi_blkno);
 
 	if (S_ISCHR(le16_to_cpu(fe->i_mode)) ||
 	    S_ISBLK(le16_to_cpu(fe->i_mode)))
     		inode->i_rdev = huge_decode_dev(le64_to_cpu(fe->id1.dev1.i_rdev));
 
-	status = -EINVAL;
 	if (ocfs2_populate_inode(inode, fe, 0) < 0) {
 		mlog(ML_ERROR, "populate failed! i_blkno=%llu, i_ino=%lu\n",
 		     (unsigned long long)fe->i_blkno, inode->i_ino);
-		make_bad_inode(inode);
 		goto bail;
 	}
 
 	BUG_ON(args->fi_blkno != le64_to_cpu(fe->i_blkno));
 
-	if (sysfile)
-	       OCFS2_I(inode)->ip_flags |= OCFS2_INODE_SYSTEM_FILE;
-
 	status = 0;
 
 bail:
+	if (can_lock)
+		ocfs2_meta_unlock(inode, 0);
+
+	if (status < 0)
+		make_bad_inode(inode);
+
 	if (args && bh)
 		brelse(bh);
 
@@ -898,9 +968,15 @@ void ocfs2_delete_inode(struct inode *inode)
 		goto bail_unlock_inode;
 	}
 
-	/* Mark the inode as successfully deleted. This is important
-	 * for ocfs2_clear_inode as it will check this flag and skip
-	 * any checkpointing work */
+	/*
+	 * Mark the inode as successfully deleted.
+	 *
+	 * This is important for ocfs2_clear_inode() as it will check
+	 * this flag and skip any checkpointing work
+	 *
+	 * ocfs2_stuff_meta_lvb() also uses this flag to invalidate
+	 * the LVB for other nodes.
+	 */
 	OCFS2_I(inode)->ip_flags |= OCFS2_INODE_DELETED;
 
 bail_unlock_inode:
diff --git a/fs/ocfs2/inode.h b/fs/ocfs2/inode.h
index 4d1e539..9957810 100644
--- a/fs/ocfs2/inode.h
+++ b/fs/ocfs2/inode.h
@@ -122,7 +122,13 @@ struct buffer_head *ocfs2_bread(struct inode *inode, int block,
 void ocfs2_clear_inode(struct inode *inode);
 void ocfs2_delete_inode(struct inode *inode);
 void ocfs2_drop_inode(struct inode *inode);
-struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 feoff);
+
+/* Flags for ocfs2_iget() */
+#define OCFS2_FI_FLAG_NOWAIT	0x1
+#define OCFS2_FI_FLAG_DELETE	0x2
+#define OCFS2_FI_FLAG_SYSFILE	0x4
+#define OCFS2_FI_FLAG_NOLOCK	0x8
+struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 feoff, int flags);
 struct inode *ocfs2_ilookup_for_vote(struct ocfs2_super *osb,
 				     u64 blkno,
 				     int delete_vote);
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index f92bf1d..fd9734d 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -1493,7 +1493,8 @@ static int ocfs2_queue_orphans(struct ocfs2_super *osb,
 			if (de->name_len == 2 && !strncmp("..", de->name, 2))
 				continue;
 
-			iter = ocfs2_iget(osb, le64_to_cpu(de->inode));
+			iter = ocfs2_iget(osb, le64_to_cpu(de->inode),
+					  OCFS2_FI_FLAG_NOLOCK);
 			if (IS_ERR(iter))
 				continue;
 
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index 6fa9788..849c3b4 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -179,7 +179,7 @@ static struct dentry *ocfs2_lookup(struct inode *dir, struct dentry *dentry,
 	if (status < 0)
 		goto bail_add;
 
-	inode = ocfs2_iget(OCFS2_SB(dir->i_sb), blkno);
+	inode = ocfs2_iget(OCFS2_SB(dir->i_sb), blkno, 0);
 	if (IS_ERR(inode)) {
 		mlog(ML_ERROR, "Unable to create inode %llu\n",
 		     (unsigned long long)blkno);
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 33a6de6..4c29cd7 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -202,7 +202,7 @@ static int ocfs2_init_global_system_inodes(struct ocfs2_super *osb)
 
 	mlog_entry_void();
 
-	new = ocfs2_iget(osb, osb->root_blkno);
+	new = ocfs2_iget(osb, osb->root_blkno, OCFS2_FI_FLAG_SYSFILE);
 	if (IS_ERR(new)) {
 		status = PTR_ERR(new);
 		mlog_errno(status);
@@ -210,7 +210,7 @@ static int ocfs2_init_global_system_inodes(struct ocfs2_super *osb)
 	}
 	osb->root_inode = new;
 
-	new = ocfs2_iget(osb, osb->system_dir_blkno);
+	new = ocfs2_iget(osb, osb->system_dir_blkno, OCFS2_FI_FLAG_SYSFILE);
 	if (IS_ERR(new)) {
 		status = PTR_ERR(new);
 		mlog_errno(status);
diff --git a/fs/ocfs2/sysfile.c b/fs/ocfs2/sysfile.c
index 9843500..5df6e35 100644
--- a/fs/ocfs2/sysfile.c
+++ b/fs/ocfs2/sysfile.c
@@ -115,7 +115,7 @@ static struct inode * _ocfs2_get_system_file_inode(struct ocfs2_super *osb,
 		goto bail;
 	}
 
-	inode = ocfs2_iget(osb, blkno);
+	inode = ocfs2_iget(osb, blkno, OCFS2_FI_FLAG_SYSFILE);
 	if (IS_ERR(inode)) {
 		mlog_errno(PTR_ERR(inode));
 		inode = NULL;
-- 
cgit v1.1


From f625c9793b6cc64aeb1b6387039d09019c214352 Mon Sep 17 00:00:00 2001
From: Mark Fasheh <mark.fasheh@oracle.com>
Date: Tue, 12 Sep 2006 21:24:53 -0700
Subject: ocfs2: Clean up lock resource refresh flags

Use of the refresh mechanism is lock-type wide, so move knowledge of that to
the ocfs2_lock_res_ops structure.

Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
---
 fs/ocfs2/dlmglue.c | 49 +++++++++++++++++++++++++++++++++++--------------
 1 file changed, 35 insertions(+), 14 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index ecb3cba..c189178 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -117,14 +117,35 @@ static int ocfs2_unblock_osb_lock(struct ocfs2_lock_res *lockres,
 static void ocfs2_dentry_post_unlock(struct ocfs2_super *osb,
 				     struct ocfs2_lock_res *lockres);
 
+/*
+ * OCFS2 Lock Resource Operations
+ *
+ * These fine tune the behavior of the generic dlmglue locking infrastructure.
+ */
 struct ocfs2_lock_res_ops {
 	void (*ast)(void *);
 	void (*bast)(void *, int);
 	void (*unlock_ast)(void *, enum dlm_status);
 	int  (*unblock)(struct ocfs2_lock_res *, struct ocfs2_unblock_ctl *);
 	void (*post_unlock)(struct ocfs2_super *, struct ocfs2_lock_res *);
+
+	/*
+	 * LOCK_TYPE_* flags which describe the specific requirements
+	 * of a lock type. Descriptions of each individual flag follow.
+	 */
+	int flags;
 };
 
+/*
+ * Some locks want to "refresh" potentially stale data when a
+ * meaningful (PRMODE or EXMODE) lock level is first obtained. If this
+ * flag is set, the OCFS2_LOCK_NEEDS_REFRESH flag will be set on the
+ * individual lockres l_flags member from the ast function. It is
+ * expected that the locking wrapper will clear the
+ * OCFS2_LOCK_NEEDS_REFRESH flag when done.
+ */
+#define LOCK_TYPE_REQUIRES_REFRESH 0x1
+
 typedef int (ocfs2_convert_worker_t)(struct ocfs2_lock_res *, int);
 static int ocfs2_generic_unblock_lock(struct ocfs2_super *osb,
 				      struct ocfs2_lock_res *lockres,
@@ -136,6 +157,7 @@ static struct ocfs2_lock_res_ops ocfs2_inode_rw_lops = {
 	.bast		= ocfs2_inode_bast_func,
 	.unlock_ast	= ocfs2_unlock_ast_func,
 	.unblock	= ocfs2_unblock_inode_lock,
+	.flags		= 0,
 };
 
 static struct ocfs2_lock_res_ops ocfs2_inode_meta_lops = {
@@ -143,6 +165,7 @@ static struct ocfs2_lock_res_ops ocfs2_inode_meta_lops = {
 	.bast		= ocfs2_inode_bast_func,
 	.unlock_ast	= ocfs2_unlock_ast_func,
 	.unblock	= ocfs2_unblock_meta,
+	.flags		= LOCK_TYPE_REQUIRES_REFRESH,
 };
 
 static struct ocfs2_lock_res_ops ocfs2_inode_data_lops = {
@@ -150,6 +173,7 @@ static struct ocfs2_lock_res_ops ocfs2_inode_data_lops = {
 	.bast		= ocfs2_inode_bast_func,
 	.unlock_ast	= ocfs2_unlock_ast_func,
 	.unblock	= ocfs2_unblock_data,
+	.flags		= 0,
 };
 
 static struct ocfs2_lock_res_ops ocfs2_super_lops = {
@@ -157,6 +181,7 @@ static struct ocfs2_lock_res_ops ocfs2_super_lops = {
 	.bast		= ocfs2_super_bast_func,
 	.unlock_ast	= ocfs2_unlock_ast_func,
 	.unblock	= ocfs2_unblock_osb_lock,
+	.flags		= LOCK_TYPE_REQUIRES_REFRESH,
 };
 
 static struct ocfs2_lock_res_ops ocfs2_rename_lops = {
@@ -164,6 +189,7 @@ static struct ocfs2_lock_res_ops ocfs2_rename_lops = {
 	.bast		= ocfs2_rename_bast_func,
 	.unlock_ast	= ocfs2_unlock_ast_func,
 	.unblock	= ocfs2_unblock_osb_lock,
+	.flags		= 0,
 };
 
 static struct ocfs2_lock_res_ops ocfs2_dentry_lops = {
@@ -172,6 +198,7 @@ static struct ocfs2_lock_res_ops ocfs2_dentry_lops = {
 	.unlock_ast	= ocfs2_unlock_ast_func,
 	.unblock	= ocfs2_unblock_dentry_lock,
 	.post_unlock	= ocfs2_dentry_post_unlock,
+	.flags		= 0,
 };
 
 static inline int ocfs2_is_inode_lock(struct ocfs2_lock_res *lockres)
@@ -569,7 +596,8 @@ static inline void ocfs2_generic_handle_convert_action(struct ocfs2_lock_res *lo
 	 * information is already up to data. Convert from NL to
 	 * *anything* however should mark ourselves as needing an
 	 * update */
-	if (lockres->l_level == LKM_NLMODE)
+	if (lockres->l_level == LKM_NLMODE &&
+	    lockres->l_ops->flags & LOCK_TYPE_REQUIRES_REFRESH)
 		lockres_or_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH);
 
 	lockres->l_level = lockres->l_requested;
@@ -586,7 +614,8 @@ static inline void ocfs2_generic_handle_attach_action(struct ocfs2_lock_res *loc
 	BUG_ON(lockres->l_flags & OCFS2_LOCK_ATTACHED);
 
 	if (lockres->l_requested > LKM_NLMODE &&
-	    !(lockres->l_flags & OCFS2_LOCK_LOCAL))
+	    !(lockres->l_flags & OCFS2_LOCK_LOCAL) &&
+	    lockres->l_ops->flags & LOCK_TYPE_REQUIRES_REFRESH)
 		lockres_or_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH);
 
 	lockres->l_level = lockres->l_requested;
@@ -645,10 +674,6 @@ static void ocfs2_inode_ast_func(void *opaque)
 		BUG();
 	}
 
-	/* data and rw locking ignores refresh flag for now. */
-	if (lockres->l_type != OCFS2_LOCK_TYPE_META)
-		lockres_clear_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH);
-
 	/* set it to something invalid so if we get called again we
 	 * can catch it. */
 	lockres->l_action = OCFS2_AST_INVALID;
@@ -730,8 +755,7 @@ static void ocfs2_inode_bast_func(void *opaque, int level)
 	mlog_exit_void();
 }
 
-static void ocfs2_generic_ast_func(struct ocfs2_lock_res *lockres,
-				   int ignore_refresh)
+static void ocfs2_generic_ast_func(struct ocfs2_lock_res *lockres)
 {
 	struct dlm_lockstatus *lksb = &lockres->l_lksb;
 	unsigned long flags;
@@ -759,9 +783,6 @@ static void ocfs2_generic_ast_func(struct ocfs2_lock_res *lockres,
 		BUG();
 	}
 
-	if (ignore_refresh)
-		lockres_clear_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH);
-
 	/* set it to something invalid so if we get called again we
 	 * can catch it. */
 	lockres->l_action = OCFS2_AST_INVALID;
@@ -778,7 +799,7 @@ static void ocfs2_super_ast_func(void *opaque)
 	mlog(0, "Superblock AST fired\n");
 
 	BUG_ON(!ocfs2_is_super_lock(lockres));
-	ocfs2_generic_ast_func(lockres, 0);
+	ocfs2_generic_ast_func(lockres);
 
 	mlog_exit_void();
 }
@@ -809,7 +830,7 @@ static void ocfs2_rename_ast_func(void *opaque)
 
 	BUG_ON(!ocfs2_is_rename_lock(lockres));
 
-	ocfs2_generic_ast_func(lockres, 1);
+	ocfs2_generic_ast_func(lockres);
 
 	mlog_exit_void();
 }
@@ -838,7 +859,7 @@ static void ocfs2_dentry_ast_func(void *opaque)
 
 	BUG_ON(!lockres);
 
-	ocfs2_generic_ast_func(lockres, 1);
+	ocfs2_generic_ast_func(lockres);
 }
 
 static void ocfs2_dentry_bast_func(void *opaque, int level)
-- 
cgit v1.1


From e92d57df273a3a7e57688e1d4f5a894870d550d2 Mon Sep 17 00:00:00 2001
From: Mark Fasheh <mark.fasheh@oracle.com>
Date: Tue, 12 Sep 2006 21:34:35 -0700
Subject: ocfs2: combine inode and generic AST functions

There is extremely little difference between the two now. We can remove the
callback from ocfs2_lock_res_ops as well.

Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
---
 fs/ocfs2/dlmglue.c | 120 +++++------------------------------------------------
 1 file changed, 10 insertions(+), 110 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index c189178..7eb40a0 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -67,16 +67,12 @@ struct ocfs2_mask_waiter {
 	unsigned long		mw_goal;
 };
 
-static void ocfs2_inode_ast_func(void *opaque);
 static void ocfs2_inode_bast_func(void *opaque,
 				  int level);
-static void ocfs2_dentry_ast_func(void *opaque);
 static void ocfs2_dentry_bast_func(void *opaque,
 				  int level);
-static void ocfs2_super_ast_func(void *opaque);
 static void ocfs2_super_bast_func(void *opaque,
 				  int level);
-static void ocfs2_rename_ast_func(void *opaque);
 static void ocfs2_rename_bast_func(void *opaque,
 				   int level);
 
@@ -123,7 +119,6 @@ static void ocfs2_dentry_post_unlock(struct ocfs2_super *osb,
  * These fine tune the behavior of the generic dlmglue locking infrastructure.
  */
 struct ocfs2_lock_res_ops {
-	void (*ast)(void *);
 	void (*bast)(void *, int);
 	void (*unlock_ast)(void *, enum dlm_status);
 	int  (*unblock)(struct ocfs2_lock_res *, struct ocfs2_unblock_ctl *);
@@ -153,7 +148,6 @@ static int ocfs2_generic_unblock_lock(struct ocfs2_super *osb,
 				      ocfs2_convert_worker_t *worker);
 
 static struct ocfs2_lock_res_ops ocfs2_inode_rw_lops = {
-	.ast		= ocfs2_inode_ast_func,
 	.bast		= ocfs2_inode_bast_func,
 	.unlock_ast	= ocfs2_unlock_ast_func,
 	.unblock	= ocfs2_unblock_inode_lock,
@@ -161,7 +155,6 @@ static struct ocfs2_lock_res_ops ocfs2_inode_rw_lops = {
 };
 
 static struct ocfs2_lock_res_ops ocfs2_inode_meta_lops = {
-	.ast		= ocfs2_inode_ast_func,
 	.bast		= ocfs2_inode_bast_func,
 	.unlock_ast	= ocfs2_unlock_ast_func,
 	.unblock	= ocfs2_unblock_meta,
@@ -169,7 +162,6 @@ static struct ocfs2_lock_res_ops ocfs2_inode_meta_lops = {
 };
 
 static struct ocfs2_lock_res_ops ocfs2_inode_data_lops = {
-	.ast		= ocfs2_inode_ast_func,
 	.bast		= ocfs2_inode_bast_func,
 	.unlock_ast	= ocfs2_unlock_ast_func,
 	.unblock	= ocfs2_unblock_data,
@@ -177,7 +169,6 @@ static struct ocfs2_lock_res_ops ocfs2_inode_data_lops = {
 };
 
 static struct ocfs2_lock_res_ops ocfs2_super_lops = {
-	.ast		= ocfs2_super_ast_func,
 	.bast		= ocfs2_super_bast_func,
 	.unlock_ast	= ocfs2_unlock_ast_func,
 	.unblock	= ocfs2_unblock_osb_lock,
@@ -185,7 +176,6 @@ static struct ocfs2_lock_res_ops ocfs2_super_lops = {
 };
 
 static struct ocfs2_lock_res_ops ocfs2_rename_lops = {
-	.ast		= ocfs2_rename_ast_func,
 	.bast		= ocfs2_rename_bast_func,
 	.unlock_ast	= ocfs2_unlock_ast_func,
 	.unblock	= ocfs2_unblock_osb_lock,
@@ -193,7 +183,6 @@ static struct ocfs2_lock_res_ops ocfs2_rename_lops = {
 };
 
 static struct ocfs2_lock_res_ops ocfs2_dentry_lops = {
-	.ast		= ocfs2_dentry_ast_func,
 	.bast		= ocfs2_dentry_bast_func,
 	.unlock_ast	= ocfs2_unlock_ast_func,
 	.unblock	= ocfs2_unblock_dentry_lock,
@@ -625,64 +614,6 @@ static inline void ocfs2_generic_handle_attach_action(struct ocfs2_lock_res *loc
 	mlog_exit_void();
 }
 
-static void ocfs2_inode_ast_func(void *opaque)
-{
-	struct ocfs2_lock_res *lockres = opaque;
-	struct inode *inode;
-	struct dlm_lockstatus *lksb;
-	unsigned long flags;
-
-	mlog_entry_void();
-
-	inode = ocfs2_lock_res_inode(lockres);
-
-	mlog(0, "AST fired for inode %llu, l_action = %u, type = %s\n",
-	     (unsigned long long)OCFS2_I(inode)->ip_blkno, lockres->l_action,
-	     ocfs2_lock_type_string(lockres->l_type));
-
-	BUG_ON(!ocfs2_is_inode_lock(lockres));
-
-	spin_lock_irqsave(&lockres->l_lock, flags);
-
-	lksb = &(lockres->l_lksb);
-	if (lksb->status != DLM_NORMAL) {
-		mlog(ML_ERROR, "ocfs2_inode_ast_func: lksb status value of %u "
-		     "on inode %llu\n", lksb->status,
-		     (unsigned long long)OCFS2_I(inode)->ip_blkno);
-		spin_unlock_irqrestore(&lockres->l_lock, flags);
-		mlog_exit_void();
-		return;
-	}
-
-	switch(lockres->l_action) {
-	case OCFS2_AST_ATTACH:
-		ocfs2_generic_handle_attach_action(lockres);
-		lockres_clear_flags(lockres, OCFS2_LOCK_LOCAL);
-		break;
-	case OCFS2_AST_CONVERT:
-		ocfs2_generic_handle_convert_action(lockres);
-		break;
-	case OCFS2_AST_DOWNCONVERT:
-		ocfs2_generic_handle_downconvert_action(lockres);
-		break;
-	default:
-		mlog(ML_ERROR, "lockres %s: ast fired with invalid action: %u "
-		     "lockres flags = 0x%lx, unlock action: %u\n",
-		     lockres->l_name, lockres->l_action, lockres->l_flags,
-		     lockres->l_unlock_action);
-
-		BUG();
-	}
-
-	/* set it to something invalid so if we get called again we
-	 * can catch it. */
-	lockres->l_action = OCFS2_AST_INVALID;
-	spin_unlock_irqrestore(&lockres->l_lock, flags);
-	wake_up(&lockres->l_event);
-
-	mlog_exit_void();
-}
-
 static int ocfs2_generic_handle_bast(struct ocfs2_lock_res *lockres,
 				     int level)
 {
@@ -755,8 +686,9 @@ static void ocfs2_inode_bast_func(void *opaque, int level)
 	mlog_exit_void();
 }
 
-static void ocfs2_generic_ast_func(struct ocfs2_lock_res *lockres)
+static void ocfs2_locking_ast(void *opaque)
 {
+	struct ocfs2_lock_res *lockres = opaque;
 	struct dlm_lockstatus *lksb = &lockres->l_lksb;
 	unsigned long flags;
 
@@ -772,6 +704,7 @@ static void ocfs2_generic_ast_func(struct ocfs2_lock_res *lockres)
 	switch(lockres->l_action) {
 	case OCFS2_AST_ATTACH:
 		ocfs2_generic_handle_attach_action(lockres);
+		lockres_clear_flags(lockres, OCFS2_LOCK_LOCAL);
 		break;
 	case OCFS2_AST_CONVERT:
 		ocfs2_generic_handle_convert_action(lockres);
@@ -780,6 +713,10 @@ static void ocfs2_generic_ast_func(struct ocfs2_lock_res *lockres)
 		ocfs2_generic_handle_downconvert_action(lockres);
 		break;
 	default:
+		mlog(ML_ERROR, "lockres %s: ast fired with invalid action: %u "
+		     "lockres flags = 0x%lx, unlock action: %u\n",
+		     lockres->l_name, lockres->l_action, lockres->l_flags,
+		     lockres->l_unlock_action);
 		BUG();
 	}
 
@@ -791,19 +728,6 @@ static void ocfs2_generic_ast_func(struct ocfs2_lock_res *lockres)
 	spin_unlock_irqrestore(&lockres->l_lock, flags);
 }
 
-static void ocfs2_super_ast_func(void *opaque)
-{
-	struct ocfs2_lock_res *lockres = opaque;
-
-	mlog_entry_void();
-	mlog(0, "Superblock AST fired\n");
-
-	BUG_ON(!ocfs2_is_super_lock(lockres));
-	ocfs2_generic_ast_func(lockres);
-
-	mlog_exit_void();
-}
-
 static void ocfs2_super_bast_func(void *opaque,
 				  int level)
 {
@@ -820,21 +744,6 @@ static void ocfs2_super_bast_func(void *opaque,
 	mlog_exit_void();
 }
 
-static void ocfs2_rename_ast_func(void *opaque)
-{
-	struct ocfs2_lock_res *lockres = opaque;
-
-	mlog_entry_void();
-
-	mlog(0, "Rename AST fired\n");
-
-	BUG_ON(!ocfs2_is_rename_lock(lockres));
-
-	ocfs2_generic_ast_func(lockres);
-
-	mlog_exit_void();
-}
-
 static void ocfs2_rename_bast_func(void *opaque,
 				   int level)
 {
@@ -853,15 +762,6 @@ static void ocfs2_rename_bast_func(void *opaque,
 	mlog_exit_void();
 }
 
-static void ocfs2_dentry_ast_func(void *opaque)
-{
-	struct ocfs2_lock_res *lockres = opaque;
-
-	BUG_ON(!lockres);
-
-	ocfs2_generic_ast_func(lockres);
-}
-
 static void ocfs2_dentry_bast_func(void *opaque, int level)
 {
 	struct ocfs2_lock_res *lockres = opaque;
@@ -928,7 +828,7 @@ static int ocfs2_lock_create(struct ocfs2_super *osb,
 			 dlm_flags,
 			 lockres->l_name,
 			 OCFS2_LOCK_ID_MAX_LEN - 1,
-			 lockres->l_ops->ast,
+			 ocfs2_locking_ast,
 			 lockres,
 			 lockres->l_ops->bast);
 	if (status != DLM_NORMAL) {
@@ -1118,7 +1018,7 @@ again:
 				 lkm_flags|LKM_CONVERT|LKM_VALBLK,
 				 lockres->l_name,
 				 OCFS2_LOCK_ID_MAX_LEN - 1,
-				 lockres->l_ops->ast,
+				 ocfs2_locking_ast,
 				 lockres,
 				 lockres->l_ops->bast);
 		if (status != DLM_NORMAL) {
@@ -2599,7 +2499,7 @@ static int ocfs2_downconvert_lock(struct ocfs2_super *osb,
 			 dlm_flags,
 			 lockres->l_name,
 			 OCFS2_LOCK_ID_MAX_LEN - 1,
-			 lockres->l_ops->ast,
+			 ocfs2_locking_ast,
 			 lockres,
 			 lockres->l_ops->bast);
 	if (status != DLM_NORMAL) {
-- 
cgit v1.1


From 2a45f2d13e1dd91bc110801f5818379f2699509c Mon Sep 17 00:00:00 2001
From: Mark Fasheh <mark.fasheh@oracle.com>
Date: Tue, 12 Sep 2006 21:36:58 -0700
Subject: ocfs2: remove ->unlock_ast() callback from ocfs2_lock_res_ops

This was always defined to the same function in all locks, so clean things
up by removing and passing ocfs2_unlock_ast() directly to the DLM.

Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
---
 fs/ocfs2/dlmglue.c | 16 +++-------------
 1 file changed, 3 insertions(+), 13 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index 7eb40a0..7532f80 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -96,9 +96,6 @@ struct ocfs2_unblock_ctl {
 	enum ocfs2_unblock_action unblock_action;
 };
 
-/* so far, all locks have gotten along with the same unlock ast */
-static void ocfs2_unlock_ast_func(void *opaque,
-				  enum dlm_status status);
 static int ocfs2_unblock_meta(struct ocfs2_lock_res *lockres,
 			      struct ocfs2_unblock_ctl *ctl);
 static int ocfs2_unblock_data(struct ocfs2_lock_res *lockres,
@@ -120,7 +117,6 @@ static void ocfs2_dentry_post_unlock(struct ocfs2_super *osb,
  */
 struct ocfs2_lock_res_ops {
 	void (*bast)(void *, int);
-	void (*unlock_ast)(void *, enum dlm_status);
 	int  (*unblock)(struct ocfs2_lock_res *, struct ocfs2_unblock_ctl *);
 	void (*post_unlock)(struct ocfs2_super *, struct ocfs2_lock_res *);
 
@@ -149,42 +145,36 @@ static int ocfs2_generic_unblock_lock(struct ocfs2_super *osb,
 
 static struct ocfs2_lock_res_ops ocfs2_inode_rw_lops = {
 	.bast		= ocfs2_inode_bast_func,
-	.unlock_ast	= ocfs2_unlock_ast_func,
 	.unblock	= ocfs2_unblock_inode_lock,
 	.flags		= 0,
 };
 
 static struct ocfs2_lock_res_ops ocfs2_inode_meta_lops = {
 	.bast		= ocfs2_inode_bast_func,
-	.unlock_ast	= ocfs2_unlock_ast_func,
 	.unblock	= ocfs2_unblock_meta,
 	.flags		= LOCK_TYPE_REQUIRES_REFRESH,
 };
 
 static struct ocfs2_lock_res_ops ocfs2_inode_data_lops = {
 	.bast		= ocfs2_inode_bast_func,
-	.unlock_ast	= ocfs2_unlock_ast_func,
 	.unblock	= ocfs2_unblock_data,
 	.flags		= 0,
 };
 
 static struct ocfs2_lock_res_ops ocfs2_super_lops = {
 	.bast		= ocfs2_super_bast_func,
-	.unlock_ast	= ocfs2_unlock_ast_func,
 	.unblock	= ocfs2_unblock_osb_lock,
 	.flags		= LOCK_TYPE_REQUIRES_REFRESH,
 };
 
 static struct ocfs2_lock_res_ops ocfs2_rename_lops = {
 	.bast		= ocfs2_rename_bast_func,
-	.unlock_ast	= ocfs2_unlock_ast_func,
 	.unblock	= ocfs2_unblock_osb_lock,
 	.flags		= 0,
 };
 
 static struct ocfs2_lock_res_ops ocfs2_dentry_lops = {
 	.bast		= ocfs2_dentry_bast_func,
-	.unlock_ast	= ocfs2_unlock_ast_func,
 	.unblock	= ocfs2_unblock_dentry_lock,
 	.post_unlock	= ocfs2_dentry_post_unlock,
 	.flags		= 0,
@@ -2221,7 +2211,7 @@ void ocfs2_dlm_shutdown(struct ocfs2_super *osb)
 	mlog_exit_void();
 }
 
-static void ocfs2_unlock_ast_func(void *opaque, enum dlm_status status)
+static void ocfs2_unlock_ast(void *opaque, enum dlm_status status)
 {
 	struct ocfs2_lock_res *lockres = opaque;
 	unsigned long flags;
@@ -2345,7 +2335,7 @@ static int ocfs2_drop_lock(struct ocfs2_super *osb,
 	mlog(0, "lock %s\n", lockres->l_name);
 
 	status = dlmunlock(osb->dlm, &lockres->l_lksb, LKM_VALBLK,
-			   lockres->l_ops->unlock_ast, lockres);
+			   ocfs2_unlock_ast, lockres);
 	if (status != DLM_NORMAL) {
 		ocfs2_log_dlm_error("dlmunlock", status, lockres);
 		mlog(ML_ERROR, "lockres flags: %lu\n", lockres->l_flags);
@@ -2560,7 +2550,7 @@ static int ocfs2_cancel_convert(struct ocfs2_super *osb,
 	status = dlmunlock(osb->dlm,
 			   &lockres->l_lksb,
 			   LKM_CANCEL,
-			   lockres->l_ops->unlock_ast,
+			   ocfs2_unlock_ast,
 			   lockres);
 	if (status != DLM_NORMAL) {
 		ocfs2_log_dlm_error("dlmunlock", status, lockres);
-- 
cgit v1.1


From 54a7e7552e484c08db221e49c4519ccdeb8882d0 Mon Sep 17 00:00:00 2001
From: Mark Fasheh <mark.fasheh@oracle.com>
Date: Tue, 12 Sep 2006 21:49:13 -0700
Subject: ocfs2: Add ->get_osb() dlmglue locking operation

Will be used to find the ocfs2_super structure from a given lockres.

Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
---
 fs/ocfs2/dlmglue.c | 33 +++++++++++++++++++++++++++++++++
 1 file changed, 33 insertions(+)

(limited to 'fs')

diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index 7532f80..2e4d43a 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -75,6 +75,8 @@ static void ocfs2_super_bast_func(void *opaque,
 				  int level);
 static void ocfs2_rename_bast_func(void *opaque,
 				   int level);
+static struct ocfs2_super *ocfs2_get_dentry_osb(struct ocfs2_lock_res *lockres);
+static struct ocfs2_super *ocfs2_get_inode_osb(struct ocfs2_lock_res *lockres);
 
 /*
  * Return value from ocfs2_convert_worker_t functions.
@@ -116,6 +118,11 @@ static void ocfs2_dentry_post_unlock(struct ocfs2_super *osb,
  * These fine tune the behavior of the generic dlmglue locking infrastructure.
  */
 struct ocfs2_lock_res_ops {
+	/*
+	 * Translate an ocfs2_lock_res * into an ocfs2_super *. Define
+	 * this callback if ->l_priv is not an ocfs2_super pointer
+	 */
+	struct ocfs2_super * (*get_osb)(struct ocfs2_lock_res *);
 	void (*bast)(void *, int);
 	int  (*unblock)(struct ocfs2_lock_res *, struct ocfs2_unblock_ctl *);
 	void (*post_unlock)(struct ocfs2_super *, struct ocfs2_lock_res *);
@@ -144,18 +151,21 @@ static int ocfs2_generic_unblock_lock(struct ocfs2_super *osb,
 				      ocfs2_convert_worker_t *worker);
 
 static struct ocfs2_lock_res_ops ocfs2_inode_rw_lops = {
+	.get_osb	= ocfs2_get_inode_osb,
 	.bast		= ocfs2_inode_bast_func,
 	.unblock	= ocfs2_unblock_inode_lock,
 	.flags		= 0,
 };
 
 static struct ocfs2_lock_res_ops ocfs2_inode_meta_lops = {
+	.get_osb	= ocfs2_get_inode_osb,
 	.bast		= ocfs2_inode_bast_func,
 	.unblock	= ocfs2_unblock_meta,
 	.flags		= LOCK_TYPE_REQUIRES_REFRESH,
 };
 
 static struct ocfs2_lock_res_ops ocfs2_inode_data_lops = {
+	.get_osb	= ocfs2_get_inode_osb,
 	.bast		= ocfs2_inode_bast_func,
 	.unblock	= ocfs2_unblock_data,
 	.flags		= 0,
@@ -174,6 +184,7 @@ static struct ocfs2_lock_res_ops ocfs2_rename_lops = {
 };
 
 static struct ocfs2_lock_res_ops ocfs2_dentry_lops = {
+	.get_osb	= ocfs2_get_dentry_osb,
 	.bast		= ocfs2_dentry_bast_func,
 	.unblock	= ocfs2_unblock_dentry_lock,
 	.post_unlock	= ocfs2_dentry_post_unlock,
@@ -219,6 +230,14 @@ static inline struct ocfs2_dentry_lock *ocfs2_lock_res_dl(struct ocfs2_lock_res
 	return (struct ocfs2_dentry_lock *)lockres->l_priv;
 }
 
+static inline struct ocfs2_super *ocfs2_get_lockres_osb(struct ocfs2_lock_res *lockres)
+{
+	if (lockres->l_ops->get_osb)
+		return lockres->l_ops->get_osb(lockres);
+
+	return (struct ocfs2_super *)lockres->l_priv;
+}
+
 static int ocfs2_lock_create(struct ocfs2_super *osb,
 			     struct ocfs2_lock_res *lockres,
 			     int level,
@@ -352,6 +371,13 @@ void ocfs2_inode_lock_res_init(struct ocfs2_lock_res *res,
 	ocfs2_lock_res_init_common(OCFS2_SB(inode->i_sb), res, type, ops, inode);
 }
 
+static struct ocfs2_super *ocfs2_get_inode_osb(struct ocfs2_lock_res *lockres)
+{
+	struct inode *inode = ocfs2_lock_res_inode(lockres);
+
+	return OCFS2_SB(inode->i_sb);
+}
+
 static __u64 ocfs2_get_dentry_lock_ino(struct ocfs2_lock_res *lockres)
 {
 	__be64 inode_blkno_be;
@@ -362,6 +388,13 @@ static __u64 ocfs2_get_dentry_lock_ino(struct ocfs2_lock_res *lockres)
 	return be64_to_cpu(inode_blkno_be);
 }
 
+static struct ocfs2_super *ocfs2_get_dentry_osb(struct ocfs2_lock_res *lockres)
+{
+	struct ocfs2_dentry_lock *dl = lockres->l_priv;
+
+	return OCFS2_SB(dl->dl_inode->i_sb);
+}
+
 void ocfs2_dentry_lock_res_init(struct ocfs2_dentry_lock *dl,
 				u64 parent, struct inode *inode)
 {
-- 
cgit v1.1


From aa2623ad80577b37637914e809bafa36994ccdf1 Mon Sep 17 00:00:00 2001
From: Mark Fasheh <mark.fasheh@oracle.com>
Date: Tue, 12 Sep 2006 21:58:23 -0700
Subject: ocfs2: combine inode and generic blocking AST functions

There is extremely little difference between the two now. We can remove the
callback from ocfs2_lock_res_ops as well.

Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
---
 fs/ocfs2/dlmglue.c | 123 +++++------------------------------------------------
 1 file changed, 11 insertions(+), 112 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index 2e4d43a..3d5c6a0 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -67,14 +67,6 @@ struct ocfs2_mask_waiter {
 	unsigned long		mw_goal;
 };
 
-static void ocfs2_inode_bast_func(void *opaque,
-				  int level);
-static void ocfs2_dentry_bast_func(void *opaque,
-				  int level);
-static void ocfs2_super_bast_func(void *opaque,
-				  int level);
-static void ocfs2_rename_bast_func(void *opaque,
-				   int level);
 static struct ocfs2_super *ocfs2_get_dentry_osb(struct ocfs2_lock_res *lockres);
 static struct ocfs2_super *ocfs2_get_inode_osb(struct ocfs2_lock_res *lockres);
 
@@ -123,7 +115,6 @@ struct ocfs2_lock_res_ops {
 	 * this callback if ->l_priv is not an ocfs2_super pointer
 	 */
 	struct ocfs2_super * (*get_osb)(struct ocfs2_lock_res *);
-	void (*bast)(void *, int);
 	int  (*unblock)(struct ocfs2_lock_res *, struct ocfs2_unblock_ctl *);
 	void (*post_unlock)(struct ocfs2_super *, struct ocfs2_lock_res *);
 
@@ -152,40 +143,34 @@ static int ocfs2_generic_unblock_lock(struct ocfs2_super *osb,
 
 static struct ocfs2_lock_res_ops ocfs2_inode_rw_lops = {
 	.get_osb	= ocfs2_get_inode_osb,
-	.bast		= ocfs2_inode_bast_func,
 	.unblock	= ocfs2_unblock_inode_lock,
 	.flags		= 0,
 };
 
 static struct ocfs2_lock_res_ops ocfs2_inode_meta_lops = {
 	.get_osb	= ocfs2_get_inode_osb,
-	.bast		= ocfs2_inode_bast_func,
 	.unblock	= ocfs2_unblock_meta,
 	.flags		= LOCK_TYPE_REQUIRES_REFRESH,
 };
 
 static struct ocfs2_lock_res_ops ocfs2_inode_data_lops = {
 	.get_osb	= ocfs2_get_inode_osb,
-	.bast		= ocfs2_inode_bast_func,
 	.unblock	= ocfs2_unblock_data,
 	.flags		= 0,
 };
 
 static struct ocfs2_lock_res_ops ocfs2_super_lops = {
-	.bast		= ocfs2_super_bast_func,
 	.unblock	= ocfs2_unblock_osb_lock,
 	.flags		= LOCK_TYPE_REQUIRES_REFRESH,
 };
 
 static struct ocfs2_lock_res_ops ocfs2_rename_lops = {
-	.bast		= ocfs2_rename_bast_func,
 	.unblock	= ocfs2_unblock_osb_lock,
 	.flags		= 0,
 };
 
 static struct ocfs2_lock_res_ops ocfs2_dentry_lops = {
 	.get_osb	= ocfs2_get_dentry_osb,
-	.bast		= ocfs2_dentry_bast_func,
 	.unblock	= ocfs2_unblock_dentry_lock,
 	.post_unlock	= ocfs2_dentry_post_unlock,
 	.flags		= 0,
@@ -198,24 +183,6 @@ static inline int ocfs2_is_inode_lock(struct ocfs2_lock_res *lockres)
 		lockres->l_type == OCFS2_LOCK_TYPE_RW;
 }
 
-static inline int ocfs2_is_super_lock(struct ocfs2_lock_res *lockres)
-{
-	return lockres->l_type == OCFS2_LOCK_TYPE_SUPER;
-}
-
-static inline int ocfs2_is_rename_lock(struct ocfs2_lock_res *lockres)
-{
-	return lockres->l_type == OCFS2_LOCK_TYPE_RENAME;
-}
-
-static inline struct ocfs2_super *ocfs2_lock_res_super(struct ocfs2_lock_res *lockres)
-{
-	BUG_ON(!ocfs2_is_super_lock(lockres)
-	       && !ocfs2_is_rename_lock(lockres));
-
-	return (struct ocfs2_super *) lockres->l_priv;
-}
-
 static inline struct inode *ocfs2_lock_res_inode(struct ocfs2_lock_res *lockres)
 {
 	BUG_ON(!ocfs2_is_inode_lock(lockres));
@@ -663,17 +630,19 @@ static int ocfs2_generic_handle_bast(struct ocfs2_lock_res *lockres,
 	return needs_downconvert;
 }
 
-static void ocfs2_generic_bast_func(struct ocfs2_super *osb,
-				    struct ocfs2_lock_res *lockres,
-				    int level)
+static void ocfs2_blocking_ast(void *opaque, int level)
 {
+	struct ocfs2_lock_res *lockres = opaque;
+	struct ocfs2_super *osb = ocfs2_get_lockres_osb(lockres);
 	int needs_downconvert;
 	unsigned long flags;
 
-	mlog_entry_void();
-
 	BUG_ON(level <= LKM_NLMODE);
 
+	mlog(0, "BAST fired for lockres %s, blocking %d, level %d type %s\n",
+	     lockres->l_name, level, lockres->l_level,
+	     ocfs2_lock_type_string(lockres->l_type));
+
 	spin_lock_irqsave(&lockres->l_lock, flags);
 	needs_downconvert = ocfs2_generic_handle_bast(lockres, level);
 	if (needs_downconvert)
@@ -683,30 +652,6 @@ static void ocfs2_generic_bast_func(struct ocfs2_super *osb,
 	wake_up(&lockres->l_event);
 
 	ocfs2_kick_vote_thread(osb);
-
-	mlog_exit_void();
-}
-
-static void ocfs2_inode_bast_func(void *opaque, int level)
-{
-	struct ocfs2_lock_res *lockres = opaque;
-	struct inode *inode;
-	struct ocfs2_super *osb;
-
-	mlog_entry_void();
-
-	BUG_ON(!ocfs2_is_inode_lock(lockres));
-
-	inode = ocfs2_lock_res_inode(lockres);
-	osb = OCFS2_SB(inode->i_sb);
-
-	mlog(0, "BAST fired for inode %llu, blocking %d, level %d type %s\n",
-	     (unsigned long long)OCFS2_I(inode)->ip_blkno, level,
-	     lockres->l_level, ocfs2_lock_type_string(lockres->l_type));
-
-	ocfs2_generic_bast_func(osb, lockres, level);
-
-	mlog_exit_void();
 }
 
 static void ocfs2_locking_ast(void *opaque)
@@ -751,52 +696,6 @@ static void ocfs2_locking_ast(void *opaque)
 	spin_unlock_irqrestore(&lockres->l_lock, flags);
 }
 
-static void ocfs2_super_bast_func(void *opaque,
-				  int level)
-{
-	struct ocfs2_lock_res *lockres = opaque;
-	struct ocfs2_super *osb;
-
-	mlog_entry_void();
-	mlog(0, "Superblock BAST fired\n");
-
-	BUG_ON(!ocfs2_is_super_lock(lockres));
-       	osb = ocfs2_lock_res_super(lockres);
-	ocfs2_generic_bast_func(osb, lockres, level);
-
-	mlog_exit_void();
-}
-
-static void ocfs2_rename_bast_func(void *opaque,
-				   int level)
-{
-	struct ocfs2_lock_res *lockres = opaque;
-	struct ocfs2_super *osb;
-
-	mlog_entry_void();
-
-	mlog(0, "Rename BAST fired\n");
-
-	BUG_ON(!ocfs2_is_rename_lock(lockres));
-
-	osb = ocfs2_lock_res_super(lockres);
-	ocfs2_generic_bast_func(osb, lockres, level);
-
-	mlog_exit_void();
-}
-
-static void ocfs2_dentry_bast_func(void *opaque, int level)
-{
-	struct ocfs2_lock_res *lockres = opaque;
-	struct ocfs2_dentry_lock *dl = lockres->l_priv;
-	struct ocfs2_super *osb = OCFS2_SB(dl->dl_inode->i_sb);
-
-	mlog(0, "Dentry bast: level: %d, name: %s\n", level,
-	     lockres->l_name);
-
-	ocfs2_generic_bast_func(osb, lockres, level);
-}
-
 static inline void ocfs2_recover_from_dlm_error(struct ocfs2_lock_res *lockres,
 						int convert)
 {
@@ -853,7 +752,7 @@ static int ocfs2_lock_create(struct ocfs2_super *osb,
 			 OCFS2_LOCK_ID_MAX_LEN - 1,
 			 ocfs2_locking_ast,
 			 lockres,
-			 lockres->l_ops->bast);
+			 ocfs2_blocking_ast);
 	if (status != DLM_NORMAL) {
 		ocfs2_log_dlm_error("dlmlock", status, lockres);
 		ret = -EINVAL;
@@ -1043,7 +942,7 @@ again:
 				 OCFS2_LOCK_ID_MAX_LEN - 1,
 				 ocfs2_locking_ast,
 				 lockres,
-				 lockres->l_ops->bast);
+				 ocfs2_blocking_ast);
 		if (status != DLM_NORMAL) {
 			if ((lkm_flags & LKM_NOQUEUE) &&
 			    (status == DLM_NOTQUEUED))
@@ -2524,7 +2423,7 @@ static int ocfs2_downconvert_lock(struct ocfs2_super *osb,
 			 OCFS2_LOCK_ID_MAX_LEN - 1,
 			 ocfs2_locking_ast,
 			 lockres,
-			 lockres->l_ops->bast);
+			 ocfs2_blocking_ast);
 	if (status != DLM_NORMAL) {
 		ocfs2_log_dlm_error("dlmlock", status, lockres);
 		ret = -EINVAL;
@@ -3040,7 +2939,7 @@ static int ocfs2_unblock_osb_lock(struct ocfs2_lock_res *lockres,
 
 	mlog(0, "Unblock lockres %s\n", lockres->l_name);
 
-	osb = ocfs2_lock_res_super(lockres);
+	osb = ocfs2_get_lockres_osb(lockres);
 
 	status = ocfs2_generic_unblock_lock(osb,
 					    lockres,
-- 
cgit v1.1


From b80fc012e03f8f207911b5eafe6916b000e03c8b Mon Sep 17 00:00:00 2001
From: Mark Fasheh <mark.fasheh@oracle.com>
Date: Tue, 12 Sep 2006 22:08:14 -0700
Subject: ocfs2: don't unconditionally pass LVB flags

Allow a lock type to specifiy whether it makes use of the LVB. The only type
which does this right now is the meta data lock. This should save us some
space on network messages since they won't have to needlessly transmit value
blocks.

Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
---
 fs/ocfs2/dlmglue.c | 18 +++++++++++++++---
 1 file changed, 15 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index 3d5c6a0..4056218 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -135,6 +135,11 @@ struct ocfs2_lock_res_ops {
  */
 #define LOCK_TYPE_REQUIRES_REFRESH 0x1
 
+/*
+ * Indicate that a lock type makes use of the lock value block.
+ */
+#define LOCK_TYPE_USES_LVB		0x2
+
 typedef int (ocfs2_convert_worker_t)(struct ocfs2_lock_res *, int);
 static int ocfs2_generic_unblock_lock(struct ocfs2_super *osb,
 				      struct ocfs2_lock_res *lockres,
@@ -150,7 +155,7 @@ static struct ocfs2_lock_res_ops ocfs2_inode_rw_lops = {
 static struct ocfs2_lock_res_ops ocfs2_inode_meta_lops = {
 	.get_osb	= ocfs2_get_inode_osb,
 	.unblock	= ocfs2_unblock_meta,
-	.flags		= LOCK_TYPE_REQUIRES_REFRESH,
+	.flags		= LOCK_TYPE_REQUIRES_REFRESH|LOCK_TYPE_USES_LVB,
 };
 
 static struct ocfs2_lock_res_ops ocfs2_inode_data_lops = {
@@ -870,6 +875,9 @@ static int ocfs2_cluster_lock(struct ocfs2_super *osb,
 
 	ocfs2_init_mask_waiter(&mw);
 
+	if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB)
+		lkm_flags |= LKM_VALBLK;
+
 again:
 	wait = 0;
 
@@ -937,7 +945,7 @@ again:
 		status = dlmlock(osb->dlm,
 				 level,
 				 &lockres->l_lksb,
-				 lkm_flags|LKM_CONVERT|LKM_VALBLK,
+				 lkm_flags|LKM_CONVERT,
 				 lockres->l_name,
 				 OCFS2_LOCK_ID_MAX_LEN - 1,
 				 ocfs2_locking_ast,
@@ -2212,11 +2220,15 @@ static int ocfs2_drop_lock(struct ocfs2_super *osb,
 {
 	enum dlm_status status;
 	unsigned long flags;
+	int lkm_flags = 0;
 
 	/* We didn't get anywhere near actually using this lockres. */
 	if (!(lockres->l_flags & OCFS2_LOCK_INITIALIZED))
 		goto out;
 
+	if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB)
+		lkm_flags |= LKM_VALBLK;
+
 	spin_lock_irqsave(&lockres->l_lock, flags);
 
 	mlog_bug_on_msg(!(lockres->l_flags & OCFS2_LOCK_FREEING),
@@ -2266,7 +2278,7 @@ static int ocfs2_drop_lock(struct ocfs2_super *osb,
 
 	mlog(0, "lock %s\n", lockres->l_name);
 
-	status = dlmunlock(osb->dlm, &lockres->l_lksb, LKM_VALBLK,
+	status = dlmunlock(osb->dlm, &lockres->l_lksb, lkm_flags,
 			   ocfs2_unlock_ast, lockres);
 	if (status != DLM_NORMAL) {
 		ocfs2_log_dlm_error("dlmunlock", status, lockres);
-- 
cgit v1.1


From f7fbfdd1fc91543253ba742a926a29c289f8e6ca Mon Sep 17 00:00:00 2001
From: Mark Fasheh <mark.fasheh@oracle.com>
Date: Wed, 13 Sep 2006 21:02:29 -0700
Subject: ocfs2: Check for refreshing locks in generic unblock function

Tidy up the exit path a bit too.

Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
---
 fs/ocfs2/dlmglue.c | 31 +++++++++++++++++++------------
 1 file changed, 19 insertions(+), 12 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index 4056218..ebc4a49 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -2640,22 +2640,22 @@ recheck:
 	/* if we're blocking an exclusive and we have *any* holders,
 	 * then requeue. */
 	if ((lockres->l_blocking == LKM_EXMODE)
-	    && (lockres->l_ex_holders || lockres->l_ro_holders)) {
-		spin_unlock_irqrestore(&lockres->l_lock, flags);
-		ctl->requeue = 1;
-		ret = 0;
-		goto leave;
-	}
+	    && (lockres->l_ex_holders || lockres->l_ro_holders))
+		goto leave_requeue;
 
 	/* If it's a PR we're blocking, then only
 	 * requeue if we've got any EX holders */
 	if (lockres->l_blocking == LKM_PRMODE &&
-	    lockres->l_ex_holders) {
-		spin_unlock_irqrestore(&lockres->l_lock, flags);
-		ctl->requeue = 1;
-		ret = 0;
-		goto leave;
-	}
+	    lockres->l_ex_holders)
+		goto leave_requeue;
+
+	/*
+	 * Can we get a lock in this state if the holder counts are
+	 * zero? The meta data unblock code used to check this.
+	 */
+	if ((lockres->l_ops->flags & LOCK_TYPE_REQUIRES_REFRESH)
+	    && (lockres->l_flags & OCFS2_LOCK_REFRESHING))
+		goto leave_requeue;
 
 	/* If we get here, then we know that there are no more
 	 * incompatible holders (and anyone asking for an incompatible
@@ -2692,6 +2692,13 @@ downconvert:
 leave:
 	mlog_exit(ret);
 	return ret;
+
+leave_requeue:
+	spin_unlock_irqrestore(&lockres->l_lock, flags);
+	ctl->requeue = 1;
+
+	mlog_exit(0);
+	return 0;
 }
 
 static int ocfs2_data_convert_worker(struct ocfs2_lock_res *lockres,
-- 
cgit v1.1


From 16d5b9567ad5241b5c6e0cc4778c1af6c04bb801 Mon Sep 17 00:00:00 2001
From: Mark Fasheh <mark.fasheh@oracle.com>
Date: Wed, 13 Sep 2006 21:10:12 -0700
Subject: ocfs2: Add ->check_downconvert callback in dlmglue

This will allow lock types to force a requeue of a lock downconvert.

Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
---
 fs/ocfs2/dlmglue.c | 19 ++++++++++++++++++-
 1 file changed, 18 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index ebc4a49..9e8ed60 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -119,6 +119,18 @@ struct ocfs2_lock_res_ops {
 	void (*post_unlock)(struct ocfs2_super *, struct ocfs2_lock_res *);
 
 	/*
+	 * Allow a lock type to add checks to determine whether it is
+	 * safe to downconvert a lock. Return 0 to re-queue the
+	 * downconvert at a later time, nonzero to continue.
+	 *
+	 * For most locks, the default checks that there are no
+	 * incompatible holders are sufficient.
+	 *
+	 * Called with the lockres spinlock held.
+	 */
+	int (*check_downconvert)(struct ocfs2_lock_res *, int);
+
+	/*
 	 * LOCK_TYPE_* flags which describe the specific requirements
 	 * of a lock type. Descriptions of each individual flag follow.
 	 */
@@ -2657,6 +2669,12 @@ recheck:
 	    && (lockres->l_flags & OCFS2_LOCK_REFRESHING))
 		goto leave_requeue;
 
+	new_level = ocfs2_highest_compat_lock_level(lockres->l_blocking);
+
+	if (lockres->l_ops->check_downconvert
+	    && !lockres->l_ops->check_downconvert(lockres, new_level))
+		goto leave_requeue;
+
 	/* If we get here, then we know that there are no more
 	 * incompatible holders (and anyone asking for an incompatible
 	 * lock is blocked). We can now downconvert the lock */
@@ -2684,7 +2702,6 @@ recheck:
 
 downconvert:
 	ctl->requeue = 0;
-	new_level = ocfs2_highest_compat_lock_level(lockres->l_blocking);
 
 	ocfs2_prepare_downconvert(lockres, new_level);
 	spin_unlock_irqrestore(&lockres->l_lock, flags);
-- 
cgit v1.1


From 5ef0d4ea087740908f4fb57606f6c09e3b90c477 Mon Sep 17 00:00:00 2001
From: Mark Fasheh <mark.fasheh@oracle.com>
Date: Wed, 13 Sep 2006 21:21:52 -0700
Subject: ocfs2: Add ->set_lvb callback in dlmglue

This allows a lock type to set the value block before downconvert.

Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
---
 fs/ocfs2/dlmglue.c | 31 +++++++++++++++++++++++++++++--
 1 file changed, 29 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index 9e8ed60..faa6f57 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -131,6 +131,17 @@ struct ocfs2_lock_res_ops {
 	int (*check_downconvert)(struct ocfs2_lock_res *, int);
 
 	/*
+	 * Allows a lock type to populate the lock value block. This
+	 * is called on downconvert, and when we drop a lock.
+	 *
+	 * Locks that want to use this should set LOCK_TYPE_USES_LVB
+	 * in the flags field.
+	 *
+	 * Called with the lockres spinlock held.
+	 */
+	void (*set_lvb)(struct ocfs2_lock_res *);
+
+	/*
 	 * LOCK_TYPE_* flags which describe the specific requirements
 	 * of a lock type. Descriptions of each individual flag follow.
 	 */
@@ -148,7 +159,8 @@ struct ocfs2_lock_res_ops {
 #define LOCK_TYPE_REQUIRES_REFRESH 0x1
 
 /*
- * Indicate that a lock type makes use of the lock value block.
+ * Indicate that a lock type makes use of the lock value block. The
+ * ->set_lvb lock type callback must be defined.
  */
 #define LOCK_TYPE_USES_LVB		0x2
 
@@ -2629,6 +2641,7 @@ static int ocfs2_generic_unblock_lock(struct ocfs2_super *osb,
 	int blocking;
 	int new_level;
 	int ret = 0;
+	int set_lvb = 0;
 
 	mlog_entry_void();
 
@@ -2703,9 +2716,23 @@ recheck:
 downconvert:
 	ctl->requeue = 0;
 
+	if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB) {
+		if (lockres->l_level == LKM_EXMODE)
+			set_lvb = 1;
+
+		/*
+		 * We only set the lvb if the lock has been fully
+		 * refreshed - otherwise we risk setting stale
+		 * data. Otherwise, there's no need to actually clear
+		 * out the lvb here as it's value is still valid.
+		 */
+		if (set_lvb && !(lockres->l_flags & OCFS2_LOCK_NEEDS_REFRESH))
+			lockres->l_ops->set_lvb(lockres);
+	}
+
 	ocfs2_prepare_downconvert(lockres, new_level);
 	spin_unlock_irqrestore(&lockres->l_lock, flags);
-	ret = ocfs2_downconvert_lock(osb, lockres, new_level, 0);
+	ret = ocfs2_downconvert_lock(osb, lockres, new_level, set_lvb);
 leave:
 	mlog_exit(ret);
 	return ret;
-- 
cgit v1.1


From 810d5aeba18825c754cf47db59eb83814a54bb27 Mon Sep 17 00:00:00 2001
From: Mark Fasheh <mark.fasheh@oracle.com>
Date: Wed, 13 Sep 2006 21:39:52 -0700
Subject: ocfs2: Have the metadata lock use generic dlmglue functions

Fill in the ->check_downconvert and ->set_lvb callbacks with meta data
specific operations and switch ocfs2_unblock_meta() to call
ocfs2_generic_unblock_lock()

Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
---
 fs/ocfs2/dlmglue.c | 32 +++++++++++++++++++++++++++++++-
 1 file changed, 31 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index faa6f57..c8177d0 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -92,6 +92,10 @@ struct ocfs2_unblock_ctl {
 
 static int ocfs2_unblock_meta(struct ocfs2_lock_res *lockres,
 			      struct ocfs2_unblock_ctl *ctl);
+static int ocfs2_check_meta_downconvert(struct ocfs2_lock_res *lockres,
+					int new_level);
+static void ocfs2_set_meta_lvb(struct ocfs2_lock_res *lockres);
+
 static int ocfs2_unblock_data(struct ocfs2_lock_res *lockres,
 			      struct ocfs2_unblock_ctl *ctl);
 static int ocfs2_unblock_inode_lock(struct ocfs2_lock_res *lockres,
@@ -179,6 +183,8 @@ static struct ocfs2_lock_res_ops ocfs2_inode_rw_lops = {
 static struct ocfs2_lock_res_ops ocfs2_inode_meta_lops = {
 	.get_osb	= ocfs2_get_inode_osb,
 	.unblock	= ocfs2_unblock_meta,
+	.check_downconvert = ocfs2_check_meta_downconvert,
+	.set_lvb	= ocfs2_set_meta_lvb,
 	.flags		= LOCK_TYPE_REQUIRES_REFRESH|LOCK_TYPE_USES_LVB,
 };
 
@@ -2822,6 +2828,29 @@ static int ocfs2_unblock_inode_lock(struct ocfs2_lock_res *lockres,
 	return status;
 }
 
+static int ocfs2_check_meta_downconvert(struct ocfs2_lock_res *lockres,
+					int new_level)
+{
+	struct inode *inode = ocfs2_lock_res_inode(lockres);
+	int checkpointed = ocfs2_inode_fully_checkpointed(inode);
+
+	BUG_ON(new_level != LKM_NLMODE && new_level != LKM_PRMODE);
+	BUG_ON(lockres->l_level != LKM_EXMODE && !checkpointed);
+
+	if (checkpointed)
+		return 1;
+
+	ocfs2_start_checkpoint(OCFS2_SB(inode->i_sb));
+	return 0;
+}
+
+static void ocfs2_set_meta_lvb(struct ocfs2_lock_res *lockres)
+{
+	struct inode *inode = ocfs2_lock_res_inode(lockres);
+
+	__ocfs2_stuff_meta_lvb(inode);
+}
+
 static int ocfs2_unblock_meta(struct ocfs2_lock_res *lockres,
 			      struct ocfs2_unblock_ctl *ctl)
 {
@@ -2835,7 +2864,8 @@ static int ocfs2_unblock_meta(struct ocfs2_lock_res *lockres,
 	mlog(0, "unblock inode %llu\n",
 	     (unsigned long long)OCFS2_I(inode)->ip_blkno);
 
-	status = ocfs2_do_unblock_meta(inode, &ctl->requeue);
+	status = ocfs2_generic_unblock_lock(OCFS2_SB(inode->i_sb),
+					    lockres, ctl, NULL);
 	if (status < 0)
 		mlog_errno(status);
 
-- 
cgit v1.1


From 08280f11de91beac2f5234ce5fc2ed246dfe6a86 Mon Sep 17 00:00:00 2001
From: Mark Fasheh <mark.fasheh@oracle.com>
Date: Wed, 13 Sep 2006 21:41:56 -0700
Subject: ocfs2: Remove unused dlmglue functions

The meta data unblocking code no longer needs ocfs2_do_unblock_meta() or
ocfs2_can_downconvert_meta_lock(), so remove them.

Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
---
 fs/ocfs2/dlmglue.c | 103 -----------------------------------------------------
 1 file changed, 103 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index c8177d0..399d6e2 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -268,9 +268,6 @@ static int ocfs2_meta_lock_update(struct inode *inode,
 				  struct buffer_head **bh);
 static void ocfs2_drop_osb_locks(struct ocfs2_super *osb);
 static inline int ocfs2_highest_compat_lock_level(int level);
-static inline int ocfs2_can_downconvert_meta_lock(struct inode *inode,
-						  struct ocfs2_lock_res *lockres,
-						  int new_level);
 
 static void ocfs2_build_lock_name(enum ocfs2_lock_type type,
 				  u64 blkno,
@@ -2538,106 +2535,6 @@ static int ocfs2_cancel_convert(struct ocfs2_super *osb,
 	return ret;
 }
 
-static inline int ocfs2_can_downconvert_meta_lock(struct inode *inode,
-						  struct ocfs2_lock_res *lockres,
-						  int new_level)
-{
-	int ret;
-
-	mlog_entry_void();
-
-	BUG_ON(new_level != LKM_NLMODE && new_level != LKM_PRMODE);
-
-	if (lockres->l_flags & OCFS2_LOCK_REFRESHING) {
-		ret = 0;
-		mlog(0, "lockres %s currently being refreshed -- backing "
-		     "off!\n", lockres->l_name);
-	} else if (new_level == LKM_PRMODE)
-		ret = !lockres->l_ex_holders &&
-			ocfs2_inode_fully_checkpointed(inode);
-	else /* Must be NLMODE we're converting to. */
-		ret = !lockres->l_ro_holders && !lockres->l_ex_holders &&
-			ocfs2_inode_fully_checkpointed(inode);
-
-	mlog_exit(ret);
-	return ret;
-}
-
-static int ocfs2_do_unblock_meta(struct inode *inode,
-				 int *requeue)
-{
-	int new_level;
-	int set_lvb = 0;
-	int ret = 0;
-	struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_meta_lockres;
-	unsigned long flags;
-
-	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
-
-	mlog_entry_void();
-
-	spin_lock_irqsave(&lockres->l_lock, flags);
-
-	BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BLOCKED));
-
-	mlog(0, "l_level=%d, l_blocking=%d\n", lockres->l_level,
-	     lockres->l_blocking);
-
-	BUG_ON(lockres->l_level != LKM_EXMODE &&
-	       lockres->l_level != LKM_PRMODE);
-
-	if (lockres->l_flags & OCFS2_LOCK_BUSY) {
-		*requeue = 1;
-		ret = ocfs2_prepare_cancel_convert(osb, lockres);
-		spin_unlock_irqrestore(&lockres->l_lock, flags);
-		if (ret) {
-			ret = ocfs2_cancel_convert(osb, lockres);
-			if (ret < 0)
-				mlog_errno(ret);
-		}
-		goto leave;
-	}
-
-	new_level = ocfs2_highest_compat_lock_level(lockres->l_blocking);
-
-	mlog(0, "l_level=%d, l_blocking=%d, new_level=%d\n",
-	     lockres->l_level, lockres->l_blocking, new_level);
-
-	if (ocfs2_can_downconvert_meta_lock(inode, lockres, new_level)) {
-		if (lockres->l_level == LKM_EXMODE)
-			set_lvb = 1;
-
-		/* If the lock hasn't been refreshed yet (rare), then
-		 * our memory inode values are old and we skip
-		 * stuffing the lvb. There's no need to actually clear
-		 * out the lvb here as it's value is still valid. */
-		if (!(lockres->l_flags & OCFS2_LOCK_NEEDS_REFRESH)) {
-			if (set_lvb)
-				__ocfs2_stuff_meta_lvb(inode);
-		} else
-			mlog(0, "lockres %s: downconverting stale lock!\n",
-			     lockres->l_name);
-
-		mlog(0, "calling ocfs2_downconvert_lock with l_level=%d, "
-		     "l_blocking=%d, new_level=%d\n",
-		     lockres->l_level, lockres->l_blocking, new_level);
-
-		ocfs2_prepare_downconvert(lockres, new_level);
-		spin_unlock_irqrestore(&lockres->l_lock, flags);
-		ret = ocfs2_downconvert_lock(osb, lockres, new_level, set_lvb);
-		goto leave;
-	}
-	if (!ocfs2_inode_fully_checkpointed(inode))
-		ocfs2_start_checkpoint(osb);
-
-	*requeue = 1;
-	spin_unlock_irqrestore(&lockres->l_lock, flags);
-	ret = 0;
-leave:
-	mlog_exit(ret);
-	return ret;
-}
-
 static int ocfs2_generic_unblock_lock(struct ocfs2_super *osb,
 				      struct ocfs2_lock_res *lockres,
 				      struct ocfs2_unblock_ctl *ctl,
-- 
cgit v1.1


From cc567d89b3af4294580c9c97610d2c1018032e33 Mon Sep 17 00:00:00 2001
From: Mark Fasheh <mark.fasheh@oracle.com>
Date: Wed, 13 Sep 2006 21:52:21 -0700
Subject: ocfs2: move downconvert worker to lockres ops

This way lock types don't have to manually pass it to
ocfs2_generic_unblock_lock().

Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
---
 fs/ocfs2/dlmglue.c | 50 ++++++++++++++++++++++++++++++++------------------
 1 file changed, 32 insertions(+), 18 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index 399d6e2..d92756d 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -71,7 +71,7 @@ static struct ocfs2_super *ocfs2_get_dentry_osb(struct ocfs2_lock_res *lockres);
 static struct ocfs2_super *ocfs2_get_inode_osb(struct ocfs2_lock_res *lockres);
 
 /*
- * Return value from ocfs2_convert_worker_t functions.
+ * Return value from ->downconvert_worker functions.
  *
  * These control the precise actions of ocfs2_generic_unblock_lock()
  * and ocfs2_process_blocked_lock()
@@ -98,16 +98,23 @@ static void ocfs2_set_meta_lvb(struct ocfs2_lock_res *lockres);
 
 static int ocfs2_unblock_data(struct ocfs2_lock_res *lockres,
 			      struct ocfs2_unblock_ctl *ctl);
+static int ocfs2_data_convert_worker(struct ocfs2_lock_res *lockres,
+				     int blocking);
+
 static int ocfs2_unblock_inode_lock(struct ocfs2_lock_res *lockres,
 				    struct ocfs2_unblock_ctl *ctl);
+
 static int ocfs2_unblock_dentry_lock(struct ocfs2_lock_res *lockres,
 				     struct ocfs2_unblock_ctl *ctl);
-static int ocfs2_unblock_osb_lock(struct ocfs2_lock_res *lockres,
-				  struct ocfs2_unblock_ctl *ctl);
+static int ocfs2_dentry_convert_worker(struct ocfs2_lock_res *lockres,
+				       int blocking);
 
 static void ocfs2_dentry_post_unlock(struct ocfs2_super *osb,
 				     struct ocfs2_lock_res *lockres);
 
+static int ocfs2_unblock_osb_lock(struct ocfs2_lock_res *lockres,
+				  struct ocfs2_unblock_ctl *ctl);
+
 /*
  * OCFS2 Lock Resource Operations
  *
@@ -146,6 +153,17 @@ struct ocfs2_lock_res_ops {
 	void (*set_lvb)(struct ocfs2_lock_res *);
 
 	/*
+	 * Called from the downconvert thread when it is determined
+	 * that a lock will be downconverted. This is called without
+	 * any locks held so the function can do work that might
+	 * schedule (syncing out data, etc).
+	 *
+	 * This should return any one of the ocfs2_unblock_action
+	 * values, depending on what it wants the thread to do.
+	 */
+	int (*downconvert_worker)(struct ocfs2_lock_res *, int);
+
+	/*
 	 * LOCK_TYPE_* flags which describe the specific requirements
 	 * of a lock type. Descriptions of each individual flag follow.
 	 */
@@ -168,11 +186,9 @@ struct ocfs2_lock_res_ops {
  */
 #define LOCK_TYPE_USES_LVB		0x2
 
-typedef int (ocfs2_convert_worker_t)(struct ocfs2_lock_res *, int);
 static int ocfs2_generic_unblock_lock(struct ocfs2_super *osb,
 				      struct ocfs2_lock_res *lockres,
-				      struct ocfs2_unblock_ctl *ctl,
-				      ocfs2_convert_worker_t *worker);
+				      struct ocfs2_unblock_ctl *ctl);
 
 static struct ocfs2_lock_res_ops ocfs2_inode_rw_lops = {
 	.get_osb	= ocfs2_get_inode_osb,
@@ -191,6 +207,7 @@ static struct ocfs2_lock_res_ops ocfs2_inode_meta_lops = {
 static struct ocfs2_lock_res_ops ocfs2_inode_data_lops = {
 	.get_osb	= ocfs2_get_inode_osb,
 	.unblock	= ocfs2_unblock_data,
+	.downconvert_worker = ocfs2_data_convert_worker,
 	.flags		= 0,
 };
 
@@ -208,6 +225,7 @@ static struct ocfs2_lock_res_ops ocfs2_dentry_lops = {
 	.get_osb	= ocfs2_get_dentry_osb,
 	.unblock	= ocfs2_unblock_dentry_lock,
 	.post_unlock	= ocfs2_dentry_post_unlock,
+	.downconvert_worker = ocfs2_dentry_convert_worker,
 	.flags		= 0,
 };
 
@@ -2537,8 +2555,7 @@ static int ocfs2_cancel_convert(struct ocfs2_super *osb,
 
 static int ocfs2_generic_unblock_lock(struct ocfs2_super *osb,
 				      struct ocfs2_lock_res *lockres,
-				      struct ocfs2_unblock_ctl *ctl,
-				      ocfs2_convert_worker_t *worker)
+				      struct ocfs2_unblock_ctl *ctl)
 {
 	unsigned long flags;
 	int blocking;
@@ -2594,7 +2611,7 @@ recheck:
 	/* If we get here, then we know that there are no more
 	 * incompatible holders (and anyone asking for an incompatible
 	 * lock is blocked). We can now downconvert the lock */
-	if (!worker)
+	if (!lockres->l_ops->downconvert_worker)
 		goto downconvert;
 
 	/* Some lockres types want to do a bit of work before
@@ -2604,7 +2621,7 @@ recheck:
 	blocking = lockres->l_blocking;
 	spin_unlock_irqrestore(&lockres->l_lock, flags);
 
-	ctl->unblock_action = worker(lockres, blocking);
+	ctl->unblock_action = lockres->l_ops->downconvert_worker(lockres, blocking);
 
 	if (ctl->unblock_action == UNBLOCK_STOP_POST)
 		goto leave;
@@ -2692,8 +2709,7 @@ int ocfs2_unblock_data(struct ocfs2_lock_res *lockres,
 	mlog(0, "unblock inode %llu\n",
 	     (unsigned long long)OCFS2_I(inode)->ip_blkno);
 
-	status = ocfs2_generic_unblock_lock(osb, lockres, ctl,
-					    ocfs2_data_convert_worker);
+	status = ocfs2_generic_unblock_lock(osb, lockres, ctl);
 	if (status < 0)
 		mlog_errno(status);
 
@@ -2717,7 +2733,7 @@ static int ocfs2_unblock_inode_lock(struct ocfs2_lock_res *lockres,
 	inode  = ocfs2_lock_res_inode(lockres);
 
 	status = ocfs2_generic_unblock_lock(OCFS2_SB(inode->i_sb),
-					    lockres, ctl, NULL);
+					    lockres, ctl);
 	if (status < 0)
 		mlog_errno(status);
 
@@ -2762,7 +2778,7 @@ static int ocfs2_unblock_meta(struct ocfs2_lock_res *lockres,
 	     (unsigned long long)OCFS2_I(inode)->ip_blkno);
 
 	status = ocfs2_generic_unblock_lock(OCFS2_SB(inode->i_sb),
-					    lockres, ctl, NULL);
+					    lockres, ctl);
 	if (status < 0)
 		mlog_errno(status);
 
@@ -2907,8 +2923,7 @@ static int ocfs2_unblock_dentry_lock(struct ocfs2_lock_res *lockres,
 
 	ret = ocfs2_generic_unblock_lock(osb,
 					 lockres,
-					 ctl,
-					 ocfs2_dentry_convert_worker);
+					 ctl);
 	if (ret < 0)
 		mlog_errno(ret);
 
@@ -2933,8 +2948,7 @@ static int ocfs2_unblock_osb_lock(struct ocfs2_lock_res *lockres,
 
 	status = ocfs2_generic_unblock_lock(osb,
 					    lockres,
-					    ctl,
-					    NULL);
+					    ctl);
 	if (status < 0)
 		mlog_errno(status);
 
-- 
cgit v1.1


From b5e500e23e532795fbf79a3cdbcb014f207fdb2a Mon Sep 17 00:00:00 2001
From: Mark Fasheh <mark.fasheh@oracle.com>
Date: Wed, 13 Sep 2006 22:01:16 -0700
Subject: ocfs2: Remove ->unblock lockres operation

Have ocfs2_process_blocked_lock() call ocfs2_generic_unblock_lock(), which
gets to be ocfs2_unblock_lock() now that it's the only possible unblock
function.

Remove the ->unblock() callback from the structure, and all lock type
specific unblock functions.

Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
---
 fs/ocfs2/dlmglue.c | 152 +++--------------------------------------------------
 1 file changed, 6 insertions(+), 146 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index d92756d..182ff24 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -73,7 +73,7 @@ static struct ocfs2_super *ocfs2_get_inode_osb(struct ocfs2_lock_res *lockres);
 /*
  * Return value from ->downconvert_worker functions.
  *
- * These control the precise actions of ocfs2_generic_unblock_lock()
+ * These control the precise actions of ocfs2_unblock_lock()
  * and ocfs2_process_blocked_lock()
  *
  */
@@ -90,31 +90,19 @@ struct ocfs2_unblock_ctl {
 	enum ocfs2_unblock_action unblock_action;
 };
 
-static int ocfs2_unblock_meta(struct ocfs2_lock_res *lockres,
-			      struct ocfs2_unblock_ctl *ctl);
 static int ocfs2_check_meta_downconvert(struct ocfs2_lock_res *lockres,
 					int new_level);
 static void ocfs2_set_meta_lvb(struct ocfs2_lock_res *lockres);
 
-static int ocfs2_unblock_data(struct ocfs2_lock_res *lockres,
-			      struct ocfs2_unblock_ctl *ctl);
 static int ocfs2_data_convert_worker(struct ocfs2_lock_res *lockres,
 				     int blocking);
 
-static int ocfs2_unblock_inode_lock(struct ocfs2_lock_res *lockres,
-				    struct ocfs2_unblock_ctl *ctl);
-
-static int ocfs2_unblock_dentry_lock(struct ocfs2_lock_res *lockres,
-				     struct ocfs2_unblock_ctl *ctl);
 static int ocfs2_dentry_convert_worker(struct ocfs2_lock_res *lockres,
 				       int blocking);
 
 static void ocfs2_dentry_post_unlock(struct ocfs2_super *osb,
 				     struct ocfs2_lock_res *lockres);
 
-static int ocfs2_unblock_osb_lock(struct ocfs2_lock_res *lockres,
-				  struct ocfs2_unblock_ctl *ctl);
-
 /*
  * OCFS2 Lock Resource Operations
  *
@@ -126,7 +114,7 @@ struct ocfs2_lock_res_ops {
 	 * this callback if ->l_priv is not an ocfs2_super pointer
 	 */
 	struct ocfs2_super * (*get_osb)(struct ocfs2_lock_res *);
-	int  (*unblock)(struct ocfs2_lock_res *, struct ocfs2_unblock_ctl *);
+
 	void (*post_unlock)(struct ocfs2_super *, struct ocfs2_lock_res *);
 
 	/*
@@ -186,19 +174,13 @@ struct ocfs2_lock_res_ops {
  */
 #define LOCK_TYPE_USES_LVB		0x2
 
-static int ocfs2_generic_unblock_lock(struct ocfs2_super *osb,
-				      struct ocfs2_lock_res *lockres,
-				      struct ocfs2_unblock_ctl *ctl);
-
 static struct ocfs2_lock_res_ops ocfs2_inode_rw_lops = {
 	.get_osb	= ocfs2_get_inode_osb,
-	.unblock	= ocfs2_unblock_inode_lock,
 	.flags		= 0,
 };
 
 static struct ocfs2_lock_res_ops ocfs2_inode_meta_lops = {
 	.get_osb	= ocfs2_get_inode_osb,
-	.unblock	= ocfs2_unblock_meta,
 	.check_downconvert = ocfs2_check_meta_downconvert,
 	.set_lvb	= ocfs2_set_meta_lvb,
 	.flags		= LOCK_TYPE_REQUIRES_REFRESH|LOCK_TYPE_USES_LVB,
@@ -206,24 +188,20 @@ static struct ocfs2_lock_res_ops ocfs2_inode_meta_lops = {
 
 static struct ocfs2_lock_res_ops ocfs2_inode_data_lops = {
 	.get_osb	= ocfs2_get_inode_osb,
-	.unblock	= ocfs2_unblock_data,
 	.downconvert_worker = ocfs2_data_convert_worker,
 	.flags		= 0,
 };
 
 static struct ocfs2_lock_res_ops ocfs2_super_lops = {
-	.unblock	= ocfs2_unblock_osb_lock,
 	.flags		= LOCK_TYPE_REQUIRES_REFRESH,
 };
 
 static struct ocfs2_lock_res_ops ocfs2_rename_lops = {
-	.unblock	= ocfs2_unblock_osb_lock,
 	.flags		= 0,
 };
 
 static struct ocfs2_lock_res_ops ocfs2_dentry_lops = {
 	.get_osb	= ocfs2_get_dentry_osb,
-	.unblock	= ocfs2_unblock_dentry_lock,
 	.post_unlock	= ocfs2_dentry_post_unlock,
 	.downconvert_worker = ocfs2_dentry_convert_worker,
 	.flags		= 0,
@@ -2553,9 +2531,9 @@ static int ocfs2_cancel_convert(struct ocfs2_super *osb,
 	return ret;
 }
 
-static int ocfs2_generic_unblock_lock(struct ocfs2_super *osb,
-				      struct ocfs2_lock_res *lockres,
-				      struct ocfs2_unblock_ctl *ctl)
+static int ocfs2_unblock_lock(struct ocfs2_super *osb,
+			      struct ocfs2_lock_res *lockres,
+			      struct ocfs2_unblock_ctl *ctl)
 {
 	unsigned long flags;
 	int blocking;
@@ -2694,53 +2672,6 @@ static int ocfs2_data_convert_worker(struct ocfs2_lock_res *lockres,
 	return UNBLOCK_CONTINUE;
 }
 
-int ocfs2_unblock_data(struct ocfs2_lock_res *lockres,
-		       struct ocfs2_unblock_ctl *ctl)
-{
-	int status;
-	struct inode *inode;
-	struct ocfs2_super *osb;
-
-	mlog_entry_void();
-
-	inode = ocfs2_lock_res_inode(lockres);
-	osb = OCFS2_SB(inode->i_sb);
-
-	mlog(0, "unblock inode %llu\n",
-	     (unsigned long long)OCFS2_I(inode)->ip_blkno);
-
-	status = ocfs2_generic_unblock_lock(osb, lockres, ctl);
-	if (status < 0)
-		mlog_errno(status);
-
-	mlog(0, "inode %llu, requeue = %d\n",
-	     (unsigned long long)OCFS2_I(inode)->ip_blkno, ctl->requeue);
-
-	mlog_exit(status);
-	return status;
-}
-
-static int ocfs2_unblock_inode_lock(struct ocfs2_lock_res *lockres,
-				    struct ocfs2_unblock_ctl *ctl)
-{
-	int status;
-	struct inode *inode;
-
-	mlog_entry_void();
-
-	mlog(0, "Unblock lockres %s\n", lockres->l_name);
-
-	inode  = ocfs2_lock_res_inode(lockres);
-
-	status = ocfs2_generic_unblock_lock(OCFS2_SB(inode->i_sb),
-					    lockres, ctl);
-	if (status < 0)
-		mlog_errno(status);
-
-	mlog_exit(status);
-	return status;
-}
-
 static int ocfs2_check_meta_downconvert(struct ocfs2_lock_res *lockres,
 					int new_level)
 {
@@ -2764,31 +2695,6 @@ static void ocfs2_set_meta_lvb(struct ocfs2_lock_res *lockres)
 	__ocfs2_stuff_meta_lvb(inode);
 }
 
-static int ocfs2_unblock_meta(struct ocfs2_lock_res *lockres,
-			      struct ocfs2_unblock_ctl *ctl)
-{
-	int status;
-	struct inode *inode;
-
-	mlog_entry_void();
-
-       	inode = ocfs2_lock_res_inode(lockres);
-
-	mlog(0, "unblock inode %llu\n",
-	     (unsigned long long)OCFS2_I(inode)->ip_blkno);
-
-	status = ocfs2_generic_unblock_lock(OCFS2_SB(inode->i_sb),
-					    lockres, ctl);
-	if (status < 0)
-		mlog_errno(status);
-
-	mlog(0, "inode %llu, requeue = %d\n",
-	     (unsigned long long)OCFS2_I(inode)->ip_blkno, ctl->requeue);
-
-	mlog_exit(status);
-	return status;
-}
-
 /*
  * Does the final reference drop on our dentry lock. Right now this
  * happens in the vote thread, but we could choose to simplify the
@@ -2911,51 +2817,6 @@ static int ocfs2_dentry_convert_worker(struct ocfs2_lock_res *lockres,
 	return UNBLOCK_CONTINUE_POST;
 }
 
-static int ocfs2_unblock_dentry_lock(struct ocfs2_lock_res *lockres,
-				     struct ocfs2_unblock_ctl *ctl)
-{
-	int ret;
-	struct ocfs2_dentry_lock *dl = ocfs2_lock_res_dl(lockres);
-	struct ocfs2_super *osb = OCFS2_SB(dl->dl_inode->i_sb);
-
-	mlog(0, "unblock dentry lock: %llu\n",
-	     (unsigned long long)OCFS2_I(dl->dl_inode)->ip_blkno);
-
-	ret = ocfs2_generic_unblock_lock(osb,
-					 lockres,
-					 ctl);
-	if (ret < 0)
-		mlog_errno(ret);
-
-	mlog(0, "requeue = %d, post = %d\n", ctl->requeue, ctl->unblock_action);
-
-	return ret;
-}
-
-/* Generic unblock function for any lockres whose private data is an
- * ocfs2_super pointer. */
-static int ocfs2_unblock_osb_lock(struct ocfs2_lock_res *lockres,
-				  struct ocfs2_unblock_ctl *ctl)
-{
-	int status;
-	struct ocfs2_super *osb;
-
-	mlog_entry_void();
-
-	mlog(0, "Unblock lockres %s\n", lockres->l_name);
-
-	osb = ocfs2_get_lockres_osb(lockres);
-
-	status = ocfs2_generic_unblock_lock(osb,
-					    lockres,
-					    ctl);
-	if (status < 0)
-		mlog_errno(status);
-
-	mlog_exit(status);
-	return status;
-}
-
 void ocfs2_process_blocked_lock(struct ocfs2_super *osb,
 				struct ocfs2_lock_res *lockres)
 {
@@ -2971,7 +2832,6 @@ void ocfs2_process_blocked_lock(struct ocfs2_super *osb,
 
 	BUG_ON(!lockres);
 	BUG_ON(!lockres->l_ops);
-	BUG_ON(!lockres->l_ops->unblock);
 
 	mlog(0, "lockres %s blocked.\n", lockres->l_name);
 
@@ -2985,7 +2845,7 @@ void ocfs2_process_blocked_lock(struct ocfs2_super *osb,
 		goto unqueue;
 	spin_unlock_irqrestore(&lockres->l_lock, flags);
 
-	status = lockres->l_ops->unblock(lockres, &ctl);
+	status = ocfs2_unblock_lock(osb, lockres, &ctl);
 	if (status < 0)
 		mlog_errno(status);
 
-- 
cgit v1.1


From 0d5dc6c2dd7a3cd2b2f505b0625c4ec9c0e5b4f0 Mon Sep 17 00:00:00 2001
From: Mark Fasheh <mark.fasheh@oracle.com>
Date: Thu, 14 Sep 2006 14:44:51 -0700
Subject: ocfs2: Teach ocfs2_drop_lock() to use ->set_lvb() callback

With this, we don't need to pass an additional struct with function pointer.

Now that the callbacks are fully used, comment the remaining API.

Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
---
 fs/ocfs2/dlmglue.c | 59 ++++++++++++++++++++++++++----------------------------
 1 file changed, 28 insertions(+), 31 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index 182ff24..de88706 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -107,6 +107,14 @@ static void ocfs2_dentry_post_unlock(struct ocfs2_super *osb,
  * OCFS2 Lock Resource Operations
  *
  * These fine tune the behavior of the generic dlmglue locking infrastructure.
+ *
+ * The most basic of lock types can point ->l_priv to their respective
+ * struct ocfs2_super and allow the default actions to manage things.
+ *
+ * Right now, each lock type also needs to implement an init function,
+ * and trivial lock/unlock wrappers. ocfs2_simple_drop_lockres()
+ * should be called when the lock is no longer needed (i.e., object
+ * destruction time).
  */
 struct ocfs2_lock_res_ops {
 	/*
@@ -115,6 +123,15 @@ struct ocfs2_lock_res_ops {
 	 */
 	struct ocfs2_super * (*get_osb)(struct ocfs2_lock_res *);
 
+	/*
+	 * Optionally called in the downconvert (or "vote") thread
+	 * after a successful downconvert. The lockres will not be
+	 * referenced after this callback is called, so it is safe to
+	 * free memory, etc.
+	 *
+	 * The exact semantics of when this is called are controlled
+	 * by ->downconvert_worker()
+	 */
 	void (*post_unlock)(struct ocfs2_super *, struct ocfs2_lock_res *);
 
 	/*
@@ -2230,16 +2247,8 @@ complete_unlock:
 	mlog_exit_void();
 }
 
-typedef void (ocfs2_pre_drop_cb_t)(struct ocfs2_lock_res *, void *);
-
-struct drop_lock_cb {
-	ocfs2_pre_drop_cb_t	*drop_func;
-	void			*drop_data;
-};
-
 static int ocfs2_drop_lock(struct ocfs2_super *osb,
-			   struct ocfs2_lock_res *lockres,
-			   struct drop_lock_cb *dcb)
+			   struct ocfs2_lock_res *lockres)
 {
 	enum dlm_status status;
 	unsigned long flags;
@@ -2274,8 +2283,12 @@ static int ocfs2_drop_lock(struct ocfs2_super *osb,
 		spin_lock_irqsave(&lockres->l_lock, flags);
 	}
 
-	if (dcb)
-		dcb->drop_func(lockres, dcb->drop_data);
+	if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB) {
+		if (lockres->l_flags & OCFS2_LOCK_ATTACHED &&
+		    lockres->l_level == LKM_EXMODE &&
+		    !(lockres->l_flags & OCFS2_LOCK_NEEDS_REFRESH))
+			lockres->l_ops->set_lvb(lockres);
+	}
 
 	if (lockres->l_flags & OCFS2_LOCK_BUSY)
 		mlog(ML_ERROR, "destroying busy lock: \"%s\"\n",
@@ -2355,7 +2368,7 @@ void ocfs2_simple_drop_lockres(struct ocfs2_super *osb,
 	int ret;
 
 	ocfs2_mark_lockres_freeing(lockres);
-	ret = ocfs2_drop_lock(osb, lockres, NULL);
+	ret = ocfs2_drop_lock(osb, lockres);
 	if (ret)
 		mlog_errno(ret);
 }
@@ -2366,22 +2379,9 @@ static void ocfs2_drop_osb_locks(struct ocfs2_super *osb)
 	ocfs2_simple_drop_lockres(osb, &osb->osb_rename_lockres);
 }
 
-static void ocfs2_meta_pre_drop(struct ocfs2_lock_res *lockres, void *data)
-{
-	struct inode *inode = data;
-
-	/* the metadata lock requires a bit more work as we have an
-	 * LVB to worry about. */
-	if (lockres->l_flags & OCFS2_LOCK_ATTACHED &&
-	    lockres->l_level == LKM_EXMODE &&
-	    !(lockres->l_flags & OCFS2_LOCK_NEEDS_REFRESH))
-		__ocfs2_stuff_meta_lvb(inode);
-}
-
 int ocfs2_drop_inode_locks(struct inode *inode)
 {
 	int status, err;
-	struct drop_lock_cb meta_dcb = { ocfs2_meta_pre_drop, inode, };
 
 	mlog_entry_void();
 
@@ -2389,24 +2389,21 @@ int ocfs2_drop_inode_locks(struct inode *inode)
 	 * ocfs2_clear_inode has done it for us. */
 
 	err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb),
-			      &OCFS2_I(inode)->ip_data_lockres,
-			      NULL);
+			      &OCFS2_I(inode)->ip_data_lockres);
 	if (err < 0)
 		mlog_errno(err);
 
 	status = err;
 
 	err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb),
-			      &OCFS2_I(inode)->ip_meta_lockres,
-			      &meta_dcb);
+			      &OCFS2_I(inode)->ip_meta_lockres);
 	if (err < 0)
 		mlog_errno(err);
 	if (err < 0 && !status)
 		status = err;
 
 	err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb),
-			      &OCFS2_I(inode)->ip_rw_lockres,
-			      NULL);
+			      &OCFS2_I(inode)->ip_rw_lockres);
 	if (err < 0)
 		mlog_errno(err);
 	if (err < 0 && !status)
-- 
cgit v1.1