302 files changed, 9380 insertions, 4872 deletions
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 296482f..9ee5343 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -832,7 +832,7 @@ struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry,
 	 * moved b under k and client parallely did a lookup for
 	 * k/b.
 	 */
-	res = d_materialise_unique(dentry, inode);
+	res = d_splice_alias(inode, dentry);
 	if (!res)
 		v9fs_fid_add(dentry, fid);
 	else if (!IS_ERR(res))
diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c
index 02b64f4..6054c16b 100644
--- a/fs/9p/vfs_inode_dotl.c
+++ b/fs/9p/vfs_inode_dotl.c
@@ -826,8 +826,8 @@ v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, umode_t omode,
 	struct dentry *dir_dentry;
 	struct posix_acl *dacl = NULL, *pacl = NULL;
 
-	p9_debug(P9_DEBUG_VFS, " %lu,%s mode: %hx MAJOR: %u MINOR: %u\n",
-		 dir->i_ino, dentry->d_name.name, omode,
+	p9_debug(P9_DEBUG_VFS, " %lu,%pd mode: %hx MAJOR: %u MINOR: %u\n",
+		 dir->i_ino, dentry, omode,
 		 MAJOR(rdev), MINOR(rdev));
 
 	if (!new_valid_dev(rdev))
diff --git a/fs/Kconfig b/fs/Kconfig
index db5dc15..664991a 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -67,6 +67,7 @@ source "fs/quota/Kconfig"
 
 source "fs/autofs4/Kconfig"
 source "fs/fuse/Kconfig"
+source "fs/overlayfs/Kconfig"
 
 menu "Caches"
 
diff --git a/fs/Makefile b/fs/Makefile
index 90c8852..da0bbb4 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -104,6 +104,7 @@ obj-$(CONFIG_QNX6FS_FS)		+= qnx6/
 obj-$(CONFIG_AUTOFS4_FS)	+= autofs4/
 obj-$(CONFIG_ADFS_FS)		+= adfs/
 obj-$(CONFIG_FUSE_FS)		+= fuse/
+obj-$(CONFIG_OVERLAY_FS)	+= overlayfs/
 obj-$(CONFIG_UDF_FS)		+= udf/
 obj-$(CONFIG_SUN_OPENPROMFS)	+= openpromfs/
 obj-$(CONFIG_OMFS_FS)		+= omfs/
diff --git a/fs/affs/amigaffs.c b/fs/affs/amigaffs.c
index abc8539..937ce87 100644
--- a/fs/affs/amigaffs.c
+++ b/fs/affs/amigaffs.c
@@ -125,7 +125,7 @@ affs_fix_dcache(struct inode *inode, u32 entry_ino)
 {
 	struct dentry *dentry;
 	spin_lock(&inode->i_lock);
-	hlist_for_each_entry(dentry, &inode->i_dentry, d_alias) {
+	hlist_for_each_entry(dentry, &inode->i_dentry, d_u.d_alias) {
 		if (entry_ino == (u32)(long)dentry->d_fsdata) {
 			dentry->d_fsdata = (void *)inode->i_ino;
 			break;
diff --git a/fs/affs/inode.c b/fs/affs/inode.c
index e217c51..d0609a2 100644
--- a/fs/affs/inode.c
+++ b/fs/affs/inode.c
@@ -348,9 +348,9 @@ affs_add_entry(struct inode *dir, struct inode *inode, struct dentry *dentry, s3
 	u32 block = 0;
 	int retval;
 
-	pr_debug("%s(dir=%u, inode=%u, \"%*s\", type=%d)\n",
+	pr_debug("%s(dir=%u, inode=%u, \"%pd\", type=%d)\n",
 		 __func__, (u32)dir->i_ino,
-	         (u32)inode->i_ino, (int)dentry->d_name.len, dentry->d_name.name, type);
+	         (u32)inode->i_ino, dentry, type);
 
 	retval = -EIO;
 	bh = affs_bread(sb, inode->i_ino);
diff --git a/fs/affs/namei.c b/fs/affs/namei.c
index 035bd31..bbc3853 100644
--- a/fs/affs/namei.c
+++ b/fs/affs/namei.c
@@ -190,8 +190,7 @@ affs_find_entry(struct inode *dir, struct dentry *dentry)
 	toupper_t toupper = affs_get_toupper(sb);
 	u32 key;
 
-	pr_debug("%s(\"%.*s\")\n",
-		 __func__, (int)dentry->d_name.len, dentry->d_name.name);
+	pr_debug("%s(\"%pd\")\n", __func__, dentry);
 
 	bh = affs_bread(sb, dir->i_ino);
 	if (!bh)
@@ -219,8 +218,7 @@ affs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
 	struct buffer_head *bh;
 	struct inode *inode = NULL;
 
-	pr_debug("%s(\"%.*s\")\n",
-		 __func__, (int)dentry->d_name.len, dentry->d_name.name);
+	pr_debug("%s(\"%pd\")\n", __func__, dentry);
 
 	affs_lock_dir(dir);
 	bh = affs_find_entry(dir, dentry);
@@ -250,9 +248,9 @@ affs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
 int
 affs_unlink(struct inode *dir, struct dentry *dentry)
 {
-	pr_debug("%s(dir=%d, %lu \"%.*s\")\n",
+	pr_debug("%s(dir=%d, %lu \"%pd\")\n",
 		 __func__, (u32)dir->i_ino, dentry->d_inode->i_ino,
-		(int)dentry->d_name.len, dentry->d_name.name);
+		dentry);
 
 	return affs_remove_header(dentry);
 }
@@ -264,9 +262,8 @@ affs_create(struct inode *dir, struct dentry *dentry, umode_t mode, bool excl)
 	struct inode	*inode;
 	int		 error;
 
-	pr_debug("%s(%lu,\"%.*s\",0%ho)\n",
-		 __func__, dir->i_ino, (int)dentry->d_name.len,
-		 dentry->d_name.name,mode);
+	pr_debug("%s(%lu,\"%pd\",0%ho)\n",
+		 __func__, dir->i_ino, dentry, mode);
 
 	inode = affs_new_inode(dir);
 	if (!inode)
@@ -294,9 +291,8 @@ affs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
 	struct inode		*inode;
 	int			 error;
 
-	pr_debug("%s(%lu,\"%.*s\",0%ho)\n",
-		 __func__, dir->i_ino, (int)dentry->d_name.len,
-		 dentry->d_name.name, mode);
+	pr_debug("%s(%lu,\"%pd\",0%ho)\n",
+		 __func__, dir->i_ino, dentry, mode);
 
 	inode = affs_new_inode(dir);
 	if (!inode)
@@ -321,9 +317,9 @@ affs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
 int
 affs_rmdir(struct inode *dir, struct dentry *dentry)
 {
-	pr_debug("%s(dir=%u, %lu \"%.*s\")\n",
+	pr_debug("%s(dir=%u, %lu \"%pd\")\n",
 		__func__, (u32)dir->i_ino, dentry->d_inode->i_ino,
-		 (int)dentry->d_name.len, dentry->d_name.name);
+		 dentry);
 
 	return affs_remove_header(dentry);
 }
@@ -338,9 +334,8 @@ affs_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
 	int			 i, maxlen, error;
 	char			 c, lc;
 
-	pr_debug("%s(%lu,\"%.*s\" -> \"%s\")\n",
-		 __func__, dir->i_ino, (int)dentry->d_name.len,
-		 dentry->d_name.name, symname);
+	pr_debug("%s(%lu,\"%pd\" -> \"%s\")\n",
+		 __func__, dir->i_ino, dentry, symname);
 
 	maxlen = AFFS_SB(sb)->s_hashsize * sizeof(u32) - 1;
 	inode  = affs_new_inode(dir);
@@ -409,9 +404,9 @@ affs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry)
 {
 	struct inode *inode = old_dentry->d_inode;
 
-	pr_debug("%s(%u, %u, \"%.*s\")\n",
+	pr_debug("%s(%u, %u, \"%pd\")\n",
 		 __func__, (u32)inode->i_ino, (u32)dir->i_ino,
-		 (int)dentry->d_name.len,dentry->d_name.name);
+		 dentry);
 
 	return affs_add_entry(dir, inode, dentry, ST_LINKFILE);
 }
@@ -424,10 +419,9 @@ affs_rename(struct inode *old_dir, struct dentry *old_dentry,
 	struct buffer_head *bh = NULL;
 	int retval;
 
-	pr_debug("%s(old=%u,\"%*s\" to new=%u,\"%*s\")\n",
-		 __func__, (u32)old_dir->i_ino, (int)old_dentry->d_name.len,
-		 old_dentry->d_name.name, (u32)new_dir->i_ino,
-		(int)new_dentry->d_name.len, new_dentry->d_name.name);
+	pr_debug("%s(old=%u,\"%pd\" to new=%u,\"%pd\")\n",
+		 __func__, (u32)old_dir->i_ino, old_dentry,
+		 (u32)new_dir->i_ino, new_dentry);
 
 	retval = affs_check_name(new_dentry->d_name.name,
 				 new_dentry->d_name.len,
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index a1645b8..4ec35e9 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -26,7 +26,7 @@ static int afs_readdir(struct file *file, struct dir_context *ctx);
 static int afs_d_revalidate(struct dentry *dentry, unsigned int flags);
 static int afs_d_delete(const struct dentry *dentry);
 static void afs_d_release(struct dentry *dentry);
-static int afs_lookup_filldir(void *_cookie, const char *name, int nlen,
+static int afs_lookup_filldir(struct dir_context *ctx, const char *name, int nlen,
 				  loff_t fpos, u64 ino, unsigned dtype);
 static int afs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
 		      bool excl);
@@ -391,10 +391,11 @@ static int afs_readdir(struct file *file, struct dir_context *ctx)
  * - if afs_dir_iterate_block() spots this function, it'll pass the FID
  *   uniquifier through dtype
  */
-static int afs_lookup_filldir(void *_cookie, const char *name, int nlen,
-			      loff_t fpos, u64 ino, unsigned dtype)
+static int afs_lookup_filldir(struct dir_context *ctx, const char *name,
+			      int nlen, loff_t fpos, u64 ino, unsigned dtype)
 {
-	struct afs_lookup_cookie *cookie = _cookie;
+	struct afs_lookup_cookie *cookie =
+		container_of(ctx, struct afs_lookup_cookie, ctx);
 
 	_enter("{%s,%u},%s,%u,,%llu,%u",
 	       cookie->name.name, cookie->name.len, name, nlen,
@@ -433,7 +434,7 @@ static int afs_do_lookup(struct inode *dir, struct dentry *dentry,
 	};
 	int ret;
 
-	_enter("{%lu},%p{%s},", dir->i_ino, dentry, dentry->d_name.name);
+	_enter("{%lu},%p{%pd},", dir->i_ino, dentry, dentry);
 
 	/* search the directory */
 	ret = afs_dir_iterate(dir, &cookie.ctx, key);
@@ -465,8 +466,8 @@ static struct inode *afs_try_auto_mntpt(
 	struct afs_vnode *vnode = AFS_FS_I(dir);
 	struct inode *inode;
 
-	_enter("%d, %p{%s}, {%x:%u}, %p",
-	       ret, dentry, devname, vnode->fid.vid, vnode->fid.vnode, key);
+	_enter("%d, %p{%pd}, {%x:%u}, %p",
+	       ret, dentry, dentry, vnode->fid.vid, vnode->fid.vnode, key);
 
 	if (ret != -ENOENT ||
 	    !test_bit(AFS_VNODE_AUTOCELL, &vnode->flags))
@@ -501,8 +502,8 @@ static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry,
 
 	vnode = AFS_FS_I(dir);
 
-	_enter("{%x:%u},%p{%s},",
-	       vnode->fid.vid, vnode->fid.vnode, dentry, dentry->d_name.name);
+	_enter("{%x:%u},%p{%pd},",
+	       vnode->fid.vid, vnode->fid.vnode, dentry, dentry);
 
 	ASSERTCMP(dentry->d_inode, ==, NULL);
 
@@ -588,11 +589,11 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags)
 	vnode = AFS_FS_I(dentry->d_inode);
 
 	if (dentry->d_inode)
-		_enter("{v={%x:%u} n=%s fl=%lx},",
-		       vnode->fid.vid, vnode->fid.vnode, dentry->d_name.name,
+		_enter("{v={%x:%u} n=%pd fl=%lx},",
+		       vnode->fid.vid, vnode->fid.vnode, dentry,
 		       vnode->flags);
 	else
-		_enter("{neg n=%s}", dentry->d_name.name);
+		_enter("{neg n=%pd}", dentry);
 
 	key = afs_request_key(AFS_FS_S(dentry->d_sb)->volume->cell);
 	if (IS_ERR(key))
@@ -607,7 +608,7 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags)
 		afs_validate(dir, key);
 
 	if (test_bit(AFS_VNODE_DELETED, &dir->flags)) {
-		_debug("%s: parent dir deleted", dentry->d_name.name);
+		_debug("%pd: parent dir deleted", dentry);
 		goto out_bad;
 	}
 
@@ -625,16 +626,16 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags)
 		if (!dentry->d_inode)
 			goto out_bad;
 		if (is_bad_inode(dentry->d_inode)) {
-			printk("kAFS: afs_d_revalidate: %s/%s has bad inode\n",
-			       parent->d_name.name, dentry->d_name.name);
+			printk("kAFS: afs_d_revalidate: %pd2 has bad inode\n",
+			       dentry);
 			goto out_bad;
 		}
 
 		/* if the vnode ID has changed, then the dirent points to a
 		 * different file */
 		if (fid.vnode != vnode->fid.vnode) {
-			_debug("%s: dirent changed [%u != %u]",
-			       dentry->d_name.name, fid.vnode,
+			_debug("%pd: dirent changed [%u != %u]",
+			       dentry, fid.vnode,
 			       vnode->fid.vnode);
 			goto not_found;
 		}
@@ -643,8 +644,8 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags)
 		 * been deleted and replaced, and the original vnode ID has
 		 * been reused */
 		if (fid.unique != vnode->fid.unique) {
-			_debug("%s: file deleted (uq %u -> %u I:%u)",
-			       dentry->d_name.name, fid.unique,
+			_debug("%pd: file deleted (uq %u -> %u I:%u)",
+			       dentry, fid.unique,
 			       vnode->fid.unique,
 			       dentry->d_inode->i_generation);
 			spin_lock(&vnode->lock);
@@ -656,14 +657,14 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags)
 
 	case -ENOENT:
 		/* the filename is unknown */
-		_debug("%s: dirent not found", dentry->d_name.name);
+		_debug("%pd: dirent not found", dentry);
 		if (dentry->d_inode)
 			goto not_found;
 		goto out_valid;
 
 	default:
-		_debug("failed to iterate dir %s: %d",
-		       parent->d_name.name, ret);
+		_debug("failed to iterate dir %pd: %d",
+		       parent, ret);
 		goto out_bad;
 	}
 
@@ -681,8 +682,7 @@ not_found:
 	spin_unlock(&dentry->d_lock);
 
 out_bad:
-	_debug("dropping dentry %s/%s",
-	       parent->d_name.name, dentry->d_name.name);
+	_debug("dropping dentry %pd2", dentry);
 	dput(parent);
 	key_put(key);
 
@@ -698,7 +698,7 @@ out_bad:
  */
 static int afs_d_delete(const struct dentry *dentry)
 {
-	_enter("%s", dentry->d_name.name);
+	_enter("%pd", dentry);
 
 	if (dentry->d_flags & DCACHE_NFSFS_RENAMED)
 		goto zap;
@@ -721,7 +721,7 @@ zap:
  */
 static void afs_d_release(struct dentry *dentry)
 {
-	_enter("%s", dentry->d_name.name);
+	_enter("%pd", dentry);
 }
 
 /*
@@ -740,8 +740,8 @@ static int afs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
 
 	dvnode = AFS_FS_I(dir);
 
-	_enter("{%x:%u},{%s},%ho",
-	       dvnode->fid.vid, dvnode->fid.vnode, dentry->d_name.name, mode);
+	_enter("{%x:%u},{%pd},%ho",
+	       dvnode->fid.vid, dvnode->fid.vnode, dentry, mode);
 
 	key = afs_request_key(dvnode->volume->cell);
 	if (IS_ERR(key)) {
@@ -801,8 +801,8 @@ static int afs_rmdir(struct inode *dir, struct dentry *dentry)
 
 	dvnode = AFS_FS_I(dir);
 
-	_enter("{%x:%u},{%s}",
-	       dvnode->fid.vid, dvnode->fid.vnode, dentry->d_name.name);
+	_enter("{%x:%u},{%pd}",
+	       dvnode->fid.vid, dvnode->fid.vnode, dentry);
 
 	key = afs_request_key(dvnode->volume->cell);
 	if (IS_ERR(key)) {
@@ -843,8 +843,8 @@ static int afs_unlink(struct inode *dir, struct dentry *dentry)
 
 	dvnode = AFS_FS_I(dir);
 
-	_enter("{%x:%u},{%s}",
-	       dvnode->fid.vid, dvnode->fid.vnode, dentry->d_name.name);
+	_enter("{%x:%u},{%pd}",
+	       dvnode->fid.vid, dvnode->fid.vnode, dentry);
 
 	ret = -ENAMETOOLONG;
 	if (dentry->d_name.len >= AFSNAMEMAX)
@@ -917,8 +917,8 @@ static int afs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
 
 	dvnode = AFS_FS_I(dir);
 
-	_enter("{%x:%u},{%s},%ho,",
-	       dvnode->fid.vid, dvnode->fid.vnode, dentry->d_name.name, mode);
+	_enter("{%x:%u},{%pd},%ho,",
+	       dvnode->fid.vid, dvnode->fid.vnode, dentry, mode);
 
 	key = afs_request_key(dvnode->volume->cell);
 	if (IS_ERR(key)) {
@@ -980,10 +980,10 @@ static int afs_link(struct dentry *from, struct inode *dir,
 	vnode = AFS_FS_I(from->d_inode);
 	dvnode = AFS_FS_I(dir);
 
-	_enter("{%x:%u},{%x:%u},{%s}",
+	_enter("{%x:%u},{%x:%u},{%pd}",
 	       vnode->fid.vid, vnode->fid.vnode,
 	       dvnode->fid.vid, dvnode->fid.vnode,
-	       dentry->d_name.name);
+	       dentry);
 
 	key = afs_request_key(dvnode->volume->cell);
 	if (IS_ERR(key)) {
@@ -1025,8 +1025,8 @@ static int afs_symlink(struct inode *dir, struct dentry *dentry,
 
 	dvnode = AFS_FS_I(dir);
 
-	_enter("{%x:%u},{%s},%s",
-	       dvnode->fid.vid, dvnode->fid.vnode, dentry->d_name.name,
+	_enter("{%x:%u},{%pd},%s",
+	       dvnode->fid.vid, dvnode->fid.vnode, dentry,
 	       content);
 
 	ret = -EINVAL;
@@ -1093,11 +1093,11 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry,
 	orig_dvnode = AFS_FS_I(old_dir);
 	new_dvnode = AFS_FS_I(new_dir);
 
-	_enter("{%x:%u},{%x:%u},{%x:%u},{%s}",
+	_enter("{%x:%u},{%x:%u},{%x:%u},{%pd}",
 	       orig_dvnode->fid.vid, orig_dvnode->fid.vnode,
 	       vnode->fid.vid, vnode->fid.vnode,
 	       new_dvnode->fid.vid, new_dvnode->fid.vnode,
-	       new_dentry->d_name.name);
+	       new_dentry);
 
 	key = afs_request_key(orig_dvnode->volume->cell);
 	if (IS_ERR(key)) {
diff --git a/fs/afs/inode.c b/fs/afs/inode.c
index 2946712..8a1d38e 100644
--- a/fs/afs/inode.c
+++ b/fs/afs/inode.c
@@ -462,8 +462,8 @@ int afs_setattr(struct dentry *dentry, struct iattr *attr)
 	struct key *key;
 	int ret;
 
-	_enter("{%x:%u},{n=%s},%x",
-	       vnode->fid.vid, vnode->fid.vnode, dentry->d_name.name,
+	_enter("{%x:%u},{n=%pd},%x",
+	       vnode->fid.vid, vnode->fid.vnode, dentry,
 	       attr->ia_valid);
 
 	if (!(attr->ia_valid & (ATTR_SIZE | ATTR_MODE | ATTR_UID | ATTR_GID |
diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c
index 9682c33..938c5ab 100644
--- a/fs/afs/mntpt.c
+++ b/fs/afs/mntpt.c
@@ -106,14 +106,7 @@ static struct dentry *afs_mntpt_lookup(struct inode *dir,
 				       struct dentry *dentry,
 				       unsigned int flags)
 {
-	_enter("%p,%p{%p{%s},%s}",
-	       dir,
-	       dentry,
-	       dentry->d_parent,
-	       dentry->d_parent ?
-	       dentry->d_parent->d_name.name : (const unsigned char *) "",
-	       dentry->d_name.name);
-
+	_enter("%p,%p{%pd2}", dir, dentry, dentry);
 	return ERR_PTR(-EREMOTE);
 }
 
@@ -122,14 +115,7 @@ static struct dentry *afs_mntpt_lookup(struct inode *dir,
  */
 static int afs_mntpt_open(struct inode *inode, struct file *file)
 {
-	_enter("%p,%p{%p{%s},%s}",
-	       inode, file,
-	       file->f_path.dentry->d_parent,
-	       file->f_path.dentry->d_parent ?
-	       file->f_path.dentry->d_parent->d_name.name :
-	       (const unsigned char *) "",
-	       file->f_path.dentry->d_name.name);
-
+	_enter("%p,%p{%pD2}", inode, file, file);
 	return -EREMOTE;
 }
 
@@ -146,7 +132,7 @@ static struct vfsmount *afs_mntpt_do_automount(struct dentry *mntpt)
 	bool rwpath = false;
 	int ret;
 
-	_enter("{%s}", mntpt->d_name.name);
+	_enter("{%pd}", mntpt);
 
 	BUG_ON(!mntpt->d_inode);
 
@@ -242,7 +228,7 @@ struct vfsmount *afs_d_automount(struct path *path)
 {
 	struct vfsmount *newmnt;
 
-	_enter("{%s}", path->dentry->d_name.name);
+	_enter("{%pd}", path->dentry);
 
 	newmnt = afs_mntpt_do_automount(path->dentry);
 	if (IS_ERR(newmnt))
diff --git a/fs/afs/write.c b/fs/afs/write.c
index ab6adfd..c13cb08 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -682,14 +682,13 @@ int afs_writeback_all(struct afs_vnode *vnode)
  */
 int afs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
 {
-	struct dentry *dentry = file->f_path.dentry;
-	struct inode *inode = file->f_mapping->host;
+	struct inode *inode = file_inode(file);
 	struct afs_writeback *wb, *xwb;
-	struct afs_vnode *vnode = AFS_FS_I(dentry->d_inode);
+	struct afs_vnode *vnode = AFS_FS_I(inode);
 	int ret;
 
-	_enter("{%x:%u},{n=%s},%d",
-	       vnode->fid.vid, vnode->fid.vnode, dentry->d_name.name,
+	_enter("{%x:%u},{n=%pD},%d",
+	       vnode->fid.vid, vnode->fid.vnode, file,
 	       datasync);
 
 	ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
diff --git a/fs/aio.c b/fs/aio.c
index 84a7510..14b9315 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -165,6 +165,15 @@ static struct vfsmount *aio_mnt;
 static const struct file_operations aio_ring_fops;
 static const struct address_space_operations aio_ctx_aops;
 
+/* Backing dev info for aio fs.
+ * -no dirty page accounting or writeback happens
+ */
+static struct backing_dev_info aio_fs_backing_dev_info = {
+	.name           = "aiofs",
+	.state          = 0,
+	.capabilities   = BDI_CAP_NO_ACCT_AND_WRITEBACK | BDI_CAP_MAP_COPY,
+};
+
 static struct file *aio_private_file(struct kioctx *ctx, loff_t nr_pages)
 {
 	struct qstr this = QSTR_INIT("[aio]", 5);
@@ -176,6 +185,7 @@ static struct file *aio_private_file(struct kioctx *ctx, loff_t nr_pages)
 
 	inode->i_mapping->a_ops = &aio_ctx_aops;
 	inode->i_mapping->private_data = ctx;
+	inode->i_mapping->backing_dev_info = &aio_fs_backing_dev_info;
 	inode->i_size = PAGE_SIZE * nr_pages;
 
 	path.dentry = d_alloc_pseudo(aio_mnt->mnt_sb, &this);
@@ -220,6 +230,9 @@ static int __init aio_setup(void)
 	if (IS_ERR(aio_mnt))
 		panic("Failed to create aio fs mount.");
 
+	if (bdi_init(&aio_fs_backing_dev_info))
+		panic("Failed to init aio fs backing dev info.");
+
 	kiocb_cachep = KMEM_CACHE(kiocb, SLAB_HWCACHE_ALIGN|SLAB_PANIC);
 	kioctx_cachep = KMEM_CACHE(kioctx,SLAB_HWCACHE_ALIGN|SLAB_PANIC);
 
@@ -281,11 +294,6 @@ static const struct file_operations aio_ring_fops = {
 	.mmap = aio_ring_mmap,
 };
 
-static int aio_set_page_dirty(struct page *page)
-{
-	return 0;
-}
-
 #if IS_ENABLED(CONFIG_MIGRATION)
 static int aio_migratepage(struct address_space *mapping, struct page *new,
 			struct page *old, enum migrate_mode mode)
@@ -357,7 +365,7 @@ out:
 #endif
 
 static const struct address_space_operations aio_ctx_aops = {
-	.set_page_dirty = aio_set_page_dirty,
+	.set_page_dirty = __set_page_dirty_no_writeback,
 #if IS_ENABLED(CONFIG_MIGRATION)
 	.migratepage	= aio_migratepage,
 #endif
@@ -412,7 +420,6 @@ static int aio_setup_ring(struct kioctx *ctx)
 		pr_debug("pid(%d) page[%d]->count=%d\n",
 			 current->pid, i, page_count(page));
 		SetPageUptodate(page);
-		SetPageDirty(page);
 		unlock_page(page);
 
 		ctx->ring_pages[i] = page;
diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c
index 683a5b9..bfdbaba 100644
--- a/fs/autofs4/expire.c
+++ b/fs/autofs4/expire.c
@@ -41,8 +41,7 @@ static int autofs4_mount_busy(struct vfsmount *mnt, struct dentry *dentry)
 	struct path path = {.mnt = mnt, .dentry = dentry};
 	int status = 1;
 
-	DPRINTK("dentry %p %.*s",
-		dentry, (int)dentry->d_name.len, dentry->d_name.name);
+	DPRINTK("dentry %p %pd", dentry, dentry);
 
 	path_get(&path);
 
@@ -85,7 +84,7 @@ static struct dentry *get_next_positive_subdir(struct dentry *prev,
 	spin_lock(&root->d_lock);
 
 	if (prev)
-		next = prev->d_u.d_child.next;
+		next = prev->d_child.next;
 	else {
 		prev = dget_dlock(root);
 		next = prev->d_subdirs.next;
@@ -99,13 +98,13 @@ cont:
 		return NULL;
 	}
 
-	q = list_entry(next, struct dentry, d_u.d_child);
+	q = list_entry(next, struct dentry, d_child);
 
 	spin_lock_nested(&q->d_lock, DENTRY_D_LOCK_NESTED);
 	/* Already gone or negative dentry (under construction) - try next */
 	if (!d_count(q) || !simple_positive(q)) {
 		spin_unlock(&q->d_lock);
-		next = q->d_u.d_child.next;
+		next = q->d_child.next;
 		goto cont;
 	}
 	dget_dlock(q);
@@ -155,13 +154,13 @@ again:
 				goto relock;
 			}
 			spin_unlock(&p->d_lock);
-			next = p->d_u.d_child.next;
+			next = p->d_child.next;
 			p = parent;
 			if (next != &parent->d_subdirs)
 				break;
 		}
 	}
-	ret = list_entry(next, struct dentry, d_u.d_child);
+	ret = list_entry(next, struct dentry, d_child);
 
 	spin_lock_nested(&ret->d_lock, DENTRY_D_LOCK_NESTED);
 	/* Negative dentry - try next */
@@ -192,8 +191,7 @@ static int autofs4_direct_busy(struct vfsmount *mnt,
 				unsigned long timeout,
 				int do_now)
 {
-	DPRINTK("top %p %.*s",
-		top, (int) top->d_name.len, top->d_name.name);
+	DPRINTK("top %p %pd", top, top);
 
 	/* If it's busy update the expiry counters */
 	if (!may_umount_tree(mnt)) {
@@ -221,8 +219,7 @@ static int autofs4_tree_busy(struct vfsmount *mnt,
 	struct autofs_info *top_ino = autofs4_dentry_ino(top);
 	struct dentry *p;
 
-	DPRINTK("top %p %.*s",
-		top, (int)top->d_name.len, top->d_name.name);
+	DPRINTK("top %p %pd", top, top);
 
 	/* Negative dentry - give up */
 	if (!simple_positive(top))
@@ -230,8 +227,7 @@ static int autofs4_tree_busy(struct vfsmount *mnt,
 
 	p = NULL;
 	while ((p = get_next_positive_dentry(p, top))) {
-		DPRINTK("dentry %p %.*s",
-			p, (int) p->d_name.len, p->d_name.name);
+		DPRINTK("dentry %p %pd", p, p);
 
 		/*
 		 * Is someone visiting anywhere in the subtree ?
@@ -277,13 +273,11 @@ static struct dentry *autofs4_check_leaves(struct vfsmount *mnt,
 {
 	struct dentry *p;
 
-	DPRINTK("parent %p %.*s",
-		parent, (int)parent->d_name.len, parent->d_name.name);
+	DPRINTK("parent %p %pd", parent, parent);
 
 	p = NULL;
 	while ((p = get_next_positive_dentry(p, parent))) {
-		DPRINTK("dentry %p %.*s",
-			p, (int) p->d_name.len, p->d_name.name);
+		DPRINTK("dentry %p %pd", p, p);
 
 		if (d_mountpoint(p)) {
 			/* Can we umount this guy */
@@ -368,8 +362,7 @@ static struct dentry *should_expire(struct dentry *dentry,
 	 *	   offset (autofs-5.0+).
 	 */
 	if (d_mountpoint(dentry)) {
-		DPRINTK("checking mountpoint %p %.*s",
-			dentry, (int)dentry->d_name.len, dentry->d_name.name);
+		DPRINTK("checking mountpoint %p %pd", dentry, dentry);
 
 		/* Can we umount this guy */
 		if (autofs4_mount_busy(mnt, dentry))
@@ -382,8 +375,7 @@ static struct dentry *should_expire(struct dentry *dentry,
 	}
 
 	if (dentry->d_inode && S_ISLNK(dentry->d_inode->i_mode)) {
-		DPRINTK("checking symlink %p %.*s",
-			dentry, (int)dentry->d_name.len, dentry->d_name.name);
+		DPRINTK("checking symlink %p %pd", dentry, dentry);
 		/*
 		 * A symlink can't be "busy" in the usual sense so
 		 * just check last used for expire timeout.
@@ -479,8 +471,7 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb,
 	return NULL;
 
 found:
-	DPRINTK("returning %p %.*s",
-		expired, (int)expired->d_name.len, expired->d_name.name);
+	DPRINTK("returning %p %pd", expired, expired);
 	ino->flags |= AUTOFS_INF_EXPIRING;
 	smp_mb();
 	ino->flags &= ~AUTOFS_INF_NO_RCU;
@@ -489,7 +480,7 @@ found:
 	spin_lock(&sbi->lookup_lock);
 	spin_lock(&expired->d_parent->d_lock);
 	spin_lock_nested(&expired->d_lock, DENTRY_D_LOCK_NESTED);
-	list_move(&expired->d_parent->d_subdirs, &expired->d_u.d_child);
+	list_move(&expired->d_parent->d_subdirs, &expired->d_child);
 	spin_unlock(&expired->d_lock);
 	spin_unlock(&expired->d_parent->d_lock);
 	spin_unlock(&sbi->lookup_lock);
@@ -512,8 +503,7 @@ int autofs4_expire_wait(struct dentry *dentry, int rcu_walk)
 	if (ino->flags & AUTOFS_INF_EXPIRING) {
 		spin_unlock(&sbi->fs_lock);
 
-		DPRINTK("waiting for expire %p name=%.*s",
-			 dentry, dentry->d_name.len, dentry->d_name.name);
+		DPRINTK("waiting for expire %p name=%pd", dentry, dentry);
 
 		status = autofs4_wait(sbi, dentry, NFY_NONE);
 		wait_for_completion(&ino->expire_complete);
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index d76d083..dbb5b72 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -108,8 +108,7 @@ static int autofs4_dir_open(struct inode *inode, struct file *file)
 	struct dentry *dentry = file->f_path.dentry;
 	struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
 
-	DPRINTK("file=%p dentry=%p %.*s",
-		file, dentry, dentry->d_name.len, dentry->d_name.name);
+	DPRINTK("file=%p dentry=%p %pD", file, dentry, dentry);
 
 	if (autofs4_oz_mode(sbi))
 		goto out;
@@ -279,8 +278,7 @@ static int autofs4_mount_wait(struct dentry *dentry, bool rcu_walk)
 	if (ino->flags & AUTOFS_INF_PENDING) {
 		if (rcu_walk)
 			return -ECHILD;
-		DPRINTK("waiting for mount name=%.*s",
-			dentry->d_name.len, dentry->d_name.name);
+		DPRINTK("waiting for mount name=%pd", dentry);
 		status = autofs4_wait(sbi, dentry, NFY_MOUNT);
 		DPRINTK("mount wait done status=%d", status);
 	}
@@ -340,8 +338,7 @@ static struct vfsmount *autofs4_d_automount(struct path *path)
 	struct autofs_info *ino = autofs4_dentry_ino(dentry);
 	int status;
 
-	DPRINTK("dentry=%p %.*s",
-		dentry, dentry->d_name.len, dentry->d_name.name);
+	DPRINTK("dentry=%p %pd", dentry, dentry);
 
 	/* The daemon never triggers a mount. */
 	if (autofs4_oz_mode(sbi))
@@ -428,8 +425,7 @@ static int autofs4_d_manage(struct dentry *dentry, bool rcu_walk)
 	struct autofs_info *ino = autofs4_dentry_ino(dentry);
 	int status;
 
-	DPRINTK("dentry=%p %.*s",
-		dentry, dentry->d_name.len, dentry->d_name.name);
+	DPRINTK("dentry=%p %pd", dentry, dentry);
 
 	/* The daemon never waits. */
 	if (autofs4_oz_mode(sbi)) {
@@ -504,7 +500,7 @@ static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, u
 	struct autofs_info *ino;
 	struct dentry *active;
 
-	DPRINTK("name = %.*s", dentry->d_name.len, dentry->d_name.name);
+	DPRINTK("name = %pd", dentry);
 
 	/* File name too long to exist */
 	if (dentry->d_name.len > NAME_MAX)
@@ -558,8 +554,7 @@ static int autofs4_dir_symlink(struct inode *dir,
 	size_t size = strlen(symname);
 	char *cp;
 
-	DPRINTK("%s <- %.*s", symname,
-		dentry->d_name.len, dentry->d_name.name);
+	DPRINTK("%s <- %pd", symname, dentry);
 
 	if (!autofs4_oz_mode(sbi))
 		return -EACCES;
@@ -687,7 +682,7 @@ static void autofs_clear_leaf_automount_flags(struct dentry *dentry)
 	/* only consider parents below dentrys in the root */
 	if (IS_ROOT(parent->d_parent))
 		return;
-	d_child = &dentry->d_u.d_child;
+	d_child = &dentry->d_child;
 	/* Set parent managed if it's becoming empty */
 	if (d_child->next == &parent->d_subdirs &&
 	    d_child->prev == &parent->d_subdirs)
@@ -701,8 +696,7 @@ static int autofs4_dir_rmdir(struct inode *dir, struct dentry *dentry)
 	struct autofs_info *ino = autofs4_dentry_ino(dentry);
 	struct autofs_info *p_ino;
 	
-	DPRINTK("dentry %p, removing %.*s",
-		dentry, dentry->d_name.len, dentry->d_name.name);
+	DPRINTK("dentry %p, removing %pd", dentry, dentry);
 
 	if (!autofs4_oz_mode(sbi))
 		return -EACCES;
@@ -744,8 +738,7 @@ static int autofs4_dir_mkdir(struct inode *dir, struct dentry *dentry, umode_t m
 	if (!autofs4_oz_mode(sbi))
 		return -EACCES;
 
-	DPRINTK("dentry %p, creating %.*s",
-		dentry, dentry->d_name.len, dentry->d_name.name);
+	DPRINTK("dentry %p, creating %pd", dentry, dentry);
 
 	BUG_ON(!ino);
 
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index 4cf61ec..b94d1cc 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -172,8 +172,8 @@ befs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
 	char *utfname;
 	const char *name = dentry->d_name.name;
 
-	befs_debug(sb, "---> %s name %s inode %ld", __func__,
-		   dentry->d_name.name, dir->i_ino);
+	befs_debug(sb, "---> %s name %pd inode %ld", __func__,
+		   dentry, dir->i_ino);
 
 	/* Convert to UTF-8 */
 	if (BEFS_SB(sb)->nls) {
@@ -191,8 +191,7 @@ befs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
 	}
 
 	if (ret == BEFS_BT_NOT_FOUND) {
-		befs_debug(sb, "<--- %s %s not found", __func__,
-			   dentry->d_name.name);
+		befs_debug(sb, "<--- %s %pd not found", __func__, dentry);
 		return ERR_PTR(-ENOENT);
 
 	} else if (ret != BEFS_OK || offset == 0) {
@@ -222,10 +221,9 @@ befs_readdir(struct file *file, struct dir_context *ctx)
 	size_t keysize;
 	unsigned char d_type;
 	char keybuf[BEFS_NAME_LEN + 1];
-	const char *dirname = file->f_path.dentry->d_name.name;
 
-	befs_debug(sb, "---> %s name %s, inode %ld, ctx->pos %lld",
-		  __func__, dirname, inode->i_ino, ctx->pos);
+	befs_debug(sb, "---> %s name %pD, inode %ld, ctx->pos %lld",
+		  __func__, file, inode->i_ino, ctx->pos);
 
 more:
 	result = befs_btree_read(sb, ds, ctx->pos, BEFS_NAME_LEN + 1,
@@ -233,8 +231,8 @@ more:
 
 	if (result == BEFS_ERR) {
 		befs_debug(sb, "<--- %s ERROR", __func__);
-		befs_error(sb, "IO error reading %s (inode %lu)",
-			   dirname, inode->i_ino);
+		befs_error(sb, "IO error reading %pD (inode %lu)",
+			   file, inode->i_ino);
 		return -EIO;
 
 	} else if (result == BEFS_BT_END) {
diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c
index 929dec0..4c55668 100644
--- a/fs/binfmt_aout.c
+++ b/fs/binfmt_aout.c
@@ -292,8 +292,8 @@ static int load_aout_binary(struct linux_binprm * bprm)
 		if ((fd_offset & ~PAGE_MASK) != 0 && printk_ratelimit())
 		{
 			printk(KERN_WARNING 
-			       "fd_offset is not page aligned. Please convert program: %s\n",
-			       bprm->file->f_path.dentry->d_name.name);
+			       "fd_offset is not page aligned. Please convert program: %pD\n",
+			       bprm->file);
 		}
 
 		if (!bprm->file->f_op->mmap||((fd_offset & ~PAGE_MASK) != 0)) {
@@ -375,8 +375,8 @@ static int load_aout_library(struct file *file)
 		if (printk_ratelimit())
 		{
 			printk(KERN_WARNING 
-			       "N_TXTOFF is not page aligned. Please convert library: %s\n",
-			       file->f_path.dentry->d_name.name);
+			       "N_TXTOFF is not page aligned. Please convert library: %pD\n",
+			       file);
 		}
 		vm_brk(start_addr, ex.a_text + ex.a_data + ex.a_bss);
 		
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index d8fc060..3a6175f 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1994,18 +1994,6 @@ static void fill_extnum_info(struct elfhdr *elf, struct elf_shdr *shdr4extnum,
 	shdr4extnum->sh_info = segs;
 }
 
-static size_t elf_core_vma_data_size(struct vm_area_struct *gate_vma,
-				     unsigned long mm_flags)
-{
-	struct vm_area_struct *vma;
-	size_t size = 0;
-
-	for (vma = first_vma(current, gate_vma); vma != NULL;
-	     vma = next_vma(vma, gate_vma))
-		size += vma_dump_size(vma, mm_flags);
-	return size;
-}
-
 /*
  * Actual dumper
  *
@@ -2017,7 +2005,8 @@ static int elf_core_dump(struct coredump_params *cprm)
 {
 	int has_dumped = 0;
 	mm_segment_t fs;
-	int segs;
+	int segs, i;
+	size_t vma_data_size = 0;
 	struct vm_area_struct *vma, *gate_vma;
 	struct elfhdr *elf = NULL;
 	loff_t offset = 0, dataoff;
@@ -2026,6 +2015,7 @@ static int elf_core_dump(struct coredump_params *cprm)
 	struct elf_shdr *shdr4extnum = NULL;
 	Elf_Half e_phnum;
 	elf_addr_t e_shoff;
+	elf_addr_t *vma_filesz = NULL;
 
 	/*
 	 * We no longer stop all VM operations.
@@ -2093,7 +2083,20 @@ static int elf_core_dump(struct coredump_params *cprm)
 
 	dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
 
-	offset += elf_core_vma_data_size(gate_vma, cprm->mm_flags);
+	vma_filesz = kmalloc_array(segs - 1, sizeof(*vma_filesz), GFP_KERNEL);
+	if (!vma_filesz)
+		goto end_coredump;
+
+	for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
+			vma = next_vma(vma, gate_vma)) {
+		unsigned long dump_size;
+
+		dump_size = vma_dump_size(vma, cprm->mm_flags);
+		vma_filesz[i++] = dump_size;
+		vma_data_size += dump_size;
+	}
+
+	offset += vma_data_size;
 	offset += elf_core_extra_data_size();
 	e_shoff = offset;
 
@@ -2113,7 +2116,7 @@ static int elf_core_dump(struct coredump_params *cprm)
 		goto end_coredump;
 
 	/* Write program headers for segments dump */
-	for (vma = first_vma(current, gate_vma); vma != NULL;
+	for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
 			vma = next_vma(vma, gate_vma)) {
 		struct elf_phdr phdr;
 
@@ -2121,7 +2124,7 @@ static int elf_core_dump(struct coredump_params *cprm)
 		phdr.p_offset = offset;
 		phdr.p_vaddr = vma->vm_start;
 		phdr.p_paddr = 0;
-		phdr.p_filesz = vma_dump_size(vma, cprm->mm_flags);
+		phdr.p_filesz = vma_filesz[i++];
 		phdr.p_memsz = vma->vm_end - vma->vm_start;
 		offset += phdr.p_filesz;
 		phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0;
@@ -2149,12 +2152,12 @@ static int elf_core_dump(struct coredump_params *cprm)
 	if (!dump_skip(cprm, dataoff - cprm->written))
 		goto end_coredump;
 
-	for (vma = first_vma(current, gate_vma); vma != NULL;
+	for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
 			vma = next_vma(vma, gate_vma)) {
 		unsigned long addr;
 		unsigned long end;
 
-		end = vma->vm_start + vma_dump_size(vma, cprm->mm_flags);
+		end = vma->vm_start + vma_filesz[i++];
 
 		for (addr = vma->vm_start; addr < end; addr += PAGE_SIZE) {
 			struct page *page;
@@ -2187,6 +2190,7 @@ end_coredump:
 cleanup:
 	free_note_info(&info);
 	kfree(shdr4extnum);
+	kfree(vma_filesz);
 	kfree(phdr4note);
 	kfree(elf);
 out:
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index fd8beb9..70789e1 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -1,21 +1,14 @@
 /*
- *  binfmt_misc.c
+ * binfmt_misc.c
  *
- *  Copyright (C) 1997 Richard Günther
+ * Copyright (C) 1997 Richard Günther
  *
- *  binfmt_misc detects binaries via a magic or filename extension and invokes
- *  a specified wrapper. This should obsolete binfmt_java, binfmt_em86 and
- *  binfmt_mz.
- *
- *  1997-04-25 first version
- *  [...]
- *  1997-05-19 cleanup
- *  1997-06-26 hpa: pass the real filename rather than argv[0]
- *  1997-06-30 minor cleanup
- *  1997-08-09 removed extension stripping, locking cleanup
- *  2001-02-28 AV: rewritten into something that resembles C. Original didn't.
+ * binfmt_misc detects binaries via a magic or filename extension and invokes
+ * a specified wrapper. See Documentation/binfmt_misc.txt for more details.
  */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/sched.h>
@@ -30,8 +23,13 @@
 #include <linux/mount.h>
 #include <linux/syscalls.h>
 #include <linux/fs.h>
+#include <linux/uaccess.h>
 
-#include <asm/uaccess.h>
+#ifdef DEBUG
+# define USE_DEBUG 1
+#else
+# define USE_DEBUG 0
+#endif
 
 enum {
 	VERBOSE_STATUS = 1 /* make it zero to save 400 bytes kernel memory */
@@ -41,9 +39,9 @@ static LIST_HEAD(entries);
 static int enabled = 1;
 
 enum {Enabled, Magic};
-#define MISC_FMT_PRESERVE_ARGV0 (1<<31)
-#define MISC_FMT_OPEN_BINARY (1<<30)
-#define MISC_FMT_CREDENTIALS (1<<29)
+#define MISC_FMT_PRESERVE_ARGV0 (1 << 31)
+#define MISC_FMT_OPEN_BINARY (1 << 30)
+#define MISC_FMT_CREDENTIALS (1 << 29)
 
 typedef struct {
 	struct list_head list;
@@ -87,20 +85,24 @@ static Node *check_file(struct linux_binprm *bprm)
 	char *p = strrchr(bprm->interp, '.');
 	struct list_head *l;
 
+	/* Walk all the registered handlers. */
 	list_for_each(l, &entries) {
 		Node *e = list_entry(l, Node, list);
 		char *s;
 		int j;
 
+		/* Make sure this one is currently enabled. */
 		if (!test_bit(Enabled, &e->flags))
 			continue;
 
+		/* Do matching based on extension if applicable. */
 		if (!test_bit(Magic, &e->flags)) {
 			if (p && !strcmp(e->magic, p + 1))
 				return e;
 			continue;
 		}
 
+		/* Do matching based on magic & mask. */
 		s = bprm->buf + e->offset;
 		if (e->mask) {
 			for (j = 0; j < e->size; j++)
@@ -123,7 +125,7 @@ static Node *check_file(struct linux_binprm *bprm)
 static int load_misc_binary(struct linux_binprm *bprm)
 {
 	Node *fmt;
-	struct file * interp_file = NULL;
+	struct file *interp_file = NULL;
 	char iname[BINPRM_BUF_SIZE];
 	const char *iname_addr = iname;
 	int retval;
@@ -131,7 +133,7 @@ static int load_misc_binary(struct linux_binprm *bprm)
 
 	retval = -ENOEXEC;
 	if (!enabled)
-		goto _ret;
+		goto ret;
 
 	/* to keep locking time low, we copy the interpreter string */
 	read_lock(&entries_lock);
@@ -140,25 +142,26 @@ static int load_misc_binary(struct linux_binprm *bprm)
 		strlcpy(iname, fmt->interpreter, BINPRM_BUF_SIZE);
 	read_unlock(&entries_lock);
 	if (!fmt)
-		goto _ret;
+		goto ret;
 
 	if (!(fmt->flags & MISC_FMT_PRESERVE_ARGV0)) {
 		retval = remove_arg_zero(bprm);
 		if (retval)
-			goto _ret;
+			goto ret;
 	}
 
 	if (fmt->flags & MISC_FMT_OPEN_BINARY) {
 
 		/* if the binary should be opened on behalf of the
 		 * interpreter than keep it open and assign descriptor
-		 * to it */
- 		fd_binary = get_unused_fd();
- 		if (fd_binary < 0) {
- 			retval = fd_binary;
- 			goto _ret;
- 		}
- 		fd_install(fd_binary, bprm->file);
+		 * to it
+		 */
+		fd_binary = get_unused_fd_flags(0);
+		if (fd_binary < 0) {
+			retval = fd_binary;
+			goto ret;
+		}
+		fd_install(fd_binary, bprm->file);
 
 		/* if the binary is not readable than enforce mm->dumpable=0
 		   regardless of the interpreter's permissions */
@@ -171,32 +174,32 @@ static int load_misc_binary(struct linux_binprm *bprm)
 		bprm->interp_flags |= BINPRM_FLAGS_EXECFD;
 		bprm->interp_data = fd_binary;
 
- 	} else {
- 		allow_write_access(bprm->file);
- 		fput(bprm->file);
- 		bprm->file = NULL;
- 	}
+	} else {
+		allow_write_access(bprm->file);
+		fput(bprm->file);
+		bprm->file = NULL;
+	}
 	/* make argv[1] be the path to the binary */
-	retval = copy_strings_kernel (1, &bprm->interp, bprm);
+	retval = copy_strings_kernel(1, &bprm->interp, bprm);
 	if (retval < 0)
-		goto _error;
+		goto error;
 	bprm->argc++;
 
 	/* add the interp as argv[0] */
-	retval = copy_strings_kernel (1, &iname_addr, bprm);
+	retval = copy_strings_kernel(1, &iname_addr, bprm);
 	if (retval < 0)
-		goto _error;
-	bprm->argc ++;
+		goto error;
+	bprm->argc++;
 
 	/* Update interp in case binfmt_script needs it. */
 	retval = bprm_change_interp(iname, bprm);
 	if (retval < 0)
-		goto _error;
+		goto error;
 
-	interp_file = open_exec (iname);
-	retval = PTR_ERR (interp_file);
-	if (IS_ERR (interp_file))
-		goto _error;
+	interp_file = open_exec(iname);
+	retval = PTR_ERR(interp_file);
+	if (IS_ERR(interp_file))
+		goto error;
 
 	bprm->file = interp_file;
 	if (fmt->flags & MISC_FMT_CREDENTIALS) {
@@ -207,23 +210,23 @@ static int load_misc_binary(struct linux_binprm *bprm)
 		memset(bprm->buf, 0, BINPRM_BUF_SIZE);
 		retval = kernel_read(bprm->file, 0, bprm->buf, BINPRM_BUF_SIZE);
 	} else
-		retval = prepare_binprm (bprm);
+		retval = prepare_binprm(bprm);
 
 	if (retval < 0)
-		goto _error;
+		goto error;
 
 	retval = search_binary_handler(bprm);
 	if (retval < 0)
-		goto _error;
+		goto error;
 
-_ret:
+ret:
 	return retval;
-_error:
+error:
 	if (fd_binary > 0)
 		sys_close(fd_binary);
 	bprm->interp_flags = 0;
 	bprm->interp_data = 0;
-	goto _ret;
+	goto ret;
 }
 
 /* Command parsers */
@@ -250,36 +253,40 @@ static char *scanarg(char *s, char del)
 	return s;
 }
 
-static char * check_special_flags (char * sfs, Node * e)
+static char *check_special_flags(char *sfs, Node *e)
 {
-	char * p = sfs;
+	char *p = sfs;
 	int cont = 1;
 
 	/* special flags */
 	while (cont) {
 		switch (*p) {
-			case 'P':
-				p++;
-				e->flags |= MISC_FMT_PRESERVE_ARGV0;
-				break;
-			case 'O':
-				p++;
-				e->flags |= MISC_FMT_OPEN_BINARY;
-				break;
-			case 'C':
-				p++;
-				/* this flags also implies the
-				   open-binary flag */
-				e->flags |= (MISC_FMT_CREDENTIALS |
-						MISC_FMT_OPEN_BINARY);
-				break;
-			default:
-				cont = 0;
+		case 'P':
+			pr_debug("register: flag: P (preserve argv0)\n");
+			p++;
+			e->flags |= MISC_FMT_PRESERVE_ARGV0;
+			break;
+		case 'O':
+			pr_debug("register: flag: O (open binary)\n");
+			p++;
+			e->flags |= MISC_FMT_OPEN_BINARY;
+			break;
+		case 'C':
+			pr_debug("register: flag: C (preserve creds)\n");
+			p++;
+			/* this flags also implies the
+			   open-binary flag */
+			e->flags |= (MISC_FMT_CREDENTIALS |
+					MISC_FMT_OPEN_BINARY);
+			break;
+		default:
+			cont = 0;
 		}
 	}
 
 	return p;
 }
+
 /*
  * This registers a new binary format, it recognises the syntax
  * ':name:type:offset:magic:mask:interpreter:flags'
@@ -292,6 +299,8 @@ static Node *create_entry(const char __user *buffer, size_t count)
 	char *buf, *p;
 	char del;
 
+	pr_debug("register: received %zu bytes\n", count);
+
 	/* some sanity checks */
 	err = -EINVAL;
 	if ((count < 11) || (count > MAX_REGISTER_LENGTH))
@@ -299,7 +308,7 @@ static Node *create_entry(const char __user *buffer, size_t count)
 
 	err = -ENOMEM;
 	memsize = sizeof(Node) + count + 8;
-	e = kmalloc(memsize, GFP_USER);
+	e = kmalloc(memsize, GFP_KERNEL);
 	if (!e)
 		goto out;
 
@@ -307,98 +316,175 @@ static Node *create_entry(const char __user *buffer, size_t count)
 
 	memset(e, 0, sizeof(Node));
 	if (copy_from_user(buf, buffer, count))
-		goto Efault;
+		goto efault;
 
 	del = *p++;	/* delimeter */
 
-	memset(buf+count, del, 8);
+	pr_debug("register: delim: %#x {%c}\n", del, del);
+
+	/* Pad the buffer with the delim to simplify parsing below. */
+	memset(buf + count, del, 8);
 
+	/* Parse the 'name' field. */
 	e->name = p;
 	p = strchr(p, del);
 	if (!p)
-		goto Einval;
+		goto einval;
 	*p++ = '\0';
 	if (!e->name[0] ||
 	    !strcmp(e->name, ".") ||
 	    !strcmp(e->name, "..") ||
 	    strchr(e->name, '/'))
-		goto Einval;
+		goto einval;
+
+	pr_debug("register: name: {%s}\n", e->name);
+
+	/* Parse the 'type' field. */
 	switch (*p++) {
-		case 'E': e->flags = 1<<Enabled; break;
-		case 'M': e->flags = (1<<Enabled) | (1<<Magic); break;
-		default: goto Einval;
+	case 'E':
+		pr_debug("register: type: E (extension)\n");
+		e->flags = 1 << Enabled;
+		break;
+	case 'M':
+		pr_debug("register: type: M (magic)\n");
+		e->flags = (1 << Enabled) | (1 << Magic);
+		break;
+	default:
+		goto einval;
 	}
 	if (*p++ != del)
-		goto Einval;
+		goto einval;
+
 	if (test_bit(Magic, &e->flags)) {
-		char *s = strchr(p, del);
+		/* Handle the 'M' (magic) format. */
+		char *s;
+
+		/* Parse the 'offset' field. */
+		s = strchr(p, del);
 		if (!s)
-			goto Einval;
+			goto einval;
 		*s++ = '\0';
 		e->offset = simple_strtoul(p, &p, 10);
 		if (*p++)
-			goto Einval;
+			goto einval;
+		pr_debug("register: offset: %#x\n", e->offset);
+
+		/* Parse the 'magic' field. */
 		e->magic = p;
 		p = scanarg(p, del);
 		if (!p)
-			goto Einval;
+			goto einval;
 		p[-1] = '\0';
-		if (!e->magic[0])
-			goto Einval;
+		if (p == e->magic)
+			goto einval;
+		if (USE_DEBUG)
+			print_hex_dump_bytes(
+				KBUILD_MODNAME ": register: magic[raw]: ",
+				DUMP_PREFIX_NONE, e->magic, p - e->magic);
+
+		/* Parse the 'mask' field. */
 		e->mask = p;
 		p = scanarg(p, del);
 		if (!p)
-			goto Einval;
+			goto einval;
 		p[-1] = '\0';
-		if (!e->mask[0])
+		if (p == e->mask) {
 			e->mask = NULL;
+			pr_debug("register:  mask[raw]: none\n");
+		} else if (USE_DEBUG)
+			print_hex_dump_bytes(
+				KBUILD_MODNAME ": register:  mask[raw]: ",
+				DUMP_PREFIX_NONE, e->mask, p - e->mask);
+
+		/*
+		 * Decode the magic & mask fields.
+		 * Note: while we might have accepted embedded NUL bytes from
+		 * above, the unescape helpers here will stop at the first one
+		 * it encounters.
+		 */
 		e->size = string_unescape_inplace(e->magic, UNESCAPE_HEX);
 		if (e->mask &&
 		    string_unescape_inplace(e->mask, UNESCAPE_HEX) != e->size)
-			goto Einval;
+			goto einval;
 		if (e->size + e->offset > BINPRM_BUF_SIZE)
-			goto Einval;
+			goto einval;
+		pr_debug("register: magic/mask length: %i\n", e->size);
+		if (USE_DEBUG) {
+			print_hex_dump_bytes(
+				KBUILD_MODNAME ": register: magic[decoded]: ",
+				DUMP_PREFIX_NONE, e->magic, e->size);
+
+			if (e->mask) {
+				int i;
+				char *masked = kmalloc(e->size, GFP_KERNEL);
+
+				print_hex_dump_bytes(
+					KBUILD_MODNAME ": register:  mask[decoded]: ",
+					DUMP_PREFIX_NONE, e->mask, e->size);
+
+				if (masked) {
+					for (i = 0; i < e->size; ++i)
+						masked[i] = e->magic[i] & e->mask[i];
+					print_hex_dump_bytes(
+						KBUILD_MODNAME ": register:  magic[masked]: ",
+						DUMP_PREFIX_NONE, masked, e->size);
+
+					kfree(masked);
+				}
+			}
+		}
 	} else {
+		/* Handle the 'E' (extension) format. */
+
+		/* Skip the 'offset' field. */
 		p = strchr(p, del);
 		if (!p)
-			goto Einval;
+			goto einval;
 		*p++ = '\0';
+
+		/* Parse the 'magic' field. */
 		e->magic = p;
 		p = strchr(p, del);
 		if (!p)
-			goto Einval;
+			goto einval;
 		*p++ = '\0';
 		if (!e->magic[0] || strchr(e->magic, '/'))
-			goto Einval;
+			goto einval;
+		pr_debug("register: extension: {%s}\n", e->magic);
+
+		/* Skip the 'mask' field. */
 		p = strchr(p, del);
 		if (!p)
-			goto Einval;
+			goto einval;
 		*p++ = '\0';
 	}
+
+	/* Parse the 'interpreter' field. */
 	e->interpreter = p;
 	p = strchr(p, del);
 	if (!p)
-		goto Einval;
+		goto einval;
 	*p++ = '\0';
 	if (!e->interpreter[0])
-		goto Einval;
-
-
-	p = check_special_flags (p, e);
+		goto einval;
+	pr_debug("register: interpreter: {%s}\n", e->interpreter);
 
+	/* Parse the 'flags' field. */
+	p = check_special_flags(p, e);
 	if (*p == '\n')
 		p++;
 	if (p != buf + count)
-		goto Einval;
+		goto einval;
+
 	return e;
 
 out:
 	return ERR_PTR(err);
 
-Efault:
+efault:
 	kfree(e);
 	return ERR_PTR(-EFAULT);
-Einval:
+einval:
 	kfree(e);
 	return ERR_PTR(-EINVAL);
 }
@@ -417,7 +503,7 @@ static int parse_command(const char __user *buffer, size_t count)
 		return -EFAULT;
 	if (!count)
 		return 0;
-	if (s[count-1] == '\n')
+	if (s[count - 1] == '\n')
 		count--;
 	if (count == 1 && s[0] == '0')
 		return 1;
@@ -434,7 +520,7 @@ static void entry_status(Node *e, char *page)
 {
 	char *dp;
 	char *status = "disabled";
-	const char * flags = "flags: ";
+	const char *flags = "flags: ";
 
 	if (test_bit(Enabled, &e->flags))
 		status = "enabled";
@@ -448,19 +534,15 @@ static void entry_status(Node *e, char *page)
 	dp = page + strlen(page);
 
 	/* print the special flags */
-	sprintf (dp, "%s", flags);
-	dp += strlen (flags);
-	if (e->flags & MISC_FMT_PRESERVE_ARGV0) {
-		*dp ++ = 'P';
-	}
-	if (e->flags & MISC_FMT_OPEN_BINARY) {
-		*dp ++ = 'O';
-	}
-	if (e->flags & MISC_FMT_CREDENTIALS) {
-		*dp ++ = 'C';
-	}
-	*dp ++ = '\n';
-
+	sprintf(dp, "%s", flags);
+	dp += strlen(flags);
+	if (e->flags & MISC_FMT_PRESERVE_ARGV0)
+		*dp++ = 'P';
+	if (e->flags & MISC_FMT_OPEN_BINARY)
+		*dp++ = 'O';
+	if (e->flags & MISC_FMT_CREDENTIALS)
+		*dp++ = 'C';
+	*dp++ = '\n';
 
 	if (!test_bit(Magic, &e->flags)) {
 		sprintf(dp, "extension .%s\n", e->magic);
@@ -488,7 +570,7 @@ static void entry_status(Node *e, char *page)
 
 static struct inode *bm_get_inode(struct super_block *sb, int mode)
 {
-	struct inode * inode = new_inode(sb);
+	struct inode *inode = new_inode(sb);
 
 	if (inode) {
 		inode->i_ino = get_next_ino();
@@ -528,13 +610,14 @@ static void kill_node(Node *e)
 /* /<entry> */
 
 static ssize_t
-bm_entry_read(struct file * file, char __user * buf, size_t nbytes, loff_t *ppos)
+bm_entry_read(struct file *file, char __user *buf, size_t nbytes, loff_t *ppos)
 {
 	Node *e = file_inode(file)->i_private;
 	ssize_t res;
 	char *page;
 
-	if (!(page = (char*) __get_free_page(GFP_KERNEL)))
+	page = (char *) __get_free_page(GFP_KERNEL);
+	if (!page)
 		return -ENOMEM;
 
 	entry_status(e, page);
@@ -553,20 +636,28 @@ static ssize_t bm_entry_write(struct file *file, const char __user *buffer,
 	int res = parse_command(buffer, count);
 
 	switch (res) {
-		case 1: clear_bit(Enabled, &e->flags);
-			break;
-		case 2: set_bit(Enabled, &e->flags);
-			break;
-		case 3: root = dget(file->f_path.dentry->d_sb->s_root);
-			mutex_lock(&root->d_inode->i_mutex);
-
-			kill_node(e);
-
-			mutex_unlock(&root->d_inode->i_mutex);
-			dput(root);
-			break;
-		default: return res;
+	case 1:
+		/* Disable this handler. */
+		clear_bit(Enabled, &e->flags);
+		break;
+	case 2:
+		/* Enable this handler. */
+		set_bit(Enabled, &e->flags);
+		break;
+	case 3:
+		/* Delete this handler. */
+		root = dget(file->f_path.dentry->d_sb->s_root);
+		mutex_lock(&root->d_inode->i_mutex);
+
+		kill_node(e);
+
+		mutex_unlock(&root->d_inode->i_mutex);
+		dput(root);
+		break;
+	default:
+		return res;
 	}
+
 	return count;
 }
 
@@ -654,26 +745,36 @@ bm_status_read(struct file *file, char __user *buf, size_t nbytes, loff_t *ppos)
 	return simple_read_from_buffer(buf, nbytes, ppos, s, strlen(s));
 }
 
-static ssize_t bm_status_write(struct file * file, const char __user * buffer,
+static ssize_t bm_status_write(struct file *file, const char __user *buffer,
 		size_t count, loff_t *ppos)
 {
 	int res = parse_command(buffer, count);
 	struct dentry *root;
 
 	switch (res) {
-		case 1: enabled = 0; break;
-		case 2: enabled = 1; break;
-		case 3: root = dget(file->f_path.dentry->d_sb->s_root);
-			mutex_lock(&root->d_inode->i_mutex);
-
-			while (!list_empty(&entries))
-				kill_node(list_entry(entries.next, Node, list));
-
-			mutex_unlock(&root->d_inode->i_mutex);
-			dput(root);
-			break;
-		default: return res;
+	case 1:
+		/* Disable all handlers. */
+		enabled = 0;
+		break;
+	case 2:
+		/* Enable all handlers. */
+		enabled = 1;
+		break;
+	case 3:
+		/* Delete all handlers. */
+		root = dget(file->f_path.dentry->d_sb->s_root);
+		mutex_lock(&root->d_inode->i_mutex);
+
+		while (!list_empty(&entries))
+			kill_node(list_entry(entries.next, Node, list));
+
+		mutex_unlock(&root->d_inode->i_mutex);
+		dput(root);
+		break;
+	default:
+		return res;
 	}
+
 	return count;
 }
 
@@ -690,14 +791,16 @@ static const struct super_operations s_ops = {
 	.evict_inode	= bm_evict_inode,
 };
 
-static int bm_fill_super(struct super_block * sb, void * data, int silent)
+static int bm_fill_super(struct super_block *sb, void *data, int silent)
 {
+	int err;
 	static struct tree_descr bm_files[] = {
 		[2] = {"status", &bm_status_operations, S_IWUSR|S_IRUGO},
 		[3] = {"register", &bm_register_operations, S_IWUSR},
 		/* last one */ {""}
 	};
-	int err = simple_fill_super(sb, BINFMTFS_MAGIC, bm_files);
+
+	err = simple_fill_super(sb, BINFMTFS_MAGIC, bm_files);
 	if (!err)
 		sb->s_op = &s_ops;
 	return err;
diff --git a/fs/block_dev.c b/fs/block_dev.c
index cc9d411..b48c41b 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -235,7 +235,10 @@ struct super_block *freeze_bdev(struct block_device *bdev)
 	sb = get_active_super(bdev);
 	if (!sb)
 		goto out;
-	error = freeze_super(sb);
+	if (sb->s_op->freeze_super)
+		error = sb->s_op->freeze_super(sb);
+	else
+		error = freeze_super(sb);
 	if (error) {
 		deactivate_super(sb);
 		bdev->bd_fsfreeze_count--;
@@ -272,7 +275,10 @@ int thaw_bdev(struct block_device *bdev, struct super_block *sb)
 	if (!sb)
 		goto out;
 
-	error = thaw_super(sb);
+	if (sb->s_op->thaw_super)
+		error = sb->s_op->thaw_super(sb);
+	else
+		error = thaw_super(sb);
 	if (error) {
 		bdev->bd_fsfreeze_count++;
 		mutex_unlock(&bdev->bd_fsfreeze_mutex);
@@ -1585,7 +1591,7 @@ ssize_t blkdev_write_iter(struct kiocb *iocb, struct iov_iter *from)
 }
 EXPORT_SYMBOL_GPL(blkdev_write_iter);
 
-static ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to)
+ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to)
 {
 	struct file *file = iocb->ki_filp;
 	struct inode *bd_inode = file->f_mapping->host;
@@ -1599,6 +1605,7 @@ static ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to)
 	iov_iter_truncate(to, size);
 	return generic_file_read_iter(iocb, to);
 }
+EXPORT_SYMBOL_GPL(blkdev_read_iter);
 
 /*
  * Try to release a page associated with block device when the system
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index d3220d3..dcd9be3 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -1011,8 +1011,6 @@ int btrfs_decompress_buf2page(char *buf, unsigned long buf_start,
 		bytes = min(bytes, working_bytes);
 		kaddr = kmap_atomic(page_out);
 		memcpy(kaddr + *pg_offset, buf + buf_offset, bytes);
-		if (*pg_index == (vcnt - 1) && *pg_offset == 0)
-			memset(kaddr + bytes, 0, PAGE_CACHE_SIZE - bytes);
 		kunmap_atomic(kaddr);
 		flush_dcache_page(page_out);
 
@@ -1054,3 +1052,34 @@ int btrfs_decompress_buf2page(char *buf, unsigned long buf_start,
 
 	return 1;
 }
+
+/*
+ * When uncompressing data, we need to make sure and zero any parts of
+ * the biovec that were not filled in by the decompression code.  pg_index
+ * and pg_offset indicate the last page and the last offset of that page
+ * that have been filled in.  This will zero everything remaining in the
+ * biovec.
+ */
+void btrfs_clear_biovec_end(struct bio_vec *bvec, int vcnt,
+				   unsigned long pg_index,
+				   unsigned long pg_offset)
+{
+	while (pg_index < vcnt) {
+		struct page *page = bvec[pg_index].bv_page;
+		unsigned long off = bvec[pg_index].bv_offset;
+		unsigned long len = bvec[pg_index].bv_len;
+
+		if (pg_offset < off)
+			pg_offset = off;
+		if (pg_offset < off + len) {
+			unsigned long bytes = off + len - pg_offset;
+			char *kaddr;
+
+			kaddr = kmap_atomic(page);
+			memset(kaddr + pg_offset, 0, bytes);
+			kunmap_atomic(kaddr);
+		}
+		pg_index++;
+		pg_offset = 0;
+	}
+}
diff --git a/fs/btrfs/compression.h b/fs/btrfs/compression.h
index 0c803b4..d181f70 100644
--- a/fs/btrfs/compression.h
+++ b/fs/btrfs/compression.h
@@ -45,7 +45,9 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
 				  unsigned long nr_pages);
 int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
 				 int mirror_num, unsigned long bio_flags);
-
+void btrfs_clear_biovec_end(struct bio_vec *bvec, int vcnt,
+				   unsigned long pg_index,
+				   unsigned long pg_offset);
 struct btrfs_compress_op {
 	struct list_head *(*alloc_workspace)(void);
 
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 19bc616..150822e 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -80,13 +80,6 @@ noinline void btrfs_clear_path_blocking(struct btrfs_path *p,
 {
 	int i;
 
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-	/* lockdep really cares that we take all of these spinlocks
-	 * in the right order.  If any of the locks in the path are not
-	 * currently blocking, it is going to complain.  So, make really
-	 * really sure by forcing the path to blocking before we clear
-	 * the path blocking.
-	 */
 	if (held) {
 		btrfs_set_lock_blocking_rw(held, held_rw);
 		if (held_rw == BTRFS_WRITE_LOCK)
@@ -95,7 +88,6 @@ noinline void btrfs_clear_path_blocking(struct btrfs_path *p,
 			held_rw = BTRFS_READ_LOCK_BLOCKING;
 	}
 	btrfs_set_path_blocking(p);
-#endif
 
 	for (i = BTRFS_MAX_LEVEL - 1; i >= 0; i--) {
 		if (p->nodes[i] && p->locks[i]) {
@@ -107,10 +99,8 @@ noinline void btrfs_clear_path_blocking(struct btrfs_path *p,
 		}
 	}
 
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
 	if (held)
 		btrfs_clear_lock_blocking_rw(held, held_rw);
-#endif
 }
 
 /* this also releases the path */
@@ -2893,7 +2883,7 @@ cow_done:
 					}
 					p->locks[level] = BTRFS_WRITE_LOCK;
 				} else {
-					err = btrfs_try_tree_read_lock(b);
+					err = btrfs_tree_read_lock_atomic(b);
 					if (!err) {
 						btrfs_set_path_blocking(p);
 						btrfs_tree_read_lock(b);
@@ -3025,7 +3015,7 @@ again:
 			}
 
 			level = btrfs_header_level(b);
-			err = btrfs_try_tree_read_lock(b);
+			err = btrfs_tree_read_lock_atomic(b);
 			if (!err) {
 				btrfs_set_path_blocking(p);
 				btrfs_tree_read_lock(b);
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index d557264e..fe69edd 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -3276,7 +3276,7 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
 			   struct btrfs_root *root, unsigned long count);
 int btrfs_async_run_delayed_refs(struct btrfs_root *root,
 				 unsigned long count, int wait);
-int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len);
+int btrfs_lookup_data_extent(struct btrfs_root *root, u64 start, u64 len);
 int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans,
 			     struct btrfs_root *root, u64 bytenr,
 			     u64 offset, int metadata, u64 *refs, u64 *flags);
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 1ad0f47..1bf9f89 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -3817,19 +3817,19 @@ static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info,
 	struct btrfs_super_block *sb = fs_info->super_copy;
 	int ret = 0;
 
-	if (sb->root_level > BTRFS_MAX_LEVEL) {
-		printk(KERN_ERR "BTRFS: tree_root level too big: %d > %d\n",
-				sb->root_level, BTRFS_MAX_LEVEL);
+	if (btrfs_super_root_level(sb) >= BTRFS_MAX_LEVEL) {
+		printk(KERN_ERR "BTRFS: tree_root level too big: %d >= %d\n",
+				btrfs_super_root_level(sb), BTRFS_MAX_LEVEL);
 		ret = -EINVAL;
 	}
-	if (sb->chunk_root_level > BTRFS_MAX_LEVEL) {
-		printk(KERN_ERR "BTRFS: chunk_root level too big: %d > %d\n",
-				sb->chunk_root_level, BTRFS_MAX_LEVEL);
+	if (btrfs_super_chunk_root_level(sb) >= BTRFS_MAX_LEVEL) {
+		printk(KERN_ERR "BTRFS: chunk_root level too big: %d >= %d\n",
+				btrfs_super_chunk_root_level(sb), BTRFS_MAX_LEVEL);
 		ret = -EINVAL;
 	}
-	if (sb->log_root_level > BTRFS_MAX_LEVEL) {
-		printk(KERN_ERR "BTRFS: log_root level too big: %d > %d\n",
-				sb->log_root_level, BTRFS_MAX_LEVEL);
+	if (btrfs_super_log_root_level(sb) >= BTRFS_MAX_LEVEL) {
+		printk(KERN_ERR "BTRFS: log_root level too big: %d >= %d\n",
+				btrfs_super_log_root_level(sb), BTRFS_MAX_LEVEL);
 		ret = -EINVAL;
 	}
 
@@ -3837,15 +3837,15 @@ static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info,
 	 * The common minimum, we don't know if we can trust the nodesize/sectorsize
 	 * items yet, they'll be verified later. Issue just a warning.
 	 */
-	if (!IS_ALIGNED(sb->root, 4096))
+	if (!IS_ALIGNED(btrfs_super_root(sb), 4096))
 		printk(KERN_WARNING "BTRFS: tree_root block unaligned: %llu\n",
 				sb->root);
-	if (!IS_ALIGNED(sb->chunk_root, 4096))
+	if (!IS_ALIGNED(btrfs_super_chunk_root(sb), 4096))
 		printk(KERN_WARNING "BTRFS: tree_root block unaligned: %llu\n",
 				sb->chunk_root);
-	if (!IS_ALIGNED(sb->log_root, 4096))
+	if (!IS_ALIGNED(btrfs_super_log_root(sb), 4096))
 		printk(KERN_WARNING "BTRFS: tree_root block unaligned: %llu\n",
-				sb->log_root);
+				btrfs_super_log_root(sb));
 
 	if (memcmp(fs_info->fsid, sb->dev_item.fsid, BTRFS_UUID_SIZE) != 0) {
 		printk(KERN_ERR "BTRFS: dev_item UUID does not match fsid: %pU != %pU\n",
@@ -3857,13 +3857,13 @@ static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info,
 	 * Hint to catch really bogus numbers, bitflips or so, more exact checks are
 	 * done later
 	 */
-	if (sb->num_devices > (1UL << 31))
+	if (btrfs_super_num_devices(sb) > (1UL << 31))
 		printk(KERN_WARNING "BTRFS: suspicious number of devices: %llu\n",
-				sb->num_devices);
+				btrfs_super_num_devices(sb));
 
-	if (sb->bytenr != BTRFS_SUPER_INFO_OFFSET) {
+	if (btrfs_super_bytenr(sb) != BTRFS_SUPER_INFO_OFFSET) {
 		printk(KERN_ERR "BTRFS: super offset mismatch %llu != %u\n",
-				sb->bytenr, BTRFS_SUPER_INFO_OFFSET);
+				btrfs_super_bytenr(sb), BTRFS_SUPER_INFO_OFFSET);
 		ret = -EINVAL;
 	}
 
@@ -3871,14 +3871,15 @@ static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info,
 	 * The generation is a global counter, we'll trust it more than the others
 	 * but it's still possible that it's the one that's wrong.
 	 */
-	if (sb->generation < sb->chunk_root_generation)
+	if (btrfs_super_generation(sb) < btrfs_super_chunk_root_generation(sb))
 		printk(KERN_WARNING
 			"BTRFS: suspicious: generation < chunk_root_generation: %llu < %llu\n",
-			sb->generation, sb->chunk_root_generation);
-	if (sb->generation < sb->cache_generation && sb->cache_generation != (u64)-1)
+			btrfs_super_generation(sb), btrfs_super_chunk_root_generation(sb));
+	if (btrfs_super_generation(sb) < btrfs_super_cache_generation(sb)
+	    && btrfs_super_cache_generation(sb) != (u64)-1)
 		printk(KERN_WARNING
 			"BTRFS: suspicious: generation < cache_generation: %llu < %llu\n",
-			sb->generation, sb->cache_generation);
+			btrfs_super_generation(sb), btrfs_super_cache_generation(sb));
 
 	return ret;
 }
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index d565895..47c1ba1 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -710,8 +710,8 @@ void btrfs_clear_space_info_full(struct btrfs_fs_info *info)
 	rcu_read_unlock();
 }
 
-/* simple helper to search for an existing extent at a given offset */
-int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len)
+/* simple helper to search for an existing data extent at a given offset */
+int btrfs_lookup_data_extent(struct btrfs_root *root, u64 start, u64 len)
 {
 	int ret;
 	struct btrfs_key key;
@@ -726,12 +726,6 @@ int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len)
 	key.type = BTRFS_EXTENT_ITEM_KEY;
 	ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path,
 				0, 0);
-	if (ret > 0) {
-		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
-		if (key.objectid == start &&
-		    key.type == BTRFS_METADATA_ITEM_KEY)
-			ret = 0;
-	}
 	btrfs_free_path(path);
 	return ret;
 }
@@ -786,7 +780,6 @@ search_again:
 	else
 		key.type = BTRFS_EXTENT_ITEM_KEY;
 
-again:
 	ret = btrfs_search_slot(trans, root->fs_info->extent_root,
 				&key, path, 0, 0);
 	if (ret < 0)
@@ -802,13 +795,6 @@ again:
 			    key.offset == root->nodesize)
 				ret = 0;
 		}
-		if (ret) {
-			key.objectid = bytenr;
-			key.type = BTRFS_EXTENT_ITEM_KEY;
-			key.offset = root->nodesize;
-			btrfs_release_path(path);
-			goto again;
-		}
 	}
 
 	if (ret == 0) {
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index 783a943..84a2d18 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -413,7 +413,7 @@ int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
 	ret = 0;
 fail:
 	while (ret < 0 && !list_empty(&tmplist)) {
-		sums = list_entry(&tmplist, struct btrfs_ordered_sum, list);
+		sums = list_entry(tmplist.next, struct btrfs_ordered_sum, list);
 		list_del(&sums->list);
 		kfree(sums);
 	}
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index d23362f..ff0dcc0 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -5303,7 +5303,7 @@ static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry,
 			return ERR_CAST(inode);
 	}
 
-	return d_materialise_unique(dentry, inode);
+	return d_splice_alias(inode, dentry);
 }
 
 unsigned char btrfs_filetype_table[] = {
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 8d2b76e..080fe66 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -765,23 +765,6 @@ out:
 	return ret;
 }
 
-/*  copy of check_sticky in fs/namei.c()
-* It's inline, so penalty for filesystems that don't use sticky bit is
-* minimal.
-*/
-static inline int btrfs_check_sticky(struct inode *dir, struct inode *inode)
-{
-	kuid_t fsuid = current_fsuid();
-
-	if (!(dir->i_mode & S_ISVTX))
-		return 0;
-	if (uid_eq(inode->i_uid, fsuid))
-		return 0;
-	if (uid_eq(dir->i_uid, fsuid))
-		return 0;
-	return !capable(CAP_FOWNER);
-}
-
 /*  copy of may_delete in fs/namei.c()
  *	Check whether we can remove a link victim from directory dir, check
  *  whether the type of victim is right.
@@ -817,8 +800,7 @@ static int btrfs_may_delete(struct inode *dir, struct dentry *victim, int isdir)
 		return error;
 	if (IS_APPEND(dir))
 		return -EPERM;
-	if (btrfs_check_sticky(dir, victim->d_inode)||
-		IS_APPEND(victim->d_inode)||
+	if (check_sticky(dir, victim->d_inode) || IS_APPEND(victim->d_inode) ||
 	    IS_IMMUTABLE(victim->d_inode) || IS_SWAPFILE(victim->d_inode))
 		return -EPERM;
 	if (isdir) {
@@ -5314,7 +5296,7 @@ long btrfs_ioctl(struct file *file, unsigned int
 		ret = btrfs_start_delalloc_roots(root->fs_info, 0, -1);
 		if (ret)
 			return ret;
-		ret = btrfs_sync_fs(file->f_dentry->d_sb, 1);
+		ret = btrfs_sync_fs(file_inode(file)->i_sb, 1);
 		/*
 		 * The transaction thread may want to do more work,
 		 * namely it pokes the cleaner ktread that will start
diff --git a/fs/btrfs/locking.c b/fs/btrfs/locking.c
index 5665d21..f8229ef 100644
--- a/fs/btrfs/locking.c
+++ b/fs/btrfs/locking.c
@@ -128,6 +128,26 @@ again:
 }
 
 /*
+ * take a spinning read lock.
+ * returns 1 if we get the read lock and 0 if we don't
+ * this won't wait for blocking writers
+ */
+int btrfs_tree_read_lock_atomic(struct extent_buffer *eb)
+{
+	if (atomic_read(&eb->blocking_writers))
+		return 0;
+
+	read_lock(&eb->lock);
+	if (atomic_read(&eb->blocking_writers)) {
+		read_unlock(&eb->lock);
+		return 0;
+	}
+	atomic_inc(&eb->read_locks);
+	atomic_inc(&eb->spinning_readers);
+	return 1;
+}
+
+/*
  * returns 1 if we get the read lock and 0 if we don't
  * this won't wait for blocking writers
  */
@@ -158,9 +178,7 @@ int btrfs_try_tree_write_lock(struct extent_buffer *eb)
 	    atomic_read(&eb->blocking_readers))
 		return 0;
 
-	if (!write_trylock(&eb->lock))
-		return 0;
-
+	write_lock(&eb->lock);
 	if (atomic_read(&eb->blocking_writers) ||
 	    atomic_read(&eb->blocking_readers)) {
 		write_unlock(&eb->lock);
diff --git a/fs/btrfs/locking.h b/fs/btrfs/locking.h
index b81e0e9..c44a9d5 100644
--- a/fs/btrfs/locking.h
+++ b/fs/btrfs/locking.h
@@ -35,6 +35,8 @@ void btrfs_clear_lock_blocking_rw(struct extent_buffer *eb, int rw);
 void btrfs_assert_tree_locked(struct extent_buffer *eb);
 int btrfs_try_tree_read_lock(struct extent_buffer *eb);
 int btrfs_try_tree_write_lock(struct extent_buffer *eb);
+int btrfs_tree_read_lock_atomic(struct extent_buffer *eb);
+
 
 static inline void btrfs_tree_unlock_rw(struct extent_buffer *eb, int rw)
 {
diff --git a/fs/btrfs/lzo.c b/fs/btrfs/lzo.c
index 78285f3..617553c 100644
--- a/fs/btrfs/lzo.c
+++ b/fs/btrfs/lzo.c
@@ -373,6 +373,8 @@ cont:
 	}
 done:
 	kunmap(pages_in[page_in_index]);
+	if (!ret)
+		btrfs_clear_biovec_end(bvec, vcnt, page_out_index, pg_offset);
 	return ret;
 }
 
@@ -410,10 +412,23 @@ static int lzo_decompress(struct list_head *ws, unsigned char *data_in,
 		goto out;
 	}
 
+	/*
+	 * the caller is already checking against PAGE_SIZE, but lets
+	 * move this check closer to the memcpy/memset
+	 */
+	destlen = min_t(unsigned long, destlen, PAGE_SIZE);
 	bytes = min_t(unsigned long, destlen, out_len - start_byte);
 
 	kaddr = kmap_atomic(dest_page);
 	memcpy(kaddr, workspace->buf + start_byte, bytes);
+
+	/*
+	 * btrfs_getblock is doing a zero on the tail of the page too,
+	 * but this will cover anything missing from the decompressed
+	 * data.
+	 */
+	if (bytes < destlen)
+		memset(kaddr+bytes, 0, destlen-bytes);
 	kunmap_atomic(kaddr);
 out:
 	return ret;
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index a2b97ef..54bd91e 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -2151,6 +2151,7 @@ static void __exit exit_btrfs_fs(void)
 	extent_map_exit();
 	extent_io_exit();
 	btrfs_interface_exit();
+	btrfs_end_io_wq_exit();
 	unregister_filesystem(&btrfs_fs_type);
 	btrfs_exit_sysfs();
 	btrfs_cleanup_fs_uuids();
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 1475979..286213c 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -672,7 +672,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
 			 * is this extent already allocated in the extent
 			 * allocation tree?  If so, just add a reference
 			 */
-			ret = btrfs_lookup_extent(root, ins.objectid,
+			ret = btrfs_lookup_data_extent(root, ins.objectid,
 						ins.offset);
 			if (ret == 0) {
 				ret = btrfs_inc_extent_ref(trans, root,
diff --git a/fs/btrfs/zlib.c b/fs/btrfs/zlib.c
index 759fa4e..fb22fd8 100644
--- a/fs/btrfs/zlib.c
+++ b/fs/btrfs/zlib.c
@@ -299,6 +299,8 @@ done:
 	zlib_inflateEnd(&workspace->strm);
 	if (data_in)
 		kunmap(pages_in[page_in_index]);
+	if (!ret)
+		btrfs_clear_biovec_end(bvec, vcnt, page_out_index, pg_offset);
 	return ret;
 }
 
@@ -310,10 +312,14 @@ static int zlib_decompress(struct list_head *ws, unsigned char *data_in,
 	struct workspace *workspace = list_entry(ws, struct workspace, list);
 	int ret = 0;
 	int wbits = MAX_WBITS;
-	unsigned long bytes_left = destlen;
+	unsigned long bytes_left;
 	unsigned long total_out = 0;
+	unsigned long pg_offset = 0;
 	char *kaddr;
 
+	destlen = min_t(unsigned long, destlen, PAGE_SIZE);
+	bytes_left = destlen;
+
 	workspace->strm.next_in = data_in;
 	workspace->strm.avail_in = srclen;
 	workspace->strm.total_in = 0;
@@ -341,7 +347,6 @@ static int zlib_decompress(struct list_head *ws, unsigned char *data_in,
 		unsigned long buf_start;
 		unsigned long buf_offset;
 		unsigned long bytes;
-		unsigned long pg_offset = 0;
 
 		ret = zlib_inflate(&workspace->strm, Z_NO_FLUSH);
 		if (ret != Z_OK && ret != Z_STREAM_END)
@@ -384,6 +389,17 @@ next:
 		ret = 0;
 
 	zlib_inflateEnd(&workspace->strm);
+
+	/*
+	 * this should only happen if zlib returned fewer bytes than we
+	 * expected.  btrfs_get_block is responsible for zeroing from the
+	 * end of the inline extent (destlen) to the end of the page
+	 */
+	if (pg_offset < destlen) {
+		kaddr = kmap_atomic(dest_page);
+		memset(kaddr + pg_offset, 0, destlen - pg_offset);
+		kunmap_atomic(kaddr);
+	}
 	return ret;
 }
 
diff --git a/fs/buffer.c b/fs/buffer.c
index 9614adc..20805db 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -128,21 +128,15 @@ __clear_page_buffers(struct page *page)
 	page_cache_release(page);
 }
 
-
-static int quiet_error(struct buffer_head *bh)
-{
-	if (!test_bit(BH_Quiet, &bh->b_state) && printk_ratelimit())
-		return 0;
-	return 1;
-}
-
-
-static void buffer_io_error(struct buffer_head *bh)
+static void buffer_io_error(struct buffer_head *bh, char *msg)
 {
 	char b[BDEVNAME_SIZE];
-	printk(KERN_ERR "Buffer I/O error on device %s, logical block %Lu\n",
+
+	if (!test_bit(BH_Quiet, &bh->b_state))
+		printk_ratelimited(KERN_ERR
+			"Buffer I/O error on dev %s, logical block %llu%s\n",
 			bdevname(bh->b_bdev, b),
-			(unsigned long long)bh->b_blocknr);
+			(unsigned long long)bh->b_blocknr, msg);
 }
 
 /*
@@ -177,17 +171,10 @@ EXPORT_SYMBOL(end_buffer_read_sync);
 
 void end_buffer_write_sync(struct buffer_head *bh, int uptodate)
 {
-	char b[BDEVNAME_SIZE];
-
 	if (uptodate) {
 		set_buffer_uptodate(bh);
 	} else {
-		if (!quiet_error(bh)) {
-			buffer_io_error(bh);
-			printk(KERN_WARNING "lost page write due to "
-					"I/O error on %s\n",
-				       bdevname(bh->b_bdev, b));
-		}
+		buffer_io_error(bh, ", lost sync page write");
 		set_buffer_write_io_error(bh);
 		clear_buffer_uptodate(bh);
 	}
@@ -304,8 +291,7 @@ static void end_buffer_async_read(struct buffer_head *bh, int uptodate)
 		set_buffer_uptodate(bh);
 	} else {
 		clear_buffer_uptodate(bh);
-		if (!quiet_error(bh))
-			buffer_io_error(bh);
+		buffer_io_error(bh, ", async page read");
 		SetPageError(page);
 	}
 
@@ -353,7 +339,6 @@ still_busy:
  */
 void end_buffer_async_write(struct buffer_head *bh, int uptodate)
 {
-	char b[BDEVNAME_SIZE];
 	unsigned long flags;
 	struct buffer_head *first;
 	struct buffer_head *tmp;
@@ -365,12 +350,7 @@ void end_buffer_async_write(struct buffer_head *bh, int uptodate)
 	if (uptodate) {
 		set_buffer_uptodate(bh);
 	} else {
-		if (!quiet_error(bh)) {
-			buffer_io_error(bh);
-			printk(KERN_WARNING "lost page write due to "
-					"I/O error on %s\n",
-			       bdevname(bh->b_bdev, b));
-		}
+		buffer_io_error(bh, ", lost async page write");
 		set_bit(AS_EIO, &page->mapping->flags);
 		set_buffer_write_io_error(bh);
 		clear_buffer_uptodate(bh);
@@ -993,7 +973,7 @@ init_page_buffers(struct page *page, struct block_device *bdev,
  */
 static int
 grow_dev_page(struct block_device *bdev, sector_t block,
-		pgoff_t index, int size, int sizebits)
+	      pgoff_t index, int size, int sizebits, gfp_t gfp)
 {
 	struct inode *inode = bdev->bd_inode;
 	struct page *page;
@@ -1002,8 +982,8 @@ grow_dev_page(struct block_device *bdev, sector_t block,
 	int ret = 0;		/* Will call free_more_memory() */
 	gfp_t gfp_mask;
 
-	gfp_mask = mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS;
-	gfp_mask |= __GFP_MOVABLE;
+	gfp_mask = (mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS) | gfp;
+
 	/*
 	 * XXX: __getblk_slow() can not really deal with failure and
 	 * will endlessly loop on improvised global reclaim.  Prefer
@@ -1060,7 +1040,7 @@ failed:
  * that page was dirty, the buffers are set dirty also.
  */
 static int
-grow_buffers(struct block_device *bdev, sector_t block, int size)
+grow_buffers(struct block_device *bdev, sector_t block, int size, gfp_t gfp)
 {
 	pgoff_t index;
 	int sizebits;
@@ -1087,11 +1067,12 @@ grow_buffers(struct block_device *bdev, sector_t block, int size)
 	}
 
 	/* Create a page with the proper size buffers.. */
-	return grow_dev_page(bdev, block, index, size, sizebits);
+	return grow_dev_page(bdev, block, index, size, sizebits, gfp);
 }
 
-static struct buffer_head *
-__getblk_slow(struct block_device *bdev, sector_t block, int size)
+struct buffer_head *
+__getblk_slow(struct block_device *bdev, sector_t block,
+	     unsigned size, gfp_t gfp)
 {
 	/* Size must be multiple of hard sectorsize */
 	if (unlikely(size & (bdev_logical_block_size(bdev)-1) ||
@@ -1113,13 +1094,14 @@ __getblk_slow(struct block_device *bdev, sector_t block, int size)
 		if (bh)
 			return bh;
 
-		ret = grow_buffers(bdev, block, size);
+		ret = grow_buffers(bdev, block, size, gfp);
 		if (ret < 0)
 			return NULL;
 		if (ret == 0)
 			free_more_memory();
 	}
 }
+EXPORT_SYMBOL(__getblk_slow);
 
 /*
  * The relationship between dirty buffers and dirty pages:
@@ -1373,24 +1355,25 @@ __find_get_block(struct block_device *bdev, sector_t block, unsigned size)
 EXPORT_SYMBOL(__find_get_block);
 
 /*
- * __getblk will locate (and, if necessary, create) the buffer_head
+ * __getblk_gfp() will locate (and, if necessary, create) the buffer_head
  * which corresponds to the passed block_device, block and size. The
  * returned buffer has its reference count incremented.
  *
- * __getblk() will lock up the machine if grow_dev_page's try_to_free_buffers()
- * attempt is failing.  FIXME, perhaps?
+ * __getblk_gfp() will lock up the machine if grow_dev_page's
+ * try_to_free_buffers() attempt is failing.  FIXME, perhaps?
  */
 struct buffer_head *
-__getblk(struct block_device *bdev, sector_t block, unsigned size)
+__getblk_gfp(struct block_device *bdev, sector_t block,
+	     unsigned size, gfp_t gfp)
 {
 	struct buffer_head *bh = __find_get_block(bdev, block, size);
 
 	might_sleep();
 	if (bh == NULL)
-		bh = __getblk_slow(bdev, block, size);
+		bh = __getblk_slow(bdev, block, size, gfp);
 	return bh;
 }
-EXPORT_SYMBOL(__getblk);
+EXPORT_SYMBOL(__getblk_gfp);
 
 /*
  * Do async read-ahead on a buffer..
@@ -1406,24 +1389,28 @@ void __breadahead(struct block_device *bdev, sector_t block, unsigned size)
 EXPORT_SYMBOL(__breadahead);
 
 /**
- *  __bread() - reads a specified block and returns the bh
+ *  __bread_gfp() - reads a specified block and returns the bh
  *  @bdev: the block_device to read from
  *  @block: number of block
  *  @size: size (in bytes) to read
- * 
+ *  @gfp: page allocation flag
+ *
  *  Reads a specified block, and returns buffer head that contains it.
+ *  The page cache can be allocated from non-movable area
+ *  not to prevent page migration if you set gfp to zero.
  *  It returns NULL if the block was unreadable.
  */
 struct buffer_head *
-__bread(struct block_device *bdev, sector_t block, unsigned size)
+__bread_gfp(struct block_device *bdev, sector_t block,
+		   unsigned size, gfp_t gfp)
 {
-	struct buffer_head *bh = __getblk(bdev, block, size);
+	struct buffer_head *bh = __getblk_gfp(bdev, block, size, gfp);
 
 	if (likely(bh) && !buffer_uptodate(bh))
 		bh = __bread_slow(bh);
 	return bh;
 }
-EXPORT_SYMBOL(__bread);
+EXPORT_SYMBOL(__bread_gfp);
 
 /*
  * invalidate_bh_lrus() is called rarely - but not only at unmount.
@@ -2082,6 +2069,7 @@ int generic_write_end(struct file *file, struct address_space *mapping,
 			struct page *page, void *fsdata)
 {
 	struct inode *inode = mapping->host;
+	loff_t old_size = inode->i_size;
 	int i_size_changed = 0;
 
 	copied = block_write_end(file, mapping, pos, len, copied, page, fsdata);
@@ -2101,6 +2089,8 @@ int generic_write_end(struct file *file, struct address_space *mapping,
 	unlock_page(page);
 	page_cache_release(page);
 
+	if (old_size < pos)
+		pagecache_isize_extended(inode, old_size, pos);
 	/*
 	 * Don't mark the inode dirty under page lock. First, it unnecessarily
 	 * makes the holding time of page lock longer. Second, it forces lock
diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c
index e12f189..7f8e83f 100644
--- a/fs/cachefiles/namei.c
+++ b/fs/cachefiles/namei.c
@@ -102,8 +102,7 @@ static void cachefiles_mark_object_buried(struct cachefiles_cache *cache,
 	struct cachefiles_object *object;
 	struct rb_node *p;
 
-	_enter(",'%*.*s'",
-	       dentry->d_name.len, dentry->d_name.len, dentry->d_name.name);
+	_enter(",'%pd'", dentry);
 
 	write_lock(&cache->active_lock);
 
@@ -273,9 +272,7 @@ static int cachefiles_bury_object(struct cachefiles_cache *cache,
 	char nbuffer[8 + 8 + 1];
 	int ret;
 
-	_enter(",'%*.*s','%*.*s'",
-	       dir->d_name.len, dir->d_name.len, dir->d_name.name,
-	       rep->d_name.len, rep->d_name.len, rep->d_name.name);
+	_enter(",'%pd','%pd'", dir, rep);
 
 	_debug("remove %p from %p", rep, dir);
 
@@ -597,8 +594,7 @@ lookup_again:
 	/* if we've found that the terminal object exists, then we need to
 	 * check its attributes and delete it if it's out of date */
 	if (!object->new) {
-		_debug("validate '%*.*s'",
-		       next->d_name.len, next->d_name.len, next->d_name.name);
+		_debug("validate '%pd'", next);
 
 		ret = cachefiles_check_object_xattr(object, auxdata);
 		if (ret == -ESTALE) {
@@ -827,8 +823,8 @@ static struct dentry *cachefiles_check_active(struct cachefiles_cache *cache,
 	unsigned long start;
 	int ret;
 
-	//_enter(",%*.*s/,%s",
-	//       dir->d_name.len, dir->d_name.len, dir->d_name.name, filename);
+	//_enter(",%pd/,%s",
+	//       dir, filename);
 
 	/* look up the victim */
 	mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT);
@@ -910,8 +906,7 @@ int cachefiles_cull(struct cachefiles_cache *cache, struct dentry *dir,
 	struct dentry *victim;
 	int ret;
 
-	_enter(",%*.*s/,%s",
-	       dir->d_name.len, dir->d_name.len, dir->d_name.name, filename);
+	_enter(",%pd/,%s", dir, filename);
 
 	victim = cachefiles_check_active(cache, dir, filename);
 	if (IS_ERR(victim))
@@ -969,8 +964,8 @@ int cachefiles_check_in_use(struct cachefiles_cache *cache, struct dentry *dir,
 {
 	struct dentry *victim;
 
-	//_enter(",%*.*s/,%s",
-	//       dir->d_name.len, dir->d_name.len, dir->d_name.name, filename);
+	//_enter(",%pd/,%s",
+	//       dir, filename);
 
 	victim = cachefiles_check_active(cache, dir, filename);
 	if (IS_ERR(victim))
diff --git a/fs/cachefiles/xattr.c b/fs/cachefiles/xattr.c
index acbc1f0..a8a6874 100644
--- a/fs/cachefiles/xattr.c
+++ b/fs/cachefiles/xattr.c
@@ -51,9 +51,8 @@ int cachefiles_check_object_type(struct cachefiles_object *object)
 	}
 
 	if (ret != -EEXIST) {
-		pr_err("Can't set xattr on %*.*s [%lu] (err %d)\n",
-		       dentry->d_name.len, dentry->d_name.len,
-		       dentry->d_name.name, dentry->d_inode->i_ino,
+		pr_err("Can't set xattr on %pd [%lu] (err %d)\n",
+		       dentry, dentry->d_inode->i_ino,
 		       -ret);
 		goto error;
 	}
@@ -64,9 +63,8 @@ int cachefiles_check_object_type(struct cachefiles_object *object)
 		if (ret == -ERANGE)
 			goto bad_type_length;
 
-		pr_err("Can't read xattr on %*.*s [%lu] (err %d)\n",
-		       dentry->d_name.len, dentry->d_name.len,
-		       dentry->d_name.name, dentry->d_inode->i_ino,
+		pr_err("Can't read xattr on %pd [%lu] (err %d)\n",
+		       dentry, dentry->d_inode->i_ino,
 		       -ret);
 		goto error;
 	}
@@ -92,9 +90,8 @@ bad_type_length:
 
 bad_type:
 	xtype[2] = 0;
-	pr_err("Cache object %*.*s [%lu] type %s not %s\n",
-	       dentry->d_name.len, dentry->d_name.len,
-	       dentry->d_name.name, dentry->d_inode->i_ino,
+	pr_err("Cache object %pd [%lu] type %s not %s\n",
+	       dentry, dentry->d_inode->i_ino,
 	       xtype, type);
 	ret = -EIO;
 	goto error;
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 659f2ea..cefca66 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -2638,7 +2638,7 @@ static void handle_cap_flush_ack(struct inode *inode, u64 flush_tid,
 
 	for (i = 0; i < CEPH_CAP_BITS; i++)
 		if ((dirty & (1 << i)) &&
-		    flush_tid == ci->i_cap_flush_tid[i])
+		    (u16)flush_tid == ci->i_cap_flush_tid[i])
 			cleaned |= 1 << i;
 
 	dout("handle_cap_flush_ack inode %p mds%d seq %d on %s cleaned %s,"
diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c
index 5d5a4c8..1b23551 100644
--- a/fs/ceph/debugfs.c
+++ b/fs/ceph/debugfs.c
@@ -83,10 +83,9 @@ static int mdsc_show(struct seq_file *s, void *p)
 			if (IS_ERR(path))
 				path = NULL;
 			spin_lock(&req->r_dentry->d_lock);
-			seq_printf(s, " #%llx/%.*s (%s)",
+			seq_printf(s, " #%llx/%pd (%s)",
 				   ceph_ino(req->r_dentry->d_parent->d_inode),
-				   req->r_dentry->d_name.len,
-				   req->r_dentry->d_name.name,
+				   req->r_dentry,
 				   path ? path : "");
 			spin_unlock(&req->r_dentry->d_lock);
 			kfree(path);
@@ -103,11 +102,10 @@ static int mdsc_show(struct seq_file *s, void *p)
 			if (IS_ERR(path))
 				path = NULL;
 			spin_lock(&req->r_old_dentry->d_lock);
-			seq_printf(s, " #%llx/%.*s (%s)",
+			seq_printf(s, " #%llx/%pd (%s)",
 				   req->r_old_dentry_dir ?
 				   ceph_ino(req->r_old_dentry_dir) : 0,
-				   req->r_old_dentry->d_name.len,
-				   req->r_old_dentry->d_name.name,
+				   req->r_old_dentry,
 				   path ? path : "");
 			spin_unlock(&req->r_old_dentry->d_lock);
 			kfree(path);
@@ -150,8 +148,8 @@ static int dentry_lru_show(struct seq_file *s, void *ptr)
 	spin_lock(&mdsc->dentry_lru_lock);
 	list_for_each_entry(di, &mdsc->dentry_lru, lru) {
 		struct dentry *dentry = di->dentry;
-		seq_printf(s, "%p %p\t%.*s\n",
-			   di, dentry, dentry->d_name.len, dentry->d_name.name);
+		seq_printf(s, "%p %p\t%pd\n",
+			   di, dentry, dentry);
 	}
 	spin_unlock(&mdsc->dentry_lru_lock);
 
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index e6d63f8..681a853 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -111,7 +111,7 @@ static int fpos_cmp(loff_t l, loff_t r)
 /*
  * When possible, we try to satisfy a readdir by peeking at the
  * dcache.  We make this work by carefully ordering dentries on
- * d_u.d_child when we initially get results back from the MDS, and
+ * d_child when we initially get results back from the MDS, and
  * falling back to a "normal" sync readdir if any dentries in the dir
  * are dropped.
  *
@@ -123,7 +123,7 @@ static int __dcache_readdir(struct file *file,  struct dir_context *ctx,
 			    u32 shared_gen)
 {
 	struct ceph_file_info *fi = file->private_data;
-	struct dentry *parent = file->f_dentry;
+	struct dentry *parent = file->f_path.dentry;
 	struct inode *dir = parent->d_inode;
 	struct list_head *p;
 	struct dentry *dentry, *last;
@@ -147,11 +147,11 @@ static int __dcache_readdir(struct file *file,  struct dir_context *ctx,
 		p = parent->d_subdirs.prev;
 		dout(" initial p %p/%p\n", p->prev, p->next);
 	} else {
-		p = last->d_u.d_child.prev;
+		p = last->d_child.prev;
 	}
 
 more:
-	dentry = list_entry(p, struct dentry, d_u.d_child);
+	dentry = list_entry(p, struct dentry, d_child);
 	di = ceph_dentry(dentry);
 	while (1) {
 		dout(" p %p/%p %s d_subdirs %p/%p\n", p->prev, p->next,
@@ -168,13 +168,13 @@ more:
 		    ceph_ino(dentry->d_inode) != CEPH_INO_CEPH &&
 		    fpos_cmp(ctx->pos, di->offset) <= 0)
 			break;
-		dout(" skipping %p %.*s at %llu (%llu)%s%s\n", dentry,
-		     dentry->d_name.len, dentry->d_name.name, di->offset,
+		dout(" skipping %p %pd at %llu (%llu)%s%s\n", dentry,
+		     dentry, di->offset,
 		     ctx->pos, d_unhashed(dentry) ? " unhashed" : "",
 		     !dentry->d_inode ? " null" : "");
 		spin_unlock(&dentry->d_lock);
 		p = p->prev;
-		dentry = list_entry(p, struct dentry, d_u.d_child);
+		dentry = list_entry(p, struct dentry, d_child);
 		di = ceph_dentry(dentry);
 	}
 
@@ -190,8 +190,8 @@ more:
 		goto out;
 	}
 
-	dout(" %llu (%llu) dentry %p %.*s %p\n", di->offset, ctx->pos,
-	     dentry, dentry->d_name.len, dentry->d_name.name, dentry->d_inode);
+	dout(" %llu (%llu) dentry %p %pd %p\n", di->offset, ctx->pos,
+	     dentry, dentry, dentry->d_inode);
 	if (!dir_emit(ctx, dentry->d_name.name,
 		      dentry->d_name.len,
 		      ceph_translate_ino(dentry->d_sb, dentry->d_inode->i_ino),
@@ -274,7 +274,7 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
 		off = 1;
 	}
 	if (ctx->pos == 1) {
-		ino_t ino = parent_ino(file->f_dentry);
+		ino_t ino = parent_ino(file->f_path.dentry);
 		dout("readdir off 1 -> '..'\n");
 		if (!dir_emit(ctx, "..", 2,
 			    ceph_translate_ino(inode->i_sb, ino),
@@ -337,7 +337,7 @@ more:
 		}
 		req->r_inode = inode;
 		ihold(inode);
-		req->r_dentry = dget(file->f_dentry);
+		req->r_dentry = dget(file->f_path.dentry);
 		/* hints to request -> mds selection code */
 		req->r_direct_mode = USE_AUTH_MDS;
 		req->r_direct_hash = ceph_frag_value(frag);
@@ -538,8 +538,8 @@ int ceph_handle_snapdir(struct ceph_mds_request *req,
 	    strcmp(dentry->d_name.name,
 		   fsc->mount_options->snapdir_name) == 0) {
 		struct inode *inode = ceph_get_snapdir(parent);
-		dout("ENOENT on snapdir %p '%.*s', linking to snapdir %p\n",
-		     dentry, dentry->d_name.len, dentry->d_name.name, inode);
+		dout("ENOENT on snapdir %p '%pd', linking to snapdir %p\n",
+		     dentry, dentry, inode);
 		BUG_ON(!d_unhashed(dentry));
 		d_add(dentry, inode);
 		err = 0;
@@ -603,8 +603,8 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry,
 	int op;
 	int err;
 
-	dout("lookup %p dentry %p '%.*s'\n",
-	     dir, dentry, dentry->d_name.len, dentry->d_name.name);
+	dout("lookup %p dentry %p '%pd'\n",
+	     dir, dentry, dentry);
 
 	if (dentry->d_name.len > NAME_MAX)
 		return ERR_PTR(-ENAMETOOLONG);
@@ -774,8 +774,8 @@ static int ceph_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
 	if (ceph_snap(dir) == CEPH_SNAPDIR) {
 		/* mkdir .snap/foo is a MKSNAP */
 		op = CEPH_MDS_OP_MKSNAP;
-		dout("mksnap dir %p snap '%.*s' dn %p\n", dir,
-		     dentry->d_name.len, dentry->d_name.name, dentry);
+		dout("mksnap dir %p snap '%pd' dn %p\n", dir,
+		     dentry, dentry);
 	} else if (ceph_snap(dir) == CEPH_NOSNAP) {
 		dout("mkdir dir %p dn %p mode 0%ho\n", dir, dentry, mode);
 		op = CEPH_MDS_OP_MKDIR;
@@ -888,8 +888,7 @@ static int ceph_unlink(struct inode *dir, struct dentry *dentry)
 
 	if (ceph_snap(dir) == CEPH_SNAPDIR) {
 		/* rmdir .snap/foo is RMSNAP */
-		dout("rmsnap dir %p '%.*s' dn %p\n", dir, dentry->d_name.len,
-		     dentry->d_name.name, dentry);
+		dout("rmsnap dir %p '%pd' dn %p\n", dir, dentry, dentry);
 		op = CEPH_MDS_OP_RMSNAP;
 	} else if (ceph_snap(dir) == CEPH_NOSNAP) {
 		dout("unlink/rmdir dir %p dn %p inode %p\n",
@@ -1063,16 +1062,15 @@ static int ceph_d_revalidate(struct dentry *dentry, unsigned int flags)
 	if (flags & LOOKUP_RCU)
 		return -ECHILD;
 
-	dout("d_revalidate %p '%.*s' inode %p offset %lld\n", dentry,
-	     dentry->d_name.len, dentry->d_name.name, dentry->d_inode,
-	     ceph_dentry(dentry)->offset);
+	dout("d_revalidate %p '%pd' inode %p offset %lld\n", dentry,
+	     dentry, dentry->d_inode, ceph_dentry(dentry)->offset);
 
 	dir = ceph_get_dentry_parent_inode(dentry);
 
 	/* always trust cached snapped dentries, snapdir dentry */
 	if (ceph_snap(dir) != CEPH_NOSNAP) {
-		dout("d_revalidate %p '%.*s' inode %p is SNAPPED\n", dentry,
-		     dentry->d_name.len, dentry->d_name.name, dentry->d_inode);
+		dout("d_revalidate %p '%pd' inode %p is SNAPPED\n", dentry,
+		     dentry, dentry->d_inode);
 		valid = 1;
 	} else if (dentry->d_inode &&
 		   ceph_snap(dentry->d_inode) == CEPH_SNAPDIR) {
@@ -1265,8 +1263,7 @@ void ceph_dentry_lru_add(struct dentry *dn)
 	struct ceph_dentry_info *di = ceph_dentry(dn);
 	struct ceph_mds_client *mdsc;
 
-	dout("dentry_lru_add %p %p '%.*s'\n", di, dn,
-	     dn->d_name.len, dn->d_name.name);
+	dout("dentry_lru_add %p %p '%pd'\n", di, dn, dn);
 	mdsc = ceph_sb_to_client(dn->d_sb)->mdsc;
 	spin_lock(&mdsc->dentry_lru_lock);
 	list_add_tail(&di->lru, &mdsc->dentry_lru);
@@ -1279,8 +1276,8 @@ void ceph_dentry_lru_touch(struct dentry *dn)
 	struct ceph_dentry_info *di = ceph_dentry(dn);
 	struct ceph_mds_client *mdsc;
 
-	dout("dentry_lru_touch %p %p '%.*s' (offset %lld)\n", di, dn,
-	     dn->d_name.len, dn->d_name.name, di->offset);
+	dout("dentry_lru_touch %p %p '%pd' (offset %lld)\n", di, dn, dn,
+	     di->offset);
 	mdsc = ceph_sb_to_client(dn->d_sb)->mdsc;
 	spin_lock(&mdsc->dentry_lru_lock);
 	list_move_tail(&di->lru, &mdsc->dentry_lru);
@@ -1292,8 +1289,7 @@ void ceph_dentry_lru_del(struct dentry *dn)
 	struct ceph_dentry_info *di = ceph_dentry(dn);
 	struct ceph_mds_client *mdsc;
 
-	dout("dentry_lru_del %p %p '%.*s'\n", di, dn,
-	     dn->d_name.len, dn->d_name.name);
+	dout("dentry_lru_del %p %p '%pd'\n", di, dn, dn);
 	mdsc = ceph_sb_to_client(dn->d_sb)->mdsc;
 	spin_lock(&mdsc->dentry_lru_lock);
 	list_del_init(&di->lru);
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index d7e0da8..9f8e357 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -211,7 +211,7 @@ int ceph_open(struct inode *inode, struct file *file)
 
 	req->r_num_caps = 1;
 	if (flags & O_CREAT)
-		parent_inode = ceph_get_dentry_parent_inode(file->f_dentry);
+		parent_inode = ceph_get_dentry_parent_inode(file->f_path.dentry);
 	err = ceph_mdsc_do_request(mdsc, parent_inode, req);
 	iput(parent_inode);
 	if (!err)
@@ -238,8 +238,8 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
 	struct ceph_acls_info acls = {};
 	int err;
 
-	dout("atomic_open %p dentry %p '%.*s' %s flags %d mode 0%o\n",
-	     dir, dentry, dentry->d_name.len, dentry->d_name.name,
+	dout("atomic_open %p dentry %p '%pd' %s flags %d mode 0%o\n",
+	     dir, dentry, dentry,
 	     d_unhashed(dentry) ? "unhashed" : "hashed", flags, mode);
 
 	if (dentry->d_name.len > NAME_MAX)
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 7b61390..a5593d5 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -967,7 +967,7 @@ static struct dentry *splice_dentry(struct dentry *dn, struct inode *in,
 	/* dn must be unhashed */
 	if (!d_unhashed(dn))
 		d_drop(dn);
-	realdn = d_materialise_unique(dn, in);
+	realdn = d_splice_alias(in, dn);
 	if (IS_ERR(realdn)) {
 		pr_err("splice_dentry error %ld %p inode %p ino %llx.%llx\n",
 		       PTR_ERR(realdn), dn, in, ceph_vinop(in));
@@ -1186,20 +1186,18 @@ retry_lookup:
 			struct inode *olddir = req->r_old_dentry_dir;
 			BUG_ON(!olddir);
 
-			dout(" src %p '%.*s' dst %p '%.*s'\n",
+			dout(" src %p '%pd' dst %p '%pd'\n",
 			     req->r_old_dentry,
-			     req->r_old_dentry->d_name.len,
-			     req->r_old_dentry->d_name.name,
-			     dn, dn->d_name.len, dn->d_name.name);
+			     req->r_old_dentry,
+			     dn, dn);
 			dout("fill_trace doing d_move %p -> %p\n",
 			     req->r_old_dentry, dn);
 
 			d_move(req->r_old_dentry, dn);
-			dout(" src %p '%.*s' dst %p '%.*s'\n",
+			dout(" src %p '%pd' dst %p '%pd'\n",
+			     req->r_old_dentry,
 			     req->r_old_dentry,
-			     req->r_old_dentry->d_name.len,
-			     req->r_old_dentry->d_name.name,
-			     dn, dn->d_name.len, dn->d_name.name);
+			     dn, dn);
 
 			/* ensure target dentry is invalidated, despite
 			   rehashing bug in vfs_rename_dir */
@@ -1399,7 +1397,7 @@ retry_lookup:
 			/* reorder parent's d_subdirs */
 			spin_lock(&parent->d_lock);
 			spin_lock_nested(&dn->d_lock, DENTRY_D_LOCK_NESTED);
-			list_move(&dn->d_u.d_child, &parent->d_subdirs);
+			list_move(&dn->d_child, &parent->d_subdirs);
 			spin_unlock(&dn->d_lock);
 			spin_unlock(&parent->d_lock);
 		}
diff --git a/fs/char_dev.c b/fs/char_dev.c
index f77f770..67b2007 100644
--- a/fs/char_dev.c
+++ b/fs/char_dev.c
@@ -117,7 +117,6 @@ __register_chrdev_region(unsigned int major, unsigned int baseminor,
 			goto out;
 		}
 		major = i;
-		ret = major;
 	}
 
 	cd->major = major;
diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c
index 44ec726..9c56ef7 100644
--- a/fs/cifs/cifs_debug.c
+++ b/fs/cifs/cifs_debug.c
@@ -34,27 +34,9 @@
 void
 cifs_dump_mem(char *label, void *data, int length)
 {
-	int i, j;
-	int *intptr = data;
-	char *charptr = data;
-	char buf[10], line[80];
-
-	printk(KERN_DEBUG "%s: dump of %d bytes of data at 0x%p\n",
-		label, length, data);
-	for (i = 0; i < length; i += 16) {
-		line[0] = 0;
-		for (j = 0; (j < 4) && (i + j * 4 < length); j++) {
-			sprintf(buf, " %08x", intptr[i / 4 + j]);
-			strcat(line, buf);
-		}
-		buf[0] = ' ';
-		buf[2] = 0;
-		for (j = 0; (j < 16) && (i + j < length); j++) {
-			buf[1] = isprint(charptr[i + j]) ? charptr[i + j] : '.';
-			strcat(line, buf);
-		}
-		printk(KERN_DEBUG "%s\n", line);
-	}
+	pr_debug("%s: dump of %d bytes of data at 0x%p\n", label, length, data);
+	print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_OFFSET, 16, 4,
+		       data, length, true);
 }
 
 #ifdef CONFIG_CIFS_DEBUG
@@ -68,7 +50,7 @@ void cifs_vfs_err(const char *fmt, ...)
 	vaf.fmt = fmt;
 	vaf.va = &args;
 
-	printk(KERN_ERR "CIFS VFS: %pV", &vaf);
+	pr_err("CIFS VFS: %pV", &vaf);
 
 	va_end(args);
 }
@@ -274,6 +256,7 @@ static ssize_t cifs_stats_proc_write(struct file *file,
 		const char __user *buffer, size_t count, loff_t *ppos)
 {
 	char c;
+	bool bv;
 	int rc;
 	struct list_head *tmp1, *tmp2, *tmp3;
 	struct TCP_Server_Info *server;
@@ -284,7 +267,7 @@ static ssize_t cifs_stats_proc_write(struct file *file,
 	if (rc)
 		return rc;
 
-	if (c == '1' || c == 'y' || c == 'Y' || c == '0') {
+	if (strtobool(&c, &bv) == 0) {
 #ifdef CONFIG_CIFS_STATS2
 		atomic_set(&totBufAllocCount, 0);
 		atomic_set(&totSmBufAllocCount, 0);
@@ -451,15 +434,14 @@ static ssize_t cifsFYI_proc_write(struct file *file, const char __user *buffer,
 		size_t count, loff_t *ppos)
 {
 	char c;
+	bool bv;
 	int rc;
 
 	rc = get_user(c, buffer);
 	if (rc)
 		return rc;
-	if (c == '0' || c == 'n' || c == 'N')
-		cifsFYI = 0;
-	else if (c == '1' || c == 'y' || c == 'Y')
-		cifsFYI = 1;
+	if (strtobool(&c, &bv) == 0)
+		cifsFYI = bv;
 	else if ((c > '1') && (c <= '9'))
 		cifsFYI = (int) (c - '0'); /* see cifs_debug.h for meanings */
 
@@ -490,15 +472,18 @@ static ssize_t cifs_linux_ext_proc_write(struct file *file,
 		const char __user *buffer, size_t count, loff_t *ppos)
 {
 	char c;
+	bool bv;
 	int rc;
 
 	rc = get_user(c, buffer);
 	if (rc)
 		return rc;
-	if (c == '0' || c == 'n' || c == 'N')
-		linuxExtEnabled = 0;
-	else if (c == '1' || c == 'y' || c == 'Y')
-		linuxExtEnabled = 1;
+
+	rc = strtobool(&c, &bv);
+	if (rc)
+		return rc;
+
+	linuxExtEnabled = bv;
 
 	return count;
 }
@@ -527,15 +512,18 @@ static ssize_t cifs_lookup_cache_proc_write(struct file *file,
 		const char __user *buffer, size_t count, loff_t *ppos)
 {
 	char c;
+	bool bv;
 	int rc;
 
 	rc = get_user(c, buffer);
 	if (rc)
 		return rc;
-	if (c == '0' || c == 'n' || c == 'N')
-		lookupCacheEnabled = 0;
-	else if (c == '1' || c == 'y' || c == 'Y')
-		lookupCacheEnabled = 1;
+
+	rc = strtobool(&c, &bv);
+	if (rc)
+		return rc;
+
+	lookupCacheEnabled = bv;
 
 	return count;
 }
@@ -564,15 +552,18 @@ static ssize_t traceSMB_proc_write(struct file *file, const char __user *buffer,
 		size_t count, loff_t *ppos)
 {
 	char c;
+	bool bv;
 	int rc;
 
 	rc = get_user(c, buffer);
 	if (rc)
 		return rc;
-	if (c == '0' || c == 'n' || c == 'N')
-		traceSMB = 0;
-	else if (c == '1' || c == 'y' || c == 'Y')
-		traceSMB = 1;
+
+	rc = strtobool(&c, &bv);
+	if (rc)
+		return rc;
+
+	traceSMB = bv;
 
 	return count;
 }
@@ -630,6 +621,7 @@ static ssize_t cifs_security_flags_proc_write(struct file *file,
 	unsigned int flags;
 	char flags_string[12];
 	char c;
+	bool bv;
 
 	if ((count < 1) || (count > 11))
 		return -EINVAL;
@@ -642,11 +634,8 @@ static ssize_t cifs_security_flags_proc_write(struct file *file,
 	if (count < 3) {
 		/* single char or single char followed by null */
 		c = flags_string[0];
-		if (c == '0' || c == 'n' || c == 'N') {
-			global_secflags = CIFSSEC_DEF; /* default */
-			return count;
-		} else if (c == '1' || c == 'y' || c == 'Y') {
-			global_secflags = CIFSSEC_MAX;
+		if (strtobool(&c, &bv) == 0) {
+			global_secflags = bv ? CIFSSEC_MAX : CIFSSEC_DEF;
 			return count;
 		} else if (!isdigit(c)) {
 			cifs_dbg(VFS, "Invalid SecurityFlags: %s\n",
diff --git a/fs/cifs/cifs_debug.h b/fs/cifs/cifs_debug.h
index c99b40f..f40fbac 100644
--- a/fs/cifs/cifs_debug.h
+++ b/fs/cifs/cifs_debug.h
@@ -53,13 +53,12 @@ __printf(1, 2) void cifs_vfs_err(const char *fmt, ...);
 do {									\
 	if (type == FYI) {						\
 		if (cifsFYI & CIFS_INFO) {				\
-			printk(KERN_DEBUG "%s: " fmt,			\
-			       __FILE__, ##__VA_ARGS__);		\
+			pr_debug("%s: " fmt, __FILE__, ##__VA_ARGS__);	\
 		}							\
 	} else if (type == VFS) {					\
 		cifs_vfs_err(fmt, ##__VA_ARGS__);			\
 	} else if (type == NOISY && type != 0) {			\
-		printk(KERN_DEBUG fmt, ##__VA_ARGS__);			\
+		pr_debug(fmt, ##__VA_ARGS__);				\
 	}								\
 } while (0)
 
@@ -71,7 +70,7 @@ do {									\
 #define cifs_dbg(type, fmt, ...)					\
 do {									\
 	if (0)								\
-		printk(KERN_DEBUG fmt, ##__VA_ARGS__);			\
+		pr_debug(fmt, ##__VA_ARGS__);				\
 } while (0)
 #endif
 
diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c
index 6d00c41..1ea780b 100644
--- a/fs/cifs/cifsacl.c
+++ b/fs/cifs/cifsacl.c
@@ -38,7 +38,7 @@ static const struct cifs_sid sid_everyone = {
 	1, 1, {0, 0, 0, 0, 0, 1}, {0} };
 /* security id for Authenticated Users system group */
 static const struct cifs_sid sid_authusers = {
-	1, 1, {0, 0, 0, 0, 0, 5}, {__constant_cpu_to_le32(11)} };
+	1, 1, {0, 0, 0, 0, 0, 5}, {cpu_to_le32(11)} };
 /* group users */
 static const struct cifs_sid sid_user = {1, 2 , {0, 0, 0, 0, 0, 5}, {} };
 
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 9d7996e..d72fe37 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -209,8 +209,7 @@ cifs_statfs(struct dentry *dentry, struct kstatfs *buf)
 
 static long cifs_fallocate(struct file *file, int mode, loff_t off, loff_t len)
 {
-	struct super_block *sb = file->f_path.dentry->d_sb;
-	struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
+	struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
 	struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb);
 	struct TCP_Server_Info *server = tcon->ses->server;
 
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index 002e0c1..252f5c1 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -136,5 +136,5 @@ extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg);
 extern const struct export_operations cifs_export_ops;
 #endif /* CONFIG_CIFS_NFSD_EXPORT */
 
-#define CIFS_VERSION   "2.05"
+#define CIFS_VERSION   "2.06"
 #endif				/* _CIFSFS_H */
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 02a33e5..6e13911 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -1168,6 +1168,12 @@ CIFS_SB(struct super_block *sb)
 	return sb->s_fs_info;
 }
 
+static inline struct cifs_sb_info *
+CIFS_FILE_SB(struct file *file)
+{
+	return CIFS_SB(file_inode(file)->i_sb);
+}
+
 static inline char CIFS_DIR_SEP(const struct cifs_sb_info *cifs_sb)
 {
 	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS)
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 61d00a6..fa13d5e 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -2477,14 +2477,14 @@ CIFSSMBPosixLock(const unsigned int xid, struct cifs_tcon *tcon,
 		}
 		parm_data = (struct cifs_posix_lock *)
 			((char *)&pSMBr->hdr.Protocol + data_offset);
-		if (parm_data->lock_type == __constant_cpu_to_le16(CIFS_UNLCK))
+		if (parm_data->lock_type == cpu_to_le16(CIFS_UNLCK))
 			pLockData->fl_type = F_UNLCK;
 		else {
 			if (parm_data->lock_type ==
-					__constant_cpu_to_le16(CIFS_RDLCK))
+					cpu_to_le16(CIFS_RDLCK))
 				pLockData->fl_type = F_RDLCK;
 			else if (parm_data->lock_type ==
-					__constant_cpu_to_le16(CIFS_WRLCK))
+					cpu_to_le16(CIFS_WRLCK))
 				pLockData->fl_type = F_WRLCK;
 
 			pLockData->fl_start = le64_to_cpu(parm_data->start);
@@ -3276,25 +3276,25 @@ CIFSSMB_set_compression(const unsigned int xid, struct cifs_tcon *tcon,
 	pSMB->compression_state = cpu_to_le16(COMPRESSION_FORMAT_DEFAULT);
 
 	pSMB->TotalParameterCount = 0;
-	pSMB->TotalDataCount = __constant_cpu_to_le32(2);
+	pSMB->TotalDataCount = cpu_to_le32(2);
 	pSMB->MaxParameterCount = 0;
 	pSMB->MaxDataCount = 0;
 	pSMB->MaxSetupCount = 4;
 	pSMB->Reserved = 0;
 	pSMB->ParameterOffset = 0;
-	pSMB->DataCount = __constant_cpu_to_le32(2);
+	pSMB->DataCount = cpu_to_le32(2);
 	pSMB->DataOffset =
 		cpu_to_le32(offsetof(struct smb_com_transaction_compr_ioctl_req,
 				compression_state) - 4);  /* 84 */
 	pSMB->SetupCount = 4;
-	pSMB->SubCommand = __constant_cpu_to_le16(NT_TRANSACT_IOCTL);
+	pSMB->SubCommand = cpu_to_le16(NT_TRANSACT_IOCTL);
 	pSMB->ParameterCount = 0;
-	pSMB->FunctionCode = __constant_cpu_to_le32(FSCTL_SET_COMPRESSION);
+	pSMB->FunctionCode = cpu_to_le32(FSCTL_SET_COMPRESSION);
 	pSMB->IsFsctl = 1; /* FSCTL */
 	pSMB->IsRootFlag = 0;
 	pSMB->Fid = fid; /* file handle always le */
 	/* 3 byte pad, followed by 2 byte compress state */
-	pSMB->ByteCount = __constant_cpu_to_le16(5);
+	pSMB->ByteCount = cpu_to_le16(5);
 	inc_rfc1001_len(pSMB, 5);
 
 	rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -3430,10 +3430,10 @@ static __u16 ACL_to_cifs_posix(char *parm_data, const char *pACL,
 	cifs_acl->version = cpu_to_le16(1);
 	if (acl_type == ACL_TYPE_ACCESS) {
 		cifs_acl->access_entry_count = cpu_to_le16(count);
-		cifs_acl->default_entry_count = __constant_cpu_to_le16(0xFFFF);
+		cifs_acl->default_entry_count = cpu_to_le16(0xFFFF);
 	} else if (acl_type == ACL_TYPE_DEFAULT) {
 		cifs_acl->default_entry_count = cpu_to_le16(count);
-		cifs_acl->access_entry_count = __constant_cpu_to_le16(0xFFFF);
+		cifs_acl->access_entry_count = cpu_to_le16(0xFFFF);
 	} else {
 		cifs_dbg(FYI, "unknown ACL type %d\n", acl_type);
 		return 0;
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 24fa08d..2a772da 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -1466,9 +1466,7 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
 			vol->seal = 1;
 			break;
 		case Opt_noac:
-			printk(KERN_WARNING "CIFS: Mount option noac not "
-				"supported. Instead set "
-				"/proc/fs/cifs/LookupCacheEnabled to 0\n");
+			pr_warn("CIFS: Mount option noac not supported. Instead set /proc/fs/cifs/LookupCacheEnabled to 0\n");
 			break;
 		case Opt_fsc:
 #ifndef CONFIG_CIFS_FSCACHE
@@ -1598,7 +1596,7 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
 
 			if (strnlen(string, CIFS_MAX_USERNAME_LEN) >
 							CIFS_MAX_USERNAME_LEN) {
-				printk(KERN_WARNING "CIFS: username too long\n");
+				pr_warn("CIFS: username too long\n");
 				goto cifs_parse_mount_err;
 			}
 			vol->username = kstrdup(string, GFP_KERNEL);
@@ -1662,8 +1660,7 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
 			temp_len = strlen(value);
 			vol->password = kzalloc(temp_len+1, GFP_KERNEL);
 			if (vol->password == NULL) {
-				printk(KERN_WARNING "CIFS: no memory "
-						    "for password\n");
+				pr_warn("CIFS: no memory for password\n");
 				goto cifs_parse_mount_err;
 			}
 
@@ -1687,8 +1684,7 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
 
 			if (!cifs_convert_address(dstaddr, string,
 					strlen(string))) {
-				printk(KERN_ERR "CIFS: bad ip= option (%s).\n",
-					string);
+				pr_err("CIFS: bad ip= option (%s).\n", string);
 				goto cifs_parse_mount_err;
 			}
 			got_ip = true;
@@ -1700,15 +1696,13 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
 
 			if (strnlen(string, CIFS_MAX_DOMAINNAME_LEN)
 					== CIFS_MAX_DOMAINNAME_LEN) {
-				printk(KERN_WARNING "CIFS: domain name too"
-						    " long\n");
+				pr_warn("CIFS: domain name too long\n");
 				goto cifs_parse_mount_err;
 			}
 
 			vol->domainname = kstrdup(string, GFP_KERNEL);
 			if (!vol->domainname) {
-				printk(KERN_WARNING "CIFS: no memory "
-						    "for domainname\n");
+				pr_warn("CIFS: no memory for domainname\n");
 				goto cifs_parse_mount_err;
 			}
 			cifs_dbg(FYI, "Domain name set\n");
@@ -1721,8 +1715,8 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
 			if (!cifs_convert_address(
 					(struct sockaddr *)&vol->srcaddr,
 					string, strlen(string))) {
-				printk(KERN_WARNING "CIFS:  Could not parse"
-						    " srcaddr: %s\n", string);
+				pr_warn("CIFS: Could not parse srcaddr: %s\n",
+					string);
 				goto cifs_parse_mount_err;
 			}
 			break;
@@ -1732,8 +1726,7 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
 				goto out_nomem;
 
 			if (strnlen(string, 1024) >= 65) {
-				printk(KERN_WARNING "CIFS: iocharset name "
-						    "too long.\n");
+				pr_warn("CIFS: iocharset name too long.\n");
 				goto cifs_parse_mount_err;
 			}
 
@@ -1741,8 +1734,7 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
 				vol->iocharset = kstrdup(string,
 							 GFP_KERNEL);
 				if (!vol->iocharset) {
-					printk(KERN_WARNING "CIFS: no memory"
-							    "for charset\n");
+					pr_warn("CIFS: no memory for charset\n");
 					goto cifs_parse_mount_err;
 				}
 			}
@@ -1773,9 +1765,7 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
 			 * set at top of the function
 			 */
 			if (i == RFC1001_NAME_LEN && string[i] != 0)
-				printk(KERN_WARNING "CIFS: netbiosname"
-				       " longer than 15 truncated.\n");
-
+				pr_warn("CIFS: netbiosname longer than 15 truncated.\n");
 			break;
 		case Opt_servern:
 			/* servernetbiosname specified override *SMBSERVER */
@@ -1801,8 +1791,7 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
 			/* The string has 16th byte zero still from
 			   set at top of the function  */
 			if (i == RFC1001_NAME_LEN && string[i] != 0)
-				printk(KERN_WARNING "CIFS: server net"
-				       "biosname longer than 15 truncated.\n");
+				pr_warn("CIFS: server netbiosname longer than 15 truncated.\n");
 			break;
 		case Opt_ver:
 			string = match_strdup(args);
@@ -1814,8 +1803,7 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
 				break;
 			}
 			/* For all other value, error */
-			printk(KERN_WARNING "CIFS: Invalid version"
-					    " specified\n");
+			pr_warn("CIFS: Invalid version specified\n");
 			goto cifs_parse_mount_err;
 		case Opt_vers:
 			string = match_strdup(args);
@@ -1856,7 +1844,7 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
 	}
 
 	if (!sloppy && invalid) {
-		printk(KERN_ERR "CIFS: Unknown mount option \"%s\"\n", invalid);
+		pr_err("CIFS: Unknown mount option \"%s\"\n", invalid);
 		goto cifs_parse_mount_err;
 	}
 
@@ -1882,8 +1870,7 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
 		/* No ip= option specified? Try to get it from UNC */
 		if (!cifs_convert_address(dstaddr, &vol->UNC[2],
 						strlen(&vol->UNC[2]))) {
-			printk(KERN_ERR "Unable to determine destination "
-					"address.\n");
+			pr_err("Unable to determine destination address.\n");
 			goto cifs_parse_mount_err;
 		}
 	}
@@ -1894,20 +1881,18 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
 	if (uid_specified)
 		vol->override_uid = override_uid;
 	else if (override_uid == 1)
-		printk(KERN_NOTICE "CIFS: ignoring forceuid mount option "
-				   "specified with no uid= option.\n");
+		pr_notice("CIFS: ignoring forceuid mount option specified with no uid= option.\n");
 
 	if (gid_specified)
 		vol->override_gid = override_gid;
 	else if (override_gid == 1)
-		printk(KERN_NOTICE "CIFS: ignoring forcegid mount option "
-				   "specified with no gid= option.\n");
+		pr_notice("CIFS: ignoring forcegid mount option specified with no gid= option.\n");
 
 	kfree(mountdata_copy);
 	return 0;
 
 out_nomem:
-	printk(KERN_WARNING "Could not allocate temporary buffer\n");
+	pr_warn("Could not allocate temporary buffer\n");
 cifs_parse_mount_err:
 	kfree(string);
 	kfree(mountdata_copy);
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 3e4d00a..96b7e9b 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -1066,7 +1066,7 @@ cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
 
 	max_num = (max_buf - sizeof(struct smb_hdr)) /
 						sizeof(LOCKING_ANDX_RANGE);
-	buf = kzalloc(max_num * sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
+	buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
 	if (!buf) {
 		free_xid(xid);
 		return -ENOMEM;
@@ -1401,7 +1401,7 @@ cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
 
 	max_num = (max_buf - sizeof(struct smb_hdr)) /
 						sizeof(LOCKING_ANDX_RANGE);
-	buf = kzalloc(max_num * sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
+	buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
 	if (!buf)
 		return -ENOMEM;
 
@@ -1586,7 +1586,7 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
 	cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag,
 			tcon->ses->server);
 
-	cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
+	cifs_sb = CIFS_FILE_SB(file);
 	netfid = cfile->fid.netfid;
 	cinode = CIFS_I(file_inode(file));
 
@@ -2305,7 +2305,7 @@ int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
 	struct cifs_tcon *tcon;
 	struct TCP_Server_Info *server;
 	struct cifsFileInfo *smbfile = file->private_data;
-	struct cifs_sb_info *cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
+	struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
 	struct inode *inode = file->f_mapping->host;
 
 	rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
@@ -2585,7 +2585,7 @@ cifs_iovec_write(struct file *file, struct iov_iter *from, loff_t *poffset)
 	iov_iter_truncate(from, len);
 
 	INIT_LIST_HEAD(&wdata_list);
-	cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
+	cifs_sb = CIFS_FILE_SB(file);
 	open_file = file->private_data;
 	tcon = tlink_tcon(open_file->tlink);
 
@@ -3010,7 +3010,7 @@ ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
 		return 0;
 
 	INIT_LIST_HEAD(&rdata_list);
-	cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
+	cifs_sb = CIFS_FILE_SB(file);
 	open_file = file->private_data;
 	tcon = tlink_tcon(open_file->tlink);
 
@@ -3155,7 +3155,7 @@ cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
 	__u32 pid;
 
 	xid = get_xid();
-	cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
+	cifs_sb = CIFS_FILE_SB(file);
 
 	/* FIXME: set up handlers for larger reads and/or convert to async */
 	rsize = min_t(unsigned int, cifs_sb->rsize, CIFSMaxBufSize);
@@ -3462,7 +3462,7 @@ static int cifs_readpages(struct file *file, struct address_space *mapping,
 	int rc;
 	struct list_head tmplist;
 	struct cifsFileInfo *open_file = file->private_data;
-	struct cifs_sb_info *cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
+	struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
 	struct TCP_Server_Info *server;
 	pid_t pid;
 
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 197cb50..0c3ce464 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -895,7 +895,7 @@ inode_has_hashed_dentries(struct inode *inode)
 	struct dentry *dentry;
 
 	spin_lock(&inode->i_lock);
-	hlist_for_each_entry(dentry, &inode->i_dentry, d_alias) {
+	hlist_for_each_entry(dentry, &inode->i_dentry, d_u.d_alias) {
 		if (!d_unhashed(dentry) || IS_ROOT(dentry)) {
 			spin_unlock(&inode->i_lock);
 			return true;
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index b7415d5..3379463 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -513,39 +513,11 @@ is_valid_oplock_break(char *buffer, struct TCP_Server_Info *srv)
 void
 dump_smb(void *buf, int smb_buf_length)
 {
-	int i, j;
-	char debug_line[17];
-	unsigned char *buffer = buf;
-
 	if (traceSMB == 0)
 		return;
 
-	for (i = 0, j = 0; i < smb_buf_length; i++, j++) {
-		if (i % 8 == 0) {
-			/* have reached the beginning of line */
-			printk(KERN_DEBUG "| ");
-			j = 0;
-		}
-		printk("%0#4x ", buffer[i]);
-		debug_line[2 * j] = ' ';
-		if (isprint(buffer[i]))
-			debug_line[1 + (2 * j)] = buffer[i];
-		else
-			debug_line[1 + (2 * j)] = '_';
-
-		if (i % 8 == 7) {
-			/* reached end of line, time to print ascii */
-			debug_line[16] = 0;
-			printk(" | %s\n", debug_line);
-		}
-	}
-	for (; j < 8; j++) {
-		printk("     ");
-		debug_line[2 * j] = ' ';
-		debug_line[1 + (2 * j)] = ' ';
-	}
-	printk(" | %s\n", debug_line);
-	return;
+	print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_NONE, 8, 2, buf,
+		       smb_buf_length, true);
 }
 
 void
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index 8fd2a95..8eaf20a 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -123,7 +123,7 @@ cifs_prime_dcache(struct dentry *parent, struct qstr *name,
 	if (!inode)
 		goto out;
 
-	alias = d_materialise_unique(dentry, inode);
+	alias = d_splice_alias(inode, dentry);
 	if (alias && !IS_ERR(alias))
 		dput(alias);
 out:
@@ -261,7 +261,7 @@ initiate_cifs_search(const unsigned int xid, struct file *file)
 	int rc = 0;
 	char *full_path = NULL;
 	struct cifsFileInfo *cifsFile;
-	struct cifs_sb_info *cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
+	struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
 	struct tcon_link *tlink = NULL;
 	struct cifs_tcon *tcon;
 	struct TCP_Server_Info *server;
@@ -561,7 +561,7 @@ find_cifs_entry(const unsigned int xid, struct cifs_tcon *tcon, loff_t pos,
 	loff_t first_entry_in_buffer;
 	loff_t index_to_find = pos;
 	struct cifsFileInfo *cfile = file->private_data;
-	struct cifs_sb_info *cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
+	struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
 	struct TCP_Server_Info *server = tcon->ses->server;
 	/* check if index in the buffer */
 
@@ -679,7 +679,7 @@ static int cifs_filldir(char *find_entry, struct file *file,
 		char *scratch_buf, unsigned int max_len)
 {
 	struct cifsFileInfo *file_info = file->private_data;
-	struct super_block *sb = file->f_path.dentry->d_sb;
+	struct super_block *sb = file_inode(file)->i_sb;
 	struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
 	struct cifs_dirent de = { NULL, };
 	struct cifs_fattr fattr;
@@ -753,7 +753,7 @@ static int cifs_filldir(char *find_entry, struct file *file,
 		 */
 		fattr.cf_flags |= CIFS_FATTR_NEED_REVAL;
 
-	cifs_prime_dcache(file->f_dentry, &name, &fattr);
+	cifs_prime_dcache(file->f_path.dentry, &name, &fattr);
 
 	ino = cifs_uniqueid_to_ino_t(fattr.cf_uniqueid);
 	return !dir_emit(ctx, name.name, name.len, ino, fattr.cf_dtype);
@@ -794,10 +794,6 @@ int cifs_readdir(struct file *file, struct dir_context *ctx)
 		if it before then restart search
 		if after then keep searching till find it */
 
-	if (file->private_data == NULL) {
-		rc = -EINVAL;
-		goto rddir2_exit;
-	}
 	cifsFile = file->private_data;
 	if (cifsFile->srch_inf.endOfSearch) {
 		if (cifsFile->srch_inf.emptyDir) {
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index 57db63f..bce6fdc 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -46,7 +46,7 @@ static __u32 cifs_ssetup_hdr(struct cifs_ses *ses, SESSION_SETUP_ANDX *pSMB)
 					CIFSMaxBufSize + MAX_CIFS_HDR_SIZE - 4,
 					USHRT_MAX));
 	pSMB->req.MaxMpxCount = cpu_to_le16(ses->server->maxReq);
-	pSMB->req.VcNumber = __constant_cpu_to_le16(1);
+	pSMB->req.VcNumber = cpu_to_le16(1);
 
 	/* Now no need to set SMBFLG_CASELESS or obsolete CANONICAL PATH */
 
@@ -1303,6 +1303,11 @@ sess_auth_rawntlmssp_authenticate(struct sess_data *sess_data)
 	if (le16_to_cpu(pSMB->resp.Action) & GUEST_LOGIN)
 		cifs_dbg(FYI, "Guest login\n"); /* BB mark SesInfo struct? */
 
+	if (ses->Suid != smb_buf->Uid) {
+		ses->Suid = smb_buf->Uid;
+		cifs_dbg(FYI, "UID changed! new UID = %llu\n", ses->Suid);
+	}
+
 	bytes_remaining = get_bcc(smb_buf);
 	bcc_ptr = pByteArea(smb_buf);
 	blob_len = le16_to_cpu(pSMB->resp.SecurityBlobLength);
diff --git a/fs/cifs/smb2file.c b/fs/cifs/smb2file.c
index 4599294..7198eac 100644
--- a/fs/cifs/smb2file.c
+++ b/fs/cifs/smb2file.c
@@ -111,7 +111,7 @@ smb2_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
 		return -EINVAL;
 
 	max_num = max_buf / sizeof(struct smb2_lock_element);
-	buf = kzalloc(max_num * sizeof(struct smb2_lock_element), GFP_KERNEL);
+	buf = kcalloc(max_num, sizeof(struct smb2_lock_element), GFP_KERNEL);
 	if (!buf)
 		return -ENOMEM;
 
@@ -247,7 +247,7 @@ smb2_push_mandatory_locks(struct cifsFileInfo *cfile)
 	}
 
 	max_num = max_buf / sizeof(struct smb2_lock_element);
-	buf = kzalloc(max_num * sizeof(struct smb2_lock_element), GFP_KERNEL);
+	buf = kcalloc(max_num, sizeof(struct smb2_lock_element), GFP_KERNEL);
 	if (!buf) {
 		free_xid(xid);
 		return -ENOMEM;
diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c
index 1a08a34..f1cefc9 100644
--- a/fs/cifs/smb2misc.c
+++ b/fs/cifs/smb2misc.c
@@ -67,27 +67,27 @@ check_smb2_hdr(struct smb2_hdr *hdr, __u64 mid)
  *  indexed by command in host byte order
  */
 static const __le16 smb2_rsp_struct_sizes[NUMBER_OF_SMB2_COMMANDS] = {
-	/* SMB2_NEGOTIATE */ __constant_cpu_to_le16(65),
-	/* SMB2_SESSION_SETUP */ __constant_cpu_to_le16(9),
-	/* SMB2_LOGOFF */ __constant_cpu_to_le16(4),
-	/* SMB2_TREE_CONNECT */ __constant_cpu_to_le16(16),
-	/* SMB2_TREE_DISCONNECT */ __constant_cpu_to_le16(4),
-	/* SMB2_CREATE */ __constant_cpu_to_le16(89),
-	/* SMB2_CLOSE */ __constant_cpu_to_le16(60),
-	/* SMB2_FLUSH */ __constant_cpu_to_le16(4),
-	/* SMB2_READ */ __constant_cpu_to_le16(17),
-	/* SMB2_WRITE */ __constant_cpu_to_le16(17),
-	/* SMB2_LOCK */ __constant_cpu_to_le16(4),
-	/* SMB2_IOCTL */ __constant_cpu_to_le16(49),
+	/* SMB2_NEGOTIATE */ cpu_to_le16(65),
+	/* SMB2_SESSION_SETUP */ cpu_to_le16(9),
+	/* SMB2_LOGOFF */ cpu_to_le16(4),
+	/* SMB2_TREE_CONNECT */ cpu_to_le16(16),
+	/* SMB2_TREE_DISCONNECT */ cpu_to_le16(4),
+	/* SMB2_CREATE */ cpu_to_le16(89),
+	/* SMB2_CLOSE */ cpu_to_le16(60),
+	/* SMB2_FLUSH */ cpu_to_le16(4),
+	/* SMB2_READ */ cpu_to_le16(17),
+	/* SMB2_WRITE */ cpu_to_le16(17),
+	/* SMB2_LOCK */ cpu_to_le16(4),
+	/* SMB2_IOCTL */ cpu_to_le16(49),
 	/* BB CHECK this ... not listed in documentation */
-	/* SMB2_CANCEL */ __constant_cpu_to_le16(0),
-	/* SMB2_ECHO */ __constant_cpu_to_le16(4),
-	/* SMB2_QUERY_DIRECTORY */ __constant_cpu_to_le16(9),
-	/* SMB2_CHANGE_NOTIFY */ __constant_cpu_to_le16(9),
-	/* SMB2_QUERY_INFO */ __constant_cpu_to_le16(9),
-	/* SMB2_SET_INFO */ __constant_cpu_to_le16(2),
+	/* SMB2_CANCEL */ cpu_to_le16(0),
+	/* SMB2_ECHO */ cpu_to_le16(4),
+	/* SMB2_QUERY_DIRECTORY */ cpu_to_le16(9),
+	/* SMB2_CHANGE_NOTIFY */ cpu_to_le16(9),
+	/* SMB2_QUERY_INFO */ cpu_to_le16(9),
+	/* SMB2_SET_INFO */ cpu_to_le16(2),
 	/* BB FIXME can also be 44 for lease break */
-	/* SMB2_OPLOCK_BREAK */ __constant_cpu_to_le16(24)
+	/* SMB2_OPLOCK_BREAK */ cpu_to_le16(24)
 };
 
 int
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index c5f521b..93fd058 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -600,7 +600,7 @@ smb2_clone_range(const unsigned int xid,
 		goto cchunk_out;
 
 	/* For now array only one chunk long, will make more flexible later */
-	pcchunk->ChunkCount = __constant_cpu_to_le32(1);
+	pcchunk->ChunkCount = cpu_to_le32(1);
 	pcchunk->Reserved = 0;
 	pcchunk->Reserved2 = 0;
 
@@ -1102,6 +1102,64 @@ static long smb3_punch_hole(struct file *file, struct cifs_tcon *tcon,
 	return rc;
 }
 
+static long smb3_simple_falloc(struct file *file, struct cifs_tcon *tcon,
+			    loff_t off, loff_t len, bool keep_size)
+{
+	struct inode *inode;
+	struct cifsInodeInfo *cifsi;
+	struct cifsFileInfo *cfile = file->private_data;
+	long rc = -EOPNOTSUPP;
+	unsigned int xid;
+
+	xid = get_xid();
+
+	inode = cfile->dentry->d_inode;
+	cifsi = CIFS_I(inode);
+
+	/* if file not oplocked can't be sure whether asking to extend size */
+	if (!CIFS_CACHE_READ(cifsi))
+		if (keep_size == false)
+			return -EOPNOTSUPP;
+
+	/*
+	 * Files are non-sparse by default so falloc may be a no-op
+	 * Must check if file sparse. If not sparse, and not extending
+	 * then no need to do anything since file already allocated
+	 */
+	if ((cifsi->cifsAttrs & FILE_ATTRIBUTE_SPARSE_FILE) == 0) {
+		if (keep_size == true)
+			return 0;
+		/* check if extending file */
+		else if (i_size_read(inode) >= off + len)
+			/* not extending file and already not sparse */
+			return 0;
+		/* BB: in future add else clause to extend file */
+		else
+			return -EOPNOTSUPP;
+	}
+
+	if ((keep_size == true) || (i_size_read(inode) >= off + len)) {
+		/*
+		 * Check if falloc starts within first few pages of file
+		 * and ends within a few pages of the end of file to
+		 * ensure that most of file is being forced to be
+		 * fallocated now. If so then setting whole file sparse
+		 * ie potentially making a few extra pages at the beginning
+		 * or end of the file non-sparse via set_sparse is harmless.
+		 */
+		if ((off > 8192) || (off + len + 8192 < i_size_read(inode)))
+			return -EOPNOTSUPP;
+
+		rc = smb2_set_sparse(xid, tcon, cfile, inode, false);
+	}
+	/* BB: else ... in future add code to extend file and set sparse */
+
+
+	free_xid(xid);
+	return rc;
+}
+
+
 static long smb3_fallocate(struct file *file, struct cifs_tcon *tcon, int mode,
 			   loff_t off, loff_t len)
 {
@@ -1112,7 +1170,10 @@ static long smb3_fallocate(struct file *file, struct cifs_tcon *tcon, int mode,
 		if (mode & FALLOC_FL_KEEP_SIZE)
 			return smb3_zero_range(file, tcon, off, len, true);
 		return smb3_zero_range(file, tcon, off, len, false);
-	}
+	} else if (mode == FALLOC_FL_KEEP_SIZE)
+		return smb3_simple_falloc(file, tcon, off, len, true);
+	else if (mode == 0)
+		return smb3_simple_falloc(file, tcon, off, len, false);
 
 	return -EOPNOTSUPP;
 }
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index 8f1672b..3417340 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -431,8 +431,7 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses)
 	if (rc)
 		goto neg_exit;
 	if (blob_length)
-		rc = decode_neg_token_init(security_blob, blob_length,
-				   &server->sec_type);
+		rc = decode_negTokenInit(security_blob, blob_length, server);
 	if (rc == 1)
 		rc = 0;
 	else if (rc == 0) {
@@ -1359,7 +1358,7 @@ SMB2_set_compression(const unsigned int xid, struct cifs_tcon *tcon,
 	char *ret_data = NULL;
 
 	fsctl_input.CompressionState =
-			__constant_cpu_to_le16(COMPRESSION_FORMAT_DEFAULT);
+			cpu_to_le16(COMPRESSION_FORMAT_DEFAULT);
 
 	rc = SMB2_ioctl(xid, tcon, persistent_fid, volatile_fid,
 			FSCTL_SET_COMPRESSION, true /* is_fsctl */,
diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h
index e3188ab..ce85847 100644
--- a/fs/cifs/smb2pdu.h
+++ b/fs/cifs/smb2pdu.h
@@ -85,7 +85,7 @@
 /* BB FIXME - analyze following length BB */
 #define MAX_SMB2_HDR_SIZE 0x78 /* 4 len + 64 hdr + (2*24 wct) + 2 bct + 2 pad */
 
-#define SMB2_PROTO_NUMBER __constant_cpu_to_le32(0x424d53fe)
+#define SMB2_PROTO_NUMBER cpu_to_le32(0x424d53fe)
 
 /*
  * SMB2 Header Definition
@@ -96,7 +96,7 @@
  *
  */
 
-#define SMB2_HEADER_STRUCTURE_SIZE __constant_cpu_to_le16(64)
+#define SMB2_HEADER_STRUCTURE_SIZE cpu_to_le16(64)
 
 struct smb2_hdr {
 	__be32 smb2_buf_length;	/* big endian on wire */
@@ -137,16 +137,16 @@ struct smb2_transform_hdr {
 } __packed;
 
 /* Encryption Algorithms */
-#define SMB2_ENCRYPTION_AES128_CCM	__constant_cpu_to_le16(0x0001)
+#define SMB2_ENCRYPTION_AES128_CCM	cpu_to_le16(0x0001)
 
 /*
  *	SMB2 flag definitions
  */
-#define SMB2_FLAGS_SERVER_TO_REDIR	__constant_cpu_to_le32(0x00000001)
-#define SMB2_FLAGS_ASYNC_COMMAND	__constant_cpu_to_le32(0x00000002)
-#define SMB2_FLAGS_RELATED_OPERATIONS	__constant_cpu_to_le32(0x00000004)
-#define SMB2_FLAGS_SIGNED		__constant_cpu_to_le32(0x00000008)
-#define SMB2_FLAGS_DFS_OPERATIONS	__constant_cpu_to_le32(0x10000000)
+#define SMB2_FLAGS_SERVER_TO_REDIR	cpu_to_le32(0x00000001)
+#define SMB2_FLAGS_ASYNC_COMMAND	cpu_to_le32(0x00000002)
+#define SMB2_FLAGS_RELATED_OPERATIONS	cpu_to_le32(0x00000004)
+#define SMB2_FLAGS_SIGNED		cpu_to_le32(0x00000008)
+#define SMB2_FLAGS_DFS_OPERATIONS	cpu_to_le32(0x10000000)
 
 /*
  *	Definitions for SMB2 Protocol Data Units (network frames)
@@ -157,7 +157,7 @@ struct smb2_transform_hdr {
  *
  */
 
-#define SMB2_ERROR_STRUCTURE_SIZE2 __constant_cpu_to_le16(9)
+#define SMB2_ERROR_STRUCTURE_SIZE2 cpu_to_le16(9)
 
 struct smb2_err_rsp {
 	struct smb2_hdr hdr;
@@ -502,12 +502,12 @@ struct create_context {
 #define SMB2_LEASE_HANDLE_CACHING_HE	0x02
 #define SMB2_LEASE_WRITE_CACHING_HE	0x04
 
-#define SMB2_LEASE_NONE			__constant_cpu_to_le32(0x00)
-#define SMB2_LEASE_READ_CACHING		__constant_cpu_to_le32(0x01)
-#define SMB2_LEASE_HANDLE_CACHING	__constant_cpu_to_le32(0x02)
-#define SMB2_LEASE_WRITE_CACHING	__constant_cpu_to_le32(0x04)
+#define SMB2_LEASE_NONE			cpu_to_le32(0x00)
+#define SMB2_LEASE_READ_CACHING		cpu_to_le32(0x01)
+#define SMB2_LEASE_HANDLE_CACHING	cpu_to_le32(0x02)
+#define SMB2_LEASE_WRITE_CACHING	cpu_to_le32(0x04)
 
-#define SMB2_LEASE_FLAG_BREAK_IN_PROGRESS __constant_cpu_to_le32(0x02)
+#define SMB2_LEASE_FLAG_BREAK_IN_PROGRESS cpu_to_le32(0x02)
 
 #define SMB2_LEASE_KEY_SIZE 16
 
@@ -836,6 +836,25 @@ struct smb2_query_directory_rsp {
 #define SMB2_O_INFO_SECURITY	0x03
 #define SMB2_O_INFO_QUOTA	0x04
 
+/* Security info type additionalinfo flags. See MS-SMB2 (2.2.37) or MS-DTYP */
+#define OWNER_SECINFO   0x00000001
+#define GROUP_SECINFO   0x00000002
+#define DACL_SECINFO   0x00000004
+#define SACL_SECINFO   0x00000008
+#define LABEL_SECINFO   0x00000010
+#define ATTRIBUTE_SECINFO   0x00000020
+#define SCOPE_SECINFO   0x00000040
+#define BACKUP_SECINFO   0x00010000
+#define UNPROTECTED_SACL_SECINFO   0x10000000
+#define UNPROTECTED_DACL_SECINFO   0x20000000
+#define PROTECTED_SACL_SECINFO   0x40000000
+#define PROTECTED_DACL_SECINFO   0x80000000
+
+/* Flags used for FileFullEAinfo */
+#define SL_RESTART_SCAN		0x00000001
+#define SL_RETURN_SINGLE_ENTRY	0x00000002
+#define SL_INDEX_SPECIFIED	0x00000004
+
 struct smb2_query_info_req {
 	struct smb2_hdr hdr;
 	__le16 StructureSize; /* Must be 41 */
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index 9d087f4..126f46b 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -99,9 +99,9 @@ DeleteMidQEntry(struct mid_q_entry *midEntry)
 	   something is wrong, unless it is quite a slow link or server */
 	if ((now - midEntry->when_alloc) > HZ) {
 		if ((cifsFYI & CIFS_TIMER) && (midEntry->command != command)) {
-			printk(KERN_DEBUG " CIFS slow rsp: cmd %d mid %llu",
+			pr_debug(" CIFS slow rsp: cmd %d mid %llu",
 			       midEntry->command, midEntry->mid);
-			printk(" A: 0x%lx S: 0x%lx R: 0x%lx\n",
+			pr_info(" A: 0x%lx S: 0x%lx R: 0x%lx\n",
 			       now - midEntry->when_alloc,
 			       now - midEntry->when_sent,
 			       now - midEntry->when_received);
diff --git a/fs/coda/cache.c b/fs/coda/cache.c
index 278f8fd..46ee6f2 100644
--- a/fs/coda/cache.c
+++ b/fs/coda/cache.c
@@ -92,7 +92,7 @@ static void coda_flag_children(struct dentry *parent, int flag)
 	struct dentry *de;
 
 	spin_lock(&parent->d_lock);
-	list_for_each_entry(de, &parent->d_subdirs, d_u.d_child) {
+	list_for_each_entry(de, &parent->d_subdirs, d_child) {
 		/* don't know what to do with negative dentries */
 		if (de->d_inode ) 
 			coda_flag_inode(de->d_inode, flag);
diff --git a/fs/coda/coda_linux.c b/fs/coda/coda_linux.c
index 1326d38..f1714cf 100644
--- a/fs/coda/coda_linux.c
+++ b/fs/coda/coda_linux.c
@@ -40,12 +40,6 @@ int coda_iscontrol(const char *name, size_t length)
                 (strncmp(name, CODA_CONTROL, CODA_CONTROLLEN) == 0));
 }
 
-/* recognize /coda inode */
-int coda_isroot(struct inode *i)
-{
-    return ( i->i_sb->s_root->d_inode == i );
-}
-
 unsigned short coda_flags_to_cflags(unsigned short flags)
 {
 	unsigned short coda_flags = 0;
diff --git a/fs/coda/coda_linux.h b/fs/coda/coda_linux.h
index d42b725..d6f7a76 100644
--- a/fs/coda/coda_linux.h
+++ b/fs/coda/coda_linux.h
@@ -52,7 +52,6 @@ int coda_setattr(struct dentry *, struct iattr *);
 
 /* this file:  heloers */
 char *coda_f2s(struct CodaFid *f);
-int coda_isroot(struct inode *i);
 int coda_iscontrol(const char *name, size_t length);
 
 void coda_vattr_to_iattr(struct inode *, struct coda_vattr *);
diff --git a/fs/coda/dir.c b/fs/coda/dir.c
index 9c3dedc..7ff0259 100644
--- a/fs/coda/dir.c
+++ b/fs/coda/dir.c
@@ -107,7 +107,7 @@ static struct dentry *coda_lookup(struct inode *dir, struct dentry *entry, unsig
 	}
 
 	/* control object, create inode on the fly */
-	if (coda_isroot(dir) && coda_iscontrol(name, length)) {
+	if (is_root_inode(dir) && coda_iscontrol(name, length)) {
 		inode = coda_cnode_makectl(sb);
 		type = CODA_NOCACHE;
 	} else {
@@ -195,7 +195,7 @@ static int coda_create(struct inode *dir, struct dentry *de, umode_t mode, bool
 	struct CodaFid newfid;
 	struct coda_vattr attrs;
 
-	if (coda_isroot(dir) && coda_iscontrol(name, length))
+	if (is_root_inode(dir) && coda_iscontrol(name, length))
 		return -EPERM;
 
 	error = venus_create(dir->i_sb, coda_i2f(dir), name, length, 
@@ -227,7 +227,7 @@ static int coda_mkdir(struct inode *dir, struct dentry *de, umode_t mode)
 	int error;
 	struct CodaFid newfid;
 
-	if (coda_isroot(dir) && coda_iscontrol(name, len))
+	if (is_root_inode(dir) && coda_iscontrol(name, len))
 		return -EPERM;
 
 	attrs.va_mode = mode;
@@ -261,7 +261,7 @@ static int coda_link(struct dentry *source_de, struct inode *dir_inode,
 	int len = de->d_name.len;
 	int error;
 
-	if (coda_isroot(dir_inode) && coda_iscontrol(name, len))
+	if (is_root_inode(dir_inode) && coda_iscontrol(name, len))
 		return -EPERM;
 
 	error = venus_link(dir_inode->i_sb, coda_i2f(inode),
@@ -287,7 +287,7 @@ static int coda_symlink(struct inode *dir_inode, struct dentry *de,
 	int symlen;
 	int error;
 
-	if (coda_isroot(dir_inode) && coda_iscontrol(name, len))
+	if (is_root_inode(dir_inode) && coda_iscontrol(name, len))
 		return -EPERM;
 
 	symlen = strlen(symname);
@@ -507,7 +507,7 @@ static int coda_dentry_revalidate(struct dentry *de, unsigned int flags)
 		return -ECHILD;
 
 	inode = de->d_inode;
-	if (!inode || coda_isroot(inode))
+	if (!inode || is_root_inode(inode))
 		goto out;
 	if (is_bad_inode(inode))
 		goto bad;
diff --git a/fs/compat.c b/fs/compat.c
index b13df99..6fd272d 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -847,10 +847,12 @@ struct compat_readdir_callback {
 	int result;
 };
 
-static int compat_fillonedir(void *__buf, const char *name, int namlen,
-			loff_t offset, u64 ino, unsigned int d_type)
+static int compat_fillonedir(struct dir_context *ctx, const char *name,
+			     int namlen, loff_t offset, u64 ino,
+			     unsigned int d_type)
 {
-	struct compat_readdir_callback *buf = __buf;
+	struct compat_readdir_callback *buf =
+		container_of(ctx, struct compat_readdir_callback, ctx);
 	struct compat_old_linux_dirent __user *dirent;
 	compat_ulong_t d_ino;
 
@@ -915,11 +917,12 @@ struct compat_getdents_callback {
 	int error;
 };
 
-static int compat_filldir(void *__buf, const char *name, int namlen,
+static int compat_filldir(struct dir_context *ctx, const char *name, int namlen,
 		loff_t offset, u64 ino, unsigned int d_type)
 {
 	struct compat_linux_dirent __user * dirent;
-	struct compat_getdents_callback *buf = __buf;
+	struct compat_getdents_callback *buf =
+		container_of(ctx, struct compat_getdents_callback, ctx);
 	compat_ulong_t d_ino;
 	int reclen = ALIGN(offsetof(struct compat_linux_dirent, d_name) +
 		namlen + 2, sizeof(compat_long_t));
@@ -1001,11 +1004,13 @@ struct compat_getdents_callback64 {
 	int error;
 };
 
-static int compat_filldir64(void * __buf, const char * name, int namlen, loff_t offset,
-		     u64 ino, unsigned int d_type)
+static int compat_filldir64(struct dir_context *ctx, const char *name,
+			    int namlen, loff_t offset, u64 ino,
+			    unsigned int d_type)
 {
 	struct linux_dirent64 __user *dirent;
-	struct compat_getdents_callback64 *buf = __buf;
+	struct compat_getdents_callback64 *buf =
+		container_of(ctx, struct compat_getdents_callback64, ctx);
 	int reclen = ALIGN(offsetof(struct linux_dirent64, d_name) + namlen + 1,
 		sizeof(u64));
 	u64 off;
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index 668dcab..c9c298b 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -386,7 +386,7 @@ static void remove_dir(struct dentry * d)
 	if (d->d_inode)
 		simple_rmdir(parent->d_inode,d);
 
-	pr_debug(" o %s removing done (%d)\n",d->d_name.name, d_count(d));
+	pr_debug(" o %pd removing done (%d)\n", d, d_count(d));
 
 	dput(parent);
 }
diff --git a/fs/dcache.c b/fs/dcache.c
index d5a23fd..e368d4f 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -44,7 +44,7 @@
 /*
  * Usage:
  * dcache->d_inode->i_lock protects:
- *   - i_dentry, d_alias, d_inode of aliases
+ *   - i_dentry, d_u.d_alias, d_inode of aliases
  * dcache_hash_bucket lock protects:
  *   - the dcache hash table
  * s_anon bl list spinlock protects:
@@ -59,7 +59,7 @@
  *   - d_unhashed()
  *   - d_parent and d_subdirs
  *   - childrens' d_child and d_parent
- *   - d_alias, d_inode
+ *   - d_u.d_alias, d_inode
  *
  * Ordering:
  * dentry->d_inode->i_lock
@@ -252,14 +252,12 @@ static void __d_free(struct rcu_head *head)
 {
 	struct dentry *dentry = container_of(head, struct dentry, d_u.d_rcu);
 
-	WARN_ON(!hlist_unhashed(&dentry->d_alias));
 	kmem_cache_free(dentry_cache, dentry); 
 }
 
 static void __d_free_external(struct rcu_head *head)
 {
 	struct dentry *dentry = container_of(head, struct dentry, d_u.d_rcu);
-	WARN_ON(!hlist_unhashed(&dentry->d_alias));
 	kfree(external_name(dentry));
 	kmem_cache_free(dentry_cache, dentry); 
 }
@@ -271,6 +269,7 @@ static inline int dname_external(const struct dentry *dentry)
 
 static void dentry_free(struct dentry *dentry)
 {
+	WARN_ON(!hlist_unhashed(&dentry->d_u.d_alias));
 	if (unlikely(dname_external(dentry))) {
 		struct external_name *p = external_name(dentry);
 		if (likely(atomic_dec_and_test(&p->u.count))) {
@@ -311,7 +310,7 @@ static void dentry_iput(struct dentry * dentry)
 	struct inode *inode = dentry->d_inode;
 	if (inode) {
 		dentry->d_inode = NULL;
-		hlist_del_init(&dentry->d_alias);
+		hlist_del_init(&dentry->d_u.d_alias);
 		spin_unlock(&dentry->d_lock);
 		spin_unlock(&inode->i_lock);
 		if (!inode->i_nlink)
@@ -336,7 +335,7 @@ static void dentry_unlink_inode(struct dentry * dentry)
 	struct inode *inode = dentry->d_inode;
 	__d_clear_type(dentry);
 	dentry->d_inode = NULL;
-	hlist_del_init(&dentry->d_alias);
+	hlist_del_init(&dentry->d_u.d_alias);
 	dentry_rcuwalk_barrier(dentry);
 	spin_unlock(&dentry->d_lock);
 	spin_unlock(&inode->i_lock);
@@ -496,7 +495,7 @@ static void __dentry_kill(struct dentry *dentry)
 	}
 	/* if it was on the hash then remove it */
 	__d_drop(dentry);
-	list_del(&dentry->d_u.d_child);
+	__list_del_entry(&dentry->d_child);
 	/*
 	 * Inform d_walk() that we are no longer attached to the
 	 * dentry tree
@@ -722,7 +721,7 @@ static struct dentry *__d_find_alias(struct inode *inode)
 
 again:
 	discon_alias = NULL;
-	hlist_for_each_entry(alias, &inode->i_dentry, d_alias) {
+	hlist_for_each_entry(alias, &inode->i_dentry, d_u.d_alias) {
 		spin_lock(&alias->d_lock);
  		if (S_ISDIR(inode->i_mode) || !d_unhashed(alias)) {
 			if (IS_ROOT(alias) &&
@@ -772,12 +771,13 @@ void d_prune_aliases(struct inode *inode)
 	struct dentry *dentry;
 restart:
 	spin_lock(&inode->i_lock);
-	hlist_for_each_entry(dentry, &inode->i_dentry, d_alias) {
+	hlist_for_each_entry(dentry, &inode->i_dentry, d_u.d_alias) {
 		spin_lock(&dentry->d_lock);
 		if (!dentry->d_lockref.count) {
 			struct dentry *parent = lock_parent(dentry);
 			if (likely(!dentry->d_lockref.count)) {
 				__dentry_kill(dentry);
+				dput(parent);
 				goto restart;
 			}
 			if (parent)
@@ -1050,7 +1050,7 @@ repeat:
 resume:
 	while (next != &this_parent->d_subdirs) {
 		struct list_head *tmp = next;
-		struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
+		struct dentry *dentry = list_entry(tmp, struct dentry, d_child);
 		next = tmp->next;
 
 		spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
@@ -1082,33 +1082,31 @@ resume:
 	/*
 	 * All done at this level ... ascend and resume the search.
 	 */
+	rcu_read_lock();
+ascend:
 	if (this_parent != parent) {
 		struct dentry *child = this_parent;
 		this_parent = child->d_parent;
 
-		rcu_read_lock();
 		spin_unlock(&child->d_lock);
 		spin_lock(&this_parent->d_lock);
 
-		/*
-		 * might go back up the wrong parent if we have had a rename
-		 * or deletion
-		 */
-		if (this_parent != child->d_parent ||
-			 (child->d_flags & DCACHE_DENTRY_KILLED) ||
-			 need_seqretry(&rename_lock, seq)) {
-			spin_unlock(&this_parent->d_lock);
-			rcu_read_unlock();
+		/* might go back up the wrong parent if we have had a rename. */
+		if (need_seqretry(&rename_lock, seq))
 			goto rename_retry;
+		next = child->d_child.next;
+		while (unlikely(child->d_flags & DCACHE_DENTRY_KILLED)) {
+			if (next == &this_parent->d_subdirs)
+				goto ascend;
+			child = list_entry(next, struct dentry, d_child);
+			next = next->next;
 		}
 		rcu_read_unlock();
-		next = child->d_u.d_child.next;
 		goto resume;
 	}
-	if (need_seqretry(&rename_lock, seq)) {
-		spin_unlock(&this_parent->d_lock);
+	if (need_seqretry(&rename_lock, seq))
 		goto rename_retry;
-	}
+	rcu_read_unlock();
 	if (finish)
 		finish(data);
 
@@ -1118,6 +1116,9 @@ out_unlock:
 	return;
 
 rename_retry:
+	spin_unlock(&this_parent->d_lock);
+	rcu_read_unlock();
+	BUG_ON(seq & 1);
 	if (!retry)
 		return;
 	seq = 1;
@@ -1454,8 +1455,8 @@ struct dentry *__d_alloc(struct super_block *sb, const struct qstr *name)
 	INIT_HLIST_BL_NODE(&dentry->d_hash);
 	INIT_LIST_HEAD(&dentry->d_lru);
 	INIT_LIST_HEAD(&dentry->d_subdirs);
-	INIT_HLIST_NODE(&dentry->d_alias);
-	INIT_LIST_HEAD(&dentry->d_u.d_child);
+	INIT_HLIST_NODE(&dentry->d_u.d_alias);
+	INIT_LIST_HEAD(&dentry->d_child);
 	d_set_d_op(dentry, dentry->d_sb->s_d_op);
 
 	this_cpu_inc(nr_dentry);
@@ -1485,7 +1486,7 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name)
 	 */
 	__dget_dlock(parent);
 	dentry->d_parent = parent;
-	list_add(&dentry->d_u.d_child, &parent->d_subdirs);
+	list_add(&dentry->d_child, &parent->d_subdirs);
 	spin_unlock(&parent->d_lock);
 
 	return dentry;
@@ -1578,7 +1579,7 @@ static void __d_instantiate(struct dentry *dentry, struct inode *inode)
 	spin_lock(&dentry->d_lock);
 	__d_set_type(dentry, add_flags);
 	if (inode)
-		hlist_add_head(&dentry->d_alias, &inode->i_dentry);
+		hlist_add_head(&dentry->d_u.d_alias, &inode->i_dentry);
 	dentry->d_inode = inode;
 	dentry_rcuwalk_barrier(dentry);
 	spin_unlock(&dentry->d_lock);
@@ -1602,7 +1603,7 @@ static void __d_instantiate(struct dentry *dentry, struct inode *inode)
  
 void d_instantiate(struct dentry *entry, struct inode * inode)
 {
-	BUG_ON(!hlist_unhashed(&entry->d_alias));
+	BUG_ON(!hlist_unhashed(&entry->d_u.d_alias));
 	if (inode)
 		spin_lock(&inode->i_lock);
 	__d_instantiate(entry, inode);
@@ -1641,7 +1642,7 @@ static struct dentry *__d_instantiate_unique(struct dentry *entry,
 		return NULL;
 	}
 
-	hlist_for_each_entry(alias, &inode->i_dentry, d_alias) {
+	hlist_for_each_entry(alias, &inode->i_dentry, d_u.d_alias) {
 		/*
 		 * Don't need alias->d_lock here, because aliases with
 		 * d_parent == entry->d_parent are not subject to name or
@@ -1667,7 +1668,7 @@ struct dentry *d_instantiate_unique(struct dentry *entry, struct inode *inode)
 {
 	struct dentry *result;
 
-	BUG_ON(!hlist_unhashed(&entry->d_alias));
+	BUG_ON(!hlist_unhashed(&entry->d_u.d_alias));
 
 	if (inode)
 		spin_lock(&inode->i_lock);
@@ -1698,7 +1699,7 @@ EXPORT_SYMBOL(d_instantiate_unique);
  */
 int d_instantiate_no_diralias(struct dentry *entry, struct inode *inode)
 {
-	BUG_ON(!hlist_unhashed(&entry->d_alias));
+	BUG_ON(!hlist_unhashed(&entry->d_u.d_alias));
 
 	spin_lock(&inode->i_lock);
 	if (S_ISDIR(inode->i_mode) && !hlist_empty(&inode->i_dentry)) {
@@ -1737,7 +1738,7 @@ static struct dentry * __d_find_any_alias(struct inode *inode)
 
 	if (hlist_empty(&inode->i_dentry))
 		return NULL;
-	alias = hlist_entry(inode->i_dentry.first, struct dentry, d_alias);
+	alias = hlist_entry(inode->i_dentry.first, struct dentry, d_u.d_alias);
 	__dget(alias);
 	return alias;
 }
@@ -1799,7 +1800,7 @@ static struct dentry *__d_obtain_alias(struct inode *inode, int disconnected)
 	spin_lock(&tmp->d_lock);
 	tmp->d_inode = inode;
 	tmp->d_flags |= add_flags;
-	hlist_add_head(&tmp->d_alias, &inode->i_dentry);
+	hlist_add_head(&tmp->d_u.d_alias, &inode->i_dentry);
 	hlist_bl_lock(&tmp->d_sb->s_anon);
 	hlist_bl_add_head(&tmp->d_hash, &tmp->d_sb->s_anon);
 	hlist_bl_unlock(&tmp->d_sb->s_anon);
@@ -1888,51 +1889,19 @@ struct dentry *d_add_ci(struct dentry *dentry, struct inode *inode,
 	 * if not go ahead and create it now.
 	 */
 	found = d_hash_and_lookup(dentry->d_parent, name);
-	if (unlikely(IS_ERR(found)))
-		goto err_out;
 	if (!found) {
 		new = d_alloc(dentry->d_parent, name);
 		if (!new) {
 			found = ERR_PTR(-ENOMEM);
-			goto err_out;
-		}
-
-		found = d_splice_alias(inode, new);
-		if (found) {
-			dput(new);
-			return found;
-		}
-		return new;
-	}
-
-	/*
-	 * If a matching dentry exists, and it's not negative use it.
-	 *
-	 * Decrement the reference count to balance the iget() done
-	 * earlier on.
-	 */
-	if (found->d_inode) {
-		if (unlikely(found->d_inode != inode)) {
-			/* This can't happen because bad inodes are unhashed. */
-			BUG_ON(!is_bad_inode(inode));
-			BUG_ON(!is_bad_inode(found->d_inode));
+		} else {
+			found = d_splice_alias(inode, new);
+			if (found) {
+				dput(new);
+				return found;
+			}
+			return new;
 		}
-		iput(inode);
-		return found;
 	}
-
-	/*
-	 * Negative dentry: instantiate it unless the inode is a directory and
-	 * already has a dentry.
-	 */
-	new = d_splice_alias(inode, found);
-	if (new) {
-		dput(found);
-		found = new;
-	}
-	return found;
-
-err_out:
 	iput(inode);
 	return found;
 }
@@ -2234,7 +2203,7 @@ int d_validate(struct dentry *dentry, struct dentry *dparent)
 	struct dentry *child;
 
 	spin_lock(&dparent->d_lock);
-	list_for_each_entry(child, &dparent->d_subdirs, d_u.d_child) {
+	list_for_each_entry(child, &dparent->d_subdirs, d_child) {
 		if (dentry == child) {
 			spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
 			__dget_dlock(dentry);
@@ -2392,6 +2361,8 @@ static void swap_names(struct dentry *dentry, struct dentry *target)
 			 */
 			unsigned int i;
 			BUILD_BUG_ON(!IS_ALIGNED(DNAME_INLINE_LEN, sizeof(long)));
+			kmemcheck_mark_initialized(dentry->d_iname, DNAME_INLINE_LEN);
+			kmemcheck_mark_initialized(target->d_iname, DNAME_INLINE_LEN);
 			for (i = 0; i < DNAME_INLINE_LEN / sizeof(long); i++) {
 				swap(((long *) &dentry->d_iname)[i],
 				     ((long *) &target->d_iname)[i]);
@@ -2525,13 +2496,13 @@ static void __d_move(struct dentry *dentry, struct dentry *target,
 		/* splicing a tree */
 		dentry->d_parent = target->d_parent;
 		target->d_parent = target;
-		list_del_init(&target->d_u.d_child);
-		list_move(&dentry->d_u.d_child, &dentry->d_parent->d_subdirs);
+		list_del_init(&target->d_child);
+		list_move(&dentry->d_child, &dentry->d_parent->d_subdirs);
 	} else {
 		/* swapping two dentries */
 		swap(dentry->d_parent, target->d_parent);
-		list_move(&target->d_u.d_child, &target->d_parent->d_subdirs);
-		list_move(&dentry->d_u.d_child, &dentry->d_parent->d_subdirs);
+		list_move(&target->d_child, &target->d_parent->d_subdirs);
+		list_move(&dentry->d_child, &dentry->d_parent->d_subdirs);
 		if (exchange)
 			fsnotify_d_move(target);
 		fsnotify_d_move(dentry);
@@ -2607,11 +2578,11 @@ struct dentry *d_ancestor(struct dentry *p1, struct dentry *p2)
  * Note: If ever the locking in lock_rename() changes, then please
  * remember to update this too...
  */
-static struct dentry *__d_unalias(struct inode *inode,
+static int __d_unalias(struct inode *inode,
 		struct dentry *dentry, struct dentry *alias)
 {
 	struct mutex *m1 = NULL, *m2 = NULL;
-	struct dentry *ret = ERR_PTR(-EBUSY);
+	int ret = -EBUSY;
 
 	/* If alias and dentry share a parent, then no extra locks required */
 	if (alias->d_parent == dentry->d_parent)
@@ -2626,7 +2597,7 @@ static struct dentry *__d_unalias(struct inode *inode,
 	m2 = &alias->d_parent->d_inode->i_mutex;
 out_unalias:
 	__d_move(alias, dentry, false);
-	ret = alias;
+	ret = 0;
 out_err:
 	spin_unlock(&inode->i_lock);
 	if (m2)
@@ -2661,128 +2632,57 @@ out_err:
  */
 struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry)
 {
-	struct dentry *new = NULL;
-
 	if (IS_ERR(inode))
 		return ERR_CAST(inode);
 
-	if (inode && S_ISDIR(inode->i_mode)) {
-		spin_lock(&inode->i_lock);
-		new = __d_find_any_alias(inode);
-		if (new) {
-			if (!IS_ROOT(new)) {
-				spin_unlock(&inode->i_lock);
-				dput(new);
-				return ERR_PTR(-EIO);
-			}
-			if (d_ancestor(new, dentry)) {
-				spin_unlock(&inode->i_lock);
-				dput(new);
-				return ERR_PTR(-EIO);
-			}
-			write_seqlock(&rename_lock);
-			__d_move(new, dentry, false);
-			write_sequnlock(&rename_lock);
-			spin_unlock(&inode->i_lock);
-			security_d_instantiate(new, inode);
-			iput(inode);
-		} else {
-			/* already taking inode->i_lock, so d_add() by hand */
-			__d_instantiate(dentry, inode);
-			spin_unlock(&inode->i_lock);
-			security_d_instantiate(dentry, inode);
-			d_rehash(dentry);
-		}
-	} else {
-		d_instantiate(dentry, inode);
-		if (d_unhashed(dentry))
-			d_rehash(dentry);
-	}
-	return new;
-}
-EXPORT_SYMBOL(d_splice_alias);
-
-/**
- * d_materialise_unique - introduce an inode into the tree
- * @dentry: candidate dentry
- * @inode: inode to bind to the dentry, to which aliases may be attached
- *
- * Introduces an dentry into the tree, substituting an extant disconnected
- * root directory alias in its place if there is one. Caller must hold the
- * i_mutex of the parent directory.
- */
-struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode)
-{
-	struct dentry *actual;
-
 	BUG_ON(!d_unhashed(dentry));
 
 	if (!inode) {
-		actual = dentry;
 		__d_instantiate(dentry, NULL);
-		d_rehash(actual);
-		goto out_nolock;
+		goto out;
 	}
-
 	spin_lock(&inode->i_lock);
-
 	if (S_ISDIR(inode->i_mode)) {
-		struct dentry *alias;
-
-		/* Does an aliased dentry already exist? */
-		alias = __d_find_alias(inode);
-		if (alias) {
-			actual = alias;
+		struct dentry *new = __d_find_any_alias(inode);
+		if (unlikely(new)) {
 			write_seqlock(&rename_lock);
-
-			if (d_ancestor(alias, dentry)) {
-				/* Check for loops */
-				actual = ERR_PTR(-ELOOP);
+			if (unlikely(d_ancestor(new, dentry))) {
+				write_sequnlock(&rename_lock);
 				spin_unlock(&inode->i_lock);
-			} else if (IS_ROOT(alias)) {
-				/* Is this an anonymous mountpoint that we
-				 * could splice into our tree? */
-				__d_move(alias, dentry, false);
+				dput(new);
+				new = ERR_PTR(-ELOOP);
+				pr_warn_ratelimited(
+					"VFS: Lookup of '%s' in %s %s"
+					" would have caused loop\n",
+					dentry->d_name.name,
+					inode->i_sb->s_type->name,
+					inode->i_sb->s_id);
+			} else if (!IS_ROOT(new)) {
+				int err = __d_unalias(inode, dentry, new);
 				write_sequnlock(&rename_lock);
-				goto found;
+				if (err) {
+					dput(new);
+					new = ERR_PTR(err);
+				}
 			} else {
-				/* Nope, but we must(!) avoid directory
-				 * aliasing. This drops inode->i_lock */
-				actual = __d_unalias(inode, dentry, alias);
-			}
-			write_sequnlock(&rename_lock);
-			if (IS_ERR(actual)) {
-				if (PTR_ERR(actual) == -ELOOP)
-					pr_warn_ratelimited(
-						"VFS: Lookup of '%s' in %s %s"
-						" would have caused loop\n",
-						dentry->d_name.name,
-						inode->i_sb->s_type->name,
-						inode->i_sb->s_id);
-				dput(alias);
+				__d_move(new, dentry, false);
+				write_sequnlock(&rename_lock);
+				spin_unlock(&inode->i_lock);
+				security_d_instantiate(new, inode);
 			}
-			goto out_nolock;
+			iput(inode);
+			return new;
 		}
 	}
-
-	/* Add a unique reference */
-	actual = __d_instantiate_unique(dentry, inode);
-	if (!actual)
-		actual = dentry;
-
-	d_rehash(actual);
-found:
+	/* already taking inode->i_lock, so d_add() by hand */
+	__d_instantiate(dentry, inode);
 	spin_unlock(&inode->i_lock);
-out_nolock:
-	if (actual == dentry) {
-		security_d_instantiate(dentry, inode);
-		return NULL;
-	}
-
-	iput(inode);
-	return actual;
+out:
+	security_d_instantiate(dentry, inode);
+	d_rehash(dentry);
+	return NULL;
 }
-EXPORT_SYMBOL_GPL(d_materialise_unique);
+EXPORT_SYMBOL(d_splice_alias);
 
 static int prepend(char **buffer, int *buflen, const char *str, int namelen)
 {
@@ -3318,7 +3218,7 @@ void d_tmpfile(struct dentry *dentry, struct inode *inode)
 {
 	inode_dec_link_count(inode);
 	BUG_ON(dentry->d_name.name != dentry->d_iname ||
-		!hlist_unhashed(&dentry->d_alias) ||
+		!hlist_unhashed(&dentry->d_u.d_alias) ||
 		!d_unlinked(dentry));
 	spin_lock(&dentry->d_parent->d_lock);
 	spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c
index 76c08c2..8e0f2f4 100644
--- a/fs/debugfs/file.c
+++ b/fs/debugfs/file.c
@@ -692,18 +692,19 @@ EXPORT_SYMBOL_GPL(debugfs_create_u32_array);
  * because some peripherals have several blocks of identical registers,
  * for example configuration of dma channels
  */
-int debugfs_print_regs32(struct seq_file *s, const struct debugfs_reg32 *regs,
-			   int nregs, void __iomem *base, char *prefix)
+void debugfs_print_regs32(struct seq_file *s, const struct debugfs_reg32 *regs,
+			  int nregs, void __iomem *base, char *prefix)
 {
-	int i, ret = 0;
+	int i;
 
 	for (i = 0; i < nregs; i++, regs++) {
 		if (prefix)
-			ret += seq_printf(s, "%s", prefix);
-		ret += seq_printf(s, "%s = 0x%08x\n", regs->name,
-				  readl(base + regs->offset));
+			seq_printf(s, "%s", prefix);
+		seq_printf(s, "%s = 0x%08x\n", regs->name,
+			   readl(base + regs->offset));
+		if (seq_has_overflowed(s))
+			break;
 	}
-	return ret;
 }
 EXPORT_SYMBOL_GPL(debugfs_print_regs32);
 
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index 1e3b99d..05f2960 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -553,7 +553,7 @@ void debugfs_remove_recursive(struct dentry *dentry)
 	 * use the d_u.d_child as the rcu head and corrupt this list.
 	 */
 	spin_lock(&parent->d_lock);
-	list_for_each_entry(child, &parent->d_subdirs, d_u.d_child) {
+	list_for_each_entry(child, &parent->d_subdirs, d_child) {
 		if (!debugfs_positive(child))
 			continue;
 
diff --git a/fs/dlm/debug_fs.c b/fs/dlm/debug_fs.c
index 1323c56..eea6491 100644
--- a/fs/dlm/debug_fs.c
+++ b/fs/dlm/debug_fs.c
@@ -48,8 +48,8 @@ static char *print_lockmode(int mode)
 	}
 }
 
-static int print_format1_lock(struct seq_file *s, struct dlm_lkb *lkb,
-			      struct dlm_rsb *res)
+static void print_format1_lock(struct seq_file *s, struct dlm_lkb *lkb,
+			       struct dlm_rsb *res)
 {
 	seq_printf(s, "%08x %s", lkb->lkb_id, print_lockmode(lkb->lkb_grmode));
 
@@ -68,21 +68,17 @@ static int print_format1_lock(struct seq_file *s, struct dlm_lkb *lkb,
 	if (lkb->lkb_wait_type)
 		seq_printf(s, " wait_type: %d", lkb->lkb_wait_type);
 
-	return seq_puts(s, "\n");
+	seq_puts(s, "\n");
 }
 
-static int print_format1(struct dlm_rsb *res, struct seq_file *s)
+static void print_format1(struct dlm_rsb *res, struct seq_file *s)
 {
 	struct dlm_lkb *lkb;
 	int i, lvblen = res->res_ls->ls_lvblen, recover_list, root_list;
-	int rv;
 
 	lock_rsb(res);
 
-	rv = seq_printf(s, "\nResource %p Name (len=%d) \"",
-			res, res->res_length);
-	if (rv)
-		goto out;
+	seq_printf(s, "\nResource %p Name (len=%d) \"", res, res->res_length);
 
 	for (i = 0; i < res->res_length; i++) {
 		if (isprint(res->res_name[i]))
@@ -92,17 +88,16 @@ static int print_format1(struct dlm_rsb *res, struct seq_file *s)
 	}
 
 	if (res->res_nodeid > 0)
-		rv = seq_printf(s, "\"\nLocal Copy, Master is node %d\n",
-				res->res_nodeid);
+		seq_printf(s, "\"\nLocal Copy, Master is node %d\n",
+			   res->res_nodeid);
 	else if (res->res_nodeid == 0)
-		rv = seq_puts(s, "\"\nMaster Copy\n");
+		seq_puts(s, "\"\nMaster Copy\n");
 	else if (res->res_nodeid == -1)
-		rv = seq_printf(s, "\"\nLooking up master (lkid %x)\n",
-			   	res->res_first_lkid);
+		seq_printf(s, "\"\nLooking up master (lkid %x)\n",
+			   res->res_first_lkid);
 	else
-		rv = seq_printf(s, "\"\nInvalid master %d\n",
-				res->res_nodeid);
-	if (rv)
+		seq_printf(s, "\"\nInvalid master %d\n", res->res_nodeid);
+	if (seq_has_overflowed(s))
 		goto out;
 
 	/* Print the LVB: */
@@ -116,8 +111,8 @@ static int print_format1(struct dlm_rsb *res, struct seq_file *s)
 		}
 		if (rsb_flag(res, RSB_VALNOTVALID))
 			seq_puts(s, " (INVALID)");
-		rv = seq_puts(s, "\n");
-		if (rv)
+		seq_puts(s, "\n");
+		if (seq_has_overflowed(s))
 			goto out;
 	}
 
@@ -125,32 +120,30 @@ static int print_format1(struct dlm_rsb *res, struct seq_file *s)
 	recover_list = !list_empty(&res->res_recover_list);
 
 	if (root_list || recover_list) {
-		rv = seq_printf(s, "Recovery: root %d recover %d flags %lx "
-				"count %d\n", root_list, recover_list,
-			   	res->res_flags, res->res_recover_locks_count);
-		if (rv)
-			goto out;
+		seq_printf(s, "Recovery: root %d recover %d flags %lx count %d\n",
+			   root_list, recover_list,
+			   res->res_flags, res->res_recover_locks_count);
 	}
 
 	/* Print the locks attached to this resource */
 	seq_puts(s, "Granted Queue\n");
 	list_for_each_entry(lkb, &res->res_grantqueue, lkb_statequeue) {
-		rv = print_format1_lock(s, lkb, res);
-		if (rv)
+		print_format1_lock(s, lkb, res);
+		if (seq_has_overflowed(s))
 			goto out;
 	}
 
 	seq_puts(s, "Conversion Queue\n");
 	list_for_each_entry(lkb, &res->res_convertqueue, lkb_statequeue) {
-		rv = print_format1_lock(s, lkb, res);
-		if (rv)
+		print_format1_lock(s, lkb, res);
+		if (seq_has_overflowed(s))
 			goto out;
 	}
 
 	seq_puts(s, "Waiting Queue\n");
 	list_for_each_entry(lkb, &res->res_waitqueue, lkb_statequeue) {
-		rv = print_format1_lock(s, lkb, res);
-		if (rv)
+		print_format1_lock(s, lkb, res);
+		if (seq_has_overflowed(s))
 			goto out;
 	}
 
@@ -159,23 +152,23 @@ static int print_format1(struct dlm_rsb *res, struct seq_file *s)
 
 	seq_puts(s, "Lookup Queue\n");
 	list_for_each_entry(lkb, &res->res_lookup, lkb_rsb_lookup) {
-		rv = seq_printf(s, "%08x %s", lkb->lkb_id,
-				print_lockmode(lkb->lkb_rqmode));
+		seq_printf(s, "%08x %s",
+			   lkb->lkb_id, print_lockmode(lkb->lkb_rqmode));
 		if (lkb->lkb_wait_type)
 			seq_printf(s, " wait_type: %d", lkb->lkb_wait_type);
-		rv = seq_puts(s, "\n");
+		seq_puts(s, "\n");
+		if (seq_has_overflowed(s))
+			goto out;
 	}
  out:
 	unlock_rsb(res);
-	return rv;
 }
 
-static int print_format2_lock(struct seq_file *s, struct dlm_lkb *lkb,
-			      struct dlm_rsb *r)
+static void print_format2_lock(struct seq_file *s, struct dlm_lkb *lkb,
+			       struct dlm_rsb *r)
 {
 	u64 xid = 0;
 	u64 us;
-	int rv;
 
 	if (lkb->lkb_flags & DLM_IFL_USER) {
 		if (lkb->lkb_ua)
@@ -188,103 +181,97 @@ static int print_format2_lock(struct seq_file *s, struct dlm_lkb *lkb,
 	/* id nodeid remid pid xid exflags flags sts grmode rqmode time_us
 	   r_nodeid r_len r_name */
 
-	rv = seq_printf(s, "%x %d %x %u %llu %x %x %d %d %d %llu %u %d \"%s\"\n",
-			lkb->lkb_id,
-			lkb->lkb_nodeid,
-			lkb->lkb_remid,
-			lkb->lkb_ownpid,
-			(unsigned long long)xid,
-			lkb->lkb_exflags,
-			lkb->lkb_flags,
-			lkb->lkb_status,
-			lkb->lkb_grmode,
-			lkb->lkb_rqmode,
-			(unsigned long long)us,
-			r->res_nodeid,
-			r->res_length,
-			r->res_name);
-	return rv;
+	seq_printf(s, "%x %d %x %u %llu %x %x %d %d %d %llu %u %d \"%s\"\n",
+		   lkb->lkb_id,
+		   lkb->lkb_nodeid,
+		   lkb->lkb_remid,
+		   lkb->lkb_ownpid,
+		   (unsigned long long)xid,
+		   lkb->lkb_exflags,
+		   lkb->lkb_flags,
+		   lkb->lkb_status,
+		   lkb->lkb_grmode,
+		   lkb->lkb_rqmode,
+		   (unsigned long long)us,
+		   r->res_nodeid,
+		   r->res_length,
+		   r->res_name);
 }
 
-static int print_format2(struct dlm_rsb *r, struct seq_file *s)
+static void print_format2(struct dlm_rsb *r, struct seq_file *s)
 {
 	struct dlm_lkb *lkb;
-	int rv = 0;
 
 	lock_rsb(r);
 
 	list_for_each_entry(lkb, &r->res_grantqueue, lkb_statequeue) {
-		rv = print_format2_lock(s, lkb, r);
-		if (rv)
+		print_format2_lock(s, lkb, r);
+		if (seq_has_overflowed(s))
 			goto out;
 	}
 
 	list_for_each_entry(lkb, &r->res_convertqueue, lkb_statequeue) {
-		rv = print_format2_lock(s, lkb, r);
-		if (rv)
+		print_format2_lock(s, lkb, r);
+		if (seq_has_overflowed(s))
 			goto out;
 	}
 
 	list_for_each_entry(lkb, &r->res_waitqueue, lkb_statequeue) {
-		rv = print_format2_lock(s, lkb, r);
-		if (rv)
+		print_format2_lock(s, lkb, r);
+		if (seq_has_overflowed(s))
 			goto out;
 	}
  out:
 	unlock_rsb(r);
-	return rv;
 }
 
-static int print_format3_lock(struct seq_file *s, struct dlm_lkb *lkb,
+static void print_format3_lock(struct seq_file *s, struct dlm_lkb *lkb,
 			      int rsb_lookup)
 {
 	u64 xid = 0;
-	int rv;
 
 	if (lkb->lkb_flags & DLM_IFL_USER) {
 		if (lkb->lkb_ua)
 			xid = lkb->lkb_ua->xid;
 	}
 
-	rv = seq_printf(s, "lkb %x %d %x %u %llu %x %x %d %d %d %d %d %d %u %llu %llu\n",
-			lkb->lkb_id,
-			lkb->lkb_nodeid,
-			lkb->lkb_remid,
-			lkb->lkb_ownpid,
-			(unsigned long long)xid,
-			lkb->lkb_exflags,
-			lkb->lkb_flags,
-			lkb->lkb_status,
-			lkb->lkb_grmode,
-			lkb->lkb_rqmode,
-			lkb->lkb_last_bast.mode,
-			rsb_lookup,
-			lkb->lkb_wait_type,
-			lkb->lkb_lvbseq,
-			(unsigned long long)ktime_to_ns(lkb->lkb_timestamp),
-			(unsigned long long)ktime_to_ns(lkb->lkb_last_bast_time));
-	return rv;
+	seq_printf(s, "lkb %x %d %x %u %llu %x %x %d %d %d %d %d %d %u %llu %llu\n",
+		   lkb->lkb_id,
+		   lkb->lkb_nodeid,
+		   lkb->lkb_remid,
+		   lkb->lkb_ownpid,
+		   (unsigned long long)xid,
+		   lkb->lkb_exflags,
+		   lkb->lkb_flags,
+		   lkb->lkb_status,
+		   lkb->lkb_grmode,
+		   lkb->lkb_rqmode,
+		   lkb->lkb_last_bast.mode,
+		   rsb_lookup,
+		   lkb->lkb_wait_type,
+		   lkb->lkb_lvbseq,
+		   (unsigned long long)ktime_to_ns(lkb->lkb_timestamp),
+		   (unsigned long long)ktime_to_ns(lkb->lkb_last_bast_time));
 }
 
-static int print_format3(struct dlm_rsb *r, struct seq_file *s)
+static void print_format3(struct dlm_rsb *r, struct seq_file *s)
 {
 	struct dlm_lkb *lkb;
 	int i, lvblen = r->res_ls->ls_lvblen;
 	int print_name = 1;
-	int rv;
 
 	lock_rsb(r);
 
-	rv = seq_printf(s, "rsb %p %d %x %lx %d %d %u %d ",
-			r,
-			r->res_nodeid,
-			r->res_first_lkid,
-			r->res_flags,
-			!list_empty(&r->res_root_list),
-			!list_empty(&r->res_recover_list),
-			r->res_recover_locks_count,
-			r->res_length);
-	if (rv)
+	seq_printf(s, "rsb %p %d %x %lx %d %d %u %d ",
+		   r,
+		   r->res_nodeid,
+		   r->res_first_lkid,
+		   r->res_flags,
+		   !list_empty(&r->res_root_list),
+		   !list_empty(&r->res_recover_list),
+		   r->res_recover_locks_count,
+		   r->res_length);
+	if (seq_has_overflowed(s))
 		goto out;
 
 	for (i = 0; i < r->res_length; i++) {
@@ -292,7 +279,7 @@ static int print_format3(struct dlm_rsb *r, struct seq_file *s)
 			print_name = 0;
 	}
 
-	seq_printf(s, "%s", print_name ? "str " : "hex");
+	seq_puts(s, print_name ? "str " : "hex");
 
 	for (i = 0; i < r->res_length; i++) {
 		if (print_name)
@@ -300,8 +287,8 @@ static int print_format3(struct dlm_rsb *r, struct seq_file *s)
 		else
 			seq_printf(s, " %02x", (unsigned char)r->res_name[i]);
 	}
-	rv = seq_puts(s, "\n");
-	if (rv)
+	seq_puts(s, "\n");
+	if (seq_has_overflowed(s))
 		goto out;
 
 	if (!r->res_lvbptr)
@@ -311,65 +298,62 @@ static int print_format3(struct dlm_rsb *r, struct seq_file *s)
 
 	for (i = 0; i < lvblen; i++)
 		seq_printf(s, " %02x", (unsigned char)r->res_lvbptr[i]);
-	rv = seq_puts(s, "\n");
-	if (rv)
+	seq_puts(s, "\n");
+	if (seq_has_overflowed(s))
 		goto out;
 
  do_locks:
 	list_for_each_entry(lkb, &r->res_grantqueue, lkb_statequeue) {
-		rv = print_format3_lock(s, lkb, 0);
-		if (rv)
+		print_format3_lock(s, lkb, 0);
+		if (seq_has_overflowed(s))
 			goto out;
 	}
 
 	list_for_each_entry(lkb, &r->res_convertqueue, lkb_statequeue) {
-		rv = print_format3_lock(s, lkb, 0);
-		if (rv)
+		print_format3_lock(s, lkb, 0);
+		if (seq_has_overflowed(s))
 			goto out;
 	}
 
 	list_for_each_entry(lkb, &r->res_waitqueue, lkb_statequeue) {
-		rv = print_format3_lock(s, lkb, 0);
-		if (rv)
+		print_format3_lock(s, lkb, 0);
+		if (seq_has_overflowed(s))
 			goto out;
 	}
 
 	list_for_each_entry(lkb, &r->res_lookup, lkb_rsb_lookup) {
-		rv = print_format3_lock(s, lkb, 1);
-		if (rv)
+		print_format3_lock(s, lkb, 1);
+		if (seq_has_overflowed(s))
 			goto out;
 	}
  out:
 	unlock_rsb(r);
-	return rv;
 }
 
-static int print_format4(struct dlm_rsb *r, struct seq_file *s)
+static void print_format4(struct dlm_rsb *r, struct seq_file *s)
 {
 	int our_nodeid = dlm_our_nodeid();
 	int print_name = 1;
-	int i, rv;
+	int i;
 
 	lock_rsb(r);
 
-	rv = seq_printf(s, "rsb %p %d %d %d %d %lu %lx %d ",
-			r,
-			r->res_nodeid,
-			r->res_master_nodeid,
-			r->res_dir_nodeid,
-			our_nodeid,
-			r->res_toss_time,
-			r->res_flags,
-			r->res_length);
-	if (rv)
-		goto out;
+	seq_printf(s, "rsb %p %d %d %d %d %lu %lx %d ",
+		   r,
+		   r->res_nodeid,
+		   r->res_master_nodeid,
+		   r->res_dir_nodeid,
+		   our_nodeid,
+		   r->res_toss_time,
+		   r->res_flags,
+		   r->res_length);
 
 	for (i = 0; i < r->res_length; i++) {
 		if (!isascii(r->res_name[i]) || !isprint(r->res_name[i]))
 			print_name = 0;
 	}
 
-	seq_printf(s, "%s", print_name ? "str " : "hex");
+	seq_puts(s, print_name ? "str " : "hex");
 
 	for (i = 0; i < r->res_length; i++) {
 		if (print_name)
@@ -377,10 +361,9 @@ static int print_format4(struct dlm_rsb *r, struct seq_file *s)
 		else
 			seq_printf(s, " %02x", (unsigned char)r->res_name[i]);
 	}
-	rv = seq_puts(s, "\n");
- out:
+	seq_puts(s, "\n");
+
 	unlock_rsb(r);
-	return rv;
 }
 
 struct rsbtbl_iter {
@@ -390,47 +373,45 @@ struct rsbtbl_iter {
 	int header;
 };
 
-/* seq_printf returns -1 if the buffer is full, and 0 otherwise.
-   If the buffer is full, seq_printf can be called again, but it
-   does nothing and just returns -1.  So, the these printing routines
-   periodically check the return value to avoid wasting too much time
-   trying to print to a full buffer. */
+/*
+ * If the buffer is full, seq_printf can be called again, but it
+ * does nothing.  So, the these printing routines periodically check
+ * seq_has_overflowed to avoid wasting too much time trying to print to
+ * a full buffer.
+ */
 
 static int table_seq_show(struct seq_file *seq, void *iter_ptr)
 {
 	struct rsbtbl_iter *ri = iter_ptr;
-	int rv = 0;
 
 	switch (ri->format) {
 	case 1:
-		rv = print_format1(ri->rsb, seq);
+		print_format1(ri->rsb, seq);
 		break;
 	case 2:
 		if (ri->header) {
-			seq_printf(seq, "id nodeid remid pid xid exflags "
-					"flags sts grmode rqmode time_ms "
-					"r_nodeid r_len r_name\n");
+			seq_puts(seq, "id nodeid remid pid xid exflags flags sts grmode rqmode time_ms r_nodeid r_len r_name\n");
 			ri->header = 0;
 		}
-		rv = print_format2(ri->rsb, seq);
+		print_format2(ri->rsb, seq);
 		break;
 	case 3:
 		if (ri->header) {
-			seq_printf(seq, "version rsb 1.1 lvb 1.1 lkb 1.1\n");
+			seq_puts(seq, "version rsb 1.1 lvb 1.1 lkb 1.1\n");
 			ri->header = 0;
 		}
-		rv = print_format3(ri->rsb, seq);
+		print_format3(ri->rsb, seq);
 		break;
 	case 4:
 		if (ri->header) {
-			seq_printf(seq, "version 4 rsb 2\n");
+			seq_puts(seq, "version 4 rsb 2\n");
 			ri->header = 0;
 		}
-		rv = print_format4(ri->rsb, seq);
+		print_format4(ri->rsb, seq);
 		break;
 	}
 
-	return rv;
+	return 0;
 }
 
 static const struct seq_operations format1_seq_ops;
diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c
index 83f3d55..35502d4 100644
--- a/fs/dlm/lock.c
+++ b/fs/dlm/lock.c
@@ -5886,6 +5886,78 @@ int dlm_user_convert(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
 	return error;
 }
 
+/*
+ * The caller asks for an orphan lock on a given resource with a given mode.
+ * If a matching lock exists, it's moved to the owner's list of locks and
+ * the lkid is returned.
+ */
+
+int dlm_user_adopt_orphan(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
+		     int mode, uint32_t flags, void *name, unsigned int namelen,
+		     unsigned long timeout_cs, uint32_t *lkid)
+{
+	struct dlm_lkb *lkb;
+	struct dlm_user_args *ua;
+	int found_other_mode = 0;
+	int found = 0;
+	int rv = 0;
+
+	mutex_lock(&ls->ls_orphans_mutex);
+	list_for_each_entry(lkb, &ls->ls_orphans, lkb_ownqueue) {
+		if (lkb->lkb_resource->res_length != namelen)
+			continue;
+		if (memcmp(lkb->lkb_resource->res_name, name, namelen))
+			continue;
+		if (lkb->lkb_grmode != mode) {
+			found_other_mode = 1;
+			continue;
+		}
+
+		found = 1;
+		list_del_init(&lkb->lkb_ownqueue);
+		lkb->lkb_flags &= ~DLM_IFL_ORPHAN;
+		*lkid = lkb->lkb_id;
+		break;
+	}
+	mutex_unlock(&ls->ls_orphans_mutex);
+
+	if (!found && found_other_mode) {
+		rv = -EAGAIN;
+		goto out;
+	}
+
+	if (!found) {
+		rv = -ENOENT;
+		goto out;
+	}
+
+	lkb->lkb_exflags = flags;
+	lkb->lkb_ownpid = (int) current->pid;
+
+	ua = lkb->lkb_ua;
+
+	ua->proc = ua_tmp->proc;
+	ua->xid = ua_tmp->xid;
+	ua->castparam = ua_tmp->castparam;
+	ua->castaddr = ua_tmp->castaddr;
+	ua->bastparam = ua_tmp->bastparam;
+	ua->bastaddr = ua_tmp->bastaddr;
+	ua->user_lksb = ua_tmp->user_lksb;
+
+	/*
+	 * The lkb reference from the ls_orphans list was not
+	 * removed above, and is now considered the reference
+	 * for the proc locks list.
+	 */
+
+	spin_lock(&ua->proc->locks_spin);
+	list_add_tail(&lkb->lkb_ownqueue, &ua->proc->locks);
+	spin_unlock(&ua->proc->locks_spin);
+ out:
+	kfree(ua_tmp);
+	return rv;
+}
+
 int dlm_user_unlock(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
 		    uint32_t flags, uint32_t lkid, char *lvb_in)
 {
@@ -6029,7 +6101,7 @@ static int orphan_proc_lock(struct dlm_ls *ls, struct dlm_lkb *lkb)
 	struct dlm_args args;
 	int error;
 
-	hold_lkb(lkb);
+	hold_lkb(lkb); /* reference for the ls_orphans list */
 	mutex_lock(&ls->ls_orphans_mutex);
 	list_add_tail(&lkb->lkb_ownqueue, &ls->ls_orphans);
 	mutex_unlock(&ls->ls_orphans_mutex);
@@ -6217,7 +6289,7 @@ int dlm_user_purge(struct dlm_ls *ls, struct dlm_user_proc *proc,
 {
 	int error = 0;
 
-	if (nodeid != dlm_our_nodeid()) {
+	if (nodeid && (nodeid != dlm_our_nodeid())) {
 		error = send_purge(ls, nodeid, pid);
 	} else {
 		dlm_lock_recovery(ls);
diff --git a/fs/dlm/lock.h b/fs/dlm/lock.h
index 5e0c72e..ed8ebd3 100644
--- a/fs/dlm/lock.h
+++ b/fs/dlm/lock.h
@@ -49,6 +49,9 @@ int dlm_user_request(struct dlm_ls *ls, struct dlm_user_args *ua, int mode,
 int dlm_user_convert(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
 	int mode, uint32_t flags, uint32_t lkid, char *lvb_in,
 	unsigned long timeout_cs);
+int dlm_user_adopt_orphan(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
+	int mode, uint32_t flags, void *name, unsigned int namelen,
+	unsigned long timeout_cs, uint32_t *lkid);
 int dlm_user_unlock(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
 	uint32_t flags, uint32_t lkid, char *lvb_in);
 int dlm_user_cancel(struct dlm_ls *ls,  struct dlm_user_args *ua_tmp,
diff --git a/fs/dlm/user.c b/fs/dlm/user.c
index 142e2165..fb85f32 100644
--- a/fs/dlm/user.c
+++ b/fs/dlm/user.c
@@ -238,6 +238,7 @@ static int device_user_lock(struct dlm_user_proc *proc,
 {
 	struct dlm_ls *ls;
 	struct dlm_user_args *ua;
+	uint32_t lkid;
 	int error = -ENOMEM;
 
 	ls = dlm_find_lockspace_local(proc->lockspace);
@@ -260,12 +261,20 @@ static int device_user_lock(struct dlm_user_proc *proc,
 	ua->bastaddr = params->bastaddr;
 	ua->xid = params->xid;
 
-	if (params->flags & DLM_LKF_CONVERT)
+	if (params->flags & DLM_LKF_CONVERT) {
 		error = dlm_user_convert(ls, ua,
 				         params->mode, params->flags,
 				         params->lkid, params->lvb,
 					 (unsigned long) params->timeout);
-	else {
+	} else if (params->flags & DLM_LKF_ORPHAN) {
+		error = dlm_user_adopt_orphan(ls, ua,
+					 params->mode, params->flags,
+					 params->name, params->namelen,
+					 (unsigned long) params->timeout,
+					 &lkid);
+		if (!error)
+			error = lkid;
+	} else {
 		error = dlm_user_request(ls, ua,
 					 params->mode, params->flags,
 					 params->name, params->namelen,
diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c
index 2f6735d..c2d6604 100644
--- a/fs/ecryptfs/crypto.c
+++ b/fs/ecryptfs/crypto.c
@@ -1373,7 +1373,7 @@ out:
 int ecryptfs_read_xattr_region(char *page_virt, struct inode *ecryptfs_inode)
 {
 	struct dentry *lower_dentry =
-		ecryptfs_inode_to_private(ecryptfs_inode)->lower_file->f_dentry;
+		ecryptfs_inode_to_private(ecryptfs_inode)->lower_file->f_path.dentry;
 	ssize_t size;
 	int rc = 0;
 
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c
index f5bce90..80154ec 100644
--- a/fs/ecryptfs/file.c
+++ b/fs/ecryptfs/file.c
@@ -75,11 +75,11 @@ struct ecryptfs_getdents_callback {
 
 /* Inspired by generic filldir in fs/readdir.c */
 static int
-ecryptfs_filldir(void *dirent, const char *lower_name, int lower_namelen,
-		 loff_t offset, u64 ino, unsigned int d_type)
+ecryptfs_filldir(struct dir_context *ctx, const char *lower_name,
+		 int lower_namelen, loff_t offset, u64 ino, unsigned int d_type)
 {
 	struct ecryptfs_getdents_callback *buf =
-	    (struct ecryptfs_getdents_callback *)dirent;
+		container_of(ctx, struct ecryptfs_getdents_callback, ctx);
 	size_t name_size;
 	char *name;
 	int rc;
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index 1b119d3..c4cd1fd 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -566,6 +566,13 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags
 	s->s_maxbytes = path.dentry->d_sb->s_maxbytes;
 	s->s_blocksize = path.dentry->d_sb->s_blocksize;
 	s->s_magic = ECRYPTFS_SUPER_MAGIC;
+	s->s_stack_depth = path.dentry->d_sb->s_stack_depth + 1;
+
+	rc = -EINVAL;
+	if (s->s_stack_depth > FILESYSTEM_MAX_STACK_DEPTH) {
+		pr_err("eCryptfs: maximum fs stacking depth exceeded\n");
+		goto out_free;
+	}
 
 	inode = ecryptfs_get_inode(path.dentry->d_inode, s);
 	rc = PTR_ERR(inode);
diff --git a/fs/ecryptfs/mmap.c b/fs/ecryptfs/mmap.c
index 564a1fa..4626976 100644
--- a/fs/ecryptfs/mmap.c
+++ b/fs/ecryptfs/mmap.c
@@ -419,7 +419,7 @@ static int ecryptfs_write_inode_size_to_xattr(struct inode *ecryptfs_inode)
 	ssize_t size;
 	void *xattr_virt;
 	struct dentry *lower_dentry =
-		ecryptfs_inode_to_private(ecryptfs_inode)->lower_file->f_dentry;
+		ecryptfs_inode_to_private(ecryptfs_inode)->lower_file->f_path.dentry;
 	struct inode *lower_inode = lower_dentry->d_inode;
 	int rc;
 
diff --git a/fs/efivarfs/file.c b/fs/efivarfs/file.c
index cdb2971..90001da 100644
--- a/fs/efivarfs/file.c
+++ b/fs/efivarfs/file.c
@@ -47,8 +47,8 @@ static ssize_t efivarfs_file_write(struct file *file,
 
 	if (bytes == -ENOENT) {
 		drop_nlink(inode);
-		d_delete(file->f_dentry);
-		dput(file->f_dentry);
+		d_delete(file->f_path.dentry);
+		dput(file->f_path.dentry);
 	} else {
 		mutex_lock(&inode->i_mutex);
 		i_size_write(inode, datasize + sizeof(attributes));
diff --git a/fs/efivarfs/super.c b/fs/efivarfs/super.c
index 0a48886..6dad117 100644
--- a/fs/efivarfs/super.c
+++ b/fs/efivarfs/super.c
@@ -236,6 +236,7 @@ static void efivarfs_kill_sb(struct super_block *sb)
 }
 
 static struct file_system_type efivarfs_type = {
+	.owner   = THIS_MODULE,
 	.name    = "efivarfs",
 	.mount   = efivarfs_mount,
 	.kill_sb = efivarfs_kill_sb,
@@ -244,17 +245,23 @@ static struct file_system_type efivarfs_type = {
 static __init int efivarfs_init(void)
 {
 	if (!efi_enabled(EFI_RUNTIME_SERVICES))
-		return 0;
+		return -ENODEV;
 
 	if (!efivars_kobject())
-		return 0;
+		return -ENODEV;
 
 	return register_filesystem(&efivarfs_type);
 }
 
+static __exit void efivarfs_exit(void)
+{
+	unregister_filesystem(&efivarfs_type);
+}
+
 MODULE_AUTHOR("Matthew Garrett, Jeremy Kerr");
 MODULE_DESCRIPTION("EFI Variable Filesystem");
 MODULE_LICENSE("GPL");
 MODULE_ALIAS_FS("efivarfs");
 
 module_init(efivarfs_init);
+module_exit(efivarfs_exit);
diff --git a/fs/eventfd.c b/fs/eventfd.c
index d6a88e7..4b0a226 100644
--- a/fs/eventfd.c
+++ b/fs/eventfd.c
@@ -287,17 +287,14 @@ static ssize_t eventfd_write(struct file *file, const char __user *buf, size_t c
 }
 
 #ifdef CONFIG_PROC_FS
-static int eventfd_show_fdinfo(struct seq_file *m, struct file *f)
+static void eventfd_show_fdinfo(struct seq_file *m, struct file *f)
 {
 	struct eventfd_ctx *ctx = f->private_data;
-	int ret;
 
 	spin_lock_irq(&ctx->wqh.lock);
-	ret = seq_printf(m, "eventfd-count: %16llx\n",
-			 (unsigned long long)ctx->count);
+	seq_printf(m, "eventfd-count: %16llx\n",
+		   (unsigned long long)ctx->count);
 	spin_unlock_irq(&ctx->wqh.lock);
-
-	return ret;
 }
 #endif
 
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 7bcfff9..d77f944 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -870,25 +870,22 @@ static unsigned int ep_eventpoll_poll(struct file *file, poll_table *wait)
 }
 
 #ifdef CONFIG_PROC_FS
-static int ep_show_fdinfo(struct seq_file *m, struct file *f)
+static void ep_show_fdinfo(struct seq_file *m, struct file *f)
 {
 	struct eventpoll *ep = f->private_data;
 	struct rb_node *rbp;
-	int ret = 0;
 
 	mutex_lock(&ep->mtx);
 	for (rbp = rb_first(&ep->rbr); rbp; rbp = rb_next(rbp)) {
 		struct epitem *epi = rb_entry(rbp, struct epitem, rbn);
 
-		ret = seq_printf(m, "tfd: %8d events: %8x data: %16llx\n",
-				 epi->ffd.fd, epi->event.events,
-				 (long long)epi->event.data);
-		if (ret)
+		seq_printf(m, "tfd: %8d events: %8x data: %16llx\n",
+			   epi->ffd.fd, epi->event.events,
+			   (long long)epi->event.data);
+		if (seq_has_overflowed(m))
 			break;
 	}
 	mutex_unlock(&ep->mtx);
-
-	return ret;
 }
 #endif
 
diff --git a/fs/exec.c b/fs/exec.c
index 7302b75..01aebe3 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -277,6 +277,7 @@ static int __bprm_mm_init(struct linux_binprm *bprm)
 		goto err;
 
 	mm->stack_vm = mm->total_vm = 1;
+	arch_bprm_mm_init(mm, vma);
 	up_write(&mm->mmap_sem);
 	bprm->p = vma->vm_end - sizeof(void *);
 	return 0;
diff --git a/fs/exofs/Kbuild b/fs/exofs/Kbuild
index 389ba83..b47c7b8 100644
--- a/fs/exofs/Kbuild
+++ b/fs/exofs/Kbuild
@@ -4,7 +4,7 @@
 # Copyright (C) 2008 Panasas Inc.  All rights reserved.
 #
 # Authors:
-#   Boaz Harrosh <bharrosh@panasas.com>
+#   Boaz Harrosh <ooo@electrozaur.com>
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License version 2
diff --git a/fs/exofs/common.h b/fs/exofs/common.h
index 3bbd469..7d88ef5 100644
--- a/fs/exofs/common.h
+++ b/fs/exofs/common.h
@@ -4,7 +4,7 @@
  * Copyright (C) 2005, 2006
  * Avishay Traeger (avishay@gmail.com)
  * Copyright (C) 2008, 2009
- * Boaz Harrosh <bharrosh@panasas.com>
+ * Boaz Harrosh <ooo@electrozaur.com>
  *
  * Copyrights for code taken from ext2:
  *     Copyright (C) 1992, 1993, 1994, 1995
diff --git a/fs/exofs/dir.c b/fs/exofs/dir.c
index 49f51ab..d7defd5 100644
--- a/fs/exofs/dir.c
+++ b/fs/exofs/dir.c
@@ -2,7 +2,7 @@
  * Copyright (C) 2005, 2006
  * Avishay Traeger (avishay@gmail.com)
  * Copyright (C) 2008, 2009
- * Boaz Harrosh <bharrosh@panasas.com>
+ * Boaz Harrosh <ooo@electrozaur.com>
  *
  * Copyrights for code taken from ext2:
  *     Copyright (C) 1992, 1993, 1994, 1995
diff --git a/fs/exofs/exofs.h b/fs/exofs/exofs.h
index fffe86f..ad9cac6 100644
--- a/fs/exofs/exofs.h
+++ b/fs/exofs/exofs.h
@@ -2,7 +2,7 @@
  * Copyright (C) 2005, 2006
  * Avishay Traeger (avishay@gmail.com)
  * Copyright (C) 2008, 2009
- * Boaz Harrosh <bharrosh@panasas.com>
+ * Boaz Harrosh <ooo@electrozaur.com>
  *
  * Copyrights for code taken from ext2:
  *     Copyright (C) 1992, 1993, 1994, 1995
diff --git a/fs/exofs/file.c b/fs/exofs/file.c
index 71bf8e4..1a376b4 100644
--- a/fs/exofs/file.c
+++ b/fs/exofs/file.c
@@ -2,7 +2,7 @@
  * Copyright (C) 2005, 2006
  * Avishay Traeger (avishay@gmail.com)
  * Copyright (C) 2008, 2009
- * Boaz Harrosh <bharrosh@panasas.com>
+ * Boaz Harrosh <ooo@electrozaur.com>
  *
  * Copyrights for code taken from ext2:
  *     Copyright (C) 1992, 1993, 1994, 1995
diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c
index 3f9cafd..f1d3d4e 100644
--- a/fs/exofs/inode.c
+++ b/fs/exofs/inode.c
@@ -2,7 +2,7 @@
  * Copyright (C) 2005, 2006
  * Avishay Traeger (avishay@gmail.com)
  * Copyright (C) 2008, 2009
- * Boaz Harrosh <bharrosh@panasas.com>
+ * Boaz Harrosh <ooo@electrozaur.com>
  *
  * Copyrights for code taken from ext2:
  *     Copyright (C) 1992, 1993, 1994, 1995
diff --git a/fs/exofs/namei.c b/fs/exofs/namei.c
index 4731fd9..2890746 100644
--- a/fs/exofs/namei.c
+++ b/fs/exofs/namei.c
@@ -2,7 +2,7 @@
  * Copyright (C) 2005, 2006
  * Avishay Traeger (avishay@gmail.com)
  * Copyright (C) 2008, 2009
- * Boaz Harrosh <bharrosh@panasas.com>
+ * Boaz Harrosh <ooo@electrozaur.com>
  *
  * Copyrights for code taken from ext2:
  *     Copyright (C) 1992, 1993, 1994, 1995
diff --git a/fs/exofs/ore.c b/fs/exofs/ore.c
index cfc0205..7bd8ac8 100644
--- a/fs/exofs/ore.c
+++ b/fs/exofs/ore.c
@@ -2,7 +2,7 @@
  * Copyright (C) 2005, 2006
  * Avishay Traeger (avishay@gmail.com)
  * Copyright (C) 2008, 2009
- * Boaz Harrosh <bharrosh@panasas.com>
+ * Boaz Harrosh <ooo@electrozaur.com>
  *
  * This file is part of exofs.
  *
@@ -29,7 +29,7 @@
 
 #include "ore_raid.h"
 
-MODULE_AUTHOR("Boaz Harrosh <bharrosh@panasas.com>");
+MODULE_AUTHOR("Boaz Harrosh <ooo@electrozaur.com>");
 MODULE_DESCRIPTION("Objects Raid Engine ore.ko");
 MODULE_LICENSE("GPL");
 
diff --git a/fs/exofs/ore_raid.c b/fs/exofs/ore_raid.c
index 84529b8..27cbdb6 100644
--- a/fs/exofs/ore_raid.c
+++ b/fs/exofs/ore_raid.c
@@ -1,6 +1,6 @@
 /*
  * Copyright (C) 2011
- * Boaz Harrosh <bharrosh@panasas.com>
+ * Boaz Harrosh <ooo@electrozaur.com>
  *
  * This file is part of the objects raid engine (ore).
  *
diff --git a/fs/exofs/ore_raid.h b/fs/exofs/ore_raid.h
index cf6375d..a6e7467 100644
--- a/fs/exofs/ore_raid.h
+++ b/fs/exofs/ore_raid.h
@@ -1,6 +1,6 @@
 /*
  * Copyright (C) from 2011
- * Boaz Harrosh <bharrosh@panasas.com>
+ * Boaz Harrosh <ooo@electrozaur.com>
  *
  * This file is part of the objects raid engine (ore).
  *
diff --git a/fs/exofs/super.c b/fs/exofs/super.c
index ed73ed8..9596550 100644
--- a/fs/exofs/super.c
+++ b/fs/exofs/super.c
@@ -2,7 +2,7 @@
  * Copyright (C) 2005, 2006
  * Avishay Traeger (avishay@gmail.com)
  * Copyright (C) 2008, 2009
- * Boaz Harrosh <bharrosh@panasas.com>
+ * Boaz Harrosh <ooo@electrozaur.com>
  *
  * Copyrights for code taken from ext2:
  *     Copyright (C) 1992, 1993, 1994, 1995
diff --git a/fs/exofs/symlink.c b/fs/exofs/symlink.c
index 4dd687c..832e262 100644
--- a/fs/exofs/symlink.c
+++ b/fs/exofs/symlink.c
@@ -2,7 +2,7 @@
  * Copyright (C) 2005, 2006
  * Avishay Traeger (avishay@gmail.com)
  * Copyright (C) 2008, 2009
- * Boaz Harrosh <bharrosh@panasas.com>
+ * Boaz Harrosh <ooo@electrozaur.com>
  *
  * Copyrights for code taken from ext2:
  *     Copyright (C) 1992, 1993, 1994, 1995
diff --git a/fs/exofs/sys.c b/fs/exofs/sys.c
index 1b4f2f9..5e6a2c0 100644
--- a/fs/exofs/sys.c
+++ b/fs/exofs/sys.c
@@ -1,7 +1,7 @@
 /*
  * Copyright (C) 2012
  * Sachin Bhamare <sbhamare@panasas.com>
- * Boaz Harrosh <bharrosh@panasas.com>
+ * Boaz Harrosh <ooo@electrozaur.com>
  *
  * This file is part of exofs.
  *
diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c
index b01fbfb..fdfd206 100644
--- a/fs/exportfs/expfs.c
+++ b/fs/exportfs/expfs.c
@@ -50,7 +50,7 @@ find_acceptable_alias(struct dentry *result,
 
 	inode = result->d_inode;
 	spin_lock(&inode->i_lock);
-	hlist_for_each_entry(dentry, &inode->i_dentry, d_alias) {
+	hlist_for_each_entry(dentry, &inode->i_dentry, d_u.d_alias) {
 		dget(dentry);
 		spin_unlock(&inode->i_lock);
 		if (toput)
@@ -241,10 +241,11 @@ struct getdents_callback {
  * A rather strange filldir function to capture
  * the name matching the specified inode number.
  */
-static int filldir_one(void * __buf, const char * name, int len,
+static int filldir_one(struct dir_context *ctx, const char *name, int len,
 			loff_t pos, u64 ino, unsigned int d_type)
 {
-	struct getdents_callback *buf = __buf;
+	struct getdents_callback *buf =
+		container_of(ctx, struct getdents_callback, ctx);
 	int result = 0;
 
 	buf->sequence++;
diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h
index d9a17d0..e4279ea 100644
--- a/fs/ext2/ext2.h
+++ b/fs/ext2/ext2.h
@@ -689,6 +689,9 @@ struct ext2_inode_info {
 	struct mutex truncate_mutex;
 	struct inode	vfs_inode;
 	struct list_head i_orphan;	/* unlinked but open inodes */
+#ifdef CONFIG_QUOTA
+	struct dquot *i_dquot[MAXQUOTAS];
+#endif
 };
 
 /*
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 170dc41..ae55fdd 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -166,6 +166,10 @@ static struct inode *ext2_alloc_inode(struct super_block *sb)
 		return NULL;
 	ei->i_block_alloc_info = NULL;
 	ei->vfs_inode.i_version = 1;
+#ifdef CONFIG_QUOTA
+	memset(&ei->i_dquot, 0, sizeof(ei->i_dquot));
+#endif
+
 	return &ei->vfs_inode;
 }
 
@@ -303,6 +307,10 @@ static int ext2_show_options(struct seq_file *seq, struct dentry *root)
 #ifdef CONFIG_QUOTA
 static ssize_t ext2_quota_read(struct super_block *sb, int type, char *data, size_t len, loff_t off);
 static ssize_t ext2_quota_write(struct super_block *sb, int type, const char *data, size_t len, loff_t off);
+static struct dquot **ext2_get_dquots(struct inode *inode)
+{
+	return EXT2_I(inode)->i_dquot;
+}
 #endif
 
 static const struct super_operations ext2_sops = {
@@ -320,6 +328,7 @@ static const struct super_operations ext2_sops = {
 #ifdef CONFIG_QUOTA
 	.quota_read	= ext2_quota_read,
 	.quota_write	= ext2_quota_write,
+	.get_dquots	= ext2_get_dquots,
 #endif
 };
 
@@ -1090,6 +1099,7 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
 #ifdef CONFIG_QUOTA
 	sb->dq_op = &dquot_operations;
 	sb->s_qcop = &dquot_quotactl_ops;
+	sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP;
 #endif
 
 	root = ext2_iget(sb, EXT2_ROOT_INO);
diff --git a/fs/ext3/ext3.h b/fs/ext3/ext3.h
index fc3cdcf..f483a80 100644
--- a/fs/ext3/ext3.h
+++ b/fs/ext3/ext3.h
@@ -615,6 +615,10 @@ struct ext3_inode_info {
 	atomic_t i_sync_tid;
 	atomic_t i_datasync_tid;
 
+#ifdef CONFIG_QUOTA
+	struct dquot *i_dquot[MAXQUOTAS];
+#endif
+
 	struct inode vfs_inode;
 };
 
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 7015db0..9b4e7d7 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -485,6 +485,10 @@ static struct inode *ext3_alloc_inode(struct super_block *sb)
 	ei->vfs_inode.i_version = 1;
 	atomic_set(&ei->i_datasync_tid, 0);
 	atomic_set(&ei->i_sync_tid, 0);
+#ifdef CONFIG_QUOTA
+	memset(&ei->i_dquot, 0, sizeof(ei->i_dquot));
+#endif
+
 	return &ei->vfs_inode;
 }
 
@@ -764,6 +768,10 @@ static ssize_t ext3_quota_read(struct super_block *sb, int type, char *data,
 			       size_t len, loff_t off);
 static ssize_t ext3_quota_write(struct super_block *sb, int type,
 				const char *data, size_t len, loff_t off);
+static struct dquot **ext3_get_dquots(struct inode *inode)
+{
+	return EXT3_I(inode)->i_dquot;
+}
 
 static const struct dquot_operations ext3_quota_operations = {
 	.write_dquot	= ext3_write_dquot,
@@ -803,6 +811,7 @@ static const struct super_operations ext3_sops = {
 #ifdef CONFIG_QUOTA
 	.quota_read	= ext3_quota_read,
 	.quota_write	= ext3_quota_write,
+	.get_dquots	= ext3_get_dquots,
 #endif
 	.bdev_try_to_free_page = bdev_try_to_free_page,
 };
@@ -1354,13 +1363,6 @@ set_qf_format:
 					"not specified.");
 			return 0;
 		}
-	} else {
-		if (sbi->s_jquota_fmt) {
-			ext3_msg(sb, KERN_ERR, "error: journaled quota format "
-					"specified with no journaling "
-					"enabled.");
-			return 0;
-		}
 	}
 #endif
 	return 1;
@@ -2008,6 +2010,7 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
 #ifdef CONFIG_QUOTA
 	sb->s_qcop = &ext3_qctl_operations;
 	sb->dq_op = &ext3_quota_operations;
+	sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP;
 #endif
 	memcpy(sb->s_uuid, es->s_uuid, sizeof(es->s_uuid));
 	INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index 581ef40..83a6f49 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -176,7 +176,7 @@ static unsigned int num_clusters_in_group(struct super_block *sb,
 }
 
 /* Initializes an uninitialized block bitmap */
-static void ext4_init_block_bitmap(struct super_block *sb,
+static int ext4_init_block_bitmap(struct super_block *sb,
 				   struct buffer_head *bh,
 				   ext4_group_t block_group,
 				   struct ext4_group_desc *gdp)
@@ -192,7 +192,6 @@ static void ext4_init_block_bitmap(struct super_block *sb,
 	/* If checksum is bad mark all blocks used to prevent allocation
 	 * essentially implementing a per-group read-only flag. */
 	if (!ext4_group_desc_csum_verify(sb, block_group, gdp)) {
-		ext4_error(sb, "Checksum bad for group %u", block_group);
 		grp = ext4_get_group_info(sb, block_group);
 		if (!EXT4_MB_GRP_BBITMAP_CORRUPT(grp))
 			percpu_counter_sub(&sbi->s_freeclusters_counter,
@@ -205,7 +204,7 @@ static void ext4_init_block_bitmap(struct super_block *sb,
 					   count);
 		}
 		set_bit(EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT, &grp->bb_state);
-		return;
+		return -EIO;
 	}
 	memset(bh->b_data, 0, sb->s_blocksize);
 
@@ -243,6 +242,7 @@ static void ext4_init_block_bitmap(struct super_block *sb,
 			     sb->s_blocksize * 8, bh->b_data);
 	ext4_block_bitmap_csum_set(sb, block_group, gdp, bh);
 	ext4_group_desc_csum_set(sb, block_group, gdp);
+	return 0;
 }
 
 /* Return the number of free blocks in a block group.  It is used when
@@ -438,11 +438,15 @@ ext4_read_block_bitmap_nowait(struct super_block *sb, ext4_group_t block_group)
 	}
 	ext4_lock_group(sb, block_group);
 	if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
-		ext4_init_block_bitmap(sb, bh, block_group, desc);
+		int err;
+
+		err = ext4_init_block_bitmap(sb, bh, block_group, desc);
 		set_bitmap_uptodate(bh);
 		set_buffer_uptodate(bh);
 		ext4_unlock_group(sb, block_group);
 		unlock_buffer(bh);
+		if (err)
+			ext4_error(sb, "Checksum bad for grp %u", block_group);
 		return bh;
 	}
 	ext4_unlock_group(sb, block_group);
@@ -636,8 +640,7 @@ ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode,
 	 * Account for the allocated meta blocks.  We will never
 	 * fail EDQUOT for metdata, but we do account for it.
 	 */
-	if (!(*errp) &&
-	    ext4_test_inode_state(inode, EXT4_STATE_DELALLOC_RESERVED)) {
+	if (!(*errp) && (flags & EXT4_MB_DELALLOC_RESERVED)) {
 		spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
 		spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
 		dquot_alloc_block_nofail(inode,
diff --git a/fs/ext4/bitmap.c b/fs/ext4/bitmap.c
index 3285aa5..b610779 100644
--- a/fs/ext4/bitmap.c
+++ b/fs/ext4/bitmap.c
@@ -24,8 +24,7 @@ int ext4_inode_bitmap_csum_verify(struct super_block *sb, ext4_group_t group,
 	__u32 provided, calculated;
 	struct ext4_sb_info *sbi = EXT4_SB(sb);
 
-	if (!EXT4_HAS_RO_COMPAT_FEATURE(sb,
-					EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
+	if (!ext4_has_metadata_csum(sb))
 		return 1;
 
 	provided = le16_to_cpu(gdp->bg_inode_bitmap_csum_lo);
@@ -46,8 +45,7 @@ void ext4_inode_bitmap_csum_set(struct super_block *sb, ext4_group_t group,
 	__u32 csum;
 	struct ext4_sb_info *sbi = EXT4_SB(sb);
 
-	if (!EXT4_HAS_RO_COMPAT_FEATURE(sb,
-					EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
+	if (!ext4_has_metadata_csum(sb))
 		return;
 
 	csum = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)bh->b_data, sz);
@@ -65,8 +63,7 @@ int ext4_block_bitmap_csum_verify(struct super_block *sb, ext4_group_t group,
 	struct ext4_sb_info *sbi = EXT4_SB(sb);
 	int sz = EXT4_CLUSTERS_PER_GROUP(sb) / 8;
 
-	if (!EXT4_HAS_RO_COMPAT_FEATURE(sb,
-					EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
+	if (!ext4_has_metadata_csum(sb))
 		return 1;
 
 	provided = le16_to_cpu(gdp->bg_block_bitmap_csum_lo);
@@ -91,8 +88,7 @@ void ext4_block_bitmap_csum_set(struct super_block *sb, ext4_group_t group,
 	__u32 csum;
 	struct ext4_sb_info *sbi = EXT4_SB(sb);
 
-	if (!EXT4_HAS_RO_COMPAT_FEATURE(sb,
-			EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
+	if (!ext4_has_metadata_csum(sb))
 		return;
 
 	csum = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)bh->b_data, sz);
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index 0bb3f9e..c24143e 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -151,13 +151,11 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx)
 					&file->f_ra, file,
 					index, 1);
 			file->f_ra.prev_pos = (loff_t)index << PAGE_CACHE_SHIFT;
-			bh = ext4_bread(NULL, inode, map.m_lblk, 0, &err);
+			bh = ext4_bread(NULL, inode, map.m_lblk, 0);
+			if (IS_ERR(bh))
+				return PTR_ERR(bh);
 		}
 
-		/*
-		 * We ignore I/O errors on directories so users have a chance
-		 * of recovering data when there's a bad sector
-		 */
 		if (!bh) {
 			if (!dir_has_error) {
 				EXT4_ERROR_FILE(file, 0,
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index b0c225c..db3f772 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -572,15 +572,15 @@ enum {
 
 /*
  * The bit position of these flags must not overlap with any of the
- * EXT4_GET_BLOCKS_*.  They are used by ext4_ext_find_extent(),
+ * EXT4_GET_BLOCKS_*.  They are used by ext4_find_extent(),
  * read_extent_tree_block(), ext4_split_extent_at(),
  * ext4_ext_insert_extent(), and ext4_ext_create_new_leaf().
  * EXT4_EX_NOCACHE is used to indicate that the we shouldn't be
  * caching the extents when reading from the extent tree while a
  * truncate or punch hole operation is in progress.
  */
-#define EXT4_EX_NOCACHE				0x0400
-#define EXT4_EX_FORCE_CACHE			0x0800
+#define EXT4_EX_NOCACHE				0x40000000
+#define EXT4_EX_FORCE_CACHE			0x20000000
 
 /*
  * Flags used by ext4_free_blocks
@@ -890,6 +890,7 @@ struct ext4_inode_info {
 	struct ext4_es_tree i_es_tree;
 	rwlock_t i_es_lock;
 	struct list_head i_es_lru;
+	unsigned int i_es_all_nr;	/* protected by i_es_lock */
 	unsigned int i_es_lru_nr;	/* protected by i_es_lock */
 	unsigned long i_touch_when;	/* jiffies of last accessing */
 
@@ -940,6 +941,10 @@ struct ext4_inode_info {
 	tid_t i_sync_tid;
 	tid_t i_datasync_tid;
 
+#ifdef CONFIG_QUOTA
+	struct dquot *i_dquot[MAXQUOTAS];
+#endif
+
 	/* Precomputed uuid+inum+igen checksum for seeding inode checksums */
 	__u32 i_csum_seed;
 };
@@ -1174,6 +1179,9 @@ struct ext4_super_block {
 #define EXT4_MF_MNTDIR_SAMPLED	0x0001
 #define EXT4_MF_FS_ABORTED	0x0002	/* Fatal error detected */
 
+/* Number of quota types we support */
+#define EXT4_MAXQUOTAS 2
+
 /*
  * fourth extended-fs super-block data in memory
  */
@@ -1237,7 +1245,7 @@ struct ext4_sb_info {
 	u32 s_min_batch_time;
 	struct block_device *journal_bdev;
 #ifdef CONFIG_QUOTA
-	char *s_qf_names[MAXQUOTAS];		/* Names of quota files with journalled quota */
+	char *s_qf_names[EXT4_MAXQUOTAS];	/* Names of quota files with journalled quota */
 	int s_jquota_fmt;			/* Format of quota to use */
 #endif
 	unsigned int s_want_extra_isize; /* New inodes should reserve # bytes */
@@ -1330,8 +1338,7 @@ struct ext4_sb_info {
 	/* Reclaim extents from extent status tree */
 	struct shrinker s_es_shrinker;
 	struct list_head s_es_lru;
-	unsigned long s_es_last_sorted;
-	struct percpu_counter s_extent_cache_cnt;
+	struct ext4_es_stats s_es_stats;
 	struct mb_cache *s_mb_cache;
 	spinlock_t s_es_lru_lock ____cacheline_aligned_in_smp;
 
@@ -1399,7 +1406,6 @@ enum {
 	EXT4_STATE_EXT_MIGRATE,		/* Inode is migrating */
 	EXT4_STATE_DIO_UNWRITTEN,	/* need convert on dio done*/
 	EXT4_STATE_NEWENTRY,		/* File just added to dir */
-	EXT4_STATE_DELALLOC_RESERVED,	/* blks already reserved for delalloc */
 	EXT4_STATE_DIOREAD_LOCK,	/* Disable support for dio read
 					   nolocking */
 	EXT4_STATE_MAY_INLINE_DATA,	/* may have in-inode data */
@@ -2086,10 +2092,8 @@ extern int ext4_group_add_blocks(handle_t *handle, struct super_block *sb,
 extern int ext4_trim_fs(struct super_block *, struct fstrim_range *);
 
 /* inode.c */
-struct buffer_head *ext4_getblk(handle_t *, struct inode *,
-						ext4_lblk_t, int, int *);
-struct buffer_head *ext4_bread(handle_t *, struct inode *,
-						ext4_lblk_t, int, int *);
+struct buffer_head *ext4_getblk(handle_t *, struct inode *, ext4_lblk_t, int);
+struct buffer_head *ext4_bread(handle_t *, struct inode *, ext4_lblk_t, int);
 int ext4_get_block_write(struct inode *inode, sector_t iblock,
 			 struct buffer_head *bh_result, int create);
 int ext4_get_block(struct inode *inode, sector_t iblock,
@@ -2109,6 +2113,7 @@ int do_journal_get_write_access(handle_t *handle,
 #define CONVERT_INLINE_DATA	 2
 
 extern struct inode *ext4_iget(struct super_block *, unsigned long);
+extern struct inode *ext4_iget_normal(struct super_block *, unsigned long);
 extern int  ext4_write_inode(struct inode *, struct writeback_control *);
 extern int  ext4_setattr(struct dentry *, struct iattr *);
 extern int  ext4_getattr(struct vfsmount *mnt, struct dentry *dentry,
@@ -2332,10 +2337,18 @@ extern int ext4_register_li_request(struct super_block *sb,
 static inline int ext4_has_group_desc_csum(struct super_block *sb)
 {
 	return EXT4_HAS_RO_COMPAT_FEATURE(sb,
-					  EXT4_FEATURE_RO_COMPAT_GDT_CSUM |
-					  EXT4_FEATURE_RO_COMPAT_METADATA_CSUM);
+					  EXT4_FEATURE_RO_COMPAT_GDT_CSUM) ||
+	       (EXT4_SB(sb)->s_chksum_driver != NULL);
 }
 
+static inline int ext4_has_metadata_csum(struct super_block *sb)
+{
+	WARN_ON_ONCE(EXT4_HAS_RO_COMPAT_FEATURE(sb,
+			EXT4_FEATURE_RO_COMPAT_METADATA_CSUM) &&
+		     !EXT4_SB(sb)->s_chksum_driver);
+
+	return (EXT4_SB(sb)->s_chksum_driver != NULL);
+}
 static inline ext4_fsblk_t ext4_blocks_count(struct ext4_super_block *es)
 {
 	return ((ext4_fsblk_t)le32_to_cpu(es->s_blocks_count_hi) << 32) |
@@ -2731,21 +2744,26 @@ extern int ext4_can_extents_be_merged(struct inode *inode,
 				      struct ext4_extent *ex1,
 				      struct ext4_extent *ex2);
 extern int ext4_ext_insert_extent(handle_t *, struct inode *,
-				  struct ext4_ext_path *,
+				  struct ext4_ext_path **,
 				  struct ext4_extent *, int);
-extern struct ext4_ext_path *ext4_ext_find_extent(struct inode *, ext4_lblk_t,
-						  struct ext4_ext_path *,
-						  int flags);
+extern struct ext4_ext_path *ext4_find_extent(struct inode *, ext4_lblk_t,
+					      struct ext4_ext_path **,
+					      int flags);
 extern void ext4_ext_drop_refs(struct ext4_ext_path *);
 extern int ext4_ext_check_inode(struct inode *inode);
 extern int ext4_find_delalloc_range(struct inode *inode,
 				    ext4_lblk_t lblk_start,
 				    ext4_lblk_t lblk_end);
 extern int ext4_find_delalloc_cluster(struct inode *inode, ext4_lblk_t lblk);
+extern ext4_lblk_t ext4_ext_next_allocated_block(struct ext4_ext_path *path);
 extern int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
 			__u64 start, __u64 len);
 extern int ext4_ext_precache(struct inode *inode);
 extern int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len);
+extern int ext4_swap_extents(handle_t *handle, struct inode *inode1,
+				struct inode *inode2, ext4_lblk_t lblk1,
+			     ext4_lblk_t lblk2,  ext4_lblk_t count,
+			     int mark_unwritten,int *err);
 
 /* move_extent.c */
 extern void ext4_double_down_write_data_sem(struct inode *first,
@@ -2755,8 +2773,6 @@ extern void ext4_double_up_write_data_sem(struct inode *orig_inode,
 extern int ext4_move_extents(struct file *o_filp, struct file *d_filp,
 			     __u64 start_orig, __u64 start_donor,
 			     __u64 len, __u64 *moved_len);
-extern int mext_next_extent(struct inode *inode, struct ext4_ext_path *path,
-			    struct ext4_extent **extent);
 
 /* page-io.c */
 extern int __init ext4_init_pageio(void);
diff --git a/fs/ext4/ext4_extents.h b/fs/ext4/ext4_extents.h
index a867f5c..3c93815 100644
--- a/fs/ext4/ext4_extents.h
+++ b/fs/ext4/ext4_extents.h
@@ -123,6 +123,7 @@ find_ext4_extent_tail(struct ext4_extent_header *eh)
 struct ext4_ext_path {
 	ext4_fsblk_t			p_block;
 	__u16				p_depth;
+	__u16				p_maxdepth;
 	struct ext4_extent		*p_ext;
 	struct ext4_extent_idx		*p_idx;
 	struct ext4_extent_header	*p_hdr;
diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c
index 0074e0d..3445035 100644
--- a/fs/ext4/ext4_jbd2.c
+++ b/fs/ext4/ext4_jbd2.c
@@ -256,8 +256,8 @@ int __ext4_handle_dirty_metadata(const char *where, unsigned int line,
 	set_buffer_prio(bh);
 	if (ext4_handle_valid(handle)) {
 		err = jbd2_journal_dirty_metadata(handle, bh);
-		/* Errors can only happen if there is a bug */
-		if (WARN_ON_ONCE(err)) {
+		/* Errors can only happen due to aborted journal or a nasty bug */
+		if (!is_handle_aborted(handle) && WARN_ON_ONCE(err)) {
 			ext4_journal_abort_handle(where, line, __func__, bh,
 						  handle, err);
 			if (inode == NULL) {
diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h
index 17c00ff..9c5b49f 100644
--- a/fs/ext4/ext4_jbd2.h
+++ b/fs/ext4/ext4_jbd2.h
@@ -102,9 +102,9 @@
 #define EXT4_QUOTA_INIT_BLOCKS(sb) 0
 #define EXT4_QUOTA_DEL_BLOCKS(sb) 0
 #endif
-#define EXT4_MAXQUOTAS_TRANS_BLOCKS(sb) (MAXQUOTAS*EXT4_QUOTA_TRANS_BLOCKS(sb))
-#define EXT4_MAXQUOTAS_INIT_BLOCKS(sb) (MAXQUOTAS*EXT4_QUOTA_INIT_BLOCKS(sb))
-#define EXT4_MAXQUOTAS_DEL_BLOCKS(sb) (MAXQUOTAS*EXT4_QUOTA_DEL_BLOCKS(sb))
+#define EXT4_MAXQUOTAS_TRANS_BLOCKS(sb) (EXT4_MAXQUOTAS*EXT4_QUOTA_TRANS_BLOCKS(sb))
+#define EXT4_MAXQUOTAS_INIT_BLOCKS(sb) (EXT4_MAXQUOTAS*EXT4_QUOTA_INIT_BLOCKS(sb))
+#define EXT4_MAXQUOTAS_DEL_BLOCKS(sb) (EXT4_MAXQUOTAS*EXT4_QUOTA_DEL_BLOCKS(sb))
 
 static inline int ext4_jbd2_credits_xattr(struct inode *inode)
 {
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 74292a7..0b16fb4 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -73,8 +73,7 @@ static int ext4_extent_block_csum_verify(struct inode *inode,
 {
 	struct ext4_extent_tail *et;
 
-	if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
-		EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
+	if (!ext4_has_metadata_csum(inode->i_sb))
 		return 1;
 
 	et = find_ext4_extent_tail(eh);
@@ -88,8 +87,7 @@ static void ext4_extent_block_csum_set(struct inode *inode,
 {
 	struct ext4_extent_tail *et;
 
-	if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
-		EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
+	if (!ext4_has_metadata_csum(inode->i_sb))
 		return;
 
 	et = find_ext4_extent_tail(eh);
@@ -98,14 +96,14 @@ static void ext4_extent_block_csum_set(struct inode *inode,
 
 static int ext4_split_extent(handle_t *handle,
 				struct inode *inode,
-				struct ext4_ext_path *path,
+				struct ext4_ext_path **ppath,
 				struct ext4_map_blocks *map,
 				int split_flag,
 				int flags);
 
 static int ext4_split_extent_at(handle_t *handle,
 			     struct inode *inode,
-			     struct ext4_ext_path *path,
+			     struct ext4_ext_path **ppath,
 			     ext4_lblk_t split,
 			     int split_flag,
 			     int flags);
@@ -291,6 +289,20 @@ static inline int ext4_ext_space_root_idx(struct inode *inode, int check)
 	return size;
 }
 
+static inline int
+ext4_force_split_extent_at(handle_t *handle, struct inode *inode,
+			   struct ext4_ext_path **ppath, ext4_lblk_t lblk,
+			   int nofail)
+{
+	struct ext4_ext_path *path = *ppath;
+	int unwritten = ext4_ext_is_unwritten(path[path->p_depth].p_ext);
+
+	return ext4_split_extent_at(handle, inode, ppath, lblk, unwritten ?
+			EXT4_EXT_MARK_UNWRIT1|EXT4_EXT_MARK_UNWRIT2 : 0,
+			EXT4_EX_NOCACHE | EXT4_GET_BLOCKS_PRE_IO |
+			(nofail ? EXT4_GET_BLOCKS_METADATA_NOFAIL:0));
+}
+
 /*
  * Calculate the number of metadata blocks needed
  * to allocate @blocks
@@ -695,9 +707,11 @@ static void ext4_ext_show_move(struct inode *inode, struct ext4_ext_path *path,
 
 void ext4_ext_drop_refs(struct ext4_ext_path *path)
 {
-	int depth = path->p_depth;
-	int i;
+	int depth, i;
 
+	if (!path)
+		return;
+	depth = path->p_depth;
 	for (i = 0; i <= depth; i++, path++)
 		if (path->p_bh) {
 			brelse(path->p_bh);
@@ -841,24 +855,32 @@ int ext4_ext_tree_init(handle_t *handle, struct inode *inode)
 }
 
 struct ext4_ext_path *
-ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block,
-		     struct ext4_ext_path *path, int flags)
+ext4_find_extent(struct inode *inode, ext4_lblk_t block,
+		 struct ext4_ext_path **orig_path, int flags)
 {
 	struct ext4_extent_header *eh;
 	struct buffer_head *bh;
-	short int depth, i, ppos = 0, alloc = 0;
+	struct ext4_ext_path *path = orig_path ? *orig_path : NULL;
+	short int depth, i, ppos = 0;
 	int ret;
 
 	eh = ext_inode_hdr(inode);
 	depth = ext_depth(inode);
 
-	/* account possible depth increase */
+	if (path) {
+		ext4_ext_drop_refs(path);
+		if (depth > path[0].p_maxdepth) {
+			kfree(path);
+			*orig_path = path = NULL;
+		}
+	}
 	if (!path) {
+		/* account possible depth increase */
 		path = kzalloc(sizeof(struct ext4_ext_path) * (depth + 2),
 				GFP_NOFS);
-		if (!path)
+		if (unlikely(!path))
 			return ERR_PTR(-ENOMEM);
-		alloc = 1;
+		path[0].p_maxdepth = depth + 1;
 	}
 	path[0].p_hdr = eh;
 	path[0].p_bh = NULL;
@@ -876,7 +898,7 @@ ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block,
 
 		bh = read_extent_tree_block(inode, path[ppos].p_block, --i,
 					    flags);
-		if (IS_ERR(bh)) {
+		if (unlikely(IS_ERR(bh))) {
 			ret = PTR_ERR(bh);
 			goto err;
 		}
@@ -910,8 +932,9 @@ ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block,
 
 err:
 	ext4_ext_drop_refs(path);
-	if (alloc)
-		kfree(path);
+	kfree(path);
+	if (orig_path)
+		*orig_path = NULL;
 	return ERR_PTR(ret);
 }
 
@@ -1238,16 +1261,24 @@ cleanup:
  *   just created block
  */
 static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode,
-				 unsigned int flags,
-				 struct ext4_extent *newext)
+				 unsigned int flags)
 {
 	struct ext4_extent_header *neh;
 	struct buffer_head *bh;
-	ext4_fsblk_t newblock;
+	ext4_fsblk_t newblock, goal = 0;
+	struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es;
 	int err = 0;
 
-	newblock = ext4_ext_new_meta_block(handle, inode, NULL,
-		newext, &err, flags);
+	/* Try to prepend new index to old one */
+	if (ext_depth(inode))
+		goal = ext4_idx_pblock(EXT_FIRST_INDEX(ext_inode_hdr(inode)));
+	if (goal > le32_to_cpu(es->s_first_data_block)) {
+		flags |= EXT4_MB_HINT_TRY_GOAL;
+		goal--;
+	} else
+		goal = ext4_inode_to_goal_block(inode);
+	newblock = ext4_new_meta_blocks(handle, inode, goal, flags,
+					NULL, &err);
 	if (newblock == 0)
 		return err;
 
@@ -1314,9 +1345,10 @@ out:
 static int ext4_ext_create_new_leaf(handle_t *handle, struct inode *inode,
 				    unsigned int mb_flags,
 				    unsigned int gb_flags,
-				    struct ext4_ext_path *path,
+				    struct ext4_ext_path **ppath,
 				    struct ext4_extent *newext)
 {
+	struct ext4_ext_path *path = *ppath;
 	struct ext4_ext_path *curp;
 	int depth, i, err = 0;
 
@@ -1340,23 +1372,21 @@ repeat:
 			goto out;
 
 		/* refill path */
-		ext4_ext_drop_refs(path);
-		path = ext4_ext_find_extent(inode,
+		path = ext4_find_extent(inode,
 				    (ext4_lblk_t)le32_to_cpu(newext->ee_block),
-				    path, gb_flags);
+				    ppath, gb_flags);
 		if (IS_ERR(path))
 			err = PTR_ERR(path);
 	} else {
 		/* tree is full, time to grow in depth */
-		err = ext4_ext_grow_indepth(handle, inode, mb_flags, newext);
+		err = ext4_ext_grow_indepth(handle, inode, mb_flags);
 		if (err)
 			goto out;
 
 		/* refill path */
-		ext4_ext_drop_refs(path);
-		path = ext4_ext_find_extent(inode,
+		path = ext4_find_extent(inode,
 				   (ext4_lblk_t)le32_to_cpu(newext->ee_block),
-				    path, gb_flags);
+				    ppath, gb_flags);
 		if (IS_ERR(path)) {
 			err = PTR_ERR(path);
 			goto out;
@@ -1559,7 +1589,7 @@ found_extent:
  * allocated block. Thus, index entries have to be consistent
  * with leaves.
  */
-static ext4_lblk_t
+ext4_lblk_t
 ext4_ext_next_allocated_block(struct ext4_ext_path *path)
 {
 	int depth;
@@ -1802,6 +1832,7 @@ static void ext4_ext_try_to_merge_up(handle_t *handle,
 		sizeof(struct ext4_extent_idx);
 	s += sizeof(struct ext4_extent_header);
 
+	path[1].p_maxdepth = path[0].p_maxdepth;
 	memcpy(path[0].p_hdr, path[1].p_hdr, s);
 	path[0].p_depth = 0;
 	path[0].p_ext = EXT_FIRST_EXTENT(path[0].p_hdr) +
@@ -1896,9 +1927,10 @@ out:
  * creating new leaf in the no-space case.
  */
 int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,
-				struct ext4_ext_path *path,
+				struct ext4_ext_path **ppath,
 				struct ext4_extent *newext, int gb_flags)
 {
+	struct ext4_ext_path *path = *ppath;
 	struct ext4_extent_header *eh;
 	struct ext4_extent *ex, *fex;
 	struct ext4_extent *nearex; /* nearest extent */
@@ -1907,6 +1939,8 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,
 	ext4_lblk_t next;
 	int mb_flags = 0, unwritten;
 
+	if (gb_flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
+		mb_flags |= EXT4_MB_DELALLOC_RESERVED;
 	if (unlikely(ext4_ext_get_actual_len(newext) == 0)) {
 		EXT4_ERROR_INODE(inode, "ext4_ext_get_actual_len(newext) == 0");
 		return -EIO;
@@ -1925,7 +1959,7 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,
 		/*
 		 * Try to see whether we should rather test the extent on
 		 * right from ex, or from the left of ex. This is because
-		 * ext4_ext_find_extent() can return either extent on the
+		 * ext4_find_extent() can return either extent on the
 		 * left, or on the right from the searched position. This
 		 * will make merging more effective.
 		 */
@@ -2008,7 +2042,7 @@ prepend:
 	if (next != EXT_MAX_BLOCKS) {
 		ext_debug("next leaf block - %u\n", next);
 		BUG_ON(npath != NULL);
-		npath = ext4_ext_find_extent(inode, next, NULL, 0);
+		npath = ext4_find_extent(inode, next, NULL, 0);
 		if (IS_ERR(npath))
 			return PTR_ERR(npath);
 		BUG_ON(npath->p_depth != path->p_depth);
@@ -2028,9 +2062,9 @@ prepend:
 	 * We're gonna add a new leaf in the tree.
 	 */
 	if (gb_flags & EXT4_GET_BLOCKS_METADATA_NOFAIL)
-		mb_flags = EXT4_MB_USE_RESERVED;
+		mb_flags |= EXT4_MB_USE_RESERVED;
 	err = ext4_ext_create_new_leaf(handle, inode, mb_flags, gb_flags,
-				       path, newext);
+				       ppath, newext);
 	if (err)
 		goto cleanup;
 	depth = ext_depth(inode);
@@ -2108,10 +2142,8 @@ merge:
 	err = ext4_ext_dirty(handle, inode, path + path->p_depth);
 
 cleanup:
-	if (npath) {
-		ext4_ext_drop_refs(npath);
-		kfree(npath);
-	}
+	ext4_ext_drop_refs(npath);
+	kfree(npath);
 	return err;
 }
 
@@ -2133,13 +2165,7 @@ static int ext4_fill_fiemap_extents(struct inode *inode,
 		/* find extent for this block */
 		down_read(&EXT4_I(inode)->i_data_sem);
 
-		if (path && ext_depth(inode) != depth) {
-			/* depth was changed. we have to realloc path */
-			kfree(path);
-			path = NULL;
-		}
-
-		path = ext4_ext_find_extent(inode, block, path, 0);
+		path = ext4_find_extent(inode, block, &path, 0);
 		if (IS_ERR(path)) {
 			up_read(&EXT4_I(inode)->i_data_sem);
 			err = PTR_ERR(path);
@@ -2156,7 +2182,6 @@ static int ext4_fill_fiemap_extents(struct inode *inode,
 		}
 		ex = path[depth].p_ext;
 		next = ext4_ext_next_allocated_block(path);
-		ext4_ext_drop_refs(path);
 
 		flags = 0;
 		exists = 0;
@@ -2266,11 +2291,8 @@ static int ext4_fill_fiemap_extents(struct inode *inode,
 		block = es.es_lblk + es.es_len;
 	}
 
-	if (path) {
-		ext4_ext_drop_refs(path);
-		kfree(path);
-	}
-
+	ext4_ext_drop_refs(path);
+	kfree(path);
 	return err;
 }
 
@@ -2826,7 +2848,7 @@ again:
 		ext4_lblk_t ee_block;
 
 		/* find extent for this block */
-		path = ext4_ext_find_extent(inode, end, NULL, EXT4_EX_NOCACHE);
+		path = ext4_find_extent(inode, end, NULL, EXT4_EX_NOCACHE);
 		if (IS_ERR(path)) {
 			ext4_journal_stop(handle);
 			return PTR_ERR(path);
@@ -2854,24 +2876,14 @@ again:
 		 */
 		if (end >= ee_block &&
 		    end < ee_block + ext4_ext_get_actual_len(ex) - 1) {
-			int split_flag = 0;
-
-			if (ext4_ext_is_unwritten(ex))
-				split_flag = EXT4_EXT_MARK_UNWRIT1 |
-					     EXT4_EXT_MARK_UNWRIT2;
-
 			/*
 			 * Split the extent in two so that 'end' is the last
 			 * block in the first new extent. Also we should not
 			 * fail removing space due to ENOSPC so try to use
 			 * reserved block if that happens.
 			 */
-			err = ext4_split_extent_at(handle, inode, path,
-					end + 1, split_flag,
-					EXT4_EX_NOCACHE |
-					EXT4_GET_BLOCKS_PRE_IO |
-					EXT4_GET_BLOCKS_METADATA_NOFAIL);
-
+			err = ext4_force_split_extent_at(handle, inode, &path,
+							 end + 1, 1);
 			if (err < 0)
 				goto out;
 		}
@@ -2893,7 +2905,7 @@ again:
 			ext4_journal_stop(handle);
 			return -ENOMEM;
 		}
-		path[0].p_depth = depth;
+		path[0].p_maxdepth = path[0].p_depth = depth;
 		path[0].p_hdr = ext_inode_hdr(inode);
 		i = 0;
 
@@ -3013,10 +3025,9 @@ again:
 out:
 	ext4_ext_drop_refs(path);
 	kfree(path);
-	if (err == -EAGAIN) {
-		path = NULL;
+	path = NULL;
+	if (err == -EAGAIN)
 		goto again;
-	}
 	ext4_journal_stop(handle);
 
 	return err;
@@ -3130,11 +3141,12 @@ static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex)
  */
 static int ext4_split_extent_at(handle_t *handle,
 			     struct inode *inode,
-			     struct ext4_ext_path *path,
+			     struct ext4_ext_path **ppath,
 			     ext4_lblk_t split,
 			     int split_flag,
 			     int flags)
 {
+	struct ext4_ext_path *path = *ppath;
 	ext4_fsblk_t newblock;
 	ext4_lblk_t ee_block;
 	struct ext4_extent *ex, newex, orig_ex, zero_ex;
@@ -3205,7 +3217,7 @@ static int ext4_split_extent_at(handle_t *handle,
 	if (split_flag & EXT4_EXT_MARK_UNWRIT2)
 		ext4_ext_mark_unwritten(ex2);
 
-	err = ext4_ext_insert_extent(handle, inode, path, &newex, flags);
+	err = ext4_ext_insert_extent(handle, inode, ppath, &newex, flags);
 	if (err == -ENOSPC && (EXT4_EXT_MAY_ZEROOUT & split_flag)) {
 		if (split_flag & (EXT4_EXT_DATA_VALID1|EXT4_EXT_DATA_VALID2)) {
 			if (split_flag & EXT4_EXT_DATA_VALID1) {
@@ -3271,11 +3283,12 @@ fix_extent_len:
  */
 static int ext4_split_extent(handle_t *handle,
 			      struct inode *inode,
-			      struct ext4_ext_path *path,
+			      struct ext4_ext_path **ppath,
 			      struct ext4_map_blocks *map,
 			      int split_flag,
 			      int flags)
 {
+	struct ext4_ext_path *path = *ppath;
 	ext4_lblk_t ee_block;
 	struct ext4_extent *ex;
 	unsigned int ee_len, depth;
@@ -3298,7 +3311,7 @@ static int ext4_split_extent(handle_t *handle,
 				       EXT4_EXT_MARK_UNWRIT2;
 		if (split_flag & EXT4_EXT_DATA_VALID2)
 			split_flag1 |= EXT4_EXT_DATA_VALID1;
-		err = ext4_split_extent_at(handle, inode, path,
+		err = ext4_split_extent_at(handle, inode, ppath,
 				map->m_lblk + map->m_len, split_flag1, flags1);
 		if (err)
 			goto out;
@@ -3309,8 +3322,7 @@ static int ext4_split_extent(handle_t *handle,
 	 * Update path is required because previous ext4_split_extent_at() may
 	 * result in split of original leaf or extent zeroout.
 	 */
-	ext4_ext_drop_refs(path);
-	path = ext4_ext_find_extent(inode, map->m_lblk, path, 0);
+	path = ext4_find_extent(inode, map->m_lblk, ppath, 0);
 	if (IS_ERR(path))
 		return PTR_ERR(path);
 	depth = ext_depth(inode);
@@ -3330,7 +3342,7 @@ static int ext4_split_extent(handle_t *handle,
 			split_flag1 |= split_flag & (EXT4_EXT_MAY_ZEROOUT |
 						     EXT4_EXT_MARK_UNWRIT2);
 		}
-		err = ext4_split_extent_at(handle, inode, path,
+		err = ext4_split_extent_at(handle, inode, ppath,
 				map->m_lblk, split_flag1, flags);
 		if (err)
 			goto out;
@@ -3364,9 +3376,10 @@ out:
 static int ext4_ext_convert_to_initialized(handle_t *handle,
 					   struct inode *inode,
 					   struct ext4_map_blocks *map,
-					   struct ext4_ext_path *path,
+					   struct ext4_ext_path **ppath,
 					   int flags)
 {
+	struct ext4_ext_path *path = *ppath;
 	struct ext4_sb_info *sbi;
 	struct ext4_extent_header *eh;
 	struct ext4_map_blocks split_map;
@@ -3590,11 +3603,10 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
 		}
 	}
 
-	allocated = ext4_split_extent(handle, inode, path,
-				      &split_map, split_flag, flags);
-	if (allocated < 0)
-		err = allocated;
-
+	err = ext4_split_extent(handle, inode, ppath, &split_map, split_flag,
+				flags);
+	if (err > 0)
+		err = 0;
 out:
 	/* If we have gotten a failure, don't zero out status tree */
 	if (!err)
@@ -3629,9 +3641,10 @@ out:
 static int ext4_split_convert_extents(handle_t *handle,
 					struct inode *inode,
 					struct ext4_map_blocks *map,
-					struct ext4_ext_path *path,
+					struct ext4_ext_path **ppath,
 					int flags)
 {
+	struct ext4_ext_path *path = *ppath;
 	ext4_lblk_t eof_block;
 	ext4_lblk_t ee_block;
 	struct ext4_extent *ex;
@@ -3665,74 +3678,15 @@ static int ext4_split_convert_extents(handle_t *handle,
 		split_flag |= (EXT4_EXT_MARK_UNWRIT2 | EXT4_EXT_DATA_VALID2);
 	}
 	flags |= EXT4_GET_BLOCKS_PRE_IO;
-	return ext4_split_extent(handle, inode, path, map, split_flag, flags);
+	return ext4_split_extent(handle, inode, ppath, map, split_flag, flags);
 }
 
-static int ext4_convert_initialized_extents(handle_t *handle,
-					    struct inode *inode,
-					    struct ext4_map_blocks *map,
-					    struct ext4_ext_path *path)
-{
-	struct ext4_extent *ex;
-	ext4_lblk_t ee_block;
-	unsigned int ee_len;
-	int depth;
-	int err = 0;
-
-	depth = ext_depth(inode);
-	ex = path[depth].p_ext;
-	ee_block = le32_to_cpu(ex->ee_block);
-	ee_len = ext4_ext_get_actual_len(ex);
-
-	ext_debug("%s: inode %lu, logical"
-		"block %llu, max_blocks %u\n", __func__, inode->i_ino,
-		  (unsigned long long)ee_block, ee_len);
-
-	if (ee_block != map->m_lblk || ee_len > map->m_len) {
-		err = ext4_split_convert_extents(handle, inode, map, path,
-				EXT4_GET_BLOCKS_CONVERT_UNWRITTEN);
-		if (err < 0)
-			goto out;
-		ext4_ext_drop_refs(path);
-		path = ext4_ext_find_extent(inode, map->m_lblk, path, 0);
-		if (IS_ERR(path)) {
-			err = PTR_ERR(path);
-			goto out;
-		}
-		depth = ext_depth(inode);
-		ex = path[depth].p_ext;
-		if (!ex) {
-			EXT4_ERROR_INODE(inode, "unexpected hole at %lu",
-					 (unsigned long) map->m_lblk);
-			err = -EIO;
-			goto out;
-		}
-	}
-
-	err = ext4_ext_get_access(handle, inode, path + depth);
-	if (err)
-		goto out;
-	/* first mark the extent as unwritten */
-	ext4_ext_mark_unwritten(ex);
-
-	/* note: ext4_ext_correct_indexes() isn't needed here because
-	 * borders are not changed
-	 */
-	ext4_ext_try_to_merge(handle, inode, path, ex);
-
-	/* Mark modified extent as dirty */
-	err = ext4_ext_dirty(handle, inode, path + path->p_depth);
-out:
-	ext4_ext_show_leaf(inode, path);
-	return err;
-}
-
-
 static int ext4_convert_unwritten_extents_endio(handle_t *handle,
 						struct inode *inode,
 						struct ext4_map_blocks *map,
-						struct ext4_ext_path *path)
+						struct ext4_ext_path **ppath)
 {
+	struct ext4_ext_path *path = *ppath;
 	struct ext4_extent *ex;
 	ext4_lblk_t ee_block;
 	unsigned int ee_len;
@@ -3761,16 +3715,13 @@ static int ext4_convert_unwritten_extents_endio(handle_t *handle,
 			     inode->i_ino, (unsigned long long)ee_block, ee_len,
 			     (unsigned long long)map->m_lblk, map->m_len);
 #endif
-		err = ext4_split_convert_extents(handle, inode, map, path,
+		err = ext4_split_convert_extents(handle, inode, map, ppath,
 						 EXT4_GET_BLOCKS_CONVERT);
 		if (err < 0)
-			goto out;
-		ext4_ext_drop_refs(path);
-		path = ext4_ext_find_extent(inode, map->m_lblk, path, 0);
-		if (IS_ERR(path)) {
-			err = PTR_ERR(path);
-			goto out;
-		}
+			return err;
+		path = ext4_find_extent(inode, map->m_lblk, ppath, 0);
+		if (IS_ERR(path))
+			return PTR_ERR(path);
 		depth = ext_depth(inode);
 		ex = path[depth].p_ext;
 	}
@@ -3963,12 +3914,16 @@ get_reserved_cluster_alloc(struct inode *inode, ext4_lblk_t lblk_start,
 }
 
 static int
-ext4_ext_convert_initialized_extent(handle_t *handle, struct inode *inode,
-			struct ext4_map_blocks *map,
-			struct ext4_ext_path *path, int flags,
-			unsigned int allocated, ext4_fsblk_t newblock)
+convert_initialized_extent(handle_t *handle, struct inode *inode,
+			   struct ext4_map_blocks *map,
+			   struct ext4_ext_path **ppath, int flags,
+			   unsigned int allocated, ext4_fsblk_t newblock)
 {
-	int ret = 0;
+	struct ext4_ext_path *path = *ppath;
+	struct ext4_extent *ex;
+	ext4_lblk_t ee_block;
+	unsigned int ee_len;
+	int depth;
 	int err = 0;
 
 	/*
@@ -3978,28 +3933,67 @@ ext4_ext_convert_initialized_extent(handle_t *handle, struct inode *inode,
 	if (map->m_len > EXT_UNWRITTEN_MAX_LEN)
 		map->m_len = EXT_UNWRITTEN_MAX_LEN / 2;
 
-	ret = ext4_convert_initialized_extents(handle, inode, map,
-						path);
-	if (ret >= 0) {
-		ext4_update_inode_fsync_trans(handle, inode, 1);
-		err = check_eofblocks_fl(handle, inode, map->m_lblk,
-					 path, map->m_len);
-	} else
-		err = ret;
+	depth = ext_depth(inode);
+	ex = path[depth].p_ext;
+	ee_block = le32_to_cpu(ex->ee_block);
+	ee_len = ext4_ext_get_actual_len(ex);
+
+	ext_debug("%s: inode %lu, logical"
+		"block %llu, max_blocks %u\n", __func__, inode->i_ino,
+		  (unsigned long long)ee_block, ee_len);
+
+	if (ee_block != map->m_lblk || ee_len > map->m_len) {
+		err = ext4_split_convert_extents(handle, inode, map, ppath,
+				EXT4_GET_BLOCKS_CONVERT_UNWRITTEN);
+		if (err < 0)
+			return err;
+		path = ext4_find_extent(inode, map->m_lblk, ppath, 0);
+		if (IS_ERR(path))
+			return PTR_ERR(path);
+		depth = ext_depth(inode);
+		ex = path[depth].p_ext;
+		if (!ex) {
+			EXT4_ERROR_INODE(inode, "unexpected hole at %lu",
+					 (unsigned long) map->m_lblk);
+			return -EIO;
+		}
+	}
+
+	err = ext4_ext_get_access(handle, inode, path + depth);
+	if (err)
+		return err;
+	/* first mark the extent as unwritten */
+	ext4_ext_mark_unwritten(ex);
+
+	/* note: ext4_ext_correct_indexes() isn't needed here because
+	 * borders are not changed
+	 */
+	ext4_ext_try_to_merge(handle, inode, path, ex);
+
+	/* Mark modified extent as dirty */
+	err = ext4_ext_dirty(handle, inode, path + path->p_depth);
+	if (err)
+		return err;
+	ext4_ext_show_leaf(inode, path);
+
+	ext4_update_inode_fsync_trans(handle, inode, 1);
+	err = check_eofblocks_fl(handle, inode, map->m_lblk, path, map->m_len);
+	if (err)
+		return err;
 	map->m_flags |= EXT4_MAP_UNWRITTEN;
 	if (allocated > map->m_len)
 		allocated = map->m_len;
 	map->m_len = allocated;
-
-	return err ? err : allocated;
+	return allocated;
 }
 
 static int
 ext4_ext_handle_unwritten_extents(handle_t *handle, struct inode *inode,
 			struct ext4_map_blocks *map,
-			struct ext4_ext_path *path, int flags,
+			struct ext4_ext_path **ppath, int flags,
 			unsigned int allocated, ext4_fsblk_t newblock)
 {
+	struct ext4_ext_path *path = *ppath;
 	int ret = 0;
 	int err = 0;
 	ext4_io_end_t *io = ext4_inode_aio(inode);
@@ -4021,8 +4015,8 @@ ext4_ext_handle_unwritten_extents(handle_t *handle, struct inode *inode,
 
 	/* get_block() before submit the IO, split the extent */
 	if (flags & EXT4_GET_BLOCKS_PRE_IO) {
-		ret = ext4_split_convert_extents(handle, inode, map,
-					 path, flags | EXT4_GET_BLOCKS_CONVERT);
+		ret = ext4_split_convert_extents(handle, inode, map, ppath,
+					 flags | EXT4_GET_BLOCKS_CONVERT);
 		if (ret <= 0)
 			goto out;
 		/*
@@ -4040,7 +4034,7 @@ ext4_ext_handle_unwritten_extents(handle_t *handle, struct inode *inode,
 	/* IO end_io complete, convert the filled extent to written */
 	if (flags & EXT4_GET_BLOCKS_CONVERT) {
 		ret = ext4_convert_unwritten_extents_endio(handle, inode, map,
-							path);
+							   ppath);
 		if (ret >= 0) {
 			ext4_update_inode_fsync_trans(handle, inode, 1);
 			err = check_eofblocks_fl(handle, inode, map->m_lblk,
@@ -4078,7 +4072,7 @@ ext4_ext_handle_unwritten_extents(handle_t *handle, struct inode *inode,
 	}
 
 	/* buffered write, writepage time, convert*/
-	ret = ext4_ext_convert_to_initialized(handle, inode, map, path, flags);
+	ret = ext4_ext_convert_to_initialized(handle, inode, map, ppath, flags);
 	if (ret >= 0)
 		ext4_update_inode_fsync_trans(handle, inode, 1);
 out:
@@ -4279,7 +4273,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
 	trace_ext4_ext_map_blocks_enter(inode, map->m_lblk, map->m_len, flags);
 
 	/* find extent for this block */
-	path = ext4_ext_find_extent(inode, map->m_lblk, NULL, 0);
+	path = ext4_find_extent(inode, map->m_lblk, NULL, 0);
 	if (IS_ERR(path)) {
 		err = PTR_ERR(path);
 		path = NULL;
@@ -4291,7 +4285,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
 	/*
 	 * consistent leaf must not be empty;
 	 * this situation is possible, though, _during_ tree modification;
-	 * this is why assert can't be put in ext4_ext_find_extent()
+	 * this is why assert can't be put in ext4_find_extent()
 	 */
 	if (unlikely(path[depth].p_ext == NULL && depth != 0)) {
 		EXT4_ERROR_INODE(inode, "bad extent address "
@@ -4331,15 +4325,15 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
 			 */
 			if ((!ext4_ext_is_unwritten(ex)) &&
 			    (flags & EXT4_GET_BLOCKS_CONVERT_UNWRITTEN)) {
-				allocated = ext4_ext_convert_initialized_extent(
-						handle, inode, map, path, flags,
-						allocated, newblock);
+				allocated = convert_initialized_extent(
+						handle, inode, map, &path,
+						flags, allocated, newblock);
 				goto out2;
 			} else if (!ext4_ext_is_unwritten(ex))
 				goto out;
 
 			ret = ext4_ext_handle_unwritten_extents(
-				handle, inode, map, path, flags,
+				handle, inode, map, &path, flags,
 				allocated, newblock);
 			if (ret < 0)
 				err = ret;
@@ -4376,7 +4370,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
 
 	/*
 	 * If we are doing bigalloc, check to see if the extent returned
-	 * by ext4_ext_find_extent() implies a cluster we can use.
+	 * by ext4_find_extent() implies a cluster we can use.
 	 */
 	if (cluster_offset && ex &&
 	    get_implied_cluster_alloc(inode->i_sb, map, ex, path)) {
@@ -4451,6 +4445,8 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
 		ar.flags = 0;
 	if (flags & EXT4_GET_BLOCKS_NO_NORMALIZE)
 		ar.flags |= EXT4_MB_HINT_NOPREALLOC;
+	if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
+		ar.flags |= EXT4_MB_DELALLOC_RESERVED;
 	newblock = ext4_mb_new_blocks(handle, &ar, &err);
 	if (!newblock)
 		goto out2;
@@ -4486,7 +4482,7 @@ got_allocated_blocks:
 		err = check_eofblocks_fl(handle, inode, map->m_lblk,
 					 path, ar.len);
 	if (!err)
-		err = ext4_ext_insert_extent(handle, inode, path,
+		err = ext4_ext_insert_extent(handle, inode, &path,
 					     &newex, flags);
 
 	if (!err && set_unwritten) {
@@ -4619,10 +4615,8 @@ out:
 	map->m_pblk = newblock;
 	map->m_len = allocated;
 out2:
-	if (path) {
-		ext4_ext_drop_refs(path);
-		kfree(path);
-	}
+	ext4_ext_drop_refs(path);
+	kfree(path);
 
 	trace_ext4_ext_map_blocks_exit(inode, flags, map,
 				       err ? err : allocated);
@@ -4799,7 +4793,8 @@ static long ext4_zero_range(struct file *file, loff_t offset,
 		max_blocks -= lblk;
 
 	flags = EXT4_GET_BLOCKS_CREATE_UNWRIT_EXT |
-		EXT4_GET_BLOCKS_CONVERT_UNWRITTEN;
+		EXT4_GET_BLOCKS_CONVERT_UNWRITTEN |
+		EXT4_EX_NOCACHE;
 	if (mode & FALLOC_FL_KEEP_SIZE)
 		flags |= EXT4_GET_BLOCKS_KEEP_SIZE;
 
@@ -4837,15 +4832,21 @@ static long ext4_zero_range(struct file *file, loff_t offset,
 		ext4_inode_block_unlocked_dio(inode);
 		inode_dio_wait(inode);
 
+		ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size,
+					     flags, mode);
+		if (ret)
+			goto out_dio;
 		/*
 		 * Remove entire range from the extent status tree.
+		 *
+		 * ext4_es_remove_extent(inode, lblk, max_blocks) is
+		 * NOT sufficient.  I'm not sure why this is the case,
+		 * but let's be conservative and remove the extent
+		 * status tree for the entire inode.  There should be
+		 * no outstanding delalloc extents thanks to the
+		 * filemap_write_and_wait_range() call above.
 		 */
-		ret = ext4_es_remove_extent(inode, lblk, max_blocks);
-		if (ret)
-			goto out_dio;
-
-		ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size,
-					     flags, mode);
+		ret = ext4_es_remove_extent(inode, 0, EXT_MAX_BLOCKS);
 		if (ret)
 			goto out_dio;
 	}
@@ -5304,36 +5305,31 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle,
 	struct ext4_ext_path *path;
 	int ret = 0, depth;
 	struct ext4_extent *extent;
-	ext4_lblk_t stop_block, current_block;
+	ext4_lblk_t stop_block;
 	ext4_lblk_t ex_start, ex_end;
 
 	/* Let path point to the last extent */
-	path = ext4_ext_find_extent(inode, EXT_MAX_BLOCKS - 1, NULL, 0);
+	path = ext4_find_extent(inode, EXT_MAX_BLOCKS - 1, NULL, 0);
 	if (IS_ERR(path))
 		return PTR_ERR(path);
 
 	depth = path->p_depth;
 	extent = path[depth].p_ext;
-	if (!extent) {
-		ext4_ext_drop_refs(path);
-		kfree(path);
-		return ret;
-	}
+	if (!extent)
+		goto out;
 
 	stop_block = le32_to_cpu(extent->ee_block) +
 			ext4_ext_get_actual_len(extent);
-	ext4_ext_drop_refs(path);
-	kfree(path);
 
 	/* Nothing to shift, if hole is at the end of file */
 	if (start >= stop_block)
-		return ret;
+		goto out;
 
 	/*
 	 * Don't start shifting extents until we make sure the hole is big
 	 * enough to accomodate the shift.
 	 */
-	path = ext4_ext_find_extent(inode, start - 1, NULL, 0);
+	path = ext4_find_extent(inode, start - 1, &path, 0);
 	if (IS_ERR(path))
 		return PTR_ERR(path);
 	depth = path->p_depth;
@@ -5346,8 +5342,6 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle,
 		ex_start = 0;
 		ex_end = 0;
 	}
-	ext4_ext_drop_refs(path);
-	kfree(path);
 
 	if ((start == ex_start && shift > ex_start) ||
 	    (shift > start - ex_end))
@@ -5355,7 +5349,7 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle,
 
 	/* Its safe to start updating extents */
 	while (start < stop_block) {
-		path = ext4_ext_find_extent(inode, start, NULL, 0);
+		path = ext4_find_extent(inode, start, &path, 0);
 		if (IS_ERR(path))
 			return PTR_ERR(path);
 		depth = path->p_depth;
@@ -5365,27 +5359,23 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle,
 					 (unsigned long) start);
 			return -EIO;
 		}
-
-		current_block = le32_to_cpu(extent->ee_block);
-		if (start > current_block) {
+		if (start > le32_to_cpu(extent->ee_block)) {
 			/* Hole, move to the next extent */
-			ret = mext_next_extent(inode, path, &extent);
-			if (ret != 0) {
-				ext4_ext_drop_refs(path);
-				kfree(path);
-				if (ret == 1)
-					ret = 0;
-				break;
+			if (extent < EXT_LAST_EXTENT(path[depth].p_hdr)) {
+				path[depth].p_ext++;
+			} else {
+				start = ext4_ext_next_allocated_block(path);
+				continue;
 			}
 		}
 		ret = ext4_ext_shift_path_extents(path, shift, inode,
 				handle, &start);
-		ext4_ext_drop_refs(path);
-		kfree(path);
 		if (ret)
 			break;
 	}
-
+out:
+	ext4_ext_drop_refs(path);
+	kfree(path);
 	return ret;
 }
 
@@ -5508,3 +5498,199 @@ out_mutex:
 	mutex_unlock(&inode->i_mutex);
 	return ret;
 }
+
+/**
+ * ext4_swap_extents - Swap extents between two inodes
+ *
+ * @inode1:	First inode
+ * @inode2:	Second inode
+ * @lblk1:	Start block for first inode
+ * @lblk2:	Start block for second inode
+ * @count:	Number of blocks to swap
+ * @mark_unwritten: Mark second inode's extents as unwritten after swap
+ * @erp:	Pointer to save error value
+ *
+ * This helper routine does exactly what is promise "swap extents". All other
+ * stuff such as page-cache locking consistency, bh mapping consistency or
+ * extent's data copying must be performed by caller.
+ * Locking:
+ * 		i_mutex is held for both inodes
+ * 		i_data_sem is locked for write for both inodes
+ * Assumptions:
+ *		All pages from requested range are locked for both inodes
+ */
+int
+ext4_swap_extents(handle_t *handle, struct inode *inode1,
+		     struct inode *inode2, ext4_lblk_t lblk1, ext4_lblk_t lblk2,
+		  ext4_lblk_t count, int unwritten, int *erp)
+{
+	struct ext4_ext_path *path1 = NULL;
+	struct ext4_ext_path *path2 = NULL;
+	int replaced_count = 0;
+
+	BUG_ON(!rwsem_is_locked(&EXT4_I(inode1)->i_data_sem));
+	BUG_ON(!rwsem_is_locked(&EXT4_I(inode2)->i_data_sem));
+	BUG_ON(!mutex_is_locked(&inode1->i_mutex));
+	BUG_ON(!mutex_is_locked(&inode1->i_mutex));
+
+	*erp = ext4_es_remove_extent(inode1, lblk1, count);
+	if (unlikely(*erp))
+		return 0;
+	*erp = ext4_es_remove_extent(inode2, lblk2, count);
+	if (unlikely(*erp))
+		return 0;
+
+	while (count) {
+		struct ext4_extent *ex1, *ex2, tmp_ex;
+		ext4_lblk_t e1_blk, e2_blk;
+		int e1_len, e2_len, len;
+		int split = 0;
+
+		path1 = ext4_find_extent(inode1, lblk1, NULL, EXT4_EX_NOCACHE);
+		if (unlikely(IS_ERR(path1))) {
+			*erp = PTR_ERR(path1);
+			path1 = NULL;
+		finish:
+			count = 0;
+			goto repeat;
+		}
+		path2 = ext4_find_extent(inode2, lblk2, NULL, EXT4_EX_NOCACHE);
+		if (unlikely(IS_ERR(path2))) {
+			*erp = PTR_ERR(path2);
+			path2 = NULL;
+			goto finish;
+		}
+		ex1 = path1[path1->p_depth].p_ext;
+		ex2 = path2[path2->p_depth].p_ext;
+		/* Do we have somthing to swap ? */
+		if (unlikely(!ex2 || !ex1))
+			goto finish;
+
+		e1_blk = le32_to_cpu(ex1->ee_block);
+		e2_blk = le32_to_cpu(ex2->ee_block);
+		e1_len = ext4_ext_get_actual_len(ex1);
+		e2_len = ext4_ext_get_actual_len(ex2);
+
+		/* Hole handling */
+		if (!in_range(lblk1, e1_blk, e1_len) ||
+		    !in_range(lblk2, e2_blk, e2_len)) {
+			ext4_lblk_t next1, next2;
+
+			/* if hole after extent, then go to next extent */
+			next1 = ext4_ext_next_allocated_block(path1);
+			next2 = ext4_ext_next_allocated_block(path2);
+			/* If hole before extent, then shift to that extent */
+			if (e1_blk > lblk1)
+				next1 = e1_blk;
+			if (e2_blk > lblk2)
+				next2 = e1_blk;
+			/* Do we have something to swap */
+			if (next1 == EXT_MAX_BLOCKS || next2 == EXT_MAX_BLOCKS)
+				goto finish;
+			/* Move to the rightest boundary */
+			len = next1 - lblk1;
+			if (len < next2 - lblk2)
+				len = next2 - lblk2;
+			if (len > count)
+				len = count;
+			lblk1 += len;
+			lblk2 += len;
+			count -= len;
+			goto repeat;
+		}
+
+		/* Prepare left boundary */
+		if (e1_blk < lblk1) {
+			split = 1;
+			*erp = ext4_force_split_extent_at(handle, inode1,
+						&path1, lblk1, 0);
+			if (unlikely(*erp))
+				goto finish;
+		}
+		if (e2_blk < lblk2) {
+			split = 1;
+			*erp = ext4_force_split_extent_at(handle, inode2,
+						&path2,  lblk2, 0);
+			if (unlikely(*erp))
+				goto finish;
+		}
+		/* ext4_split_extent_at() may result in leaf extent split,
+		 * path must to be revalidated. */
+		if (split)
+			goto repeat;
+
+		/* Prepare right boundary */
+		len = count;
+		if (len > e1_blk + e1_len - lblk1)
+			len = e1_blk + e1_len - lblk1;
+		if (len > e2_blk + e2_len - lblk2)
+			len = e2_blk + e2_len - lblk2;
+
+		if (len != e1_len) {
+			split = 1;
+			*erp = ext4_force_split_extent_at(handle, inode1,
+						&path1, lblk1 + len, 0);
+			if (unlikely(*erp))
+				goto finish;
+		}
+		if (len != e2_len) {
+			split = 1;
+			*erp = ext4_force_split_extent_at(handle, inode2,
+						&path2, lblk2 + len, 0);
+			if (*erp)
+				goto finish;
+		}
+		/* ext4_split_extent_at() may result in leaf extent split,
+		 * path must to be revalidated. */
+		if (split)
+			goto repeat;
+
+		BUG_ON(e2_len != e1_len);
+		*erp = ext4_ext_get_access(handle, inode1, path1 + path1->p_depth);
+		if (unlikely(*erp))
+			goto finish;
+		*erp = ext4_ext_get_access(handle, inode2, path2 + path2->p_depth);
+		if (unlikely(*erp))
+			goto finish;
+
+		/* Both extents are fully inside boundaries. Swap it now */
+		tmp_ex = *ex1;
+		ext4_ext_store_pblock(ex1, ext4_ext_pblock(ex2));
+		ext4_ext_store_pblock(ex2, ext4_ext_pblock(&tmp_ex));
+		ex1->ee_len = cpu_to_le16(e2_len);
+		ex2->ee_len = cpu_to_le16(e1_len);
+		if (unwritten)
+			ext4_ext_mark_unwritten(ex2);
+		if (ext4_ext_is_unwritten(&tmp_ex))
+			ext4_ext_mark_unwritten(ex1);
+
+		ext4_ext_try_to_merge(handle, inode2, path2, ex2);
+		ext4_ext_try_to_merge(handle, inode1, path1, ex1);
+		*erp = ext4_ext_dirty(handle, inode2, path2 +
+				      path2->p_depth);
+		if (unlikely(*erp))
+			goto finish;
+		*erp = ext4_ext_dirty(handle, inode1, path1 +
+				      path1->p_depth);
+		/*
+		 * Looks scarry ah..? second inode already points to new blocks,
+		 * and it was successfully dirtied. But luckily error may happen
+		 * only due to journal error, so full transaction will be
+		 * aborted anyway.
+		 */
+		if (unlikely(*erp))
+			goto finish;
+		lblk1 += len;
+		lblk2 += len;
+		replaced_count += len;
+		count -= len;
+
+	repeat:
+		ext4_ext_drop_refs(path1);
+		kfree(path1);
+		ext4_ext_drop_refs(path2);
+		kfree(path2);
+		path1 = path2 = NULL;
+	}
+	return replaced_count;
+}
diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c
index 0b7e28e..94e7855 100644
--- a/fs/ext4/extents_status.c
+++ b/fs/ext4/extents_status.c
@@ -11,6 +11,8 @@
  */
 #include <linux/rbtree.h>
 #include <linux/list_sort.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
 #include "ext4.h"
 #include "extents_status.h"
 
@@ -313,19 +315,27 @@ ext4_es_alloc_extent(struct inode *inode, ext4_lblk_t lblk, ext4_lblk_t len,
 	 */
 	if (!ext4_es_is_delayed(es)) {
 		EXT4_I(inode)->i_es_lru_nr++;
-		percpu_counter_inc(&EXT4_SB(inode->i_sb)->s_extent_cache_cnt);
+		percpu_counter_inc(&EXT4_SB(inode->i_sb)->
+					s_es_stats.es_stats_lru_cnt);
 	}
 
+	EXT4_I(inode)->i_es_all_nr++;
+	percpu_counter_inc(&EXT4_SB(inode->i_sb)->s_es_stats.es_stats_all_cnt);
+
 	return es;
 }
 
 static void ext4_es_free_extent(struct inode *inode, struct extent_status *es)
 {
+	EXT4_I(inode)->i_es_all_nr--;
+	percpu_counter_dec(&EXT4_SB(inode->i_sb)->s_es_stats.es_stats_all_cnt);
+
 	/* Decrease the lru counter when this es is not delayed */
 	if (!ext4_es_is_delayed(es)) {
 		BUG_ON(EXT4_I(inode)->i_es_lru_nr == 0);
 		EXT4_I(inode)->i_es_lru_nr--;
-		percpu_counter_dec(&EXT4_SB(inode->i_sb)->s_extent_cache_cnt);
+		percpu_counter_dec(&EXT4_SB(inode->i_sb)->
+					s_es_stats.es_stats_lru_cnt);
 	}
 
 	kmem_cache_free(ext4_es_cachep, es);
@@ -426,7 +436,7 @@ static void ext4_es_insert_extent_ext_check(struct inode *inode,
 	unsigned short ee_len;
 	int depth, ee_status, es_status;
 
-	path = ext4_ext_find_extent(inode, es->es_lblk, NULL, EXT4_EX_NOCACHE);
+	path = ext4_find_extent(inode, es->es_lblk, NULL, EXT4_EX_NOCACHE);
 	if (IS_ERR(path))
 		return;
 
@@ -499,10 +509,8 @@ static void ext4_es_insert_extent_ext_check(struct inode *inode,
 		}
 	}
 out:
-	if (path) {
-		ext4_ext_drop_refs(path);
-		kfree(path);
-	}
+	ext4_ext_drop_refs(path);
+	kfree(path);
 }
 
 static void ext4_es_insert_extent_ind_check(struct inode *inode,
@@ -731,6 +739,7 @@ int ext4_es_lookup_extent(struct inode *inode, ext4_lblk_t lblk,
 			  struct extent_status *es)
 {
 	struct ext4_es_tree *tree;
+	struct ext4_es_stats *stats;
 	struct extent_status *es1 = NULL;
 	struct rb_node *node;
 	int found = 0;
@@ -767,11 +776,15 @@ int ext4_es_lookup_extent(struct inode *inode, ext4_lblk_t lblk,
 	}
 
 out:
+	stats = &EXT4_SB(inode->i_sb)->s_es_stats;
 	if (found) {
 		BUG_ON(!es1);
 		es->es_lblk = es1->es_lblk;
 		es->es_len = es1->es_len;
 		es->es_pblk = es1->es_pblk;
+		stats->es_stats_cache_hits++;
+	} else {
+		stats->es_stats_cache_misses++;
 	}
 
 	read_unlock(&EXT4_I(inode)->i_es_lock);
@@ -933,11 +946,16 @@ static int __ext4_es_shrink(struct ext4_sb_info *sbi, int nr_to_scan,
 			    struct ext4_inode_info *locked_ei)
 {
 	struct ext4_inode_info *ei;
+	struct ext4_es_stats *es_stats;
 	struct list_head *cur, *tmp;
 	LIST_HEAD(skipped);
+	ktime_t start_time;
+	u64 scan_time;
 	int nr_shrunk = 0;
 	int retried = 0, skip_precached = 1, nr_skipped = 0;
 
+	es_stats = &sbi->s_es_stats;
+	start_time = ktime_get();
 	spin_lock(&sbi->s_es_lru_lock);
 
 retry:
@@ -948,7 +966,8 @@ retry:
 		 * If we have already reclaimed all extents from extent
 		 * status tree, just stop the loop immediately.
 		 */
-		if (percpu_counter_read_positive(&sbi->s_extent_cache_cnt) == 0)
+		if (percpu_counter_read_positive(
+				&es_stats->es_stats_lru_cnt) == 0)
 			break;
 
 		ei = list_entry(cur, struct ext4_inode_info, i_es_lru);
@@ -958,7 +977,7 @@ retry:
 		 * time.  Normally we try hard to avoid shrinking
 		 * precached inodes, but we will as a last resort.
 		 */
-		if ((sbi->s_es_last_sorted < ei->i_touch_when) ||
+		if ((es_stats->es_stats_last_sorted < ei->i_touch_when) ||
 		    (skip_precached && ext4_test_inode_state(&ei->vfs_inode,
 						EXT4_STATE_EXT_PRECACHED))) {
 			nr_skipped++;
@@ -992,7 +1011,7 @@ retry:
 	if ((nr_shrunk == 0) && nr_skipped && !retried) {
 		retried++;
 		list_sort(NULL, &sbi->s_es_lru, ext4_inode_touch_time_cmp);
-		sbi->s_es_last_sorted = jiffies;
+		es_stats->es_stats_last_sorted = jiffies;
 		ei = list_first_entry(&sbi->s_es_lru, struct ext4_inode_info,
 				      i_es_lru);
 		/*
@@ -1010,6 +1029,22 @@ retry:
 	if (locked_ei && nr_shrunk == 0)
 		nr_shrunk = __es_try_to_reclaim_extents(locked_ei, nr_to_scan);
 
+	scan_time = ktime_to_ns(ktime_sub(ktime_get(), start_time));
+	if (likely(es_stats->es_stats_scan_time))
+		es_stats->es_stats_scan_time = (scan_time +
+				es_stats->es_stats_scan_time*3) / 4;
+	else
+		es_stats->es_stats_scan_time = scan_time;
+	if (scan_time > es_stats->es_stats_max_scan_time)
+		es_stats->es_stats_max_scan_time = scan_time;
+	if (likely(es_stats->es_stats_shrunk))
+		es_stats->es_stats_shrunk = (nr_shrunk +
+				es_stats->es_stats_shrunk*3) / 4;
+	else
+		es_stats->es_stats_shrunk = nr_shrunk;
+
+	trace_ext4_es_shrink(sbi->s_sb, nr_shrunk, scan_time, skip_precached,
+			     nr_skipped, retried);
 	return nr_shrunk;
 }
 
@@ -1020,8 +1055,8 @@ static unsigned long ext4_es_count(struct shrinker *shrink,
 	struct ext4_sb_info *sbi;
 
 	sbi = container_of(shrink, struct ext4_sb_info, s_es_shrinker);
-	nr = percpu_counter_read_positive(&sbi->s_extent_cache_cnt);
-	trace_ext4_es_shrink_enter(sbi->s_sb, sc->nr_to_scan, nr);
+	nr = percpu_counter_read_positive(&sbi->s_es_stats.es_stats_lru_cnt);
+	trace_ext4_es_shrink_count(sbi->s_sb, sc->nr_to_scan, nr);
 	return nr;
 }
 
@@ -1033,31 +1068,160 @@ static unsigned long ext4_es_scan(struct shrinker *shrink,
 	int nr_to_scan = sc->nr_to_scan;
 	int ret, nr_shrunk;
 
-	ret = percpu_counter_read_positive(&sbi->s_extent_cache_cnt);
-	trace_ext4_es_shrink_enter(sbi->s_sb, nr_to_scan, ret);
+	ret = percpu_counter_read_positive(&sbi->s_es_stats.es_stats_lru_cnt);
+	trace_ext4_es_shrink_scan_enter(sbi->s_sb, nr_to_scan, ret);
 
 	if (!nr_to_scan)
 		return ret;
 
 	nr_shrunk = __ext4_es_shrink(sbi, nr_to_scan, NULL);
 
-	trace_ext4_es_shrink_exit(sbi->s_sb, nr_shrunk, ret);
+	trace_ext4_es_shrink_scan_exit(sbi->s_sb, nr_shrunk, ret);
 	return nr_shrunk;
 }
 
-void ext4_es_register_shrinker(struct ext4_sb_info *sbi)
+static void *ext4_es_seq_shrinker_info_start(struct seq_file *seq, loff_t *pos)
 {
+	return *pos ? NULL : SEQ_START_TOKEN;
+}
+
+static void *
+ext4_es_seq_shrinker_info_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+	return NULL;
+}
+
+static int ext4_es_seq_shrinker_info_show(struct seq_file *seq, void *v)
+{
+	struct ext4_sb_info *sbi = seq->private;
+	struct ext4_es_stats *es_stats = &sbi->s_es_stats;
+	struct ext4_inode_info *ei, *max = NULL;
+	unsigned int inode_cnt = 0;
+
+	if (v != SEQ_START_TOKEN)
+		return 0;
+
+	/* here we just find an inode that has the max nr. of objects */
+	spin_lock(&sbi->s_es_lru_lock);
+	list_for_each_entry(ei, &sbi->s_es_lru, i_es_lru) {
+		inode_cnt++;
+		if (max && max->i_es_all_nr < ei->i_es_all_nr)
+			max = ei;
+		else if (!max)
+			max = ei;
+	}
+	spin_unlock(&sbi->s_es_lru_lock);
+
+	seq_printf(seq, "stats:\n  %lld objects\n  %lld reclaimable objects\n",
+		   percpu_counter_sum_positive(&es_stats->es_stats_all_cnt),
+		   percpu_counter_sum_positive(&es_stats->es_stats_lru_cnt));
+	seq_printf(seq, "  %lu/%lu cache hits/misses\n",
+		   es_stats->es_stats_cache_hits,
+		   es_stats->es_stats_cache_misses);
+	if (es_stats->es_stats_last_sorted != 0)
+		seq_printf(seq, "  %u ms last sorted interval\n",
+			   jiffies_to_msecs(jiffies -
+					    es_stats->es_stats_last_sorted));
+	if (inode_cnt)
+		seq_printf(seq, "  %d inodes on lru list\n", inode_cnt);
+
+	seq_printf(seq, "average:\n  %llu us scan time\n",
+	    div_u64(es_stats->es_stats_scan_time, 1000));
+	seq_printf(seq, "  %lu shrunk objects\n", es_stats->es_stats_shrunk);
+	if (inode_cnt)
+		seq_printf(seq,
+		    "maximum:\n  %lu inode (%u objects, %u reclaimable)\n"
+		    "  %llu us max scan time\n",
+		    max->vfs_inode.i_ino, max->i_es_all_nr, max->i_es_lru_nr,
+		    div_u64(es_stats->es_stats_max_scan_time, 1000));
+
+	return 0;
+}
+
+static void ext4_es_seq_shrinker_info_stop(struct seq_file *seq, void *v)
+{
+}
+
+static const struct seq_operations ext4_es_seq_shrinker_info_ops = {
+	.start = ext4_es_seq_shrinker_info_start,
+	.next  = ext4_es_seq_shrinker_info_next,
+	.stop  = ext4_es_seq_shrinker_info_stop,
+	.show  = ext4_es_seq_shrinker_info_show,
+};
+
+static int
+ext4_es_seq_shrinker_info_open(struct inode *inode, struct file *file)
+{
+	int ret;
+
+	ret = seq_open(file, &ext4_es_seq_shrinker_info_ops);
+	if (!ret) {
+		struct seq_file *m = file->private_data;
+		m->private = PDE_DATA(inode);
+	}
+
+	return ret;
+}
+
+static int
+ext4_es_seq_shrinker_info_release(struct inode *inode, struct file *file)
+{
+	return seq_release(inode, file);
+}
+
+static const struct file_operations ext4_es_seq_shrinker_info_fops = {
+	.owner		= THIS_MODULE,
+	.open		= ext4_es_seq_shrinker_info_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= ext4_es_seq_shrinker_info_release,
+};
+
+int ext4_es_register_shrinker(struct ext4_sb_info *sbi)
+{
+	int err;
+
 	INIT_LIST_HEAD(&sbi->s_es_lru);
 	spin_lock_init(&sbi->s_es_lru_lock);
-	sbi->s_es_last_sorted = 0;
+	sbi->s_es_stats.es_stats_last_sorted = 0;
+	sbi->s_es_stats.es_stats_shrunk = 0;
+	sbi->s_es_stats.es_stats_cache_hits = 0;
+	sbi->s_es_stats.es_stats_cache_misses = 0;
+	sbi->s_es_stats.es_stats_scan_time = 0;
+	sbi->s_es_stats.es_stats_max_scan_time = 0;
+	err = percpu_counter_init(&sbi->s_es_stats.es_stats_all_cnt, 0, GFP_KERNEL);
+	if (err)
+		return err;
+	err = percpu_counter_init(&sbi->s_es_stats.es_stats_lru_cnt, 0, GFP_KERNEL);
+	if (err)
+		goto err1;
+
 	sbi->s_es_shrinker.scan_objects = ext4_es_scan;
 	sbi->s_es_shrinker.count_objects = ext4_es_count;
 	sbi->s_es_shrinker.seeks = DEFAULT_SEEKS;
-	register_shrinker(&sbi->s_es_shrinker);
+	err = register_shrinker(&sbi->s_es_shrinker);
+	if (err)
+		goto err2;
+
+	if (sbi->s_proc)
+		proc_create_data("es_shrinker_info", S_IRUGO, sbi->s_proc,
+				 &ext4_es_seq_shrinker_info_fops, sbi);
+
+	return 0;
+
+err2:
+	percpu_counter_destroy(&sbi->s_es_stats.es_stats_lru_cnt);
+err1:
+	percpu_counter_destroy(&sbi->s_es_stats.es_stats_all_cnt);
+	return err;
 }
 
 void ext4_es_unregister_shrinker(struct ext4_sb_info *sbi)
 {
+	if (sbi->s_proc)
+		remove_proc_entry("es_shrinker_info", sbi->s_proc);
+	percpu_counter_destroy(&sbi->s_es_stats.es_stats_all_cnt);
+	percpu_counter_destroy(&sbi->s_es_stats.es_stats_lru_cnt);
 	unregister_shrinker(&sbi->s_es_shrinker);
 }
 
diff --git a/fs/ext4/extents_status.h b/fs/ext4/extents_status.h
index f1b62a4..efd5f97 100644
--- a/fs/ext4/extents_status.h
+++ b/fs/ext4/extents_status.h
@@ -64,6 +64,17 @@ struct ext4_es_tree {
 	struct extent_status *cache_es;	/* recently accessed extent */
 };
 
+struct ext4_es_stats {
+	unsigned long es_stats_last_sorted;
+	unsigned long es_stats_shrunk;
+	unsigned long es_stats_cache_hits;
+	unsigned long es_stats_cache_misses;
+	u64 es_stats_scan_time;
+	u64 es_stats_max_scan_time;
+	struct percpu_counter es_stats_all_cnt;
+	struct percpu_counter es_stats_lru_cnt;
+};
+
 extern int __init ext4_init_es(void);
 extern void ext4_exit_es(void);
 extern void ext4_es_init_tree(struct ext4_es_tree *tree);
@@ -138,7 +149,7 @@ static inline void ext4_es_store_pblock_status(struct extent_status *es,
 		       (pb & ~ES_MASK));
 }
 
-extern void ext4_es_register_shrinker(struct ext4_sb_info *sbi);
+extern int ext4_es_register_shrinker(struct ext4_sb_info *sbi);
 extern void ext4_es_unregister_shrinker(struct ext4_sb_info *sbi);
 extern void ext4_es_lru_add(struct inode *inode);
 extern void ext4_es_lru_del(struct inode *inode);
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index aca7b24..8131be8 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -137,10 +137,10 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
 			iov_iter_truncate(from, sbi->s_bitmap_maxbytes - pos);
 	}
 
+	iocb->private = &overwrite;
 	if (o_direct) {
 		blk_start_plug(&plug);
 
-		iocb->private = &overwrite;
 
 		/* check whether we do a DIO overwrite or not */
 		if (ext4_should_dioread_nolock(inode) && !aio_mutex &&
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 5b87fc3..ac644c3 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -887,6 +887,10 @@ got:
 		struct buffer_head *block_bitmap_bh;
 
 		block_bitmap_bh = ext4_read_block_bitmap(sb, group);
+		if (!block_bitmap_bh) {
+			err = -EIO;
+			goto out;
+		}
 		BUFFER_TRACE(block_bitmap_bh, "get block bitmap access");
 		err = ext4_journal_get_write_access(handle, block_bitmap_bh);
 		if (err) {
@@ -1011,8 +1015,7 @@ got:
 	spin_unlock(&sbi->s_next_gen_lock);
 
 	/* Precompute checksum seed for inode metadata */
-	if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
-			EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) {
+	if (ext4_has_metadata_csum(sb)) {
 		__u32 csum;
 		__le32 inum = cpu_to_le32(inode->i_ino);
 		__le32 gen = cpu_to_le32(inode->i_generation);
diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c
index e75f840..36b3696 100644
--- a/fs/ext4/indirect.c
+++ b/fs/ext4/indirect.c
@@ -318,34 +318,24 @@ static int ext4_blks_to_allocate(Indirect *branch, int k, unsigned int blks,
  *	ext4_alloc_block() (normally -ENOSPC). Otherwise we set the chain
  *	as described above and return 0.
  */
-static int ext4_alloc_branch(handle_t *handle, struct inode *inode,
-			     ext4_lblk_t iblock, int indirect_blks,
-			     int *blks, ext4_fsblk_t goal,
-			     ext4_lblk_t *offsets, Indirect *branch)
+static int ext4_alloc_branch(handle_t *handle,
+			     struct ext4_allocation_request *ar,
+			     int indirect_blks, ext4_lblk_t *offsets,
+			     Indirect *branch)
 {
-	struct ext4_allocation_request	ar;
 	struct buffer_head *		bh;
 	ext4_fsblk_t			b, new_blocks[4];
 	__le32				*p;
 	int				i, j, err, len = 1;
 
-	/*
-	 * Set up for the direct block allocation
-	 */
-	memset(&ar, 0, sizeof(ar));
-	ar.inode = inode;
-	ar.len = *blks;
-	ar.logical = iblock;
-	if (S_ISREG(inode->i_mode))
-		ar.flags = EXT4_MB_HINT_DATA;
-
 	for (i = 0; i <= indirect_blks; i++) {
 		if (i == indirect_blks) {
-			ar.goal = goal;
-			new_blocks[i] = ext4_mb_new_blocks(handle, &ar, &err);
+			new_blocks[i] = ext4_mb_new_blocks(handle, ar, &err);
 		} else
-			goal = new_blocks[i] = ext4_new_meta_blocks(handle, inode,
-							goal, 0, NULL, &err);
+			ar->goal = new_blocks[i] = ext4_new_meta_blocks(handle,
+					ar->inode, ar->goal,
+					ar->flags & EXT4_MB_DELALLOC_RESERVED,
+					NULL, &err);
 		if (err) {
 			i--;
 			goto failed;
@@ -354,7 +344,7 @@ static int ext4_alloc_branch(handle_t *handle, struct inode *inode,
 		if (i == 0)
 			continue;
 
-		bh = branch[i].bh = sb_getblk(inode->i_sb, new_blocks[i-1]);
+		bh = branch[i].bh = sb_getblk(ar->inode->i_sb, new_blocks[i-1]);
 		if (unlikely(!bh)) {
 			err = -ENOMEM;
 			goto failed;
@@ -372,7 +362,7 @@ static int ext4_alloc_branch(handle_t *handle, struct inode *inode,
 		b = new_blocks[i];
 
 		if (i == indirect_blks)
-			len = ar.len;
+			len = ar->len;
 		for (j = 0; j < len; j++)
 			*p++ = cpu_to_le32(b++);
 
@@ -381,11 +371,10 @@ static int ext4_alloc_branch(handle_t *handle, struct inode *inode,
 		unlock_buffer(bh);
 
 		BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
-		err = ext4_handle_dirty_metadata(handle, inode, bh);
+		err = ext4_handle_dirty_metadata(handle, ar->inode, bh);
 		if (err)
 			goto failed;
 	}
-	*blks = ar.len;
 	return 0;
 failed:
 	for (; i >= 0; i--) {
@@ -396,10 +385,10 @@ failed:
 		 * existing before ext4_alloc_branch() was called.
 		 */
 		if (i > 0 && i != indirect_blks && branch[i].bh)
-			ext4_forget(handle, 1, inode, branch[i].bh,
+			ext4_forget(handle, 1, ar->inode, branch[i].bh,
 				    branch[i].bh->b_blocknr);
-		ext4_free_blocks(handle, inode, NULL, new_blocks[i],
-				 (i == indirect_blks) ? ar.len : 1, 0);
+		ext4_free_blocks(handle, ar->inode, NULL, new_blocks[i],
+				 (i == indirect_blks) ? ar->len : 1, 0);
 	}
 	return err;
 }
@@ -419,9 +408,9 @@ failed:
  * inode (->i_blocks, etc.). In case of success we end up with the full
  * chain to new block and return 0.
  */
-static int ext4_splice_branch(handle_t *handle, struct inode *inode,
-			      ext4_lblk_t block, Indirect *where, int num,
-			      int blks)
+static int ext4_splice_branch(handle_t *handle,
+			      struct ext4_allocation_request *ar,
+			      Indirect *where, int num)
 {
 	int i;
 	int err = 0;
@@ -446,9 +435,9 @@ static int ext4_splice_branch(handle_t *handle, struct inode *inode,
 	 * Update the host buffer_head or inode to point to more just allocated
 	 * direct blocks blocks
 	 */
-	if (num == 0 && blks > 1) {
+	if (num == 0 && ar->len > 1) {
 		current_block = le32_to_cpu(where->key) + 1;
-		for (i = 1; i < blks; i++)
+		for (i = 1; i < ar->len; i++)
 			*(where->p + i) = cpu_to_le32(current_block++);
 	}
 
@@ -465,14 +454,14 @@ static int ext4_splice_branch(handle_t *handle, struct inode *inode,
 		 */
 		jbd_debug(5, "splicing indirect only\n");
 		BUFFER_TRACE(where->bh, "call ext4_handle_dirty_metadata");
-		err = ext4_handle_dirty_metadata(handle, inode, where->bh);
+		err = ext4_handle_dirty_metadata(handle, ar->inode, where->bh);
 		if (err)
 			goto err_out;
 	} else {
 		/*
 		 * OK, we spliced it into the inode itself on a direct block.
 		 */
-		ext4_mark_inode_dirty(handle, inode);
+		ext4_mark_inode_dirty(handle, ar->inode);
 		jbd_debug(5, "splicing direct\n");
 	}
 	return err;
@@ -484,11 +473,11 @@ err_out:
 		 * need to revoke the block, which is why we don't
 		 * need to set EXT4_FREE_BLOCKS_METADATA.
 		 */
-		ext4_free_blocks(handle, inode, where[i].bh, 0, 1,
+		ext4_free_blocks(handle, ar->inode, where[i].bh, 0, 1,
 				 EXT4_FREE_BLOCKS_FORGET);
 	}
-	ext4_free_blocks(handle, inode, NULL, le32_to_cpu(where[num].key),
-			 blks, 0);
+	ext4_free_blocks(handle, ar->inode, NULL, le32_to_cpu(where[num].key),
+			 ar->len, 0);
 
 	return err;
 }
@@ -525,11 +514,11 @@ int ext4_ind_map_blocks(handle_t *handle, struct inode *inode,
 			struct ext4_map_blocks *map,
 			int flags)
 {
+	struct ext4_allocation_request ar;
 	int err = -EIO;
 	ext4_lblk_t offsets[4];
 	Indirect chain[4];
 	Indirect *partial;
-	ext4_fsblk_t goal;
 	int indirect_blks;
 	int blocks_to_boundary = 0;
 	int depth;
@@ -579,7 +568,16 @@ int ext4_ind_map_blocks(handle_t *handle, struct inode *inode,
 		return -ENOSPC;
 	}
 
-	goal = ext4_find_goal(inode, map->m_lblk, partial);
+	/* Set up for the direct block allocation */
+	memset(&ar, 0, sizeof(ar));
+	ar.inode = inode;
+	ar.logical = map->m_lblk;
+	if (S_ISREG(inode->i_mode))
+		ar.flags = EXT4_MB_HINT_DATA;
+	if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
+		ar.flags |= EXT4_MB_DELALLOC_RESERVED;
+
+	ar.goal = ext4_find_goal(inode, map->m_lblk, partial);
 
 	/* the number of blocks need to allocate for [d,t]indirect blocks */
 	indirect_blks = (chain + depth) - partial - 1;
@@ -588,13 +586,13 @@ int ext4_ind_map_blocks(handle_t *handle, struct inode *inode,
 	 * Next look up the indirect map to count the totoal number of
 	 * direct blocks to allocate for this branch.
 	 */
-	count = ext4_blks_to_allocate(partial, indirect_blks,
-				      map->m_len, blocks_to_boundary);
+	ar.len = ext4_blks_to_allocate(partial, indirect_blks,
+				       map->m_len, blocks_to_boundary);
+
 	/*
 	 * Block out ext4_truncate while we alter the tree
 	 */
-	err = ext4_alloc_branch(handle, inode, map->m_lblk, indirect_blks,
-				&count, goal,
+	err = ext4_alloc_branch(handle, &ar, indirect_blks,
 				offsets + (partial - chain), partial);
 
 	/*
@@ -605,14 +603,14 @@ int ext4_ind_map_blocks(handle_t *handle, struct inode *inode,
 	 * may need to return -EAGAIN upwards in the worst case.  --sct
 	 */
 	if (!err)
-		err = ext4_splice_branch(handle, inode, map->m_lblk,
-					 partial, indirect_blks, count);
+		err = ext4_splice_branch(handle, &ar, partial, indirect_blks);
 	if (err)
 		goto cleanup;
 
 	map->m_flags |= EXT4_MAP_NEW;
 
 	ext4_update_inode_fsync_trans(handle, inode, 1);
+	count = ar.len;
 got_it:
 	map->m_flags |= EXT4_MAP_MAPPED;
 	map->m_pblk = le32_to_cpu(chain[depth-1].key);
diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c
index bea662b..3ea6269 100644
--- a/fs/ext4/inline.c
+++ b/fs/ext4/inline.c
@@ -594,6 +594,7 @@ retry:
 	if (ret) {
 		unlock_page(page);
 		page_cache_release(page);
+		page = NULL;
 		ext4_orphan_add(handle, inode);
 		up_write(&EXT4_I(inode)->xattr_sem);
 		sem_held = 0;
@@ -613,7 +614,8 @@ retry:
 	if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
 		goto retry;
 
-	block_commit_write(page, from, to);
+	if (page)
+		block_commit_write(page, from, to);
 out:
 	if (page) {
 		unlock_page(page);
@@ -1126,8 +1128,7 @@ static int ext4_finish_convert_inline_dir(handle_t *handle,
 	memcpy((void *)de, buf + EXT4_INLINE_DOTDOT_SIZE,
 		inline_size - EXT4_INLINE_DOTDOT_SIZE);
 
-	if (EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
-				       EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
+	if (ext4_has_metadata_csum(inode->i_sb))
 		csum_size = sizeof(struct ext4_dir_entry_tail);
 
 	inode->i_size = inode->i_sb->s_blocksize;
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 3aa26e9..3356ab5 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -83,8 +83,7 @@ static int ext4_inode_csum_verify(struct inode *inode, struct ext4_inode *raw,
 
 	if (EXT4_SB(inode->i_sb)->s_es->s_creator_os !=
 	    cpu_to_le32(EXT4_OS_LINUX) ||
-	    !EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
-		EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
+	    !ext4_has_metadata_csum(inode->i_sb))
 		return 1;
 
 	provided = le16_to_cpu(raw->i_checksum_lo);
@@ -105,8 +104,7 @@ static void ext4_inode_csum_set(struct inode *inode, struct ext4_inode *raw,
 
 	if (EXT4_SB(inode->i_sb)->s_es->s_creator_os !=
 	    cpu_to_le32(EXT4_OS_LINUX) ||
-	    !EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
-		EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
+	    !ext4_has_metadata_csum(inode->i_sb))
 		return;
 
 	csum = ext4_inode_csum(inode, raw, ei);
@@ -224,16 +222,15 @@ void ext4_evict_inode(struct inode *inode)
 		goto no_delete;
 	}
 
-	if (!is_bad_inode(inode))
-		dquot_initialize(inode);
+	if (is_bad_inode(inode))
+		goto no_delete;
+	dquot_initialize(inode);
 
 	if (ext4_should_order_data(inode))
 		ext4_begin_ordered_truncate(inode, 0);
 	truncate_inode_pages_final(&inode->i_data);
 
 	WARN_ON(atomic_read(&EXT4_I(inode)->i_ioend_count));
-	if (is_bad_inode(inode))
-		goto no_delete;
 
 	/*
 	 * Protect us against freezing - iput() caller didn't have to have any
@@ -590,20 +587,12 @@ found:
 	/*
 	 * New blocks allocate and/or writing to unwritten extent
 	 * will possibly result in updating i_data, so we take
-	 * the write lock of i_data_sem, and call get_blocks()
+	 * the write lock of i_data_sem, and call get_block()
 	 * with create == 1 flag.
 	 */
 	down_write(&EXT4_I(inode)->i_data_sem);
 
 	/*
-	 * if the caller is from delayed allocation writeout path
-	 * we have already reserved fs blocks for allocation
-	 * let the underlying get_block() function know to
-	 * avoid double accounting
-	 */
-	if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
-		ext4_set_inode_state(inode, EXT4_STATE_DELALLOC_RESERVED);
-	/*
 	 * We need to check for EXT4 here because migrate
 	 * could have changed the inode type in between
 	 */
@@ -631,8 +620,6 @@ found:
 			(flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE))
 			ext4_da_update_reserve_space(inode, retval, 1);
 	}
-	if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
-		ext4_clear_inode_state(inode, EXT4_STATE_DELALLOC_RESERVED);
 
 	if (retval > 0) {
 		unsigned int status;
@@ -734,11 +721,11 @@ int ext4_get_block(struct inode *inode, sector_t iblock,
  * `handle' can be NULL if create is zero
  */
 struct buffer_head *ext4_getblk(handle_t *handle, struct inode *inode,
-				ext4_lblk_t block, int create, int *errp)
+				ext4_lblk_t block, int create)
 {
 	struct ext4_map_blocks map;
 	struct buffer_head *bh;
-	int fatal = 0, err;
+	int err;
 
 	J_ASSERT(handle != NULL || create == 0);
 
@@ -747,21 +734,14 @@ struct buffer_head *ext4_getblk(handle_t *handle, struct inode *inode,
 	err = ext4_map_blocks(handle, inode, &map,
 			      create ? EXT4_GET_BLOCKS_CREATE : 0);
 
-	/* ensure we send some value back into *errp */
-	*errp = 0;
-
-	if (create && err == 0)
-		err = -ENOSPC;	/* should never happen */
+	if (err == 0)
+		return create ? ERR_PTR(-ENOSPC) : NULL;
 	if (err < 0)
-		*errp = err;
-	if (err <= 0)
-		return NULL;
+		return ERR_PTR(err);
 
 	bh = sb_getblk(inode->i_sb, map.m_pblk);
-	if (unlikely(!bh)) {
-		*errp = -ENOMEM;
-		return NULL;
-	}
+	if (unlikely(!bh))
+		return ERR_PTR(-ENOMEM);
 	if (map.m_flags & EXT4_MAP_NEW) {
 		J_ASSERT(create != 0);
 		J_ASSERT(handle != NULL);
@@ -775,44 +755,44 @@ struct buffer_head *ext4_getblk(handle_t *handle, struct inode *inode,
 		 */
 		lock_buffer(bh);
 		BUFFER_TRACE(bh, "call get_create_access");
-		fatal = ext4_journal_get_create_access(handle, bh);
-		if (!fatal && !buffer_uptodate(bh)) {
+		err = ext4_journal_get_create_access(handle, bh);
+		if (unlikely(err)) {
+			unlock_buffer(bh);
+			goto errout;
+		}
+		if (!buffer_uptodate(bh)) {
 			memset(bh->b_data, 0, inode->i_sb->s_blocksize);
 			set_buffer_uptodate(bh);
 		}
 		unlock_buffer(bh);
 		BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
 		err = ext4_handle_dirty_metadata(handle, inode, bh);
-		if (!fatal)
-			fatal = err;
-	} else {
+		if (unlikely(err))
+			goto errout;
+	} else
 		BUFFER_TRACE(bh, "not a new buffer");
-	}
-	if (fatal) {
-		*errp = fatal;
-		brelse(bh);
-		bh = NULL;
-	}
 	return bh;
+errout:
+	brelse(bh);
+	return ERR_PTR(err);
 }
 
 struct buffer_head *ext4_bread(handle_t *handle, struct inode *inode,
-			       ext4_lblk_t block, int create, int *err)
+			       ext4_lblk_t block, int create)
 {
 	struct buffer_head *bh;
 
-	bh = ext4_getblk(handle, inode, block, create, err);
-	if (!bh)
+	bh = ext4_getblk(handle, inode, block, create);
+	if (IS_ERR(bh))
 		return bh;
-	if (buffer_uptodate(bh))
+	if (!bh || buffer_uptodate(bh))
 		return bh;
 	ll_rw_block(READ | REQ_META | REQ_PRIO, 1, &bh);
 	wait_on_buffer(bh);
 	if (buffer_uptodate(bh))
 		return bh;
 	put_bh(bh);
-	*err = -EIO;
-	return NULL;
+	return ERR_PTR(-EIO);
 }
 
 int ext4_walk_page_buffers(handle_t *handle,
@@ -1536,7 +1516,7 @@ out_unlock:
 }
 
 /*
- * This is a special get_blocks_t callback which is used by
+ * This is a special get_block_t callback which is used by
  * ext4_da_write_begin().  It will either return mapped block or
  * reserve space for a single block.
  *
@@ -2011,12 +1991,10 @@ static int mpage_map_one_extent(handle_t *handle, struct mpage_da_data *mpd)
 	 * in data loss.  So use reserved blocks to allocate metadata if
 	 * possible.
 	 *
-	 * We pass in the magic EXT4_GET_BLOCKS_DELALLOC_RESERVE if the blocks
-	 * in question are delalloc blocks.  This affects functions in many
-	 * different parts of the allocation call path.  This flag exists
-	 * primarily because we don't want to change *many* call functions, so
-	 * ext4_map_blocks() will set the EXT4_STATE_DELALLOC_RESERVED flag
-	 * once the inode's allocation semaphore is taken.
+	 * We pass in the magic EXT4_GET_BLOCKS_DELALLOC_RESERVE if
+	 * the blocks in question are delalloc blocks.  This indicates
+	 * that the blocks and quotas has already been checked when
+	 * the data was copied into the page cache.
 	 */
 	get_blocks_flags = EXT4_GET_BLOCKS_CREATE |
 			   EXT4_GET_BLOCKS_METADATA_NOFAIL;
@@ -2515,6 +2493,20 @@ static int ext4_nonda_switch(struct super_block *sb)
 	return 0;
 }
 
+/* We always reserve for an inode update; the superblock could be there too */
+static int ext4_da_write_credits(struct inode *inode, loff_t pos, unsigned len)
+{
+	if (likely(EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
+				EXT4_FEATURE_RO_COMPAT_LARGE_FILE)))
+		return 1;
+
+	if (pos + len <= 0x7fffffffULL)
+		return 1;
+
+	/* We might need to update the superblock to set LARGE_FILE */
+	return 2;
+}
+
 static int ext4_da_write_begin(struct file *file, struct address_space *mapping,
 			       loff_t pos, unsigned len, unsigned flags,
 			       struct page **pagep, void **fsdata)
@@ -2565,7 +2557,8 @@ retry_grab:
 	 * of file which has an already mapped buffer.
 	 */
 retry_journal:
-	handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE, 1);
+	handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE,
+				ext4_da_write_credits(inode, pos, len));
 	if (IS_ERR(handle)) {
 		page_cache_release(page);
 		return PTR_ERR(handle);
@@ -2658,10 +2651,7 @@ static int ext4_da_write_end(struct file *file,
 	if (copied && new_i_size > EXT4_I(inode)->i_disksize) {
 		if (ext4_has_inline_data(inode) ||
 		    ext4_da_should_update_i_disksize(page, end)) {
-			down_write(&EXT4_I(inode)->i_data_sem);
-			if (new_i_size > EXT4_I(inode)->i_disksize)
-				EXT4_I(inode)->i_disksize = new_i_size;
-			up_write(&EXT4_I(inode)->i_data_sem);
+			ext4_update_i_disksize(inode, new_i_size);
 			/* We need to mark inode dirty even if
 			 * new_i_size is less that inode->i_size
 			 * bu greater than i_disksize.(hint delalloc)
@@ -3936,8 +3926,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
 		ei->i_extra_isize = 0;
 
 	/* Precompute checksum seed for inode metadata */
-	if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
-			EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) {
+	if (ext4_has_metadata_csum(sb)) {
 		struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
 		__u32 csum;
 		__le32 inum = cpu_to_le32(inode->i_ino);
@@ -4127,6 +4116,13 @@ bad_inode:
 	return ERR_PTR(ret);
 }
 
+struct inode *ext4_iget_normal(struct super_block *sb, unsigned long ino)
+{
+	if (ino < EXT4_FIRST_INO(sb) && ino != EXT4_ROOT_INO)
+		return ERR_PTR(-EIO);
+	return ext4_iget(sb, ino);
+}
+
 static int ext4_inode_blocks_set(handle_t *handle,
 				struct ext4_inode *raw_inode,
 				struct ext4_inode_info *ei)
@@ -4226,7 +4222,8 @@ static int ext4_do_update_inode(handle_t *handle,
 	EXT4_INODE_SET_XTIME(i_atime, inode, raw_inode);
 	EXT4_EINODE_SET_XTIME(i_crtime, ei, raw_inode);
 
-	if (ext4_inode_blocks_set(handle, raw_inode, ei)) {
+	err = ext4_inode_blocks_set(handle, raw_inode, ei);
+	if (err) {
 		spin_unlock(&ei->i_raw_lock);
 		goto out_brelse;
 	}
@@ -4536,8 +4533,12 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
 				ext4_orphan_del(NULL, inode);
 				goto err_out;
 			}
-		} else
+		} else {
+			loff_t oldsize = inode->i_size;
+
 			i_size_write(inode, attr->ia_size);
+			pagecache_isize_extended(inode, oldsize, inode->i_size);
+		}
 
 		/*
 		 * Blocks are going to be removed from the inode. Wait
@@ -4958,7 +4959,12 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)
 	if (val)
 		ext4_set_inode_flag(inode, EXT4_INODE_JOURNAL_DATA);
 	else {
-		jbd2_journal_flush(journal);
+		err = jbd2_journal_flush(journal);
+		if (err < 0) {
+			jbd2_journal_unlock_updates(journal);
+			ext4_inode_resume_unlocked_dio(inode);
+			return err;
+		}
 		ext4_clear_inode_flag(inode, EXT4_INODE_JOURNAL_DATA);
 	}
 	ext4_set_aops(inode);
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index 0f2252e..bfda18a 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -331,8 +331,7 @@ flags_out:
 		if (!inode_owner_or_capable(inode))
 			return -EPERM;
 
-		if (EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
-				EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) {
+		if (ext4_has_metadata_csum(inode->i_sb)) {
 			ext4_warning(sb, "Setting inode version is not "
 				     "supported with metadata_csum enabled.");
 			return -ENOTTY;
@@ -532,9 +531,17 @@ group_add_out:
 	}
 
 	case EXT4_IOC_SWAP_BOOT:
+	{
+		int err;
 		if (!(filp->f_mode & FMODE_WRITE))
 			return -EBADF;
-		return swap_inode_boot_loader(sb, inode);
+		err = mnt_want_write_file(filp);
+		if (err)
+			return err;
+		err = swap_inode_boot_loader(sb, inode);
+		mnt_drop_write_file(filp);
+		return err;
+	}
 
 	case EXT4_IOC_RESIZE_FS: {
 		ext4_fsblk_t n_blocks_count;
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 748c913..dbfe15c 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -3155,9 +3155,8 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac,
 			 "start %lu, size %lu, fe_logical %lu",
 			 (unsigned long) start, (unsigned long) size,
 			 (unsigned long) ac->ac_o_ex.fe_logical);
+		BUG();
 	}
-	BUG_ON(start + size <= ac->ac_o_ex.fe_logical &&
-			start > ac->ac_o_ex.fe_logical);
 	BUG_ON(size <= 0 || size > EXT4_BLOCKS_PER_GROUP(ac->ac_sb));
 
 	/* now prepare goal request */
@@ -4410,14 +4409,7 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
 	if (IS_NOQUOTA(ar->inode))
 		ar->flags |= EXT4_MB_USE_ROOT_BLOCKS;
 
-	/*
-	 * For delayed allocation, we could skip the ENOSPC and
-	 * EDQUOT check, as blocks and quotas have been already
-	 * reserved when data being copied into pagecache.
-	 */
-	if (ext4_test_inode_state(ar->inode, EXT4_STATE_DELALLOC_RESERVED))
-		ar->flags |= EXT4_MB_DELALLOC_RESERVED;
-	else {
+	if ((ar->flags & EXT4_MB_DELALLOC_RESERVED) == 0) {
 		/* Without delayed allocation we need to verify
 		 * there is enough free blocks to do block allocation
 		 * and verify allocation doesn't exceed the quota limits.
@@ -4528,8 +4520,7 @@ out:
 	if (inquota && ar->len < inquota)
 		dquot_free_block(ar->inode, EXT4_C2B(sbi, inquota - ar->len));
 	if (!ar->len) {
-		if (!ext4_test_inode_state(ar->inode,
-					   EXT4_STATE_DELALLOC_RESERVED))
+		if ((ar->flags & EXT4_MB_DELALLOC_RESERVED) == 0)
 			/* release all the reserved blocks if non delalloc */
 			percpu_counter_sub(&sbi->s_dirtyclusters_counter,
 						reserv_clstrs);
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c
index d3567f2..a432634 100644
--- a/fs/ext4/migrate.c
+++ b/fs/ext4/migrate.c
@@ -41,8 +41,7 @@ static int finish_range(handle_t *handle, struct inode *inode,
 	ext4_ext_store_pblock(&newext, lb->first_pblock);
 	/* Locking only for convinience since we are operating on temp inode */
 	down_write(&EXT4_I(inode)->i_data_sem);
-	path = ext4_ext_find_extent(inode, lb->first_block, NULL, 0);
-
+	path = ext4_find_extent(inode, lb->first_block, NULL, 0);
 	if (IS_ERR(path)) {
 		retval = PTR_ERR(path);
 		path = NULL;
@@ -81,13 +80,11 @@ static int finish_range(handle_t *handle, struct inode *inode,
 				goto err_out;
 		}
 	}
-	retval = ext4_ext_insert_extent(handle, inode, path, &newext, 0);
+	retval = ext4_ext_insert_extent(handle, inode, &path, &newext, 0);
 err_out:
 	up_write((&EXT4_I(inode)->i_data_sem));
-	if (path) {
-		ext4_ext_drop_refs(path);
-		kfree(path);
-	}
+	ext4_ext_drop_refs(path);
+	kfree(path);
 	lb->first_pblock = 0;
 	return retval;
 }
diff --git a/fs/ext4/mmp.c b/fs/ext4/mmp.c
index 32bce84..8313ca3 100644
--- a/fs/ext4/mmp.c
+++ b/fs/ext4/mmp.c
@@ -20,8 +20,7 @@ static __le32 ext4_mmp_csum(struct super_block *sb, struct mmp_struct *mmp)
 
 static int ext4_mmp_csum_verify(struct super_block *sb, struct mmp_struct *mmp)
 {
-	if (!EXT4_HAS_RO_COMPAT_FEATURE(sb,
-				       EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
+	if (!ext4_has_metadata_csum(sb))
 		return 1;
 
 	return mmp->mmp_checksum == ext4_mmp_csum(sb, mmp);
@@ -29,8 +28,7 @@ static int ext4_mmp_csum_verify(struct super_block *sb, struct mmp_struct *mmp)
 
 static void ext4_mmp_csum_set(struct super_block *sb, struct mmp_struct *mmp)
 {
-	if (!EXT4_HAS_RO_COMPAT_FEATURE(sb,
-				       EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
+	if (!ext4_has_metadata_csum(sb))
 		return;
 
 	mmp->mmp_checksum = ext4_mmp_csum(sb, mmp);
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c
index 671a74b..9f2311b 100644
--- a/fs/ext4/move_extent.c
+++ b/fs/ext4/move_extent.c
@@ -27,120 +27,26 @@
  * @lblock:	logical block number to find an extent path
  * @path:	pointer to an extent path pointer (for output)
  *
- * ext4_ext_find_extent wrapper. Return 0 on success, or a negative error value
+ * ext4_find_extent wrapper. Return 0 on success, or a negative error value
  * on failure.
  */
 static inline int
 get_ext_path(struct inode *inode, ext4_lblk_t lblock,
-		struct ext4_ext_path **orig_path)
+		struct ext4_ext_path **ppath)
 {
-	int ret = 0;
 	struct ext4_ext_path *path;
 
-	path = ext4_ext_find_extent(inode, lblock, *orig_path, EXT4_EX_NOCACHE);
+	path = ext4_find_extent(inode, lblock, ppath, EXT4_EX_NOCACHE);
 	if (IS_ERR(path))
-		ret = PTR_ERR(path);
-	else if (path[ext_depth(inode)].p_ext == NULL)
-		ret = -ENODATA;
-	else
-		*orig_path = path;
-
-	return ret;
-}
-
-/**
- * copy_extent_status - Copy the extent's initialization status
- *
- * @src:	an extent for getting initialize status
- * @dest:	an extent to be set the status
- */
-static void
-copy_extent_status(struct ext4_extent *src, struct ext4_extent *dest)
-{
-	if (ext4_ext_is_unwritten(src))
-		ext4_ext_mark_unwritten(dest);
-	else
-		dest->ee_len = cpu_to_le16(ext4_ext_get_actual_len(dest));
-}
-
-/**
- * mext_next_extent - Search for the next extent and set it to "extent"
- *
- * @inode:	inode which is searched
- * @path:	this will obtain data for the next extent
- * @extent:	pointer to the next extent we have just gotten
- *
- * Search the next extent in the array of ext4_ext_path structure (@path)
- * and set it to ext4_extent structure (@extent). In addition, the member of
- * @path (->p_ext) also points the next extent. Return 0 on success, 1 if
- * ext4_ext_path structure refers to the last extent, or a negative error
- * value on failure.
- */
-int
-mext_next_extent(struct inode *inode, struct ext4_ext_path *path,
-		      struct ext4_extent **extent)
-{
-	struct ext4_extent_header *eh;
-	int ppos, leaf_ppos = path->p_depth;
-
-	ppos = leaf_ppos;
-	if (EXT_LAST_EXTENT(path[ppos].p_hdr) > path[ppos].p_ext) {
-		/* leaf block */
-		*extent = ++path[ppos].p_ext;
-		path[ppos].p_block = ext4_ext_pblock(path[ppos].p_ext);
-		return 0;
-	}
-
-	while (--ppos >= 0) {
-		if (EXT_LAST_INDEX(path[ppos].p_hdr) >
-		    path[ppos].p_idx) {
-			int cur_ppos = ppos;
-
-			/* index block */
-			path[ppos].p_idx++;
-			path[ppos].p_block = ext4_idx_pblock(path[ppos].p_idx);
-			if (path[ppos+1].p_bh)
-				brelse(path[ppos+1].p_bh);
-			path[ppos+1].p_bh =
-				sb_bread(inode->i_sb, path[ppos].p_block);
-			if (!path[ppos+1].p_bh)
-				return -EIO;
-			path[ppos+1].p_hdr =
-				ext_block_hdr(path[ppos+1].p_bh);
-
-			/* Halfway index block */
-			while (++cur_ppos < leaf_ppos) {
-				path[cur_ppos].p_idx =
-					EXT_FIRST_INDEX(path[cur_ppos].p_hdr);
-				path[cur_ppos].p_block =
-					ext4_idx_pblock(path[cur_ppos].p_idx);
-				if (path[cur_ppos+1].p_bh)
-					brelse(path[cur_ppos+1].p_bh);
-				path[cur_ppos+1].p_bh = sb_bread(inode->i_sb,
-					path[cur_ppos].p_block);
-				if (!path[cur_ppos+1].p_bh)
-					return -EIO;
-				path[cur_ppos+1].p_hdr =
-					ext_block_hdr(path[cur_ppos+1].p_bh);
-			}
-
-			path[leaf_ppos].p_ext = *extent = NULL;
-
-			eh = path[leaf_ppos].p_hdr;
-			if (le16_to_cpu(eh->eh_entries) == 0)
-				/* empty leaf is found */
-				return -ENODATA;
-
-			/* leaf block */
-			path[leaf_ppos].p_ext = *extent =
-				EXT_FIRST_EXTENT(path[leaf_ppos].p_hdr);
-			path[leaf_ppos].p_block =
-					ext4_ext_pblock(path[leaf_ppos].p_ext);
-			return 0;
-		}
+		return PTR_ERR(path);
+	if (path[ext_depth(inode)].p_ext == NULL) {
+		ext4_ext_drop_refs(path);
+		kfree(path);
+		*ppath = NULL;
+		return -ENODATA;
 	}
-	/* We found the last extent */
-	return 1;
+	*ppath = path;
+	return 0;
 }
 
 /**
@@ -178,417 +84,6 @@ ext4_double_up_write_data_sem(struct inode *orig_inode,
 }
 
 /**
- * mext_insert_across_blocks - Insert extents across leaf block
- *
- * @handle:		journal handle
- * @orig_inode:		original inode
- * @o_start:		first original extent to be changed
- * @o_end:		last original extent to be changed
- * @start_ext:		first new extent to be inserted
- * @new_ext:		middle of new extent to be inserted
- * @end_ext:		last new extent to be inserted
- *
- * Allocate a new leaf block and insert extents into it. Return 0 on success,
- * or a negative error value on failure.
- */
-static int
-mext_insert_across_blocks(handle_t *handle, struct inode *orig_inode,
-		struct ext4_extent *o_start, struct ext4_extent *o_end,
-		struct ext4_extent *start_ext, struct ext4_extent *new_ext,
-		struct ext4_extent *end_ext)
-{
-	struct ext4_ext_path *orig_path = NULL;
-	ext4_lblk_t eblock = 0;
-	int new_flag = 0;
-	int end_flag = 0;
-	int err = 0;
-
-	if (start_ext->ee_len && new_ext->ee_len && end_ext->ee_len) {
-		if (o_start == o_end) {
-
-			/*       start_ext   new_ext    end_ext
-			 * donor |---------|-----------|--------|
-			 * orig  |------------------------------|
-			 */
-			end_flag = 1;
-		} else {
-
-			/*       start_ext   new_ext   end_ext
-			 * donor |---------|----------|---------|
-			 * orig  |---------------|--------------|
-			 */
-			o_end->ee_block = end_ext->ee_block;
-			o_end->ee_len = end_ext->ee_len;
-			ext4_ext_store_pblock(o_end, ext4_ext_pblock(end_ext));
-		}
-
-		o_start->ee_len = start_ext->ee_len;
-		eblock = le32_to_cpu(start_ext->ee_block);
-		new_flag = 1;
-
-	} else if (start_ext->ee_len && new_ext->ee_len &&
-		   !end_ext->ee_len && o_start == o_end) {
-
-		/*	 start_ext	new_ext
-		 * donor |--------------|---------------|
-		 * orig  |------------------------------|
-		 */
-		o_start->ee_len = start_ext->ee_len;
-		eblock = le32_to_cpu(start_ext->ee_block);
-		new_flag = 1;
-
-	} else if (!start_ext->ee_len && new_ext->ee_len &&
-		   end_ext->ee_len && o_start == o_end) {
-
-		/*	  new_ext	end_ext
-		 * donor |--------------|---------------|
-		 * orig  |------------------------------|
-		 */
-		o_end->ee_block = end_ext->ee_block;
-		o_end->ee_len = end_ext->ee_len;
-		ext4_ext_store_pblock(o_end, ext4_ext_pblock(end_ext));
-
-		/*
-		 * Set 0 to the extent block if new_ext was
-		 * the first block.
-		 */
-		if (new_ext->ee_block)
-			eblock = le32_to_cpu(new_ext->ee_block);
-
-		new_flag = 1;
-	} else {
-		ext4_debug("ext4 move extent: Unexpected insert case\n");
-		return -EIO;
-	}
-
-	if (new_flag) {
-		err = get_ext_path(orig_inode, eblock, &orig_path);
-		if (err)
-			goto out;
-
-		if (ext4_ext_insert_extent(handle, orig_inode,
-					orig_path, new_ext, 0))
-			goto out;
-	}
-
-	if (end_flag) {
-		err = get_ext_path(orig_inode,
-				le32_to_cpu(end_ext->ee_block) - 1, &orig_path);
-		if (err)
-			goto out;
-
-		if (ext4_ext_insert_extent(handle, orig_inode,
-					   orig_path, end_ext, 0))
-			goto out;
-	}
-out:
-	if (orig_path) {
-		ext4_ext_drop_refs(orig_path);
-		kfree(orig_path);
-	}
-
-	return err;
-
-}
-
-/**
- * mext_insert_inside_block - Insert new extent to the extent block
- *
- * @o_start:		first original extent to be moved
- * @o_end:		last original extent to be moved
- * @start_ext:		first new extent to be inserted
- * @new_ext:		middle of new extent to be inserted
- * @end_ext:		last new extent to be inserted
- * @eh:			extent header of target leaf block
- * @range_to_move:	used to decide how to insert extent
- *
- * Insert extents into the leaf block. The extent (@o_start) is overwritten
- * by inserted extents.
- */
-static void
-mext_insert_inside_block(struct ext4_extent *o_start,
-			      struct ext4_extent *o_end,
-			      struct ext4_extent *start_ext,
-			      struct ext4_extent *new_ext,
-			      struct ext4_extent *end_ext,
-			      struct ext4_extent_header *eh,
-			      int range_to_move)
-{
-	int i = 0;
-	unsigned long len;
-
-	/* Move the existing extents */
-	if (range_to_move && o_end < EXT_LAST_EXTENT(eh)) {
-		len = (unsigned long)(EXT_LAST_EXTENT(eh) + 1) -
-			(unsigned long)(o_end + 1);
-		memmove(o_end + 1 + range_to_move, o_end + 1, len);
-	}
-
-	/* Insert start entry */
-	if (start_ext->ee_len)
-		o_start[i++].ee_len = start_ext->ee_len;
-
-	/* Insert new entry */
-	if (new_ext->ee_len) {
-		o_start[i] = *new_ext;
-		ext4_ext_store_pblock(&o_start[i++], ext4_ext_pblock(new_ext));
-	}
-
-	/* Insert end entry */
-	if (end_ext->ee_len)
-		o_start[i] = *end_ext;
-
-	/* Increment the total entries counter on the extent block */
-	le16_add_cpu(&eh->eh_entries, range_to_move);
-}
-
-/**
- * mext_insert_extents - Insert new extent
- *
- * @handle:	journal handle
- * @orig_inode:	original inode
- * @orig_path:	path indicates first extent to be changed
- * @o_start:	first original extent to be changed
- * @o_end:	last original extent to be changed
- * @start_ext:	first new extent to be inserted
- * @new_ext:	middle of new extent to be inserted
- * @end_ext:	last new extent to be inserted
- *
- * Call the function to insert extents. If we cannot add more extents into
- * the leaf block, we call mext_insert_across_blocks() to create a
- * new leaf block. Otherwise call mext_insert_inside_block(). Return 0
- * on success, or a negative error value on failure.
- */
-static int
-mext_insert_extents(handle_t *handle, struct inode *orig_inode,
-			 struct ext4_ext_path *orig_path,
-			 struct ext4_extent *o_start,
-			 struct ext4_extent *o_end,
-			 struct ext4_extent *start_ext,
-			 struct ext4_extent *new_ext,
-			 struct ext4_extent *end_ext)
-{
-	struct  ext4_extent_header *eh;
-	unsigned long need_slots, slots_range;
-	int	range_to_move, depth, ret;
-
-	/*
-	 * The extents need to be inserted
-	 * start_extent + new_extent + end_extent.
-	 */
-	need_slots = (start_ext->ee_len ? 1 : 0) + (end_ext->ee_len ? 1 : 0) +
-		(new_ext->ee_len ? 1 : 0);
-
-	/* The number of slots between start and end */
-	slots_range = ((unsigned long)(o_end + 1) - (unsigned long)o_start + 1)
-		/ sizeof(struct ext4_extent);
-
-	/* Range to move the end of extent */
-	range_to_move = need_slots - slots_range;
-	depth = orig_path->p_depth;
-	orig_path += depth;
-	eh = orig_path->p_hdr;
-
-	if (depth) {
-		/* Register to journal */
-		BUFFER_TRACE(orig_path->p_bh, "get_write_access");
-		ret = ext4_journal_get_write_access(handle, orig_path->p_bh);
-		if (ret)
-			return ret;
-	}
-
-	/* Expansion */
-	if (range_to_move > 0 &&
-		(range_to_move > le16_to_cpu(eh->eh_max)
-			- le16_to_cpu(eh->eh_entries))) {
-
-		ret = mext_insert_across_blocks(handle, orig_inode, o_start,
-					o_end, start_ext, new_ext, end_ext);
-		if (ret < 0)
-			return ret;
-	} else
-		mext_insert_inside_block(o_start, o_end, start_ext, new_ext,
-						end_ext, eh, range_to_move);
-
-	return ext4_ext_dirty(handle, orig_inode, orig_path);
-}
-
-/**
- * mext_leaf_block - Move one leaf extent block into the inode.
- *
- * @handle:		journal handle
- * @orig_inode:		original inode
- * @orig_path:		path indicates first extent to be changed
- * @dext:		donor extent
- * @from:		start offset on the target file
- *
- * In order to insert extents into the leaf block, we must divide the extent
- * in the leaf block into three extents. The one is located to be inserted
- * extents, and the others are located around it.
- *
- * Therefore, this function creates structures to save extents of the leaf
- * block, and inserts extents by calling mext_insert_extents() with
- * created extents. Return 0 on success, or a negative error value on failure.
- */
-static int
-mext_leaf_block(handle_t *handle, struct inode *orig_inode,
-		     struct ext4_ext_path *orig_path, struct ext4_extent *dext,
-		     ext4_lblk_t *from)
-{
-	struct ext4_extent *oext, *o_start, *o_end, *prev_ext;
-	struct ext4_extent new_ext, start_ext, end_ext;
-	ext4_lblk_t new_ext_end;
-	int oext_alen, new_ext_alen, end_ext_alen;
-	int depth = ext_depth(orig_inode);
-	int ret;
-
-	start_ext.ee_block = end_ext.ee_block = 0;
-	o_start = o_end = oext = orig_path[depth].p_ext;
-	oext_alen = ext4_ext_get_actual_len(oext);
-	start_ext.ee_len = end_ext.ee_len = 0;
-
-	new_ext.ee_block = cpu_to_le32(*from);
-	ext4_ext_store_pblock(&new_ext, ext4_ext_pblock(dext));
-	new_ext.ee_len = dext->ee_len;
-	new_ext_alen = ext4_ext_get_actual_len(&new_ext);
-	new_ext_end = le32_to_cpu(new_ext.ee_block) + new_ext_alen - 1;
-
-	/*
-	 * Case: original extent is first
-	 * oext      |--------|
-	 * new_ext      |--|
-	 * start_ext |--|
-	 */
-	if (le32_to_cpu(oext->ee_block) < le32_to_cpu(new_ext.ee_block) &&
-		le32_to_cpu(new_ext.ee_block) <
-		le32_to_cpu(oext->ee_block) + oext_alen) {
-		start_ext.ee_len = cpu_to_le16(le32_to_cpu(new_ext.ee_block) -
-					       le32_to_cpu(oext->ee_block));
-		start_ext.ee_block = oext->ee_block;
-		copy_extent_status(oext, &start_ext);
-	} else if (oext > EXT_FIRST_EXTENT(orig_path[depth].p_hdr)) {
-		prev_ext = oext - 1;
-		/*
-		 * We can merge new_ext into previous extent,
-		 * if these are contiguous and same extent type.
-		 */
-		if (ext4_can_extents_be_merged(orig_inode, prev_ext,
-					       &new_ext)) {
-			o_start = prev_ext;
-			start_ext.ee_len = cpu_to_le16(
-				ext4_ext_get_actual_len(prev_ext) +
-				new_ext_alen);
-			start_ext.ee_block = oext->ee_block;
-			copy_extent_status(prev_ext, &start_ext);
-			new_ext.ee_len = 0;
-		}
-	}
-
-	/*
-	 * Case: new_ext_end must be less than oext
-	 * oext      |-----------|
-	 * new_ext       |-------|
-	 */
-	if (le32_to_cpu(oext->ee_block) + oext_alen - 1 < new_ext_end) {
-		EXT4_ERROR_INODE(orig_inode,
-			"new_ext_end(%u) should be less than or equal to "
-			"oext->ee_block(%u) + oext_alen(%d) - 1",
-			new_ext_end, le32_to_cpu(oext->ee_block),
-			oext_alen);
-		ret = -EIO;
-		goto out;
-	}
-
-	/*
-	 * Case: new_ext is smaller than original extent
-	 * oext    |---------------|
-	 * new_ext |-----------|
-	 * end_ext             |---|
-	 */
-	if (le32_to_cpu(oext->ee_block) <= new_ext_end &&
-		new_ext_end < le32_to_cpu(oext->ee_block) + oext_alen - 1) {
-		end_ext.ee_len =
-			cpu_to_le16(le32_to_cpu(oext->ee_block) +
-			oext_alen - 1 - new_ext_end);
-		copy_extent_status(oext, &end_ext);
-		end_ext_alen = ext4_ext_get_actual_len(&end_ext);
-		ext4_ext_store_pblock(&end_ext,
-			(ext4_ext_pblock(o_end) + oext_alen - end_ext_alen));
-		end_ext.ee_block =
-			cpu_to_le32(le32_to_cpu(o_end->ee_block) +
-			oext_alen - end_ext_alen);
-	}
-
-	ret = mext_insert_extents(handle, orig_inode, orig_path, o_start,
-				o_end, &start_ext, &new_ext, &end_ext);
-out:
-	return ret;
-}
-
-/**
- * mext_calc_swap_extents - Calculate extents for extent swapping.
- *
- * @tmp_dext:		the extent that will belong to the original inode
- * @tmp_oext:		the extent that will belong to the donor inode
- * @orig_off:		block offset of original inode
- * @donor_off:		block offset of donor inode
- * @max_count:		the maximum length of extents
- *
- * Return 0 on success, or a negative error value on failure.
- */
-static int
-mext_calc_swap_extents(struct ext4_extent *tmp_dext,
-			      struct ext4_extent *tmp_oext,
-			      ext4_lblk_t orig_off, ext4_lblk_t donor_off,
-			      ext4_lblk_t max_count)
-{
-	ext4_lblk_t diff, orig_diff;
-	struct ext4_extent dext_old, oext_old;
-
-	BUG_ON(orig_off != donor_off);
-
-	/* original and donor extents have to cover the same block offset */
-	if (orig_off < le32_to_cpu(tmp_oext->ee_block) ||
-	    le32_to_cpu(tmp_oext->ee_block) +
-			ext4_ext_get_actual_len(tmp_oext) - 1 < orig_off)
-		return -ENODATA;
-
-	if (orig_off < le32_to_cpu(tmp_dext->ee_block) ||
-	    le32_to_cpu(tmp_dext->ee_block) +
-			ext4_ext_get_actual_len(tmp_dext) - 1 < orig_off)
-		return -ENODATA;
-
-	dext_old = *tmp_dext;
-	oext_old = *tmp_oext;
-
-	/* When tmp_dext is too large, pick up the target range. */
-	diff = donor_off - le32_to_cpu(tmp_dext->ee_block);
-
-	ext4_ext_store_pblock(tmp_dext, ext4_ext_pblock(tmp_dext) + diff);
-	le32_add_cpu(&tmp_dext->ee_block, diff);
-	le16_add_cpu(&tmp_dext->ee_len, -diff);
-
-	if (max_count < ext4_ext_get_actual_len(tmp_dext))
-		tmp_dext->ee_len = cpu_to_le16(max_count);
-
-	orig_diff = orig_off - le32_to_cpu(tmp_oext->ee_block);
-	ext4_ext_store_pblock(tmp_oext, ext4_ext_pblock(tmp_oext) + orig_diff);
-
-	/* Adjust extent length if donor extent is larger than orig */
-	if (ext4_ext_get_actual_len(tmp_dext) >
-	    ext4_ext_get_actual_len(tmp_oext) - orig_diff)
-		tmp_dext->ee_len = cpu_to_le16(le16_to_cpu(tmp_oext->ee_len) -
-						orig_diff);
-
-	tmp_oext->ee_len = cpu_to_le16(ext4_ext_get_actual_len(tmp_dext));
-
-	copy_extent_status(&oext_old, tmp_dext);
-	copy_extent_status(&dext_old, tmp_oext);
-
-	return 0;
-}
-
-/**
  * mext_check_coverage - Check that all extents in range has the same type
  *
  * @inode:		inode in question
@@ -619,171 +114,25 @@ mext_check_coverage(struct inode *inode, ext4_lblk_t from, ext4_lblk_t count,
 	}
 	ret = 1;
 out:
-	if (path) {
-		ext4_ext_drop_refs(path);
-		kfree(path);
-	}
+	ext4_ext_drop_refs(path);
+	kfree(path);
 	return ret;
 }
 
 /**
- * mext_replace_branches - Replace original extents with new extents
- *
- * @handle:		journal handle
- * @orig_inode:		original inode
- * @donor_inode:	donor inode
- * @from:		block offset of orig_inode
- * @count:		block count to be replaced
- * @err:		pointer to save return value
- *
- * Replace original inode extents and donor inode extents page by page.
- * We implement this replacement in the following three steps:
- * 1. Save the block information of original and donor inodes into
- *    dummy extents.
- * 2. Change the block information of original inode to point at the
- *    donor inode blocks.
- * 3. Change the block information of donor inode to point at the saved
- *    original inode blocks in the dummy extents.
- *
- * Return replaced block count.
- */
-static int
-mext_replace_branches(handle_t *handle, struct inode *orig_inode,
-			   struct inode *donor_inode, ext4_lblk_t from,
-			   ext4_lblk_t count, int *err)
-{
-	struct ext4_ext_path *orig_path = NULL;
-	struct ext4_ext_path *donor_path = NULL;
-	struct ext4_extent *oext, *dext;
-	struct ext4_extent tmp_dext, tmp_oext;
-	ext4_lblk_t orig_off = from, donor_off = from;
-	int depth;
-	int replaced_count = 0;
-	int dext_alen;
-
-	*err = ext4_es_remove_extent(orig_inode, from, count);
-	if (*err)
-		goto out;
-
-	*err = ext4_es_remove_extent(donor_inode, from, count);
-	if (*err)
-		goto out;
-
-	/* Get the original extent for the block "orig_off" */
-	*err = get_ext_path(orig_inode, orig_off, &orig_path);
-	if (*err)
-		goto out;
-
-	/* Get the donor extent for the head */
-	*err = get_ext_path(donor_inode, donor_off, &donor_path);
-	if (*err)
-		goto out;
-	depth = ext_depth(orig_inode);
-	oext = orig_path[depth].p_ext;
-	tmp_oext = *oext;
-
-	depth = ext_depth(donor_inode);
-	dext = donor_path[depth].p_ext;
-	if (unlikely(!dext))
-		goto missing_donor_extent;
-	tmp_dext = *dext;
-
-	*err = mext_calc_swap_extents(&tmp_dext, &tmp_oext, orig_off,
-				      donor_off, count);
-	if (*err)
-		goto out;
-
-	/* Loop for the donor extents */
-	while (1) {
-		/* The extent for donor must be found. */
-		if (unlikely(!dext)) {
-		missing_donor_extent:
-			EXT4_ERROR_INODE(donor_inode,
-				   "The extent for donor must be found");
-			*err = -EIO;
-			goto out;
-		} else if (donor_off != le32_to_cpu(tmp_dext.ee_block)) {
-			EXT4_ERROR_INODE(donor_inode,
-				"Donor offset(%u) and the first block of donor "
-				"extent(%u) should be equal",
-				donor_off,
-				le32_to_cpu(tmp_dext.ee_block));
-			*err = -EIO;
-			goto out;
-		}
-
-		/* Set donor extent to orig extent */
-		*err = mext_leaf_block(handle, orig_inode,
-					   orig_path, &tmp_dext, &orig_off);
-		if (*err)
-			goto out;
-
-		/* Set orig extent to donor extent */
-		*err = mext_leaf_block(handle, donor_inode,
-					   donor_path, &tmp_oext, &donor_off);
-		if (*err)
-			goto out;
-
-		dext_alen = ext4_ext_get_actual_len(&tmp_dext);
-		replaced_count += dext_alen;
-		donor_off += dext_alen;
-		orig_off += dext_alen;
-
-		BUG_ON(replaced_count > count);
-		/* Already moved the expected blocks */
-		if (replaced_count >= count)
-			break;
-
-		if (orig_path)
-			ext4_ext_drop_refs(orig_path);
-		*err = get_ext_path(orig_inode, orig_off, &orig_path);
-		if (*err)
-			goto out;
-		depth = ext_depth(orig_inode);
-		oext = orig_path[depth].p_ext;
-		tmp_oext = *oext;
-
-		if (donor_path)
-			ext4_ext_drop_refs(donor_path);
-		*err = get_ext_path(donor_inode, donor_off, &donor_path);
-		if (*err)
-			goto out;
-		depth = ext_depth(donor_inode);
-		dext = donor_path[depth].p_ext;
-		tmp_dext = *dext;
-
-		*err = mext_calc_swap_extents(&tmp_dext, &tmp_oext, orig_off,
-					   donor_off, count - replaced_count);
-		if (*err)
-			goto out;
-	}
-
-out:
-	if (orig_path) {
-		ext4_ext_drop_refs(orig_path);
-		kfree(orig_path);
-	}
-	if (donor_path) {
-		ext4_ext_drop_refs(donor_path);
-		kfree(donor_path);
-	}
-
-	return replaced_count;
-}
-
-/**
  * mext_page_double_lock - Grab and lock pages on both @inode1 and @inode2
  *
  * @inode1:	the inode structure
  * @inode2:	the inode structure
- * @index:	page index
+ * @index1:	page index
+ * @index2:	page index
  * @page:	result page vector
  *
  * Grab two locked pages for inode's by inode order
  */
 static int
 mext_page_double_lock(struct inode *inode1, struct inode *inode2,
-		      pgoff_t index, struct page *page[2])
+		      pgoff_t index1, pgoff_t index2, struct page *page[2])
 {
 	struct address_space *mapping[2];
 	unsigned fl = AOP_FLAG_NOFS;
@@ -793,15 +142,18 @@ mext_page_double_lock(struct inode *inode1, struct inode *inode2,
 		mapping[0] = inode1->i_mapping;
 		mapping[1] = inode2->i_mapping;
 	} else {
+		pgoff_t tmp = index1;
+		index1 = index2;
+		index2 = tmp;
 		mapping[0] = inode2->i_mapping;
 		mapping[1] = inode1->i_mapping;
 	}
 
-	page[0] = grab_cache_page_write_begin(mapping[0], index, fl);
+	page[0] = grab_cache_page_write_begin(mapping[0], index1, fl);
 	if (!page[0])
 		return -ENOMEM;
 
-	page[1] = grab_cache_page_write_begin(mapping[1], index, fl);
+	page[1] = grab_cache_page_write_begin(mapping[1], index2, fl);
 	if (!page[1]) {
 		unlock_page(page[0]);
 		page_cache_release(page[0]);
@@ -893,25 +245,27 @@ out:
  * @o_filp:			file structure of original file
  * @donor_inode:		donor inode
  * @orig_page_offset:		page index on original file
+ * @donor_page_offset:		page index on donor file
  * @data_offset_in_page:	block index where data swapping starts
  * @block_len_in_page:		the number of blocks to be swapped
  * @unwritten:			orig extent is unwritten or not
  * @err:			pointer to save return value
  *
  * Save the data in original inode blocks and replace original inode extents
- * with donor inode extents by calling mext_replace_branches().
+ * with donor inode extents by calling ext4_swap_extents().
  * Finally, write out the saved data in new original inode blocks. Return
  * replaced block count.
  */
 static int
 move_extent_per_page(struct file *o_filp, struct inode *donor_inode,
-		  pgoff_t orig_page_offset, int data_offset_in_page,
-		  int block_len_in_page, int unwritten, int *err)
+		     pgoff_t orig_page_offset, pgoff_t donor_page_offset,
+		     int data_offset_in_page,
+		     int block_len_in_page, int unwritten, int *err)
 {
 	struct inode *orig_inode = file_inode(o_filp);
 	struct page *pagep[2] = {NULL, NULL};
 	handle_t *handle;
-	ext4_lblk_t orig_blk_offset;
+	ext4_lblk_t orig_blk_offset, donor_blk_offset;
 	unsigned long blocksize = orig_inode->i_sb->s_blocksize;
 	unsigned int w_flags = 0;
 	unsigned int tmp_data_size, data_size, replaced_size;
@@ -939,6 +293,9 @@ again:
 	orig_blk_offset = orig_page_offset * blocks_per_page +
 		data_offset_in_page;
 
+	donor_blk_offset = donor_page_offset * blocks_per_page +
+		data_offset_in_page;
+
 	/* Calculate data_size */
 	if ((orig_blk_offset + block_len_in_page - 1) ==
 	    ((orig_inode->i_size - 1) >> orig_inode->i_blkbits)) {
@@ -959,7 +316,7 @@ again:
 	replaced_size = data_size;
 
 	*err = mext_page_double_lock(orig_inode, donor_inode, orig_page_offset,
-				     pagep);
+				     donor_page_offset, pagep);
 	if (unlikely(*err < 0))
 		goto stop_journal;
 	/*
@@ -978,7 +335,7 @@ again:
 		if (*err)
 			goto drop_data_sem;
 
-		unwritten &= mext_check_coverage(donor_inode, orig_blk_offset,
+		unwritten &= mext_check_coverage(donor_inode, donor_blk_offset,
 						 block_len_in_page, 1, err);
 		if (*err)
 			goto drop_data_sem;
@@ -994,9 +351,10 @@ again:
 			*err = -EBUSY;
 			goto drop_data_sem;
 		}
-		replaced_count = mext_replace_branches(handle, orig_inode,
-						donor_inode, orig_blk_offset,
-						block_len_in_page, err);
+		replaced_count = ext4_swap_extents(handle, orig_inode,
+						   donor_inode, orig_blk_offset,
+						   donor_blk_offset,
+						   block_len_in_page, 1, err);
 	drop_data_sem:
 		ext4_double_up_write_data_sem(orig_inode, donor_inode);
 		goto unlock_pages;
@@ -1014,9 +372,9 @@ data_copy:
 		goto unlock_pages;
 	}
 	ext4_double_down_write_data_sem(orig_inode, donor_inode);
-	replaced_count = mext_replace_branches(handle, orig_inode, donor_inode,
-					       orig_blk_offset,
-					       block_len_in_page, err);
+	replaced_count = ext4_swap_extents(handle, orig_inode, donor_inode,
+					       orig_blk_offset, donor_blk_offset,
+					   block_len_in_page, 1, err);
 	ext4_double_up_write_data_sem(orig_inode, donor_inode);
 	if (*err) {
 		if (replaced_count) {
@@ -1061,9 +419,9 @@ repair_branches:
 	 * Try to swap extents to it's original places
 	 */
 	ext4_double_down_write_data_sem(orig_inode, donor_inode);
-	replaced_count = mext_replace_branches(handle, donor_inode, orig_inode,
-					       orig_blk_offset,
-					       block_len_in_page, &err2);
+	replaced_count = ext4_swap_extents(handle, donor_inode, orig_inode,
+					       orig_blk_offset, donor_blk_offset,
+					   block_len_in_page, 0, &err2);
 	ext4_double_up_write_data_sem(orig_inode, donor_inode);
 	if (replaced_count != block_len_in_page) {
 		EXT4_ERROR_INODE_BLOCK(orig_inode, (sector_t)(orig_blk_offset),
@@ -1093,10 +451,14 @@ mext_check_arguments(struct inode *orig_inode,
 		     struct inode *donor_inode, __u64 orig_start,
 		     __u64 donor_start, __u64 *len)
 {
-	ext4_lblk_t orig_blocks, donor_blocks;
+	__u64 orig_eof, donor_eof;
 	unsigned int blkbits = orig_inode->i_blkbits;
 	unsigned int blocksize = 1 << blkbits;
 
+	orig_eof = (i_size_read(orig_inode) + blocksize - 1) >> blkbits;
+	donor_eof = (i_size_read(donor_inode) + blocksize - 1) >> blkbits;
+
+
 	if (donor_inode->i_mode & (S_ISUID|S_ISGID)) {
 		ext4_debug("ext4 move extent: suid or sgid is set"
 			   " to donor file [ino:orig %lu, donor %lu]\n",
@@ -1112,7 +474,7 @@ mext_check_arguments(struct inode *orig_inode,
 		ext4_debug("ext4 move extent: The argument files should "
 			"not be swapfile [ino:orig %lu, donor %lu]\n",
 			orig_inode->i_ino, donor_inode->i_ino);
-		return -EINVAL;
+		return -EBUSY;
 	}
 
 	/* Ext4 move extent supports only extent based file */
@@ -1132,67 +494,28 @@ mext_check_arguments(struct inode *orig_inode,
 	}
 
 	/* Start offset should be same */
-	if (orig_start != donor_start) {
+	if ((orig_start & ~(PAGE_MASK >> orig_inode->i_blkbits)) !=
+	    (donor_start & ~(PAGE_MASK >> orig_inode->i_blkbits))) {
 		ext4_debug("ext4 move extent: orig and donor's start "
-			"offset are not same [ino:orig %lu, donor %lu]\n",
+			"offset are not alligned [ino:orig %lu, donor %lu]\n",
 			orig_inode->i_ino, donor_inode->i_ino);
 		return -EINVAL;
 	}
 
 	if ((orig_start >= EXT_MAX_BLOCKS) ||
+	    (donor_start >= EXT_MAX_BLOCKS) ||
 	    (*len > EXT_MAX_BLOCKS) ||
+	    (donor_start + *len >= EXT_MAX_BLOCKS) ||
 	    (orig_start + *len >= EXT_MAX_BLOCKS))  {
 		ext4_debug("ext4 move extent: Can't handle over [%u] blocks "
 			"[ino:orig %lu, donor %lu]\n", EXT_MAX_BLOCKS,
 			orig_inode->i_ino, donor_inode->i_ino);
 		return -EINVAL;
 	}
-
-	if (orig_inode->i_size > donor_inode->i_size) {
-		donor_blocks = (donor_inode->i_size + blocksize - 1) >> blkbits;
-		/* TODO: eliminate this artificial restriction */
-		if (orig_start >= donor_blocks) {
-			ext4_debug("ext4 move extent: orig start offset "
-			"[%llu] should be less than donor file blocks "
-			"[%u] [ino:orig %lu, donor %lu]\n",
-			orig_start, donor_blocks,
-			orig_inode->i_ino, donor_inode->i_ino);
-			return -EINVAL;
-		}
-
-		/* TODO: eliminate this artificial restriction */
-		if (orig_start + *len > donor_blocks) {
-			ext4_debug("ext4 move extent: End offset [%llu] should "
-				"be less than donor file blocks [%u]."
-				"So adjust length from %llu to %llu "
-				"[ino:orig %lu, donor %lu]\n",
-				orig_start + *len, donor_blocks,
-				*len, donor_blocks - orig_start,
-				orig_inode->i_ino, donor_inode->i_ino);
-			*len = donor_blocks - orig_start;
-		}
-	} else {
-		orig_blocks = (orig_inode->i_size + blocksize - 1) >> blkbits;
-		if (orig_start >= orig_blocks) {
-			ext4_debug("ext4 move extent: start offset [%llu] "
-				"should be less than original file blocks "
-				"[%u] [ino:orig %lu, donor %lu]\n",
-				 orig_start, orig_blocks,
-				orig_inode->i_ino, donor_inode->i_ino);
-			return -EINVAL;
-		}
-
-		if (orig_start + *len > orig_blocks) {
-			ext4_debug("ext4 move extent: Adjust length "
-				"from %llu to %llu. Because it should be "
-				"less than original file blocks "
-				"[ino:orig %lu, donor %lu]\n",
-				*len, orig_blocks - orig_start,
-				orig_inode->i_ino, donor_inode->i_ino);
-			*len = orig_blocks - orig_start;
-		}
-	}
-
+	if (orig_eof < orig_start + *len - 1)
+		*len = orig_eof - orig_start;
+	if (donor_eof < donor_start + *len - 1)
+		*len = donor_eof - donor_start;
 	if (!*len) {
 		ext4_debug("ext4 move extent: len should not be 0 "
 			"[ino:orig %lu, donor %lu]\n", orig_inode->i_ino,
@@ -1208,60 +531,26 @@ mext_check_arguments(struct inode *orig_inode,
  *
  * @o_filp:		file structure of the original file
  * @d_filp:		file structure of the donor file
- * @orig_start:		start offset in block for orig
- * @donor_start:	start offset in block for donor
+ * @orig_blk:		start offset in block for orig
+ * @donor_blk:		start offset in block for donor
  * @len:		the number of blocks to be moved
  * @moved_len:		moved block length
  *
  * This function returns 0 and moved block length is set in moved_len
  * if succeed, otherwise returns error value.
  *
- * Note: ext4_move_extents() proceeds the following order.
- * 1:ext4_move_extents() calculates the last block number of moving extent
- *   function by the start block number (orig_start) and the number of blocks
- *   to be moved (len) specified as arguments.
- *   If the {orig, donor}_start points a hole, the extent's start offset
- *   pointed by ext_cur (current extent), holecheck_path, orig_path are set
- *   after hole behind.
- * 2:Continue step 3 to step 5, until the holecheck_path points to last_extent
- *   or the ext_cur exceeds the block_end which is last logical block number.
- * 3:To get the length of continues area, call mext_next_extent()
- *   specified with the ext_cur (initial value is holecheck_path) re-cursive,
- *   until find un-continuous extent, the start logical block number exceeds
- *   the block_end or the extent points to the last extent.
- * 4:Exchange the original inode data with donor inode data
- *   from orig_page_offset to seq_end_page.
- *   The start indexes of data are specified as arguments.
- *   That of the original inode is orig_page_offset,
- *   and the donor inode is also orig_page_offset
- *   (To easily handle blocksize != pagesize case, the offset for the
- *   donor inode is block unit).
- * 5:Update holecheck_path and orig_path to points a next proceeding extent,
- *   then returns to step 2.
- * 6:Release holecheck_path, orig_path and set the len to moved_len
- *   which shows the number of moved blocks.
- *   The moved_len is useful for the command to calculate the file offset
- *   for starting next move extent ioctl.
- * 7:Return 0 on success, or a negative error value on failure.
  */
 int
-ext4_move_extents(struct file *o_filp, struct file *d_filp,
-		 __u64 orig_start, __u64 donor_start, __u64 len,
-		 __u64 *moved_len)
+ext4_move_extents(struct file *o_filp, struct file *d_filp, __u64 orig_blk,
+		  __u64 donor_blk, __u64 len, __u64 *moved_len)
 {
 	struct inode *orig_inode = file_inode(o_filp);
 	struct inode *donor_inode = file_inode(d_filp);
-	struct ext4_ext_path *orig_path = NULL, *holecheck_path = NULL;
-	struct ext4_extent *ext_prev, *ext_cur, *ext_dummy;
-	ext4_lblk_t block_start = orig_start;
-	ext4_lblk_t block_end, seq_start, add_blocks, file_end, seq_blocks = 0;
-	ext4_lblk_t rest_blocks;
-	pgoff_t orig_page_offset = 0, seq_end_page;
-	int ret, depth, last_extent = 0;
+	struct ext4_ext_path *path = NULL;
 	int blocks_per_page = PAGE_CACHE_SIZE >> orig_inode->i_blkbits;
-	int data_offset_in_page;
-	int block_len_in_page;
-	int unwritten;
+	ext4_lblk_t o_end, o_start = orig_blk;
+	ext4_lblk_t d_start = donor_blk;
+	int ret;
 
 	if (orig_inode->i_sb != donor_inode->i_sb) {
 		ext4_debug("ext4 move extent: The argument files "
@@ -1303,121 +592,58 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp,
 	/* Protect extent tree against block allocations via delalloc */
 	ext4_double_down_write_data_sem(orig_inode, donor_inode);
 	/* Check the filesystem environment whether move_extent can be done */
-	ret = mext_check_arguments(orig_inode, donor_inode, orig_start,
-				    donor_start, &len);
+	ret = mext_check_arguments(orig_inode, donor_inode, orig_blk,
+				    donor_blk, &len);
 	if (ret)
 		goto out;
+	o_end = o_start + len;
 
-	file_end = (i_size_read(orig_inode) - 1) >> orig_inode->i_blkbits;
-	block_end = block_start + len - 1;
-	if (file_end < block_end)
-		len -= block_end - file_end;
+	while (o_start < o_end) {
+		struct ext4_extent *ex;
+		ext4_lblk_t cur_blk, next_blk;
+		pgoff_t orig_page_index, donor_page_index;
+		int offset_in_page;
+		int unwritten, cur_len;
 
-	ret = get_ext_path(orig_inode, block_start, &orig_path);
-	if (ret)
-		goto out;
-
-	/* Get path structure to check the hole */
-	ret = get_ext_path(orig_inode, block_start, &holecheck_path);
-	if (ret)
-		goto out;
-
-	depth = ext_depth(orig_inode);
-	ext_cur = holecheck_path[depth].p_ext;
-
-	/*
-	 * Get proper starting location of block replacement if block_start was
-	 * within the hole.
-	 */
-	if (le32_to_cpu(ext_cur->ee_block) +
-		ext4_ext_get_actual_len(ext_cur) - 1 < block_start) {
-		/*
-		 * The hole exists between extents or the tail of
-		 * original file.
-		 */
-		last_extent = mext_next_extent(orig_inode,
-					holecheck_path, &ext_cur);
-		if (last_extent < 0) {
-			ret = last_extent;
-			goto out;
-		}
-		last_extent = mext_next_extent(orig_inode, orig_path,
-							&ext_dummy);
-		if (last_extent < 0) {
-			ret = last_extent;
+		ret = get_ext_path(orig_inode, o_start, &path);
+		if (ret)
 			goto out;
-		}
-		seq_start = le32_to_cpu(ext_cur->ee_block);
-	} else if (le32_to_cpu(ext_cur->ee_block) > block_start)
-		/* The hole exists at the beginning of original file. */
-		seq_start = le32_to_cpu(ext_cur->ee_block);
-	else
-		seq_start = block_start;
-
-	/* No blocks within the specified range. */
-	if (le32_to_cpu(ext_cur->ee_block) > block_end) {
-		ext4_debug("ext4 move extent: The specified range of file "
-							"may be the hole\n");
-		ret = -EINVAL;
-		goto out;
-	}
-
-	/* Adjust start blocks */
-	add_blocks = min(le32_to_cpu(ext_cur->ee_block) +
-			 ext4_ext_get_actual_len(ext_cur), block_end + 1) -
-		     max(le32_to_cpu(ext_cur->ee_block), block_start);
-
-	while (!last_extent && le32_to_cpu(ext_cur->ee_block) <= block_end) {
-		seq_blocks += add_blocks;
-
-		/* Adjust tail blocks */
-		if (seq_start + seq_blocks - 1 > block_end)
-			seq_blocks = block_end - seq_start + 1;
-
-		ext_prev = ext_cur;
-		last_extent = mext_next_extent(orig_inode, holecheck_path,
-						&ext_cur);
-		if (last_extent < 0) {
-			ret = last_extent;
-			break;
-		}
-		add_blocks = ext4_ext_get_actual_len(ext_cur);
-
-		/*
-		 * Extend the length of contiguous block (seq_blocks)
-		 * if extents are contiguous.
-		 */
-		if (ext4_can_extents_be_merged(orig_inode,
-					       ext_prev, ext_cur) &&
-		    block_end >= le32_to_cpu(ext_cur->ee_block) &&
-		    !last_extent)
+		ex = path[path->p_depth].p_ext;
+		next_blk = ext4_ext_next_allocated_block(path);
+		cur_blk = le32_to_cpu(ex->ee_block);
+		cur_len = ext4_ext_get_actual_len(ex);
+		/* Check hole before the start pos */
+		if (cur_blk + cur_len - 1 < o_start) {
+			if (next_blk == EXT_MAX_BLOCKS) {
+				o_start = o_end;
+				ret = -ENODATA;
+				goto out;
+			}
+			d_start += next_blk - o_start;
+			o_start = next_blk;
 			continue;
-
-		/* Is original extent is unwritten */
-		unwritten = ext4_ext_is_unwritten(ext_prev);
-
-		data_offset_in_page = seq_start % blocks_per_page;
-
-		/*
-		 * Calculate data blocks count that should be swapped
-		 * at the first page.
-		 */
-		if (data_offset_in_page + seq_blocks > blocks_per_page) {
-			/* Swapped blocks are across pages */
-			block_len_in_page =
-					blocks_per_page - data_offset_in_page;
-		} else {
-			/* Swapped blocks are in a page */
-			block_len_in_page = seq_blocks;
+		/* Check hole after the start pos */
+		} else if (cur_blk > o_start) {
+			/* Skip hole */
+			d_start += cur_blk - o_start;
+			o_start = cur_blk;
+			/* Extent inside requested range ?*/
+			if (cur_blk >= o_end)
+				goto out;
+		} else { /* in_range(o_start, o_blk, o_len) */
+			cur_len += cur_blk - o_start;
 		}
-
-		orig_page_offset = seq_start >>
-				(PAGE_CACHE_SHIFT - orig_inode->i_blkbits);
-		seq_end_page = (seq_start + seq_blocks - 1) >>
-				(PAGE_CACHE_SHIFT - orig_inode->i_blkbits);
-		seq_start = le32_to_cpu(ext_cur->ee_block);
-		rest_blocks = seq_blocks;
-
+		unwritten = ext4_ext_is_unwritten(ex);
+		if (o_end - o_start < cur_len)
+			cur_len = o_end - o_start;
+
+		orig_page_index = o_start >> (PAGE_CACHE_SHIFT -
+					       orig_inode->i_blkbits);
+		donor_page_index = d_start >> (PAGE_CACHE_SHIFT -
+					       donor_inode->i_blkbits);
+		offset_in_page = o_start % blocks_per_page;
+		if (cur_len > blocks_per_page- offset_in_page)
+			cur_len = blocks_per_page - offset_in_page;
 		/*
 		 * Up semaphore to avoid following problems:
 		 * a. transaction deadlock among ext4_journal_start,
@@ -1426,77 +652,29 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp,
 		 *    in move_extent_per_page
 		 */
 		ext4_double_up_write_data_sem(orig_inode, donor_inode);
-
-		while (orig_page_offset <= seq_end_page) {
-
-			/* Swap original branches with new branches */
-			block_len_in_page = move_extent_per_page(
-						o_filp, donor_inode,
-						orig_page_offset,
-						data_offset_in_page,
-						block_len_in_page,
-						unwritten, &ret);
-
-			/* Count how many blocks we have exchanged */
-			*moved_len += block_len_in_page;
-			if (ret < 0)
-				break;
-			if (*moved_len > len) {
-				EXT4_ERROR_INODE(orig_inode,
-					"We replaced blocks too much! "
-					"sum of replaced: %llu requested: %llu",
-					*moved_len, len);
-				ret = -EIO;
-				break;
-			}
-
-			orig_page_offset++;
-			data_offset_in_page = 0;
-			rest_blocks -= block_len_in_page;
-			if (rest_blocks > blocks_per_page)
-				block_len_in_page = blocks_per_page;
-			else
-				block_len_in_page = rest_blocks;
-		}
-
+		/* Swap original branches with new branches */
+		move_extent_per_page(o_filp, donor_inode,
+				     orig_page_index, donor_page_index,
+				     offset_in_page, cur_len,
+				     unwritten, &ret);
 		ext4_double_down_write_data_sem(orig_inode, donor_inode);
 		if (ret < 0)
 			break;
-
-		/* Decrease buffer counter */
-		if (holecheck_path)
-			ext4_ext_drop_refs(holecheck_path);
-		ret = get_ext_path(orig_inode, seq_start, &holecheck_path);
-		if (ret)
-			break;
-		depth = holecheck_path->p_depth;
-
-		/* Decrease buffer counter */
-		if (orig_path)
-			ext4_ext_drop_refs(orig_path);
-		ret = get_ext_path(orig_inode, seq_start, &orig_path);
-		if (ret)
-			break;
-
-		ext_cur = holecheck_path[depth].p_ext;
-		add_blocks = ext4_ext_get_actual_len(ext_cur);
-		seq_blocks = 0;
-
+		o_start += cur_len;
+		d_start += cur_len;
 	}
+	*moved_len = o_start - orig_blk;
+	if (*moved_len > len)
+		*moved_len = len;
+
 out:
 	if (*moved_len) {
 		ext4_discard_preallocations(orig_inode);
 		ext4_discard_preallocations(donor_inode);
 	}
 
-	if (orig_path) {
-		ext4_ext_drop_refs(orig_path);
-		kfree(orig_path);
-	}
-	if (holecheck_path) {
-		ext4_ext_drop_refs(holecheck_path);
-		kfree(holecheck_path);
-	}
+	ext4_ext_drop_refs(path);
+	kfree(path);
 	ext4_double_up_write_data_sem(orig_inode, donor_inode);
 	ext4_inode_resume_unlocked_dio(orig_inode);
 	ext4_inode_resume_unlocked_dio(donor_inode);
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 603e4eb..4262118 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -53,7 +53,7 @@ static struct buffer_head *ext4_append(handle_t *handle,
 					ext4_lblk_t *block)
 {
 	struct buffer_head *bh;
-	int err = 0;
+	int err;
 
 	if (unlikely(EXT4_SB(inode->i_sb)->s_max_dir_size_kb &&
 		     ((inode->i_size >> 10) >=
@@ -62,9 +62,9 @@ static struct buffer_head *ext4_append(handle_t *handle,
 
 	*block = inode->i_size >> inode->i_sb->s_blocksize_bits;
 
-	bh = ext4_bread(handle, inode, *block, 1, &err);
-	if (!bh)
-		return ERR_PTR(err);
+	bh = ext4_bread(handle, inode, *block, 1);
+	if (IS_ERR(bh))
+		return bh;
 	inode->i_size += inode->i_sb->s_blocksize;
 	EXT4_I(inode)->i_disksize = inode->i_size;
 	BUFFER_TRACE(bh, "get_write_access");
@@ -94,20 +94,20 @@ static struct buffer_head *__ext4_read_dirblock(struct inode *inode,
 {
 	struct buffer_head *bh;
 	struct ext4_dir_entry *dirent;
-	int err = 0, is_dx_block = 0;
+	int is_dx_block = 0;
 
-	bh = ext4_bread(NULL, inode, block, 0, &err);
-	if (!bh) {
-		if (err == 0) {
-			ext4_error_inode(inode, __func__, line, block,
-					       "Directory hole found");
-			return ERR_PTR(-EIO);
-		}
+	bh = ext4_bread(NULL, inode, block, 0);
+	if (IS_ERR(bh)) {
 		__ext4_warning(inode->i_sb, __func__, line,
-			       "error reading directory block "
-			       "(ino %lu, block %lu)", inode->i_ino,
+			       "error %ld reading directory block "
+			       "(ino %lu, block %lu)", PTR_ERR(bh), inode->i_ino,
 			       (unsigned long) block);
-		return ERR_PTR(err);
+
+		return bh;
+	}
+	if (!bh) {
+		ext4_error_inode(inode, __func__, line, block, "Directory hole found");
+		return ERR_PTR(-EIO);
 	}
 	dirent = (struct ext4_dir_entry *) bh->b_data;
 	/* Determine whether or not we have an index block */
@@ -124,8 +124,7 @@ static struct buffer_head *__ext4_read_dirblock(struct inode *inode,
 		       "directory leaf block found instead of index block");
 		return ERR_PTR(-EIO);
 	}
-	if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
-					EXT4_FEATURE_RO_COMPAT_METADATA_CSUM) ||
+	if (!ext4_has_metadata_csum(inode->i_sb) ||
 	    buffer_verified(bh))
 		return bh;
 
@@ -253,8 +252,7 @@ static unsigned dx_node_limit(struct inode *dir);
 static struct dx_frame *dx_probe(const struct qstr *d_name,
 				 struct inode *dir,
 				 struct dx_hash_info *hinfo,
-				 struct dx_frame *frame,
-				 int *err);
+				 struct dx_frame *frame);
 static void dx_release(struct dx_frame *frames);
 static int dx_make_map(struct ext4_dir_entry_2 *de, unsigned blocksize,
 		       struct dx_hash_info *hinfo, struct dx_map_entry map[]);
@@ -270,8 +268,7 @@ static int ext4_htree_next_block(struct inode *dir, __u32 hash,
 				 __u32 *start_hash);
 static struct buffer_head * ext4_dx_find_entry(struct inode *dir,
 		const struct qstr *d_name,
-		struct ext4_dir_entry_2 **res_dir,
-		int *err);
+		struct ext4_dir_entry_2 **res_dir);
 static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
 			     struct inode *inode);
 
@@ -340,8 +337,7 @@ int ext4_dirent_csum_verify(struct inode *inode, struct ext4_dir_entry *dirent)
 {
 	struct ext4_dir_entry_tail *t;
 
-	if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
-					EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
+	if (!ext4_has_metadata_csum(inode->i_sb))
 		return 1;
 
 	t = get_dirent_tail(inode, dirent);
@@ -362,8 +358,7 @@ static void ext4_dirent_csum_set(struct inode *inode,
 {
 	struct ext4_dir_entry_tail *t;
 
-	if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
-					EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
+	if (!ext4_has_metadata_csum(inode->i_sb))
 		return;
 
 	t = get_dirent_tail(inode, dirent);
@@ -438,8 +433,7 @@ static int ext4_dx_csum_verify(struct inode *inode,
 	struct dx_tail *t;
 	int count_offset, limit, count;
 
-	if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
-					EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
+	if (!ext4_has_metadata_csum(inode->i_sb))
 		return 1;
 
 	c = get_dx_countlimit(inode, dirent, &count_offset);
@@ -468,8 +462,7 @@ static void ext4_dx_csum_set(struct inode *inode, struct ext4_dir_entry *dirent)
 	struct dx_tail *t;
 	int count_offset, limit, count;
 
-	if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
-					EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
+	if (!ext4_has_metadata_csum(inode->i_sb))
 		return;
 
 	c = get_dx_countlimit(inode, dirent, &count_offset);
@@ -557,8 +550,7 @@ static inline unsigned dx_root_limit(struct inode *dir, unsigned infosize)
 	unsigned entry_space = dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(1) -
 		EXT4_DIR_REC_LEN(2) - infosize;
 
-	if (EXT4_HAS_RO_COMPAT_FEATURE(dir->i_sb,
-				       EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
+	if (ext4_has_metadata_csum(dir->i_sb))
 		entry_space -= sizeof(struct dx_tail);
 	return entry_space / sizeof(struct dx_entry);
 }
@@ -567,8 +559,7 @@ static inline unsigned dx_node_limit(struct inode *dir)
 {
 	unsigned entry_space = dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(0);
 
-	if (EXT4_HAS_RO_COMPAT_FEATURE(dir->i_sb,
-				       EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
+	if (ext4_has_metadata_csum(dir->i_sb))
 		entry_space -= sizeof(struct dx_tail);
 	return entry_space / sizeof(struct dx_entry);
 }
@@ -641,7 +632,9 @@ struct stats dx_show_entries(struct dx_hash_info *hinfo, struct inode *dir,
 		u32 range = i < count - 1? (dx_get_hash(entries + 1) - hash): ~hash;
 		struct stats stats;
 		printk("%s%3u:%03u hash %8x/%8x ",levels?"":"   ", i, block, hash, range);
-		if (!(bh = ext4_bread (NULL,dir, block, 0,&err))) continue;
+		bh = ext4_bread(NULL,dir, block, 0);
+		if (!bh || IS_ERR(bh))
+			continue;
 		stats = levels?
 		   dx_show_entries(hinfo, dir, ((struct dx_node *) bh->b_data)->entries, levels - 1):
 		   dx_show_leaf(hinfo, (struct ext4_dir_entry_2 *) bh->b_data, blocksize, 0);
@@ -669,29 +662,25 @@ struct stats dx_show_entries(struct dx_hash_info *hinfo, struct inode *dir,
  */
 static struct dx_frame *
 dx_probe(const struct qstr *d_name, struct inode *dir,
-	 struct dx_hash_info *hinfo, struct dx_frame *frame_in, int *err)
+	 struct dx_hash_info *hinfo, struct dx_frame *frame_in)
 {
 	unsigned count, indirect;
 	struct dx_entry *at, *entries, *p, *q, *m;
 	struct dx_root *root;
-	struct buffer_head *bh;
 	struct dx_frame *frame = frame_in;
+	struct dx_frame *ret_err = ERR_PTR(ERR_BAD_DX_DIR);
 	u32 hash;
 
-	frame->bh = NULL;
-	bh = ext4_read_dirblock(dir, 0, INDEX);
-	if (IS_ERR(bh)) {
-		*err = PTR_ERR(bh);
-		goto fail;
-	}
-	root = (struct dx_root *) bh->b_data;
+	frame->bh = ext4_read_dirblock(dir, 0, INDEX);
+	if (IS_ERR(frame->bh))
+		return (struct dx_frame *) frame->bh;
+
+	root = (struct dx_root *) frame->bh->b_data;
 	if (root->info.hash_version != DX_HASH_TEA &&
 	    root->info.hash_version != DX_HASH_HALF_MD4 &&
 	    root->info.hash_version != DX_HASH_LEGACY) {
 		ext4_warning(dir->i_sb, "Unrecognised inode hash code %d",
 			     root->info.hash_version);
-		brelse(bh);
-		*err = ERR_BAD_DX_DIR;
 		goto fail;
 	}
 	hinfo->hash_version = root->info.hash_version;
@@ -705,16 +694,12 @@ dx_probe(const struct qstr *d_name, struct inode *dir,
 	if (root->info.unused_flags & 1) {
 		ext4_warning(dir->i_sb, "Unimplemented inode hash flags: %#06x",
 			     root->info.unused_flags);
-		brelse(bh);
-		*err = ERR_BAD_DX_DIR;
 		goto fail;
 	}
 
 	if ((indirect = root->info.indirect_levels) > 1) {
 		ext4_warning(dir->i_sb, "Unimplemented inode hash depth: %#06x",
 			     root->info.indirect_levels);
-		brelse(bh);
-		*err = ERR_BAD_DX_DIR;
 		goto fail;
 	}
 
@@ -724,27 +709,21 @@ dx_probe(const struct qstr *d_name, struct inode *dir,
 	if (dx_get_limit(entries) != dx_root_limit(dir,
 						   root->info.info_length)) {
 		ext4_warning(dir->i_sb, "dx entry: limit != root limit");
-		brelse(bh);
-		*err = ERR_BAD_DX_DIR;
 		goto fail;
 	}
 
 	dxtrace(printk("Look up %x", hash));
-	while (1)
-	{
+	while (1) {
 		count = dx_get_count(entries);
 		if (!count || count > dx_get_limit(entries)) {
 			ext4_warning(dir->i_sb,
 				     "dx entry: no count or count > limit");
-			brelse(bh);
-			*err = ERR_BAD_DX_DIR;
-			goto fail2;
+			goto fail;
 		}
 
 		p = entries + 1;
 		q = entries + count - 1;
-		while (p <= q)
-		{
+		while (p <= q) {
 			m = p + (q - p)/2;
 			dxtrace(printk("."));
 			if (dx_get_hash(m) > hash)
@@ -753,8 +732,7 @@ dx_probe(const struct qstr *d_name, struct inode *dir,
 				p = m + 1;
 		}
 
-		if (0) // linear search cross check
-		{
+		if (0) { // linear search cross check
 			unsigned n = count - 1;
 			at = entries;
 			while (n--)
@@ -771,38 +749,35 @@ dx_probe(const struct qstr *d_name, struct inode *dir,
 
 		at = p - 1;
 		dxtrace(printk(" %x->%u\n", at == entries? 0: dx_get_hash(at), dx_get_block(at)));
-		frame->bh = bh;
 		frame->entries = entries;
 		frame->at = at;
-		if (!indirect--) return frame;
-		bh = ext4_read_dirblock(dir, dx_get_block(at), INDEX);
-		if (IS_ERR(bh)) {
-			*err = PTR_ERR(bh);
-			goto fail2;
+		if (!indirect--)
+			return frame;
+		frame++;
+		frame->bh = ext4_read_dirblock(dir, dx_get_block(at), INDEX);
+		if (IS_ERR(frame->bh)) {
+			ret_err = (struct dx_frame *) frame->bh;
+			frame->bh = NULL;
+			goto fail;
 		}
-		entries = ((struct dx_node *) bh->b_data)->entries;
+		entries = ((struct dx_node *) frame->bh->b_data)->entries;
 
 		if (dx_get_limit(entries) != dx_node_limit (dir)) {
 			ext4_warning(dir->i_sb,
 				     "dx entry: limit != node limit");
-			brelse(bh);
-			*err = ERR_BAD_DX_DIR;
-			goto fail2;
+			goto fail;
 		}
-		frame++;
-		frame->bh = NULL;
 	}
-fail2:
+fail:
 	while (frame >= frame_in) {
 		brelse(frame->bh);
 		frame--;
 	}
-fail:
-	if (*err == ERR_BAD_DX_DIR)
+	if (ret_err == ERR_PTR(ERR_BAD_DX_DIR))
 		ext4_warning(dir->i_sb,
 			     "Corrupt dir inode %lu, running e2fsck is "
 			     "recommended.", dir->i_ino);
-	return NULL;
+	return ret_err;
 }
 
 static void dx_release (struct dx_frame *frames)
@@ -988,9 +963,9 @@ int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash,
 	}
 	hinfo.hash = start_hash;
 	hinfo.minor_hash = 0;
-	frame = dx_probe(NULL, dir, &hinfo, frames, &err);
-	if (!frame)
-		return err;
+	frame = dx_probe(NULL, dir, &hinfo, frames);
+	if (IS_ERR(frame))
+		return PTR_ERR(frame);
 
 	/* Add '.' and '..' from the htree header */
 	if (!start_hash && !start_minor_hash) {
@@ -1227,8 +1202,7 @@ static struct buffer_head * ext4_find_entry (struct inode *dir,
 				   buffer */
 	int num = 0;
 	ext4_lblk_t  nblocks;
-	int i, err = 0;
-	int namelen;
+	int i, namelen;
 
 	*res_dir = NULL;
 	sb = dir->i_sb;
@@ -1258,17 +1232,13 @@ static struct buffer_head * ext4_find_entry (struct inode *dir,
 		goto restart;
 	}
 	if (is_dx(dir)) {
-		bh = ext4_dx_find_entry(dir, d_name, res_dir, &err);
+		bh = ext4_dx_find_entry(dir, d_name, res_dir);
 		/*
 		 * On success, or if the error was file not found,
 		 * return.  Otherwise, fall back to doing a search the
 		 * old fashioned way.
 		 */
-		if (err == -ENOENT)
-			return NULL;
-		if (err && err != ERR_BAD_DX_DIR)
-			return ERR_PTR(err);
-		if (bh)
+		if (!IS_ERR(bh) || PTR_ERR(bh) != ERR_BAD_DX_DIR)
 			return bh;
 		dxtrace(printk(KERN_DEBUG "ext4_find_entry: dx failed, "
 			       "falling back\n"));
@@ -1298,10 +1268,10 @@ restart:
 					break;
 				}
 				num++;
-				bh = ext4_getblk(NULL, dir, b++, 0, &err);
-				if (unlikely(err)) {
+				bh = ext4_getblk(NULL, dir, b++, 0);
+				if (unlikely(IS_ERR(bh))) {
 					if (ra_max == 0)
-						return ERR_PTR(err);
+						return bh;
 					break;
 				}
 				bh_use[ra_max] = bh;
@@ -1366,7 +1336,7 @@ cleanup_and_exit:
 }
 
 static struct buffer_head * ext4_dx_find_entry(struct inode *dir, const struct qstr *d_name,
-		       struct ext4_dir_entry_2 **res_dir, int *err)
+		       struct ext4_dir_entry_2 **res_dir)
 {
 	struct super_block * sb = dir->i_sb;
 	struct dx_hash_info	hinfo;
@@ -1375,25 +1345,23 @@ static struct buffer_head * ext4_dx_find_entry(struct inode *dir, const struct q
 	ext4_lblk_t block;
 	int retval;
 
-	if (!(frame = dx_probe(d_name, dir, &hinfo, frames, err)))
-		return NULL;
+	frame = dx_probe(d_name, dir, &hinfo, frames);
+	if (IS_ERR(frame))
+		return (struct buffer_head *) frame;
 	do {
 		block = dx_get_block(frame->at);
 		bh = ext4_read_dirblock(dir, block, DIRENT);
-		if (IS_ERR(bh)) {
-			*err = PTR_ERR(bh);
+		if (IS_ERR(bh))
 			goto errout;
-		}
+
 		retval = search_dirblock(bh, dir, d_name,
 					 block << EXT4_BLOCK_SIZE_BITS(sb),
 					 res_dir);
-		if (retval == 1) { 	/* Success! */
-			dx_release(frames);
-			return bh;
-		}
+		if (retval == 1)
+			goto success;
 		brelse(bh);
 		if (retval == -1) {
-			*err = ERR_BAD_DX_DIR;
+			bh = ERR_PTR(ERR_BAD_DX_DIR);
 			goto errout;
 		}
 
@@ -1402,18 +1370,19 @@ static struct buffer_head * ext4_dx_find_entry(struct inode *dir, const struct q
 					       frames, NULL);
 		if (retval < 0) {
 			ext4_warning(sb,
-			     "error reading index page in directory #%lu",
-			     dir->i_ino);
-			*err = retval;
+			     "error %d reading index page in directory #%lu",
+			     retval, dir->i_ino);
+			bh = ERR_PTR(retval);
 			goto errout;
 		}
 	} while (retval == 1);
 
-	*err = -ENOENT;
+	bh = NULL;
 errout:
 	dxtrace(printk(KERN_DEBUG "%s not found\n", d_name->name));
-	dx_release (frames);
-	return NULL;
+success:
+	dx_release(frames);
+	return bh;
 }
 
 static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
@@ -1441,7 +1410,7 @@ static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, unsi
 					 dentry);
 			return ERR_PTR(-EIO);
 		}
-		inode = ext4_iget(dir->i_sb, ino);
+		inode = ext4_iget_normal(dir->i_sb, ino);
 		if (inode == ERR_PTR(-ESTALE)) {
 			EXT4_ERROR_INODE(dir,
 					 "deleted inode referenced: %u",
@@ -1474,7 +1443,7 @@ struct dentry *ext4_get_parent(struct dentry *child)
 		return ERR_PTR(-EIO);
 	}
 
-	return d_obtain_alias(ext4_iget(child->d_inode->i_sb, ino));
+	return d_obtain_alias(ext4_iget_normal(child->d_inode->i_sb, ino));
 }
 
 /*
@@ -1533,7 +1502,7 @@ static struct ext4_dir_entry_2* dx_pack_dirents(char *base, unsigned blocksize)
  */
 static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
 			struct buffer_head **bh,struct dx_frame *frame,
-			struct dx_hash_info *hinfo, int *error)
+			struct dx_hash_info *hinfo)
 {
 	unsigned blocksize = dir->i_sb->s_blocksize;
 	unsigned count, continued;
@@ -1548,16 +1517,14 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
 	int	csum_size = 0;
 	int	err = 0, i;
 
-	if (EXT4_HAS_RO_COMPAT_FEATURE(dir->i_sb,
-				       EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
+	if (ext4_has_metadata_csum(dir->i_sb))
 		csum_size = sizeof(struct ext4_dir_entry_tail);
 
 	bh2 = ext4_append(handle, dir, &newblock);
 	if (IS_ERR(bh2)) {
 		brelse(*bh);
 		*bh = NULL;
-		*error = PTR_ERR(bh2);
-		return NULL;
+		return (struct ext4_dir_entry_2 *) bh2;
 	}
 
 	BUFFER_TRACE(*bh, "get_write_access");
@@ -1617,8 +1584,7 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
 	dxtrace(dx_show_leaf (hinfo, (struct ext4_dir_entry_2 *) data2, blocksize, 1));
 
 	/* Which block gets the new entry? */
-	if (hinfo->hash >= hash2)
-	{
+	if (hinfo->hash >= hash2) {
 		swap(*bh, bh2);
 		de = de2;
 	}
@@ -1638,8 +1604,7 @@ journal_error:
 	brelse(bh2);
 	*bh = NULL;
 	ext4_std_error(dir->i_sb, err);
-	*error = err;
-	return NULL;
+	return ERR_PTR(err);
 }
 
 int ext4_find_dest_de(struct inode *dir, struct inode *inode,
@@ -1718,8 +1683,7 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry,
 	int		csum_size = 0;
 	int		err;
 
-	if (EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
-				       EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
+	if (ext4_has_metadata_csum(inode->i_sb))
 		csum_size = sizeof(struct ext4_dir_entry_tail);
 
 	if (!de) {
@@ -1786,8 +1750,7 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry,
 	struct fake_dirent *fde;
 	int		csum_size = 0;
 
-	if (EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
-				       EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
+	if (ext4_has_metadata_csum(inode->i_sb))
 		csum_size = sizeof(struct ext4_dir_entry_tail);
 
 	blocksize =  dir->i_sb->s_blocksize;
@@ -1853,31 +1816,39 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry,
 		hinfo.hash_version += EXT4_SB(dir->i_sb)->s_hash_unsigned;
 	hinfo.seed = EXT4_SB(dir->i_sb)->s_hash_seed;
 	ext4fs_dirhash(name, namelen, &hinfo);
+	memset(frames, 0, sizeof(frames));
 	frame = frames;
 	frame->entries = entries;
 	frame->at = entries;
 	frame->bh = bh;
 	bh = bh2;
 
-	ext4_handle_dirty_dx_node(handle, dir, frame->bh);
-	ext4_handle_dirty_dirent_node(handle, dir, bh);
+	retval = ext4_handle_dirty_dx_node(handle, dir, frame->bh);
+	if (retval)
+		goto out_frames;	
+	retval = ext4_handle_dirty_dirent_node(handle, dir, bh);
+	if (retval)
+		goto out_frames;	
 
-	de = do_split(handle,dir, &bh, frame, &hinfo, &retval);
-	if (!de) {
-		/*
-		 * Even if the block split failed, we have to properly write
-		 * out all the changes we did so far. Otherwise we can end up
-		 * with corrupted filesystem.
-		 */
-		ext4_mark_inode_dirty(handle, dir);
-		dx_release(frames);
-		return retval;
+	de = do_split(handle,dir, &bh, frame, &hinfo);
+	if (IS_ERR(de)) {
+		retval = PTR_ERR(de);
+		goto out_frames;
 	}
 	dx_release(frames);
 
 	retval = add_dirent_to_buf(handle, dentry, inode, de, bh);
 	brelse(bh);
 	return retval;
+out_frames:
+	/*
+	 * Even if the block split failed, we have to properly write
+	 * out all the changes we did so far. Otherwise we can end up
+	 * with corrupted filesystem.
+	 */
+	ext4_mark_inode_dirty(handle, dir);
+	dx_release(frames);
+	return retval;
 }
 
 /*
@@ -1904,8 +1875,7 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
 	ext4_lblk_t block, blocks;
 	int	csum_size = 0;
 
-	if (EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
-				       EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
+	if (ext4_has_metadata_csum(inode->i_sb))
 		csum_size = sizeof(struct ext4_dir_entry_tail);
 
 	sb = dir->i_sb;
@@ -1982,9 +1952,9 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
 	struct ext4_dir_entry_2 *de;
 	int err;
 
-	frame = dx_probe(&dentry->d_name, dir, &hinfo, frames, &err);
-	if (!frame)
-		return err;
+	frame = dx_probe(&dentry->d_name, dir, &hinfo, frames);
+	if (IS_ERR(frame))
+		return PTR_ERR(frame);
 	entries = frame->entries;
 	at = frame->at;
 	bh = ext4_read_dirblock(dir, dx_get_block(frame->at), DIRENT);
@@ -2095,9 +2065,11 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
 			goto cleanup;
 		}
 	}
-	de = do_split(handle, dir, &bh, frame, &hinfo, &err);
-	if (!de)
+	de = do_split(handle, dir, &bh, frame, &hinfo);
+	if (IS_ERR(de)) {
+		err = PTR_ERR(de);
 		goto cleanup;
+	}
 	err = add_dirent_to_buf(handle, dentry, inode, de, bh);
 	goto cleanup;
 
@@ -2167,8 +2139,7 @@ static int ext4_delete_entry(handle_t *handle,
 			return err;
 	}
 
-	if (EXT4_HAS_RO_COMPAT_FEATURE(dir->i_sb,
-				       EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
+	if (ext4_has_metadata_csum(dir->i_sb))
 		csum_size = sizeof(struct ext4_dir_entry_tail);
 
 	BUFFER_TRACE(bh, "get_write_access");
@@ -2387,8 +2358,7 @@ static int ext4_init_new_dir(handle_t *handle, struct inode *dir,
 	int csum_size = 0;
 	int err;
 
-	if (EXT4_HAS_RO_COMPAT_FEATURE(dir->i_sb,
-				       EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
+	if (ext4_has_metadata_csum(dir->i_sb))
 		csum_size = sizeof(struct ext4_dir_entry_tail);
 
 	if (ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA)) {
@@ -2403,10 +2373,6 @@ static int ext4_init_new_dir(handle_t *handle, struct inode *dir,
 	dir_block = ext4_append(handle, inode, &block);
 	if (IS_ERR(dir_block))
 		return PTR_ERR(dir_block);
-	BUFFER_TRACE(dir_block, "get_write_access");
-	err = ext4_journal_get_write_access(handle, dir_block);
-	if (err)
-		goto out;
 	de = (struct ext4_dir_entry_2 *)dir_block->b_data;
 	ext4_init_dot_dotdot(inode, de, blocksize, csum_size, dir->i_ino, 0);
 	set_nlink(inode, 2);
@@ -2573,7 +2539,7 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode)
 	int err = 0, rc;
 	bool dirty = false;
 
-	if (!sbi->s_journal)
+	if (!sbi->s_journal || is_bad_inode(inode))
 		return 0;
 
 	WARN_ON_ONCE(!(inode->i_state & (I_NEW | I_FREEING)) &&
@@ -3190,6 +3156,39 @@ static void ext4_update_dir_count(handle_t *handle, struct ext4_renament *ent)
 	}
 }
 
+static struct inode *ext4_whiteout_for_rename(struct ext4_renament *ent,
+					      int credits, handle_t **h)
+{
+	struct inode *wh;
+	handle_t *handle;
+	int retries = 0;
+
+	/*
+	 * for inode block, sb block, group summaries,
+	 * and inode bitmap
+	 */
+	credits += (EXT4_MAXQUOTAS_TRANS_BLOCKS(ent->dir->i_sb) +
+		    EXT4_XATTR_TRANS_BLOCKS + 4);
+retry:
+	wh = ext4_new_inode_start_handle(ent->dir, S_IFCHR | WHITEOUT_MODE,
+					 &ent->dentry->d_name, 0, NULL,
+					 EXT4_HT_DIR, credits);
+
+	handle = ext4_journal_current_handle();
+	if (IS_ERR(wh)) {
+		if (handle)
+			ext4_journal_stop(handle);
+		if (PTR_ERR(wh) == -ENOSPC &&
+		    ext4_should_retry_alloc(ent->dir->i_sb, &retries))
+			goto retry;
+	} else {
+		*h = handle;
+		init_special_inode(wh, wh->i_mode, WHITEOUT_DEV);
+		wh->i_op = &ext4_special_inode_operations;
+	}
+	return wh;
+}
+
 /*
  * Anybody can rename anything with this: the permission checks are left to the
  * higher-level routines.
@@ -3199,7 +3198,8 @@ static void ext4_update_dir_count(handle_t *handle, struct ext4_renament *ent)
  * This comes from rename(const char *oldpath, const char *newpath)
  */
 static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
-		       struct inode *new_dir, struct dentry *new_dentry)
+		       struct inode *new_dir, struct dentry *new_dentry,
+		       unsigned int flags)
 {
 	handle_t *handle = NULL;
 	struct ext4_renament old = {
@@ -3214,6 +3214,9 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
 	};
 	int force_reread;
 	int retval;
+	struct inode *whiteout = NULL;
+	int credits;
+	u8 old_file_type;
 
 	dquot_initialize(old.dir);
 	dquot_initialize(new.dir);
@@ -3252,11 +3255,17 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
 	if (new.inode && !test_opt(new.dir->i_sb, NO_AUTO_DA_ALLOC))
 		ext4_alloc_da_blocks(old.inode);
 
-	handle = ext4_journal_start(old.dir, EXT4_HT_DIR,
-		(2 * EXT4_DATA_TRANS_BLOCKS(old.dir->i_sb) +
-		 EXT4_INDEX_EXTRA_TRANS_BLOCKS + 2));
-	if (IS_ERR(handle))
-		return PTR_ERR(handle);
+	credits = (2 * EXT4_DATA_TRANS_BLOCKS(old.dir->i_sb) +
+		   EXT4_INDEX_EXTRA_TRANS_BLOCKS + 2);
+	if (!(flags & RENAME_WHITEOUT)) {
+		handle = ext4_journal_start(old.dir, EXT4_HT_DIR, credits);
+		if (IS_ERR(handle))
+			return PTR_ERR(handle);
+	} else {
+		whiteout = ext4_whiteout_for_rename(&old, credits, &handle);
+		if (IS_ERR(whiteout))
+			return PTR_ERR(whiteout);
+	}
 
 	if (IS_DIRSYNC(old.dir) || IS_DIRSYNC(new.dir))
 		ext4_handle_sync(handle);
@@ -3284,13 +3293,26 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
 	 */
 	force_reread = (new.dir->i_ino == old.dir->i_ino &&
 			ext4_test_inode_flag(new.dir, EXT4_INODE_INLINE_DATA));
+
+	old_file_type = old.de->file_type;
+	if (whiteout) {
+		/*
+		 * Do this before adding a new entry, so the old entry is sure
+		 * to be still pointing to the valid old entry.
+		 */
+		retval = ext4_setent(handle, &old, whiteout->i_ino,
+				     EXT4_FT_CHRDEV);
+		if (retval)
+			goto end_rename;
+		ext4_mark_inode_dirty(handle, whiteout);
+	}
 	if (!new.bh) {
 		retval = ext4_add_entry(handle, new.dentry, old.inode);
 		if (retval)
 			goto end_rename;
 	} else {
 		retval = ext4_setent(handle, &new,
-				     old.inode->i_ino, old.de->file_type);
+				     old.inode->i_ino, old_file_type);
 		if (retval)
 			goto end_rename;
 	}
@@ -3305,10 +3327,12 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
 	old.inode->i_ctime = ext4_current_time(old.inode);
 	ext4_mark_inode_dirty(handle, old.inode);
 
-	/*
-	 * ok, that's it
-	 */
-	ext4_rename_delete(handle, &old, force_reread);
+	if (!whiteout) {
+		/*
+		 * ok, that's it
+		 */
+		ext4_rename_delete(handle, &old, force_reread);
+	}
 
 	if (new.inode) {
 		ext4_dec_count(handle, new.inode);
@@ -3344,6 +3368,12 @@ end_rename:
 	brelse(old.dir_bh);
 	brelse(old.bh);
 	brelse(new.bh);
+	if (whiteout) {
+		if (retval)
+			drop_nlink(whiteout);
+		unlock_new_inode(whiteout);
+		iput(whiteout);
+	}
 	if (handle)
 		ext4_journal_stop(handle);
 	return retval;
@@ -3476,18 +3506,15 @@ static int ext4_rename2(struct inode *old_dir, struct dentry *old_dentry,
 			struct inode *new_dir, struct dentry *new_dentry,
 			unsigned int flags)
 {
-	if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE))
+	if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT))
 		return -EINVAL;
 
 	if (flags & RENAME_EXCHANGE) {
 		return ext4_cross_rename(old_dir, old_dentry,
 					 new_dir, new_dentry);
 	}
-	/*
-	 * Existence checking was done by the VFS, otherwise "RENAME_NOREPLACE"
-	 * is equivalent to regular rename.
-	 */
-	return ext4_rename(old_dir, old_dentry, new_dir, new_dentry);
+
+	return ext4_rename(old_dir, old_dentry, new_dir, new_dentry, flags);
 }
 
 /*
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index 1e43b90..ca45883 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -1081,7 +1081,7 @@ static void update_backups(struct super_block *sb, int blk_off, char *data,
 			break;
 
 		if (meta_bg == 0)
-			backup_block = group * bpg + blk_off;
+			backup_block = ((ext4_fsblk_t)group) * bpg + blk_off;
 		else
 			backup_block = (ext4_group_first_block_no(sb, group) +
 					ext4_bg_has_super(sb, group));
@@ -1212,8 +1212,7 @@ static int ext4_set_bitmap_checksums(struct super_block *sb,
 {
 	struct buffer_head *bh;
 
-	if (!EXT4_HAS_RO_COMPAT_FEATURE(sb,
-					EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
+	if (!ext4_has_metadata_csum(sb))
 		return 0;
 
 	bh = ext4_get_bitmap(sb, group_data->inode_bitmap);
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 05c1592..63e802b 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -70,7 +70,6 @@ static void ext4_mark_recovery_complete(struct super_block *sb,
 static void ext4_clear_journal_err(struct super_block *sb,
 				   struct ext4_super_block *es);
 static int ext4_sync_fs(struct super_block *sb, int wait);
-static int ext4_sync_fs_nojournal(struct super_block *sb, int wait);
 static int ext4_remount(struct super_block *sb, int *flags, char *data);
 static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf);
 static int ext4_unfreeze(struct super_block *sb);
@@ -141,8 +140,7 @@ static __le32 ext4_superblock_csum(struct super_block *sb,
 static int ext4_superblock_csum_verify(struct super_block *sb,
 				       struct ext4_super_block *es)
 {
-	if (!EXT4_HAS_RO_COMPAT_FEATURE(sb,
-				       EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
+	if (!ext4_has_metadata_csum(sb))
 		return 1;
 
 	return es->s_checksum == ext4_superblock_csum(sb, es);
@@ -152,8 +150,7 @@ void ext4_superblock_csum_set(struct super_block *sb)
 {
 	struct ext4_super_block *es = EXT4_SB(sb)->s_es;
 
-	if (!EXT4_HAS_RO_COMPAT_FEATURE(sb,
-		EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
+	if (!ext4_has_metadata_csum(sb))
 		return;
 
 	es->s_checksum = ext4_superblock_csum(sb, es);
@@ -820,10 +817,9 @@ static void ext4_put_super(struct super_block *sb)
 	percpu_counter_destroy(&sbi->s_freeinodes_counter);
 	percpu_counter_destroy(&sbi->s_dirs_counter);
 	percpu_counter_destroy(&sbi->s_dirtyclusters_counter);
-	percpu_counter_destroy(&sbi->s_extent_cache_cnt);
 	brelse(sbi->s_sbh);
 #ifdef CONFIG_QUOTA
-	for (i = 0; i < MAXQUOTAS; i++)
+	for (i = 0; i < EXT4_MAXQUOTAS; i++)
 		kfree(sbi->s_qf_names[i]);
 #endif
 
@@ -885,6 +881,7 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
 	ext4_es_init_tree(&ei->i_es_tree);
 	rwlock_init(&ei->i_es_lock);
 	INIT_LIST_HEAD(&ei->i_es_lru);
+	ei->i_es_all_nr = 0;
 	ei->i_es_lru_nr = 0;
 	ei->i_touch_when = 0;
 	ei->i_reserved_data_blocks = 0;
@@ -895,6 +892,7 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
 	spin_lock_init(&(ei->i_block_reservation_lock));
 #ifdef CONFIG_QUOTA
 	ei->i_reserved_quota = 0;
+	memset(&ei->i_dquot, 0, sizeof(ei->i_dquot));
 #endif
 	ei->jinode = NULL;
 	INIT_LIST_HEAD(&ei->i_rsv_conversion_list);
@@ -1002,7 +1000,7 @@ static struct inode *ext4_nfs_get_inode(struct super_block *sb,
 	 * Currently we don't know the generation for parent directory, so
 	 * a generation of 0 means "accept any"
 	 */
-	inode = ext4_iget(sb, ino);
+	inode = ext4_iget_normal(sb, ino);
 	if (IS_ERR(inode))
 		return ERR_CAST(inode);
 	if (generation && inode->i_generation != generation) {
@@ -1071,6 +1069,11 @@ static int ext4_quota_enable(struct super_block *sb, int type, int format_id,
 			     unsigned int flags);
 static int ext4_enable_quotas(struct super_block *sb);
 
+static struct dquot **ext4_get_dquots(struct inode *inode)
+{
+	return EXT4_I(inode)->i_dquot;
+}
+
 static const struct dquot_operations ext4_quota_operations = {
 	.get_reserved_space = ext4_get_reserved_space,
 	.write_dquot	= ext4_write_dquot,
@@ -1120,25 +1123,7 @@ static const struct super_operations ext4_sops = {
 #ifdef CONFIG_QUOTA
 	.quota_read	= ext4_quota_read,
 	.quota_write	= ext4_quota_write,
-#endif
-	.bdev_try_to_free_page = bdev_try_to_free_page,
-};
-
-static const struct super_operations ext4_nojournal_sops = {
-	.alloc_inode	= ext4_alloc_inode,
-	.destroy_inode	= ext4_destroy_inode,
-	.write_inode	= ext4_write_inode,
-	.dirty_inode	= ext4_dirty_inode,
-	.drop_inode	= ext4_drop_inode,
-	.evict_inode	= ext4_evict_inode,
-	.sync_fs	= ext4_sync_fs_nojournal,
-	.put_super	= ext4_put_super,
-	.statfs		= ext4_statfs,
-	.remount_fs	= ext4_remount,
-	.show_options	= ext4_show_options,
-#ifdef CONFIG_QUOTA
-	.quota_read	= ext4_quota_read,
-	.quota_write	= ext4_quota_write,
+	.get_dquots	= ext4_get_dquots,
 #endif
 	.bdev_try_to_free_page = bdev_try_to_free_page,
 };
@@ -1712,13 +1697,6 @@ static int parse_options(char *options, struct super_block *sb,
 					"not specified");
 			return 0;
 		}
-	} else {
-		if (sbi->s_jquota_fmt) {
-			ext4_msg(sb, KERN_ERR, "journaled quota format "
-					"specified with no journaling "
-					"enabled");
-			return 0;
-		}
 	}
 #endif
 	if (test_opt(sb, DIOREAD_NOLOCK)) {
@@ -2016,8 +1994,7 @@ static __le16 ext4_group_desc_csum(struct ext4_sb_info *sbi, __u32 block_group,
 	__u16 crc = 0;
 	__le32 le_group = cpu_to_le32(block_group);
 
-	if ((sbi->s_es->s_feature_ro_compat &
-	     cpu_to_le32(EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))) {
+	if (ext4_has_metadata_csum(sbi->s_sb)) {
 		/* Use new metadata_csum algorithm */
 		__le16 save_csum;
 		__u32 csum32;
@@ -2035,6 +2012,10 @@ static __le16 ext4_group_desc_csum(struct ext4_sb_info *sbi, __u32 block_group,
 	}
 
 	/* old crc16 code */
+	if (!(sbi->s_es->s_feature_ro_compat &
+	      cpu_to_le32(EXT4_FEATURE_RO_COMPAT_GDT_CSUM)))
+		return 0;
+
 	offset = offsetof(struct ext4_group_desc, bg_checksum);
 
 	crc = crc16(~0, sbi->s_es->s_uuid, sizeof(sbi->s_es->s_uuid));
@@ -2191,7 +2172,7 @@ static void ext4_orphan_cleanup(struct super_block *sb,
 	if (EXT4_SB(sb)->s_mount_state & EXT4_ERROR_FS) {
 		/* don't clear list on RO mount w/ errors */
 		if (es->s_last_orphan && !(s_flags & MS_RDONLY)) {
-			jbd_debug(1, "Errors on filesystem, "
+			ext4_msg(sb, KERN_INFO, "Errors on filesystem, "
 				  "clearing orphan list.\n");
 			es->s_last_orphan = 0;
 		}
@@ -2207,7 +2188,7 @@ static void ext4_orphan_cleanup(struct super_block *sb,
 	/* Needed for iput() to work correctly and not trash data */
 	sb->s_flags |= MS_ACTIVE;
 	/* Turn on quotas so that they are updated correctly */
-	for (i = 0; i < MAXQUOTAS; i++) {
+	for (i = 0; i < EXT4_MAXQUOTAS; i++) {
 		if (EXT4_SB(sb)->s_qf_names[i]) {
 			int ret = ext4_quota_on_mount(sb, i);
 			if (ret < 0)
@@ -2263,7 +2244,7 @@ static void ext4_orphan_cleanup(struct super_block *sb,
 		       PLURAL(nr_truncates));
 #ifdef CONFIG_QUOTA
 	/* Turn quotas off */
-	for (i = 0; i < MAXQUOTAS; i++) {
+	for (i = 0; i < EXT4_MAXQUOTAS; i++) {
 		if (sb_dqopt(sb)->files[i])
 			dquot_quota_off(sb, i);
 	}
@@ -2548,6 +2529,16 @@ static ssize_t sbi_ui_store(struct ext4_attr *a,
 	return count;
 }
 
+static ssize_t es_ui_show(struct ext4_attr *a,
+			   struct ext4_sb_info *sbi, char *buf)
+{
+
+	unsigned int *ui = (unsigned int *) (((char *) sbi->s_es) +
+			   a->u.offset);
+
+	return snprintf(buf, PAGE_SIZE, "%u\n", *ui);
+}
+
 static ssize_t reserved_clusters_show(struct ext4_attr *a,
 				  struct ext4_sb_info *sbi, char *buf)
 {
@@ -2601,14 +2592,29 @@ static struct ext4_attr ext4_attr_##_name = {			\
 		.offset = offsetof(struct ext4_sb_info, _elname),\
 	},							\
 }
+
+#define EXT4_ATTR_OFFSET_ES(_name,_mode,_show,_store,_elname)		\
+static struct ext4_attr ext4_attr_##_name = {				\
+	.attr = {.name = __stringify(_name), .mode = _mode },		\
+	.show	= _show,						\
+	.store	= _store,						\
+	.u = {								\
+		.offset = offsetof(struct ext4_super_block, _elname),	\
+	},								\
+}
+
 #define EXT4_ATTR(name, mode, show, store) \
 static struct ext4_attr ext4_attr_##name = __ATTR(name, mode, show, store)
 
 #define EXT4_INFO_ATTR(name) EXT4_ATTR(name, 0444, NULL, NULL)
 #define EXT4_RO_ATTR(name) EXT4_ATTR(name, 0444, name##_show, NULL)
 #define EXT4_RW_ATTR(name) EXT4_ATTR(name, 0644, name##_show, name##_store)
+
+#define EXT4_RO_ATTR_ES_UI(name, elname)	\
+	EXT4_ATTR_OFFSET_ES(name, 0444, es_ui_show, NULL, elname)
 #define EXT4_RW_ATTR_SBI_UI(name, elname)	\
 	EXT4_ATTR_OFFSET(name, 0644, sbi_ui_show, sbi_ui_store, elname)
+
 #define ATTR_LIST(name) &ext4_attr_##name.attr
 #define EXT4_DEPRECATED_ATTR(_name, _val)	\
 static struct ext4_attr ext4_attr_##_name = {			\
@@ -2641,6 +2647,9 @@ EXT4_RW_ATTR_SBI_UI(warning_ratelimit_interval_ms, s_warning_ratelimit_state.int
 EXT4_RW_ATTR_SBI_UI(warning_ratelimit_burst, s_warning_ratelimit_state.burst);
 EXT4_RW_ATTR_SBI_UI(msg_ratelimit_interval_ms, s_msg_ratelimit_state.interval);
 EXT4_RW_ATTR_SBI_UI(msg_ratelimit_burst, s_msg_ratelimit_state.burst);
+EXT4_RO_ATTR_ES_UI(errors_count, s_error_count);
+EXT4_RO_ATTR_ES_UI(first_error_time, s_first_error_time);
+EXT4_RO_ATTR_ES_UI(last_error_time, s_last_error_time);
 
 static struct attribute *ext4_attrs[] = {
 	ATTR_LIST(delayed_allocation_blocks),
@@ -2664,6 +2673,9 @@ static struct attribute *ext4_attrs[] = {
 	ATTR_LIST(warning_ratelimit_burst),
 	ATTR_LIST(msg_ratelimit_interval_ms),
 	ATTR_LIST(msg_ratelimit_burst),
+	ATTR_LIST(errors_count),
+	ATTR_LIST(first_error_time),
+	ATTR_LIST(last_error_time),
 	NULL,
 };
 
@@ -2723,9 +2735,25 @@ static void ext4_feat_release(struct kobject *kobj)
 	complete(&ext4_feat->f_kobj_unregister);
 }
 
+static ssize_t ext4_feat_show(struct kobject *kobj,
+			      struct attribute *attr, char *buf)
+{
+	return snprintf(buf, PAGE_SIZE, "supported\n");
+}
+
+/*
+ * We can not use ext4_attr_show/store because it relies on the kobject
+ * being embedded in the ext4_sb_info structure which is definitely not
+ * true in this case.
+ */
+static const struct sysfs_ops ext4_feat_ops = {
+	.show	= ext4_feat_show,
+	.store	= NULL,
+};
+
 static struct kobj_type ext4_feat_ktype = {
 	.default_attrs	= ext4_feat_attrs,
-	.sysfs_ops	= &ext4_attr_ops,
+	.sysfs_ops	= &ext4_feat_ops,
 	.release	= ext4_feat_release,
 };
 
@@ -3179,8 +3207,7 @@ static int set_journal_csum_feature_set(struct super_block *sb)
 	int compat, incompat;
 	struct ext4_sb_info *sbi = EXT4_SB(sb);
 
-	if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
-				       EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) {
+	if (ext4_has_metadata_csum(sb)) {
 		/* journal checksum v3 */
 		compat = 0;
 		incompat = JBD2_FEATURE_INCOMPAT_CSUM_V3;
@@ -3190,6 +3217,10 @@ static int set_journal_csum_feature_set(struct super_block *sb)
 		incompat = 0;
 	}
 
+	jbd2_journal_clear_features(sbi->s_journal,
+			JBD2_FEATURE_COMPAT_CHECKSUM, 0,
+			JBD2_FEATURE_INCOMPAT_CSUM_V3 |
+			JBD2_FEATURE_INCOMPAT_CSUM_V2);
 	if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) {
 		ret = jbd2_journal_set_features(sbi->s_journal,
 				compat, 0,
@@ -3202,11 +3233,8 @@ static int set_journal_csum_feature_set(struct super_block *sb)
 		jbd2_journal_clear_features(sbi->s_journal, 0, 0,
 				JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT);
 	} else {
-		jbd2_journal_clear_features(sbi->s_journal,
-				JBD2_FEATURE_COMPAT_CHECKSUM, 0,
-				JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT |
-				JBD2_FEATURE_INCOMPAT_CSUM_V3 |
-				JBD2_FEATURE_INCOMPAT_CSUM_V2);
+		jbd2_journal_clear_features(sbi->s_journal, 0, 0,
+				JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT);
 	}
 
 	return ret;
@@ -3436,7 +3464,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 		logical_sb_block = sb_block;
 	}
 
-	if (!(bh = sb_bread(sb, logical_sb_block))) {
+	if (!(bh = sb_bread_unmovable(sb, logical_sb_block))) {
 		ext4_msg(sb, KERN_ERR, "unable to read superblock");
 		goto out_fail;
 	}
@@ -3487,8 +3515,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 	}
 
 	/* Precompute checksum seed for all metadata */
-	if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
-			EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
+	if (ext4_has_metadata_csum(sb))
 		sbi->s_csum_seed = ext4_chksum(sbi, ~0, es->s_uuid,
 					       sizeof(es->s_uuid));
 
@@ -3506,6 +3533,10 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 #ifdef CONFIG_EXT4_FS_POSIX_ACL
 	set_opt(sb, POSIX_ACL);
 #endif
+	/* don't forget to enable journal_csum when metadata_csum is enabled. */
+	if (ext4_has_metadata_csum(sb))
+		set_opt(sb, JOURNAL_CHECKSUM);
+
 	if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_DATA)
 		set_opt(sb, JOURNAL_DATA);
 	else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_ORDERED)
@@ -3519,8 +3550,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 		set_opt(sb, ERRORS_CONT);
 	else
 		set_opt(sb, ERRORS_RO);
-	if (def_mount_opts & EXT4_DEFM_BLOCK_VALIDITY)
-		set_opt(sb, BLOCK_VALIDITY);
+	/* block_validity enabled by default; disable with noblock_validity */
+	set_opt(sb, BLOCK_VALIDITY);
 	if (def_mount_opts & EXT4_DEFM_DISCARD)
 		set_opt(sb, DISCARD);
 
@@ -3646,7 +3677,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 		brelse(bh);
 		logical_sb_block = sb_block * EXT4_MIN_BLOCK_SIZE;
 		offset = do_div(logical_sb_block, blocksize);
-		bh = sb_bread(sb, logical_sb_block);
+		bh = sb_bread_unmovable(sb, logical_sb_block);
 		if (!bh) {
 			ext4_msg(sb, KERN_ERR,
 			       "Can't read superblock on 2nd try");
@@ -3868,7 +3899,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 
 	for (i = 0; i < db_count; i++) {
 		block = descriptor_loc(sb, logical_sb_block, i);
-		sbi->s_group_desc[i] = sb_bread(sb, block);
+		sbi->s_group_desc[i] = sb_bread_unmovable(sb, block);
 		if (!sbi->s_group_desc[i]) {
 			ext4_msg(sb, KERN_ERR,
 			       "can't read group descriptor %d", i);
@@ -3890,13 +3921,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 	sbi->s_err_report.data = (unsigned long) sb;
 
 	/* Register extent status tree shrinker */
-	ext4_es_register_shrinker(sbi);
-
-	err = percpu_counter_init(&sbi->s_extent_cache_cnt, 0, GFP_KERNEL);
-	if (err) {
-		ext4_msg(sb, KERN_ERR, "insufficient memory");
+	if (ext4_es_register_shrinker(sbi))
 		goto failed_mount3;
-	}
 
 	sbi->s_stripe = ext4_get_stripe_size(sbi);
 	sbi->s_extent_max_zeroout_kb = 32;
@@ -3904,11 +3930,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 	/*
 	 * set up enough so that it can read an inode
 	 */
-	if (!test_opt(sb, NOLOAD) &&
-	    EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL))
-		sb->s_op = &ext4_sops;
-	else
-		sb->s_op = &ext4_nojournal_sops;
+	sb->s_op = &ext4_sops;
 	sb->s_export_op = &ext4_export_ops;
 	sb->s_xattr = ext4_xattr_handlers;
 #ifdef CONFIG_QUOTA
@@ -3917,6 +3939,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 		sb->s_qcop = &ext4_qctl_sysfile_operations;
 	else
 		sb->s_qcop = &ext4_qctl_operations;
+	sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP;
 #endif
 	memcpy(sb->s_uuid, es->s_uuid, sizeof(es->s_uuid));
 
@@ -3932,7 +3955,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 	if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_MMP) &&
 	    !(sb->s_flags & MS_RDONLY))
 		if (ext4_multi_mount_protect(sb, le64_to_cpu(es->s_mmp_block)))
-			goto failed_mount3;
+			goto failed_mount3a;
 
 	/*
 	 * The first inode we look at is the journal inode.  Don't try
@@ -3941,7 +3964,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 	if (!test_opt(sb, NOLOAD) &&
 	    EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)) {
 		if (ext4_load_journal(sb, es, journal_devnum))
-			goto failed_mount3;
+			goto failed_mount3a;
 	} else if (test_opt(sb, NOLOAD) && !(sb->s_flags & MS_RDONLY) &&
 	      EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) {
 		ext4_msg(sb, KERN_ERR, "required journal recovery "
@@ -4229,10 +4252,10 @@ failed_mount_wq:
 		jbd2_journal_destroy(sbi->s_journal);
 		sbi->s_journal = NULL;
 	}
-failed_mount3:
+failed_mount3a:
 	ext4_es_unregister_shrinker(sbi);
+failed_mount3:
 	del_timer_sync(&sbi->s_err_report);
-	percpu_counter_destroy(&sbi->s_extent_cache_cnt);
 	if (sbi->s_mmp_tsk)
 		kthread_stop(sbi->s_mmp_tsk);
 failed_mount2:
@@ -4247,7 +4270,7 @@ failed_mount:
 		remove_proc_entry(sb->s_id, ext4_proc_root);
 	}
 #ifdef CONFIG_QUOTA
-	for (i = 0; i < MAXQUOTAS; i++)
+	for (i = 0; i < EXT4_MAXQUOTAS; i++)
 		kfree(sbi->s_qf_names[i]);
 #endif
 	ext4_blkdev_remove(sbi);
@@ -4375,6 +4398,15 @@ static journal_t *ext4_get_dev_journal(struct super_block *sb,
 		goto out_bdev;
 	}
 
+	if ((le32_to_cpu(es->s_feature_ro_compat) &
+	     EXT4_FEATURE_RO_COMPAT_METADATA_CSUM) &&
+	    es->s_checksum != ext4_superblock_csum(sb, es)) {
+		ext4_msg(sb, KERN_ERR, "external journal has "
+				       "corrupt superblock");
+		brelse(bh);
+		goto out_bdev;
+	}
+
 	if (memcmp(EXT4_SB(sb)->s_es->s_journal_uuid, es->s_uuid, 16)) {
 		ext4_msg(sb, KERN_ERR, "journal UUID does not match");
 		brelse(bh);
@@ -4677,15 +4709,19 @@ static int ext4_sync_fs(struct super_block *sb, int wait)
 	 * being sent at the end of the function. But we can skip it if
 	 * transaction_commit will do it for us.
 	 */
-	target = jbd2_get_latest_transaction(sbi->s_journal);
-	if (wait && sbi->s_journal->j_flags & JBD2_BARRIER &&
-	    !jbd2_trans_will_send_data_barrier(sbi->s_journal, target))
+	if (sbi->s_journal) {
+		target = jbd2_get_latest_transaction(sbi->s_journal);
+		if (wait && sbi->s_journal->j_flags & JBD2_BARRIER &&
+		    !jbd2_trans_will_send_data_barrier(sbi->s_journal, target))
+			needs_barrier = true;
+
+		if (jbd2_journal_start_commit(sbi->s_journal, &target)) {
+			if (wait)
+				ret = jbd2_log_wait_commit(sbi->s_journal,
+							   target);
+		}
+	} else if (wait && test_opt(sb, BARRIER))
 		needs_barrier = true;
-
-	if (jbd2_journal_start_commit(sbi->s_journal, &target)) {
-		if (wait)
-			ret = jbd2_log_wait_commit(sbi->s_journal, target);
-	}
 	if (needs_barrier) {
 		int err;
 		err = blkdev_issue_flush(sb->s_bdev, GFP_KERNEL, NULL);
@@ -4696,19 +4732,6 @@ static int ext4_sync_fs(struct super_block *sb, int wait)
 	return ret;
 }
 
-static int ext4_sync_fs_nojournal(struct super_block *sb, int wait)
-{
-	int ret = 0;
-
-	trace_ext4_sync_fs(sb, wait);
-	flush_workqueue(EXT4_SB(sb)->rsv_conversion_wq);
-	dquot_writeback_dquots(sb, -1);
-	if (wait && test_opt(sb, BARRIER))
-		ret = blkdev_issue_flush(sb->s_bdev, GFP_KERNEL, NULL);
-
-	return ret;
-}
-
 /*
  * LVM calls this function before a (read-only) snapshot is created.  This
  * gives us a chance to flush the journal completely and mark the fs clean.
@@ -4727,23 +4750,26 @@ static int ext4_freeze(struct super_block *sb)
 
 	journal = EXT4_SB(sb)->s_journal;
 
-	/* Now we set up the journal barrier. */
-	jbd2_journal_lock_updates(journal);
+	if (journal) {
+		/* Now we set up the journal barrier. */
+		jbd2_journal_lock_updates(journal);
 
-	/*
-	 * Don't clear the needs_recovery flag if we failed to flush
-	 * the journal.
-	 */
-	error = jbd2_journal_flush(journal);
-	if (error < 0)
-		goto out;
+		/*
+		 * Don't clear the needs_recovery flag if we failed to
+		 * flush the journal.
+		 */
+		error = jbd2_journal_flush(journal);
+		if (error < 0)
+			goto out;
+	}
 
 	/* Journal blocked and flushed, clear needs_recovery flag. */
 	EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
 	error = ext4_commit_super(sb, 1);
 out:
-	/* we rely on upper layer to stop further updates */
-	jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
+	if (journal)
+		/* we rely on upper layer to stop further updates */
+		jbd2_journal_unlock_updates(journal);
 	return error;
 }
 
@@ -4774,7 +4800,7 @@ struct ext4_mount_options {
 	u32 s_min_batch_time, s_max_batch_time;
 #ifdef CONFIG_QUOTA
 	int s_jquota_fmt;
-	char *s_qf_names[MAXQUOTAS];
+	char *s_qf_names[EXT4_MAXQUOTAS];
 #endif
 };
 
@@ -4804,7 +4830,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
 	old_opts.s_max_batch_time = sbi->s_max_batch_time;
 #ifdef CONFIG_QUOTA
 	old_opts.s_jquota_fmt = sbi->s_jquota_fmt;
-	for (i = 0; i < MAXQUOTAS; i++)
+	for (i = 0; i < EXT4_MAXQUOTAS; i++)
 		if (sbi->s_qf_names[i]) {
 			old_opts.s_qf_names[i] = kstrdup(sbi->s_qf_names[i],
 							 GFP_KERNEL);
@@ -4828,6 +4854,14 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
 		goto restore_opts;
 	}
 
+	if ((old_opts.s_mount_opt & EXT4_MOUNT_JOURNAL_CHECKSUM) ^
+	    test_opt(sb, JOURNAL_CHECKSUM)) {
+		ext4_msg(sb, KERN_ERR, "changing journal_checksum "
+			 "during remount not supported");
+		err = -EINVAL;
+		goto restore_opts;
+	}
+
 	if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) {
 		if (test_opt2(sb, EXPLICIT_DELALLOC)) {
 			ext4_msg(sb, KERN_ERR, "can't mount with "
@@ -4965,7 +4999,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
 
 #ifdef CONFIG_QUOTA
 	/* Release old quota file names */
-	for (i = 0; i < MAXQUOTAS; i++)
+	for (i = 0; i < EXT4_MAXQUOTAS; i++)
 		kfree(old_opts.s_qf_names[i]);
 	if (enable_quota) {
 		if (sb_any_quota_suspended(sb))
@@ -4994,7 +5028,7 @@ restore_opts:
 	sbi->s_max_batch_time = old_opts.s_max_batch_time;
 #ifdef CONFIG_QUOTA
 	sbi->s_jquota_fmt = old_opts.s_jquota_fmt;
-	for (i = 0; i < MAXQUOTAS; i++) {
+	for (i = 0; i < EXT4_MAXQUOTAS; i++) {
 		kfree(sbi->s_qf_names[i]);
 		sbi->s_qf_names[i] = old_opts.s_qf_names[i];
 	}
@@ -5197,7 +5231,7 @@ static int ext4_quota_enable(struct super_block *sb, int type, int format_id,
 {
 	int err;
 	struct inode *qf_inode;
-	unsigned long qf_inums[MAXQUOTAS] = {
+	unsigned long qf_inums[EXT4_MAXQUOTAS] = {
 		le32_to_cpu(EXT4_SB(sb)->s_es->s_usr_quota_inum),
 		le32_to_cpu(EXT4_SB(sb)->s_es->s_grp_quota_inum)
 	};
@@ -5225,13 +5259,13 @@ static int ext4_quota_enable(struct super_block *sb, int type, int format_id,
 static int ext4_enable_quotas(struct super_block *sb)
 {
 	int type, err = 0;
-	unsigned long qf_inums[MAXQUOTAS] = {
+	unsigned long qf_inums[EXT4_MAXQUOTAS] = {
 		le32_to_cpu(EXT4_SB(sb)->s_es->s_usr_quota_inum),
 		le32_to_cpu(EXT4_SB(sb)->s_es->s_grp_quota_inum)
 	};
 
 	sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE;
-	for (type = 0; type < MAXQUOTAS; type++) {
+	for (type = 0; type < EXT4_MAXQUOTAS; type++) {
 		if (qf_inums[type]) {
 			err = ext4_quota_enable(sb, type, QFMT_VFS_V1,
 						DQUOT_USAGE_ENABLED);
@@ -5309,7 +5343,6 @@ static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data,
 {
 	struct inode *inode = sb_dqopt(sb)->files[type];
 	ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb);
-	int err = 0;
 	int offset = off & (sb->s_blocksize - 1);
 	int tocopy;
 	size_t toread;
@@ -5324,9 +5357,9 @@ static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data,
 	while (toread > 0) {
 		tocopy = sb->s_blocksize - offset < toread ?
 				sb->s_blocksize - offset : toread;
-		bh = ext4_bread(NULL, inode, blk, 0, &err);
-		if (err)
-			return err;
+		bh = ext4_bread(NULL, inode, blk, 0);
+		if (IS_ERR(bh))
+			return PTR_ERR(bh);
 		if (!bh)	/* A hole? */
 			memset(data, 0, tocopy);
 		else
@@ -5347,8 +5380,7 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type,
 {
 	struct inode *inode = sb_dqopt(sb)->files[type];
 	ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb);
-	int err = 0;
-	int offset = off & (sb->s_blocksize - 1);
+	int err, offset = off & (sb->s_blocksize - 1);
 	struct buffer_head *bh;
 	handle_t *handle = journal_current_handle();
 
@@ -5369,14 +5401,16 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type,
 		return -EIO;
 	}
 
-	bh = ext4_bread(handle, inode, blk, 1, &err);
+	bh = ext4_bread(handle, inode, blk, 1);
+	if (IS_ERR(bh))
+		return PTR_ERR(bh);
 	if (!bh)
 		goto out;
 	BUFFER_TRACE(bh, "get write access");
 	err = ext4_journal_get_write_access(handle, bh);
 	if (err) {
 		brelse(bh);
-		goto out;
+		return err;
 	}
 	lock_buffer(bh);
 	memcpy(bh->b_data+offset, data, len);
@@ -5385,8 +5419,6 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type,
 	err = ext4_handle_dirty_metadata(handle, NULL, bh);
 	brelse(bh);
 out:
-	if (err)
-		return err;
 	if (inode->i_size < off + len) {
 		i_size_write(inode, off + len);
 		EXT4_I(inode)->i_disksize = inode->i_size;
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index e738733..1e09fc7 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -142,8 +142,7 @@ static int ext4_xattr_block_csum_verify(struct inode *inode,
 					sector_t block_nr,
 					struct ext4_xattr_header *hdr)
 {
-	if (EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
-		EXT4_FEATURE_RO_COMPAT_METADATA_CSUM) &&
+	if (ext4_has_metadata_csum(inode->i_sb) &&
 	    (hdr->h_checksum != ext4_xattr_block_csum(inode, block_nr, hdr)))
 		return 0;
 	return 1;
@@ -153,8 +152,7 @@ static void ext4_xattr_block_csum_set(struct inode *inode,
 				      sector_t block_nr,
 				      struct ext4_xattr_header *hdr)
 {
-	if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
-		EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
+	if (!ext4_has_metadata_csum(inode->i_sb))
 		return;
 
 	hdr->h_checksum = ext4_xattr_block_csum(inode, block_nr, hdr);
@@ -190,14 +188,28 @@ ext4_listxattr(struct dentry *dentry, char *buffer, size_t size)
 }
 
 static int
-ext4_xattr_check_names(struct ext4_xattr_entry *entry, void *end)
+ext4_xattr_check_names(struct ext4_xattr_entry *entry, void *end,
+		       void *value_start)
 {
-	while (!IS_LAST_ENTRY(entry)) {
-		struct ext4_xattr_entry *next = EXT4_XATTR_NEXT(entry);
+	struct ext4_xattr_entry *e = entry;
+
+	while (!IS_LAST_ENTRY(e)) {
+		struct ext4_xattr_entry *next = EXT4_XATTR_NEXT(e);
 		if ((void *)next >= end)
 			return -EIO;
-		entry = next;
+		e = next;
 	}
+
+	while (!IS_LAST_ENTRY(entry)) {
+		if (entry->e_value_size != 0 &&
+		    (value_start + le16_to_cpu(entry->e_value_offs) <
+		     (void *)e + sizeof(__u32) ||
+		     value_start + le16_to_cpu(entry->e_value_offs) +
+		    le32_to_cpu(entry->e_value_size) > end))
+			return -EIO;
+		entry = EXT4_XATTR_NEXT(entry);
+	}
+
 	return 0;
 }
 
@@ -214,7 +226,8 @@ ext4_xattr_check_block(struct inode *inode, struct buffer_head *bh)
 		return -EIO;
 	if (!ext4_xattr_block_csum_verify(inode, bh->b_blocknr, BHDR(bh)))
 		return -EIO;
-	error = ext4_xattr_check_names(BFIRST(bh), bh->b_data + bh->b_size);
+	error = ext4_xattr_check_names(BFIRST(bh), bh->b_data + bh->b_size,
+				       bh->b_data);
 	if (!error)
 		set_buffer_verified(bh);
 	return error;
@@ -331,7 +344,7 @@ ext4_xattr_ibody_get(struct inode *inode, int name_index, const char *name,
 	header = IHDR(inode, raw_inode);
 	entry = IFIRST(header);
 	end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size;
-	error = ext4_xattr_check_names(entry, end);
+	error = ext4_xattr_check_names(entry, end, entry);
 	if (error)
 		goto cleanup;
 	error = ext4_xattr_find_entry(&entry, name_index, name,
@@ -463,7 +476,7 @@ ext4_xattr_ibody_list(struct dentry *dentry, char *buffer, size_t buffer_size)
 	raw_inode = ext4_raw_inode(&iloc);
 	header = IHDR(inode, raw_inode);
 	end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size;
-	error = ext4_xattr_check_names(IFIRST(header), end);
+	error = ext4_xattr_check_names(IFIRST(header), end, IFIRST(header));
 	if (error)
 		goto cleanup;
 	error = ext4_xattr_list_entries(dentry, IFIRST(header),
@@ -899,14 +912,8 @@ inserted:
 			if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
 				goal = goal & EXT4_MAX_BLOCK_FILE_PHYS;
 
-			/*
-			 * take i_data_sem because we will test
-			 * i_delalloc_reserved_flag in ext4_mb_new_blocks
-			 */
-			down_read(&EXT4_I(inode)->i_data_sem);
 			block = ext4_new_meta_blocks(handle, inode, goal, 0,
 						     NULL, &error);
-			up_read((&EXT4_I(inode)->i_data_sem));
 			if (error)
 				goto cleanup;
 
@@ -986,7 +993,8 @@ int ext4_xattr_ibody_find(struct inode *inode, struct ext4_xattr_info *i,
 	is->s.here = is->s.first;
 	is->s.end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size;
 	if (ext4_test_inode_state(inode, EXT4_STATE_XATTR)) {
-		error = ext4_xattr_check_names(IFIRST(header), is->s.end);
+		error = ext4_xattr_check_names(IFIRST(header), is->s.end,
+					       IFIRST(header));
 		if (error)
 			return error;
 		/* Find the named attribute. */
diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c
index 83b9b5a..1ccb26b 100644
--- a/fs/f2fs/acl.c
+++ b/fs/f2fs/acl.c
@@ -162,7 +162,8 @@ fail:
 	return ERR_PTR(-EINVAL);
 }
 
-struct posix_acl *f2fs_get_acl(struct inode *inode, int type)
+static struct posix_acl *__f2fs_get_acl(struct inode *inode, int type,
+						struct page *dpage)
 {
 	int name_index = F2FS_XATTR_INDEX_POSIX_ACL_DEFAULT;
 	void *value = NULL;
@@ -172,12 +173,13 @@ struct posix_acl *f2fs_get_acl(struct inode *inode, int type)
 	if (type == ACL_TYPE_ACCESS)
 		name_index = F2FS_XATTR_INDEX_POSIX_ACL_ACCESS;
 
-	retval = f2fs_getxattr(inode, name_index, "", NULL, 0);
+	retval = f2fs_getxattr(inode, name_index, "", NULL, 0, dpage);
 	if (retval > 0) {
 		value = kmalloc(retval, GFP_F2FS_ZERO);
 		if (!value)
 			return ERR_PTR(-ENOMEM);
-		retval = f2fs_getxattr(inode, name_index, "", value, retval);
+		retval = f2fs_getxattr(inode, name_index, "", value,
+							retval, dpage);
 	}
 
 	if (retval > 0)
@@ -194,6 +196,11 @@ struct posix_acl *f2fs_get_acl(struct inode *inode, int type)
 	return acl;
 }
 
+struct posix_acl *f2fs_get_acl(struct inode *inode, int type)
+{
+	return __f2fs_get_acl(inode, type, NULL);
+}
+
 static int __f2fs_set_acl(struct inode *inode, int type,
 			struct posix_acl *acl, struct page *ipage)
 {
@@ -229,7 +236,7 @@ static int __f2fs_set_acl(struct inode *inode, int type,
 	if (acl) {
 		value = f2fs_acl_to_disk(acl, &size);
 		if (IS_ERR(value)) {
-			cond_clear_inode_flag(fi, FI_ACL_MODE);
+			clear_inode_flag(fi, FI_ACL_MODE);
 			return (int)PTR_ERR(value);
 		}
 	}
@@ -240,7 +247,7 @@ static int __f2fs_set_acl(struct inode *inode, int type,
 	if (!error)
 		set_cached_acl(inode, type, acl);
 
-	cond_clear_inode_flag(fi, FI_ACL_MODE);
+	clear_inode_flag(fi, FI_ACL_MODE);
 	return error;
 }
 
@@ -249,12 +256,137 @@ int f2fs_set_acl(struct inode *inode, struct posix_acl *acl, int type)
 	return __f2fs_set_acl(inode, type, acl, NULL);
 }
 
-int f2fs_init_acl(struct inode *inode, struct inode *dir, struct page *ipage)
+/*
+ * Most part of f2fs_acl_clone, f2fs_acl_create_masq, f2fs_acl_create
+ * are copied from posix_acl.c
+ */
+static struct posix_acl *f2fs_acl_clone(const struct posix_acl *acl,
+							gfp_t flags)
+{
+	struct posix_acl *clone = NULL;
+
+	if (acl) {
+		int size = sizeof(struct posix_acl) + acl->a_count *
+				sizeof(struct posix_acl_entry);
+		clone = kmemdup(acl, size, flags);
+		if (clone)
+			atomic_set(&clone->a_refcount, 1);
+	}
+	return clone;
+}
+
+static int f2fs_acl_create_masq(struct posix_acl *acl, umode_t *mode_p)
+{
+	struct posix_acl_entry *pa, *pe;
+	struct posix_acl_entry *group_obj = NULL, *mask_obj = NULL;
+	umode_t mode = *mode_p;
+	int not_equiv = 0;
+
+	/* assert(atomic_read(acl->a_refcount) == 1); */
+
+	FOREACH_ACL_ENTRY(pa, acl, pe) {
+		switch(pa->e_tag) {
+		case ACL_USER_OBJ:
+			pa->e_perm &= (mode >> 6) | ~S_IRWXO;
+			mode &= (pa->e_perm << 6) | ~S_IRWXU;
+			break;
+
+		case ACL_USER:
+		case ACL_GROUP:
+			not_equiv = 1;
+			break;
+
+		case ACL_GROUP_OBJ:
+			group_obj = pa;
+			break;
+
+		case ACL_OTHER:
+			pa->e_perm &= mode | ~S_IRWXO;
+			mode &= pa->e_perm | ~S_IRWXO;
+			break;
+
+		case ACL_MASK:
+			mask_obj = pa;
+			not_equiv = 1;
+			break;
+
+		default:
+			return -EIO;
+		}
+	}
+
+	if (mask_obj) {
+		mask_obj->e_perm &= (mode >> 3) | ~S_IRWXO;
+		mode &= (mask_obj->e_perm << 3) | ~S_IRWXG;
+	} else {
+		if (!group_obj)
+			return -EIO;
+		group_obj->e_perm &= (mode >> 3) | ~S_IRWXO;
+		mode &= (group_obj->e_perm << 3) | ~S_IRWXG;
+	}
+
+	*mode_p = (*mode_p & ~S_IRWXUGO) | mode;
+        return not_equiv;
+}
+
+static int f2fs_acl_create(struct inode *dir, umode_t *mode,
+		struct posix_acl **default_acl, struct posix_acl **acl,
+		struct page *dpage)
+{
+	struct posix_acl *p;
+	int ret;
+
+	if (S_ISLNK(*mode) || !IS_POSIXACL(dir))
+		goto no_acl;
+
+	p = __f2fs_get_acl(dir, ACL_TYPE_DEFAULT, dpage);
+	if (IS_ERR(p)) {
+		if (p == ERR_PTR(-EOPNOTSUPP))
+			goto apply_umask;
+		return PTR_ERR(p);
+	}
+
+	if (!p)
+		goto apply_umask;
+
+	*acl = f2fs_acl_clone(p, GFP_NOFS);
+	if (!*acl)
+		return -ENOMEM;
+
+	ret = f2fs_acl_create_masq(*acl, mode);
+	if (ret < 0) {
+		posix_acl_release(*acl);
+		return -ENOMEM;
+	}
+
+	if (ret == 0) {
+		posix_acl_release(*acl);
+		*acl = NULL;
+	}
+
+	if (!S_ISDIR(*mode)) {
+		posix_acl_release(p);
+		*default_acl = NULL;
+	} else {
+		*default_acl = p;
+	}
+	return 0;
+
+apply_umask:
+	*mode &= ~current_umask();
+no_acl:
+	*default_acl = NULL;
+	*acl = NULL;
+	return 0;
+}
+
+int f2fs_init_acl(struct inode *inode, struct inode *dir, struct page *ipage,
+							struct page *dpage)
 {
-	struct posix_acl *default_acl, *acl;
+	struct posix_acl *default_acl = NULL, *acl = NULL;
 	int error = 0;
 
-	error = posix_acl_create(dir, &inode->i_mode, &default_acl, &acl);
+	error = f2fs_acl_create(dir, &inode->i_mode, &default_acl, &acl, dpage);
 	if (error)
 		return error;
 
diff --git a/fs/f2fs/acl.h b/fs/f2fs/acl.h
index e086465..997ca8e 100644
--- a/fs/f2fs/acl.h
+++ b/fs/f2fs/acl.h
@@ -38,14 +38,15 @@ struct f2fs_acl_header {
 
 extern struct posix_acl *f2fs_get_acl(struct inode *, int);
 extern int f2fs_set_acl(struct inode *inode, struct posix_acl *acl, int type);
-extern int f2fs_init_acl(struct inode *, struct inode *, struct page *);
+extern int f2fs_init_acl(struct inode *, struct inode *, struct page *,
+							struct page *);
 #else
 #define f2fs_check_acl	NULL
 #define f2fs_get_acl	NULL
 #define f2fs_set_acl	NULL
 
 static inline int f2fs_init_acl(struct inode *inode, struct inode *dir,
-							struct page *page)
+				struct page *ipage, struct page *dpage)
 {
 	return 0;
 }
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index dd10a03..e6c271f 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -72,36 +72,36 @@ out:
 	return page;
 }
 
-struct page *get_meta_page_ra(struct f2fs_sb_info *sbi, pgoff_t index)
-{
-	bool readahead = false;
-	struct page *page;
-
-	page = find_get_page(META_MAPPING(sbi), index);
-	if (!page || (page && !PageUptodate(page)))
-		readahead = true;
-	f2fs_put_page(page, 0);
-
-	if (readahead)
-		ra_meta_pages(sbi, index, MAX_BIO_BLOCKS(sbi), META_POR);
-	return get_meta_page(sbi, index);
-}
-
-static inline block_t get_max_meta_blks(struct f2fs_sb_info *sbi, int type)
+static inline bool is_valid_blkaddr(struct f2fs_sb_info *sbi,
+						block_t blkaddr, int type)
 {
 	switch (type) {
 	case META_NAT:
-		return NM_I(sbi)->max_nid / NAT_ENTRY_PER_BLOCK;
+		break;
 	case META_SIT:
-		return SIT_BLK_CNT(sbi);
+		if (unlikely(blkaddr >= SIT_BLK_CNT(sbi)))
+			return false;
+		break;
 	case META_SSA:
+		if (unlikely(blkaddr >= MAIN_BLKADDR(sbi) ||
+			blkaddr < SM_I(sbi)->ssa_blkaddr))
+			return false;
+		break;
 	case META_CP:
-		return 0;
+		if (unlikely(blkaddr >= SIT_I(sbi)->sit_base_addr ||
+			blkaddr < __start_cp_addr(sbi)))
+			return false;
+		break;
 	case META_POR:
-		return MAX_BLKADDR(sbi);
+		if (unlikely(blkaddr >= MAX_BLKADDR(sbi) ||
+			blkaddr < MAIN_BLKADDR(sbi)))
+			return false;
+		break;
 	default:
 		BUG();
 	}
+
+	return true;
 }
 
 /*
@@ -112,7 +112,6 @@ int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, int type
 	block_t prev_blk_addr = 0;
 	struct page *page;
 	block_t blkno = start;
-	block_t max_blks = get_max_meta_blks(sbi, type);
 
 	struct f2fs_io_info fio = {
 		.type = META,
@@ -122,18 +121,20 @@ int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, int type
 	for (; nrpages-- > 0; blkno++) {
 		block_t blk_addr;
 
+		if (!is_valid_blkaddr(sbi, blkno, type))
+			goto out;
+
 		switch (type) {
 		case META_NAT:
-			/* get nat block addr */
-			if (unlikely(blkno >= max_blks))
+			if (unlikely(blkno >=
+					NAT_BLOCK_OFFSET(NM_I(sbi)->max_nid)))
 				blkno = 0;
+			/* get nat block addr */
 			blk_addr = current_nat_addr(sbi,
 					blkno * NAT_ENTRY_PER_BLOCK);
 			break;
 		case META_SIT:
 			/* get sit block addr */
-			if (unlikely(blkno >= max_blks))
-				goto out;
 			blk_addr = current_sit_addr(sbi,
 					blkno * SIT_ENTRY_PER_BLOCK);
 			if (blkno != start && prev_blk_addr + 1 != blk_addr)
@@ -143,10 +144,6 @@ int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, int type
 		case META_SSA:
 		case META_CP:
 		case META_POR:
-			if (unlikely(blkno >= max_blks))
-				goto out;
-			if (unlikely(blkno < SEG0_BLKADDR(sbi)))
-				goto out;
 			blk_addr = blkno;
 			break;
 		default:
@@ -169,6 +166,20 @@ out:
 	return blkno - start;
 }
 
+void ra_meta_pages_cond(struct f2fs_sb_info *sbi, pgoff_t index)
+{
+	struct page *page;
+	bool readahead = false;
+
+	page = find_get_page(META_MAPPING(sbi), index);
+	if (!page || (page && !PageUptodate(page)))
+		readahead = true;
+	f2fs_put_page(page, 0);
+
+	if (readahead)
+		ra_meta_pages(sbi, index, MAX_BIO_BLOCKS(sbi), META_POR);
+}
+
 static int f2fs_write_meta_page(struct page *page,
 				struct writeback_control *wbc)
 {
@@ -178,7 +189,7 @@ static int f2fs_write_meta_page(struct page *page,
 
 	if (unlikely(sbi->por_doing))
 		goto redirty_out;
-	if (wbc->for_reclaim)
+	if (wbc->for_reclaim && page->index < GET_SUM_BLOCK(sbi, 0))
 		goto redirty_out;
 	if (unlikely(f2fs_cp_error(sbi)))
 		goto redirty_out;
@@ -187,6 +198,9 @@ static int f2fs_write_meta_page(struct page *page,
 	write_meta_page(sbi, page);
 	dec_page_count(sbi, F2FS_DIRTY_META);
 	unlock_page(page);
+
+	if (wbc->for_reclaim)
+		f2fs_submit_merged_bio(sbi, META, WRITE);
 	return 0;
 
 redirty_out:
@@ -298,46 +312,57 @@ const struct address_space_operations f2fs_meta_aops = {
 
 static void __add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type)
 {
+	struct inode_management *im = &sbi->im[type];
 	struct ino_entry *e;
 retry:
-	spin_lock(&sbi->ino_lock[type]);
+	if (radix_tree_preload(GFP_NOFS)) {
+		cond_resched();
+		goto retry;
+	}
+
+	spin_lock(&im->ino_lock);
 
-	e = radix_tree_lookup(&sbi->ino_root[type], ino);
+	e = radix_tree_lookup(&im->ino_root, ino);
 	if (!e) {
 		e = kmem_cache_alloc(ino_entry_slab, GFP_ATOMIC);
 		if (!e) {
-			spin_unlock(&sbi->ino_lock[type]);
+			spin_unlock(&im->ino_lock);
+			radix_tree_preload_end();
 			goto retry;
 		}
-		if (radix_tree_insert(&sbi->ino_root[type], ino, e)) {
-			spin_unlock(&sbi->ino_lock[type]);
+		if (radix_tree_insert(&im->ino_root, ino, e)) {
+			spin_unlock(&im->ino_lock);
 			kmem_cache_free(ino_entry_slab, e);
+			radix_tree_preload_end();
 			goto retry;
 		}
 		memset(e, 0, sizeof(struct ino_entry));
 		e->ino = ino;
 
-		list_add_tail(&e->list, &sbi->ino_list[type]);
+		list_add_tail(&e->list, &im->ino_list);
+		if (type != ORPHAN_INO)
+			im->ino_num++;
 	}
-	spin_unlock(&sbi->ino_lock[type]);
+	spin_unlock(&im->ino_lock);
+	radix_tree_preload_end();
 }
 
 static void __remove_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type)
 {
+	struct inode_management *im = &sbi->im[type];
 	struct ino_entry *e;
 
-	spin_lock(&sbi->ino_lock[type]);
-	e = radix_tree_lookup(&sbi->ino_root[type], ino);
+	spin_lock(&im->ino_lock);
+	e = radix_tree_lookup(&im->ino_root, ino);
 	if (e) {
 		list_del(&e->list);
-		radix_tree_delete(&sbi->ino_root[type], ino);
-		if (type == ORPHAN_INO)
-			sbi->n_orphans--;
-		spin_unlock(&sbi->ino_lock[type]);
+		radix_tree_delete(&im->ino_root, ino);
+		im->ino_num--;
+		spin_unlock(&im->ino_lock);
 		kmem_cache_free(ino_entry_slab, e);
 		return;
 	}
-	spin_unlock(&sbi->ino_lock[type]);
+	spin_unlock(&im->ino_lock);
 }
 
 void add_dirty_inode(struct f2fs_sb_info *sbi, nid_t ino, int type)
@@ -355,10 +380,12 @@ void remove_dirty_inode(struct f2fs_sb_info *sbi, nid_t ino, int type)
 /* mode should be APPEND_INO or UPDATE_INO */
 bool exist_written_data(struct f2fs_sb_info *sbi, nid_t ino, int mode)
 {
+	struct inode_management *im = &sbi->im[mode];
 	struct ino_entry *e;
-	spin_lock(&sbi->ino_lock[mode]);
-	e = radix_tree_lookup(&sbi->ino_root[mode], ino);
-	spin_unlock(&sbi->ino_lock[mode]);
+
+	spin_lock(&im->ino_lock);
+	e = radix_tree_lookup(&im->ino_root, ino);
+	spin_unlock(&im->ino_lock);
 	return e ? true : false;
 }
 
@@ -368,36 +395,42 @@ void release_dirty_inode(struct f2fs_sb_info *sbi)
 	int i;
 
 	for (i = APPEND_INO; i <= UPDATE_INO; i++) {
-		spin_lock(&sbi->ino_lock[i]);
-		list_for_each_entry_safe(e, tmp, &sbi->ino_list[i], list) {
+		struct inode_management *im = &sbi->im[i];
+
+		spin_lock(&im->ino_lock);
+		list_for_each_entry_safe(e, tmp, &im->ino_list, list) {
 			list_del(&e->list);
-			radix_tree_delete(&sbi->ino_root[i], e->ino);
+			radix_tree_delete(&im->ino_root, e->ino);
 			kmem_cache_free(ino_entry_slab, e);
+			im->ino_num--;
 		}
-		spin_unlock(&sbi->ino_lock[i]);
+		spin_unlock(&im->ino_lock);
 	}
 }
 
 int acquire_orphan_inode(struct f2fs_sb_info *sbi)
 {
+	struct inode_management *im = &sbi->im[ORPHAN_INO];
 	int err = 0;
 
-	spin_lock(&sbi->ino_lock[ORPHAN_INO]);
-	if (unlikely(sbi->n_orphans >= sbi->max_orphans))
+	spin_lock(&im->ino_lock);
+	if (unlikely(im->ino_num >= sbi->max_orphans))
 		err = -ENOSPC;
 	else
-		sbi->n_orphans++;
-	spin_unlock(&sbi->ino_lock[ORPHAN_INO]);
+		im->ino_num++;
+	spin_unlock(&im->ino_lock);
 
 	return err;
 }
 
 void release_orphan_inode(struct f2fs_sb_info *sbi)
 {
-	spin_lock(&sbi->ino_lock[ORPHAN_INO]);
-	f2fs_bug_on(sbi, sbi->n_orphans == 0);
-	sbi->n_orphans--;
-	spin_unlock(&sbi->ino_lock[ORPHAN_INO]);
+	struct inode_management *im = &sbi->im[ORPHAN_INO];
+
+	spin_lock(&im->ino_lock);
+	f2fs_bug_on(sbi, im->ino_num == 0);
+	im->ino_num--;
+	spin_unlock(&im->ino_lock);
 }
 
 void add_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
@@ -460,17 +493,19 @@ static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk)
 	struct f2fs_orphan_block *orphan_blk = NULL;
 	unsigned int nentries = 0;
 	unsigned short index;
-	unsigned short orphan_blocks =
-			(unsigned short)GET_ORPHAN_BLOCKS(sbi->n_orphans);
+	unsigned short orphan_blocks;
 	struct page *page = NULL;
 	struct ino_entry *orphan = NULL;
+	struct inode_management *im = &sbi->im[ORPHAN_INO];
+
+	orphan_blocks = GET_ORPHAN_BLOCKS(im->ino_num);
 
 	for (index = 0; index < orphan_blocks; index++)
 		grab_meta_page(sbi, start_blk + index);
 
 	index = 1;
-	spin_lock(&sbi->ino_lock[ORPHAN_INO]);
-	head = &sbi->ino_list[ORPHAN_INO];
+	spin_lock(&im->ino_lock);
+	head = &im->ino_list;
 
 	/* loop for each orphan inode entry and write them in Jornal block */
 	list_for_each_entry(orphan, head, list) {
@@ -510,7 +545,7 @@ static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk)
 		f2fs_put_page(page, 1);
 	}
 
-	spin_unlock(&sbi->ino_lock[ORPHAN_INO]);
+	spin_unlock(&im->ino_lock);
 }
 
 static struct page *validate_checkpoint(struct f2fs_sb_info *sbi,
@@ -731,6 +766,9 @@ void sync_dirty_dir_inodes(struct f2fs_sb_info *sbi)
 	struct dir_inode_entry *entry;
 	struct inode *inode;
 retry:
+	if (unlikely(f2fs_cp_error(sbi)))
+		return;
+
 	spin_lock(&sbi->dir_inode_lock);
 
 	head = &sbi->dir_inode_list;
@@ -830,6 +868,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
 	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
 	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
 	struct f2fs_nm_info *nm_i = NM_I(sbi);
+	unsigned long orphan_num = sbi->im[ORPHAN_INO].ino_num;
 	nid_t last_nid = nm_i->next_scan_nid;
 	block_t start_blk;
 	struct page *cp_page;
@@ -889,7 +928,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
 	else
 		clear_ckpt_flags(ckpt, CP_COMPACT_SUM_FLAG);
 
-	orphan_blocks = GET_ORPHAN_BLOCKS(sbi->n_orphans);
+	orphan_blocks = GET_ORPHAN_BLOCKS(orphan_num);
 	ckpt->cp_pack_start_sum = cpu_to_le32(1 + cp_payload_blks +
 			orphan_blocks);
 
@@ -905,7 +944,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
 				orphan_blocks);
 	}
 
-	if (sbi->n_orphans)
+	if (orphan_num)
 		set_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG);
 	else
 		clear_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG);
@@ -940,7 +979,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
 		f2fs_put_page(cp_page, 1);
 	}
 
-	if (sbi->n_orphans) {
+	if (orphan_num) {
 		write_orphan_inodes(sbi, start_blk);
 		start_blk += orphan_blocks;
 	}
@@ -975,6 +1014,9 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
 	/* Here, we only have one bio having CP pack */
 	sync_meta_pages(sbi, META_FLUSH, LONG_MAX);
 
+	/* wait for previous submitted meta pages writeback */
+	wait_on_all_pages_writeback(sbi);
+
 	release_dirty_inode(sbi);
 
 	if (unlikely(f2fs_cp_error(sbi)))
@@ -1036,9 +1078,12 @@ void init_ino_entry_info(struct f2fs_sb_info *sbi)
 	int i;
 
 	for (i = 0; i < MAX_INO_ENTRY; i++) {
-		INIT_RADIX_TREE(&sbi->ino_root[i], GFP_ATOMIC);
-		spin_lock_init(&sbi->ino_lock[i]);
-		INIT_LIST_HEAD(&sbi->ino_list[i]);
+		struct inode_management *im = &sbi->im[i];
+
+		INIT_RADIX_TREE(&im->ino_root, GFP_ATOMIC);
+		spin_lock_init(&im->ino_lock);
+		INIT_LIST_HEAD(&im->ino_list);
+		im->ino_num = 0;
 	}
 
 	/*
@@ -1047,7 +1092,6 @@ void init_ino_entry_info(struct f2fs_sb_info *sbi)
 	 * orphan entries with the limitation one reserved segment
 	 * for cp pack we can have max 1020*504 orphan entries
 	 */
-	sbi->n_orphans = 0;
 	sbi->max_orphans = (sbi->blocks_per_seg - F2FS_CP_PACKS -
 			NR_CURSEG_TYPE) * F2FS_ORPHANS_PER_BLOCK;
 }
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 8e58c4c..7ec697b 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -61,11 +61,6 @@ static void f2fs_write_end_io(struct bio *bio, int err)
 		dec_page_count(sbi, F2FS_WRITEBACK);
 	}
 
-	if (sbi->wait_io) {
-		complete(sbi->wait_io);
-		sbi->wait_io = NULL;
-	}
-
 	if (!get_pages(sbi, F2FS_WRITEBACK) &&
 			!list_empty(&sbi->cp_wait.task_list))
 		wake_up(&sbi->cp_wait);
@@ -95,34 +90,18 @@ static struct bio *__bio_alloc(struct f2fs_sb_info *sbi, block_t blk_addr,
 static void __submit_merged_bio(struct f2fs_bio_info *io)
 {
 	struct f2fs_io_info *fio = &io->fio;
-	int rw;
 
 	if (!io->bio)
 		return;
 
-	rw = fio->rw;
-
-	if (is_read_io(rw)) {
-		trace_f2fs_submit_read_bio(io->sbi->sb, rw,
-						fio->type, io->bio);
-		submit_bio(rw, io->bio);
-	} else {
-		trace_f2fs_submit_write_bio(io->sbi->sb, rw,
-						fio->type, io->bio);
-		/*
-		 * META_FLUSH is only from the checkpoint procedure, and we
-		 * should wait this metadata bio for FS consistency.
-		 */
-		if (fio->type == META_FLUSH) {
-			DECLARE_COMPLETION_ONSTACK(wait);
-			io->sbi->wait_io = &wait;
-			submit_bio(rw, io->bio);
-			wait_for_completion(&wait);
-		} else {
-			submit_bio(rw, io->bio);
-		}
-	}
+	if (is_read_io(fio->rw))
+		trace_f2fs_submit_read_bio(io->sbi->sb, fio->rw,
+							fio->type, io->bio);
+	else
+		trace_f2fs_submit_write_bio(io->sbi->sb, fio->rw,
+							fio->type, io->bio);
 
+	submit_bio(fio->rw, io->bio);
 	io->bio = NULL;
 }
 
@@ -257,9 +236,6 @@ int f2fs_reserve_block(struct dnode_of_data *dn, pgoff_t index)
 	bool need_put = dn->inode_page ? false : true;
 	int err;
 
-	/* if inode_page exists, index should be zero */
-	f2fs_bug_on(F2FS_I_SB(dn->inode), !need_put && index);
-
 	err = get_dnode_of_data(dn, index, ALLOC_NODE);
 	if (err)
 		return err;
@@ -740,14 +716,14 @@ int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
 static int f2fs_read_data_page(struct file *file, struct page *page)
 {
 	struct inode *inode = page->mapping->host;
-	int ret;
+	int ret = -EAGAIN;
 
 	trace_f2fs_readpage(page, DATA);
 
 	/* If the file has inline data, try to read it directly */
 	if (f2fs_has_inline_data(inode))
 		ret = f2fs_read_inline_data(inode, page);
-	else
+	if (ret == -EAGAIN)
 		ret = mpage_readpage(page, get_data_block);
 
 	return ret;
@@ -859,10 +835,11 @@ write:
 	else if (has_not_enough_free_secs(sbi, 0))
 		goto redirty_out;
 
+	err = -EAGAIN;
 	f2fs_lock_op(sbi);
-	if (f2fs_has_inline_data(inode) || f2fs_may_inline(inode))
-		err = f2fs_write_inline_data(inode, page, offset);
-	else
+	if (f2fs_has_inline_data(inode))
+		err = f2fs_write_inline_data(inode, page);
+	if (err == -EAGAIN)
 		err = do_write_data_page(page, &fio);
 	f2fs_unlock_op(sbi);
 done:
@@ -951,7 +928,7 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping,
 {
 	struct inode *inode = mapping->host;
 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
-	struct page *page;
+	struct page *page, *ipage;
 	pgoff_t index = ((unsigned long long) pos) >> PAGE_CACHE_SHIFT;
 	struct dnode_of_data dn;
 	int err = 0;
@@ -959,45 +936,60 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping,
 	trace_f2fs_write_begin(inode, pos, len, flags);
 
 	f2fs_balance_fs(sbi);
-repeat:
-	err = f2fs_convert_inline_data(inode, pos + len, NULL);
-	if (err)
-		goto fail;
 
+	/*
+	 * We should check this at this moment to avoid deadlock on inode page
+	 * and #0 page. The locking rule for inline_data conversion should be:
+	 * lock_page(page #0) -> lock_page(inode_page)
+	 */
+	if (index != 0) {
+		err = f2fs_convert_inline_inode(inode);
+		if (err)
+			goto fail;
+	}
+repeat:
 	page = grab_cache_page_write_begin(mapping, index, flags);
 	if (!page) {
 		err = -ENOMEM;
 		goto fail;
 	}
 
-	/* to avoid latency during memory pressure */
-	unlock_page(page);
-
 	*pagep = page;
 
-	if (f2fs_has_inline_data(inode) && (pos + len) <= MAX_INLINE_DATA)
-		goto inline_data;
-
 	f2fs_lock_op(sbi);
-	set_new_dnode(&dn, inode, NULL, NULL, 0);
-	err = f2fs_reserve_block(&dn, index);
-	f2fs_unlock_op(sbi);
-	if (err) {
-		f2fs_put_page(page, 0);
-		goto fail;
-	}
-inline_data:
-	lock_page(page);
-	if (unlikely(page->mapping != mapping)) {
-		f2fs_put_page(page, 1);
-		goto repeat;
+
+	/* check inline_data */
+	ipage = get_node_page(sbi, inode->i_ino);
+	if (IS_ERR(ipage)) {
+		err = PTR_ERR(ipage);
+		goto unlock_fail;
 	}
 
-	f2fs_wait_on_page_writeback(page, DATA);
+	set_new_dnode(&dn, inode, ipage, ipage, 0);
+
+	if (f2fs_has_inline_data(inode)) {
+		if (pos + len <= MAX_INLINE_DATA) {
+			read_inline_data(page, ipage);
+			set_inode_flag(F2FS_I(inode), FI_DATA_EXIST);
+			sync_inode_page(&dn);
+			goto put_next;
+		}
+		err = f2fs_convert_inline_page(&dn, page);
+		if (err)
+			goto put_fail;
+	}
+	err = f2fs_reserve_block(&dn, index);
+	if (err)
+		goto put_fail;
+put_next:
+	f2fs_put_dnode(&dn);
+	f2fs_unlock_op(sbi);
 
 	if ((len == PAGE_CACHE_SIZE) || PageUptodate(page))
 		return 0;
 
+	f2fs_wait_on_page_writeback(page, DATA);
+
 	if ((pos & PAGE_CACHE_MASK) >= i_size_read(inode)) {
 		unsigned start = pos & (PAGE_CACHE_SIZE - 1);
 		unsigned end = start + len;
@@ -1010,18 +1002,10 @@ inline_data:
 	if (dn.data_blkaddr == NEW_ADDR) {
 		zero_user_segment(page, 0, PAGE_CACHE_SIZE);
 	} else {
-		if (f2fs_has_inline_data(inode)) {
-			err = f2fs_read_inline_data(inode, page);
-			if (err) {
-				page_cache_release(page);
-				goto fail;
-			}
-		} else {
-			err = f2fs_submit_page_bio(sbi, page, dn.data_blkaddr,
-							READ_SYNC);
-			if (err)
-				goto fail;
-		}
+		err = f2fs_submit_page_bio(sbi, page, dn.data_blkaddr,
+					   READ_SYNC);
+		if (err)
+			goto fail;
 
 		lock_page(page);
 		if (unlikely(!PageUptodate(page))) {
@@ -1038,6 +1022,12 @@ out:
 	SetPageUptodate(page);
 	clear_cold_data(page);
 	return 0;
+
+put_fail:
+	f2fs_put_dnode(&dn);
+unlock_fail:
+	f2fs_unlock_op(sbi);
+	f2fs_put_page(page, 1);
 fail:
 	f2fs_write_failed(mapping, pos + len);
 	return err;
@@ -1052,10 +1042,7 @@ static int f2fs_write_end(struct file *file,
 
 	trace_f2fs_write_end(inode, pos, len, copied);
 
-	if (f2fs_is_atomic_file(inode) || f2fs_is_volatile_file(inode))
-		register_inmem_page(inode, page);
-	else
-		set_page_dirty(page);
+	set_page_dirty(page);
 
 	if (pos + copied > i_size_read(inode)) {
 		i_size_write(inode, pos + copied);
@@ -1093,9 +1080,12 @@ static ssize_t f2fs_direct_IO(int rw, struct kiocb *iocb,
 	size_t count = iov_iter_count(iter);
 	int err;
 
-	/* Let buffer I/O handle the inline data case. */
-	if (f2fs_has_inline_data(inode))
-		return 0;
+	/* we don't need to use inline_data strictly */
+	if (f2fs_has_inline_data(inode)) {
+		err = f2fs_convert_inline_inode(inode);
+		if (err)
+			return err;
+	}
 
 	if (check_direct_IO(inode, rw, iter, offset))
 		return 0;
@@ -1119,6 +1109,9 @@ static void f2fs_invalidate_data_page(struct page *page, unsigned int offset,
 	if (offset % PAGE_CACHE_SIZE || length != PAGE_CACHE_SIZE)
 		return;
 
+	if (f2fs_is_atomic_file(inode) || f2fs_is_volatile_file(inode))
+		invalidate_inmem_page(inode, page);
+
 	if (PageDirty(page))
 		inode_dec_dirty_pages(inode);
 	ClearPagePrivate(page);
@@ -1138,6 +1131,12 @@ static int f2fs_set_data_page_dirty(struct page *page)
 	trace_f2fs_set_page_dirty(page, DATA);
 
 	SetPageUptodate(page);
+
+	if (f2fs_is_atomic_file(inode) || f2fs_is_volatile_file(inode)) {
+		register_inmem_page(inode, page);
+		return 1;
+	}
+
 	mark_inode_dirty(inode);
 
 	if (!PageDirty(page)) {
@@ -1152,9 +1151,12 @@ static sector_t f2fs_bmap(struct address_space *mapping, sector_t block)
 {
 	struct inode *inode = mapping->host;
 
-	if (f2fs_has_inline_data(inode))
-		return 0;
-
+	/* we don't need to use inline_data strictly */
+	if (f2fs_has_inline_data(inode)) {
+		int err = f2fs_convert_inline_inode(inode);
+		if (err)
+			return err;
+	}
 	return generic_block_bmap(mapping, block, get_data_block);
 }
 
diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
index 0a91ab8..91e8f69 100644
--- a/fs/f2fs/debug.c
+++ b/fs/f2fs/debug.c
@@ -39,13 +39,15 @@ static void update_general_status(struct f2fs_sb_info *sbi)
 	si->ndirty_dent = get_pages(sbi, F2FS_DIRTY_DENTS);
 	si->ndirty_dirs = sbi->n_dirty_dirs;
 	si->ndirty_meta = get_pages(sbi, F2FS_DIRTY_META);
+	si->inmem_pages = get_pages(sbi, F2FS_INMEM_PAGES);
 	si->total_count = (int)sbi->user_block_count / sbi->blocks_per_seg;
 	si->rsvd_segs = reserved_segments(sbi);
 	si->overp_segs = overprovision_segments(sbi);
 	si->valid_count = valid_user_blocks(sbi);
 	si->valid_node_count = valid_node_count(sbi);
 	si->valid_inode_count = valid_inode_count(sbi);
-	si->inline_inode = sbi->inline_inode;
+	si->inline_inode = atomic_read(&sbi->inline_inode);
+	si->inline_dir = atomic_read(&sbi->inline_dir);
 	si->utilization = utilization(sbi);
 
 	si->free_segs = free_segments(sbi);
@@ -118,6 +120,7 @@ static void update_mem_info(struct f2fs_sb_info *sbi)
 {
 	struct f2fs_stat_info *si = F2FS_STAT(sbi);
 	unsigned npages;
+	int i;
 
 	if (si->base_mem)
 		goto get_cache;
@@ -167,8 +170,9 @@ get_cache:
 	si->cache_mem += npages << PAGE_CACHE_SHIFT;
 	npages = META_MAPPING(sbi)->nrpages;
 	si->cache_mem += npages << PAGE_CACHE_SHIFT;
-	si->cache_mem += sbi->n_orphans * sizeof(struct ino_entry);
 	si->cache_mem += sbi->n_dirty_dirs * sizeof(struct dir_inode_entry);
+	for (i = 0; i <= UPDATE_INO; i++)
+		si->cache_mem += sbi->im[i].ino_num * sizeof(struct ino_entry);
 }
 
 static int stat_show(struct seq_file *s, void *v)
@@ -200,6 +204,8 @@ static int stat_show(struct seq_file *s, void *v)
 			   si->valid_count - si->valid_node_count);
 		seq_printf(s, "  - Inline_data Inode: %u\n",
 			   si->inline_inode);
+		seq_printf(s, "  - Inline_dentry Inode: %u\n",
+			   si->inline_dir);
 		seq_printf(s, "\nMain area: %d segs, %d secs %d zones\n",
 			   si->main_area_segs, si->main_area_sections,
 			   si->main_area_zones);
@@ -244,6 +250,8 @@ static int stat_show(struct seq_file *s, void *v)
 		seq_printf(s, "\nExtent Hit Ratio: %d / %d\n",
 			   si->hit_ext, si->total_ext);
 		seq_puts(s, "\nBalancing F2FS Async:\n");
+		seq_printf(s, "  - inmem: %4d\n",
+			   si->inmem_pages);
 		seq_printf(s, "  - nodes: %4d in %4d\n",
 			   si->ndirty_node, si->node_pages);
 		seq_printf(s, "  - dents: %4d in dirs:%4d\n",
@@ -321,6 +329,9 @@ int f2fs_build_stats(struct f2fs_sb_info *sbi)
 	si->sbi = sbi;
 	sbi->stat_info = si;
 
+	atomic_set(&sbi->inline_inode, 0);
+	atomic_set(&sbi->inline_dir, 0);
+
 	mutex_lock(&f2fs_stat_mutex);
 	list_add_tail(&si->stat_list, &f2fs_stat_list);
 	mutex_unlock(&f2fs_stat_mutex);
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index b54f871..b1a7d57 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -37,7 +37,7 @@ static unsigned int bucket_blocks(unsigned int level)
 		return 4;
 }
 
-static unsigned char f2fs_filetype_table[F2FS_FT_MAX] = {
+unsigned char f2fs_filetype_table[F2FS_FT_MAX] = {
 	[F2FS_FT_UNKNOWN]	= DT_UNKNOWN,
 	[F2FS_FT_REG_FILE]	= DT_REG,
 	[F2FS_FT_DIR]		= DT_DIR,
@@ -59,7 +59,7 @@ static unsigned char f2fs_type_by_mode[S_IFMT >> S_SHIFT] = {
 	[S_IFLNK >> S_SHIFT]	= F2FS_FT_SYMLINK,
 };
 
-static void set_de_type(struct f2fs_dir_entry *de, struct inode *inode)
+void set_de_type(struct f2fs_dir_entry *de, struct inode *inode)
 {
 	umode_t mode = inode->i_mode;
 	de->file_type = f2fs_type_by_mode[(mode & S_IFMT) >> S_SHIFT];
@@ -90,51 +90,70 @@ static bool early_match_name(size_t namelen, f2fs_hash_t namehash,
 }
 
 static struct f2fs_dir_entry *find_in_block(struct page *dentry_page,
-			struct qstr *name, int *max_slots,
-			f2fs_hash_t namehash, struct page **res_page)
+				struct qstr *name, int *max_slots,
+				struct page **res_page)
+{
+	struct f2fs_dentry_block *dentry_blk;
+	struct f2fs_dir_entry *de;
+	struct f2fs_dentry_ptr d;
+
+	dentry_blk = (struct f2fs_dentry_block *)kmap(dentry_page);
+
+	make_dentry_ptr(&d, (void *)dentry_blk, 1);
+	de = find_target_dentry(name, max_slots, &d);
+
+	if (de)
+		*res_page = dentry_page;
+	else
+		kunmap(dentry_page);
+
+	/*
+	 * For the most part, it should be a bug when name_len is zero.
+	 * We stop here for figuring out where the bugs has occurred.
+	 */
+	f2fs_bug_on(F2FS_P_SB(dentry_page), d.max < 0);
+	return de;
+}
+
+struct f2fs_dir_entry *find_target_dentry(struct qstr *name, int *max_slots,
+						struct f2fs_dentry_ptr *d)
 {
 	struct f2fs_dir_entry *de;
 	unsigned long bit_pos = 0;
-	struct f2fs_dentry_block *dentry_blk = kmap(dentry_page);
-	const void *dentry_bits = &dentry_blk->dentry_bitmap;
+	f2fs_hash_t namehash = f2fs_dentry_hash(name);
 	int max_len = 0;
 
-	while (bit_pos < NR_DENTRY_IN_BLOCK) {
-		if (!test_bit_le(bit_pos, dentry_bits)) {
+	if (max_slots)
+		*max_slots = 0;
+	while (bit_pos < d->max) {
+		if (!test_bit_le(bit_pos, d->bitmap)) {
 			if (bit_pos == 0)
 				max_len = 1;
-			else if (!test_bit_le(bit_pos - 1, dentry_bits))
+			else if (!test_bit_le(bit_pos - 1, d->bitmap))
 				max_len++;
 			bit_pos++;
 			continue;
 		}
-		de = &dentry_blk->dentry[bit_pos];
-		if (early_match_name(name->len, namehash, de)) {
-			if (!memcmp(dentry_blk->filename[bit_pos],
-							name->name,
-							name->len)) {
-				*res_page = dentry_page;
-				goto found;
-			}
-		}
-		if (max_len > *max_slots) {
+		de = &d->dentry[bit_pos];
+		if (early_match_name(name->len, namehash, de) &&
+			!memcmp(d->filename[bit_pos], name->name, name->len))
+			goto found;
+
+		if (max_slots && *max_slots >= 0 && max_len > *max_slots) {
 			*max_slots = max_len;
 			max_len = 0;
 		}
 
-		/*
-		 * For the most part, it should be a bug when name_len is zero.
-		 * We stop here for figuring out where the bugs has occurred.
-		 */
-		f2fs_bug_on(F2FS_P_SB(dentry_page), !de->name_len);
+		/* remain bug on condition */
+		if (unlikely(!de->name_len))
+			d->max = -1;
 
 		bit_pos += GET_DENTRY_SLOTS(le16_to_cpu(de->name_len));
 	}
 
 	de = NULL;
-	kunmap(dentry_page);
 found:
-	if (max_len > *max_slots)
+	if (max_slots && max_len > *max_slots)
 		*max_slots = max_len;
 	return de;
 }
@@ -149,7 +168,7 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir,
 	struct page *dentry_page;
 	struct f2fs_dir_entry *de = NULL;
 	bool room = false;
-	int max_slots = 0;
+	int max_slots;
 
 	f2fs_bug_on(F2FS_I_SB(dir), level > MAX_DIR_HASH_DEPTH);
 
@@ -168,8 +187,7 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir,
 			continue;
 		}
 
-		de = find_in_block(dentry_page, name, &max_slots,
-					namehash, res_page);
+		de = find_in_block(dentry_page, name, &max_slots, res_page);
 		if (de)
 			break;
 
@@ -201,6 +219,9 @@ struct f2fs_dir_entry *f2fs_find_entry(struct inode *dir,
 	unsigned int max_depth;
 	unsigned int level;
 
+	if (f2fs_has_inline_dentry(dir))
+		return find_in_inline_dir(dir, child, res_page);
+
 	if (npages == 0)
 		return NULL;
 
@@ -227,6 +248,9 @@ struct f2fs_dir_entry *f2fs_parent_dir(struct inode *dir, struct page **p)
 	struct f2fs_dir_entry *de;
 	struct f2fs_dentry_block *dentry_blk;
 
+	if (f2fs_has_inline_dentry(dir))
+		return f2fs_parent_inline_dir(dir, p);
+
 	page = get_lock_data_page(dir, 0);
 	if (IS_ERR(page))
 		return NULL;
@@ -247,7 +271,7 @@ ino_t f2fs_inode_by_name(struct inode *dir, struct qstr *qstr)
 	de = f2fs_find_entry(dir, qstr, &page);
 	if (de) {
 		res = le32_to_cpu(de->ino);
-		kunmap(page);
+		f2fs_dentry_kunmap(dir, page);
 		f2fs_put_page(page, 0);
 	}
 
@@ -257,11 +281,13 @@ ino_t f2fs_inode_by_name(struct inode *dir, struct qstr *qstr)
 void f2fs_set_link(struct inode *dir, struct f2fs_dir_entry *de,
 		struct page *page, struct inode *inode)
 {
+	enum page_type type = f2fs_has_inline_dentry(dir) ? NODE : DATA;
 	lock_page(page);
-	f2fs_wait_on_page_writeback(page, DATA);
+	f2fs_wait_on_page_writeback(page, type);
 	de->ino = cpu_to_le32(inode->i_ino);
 	set_de_type(de, inode);
-	kunmap(page);
+	if (!f2fs_has_inline_dentry(dir))
+		kunmap(page);
 	set_page_dirty(page);
 	dir->i_mtime = dir->i_ctime = CURRENT_TIME;
 	mark_inode_dirty(dir);
@@ -296,36 +322,48 @@ int update_dent_inode(struct inode *inode, const struct qstr *name)
 	return 0;
 }
 
-static int make_empty_dir(struct inode *inode,
-		struct inode *parent, struct page *page)
+void do_make_empty_dir(struct inode *inode, struct inode *parent,
+					struct f2fs_dentry_ptr *d)
 {
-	struct page *dentry_page;
-	struct f2fs_dentry_block *dentry_blk;
 	struct f2fs_dir_entry *de;
 
-	dentry_page = get_new_data_page(inode, page, 0, true);
-	if (IS_ERR(dentry_page))
-		return PTR_ERR(dentry_page);
-
-
-	dentry_blk = kmap_atomic(dentry_page);
-
-	de = &dentry_blk->dentry[0];
+	de = &d->dentry[0];
 	de->name_len = cpu_to_le16(1);
 	de->hash_code = 0;
 	de->ino = cpu_to_le32(inode->i_ino);
-	memcpy(dentry_blk->filename[0], ".", 1);
+	memcpy(d->filename[0], ".", 1);
 	set_de_type(de, inode);
 
-	de = &dentry_blk->dentry[1];
+	de = &d->dentry[1];
 	de->hash_code = 0;
 	de->name_len = cpu_to_le16(2);
 	de->ino = cpu_to_le32(parent->i_ino);
-	memcpy(dentry_blk->filename[1], "..", 2);
+	memcpy(d->filename[1], "..", 2);
 	set_de_type(de, inode);
 
-	test_and_set_bit_le(0, &dentry_blk->dentry_bitmap);
-	test_and_set_bit_le(1, &dentry_blk->dentry_bitmap);
+	test_and_set_bit_le(0, (void *)d->bitmap);
+	test_and_set_bit_le(1, (void *)d->bitmap);
+}
+
+static int make_empty_dir(struct inode *inode,
+		struct inode *parent, struct page *page)
+{
+	struct page *dentry_page;
+	struct f2fs_dentry_block *dentry_blk;
+	struct f2fs_dentry_ptr d;
+
+	if (f2fs_has_inline_dentry(inode))
+		return make_empty_inline_dir(inode, parent, page);
+
+	dentry_page = get_new_data_page(inode, page, 0, true);
+	if (IS_ERR(dentry_page))
+		return PTR_ERR(dentry_page);
+
+	dentry_blk = kmap_atomic(dentry_page);
+
+	make_dentry_ptr(&d, (void *)dentry_blk, 1);
+	do_make_empty_dir(inode, parent, &d);
+
 	kunmap_atomic(dentry_blk);
 
 	set_page_dirty(dentry_page);
@@ -333,8 +371,8 @@ static int make_empty_dir(struct inode *inode,
 	return 0;
 }
 
-static struct page *init_inode_metadata(struct inode *inode,
-		struct inode *dir, const struct qstr *name)
+struct page *init_inode_metadata(struct inode *inode, struct inode *dir,
+			const struct qstr *name, struct page *dpage)
 {
 	struct page *page;
 	int err;
@@ -350,7 +388,7 @@ static struct page *init_inode_metadata(struct inode *inode,
 				goto error;
 		}
 
-		err = f2fs_init_acl(inode, dir, page);
+		err = f2fs_init_acl(inode, dir, page, dpage);
 		if (err)
 			goto put_error;
 
@@ -395,7 +433,7 @@ error:
 	return ERR_PTR(err);
 }
 
-static void update_parent_metadata(struct inode *dir, struct inode *inode,
+void update_parent_metadata(struct inode *dir, struct inode *inode,
 						unsigned int current_depth)
 {
 	if (is_inode_flag_set(F2FS_I(inode), FI_NEW_INODE)) {
@@ -417,27 +455,23 @@ static void update_parent_metadata(struct inode *dir, struct inode *inode,
 		clear_inode_flag(F2FS_I(inode), FI_INC_LINK);
 }
 
-static int room_for_filename(struct f2fs_dentry_block *dentry_blk, int slots)
+int room_for_filename(const void *bitmap, int slots, int max_slots)
 {
 	int bit_start = 0;
 	int zero_start, zero_end;
 next:
-	zero_start = find_next_zero_bit_le(&dentry_blk->dentry_bitmap,
-						NR_DENTRY_IN_BLOCK,
-						bit_start);
-	if (zero_start >= NR_DENTRY_IN_BLOCK)
-		return NR_DENTRY_IN_BLOCK;
+	zero_start = find_next_zero_bit_le(bitmap, max_slots, bit_start);
+	if (zero_start >= max_slots)
+		return max_slots;
 
-	zero_end = find_next_bit_le(&dentry_blk->dentry_bitmap,
-						NR_DENTRY_IN_BLOCK,
-						zero_start);
+	zero_end = find_next_bit_le(bitmap, max_slots, zero_start);
 	if (zero_end - zero_start >= slots)
 		return zero_start;
 
 	bit_start = zero_end + 1;
 
-	if (zero_end + 1 >= NR_DENTRY_IN_BLOCK)
-		return NR_DENTRY_IN_BLOCK;
+	if (zero_end + 1 >= max_slots)
+		return max_slots;
 	goto next;
 }
 
@@ -463,6 +497,14 @@ int __f2fs_add_link(struct inode *dir, const struct qstr *name,
 	int err = 0;
 	int i;
 
+	if (f2fs_has_inline_dentry(dir)) {
+		err = f2fs_add_inline_entry(dir, name, inode);
+		if (!err || err != -EAGAIN)
+			return err;
+		else
+			err = 0;
+	}
+
 	dentry_hash = f2fs_dentry_hash(name);
 	level = 0;
 	current_depth = F2FS_I(dir)->i_current_depth;
@@ -491,7 +533,8 @@ start:
 			return PTR_ERR(dentry_page);
 
 		dentry_blk = kmap(dentry_page);
-		bit_pos = room_for_filename(dentry_blk, slots);
+		bit_pos = room_for_filename(&dentry_blk->dentry_bitmap,
+						slots, NR_DENTRY_IN_BLOCK);
 		if (bit_pos < NR_DENTRY_IN_BLOCK)
 			goto add_dentry;
 
@@ -506,7 +549,7 @@ add_dentry:
 	f2fs_wait_on_page_writeback(dentry_page, DATA);
 
 	down_write(&F2FS_I(inode)->i_sem);
-	page = init_inode_metadata(inode, dir, name);
+	page = init_inode_metadata(inode, dir, name, NULL);
 	if (IS_ERR(page)) {
 		err = PTR_ERR(page);
 		goto fail;
@@ -545,7 +588,7 @@ int f2fs_do_tmpfile(struct inode *inode, struct inode *dir)
 	int err = 0;
 
 	down_write(&F2FS_I(inode)->i_sem);
-	page = init_inode_metadata(inode, dir, NULL);
+	page = init_inode_metadata(inode, dir, NULL, NULL);
 	if (IS_ERR(page)) {
 		err = PTR_ERR(page);
 		goto fail;
@@ -560,26 +603,57 @@ fail:
 	return err;
 }
 
+void f2fs_drop_nlink(struct inode *dir, struct inode *inode, struct page *page)
+{
+	struct f2fs_sb_info *sbi = F2FS_I_SB(dir);
+
+	down_write(&F2FS_I(inode)->i_sem);
+
+	if (S_ISDIR(inode->i_mode)) {
+		drop_nlink(dir);
+		if (page)
+			update_inode(dir, page);
+		else
+			update_inode_page(dir);
+	}
+	inode->i_ctime = CURRENT_TIME;
+
+	drop_nlink(inode);
+	if (S_ISDIR(inode->i_mode)) {
+		drop_nlink(inode);
+		i_size_write(inode, 0);
+	}
+	up_write(&F2FS_I(inode)->i_sem);
+	update_inode_page(inode);
+
+	if (inode->i_nlink == 0)
+		add_orphan_inode(sbi, inode->i_ino);
+	else
+		release_orphan_inode(sbi);
+}
+
 /*
  * It only removes the dentry from the dentry page, corresponding name
  * entry in name page does not need to be touched during deletion.
  */
 void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page,
-						struct inode *inode)
+					struct inode *dir, struct inode *inode)
 {
 	struct	f2fs_dentry_block *dentry_blk;
 	unsigned int bit_pos;
-	struct inode *dir = page->mapping->host;
 	int slots = GET_DENTRY_SLOTS(le16_to_cpu(dentry->name_len));
 	int i;
 
+	if (f2fs_has_inline_dentry(dir))
+		return f2fs_delete_inline_entry(dentry, page, dir, inode);
+
 	lock_page(page);
 	f2fs_wait_on_page_writeback(page, DATA);
 
 	dentry_blk = page_address(page);
 	bit_pos = dentry - dentry_blk->dentry;
 	for (i = 0; i < slots; i++)
-		test_and_clear_bit_le(bit_pos + i, &dentry_blk->dentry_bitmap);
+		clear_bit_le(bit_pos + i, &dentry_blk->dentry_bitmap);
 
 	/* Let's check and deallocate this dentry page */
 	bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap,
@@ -590,29 +664,8 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page,
 
 	dir->i_ctime = dir->i_mtime = CURRENT_TIME;
 
-	if (inode) {
-		struct f2fs_sb_info *sbi = F2FS_I_SB(dir);
-
-		down_write(&F2FS_I(inode)->i_sem);
-
-		if (S_ISDIR(inode->i_mode)) {
-			drop_nlink(dir);
-			update_inode_page(dir);
-		}
-		inode->i_ctime = CURRENT_TIME;
-		drop_nlink(inode);
-		if (S_ISDIR(inode->i_mode)) {
-			drop_nlink(inode);
-			i_size_write(inode, 0);
-		}
-		up_write(&F2FS_I(inode)->i_sem);
-		update_inode_page(inode);
-
-		if (inode->i_nlink == 0)
-			add_orphan_inode(sbi, inode->i_ino);
-		else
-			release_orphan_inode(sbi);
-	}
+	if (inode)
+		f2fs_drop_nlink(dir, inode, NULL);
 
 	if (bit_pos == NR_DENTRY_IN_BLOCK) {
 		truncate_hole(dir, page->index, page->index + 1);
@@ -628,9 +681,12 @@ bool f2fs_empty_dir(struct inode *dir)
 	unsigned long bidx;
 	struct page *dentry_page;
 	unsigned int bit_pos;
-	struct	f2fs_dentry_block *dentry_blk;
+	struct f2fs_dentry_block *dentry_blk;
 	unsigned long nblock = dir_blocks(dir);
 
+	if (f2fs_has_inline_dentry(dir))
+		return f2fs_empty_inline_dir(dir);
+
 	for (bidx = 0; bidx < nblock; bidx++) {
 		dentry_page = get_lock_data_page(dir, bidx);
 		if (IS_ERR(dentry_page)) {
@@ -640,7 +696,6 @@ bool f2fs_empty_dir(struct inode *dir)
 				return false;
 		}
 
-
 		dentry_blk = kmap_atomic(dentry_page);
 		if (bidx == 0)
 			bit_pos = 2;
@@ -659,19 +714,48 @@ bool f2fs_empty_dir(struct inode *dir)
 	return true;
 }
 
+bool f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d,
+						unsigned int start_pos)
+{
+	unsigned char d_type = DT_UNKNOWN;
+	unsigned int bit_pos;
+	struct f2fs_dir_entry *de = NULL;
+
+	bit_pos = ((unsigned long)ctx->pos % d->max);
+
+	while (bit_pos < d->max) {
+		bit_pos = find_next_bit_le(d->bitmap, d->max, bit_pos);
+		if (bit_pos >= d->max)
+			break;
+
+		de = &d->dentry[bit_pos];
+		if (de->file_type < F2FS_FT_MAX)
+			d_type = f2fs_filetype_table[de->file_type];
+		else
+			d_type = DT_UNKNOWN;
+		if (!dir_emit(ctx, d->filename[bit_pos],
+					le16_to_cpu(de->name_len),
+					le32_to_cpu(de->ino), d_type))
+			return true;
+
+		bit_pos += GET_DENTRY_SLOTS(le16_to_cpu(de->name_len));
+		ctx->pos = start_pos + bit_pos;
+	}
+	return false;
+}
+
 static int f2fs_readdir(struct file *file, struct dir_context *ctx)
 {
 	struct inode *inode = file_inode(file);
 	unsigned long npages = dir_blocks(inode);
-	unsigned int bit_pos = 0;
 	struct f2fs_dentry_block *dentry_blk = NULL;
-	struct f2fs_dir_entry *de = NULL;
 	struct page *dentry_page = NULL;
 	struct file_ra_state *ra = &file->f_ra;
 	unsigned int n = ((unsigned long)ctx->pos / NR_DENTRY_IN_BLOCK);
-	unsigned char d_type = DT_UNKNOWN;
+	struct f2fs_dentry_ptr d;
 
-	bit_pos = ((unsigned long)ctx->pos % NR_DENTRY_IN_BLOCK);
+	if (f2fs_has_inline_dentry(inode))
+		return f2fs_read_inline_dir(file, ctx);
 
 	/* readahead for multi pages of dir */
 	if (npages - n > 1 && !ra_has_index(ra, n))
@@ -684,28 +768,12 @@ static int f2fs_readdir(struct file *file, struct dir_context *ctx)
 			continue;
 
 		dentry_blk = kmap(dentry_page);
-		while (bit_pos < NR_DENTRY_IN_BLOCK) {
-			bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap,
-							NR_DENTRY_IN_BLOCK,
-							bit_pos);
-			if (bit_pos >= NR_DENTRY_IN_BLOCK)
-				break;
-
-			de = &dentry_blk->dentry[bit_pos];
-			if (de->file_type < F2FS_FT_MAX)
-				d_type = f2fs_filetype_table[de->file_type];
-			else
-				d_type = DT_UNKNOWN;
-			if (!dir_emit(ctx,
-					dentry_blk->filename[bit_pos],
-					le16_to_cpu(de->name_len),
-					le32_to_cpu(de->ino), d_type))
-				goto stop;
 
-			bit_pos += GET_DENTRY_SLOTS(le16_to_cpu(de->name_len));
-			ctx->pos = n * NR_DENTRY_IN_BLOCK + bit_pos;
-		}
-		bit_pos = 0;
+		make_dentry_ptr(&d, (void *)dentry_blk, 1);
+
+		if (f2fs_fill_dentries(ctx, &d, n * NR_DENTRY_IN_BLOCK))
+			goto stop;
+
 		ctx->pos = (n + 1) * NR_DENTRY_IN_BLOCK;
 		kunmap(dentry_page);
 		f2fs_put_page(dentry_page, 1);
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 8171e80..ec58bb2 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -46,8 +46,10 @@
 #define F2FS_MOUNT_DISABLE_EXT_IDENTIFY	0x00000040
 #define F2FS_MOUNT_INLINE_XATTR		0x00000080
 #define F2FS_MOUNT_INLINE_DATA		0x00000100
-#define F2FS_MOUNT_FLUSH_MERGE		0x00000200
-#define F2FS_MOUNT_NOBARRIER		0x00000400
+#define F2FS_MOUNT_INLINE_DENTRY	0x00000200
+#define F2FS_MOUNT_FLUSH_MERGE		0x00000400
+#define F2FS_MOUNT_NOBARRIER		0x00000800
+#define F2FS_MOUNT_FASTBOOT		0x00001000
 
 #define clear_opt(sbi, option)	(sbi->mount_opt.opt &= ~F2FS_MOUNT_##option)
 #define set_opt(sbi, option)	(sbi->mount_opt.opt |= F2FS_MOUNT_##option)
@@ -211,6 +213,32 @@ static inline bool __has_cursum_space(struct f2fs_summary_block *sum, int size,
 /*
  * For INODE and NODE manager
  */
+/* for directory operations */
+struct f2fs_dentry_ptr {
+	const void *bitmap;
+	struct f2fs_dir_entry *dentry;
+	__u8 (*filename)[F2FS_SLOT_LEN];
+	int max;
+};
+
+static inline void make_dentry_ptr(struct f2fs_dentry_ptr *d,
+					void *src, int type)
+{
+	if (type == 1) {
+		struct f2fs_dentry_block *t = (struct f2fs_dentry_block *)src;
+		d->max = NR_DENTRY_IN_BLOCK;
+		d->bitmap = &t->dentry_bitmap;
+		d->dentry = t->dentry;
+		d->filename = t->filename;
+	} else {
+		struct f2fs_inline_dentry *t = (struct f2fs_inline_dentry *)src;
+		d->max = NR_INLINE_DENTRY;
+		d->bitmap = &t->dentry_bitmap;
+		d->dentry = t->dentry;
+		d->filename = t->filename;
+	}
+}
+
 /*
  * XATTR_NODE_OFFSET stores xattrs to one node block per file keeping -1
  * as its node offset to distinguish from index node blocks.
@@ -269,6 +297,7 @@ struct f2fs_inode_info {
 	struct extent_info ext;		/* in-memory extent cache entry */
 	struct dir_inode_entry *dirty_dir;	/* the pointer of dirty dir */
 
+	struct radix_tree_root inmem_root;	/* radix tree for inmem pages */
 	struct list_head inmem_pages;	/* inmemory pages managed by f2fs */
 	struct mutex inmem_lock;	/* lock for inmemory pages */
 };
@@ -303,7 +332,7 @@ struct f2fs_nm_info {
 	/* NAT cache management */
 	struct radix_tree_root nat_root;/* root of the nat entry cache */
 	struct radix_tree_root nat_set_root;/* root of the nat set cache */
-	rwlock_t nat_tree_lock;		/* protect nat_tree_lock */
+	struct rw_semaphore nat_tree_lock;	/* protect nat_tree_lock */
 	struct list_head nat_entries;	/* cached nat entry list (clean) */
 	unsigned int nat_cnt;		/* the # of cached nat entries */
 	unsigned int dirty_nat_cnt;	/* total num of nat entries in set */
@@ -433,6 +462,7 @@ enum count_type {
 	F2FS_DIRTY_DENTS,
 	F2FS_DIRTY_NODES,
 	F2FS_DIRTY_META,
+	F2FS_INMEM_PAGES,
 	NR_COUNT_TYPE,
 };
 
@@ -470,6 +500,14 @@ struct f2fs_bio_info {
 	struct rw_semaphore io_rwsem;	/* blocking op for bio */
 };
 
+/* for inner inode cache management */
+struct inode_management {
+	struct radix_tree_root ino_root;	/* ino entry array */
+	spinlock_t ino_lock;			/* for ino entry lock */
+	struct list_head ino_list;		/* inode list head */
+	unsigned long ino_num;			/* number of entries */
+};
+
 struct f2fs_sb_info {
 	struct super_block *sb;			/* pointer to VFS super block */
 	struct proc_dir_entry *s_proc;		/* proc entry */
@@ -488,7 +526,6 @@ struct f2fs_sb_info {
 	/* for bio operations */
 	struct f2fs_bio_info read_io;			/* for read bios */
 	struct f2fs_bio_info write_io[NR_PAGE_TYPE];	/* for write bios */
-	struct completion *wait_io;		/* for completion bios */
 
 	/* for checkpoint */
 	struct f2fs_checkpoint *ckpt;		/* raw checkpoint pointer */
@@ -500,13 +537,9 @@ struct f2fs_sb_info {
 	bool por_doing;				/* recovery is doing or not */
 	wait_queue_head_t cp_wait;
 
-	/* for inode management */
-	struct radix_tree_root ino_root[MAX_INO_ENTRY];	/* ino entry array */
-	spinlock_t ino_lock[MAX_INO_ENTRY];		/* for ino entry lock */
-	struct list_head ino_list[MAX_INO_ENTRY];	/* inode list head */
+	struct inode_management im[MAX_INO_ENTRY];      /* manage inode cache */
 
 	/* for orphan inode, use 0'th array */
-	unsigned int n_orphans;			/* # of orphan inodes */
 	unsigned int max_orphans;		/* max orphan inodes */
 
 	/* for directory inode management */
@@ -557,7 +590,8 @@ struct f2fs_sb_info {
 	unsigned int segment_count[2];		/* # of allocated segments */
 	unsigned int block_count[2];		/* # of allocated blocks */
 	int total_hit_ext, read_hit_ext;	/* extent cache hit ratio */
-	int inline_inode;			/* # of inline_data inodes */
+	atomic_t inline_inode;			/* # of inline_data inodes */
+	atomic_t inline_dir;			/* # of inline_dentry inodes */
 	int bg_gc;				/* background gc calls */
 	unsigned int n_dirty_dirs;		/* # of dir inodes */
 #endif
@@ -988,6 +1022,13 @@ retry:
 	return entry;
 }
 
+static inline void f2fs_radix_tree_insert(struct radix_tree_root *root,
+				unsigned long index, void *item)
+{
+	while (radix_tree_insert(root, index, item))
+		cond_resched();
+}
+
 #define RAW_IS_INODE(p)	((p)->footer.nid == (p)->footer.ino)
 
 static inline bool IS_INODE(struct page *page)
@@ -1020,7 +1061,7 @@ static inline int f2fs_test_bit(unsigned int nr, char *addr)
 	return mask & *addr;
 }
 
-static inline int f2fs_set_bit(unsigned int nr, char *addr)
+static inline int f2fs_test_and_set_bit(unsigned int nr, char *addr)
 {
 	int mask;
 	int ret;
@@ -1032,7 +1073,7 @@ static inline int f2fs_set_bit(unsigned int nr, char *addr)
 	return ret;
 }
 
-static inline int f2fs_clear_bit(unsigned int nr, char *addr)
+static inline int f2fs_test_and_clear_bit(unsigned int nr, char *addr)
 {
 	int mask;
 	int ret;
@@ -1044,6 +1085,15 @@ static inline int f2fs_clear_bit(unsigned int nr, char *addr)
 	return ret;
 }
 
+static inline void f2fs_change_bit(unsigned int nr, char *addr)
+{
+	int mask;
+
+	addr += (nr >> 3);
+	mask = 1 << (7 - (nr & 0x07));
+	*addr ^= mask;
+}
+
 /* used for f2fs_inode_info->flags */
 enum {
 	FI_NEW_INODE,		/* indicate newly allocated inode */
@@ -1057,11 +1107,13 @@ enum {
 	FI_NO_EXTENT,		/* not to use the extent cache */
 	FI_INLINE_XATTR,	/* used for inline xattr */
 	FI_INLINE_DATA,		/* used for inline data*/
+	FI_INLINE_DENTRY,	/* used for inline dentry */
 	FI_APPEND_WRITE,	/* inode has appended data */
 	FI_UPDATE_WRITE,	/* inode has in-place-update data */
 	FI_NEED_IPU,		/* used for ipu per file */
 	FI_ATOMIC_FILE,		/* indicate atomic file */
 	FI_VOLATILE_FILE,	/* indicate volatile file */
+	FI_DATA_EXIST,		/* indicate data exists */
 };
 
 static inline void set_inode_flag(struct f2fs_inode_info *fi, int flag)
@@ -1087,15 +1139,6 @@ static inline void set_acl_inode(struct f2fs_inode_info *fi, umode_t mode)
 	set_inode_flag(fi, FI_ACL_MODE);
 }
 
-static inline int cond_clear_inode_flag(struct f2fs_inode_info *fi, int flag)
-{
-	if (is_inode_flag_set(fi, FI_ACL_MODE)) {
-		clear_inode_flag(fi, FI_ACL_MODE);
-		return 1;
-	}
-	return 0;
-}
-
 static inline void get_inline_info(struct f2fs_inode_info *fi,
 					struct f2fs_inode *ri)
 {
@@ -1103,6 +1146,10 @@ static inline void get_inline_info(struct f2fs_inode_info *fi,
 		set_inode_flag(fi, FI_INLINE_XATTR);
 	if (ri->i_inline & F2FS_INLINE_DATA)
 		set_inode_flag(fi, FI_INLINE_DATA);
+	if (ri->i_inline & F2FS_INLINE_DENTRY)
+		set_inode_flag(fi, FI_INLINE_DENTRY);
+	if (ri->i_inline & F2FS_DATA_EXIST)
+		set_inode_flag(fi, FI_DATA_EXIST);
 }
 
 static inline void set_raw_inline(struct f2fs_inode_info *fi,
@@ -1114,6 +1161,10 @@ static inline void set_raw_inline(struct f2fs_inode_info *fi,
 		ri->i_inline |= F2FS_INLINE_XATTR;
 	if (is_inode_flag_set(fi, FI_INLINE_DATA))
 		ri->i_inline |= F2FS_INLINE_DATA;
+	if (is_inode_flag_set(fi, FI_INLINE_DENTRY))
+		ri->i_inline |= F2FS_INLINE_DENTRY;
+	if (is_inode_flag_set(fi, FI_DATA_EXIST))
+		ri->i_inline |= F2FS_DATA_EXIST;
 }
 
 static inline int f2fs_has_inline_xattr(struct inode *inode)
@@ -1148,6 +1199,17 @@ static inline int f2fs_has_inline_data(struct inode *inode)
 	return is_inode_flag_set(F2FS_I(inode), FI_INLINE_DATA);
 }
 
+static inline void f2fs_clear_inline_inode(struct inode *inode)
+{
+	clear_inode_flag(F2FS_I(inode), FI_INLINE_DATA);
+	clear_inode_flag(F2FS_I(inode), FI_DATA_EXIST);
+}
+
+static inline int f2fs_exist_data(struct inode *inode)
+{
+	return is_inode_flag_set(F2FS_I(inode), FI_DATA_EXIST);
+}
+
 static inline bool f2fs_is_atomic_file(struct inode *inode)
 {
 	return is_inode_flag_set(F2FS_I(inode), FI_ATOMIC_FILE);
@@ -1164,6 +1226,23 @@ static inline void *inline_data_addr(struct page *page)
 	return (void *)&(ri->i_addr[1]);
 }
 
+static inline int f2fs_has_inline_dentry(struct inode *inode)
+{
+	return is_inode_flag_set(F2FS_I(inode), FI_INLINE_DENTRY);
+}
+
+static inline void *inline_dentry_addr(struct page *page)
+{
+	struct f2fs_inode *ri = F2FS_INODE(page);
+	return (void *)&(ri->i_addr[1]);
+}
+
+static inline void f2fs_dentry_kunmap(struct inode *dir, struct page *page)
+{
+	if (!f2fs_has_inline_dentry(dir))
+		kunmap(page);
+}
+
 static inline int f2fs_readonly(struct super_block *sb)
 {
 	return sb->s_flags & MS_RDONLY;
@@ -1224,6 +1303,19 @@ struct dentry *f2fs_get_parent(struct dentry *child);
 /*
  * dir.c
  */
+extern unsigned char f2fs_filetype_table[F2FS_FT_MAX];
+void set_de_type(struct f2fs_dir_entry *, struct inode *);
+struct f2fs_dir_entry *find_target_dentry(struct qstr *, int *,
+			struct f2fs_dentry_ptr *);
+bool f2fs_fill_dentries(struct dir_context *, struct f2fs_dentry_ptr *,
+			unsigned int);
+void do_make_empty_dir(struct inode *, struct inode *,
+			struct f2fs_dentry_ptr *);
+struct page *init_inode_metadata(struct inode *, struct inode *,
+			const struct qstr *, struct page *);
+void update_parent_metadata(struct inode *, struct inode *, unsigned int);
+int room_for_filename(const void *, int, int);
+void f2fs_drop_nlink(struct inode *, struct inode *, struct page *);
 struct f2fs_dir_entry *f2fs_find_entry(struct inode *, struct qstr *,
 							struct page **);
 struct f2fs_dir_entry *f2fs_parent_dir(struct inode *, struct page **);
@@ -1232,7 +1324,8 @@ void f2fs_set_link(struct inode *, struct f2fs_dir_entry *,
 				struct page *, struct inode *);
 int update_dent_inode(struct inode *, const struct qstr *);
 int __f2fs_add_link(struct inode *, const struct qstr *, struct inode *);
-void f2fs_delete_entry(struct f2fs_dir_entry *, struct page *, struct inode *);
+void f2fs_delete_entry(struct f2fs_dir_entry *, struct page *, struct inode *,
+							struct inode *);
 int f2fs_do_tmpfile(struct inode *, struct inode *);
 int f2fs_make_empty(struct inode *, struct inode *);
 bool f2fs_empty_dir(struct inode *);
@@ -1296,6 +1389,7 @@ void destroy_node_manager_caches(void);
  * segment.c
  */
 void register_inmem_page(struct inode *, struct page *);
+void invalidate_inmem_page(struct inode *, struct page *);
 void commit_inmem_pages(struct inode *, bool);
 void f2fs_balance_fs(struct f2fs_sb_info *);
 void f2fs_balance_fs_bg(struct f2fs_sb_info *);
@@ -1337,8 +1431,8 @@ void destroy_segment_manager_caches(void);
  */
 struct page *grab_meta_page(struct f2fs_sb_info *, pgoff_t);
 struct page *get_meta_page(struct f2fs_sb_info *, pgoff_t);
-struct page *get_meta_page_ra(struct f2fs_sb_info *, pgoff_t);
 int ra_meta_pages(struct f2fs_sb_info *, block_t, int, int);
+void ra_meta_pages_cond(struct f2fs_sb_info *, pgoff_t);
 long sync_meta_pages(struct f2fs_sb_info *, enum page_type, long);
 void add_dirty_inode(struct f2fs_sb_info *, nid_t, int type);
 void remove_dirty_inode(struct f2fs_sb_info *, nid_t, int type);
@@ -1405,7 +1499,7 @@ struct f2fs_stat_info {
 	int ndirty_node, ndirty_dent, ndirty_dirs, ndirty_meta;
 	int nats, sits, fnids;
 	int total_count, utilization;
-	int bg_gc, inline_inode;
+	int bg_gc, inline_inode, inline_dir, inmem_pages;
 	unsigned int valid_count, valid_node_count, valid_inode_count;
 	unsigned int bimodal, avg_vblocks;
 	int util_free, util_valid, util_invalid;
@@ -1438,14 +1532,23 @@ static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi)
 #define stat_inc_inline_inode(inode)					\
 	do {								\
 		if (f2fs_has_inline_data(inode))			\
-			((F2FS_I_SB(inode))->inline_inode++);		\
+			(atomic_inc(&F2FS_I_SB(inode)->inline_inode));	\
 	} while (0)
 #define stat_dec_inline_inode(inode)					\
 	do {								\
 		if (f2fs_has_inline_data(inode))			\
-			((F2FS_I_SB(inode))->inline_inode--);		\
+			(atomic_dec(&F2FS_I_SB(inode)->inline_inode));	\
+	} while (0)
+#define stat_inc_inline_dir(inode)					\
+	do {								\
+		if (f2fs_has_inline_dentry(inode))			\
+			(atomic_inc(&F2FS_I_SB(inode)->inline_dir));	\
+	} while (0)
+#define stat_dec_inline_dir(inode)					\
+	do {								\
+		if (f2fs_has_inline_dentry(inode))			\
+			(atomic_dec(&F2FS_I_SB(inode)->inline_dir));	\
 	} while (0)
-
 #define stat_inc_seg_type(sbi, curseg)					\
 		((sbi)->segment_count[(curseg)->alloc_type]++)
 #define stat_inc_block_count(sbi, curseg)				\
@@ -1492,6 +1595,8 @@ void f2fs_destroy_root_stats(void);
 #define stat_inc_read_hit(sb)
 #define stat_inc_inline_inode(inode)
 #define stat_dec_inline_inode(inode)
+#define stat_inc_inline_dir(inode)
+#define stat_dec_inline_dir(inode)
 #define stat_inc_seg_type(sbi, curseg)
 #define stat_inc_block_count(sbi, curseg)
 #define stat_inc_seg_count(si, type)
@@ -1519,9 +1624,20 @@ extern const struct inode_operations f2fs_special_inode_operations;
  * inline.c
  */
 bool f2fs_may_inline(struct inode *);
+void read_inline_data(struct page *, struct page *);
 int f2fs_read_inline_data(struct inode *, struct page *);
-int f2fs_convert_inline_data(struct inode *, pgoff_t, struct page *);
-int f2fs_write_inline_data(struct inode *, struct page *, unsigned int);
-void truncate_inline_data(struct inode *, u64);
+int f2fs_convert_inline_page(struct dnode_of_data *, struct page *);
+int f2fs_convert_inline_inode(struct inode *);
+int f2fs_write_inline_data(struct inode *, struct page *);
+void truncate_inline_data(struct page *, u64);
 bool recover_inline_data(struct inode *, struct page *);
+struct f2fs_dir_entry *find_in_inline_dir(struct inode *, struct qstr *,
+							struct page **);
+struct f2fs_dir_entry *f2fs_parent_inline_dir(struct inode *, struct page **);
+int make_empty_inline_dir(struct inode *inode, struct inode *, struct page *);
+int f2fs_add_inline_entry(struct inode *, const struct qstr *, struct inode *);
+void f2fs_delete_inline_entry(struct f2fs_dir_entry *, struct page *,
+						struct inode *, struct inode *);
+bool f2fs_empty_inline_dir(struct inode *);
+int f2fs_read_inline_dir(struct file *, struct dir_context *);
 #endif
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 8e68bb6..3c27e0e 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -41,18 +41,18 @@ static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma,
 
 	sb_start_pagefault(inode->i_sb);
 
-	/* force to convert with normal data indices */
-	err = f2fs_convert_inline_data(inode, MAX_INLINE_DATA + 1, page);
-	if (err)
-		goto out;
+	f2fs_bug_on(sbi, f2fs_has_inline_data(inode));
 
 	/* block allocation */
 	f2fs_lock_op(sbi);
 	set_new_dnode(&dn, inode, NULL, NULL, 0);
 	err = f2fs_reserve_block(&dn, page->index);
-	f2fs_unlock_op(sbi);
-	if (err)
+	if (err) {
+		f2fs_unlock_op(sbi);
 		goto out;
+	}
+	f2fs_put_dnode(&dn);
+	f2fs_unlock_op(sbi);
 
 	file_update_time(vma->vm_file);
 	lock_page(page);
@@ -130,10 +130,45 @@ static inline bool need_do_checkpoint(struct inode *inode)
 		need_cp = true;
 	else if (F2FS_I(inode)->xattr_ver == cur_cp_version(F2FS_CKPT(sbi)))
 		need_cp = true;
+	else if (test_opt(sbi, FASTBOOT))
+		need_cp = true;
+	else if (sbi->active_logs == 2)
+		need_cp = true;
 
 	return need_cp;
 }
 
+static bool need_inode_page_update(struct f2fs_sb_info *sbi, nid_t ino)
+{
+	struct page *i = find_get_page(NODE_MAPPING(sbi), ino);
+	bool ret = false;
+	/* But we need to avoid that there are some inode updates */
+	if ((i && PageDirty(i)) || need_inode_block_update(sbi, ino))
+		ret = true;
+	f2fs_put_page(i, 0);
+	return ret;
+}
+
+static void try_to_fix_pino(struct inode *inode)
+{
+	struct f2fs_inode_info *fi = F2FS_I(inode);
+	nid_t pino;
+
+	down_write(&fi->i_sem);
+	fi->xattr_ver = 0;
+	if (file_wrong_pino(inode) && inode->i_nlink == 1 &&
+			get_parent_ino(inode, &pino)) {
+		fi->i_pino = pino;
+		file_got_pino(inode);
+		up_write(&fi->i_sem);
+
+		mark_inode_dirty_sync(inode);
+		f2fs_write_inode(inode, NULL);
+	} else {
+		up_write(&fi->i_sem);
+	}
+}
+
 int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
 {
 	struct inode *inode = file->f_mapping->host;
@@ -164,19 +199,21 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
 		return ret;
 	}
 
+	/* if the inode is dirty, let's recover all the time */
+	if (!datasync && is_inode_flag_set(fi, FI_DIRTY_INODE)) {
+		update_inode_page(inode);
+		goto go_write;
+	}
+
 	/*
 	 * if there is no written data, don't waste time to write recovery info.
 	 */
 	if (!is_inode_flag_set(fi, FI_APPEND_WRITE) &&
 			!exist_written_data(sbi, ino, APPEND_INO)) {
-		struct page *i = find_get_page(NODE_MAPPING(sbi), ino);
 
-		/* But we need to avoid that there are some inode updates */
-		if ((i && PageDirty(i)) || need_inode_block_update(sbi, ino)) {
-			f2fs_put_page(i, 0);
+		/* it may call write_inode just prior to fsync */
+		if (need_inode_page_update(sbi, ino))
 			goto go_write;
-		}
-		f2fs_put_page(i, 0);
 
 		if (is_inode_flag_set(fi, FI_UPDATE_WRITE) ||
 				exist_written_data(sbi, ino, UPDATE_INO))
@@ -196,49 +233,36 @@ go_write:
 	up_read(&fi->i_sem);
 
 	if (need_cp) {
-		nid_t pino;
-
 		/* all the dirty node pages should be flushed for POR */
 		ret = f2fs_sync_fs(inode->i_sb, 1);
 
-		down_write(&fi->i_sem);
-		F2FS_I(inode)->xattr_ver = 0;
-		if (file_wrong_pino(inode) && inode->i_nlink == 1 &&
-					get_parent_ino(inode, &pino)) {
-			F2FS_I(inode)->i_pino = pino;
-			file_got_pino(inode);
-			up_write(&fi->i_sem);
-			mark_inode_dirty_sync(inode);
-			ret = f2fs_write_inode(inode, NULL);
-			if (ret)
-				goto out;
-		} else {
-			up_write(&fi->i_sem);
-		}
-	} else {
+		/*
+		 * We've secured consistency through sync_fs. Following pino
+		 * will be used only for fsynced inodes after checkpoint.
+		 */
+		try_to_fix_pino(inode);
+		goto out;
+	}
 sync_nodes:
-		sync_node_pages(sbi, ino, &wbc);
-
-		if (need_inode_block_update(sbi, ino)) {
-			mark_inode_dirty_sync(inode);
-			ret = f2fs_write_inode(inode, NULL);
-			if (ret)
-				goto out;
-			goto sync_nodes;
-		}
+	sync_node_pages(sbi, ino, &wbc);
 
-		ret = wait_on_node_pages_writeback(sbi, ino);
-		if (ret)
-			goto out;
+	if (need_inode_block_update(sbi, ino)) {
+		mark_inode_dirty_sync(inode);
+		f2fs_write_inode(inode, NULL);
+		goto sync_nodes;
+	}
+
+	ret = wait_on_node_pages_writeback(sbi, ino);
+	if (ret)
+		goto out;
 
-		/* once recovery info is written, don't need to tack this */
-		remove_dirty_inode(sbi, ino, APPEND_INO);
-		clear_inode_flag(fi, FI_APPEND_WRITE);
+	/* once recovery info is written, don't need to tack this */
+	remove_dirty_inode(sbi, ino, APPEND_INO);
+	clear_inode_flag(fi, FI_APPEND_WRITE);
 flush_out:
-		remove_dirty_inode(sbi, ino, UPDATE_INO);
-		clear_inode_flag(fi, FI_UPDATE_WRITE);
-		ret = f2fs_issue_flush(F2FS_I_SB(inode));
-	}
+	remove_dirty_inode(sbi, ino, UPDATE_INO);
+	clear_inode_flag(fi, FI_UPDATE_WRITE);
+	ret = f2fs_issue_flush(sbi);
 out:
 	trace_f2fs_sync_file_exit(inode, need_cp, datasync, ret);
 	return ret;
@@ -296,7 +320,7 @@ static loff_t f2fs_seek_block(struct file *file, loff_t offset, int whence)
 		goto fail;
 
 	/* handle inline data case */
-	if (f2fs_has_inline_data(inode)) {
+	if (f2fs_has_inline_data(inode) || f2fs_has_inline_dentry(inode)) {
 		if (whence == SEEK_HOLE)
 			data_ofs = isize;
 		goto found;
@@ -374,6 +398,15 @@ static loff_t f2fs_llseek(struct file *file, loff_t offset, int whence)
 
 static int f2fs_file_mmap(struct file *file, struct vm_area_struct *vma)
 {
+	struct inode *inode = file_inode(file);
+
+	/* we don't need to use inline_data strictly */
+	if (f2fs_has_inline_data(inode)) {
+		int err = f2fs_convert_inline_inode(inode);
+		if (err)
+			return err;
+	}
+
 	file_accessed(file);
 	vma->vm_ops = &f2fs_file_vm_ops;
 	return 0;
@@ -415,20 +448,17 @@ void truncate_data_blocks(struct dnode_of_data *dn)
 	truncate_data_blocks_range(dn, ADDRS_PER_BLOCK);
 }
 
-static void truncate_partial_data_page(struct inode *inode, u64 from)
+static int truncate_partial_data_page(struct inode *inode, u64 from)
 {
 	unsigned offset = from & (PAGE_CACHE_SIZE - 1);
 	struct page *page;
 
-	if (f2fs_has_inline_data(inode))
-		return truncate_inline_data(inode, from);
-
 	if (!offset)
-		return;
+		return 0;
 
 	page = find_data_page(inode, from >> PAGE_CACHE_SHIFT, false);
 	if (IS_ERR(page))
-		return;
+		return 0;
 
 	lock_page(page);
 	if (unlikely(!PageUptodate(page) ||
@@ -438,9 +468,9 @@ static void truncate_partial_data_page(struct inode *inode, u64 from)
 	f2fs_wait_on_page_writeback(page, DATA);
 	zero_user(page, offset, PAGE_CACHE_SIZE - offset);
 	set_page_dirty(page);
-
 out:
 	f2fs_put_page(page, 1);
+	return 0;
 }
 
 int truncate_blocks(struct inode *inode, u64 from, bool lock)
@@ -450,27 +480,33 @@ int truncate_blocks(struct inode *inode, u64 from, bool lock)
 	struct dnode_of_data dn;
 	pgoff_t free_from;
 	int count = 0, err = 0;
+	struct page *ipage;
 
 	trace_f2fs_truncate_blocks_enter(inode, from);
 
-	if (f2fs_has_inline_data(inode))
-		goto done;
-
 	free_from = (pgoff_t)
-			((from + blocksize - 1) >> (sbi->log_blocksize));
+		((from + blocksize - 1) >> (sbi->log_blocksize));
 
 	if (lock)
 		f2fs_lock_op(sbi);
 
-	set_new_dnode(&dn, inode, NULL, NULL, 0);
+	ipage = get_node_page(sbi, inode->i_ino);
+	if (IS_ERR(ipage)) {
+		err = PTR_ERR(ipage);
+		goto out;
+	}
+
+	if (f2fs_has_inline_data(inode)) {
+		f2fs_put_page(ipage, 1);
+		goto out;
+	}
+
+	set_new_dnode(&dn, inode, ipage, NULL, 0);
 	err = get_dnode_of_data(&dn, free_from, LOOKUP_NODE);
 	if (err) {
 		if (err == -ENOENT)
 			goto free_next;
-		if (lock)
-			f2fs_unlock_op(sbi);
-		trace_f2fs_truncate_blocks_exit(inode, err);
-		return err;
+		goto out;
 	}
 
 	count = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode));
@@ -486,11 +522,13 @@ int truncate_blocks(struct inode *inode, u64 from, bool lock)
 	f2fs_put_dnode(&dn);
 free_next:
 	err = truncate_inode_blocks(inode, free_from);
+out:
 	if (lock)
 		f2fs_unlock_op(sbi);
-done:
+
 	/* lastly zero out the first data page */
-	truncate_partial_data_page(inode, from);
+	if (!err)
+		err = truncate_partial_data_page(inode, from);
 
 	trace_f2fs_truncate_blocks_exit(inode, err);
 	return err;
@@ -504,6 +542,12 @@ void f2fs_truncate(struct inode *inode)
 
 	trace_f2fs_truncate(inode);
 
+	/* we should check inline_data size */
+	if (f2fs_has_inline_data(inode) && !f2fs_may_inline(inode)) {
+		if (f2fs_convert_inline_inode(inode))
+			return;
+	}
+
 	if (!truncate_blocks(inode, i_size_read(inode), true)) {
 		inode->i_mtime = inode->i_ctime = CURRENT_TIME;
 		mark_inode_dirty(inode);
@@ -561,10 +605,6 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr)
 		return err;
 
 	if (attr->ia_valid & ATTR_SIZE) {
-		err = f2fs_convert_inline_data(inode, attr->ia_size, NULL);
-		if (err)
-			return err;
-
 		if (attr->ia_size != i_size_read(inode)) {
 			truncate_setsize(inode, attr->ia_size);
 			f2fs_truncate(inode);
@@ -665,9 +705,11 @@ static int punch_hole(struct inode *inode, loff_t offset, loff_t len)
 	if (offset >= inode->i_size)
 		return ret;
 
-	ret = f2fs_convert_inline_data(inode, MAX_INLINE_DATA + 1, NULL);
-	if (ret)
-		return ret;
+	if (f2fs_has_inline_data(inode)) {
+		ret = f2fs_convert_inline_inode(inode);
+		if (ret)
+			return ret;
+	}
 
 	pg_start = ((unsigned long long) offset) >> PAGE_CACHE_SHIFT;
 	pg_end = ((unsigned long long) offset + len) >> PAGE_CACHE_SHIFT;
@@ -721,9 +763,11 @@ static int expand_inode_data(struct inode *inode, loff_t offset,
 	if (ret)
 		return ret;
 
-	ret = f2fs_convert_inline_data(inode, offset + len, NULL);
-	if (ret)
-		return ret;
+	if (f2fs_has_inline_data(inode)) {
+		ret = f2fs_convert_inline_inode(inode);
+		if (ret)
+			return ret;
+	}
 
 	pg_start = ((unsigned long long) offset) >> PAGE_CACHE_SHIFT;
 	pg_end = ((unsigned long long) offset + len) >> PAGE_CACHE_SHIFT;
@@ -874,7 +918,15 @@ static int f2fs_ioc_start_atomic_write(struct file *filp)
 
 	set_inode_flag(F2FS_I(inode), FI_ATOMIC_FILE);
 
-	return f2fs_convert_inline_data(inode, MAX_INLINE_DATA + 1, NULL);
+	return f2fs_convert_inline_inode(inode);
+}
+
+static int f2fs_release_file(struct inode *inode, struct file *filp)
+{
+	/* some remained atomic pages should discarded */
+	if (f2fs_is_atomic_file(inode) || f2fs_is_volatile_file(inode))
+		commit_inmem_pages(inode, true);
+	return 0;
 }
 
 static int f2fs_ioc_commit_atomic_write(struct file *filp)
@@ -908,7 +960,8 @@ static int f2fs_ioc_start_volatile_write(struct file *filp)
 		return -EACCES;
 
 	set_inode_flag(F2FS_I(inode), FI_VOLATILE_FILE);
-	return 0;
+
+	return f2fs_convert_inline_inode(inode);
 }
 
 static int f2fs_ioc_fitrim(struct file *filp, unsigned long arg)
@@ -985,6 +1038,7 @@ const struct file_operations f2fs_file_operations = {
 	.read_iter	= generic_file_read_iter,
 	.write_iter	= generic_file_write_iter,
 	.open		= generic_file_open,
+	.release	= f2fs_release_file,
 	.mmap		= f2fs_file_mmap,
 	.fsync		= f2fs_sync_file,
 	.fallocate	= f2fs_fallocate,
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index 2a8f4ac..eec0933 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -96,8 +96,6 @@ int start_gc_thread(struct f2fs_sb_info *sbi)
 	dev_t dev = sbi->sb->s_bdev->bd_dev;
 	int err = 0;
 
-	if (!test_opt(sbi, BG_GC))
-		goto out;
 	gc_th = kmalloc(sizeof(struct f2fs_gc_kthread), GFP_KERNEL);
 	if (!gc_th) {
 		err = -ENOMEM;
@@ -340,34 +338,39 @@ static const struct victim_selection default_v_ops = {
 	.get_victim = get_victim_by_default,
 };
 
-static struct inode *find_gc_inode(nid_t ino, struct list_head *ilist)
+static struct inode *find_gc_inode(struct gc_inode_list *gc_list, nid_t ino)
 {
 	struct inode_entry *ie;
 
-	list_for_each_entry(ie, ilist, list)
-		if (ie->inode->i_ino == ino)
-			return ie->inode;
+	ie = radix_tree_lookup(&gc_list->iroot, ino);
+	if (ie)
+		return ie->inode;
 	return NULL;
 }
 
-static void add_gc_inode(struct inode *inode, struct list_head *ilist)
+static void add_gc_inode(struct gc_inode_list *gc_list, struct inode *inode)
 {
 	struct inode_entry *new_ie;
 
-	if (inode == find_gc_inode(inode->i_ino, ilist)) {
+	if (inode == find_gc_inode(gc_list, inode->i_ino)) {
 		iput(inode);
 		return;
 	}
-
 	new_ie = f2fs_kmem_cache_alloc(winode_slab, GFP_NOFS);
 	new_ie->inode = inode;
-	list_add_tail(&new_ie->list, ilist);
+retry:
+	if (radix_tree_insert(&gc_list->iroot, inode->i_ino, new_ie)) {
+		cond_resched();
+		goto retry;
+	}
+	list_add_tail(&new_ie->list, &gc_list->ilist);
 }
 
-static void put_gc_inode(struct list_head *ilist)
+static void put_gc_inode(struct gc_inode_list *gc_list)
 {
 	struct inode_entry *ie, *next_ie;
-	list_for_each_entry_safe(ie, next_ie, ilist, list) {
+	list_for_each_entry_safe(ie, next_ie, &gc_list->ilist, list) {
+		radix_tree_delete(&gc_list->iroot, ie->inode->i_ino);
 		iput(ie->inode);
 		list_del(&ie->list);
 		kmem_cache_free(winode_slab, ie);
@@ -553,7 +556,7 @@ out:
  * the victim data block is ignored.
  */
 static void gc_data_segment(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
-		struct list_head *ilist, unsigned int segno, int gc_type)
+		struct gc_inode_list *gc_list, unsigned int segno, int gc_type)
 {
 	struct super_block *sb = sbi->sb;
 	struct f2fs_summary *entry;
@@ -605,27 +608,27 @@ next_step:
 
 			data_page = find_data_page(inode,
 					start_bidx + ofs_in_node, false);
-			if (IS_ERR(data_page))
-				goto next_iput;
+			if (IS_ERR(data_page)) {
+				iput(inode);
+				continue;
+			}
 
 			f2fs_put_page(data_page, 0);
-			add_gc_inode(inode, ilist);
-		} else {
-			inode = find_gc_inode(dni.ino, ilist);
-			if (inode) {
-				start_bidx = start_bidx_of_node(nofs,
-								F2FS_I(inode));
-				data_page = get_lock_data_page(inode,
+			add_gc_inode(gc_list, inode);
+			continue;
+		}
+
+		/* phase 3 */
+		inode = find_gc_inode(gc_list, dni.ino);
+		if (inode) {
+			start_bidx = start_bidx_of_node(nofs, F2FS_I(inode));
+			data_page = get_lock_data_page(inode,
 						start_bidx + ofs_in_node);
-				if (IS_ERR(data_page))
-					continue;
-				move_data_page(inode, data_page, gc_type);
-				stat_inc_data_blk_count(sbi, 1);
-			}
+			if (IS_ERR(data_page))
+				continue;
+			move_data_page(inode, data_page, gc_type);
+			stat_inc_data_blk_count(sbi, 1);
 		}
-		continue;
-next_iput:
-		iput(inode);
 	}
 
 	if (++phase < 4)
@@ -646,18 +649,20 @@ next_iput:
 }
 
 static int __get_victim(struct f2fs_sb_info *sbi, unsigned int *victim,
-						int gc_type, int type)
+			int gc_type)
 {
 	struct sit_info *sit_i = SIT_I(sbi);
 	int ret;
+
 	mutex_lock(&sit_i->sentry_lock);
-	ret = DIRTY_I(sbi)->v_ops->get_victim(sbi, victim, gc_type, type, LFS);
+	ret = DIRTY_I(sbi)->v_ops->get_victim(sbi, victim, gc_type,
+					      NO_CHECK_TYPE, LFS);
 	mutex_unlock(&sit_i->sentry_lock);
 	return ret;
 }
 
 static void do_garbage_collect(struct f2fs_sb_info *sbi, unsigned int segno,
-				struct list_head *ilist, int gc_type)
+				struct gc_inode_list *gc_list, int gc_type)
 {
 	struct page *sum_page;
 	struct f2fs_summary_block *sum;
@@ -675,7 +680,7 @@ static void do_garbage_collect(struct f2fs_sb_info *sbi, unsigned int segno,
 		gc_node_segment(sbi, sum->entries, segno, gc_type);
 		break;
 	case SUM_TYPE_DATA:
-		gc_data_segment(sbi, sum->entries, ilist, segno, gc_type);
+		gc_data_segment(sbi, sum->entries, gc_list, segno, gc_type);
 		break;
 	}
 	blk_finish_plug(&plug);
@@ -688,16 +693,18 @@ static void do_garbage_collect(struct f2fs_sb_info *sbi, unsigned int segno,
 
 int f2fs_gc(struct f2fs_sb_info *sbi)
 {
-	struct list_head ilist;
 	unsigned int segno, i;
 	int gc_type = BG_GC;
 	int nfree = 0;
 	int ret = -1;
-	struct cp_control cpc = {
-		.reason = CP_SYNC,
+	struct cp_control cpc;
+	struct gc_inode_list gc_list = {
+		.ilist = LIST_HEAD_INIT(gc_list.ilist),
+		.iroot = RADIX_TREE_INIT(GFP_NOFS),
 	};
 
-	INIT_LIST_HEAD(&ilist);
+	cpc.reason = test_opt(sbi, FASTBOOT) ? CP_UMOUNT : CP_SYNC;
+
 gc_more:
 	if (unlikely(!(sbi->sb->s_flags & MS_ACTIVE)))
 		goto stop;
@@ -709,7 +716,7 @@ gc_more:
 		write_checkpoint(sbi, &cpc);
 	}
 
-	if (!__get_victim(sbi, &segno, gc_type, NO_CHECK_TYPE))
+	if (!__get_victim(sbi, &segno, gc_type))
 		goto stop;
 	ret = 0;
 
@@ -719,7 +726,7 @@ gc_more:
 								META_SSA);
 
 	for (i = 0; i < sbi->segs_per_sec; i++)
-		do_garbage_collect(sbi, segno + i, &ilist, gc_type);
+		do_garbage_collect(sbi, segno + i, &gc_list, gc_type);
 
 	if (gc_type == FG_GC) {
 		sbi->cur_victim_sec = NULL_SEGNO;
@@ -735,7 +742,7 @@ gc_more:
 stop:
 	mutex_unlock(&sbi->gc_mutex);
 
-	put_gc_inode(&ilist);
+	put_gc_inode(&gc_list);
 	return ret;
 }
 
diff --git a/fs/f2fs/gc.h b/fs/f2fs/gc.h
index 16f0b2b..6ff7ad3 100644
--- a/fs/f2fs/gc.h
+++ b/fs/f2fs/gc.h
@@ -40,6 +40,11 @@ struct inode_entry {
 	struct inode *inode;
 };
 
+struct gc_inode_list {
+	struct list_head ilist;
+	struct radix_tree_root iroot;
+};
+
 /*
  * inline functions
  */
diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c
index 88036fd..f2d3c58 100644
--- a/fs/f2fs/inline.c
+++ b/fs/f2fs/inline.c
@@ -15,35 +15,44 @@
 
 bool f2fs_may_inline(struct inode *inode)
 {
-	block_t nr_blocks;
-	loff_t i_size;
-
 	if (!test_opt(F2FS_I_SB(inode), INLINE_DATA))
 		return false;
 
 	if (f2fs_is_atomic_file(inode))
 		return false;
 
-	nr_blocks = F2FS_I(inode)->i_xattr_nid ? 3 : 2;
-	if (inode->i_blocks > nr_blocks)
+	if (!S_ISREG(inode->i_mode))
 		return false;
 
-	i_size = i_size_read(inode);
-	if (i_size > MAX_INLINE_DATA)
+	if (i_size_read(inode) > MAX_INLINE_DATA)
 		return false;
 
 	return true;
 }
 
-int f2fs_read_inline_data(struct inode *inode, struct page *page)
+void read_inline_data(struct page *page, struct page *ipage)
 {
-	struct page *ipage;
 	void *src_addr, *dst_addr;
 
-	if (page->index) {
-		zero_user_segment(page, 0, PAGE_CACHE_SIZE);
-		goto out;
-	}
+	if (PageUptodate(page))
+		return;
+
+	f2fs_bug_on(F2FS_P_SB(page), page->index);
+
+	zero_user_segment(page, MAX_INLINE_DATA, PAGE_CACHE_SIZE);
+
+	/* Copy the whole inline data block */
+	src_addr = inline_data_addr(ipage);
+	dst_addr = kmap_atomic(page);
+	memcpy(dst_addr, src_addr, MAX_INLINE_DATA);
+	flush_dcache_page(page);
+	kunmap_atomic(dst_addr);
+	SetPageUptodate(page);
+}
+
+int f2fs_read_inline_data(struct inode *inode, struct page *page)
+{
+	struct page *ipage;
 
 	ipage = get_node_page(F2FS_I_SB(inode), inode->i_ino);
 	if (IS_ERR(ipage)) {
@@ -51,112 +60,116 @@ int f2fs_read_inline_data(struct inode *inode, struct page *page)
 		return PTR_ERR(ipage);
 	}
 
-	zero_user_segment(page, MAX_INLINE_DATA, PAGE_CACHE_SIZE);
+	if (!f2fs_has_inline_data(inode)) {
+		f2fs_put_page(ipage, 1);
+		return -EAGAIN;
+	}
 
-	/* Copy the whole inline data block */
-	src_addr = inline_data_addr(ipage);
-	dst_addr = kmap(page);
-	memcpy(dst_addr, src_addr, MAX_INLINE_DATA);
-	kunmap(page);
-	f2fs_put_page(ipage, 1);
+	if (page->index)
+		zero_user_segment(page, 0, PAGE_CACHE_SIZE);
+	else
+		read_inline_data(page, ipage);
 
-out:
 	SetPageUptodate(page);
+	f2fs_put_page(ipage, 1);
 	unlock_page(page);
-
 	return 0;
 }
 
-static int __f2fs_convert_inline_data(struct inode *inode, struct page *page)
+int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page)
 {
-	int err = 0;
-	struct page *ipage;
-	struct dnode_of_data dn;
 	void *src_addr, *dst_addr;
 	block_t new_blk_addr;
-	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
 	struct f2fs_io_info fio = {
 		.type = DATA,
 		.rw = WRITE_SYNC | REQ_PRIO,
 	};
+	int dirty, err;
 
-	f2fs_lock_op(sbi);
-	ipage = get_node_page(sbi, inode->i_ino);
-	if (IS_ERR(ipage)) {
-		err = PTR_ERR(ipage);
-		goto out;
-	}
+	f2fs_bug_on(F2FS_I_SB(dn->inode), page->index);
 
-	/* someone else converted inline_data already */
-	if (!f2fs_has_inline_data(inode))
-		goto out;
+	if (!f2fs_exist_data(dn->inode))
+		goto clear_out;
 
-	/*
-	 * i_addr[0] is not used for inline data,
-	 * so reserving new block will not destroy inline data
-	 */
-	set_new_dnode(&dn, inode, ipage, NULL, 0);
-	err = f2fs_reserve_block(&dn, 0);
+	err = f2fs_reserve_block(dn, 0);
 	if (err)
-		goto out;
+		return err;
 
 	f2fs_wait_on_page_writeback(page, DATA);
+
+	if (PageUptodate(page))
+		goto no_update;
+
 	zero_user_segment(page, MAX_INLINE_DATA, PAGE_CACHE_SIZE);
 
 	/* Copy the whole inline data block */
-	src_addr = inline_data_addr(ipage);
-	dst_addr = kmap(page);
+	src_addr = inline_data_addr(dn->inode_page);
+	dst_addr = kmap_atomic(page);
 	memcpy(dst_addr, src_addr, MAX_INLINE_DATA);
-	kunmap(page);
+	flush_dcache_page(page);
+	kunmap_atomic(dst_addr);
 	SetPageUptodate(page);
+no_update:
+	/* clear dirty state */
+	dirty = clear_page_dirty_for_io(page);
 
 	/* write data page to try to make data consistent */
 	set_page_writeback(page);
-	write_data_page(page, &dn, &new_blk_addr, &fio);
-	update_extent_cache(new_blk_addr, &dn);
+
+	write_data_page(page, dn, &new_blk_addr, &fio);
+	update_extent_cache(new_blk_addr, dn);
 	f2fs_wait_on_page_writeback(page, DATA);
+	if (dirty)
+		inode_dec_dirty_pages(dn->inode);
 
-	/* clear inline data and flag after data writeback */
-	zero_user_segment(ipage, INLINE_DATA_OFFSET,
-				 INLINE_DATA_OFFSET + MAX_INLINE_DATA);
-	clear_inode_flag(F2FS_I(inode), FI_INLINE_DATA);
-	stat_dec_inline_inode(inode);
+	/* this converted inline_data should be recovered. */
+	set_inode_flag(F2FS_I(dn->inode), FI_APPEND_WRITE);
 
-	sync_inode_page(&dn);
-	f2fs_put_dnode(&dn);
-out:
-	f2fs_unlock_op(sbi);
-	return err;
+	/* clear inline data and flag after data writeback */
+	truncate_inline_data(dn->inode_page, 0);
+clear_out:
+	stat_dec_inline_inode(dn->inode);
+	f2fs_clear_inline_inode(dn->inode);
+	sync_inode_page(dn);
+	f2fs_put_dnode(dn);
+	return 0;
 }
 
-int f2fs_convert_inline_data(struct inode *inode, pgoff_t to_size,
-						struct page *page)
+int f2fs_convert_inline_inode(struct inode *inode)
 {
-	struct page *new_page = page;
-	int err;
+	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+	struct dnode_of_data dn;
+	struct page *ipage, *page;
+	int err = 0;
 
-	if (!f2fs_has_inline_data(inode))
-		return 0;
-	else if (to_size <= MAX_INLINE_DATA)
-		return 0;
+	page = grab_cache_page(inode->i_mapping, 0);
+	if (!page)
+		return -ENOMEM;
+
+	f2fs_lock_op(sbi);
 
-	if (!page || page->index != 0) {
-		new_page = grab_cache_page(inode->i_mapping, 0);
-		if (!new_page)
-			return -ENOMEM;
+	ipage = get_node_page(sbi, inode->i_ino);
+	if (IS_ERR(ipage)) {
+		err = PTR_ERR(ipage);
+		goto out;
 	}
 
-	err = __f2fs_convert_inline_data(inode, new_page);
-	if (!page || page->index != 0)
-		f2fs_put_page(new_page, 1);
+	set_new_dnode(&dn, inode, ipage, ipage, 0);
+
+	if (f2fs_has_inline_data(inode))
+		err = f2fs_convert_inline_page(&dn, page);
+
+	f2fs_put_dnode(&dn);
+out:
+	f2fs_unlock_op(sbi);
+
+	f2fs_put_page(page, 1);
 	return err;
 }
 
-int f2fs_write_inline_data(struct inode *inode,
-				struct page *page, unsigned size)
+int f2fs_write_inline_data(struct inode *inode, struct page *page)
 {
 	void *src_addr, *dst_addr;
-	struct page *ipage;
 	struct dnode_of_data dn;
 	int err;
 
@@ -164,47 +177,39 @@ int f2fs_write_inline_data(struct inode *inode,
 	err = get_dnode_of_data(&dn, 0, LOOKUP_NODE);
 	if (err)
 		return err;
-	ipage = dn.inode_page;
 
-	f2fs_wait_on_page_writeback(ipage, NODE);
-	zero_user_segment(ipage, INLINE_DATA_OFFSET,
-				 INLINE_DATA_OFFSET + MAX_INLINE_DATA);
-	src_addr = kmap(page);
-	dst_addr = inline_data_addr(ipage);
-	memcpy(dst_addr, src_addr, size);
-	kunmap(page);
-
-	/* Release the first data block if it is allocated */
 	if (!f2fs_has_inline_data(inode)) {
-		truncate_data_blocks_range(&dn, 1);
-		set_inode_flag(F2FS_I(inode), FI_INLINE_DATA);
-		stat_inc_inline_inode(inode);
+		f2fs_put_dnode(&dn);
+		return -EAGAIN;
 	}
 
+	f2fs_bug_on(F2FS_I_SB(inode), page->index);
+
+	f2fs_wait_on_page_writeback(dn.inode_page, NODE);
+	src_addr = kmap_atomic(page);
+	dst_addr = inline_data_addr(dn.inode_page);
+	memcpy(dst_addr, src_addr, MAX_INLINE_DATA);
+	kunmap_atomic(src_addr);
+
 	set_inode_flag(F2FS_I(inode), FI_APPEND_WRITE);
+	set_inode_flag(F2FS_I(inode), FI_DATA_EXIST);
+
 	sync_inode_page(&dn);
 	f2fs_put_dnode(&dn);
-
 	return 0;
 }
 
-void truncate_inline_data(struct inode *inode, u64 from)
+void truncate_inline_data(struct page *ipage, u64 from)
 {
-	struct page *ipage;
+	void *addr;
 
 	if (from >= MAX_INLINE_DATA)
 		return;
 
-	ipage = get_node_page(F2FS_I_SB(inode), inode->i_ino);
-	if (IS_ERR(ipage))
-		return;
-
 	f2fs_wait_on_page_writeback(ipage, NODE);
 
-	zero_user_segment(ipage, INLINE_DATA_OFFSET + from,
-				INLINE_DATA_OFFSET + MAX_INLINE_DATA);
-	set_page_dirty(ipage);
-	f2fs_put_page(ipage, 1);
+	addr = inline_data_addr(ipage);
+	memset(addr + from, 0, MAX_INLINE_DATA - from);
 }
 
 bool recover_inline_data(struct inode *inode, struct page *npage)
@@ -236,6 +241,10 @@ process_inline:
 		src_addr = inline_data_addr(npage);
 		dst_addr = inline_data_addr(ipage);
 		memcpy(dst_addr, src_addr, MAX_INLINE_DATA);
+
+		set_inode_flag(F2FS_I(inode), FI_INLINE_DATA);
+		set_inode_flag(F2FS_I(inode), FI_DATA_EXIST);
+
 		update_inode(inode, ipage);
 		f2fs_put_page(ipage, 1);
 		return true;
@@ -244,16 +253,279 @@ process_inline:
 	if (f2fs_has_inline_data(inode)) {
 		ipage = get_node_page(sbi, inode->i_ino);
 		f2fs_bug_on(sbi, IS_ERR(ipage));
-		f2fs_wait_on_page_writeback(ipage, NODE);
-		zero_user_segment(ipage, INLINE_DATA_OFFSET,
-				 INLINE_DATA_OFFSET + MAX_INLINE_DATA);
-		clear_inode_flag(F2FS_I(inode), FI_INLINE_DATA);
+		truncate_inline_data(ipage, 0);
+		f2fs_clear_inline_inode(inode);
 		update_inode(inode, ipage);
 		f2fs_put_page(ipage, 1);
 	} else if (ri && (ri->i_inline & F2FS_INLINE_DATA)) {
 		truncate_blocks(inode, 0, false);
-		set_inode_flag(F2FS_I(inode), FI_INLINE_DATA);
 		goto process_inline;
 	}
 	return false;
 }
+
+struct f2fs_dir_entry *find_in_inline_dir(struct inode *dir,
+				struct qstr *name, struct page **res_page)
+{
+	struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
+	struct f2fs_inline_dentry *inline_dentry;
+	struct f2fs_dir_entry *de;
+	struct f2fs_dentry_ptr d;
+	struct page *ipage;
+
+	ipage = get_node_page(sbi, dir->i_ino);
+	if (IS_ERR(ipage))
+		return NULL;
+
+	inline_dentry = inline_data_addr(ipage);
+
+	make_dentry_ptr(&d, (void *)inline_dentry, 2);
+	de = find_target_dentry(name, NULL, &d);
+
+	unlock_page(ipage);
+	if (de)
+		*res_page = ipage;
+	else
+		f2fs_put_page(ipage, 0);
+
+	/*
+	 * For the most part, it should be a bug when name_len is zero.
+	 * We stop here for figuring out where the bugs has occurred.
+	 */
+	f2fs_bug_on(sbi, d.max < 0);
+	return de;
+}
+
+struct f2fs_dir_entry *f2fs_parent_inline_dir(struct inode *dir,
+							struct page **p)
+{
+	struct f2fs_sb_info *sbi = F2FS_I_SB(dir);
+	struct page *ipage;
+	struct f2fs_dir_entry *de;
+	struct f2fs_inline_dentry *dentry_blk;
+
+	ipage = get_node_page(sbi, dir->i_ino);
+	if (IS_ERR(ipage))
+		return NULL;
+
+	dentry_blk = inline_data_addr(ipage);
+	de = &dentry_blk->dentry[1];
+	*p = ipage;
+	unlock_page(ipage);
+	return de;
+}
+
+int make_empty_inline_dir(struct inode *inode, struct inode *parent,
+							struct page *ipage)
+{
+	struct f2fs_inline_dentry *dentry_blk;
+	struct f2fs_dentry_ptr d;
+
+	dentry_blk = inline_data_addr(ipage);
+
+	make_dentry_ptr(&d, (void *)dentry_blk, 2);
+	do_make_empty_dir(inode, parent, &d);
+
+	set_page_dirty(ipage);
+
+	/* update i_size to MAX_INLINE_DATA */
+	if (i_size_read(inode) < MAX_INLINE_DATA) {
+		i_size_write(inode, MAX_INLINE_DATA);
+		set_inode_flag(F2FS_I(inode), FI_UPDATE_DIR);
+	}
+	return 0;
+}
+
+static int f2fs_convert_inline_dir(struct inode *dir, struct page *ipage,
+				struct f2fs_inline_dentry *inline_dentry)
+{
+	struct page *page;
+	struct dnode_of_data dn;
+	struct f2fs_dentry_block *dentry_blk;
+	int err;
+
+	page = grab_cache_page(dir->i_mapping, 0);
+	if (!page)
+		return -ENOMEM;
+
+	set_new_dnode(&dn, dir, ipage, NULL, 0);
+	err = f2fs_reserve_block(&dn, 0);
+	if (err)
+		goto out;
+
+	f2fs_wait_on_page_writeback(page, DATA);
+	zero_user_segment(page, 0, PAGE_CACHE_SIZE);
+
+	dentry_blk = kmap_atomic(page);
+
+	/* copy data from inline dentry block to new dentry block */
+	memcpy(dentry_blk->dentry_bitmap, inline_dentry->dentry_bitmap,
+					INLINE_DENTRY_BITMAP_SIZE);
+	memcpy(dentry_blk->dentry, inline_dentry->dentry,
+			sizeof(struct f2fs_dir_entry) * NR_INLINE_DENTRY);
+	memcpy(dentry_blk->filename, inline_dentry->filename,
+					NR_INLINE_DENTRY * F2FS_SLOT_LEN);
+
+	kunmap_atomic(dentry_blk);
+	SetPageUptodate(page);
+	set_page_dirty(page);
+
+	/* clear inline dir and flag after data writeback */
+	truncate_inline_data(ipage, 0);
+
+	stat_dec_inline_dir(dir);
+	clear_inode_flag(F2FS_I(dir), FI_INLINE_DENTRY);
+
+	if (i_size_read(dir) < PAGE_CACHE_SIZE) {
+		i_size_write(dir, PAGE_CACHE_SIZE);
+		set_inode_flag(F2FS_I(dir), FI_UPDATE_DIR);
+	}
+
+	sync_inode_page(&dn);
+out:
+	f2fs_put_page(page, 1);
+	return err;
+}
+
+int f2fs_add_inline_entry(struct inode *dir, const struct qstr *name,
+						struct inode *inode)
+{
+	struct f2fs_sb_info *sbi = F2FS_I_SB(dir);
+	struct page *ipage;
+	unsigned int bit_pos;
+	f2fs_hash_t name_hash;
+	struct f2fs_dir_entry *de;
+	size_t namelen = name->len;
+	struct f2fs_inline_dentry *dentry_blk = NULL;
+	int slots = GET_DENTRY_SLOTS(namelen);
+	struct page *page;
+	int err = 0;
+	int i;
+
+	name_hash = f2fs_dentry_hash(name);
+
+	ipage = get_node_page(sbi, dir->i_ino);
+	if (IS_ERR(ipage))
+		return PTR_ERR(ipage);
+
+	dentry_blk = inline_data_addr(ipage);
+	bit_pos = room_for_filename(&dentry_blk->dentry_bitmap,
+						slots, NR_INLINE_DENTRY);
+	if (bit_pos >= NR_INLINE_DENTRY) {
+		err = f2fs_convert_inline_dir(dir, ipage, dentry_blk);
+		if (!err)
+			err = -EAGAIN;
+		goto out;
+	}
+
+	down_write(&F2FS_I(inode)->i_sem);
+	page = init_inode_metadata(inode, dir, name, ipage);
+	if (IS_ERR(page)) {
+		err = PTR_ERR(page);
+		goto fail;
+	}
+
+	f2fs_wait_on_page_writeback(ipage, NODE);
+	de = &dentry_blk->dentry[bit_pos];
+	de->hash_code = name_hash;
+	de->name_len = cpu_to_le16(namelen);
+	memcpy(dentry_blk->filename[bit_pos], name->name, name->len);
+	de->ino = cpu_to_le32(inode->i_ino);
+	set_de_type(de, inode);
+	for (i = 0; i < slots; i++)
+		test_and_set_bit_le(bit_pos + i, &dentry_blk->dentry_bitmap);
+	set_page_dirty(ipage);
+
+	/* we don't need to mark_inode_dirty now */
+	F2FS_I(inode)->i_pino = dir->i_ino;
+	update_inode(inode, page);
+	f2fs_put_page(page, 1);
+
+	update_parent_metadata(dir, inode, 0);
+fail:
+	up_write(&F2FS_I(inode)->i_sem);
+
+	if (is_inode_flag_set(F2FS_I(dir), FI_UPDATE_DIR)) {
+		update_inode(dir, ipage);
+		clear_inode_flag(F2FS_I(dir), FI_UPDATE_DIR);
+	}
+out:
+	f2fs_put_page(ipage, 1);
+	return err;
+}
+
+void f2fs_delete_inline_entry(struct f2fs_dir_entry *dentry, struct page *page,
+					struct inode *dir, struct inode *inode)
+{
+	struct f2fs_inline_dentry *inline_dentry;
+	int slots = GET_DENTRY_SLOTS(le16_to_cpu(dentry->name_len));
+	unsigned int bit_pos;
+	int i;
+
+	lock_page(page);
+	f2fs_wait_on_page_writeback(page, NODE);
+
+	inline_dentry = inline_data_addr(page);
+	bit_pos = dentry - inline_dentry->dentry;
+	for (i = 0; i < slots; i++)
+		test_and_clear_bit_le(bit_pos + i,
+				&inline_dentry->dentry_bitmap);
+
+	set_page_dirty(page);
+
+	dir->i_ctime = dir->i_mtime = CURRENT_TIME;
+
+	if (inode)
+		f2fs_drop_nlink(dir, inode, page);
+
+	f2fs_put_page(page, 1);
+}
+
+bool f2fs_empty_inline_dir(struct inode *dir)
+{
+	struct f2fs_sb_info *sbi = F2FS_I_SB(dir);
+	struct page *ipage;
+	unsigned int bit_pos = 2;
+	struct f2fs_inline_dentry *dentry_blk;
+
+	ipage = get_node_page(sbi, dir->i_ino);
+	if (IS_ERR(ipage))
+		return false;
+
+	dentry_blk = inline_data_addr(ipage);
+	bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap,
+					NR_INLINE_DENTRY,
+					bit_pos);
+
+	f2fs_put_page(ipage, 1);
+
+	if (bit_pos < NR_INLINE_DENTRY)
+		return false;
+
+	return true;
+}
+
+int f2fs_read_inline_dir(struct file *file, struct dir_context *ctx)
+{
+	struct inode *inode = file_inode(file);
+	struct f2fs_inline_dentry *inline_dentry = NULL;
+	struct page *ipage = NULL;
+	struct f2fs_dentry_ptr d;
+
+	if (ctx->pos == NR_INLINE_DENTRY)
+		return 0;
+
+	ipage = get_node_page(F2FS_I_SB(inode), inode->i_ino);
+	if (IS_ERR(ipage))
+		return PTR_ERR(ipage);
+
+	inline_dentry = inline_data_addr(ipage);
+
+	make_dentry_ptr(&d, (void *)inline_dentry, 2);
+
+	if (!f2fs_fill_dentries(ctx, &d, 0))
+		ctx->pos = NR_INLINE_DENTRY;
+
+	f2fs_put_page(ipage, 1);
+	return 0;
+}
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
index 0deead4..196cc78 100644
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -67,12 +67,38 @@ static void __set_inode_rdev(struct inode *inode, struct f2fs_inode *ri)
 	}
 }
 
+static int __recover_inline_status(struct inode *inode, struct page *ipage)
+{
+	void *inline_data = inline_data_addr(ipage);
+	struct f2fs_inode *ri;
+	void *zbuf;
+
+	zbuf = kzalloc(MAX_INLINE_DATA, GFP_NOFS);
+	if (!zbuf)
+		return -ENOMEM;
+
+	if (!memcmp(zbuf, inline_data, MAX_INLINE_DATA)) {
+		kfree(zbuf);
+		return 0;
+	}
+	kfree(zbuf);
+
+	f2fs_wait_on_page_writeback(ipage, NODE);
+	set_inode_flag(F2FS_I(inode), FI_DATA_EXIST);
+
+	ri = F2FS_INODE(ipage);
+	set_raw_inline(F2FS_I(inode), ri);
+	set_page_dirty(ipage);
+	return 0;
+}
+
 static int do_read_inode(struct inode *inode)
 {
 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
 	struct f2fs_inode_info *fi = F2FS_I(inode);
 	struct page *node_page;
 	struct f2fs_inode *ri;
+	int err = 0;
 
 	/* Check if ino is within scope */
 	if (check_nid_range(sbi, inode->i_ino)) {
@@ -114,11 +140,19 @@ static int do_read_inode(struct inode *inode)
 	get_extent_info(&fi->ext, ri->i_ext);
 	get_inline_info(fi, ri);
 
+	/* check data exist */
+	if (f2fs_has_inline_data(inode) && !f2fs_exist_data(inode))
+		err = __recover_inline_status(inode, node_page);
+
 	/* get rdev by using inline_info */
 	__get_inode_rdev(inode, ri);
 
 	f2fs_put_page(node_page, 1);
-	return 0;
+
+	stat_inc_inline_inode(inode);
+	stat_inc_inline_dir(inode);
+
+	return err;
 }
 
 struct inode *f2fs_iget(struct super_block *sb, unsigned long ino)
@@ -156,7 +190,7 @@ make_now:
 		inode->i_op = &f2fs_dir_inode_operations;
 		inode->i_fop = &f2fs_dir_operations;
 		inode->i_mapping->a_ops = &f2fs_dblock_aops;
-		mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_ZERO);
+		mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_HIGH_ZERO);
 	} else if (S_ISLNK(inode->i_mode)) {
 		inode->i_op = &f2fs_symlink_inode_operations;
 		inode->i_mapping->a_ops = &f2fs_dblock_aops;
@@ -295,11 +329,12 @@ void f2fs_evict_inode(struct inode *inode)
 
 	f2fs_lock_op(sbi);
 	remove_inode_page(inode);
-	stat_dec_inline_inode(inode);
 	f2fs_unlock_op(sbi);
 
 	sb_end_intwrite(inode->i_sb);
 no_delete:
+	stat_dec_inline_dir(inode);
+	stat_dec_inline_inode(inode);
 	invalidate_mapping_pages(NODE_MAPPING(sbi), inode->i_ino, inode->i_ino);
 	if (xnid)
 		invalidate_mapping_pages(NODE_MAPPING(sbi), xnid, xnid);
@@ -325,8 +360,9 @@ void handle_failed_inode(struct inode *inode)
 		f2fs_truncate(inode);
 
 	remove_inode_page(inode);
-	stat_dec_inline_inode(inode);
 
+	clear_inode_flag(F2FS_I(inode), FI_INLINE_DATA);
+	clear_inode_flag(F2FS_I(inode), FI_INLINE_DENTRY);
 	alloc_nid_failed(sbi, inode->i_ino);
 	f2fs_unlock_op(sbi);
 
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
index 0d2526e..547a2de 100644
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -54,6 +54,12 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode)
 		nid_free = true;
 		goto out;
 	}
+
+	if (f2fs_may_inline(inode))
+		set_inode_flag(F2FS_I(inode), FI_INLINE_DATA);
+	if (test_opt(sbi, INLINE_DENTRY) && S_ISDIR(inode->i_mode))
+		set_inode_flag(F2FS_I(inode), FI_INLINE_DENTRY);
+
 	trace_f2fs_new_inode(inode, 0);
 	mark_inode_dirty(inode);
 	return inode;
@@ -129,8 +135,12 @@ static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
 
 	alloc_nid_done(sbi, ino);
 
+	stat_inc_inline_inode(inode);
 	d_instantiate(dentry, inode);
 	unlock_new_inode(inode);
+
+	if (IS_DIRSYNC(dir))
+		f2fs_sync_fs(sbi->sb, 1);
 	return 0;
 out:
 	handle_failed_inode(inode);
@@ -157,6 +167,9 @@ static int f2fs_link(struct dentry *old_dentry, struct inode *dir,
 	f2fs_unlock_op(sbi);
 
 	d_instantiate(dentry, inode);
+
+	if (IS_DIRSYNC(dir))
+		f2fs_sync_fs(sbi->sb, 1);
 	return 0;
 out:
 	clear_inode_flag(F2FS_I(inode), FI_INC_LINK);
@@ -187,14 +200,12 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry,
 	de = f2fs_find_entry(dir, &dentry->d_name, &page);
 	if (de) {
 		nid_t ino = le32_to_cpu(de->ino);
-		kunmap(page);
+		f2fs_dentry_kunmap(dir, page);
 		f2fs_put_page(page, 0);
 
 		inode = f2fs_iget(dir->i_sb, ino);
 		if (IS_ERR(inode))
 			return ERR_CAST(inode);
-
-		stat_inc_inline_inode(inode);
 	}
 
 	return d_splice_alias(inode, dentry);
@@ -219,15 +230,18 @@ static int f2fs_unlink(struct inode *dir, struct dentry *dentry)
 	err = acquire_orphan_inode(sbi);
 	if (err) {
 		f2fs_unlock_op(sbi);
-		kunmap(page);
+		f2fs_dentry_kunmap(dir, page);
 		f2fs_put_page(page, 0);
 		goto fail;
 	}
-	f2fs_delete_entry(de, page, inode);
+	f2fs_delete_entry(de, page, dir, inode);
 	f2fs_unlock_op(sbi);
 
 	/* In order to evict this inode, we set it dirty */
 	mark_inode_dirty(inode);
+
+	if (IS_DIRSYNC(dir))
+		f2fs_sync_fs(sbi->sb, 1);
 fail:
 	trace_f2fs_unlink_exit(inode, err);
 	return err;
@@ -261,6 +275,9 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry,
 
 	d_instantiate(dentry, inode);
 	unlock_new_inode(inode);
+
+	if (IS_DIRSYNC(dir))
+		f2fs_sync_fs(sbi->sb, 1);
 	return err;
 out:
 	handle_failed_inode(inode);
@@ -291,11 +308,14 @@ static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
 		goto out_fail;
 	f2fs_unlock_op(sbi);
 
+	stat_inc_inline_dir(inode);
 	alloc_nid_done(sbi, inode->i_ino);
 
 	d_instantiate(dentry, inode);
 	unlock_new_inode(inode);
 
+	if (IS_DIRSYNC(dir))
+		f2fs_sync_fs(sbi->sb, 1);
 	return 0;
 
 out_fail:
@@ -338,8 +358,12 @@ static int f2fs_mknod(struct inode *dir, struct dentry *dentry,
 	f2fs_unlock_op(sbi);
 
 	alloc_nid_done(sbi, inode->i_ino);
+
 	d_instantiate(dentry, inode);
 	unlock_new_inode(inode);
+
+	if (IS_DIRSYNC(dir))
+		f2fs_sync_fs(sbi->sb, 1);
 	return 0;
 out:
 	handle_failed_inode(inode);
@@ -435,7 +459,7 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
 	old_inode->i_ctime = CURRENT_TIME;
 	mark_inode_dirty(old_inode);
 
-	f2fs_delete_entry(old_entry, old_page, NULL);
+	f2fs_delete_entry(old_entry, old_page, old_dir, NULL);
 
 	if (old_dir_entry) {
 		if (old_dir != new_dir) {
@@ -443,7 +467,7 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
 						old_dir_page, new_dir);
 			update_inode_page(old_inode);
 		} else {
-			kunmap(old_dir_page);
+			f2fs_dentry_kunmap(old_inode, old_dir_page);
 			f2fs_put_page(old_dir_page, 0);
 		}
 		drop_nlink(old_dir);
@@ -452,19 +476,22 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
 	}
 
 	f2fs_unlock_op(sbi);
+
+	if (IS_DIRSYNC(old_dir) || IS_DIRSYNC(new_dir))
+		f2fs_sync_fs(sbi->sb, 1);
 	return 0;
 
 put_out_dir:
 	f2fs_unlock_op(sbi);
-	kunmap(new_page);
+	f2fs_dentry_kunmap(new_dir, new_page);
 	f2fs_put_page(new_page, 0);
 out_dir:
 	if (old_dir_entry) {
-		kunmap(old_dir_page);
+		f2fs_dentry_kunmap(old_inode, old_dir_page);
 		f2fs_put_page(old_dir_page, 0);
 	}
 out_old:
-	kunmap(old_page);
+	f2fs_dentry_kunmap(old_dir, old_page);
 	f2fs_put_page(old_page, 0);
 out:
 	return err;
@@ -588,6 +615,9 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
 	update_inode_page(new_dir);
 
 	f2fs_unlock_op(sbi);
+
+	if (IS_DIRSYNC(old_dir) || IS_DIRSYNC(new_dir))
+		f2fs_sync_fs(sbi->sb, 1);
 	return 0;
 out_undo:
 	/* Still we may fail to recover name info of f2fs_inode here */
@@ -596,19 +626,19 @@ out_unlock:
 	f2fs_unlock_op(sbi);
 out_new_dir:
 	if (new_dir_entry) {
-		kunmap(new_dir_page);
+		f2fs_dentry_kunmap(new_inode, new_dir_page);
 		f2fs_put_page(new_dir_page, 0);
 	}
 out_old_dir:
 	if (old_dir_entry) {
-		kunmap(old_dir_page);
+		f2fs_dentry_kunmap(old_inode, old_dir_page);
 		f2fs_put_page(old_dir_page, 0);
 	}
 out_new:
-	kunmap(new_page);
+	f2fs_dentry_kunmap(new_dir, new_page);
 	f2fs_put_page(new_page, 0);
 out_old:
-	kunmap(old_page);
+	f2fs_dentry_kunmap(old_dir, old_page);
 	f2fs_put_page(old_page, 0);
 out:
 	return err;
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 44b8afe..f83326c 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -31,22 +31,38 @@ bool available_free_memory(struct f2fs_sb_info *sbi, int type)
 {
 	struct f2fs_nm_info *nm_i = NM_I(sbi);
 	struct sysinfo val;
+	unsigned long avail_ram;
 	unsigned long mem_size = 0;
 	bool res = false;
 
 	si_meminfo(&val);
-	/* give 25%, 25%, 50% memory for each components respectively */
+
+	/* only uses low memory */
+	avail_ram = val.totalram - val.totalhigh;
+
+	/* give 25%, 25%, 50%, 50% memory for each components respectively */
 	if (type == FREE_NIDS) {
-		mem_size = (nm_i->fcnt * sizeof(struct free_nid)) >> 12;
-		res = mem_size < ((val.totalram * nm_i->ram_thresh / 100) >> 2);
+		mem_size = (nm_i->fcnt * sizeof(struct free_nid)) >>
+							PAGE_CACHE_SHIFT;
+		res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 2);
 	} else if (type == NAT_ENTRIES) {
-		mem_size = (nm_i->nat_cnt * sizeof(struct nat_entry)) >> 12;
-		res = mem_size < ((val.totalram * nm_i->ram_thresh / 100) >> 2);
+		mem_size = (nm_i->nat_cnt * sizeof(struct nat_entry)) >>
+							PAGE_CACHE_SHIFT;
+		res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 2);
 	} else if (type == DIRTY_DENTS) {
 		if (sbi->sb->s_bdi->dirty_exceeded)
 			return false;
 		mem_size = get_pages(sbi, F2FS_DIRTY_DENTS);
-		res = mem_size < ((val.totalram * nm_i->ram_thresh / 100) >> 1);
+		res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1);
+	} else if (type == INO_ENTRIES) {
+		int i;
+
+		if (sbi->sb->s_bdi->dirty_exceeded)
+			return false;
+		for (i = 0; i <= UPDATE_INO; i++)
+			mem_size += (sbi->im[i].ino_num *
+				sizeof(struct ino_entry)) >> PAGE_CACHE_SHIFT;
+		res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1);
 	}
 	return res;
 }
@@ -131,7 +147,7 @@ static void __set_nat_cache_dirty(struct f2fs_nm_info *nm_i,
 
 	if (get_nat_flag(ne, IS_DIRTY))
 		return;
-retry:
+
 	head = radix_tree_lookup(&nm_i->nat_set_root, set);
 	if (!head) {
 		head = f2fs_kmem_cache_alloc(nat_entry_set_slab, GFP_ATOMIC);
@@ -140,11 +156,7 @@ retry:
 		INIT_LIST_HEAD(&head->set_list);
 		head->set = set;
 		head->entry_cnt = 0;
-
-		if (radix_tree_insert(&nm_i->nat_set_root, set, head)) {
-			cond_resched();
-			goto retry;
-		}
+		f2fs_radix_tree_insert(&nm_i->nat_set_root, set, head);
 	}
 	list_move_tail(&ne->list, &head->entry_list);
 	nm_i->dirty_nat_cnt++;
@@ -155,7 +167,7 @@ retry:
 static void __clear_nat_cache_dirty(struct f2fs_nm_info *nm_i,
 						struct nat_entry *ne)
 {
-	nid_t set = ne->ni.nid / NAT_ENTRY_PER_BLOCK;
+	nid_t set = NAT_BLOCK_OFFSET(ne->ni.nid);
 	struct nat_entry_set *head;
 
 	head = radix_tree_lookup(&nm_i->nat_set_root, set);
@@ -180,11 +192,11 @@ bool is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid)
 	struct nat_entry *e;
 	bool is_cp = true;
 
-	read_lock(&nm_i->nat_tree_lock);
+	down_read(&nm_i->nat_tree_lock);
 	e = __lookup_nat_cache(nm_i, nid);
 	if (e && !get_nat_flag(e, IS_CHECKPOINTED))
 		is_cp = false;
-	read_unlock(&nm_i->nat_tree_lock);
+	up_read(&nm_i->nat_tree_lock);
 	return is_cp;
 }
 
@@ -194,11 +206,11 @@ bool has_fsynced_inode(struct f2fs_sb_info *sbi, nid_t ino)
 	struct nat_entry *e;
 	bool fsynced = false;
 
-	read_lock(&nm_i->nat_tree_lock);
+	down_read(&nm_i->nat_tree_lock);
 	e = __lookup_nat_cache(nm_i, ino);
 	if (e && get_nat_flag(e, HAS_FSYNCED_INODE))
 		fsynced = true;
-	read_unlock(&nm_i->nat_tree_lock);
+	up_read(&nm_i->nat_tree_lock);
 	return fsynced;
 }
 
@@ -208,13 +220,13 @@ bool need_inode_block_update(struct f2fs_sb_info *sbi, nid_t ino)
 	struct nat_entry *e;
 	bool need_update = true;
 
-	read_lock(&nm_i->nat_tree_lock);
+	down_read(&nm_i->nat_tree_lock);
 	e = __lookup_nat_cache(nm_i, ino);
 	if (e && get_nat_flag(e, HAS_LAST_FSYNC) &&
 			(get_nat_flag(e, IS_CHECKPOINTED) ||
 			 get_nat_flag(e, HAS_FSYNCED_INODE)))
 		need_update = false;
-	read_unlock(&nm_i->nat_tree_lock);
+	up_read(&nm_i->nat_tree_lock);
 	return need_update;
 }
 
@@ -222,13 +234,8 @@ static struct nat_entry *grab_nat_entry(struct f2fs_nm_info *nm_i, nid_t nid)
 {
 	struct nat_entry *new;
 
-	new = kmem_cache_alloc(nat_entry_slab, GFP_ATOMIC);
-	if (!new)
-		return NULL;
-	if (radix_tree_insert(&nm_i->nat_root, nid, new)) {
-		kmem_cache_free(nat_entry_slab, new);
-		return NULL;
-	}
+	new = f2fs_kmem_cache_alloc(nat_entry_slab, GFP_ATOMIC);
+	f2fs_radix_tree_insert(&nm_i->nat_root, nid, new);
 	memset(new, 0, sizeof(struct nat_entry));
 	nat_set_nid(new, nid);
 	nat_reset_flag(new);
@@ -241,18 +248,14 @@ static void cache_nat_entry(struct f2fs_nm_info *nm_i, nid_t nid,
 						struct f2fs_nat_entry *ne)
 {
 	struct nat_entry *e;
-retry:
-	write_lock(&nm_i->nat_tree_lock);
+
+	down_write(&nm_i->nat_tree_lock);
 	e = __lookup_nat_cache(nm_i, nid);
 	if (!e) {
 		e = grab_nat_entry(nm_i, nid);
-		if (!e) {
-			write_unlock(&nm_i->nat_tree_lock);
-			goto retry;
-		}
 		node_info_from_raw_nat(&e->ni, ne);
 	}
-	write_unlock(&nm_i->nat_tree_lock);
+	up_write(&nm_i->nat_tree_lock);
 }
 
 static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni,
@@ -260,15 +263,11 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni,
 {
 	struct f2fs_nm_info *nm_i = NM_I(sbi);
 	struct nat_entry *e;
-retry:
-	write_lock(&nm_i->nat_tree_lock);
+
+	down_write(&nm_i->nat_tree_lock);
 	e = __lookup_nat_cache(nm_i, ni->nid);
 	if (!e) {
 		e = grab_nat_entry(nm_i, ni->nid);
-		if (!e) {
-			write_unlock(&nm_i->nat_tree_lock);
-			goto retry;
-		}
 		e->ni = *ni;
 		f2fs_bug_on(sbi, ni->blk_addr == NEW_ADDR);
 	} else if (new_blkaddr == NEW_ADDR) {
@@ -310,7 +309,7 @@ retry:
 			set_nat_flag(e, HAS_FSYNCED_INODE, true);
 		set_nat_flag(e, HAS_LAST_FSYNC, fsync_done);
 	}
-	write_unlock(&nm_i->nat_tree_lock);
+	up_write(&nm_i->nat_tree_lock);
 }
 
 int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink)
@@ -320,7 +319,7 @@ int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink)
 	if (available_free_memory(sbi, NAT_ENTRIES))
 		return 0;
 
-	write_lock(&nm_i->nat_tree_lock);
+	down_write(&nm_i->nat_tree_lock);
 	while (nr_shrink && !list_empty(&nm_i->nat_entries)) {
 		struct nat_entry *ne;
 		ne = list_first_entry(&nm_i->nat_entries,
@@ -328,7 +327,7 @@ int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink)
 		__del_from_nat_cache(nm_i, ne);
 		nr_shrink--;
 	}
-	write_unlock(&nm_i->nat_tree_lock);
+	up_write(&nm_i->nat_tree_lock);
 	return nr_shrink;
 }
 
@@ -351,14 +350,14 @@ void get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info *ni)
 	ni->nid = nid;
 
 	/* Check nat cache */
-	read_lock(&nm_i->nat_tree_lock);
+	down_read(&nm_i->nat_tree_lock);
 	e = __lookup_nat_cache(nm_i, nid);
 	if (e) {
 		ni->ino = nat_get_ino(e);
 		ni->blk_addr = nat_get_blkaddr(e);
 		ni->version = nat_get_version(e);
 	}
-	read_unlock(&nm_i->nat_tree_lock);
+	up_read(&nm_i->nat_tree_lock);
 	if (e)
 		return;
 
@@ -1298,16 +1297,22 @@ static int f2fs_write_node_page(struct page *page,
 		return 0;
 	}
 
-	if (wbc->for_reclaim)
-		goto redirty_out;
-
-	down_read(&sbi->node_write);
+	if (wbc->for_reclaim) {
+		if (!down_read_trylock(&sbi->node_write))
+			goto redirty_out;
+	} else {
+		down_read(&sbi->node_write);
+	}
 	set_page_writeback(page);
 	write_node_page(sbi, page, &fio, nid, ni.blk_addr, &new_addr);
 	set_node_addr(sbi, &ni, new_addr, is_fsync_dnode(page));
 	dec_page_count(sbi, F2FS_DIRTY_NODES);
 	up_read(&sbi->node_write);
 	unlock_page(page);
+
+	if (wbc->for_reclaim)
+		f2fs_submit_merged_bio(sbi, NODE, WRITE);
+
 	return 0;
 
 redirty_out:
@@ -1410,13 +1415,13 @@ static int add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool build)
 
 	if (build) {
 		/* do not add allocated nids */
-		read_lock(&nm_i->nat_tree_lock);
+		down_read(&nm_i->nat_tree_lock);
 		ne = __lookup_nat_cache(nm_i, nid);
 		if (ne &&
 			(!get_nat_flag(ne, IS_CHECKPOINTED) ||
 				nat_get_blkaddr(ne) != NULL_ADDR))
 			allocated = true;
-		read_unlock(&nm_i->nat_tree_lock);
+		up_read(&nm_i->nat_tree_lock);
 		if (allocated)
 			return 0;
 	}
@@ -1425,15 +1430,22 @@ static int add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool build)
 	i->nid = nid;
 	i->state = NID_NEW;
 
+	if (radix_tree_preload(GFP_NOFS)) {
+		kmem_cache_free(free_nid_slab, i);
+		return 0;
+	}
+
 	spin_lock(&nm_i->free_nid_list_lock);
 	if (radix_tree_insert(&nm_i->free_nid_root, i->nid, i)) {
 		spin_unlock(&nm_i->free_nid_list_lock);
+		radix_tree_preload_end();
 		kmem_cache_free(free_nid_slab, i);
 		return 0;
 	}
 	list_add_tail(&i->list, &nm_i->free_nid_list);
 	nm_i->fcnt++;
 	spin_unlock(&nm_i->free_nid_list_lock);
+	radix_tree_preload_end();
 	return 1;
 }
 
@@ -1804,21 +1816,15 @@ static void remove_nats_in_journal(struct f2fs_sb_info *sbi)
 		nid_t nid = le32_to_cpu(nid_in_journal(sum, i));
 
 		raw_ne = nat_in_journal(sum, i);
-retry:
-		write_lock(&nm_i->nat_tree_lock);
-		ne = __lookup_nat_cache(nm_i, nid);
-		if (ne)
-			goto found;
 
-		ne = grab_nat_entry(nm_i, nid);
+		down_write(&nm_i->nat_tree_lock);
+		ne = __lookup_nat_cache(nm_i, nid);
 		if (!ne) {
-			write_unlock(&nm_i->nat_tree_lock);
-			goto retry;
+			ne = grab_nat_entry(nm_i, nid);
+			node_info_from_raw_nat(&ne->ni, &raw_ne);
 		}
-		node_info_from_raw_nat(&ne->ni, &raw_ne);
-found:
 		__set_nat_cache_dirty(nm_i, ne);
-		write_unlock(&nm_i->nat_tree_lock);
+		up_write(&nm_i->nat_tree_lock);
 	}
 	update_nats_in_cursum(sum, -i);
 	mutex_unlock(&curseg->curseg_mutex);
@@ -1889,10 +1895,10 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
 		}
 		raw_nat_from_node_info(raw_ne, &ne->ni);
 
-		write_lock(&NM_I(sbi)->nat_tree_lock);
+		down_write(&NM_I(sbi)->nat_tree_lock);
 		nat_reset_flag(ne);
 		__clear_nat_cache_dirty(NM_I(sbi), ne);
-		write_unlock(&NM_I(sbi)->nat_tree_lock);
+		up_write(&NM_I(sbi)->nat_tree_lock);
 
 		if (nat_get_blkaddr(ne) == NULL_ADDR)
 			add_free_nid(sbi, nid, false);
@@ -1903,10 +1909,10 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
 	else
 		f2fs_put_page(page, 1);
 
-	if (!set->entry_cnt) {
-		radix_tree_delete(&NM_I(sbi)->nat_set_root, set->set);
-		kmem_cache_free(nat_entry_set_slab, set);
-	}
+	f2fs_bug_on(sbi, set->entry_cnt);
+
+	radix_tree_delete(&NM_I(sbi)->nat_set_root, set->set);
+	kmem_cache_free(nat_entry_set_slab, set);
 }
 
 /*
@@ -1923,6 +1929,8 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)
 	nid_t set_idx = 0;
 	LIST_HEAD(sets);
 
+	if (!nm_i->dirty_nat_cnt)
+		return;
 	/*
 	 * if there are no enough space in journal to store dirty nat
 	 * entries, remove all entries from journal and merge them
@@ -1931,9 +1939,6 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)
 	if (!__has_cursum_space(sum, nm_i->dirty_nat_cnt, NAT_JOURNAL))
 		remove_nats_in_journal(sbi);
 
-	if (!nm_i->dirty_nat_cnt)
-		return;
-
 	while ((found = __gang_lookup_nat_set(nm_i,
 					set_idx, NATVEC_SIZE, setvec))) {
 		unsigned idx;
@@ -1973,13 +1978,13 @@ static int init_node_manager(struct f2fs_sb_info *sbi)
 
 	INIT_RADIX_TREE(&nm_i->free_nid_root, GFP_ATOMIC);
 	INIT_LIST_HEAD(&nm_i->free_nid_list);
-	INIT_RADIX_TREE(&nm_i->nat_root, GFP_ATOMIC);
-	INIT_RADIX_TREE(&nm_i->nat_set_root, GFP_ATOMIC);
+	INIT_RADIX_TREE(&nm_i->nat_root, GFP_NOIO);
+	INIT_RADIX_TREE(&nm_i->nat_set_root, GFP_NOIO);
 	INIT_LIST_HEAD(&nm_i->nat_entries);
 
 	mutex_init(&nm_i->build_lock);
 	spin_lock_init(&nm_i->free_nid_list_lock);
-	rwlock_init(&nm_i->nat_tree_lock);
+	init_rwsem(&nm_i->nat_tree_lock);
 
 	nm_i->next_scan_nid = le32_to_cpu(sbi->ckpt->next_free_nid);
 	nm_i->bitmap_size = __bitmap_size(sbi, NAT_BITMAP);
@@ -2035,7 +2040,7 @@ void destroy_node_manager(struct f2fs_sb_info *sbi)
 	spin_unlock(&nm_i->free_nid_list_lock);
 
 	/* destroy nat cache */
-	write_lock(&nm_i->nat_tree_lock);
+	down_write(&nm_i->nat_tree_lock);
 	while ((found = __gang_lookup_nat_cache(nm_i,
 					nid, NATVEC_SIZE, natvec))) {
 		unsigned idx;
@@ -2044,7 +2049,7 @@ void destroy_node_manager(struct f2fs_sb_info *sbi)
 			__del_from_nat_cache(nm_i, natvec[idx]);
 	}
 	f2fs_bug_on(sbi, nm_i->nat_cnt);
-	write_unlock(&nm_i->nat_tree_lock);
+	up_write(&nm_i->nat_tree_lock);
 
 	kfree(nm_i->nat_bitmap);
 	sbi->nm_info = NULL;
@@ -2061,17 +2066,17 @@ int __init create_node_manager_caches(void)
 	free_nid_slab = f2fs_kmem_cache_create("free_nid",
 			sizeof(struct free_nid));
 	if (!free_nid_slab)
-		goto destory_nat_entry;
+		goto destroy_nat_entry;
 
 	nat_entry_set_slab = f2fs_kmem_cache_create("nat_entry_set",
 			sizeof(struct nat_entry_set));
 	if (!nat_entry_set_slab)
-		goto destory_free_nid;
+		goto destroy_free_nid;
 	return 0;
 
-destory_free_nid:
+destroy_free_nid:
 	kmem_cache_destroy(free_nid_slab);
-destory_nat_entry:
+destroy_nat_entry:
 	kmem_cache_destroy(nat_entry_slab);
 fail:
 	return -ENOMEM;
diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h
index 8d5e6e0d..d10b644 100644
--- a/fs/f2fs/node.h
+++ b/fs/f2fs/node.h
@@ -106,7 +106,8 @@ static inline void raw_nat_from_node_info(struct f2fs_nat_entry *raw_ne,
 enum mem_type {
 	FREE_NIDS,	/* indicates the free nid list */
 	NAT_ENTRIES,	/* indicates the cached nat entry */
-	DIRTY_DENTS	/* indicates dirty dentry pages */
+	DIRTY_DENTS,	/* indicates dirty dentry pages */
+	INO_ENTRIES,	/* indicates inode entries */
 };
 
 struct nat_entry_set {
@@ -192,10 +193,7 @@ static inline void set_to_next_nat(struct f2fs_nm_info *nm_i, nid_t start_nid)
 {
 	unsigned int block_off = NAT_BLOCK_OFFSET(start_nid);
 
-	if (f2fs_test_bit(block_off, nm_i->nat_bitmap))
-		f2fs_clear_bit(block_off, nm_i->nat_bitmap);
-	else
-		f2fs_set_bit(block_off, nm_i->nat_bitmap);
+	f2fs_change_bit(block_off, nm_i->nat_bitmap);
 }
 
 static inline void fill_node_footer(struct page *page, nid_t nid,
diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
index ebd0132..9160a37 100644
--- a/fs/f2fs/recovery.c
+++ b/fs/f2fs/recovery.c
@@ -111,7 +111,7 @@ retry:
 			iput(einode);
 			goto out_unmap_put;
 		}
-		f2fs_delete_entry(de, page, einode);
+		f2fs_delete_entry(de, page, dir, einode);
 		iput(einode);
 		goto retry;
 	}
@@ -129,7 +129,7 @@ retry:
 	goto out;
 
 out_unmap_put:
-	kunmap(page);
+	f2fs_dentry_kunmap(dir, page);
 	f2fs_put_page(page, 0);
 out_err:
 	iput(dir);
@@ -170,13 +170,15 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head)
 	curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
 	blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
 
+	ra_meta_pages(sbi, blkaddr, 1, META_POR);
+
 	while (1) {
 		struct fsync_inode_entry *entry;
 
 		if (blkaddr < MAIN_BLKADDR(sbi) || blkaddr >= MAX_BLKADDR(sbi))
 			return 0;
 
-		page = get_meta_page_ra(sbi, blkaddr);
+		page = get_meta_page(sbi, blkaddr);
 
 		if (cp_ver != cpver_of_node(page))
 			break;
@@ -227,6 +229,8 @@ next:
 		/* check next segment */
 		blkaddr = next_blkaddr_of_node(page);
 		f2fs_put_page(page, 1);
+
+		ra_meta_pages_cond(sbi, blkaddr);
 	}
 	f2fs_put_page(page, 1);
 	return err;
@@ -436,7 +440,9 @@ static int recover_data(struct f2fs_sb_info *sbi,
 		if (blkaddr < MAIN_BLKADDR(sbi) || blkaddr >= MAX_BLKADDR(sbi))
 			break;
 
-		page = get_meta_page_ra(sbi, blkaddr);
+		ra_meta_pages_cond(sbi, blkaddr);
+
+		page = get_meta_page(sbi, blkaddr);
 
 		if (cp_ver != cpver_of_node(page)) {
 			f2fs_put_page(page, 1);
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 923cb76..42607a6 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -178,17 +178,47 @@ void register_inmem_page(struct inode *inode, struct page *page)
 {
 	struct f2fs_inode_info *fi = F2FS_I(inode);
 	struct inmem_pages *new;
+	int err;
+
+	SetPagePrivate(page);
 
 	new = f2fs_kmem_cache_alloc(inmem_entry_slab, GFP_NOFS);
 
 	/* add atomic page indices to the list */
 	new->page = page;
 	INIT_LIST_HEAD(&new->list);
-
+retry:
 	/* increase reference count with clean state */
 	mutex_lock(&fi->inmem_lock);
+	err = radix_tree_insert(&fi->inmem_root, page->index, new);
+	if (err == -EEXIST) {
+		mutex_unlock(&fi->inmem_lock);
+		kmem_cache_free(inmem_entry_slab, new);
+		return;
+	} else if (err) {
+		mutex_unlock(&fi->inmem_lock);
+		goto retry;
+	}
 	get_page(page);
 	list_add_tail(&new->list, &fi->inmem_pages);
+	inc_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES);
+	mutex_unlock(&fi->inmem_lock);
+}
+
+void invalidate_inmem_page(struct inode *inode, struct page *page)
+{
+	struct f2fs_inode_info *fi = F2FS_I(inode);
+	struct inmem_pages *cur;
+
+	mutex_lock(&fi->inmem_lock);
+	cur = radix_tree_lookup(&fi->inmem_root, page->index);
+	if (cur) {
+		radix_tree_delete(&fi->inmem_root, cur->page->index);
+		f2fs_put_page(cur->page, 0);
+		list_del(&cur->list);
+		kmem_cache_free(inmem_entry_slab, cur);
+		dec_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES);
+	}
 	mutex_unlock(&fi->inmem_lock);
 }
 
@@ -203,7 +233,16 @@ void commit_inmem_pages(struct inode *inode, bool abort)
 		.rw = WRITE_SYNC,
 	};
 
-	f2fs_balance_fs(sbi);
+	/*
+	 * The abort is true only when f2fs_evict_inode is called.
+	 * Basically, the f2fs_evict_inode doesn't produce any data writes, so
+	 * that we don't need to call f2fs_balance_fs.
+	 * Otherwise, f2fs_gc in f2fs_balance_fs can wait forever until this
+	 * inode becomes free by iget_locked in f2fs_iget.
+	 */
+	if (!abort)
+		f2fs_balance_fs(sbi);
+
 	f2fs_lock_op(sbi);
 
 	mutex_lock(&fi->inmem_lock);
@@ -216,9 +255,11 @@ void commit_inmem_pages(struct inode *inode, bool abort)
 			do_write_data_page(cur->page, &fio);
 			submit_bio = true;
 		}
+		radix_tree_delete(&fi->inmem_root, cur->page->index);
 		f2fs_put_page(cur->page, 1);
 		list_del(&cur->list);
 		kmem_cache_free(inmem_entry_slab, cur);
+		dec_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES);
 	}
 	if (submit_bio)
 		f2fs_submit_merged_bio(sbi, DATA, WRITE);
@@ -248,7 +289,8 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi)
 {
 	/* check the # of cached NAT entries and prefree segments */
 	if (try_to_free_nats(sbi, NAT_ENTRY_PER_BLOCK) ||
-				excess_prefree_segs(sbi))
+			excess_prefree_segs(sbi) ||
+			available_free_memory(sbi, INO_ENTRIES))
 		f2fs_sync_fs(sbi->sb, true);
 }
 
@@ -441,10 +483,33 @@ void discard_next_dnode(struct f2fs_sb_info *sbi, block_t blkaddr)
 	}
 }
 
-static void add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc)
+static void __add_discard_entry(struct f2fs_sb_info *sbi,
+		struct cp_control *cpc, unsigned int start, unsigned int end)
 {
 	struct list_head *head = &SM_I(sbi)->discard_list;
-	struct discard_entry *new;
+	struct discard_entry *new, *last;
+
+	if (!list_empty(head)) {
+		last = list_last_entry(head, struct discard_entry, list);
+		if (START_BLOCK(sbi, cpc->trim_start) + start ==
+						last->blkaddr + last->len) {
+			last->len += end - start;
+			goto done;
+		}
+	}
+
+	new = f2fs_kmem_cache_alloc(discard_entry_slab, GFP_NOFS);
+	INIT_LIST_HEAD(&new->list);
+	new->blkaddr = START_BLOCK(sbi, cpc->trim_start) + start;
+	new->len = end - start;
+	list_add_tail(&new->list, head);
+done:
+	SM_I(sbi)->nr_discards += end - start;
+	cpc->trimmed += end - start;
+}
+
+static void add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc)
+{
 	int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long);
 	int max_blocks = sbi->blocks_per_seg;
 	struct seg_entry *se = get_seg_entry(sbi, cpc->trim_start);
@@ -473,13 +538,7 @@ static void add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc)
 		}
 		mutex_unlock(&dirty_i->seglist_lock);
 
-		new = f2fs_kmem_cache_alloc(discard_entry_slab, GFP_NOFS);
-		INIT_LIST_HEAD(&new->list);
-		new->blkaddr = START_BLOCK(sbi, cpc->trim_start);
-		new->len = sbi->blocks_per_seg;
-		list_add_tail(&new->list, head);
-		SM_I(sbi)->nr_discards += sbi->blocks_per_seg;
-		cpc->trimmed += sbi->blocks_per_seg;
+		__add_discard_entry(sbi, cpc, 0, sbi->blocks_per_seg);
 		return;
 	}
 
@@ -489,7 +548,7 @@ static void add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc)
 
 	/* SIT_VBLOCK_MAP_SIZE should be multiple of sizeof(unsigned long) */
 	for (i = 0; i < entries; i++)
-		dmap[i] = (cur_map[i] ^ ckpt_map[i]) & ckpt_map[i];
+		dmap[i] = ~(cur_map[i] | ckpt_map[i]);
 
 	while (force || SM_I(sbi)->nr_discards <= SM_I(sbi)->max_discards) {
 		start = __find_rev_next_bit(dmap, max_blocks, end + 1);
@@ -501,14 +560,7 @@ static void add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc)
 		if (end - start < cpc->trim_minlen)
 			continue;
 
-		new = f2fs_kmem_cache_alloc(discard_entry_slab, GFP_NOFS);
-		INIT_LIST_HEAD(&new->list);
-		new->blkaddr = START_BLOCK(sbi, cpc->trim_start) + start;
-		new->len = end - start;
-		cpc->trimmed += end - start;
-
-		list_add_tail(&new->list, head);
-		SM_I(sbi)->nr_discards += end - start;
+		__add_discard_entry(sbi, cpc, start, end);
 	}
 }
 
@@ -620,10 +672,10 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del)
 
 	/* Update valid block bitmap */
 	if (del > 0) {
-		if (f2fs_set_bit(offset, se->cur_valid_map))
+		if (f2fs_test_and_set_bit(offset, se->cur_valid_map))
 			f2fs_bug_on(sbi, 1);
 	} else {
-		if (!f2fs_clear_bit(offset, se->cur_valid_map))
+		if (!f2fs_test_and_clear_bit(offset, se->cur_valid_map))
 			f2fs_bug_on(sbi, 1);
 	}
 	if (!f2fs_test_bit(offset, se->ckpt_valid_map))
@@ -1004,6 +1056,7 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range)
 						range->len < sbi->blocksize)
 		return -EINVAL;
 
+	cpc.trimmed = 0;
 	if (end <= MAIN_BLKADDR(sbi))
 		goto out;
 
@@ -1015,10 +1068,11 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range)
 	cpc.trim_start = start_segno;
 	cpc.trim_end = end_segno;
 	cpc.trim_minlen = range->minlen >> sbi->log_blocksize;
-	cpc.trimmed = 0;
 
 	/* do checkpoint to issue discard commands safely */
+	mutex_lock(&sbi->gc_mutex);
 	write_checkpoint(sbi, &cpc);
+	mutex_unlock(&sbi->gc_mutex);
 out:
 	range->len = cpc.trimmed << sbi->log_blocksize;
 	return 0;
@@ -1050,8 +1104,8 @@ static int __get_segment_type_4(struct page *page, enum page_type p_type)
 		else
 			return CURSEG_COLD_DATA;
 	} else {
-		if (IS_DNODE(page) && !is_cold_node(page))
-			return CURSEG_HOT_NODE;
+		if (IS_DNODE(page) && is_cold_node(page))
+			return CURSEG_WARM_NODE;
 		else
 			return CURSEG_COLD_NODE;
 	}
@@ -1524,17 +1578,7 @@ int lookup_journal_in_cursum(struct f2fs_summary_block *sum, int type,
 static struct page *get_current_sit_page(struct f2fs_sb_info *sbi,
 					unsigned int segno)
 {
-	struct sit_info *sit_i = SIT_I(sbi);
-	unsigned int offset = SIT_BLOCK_OFFSET(segno);
-	block_t blk_addr = sit_i->sit_base_addr + offset;
-
-	check_seg_range(sbi, segno);
-
-	/* calculate sit block address */
-	if (f2fs_test_bit(offset, sit_i->sit_bitmap))
-		blk_addr += sit_i->sit_blocks;
-
-	return get_meta_page(sbi, blk_addr);
+	return get_meta_page(sbi, current_sit_addr(sbi, segno));
 }
 
 static struct page *get_next_sit_page(struct f2fs_sb_info *sbi,
@@ -1687,7 +1731,7 @@ void flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
 	 * #2, flush sit entries to sit page.
 	 */
 	list_for_each_entry_safe(ses, tmp, head, set_list) {
-		struct page *page;
+		struct page *page = NULL;
 		struct f2fs_sit_block *raw_sit = NULL;
 		unsigned int start_segno = ses->start_segno;
 		unsigned int end = min(start_segno + SIT_ENTRY_PER_BLOCK,
@@ -2200,7 +2244,7 @@ int __init create_segment_manager_caches(void)
 		goto fail;
 
 	sit_entry_set_slab = f2fs_kmem_cache_create("sit_entry_set",
-			sizeof(struct nat_entry_set));
+			sizeof(struct sit_entry_set));
 	if (!sit_entry_set_slab)
 		goto destory_discard_entry;
 
diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
index 2495bec..7f327c0 100644
--- a/fs/f2fs/segment.h
+++ b/fs/f2fs/segment.h
@@ -657,10 +657,7 @@ static inline void set_to_next_sit(struct sit_info *sit_i, unsigned int start)
 {
 	unsigned int block_off = SIT_BLOCK_OFFSET(start);
 
-	if (f2fs_test_bit(block_off, sit_i->sit_bitmap))
-		f2fs_clear_bit(block_off, sit_i->sit_bitmap);
-	else
-		f2fs_set_bit(block_off, sit_i->sit_bitmap);
+	f2fs_change_bit(block_off, sit_i->sit_bitmap);
 }
 
 static inline unsigned long long get_mtime(struct f2fs_sb_info *sbi)
@@ -714,6 +711,9 @@ static inline unsigned int max_hw_blocks(struct f2fs_sb_info *sbi)
  */
 static inline int nr_pages_to_skip(struct f2fs_sb_info *sbi, int type)
 {
+	if (sbi->sb->s_bdi->dirty_exceeded)
+		return 0;
+
 	if (type == DATA)
 		return sbi->blocks_per_seg;
 	else if (type == NODE)
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 41d6f70..f71421d 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -51,8 +51,10 @@ enum {
 	Opt_disable_ext_identify,
 	Opt_inline_xattr,
 	Opt_inline_data,
+	Opt_inline_dentry,
 	Opt_flush_merge,
 	Opt_nobarrier,
+	Opt_fastboot,
 	Opt_err,
 };
 
@@ -69,8 +71,10 @@ static match_table_t f2fs_tokens = {
 	{Opt_disable_ext_identify, "disable_ext_identify"},
 	{Opt_inline_xattr, "inline_xattr"},
 	{Opt_inline_data, "inline_data"},
+	{Opt_inline_dentry, "inline_dentry"},
 	{Opt_flush_merge, "flush_merge"},
 	{Opt_nobarrier, "nobarrier"},
+	{Opt_fastboot, "fastboot"},
 	{Opt_err, NULL},
 };
 
@@ -340,12 +344,18 @@ static int parse_options(struct super_block *sb, char *options)
 		case Opt_inline_data:
 			set_opt(sbi, INLINE_DATA);
 			break;
+		case Opt_inline_dentry:
+			set_opt(sbi, INLINE_DENTRY);
+			break;
 		case Opt_flush_merge:
 			set_opt(sbi, FLUSH_MERGE);
 			break;
 		case Opt_nobarrier:
 			set_opt(sbi, NOBARRIER);
 			break;
+		case Opt_fastboot:
+			set_opt(sbi, FASTBOOT);
+			break;
 		default:
 			f2fs_msg(sb, KERN_ERR,
 				"Unrecognized mount option \"%s\" or missing value",
@@ -373,6 +383,7 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb)
 	fi->i_advise = 0;
 	rwlock_init(&fi->ext.ext_lock);
 	init_rwsem(&fi->i_sem);
+	INIT_RADIX_TREE(&fi->inmem_root, GFP_NOFS);
 	INIT_LIST_HEAD(&fi->inmem_pages);
 	mutex_init(&fi->inmem_lock);
 
@@ -473,9 +484,9 @@ int f2fs_sync_fs(struct super_block *sb, int sync)
 	trace_f2fs_sync_fs(sb, sync);
 
 	if (sync) {
-		struct cp_control cpc = {
-			.reason = CP_SYNC,
-		};
+		struct cp_control cpc;
+
+		cpc.reason = test_opt(sbi, FASTBOOT) ? CP_UMOUNT : CP_SYNC;
 		mutex_lock(&sbi->gc_mutex);
 		write_checkpoint(sbi, &cpc);
 		mutex_unlock(&sbi->gc_mutex);
@@ -562,10 +573,14 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
 		seq_puts(seq, ",disable_ext_identify");
 	if (test_opt(sbi, INLINE_DATA))
 		seq_puts(seq, ",inline_data");
+	if (test_opt(sbi, INLINE_DENTRY))
+		seq_puts(seq, ",inline_dentry");
 	if (!f2fs_readonly(sbi->sb) && test_opt(sbi, FLUSH_MERGE))
 		seq_puts(seq, ",flush_merge");
 	if (test_opt(sbi, NOBARRIER))
 		seq_puts(seq, ",nobarrier");
+	if (test_opt(sbi, FASTBOOT))
+		seq_puts(seq, ",fastboot");
 	seq_printf(seq, ",active_logs=%u", sbi->active_logs);
 
 	return 0;
@@ -654,7 +669,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
 			f2fs_sync_fs(sb, 1);
 			need_restart_gc = true;
 		}
-	} else if (test_opt(sbi, BG_GC) && !sbi->gc_thread) {
+	} else if (!sbi->gc_thread) {
 		err = start_gc_thread(sbi);
 		if (err)
 			goto restore_opts;
@@ -667,7 +682,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
 	 */
 	if ((*flags & MS_RDONLY) || !test_opt(sbi, FLUSH_MERGE)) {
 		destroy_flush_cmd_control(sbi);
-	} else if (test_opt(sbi, FLUSH_MERGE) && !SM_I(sbi)->cmd_control_info) {
+	} else if (!SM_I(sbi)->cmd_control_info) {
 		err = create_flush_cmd_control(sbi);
 		if (err)
 			goto restore_gc;
@@ -922,7 +937,7 @@ retry:
 static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
 {
 	struct f2fs_sb_info *sbi;
-	struct f2fs_super_block *raw_super;
+	struct f2fs_super_block *raw_super = NULL;
 	struct buffer_head *raw_super_buf;
 	struct inode *root;
 	long err = -EINVAL;
@@ -1123,7 +1138,7 @@ try_onemore:
 	 * If filesystem is not mounted as read-only then
 	 * do start the gc_thread.
 	 */
-	if (!f2fs_readonly(sb)) {
+	if (test_opt(sbi, BG_GC) && !f2fs_readonly(sb)) {
 		/* After POR, we can run background GC thread.*/
 		err = start_gc_thread(sbi);
 		if (err)
diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c
index deca872..5072bf9 100644
--- a/fs/f2fs/xattr.c
+++ b/fs/f2fs/xattr.c
@@ -83,7 +83,7 @@ static int f2fs_xattr_generic_get(struct dentry *dentry, const char *name,
 	}
 	if (strcmp(name, "") == 0)
 		return -EINVAL;
-	return f2fs_getxattr(dentry->d_inode, type, name, buffer, size);
+	return f2fs_getxattr(dentry->d_inode, type, name, buffer, size, NULL);
 }
 
 static int f2fs_xattr_generic_set(struct dentry *dentry, const char *name,
@@ -398,7 +398,7 @@ static inline int write_all_xattrs(struct inode *inode, __u32 hsize,
 }
 
 int f2fs_getxattr(struct inode *inode, int index, const char *name,
-		void *buffer, size_t buffer_size)
+		void *buffer, size_t buffer_size, struct page *ipage)
 {
 	struct f2fs_xattr_entry *entry;
 	void *base_addr;
@@ -412,7 +412,7 @@ int f2fs_getxattr(struct inode *inode, int index, const char *name,
 	if (len > F2FS_NAME_LEN)
 		return -ERANGE;
 
-	base_addr = read_all_xattrs(inode, NULL);
+	base_addr = read_all_xattrs(inode, ipage);
 	if (!base_addr)
 		return -ENOMEM;
 
diff --git a/fs/f2fs/xattr.h b/fs/f2fs/xattr.h
index 34ab7db..969d792 100644
--- a/fs/f2fs/xattr.h
+++ b/fs/f2fs/xattr.h
@@ -115,7 +115,8 @@ extern const struct xattr_handler *f2fs_xattr_handlers[];
 
 extern int f2fs_setxattr(struct inode *, int, const char *,
 				const void *, size_t, struct page *, int);
-extern int f2fs_getxattr(struct inode *, int, const char *, void *, size_t);
+extern int f2fs_getxattr(struct inode *, int, const char *, void *,
+						size_t, struct page *);
 extern ssize_t f2fs_listxattr(struct dentry *, char *, size_t);
 #else
 
@@ -126,7 +127,8 @@ static inline int f2fs_setxattr(struct inode *inode, int index,
 	return -EOPNOTSUPP;
 }
 static inline int f2fs_getxattr(struct inode *inode, int index,
-		const char *name, void *buffer, size_t buffer_size)
+			const char *name, void *buffer,
+			size_t buffer_size, struct page *dpage)
 {
 	return -EOPNOTSUPP;
 }
diff --git a/fs/fat/dir.c b/fs/fat/dir.c
index 3963ede..c5d6bb9 100644
--- a/fs/fat/dir.c
+++ b/fs/fat/dir.c
@@ -702,10 +702,11 @@ static int fat_readdir(struct file *file, struct dir_context *ctx)
 }
 
 #define FAT_IOCTL_FILLDIR_FUNC(func, dirent_type)			   \
-static int func(void *__buf, const char *name, int name_len,		   \
+static int func(struct dir_context *ctx, const char *name, int name_len,   \
 			     loff_t offset, u64 ino, unsigned int d_type)  \
 {									   \
-	struct fat_ioctl_filldir_callback *buf = __buf;			   \
+	struct fat_ioctl_filldir_callback *buf =			   \
+		container_of(ctx, struct fat_ioctl_filldir_callback, ctx); \
 	struct dirent_type __user *d1 = buf->dirent;			   \
 	struct dirent_type __user *d2 = d1 + 1;				   \
 									   \
diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c
index 6df8d3d..b8b92c2 100644
--- a/fs/fat/namei_vfat.c
+++ b/fs/fat/namei_vfat.c
@@ -736,7 +736,12 @@ static struct dentry *vfat_lookup(struct inode *dir, struct dentry *dentry,
 	}
 
 	alias = d_find_alias(inode);
-	if (alias && !vfat_d_anon_disconn(alias)) {
+	/*
+	 * Checking "alias->d_parent == dentry->d_parent" to make sure
+	 * FS is not corrupted (especially double linked dir).
+	 */
+	if (alias && alias->d_parent == dentry->d_parent &&
+	    !vfat_d_anon_disconn(alias)) {
 		/*
 		 * This inode has non anonymous-DCACHE_DISCONNECTED
 		 * dentry. This means, the user did ->lookup() by an
@@ -755,12 +760,9 @@ static struct dentry *vfat_lookup(struct inode *dir, struct dentry *dentry,
 
 out:
 	mutex_unlock(&MSDOS_SB(sb)->s_lock);
-	dentry->d_time = dentry->d_parent->d_inode->i_version;
-	dentry = d_splice_alias(inode, dentry);
-	if (dentry)
-		dentry->d_time = dentry->d_parent->d_inode->i_version;
-	return dentry;
-
+	if (!inode)
+		dentry->d_time = dir->i_version;
+	return d_splice_alias(inode, dentry);
 error:
 	mutex_unlock(&MSDOS_SB(sb)->s_lock);
 	return ERR_PTR(err);
@@ -793,7 +795,6 @@ static int vfat_create(struct inode *dir, struct dentry *dentry, umode_t mode,
 	inode->i_mtime = inode->i_atime = inode->i_ctime = ts;
 	/* timestamp is already written, so mark_inode_dirty() is unneeded. */
 
-	dentry->d_time = dentry->d_parent->d_inode->i_version;
 	d_instantiate(dentry, inode);
 out:
 	mutex_unlock(&MSDOS_SB(sb)->s_lock);
@@ -824,6 +825,7 @@ static int vfat_rmdir(struct inode *dir, struct dentry *dentry)
 	clear_nlink(inode);
 	inode->i_mtime = inode->i_atime = CURRENT_TIME_SEC;
 	fat_detach(inode);
+	dentry->d_time = dir->i_version;
 out:
 	mutex_unlock(&MSDOS_SB(sb)->s_lock);
 
@@ -849,6 +851,7 @@ static int vfat_unlink(struct inode *dir, struct dentry *dentry)
 	clear_nlink(inode);
 	inode->i_mtime = inode->i_atime = CURRENT_TIME_SEC;
 	fat_detach(inode);
+	dentry->d_time = dir->i_version;
 out:
 	mutex_unlock(&MSDOS_SB(sb)->s_lock);
 
@@ -889,7 +892,6 @@ static int vfat_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
 	inode->i_mtime = inode->i_atime = inode->i_ctime = ts;
 	/* timestamp is already written, so mark_inode_dirty() is unneeded. */
 
-	dentry->d_time = dentry->d_parent->d_inode->i_version;
 	d_instantiate(dentry, inode);
 
 	mutex_unlock(&MSDOS_SB(sb)->s_lock);
diff --git a/fs/file.c b/fs/file.c
index ab3eb6a..ee738ea 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -869,7 +869,7 @@ SYSCALL_DEFINE1(dup, unsigned int, fildes)
 	struct file *file = fget_raw(fildes);
 
 	if (file) {
-		ret = get_unused_fd();
+		ret = get_unused_fd_flags(0);
 		if (ret >= 0)
 			fd_install(ret, file);
 		else
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index dbab798..df562cc 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -372,7 +372,7 @@ static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
 	if (inode && get_node_id(inode) == FUSE_ROOT_ID)
 		goto out_iput;
 
-	newent = d_materialise_unique(entry, inode);
+	newent = d_splice_alias(inode, entry);
 	err = PTR_ERR(newent);
 	if (IS_ERR(newent))
 		goto out_err;
@@ -1320,7 +1320,7 @@ static int fuse_direntplus_link(struct file *file,
 	if (!inode)
 		goto out;
 
-	alias = d_materialise_unique(dentry, inode);
+	alias = d_splice_alias(inode, dentry);
 	err = PTR_ERR(alias);
 	if (IS_ERR(alias))
 		goto out;
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index caa8d95..bf50259 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -1988,7 +1988,7 @@ static int fuse_write_begin(struct file *file, struct address_space *mapping,
 		struct page **pagep, void **fsdata)
 {
 	pgoff_t index = pos >> PAGE_CACHE_SHIFT;
-	struct fuse_conn *fc = get_fuse_conn(file->f_dentry->d_inode);
+	struct fuse_conn *fc = get_fuse_conn(file_inode(file));
 	struct page *page;
 	loff_t fsize;
 	int err = -ENOMEM;
diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c
index 5d4261ff5..c5a34f0 100644
--- a/fs/gfs2/dir.c
+++ b/fs/gfs2/dir.c
@@ -365,23 +365,17 @@ static __be64 *gfs2_dir_get_hash_table(struct gfs2_inode *ip)
 
 	ret = gfs2_dir_read_data(ip, hc, hsize);
 	if (ret < 0) {
-		if (is_vmalloc_addr(hc))
-			vfree(hc);
-		else
-			kfree(hc);
+		kvfree(hc);
 		return ERR_PTR(ret);
 	}
 
 	spin_lock(&inode->i_lock);
-	if (ip->i_hash_cache) {
-		if (is_vmalloc_addr(hc))
-			vfree(hc);
-		else
-			kfree(hc);
-	} else {
+	if (likely(!ip->i_hash_cache)) {
 		ip->i_hash_cache = hc;
+		hc = NULL;
 	}
 	spin_unlock(&inode->i_lock);
+	kvfree(hc);
 
 	return ip->i_hash_cache;
 }
@@ -396,10 +390,7 @@ void gfs2_dir_hash_inval(struct gfs2_inode *ip)
 {
 	__be64 *hc = ip->i_hash_cache;
 	ip->i_hash_cache = NULL;
-	if (is_vmalloc_addr(hc))
-		vfree(hc);
-	else
-		kfree(hc);
+	kvfree(hc);
 }
 
 static inline int gfs2_dirent_sentinel(const struct gfs2_dirent *dent)
@@ -1168,10 +1159,7 @@ fail:
 	gfs2_dinode_out(dip, dibh->b_data);
 	brelse(dibh);
 out_kfree:
-	if (is_vmalloc_addr(hc2))
-		vfree(hc2);
-	else
-		kfree(hc2);
+	kvfree(hc2);
 	return error;
 }
 
@@ -1302,14 +1290,6 @@ static void *gfs2_alloc_sort_buffer(unsigned size)
 	return ptr;
 }
 
-static void gfs2_free_sort_buffer(void *ptr)
-{
-	if (is_vmalloc_addr(ptr))
-		vfree(ptr);
-	else
-		kfree(ptr);
-}
-
 static int gfs2_dir_read_leaf(struct inode *inode, struct dir_context *ctx,
 			      int *copied, unsigned *depth,
 			      u64 leaf_no)
@@ -1393,7 +1373,7 @@ static int gfs2_dir_read_leaf(struct inode *inode, struct dir_context *ctx,
 out_free:
 	for(i = 0; i < leaf; i++)
 		brelse(larr[i]);
-	gfs2_free_sort_buffer(larr);
+	kvfree(larr);
 out:
 	return error;
 }
@@ -2004,10 +1984,7 @@ out_rlist:
 	gfs2_rlist_free(&rlist);
 	gfs2_quota_unhold(dip);
 out:
-	if (is_vmalloc_addr(ht))
-		vfree(ht);
-	else
-		kfree(ht);
+	kvfree(ht);
 	return error;
 }
 
diff --git a/fs/gfs2/export.c b/fs/gfs2/export.c
index 8b9b377..c41d255 100644
--- a/fs/gfs2/export.c
+++ b/fs/gfs2/export.c
@@ -69,10 +69,12 @@ struct get_name_filldir {
 	char *name;
 };
 
-static int get_name_filldir(void *opaque, const char *name, int length,
-			    loff_t offset, u64 inum, unsigned int type)
+static int get_name_filldir(struct dir_context *ctx, const char *name,
+			    int length, loff_t offset, u64 inum,
+			    unsigned int type)
 {
-	struct get_name_filldir *gnfd = opaque;
+	struct get_name_filldir *gnfd =
+		container_of(ctx, struct get_name_filldir, ctx);
 
 	if (inum != gnfd->inum.no_addr)
 		return 0;
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 80dd44d..6e600ab 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -337,7 +337,8 @@ static void gfs2_size_hint(struct file *filep, loff_t offset, size_t size)
 	size_t blks = (size + sdp->sd_sb.sb_bsize - 1) >> sdp->sd_sb.sb_bsize_shift;
 	int hint = min_t(size_t, INT_MAX, blks);
 
-	atomic_set(&ip->i_res->rs_sizehint, hint);
+	if (hint > atomic_read(&ip->i_res->rs_sizehint))
+		atomic_set(&ip->i_res->rs_sizehint, hint);
 }
 
 /**
@@ -728,7 +729,6 @@ static int fallocate_chunk(struct inode *inode, loff_t offset, loff_t len,
 	struct gfs2_inode *ip = GFS2_I(inode);
 	struct buffer_head *dibh;
 	int error;
-	loff_t size = len;
 	unsigned int nr_blks;
 	sector_t lblock = offset >> inode->i_blkbits;
 
@@ -762,11 +762,6 @@ static int fallocate_chunk(struct inode *inode, loff_t offset, loff_t len,
 			goto out;
 		}
 	}
-	if (offset + size > inode->i_size && !(mode & FALLOC_FL_KEEP_SIZE))
-		i_size_write(inode, offset + size);
-
-	mark_inode_dirty(inode);
-
 out:
 	brelse(dibh);
 	return error;
@@ -796,8 +791,7 @@ static void calc_max_reserv(struct gfs2_inode *ip, loff_t max, loff_t *len,
 	}
 }
 
-static long gfs2_fallocate(struct file *file, int mode, loff_t offset,
-			   loff_t len)
+static long __gfs2_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
 {
 	struct inode *inode = file_inode(file);
 	struct gfs2_sbd *sdp = GFS2_SB(inode);
@@ -811,14 +805,9 @@ static long gfs2_fallocate(struct file *file, int mode, loff_t offset,
 	loff_t bsize_mask = ~((loff_t)sdp->sd_sb.sb_bsize - 1);
 	loff_t next = (offset + len - 1) >> sdp->sd_sb.sb_bsize_shift;
 	loff_t max_chunk_size = UINT_MAX & bsize_mask;
-	struct gfs2_holder gh;
 
 	next = (next + 1) << sdp->sd_sb.sb_bsize_shift;
 
-	/* We only support the FALLOC_FL_KEEP_SIZE mode */
-	if (mode & ~FALLOC_FL_KEEP_SIZE)
-		return -EOPNOTSUPP;
-
 	offset &= bsize_mask;
 
 	len = next - offset;
@@ -829,17 +818,6 @@ static long gfs2_fallocate(struct file *file, int mode, loff_t offset,
 	if (bytes == 0)
 		bytes = sdp->sd_sb.sb_bsize;
 
-	error = gfs2_rs_alloc(ip);
-	if (error)
-		return error;
-
-	mutex_lock(&inode->i_mutex);
-
-	gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
-	error = gfs2_glock_nq(&gh);
-	if (unlikely(error))
-		goto out_uninit;
-
 	gfs2_size_hint(file, offset, len);
 
 	while (len > 0) {
@@ -852,8 +830,7 @@ static long gfs2_fallocate(struct file *file, int mode, loff_t offset,
 		}
 		error = gfs2_quota_lock_check(ip);
 		if (error)
-			goto out_unlock;
-
+			return error;
 retry:
 		gfs2_write_calc_reserv(ip, bytes, &data_blocks, &ind_blocks);
 
@@ -895,20 +872,64 @@ retry:
 		gfs2_quota_unlock(ip);
 	}
 
-	if (error == 0)
-		error = generic_write_sync(file, pos, count);
-	goto out_unlock;
+	if (!(mode & FALLOC_FL_KEEP_SIZE) && (pos + count) > inode->i_size) {
+		i_size_write(inode, pos + count);
+		/* Marks the inode as dirty */
+		file_update_time(file);
+	}
+
+	return generic_write_sync(file, pos, count);
 
 out_trans_fail:
 	gfs2_inplace_release(ip);
 out_qunlock:
 	gfs2_quota_unlock(ip);
+	return error;
+}
+
+static long gfs2_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
+{
+	struct inode *inode = file_inode(file);
+	struct gfs2_inode *ip = GFS2_I(inode);
+	struct gfs2_holder gh;
+	int ret;
+
+	if (mode & ~FALLOC_FL_KEEP_SIZE)
+		return -EOPNOTSUPP;
+
+	mutex_lock(&inode->i_mutex);
+
+	gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
+	ret = gfs2_glock_nq(&gh);
+	if (ret)
+		goto out_uninit;
+
+	if (!(mode & FALLOC_FL_KEEP_SIZE) &&
+	    (offset + len) > inode->i_size) {
+		ret = inode_newsize_ok(inode, offset + len);
+		if (ret)
+			goto out_unlock;
+	}
+
+	ret = get_write_access(inode);
+	if (ret)
+		goto out_unlock;
+
+	ret = gfs2_rs_alloc(ip);
+	if (ret)
+		goto out_putw;
+
+	ret = __gfs2_fallocate(file, mode, offset, len);
+	if (ret)
+		gfs2_rs_deltree(ip->i_res);
+out_putw:
+	put_write_access(inode);
 out_unlock:
 	gfs2_glock_dq(&gh);
 out_uninit:
 	gfs2_holder_uninit(&gh);
 	mutex_unlock(&inode->i_mutex);
-	return error;
+	return ret;
 }
 
 #ifdef CONFIG_GFS2_FS_LOCKING_DLM
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 8f0c19d..a23524a 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -836,8 +836,7 @@ void gfs2_holder_reinit(unsigned int state, unsigned flags, struct gfs2_holder *
 	gh->gh_flags = flags;
 	gh->gh_iflags = 0;
 	gh->gh_ip = _RET_IP_;
-	if (gh->gh_owner_pid)
-		put_pid(gh->gh_owner_pid);
+	put_pid(gh->gh_owner_pid);
 	gh->gh_owner_pid = get_pid(task_pid(current));
 }
 
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index 1cc0bba..fe91951 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -28,6 +28,8 @@
 #include "trans.h"
 #include "dir.h"
 
+struct workqueue_struct *gfs2_freeze_wq;
+
 static void gfs2_ail_error(struct gfs2_glock *gl, const struct buffer_head *bh)
 {
 	fs_err(gl->gl_sbd, "AIL buffer %p: blocknr %llu state 0x%08lx mapping %p page state 0x%lx\n",
@@ -94,11 +96,8 @@ static void gfs2_ail_empty_gl(struct gfs2_glock *gl)
          * on the stack */
 	tr.tr_reserved = 1 + gfs2_struct2blk(sdp, tr.tr_revokes, sizeof(u64));
 	tr.tr_ip = _RET_IP_;
-	sb_start_intwrite(sdp->sd_vfs);
-	if (gfs2_log_reserve(sdp, tr.tr_reserved) < 0) {
-		sb_end_intwrite(sdp->sd_vfs);
+	if (gfs2_log_reserve(sdp, tr.tr_reserved) < 0)
 		return;
-	}
 	WARN_ON_ONCE(current->journal_info);
 	current->journal_info = &tr;
 
@@ -469,20 +468,19 @@ static void inode_go_dump(struct seq_file *seq, const struct gfs2_glock *gl)
 
 static void freeze_go_sync(struct gfs2_glock *gl)
 {
+	int error = 0;
 	struct gfs2_sbd *sdp = gl->gl_sbd;
-	DEFINE_WAIT(wait);
 
 	if (gl->gl_state == LM_ST_SHARED &&
 	    test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) {
-		atomic_set(&sdp->sd_log_freeze, 1);
-		wake_up(&sdp->sd_logd_waitq);
-		do {
-			prepare_to_wait(&sdp->sd_log_frozen_wait, &wait,
-					TASK_UNINTERRUPTIBLE);
-			if (atomic_read(&sdp->sd_log_freeze))
-				io_schedule();
-		} while(atomic_read(&sdp->sd_log_freeze));
-		finish_wait(&sdp->sd_log_frozen_wait, &wait);
+		atomic_set(&sdp->sd_freeze_state, SFS_STARTING_FREEZE);
+		error = freeze_super(sdp->sd_vfs);
+		if (error) {
+			printk(KERN_INFO "GFS2: couldn't freeze filesystem: %d\n", error);
+			gfs2_assert_withdraw(sdp, 0);
+		}
+		queue_work(gfs2_freeze_wq, &sdp->sd_freeze_work);
+		gfs2_log_flush(sdp, NULL, FREEZE_FLUSH);
 	}
 }
 
diff --git a/fs/gfs2/glops.h b/fs/gfs2/glops.h
index 7455d26..8ed1857 100644
--- a/fs/gfs2/glops.h
+++ b/fs/gfs2/glops.h
@@ -12,6 +12,8 @@
 
 #include "incore.h"
 
+extern struct workqueue_struct *gfs2_freeze_wq;
+
 extern const struct gfs2_glock_operations gfs2_meta_glops;
 extern const struct gfs2_glock_operations gfs2_inode_glops;
 extern const struct gfs2_glock_operations gfs2_rgrp_glops;
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 39e7e99..7a2dbbc 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -97,6 +97,7 @@ struct gfs2_rgrpd {
 #define GFS2_RDF_CHECK		0x10000000 /* check for unlinked inodes */
 #define GFS2_RDF_UPTODATE	0x20000000 /* rg is up to date */
 #define GFS2_RDF_ERROR		0x40000000 /* error in rg */
+#define GFS2_RDF_PREFERRED	0x80000000 /* This rgrp is preferred */
 #define GFS2_RDF_MASK		0xf0000000 /* mask for internal flags */
 	spinlock_t rd_rsspin;           /* protects reservation related vars */
 	struct rb_root rd_rstree;       /* multi-block reservation tree */
@@ -587,6 +588,12 @@ enum {
 	SDF_SKIP_DLM_UNLOCK	= 8,
 };
 
+enum gfs2_freeze_state {
+	SFS_UNFROZEN		= 0,
+	SFS_STARTING_FREEZE	= 1,
+	SFS_FROZEN		= 2,
+};
+
 #define GFS2_FSNAME_LEN		256
 
 struct gfs2_inum_host {
@@ -684,6 +691,7 @@ struct gfs2_sbd {
 	struct gfs2_holder sd_live_gh;
 	struct gfs2_glock *sd_rename_gl;
 	struct gfs2_glock *sd_freeze_gl;
+	struct work_struct sd_freeze_work;
 	wait_queue_head_t sd_glock_wait;
 	atomic_t sd_glock_disposal;
 	struct completion sd_locking_init;
@@ -788,6 +796,9 @@ struct gfs2_sbd {
 	wait_queue_head_t sd_log_flush_wait;
 	int sd_log_error;
 
+	atomic_t sd_reserving_log;
+	wait_queue_head_t sd_reserving_log_wait;
+
 	unsigned int sd_log_flush_head;
 	u64 sd_log_flush_wrapped;
 
@@ -797,12 +808,8 @@ struct gfs2_sbd {
 
 	/* For quiescing the filesystem */
 	struct gfs2_holder sd_freeze_gh;
-	struct gfs2_holder sd_freeze_root_gh;
-	struct gfs2_holder sd_thaw_gh;
-	atomic_t sd_log_freeze;
-	atomic_t sd_frozen_root;
-	wait_queue_head_t sd_frozen_root_wait;
-	wait_queue_head_t sd_log_frozen_wait;
+	atomic_t sd_freeze_state;
+	struct mutex sd_freeze_mutex;
 
 	char sd_fsname[GFS2_FSNAME_LEN];
 	char sd_table_name[GFS2_FSNAME_LEN];
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index c4ed823..9054002 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -596,7 +596,6 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
 	struct gfs2_inode *dip = GFS2_I(dir), *ip;
 	struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
 	struct gfs2_glock *io_gl;
-	struct dentry *d;
 	int error, free_vfs_inode = 0;
 	u32 aflags = 0;
 	unsigned blocks = 1;
@@ -624,22 +623,18 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
 	inode = gfs2_dir_search(dir, &dentry->d_name, !S_ISREG(mode) || excl);
 	error = PTR_ERR(inode);
 	if (!IS_ERR(inode)) {
-		d = d_splice_alias(inode, dentry);
-		error = PTR_ERR(d);
-		if (IS_ERR(d)) {
-			inode = ERR_CAST(d);
+		if (S_ISDIR(inode->i_mode)) {
+			iput(inode);
+			inode = ERR_PTR(-EISDIR);
 			goto fail_gunlock;
 		}
+		d_instantiate(dentry, inode);
 		error = 0;
 		if (file) {
-			if (S_ISREG(inode->i_mode)) {
-				WARN_ON(d != NULL);
+			if (S_ISREG(inode->i_mode))
 				error = finish_open(file, dentry, gfs2_open_common, opened);
-			} else {
-				error = finish_no_open(file, d);
-			}
-		} else {
-			dput(d);
+			else
+				error = finish_no_open(file, NULL);
 		}
 		gfs2_glock_dq_uninit(ghs);
 		return error;
@@ -1045,11 +1040,7 @@ static int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name,
 	if (error)
 		return error;
 
-	error = gfs2_dir_check(&dip->i_inode, name, ip);
-	if (error)
-		return error;
-
-	return 0;
+	return gfs2_dir_check(&dip->i_inode, name, ip);
 }
 
 /**
@@ -1254,11 +1245,8 @@ static int gfs2_atomic_open(struct inode *dir, struct dentry *dentry,
 	if (d != NULL)
 		dentry = d;
 	if (dentry->d_inode) {
-		if (!(*opened & FILE_OPENED)) {
-			if (d == NULL)
-				dget(dentry);
-			return finish_no_open(file, dentry);
-		}
+		if (!(*opened & FILE_OPENED))
+			return finish_no_open(file, d);
 		dput(d);
 		return 0;
 	}
@@ -1622,26 +1610,18 @@ int gfs2_permission(struct inode *inode, int mask)
 {
 	struct gfs2_inode *ip;
 	struct gfs2_holder i_gh;
-	struct gfs2_sbd *sdp = GFS2_SB(inode);
 	int error;
 	int unlock = 0;
-	int frozen_root = 0;
 
 
 	ip = GFS2_I(inode);
 	if (gfs2_glock_is_locked_by_me(ip->i_gl) == NULL) {
-		if (unlikely(gfs2_glock_is_held_excl(sdp->sd_freeze_gl) &&
-			     inode == sdp->sd_root_dir->d_inode &&
-			     atomic_inc_not_zero(&sdp->sd_frozen_root)))
-			frozen_root = 1;
-		else {
-			if (mask & MAY_NOT_BLOCK)
-				return -ECHILD;
-			error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh);
-			if (error)
-				return error;
-			unlock = 1;
-		}
+		if (mask & MAY_NOT_BLOCK)
+			return -ECHILD;
+		error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh);
+		if (error)
+			return error;
+		unlock = 1;
 	}
 
 	if ((mask & MAY_WRITE) && IS_IMMUTABLE(inode))
@@ -1650,8 +1630,6 @@ int gfs2_permission(struct inode *inode, int mask)
 		error = generic_permission(inode, mask);
 	if (unlock)
 		gfs2_glock_dq_uninit(&i_gh);
-	else if (frozen_root && atomic_dec_and_test(&sdp->sd_frozen_root))
-		wake_up(&sdp->sd_frozen_root_wait);
 
 	return error;
 }
@@ -1824,29 +1802,19 @@ static int gfs2_getattr(struct vfsmount *mnt, struct dentry *dentry,
 	struct inode *inode = dentry->d_inode;
 	struct gfs2_inode *ip = GFS2_I(inode);
 	struct gfs2_holder gh;
-	struct gfs2_sbd *sdp = GFS2_SB(inode);
 	int error;
 	int unlock = 0;
-	int frozen_root = 0;
 
 	if (gfs2_glock_is_locked_by_me(ip->i_gl) == NULL) {
-		if (unlikely(gfs2_glock_is_held_excl(sdp->sd_freeze_gl) &&
-			     inode == sdp->sd_root_dir->d_inode &&
-			     atomic_inc_not_zero(&sdp->sd_frozen_root)))
-			frozen_root = 1;
-		else {
-			error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &gh);
-			if (error)
-				return error;
-			unlock = 1;
-		}
+		error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &gh);
+		if (error)
+			return error;
+		unlock = 1;
 	}
 
 	generic_fillattr(inode, stat);
 	if (unlock)
 		gfs2_glock_dq_uninit(&gh);
-	else if (frozen_root && atomic_dec_and_test(&sdp->sd_frozen_root))
-		wake_up(&sdp->sd_frozen_root_wait);
 
 	return 0;
 }
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index 3966fad..536e7a6 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -339,6 +339,7 @@ void gfs2_log_release(struct gfs2_sbd *sdp, unsigned int blks)
 
 int gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks)
 {
+	int ret = 0;
 	unsigned reserved_blks = 7 * (4096 / sdp->sd_vfs->s_blocksize);
 	unsigned wanted = blks + reserved_blks;
 	DEFINE_WAIT(wait);
@@ -362,9 +363,13 @@ retry:
 		} while(free_blocks <= wanted);
 		finish_wait(&sdp->sd_log_waitq, &wait);
 	}
+	atomic_inc(&sdp->sd_reserving_log);
 	if (atomic_cmpxchg(&sdp->sd_log_blks_free, free_blocks,
-				free_blocks - blks) != free_blocks)
+				free_blocks - blks) != free_blocks) {
+		if (atomic_dec_and_test(&sdp->sd_reserving_log))
+			wake_up(&sdp->sd_reserving_log_wait);
 		goto retry;
+	}
 	trace_gfs2_log_blocks(sdp, -blks);
 
 	/*
@@ -377,9 +382,11 @@ retry:
 	down_read(&sdp->sd_log_flush_lock);
 	if (unlikely(!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags))) {
 		gfs2_log_release(sdp, blks);
-		return -EROFS;
+		ret = -EROFS;
 	}
-	return 0;
+	if (atomic_dec_and_test(&sdp->sd_reserving_log))
+		wake_up(&sdp->sd_reserving_log_wait);
+	return ret;
 }
 
 /**
@@ -652,9 +659,12 @@ static void log_write_header(struct gfs2_sbd *sdp, u32 flags)
 	u32 hash;
 	int rw = WRITE_FLUSH_FUA | REQ_META;
 	struct page *page = mempool_alloc(gfs2_page_pool, GFP_NOIO);
+	enum gfs2_freeze_state state = atomic_read(&sdp->sd_freeze_state);
 	lh = page_address(page);
 	clear_page(lh);
 
+	gfs2_assert_withdraw(sdp, (state != SFS_FROZEN));
+
 	tail = current_tail(sdp);
 
 	lh->lh_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
@@ -695,6 +705,7 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl,
 		    enum gfs2_flush_type type)
 {
 	struct gfs2_trans *tr;
+	enum gfs2_freeze_state state = atomic_read(&sdp->sd_freeze_state);
 
 	down_write(&sdp->sd_log_flush_lock);
 
@@ -713,8 +724,12 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl,
 		INIT_LIST_HEAD(&tr->tr_ail1_list);
 		INIT_LIST_HEAD(&tr->tr_ail2_list);
 		tr->tr_first = sdp->sd_log_flush_head;
+		if (unlikely (state == SFS_FROZEN))
+			gfs2_assert_withdraw(sdp, !tr->tr_num_buf_new && !tr->tr_num_databuf_new);
 	}
 
+	if (unlikely(state == SFS_FROZEN))
+		gfs2_assert_withdraw(sdp, !sdp->sd_log_num_revoke);
 	gfs2_assert_withdraw(sdp,
 			sdp->sd_log_num_revoke == sdp->sd_log_commited_revoke);
 
@@ -745,8 +760,6 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl,
 	spin_unlock(&sdp->sd_ail_lock);
 	gfs2_log_unlock(sdp);
 
-	if (atomic_read(&sdp->sd_log_freeze))
-		type = FREEZE_FLUSH;
 	if (type != NORMAL_FLUSH) {
 		if (!sdp->sd_log_idle) {
 			for (;;) {
@@ -763,21 +776,8 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl,
 		}
 		if (type == SHUTDOWN_FLUSH || type == FREEZE_FLUSH)
 			gfs2_log_shutdown(sdp);
-		if (type == FREEZE_FLUSH) {
-			int error;
-
-			atomic_set(&sdp->sd_log_freeze, 0);
-			wake_up(&sdp->sd_log_frozen_wait);
-			error = gfs2_glock_nq_init(sdp->sd_freeze_gl,
-						   LM_ST_SHARED, 0,
-						   &sdp->sd_thaw_gh);
-			if (error) {
-				printk(KERN_INFO "GFS2: couln't get freeze lock : %d\n", error);
-				gfs2_assert_withdraw(sdp, 0);
-			}
-			else
-				gfs2_glock_dq_uninit(&sdp->sd_thaw_gh);
-		}
+		if (type == FREEZE_FLUSH)
+			atomic_set(&sdp->sd_freeze_state, SFS_FROZEN);
 	}
 
 	trace_gfs2_log_flush(sdp, 0);
@@ -888,7 +888,7 @@ void gfs2_log_shutdown(struct gfs2_sbd *sdp)
 
 static inline int gfs2_jrnl_flush_reqd(struct gfs2_sbd *sdp)
 {
-	return (atomic_read(&sdp->sd_log_pinned) >= atomic_read(&sdp->sd_log_thresh1) || atomic_read(&sdp->sd_log_freeze));
+	return (atomic_read(&sdp->sd_log_pinned) >= atomic_read(&sdp->sd_log_thresh1));
 }
 
 static inline int gfs2_ail_flush_reqd(struct gfs2_sbd *sdp)
diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c
index 82b6ac8..241a399 100644
--- a/fs/gfs2/main.c
+++ b/fs/gfs2/main.c
@@ -30,6 +30,7 @@
 #include "quota.h"
 #include "recovery.h"
 #include "dir.h"
+#include "glops.h"
 
 struct workqueue_struct *gfs2_control_wq;
 
@@ -161,9 +162,14 @@ static int __init init_gfs2_fs(void)
 	if (!gfs2_control_wq)
 		goto fail_recovery;
 
+	gfs2_freeze_wq = alloc_workqueue("freeze_workqueue", 0, 0);
+
+	if (!gfs2_freeze_wq)
+		goto fail_control;
+
 	gfs2_page_pool = mempool_create_page_pool(64, 0);
 	if (!gfs2_page_pool)
-		goto fail_control;
+		goto fail_freeze;
 
 	gfs2_register_debugfs();
 
@@ -171,6 +177,8 @@ static int __init init_gfs2_fs(void)
 
 	return 0;
 
+fail_freeze:
+	destroy_workqueue(gfs2_freeze_wq);
 fail_control:
 	destroy_workqueue(gfs2_control_wq);
 fail_recovery:
@@ -224,6 +232,7 @@ static void __exit exit_gfs2_fs(void)
 	unregister_filesystem(&gfs2meta_fs_type);
 	destroy_workqueue(gfs_recovery_wq);
 	destroy_workqueue(gfs2_control_wq);
+	destroy_workqueue(gfs2_freeze_wq);
 	list_lru_destroy(&gfs2_qd_lru);
 
 	rcu_barrier();
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index d3eae24..8633ad3 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -129,11 +129,11 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb)
 
 	init_rwsem(&sdp->sd_log_flush_lock);
 	atomic_set(&sdp->sd_log_in_flight, 0);
+	atomic_set(&sdp->sd_reserving_log, 0);
+	init_waitqueue_head(&sdp->sd_reserving_log_wait);
 	init_waitqueue_head(&sdp->sd_log_flush_wait);
-	init_waitqueue_head(&sdp->sd_log_frozen_wait);
-	atomic_set(&sdp->sd_log_freeze, 0);
-	atomic_set(&sdp->sd_frozen_root, 0);
-	init_waitqueue_head(&sdp->sd_frozen_root_wait);
+	atomic_set(&sdp->sd_freeze_state, SFS_UNFROZEN);
+	mutex_init(&sdp->sd_freeze_mutex);
 
 	return sdp;
 }
@@ -760,15 +760,7 @@ static int init_journal(struct gfs2_sbd *sdp, int undo)
 	set_bit(SDF_JOURNAL_CHECKED, &sdp->sd_flags);
 	gfs2_glock_dq_uninit(&ji_gh);
 	jindex = 0;
-	if (!sdp->sd_args.ar_spectator) {
-		error = gfs2_glock_nq_init(sdp->sd_freeze_gl, LM_ST_SHARED, 0,
-					   &sdp->sd_thaw_gh);
-		if (error) {
-			fs_err(sdp, "can't acquire freeze glock: %d\n", error);
-			goto fail_jinode_gh;
-		}
-	}
-	gfs2_glock_dq_uninit(&sdp->sd_thaw_gh);
+	INIT_WORK(&sdp->sd_freeze_work, gfs2_freeze_func);
 	return 0;
 
 fail_jinode_gh:
@@ -1082,6 +1074,7 @@ static int fill_super(struct super_block *sb, struct gfs2_args *args, int silent
 	sb->s_export_op = &gfs2_export_ops;
 	sb->s_xattr = gfs2_xattr_handlers;
 	sb->s_qcop = &gfs2_quotactl_ops;
+	sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP;
 	sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE;
 	sb->s_time_gran = 1;
 	sb->s_maxbytes = MAX_LFS_FILESIZE;
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index 64b29f7..c8b148b 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -1360,13 +1360,8 @@ void gfs2_quota_cleanup(struct gfs2_sbd *sdp)
 
 	gfs2_assert_warn(sdp, !atomic_read(&sdp->sd_quota_count));
 
-	if (sdp->sd_quota_bitmap) {
-		if (is_vmalloc_addr(sdp->sd_quota_bitmap))
-			vfree(sdp->sd_quota_bitmap);
-		else
-			kfree(sdp->sd_quota_bitmap);
-		sdp->sd_quota_bitmap = NULL;
-	}
+	kvfree(sdp->sd_quota_bitmap);
+	sdp->sd_quota_bitmap = NULL;
 }
 
 static void quotad_error(struct gfs2_sbd *sdp, const char *msg, int error)
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 7474c41..9150207 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -936,7 +936,7 @@ static int read_rindex_entry(struct gfs2_inode *ip)
 	rgd->rd_gl->gl_vm.start = rgd->rd_addr * bsize;
 	rgd->rd_gl->gl_vm.end = rgd->rd_gl->gl_vm.start + (rgd->rd_length * bsize) - 1;
 	rgd->rd_rgl = (struct gfs2_rgrp_lvb *)rgd->rd_gl->gl_lksb.sb_lvbptr;
-	rgd->rd_flags &= ~GFS2_RDF_UPTODATE;
+	rgd->rd_flags &= ~(GFS2_RDF_UPTODATE | GFS2_RDF_PREFERRED);
 	if (rgd->rd_data > sdp->sd_max_rg_data)
 		sdp->sd_max_rg_data = rgd->rd_data;
 	spin_lock(&sdp->sd_rindex_spin);
@@ -955,6 +955,36 @@ fail:
 }
 
 /**
+ * set_rgrp_preferences - Run all the rgrps, selecting some we prefer to use
+ * @sdp: the GFS2 superblock
+ *
+ * The purpose of this function is to select a subset of the resource groups
+ * and mark them as PREFERRED. We do it in such a way that each node prefers
+ * to use a unique set of rgrps to minimize glock contention.
+ */
+static void set_rgrp_preferences(struct gfs2_sbd *sdp)
+{
+	struct gfs2_rgrpd *rgd, *first;
+	int i;
+
+	/* Skip an initial number of rgrps, based on this node's journal ID.
+	   That should start each node out on its own set. */
+	rgd = gfs2_rgrpd_get_first(sdp);
+	for (i = 0; i < sdp->sd_lockstruct.ls_jid; i++)
+		rgd = gfs2_rgrpd_get_next(rgd);
+	first = rgd;
+
+	do {
+		rgd->rd_flags |= GFS2_RDF_PREFERRED;
+		for (i = 0; i < sdp->sd_journals; i++) {
+			rgd = gfs2_rgrpd_get_next(rgd);
+			if (rgd == first)
+				break;
+		}
+	} while (rgd != first);
+}
+
+/**
  * gfs2_ri_update - Pull in a new resource index from the disk
  * @ip: pointer to the rindex inode
  *
@@ -973,6 +1003,8 @@ static int gfs2_ri_update(struct gfs2_inode *ip)
 	if (error < 0)
 		return error;
 
+	set_rgrp_preferences(sdp);
+
 	sdp->sd_rindex_uptodate = 1;
 	return 0;
 }
@@ -1891,6 +1923,25 @@ static bool gfs2_select_rgrp(struct gfs2_rgrpd **pos, const struct gfs2_rgrpd *b
 }
 
 /**
+ * fast_to_acquire - determine if a resource group will be fast to acquire
+ *
+ * If this is one of our preferred rgrps, it should be quicker to acquire,
+ * because we tried to set ourselves up as dlm lock master.
+ */
+static inline int fast_to_acquire(struct gfs2_rgrpd *rgd)
+{
+	struct gfs2_glock *gl = rgd->rd_gl;
+
+	if (gl->gl_state != LM_ST_UNLOCKED && list_empty(&gl->gl_holders) &&
+	    !test_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags) &&
+	    !test_bit(GLF_DEMOTE, &gl->gl_flags))
+		return 1;
+	if (rgd->rd_flags & GFS2_RDF_PREFERRED)
+		return 1;
+	return 0;
+}
+
+/**
  * gfs2_inplace_reserve - Reserve space in the filesystem
  * @ip: the inode to reserve space for
  * @ap: the allocation parameters
@@ -1932,10 +1983,15 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, const struct gfs2_alloc_parms *a
 			rg_locked = 0;
 			if (skip && skip--)
 				goto next_rgrp;
-			if (!gfs2_rs_active(rs) && (loops < 2) &&
-			     gfs2_rgrp_used_recently(rs, 1000) &&
-			     gfs2_rgrp_congested(rs->rs_rbm.rgd, loops))
-				goto next_rgrp;
+			if (!gfs2_rs_active(rs)) {
+				if (loops == 0 &&
+				    !fast_to_acquire(rs->rs_rbm.rgd))
+					goto next_rgrp;
+				if ((loops < 2) &&
+				    gfs2_rgrp_used_recently(rs, 1000) &&
+				    gfs2_rgrp_congested(rs->rs_rbm.rgd, loops))
+					goto next_rgrp;
+			}
 			error = gfs2_glock_nq_init(rs->rs_rbm.rgd->rd_gl,
 						   LM_ST_EXCLUSIVE, flags,
 						   &rs->rs_rgd_gh);
@@ -2195,6 +2251,9 @@ static void gfs2_adjust_reservation(struct gfs2_inode *ip,
 			trace_gfs2_rs(rs, TRACE_RS_CLAIM);
 			if (rs->rs_free && !ret)
 				goto out;
+			/* We used up our block reservation, so we should
+			   reserve more blocks next time. */
+			atomic_add(RGRP_RSRV_ADDBLKS, &rs->rs_sizehint);
 		}
 		__rs_deltree(rs);
 	}
diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h
index 5d8f085..b104f4a 100644
--- a/fs/gfs2/rgrp.h
+++ b/fs/gfs2/rgrp.h
@@ -20,6 +20,7 @@
  */
 #define RGRP_RSRV_MINBYTES 8
 #define RGRP_RSRV_MINBLKS ((u32)(RGRP_RSRV_MINBYTES * GFS2_NBBY))
+#define RGRP_RSRV_ADDBLKS 64
 
 struct gfs2_rgrpd;
 struct gfs2_sbd;
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index a346f56..5b327f8 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -26,6 +26,7 @@
 #include <linux/wait.h>
 #include <linux/writeback.h>
 #include <linux/backing-dev.h>
+#include <linux/kernel.h>
 
 #include "gfs2.h"
 #include "incore.h"
@@ -399,7 +400,7 @@ int gfs2_make_fs_rw(struct gfs2_sbd *sdp)
 {
 	struct gfs2_inode *ip = GFS2_I(sdp->sd_jdesc->jd_inode);
 	struct gfs2_glock *j_gl = ip->i_gl;
-	struct gfs2_holder thaw_gh;
+	struct gfs2_holder freeze_gh;
 	struct gfs2_log_header_host head;
 	int error;
 
@@ -408,7 +409,7 @@ int gfs2_make_fs_rw(struct gfs2_sbd *sdp)
 		return error;
 
 	error = gfs2_glock_nq_init(sdp->sd_freeze_gl, LM_ST_SHARED, 0,
-				   &thaw_gh);
+				   &freeze_gh);
 	if (error)
 		goto fail_threads;
 
@@ -434,13 +435,13 @@ int gfs2_make_fs_rw(struct gfs2_sbd *sdp)
 
 	set_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags);
 
-	gfs2_glock_dq_uninit(&thaw_gh);
+	gfs2_glock_dq_uninit(&freeze_gh);
 
 	return 0;
 
 fail:
-	thaw_gh.gh_flags |= GL_NOCACHE;
-	gfs2_glock_dq_uninit(&thaw_gh);
+	freeze_gh.gh_flags |= GL_NOCACHE;
+	gfs2_glock_dq_uninit(&freeze_gh);
 fail_threads:
 	kthread_stop(sdp->sd_quotad_process);
 	kthread_stop(sdp->sd_logd_process);
@@ -580,14 +581,15 @@ int gfs2_statfs_sync(struct super_block *sb, int type)
 	struct buffer_head *m_bh, *l_bh;
 	int error;
 
+	sb_start_write(sb);
 	error = gfs2_glock_nq_init(m_ip->i_gl, LM_ST_EXCLUSIVE, GL_NOCACHE,
 				   &gh);
 	if (error)
-		return error;
+		goto out;
 
 	error = gfs2_meta_inode_buffer(m_ip, &m_bh);
 	if (error)
-		goto out;
+		goto out_unlock;
 
 	spin_lock(&sdp->sd_statfs_spin);
 	gfs2_statfs_change_in(m_sc, m_bh->b_data +
@@ -615,8 +617,10 @@ out_bh2:
 	brelse(l_bh);
 out_bh:
 	brelse(m_bh);
-out:
+out_unlock:
 	gfs2_glock_dq_uninit(&gh);
+out:
+	sb_end_write(sb);
 	return error;
 }
 
@@ -643,14 +647,8 @@ static int gfs2_lock_fs_check_clean(struct gfs2_sbd *sdp,
 	struct lfcc *lfcc;
 	LIST_HEAD(list);
 	struct gfs2_log_header_host lh;
-	struct gfs2_inode *dip = GFS2_I(sdp->sd_root_dir->d_inode);
 	int error;
 
-	error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0,
-				   &sdp->sd_freeze_root_gh);
-	if (error)
-		return error;
-	atomic_set(&sdp->sd_frozen_root, 1);
 	list_for_each_entry(jd, &sdp->sd_jindex_list, jd_list) {
 		lfcc = kmalloc(sizeof(struct lfcc), GFP_KERNEL);
 		if (!lfcc) {
@@ -692,11 +690,6 @@ out:
 		gfs2_glock_dq_uninit(&lfcc->gh);
 		kfree(lfcc);
 	}
-	if (error) {
-		atomic_dec(&sdp->sd_frozen_root);
-		wait_event(sdp->sd_frozen_root_wait, atomic_read(&sdp->sd_frozen_root) == 0);
-		gfs2_glock_dq_uninit(&sdp->sd_freeze_root_gh);
-	}
 	return error;
 }
 
@@ -834,18 +827,14 @@ out:
 
 static int gfs2_make_fs_ro(struct gfs2_sbd *sdp)
 {
-	struct gfs2_holder thaw_gh;
+	struct gfs2_holder freeze_gh;
 	int error;
 
 	error = gfs2_glock_nq_init(sdp->sd_freeze_gl, LM_ST_SHARED, GL_NOCACHE,
-				   &thaw_gh);
+				   &freeze_gh);
 	if (error && !test_bit(SDF_SHUTDOWN, &sdp->sd_flags))
 		return error;
 
-	down_write(&sdp->sd_log_flush_lock);
-	clear_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags);
-	up_write(&sdp->sd_log_flush_lock);
-
 	kthread_stop(sdp->sd_quotad_process);
 	kthread_stop(sdp->sd_logd_process);
 
@@ -853,11 +842,16 @@ static int gfs2_make_fs_ro(struct gfs2_sbd *sdp)
 	gfs2_quota_sync(sdp->sd_vfs, 0);
 	gfs2_statfs_sync(sdp->sd_vfs, 0);
 
+	down_write(&sdp->sd_log_flush_lock);
+	clear_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags);
+	up_write(&sdp->sd_log_flush_lock);
+
 	gfs2_log_flush(sdp, NULL, SHUTDOWN_FLUSH);
+	wait_event(sdp->sd_reserving_log_wait, atomic_read(&sdp->sd_reserving_log) == 0);
 	gfs2_assert_warn(sdp, atomic_read(&sdp->sd_log_blks_free) == sdp->sd_jdesc->jd_blocks);
 
-	if (thaw_gh.gh_gl)
-		gfs2_glock_dq_uninit(&thaw_gh);
+	if (freeze_gh.gh_gl)
+		gfs2_glock_dq_uninit(&freeze_gh);
 
 	gfs2_quota_cleanup(sdp);
 
@@ -943,11 +937,41 @@ static int gfs2_sync_fs(struct super_block *sb, int wait)
 	struct gfs2_sbd *sdp = sb->s_fs_info;
 
 	gfs2_quota_sync(sb, -1);
-	if (wait && sdp && !atomic_read(&sdp->sd_log_freeze))
+	if (wait && sdp)
 		gfs2_log_flush(sdp, NULL, NORMAL_FLUSH);
 	return 0;
 }
 
+void gfs2_freeze_func(struct work_struct *work)
+{
+	int error;
+	struct gfs2_holder freeze_gh;
+	struct gfs2_sbd *sdp = container_of(work, struct gfs2_sbd, sd_freeze_work);
+	struct super_block *sb = sdp->sd_vfs;
+
+	atomic_inc(&sb->s_active);
+	error = gfs2_glock_nq_init(sdp->sd_freeze_gl, LM_ST_SHARED, 0,
+				   &freeze_gh);
+	if (error) {
+		printk(KERN_INFO "GFS2: couln't get freeze lock : %d\n", error);
+		gfs2_assert_withdraw(sdp, 0);
+	}
+	else {
+		atomic_set(&sdp->sd_freeze_state, SFS_UNFROZEN);
+		error = thaw_super(sb);
+		if (error) {
+			printk(KERN_INFO "GFS2: couldn't thaw filesystem: %d\n",
+			       error);
+			gfs2_assert_withdraw(sdp, 0);
+		}
+		if (!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags))
+			freeze_gh.gh_flags |= GL_NOCACHE;
+		gfs2_glock_dq_uninit(&freeze_gh);
+	}
+	deactivate_super(sb);
+	return;
+}
+
 /**
  * gfs2_freeze - prevent further writes to the filesystem
  * @sb: the VFS structure for the filesystem
@@ -957,10 +981,16 @@ static int gfs2_sync_fs(struct super_block *sb, int wait)
 static int gfs2_freeze(struct super_block *sb)
 {
 	struct gfs2_sbd *sdp = sb->s_fs_info;
-	int error;
+	int error = 0;
 
-	if (test_bit(SDF_SHUTDOWN, &sdp->sd_flags))
-		return -EINVAL;
+	mutex_lock(&sdp->sd_freeze_mutex);
+	if (atomic_read(&sdp->sd_freeze_state) != SFS_UNFROZEN)
+		goto out;
+
+	if (test_bit(SDF_SHUTDOWN, &sdp->sd_flags)) {
+		error = -EINVAL;
+		goto out;
+	}
 
 	for (;;) {
 		error = gfs2_lock_fs_check_clean(sdp, &sdp->sd_freeze_gh);
@@ -980,7 +1010,10 @@ static int gfs2_freeze(struct super_block *sb)
 		fs_err(sdp, "retrying...\n");
 		msleep(1000);
 	}
-	return 0;
+	error = 0;
+out:
+	mutex_unlock(&sdp->sd_freeze_mutex);
+	return error;
 }
 
 /**
@@ -993,10 +1026,15 @@ static int gfs2_unfreeze(struct super_block *sb)
 {
 	struct gfs2_sbd *sdp = sb->s_fs_info;
 
+	mutex_lock(&sdp->sd_freeze_mutex);
+        if (atomic_read(&sdp->sd_freeze_state) != SFS_FROZEN ||
+	    sdp->sd_freeze_gh.gh_gl == NULL) {
+		mutex_unlock(&sdp->sd_freeze_mutex);
+                return 0;
+	}
+
 	gfs2_glock_dq_uninit(&sdp->sd_freeze_gh);
-	atomic_dec(&sdp->sd_frozen_root);
-	wait_event(sdp->sd_frozen_root_wait, atomic_read(&sdp->sd_frozen_root) == 0);
-	gfs2_glock_dq_uninit(&sdp->sd_freeze_root_gh);
+	mutex_unlock(&sdp->sd_freeze_mutex);
 	return 0;
 }
 
@@ -1618,8 +1656,8 @@ const struct super_operations gfs2_super_ops = {
 	.evict_inode		= gfs2_evict_inode,
 	.put_super		= gfs2_put_super,
 	.sync_fs		= gfs2_sync_fs,
-	.freeze_fs 		= gfs2_freeze,
-	.unfreeze_fs		= gfs2_unfreeze,
+	.freeze_super		= gfs2_freeze,
+	.thaw_super		= gfs2_unfreeze,
 	.statfs			= gfs2_statfs,
 	.remount_fs		= gfs2_remount_fs,
 	.drop_inode		= gfs2_drop_inode,
diff --git a/fs/gfs2/super.h b/fs/gfs2/super.h
index 90e3322..73c97dc 100644
--- a/fs/gfs2/super.h
+++ b/fs/gfs2/super.h
@@ -45,6 +45,7 @@ extern void gfs2_statfs_change_in(struct gfs2_statfs_change_host *sc,
 extern void update_statfs(struct gfs2_sbd *sdp, struct buffer_head *m_bh,
 			  struct buffer_head *l_bh);
 extern int gfs2_statfs_sync(struct super_block *sb, int type);
+extern void gfs2_freeze_func(struct work_struct *work);
 
 extern struct file_system_type gfs2_fs_type;
 extern struct file_system_type gfs2meta_fs_type;
diff --git a/fs/gfs2/trans.c b/fs/gfs2/trans.c
index 42bfd336..88bff24 100644
--- a/fs/gfs2/trans.c
+++ b/fs/gfs2/trans.c
@@ -89,14 +89,17 @@ void gfs2_trans_end(struct gfs2_sbd *sdp)
 {
 	struct gfs2_trans *tr = current->journal_info;
 	s64 nbuf;
+	int alloced = tr->tr_alloced;
+
 	BUG_ON(!tr);
 	current->journal_info = NULL;
 
 	if (!tr->tr_touched) {
 		gfs2_log_release(sdp, tr->tr_reserved);
-		if (tr->tr_alloced)
+		if (alloced) {
 			kfree(tr);
-		sb_end_intwrite(sdp->sd_vfs);
+			sb_end_intwrite(sdp->sd_vfs);
+		}
 		return;
 	}
 
@@ -109,13 +112,14 @@ void gfs2_trans_end(struct gfs2_sbd *sdp)
 		gfs2_print_trans(tr);
 
 	gfs2_log_commit(sdp, tr);
-	if (tr->tr_alloced && !tr->tr_attached)
+	if (alloced && !tr->tr_attached)
 			kfree(tr);
 	up_read(&sdp->sd_log_flush_lock);
 
 	if (sdp->sd_vfs->s_flags & MS_SYNCHRONOUS)
 		gfs2_log_flush(sdp, NULL, NORMAL_FLUSH);
-	sb_end_intwrite(sdp->sd_vfs);
+	if (alloced)
+		sb_end_intwrite(sdp->sd_vfs);
 }
 
 static struct gfs2_bufdata *gfs2_alloc_bufdata(struct gfs2_glock *gl,
@@ -192,6 +196,7 @@ static void meta_lo_add(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd)
 {
 	struct gfs2_meta_header *mh;
 	struct gfs2_trans *tr;
+	enum gfs2_freeze_state state = atomic_read(&sdp->sd_freeze_state);
 
 	tr = current->journal_info;
 	tr->tr_touched = 1;
@@ -205,6 +210,10 @@ static void meta_lo_add(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd)
 		       (unsigned long long)bd->bd_bh->b_blocknr);
 		BUG();
 	}
+	if (unlikely(state == SFS_FROZEN)) {
+		printk(KERN_INFO "GFS2:adding buf while frozen\n");
+		gfs2_assert_withdraw(sdp, 0);
+	}
 	gfs2_pin(sdp, bd->bd_bh);
 	mh->__pad0 = cpu_to_be64(0);
 	mh->mh_jid = cpu_to_be32(sdp->sd_jdesc->jd_jid);
diff --git a/fs/hfs/catalog.c b/fs/hfs/catalog.c
index ff0316b..db458ee 100644
--- a/fs/hfs/catalog.c
+++ b/fs/hfs/catalog.c
@@ -162,14 +162,16 @@ err2:
  */
 int hfs_cat_keycmp(const btree_key *key1, const btree_key *key2)
 {
-	int retval;
+	__be32 k1p, k2p;
 
-	retval = be32_to_cpu(key1->cat.ParID) - be32_to_cpu(key2->cat.ParID);
-	if (!retval)
-		retval = hfs_strcmp(key1->cat.CName.name, key1->cat.CName.len,
-				    key2->cat.CName.name, key2->cat.CName.len);
+	k1p = key1->cat.ParID;
+	k2p = key2->cat.ParID;
 
-	return retval;
+	if (k1p != k2p)
+		return be32_to_cpu(k1p) < be32_to_cpu(k2p) ? -1 : 1;
+
+	return hfs_strcmp(key1->cat.CName.name, key1->cat.CName.len,
+			  key2->cat.CName.name, key2->cat.CName.len);
 }
 
 /* Try to get a catalog entry for given catalog id */
diff --git a/fs/hppfs/hppfs.c b/fs/hppfs/hppfs.c
index 4338ff3..5f27551 100644
--- a/fs/hppfs/hppfs.c
+++ b/fs/hppfs/hppfs.c
@@ -548,10 +548,11 @@ struct hppfs_dirent {
 	struct dentry *dentry;
 };
 
-static int hppfs_filldir(void *d, const char *name, int size,
+static int hppfs_filldir(struct dir_context *ctx, const char *name, int size,
 			 loff_t offset, u64 inode, unsigned int type)
 {
-	struct hppfs_dirent *dirent = d;
+	struct hppfs_dirent *dirent =
+		container_of(ctx, struct hppfs_dirent, ctx);
 
 	if (file_removed(dirent->dentry, name))
 		return 0;
diff --git a/fs/inode.c b/fs/inode.c
index 26753ba..2ed95f7 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -143,9 +143,6 @@ int inode_init_always(struct super_block *sb, struct inode *inode)
 	inode->i_blocks = 0;
 	inode->i_bytes = 0;
 	inode->i_generation = 0;
-#ifdef CONFIG_QUOTA
-	memset(&inode->i_dquot, 0, sizeof(inode->i_dquot));
-#endif
 	inode->i_pipe = NULL;
 	inode->i_bdev = NULL;
 	inode->i_cdev = NULL;
diff --git a/fs/internal.h b/fs/internal.h
index 9477f8f..757ba2a 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -47,7 +47,6 @@ extern void __init chrdev_init(void);
 /*
  * namei.c
  */
-extern int __inode_permission(struct inode *, int);
 extern int user_path_mountpoint_at(int, const char __user *, unsigned int, struct path *);
 extern int vfs_path_lookup(struct dentry *, struct vfsmount *,
 			   const char *, unsigned int, struct path *);
@@ -139,12 +138,6 @@ extern long prune_dcache_sb(struct super_block *sb, unsigned long nr_to_scan,
 extern int rw_verify_area(int, struct file *, const loff_t *, size_t);
 
 /*
- * splice.c
- */
-extern long do_splice_direct(struct file *in, loff_t *ppos, struct file *out,
-		loff_t *opos, size_t len, unsigned int flags);
-
-/*
  * pipe.c
  */
 extern const struct file_operations pipefifo_fops;
diff --git a/fs/ioctl.c b/fs/ioctl.c
index 8ac3fad..77c9a78 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -518,10 +518,12 @@ static int ioctl_fsfreeze(struct file *filp)
 		return -EPERM;
 
 	/* If filesystem doesn't support freeze feature, return. */
-	if (sb->s_op->freeze_fs == NULL)
+	if (sb->s_op->freeze_fs == NULL && sb->s_op->freeze_super == NULL)
 		return -EOPNOTSUPP;
 
 	/* Freeze */
+	if (sb->s_op->freeze_super)
+		return sb->s_op->freeze_super(sb);
 	return freeze_super(sb);
 }
 
@@ -533,6 +535,8 @@ static int ioctl_fsthaw(struct file *filp)
 		return -EPERM;
 
 	/* Thaw */
+	if (sb->s_op->thaw_super)
+		return sb->s_op->thaw_super(sb);
 	return thaw_super(sb);
 }
 
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
index 881b3bd..d67a16f 100644
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c
@@ -29,13 +29,9 @@
 #define BEQUIET
 
 static int isofs_hashi(const struct dentry *parent, struct qstr *qstr);
-static int isofs_hash(const struct dentry *parent, struct qstr *qstr);
 static int isofs_dentry_cmpi(const struct dentry *parent,
 		const struct dentry *dentry,
 		unsigned int len, const char *str, const struct qstr *name);
-static int isofs_dentry_cmp(const struct dentry *parent,
-		const struct dentry *dentry,
-		unsigned int len, const char *str, const struct qstr *name);
 
 #ifdef CONFIG_JOLIET
 static int isofs_hashi_ms(const struct dentry *parent, struct qstr *qstr);
@@ -135,10 +131,6 @@ static const struct super_operations isofs_sops = {
 
 static const struct dentry_operations isofs_dentry_ops[] = {
 	{
-		.d_hash		= isofs_hash,
-		.d_compare	= isofs_dentry_cmp,
-	},
-	{
 		.d_hash		= isofs_hashi,
 		.d_compare	= isofs_dentry_cmpi,
 	},
@@ -182,27 +174,6 @@ struct iso9660_options{
  * Compute the hash for the isofs name corresponding to the dentry.
  */
 static int
-isofs_hash_common(struct qstr *qstr, int ms)
-{
-	const char *name;
-	int len;
-
-	len = qstr->len;
-	name = qstr->name;
-	if (ms) {
-		while (len && name[len-1] == '.')
-			len--;
-	}
-
-	qstr->hash = full_name_hash(name, len);
-
-	return 0;
-}
-
-/*
- * Compute the hash for the isofs name corresponding to the dentry.
- */
-static int
 isofs_hashi_common(struct qstr *qstr, int ms)
 {
 	const char *name;
@@ -258,32 +229,40 @@ static int isofs_dentry_cmp_common(
 }
 
 static int
-isofs_hash(const struct dentry *dentry, struct qstr *qstr)
-{
-	return isofs_hash_common(qstr, 0);
-}
-
-static int
 isofs_hashi(const struct dentry *dentry, struct qstr *qstr)
 {
 	return isofs_hashi_common(qstr, 0);
 }
 
 static int
-isofs_dentry_cmp(const struct dentry *parent, const struct dentry *dentry,
+isofs_dentry_cmpi(const struct dentry *parent, const struct dentry *dentry,
 		unsigned int len, const char *str, const struct qstr *name)
 {
-	return isofs_dentry_cmp_common(len, str, name, 0, 0);
+	return isofs_dentry_cmp_common(len, str, name, 0, 1);
 }
 
+#ifdef CONFIG_JOLIET
+/*
+ * Compute the hash for the isofs name corresponding to the dentry.
+ */
 static int
-isofs_dentry_cmpi(const struct dentry *parent, const struct dentry *dentry,
-		unsigned int len, const char *str, const struct qstr *name)
+isofs_hash_common(struct qstr *qstr, int ms)
 {
-	return isofs_dentry_cmp_common(len, str, name, 0, 1);
+	const char *name;
+	int len;
+
+	len = qstr->len;
+	name = qstr->name;
+	if (ms) {
+		while (len && name[len-1] == '.')
+			len--;
+	}
+
+	qstr->hash = full_name_hash(name, len);
+
+	return 0;
 }
 
-#ifdef CONFIG_JOLIET
 static int
 isofs_hash_ms(const struct dentry *dentry, struct qstr *qstr)
 {
@@ -930,7 +909,8 @@ root_found:
 	if (opt.check == 'r')
 		table++;
 
-	s->s_d_op = &isofs_dentry_ops[table];
+	if (table)
+		s->s_d_op = &isofs_dentry_ops[table - 1];
 
 	/* get the root dentry */
 	s->s_root = d_make_root(inode);
diff --git a/fs/isofs/namei.c b/fs/isofs/namei.c
index 9529564..7b543e6 100644
--- a/fs/isofs/namei.c
+++ b/fs/isofs/namei.c
@@ -18,25 +18,10 @@ static int
 isofs_cmp(struct dentry *dentry, const char *compare, int dlen)
 {
 	struct qstr qstr;
-
-	if (!compare)
-		return 1;
-
-	/* check special "." and ".." files */
-	if (dlen == 1) {
-		/* "." */
-		if (compare[0] == 0) {
-			if (!dentry->d_name.len)
-				return 0;
-			compare = ".";
-		} else if (compare[0] == 1) {
-			compare = "..";
-			dlen = 2;
-		}
-	}
-
 	qstr.name = compare;
 	qstr.len = dlen;
+	if (likely(!dentry->d_op))
+		return dentry->d_name.len != dlen || memcmp(dentry->d_name.name, compare, dlen);
 	return dentry->d_op->d_compare(NULL, NULL, dentry->d_name.len, dentry->d_name.name, &qstr);
 }
 
@@ -146,7 +131,8 @@ isofs_find_entry(struct inode *dir, struct dentry *dentry,
 				(!(de->flags[-sbi->s_high_sierra] & 1))) &&
 			(sbi->s_showassoc ||
 				(!(de->flags[-sbi->s_high_sierra] & 4)))) {
-			match = (isofs_cmp(dentry, dpnt, dlen) == 0);
+			if (dpnt && (dlen > 1 || dpnt[0] > 1))
+				match = (isofs_cmp(dentry, dpnt, dlen) == 0);
 		}
 		if (match) {
 			isofs_normalize_block_and_offset(de,
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c
index 06fe11e..c46a79a 100644
--- a/fs/jbd/journal.c
+++ b/fs/jbd/journal.c
@@ -886,7 +886,7 @@ journal_t * journal_init_inode (struct inode *inode)
 		goto out_err;
 	}
 
-	bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize);
+	bh = getblk_unmovable(journal->j_dev, blocknr, journal->j_blocksize);
 	if (!bh) {
 		printk(KERN_ERR
 		       "%s: Cannot get buffer for journal superblock\n",
@@ -1373,8 +1373,7 @@ int journal_destroy(journal_t *journal)
 	}
 	mutex_unlock(&journal->j_checkpoint_mutex);
 
-	if (journal->j_inode)
-		iput(journal->j_inode);
+	iput(journal->j_inode);
 	if (journal->j_revoke)
 		journal_destroy_revoke(journal);
 	kfree(journal->j_wbuf);
diff --git a/fs/jbd/revoke.c b/fs/jbd/revoke.c
index 8898bbd..dcead63 100644
--- a/fs/jbd/revoke.c
+++ b/fs/jbd/revoke.c
@@ -93,6 +93,7 @@
 #include <linux/bio.h>
 #endif
 #include <linux/log2.h>
+#include <linux/hash.h>
 
 static struct kmem_cache *revoke_record_cache;
 static struct kmem_cache *revoke_table_cache;
@@ -129,15 +130,11 @@ static void flush_descriptor(journal_t *, struct journal_head *, int, int);
 
 /* Utility functions to maintain the revoke table */
 
-/* Borrowed from buffer.c: this is a tried and tested block hash function */
 static inline int hash(journal_t *journal, unsigned int block)
 {
 	struct jbd_revoke_table_s *table = journal->j_revoke;
-	int hash_shift = table->hash_shift;
 
-	return ((block << (hash_shift - 6)) ^
-		(block >> 13) ^
-		(block << (hash_shift - 12))) & (table->hash_size - 1);
+	return hash_32(block, table->hash_shift);
 }
 
 static int insert_revoke_hash(journal_t *journal, unsigned int blocknr,
diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c
index 7f34f47..988b32e 100644
--- a/fs/jbd2/checkpoint.c
+++ b/fs/jbd2/checkpoint.c
@@ -96,15 +96,8 @@ static int __try_to_free_cp_buf(struct journal_head *jh)
 
 	if (jh->b_transaction == NULL && !buffer_locked(bh) &&
 	    !buffer_dirty(bh) && !buffer_write_io_error(bh)) {
-		/*
-		 * Get our reference so that bh cannot be freed before
-		 * we unlock it
-		 */
-		get_bh(bh);
 		JBUFFER_TRACE(jh, "remove from checkpoint list");
 		ret = __jbd2_journal_remove_checkpoint(jh) + 1;
-		BUFFER_TRACE(bh, "release");
-		__brelse(bh);
 	}
 	return ret;
 }
@@ -122,8 +115,6 @@ void __jbd2_log_wait_for_space(journal_t *journal)
 
 	nblocks = jbd2_space_needed(journal);
 	while (jbd2_log_space_left(journal) < nblocks) {
-		if (journal->j_flags & JBD2_ABORT)
-			return;
 		write_unlock(&journal->j_state_lock);
 		mutex_lock(&journal->j_checkpoint_mutex);
 
@@ -139,6 +130,10 @@ void __jbd2_log_wait_for_space(journal_t *journal)
 		 * trace for forensic evidence.
 		 */
 		write_lock(&journal->j_state_lock);
+		if (journal->j_flags & JBD2_ABORT) {
+			mutex_unlock(&journal->j_checkpoint_mutex);
+			return;
+		}
 		spin_lock(&journal->j_list_lock);
 		nblocks = jbd2_space_needed(journal);
 		space_left = jbd2_log_space_left(journal);
@@ -183,58 +178,6 @@ void __jbd2_log_wait_for_space(journal_t *journal)
 	}
 }
 
-/*
- * Clean up transaction's list of buffers submitted for io.
- * We wait for any pending IO to complete and remove any clean
- * buffers. Note that we take the buffers in the opposite ordering
- * from the one in which they were submitted for IO.
- *
- * Return 0 on success, and return <0 if some buffers have failed
- * to be written out.
- *
- * Called with j_list_lock held.
- */
-static int __wait_cp_io(journal_t *journal, transaction_t *transaction)
-{
-	struct journal_head *jh;
-	struct buffer_head *bh;
-	tid_t this_tid;
-	int released = 0;
-	int ret = 0;
-
-	this_tid = transaction->t_tid;
-restart:
-	/* Did somebody clean up the transaction in the meanwhile? */
-	if (journal->j_checkpoint_transactions != transaction ||
-			transaction->t_tid != this_tid)
-		return ret;
-	while (!released && transaction->t_checkpoint_io_list) {
-		jh = transaction->t_checkpoint_io_list;
-		bh = jh2bh(jh);
-		get_bh(bh);
-		if (buffer_locked(bh)) {
-			spin_unlock(&journal->j_list_lock);
-			wait_on_buffer(bh);
-			/* the journal_head may have gone by now */
-			BUFFER_TRACE(bh, "brelse");
-			__brelse(bh);
-			spin_lock(&journal->j_list_lock);
-			goto restart;
-		}
-		if (unlikely(buffer_write_io_error(bh)))
-			ret = -EIO;
-
-		/*
-		 * Now in whatever state the buffer currently is, we know that
-		 * it has been written out and so we can drop it from the list
-		 */
-		released = __jbd2_journal_remove_checkpoint(jh);
-		__brelse(bh);
-	}
-
-	return ret;
-}
-
 static void
 __flush_batch(journal_t *journal, int *batch_count)
 {
@@ -255,81 +198,6 @@ __flush_batch(journal_t *journal, int *batch_count)
 }
 
 /*
- * Try to flush one buffer from the checkpoint list to disk.
- *
- * Return 1 if something happened which requires us to abort the current
- * scan of the checkpoint list.  Return <0 if the buffer has failed to
- * be written out.
- *
- * Called with j_list_lock held and drops it if 1 is returned
- */
-static int __process_buffer(journal_t *journal, struct journal_head *jh,
-			    int *batch_count, transaction_t *transaction)
-{
-	struct buffer_head *bh = jh2bh(jh);
-	int ret = 0;
-
-	if (buffer_locked(bh)) {
-		get_bh(bh);
-		spin_unlock(&journal->j_list_lock);
-		wait_on_buffer(bh);
-		/* the journal_head may have gone by now */
-		BUFFER_TRACE(bh, "brelse");
-		__brelse(bh);
-		ret = 1;
-	} else if (jh->b_transaction != NULL) {
-		transaction_t *t = jh->b_transaction;
-		tid_t tid = t->t_tid;
-
-		transaction->t_chp_stats.cs_forced_to_close++;
-		spin_unlock(&journal->j_list_lock);
-		if (unlikely(journal->j_flags & JBD2_UNMOUNT))
-			/*
-			 * The journal thread is dead; so starting and
-			 * waiting for a commit to finish will cause
-			 * us to wait for a _very_ long time.
-			 */
-			printk(KERN_ERR "JBD2: %s: "
-			       "Waiting for Godot: block %llu\n",
-			       journal->j_devname,
-			       (unsigned long long) bh->b_blocknr);
-		jbd2_log_start_commit(journal, tid);
-		jbd2_log_wait_commit(journal, tid);
-		ret = 1;
-	} else if (!buffer_dirty(bh)) {
-		ret = 1;
-		if (unlikely(buffer_write_io_error(bh)))
-			ret = -EIO;
-		get_bh(bh);
-		BUFFER_TRACE(bh, "remove from checkpoint");
-		__jbd2_journal_remove_checkpoint(jh);
-		spin_unlock(&journal->j_list_lock);
-		__brelse(bh);
-	} else {
-		/*
-		 * Important: we are about to write the buffer, and
-		 * possibly block, while still holding the journal lock.
-		 * We cannot afford to let the transaction logic start
-		 * messing around with this buffer before we write it to
-		 * disk, as that would break recoverability.
-		 */
-		BUFFER_TRACE(bh, "queue");
-		get_bh(bh);
-		J_ASSERT_BH(bh, !buffer_jwrite(bh));
-		journal->j_chkpt_bhs[*batch_count] = bh;
-		__buffer_relink_io(jh);
-		transaction->t_chp_stats.cs_written++;
-		(*batch_count)++;
-		if (*batch_count == JBD2_NR_BATCH) {
-			spin_unlock(&journal->j_list_lock);
-			__flush_batch(journal, batch_count);
-			ret = 1;
-		}
-	}
-	return ret;
-}
-
-/*
  * Perform an actual checkpoint. We take the first transaction on the
  * list of transactions to be checkpointed and send all its buffers
  * to disk. We submit larger chunks of data at once.
@@ -339,9 +207,11 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh,
  */
 int jbd2_log_do_checkpoint(journal_t *journal)
 {
-	transaction_t *transaction;
-	tid_t this_tid;
-	int result;
+	struct journal_head	*jh;
+	struct buffer_head	*bh;
+	transaction_t		*transaction;
+	tid_t			this_tid;
+	int			result, batch_count = 0;
 
 	jbd_debug(1, "Start checkpoint\n");
 
@@ -374,45 +244,117 @@ restart:
 	 * done (maybe it's a new transaction, but it fell at the same
 	 * address).
 	 */
-	if (journal->j_checkpoint_transactions == transaction &&
-			transaction->t_tid == this_tid) {
-		int batch_count = 0;
-		struct journal_head *jh;
-		int retry = 0, err;
-
-		while (!retry && transaction->t_checkpoint_list) {
-			jh = transaction->t_checkpoint_list;
-			retry = __process_buffer(journal, jh, &batch_count,
-						 transaction);
-			if (retry < 0 && !result)
-				result = retry;
-			if (!retry && (need_resched() ||
-				spin_needbreak(&journal->j_list_lock))) {
-				spin_unlock(&journal->j_list_lock);
-				retry = 1;
-				break;
-			}
+	if (journal->j_checkpoint_transactions != transaction ||
+	    transaction->t_tid != this_tid)
+		goto out;
+
+	/* checkpoint all of the transaction's buffers */
+	while (transaction->t_checkpoint_list) {
+		jh = transaction->t_checkpoint_list;
+		bh = jh2bh(jh);
+
+		if (buffer_locked(bh)) {
+			spin_unlock(&journal->j_list_lock);
+			get_bh(bh);
+			wait_on_buffer(bh);
+			/* the journal_head may have gone by now */
+			BUFFER_TRACE(bh, "brelse");
+			__brelse(bh);
+			goto retry;
 		}
+		if (jh->b_transaction != NULL) {
+			transaction_t *t = jh->b_transaction;
+			tid_t tid = t->t_tid;
 
-		if (batch_count) {
-			if (!retry) {
-				spin_unlock(&journal->j_list_lock);
-				retry = 1;
-			}
-			__flush_batch(journal, &batch_count);
+			transaction->t_chp_stats.cs_forced_to_close++;
+			spin_unlock(&journal->j_list_lock);
+			if (unlikely(journal->j_flags & JBD2_UNMOUNT))
+				/*
+				 * The journal thread is dead; so
+				 * starting and waiting for a commit
+				 * to finish will cause us to wait for
+				 * a _very_ long time.
+				 */
+				printk(KERN_ERR
+		"JBD2: %s: Waiting for Godot: block %llu\n",
+		journal->j_devname, (unsigned long long) bh->b_blocknr);
+
+			jbd2_log_start_commit(journal, tid);
+			jbd2_log_wait_commit(journal, tid);
+			goto retry;
+		}
+		if (!buffer_dirty(bh)) {
+			if (unlikely(buffer_write_io_error(bh)) && !result)
+				result = -EIO;
+			BUFFER_TRACE(bh, "remove from checkpoint");
+			if (__jbd2_journal_remove_checkpoint(jh))
+				/* The transaction was released; we're done */
+				goto out;
+			continue;
 		}
+		/*
+		 * Important: we are about to write the buffer, and
+		 * possibly block, while still holding the journal
+		 * lock.  We cannot afford to let the transaction
+		 * logic start messing around with this buffer before
+		 * we write it to disk, as that would break
+		 * recoverability.
+		 */
+		BUFFER_TRACE(bh, "queue");
+		get_bh(bh);
+		J_ASSERT_BH(bh, !buffer_jwrite(bh));
+		journal->j_chkpt_bhs[batch_count++] = bh;
+		__buffer_relink_io(jh);
+		transaction->t_chp_stats.cs_written++;
+		if ((batch_count == JBD2_NR_BATCH) ||
+		    need_resched() ||
+		    spin_needbreak(&journal->j_list_lock))
+			goto unlock_and_flush;
+	}
 
-		if (retry) {
+	if (batch_count) {
+		unlock_and_flush:
+			spin_unlock(&journal->j_list_lock);
+		retry:
+			if (batch_count)
+				__flush_batch(journal, &batch_count);
 			spin_lock(&journal->j_list_lock);
 			goto restart;
+	}
+
+	/*
+	 * Now we issued all of the transaction's buffers, let's deal
+	 * with the buffers that are out for I/O.
+	 */
+restart2:
+	/* Did somebody clean up the transaction in the meanwhile? */
+	if (journal->j_checkpoint_transactions != transaction ||
+	    transaction->t_tid != this_tid)
+		goto out;
+
+	while (transaction->t_checkpoint_io_list) {
+		jh = transaction->t_checkpoint_io_list;
+		bh = jh2bh(jh);
+		if (buffer_locked(bh)) {
+			spin_unlock(&journal->j_list_lock);
+			get_bh(bh);
+			wait_on_buffer(bh);
+			/* the journal_head may have gone by now */
+			BUFFER_TRACE(bh, "brelse");
+			__brelse(bh);
+			spin_lock(&journal->j_list_lock);
+			goto restart2;
 		}
+		if (unlikely(buffer_write_io_error(bh)) && !result)
+			result = -EIO;
+
 		/*
-		 * Now we have cleaned up the first transaction's checkpoint
-		 * list. Let's clean up the second one
+		 * Now in whatever state the buffer currently is, we
+		 * know that it has been written out and so we can
+		 * drop it from the list
 		 */
-		err = __wait_cp_io(journal, transaction);
-		if (!result)
-			result = err;
+		if (__jbd2_journal_remove_checkpoint(jh))
+			break;
 	}
 out:
 	spin_unlock(&journal->j_list_lock);
@@ -478,18 +420,16 @@ int jbd2_cleanup_journal_tail(journal_t *journal)
  * Find all the written-back checkpoint buffers in the given list and
  * release them.
  *
- * Called with the journal locked.
  * Called with j_list_lock held.
- * Returns number of buffers reaped (for debug)
+ * Returns 1 if we freed the transaction, 0 otherwise.
  */
-
-static int journal_clean_one_cp_list(struct journal_head *jh, int *released)
+static int journal_clean_one_cp_list(struct journal_head *jh)
 {
 	struct journal_head *last_jh;
 	struct journal_head *next_jh = jh;
-	int ret, freed = 0;
+	int ret;
+	int freed = 0;
 
-	*released = 0;
 	if (!jh)
 		return 0;
 
@@ -498,13 +438,11 @@ static int journal_clean_one_cp_list(struct journal_head *jh, int *released)
 		jh = next_jh;
 		next_jh = jh->b_cpnext;
 		ret = __try_to_free_cp_buf(jh);
-		if (ret) {
-			freed++;
-			if (ret == 2) {
-				*released = 1;
-				return freed;
-			}
-		}
+		if (!ret)
+			return freed;
+		if (ret == 2)
+			return 1;
+		freed = 1;
 		/*
 		 * This function only frees up some memory
 		 * if possible so we dont have an obligation
@@ -523,49 +461,49 @@ static int journal_clean_one_cp_list(struct journal_head *jh, int *released)
  *
  * Find all the written-back checkpoint buffers in the journal and release them.
  *
- * Called with the journal locked.
  * Called with j_list_lock held.
- * Returns number of buffers reaped (for debug)
  */
-
-int __jbd2_journal_clean_checkpoint_list(journal_t *journal)
+void __jbd2_journal_clean_checkpoint_list(journal_t *journal)
 {
 	transaction_t *transaction, *last_transaction, *next_transaction;
-	int ret = 0;
-	int released;
+	int ret;
 
 	transaction = journal->j_checkpoint_transactions;
 	if (!transaction)
-		goto out;
+		return;
 
 	last_transaction = transaction->t_cpprev;
 	next_transaction = transaction;
 	do {
 		transaction = next_transaction;
 		next_transaction = transaction->t_cpnext;
-		ret += journal_clean_one_cp_list(transaction->
-				t_checkpoint_list, &released);
+		ret = journal_clean_one_cp_list(transaction->t_checkpoint_list);
 		/*
 		 * This function only frees up some memory if possible so we
 		 * dont have an obligation to finish processing. Bail out if
 		 * preemption requested:
 		 */
 		if (need_resched())
-			goto out;
-		if (released)
+			return;
+		if (ret)
 			continue;
 		/*
 		 * It is essential that we are as careful as in the case of
 		 * t_checkpoint_list with removing the buffer from the list as
 		 * we can possibly see not yet submitted buffers on io_list
 		 */
-		ret += journal_clean_one_cp_list(transaction->
-				t_checkpoint_io_list, &released);
+		ret = journal_clean_one_cp_list(transaction->
+				t_checkpoint_io_list);
 		if (need_resched())
-			goto out;
+			return;
+		/*
+		 * Stop scanning if we couldn't free the transaction. This
+		 * avoids pointless scanning of transactions which still
+		 * weren't checkpointed.
+		 */
+		if (!ret)
+			return;
 	} while (transaction != last_transaction);
-out:
-	return ret;
 }
 
 /*
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 19d74d8..1df94fa 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -1237,7 +1237,7 @@ journal_t * jbd2_journal_init_inode (struct inode *inode)
 		goto out_err;
 	}
 
-	bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize);
+	bh = getblk_unmovable(journal->j_dev, blocknr, journal->j_blocksize);
 	if (!bh) {
 		printk(KERN_ERR
 		       "%s: Cannot get buffer for journal superblock\n",
@@ -1522,14 +1522,6 @@ static int journal_get_superblock(journal_t *journal)
 		goto out;
 	}
 
-	if (jbd2_journal_has_csum_v2or3(journal) &&
-	    JBD2_HAS_COMPAT_FEATURE(journal, JBD2_FEATURE_COMPAT_CHECKSUM)) {
-		/* Can't have checksum v1 and v2 on at the same time! */
-		printk(KERN_ERR "JBD2: Can't enable checksumming v1 and v2 "
-		       "at the same time!\n");
-		goto out;
-	}
-
 	if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2) &&
 	    JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V3)) {
 		/* Can't have checksum v2 and v3 at the same time! */
@@ -1538,6 +1530,14 @@ static int journal_get_superblock(journal_t *journal)
 		goto out;
 	}
 
+	if (jbd2_journal_has_csum_v2or3(journal) &&
+	    JBD2_HAS_COMPAT_FEATURE(journal, JBD2_FEATURE_COMPAT_CHECKSUM)) {
+		/* Can't have checksum v1 and v2 on at the same time! */
+		printk(KERN_ERR "JBD2: Can't enable checksumming v1 and v2/3 "
+		       "at the same time!\n");
+		goto out;
+	}
+
 	if (!jbd2_verify_csum_type(journal, sb)) {
 		printk(KERN_ERR "JBD2: Unknown checksum type\n");
 		goto out;
@@ -1853,13 +1853,12 @@ int jbd2_journal_set_features (journal_t *journal, unsigned long compat,
 				journal->j_chksum_driver = NULL;
 				return 0;
 			}
-		}
 
-		/* Precompute checksum seed for all metadata */
-		if (jbd2_journal_has_csum_v2or3(journal))
+			/* Precompute checksum seed for all metadata */
 			journal->j_csum_seed = jbd2_chksum(journal, ~0,
 							   sb->s_uuid,
 							   sizeof(sb->s_uuid));
+		}
 	}
 
 	/* If enabling v1 checksums, downgrade superblock */
diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c
index 9b329b5..bcbef08 100644
--- a/fs/jbd2/recovery.c
+++ b/fs/jbd2/recovery.c
@@ -525,6 +525,7 @@ static int do_one_pass(journal_t *journal,
 			    !jbd2_descr_block_csum_verify(journal,
 							  bh->b_data)) {
 				err = -EIO;
+				brelse(bh);
 				goto failed;
 			}
 
diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c
index d5e95a1..c6cbaef 100644
--- a/fs/jbd2/revoke.c
+++ b/fs/jbd2/revoke.c
@@ -92,6 +92,7 @@
 #include <linux/init.h>
 #include <linux/bio.h>
 #include <linux/log2.h>
+#include <linux/hash.h>
 #endif
 
 static struct kmem_cache *jbd2_revoke_record_cache;
@@ -130,16 +131,9 @@ static void flush_descriptor(journal_t *, struct buffer_head *, int, int);
 
 /* Utility functions to maintain the revoke table */
 
-/* Borrowed from buffer.c: this is a tried and tested block hash function */
 static inline int hash(journal_t *journal, unsigned long long block)
 {
-	struct jbd2_revoke_table_s *table = journal->j_revoke;
-	int hash_shift = table->hash_shift;
-	int hash = (int)block ^ (int)((block >> 31) >> 1);
-
-	return ((hash << (hash_shift - 6)) ^
-		(hash >> 13) ^
-		(hash << (hash_shift - 12))) & (table->hash_size - 1);
+	return hash_64(block, journal->j_revoke->hash_shift);
 }
 
 static int insert_revoke_hash(journal_t *journal, unsigned long long blocknr,
diff --git a/fs/jfs/jfs_incore.h b/fs/jfs/jfs_incore.h
index cf47f09..fa7e795 100644
--- a/fs/jfs/jfs_incore.h
+++ b/fs/jfs/jfs_incore.h
@@ -94,6 +94,9 @@ struct jfs_inode_info {
 			unchar _inline_ea[128];	/* 128: inline extended attr */
 		} link;
 	} u;
+#ifdef CONFIG_QUOTA
+	struct dquot *i_dquot[MAXQUOTAS];
+#endif
 	u32 dev;	/* will die when we get wide dev_t */
 	struct inode	vfs_inode;
 };
diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c
index d59c7de..38fdc53 100644
--- a/fs/jfs/namei.c
+++ b/fs/jfs/namei.c
@@ -84,7 +84,7 @@ static int jfs_create(struct inode *dip, struct dentry *dentry, umode_t mode,
 	struct inode *iplist[2];
 	struct tblock *tblk;
 
-	jfs_info("jfs_create: dip:0x%p name:%s", dip, dentry->d_name.name);
+	jfs_info("jfs_create: dip:0x%p name:%pd", dip, dentry);
 
 	dquot_initialize(dip);
 
@@ -216,7 +216,7 @@ static int jfs_mkdir(struct inode *dip, struct dentry *dentry, umode_t mode)
 	struct inode *iplist[2];
 	struct tblock *tblk;
 
-	jfs_info("jfs_mkdir: dip:0x%p name:%s", dip, dentry->d_name.name);
+	jfs_info("jfs_mkdir: dip:0x%p name:%pd", dip, dentry);
 
 	dquot_initialize(dip);
 
@@ -352,7 +352,7 @@ static int jfs_rmdir(struct inode *dip, struct dentry *dentry)
 	struct inode *iplist[2];
 	struct tblock *tblk;
 
-	jfs_info("jfs_rmdir: dip:0x%p name:%s", dip, dentry->d_name.name);
+	jfs_info("jfs_rmdir: dip:0x%p name:%pd", dip, dentry);
 
 	/* Init inode for quota operations. */
 	dquot_initialize(dip);
@@ -480,7 +480,7 @@ static int jfs_unlink(struct inode *dip, struct dentry *dentry)
 	s64 new_size = 0;
 	int commit_flag;
 
-	jfs_info("jfs_unlink: dip:0x%p name:%s", dip, dentry->d_name.name);
+	jfs_info("jfs_unlink: dip:0x%p name:%pd", dip, dentry);
 
 	/* Init inode for quota operations. */
 	dquot_initialize(dip);
@@ -797,8 +797,7 @@ static int jfs_link(struct dentry *old_dentry,
 	struct btstack btstack;
 	struct inode *iplist[2];
 
-	jfs_info("jfs_link: %s %s", old_dentry->d_name.name,
-		 dentry->d_name.name);
+	jfs_info("jfs_link: %pd %pd", old_dentry, dentry);
 
 	dquot_initialize(dir);
 
@@ -1082,8 +1081,7 @@ static int jfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 	int commit_flag;
 
 
-	jfs_info("jfs_rename: %s %s", old_dentry->d_name.name,
-		 new_dentry->d_name.name);
+	jfs_info("jfs_rename: %pd %pd", old_dentry, new_dentry);
 
 	dquot_initialize(old_dir);
 	dquot_initialize(new_dir);
@@ -1355,7 +1353,7 @@ static int jfs_mknod(struct inode *dir, struct dentry *dentry,
 	if (!new_valid_dev(rdev))
 		return -EINVAL;
 
-	jfs_info("jfs_mknod: %s", dentry->d_name.name);
+	jfs_info("jfs_mknod: %pd", dentry);
 
 	dquot_initialize(dir);
 
@@ -1444,7 +1442,7 @@ static struct dentry *jfs_lookup(struct inode *dip, struct dentry *dentry, unsig
 	struct component_name key;
 	int rc;
 
-	jfs_info("jfs_lookup: name = %s", dentry->d_name.name);
+	jfs_info("jfs_lookup: name = %pd", dentry);
 
 	if ((rc = get_UCSname(&key, dentry)))
 		return ERR_PTR(rc);
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index 93e897e..16c3a955 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -117,6 +117,9 @@ static struct inode *jfs_alloc_inode(struct super_block *sb)
 	jfs_inode = kmem_cache_alloc(jfs_inode_cachep, GFP_NOFS);
 	if (!jfs_inode)
 		return NULL;
+#ifdef CONFIG_QUOTA
+	memset(&jfs_inode->i_dquot, 0, sizeof(jfs_inode->i_dquot));
+#endif
 	return &jfs_inode->vfs_inode;
 }
 
@@ -537,6 +540,7 @@ static int jfs_fill_super(struct super_block *sb, void *data, int silent)
 #ifdef CONFIG_QUOTA
 	sb->dq_op = &dquot_operations;
 	sb->s_qcop = &dquot_quotactl_ops;
+	sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP;
 #endif
 
 	/*
@@ -836,6 +840,10 @@ out:
 	return len - towrite;
 }
 
+static struct dquot **jfs_get_dquots(struct inode *inode)
+{
+	return JFS_IP(inode)->i_dquot;
+}
 #endif
 
 static const struct super_operations jfs_super_operations = {
@@ -854,6 +862,7 @@ static const struct super_operations jfs_super_operations = {
 #ifdef CONFIG_QUOTA
 	.quota_read	= jfs_quota_read,
 	.quota_write	= jfs_quota_write,
+	.get_dquots	= jfs_get_dquots,
 #endif
 };
 
diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c
index 1c771931..37989f0 100644
--- a/fs/kernfs/dir.c
+++ b/fs/kernfs/dir.c
@@ -807,7 +807,7 @@ static struct dentry *kernfs_iop_lookup(struct inode *dir,
 	}
 
 	/* instantiate and hash dentry */
-	ret = d_materialise_unique(dentry, inode);
+	ret = d_splice_alias(inode, dentry);
  out_unlock:
 	mutex_unlock(&kernfs_mutex);
 	return ret;
diff --git a/fs/libfs.c b/fs/libfs.c
index 171d284..005843c 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -114,18 +114,18 @@ loff_t dcache_dir_lseek(struct file *file, loff_t offset, int whence)
 
 			spin_lock(&dentry->d_lock);
 			/* d_lock not required for cursor */
-			list_del(&cursor->d_u.d_child);
+			list_del(&cursor->d_child);
 			p = dentry->d_subdirs.next;
 			while (n && p != &dentry->d_subdirs) {
 				struct dentry *next;
-				next = list_entry(p, struct dentry, d_u.d_child);
+				next = list_entry(p, struct dentry, d_child);
 				spin_lock_nested(&next->d_lock, DENTRY_D_LOCK_NESTED);
 				if (simple_positive(next))
 					n--;
 				spin_unlock(&next->d_lock);
 				p = p->next;
 			}
-			list_add_tail(&cursor->d_u.d_child, p);
+			list_add_tail(&cursor->d_child, p);
 			spin_unlock(&dentry->d_lock);
 		}
 	}
@@ -150,7 +150,7 @@ int dcache_readdir(struct file *file, struct dir_context *ctx)
 {
 	struct dentry *dentry = file->f_path.dentry;
 	struct dentry *cursor = file->private_data;
-	struct list_head *p, *q = &cursor->d_u.d_child;
+	struct list_head *p, *q = &cursor->d_child;
 
 	if (!dir_emit_dots(file, ctx))
 		return 0;
@@ -159,7 +159,7 @@ int dcache_readdir(struct file *file, struct dir_context *ctx)
 		list_move(q, &dentry->d_subdirs);
 
 	for (p = q->next; p != &dentry->d_subdirs; p = p->next) {
-		struct dentry *next = list_entry(p, struct dentry, d_u.d_child);
+		struct dentry *next = list_entry(p, struct dentry, d_child);
 		spin_lock_nested(&next->d_lock, DENTRY_D_LOCK_NESTED);
 		if (!simple_positive(next)) {
 			spin_unlock(&next->d_lock);
@@ -287,7 +287,7 @@ int simple_empty(struct dentry *dentry)
 	int ret = 0;
 
 	spin_lock(&dentry->d_lock);
-	list_for_each_entry(child, &dentry->d_subdirs, d_u.d_child) {
+	list_for_each_entry(child, &dentry->d_subdirs, d_child) {
 		spin_lock_nested(&child->d_lock, DENTRY_D_LOCK_NESTED);
 		if (simple_positive(child)) {
 			spin_unlock(&child->d_lock);
diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
index 13db95f..5659874 100644
--- a/fs/lockd/svclock.c
+++ b/fs/lockd/svclock.c
@@ -53,7 +53,7 @@ static const struct rpc_call_ops nlmsvc_grant_ops;
 static LIST_HEAD(nlm_blocked);
 static DEFINE_SPINLOCK(nlm_blocked_lock);
 
-#ifdef LOCKD_DEBUG
+#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
 static const char *nlmdbg_cookie2a(const struct nlm_cookie *cookie)
 {
 	/*
diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c
index b6f3b84..d12ff4e 100644
--- a/fs/lockd/svcsubs.c
+++ b/fs/lockd/svcsubs.c
@@ -408,7 +408,7 @@ nlmsvc_match_sb(void *datap, struct nlm_file *file)
 {
 	struct super_block *sb = datap;
 
-	return sb == file->f_file->f_path.dentry->d_sb;
+	return sb == file_inode(file->f_file)->i_sb;
 }
 
 /**
diff --git a/fs/namei.c b/fs/namei.c
index 43927d1..db5fe86 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -416,6 +416,7 @@ int __inode_permission(struct inode *inode, int mask)
 
 	return security_inode_permission(inode, mask);
 }
+EXPORT_SYMBOL(__inode_permission);
 
 /**
  * sb_permission - Check superblock-level permissions
@@ -2383,22 +2384,17 @@ kern_path_mountpoint(int dfd, const char *name, struct path *path,
 }
 EXPORT_SYMBOL(kern_path_mountpoint);
 
-/*
- * It's inline, so penalty for filesystems that don't use sticky bit is
- * minimal.
- */
-static inline int check_sticky(struct inode *dir, struct inode *inode)
+int __check_sticky(struct inode *dir, struct inode *inode)
 {
 	kuid_t fsuid = current_fsuid();
 
-	if (!(dir->i_mode & S_ISVTX))
-		return 0;
 	if (uid_eq(inode->i_uid, fsuid))
 		return 0;
 	if (uid_eq(dir->i_uid, fsuid))
 		return 0;
 	return !capable_wrt_inode_uidgid(inode, CAP_FOWNER);
 }
+EXPORT_SYMBOL(__check_sticky);
 
 /*
  *	Check whether we can remove a link victim from directory dir, check
@@ -2501,7 +2497,7 @@ struct dentry *lock_rename(struct dentry *p1, struct dentry *p2)
 	}
 
 	mutex_lock_nested(&p1->d_inode->i_mutex, I_MUTEX_PARENT);
-	mutex_lock_nested(&p2->d_inode->i_mutex, I_MUTEX_CHILD);
+	mutex_lock_nested(&p2->d_inode->i_mutex, I_MUTEX_PARENT2);
 	return NULL;
 }
 EXPORT_SYMBOL(lock_rename);
@@ -3064,9 +3060,12 @@ finish_open_created:
 	error = may_open(&nd->path, acc_mode, open_flag);
 	if (error)
 		goto out;
-	file->f_path.mnt = nd->path.mnt;
-	error = finish_open(file, nd->path.dentry, NULL, opened);
-	if (error) {
+
+	BUG_ON(*opened & FILE_OPENED); /* once it's opened, it's opened */
+	error = vfs_open(&nd->path, file, current_cred());
+	if (!error) {
+		*opened |= FILE_OPENED;
+	} else {
 		if (error == -EOPENSTALE)
 			goto stale_open;
 		goto out;
@@ -3155,7 +3154,8 @@ static int do_tmpfile(int dfd, struct filename *pathname,
 	if (error)
 		goto out2;
 	audit_inode(pathname, nd->path.dentry, 0);
-	error = may_open(&nd->path, op->acc_mode, op->open_flag);
+	/* Don't check for other permissions, the inode was just created */
+	error = may_open(&nd->path, MAY_OPEN, op->open_flag);
 	if (error)
 		goto out2;
 	file->f_path.mnt = nd->path.mnt;
@@ -4210,12 +4210,16 @@ SYSCALL_DEFINE5(renameat2, int, olddfd, const char __user *, oldname,
 	bool should_retry = false;
 	int error;
 
-	if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE))
+	if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT))
 		return -EINVAL;
 
-	if ((flags & RENAME_NOREPLACE) && (flags & RENAME_EXCHANGE))
+	if ((flags & (RENAME_NOREPLACE | RENAME_WHITEOUT)) &&
+	    (flags & RENAME_EXCHANGE))
 		return -EINVAL;
 
+	if ((flags & RENAME_WHITEOUT) && !capable(CAP_MKNOD))
+		return -EPERM;
+
 retry:
 	from = user_path_parent(olddfd, oldname, &oldnd, lookup_flags);
 	if (IS_ERR(from)) {
@@ -4347,6 +4351,20 @@ SYSCALL_DEFINE2(rename, const char __user *, oldname, const char __user *, newna
 	return sys_renameat2(AT_FDCWD, oldname, AT_FDCWD, newname, 0);
 }
 
+int vfs_whiteout(struct inode *dir, struct dentry *dentry)
+{
+	int error = may_create(dir, dentry);
+	if (error)
+		return error;
+
+	if (!dir->i_op->mknod)
+		return -EPERM;
+
+	return dir->i_op->mknod(dir, dentry,
+				S_IFCHR | WHITEOUT_MODE, WHITEOUT_DEV);
+}
+EXPORT_SYMBOL(vfs_whiteout);
+
 int readlink_copy(char __user *buffer, int buflen, const char *link)
 {
 	int len = PTR_ERR(link);
diff --git a/fs/namespace.c b/fs/namespace.c
index fbba8b1..5b66b2b 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1686,6 +1686,33 @@ void drop_collected_mounts(struct vfsmount *mnt)
 	namespace_unlock();
 }
 
+/**
+ * clone_private_mount - create a private clone of a path
+ *
+ * This creates a new vfsmount, which will be the clone of @path.  The new will
+ * not be attached anywhere in the namespace and will be private (i.e. changes
+ * to the originating mount won't be propagated into this).
+ *
+ * Release with mntput().
+ */
+struct vfsmount *clone_private_mount(struct path *path)
+{
+	struct mount *old_mnt = real_mount(path->mnt);
+	struct mount *new_mnt;
+
+	if (IS_MNT_UNBINDABLE(old_mnt))
+		return ERR_PTR(-EINVAL);
+
+	down_read(&namespace_sem);
+	new_mnt = clone_mnt(old_mnt, path->dentry, CL_PRIVATE);
+	up_read(&namespace_sem);
+	if (IS_ERR(new_mnt))
+		return ERR_CAST(new_mnt);
+
+	return &new_mnt->mnt;
+}
+EXPORT_SYMBOL_GPL(clone_private_mount);
+
 int iterate_mounts(int (*f)(struct vfsmount *, void *), void *arg,
 		   struct vfsmount *root)
 {
diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c
index 7cb751d..0089601 100644
--- a/fs/ncpfs/dir.c
+++ b/fs/ncpfs/dir.c
@@ -198,8 +198,8 @@ ncp_single_volume(struct ncp_server *server)
 
 static inline int ncp_is_server_root(struct inode *inode)
 {
-	return (!ncp_single_volume(NCP_SERVER(inode)) &&
-		inode == inode->i_sb->s_root->d_inode);
+	return !ncp_single_volume(NCP_SERVER(inode)) &&
+		is_root_inode(inode);
 }
 
 
@@ -403,7 +403,7 @@ ncp_dget_fpos(struct dentry *dentry, struct dentry *parent, unsigned long fpos)
 
 	/* If a pointer is invalid, we search the dentry. */
 	spin_lock(&parent->d_lock);
-	list_for_each_entry(dent, &parent->d_subdirs, d_u.d_child) {
+	list_for_each_entry(dent, &parent->d_subdirs, d_child) {
 		if ((unsigned long)dent->d_fsdata == fpos) {
 			if (dent->d_inode)
 				dget(dent);
@@ -685,8 +685,7 @@ static void
 ncp_read_volume_list(struct file *file, struct dir_context *ctx,
 			struct ncp_cache_control *ctl)
 {
-	struct dentry *dentry = file->f_path.dentry;
-	struct inode *inode = dentry->d_inode;
+	struct inode *inode = file_inode(file);
 	struct ncp_server *server = NCP_SERVER(inode);
 	struct ncp_volume_info info;
 	struct ncp_entry_info entry;
@@ -721,8 +720,7 @@ static void
 ncp_do_readdir(struct file *file, struct dir_context *ctx,
 						struct ncp_cache_control *ctl)
 {
-	struct dentry *dentry = file->f_path.dentry;
-	struct inode *dir = dentry->d_inode;
+	struct inode *dir = file_inode(file);
 	struct ncp_server *server = NCP_SERVER(dir);
 	struct nw_search_sequence seq;
 	struct ncp_entry_info entry;
diff --git a/fs/ncpfs/file.c b/fs/ncpfs/file.c
index 77640a8..1dd7007 100644
--- a/fs/ncpfs/file.c
+++ b/fs/ncpfs/file.c
@@ -100,8 +100,7 @@ out:
 static ssize_t
 ncp_file_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
 {
-	struct dentry *dentry = file->f_path.dentry;
-	struct inode *inode = dentry->d_inode;
+	struct inode *inode = file_inode(file);
 	size_t already_read = 0;
 	off_t pos;
 	size_t bufsize;
@@ -109,7 +108,7 @@ ncp_file_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
 	void* freepage;
 	size_t freelen;
 
-	ncp_dbg(1, "enter %pd2\n", dentry);
+	ncp_dbg(1, "enter %pD2\n", file);
 
 	pos = *ppos;
 
@@ -167,7 +166,7 @@ ncp_file_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
 
 	file_accessed(file);
 
-	ncp_dbg(1, "exit %pd2\n", dentry);
+	ncp_dbg(1, "exit %pD2\n", file);
 outrel:
 	ncp_inode_close(inode);		
 	return already_read ? already_read : error;
@@ -176,15 +175,14 @@ outrel:
 static ssize_t
 ncp_file_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos)
 {
-	struct dentry *dentry = file->f_path.dentry;
-	struct inode *inode = dentry->d_inode;
+	struct inode *inode = file_inode(file);
 	size_t already_written = 0;
 	off_t pos;
 	size_t bufsize;
 	int errno;
 	void* bouncebuffer;
 
-	ncp_dbg(1, "enter %pd2\n", dentry);
+	ncp_dbg(1, "enter %pD2\n", file);
 	if ((ssize_t) count < 0)
 		return -EINVAL;
 	pos = *ppos;
@@ -263,7 +261,7 @@ ncp_file_write(struct file *file, const char __user *buf, size_t count, loff_t *
 			i_size_write(inode, pos);
 		mutex_unlock(&inode->i_mutex);
 	}
-	ncp_dbg(1, "exit %pd2\n", dentry);
+	ncp_dbg(1, "exit %pD2\n", file);
 outrel:
 	ncp_inode_close(inode);		
 	return already_written ? already_written : errno;
diff --git a/fs/ncpfs/ioctl.c b/fs/ncpfs/ioctl.c
index d5659d9..cf7e043 100644
--- a/fs/ncpfs/ioctl.c
+++ b/fs/ncpfs/ioctl.c
@@ -447,7 +447,6 @@ static long __ncp_ioctl(struct inode *inode, unsigned int cmd, unsigned long arg
 						result = -EIO;
 					}
 				}
-				result = 0;
 			}
 			mutex_unlock(&server->root_setup_lock);
 
diff --git a/fs/ncpfs/mmap.c b/fs/ncpfs/mmap.c
index b359d12..33b873b 100644
--- a/fs/ncpfs/mmap.c
+++ b/fs/ncpfs/mmap.c
@@ -30,9 +30,7 @@
 static int ncp_file_mmap_fault(struct vm_area_struct *area,
 					struct vm_fault *vmf)
 {
-	struct file *file = area->vm_file;
-	struct dentry *dentry = file->f_path.dentry;
-	struct inode *inode = dentry->d_inode;
+	struct inode *inode = file_inode(area->vm_file);
 	char *pg_addr;
 	unsigned int already_read;
 	unsigned int count;
diff --git a/fs/ncpfs/ncplib_kernel.h b/fs/ncpfs/ncplib_kernel.h
index 52cb19d..b785f74 100644
--- a/fs/ncpfs/ncplib_kernel.h
+++ b/fs/ncpfs/ncplib_kernel.h
@@ -191,7 +191,7 @@ ncp_renew_dentries(struct dentry *parent)
 	struct dentry *dentry;
 
 	spin_lock(&parent->d_lock);
-	list_for_each_entry(dentry, &parent->d_subdirs, d_u.d_child) {
+	list_for_each_entry(dentry, &parent->d_subdirs, d_child) {
 		if (dentry->d_fsdata == NULL)
 			ncp_age_dentry(server, dentry);
 		else
@@ -207,7 +207,7 @@ ncp_invalidate_dircache_entries(struct dentry *parent)
 	struct dentry *dentry;
 
 	spin_lock(&parent->d_lock);
-	list_for_each_entry(dentry, &parent->d_subdirs, d_u.d_child) {
+	list_for_each_entry(dentry, &parent->d_subdirs, d_child) {
 		dentry->d_fsdata = NULL;
 		ncp_age_dentry(server, dentry);
 	}
diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
index 5228f20..77fec6a 100644
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
@@ -378,7 +378,7 @@ bl_write_pagelist(struct nfs_pgio_header *header, int sync)
 	loff_t offset = header->args.offset;
 	size_t count = header->args.count;
 	struct page **pages = header->args.pages;
-	int pg_index = pg_index = header->args.pgbase >> PAGE_CACHE_SHIFT;
+	int pg_index = header->args.pgbase >> PAGE_CACHE_SHIFT;
 	unsigned int pg_len;
 	struct blk_plug plug;
 	int i;
@@ -812,7 +812,7 @@ static u64 pnfs_num_cont_bytes(struct inode *inode, pgoff_t idx)
 
 	/* Optimize common case that writes from 0 to end of file */
 	end = DIV_ROUND_UP(i_size_read(inode), PAGE_CACHE_SIZE);
-	if (end != NFS_I(inode)->npages) {
+	if (end != inode->i_mapping->nrpages) {
 		rcu_read_lock();
 		end = page_cache_next_hole(mapping, idx + 1, ULONG_MAX);
 		rcu_read_unlock();
diff --git a/fs/nfs/blocklayout/rpc_pipefs.c b/fs/nfs/blocklayout/rpc_pipefs.c
index e966c02..dbe5839 100644
--- a/fs/nfs/blocklayout/rpc_pipefs.c
+++ b/fs/nfs/blocklayout/rpc_pipefs.c
@@ -65,17 +65,18 @@ bl_resolve_deviceid(struct nfs_server *server, struct pnfs_block_volume *b,
 
 	dprintk("%s CREATING PIPEFS MESSAGE\n", __func__);
 
+	mutex_lock(&nn->bl_mutex);
 	bl_pipe_msg.bl_wq = &nn->bl_wq;
 
 	b->simple.len += 4;	/* single volume */
 	if (b->simple.len > PAGE_SIZE)
-		return -EIO;
+		goto out_unlock;
 
 	memset(msg, 0, sizeof(*msg));
 	msg->len = sizeof(*bl_msg) + b->simple.len;
 	msg->data = kzalloc(msg->len, gfp_mask);
 	if (!msg->data)
-		goto out;
+		goto out_free_data;
 
 	bl_msg = msg->data;
 	bl_msg->type = BL_DEVICE_MOUNT,
@@ -87,7 +88,7 @@ bl_resolve_deviceid(struct nfs_server *server, struct pnfs_block_volume *b,
 	rc = rpc_queue_upcall(nn->bl_device_pipe, msg);
 	if (rc < 0) {
 		remove_wait_queue(&nn->bl_wq, &wq);
-		goto out;
+		goto out_free_data;
 	}
 
 	set_current_state(TASK_UNINTERRUPTIBLE);
@@ -97,19 +98,21 @@ bl_resolve_deviceid(struct nfs_server *server, struct pnfs_block_volume *b,
 	if (reply->status != BL_DEVICE_REQUEST_PROC) {
 		printk(KERN_WARNING "%s failed to decode device: %d\n",
 			__func__, reply->status);
-		goto out;
+		goto out_free_data;
 	}
 
 	dev = MKDEV(reply->major, reply->minor);
-out:
+out_free_data:
 	kfree(msg->data);
+out_unlock:
+	mutex_unlock(&nn->bl_mutex);
 	return dev;
 }
 
 static ssize_t bl_pipe_downcall(struct file *filp, const char __user *src,
 			 size_t mlen)
 {
-	struct nfs_net *nn = net_generic(filp->f_dentry->d_sb->s_fs_info,
+	struct nfs_net *nn = net_generic(file_inode(filp)->i_sb->s_fs_info,
 					 nfs_net_id);
 
 	if (mlen != sizeof (struct bl_dev_msg))
@@ -232,6 +235,7 @@ static int nfs4blocklayout_net_init(struct net *net)
 	struct nfs_net *nn = net_generic(net, nfs_net_id);
 	struct dentry *dentry;
 
+	mutex_init(&nn->bl_mutex);
 	init_waitqueue_head(&nn->bl_wq);
 	nn->bl_device_pipe = rpc_mkpipe_data(&bl_upcall_ops, 0);
 	if (IS_ERR(nn->bl_device_pipe))
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index 73466b9..e36a9d7 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -49,7 +49,7 @@ __be32 nfs4_callback_getattr(struct cb_getattrargs *args,
 		goto out_iput;
 	res->size = i_size_read(inode);
 	res->change_attr = delegation->change_attr;
-	if (nfsi->npages != 0)
+	if (nfsi->nrequests != 0)
 		res->change_attr++;
 	res->ctime = inode->i_ctime;
 	res->mtime = inode->i_mtime;
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index 5853f53..7f3f606 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -125,6 +125,8 @@ again:
 			continue;
 		if (!test_bit(NFS_DELEGATED_STATE, &state->flags))
 			continue;
+		if (!nfs4_valid_open_stateid(state))
+			continue;
 		if (!nfs4_stateid_match(&state->stateid, stateid))
 			continue;
 		get_nfs_open_context(ctx);
@@ -193,7 +195,11 @@ static int nfs_do_return_delegation(struct inode *inode, struct nfs_delegation *
 {
 	int res = 0;
 
-	res = nfs4_proc_delegreturn(inode, delegation->cred, &delegation->stateid, issync);
+	if (!test_bit(NFS_DELEGATION_REVOKED, &delegation->flags))
+		res = nfs4_proc_delegreturn(inode,
+				delegation->cred,
+				&delegation->stateid,
+				issync);
 	nfs_free_delegation(delegation);
 	return res;
 }
@@ -380,11 +386,13 @@ static int nfs_end_delegation_return(struct inode *inode, struct nfs_delegation
 {
 	struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
 	struct nfs_inode *nfsi = NFS_I(inode);
-	int err;
+	int err = 0;
 
 	if (delegation == NULL)
 		return 0;
 	do {
+		if (test_bit(NFS_DELEGATION_REVOKED, &delegation->flags))
+			break;
 		err = nfs_delegation_claim_opens(inode, &delegation->stateid);
 		if (!issync || err != -EAGAIN)
 			break;
@@ -605,10 +613,23 @@ static void nfs_client_mark_return_unused_delegation_types(struct nfs_client *cl
 	rcu_read_unlock();
 }
 
+static void nfs_revoke_delegation(struct inode *inode)
+{
+	struct nfs_delegation *delegation;
+	rcu_read_lock();
+	delegation = rcu_dereference(NFS_I(inode)->delegation);
+	if (delegation != NULL) {
+		set_bit(NFS_DELEGATION_REVOKED, &delegation->flags);
+		nfs_mark_return_delegation(NFS_SERVER(inode), delegation);
+	}
+	rcu_read_unlock();
+}
+
 void nfs_remove_bad_delegation(struct inode *inode)
 {
 	struct nfs_delegation *delegation;
 
+	nfs_revoke_delegation(inode);
 	delegation = nfs_inode_detach_delegation(inode);
 	if (delegation) {
 		nfs_inode_find_state_and_recover(inode, &delegation->stateid);
diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h
index 5c1cce3..e3c20a3 100644
--- a/fs/nfs/delegation.h
+++ b/fs/nfs/delegation.h
@@ -31,6 +31,7 @@ enum {
 	NFS_DELEGATION_RETURN_IF_CLOSED,
 	NFS_DELEGATION_REFERENCED,
 	NFS_DELEGATION_RETURNING,
+	NFS_DELEGATION_REVOKED,
 };
 
 int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res);
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 06e8cfc..9b0c55c 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -133,7 +133,7 @@ out:
 static int
 nfs_closedir(struct inode *inode, struct file *filp)
 {
-	put_nfs_open_dir_context(filp->f_path.dentry->d_inode, filp->private_data);
+	put_nfs_open_dir_context(file_inode(filp), filp->private_data);
 	return 0;
 }
 
@@ -499,7 +499,7 @@ void nfs_prime_dcache(struct dentry *parent, struct nfs_entry *entry)
 	if (IS_ERR(inode))
 		goto out;
 
-	alias = d_materialise_unique(dentry, inode);
+	alias = d_splice_alias(inode, dentry);
 	if (IS_ERR(alias))
 		goto out;
 	else if (alias) {
@@ -1393,7 +1393,7 @@ struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, unsigned in
 	nfs_advise_use_readdirplus(dir);
 
 no_entry:
-	res = d_materialise_unique(dentry, inode);
+	res = d_splice_alias(inode, dentry);
 	if (res != NULL) {
 		if (IS_ERR(res))
 			goto out_unblock_sillyrename;
@@ -1527,6 +1527,7 @@ int nfs_atomic_open(struct inode *dir, struct dentry *dentry,
 		case -ENOENT:
 			d_drop(dentry);
 			d_add(dentry, NULL);
+			nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
 			break;
 		case -EISDIR:
 		case -ENOTDIR:
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 20cffc8..10bf072 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -266,6 +266,7 @@ static void nfs_direct_req_free(struct kref *kref)
 {
 	struct nfs_direct_req *dreq = container_of(kref, struct nfs_direct_req, kref);
 
+	nfs_free_pnfs_ds_cinfo(&dreq->ds_cinfo);
 	if (dreq->l_ctx != NULL)
 		nfs_put_lock_context(dreq->l_ctx);
 	if (dreq->ctx != NULL)
diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c
index 46fab1cb..7afb52f 100644
--- a/fs/nfs/filelayout/filelayout.c
+++ b/fs/nfs/filelayout/filelayout.c
@@ -145,9 +145,6 @@ static int filelayout_async_handle_error(struct rpc_task *task,
 	case -NFS4ERR_DELEG_REVOKED:
 	case -NFS4ERR_ADMIN_REVOKED:
 	case -NFS4ERR_BAD_STATEID:
-		if (state == NULL)
-			break;
-		nfs_remove_bad_delegation(state->inode);
 	case -NFS4ERR_OPENMODE:
 		if (state == NULL)
 			break;
diff --git a/fs/nfs/filelayout/filelayoutdev.c b/fs/nfs/filelayout/filelayoutdev.c
index 9bb806a..bfecac7 100644
--- a/fs/nfs/filelayout/filelayoutdev.c
+++ b/fs/nfs/filelayout/filelayoutdev.c
@@ -204,8 +204,7 @@ destroy_ds(struct nfs4_pnfs_ds *ds)
 	ifdebug(FACILITY)
 		print_ds(ds);
 
-	if (ds->ds_clp)
-		nfs_put_client(ds->ds_clp);
+	nfs_put_client(ds->ds_clp);
 
 	while (!list_empty(&ds->ds_addrs)) {
 		da = list_first_entry(&ds->ds_addrs,
diff --git a/fs/nfs/fscache.c b/fs/nfs/fscache.c
index 3ef01f0..d63bea8 100644
--- a/fs/nfs/fscache.c
+++ b/fs/nfs/fscache.c
@@ -269,8 +269,8 @@ int nfs_fscache_release_page(struct page *page, gfp_t gfp)
 		if (!fscache_maybe_release_page(cookie, page, gfp))
 			return 0;
 
-		nfs_add_fscache_stats(page->mapping->host,
-				      NFSIOS_FSCACHE_PAGES_UNCACHED, 1);
+		nfs_inc_fscache_stats(page->mapping->host,
+				      NFSIOS_FSCACHE_PAGES_UNCACHED);
 	}
 
 	return 1;
@@ -293,8 +293,8 @@ void __nfs_fscache_invalidate_page(struct page *page, struct inode *inode)
 
 	BUG_ON(!PageLocked(page));
 	fscache_uncache_page(cookie, page);
-	nfs_add_fscache_stats(page->mapping->host,
-			      NFSIOS_FSCACHE_PAGES_UNCACHED, 1);
+	nfs_inc_fscache_stats(page->mapping->host,
+			      NFSIOS_FSCACHE_PAGES_UNCACHED);
 }
 
 /*
@@ -343,19 +343,19 @@ int __nfs_readpage_from_fscache(struct nfs_open_context *ctx,
 	case 0: /* read BIO submitted (page in fscache) */
 		dfprintk(FSCACHE,
 			 "NFS:    readpage_from_fscache: BIO submitted\n");
-		nfs_add_fscache_stats(inode, NFSIOS_FSCACHE_PAGES_READ_OK, 1);
+		nfs_inc_fscache_stats(inode, NFSIOS_FSCACHE_PAGES_READ_OK);
 		return ret;
 
 	case -ENOBUFS: /* inode not in cache */
 	case -ENODATA: /* page not in cache */
-		nfs_add_fscache_stats(inode, NFSIOS_FSCACHE_PAGES_READ_FAIL, 1);
+		nfs_inc_fscache_stats(inode, NFSIOS_FSCACHE_PAGES_READ_FAIL);
 		dfprintk(FSCACHE,
 			 "NFS:    readpage_from_fscache %d\n", ret);
 		return 1;
 
 	default:
 		dfprintk(FSCACHE, "NFS:    readpage_from_fscache %d\n", ret);
-		nfs_add_fscache_stats(inode, NFSIOS_FSCACHE_PAGES_READ_FAIL, 1);
+		nfs_inc_fscache_stats(inode, NFSIOS_FSCACHE_PAGES_READ_FAIL);
 	}
 	return ret;
 }
@@ -429,11 +429,11 @@ void __nfs_readpage_to_fscache(struct inode *inode, struct page *page, int sync)
 
 	if (ret != 0) {
 		fscache_uncache_page(nfs_i_fscache(inode), page);
-		nfs_add_fscache_stats(inode,
-				      NFSIOS_FSCACHE_PAGES_WRITTEN_FAIL, 1);
-		nfs_add_fscache_stats(inode, NFSIOS_FSCACHE_PAGES_UNCACHED, 1);
+		nfs_inc_fscache_stats(inode,
+				      NFSIOS_FSCACHE_PAGES_WRITTEN_FAIL);
+		nfs_inc_fscache_stats(inode, NFSIOS_FSCACHE_PAGES_UNCACHED);
 	} else {
-		nfs_add_fscache_stats(inode,
-				      NFSIOS_FSCACHE_PAGES_WRITTEN_OK, 1);
+		nfs_inc_fscache_stats(inode,
+				      NFSIOS_FSCACHE_PAGES_WRITTEN_OK);
 	}
 }
diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c
index 880618a..9ac3846 100644
--- a/fs/nfs/getroot.c
+++ b/fs/nfs/getroot.c
@@ -51,14 +51,14 @@ static int nfs_superblock_set_dummy_root(struct super_block *sb, struct inode *i
 		/*
 		 * Ensure that this dentry is invisible to d_find_alias().
 		 * Otherwise, it may be spliced into the tree by
-		 * d_materialise_unique if a parent directory from the same
+		 * d_splice_alias if a parent directory from the same
 		 * filesystem gets mounted at a later time.
 		 * This again causes shrink_dcache_for_umount_subtree() to
 		 * Oops, since the test for IS_ROOT() will fail.
 		 */
 		spin_lock(&sb->s_root->d_inode->i_lock);
 		spin_lock(&sb->s_root->d_lock);
-		hlist_del_init(&sb->s_root->d_alias);
+		hlist_del_init(&sb->s_root->d_u.d_alias);
 		spin_unlock(&sb->s_root->d_lock);
 		spin_unlock(&sb->s_root->d_inode->i_lock);
 	}
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 6388a59..4bffe63 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -192,6 +192,7 @@ void nfs_zap_caches(struct inode *inode)
 	nfs_zap_caches_locked(inode);
 	spin_unlock(&inode->i_lock);
 }
+EXPORT_SYMBOL_GPL(nfs_zap_caches);
 
 void nfs_zap_mapping(struct inode *inode, struct address_space *mapping)
 {
@@ -626,7 +627,7 @@ int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
 {
 	struct inode *inode = dentry->d_inode;
 	int need_atime = NFS_I(inode)->cache_validity & NFS_INO_INVALID_ATIME;
-	int err;
+	int err = 0;
 
 	trace_nfs_getattr_enter(inode);
 	/* Flush out writes to the server in order to update c/mtime.  */
@@ -1149,7 +1150,7 @@ static unsigned long nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr
 	if ((fattr->valid & NFS_ATTR_FATTR_PRESIZE)
 			&& (fattr->valid & NFS_ATTR_FATTR_SIZE)
 			&& i_size_read(inode) == nfs_size_to_loff_t(fattr->pre_size)
-			&& nfsi->npages == 0) {
+			&& nfsi->nrequests == 0) {
 		i_size_write(inode, nfs_size_to_loff_t(fattr->size));
 		ret |= NFS_INO_INVALID_ATTR;
 	}
@@ -1192,7 +1193,7 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat
 	if (fattr->valid & NFS_ATTR_FATTR_SIZE) {
 		cur_size = i_size_read(inode);
 		new_isize = nfs_size_to_loff_t(fattr->size);
-		if (cur_size != new_isize && nfsi->npages == 0)
+		if (cur_size != new_isize && nfsi->nrequests == 0)
 			invalid |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE;
 	}
 
@@ -1619,7 +1620,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
 		if (new_isize != cur_isize) {
 			/* Do we perhaps have any outstanding writes, or has
 			 * the file grown beyond our last write? */
-			if ((nfsi->npages == 0) || new_isize > cur_isize) {
+			if ((nfsi->nrequests == 0) || new_isize > cur_isize) {
 				i_size_write(inode, new_isize);
 				invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA;
 				invalid &= ~NFS_INO_REVAL_PAGECACHE;
@@ -1784,7 +1785,7 @@ static void init_once(void *foo)
 	INIT_LIST_HEAD(&nfsi->access_cache_entry_lru);
 	INIT_LIST_HEAD(&nfsi->access_cache_inode_lru);
 	INIT_LIST_HEAD(&nfsi->commit_info.list);
-	nfsi->npages = 0;
+	nfsi->nrequests = 0;
 	nfsi->commit_info.ncommit = 0;
 	atomic_set(&nfsi->commit_info.rpcs_out, 0);
 	atomic_set(&nfsi->silly_count, 1);
diff --git a/fs/nfs/iostat.h b/fs/nfs/iostat.h
index c583248..0cb806f 100644
--- a/fs/nfs/iostat.h
+++ b/fs/nfs/iostat.h
@@ -55,6 +55,11 @@ static inline void nfs_add_fscache_stats(struct inode *inode,
 {
 	this_cpu_add(NFS_SERVER(inode)->io_stats->fscache[stat], addend);
 }
+static inline void nfs_inc_fscache_stats(struct inode *inode,
+					 enum nfs_stat_fscachecounters stat)
+{
+	this_cpu_inc(NFS_SERVER(inode)->io_stats->fscache[stat]);
+}
 #endif
 
 static inline struct nfs_iostats __percpu *nfs_alloc_iostats(void)
diff --git a/fs/nfs/netns.h b/fs/nfs/netns.h
index ef221fb..f0e06e4 100644
--- a/fs/nfs/netns.h
+++ b/fs/nfs/netns.h
@@ -19,6 +19,7 @@ struct nfs_net {
 	struct rpc_pipe *bl_device_pipe;
 	struct bl_dev_msg bl_mount_reply;
 	wait_queue_head_t bl_wq;
+	struct mutex bl_mutex;
 	struct list_head nfs_client_list;
 	struct list_head nfs_volume_list;
 #if IS_ENABLED(CONFIG_NFS_V4)
diff --git a/fs/nfs/nfs42.h b/fs/nfs/nfs42.h
index d10333a..7afb894 100644
--- a/fs/nfs/nfs42.h
+++ b/fs/nfs/nfs42.h
@@ -6,6 +6,8 @@
 #define __LINUX_FS_NFS_NFS4_2_H
 
 /* nfs4.2proc.c */
+int nfs42_proc_allocate(struct file *, loff_t, loff_t);
+int nfs42_proc_deallocate(struct file *, loff_t, loff_t);
 loff_t nfs42_proc_llseek(struct file *, loff_t, int);
 
 /* nfs4.2xdr.h */
diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c
index 0886f1d..cb17072 100644
--- a/fs/nfs/nfs42proc.c
+++ b/fs/nfs/nfs42proc.c
@@ -32,6 +32,81 @@ static int nfs42_set_rw_stateid(nfs4_stateid *dst, struct file *file,
 	return ret;
 }
 
+static int _nfs42_proc_fallocate(struct rpc_message *msg, struct file *filep,
+				 loff_t offset, loff_t len)
+{
+	struct inode *inode = file_inode(filep);
+	struct nfs42_falloc_args args = {
+		.falloc_fh	= NFS_FH(inode),
+		.falloc_offset	= offset,
+		.falloc_length	= len,
+	};
+	struct nfs42_falloc_res res;
+	struct nfs_server *server = NFS_SERVER(inode);
+	int status;
+
+	msg->rpc_argp = &args;
+	msg->rpc_resp = &res;
+
+	status = nfs42_set_rw_stateid(&args.falloc_stateid, filep, FMODE_WRITE);
+	if (status)
+		return status;
+
+	return nfs4_call_sync(server->client, server, msg,
+			      &args.seq_args, &res.seq_res, 0);
+}
+
+static int nfs42_proc_fallocate(struct rpc_message *msg, struct file *filep,
+				loff_t offset, loff_t len)
+{
+	struct nfs_server *server = NFS_SERVER(file_inode(filep));
+	struct nfs4_exception exception = { };
+	int err;
+
+	do {
+		err = _nfs42_proc_fallocate(msg, filep, offset, len);
+		if (err == -ENOTSUPP)
+			return -EOPNOTSUPP;
+		err = nfs4_handle_exception(server, err, &exception);
+	} while (exception.retry);
+
+	return err;
+}
+
+int nfs42_proc_allocate(struct file *filep, loff_t offset, loff_t len)
+{
+	struct rpc_message msg = {
+		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_ALLOCATE],
+	};
+	struct inode *inode = file_inode(filep);
+	int err;
+
+	if (!nfs_server_capable(inode, NFS_CAP_ALLOCATE))
+		return -EOPNOTSUPP;
+
+	err = nfs42_proc_fallocate(&msg, filep, offset, len);
+	if (err == -EOPNOTSUPP)
+		NFS_SERVER(inode)->caps &= ~NFS_CAP_ALLOCATE;
+	return err;
+}
+
+int nfs42_proc_deallocate(struct file *filep, loff_t offset, loff_t len)
+{
+	struct rpc_message msg = {
+		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_DEALLOCATE],
+	};
+	struct inode *inode = file_inode(filep);
+	int err;
+
+	if (!nfs_server_capable(inode, NFS_CAP_DEALLOCATE))
+		return -EOPNOTSUPP;
+
+	err = nfs42_proc_fallocate(&msg, filep, offset, len);
+	if (err == -EOPNOTSUPP)
+		NFS_SERVER(inode)->caps &= ~NFS_CAP_DEALLOCATE;
+	return err;
+}
+
 loff_t nfs42_proc_llseek(struct file *filep, loff_t offset, int whence)
 {
 	struct inode *inode = file_inode(filep);
@@ -50,7 +125,7 @@ loff_t nfs42_proc_llseek(struct file *filep, loff_t offset, int whence)
 	struct nfs_server *server = NFS_SERVER(inode);
 	int status;
 
-	if (!(server->caps & NFS_CAP_SEEK))
+	if (!nfs_server_capable(inode, NFS_CAP_SEEK))
 		return -ENOTSUPP;
 
 	status = nfs42_set_rw_stateid(&args.sa_stateid, filep, FMODE_READ);
diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c
index c90469b..038a7e15 100644
--- a/fs/nfs/nfs42xdr.c
+++ b/fs/nfs/nfs42xdr.c
@@ -4,6 +4,15 @@
 #ifndef __LINUX_FS_NFS_NFS4_2XDR_H
 #define __LINUX_FS_NFS_NFS4_2XDR_H
 
+#define encode_fallocate_maxsz		(encode_stateid_maxsz + \
+					 2 /* offset */ + \
+					 2 /* length */)
+#define encode_allocate_maxsz		(op_encode_hdr_maxsz + \
+					 encode_fallocate_maxsz)
+#define decode_allocate_maxsz		(op_decode_hdr_maxsz)
+#define encode_deallocate_maxsz		(op_encode_hdr_maxsz + \
+					 encode_fallocate_maxsz)
+#define decode_deallocate_maxsz		(op_decode_hdr_maxsz)
 #define encode_seek_maxsz		(op_encode_hdr_maxsz + \
 					 encode_stateid_maxsz + \
 					 2 /* offset */ + \
@@ -14,6 +23,18 @@
 					 2 /* offset */ + \
 					 2 /* length */)
 
+#define NFS4_enc_allocate_sz		(compound_encode_hdr_maxsz + \
+					 encode_putfh_maxsz + \
+					 encode_allocate_maxsz)
+#define NFS4_dec_allocate_sz		(compound_decode_hdr_maxsz + \
+					 decode_putfh_maxsz + \
+					 decode_allocate_maxsz)
+#define NFS4_enc_deallocate_sz		(compound_encode_hdr_maxsz + \
+					 encode_putfh_maxsz + \
+					 encode_deallocate_maxsz)
+#define NFS4_dec_deallocate_sz		(compound_decode_hdr_maxsz + \
+					 decode_putfh_maxsz + \
+					 decode_deallocate_maxsz)
 #define NFS4_enc_seek_sz		(compound_encode_hdr_maxsz + \
 					 encode_putfh_maxsz + \
 					 encode_seek_maxsz)
@@ -22,6 +43,30 @@
 					 decode_seek_maxsz)
 
 
+static void encode_fallocate(struct xdr_stream *xdr,
+			     struct nfs42_falloc_args *args)
+{
+	encode_nfs4_stateid(xdr, &args->falloc_stateid);
+	encode_uint64(xdr, args->falloc_offset);
+	encode_uint64(xdr, args->falloc_length);
+}
+
+static void encode_allocate(struct xdr_stream *xdr,
+			    struct nfs42_falloc_args *args,
+			    struct compound_hdr *hdr)
+{
+	encode_op_hdr(xdr, OP_ALLOCATE, decode_allocate_maxsz, hdr);
+	encode_fallocate(xdr, args);
+}
+
+static void encode_deallocate(struct xdr_stream *xdr,
+			      struct nfs42_falloc_args *args,
+			      struct compound_hdr *hdr)
+{
+	encode_op_hdr(xdr, OP_DEALLOCATE, decode_deallocate_maxsz, hdr);
+	encode_fallocate(xdr, args);
+}
+
 static void encode_seek(struct xdr_stream *xdr,
 			struct nfs42_seek_args *args,
 			struct compound_hdr *hdr)
@@ -33,6 +78,42 @@ static void encode_seek(struct xdr_stream *xdr,
 }
 
 /*
+ * Encode ALLOCATE request
+ */
+static void nfs4_xdr_enc_allocate(struct rpc_rqst *req,
+				  struct xdr_stream *xdr,
+				  struct nfs42_falloc_args *args)
+{
+	struct compound_hdr hdr = {
+		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
+	};
+
+	encode_compound_hdr(xdr, req, &hdr);
+	encode_sequence(xdr, &args->seq_args, &hdr);
+	encode_putfh(xdr, args->falloc_fh, &hdr);
+	encode_allocate(xdr, args, &hdr);
+	encode_nops(&hdr);
+}
+
+/*
+ * Encode DEALLOCATE request
+ */
+static void nfs4_xdr_enc_deallocate(struct rpc_rqst *req,
+				    struct xdr_stream *xdr,
+				    struct nfs42_falloc_args *args)
+{
+	struct compound_hdr hdr = {
+		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
+	};
+
+	encode_compound_hdr(xdr, req, &hdr);
+	encode_sequence(xdr, &args->seq_args, &hdr);
+	encode_putfh(xdr, args->falloc_fh, &hdr);
+	encode_deallocate(xdr, args, &hdr);
+	encode_nops(&hdr);
+}
+
+/*
  * Encode SEEK request
  */
 static void nfs4_xdr_enc_seek(struct rpc_rqst *req,
@@ -50,6 +131,16 @@ static void nfs4_xdr_enc_seek(struct rpc_rqst *req,
 	encode_nops(&hdr);
 }
 
+static int decode_allocate(struct xdr_stream *xdr, struct nfs42_falloc_res *res)
+{
+	return decode_op_hdr(xdr, OP_ALLOCATE);
+}
+
+static int decode_deallocate(struct xdr_stream *xdr, struct nfs42_falloc_res *res)
+{
+	return decode_op_hdr(xdr, OP_DEALLOCATE);
+}
+
 static int decode_seek(struct xdr_stream *xdr, struct nfs42_seek_res *res)
 {
 	int status;
@@ -73,6 +164,54 @@ out_overflow:
 }
 
 /*
+ * Decode ALLOCATE request
+ */
+static int nfs4_xdr_dec_allocate(struct rpc_rqst *rqstp,
+				 struct xdr_stream *xdr,
+				 struct nfs42_falloc_res *res)
+{
+	struct compound_hdr hdr;
+	int status;
+
+	status = decode_compound_hdr(xdr, &hdr);
+	if (status)
+		goto out;
+	status = decode_sequence(xdr, &res->seq_res, rqstp);
+	if (status)
+		goto out;
+	status = decode_putfh(xdr);
+	if (status)
+		goto out;
+	status = decode_allocate(xdr, res);
+out:
+	return status;
+}
+
+/*
+ * Decode DEALLOCATE request
+ */
+static int nfs4_xdr_dec_deallocate(struct rpc_rqst *rqstp,
+				   struct xdr_stream *xdr,
+				   struct nfs42_falloc_res *res)
+{
+	struct compound_hdr hdr;
+	int status;
+
+	status = decode_compound_hdr(xdr, &hdr);
+	if (status)
+		goto out;
+	status = decode_sequence(xdr, &res->seq_res, rqstp);
+	if (status)
+		goto out;
+	status = decode_putfh(xdr);
+	if (status)
+		goto out;
+	status = decode_deallocate(xdr, res);
+out:
+	return status;
+}
+
+/*
  * Decode SEEK request
  */
 static int nfs4_xdr_dec_seek(struct rpc_rqst *rqstp,
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index be6cac3..a081787 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -226,6 +226,7 @@ int nfs4_replace_transport(struct nfs_server *server,
 				const struct nfs4_fs_locations *locations);
 
 /* nfs4proc.c */
+extern int nfs4_handle_exception(struct nfs_server *, int, struct nfs4_exception *);
 extern int nfs4_call_sync(struct rpc_clnt *, struct nfs_server *,
 			  struct rpc_message *, struct nfs4_sequence_args *,
 			  struct nfs4_sequence_res *, int);
diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
index ffdb28d..0331125 100644
--- a/fs/nfs/nfs4client.c
+++ b/fs/nfs/nfs4client.c
@@ -241,28 +241,25 @@ void nfs4_free_client(struct nfs_client *clp)
  */
 static int nfs4_init_callback(struct nfs_client *clp)
 {
+	struct rpc_xprt *xprt;
 	int error;
 
-	if (clp->rpc_ops->version == 4) {
-		struct rpc_xprt *xprt;
+	xprt = rcu_dereference_raw(clp->cl_rpcclient->cl_xprt);
 
-		xprt = rcu_dereference_raw(clp->cl_rpcclient->cl_xprt);
-
-		if (nfs4_has_session(clp)) {
-			error = xprt_setup_backchannel(xprt,
-						NFS41_BC_MIN_CALLBACKS);
-			if (error < 0)
-				return error;
-		}
-
-		error = nfs_callback_up(clp->cl_mvops->minor_version, xprt);
-		if (error < 0) {
-			dprintk("%s: failed to start callback. Error = %d\n",
-				__func__, error);
+	if (nfs4_has_session(clp)) {
+		error = xprt_setup_backchannel(xprt, NFS41_BC_MIN_CALLBACKS);
+		if (error < 0)
 			return error;
-		}
-		__set_bit(NFS_CS_CALLBACK, &clp->cl_res_state);
 	}
+
+	error = nfs_callback_up(clp->cl_mvops->minor_version, xprt);
+	if (error < 0) {
+		dprintk("%s: failed to start callback. Error = %d\n",
+			__func__, error);
+		return error;
+	}
+	__set_bit(NFS_CS_CALLBACK, &clp->cl_res_state);
+
 	return 0;
 }
 
@@ -498,8 +495,7 @@ int nfs40_walk_client_list(struct nfs_client *new,
 			atomic_inc(&pos->cl_count);
 			spin_unlock(&nn->nfs_client_lock);
 
-			if (prev)
-				nfs_put_client(prev);
+			nfs_put_client(prev);
 			prev = pos;
 
 			status = nfs_wait_client_init_complete(pos);
@@ -517,8 +513,7 @@ int nfs40_walk_client_list(struct nfs_client *new,
 		atomic_inc(&pos->cl_count);
 		spin_unlock(&nn->nfs_client_lock);
 
-		if (prev)
-			nfs_put_client(prev);
+		nfs_put_client(prev);
 		prev = pos;
 
 		status = nfs4_proc_setclientid_confirm(pos, &clid, cred);
@@ -549,8 +544,7 @@ int nfs40_walk_client_list(struct nfs_client *new,
 
 	/* No match found. The server lost our clientid */
 out:
-	if (prev)
-		nfs_put_client(prev);
+	nfs_put_client(prev);
 	dprintk("NFS: <-- %s status = %d\n", __func__, status);
 	return status;
 }
@@ -641,8 +635,7 @@ int nfs41_walk_client_list(struct nfs_client *new,
 			atomic_inc(&pos->cl_count);
 			spin_unlock(&nn->nfs_client_lock);
 
-			if (prev)
-				nfs_put_client(prev);
+			nfs_put_client(prev);
 			prev = pos;
 
 			status = nfs_wait_client_init_complete(pos);
@@ -675,8 +668,7 @@ int nfs41_walk_client_list(struct nfs_client *new,
 	/* No matching nfs_client found. */
 	spin_unlock(&nn->nfs_client_lock);
 	dprintk("NFS: <-- %s status = %d\n", __func__, status);
-	if (prev)
-		nfs_put_client(prev);
+	nfs_put_client(prev);
 	return status;
 }
 #endif	/* CONFIG_NFS_V4_1 */
diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c
index c51fb4d..8b46389 100644
--- a/fs/nfs/nfs4file.c
+++ b/fs/nfs/nfs4file.c
@@ -3,6 +3,8 @@
  *
  *  Copyright (C) 1992  Rick Sladkey
  */
+#include <linux/fs.h>
+#include <linux/falloc.h>
 #include <linux/nfs_fs.h>
 #include "internal.h"
 #include "fscache.h"
@@ -134,6 +136,32 @@ static loff_t nfs4_file_llseek(struct file *filep, loff_t offset, int whence)
 		return nfs_file_llseek(filep, offset, whence);
 	}
 }
+
+static long nfs42_fallocate(struct file *filep, int mode, loff_t offset, loff_t len)
+{
+	struct inode *inode = file_inode(filep);
+	long ret;
+
+	if (!S_ISREG(inode->i_mode))
+		return -EOPNOTSUPP;
+
+	if ((mode != 0) && (mode != (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE)))
+		return -EOPNOTSUPP;
+
+	ret = inode_newsize_ok(inode, offset + len);
+	if (ret < 0)
+		return ret;
+
+	mutex_lock(&inode->i_mutex);
+	if (mode & FALLOC_FL_PUNCH_HOLE)
+		ret = nfs42_proc_deallocate(filep, offset, len);
+	else
+		ret = nfs42_proc_allocate(filep, offset, len);
+	mutex_unlock(&inode->i_mutex);
+
+	nfs_zap_caches(inode);
+	return ret;
+}
 #endif /* CONFIG_NFS_V4_2 */
 
 const struct file_operations nfs4_file_operations = {
@@ -155,6 +183,9 @@ const struct file_operations nfs4_file_operations = {
 	.flock		= nfs_flock,
 	.splice_read	= nfs_file_splice_read,
 	.splice_write	= iter_file_splice_write,
+#ifdef CONFIG_NFS_V4_2
+	.fallocate	= nfs42_fallocate,
+#endif /* CONFIG_NFS_V4_2 */
 	.check_flags	= nfs_check_flags,
 	.setlease	= simple_nosetlease,
 };
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 405bd95..e7f8d5f 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -158,8 +158,6 @@ static int nfs4_map_errors(int err)
 		return -EACCES;
 	case -NFS4ERR_MINOR_VERS_MISMATCH:
 		return -EPROTONOSUPPORT;
-	case -NFS4ERR_ACCESS:
-		return -EACCES;
 	case -NFS4ERR_FILE_OPEN:
 		return -EBUSY;
 	default:
@@ -344,7 +342,7 @@ static int nfs4_delay(struct rpc_clnt *clnt, long *timeout)
 /* This is the error handling routine for processes that are allowed
  * to sleep.
  */
-static int nfs4_handle_exception(struct nfs_server *server, int errorcode, struct nfs4_exception *exception)
+int nfs4_handle_exception(struct nfs_server *server, int errorcode, struct nfs4_exception *exception)
 {
 	struct nfs_client *clp = server->nfs_client;
 	struct nfs4_state *state = exception->state;
@@ -370,11 +368,6 @@ static int nfs4_handle_exception(struct nfs_server *server, int errorcode, struc
 		case -NFS4ERR_DELEG_REVOKED:
 		case -NFS4ERR_ADMIN_REVOKED:
 		case -NFS4ERR_BAD_STATEID:
-			if (inode != NULL && nfs4_have_delegation(inode, FMODE_READ)) {
-				nfs_remove_bad_delegation(inode);
-				exception->retry = 1;
-				break;
-			}
 			if (state == NULL)
 				break;
 			ret = nfs4_schedule_stateid_recovery(server, state);
@@ -1654,7 +1647,7 @@ static int nfs4_handle_delegation_recall_error(struct nfs_server *server, struct
 			nfs_inode_find_state_and_recover(state->inode,
 					stateid);
 			nfs4_schedule_stateid_recovery(server, state);
-			return 0;
+			return -EAGAIN;
 		case -NFS4ERR_DELAY:
 		case -NFS4ERR_GRACE:
 			set_bit(NFS_DELEGATED_STATE, &state->flags);
@@ -2109,46 +2102,60 @@ static int nfs4_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *sta
 	return ret;
 }
 
+static void nfs_finish_clear_delegation_stateid(struct nfs4_state *state)
+{
+	nfs_remove_bad_delegation(state->inode);
+	write_seqlock(&state->seqlock);
+	nfs4_stateid_copy(&state->stateid, &state->open_stateid);
+	write_sequnlock(&state->seqlock);
+	clear_bit(NFS_DELEGATED_STATE, &state->flags);
+}
+
+static void nfs40_clear_delegation_stateid(struct nfs4_state *state)
+{
+	if (rcu_access_pointer(NFS_I(state->inode)->delegation) != NULL)
+		nfs_finish_clear_delegation_stateid(state);
+}
+
+static int nfs40_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *state)
+{
+	/* NFSv4.0 doesn't allow for delegation recovery on open expire */
+	nfs40_clear_delegation_stateid(state);
+	return nfs4_open_expired(sp, state);
+}
+
 #if defined(CONFIG_NFS_V4_1)
-static void nfs41_clear_delegation_stateid(struct nfs4_state *state)
+static void nfs41_check_delegation_stateid(struct nfs4_state *state)
 {
 	struct nfs_server *server = NFS_SERVER(state->inode);
-	nfs4_stateid *stateid = &state->stateid;
+	nfs4_stateid stateid;
 	struct nfs_delegation *delegation;
-	struct rpc_cred *cred = NULL;
-	int status = -NFS4ERR_BAD_STATEID;
-
-	/* If a state reset has been done, test_stateid is unneeded */
-	if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0)
-		return;
+	struct rpc_cred *cred;
+	int status;
 
 	/* Get the delegation credential for use by test/free_stateid */
 	rcu_read_lock();
 	delegation = rcu_dereference(NFS_I(state->inode)->delegation);
-	if (delegation != NULL &&
-	    nfs4_stateid_match(&delegation->stateid, stateid)) {
-		cred = get_rpccred(delegation->cred);
-		rcu_read_unlock();
-		status = nfs41_test_stateid(server, stateid, cred);
-		trace_nfs4_test_delegation_stateid(state, NULL, status);
-	} else
+	if (delegation == NULL) {
 		rcu_read_unlock();
+		return;
+	}
+
+	nfs4_stateid_copy(&stateid, &delegation->stateid);
+	cred = get_rpccred(delegation->cred);
+	rcu_read_unlock();
+	status = nfs41_test_stateid(server, &stateid, cred);
+	trace_nfs4_test_delegation_stateid(state, NULL, status);
 
 	if (status != NFS_OK) {
 		/* Free the stateid unless the server explicitly
 		 * informs us the stateid is unrecognized. */
 		if (status != -NFS4ERR_BAD_STATEID)
-			nfs41_free_stateid(server, stateid, cred);
-		nfs_remove_bad_delegation(state->inode);
-
-		write_seqlock(&state->seqlock);
-		nfs4_stateid_copy(&state->stateid, &state->open_stateid);
-		write_sequnlock(&state->seqlock);
-		clear_bit(NFS_DELEGATED_STATE, &state->flags);
+			nfs41_free_stateid(server, &stateid, cred);
+		nfs_finish_clear_delegation_stateid(state);
 	}
 
-	if (cred != NULL)
-		put_rpccred(cred);
+	put_rpccred(cred);
 }
 
 /**
@@ -2192,7 +2199,7 @@ static int nfs41_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *st
 {
 	int status;
 
-	nfs41_clear_delegation_stateid(state);
+	nfs41_check_delegation_stateid(state);
 	status = nfs41_check_open_stateid(state);
 	if (status != NFS_OK)
 		status = nfs4_open_expired(sp, state);
@@ -2231,19 +2238,8 @@ static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata,
 	seq = raw_seqcount_begin(&sp->so_reclaim_seqcount);
 
 	ret = _nfs4_proc_open(opendata);
-	if (ret != 0) {
-		if (ret == -ENOENT) {
-			dentry = opendata->dentry;
-			if (dentry->d_inode)
-				d_delete(dentry);
-			else if (d_unhashed(dentry))
-				d_add(dentry, NULL);
-
-			nfs_set_verifier(dentry,
-					 nfs_save_change_attribute(opendata->dir->d_inode));
-		}
+	if (ret != 0)
 		goto out;
-	}
 
 	state = nfs4_opendata_to_nfs4_state(opendata);
 	ret = PTR_ERR(state);
@@ -4841,9 +4837,6 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server,
 		case -NFS4ERR_DELEG_REVOKED:
 		case -NFS4ERR_ADMIN_REVOKED:
 		case -NFS4ERR_BAD_STATEID:
-			if (state == NULL)
-				break;
-			nfs_remove_bad_delegation(state->inode);
 		case -NFS4ERR_OPENMODE:
 			if (state == NULL)
 				break;
@@ -7709,6 +7702,9 @@ nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags)
 
 	dprintk("--> %s\n", __func__);
 
+	/* nfs4_layoutget_release calls pnfs_put_layout_hdr */
+	pnfs_get_layout_hdr(NFS_I(inode)->layout);
+
 	lgp->args.layout.pages = nfs4_alloc_pages(max_pages, gfp_flags);
 	if (!lgp->args.layout.pages) {
 		nfs4_layoutget_release(lgp);
@@ -7721,9 +7717,6 @@ nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags)
 	lgp->res.seq_res.sr_slot = NULL;
 	nfs4_init_sequence(&lgp->args.seq_args, &lgp->res.seq_res, 0);
 
-	/* nfs4_layoutget_release calls pnfs_put_layout_hdr */
-	pnfs_get_layout_hdr(NFS_I(inode)->layout);
-
 	task = rpc_run_task(&task_setup_data);
 	if (IS_ERR(task))
 		return ERR_CAST(task);
@@ -8341,7 +8334,7 @@ static const struct nfs4_state_recovery_ops nfs41_reboot_recovery_ops = {
 static const struct nfs4_state_recovery_ops nfs40_nograce_recovery_ops = {
 	.owner_flag_bit = NFS_OWNER_RECLAIM_NOGRACE,
 	.state_flag_bit	= NFS_STATE_RECLAIM_NOGRACE,
-	.recover_open	= nfs4_open_expired,
+	.recover_open	= nfs40_open_expired,
 	.recover_lock	= nfs4_lock_expired,
 	.establish_clid = nfs4_init_clientid,
 };
@@ -8408,8 +8401,7 @@ static const struct nfs4_minor_version_ops nfs_v4_1_minor_ops = {
 		| NFS_CAP_CHANGE_ATTR
 		| NFS_CAP_POSIX_LOCK
 		| NFS_CAP_STATEID_NFSV41
-		| NFS_CAP_ATOMIC_OPEN_V1
-		| NFS_CAP_SEEK,
+		| NFS_CAP_ATOMIC_OPEN_V1,
 	.init_client = nfs41_init_client,
 	.shutdown_client = nfs41_shutdown_client,
 	.match_stateid = nfs41_match_stateid,
@@ -8431,7 +8423,10 @@ static const struct nfs4_minor_version_ops nfs_v4_2_minor_ops = {
 		| NFS_CAP_CHANGE_ATTR
 		| NFS_CAP_POSIX_LOCK
 		| NFS_CAP_STATEID_NFSV41
-		| NFS_CAP_ATOMIC_OPEN_V1,
+		| NFS_CAP_ATOMIC_OPEN_V1
+		| NFS_CAP_ALLOCATE
+		| NFS_CAP_DEALLOCATE
+		| NFS_CAP_SEEK,
 	.init_client = nfs41_init_client,
 	.shutdown_client = nfs41_shutdown_client,
 	.match_stateid = nfs41_match_stateid,
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 206c08a..cb4376b 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -141,13 +141,15 @@ static int nfs4_stat_to_errno(int);
 				XDR_QUADLEN(NFS4_VERIFIER_SIZE) + \
 				XDR_QUADLEN(NFS4_SETCLIENTID_NAMELEN) + \
 				1 /* sc_prog */ + \
-				XDR_QUADLEN(RPCBIND_MAXNETIDLEN) + \
-				XDR_QUADLEN(RPCBIND_MAXUADDRLEN) + \
+				1 + XDR_QUADLEN(RPCBIND_MAXNETIDLEN) + \
+				1 + XDR_QUADLEN(RPCBIND_MAXUADDRLEN) + \
 				1) /* sc_cb_ident */
 #define decode_setclientid_maxsz \
 				(op_decode_hdr_maxsz + \
-				2 + \
-				1024) /* large value for CLID_INUSE */
+				2 /* clientid */ + \
+				XDR_QUADLEN(NFS4_VERIFIER_SIZE) + \
+				1 + XDR_QUADLEN(RPCBIND_MAXNETIDLEN) + \
+				1 + XDR_QUADLEN(RPCBIND_MAXUADDRLEN))
 #define encode_setclientid_confirm_maxsz \
 				(op_encode_hdr_maxsz + \
 				3 + (NFS4_VERIFIER_SIZE >> 2))
@@ -7394,6 +7396,8 @@ struct rpc_procinfo	nfs4_procedures[] = {
 #endif /* CONFIG_NFS_V4_1 */
 #ifdef CONFIG_NFS_V4_2
 	PROC(SEEK,		enc_seek,		dec_seek),
+	PROC(ALLOCATE,		enc_allocate,		dec_allocate),
+	PROC(DEALLOCATE,	enc_deallocate,		dec_deallocate),
 #endif /* CONFIG_NFS_V4_2 */
 };
 
diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c
index c6e4bda..9e5bc42 100644
--- a/fs/nfs/objlayout/objio_osd.c
+++ b/fs/nfs/objlayout/objio_osd.c
@@ -5,7 +5,7 @@
  *  All rights reserved.
  *
  *  Benny Halevy <bhalevy@panasas.com>
- *  Boaz Harrosh <bharrosh@panasas.com>
+ *  Boaz Harrosh <ooo@electrozaur.com>
  *
  *  This program is free software; you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License version 2
diff --git a/fs/nfs/objlayout/objlayout.c b/fs/nfs/objlayout/objlayout.c
index c89357c..919efd4 100644
--- a/fs/nfs/objlayout/objlayout.c
+++ b/fs/nfs/objlayout/objlayout.c
@@ -5,7 +5,7 @@
  *  All rights reserved.
  *
  *  Benny Halevy <bhalevy@panasas.com>
- *  Boaz Harrosh <bharrosh@panasas.com>
+ *  Boaz Harrosh <ooo@electrozaur.com>
  *
  *  This program is free software; you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License version 2
diff --git a/fs/nfs/objlayout/objlayout.h b/fs/nfs/objlayout/objlayout.h
index 3a0828d..2641dba 100644
--- a/fs/nfs/objlayout/objlayout.h
+++ b/fs/nfs/objlayout/objlayout.h
@@ -6,7 +6,7 @@
  *  All rights reserved.
  *
  *  Benny Halevy <bhalevy@panasas.com>
- *  Boaz Harrosh <bharrosh@panasas.com>
+ *  Boaz Harrosh <ooo@electrozaur.com>
  *
  *  This program is free software; you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License version 2
diff --git a/fs/nfs/objlayout/pnfs_osd_xdr_cli.c b/fs/nfs/objlayout/pnfs_osd_xdr_cli.c
index b3918f7..f093c7e 100644
--- a/fs/nfs/objlayout/pnfs_osd_xdr_cli.c
+++ b/fs/nfs/objlayout/pnfs_osd_xdr_cli.c
@@ -5,7 +5,7 @@
  *  All rights reserved.
  *
  *  Benny Halevy <bhalevy@panasas.com>
- *  Boaz Harrosh <bharrosh@panasas.com>
+ *  Boaz Harrosh <ooo@electrozaur.com>
  *
  *  This program is free software; you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License version 2
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index ed0db61..2b5e769 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -258,6 +258,7 @@ bool nfs_page_group_sync_on_bit(struct nfs_page *req, unsigned int bit)
 static inline void
 nfs_page_group_init(struct nfs_page *req, struct nfs_page *prev)
 {
+	struct inode *inode;
 	WARN_ON_ONCE(prev == req);
 
 	if (!prev) {
@@ -276,12 +277,16 @@ nfs_page_group_init(struct nfs_page *req, struct nfs_page *prev)
 		 * nfs_page_group_destroy is called */
 		kref_get(&req->wb_head->wb_kref);
 
-		/* grab extra ref if head request has extra ref from
-		 * the write/commit path to handle handoff between write
-		 * and commit lists */
+		/* grab extra ref and bump the request count if head request
+		 * has extra ref from the write/commit path to handle handoff
+		 * between write and commit lists. */
 		if (test_bit(PG_INODE_REF, &prev->wb_head->wb_flags)) {
+			inode = page_file_mapping(req->wb_page)->host;
 			set_bit(PG_INODE_REF, &req->wb_flags);
 			kref_get(&req->wb_kref);
+			spin_lock(&inode->i_lock);
+			NFS_I(inode)->nrequests++;
+			spin_unlock(&inode->i_lock);
 		}
 	}
 }
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index beff276..c91a479 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -269,7 +269,7 @@ int nfs_readpage(struct file *file, struct page *page)
 	dprintk("NFS: nfs_readpage (%p %ld@%lu)\n",
 		page, PAGE_CACHE_SIZE, page_file_index(page));
 	nfs_inc_stats(inode, NFSIOS_VFSREADPAGE);
-	nfs_add_stats(inode, NFSIOS_READPAGES, 1);
+	nfs_inc_stats(inode, NFSIOS_READPAGES);
 
 	/*
 	 * Try to flush any pending writes to the file..
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 1249384..af3af68 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -575,7 +575,7 @@ static int nfs_do_writepage(struct page *page, struct writeback_control *wbc, st
 	int ret;
 
 	nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGE);
-	nfs_add_stats(inode, NFSIOS_WRITEPAGES, 1);
+	nfs_inc_stats(inode, NFSIOS_WRITEPAGES);
 
 	nfs_pageio_cond_complete(pgio, page_file_index(page));
 	ret = nfs_page_async_flush(pgio, page, wbc->sync_mode == WB_SYNC_NONE);
@@ -670,7 +670,8 @@ static void nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
 	nfs_lock_request(req);
 
 	spin_lock(&inode->i_lock);
-	if (!nfsi->npages && NFS_PROTO(inode)->have_delegation(inode, FMODE_WRITE))
+	if (!nfsi->nrequests &&
+	    NFS_PROTO(inode)->have_delegation(inode, FMODE_WRITE))
 		inode->i_version++;
 	/*
 	 * Swap-space should not get truncated. Hence no need to plug the race
@@ -681,9 +682,11 @@ static void nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
 		SetPagePrivate(req->wb_page);
 		set_page_private(req->wb_page, (unsigned long)req);
 	}
-	nfsi->npages++;
+	nfsi->nrequests++;
 	/* this a head request for a page group - mark it as having an
-	 * extra reference so sub groups can follow suit */
+	 * extra reference so sub groups can follow suit.
+	 * This flag also informs pgio layer when to bump nrequests when
+	 * adding subrequests. */
 	WARN_ON(test_and_set_bit(PG_INODE_REF, &req->wb_flags));
 	kref_get(&req->wb_kref);
 	spin_unlock(&inode->i_lock);
@@ -709,14 +712,16 @@ static void nfs_inode_remove_request(struct nfs_page *req)
 			wake_up_page(head->wb_page, PG_private);
 			clear_bit(PG_MAPPED, &head->wb_flags);
 		}
-		nfsi->npages--;
+		nfsi->nrequests--;
+		spin_unlock(&inode->i_lock);
+	} else {
+		spin_lock(&inode->i_lock);
+		nfsi->nrequests--;
 		spin_unlock(&inode->i_lock);
 	}
 
 	if (test_and_clear_bit(PG_INODE_REF, &req->wb_flags))
 		nfs_release_request(req);
-	else
-		WARN_ON_ONCE(1);
 }
 
 static void
@@ -1737,7 +1742,7 @@ static int nfs_commit_unstable_pages(struct inode *inode, struct writeback_contr
 		/* Don't commit yet if this is a non-blocking flush and there
 		 * are a lot of outstanding writes for this mapping.
 		 */
-		if (nfsi->commit_info.ncommit <= (nfsi->npages >> 1))
+		if (nfsi->commit_info.ncommit <= (nfsi->nrequests >> 1))
 			goto out_mark_dirty;
 
 		/* don't wait for the COMMIT response */
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index ed2b115..7cbdf1b 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -774,8 +774,12 @@ static bool nfsd41_cb_get_slot(struct nfs4_client *clp, struct rpc_task *task)
 {
 	if (test_and_set_bit(0, &clp->cl_cb_slot_busy) != 0) {
 		rpc_sleep_on(&clp->cl_cb_waitq, task, NULL);
-		dprintk("%s slot is busy\n", __func__);
-		return false;
+		/* Race breaker */
+		if (test_and_set_bit(0, &clp->cl_cb_slot_busy) != 0) {
+			dprintk("%s slot is busy\n", __func__);
+			return false;
+		}
+		rpc_wake_up_queued_task(&clp->cl_cb_waitq, task);
 	}
 	return true;
 }
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index cdeb3cf..0beb023 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -1272,7 +1272,8 @@ static bool need_wrongsec_check(struct svc_rqst *rqstp)
 	 */
 	if (argp->opcnt == resp->opcnt)
 		return false;
-
+	if (next->opnum == OP_ILLEGAL)
+		return false;
 	nextd = OPDESC(next);
 	/*
 	 * Rest of 2.6.3.1.1: certain operations will return WRONGSEC
@@ -1589,7 +1590,8 @@ static inline u32 nfsd4_rename_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op
 static inline u32 nfsd4_sequence_rsize(struct svc_rqst *rqstp,
 				       struct nfsd4_op *op)
 {
-	return NFS4_MAX_SESSIONID_LEN + 20;
+	return (op_encode_hdr_size
+		+ XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + 5) * sizeof(__be32);
 }
 
 static inline u32 nfsd4_setattr_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
@@ -1893,6 +1895,7 @@ static struct nfsd4_operation nfsd4_ops[] = {
 		.op_func = (nfsd4op_func)nfsd4_sequence,
 		.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_AS_FIRST_OP,
 		.op_name = "OP_SEQUENCE",
+		.op_rsize_bop = (nfsd4op_rsize)nfsd4_sequence_rsize,
 	},
 	[OP_DESTROY_CLIENTID] = {
 		.op_func = (nfsd4op_func)nfsd4_destroy_clientid,
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index a25490a..cc6a760 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -245,10 +245,11 @@ struct nfs4_dir_ctx {
 };
 
 static int
-nfsd4_build_namelist(void *arg, const char *name, int namlen,
+nfsd4_build_namelist(struct dir_context *__ctx, const char *name, int namlen,
 		loff_t offset, u64 ino, unsigned int d_type)
 {
-	struct nfs4_dir_ctx *ctx = arg;
+	struct nfs4_dir_ctx *ctx =
+		container_of(__ctx, struct nfs4_dir_ctx, ctx);
 	struct name_list *entry;
 
 	if (namlen != HEXDIR_LEN - 1)
@@ -704,7 +705,7 @@ cld_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
 	struct cld_upcall *tmp, *cup;
 	struct cld_msg __user *cmsg = (struct cld_msg __user *)src;
 	uint32_t xid;
-	struct nfsd_net *nn = net_generic(filp->f_dentry->d_sb->s_fs_info,
+	struct nfsd_net *nn = net_generic(file_inode(filp)->i_sb->s_fs_info,
 						nfsd_net_id);
 	struct cld_net *cn = nn->cld_net;
 
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index eeea7a9..b1eed4d 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -1886,7 +1886,7 @@ static __be32 nfsd4_encode_path(struct xdr_stream *xdr,
 			goto out_free;
 		}
 		p = xdr_encode_opaque(p, dentry->d_name.name, len);
-		dprintk("/%s", dentry->d_name.name);
+		dprintk("/%pd", dentry);
 		spin_unlock(&dentry->d_lock);
 		dput(dentry);
 		ncomponents--;
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index ca73ca7..9506ea5 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -231,6 +231,10 @@ static struct file_operations reply_cache_stats_operations = {
  * payload - write methods
  */
 
+static inline struct net *netns(struct file *file)
+{
+	return file_inode(file)->i_sb->s_fs_info;
+}
 
 /**
  * write_unlock_ip - Release all locks used by a client
@@ -252,7 +256,7 @@ static ssize_t write_unlock_ip(struct file *file, char *buf, size_t size)
 	struct sockaddr *sap = (struct sockaddr *)&address;
 	size_t salen = sizeof(address);
 	char *fo_path;
-	struct net *net = file->f_dentry->d_sb->s_fs_info;
+	struct net *net = netns(file);
 
 	/* sanity check */
 	if (size == 0)
@@ -350,7 +354,6 @@ static ssize_t write_filehandle(struct file *file, char *buf, size_t size)
 	int len;
 	struct auth_domain *dom;
 	struct knfsd_fh fh;
-	struct net *net = file->f_dentry->d_sb->s_fs_info;
 
 	if (size == 0)
 		return -EINVAL;
@@ -385,7 +388,7 @@ static ssize_t write_filehandle(struct file *file, char *buf, size_t size)
 	if (!dom)
 		return -ENOMEM;
 
-	len = exp_rootfh(net, dom, path, &fh,  maxsize);
+	len = exp_rootfh(netns(file), dom, path, &fh,  maxsize);
 	auth_domain_put(dom);
 	if (len)
 		return len;
@@ -429,7 +432,7 @@ static ssize_t write_threads(struct file *file, char *buf, size_t size)
 {
 	char *mesg = buf;
 	int rv;
-	struct net *net = file->f_dentry->d_sb->s_fs_info;
+	struct net *net = netns(file);
 
 	if (size > 0) {
 		int newthreads;
@@ -480,7 +483,7 @@ static ssize_t write_pool_threads(struct file *file, char *buf, size_t size)
 	int len;
 	int npools;
 	int *nthreads;
-	struct net *net = file->f_dentry->d_sb->s_fs_info;
+	struct net *net = netns(file);
 
 	mutex_lock(&nfsd_mutex);
 	npools = nfsd_nrpools(net);
@@ -543,8 +546,7 @@ static ssize_t __write_versions(struct file *file, char *buf, size_t size)
 	unsigned minor;
 	ssize_t tlen = 0;
 	char *sep;
-	struct net *net = file->f_dentry->d_sb->s_fs_info;
-	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+	struct nfsd_net *nn = net_generic(netns(file), nfsd_net_id);
 
 	if (size>0) {
 		if (nn->nfsd_serv)
@@ -830,10 +832,9 @@ static ssize_t __write_ports(struct file *file, char *buf, size_t size,
 static ssize_t write_ports(struct file *file, char *buf, size_t size)
 {
 	ssize_t rv;
-	struct net *net = file->f_dentry->d_sb->s_fs_info;
 
 	mutex_lock(&nfsd_mutex);
-	rv = __write_ports(file, buf, size, net);
+	rv = __write_ports(file, buf, size, netns(file));
 	mutex_unlock(&nfsd_mutex);
 	return rv;
 }
@@ -865,8 +866,7 @@ int nfsd_max_blksize;
 static ssize_t write_maxblksize(struct file *file, char *buf, size_t size)
 {
 	char *mesg = buf;
-	struct net *net = file->f_dentry->d_sb->s_fs_info;
-	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+	struct nfsd_net *nn = net_generic(netns(file), nfsd_net_id);
 
 	if (size > 0) {
 		int bsize;
@@ -915,8 +915,7 @@ static ssize_t write_maxblksize(struct file *file, char *buf, size_t size)
 static ssize_t write_maxconn(struct file *file, char *buf, size_t size)
 {
 	char *mesg = buf;
-	struct net *net = file->f_dentry->d_sb->s_fs_info;
-	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+	struct nfsd_net *nn = net_generic(netns(file), nfsd_net_id);
 	unsigned int maxconn = nn->max_connections;
 
 	if (size > 0) {
@@ -997,8 +996,7 @@ static ssize_t nfsd4_write_time(struct file *file, char *buf, size_t size,
  */
 static ssize_t write_leasetime(struct file *file, char *buf, size_t size)
 {
-	struct net *net = file->f_dentry->d_sb->s_fs_info;
-	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+	struct nfsd_net *nn = net_generic(netns(file), nfsd_net_id);
 	return nfsd4_write_time(file, buf, size, &nn->nfsd4_lease, nn);
 }
 
@@ -1014,8 +1012,7 @@ static ssize_t write_leasetime(struct file *file, char *buf, size_t size)
  */
 static ssize_t write_gracetime(struct file *file, char *buf, size_t size)
 {
-	struct net *net = file->f_dentry->d_sb->s_fs_info;
-	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+	struct nfsd_net *nn = net_generic(netns(file), nfsd_net_id);
 	return nfsd4_write_time(file, buf, size, &nn->nfsd4_grace, nn);
 }
 
@@ -1071,8 +1068,7 @@ static ssize_t __write_recoverydir(struct file *file, char *buf, size_t size,
 static ssize_t write_recoverydir(struct file *file, char *buf, size_t size)
 {
 	ssize_t rv;
-	struct net *net = file->f_dentry->d_sb->s_fs_info;
-	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+	struct nfsd_net *nn = net_generic(netns(file), nfsd_net_id);
 
 	mutex_lock(&nfsd_mutex);
 	rv = __write_recoverydir(file, buf, size, nn);
@@ -1102,8 +1098,7 @@ static ssize_t write_recoverydir(struct file *file, char *buf, size_t size)
  */
 static ssize_t write_v4_end_grace(struct file *file, char *buf, size_t size)
 {
-	struct net *net = file->f_dentry->d_sb->s_fs_info;
-	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+	struct nfsd_net *nn = net_generic(netns(file), nfsd_net_id);
 
 	if (size > 0) {
 		switch(buf[0]) {
diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
index 747f3b95..33a46a8 100644
--- a/fs/nfsd/nfsd.h
+++ b/fs/nfsd/nfsd.h
@@ -335,12 +335,15 @@ void		nfsd_lockd_shutdown(void);
 	(NFSD4_SUPPORTED_ATTRS_WORD2 | FATTR4_WORD2_SUPPATTR_EXCLCREAT)
 
 #ifdef CONFIG_NFSD_V4_SECURITY_LABEL
-#define NFSD4_2_SUPPORTED_ATTRS_WORD2 \
-	(NFSD4_1_SUPPORTED_ATTRS_WORD2 | FATTR4_WORD2_SECURITY_LABEL)
+#define NFSD4_2_SECURITY_ATTRS		FATTR4_WORD2_SECURITY_LABEL
 #else
-#define NFSD4_2_SUPPORTED_ATTRS_WORD2 0
+#define NFSD4_2_SECURITY_ATTRS		0
 #endif
 
+#define NFSD4_2_SUPPORTED_ATTRS_WORD2 \
+	(NFSD4_1_SUPPORTED_ATTRS_WORD2 | \
+	NFSD4_2_SECURITY_ATTRS)
+
 static inline u32 nfsd_suppattrs0(u32 minorversion)
 {
 	return minorversion ? NFSD4_1_SUPPORTED_ATTRS_WORD0
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 989129e..0a82e3c 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -930,7 +930,6 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
 				unsigned long *cnt, int *stablep)
 {
 	struct svc_export	*exp;
-	struct dentry		*dentry;
 	struct inode		*inode;
 	mm_segment_t		oldfs;
 	__be32			err = 0;
@@ -949,8 +948,7 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
 		 */
 		current->flags |= PF_LESS_THROTTLE;
 
-	dentry = file->f_path.dentry;
-	inode = dentry->d_inode;
+	inode = file_inode(file);
 	exp   = fhp->fh_export;
 
 	use_wgather = (rqstp->rq_vers == 2) && EX_WGATHER(exp);
@@ -1819,10 +1817,12 @@ struct readdir_data {
 	int		full;
 };
 
-static int nfsd_buffered_filldir(void *__buf, const char *name, int namlen,
-				 loff_t offset, u64 ino, unsigned int d_type)
+static int nfsd_buffered_filldir(struct dir_context *ctx, const char *name,
+				 int namlen, loff_t offset, u64 ino,
+				 unsigned int d_type)
 {
-	struct readdir_data *buf = __buf;
+	struct readdir_data *buf =
+		container_of(ctx, struct readdir_data, ctx);
 	struct buffered_dirent *de = (void *)(buf->dirent + buf->used);
 	unsigned int reclen;
 
@@ -1842,7 +1842,7 @@ static int nfsd_buffered_filldir(void *__buf, const char *name, int namlen,
 	return 0;
 }
 
-static __be32 nfsd_buffered_readdir(struct file *file, filldir_t func,
+static __be32 nfsd_buffered_readdir(struct file *file, nfsd_filldir_t func,
 				    struct readdir_cd *cdp, loff_t *offsetp)
 {
 	struct buffered_dirent *de;
@@ -1926,7 +1926,7 @@ static __be32 nfsd_buffered_readdir(struct file *file, filldir_t func,
  */
 __be32
 nfsd_readdir(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t *offsetp, 
-	     struct readdir_cd *cdp, filldir_t func)
+	     struct readdir_cd *cdp, nfsd_filldir_t func)
 {
 	__be32		err;
 	struct file	*file;
diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h
index c2ff3f1..b1796d6 100644
--- a/fs/nfsd/vfs.h
+++ b/fs/nfsd/vfs.h
@@ -36,7 +36,7 @@
 /*
  * Callback function for readdir
  */
-typedef int (*nfsd_dirop_t)(struct inode *, struct dentry *, int, int);
+typedef int (*nfsd_filldir_t)(void *, const char *, int, loff_t, u64, unsigned);
 
 /* nfsd/vfs.c */
 int		nfsd_racache_init(int);
@@ -95,7 +95,7 @@ __be32		nfsd_rename(struct svc_rqst *,
 __be32		nfsd_unlink(struct svc_rqst *, struct svc_fh *, int type,
 				char *name, int len);
 __be32		nfsd_readdir(struct svc_rqst *, struct svc_fh *,
-			     loff_t *, struct readdir_cd *, filldir_t);
+			     loff_t *, struct readdir_cd *, nfsd_filldir_t);
 __be32		nfsd_statfs(struct svc_rqst *, struct svc_fh *,
 				struct kstatfs *, int access);
 
diff --git a/fs/nilfs2/file.c b/fs/nilfs2/file.c
index e9e3325..3a03e0a 100644
--- a/fs/nilfs2/file.c
+++ b/fs/nilfs2/file.c
@@ -39,21 +39,15 @@ int nilfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
 	 */
 	struct the_nilfs *nilfs;
 	struct inode *inode = file->f_mapping->host;
-	int err;
-
-	err = filemap_write_and_wait_range(inode->i_mapping, start, end);
-	if (err)
-		return err;
-	mutex_lock(&inode->i_mutex);
+	int err = 0;
 
 	if (nilfs_inode_dirty(inode)) {
 		if (datasync)
 			err = nilfs_construct_dsync_segment(inode->i_sb, inode,
-							    0, LLONG_MAX);
+							    start, end);
 		else
 			err = nilfs_construct_segment(inode->i_sb);
 	}
-	mutex_unlock(&inode->i_mutex);
 
 	nilfs = inode->i_sb->s_fs_info;
 	if (!err)
diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c
index e1fa69b..8b59695 100644
--- a/fs/nilfs2/inode.c
+++ b/fs/nilfs2/inode.c
@@ -49,6 +49,8 @@ struct nilfs_iget_args {
 	int for_gc;
 };
 
+static int nilfs_iget_test(struct inode *inode, void *opaque);
+
 void nilfs_inode_add_blocks(struct inode *inode, int n)
 {
 	struct nilfs_root *root = NILFS_I(inode)->i_root;
@@ -348,6 +350,17 @@ const struct address_space_operations nilfs_aops = {
 	.is_partially_uptodate  = block_is_partially_uptodate,
 };
 
+static int nilfs_insert_inode_locked(struct inode *inode,
+				     struct nilfs_root *root,
+				     unsigned long ino)
+{
+	struct nilfs_iget_args args = {
+		.ino = ino, .root = root, .cno = 0, .for_gc = 0
+	};
+
+	return insert_inode_locked4(inode, ino, nilfs_iget_test, &args);
+}
+
 struct inode *nilfs_new_inode(struct inode *dir, umode_t mode)
 {
 	struct super_block *sb = dir->i_sb;
@@ -383,7 +396,7 @@ struct inode *nilfs_new_inode(struct inode *dir, umode_t mode)
 	if (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)) {
 		err = nilfs_bmap_read(ii->i_bmap, NULL);
 		if (err < 0)
-			goto failed_bmap;
+			goto failed_after_creation;
 
 		set_bit(NILFS_I_BMAP, &ii->i_state);
 		/* No lock is needed; iget() ensures it. */
@@ -399,21 +412,24 @@ struct inode *nilfs_new_inode(struct inode *dir, umode_t mode)
 	spin_lock(&nilfs->ns_next_gen_lock);
 	inode->i_generation = nilfs->ns_next_generation++;
 	spin_unlock(&nilfs->ns_next_gen_lock);
-	insert_inode_hash(inode);
+	if (nilfs_insert_inode_locked(inode, root, ino) < 0) {
+		err = -EIO;
+		goto failed_after_creation;
+	}
 
 	err = nilfs_init_acl(inode, dir);
 	if (unlikely(err))
-		goto failed_acl; /* never occur. When supporting
+		goto failed_after_creation; /* never occur. When supporting
 				    nilfs_init_acl(), proper cancellation of
 				    above jobs should be considered */
 
 	return inode;
 
- failed_acl:
- failed_bmap:
+ failed_after_creation:
 	clear_nlink(inode);
+	unlock_new_inode(inode);
 	iput(inode);  /* raw_inode will be deleted through
-			 generic_delete_inode() */
+			 nilfs_evict_inode() */
 	goto failed;
 
  failed_ifile_create_inode:
@@ -461,8 +477,8 @@ int nilfs_read_inode_common(struct inode *inode,
 	inode->i_atime.tv_nsec = le32_to_cpu(raw_inode->i_mtime_nsec);
 	inode->i_ctime.tv_nsec = le32_to_cpu(raw_inode->i_ctime_nsec);
 	inode->i_mtime.tv_nsec = le32_to_cpu(raw_inode->i_mtime_nsec);
-	if (inode->i_nlink == 0 && inode->i_mode == 0)
-		return -EINVAL; /* this inode is deleted */
+	if (inode->i_nlink == 0)
+		return -ESTALE; /* this inode is deleted */
 
 	inode->i_blocks = le64_to_cpu(raw_inode->i_blocks);
 	ii->i_flags = le32_to_cpu(raw_inode->i_flags);
diff --git a/fs/nilfs2/namei.c b/fs/nilfs2/namei.c
index 9de78f0..0f84b25 100644
--- a/fs/nilfs2/namei.c
+++ b/fs/nilfs2/namei.c
@@ -51,9 +51,11 @@ static inline int nilfs_add_nondir(struct dentry *dentry, struct inode *inode)
 	int err = nilfs_add_link(dentry, inode);
 	if (!err) {
 		d_instantiate(dentry, inode);
+		unlock_new_inode(inode);
 		return 0;
 	}
 	inode_dec_link_count(inode);
+	unlock_new_inode(inode);
 	iput(inode);
 	return err;
 }
@@ -182,6 +184,7 @@ out:
 out_fail:
 	drop_nlink(inode);
 	nilfs_mark_inode_dirty(inode);
+	unlock_new_inode(inode);
 	iput(inode);
 	goto out;
 }
@@ -201,11 +204,15 @@ static int nilfs_link(struct dentry *old_dentry, struct inode *dir,
 	inode_inc_link_count(inode);
 	ihold(inode);
 
-	err = nilfs_add_nondir(dentry, inode);
-	if (!err)
+	err = nilfs_add_link(dentry, inode);
+	if (!err) {
+		d_instantiate(dentry, inode);
 		err = nilfs_transaction_commit(dir->i_sb);
-	else
+	} else {
+		inode_dec_link_count(inode);
+		iput(inode);
 		nilfs_transaction_abort(dir->i_sb);
+	}
 
 	return err;
 }
@@ -243,6 +250,7 @@ static int nilfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
 
 	nilfs_mark_inode_dirty(inode);
 	d_instantiate(dentry, inode);
+	unlock_new_inode(inode);
 out:
 	if (!err)
 		err = nilfs_transaction_commit(dir->i_sb);
@@ -255,6 +263,7 @@ out_fail:
 	drop_nlink(inode);
 	drop_nlink(inode);
 	nilfs_mark_inode_dirty(inode);
+	unlock_new_inode(inode);
 	iput(inode);
 out_dir:
 	drop_nlink(dir);
diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c
index 9da25fe..69bd801 100644
--- a/fs/nilfs2/the_nilfs.c
+++ b/fs/nilfs2/the_nilfs.c
@@ -808,8 +808,7 @@ void nilfs_put_root(struct nilfs_root *root)
 		spin_lock(&nilfs->ns_cptree_lock);
 		rb_erase(&root->rb_node, &nilfs->ns_cptree);
 		spin_unlock(&nilfs->ns_cptree_lock);
-		if (root->ifile)
-			iput(root->ifile);
+		iput(root->ifile);
 
 		kfree(root);
 	}
diff --git a/fs/notify/fdinfo.c b/fs/notify/fdinfo.c
index 9d7e2b9..6ffd220 100644
--- a/fs/notify/fdinfo.c
+++ b/fs/notify/fdinfo.c
@@ -20,25 +20,24 @@
 
 #if defined(CONFIG_INOTIFY_USER) || defined(CONFIG_FANOTIFY)
 
-static int show_fdinfo(struct seq_file *m, struct file *f,
-		       int (*show)(struct seq_file *m, struct fsnotify_mark *mark))
+static void show_fdinfo(struct seq_file *m, struct file *f,
+			void (*show)(struct seq_file *m,
+				     struct fsnotify_mark *mark))
 {
 	struct fsnotify_group *group = f->private_data;
 	struct fsnotify_mark *mark;
-	int ret = 0;
 
 	mutex_lock(&group->mark_mutex);
 	list_for_each_entry(mark, &group->marks_list, g_list) {
-		ret = show(m, mark);
-		if (ret)
+		show(m, mark);
+		if (seq_has_overflowed(m))
 			break;
 	}
 	mutex_unlock(&group->mark_mutex);
-	return ret;
 }
 
 #if defined(CONFIG_EXPORTFS)
-static int show_mark_fhandle(struct seq_file *m, struct inode *inode)
+static void show_mark_fhandle(struct seq_file *m, struct inode *inode)
 {
 	struct {
 		struct file_handle handle;
@@ -52,71 +51,62 @@ static int show_mark_fhandle(struct seq_file *m, struct inode *inode)
 	ret = exportfs_encode_inode_fh(inode, (struct fid *)f.handle.f_handle, &size, 0);
 	if ((ret == FILEID_INVALID) || (ret < 0)) {
 		WARN_ONCE(1, "Can't encode file handler for inotify: %d\n", ret);
-		return 0;
+		return;
 	}
 
 	f.handle.handle_type = ret;
 	f.handle.handle_bytes = size * sizeof(u32);
 
-	ret = seq_printf(m, "fhandle-bytes:%x fhandle-type:%x f_handle:",
-			 f.handle.handle_bytes, f.handle.handle_type);
+	seq_printf(m, "fhandle-bytes:%x fhandle-type:%x f_handle:",
+		   f.handle.handle_bytes, f.handle.handle_type);
 
 	for (i = 0; i < f.handle.handle_bytes; i++)
-		ret |= seq_printf(m, "%02x", (int)f.handle.f_handle[i]);
-
-	return ret;
+		seq_printf(m, "%02x", (int)f.handle.f_handle[i]);
 }
 #else
-static int show_mark_fhandle(struct seq_file *m, struct inode *inode)
+static void show_mark_fhandle(struct seq_file *m, struct inode *inode)
 {
-	return 0;
 }
 #endif
 
 #ifdef CONFIG_INOTIFY_USER
 
-static int inotify_fdinfo(struct seq_file *m, struct fsnotify_mark *mark)
+static void inotify_fdinfo(struct seq_file *m, struct fsnotify_mark *mark)
 {
 	struct inotify_inode_mark *inode_mark;
 	struct inode *inode;
-	int ret = 0;
 
 	if (!(mark->flags & (FSNOTIFY_MARK_FLAG_ALIVE | FSNOTIFY_MARK_FLAG_INODE)))
-		return 0;
+		return;
 
 	inode_mark = container_of(mark, struct inotify_inode_mark, fsn_mark);
 	inode = igrab(mark->i.inode);
 	if (inode) {
-		ret = seq_printf(m, "inotify wd:%x ino:%lx sdev:%x "
-				 "mask:%x ignored_mask:%x ",
-				 inode_mark->wd, inode->i_ino,
-				 inode->i_sb->s_dev,
-				 mark->mask, mark->ignored_mask);
-		ret |= show_mark_fhandle(m, inode);
-		ret |= seq_putc(m, '\n');
+		seq_printf(m, "inotify wd:%x ino:%lx sdev:%x mask:%x ignored_mask:%x ",
+			   inode_mark->wd, inode->i_ino, inode->i_sb->s_dev,
+			   mark->mask, mark->ignored_mask);
+		show_mark_fhandle(m, inode);
+		seq_putc(m, '\n');
 		iput(inode);
 	}
-
-	return ret;
 }
 
-int inotify_show_fdinfo(struct seq_file *m, struct file *f)
+void inotify_show_fdinfo(struct seq_file *m, struct file *f)
 {
-	return show_fdinfo(m, f, inotify_fdinfo);
+	show_fdinfo(m, f, inotify_fdinfo);
 }
 
 #endif /* CONFIG_INOTIFY_USER */
 
 #ifdef CONFIG_FANOTIFY
 
-static int fanotify_fdinfo(struct seq_file *m, struct fsnotify_mark *mark)
+static void fanotify_fdinfo(struct seq_file *m, struct fsnotify_mark *mark)
 {
 	unsigned int mflags = 0;
 	struct inode *inode;
-	int ret = 0;
 
 	if (!(mark->flags & FSNOTIFY_MARK_FLAG_ALIVE))
-		return 0;
+		return;
 
 	if (mark->flags & FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY)
 		mflags |= FAN_MARK_IGNORED_SURV_MODIFY;
@@ -124,26 +114,22 @@ static int fanotify_fdinfo(struct seq_file *m, struct fsnotify_mark *mark)
 	if (mark->flags & FSNOTIFY_MARK_FLAG_INODE) {
 		inode = igrab(mark->i.inode);
 		if (!inode)
-			goto out;
-		ret = seq_printf(m, "fanotify ino:%lx sdev:%x "
-				 "mflags:%x mask:%x ignored_mask:%x ",
-				 inode->i_ino, inode->i_sb->s_dev,
-				 mflags, mark->mask, mark->ignored_mask);
-		ret |= show_mark_fhandle(m, inode);
-		ret |= seq_putc(m, '\n');
+			return;
+		seq_printf(m, "fanotify ino:%lx sdev:%x mflags:%x mask:%x ignored_mask:%x ",
+			   inode->i_ino, inode->i_sb->s_dev,
+			   mflags, mark->mask, mark->ignored_mask);
+		show_mark_fhandle(m, inode);
+		seq_putc(m, '\n');
 		iput(inode);
 	} else if (mark->flags & FSNOTIFY_MARK_FLAG_VFSMOUNT) {
 		struct mount *mnt = real_mount(mark->m.mnt);
 
-		ret = seq_printf(m, "fanotify mnt_id:%x mflags:%x mask:%x "
-				 "ignored_mask:%x\n", mnt->mnt_id, mflags,
-				 mark->mask, mark->ignored_mask);
+		seq_printf(m, "fanotify mnt_id:%x mflags:%x mask:%x ignored_mask:%x\n",
+			   mnt->mnt_id, mflags, mark->mask, mark->ignored_mask);
 	}
-out:
-	return ret;
 }
 
-int fanotify_show_fdinfo(struct seq_file *m, struct file *f)
+void fanotify_show_fdinfo(struct seq_file *m, struct file *f)
 {
 	struct fsnotify_group *group = f->private_data;
 	unsigned int flags = 0;
@@ -169,7 +155,7 @@ int fanotify_show_fdinfo(struct seq_file *m, struct file *f)
 	seq_printf(m, "fanotify flags:%x event-flags:%x\n",
 		   flags, group->fanotify_data.f_flags);
 
-	return show_fdinfo(m, f, fanotify_fdinfo);
+	show_fdinfo(m, f, fanotify_fdinfo);
 }
 
 #endif /* CONFIG_FANOTIFY */
diff --git a/fs/notify/fdinfo.h b/fs/notify/fdinfo.h
index 556afda..9664c49 100644
--- a/fs/notify/fdinfo.h
+++ b/fs/notify/fdinfo.h
@@ -10,11 +10,11 @@ struct file;
 #ifdef CONFIG_PROC_FS
 
 #ifdef CONFIG_INOTIFY_USER
-extern int inotify_show_fdinfo(struct seq_file *m, struct file *f);
+void inotify_show_fdinfo(struct seq_file *m, struct file *f);
 #endif
 
 #ifdef CONFIG_FANOTIFY
-extern int fanotify_show_fdinfo(struct seq_file *m, struct file *f);
+void fanotify_show_fdinfo(struct seq_file *m, struct file *f);
 #endif
 
 #else /* CONFIG_PROC_FS */
diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
index 9d3e9c5..41e39102 100644
--- a/fs/notify/fsnotify.c
+++ b/fs/notify/fsnotify.c
@@ -63,14 +63,14 @@ void __fsnotify_update_child_dentry_flags(struct inode *inode)
 	spin_lock(&inode->i_lock);
 	/* run all of the dentries associated with this inode.  Since this is a
 	 * directory, there damn well better only be one item on this list */
-	hlist_for_each_entry(alias, &inode->i_dentry, d_alias) {
+	hlist_for_each_entry(alias, &inode->i_dentry, d_u.d_alias) {
 		struct dentry *child;
 
 		/* run all of the children of the original inode and fix their
 		 * d_flags to indicate parental interest (their parent is the
 		 * original inode) */
 		spin_lock(&alias->d_lock);
-		list_for_each_entry(child, &alias->d_subdirs, d_u.d_child) {
+		list_for_each_entry(child, &alias->d_subdirs, d_child) {
 			if (!child->d_inode)
 				continue;
 
@@ -229,8 +229,16 @@ int fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is,
 					      &fsnotify_mark_srcu);
 	}
 
+	/*
+	 * We need to merge inode & vfsmount mark lists so that inode mark
+	 * ignore masks are properly reflected for mount mark notifications.
+	 * That's why this traversal is so complicated...
+	 */
 	while (inode_node || vfsmount_node) {
-		inode_group = vfsmount_group = NULL;
+		inode_group = NULL;
+		inode_mark = NULL;
+		vfsmount_group = NULL;
+		vfsmount_mark = NULL;
 
 		if (inode_node) {
 			inode_mark = hlist_entry(srcu_dereference(inode_node, &fsnotify_mark_srcu),
@@ -244,21 +252,19 @@ int fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is,
 			vfsmount_group = vfsmount_mark->group;
 		}
 
-		if (inode_group > vfsmount_group) {
-			/* handle inode */
-			ret = send_to_group(to_tell, inode_mark, NULL, mask,
-					    data, data_is, cookie, file_name);
-			/* we didn't use the vfsmount_mark */
-			vfsmount_group = NULL;
-		} else if (vfsmount_group > inode_group) {
-			ret = send_to_group(to_tell, NULL, vfsmount_mark, mask,
-					    data, data_is, cookie, file_name);
-			inode_group = NULL;
-		} else {
-			ret = send_to_group(to_tell, inode_mark, vfsmount_mark,
-					    mask, data, data_is, cookie,
-					    file_name);
+		if (inode_group && vfsmount_group) {
+			int cmp = fsnotify_compare_groups(inode_group,
+							  vfsmount_group);
+			if (cmp > 0) {
+				inode_group = NULL;
+				inode_mark = NULL;
+			} else if (cmp < 0) {
+				vfsmount_group = NULL;
+				vfsmount_mark = NULL;
+			}
 		}
+		ret = send_to_group(to_tell, inode_mark, vfsmount_mark, mask,
+				    data, data_is, cookie, file_name);
 
 		if (ret && (mask & ALL_FSNOTIFY_PERM_EVENTS))
 			goto out;
diff --git a/fs/notify/fsnotify.h b/fs/notify/fsnotify.h
index 9c0898c..3b68b0a 100644
--- a/fs/notify/fsnotify.h
+++ b/fs/notify/fsnotify.h
@@ -12,6 +12,10 @@ extern void fsnotify_flush_notify(struct fsnotify_group *group);
 /* protects reads of inode and vfsmount marks list */
 extern struct srcu_struct fsnotify_mark_srcu;
 
+/* compare two groups for sorting of marks lists */
+extern int fsnotify_compare_groups(struct fsnotify_group *a,
+				   struct fsnotify_group *b);
+
 extern void fsnotify_set_inode_mark_mask_locked(struct fsnotify_mark *fsn_mark,
 						__u32 mask);
 /* add a mark to an inode */
diff --git a/fs/notify/inode_mark.c b/fs/notify/inode_mark.c
index 9ce0622..dfbf544 100644
--- a/fs/notify/inode_mark.c
+++ b/fs/notify/inode_mark.c
@@ -194,6 +194,7 @@ int fsnotify_add_inode_mark(struct fsnotify_mark *mark,
 {
 	struct fsnotify_mark *lmark, *last = NULL;
 	int ret = 0;
+	int cmp;
 
 	mark->flags |= FSNOTIFY_MARK_FLAG_INODE;
 
@@ -219,11 +220,8 @@ int fsnotify_add_inode_mark(struct fsnotify_mark *mark,
 			goto out;
 		}
 
-		if (mark->group->priority < lmark->group->priority)
-			continue;
-
-		if ((mark->group->priority == lmark->group->priority) &&
-		    (mark->group < lmark->group))
+		cmp = fsnotify_compare_groups(lmark->group, mark->group);
+		if (cmp < 0)
 			continue;
 
 		hlist_add_before_rcu(&mark->i.i_list, &lmark->i.i_list);
@@ -288,20 +286,25 @@ void fsnotify_unmount_inodes(struct list_head *list)
 		spin_unlock(&inode->i_lock);
 
 		/* In case the dropping of a reference would nuke next_i. */
-		if ((&next_i->i_sb_list != list) &&
-		    atomic_read(&next_i->i_count)) {
+		while (&next_i->i_sb_list != list) {
 			spin_lock(&next_i->i_lock);
-			if (!(next_i->i_state & (I_FREEING | I_WILL_FREE))) {
+			if (!(next_i->i_state & (I_FREEING | I_WILL_FREE)) &&
+						atomic_read(&next_i->i_count)) {
 				__iget(next_i);
 				need_iput = next_i;
+				spin_unlock(&next_i->i_lock);
+				break;
 			}
 			spin_unlock(&next_i->i_lock);
+			next_i = list_entry(next_i->i_sb_list.next,
+						struct inode, i_sb_list);
 		}
 
 		/*
-		 * We can safely drop inode_sb_list_lock here because we hold
-		 * references on both inode and next_i.  Also no new inodes
-		 * will be added since the umount has begun.
+		 * We can safely drop inode_sb_list_lock here because either
+		 * we actually hold references on both inode and next_i or
+		 * end of list.  Also no new inodes will be added since the
+		 * umount has begun.
 		 */
 		spin_unlock(&inode_sb_list_lock);
 
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index daf7665..283aa31 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -227,14 +227,13 @@ static ssize_t inotify_read(struct file *file, char __user *buf,
 	struct fsnotify_event *kevent;
 	char __user *start;
 	int ret;
-	DEFINE_WAIT(wait);
+	DEFINE_WAIT_FUNC(wait, woken_wake_function);
 
 	start = buf;
 	group = file->private_data;
 
+	add_wait_queue(&group->notification_waitq, &wait);
 	while (1) {
-		prepare_to_wait(&group->notification_waitq, &wait, TASK_INTERRUPTIBLE);
-
 		mutex_lock(&group->notification_mutex);
 		kevent = get_one_event(group, count);
 		mutex_unlock(&group->notification_mutex);
@@ -264,10 +263,10 @@ static ssize_t inotify_read(struct file *file, char __user *buf,
 		if (start != buf)
 			break;
 
-		schedule();
+		wait_woken(&wait, TASK_INTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT);
 	}
+	remove_wait_queue(&group->notification_waitq, &wait);
 
-	finish_wait(&group->notification_waitq, &wait);
 	if (start != buf && ret != -EFAULT)
 		ret = buf - start;
 	return ret;
diff --git a/fs/notify/mark.c b/fs/notify/mark.c
index d90deaa..34c38fa 100644
--- a/fs/notify/mark.c
+++ b/fs/notify/mark.c
@@ -210,6 +210,42 @@ void fsnotify_set_mark_ignored_mask_locked(struct fsnotify_mark *mark, __u32 mas
 }
 
 /*
+ * Sorting function for lists of fsnotify marks.
+ *
+ * Fanotify supports different notification classes (reflected as priority of
+ * notification group). Events shall be passed to notification groups in
+ * decreasing priority order. To achieve this marks in notification lists for
+ * inodes and vfsmounts are sorted so that priorities of corresponding groups
+ * are descending.
+ *
+ * Furthermore correct handling of the ignore mask requires processing inode
+ * and vfsmount marks of each group together. Using the group address as
+ * further sort criterion provides a unique sorting order and thus we can
+ * merge inode and vfsmount lists of marks in linear time and find groups
+ * present in both lists.
+ *
+ * A return value of 1 signifies that b has priority over a.
+ * A return value of 0 signifies that the two marks have to be handled together.
+ * A return value of -1 signifies that a has priority over b.
+ */
+int fsnotify_compare_groups(struct fsnotify_group *a, struct fsnotify_group *b)
+{
+	if (a == b)
+		return 0;
+	if (!a)
+		return 1;
+	if (!b)
+		return -1;
+	if (a->priority < b->priority)
+		return 1;
+	if (a->priority > b->priority)
+		return -1;
+	if (a < b)
+		return 1;
+	return -1;
+}
+
+/*
  * Attach an initialized mark to a given group and fs object.
  * These marks may be used for the fsnotify backend to determine which
  * event types should be delivered to which group.
diff --git a/fs/notify/vfsmount_mark.c b/fs/notify/vfsmount_mark.c
index ac851e8..faefa72 100644
--- a/fs/notify/vfsmount_mark.c
+++ b/fs/notify/vfsmount_mark.c
@@ -153,6 +153,7 @@ int fsnotify_add_vfsmount_mark(struct fsnotify_mark *mark,
 	struct mount *m = real_mount(mnt);
 	struct fsnotify_mark *lmark, *last = NULL;
 	int ret = 0;
+	int cmp;
 
 	mark->flags |= FSNOTIFY_MARK_FLAG_VFSMOUNT;
 
@@ -178,11 +179,8 @@ int fsnotify_add_vfsmount_mark(struct fsnotify_mark *mark,
 			goto out;
 		}
 
-		if (mark->group->priority < lmark->group->priority)
-			continue;
-
-		if ((mark->group->priority == lmark->group->priority) &&
-		    (mark->group < lmark->group))
+		cmp = fsnotify_compare_groups(lmark->group, mark->group);
+		if (cmp < 0)
 			continue;
 
 		hlist_add_before_rcu(&mark->m.m_list, &lmark->m.m_list);
diff --git a/fs/ntfs/namei.c b/fs/ntfs/namei.c
index 436f360..b3973c2 100644
--- a/fs/ntfs/namei.c
+++ b/fs/ntfs/namei.c
@@ -111,8 +111,8 @@ static struct dentry *ntfs_lookup(struct inode *dir_ino, struct dentry *dent,
 	unsigned long dent_ino;
 	int uname_len;
 
-	ntfs_debug("Looking up %s in directory inode 0x%lx.",
-			dent->d_name.name, dir_ino->i_ino);
+	ntfs_debug("Looking up %pd in directory inode 0x%lx.",
+			dent, dir_ino->i_ino);
 	/* Convert the name of the dentry to Unicode. */
 	uname_len = ntfs_nlstoucs(vol, dent->d_name.name, dent->d_name.len,
 			&uname);
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 1ef547e..d9f2229 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -1251,7 +1251,7 @@ static int ocfs2_write_cluster(struct address_space *mapping,
 	ret = ocfs2_extent_map_get_blocks(inode, v_blkno, &p_blkno, NULL,
 					  NULL);
 	if (ret < 0) {
-		ocfs2_error(inode->i_sb, "Corrupting extend for inode %llu, "
+		mlog(ML_ERROR, "Get physical blkno failed for inode %llu, "
 			    "at logical block %llu",
 			    (unsigned long long)OCFS2_I(inode)->ip_blkno,
 			    (unsigned long long)v_blkno);
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index eb9d487..16eff45 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -1127,10 +1127,10 @@ static int o2hb_thread(void *data)
 		elapsed_msec = o2hb_elapsed_msecs(&before_hb, &after_hb);
 
 		mlog(ML_HEARTBEAT,
-		     "start = %lu.%lu, end = %lu.%lu, msec = %u\n",
+		     "start = %lu.%lu, end = %lu.%lu, msec = %u, ret = %d\n",
 		     before_hb.tv_sec, (unsigned long) before_hb.tv_usec,
 		     after_hb.tv_sec, (unsigned long) after_hb.tv_usec,
-		     elapsed_msec);
+		     elapsed_msec, ret);
 
 		if (!kthread_should_stop() &&
 		    elapsed_msec < reg->hr_timeout_ms) {
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c
index 97de0fb..2e355e0 100644
--- a/fs/ocfs2/cluster/tcp.c
+++ b/fs/ocfs2/cluster/tcp.c
@@ -925,7 +925,7 @@ static int o2net_send_tcp_msg(struct socket *sock, struct kvec *vec,
 			      size_t veclen, size_t total)
 {
 	int ret;
-	struct msghdr msg;
+	struct msghdr msg = {.msg_flags = 0,};
 
 	if (sock == NULL) {
 		ret = -EINVAL;
@@ -1736,7 +1736,7 @@ static void o2net_connect_expired(struct work_struct *work)
 		     o2net_idle_timeout() / 1000,
 		     o2net_idle_timeout() % 1000);
 
-		o2net_set_nn_state(nn, NULL, 0, -ENOTCONN);
+		o2net_set_nn_state(nn, NULL, 0, 0);
 	}
 	spin_unlock(&nn->nn_lock);
 }
diff --git a/fs/ocfs2/dcache.c b/fs/ocfs2/dcache.c
index e2e05a1..4fda7a5 100644
--- a/fs/ocfs2/dcache.c
+++ b/fs/ocfs2/dcache.c
@@ -172,7 +172,7 @@ struct dentry *ocfs2_find_local_alias(struct inode *inode,
 	struct dentry *dentry;
 
 	spin_lock(&inode->i_lock);
-	hlist_for_each_entry(dentry, &inode->i_dentry, d_alias) {
+	hlist_for_each_entry(dentry, &inode->i_dentry, d_u.d_alias) {
 		spin_lock(&dentry->d_lock);
 		if (ocfs2_match_dentry(dentry, parent_blkno, skip_unhashed)) {
 			trace_ocfs2_find_local_alias(dentry->d_name.len,
@@ -251,8 +251,8 @@ int ocfs2_dentry_attach_lock(struct dentry *dentry,
 
 	if (dl) {
 		mlog_bug_on_msg(dl->dl_parent_blkno != parent_blkno,
-				" \"%.*s\": old parent: %llu, new: %llu\n",
-				dentry->d_name.len, dentry->d_name.name,
+				" \"%pd\": old parent: %llu, new: %llu\n",
+				dentry,
 				(unsigned long long)parent_blkno,
 				(unsigned long long)dl->dl_parent_blkno);
 		return 0;
@@ -277,8 +277,8 @@ int ocfs2_dentry_attach_lock(struct dentry *dentry,
 				(unsigned long long)OCFS2_I(inode)->ip_blkno);
 
 		mlog_bug_on_msg(dl->dl_parent_blkno != parent_blkno,
-				" \"%.*s\": old parent: %llu, new: %llu\n",
-				dentry->d_name.len, dentry->d_name.name,
+				" \"%pd\": old parent: %llu, new: %llu\n",
+				dentry,
 				(unsigned long long)parent_blkno,
 				(unsigned long long)dl->dl_parent_blkno);
 
@@ -406,17 +406,15 @@ static void ocfs2_dentry_iput(struct dentry *dentry, struct inode *inode)
 			if (inode)
 				ino = (unsigned long long)OCFS2_I(inode)->ip_blkno;
 			mlog(ML_ERROR, "Dentry is missing cluster lock. "
-			     "inode: %llu, d_flags: 0x%x, d_name: %.*s\n",
-			     ino, dentry->d_flags, dentry->d_name.len,
-			     dentry->d_name.name);
+			     "inode: %llu, d_flags: 0x%x, d_name: %pd\n",
+			     ino, dentry->d_flags, dentry);
 		}
 
 		goto out;
 	}
 
-	mlog_bug_on_msg(dl->dl_count == 0, "dentry: %.*s, count: %u\n",
-			dentry->d_name.len, dentry->d_name.name,
-			dl->dl_count);
+	mlog_bug_on_msg(dl->dl_count == 0, "dentry: %pd, count: %u\n",
+			dentry, dl->dl_count);
 
 	ocfs2_dentry_lock_put(OCFS2_SB(dentry->d_sb), dl);
 
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
index 0717662..79d56dc 100644
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -744,7 +744,7 @@ restart:
 		if (ocfs2_read_dir_block(dir, block, &bh, 0)) {
 			/* read error, skip block & hope for the best.
 			 * ocfs2_read_dir_block() has released the bh. */
-			ocfs2_error(dir->i_sb, "reading directory %llu, "
+			mlog(ML_ERROR, "reading directory %llu, "
 				    "offset %lu\n",
 				    (unsigned long long)OCFS2_I(dir)->ip_blkno,
 				    block);
@@ -2073,10 +2073,12 @@ struct ocfs2_empty_dir_priv {
 	unsigned seen_other;
 	unsigned dx_dir;
 };
-static int ocfs2_empty_dir_filldir(void *priv, const char *name, int name_len,
-				   loff_t pos, u64 ino, unsigned type)
+static int ocfs2_empty_dir_filldir(struct dir_context *ctx, const char *name,
+				   int name_len, loff_t pos, u64 ino,
+				   unsigned type)
 {
-	struct ocfs2_empty_dir_priv *p = priv;
+	struct ocfs2_empty_dir_priv *p =
+		container_of(ctx, struct ocfs2_empty_dir_priv, ctx);
 
 	/*
 	 * Check the positions of "." and ".." records to be sure
diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c
index 02d315f..50a59d2 100644
--- a/fs/ocfs2/dlm/dlmdomain.c
+++ b/fs/ocfs2/dlm/dlmdomain.c
@@ -877,7 +877,7 @@ static int dlm_query_join_handler(struct o2net_msg *msg, u32 len, void *data,
 	 * to be put in someone's domain map.
 	 * Also, explicitly disallow joining at certain troublesome
 	 * times (ie. during recovery). */
-	if (dlm && dlm->dlm_state != DLM_CTXT_LEAVING) {
+	if (dlm->dlm_state != DLM_CTXT_LEAVING) {
 		int bit = query->node_idx;
 		spin_lock(&dlm->spinlock);
 
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index 215e41a..3689b35 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -1460,6 +1460,18 @@ way_up_top:
 
 		/* take care of the easy cases up front */
 		spin_lock(&res->spinlock);
+
+		/*
+		 * Right after dlm spinlock was released, dlm_thread could have
+		 * purged the lockres. Check if lockres got unhashed. If so
+		 * start over.
+		 */
+		if (hlist_unhashed(&res->hash_node)) {
+			spin_unlock(&res->spinlock);
+			dlm_lockres_put(res);
+			goto way_up_top;
+		}
+
 		if (res->state & (DLM_LOCK_RES_RECOVERING|
 				  DLM_LOCK_RES_MIGRATING)) {
 			spin_unlock(&res->spinlock);
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c
index 3365839..79b5af5 100644
--- a/fs/ocfs2/dlm/dlmrecovery.c
+++ b/fs/ocfs2/dlm/dlmrecovery.c
@@ -1656,14 +1656,18 @@ int dlm_do_master_requery(struct dlm_ctxt *dlm, struct dlm_lock_resource *res,
 	req.namelen = res->lockname.len;
 	memcpy(req.name, res->lockname.name, res->lockname.len);
 
+resend:
 	ret = o2net_send_message(DLM_MASTER_REQUERY_MSG, dlm->key,
 				 &req, sizeof(req), nodenum, &status);
-	/* XXX: negative status not handled properly here. */
 	if (ret < 0)
 		mlog(ML_ERROR, "Error %d when sending message %u (key "
 		     "0x%x) to node %u\n", ret, DLM_MASTER_REQUERY_MSG,
 		     dlm->key, nodenum);
-	else {
+	else if (status == -ENOMEM) {
+		mlog_errno(status);
+		msleep(50);
+		goto resend;
+	} else {
 		BUG_ON(status < 0);
 		BUG_ON(status > DLM_LOCK_RES_OWNER_UNKNOWN);
 		*real_master = (u8) (status & 0xff);
@@ -1705,9 +1709,13 @@ int dlm_master_requery_handler(struct o2net_msg *msg, u32 len, void *data,
 			int ret = dlm_dispatch_assert_master(dlm, res,
 							     0, 0, flags);
 			if (ret < 0) {
-				mlog_errno(-ENOMEM);
-				/* retry!? */
-				BUG();
+				mlog_errno(ret);
+				spin_unlock(&res->spinlock);
+				dlm_lockres_put(res);
+				spin_unlock(&dlm->spinlock);
+				dlm_put(dlm);
+				/* sender will take care of this and retry */
+				return ret;
 			} else
 				__dlm_lockres_grab_inflight_worker(dlm, res);
 			spin_unlock(&res->spinlock);
diff --git a/fs/ocfs2/dlmfs/dlmfs.c b/fs/ocfs2/dlmfs/dlmfs.c
index 09b7d9d..57c40e3 100644
--- a/fs/ocfs2/dlmfs/dlmfs.c
+++ b/fs/ocfs2/dlmfs/dlmfs.c
@@ -565,8 +565,8 @@ static int dlmfs_unlink(struct inode *dir,
 	 * to acquire a lock, this basically destroys our lockres. */
 	status = user_dlm_destroy_lock(&DLMFS_I(inode)->ip_lockres);
 	if (status < 0) {
-		mlog(ML_ERROR, "unlink %.*s, error %d from destroy\n",
-		     dentry->d_name.len, dentry->d_name.name, status);
+		mlog(ML_ERROR, "unlink %pd, error %d from destroy\n",
+		     dentry, status);
 		goto bail;
 	}
 	status = simple_unlink(dir, dentry);
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index 21262f2..1c423af 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -861,8 +861,13 @@ static inline void ocfs2_generic_handle_convert_action(struct ocfs2_lock_res *lo
 	 * We set the OCFS2_LOCK_UPCONVERT_FINISHING flag before clearing
 	 * the OCFS2_LOCK_BUSY flag to prevent the dc thread from
 	 * downconverting the lock before the upconvert has fully completed.
+	 * Do not prevent the dc thread from downconverting if NONBLOCK lock
+	 * had already returned.
 	 */
-	lockres_or_flags(lockres, OCFS2_LOCK_UPCONVERT_FINISHING);
+	if (!(lockres->l_flags & OCFS2_LOCK_NONBLOCK_FINISHED))
+		lockres_or_flags(lockres, OCFS2_LOCK_UPCONVERT_FINISHING);
+	else
+		lockres_clear_flags(lockres, OCFS2_LOCK_NONBLOCK_FINISHED);
 
 	lockres_clear_flags(lockres, OCFS2_LOCK_BUSY);
 }
@@ -1324,13 +1329,12 @@ static void lockres_add_mask_waiter(struct ocfs2_lock_res *lockres,
 
 /* returns 0 if the mw that was removed was already satisfied, -EBUSY
  * if the mask still hadn't reached its goal */
-static int lockres_remove_mask_waiter(struct ocfs2_lock_res *lockres,
+static int __lockres_remove_mask_waiter(struct ocfs2_lock_res *lockres,
 				      struct ocfs2_mask_waiter *mw)
 {
-	unsigned long flags;
 	int ret = 0;
 
-	spin_lock_irqsave(&lockres->l_lock, flags);
+	assert_spin_locked(&lockres->l_lock);
 	if (!list_empty(&mw->mw_item)) {
 		if ((lockres->l_flags & mw->mw_mask) != mw->mw_goal)
 			ret = -EBUSY;
@@ -1338,6 +1342,18 @@ static int lockres_remove_mask_waiter(struct ocfs2_lock_res *lockres,
 		list_del_init(&mw->mw_item);
 		init_completion(&mw->mw_complete);
 	}
+
+	return ret;
+}
+
+static int lockres_remove_mask_waiter(struct ocfs2_lock_res *lockres,
+				      struct ocfs2_mask_waiter *mw)
+{
+	unsigned long flags;
+	int ret = 0;
+
+	spin_lock_irqsave(&lockres->l_lock, flags);
+	ret = __lockres_remove_mask_waiter(lockres, mw);
 	spin_unlock_irqrestore(&lockres->l_lock, flags);
 
 	return ret;
@@ -1373,6 +1389,7 @@ static int __ocfs2_cluster_lock(struct ocfs2_super *osb,
 	unsigned long flags;
 	unsigned int gen;
 	int noqueue_attempted = 0;
+	int dlm_locked = 0;
 
 	ocfs2_init_mask_waiter(&mw);
 
@@ -1481,6 +1498,7 @@ again:
 			ocfs2_recover_from_dlm_error(lockres, 1);
 			goto out;
 		}
+		dlm_locked = 1;
 
 		mlog(0, "lock %s, successful return from ocfs2_dlm_lock\n",
 		     lockres->l_name);
@@ -1514,10 +1532,17 @@ out:
 	if (wait && arg_flags & OCFS2_LOCK_NONBLOCK &&
 	    mw.mw_mask & (OCFS2_LOCK_BUSY|OCFS2_LOCK_BLOCKED)) {
 		wait = 0;
-		if (lockres_remove_mask_waiter(lockres, &mw))
+		spin_lock_irqsave(&lockres->l_lock, flags);
+		if (__lockres_remove_mask_waiter(lockres, &mw)) {
+			if (dlm_locked)
+				lockres_or_flags(lockres,
+					OCFS2_LOCK_NONBLOCK_FINISHED);
+			spin_unlock_irqrestore(&lockres->l_lock, flags);
 			ret = -EAGAIN;
-		else
+		} else {
+			spin_unlock_irqrestore(&lockres->l_lock, flags);
 			goto again;
+		}
 	}
 	if (wait) {
 		ret = ocfs2_wait_for_mask(&mw);
@@ -3725,8 +3750,7 @@ static int ocfs2_dentry_convert_worker(struct ocfs2_lock_res *lockres,
 			break;
 		spin_unlock(&dentry_attach_lock);
 
-		mlog(0, "d_delete(%.*s);\n", dentry->d_name.len,
-		     dentry->d_name.name);
+		mlog(0, "d_delete(%pd);\n", dentry);
 
 		/*
 		 * The following dcache calls may do an
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 324dc93..69fb9f7 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -2381,9 +2381,7 @@ out_dio:
 		if (ret < 0)
 			written = ret;
 
-		if (!ret && ((old_size != i_size_read(inode)) ||
-			     (old_clusters != OCFS2_I(inode)->ip_clusters) ||
-			     has_refcount)) {
+		if (!ret) {
 			ret = jbd2_journal_force_commit(osb->journal->j_journal);
 			if (ret < 0)
 				written = ret;
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index 437de7f..c8b25de 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -540,8 +540,7 @@ bail:
 	if (status < 0)
 		make_bad_inode(inode);
 
-	if (args && bh)
-		brelse(bh);
+	brelse(bh);
 
 	return status;
 }
diff --git a/fs/ocfs2/inode.h b/fs/ocfs2/inode.h
index a9b76de..ca3431e 100644
--- a/fs/ocfs2/inode.h
+++ b/fs/ocfs2/inode.h
@@ -80,6 +80,8 @@ struct ocfs2_inode_info
 	 */
 	tid_t i_sync_tid;
 	tid_t i_datasync_tid;
+
+	struct dquot *i_dquot[MAXQUOTAS];
 };
 
 /*
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index 4b0c688..4f502382 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -1982,10 +1982,12 @@ struct ocfs2_orphan_filldir_priv {
 	struct ocfs2_super	*osb;
 };
 
-static int ocfs2_orphan_filldir(void *priv, const char *name, int name_len,
-				loff_t pos, u64 ino, unsigned type)
+static int ocfs2_orphan_filldir(struct dir_context *ctx, const char *name,
+				int name_len, loff_t pos, u64 ino,
+				unsigned type)
 {
-	struct ocfs2_orphan_filldir_priv *p = priv;
+	struct ocfs2_orphan_filldir_priv *p =
+		container_of(ctx, struct ocfs2_orphan_filldir_priv, ctx);
 	struct inode *iter;
 
 	if (name_len == 1 && !strncmp(".", name, 1))
diff --git a/fs/ocfs2/move_extents.c b/fs/ocfs2/move_extents.c
index 74caffe..56a768d 100644
--- a/fs/ocfs2/move_extents.c
+++ b/fs/ocfs2/move_extents.c
@@ -904,9 +904,6 @@ static int ocfs2_move_extents(struct ocfs2_move_extents_context *context)
 	struct buffer_head *di_bh = NULL;
 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
 
-	if (!inode)
-		return -ENOENT;
-
 	if (ocfs2_is_hard_readonly(osb) || ocfs2_is_soft_readonly(osb))
 		return -EROFS;
 
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index 8add6f1..b931e04 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -158,7 +158,7 @@ bail_add:
 		 * NOTE: This dentry already has ->d_op set from
 		 * ocfs2_get_parent() and ocfs2_get_dentry()
 		 */
-		if (ret)
+		if (!IS_ERR_OR_NULL(ret))
 			dentry = ret;
 
 		status = ocfs2_dentry_attach_lock(dentry, inode,
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index bbec539..7d6b7d0 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -144,6 +144,12 @@ enum ocfs2_unlock_action {
 						     * before the upconvert
 						     * has completed */
 
+#define OCFS2_LOCK_NONBLOCK_FINISHED (0x00001000) /* NONBLOCK cluster
+						   * lock has already
+						   * returned, do not block
+						   * dc thread from
+						   * downconverting */
+
 struct ocfs2_lock_res_ops;
 
 typedef void (*ocfs2_lock_callback)(int status, unsigned long data);
diff --git a/fs/ocfs2/slot_map.c b/fs/ocfs2/slot_map.c
index a88b2a4..d5493e3 100644
--- a/fs/ocfs2/slot_map.c
+++ b/fs/ocfs2/slot_map.c
@@ -306,7 +306,7 @@ int ocfs2_slot_to_node_num_locked(struct ocfs2_super *osb, int slot_num,
 	assert_spin_locked(&osb->osb_lock);
 
 	BUG_ON(slot_num < 0);
-	BUG_ON(slot_num > osb->max_slots);
+	BUG_ON(slot_num >= osb->max_slots);
 
 	if (!si->si_slots[slot_num].sl_valid)
 		return -ENOENT;
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 93c85bc..8372317 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -143,6 +143,11 @@ static int ocfs2_susp_quotas(struct ocfs2_super *osb, int unsuspend);
 static int ocfs2_enable_quotas(struct ocfs2_super *osb);
 static void ocfs2_disable_quotas(struct ocfs2_super *osb);
 
+static struct dquot **ocfs2_get_dquots(struct inode *inode)
+{
+	return OCFS2_I(inode)->i_dquot;
+}
+
 static const struct super_operations ocfs2_sops = {
 	.statfs		= ocfs2_statfs,
 	.alloc_inode	= ocfs2_alloc_inode,
@@ -155,6 +160,7 @@ static const struct super_operations ocfs2_sops = {
 	.show_options   = ocfs2_show_options,
 	.quota_read	= ocfs2_quota_read,
 	.quota_write	= ocfs2_quota_write,
+	.get_dquots	= ocfs2_get_dquots,
 };
 
 enum {
@@ -563,6 +569,7 @@ static struct inode *ocfs2_alloc_inode(struct super_block *sb)
 
 	oi->i_sync_tid = 0;
 	oi->i_datasync_tid = 0;
+	memset(&oi->i_dquot, 0, sizeof(oi->i_dquot));
 
 	jbd2_journal_init_jbd_inode(&oi->ip_jinode, &oi->vfs_inode);
 	return &oi->vfs_inode;
@@ -1622,8 +1629,9 @@ static int __init ocfs2_init(void)
 
 	ocfs2_debugfs_root = debugfs_create_dir("ocfs2", NULL);
 	if (!ocfs2_debugfs_root) {
-		status = -EFAULT;
+		status = -ENOMEM;
 		mlog(ML_ERROR, "Unable to create ocfs2 debugfs root.\n");
+		goto out4;
 	}
 
 	ocfs2_set_locking_protocol();
@@ -2073,6 +2081,7 @@ static int ocfs2_initialize_super(struct super_block *sb,
 	sb->s_export_op = &ocfs2_export_ops;
 	sb->s_qcop = &ocfs2_quotactl_ops;
 	sb->dq_op = &ocfs2_quota_operations;
+	sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP;
 	sb->s_xattr = ocfs2_xattr_handlers;
 	sb->s_time_gran = 1;
 	sb->s_flags |= MS_NOATIME;
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index 016f01d..662f8de 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -1284,7 +1284,7 @@ int ocfs2_xattr_get_nolock(struct inode *inode,
 		return -EOPNOTSUPP;
 
 	if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL))
-		ret = -ENODATA;
+		return -ENODATA;
 
 	xis.inode_bh = xbs.inode_bh = di_bh;
 	di = (struct ocfs2_dinode *)di_bh->b_data;
diff --git a/fs/open.c b/fs/open.c
index d6fd3ac..b1bf3d5 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -516,7 +516,7 @@ SYSCALL_DEFINE2(fchmod, unsigned int, fd, umode_t, mode)
 	int err = -EBADF;
 
 	if (f.file) {
-		audit_inode(NULL, f.file->f_path.dentry, 0);
+		audit_file(f.file);
 		err = chmod_common(&f.file->f_path, mode);
 		fdput(f);
 	}
@@ -642,7 +642,7 @@ SYSCALL_DEFINE3(fchown, unsigned int, fd, uid_t, user, gid_t, group)
 	error = mnt_want_write_file(f.file);
 	if (error)
 		goto out_fput;
-	audit_inode(NULL, f.file->f_path.dentry, 0);
+	audit_file(f.file);
 	error = chown_common(&f.file->f_path, user, group);
 	mnt_drop_write_file(f.file);
 out_fput:
@@ -823,8 +823,7 @@ struct file *dentry_open(const struct path *path, int flags,
 	f = get_empty_filp();
 	if (!IS_ERR(f)) {
 		f->f_flags = flags;
-		f->f_path = *path;
-		error = do_dentry_open(f, NULL, cred);
+		error = vfs_open(path, f, cred);
 		if (!error) {
 			/* from now on we need fput() to dispose of f */
 			error = open_check_o_direct(f);
@@ -841,6 +840,26 @@ struct file *dentry_open(const struct path *path, int flags,
 }
 EXPORT_SYMBOL(dentry_open);
 
+/**
+ * vfs_open - open the file at the given path
+ * @path: path to open
+ * @filp: newly allocated file with f_flag initialized
+ * @cred: credentials to use
+ */
+int vfs_open(const struct path *path, struct file *filp,
+	     const struct cred *cred)
+{
+	struct inode *inode = path->dentry->d_inode;
+
+	if (inode->i_op->dentry_open)
+		return inode->i_op->dentry_open(path->dentry, filp, cred);
+	else {
+		filp->f_path = *path;
+		return do_dentry_open(filp, NULL, cred);
+	}
+}
+EXPORT_SYMBOL(vfs_open);
+
 static inline int build_open_flags(int flags, umode_t mode, struct open_flags *op)
 {
 	int lookup_flags = 0;
diff --git a/fs/overlayfs/Kconfig b/fs/overlayfs/Kconfig
new file mode 100644
index 0000000..3435581
--- /dev/null
+++ b/fs/overlayfs/Kconfig
@@ -0,0 +1,10 @@
+config OVERLAY_FS
+	tristate "Overlay filesystem support"
+	help
+	  An overlay filesystem combines two filesystems - an 'upper' filesystem
+	  and a 'lower' filesystem.  When a name exists in both filesystems, the
+	  object in the 'upper' filesystem is visible while the object in the
+	  'lower' filesystem is either hidden or, in the case of directories,
+	  merged with the 'upper' object.
+
+	  For more information see Documentation/filesystems/overlayfs.txt
diff --git a/fs/overlayfs/Makefile b/fs/overlayfs/Makefile
new file mode 100644
index 0000000..900daed
--- /dev/null
+++ b/fs/overlayfs/Makefile
@@ -0,0 +1,7 @@
+#
+# Makefile for the overlay filesystem.
+#
+
+obj-$(CONFIG_OVERLAY_FS) += overlay.o
+
+overlay-objs := super.o inode.o dir.o readdir.o copy_up.o
diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c
new file mode 100644
index 0000000..ea10a87
--- /dev/null
+++ b/fs/overlayfs/copy_up.c
@@ -0,0 +1,414 @@
+/*
+ *
+ * Copyright (C) 2011 Novell Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ */
+
+#include <linux/fs.h>
+#include <linux/slab.h>
+#include <linux/file.h>
+#include <linux/splice.h>
+#include <linux/xattr.h>
+#include <linux/security.h>
+#include <linux/uaccess.h>
+#include <linux/sched.h>
+#include <linux/namei.h>
+#include "overlayfs.h"
+
+#define OVL_COPY_UP_CHUNK_SIZE (1 << 20)
+
+int ovl_copy_xattr(struct dentry *old, struct dentry *new)
+{
+	ssize_t list_size, size;
+	char *buf, *name, *value;
+	int error;
+
+	if (!old->d_inode->i_op->getxattr ||
+	    !new->d_inode->i_op->getxattr)
+		return 0;
+
+	list_size = vfs_listxattr(old, NULL, 0);
+	if (list_size <= 0) {
+		if (list_size == -EOPNOTSUPP)
+			return 0;
+		return list_size;
+	}
+
+	buf = kzalloc(list_size, GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	error = -ENOMEM;
+	value = kmalloc(XATTR_SIZE_MAX, GFP_KERNEL);
+	if (!value)
+		goto out;
+
+	list_size = vfs_listxattr(old, buf, list_size);
+	if (list_size <= 0) {
+		error = list_size;
+		goto out_free_value;
+	}
+
+	for (name = buf; name < (buf + list_size); name += strlen(name) + 1) {
+		size = vfs_getxattr(old, name, value, XATTR_SIZE_MAX);
+		if (size <= 0) {
+			error = size;
+			goto out_free_value;
+		}
+		error = vfs_setxattr(new, name, value, size, 0);
+		if (error)
+			goto out_free_value;
+	}
+
+out_free_value:
+	kfree(value);
+out:
+	kfree(buf);
+	return error;
+}
+
+static int ovl_copy_up_data(struct path *old, struct path *new, loff_t len)
+{
+	struct file *old_file;
+	struct file *new_file;
+	loff_t old_pos = 0;
+	loff_t new_pos = 0;
+	int error = 0;
+
+	if (len == 0)
+		return 0;
+
+	old_file = ovl_path_open(old, O_RDONLY);
+	if (IS_ERR(old_file))
+		return PTR_ERR(old_file);
+
+	new_file = ovl_path_open(new, O_WRONLY);
+	if (IS_ERR(new_file)) {
+		error = PTR_ERR(new_file);
+		goto out_fput;
+	}
+
+	/* FIXME: copy up sparse files efficiently */
+	while (len) {
+		size_t this_len = OVL_COPY_UP_CHUNK_SIZE;
+		long bytes;
+
+		if (len < this_len)
+			this_len = len;
+
+		if (signal_pending_state(TASK_KILLABLE, current)) {
+			error = -EINTR;
+			break;
+		}
+
+		bytes = do_splice_direct(old_file, &old_pos,
+					 new_file, &new_pos,
+					 this_len, SPLICE_F_MOVE);
+		if (bytes <= 0) {
+			error = bytes;
+			break;
+		}
+		WARN_ON(old_pos != new_pos);
+
+		len -= bytes;
+	}
+
+	fput(new_file);
+out_fput:
+	fput(old_file);
+	return error;
+}
+
+static char *ovl_read_symlink(struct dentry *realdentry)
+{
+	int res;
+	char *buf;
+	struct inode *inode = realdentry->d_inode;
+	mm_segment_t old_fs;
+
+	res = -EINVAL;
+	if (!inode->i_op->readlink)
+		goto err;
+
+	res = -ENOMEM;
+	buf = (char *) __get_free_page(GFP_KERNEL);
+	if (!buf)
+		goto err;
+
+	old_fs = get_fs();
+	set_fs(get_ds());
+	/* The cast to a user pointer is valid due to the set_fs() */
+	res = inode->i_op->readlink(realdentry,
+				    (char __user *)buf, PAGE_SIZE - 1);
+	set_fs(old_fs);
+	if (res < 0) {
+		free_page((unsigned long) buf);
+		goto err;
+	}
+	buf[res] = '\0';
+
+	return buf;
+
+err:
+	return ERR_PTR(res);
+}
+
+static int ovl_set_timestamps(struct dentry *upperdentry, struct kstat *stat)
+{
+	struct iattr attr = {
+		.ia_valid =
+		     ATTR_ATIME | ATTR_MTIME | ATTR_ATIME_SET | ATTR_MTIME_SET,
+		.ia_atime = stat->atime,
+		.ia_mtime = stat->mtime,
+	};
+
+	return notify_change(upperdentry, &attr, NULL);
+}
+
+int ovl_set_attr(struct dentry *upperdentry, struct kstat *stat)
+{
+	int err = 0;
+
+	if (!S_ISLNK(stat->mode)) {
+		struct iattr attr = {
+			.ia_valid = ATTR_MODE,
+			.ia_mode = stat->mode,
+		};
+		err = notify_change(upperdentry, &attr, NULL);
+	}
+	if (!err) {
+		struct iattr attr = {
+			.ia_valid = ATTR_UID | ATTR_GID,
+			.ia_uid = stat->uid,
+			.ia_gid = stat->gid,
+		};
+		err = notify_change(upperdentry, &attr, NULL);
+	}
+	if (!err)
+		ovl_set_timestamps(upperdentry, stat);
+
+	return err;
+
+}
+
+static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir,
+			      struct dentry *dentry, struct path *lowerpath,
+			      struct kstat *stat, struct iattr *attr,
+			      const char *link)
+{
+	struct inode *wdir = workdir->d_inode;
+	struct inode *udir = upperdir->d_inode;
+	struct dentry *newdentry = NULL;
+	struct dentry *upper = NULL;
+	umode_t mode = stat->mode;
+	int err;
+
+	newdentry = ovl_lookup_temp(workdir, dentry);
+	err = PTR_ERR(newdentry);
+	if (IS_ERR(newdentry))
+		goto out;
+
+	upper = lookup_one_len(dentry->d_name.name, upperdir,
+			       dentry->d_name.len);
+	err = PTR_ERR(upper);
+	if (IS_ERR(upper))
+		goto out1;
+
+	/* Can't properly set mode on creation because of the umask */
+	stat->mode &= S_IFMT;
+	err = ovl_create_real(wdir, newdentry, stat, link, NULL, true);
+	stat->mode = mode;
+	if (err)
+		goto out2;
+
+	if (S_ISREG(stat->mode)) {
+		struct path upperpath;
+		ovl_path_upper(dentry, &upperpath);
+		BUG_ON(upperpath.dentry != NULL);
+		upperpath.dentry = newdentry;
+
+		err = ovl_copy_up_data(lowerpath, &upperpath, stat->size);
+		if (err)
+			goto out_cleanup;
+	}
+
+	err = ovl_copy_xattr(lowerpath->dentry, newdentry);
+	if (err)
+		goto out_cleanup;
+
+	mutex_lock(&newdentry->d_inode->i_mutex);
+	err = ovl_set_attr(newdentry, stat);
+	if (!err && attr)
+		err = notify_change(newdentry, attr, NULL);
+	mutex_unlock(&newdentry->d_inode->i_mutex);
+	if (err)
+		goto out_cleanup;
+
+	err = ovl_do_rename(wdir, newdentry, udir, upper, 0);
+	if (err)
+		goto out_cleanup;
+
+	ovl_dentry_update(dentry, newdentry);
+	newdentry = NULL;
+
+	/*
+	 * Non-directores become opaque when copied up.
+	 */
+	if (!S_ISDIR(stat->mode))
+		ovl_dentry_set_opaque(dentry, true);
+out2:
+	dput(upper);
+out1:
+	dput(newdentry);
+out:
+	return err;
+
+out_cleanup:
+	ovl_cleanup(wdir, newdentry);
+	goto out;
+}
+
+/*
+ * Copy up a single dentry
+ *
+ * Directory renames only allowed on "pure upper" (already created on
+ * upper filesystem, never copied up).  Directories which are on lower or
+ * are merged may not be renamed.  For these -EXDEV is returned and
+ * userspace has to deal with it.  This means, when copying up a
+ * directory we can rely on it and ancestors being stable.
+ *
+ * Non-directory renames start with copy up of source if necessary.  The
+ * actual rename will only proceed once the copy up was successful.  Copy
+ * up uses upper parent i_mutex for exclusion.  Since rename can change
+ * d_parent it is possible that the copy up will lock the old parent.  At
+ * that point the file will have already been copied up anyway.
+ */
+int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry,
+		    struct path *lowerpath, struct kstat *stat,
+		    struct iattr *attr)
+{
+	struct dentry *workdir = ovl_workdir(dentry);
+	int err;
+	struct kstat pstat;
+	struct path parentpath;
+	struct dentry *upperdir;
+	struct dentry *upperdentry;
+	const struct cred *old_cred;
+	struct cred *override_cred;
+	char *link = NULL;
+
+	ovl_path_upper(parent, &parentpath);
+	upperdir = parentpath.dentry;
+
+	err = vfs_getattr(&parentpath, &pstat);
+	if (err)
+		return err;
+
+	if (S_ISLNK(stat->mode)) {
+		link = ovl_read_symlink(lowerpath->dentry);
+		if (IS_ERR(link))
+			return PTR_ERR(link);
+	}
+
+	err = -ENOMEM;
+	override_cred = prepare_creds();
+	if (!override_cred)
+		goto out_free_link;
+
+	override_cred->fsuid = stat->uid;
+	override_cred->fsgid = stat->gid;
+	/*
+	 * CAP_SYS_ADMIN for copying up extended attributes
+	 * CAP_DAC_OVERRIDE for create
+	 * CAP_FOWNER for chmod, timestamp update
+	 * CAP_FSETID for chmod
+	 * CAP_CHOWN for chown
+	 * CAP_MKNOD for mknod
+	 */
+	cap_raise(override_cred->cap_effective, CAP_SYS_ADMIN);
+	cap_raise(override_cred->cap_effective, CAP_DAC_OVERRIDE);
+	cap_raise(override_cred->cap_effective, CAP_FOWNER);
+	cap_raise(override_cred->cap_effective, CAP_FSETID);
+	cap_raise(override_cred->cap_effective, CAP_CHOWN);
+	cap_raise(override_cred->cap_effective, CAP_MKNOD);
+	old_cred = override_creds(override_cred);
+
+	err = -EIO;
+	if (lock_rename(workdir, upperdir) != NULL) {
+		pr_err("overlayfs: failed to lock workdir+upperdir\n");
+		goto out_unlock;
+	}
+	upperdentry = ovl_dentry_upper(dentry);
+	if (upperdentry) {
+		unlock_rename(workdir, upperdir);
+		err = 0;
+		/* Raced with another copy-up?  Do the setattr here */
+		if (attr) {
+			mutex_lock(&upperdentry->d_inode->i_mutex);
+			err = notify_change(upperdentry, attr, NULL);
+			mutex_unlock(&upperdentry->d_inode->i_mutex);
+		}
+		goto out_put_cred;
+	}
+
+	err = ovl_copy_up_locked(workdir, upperdir, dentry, lowerpath,
+				 stat, attr, link);
+	if (!err) {
+		/* Restore timestamps on parent (best effort) */
+		ovl_set_timestamps(upperdir, &pstat);
+	}
+out_unlock:
+	unlock_rename(workdir, upperdir);
+out_put_cred:
+	revert_creds(old_cred);
+	put_cred(override_cred);
+
+out_free_link:
+	if (link)
+		free_page((unsigned long) link);
+
+	return err;
+}
+
+int ovl_copy_up(struct dentry *dentry)
+{
+	int err;
+
+	err = 0;
+	while (!err) {
+		struct dentry *next;
+		struct dentry *parent;
+		struct path lowerpath;
+		struct kstat stat;
+		enum ovl_path_type type = ovl_path_type(dentry);
+
+		if (type != OVL_PATH_LOWER)
+			break;
+
+		next = dget(dentry);
+		/* find the topmost dentry not yet copied up */
+		for (;;) {
+			parent = dget_parent(next);
+
+			type = ovl_path_type(parent);
+			if (type != OVL_PATH_LOWER)
+				break;
+
+			dput(next);
+			next = parent;
+		}
+
+		ovl_path_lower(next, &lowerpath);
+		err = vfs_getattr(&lowerpath, &stat);
+		if (!err)
+			err = ovl_copy_up_one(parent, next, &lowerpath, &stat, NULL);
+
+		dput(parent);
+		dput(next);
+	}
+
+	return err;
+}
diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c
new file mode 100644
index 0000000..8ffc4b9
--- /dev/null
+++ b/fs/overlayfs/dir.c
@@ -0,0 +1,928 @@
+/*
+ *
+ * Copyright (C) 2011 Novell Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ */
+
+#include <linux/fs.h>
+#include <linux/namei.h>
+#include <linux/xattr.h>
+#include <linux/security.h>
+#include <linux/cred.h>
+#include "overlayfs.h"
+
+void ovl_cleanup(struct inode *wdir, struct dentry *wdentry)
+{
+	int err;
+
+	dget(wdentry);
+	if (S_ISDIR(wdentry->d_inode->i_mode))
+		err = ovl_do_rmdir(wdir, wdentry);
+	else
+		err = ovl_do_unlink(wdir, wdentry);
+	dput(wdentry);
+
+	if (err) {
+		pr_err("overlayfs: cleanup of '%pd2' failed (%i)\n",
+		       wdentry, err);
+	}
+}
+
+struct dentry *ovl_lookup_temp(struct dentry *workdir, struct dentry *dentry)
+{
+	struct dentry *temp;
+	char name[20];
+
+	snprintf(name, sizeof(name), "#%lx", (unsigned long) dentry);
+
+	temp = lookup_one_len(name, workdir, strlen(name));
+	if (!IS_ERR(temp) && temp->d_inode) {
+		pr_err("overlayfs: workdir/%s already exists\n", name);
+		dput(temp);
+		temp = ERR_PTR(-EIO);
+	}
+
+	return temp;
+}
+
+/* caller holds i_mutex on workdir */
+static struct dentry *ovl_whiteout(struct dentry *workdir,
+				   struct dentry *dentry)
+{
+	int err;
+	struct dentry *whiteout;
+	struct inode *wdir = workdir->d_inode;
+
+	whiteout = ovl_lookup_temp(workdir, dentry);
+	if (IS_ERR(whiteout))
+		return whiteout;
+
+	err = ovl_do_whiteout(wdir, whiteout);
+	if (err) {
+		dput(whiteout);
+		whiteout = ERR_PTR(err);
+	}
+
+	return whiteout;
+}
+
+int ovl_create_real(struct inode *dir, struct dentry *newdentry,
+		    struct kstat *stat, const char *link,
+		    struct dentry *hardlink, bool debug)
+{
+	int err;
+
+	if (newdentry->d_inode)
+		return -ESTALE;
+
+	if (hardlink) {
+		err = ovl_do_link(hardlink, dir, newdentry, debug);
+	} else {
+		switch (stat->mode & S_IFMT) {
+		case S_IFREG:
+			err = ovl_do_create(dir, newdentry, stat->mode, debug);
+			break;
+
+		case S_IFDIR:
+			err = ovl_do_mkdir(dir, newdentry, stat->mode, debug);
+			break;
+
+		case S_IFCHR:
+		case S_IFBLK:
+		case S_IFIFO:
+		case S_IFSOCK:
+			err = ovl_do_mknod(dir, newdentry,
+					   stat->mode, stat->rdev, debug);
+			break;
+
+		case S_IFLNK:
+			err = ovl_do_symlink(dir, newdentry, link, debug);
+			break;
+
+		default:
+			err = -EPERM;
+		}
+	}
+	if (!err && WARN_ON(!newdentry->d_inode)) {
+		/*
+		 * Not quite sure if non-instantiated dentry is legal or not.
+		 * VFS doesn't seem to care so check and warn here.
+		 */
+		err = -ENOENT;
+	}
+	return err;
+}
+
+static int ovl_set_opaque(struct dentry *upperdentry)
+{
+	return ovl_do_setxattr(upperdentry, ovl_opaque_xattr, "y", 1, 0);
+}
+
+static void ovl_remove_opaque(struct dentry *upperdentry)
+{
+	int err;
+
+	err = ovl_do_removexattr(upperdentry, ovl_opaque_xattr);
+	if (err) {
+		pr_warn("overlayfs: failed to remove opaque from '%s' (%i)\n",
+			upperdentry->d_name.name, err);
+	}
+}
+
+static int ovl_dir_getattr(struct vfsmount *mnt, struct dentry *dentry,
+			 struct kstat *stat)
+{
+	int err;
+	enum ovl_path_type type;
+	struct path realpath;
+
+	type = ovl_path_real(dentry, &realpath);
+	err = vfs_getattr(&realpath, stat);
+	if (err)
+		return err;
+
+	stat->dev = dentry->d_sb->s_dev;
+	stat->ino = dentry->d_inode->i_ino;
+
+	/*
+	 * It's probably not worth it to count subdirs to get the
+	 * correct link count.  nlink=1 seems to pacify 'find' and
+	 * other utilities.
+	 */
+	if (type == OVL_PATH_MERGE)
+		stat->nlink = 1;
+
+	return 0;
+}
+
+static int ovl_create_upper(struct dentry *dentry, struct inode *inode,
+			    struct kstat *stat, const char *link,
+			    struct dentry *hardlink)
+{
+	struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent);
+	struct inode *udir = upperdir->d_inode;
+	struct dentry *newdentry;
+	int err;
+
+	mutex_lock_nested(&udir->i_mutex, I_MUTEX_PARENT);
+	newdentry = lookup_one_len(dentry->d_name.name, upperdir,
+				   dentry->d_name.len);
+	err = PTR_ERR(newdentry);
+	if (IS_ERR(newdentry))
+		goto out_unlock;
+	err = ovl_create_real(udir, newdentry, stat, link, hardlink, false);
+	if (err)
+		goto out_dput;
+
+	ovl_dentry_version_inc(dentry->d_parent);
+	ovl_dentry_update(dentry, newdentry);
+	ovl_copyattr(newdentry->d_inode, inode);
+	d_instantiate(dentry, inode);
+	newdentry = NULL;
+out_dput:
+	dput(newdentry);
+out_unlock:
+	mutex_unlock(&udir->i_mutex);
+	return err;
+}
+
+static int ovl_lock_rename_workdir(struct dentry *workdir,
+				   struct dentry *upperdir)
+{
+	/* Workdir should not be the same as upperdir */
+	if (workdir == upperdir)
+		goto err;
+
+	/* Workdir should not be subdir of upperdir and vice versa */
+	if (lock_rename(workdir, upperdir) != NULL)
+		goto err_unlock;
+
+	return 0;
+
+err_unlock:
+	unlock_rename(workdir, upperdir);
+err:
+	pr_err("overlayfs: failed to lock workdir+upperdir\n");
+	return -EIO;
+}
+
+static struct dentry *ovl_clear_empty(struct dentry *dentry,
+				      struct list_head *list)
+{
+	struct dentry *workdir = ovl_workdir(dentry);
+	struct inode *wdir = workdir->d_inode;
+	struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent);
+	struct inode *udir = upperdir->d_inode;
+	struct path upperpath;
+	struct dentry *upper;
+	struct dentry *opaquedir;
+	struct kstat stat;
+	int err;
+
+	err = ovl_lock_rename_workdir(workdir, upperdir);
+	if (err)
+		goto out;
+
+	ovl_path_upper(dentry, &upperpath);
+	err = vfs_getattr(&upperpath, &stat);
+	if (err)
+		goto out_unlock;
+
+	err = -ESTALE;
+	if (!S_ISDIR(stat.mode))
+		goto out_unlock;
+	upper = upperpath.dentry;
+	if (upper->d_parent->d_inode != udir)
+		goto out_unlock;
+
+	opaquedir = ovl_lookup_temp(workdir, dentry);
+	err = PTR_ERR(opaquedir);
+	if (IS_ERR(opaquedir))
+		goto out_unlock;
+
+	err = ovl_create_real(wdir, opaquedir, &stat, NULL, NULL, true);
+	if (err)
+		goto out_dput;
+
+	err = ovl_copy_xattr(upper, opaquedir);
+	if (err)
+		goto out_cleanup;
+
+	err = ovl_set_opaque(opaquedir);
+	if (err)
+		goto out_cleanup;
+
+	mutex_lock(&opaquedir->d_inode->i_mutex);
+	err = ovl_set_attr(opaquedir, &stat);
+	mutex_unlock(&opaquedir->d_inode->i_mutex);
+	if (err)
+		goto out_cleanup;
+
+	err = ovl_do_rename(wdir, opaquedir, udir, upper, RENAME_EXCHANGE);
+	if (err)
+		goto out_cleanup;
+
+	ovl_cleanup_whiteouts(upper, list);
+	ovl_cleanup(wdir, upper);
+	unlock_rename(workdir, upperdir);
+
+	/* dentry's upper doesn't match now, get rid of it */
+	d_drop(dentry);
+
+	return opaquedir;
+
+out_cleanup:
+	ovl_cleanup(wdir, opaquedir);
+out_dput:
+	dput(opaquedir);
+out_unlock:
+	unlock_rename(workdir, upperdir);
+out:
+	return ERR_PTR(err);
+}
+
+static struct dentry *ovl_check_empty_and_clear(struct dentry *dentry)
+{
+	int err;
+	struct dentry *ret = NULL;
+	LIST_HEAD(list);
+
+	err = ovl_check_empty_dir(dentry, &list);
+	if (err)
+		ret = ERR_PTR(err);
+	else {
+		/*
+		 * If no upperdentry then skip clearing whiteouts.
+		 *
+		 * Can race with copy-up, since we don't hold the upperdir
+		 * mutex.  Doesn't matter, since copy-up can't create a
+		 * non-empty directory from an empty one.
+		 */
+		if (ovl_dentry_upper(dentry))
+			ret = ovl_clear_empty(dentry, &list);
+	}
+
+	ovl_cache_free(&list);
+
+	return ret;
+}
+
+static int ovl_create_over_whiteout(struct dentry *dentry, struct inode *inode,
+				    struct kstat *stat, const char *link,
+				    struct dentry *hardlink)
+{
+	struct dentry *workdir = ovl_workdir(dentry);
+	struct inode *wdir = workdir->d_inode;
+	struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent);
+	struct inode *udir = upperdir->d_inode;
+	struct dentry *upper;
+	struct dentry *newdentry;
+	int err;
+
+	err = ovl_lock_rename_workdir(workdir, upperdir);
+	if (err)
+		goto out;
+
+	newdentry = ovl_lookup_temp(workdir, dentry);
+	err = PTR_ERR(newdentry);
+	if (IS_ERR(newdentry))
+		goto out_unlock;
+
+	upper = lookup_one_len(dentry->d_name.name, upperdir,
+			       dentry->d_name.len);
+	err = PTR_ERR(upper);
+	if (IS_ERR(upper))
+		goto out_dput;
+
+	err = ovl_create_real(wdir, newdentry, stat, link, hardlink, true);
+	if (err)
+		goto out_dput2;
+
+	if (S_ISDIR(stat->mode)) {
+		err = ovl_set_opaque(newdentry);
+		if (err)
+			goto out_cleanup;
+
+		err = ovl_do_rename(wdir, newdentry, udir, upper,
+				    RENAME_EXCHANGE);
+		if (err)
+			goto out_cleanup;
+
+		ovl_cleanup(wdir, upper);
+	} else {
+		err = ovl_do_rename(wdir, newdentry, udir, upper, 0);
+		if (err)
+			goto out_cleanup;
+	}
+	ovl_dentry_version_inc(dentry->d_parent);
+	ovl_dentry_update(dentry, newdentry);
+	ovl_copyattr(newdentry->d_inode, inode);
+	d_instantiate(dentry, inode);
+	newdentry = NULL;
+out_dput2:
+	dput(upper);
+out_dput:
+	dput(newdentry);
+out_unlock:
+	unlock_rename(workdir, upperdir);
+out:
+	return err;
+
+out_cleanup:
+	ovl_cleanup(wdir, newdentry);
+	goto out_dput2;
+}
+
+static int ovl_create_or_link(struct dentry *dentry, int mode, dev_t rdev,
+			      const char *link, struct dentry *hardlink)
+{
+	int err;
+	struct inode *inode;
+	struct kstat stat = {
+		.mode = mode,
+		.rdev = rdev,
+	};
+
+	err = -ENOMEM;
+	inode = ovl_new_inode(dentry->d_sb, mode, dentry->d_fsdata);
+	if (!inode)
+		goto out;
+
+	err = ovl_copy_up(dentry->d_parent);
+	if (err)
+		goto out_iput;
+
+	if (!ovl_dentry_is_opaque(dentry)) {
+		err = ovl_create_upper(dentry, inode, &stat, link, hardlink);
+	} else {
+		const struct cred *old_cred;
+		struct cred *override_cred;
+
+		err = -ENOMEM;
+		override_cred = prepare_creds();
+		if (!override_cred)
+			goto out_iput;
+
+		/*
+		 * CAP_SYS_ADMIN for setting opaque xattr
+		 * CAP_DAC_OVERRIDE for create in workdir, rename
+		 * CAP_FOWNER for removing whiteout from sticky dir
+		 */
+		cap_raise(override_cred->cap_effective, CAP_SYS_ADMIN);
+		cap_raise(override_cred->cap_effective, CAP_DAC_OVERRIDE);
+		cap_raise(override_cred->cap_effective, CAP_FOWNER);
+		old_cred = override_creds(override_cred);
+
+		err = ovl_create_over_whiteout(dentry, inode, &stat, link,
+					       hardlink);
+
+		revert_creds(old_cred);
+		put_cred(override_cred);
+	}
+
+	if (!err)
+		inode = NULL;
+out_iput:
+	iput(inode);
+out:
+	return err;
+}
+
+static int ovl_create_object(struct dentry *dentry, int mode, dev_t rdev,
+			     const char *link)
+{
+	int err;
+
+	err = ovl_want_write(dentry);
+	if (!err) {
+		err = ovl_create_or_link(dentry, mode, rdev, link, NULL);
+		ovl_drop_write(dentry);
+	}
+
+	return err;
+}
+
+static int ovl_create(struct inode *dir, struct dentry *dentry, umode_t mode,
+		      bool excl)
+{
+	return ovl_create_object(dentry, (mode & 07777) | S_IFREG, 0, NULL);
+}
+
+static int ovl_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
+{
+	return ovl_create_object(dentry, (mode & 07777) | S_IFDIR, 0, NULL);
+}
+
+static int ovl_mknod(struct inode *dir, struct dentry *dentry, umode_t mode,
+		     dev_t rdev)
+{
+	/* Don't allow creation of "whiteout" on overlay */
+	if (S_ISCHR(mode) && rdev == WHITEOUT_DEV)
+		return -EPERM;
+
+	return ovl_create_object(dentry, mode, rdev, NULL);
+}
+
+static int ovl_symlink(struct inode *dir, struct dentry *dentry,
+		       const char *link)
+{
+	return ovl_create_object(dentry, S_IFLNK, 0, link);
+}
+
+static int ovl_link(struct dentry *old, struct inode *newdir,
+		    struct dentry *new)
+{
+	int err;
+	struct dentry *upper;
+
+	err = ovl_want_write(old);
+	if (err)
+		goto out;
+
+	err = ovl_copy_up(old);
+	if (err)
+		goto out_drop_write;
+
+	upper = ovl_dentry_upper(old);
+	err = ovl_create_or_link(new, upper->d_inode->i_mode, 0, NULL, upper);
+
+out_drop_write:
+	ovl_drop_write(old);
+out:
+	return err;
+}
+
+static int ovl_remove_and_whiteout(struct dentry *dentry, bool is_dir)
+{
+	struct dentry *workdir = ovl_workdir(dentry);
+	struct inode *wdir = workdir->d_inode;
+	struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent);
+	struct inode *udir = upperdir->d_inode;
+	struct dentry *whiteout;
+	struct dentry *upper;
+	struct dentry *opaquedir = NULL;
+	int err;
+
+	if (is_dir) {
+		opaquedir = ovl_check_empty_and_clear(dentry);
+		err = PTR_ERR(opaquedir);
+		if (IS_ERR(opaquedir))
+			goto out;
+	}
+
+	err = ovl_lock_rename_workdir(workdir, upperdir);
+	if (err)
+		goto out_dput;
+
+	whiteout = ovl_whiteout(workdir, dentry);
+	err = PTR_ERR(whiteout);
+	if (IS_ERR(whiteout))
+		goto out_unlock;
+
+	upper = ovl_dentry_upper(dentry);
+	if (!upper) {
+		upper = lookup_one_len(dentry->d_name.name, upperdir,
+				       dentry->d_name.len);
+		err = PTR_ERR(upper);
+		if (IS_ERR(upper))
+			goto kill_whiteout;
+
+		err = ovl_do_rename(wdir, whiteout, udir, upper, 0);
+		dput(upper);
+		if (err)
+			goto kill_whiteout;
+	} else {
+		int flags = 0;
+
+		if (opaquedir)
+			upper = opaquedir;
+		err = -ESTALE;
+		if (upper->d_parent != upperdir)
+			goto kill_whiteout;
+
+		if (is_dir)
+			flags |= RENAME_EXCHANGE;
+
+		err = ovl_do_rename(wdir, whiteout, udir, upper, flags);
+		if (err)
+			goto kill_whiteout;
+
+		if (is_dir)
+			ovl_cleanup(wdir, upper);
+	}
+	ovl_dentry_version_inc(dentry->d_parent);
+out_d_drop:
+	d_drop(dentry);
+	dput(whiteout);
+out_unlock:
+	unlock_rename(workdir, upperdir);
+out_dput:
+	dput(opaquedir);
+out:
+	return err;
+
+kill_whiteout:
+	ovl_cleanup(wdir, whiteout);
+	goto out_d_drop;
+}
+
+static int ovl_remove_upper(struct dentry *dentry, bool is_dir)
+{
+	struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent);
+	struct inode *dir = upperdir->d_inode;
+	struct dentry *upper = ovl_dentry_upper(dentry);
+	int err;
+
+	mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT);
+	err = -ESTALE;
+	if (upper->d_parent == upperdir) {
+		/* Don't let d_delete() think it can reset d_inode */
+		dget(upper);
+		if (is_dir)
+			err = vfs_rmdir(dir, upper);
+		else
+			err = vfs_unlink(dir, upper, NULL);
+		dput(upper);
+		ovl_dentry_version_inc(dentry->d_parent);
+	}
+
+	/*
+	 * Keeping this dentry hashed would mean having to release
+	 * upperpath/lowerpath, which could only be done if we are the
+	 * sole user of this dentry.  Too tricky...  Just unhash for
+	 * now.
+	 */
+	d_drop(dentry);
+	mutex_unlock(&dir->i_mutex);
+
+	return err;
+}
+
+static inline int ovl_check_sticky(struct dentry *dentry)
+{
+	struct inode *dir = ovl_dentry_real(dentry->d_parent)->d_inode;
+	struct inode *inode = ovl_dentry_real(dentry)->d_inode;
+
+	if (check_sticky(dir, inode))
+		return -EPERM;
+
+	return 0;
+}
+
+static int ovl_do_remove(struct dentry *dentry, bool is_dir)
+{
+	enum ovl_path_type type;
+	int err;
+
+	err = ovl_check_sticky(dentry);
+	if (err)
+		goto out;
+
+	err = ovl_want_write(dentry);
+	if (err)
+		goto out;
+
+	err = ovl_copy_up(dentry->d_parent);
+	if (err)
+		goto out_drop_write;
+
+	type = ovl_path_type(dentry);
+	if (type == OVL_PATH_PURE_UPPER) {
+		err = ovl_remove_upper(dentry, is_dir);
+	} else {
+		const struct cred *old_cred;
+		struct cred *override_cred;
+
+		err = -ENOMEM;
+		override_cred = prepare_creds();
+		if (!override_cred)
+			goto out_drop_write;
+
+		/*
+		 * CAP_SYS_ADMIN for setting xattr on whiteout, opaque dir
+		 * CAP_DAC_OVERRIDE for create in workdir, rename
+		 * CAP_FOWNER for removing whiteout from sticky dir
+		 * CAP_FSETID for chmod of opaque dir
+		 * CAP_CHOWN for chown of opaque dir
+		 */
+		cap_raise(override_cred->cap_effective, CAP_SYS_ADMIN);
+		cap_raise(override_cred->cap_effective, CAP_DAC_OVERRIDE);
+		cap_raise(override_cred->cap_effective, CAP_FOWNER);
+		cap_raise(override_cred->cap_effective, CAP_FSETID);
+		cap_raise(override_cred->cap_effective, CAP_CHOWN);
+		old_cred = override_creds(override_cred);
+
+		err = ovl_remove_and_whiteout(dentry, is_dir);
+
+		revert_creds(old_cred);
+		put_cred(override_cred);
+	}
+out_drop_write:
+	ovl_drop_write(dentry);
+out:
+	return err;
+}
+
+static int ovl_unlink(struct inode *dir, struct dentry *dentry)
+{
+	return ovl_do_remove(dentry, false);
+}
+
+static int ovl_rmdir(struct inode *dir, struct dentry *dentry)
+{
+	return ovl_do_remove(dentry, true);
+}
+
+static int ovl_rename2(struct inode *olddir, struct dentry *old,
+		       struct inode *newdir, struct dentry *new,
+		       unsigned int flags)
+{
+	int err;
+	enum ovl_path_type old_type;
+	enum ovl_path_type new_type;
+	struct dentry *old_upperdir;
+	struct dentry *new_upperdir;
+	struct dentry *olddentry;
+	struct dentry *newdentry;
+	struct dentry *trap;
+	bool old_opaque;
+	bool new_opaque;
+	bool new_create = false;
+	bool cleanup_whiteout = false;
+	bool overwrite = !(flags & RENAME_EXCHANGE);
+	bool is_dir = S_ISDIR(old->d_inode->i_mode);
+	bool new_is_dir = false;
+	struct dentry *opaquedir = NULL;
+	const struct cred *old_cred = NULL;
+	struct cred *override_cred = NULL;
+
+	err = -EINVAL;
+	if (flags & ~(RENAME_EXCHANGE | RENAME_NOREPLACE))
+		goto out;
+
+	flags &= ~RENAME_NOREPLACE;
+
+	err = ovl_check_sticky(old);
+	if (err)
+		goto out;
+
+	/* Don't copy up directory trees */
+	old_type = ovl_path_type(old);
+	err = -EXDEV;
+	if ((old_type == OVL_PATH_LOWER || old_type == OVL_PATH_MERGE) && is_dir)
+		goto out;
+
+	if (new->d_inode) {
+		err = ovl_check_sticky(new);
+		if (err)
+			goto out;
+
+		if (S_ISDIR(new->d_inode->i_mode))
+			new_is_dir = true;
+
+		new_type = ovl_path_type(new);
+		err = -EXDEV;
+		if (!overwrite && (new_type == OVL_PATH_LOWER || new_type == OVL_PATH_MERGE) && new_is_dir)
+			goto out;
+
+		err = 0;
+		if (new_type == OVL_PATH_LOWER && old_type == OVL_PATH_LOWER) {
+			if (ovl_dentry_lower(old)->d_inode ==
+			    ovl_dentry_lower(new)->d_inode)
+				goto out;
+		}
+		if (new_type != OVL_PATH_LOWER && old_type != OVL_PATH_LOWER) {
+			if (ovl_dentry_upper(old)->d_inode ==
+			    ovl_dentry_upper(new)->d_inode)
+				goto out;
+		}
+	} else {
+		if (ovl_dentry_is_opaque(new))
+			new_type = OVL_PATH_UPPER;
+		else
+			new_type = OVL_PATH_PURE_UPPER;
+	}
+
+	err = ovl_want_write(old);
+	if (err)
+		goto out;
+
+	err = ovl_copy_up(old);
+	if (err)
+		goto out_drop_write;
+
+	err = ovl_copy_up(new->d_parent);
+	if (err)
+		goto out_drop_write;
+	if (!overwrite) {
+		err = ovl_copy_up(new);
+		if (err)
+			goto out_drop_write;
+	}
+
+	old_opaque = old_type != OVL_PATH_PURE_UPPER;
+	new_opaque = new_type != OVL_PATH_PURE_UPPER;
+
+	if (old_opaque || new_opaque) {
+		err = -ENOMEM;
+		override_cred = prepare_creds();
+		if (!override_cred)
+			goto out_drop_write;
+
+		/*
+		 * CAP_SYS_ADMIN for setting xattr on whiteout, opaque dir
+		 * CAP_DAC_OVERRIDE for create in workdir
+		 * CAP_FOWNER for removing whiteout from sticky dir
+		 * CAP_FSETID for chmod of opaque dir
+		 * CAP_CHOWN for chown of opaque dir
+		 */
+		cap_raise(override_cred->cap_effective, CAP_SYS_ADMIN);
+		cap_raise(override_cred->cap_effective, CAP_DAC_OVERRIDE);
+		cap_raise(override_cred->cap_effective, CAP_FOWNER);
+		cap_raise(override_cred->cap_effective, CAP_FSETID);
+		cap_raise(override_cred->cap_effective, CAP_CHOWN);
+		old_cred = override_creds(override_cred);
+	}
+
+	if (overwrite && (new_type == OVL_PATH_LOWER || new_type == OVL_PATH_MERGE) && new_is_dir) {
+		opaquedir = ovl_check_empty_and_clear(new);
+		err = PTR_ERR(opaquedir);
+		if (IS_ERR(opaquedir)) {
+			opaquedir = NULL;
+			goto out_revert_creds;
+		}
+	}
+
+	if (overwrite) {
+		if (old_opaque) {
+			if (new->d_inode || !new_opaque) {
+				/* Whiteout source */
+				flags |= RENAME_WHITEOUT;
+			} else {
+				/* Switch whiteouts */
+				flags |= RENAME_EXCHANGE;
+			}
+		} else if (is_dir && !new->d_inode && new_opaque) {
+			flags |= RENAME_EXCHANGE;
+			cleanup_whiteout = true;
+		}
+	}
+
+	old_upperdir = ovl_dentry_upper(old->d_parent);
+	new_upperdir = ovl_dentry_upper(new->d_parent);
+
+	trap = lock_rename(new_upperdir, old_upperdir);
+
+	olddentry = ovl_dentry_upper(old);
+	newdentry = ovl_dentry_upper(new);
+	if (newdentry) {
+		if (opaquedir) {
+			newdentry = opaquedir;
+			opaquedir = NULL;
+		} else {
+			dget(newdentry);
+		}
+	} else {
+		new_create = true;
+		newdentry = lookup_one_len(new->d_name.name, new_upperdir,
+					   new->d_name.len);
+		err = PTR_ERR(newdentry);
+		if (IS_ERR(newdentry))
+			goto out_unlock;
+	}
+
+	err = -ESTALE;
+	if (olddentry->d_parent != old_upperdir)
+		goto out_dput;
+	if (newdentry->d_parent != new_upperdir)
+		goto out_dput;
+	if (olddentry == trap)
+		goto out_dput;
+	if (newdentry == trap)
+		goto out_dput;
+
+	if (is_dir && !old_opaque && new_opaque) {
+		err = ovl_set_opaque(olddentry);
+		if (err)
+			goto out_dput;
+	}
+	if (!overwrite && new_is_dir && old_opaque && !new_opaque) {
+		err = ovl_set_opaque(newdentry);
+		if (err)
+			goto out_dput;
+	}
+
+	if (old_opaque || new_opaque) {
+		err = ovl_do_rename(old_upperdir->d_inode, olddentry,
+				    new_upperdir->d_inode, newdentry,
+				    flags);
+	} else {
+		/* No debug for the plain case */
+		BUG_ON(flags & ~RENAME_EXCHANGE);
+		err = vfs_rename(old_upperdir->d_inode, olddentry,
+				 new_upperdir->d_inode, newdentry,
+				 NULL, flags);
+	}
+
+	if (err) {
+		if (is_dir && !old_opaque && new_opaque)
+			ovl_remove_opaque(olddentry);
+		if (!overwrite && new_is_dir && old_opaque && !new_opaque)
+			ovl_remove_opaque(newdentry);
+		goto out_dput;
+	}
+
+	if (is_dir && old_opaque && !new_opaque)
+		ovl_remove_opaque(olddentry);
+	if (!overwrite && new_is_dir && !old_opaque && new_opaque)
+		ovl_remove_opaque(newdentry);
+
+	if (old_opaque != new_opaque) {
+		ovl_dentry_set_opaque(old, new_opaque);
+		if (!overwrite)
+			ovl_dentry_set_opaque(new, old_opaque);
+	}
+
+	if (cleanup_whiteout)
+		ovl_cleanup(old_upperdir->d_inode, newdentry);
+
+	ovl_dentry_version_inc(old->d_parent);
+	ovl_dentry_version_inc(new->d_parent);
+
+out_dput:
+	dput(newdentry);
+out_unlock:
+	unlock_rename(new_upperdir, old_upperdir);
+out_revert_creds:
+	if (old_opaque || new_opaque) {
+		revert_creds(old_cred);
+		put_cred(override_cred);
+	}
+out_drop_write:
+	ovl_drop_write(old);
+out:
+	dput(opaquedir);
+	return err;
+}
+
+const struct inode_operations ovl_dir_inode_operations = {
+	.lookup		= ovl_lookup,
+	.mkdir		= ovl_mkdir,
+	.symlink	= ovl_symlink,
+	.unlink		= ovl_unlink,
+	.rmdir		= ovl_rmdir,
+	.rename2	= ovl_rename2,
+	.link		= ovl_link,
+	.setattr	= ovl_setattr,
+	.create		= ovl_create,
+	.mknod		= ovl_mknod,
+	.permission	= ovl_permission,
+	.getattr	= ovl_dir_getattr,
+	.setxattr	= ovl_setxattr,
+	.getxattr	= ovl_getxattr,
+	.listxattr	= ovl_listxattr,
+	.removexattr	= ovl_removexattr,
+};
diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c
new file mode 100644
index 0000000..07d74b2
--- /dev/null
+++ b/fs/overlayfs/inode.c
@@ -0,0 +1,434 @@
+/*
+ *
+ * Copyright (C) 2011 Novell Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ */
+
+#include <linux/fs.h>
+#include <linux/slab.h>
+#include <linux/xattr.h>
+#include "overlayfs.h"
+
+static int ovl_copy_up_last(struct dentry *dentry, struct iattr *attr,
+			    bool no_data)
+{
+	int err;
+	struct dentry *parent;
+	struct kstat stat;
+	struct path lowerpath;
+
+	parent = dget_parent(dentry);
+	err = ovl_copy_up(parent);
+	if (err)
+		goto out_dput_parent;
+
+	ovl_path_lower(dentry, &lowerpath);
+	err = vfs_getattr(&lowerpath, &stat);
+	if (err)
+		goto out_dput_parent;
+
+	if (no_data)
+		stat.size = 0;
+
+	err = ovl_copy_up_one(parent, dentry, &lowerpath, &stat, attr);
+
+out_dput_parent:
+	dput(parent);
+	return err;
+}
+
+int ovl_setattr(struct dentry *dentry, struct iattr *attr)
+{
+	int err;
+	struct dentry *upperdentry;
+
+	err = ovl_want_write(dentry);
+	if (err)
+		goto out;
+
+	upperdentry = ovl_dentry_upper(dentry);
+	if (upperdentry) {
+		mutex_lock(&upperdentry->d_inode->i_mutex);
+		err = notify_change(upperdentry, attr, NULL);
+		mutex_unlock(&upperdentry->d_inode->i_mutex);
+	} else {
+		err = ovl_copy_up_last(dentry, attr, false);
+	}
+	ovl_drop_write(dentry);
+out:
+	return err;
+}
+
+static int ovl_getattr(struct vfsmount *mnt, struct dentry *dentry,
+			 struct kstat *stat)
+{
+	struct path realpath;
+
+	ovl_path_real(dentry, &realpath);
+	return vfs_getattr(&realpath, stat);
+}
+
+int ovl_permission(struct inode *inode, int mask)
+{
+	struct ovl_entry *oe;
+	struct dentry *alias = NULL;
+	struct inode *realinode;
+	struct dentry *realdentry;
+	bool is_upper;
+	int err;
+
+	if (S_ISDIR(inode->i_mode)) {
+		oe = inode->i_private;
+	} else if (mask & MAY_NOT_BLOCK) {
+		return -ECHILD;
+	} else {
+		/*
+		 * For non-directories find an alias and get the info
+		 * from there.
+		 */
+		alias = d_find_any_alias(inode);
+		if (WARN_ON(!alias))
+			return -ENOENT;
+
+		oe = alias->d_fsdata;
+	}
+
+	realdentry = ovl_entry_real(oe, &is_upper);
+
+	/* Careful in RCU walk mode */
+	realinode = ACCESS_ONCE(realdentry->d_inode);
+	if (!realinode) {
+		WARN_ON(!(mask & MAY_NOT_BLOCK));
+		err = -ENOENT;
+		goto out_dput;
+	}
+
+	if (mask & MAY_WRITE) {
+		umode_t mode = realinode->i_mode;
+
+		/*
+		 * Writes will always be redirected to upper layer, so
+		 * ignore lower layer being read-only.
+		 *
+		 * If the overlay itself is read-only then proceed
+		 * with the permission check, don't return EROFS.
+		 * This will only happen if this is the lower layer of
+		 * another overlayfs.
+		 *
+		 * If upper fs becomes read-only after the overlay was
+		 * constructed return EROFS to prevent modification of
+		 * upper layer.
+		 */
+		err = -EROFS;
+		if (is_upper && !IS_RDONLY(inode) && IS_RDONLY(realinode) &&
+		    (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)))
+			goto out_dput;
+	}
+
+	err = __inode_permission(realinode, mask);
+out_dput:
+	dput(alias);
+	return err;
+}
+
+
+struct ovl_link_data {
+	struct dentry *realdentry;
+	void *cookie;
+};
+
+static void *ovl_follow_link(struct dentry *dentry, struct nameidata *nd)
+{
+	void *ret;
+	struct dentry *realdentry;
+	struct inode *realinode;
+
+	realdentry = ovl_dentry_real(dentry);
+	realinode = realdentry->d_inode;
+
+	if (WARN_ON(!realinode->i_op->follow_link))
+		return ERR_PTR(-EPERM);
+
+	ret = realinode->i_op->follow_link(realdentry, nd);
+	if (IS_ERR(ret))
+		return ret;
+
+	if (realinode->i_op->put_link) {
+		struct ovl_link_data *data;
+
+		data = kmalloc(sizeof(struct ovl_link_data), GFP_KERNEL);
+		if (!data) {
+			realinode->i_op->put_link(realdentry, nd, ret);
+			return ERR_PTR(-ENOMEM);
+		}
+		data->realdentry = realdentry;
+		data->cookie = ret;
+
+		return data;
+	} else {
+		return NULL;
+	}
+}
+
+static void ovl_put_link(struct dentry *dentry, struct nameidata *nd, void *c)
+{
+	struct inode *realinode;
+	struct ovl_link_data *data = c;
+
+	if (!data)
+		return;
+
+	realinode = data->realdentry->d_inode;
+	realinode->i_op->put_link(data->realdentry, nd, data->cookie);
+	kfree(data);
+}
+
+static int ovl_readlink(struct dentry *dentry, char __user *buf, int bufsiz)
+{
+	struct path realpath;
+	struct inode *realinode;
+
+	ovl_path_real(dentry, &realpath);
+	realinode = realpath.dentry->d_inode;
+
+	if (!realinode->i_op->readlink)
+		return -EINVAL;
+
+	touch_atime(&realpath);
+
+	return realinode->i_op->readlink(realpath.dentry, buf, bufsiz);
+}
+
+
+static bool ovl_is_private_xattr(const char *name)
+{
+	return strncmp(name, "trusted.overlay.", 14) == 0;
+}
+
+int ovl_setxattr(struct dentry *dentry, const char *name,
+		 const void *value, size_t size, int flags)
+{
+	int err;
+	struct dentry *upperdentry;
+
+	err = ovl_want_write(dentry);
+	if (err)
+		goto out;
+
+	err = -EPERM;
+	if (ovl_is_private_xattr(name))
+		goto out_drop_write;
+
+	err = ovl_copy_up(dentry);
+	if (err)
+		goto out_drop_write;
+
+	upperdentry = ovl_dentry_upper(dentry);
+	err = vfs_setxattr(upperdentry, name, value, size, flags);
+
+out_drop_write:
+	ovl_drop_write(dentry);
+out:
+	return err;
+}
+
+static bool ovl_need_xattr_filter(struct dentry *dentry,
+				  enum ovl_path_type type)
+{
+	return type == OVL_PATH_UPPER && S_ISDIR(dentry->d_inode->i_mode);
+}
+
+ssize_t ovl_getxattr(struct dentry *dentry, const char *name,
+		     void *value, size_t size)
+{
+	struct path realpath;
+	enum ovl_path_type type = ovl_path_real(dentry, &realpath);
+
+	if (ovl_need_xattr_filter(dentry, type) && ovl_is_private_xattr(name))
+		return -ENODATA;
+
+	return vfs_getxattr(realpath.dentry, name, value, size);
+}
+
+ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size)
+{
+	struct path realpath;
+	enum ovl_path_type type = ovl_path_real(dentry, &realpath);
+	ssize_t res;
+	int off;
+
+	res = vfs_listxattr(realpath.dentry, list, size);
+	if (res <= 0 || size == 0)
+		return res;
+
+	if (!ovl_need_xattr_filter(dentry, type))
+		return res;
+
+	/* filter out private xattrs */
+	for (off = 0; off < res;) {
+		char *s = list + off;
+		size_t slen = strlen(s) + 1;
+
+		BUG_ON(off + slen > res);
+
+		if (ovl_is_private_xattr(s)) {
+			res -= slen;
+			memmove(s, s + slen, res - off);
+		} else {
+			off += slen;
+		}
+	}
+
+	return res;
+}
+
+int ovl_removexattr(struct dentry *dentry, const char *name)
+{
+	int err;
+	struct path realpath;
+	enum ovl_path_type type = ovl_path_real(dentry, &realpath);
+
+	err = ovl_want_write(dentry);
+	if (err)
+		goto out;
+
+	err = -ENODATA;
+	if (ovl_need_xattr_filter(dentry, type) && ovl_is_private_xattr(name))
+		goto out_drop_write;
+
+	if (type == OVL_PATH_LOWER) {
+		err = vfs_getxattr(realpath.dentry, name, NULL, 0);
+		if (err < 0)
+			goto out_drop_write;
+
+		err = ovl_copy_up(dentry);
+		if (err)
+			goto out_drop_write;
+
+		ovl_path_upper(dentry, &realpath);
+	}
+
+	err = vfs_removexattr(realpath.dentry, name);
+out_drop_write:
+	ovl_drop_write(dentry);
+out:
+	return err;
+}
+
+static bool ovl_open_need_copy_up(int flags, enum ovl_path_type type,
+				  struct dentry *realdentry)
+{
+	if (type != OVL_PATH_LOWER)
+		return false;
+
+	if (special_file(realdentry->d_inode->i_mode))
+		return false;
+
+	if (!(OPEN_FMODE(flags) & FMODE_WRITE) && !(flags & O_TRUNC))
+		return false;
+
+	return true;
+}
+
+static int ovl_dentry_open(struct dentry *dentry, struct file *file,
+		    const struct cred *cred)
+{
+	int err;
+	struct path realpath;
+	enum ovl_path_type type;
+	bool want_write = false;
+
+	type = ovl_path_real(dentry, &realpath);
+	if (ovl_open_need_copy_up(file->f_flags, type, realpath.dentry)) {
+		want_write = true;
+		err = ovl_want_write(dentry);
+		if (err)
+			goto out;
+
+		if (file->f_flags & O_TRUNC)
+			err = ovl_copy_up_last(dentry, NULL, true);
+		else
+			err = ovl_copy_up(dentry);
+		if (err)
+			goto out_drop_write;
+
+		ovl_path_upper(dentry, &realpath);
+	}
+
+	err = vfs_open(&realpath, file, cred);
+out_drop_write:
+	if (want_write)
+		ovl_drop_write(dentry);
+out:
+	return err;
+}
+
+static const struct inode_operations ovl_file_inode_operations = {
+	.setattr	= ovl_setattr,
+	.permission	= ovl_permission,
+	.getattr	= ovl_getattr,
+	.setxattr	= ovl_setxattr,
+	.getxattr	= ovl_getxattr,
+	.listxattr	= ovl_listxattr,
+	.removexattr	= ovl_removexattr,
+	.dentry_open	= ovl_dentry_open,
+};
+
+static const struct inode_operations ovl_symlink_inode_operations = {
+	.setattr	= ovl_setattr,
+	.follow_link	= ovl_follow_link,
+	.put_link	= ovl_put_link,
+	.readlink	= ovl_readlink,
+	.getattr	= ovl_getattr,
+	.setxattr	= ovl_setxattr,
+	.getxattr	= ovl_getxattr,
+	.listxattr	= ovl_listxattr,
+	.removexattr	= ovl_removexattr,
+};
+
+struct inode *ovl_new_inode(struct super_block *sb, umode_t mode,
+			    struct ovl_entry *oe)
+{
+	struct inode *inode;
+
+	inode = new_inode(sb);
+	if (!inode)
+		return NULL;
+
+	mode &= S_IFMT;
+
+	inode->i_ino = get_next_ino();
+	inode->i_mode = mode;
+	inode->i_flags |= S_NOATIME | S_NOCMTIME;
+
+	switch (mode) {
+	case S_IFDIR:
+		inode->i_private = oe;
+		inode->i_op = &ovl_dir_inode_operations;
+		inode->i_fop = &ovl_dir_operations;
+		break;
+
+	case S_IFLNK:
+		inode->i_op = &ovl_symlink_inode_operations;
+		break;
+
+	case S_IFREG:
+	case S_IFSOCK:
+	case S_IFBLK:
+	case S_IFCHR:
+	case S_IFIFO:
+		inode->i_op = &ovl_file_inode_operations;
+		break;
+
+	default:
+		WARN(1, "illegal file type: %i\n", mode);
+		iput(inode);
+		inode = NULL;
+	}
+
+	return inode;
+
+}
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
new file mode 100644
index 0000000..814bed3
--- /dev/null
+++ b/fs/overlayfs/overlayfs.h
@@ -0,0 +1,191 @@
+/*
+ *
+ * Copyright (C) 2011 Novell Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+
+struct ovl_entry;
+
+enum ovl_path_type {
+	OVL_PATH_PURE_UPPER,
+	OVL_PATH_UPPER,
+	OVL_PATH_MERGE,
+	OVL_PATH_LOWER,
+};
+
+extern const char *ovl_opaque_xattr;
+
+static inline int ovl_do_rmdir(struct inode *dir, struct dentry *dentry)
+{
+	int err = vfs_rmdir(dir, dentry);
+	pr_debug("rmdir(%pd2) = %i\n", dentry, err);
+	return err;
+}
+
+static inline int ovl_do_unlink(struct inode *dir, struct dentry *dentry)
+{
+	int err = vfs_unlink(dir, dentry, NULL);
+	pr_debug("unlink(%pd2) = %i\n", dentry, err);
+	return err;
+}
+
+static inline int ovl_do_link(struct dentry *old_dentry, struct inode *dir,
+			      struct dentry *new_dentry, bool debug)
+{
+	int err = vfs_link(old_dentry, dir, new_dentry, NULL);
+	if (debug) {
+		pr_debug("link(%pd2, %pd2) = %i\n",
+			 old_dentry, new_dentry, err);
+	}
+	return err;
+}
+
+static inline int ovl_do_create(struct inode *dir, struct dentry *dentry,
+			     umode_t mode, bool debug)
+{
+	int err = vfs_create(dir, dentry, mode, true);
+	if (debug)
+		pr_debug("create(%pd2, 0%o) = %i\n", dentry, mode, err);
+	return err;
+}
+
+static inline int ovl_do_mkdir(struct inode *dir, struct dentry *dentry,
+			       umode_t mode, bool debug)
+{
+	int err = vfs_mkdir(dir, dentry, mode);
+	if (debug)
+		pr_debug("mkdir(%pd2, 0%o) = %i\n", dentry, mode, err);
+	return err;
+}
+
+static inline int ovl_do_mknod(struct inode *dir, struct dentry *dentry,
+			       umode_t mode, dev_t dev, bool debug)
+{
+	int err = vfs_mknod(dir, dentry, mode, dev);
+	if (debug) {
+		pr_debug("mknod(%pd2, 0%o, 0%o) = %i\n",
+			 dentry, mode, dev, err);
+	}
+	return err;
+}
+
+static inline int ovl_do_symlink(struct inode *dir, struct dentry *dentry,
+				 const char *oldname, bool debug)
+{
+	int err = vfs_symlink(dir, dentry, oldname);
+	if (debug)
+		pr_debug("symlink(\"%s\", %pd2) = %i\n", oldname, dentry, err);
+	return err;
+}
+
+static inline int ovl_do_setxattr(struct dentry *dentry, const char *name,
+				  const void *value, size_t size, int flags)
+{
+	int err = vfs_setxattr(dentry, name, value, size, flags);
+	pr_debug("setxattr(%pd2, \"%s\", \"%*s\", 0x%x) = %i\n",
+		 dentry, name, (int) size, (char *) value, flags, err);
+	return err;
+}
+
+static inline int ovl_do_removexattr(struct dentry *dentry, const char *name)
+{
+	int err = vfs_removexattr(dentry, name);
+	pr_debug("removexattr(%pd2, \"%s\") = %i\n", dentry, name, err);
+	return err;
+}
+
+static inline int ovl_do_rename(struct inode *olddir, struct dentry *olddentry,
+				struct inode *newdir, struct dentry *newdentry,
+				unsigned int flags)
+{
+	int err;
+
+	pr_debug("rename2(%pd2, %pd2, 0x%x)\n",
+		 olddentry, newdentry, flags);
+
+	err = vfs_rename(olddir, olddentry, newdir, newdentry, NULL, flags);
+
+	if (err) {
+		pr_debug("...rename2(%pd2, %pd2, ...) = %i\n",
+			 olddentry, newdentry, err);
+	}
+	return err;
+}
+
+static inline int ovl_do_whiteout(struct inode *dir, struct dentry *dentry)
+{
+	int err = vfs_whiteout(dir, dentry);
+	pr_debug("whiteout(%pd2) = %i\n", dentry, err);
+	return err;
+}
+
+enum ovl_path_type ovl_path_type(struct dentry *dentry);
+u64 ovl_dentry_version_get(struct dentry *dentry);
+void ovl_dentry_version_inc(struct dentry *dentry);
+void ovl_path_upper(struct dentry *dentry, struct path *path);
+void ovl_path_lower(struct dentry *dentry, struct path *path);
+enum ovl_path_type ovl_path_real(struct dentry *dentry, struct path *path);
+struct dentry *ovl_dentry_upper(struct dentry *dentry);
+struct dentry *ovl_dentry_lower(struct dentry *dentry);
+struct dentry *ovl_dentry_real(struct dentry *dentry);
+struct dentry *ovl_entry_real(struct ovl_entry *oe, bool *is_upper);
+struct ovl_dir_cache *ovl_dir_cache(struct dentry *dentry);
+void ovl_set_dir_cache(struct dentry *dentry, struct ovl_dir_cache *cache);
+struct dentry *ovl_workdir(struct dentry *dentry);
+int ovl_want_write(struct dentry *dentry);
+void ovl_drop_write(struct dentry *dentry);
+bool ovl_dentry_is_opaque(struct dentry *dentry);
+void ovl_dentry_set_opaque(struct dentry *dentry, bool opaque);
+bool ovl_is_whiteout(struct dentry *dentry);
+void ovl_dentry_update(struct dentry *dentry, struct dentry *upperdentry);
+struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
+			  unsigned int flags);
+struct file *ovl_path_open(struct path *path, int flags);
+
+struct dentry *ovl_upper_create(struct dentry *upperdir, struct dentry *dentry,
+				struct kstat *stat, const char *link);
+
+/* readdir.c */
+extern const struct file_operations ovl_dir_operations;
+int ovl_check_empty_dir(struct dentry *dentry, struct list_head *list);
+void ovl_cleanup_whiteouts(struct dentry *upper, struct list_head *list);
+void ovl_cache_free(struct list_head *list);
+
+/* inode.c */
+int ovl_setattr(struct dentry *dentry, struct iattr *attr);
+int ovl_permission(struct inode *inode, int mask);
+int ovl_setxattr(struct dentry *dentry, const char *name,
+		 const void *value, size_t size, int flags);
+ssize_t ovl_getxattr(struct dentry *dentry, const char *name,
+		     void *value, size_t size);
+ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size);
+int ovl_removexattr(struct dentry *dentry, const char *name);
+
+struct inode *ovl_new_inode(struct super_block *sb, umode_t mode,
+			    struct ovl_entry *oe);
+static inline void ovl_copyattr(struct inode *from, struct inode *to)
+{
+	to->i_uid = from->i_uid;
+	to->i_gid = from->i_gid;
+}
+
+/* dir.c */
+extern const struct inode_operations ovl_dir_inode_operations;
+struct dentry *ovl_lookup_temp(struct dentry *workdir, struct dentry *dentry);
+int ovl_create_real(struct inode *dir, struct dentry *newdentry,
+		    struct kstat *stat, const char *link,
+		    struct dentry *hardlink, bool debug);
+void ovl_cleanup(struct inode *dir, struct dentry *dentry);
+
+/* copy_up.c */
+int ovl_copy_up(struct dentry *dentry);
+int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry,
+		    struct path *lowerpath, struct kstat *stat,
+		    struct iattr *attr);
+int ovl_copy_xattr(struct dentry *old, struct dentry *new);
+int ovl_set_attr(struct dentry *upper, struct kstat *stat);
diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c
new file mode 100644
index 0000000..c020599
--- /dev/null
+++ b/fs/overlayfs/readdir.c
@@ -0,0 +1,588 @@
+/*
+ *
+ * Copyright (C) 2011 Novell Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ */
+
+#include <linux/fs.h>
+#include <linux/slab.h>
+#include <linux/namei.h>
+#include <linux/file.h>
+#include <linux/xattr.h>
+#include <linux/rbtree.h>
+#include <linux/security.h>
+#include <linux/cred.h>
+#include "overlayfs.h"
+
+struct ovl_cache_entry {
+	unsigned int len;
+	unsigned int type;
+	u64 ino;
+	struct list_head l_node;
+	struct rb_node node;
+	bool is_whiteout;
+	bool is_cursor;
+	char name[];
+};
+
+struct ovl_dir_cache {
+	long refcount;
+	u64 version;
+	struct list_head entries;
+};
+
+struct ovl_readdir_data {
+	struct dir_context ctx;
+	bool is_merge;
+	struct rb_root root;
+	struct list_head *list;
+	struct list_head middle;
+	int count;
+	int err;
+};
+
+struct ovl_dir_file {
+	bool is_real;
+	bool is_upper;
+	struct ovl_dir_cache *cache;
+	struct ovl_cache_entry cursor;
+	struct file *realfile;
+	struct file *upperfile;
+};
+
+static struct ovl_cache_entry *ovl_cache_entry_from_node(struct rb_node *n)
+{
+	return container_of(n, struct ovl_cache_entry, node);
+}
+
+static struct ovl_cache_entry *ovl_cache_entry_find(struct rb_root *root,
+						    const char *name, int len)
+{
+	struct rb_node *node = root->rb_node;
+	int cmp;
+
+	while (node) {
+		struct ovl_cache_entry *p = ovl_cache_entry_from_node(node);
+
+		cmp = strncmp(name, p->name, len);
+		if (cmp > 0)
+			node = p->node.rb_right;
+		else if (cmp < 0 || len < p->len)
+			node = p->node.rb_left;
+		else
+			return p;
+	}
+
+	return NULL;
+}
+
+static struct ovl_cache_entry *ovl_cache_entry_new(const char *name, int len,
+						   u64 ino, unsigned int d_type)
+{
+	struct ovl_cache_entry *p;
+	size_t size = offsetof(struct ovl_cache_entry, name[len + 1]);
+
+	p = kmalloc(size, GFP_KERNEL);
+	if (p) {
+		memcpy(p->name, name, len);
+		p->name[len] = '\0';
+		p->len = len;
+		p->type = d_type;
+		p->ino = ino;
+		p->is_whiteout = false;
+		p->is_cursor = false;
+	}
+
+	return p;
+}
+
+static int ovl_cache_entry_add_rb(struct ovl_readdir_data *rdd,
+				  const char *name, int len, u64 ino,
+				  unsigned int d_type)
+{
+	struct rb_node **newp = &rdd->root.rb_node;
+	struct rb_node *parent = NULL;
+	struct ovl_cache_entry *p;
+
+	while (*newp) {
+		int cmp;
+		struct ovl_cache_entry *tmp;
+
+		parent = *newp;
+		tmp = ovl_cache_entry_from_node(*newp);
+		cmp = strncmp(name, tmp->name, len);
+		if (cmp > 0)
+			newp = &tmp->node.rb_right;
+		else if (cmp < 0 || len < tmp->len)
+			newp = &tmp->node.rb_left;
+		else
+			return 0;
+	}
+
+	p = ovl_cache_entry_new(name, len, ino, d_type);
+	if (p == NULL)
+		return -ENOMEM;
+
+	list_add_tail(&p->l_node, rdd->list);
+	rb_link_node(&p->node, parent, newp);
+	rb_insert_color(&p->node, &rdd->root);
+
+	return 0;
+}
+
+static int ovl_fill_lower(struct ovl_readdir_data *rdd,
+			  const char *name, int namelen,
+			  loff_t offset, u64 ino, unsigned int d_type)
+{
+	struct ovl_cache_entry *p;
+
+	p = ovl_cache_entry_find(&rdd->root, name, namelen);
+	if (p) {
+		list_move_tail(&p->l_node, &rdd->middle);
+	} else {
+		p = ovl_cache_entry_new(name, namelen, ino, d_type);
+		if (p == NULL)
+			rdd->err = -ENOMEM;
+		else
+			list_add_tail(&p->l_node, &rdd->middle);
+	}
+
+	return rdd->err;
+}
+
+void ovl_cache_free(struct list_head *list)
+{
+	struct ovl_cache_entry *p;
+	struct ovl_cache_entry *n;
+
+	list_for_each_entry_safe(p, n, list, l_node)
+		kfree(p);
+
+	INIT_LIST_HEAD(list);
+}
+
+static void ovl_cache_put(struct ovl_dir_file *od, struct dentry *dentry)
+{
+	struct ovl_dir_cache *cache = od->cache;
+
+	list_del_init(&od->cursor.l_node);
+	WARN_ON(cache->refcount <= 0);
+	cache->refcount--;
+	if (!cache->refcount) {
+		if (ovl_dir_cache(dentry) == cache)
+			ovl_set_dir_cache(dentry, NULL);
+
+		ovl_cache_free(&cache->entries);
+		kfree(cache);
+	}
+}
+
+static int ovl_fill_merge(struct dir_context *ctx, const char *name,
+			  int namelen, loff_t offset, u64 ino,
+			  unsigned int d_type)
+{
+	struct ovl_readdir_data *rdd =
+		container_of(ctx, struct ovl_readdir_data, ctx);
+
+	rdd->count++;
+	if (!rdd->is_merge)
+		return ovl_cache_entry_add_rb(rdd, name, namelen, ino, d_type);
+	else
+		return ovl_fill_lower(rdd, name, namelen, offset, ino, d_type);
+}
+
+static inline int ovl_dir_read(struct path *realpath,
+			       struct ovl_readdir_data *rdd)
+{
+	struct file *realfile;
+	int err;
+
+	realfile = ovl_path_open(realpath, O_RDONLY | O_DIRECTORY);
+	if (IS_ERR(realfile))
+		return PTR_ERR(realfile);
+
+	rdd->ctx.pos = 0;
+	do {
+		rdd->count = 0;
+		rdd->err = 0;
+		err = iterate_dir(realfile, &rdd->ctx);
+		if (err >= 0)
+			err = rdd->err;
+	} while (!err && rdd->count);
+	fput(realfile);
+
+	return err;
+}
+
+static void ovl_dir_reset(struct file *file)
+{
+	struct ovl_dir_file *od = file->private_data;
+	struct ovl_dir_cache *cache = od->cache;
+	struct dentry *dentry = file->f_path.dentry;
+	enum ovl_path_type type = ovl_path_type(dentry);
+
+	if (cache && ovl_dentry_version_get(dentry) != cache->version) {
+		ovl_cache_put(od, dentry);
+		od->cache = NULL;
+	}
+	WARN_ON(!od->is_real && type != OVL_PATH_MERGE);
+	if (od->is_real && type == OVL_PATH_MERGE)
+		od->is_real = false;
+}
+
+static int ovl_dir_mark_whiteouts(struct dentry *dir,
+				  struct ovl_readdir_data *rdd)
+{
+	struct ovl_cache_entry *p;
+	struct dentry *dentry;
+	const struct cred *old_cred;
+	struct cred *override_cred;
+
+	override_cred = prepare_creds();
+	if (!override_cred) {
+		ovl_cache_free(rdd->list);
+		return -ENOMEM;
+	}
+
+	/*
+	 * CAP_DAC_OVERRIDE for lookup
+	 */
+	cap_raise(override_cred->cap_effective, CAP_DAC_OVERRIDE);
+	old_cred = override_creds(override_cred);
+
+	mutex_lock(&dir->d_inode->i_mutex);
+	list_for_each_entry(p, rdd->list, l_node) {
+		if (p->is_cursor)
+			continue;
+
+		if (p->type != DT_CHR)
+			continue;
+
+		dentry = lookup_one_len(p->name, dir, p->len);
+		if (IS_ERR(dentry))
+			continue;
+
+		p->is_whiteout = ovl_is_whiteout(dentry);
+		dput(dentry);
+	}
+	mutex_unlock(&dir->d_inode->i_mutex);
+
+	revert_creds(old_cred);
+	put_cred(override_cred);
+
+	return 0;
+}
+
+static int ovl_dir_read_merged(struct dentry *dentry, struct list_head *list)
+{
+	int err;
+	struct path lowerpath;
+	struct path upperpath;
+	struct ovl_readdir_data rdd = {
+		.ctx.actor = ovl_fill_merge,
+		.list = list,
+		.root = RB_ROOT,
+		.is_merge = false,
+	};
+
+	ovl_path_lower(dentry, &lowerpath);
+	ovl_path_upper(dentry, &upperpath);
+
+	if (upperpath.dentry) {
+		err = ovl_dir_read(&upperpath, &rdd);
+		if (err)
+			goto out;
+
+		if (lowerpath.dentry) {
+			err = ovl_dir_mark_whiteouts(upperpath.dentry, &rdd);
+			if (err)
+				goto out;
+		}
+	}
+	if (lowerpath.dentry) {
+		/*
+		 * Insert lowerpath entries before upperpath ones, this allows
+		 * offsets to be reasonably constant
+		 */
+		list_add(&rdd.middle, rdd.list);
+		rdd.is_merge = true;
+		err = ovl_dir_read(&lowerpath, &rdd);
+		list_del(&rdd.middle);
+	}
+out:
+	return err;
+}
+
+static void ovl_seek_cursor(struct ovl_dir_file *od, loff_t pos)
+{
+	struct ovl_cache_entry *p;
+	loff_t off = 0;
+
+	list_for_each_entry(p, &od->cache->entries, l_node) {
+		if (p->is_cursor)
+			continue;
+		if (off >= pos)
+			break;
+		off++;
+	}
+	list_move_tail(&od->cursor.l_node, &p->l_node);
+}
+
+static struct ovl_dir_cache *ovl_cache_get(struct dentry *dentry)
+{
+	int res;
+	struct ovl_dir_cache *cache;
+
+	cache = ovl_dir_cache(dentry);
+	if (cache && ovl_dentry_version_get(dentry) == cache->version) {
+		cache->refcount++;
+		return cache;
+	}
+	ovl_set_dir_cache(dentry, NULL);
+
+	cache = kzalloc(sizeof(struct ovl_dir_cache), GFP_KERNEL);
+	if (!cache)
+		return ERR_PTR(-ENOMEM);
+
+	cache->refcount = 1;
+	INIT_LIST_HEAD(&cache->entries);
+
+	res = ovl_dir_read_merged(dentry, &cache->entries);
+	if (res) {
+		ovl_cache_free(&cache->entries);
+		kfree(cache);
+		return ERR_PTR(res);
+	}
+
+	cache->version = ovl_dentry_version_get(dentry);
+	ovl_set_dir_cache(dentry, cache);
+
+	return cache;
+}
+
+static int ovl_iterate(struct file *file, struct dir_context *ctx)
+{
+	struct ovl_dir_file *od = file->private_data;
+	struct dentry *dentry = file->f_path.dentry;
+
+	if (!ctx->pos)
+		ovl_dir_reset(file);
+
+	if (od->is_real)
+		return iterate_dir(od->realfile, ctx);
+
+	if (!od->cache) {
+		struct ovl_dir_cache *cache;
+
+		cache = ovl_cache_get(dentry);
+		if (IS_ERR(cache))
+			return PTR_ERR(cache);
+
+		od->cache = cache;
+		ovl_seek_cursor(od, ctx->pos);
+	}
+
+	while (od->cursor.l_node.next != &od->cache->entries) {
+		struct ovl_cache_entry *p;
+
+		p = list_entry(od->cursor.l_node.next, struct ovl_cache_entry, l_node);
+		/* Skip cursors */
+		if (!p->is_cursor) {
+			if (!p->is_whiteout) {
+				if (!dir_emit(ctx, p->name, p->len, p->ino, p->type))
+					break;
+			}
+			ctx->pos++;
+		}
+		list_move(&od->cursor.l_node, &p->l_node);
+	}
+	return 0;
+}
+
+static loff_t ovl_dir_llseek(struct file *file, loff_t offset, int origin)
+{
+	loff_t res;
+	struct ovl_dir_file *od = file->private_data;
+
+	mutex_lock(&file_inode(file)->i_mutex);
+	if (!file->f_pos)
+		ovl_dir_reset(file);
+
+	if (od->is_real) {
+		res = vfs_llseek(od->realfile, offset, origin);
+		file->f_pos = od->realfile->f_pos;
+	} else {
+		res = -EINVAL;
+
+		switch (origin) {
+		case SEEK_CUR:
+			offset += file->f_pos;
+			break;
+		case SEEK_SET:
+			break;
+		default:
+			goto out_unlock;
+		}
+		if (offset < 0)
+			goto out_unlock;
+
+		if (offset != file->f_pos) {
+			file->f_pos = offset;
+			if (od->cache)
+				ovl_seek_cursor(od, offset);
+		}
+		res = offset;
+	}
+out_unlock:
+	mutex_unlock(&file_inode(file)->i_mutex);
+
+	return res;
+}
+
+static int ovl_dir_fsync(struct file *file, loff_t start, loff_t end,
+			 int datasync)
+{
+	struct ovl_dir_file *od = file->private_data;
+	struct dentry *dentry = file->f_path.dentry;
+	struct file *realfile = od->realfile;
+
+	/*
+	 * Need to check if we started out being a lower dir, but got copied up
+	 */
+	if (!od->is_upper && ovl_path_type(dentry) != OVL_PATH_LOWER) {
+		struct inode *inode = file_inode(file);
+
+		realfile = lockless_dereference(od->upperfile);
+		if (!realfile) {
+			struct path upperpath;
+
+			ovl_path_upper(dentry, &upperpath);
+			realfile = ovl_path_open(&upperpath, O_RDONLY);
+			smp_mb__before_spinlock();
+			mutex_lock(&inode->i_mutex);
+			if (!od->upperfile) {
+				if (IS_ERR(realfile)) {
+					mutex_unlock(&inode->i_mutex);
+					return PTR_ERR(realfile);
+				}
+				od->upperfile = realfile;
+			} else {
+				/* somebody has beaten us to it */
+				if (!IS_ERR(realfile))
+					fput(realfile);
+				realfile = od->upperfile;
+			}
+			mutex_unlock(&inode->i_mutex);
+		}
+	}
+
+	return vfs_fsync_range(realfile, start, end, datasync);
+}
+
+static int ovl_dir_release(struct inode *inode, struct file *file)
+{
+	struct ovl_dir_file *od = file->private_data;
+
+	if (od->cache) {
+		mutex_lock(&inode->i_mutex);
+		ovl_cache_put(od, file->f_path.dentry);
+		mutex_unlock(&inode->i_mutex);
+	}
+	fput(od->realfile);
+	if (od->upperfile)
+		fput(od->upperfile);
+	kfree(od);
+
+	return 0;
+}
+
+static int ovl_dir_open(struct inode *inode, struct file *file)
+{
+	struct path realpath;
+	struct file *realfile;
+	struct ovl_dir_file *od;
+	enum ovl_path_type type;
+
+	od = kzalloc(sizeof(struct ovl_dir_file), GFP_KERNEL);
+	if (!od)
+		return -ENOMEM;
+
+	type = ovl_path_real(file->f_path.dentry, &realpath);
+	realfile = ovl_path_open(&realpath, file->f_flags);
+	if (IS_ERR(realfile)) {
+		kfree(od);
+		return PTR_ERR(realfile);
+	}
+	INIT_LIST_HEAD(&od->cursor.l_node);
+	od->realfile = realfile;
+	od->is_real = (type != OVL_PATH_MERGE);
+	od->is_upper = (type != OVL_PATH_LOWER);
+	od->cursor.is_cursor = true;
+	file->private_data = od;
+
+	return 0;
+}
+
+const struct file_operations ovl_dir_operations = {
+	.read		= generic_read_dir,
+	.open		= ovl_dir_open,
+	.iterate	= ovl_iterate,
+	.llseek		= ovl_dir_llseek,
+	.fsync		= ovl_dir_fsync,
+	.release	= ovl_dir_release,
+};
+
+int ovl_check_empty_dir(struct dentry *dentry, struct list_head *list)
+{
+	int err;
+	struct ovl_cache_entry *p;
+
+	err = ovl_dir_read_merged(dentry, list);
+	if (err)
+		return err;
+
+	err = 0;
+
+	list_for_each_entry(p, list, l_node) {
+		if (p->is_whiteout)
+			continue;
+
+		if (p->name[0] == '.') {
+			if (p->len == 1)
+				continue;
+			if (p->len == 2 && p->name[1] == '.')
+				continue;
+		}
+		err = -ENOTEMPTY;
+		break;
+	}
+
+	return err;
+}
+
+void ovl_cleanup_whiteouts(struct dentry *upper, struct list_head *list)
+{
+	struct ovl_cache_entry *p;
+
+	mutex_lock_nested(&upper->d_inode->i_mutex, I_MUTEX_CHILD);
+	list_for_each_entry(p, list, l_node) {
+		struct dentry *dentry;
+
+		if (!p->is_whiteout)
+			continue;
+
+		dentry = lookup_one_len(p->name, upper, p->len);
+		if (IS_ERR(dentry)) {
+			pr_err("overlayfs: lookup '%s/%.*s' failed (%i)\n",
+			       upper->d_name.name, p->len, p->name,
+			       (int) PTR_ERR(dentry));
+			continue;
+		}
+		ovl_cleanup(upper->d_inode, dentry);
+		dput(dentry);
+	}
+	mutex_unlock(&upper->d_inode->i_mutex);
+}
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
new file mode 100644
index 0000000..f16d318
--- /dev/null
+++ b/fs/overlayfs/super.c
@@ -0,0 +1,833 @@
+/*
+ *
+ * Copyright (C) 2011 Novell Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ */
+
+#include <linux/fs.h>
+#include <linux/namei.h>
+#include <linux/xattr.h>
+#include <linux/security.h>
+#include <linux/mount.h>
+#include <linux/slab.h>
+#include <linux/parser.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/statfs.h>
+#include <linux/seq_file.h>
+#include "overlayfs.h"
+
+MODULE_AUTHOR("Miklos Szeredi <miklos@szeredi.hu>");
+MODULE_DESCRIPTION("Overlay filesystem");
+MODULE_LICENSE("GPL");
+
+#define OVERLAYFS_SUPER_MAGIC 0x794c7630
+
+struct ovl_config {
+	char *lowerdir;
+	char *upperdir;
+	char *workdir;
+};
+
+/* private information held for overlayfs's superblock */
+struct ovl_fs {
+	struct vfsmount *upper_mnt;
+	struct vfsmount *lower_mnt;
+	struct dentry *workdir;
+	long lower_namelen;
+	/* pathnames of lower and upper dirs, for show_options */
+	struct ovl_config config;
+};
+
+struct ovl_dir_cache;
+
+/* private information held for every overlayfs dentry */
+struct ovl_entry {
+	struct dentry *__upperdentry;
+	struct dentry *lowerdentry;
+	struct ovl_dir_cache *cache;
+	union {
+		struct {
+			u64 version;
+			bool opaque;
+		};
+		struct rcu_head rcu;
+	};
+};
+
+const char *ovl_opaque_xattr = "trusted.overlay.opaque";
+
+
+enum ovl_path_type ovl_path_type(struct dentry *dentry)
+{
+	struct ovl_entry *oe = dentry->d_fsdata;
+
+	if (oe->__upperdentry) {
+		if (oe->lowerdentry) {
+			if (S_ISDIR(dentry->d_inode->i_mode))
+				return OVL_PATH_MERGE;
+			else
+				return OVL_PATH_UPPER;
+		} else {
+			if (oe->opaque)
+				return OVL_PATH_UPPER;
+			else
+				return OVL_PATH_PURE_UPPER;
+		}
+	} else {
+		return OVL_PATH_LOWER;
+	}
+}
+
+static struct dentry *ovl_upperdentry_dereference(struct ovl_entry *oe)
+{
+	return lockless_dereference(oe->__upperdentry);
+}
+
+void ovl_path_upper(struct dentry *dentry, struct path *path)
+{
+	struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
+	struct ovl_entry *oe = dentry->d_fsdata;
+
+	path->mnt = ofs->upper_mnt;
+	path->dentry = ovl_upperdentry_dereference(oe);
+}
+
+enum ovl_path_type ovl_path_real(struct dentry *dentry, struct path *path)
+{
+
+	enum ovl_path_type type = ovl_path_type(dentry);
+
+	if (type == OVL_PATH_LOWER)
+		ovl_path_lower(dentry, path);
+	else
+		ovl_path_upper(dentry, path);
+
+	return type;
+}
+
+struct dentry *ovl_dentry_upper(struct dentry *dentry)
+{
+	struct ovl_entry *oe = dentry->d_fsdata;
+
+	return ovl_upperdentry_dereference(oe);
+}
+
+struct dentry *ovl_dentry_lower(struct dentry *dentry)
+{
+	struct ovl_entry *oe = dentry->d_fsdata;
+
+	return oe->lowerdentry;
+}
+
+struct dentry *ovl_dentry_real(struct dentry *dentry)
+{
+	struct ovl_entry *oe = dentry->d_fsdata;
+	struct dentry *realdentry;
+
+	realdentry = ovl_upperdentry_dereference(oe);
+	if (!realdentry)
+		realdentry = oe->lowerdentry;
+
+	return realdentry;
+}
+
+struct dentry *ovl_entry_real(struct ovl_entry *oe, bool *is_upper)
+{
+	struct dentry *realdentry;
+
+	realdentry = ovl_upperdentry_dereference(oe);
+	if (realdentry) {
+		*is_upper = true;
+	} else {
+		realdentry = oe->lowerdentry;
+		*is_upper = false;
+	}
+	return realdentry;
+}
+
+struct ovl_dir_cache *ovl_dir_cache(struct dentry *dentry)
+{
+	struct ovl_entry *oe = dentry->d_fsdata;
+
+	return oe->cache;
+}
+
+void ovl_set_dir_cache(struct dentry *dentry, struct ovl_dir_cache *cache)
+{
+	struct ovl_entry *oe = dentry->d_fsdata;
+
+	oe->cache = cache;
+}
+
+void ovl_path_lower(struct dentry *dentry, struct path *path)
+{
+	struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
+	struct ovl_entry *oe = dentry->d_fsdata;
+
+	path->mnt = ofs->lower_mnt;
+	path->dentry = oe->lowerdentry;
+}
+
+int ovl_want_write(struct dentry *dentry)
+{
+	struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
+	return mnt_want_write(ofs->upper_mnt);
+}
+
+void ovl_drop_write(struct dentry *dentry)
+{
+	struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
+	mnt_drop_write(ofs->upper_mnt);
+}
+
+struct dentry *ovl_workdir(struct dentry *dentry)
+{
+	struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
+	return ofs->workdir;
+}
+
+bool ovl_dentry_is_opaque(struct dentry *dentry)
+{
+	struct ovl_entry *oe = dentry->d_fsdata;
+	return oe->opaque;
+}
+
+void ovl_dentry_set_opaque(struct dentry *dentry, bool opaque)
+{
+	struct ovl_entry *oe = dentry->d_fsdata;
+	oe->opaque = opaque;
+}
+
+void ovl_dentry_update(struct dentry *dentry, struct dentry *upperdentry)
+{
+	struct ovl_entry *oe = dentry->d_fsdata;
+
+	WARN_ON(!mutex_is_locked(&upperdentry->d_parent->d_inode->i_mutex));
+	WARN_ON(oe->__upperdentry);
+	BUG_ON(!upperdentry->d_inode);
+	/*
+	 * Make sure upperdentry is consistent before making it visible to
+	 * ovl_upperdentry_dereference().
+	 */
+	smp_wmb();
+	oe->__upperdentry = upperdentry;
+}
+
+void ovl_dentry_version_inc(struct dentry *dentry)
+{
+	struct ovl_entry *oe = dentry->d_fsdata;
+
+	WARN_ON(!mutex_is_locked(&dentry->d_inode->i_mutex));
+	oe->version++;
+}
+
+u64 ovl_dentry_version_get(struct dentry *dentry)
+{
+	struct ovl_entry *oe = dentry->d_fsdata;
+
+	WARN_ON(!mutex_is_locked(&dentry->d_inode->i_mutex));
+	return oe->version;
+}
+
+bool ovl_is_whiteout(struct dentry *dentry)
+{
+	struct inode *inode = dentry->d_inode;
+
+	return inode && IS_WHITEOUT(inode);
+}
+
+static bool ovl_is_opaquedir(struct dentry *dentry)
+{
+	int res;
+	char val;
+	struct inode *inode = dentry->d_inode;
+
+	if (!S_ISDIR(inode->i_mode) || !inode->i_op->getxattr)
+		return false;
+
+	res = inode->i_op->getxattr(dentry, ovl_opaque_xattr, &val, 1);
+	if (res == 1 && val == 'y')
+		return true;
+
+	return false;
+}
+
+static void ovl_dentry_release(struct dentry *dentry)
+{
+	struct ovl_entry *oe = dentry->d_fsdata;
+
+	if (oe) {
+		dput(oe->__upperdentry);
+		dput(oe->lowerdentry);
+		kfree_rcu(oe, rcu);
+	}
+}
+
+static const struct dentry_operations ovl_dentry_operations = {
+	.d_release = ovl_dentry_release,
+};
+
+static struct ovl_entry *ovl_alloc_entry(void)
+{
+	return kzalloc(sizeof(struct ovl_entry), GFP_KERNEL);
+}
+
+static inline struct dentry *ovl_lookup_real(struct dentry *dir,
+					     struct qstr *name)
+{
+	struct dentry *dentry;
+
+	mutex_lock(&dir->d_inode->i_mutex);
+	dentry = lookup_one_len(name->name, dir, name->len);
+	mutex_unlock(&dir->d_inode->i_mutex);
+
+	if (IS_ERR(dentry)) {
+		if (PTR_ERR(dentry) == -ENOENT)
+			dentry = NULL;
+	} else if (!dentry->d_inode) {
+		dput(dentry);
+		dentry = NULL;
+	}
+	return dentry;
+}
+
+struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
+			  unsigned int flags)
+{
+	struct ovl_entry *oe;
+	struct dentry *upperdir;
+	struct dentry *lowerdir;
+	struct dentry *upperdentry = NULL;
+	struct dentry *lowerdentry = NULL;
+	struct inode *inode = NULL;
+	int err;
+
+	err = -ENOMEM;
+	oe = ovl_alloc_entry();
+	if (!oe)
+		goto out;
+
+	upperdir = ovl_dentry_upper(dentry->d_parent);
+	lowerdir = ovl_dentry_lower(dentry->d_parent);
+
+	if (upperdir) {
+		upperdentry = ovl_lookup_real(upperdir, &dentry->d_name);
+		err = PTR_ERR(upperdentry);
+		if (IS_ERR(upperdentry))
+			goto out_put_dir;
+
+		if (lowerdir && upperdentry) {
+			if (ovl_is_whiteout(upperdentry)) {
+				dput(upperdentry);
+				upperdentry = NULL;
+				oe->opaque = true;
+			} else if (ovl_is_opaquedir(upperdentry)) {
+				oe->opaque = true;
+			}
+		}
+	}
+	if (lowerdir && !oe->opaque) {
+		lowerdentry = ovl_lookup_real(lowerdir, &dentry->d_name);
+		err = PTR_ERR(lowerdentry);
+		if (IS_ERR(lowerdentry))
+			goto out_dput_upper;
+	}
+
+	if (lowerdentry && upperdentry &&
+	    (!S_ISDIR(upperdentry->d_inode->i_mode) ||
+	     !S_ISDIR(lowerdentry->d_inode->i_mode))) {
+		dput(lowerdentry);
+		lowerdentry = NULL;
+		oe->opaque = true;
+	}
+
+	if (lowerdentry || upperdentry) {
+		struct dentry *realdentry;
+
+		realdentry = upperdentry ? upperdentry : lowerdentry;
+		err = -ENOMEM;
+		inode = ovl_new_inode(dentry->d_sb, realdentry->d_inode->i_mode,
+				      oe);
+		if (!inode)
+			goto out_dput;
+		ovl_copyattr(realdentry->d_inode, inode);
+	}
+
+	oe->__upperdentry = upperdentry;
+	oe->lowerdentry = lowerdentry;
+
+	dentry->d_fsdata = oe;
+	d_add(dentry, inode);
+
+	return NULL;
+
+out_dput:
+	dput(lowerdentry);
+out_dput_upper:
+	dput(upperdentry);
+out_put_dir:
+	kfree(oe);
+out:
+	return ERR_PTR(err);
+}
+
+struct file *ovl_path_open(struct path *path, int flags)
+{
+	return dentry_open(path, flags, current_cred());
+}
+
+static void ovl_put_super(struct super_block *sb)
+{
+	struct ovl_fs *ufs = sb->s_fs_info;
+
+	dput(ufs->workdir);
+	mntput(ufs->upper_mnt);
+	mntput(ufs->lower_mnt);
+
+	kfree(ufs->config.lowerdir);
+	kfree(ufs->config.upperdir);
+	kfree(ufs->config.workdir);
+	kfree(ufs);
+}
+
+/**
+ * ovl_statfs
+ * @sb: The overlayfs super block
+ * @buf: The struct kstatfs to fill in with stats
+ *
+ * Get the filesystem statistics.  As writes always target the upper layer
+ * filesystem pass the statfs to the same filesystem.
+ */
+static int ovl_statfs(struct dentry *dentry, struct kstatfs *buf)
+{
+	struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
+	struct dentry *root_dentry = dentry->d_sb->s_root;
+	struct path path;
+	int err;
+
+	ovl_path_upper(root_dentry, &path);
+
+	err = vfs_statfs(&path, buf);
+	if (!err) {
+		buf->f_namelen = max(buf->f_namelen, ofs->lower_namelen);
+		buf->f_type = OVERLAYFS_SUPER_MAGIC;
+	}
+
+	return err;
+}
+
+/**
+ * ovl_show_options
+ *
+ * Prints the mount options for a given superblock.
+ * Returns zero; does not fail.
+ */
+static int ovl_show_options(struct seq_file *m, struct dentry *dentry)
+{
+	struct super_block *sb = dentry->d_sb;
+	struct ovl_fs *ufs = sb->s_fs_info;
+
+	seq_printf(m, ",lowerdir=%s", ufs->config.lowerdir);
+	seq_printf(m, ",upperdir=%s", ufs->config.upperdir);
+	seq_printf(m, ",workdir=%s", ufs->config.workdir);
+	return 0;
+}
+
+static const struct super_operations ovl_super_operations = {
+	.put_super	= ovl_put_super,
+	.statfs		= ovl_statfs,
+	.show_options	= ovl_show_options,
+};
+
+enum {
+	OPT_LOWERDIR,
+	OPT_UPPERDIR,
+	OPT_WORKDIR,
+	OPT_ERR,
+};
+
+static const match_table_t ovl_tokens = {
+	{OPT_LOWERDIR,			"lowerdir=%s"},
+	{OPT_UPPERDIR,			"upperdir=%s"},
+	{OPT_WORKDIR,			"workdir=%s"},
+	{OPT_ERR,			NULL}
+};
+
+static char *ovl_next_opt(char **s)
+{
+	char *sbegin = *s;
+	char *p;
+
+	if (sbegin == NULL)
+		return NULL;
+
+	for (p = sbegin; *p; p++) {
+		if (*p == '\\') {
+			p++;
+			if (!*p)
+				break;
+		} else if (*p == ',') {
+			*p = '\0';
+			*s = p + 1;
+			return sbegin;
+		}
+	}
+	*s = NULL;
+	return sbegin;
+}
+
+static int ovl_parse_opt(char *opt, struct ovl_config *config)
+{
+	char *p;
+
+	while ((p = ovl_next_opt(&opt)) != NULL) {
+		int token;
+		substring_t args[MAX_OPT_ARGS];
+
+		if (!*p)
+			continue;
+
+		token = match_token(p, ovl_tokens, args);
+		switch (token) {
+		case OPT_UPPERDIR:
+			kfree(config->upperdir);
+			config->upperdir = match_strdup(&args[0]);
+			if (!config->upperdir)
+				return -ENOMEM;
+			break;
+
+		case OPT_LOWERDIR:
+			kfree(config->lowerdir);
+			config->lowerdir = match_strdup(&args[0]);
+			if (!config->lowerdir)
+				return -ENOMEM;
+			break;
+
+		case OPT_WORKDIR:
+			kfree(config->workdir);
+			config->workdir = match_strdup(&args[0]);
+			if (!config->workdir)
+				return -ENOMEM;
+			break;
+
+		default:
+			return -EINVAL;
+		}
+	}
+	return 0;
+}
+
+#define OVL_WORKDIR_NAME "work"
+
+static struct dentry *ovl_workdir_create(struct vfsmount *mnt,
+					 struct dentry *dentry)
+{
+	struct inode *dir = dentry->d_inode;
+	struct dentry *work;
+	int err;
+	bool retried = false;
+
+	err = mnt_want_write(mnt);
+	if (err)
+		return ERR_PTR(err);
+
+	mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT);
+retry:
+	work = lookup_one_len(OVL_WORKDIR_NAME, dentry,
+			      strlen(OVL_WORKDIR_NAME));
+
+	if (!IS_ERR(work)) {
+		struct kstat stat = {
+			.mode = S_IFDIR | 0,
+		};
+
+		if (work->d_inode) {
+			err = -EEXIST;
+			if (retried)
+				goto out_dput;
+
+			retried = true;
+			ovl_cleanup(dir, work);
+			dput(work);
+			goto retry;
+		}
+
+		err = ovl_create_real(dir, work, &stat, NULL, NULL, true);
+		if (err)
+			goto out_dput;
+	}
+out_unlock:
+	mutex_unlock(&dir->i_mutex);
+	mnt_drop_write(mnt);
+
+	return work;
+
+out_dput:
+	dput(work);
+	work = ERR_PTR(err);
+	goto out_unlock;
+}
+
+static void ovl_unescape(char *s)
+{
+	char *d = s;
+
+	for (;; s++, d++) {
+		if (*s == '\\')
+			s++;
+		*d = *s;
+		if (!*s)
+			break;
+	}
+}
+
+static int ovl_mount_dir(const char *name, struct path *path)
+{
+	int err;
+	char *tmp = kstrdup(name, GFP_KERNEL);
+
+	if (!tmp)
+		return -ENOMEM;
+
+	ovl_unescape(tmp);
+	err = kern_path(tmp, LOOKUP_FOLLOW, path);
+	if (err) {
+		pr_err("overlayfs: failed to resolve '%s': %i\n", tmp, err);
+		err = -EINVAL;
+	}
+	kfree(tmp);
+	return err;
+}
+
+static bool ovl_is_allowed_fs_type(struct dentry *root)
+{
+	const struct dentry_operations *dop = root->d_op;
+
+	/*
+	 * We don't support:
+	 *  - automount filesystems
+	 *  - filesystems with revalidate (FIXME for lower layer)
+	 *  - filesystems with case insensitive names
+	 */
+	if (dop &&
+	    (dop->d_manage || dop->d_automount ||
+	     dop->d_revalidate || dop->d_weak_revalidate ||
+	     dop->d_compare || dop->d_hash)) {
+		return false;
+	}
+	return true;
+}
+
+/* Workdir should not be subdir of upperdir and vice versa */
+static bool ovl_workdir_ok(struct dentry *workdir, struct dentry *upperdir)
+{
+	bool ok = false;
+
+	if (workdir != upperdir) {
+		ok = (lock_rename(workdir, upperdir) == NULL);
+		unlock_rename(workdir, upperdir);
+	}
+	return ok;
+}
+
+static int ovl_fill_super(struct super_block *sb, void *data, int silent)
+{
+	struct path lowerpath;
+	struct path upperpath;
+	struct path workpath;
+	struct inode *root_inode;
+	struct dentry *root_dentry;
+	struct ovl_entry *oe;
+	struct ovl_fs *ufs;
+	struct kstatfs statfs;
+	int err;
+
+	err = -ENOMEM;
+	ufs = kzalloc(sizeof(struct ovl_fs), GFP_KERNEL);
+	if (!ufs)
+		goto out;
+
+	err = ovl_parse_opt((char *) data, &ufs->config);
+	if (err)
+		goto out_free_config;
+
+	/* FIXME: workdir is not needed for a R/O mount */
+	err = -EINVAL;
+	if (!ufs->config.upperdir || !ufs->config.lowerdir ||
+	    !ufs->config.workdir) {
+		pr_err("overlayfs: missing upperdir or lowerdir or workdir\n");
+		goto out_free_config;
+	}
+
+	err = -ENOMEM;
+	oe = ovl_alloc_entry();
+	if (oe == NULL)
+		goto out_free_config;
+
+	err = ovl_mount_dir(ufs->config.upperdir, &upperpath);
+	if (err)
+		goto out_free_oe;
+
+	err = ovl_mount_dir(ufs->config.lowerdir, &lowerpath);
+	if (err)
+		goto out_put_upperpath;
+
+	err = ovl_mount_dir(ufs->config.workdir, &workpath);
+	if (err)
+		goto out_put_lowerpath;
+
+	err = -EINVAL;
+	if (!S_ISDIR(upperpath.dentry->d_inode->i_mode) ||
+	    !S_ISDIR(lowerpath.dentry->d_inode->i_mode) ||
+	    !S_ISDIR(workpath.dentry->d_inode->i_mode)) {
+		pr_err("overlayfs: upperdir or lowerdir or workdir not a directory\n");
+		goto out_put_workpath;
+	}
+
+	if (upperpath.mnt != workpath.mnt) {
+		pr_err("overlayfs: workdir and upperdir must reside under the same mount\n");
+		goto out_put_workpath;
+	}
+	if (!ovl_workdir_ok(workpath.dentry, upperpath.dentry)) {
+		pr_err("overlayfs: workdir and upperdir must be separate subtrees\n");
+		goto out_put_workpath;
+	}
+
+	if (!ovl_is_allowed_fs_type(upperpath.dentry)) {
+		pr_err("overlayfs: filesystem of upperdir is not supported\n");
+		goto out_put_workpath;
+	}
+
+	if (!ovl_is_allowed_fs_type(lowerpath.dentry)) {
+		pr_err("overlayfs: filesystem of lowerdir is not supported\n");
+		goto out_put_workpath;
+	}
+
+	err = vfs_statfs(&lowerpath, &statfs);
+	if (err) {
+		pr_err("overlayfs: statfs failed on lowerpath\n");
+		goto out_put_workpath;
+	}
+	ufs->lower_namelen = statfs.f_namelen;
+
+	sb->s_stack_depth = max(upperpath.mnt->mnt_sb->s_stack_depth,
+				lowerpath.mnt->mnt_sb->s_stack_depth) + 1;
+
+	err = -EINVAL;
+	if (sb->s_stack_depth > FILESYSTEM_MAX_STACK_DEPTH) {
+		pr_err("overlayfs: maximum fs stacking depth exceeded\n");
+		goto out_put_workpath;
+	}
+
+	ufs->upper_mnt = clone_private_mount(&upperpath);
+	err = PTR_ERR(ufs->upper_mnt);
+	if (IS_ERR(ufs->upper_mnt)) {
+		pr_err("overlayfs: failed to clone upperpath\n");
+		goto out_put_workpath;
+	}
+
+	ufs->lower_mnt = clone_private_mount(&lowerpath);
+	err = PTR_ERR(ufs->lower_mnt);
+	if (IS_ERR(ufs->lower_mnt)) {
+		pr_err("overlayfs: failed to clone lowerpath\n");
+		goto out_put_upper_mnt;
+	}
+
+	ufs->workdir = ovl_workdir_create(ufs->upper_mnt, workpath.dentry);
+	err = PTR_ERR(ufs->workdir);
+	if (IS_ERR(ufs->workdir)) {
+		pr_err("overlayfs: failed to create directory %s/%s\n",
+		       ufs->config.workdir, OVL_WORKDIR_NAME);
+		goto out_put_lower_mnt;
+	}
+
+	/*
+	 * Make lower_mnt R/O.  That way fchmod/fchown on lower file
+	 * will fail instead of modifying lower fs.
+	 */
+	ufs->lower_mnt->mnt_flags |= MNT_READONLY;
+
+	/* If the upper fs is r/o, we mark overlayfs r/o too */
+	if (ufs->upper_mnt->mnt_sb->s_flags & MS_RDONLY)
+		sb->s_flags |= MS_RDONLY;
+
+	sb->s_d_op = &ovl_dentry_operations;
+
+	err = -ENOMEM;
+	root_inode = ovl_new_inode(sb, S_IFDIR, oe);
+	if (!root_inode)
+		goto out_put_workdir;
+
+	root_dentry = d_make_root(root_inode);
+	if (!root_dentry)
+		goto out_put_workdir;
+
+	mntput(upperpath.mnt);
+	mntput(lowerpath.mnt);
+	path_put(&workpath);
+
+	oe->__upperdentry = upperpath.dentry;
+	oe->lowerdentry = lowerpath.dentry;
+
+	root_dentry->d_fsdata = oe;
+
+	sb->s_magic = OVERLAYFS_SUPER_MAGIC;
+	sb->s_op = &ovl_super_operations;
+	sb->s_root = root_dentry;
+	sb->s_fs_info = ufs;
+
+	return 0;
+
+out_put_workdir:
+	dput(ufs->workdir);
+out_put_lower_mnt:
+	mntput(ufs->lower_mnt);
+out_put_upper_mnt:
+	mntput(ufs->upper_mnt);
+out_put_workpath:
+	path_put(&workpath);
+out_put_lowerpath:
+	path_put(&lowerpath);
+out_put_upperpath:
+	path_put(&upperpath);
+out_free_oe:
+	kfree(oe);
+out_free_config:
+	kfree(ufs->config.lowerdir);
+	kfree(ufs->config.upperdir);
+	kfree(ufs->config.workdir);
+	kfree(ufs);
+out:
+	return err;
+}
+
+static struct dentry *ovl_mount(struct file_system_type *fs_type, int flags,
+				const char *dev_name, void *raw_data)
+{
+	return mount_nodev(fs_type, flags, raw_data, ovl_fill_super);
+}
+
+static struct file_system_type ovl_fs_type = {
+	.owner		= THIS_MODULE,
+	.name		= "overlay",
+	.mount		= ovl_mount,
+	.kill_sb	= kill_anon_super,
+};
+MODULE_ALIAS_FS("overlay");
+
+static int __init ovl_init(void)
+{
+	return register_filesystem(&ovl_fs_type);
+}
+
+static void __exit ovl_exit(void)
+{
+	unregister_filesystem(&ovl_fs_type);
+}
+
+module_init(ovl_init);
+module_exit(ovl_exit);
diff --git a/fs/proc/array.c b/fs/proc/array.c
index cd3653e..bd117d0 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -157,20 +157,29 @@ static inline void task_state(struct seq_file *m, struct pid_namespace *ns,
 	struct user_namespace *user_ns = seq_user_ns(m);
 	struct group_info *group_info;
 	int g;
-	struct fdtable *fdt = NULL;
+	struct task_struct *tracer;
 	const struct cred *cred;
-	pid_t ppid, tpid;
+	pid_t ppid, tpid = 0, tgid, ngid;
+	unsigned int max_fds = 0;
 
 	rcu_read_lock();
 	ppid = pid_alive(p) ?
 		task_tgid_nr_ns(rcu_dereference(p->real_parent), ns) : 0;
-	tpid = 0;
-	if (pid_alive(p)) {
-		struct task_struct *tracer = ptrace_parent(p);
-		if (tracer)
-			tpid = task_pid_nr_ns(tracer, ns);
-	}
+
+	tracer = ptrace_parent(p);
+	if (tracer)
+		tpid = task_pid_nr_ns(tracer, ns);
+
+	tgid = task_tgid_nr_ns(p, ns);
+	ngid = task_numa_group_id(p);
 	cred = get_task_cred(p);
+
+	task_lock(p);
+	if (p->files)
+		max_fds = files_fdtable(p->files)->max_fds;
+	task_unlock(p);
+	rcu_read_unlock();
+
 	seq_printf(m,
 		"State:\t%s\n"
 		"Tgid:\t%d\n"
@@ -179,12 +188,10 @@ static inline void task_state(struct seq_file *m, struct pid_namespace *ns,
 		"PPid:\t%d\n"
 		"TracerPid:\t%d\n"
 		"Uid:\t%d\t%d\t%d\t%d\n"
-		"Gid:\t%d\t%d\t%d\t%d\n",
+		"Gid:\t%d\t%d\t%d\t%d\n"
+		"FDSize:\t%d\nGroups:\t",
 		get_task_state(p),
-		task_tgid_nr_ns(p, ns),
-		task_numa_group_id(p),
-		pid_nr_ns(pid, ns),
-		ppid, tpid,
+		tgid, ngid, pid_nr_ns(pid, ns), ppid, tpid,
 		from_kuid_munged(user_ns, cred->uid),
 		from_kuid_munged(user_ns, cred->euid),
 		from_kuid_munged(user_ns, cred->suid),
@@ -192,20 +199,10 @@ static inline void task_state(struct seq_file *m, struct pid_namespace *ns,
 		from_kgid_munged(user_ns, cred->gid),
 		from_kgid_munged(user_ns, cred->egid),
 		from_kgid_munged(user_ns, cred->sgid),
-		from_kgid_munged(user_ns, cred->fsgid));
-
-	task_lock(p);
-	if (p->files)
-		fdt = files_fdtable(p->files);
-	seq_printf(m,
-		"FDSize:\t%d\n"
-		"Groups:\t",
-		fdt ? fdt->max_fds : 0);
-	rcu_read_unlock();
+		from_kgid_munged(user_ns, cred->fsgid),
+		max_fds);
 
 	group_info = cred->group_info;
-	task_unlock(p);
-
 	for (g = 0; g < group_info->ngroups; g++)
 		seq_printf(m, "%d ",
 			   from_kgid_munged(user_ns, GROUP_AT(group_info, g)));
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 772efa4..590aeda 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -2618,6 +2618,9 @@ static void proc_flush_task_mnt(struct vfsmount *mnt, pid_t pid, pid_t tgid)
 		dput(dentry);
 	}
 
+	if (pid == tgid)
+		return;
+
 	name.name = buf;
 	name.len = snprintf(buf, sizeof(buf), "%d", tgid);
 	leader = d_hash_and_lookup(mnt->mnt_root, &name);
@@ -2789,7 +2792,7 @@ retry:
 int proc_pid_readdir(struct file *file, struct dir_context *ctx)
 {
 	struct tgid_iter iter;
-	struct pid_namespace *ns = file->f_dentry->d_sb->s_fs_info;
+	struct pid_namespace *ns = file_inode(file)->i_sb->s_fs_info;
 	loff_t pos = ctx->pos;
 
 	if (pos >= PID_MAX_LIMIT + TGID_OFFSET)
@@ -3095,7 +3098,7 @@ static int proc_task_readdir(struct file *file, struct dir_context *ctx)
 	/* f_version caches the tgid value that the last readdir call couldn't
 	 * return. lseek aka telldir automagically resets f_version to 0.
 	 */
-	ns = file->f_dentry->d_sb->s_fs_info;
+	ns = inode->i_sb->s_fs_info;
 	tid = (int)file->f_version;
 	file->f_version = 0;
 	for (task = first_tid(proc_pid(inode), tid, ctx->pos - 2, ns);
diff --git a/fs/proc/fd.c b/fs/proc/fd.c
index e11d7c5..8e5ad83 100644
--- a/fs/proc/fd.c
+++ b/fs/proc/fd.c
@@ -53,7 +53,8 @@ static int seq_show(struct seq_file *m, void *v)
 			   (long long)file->f_pos, f_flags,
 			   real_mount(file->f_path.mnt)->mnt_id);
 		if (file->f_op->show_fdinfo)
-			ret = file->f_op->show_fdinfo(m, file);
+			file->f_op->show_fdinfo(m, file);
+		ret = seq_has_overflowed(m);
 		fput(file);
 	}
 
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index 317b726..7fea132 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -31,9 +31,73 @@ static DEFINE_SPINLOCK(proc_subdir_lock);
 
 static int proc_match(unsigned int len, const char *name, struct proc_dir_entry *de)
 {
-	if (de->namelen != len)
-		return 0;
-	return !memcmp(name, de->name, len);
+	if (len < de->namelen)
+		return -1;
+	if (len > de->namelen)
+		return 1;
+
+	return memcmp(name, de->name, len);
+}
+
+static struct proc_dir_entry *pde_subdir_first(struct proc_dir_entry *dir)
+{
+	return rb_entry_safe(rb_first(&dir->subdir), struct proc_dir_entry,
+			     subdir_node);
+}
+
+static struct proc_dir_entry *pde_subdir_next(struct proc_dir_entry *dir)
+{
+	return rb_entry_safe(rb_next(&dir->subdir_node), struct proc_dir_entry,
+			     subdir_node);
+}
+
+static struct proc_dir_entry *pde_subdir_find(struct proc_dir_entry *dir,
+					      const char *name,
+					      unsigned int len)
+{
+	struct rb_node *node = dir->subdir.rb_node;
+
+	while (node) {
+		struct proc_dir_entry *de = container_of(node,
+							 struct proc_dir_entry,
+							 subdir_node);
+		int result = proc_match(len, name, de);
+
+		if (result < 0)
+			node = node->rb_left;
+		else if (result > 0)
+			node = node->rb_right;
+		else
+			return de;
+	}
+	return NULL;
+}
+
+static bool pde_subdir_insert(struct proc_dir_entry *dir,
+			      struct proc_dir_entry *de)
+{
+	struct rb_root *root = &dir->subdir;
+	struct rb_node **new = &root->rb_node, *parent = NULL;
+
+	/* Figure out where to put new node */
+	while (*new) {
+		struct proc_dir_entry *this =
+			container_of(*new, struct proc_dir_entry, subdir_node);
+		int result = proc_match(de->namelen, de->name, this);
+
+		parent = *new;
+		if (result < 0)
+			new = &(*new)->rb_left;
+		else if (result > 0)
+			new = &(*new)->rb_right;
+		else
+			return false;
+	}
+
+	/* Add new node and rebalance tree. */
+	rb_link_node(&de->subdir_node, parent, new);
+	rb_insert_color(&de->subdir_node, root);
+	return true;
 }
 
 static int proc_notify_change(struct dentry *dentry, struct iattr *iattr)
@@ -92,10 +156,7 @@ static int __xlate_proc_name(const char *name, struct proc_dir_entry **ret,
 			break;
 
 		len = next - cp;
-		for (de = de->subdir; de ; de = de->next) {
-			if (proc_match(len, cp, de))
-				break;
-		}
+		de = pde_subdir_find(de, cp, len);
 		if (!de) {
 			WARN(1, "name '%s'\n", name);
 			return -ENOENT;
@@ -183,19 +244,16 @@ struct dentry *proc_lookup_de(struct proc_dir_entry *de, struct inode *dir,
 	struct inode *inode;
 
 	spin_lock(&proc_subdir_lock);
-	for (de = de->subdir; de ; de = de->next) {
-		if (de->namelen != dentry->d_name.len)
-			continue;
-		if (!memcmp(dentry->d_name.name, de->name, de->namelen)) {
-			pde_get(de);
-			spin_unlock(&proc_subdir_lock);
-			inode = proc_get_inode(dir->i_sb, de);
-			if (!inode)
-				return ERR_PTR(-ENOMEM);
-			d_set_d_op(dentry, &simple_dentry_operations);
-			d_add(dentry, inode);
-			return NULL;
-		}
+	de = pde_subdir_find(de, dentry->d_name.name, dentry->d_name.len);
+	if (de) {
+		pde_get(de);
+		spin_unlock(&proc_subdir_lock);
+		inode = proc_get_inode(dir->i_sb, de);
+		if (!inode)
+			return ERR_PTR(-ENOMEM);
+		d_set_d_op(dentry, &simple_dentry_operations);
+		d_add(dentry, inode);
+		return NULL;
 	}
 	spin_unlock(&proc_subdir_lock);
 	return ERR_PTR(-ENOENT);
@@ -225,7 +283,7 @@ int proc_readdir_de(struct proc_dir_entry *de, struct file *file,
 		return 0;
 
 	spin_lock(&proc_subdir_lock);
-	de = de->subdir;
+	de = pde_subdir_first(de);
 	i = ctx->pos - 2;
 	for (;;) {
 		if (!de) {
@@ -234,7 +292,7 @@ int proc_readdir_de(struct proc_dir_entry *de, struct file *file,
 		}
 		if (!i)
 			break;
-		de = de->next;
+		de = pde_subdir_next(de);
 		i--;
 	}
 
@@ -249,7 +307,7 @@ int proc_readdir_de(struct proc_dir_entry *de, struct file *file,
 		}
 		spin_lock(&proc_subdir_lock);
 		ctx->pos++;
-		next = de->next;
+		next = pde_subdir_next(de);
 		pde_put(de);
 		de = next;
 	} while (de);
@@ -286,9 +344,8 @@ static const struct inode_operations proc_dir_inode_operations = {
 
 static int proc_register(struct proc_dir_entry * dir, struct proc_dir_entry * dp)
 {
-	struct proc_dir_entry *tmp;
 	int ret;
-	
+
 	ret = proc_alloc_inum(&dp->low_ino);
 	if (ret)
 		return ret;
@@ -304,21 +361,21 @@ static int proc_register(struct proc_dir_entry * dir, struct proc_dir_entry * dp
 		dp->proc_iops = &proc_file_inode_operations;
 	} else {
 		WARN_ON(1);
+		proc_free_inum(dp->low_ino);
 		return -EINVAL;
 	}
 
 	spin_lock(&proc_subdir_lock);
-
-	for (tmp = dir->subdir; tmp; tmp = tmp->next)
-		if (strcmp(tmp->name, dp->name) == 0) {
-			WARN(1, "proc_dir_entry '%s/%s' already registered\n",
-				dir->name, dp->name);
-			break;
-		}
-
-	dp->next = dir->subdir;
 	dp->parent = dir;
-	dir->subdir = dp;
+	if (pde_subdir_insert(dir, dp) == false) {
+		WARN(1, "proc_dir_entry '%s/%s' already registered\n",
+		     dir->name, dp->name);
+		spin_unlock(&proc_subdir_lock);
+		if (S_ISDIR(dp->mode))
+			dir->nlink--;
+		proc_free_inum(dp->low_ino);
+		return -EEXIST;
+	}
 	spin_unlock(&proc_subdir_lock);
 
 	return 0;
@@ -354,6 +411,7 @@ static struct proc_dir_entry *__proc_create(struct proc_dir_entry **parent,
 	ent->namelen = qstr.len;
 	ent->mode = mode;
 	ent->nlink = nlink;
+	ent->subdir = RB_ROOT;
 	atomic_set(&ent->count, 1);
 	spin_lock_init(&ent->pde_unload_lock);
 	INIT_LIST_HEAD(&ent->pde_openers);
@@ -485,7 +543,6 @@ void pde_put(struct proc_dir_entry *pde)
  */
 void remove_proc_entry(const char *name, struct proc_dir_entry *parent)
 {
-	struct proc_dir_entry **p;
 	struct proc_dir_entry *de = NULL;
 	const char *fn = name;
 	unsigned int len;
@@ -497,14 +554,9 @@ void remove_proc_entry(const char *name, struct proc_dir_entry *parent)
 	}
 	len = strlen(fn);
 
-	for (p = &parent->subdir; *p; p=&(*p)->next ) {
-		if (proc_match(len, fn, *p)) {
-			de = *p;
-			*p = de->next;
-			de->next = NULL;
-			break;
-		}
-	}
+	de = pde_subdir_find(parent, fn, len);
+	if (de)
+		rb_erase(&de->subdir_node, &parent->subdir);
 	spin_unlock(&proc_subdir_lock);
 	if (!de) {
 		WARN(1, "name '%s'\n", name);
@@ -516,16 +568,15 @@ void remove_proc_entry(const char *name, struct proc_dir_entry *parent)
 	if (S_ISDIR(de->mode))
 		parent->nlink--;
 	de->nlink = 0;
-	WARN(de->subdir, "%s: removing non-empty directory "
-			 "'%s/%s', leaking at least '%s'\n", __func__,
-			 de->parent->name, de->name, de->subdir->name);
+	WARN(pde_subdir_first(de),
+	     "%s: removing non-empty directory '%s/%s', leaking at least '%s'\n",
+	     __func__, de->parent->name, de->name, pde_subdir_first(de)->name);
 	pde_put(de);
 }
 EXPORT_SYMBOL(remove_proc_entry);
 
 int remove_proc_subtree(const char *name, struct proc_dir_entry *parent)
 {
-	struct proc_dir_entry **p;
 	struct proc_dir_entry *root = NULL, *de, *next;
 	const char *fn = name;
 	unsigned int len;
@@ -537,24 +588,18 @@ int remove_proc_subtree(const char *name, struct proc_dir_entry *parent)
 	}
 	len = strlen(fn);
 
-	for (p = &parent->subdir; *p; p=&(*p)->next ) {
-		if (proc_match(len, fn, *p)) {
-			root = *p;
-			*p = root->next;
-			root->next = NULL;
-			break;
-		}
-	}
+	root = pde_subdir_find(parent, fn, len);
 	if (!root) {
 		spin_unlock(&proc_subdir_lock);
 		return -ENOENT;
 	}
+	rb_erase(&root->subdir_node, &parent->subdir);
+
 	de = root;
 	while (1) {
-		next = de->subdir;
+		next = pde_subdir_first(de);
 		if (next) {
-			de->subdir = next->next;
-			next->next = NULL;
+			rb_erase(&next->subdir_node, &de->subdir);
 			de = next;
 			continue;
 		}
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index aa7a0ee..7fb1a48 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -24,10 +24,9 @@ struct mempolicy;
  * tree) of these proc_dir_entries, so that we can dynamically
  * add new files to /proc.
  *
- * The "next" pointer creates a linked list of one /proc directory,
- * while parent/subdir create the directory structure (every
- * /proc file has a parent, but "subdir" is NULL for all
- * non-directory entries).
+ * parent/subdir are used for the directory structure (every /proc file has a
+ * parent, but "subdir" is empty for all non-directory entries).
+ * subdir_node is used to build the rb tree "subdir" of the parent.
  */
 struct proc_dir_entry {
 	unsigned int low_ino;
@@ -38,7 +37,9 @@ struct proc_dir_entry {
 	loff_t size;
 	const struct inode_operations *proc_iops;
 	const struct file_operations *proc_fops;
-	struct proc_dir_entry *next, *parent, *subdir;
+	struct proc_dir_entry *parent;
+	struct rb_root subdir;
+	struct rb_node subdir_node;
 	void *data;
 	atomic_t count;		/* use count */
 	atomic_t in_use;	/* number of callers into module in progress; */
diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c
index a63af3e..1bde894 100644
--- a/fs/proc/proc_net.c
+++ b/fs/proc/proc_net.c
@@ -192,6 +192,7 @@ static __net_init int proc_net_ns_init(struct net *net)
 	if (!netd)
 		goto out;
 
+	netd->subdir = RB_ROOT;
 	netd->data = net;
 	netd->nlink = 2;
 	netd->namelen = 3;
diff --git a/fs/proc/root.c b/fs/proc/root.c
index 094e44d..e74ac9f 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -251,6 +251,7 @@ struct proc_dir_entry proc_root = {
 	.proc_iops	= &proc_root_inode_operations, 
 	.proc_fops	= &proc_root_operations,
 	.parent		= &proc_root,
+	.subdir		= RB_ROOT,
 	.name		= "/proc",
 };
 
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 4e0388c..246eae8 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -447,59 +447,92 @@ struct mem_size_stats {
 	u64 pss;
 };
 
+static void smaps_account(struct mem_size_stats *mss, struct page *page,
+		unsigned long size, bool young, bool dirty)
+{
+	int mapcount;
+
+	if (PageAnon(page))
+		mss->anonymous += size;
 
-static void smaps_pte_entry(pte_t ptent, unsigned long addr,
-		unsigned long ptent_size, struct mm_walk *walk)
+	mss->resident += size;
+	/* Accumulate the size in pages that have been accessed. */
+	if (young || PageReferenced(page))
+		mss->referenced += size;
+	mapcount = page_mapcount(page);
+	if (mapcount >= 2) {
+		u64 pss_delta;
+
+		if (dirty || PageDirty(page))
+			mss->shared_dirty += size;
+		else
+			mss->shared_clean += size;
+		pss_delta = (u64)size << PSS_SHIFT;
+		do_div(pss_delta, mapcount);
+		mss->pss += pss_delta;
+	} else {
+		if (dirty || PageDirty(page))
+			mss->private_dirty += size;
+		else
+			mss->private_clean += size;
+		mss->pss += (u64)size << PSS_SHIFT;
+	}
+}
+
+static void smaps_pte_entry(pte_t *pte, unsigned long addr,
+		struct mm_walk *walk)
 {
 	struct mem_size_stats *mss = walk->private;
 	struct vm_area_struct *vma = mss->vma;
 	pgoff_t pgoff = linear_page_index(vma, addr);
 	struct page *page = NULL;
-	int mapcount;
 
-	if (pte_present(ptent)) {
-		page = vm_normal_page(vma, addr, ptent);
-	} else if (is_swap_pte(ptent)) {
-		swp_entry_t swpent = pte_to_swp_entry(ptent);
+	if (pte_present(*pte)) {
+		page = vm_normal_page(vma, addr, *pte);
+	} else if (is_swap_pte(*pte)) {
+		swp_entry_t swpent = pte_to_swp_entry(*pte);
 
 		if (!non_swap_entry(swpent))
-			mss->swap += ptent_size;
+			mss->swap += PAGE_SIZE;
 		else if (is_migration_entry(swpent))
 			page = migration_entry_to_page(swpent);
-	} else if (pte_file(ptent)) {
-		if (pte_to_pgoff(ptent) != pgoff)
-			mss->nonlinear += ptent_size;
+	} else if (pte_file(*pte)) {
+		if (pte_to_pgoff(*pte) != pgoff)
+			mss->nonlinear += PAGE_SIZE;
 	}
 
 	if (!page)
 		return;
 
-	if (PageAnon(page))
-		mss->anonymous += ptent_size;
-
 	if (page->index != pgoff)
-		mss->nonlinear += ptent_size;
+		mss->nonlinear += PAGE_SIZE;
 
-	mss->resident += ptent_size;
-	/* Accumulate the size in pages that have been accessed. */
-	if (pte_young(ptent) || PageReferenced(page))
-		mss->referenced += ptent_size;
-	mapcount = page_mapcount(page);
-	if (mapcount >= 2) {
-		if (pte_dirty(ptent) || PageDirty(page))
-			mss->shared_dirty += ptent_size;
-		else
-			mss->shared_clean += ptent_size;
-		mss->pss += (ptent_size << PSS_SHIFT) / mapcount;
-	} else {
-		if (pte_dirty(ptent) || PageDirty(page))
-			mss->private_dirty += ptent_size;
-		else
-			mss->private_clean += ptent_size;
-		mss->pss += (ptent_size << PSS_SHIFT);
-	}
+	smaps_account(mss, page, PAGE_SIZE, pte_young(*pte), pte_dirty(*pte));
 }
 
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr,
+		struct mm_walk *walk)
+{
+	struct mem_size_stats *mss = walk->private;
+	struct vm_area_struct *vma = mss->vma;
+	struct page *page;
+
+	/* FOLL_DUMP will return -EFAULT on huge zero page */
+	page = follow_trans_huge_pmd(vma, addr, pmd, FOLL_DUMP);
+	if (IS_ERR_OR_NULL(page))
+		return;
+	mss->anonymous_thp += HPAGE_PMD_SIZE;
+	smaps_account(mss, page, HPAGE_PMD_SIZE,
+			pmd_young(*pmd), pmd_dirty(*pmd));
+}
+#else
+static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr,
+		struct mm_walk *walk)
+{
+}
+#endif
+
 static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
 			   struct mm_walk *walk)
 {
@@ -509,9 +542,8 @@ static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
 	spinlock_t *ptl;
 
 	if (pmd_trans_huge_lock(pmd, vma, &ptl) == 1) {
-		smaps_pte_entry(*(pte_t *)pmd, addr, HPAGE_PMD_SIZE, walk);
+		smaps_pmd_entry(pmd, addr, walk);
 		spin_unlock(ptl);
-		mss->anonymous_thp += HPAGE_PMD_SIZE;
 		return 0;
 	}
 
@@ -524,7 +556,7 @@ static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
 	 */
 	pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
 	for (; addr != end; pte++, addr += PAGE_SIZE)
-		smaps_pte_entry(*pte, addr, PAGE_SIZE, walk);
+		smaps_pte_entry(pte, addr, walk);
 	pte_unmap_unlock(pte - 1, ptl);
 	cond_resched();
 	return 0;
@@ -552,6 +584,9 @@ static void show_smap_vma_flags(struct seq_file *m, struct vm_area_struct *vma)
 		[ilog2(VM_GROWSDOWN)]	= "gd",
 		[ilog2(VM_PFNMAP)]	= "pf",
 		[ilog2(VM_DENYWRITE)]	= "dw",
+#ifdef CONFIG_X86_INTEL_MPX
+		[ilog2(VM_MPX)]		= "mp",
+#endif
 		[ilog2(VM_LOCKED)]	= "lo",
 		[ilog2(VM_IO)]		= "io",
 		[ilog2(VM_SEQ_READ)]	= "sr",
diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c
index fafb7a0..5041660 100644
--- a/fs/pstore/inode.c
+++ b/fs/pstore/inode.c
@@ -36,6 +36,7 @@
 #include <linux/slab.h>
 #include <linux/spinlock.h>
 #include <linux/uaccess.h>
+#include <linux/syslog.h>
 
 #include "internal.h"
 
@@ -120,6 +121,18 @@ static const struct seq_operations pstore_ftrace_seq_ops = {
 	.show	= pstore_ftrace_seq_show,
 };
 
+static int pstore_check_syslog_permissions(struct pstore_private *ps)
+{
+	switch (ps->type) {
+	case PSTORE_TYPE_DMESG:
+	case PSTORE_TYPE_CONSOLE:
+		return check_syslog_permissions(SYSLOG_ACTION_READ_ALL,
+			SYSLOG_FROM_READER);
+	default:
+		return 0;
+	}
+}
+
 static ssize_t pstore_file_read(struct file *file, char __user *userbuf,
 						size_t count, loff_t *ppos)
 {
@@ -138,6 +151,10 @@ static int pstore_file_open(struct inode *inode, struct file *file)
 	int err;
 	const struct seq_operations *sops = NULL;
 
+	err = pstore_check_syslog_permissions(ps);
+	if (err)
+		return err;
+
 	if (ps->type == PSTORE_TYPE_FTRACE)
 		sops = &pstore_ftrace_seq_ops;
 
@@ -174,6 +191,11 @@ static const struct file_operations pstore_file_operations = {
 static int pstore_unlink(struct inode *dir, struct dentry *dentry)
 {
 	struct pstore_private *p = dentry->d_inode->i_private;
+	int err;
+
+	err = pstore_check_syslog_permissions(p);
+	if (err)
+		return err;
 
 	if (p->psi->erase)
 		p->psi->erase(p->type, p->id, p->count,
diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c
index 3b57443..ec881b3 100644
--- a/fs/pstore/ram.c
+++ b/fs/pstore/ram.c
@@ -135,25 +135,27 @@ ramoops_get_next_prz(struct persistent_ram_zone *przs[], uint *c, uint max,
 	return prz;
 }
 
-static void ramoops_read_kmsg_hdr(char *buffer, struct timespec *time,
+static int ramoops_read_kmsg_hdr(char *buffer, struct timespec *time,
 				  bool *compressed)
 {
 	char data_type;
+	int header_length = 0;
 
-	if (sscanf(buffer, RAMOOPS_KERNMSG_HDR "%lu.%lu-%c\n",
-			&time->tv_sec, &time->tv_nsec, &data_type) == 3) {
+	if (sscanf(buffer, RAMOOPS_KERNMSG_HDR "%lu.%lu-%c\n%n", &time->tv_sec,
+			&time->tv_nsec, &data_type, &header_length) == 3) {
 		if (data_type == 'C')
 			*compressed = true;
 		else
 			*compressed = false;
-	} else if (sscanf(buffer, RAMOOPS_KERNMSG_HDR "%lu.%lu\n",
-			&time->tv_sec, &time->tv_nsec) == 2) {
+	} else if (sscanf(buffer, RAMOOPS_KERNMSG_HDR "%lu.%lu\n%n",
+			&time->tv_sec, &time->tv_nsec, &header_length) == 2) {
 			*compressed = false;
 	} else {
 		time->tv_sec = 0;
 		time->tv_nsec = 0;
 		*compressed = false;
 	}
+	return header_length;
 }
 
 static ssize_t ramoops_pstore_read(u64 *id, enum pstore_type_id *type,
@@ -165,6 +167,7 @@ static ssize_t ramoops_pstore_read(u64 *id, enum pstore_type_id *type,
 	ssize_t ecc_notice_size;
 	struct ramoops_context *cxt = psi->data;
 	struct persistent_ram_zone *prz;
+	int header_length;
 
 	prz = ramoops_get_next_prz(cxt->przs, &cxt->dump_read_cnt,
 				   cxt->max_dump_cnt, id, type,
@@ -178,7 +181,13 @@ static ssize_t ramoops_pstore_read(u64 *id, enum pstore_type_id *type,
 	if (!prz)
 		return 0;
 
+	if (!persistent_ram_old(prz))
+		return 0;
+
 	size = persistent_ram_old_size(prz);
+	header_length = ramoops_read_kmsg_hdr(persistent_ram_old(prz), time,
+			compressed);
+	size -= header_length;
 
 	/* ECC correction notice */
 	ecc_notice_size = persistent_ram_ecc_string(prz, NULL, 0);
@@ -187,8 +196,7 @@ static ssize_t ramoops_pstore_read(u64 *id, enum pstore_type_id *type,
 	if (*buf == NULL)
 		return -ENOMEM;
 
-	memcpy(*buf, persistent_ram_old(prz), size);
-	ramoops_read_kmsg_hdr(*buf, time, compressed);
+	memcpy(*buf, (char *)persistent_ram_old(prz) + header_length, size);
 	persistent_ram_ecc_string(prz, *buf + size, ecc_notice_size + 1);
 
 	return size + ecc_notice_size;
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index 8b663b2..8f0acef 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -634,7 +634,7 @@ int dquot_writeback_dquots(struct super_block *sb, int type)
 			dqstats_inc(DQST_LOOKUPS);
 			err = sb->dq_op->write_dquot(dquot);
 			if (!ret && err)
-				err = ret;
+				ret = err;
 			dqput(dquot);
 			spin_lock(&dq_list_lock);
 		}
@@ -893,6 +893,11 @@ out:
 }
 EXPORT_SYMBOL(dqget);
 
+static inline struct dquot **i_dquot(struct inode *inode)
+{
+	return inode->i_sb->s_op->get_dquots(inode);
+}
+
 static int dqinit_needed(struct inode *inode, int type)
 {
 	int cnt;
@@ -900,9 +905,9 @@ static int dqinit_needed(struct inode *inode, int type)
 	if (IS_NOQUOTA(inode))
 		return 0;
 	if (type != -1)
-		return !inode->i_dquot[type];
+		return !i_dquot(inode)[type];
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++)
-		if (!inode->i_dquot[cnt])
+		if (!i_dquot(inode)[cnt])
 			return 1;
 	return 0;
 }
@@ -965,9 +970,9 @@ static void add_dquot_ref(struct super_block *sb, int type)
 static void remove_inode_dquot_ref(struct inode *inode, int type,
 				   struct list_head *tofree_head)
 {
-	struct dquot *dquot = inode->i_dquot[type];
+	struct dquot *dquot = i_dquot(inode)[type];
 
-	inode->i_dquot[type] = NULL;
+	i_dquot(inode)[type] = NULL;
 	if (!dquot)
 		return;
 
@@ -1402,7 +1407,7 @@ static void __dquot_initialize(struct inode *inode, int type)
 		 * we check it without locking here to avoid unnecessary
 		 * dqget()/dqput() calls.
 		 */
-		if (inode->i_dquot[cnt])
+		if (i_dquot(inode)[cnt])
 			continue;
 		init_needed = 1;
 
@@ -1433,8 +1438,8 @@ static void __dquot_initialize(struct inode *inode, int type)
 		/* We could race with quotaon or dqget() could have failed */
 		if (!got[cnt])
 			continue;
-		if (!inode->i_dquot[cnt]) {
-			inode->i_dquot[cnt] = got[cnt];
+		if (!i_dquot(inode)[cnt]) {
+			i_dquot(inode)[cnt] = got[cnt];
 			got[cnt] = NULL;
 			/*
 			 * Make quota reservation system happy if someone
@@ -1442,7 +1447,7 @@ static void __dquot_initialize(struct inode *inode, int type)
 			 */
 			rsv = inode_get_rsv_space(inode);
 			if (unlikely(rsv))
-				dquot_resv_space(inode->i_dquot[cnt], rsv);
+				dquot_resv_space(i_dquot(inode)[cnt], rsv);
 		}
 	}
 out_err:
@@ -1472,8 +1477,8 @@ static void __dquot_drop(struct inode *inode)
 
 	spin_lock(&dq_data_lock);
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
-		put[cnt] = inode->i_dquot[cnt];
-		inode->i_dquot[cnt] = NULL;
+		put[cnt] = i_dquot(inode)[cnt];
+		i_dquot(inode)[cnt] = NULL;
 	}
 	spin_unlock(&dq_data_lock);
 	dqput_all(put);
@@ -1494,7 +1499,7 @@ void dquot_drop(struct inode *inode)
 	 * add quota pointers back anyway.
 	 */
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
-		if (inode->i_dquot[cnt])
+		if (i_dquot(inode)[cnt])
 			break;
 	}
 
@@ -1595,7 +1600,7 @@ int __dquot_alloc_space(struct inode *inode, qsize_t number, int flags)
 {
 	int cnt, ret = 0, index;
 	struct dquot_warn warn[MAXQUOTAS];
-	struct dquot **dquots = inode->i_dquot;
+	struct dquot **dquots = i_dquot(inode);
 	int reserve = flags & DQUOT_SPACE_RESERVE;
 
 	if (!dquot_active(inode)) {
@@ -1643,11 +1648,11 @@ EXPORT_SYMBOL(__dquot_alloc_space);
 /*
  * This operation can block, but only after everything is updated
  */
-int dquot_alloc_inode(const struct inode *inode)
+int dquot_alloc_inode(struct inode *inode)
 {
 	int cnt, ret = 0, index;
 	struct dquot_warn warn[MAXQUOTAS];
-	struct dquot * const *dquots = inode->i_dquot;
+	struct dquot * const *dquots = i_dquot(inode);
 
 	if (!dquot_active(inode))
 		return 0;
@@ -1696,14 +1701,14 @@ int dquot_claim_space_nodirty(struct inode *inode, qsize_t number)
 	spin_lock(&dq_data_lock);
 	/* Claim reserved quotas to allocated quotas */
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
-		if (inode->i_dquot[cnt])
-			dquot_claim_reserved_space(inode->i_dquot[cnt],
+		if (i_dquot(inode)[cnt])
+			dquot_claim_reserved_space(i_dquot(inode)[cnt],
 							number);
 	}
 	/* Update inode bytes */
 	inode_claim_rsv_space(inode, number);
 	spin_unlock(&dq_data_lock);
-	mark_all_dquot_dirty(inode->i_dquot);
+	mark_all_dquot_dirty(i_dquot(inode));
 	srcu_read_unlock(&dquot_srcu, index);
 	return 0;
 }
@@ -1725,14 +1730,14 @@ void dquot_reclaim_space_nodirty(struct inode *inode, qsize_t number)
 	spin_lock(&dq_data_lock);
 	/* Claim reserved quotas to allocated quotas */
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
-		if (inode->i_dquot[cnt])
-			dquot_reclaim_reserved_space(inode->i_dquot[cnt],
+		if (i_dquot(inode)[cnt])
+			dquot_reclaim_reserved_space(i_dquot(inode)[cnt],
 						     number);
 	}
 	/* Update inode bytes */
 	inode_reclaim_rsv_space(inode, number);
 	spin_unlock(&dq_data_lock);
-	mark_all_dquot_dirty(inode->i_dquot);
+	mark_all_dquot_dirty(i_dquot(inode));
 	srcu_read_unlock(&dquot_srcu, index);
 	return;
 }
@@ -1745,7 +1750,7 @@ void __dquot_free_space(struct inode *inode, qsize_t number, int flags)
 {
 	unsigned int cnt;
 	struct dquot_warn warn[MAXQUOTAS];
-	struct dquot **dquots = inode->i_dquot;
+	struct dquot **dquots = i_dquot(inode);
 	int reserve = flags & DQUOT_SPACE_RESERVE, index;
 
 	if (!dquot_active(inode)) {
@@ -1784,11 +1789,11 @@ EXPORT_SYMBOL(__dquot_free_space);
 /*
  * This operation can block, but only after everything is updated
  */
-void dquot_free_inode(const struct inode *inode)
+void dquot_free_inode(struct inode *inode)
 {
 	unsigned int cnt;
 	struct dquot_warn warn[MAXQUOTAS];
-	struct dquot * const *dquots = inode->i_dquot;
+	struct dquot * const *dquots = i_dquot(inode);
 	int index;
 
 	if (!dquot_active(inode))
@@ -1865,7 +1870,7 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to)
 		if (!sb_has_quota_active(inode->i_sb, cnt))
 			continue;
 		is_valid[cnt] = 1;
-		transfer_from[cnt] = inode->i_dquot[cnt];
+		transfer_from[cnt] = i_dquot(inode)[cnt];
 		ret = check_idq(transfer_to[cnt], 1, &warn_to[cnt]);
 		if (ret)
 			goto over_quota;
@@ -1901,7 +1906,7 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to)
 		dquot_incr_space(transfer_to[cnt], cur_space);
 		dquot_resv_space(transfer_to[cnt], rsv_space);
 
-		inode->i_dquot[cnt] = transfer_to[cnt];
+		i_dquot(inode)[cnt] = transfer_to[cnt];
 	}
 	spin_unlock(&dq_data_lock);
 
@@ -2743,8 +2748,8 @@ static int __init dquot_init(void)
 	for (i = 0; i < nr_hash; i++)
 		INIT_HLIST_HEAD(dquot_hash + i);
 
-	printk("Dquot-cache hash table entries: %ld (order %ld, %ld bytes)\n",
-			nr_hash, order, (PAGE_SIZE << order));
+	pr_info("VFS: Dquot-cache hash table entries: %ld (order %ld,"
+		" %ld bytes)\n", nr_hash, order, (PAGE_SIZE << order));
 
 	register_shrinker(&dqcache_shrinker);
 
diff --git a/fs/quota/quota.c b/fs/quota/quota.c
index 7562164..2aa4151 100644
--- a/fs/quota/quota.c
+++ b/fs/quota/quota.c
@@ -47,8 +47,11 @@ static int check_quotactl_permission(struct super_block *sb, int type, int cmd,
 
 static void quota_sync_one(struct super_block *sb, void *arg)
 {
-	if (sb->s_qcop && sb->s_qcop->quota_sync)
-		sb->s_qcop->quota_sync(sb, *(int *)arg);
+	int type = *(int *)arg;
+
+	if (sb->s_qcop && sb->s_qcop->quota_sync &&
+	    (sb->s_quota_types & (1 << type)))
+		sb->s_qcop->quota_sync(sb, type);
 }
 
 static int quota_sync_all(int type)
@@ -297,8 +300,14 @@ static int do_quotactl(struct super_block *sb, int type, int cmd, qid_t id,
 
 	if (type >= (XQM_COMMAND(cmd) ? XQM_MAXQUOTAS : MAXQUOTAS))
 		return -EINVAL;
+	/*
+	 * Quota not supported on this fs? Check this before s_quota_types
+	 * since they needn't be set if quota is not supported at all.
+	 */
 	if (!sb->s_qcop)
 		return -ENOSYS;
+	if (!(sb->s_quota_types & (1 << type)))
+		return -EINVAL;
 
 	ret = check_quotactl_permission(sb, type, cmd, id);
 	if (ret < 0)
diff --git a/fs/readdir.c b/fs/readdir.c
index 33fd922..ced6791 100644
--- a/fs/readdir.c
+++ b/fs/readdir.c
@@ -74,10 +74,11 @@ struct readdir_callback {
 	int result;
 };
 
-static int fillonedir(void * __buf, const char * name, int namlen, loff_t offset,
-		      u64 ino, unsigned int d_type)
+static int fillonedir(struct dir_context *ctx, const char *name, int namlen,
+		      loff_t offset, u64 ino, unsigned int d_type)
 {
-	struct readdir_callback *buf = (struct readdir_callback *) __buf;
+	struct readdir_callback *buf =
+		container_of(ctx, struct readdir_callback, ctx);
 	struct old_linux_dirent __user * dirent;
 	unsigned long d_ino;
 
@@ -148,11 +149,12 @@ struct getdents_callback {
 	int error;
 };
 
-static int filldir(void * __buf, const char * name, int namlen, loff_t offset,
-		   u64 ino, unsigned int d_type)
+static int filldir(struct dir_context *ctx, const char *name, int namlen,
+		   loff_t offset, u64 ino, unsigned int d_type)
 {
 	struct linux_dirent __user * dirent;
-	struct getdents_callback * buf = (struct getdents_callback *) __buf;
+	struct getdents_callback *buf =
+		container_of(ctx, struct getdents_callback, ctx);
 	unsigned long d_ino;
 	int reclen = ALIGN(offsetof(struct linux_dirent, d_name) + namlen + 2,
 		sizeof(long));
@@ -232,11 +234,12 @@ struct getdents_callback64 {
 	int error;
 };
 
-static int filldir64(void * __buf, const char * name, int namlen, loff_t offset,
-		     u64 ino, unsigned int d_type)
+static int filldir64(struct dir_context *ctx, const char *name, int namlen,
+		     loff_t offset, u64 ino, unsigned int d_type)
 {
 	struct linux_dirent64 __user *dirent;
-	struct getdents_callback64 * buf = (struct getdents_callback64 *) __buf;
+	struct getdents_callback64 *buf =
+		container_of(ctx, struct getdents_callback64, ctx);
 	int reclen = ALIGN(offsetof(struct linux_dirent64, d_name) + namlen + 1,
 		sizeof(u64));
 
diff --git a/fs/reiserfs/reiserfs.h b/fs/reiserfs/reiserfs.h
index 1894d96..bb79cdd 100644
--- a/fs/reiserfs/reiserfs.h
+++ b/fs/reiserfs/reiserfs.h
@@ -97,6 +97,10 @@ struct reiserfs_inode_info {
 #ifdef CONFIG_REISERFS_FS_XATTR
 	struct rw_semaphore i_xattr_sem;
 #endif
+#ifdef CONFIG_QUOTA
+	struct dquot *i_dquot[MAXQUOTAS];
+#endif
+
 	struct inode vfs_inode;
 };
 
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index f1376c9..ea63ab1 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -594,6 +594,10 @@ static struct inode *reiserfs_alloc_inode(struct super_block *sb)
 		return NULL;
 	atomic_set(&ei->openers, 0);
 	mutex_init(&ei->tailpack);
+#ifdef CONFIG_QUOTA
+	memset(&ei->i_dquot, 0, sizeof(ei->i_dquot));
+#endif
+
 	return &ei->vfs_inode;
 }
 
@@ -750,6 +754,11 @@ static ssize_t reiserfs_quota_write(struct super_block *, int, const char *,
 				    size_t, loff_t);
 static ssize_t reiserfs_quota_read(struct super_block *, int, char *, size_t,
 				   loff_t);
+
+static struct dquot **reiserfs_get_dquots(struct inode *inode)
+{
+	return REISERFS_I(inode)->i_dquot;
+}
 #endif
 
 static const struct super_operations reiserfs_sops = {
@@ -768,6 +777,7 @@ static const struct super_operations reiserfs_sops = {
 #ifdef CONFIG_QUOTA
 	.quota_read = reiserfs_quota_read,
 	.quota_write = reiserfs_quota_write,
+	.get_dquots = reiserfs_get_dquots,
 #endif
 };
 
@@ -1633,6 +1643,7 @@ static int read_super_block(struct super_block *s, int offset)
 #ifdef CONFIG_QUOTA
 	s->s_qcop = &reiserfs_qctl_operations;
 	s->dq_op = &reiserfs_quota_operations;
+	s->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP;
 #endif
 
 	/*
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index 7c36898..04b0614 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -188,10 +188,11 @@ struct reiserfs_dentry_buf {
 };
 
 static int
-fill_with_dentries(void *buf, const char *name, int namelen, loff_t offset,
-		    u64 ino, unsigned int d_type)
+fill_with_dentries(struct dir_context *ctx, const char *name, int namelen,
+		   loff_t offset, u64 ino, unsigned int d_type)
 {
-	struct reiserfs_dentry_buf *dbuf = buf;
+	struct reiserfs_dentry_buf *dbuf =
+		container_of(ctx, struct reiserfs_dentry_buf, ctx);
 	struct dentry *dentry;
 
 	WARN_ON_ONCE(!mutex_is_locked(&dbuf->xadir->d_inode->i_mutex));
@@ -209,9 +210,9 @@ fill_with_dentries(void *buf, const char *name, int namelen, loff_t offset,
 	} else if (!dentry->d_inode) {
 		/* A directory entry exists, but no file? */
 		reiserfs_error(dentry->d_sb, "xattr-20003",
-			       "Corrupted directory: xattr %s listed but "
-			       "not found for file %s.\n",
-			       dentry->d_name.name, dbuf->xadir->d_name.name);
+			       "Corrupted directory: xattr %pd listed but "
+			       "not found for file %pd.\n",
+			       dentry, dbuf->xadir);
 		dput(dentry);
 		return -EIO;
 	}
@@ -824,10 +825,12 @@ struct listxattr_buf {
 	struct dentry *dentry;
 };
 
-static int listxattr_filler(void *buf, const char *name, int namelen,
-			    loff_t offset, u64 ino, unsigned int d_type)
+static int listxattr_filler(struct dir_context *ctx, const char *name,
+			    int namelen, loff_t offset, u64 ino,
+			    unsigned int d_type)
 {
-	struct listxattr_buf *b = (struct listxattr_buf *)buf;
+	struct listxattr_buf *b =
+		container_of(ctx, struct listxattr_buf, ctx);
 	size_t size;
 
 	if (name[0] != '.' ||
diff --git a/fs/seq_file.c b/fs/seq_file.c
index 3857b72..353948b 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -16,17 +16,6 @@
 #include <asm/uaccess.h>
 #include <asm/page.h>
 
-
-/*
- * seq_files have a buffer which can may overflow. When this happens a larger
- * buffer is reallocated and all the data will be printed again.
- * The overflow state is true when m->count == m->size.
- */
-static bool seq_overflow(struct seq_file *m)
-{
-	return m->count == m->size;
-}
-
 static void seq_set_overflow(struct seq_file *m)
 {
 	m->count = m->size;
@@ -124,7 +113,7 @@ static int traverse(struct seq_file *m, loff_t offset)
 			error = 0;
 			m->count = 0;
 		}
-		if (seq_overflow(m))
+		if (seq_has_overflowed(m))
 			goto Eoverflow;
 		if (pos + m->count > offset) {
 			m->from = offset - pos;
@@ -267,7 +256,7 @@ Fill:
 			break;
 		}
 		err = m->op->show(m, p);
-		if (seq_overflow(m) || err) {
+		if (seq_has_overflowed(m) || err) {
 			m->count = offs;
 			if (likely(err <= 0))
 				break;
diff --git a/fs/signalfd.c b/fs/signalfd.c
index 424b7b6..7e412ad 100644
--- a/fs/signalfd.c
+++ b/fs/signalfd.c
@@ -230,7 +230,7 @@ static ssize_t signalfd_read(struct file *file, char __user *buf, size_t count,
 }
 
 #ifdef CONFIG_PROC_FS
-static int signalfd_show_fdinfo(struct seq_file *m, struct file *f)
+static void signalfd_show_fdinfo(struct seq_file *m, struct file *f)
 {
 	struct signalfd_ctx *ctx = f->private_data;
 	sigset_t sigmask;
@@ -238,8 +238,6 @@ static int signalfd_show_fdinfo(struct seq_file *m, struct file *f)
 	sigmask = ctx->sigmask;
 	signotset(&sigmask);
 	render_sigset_t(m, "sigmask:\t", &sigmask);
-
-	return 0;
 }
 #endif
 
diff --git a/fs/splice.c b/fs/splice.c
index f5cb9ba..75c6058 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -1330,6 +1330,7 @@ long do_splice_direct(struct file *in, loff_t *ppos, struct file *out,
 
 	return ret;
 }
+EXPORT_SYMBOL(do_splice_direct);
 
 static int splice_pipe_to_pipe(struct pipe_inode_info *ipipe,
 			       struct pipe_inode_info *opipe,
diff --git a/fs/sync.c b/fs/sync.c
index bdc729d..01d9f18 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -154,7 +154,7 @@ SYSCALL_DEFINE1(syncfs, int, fd)
 
 	if (!f.file)
 		return -EBADF;
-	sb = f.file->f_dentry->d_sb;
+	sb = f.file->f_path.dentry->d_sb;
 
 	down_read(&sb->s_umount);
 	ret = sync_filesystem(sb);
diff --git a/fs/timerfd.c b/fs/timerfd.c
index b46ffa9..b94fa6c 100644
--- a/fs/timerfd.c
+++ b/fs/timerfd.c
@@ -288,7 +288,7 @@ static ssize_t timerfd_read(struct file *file, char __user *buf, size_t count,
 }
 
 #ifdef CONFIG_PROC_FS
-static int timerfd_show(struct seq_file *m, struct file *file)
+static void timerfd_show(struct seq_file *m, struct file *file)
 {
 	struct timerfd_ctx *ctx = file->private_data;
 	struct itimerspec t;
@@ -298,18 +298,19 @@ static int timerfd_show(struct seq_file *m, struct file *file)
 	t.it_interval = ktime_to_timespec(ctx->tintv);
 	spin_unlock_irq(&ctx->wqh.lock);
 
-	return seq_printf(m,
-			  "clockid: %d\n"
-			  "ticks: %llu\n"
-			  "settime flags: 0%o\n"
-			  "it_value: (%llu, %llu)\n"
-			  "it_interval: (%llu, %llu)\n",
-			  ctx->clockid, (unsigned long long)ctx->ticks,
-			  ctx->settime_flags,
-			  (unsigned long long)t.it_value.tv_sec,
-			  (unsigned long long)t.it_value.tv_nsec,
-			  (unsigned long long)t.it_interval.tv_sec,
-			  (unsigned long long)t.it_interval.tv_nsec);
+	seq_printf(m,
+		   "clockid: %d\n"
+		   "ticks: %llu\n"
+		   "settime flags: 0%o\n"
+		   "it_value: (%llu, %llu)\n"
+		   "it_interval: (%llu, %llu)\n",
+		   ctx->clockid,
+		   (unsigned long long)ctx->ticks,
+		   ctx->settime_flags,
+		   (unsigned long long)t.it_value.tv_sec,
+		   (unsigned long long)t.it_value.tv_nsec,
+		   (unsigned long long)t.it_interval.tv_sec,
+		   (unsigned long long)t.it_interval.tv_nsec);
 }
 #else
 #define timerfd_show NULL
diff --git a/fs/udf/super.c b/fs/udf/super.c
index e229315..3ccb2f1 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -2082,12 +2082,12 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent)
 	mutex_init(&sbi->s_alloc_mutex);
 
 	if (!udf_parse_options((char *)options, &uopt, false))
-		goto error_out;
+		goto parse_options_failure;
 
 	if (uopt.flags & (1 << UDF_FLAG_UTF8) &&
 	    uopt.flags & (1 << UDF_FLAG_NLS_MAP)) {
 		udf_err(sb, "utf8 cannot be combined with iocharset\n");
-		goto error_out;
+		goto parse_options_failure;
 	}
 #ifdef CONFIG_UDF_NLS
 	if ((uopt.flags & (1 << UDF_FLAG_NLS_MAP)) && !uopt.nls_map) {
@@ -2237,8 +2237,8 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent)
 	return 0;
 
 error_out:
-	if (sbi->s_vat_inode)
-		iput(sbi->s_vat_inode);
+	iput(sbi->s_vat_inode);
+parse_options_failure:
 #ifdef CONFIG_UDF_NLS
 	if (UDF_QUERY_FLAG(sb, UDF_FLAG_NLS_MAP))
 		unload_nls(sbi->s_nls_map);
@@ -2291,8 +2291,7 @@ static void udf_put_super(struct super_block *sb)
 
 	sbi = UDF_SB(sb);
 
-	if (sbi->s_vat_inode)
-		iput(sbi->s_vat_inode);
+	iput(sbi->s_vat_inode);
 #ifdef CONFIG_UDF_NLS
 	if (UDF_QUERY_FLAG(sb, UDF_FLAG_NLS_MAP))
 		unload_nls(sbi->s_nls_map);
diff --git a/fs/xattr.c b/fs/xattr.c
index 64e83ef..4ef6985 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -405,16 +405,14 @@ SYSCALL_DEFINE5(fsetxattr, int, fd, const char __user *, name,
 		const void __user *,value, size_t, size, int, flags)
 {
 	struct fd f = fdget(fd);
-	struct dentry *dentry;
 	int error = -EBADF;
 
 	if (!f.file)
 		return error;
-	dentry = f.file->f_path.dentry;
-	audit_inode(NULL, dentry, 0);
+	audit_file(f.file);
 	error = mnt_want_write_file(f.file);
 	if (!error) {
-		error = setxattr(dentry, name, value, size, flags);
+		error = setxattr(f.file->f_path.dentry, name, value, size, flags);
 		mnt_drop_write_file(f.file);
 	}
 	fdput(f);
@@ -509,7 +507,7 @@ SYSCALL_DEFINE4(fgetxattr, int, fd, const char __user *, name,
 
 	if (!f.file)
 		return error;
-	audit_inode(NULL, f.file->f_path.dentry, 0);
+	audit_file(f.file);
 	error = getxattr(f.file->f_path.dentry, name, value, size);
 	fdput(f);
 	return error;
@@ -590,7 +588,7 @@ SYSCALL_DEFINE3(flistxattr, int, fd, char __user *, list, size_t, size)
 
 	if (!f.file)
 		return error;
-	audit_inode(NULL, f.file->f_path.dentry, 0);
+	audit_file(f.file);
 	error = listxattr(f.file->f_path.dentry, list, size);
 	fdput(f);
 	return error;
@@ -651,16 +649,14 @@ SYSCALL_DEFINE2(lremovexattr, const char __user *, pathname,
 SYSCALL_DEFINE2(fremovexattr, int, fd, const char __user *, name)
 {
 	struct fd f = fdget(fd);
-	struct dentry *dentry;
 	int error = -EBADF;
 
 	if (!f.file)
 		return error;
-	dentry = f.file->f_path.dentry;
-	audit_inode(NULL, dentry, 0);
+	audit_file(f.file);
 	error = mnt_want_write_file(f.file);
 	if (!error) {
-		error = removexattr(dentry, name);
+		error = removexattr(f.file->f_path.dentry, name);
 		mnt_drop_write_file(f.file);
 	}
 	fdput(f);
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index 92e8f99..2810026 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -1338,7 +1338,10 @@ xfs_free_file_space(
 	goto out;
 }
 
-
+/*
+ * Preallocate and zero a range of a file. This mechanism has the allocation
+ * semantics of fallocate and in addition converts data in the range to zeroes.
+ */
 int
 xfs_zero_file_space(
 	struct xfs_inode	*ip,
@@ -1346,65 +1349,30 @@ xfs_zero_file_space(
 	xfs_off_t		len)
 {
 	struct xfs_mount	*mp = ip->i_mount;
-	uint			granularity;
-	xfs_off_t		start_boundary;
-	xfs_off_t		end_boundary;
+	uint			blksize;
 	int			error;
 
 	trace_xfs_zero_file_space(ip);
 
-	granularity = max_t(uint, 1 << mp->m_sb.sb_blocklog, PAGE_CACHE_SIZE);
+	blksize = 1 << mp->m_sb.sb_blocklog;
 
 	/*
-	 * Round the range of extents we are going to convert inwards.  If the
-	 * offset is aligned, then it doesn't get changed so we zero from the
-	 * start of the block offset points to.
+	 * Punch a hole and prealloc the range. We use hole punch rather than
+	 * unwritten extent conversion for two reasons:
+	 *
+	 * 1.) Hole punch handles partial block zeroing for us.
+	 *
+	 * 2.) If prealloc returns ENOSPC, the file range is still zero-valued
+	 * by virtue of the hole punch.
 	 */
-	start_boundary = round_up(offset, granularity);
-	end_boundary = round_down(offset + len, granularity);
-
-	ASSERT(start_boundary >= offset);
-	ASSERT(end_boundary <= offset + len);
-
-	if (start_boundary < end_boundary - 1) {
-		/*
-		 * Writeback the range to ensure any inode size updates due to
-		 * appending writes make it to disk (otherwise we could just
-		 * punch out the delalloc blocks).
-		 */
-		error = filemap_write_and_wait_range(VFS_I(ip)->i_mapping,
-				start_boundary, end_boundary - 1);
-		if (error)
-			goto out;
-		truncate_pagecache_range(VFS_I(ip), start_boundary,
-					 end_boundary - 1);
-
-		/* convert the blocks */
-		error = xfs_alloc_file_space(ip, start_boundary,
-					end_boundary - start_boundary - 1,
-					XFS_BMAPI_PREALLOC | XFS_BMAPI_CONVERT);
-		if (error)
-			goto out;
-
-		/* We've handled the interior of the range, now for the edges */
-		if (start_boundary != offset) {
-			error = xfs_iozero(ip, offset, start_boundary - offset);
-			if (error)
-				goto out;
-		}
-
-		if (end_boundary != offset + len)
-			error = xfs_iozero(ip, end_boundary,
-					   offset + len - end_boundary);
-
-	} else {
-		/*
-		 * It's either a sub-granularity range or the range spanned lies
-		 * partially across two adjacent blocks.
-		 */
-		error = xfs_iozero(ip, offset, len);
-	}
+	error = xfs_free_file_space(ip, offset, len);
+	if (error)
+		goto out;
 
+	error = xfs_alloc_file_space(ip, round_down(offset, blksize),
+				     round_up(offset + len, blksize) -
+				     round_down(offset, blksize),
+				     XFS_BMAPI_PREALLOC);
 out:
 	return error;
 
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c
index f1deb96..894924a 100644
--- a/fs/xfs/xfs_itable.c
+++ b/fs/xfs/xfs_itable.c
@@ -236,8 +236,10 @@ xfs_bulkstat_grab_ichunk(
 	XFS_WANT_CORRUPTED_RETURN(stat == 1);
 
 	/* Check if the record contains the inode in request */
-	if (irec->ir_startino + XFS_INODES_PER_CHUNK <= agino)
-		return -EINVAL;
+	if (irec->ir_startino + XFS_INODES_PER_CHUNK <= agino) {
+		*icount = 0;
+		return 0;
+	}
 
 	idx = agino - irec->ir_startino + 1;
 	if (idx < XFS_INODES_PER_CHUNK &&
@@ -262,75 +264,76 @@ xfs_bulkstat_grab_ichunk(
 
 #define XFS_BULKSTAT_UBLEFT(ubleft)	((ubleft) >= statstruct_size)
 
+struct xfs_bulkstat_agichunk {
+	char		__user **ac_ubuffer;/* pointer into user's buffer */
+	int		ac_ubleft;	/* bytes left in user's buffer */
+	int		ac_ubelem;	/* spaces used in user's buffer */
+};
+
 /*
  * Process inodes in chunk with a pointer to a formatter function
  * that will iget the inode and fill in the appropriate structure.
  */
-int
+static int
 xfs_bulkstat_ag_ichunk(
 	struct xfs_mount		*mp,
 	xfs_agnumber_t			agno,
 	struct xfs_inobt_rec_incore	*irbp,
 	bulkstat_one_pf			formatter,
 	size_t				statstruct_size,
-	struct xfs_bulkstat_agichunk	*acp)
+	struct xfs_bulkstat_agichunk	*acp,
+	xfs_agino_t			*last_agino)
 {
-	xfs_ino_t			lastino = acp->ac_lastino;
 	char				__user **ubufp = acp->ac_ubuffer;
-	int				ubleft = acp->ac_ubleft;
-	int				ubelem = acp->ac_ubelem;
-	int				chunkidx, clustidx;
+	int				chunkidx;
 	int				error = 0;
-	xfs_agino_t			agino;
+	xfs_agino_t			agino = irbp->ir_startino;
 
-	for (agino = irbp->ir_startino, chunkidx = clustidx = 0;
-	     XFS_BULKSTAT_UBLEFT(ubleft) &&
-	     irbp->ir_freecount < XFS_INODES_PER_CHUNK;
-	     chunkidx++, clustidx++, agino++) {
-		int		fmterror;	/* bulkstat formatter result */
+	for (chunkidx = 0; chunkidx < XFS_INODES_PER_CHUNK;
+	     chunkidx++, agino++) {
+		int		fmterror;
 		int		ubused;
-		xfs_ino_t	ino = XFS_AGINO_TO_INO(mp, agno, agino);
 
-		ASSERT(chunkidx < XFS_INODES_PER_CHUNK);
+		/* inode won't fit in buffer, we are done */
+		if (acp->ac_ubleft < statstruct_size)
+			break;
 
 		/* Skip if this inode is free */
-		if (XFS_INOBT_MASK(chunkidx) & irbp->ir_free) {
-			lastino = ino;
+		if (XFS_INOBT_MASK(chunkidx) & irbp->ir_free)
 			continue;
-		}
-
-		/*
-		 * Count used inodes as free so we can tell when the
-		 * chunk is used up.
-		 */
-		irbp->ir_freecount++;
 
 		/* Get the inode and fill in a single buffer */
 		ubused = statstruct_size;
-		error = formatter(mp, ino, *ubufp, ubleft, &ubused, &fmterror);
-		if (fmterror == BULKSTAT_RV_NOTHING) {
-			if (error && error != -ENOENT && error != -EINVAL) {
-				ubleft = 0;
-				break;
-			}
-			lastino = ino;
-			continue;
-		}
-		if (fmterror == BULKSTAT_RV_GIVEUP) {
-			ubleft = 0;
+		error = formatter(mp, XFS_AGINO_TO_INO(mp, agno, agino),
+				  *ubufp, acp->ac_ubleft, &ubused, &fmterror);
+
+		if (fmterror == BULKSTAT_RV_GIVEUP ||
+		    (error && error != -ENOENT && error != -EINVAL)) {
+			acp->ac_ubleft = 0;
 			ASSERT(error);
 			break;
 		}
-		if (*ubufp)
-			*ubufp += ubused;
-		ubleft -= ubused;
-		ubelem++;
-		lastino = ino;
+
+		/* be careful not to leak error if at end of chunk */
+		if (fmterror == BULKSTAT_RV_NOTHING || error) {
+			error = 0;
+			continue;
+		}
+
+		*ubufp += ubused;
+		acp->ac_ubleft -= ubused;
+		acp->ac_ubelem++;
 	}
 
-	acp->ac_lastino = lastino;
-	acp->ac_ubleft = ubleft;
-	acp->ac_ubelem = ubelem;
+	/*
+	 * Post-update *last_agino. At this point, agino will always point one
+	 * inode past the last inode we processed successfully. Hence we
+	 * substract that inode when setting the *last_agino cursor so that we
+	 * return the correct cookie to userspace. On the next bulkstat call,
+	 * the inode under the lastino cookie will be skipped as we have already
+	 * processed it here.
+	 */
+	*last_agino = agino - 1;
 
 	return error;
 }
@@ -353,45 +356,33 @@ xfs_bulkstat(
 	xfs_agino_t		agino;	/* inode # in allocation group */
 	xfs_agnumber_t		agno;	/* allocation group number */
 	xfs_btree_cur_t		*cur;	/* btree cursor for ialloc btree */
-	int			end_of_ag; /* set if we've seen the ag end */
-	int			error;	/* error code */
-	int                     fmterror;/* bulkstat formatter result */
-	int			i;	/* loop index */
-	int			icount;	/* count of inodes good in irbuf */
 	size_t			irbsize; /* size of irec buffer in bytes */
-	xfs_ino_t		ino;	/* inode number (filesystem) */
-	xfs_inobt_rec_incore_t	*irbp;	/* current irec buffer pointer */
 	xfs_inobt_rec_incore_t	*irbuf;	/* start of irec buffer */
-	xfs_inobt_rec_incore_t	*irbufend; /* end of good irec buffer entries */
-	xfs_ino_t		lastino; /* last inode number returned */
 	int			nirbuf;	/* size of irbuf */
-	int			rval;	/* return value error code */
-	int			tmp;	/* result value from btree calls */
 	int			ubcount; /* size of user's buffer */
-	int			ubleft;	/* bytes left in user's buffer */
-	char			__user *ubufp;	/* pointer into user's buffer */
-	int			ubelem;	/* spaces used in user's buffer */
+	struct xfs_bulkstat_agichunk ac;
+	int			error = 0;
 
 	/*
 	 * Get the last inode value, see if there's nothing to do.
 	 */
-	ino = (xfs_ino_t)*lastinop;
-	lastino = ino;
-	agno = XFS_INO_TO_AGNO(mp, ino);
-	agino = XFS_INO_TO_AGINO(mp, ino);
+	agno = XFS_INO_TO_AGNO(mp, *lastinop);
+	agino = XFS_INO_TO_AGINO(mp, *lastinop);
 	if (agno >= mp->m_sb.sb_agcount ||
-	    ino != XFS_AGINO_TO_INO(mp, agno, agino)) {
+	    *lastinop != XFS_AGINO_TO_INO(mp, agno, agino)) {
 		*done = 1;
 		*ubcountp = 0;
 		return 0;
 	}
 
 	ubcount = *ubcountp; /* statstruct's */
-	ubleft = ubcount * statstruct_size; /* bytes */
-	*ubcountp = ubelem = 0;
+	ac.ac_ubuffer = &ubuffer;
+	ac.ac_ubleft = ubcount * statstruct_size; /* bytes */;
+	ac.ac_ubelem = 0;
+
+	*ubcountp = 0;
 	*done = 0;
-	fmterror = 0;
-	ubufp = ubuffer;
+
 	irbuf = kmem_zalloc_greedy(&irbsize, PAGE_SIZE, PAGE_SIZE * 4);
 	if (!irbuf)
 		return -ENOMEM;
@@ -402,9 +393,13 @@ xfs_bulkstat(
 	 * Loop over the allocation groups, starting from the last
 	 * inode returned; 0 means start of the allocation group.
 	 */
-	rval = 0;
-	while (XFS_BULKSTAT_UBLEFT(ubleft) && agno < mp->m_sb.sb_agcount) {
-		cond_resched();
+	while (agno < mp->m_sb.sb_agcount) {
+		struct xfs_inobt_rec_incore	*irbp = irbuf;
+		struct xfs_inobt_rec_incore	*irbufend = irbuf + nirbuf;
+		bool				end_of_ag = false;
+		int				icount = 0;
+		int				stat;
+
 		error = xfs_ialloc_read_agi(mp, NULL, agno, &agbp);
 		if (error)
 			break;
@@ -414,10 +409,6 @@ xfs_bulkstat(
 		 */
 		cur = xfs_inobt_init_cursor(mp, NULL, agbp, agno,
 					    XFS_BTNUM_INO);
-		irbp = irbuf;
-		irbufend = irbuf + nirbuf;
-		end_of_ag = 0;
-		icount = 0;
 		if (agino > 0) {
 			/*
 			 * In the middle of an allocation group, we need to get
@@ -427,22 +418,23 @@ xfs_bulkstat(
 
 			error = xfs_bulkstat_grab_ichunk(cur, agino, &icount, &r);
 			if (error)
-				break;
+				goto del_cursor;
 			if (icount) {
 				irbp->ir_startino = r.ir_startino;
 				irbp->ir_freecount = r.ir_freecount;
 				irbp->ir_free = r.ir_free;
 				irbp++;
-				agino = r.ir_startino + XFS_INODES_PER_CHUNK;
 			}
 			/* Increment to the next record */
-			error = xfs_btree_increment(cur, 0, &tmp);
+			error = xfs_btree_increment(cur, 0, &stat);
 		} else {
 			/* Start of ag.  Lookup the first inode chunk */
-			error = xfs_inobt_lookup(cur, 0, XFS_LOOKUP_GE, &tmp);
+			error = xfs_inobt_lookup(cur, 0, XFS_LOOKUP_GE, &stat);
+		}
+		if (error || stat == 0) {
+			end_of_ag = true;
+			goto del_cursor;
 		}
-		if (error)
-			break;
 
 		/*
 		 * Loop through inode btree records in this ag,
@@ -451,10 +443,10 @@ xfs_bulkstat(
 		while (irbp < irbufend && icount < ubcount) {
 			struct xfs_inobt_rec_incore	r;
 
-			error = xfs_inobt_get_rec(cur, &r, &i);
-			if (error || i == 0) {
-				end_of_ag = 1;
-				break;
+			error = xfs_inobt_get_rec(cur, &r, &stat);
+			if (error || stat == 0) {
+				end_of_ag = true;
+				goto del_cursor;
 			}
 
 			/*
@@ -469,77 +461,79 @@ xfs_bulkstat(
 				irbp++;
 				icount += XFS_INODES_PER_CHUNK - r.ir_freecount;
 			}
-			/*
-			 * Set agino to after this chunk and bump the cursor.
-			 */
-			agino = r.ir_startino + XFS_INODES_PER_CHUNK;
-			error = xfs_btree_increment(cur, 0, &tmp);
+			error = xfs_btree_increment(cur, 0, &stat);
+			if (error || stat == 0) {
+				end_of_ag = true;
+				goto del_cursor;
+			}
 			cond_resched();
 		}
+
 		/*
-		 * Drop the btree buffers and the agi buffer.
-		 * We can't hold any of the locks these represent
-		 * when calling iget.
+		 * Drop the btree buffers and the agi buffer as we can't hold any
+		 * of the locks these represent when calling iget. If there is a
+		 * pending error, then we are done.
 		 */
+del_cursor:
 		xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
 		xfs_buf_relse(agbp);
+		if (error)
+			break;
 		/*
-		 * Now format all the good inodes into the user's buffer.
+		 * Now format all the good inodes into the user's buffer. The
+		 * call to xfs_bulkstat_ag_ichunk() sets up the agino pointer
+		 * for the next loop iteration.
 		 */
 		irbufend = irbp;
 		for (irbp = irbuf;
-		     irbp < irbufend && XFS_BULKSTAT_UBLEFT(ubleft); irbp++) {
-			struct xfs_bulkstat_agichunk ac;
-
-			ac.ac_lastino = lastino;
-			ac.ac_ubuffer = &ubuffer;
-			ac.ac_ubleft = ubleft;
-			ac.ac_ubelem = ubelem;
+		     irbp < irbufend && ac.ac_ubleft >= statstruct_size;
+		     irbp++) {
 			error = xfs_bulkstat_ag_ichunk(mp, agno, irbp,
-					formatter, statstruct_size, &ac);
+					formatter, statstruct_size, &ac,
+					&agino);
 			if (error)
-				rval = error;
-
-			lastino = ac.ac_lastino;
-			ubleft = ac.ac_ubleft;
-			ubelem = ac.ac_ubelem;
+				break;
 
 			cond_resched();
 		}
+
 		/*
-		 * Set up for the next loop iteration.
+		 * If we've run out of space or had a formatting error, we
+		 * are now done
 		 */
-		if (XFS_BULKSTAT_UBLEFT(ubleft)) {
-			if (end_of_ag) {
-				agno++;
-				agino = 0;
-			} else
-				agino = XFS_INO_TO_AGINO(mp, lastino);
-		} else
+		if (ac.ac_ubleft < statstruct_size || error)
 			break;
+
+		if (end_of_ag) {
+			agno++;
+			agino = 0;
+		}
 	}
 	/*
 	 * Done, we're either out of filesystem or space to put the data.
 	 */
 	kmem_free(irbuf);
-	*ubcountp = ubelem;
+	*ubcountp = ac.ac_ubelem;
+
 	/*
-	 * Found some inodes, return them now and return the error next time.
+	 * We found some inodes, so clear the error status and return them.
+	 * The lastino pointer will point directly at the inode that triggered
+	 * any error that occurred, so on the next call the error will be
+	 * triggered again and propagated to userspace as there will be no
+	 * formatted inodes in the buffer.
 	 */
-	if (ubelem)
-		rval = 0;
-	if (agno >= mp->m_sb.sb_agcount) {
-		/*
-		 * If we ran out of filesystem, mark lastino as off
-		 * the end of the filesystem, so the next call
-		 * will return immediately.
-		 */
-		*lastinop = (xfs_ino_t)XFS_AGINO_TO_INO(mp, agno, 0);
+	if (ac.ac_ubelem)
+		error = 0;
+
+	/*
+	 * If we ran out of filesystem, lastino will point off the end of
+	 * the filesystem so the next call will return immediately.
+	 */
+	*lastinop = XFS_AGINO_TO_INO(mp, agno, agino);
+	if (agno >= mp->m_sb.sb_agcount)
 		*done = 1;
-	} else
-		*lastinop = (xfs_ino_t)lastino;
 
-	return rval;
+	return error;
 }
 
 int
diff --git a/fs/xfs/xfs_itable.h b/fs/xfs/xfs_itable.h
index aaed080..6ea8b39 100644
--- a/fs/xfs/xfs_itable.h
+++ b/fs/xfs/xfs_itable.h
@@ -30,22 +30,6 @@ typedef int (*bulkstat_one_pf)(struct xfs_mount	*mp,
 			       int		*ubused,
 			       int		*stat);
 
-struct xfs_bulkstat_agichunk {
-	xfs_ino_t	ac_lastino;	/* last inode returned */
-	char		__user **ac_ubuffer;/* pointer into user's buffer */
-	int		ac_ubleft;	/* bytes left in user's buffer */
-	int		ac_ubelem;	/* spaces used in user's buffer */
-};
-
-int
-xfs_bulkstat_ag_ichunk(
-	struct xfs_mount		*mp,
-	xfs_agnumber_t			agno,
-	struct xfs_inobt_rec_incore	*irbp,
-	bulkstat_one_pf			formatter,
-	size_t				statstruct_size,
-	struct xfs_bulkstat_agichunk	*acp);
-
 /*
  * Values for stat return value.
  */
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 9f622fe..206b97f 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -1425,6 +1425,7 @@ xfs_fs_fill_super(
 	sb->s_export_op = &xfs_export_operations;
 #ifdef CONFIG_XFS_QUOTA
 	sb->s_qcop = &xfs_quotactl_operations;
+	sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP | QTYPE_MASK_PRJ;
 #endif
 	sb->s_op = &xfs_super_operations;